1 files changed, 3 insertions, 3 deletions
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 460a4ce9c044..ef9fc15fbfbf 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -376,7 +376,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
 					  struct perf_sample *sample)
 {
 	/* Only kvm_entry records vcpu id. */
-	if (!thread->priv && kvm_entry_event(evsel)) {
+	if (!thread__priv(thread) && kvm_entry_event(evsel)) {
 		struct vcpu_event_record *vcpu_record;
 
 		vcpu_record = zalloc(sizeof(*vcpu_record));
@@ -386,10 +386,10 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
 		}
 
 		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
-		thread->priv = vcpu_record;
+		thread__set_priv(thread, vcpu_record);
 	}
 
-	return thread->priv;
+	return thread__priv(thread);
 }
 
 static bool handle_kvm_event(struct perf_kvm_stat *kvm,
diff --git a/.gitattributes b/.gitattributes
index 89c411b5ce6b..4b32eaa9571e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,4 @@
 *.c   diff=cpp
 *.h   diff=cpp
+*.dtsi diff=dts
+*.dts  diff=dts
diff --git a/.gitignore b/.gitignore
index 70580bdd352c..72ef86a5570d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,7 +32,6 @@
 *.lzo
 *.mod
 *.mod.c
-*.ns_deps
 *.o
 *.o.*
 *.patch
@@ -61,6 +60,7 @@ modules.order
 /System.map
 /Module.markers
 /modules.builtin.modinfo
+/modules.nsdeps
 
 #
 # RPM spec file (make rpm-pkg)
diff --git a/.mailmap b/.mailmap
index fd6219293057..bf581623ee3a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -32,6 +32,7 @@ Andy Adamson <andros@citi.umich.edu>
 Antoine Tenart <antoine.tenart@free-electrons.com>
 Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
+Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
 Axel Dyks <xl@xlsigned.net>
@@ -73,6 +74,7 @@ Dmitry Safonov <0x7f454c46@gmail.com> <dima@arista.com>
 Domen Puncer <domen@coderock.org>
 Douglas Gilbert <dougg@torque.net>
 Ed L. Cashin <ecashin@coraid.com>
+Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
 Felipe W Damasio <felipewd@terra.com.br>
 Felix Kuhling <fxkuehl@gmx.de>
@@ -98,12 +100,16 @@ Jacob Shin <Jacob.Shin@amd.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@motorola.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
+Jakub Kicinski <kuba@kernel.org> <jakub.kicinski@netronome.com>
 James Bottomley <jejb@mulgrave.(none)>
 James Bottomley <jejb@titanic.il.steeleye.com>
 James E Wilson <wilson@specifix.com>
 James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com>
 James Hogan <jhogan@kernel.org> <james@albanarts.com>
 James Ketrenos <jketreno@io.(none)>
+Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
+Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
+Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
 Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
@@ -133,6 +139,7 @@ Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Kamil Konieczny <k.konieczny@samsung.com> <k.konieczny@partner.samsung.com>
 Kay Sievers <kay.sievers@vrfy.org>
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
@@ -148,6 +155,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
 Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
+Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
@@ -155,6 +163,7 @@ Mark Brown <broonie@sirena.org.uk>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
 Mathieu Othacehe <m.othacehe@gmail.com>
 Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
 Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
@@ -202,6 +211,10 @@ Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
 Patrick Mochel <mochel@digitalimplant.org>
 Paul Burton <paulburton@kernel.org> <paul.burton@imgtec.com>
 Paul Burton <paulburton@kernel.org> <paul.burton@mips.com>
+Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.ibm.com>
+Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.vnet.ibm.com>
+Paul E. McKenney <paulmck@kernel.org> <paul.mckenney@linaro.org>
+Paul E. McKenney <paulmck@kernel.org> <paulmck@us.ibm.com>
 Peter A Jonsson <pj@ludd.ltu.se>
 Peter Oruba <peter@oruba.de>
 Peter Oruba <peter.oruba@amd.com>
@@ -210,6 +223,7 @@ Praveen BP <praveenbp@ti.com>
 Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
 Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
 Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
+Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
 Rajesh Shah <rajesh.shah@intel.com>
 Ralf Baechle <ralf@linux-mips.org>
 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
@@ -260,6 +274,7 @@ Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
+Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
 Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
@@ -271,3 +286,5 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
+Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
+Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
diff --git a/CREDITS b/CREDITS
index 031605d46b4d..a97d3280a627 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1875,8 +1875,9 @@ S: The Netherlands
 
 N: Martin Kepplinger
 E: martink@posteo.de
-E: martin.kepplinger@ginzinger.com
+E: martin.kepplinger@puri.sm
 W: http://www.martinkepplinger.com
+P: 4096R/5AB387D3 F208 2B88 0F9E 4239 3468  6E3F 5003 98DF 5AB3 87D3
 D: mma8452 accelerators iio driver
 D: pegasus_notetaker input driver
 D: Kernel fixes and cleanups
@@ -3301,7 +3302,9 @@ S: France
 N: Aleksa Sarai
 E: cyphar@cyphar.com
 W: https://www.cyphar.com/
-D: `pids` cgroup subsystem
+D: /sys/fs/cgroup/pids
+D: openat2(2)
+S: Sydney, Australia
 
 N: Dipankar Sarma
 E: dipankar@in.ibm.com
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-disable b/Documentation/ABI/obsolete/sysfs-selinux-disable
new file mode 100644
index 000000000000..c340278e3cf8
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-selinux-disable
@@ -0,0 +1,26 @@
+What:		/sys/fs/selinux/disable
+Date:		April 2005 (predates git)
+KernelVersion:	2.6.12-rc2 (predates git)
+Contact:	selinux@vger.kernel.org
+Description:
+
+	The selinuxfs "disable" node allows SELinux to be disabled at runtime
+	prior to a policy being loaded into the kernel.  If disabled via this
+	mechanism, SELinux will remain disabled until the system is rebooted.
+
+	The preferred method of disabling SELinux is via the "selinux=0" boot
+	parameter, but the selinuxfs "disable" node was created to make it
+	easier for systems with primitive bootloaders that did not allow for
+	easy modification of the kernel command line.  Unfortunately, allowing
+	for SELinux to be disabled at runtime makes it difficult to secure the
+	kernel's LSM hooks using the "__ro_after_init" feature.
+
+	Thankfully, the need for the SELinux runtime disable appears to be
+	gone, the default Kconfig configuration disables this selinuxfs node,
+	and only one of the major distributions, Fedora, supports disabling
+	SELinux at runtime.  Fedora is in the process of removing the
+	selinuxfs "disable" node and once that is complete we will start the
+	slow process of removing this code from the kernel.
+
+	More information on /sys/fs/selinux/disable can be found under the
+	CONFIG_SECURITY_SELINUX_DISABLE Kconfig option.
diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband
index aed21b8916a2..96dfe1926b76 100644
--- a/Documentation/ABI/stable/sysfs-class-infiniband
+++ b/Documentation/ABI/stable/sysfs-class-infiniband
@@ -314,25 +314,6 @@ Description:
 		board_id:	(RO) Manufacturing board ID
 
 
-sysfs interface for Chelsio T3 RDMA Driver (cxgb3)
---------------------------------------------------
-
-What:		/sys/class/infiniband/cxgb3_X/hw_rev
-What:		/sys/class/infiniband/cxgb3_X/hca_type
-What:		/sys/class/infiniband/cxgb3_X/board_id
-Date:		Feb, 2007
-KernelVersion:	v2.6.21
-Contact:	linux-rdma@vger.kernel.org
-Description:
-		hw_rev:		(RO) Hardware revision number
-
-		hca_type:	(RO) HCA type. Here it is a driver short name.
-				It should normally match the name in its bus
-				driver structure (e.g.  pci_driver::name).
-
-		board_id:	(RO) Manufacturing board id
-
-
 sysfs interface for Mellanox ConnectX HCA IB driver (mlx4)
 ----------------------------------------------------------
 
diff --git a/Documentation/ABI/stable/sysfs-class-tpm b/Documentation/ABI/stable/sysfs-class-tpm
index c0e23830f56a..58e94e7d55be 100644
--- a/Documentation/ABI/stable/sysfs-class-tpm
+++ b/Documentation/ABI/stable/sysfs-class-tpm
@@ -1,7 +1,7 @@
 What:		/sys/class/tpm/tpmX/device/
 Date:		April 2005
 KernelVersion:	2.6.12
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The device/ directory under a specific TPM instance exposes
 		the properties of that TPM chip
 
@@ -9,7 +9,7 @@ Description:	The device/ directory under a specific TPM instance exposes
 What:		/sys/class/tpm/tpmX/device/active
 Date:		April 2006
 KernelVersion:	2.6.17
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "active" property prints a '1' if the TPM chip is accepting
 		commands. An inactive TPM chip still contains all the state of
 		an active chip (Storage Root Key, NVRAM, etc), and can be
@@ -21,7 +21,7 @@ Description:	The "active" property prints a '1' if the TPM chip is accepting
 What:		/sys/class/tpm/tpmX/device/cancel
 Date:		June 2005
 KernelVersion:	2.6.13
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "cancel" property allows you to cancel the currently
 		pending TPM command. Writing any value to cancel will call the
 		TPM vendor specific cancel operation.
@@ -29,7 +29,7 @@ Description:	The "cancel" property allows you to cancel the currently
 What:		/sys/class/tpm/tpmX/device/caps
 Date:		April 2005
 KernelVersion:	2.6.12
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "caps" property contains TPM manufacturer and version info.
 
 		Example output:
@@ -46,7 +46,7 @@ Description:	The "caps" property contains TPM manufacturer and version info.
 What:		/sys/class/tpm/tpmX/device/durations
 Date:		March 2011
 KernelVersion:	3.1
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "durations" property shows the 3 vendor-specific values
 		used to wait for a short, medium and long TPM command. All
 		TPM commands are categorized as short, medium or long in
@@ -69,7 +69,7 @@ Description:	The "durations" property shows the 3 vendor-specific values
 What:		/sys/class/tpm/tpmX/device/enabled
 Date:		April 2006
 KernelVersion:	2.6.17
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "enabled" property prints a '1' if the TPM chip is enabled,
 		meaning that it should be visible to the OS. This property
 		may be visible but produce a '0' after some operation that
@@ -78,7 +78,7 @@ Description:	The "enabled" property prints a '1' if the TPM chip is enabled,
 What:		/sys/class/tpm/tpmX/device/owned
 Date:		April 2006
 KernelVersion:	2.6.17
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "owned" property produces a '1' if the TPM_TakeOwnership
 		ordinal has been executed successfully in the chip. A '0'
 		indicates that ownership hasn't been taken.
@@ -86,7 +86,7 @@ Description:	The "owned" property produces a '1' if the TPM_TakeOwnership
 What:		/sys/class/tpm/tpmX/device/pcrs
 Date:		April 2005
 KernelVersion:	2.6.12
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "pcrs" property will dump the current value of all Platform
 		Configuration Registers in the TPM. Note that since these
 		values may be constantly changing, the output is only valid
@@ -109,7 +109,7 @@ Description:	The "pcrs" property will dump the current value of all Platform
 What:		/sys/class/tpm/tpmX/device/pubek
 Date:		April 2005
 KernelVersion:	2.6.12
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "pubek" property will return the TPM's public endorsement
 		key if possible. If the TPM has had ownership established and
 		is version 1.2, the pubek will not be available without the
@@ -161,7 +161,7 @@ Description:	The "pubek" property will return the TPM's public endorsement
 What:		/sys/class/tpm/tpmX/device/temp_deactivated
 Date:		April 2006
 KernelVersion:	2.6.17
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "temp_deactivated" property returns a '1' if the chip has
 		been temporarily deactivated, usually until the next power
 		cycle. Whether a warm boot (reboot) will clear a TPM chip
@@ -170,7 +170,7 @@ Description:	The "temp_deactivated" property returns a '1' if the chip has
 What:		/sys/class/tpm/tpmX/device/timeouts
 Date:		March 2011
 KernelVersion:	3.1
-Contact:	tpmdd-devel@lists.sf.net
+Contact:	linux-integrity@vger.kernel.org
 Description:	The "timeouts" property shows the 4 vendor-specific values
 		for the TPM's interface spec timeouts. The use of these
 		timeouts is defined by the TPM interface spec that the chip
@@ -183,3 +183,14 @@ Description:	The "timeouts" property shows the 4 vendor-specific values
 		The four timeout values are shown in usecs, with a trailing
 		"[original]" or "[adjusted]" depending on whether the values
 		were scaled by the driver to be reported in usec from msecs.
+
+What:		/sys/class/tpm/tpmX/tpm_version_major
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	linux-integrity@vger.kernel.org
+Description:	The "tpm_version_major" property shows the TCG spec major version
+		implemented by the TPM device.
+
+		Example output:
+
+		2
diff --git a/Documentation/ABI/stable/sysfs-driver-aspeed-vuart b/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
index 8062953ce77b..950cafc9443a 100644
--- a/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
+++ b/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
@@ -6,10 +6,19 @@ Description:	Configures which IO port the host side of the UART
 Users:		OpenBMC.  Proposed changes should be mailed to
 		openbmc@lists.ozlabs.org
 
-What:		/sys/bus/platform/drivers/aspeed-vuart*/sirq
+What:		/sys/bus/platform/drivers/aspeed-vuart/*/sirq
 Date:		April 2017
 Contact:	Jeremy Kerr <jk@ozlabs.org>
 Description:	Configures which interrupt number the host side of
 		the UART will appear on the host <-> BMC LPC bus.
 Users:		OpenBMC.  Proposed changes should be mailed to
 		openbmc@lists.ozlabs.org
+
+What:		/sys/bus/platform/drivers/aspeed-vuart/*/sirq_polarity
+Date:		July 2019
+Contact:	Oskar Senft <osk@google.com>
+Description:	Configures the polarity of the serial interrupt to the
+		host via the BMC LPC bus.
+		Set to 0 for active-low or 1 for active-high.
+Users:		OpenBMC.  Proposed changes should be mailed to
+		openbmc@lists.ozlabs.org
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
new file mode 100644
index 000000000000..f4be46cc6cb6
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -0,0 +1,171 @@
+What:           sys/bus/dsa/devices/dsa<m>/cdev_major
+Date:           Oct 25, 2019
+KernelVersion: 	5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:	The major number that the character device driver assigned to
+		this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/errors
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The error information for this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_batch_size
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The largest number of work descriptors in a batch.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_work_queues_size
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The maximum work queue size supported by this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_engines
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The maximum number of engines supported by this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_groups
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The maximum number of groups can be created under this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_tokens
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The total number of bandwidth tokens supported by this device.
+		The bandwidth tokens represent resources within the DSA
+		implementation, and these resources are allocated by engines to
+		support operations.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_transfer_size
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The number of bytes to be read from the source address to
+		perform the operation. The maximum transfer size is dependent on
+		the workqueue the descriptor was submitted to.
+
+What:           sys/bus/dsa/devices/dsa<m>/max_work_queues
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The maximum work queue number that this device supports.
+
+What:           sys/bus/dsa/devices/dsa<m>/numa_node
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The numa node number for this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/op_cap
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The operation capability bit mask specify the operation types
+		supported by the this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/state
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The state information of this device. It can be either enabled
+		or disabled.
+
+What:           sys/bus/dsa/devices/dsa<m>/group<m>.<n>
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The assigned group under this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/engine<m>.<n>
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The assigned engine under this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/wq<m>.<n>
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The assigned work queue under this device.
+
+What:           sys/bus/dsa/devices/dsa<m>/configurable
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    To indicate if this device is configurable or not.
+
+What:           sys/bus/dsa/devices/dsa<m>/token_limit
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The maximum number of bandwidth tokens that may be in use at
+		one time by operations that access low bandwidth memory in the
+		device.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/group_id
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The group id that this work queue belongs to.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/size
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The work queue size for this work queue.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/type
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The type of this work queue, it can be "kernel" type for work
+		queue usages in the kernel space or "user" type for work queue
+		usages by applications in user space.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/cdev_minor
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The minor number assigned to this work queue by the character
+		device driver.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/mode
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The work queue mode type for this work queue.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/priority
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The priority value of this work queue, it is a vlue relative to
+		other work queue in the same group to control quality of service
+		for dispatching work from multiple workqueues in the same group.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/state
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The current state of the work queue.
+
+What:           sys/bus/dsa/devices/wq<m>.<n>/threshold
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The number of entries in this work queue that may be filled
+		via a limited portal.
+
+What:           sys/bus/dsa/devices/engine<m>.<n>/group_id
+Date:           Oct 25, 2019
+KernelVersion:  5.6.0
+Contact:        dmaengine@vger.kernel.org
+Description:    The group that this engine belongs to.
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
index 7049a2b50359..84972a57caae 100644
--- a/Documentation/ABI/stable/sysfs-driver-ib_srp
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -67,6 +67,8 @@ Description:	Interface for making ib_srp connect to a new target.
 		  initiator is allowed to queue per SCSI host. The default
 		  value for this parameter is 62. The lowest supported value
 		  is 2.
+		* max_it_iu_size, a decimal number specifying the maximum
+		  initiator to target information unit length.
 
 What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
 Date:		January 2, 2006
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index 8ca498447aeb..b0d90cc696a8 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -1,5 +1,4 @@
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_health
-
 Date:		June 2018
 KernelVersion:	4.19
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -19,7 +18,6 @@ Description:	These files show with which CPLD versions have been burned
 		The files are read only.
 
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/fan_dir
-
 Date:		December 2018
 KernelVersion:	5.0
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -29,18 +27,16 @@ Description:	This file shows the system fans direction:
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
-
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version
 Date:		November 2018
 KernelVersion:	5.0
 Contact:	Vadim Pasternak <vadimpmellanox.com>
 Description:	These files show with which CPLD versions have been burned
-		on LED board.
+		on LED or Gearbox board.
 
 		The files are read only.
 
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
-
 Date:		November 2018
 KernelVersion:	5.0
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -108,7 +104,6 @@ What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_pwr_fail
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_comex
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_system
 What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_voltmon_upgrade_fail
-
 Date:		November 2018
 KernelVersion:	5.0
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -121,6 +116,21 @@ Description:	These files show the system reset cause, as following: ComEx
 
 		The files are read only.
 
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version
+Date:		November 2018
+KernelVersion:	5.0
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	These files show with which CPLD versions have been burned
+		on LED board.
+
+		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_thermal
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_asic
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_reload_bios
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sff_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
 Date:		June 2019
 KernelVersion:	5.3
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -134,9 +144,65 @@ Description:	These files show the system reset cause, as following:
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_thermal
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_wd
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_asic
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_reload_bios
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sff_wd
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config1
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config2
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	These files show system static topology identification
+		like system's static I2C topology, number and type of FPGA
+		devices within the system and so on.
+
+		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_ac_pwr_fail
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_platform
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_soc
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_pwr_off
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	These files show the system reset causes, as following: reset
+		due to AC power failure, reset invoked from software by
+		assertion reset signal through CPLD. reset caused by signal
+		asserted by SOC through ACPI register, reset invoked from
+		software by assertion power off signal through CPLD.
+
+		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pcie_asic_reset_dis
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to retain ASIC up during PCIe root complex
+		reset, when attribute is set 1.
+
+		The file is read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/vpd_wp
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to overwrite system VPD hardware wrtie
+		protection when attribute is set 1.
+
+		The file is read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/voltreg_update_status
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file exposes the configuration update status of burnable
+		voltage regulator devices. The status values are as following:
+		0 - OK; 1 - CRC failure; 2 = I2C failure; 3 - in progress.
+
+		The file is read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ufm_version
+Date:		January 2020
+KernelVersion:	5.6
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file exposes the firmware version of burnable voltage
+		regulator devices.
+
+		The file is read only.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
index 95a36589a66b..4594cc2435e8 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -16,6 +16,10 @@ Description:
 				write UDC's name found in /sys/class/udc/*
 				to bind a gadget, empty string "" to unbind.
 
+		max_speed	- maximum speed the driver supports. Valid
+				names are super-speed-plus, super-speed,
+				high-speed, full-speed, and low-speed.
+
 		bDeviceClass	- USB device class code
 		bDeviceSubClass	- USB device subclass code
 		bDeviceProtocol	- USB device protocol code
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
new file mode 100644
index 000000000000..ec4a79e3a807
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -0,0 +1,57 @@
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump debug registers from the HPRE cluster.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Write the HPRE core selection in the cluster into this file,
+		and then we can read the debug information of the core.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    HPRE cores debug registers read clear control. 1 means enable
+		register read clear, otherwise 0. Writing to this file has no
+		functional effect, only enable or disable counters clear after
+		reading of these registers.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    One HPRE controller has one PF and multiple VFs, each function
+		has a QM. Select the QM which below qm refers to.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/regs
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump debug registers from the HPRE.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_regs
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump debug registers from the QM.
+		Available for PF and VF in host. VF in guest currently only
+		has one debug register.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    One QM may contain multiple queues. Select specific queue to
+		show its debug registers in above qm_regs.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
+Date:           Sep 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    QM debug registers(qm_regs) read clear control. 1 means enable
+		register read clear, otherwise 0.
+		Writing to this file has no functional effect, only enable or
+		disable counters clear after reading of these registers.
+		Only available for PF.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
new file mode 100644
index 000000000000..06adb899495e
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -0,0 +1,43 @@
+What:           /sys/kernel/debug/hisi_sec/<bdf>/sec_dfx
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump the debug registers of SEC cores.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_sec/<bdf>/clear_enable
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Enabling/disabling of clear action after reading
+		the SEC debug registers.
+		0: disable, 1: enable.
+		Only available for PF, and take no other effect on SEC.
+
+What:           /sys/kernel/debug/hisi_sec/<bdf>/current_qm
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    One SEC controller has one PF and multiple VFs, each function
+		has a QM. This file can be used to select the QM which below
+		qm refers to.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_sec/<bdf>/qm/qm_regs
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump of QM related debug registers.
+		Available for PF and VF in host. VF in guest currently only
+		has one debug register.
+
+What:           /sys/kernel/debug/hisi_sec/<bdf>/qm/current_q
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    One QM of SEC may contain multiple queues. Select specific
+		queue to show its debug registers in above 'qm_regs'.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_sec/<bdf>/qm/clear_enable
+Date:           Oct 2019
+Contact:        linux-crypto@vger.kernel.org
+Description:    Enabling/disabling of clear action after reading
+		the SEC's QM debug registers.
+		0: disable, 1: enable.
+		Only available for PF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hyperv b/Documentation/ABI/testing/debugfs-hyperv
new file mode 100644
index 000000000000..9185e1b06bba
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hyperv
@@ -0,0 +1,23 @@
+What:           /sys/kernel/debug/hyperv/<UUID>/fuzz_test_state
+Date:           October 2019
+KernelVersion:  5.5
+Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
+Description:    Fuzz testing status of a vmbus device, whether its in an ON
+                state or a OFF state
+Users:          Debugging tools
+
+What:           /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_buffer_interrupt_delay
+Date:           October 2019
+KernelVersion:  5.5
+Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
+Description:    Fuzz testing buffer interrupt delay value between 0 - 1000
+		 microseconds (inclusive).
+Users:          Debugging tools
+
+What:           /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_message_delay
+Date:           October 2019
+KernelVersion:  5.5
+Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
+Description:    Fuzz testing message delay value between 0 - 1000 microseconds
+		 (inclusive).
+Users:          Debugging tools
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 29ebe9afdac4..cd572912c593 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -25,10 +25,11 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 			option:	[[appraise_type=]] [template=] [permit_directio]
+				[appraise_flag=] [keyrings=]
 		base: 	func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
-				[KEXEC_CMDLINE]
+				[KEXEC_CMDLINE] [KEY_CHECK]
 			mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
 			       [[^]MAY_EXEC]
 			fsmagic:= hex value
@@ -38,6 +39,12 @@ Description:
 			fowner:= decimal value
 		lsm:  	are LSM specific
 		option:	appraise_type:= [imasig] [imasig|modsig]
+			appraise_flag:= [check_blacklist]
+			Currently, blacklist check is only for files signed with appended
+			signature.
+			keyrings:= list of keyrings
+			(eg, .builtin_trusted_keys|.ima). Only valid
+			when action is "measure" and func is KEY_CHECK.
 			template:= name of a defined IMA template type
 			(eg, ima-ng). Only valid when action is "measure".
 			pcr:= decimal value
@@ -109,3 +116,12 @@ Description:
 		Example of appraise rule allowing modsig appended signatures:
 
 			appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig
+
+		Example of measure rule using KEY_CHECK to measure all keys:
+
+			measure func=KEY_CHECK
+
+		Example of measure rule using KEY_CHECK to only measure
+		keys added to .builtin_trusted_keys or .ima keyring:
+
+			measure func=KEY_CHECK keyrings=.builtin_trusted_keys|.ima
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index 2c44b4f1b060..70dcaf2481f4 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -29,4 +29,9 @@ Description:
 		17 - sectors discarded
 		18 - time spent discarding
 
+		Kernel 5.5+ appends two more fields for flush requests:
+
+		19 - flush requests completed successfully
+		20 - time spent flushing
+
 		For more details refer to Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index f8c7c7126bb1..ed8c14f161ee 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -15,6 +15,12 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
+		12 - discards completed
+		13 - discards merged
+		14 - sectors discarded
+		15 - time spent discarding (ms)
+		16 - flush requests completed
+		17 - time spent flushing (ms)
 		For more details refer Documentation/admin-guide/iostats.rst
 
 
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
index 36258bc1b473..614874e2cf53 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
@@ -1,4 +1,4 @@
-What:		/sys/bus/coresight/devices/<memory_map>.etm/enable_source
+What:		/sys/bus/coresight/devices/etm<N>/enable_source
 Date:		April 2015
 KernelVersion:  4.01
 Contact:        Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -8,82 +8,82 @@ Description:	(RW) Enable/disable tracing on this specific trace entiry.
 		of coresight components linking the source to the sink is
 		configured and managed automatically by the coresight framework.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cpu
+What:		/sys/bus/coresight/devices/etm<N>/cpu
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) The CPU this tracing entity is associated with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_pe_cmp
+What:		/sys/bus/coresight/devices/etm<N>/nr_pe_cmp
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of PE comparator inputs that are
 		available for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_addr_cmp
+What:		/sys/bus/coresight/devices/etm<N>/nr_addr_cmp
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of address comparator pairs that are
 		available for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_cntr
+What:		/sys/bus/coresight/devices/etm<N>/nr_cntr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of counters that are available for
 		tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_ext_inp
+What:		/sys/bus/coresight/devices/etm<N>/nr_ext_inp
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates how many external inputs are implemented.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/numcidc
+What:		/sys/bus/coresight/devices/etm<N>/numcidc
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of Context ID comparators that are
 		available for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/numvmidc
+What:		/sys/bus/coresight/devices/etm<N>/numvmidc
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of VMID comparators that are available
 		for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nrseqstate
+What:		/sys/bus/coresight/devices/etm<N>/nrseqstate
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of sequencer states that are
 		implemented.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_resource
+What:		/sys/bus/coresight/devices/etm<N>/nr_resource
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of resource selection pairs that are
 		available for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/nr_ss_cmp
+What:		/sys/bus/coresight/devices/etm<N>/nr_ss_cmp
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Indicates the number of single-shot comparator controls that
 		are available for tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/reset
+What:		/sys/bus/coresight/devices/etm<N>/reset
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(W) Cancels all configuration on a trace unit and set it back
 		to its boot configuration.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mode
+What:		/sys/bus/coresight/devices/etm<N>/mode
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -91,302 +91,349 @@ Description: 	(RW) Controls various modes supported by this ETM, for example
 		P0 instruction tracing, branch broadcast, cycle counting and
 		context ID tracing.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/pe
+What:		/sys/bus/coresight/devices/etm<N>/pe
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls which PE to trace.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/event
+What:		/sys/bus/coresight/devices/etm<N>/event
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls the tracing of arbitrary events from bank 0 to 3.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/event_instren
+What:		/sys/bus/coresight/devices/etm<N>/event_instren
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls the behavior of the events in bank 0 to 3.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/event_ts
+What:		/sys/bus/coresight/devices/etm<N>/event_ts
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls the insertion of global timestamps in the trace
 		streams.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/syncfreq
+What:		/sys/bus/coresight/devices/etm<N>/syncfreq
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls how often trace synchronization requests occur.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cyc_threshold
+What:		/sys/bus/coresight/devices/etm<N>/cyc_threshold
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Sets the threshold value for cycle counting.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/bb_ctrl
+What:		/sys/bus/coresight/devices/etm<N>/bb_ctrl
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls which regions in the memory map are enabled to
 		use branch broadcasting.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/event_vinst
+What:		/sys/bus/coresight/devices/etm<N>/event_vinst
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls instruction trace filtering.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/s_exlevel_vinst
+What:		/sys/bus/coresight/devices/etm<N>/s_exlevel_vinst
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) In Secure state, each bit controls whether instruction
 		tracing is enabled for the corresponding exception level.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/ns_exlevel_vinst
+What:		/sys/bus/coresight/devices/etm<N>/ns_exlevel_vinst
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) In non-secure state, each bit controls whether instruction
 		tracing is enabled for the corresponding exception level.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/addr_idx
+What:		/sys/bus/coresight/devices/etm<N>/addr_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Select which address comparator or pair (of comparators) to
 		work with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/addr_instdatatype
+What:		/sys/bus/coresight/devices/etm<N>/addr_instdatatype
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls what type of comparison the trace unit performs.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/addr_single
+What:		/sys/bus/coresight/devices/etm<N>/addr_single
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Used to setup single address comparator values.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/addr_range
+What:		/sys/bus/coresight/devices/etm<N>/addr_range
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Used to setup address range comparator values.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/seq_idx
+What:		/sys/bus/coresight/devices/etm<N>/seq_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Select which sequensor.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/seq_state
+What:		/sys/bus/coresight/devices/etm<N>/seq_state
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Use this to set, or read, the sequencer state.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/seq_event
+What:		/sys/bus/coresight/devices/etm<N>/seq_event
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Moves the sequencer state to a specific state.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/seq_reset_event
+What:		/sys/bus/coresight/devices/etm<N>/seq_reset_event
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Moves the sequencer to state 0 when a programmed event
 		occurs.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cntr_idx
+What:		/sys/bus/coresight/devices/etm<N>/cntr_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Select which counter unit to work with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cntrldvr
+What:		/sys/bus/coresight/devices/etm<N>/cntrldvr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) This sets or returns the reload count value of the
 		specific counter.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cntr_val
+What:		/sys/bus/coresight/devices/etm<N>/cntr_val
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) This sets or returns the current count value of the
                 specific counter.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/cntr_ctrl
+What:		/sys/bus/coresight/devices/etm<N>/cntr_ctrl
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls the operation of the selected counter.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/res_idx
+What:		/sys/bus/coresight/devices/etm<N>/res_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Select which resource selection unit to work with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/res_ctrl
+What:		/sys/bus/coresight/devices/etm<N>/res_ctrl
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description: 	(RW) Controls the selection of the resources in the trace unit.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/ctxid_idx
+What:		/sys/bus/coresight/devices/etm<N>/ctxid_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Select which context ID comparator to work with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/ctxid_pid
+What:		/sys/bus/coresight/devices/etm<N>/ctxid_pid
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Get/Set the context ID comparator value to trigger on.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/ctxid_masks
+What:		/sys/bus/coresight/devices/etm<N>/ctxid_masks
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Mask for all 8 context ID comparator value
 		registers (if implemented).
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/vmid_idx
+What:		/sys/bus/coresight/devices/etm<N>/vmid_idx
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Select which virtual machine ID comparator to work with.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/vmid_val
+What:		/sys/bus/coresight/devices/etm<N>/vmid_val
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Get/Set the virtual machine ID comparator value to
 		trigger on.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/vmid_masks
+What:		/sys/bus/coresight/devices/etm<N>/vmid_masks
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(RW) Mask for all 8 virtual machine ID comparator value
 		registers (if implemented).
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcoslsr
+What:		/sys/bus/coresight/devices/etm<N>/addr_exlevel_s_ns
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(RW) Set the Exception Level matching bits for secure and
+		non-secure exception levels.
+
+What:		/sys/bus/coresight/devices/etm<N>/vinst_pe_cmp_start_stop
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(RW) Access the start stop control register for PE input
+		comparators.
+
+What:		/sys/bus/coresight/devices/etm<N>/addr_cmp_view
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(R) Print the current settings for the selected address
+		comparator.
+
+What:		/sys/bus/coresight/devices/etm<N>/sshot_idx
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(RW) Select the single shot control register to access.
+
+What:		/sys/bus/coresight/devices/etm<N>/sshot_ctrl
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(RW) Access the selected single shot control register.
+
+What:		/sys/bus/coresight/devices/etm<N>/sshot_status
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(R) Print the current value of the selected single shot
+		status register.
+
+What:		/sys/bus/coresight/devices/etm<N>/sshot_pe_ctrl
+Date:		December 2019
+KernelVersion:	5.5
+Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:	(RW) Access the selected single show PE comparator control
+		register.
+
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcoslsr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the OS Lock Status Register (0x304).
 		The value it taken directly  from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpdcr
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpdcr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Power Down Control Register
 		(0x310).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpdsr
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpdsr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Power Down Status Register
 		(0x314).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trclsr
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trclsr
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the SW Lock Status Register
 		(0xFB4).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcauthstatus
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcauthstatus
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Authentication Status Register
 		(0xFB8).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcdevid
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcdevid
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Device ID Register
 		(0xFC8).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcdevtype
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcdevtype
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Device Type Register
 		(0xFCC).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr0
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpidr0
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Peripheral ID0 Register
 		(0xFE0).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr1
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpidr1
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Peripheral ID1 Register
 		(0xFE4).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr2
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpidr2
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Peripheral ID2 Register
 		(0xFE8).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr3
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcpidr3
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the Peripheral ID3 Register
 		(0xFEC).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcconfig
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trcconfig
 Date:		February 2016
 KernelVersion:	4.07
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the trace configuration register
 		(0x010) as currently set by SW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/mgmt/trctraceid
+What:		/sys/bus/coresight/devices/etm<N>/mgmt/trctraceid
 Date:		February 2016
 KernelVersion:	4.07
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Print the content of the trace ID register (0x040).
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr0
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr0
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns the tracing capabilities of the trace unit (0x1E0).
 		The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr1
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr1
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns the tracing capabilities of the trace unit (0x1E4).
 		The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr2
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr2
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -394,7 +441,7 @@ Description:	(R) Returns the maximum size of the data value, data address,
 		VMID, context ID and instuction address in the trace unit
 		(0x1E8).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr3
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr3
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -403,42 +450,42 @@ Description:	(R) Returns the value associated with various resources
 		architecture specification for more details (0x1E8).
 		The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr4
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr4
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns how many resources the trace unit supports (0x1F0).
 		The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr5
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr5
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns how many resources the trace unit supports (0x1F4).
 		The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr8
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr8
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns the maximum speculation depth of the instruction
 		trace stream. (0x180).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr9
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr9
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns the number of P0 right-hand keys that the trace unit
 		can use (0x184).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr10
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr10
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:	(R) Returns the number of P1 right-hand keys that the trace unit
 		can use (0x188).  The value is taken directly from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr11
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr11
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -446,7 +493,7 @@ Description:	(R) Returns the number of special P1 right-hand keys that the
 		trace unit can use (0x18C).  The value is taken directly from
 		the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr12
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr12
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -454,7 +501,7 @@ Description:	(R) Returns the number of conditional P1 right-hand keys that
 		the trace unit can use (0x190).  The value is taken directly
 		from the HW.
 
-What:		/sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr13
+What:		/sys/bus/coresight/devices/etm<N>/trcidr/trcidr13
 Date:		April 2015
 KernelVersion:	4.01
 Contact:	Mathieu Poirier <mathieu.poirier@linaro.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-fsi b/Documentation/ABI/testing/sysfs-bus-fsi
index 57c806350d6c..320697bdf41d 100644
--- a/Documentation/ABI/testing/sysfs-bus-fsi
+++ b/Documentation/ABI/testing/sysfs-bus-fsi
@@ -1,25 +1,25 @@
-What:           /sys/bus/platform/devices/fsi-master/rescan
+What:           /sys/bus/platform/devices/../fsi-master/fsi0/rescan
 Date:		May 2017
 KernelVersion:  4.12
-Contact:        cbostic@linux.vnet.ibm.com
+Contact:        linux-fsi@lists.ozlabs.org
 Description:
                 Initiates a FSI master scan for all connected slave devices
 		on its links.
 
-What:           /sys/bus/platform/devices/fsi-master/break
+What:           /sys/bus/platform/devices/../fsi-master/fsi0/break
 Date:		May 2017
 KernelVersion:  4.12
-Contact:        cbostic@linux.vnet.ibm.com
+Contact:        linux-fsi@lists.ozlabs.org
 Description:
 		Sends an FSI BREAK command on a master's communication
 		link to any connnected slaves.  A BREAK resets connected
 		device's logic and preps it to receive further commands
 		from the master.
 
-What:           /sys/bus/platform/devices/fsi-master/slave@00:00/term
+What:           /sys/bus/platform/devices/../fsi-master/fsi0/slave@00:00/term
 Date:		May 2017
 KernelVersion:  4.12
-Contact:        cbostic@linux.vnet.ibm.com
+Contact:        linux-fsi@lists.ozlabs.org
 Description:
 		Sends an FSI terminate command from the master to its
 		connected slave. A terminate resets the slave's state machines
@@ -29,10 +29,10 @@ Description:
 		ongoing operation in case of an expired 'Master Time Out'
 		timer.
 
-What:           /sys/bus/platform/devices/fsi-master/slave@00:00/raw
+What:           /sys/bus/platform/devices/../fsi-master/fsi0/slave@00:00/raw
 Date:		May 2017
 KernelVersion:  4.12
-Contact:        cbostic@linux.vnet.ibm.com
+Contact:        linux-fsi@lists.ozlabs.org
 Description:
 		Provides a means of reading/writing a 32 bit value from/to a
 		specified FSI bus address.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 680451695422..d3e53a6d8331 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -753,6 +753,8 @@ What:		/sys/.../events/in_illuminance0_thresh_falling_value
 what:		/sys/.../events/in_illuminance0_thresh_rising_value
 what:		/sys/.../events/in_proximity0_thresh_falling_value
 what:		/sys/.../events/in_proximity0_thresh_rising_value
+What:		/sys/.../events/in_illuminance_thresh_rising_value
+What:		/sys/.../events/in_illuminance_thresh_falling_value
 KernelVersion:	2.6.37
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -972,6 +974,7 @@ What:		/sys/.../events/in_activity_jogging_thresh_rising_period
 What:		/sys/.../events/in_activity_jogging_thresh_falling_period
 What:		/sys/.../events/in_activity_running_thresh_rising_period
 What:		/sys/.../events/in_activity_running_thresh_falling_period
+What:		/sys/.../events/in_illuminance_thresh_either_period
 KernelVersion:	2.6.37
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -1715,3 +1718,24 @@ Description:
 		Mass concentration reading of particulate matter in ug / m3.
 		pmX consists of particles with aerodynamic diameter less or
 		equal to X micrometers.
+
+What:		/sys/bus/iio/devices/iio:deviceX/events/in_illuminance_period_available
+Date:		November 2019
+KernelVersion:	5.4
+Contact:	linux-iio@vger.kernel.org
+Description:
+		List of valid periods (in seconds) for which the light intensity
+		must be above the threshold level before interrupt is asserted.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_filter_notch_center_frequency
+KernelVersion:	5.5
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Center frequency in Hz for a notch filter. Used i.e. for line
+		noise suppression.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_temp_thermocouple_type
+KernelVersion:	5.5
+Contact:	linux-iio@vger.kernel.org
+Description:
+		One of the following thermocouple types: B, E, J, K, N, R, S, T.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
new file mode 100644
index 000000000000..7627d3be08f5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
@@ -0,0 +1,39 @@
+What:		/sys/bus/iio/devices/iio:deviceX/ac_excitation_en
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Reading gives the state of AC excitation.
+		Writing '1' enables AC excitation.
+
+What:		/sys/bus/iio/devices/iio:deviceX/bridge_switch_en
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		This bridge switch is used to disconnect it when there is a
+		need to minimize the system current consumption.
+		Reading gives the state of the bridge switch.
+		Writing '1' enables the bridge switch.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Initiates the system calibration procedure. This is done on a
+		single channel at a time. Write '1' to start the calibration.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode_available
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Reading returns a list with the possible calibration modes.
+		There are two available options:
+		"zero_scale" - calibrate to zero scale
+		"full_scale" - calibrate to full scale
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Sets up the calibration mode used in the system calibration
+		procedure. Reading returns the current calibration mode.
+		Writing sets the system calibration mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer b/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer
new file mode 100644
index 000000000000..d526e6571001
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer
@@ -0,0 +1,19 @@
+What:		/sys/bus/iio/devices/iio:deviceX/buffer/length_align_bytes
+KernelVersion:	5.4
+Contact:	linux-iio@vger.kernel.org
+Description:
+		DMA buffers tend to have a alignment requirement for the
+		buffers. If this alignment requirement is not met samples might
+		be dropped from the buffer.
+
+		This property reports the alignment requirements in bytes.
+		This means that the buffer size in bytes needs to be a integer
+		multiple of the number reported by this file.
+
+		The alignment requirements in number of sample sets will depend
+		on the enabled channels and the bytes per channel. This means
+		that the alignment requirement in samples sets might change
+		depending on which and how many channels are enabled. Whereas
+		the alignment requirement reported in bytes by this property
+		will remain static and does not depend on which channels are
+		enabled.
diff --git a/Documentation/ABI/testing/sysfs-bus-mdio b/Documentation/ABI/testing/sysfs-bus-mdio
new file mode 100644
index 000000000000..da86efc7781b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-mdio
@@ -0,0 +1,63 @@
+What:          /sys/bus/mdio_bus/devices/.../statistics/
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		This folder contains statistics about global and per
+		MDIO bus address statistics.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/transfers
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of transfers for this MDIO bus.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/errors
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of transfer errors for this MDIO bus.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/writes
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of write transactions for this MDIO bus.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/reads
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of read transactions for this MDIO bus.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/transfers_<addr>
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of transfers for this MDIO bus address.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/errors_<addr>
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of transfer errors for this MDIO bus address.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/writes_<addr>
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of write transactions for this MDIO bus address.
+
+What:          /sys/bus/mdio_bus/devices/.../statistics/reads_<addr>
+Date:          January 2020
+KernelVersion: 5.6
+Contact:       netdev@vger.kernel.org
+Description:
+		Total number of read transactions for this MDIO bus address.
diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei
index 6bd45346ac7e..3d37e2796d5a 100644
--- a/Documentation/ABI/testing/sysfs-bus-mei
+++ b/Documentation/ABI/testing/sysfs-bus-mei
@@ -4,7 +4,7 @@ KernelVersion:	3.10
 Contact:	Samuel Ortiz <sameo@linux.intel.com>
 		linux-mei@linux.intel.com
 Description:	Stores the same MODALIAS value emitted by uevent
-		Format: mei:<mei device name>:<device uuid>:
+		Format: mei:<mei device name>:<device uuid>:<protocol version>
 
 What:		/sys/bus/mei/devices/.../name
 Date:		May 2015
@@ -26,3 +26,24 @@ KernelVersion:	4.3
 Contact:	Tomas Winkler <tomas.winkler@intel.com>
 Description:	Stores mei client protocol version
 		Format: %d
+
+What:		/sys/bus/mei/devices/.../max_conn
+Date:		Nov 2019
+KernelVersion:	5.5
+Contact:	Tomas Winkler <tomas.winkler@intel.com>
+Description:	Stores mei client maximum number of connections
+		Format: %d
+
+What:		/sys/bus/mei/devices/.../fixed
+Date:		Nov 2019
+KernelVersion:	5.5
+Contact:	Tomas Winkler <tomas.winkler@intel.com>
+Description:	Stores mei client fixed address, if any
+		Format: %d
+
+What:		/sys/bus/mei/devices/.../max_len
+Date:		Nov 2019
+KernelVersion:	5.5
+Contact:	Tomas Winkler <tomas.winkler@intel.com>
+Description:	Stores mei client maximum message length
+		Format: %d
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 8bfee557e50e..450296cc7948 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -347,3 +347,16 @@ Description:
 		If the device has any Peer-to-Peer memory registered, this
 	        file contains a '1' if the memory has been published for
 		use outside the driver that owns the device.
+
+What:		/sys/bus/pci/devices/.../link/clkpm
+		/sys/bus/pci/devices/.../link/l0s_aspm
+		/sys/bus/pci/devices/.../link/l1_aspm
+		/sys/bus/pci/devices/.../link/l1_1_aspm
+		/sys/bus/pci/devices/.../link/l1_2_aspm
+		/sys/bus/pci/devices/.../link/l1_1_pcipm
+		/sys/bus/pci/devices/.../link/l1_2_pcipm
+Date:		October 2019
+Contact:	Heiner Kallweit <hkallweit1@gmail.com>
+Description:	If ASPM is supported for an endpoint, these files can be
+		used to disable or enable the individual power management
+		states. Write y/1/on to enable, n/0/off to disable.
diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt
index b21fba14689b..82e80de78dd0 100644
--- a/Documentation/ABI/testing/sysfs-bus-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt
@@ -80,6 +80,14 @@ Contact:	thunderbolt-software@lists.01.org
 Description:	This attribute contains 1 if Thunderbolt device was already
 		authorized on boot and 0 otherwise.
 
+What: /sys/bus/thunderbolt/devices/.../generation
+Date:		Jan 2020
+KernelVersion:	5.5
+Contact:	Christian Kellner <christian@kellner.me>
+Description:	This attribute contains the generation of the Thunderbolt
+		controller associated with the device. It will contain 4
+		for USB4.
+
 What: /sys/bus/thunderbolt/devices/.../key
 Date:		Sep 2017
 KernelVersion:	4.13
@@ -104,6 +112,34 @@ Contact:	thunderbolt-software@lists.01.org
 Description:	This attribute contains name of this device extracted from
 		the device DROM.
 
+What:		/sys/bus/thunderbolt/devices/.../rx_speed
+Date:		Jan 2020
+KernelVersion:	5.5
+Contact:	Mika Westerberg <mika.westerberg@linux.intel.com>
+Description:	This attribute reports the device RX speed per lane.
+		All RX lanes run at the same speed.
+
+What:		/sys/bus/thunderbolt/devices/.../rx_lanes
+Date:		Jan 2020
+KernelVersion:	5.5
+Contact:	Mika Westerberg <mika.westerberg@linux.intel.com>
+Description:	This attribute reports number of RX lanes the device is
+		using simultaneusly through its upstream port.
+
+What:		/sys/bus/thunderbolt/devices/.../tx_speed
+Date:		Jan 2020
+KernelVersion:	5.5
+Contact:	Mika Westerberg <mika.westerberg@linux.intel.com>
+Description:	This attribute reports the TX speed per lane.
+		All TX lanes run at the same speed.
+
+What:		/sys/bus/thunderbolt/devices/.../tx_lanes
+Date:		Jan 2020
+KernelVersion:	5.5
+Contact:	Mika Westerberg <mika.westerberg@linux.intel.com>
+Description:	This attribute reports number of TX lanes the device is
+		using simultaneusly through its upstream port.
+
 What:		/sys/bus/thunderbolt/devices/.../vendor
 Date:		Sep 2017
 KernelVersion:	4.13
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 01196e19afca..9758eb85ade3 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -7,6 +7,13 @@ Description:
 		The name of devfreq object denoted as ... is same as the
 		name of device using devfreq.
 
+What:		/sys/class/devfreq/.../name
+Date:		November 2019
+Contact:	Chanwoo Choi <cw00.choi@samsung.com>
+Description:
+		The /sys/class/devfreq/.../name shows the name of device
+		of the corresponding devfreq object.
+
 What:		/sys/class/devfreq/.../governor
 Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
@@ -48,12 +55,15 @@ What:		/sys/class/devfreq/.../trans_stat
 Date:		October 2012
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
 Description:
-		This ABI shows the statistics of devfreq behavior on a
-		specific device. It shows the time spent in each state and
-		the number of transitions between states.
+		This ABI shows or clears the statistics of devfreq behavior
+		on a specific device. It shows the time spent in each state
+		and the number of transitions between states.
 		In order to activate this ABI, the devfreq target device
 		driver should provide the list of available frequencies
-		with its profile.
+		with its profile. If need to reset the statistics of devfreq
+		behavior on a specific device, enter 0(zero) to 'trans_stat'
+		as following:
+			echo 0 > /sys/class/devfreq/.../trans_stat
 
 What:		/sys/class/devfreq/.../userspace/set_freq
 Date:		September 2011
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-el15203000 b/Documentation/ABI/testing/sysfs-class-led-driver-el15203000
new file mode 100644
index 000000000000..f520ece9b64c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-el15203000
@@ -0,0 +1,139 @@
+What:		/sys/class/leds/<led>/hw_pattern
+Date:		September 2019
+KernelVersion:	5.5
+Description:
+		Specify a hardware pattern for the EL15203000 LED.
+		The LEDs board supports only predefined patterns by firmware
+		for specific LEDs.
+
+		Breathing mode for Screen frame light tube:
+		"0 4000 1 4000"
+
+		    ^
+		    |
+		Max-|     ---
+		    |    /   \
+		    |   /     \
+		    |  /       \     /
+		    | /         \   /
+		Min-|-           ---
+		    |
+		    0------4------8--> time (sec)
+
+		Cascade mode for Pipe LED:
+		"1 800 2 800 4 800 8 800 16 800"
+
+		      ^
+		      |
+		0 On -|----+                   +----+                   +---
+		      |    |                   |    |                   |
+		  Off-|    +-------------------+    +-------------------+
+		      |
+		1 On -|    +----+                   +----+
+		      |    |    |                   |    |
+		  Off |----+    +-------------------+    +------------------
+		      |
+		2 On -|         +----+                   +----+
+		      |         |    |                   |    |
+		  Off-|---------+    +-------------------+    +-------------
+		      |
+		3 On -|              +----+                   +----+
+		      |              |    |                   |    |
+		  Off-|--------------+    +-------------------+    +--------
+		      |
+		4 On -|                   +----+                   +----+
+		      |                   |    |                   |    |
+		  Off-|-------------------+    +-------------------+    +---
+		      |
+		      0---0.8--1.6--2.4--3.2---4---4.8--5.6--6.4--7.2---8--> time (sec)
+
+		Inverted cascade mode for Pipe LED:
+		"30 800 29 800 27 800 23 800 15 800"
+
+		      ^
+		      |
+		0 On -|    +-------------------+    +-------------------+
+		      |    |                   |    |                   |
+		  Off-|----+                   +----+                   +---
+		      |
+		1 On -|----+    +-------------------+    +------------------
+		      |    |    |                   |    |
+		  Off |    +----+                   +----+
+		      |
+		2 On -|---------+    +-------------------+    +-------------
+		      |         |    |                   |    |
+		  Off-|         +----+                   +----+
+		      |
+		3 On -|--------------+    +-------------------+    +--------
+		      |              |    |                   |    |
+		  Off-|              +----+                   +----+
+		      |
+		4 On -|-------------------+    +-------------------+    +---
+		      |                   |    |                   |    |
+		  Off-|                   +----+                   +----+
+		      |
+		      0---0.8--1.6--2.4--3.2---4---4.8--5.6--6.4--7.2---8--> time (sec)
+
+		Bounce mode for Pipe LED:
+		"1 800 2 800 4 800 8 800 16 800 16 800 8 800 4 800 2 800 1 800"
+
+		      ^
+		      |
+		0 On -|----+                                       +--------
+		      |    |                                       |
+		  Off-|    +---------------------------------------+
+		      |
+		1 On -|    +----+                             +----+
+		      |    |    |                             |    |
+		  Off |----+    +-----------------------------+    +--------
+		      |
+		2 On -|         +----+                   +----+
+		      |         |    |                   |    |
+		  Off-|---------+    +-------------------+    +-------------
+		      |
+		3 On -|              +----+         +----+
+		      |              |    |         |    |
+		  Off-|--------------+    +---------+    +------------------
+		      |
+		4 On -|                   +---------+
+		      |                   |         |
+		  Off-|-------------------+         +-----------------------
+		      |
+		      0---0.8--1.6--2.4--3.2---4---4.8--5.6--6.4--7.2---8--> time (sec)
+
+		Inverted bounce mode for Pipe LED:
+		"30 800 29 800 27 800 23 800 15 800 15 800 23 800 27 800 29 800 30 800"
+
+		      ^
+		      |
+		0 On -|    +---------------------------------------+
+		      |    |                                       |
+		  Off-|----+                                       +--------
+		      |
+		1 On -|----+    +-----------------------------+    +--------
+		      |    |    |                             |    |
+		  Off |    +----+                             +----+
+		      |
+		2 On -|---------+    +-------------------+    +-------------
+		      |         |    |                   |    |
+		  Off-|         +----+                   +----+
+		      |
+		3 On -|--------------+    +---------+    +------------------
+		      |              |    |         |    |
+		  Off-|              +----+         +----+
+		      |
+		4 On -|-------------------+         +-----------------------
+		      |                   |         |
+		  Off-|                   +---------+
+		      |
+		      0---0.8--1.6--2.4--3.2---4---4.8--5.6--6.4--7.2---8--> time (sec)
+
+What:		/sys/class/leds/<led>/repeat
+Date:		September 2019
+KernelVersion:	5.5
+Description:
+		EL15203000 supports only indefinitely patterns,
+		so this file should always store -1.
+
+		For more info, please see:
+		Documentation/ABI/testing/sysfs-class-led-trigger-pattern
diff --git a/Documentation/ABI/testing/sysfs-class-mei b/Documentation/ABI/testing/sysfs-class-mei
index a92d844f806e..e9dc110650ae 100644
--- a/Documentation/ABI/testing/sysfs-class-mei
+++ b/Documentation/ABI/testing/sysfs-class-mei
@@ -80,3 +80,13 @@ Description:	Display the ME device state.
 		DISABLED
 		POWER_DOWN
 		POWER_UP
+
+What:		/sys/class/mei/meiN/trc
+Date:		Nov 2019
+KernelVersion:	5.5
+Contact:	Tomas Winkler <tomas.winkler@intel.com>
+Description:	Display trc status register content
+
+		The ME FW writes Glitch Detection HW (TRC)
+		status information into trc status register
+		for BIOS and OS to monitor fw health.
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
index 397118de7b5e..55db27815361 100644
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -51,6 +51,14 @@ Description:
 		packet processing. See the network driver for the exact
 		meaning of this value.
 
+What:		/sys/class/<iface>/statistics/rx_errors
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of receive errors on this network device.
+		See the network driver for the exact meaning of this value.
+
 What:		/sys/class/<iface>/statistics/rx_fifo_errors
 Date:		April 2005
 KernelVersion:	2.6.12
@@ -88,6 +96,14 @@ Description:
 		due to lack of capacity in the receive side. See the network
 		driver for the exact meaning of this value.
 
+What:		/sys/class/<iface>/statistics/rx_nohandler
+Date:		February 2016
+KernelVersion:	4.6
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the number of received packets that were dropped on
+		an inactive device by the network core.
+
 What:		/sys/class/<iface>/statistics/rx_over_errors
 Date:		April 2005
 KernelVersion:	2.6.12
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 27edc06e2495..bf3b48f022dc 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -189,7 +189,8 @@ Description:
 		Access: Read
 		Valid values: "Unknown", "Good", "Overheat", "Dead",
 			      "Over voltage", "Unspecified failure", "Cold",
-			      "Watchdog timer expire", "Safety timer expire"
+			      "Watchdog timer expire", "Safety timer expire",
+			      "Over current"
 
 What:		/sys/class/power_supply/<supply_name>/precharge_current
 Date:		June 2017
diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog
index 675f9b537661..9860a8b2ba75 100644
--- a/Documentation/ABI/testing/sysfs-class-watchdog
+++ b/Documentation/ABI/testing/sysfs-class-watchdog
@@ -17,8 +17,13 @@ What:		/sys/class/watchdog/watchdogn/nowayout
 Date:		August 2015
 Contact:	Wim Van Sebroeck <wim@iguana.be>
 Description:
-		It is a read only file. While reading, it gives '1' if that
-		device supports nowayout feature else, it gives '0'.
+		It is a read/write file. While reading, it gives '1'
+		if the device has the nowayout feature set, otherwise
+		it gives '0'. Writing a '1' to the file enables the
+		nowayout feature. Once set, the nowayout feature
+		cannot be disabled, so writing a '0' either has no
+		effect (if the feature was already disabled) or
+		results in a permission error.
 
 What:		/sys/class/watchdog/watchdogn/state
 Date:		August 2015
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index fc20cde63d1e..2e0e3b45d02a 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -196,6 +196,12 @@ Description:
 		does not reflect it. Likewise, if one enables a deep state but a
 		lighter state still is disabled, then this has no effect.
 
+What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/default_status
+Date:		December 2019
+KernelVersion:	v5.6
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:
+		(RO) The default status of this state, "enabled" or "disabled".
 
 What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
 Date:		March 2014
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 7ab2b1b5e255..1a6cd5397129 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -1,260 +1,320 @@
 What:		/sys/fs/f2fs/<disk>/gc_max_sleep_time
 Date:		July 2013
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
-		 Controls the maximun sleep time for gc_thread. Time
-		 is in milliseconds.
+Description:	Controls the maximum sleep time for gc_thread. Time
+		is in milliseconds.
 
 What:		/sys/fs/f2fs/<disk>/gc_min_sleep_time
 Date:		July 2013
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
-		 Controls the minimum sleep time for gc_thread. Time
-		 is in milliseconds.
+Description:	Controls the minimum sleep time for gc_thread. Time
+		is in milliseconds.
 
 What:		/sys/fs/f2fs/<disk>/gc_no_gc_sleep_time
 Date:		July 2013
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
-		 Controls the default sleep time for gc_thread. Time
-		 is in milliseconds.
+Description:	Controls the default sleep time for gc_thread. Time
+		is in milliseconds.
 
 What:		/sys/fs/f2fs/<disk>/gc_idle
 Date:		July 2013
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
-		 Controls the victim selection policy for garbage collection.
+Description:	Controls the victim selection policy for garbage collection.
+		Setting gc_idle = 0(default) will disable this option. Setting
+		gc_idle = 1 will select the Cost Benefit approach & setting
+		gc_idle = 2 will select the greedy approach.
 
 What:		/sys/fs/f2fs/<disk>/reclaim_segments
 Date:		October 2013
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:	This parameter controls the number of prefree segments to be
+		reclaimed. If the number of prefree segments is larger than
+		the number of segments in the proportion to the percentage
+		over total volume size, f2fs tries to conduct checkpoint to
+		reclaim the prefree segments to free segments.
+		By default, 5% over total # of segments.
+
+What:		/sys/fs/f2fs/<disk>/main_blkaddr
+Date:		November 2019
+Contact:	"Ramon Pantin" <pantin@google.com>
 Description:
-		 Controls the issue rate of segment discard commands.
+		 Shows first block address of MAIN area.
 
 What:		/sys/fs/f2fs/<disk>/ipu_policy
 Date:		November 2013
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the in-place-update policy.
+Description:	Controls the in-place-update policy.
+		updates in f2fs. User can set:
+		0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
+		0x04: F2FS_IPU_UTIL,  0x08: F2FS_IPU_SSR_UTIL,
+		0x10: F2FS_IPU_FSYNC, 0x20: F2FS_IPU_ASYNC,
+		0x40: F2FS_IPU_NOCACHE.
+		Refer segment.h for details.
 
 What:		/sys/fs/f2fs/<disk>/min_ipu_util
 Date:		November 2013
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the FS utilization condition for the in-place-update
-		 policies.
+Description:	Controls the FS utilization condition for the in-place-update
+		policies. It is used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
 
 What:		/sys/fs/f2fs/<disk>/min_fsync_blocks
 Date:		September 2014
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the dirty page count condition for the in-place-update
-		 policies.
+Description:	Controls the dirty page count condition for the in-place-update
+		policies.
 
 What:		/sys/fs/f2fs/<disk>/min_seq_blocks
 Date:		August 2018
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the dirty page count condition for batched sequential
-		 writes in ->writepages.
-
+Description:	Controls the dirty page count condition for batched sequential
+		writes in writepages.
 
 What:		/sys/fs/f2fs/<disk>/min_hot_blocks
 Date:		March 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the dirty page count condition for redefining hot data.
+Description:	Controls the dirty page count condition for redefining hot data.
 
 What:		/sys/fs/f2fs/<disk>/min_ssr_sections
 Date:		October 2017
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Controls the fee section threshold to trigger SSR allocation.
+Description:	Controls the free section threshold to trigger SSR allocation.
+		If this is large, SSR mode will be enabled early.
 
 What:		/sys/fs/f2fs/<disk>/max_small_discards
 Date:		November 2013
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the issue rate of small discard commands.
+Description:	Controls the issue rate of discard commands that consist of small
+		blocks less than 2MB. The candidates to be discarded are cached until
+		checkpoint is triggered, and issued during the checkpoint.
+		By default, it is disabled with 0.
 
-What:          /sys/fs/f2fs/<disk>/discard_granularity
-Date:          July 2017
-Contact:       "Chao Yu" <yuchao0@huawei.com>
-Description:
-		Controls discard granularity of inner discard thread, inner thread
+What:		/sys/fs/f2fs/<disk>/discard_granularity
+Date:		July 2017
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Description:	Controls discard granularity of inner discard thread. Inner thread
 		will not issue discards with size that is smaller than granularity.
-		The unit size is one block, now only support configuring in range
-		of [1, 512].
+		The unit size is one block(4KB), now only support configuring
+		in range of [1, 512]. Default value is 4(=16KB).
 
-What:          /sys/fs/f2fs/<disk>/umount_discard_timeout
-Date:          January 2019
-Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		Set timeout to issue discard commands during umount.
-		Default: 5 secs
+What:		/sys/fs/f2fs/<disk>/umount_discard_timeout
+Date:		January 2019
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Set timeout to issue discard commands during umount.
+	        Default: 5 secs
 
 What:		/sys/fs/f2fs/<disk>/max_victim_search
 Date:		January 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the number of trials to find a victim segment.
+Description:	Controls the number of trials to find a victim segment
+		when conducting SSR and cleaning operations. The default value
+		is 4096 which covers 8GB block address range.
 
 What:		/sys/fs/f2fs/<disk>/migration_granularity
 Date:		October 2018
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Controls migration granularity of garbage collection on large
-		 section, it can let GC move partial segment{s} of one section
-		 in one GC cycle, so that dispersing heavy overhead GC to
-		 multiple lightweight one.
+Description:	Controls migration granularity of garbage collection on large
+		section, it can let GC move partial segment{s} of one section
+		in one GC cycle, so that dispersing heavy overhead GC to
+		multiple lightweight one.
 
 What:		/sys/fs/f2fs/<disk>/dir_level
 Date:		March 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the directory level for large directory.
+Description:	Controls the directory level for large directory. If a
+		directory has a number of files, it can reduce the file lookup
+		latency by increasing this dir_level value. Otherwise, it
+		needs to decrease this value to reduce the space overhead.
+		The default value is 0.
 
 What:		/sys/fs/f2fs/<disk>/ram_thresh
 Date:		March 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
-		 Controls the memory footprint used by f2fs.
+Description:	Controls the memory footprint used by free nids and cached
+		nat entries. By default, 1 is set, which indicates
+		10 MB / 1 GB RAM.
 
 What:		/sys/fs/f2fs/<disk>/batched_trim_sections
 Date:		February 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the trimming rate in batch mode.
-		 <deprecated>
+Description:	Controls the trimming rate in batch mode.
+		<deprecated>
 
 What:		/sys/fs/f2fs/<disk>/cp_interval
 Date:		October 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the checkpoint timing.
+Description:	Controls the checkpoint timing, set to 60 seconds by default.
 
 What:		/sys/fs/f2fs/<disk>/idle_interval
 Date:		January 2016
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the idle timing for all paths other than
-		 discard and gc path.
+Description:	Controls the idle timing of system, if there is no FS operation
+		during given interval.
+		Set to 5 seconds by default.
 
 What:		/sys/fs/f2fs/<disk>/discard_idle_interval
 Date:		September 2018
 Contact:	"Chao Yu" <yuchao0@huawei.com>
 Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
-Description:
-		 Controls the idle timing for discard path.
+Description:	Controls the idle timing of discard thread given
+		this time interval.
+		Default is 5 secs.
 
 What:		/sys/fs/f2fs/<disk>/gc_idle_interval
 Date:		September 2018
 Contact:	"Chao Yu" <yuchao0@huawei.com>
 Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
-Description:
-		 Controls the idle timing for gc path.
+Description:    Controls the idle timing for gc path. Set to 5 seconds by default.
 
 What:		/sys/fs/f2fs/<disk>/iostat_enable
 Date:		August 2017
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Controls to enable/disable IO stat.
+Description:	Controls to enable/disable IO stat.
 
 What:		/sys/fs/f2fs/<disk>/ra_nid_pages
 Date:		October 2015
 Contact:	"Chao Yu" <chao2.yu@samsung.com>
-Description:
-		 Controls the count of nid pages to be readaheaded.
+Description:	Controls the count of nid pages to be readaheaded.
+		When building free nids, F2FS reads NAT blocks ahead for
+		speed up. Default is 0.
 
 What:		/sys/fs/f2fs/<disk>/dirty_nats_ratio
 Date:		January 2016
 Contact:	"Chao Yu" <chao2.yu@samsung.com>
-Description:
-		 Controls dirty nat entries ratio threshold, if current
-		 ratio exceeds configured threshold, checkpoint will
-		 be triggered for flushing dirty nat entries.
+Description:	Controls dirty nat entries ratio threshold, if current
+		ratio exceeds configured threshold, checkpoint will
+		be triggered for flushing dirty nat entries.
 
 What:		/sys/fs/f2fs/<disk>/lifetime_write_kbytes
 Date:		January 2016
 Contact:	"Shuoran Liu" <liushuoran@huawei.com>
-Description:
-		 Shows total written kbytes issued to disk.
+Description:	Shows total written kbytes issued to disk.
 
 What:		/sys/fs/f2fs/<disk>/features
 Date:		July 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Shows all enabled features in current device.
+Description:	Shows all enabled features in current device.
 
 What:		/sys/fs/f2fs/<disk>/inject_rate
 Date:		May 2016
 Contact:	"Sheng Yong" <shengyong1@huawei.com>
-Description:
-		 Controls the injection rate.
+Description:	Controls the injection rate of arbitrary faults.
 
 What:		/sys/fs/f2fs/<disk>/inject_type
 Date:		May 2016
 Contact:	"Sheng Yong" <shengyong1@huawei.com>
-Description:
-		 Controls the injection type.
+Description:	Controls the injection type of arbitrary faults.
+
+What:		/sys/fs/f2fs/<disk>/dirty_segments
+Date:		October 2017
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Shows the number of dirty segments.
 
 What:		/sys/fs/f2fs/<disk>/reserved_blocks
 Date:		June 2017
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Controls target reserved blocks in system, the threshold
-		 is soft, it could exceed current available user space.
+Description:	Controls target reserved blocks in system, the threshold
+		is soft, it could exceed current available user space.
 
 What:		/sys/fs/f2fs/<disk>/current_reserved_blocks
 Date:		October 2017
 Contact:	"Yunlong Song" <yunlong.song@huawei.com>
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Shows current reserved blocks in system, it may be temporarily
-		 smaller than target_reserved_blocks, but will gradually
-		 increase to target_reserved_blocks when more free blocks are
-		 freed by user later.
+Description:	Shows current reserved blocks in system, it may be temporarily
+		smaller than target_reserved_blocks, but will gradually
+		increase to target_reserved_blocks when more free blocks are
+		freed by user later.
 
 What:		/sys/fs/f2fs/<disk>/gc_urgent
 Date:		August 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Do background GC agressively
+Description:	Do background GC agressively when set. When gc_urgent = 1,
+		background thread starts to do GC by given gc_urgent_sleep_time
+		interval. It is set to 0 by default.
 
 What:		/sys/fs/f2fs/<disk>/gc_urgent_sleep_time
 Date:		August 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls sleep time of GC urgent mode
+Description:	Controls sleep time of GC urgent mode. Set to 500ms by default.
 
 What:		/sys/fs/f2fs/<disk>/readdir_ra
 Date:		November 2017
 Contact:	"Sheng Yong" <shengyong1@huawei.com>
-Description:
-		 Controls readahead inode block in readdir.
+Description:	Controls readahead inode block in readdir. Enabled by default.
+
+What:		/sys/fs/f2fs/<disk>/gc_pin_file_thresh
+Date:		January 2018
+Contact:	Jaegeuk Kim <jaegeuk@kernel.org>
+Description:	This indicates how many GC can be failed for the pinned
+		file. If it exceeds this, F2FS doesn't guarantee its pinning
+		state. 2048 trials is set by default.
 
 What:		/sys/fs/f2fs/<disk>/extension_list
 Date:		Feburary 2018
 Contact:	"Chao Yu" <yuchao0@huawei.com>
-Description:
-		 Used to control configure extension list:
-		 - Query: cat /sys/fs/f2fs/<disk>/extension_list
-		 - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list
-		 - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
-		 - [h] means add/del hot file extension
-		 - [c] means add/del cold file extension
+Description:	Used to control configure extension list:
+		- Query: cat /sys/fs/f2fs/<disk>/extension_list
+		- Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list
+		- Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
+		- [h] means add/del hot file extension
+		- [c] means add/del cold file extension
 
 What:		/sys/fs/f2fs/<disk>/unusable
 Date		April 2019
 Contact:	"Daniel Rosenberg" <drosen@google.com>
-Description:
-		If checkpoint=disable, it displays the number of blocks that are unusable.
-                If checkpoint=enable it displays the enumber of blocks that would be unusable
-                if checkpoint=disable were to be set.
+Description:	If checkpoint=disable, it displays the number of blocks that
+		are unusable.
+		If checkpoint=enable it displays the enumber of blocks that
+		would be unusable if checkpoint=disable were to be set.
 
 What:		/sys/fs/f2fs/<disk>/encoding
 Date		July 2019
 Contact:	"Daniel Rosenberg" <drosen@google.com>
-Description:
-		Displays name and version of the encoding set for the filesystem.
-                If no encoding is set, displays (none)
+Description:	Displays name and version of the encoding set for the filesystem.
+		If no encoding is set, displays (none)
+
+What:		/sys/fs/f2fs/<disk>/free_segments
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of free segments in disk.
+
+What:		/sys/fs/f2fs/<disk>/cp_foreground_calls
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of checkpoint operations performed on demand. Available when
+		CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/cp_background_calls
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of checkpoint operations performed in the background to
+		free segments. Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/gc_foreground_calls
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of garbage collection operations performed on demand.
+		Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/gc_background_calls
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of garbage collection operations triggered in background.
+		Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/moved_blocks_foreground
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of blocks moved by garbage collection in foreground.
+		Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/moved_blocks_background
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Number of blocks moved by garbage collection in background.
+		Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/avg_vblocks
+Date:		September 2019
+Contact:	"Hridya Valsaraju" <hridya@google.com>
+Description:	Average number of valid blocks.
+		Available when CONFIG_F2FS_STAT_FS=y.
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
index 9e99f2909612..1efac0ddb417 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -46,3 +46,13 @@ Description:
 			* 0 - normal,
 			* 1 - overboost,
 			* 2 - silent
+
+What:		/sys/devices/platform/<platform>/throttle_thermal_policy
+Date:		Dec 2019
+KernelVersion:	5.6
+Contact:	"Leonid Maksymchuk" <leonmaxx@gmail.com>
+Description:
+		Throttle thermal policy mode:
+			* 0 - default,
+			* 1 - overboost,
+			* 2 - silent
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index 72634d3ae4f4..3683cb1cdc3d 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -106,3 +106,135 @@ KernelVersion:  5.4
 Contact:	Wu Hao <hao.wu@intel.com>
 Description:	Read-only. Read this file to get the second error detected by
 		hardware.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/name
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. Read this file to get the name of hwmon device, it
+		supports values:
+		    'dfl_fme_thermal' - thermal hwmon device name
+		    'dfl_fme_power'   - power hwmon device name
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns FPGA device temperature in millidegrees
+		Celsius.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns hardware threshold1 temperature in
+		millidegrees Celsius. If temperature rises at or above this
+		threshold, hardware starts 50% or 90% throttling (see
+		'temp1_max_policy').
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns hardware threshold2 temperature in
+		millidegrees Celsius. If temperature rises at or above this
+		threshold, hardware starts 100% throttling.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_emergency
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns hardware trip threshold temperature in
+		millidegrees Celsius. If temperature rises at or above this
+		threshold, a fatal event will be triggered to board management
+		controller (BMC) to shutdown FPGA.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_alarm
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns 1 if temperature is currently at or above
+		hardware threshold1 (see 'temp1_max'), otherwise 0.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit_alarm
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns 1 if temperature is currently at or above
+		hardware threshold2 (see 'temp1_crit'), otherwise 0.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_policy
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. Read this file to get the policy of hardware threshold1
+		(see 'temp1_max'). It only supports two values (policies):
+		    0 - AP2 state (90% throttling)
+		    1 - AP1 state (50% throttling)
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_input
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns current FPGA power consumption in uW.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Write. Read this file to get current hardware power
+		threshold1 in uW. If power consumption rises at or above
+		this threshold, hardware starts 50% throttling.
+		Write this file to set current hardware power threshold1 in uW.
+		As hardware only accepts values in Watts, so input value will
+		be round down per Watts (< 1 watts part will be discarded) and
+		clamped within the range from 0 to 127 Watts. Write fails with
+		-EINVAL if input parsing fails.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Write. Read this file to get current hardware power
+		threshold2 in uW. If power consumption rises at or above
+		this threshold, hardware starts 90% throttling.
+		Write this file to set current hardware power threshold2 in uW.
+		As hardware only accepts values in Watts, so input value will
+		be round down per Watts (< 1 watts part will be discarded) and
+		clamped within the range from 0 to 127 Watts. Write fails with
+		-EINVAL if input parsing fails.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max_alarm
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns 1 if power consumption is currently at or
+		above hardware threshold1 (see 'power1_max'), otherwise 0.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit_alarm
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns 1 if power consumption is currently at or
+		above hardware threshold2 (see 'power1_crit'), otherwise 0.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_xeon_limit
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns power limit for XEON in uW.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_fpga_limit
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-Only. It returns power limit for FPGA in uW.
+
+What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_ltr
+Date:		October 2019
+KernelVersion:	5.5
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. Read this file to get current Latency Tolerance
+		Reporting (ltr) value. It returns 1 if all Accelerated
+		Function Units (AFUs) can tolerate latency >= 40us for memory
+		access or 0 if any AFU is latency sensitive (< 40us).
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
new file mode 100644
index 000000000000..401d202f478b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
@@ -0,0 +1,58 @@
+What:		/sys/bus/platform/devices/MLNXBF04:00/lifecycle_state
+Date:		Oct 2019
+KernelVersion:	5.5
+Contact:	"Liming Sun <lsun@mellanox.com>"
+Description:
+		The Life-cycle state of the SoC, which could be one of the
+		following values.
+		  Production - Production state and can be updated to secure
+		  GA Secured - Secure chip and not able to change state
+		  GA Non-Secured - Non-Secure chip and not able to change state
+		  RMA - Return Merchandise Authorization
+
+What:		/sys/bus/platform/devices/MLNXBF04:00/post_reset_wdog
+Date:		Oct 2019
+KernelVersion:	5.5
+Contact:	"Liming Sun <lsun@mellanox.com>"
+Description:
+		The watchdog setting in seconds for the next booting. It's used
+		to reboot the chip and recover it to the old state if the new
+		boot partition fails.
+
+What:		/sys/bus/platform/devices/MLNXBF04:00/reset_action
+Date:		Oct 2019
+KernelVersion:	5.5
+Contact:	"Liming Sun <lsun@mellanox.com>"
+Description:
+		The source of the boot stream for the next reset. It could be
+		one of the following values.
+		  external - boot from external source (USB or PCIe)
+		  emmc - boot from the onchip eMMC
+		  emmc_legacy - boot from the onchip eMMC in legacy (slow) mode
+
+What:		/sys/bus/platform/devices/MLNXBF04:00/second_reset_action
+Date:		Oct 2019
+KernelVersion:	5.5
+Contact:	"Liming Sun <lsun@mellanox.com>"
+Description:
+		Update the source of the boot stream after next reset. It could
+		be one of the following values and will be applied after next
+		reset.
+		  external - boot from external source (USB or PCIe)
+		  emmc - boot from the onchip eMMC
+		  emmc_legacy - boot from the onchip eMMC in legacy (slow) mode
+		  swap_emmc - swap the primary / secondary boot partition
+		  none - cancel the action
+
+What:		/sys/bus/platform/devices/MLNXBF04:00/secure_boot_fuse_state
+Date:		Oct 2019
+KernelVersion:	5.5
+Contact:	"Liming Sun <lsun@mellanox.com>"
+Description:
+		The state of eFuse versions with the following values.
+		  InUse - burnt, valid and currently in use
+		  Used - burnt and valid
+		  Free - not burnt and free to use
+		  Skipped - not burnt but not free (skipped)
+		  Wasted - burnt and invalid
+		  Invalid - not burnt but marked as valid (error state).
diff --git a/Documentation/ABI/testing/sysfs-platform-wilco-ec b/Documentation/ABI/testing/sysfs-platform-wilco-ec
index 8827a734f933..5f60b184a5a5 100644
--- a/Documentation/ABI/testing/sysfs-platform-wilco-ec
+++ b/Documentation/ABI/testing/sysfs-platform-wilco-ec
@@ -31,6 +31,23 @@ Description:
                Output will a version string be similar to the example below:
                08B6
 
+What:          /sys/bus/platform/devices/GOOG000C\:00/usb_charge
+Date:          October 2019
+KernelVersion: 5.5
+Description:
+               Control the USB PowerShare Policy. USB PowerShare is a policy
+               which affects charging via the special USB PowerShare port
+               (marked with a small lightning bolt or battery icon) when in
+               low power states:
+               - In S0, the port will always provide power.
+               - In S0ix, if usb_charge is enabled, then power will be
+                 supplied to the port when on AC or if battery is > 50%.
+                 Else no power is supplied.
+               - In S5, if usb_charge is enabled, then power will be supplied
+                 to the port when on AC. Else no power is supplied.
+
+               Input should be either "0" or "1".
+
 What:          /sys/bus/platform/devices/GOOG000C\:00/version
 Date:          May 2019
 KernelVersion: 5.3
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 6f87b9dd384b..5e6ead29124c 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -407,3 +407,16 @@ Contact:	Kalesh Singh <kaleshsingh96@gmail.com>
 Description:
 		The /sys/power/suspend_stats/last_failed_step file contains
 		the last failed step in the suspend/resume path.
+
+What:		/sys/power/sync_on_suspend
+Date:		October 2019
+Contact:	Jonas Meurer <jonas@freesources.org>
+Description:
+		This file controls whether or not the kernel will sync()
+		filesystems during system suspend (after freezing user space
+		and before suspending devices).
+
+		Writing a "1" to this file enables the sync() and writing a "0"
+		disables it.  Reads from the file return the current value.
+		The default is "1" if the build-time "SUSPEND_SKIP_SYNC" config
+		flag is unset, or "0" otherwise.
diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar
new file mode 100644
index 000000000000..feebb8c57294
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-secvar
@@ -0,0 +1,46 @@
+What:		/sys/firmware/secvar
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	This directory is created if the POWER firmware supports OS
+		secureboot, thereby secure variables. It exposes interface
+		for reading/writing the secure variables
+
+What:		/sys/firmware/secvar/vars
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	This directory lists all the secure variables that are supported
+		by the firmware.
+
+What:		/sys/firmware/secvar/format
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	A string indicating which backend is in use by the firmware.
+		This determines the format of the variable and the accepted
+		format of variable updates.
+
+What:		/sys/firmware/secvar/vars/<variable name>
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	Each secure variable is represented as a directory named as
+		<variable_name>. The variable name is unique and is in ASCII
+		representation. The data and size can be determined by reading
+		their respective attribute files.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/size
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	An integer representation of the size of the content of the
+		variable. In other words, it represents the size of the data.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/data
+Date:		August 2019
+Contact:	Nayna Jain h<nayna@linux.ibm.com>
+Description:	A read-only file containing the value of the variable. The size
+		of the file represents the maximum size of the variable data.
+
+What:		/sys/firmware/secvar/vars/<variable_name>/update
+Date:		August 2019
+Contact:	Nayna Jain <nayna@linux.ibm.com>
+Description:	A write-only file that is used to submit the new value for the
+		variable. The size of the file represents the maximum size of
+		the variable data that can be written.
diff --git a/Documentation/ABI/testing/usb-charger-uevent b/Documentation/ABI/testing/usb-charger-uevent
new file mode 100644
index 000000000000..419a92dd0d86
--- /dev/null
+++ b/Documentation/ABI/testing/usb-charger-uevent
@@ -0,0 +1,46 @@
+What:		Raise a uevent when a USB charger is inserted or removed
+Date:		2020-01-14
+KernelVersion:	5.6
+Contact:	linux-usb@vger.kernel.org
+Description:	There are two USB charger states:
+		USB_CHARGER_ABSENT
+		USB_CHARGER_PRESENT
+		There are five USB charger types:
+		USB_CHARGER_UNKNOWN_TYPE: Charger type is unknown
+		USB_CHARGER_SDP_TYPE: Standard Downstream Port
+		USB_CHARGER_CDP_TYPE: Charging Downstream Port
+		USB_CHARGER_DCP_TYPE: Dedicated Charging Port
+		USB_CHARGER_ACA_TYPE: Accessory Charging Adapter
+		https://www.usb.org/document-library/battery-charging-v12-spec-and-adopters-agreement
+
+		Here are two examples taken using udevadm monitor -p when
+		USB charger is online:
+		UDEV  change   /devices/soc0/usbphynop1 (platform)
+		ACTION=change
+		DEVPATH=/devices/soc0/usbphynop1
+		DRIVER=usb_phy_generic
+		MODALIAS=of:Nusbphynop1T(null)Cusb-nop-xceiv
+		OF_COMPATIBLE_0=usb-nop-xceiv
+		OF_COMPATIBLE_N=1
+		OF_FULLNAME=/usbphynop1
+		OF_NAME=usbphynop1
+		SEQNUM=2493
+		SUBSYSTEM=platform
+		USB_CHARGER_STATE=USB_CHARGER_PRESENT
+		USB_CHARGER_TYPE=USB_CHARGER_SDP_TYPE
+		USEC_INITIALIZED=227422826
+
+		USB charger is offline:
+		KERNEL change   /devices/soc0/usbphynop1 (platform)
+		ACTION=change
+		DEVPATH=/devices/soc0/usbphynop1
+		DRIVER=usb_phy_generic
+		MODALIAS=of:Nusbphynop1T(null)Cusb-nop-xceiv
+		OF_COMPATIBLE_0=usb-nop-xceiv
+		OF_COMPATIBLE_N=1
+		OF_FULLNAME=/usbphynop1
+		OF_NAME=usbphynop1
+		SEQNUM=2494
+		SUBSYSTEM=platform
+		USB_CHARGER_STATE=USB_CHARGER_ABSENT
+		USB_CHARGER_TYPE=USB_CHARGER_UNKNOWN_TYPE
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 8f8d97f65d73..29dcbe8826e8 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -5,24 +5,6 @@ DMA attributes
 This document describes the semantics of the DMA attributes that are
 defined in linux/dma-mapping.h.
 
-DMA_ATTR_WRITE_BARRIER
-----------------------
-
-DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA.  DMA
-to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces
-all pending DMA writes to complete, and thus provides a mechanism to
-strictly order DMA from a device across all intervening busses and
-bridges.  This barrier is not specific to a particular type of
-interconnect, it applies to the system as a whole, and so its
-implementation must account for the idiosyncrasies of the system all
-the way from the DMA device to memory.
-
-As an example of a situation where DMA_ATTR_WRITE_BARRIER would be
-useful, suppose that a device does a DMA write to indicate that data is
-ready and available in memory.  The DMA of the "completion indication"
-could race with data DMA.  Mapping the memory used for completion
-indications with DMA_ATTR_WRITE_BARRIER would prevent the race.
-
 DMA_ATTR_WEAK_ORDERING
 ----------------------
 
diff --git a/Documentation/Makefile b/Documentation/Makefile
index e145e4db508b..d77bb607aea4 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -13,7 +13,7 @@ endif
 SPHINXBUILD   = sphinx-build
 SPHINXOPTS    =
 SPHINXDIRS    = .
-_SPHINXDIRS   = $(patsubst $(srctree)/Documentation/%/conf.py,%,$(wildcard $(srctree)/Documentation/*/conf.py))
+_SPHINXDIRS   = $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst))
 SPHINX_CONF   = conf.py
 PAPER         =
 BUILDDIR      = $(obj)/output
@@ -33,8 +33,6 @@ ifeq ($(HAVE_SPHINX),0)
 
 else # HAVE_SPHINX
 
-export SPHINXOPTS = $(shell perl -e 'open IN,"sphinx-build --version 2>&1 |"; while (<IN>) { if (m/([\d\.]+)/) { print "-jauto" if ($$1 >= "1.7") } ;} close IN')
-
 # User-friendly check for pdflatex and latexmk
 HAVE_PDFLATEX := $(shell if which $(PDFLATEX) >/dev/null 2>&1; then echo 1; else echo 0; fi)
 HAVE_LATEXMK := $(shell if which latexmk >/dev/null 2>&1; then echo 1; else echo 0; fi)
@@ -67,6 +65,8 @@ quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
       cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2 && \
 	PYTHONDONTWRITEBYTECODE=1 \
 	BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
+	$(PYTHON) $(srctree)/scripts/jobserver-exec \
+	$(SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
 	$(SPHINXBUILD) \
 	-b $2 \
 	-c $(abspath $(srctree)/$(src)) \
@@ -128,8 +128,10 @@ dochelp:
 	@echo  '  pdfdocs         - PDF'
 	@echo  '  epubdocs        - EPUB'
 	@echo  '  xmldocs         - XML'
-	@echo  '  linkcheckdocs   - check for broken external links (will connect to external hosts)'
-	@echo  '  refcheckdocs    - check for references to non-existing files under Documentation'
+	@echo  '  linkcheckdocs   - check for broken external links'
+	@echo  '                    (will connect to external hosts)'
+	@echo  '  refcheckdocs    - check for references to non-existing files under'
+	@echo  '                    Documentation'
 	@echo  '  cleandocs       - clean all generated files'
 	@echo
 	@echo  '  make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
deleted file mode 100644
index c30c1957c7e6..000000000000
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ /dev/null
@@ -1,1391 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-           <p>December 18, 2016</p>
-           <p>This article was contributed by Paul E.&nbsp;McKenney</p>
-
-<h3>Introduction</h3>
-
-This document describes RCU's major data structures and their relationship
-to each other.
-
-<ol>
-<li>	<a href="#Data-Structure Relationships">
-	Data-Structure Relationships</a>
-<li>	<a href="#The rcu_state Structure">
-	The <tt>rcu_state</tt> Structure</a>
-<li>	<a href="#The rcu_node Structure">
-	The <tt>rcu_node</tt> Structure</a>
-<li>	<a href="#The rcu_segcblist Structure">
-	The <tt>rcu_segcblist</tt> Structure</a>
-<li>	<a href="#The rcu_data Structure">
-	The <tt>rcu_data</tt> Structure</a>
-<li>	<a href="#The rcu_head Structure">
-	The <tt>rcu_head</tt> Structure</a>
-<li>	<a href="#RCU-Specific Fields in the task_struct Structure">
-	RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
-<li>	<a href="#Accessor Functions">
-	Accessor Functions</a>
-</ol>
-
-<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
-
-<p>RCU is for all intents and purposes a large state machine, and its
-data structures maintain the state in such a way as to allow RCU readers
-to execute extremely quickly, while also processing the RCU grace periods
-requested by updaters in an efficient and extremely scalable fashion.
-The efficiency and scalability of RCU updaters is provided primarily
-by a combining tree, as shown below:
-
-</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
-
-</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
-containing a tree of <tt>rcu_node</tt> structures.
-Each leaf node of the <tt>rcu_node</tt> tree has up to 16
-<tt>rcu_data</tt> structures associated with it, so that there
-are <tt>NR_CPUS</tt> number of <tt>rcu_data</tt> structures,
-one for each possible CPU.
-This structure is adjusted at boot time, if needed, to handle the
-common case where <tt>nr_cpu_ids</tt> is much less than
-<tt>NR_CPUs</tt>.
-For example, a number of Linux distributions set <tt>NR_CPUs=4096</tt>,
-which results in a three-level <tt>rcu_node</tt> tree.
-If the actual hardware has only 16 CPUs, RCU will adjust itself
-at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
-
-</p><p>The purpose of this combining tree is to allow per-CPU events
-such as quiescent states, dyntick-idle transitions,
-and CPU hotplug operations to be processed efficiently
-and scalably.
-Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
-and other events are recorded by the leaf-level <tt>rcu_node</tt>
-structures.
-All of these events are combined at each level of the tree until finally
-grace periods are completed at the tree's root <tt>rcu_node</tt>
-structure.
-A grace period can be completed at the root once every CPU
-(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
-has passed through a quiescent state.
-Once a grace period has completed, record of that fact is propagated
-back down the tree.
-
-</p><p>As can be seen from the diagram, on a 64-bit system
-a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
-of 64 at the root and a fanout of 16 at the leaves.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why isn't the fanout at the leaves also 64?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because there are more types of events that affect the leaf-level
-	<tt>rcu_node</tt> structures than further up the tree.
-	Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
-	64, the contention on these structures' <tt>-&gt;structures</tt>
-	becomes excessive.
-	Experimentation on a wide variety of systems has shown that a fanout
-	of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
-	</font>
-
-	<p><font color="ffffff">Of course, further experience with
-	systems having hundreds or thousands of CPUs may demonstrate
-	that the fanout for the non-leaf <tt>rcu_node</tt> structures
-	must also be reduced.
-	Such reduction can be easily carried out when and if it proves
-	necessary.
-	In the meantime, if you are using such a system and running into
-	contention problems on the non-leaf <tt>rcu_node</tt> structures,
-	you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
-	parameter to reduce the non-leaf fanout as needed.
-	</font>
-
-	<p><font color="ffffff">Kernels built for systems with
-	strong NUMA characteristics might also need to adjust
-	<tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
-	<tt>rcu_node</tt> structures align with hardware boundaries.
-	However, there has thus far been no need for this.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
-a 32-bit system), then RCU will automatically add more levels to the
-tree.
-For example, if you are crazy enough to build a 64-bit system with 65,536
-CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
-
-</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
-
-</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
-accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
-32-bit systems.
-On the other hand, you can set both <tt>CONFIG_RCU_FANOUT</tt> and
-<tt>CONFIG_RCU_FANOUT_LEAF</tt> to be as small as 2, which would result
-in a 16-CPU test using a 4-level tree.
-This can be useful for testing large-system capabilities on small test
-machines.
-
-</p><p>This multi-level combining tree allows us to get most of the
-performance and scalability
-benefits of partitioning, even though RCU grace-period detection is
-inherently a global operation.
-The trick here is that only the last CPU to report a quiescent state
-into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
-structure at the next level up the tree.
-This means that at the leaf-level <tt>rcu_node</tt> structure, only
-one access out of sixteen will progress up the tree.
-For the internal <tt>rcu_node</tt> structures, the situation is even
-more extreme:  Only one access out of sixty-four will progress up
-the tree.
-Because the vast majority of the CPUs do not progress up the tree,
-the lock contention remains roughly constant up the tree.
-No matter how many CPUs there are in the system, at most 64 quiescent-state
-reports per grace period will progress all the way to the root
-<tt>rcu_node</tt> structure, thus ensuring that the lock contention
-on that root <tt>rcu_node</tt> structure remains acceptably low.
-
-</p><p>In effect, the combining tree acts like a big shock absorber,
-keeping lock contention under control at all tree levels regardless
-of the level of loading on the system.
-
-</p><p>RCU updaters wait for normal grace periods by registering
-RCU callbacks, either directly via <tt>call_rcu()</tt>
-or indirectly via <tt>synchronize_rcu()</tt> and friends.
-RCU callbacks are represented by <tt>rcu_head</tt> structures,
-which are queued on <tt>rcu_data</tt> structures while they are
-waiting for a grace period to elapse, as shown in the following figure:
-
-</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
-
-</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
-<tt>PREEMPT_RCU</tt>'s major data structures are related.
-Lesser data structures will be introduced with the algorithms that
-make use of them.
-
-</p><p>Note that each of the data structures in the above figure has
-its own synchronization:
-
-<p><ol>
-<li>	Each <tt>rcu_state</tt> structures has a lock and a mutex,
-	and some fields are protected by the corresponding root
-	<tt>rcu_node</tt> structure's lock.
-<li>	Each <tt>rcu_node</tt> structure has a spinlock.
-<li>	The fields in <tt>rcu_data</tt> are private to the corresponding
-	CPU, although a few can be read and written by other CPUs.
-</ol>
-
-<p>It is important to note that different data structures can have
-very different ideas about the state of RCU at any given time.
-For but one example, awareness of the start or end of a given RCU
-grace period propagates slowly through the data structures.
-This slow propagation is absolutely necessary for RCU to have good
-read-side performance.
-If this balkanized implementation seems foreign to you, one useful
-trick is to consider each instance of these data structures to be
-a different person, each having the usual slightly different
-view of reality.
-
-</p><p>The general role of each of these data structures is as
-follows:
-
-</p><ol>
-<li>	<tt>rcu_state</tt>:
-	This structure forms the interconnection between the
-	<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
-	tracks grace periods, serves as short-term repository
-	for callbacks orphaned by CPU-hotplug events,
-	maintains <tt>rcu_barrier()</tt> state,
-	tracks expedited grace-period state,
-	and maintains state used to force quiescent states when
-	grace periods extend too long,
-<li>	<tt>rcu_node</tt>: This structure forms the combining
-	tree that propagates quiescent-state
-	information from the leaves to the root, and also propagates
-	grace-period information from the root to the leaves.
-	It provides local copies of the grace-period state in order
-	to allow this information to be accessed in a synchronized
-	manner without suffering the scalability limitations that
-	would otherwise be imposed by global locking.
-	In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
-	of tasks that have blocked while in their current
-	RCU read-side critical section.
-	In <tt>CONFIG_PREEMPT_RCU</tt> with
-	<tt>CONFIG_RCU_BOOST</tt>, it manages the
-	per-<tt>rcu_node</tt> priority-boosting
-	kernel threads (kthreads) and state.
-	Finally, it records CPU-hotplug state in order to determine
-	which CPUs should be ignored during a given grace period.
-<li>	<tt>rcu_data</tt>: This per-CPU structure is the
-	focus of quiescent-state detection and RCU callback queuing.
-	It also tracks its relationship to the corresponding leaf
-	<tt>rcu_node</tt> structure to allow more-efficient
-	propagation of quiescent states up the <tt>rcu_node</tt>
-	combining tree.
-	Like the <tt>rcu_node</tt> structure, it provides a local
-	copy of the grace-period information to allow for-free
-	synchronized
-	access to this information from the corresponding CPU.
-	Finally, this structure records past dyntick-idle state
-	for the corresponding CPU and also tracks statistics.
-<li>	<tt>rcu_head</tt>:
-	This structure represents RCU callbacks, and is the
-	only structure allocated and managed by RCU users.
-	The <tt>rcu_head</tt> structure is normally embedded
-	within the RCU-protected data structure.
-</ol>
-
-<p>If all you wanted from this article was a general notion of how
-RCU's data structures are related, you are done.
-Otherwise, each of the following sections give more details on
-the <tt>rcu_state</tt>, <tt>rcu_node</tt> and <tt>rcu_data</tt> data
-structures.
-
-<h3><a name="The rcu_state Structure">
-The <tt>rcu_state</tt> Structure</a></h3>
-
-<p>The <tt>rcu_state</tt> structure is the base structure that
-represents the state of RCU in the system.
-This structure forms the interconnection between the
-<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
-tracks grace periods, contains the lock used to
-synchronize with CPU-hotplug events,
-and maintains state used to force quiescent states when
-grace periods extend too long,
-
-</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-The more specialized fields are covered in the discussion of their
-use.
-
-<h5>Relationship to rcu_node and rcu_data Structures</h5>
-
-This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
-  1   struct rcu_node node[NUM_RCU_NODES];
-  2   struct rcu_node *level[NUM_RCU_LVLS + 1];
-  3   struct rcu_data __percpu *rda;
-</pre>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Wait a minute!
-	You said that the <tt>rcu_node</tt> structures formed a tree,
-	but they are declared as a flat array!
-	What gives?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	The tree is laid out in the array.
-	The first node In the array is the head, the next set of nodes in the
-	array are children of the head node, and so on until the last set of
-	nodes in the array are the leaves.
-	</font>
-
-	<p><font color="ffffff">See the following diagrams to see how
-	this works.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The <tt>rcu_node</tt> tree is embedded into the
-<tt>-&gt;node[]</tt> array as shown in the following figure:
-
-</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
-
-</p><p>One interesting consequence of this mapping is that a
-breadth-first traversal of the tree is implemented as a simple
-linear scan of the array, which is in fact what the
-<tt>rcu_for_each_node_breadth_first()</tt> macro does.
-This macro is used at the beginning and ends of grace periods.
-
-</p><p>Each entry of the <tt>-&gt;level</tt> array references
-the first <tt>rcu_node</tt> structure on the corresponding level
-of the tree, for example, as shown below:
-
-</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
-
-</p><p>The zero<sup>th</sup> element of the array references the root
-<tt>rcu_node</tt> structure, the first element references the
-first child of the root <tt>rcu_node</tt>, and finally the second
-element references the first leaf <tt>rcu_node</tt> structure.
-
-</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
-rather than array-shaped, it is easy to draw a planar representation:
-
-</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
-
-</p><p>Finally, the <tt>-&gt;rda</tt> field references a per-CPU
-pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
-
-</p><p>All of these fields are constant once initialization is complete,
-and therefore need no protection.
-
-<h5>Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
-  1   unsigned long gp_seq;
-</pre>
-
-<p>RCU grace periods are numbered, and
-the <tt>-&gt;gp_seq</tt> field contains the current grace-period
-sequence number.
-The bottom two bits are the state of the current grace period,
-which can be zero for not yet started or one for in progress.
-In other words, if the bottom two bits of <tt>-&gt;gp_seq</tt> are
-zero, then RCU is idle.
-Any other value in the bottom two bits indicates that something is broken.
-This field is protected by the root <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt> field.
-
-</p><p>There are <tt>-&gt;gp_seq</tt> fields
-in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
-as well.
-The fields in the <tt>rcu_state</tt> structure represent the
-most current value, and those of the other structures are compared
-in order to detect the beginnings and ends of grace periods in a distributed
-fashion.
-The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
-(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
-
-<h5>Miscellaneous</h5>
-
-<p>This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
-  1   unsigned long gp_max;
-  2   char abbr;
-  3   char *name;
-</pre>
-
-<p>The <tt>-&gt;gp_max</tt> field tracks the duration of the longest
-grace period in jiffies.
-It is protected by the root <tt>rcu_node</tt>'s <tt>-&gt;lock</tt>.
-
-<p>The <tt>-&gt;name</tt> and <tt>-&gt;abbr</tt> fields distinguish
-between preemptible RCU (&ldquo;rcu_preempt&rdquo; and &ldquo;p&rdquo;)
-and non-preemptible RCU (&ldquo;rcu_sched&rdquo; and &ldquo;s&rdquo;).
-These fields are used for diagnostic and tracing purposes.
-
-<h3><a name="The rcu_node Structure">
-The <tt>rcu_node</tt> Structure</a></h3>
-
-<p>The <tt>rcu_node</tt> structures form the combining
-tree that propagates quiescent-state
-information from the leaves to the root and also that propagates
-grace-period information from the root down to the leaves.
-They provides local copies of the grace-period state in order
-to allow this information to be accessed in a synchronized
-manner without suffering the scalability limitations that
-would otherwise be imposed by global locking.
-In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
-of tasks that have blocked while in their current
-RCU read-side critical section.
-In <tt>CONFIG_PREEMPT_RCU</tt> with
-<tt>CONFIG_RCU_BOOST</tt>, they manage the
-per-<tt>rcu_node</tt> priority-boosting
-kernel threads (kthreads) and state.
-Finally, they record CPU-hotplug state in order to determine
-which CPUs should be ignored during a given grace period.
-
-</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-
-<h5>Connection to Combining Tree</h5>
-
-<p>This portion of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
-  1   struct rcu_node *parent;
-  2   u8 level;
-  3   u8 grpnum;
-  4   unsigned long grpmask;
-  5   int grplo;
-  6   int grphi;
-</pre>
-
-<p>The <tt>-&gt;parent</tt> pointer references the <tt>rcu_node</tt>
-one level up in the tree, and is <tt>NULL</tt> for the root
-<tt>rcu_node</tt>.
-The RCU implementation makes heavy use of this field to push quiescent
-states up the tree.
-The <tt>-&gt;level</tt> field gives the level in the tree, with
-the root being at level zero, its children at level one, and so on.
-The <tt>-&gt;grpnum</tt> field gives this node's position within
-the children of its parent, so this number can range between 0 and 31
-on 32-bit systems and between 0 and 63 on 64-bit systems.
-The <tt>-&gt;level</tt> and <tt>-&gt;grpnum</tt> fields are
-used only during initialization and for tracing.
-The <tt>-&gt;grpmask</tt> field is the bitmask counterpart of
-<tt>-&gt;grpnum</tt>, and therefore always has exactly one bit set.
-This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
-structure in its parent's bitmasks, which are described later.
-Finally, the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt> fields
-contain the lowest and highest numbered CPU served by this
-<tt>rcu_node</tt> structure, respectively.
-
-</p><p>All of these fields are constant, and thus do not require any
-synchronization.
-
-<h5>Synchronization</h5>
-
-<p>This field of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
-  1   raw_spinlock_t lock;
-</pre>
-
-<p>This field is used to protect the remaining fields in this structure,
-unless otherwise stated.
-That said, all of the fields in this structure can be accessed without
-locking for tracing purposes.
-Yes, this can result in confusing traces, but better some tracing confusion
-than to be heisenbugged out of existence.
-
-<h5>Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
-  1   unsigned long gp_seq;
-  2   unsigned long gp_seq_needed;
-</pre>
-
-<p>The <tt>rcu_node</tt> structures' <tt>-&gt;gp_seq</tt> fields are
-the counterparts of the field of the same name in the <tt>rcu_state</tt>
-structure.
-They each may lag up to one step behind their <tt>rcu_state</tt>
-counterpart.
-If the bottom two bits of a given <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field is zero, then this <tt>rcu_node</tt>
-structure believes that RCU is idle.
-</p><p>The <tt>&gt;gp_seq</tt> field of each <tt>rcu_node</tt>
-structure is updated at the beginning and the end
-of each grace period.
-
-<p>The <tt>-&gt;gp_seq_needed</tt> fields record the
-furthest-in-the-future grace period request seen by the corresponding
-<tt>rcu_node</tt> structure.  The request is considered fulfilled when
-the value of the <tt>-&gt;gp_seq</tt> field equals or exceeds that of
-the <tt>-&gt;gp_seq_needed</tt> field.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Suppose that this <tt>rcu_node</tt> structure doesn't see
-	a request for a very long time.
-	Won't wrapping of the <tt>-&gt;gp_seq</tt> field cause
-	problems?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	No, because if the <tt>-&gt;gp_seq_needed</tt> field lags behind the
-	<tt>-&gt;gp_seq</tt> field, the <tt>-&gt;gp_seq_needed</tt> field
-	will be updated at the end of the grace period.
-	Modulo-arithmetic comparisons therefore will always get the
-	correct answer, even with wrapping.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h5>Quiescent-State Tracking</h5>
-
-<p>These fields manage the propagation of quiescent states up the
-combining tree.
-
-</p><p>This portion of the <tt>rcu_node</tt> structure has fields
-as follows:
-
-<pre>
-  1   unsigned long qsmask;
-  2   unsigned long expmask;
-  3   unsigned long qsmaskinit;
-  4   unsigned long expmaskinit;
-</pre>
-
-<p>The <tt>-&gt;qsmask</tt> field tracks which of this
-<tt>rcu_node</tt> structure's children still need to report
-quiescent states for the current normal grace period.
-Such children will have a value of 1 in their corresponding bit.
-Note that the leaf <tt>rcu_node</tt> structures should be
-thought of as having <tt>rcu_data</tt> structures as their
-children.
-Similarly, the <tt>-&gt;expmask</tt> field tracks which
-of this <tt>rcu_node</tt> structure's children still need to report
-quiescent states for the current expedited grace period.
-An expedited grace period has
-the same conceptual properties as a normal grace period, but the
-expedited implementation accepts extreme CPU overhead to obtain
-much lower grace-period latency, for example, consuming a few
-tens of microseconds worth of CPU time to reduce grace-period
-duration from milliseconds to tens of microseconds.
-The <tt>-&gt;qsmaskinit</tt> field tracks which of this
-<tt>rcu_node</tt> structure's children cover for at least
-one online CPU.
-This mask is used to initialize <tt>-&gt;qsmask</tt>,
-and <tt>-&gt;expmaskinit</tt> is used to initialize
-<tt>-&gt;expmask</tt> and the beginning of the
-normal and expedited grace periods, respectively.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why are these bitmasks protected by locking?
-	Come on, haven't you heard of atomic instructions???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Lockless grace-period computation!  Such a tantalizing possibility!
-	</font>
-
-	<p><font color="ffffff">But consider the following sequence of events:
-	</font>
-
-	<ol>
-	<li>	<font color="ffffff">CPU&nbsp;0 has been in dyntick-idle
-		mode for quite some time.
-		When it wakes up, it notices that the current RCU
-		grace period needs it to report in, so it sets a
-		flag where the scheduling clock interrupt will find it.
-		</font><p>
-	<li>	<font color="ffffff">Meanwhile, CPU&nbsp;1 is running
-		<tt>force_quiescent_state()</tt>,
-		and notices that CPU&nbsp;0 has been in dyntick idle mode,
-		which qualifies as an extended quiescent state.
-		</font><p>
-	<li>	<font color="ffffff">CPU&nbsp;0's scheduling clock
-		interrupt fires in the
-		middle of an RCU read-side critical section, and notices
-		that the RCU core needs something, so commences RCU softirq
-		processing.
-		</font>
-		<p>
-	<li>	<font color="ffffff">CPU&nbsp;0's softirq handler
-		executes and is just about ready
-		to report its quiescent state up the <tt>rcu_node</tt>
-		tree.
-		</font><p>
-	<li>	<font color="ffffff">But CPU&nbsp;1 beats it to the punch,
-		completing the current
-		grace period and starting a new one.
-		</font><p>
-	<li>	<font color="ffffff">CPU&nbsp;0 now reports its quiescent
-		state for the wrong
-		grace period.
-		That grace period might now end before the RCU read-side
-		critical section.
-		If that happens, disaster will ensue.
-		</font>
-	</ol>
-
-	<p><font color="ffffff">So the locking is absolutely required in
-	order to coordinate clearing of the bits with updating of the
-	grace-period sequence number in <tt>-&gt;gp_seq</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h5>Blocked-Task Management</h5>
-
-<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
-midst of their RCU read-side critical sections, and these tasks
-must be tracked explicitly.
-The details of exactly why and how they are tracked will be covered
-in a separate article on RCU read-side processing.
-For now, it is enough to know that the <tt>rcu_node</tt>
-structure tracks them.
-
-<pre>
-  1   struct list_head blkd_tasks;
-  2   struct list_head *gp_tasks;
-  3   struct list_head *exp_tasks;
-  4   bool wait_blkd_tasks;
-</pre>
-
-<p>The <tt>-&gt;blkd_tasks</tt> field is a list header for
-the list of blocked and preempted tasks.
-As tasks undergo context switches within RCU read-side critical
-sections, their <tt>task_struct</tt> structures are enqueued
-(via the <tt>task_struct</tt>'s <tt>-&gt;rcu_node_entry</tt>
-field) onto the head of the <tt>-&gt;blkd_tasks</tt> list for the
-leaf <tt>rcu_node</tt> structure corresponding to the CPU
-on which the outgoing context switch executed.
-As these tasks later exit their RCU read-side critical sections,
-they remove themselves from the list.
-This list is therefore in reverse time order, so that if one of the tasks
-is blocking the current grace period, all subsequent tasks must
-also be blocking that same grace period.
-Therefore, a single pointer into this list suffices to track
-all tasks blocking a given grace period.
-That pointer is stored in <tt>-&gt;gp_tasks</tt> for normal
-grace periods and in <tt>-&gt;exp_tasks</tt> for expedited
-grace periods.
-These last two fields are <tt>NULL</tt> if either there is
-no grace period in flight or if there are no blocked tasks
-preventing that grace period from completing.
-If either of these two pointers is referencing a task that
-removes itself from the <tt>-&gt;blkd_tasks</tt> list,
-then that task must advance the pointer to the next task on
-the list, or set the pointer to <tt>NULL</tt> if there
-are no subsequent tasks on the list.
-
-</p><p>For example, suppose that tasks&nbsp;T1, T2, and&nbsp;T3 are
-all hard-affinitied to the largest-numbered CPU in the system.
-Then if task&nbsp;T1 blocked in an RCU read-side
-critical section, then an expedited grace period started,
-then task&nbsp;T2 blocked in an RCU read-side critical section,
-then a normal grace period started, and finally task&nbsp;3 blocked
-in an RCU read-side critical section, then the state of the
-last leaf <tt>rcu_node</tt> structure's blocked-task list
-would be as shown below:
-
-</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
-
-</p><p>Task&nbsp;T1 is blocking both grace periods, task&nbsp;T2 is
-blocking only the normal grace period, and task&nbsp;T3 is blocking
-neither grace period.
-Note that these tasks will not remove themselves from this list
-immediately upon resuming execution.
-They will instead remain on the list until they execute the outermost
-<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
-section.
-
-<p>
-The <tt>-&gt;wait_blkd_tasks</tt> field indicates whether or not
-the current grace period is waiting on a blocked task.
-
-<h5>Sizing the <tt>rcu_node</tt> Array</h5>
-
-<p>The <tt>rcu_node</tt> array is sized via a series of
-C-preprocessor expressions as follows:
-
-<pre>
- 1 #ifdef CONFIG_RCU_FANOUT
- 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
- 3 #else
- 4 # ifdef CONFIG_64BIT
- 5 # define RCU_FANOUT 64
- 6 # else
- 7 # define RCU_FANOUT 32
- 8 # endif
- 9 #endif
-10
-11 #ifdef CONFIG_RCU_FANOUT_LEAF
-12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
-13 #else
-14 # ifdef CONFIG_64BIT
-15 # define RCU_FANOUT_LEAF 64
-16 # else
-17 # define RCU_FANOUT_LEAF 32
-18 # endif
-19 #endif
-20
-21 #define RCU_FANOUT_1        (RCU_FANOUT_LEAF)
-22 #define RCU_FANOUT_2        (RCU_FANOUT_1 * RCU_FANOUT)
-23 #define RCU_FANOUT_3        (RCU_FANOUT_2 * RCU_FANOUT)
-24 #define RCU_FANOUT_4        (RCU_FANOUT_3 * RCU_FANOUT)
-25
-26 #if NR_CPUS &lt;= RCU_FANOUT_1
-27 #  define RCU_NUM_LVLS        1
-28 #  define NUM_RCU_LVL_0        1
-29 #  define NUM_RCU_NODES        NUM_RCU_LVL_0
-30 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
-31 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
-32 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
-33 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
-34 #elif NR_CPUS &lt;= RCU_FANOUT_2
-35 #  define RCU_NUM_LVLS        2
-36 #  define NUM_RCU_LVL_0        1
-37 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-38 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
-39 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
-40 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
-41 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-42 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
-43 #elif NR_CPUS &lt;= RCU_FANOUT_3
-44 #  define RCU_NUM_LVLS        3
-45 #  define NUM_RCU_LVL_0        1
-46 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-47 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-48 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
-49 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
-50 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
-51 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-52 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
-53 #elif NR_CPUS &lt;= RCU_FANOUT_4
-54 #  define RCU_NUM_LVLS        4
-55 #  define NUM_RCU_LVL_0        1
-56 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
-57 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-58 #  define NUM_RCU_LVL_3        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-59 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
-60 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
-61 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
-62 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-63 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
-64 #else
-65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-66 #endif
-</pre>
-
-<p>The maximum number of levels in the <tt>rcu_node</tt> structure
-is currently limited to four, as specified by lines&nbsp;21-24
-and the structure of the subsequent &ldquo;if&rdquo; statement.
-For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
-should be sufficient for the next few years at least.
-For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
-should see us through the next decade or so.
-This four-level tree also allows kernels built with
-<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
-which might be useful in very large systems having eight CPUs per
-socket (but please note that no one has yet shown any measurable
-performance degradation due to misaligned socket and <tt>rcu_node</tt>
-boundaries).
-In addition, building kernels with a full four levels of <tt>rcu_node</tt>
-tree permits better testing of RCU's combining-tree code.
-
-</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
-are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
-If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
-it is set based on the word size of the system, which is also
-the Kconfig default.
-
-</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
-handled by each leaf <tt>rcu_node</tt> structure.
-Experience has shown that allowing a given leaf <tt>rcu_node</tt>
-structure to handle 64 CPUs, as permitted by the number of bits in
-the <tt>-&gt;qsmask</tt> field on a 64-bit system, results in
-excessive contention for the leaf <tt>rcu_node</tt> structures'
-<tt>-&gt;lock</tt> fields.
-The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
-limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
-If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
-selected is based on the word size of the system, just as for
-<tt>CONFIG_RCU_FANOUT</tt>.
-Lines&nbsp;11-19 perform this computation.
-
-</p><p>Lines&nbsp;21-24 compute the maximum number of CPUs supported by
-a single-level (which contains a single <tt>rcu_node</tt> structure),
-two-level, three-level, and four-level <tt>rcu_node</tt> tree,
-respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
-and <tt>RCU_FANOUT_LEAF</tt>.
-These numbers of CPUs are retained in the
-<tt>RCU_FANOUT_1</tt>,
-<tt>RCU_FANOUT_2</tt>,
-<tt>RCU_FANOUT_3</tt>, and
-<tt>RCU_FANOUT_4</tt>
-C-preprocessor variables, respectively.
-
-</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
-statement spanning lines&nbsp;26-66 that computes the number of
-<tt>rcu_node</tt> structures required for each level of the tree,
-as well as the number of levels required.
-The number of levels is placed in the <tt>NUM_RCU_LVLS</tt>
-C-preprocessor variable by lines&nbsp;27, 35, 44, and&nbsp;54.
-The number of <tt>rcu_node</tt> structures for the topmost level
-of the tree is always exactly one, and this value is unconditionally
-placed into <tt>NUM_RCU_LVL_0</tt> by lines&nbsp;28, 36, 45, and&nbsp;55.
-The rest of the levels (if any) of the <tt>rcu_node</tt> tree
-are computed by dividing the maximum number of CPUs by the
-fanout supported by the number of levels from the current level down,
-rounding up.  This computation is performed by lines&nbsp;37,
-46-47, and&nbsp;56-58.
-Lines&nbsp;31-33, 40-42, 50-52, and&nbsp;62-63 create initializers
-for lockdep lock-class names.
-Finally, lines&nbsp;64-66 produce an error if the maximum number of
-CPUs is too large for the specified fanout.
-
-<h3><a name="The rcu_segcblist Structure">
-The <tt>rcu_segcblist</tt> Structure</a></h3>
-
-The <tt>rcu_segcblist</tt> structure maintains a segmented list of
-callbacks as follows:
-
-<pre>
- 1 #define RCU_DONE_TAIL        0
- 2 #define RCU_WAIT_TAIL        1
- 3 #define RCU_NEXT_READY_TAIL  2
- 4 #define RCU_NEXT_TAIL        3
- 5 #define RCU_CBLIST_NSEGS     4
- 6
- 7 struct rcu_segcblist {
- 8   struct rcu_head *head;
- 9   struct rcu_head **tails[RCU_CBLIST_NSEGS];
-10   unsigned long gp_seq[RCU_CBLIST_NSEGS];
-11   long len;
-12   long len_lazy;
-13 };
-</pre>
-
-<p>
-The segments are as follows:
-
-<ol>
-<li>	<tt>RCU_DONE_TAIL</tt>: Callbacks whose grace periods have elapsed.
-	These callbacks are ready to be invoked.
-<li>	<tt>RCU_WAIT_TAIL</tt>: Callbacks that are waiting for the
-	current grace period.
-	Note that different CPUs can have different ideas about which
-	grace period is current, hence the <tt>-&gt;gp_seq</tt> field.
-<li>	<tt>RCU_NEXT_READY_TAIL</tt>: Callbacks waiting for the next
-	grace period to start.
-<li>	<tt>RCU_NEXT_TAIL</tt>: Callbacks that have not yet been
-	associated with a grace period.
-</ol>
-
-<p>
-The <tt>-&gt;head</tt> pointer references the first callback or
-is <tt>NULL</tt> if the list contains no callbacks (which is
-<i>not</i> the same as being empty).
-Each element of the <tt>-&gt;tails[]</tt> array references the
-<tt>-&gt;next</tt> pointer of the last callback in the corresponding
-segment of the list, or the list's <tt>-&gt;head</tt> pointer if
-that segment and all previous segments are empty.
-If the corresponding segment is empty but some previous segment is
-not empty, then the array element is identical to its predecessor.
-Older callbacks are closer to the head of the list, and new callbacks
-are added at the tail.
-This relationship between the <tt>-&gt;head</tt> pointer, the
-<tt>-&gt;tails[]</tt> array, and the callbacks is shown in this
-diagram:
-
-</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
-
-</p><p>In this figure, the <tt>-&gt;head</tt> pointer references the
-first
-RCU callback in the list.
-The <tt>-&gt;tails[RCU_DONE_TAIL]</tt> array element references
-the <tt>-&gt;head</tt> pointer itself, indicating that none
-of the callbacks is ready to invoke.
-The <tt>-&gt;tails[RCU_WAIT_TAIL]</tt> array element references callback
-CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
-CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period,
-give or take possible disagreements about exactly which grace period
-is the current one.
-The <tt>-&gt;tails[RCU_NEXT_READY_TAIL]</tt> array element
-references the same RCU callback that <tt>-&gt;tails[RCU_WAIT_TAIL]</tt>
-does, which indicates that there are no callbacks waiting on the next
-RCU grace period.
-The <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element references
-CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
-remaining RCU callbacks have not yet been assigned to an RCU grace
-period.
-Note that the <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element
-always references the last RCU callback's <tt>-&gt;next</tt> pointer
-unless the callback list is empty, in which case it references
-the <tt>-&gt;head</tt> pointer.
-
-<p>
-There is one additional important special case for the
-<tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element: It can be <tt>NULL</tt>
-when this list is <i>disabled</i>.
-Lists are disabled when the corresponding CPU is offline or when
-the corresponding CPU's callbacks are offloaded to a kthread,
-both of which are described elsewhere.
-
-</p><p>CPUs advance their callbacks from the
-<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
-<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
-as grace periods advance.
-
-</p><p>The <tt>-&gt;gp_seq[]</tt> array records grace-period
-numbers corresponding to the list segments.
-This is what allows different CPUs to have different ideas as to
-which is the current grace period while still avoiding premature
-invocation of their callbacks.
-In particular, this allows CPUs that go idle for extended periods
-to determine which of their callbacks are ready to be invoked after
-reawakening.
-
-</p><p>The <tt>-&gt;len</tt> counter contains the number of
-callbacks in <tt>-&gt;head</tt>, and the
-<tt>-&gt;len_lazy</tt> contains the number of those callbacks that
-are known to only free memory, and whose invocation can therefore
-be safely deferred.
-
-<p><b>Important note</b>: It is the <tt>-&gt;len</tt> field that
-determines whether or not there are callbacks associated with
-this <tt>rcu_segcblist</tt> structure, <i>not</i> the <tt>-&gt;head</tt>
-pointer.
-The reason for this is that all the ready-to-invoke callbacks
-(that is, those in the <tt>RCU_DONE_TAIL</tt> segment) are extracted
-all at once at callback-invocation time (<tt>rcu_do_batch</tt>), due
-to which <tt>-&gt;head</tt> may be set to NULL if there are no not-done
-callbacks remaining in the <tt>rcu_segcblist</tt>.
-If callback invocation must be postponed, for example, because a
-high-priority process just woke up on this CPU, then the remaining
-callbacks are placed back on the <tt>RCU_DONE_TAIL</tt> segment and
-<tt>-&gt;head</tt> once again points to the start of the segment.
-In short, the head field can briefly be <tt>NULL</tt> even though the
-CPU has callbacks present the entire time.
-Therefore, it is not appropriate to test the <tt>-&gt;head</tt> pointer
-for <tt>NULL</tt>.
-
-<p>In contrast, the <tt>-&gt;len</tt> and <tt>-&gt;len_lazy</tt> counts
-are adjusted only after the corresponding callbacks have been invoked.
-This means that the <tt>-&gt;len</tt> count is zero only if
-the <tt>rcu_segcblist</tt> structure really is devoid of callbacks.
-Of course, off-CPU sampling of the <tt>-&gt;len</tt> count requires
-careful use of appropriate synchronization, for example, memory barriers.
-This synchronization can be a bit subtle, particularly in the case
-of <tt>rcu_barrier()</tt>.
-
-<h3><a name="The rcu_data Structure">
-The <tt>rcu_data</tt> Structure</a></h3>
-
-<p>The <tt>rcu_data</tt> maintains the per-CPU state for the RCU subsystem.
-The fields in this structure may be accessed only from the corresponding
-CPU (and from tracing) unless otherwise stated.
-This structure is the
-focus of quiescent-state detection and RCU callback queuing.
-It also tracks its relationship to the corresponding leaf
-<tt>rcu_node</tt> structure to allow more-efficient
-propagation of quiescent states up the <tt>rcu_node</tt>
-combining tree.
-Like the <tt>rcu_node</tt> structure, it provides a local
-copy of the grace-period information to allow for-free
-synchronized
-access to this information from the corresponding CPU.
-Finally, this structure records past dyntick-idle state
-for the corresponding CPU and also tracks statistics.
-
-</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-
-<h5>Connection to Other Data Structures</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
-  1   int cpu;
-  2   struct rcu_node *mynode;
-  3   unsigned long grpmask;
-  4   bool beenonline;
-</pre>
-
-<p>The <tt>-&gt;cpu</tt> field contains the number of the
-corresponding CPU and the <tt>-&gt;mynode</tt> field references the
-corresponding <tt>rcu_node</tt> structure.
-The <tt>-&gt;mynode</tt> is used to propagate quiescent states
-up the combining tree.
-These two fields are constant and therefore do not require synchronization.
-
-<p>The <tt>-&gt;grpmask</tt> field indicates the bit in
-the <tt>-&gt;mynode-&gt;qsmask</tt> corresponding to this
-<tt>rcu_data</tt> structure, and is also used when propagating
-quiescent states.
-The <tt>-&gt;beenonline</tt> flag is set whenever the corresponding
-CPU comes online, which means that the debugfs tracing need not dump
-out any <tt>rcu_data</tt> structure for which this flag is not set.
-
-<h5>Quiescent-State and Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
-  1   unsigned long gp_seq;
-  2   unsigned long gp_seq_needed;
-  3   bool cpu_no_qs;
-  4   bool core_needs_qs;
-  5   bool gpwrap;
-</pre>
-
-<p>The <tt>-&gt;gp_seq</tt> field is the counterpart of the field of the same
-name in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.  The
-<tt>-&gt;gp_seq_needed</tt> field is the counterpart of the field of the same
-name in the rcu_node</tt> structure.
-They may each lag up to one behind their <tt>rcu_node</tt>
-counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
-<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
-arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
-will catch up upon exit from dyntick-idle mode).
-If the lower two bits of a given <tt>rcu_data</tt> structure's
-<tt>-&gt;gp_seq</tt> are zero, then this <tt>rcu_data</tt>
-structure believes that RCU is idle.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	All this replication of the grace period numbers can only cause
-	massive confusion.
-	Why not just keep a global sequence number and be done with it???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because if there was only a single global sequence
-	numbers, there would need to be a single global lock to allow
-	safely accessing and updating it.
-	And if we are not going to have a single global lock, we need
-	to carefully manage the numbers on a per-node basis.
-	Recall from the answer to a previous Quick Quiz that the consequences
-	of applying a previously sampled quiescent state to the wrong
-	grace period are quite severe.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The <tt>-&gt;cpu_no_qs</tt> flag indicates that the
-CPU has not yet passed through a quiescent state,
-while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
-RCU core needs a quiescent state from the corresponding CPU.
-The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
-CPU has remained idle for so long that the
-<tt>gp_seq</tt> counter is in danger of overflow, which
-will cause the CPU to disregard the values of its counters on
-its next exit from idle.
-
-<h5>RCU Callback Handling</h5>
-
-<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
-the same CPU that registered them.
-This is strictly a cache-locality optimization: callbacks can and
-do get invoked on CPUs other than the one that registered them.
-After all, if the CPU that registered a given callback has gone
-offline before the callback can be invoked, there really is no other
-choice.
-
-</p><p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
- 1 struct rcu_segcblist cblist;
- 2 long qlen_last_fqs_check;
- 3 unsigned long n_cbs_invoked;
- 4 unsigned long n_nocbs_invoked;
- 5 unsigned long n_cbs_orphaned;
- 6 unsigned long n_cbs_adopted;
- 7 unsigned long n_force_qs_snap;
- 8 long blimit;
-</pre>
-
-<p>The <tt>-&gt;cblist</tt> structure is the segmented callback list
-described earlier.
-The CPU advances the callbacks in its <tt>rcu_data</tt> structure
-whenever it notices that another RCU grace period has completed.
-The CPU detects the completion of an RCU grace period by noticing
-that the value of its <tt>rcu_data</tt> structure's
-<tt>-&gt;gp_seq</tt> field differs from that of its leaf
-<tt>rcu_node</tt> structure.
-Recall that each <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field is updated at the beginnings and ends of each
-grace period.
-
-<p>
-The <tt>-&gt;qlen_last_fqs_check</tt> and
-<tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
-states from <tt>call_rcu()</tt> and friends when callback
-lists grow excessively long.
-
-</p><p>The <tt>-&gt;n_cbs_invoked</tt>,
-<tt>-&gt;n_cbs_orphaned</tt>, and <tt>-&gt;n_cbs_adopted</tt>
-fields count the number of callbacks invoked,
-sent to other CPUs when this CPU goes offline,
-and received from other CPUs when those other CPUs go offline.
-The <tt>-&gt;n_nocbs_invoked</tt> is used when the CPU's callbacks
-are offloaded to a kthread.
-
-<p>
-Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
-RCU callbacks that may be invoked at a given time.
-
-<h5>Dyntick-Idle Handling</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
-  1   int dynticks_snap;
-  2   unsigned long dynticks_fqs;
-</pre>
-
-The <tt>-&gt;dynticks_snap</tt> field is used to take a snapshot
-of the corresponding CPU's dyntick-idle state when forcing
-quiescent states, and is therefore accessed from other CPUs.
-Finally, the <tt>-&gt;dynticks_fqs</tt> field is used to
-count the number of times this CPU is determined to be in
-dyntick-idle state, and is used for tracing and debugging purposes.
-
-<p>
-This portion of the rcu_data structure is declared as follows:
-
-<pre>
-  1   long dynticks_nesting;
-  2   long dynticks_nmi_nesting;
-  3   atomic_t dynticks;
-  4   bool rcu_need_heavy_qs;
-  5   bool rcu_urgent_qs;
-</pre>
-
-<p>These fields in the rcu_data structure maintain the per-CPU dyntick-idle
-state for the corresponding CPU.
-The fields may be accessed only from the corresponding CPU (and from tracing)
-unless otherwise stated.
-
-<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
-nesting depth of process execution, so that in normal circumstances
-this counter has value zero or one.
-NMIs, irqs, and tracers are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
-field.
-Because NMIs cannot be masked, changes to this variable have to be
-undertaken carefully using an algorithm provided by Andy Lutomirski.
-The initial transition from idle adds one, and nested transitions
-add two, so that a nesting level of five is represented by a
-<tt>-&gt;dynticks_nmi_nesting</tt> value of nine.
-This counter can therefore be thought of as counting the number
-of reasons why this CPU cannot be permitted to enter dyntick-idle
-mode, aside from process-level transitions.
-
-<p>However, it turns out that when running in non-idle kernel context,
-the Linux kernel is fully capable of entering interrupt handlers that
-never exit and perhaps also vice versa.
-Therefore, whenever the <tt>-&gt;dynticks_nesting</tt> field is
-incremented up from zero, the <tt>-&gt;dynticks_nmi_nesting</tt> field
-is set to a large positive number, and whenever the
-<tt>-&gt;dynticks_nesting</tt> field is decremented down to zero,
-the the <tt>-&gt;dynticks_nmi_nesting</tt> field is set to zero.
-Assuming that the number of misnested interrupts is not sufficient
-to overflow the counter, this approach corrects the
-<tt>-&gt;dynticks_nmi_nesting</tt> field every time the corresponding
-CPU enters the idle loop from process context.
-
-</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
-CPU's transitions to and from either dyntick-idle or user mode, so
-that this counter has an even value when the CPU is in dyntick-idle
-mode or user mode and an odd value otherwise. The transitions to/from
-user mode need to be counted for user mode adaptive-ticks support
-(see timers/NO_HZ.txt).
-
-</p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
-to record the fact that the RCU core code would really like to
-see a quiescent state from the corresponding CPU, so much so that
-it is willing to call for heavy-weight dyntick-counter operations.
-This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
-code, which provide a momentary idle sojourn in response.
-
-</p><p>Finally, the <tt>-&gt;rcu_urgent_qs</tt> field is used to record
-the fact that the RCU core code would really like to see a quiescent state from
-the corresponding CPU, with the various other fields indicating just how badly
-RCU wants this quiescent state.
-This flag is checked by RCU's context-switch path
-(<tt>rcu_note_context_switch</tt>) and the cond_resched code.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why not simply combine the <tt>-&gt;dynticks_nesting</tt>
-	and <tt>-&gt;dynticks_nmi_nesting</tt> counters into a
-	single counter that just counts the number of reasons that
-	the corresponding CPU is non-idle?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because this would fail in the presence of interrupts whose
-	handlers never return and of handlers that manage to return
-	from a made-up interrupt.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>Additional fields are present for some special-purpose
-builds, and are discussed separately.
-
-<h3><a name="The rcu_head Structure">
-The <tt>rcu_head</tt> Structure</a></h3>
-
-<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
-These structures are normally embedded within RCU-protected data
-structures whose algorithms use asynchronous grace periods.
-In contrast, when using algorithms that block waiting for RCU grace periods,
-RCU users need not provide <tt>rcu_head</tt> structures.
-
-</p><p>The <tt>rcu_head</tt> structure has fields as follows:
-
-<pre>
-  1   struct rcu_head *next;
-  2   void (*func)(struct rcu_head *head);
-</pre>
-
-<p>The <tt>-&gt;next</tt> field is used
-to link the <tt>rcu_head</tt> structures together in the
-lists within the <tt>rcu_data</tt> structures.
-The <tt>-&gt;func</tt> field is a pointer to the function
-to be called when the callback is ready to be invoked, and
-this function is passed a pointer to the <tt>rcu_head</tt>
-structure.
-However, <tt>kfree_rcu()</tt> uses the <tt>-&gt;func</tt>
-field to record the offset of the <tt>rcu_head</tt>
-structure within the enclosing RCU-protected data structure.
-
-</p><p>Both of these fields are used internally by RCU.
-From the viewpoint of RCU users, this structure is an
-opaque &ldquo;cookie&rdquo;.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Given that the callback function <tt>-&gt;func</tt>
-	is passed a pointer to the <tt>rcu_head</tt> structure,
-	how is that function supposed to find the beginning of the
-	enclosing RCU-protected data structure?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	In actual practice, there is a separate callback function per
-	type of RCU-protected data structure.
-	The callback function can therefore use the <tt>container_of()</tt>
-	macro in the Linux kernel (or other pointer-manipulation facilities
-	in other software environments) to find the beginning of the
-	enclosing structure.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="RCU-Specific Fields in the task_struct Structure">
-RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
-
-<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
-additional fields in the <tt>task_struct</tt> structure:
-
-<pre>
- 1 #ifdef CONFIG_PREEMPT_RCU
- 2   int rcu_read_lock_nesting;
- 3   union rcu_special rcu_read_unlock_special;
- 4   struct list_head rcu_node_entry;
- 5   struct rcu_node *rcu_blocked_node;
- 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
- 7 #ifdef CONFIG_TASKS_RCU
- 8   unsigned long rcu_tasks_nvcsw;
- 9   bool rcu_tasks_holdout;
-10   struct list_head rcu_tasks_holdout_list;
-11   int rcu_tasks_idle_cpu;
-12 #endif /* #ifdef CONFIG_TASKS_RCU */
-</pre>
-
-<p>The <tt>-&gt;rcu_read_lock_nesting</tt> field records the
-nesting level for RCU read-side critical sections, and
-the <tt>-&gt;rcu_read_unlock_special</tt> field is a bitmask
-that records special conditions that require <tt>rcu_read_unlock()</tt>
-to do additional work.
-The <tt>-&gt;rcu_node_entry</tt> field is used to form lists of
-tasks that have blocked within preemptible-RCU read-side critical
-sections and the <tt>-&gt;rcu_blocked_node</tt> field references
-the <tt>rcu_node</tt> structure whose list this task is a member of,
-or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
-read-side critical section.
-
-<p>The <tt>-&gt;rcu_tasks_nvcsw</tt> field tracks the number of
-voluntary context switches that this task had undergone at the
-beginning of the current tasks-RCU grace period,
-<tt>-&gt;rcu_tasks_holdout</tt> is set if the current tasks-RCU
-grace period is waiting on this task, <tt>-&gt;rcu_tasks_holdout_list</tt>
-is a list element enqueuing this task on the holdout list,
-and <tt>-&gt;rcu_tasks_idle_cpu</tt> tracks which CPU this
-idle task is running, but only if the task is currently running,
-that is, if the CPU is currently idle.
-
-<h3><a name="Accessor Functions">
-Accessor Functions</a></h3>
-
-<p>The following listing shows the
-<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first</tt> and
-<tt>rcu_for_each_leaf_node()</tt> function and macros:
-
-<pre>
-  1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
-  2 {
-  3   return &amp;rsp-&gt;node[0];
-  4 }
-  5
-  6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
-  7   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
-  8        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
-  9
- 10 #define rcu_for_each_leaf_node(rsp, rnp) \
- 11   for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
- 12        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
-</pre>
-
-<p>The <tt>rcu_get_root()</tt> simply returns a pointer to the
-first element of the specified <tt>rcu_state</tt> structure's
-<tt>-&gt;node[]</tt> array, which is the root <tt>rcu_node</tt>
-structure.
-
-</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
-macro takes advantage of the layout of the <tt>rcu_node</tt>
-structures in the <tt>rcu_state</tt> structure's
-<tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
-simply traversing the array in order.
-Similarly, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
-the last part of the array, thus traversing only the leaf
-<tt>rcu_node</tt> structures.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	What does
-	<tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
-	contains only a single node?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	In the single-node case,
-	<tt>rcu_for_each_leaf_node()</tt> traverses the single node.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Summary">
-Summary</a></h3>
-
-So the state of RCU is represented by an <tt>rcu_state</tt> structure,
-which contains a combining tree of <tt>rcu_node</tt> and
-<tt>rcu_data</tt> structures.
-Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
-state is tracked by dynticks-related fields in the <tt>rcu_data</tt> structure.
-
-If you made it this far, you are well prepared to read the code
-walkthroughs in the other articles in this series.
-
-<h3><a name="Acknowledgments">
-Acknowledgments</a></h3>
-
-I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
-Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
-for helping me get this document into a more human-readable state.
-
-<h3><a name="Legal Statement">
-Legal Statement</a></h3>
-
-<p>This work represents the view of the author and does not necessarily
-represent the view of IBM.
-
-</p><p>Linux is a registered trademark of Linus Torvalds.
-
-</p><p>Other company, product, and service names may be trademarks or
-service marks of others.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
new file mode 100644
index 000000000000..4a48e20a46f2
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -0,0 +1,1163 @@
+===================================================
+A Tour Through TREE_RCU's Data Structures [LWN.net]
+===================================================
+
+December 18, 2016
+
+This article was contributed by Paul E. McKenney
+
+Introduction
+============
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+Data-Structure Relationships
+============================
+
+RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+.. kernel-figure:: BigTreeClassicRCU.svg
+
+This diagram shows an enclosing ``rcu_state`` structure containing a tree
+of ``rcu_node`` structures. Each leaf node of the ``rcu_node`` tree has up
+to 16 ``rcu_data`` structures associated with it, so that there are
+``NR_CPUS`` number of ``rcu_data`` structures, one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the common
+case where ``nr_cpu_ids`` is much less than ``NR_CPUs``.
+For example, a number of Linux distributions set ``NR_CPUs=4096``,
+which results in a three-level ``rcu_node`` tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an ``rcu_node`` tree with only a single node.
+
+The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU ``rcu_data`` structures,
+and other events are recorded by the leaf-level ``rcu_node``
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root ``rcu_node``
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of ``CONFIG_PREEMPT_RCU``, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why isn't the fanout at the leaves also 64?                           |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because there are more types of events that affect the leaf-level     |
+| ``rcu_node`` structures than further up the tree. Therefore, if the   |
+| leaf ``rcu_node`` structures have fanout of 64, the contention on     |
+| these structures' ``->structures`` becomes excessive. Experimentation |
+| on a wide variety of systems has shown that a fanout of 16 works well |
+| for the leaves of the ``rcu_node`` tree.                              |
+|                                                                       |
+| Of course, further experience with systems having hundreds or         |
+| thousands of CPUs may demonstrate that the fanout for the non-leaf    |
+| ``rcu_node`` structures must also be reduced. Such reduction can be   |
+| easily carried out when and if it proves necessary. In the meantime,  |
+| if you are using such a system and running into contention problems   |
+| on the non-leaf ``rcu_node`` structures, you may use the              |
+| ``CONFIG_RCU_FANOUT`` kernel configuration parameter to reduce the    |
+| non-leaf fanout as needed.                                            |
+|                                                                       |
+| Kernels built for systems with strong NUMA characteristics might      |
+| also need to adjust ``CONFIG_RCU_FANOUT`` so that the domains of      |
+| the ``rcu_node`` structures align with hardware boundaries.           |
+| However, there has thus far been no need for this.                    |
++-----------------------------------------------------------------------+
+
+If your system has more than 1,024 CPUs (or more than 512 CPUs on a
+32-bit system), then RCU will automatically add more levels to the tree.
+For example, if you are crazy enough to build a 64-bit system with
+65,536 CPUs, RCU would configure the ``rcu_node`` tree as follows:
+
+.. kernel-figure:: HugeTreeClassicRCU.svg
+
+RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
+32-bit systems. On the other hand, you can set both
+``CONFIG_RCU_FANOUT`` and ``CONFIG_RCU_FANOUT_LEAF`` to be as small as
+2, which would result in a 16-CPU test using a 4-level tree. This can be
+useful for testing large-system capabilities on small test machines.
+
+This multi-level combining tree allows us to get most of the performance
+and scalability benefits of partitioning, even though RCU grace-period
+detection is inherently a global operation. The trick here is that only
+the last CPU to report a quiescent state into a given ``rcu_node``
+structure need advance to the ``rcu_node`` structure at the next level
+up the tree. This means that at the leaf-level ``rcu_node`` structure,
+only one access out of sixteen will progress up the tree. For the
+internal ``rcu_node`` structures, the situation is even more extreme:
+Only one access out of sixty-four will progress up the tree. Because the
+vast majority of the CPUs do not progress up the tree, the lock
+contention remains roughly constant up the tree. No matter how many CPUs
+there are in the system, at most 64 quiescent-state reports per grace
+period will progress all the way to the root ``rcu_node`` structure,
+thus ensuring that the lock contention on that root ``rcu_node``
+structure remains acceptably low.
+
+In effect, the combining tree acts like a big shock absorber, keeping
+lock contention under control at all tree levels regardless of the level
+of loading on the system.
+
+RCU updaters wait for normal grace periods by registering RCU callbacks,
+either directly via ``call_rcu()`` or indirectly via
+``synchronize_rcu()`` and friends. RCU callbacks are represented by
+``rcu_head`` structures, which are queued on ``rcu_data`` structures
+while they are waiting for a grace period to elapse, as shown in the
+following figure:
+
+.. kernel-figure:: BigTreePreemptRCUBHdyntickCB.svg
+
+This figure shows how ``TREE_RCU``'s and ``PREEMPT_RCU``'s major data
+structures are related. Lesser data structures will be introduced with
+the algorithms that make use of them.
+
+Note that each of the data structures in the above figure has its own
+synchronization:
+
+#. Each ``rcu_state`` structures has a lock and a mutex, and some fields
+   are protected by the corresponding root ``rcu_node`` structure's lock.
+#. Each ``rcu_node`` structure has a spinlock.
+#. The fields in ``rcu_data`` are private to the corresponding CPU,
+   although a few can be read and written by other CPUs.
+
+It is important to note that different data structures can have very
+different ideas about the state of RCU at any given time. For but one
+example, awareness of the start or end of a given RCU grace period
+propagates slowly through the data structures. This slow propagation is
+absolutely necessary for RCU to have good read-side performance. If this
+balkanized implementation seems foreign to you, one useful trick is to
+consider each instance of these data structures to be a different
+person, each having the usual slightly different view of reality.
+
+The general role of each of these data structures is as follows:
+
+#. ``rcu_state``: This structure forms the interconnection between the
+   ``rcu_node`` and ``rcu_data`` structures, tracks grace periods,
+   serves as short-term repository for callbacks orphaned by CPU-hotplug
+   events, maintains ``rcu_barrier()`` state, tracks expedited
+   grace-period state, and maintains state used to force quiescent
+   states when grace periods extend too long,
+#. ``rcu_node``: This structure forms the combining tree that propagates
+   quiescent-state information from the leaves to the root, and also
+   propagates grace-period information from the root to the leaves. It
+   provides local copies of the grace-period state in order to allow
+   this information to be accessed in a synchronized manner without
+   suffering the scalability limitations that would otherwise be imposed
+   by global locking. In ``CONFIG_PREEMPT_RCU`` kernels, it manages the
+   lists of tasks that have blocked while in their current RCU read-side
+   critical section. In ``CONFIG_PREEMPT_RCU`` with
+   ``CONFIG_RCU_BOOST``, it manages the per-\ ``rcu_node``
+   priority-boosting kernel threads (kthreads) and state. Finally, it
+   records CPU-hotplug state in order to determine which CPUs should be
+   ignored during a given grace period.
+#. ``rcu_data``: This per-CPU structure is the focus of quiescent-state
+   detection and RCU callback queuing. It also tracks its relationship
+   to the corresponding leaf ``rcu_node`` structure to allow
+   more-efficient propagation of quiescent states up the ``rcu_node``
+   combining tree. Like the ``rcu_node`` structure, it provides a local
+   copy of the grace-period information to allow for-free synchronized
+   access to this information from the corresponding CPU. Finally, this
+   structure records past dyntick-idle state for the corresponding CPU
+   and also tracks statistics.
+#. ``rcu_head``: This structure represents RCU callbacks, and is the
+   only structure allocated and managed by RCU users. The ``rcu_head``
+   structure is normally embedded within the RCU-protected data
+   structure.
+
+If all you wanted from this article was a general notion of how RCU's
+data structures are related, you are done. Otherwise, each of the
+following sections give more details on the ``rcu_state``, ``rcu_node``
+and ``rcu_data`` data structures.
+
+The ``rcu_state`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_state`` structure is the base structure that represents the
+state of RCU in the system. This structure forms the interconnection
+between the ``rcu_node`` and ``rcu_data`` structures, tracks grace
+periods, contains the lock used to synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when grace periods
+extend too long,
+
+A few of the ``rcu_state`` structure's fields are discussed, singly and
+in groups, in the following sections. The more specialized fields are
+covered in the discussion of their use.
+
+Relationship to rcu_node and rcu_data Structures
+''''''''''''''''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+     1   struct rcu_node node[NUM_RCU_NODES];
+     2   struct rcu_node *level[NUM_RCU_LVLS + 1];
+     3   struct rcu_data __percpu *rda;
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Wait a minute! You said that the ``rcu_node`` structures formed a     |
+| tree, but they are declared as a flat array! What gives?              |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| The tree is laid out in the array. The first node In the array is the |
+| head, the next set of nodes in the array are children of the head     |
+| node, and so on until the last set of nodes in the array are the      |
+| leaves.                                                               |
+| See the following diagrams to see how this works.                     |
++-----------------------------------------------------------------------+
+
+The ``rcu_node`` tree is embedded into the ``->node[]`` array as shown
+in the following figure:
+
+.. kernel-figure:: TreeMapping.svg
+
+One interesting consequence of this mapping is that a breadth-first
+traversal of the tree is implemented as a simple linear scan of the
+array, which is in fact what the ``rcu_for_each_node_breadth_first()``
+macro does. This macro is used at the beginning and ends of grace
+periods.
+
+Each entry of the ``->level`` array references the first ``rcu_node``
+structure on the corresponding level of the tree, for example, as shown
+below:
+
+.. kernel-figure:: TreeMappingLevel.svg
+
+The zero\ :sup:`th` element of the array references the root
+``rcu_node`` structure, the first element references the first child of
+the root ``rcu_node``, and finally the second element references the
+first leaf ``rcu_node`` structure.
+
+For whatever it is worth, if you draw the tree to be tree-shaped rather
+than array-shaped, it is easy to draw a planar representation:
+
+.. kernel-figure:: TreeLevel.svg
+
+Finally, the ``->rda`` field references a per-CPU pointer to the
+corresponding CPU's ``rcu_data`` structure.
+
+All of these fields are constant once initialization is complete, and
+therefore need no protection.
+
+Grace-Period Tracking
+'''''''''''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+     1   unsigned long gp_seq;
+
+RCU grace periods are numbered, and the ``->gp_seq`` field contains the
+current grace-period sequence number. The bottom two bits are the state
+of the current grace period, which can be zero for not yet started or
+one for in progress. In other words, if the bottom two bits of
+``->gp_seq`` are zero, then RCU is idle. Any other value in the bottom
+two bits indicates that something is broken. This field is protected by
+the root ``rcu_node`` structure's ``->lock`` field.
+
+There are ``->gp_seq`` fields in the ``rcu_node`` and ``rcu_data``
+structures as well. The fields in the ``rcu_state`` structure represent
+the most current value, and those of the other structures are compared
+in order to detect the beginnings and ends of grace periods in a
+distributed fashion. The values flow from ``rcu_state`` to ``rcu_node``
+(down the tree from the root to the leaves) to ``rcu_data``.
+
+Miscellaneous
+'''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+     1   unsigned long gp_max;
+     2   char abbr;
+     3   char *name;
+
+The ``->gp_max`` field tracks the duration of the longest grace period
+in jiffies. It is protected by the root ``rcu_node``'s ``->lock``.
+
+The ``->name`` and ``->abbr`` fields distinguish between preemptible RCU
+(“rcu_preempt” and “p”) and non-preemptible RCU (“rcu_sched” and “s”).
+These fields are used for diagnostic and tracing purposes.
+
+The ``rcu_node`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_node`` structures form the combining tree that propagates
+quiescent-state information from the leaves to the root and also that
+propagates grace-period information from the root down to the leaves.
+They provides local copies of the grace-period state in order to allow
+this information to be accessed in a synchronized manner without
+suffering the scalability limitations that would otherwise be imposed by
+global locking. In ``CONFIG_PREEMPT_RCU`` kernels, they manage the lists
+of tasks that have blocked while in their current RCU read-side critical
+section. In ``CONFIG_PREEMPT_RCU`` with ``CONFIG_RCU_BOOST``, they
+manage the per-\ ``rcu_node`` priority-boosting kernel threads
+(kthreads) and state. Finally, they record CPU-hotplug state in order to
+determine which CPUs should be ignored during a given grace period.
+
+The ``rcu_node`` structure's fields are discussed, singly and in groups,
+in the following sections.
+
+Connection to Combining Tree
+''''''''''''''''''''''''''''
+
+This portion of the ``rcu_node`` structure is declared as follows:
+
+::
+
+     1   struct rcu_node *parent;
+     2   u8 level;
+     3   u8 grpnum;
+     4   unsigned long grpmask;
+     5   int grplo;
+     6   int grphi;
+
+The ``->parent`` pointer references the ``rcu_node`` one level up in the
+tree, and is ``NULL`` for the root ``rcu_node``. The RCU implementation
+makes heavy use of this field to push quiescent states up the tree. The
+``->level`` field gives the level in the tree, with the root being at
+level zero, its children at level one, and so on. The ``->grpnum`` field
+gives this node's position within the children of its parent, so this
+number can range between 0 and 31 on 32-bit systems and between 0 and 63
+on 64-bit systems. The ``->level`` and ``->grpnum`` fields are used only
+during initialization and for tracing. The ``->grpmask`` field is the
+bitmask counterpart of ``->grpnum``, and therefore always has exactly
+one bit set. This mask is used to clear the bit corresponding to this
+``rcu_node`` structure in its parent's bitmasks, which are described
+later. Finally, the ``->grplo`` and ``->grphi`` fields contain the
+lowest and highest numbered CPU served by this ``rcu_node`` structure,
+respectively.
+
+All of these fields are constant, and thus do not require any
+synchronization.
+
+Synchronization
+'''''''''''''''
+
+This field of the ``rcu_node`` structure is declared as follows:
+
+::
+
+     1   raw_spinlock_t lock;
+
+This field is used to protect the remaining fields in this structure,
+unless otherwise stated. That said, all of the fields in this structure
+can be accessed without locking for tracing purposes. Yes, this can
+result in confusing traces, but better some tracing confusion than to be
+heisenbugged out of existence.
+
+.. _grace-period-tracking-1:
+
+Grace-Period Tracking
+'''''''''''''''''''''
+
+This portion of the ``rcu_node`` structure is declared as follows:
+
+::
+
+     1   unsigned long gp_seq;
+     2   unsigned long gp_seq_needed;
+
+The ``rcu_node`` structures' ``->gp_seq`` fields are the counterparts of
+the field of the same name in the ``rcu_state`` structure. They each may
+lag up to one step behind their ``rcu_state`` counterpart. If the bottom
+two bits of a given ``rcu_node`` structure's ``->gp_seq`` field is zero,
+then this ``rcu_node`` structure believes that RCU is idle.
+
+The ``>gp_seq`` field of each ``rcu_node`` structure is updated at the
+beginning and the end of each grace period.
+
+The ``->gp_seq_needed`` fields record the furthest-in-the-future grace
+period request seen by the corresponding ``rcu_node`` structure. The
+request is considered fulfilled when the value of the ``->gp_seq`` field
+equals or exceeds that of the ``->gp_seq_needed`` field.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Suppose that this ``rcu_node`` structure doesn't see a request for a  |
+| very long time. Won't wrapping of the ``->gp_seq`` field cause        |
+| problems?                                                             |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| No, because if the ``->gp_seq_needed`` field lags behind the          |
+| ``->gp_seq`` field, the ``->gp_seq_needed`` field will be updated at  |
+| the end of the grace period. Modulo-arithmetic comparisons therefore  |
+| will always get the correct answer, even with wrapping.               |
++-----------------------------------------------------------------------+
+
+Quiescent-State Tracking
+''''''''''''''''''''''''
+
+These fields manage the propagation of quiescent states up the combining
+tree.
+
+This portion of the ``rcu_node`` structure has fields as follows:
+
+::
+
+     1   unsigned long qsmask;
+     2   unsigned long expmask;
+     3   unsigned long qsmaskinit;
+     4   unsigned long expmaskinit;
+
+The ``->qsmask`` field tracks which of this ``rcu_node`` structure's
+children still need to report quiescent states for the current normal
+grace period. Such children will have a value of 1 in their
+corresponding bit. Note that the leaf ``rcu_node`` structures should be
+thought of as having ``rcu_data`` structures as their children.
+Similarly, the ``->expmask`` field tracks which of this ``rcu_node``
+structure's children still need to report quiescent states for the
+current expedited grace period. An expedited grace period has the same
+conceptual properties as a normal grace period, but the expedited
+implementation accepts extreme CPU overhead to obtain much lower
+grace-period latency, for example, consuming a few tens of microseconds
+worth of CPU time to reduce grace-period duration from milliseconds to
+tens of microseconds. The ``->qsmaskinit`` field tracks which of this
+``rcu_node`` structure's children cover for at least one online CPU.
+This mask is used to initialize ``->qsmask``, and ``->expmaskinit`` is
+used to initialize ``->expmask`` and the beginning of the normal and
+expedited grace periods, respectively.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why are these bitmasks protected by locking? Come on, haven't you     |
+| heard of atomic instructions???                                       |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Lockless grace-period computation! Such a tantalizing possibility!    |
+| But consider the following sequence of events:                        |
+|                                                                       |
+| #. CPU 0 has been in dyntick-idle mode for quite some time. When it   |
+|    wakes up, it notices that the current RCU grace period needs it to |
+|    report in, so it sets a flag where the scheduling clock interrupt  |
+|    will find it.                                                      |
+| #. Meanwhile, CPU 1 is running ``force_quiescent_state()``, and       |
+|    notices that CPU 0 has been in dyntick idle mode, which qualifies  |
+|    as an extended quiescent state.                                    |
+| #. CPU 0's scheduling clock interrupt fires in the middle of an RCU   |
+|    read-side critical section, and notices that the RCU core needs    |
+|    something, so commences RCU softirq processing.                    |
+| #. CPU 0's softirq handler executes and is just about ready to report |
+|    its quiescent state up the ``rcu_node`` tree.                      |
+| #. But CPU 1 beats it to the punch, completing the current grace      |
+|    period and starting a new one.                                     |
+| #. CPU 0 now reports its quiescent state for the wrong grace period.  |
+|    That grace period might now end before the RCU read-side critical  |
+|    section. If that happens, disaster will ensue.                     |
+|                                                                       |
+| So the locking is absolutely required in order to coordinate clearing |
+| of the bits with updating of the grace-period sequence number in      |
+| ``->gp_seq``.                                                         |
++-----------------------------------------------------------------------+
+
+Blocked-Task Management
+'''''''''''''''''''''''
+
+``PREEMPT_RCU`` allows tasks to be preempted in the midst of their RCU
+read-side critical sections, and these tasks must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered in a
+separate article on RCU read-side processing. For now, it is enough to
+know that the ``rcu_node`` structure tracks them.
+
+::
+
+     1   struct list_head blkd_tasks;
+     2   struct list_head *gp_tasks;
+     3   struct list_head *exp_tasks;
+     4   bool wait_blkd_tasks;
+
+The ``->blkd_tasks`` field is a list header for the list of blocked and
+preempted tasks. As tasks undergo context switches within RCU read-side
+critical sections, their ``task_struct`` structures are enqueued (via
+the ``task_struct``'s ``->rcu_node_entry`` field) onto the head of the
+``->blkd_tasks`` list for the leaf ``rcu_node`` structure corresponding
+to the CPU on which the outgoing context switch executed. As these tasks
+later exit their RCU read-side critical sections, they remove themselves
+from the list. This list is therefore in reverse time order, so that if
+one of the tasks is blocking the current grace period, all subsequent
+tasks must also be blocking that same grace period. Therefore, a single
+pointer into this list suffices to track all tasks blocking a given
+grace period. That pointer is stored in ``->gp_tasks`` for normal grace
+periods and in ``->exp_tasks`` for expedited grace periods. These last
+two fields are ``NULL`` if either there is no grace period in flight or
+if there are no blocked tasks preventing that grace period from
+completing. If either of these two pointers is referencing a task that
+removes itself from the ``->blkd_tasks`` list, then that task must
+advance the pointer to the next task on the list, or set the pointer to
+``NULL`` if there are no subsequent tasks on the list.
+
+For example, suppose that tasks T1, T2, and T3 are all hard-affinitied
+to the largest-numbered CPU in the system. Then if task T1 blocked in an
+RCU read-side critical section, then an expedited grace period started,
+then task T2 blocked in an RCU read-side critical section, then a normal
+grace period started, and finally task 3 blocked in an RCU read-side
+critical section, then the state of the last leaf ``rcu_node``
+structure's blocked-task list would be as shown below:
+
+.. kernel-figure:: blkd_task.svg
+
+Task T1 is blocking both grace periods, task T2 is blocking only the
+normal grace period, and task T3 is blocking neither grace period. Note
+that these tasks will not remove themselves from this list immediately
+upon resuming execution. They will instead remain on the list until they
+execute the outermost ``rcu_read_unlock()`` that ends their RCU
+read-side critical section.
+
+The ``->wait_blkd_tasks`` field indicates whether or not the current
+grace period is waiting on a blocked task.
+
+Sizing the ``rcu_node`` Array
+'''''''''''''''''''''''''''''
+
+The ``rcu_node`` array is sized via a series of C-preprocessor
+expressions as follows:
+
+::
+
+    1 #ifdef CONFIG_RCU_FANOUT
+    2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+    3 #else
+    4 # ifdef CONFIG_64BIT
+    5 # define RCU_FANOUT 64
+    6 # else
+    7 # define RCU_FANOUT 32
+    8 # endif
+    9 #endif
+   10
+   11 #ifdef CONFIG_RCU_FANOUT_LEAF
+   12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+   13 #else
+   14 # ifdef CONFIG_64BIT
+   15 # define RCU_FANOUT_LEAF 64
+   16 # else
+   17 # define RCU_FANOUT_LEAF 32
+   18 # endif
+   19 #endif
+   20
+   21 #define RCU_FANOUT_1        (RCU_FANOUT_LEAF)
+   22 #define RCU_FANOUT_2        (RCU_FANOUT_1 * RCU_FANOUT)
+   23 #define RCU_FANOUT_3        (RCU_FANOUT_2 * RCU_FANOUT)
+   24 #define RCU_FANOUT_4        (RCU_FANOUT_3 * RCU_FANOUT)
+   25
+   26 #if NR_CPUS <= RCU_FANOUT_1
+   27 #  define RCU_NUM_LVLS        1
+   28 #  define NUM_RCU_LVL_0        1
+   29 #  define NUM_RCU_NODES        NUM_RCU_LVL_0
+   30 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+   31 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+   32 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+   33 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
+   34 #elif NR_CPUS <= RCU_FANOUT_2
+   35 #  define RCU_NUM_LVLS        2
+   36 #  define NUM_RCU_LVL_0        1
+   37 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+   38 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+   39 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+   40 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+   41 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+   42 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
+   43 #elif NR_CPUS <= RCU_FANOUT_3
+   44 #  define RCU_NUM_LVLS        3
+   45 #  define NUM_RCU_LVL_0        1
+   46 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+   47 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+   48 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+   49 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+   50 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+   51 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+   52 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+   53 #elif NR_CPUS <= RCU_FANOUT_4
+   54 #  define RCU_NUM_LVLS        4
+   55 #  define NUM_RCU_LVL_0        1
+   56 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+   57 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+   58 #  define NUM_RCU_LVL_3        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+   59 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+   60 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+   61 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+   62 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+   63 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+   64 #else
+   65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+   66 #endif
+
+The maximum number of levels in the ``rcu_node`` structure is currently
+limited to four, as specified by lines 21-24 and the structure of the
+subsequent “if” statement. For 32-bit systems, this allows
+16*32*32*32=524,288 CPUs, which should be sufficient for the next few
+years at least. For 64-bit systems, 16*64*64*64=4,194,304 CPUs is
+allowed, which should see us through the next decade or so. This
+four-level tree also allows kernels built with ``CONFIG_RCU_FANOUT=8``
+to support up to 4096 CPUs, which might be useful in very large systems
+having eight CPUs per socket (but please note that no one has yet shown
+any measurable performance degradation due to misaligned socket and
+``rcu_node`` boundaries). In addition, building kernels with a full four
+levels of ``rcu_node`` tree permits better testing of RCU's
+combining-tree code.
+
+The ``RCU_FANOUT`` symbol controls how many children are permitted at
+each non-leaf level of the ``rcu_node`` tree. If the
+``CONFIG_RCU_FANOUT`` Kconfig option is not specified, it is set based
+on the word size of the system, which is also the Kconfig default.
+
+The ``RCU_FANOUT_LEAF`` symbol controls how many CPUs are handled by
+each leaf ``rcu_node`` structure. Experience has shown that allowing a
+given leaf ``rcu_node`` structure to handle 64 CPUs, as permitted by the
+number of bits in the ``->qsmask`` field on a 64-bit system, results in
+excessive contention for the leaf ``rcu_node`` structures' ``->lock``
+fields. The number of CPUs per leaf ``rcu_node`` structure is therefore
+limited to 16 given the default value of ``CONFIG_RCU_FANOUT_LEAF``. If
+``CONFIG_RCU_FANOUT_LEAF`` is unspecified, the value selected is based
+on the word size of the system, just as for ``CONFIG_RCU_FANOUT``.
+Lines 11-19 perform this computation.
+
+Lines 21-24 compute the maximum number of CPUs supported by a
+single-level (which contains a single ``rcu_node`` structure),
+two-level, three-level, and four-level ``rcu_node`` tree, respectively,
+given the fanout specified by ``RCU_FANOUT`` and ``RCU_FANOUT_LEAF``.
+These numbers of CPUs are retained in the ``RCU_FANOUT_1``,
+``RCU_FANOUT_2``, ``RCU_FANOUT_3``, and ``RCU_FANOUT_4`` C-preprocessor
+variables, respectively.
+
+These variables are used to control the C-preprocessor ``#if`` statement
+spanning lines 26-66 that computes the number of ``rcu_node`` structures
+required for each level of the tree, as well as the number of levels
+required. The number of levels is placed in the ``NUM_RCU_LVLS``
+C-preprocessor variable by lines 27, 35, 44, and 54. The number of
+``rcu_node`` structures for the topmost level of the tree is always
+exactly one, and this value is unconditionally placed into
+``NUM_RCU_LVL_0`` by lines 28, 36, 45, and 55. The rest of the levels
+(if any) of the ``rcu_node`` tree are computed by dividing the maximum
+number of CPUs by the fanout supported by the number of levels from the
+current level down, rounding up. This computation is performed by
+lines 37, 46-47, and 56-58. Lines 31-33, 40-42, 50-52, and 62-63 create
+initializers for lockdep lock-class names. Finally, lines 64-66 produce
+an error if the maximum number of CPUs is too large for the specified
+fanout.
+
+The ``rcu_segcblist`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_segcblist`` structure maintains a segmented list of callbacks
+as follows:
+
+::
+
+    1 #define RCU_DONE_TAIL        0
+    2 #define RCU_WAIT_TAIL        1
+    3 #define RCU_NEXT_READY_TAIL  2
+    4 #define RCU_NEXT_TAIL        3
+    5 #define RCU_CBLIST_NSEGS     4
+    6
+    7 struct rcu_segcblist {
+    8   struct rcu_head *head;
+    9   struct rcu_head **tails[RCU_CBLIST_NSEGS];
+   10   unsigned long gp_seq[RCU_CBLIST_NSEGS];
+   11   long len;
+   12   long len_lazy;
+   13 };
+
+The segments are as follows:
+
+#. ``RCU_DONE_TAIL``: Callbacks whose grace periods have elapsed. These
+   callbacks are ready to be invoked.
+#. ``RCU_WAIT_TAIL``: Callbacks that are waiting for the current grace
+   period. Note that different CPUs can have different ideas about which
+   grace period is current, hence the ``->gp_seq`` field.
+#. ``RCU_NEXT_READY_TAIL``: Callbacks waiting for the next grace period
+   to start.
+#. ``RCU_NEXT_TAIL``: Callbacks that have not yet been associated with a
+   grace period.
+
+The ``->head`` pointer references the first callback or is ``NULL`` if
+the list contains no callbacks (which is *not* the same as being empty).
+Each element of the ``->tails[]`` array references the ``->next``
+pointer of the last callback in the corresponding segment of the list,
+or the list's ``->head`` pointer if that segment and all previous
+segments are empty. If the corresponding segment is empty but some
+previous segment is not empty, then the array element is identical to
+its predecessor. Older callbacks are closer to the head of the list, and
+new callbacks are added at the tail. This relationship between the
+``->head`` pointer, the ``->tails[]`` array, and the callbacks is shown
+in this diagram:
+
+.. kernel-figure:: nxtlist.svg
+
+In this figure, the ``->head`` pointer references the first RCU callback
+in the list. The ``->tails[RCU_DONE_TAIL]`` array element references the
+``->head`` pointer itself, indicating that none of the callbacks is
+ready to invoke. The ``->tails[RCU_WAIT_TAIL]`` array element references
+callback CB 2's ``->next`` pointer, which indicates that CB 1 and CB 2
+are both waiting on the current grace period, give or take possible
+disagreements about exactly which grace period is the current one. The
+``->tails[RCU_NEXT_READY_TAIL]`` array element references the same RCU
+callback that ``->tails[RCU_WAIT_TAIL]`` does, which indicates that
+there are no callbacks waiting on the next RCU grace period. The
+``->tails[RCU_NEXT_TAIL]`` array element references CB 4's ``->next``
+pointer, indicating that all the remaining RCU callbacks have not yet
+been assigned to an RCU grace period. Note that the
+``->tails[RCU_NEXT_TAIL]`` array element always references the last RCU
+callback's ``->next`` pointer unless the callback list is empty, in
+which case it references the ``->head`` pointer.
+
+There is one additional important special case for the
+``->tails[RCU_NEXT_TAIL]`` array element: It can be ``NULL`` when this
+list is *disabled*. Lists are disabled when the corresponding CPU is
+offline or when the corresponding CPU's callbacks are offloaded to a
+kthread, both of which are described elsewhere.
+
+CPUs advance their callbacks from the ``RCU_NEXT_TAIL`` to the
+``RCU_NEXT_READY_TAIL`` to the ``RCU_WAIT_TAIL`` to the
+``RCU_DONE_TAIL`` list segments as grace periods advance.
+
+The ``->gp_seq[]`` array records grace-period numbers corresponding to
+the list segments. This is what allows different CPUs to have different
+ideas as to which is the current grace period while still avoiding
+premature invocation of their callbacks. In particular, this allows CPUs
+that go idle for extended periods to determine which of their callbacks
+are ready to be invoked after reawakening.
+
+The ``->len`` counter contains the number of callbacks in ``->head``,
+and the ``->len_lazy`` contains the number of those callbacks that are
+known to only free memory, and whose invocation can therefore be safely
+deferred.
+
+.. important::
+
+   It is the ``->len`` field that determines whether or
+   not there are callbacks associated with this ``rcu_segcblist``
+   structure, *not* the ``->head`` pointer. The reason for this is that all
+   the ready-to-invoke callbacks (that is, those in the ``RCU_DONE_TAIL``
+   segment) are extracted all at once at callback-invocation time
+   (``rcu_do_batch``), due to which ``->head`` may be set to NULL if there
+   are no not-done callbacks remaining in the ``rcu_segcblist``. If
+   callback invocation must be postponed, for example, because a
+   high-priority process just woke up on this CPU, then the remaining
+   callbacks are placed back on the ``RCU_DONE_TAIL`` segment and
+   ``->head`` once again points to the start of the segment. In short, the
+   head field can briefly be ``NULL`` even though the CPU has callbacks
+   present the entire time. Therefore, it is not appropriate to test the
+   ``->head`` pointer for ``NULL``.
+
+In contrast, the ``->len`` and ``->len_lazy`` counts are adjusted only
+after the corresponding callbacks have been invoked. This means that the
+``->len`` count is zero only if the ``rcu_segcblist`` structure really
+is devoid of callbacks. Of course, off-CPU sampling of the ``->len``
+count requires careful use of appropriate synchronization, for example,
+memory barriers. This synchronization can be a bit subtle, particularly
+in the case of ``rcu_barrier()``.
+
+The ``rcu_data`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_data`` maintains the per-CPU state for the RCU subsystem. The
+fields in this structure may be accessed only from the corresponding CPU
+(and from tracing) unless otherwise stated. This structure is the focus
+of quiescent-state detection and RCU callback queuing. It also tracks
+its relationship to the corresponding leaf ``rcu_node`` structure to
+allow more-efficient propagation of quiescent states up the ``rcu_node``
+combining tree. Like the ``rcu_node`` structure, it provides a local
+copy of the grace-period information to allow for-free synchronized
+access to this information from the corresponding CPU. Finally, this
+structure records past dyntick-idle state for the corresponding CPU and
+also tracks statistics.
+
+The ``rcu_data`` structure's fields are discussed, singly and in groups,
+in the following sections.
+
+Connection to Other Data Structures
+'''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+     1   int cpu;
+     2   struct rcu_node *mynode;
+     3   unsigned long grpmask;
+     4   bool beenonline;
+
+The ``->cpu`` field contains the number of the corresponding CPU and the
+``->mynode`` field references the corresponding ``rcu_node`` structure.
+The ``->mynode`` is used to propagate quiescent states up the combining
+tree. These two fields are constant and therefore do not require
+synchronization.
+
+The ``->grpmask`` field indicates the bit in the ``->mynode->qsmask``
+corresponding to this ``rcu_data`` structure, and is also used when
+propagating quiescent states. The ``->beenonline`` flag is set whenever
+the corresponding CPU comes online, which means that the debugfs tracing
+need not dump out any ``rcu_data`` structure for which this flag is not
+set.
+
+Quiescent-State and Grace-Period Tracking
+'''''''''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+     1   unsigned long gp_seq;
+     2   unsigned long gp_seq_needed;
+     3   bool cpu_no_qs;
+     4   bool core_needs_qs;
+     5   bool gpwrap;
+
+The ``->gp_seq`` field is the counterpart of the field of the same name
+in the ``rcu_state`` and ``rcu_node`` structures. The
+``->gp_seq_needed`` field is the counterpart of the field of the same
+name in the rcu_node structure. They may each lag up to one behind their
+``rcu_node`` counterparts, but in ``CONFIG_NO_HZ_IDLE`` and
+``CONFIG_NO_HZ_FULL`` kernels can lag arbitrarily far behind for CPUs in
+dyntick-idle mode (but these counters will catch up upon exit from
+dyntick-idle mode). If the lower two bits of a given ``rcu_data``
+structure's ``->gp_seq`` are zero, then this ``rcu_data`` structure
+believes that RCU is idle.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| All this replication of the grace period numbers can only cause       |
+| massive confusion. Why not just keep a global sequence number and be  |
+| done with it???                                                       |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because if there was only a single global sequence numbers, there     |
+| would need to be a single global lock to allow safely accessing and   |
+| updating it. And if we are not going to have a single global lock, we |
+| need to carefully manage the numbers on a per-node basis. Recall from |
+| the answer to a previous Quick Quiz that the consequences of applying |
+| a previously sampled quiescent state to the wrong grace period are    |
+| quite severe.                                                         |
++-----------------------------------------------------------------------+
+
+The ``->cpu_no_qs`` flag indicates that the CPU has not yet passed
+through a quiescent state, while the ``->core_needs_qs`` flag indicates
+that the RCU core needs a quiescent state from the corresponding CPU.
+The ``->gpwrap`` field indicates that the corresponding CPU has remained
+idle for so long that the ``gp_seq`` counter is in danger of overflow,
+which will cause the CPU to disregard the values of its counters on its
+next exit from idle.
+
+RCU Callback Handling
+'''''''''''''''''''''
+
+In the absence of CPU-hotplug events, RCU callbacks are invoked by the
+same CPU that registered them. This is strictly a cache-locality
+optimization: callbacks can and do get invoked on CPUs other than the
+one that registered them. After all, if the CPU that registered a given
+callback has gone offline before the callback can be invoked, there
+really is no other choice.
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+    1 struct rcu_segcblist cblist;
+    2 long qlen_last_fqs_check;
+    3 unsigned long n_cbs_invoked;
+    4 unsigned long n_nocbs_invoked;
+    5 unsigned long n_cbs_orphaned;
+    6 unsigned long n_cbs_adopted;
+    7 unsigned long n_force_qs_snap;
+    8 long blimit;
+
+The ``->cblist`` structure is the segmented callback list described
+earlier. The CPU advances the callbacks in its ``rcu_data`` structure
+whenever it notices that another RCU grace period has completed. The CPU
+detects the completion of an RCU grace period by noticing that the value
+of its ``rcu_data`` structure's ``->gp_seq`` field differs from that of
+its leaf ``rcu_node`` structure. Recall that each ``rcu_node``
+structure's ``->gp_seq`` field is updated at the beginnings and ends of
+each grace period.
+
+The ``->qlen_last_fqs_check`` and ``->n_force_qs_snap`` coordinate the
+forcing of quiescent states from ``call_rcu()`` and friends when
+callback lists grow excessively long.
+
+The ``->n_cbs_invoked``, ``->n_cbs_orphaned``, and ``->n_cbs_adopted``
+fields count the number of callbacks invoked, sent to other CPUs when
+this CPU goes offline, and received from other CPUs when those other
+CPUs go offline. The ``->n_nocbs_invoked`` is used when the CPU's
+callbacks are offloaded to a kthread.
+
+Finally, the ``->blimit`` counter is the maximum number of RCU callbacks
+that may be invoked at a given time.
+
+Dyntick-Idle Handling
+'''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+     1   int dynticks_snap;
+     2   unsigned long dynticks_fqs;
+
+The ``->dynticks_snap`` field is used to take a snapshot of the
+corresponding CPU's dyntick-idle state when forcing quiescent states,
+and is therefore accessed from other CPUs. Finally, the
+``->dynticks_fqs`` field is used to count the number of times this CPU
+is determined to be in dyntick-idle state, and is used for tracing and
+debugging purposes.
+
+This portion of the rcu_data structure is declared as follows:
+
+::
+
+     1   long dynticks_nesting;
+     2   long dynticks_nmi_nesting;
+     3   atomic_t dynticks;
+     4   bool rcu_need_heavy_qs;
+     5   bool rcu_urgent_qs;
+
+These fields in the rcu_data structure maintain the per-CPU dyntick-idle
+state for the corresponding CPU. The fields may be accessed only from
+the corresponding CPU (and from tracing) unless otherwise stated.
+
+The ``->dynticks_nesting`` field counts the nesting depth of process
+execution, so that in normal circumstances this counter has value zero
+or one. NMIs, irqs, and tracers are counted by the
+``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
+to this variable have to be undertaken carefully using an algorithm
+provided by Andy Lutomirski. The initial transition from idle adds one,
+and nested transitions add two, so that a nesting level of five is
+represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
+can therefore be thought of as counting the number of reasons why this
+CPU cannot be permitted to enter dyntick-idle mode, aside from
+process-level transitions.
+
+However, it turns out that when running in non-idle kernel context, the
+Linux kernel is fully capable of entering interrupt handlers that never
+exit and perhaps also vice versa. Therefore, whenever the
+``->dynticks_nesting`` field is incremented up from zero, the
+``->dynticks_nmi_nesting`` field is set to a large positive number, and
+whenever the ``->dynticks_nesting`` field is decremented down to zero,
+the the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
+the number of misnested interrupts is not sufficient to overflow the
+counter, this approach corrects the ``->dynticks_nmi_nesting`` field
+every time the corresponding CPU enters the idle loop from process
+context.
+
+The ``->dynticks`` field counts the corresponding CPU's transitions to
+and from either dyntick-idle or user mode, so that this counter has an
+even value when the CPU is in dyntick-idle mode or user mode and an odd
+value otherwise. The transitions to/from user mode need to be counted
+for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+
+The ``->rcu_need_heavy_qs`` field is used to record the fact that the
+RCU core code would really like to see a quiescent state from the
+corresponding CPU, so much so that it is willing to call for
+heavy-weight dyntick-counter operations. This flag is checked by RCU's
+context-switch and ``cond_resched()`` code, which provide a momentary
+idle sojourn in response.
+
+Finally, the ``->rcu_urgent_qs`` field is used to record the fact that
+the RCU core code would really like to see a quiescent state from the
+corresponding CPU, with the various other fields indicating just how
+badly RCU wants this quiescent state. This flag is checked by RCU's
+context-switch path (``rcu_note_context_switch``) and the cond_resched
+code.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why not simply combine the ``->dynticks_nesting`` and                 |
+| ``->dynticks_nmi_nesting`` counters into a single counter that just   |
+| counts the number of reasons that the corresponding CPU is non-idle?  |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because this would fail in the presence of interrupts whose handlers  |
+| never return and of handlers that manage to return from a made-up     |
+| interrupt.                                                            |
++-----------------------------------------------------------------------+
+
+Additional fields are present for some special-purpose builds, and are
+discussed separately.
+
+The ``rcu_head`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ``rcu_head`` structure represents an RCU callback. These structures
+are normally embedded within RCU-protected data structures whose
+algorithms use asynchronous grace periods. In contrast, when using
+algorithms that block waiting for RCU grace periods, RCU users need not
+provide ``rcu_head`` structures.
+
+The ``rcu_head`` structure has fields as follows:
+
+::
+
+     1   struct rcu_head *next;
+     2   void (*func)(struct rcu_head *head);
+
+The ``->next`` field is used to link the ``rcu_head`` structures
+together in the lists within the ``rcu_data`` structures. The ``->func``
+field is a pointer to the function to be called when the callback is
+ready to be invoked, and this function is passed a pointer to the
+``rcu_head`` structure. However, ``kfree_rcu()`` uses the ``->func``
+field to record the offset of the ``rcu_head`` structure within the
+enclosing RCU-protected data structure.
+
+Both of these fields are used internally by RCU. From the viewpoint of
+RCU users, this structure is an opaque “cookie”.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Given that the callback function ``->func`` is passed a pointer to    |
+| the ``rcu_head`` structure, how is that function supposed to find the |
+| beginning of the enclosing RCU-protected data structure?              |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| In actual practice, there is a separate callback function per type of |
+| RCU-protected data structure. The callback function can therefore use |
+| the ``container_of()`` macro in the Linux kernel (or other            |
+| pointer-manipulation facilities in other software environments) to    |
+| find the beginning of the enclosing structure.                        |
++-----------------------------------------------------------------------+
+
+RCU-Specific Fields in the ``task_struct`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``CONFIG_PREEMPT_RCU`` implementation uses some additional fields in
+the ``task_struct`` structure:
+
+::
+
+    1 #ifdef CONFIG_PREEMPT_RCU
+    2   int rcu_read_lock_nesting;
+    3   union rcu_special rcu_read_unlock_special;
+    4   struct list_head rcu_node_entry;
+    5   struct rcu_node *rcu_blocked_node;
+    6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+    7 #ifdef CONFIG_TASKS_RCU
+    8   unsigned long rcu_tasks_nvcsw;
+    9   bool rcu_tasks_holdout;
+   10   struct list_head rcu_tasks_holdout_list;
+   11   int rcu_tasks_idle_cpu;
+   12 #endif /* #ifdef CONFIG_TASKS_RCU */
+
+The ``->rcu_read_lock_nesting`` field records the nesting level for RCU
+read-side critical sections, and the ``->rcu_read_unlock_special`` field
+is a bitmask that records special conditions that require
+``rcu_read_unlock()`` to do additional work. The ``->rcu_node_entry``
+field is used to form lists of tasks that have blocked within
+preemptible-RCU read-side critical sections and the
+``->rcu_blocked_node`` field references the ``rcu_node`` structure whose
+list this task is a member of, or ``NULL`` if it is not blocked within a
+preemptible-RCU read-side critical section.
+
+The ``->rcu_tasks_nvcsw`` field tracks the number of voluntary context
+switches that this task had undergone at the beginning of the current
+tasks-RCU grace period, ``->rcu_tasks_holdout`` is set if the current
+tasks-RCU grace period is waiting on this task,
+``->rcu_tasks_holdout_list`` is a list element enqueuing this task on
+the holdout list, and ``->rcu_tasks_idle_cpu`` tracks which CPU this
+idle task is running, but only if the task is currently running, that
+is, if the CPU is currently idle.
+
+Accessor Functions
+~~~~~~~~~~~~~~~~~~
+
+The following listing shows the ``rcu_get_root()``,
+``rcu_for_each_node_breadth_first`` and ``rcu_for_each_leaf_node()``
+function and macros:
+
+::
+
+     1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+     2 {
+     3   return &rsp->node[0];
+     4 }
+     5
+     6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+     7   for ((rnp) = &(rsp)->node[0]; \
+     8        (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+     9
+    10 #define rcu_for_each_leaf_node(rsp, rnp) \
+    11   for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
+    12        (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
+The ``rcu_get_root()`` simply returns a pointer to the first element of
+the specified ``rcu_state`` structure's ``->node[]`` array, which is the
+root ``rcu_node`` structure.
+
+As noted earlier, the ``rcu_for_each_node_breadth_first()`` macro takes
+advantage of the layout of the ``rcu_node`` structures in the
+``rcu_state`` structure's ``->node[]`` array, performing a breadth-first
+traversal by simply traversing the array in order. Similarly, the
+``rcu_for_each_leaf_node()`` macro traverses only the last part of the
+array, thus traversing only the leaf ``rcu_node`` structures.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| What does ``rcu_for_each_leaf_node()`` do if the ``rcu_node`` tree    |
+| contains only a single node?                                          |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| In the single-node case, ``rcu_for_each_leaf_node()`` traverses the   |
+| single node.                                                          |
++-----------------------------------------------------------------------+
+
+Summary
+~~~~~~~
+
+So the state of RCU is represented by an ``rcu_state`` structure, which
+contains a combining tree of ``rcu_node`` and ``rcu_data`` structures.
+Finally, in ``CONFIG_NO_HZ_IDLE`` kernels, each CPU's dyntick-idle state
+is tracked by dynticks-related fields in the ``rcu_data`` structure. If
+you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+Acknowledgments
+~~~~~~~~~~~~~~~
+
+I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn for
+helping me get this document into a more human-readable state.
+
+Legal Statement
+~~~~~~~~~~~~~~~
+
+This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+Linux is a registered trademark of Linus Torvalds.
+
+Other company, product, and service names may be trademarks or service
+marks of others.
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
deleted file mode 100644
index 57300db4b5ff..000000000000
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ /dev/null
@@ -1,668 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through TREE_RCU's Expedited Grace Periods</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-<h2>Introduction</h2>
-
-This document describes RCU's expedited grace periods.
-Unlike RCU's normal grace periods, which accept long latencies to attain
-high efficiency and minimal disturbance, expedited grace periods accept
-lower efficiency and significant disturbance to attain shorter latencies.
-
-<p>
-There are two flavors of RCU (RCU-preempt and RCU-sched), with an earlier
-third RCU-bh flavor having been implemented in terms of the other two.
-Each of the two implementations is covered in its own section.
-
-<ol>
-<li>	<a href="#Expedited Grace Period Design">
-	Expedited Grace Period Design</a>
-<li>	<a href="#RCU-preempt Expedited Grace Periods">
-	RCU-preempt Expedited Grace Periods</a>
-<li>	<a href="#RCU-sched Expedited Grace Periods">
-	RCU-sched Expedited Grace Periods</a>
-<li>	<a href="#Expedited Grace Period and CPU Hotplug">
-	Expedited Grace Period and CPU Hotplug</a>
-<li>	<a href="#Expedited Grace Period Refinements">
-	Expedited Grace Period Refinements</a>
-</ol>
-
-<h2><a name="Expedited Grace Period Design">
-Expedited Grace Period Design</a></h2>
-
-<p>
-The expedited RCU grace periods cannot be accused of being subtle,
-given that they for all intents and purposes hammer every CPU that
-has not yet provided a quiescent state for the current expedited
-grace period.
-The one saving grace is that the hammer has grown a bit smaller
-over time:  The old call to <tt>try_stop_cpus()</tt> has been
-replaced with a set of calls to <tt>smp_call_function_single()</tt>,
-each of which results in an IPI to the target CPU.
-The corresponding handler function checks the CPU's state, motivating
-a faster quiescent state where possible, and triggering a report
-of that quiescent state.
-As always for RCU, once everything has spent some time in a quiescent
-state, the expedited grace period has completed.
-
-<p>
-The details of the <tt>smp_call_function_single()</tt> handler's
-operation depend on the RCU flavor, as described in the following
-sections.
-
-<h2><a name="RCU-preempt Expedited Grace Periods">
-RCU-preempt Expedited Grace Periods</a></h2>
-
-<p>
-<tt>CONFIG_PREEMPT=y</tt> kernels implement RCU-preempt.
-The overall flow of the handling of a given CPU by an RCU-preempt
-expedited grace period is shown in the following diagram:
-
-<p><img src="ExpRCUFlow.svg" alt="ExpRCUFlow.svg" width="55%">
-
-<p>
-The solid arrows denote direct action, for example, a function call.
-The dotted arrows denote indirect action, for example, an IPI
-or a state that is reached after some time.
-
-<p>
-If a given CPU is offline or idle, <tt>synchronize_rcu_expedited()</tt>
-will ignore it because idle and offline CPUs are already residing
-in quiescent states.
-Otherwise, the expedited grace period will use
-<tt>smp_call_function_single()</tt> to send the CPU an IPI, which
-is handled by <tt>rcu_exp_handler()</tt>.
-
-<p>
-However, because this is preemptible RCU, <tt>rcu_exp_handler()</tt>
-can check to see if the CPU is currently running in an RCU read-side
-critical section.
-If not, the handler can immediately report a quiescent state.
-Otherwise, it sets flags so that the outermost <tt>rcu_read_unlock()</tt>
-invocation will provide the needed quiescent-state report.
-This flag-setting avoids the previous forced preemption of all
-CPUs that might have RCU read-side critical sections.
-In addition, this flag-setting is done so as to avoid increasing
-the overhead of the common-case fastpath through the scheduler.
-
-<p>
-Again because this is preemptible RCU, an RCU read-side critical section
-can be preempted.
-When that happens, RCU will enqueue the task, which will the continue to
-block the current expedited grace period until it resumes and finds its
-outermost <tt>rcu_read_unlock()</tt>.
-The CPU will report a quiescent state just after enqueuing the task because
-the CPU is no longer blocking the grace period.
-It is instead the preempted task doing the blocking.
-The list of blocked tasks is managed by <tt>rcu_preempt_ctxt_queue()</tt>,
-which is called from <tt>rcu_preempt_note_context_switch()</tt>, which
-in turn is called from <tt>rcu_note_context_switch()</tt>, which in
-turn is called from the scheduler.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why not just have the expedited grace period check the
-	state of all the CPUs?
-	After all, that would avoid all those real-time-unfriendly IPIs.
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because we want the RCU read-side critical sections to run fast,
-	which means no memory barriers.
-	Therefore, it is not possible to safely check the state from some
-	other CPU.
-	And even if it was possible to safely check the state, it would
-	still be necessary to IPI the CPU to safely interact with the
-	upcoming <tt>rcu_read_unlock()</tt> invocation, which means that
-	the remote state testing would not help the worst-case
-	latency that real-time applications care about.
-
-	<p><font color="ffffff">One way to prevent your real-time
-	application from getting hit with these IPIs is to
-	build your kernel with <tt>CONFIG_NO_HZ_FULL=y</tt>.
-	RCU would then perceive the CPU running your application
-	as being idle, and it would be able to safely detect that
-	state without needing to IPI the CPU.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-Please note that this is just the overall flow:
-Additional complications can arise due to races with CPUs going idle
-or offline, among other things.
-
-<h2><a name="RCU-sched Expedited Grace Periods">
-RCU-sched Expedited Grace Periods</a></h2>
-
-<p>
-<tt>CONFIG_PREEMPT=n</tt> kernels implement RCU-sched.
-The overall flow of the handling of a given CPU by an RCU-sched
-expedited grace period is shown in the following diagram:
-
-<p><img src="ExpSchedFlow.svg" alt="ExpSchedFlow.svg" width="55%">
-
-<p>
-As with RCU-preempt, RCU-sched's
-<tt>synchronize_rcu_expedited()</tt> ignores offline and
-idle CPUs, again because they are in remotely detectable
-quiescent states.
-However, because the
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-leave no trace of their invocation, in general it is not possible to tell
-whether or not the current CPU is in an RCU read-side critical section.
-The best that RCU-sched's <tt>rcu_exp_handler()</tt> can do is to check
-for idle, on the off-chance that the CPU went idle while the IPI
-was in flight.
-If the CPU is idle, then <tt>rcu_exp_handler()</tt> reports
-the quiescent state.
-
-<p> Otherwise, the handler forces a future context switch by setting the
-NEED_RESCHED flag of the current task's thread flag and the CPU preempt
-counter.
-At the time of the context switch, the CPU reports the quiescent state.
-Should the CPU go offline first, it will report the quiescent state
-at that time.
-
-<h2><a name="Expedited Grace Period and CPU Hotplug">
-Expedited Grace Period and CPU Hotplug</a></h2>
-
-<p>
-The expedited nature of expedited grace periods require a much tighter
-interaction with CPU hotplug operations than is required for normal
-grace periods.
-In addition, attempting to IPI offline CPUs will result in splats, but
-failing to IPI online CPUs can result in too-short grace periods.
-Neither option is acceptable in production kernels.
-
-<p>
-The interaction between expedited grace periods and CPU hotplug operations
-is carried out at several levels:
-
-<ol>
-<li>	The number of CPUs that have ever been online is tracked
-	by the <tt>rcu_state</tt> structure's <tt>-&gt;ncpus</tt>
-	field.
-	The <tt>rcu_state</tt> structure's <tt>-&gt;ncpus_snap</tt>
-	field tracks the number of CPUs that have ever been online
-	at the beginning of an RCU expedited grace period.
-	Note that this number never decreases, at least in the absence
-	of a time machine.
-<li>	The identities of the CPUs that have ever been online is
-	tracked by the <tt>rcu_node</tt> structure's
-	<tt>-&gt;expmaskinitnext</tt> field.
-	The <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
-	field tracks the identities of the CPUs that were online
-	at least once at the beginning of the most recent RCU
-	expedited grace period.
-	The <tt>rcu_state</tt> structure's <tt>-&gt;ncpus</tt> and
-	<tt>-&gt;ncpus_snap</tt> fields are used to detect when
-	new CPUs have come online for the first time, that is,
-	when the <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinitnext</tt>
-	field has changed since the beginning of the last RCU
-	expedited grace period, which triggers an update of each
-	<tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
-	field from its <tt>-&gt;expmaskinitnext</tt> field.
-<li>	Each <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
-	field is used to initialize that structure's
-	<tt>-&gt;expmask</tt> at the beginning of each RCU
-	expedited grace period.
-	This means that only those CPUs that have been online at least
-	once will be considered for a given grace period.
-<li>	Any CPU that goes offline will clear its bit in its leaf
-	<tt>rcu_node</tt> structure's <tt>-&gt;qsmaskinitnext</tt>
-	field, so any CPU with that bit clear can safely be ignored.
-	However, it is possible for a CPU coming online or going offline
-	to have this bit set for some time while <tt>cpu_online</tt>
-	returns <tt>false</tt>.
-<li>	For each non-idle CPU that RCU believes is currently online, the grace
-	period invokes <tt>smp_call_function_single()</tt>.
-	If this succeeds, the CPU was fully online.
-	Failure indicates that the CPU is in the process of coming online
-	or going offline, in which case it is necessary to wait for a
-	short time period and try again.
-	The purpose of this wait (or series of waits, as the case may be)
-	is to permit a concurrent CPU-hotplug operation to complete.
-<li>	In the case of RCU-sched, one of the last acts of an outgoing CPU
-	is to invoke <tt>rcu_report_dead()</tt>, which
-	reports a quiescent state for that CPU.
-	However, this is likely paranoia-induced redundancy. <!-- @@@ -->
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why all the dancing around with multiple counters and masks
-	tracking CPUs that were once online?
-	Why not just have a single set of masks tracking the currently
-	online CPUs and be done with it?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Maintaining single set of masks tracking the online CPUs <i>sounds</i>
-	easier, at least until you try working out all the race conditions
-	between grace-period initialization and CPU-hotplug operations.
-	For example, suppose initialization is progressing down the
-	tree while a CPU-offline operation is progressing up the tree.
-	This situation can result in bits set at the top of the tree
-	that have no counterparts at the bottom of the tree.
-	Those bits will never be cleared, which will result in
-	grace-period hangs.
-	In short, that way lies madness, to say nothing of a great many
-	bugs, hangs, and deadlocks.
-
-	<p><font color="ffffff">
-	In contrast, the current multi-mask multi-counter scheme ensures
-	that grace-period initialization will always see consistent masks
-	up and down the tree, which brings significant simplifications
-	over the single-mask method.
-
-	<p><font color="ffffff">
-	This is an instance of
-	<a href="http://www.cs.columbia.edu/~library/TR-repository/reports/reports-1992/cucs-039-92.ps.gz"><font color="ffffff">
-	deferring work in order to avoid synchronization</a>.
-	Lazily recording CPU-hotplug events at the beginning of the next
-	grace period greatly simplifies maintenance of the CPU-tracking
-	bitmasks in the <tt>rcu_node</tt> tree.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h2><a name="Expedited Grace Period Refinements">
-Expedited Grace Period Refinements</a></h2>
-
-<ol>
-<li>	<a href="#Idle-CPU Checks">Idle-CPU checks</a>.
-<li>	<a href="#Batching via Sequence Counter">
-	Batching via sequence counter</a>.
-<li>	<a href="#Funnel Locking and Wait/Wakeup">
-	Funnel locking and wait/wakeup</a>.
-<li>	<a href="#Use of Workqueues">Use of Workqueues</a>.
-<li>	<a href="#Stall Warnings">Stall warnings</a>.
-<li>	<a href="#Mid-Boot Operation">Mid-boot operation</a>.
-</ol>
-
-<h3><a name="Idle-CPU Checks">Idle-CPU Checks</a></h3>
-
-<p>
-Each expedited grace period checks for idle CPUs when initially forming
-the mask of CPUs to be IPIed and again just before IPIing a CPU
-(both checks are carried out by <tt>sync_rcu_exp_select_cpus()</tt>).
-If the CPU is idle at any time between those two times, the CPU will
-not be IPIed.
-Instead, the task pushing the grace period forward will include the
-idle CPUs in the mask passed to <tt>rcu_report_exp_cpu_mult()</tt>.
-
-<p>
-For RCU-sched, there is an additional check:
-If the IPI has interrupted the idle loop, then
-<tt>rcu_exp_handler()</tt> invokes <tt>rcu_report_exp_rdp()</tt>
-to report the corresponding quiescent state.
-
-<p>
-For RCU-preempt, there is no specific check for idle in the
-IPI handler (<tt>rcu_exp_handler()</tt>), but because
-RCU read-side critical sections are not permitted within the
-idle loop, if <tt>rcu_exp_handler()</tt> sees that the CPU is within
-RCU read-side critical section, the CPU cannot possibly be idle.
-Otherwise, <tt>rcu_exp_handler()</tt> invokes
-<tt>rcu_report_exp_rdp()</tt> to report the corresponding quiescent
-state, regardless of whether or not that quiescent state was due to
-the CPU being idle.
-
-<p>
-In summary, RCU expedited grace periods check for idle when building
-the bitmask of CPUs that must be IPIed, just before sending each IPI,
-and (either explicitly or implicitly) within the IPI handler.
-
-<h3><a name="Batching via Sequence Counter">
-Batching via Sequence Counter</a></h3>
-
-<p>
-If each grace-period request was carried out separately, expedited
-grace periods would have abysmal scalability and
-problematic high-load characteristics.
-Because each grace-period operation can serve an unlimited number of
-updates, it is important to <i>batch</i> requests, so that a single
-expedited grace-period operation will cover all requests in the
-corresponding batch.
-
-<p>
-This batching is controlled by a sequence counter named
-<tt>-&gt;expedited_sequence</tt> in the <tt>rcu_state</tt> structure.
-This counter has an odd value when there is an expedited grace period
-in progress and an even value otherwise, so that dividing the counter
-value by two gives the number of completed grace periods.
-During any given update request, the counter must transition from
-even to odd and then back to even, thus indicating that a grace
-period has elapsed.
-Therefore, if the initial value of the counter is <tt>s</tt>,
-the updater must wait until the counter reaches at least the
-value <tt>(s+3)&amp;~0x1</tt>.
-This counter is managed by the following access functions:
-
-<ol>
-<li>	<tt>rcu_exp_gp_seq_start()</tt>, which marks the start of
-	an expedited grace period.
-<li>	<tt>rcu_exp_gp_seq_end()</tt>, which marks the end of an
-	expedited grace period.
-<li>	<tt>rcu_exp_gp_seq_snap()</tt>, which obtains a snapshot of
-	the counter.
-<li>	<tt>rcu_exp_gp_seq_done()</tt>, which returns <tt>true</tt>
-	if a full expedited grace period has elapsed since the
-	corresponding call to <tt>rcu_exp_gp_seq_snap()</tt>.
-</ol>
-
-<p>
-Again, only one request in a given batch need actually carry out
-a grace-period operation, which means there must be an efficient
-way to identify which of many concurrent reqeusts will initiate
-the grace period, and that there be an efficient way for the
-remaining requests to wait for that grace period to complete.
-However, that is the topic of the next section.
-
-<h3><a name="Funnel Locking and Wait/Wakeup">
-Funnel Locking and Wait/Wakeup</a></h3>
-
-<p>
-The natural way to sort out which of a batch of updaters will initiate
-the expedited grace period is to use the <tt>rcu_node</tt> combining
-tree, as implemented by the <tt>exp_funnel_lock()</tt> function.
-The first updater corresponding to a given grace period arriving
-at a given <tt>rcu_node</tt> structure records its desired grace-period
-sequence number in the <tt>-&gt;exp_seq_rq</tt> field and moves up
-to the next level in the tree.
-Otherwise, if the <tt>-&gt;exp_seq_rq</tt> field already contains
-the sequence number for the desired grace period or some later one,
-the updater blocks on one of four wait queues in the
-<tt>-&gt;exp_wq[]</tt> array, using the second-from-bottom
-and third-from bottom bits as an index.
-An <tt>-&gt;exp_lock</tt> field in the <tt>rcu_node</tt> structure
-synchronizes access to these fields.
-
-<p>
-An empty <tt>rcu_node</tt> tree is shown in the following diagram,
-with the white cells representing the <tt>-&gt;exp_seq_rq</tt> field
-and the red cells representing the elements of the
-<tt>-&gt;exp_wq[]</tt> array.
-
-<p><img src="Funnel0.svg" alt="Funnel0.svg" width="75%">
-
-<p>
-The next diagram shows the situation after the arrival of Task&nbsp;A
-and Task&nbsp;B at the leftmost and rightmost leaf <tt>rcu_node</tt>
-structures, respectively.
-The current value of the <tt>rcu_state</tt> structure's
-<tt>-&gt;expedited_sequence</tt> field is zero, so adding three and
-clearing the bottom bit results in the value two, which both tasks
-record in the <tt>-&gt;exp_seq_rq</tt> field of their respective
-<tt>rcu_node</tt> structures:
-
-<p><img src="Funnel1.svg" alt="Funnel1.svg" width="75%">
-
-<p>
-Each of Tasks&nbsp;A and&nbsp;B will move up to the root
-<tt>rcu_node</tt> structure.
-Suppose that Task&nbsp;A wins, recording its desired grace-period sequence
-number and resulting in the state shown below:
-
-<p><img src="Funnel2.svg" alt="Funnel2.svg" width="75%">
-
-<p>
-Task&nbsp;A now advances to initiate a new grace period, while Task&nbsp;B
-moves up to the root <tt>rcu_node</tt> structure, and, seeing that
-its desired sequence number is already recorded, blocks on
-<tt>-&gt;exp_wq[1]</tt>.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why <tt>-&gt;exp_wq[1]</tt>?
-	Given that the value of these tasks' desired sequence number is
-	two, so shouldn't they instead block on <tt>-&gt;exp_wq[2]</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	No.
-
-	<p><font color="ffffff">
-	Recall that the bottom bit of the desired sequence number indicates
-	whether or not a grace period is currently in progress.
-	It is therefore necessary to shift the sequence number right one
-	bit position to obtain the number of the grace period.
-	This results in <tt>-&gt;exp_wq[1]</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-If Tasks&nbsp;C and&nbsp;D also arrive at this point, they will compute the
-same desired grace-period sequence number, and see that both leaf
-<tt>rcu_node</tt> structures already have that value recorded.
-They will therefore block on their respective <tt>rcu_node</tt>
-structures' <tt>-&gt;exp_wq[1]</tt> fields, as shown below:
-
-<p><img src="Funnel3.svg" alt="Funnel3.svg" width="75%">
-
-<p>
-Task&nbsp;A now acquires the <tt>rcu_state</tt> structure's
-<tt>-&gt;exp_mutex</tt> and initiates the grace period, which
-increments <tt>-&gt;expedited_sequence</tt>.
-Therefore, if Tasks&nbsp;E and&nbsp;F arrive, they will compute
-a desired sequence number of 4 and will record this value as
-shown below:
-
-<p><img src="Funnel4.svg" alt="Funnel4.svg" width="75%">
-
-<p>
-Tasks&nbsp;E and&nbsp;F will propagate up the <tt>rcu_node</tt>
-combining tree, with Task&nbsp;F blocking on the root <tt>rcu_node</tt>
-structure and Task&nbsp;E wait for Task&nbsp;A to finish so that
-it can start the next grace period.
-The resulting state is as shown below:
-
-<p><img src="Funnel5.svg" alt="Funnel5.svg" width="75%">
-
-<p>
-Once the grace period completes, Task&nbsp;A
-starts waking up the tasks waiting for this grace period to complete,
-increments the <tt>-&gt;expedited_sequence</tt>,
-acquires the <tt>-&gt;exp_wake_mutex</tt> and then releases the
-<tt>-&gt;exp_mutex</tt>.
-This results in the following state:
-
-<p><img src="Funnel6.svg" alt="Funnel6.svg" width="75%">
-
-<p>
-Task&nbsp;E can then acquire <tt>-&gt;exp_mutex</tt> and increment
-<tt>-&gt;expedited_sequence</tt> to the value three.
-If new tasks&nbsp;G and&nbsp;H arrive and moves up the combining tree at the
-same time, the state will be as follows:
-
-<p><img src="Funnel7.svg" alt="Funnel7.svg" width="75%">
-
-<p>
-Note that three of the root <tt>rcu_node</tt> structure's
-waitqueues are now occupied.
-However, at some point, Task&nbsp;A will wake up the
-tasks blocked on the <tt>-&gt;exp_wq</tt> waitqueues, resulting
-in the following state:
-
-<p><img src="Funnel8.svg" alt="Funnel8.svg" width="75%">
-
-<p>
-Execution will continue with Tasks&nbsp;E and&nbsp;H completing
-their grace periods and carrying out their wakeups.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	What happens if Task&nbsp;A takes so long to do its wakeups
-	that Task&nbsp;E's grace period completes?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Then Task&nbsp;E will block on the <tt>-&gt;exp_wake_mutex</tt>,
-	which will also prevent it from releasing <tt>-&gt;exp_mutex</tt>,
-	which in turn will prevent the next grace period from starting.
-	This last is important in preventing overflow of the
-	<tt>-&gt;exp_wq[]</tt> array.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Use of Workqueues">Use of Workqueues</a></h3>
-
-<p>
-In earlier implementations, the task requesting the expedited
-grace period also drove it to completion.
-This straightforward approach had the disadvantage of needing to
-account for POSIX signals sent to user tasks,
-so more recent implemementations use the Linux kernel's
-<a href="https://www.kernel.org/doc/Documentation/core-api/workqueue.rst">workqueues</a>.
-
-<p>
-The requesting task still does counter snapshotting and funnel-lock
-processing, but the task reaching the top of the funnel lock
-does a <tt>schedule_work()</tt> (from <tt>_synchronize_rcu_expedited()</tt>
-so that a workqueue kthread does the actual grace-period processing.
-Because workqueue kthreads do not accept POSIX signals, grace-period-wait
-processing need not allow for POSIX signals.
-
-In addition, this approach allows wakeups for the previous expedited
-grace period to be overlapped with processing for the next expedited
-grace period.
-Because there are only four sets of waitqueues, it is necessary to
-ensure that the previous grace period's wakeups complete before the
-next grace period's wakeups start.
-This is handled by having the <tt>-&gt;exp_mutex</tt>
-guard expedited grace-period processing and the
-<tt>-&gt;exp_wake_mutex</tt> guard wakeups.
-The key point is that the <tt>-&gt;exp_mutex</tt> is not released
-until the first wakeup is complete, which means that the
-<tt>-&gt;exp_wake_mutex</tt> has already been acquired at that point.
-This approach ensures that the previous grace period's wakeups can
-be carried out while the current grace period is in process, but
-that these wakeups will complete before the next grace period starts.
-This means that only three waitqueues are required, guaranteeing that
-the four that are provided are sufficient.
-
-<h3><a name="Stall Warnings">Stall Warnings</a></h3>
-
-<p>
-Expediting grace periods does nothing to speed things up when RCU
-readers take too long, and therefore expedited grace periods check
-for stalls just as normal grace periods do.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But why not just let the normal grace-period machinery
-	detect the stalls, given that a given reader must block
-	both normal and expedited grace periods?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because it is quite possible that at a given time there
-	is no normal grace period in progress, in which case the
-	normal grace period cannot emit a stall warning.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-The <tt>synchronize_sched_expedited_wait()</tt> function loops waiting
-for the expedited grace period to end, but with a timeout set to the
-current RCU CPU stall-warning time.
-If this time is exceeded, any CPUs or <tt>rcu_node</tt> structures
-blocking the current grace period are printed.
-Each stall warning results in another pass through the loop, but the
-second and subsequent passes use longer stall times.
-
-<h3><a name="Mid-Boot Operation">Mid-boot operation</a></h3>
-
-<p>
-The use of workqueues has the advantage that the expedited
-grace-period code need not worry about POSIX signals.
-Unfortunately, it has the
-corresponding disadvantage that workqueues cannot be used until
-they are initialized, which does not happen until some time after
-the scheduler spawns the first task.
-Given that there are parts of the kernel that really do want to
-execute grace periods during this mid-boot &ldquo;dead zone&rdquo;,
-expedited grace periods must do something else during thie time.
-
-<p>
-What they do is to fall back to the old practice of requiring that the
-requesting task drive the expedited grace period, as was the case
-before the use of workqueues.
-However, the requesting task is only required to drive the grace period
-during the mid-boot dead zone.
-Before mid-boot, a synchronous grace period is a no-op.
-Some time after mid-boot, workqueues are used.
-
-<p>
-Non-expedited non-SRCU synchronous grace periods must also operate
-normally during mid-boot.
-This is handled by causing non-expedited grace periods to take the
-expedited code path during mid-boot.
-
-<p>
-The current code assumes that there are no POSIX signals during
-the mid-boot dead zone.
-However, if an overwhelming need for POSIX signals somehow arises,
-appropriate adjustments can be made to the expedited stall-warning code.
-One such adjustment would reinstate the pre-workqueue stall-warning
-checks, but only during the mid-boot dead zone.
-
-<p>
-With this refinement, synchronous grace periods can now be used from
-task context pretty much any time during the life of the kernel.
-That is, aside from some points in the suspend, hibernate, or shutdown
-code path.
-
-<h3><a name="Summary">
-Summary</a></h3>
-
-<p>
-Expedited grace periods use a sequence-number approach to promote
-batching, so that a single grace-period operation can serve numerous
-requests.
-A funnel lock is used to efficiently identify the one task out of
-a concurrent group that will request the grace period.
-All members of the group will block on waitqueues provided in
-the <tt>rcu_node</tt> structure.
-The actual grace-period processing is carried out by a workqueue.
-
-<p>
-CPU-hotplug operations are noted lazily in order to prevent the need
-for tight synchronization between expedited grace periods and
-CPU-hotplug operations.
-The dyntick-idle counters are used to avoid sending IPIs to idle CPUs,
-at least in the common case.
-RCU-preempt and RCU-sched use different IPI handlers and different
-code to respond to the state changes carried out by those handlers,
-but otherwise use common code.
-
-<p>
-Quiescent states are tracked using the <tt>rcu_node</tt> tree,
-and once all necessary quiescent states have been reported,
-all tasks waiting on this expedited grace period are awakened.
-A pair of mutexes are used to allow one grace period's wakeups
-to proceed concurrently with the next grace period's processing.
-
-<p>
-This combination of mechanisms allows expedited grace periods to
-run reasonably efficiently.
-However, for non-time-critical tasks, normal grace periods should be
-used instead because their longer duration permits much higher
-degrees of batching, and thus much lower per-request overheads.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
new file mode 100644
index 000000000000..72f0f6fbd53c
--- /dev/null
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -0,0 +1,521 @@
+=================================================
+A Tour Through TREE_RCU's Expedited Grace Periods
+=================================================
+
+Introduction
+============
+
+This document describes RCU's expedited grace periods.
+Unlike RCU's normal grace periods, which accept long latencies to attain
+high efficiency and minimal disturbance, expedited grace periods accept
+lower efficiency and significant disturbance to attain shorter latencies.
+
+There are two flavors of RCU (RCU-preempt and RCU-sched), with an earlier
+third RCU-bh flavor having been implemented in terms of the other two.
+Each of the two implementations is covered in its own section.
+
+Expedited Grace Period Design
+=============================
+
+The expedited RCU grace periods cannot be accused of being subtle,
+given that they for all intents and purposes hammer every CPU that
+has not yet provided a quiescent state for the current expedited
+grace period.
+The one saving grace is that the hammer has grown a bit smaller
+over time:  The old call to ``try_stop_cpus()`` has been
+replaced with a set of calls to ``smp_call_function_single()``,
+each of which results in an IPI to the target CPU.
+The corresponding handler function checks the CPU's state, motivating
+a faster quiescent state where possible, and triggering a report
+of that quiescent state.
+As always for RCU, once everything has spent some time in a quiescent
+state, the expedited grace period has completed.
+
+The details of the ``smp_call_function_single()`` handler's
+operation depend on the RCU flavor, as described in the following
+sections.
+
+RCU-preempt Expedited Grace Periods
+===================================
+
+``CONFIG_PREEMPT=y`` kernels implement RCU-preempt.
+The overall flow of the handling of a given CPU by an RCU-preempt
+expedited grace period is shown in the following diagram:
+
+.. kernel-figure:: ExpRCUFlow.svg
+
+The solid arrows denote direct action, for example, a function call.
+The dotted arrows denote indirect action, for example, an IPI
+or a state that is reached after some time.
+
+If a given CPU is offline or idle, ``synchronize_rcu_expedited()``
+will ignore it because idle and offline CPUs are already residing
+in quiescent states.
+Otherwise, the expedited grace period will use
+``smp_call_function_single()`` to send the CPU an IPI, which
+is handled by ``rcu_exp_handler()``.
+
+However, because this is preemptible RCU, ``rcu_exp_handler()``
+can check to see if the CPU is currently running in an RCU read-side
+critical section.
+If not, the handler can immediately report a quiescent state.
+Otherwise, it sets flags so that the outermost ``rcu_read_unlock()``
+invocation will provide the needed quiescent-state report.
+This flag-setting avoids the previous forced preemption of all
+CPUs that might have RCU read-side critical sections.
+In addition, this flag-setting is done so as to avoid increasing
+the overhead of the common-case fastpath through the scheduler.
+
+Again because this is preemptible RCU, an RCU read-side critical section
+can be preempted.
+When that happens, RCU will enqueue the task, which will the continue to
+block the current expedited grace period until it resumes and finds its
+outermost ``rcu_read_unlock()``.
+The CPU will report a quiescent state just after enqueuing the task because
+the CPU is no longer blocking the grace period.
+It is instead the preempted task doing the blocking.
+The list of blocked tasks is managed by ``rcu_preempt_ctxt_queue()``,
+which is called from ``rcu_preempt_note_context_switch()``, which
+in turn is called from ``rcu_note_context_switch()``, which in
+turn is called from the scheduler.
+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why not just have the expedited grace period check the state of all   |
+| the CPUs? After all, that would avoid all those real-time-unfriendly  |
+| IPIs.                                                                 |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because we want the RCU read-side critical sections to run fast,      |
+| which means no memory barriers. Therefore, it is not possible to      |
+| safely check the state from some other CPU. And even if it was        |
+| possible to safely check the state, it would still be necessary to    |
+| IPI the CPU to safely interact with the upcoming                      |
+| ``rcu_read_unlock()`` invocation, which means that the remote state   |
+| testing would not help the worst-case latency that real-time          |
+| applications care about.                                              |
+|                                                                       |
+| One way to prevent your real-time application from getting hit with   |
+| these IPIs is to build your kernel with ``CONFIG_NO_HZ_FULL=y``. RCU  |
+| would then perceive the CPU running your application as being idle,   |
+| and it would be able to safely detect that state without needing to   |
+| IPI the CPU.                                                          |
++-----------------------------------------------------------------------+
+
+Please note that this is just the overall flow: Additional complications
+can arise due to races with CPUs going idle or offline, among other
+things.
+
+RCU-sched Expedited Grace Periods
+---------------------------------
+
+``CONFIG_PREEMPT=n`` kernels implement RCU-sched. The overall flow of
+the handling of a given CPU by an RCU-sched expedited grace period is
+shown in the following diagram:
+
+.. kernel-figure:: ExpSchedFlow.svg
+
+As with RCU-preempt, RCU-sched's ``synchronize_rcu_expedited()`` ignores
+offline and idle CPUs, again because they are in remotely detectable
+quiescent states. However, because the ``rcu_read_lock_sched()`` and
+``rcu_read_unlock_sched()`` leave no trace of their invocation, in
+general it is not possible to tell whether or not the current CPU is in
+an RCU read-side critical section. The best that RCU-sched's
+``rcu_exp_handler()`` can do is to check for idle, on the off-chance
+that the CPU went idle while the IPI was in flight. If the CPU is idle,
+then ``rcu_exp_handler()`` reports the quiescent state.
+
+Otherwise, the handler forces a future context switch by setting the
+NEED_RESCHED flag of the current task's thread flag and the CPU preempt
+counter. At the time of the context switch, the CPU reports the
+quiescent state. Should the CPU go offline first, it will report the
+quiescent state at that time.
+
+Expedited Grace Period and CPU Hotplug
+--------------------------------------
+
+The expedited nature of expedited grace periods require a much tighter
+interaction with CPU hotplug operations than is required for normal
+grace periods. In addition, attempting to IPI offline CPUs will result
+in splats, but failing to IPI online CPUs can result in too-short grace
+periods. Neither option is acceptable in production kernels.
+
+The interaction between expedited grace periods and CPU hotplug
+operations is carried out at several levels:
+
+#. The number of CPUs that have ever been online is tracked by the
+   ``rcu_state`` structure's ``->ncpus`` field. The ``rcu_state``
+   structure's ``->ncpus_snap`` field tracks the number of CPUs that
+   have ever been online at the beginning of an RCU expedited grace
+   period. Note that this number never decreases, at least in the
+   absence of a time machine.
+#. The identities of the CPUs that have ever been online is tracked by
+   the ``rcu_node`` structure's ``->expmaskinitnext`` field. The
+   ``rcu_node`` structure's ``->expmaskinit`` field tracks the
+   identities of the CPUs that were online at least once at the
+   beginning of the most recent RCU expedited grace period. The
+   ``rcu_state`` structure's ``->ncpus`` and ``->ncpus_snap`` fields are
+   used to detect when new CPUs have come online for the first time,
+   that is, when the ``rcu_node`` structure's ``->expmaskinitnext``
+   field has changed since the beginning of the last RCU expedited grace
+   period, which triggers an update of each ``rcu_node`` structure's
+   ``->expmaskinit`` field from its ``->expmaskinitnext`` field.
+#. Each ``rcu_node`` structure's ``->expmaskinit`` field is used to
+   initialize that structure's ``->expmask`` at the beginning of each
+   RCU expedited grace period. This means that only those CPUs that have
+   been online at least once will be considered for a given grace
+   period.
+#. Any CPU that goes offline will clear its bit in its leaf ``rcu_node``
+   structure's ``->qsmaskinitnext`` field, so any CPU with that bit
+   clear can safely be ignored. However, it is possible for a CPU coming
+   online or going offline to have this bit set for some time while
+   ``cpu_online`` returns ``false``.
+#. For each non-idle CPU that RCU believes is currently online, the
+   grace period invokes ``smp_call_function_single()``. If this
+   succeeds, the CPU was fully online. Failure indicates that the CPU is
+   in the process of coming online or going offline, in which case it is
+   necessary to wait for a short time period and try again. The purpose
+   of this wait (or series of waits, as the case may be) is to permit a
+   concurrent CPU-hotplug operation to complete.
+#. In the case of RCU-sched, one of the last acts of an outgoing CPU is
+   to invoke ``rcu_report_dead()``, which reports a quiescent state for
+   that CPU. However, this is likely paranoia-induced redundancy.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why all the dancing around with multiple counters and masks tracking  |
+| CPUs that were once online? Why not just have a single set of masks   |
+| tracking the currently online CPUs and be done with it?               |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Maintaining single set of masks tracking the online CPUs *sounds*     |
+| easier, at least until you try working out all the race conditions    |
+| between grace-period initialization and CPU-hotplug operations. For   |
+| example, suppose initialization is progressing down the tree while a  |
+| CPU-offline operation is progressing up the tree. This situation can  |
+| result in bits set at the top of the tree that have no counterparts   |
+| at the bottom of the tree. Those bits will never be cleared, which    |
+| will result in grace-period hangs. In short, that way lies madness,   |
+| to say nothing of a great many bugs, hangs, and deadlocks.            |
+| In contrast, the current multi-mask multi-counter scheme ensures that |
+| grace-period initialization will always see consistent masks up and   |
+| down the tree, which brings significant simplifications over the      |
+| single-mask method.                                                   |
+|                                                                       |
+| This is an instance of `deferring work in order to avoid              |
+| synchronization <http://www.cs.columbia.edu/~library/TR-repository/re |
+| ports/reports-1992/cucs-039-92.ps.gz>`__.                             |
+| Lazily recording CPU-hotplug events at the beginning of the next      |
+| grace period greatly simplifies maintenance of the CPU-tracking       |
+| bitmasks in the ``rcu_node`` tree.                                    |
++-----------------------------------------------------------------------+
+
+Expedited Grace Period Refinements
+----------------------------------
+
+Idle-CPU Checks
+~~~~~~~~~~~~~~~
+
+Each expedited grace period checks for idle CPUs when initially forming
+the mask of CPUs to be IPIed and again just before IPIing a CPU (both
+checks are carried out by ``sync_rcu_exp_select_cpus()``). If the CPU is
+idle at any time between those two times, the CPU will not be IPIed.
+Instead, the task pushing the grace period forward will include the idle
+CPUs in the mask passed to ``rcu_report_exp_cpu_mult()``.
+
+For RCU-sched, there is an additional check: If the IPI has interrupted
+the idle loop, then ``rcu_exp_handler()`` invokes
+``rcu_report_exp_rdp()`` to report the corresponding quiescent state.
+
+For RCU-preempt, there is no specific check for idle in the IPI handler
+(``rcu_exp_handler()``), but because RCU read-side critical sections are
+not permitted within the idle loop, if ``rcu_exp_handler()`` sees that
+the CPU is within RCU read-side critical section, the CPU cannot
+possibly be idle. Otherwise, ``rcu_exp_handler()`` invokes
+``rcu_report_exp_rdp()`` to report the corresponding quiescent state,
+regardless of whether or not that quiescent state was due to the CPU
+being idle.
+
+In summary, RCU expedited grace periods check for idle when building the
+bitmask of CPUs that must be IPIed, just before sending each IPI, and
+(either explicitly or implicitly) within the IPI handler.
+
+Batching via Sequence Counter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If each grace-period request was carried out separately, expedited grace
+periods would have abysmal scalability and problematic high-load
+characteristics. Because each grace-period operation can serve an
+unlimited number of updates, it is important to *batch* requests, so
+that a single expedited grace-period operation will cover all requests
+in the corresponding batch.
+
+This batching is controlled by a sequence counter named
+``->expedited_sequence`` in the ``rcu_state`` structure. This counter
+has an odd value when there is an expedited grace period in progress and
+an even value otherwise, so that dividing the counter value by two gives
+the number of completed grace periods. During any given update request,
+the counter must transition from even to odd and then back to even, thus
+indicating that a grace period has elapsed. Therefore, if the initial
+value of the counter is ``s``, the updater must wait until the counter
+reaches at least the value ``(s+3)&~0x1``. This counter is managed by
+the following access functions:
+
+#. ``rcu_exp_gp_seq_start()``, which marks the start of an expedited
+   grace period.
+#. ``rcu_exp_gp_seq_end()``, which marks the end of an expedited grace
+   period.
+#. ``rcu_exp_gp_seq_snap()``, which obtains a snapshot of the counter.
+#. ``rcu_exp_gp_seq_done()``, which returns ``true`` if a full expedited
+   grace period has elapsed since the corresponding call to
+   ``rcu_exp_gp_seq_snap()``.
+
+Again, only one request in a given batch need actually carry out a
+grace-period operation, which means there must be an efficient way to
+identify which of many concurrent reqeusts will initiate the grace
+period, and that there be an efficient way for the remaining requests to
+wait for that grace period to complete. However, that is the topic of
+the next section.
+
+Funnel Locking and Wait/Wakeup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The natural way to sort out which of a batch of updaters will initiate
+the expedited grace period is to use the ``rcu_node`` combining tree, as
+implemented by the ``exp_funnel_lock()`` function. The first updater
+corresponding to a given grace period arriving at a given ``rcu_node``
+structure records its desired grace-period sequence number in the
+``->exp_seq_rq`` field and moves up to the next level in the tree.
+Otherwise, if the ``->exp_seq_rq`` field already contains the sequence
+number for the desired grace period or some later one, the updater
+blocks on one of four wait queues in the ``->exp_wq[]`` array, using the
+second-from-bottom and third-from bottom bits as an index. An
+``->exp_lock`` field in the ``rcu_node`` structure synchronizes access
+to these fields.
+
+An empty ``rcu_node`` tree is shown in the following diagram, with the
+white cells representing the ``->exp_seq_rq`` field and the red cells
+representing the elements of the ``->exp_wq[]`` array.
+
+.. kernel-figure:: Funnel0.svg
+
+The next diagram shows the situation after the arrival of Task A and
+Task B at the leftmost and rightmost leaf ``rcu_node`` structures,
+respectively. The current value of the ``rcu_state`` structure's
+``->expedited_sequence`` field is zero, so adding three and clearing the
+bottom bit results in the value two, which both tasks record in the
+``->exp_seq_rq`` field of their respective ``rcu_node`` structures:
+
+.. kernel-figure:: Funnel1.svg
+
+Each of Tasks A and B will move up to the root ``rcu_node`` structure.
+Suppose that Task A wins, recording its desired grace-period sequence
+number and resulting in the state shown below:
+
+.. kernel-figure:: Funnel2.svg
+
+Task A now advances to initiate a new grace period, while Task B moves
+up to the root ``rcu_node`` structure, and, seeing that its desired
+sequence number is already recorded, blocks on ``->exp_wq[1]``.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why ``->exp_wq[1]``? Given that the value of these tasks' desired     |
+| sequence number is two, so shouldn't they instead block on            |
+| ``->exp_wq[2]``?                                                      |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| No.                                                                   |
+| Recall that the bottom bit of the desired sequence number indicates   |
+| whether or not a grace period is currently in progress. It is         |
+| therefore necessary to shift the sequence number right one bit        |
+| position to obtain the number of the grace period. This results in    |
+| ``->exp_wq[1]``.                                                      |
++-----------------------------------------------------------------------+
+
+If Tasks C and D also arrive at this point, they will compute the same
+desired grace-period sequence number, and see that both leaf
+``rcu_node`` structures already have that value recorded. They will
+therefore block on their respective ``rcu_node`` structures'
+``->exp_wq[1]`` fields, as shown below:
+
+.. kernel-figure:: Funnel3.svg
+
+Task A now acquires the ``rcu_state`` structure's ``->exp_mutex`` and
+initiates the grace period, which increments ``->expedited_sequence``.
+Therefore, if Tasks E and F arrive, they will compute a desired sequence
+number of 4 and will record this value as shown below:
+
+.. kernel-figure:: Funnel4.svg
+
+Tasks E and F will propagate up the ``rcu_node`` combining tree, with
+Task F blocking on the root ``rcu_node`` structure and Task E wait for
+Task A to finish so that it can start the next grace period. The
+resulting state is as shown below:
+
+.. kernel-figure:: Funnel5.svg
+
+Once the grace period completes, Task A starts waking up the tasks
+waiting for this grace period to complete, increments the
+``->expedited_sequence``, acquires the ``->exp_wake_mutex`` and then
+releases the ``->exp_mutex``. This results in the following state:
+
+.. kernel-figure:: Funnel6.svg
+
+Task E can then acquire ``->exp_mutex`` and increment
+``->expedited_sequence`` to the value three. If new tasks G and H arrive
+and moves up the combining tree at the same time, the state will be as
+follows:
+
+.. kernel-figure:: Funnel7.svg
+
+Note that three of the root ``rcu_node`` structure's waitqueues are now
+occupied. However, at some point, Task A will wake up the tasks blocked
+on the ``->exp_wq`` waitqueues, resulting in the following state:
+
+.. kernel-figure:: Funnel8.svg
+
+Execution will continue with Tasks E and H completing their grace
+periods and carrying out their wakeups.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| What happens if Task A takes so long to do its wakeups that Task E's  |
+| grace period completes?                                               |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Then Task E will block on the ``->exp_wake_mutex``, which will also   |
+| prevent it from releasing ``->exp_mutex``, which in turn will prevent |
+| the next grace period from starting. This last is important in        |
+| preventing overflow of the ``->exp_wq[]`` array.                      |
++-----------------------------------------------------------------------+
+
+Use of Workqueues
+~~~~~~~~~~~~~~~~~
+
+In earlier implementations, the task requesting the expedited grace
+period also drove it to completion. This straightforward approach had
+the disadvantage of needing to account for POSIX signals sent to user
+tasks, so more recent implemementations use the Linux kernel's
+`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+
+The requesting task still does counter snapshotting and funnel-lock
+processing, but the task reaching the top of the funnel lock does a
+``schedule_work()`` (from ``_synchronize_rcu_expedited()`` so that a
+workqueue kthread does the actual grace-period processing. Because
+workqueue kthreads do not accept POSIX signals, grace-period-wait
+processing need not allow for POSIX signals. In addition, this approach
+allows wakeups for the previous expedited grace period to be overlapped
+with processing for the next expedited grace period. Because there are
+only four sets of waitqueues, it is necessary to ensure that the
+previous grace period's wakeups complete before the next grace period's
+wakeups start. This is handled by having the ``->exp_mutex`` guard
+expedited grace-period processing and the ``->exp_wake_mutex`` guard
+wakeups. The key point is that the ``->exp_mutex`` is not released until
+the first wakeup is complete, which means that the ``->exp_wake_mutex``
+has already been acquired at that point. This approach ensures that the
+previous grace period's wakeups can be carried out while the current
+grace period is in process, but that these wakeups will complete before
+the next grace period starts. This means that only three waitqueues are
+required, guaranteeing that the four that are provided are sufficient.
+
+Stall Warnings
+~~~~~~~~~~~~~~
+
+Expediting grace periods does nothing to speed things up when RCU
+readers take too long, and therefore expedited grace periods check for
+stalls just as normal grace periods do.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But why not just let the normal grace-period machinery detect the     |
+| stalls, given that a given reader must block both normal and          |
+| expedited grace periods?                                              |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because it is quite possible that at a given time there is no normal  |
+| grace period in progress, in which case the normal grace period       |
+| cannot emit a stall warning.                                          |
++-----------------------------------------------------------------------+
+
+The ``synchronize_sched_expedited_wait()`` function loops waiting for
+the expedited grace period to end, but with a timeout set to the current
+RCU CPU stall-warning time. If this time is exceeded, any CPUs or
+``rcu_node`` structures blocking the current grace period are printed.
+Each stall warning results in another pass through the loop, but the
+second and subsequent passes use longer stall times.
+
+Mid-boot operation
+~~~~~~~~~~~~~~~~~~
+
+The use of workqueues has the advantage that the expedited grace-period
+code need not worry about POSIX signals. Unfortunately, it has the
+corresponding disadvantage that workqueues cannot be used until they are
+initialized, which does not happen until some time after the scheduler
+spawns the first task. Given that there are parts of the kernel that
+really do want to execute grace periods during this mid-boot “dead
+zone”, expedited grace periods must do something else during thie time.
+
+What they do is to fall back to the old practice of requiring that the
+requesting task drive the expedited grace period, as was the case before
+the use of workqueues. However, the requesting task is only required to
+drive the grace period during the mid-boot dead zone. Before mid-boot, a
+synchronous grace period is a no-op. Some time after mid-boot,
+workqueues are used.
+
+Non-expedited non-SRCU synchronous grace periods must also operate
+normally during mid-boot. This is handled by causing non-expedited grace
+periods to take the expedited code path during mid-boot.
+
+The current code assumes that there are no POSIX signals during the
+mid-boot dead zone. However, if an overwhelming need for POSIX signals
+somehow arises, appropriate adjustments can be made to the expedited
+stall-warning code. One such adjustment would reinstate the
+pre-workqueue stall-warning checks, but only during the mid-boot dead
+zone.
+
+With this refinement, synchronous grace periods can now be used from
+task context pretty much any time during the life of the kernel. That
+is, aside from some points in the suspend, hibernate, or shutdown code
+path.
+
+Summary
+~~~~~~~
+
+Expedited grace periods use a sequence-number approach to promote
+batching, so that a single grace-period operation can serve numerous
+requests. A funnel lock is used to efficiently identify the one task out
+of a concurrent group that will request the grace period. All members of
+the group will block on waitqueues provided in the ``rcu_node``
+structure. The actual grace-period processing is carried out by a
+workqueue.
+
+CPU-hotplug operations are noted lazily in order to prevent the need for
+tight synchronization between expedited grace periods and CPU-hotplug
+operations. The dyntick-idle counters are used to avoid sending IPIs to
+idle CPUs, at least in the common case. RCU-preempt and RCU-sched use
+different IPI handlers and different code to respond to the state
+changes carried out by those handlers, but otherwise use common code.
+
+Quiescent states are tracked using the ``rcu_node`` tree, and once all
+necessary quiescent states have been reported, all tasks waiting on this
+expedited grace period are awakened. A pair of mutexes are used to allow
+one grace period's wakeups to proceed concurrently with the next grace
+period's processing.
+
+This combination of mechanisms allows expedited grace periods to run
+reasonably efficiently. However, for non-time-critical tasks, normal
+grace periods should be used instead because their longer duration
+permits much higher degrees of batching, and thus much lower per-request
+overheads.
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html
deleted file mode 100644
index e5b42a798ff3..000000000000
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Diagram of TREE_RCU's Grace-Period Memory Ordering</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-<p><img src="TreeRCU-gp.svg" alt="TreeRCU-gp.svg">
-
-</body></html>
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
deleted file mode 100644
index c64f8d26609f..000000000000
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
+++ /dev/null
@@ -1,704 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through TREE_RCU's Grace-Period Memory Ordering</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-           <p>August 8, 2017</p>
-           <p>This article was contributed by Paul E.&nbsp;McKenney</p>
-
-<h3>Introduction</h3>
-
-<p>This document gives a rough visual overview of how Tree RCU's
-grace-period memory ordering guarantee is provided.
-
-<ol>
-<li>	<a href="#What Is Tree RCU's Grace Period Memory Ordering Guarantee?">
-	What Is Tree RCU's Grace Period Memory Ordering Guarantee?</a>
-<li>	<a href="#Tree RCU Grace Period Memory Ordering Building Blocks">
-	Tree RCU Grace Period Memory Ordering Building Blocks</a>
-<li>	<a href="#Tree RCU Grace Period Memory Ordering Components">
-	Tree RCU Grace Period Memory Ordering Components</a>
-<li>	<a href="#Putting It All Together">Putting It All Together</a>
-</ol>
-
-<h3><a name="What Is Tree RCU's Grace Period Memory Ordering Guarantee?">
-What Is Tree RCU's Grace Period Memory Ordering Guarantee?</a></h3>
-
-<p>RCU grace periods provide extremely strong memory-ordering guarantees
-for non-idle non-offline code.
-Any code that happens after the end of a given RCU grace period is guaranteed
-to see the effects of all accesses prior to the beginning of that grace
-period that are within RCU read-side critical sections.
-Similarly, any code that happens before the beginning of a given RCU grace
-period is guaranteed to see the effects of all accesses following the end
-of that grace period that are within RCU read-side critical sections.
-
-<p>Note well that RCU-sched read-side critical sections include any region
-of code for which preemption is disabled.
-Given that each individual machine instruction can be thought of as
-an extremely small region of preemption-disabled code, one can think of
-<tt>synchronize_rcu()</tt> as <tt>smp_mb()</tt> on steroids.
-
-<p>RCU updaters use this guarantee by splitting their updates into
-two phases, one of which is executed before the grace period and
-the other of which is executed after the grace period.
-In the most common use case, phase one removes an element from
-a linked RCU-protected data structure, and phase two frees that element.
-For this to work, any readers that have witnessed state prior to the
-phase-one update (in the common case, removal) must not witness state
-following the phase-two update (in the common case, freeing).
-
-<p>The RCU implementation provides this guarantee using a network
-of lock-based critical sections, memory barriers, and per-CPU
-processing, as is described in the following sections.
-
-<h3><a name="Tree RCU Grace Period Memory Ordering Building Blocks">
-Tree RCU Grace Period Memory Ordering Building Blocks</a></h3>
-
-<p>The workhorse for RCU's grace-period memory ordering is the
-critical section for the <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt>.
-These critical sections use helper functions for lock acquisition, including
-<tt>raw_spin_lock_rcu_node()</tt>,
-<tt>raw_spin_lock_irq_rcu_node()</tt>, and
-<tt>raw_spin_lock_irqsave_rcu_node()</tt>.
-Their lock-release counterparts are
-<tt>raw_spin_unlock_rcu_node()</tt>,
-<tt>raw_spin_unlock_irq_rcu_node()</tt>, and
-<tt>raw_spin_unlock_irqrestore_rcu_node()</tt>,
-respectively.
-For completeness, a
-<tt>raw_spin_trylock_rcu_node()</tt>
-is also provided.
-The key point is that the lock-acquisition functions, including
-<tt>raw_spin_trylock_rcu_node()</tt>, all invoke
-<tt>smp_mb__after_unlock_lock()</tt> immediately after successful
-acquisition of the lock.
-
-<p>Therefore, for any given <tt>rcu_node</tt> structure, any access
-happening before one of the above lock-release functions will be seen
-by all CPUs as happening before any access happening after a later
-one of the above lock-acquisition functions.
-Furthermore, any access happening before one of the
-above lock-release function on any given CPU will be seen by all
-CPUs as happening before any access happening after a later one
-of the above lock-acquisition functions executing on that same CPU,
-even if the lock-release and lock-acquisition functions are operating
-on different <tt>rcu_node</tt> structures.
-Tree RCU uses these two ordering guarantees to form an ordering
-network among all CPUs that were in any way involved in the grace
-period, including any CPUs that came online or went offline during
-the grace period in question.
-
-<p>The following litmus test exhibits the ordering effects of these
-lock-acquisition and lock-release functions:
-
-<pre>
- 1 int x, y, z;
- 2
- 3 void task0(void)
- 4 {
- 5   raw_spin_lock_rcu_node(rnp);
- 6   WRITE_ONCE(x, 1);
- 7   r1 = READ_ONCE(y);
- 8   raw_spin_unlock_rcu_node(rnp);
- 9 }
-10
-11 void task1(void)
-12 {
-13   raw_spin_lock_rcu_node(rnp);
-14   WRITE_ONCE(y, 1);
-15   r2 = READ_ONCE(z);
-16   raw_spin_unlock_rcu_node(rnp);
-17 }
-18
-19 void task2(void)
-20 {
-21   WRITE_ONCE(z, 1);
-22   smp_mb();
-23   r3 = READ_ONCE(x);
-24 }
-25
-26 WARN_ON(r1 == 0 &amp;&amp; r2 == 0 &amp;&amp; r3 == 0);
-</pre>
-
-<p>The <tt>WARN_ON()</tt> is evaluated at &ldquo;the end of time&rdquo;,
-after all changes have propagated throughout the system.
-Without the <tt>smp_mb__after_unlock_lock()</tt> provided by the
-acquisition functions, this <tt>WARN_ON()</tt> could trigger, for example
-on PowerPC.
-The <tt>smp_mb__after_unlock_lock()</tt> invocations prevent this
-<tt>WARN_ON()</tt> from triggering.
-
-<p>This approach must be extended to include idle CPUs, which need
-RCU's grace-period memory ordering guarantee to extend to any
-RCU read-side critical sections preceding and following the current
-idle sojourn.
-This case is handled by calls to the strongly ordered
-<tt>atomic_add_return()</tt> read-modify-write atomic operation that
-is invoked within <tt>rcu_dynticks_eqs_enter()</tt> at idle-entry
-time and within <tt>rcu_dynticks_eqs_exit()</tt> at idle-exit time.
-The grace-period kthread invokes <tt>rcu_dynticks_snap()</tt> and
-<tt>rcu_dynticks_in_eqs_since()</tt> (both of which invoke
-an <tt>atomic_add_return()</tt> of zero) to detect idle CPUs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But what about CPUs that remain offline for the entire
-	grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Such CPUs will be offline at the beginning of the grace period,
-	so the grace period won't expect quiescent states from them.
-	Races between grace-period start and CPU-hotplug operations
-	are mediated by the CPU's leaf <tt>rcu_node</tt> structure's
-	<tt>-&gt;lock</tt> as described above.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The approach must be extended to handle one final case, that
-of waking a task blocked in <tt>synchronize_rcu()</tt>.
-This task might be affinitied to a CPU that is not yet aware that
-the grace period has ended, and thus might not yet be subject to
-the grace period's memory ordering.
-Therefore, there is an <tt>smp_mb()</tt> after the return from
-<tt>wait_for_completion()</tt> in the <tt>synchronize_rcu()</tt>
-code path.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	What?  Where???
-	I don't see any <tt>smp_mb()</tt> after the return from
-	<tt>wait_for_completion()</tt>!!!
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	That would be because I spotted the need for that
-	<tt>smp_mb()</tt> during the creation of this documentation,
-	and it is therefore unlikely to hit mainline before v4.14.
-	Kudos to Lance Roy, Will Deacon, Peter Zijlstra, and
-	Jonathan Cameron for asking questions that sensitized me
-	to the rather elaborate sequence of events that demonstrate
-	the need for this memory barrier.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>Tree RCU's grace--period memory-ordering guarantees rely most
-heavily on the <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>
-field, so much so that it is necessary to abbreviate this pattern
-in the diagrams in the next section.
-For example, consider the <tt>rcu_prepare_for_idle()</tt> function
-shown below, which is one of several functions that enforce ordering
-of newly arrived RCU callbacks against future grace periods:
-
-<pre>
- 1 static void rcu_prepare_for_idle(void)
- 2 {
- 3   bool needwake;
- 4   struct rcu_data *rdp;
- 5   struct rcu_dynticks *rdtp = this_cpu_ptr(&amp;rcu_dynticks);
- 6   struct rcu_node *rnp;
- 7   struct rcu_state *rsp;
- 8   int tne;
- 9
-10   if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-11       rcu_is_nocb_cpu(smp_processor_id()))
-12     return;
-13   tne = READ_ONCE(tick_nohz_active);
-14   if (tne != rdtp-&gt;tick_nohz_enabled_snap) {
-15     if (rcu_cpu_has_callbacks(NULL))
-16       invoke_rcu_core();
-17     rdtp-&gt;tick_nohz_enabled_snap = tne;
-18     return;
-19   }
-20   if (!tne)
-21     return;
-22   if (rdtp-&gt;all_lazy &amp;&amp;
-23       rdtp-&gt;nonlazy_posted != rdtp-&gt;nonlazy_posted_snap) {
-24     rdtp-&gt;all_lazy = false;
-25     rdtp-&gt;nonlazy_posted_snap = rdtp-&gt;nonlazy_posted;
-26     invoke_rcu_core();
-27     return;
-28   }
-29   if (rdtp-&gt;last_accelerate == jiffies)
-30     return;
-31   rdtp-&gt;last_accelerate = jiffies;
-32   for_each_rcu_flavor(rsp) {
-33     rdp = this_cpu_ptr(rsp-&gt;rda);
-34     if (rcu_segcblist_pend_cbs(&amp;rdp-&gt;cblist))
-35       continue;
-36     rnp = rdp-&gt;mynode;
-37     raw_spin_lock_rcu_node(rnp);
-38     needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
-39     raw_spin_unlock_rcu_node(rnp);
-40     if (needwake)
-41       rcu_gp_kthread_wake(rsp);
-42   }
-43 }
-</pre>
-
-<p>But the only part of <tt>rcu_prepare_for_idle()</tt> that really
-matters for this discussion are lines&nbsp;37&ndash;39.
-We will therefore abbreviate this function as follows:
-
-</p><p><img src="rcu_node-lock.svg" alt="rcu_node-lock.svg">
-
-<p>The box represents the <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>
-critical section, with the double line on top representing the additional
-<tt>smp_mb__after_unlock_lock()</tt>.
-
-<h3><a name="Tree RCU Grace Period Memory Ordering Components">
-Tree RCU Grace Period Memory Ordering Components</a></h3>
-
-<p>Tree RCU's grace-period memory-ordering guarantee is provided by
-a number of RCU components:
-
-<ol>
-<li>	<a href="#Callback Registry">Callback Registry</a>
-<li>	<a href="#Grace-Period Initialization">Grace-Period Initialization</a>
-<li>	<a href="#Self-Reported Quiescent States">
-	Self-Reported Quiescent States</a>
-<li>	<a href="#Dynamic Tick Interface">Dynamic Tick Interface</a>
-<li>	<a href="#CPU-Hotplug Interface">CPU-Hotplug Interface</a>
-<li>	<a href="Forcing Quiescent States">Forcing Quiescent States</a>
-<li>	<a href="Grace-Period Cleanup">Grace-Period Cleanup</a>
-<li>	<a href="Callback Invocation">Callback Invocation</a>
-</ol>
-
-<p>Each of the following section looks at the corresponding component
-in detail.
-
-<h4><a name="Callback Registry">Callback Registry</a></h4>
-
-<p>If RCU's grace-period guarantee is to mean anything at all, any
-access that happens before a given invocation of <tt>call_rcu()</tt>
-must also happen before the corresponding grace period.
-The implementation of this portion of RCU's grace period guarantee
-is shown in the following figure:
-
-</p><p><img src="TreeRCU-callback-registry.svg" alt="TreeRCU-callback-registry.svg">
-
-<p>Because <tt>call_rcu()</tt> normally acts only on CPU-local state,
-it provides no ordering guarantees, either for itself or for
-phase one of the update (which again will usually be removal of
-an element from an RCU-protected data structure).
-It simply enqueues the <tt>rcu_head</tt> structure on a per-CPU list,
-which cannot become associated with a grace period until a later
-call to <tt>rcu_accelerate_cbs()</tt>, as shown in the diagram above.
-
-<p>One set of code paths shown on the left invokes
-<tt>rcu_accelerate_cbs()</tt> via
-<tt>note_gp_changes()</tt>, either directly from <tt>call_rcu()</tt> (if
-the current CPU is inundated with queued <tt>rcu_head</tt> structures)
-or more likely from an <tt>RCU_SOFTIRQ</tt> handler.
-Another code path in the middle is taken only in kernels built with
-<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>, which invokes
-<tt>rcu_accelerate_cbs()</tt> via <tt>rcu_prepare_for_idle()</tt>.
-The final code path on the right is taken only in kernels built with
-<tt>CONFIG_HOTPLUG_CPU=y</tt>, which invokes
-<tt>rcu_accelerate_cbs()</tt> via
-<tt>rcu_advance_cbs()</tt>, <tt>rcu_migrate_callbacks</tt>,
-<tt>rcutree_migrate_callbacks()</tt>, and <tt>takedown_cpu()</tt>,
-which in turn is invoked on a surviving CPU after the outgoing
-CPU has been completely offlined.
-
-<p>There are a few other code paths within grace-period processing
-that opportunistically invoke <tt>rcu_accelerate_cbs()</tt>.
-However, either way, all of the CPU's recently queued <tt>rcu_head</tt>
-structures are associated with a future grace-period number under
-the protection of the CPU's lead <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt>.
-In all cases, there is full ordering against any prior critical section
-for that same <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>, and
-also full ordering against any of the current task's or CPU's prior critical
-sections for any <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>.
-
-<p>The next section will show how this ordering ensures that any
-accesses prior to the <tt>call_rcu()</tt> (particularly including phase
-one of the update)
-happen before the start of the corresponding grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But what about <tt>synchronize_rcu()</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	The <tt>synchronize_rcu()</tt> passes <tt>call_rcu()</tt>
-	to <tt>wait_rcu_gp()</tt>, which invokes it.
-	So either way, it eventually comes down to <tt>call_rcu()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Grace-Period Initialization">Grace-Period Initialization</a></h4>
-
-<p>Grace-period initialization is carried out by
-the grace-period kernel thread, which makes several passes over the
-<tt>rcu_node</tt> tree within the <tt>rcu_gp_init()</tt> function.
-This means that showing the full flow of ordering through the
-grace-period computation will require duplicating this tree.
-If you find this confusing, please note that the state of the
-<tt>rcu_node</tt> changes over time, just like Heraclitus's river.
-However, to keep the <tt>rcu_node</tt> river tractable, the
-grace-period kernel thread's traversals are presented in multiple
-parts, starting in this section with the various phases of
-grace-period initialization.
-
-<p>The first ordering-related grace-period initialization action is to
-advance the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt>
-grace-period-number counter, as shown below:
-
-</p><p><img src="TreeRCU-gp-init-1.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>The actual increment is carried out using <tt>smp_store_release()</tt>,
-which helps reject false-positive RCU CPU stall detection.
-Note that only the root <tt>rcu_node</tt> structure is touched.
-
-<p>The first pass through the <tt>rcu_node</tt> tree updates bitmasks
-based on CPUs having come online or gone offline since the start of
-the previous grace period.
-In the common case where the number of online CPUs for this <tt>rcu_node</tt>
-structure has not transitioned to or from zero,
-this pass will scan only the leaf <tt>rcu_node</tt> structures.
-However, if the number of online CPUs for a given leaf <tt>rcu_node</tt>
-structure has transitioned from zero,
-<tt>rcu_init_new_rnp()</tt> will be invoked for the first incoming CPU.
-Similarly, if the number of online CPUs for a given leaf <tt>rcu_node</tt>
-structure has transitioned to zero,
-<tt>rcu_cleanup_dead_rnp()</tt> will be invoked for the last outgoing CPU.
-The diagram below shows the path of ordering if the leftmost
-<tt>rcu_node</tt> structure onlines its first CPU and if the next
-<tt>rcu_node</tt> structure has no online CPUs
-(or, alternatively if the leftmost <tt>rcu_node</tt> structure offlines
-its last CPU and if the next <tt>rcu_node</tt> structure has no online CPUs).
-
-</p><p><img src="TreeRCU-gp-init-2.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>The final <tt>rcu_gp_init()</tt> pass through the <tt>rcu_node</tt>
-tree traverses breadth-first, setting each <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field to the newly advanced value from the
-<tt>rcu_state</tt> structure, as shown in the following diagram.
-
-</p><p><img src="TreeRCU-gp-init-3.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>This change will also cause each CPU's next call to
-<tt>__note_gp_changes()</tt>
-to notice that a new grace period has started, as described in the next
-section.
-But because the grace-period kthread started the grace period at the
-root (with the advancing of the <tt>rcu_state</tt> structure's
-<tt>-&gt;gp_seq</tt> field) before setting each leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;gp_seq</tt> field, each CPU's observation of
-the start of the grace period will happen after the actual start
-of the grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But what about the CPU that started the grace period?
-	Why wouldn't it see the start of the grace period right when
-	it started that grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	In some deep philosophical and overly anthromorphized
-	sense, yes, the CPU starting the grace period is immediately
-	aware of having done so.
-	However, if we instead assume that RCU is not self-aware,
-	then even the CPU starting the grace period does not really
-	become aware of the start of this grace period until its
-	first call to <tt>__note_gp_changes()</tt>.
-	On the other hand, this CPU potentially gets early notification
-	because it invokes <tt>__note_gp_changes()</tt> during its
-	last <tt>rcu_gp_init()</tt> pass through its leaf
-	<tt>rcu_node</tt> structure.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Self-Reported Quiescent States">
-Self-Reported Quiescent States</a></h4>
-
-<p>When all entities that might block the grace period have reported
-quiescent states (or as described in a later section, had quiescent
-states reported on their behalf), the grace period can end.
-Online non-idle CPUs report their own quiescent states, as shown
-in the following diagram:
-
-</p><p><img src="TreeRCU-qs.svg" alt="TreeRCU-qs.svg" width="75%">
-
-<p>This is for the last CPU to report a quiescent state, which signals
-the end of the grace period.
-Earlier quiescent states would push up the <tt>rcu_node</tt> tree
-only until they encountered an <tt>rcu_node</tt> structure that
-is waiting for additional quiescent states.
-However, ordering is nevertheless preserved because some later quiescent
-state will acquire that <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>.
-
-<p>Any number of events can lead up to a CPU invoking
-<tt>note_gp_changes</tt> (or alternatively, directly invoking
-<tt>__note_gp_changes()</tt>), at which point that CPU will notice
-the start of a new grace period while holding its leaf
-<tt>rcu_node</tt> lock.
-Therefore, all execution shown in this diagram happens after the
-start of the grace period.
-In addition, this CPU will consider any RCU read-side critical
-section that started before the invocation of <tt>__note_gp_changes()</tt>
-to have started before the grace period, and thus a critical
-section that the grace period must wait on.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But a RCU read-side critical section might have started
-	after the beginning of the grace period
-	(the advancing of <tt>-&gt;gp_seq</tt> from earlier), so why should
-	the grace period wait on such a critical section?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	It is indeed not necessary for the grace period to wait on such
-	a critical section.
-	However, it is permissible to wait on it.
-	And it is furthermore important to wait on it, as this
-	lazy approach is far more scalable than a &ldquo;big bang&rdquo;
-	all-at-once grace-period start could possibly be.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>If the CPU does a context switch, a quiescent state will be
-noted by <tt>rcu_node_context_switch()</tt> on the left.
-On the other hand, if the CPU takes a scheduler-clock interrupt
-while executing in usermode, a quiescent state will be noted by
-<tt>rcu_sched_clock_irq()</tt> on the right.
-Either way, the passage through a quiescent state will be noted
-in a per-CPU variable.
-
-<p>The next time an <tt>RCU_SOFTIRQ</tt> handler executes on
-this CPU (for example, after the next scheduler-clock
-interrupt), <tt>rcu_core()</tt> will invoke
-<tt>rcu_check_quiescent_state()</tt>, which will notice the
-recorded quiescent state, and invoke
-<tt>rcu_report_qs_rdp()</tt>.
-If <tt>rcu_report_qs_rdp()</tt> verifies that the quiescent state
-really does apply to the current grace period, it invokes
-<tt>rcu_report_rnp()</tt> which traverses up the <tt>rcu_node</tt>
-tree as shown at the bottom of the diagram, clearing bits from
-each <tt>rcu_node</tt> structure's <tt>-&gt;qsmask</tt> field,
-and propagating up the tree when the result is zero.
-
-<p>Note that traversal passes upwards out of a given <tt>rcu_node</tt>
-structure only if the current CPU is reporting the last quiescent
-state for the subtree headed by that <tt>rcu_node</tt> structure.
-A key point is that if a CPU's traversal stops at a given <tt>rcu_node</tt>
-structure, then there will be a later traversal by another CPU
-(or perhaps the same one) that proceeds upwards
-from that point, and the <tt>rcu_node</tt> <tt>-&gt;lock</tt>
-guarantees that the first CPU's quiescent state happens before the
-remainder of the second CPU's traversal.
-Applying this line of thought repeatedly shows that all CPUs'
-quiescent states happen before the last CPU traverses through
-the root <tt>rcu_node</tt> structure, the &ldquo;last CPU&rdquo;
-being the one that clears the last bit in the root <tt>rcu_node</tt>
-structure's <tt>-&gt;qsmask</tt> field.
-
-<h4><a name="Dynamic Tick Interface">Dynamic Tick Interface</a></h4>
-
-<p>Due to energy-efficiency considerations, RCU is forbidden from
-disturbing idle CPUs.
-CPUs are therefore required to notify RCU when entering or leaving idle
-state, which they do via fully ordered value-returning atomic operations
-on a per-CPU variable.
-The ordering effects are as shown below:
-
-</p><p><img src="TreeRCU-dyntick.svg" alt="TreeRCU-dyntick.svg" width="50%">
-
-<p>The RCU grace-period kernel thread samples the per-CPU idleness
-variable while holding the corresponding CPU's leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;lock</tt>.
-This means that any RCU read-side critical sections that precede the
-idle period (the oval near the top of the diagram above) will happen
-before the end of the current grace period.
-Similarly, the beginning of the current grace period will happen before
-any RCU read-side critical sections that follow the
-idle period (the oval near the bottom of the diagram above).
-
-<p>Plumbing this into the full grace-period execution is described
-<a href="#Forcing Quiescent States">below</a>.
-
-<h4><a name="CPU-Hotplug Interface">CPU-Hotplug Interface</a></h4>
-
-<p>RCU is also forbidden from disturbing offline CPUs, which might well
-be powered off and removed from the system completely.
-CPUs are therefore required to notify RCU of their comings and goings
-as part of the corresponding CPU hotplug operations.
-The ordering effects are shown below:
-
-</p><p><img src="TreeRCU-hotplug.svg" alt="TreeRCU-hotplug.svg" width="50%">
-
-<p>Because CPU hotplug operations are much less frequent than idle transitions,
-they are heavier weight, and thus acquire the CPU's leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;lock</tt> and update this structure's
-<tt>-&gt;qsmaskinitnext</tt>.
-The RCU grace-period kernel thread samples this mask to detect CPUs
-having gone offline since the beginning of this grace period.
-
-<p>Plumbing this into the full grace-period execution is described
-<a href="#Forcing Quiescent States">below</a>.
-
-<h4><a name="Forcing Quiescent States">Forcing Quiescent States</a></h4>
-
-<p>As noted above, idle and offline CPUs cannot report their own
-quiescent states, and therefore the grace-period kernel thread
-must do the reporting on their behalf.
-This process is called &ldquo;forcing quiescent states&rdquo;, it is
-repeated every few jiffies, and its ordering effects are shown below:
-
-</p><p><img src="TreeRCU-gp-fqs.svg" alt="TreeRCU-gp-fqs.svg" width="100%">
-
-<p>Each pass of quiescent state forcing is guaranteed to traverse the
-leaf <tt>rcu_node</tt> structures, and if there are no new quiescent
-states due to recently idled and/or offlined CPUs, then only the
-leaves are traversed.
-However, if there is a newly offlined CPU as illustrated on the left
-or a newly idled CPU as illustrated on the right, the corresponding
-quiescent state will be driven up towards the root.
-As with self-reported quiescent states, the upwards driving stops
-once it reaches an <tt>rcu_node</tt> structure that has quiescent
-states outstanding from other CPUs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	The leftmost drive to root stopped before it reached
-	the root <tt>rcu_node</tt> structure, which means that
-	there are still CPUs subordinate to that structure on
-	which the current grace period is waiting.
-	Given that, how is it possible that the rightmost drive
-	to root ended the grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Good analysis!
-	It is in fact impossible in the absence of bugs in RCU.
-	But this diagram is complex enough as it is, so simplicity
-	overrode accuracy.
-	You can think of it as poetic license, or you can think of
-	it as misdirection that is resolved in the
-	<a href="#Putting It All Together">stitched-together diagram</a>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Grace-Period Cleanup">Grace-Period Cleanup</a></h4>
-
-<p>Grace-period cleanup first scans the <tt>rcu_node</tt> tree
-breadth-first advancing all the <tt>-&gt;gp_seq</tt> fields, then it
-advances the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt> field.
-The ordering effects are shown below:
-
-</p><p><img src="TreeRCU-gp-cleanup.svg" alt="TreeRCU-gp-cleanup.svg" width="75%">
-
-<p>As indicated by the oval at the bottom of the diagram, once
-grace-period cleanup is complete, the next grace period can begin.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But when precisely does the grace period end?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	There is no useful single point at which the grace period
-	can be said to end.
-	The earliest reasonable candidate is as soon as the last
-	CPU has reported its quiescent state, but it may be some
-	milliseconds before RCU becomes aware of this.
-	The latest reasonable candidate is once the <tt>rcu_state</tt>
-	structure's <tt>-&gt;gp_seq</tt> field has been updated,
-	but it is quite possible that some CPUs have already completed
-	phase two of their updates by that time.
-	In short, if you are going to work with RCU, you need to
-	learn to embrace uncertainty.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-
-<h4><a name="Callback Invocation">Callback Invocation</a></h4>
-
-<p>Once a given CPU's leaf <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field has been updated, that CPU can begin
-invoking its RCU callbacks that were waiting for this grace period
-to end.
-These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
-which is usually invoked by <tt>__note_gp_changes()</tt>.
-As shown in the diagram below, this invocation can be triggered by
-the scheduling-clock interrupt (<tt>rcu_sched_clock_irq()</tt> on
-the left) or by idle entry (<tt>rcu_cleanup_after_idle()</tt> on
-the right, but only for kernels build with
-<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>).
-Either way, <tt>RCU_SOFTIRQ</tt> is raised, which results in
-<tt>rcu_do_batch()</tt> invoking the callbacks, which in turn
-allows those callbacks to carry out (either directly or indirectly
-via wakeup) the needed phase-two processing for each update.
-
-</p><p><img src="TreeRCU-callback-invocation.svg" alt="TreeRCU-callback-invocation.svg" width="60%">
-
-<p>Please note that callback invocation can also be prompted by any
-number of corner-case code paths, for example, when a CPU notes that
-it has excessive numbers of callbacks queued.
-In all cases, the CPU acquires its leaf <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt> before invoking callbacks, which preserves the
-required ordering against the newly completed grace period.
-
-<p>However, if the callback function communicates to other CPUs,
-for example, doing a wakeup, then it is that function's responsibility
-to maintain ordering.
-For example, if the callback function wakes up a task that runs on
-some other CPU, proper ordering must in place in both the callback
-function and the task being awakened.
-To see why this is important, consider the top half of the
-<a href="#Grace-Period Cleanup">grace-period cleanup</a> diagram.
-The callback might be running on a CPU corresponding to the leftmost
-leaf <tt>rcu_node</tt> structure, and awaken a task that is to run on
-a CPU corresponding to the rightmost leaf <tt>rcu_node</tt> structure,
-and the grace-period kernel thread might not yet have reached the
-rightmost leaf.
-In this case, the grace period's memory ordering might not yet have
-reached that CPU, so again the callback function and the awakened
-task must supply proper ordering.
-
-<h3><a name="Putting It All Together">Putting It All Together</a></h3>
-
-<p>A stitched-together diagram is
-<a href="Tree-RCU-Diagram.html">here</a>.
-
-<h3><a name="Legal Statement">
-Legal Statement</a></h3>
-
-<p>This work represents the view of the author and does not necessarily
-represent the view of IBM.
-
-</p><p>Linux is a registered trademark of Linus Torvalds.
-
-</p><p>Other company, product, and service names may be trademarks or
-service marks of others.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
new file mode 100644
index 000000000000..1a8b129cfc04
--- /dev/null
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -0,0 +1,624 @@
+======================================================
+A Tour Through TREE_RCU's Grace-Period Memory Ordering
+======================================================
+
+August 8, 2017
+
+This article was contributed by Paul E.&nbsp;McKenney
+
+Introduction
+============
+
+This document gives a rough visual overview of how Tree RCU's
+grace-period memory ordering guarantee is provided.
+
+What Is Tree RCU's Grace Period Memory Ordering Guarantee?
+==========================================================
+
+RCU grace periods provide extremely strong memory-ordering guarantees
+for non-idle non-offline code.
+Any code that happens after the end of a given RCU grace period is guaranteed
+to see the effects of all accesses prior to the beginning of that grace
+period that are within RCU read-side critical sections.
+Similarly, any code that happens before the beginning of a given RCU grace
+period is guaranteed to see the effects of all accesses following the end
+of that grace period that are within RCU read-side critical sections.
+
+Note well that RCU-sched read-side critical sections include any region
+of code for which preemption is disabled.
+Given that each individual machine instruction can be thought of as
+an extremely small region of preemption-disabled code, one can think of
+``synchronize_rcu()`` as ``smp_mb()`` on steroids.
+
+RCU updaters use this guarantee by splitting their updates into
+two phases, one of which is executed before the grace period and
+the other of which is executed after the grace period.
+In the most common use case, phase one removes an element from
+a linked RCU-protected data structure, and phase two frees that element.
+For this to work, any readers that have witnessed state prior to the
+phase-one update (in the common case, removal) must not witness state
+following the phase-two update (in the common case, freeing).
+
+The RCU implementation provides this guarantee using a network
+of lock-based critical sections, memory barriers, and per-CPU
+processing, as is described in the following sections.
+
+Tree RCU Grace Period Memory Ordering Building Blocks
+=====================================================
+
+The workhorse for RCU's grace-period memory ordering is the
+critical section for the ``rcu_node`` structure's
+``-&gt;lock``. These critical sections use helper functions for lock
+acquisition, including ``raw_spin_lock_rcu_node()``,
+``raw_spin_lock_irq_rcu_node()``, and ``raw_spin_lock_irqsave_rcu_node()``.
+Their lock-release counterparts are ``raw_spin_unlock_rcu_node()``,
+``raw_spin_unlock_irq_rcu_node()``, and
+``raw_spin_unlock_irqrestore_rcu_node()``, respectively.
+For completeness, a ``raw_spin_trylock_rcu_node()`` is also provided.
+The key point is that the lock-acquisition functions, including
+``raw_spin_trylock_rcu_node()``, all invoke ``smp_mb__after_unlock_lock()``
+immediately after successful acquisition of the lock.
+
+Therefore, for any given ``rcu_node`` structure, any access
+happening before one of the above lock-release functions will be seen
+by all CPUs as happening before any access happening after a later
+one of the above lock-acquisition functions.
+Furthermore, any access happening before one of the
+above lock-release function on any given CPU will be seen by all
+CPUs as happening before any access happening after a later one
+of the above lock-acquisition functions executing on that same CPU,
+even if the lock-release and lock-acquisition functions are operating
+on different ``rcu_node`` structures.
+Tree RCU uses these two ordering guarantees to form an ordering
+network among all CPUs that were in any way involved in the grace
+period, including any CPUs that came online or went offline during
+the grace period in question.
+
+The following litmus test exhibits the ordering effects of these
+lock-acquisition and lock-release functions::
+
+    1 int x, y, z;
+    2
+    3 void task0(void)
+    4 {
+    5   raw_spin_lock_rcu_node(rnp);
+    6   WRITE_ONCE(x, 1);
+    7   r1 = READ_ONCE(y);
+    8   raw_spin_unlock_rcu_node(rnp);
+    9 }
+   10
+   11 void task1(void)
+   12 {
+   13   raw_spin_lock_rcu_node(rnp);
+   14   WRITE_ONCE(y, 1);
+   15   r2 = READ_ONCE(z);
+   16   raw_spin_unlock_rcu_node(rnp);
+   17 }
+   18
+   19 void task2(void)
+   20 {
+   21   WRITE_ONCE(z, 1);
+   22   smp_mb();
+   23   r3 = READ_ONCE(x);
+   24 }
+   25
+   26 WARN_ON(r1 == 0 &amp;&amp; r2 == 0 &amp;&amp; r3 == 0);
+
+The ``WARN_ON()`` is evaluated at &ldquo;the end of time&rdquo;,
+after all changes have propagated throughout the system.
+Without the ``smp_mb__after_unlock_lock()`` provided by the
+acquisition functions, this ``WARN_ON()`` could trigger, for example
+on PowerPC.
+The ``smp_mb__after_unlock_lock()`` invocations prevent this
+``WARN_ON()`` from triggering.
+
+This approach must be extended to include idle CPUs, which need
+RCU's grace-period memory ordering guarantee to extend to any
+RCU read-side critical sections preceding and following the current
+idle sojourn.
+This case is handled by calls to the strongly ordered
+``atomic_add_return()`` read-modify-write atomic operation that
+is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
+time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
+The grace-period kthread invokes ``rcu_dynticks_snap()`` and
+``rcu_dynticks_in_eqs_since()`` (both of which invoke
+an ``atomic_add_return()`` of zero) to detect idle CPUs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But what about CPUs that remain offline for the entire grace period?  |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Such CPUs will be offline at the beginning of the grace period, so    |
+| the grace period won't expect quiescent states from them. Races       |
+| between grace-period start and CPU-hotplug operations are mediated    |
+| by the CPU's leaf ``rcu_node`` structure's ``->lock`` as described    |
+| above.                                                                |
++-----------------------------------------------------------------------+
+
+The approach must be extended to handle one final case, that of waking a
+task blocked in ``synchronize_rcu()``. This task might be affinitied to
+a CPU that is not yet aware that the grace period has ended, and thus
+might not yet be subject to the grace period's memory ordering.
+Therefore, there is an ``smp_mb()`` after the return from
+``wait_for_completion()`` in the ``synchronize_rcu()`` code path.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| What? Where??? I don't see any ``smp_mb()`` after the return from     |
+| ``wait_for_completion()``!!!                                          |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| That would be because I spotted the need for that ``smp_mb()`` during |
+| the creation of this documentation, and it is therefore unlikely to   |
+| hit mainline before v4.14. Kudos to Lance Roy, Will Deacon, Peter     |
+| Zijlstra, and Jonathan Cameron for asking questions that sensitized   |
+| me to the rather elaborate sequence of events that demonstrate the    |
+| need for this memory barrier.                                         |
++-----------------------------------------------------------------------+
+
+Tree RCU's grace--period memory-ordering guarantees rely most heavily on
+the ``rcu_node`` structure's ``->lock`` field, so much so that it is
+necessary to abbreviate this pattern in the diagrams in the next
+section. For example, consider the ``rcu_prepare_for_idle()`` function
+shown below, which is one of several functions that enforce ordering of
+newly arrived RCU callbacks against future grace periods:
+
+::
+
+    1 static void rcu_prepare_for_idle(void)
+    2 {
+    3   bool needwake;
+    4   struct rcu_data *rdp;
+    5   struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+    6   struct rcu_node *rnp;
+    7   struct rcu_state *rsp;
+    8   int tne;
+    9
+   10   if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
+   11       rcu_is_nocb_cpu(smp_processor_id()))
+   12     return;
+   13   tne = READ_ONCE(tick_nohz_active);
+   14   if (tne != rdtp->tick_nohz_enabled_snap) {
+   15     if (rcu_cpu_has_callbacks(NULL))
+   16       invoke_rcu_core();
+   17     rdtp->tick_nohz_enabled_snap = tne;
+   18     return;
+   19   }
+   20   if (!tne)
+   21     return;
+   22   if (rdtp->all_lazy &&
+   23       rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
+   24     rdtp->all_lazy = false;
+   25     rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+   26     invoke_rcu_core();
+   27     return;
+   28   }
+   29   if (rdtp->last_accelerate == jiffies)
+   30     return;
+   31   rdtp->last_accelerate = jiffies;
+   32   for_each_rcu_flavor(rsp) {
+   33     rdp = this_cpu_ptr(rsp->rda);
+   34     if (rcu_segcblist_pend_cbs(&rdp->cblist))
+   35       continue;
+   36     rnp = rdp->mynode;
+   37     raw_spin_lock_rcu_node(rnp);
+   38     needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+   39     raw_spin_unlock_rcu_node(rnp);
+   40     if (needwake)
+   41       rcu_gp_kthread_wake(rsp);
+   42   }
+   43 }
+
+But the only part of ``rcu_prepare_for_idle()`` that really matters for
+this discussion are lines 37–39. We will therefore abbreviate this
+function as follows:
+
+.. kernel-figure:: rcu_node-lock.svg
+
+The box represents the ``rcu_node`` structure's ``->lock`` critical
+section, with the double line on top representing the additional
+``smp_mb__after_unlock_lock()``.
+
+Tree RCU Grace Period Memory Ordering Components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tree RCU's grace-period memory-ordering guarantee is provided by a
+number of RCU components:
+
+#. `Callback Registry`_
+#. `Grace-Period Initialization`_
+#. `Self-Reported Quiescent States`_
+#. `Dynamic Tick Interface`_
+#. `CPU-Hotplug Interface`_
+#. `Forcing Quiescent States`_
+#. `Grace-Period Cleanup`_
+#. `Callback Invocation`_
+
+Each of the following section looks at the corresponding component in
+detail.
+
+Callback Registry
+^^^^^^^^^^^^^^^^^
+
+If RCU's grace-period guarantee is to mean anything at all, any access
+that happens before a given invocation of ``call_rcu()`` must also
+happen before the corresponding grace period. The implementation of this
+portion of RCU's grace period guarantee is shown in the following
+figure:
+
+.. kernel-figure:: TreeRCU-callback-registry.svg
+
+Because ``call_rcu()`` normally acts only on CPU-local state, it
+provides no ordering guarantees, either for itself or for phase one of
+the update (which again will usually be removal of an element from an
+RCU-protected data structure). It simply enqueues the ``rcu_head``
+structure on a per-CPU list, which cannot become associated with a grace
+period until a later call to ``rcu_accelerate_cbs()``, as shown in the
+diagram above.
+
+One set of code paths shown on the left invokes ``rcu_accelerate_cbs()``
+via ``note_gp_changes()``, either directly from ``call_rcu()`` (if the
+current CPU is inundated with queued ``rcu_head`` structures) or more
+likely from an ``RCU_SOFTIRQ`` handler. Another code path in the middle
+is taken only in kernels built with ``CONFIG_RCU_FAST_NO_HZ=y``, which
+invokes ``rcu_accelerate_cbs()`` via ``rcu_prepare_for_idle()``. The
+final code path on the right is taken only in kernels built with
+``CONFIG_HOTPLUG_CPU=y``, which invokes ``rcu_accelerate_cbs()`` via
+``rcu_advance_cbs()``, ``rcu_migrate_callbacks``,
+``rcutree_migrate_callbacks()``, and ``takedown_cpu()``, which in turn
+is invoked on a surviving CPU after the outgoing CPU has been completely
+offlined.
+
+There are a few other code paths within grace-period processing that
+opportunistically invoke ``rcu_accelerate_cbs()``. However, either way,
+all of the CPU's recently queued ``rcu_head`` structures are associated
+with a future grace-period number under the protection of the CPU's lead
+``rcu_node`` structure's ``->lock``. In all cases, there is full
+ordering against any prior critical section for that same ``rcu_node``
+structure's ``->lock``, and also full ordering against any of the
+current task's or CPU's prior critical sections for any ``rcu_node``
+structure's ``->lock``.
+
+The next section will show how this ordering ensures that any accesses
+prior to the ``call_rcu()`` (particularly including phase one of the
+update) happen before the start of the corresponding grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But what about ``synchronize_rcu()``?                                 |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| The ``synchronize_rcu()`` passes ``call_rcu()`` to ``wait_rcu_gp()``, |
+| which invokes it. So either way, it eventually comes down to          |
+| ``call_rcu()``.                                                       |
++-----------------------------------------------------------------------+
+
+Grace-Period Initialization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Grace-period initialization is carried out by the grace-period kernel
+thread, which makes several passes over the ``rcu_node`` tree within the
+``rcu_gp_init()`` function. This means that showing the full flow of
+ordering through the grace-period computation will require duplicating
+this tree. If you find this confusing, please note that the state of the
+``rcu_node`` changes over time, just like Heraclitus's river. However,
+to keep the ``rcu_node`` river tractable, the grace-period kernel
+thread's traversals are presented in multiple parts, starting in this
+section with the various phases of grace-period initialization.
+
+The first ordering-related grace-period initialization action is to
+advance the ``rcu_state`` structure's ``->gp_seq`` grace-period-number
+counter, as shown below:
+
+.. kernel-figure:: TreeRCU-gp-init-1.svg
+
+The actual increment is carried out using ``smp_store_release()``, which
+helps reject false-positive RCU CPU stall detection. Note that only the
+root ``rcu_node`` structure is touched.
+
+The first pass through the ``rcu_node`` tree updates bitmasks based on
+CPUs having come online or gone offline since the start of the previous
+grace period. In the common case where the number of online CPUs for
+this ``rcu_node`` structure has not transitioned to or from zero, this
+pass will scan only the leaf ``rcu_node`` structures. However, if the
+number of online CPUs for a given leaf ``rcu_node`` structure has
+transitioned from zero, ``rcu_init_new_rnp()`` will be invoked for the
+first incoming CPU. Similarly, if the number of online CPUs for a given
+leaf ``rcu_node`` structure has transitioned to zero,
+``rcu_cleanup_dead_rnp()`` will be invoked for the last outgoing CPU.
+The diagram below shows the path of ordering if the leftmost
+``rcu_node`` structure onlines its first CPU and if the next
+``rcu_node`` structure has no online CPUs (or, alternatively if the
+leftmost ``rcu_node`` structure offlines its last CPU and if the next
+``rcu_node`` structure has no online CPUs).
+
+.. kernel-figure:: TreeRCU-gp-init-1.svg
+
+The final ``rcu_gp_init()`` pass through the ``rcu_node`` tree traverses
+breadth-first, setting each ``rcu_node`` structure's ``->gp_seq`` field
+to the newly advanced value from the ``rcu_state`` structure, as shown
+in the following diagram.
+
+.. kernel-figure:: TreeRCU-gp-init-1.svg
+
+This change will also cause each CPU's next call to
+``__note_gp_changes()`` to notice that a new grace period has started,
+as described in the next section. But because the grace-period kthread
+started the grace period at the root (with the advancing of the
+``rcu_state`` structure's ``->gp_seq`` field) before setting each leaf
+``rcu_node`` structure's ``->gp_seq`` field, each CPU's observation of
+the start of the grace period will happen after the actual start of the
+grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But what about the CPU that started the grace period? Why wouldn't it |
+| see the start of the grace period right when it started that grace    |
+| period?                                                               |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| In some deep philosophical and overly anthromorphized sense, yes, the |
+| CPU starting the grace period is immediately aware of having done so. |
+| However, if we instead assume that RCU is not self-aware, then even   |
+| the CPU starting the grace period does not really become aware of the |
+| start of this grace period until its first call to                    |
+| ``__note_gp_changes()``. On the other hand, this CPU potentially gets |
+| early notification because it invokes ``__note_gp_changes()`` during  |
+| its last ``rcu_gp_init()`` pass through its leaf ``rcu_node``         |
+| structure.                                                            |
++-----------------------------------------------------------------------+
+
+Self-Reported Quiescent States
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When all entities that might block the grace period have reported
+quiescent states (or as described in a later section, had quiescent
+states reported on their behalf), the grace period can end. Online
+non-idle CPUs report their own quiescent states, as shown in the
+following diagram:
+
+.. kernel-figure:: TreeRCU-qs.svg
+
+This is for the last CPU to report a quiescent state, which signals the
+end of the grace period. Earlier quiescent states would push up the
+``rcu_node`` tree only until they encountered an ``rcu_node`` structure
+that is waiting for additional quiescent states. However, ordering is
+nevertheless preserved because some later quiescent state will acquire
+that ``rcu_node`` structure's ``->lock``.
+
+Any number of events can lead up to a CPU invoking ``note_gp_changes``
+(or alternatively, directly invoking ``__note_gp_changes()``), at which
+point that CPU will notice the start of a new grace period while holding
+its leaf ``rcu_node`` lock. Therefore, all execution shown in this
+diagram happens after the start of the grace period. In addition, this
+CPU will consider any RCU read-side critical section that started before
+the invocation of ``__note_gp_changes()`` to have started before the
+grace period, and thus a critical section that the grace period must
+wait on.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But a RCU read-side critical section might have started after the     |
+| beginning of the grace period (the advancing of ``->gp_seq`` from     |
+| earlier), so why should the grace period wait on such a critical      |
+| section?                                                              |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| It is indeed not necessary for the grace period to wait on such a     |
+| critical section. However, it is permissible to wait on it. And it is |
+| furthermore important to wait on it, as this lazy approach is far     |
+| more scalable than a “big bang” all-at-once grace-period start could  |
+| possibly be.                                                          |
++-----------------------------------------------------------------------+
+
+If the CPU does a context switch, a quiescent state will be noted by
+``rcu_note_context_switch()`` on the left. On the other hand, if the CPU
+takes a scheduler-clock interrupt while executing in usermode, a
+quiescent state will be noted by ``rcu_sched_clock_irq()`` on the right.
+Either way, the passage through a quiescent state will be noted in a
+per-CPU variable.
+
+The next time an ``RCU_SOFTIRQ`` handler executes on this CPU (for
+example, after the next scheduler-clock interrupt), ``rcu_core()`` will
+invoke ``rcu_check_quiescent_state()``, which will notice the recorded
+quiescent state, and invoke ``rcu_report_qs_rdp()``. If
+``rcu_report_qs_rdp()`` verifies that the quiescent state really does
+apply to the current grace period, it invokes ``rcu_report_rnp()`` which
+traverses up the ``rcu_node`` tree as shown at the bottom of the
+diagram, clearing bits from each ``rcu_node`` structure's ``->qsmask``
+field, and propagating up the tree when the result is zero.
+
+Note that traversal passes upwards out of a given ``rcu_node`` structure
+only if the current CPU is reporting the last quiescent state for the
+subtree headed by that ``rcu_node`` structure. A key point is that if a
+CPU's traversal stops at a given ``rcu_node`` structure, then there will
+be a later traversal by another CPU (or perhaps the same one) that
+proceeds upwards from that point, and the ``rcu_node`` ``->lock``
+guarantees that the first CPU's quiescent state happens before the
+remainder of the second CPU's traversal. Applying this line of thought
+repeatedly shows that all CPUs' quiescent states happen before the last
+CPU traverses through the root ``rcu_node`` structure, the “last CPU”
+being the one that clears the last bit in the root ``rcu_node``
+structure's ``->qsmask`` field.
+
+Dynamic Tick Interface
+^^^^^^^^^^^^^^^^^^^^^^
+
+Due to energy-efficiency considerations, RCU is forbidden from
+disturbing idle CPUs. CPUs are therefore required to notify RCU when
+entering or leaving idle state, which they do via fully ordered
+value-returning atomic operations on a per-CPU variable. The ordering
+effects are as shown below:
+
+.. kernel-figure:: TreeRCU-dyntick.svg
+
+The RCU grace-period kernel thread samples the per-CPU idleness variable
+while holding the corresponding CPU's leaf ``rcu_node`` structure's
+``->lock``. This means that any RCU read-side critical sections that
+precede the idle period (the oval near the top of the diagram above)
+will happen before the end of the current grace period. Similarly, the
+beginning of the current grace period will happen before any RCU
+read-side critical sections that follow the idle period (the oval near
+the bottom of the diagram above).
+
+Plumbing this into the full grace-period execution is described
+`below <#Forcing%20Quiescent%20States>`__.
+
+CPU-Hotplug Interface
+^^^^^^^^^^^^^^^^^^^^^
+
+RCU is also forbidden from disturbing offline CPUs, which might well be
+powered off and removed from the system completely. CPUs are therefore
+required to notify RCU of their comings and goings as part of the
+corresponding CPU hotplug operations. The ordering effects are shown
+below:
+
+.. kernel-figure:: TreeRCU-hotplug.svg
+
+Because CPU hotplug operations are much less frequent than idle
+transitions, they are heavier weight, and thus acquire the CPU's leaf
+``rcu_node`` structure's ``->lock`` and update this structure's
+``->qsmaskinitnext``. The RCU grace-period kernel thread samples this
+mask to detect CPUs having gone offline since the beginning of this
+grace period.
+
+Plumbing this into the full grace-period execution is described
+`below <#Forcing%20Quiescent%20States>`__.
+
+Forcing Quiescent States
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+As noted above, idle and offline CPUs cannot report their own quiescent
+states, and therefore the grace-period kernel thread must do the
+reporting on their behalf. This process is called “forcing quiescent
+states”, it is repeated every few jiffies, and its ordering effects are
+shown below:
+
+.. kernel-figure:: TreeRCU-gp-fqs.svg
+
+Each pass of quiescent state forcing is guaranteed to traverse the leaf
+``rcu_node`` structures, and if there are no new quiescent states due to
+recently idled and/or offlined CPUs, then only the leaves are traversed.
+However, if there is a newly offlined CPU as illustrated on the left or
+a newly idled CPU as illustrated on the right, the corresponding
+quiescent state will be driven up towards the root. As with
+self-reported quiescent states, the upwards driving stops once it
+reaches an ``rcu_node`` structure that has quiescent states outstanding
+from other CPUs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| The leftmost drive to root stopped before it reached the root         |
+| ``rcu_node`` structure, which means that there are still CPUs         |
+| subordinate to that structure on which the current grace period is    |
+| waiting. Given that, how is it possible that the rightmost drive to   |
+| root ended the grace period?                                          |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Good analysis! It is in fact impossible in the absence of bugs in     |
+| RCU. But this diagram is complex enough as it is, so simplicity       |
+| overrode accuracy. You can think of it as poetic license, or you can  |
+| think of it as misdirection that is resolved in the                   |
+| `stitched-together diagram <#Putting%20It%20All%20Together>`__.       |
++-----------------------------------------------------------------------+
+
+Grace-Period Cleanup
+^^^^^^^^^^^^^^^^^^^^
+
+Grace-period cleanup first scans the ``rcu_node`` tree breadth-first
+advancing all the ``->gp_seq`` fields, then it advances the
+``rcu_state`` structure's ``->gp_seq`` field. The ordering effects are
+shown below:
+
+.. kernel-figure:: TreeRCU-gp-cleanup.svg
+
+As indicated by the oval at the bottom of the diagram, once grace-period
+cleanup is complete, the next grace period can begin.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But when precisely does the grace period end?                         |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| There is no useful single point at which the grace period can be said |
+| to end. The earliest reasonable candidate is as soon as the last CPU  |
+| has reported its quiescent state, but it may be some milliseconds     |
+| before RCU becomes aware of this. The latest reasonable candidate is  |
+| once the ``rcu_state`` structure's ``->gp_seq`` field has been        |
+| updated, but it is quite possible that some CPUs have already         |
+| completed phase two of their updates by that time. In short, if you   |
+| are going to work with RCU, you need to learn to embrace uncertainty. |
++-----------------------------------------------------------------------+
+
+Callback Invocation
+^^^^^^^^^^^^^^^^^^^
+
+Once a given CPU's leaf ``rcu_node`` structure's ``->gp_seq`` field has
+been updated, that CPU can begin invoking its RCU callbacks that were
+waiting for this grace period to end. These callbacks are identified by
+``rcu_advance_cbs()``, which is usually invoked by
+``__note_gp_changes()``. As shown in the diagram below, this invocation
+can be triggered by the scheduling-clock interrupt
+(``rcu_sched_clock_irq()`` on the left) or by idle entry
+(``rcu_cleanup_after_idle()`` on the right, but only for kernels build
+with ``CONFIG_RCU_FAST_NO_HZ=y``). Either way, ``RCU_SOFTIRQ`` is
+raised, which results in ``rcu_do_batch()`` invoking the callbacks,
+which in turn allows those callbacks to carry out (either directly or
+indirectly via wakeup) the needed phase-two processing for each update.
+
+.. kernel-figure:: TreeRCU-callback-invocation.svg
+
+Please note that callback invocation can also be prompted by any number
+of corner-case code paths, for example, when a CPU notes that it has
+excessive numbers of callbacks queued. In all cases, the CPU acquires
+its leaf ``rcu_node`` structure's ``->lock`` before invoking callbacks,
+which preserves the required ordering against the newly completed grace
+period.
+
+However, if the callback function communicates to other CPUs, for
+example, doing a wakeup, then it is that function's responsibility to
+maintain ordering. For example, if the callback function wakes up a task
+that runs on some other CPU, proper ordering must in place in both the
+callback function and the task being awakened. To see why this is
+important, consider the top half of the `grace-period
+cleanup <#Grace-Period%20Cleanup>`__ diagram. The callback might be
+running on a CPU corresponding to the leftmost leaf ``rcu_node``
+structure, and awaken a task that is to run on a CPU corresponding to
+the rightmost leaf ``rcu_node`` structure, and the grace-period kernel
+thread might not yet have reached the rightmost leaf. In this case, the
+grace period's memory ordering might not yet have reached that CPU, so
+again the callback function and the awakened task must supply proper
+ordering.
+
+Putting It All Together
+~~~~~~~~~~~~~~~~~~~~~~~
+
+A stitched-together diagram is here:
+
+.. kernel-figure:: TreeRCU-gp.svg
+
+Legal Statement
+~~~~~~~~~~~~~~~
+
+This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+Linux is a registered trademark of Linus Torvalds.
+
+Other company, product, and service names may be trademarks or service
+marks of others.
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
index 2bcd742d6e49..069f6f8371c2 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
@@ -3880,7 +3880,7 @@
          font-style="normal"
          y="-4418.6582"
          x="3745.7725"
-         xml:space="preserve">rcu_node_context_switch()</text>
+         xml:space="preserve">rcu_note_context_switch()</text>
     </g>
     <g
        transform="translate(1881.1886,54048.57)"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
index 779c9ac31a52..7d6c5f7e505c 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
@@ -753,7 +753,7 @@
          font-style="normal"
          y="-4418.6582"
          x="3745.7725"
-         xml:space="preserve">rcu_node_context_switch()</text>
+         xml:space="preserve">rcu_note_context_switch()</text>
     </g>
     <g
        transform="translate(3131.2648,-585.6713)"
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
deleted file mode 100644
index 467251f7fef6..000000000000
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ /dev/null
@@ -1,3401 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E.&nbsp;McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation, however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li>	<a href="#Fundamental Requirements">
-	Fundamental Requirements</a>
-<li>	<a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li>	<a href="#Parallelism Facts of Life">
-	Parallelism Facts of Life</a>
-<li>	<a href="#Quality-of-Implementation Requirements">
-	Quality-of-Implementation Requirements</a>
-<li>	<a href="#Linux Kernel Complications">
-	Linux Kernel Complications</a>
-<li>	<a href="#Software-Engineering Requirements">
-	Software-Engineering Requirements</a>
-<li>	<a href="#Other RCU Flavors">
-	Other RCU Flavors</a>
-<li>	<a href="#Possible Future Changes">
-	Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-however, the answers to each quick quiz immediately follows the quiz.
-Select the big white space with your mouse to see the answer.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li>	<a href="#Grace-Period Guarantee">
-	Grace-Period Guarantee</a>
-<li>	<a href="#Publish-Subscribe Guarantee">
-	Publish-Subscribe Guarantee</a>
-<li>	<a href="#Memory-Barrier Guarantees">
-	Memory-Barrier Guarantees</a>
-<li>	<a href="#RCU Primitives Guaranteed to Execute Unconditionally">
-	RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li>	<a href="#Guaranteed Read-to-Write Upgrade">
-	Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5   rcu_read_lock();
- 6   r1 = READ_ONCE(x);
- 7   r2 = READ_ONCE(y);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   WRITE_ONCE(x, 1);
-14   synchronize_rcu();
-15   WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 &amp;&amp; r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Wait a minute!
-	You said that updaters can make useful forward progress concurrently
-	with readers, but pre-existing readers will block
-	<tt>synchronize_rcu()</tt>!!!
-	Just who are you trying to fool???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	First, if updaters do not wish to be blocked by readers, they can use
-	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-	be discussed later.
-	Second, even when using <tt>synchronize_rcu()</tt>, the other
-	update-side code does run concurrently with readers, whether
-	pre-existing or not.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL        0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING    2
- 4 #define STATE_WANT_NORMAL   3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10   int state_snap;
-11
-12   rcu_read_lock();
-13   state_snap = READ_ONCE(state);
-14   if (state_snap == STATE_NORMAL)
-15     do_something();
-16   else
-17     do_something_carefully();
-18   rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24   synchronize_rcu();
-25   WRITE_ONCE(state, STATE_RECOVERING);
-26   recovery();
-27   WRITE_ONCE(state, STATE_WANT_NORMAL);
-28   synchronize_rcu();
-29   WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Without that extra grace period, memory reordering could result in
-	<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-	concurrently with the last bits of <tt>recovery()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   gp = p; /* ORDERING BUG */
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-<b>11   gp = p; /* ORDERING BUG */
-12   p-&gt;a = a;
-13   p-&gt;b = a;</b>
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
-it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   rcu_assign_pointer(gp, p);
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines&nbsp;11 and&nbsp;12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of &ldquo;interesting&rdquo; compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-	two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-	from being reordered.
-	Can't that also cause problems?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	No, it cannot.
-	The readers cannot see either of these two fields until
-	the assignment to <tt>gp</tt>, by which time both fields are
-	fully initialized.
-	So reordering the assignments
-	to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-	cause any problems.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3   rcu_read_lock();
- 4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5     do_something(gp-&gt;a, gp-&gt;b);</b>
- 6     rcu_read_unlock();
- 7     return true;
- 8   }
- 9   rcu_read_unlock();
-10   return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp-&gt;a</tt>
-and <tt>gp-&gt;b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = rcu_dereference(gp);
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li>	Remove the data element from the enclosing structure.
-<li>	Wait for all pre-existing RCU read-side critical sections
-	to complete (because only pre-existing readers can possibly have
-	a reference to the newly removed data element).
-<li>	At this point, only the updater has a reference to the
-	newly removed data element, so it can safely reclaim
-	the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3   struct foo *p;
- 4
- 5   spin_lock(&amp;gp_lock);
- 6   p = rcu_access_pointer(gp);
- 7   if (!p) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   rcu_assign_pointer(gp, NULL);
-12   spin_unlock(&amp;gp_lock);
-13   synchronize_rcu();
-14   kfree(p);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line&nbsp;13 waiting for a grace
-period before line&nbsp;14 frees the old data element.
-This waiting ensures that readers will reach line&nbsp;7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li>	The value returned by <tt>rcu_access_pointer()</tt>
-	cannot be dereferenced.
-	If you want to access the value pointed to as well as
-	the pointer itself, use <tt>rcu_dereference()</tt>
-	instead of <tt>rcu_access_pointer()</tt>.
-<li>	The call to <tt>rcu_access_pointer()</tt> need not be
-	protected.
-	In contrast, <tt>rcu_dereference()</tt> must either be
-	within an RCU read-side critical section or in a code
-	segment where the pointer cannot change, for example, in
-	code protected by the corresponding update-side lock.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Without the <tt>rcu_dereference()</tt> or the
-	<tt>rcu_access_pointer()</tt>, what destructive optimizations
-	might the compiler make use of?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Let's start with what happens to <tt>do_something_gp()</tt>
-	if it fails to use <tt>rcu_dereference()</tt>.
-	It could reuse a value formerly fetched from this same pointer.
-	It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-	manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-	mash-up of two distinct pointer values.
-	It might even use value-speculation optimizations, where it makes
-	a wrong guess, but by the time it gets around to checking the
-	value, an update has changed the pointer to match the wrong guess.
-	Too bad about any dereferences that returned pre-initialization garbage
-	in the meantime!
-	</font>
-
-	<p><font color="ffffff">
-	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-	the above optimizations are harmless.
-	However, <tt>sparse</tt> will complain if you
-	define <tt>gp</tt> with <tt>__rcu</tt> and then
-	access it without using
-	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt> and later
-still into <tt>READ_ONCE()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees have provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li>	Each CPU that has an RCU read-side critical section that
-	begins before <tt>synchronize_rcu()</tt> starts is
-	guaranteed to execute a full memory barrier between the time
-	that the RCU read-side critical section ends and the time that
-	<tt>synchronize_rcu()</tt> returns.
-	Without this guarantee, a pre-existing RCU read-side critical section
-	might hold a reference to the newly removed <tt>struct foo</tt>
-	after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt>.
-<li>	Each CPU that has an RCU read-side critical section that ends
-	after <tt>synchronize_rcu()</tt> returns is guaranteed
-	to execute a full memory barrier between the time that
-	<tt>synchronize_rcu()</tt> begins and the time that the RCU
-	read-side critical section begins.
-	Without this guarantee, a later RCU read-side critical section
-	running after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt> might
-	later run <tt>do_something_gp()</tt> and find the
-	newly deleted <tt>struct foo</tt>.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> remains
-	on a given CPU, then that CPU is guaranteed to execute a full
-	memory barrier sometime during the execution of
-	<tt>synchronize_rcu()</tt>.
-	This guarantee ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on line&nbsp;11.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> migrates
-	among a group of CPUs during that invocation, then each of the
-	CPUs in that group is guaranteed to execute a full memory barrier
-	sometime during the execution of <tt>synchronize_rcu()</tt>.
-	This guarantee also ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on
-	line&nbsp;11, but also in the case where the thread executing the
-	<tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Given that multiple CPUs can start RCU read-side critical sections
-	at any time without any ordering whatsoever, how can RCU possibly
-	tell whether or not a given RCU read-side critical section starts
-	before a given instance of <tt>synchronize_rcu()</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	If RCU cannot tell whether or not a given
-	RCU read-side critical section starts before a
-	given instance of <tt>synchronize_rcu()</tt>,
-	then it must assume that the RCU read-side critical section
-	started first.
-	In other words, a given instance of <tt>synchronize_rcu()</tt>
-	can avoid waiting on a given RCU read-side critical section only
-	if it can prove that <tt>synchronize_rcu()</tt> started first.
-	</font>
-
-	<p><font color="ffffff">
-	A related question is &ldquo;When <tt>rcu_read_lock()</tt>
-	doesn't generate any code, why does it matter how it relates
-	to a grace period?&rdquo;
-	The answer is that it is not the relationship of
-	<tt>rcu_read_lock()</tt> itself that is important, but rather
-	the relationship of the code within the enclosed RCU read-side
-	critical section to the code preceding and following the
-	grace period.
-	If we take this viewpoint, then a given RCU read-side critical
-	section begins before a given grace period when some access
-	preceding the grace period observes the effect of some access
-	within the critical section, in which case none of the accesses
-	within the critical section may observe the effects of any
-	access following the grace period.
-	</font>
-
-	<p><font color="ffffff">
-	As of late 2016, mathematical models of RCU take this
-	viewpoint, for example, see slides&nbsp;62 and&nbsp;63
-	of the
-	<a href="http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.2016.10.04c.LCE.pdf">2016 LinuxCon EU</a>
-	presentation.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	The first and second guarantees require unbelievably strict ordering!
-	Are all these memory barriers <i> really</i> required?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Yes, they really are required.
-	To see why the first guarantee is required, consider the following
-	sequence of events:
-	</font>
-
-	<ol>
-	<li>	<font color="ffffff">
-		CPU 1: <tt>rcu_read_lock()</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 1: <tt>q = rcu_dereference(gp);
-		/* Very likely to return p. */</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 0: <tt>list_del_rcu(p);</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 0: <tt>synchronize_rcu()</tt> starts.
-		</font>
-	<li>	<font color="ffffff">
-		CPU 1: <tt>do_something_with(q-&gt;a);
-		/* No smp_mb(), so might happen after kfree(). */</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 1: <tt>rcu_read_unlock()</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 0: <tt>synchronize_rcu()</tt> returns.
-		</font>
-	<li>	<font color="ffffff">
-		CPU 0: <tt>kfree(p);</tt>
-		</font>
-	</ol>
-
-	<p><font color="ffffff">
-	Therefore, there absolutely must be a full memory barrier between the
-	end of the RCU read-side critical section and the end of the
-	grace period.
-	</font>
-
-	<p><font color="ffffff">
-	The sequence of events demonstrating the necessity of the second rule
-	is roughly similar:
-	</font>
-
-	<ol>
-	<li>	<font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
-		</font>
-	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
-		</font>
-	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
-		</font>
-	<li>	<font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
-		/* Might return p if no memory barrier. */</tt>
-		</font>
-	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
-		</font>
-	<li>	<font color="ffffff">CPU 0: <tt>kfree(p);</tt>
-		</font>
-	<li>	<font color="ffffff">
-		CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-		</font>
-	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
-		</font>
-	</ol>
-
-	<p><font color="ffffff">
-	And similarly, without a memory barrier between the beginning of the
-	grace period and the beginning of the RCU read-side critical section,
-	CPU&nbsp;1 might end up accessing the freelist.
-	</font>
-
-	<p><font color="ffffff">
-	The &ldquo;as if&rdquo; rule of course applies, so that any
-	implementation that acts as if the appropriate memory barriers
-	were in place is a correct implementation.
-	That said, it is much easier to fool yourself into believing
-	that you have adhered to the as-if rule than it is to actually
-	adhere to it!
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
-	generate absolutely no code in some kernel builds.
-	This means that the compiler might arbitrarily rearrange consecutive
-	RCU read-side critical sections.
-	Given such rearrangement, if a given RCU read-side critical section
-	is done, how can you be sure that all prior RCU read-side critical
-	sections are done?
-	Won't the compiler rearrangements make that impossible to determine?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
-	generate absolutely no code, RCU infers quiescent states only at
-	special locations, for example, within the scheduler.
-	Because calls to <tt>schedule()</tt> had better prevent calling-code
-	accesses to shared variables from being rearranged across the call to
-	<tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
-	critical section, it will necessarily detect the end of all prior
-	RCU read-side critical sections, no matter how aggressively the
-	compiler scrambles the code.
-	</font>
-
-	<p><font color="ffffff">
-	Again, this all assumes that the compiler cannot scramble code across
-	calls to the scheduler, out of interrupt handlers, into the idle loop,
-	into user-mode code, and so on.
-	But if your kernel build allows that sort of scrambling, you have broken
-	far more than just RCU!
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-primitives with conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>, however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But how does the upgrade-to-write operation exclude other readers?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	It doesn't, just like normal RCU updates, which also do not exclude
-	RCU readers.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long, however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li>	<a href="#Readers Impose Minimal Ordering">
-	Readers Impose Minimal Ordering</a>
-<li>	<a href="#Readers Do Not Exclude Updaters">
-	Readers Do Not Exclude Updaters</a>
-<li>	<a href="#Updaters Only Wait For Old Readers">
-	Updaters Only Wait For Old Readers</a>
-<li>	<a href="#Grace Periods Don't Partition Read-Side Critical Sections">
-	Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li>	<a href="#Read-Side Critical Sections Don't Partition Grace Periods">
-	Read-Side Critical Sections Don't Partition Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(x, 1);
- 5   rcu_read_unlock();
- 6   rcu_read_lock();
- 7   WRITE_ONCE(y, 1);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   rcu_read_lock();
-14   r1 = READ_ONCE(y);
-15   rcu_read_unlock();
-16   rcu_read_lock();
-17   r2 = READ_ONCE(x);
-18   rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design:  Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Can't the compiler also reorder this code?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	No, the volatile casts in <tt>READ_ONCE()</tt> and
-	<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-	this particular case.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is to prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   r1 = READ_ONCE(y);
- 5   if (r1) {
- 6     do_something_with_nonzero_x();
- 7     r2 = READ_ONCE(x);
- 8     WARN_ON(!r2); /* BUG!!! */
- 9   }
-10   rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15   spin_lock(&amp;my_lock);
-16   WRITE_ONCE(x, 1);
-17   WRITE_ONCE(y, 1);
-18   spin_unlock(&amp;my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Suppose that synchronize_rcu() did wait until <i>all</i>
-	readers had completed instead of waiting only on
-	pre-existing readers.
-	For how long would the updater be able to rely on there
-	being no readers?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	For no time at all.
-	Even if <tt>synchronize_rcu()</tt> were to wait until
-	all readers had completed, a new reader might start immediately after
-	<tt>synchronize_rcu()</tt> completed.
-	Therefore, the code following
-	<tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
-	no readers.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>x</tt>, <tt>y</tt>, and <tt>z</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   r2 = READ_ONCE(b);
-20   r3 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure show how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   r2 = READ_ONCE(c);
-19   synchronize_rcu();
-20   WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25   rcu_read_lock();
-26   r3 = READ_ONCE(b);
-27   r4 = READ_ONCE(d);
-28   rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This mean that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   WRITE_ONCE(d, 1);
-20   r2 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26   r3 = READ_ONCE(d);
-27   synchronize_rcu();
-28   WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33   rcu_read_lock();
-34   r4 = READ_ONCE(b);
-35   r5 = READ_ONCE(e);
-36   rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp&amp; r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	How long a sequence of grace periods, each separated by an RCU
-	read-side critical section, would be required to partition the RCU
-	read-side critical sections at the beginning and end of the chain?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	In theory, an infinite number.
-	In practice, an unknown number that is sensitive to both implementation
-	details and timing considerations.
-	Therefore, even in practice, RCU users must abide by the
-	theoretical rather than the practical answer.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li>	Any CPU or task may be delayed at any time,
-	and any attempts to avoid these delays by disabling
-	preemption, interrupts, or whatever are completely futile.
-	This is most obvious in preemptible user-level
-	environments and in virtualized environments (where
-	a given guest OS's VCPUs can be preempted at any time by
-	the underlying hypervisor), but can also happen in bare-metal
-	environments due to ECC errors, NMIs, and other hardware
-	events.
-	Although a delay of more than about 20 seconds can result
-	in splats, the RCU implementation is obligated to use
-	algorithms that can tolerate extremely long delays, but where
-	&ldquo;extremely long&rdquo; is not long enough to allow
-	wrap-around when incrementing a 64-bit counter.
-<li>	Both the compiler and the CPU can reorder memory accesses.
-	Where it matters, RCU must use compiler directives and
-	memory-barrier instructions to preserve ordering.
-<li>	Conflicting writes to memory locations in any given cache line
-	will result in expensive cache misses.
-	Greater numbers of concurrent writes and more-frequent
-	concurrent writes will result in more dramatic slowdowns.
-	RCU is therefore obligated to use algorithms that have
-	sufficient locality to avoid significant performance and
-	scalability problems.
-<li>	As a rough rule of thumb, only one CPU's worth of processing
-	may be carried out under the protection of any given exclusive
-	lock.
-	RCU must therefore use scalable locking designs.
-<li>	Counters are finite, especially on 32-bit systems.
-	RCU's use of counters must therefore tolerate counter wrap,
-	or be designed such that counter wrap would take way more
-	time than a single system is likely to run.
-	An uptime of ten years is quite possible, a runtime
-	of a century much less so.
-	As an example of the latter, RCU's dyntick-idle nesting counter
-	allows 54 bits for interrupt nesting level (this counter
-	is 64 bits even on a 32-bit system).
-	Overflowing this counter requires 2<sup>54</sup>
-	half-interrupts on a given CPU without that CPU ever going idle.
-	If a half-interrupt happened every microsecond, it would take
-	570 years of runtime to overflow this counter, which is currently
-	believed to be an acceptably long time.
-<li>	Linux systems can have thousands of CPUs running a single
-	Linux kernel in a single shared-memory environment.
-	RCU must therefore pay close attention to high-end scalability.
-</ol>
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would have otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li>	<a href="#Specialization">Specialization</a>
-<li>	<a href="#Performance and Scalability">Performance and Scalability</a>
-<li>	<a href="#Forward Progress">Forward Progress</a>
-<li>	<a href="#Composability">Composability</a>
-<li>	<a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes is covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations,
-which means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-Experience thus far is captured by the following list of situations:
-
-<ol>
-<li>	Read-mostly data, where stale and inconsistent data is not
-	a problem:   RCU works great!
-<li>	Read-mostly data, where data must be consistent:
-	RCU works well.
-<li>	Read-write data, where data must be consistent:
-	RCU <i>might</i> work OK.
-	Or not.
-<li>	Write-mostly data, where data must be consistent:
-	RCU is very unlikely to be the right tool for the job,
-	with the following exceptions, where RCU can provide:
-	<ol type=a>
-	<li>	Existence guarantees for update-friendly mechanisms.
-	<li>	Wait-free read-side primitives for real-time use.
-	</ol>
-</ol>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	What about sleeping locks?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	These are forbidden within Linux-kernel RCU read-side critical
-	sections because it is not legal to place a quiescent state
-	(in this case, voluntary context switch) within an RCU read-side
-	critical section.
-	However, sleeping locks may be used within userspace RCU read-side
-	critical sections, and also within Linux-kernel sleepable RCU
-	<a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
-	read-side critical sections.
-	In addition, the -rt patchset turns spinlocks into a
-	sleeping locks so that the corresponding critical sections
-	can be preempted, which also means that these sleeplockified
-	spinlocks (but not other sleeping locks!)  may be acquire within
-	-rt-Linux-kernel RCU read-side critical sections.
-	</font>
-
-	<p><font color="ffffff">
-	Note that it <i>is</i> legal for a normal RCU read-side
-	critical section to conditionally acquire a sleeping locks
-	(as in <tt>mutex_trylock()</tt>), but only as long as it does
-	not loop indefinitely attempting to conditionally acquire that
-	sleeping locks.
-	The key point is that things like <tt>mutex_trylock()</tt>
-	either return with the mutex held, or return an error indication if
-	the mutex was not immediately available.
-	Either way, <tt>mutex_trylock()</tt> returns immediately without
-	sleeping.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, but many can function in that mode,
-with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem:  In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veterinarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veterinarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat.
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation between a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given &ldquo;frank and open&rdquo; feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important for in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-Here, &ldquo;modest&rdquo; means roughly the same latency
-degradation as a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9   struct foo *p = container_of(rhp, struct foo, rh);
-10
-11   kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16   struct foo *p;
-17
-18   spin_lock(&amp;gp_lock);
-19   p = rcu_access_pointer(gp);
-20   if (!p) {
-21     spin_unlock(&amp;gp_lock);
-22     return false;
-23   }
-24   rcu_assign_pointer(gp, NULL);
-25   call_rcu(&amp;p-&gt;rh, remove_gp_cb);
-26   spin_unlock(&amp;gp_lock);
-27   return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines&nbsp;1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line&nbsp;25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers
-and from idle and offline CPUs.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-	After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-	structure, which would interact badly with concurrent insertions.
-	Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-	any changes, including any insertions that <tt>rcu_dereference()</tt>
-	would protect against.
-	Therefore, any insertions will be delayed until after
-	<tt>-&gt;gp_lock</tt>
-	is released on line&nbsp;25, which in turn means that
-	<tt>rcu_access_pointer()</tt> suffices.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows &ldquo;fire and forget&rdquo; operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9   struct foo *p;
-10
-11   spin_lock(&amp;gp_lock);
-12   p = rcu_dereference(gp);
-13   if (!p) {
-14     spin_unlock(&amp;gp_lock);
-15     return false;
-16   }
-17   rcu_assign_pointer(gp, NULL);
-18   kfree_rcu(p, rh);
-19   spin_unlock(&amp;gp_lock);
-20   return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Earlier it was claimed that <tt>call_rcu()</tt> and
-	<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-	by readers.
-	But how can that be correct, given that the invocation of the callback
-	and the freeing of the memory (respectively) must still wait for
-	a grace period to elapse?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	We could define things this way, but keep in mind that this sort of
-	definition would say that updates in garbage-collected languages
-	cannot complete until the next time the garbage collector runs,
-	which does not seem at all reasonable.
-	The key point is that in most cases, an updater using either
-	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-	next update as soon as it has invoked <tt>call_rcu()</tt> or
-	<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-	grace period.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3   struct foo *p;
- 4   unsigned long s;
- 5
- 6   spin_lock(&amp;gp_lock);
- 7   p = rcu_access_pointer(gp);
- 8   if (!p) {
- 9     spin_unlock(&amp;gp_lock);
-10     return false;
-11   }
-12   rcu_assign_pointer(gp, NULL);
-13   spin_unlock(&amp;gp_lock);
-14   s = get_state_synchronize_rcu();
-15   do_something_while_waiting();
-16   cond_synchronize_rcu(s);
-17   kfree(p);
-18   return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
-&ldquo;cookie&rdquo; from RCU,
-then line&nbsp;15 carries out other tasks,
-and finally, line&nbsp;16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility and CPU overhead.
-
-<h3><a name="Forward Progress">Forward Progress</a></h3>
-
-<p>
-In theory, delaying grace-period completion and callback invocation
-is harmless.
-In practice, not only are memory sizes finite but also callbacks sometimes
-do wakeups, and sufficiently deferred wakeups can be difficult
-to distinguish from system hangs.
-Therefore, RCU must provide a number of mechanisms to promote forward
-progress.
-
-<p>
-These mechanisms are not foolproof, nor can they be.
-For one simple example, an infinite loop in an RCU read-side critical
-section must by definition prevent later grace periods from ever completing.
-For a more involved example, consider a 64-CPU system built with
-<tt>CONFIG_RCU_NOCB_CPU=y</tt> and booted with <tt>rcu_nocbs=1-63</tt>,
-where CPUs&nbsp;1 through&nbsp;63 spin in tight loops that invoke
-<tt>call_rcu()</tt>.
-Even if these tight loops also contain calls to <tt>cond_resched()</tt>
-(thus allowing grace periods to complete), CPU&nbsp;0 simply will
-not be able to invoke callbacks as fast as the other 63 CPUs can
-register them, at least not until the system runs out of memory.
-In both of these examples, the Spiderman principle applies:  With great
-power comes great responsibility.
-However, short of this level of abuse, RCU is required to
-ensure timely completion of grace periods and timely invocation of
-callbacks.
-
-<p>
-RCU takes the following steps to encourage timely completion of
-grace periods:
-
-<ol>
-<li>	If a grace period fails to complete within 100&nbsp;milliseconds,
-	RCU causes future invocations of <tt>cond_resched()</tt> on
-	the holdout CPUs to provide an RCU quiescent state.
-	RCU also causes those CPUs' <tt>need_resched()</tt> invocations
-	to return <tt>true</tt>, but only after the corresponding CPU's
-	next scheduling-clock.
-<li>	CPUs mentioned in the <tt>nohz_full</tt> kernel boot parameter
-	can run indefinitely in the kernel without scheduling-clock
-	interrupts, which defeats the above <tt>need_resched()</tt>
-	strategem.
-	RCU will therefore invoke <tt>resched_cpu()</tt> on any
-	<tt>nohz_full</tt> CPUs still holding out after
-	109&nbsp;milliseconds.
-<li>	In kernels built with <tt>CONFIG_RCU_BOOST=y</tt>, if a given
-	task that has been preempted within an RCU read-side critical
-	section is holding out for more than 500&nbsp;milliseconds,
-	RCU will resort to priority boosting.
-<li>	If a CPU is still holding out 10&nbsp;seconds into the grace
-	period, RCU will invoke <tt>resched_cpu()</tt> on it regardless
-	of its <tt>nohz_full</tt> state.
-</ol>
-
-<p>
-The above values are defaults for systems running with <tt>HZ=1000</tt>.
-They will vary as the value of <tt>HZ</tt> varies, and can also be
-changed using the relevant Kconfig options and kernel boot parameters.
-RCU currently does not do much sanity checking of these
-parameters, so please use caution when changing them.
-Note that these forward-progress measures are provided only for RCU,
-not for
-<a href="#Sleepable RCU">SRCU</a> or
-<a href="#Tasks RCU">Tasks RCU</a>.
-
-<p>
-RCU takes the following steps in <tt>call_rcu()</tt> to encourage timely
-invocation of callbacks when any given non-<tt>rcu_nocbs</tt> CPU has
-10,000 callbacks, or has 10,000 more callbacks than it had the last time
-encouragement was provided:
-
-<ol>
-<li>	Starts a grace period, if one is not already in progress.
-<li>	Forces immediate checking for quiescent states, rather than
-	waiting for three milliseconds to have elapsed since the
-	beginning of the grace period.
-<li>	Immediately tags the CPU's callbacks with their grace period
-	completion numbers, rather than waiting for the <tt>RCU_SOFTIRQ</tt>
-	handler to get around to it.
-<li>	Lifts callback-execution batch limits, which speeds up callback
-	invocation at the expense of degrading realtime response.
-</ol>
-
-<p>
-Again, these are default values when running at <tt>HZ=1000</tt>,
-and can be overridden.
-Again, these forward-progress measures are provided only for RCU,
-not for
-<a href="#Sleepable RCU">SRCU</a> or
-<a href="#Tasks RCU">Tasks RCU</a>.
-Even for RCU, callback-invocation forward progress for <tt>rcu_nocbs</tt>
-CPUs is much less well-developed, in part because workloads benefiting
-from <tt>rcu_nocbs</tt> CPUs tend to invoke <tt>call_rcu()</tt>
-relatively infrequently.
-If workloads emerge that need both <tt>rcu_nocbs</tt> CPUs and high
-<tt>call_rcu()</tt> invocation rates, then additional forward-progress
-work will be required.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, perhaps in the guise of tail
-recursion, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section:  To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods:  As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some small read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>smp_call_function_single()</tt>.
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates, for example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li>	It is all too easy to forget to use <tt>rcu_read_lock()</tt>
-	everywhere that it is needed, so kernels built with
-	<tt>CONFIG_PROVE_RCU=y</tt> will splat if
-	<tt>rcu_dereference()</tt> is used outside of an
-	RCU read-side critical section.
-	Update-side code can use <tt>rcu_dereference_protected()</tt>,
-	which takes a
-	<a href="https://lwn.net/Articles/371986/">lockdep expression</a>
-	to indicate what is providing the protection.
-	If the indicated protection is not provided, a lockdep splat
-	is emitted.
-
-	<p>
-	Code shared between readers and updaters can use
-	<tt>rcu_dereference_check()</tt>, which also takes a
-	lockdep expression, and emits a lockdep splat if neither
-	<tt>rcu_read_lock()</tt> nor the indicated protection
-	is in place.
-	In addition, <tt>rcu_dereference_raw()</tt> is used in those
-	(hopefully rare) cases where the required protection cannot
-	be easily described.
-	Finally, <tt>rcu_read_lock_held()</tt> is provided to
-	allow a function to verify that it has been invoked within
-	an RCU read-side critical section.
-	I was made aware of this set of requirements shortly after Thomas
-	Gleixner audited a number of RCU uses.
-<li>	A given function might wish to check for RCU-related preconditions
-	upon entry, before using any other RCU API.
-	The <tt>rcu_lockdep_assert()</tt> does this job,
-	asserting the expression in kernels having lockdep enabled
-	and doing nothing otherwise.
-<li>	It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
-	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
-	substituting a simple assignment.
-	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which sparse
-	will complain about simple-assignment accesses to that pointer.
-	Arnd Bergmann made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li>	Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
-	will splat if a data element is passed to <tt>call_rcu()</tt>
-	twice in a row, without a grace period in between.
-	(This error is similar to a double free.)
-	The corresponding <tt>rcu_head</tt> structures that are
-	dynamically allocated are automatically tracked, but
-	<tt>rcu_head</tt> structures allocated on the stack
-	must be initialized with <tt>init_rcu_head_on_stack()</tt>
-	and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
-	Similarly, statically allocated non-stack <tt>rcu_head</tt>
-	structures must be initialized with <tt>init_rcu_head()</tt>
-	and cleaned up with <tt>destroy_rcu_head()</tt>.
-	Mathieu Desnoyers made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li>	An infinite loop in an RCU read-side critical section will
-	eventually trigger an RCU CPU stall warning splat, with
-	the duration of &ldquo;eventually&rdquo; being controlled by the
-	<tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
-	alternatively, by the
-	<tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
-	parameter.
-	However, RCU is not obligated to produce this splat
-	unless there is a grace period waiting on that particular
-	RCU read-side critical section.
-	<p>
-	Some extreme workloads might intentionally delay
-	RCU grace periods, and systems running those workloads can
-	be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
-	to suppress the splats.
-	This kernel parameter may also be set via <tt>sysfs</tt>.
-	Furthermore, RCU CPU stall warnings are counter-productive
-	during sysrq dumps and during panics.
-	RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
-	<tt>rcu_sysrq_end()</tt> API members to be called before
-	and after long sysrq dumps.
-	RCU also supplies the <tt>rcu_panic()</tt> notifier that is
-	automatically invoked at the beginning of a panic to suppress
-	further RCU CPU stall warnings.
-
-	<p>
-	This requirement made itself known in the early 1990s, pretty
-	much the first time that it was necessary to debug a CPU stall.
-	That said, the initial implementation in DYNIX/ptx was quite
-	generic in comparison with that of Linux.
-<li>	Although it would be very good to detect pointers leaking out
-	of RCU read-side critical sections, there is currently no
-	good way of doing this.
-	One complication is the need to distinguish between pointers
-	leaking and pointers that have been handed off from RCU to
-	some other synchronization mechanism, for example, reference
-	counting.
-<li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via event tracing.
-<li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
-	<tt>rcu_dereference()</tt> to create typical linked
-	data structures can be surprisingly error-prone.
-	Therefore, RCU-protected
-	<a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
-	and, more recently, RCU-protected
-	<a href="https://lwn.net/Articles/612100/">hash tables</a>
-	are available.
-	Many other special-purpose RCU-protected data structures are
-	available in the Linux kernel and the userspace RCU library.
-<li>	Some linked structures are created at compile time, but still
-	require <tt>__rcu</tt> checking.
-	The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
-	purpose.
-<li>	It is not necessary to use <tt>rcu_assign_pointer()</tt>
-	when creating linked structures that are to be published via
-	a single external pointer.
-	The <tt>RCU_INIT_POINTER()</tt> macro is provided for
-	this task and also for assigning <tt>NULL</tt> pointers
-	at runtime.
-</ol>
-
-<p>
-This not a hard-and-fast list:  RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li>	<a href="#Configuration">Configuration</a>.
-<li>	<a href="#Firmware Interface">Firmware Interface</a>.
-<li>	<a href="#Early Boot">Early Boot</a>.
-<li>	<a href="#Interrupts and NMIs">
-	Interrupts and non-maskable interrupts (NMIs)</a>.
-<li>	<a href="#Loadable Modules">Loadable Modules</a>.
-<li>	<a href="#Hotplug CPU">Hotplug CPU</a>.
-<li>	<a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li>	<a href="#Tracing and RCU">Tracing and RCU</a>.
-<li>	<a href="#Accesses to User Memory and RCU">
-Accesses to User Memory and RCU</a>.
-<li>	<a href="#Energy Efficiency">Energy Efficiency</a>.
-<li>	<a href="#Scheduling-Clock Interrupts and RCU">
-	Scheduling-Clock Interrupts and RCU</a>.
-<li>	<a href="#Memory Efficiency">Memory Efficiency</a>.
-<li>	<a href="#Performance, Scalability, Response Time, and Reliability">
-	Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-&ldquo;out of the box.&rdquo;
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-all of RCU's kthreads have been spawned, which occurs at
-<tt>early_initcall()</tt> time.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier,
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt> and
-<tt>synchronize_rcu_expedited()</tt>,
-will operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that the call <tt>synchronize_rcu()</tt> (or friends)
-itself is a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-However, once the scheduler has spawned its first kthread, this early
-boot trick fails for <tt>synchronize_rcu()</tt> (as well as for
-<tt>synchronize_rcu_expedited()</tt>) in <tt>CONFIG_PREEMPT=y</tt>
-kernels.
-The reason is that an RCU read-side critical section might be preempted,
-which means that a subsequent <tt>synchronize_rcu()</tt> really does have
-to wait for something, as opposed to simply returning immediately.
-Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of
-its kthreads are spawned, which doesn't happen until some time during
-<tt>early_initcalls()</tt> time.
-But this is no excuse:  RCU is nevertheless required to correctly handle
-synchronous grace periods during this time period.
-Once all of its kthreads are up and running, RCU starts running
-normally.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	How can RCU possibly handle grace periods before all of its
-	kthreads have been spawned???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Very carefully!
-	</font>
-
-	<p><font color="ffffff">
-	During the &ldquo;dead zone&rdquo; between the time that the
-	scheduler spawns the first task and the time that all of RCU's
-	kthreads have been spawned, all synchronous grace periods are
-	handled by the expedited grace-period mechanism.
-	At runtime, this expedited mechanism relies on workqueues, but
-	during the dead zone the requesting task itself drives the
-	desired expedited grace period.
-	Because dead-zone execution takes place within task context,
-	everything works.
-	Once the dead zone ends, expedited grace periods go back to
-	using workqueues, as is required to avoid problems that would
-	otherwise occur when a user task received a POSIX signal while
-	driving an expedited grace period.
-	</font>
-
-	<p><font color="ffffff">
-	And yes, this does mean that it is unhelpful to send POSIX
-	signals to random tasks between the time that the scheduler
-	spawns its first kthread and the time that RCU's kthreads
-	have all been spawned.
-	If there ever turns out to be a good reason for sending POSIX
-	signals during that time, appropriate adjustments will be made.
-	(If it turns out that POSIX signals are sent during this time for
-	no good reason, other adjustments will be made, appropriate
-	or otherwise.)
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/r/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/r/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<p>
-Furthermore, NMI handlers can be interrupted by what appear to RCU
-to be normal interrupts.
-One way that this can happen is for code that directly invokes
-<tt>rcu_irq_enter()</tt> and <tt>rcu_irq_exit()</tt> to be called
-from an NMI handler.
-This astonishing fact of life prompted the current code structure,
-which has <tt>rcu_irq_enter()</tt> invoking <tt>rcu_nmi_enter()</tt>
-and <tt>rcu_irq_exit()</tt> invoking <tt>rcu_nmi_exit()</tt>.
-And yes, I also learned of this requirement the hard way.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-eventually going to be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
-
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<p>
-<b>Important note</b>: The <tt>rcu_barrier()</tt> function is not,
-repeat, <i>not</i>, obligated to wait for a grace period.
-It is instead only required to wait for RCU callbacks that have
-already been posted.
-Therefore, if there are no RCU callbacks posted anywhere in the system,
-<tt>rcu_barrier()</tt> is within its rights to return immediately.
-Even if there are callbacks posted, <tt>rcu_barrier()</tt> does not
-necessarily need to wait for a grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Wait a minute!
-	Each RCU callbacks must wait for a grace period to complete,
-	and <tt>rcu_barrier()</tt> must wait for each pre-existing
-	callback to be invoked.
-	Doesn't <tt>rcu_barrier()</tt> therefore need to wait for
-	a full grace period if there is even one callback posted anywhere
-	in the system?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Absolutely not!!!
-	</font>
-
-	<p><font color="ffffff">
-	Yes, each RCU callbacks must wait for a grace period to complete,
-	but it might well be partly (or even completely) finished waiting
-	by the time <tt>rcu_barrier()</tt> is invoked.
-	In that case, <tt>rcu_barrier()</tt> need only wait for the
-	remaining portion of the grace period to elapse.
-	So even if there are quite a few callbacks posted,
-	<tt>rcu_barrier()</tt> might well return quite quickly.
-	</font>
-
-	<p><font color="ffffff">
-	So if you need to wait for a grace period as well as for all
-	pre-existing callbacks, you will need to invoke both
-	<tt>synchronize_rcu()</tt> and <tt>rcu_barrier()</tt>.
-	If latency is a concern, you can always use workqueues
-	to invoke them concurrently.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU,
-with the exception of <a href="#Sleepable RCU">SRCU</a> read-side
-critical sections.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is &ldquo;interesting.&rdquo;
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even synchronous grace-period operations such as
-<tt>synchronize_rcu()</tt> and <tt>synchronize_rcu_expedited()</tt>.
-
-<p>
-However, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-Furthermore, <tt>rcu_barrier()</tt> blocks CPU-hotplug operations
-during its execution, which results in another type of deadlock
-when invoked from a CPU-hotplug notifier.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-The preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must therefore be written carefully to avoid deadlocks
-involving the scheduler's runqueue and priority-inheritance locks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via
-interrupt handler's use of RCU.
-
-<p>
-This scheduler-RCU requirement came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads when built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<p>
-It is forbidden to hold any of scheduler's runqueue or priority-inheritance
-spinlocks across an <tt>rcu_read_unlock()</tt> unless interrupts have been
-disabled across the entire RCU read-side critical section, that is,
-up to and including the matching <tt>rcu_read_lock()</tt>.
-Violating this restriction can result in deadlocks involving these
-scheduler spinlocks.
-There was hope that this restriction might be lifted when interrupt-disabled
-calls to <tt>rcu_read_unlock()</tt> started deferring the reporting of
-the resulting RCU-preempt quiescent state until the end of the corresponding
-interrupts-disabled region.
-Unfortunately, timely reporting of the corresponding quiescent state
-to expedited grace periods requires a call to <tt>raise_softirq()</tt>,
-which can acquire these scheduler spinlocks.
-In addition, real-time systems using RCU priority boosting
-need this restriction to remain in effect because deferred
-quiescent-state reporting would also defer deboosting, which in turn
-would degrade real-time latencies.
-
-<p>
-In theory, if a given RCU read-side critical section could be
-guaranteed to be less than one second in duration, holding a scheduler
-spinlock across that critical section's <tt>rcu_read_unlock()</tt>
-would require only that preemption be disabled across the entire
-RCU read-side critical section, not interrupts.
-Unfortunately, given the possibility of vCPU preemption, long-running
-interrupts, and so on, it is not possible in practice to guarantee
-that a given RCU read-side critical section will complete in less than
-one second.
-Therefore, as noted above, if scheduler spinlocks are held across
-a given call to <tt>rcu_read_unlock()</tt>, interrupts must be
-disabled across the entire RCU read-side critical section.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_check()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Accesses to User Memory and RCU">
-Accesses to User Memory and RCU</a></h3>
-
-<p>
-The kernel needs to access user-space memory, for example, to access
-data referenced by system-call parameters.
-The <tt>get_user()</tt> macro does this job.
-
-<p>
-However, user-space memory might well be paged out, which means
-that <tt>get_user()</tt> might well page-fault and thus block while
-waiting for the resulting I/O to complete.
-It would be a very bad thing for the compiler to reorder
-a <tt>get_user()</tt> invocation into an RCU read-side critical
-section.
-For example, suppose that the source code looked like this:
-
-<blockquote>
-<pre>
- 1 rcu_read_lock();
- 2 p = rcu_dereference(gp);
- 3 v = p-&gt;value;
- 4 rcu_read_unlock();
- 5 get_user(user_v, user_p);
- 6 do_something_with(v, user_v);
-</pre>
-</blockquote>
-
-<p>
-The compiler must not be permitted to transform this source code into
-the following:
-
-<blockquote>
-<pre>
- 1 rcu_read_lock();
- 2 p = rcu_dereference(gp);
- 3 get_user(user_v, user_p); // BUG: POSSIBLE PAGE FAULT!!!
- 4 v = p-&gt;value;
- 5 rcu_read_unlock();
- 6 do_something_with(v, user_v);
-</pre>
-</blockquote>
-
-<p>
-If the compiler did make this transformation in a
-<tt>CONFIG_PREEMPT=n</tt> kernel build, and if <tt>get_user()</tt> did
-page fault, the result would be a quiescent state in the middle
-of an RCU read-side critical section.
-This misplaced quiescent state could result in line&nbsp;4 being
-a use-after-free access, which could be bad for your kernel's
-actuarial statistics.
-Similar examples can be constructed with the call to <tt>get_user()</tt>
-preceding the <tt>rcu_read_lock()</tt>.
-
-<p>
-Unfortunately, <tt>get_user()</tt> doesn't have any particular
-ordering properties, and in some architectures the underlying <tt>asm</tt>
-isn't even marked <tt>volatile</tt>.
-And even if it was marked <tt>volatile</tt>, the above access to
-<tt>p-&gt;value</tt> is not volatile, so the compiler would not have any
-reason to keep those two accesses in order.
-
-<p>
-Therefore, the Linux-kernel definitions of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> must act as compiler barriers,
-at least for outermost instances of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> within a nested set of RCU read-side critical
-sections.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing is provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
-However, there are some restrictions on the code placed within
-<tt>RCU_NONIDLE()</tt>:
-
-<ol>
-<li>	Blocking is prohibited.
-	In practice, this is not a serious restriction given that idle
-	tasks are prohibited from blocking to begin with.
-<li>	Although nesting <tt>RCU_NONIDLE()</tt> is permitted, they cannot
-	nest indefinitely deeply.
-	However, given that they can be nested on the order of a million
-	deep, even on 32-bit systems, this should not be a serious
-	restriction.
-	This nesting limit would probably be reached long after the
-	compiler OOMed or the stack overflowed.
-<li>	Any code path that enters <tt>RCU_NONIDLE()</tt> must sequence
-	out of that same <tt>RCU_NONIDLE()</tt>.
-	For example, the following is grossly illegal:
-
-	<blockquote>
-	<pre>
- 1     RCU_NONIDLE({
- 2       do_something();
- 3       goto bad_idea;  /* BUG!!! */
- 4       do_something_else();});
- 5   bad_idea:
-	</pre>
-	</blockquote>
-
-	<p>
-	It is just as illegal to transfer control into the middle of
-	<tt>RCU_NONIDLE()</tt>'s argument.
-	Yes, in theory, you could transfer in as long as you also
-	transferred out, but in practice you could also expect to get sharply
-	worded review comments.
-</ol>
-
-<p>
-It is similarly socially unacceptable to interrupt an
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-RCU must therefore be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet, for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Scheduling-Clock Interrupts and RCU">
-Scheduling-Clock Interrupts and RCU</a></h3>
-
-<p>
-The kernel transitions between in-kernel non-idle execution, userspace
-execution, and the idle loop.
-Depending on kernel configuration, RCU handles these states differently:
-
-<table border=3>
-<tr><th><tt>HZ</tt> Kconfig</th>
-	<th>In-Kernel</th>
-		<th>Usermode</th>
-			<th>Idle</th></tr>
-<tr><th align="left"><tt>HZ_PERIODIC</tt></th>
-	<td>Can rely on scheduling-clock interrupt.</td>
-		<td>Can rely on scheduling-clock interrupt and its
-		    detection of interrupt from usermode.</td>
-			<td>Can rely on RCU's dyntick-idle detection.</td></tr>
-<tr><th align="left"><tt>NO_HZ_IDLE</tt></th>
-	<td>Can rely on scheduling-clock interrupt.</td>
-		<td>Can rely on scheduling-clock interrupt and its
-		    detection of interrupt from usermode.</td>
-			<td>Can rely on RCU's dyntick-idle detection.</td></tr>
-<tr><th align="left"><tt>NO_HZ_FULL</tt></th>
-	<td>Can only sometimes rely on scheduling-clock interrupt.
-	    In other cases, it is necessary to bound kernel execution
-	    times and/or use IPIs.</td>
-		<td>Can rely on RCU's dyntick-idle detection.</td>
-			<td>Can rely on RCU's dyntick-idle detection.</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	Why can't <tt>NO_HZ_FULL</tt> in-kernel execution rely on the
-	scheduling-clock interrupt, just like <tt>HZ_PERIODIC</tt>
-	and <tt>NO_HZ_IDLE</tt> do?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	Because, as a performance optimization, <tt>NO_HZ_FULL</tt>
-	does not necessarily re-enable the scheduling-clock interrupt
-	on entry to each and every system call.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-However, RCU must be reliably informed as to whether any given
-CPU is currently in the idle loop, and, for <tt>NO_HZ_FULL</tt>,
-also whether that CPU is executing in usermode, as discussed
-<a href="#Energy Efficiency">earlier</a>.
-It also requires that the scheduling-clock interrupt be enabled when
-RCU needs it to be:
-
-<ol>
-<li>	If a CPU is either idle or executing in usermode, and RCU believes
-	it is non-idle, the scheduling-clock tick had better be running.
-	Otherwise, you will get RCU CPU stall warnings.  Or at best,
-	very long (11-second) grace periods, with a pointless IPI waking
-	the CPU from time to time.
-<li>	If a CPU is in a portion of the kernel that executes RCU read-side
-	critical sections, and RCU believes this CPU to be idle, you will get
-	random memory corruption.  <b>DON'T DO THIS!!!</b>
-
-	<br>This is one reason to test with lockdep, which will complain
-	about this sort of thing.
-<li>	If a CPU is in a portion of the kernel that is absolutely
-	positively no-joking guaranteed to never execute any RCU read-side
-	critical sections, and RCU believes this CPU to to be idle,
-	no problem.  This sort of thing is used by some architectures
-	for light-weight exception handlers, which can then avoid the
-	overhead of <tt>rcu_irq_enter()</tt> and <tt>rcu_irq_exit()</tt>
-	at exception entry and exit, respectively.
-	Some go further and avoid the entireties of <tt>irq_enter()</tt>
-	and <tt>irq_exit()</tt>.
-
-	<br>Just make very sure you are running some of your tests with
-	<tt>CONFIG_PROVE_RCU=y</tt>, just in case one of your code paths
-	was in fact joking about not doing RCU read-side critical sections.
-<li>	If a CPU is executing in the kernel with the scheduling-clock
-	interrupt disabled and RCU believes this CPU to be non-idle,
-	and if the CPU goes idle (from an RCU perspective) every few
-	jiffies, no problem.  It is usually OK for there to be the
-	occasional gap between idle periods of up to a second or so.
-
-	<br>If the gap grows too long, you get RCU CPU stall warnings.
-<li>	If a CPU is either idle or executing in usermode, and RCU believes
-	it to be idle, of course no problem.
-<li>	If a CPU is executing in the kernel, the kernel code
-	path is passing through quiescent states at a reasonable
-	frequency (preferably about once per few jiffies, but the
-	occasional excursion to a second or so is usually OK) and the
-	scheduling-clock interrupt is enabled, of course no problem.
-
-	<br>If the gap between a successive pair of quiescent states grows
-	too long, you get RCU CPU stall warnings.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But what if my driver has a hardware interrupt handler
-	that can run for many seconds?
-	I cannot invoke <tt>schedule()</tt> from an hardware
-	interrupt handler, after all!
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	One approach is to do <tt>rcu_irq_exit();rcu_irq_enter();</tt>
-	every so often.
-	But given that long-running interrupt handlers can cause
-	other problems, not least for response time, shouldn't you
-	work to keep your interrupt handler's runtime within reasonable
-	bounds?
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-But as long as RCU is properly informed of kernel state transitions between
-in-kernel execution, usermode execution, and idle, and as long as the
-scheduling-clock interrupt is enabled when RCU needs it to be, you
-can rest assured that the bugs you encounter will be in some other
-part of RCU or some other part of the kernel!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>-&gt;func</tt> field will often provide
-the needed debug information.
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future &ldquo;lazy&rdquo;
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<p>
-That said, there are limits.
-RCU requires that the <tt>rcu_head</tt> structure be aligned to a
-two-byte boundary, and passing a misaligned <tt>rcu_head</tt>
-structure to one of the <tt>call_rcu()</tt> family of functions
-will result in a splat.
-It is therefore necessary to exercise caution when packing
-structures containing fields of type <tt>rcu_head</tt>.
-Why not a four-byte or even eight-byte alignment requirement?
-Because the m68k architecture provides only two-byte alignment,
-and thus acts as alignment's least common denominator.
-
-<p>
-The reason for reserving the bottom bit of pointers to
-<tt>rcu_head</tt> structures is to leave the door open to
-&ldquo;lazy&rdquo; callbacks whose invocations can safely be deferred.
-Deferring invocation could potentially have energy-efficiency
-benefits, but only if the rate of non-lazy callbacks decreases
-significantly for some important workload.
-In the meantime, reserving the bottom bit keeps this option open
-in case it one day becomes useful.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined, however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead, witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-RCU must avoid degrading real-time response for CPU-bound threads, whether
-executing in usermode (which is one use case for
-<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
-That said, CPU-bound loops in the kernel must execute
-<tt>cond_resched()</tt> at least once per few tens of milliseconds
-in order to avoid receiving an IPI from RCU.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting&mdash;and perhaps unprecedented&mdash;validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-This bug will be occurring about three times per <i>day</i> across
-the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li>	<a href="#Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a>
-<li>	<a href="#Sched Flavor">Sched Flavor (Historical)</a>
-<li>	<a href="#Sleepable RCU">Sleepable RCU</a>
-<li>	<a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a></h3>
-
-<p>
-The RCU-bh flavor of RCU has since been expressed in terms of
-the other RCU flavors as part of a consolidation of the three
-flavors into a single flavor.
-The read-side API remains, and continues to disable softirq and to
-be accounted for by lockdep.
-Much of the material in this section is therefore strictly historical
-in nature.
-
-<p>
-The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
-hence the &ldquo;_bh&rdquo; abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handlers during the
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-However, the update-side APIs are now simple wrappers for other RCU
-flavors, namely RCU-sched in CONFIG_PREEMPT=n kernels and RCU-preempt
-otherwise.
-
-<h3><a name="Sched Flavor">Sched Flavor (Historical)</a></h3>
-
-<p>
-The RCU-sched flavor of RCU has since been expressed in terms of
-the other RCU flavors as part of a consolidation of the three
-flavors into a single flavor.
-The read-side API remains, and continues to disable preemption and to
-be accounted for by lockdep.
-Much of the material in this section is therefore strictly historical
-in nature.
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
-RCU in that an RCU-sched grace period waits for for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
-
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying &ldquo;I need to block within
-an RCU read-side critical section&rdquo; was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections,
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can wait, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 synchronize_srcu(&amp;ss);
- 6 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across as <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
-<p>
-Also unlike other RCU flavors, <tt>synchronize_srcu()</tt> may <b>not</b>
-be invoked from CPU-hotplug notifiers, due to the fact that SRCU grace
-periods make use of timers and the possibility of timers being temporarily
-&ldquo;stranded&rdquo; on the outgoing CPU.
-This stranding of timers means that timers posted to the outgoing CPU
-will not fire until late in the CPU-hotplug process.
-The problem is that if a notifier is waiting on an SRCU grace period,
-that grace period is waiting on a timer, and that timer is stranded on the
-outgoing CPU, then the notifier will never be awakened, in other words,
-deadlock has occurred.
-This same situation of course also prohibits <tt>srcu_barrier()</tt>
-from being invoked from CPU-hotplug notifiers.
-
-<p>
-SRCU also differs from other RCU flavors in that SRCU's expedited and
-non-expedited grace periods are implemented by the same mechanism.
-This means that in the current SRCU implementation, expediting a
-future grace period has the side effect of expediting all prior
-grace periods that have not yet completed.
-(But please note that this is a property of the current implementation,
-not necessarily of future implementations.)
-In addition, if SRCU has been idle for longer than the interval
-specified by the <tt>srcutree.exp_holdoff</tt> kernel boot parameter
-(25&nbsp;microseconds by default),
-and if a <tt>synchronize_srcu()</tt> invocation ends this idle period,
-that invocation will be automatically expedited.
-
-<p>
-As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
-a locking bottleneck present in prior kernel versions.
-Although this will allow users to put much heavier stress on
-<tt>call_srcu()</tt>, it is important to note that SRCU does not
-yet take any special steps to deal with callback flooding.
-So if you are posting (say) 10,000 SRCU callbacks per second per CPU,
-you are probably totally OK, but if you intend to post (say) 1,000,000
-SRCU callbacks per second per CPU, please run some tests first.
-SRCU just might need a few adjustment to deal with that sort of load.
-Of course, your mileage may vary based on the speed of your CPUs and
-the size of your memory.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
-
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
-In <tt>CONFIG_PREEMPT=n</tt> kernels, trampolines cannot be preempted,
-so these APIs map to
-<tt>call_rcu()</tt>,
-<tt>synchronize_rcu()</tt>, and
-<tt>rcu_barrier()</tt>, respectively.
-In <tt>CONFIG_PREEMPT=y</tt> kernels, trampolines can be preempted,
-and these three APIs are therefore implemented by separate functions
-that check for voluntary context switches.
-
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-<tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use <tt>rcu_barrier()</tt> in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an &ldquo;interesting&rdquo; architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<p>
-Additional work may be required to provide reasonable forward-progress
-guarantees under heavy load for grace periods and for callback
-invocation.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decade's worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
new file mode 100644
index 000000000000..fd5e2cbc4935
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -0,0 +1,2704 @@
+=================================
+A Tour Through RCU's Requirements
+=================================
+
+Copyright IBM Corporation, 2015
+
+Author: Paul E. McKenney
+
+The initial version of this document appeared in the
+`LWN <https://lwn.net/>`_ on those articles:
+`part 1 <https://lwn.net/Articles/652156/>`_,
+`part 2 <https://lwn.net/Articles/652677/>`_, and
+`part 3 <https://lwn.net/Articles/653326/>`_.
+
+Introduction
+------------
+
+Read-copy update (RCU) is a synchronization mechanism that is often used
+as a replacement for reader-writer locking. RCU is unusual in that
+updaters do not block readers, which means that RCU's read-side
+primitives can be exceedingly fast and scalable. In addition, updaters
+can make useful forward progress concurrently with readers. However, all
+this concurrency between RCU readers and updaters does raise the
+question of exactly what RCU readers are doing, which in turn raises the
+question of exactly what RCU's requirements are.
+
+This document therefore summarizes RCU's requirements, and can be
+thought of as an informal, high-level specification for RCU. It is
+important to understand that RCU's specification is primarily empirical
+in nature; in fact, I learned about many of these requirements the hard
+way. This situation might cause some consternation, however, not only
+has this learning process been a lot of fun, but it has also been a
+great privilege to work with so many people willing to apply
+technologies in interesting new ways.
+
+All that aside, here are the categories of currently known RCU
+requirements:
+
+#. `Fundamental Requirements`_
+#. `Fundamental Non-Requirements`_
+#. `Parallelism Facts of Life`_
+#. `Quality-of-Implementation Requirements`_
+#. `Linux Kernel Complications`_
+#. `Software-Engineering Requirements`_
+#. `Other RCU Flavors`_
+#. `Possible Future Changes`_
+
+This is followed by a `summary <#Summary>`__, however, the answers to
+each quick quiz immediately follows the quiz. Select the big white space
+with your mouse to see the answer.
+
+Fundamental Requirements
+------------------------
+
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements. These are:
+
+#. `Grace-Period Guarantee`_
+#. `Publish/Subscribe Guarantee`_
+#. `Memory-Barrier Guarantees`_
+#. `RCU Primitives Guaranteed to Execute Unconditionally`_
+#. `Guaranteed Read-to-Write Upgrade`_
+
+Grace-Period Guarantee
+~~~~~~~~~~~~~~~~~~~~~~
+
+RCU's grace-period guarantee is unusual in being premeditated: Jack
+Slingwine and I had this guarantee firmly in mind when we started work
+on RCU (then called “rclock”) in the early 1990s. That said, the past
+two decades of experience with RCU have produced a much more detailed
+understanding of this guarantee.
+
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections. An RCU read-side
+critical section begins with the marker ``rcu_read_lock()`` and ends
+with the marker ``rcu_read_unlock()``. These markers may be nested, and
+RCU treats a nested set as one big RCU read-side critical section.
+Production-quality implementations of ``rcu_read_lock()`` and
+``rcu_read_unlock()`` are extremely lightweight, and in fact have
+exactly zero overhead in Linux kernels built for production use with
+``CONFIG_PREEMPT=n``.
+
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+   ::
+
+       1 int x, y;
+       2
+       3 void thread0(void)
+       4 {
+       5   rcu_read_lock();
+       6   r1 = READ_ONCE(x);
+       7   r2 = READ_ONCE(y);
+       8   rcu_read_unlock();
+       9 }
+      10
+      11 void thread1(void)
+      12 {
+      13   WRITE_ONCE(x, 1);
+      14   synchronize_rcu();
+      15   WRITE_ONCE(y, 1);
+      16 }
+
+Because the ``synchronize_rcu()`` on line 14 waits for all pre-existing
+readers, any instance of ``thread0()`` that loads a value of zero from
+``x`` must complete before ``thread1()`` stores to ``y``, so that
+instance must also load a value of zero from ``y``. Similarly, any
+instance of ``thread0()`` that loads a value of one from ``y`` must have
+started after the ``synchronize_rcu()`` started, and must therefore also
+load a value of one from ``x``. Therefore, the outcome:
+
+   ::
+
+      (r1 == 0 && r2 == 1)
+
+cannot happen.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Wait a minute! You said that updaters can make useful forward         |
+| progress concurrently with readers, but pre-existing readers will     |
+| block ``synchronize_rcu()``!!!                                        |
+| Just who are you trying to fool???                                    |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| First, if updaters do not wish to be blocked by readers, they can use |
+| ``call_rcu()`` or ``kfree_rcu()``, which will be discussed later.     |
+| Second, even when using ``synchronize_rcu()``, the other update-side  |
+| code does run concurrently with readers, whether pre-existing or not. |
++-----------------------------------------------------------------------+
+
+This scenario resembles one of the first uses of RCU in
+`DYNIX/ptx <https://en.wikipedia.org/wiki/DYNIX>`__, which managed a
+distributed lock manager's transition into a state suitable for handling
+recovery from node failure, more or less as follows:
+
+   ::
+
+       1 #define STATE_NORMAL        0
+       2 #define STATE_WANT_RECOVERY 1
+       3 #define STATE_RECOVERING    2
+       4 #define STATE_WANT_NORMAL   3
+       5
+       6 int state = STATE_NORMAL;
+       7
+       8 void do_something_dlm(void)
+       9 {
+      10   int state_snap;
+      11
+      12   rcu_read_lock();
+      13   state_snap = READ_ONCE(state);
+      14   if (state_snap == STATE_NORMAL)
+      15     do_something();
+      16   else
+      17     do_something_carefully();
+      18   rcu_read_unlock();
+      19 }
+      20
+      21 void start_recovery(void)
+      22 {
+      23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
+      24   synchronize_rcu();
+      25   WRITE_ONCE(state, STATE_RECOVERING);
+      26   recovery();
+      27   WRITE_ONCE(state, STATE_WANT_NORMAL);
+      28   synchronize_rcu();
+      29   WRITE_ONCE(state, STATE_NORMAL);
+      30 }
+
+The RCU read-side critical section in ``do_something_dlm()`` works with
+the ``synchronize_rcu()`` in ``start_recovery()`` to guarantee that
+``do_something()`` never runs concurrently with ``recovery()``, but with
+little or no synchronization overhead in ``do_something_dlm()``.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why is the ``synchronize_rcu()`` on line 28 needed?                   |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Without that extra grace period, memory reordering could result in    |
+| ``do_something_dlm()`` executing ``do_something()`` concurrently with |
+| the last bits of ``recovery()``.                                      |
++-----------------------------------------------------------------------+
+
+In order to avoid fatal problems such as deadlocks, an RCU read-side
+critical section must not contain calls to ``synchronize_rcu()``.
+Similarly, an RCU read-side critical section must not contain anything
+that waits, directly or indirectly, on completion of an invocation of
+``synchronize_rcu()``.
+
+Although RCU's grace-period guarantee is useful in and of itself, with
+`quite a few use cases <https://lwn.net/Articles/573497/>`__, it would
+be good to be able to use RCU to coordinate read-side access to linked
+data structures. For this, the grace-period guarantee is not sufficient,
+as can be seen in function ``add_gp_buggy()`` below. We will look at the
+reader's code later, but in the meantime, just think of the reader as
+locklessly picking up the ``gp`` pointer, and, if the value loaded is
+non-\ ``NULL``, locklessly accessing the ``->a`` and ``->b`` fields.
+
+   ::
+
+       1 bool add_gp_buggy(int a, int b)
+       2 {
+       3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+       4   if (!p)
+       5     return -ENOMEM;
+       6   spin_lock(&gp_lock);
+       7   if (rcu_access_pointer(gp)) {
+       8     spin_unlock(&gp_lock);
+       9     return false;
+      10   }
+      11   p->a = a;
+      12   p->b = a;
+      13   gp = p; /* ORDERING BUG */
+      14   spin_unlock(&gp_lock);
+      15   return true;
+      16 }
+
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+   ::
+
+       1 bool add_gp_buggy_optimized(int a, int b)
+       2 {
+       3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+       4   if (!p)
+       5     return -ENOMEM;
+       6   spin_lock(&gp_lock);
+       7   if (rcu_access_pointer(gp)) {
+       8     spin_unlock(&gp_lock);
+       9     return false;
+      10   }
+      11   gp = p; /* ORDERING BUG */
+      12   p->a = a;
+      13   p->b = a;
+      14   spin_unlock(&gp_lock);
+      15   return true;
+      16 }
+
+If an RCU reader fetches ``gp`` just after ``add_gp_buggy_optimized``
+executes line 11, it will see garbage in the ``->a`` and ``->b`` fields.
+And this is but one of many ways in which compiler and hardware
+optimizations could cause trouble. Therefore, we clearly need some way
+to prevent the compiler and the CPU from reordering in this manner,
+which brings us to the publish-subscribe guarantee discussed in the next
+section.
+
+Publish/Subscribe Guarantee
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+RCU's publish-subscribe guarantee allows data to be inserted into a
+linked data structure without disrupting RCU readers. The updater uses
+``rcu_assign_pointer()`` to insert the new data, and readers use
+``rcu_dereference()`` to access data, whether new or old. The following
+shows an example of insertion:
+
+   ::
+
+       1 bool add_gp(int a, int b)
+       2 {
+       3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+       4   if (!p)
+       5     return -ENOMEM;
+       6   spin_lock(&gp_lock);
+       7   if (rcu_access_pointer(gp)) {
+       8     spin_unlock(&gp_lock);
+       9     return false;
+      10   }
+      11   p->a = a;
+      12   p->b = a;
+      13   rcu_assign_pointer(gp, p);
+      14   spin_unlock(&gp_lock);
+      15   return true;
+      16 }
+
+The ``rcu_assign_pointer()`` on line 13 is conceptually equivalent to a
+simple assignment statement, but also guarantees that its assignment
+will happen after the two assignments in lines 11 and 12, similar to the
+C11 ``memory_order_release`` store operation. It also prevents any
+number of “interesting” compiler optimizations, for example, the use of
+``gp`` as a scratch location immediately preceding the assignment.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But ``rcu_assign_pointer()`` does nothing to prevent the two          |
+| assignments to ``p->a`` and ``p->b`` from being reordered. Can't that |
+| also cause problems?                                                  |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| No, it cannot. The readers cannot see either of these two fields      |
+| until the assignment to ``gp``, by which time both fields are fully   |
+| initialized. So reordering the assignments to ``p->a`` and ``p->b``   |
+| cannot possibly cause any problems.                                   |
++-----------------------------------------------------------------------+
+
+It is tempting to assume that the reader need not do anything special to
+control its accesses to the RCU-protected data, as shown in
+``do_something_gp_buggy()`` below:
+
+   ::
+
+       1 bool do_something_gp_buggy(void)
+       2 {
+       3   rcu_read_lock();
+       4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
+       5   if (p) {
+       6     do_something(p->a, p->b);
+       7     rcu_read_unlock();
+       8     return true;
+       9   }
+      10   rcu_read_unlock();
+      11   return false;
+      12 }
+
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler (to say nothing of
+`DEC Alpha CPUs <https://h71000.www7.hp.com/wizard/wiz_2637.html>`__)
+can trip this code up. For but one example, if the compiler were short
+of registers, it might choose to refetch from ``gp`` rather than keeping
+a separate copy in ``p`` as follows:
+
+   ::
+
+       1 bool do_something_gp_buggy_optimized(void)
+       2 {
+       3   rcu_read_lock();
+       4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
+       5     do_something(gp->a, gp->b);
+       6     rcu_read_unlock();
+       7     return true;
+       8   }
+       9   rcu_read_unlock();
+      10   return false;
+      11 }
+
+If this function ran concurrently with a series of updates that replaced
+the current structure with a new one, the fetches of ``gp->a`` and
+``gp->b`` might well come from two different structures, which could
+cause serious confusion. To prevent this (and much else besides),
+``do_something_gp()`` uses ``rcu_dereference()`` to fetch from ``gp``:
+
+   ::
+
+       1 bool do_something_gp(void)
+       2 {
+       3   rcu_read_lock();
+       4   p = rcu_dereference(gp);
+       5   if (p) {
+       6     do_something(p->a, p->b);
+       7     rcu_read_unlock();
+       8     return true;
+       9   }
+      10   rcu_read_unlock();
+      11   return false;
+      12 }
+
+The ``rcu_dereference()`` uses volatile casts and (for DEC Alpha) memory
+barriers in the Linux kernel. Should a `high-quality implementation of
+C11 ``memory_order_consume``
+[PDF] <http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf>`__
+ever appear, then ``rcu_dereference()`` could be implemented as a
+``memory_order_consume`` load. Regardless of the exact implementation, a
+pointer fetched by ``rcu_dereference()`` may not be used outside of the
+outermost RCU read-side critical section containing that
+``rcu_dereference()``, unless protection of the corresponding data
+element has been passed from RCU to some other synchronization
+mechanism, most commonly locking or `reference
+counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+
+In short, updaters use ``rcu_assign_pointer()`` and readers use
+``rcu_dereference()``, and these two RCU API elements work together to
+ensure that readers have a consistent view of newly added data elements.
+
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+#. Remove the data element from the enclosing structure.
+#. Wait for all pre-existing RCU read-side critical sections to complete
+   (because only pre-existing readers can possibly have a reference to
+   the newly removed data element).
+#. At this point, only the updater has a reference to the newly removed
+   data element, so it can safely reclaim the data element, for example,
+   by passing it to ``kfree()``.
+
+This process is implemented by ``remove_gp_synchronous()``:
+
+   ::
+
+       1 bool remove_gp_synchronous(void)
+       2 {
+       3   struct foo *p;
+       4
+       5   spin_lock(&gp_lock);
+       6   p = rcu_access_pointer(gp);
+       7   if (!p) {
+       8     spin_unlock(&gp_lock);
+       9     return false;
+      10   }
+      11   rcu_assign_pointer(gp, NULL);
+      12   spin_unlock(&gp_lock);
+      13   synchronize_rcu();
+      14   kfree(p);
+      15   return true;
+      16 }
+
+This function is straightforward, with line 13 waiting for a grace
+period before line 14 frees the old data element. This waiting ensures
+that readers will reach line 7 of ``do_something_gp()`` before the data
+element referenced by ``p`` is freed. The ``rcu_access_pointer()`` on
+line 6 is similar to ``rcu_dereference()``, except that:
+
+#. The value returned by ``rcu_access_pointer()`` cannot be
+   dereferenced. If you want to access the value pointed to as well as
+   the pointer itself, use ``rcu_dereference()`` instead of
+   ``rcu_access_pointer()``.
+#. The call to ``rcu_access_pointer()`` need not be protected. In
+   contrast, ``rcu_dereference()`` must either be within an RCU
+   read-side critical section or in a code segment where the pointer
+   cannot change, for example, in code protected by the corresponding
+   update-side lock.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Without the ``rcu_dereference()`` or the ``rcu_access_pointer()``,    |
+| what destructive optimizations might the compiler make use of?        |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Let's start with what happens to ``do_something_gp()`` if it fails to |
+| use ``rcu_dereference()``. It could reuse a value formerly fetched    |
+| from this same pointer. It could also fetch the pointer from ``gp``   |
+| in a byte-at-a-time manner, resulting in *load tearing*, in turn      |
+| resulting a bytewise mash-up of two distinct pointer values. It might |
+| even use value-speculation optimizations, where it makes a wrong      |
+| guess, but by the time it gets around to checking the value, an       |
+| update has changed the pointer to match the wrong guess. Too bad      |
+| about any dereferences that returned pre-initialization garbage in    |
+| the meantime!                                                         |
+| For ``remove_gp_synchronous()``, as long as all modifications to      |
+| ``gp`` are carried out while holding ``gp_lock``, the above           |
+| optimizations are harmless. However, ``sparse`` will complain if you  |
+| define ``gp`` with ``__rcu`` and then access it without using either  |
+| ``rcu_access_pointer()`` or ``rcu_dereference()``.                    |
++-----------------------------------------------------------------------+
+
+In short, RCU's publish-subscribe guarantee is provided by the
+combination of ``rcu_assign_pointer()`` and ``rcu_dereference()``. This
+guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers. This guarantee
+can be used in combination with the grace-period guarantee to also allow
+data elements to be removed from RCU-protected linked data structures,
+again without disrupting RCU readers.
+
+This guarantee was only partially premeditated. DYNIX/ptx used an
+explicit memory barrier for publication, but had nothing resembling
+``rcu_dereference()`` for subscription, nor did it have anything
+resembling the ``smp_read_barrier_depends()`` that was later subsumed
+into ``rcu_dereference()`` and later still into ``READ_ONCE()``. The
+need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company. It took the Alpha architects a
+good hour to convince me that any sort of barrier would ever be needed,
+and it then took me a good *two* hours to convince them that their
+documentation did not make this point clear. More recent work with the C
+and C++ standards committees have provided much education on tricks and
+traps from the compiler. In short, compilers were much less tricky in
+the early 1990s, but in 2015, don't even think about omitting
+``rcu_dereference()``!
+
+Memory-Barrier Guarantees
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+#. Each CPU that has an RCU read-side critical section that begins
+   before ``synchronize_rcu()`` starts is guaranteed to execute a full
+   memory barrier between the time that the RCU read-side critical
+   section ends and the time that ``synchronize_rcu()`` returns. Without
+   this guarantee, a pre-existing RCU read-side critical section might
+   hold a reference to the newly removed ``struct foo`` after the
+   ``kfree()`` on line 14 of ``remove_gp_synchronous()``.
+#. Each CPU that has an RCU read-side critical section that ends after
+   ``synchronize_rcu()`` returns is guaranteed to execute a full memory
+   barrier between the time that ``synchronize_rcu()`` begins and the
+   time that the RCU read-side critical section begins. Without this
+   guarantee, a later RCU read-side critical section running after the
+   ``kfree()`` on line 14 of ``remove_gp_synchronous()`` might later run
+   ``do_something_gp()`` and find the newly deleted ``struct foo``.
+#. If the task invoking ``synchronize_rcu()`` remains on a given CPU,
+   then that CPU is guaranteed to execute a full memory barrier sometime
+   during the execution of ``synchronize_rcu()``. This guarantee ensures
+   that the ``kfree()`` on line 14 of ``remove_gp_synchronous()`` really
+   does execute after the removal on line 11.
+#. If the task invoking ``synchronize_rcu()`` migrates among a group of
+   CPUs during that invocation, then each of the CPUs in that group is
+   guaranteed to execute a full memory barrier sometime during the
+   execution of ``synchronize_rcu()``. This guarantee also ensures that
+   the ``kfree()`` on line 14 of ``remove_gp_synchronous()`` really does
+   execute after the removal on line 11, but also in the case where the
+   thread executing the ``synchronize_rcu()`` migrates in the meantime.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Given that multiple CPUs can start RCU read-side critical sections at |
+| any time without any ordering whatsoever, how can RCU possibly tell   |
+| whether or not a given RCU read-side critical section starts before a |
+| given instance of ``synchronize_rcu()``?                              |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| If RCU cannot tell whether or not a given RCU read-side critical      |
+| section starts before a given instance of ``synchronize_rcu()``, then |
+| it must assume that the RCU read-side critical section started first. |
+| In other words, a given instance of ``synchronize_rcu()`` can avoid   |
+| waiting on a given RCU read-side critical section only if it can      |
+| prove that ``synchronize_rcu()`` started first.                       |
+| A related question is “When ``rcu_read_lock()`` doesn't generate any  |
+| code, why does it matter how it relates to a grace period?” The       |
+| answer is that it is not the relationship of ``rcu_read_lock()``      |
+| itself that is important, but rather the relationship of the code     |
+| within the enclosed RCU read-side critical section to the code        |
+| preceding and following the grace period. If we take this viewpoint,  |
+| then a given RCU read-side critical section begins before a given     |
+| grace period when some access preceding the grace period observes the |
+| effect of some access within the critical section, in which case none |
+| of the accesses within the critical section may observe the effects   |
+| of any access following the grace period.                             |
+|                                                                       |
+| As of late 2016, mathematical models of RCU take this viewpoint, for  |
+| example, see slides 62 and 63 of the `2016 LinuxCon                   |
+| EU <http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.201 |
+| 6.10.04c.LCE.pdf>`__                                                  |
+| presentation.                                                         |
++-----------------------------------------------------------------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| The first and second guarantees require unbelievably strict ordering! |
+| Are all these memory barriers *really* required?                      |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Yes, they really are required. To see why the first guarantee is      |
+| required, consider the following sequence of events:                  |
+|                                                                       |
+| #. CPU 1: ``rcu_read_lock()``                                         |
+| #. CPU 1: ``q = rcu_dereference(gp); /* Very likely to return p. */`` |
+| #. CPU 0: ``list_del_rcu(p);``                                        |
+| #. CPU 0: ``synchronize_rcu()`` starts.                               |
+| #. CPU 1: ``do_something_with(q->a);``                                |
+|    ``/* No smp_mb(), so might happen after kfree(). */``              |
+| #. CPU 1: ``rcu_read_unlock()``                                       |
+| #. CPU 0: ``synchronize_rcu()`` returns.                              |
+| #. CPU 0: ``kfree(p);``                                               |
+|                                                                       |
+| Therefore, there absolutely must be a full memory barrier between the |
+| end of the RCU read-side critical section and the end of the grace    |
+| period.                                                               |
+|                                                                       |
+| The sequence of events demonstrating the necessity of the second rule |
+| is roughly similar:                                                   |
+|                                                                       |
+| #. CPU 0: ``list_del_rcu(p);``                                        |
+| #. CPU 0: ``synchronize_rcu()`` starts.                               |
+| #. CPU 1: ``rcu_read_lock()``                                         |
+| #. CPU 1: ``q = rcu_dereference(gp);``                                |
+|    ``/* Might return p if no memory barrier. */``                     |
+| #. CPU 0: ``synchronize_rcu()`` returns.                              |
+| #. CPU 0: ``kfree(p);``                                               |
+| #. CPU 1: ``do_something_with(q->a); /* Boom!!! */``                  |
+| #. CPU 1: ``rcu_read_unlock()``                                       |
+|                                                                       |
+| And similarly, without a memory barrier between the beginning of the  |
+| grace period and the beginning of the RCU read-side critical section, |
+| CPU 1 might end up accessing the freelist.                            |
+|                                                                       |
+| The “as if” rule of course applies, so that any implementation that   |
+| acts as if the appropriate memory barriers were in place is a correct |
+| implementation. That said, it is much easier to fool yourself into    |
+| believing that you have adhered to the as-if rule than it is to       |
+| actually adhere to it!                                                |
++-----------------------------------------------------------------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| You claim that ``rcu_read_lock()`` and ``rcu_read_unlock()`` generate |
+| absolutely no code in some kernel builds. This means that the         |
+| compiler might arbitrarily rearrange consecutive RCU read-side        |
+| critical sections. Given such rearrangement, if a given RCU read-side |
+| critical section is done, how can you be sure that all prior RCU      |
+| read-side critical sections are done? Won't the compiler              |
+| rearrangements make that impossible to determine?                     |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| In cases where ``rcu_read_lock()`` and ``rcu_read_unlock()`` generate |
+| absolutely no code, RCU infers quiescent states only at special       |
+| locations, for example, within the scheduler. Because calls to        |
+| ``schedule()`` had better prevent calling-code accesses to shared     |
+| variables from being rearranged across the call to ``schedule()``, if |
+| RCU detects the end of a given RCU read-side critical section, it     |
+| will necessarily detect the end of all prior RCU read-side critical   |
+| sections, no matter how aggressively the compiler scrambles the code. |
+| Again, this all assumes that the compiler cannot scramble code across |
+| calls to the scheduler, out of interrupt handlers, into the idle      |
+| loop, into user-mode code, and so on. But if your kernel build allows |
+| that sort of scrambling, you have broken far more than just RCU!      |
++-----------------------------------------------------------------------+
+
+Note that these memory-barrier requirements do not replace the
+fundamental RCU requirement that a grace period wait for all
+pre-existing readers. On the contrary, the memory barriers called out in
+this section must operate in such a way as to *enforce* this fundamental
+requirement. Of course, different implementations enforce this
+requirement in different ways, but enforce it they must.
+
+RCU Primitives Guaranteed to Execute Unconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The common-case RCU primitives are unconditional. They are invoked, they
+do their job, and they return, with no possibility of error, and no need
+to retry. This is a key RCU design philosophy.
+
+However, this philosophy is pragmatic rather than pigheaded. If someone
+comes up with a good justification for a particular conditional RCU
+primitive, it might well be implemented and added. After all, this
+guarantee was reverse-engineered, not premeditated. The unconditional
+nature of the RCU primitives was initially an accident of
+implementation, and later experience with synchronization primitives
+with conditional primitives caused me to elevate this accident to a
+guarantee. Therefore, the justification for adding a conditional
+primitive to RCU would need to be based on detailed and compelling use
+cases.
+
+Guaranteed Read-to-Write Upgrade
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As far as RCU is concerned, it is always possible to carry out an update
+within an RCU read-side critical section. For example, that RCU
+read-side critical section might search for a given data element, and
+then might acquire the update-side spinlock in order to update that
+element, all while remaining in that RCU read-side critical section. Of
+course, it is necessary to exit the RCU read-side critical section
+before invoking ``synchronize_rcu()``, however, this inconvenience can
+be avoided through use of the ``call_rcu()`` and ``kfree_rcu()`` API
+members described later in this document.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But how does the upgrade-to-write operation exclude other readers?    |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| It doesn't, just like normal RCU updates, which also do not exclude   |
+| RCU readers.                                                          |
++-----------------------------------------------------------------------+
+
+This guarantee allows lookup code to be shared between read-side and
+update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+Fundamental Non-Requirements
+----------------------------
+
+RCU provides extremely lightweight readers, and its read-side
+guarantees, though quite useful, are correspondingly lightweight. It is
+therefore all too easy to assume that RCU is guaranteeing more than it
+really is. Of course, the list of things that RCU does not guarantee is
+infinitely long, however, the following sections list a few
+non-guarantees that have caused confusion. Except where otherwise noted,
+these non-guarantees were premeditated.
+
+#. `Readers Impose Minimal Ordering`_
+#. `Readers Do Not Exclude Updaters`_
+#. `Updaters Only Wait For Old Readers`_
+#. `Grace Periods Don't Partition Read-Side Critical Sections`_
+#. `Read-Side Critical Sections Don't Partition Grace Periods`_
+
+Readers Impose Minimal Ordering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Reader-side markers such as ``rcu_read_lock()`` and
+``rcu_read_unlock()`` provide absolutely no ordering guarantees except
+through their interaction with the grace-period APIs such as
+``synchronize_rcu()``. To see this, consider the following pair of
+threads:
+
+   ::
+
+       1 void thread0(void)
+       2 {
+       3   rcu_read_lock();
+       4   WRITE_ONCE(x, 1);
+       5   rcu_read_unlock();
+       6   rcu_read_lock();
+       7   WRITE_ONCE(y, 1);
+       8   rcu_read_unlock();
+       9 }
+      10
+      11 void thread1(void)
+      12 {
+      13   rcu_read_lock();
+      14   r1 = READ_ONCE(y);
+      15   rcu_read_unlock();
+      16   rcu_read_lock();
+      17   r2 = READ_ONCE(x);
+      18   rcu_read_unlock();
+      19 }
+
+After ``thread0()`` and ``thread1()`` execute concurrently, it is quite
+possible to have
+
+   ::
+
+      (r1 == 1 && r2 == 0)
+
+(that is, ``y`` appears to have been assigned before ``x``), which would
+not be possible if ``rcu_read_lock()`` and ``rcu_read_unlock()`` had
+much in the way of ordering properties. But they do not, so the CPU is
+within its rights to do significant reordering. This is by design: Any
+significant ordering constraints would slow down these fast-path APIs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Can't the compiler also reorder this code?                            |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| No, the volatile casts in ``READ_ONCE()`` and ``WRITE_ONCE()``        |
+| prevent the compiler from reordering in this particular case.         |
++-----------------------------------------------------------------------+
+
+Readers Do Not Exclude Updaters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Neither ``rcu_read_lock()`` nor ``rcu_read_unlock()`` exclude updates.
+All they do is to prevent grace periods from ending. The following
+example illustrates this:
+
+   ::
+
+       1 void thread0(void)
+       2 {
+       3   rcu_read_lock();
+       4   r1 = READ_ONCE(y);
+       5   if (r1) {
+       6     do_something_with_nonzero_x();
+       7     r2 = READ_ONCE(x);
+       8     WARN_ON(!r2); /* BUG!!! */
+       9   }
+      10   rcu_read_unlock();
+      11 }
+      12
+      13 void thread1(void)
+      14 {
+      15   spin_lock(&my_lock);
+      16   WRITE_ONCE(x, 1);
+      17   WRITE_ONCE(y, 1);
+      18   spin_unlock(&my_lock);
+      19 }
+
+If the ``thread0()`` function's ``rcu_read_lock()`` excluded the
+``thread1()`` function's update, the ``WARN_ON()`` could never fire. But
+the fact is that ``rcu_read_lock()`` does not exclude much of anything
+aside from subsequent grace periods, of which ``thread1()`` has none, so
+the ``WARN_ON()`` can and does fire.
+
+Updaters Only Wait For Old Readers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It might be tempting to assume that after ``synchronize_rcu()``
+completes, there are no readers executing. This temptation must be
+avoided because new readers can start immediately after
+``synchronize_rcu()`` starts, and ``synchronize_rcu()`` is under no
+obligation to wait for these new readers.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Suppose that synchronize_rcu() did wait until *all* readers had       |
+| completed instead of waiting only on pre-existing readers. For how    |
+| long would the updater be able to rely on there being no readers?     |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| For no time at all. Even if ``synchronize_rcu()`` were to wait until  |
+| all readers had completed, a new reader might start immediately after |
+| ``synchronize_rcu()`` completed. Therefore, the code following        |
+| ``synchronize_rcu()`` can *never* rely on there being no readers.     |
++-----------------------------------------------------------------------+
+
+Grace Periods Don't Partition Read-Side Critical Sections
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections. An example of this
+situation can be illustrated as follows, where ``x``, ``y``, and ``z``
+are initially all zero:
+
+   ::
+
+       1 void thread0(void)
+       2 {
+       3   rcu_read_lock();
+       4   WRITE_ONCE(a, 1);
+       5   WRITE_ONCE(b, 1);
+       6   rcu_read_unlock();
+       7 }
+       8
+       9 void thread1(void)
+      10 {
+      11   r1 = READ_ONCE(a);
+      12   synchronize_rcu();
+      13   WRITE_ONCE(c, 1);
+      14 }
+      15
+      16 void thread2(void)
+      17 {
+      18   rcu_read_lock();
+      19   r2 = READ_ONCE(b);
+      20   r3 = READ_ONCE(c);
+      21   rcu_read_unlock();
+      22 }
+
+It turns out that the outcome:
+
+   ::
+
+      (r1 == 1 && r2 == 0 && r3 == 1)
+
+is entirely possible. The following figure show how this can happen,
+with each circled ``QS`` indicating the point at which RCU recorded a
+*quiescent state* for each thread, that is, a state in which RCU knows
+that the thread cannot be in the midst of an RCU read-side critical
+section that started before the current grace period:
+
+.. kernel-figure:: GPpartitionReaders1.svg
+
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first grace
+period is known to end before the second grace period starts:
+
+   ::
+
+       1 void thread0(void)
+       2 {
+       3   rcu_read_lock();
+       4   WRITE_ONCE(a, 1);
+       5   WRITE_ONCE(b, 1);
+       6   rcu_read_unlock();
+       7 }
+       8
+       9 void thread1(void)
+      10 {
+      11   r1 = READ_ONCE(a);
+      12   synchronize_rcu();
+      13   WRITE_ONCE(c, 1);
+      14 }
+      15
+      16 void thread2(void)
+      17 {
+      18   r2 = READ_ONCE(c);
+      19   synchronize_rcu();
+      20   WRITE_ONCE(d, 1);
+      21 }
+      22
+      23 void thread3(void)
+      24 {
+      25   rcu_read_lock();
+      26   r3 = READ_ONCE(b);
+      27   r4 = READ_ONCE(d);
+      28   rcu_read_unlock();
+      29 }
+
+Here, if ``(r1 == 1)``, then ``thread0()``'s write to ``b`` must happen
+before the end of ``thread1()``'s grace period. If in addition
+``(r4 == 1)``, then ``thread3()``'s read from ``b`` must happen after
+the beginning of ``thread2()``'s grace period. If it is also the case
+that ``(r2 == 1)``, then the end of ``thread1()``'s grace period must
+precede the beginning of ``thread2()``'s grace period. This mean that
+the two RCU read-side critical sections cannot overlap, guaranteeing
+that ``(r3 == 1)``. As a result, the outcome:
+
+   ::
+
+      (r1 == 1 && r2 == 1 && r3 == 0 && r4 == 1)
+
+cannot happen.
+
+This non-requirement was also non-premeditated, but became apparent when
+studying RCU's interaction with memory ordering.
+
+Read-Side Critical Sections Don't Partition Grace Periods
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap. However, this temptation leads nowhere good, as can be
+illustrated by the following, with all variables initially zero:
+
+   ::
+
+       1 void thread0(void)
+       2 {
+       3   rcu_read_lock();
+       4   WRITE_ONCE(a, 1);
+       5   WRITE_ONCE(b, 1);
+       6   rcu_read_unlock();
+       7 }
+       8
+       9 void thread1(void)
+      10 {
+      11   r1 = READ_ONCE(a);
+      12   synchronize_rcu();
+      13   WRITE_ONCE(c, 1);
+      14 }
+      15
+      16 void thread2(void)
+      17 {
+      18   rcu_read_lock();
+      19   WRITE_ONCE(d, 1);
+      20   r2 = READ_ONCE(c);
+      21   rcu_read_unlock();
+      22 }
+      23
+      24 void thread3(void)
+      25 {
+      26   r3 = READ_ONCE(d);
+      27   synchronize_rcu();
+      28   WRITE_ONCE(e, 1);
+      29 }
+      30
+      31 void thread4(void)
+      32 {
+      33   rcu_read_lock();
+      34   r4 = READ_ONCE(b);
+      35   r5 = READ_ONCE(e);
+      36   rcu_read_unlock();
+      37 }
+
+In this case, the outcome:
+
+   ::
+
+      (r1 == 1 && r2 == 1 && r3 == 1 && r4 == 0 && r5 == 1)
+
+is entirely possible, as illustrated below:
+
+.. kernel-figure:: ReadersPartitionGP1.svg
+
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire grace
+period. As a result, an RCU read-side critical section cannot partition
+a pair of RCU grace periods.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| How long a sequence of grace periods, each separated by an RCU        |
+| read-side critical section, would be required to partition the RCU    |
+| read-side critical sections at the beginning and end of the chain?    |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| In theory, an infinite number. In practice, an unknown number that is |
+| sensitive to both implementation details and timing considerations.   |
+| Therefore, even in practice, RCU users must abide by the theoretical  |
+| rather than the practical answer.                                     |
++-----------------------------------------------------------------------+
+
+Parallelism Facts of Life
+-------------------------
+
+These parallelism facts of life are by no means specific to RCU, but the
+RCU implementation must abide by them. They therefore bear repeating:
+
+#. Any CPU or task may be delayed at any time, and any attempts to avoid
+   these delays by disabling preemption, interrupts, or whatever are
+   completely futile. This is most obvious in preemptible user-level
+   environments and in virtualized environments (where a given guest
+   OS's VCPUs can be preempted at any time by the underlying
+   hypervisor), but can also happen in bare-metal environments due to
+   ECC errors, NMIs, and other hardware events. Although a delay of more
+   than about 20 seconds can result in splats, the RCU implementation is
+   obligated to use algorithms that can tolerate extremely long delays,
+   but where “extremely long” is not long enough to allow wrap-around
+   when incrementing a 64-bit counter.
+#. Both the compiler and the CPU can reorder memory accesses. Where it
+   matters, RCU must use compiler directives and memory-barrier
+   instructions to preserve ordering.
+#. Conflicting writes to memory locations in any given cache line will
+   result in expensive cache misses. Greater numbers of concurrent
+   writes and more-frequent concurrent writes will result in more
+   dramatic slowdowns. RCU is therefore obligated to use algorithms that
+   have sufficient locality to avoid significant performance and
+   scalability problems.
+#. As a rough rule of thumb, only one CPU's worth of processing may be
+   carried out under the protection of any given exclusive lock. RCU
+   must therefore use scalable locking designs.
+#. Counters are finite, especially on 32-bit systems. RCU's use of
+   counters must therefore tolerate counter wrap, or be designed such
+   that counter wrap would take way more time than a single system is
+   likely to run. An uptime of ten years is quite possible, a runtime of
+   a century much less so. As an example of the latter, RCU's
+   dyntick-idle nesting counter allows 54 bits for interrupt nesting
+   level (this counter is 64 bits even on a 32-bit system). Overflowing
+   this counter requires 2\ :sup:`54` half-interrupts on a given CPU
+   without that CPU ever going idle. If a half-interrupt happened every
+   microsecond, it would take 570 years of runtime to overflow this
+   counter, which is currently believed to be an acceptably long time.
+#. Linux systems can have thousands of CPUs running a single Linux
+   kernel in a single shared-memory environment. RCU must therefore pay
+   close attention to high-end scalability.
+
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life. The idea that Linux might
+scale to systems with thousands of CPUs would have been met with some
+skepticism in the 1990s, but these requirements would have otherwise
+have been unsurprising, even in the early 1990s.
+
+Quality-of-Implementation Requirements
+--------------------------------------
+
+These sections list quality-of-implementation requirements. Although an
+RCU implementation that ignores these requirements could still be used,
+it would likely be subject to limitations that would make it
+inappropriate for industrial-strength production use. Classes of
+quality-of-implementation requirements are as follows:
+
+#. `Specialization`_
+#. `Performance and Scalability`_
+#. `Forward Progress`_
+#. `Composability`_
+#. `Corner Cases`_
+
+These classes is covered in the following sections.
+
+Specialization
+~~~~~~~~~~~~~~
+
+RCU is and always has been intended primarily for read-mostly
+situations, which means that RCU's read-side primitives are optimized,
+often at the expense of its update-side primitives. Experience thus far
+is captured by the following list of situations:
+
+#. Read-mostly data, where stale and inconsistent data is not a problem:
+   RCU works great!
+#. Read-mostly data, where data must be consistent: RCU works well.
+#. Read-write data, where data must be consistent: RCU *might* work OK.
+   Or not.
+#. Write-mostly data, where data must be consistent: RCU is very
+   unlikely to be the right tool for the job, with the following
+   exceptions, where RCU can provide:
+
+   a. Existence guarantees for update-friendly mechanisms.
+   b. Wait-free read-side primitives for real-time use.
+
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives. For example, the ``add_gp()`` and
+``remove_gp_synchronous()`` examples discussed earlier use RCU to
+protect readers and locking to coordinate updaters. However, the need
+extends much farther, requiring that a variety of synchronization
+primitives be legal within RCU read-side critical sections, including
+spinlocks, sequence locks, atomic operations, reference counters, and
+memory barriers.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| What about sleeping locks?                                            |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| These are forbidden within Linux-kernel RCU read-side critical        |
+| sections because it is not legal to place a quiescent state (in this  |
+| case, voluntary context switch) within an RCU read-side critical      |
+| section. However, sleeping locks may be used within userspace RCU     |
+| read-side critical sections, and also within Linux-kernel sleepable   |
+| RCU `(SRCU) <#Sleepable%20RCU>`__ read-side critical sections. In     |
+| addition, the -rt patchset turns spinlocks into a sleeping locks so   |
+| that the corresponding critical sections can be preempted, which also |
+| means that these sleeplockified spinlocks (but not other sleeping     |
+| locks!) may be acquire within -rt-Linux-kernel RCU read-side critical |
+| sections.                                                             |
+| Note that it *is* legal for a normal RCU read-side critical section   |
+| to conditionally acquire a sleeping locks (as in                      |
+| ``mutex_trylock()``), but only as long as it does not loop            |
+| indefinitely attempting to conditionally acquire that sleeping locks. |
+| The key point is that things like ``mutex_trylock()`` either return   |
+| with the mutex held, or return an error indication if the mutex was   |
+| not immediately available. Either way, ``mutex_trylock()`` returns    |
+| immediately without sleeping.                                         |
++-----------------------------------------------------------------------+
+
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, but many can function in that mode, with
+network routing being the poster child. Internet routing algorithms take
+significant time to propagate updates, so that by the time an update
+arrives at a given system, that system has been sending network traffic
+the wrong way for a considerable length of time. Having a few threads
+continue to send traffic the wrong way for a few more milliseconds is
+clearly not a problem: In the worst case, TCP retransmissions will
+eventually get the data where it needs to go. In general, when tracking
+the state of the universe outside of the computer, some level of
+inconsistency must be tolerated due to speed-of-light delays if nothing
+else.
+
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veterinarians might use heartbeat to determine
+whether or not a given cat was alive. But how long should they wait
+after the last heartbeat to decide that the cat is in fact dead? Waiting
+less than 400 milliseconds makes no sense because this would mean that a
+relaxed cat would be considered to cycle between death and life more
+than 100 times per minute. Moreover, just as with human beings, a cat's
+heart might stop for some period of time, so the exact wait period is a
+judgment call. One of our pair of veterinarians might wait 30 seconds
+before pronouncing the cat dead, while the other might insist on waiting
+a full minute. The two veterinarians would then disagree on the state of
+the cat during the final 30 seconds of the minute following the last
+heartbeat.
+
+Interestingly enough, this same situation applies to hardware. When push
+comes to shove, how do we tell whether or not some external server has
+failed? We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time. Policy decisions
+can usually tolerate short periods of inconsistency. The policy was
+decided some time ago, and is only now being put into effect, so a few
+milliseconds of delay is normally inconsequential.
+
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation between a user-level SystemV semaphore ID
+to the corresponding in-kernel data structure is protected by RCU, but
+it is absolutely forbidden to update a semaphore that has just been
+removed. In the Linux kernel, this need for consistency is accommodated
+by acquiring spinlocks located in the in-kernel data structure from
+within the RCU read-side critical section, and this is indicated by the
+green box in the figure above. Many other techniques may be used, and
+are in fact used within the Linux kernel.
+
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows the
+right mix of synchronization tools to be used for a given job.
+
+Performance and Scalability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Energy efficiency is a critical component of performance today, and
+Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs. I cannot claim that this requirement was
+premeditated. In fact, I learned of it during a telephone conversation
+in which I was given “frank and open” feedback on the importance of
+energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts. So much so that
+mere Linux-kernel-mailing-list posts are insufficient to vent their ire.
+
+Memory consumption is not particularly important for in most situations,
+and has become decreasingly so as memory sizes have expanded and memory
+costs have plummeted. However, as I learned from Matt Mackall's
+`bloatwatch <http://elinux.org/Linux_Tiny-FAQ>`__ efforts, memory
+footprint is critically important on single-CPU systems with
+non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny
+RCU <https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com>`__
+was born. Josh Triplett has since taken over the small-memory banner
+with his `Linux kernel tinification <https://tiny.wiki.kernel.org/>`__
+project, which resulted in `SRCU <#Sleepable%20RCU>`__ becoming optional
+for those kernels not needing it.
+
+The remaining performance requirements are, for the most part,
+unsurprising. For example, in keeping with RCU's read-side
+specialization, ``rcu_dereference()`` should have negligible overhead
+(for example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, ``rcu_read_lock()`` and
+``rcu_read_unlock()`` should have exactly zero overhead.
+
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), ``rcu_read_lock()`` and
+``rcu_read_unlock()`` should have minimal overhead. In particular, they
+should not contain atomic read-modify-write operations, memory-barrier
+instructions, preemption disabling, interrupt disabling, or backwards
+branches. However, in the case where the RCU read-side critical section
+was preempted, ``rcu_read_unlock()`` may acquire spinlocks and disable
+interrupts. This is why it is better to nest an RCU read-side critical
+section within a preempt-disable region than vice versa, at least in
+cases where that critical section is short enough to avoid unduly
+degrading real-time latencies.
+
+The ``synchronize_rcu()`` grace-period-wait primitive is optimized for
+throughput. It may therefore incur several milliseconds of latency in
+addition to the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+``synchronize_rcu()`` are required to use batching optimizations so that
+they can be satisfied by a single underlying grace-period-wait
+operation. For example, in the Linux kernel, it is not unusual for a
+single grace-period-wait operation to serve more than `1,000 separate
+invocations <https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response>`__
+of ``synchronize_rcu()``, thus amortizing the per-invocation overhead
+down to nearly zero. However, the grace-period optimization is also
+required to avoid measurable degradation of real-time scheduling and
+interrupt latencies.
+
+In some cases, the multi-millisecond ``synchronize_rcu()`` latencies are
+unacceptable. In these cases, ``synchronize_rcu_expedited()`` may be
+used instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short. There are currently no special latency
+requirements for ``synchronize_rcu_expedited()`` on large systems, but,
+consistent with the empirical nature of the RCU specification, that is
+subject to change. However, there most definitely are scalability
+requirements: A storm of ``synchronize_rcu_expedited()`` invocations on
+4096 CPUs should at least make reasonable forward progress. In return
+for its shorter latencies, ``synchronize_rcu_expedited()`` is permitted
+to impose modest degradation of real-time latency on non-idle online
+CPUs. Here, “modest” means roughly the same latency degradation as a
+scheduling-clock interrupt.
+
+There are a number of situations where even
+``synchronize_rcu_expedited()``'s reduced grace-period latency is
+unacceptable. In these situations, the asynchronous ``call_rcu()`` can
+be used in place of ``synchronize_rcu()`` as follows:
+
+   ::
+
+       1 struct foo {
+       2   int a;
+       3   int b;
+       4   struct rcu_head rh;
+       5 };
+       6
+       7 static void remove_gp_cb(struct rcu_head *rhp)
+       8 {
+       9   struct foo *p = container_of(rhp, struct foo, rh);
+      10
+      11   kfree(p);
+      12 }
+      13
+      14 bool remove_gp_asynchronous(void)
+      15 {
+      16   struct foo *p;
+      17
+      18   spin_lock(&gp_lock);
+      19   p = rcu_access_pointer(gp);
+      20   if (!p) {
+      21     spin_unlock(&gp_lock);
+      22     return false;
+      23   }
+      24   rcu_assign_pointer(gp, NULL);
+      25   call_rcu(&p->rh, remove_gp_cb);
+      26   spin_unlock(&gp_lock);
+      27   return true;
+      28 }
+
+A definition of ``struct foo`` is finally needed, and appears on
+lines 1-5. The function ``remove_gp_cb()`` is passed to ``call_rcu()``
+on line 25, and will be invoked after the end of a subsequent grace
+period. This gets the same effect as ``remove_gp_synchronous()``, but
+without forcing the updater to wait for a grace period to elapse. The
+``call_rcu()`` function may be used in a number of situations where
+neither ``synchronize_rcu()`` nor ``synchronize_rcu_expedited()`` would
+be legal, including within preempt-disable code, ``local_bh_disable()``
+code, interrupt-disable code, and interrupt handlers. However, even
+``call_rcu()`` is illegal within NMI handlers and from idle and offline
+CPUs. The callback function (``remove_gp_cb()`` in this case) will be
+executed within softirq (software interrupt) environment within the
+Linux kernel, either within a real softirq handler or under the
+protection of ``local_bh_disable()``. In both the Linux kernel and in
+userspace, it is bad practice to write an RCU callback function that
+takes too long. Long-running operations should be relegated to separate
+threads or (in the Linux kernel) workqueues.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why does line 19 use ``rcu_access_pointer()``? After all,             |
+| ``call_rcu()`` on line 25 stores into the structure, which would      |
+| interact badly with concurrent insertions. Doesn't this mean that     |
+| ``rcu_dereference()`` is required?                                    |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Presumably the ``->gp_lock`` acquired on line 18 excludes any         |
+| changes, including any insertions that ``rcu_dereference()`` would    |
+| protect against. Therefore, any insertions will be delayed until      |
+| after ``->gp_lock`` is released on line 25, which in turn means that  |
+| ``rcu_access_pointer()`` suffices.                                    |
++-----------------------------------------------------------------------+
+
+However, all that ``remove_gp_cb()`` is doing is invoking ``kfree()`` on
+the data element. This is a common idiom, and is supported by
+``kfree_rcu()``, which allows “fire and forget” operation as shown
+below:
+
+   ::
+
+       1 struct foo {
+       2   int a;
+       3   int b;
+       4   struct rcu_head rh;
+       5 };
+       6
+       7 bool remove_gp_faf(void)
+       8 {
+       9   struct foo *p;
+      10
+      11   spin_lock(&gp_lock);
+      12   p = rcu_dereference(gp);
+      13   if (!p) {
+      14     spin_unlock(&gp_lock);
+      15     return false;
+      16   }
+      17   rcu_assign_pointer(gp, NULL);
+      18   kfree_rcu(p, rh);
+      19   spin_unlock(&gp_lock);
+      20   return true;
+      21 }
+
+Note that ``remove_gp_faf()`` simply invokes ``kfree_rcu()`` and
+proceeds, without any need to pay any further attention to the
+subsequent grace period and ``kfree()``. It is permissible to invoke
+``kfree_rcu()`` from the same environments as for ``call_rcu()``.
+Interestingly enough, DYNIX/ptx had the equivalents of ``call_rcu()``
+and ``kfree_rcu()``, but not ``synchronize_rcu()``. This was due to the
+fact that RCU was not heavily used within DYNIX/ptx, so the very few
+places that needed something like ``synchronize_rcu()`` simply
+open-coded it.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Earlier it was claimed that ``call_rcu()`` and ``kfree_rcu()``        |
+| allowed updaters to avoid being blocked by readers. But how can that  |
+| be correct, given that the invocation of the callback and the freeing |
+| of the memory (respectively) must still wait for a grace period to    |
+| elapse?                                                               |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| We could define things this way, but keep in mind that this sort of   |
+| definition would say that updates in garbage-collected languages      |
+| cannot complete until the next time the garbage collector runs, which |
+| does not seem at all reasonable. The key point is that in most cases, |
+| an updater using either ``call_rcu()`` or ``kfree_rcu()`` can proceed |
+| to the next update as soon as it has invoked ``call_rcu()`` or        |
+| ``kfree_rcu()``, without having to wait for a subsequent grace        |
+| period.                                                               |
++-----------------------------------------------------------------------+
+
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks that can
+be carried out in the meantime? The polling-style
+``get_state_synchronize_rcu()`` and ``cond_synchronize_rcu()`` functions
+may be used for this purpose, as shown below:
+
+   ::
+
+       1 bool remove_gp_poll(void)
+       2 {
+       3   struct foo *p;
+       4   unsigned long s;
+       5
+       6   spin_lock(&gp_lock);
+       7   p = rcu_access_pointer(gp);
+       8   if (!p) {
+       9     spin_unlock(&gp_lock);
+      10     return false;
+      11   }
+      12   rcu_assign_pointer(gp, NULL);
+      13   spin_unlock(&gp_lock);
+      14   s = get_state_synchronize_rcu();
+      15   do_something_while_waiting();
+      16   cond_synchronize_rcu(s);
+      17   kfree(p);
+      18   return true;
+      19 }
+
+On line 14, ``get_state_synchronize_rcu()`` obtains a “cookie” from RCU,
+then line 15 carries out other tasks, and finally, line 16 returns
+immediately if a grace period has elapsed in the meantime, but otherwise
+waits as required. The need for ``get_state_synchronize_rcu`` and
+``cond_synchronize_rcu()`` has appeared quite recently, so it is too
+early to tell whether they will stand the test of time.
+
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility and CPU overhead.
+
+Forward Progress
+~~~~~~~~~~~~~~~~
+
+In theory, delaying grace-period completion and callback invocation is
+harmless. In practice, not only are memory sizes finite but also
+callbacks sometimes do wakeups, and sufficiently deferred wakeups can be
+difficult to distinguish from system hangs. Therefore, RCU must provide
+a number of mechanisms to promote forward progress.
+
+These mechanisms are not foolproof, nor can they be. For one simple
+example, an infinite loop in an RCU read-side critical section must by
+definition prevent later grace periods from ever completing. For a more
+involved example, consider a 64-CPU system built with
+``CONFIG_RCU_NOCB_CPU=y`` and booted with ``rcu_nocbs=1-63``, where
+CPUs 1 through 63 spin in tight loops that invoke ``call_rcu()``. Even
+if these tight loops also contain calls to ``cond_resched()`` (thus
+allowing grace periods to complete), CPU 0 simply will not be able to
+invoke callbacks as fast as the other 63 CPUs can register them, at
+least not until the system runs out of memory. In both of these
+examples, the Spiderman principle applies: With great power comes great
+responsibility. However, short of this level of abuse, RCU is required
+to ensure timely completion of grace periods and timely invocation of
+callbacks.
+
+RCU takes the following steps to encourage timely completion of grace
+periods:
+
+#. If a grace period fails to complete within 100 milliseconds, RCU
+   causes future invocations of ``cond_resched()`` on the holdout CPUs
+   to provide an RCU quiescent state. RCU also causes those CPUs'
+   ``need_resched()`` invocations to return ``true``, but only after the
+   corresponding CPU's next scheduling-clock.
+#. CPUs mentioned in the ``nohz_full`` kernel boot parameter can run
+   indefinitely in the kernel without scheduling-clock interrupts, which
+   defeats the above ``need_resched()`` strategem. RCU will therefore
+   invoke ``resched_cpu()`` on any ``nohz_full`` CPUs still holding out
+   after 109 milliseconds.
+#. In kernels built with ``CONFIG_RCU_BOOST=y``, if a given task that
+   has been preempted within an RCU read-side critical section is
+   holding out for more than 500 milliseconds, RCU will resort to
+   priority boosting.
+#. If a CPU is still holding out 10 seconds into the grace period, RCU
+   will invoke ``resched_cpu()`` on it regardless of its ``nohz_full``
+   state.
+
+The above values are defaults for systems running with ``HZ=1000``. They
+will vary as the value of ``HZ`` varies, and can also be changed using
+the relevant Kconfig options and kernel boot parameters. RCU currently
+does not do much sanity checking of these parameters, so please use
+caution when changing them. Note that these forward-progress measures
+are provided only for RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks
+RCU <#Tasks%20RCU>`__.
+
+RCU takes the following steps in ``call_rcu()`` to encourage timely
+invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has
+10,000 callbacks, or has 10,000 more callbacks than it had the last time
+encouragement was provided:
+
+#. Starts a grace period, if one is not already in progress.
+#. Forces immediate checking for quiescent states, rather than waiting
+   for three milliseconds to have elapsed since the beginning of the
+   grace period.
+#. Immediately tags the CPU's callbacks with their grace period
+   completion numbers, rather than waiting for the ``RCU_SOFTIRQ``
+   handler to get around to it.
+#. Lifts callback-execution batch limits, which speeds up callback
+   invocation at the expense of degrading realtime response.
+
+Again, these are default values when running at ``HZ=1000``, and can be
+overridden. Again, these forward-progress measures are provided only for
+RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks
+RCU <#Tasks%20RCU>`__. Even for RCU, callback-invocation forward
+progress for ``rcu_nocbs`` CPUs is much less well-developed, in part
+because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke
+``call_rcu()`` relatively infrequently. If workloads emerge that need
+both ``rcu_nocbs`` CPUs and high ``call_rcu()`` invocation rates, then
+additional forward-progress work will be required.
+
+Composability
+~~~~~~~~~~~~~
+
+Composability has received much attention in recent years, perhaps in
+part due to the collision of multicore hardware with object-oriented
+techniques designed in single-threaded environments for single-threaded
+use. And in theory, RCU read-side critical sections may be composed, and
+in fact may be nested arbitrarily deeply. In practice, as with all
+real-world implementations of composable constructs, there are
+limitations.
+
+Implementations of RCU for which ``rcu_read_lock()`` and
+``rcu_read_unlock()`` generate no code, such as Linux-kernel RCU when
+``CONFIG_PREEMPT=n``, can be nested arbitrarily deeply. After all, there
+is no overhead. Except that if all these instances of
+``rcu_read_lock()`` and ``rcu_read_unlock()`` are visible to the
+compiler, compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first. If the nesting is
+not visible to the compiler, as is the case with mutually recursive
+functions each in its own translation unit, stack overflow will result.
+If the nesting takes the form of loops, perhaps in the guise of tail
+recursion, either the control variable will overflow or (in the Linux
+kernel) you will get an RCU CPU stall warning. Nevertheless, this class
+of RCU implementations is one of the most composable constructs in
+existence.
+
+RCU implementations that explicitly track nesting depth are limited by
+the nesting-depth counter. For example, the Linux kernel's preemptible
+RCU limits nesting to ``INT_MAX``. This should suffice for almost all
+practical purposes. That said, a consecutive pair of RCU read-side
+critical sections between which there is an operation that waits for a
+grace period cannot be enclosed in another RCU read-side critical
+section. This is because it is not legal to wait for a grace period
+within an RCU read-side critical section: To do so would result either
+in deadlock or in RCU implicitly splitting the enclosing RCU read-side
+critical section, neither of which is conducive to a long-lived and
+prosperous kernel.
+
+It is worth noting that RCU is not alone in limiting composability. For
+example, many transactional-memory implementations prohibit composing a
+pair of transactions separated by an irrevocable operation (for example,
+a network receive operation). For another example, lock-based critical
+sections can be composed surprisingly freely, but only if deadlock is
+avoided.
+
+In short, although RCU read-side critical sections are highly
+composable, care is required in some situations, just as is the case for
+any other composable synchronization mechanism.
+
+Corner Cases
+~~~~~~~~~~~~
+
+A given RCU workload might have an endless and intense stream of RCU
+read-side critical sections, perhaps even so intense that there was
+never a point in time during which there was not at least one RCU
+read-side critical section in flight. RCU cannot allow this situation to
+block grace periods: As long as all the RCU read-side critical sections
+are finite, grace periods must also be finite.
+
+That said, preemptible RCU implementations could potentially result in
+RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side critical
+section. This situation can arise only in heavily loaded systems, but
+systems using real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case. That said, the exact requirements on RCU priority boosting will
+likely evolve as more experience accumulates.
+
+Other workloads might have very high update rates. Although one can
+argue that such workloads should instead use something other than RCU,
+the fact remains that RCU must handle such workloads gracefully. This
+requirement is another factor driving batching of grace periods, but it
+is also the driving force behind the checks for large numbers of queued
+RCU callbacks in the ``call_rcu()`` code path. Finally, high update
+rates should not delay RCU read-side critical sections, although some
+small read-side delays can occur when using
+``synchronize_rcu_expedited()``, courtesy of this function's use of
+``smp_call_function_single()``.
+
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of ``close(open(path))`` in a
+tight loop in the early 2000s suddenly provided a much deeper
+appreciation of the high-update-rate corner case. This test also
+motivated addition of some RCU code to react to high update rates, for
+example, if a given CPU finds itself with more than 10,000 RCU callbacks
+queued, it will cause RCU to take evasive action by more aggressively
+starting grace periods and more aggressively forcing completion of
+grace-period processing. This evasive action causes the grace period to
+complete more quickly, but at the cost of restricting RCU's batching
+optimizations, thus increasing the CPU overhead incurred by that grace
+period.
+
+Software-Engineering Requirements
+---------------------------------
+
+Between Murphy's Law and “To err is human”, it is necessary to guard
+against mishaps and misuse:
+
+#. It is all too easy to forget to use ``rcu_read_lock()`` everywhere
+   that it is needed, so kernels built with ``CONFIG_PROVE_RCU=y`` will
+   splat if ``rcu_dereference()`` is used outside of an RCU read-side
+   critical section. Update-side code can use
+   ``rcu_dereference_protected()``, which takes a `lockdep
+   expression <https://lwn.net/Articles/371986/>`__ to indicate what is
+   providing the protection. If the indicated protection is not
+   provided, a lockdep splat is emitted.
+   Code shared between readers and updaters can use
+   ``rcu_dereference_check()``, which also takes a lockdep expression,
+   and emits a lockdep splat if neither ``rcu_read_lock()`` nor the
+   indicated protection is in place. In addition,
+   ``rcu_dereference_raw()`` is used in those (hopefully rare) cases
+   where the required protection cannot be easily described. Finally,
+   ``rcu_read_lock_held()`` is provided to allow a function to verify
+   that it has been invoked within an RCU read-side critical section. I
+   was made aware of this set of requirements shortly after Thomas
+   Gleixner audited a number of RCU uses.
+#. A given function might wish to check for RCU-related preconditions
+   upon entry, before using any other RCU API. The
+   ``rcu_lockdep_assert()`` does this job, asserting the expression in
+   kernels having lockdep enabled and doing nothing otherwise.
+#. It is also easy to forget to use ``rcu_assign_pointer()`` and
+   ``rcu_dereference()``, perhaps (incorrectly) substituting a simple
+   assignment. To catch this sort of error, a given RCU-protected
+   pointer may be tagged with ``__rcu``, after which sparse will
+   complain about simple-assignment accesses to that pointer. Arnd
+   Bergmann made me aware of this requirement, and also supplied the
+   needed `patch series <https://lwn.net/Articles/376011/>`__.
+#. Kernels built with ``CONFIG_DEBUG_OBJECTS_RCU_HEAD=y`` will splat if
+   a data element is passed to ``call_rcu()`` twice in a row, without a
+   grace period in between. (This error is similar to a double free.)
+   The corresponding ``rcu_head`` structures that are dynamically
+   allocated are automatically tracked, but ``rcu_head`` structures
+   allocated on the stack must be initialized with
+   ``init_rcu_head_on_stack()`` and cleaned up with
+   ``destroy_rcu_head_on_stack()``. Similarly, statically allocated
+   non-stack ``rcu_head`` structures must be initialized with
+   ``init_rcu_head()`` and cleaned up with ``destroy_rcu_head()``.
+   Mathieu Desnoyers made me aware of this requirement, and also
+   supplied the needed
+   `patch <https://lkml.kernel.org/g/20100319013024.GA28456@Krystal>`__.
+#. An infinite loop in an RCU read-side critical section will eventually
+   trigger an RCU CPU stall warning splat, with the duration of
+   “eventually” being controlled by the ``RCU_CPU_STALL_TIMEOUT``
+   ``Kconfig`` option, or, alternatively, by the
+   ``rcupdate.rcu_cpu_stall_timeout`` boot/sysfs parameter. However, RCU
+   is not obligated to produce this splat unless there is a grace period
+   waiting on that particular RCU read-side critical section.
+
+   Some extreme workloads might intentionally delay RCU grace periods,
+   and systems running those workloads can be booted with
+   ``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
+   kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
+   stall warnings are counter-productive during sysrq dumps and during
+   panics. RCU therefore supplies the ``rcu_sysrq_start()`` and
+   ``rcu_sysrq_end()`` API members to be called before and after long
+   sysrq dumps. RCU also supplies the ``rcu_panic()`` notifier that is
+   automatically invoked at the beginning of a panic to suppress further
+   RCU CPU stall warnings.
+
+   This requirement made itself known in the early 1990s, pretty much
+   the first time that it was necessary to debug a CPU stall. That said,
+   the initial implementation in DYNIX/ptx was quite generic in
+   comparison with that of Linux.
+
+#. Although it would be very good to detect pointers leaking out of RCU
+   read-side critical sections, there is currently no good way of doing
+   this. One complication is the need to distinguish between pointers
+   leaking and pointers that have been handed off from RCU to some other
+   synchronization mechanism, for example, reference counting.
+#. In kernels built with ``CONFIG_RCU_TRACE=y``, RCU-related information
+   is provided via event tracing.
+#. Open-coded use of ``rcu_assign_pointer()`` and ``rcu_dereference()``
+   to create typical linked data structures can be surprisingly
+   error-prone. Therefore, RCU-protected `linked
+   lists <https://lwn.net/Articles/609973/#RCU%20List%20APIs>`__ and,
+   more recently, RCU-protected `hash
+   tables <https://lwn.net/Articles/612100/>`__ are available. Many
+   other special-purpose RCU-protected data structures are available in
+   the Linux kernel and the userspace RCU library.
+#. Some linked structures are created at compile time, but still require
+   ``__rcu`` checking. The ``RCU_POINTER_INITIALIZER()`` macro serves
+   this purpose.
+#. It is not necessary to use ``rcu_assign_pointer()`` when creating
+   linked structures that are to be published via a single external
+   pointer. The ``RCU_INIT_POINTER()`` macro is provided for this task
+   and also for assigning ``NULL`` pointers at runtime.
+
+This not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found in
+real-world RCU usage.
+
+Linux Kernel Complications
+--------------------------
+
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU. Some of the relevant points of interest are as
+follows:
+
+#. `Configuration`_
+#. `Firmware Interface`_
+#. `Early Boot`_
+#. `Interrupts and NMIs`_
+#. `Loadable Modules`_
+#. `Hotplug CPU`_
+#. `Scheduler and RCU`_
+#. `Tracing and RCU`_
+#. `Accesses to User Memory and RCU`_
+#. `Energy Efficiency`_
+#. `Scheduling-Clock Interrupts and RCU`_
+#. `Memory Efficiency`_
+#. `Performance, Scalability, Response Time, and Reliability`_
+
+This list is probably incomplete, but it does give a feel for the most
+notable Linux-kernel complications. Each of the following sections
+covers one of the above topics.
+
+Configuration
+~~~~~~~~~~~~~
+
+RCU's goal is automatic configuration, so that almost nobody needs to
+worry about RCU's ``Kconfig`` options. And for almost all users, RCU
+does in fact work well “out of the box.”
+
+However, there are specialized use cases that are handled by kernel boot
+parameters and ``Kconfig`` options. Unfortunately, the ``Kconfig``
+system will explicitly ask users about new ``Kconfig`` options, which
+requires almost all of them be hidden behind a ``CONFIG_RCU_EXPERT``
+``Kconfig`` option.
+
+This all should be quite obvious, but the fact remains that Linus
+Torvalds recently had to
+`remind <https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com>`__
+me of this requirement.
+
+Firmware Interface
+~~~~~~~~~~~~~~~~~~
+
+In many cases, kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation. Or the
+translation is accurate, but the original message is bogus.
+
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor. If RCU naively believed the firmware, as it
+used to do, it would create too many per-CPU kthreads. Although the
+resulting system will still run correctly, the extra kthreads needlessly
+consume memory and can cause confusion when they show up in ``ps``
+listings.
+
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists. The
+resulting “ghost CPUs” (which are never going to come online) cause a
+number of `interesting
+complications <https://paulmck.livejournal.com/37494.html>`__.
+
+Early Boot
+~~~~~~~~~~
+
+The Linux kernel's boot sequence is an interesting process, and RCU is
+used early, even before ``rcu_init()`` is invoked. In fact, a number of
+RCU's primitives can be used as soon as the initial task's
+``task_struct`` is available and the boot CPU's per-CPU variables are
+set up. The read-side primitives (``rcu_read_lock()``,
+``rcu_read_unlock()``, ``rcu_dereference()``, and
+``rcu_access_pointer()``) will operate normally very early on, as will
+``rcu_assign_pointer()``.
+
+Although ``call_rcu()`` may be invoked at any time during boot,
+callbacks are not guaranteed to be invoked until after all of RCU's
+kthreads have been spawned, which occurs at ``early_initcall()`` time.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full
+initialization cannot occur until after the scheduler has initialized
+itself to the point where RCU can spawn and run its kthreads. In theory,
+it would be possible to invoke callbacks earlier, however, this is not a
+panacea because there would be severe restrictions on what operations
+those callbacks could invoke.
+
+Perhaps surprisingly, ``synchronize_rcu()`` and
+``synchronize_rcu_expedited()``, will operate normally during very early
+boot, the reason being that there is only one CPU and preemption is
+disabled. This means that the call ``synchronize_rcu()`` (or friends)
+itself is a quiescent state and thus a grace period, so the early-boot
+implementation can be a no-op.
+
+However, once the scheduler has spawned its first kthread, this early
+boot trick fails for ``synchronize_rcu()`` (as well as for
+``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPT=y`` kernels. The
+reason is that an RCU read-side critical section might be preempted,
+which means that a subsequent ``synchronize_rcu()`` really does have to
+wait for something, as opposed to simply returning immediately.
+Unfortunately, ``synchronize_rcu()`` can't do this until all of its
+kthreads are spawned, which doesn't happen until some time during
+``early_initcalls()`` time. But this is no excuse: RCU is nevertheless
+required to correctly handle synchronous grace periods during this time
+period. Once all of its kthreads are up and running, RCU starts running
+normally.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| How can RCU possibly handle grace periods before all of its kthreads  |
+| have been spawned???                                                  |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Very carefully!                                                       |
+| During the “dead zone” between the time that the scheduler spawns the |
+| first task and the time that all of RCU's kthreads have been spawned, |
+| all synchronous grace periods are handled by the expedited            |
+| grace-period mechanism. At runtime, this expedited mechanism relies   |
+| on workqueues, but during the dead zone the requesting task itself    |
+| drives the desired expedited grace period. Because dead-zone          |
+| execution takes place within task context, everything works. Once the |
+| dead zone ends, expedited grace periods go back to using workqueues,  |
+| as is required to avoid problems that would otherwise occur when a    |
+| user task received a POSIX signal while driving an expedited grace    |
+| period.                                                               |
+|                                                                       |
+| And yes, this does mean that it is unhelpful to send POSIX signals to |
+| random tasks between the time that the scheduler spawns its first     |
+| kthread and the time that RCU's kthreads have all been spawned. If    |
+| there ever turns out to be a good reason for sending POSIX signals    |
+| during that time, appropriate adjustments will be made. (If it turns  |
+| out that POSIX signals are sent during this time for no good reason,  |
+| other adjustments will be made, appropriate or otherwise.)            |
++-----------------------------------------------------------------------+
+
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+Interrupts and NMIs
+~~~~~~~~~~~~~~~~~~~
+
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions of
+code, as are invocations of ``call_rcu()``.
+
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead
+stealthily transitioning back to process context. This trick is
+sometimes used to invoke system calls from inside the kernel. These
+“half-interrupts” mean that RCU has to be very careful about how it
+counts interrupt nesting levels. I learned of this requirement the hard
+way during a rewrite of RCU's dyntick-idle code.
+
+The Linux kernel has non-maskable interrupts (NMIs), and RCU read-side
+critical sections are legal within NMI handlers. Thankfully, RCU
+update-side primitives, including ``call_rcu()``, are prohibited within
+NMI handlers.
+
+The name notwithstanding, some Linux-kernel architectures can have
+nested NMIs, which RCU must handle correctly. Andy Lutomirski `surprised
+me <https://lkml.kernel.org/r/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com>`__
+with this requirement; he also kindly surprised me with `an
+algorithm <https://lkml.kernel.org/r/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com>`__
+that meets this requirement.
+
+Furthermore, NMI handlers can be interrupted by what appear to RCU to be
+normal interrupts. One way that this can happen is for code that
+directly invokes ``rcu_irq_enter()`` and ``rcu_irq_exit()`` to be called
+from an NMI handler. This astonishing fact of life prompted the current
+code structure, which has ``rcu_irq_enter()`` invoking
+``rcu_nmi_enter()`` and ``rcu_irq_exit()`` invoking ``rcu_nmi_exit()``.
+And yes, I also learned of this requirement the hard way.
+
+Loadable Modules
+~~~~~~~~~~~~~~~~
+
+The Linux kernel has loadable modules, and these modules can also be
+unloaded. After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault. The module-unload
+functions must therefore cancel any delayed calls to loadable-module
+functions, for example, any outstanding ``mod_timer()`` must be dealt
+with via ``del_timer_sync()`` or similar.
+
+Unfortunately, there is no way to cancel an RCU callback; once you
+invoke ``call_rcu()``, the callback function is eventually going to be
+invoked, unless the system goes down first. Because it is normally
+considered socially irresponsible to crash the system in response to a
+module unload request, we need some other way to deal with in-flight RCU
+callbacks.
+
+RCU therefore provides ``rcu_barrier()``, which waits until all
+in-flight RCU callbacks have been invoked. If a module uses
+``call_rcu()``, its exit function should therefore prevent any future
+invocation of ``call_rcu()``, then invoke ``rcu_barrier()``. In theory,
+the underlying module-unload code could invoke ``rcu_barrier()``
+unconditionally, but in practice this would incur unacceptable
+latencies.
+
+Nikita Danilov noted this requirement for an analogous
+filesystem-unmount situation, and Dipankar Sarma incorporated
+``rcu_barrier()`` into RCU. The need for ``rcu_barrier()`` for module
+unloading became apparent later.
+
+.. important::
+
+   The ``rcu_barrier()`` function is not, repeat,
+   *not*, obligated to wait for a grace period. It is instead only required
+   to wait for RCU callbacks that have already been posted. Therefore, if
+   there are no RCU callbacks posted anywhere in the system,
+   ``rcu_barrier()`` is within its rights to return immediately. Even if
+   there are callbacks posted, ``rcu_barrier()`` does not necessarily need
+   to wait for a grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Wait a minute! Each RCU callbacks must wait for a grace period to     |
+| complete, and ``rcu_barrier()`` must wait for each pre-existing       |
+| callback to be invoked. Doesn't ``rcu_barrier()`` therefore need to   |
+| wait for a full grace period if there is even one callback posted     |
+| anywhere in the system?                                               |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Absolutely not!!!                                                     |
+| Yes, each RCU callbacks must wait for a grace period to complete, but |
+| it might well be partly (or even completely) finished waiting by the  |
+| time ``rcu_barrier()`` is invoked. In that case, ``rcu_barrier()``    |
+| need only wait for the remaining portion of the grace period to       |
+| elapse. So even if there are quite a few callbacks posted,            |
+| ``rcu_barrier()`` might well return quite quickly.                    |
+|                                                                       |
+| So if you need to wait for a grace period as well as for all          |
+| pre-existing callbacks, you will need to invoke both                  |
+| ``synchronize_rcu()`` and ``rcu_barrier()``. If latency is a concern, |
+| you can always use workqueues to invoke them concurrently.            |
++-----------------------------------------------------------------------+
+
+Hotplug CPU
+~~~~~~~~~~~
+
+The Linux kernel supports CPU hotplug, which means that CPUs can come
+and go. It is of course illegal to use any RCU API member from an
+offline CPU, with the exception of `SRCU <#Sleepable%20RCU>`__ read-side
+critical sections. This requirement was present from day one in
+DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug
+implementation is “interesting.”
+
+The Linux-kernel CPU-hotplug implementation has notifiers that are used
+to allow the various kernel subsystems (including RCU) to respond
+appropriately to a given CPU-hotplug operation. Most RCU operations may
+be invoked from CPU-hotplug notifiers, including even synchronous
+grace-period operations such as ``synchronize_rcu()`` and
+``synchronize_rcu_expedited()``.
+
+However, all-callback-wait operations such as ``rcu_barrier()`` are also
+not supported, due to the fact that there are phases of CPU-hotplug
+operations where the outgoing CPU's callbacks will not be invoked until
+after the CPU-hotplug operation ends, which could also result in
+deadlock. Furthermore, ``rcu_barrier()`` blocks CPU-hotplug operations
+during its execution, which results in another type of deadlock when
+invoked from a CPU-hotplug notifier.
+
+Scheduler and RCU
+~~~~~~~~~~~~~~~~~
+
+RCU depends on the scheduler, and the scheduler uses RCU to protect some
+of its data structures. The preemptible-RCU ``rcu_read_unlock()``
+implementation must therefore be written carefully to avoid deadlocks
+involving the scheduler's runqueue and priority-inheritance locks. In
+particular, ``rcu_read_unlock()`` must tolerate an interrupt where the
+interrupt handler invokes both ``rcu_read_lock()`` and
+``rcu_read_unlock()``. This possibility requires ``rcu_read_unlock()``
+to use negative nesting levels to avoid destructive recursion via
+interrupt handler's use of RCU.
+
+This scheduler-RCU requirement came as a `complete
+surprise <https://lwn.net/Articles/453002/>`__.
+
+As noted above, RCU makes use of kthreads, and it is necessary to avoid
+excessive CPU-time accumulation by these kthreads. This requirement was
+no surprise, but RCU's violation of it when running context-switch-heavy
+workloads when built with ``CONFIG_NO_HZ_FULL=y`` `did come as a
+surprise
+[PDF] <http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf>`__.
+RCU has made good progress towards meeting this requirement, even for
+context-switch-heavy ``CONFIG_NO_HZ_FULL=y`` workloads, but there is
+room for further improvement.
+
+It is forbidden to hold any of scheduler's runqueue or
+priority-inheritance spinlocks across an ``rcu_read_unlock()`` unless
+interrupts have been disabled across the entire RCU read-side critical
+section, that is, up to and including the matching ``rcu_read_lock()``.
+Violating this restriction can result in deadlocks involving these
+scheduler spinlocks. There was hope that this restriction might be
+lifted when interrupt-disabled calls to ``rcu_read_unlock()`` started
+deferring the reporting of the resulting RCU-preempt quiescent state
+until the end of the corresponding interrupts-disabled region.
+Unfortunately, timely reporting of the corresponding quiescent state to
+expedited grace periods requires a call to ``raise_softirq()``, which
+can acquire these scheduler spinlocks. In addition, real-time systems
+using RCU priority boosting need this restriction to remain in effect
+because deferred quiescent-state reporting would also defer deboosting,
+which in turn would degrade real-time latencies.
+
+In theory, if a given RCU read-side critical section could be guaranteed
+to be less than one second in duration, holding a scheduler spinlock
+across that critical section's ``rcu_read_unlock()`` would require only
+that preemption be disabled across the entire RCU read-side critical
+section, not interrupts. Unfortunately, given the possibility of vCPU
+preemption, long-running interrupts, and so on, it is not possible in
+practice to guarantee that a given RCU read-side critical section will
+complete in less than one second. Therefore, as noted above, if
+scheduler spinlocks are held across a given call to
+``rcu_read_unlock()``, interrupts must be disabled across the entire RCU
+read-side critical section.
+
+Tracing and RCU
+~~~~~~~~~~~~~~~
+
+It is possible to use tracing on RCU code, but tracing itself uses RCU.
+For this reason, ``rcu_dereference_raw_check()`` is provided for use
+by tracing, which avoids the destructive recursion that could otherwise
+ensue. This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing cannot be
+used. The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+Accesses to User Memory and RCU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The kernel needs to access user-space memory, for example, to access data
+referenced by system-call parameters.  The ``get_user()`` macro does this job.
+
+However, user-space memory might well be paged out, which means that
+``get_user()`` might well page-fault and thus block while waiting for the
+resulting I/O to complete.  It would be a very bad thing for the compiler to
+reorder a ``get_user()`` invocation into an RCU read-side critical section.
+
+For example, suppose that the source code looked like this:
+
+  ::
+
+       1 rcu_read_lock();
+       2 p = rcu_dereference(gp);
+       3 v = p->value;
+       4 rcu_read_unlock();
+       5 get_user(user_v, user_p);
+       6 do_something_with(v, user_v);
+
+The compiler must not be permitted to transform this source code into
+the following:
+
+  ::
+
+       1 rcu_read_lock();
+       2 p = rcu_dereference(gp);
+       3 get_user(user_v, user_p); // BUG: POSSIBLE PAGE FAULT!!!
+       4 v = p->value;
+       5 rcu_read_unlock();
+       6 do_something_with(v, user_v);
+
+If the compiler did make this transformation in a ``CONFIG_PREEMPT=n`` kernel
+build, and if ``get_user()`` did page fault, the result would be a quiescent
+state in the middle of an RCU read-side critical section.  This misplaced
+quiescent state could result in line 4 being a use-after-free access,
+which could be bad for your kernel's actuarial statistics.  Similar examples
+can be constructed with the call to ``get_user()`` preceding the
+``rcu_read_lock()``.
+
+Unfortunately, ``get_user()`` doesn't have any particular ordering properties,
+and in some architectures the underlying ``asm`` isn't even marked
+``volatile``.  And even if it was marked ``volatile``, the above access to
+``p->value`` is not volatile, so the compiler would not have any reason to keep
+those two accesses in order.
+
+Therefore, the Linux-kernel definitions of ``rcu_read_lock()`` and
+``rcu_read_unlock()`` must act as compiler barriers, at least for outermost
+instances of ``rcu_read_lock()`` and ``rcu_read_unlock()`` within a nested set
+of RCU read-side critical sections.
+
+Energy Efficiency
+~~~~~~~~~~~~~~~~~
+
+Interrupting idle CPUs is considered socially unacceptable, especially
+by people with battery-powered embedded systems. RCU therefore conserves
+energy by detecting which CPUs are idle, including tracking CPUs that
+have been interrupted from idle. This is a large part of the
+energy-efficiency requirement, so I learned of this via an irate phone
+call.
+
+Because RCU avoids interrupting idle CPUs, it is illegal to execute an
+RCU read-side critical section on an idle CPU. (Kernels built with
+``CONFIG_PROVE_RCU=y`` will splat if you try it.) The ``RCU_NONIDLE()``
+macro and ``_rcuidle`` event tracing is provided to work around this
+restriction. In addition, ``rcu_is_watching()`` may be used to test
+whether or not it is currently legal to run RCU read-side critical
+sections on this CPU. I learned of the need for diagnostics on the one
+hand and ``RCU_NONIDLE()`` on the other while inspecting idle-loop code.
+Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite
+heavily in the idle loop. However, there are some restrictions on the
+code placed within ``RCU_NONIDLE()``:
+
+#. Blocking is prohibited. In practice, this is not a serious
+   restriction given that idle tasks are prohibited from blocking to
+   begin with.
+#. Although nesting ``RCU_NONIDLE()`` is permitted, they cannot nest
+   indefinitely deeply. However, given that they can be nested on the
+   order of a million deep, even on 32-bit systems, this should not be a
+   serious restriction. This nesting limit would probably be reached
+   long after the compiler OOMed or the stack overflowed.
+#. Any code path that enters ``RCU_NONIDLE()`` must sequence out of that
+   same ``RCU_NONIDLE()``. For example, the following is grossly
+   illegal:
+
+      ::
+
+	  1     RCU_NONIDLE({
+	  2       do_something();
+	  3       goto bad_idea;  /* BUG!!! */
+	  4       do_something_else();});
+	  5   bad_idea:
+
+
+   It is just as illegal to transfer control into the middle of
+   ``RCU_NONIDLE()``'s argument. Yes, in theory, you could transfer in
+   as long as you also transferred out, but in practice you could also
+   expect to get sharply worded review comments.
+
+It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
+running in userspace. RCU must therefore track ``nohz_full`` userspace
+execution. RCU must therefore be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent any
+time idle and/or executing in userspace.
+
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet, for example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of which
+was finally able to demonstrate `real energy savings running on real
+hardware
+[PDF] <http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf>`__.
+As noted earlier, I learned of many of these requirements via angry
+phone calls: Flaming me on the Linux-kernel mailing list was apparently
+not sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+Scheduling-Clock Interrupts and RCU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The kernel transitions between in-kernel non-idle execution, userspace
+execution, and the idle loop. Depending on kernel configuration, RCU
+handles these states differently:
+
++-----------------+------------------+------------------+-----------------+
+| ``HZ`` Kconfig  | In-Kernel        | Usermode         | Idle            |
++=================+==================+==================+=================+
+| ``HZ_PERIODIC`` | Can rely on      | Can rely on      | Can rely on     |
+|                 | scheduling-clock | scheduling-clock | RCU's           |
+|                 | interrupt.       | interrupt and    | dyntick-idle    |
+|                 |                  | its detection    | detection.      |
+|                 |                  | of interrupt     |                 |
+|                 |                  | from usermode.   |                 |
++-----------------+------------------+------------------+-----------------+
+| ``NO_HZ_IDLE``  | Can rely on      | Can rely on      | Can rely on     |
+|                 | scheduling-clock | scheduling-clock | RCU's           |
+|                 | interrupt.       | interrupt and    | dyntick-idle    |
+|                 |                  | its detection    | detection.      |
+|                 |                  | of interrupt     |                 |
+|                 |                  | from usermode.   |                 |
++-----------------+------------------+------------------+-----------------+
+| ``NO_HZ_FULL``  | Can only         | Can rely on      | Can rely on     |
+|                 | sometimes rely   | RCU's            | RCU's           |
+|                 | on               | dyntick-idle     | dyntick-idle    |
+|                 | scheduling-clock | detection.       | detection.      |
+|                 | interrupt. In    |                  |                 |
+|                 | other cases, it  |                  |                 |
+|                 | is necessary to  |                  |                 |
+|                 | bound kernel     |                  |                 |
+|                 | execution times  |                  |                 |
+|                 | and/or use       |                  |                 |
+|                 | IPIs.            |                  |                 |
++-----------------+------------------+------------------+-----------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| Why can't ``NO_HZ_FULL`` in-kernel execution rely on the              |
+| scheduling-clock interrupt, just like ``HZ_PERIODIC`` and             |
+| ``NO_HZ_IDLE`` do?                                                    |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| Because, as a performance optimization, ``NO_HZ_FULL`` does not       |
+| necessarily re-enable the scheduling-clock interrupt on entry to each |
+| and every system call.                                                |
++-----------------------------------------------------------------------+
+
+However, RCU must be reliably informed as to whether any given CPU is
+currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that
+CPU is executing in usermode, as discussed
+`earlier <#Energy%20Efficiency>`__. It also requires that the
+scheduling-clock interrupt be enabled when RCU needs it to be:
+
+#. If a CPU is either idle or executing in usermode, and RCU believes it
+   is non-idle, the scheduling-clock tick had better be running.
+   Otherwise, you will get RCU CPU stall warnings. Or at best, very long
+   (11-second) grace periods, with a pointless IPI waking the CPU from
+   time to time.
+#. If a CPU is in a portion of the kernel that executes RCU read-side
+   critical sections, and RCU believes this CPU to be idle, you will get
+   random memory corruption. **DON'T DO THIS!!!**
+   This is one reason to test with lockdep, which will complain about
+   this sort of thing.
+#. If a CPU is in a portion of the kernel that is absolutely positively
+   no-joking guaranteed to never execute any RCU read-side critical
+   sections, and RCU believes this CPU to to be idle, no problem. This
+   sort of thing is used by some architectures for light-weight
+   exception handlers, which can then avoid the overhead of
+   ``rcu_irq_enter()`` and ``rcu_irq_exit()`` at exception entry and
+   exit, respectively. Some go further and avoid the entireties of
+   ``irq_enter()`` and ``irq_exit()``.
+   Just make very sure you are running some of your tests with
+   ``CONFIG_PROVE_RCU=y``, just in case one of your code paths was in
+   fact joking about not doing RCU read-side critical sections.
+#. If a CPU is executing in the kernel with the scheduling-clock
+   interrupt disabled and RCU believes this CPU to be non-idle, and if
+   the CPU goes idle (from an RCU perspective) every few jiffies, no
+   problem. It is usually OK for there to be the occasional gap between
+   idle periods of up to a second or so.
+   If the gap grows too long, you get RCU CPU stall warnings.
+#. If a CPU is either idle or executing in usermode, and RCU believes it
+   to be idle, of course no problem.
+#. If a CPU is executing in the kernel, the kernel code path is passing
+   through quiescent states at a reasonable frequency (preferably about
+   once per few jiffies, but the occasional excursion to a second or so
+   is usually OK) and the scheduling-clock interrupt is enabled, of
+   course no problem.
+   If the gap between a successive pair of quiescent states grows too
+   long, you get RCU CPU stall warnings.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**:                                                       |
++-----------------------------------------------------------------------+
+| But what if my driver has a hardware interrupt handler that can run   |
+| for many seconds? I cannot invoke ``schedule()`` from an hardware     |
+| interrupt handler, after all!                                         |
++-----------------------------------------------------------------------+
+| **Answer**:                                                           |
++-----------------------------------------------------------------------+
+| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so    |
+| often. But given that long-running interrupt handlers can cause other |
+| problems, not least for response time, shouldn't you work to keep     |
+| your interrupt handler's runtime within reasonable bounds?            |
++-----------------------------------------------------------------------+
+
+But as long as RCU is properly informed of kernel state transitions
+between in-kernel execution, usermode execution, and idle, and as long
+as the scheduling-clock interrupt is enabled when RCU needs it to be,
+you can rest assured that the bugs you encounter will be in some other
+part of RCU or some other part of the kernel!
+
+Memory Efficiency
+~~~~~~~~~~~~~~~~~
+
+Although small-memory non-realtime systems can simply use Tiny RCU, code
+size is only one aspect of memory efficiency. Another aspect is the size
+of the ``rcu_head`` structure used by ``call_rcu()`` and
+``kfree_rcu()``. Although this structure contains nothing more than a
+pair of pointers, it does appear in many RCU-protected data structures,
+including some that are size critical. The ``page`` structure is a case
+in point, as evidenced by the many occurrences of the ``union`` keyword
+within that structure.
+
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the ``rcu_head`` structures that are
+waiting for a grace period to elapse. It is also the reason why
+``rcu_head`` structures do not contain debug information, such as fields
+tracking the file and line of the ``call_rcu()`` or ``kfree_rcu()`` that
+posted them. Although this information might appear in debug-only kernel
+builds at some point, in the meantime, the ``->func`` field will often
+provide the needed debug information.
+
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures. Returning to the ``page`` structure, the
+``rcu_head`` field shares storage with a great many other structures
+that are used at various points in the corresponding page's lifetime. In
+order to correctly resolve certain `race
+conditions <https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com>`__,
+the Linux kernel's memory-management subsystem needs a particular bit to
+remain zero during all phases of grace-period processing, and that bit
+happens to map to the bottom bit of the ``rcu_head`` structure's
+``->next`` field. RCU makes this guarantee as long as ``call_rcu()`` is
+used to post the callback, as opposed to ``kfree_rcu()`` or some future
+“lazy” variant of ``call_rcu()`` that might one day be created for
+energy-efficiency purposes.
+
+That said, there are limits. RCU requires that the ``rcu_head``
+structure be aligned to a two-byte boundary, and passing a misaligned
+``rcu_head`` structure to one of the ``call_rcu()`` family of functions
+will result in a splat. It is therefore necessary to exercise caution
+when packing structures containing fields of type ``rcu_head``. Why not
+a four-byte or even eight-byte alignment requirement? Because the m68k
+architecture provides only two-byte alignment, and thus acts as
+alignment's least common denominator.
+
+The reason for reserving the bottom bit of pointers to ``rcu_head``
+structures is to leave the door open to “lazy” callbacks whose
+invocations can safely be deferred. Deferring invocation could
+potentially have energy-efficiency benefits, but only if the rate of
+non-lazy callbacks decreases significantly for some important workload.
+In the meantime, reserving the bottom bit keeps this option open in case
+it one day becomes useful.
+
+Performance, Scalability, Response Time, and Reliability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Expanding on the `earlier
+discussion <#Performance%20and%20Scalability>`__, RCU is used heavily by
+hot code paths in performance-critical portions of the Linux kernel's
+networking, security, virtualization, and scheduling code paths. RCU
+must therefore use efficient implementations, especially in its
+read-side primitives. To that end, it would be good if preemptible RCU's
+implementation of ``rcu_read_lock()`` could be inlined, however, doing
+this requires resolving ``#include`` issues with the ``task_struct``
+structure.
+
+The Linux kernel supports hardware configurations with up to 4096 CPUs,
+which means that RCU must be extremely scalable. Algorithms that involve
+frequent acquisitions of global locks or frequent atomic operations on
+global variables simply cannot be tolerated within the RCU
+implementation. RCU therefore makes heavy use of a combining tree based
+on the ``rcu_node`` structure. RCU is required to tolerate all CPUs
+continuously invoking any combination of RCU's runtime primitives with
+minimal per-operation overhead. In fact, in many cases, increasing load
+must *decrease* the per-operation overhead, witness the batching
+optimizations for ``synchronize_rcu()``, ``call_rcu()``,
+``synchronize_rcu_expedited()``, and ``rcu_barrier()``. As a general
+rule, RCU must cheerfully accept whatever the rest of the Linux kernel
+decides to throw at it.
+
+The Linux kernel is used for real-time workloads, especially in
+conjunction with the `-rt
+patchset <https://rt.wiki.kernel.org/index.php/Main_Page>`__. The
+real-time-latency response requirements are such that the traditional
+approach of disabling preemption across RCU read-side critical sections
+is inappropriate. Kernels built with ``CONFIG_PREEMPT=y`` therefore use
+an RCU implementation that allows RCU read-side critical sections to be
+preempted. This requirement made its presence known after users made it
+clear that an earlier `real-time
+patch <https://lwn.net/Articles/107930/>`__ did not meet their needs, in
+conjunction with some `RCU
+issues <https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com>`__
+encountered by a very early version of the -rt patchset.
+
+In addition, RCU must make do with a sub-100-microsecond real-time
+latency budget. In fact, on smaller systems with the -rt patchset, the
+Linux kernel provides sub-20-microsecond real-time latencies for the
+whole kernel, including RCU. RCU's scalability and latency must
+therefore be sufficient for these sorts of configurations. To my
+surprise, the sub-100-microsecond real-time latency budget `applies to
+even the largest systems
+[PDF] <http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf>`__,
+up to and including systems with 4096 CPUs. This real-time requirement
+motivated the grace-period kthread, which also simplified handling of a
+number of race conditions.
+
+RCU must avoid degrading real-time response for CPU-bound threads,
+whether executing in usermode (which is one use case for
+``CONFIG_NO_HZ_FULL=y``) or in the kernel. That said, CPU-bound loops in
+the kernel must execute ``cond_resched()`` at least once per few tens of
+milliseconds in order to avoid receiving an IPI from RCU.
+
+Finally, RCU's status as a synchronization primitive means that any RCU
+failure can result in arbitrary memory corruption that can be extremely
+difficult to debug. This means that RCU must be extremely reliable,
+which in practice also means that RCU must have an aggressive
+stress-test suite. This stress-test suite is called ``rcutorture``.
+
+Although the need for ``rcutorture`` was no surprise, the current
+immense popularity of the Linux kernel is posing interesting—and perhaps
+unprecedented—validation challenges. To see this, keep in mind that
+there are well over one billion instances of the Linux kernel running
+today, given Android smartphones, Linux-powered televisions, and
+servers. This number can be expected to increase sharply with the advent
+of the celebrated Internet of Things.
+
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime. This bug will be occurring about
+three times per *day* across the installed base. RCU could simply hide
+behind hardware error rates, given that no one should really expect
+their smartphone to last for a million years. However, anyone taking too
+much comfort from this thought should consider the fact that in most
+jurisdictions, a successful multi-year test of a given mechanism, which
+might include a Linux kernel, suffices for a number of types of
+safety-critical certifications. In fact, rumor has it that the Linux
+kernel is already being used in production for safety-critical
+applications. I don't know about you, but I would feel quite bad if a
+bug in RCU killed someone. Which might explain my recent focus on
+validation and verification.
+
+Other RCU Flavors
+-----------------
+
+One of the more surprising things about RCU is that there are now no
+fewer than five *flavors*, or API families. In addition, the primary
+flavor that has been the sole focus up to this point has two different
+implementations, non-preemptible and preemptible. The other four flavors
+are listed below, with requirements for each described in a separate
+section.
+
+#. `Bottom-Half Flavor (Historical)`_
+#. `Sched Flavor (Historical)`_
+#. `Sleepable RCU`_
+#. `Tasks RCU`_
+
+Bottom-Half Flavor (Historical)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The RCU-bh flavor of RCU has since been expressed in terms of the other
+RCU flavors as part of a consolidation of the three flavors into a
+single flavor. The read-side API remains, and continues to disable
+softirq and to be accounted for by lockdep. Much of the material in this
+section is therefore strictly historical in nature.
+
+The softirq-disable (AKA “bottom-half”, hence the “_bh” abbreviations)
+flavor of RCU, or *RCU-bh*, was developed by Dipankar Sarma to provide a
+flavor of RCU that could withstand the network-based denial-of-service
+attacks researched by Robert Olsson. These attacks placed so much
+networking load on the system that some of the CPUs never exited softirq
+execution, which in turn prevented those CPUs from ever executing a
+context switch, which, in the RCU implementation of that time, prevented
+grace periods from ever ending. The result was an out-of-memory
+condition and a system hang.
+
+The solution was the creation of RCU-bh, which does
+``local_bh_disable()`` across its read-side critical sections, and which
+uses the transition from one type of softirq processing to another as a
+quiescent state in addition to context switch, idle, user mode, and
+offline. This means that RCU-bh grace periods can complete even when
+some of the CPUs execute in softirq indefinitely, thus allowing
+algorithms based on RCU-bh to withstand network-based denial-of-service
+attacks.
+
+Because ``rcu_read_lock_bh()`` and ``rcu_read_unlock_bh()`` disable and
+re-enable softirq handlers, any attempt to start a softirq handlers
+during the RCU-bh read-side critical section will be deferred. In this
+case, ``rcu_read_unlock_bh()`` will invoke softirq processing, which can
+take considerable time. One can of course argue that this softirq
+overhead should be associated with the code following the RCU-bh
+read-side critical section rather than ``rcu_read_unlock_bh()``, but the
+fact is that most profiling tools cannot be expected to make this sort
+of fine distinction. For example, suppose that a three-millisecond-long
+RCU-bh read-side critical section executes during a time of heavy
+networking load. There will very likely be an attempt to invoke at least
+one softirq handler during that three milliseconds, but any such
+invocation will be delayed until the time of the
+``rcu_read_unlock_bh()``. This can of course make it appear at first
+glance as if ``rcu_read_unlock_bh()`` was executing very slowly.
+
+The `RCU-bh
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes ``rcu_read_lock_bh()``, ``rcu_read_unlock_bh()``,
+``rcu_dereference_bh()``, ``rcu_dereference_bh_check()``,
+``synchronize_rcu_bh()``, ``synchronize_rcu_bh_expedited()``,
+``call_rcu_bh()``, ``rcu_barrier_bh()``, and
+``rcu_read_lock_bh_held()``. However, the update-side APIs are now
+simple wrappers for other RCU flavors, namely RCU-sched in
+CONFIG_PREEMPT=n kernels and RCU-preempt otherwise.
+
+Sched Flavor (Historical)
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The RCU-sched flavor of RCU has since been expressed in terms of the
+other RCU flavors as part of a consolidation of the three flavors into a
+single flavor. The read-side API remains, and continues to disable
+preemption and to be accounted for by lockdep. Much of the material in
+this section is therefore strictly historical in nature.
+
+Before preemptible RCU, waiting for an RCU grace period had the side
+effect of also waiting for all pre-existing interrupt and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that do
+not have this property, given that any point in the code outside of an
+RCU read-side critical section can be a quiescent state. Therefore,
+*RCU-sched* was created, which follows “classic” RCU in that an
+RCU-sched grace period waits for for pre-existing interrupt and NMI
+handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and
+RCU-sched APIs have identical implementations, while kernels built with
+``CONFIG_PREEMPT=y`` provide a separate implementation for each.
+
+Note well that in ``CONFIG_PREEMPT=y`` kernels,
+``rcu_read_lock_sched()`` and ``rcu_read_unlock_sched()`` disable and
+re-enable preemption, respectively. This means that if there was a
+preemption attempt during the RCU-sched read-side critical section,
+``rcu_read_unlock_sched()`` will enter the scheduler, with all the
+latency and overhead entailed. Just as with ``rcu_read_unlock_bh()``,
+this can make it look as if ``rcu_read_unlock_sched()`` was executing
+very slowly. However, the highest-priority task won't be preempted, so
+that task will enjoy low-overhead ``rcu_read_unlock_sched()``
+invocations.
+
+The `RCU-sched
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes ``rcu_read_lock_sched()``, ``rcu_read_unlock_sched()``,
+``rcu_read_lock_sched_notrace()``, ``rcu_read_unlock_sched_notrace()``,
+``rcu_dereference_sched()``, ``rcu_dereference_sched_check()``,
+``synchronize_sched()``, ``synchronize_rcu_sched_expedited()``,
+``call_rcu_sched()``, ``rcu_barrier_sched()``, and
+``rcu_read_lock_sched_held()``. However, anything that disables
+preemption also marks an RCU-sched read-side critical section, including
+``preempt_disable()`` and ``preempt_enable()``, ``local_irq_save()`` and
+``local_irq_restore()``, and so on.
+
+Sleepable RCU
+~~~~~~~~~~~~~
+
+For well over a decade, someone saying “I need to block within an RCU
+read-side critical section” was a reliable indication that this someone
+did not understand RCU. After all, if you are always blocking in an RCU
+read-side critical section, you can probably afford to use a
+higher-overhead synchronization mechanism. However, that changed with
+the advent of the Linux kernel's notifiers, whose RCU read-side critical
+sections almost never sleep, but sometimes need to. This resulted in the
+introduction of `sleepable RCU <https://lwn.net/Articles/202847/>`__, or
+*SRCU*.
+
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an ``srcu_struct`` structure. A pointer to
+this structure must be passed in to each SRCU function, for example,
+``synchronize_srcu(&ss)``, where ``ss`` is the ``srcu_struct``
+structure. The key benefit of these domains is that a slow SRCU reader
+in one domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code must
+pass a “cookie” from ``srcu_read_lock()`` to ``srcu_read_unlock()``, for
+example, as follows:
+
+   ::
+
+       1 int idx;
+       2
+       3 idx = srcu_read_lock(&ss);
+       4 do_something();
+       5 srcu_read_unlock(&ss, idx);
+
+As noted above, it is legal to block within SRCU read-side critical
+sections, however, with great power comes great responsibility. If you
+block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can wait, either directly or indirectly, for that domain's grace
+period to elapse. For example, this results in a self-deadlock:
+
+   ::
+
+       1 int idx;
+       2
+       3 idx = srcu_read_lock(&ss);
+       4 do_something();
+       5 synchronize_srcu(&ss);
+       6 srcu_read_unlock(&ss, idx);
+
+However, if line 5 acquired a mutex that was held across a
+``synchronize_srcu()`` for domain ``ss``, deadlock would still be
+possible. Furthermore, if line 5 acquired a mutex that was held across a
+``synchronize_srcu()`` for some other domain ``ss1``, and if an
+``ss1``-domain SRCU read-side critical section acquired another mutex
+that was held across as ``ss``-domain ``synchronize_srcu()``, deadlock
+would again be possible. Such a deadlock cycle could extend across an
+arbitrarily large number of different SRCU domains. Again, with great
+power comes great responsibility.
+
+Unlike the other RCU flavors, SRCU read-side critical sections can run
+on idle and even offline CPUs. This ability requires that
+``srcu_read_lock()`` and ``srcu_read_unlock()`` contain memory barriers,
+which means that SRCU readers will run a bit slower than would RCU
+readers. It also motivates the ``smp_mb__after_srcu_read_unlock()`` API,
+which, in combination with ``srcu_read_unlock()``, guarantees a full
+memory barrier.
+
+Also unlike other RCU flavors, ``synchronize_srcu()`` may **not** be
+invoked from CPU-hotplug notifiers, due to the fact that SRCU grace
+periods make use of timers and the possibility of timers being
+temporarily “stranded” on the outgoing CPU. This stranding of timers
+means that timers posted to the outgoing CPU will not fire until late in
+the CPU-hotplug process. The problem is that if a notifier is waiting on
+an SRCU grace period, that grace period is waiting on a timer, and that
+timer is stranded on the outgoing CPU, then the notifier will never be
+awakened, in other words, deadlock has occurred. This same situation of
+course also prohibits ``srcu_barrier()`` from being invoked from
+CPU-hotplug notifiers.
+
+SRCU also differs from other RCU flavors in that SRCU's expedited and
+non-expedited grace periods are implemented by the same mechanism. This
+means that in the current SRCU implementation, expediting a future grace
+period has the side effect of expediting all prior grace periods that
+have not yet completed. (But please note that this is a property of the
+current implementation, not necessarily of future implementations.) In
+addition, if SRCU has been idle for longer than the interval specified
+by the ``srcutree.exp_holdoff`` kernel boot parameter (25 microseconds
+by default), and if a ``synchronize_srcu()`` invocation ends this idle
+period, that invocation will be automatically expedited.
+
+As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating a
+locking bottleneck present in prior kernel versions. Although this will
+allow users to put much heavier stress on ``call_srcu()``, it is
+important to note that SRCU does not yet take any special steps to deal
+with callback flooding. So if you are posting (say) 10,000 SRCU
+callbacks per second per CPU, you are probably totally OK, but if you
+intend to post (say) 1,000,000 SRCU callbacks per second per CPU, please
+run some tests first. SRCU just might need a few adjustment to deal with
+that sort of load. Of course, your mileage may vary based on the speed
+of your CPUs and the size of your memory.
+
+The `SRCU
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes ``srcu_read_lock()``, ``srcu_read_unlock()``,
+``srcu_dereference()``, ``srcu_dereference_check()``,
+``synchronize_srcu()``, ``synchronize_srcu_expedited()``,
+``call_srcu()``, ``srcu_barrier()``, and ``srcu_read_lock_held()``. It
+also includes ``DEFINE_SRCU()``, ``DEFINE_STATIC_SRCU()``, and
+``init_srcu_struct()`` APIs for defining and initializing
+``srcu_struct`` structures.
+
+Tasks RCU
+~~~~~~~~~
+
+Some forms of tracing use “trampolines” to handle the binary rewriting
+required to install different types of probes. It would be good to be
+able to free old trampolines, which sounds like a job for some form of
+RCU. However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers such
+as ``rcu_read_lock()`` and ``rcu_read_unlock()``. In addition, it does
+not work to have these markers in the trampoline itself, because there
+would need to be instructions following ``rcu_read_unlock()``. Although
+``synchronize_rcu()`` would guarantee that execution reached the
+``rcu_read_unlock()``, it would not be able to guarantee that execution
+had completely left the trampoline.
+
+The solution, in the form of `Tasks
+RCU <https://lwn.net/Articles/607117/>`__, is to have implicit read-side
+critical sections that are delimited by voluntary context switches, that
+is, calls to ``schedule()``, ``cond_resched()``, and
+``synchronize_rcu_tasks()``. In addition, transitions to and from
+userspace execution also delimit tasks-RCU read-side critical sections.
+
+The tasks-RCU API is quite compact, consisting only of
+``call_rcu_tasks()``, ``synchronize_rcu_tasks()``, and
+``rcu_barrier_tasks()``. In ``CONFIG_PREEMPT=n`` kernels, trampolines
+cannot be preempted, so these APIs map to ``call_rcu()``,
+``synchronize_rcu()``, and ``rcu_barrier()``, respectively. In
+``CONFIG_PREEMPT=y`` kernels, trampolines can be preempted, and these
+three APIs are therefore implemented by separate functions that check
+for voluntary context switches.
+
+Possible Future Changes
+-----------------------
+
+One of the tricks that RCU uses to attain update-side scalability is to
+increase grace-period latency with increasing numbers of CPUs. If this
+becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+``rcu_barrier()`` operations. If there is a strong reason to use
+``rcu_barrier()`` in CPU-hotplug notifiers, it will be necessary to
+avoid disabling CPU hotplug. This would introduce some complexity, so
+there had better be a *very* good reason.
+
+The tradeoff between grace-period latency on the one hand and
+interruptions of other CPUs on the other hand may need to be
+re-examined. The desire is of course for zero grace-period latency as
+well as zero interprocessor interrupts undertaken during an expedited
+grace period operation. While this ideal is unlikely to be achievable,
+it is quite possible that further improvements can be made.
+
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such as
+sockets or cores. Such spreading and alignment is currently believed to
+be unnecessary because the hotpath read-side primitives do not access
+the combining tree, nor does ``call_rcu()`` in the common case. If you
+believe that your architecture needs such spreading and alignment, then
+your architecture should also benefit from the
+``rcutree.rcu_fanout_leaf`` boot parameter, which can be set to the
+number of CPUs in a socket, NUMA node, or whatever. If the number of
+CPUs is too large, use a fraction of the number of CPUs. If the number
+of CPUs is a large prime number, well, that certainly is an
+“interesting” architectural choice! More flexible arrangements might be
+considered, but only if ``rcutree.rcu_fanout_leaf`` has proven
+inadequate, and only if the inadequacy has been demonstrated by a
+carefully run and realistic system-level workload.
+
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+RCU's various kthreads are reasonably recent additions. It is quite
+likely that adjustments will be required to more gracefully handle
+extreme loads. It might also be necessary to be able to relate CPU
+utilization by RCU's kthreads and softirq handlers to the code that
+instigated this CPU utilization. For example, RCU callback overhead
+might be charged back to the originating ``call_rcu()`` instance, though
+probably not in production kernels.
+
+Additional work may be required to provide reasonable forward-progress
+guarantees under heavy load for grace periods and for callback
+invocation.
+
+Summary
+-------
+
+This document has presented more than two decade's worth of RCU
+requirements. Given that the requirements keep changing, this will not
+be the last word on this subject, but at least it serves to get an
+important subset of the requirements set forth.
+
+Acknowledgments
+---------------
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar, Oleg
+Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and Andy
+Lutomirski for their help in rendering this article human readable, and
+to Michelle Rankin for her support of this effort. Other contributions
+are acknowledged in the Linux kernel's git archive.
diff --git a/Documentation/RCU/NMI-RCU.rst b/Documentation/RCU/NMI-RCU.rst
new file mode 100644
index 000000000000..180958388ff9
--- /dev/null
+++ b/Documentation/RCU/NMI-RCU.rst
@@ -0,0 +1,124 @@
+.. _NMI_rcu_doc:
+
+Using RCU to Protect Dynamic NMI Handlers
+=========================================
+
+
+Although RCU is usually used to protect read-mostly data structures,
+it is possible to use RCU to provide dynamic non-maskable interrupt
+handlers, as well as dynamic irq handlers.  This document describes
+how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
+work in "arch/x86/oprofile/nmi_timer_int.c" and in
+"arch/x86/kernel/traps.c".
+
+The relevant pieces of code are listed below, each followed by a
+brief explanation::
+
+	static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
+	{
+		return 0;
+	}
+
+The dummy_nmi_callback() function is a "dummy" NMI handler that does
+nothing, but returns zero, thus saying that it did nothing, allowing
+the NMI handler to take the default machine-specific action::
+
+	static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+This nmi_callback variable is a global function pointer to the current
+NMI handler::
+
+	void do_nmi(struct pt_regs * regs, long error_code)
+	{
+		int cpu;
+
+		nmi_enter();
+
+		cpu = smp_processor_id();
+		++nmi_count(cpu);
+
+		if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
+			default_do_nmi(regs);
+
+		nmi_exit();
+	}
+
+The do_nmi() function processes each NMI.  It first disables preemption
+in the same way that a hardware irq would, then increments the per-CPU
+count of NMIs.  It then invokes the NMI handler stored in the nmi_callback
+function pointer.  If this handler returns zero, do_nmi() invokes the
+default_do_nmi() function to handle a machine-specific NMI.  Finally,
+preemption is restored.
+
+In theory, rcu_dereference_sched() is not needed, since this code runs
+only on i386, which in theory does not need rcu_dereference_sched()
+anyway.  However, in practice it is a good documentation aid, particularly
+for anyone attempting to do something similar on Alpha or on systems
+with aggressive optimizing compilers.
+
+Quick Quiz:
+		Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only?
+
+:ref:`Answer to Quick Quiz <answer_quick_quiz_NMI>`
+
+Back to the discussion of NMI and RCU::
+
+	void set_nmi_callback(nmi_callback_t callback)
+	{
+		rcu_assign_pointer(nmi_callback, callback);
+	}
+
+The set_nmi_callback() function registers an NMI handler.  Note that any
+data that is to be used by the callback must be initialized up -before-
+the call to set_nmi_callback().  On architectures that do not order
+writes, the rcu_assign_pointer() ensures that the NMI handler sees the
+initialized values::
+
+	void unset_nmi_callback(void)
+	{
+		rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
+	}
+
+This function unregisters an NMI handler, restoring the original
+dummy_nmi_handler().  However, there may well be an NMI handler
+currently executing on some other CPU.  We therefore cannot free
+up any data structures used by the old NMI handler until execution
+of it completes on all other CPUs.
+
+One way to accomplish this is via synchronize_rcu(), perhaps as
+follows::
+
+	unset_nmi_callback();
+	synchronize_rcu();
+	kfree(my_nmi_data);
+
+This works because (as of v4.20) synchronize_rcu() blocks until all
+CPUs complete any preemption-disabled segments of code that they were
+executing.
+Since NMI handlers disable preemption, synchronize_rcu() is guaranteed
+not to return until all ongoing NMI handlers exit.  It is therefore safe
+to free up the handler's data as soon as synchronize_rcu() returns.
+
+Important note: for this to work, the architecture in question must
+invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
+
+.. _answer_quick_quiz_NMI:
+
+Answer to Quick Quiz:
+	Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only?
+
+	The caller to set_nmi_callback() might well have
+	initialized some data that is to be used by the new NMI
+	handler.  In this case, the rcu_dereference_sched() would
+	be needed, because otherwise a CPU that received an NMI
+	just after the new handler was set might see the pointer
+	to the new NMI handler, but the old pre-initialized
+	version of the handler's data.
+
+	This same sad story can happen on other CPUs when using
+	a compiler with aggressive pointer-value speculation
+	optimizations.
+
+	More important, the rcu_dereference_sched() makes it
+	clear to someone reading the code that the pointer is
+	being protected by RCU-sched.
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
deleted file mode 100644
index 881353fd5bff..000000000000
--- a/Documentation/RCU/NMI-RCU.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Using RCU to Protect Dynamic NMI Handlers
-
-
-Although RCU is usually used to protect read-mostly data structures,
-it is possible to use RCU to provide dynamic non-maskable interrupt
-handlers, as well as dynamic irq handlers.  This document describes
-how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
-work in "arch/x86/oprofile/nmi_timer_int.c" and in
-"arch/x86/kernel/traps.c".
-
-The relevant pieces of code are listed below, each followed by a
-brief explanation.
-
-	static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
-	{
-		return 0;
-	}
-
-The dummy_nmi_callback() function is a "dummy" NMI handler that does
-nothing, but returns zero, thus saying that it did nothing, allowing
-the NMI handler to take the default machine-specific action.
-
-	static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
-This nmi_callback variable is a global function pointer to the current
-NMI handler.
-
-	void do_nmi(struct pt_regs * regs, long error_code)
-	{
-		int cpu;
-
-		nmi_enter();
-
-		cpu = smp_processor_id();
-		++nmi_count(cpu);
-
-		if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
-			default_do_nmi(regs);
-
-		nmi_exit();
-	}
-
-The do_nmi() function processes each NMI.  It first disables preemption
-in the same way that a hardware irq would, then increments the per-CPU
-count of NMIs.  It then invokes the NMI handler stored in the nmi_callback
-function pointer.  If this handler returns zero, do_nmi() invokes the
-default_do_nmi() function to handle a machine-specific NMI.  Finally,
-preemption is restored.
-
-In theory, rcu_dereference_sched() is not needed, since this code runs
-only on i386, which in theory does not need rcu_dereference_sched()
-anyway.  However, in practice it is a good documentation aid, particularly
-for anyone attempting to do something similar on Alpha or on systems
-with aggressive optimizing compilers.
-
-Quick Quiz:  Why might the rcu_dereference_sched() be necessary on Alpha,
-	     given that the code referenced by the pointer is read-only?
-
-
-Back to the discussion of NMI and RCU...
-
-	void set_nmi_callback(nmi_callback_t callback)
-	{
-		rcu_assign_pointer(nmi_callback, callback);
-	}
-
-The set_nmi_callback() function registers an NMI handler.  Note that any
-data that is to be used by the callback must be initialized up -before-
-the call to set_nmi_callback().  On architectures that do not order
-writes, the rcu_assign_pointer() ensures that the NMI handler sees the
-initialized values.
-
-	void unset_nmi_callback(void)
-	{
-		rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
-	}
-
-This function unregisters an NMI handler, restoring the original
-dummy_nmi_handler().  However, there may well be an NMI handler
-currently executing on some other CPU.  We therefore cannot free
-up any data structures used by the old NMI handler until execution
-of it completes on all other CPUs.
-
-One way to accomplish this is via synchronize_rcu(), perhaps as
-follows:
-
-	unset_nmi_callback();
-	synchronize_rcu();
-	kfree(my_nmi_data);
-
-This works because (as of v4.20) synchronize_rcu() blocks until all
-CPUs complete any preemption-disabled segments of code that they were
-executing.
-Since NMI handlers disable preemption, synchronize_rcu() is guaranteed
-not to return until all ongoing NMI handlers exit.  It is therefore safe
-to free up the handler's data as soon as synchronize_rcu() returns.
-
-Important note: for this to work, the architecture in question must
-invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
-
-
-Answer to Quick Quiz
-
-	Why might the rcu_dereference_sched() be necessary on Alpha, given
-	that the code referenced by the pointer is read-only?
-
-	Answer: The caller to set_nmi_callback() might well have
-		initialized some data that is to be used by the new NMI
-		handler.  In this case, the rcu_dereference_sched() would
-		be needed, because otherwise a CPU that received an NMI
-		just after the new handler was set might see the pointer
-		to the new NMI handler, but the old pre-initialized
-		version of the handler's data.
-
-		This same sad story can happen on other CPUs when using
-		a compiler with aggressive pointer-value speculation
-		optimizations.
-
-		More important, the rcu_dereference_sched() makes it
-		clear to someone reading the code that the pointer is
-		being protected by RCU-sched.
diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
new file mode 100644
index 000000000000..4051ea3871ef
--- /dev/null
+++ b/Documentation/RCU/arrayRCU.rst
@@ -0,0 +1,165 @@
+.. _array_rcu_doc:
+
+Using RCU to Protect Read-Mostly Arrays
+=======================================
+
+Although RCU is more commonly used to protect linked lists, it can
+also be used to protect arrays.  Three situations are as follows:
+
+1.  :ref:`Hash Tables <hash_tables>`
+
+2.  :ref:`Static Arrays <static_arrays>`
+
+3.  :ref:`Resizable Arrays <resizable_arrays>`
+
+Each of these three situations involves an RCU-protected pointer to an
+array that is separately indexed.  It might be tempting to consider use
+of RCU to instead protect the index into an array, however, this use
+case is **not** supported.  The problem with RCU-protected indexes into
+arrays is that compilers can play way too many optimization games with
+integers, which means that the rules governing handling of these indexes
+are far more trouble than they are worth.  If RCU-protected indexes into
+arrays prove to be particularly valuable (which they have not thus far),
+explicit cooperation from the compiler will be required to permit them
+to be safely used.
+
+That aside, each of the three RCU-protected pointer situations are
+described in the following sections.
+
+.. _hash_tables:
+
+Situation 1: Hash Tables
+------------------------
+
+Hash tables are often implemented as an array, where each array entry
+has a linked-list hash chain.  Each hash chain can be protected by RCU
+as described in the listRCU.txt document.  This approach also applies
+to other array-of-list situations, such as radix trees.
+
+.. _static_arrays:
+
+Situation 2: Static Arrays
+--------------------------
+
+Static arrays, where the data (rather than a pointer to the data) is
+located in each array element, and where the array is never resized,
+have not been used with RCU.  Rik van Riel recommends using seqlock in
+this situation, which would also have minimal read-side overhead as long
+as updates are rare.
+
+Quick Quiz:
+		Why is it so important that updates be rare when using seqlock?
+
+:ref:`Answer to Quick Quiz <answer_quick_quiz_seqlock>`
+
+.. _resizable_arrays:
+
+Situation 3: Resizable Arrays
+------------------------------
+
+Use of RCU for resizable arrays is demonstrated by the grow_ary()
+function formerly used by the System V IPC code.  The array is used
+to map from semaphore, message-queue, and shared-memory IDs to the data
+structure that represents the corresponding IPC construct.  The grow_ary()
+function does not acquire any locks; instead its caller must hold the
+ids->sem semaphore.
+
+The grow_ary() function, shown below, does some limit checks, allocates a
+new ipc_id_ary, copies the old to the new portion of the new, initializes
+the remainder of the new, updates the ids->entries pointer to point to
+the new array, and invokes ipc_rcu_putref() to free up the old array.
+Note that rcu_assign_pointer() is used to update the ids->entries pointer,
+which includes any memory barriers required on whatever architecture
+you are running on::
+
+	static int grow_ary(struct ipc_ids* ids, int newsize)
+	{
+		struct ipc_id_ary* new;
+		struct ipc_id_ary* old;
+		int i;
+		int size = ids->entries->size;
+
+		if(newsize > IPCMNI)
+			newsize = IPCMNI;
+		if(newsize <= size)
+			return newsize;
+
+		new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+				    sizeof(struct ipc_id_ary));
+		if(new == NULL)
+			return size;
+		new->size = newsize;
+		memcpy(new->p, ids->entries->p,
+		       sizeof(struct kern_ipc_perm *)*size +
+		       sizeof(struct ipc_id_ary));
+		for(i=size;i<newsize;i++) {
+			new->p[i] = NULL;
+		}
+		old = ids->entries;
+
+		/*
+		 * Use rcu_assign_pointer() to make sure the memcpyed
+		 * contents of the new array are visible before the new
+		 * array becomes visible.
+		 */
+		rcu_assign_pointer(ids->entries, new);
+
+		ipc_rcu_putref(old);
+		return newsize;
+	}
+
+The ipc_rcu_putref() function decrements the array's reference count
+and then, if the reference count has dropped to zero, uses call_rcu()
+to free the array after a grace period has elapsed.
+
+The array is traversed by the ipc_lock() function.  This function
+indexes into the array under the protection of rcu_read_lock(),
+using rcu_dereference() to pick up the pointer to the array so
+that it may later safely be dereferenced -- memory barriers are
+required on the Alpha CPU.  Since the size of the array is stored
+with the array itself, there can be no array-size mismatches, so
+a simple check suffices.  The pointer to the structure corresponding
+to the desired IPC object is placed in "out", with NULL indicating
+a non-existent entry.  After acquiring "out->lock", the "out->deleted"
+flag indicates whether the IPC object is in the process of being
+deleted, and, if not, the pointer is returned::
+
+	struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+	{
+		struct kern_ipc_perm* out;
+		int lid = id % SEQ_MULTIPLIER;
+		struct ipc_id_ary* entries;
+
+		rcu_read_lock();
+		entries = rcu_dereference(ids->entries);
+		if(lid >= entries->size) {
+			rcu_read_unlock();
+			return NULL;
+		}
+		out = entries->p[lid];
+		if(out == NULL) {
+			rcu_read_unlock();
+			return NULL;
+		}
+		spin_lock(&out->lock);
+
+		/* ipc_rmid() may have already freed the ID while ipc_lock
+		 * was spinning: here verify that the structure is still valid
+		 */
+		if (out->deleted) {
+			spin_unlock(&out->lock);
+			rcu_read_unlock();
+			return NULL;
+		}
+		return out;
+	}
+
+.. _answer_quick_quiz_seqlock:
+
+Answer to Quick Quiz:
+	Why is it so important that updates be rare when using seqlock?
+
+	The reason that it is important that updates be rare when
+	using seqlock is that frequent updates can livelock readers.
+	One way to avoid this problem is to assign a seqlock for
+	each array entry rather than to the entire array.
diff --git a/Documentation/RCU/arrayRCU.txt b/Documentation/RCU/arrayRCU.txt
deleted file mode 100644
index f05a9afb2c39..000000000000
--- a/Documentation/RCU/arrayRCU.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-Using RCU to Protect Read-Mostly Arrays
-
-
-Although RCU is more commonly used to protect linked lists, it can
-also be used to protect arrays.  Three situations are as follows:
-
-1.  Hash Tables
-
-2.  Static Arrays
-
-3.  Resizeable Arrays
-
-Each of these three situations involves an RCU-protected pointer to an
-array that is separately indexed.  It might be tempting to consider use
-of RCU to instead protect the index into an array, however, this use
-case is -not- supported.  The problem with RCU-protected indexes into
-arrays is that compilers can play way too many optimization games with
-integers, which means that the rules governing handling of these indexes
-are far more trouble than they are worth.  If RCU-protected indexes into
-arrays prove to be particularly valuable (which they have not thus far),
-explicit cooperation from the compiler will be required to permit them
-to be safely used.
-
-That aside, each of the three RCU-protected pointer situations are
-described in the following sections.
-
-
-Situation 1: Hash Tables
-
-Hash tables are often implemented as an array, where each array entry
-has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in the listRCU.txt document.  This approach also applies
-to other array-of-list situations, such as radix trees.
-
-
-Situation 2: Static Arrays
-
-Static arrays, where the data (rather than a pointer to the data) is
-located in each array element, and where the array is never resized,
-have not been used with RCU.  Rik van Riel recommends using seqlock in
-this situation, which would also have minimal read-side overhead as long
-as updates are rare.
-
-Quick Quiz:  Why is it so important that updates be rare when
-	     using seqlock?
-
-
-Situation 3: Resizeable Arrays
-
-Use of RCU for resizeable arrays is demonstrated by the grow_ary()
-function formerly used by the System V IPC code.  The array is used
-to map from semaphore, message-queue, and shared-memory IDs to the data
-structure that represents the corresponding IPC construct.  The grow_ary()
-function does not acquire any locks; instead its caller must hold the
-ids->sem semaphore.
-
-The grow_ary() function, shown below, does some limit checks, allocates a
-new ipc_id_ary, copies the old to the new portion of the new, initializes
-the remainder of the new, updates the ids->entries pointer to point to
-the new array, and invokes ipc_rcu_putref() to free up the old array.
-Note that rcu_assign_pointer() is used to update the ids->entries pointer,
-which includes any memory barriers required on whatever architecture
-you are running on.
-
-	static int grow_ary(struct ipc_ids* ids, int newsize)
-	{
-		struct ipc_id_ary* new;
-		struct ipc_id_ary* old;
-		int i;
-		int size = ids->entries->size;
-
-		if(newsize > IPCMNI)
-			newsize = IPCMNI;
-		if(newsize <= size)
-			return newsize;
-
-		new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-				    sizeof(struct ipc_id_ary));
-		if(new == NULL)
-			return size;
-		new->size = newsize;
-		memcpy(new->p, ids->entries->p,
-		       sizeof(struct kern_ipc_perm *)*size +
-		       sizeof(struct ipc_id_ary));
-		for(i=size;i<newsize;i++) {
-			new->p[i] = NULL;
-		}
-		old = ids->entries;
-
-		/*
-		 * Use rcu_assign_pointer() to make sure the memcpyed
-		 * contents of the new array are visible before the new
-		 * array becomes visible.
-		 */
-		rcu_assign_pointer(ids->entries, new);
-
-		ipc_rcu_putref(old);
-		return newsize;
-	}
-
-The ipc_rcu_putref() function decrements the array's reference count
-and then, if the reference count has dropped to zero, uses call_rcu()
-to free the array after a grace period has elapsed.
-
-The array is traversed by the ipc_lock() function.  This function
-indexes into the array under the protection of rcu_read_lock(),
-using rcu_dereference() to pick up the pointer to the array so
-that it may later safely be dereferenced -- memory barriers are
-required on the Alpha CPU.  Since the size of the array is stored
-with the array itself, there can be no array-size mismatches, so
-a simple check suffices.  The pointer to the structure corresponding
-to the desired IPC object is placed in "out", with NULL indicating
-a non-existent entry.  After acquiring "out->lock", the "out->deleted"
-flag indicates whether the IPC object is in the process of being
-deleted, and, if not, the pointer is returned.
-
-	struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
-	{
-		struct kern_ipc_perm* out;
-		int lid = id % SEQ_MULTIPLIER;
-		struct ipc_id_ary* entries;
-
-		rcu_read_lock();
-		entries = rcu_dereference(ids->entries);
-		if(lid >= entries->size) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		out = entries->p[lid];
-		if(out == NULL) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		spin_lock(&out->lock);
-
-		/* ipc_rmid() may have already freed the ID while ipc_lock
-		 * was spinning: here verify that the structure is still valid
-		 */
-		if (out->deleted) {
-			spin_unlock(&out->lock);
-			rcu_read_unlock();
-			return NULL;
-		}
-		return out;
-	}
-
-
-Answer to Quick Quiz:
-
-	The reason that it is important that updates be rare when
-	using seqlock is that frequent updates can livelock readers.
-	One way to avoid this problem is to assign a seqlock for
-	each array entry rather than to the entire array.
diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
index 340a9725676c..81a0a1e5f767 100644
--- a/Documentation/RCU/index.rst
+++ b/Documentation/RCU/index.rst
@@ -5,12 +5,22 @@ RCU concepts
 ============
 
 .. toctree::
-   :maxdepth: 1
+   :maxdepth: 3
 
+   arrayRCU
+   rcubarrier
+   rcu_dereference
+   whatisRCU
    rcu
    listRCU
+   NMI-RCU
    UP
 
+   Design/Memory-Ordering/Tree-RCU-Memory-Ordering
+   Design/Expedited-Grace-Periods/Expedited-Grace-Periods
+   Design/Requirements/Requirements
+   Design/Data-Structures/Data-Structures
+
 .. only:: subproject and html
 
    Indices
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
index 9c015976b174..b8096316fd11 100644
--- a/Documentation/RCU/lockdep-splat.txt
+++ b/Documentation/RCU/lockdep-splat.txt
@@ -99,7 +99,7 @@ With this change, the rcu_dereference() is always within an RCU
 read-side critical section, which again would have suppressed the
 above lockdep-RCU splat.
 
-But in this particular case, we don't actually deference the pointer
+But in this particular case, we don't actually dereference the pointer
 returned from rcu_dereference().  Instead, that pointer is just compared
 to the cic pointer, which means that the rcu_dereference() can be replaced
 by rcu_access_pointer() as follows:
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index da51d3068850..89db949eeca0 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -96,7 +96,17 @@ other flavors of rcu_dereference().  On the other hand, it is illegal
 to use rcu_dereference_protected() if either the RCU-protected pointer
 or the RCU-protected data that it points to can change concurrently.
 
-There are currently only "universal" versions of the rcu_assign_pointer()
-and RCU list-/tree-traversal primitives, which do not (yet) check for
-being in an RCU read-side critical section.  In the future, separate
-versions of these primitives might be created.
+Like rcu_dereference(), when lockdep is enabled, RCU list and hlist
+traversal primitives check for being called from within an RCU read-side
+critical section.  However, a lockdep expression can be passed to them
+as a additional optional argument.  With this lockdep expression, these
+traversal primitives will complain only if the lockdep expression is
+false and they are called from outside any RCU read-side critical section.
+
+For example, the workqueue for_each_pwq() macro is intended to be used
+either within an RCU read-side critical section or with wq->mutex held.
+It is thus implemented as follows:
+
+	#define for_each_pwq(pwq, wq)
+		list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
+					lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
new file mode 100644
index 000000000000..c9667eb0d444
--- /dev/null
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -0,0 +1,463 @@
+.. _rcu_dereference_doc:
+
+PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
+===============================================================
+
+Most of the time, you can use values from rcu_dereference() or one of
+the similar primitives without worries.  Dereferencing (prefix "*"),
+field selection ("->"), assignment ("="), address-of ("&"), addition and
+subtraction of constants, and casts all work quite naturally and safely.
+
+It is nevertheless possible to get into trouble with other operations.
+Follow these rules to keep your RCU code working properly:
+
+-	You must use one of the rcu_dereference() family of primitives
+	to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
+	will complain.  Worse yet, your code can see random memory-corruption
+	bugs due to games that compilers and DEC Alpha can play.
+	Without one of the rcu_dereference() primitives, compilers
+	can reload the value, and won't your code have fun with two
+	different values for a single pointer!  Without rcu_dereference(),
+	DEC Alpha can load a pointer, dereference that pointer, and
+	return data preceding initialization that preceded the store of
+	the pointer.
+
+	In addition, the volatile cast in rcu_dereference() prevents the
+	compiler from deducing the resulting pointer value.  Please see
+	the section entitled "EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH"
+	for an example where the compiler can in fact deduce the exact
+	value of the pointer, and thus cause misordering.
+
+-	You are only permitted to use rcu_dereference on pointer values.
+	The compiler simply knows too much about integral values to
+	trust it to carry dependencies through integer operations.
+	There are a very few exceptions, namely that you can temporarily
+	cast the pointer to uintptr_t in order to:
+
+	-	Set bits and clear bits down in the must-be-zero low-order
+		bits of that pointer.  This clearly means that the pointer
+		must have alignment constraints, for example, this does
+		-not- work in general for char* pointers.
+
+	-	XOR bits to translate pointers, as is done in some
+		classic buddy-allocator algorithms.
+
+	It is important to cast the value back to pointer before
+	doing much of anything else with it.
+
+-	Avoid cancellation when using the "+" and "-" infix arithmetic
+	operators.  For example, for a given variable "x", avoid
+	"(x-(uintptr_t)x)" for char* pointers.	The compiler is within its
+	rights to substitute zero for this sort of expression, so that
+	subsequent accesses no longer depend on the rcu_dereference(),
+	again possibly resulting in bugs due to misordering.
+
+	Of course, if "p" is a pointer from rcu_dereference(), and "a"
+	and "b" are integers that happen to be equal, the expression
+	"p+a-b" is safe because its value still necessarily depends on
+	the rcu_dereference(), thus maintaining proper ordering.
+
+-	If you are using RCU to protect JITed functions, so that the
+	"()" function-invocation operator is applied to a value obtained
+	(directly or indirectly) from rcu_dereference(), you may need to
+	interact directly with the hardware to flush instruction caches.
+	This issue arises on some systems when a newly JITed function is
+	using the same memory that was used by an earlier JITed function.
+
+-	Do not use the results from relational operators ("==", "!=",
+	">", ">=", "<", or "<=") when dereferencing.  For example,
+	the following (quite strange) code is buggy::
+
+		int *p;
+		int *q;
+
+		...
+
+		p = rcu_dereference(gp)
+		q = &global_q;
+		q += p > &oom_p;
+		r1 = *q;  /* BUGGY!!! */
+
+	As before, the reason this is buggy is that relational operators
+	are often compiled using branches.  And as before, although
+	weak-memory machines such as ARM or PowerPC do order stores
+	after such branches, but can speculate loads, which can again
+	result in misordering bugs.
+
+-	Be very careful about comparing pointers obtained from
+	rcu_dereference() against non-NULL values.  As Linus Torvalds
+	explained, if the two pointers are equal, the compiler could
+	substitute the pointer you are comparing against for the pointer
+	obtained from rcu_dereference().  For example::
+
+		p = rcu_dereference(gp);
+		if (p == &default_struct)
+			do_default(p->a);
+
+	Because the compiler now knows that the value of "p" is exactly
+	the address of the variable "default_struct", it is free to
+	transform this code into the following::
+
+		p = rcu_dereference(gp);
+		if (p == &default_struct)
+			do_default(default_struct.a);
+
+	On ARM and Power hardware, the load from "default_struct.a"
+	can now be speculated, such that it might happen before the
+	rcu_dereference().  This could result in bugs due to misordering.
+
+	However, comparisons are OK in the following cases:
+
+	-	The comparison was against the NULL pointer.  If the
+		compiler knows that the pointer is NULL, you had better
+		not be dereferencing it anyway.  If the comparison is
+		non-equal, the compiler is none the wiser.  Therefore,
+		it is safe to compare pointers from rcu_dereference()
+		against NULL pointers.
+
+	-	The pointer is never dereferenced after being compared.
+		Since there are no subsequent dereferences, the compiler
+		cannot use anything it learned from the comparison
+		to reorder the non-existent subsequent dereferences.
+		This sort of comparison occurs frequently when scanning
+		RCU-protected circular linked lists.
+
+		Note that if checks for being within an RCU read-side
+		critical section are not required and the pointer is never
+		dereferenced, rcu_access_pointer() should be used in place
+		of rcu_dereference().
+
+	-	The comparison is against a pointer that references memory
+		that was initialized "a long time ago."  The reason
+		this is safe is that even if misordering occurs, the
+		misordering will not affect the accesses that follow
+		the comparison.  So exactly how long ago is "a long
+		time ago"?  Here are some possibilities:
+
+		-	Compile time.
+
+		-	Boot time.
+
+		-	Module-init time for module code.
+
+		-	Prior to kthread creation for kthread code.
+
+		-	During some prior acquisition of the lock that
+			we now hold.
+
+		-	Before mod_timer() time for a timer handler.
+
+		There are many other possibilities involving the Linux
+		kernel's wide array of primitives that cause code to
+		be invoked at a later time.
+
+	-	The pointer being compared against also came from
+		rcu_dereference().  In this case, both pointers depend
+		on one rcu_dereference() or another, so you get proper
+		ordering either way.
+
+		That said, this situation can make certain RCU usage
+		bugs more likely to happen.  Which can be a good thing,
+		at least if they happen during testing.  An example
+		of such an RCU usage bug is shown in the section titled
+		"EXAMPLE OF AMPLIFIED RCU-USAGE BUG".
+
+	-	All of the accesses following the comparison are stores,
+		so that a control dependency preserves the needed ordering.
+		That said, it is easy to get control dependencies wrong.
+		Please see the "CONTROL DEPENDENCIES" section of
+		Documentation/memory-barriers.txt for more details.
+
+	-	The pointers are not equal -and- the compiler does
+		not have enough information to deduce the value of the
+		pointer.  Note that the volatile cast in rcu_dereference()
+		will normally prevent the compiler from knowing too much.
+
+		However, please note that if the compiler knows that the
+		pointer takes on only one of two values, a not-equal
+		comparison will provide exactly the information that the
+		compiler needs to deduce the value of the pointer.
+
+-	Disable any value-speculation optimizations that your compiler
+	might provide, especially if you are making use of feedback-based
+	optimizations that take data collected from prior runs.  Such
+	value-speculation optimizations reorder operations by design.
+
+	There is one exception to this rule:  Value-speculation
+	optimizations that leverage the branch-prediction hardware are
+	safe on strongly ordered systems (such as x86), but not on weakly
+	ordered systems (such as ARM or Power).  Choose your compiler
+	command-line options wisely!
+
+
+EXAMPLE OF AMPLIFIED RCU-USAGE BUG
+----------------------------------
+
+Because updaters can run concurrently with RCU readers, RCU readers can
+see stale and/or inconsistent values.  If RCU readers need fresh or
+consistent values, which they sometimes do, they need to take proper
+precautions.  To see this, consider the following code fragment::
+
+	struct foo {
+		int a;
+		int b;
+		int c;
+	};
+	struct foo *gp1;
+	struct foo *gp2;
+
+	void updater(void)
+	{
+		struct foo *p;
+
+		p = kmalloc(...);
+		if (p == NULL)
+			deal_with_it();
+		p->a = 42;  /* Each field in its own cache line. */
+		p->b = 43;
+		p->c = 44;
+		rcu_assign_pointer(gp1, p);
+		p->b = 143;
+		p->c = 144;
+		rcu_assign_pointer(gp2, p);
+	}
+
+	void reader(void)
+	{
+		struct foo *p;
+		struct foo *q;
+		int r1, r2;
+
+		p = rcu_dereference(gp2);
+		if (p == NULL)
+			return;
+		r1 = p->b;  /* Guaranteed to get 143. */
+		q = rcu_dereference(gp1);  /* Guaranteed non-NULL. */
+		if (p == q) {
+			/* The compiler decides that q->c is same as p->c. */
+			r2 = p->c; /* Could get 44 on weakly order system. */
+		}
+		do_something_with(r1, r2);
+	}
+
+You might be surprised that the outcome (r1 == 143 && r2 == 44) is possible,
+but you should not be.  After all, the updater might have been invoked
+a second time between the time reader() loaded into "r1" and the time
+that it loaded into "r2".  The fact that this same result can occur due
+to some reordering from the compiler and CPUs is beside the point.
+
+But suppose that the reader needs a consistent view?
+
+Then one approach is to use locking, for example, as follows::
+
+	struct foo {
+		int a;
+		int b;
+		int c;
+		spinlock_t lock;
+	};
+	struct foo *gp1;
+	struct foo *gp2;
+
+	void updater(void)
+	{
+		struct foo *p;
+
+		p = kmalloc(...);
+		if (p == NULL)
+			deal_with_it();
+		spin_lock(&p->lock);
+		p->a = 42;  /* Each field in its own cache line. */
+		p->b = 43;
+		p->c = 44;
+		spin_unlock(&p->lock);
+		rcu_assign_pointer(gp1, p);
+		spin_lock(&p->lock);
+		p->b = 143;
+		p->c = 144;
+		spin_unlock(&p->lock);
+		rcu_assign_pointer(gp2, p);
+	}
+
+	void reader(void)
+	{
+		struct foo *p;
+		struct foo *q;
+		int r1, r2;
+
+		p = rcu_dereference(gp2);
+		if (p == NULL)
+			return;
+		spin_lock(&p->lock);
+		r1 = p->b;  /* Guaranteed to get 143. */
+		q = rcu_dereference(gp1);  /* Guaranteed non-NULL. */
+		if (p == q) {
+			/* The compiler decides that q->c is same as p->c. */
+			r2 = p->c; /* Locking guarantees r2 == 144. */
+		}
+		spin_unlock(&p->lock);
+		do_something_with(r1, r2);
+	}
+
+As always, use the right tool for the job!
+
+
+EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH
+-----------------------------------------
+
+If a pointer obtained from rcu_dereference() compares not-equal to some
+other pointer, the compiler normally has no clue what the value of the
+first pointer might be.  This lack of knowledge prevents the compiler
+from carrying out optimizations that otherwise might destroy the ordering
+guarantees that RCU depends on.  And the volatile cast in rcu_dereference()
+should prevent the compiler from guessing the value.
+
+But without rcu_dereference(), the compiler knows more than you might
+expect.  Consider the following code fragment::
+
+	struct foo {
+		int a;
+		int b;
+	};
+	static struct foo variable1;
+	static struct foo variable2;
+	static struct foo *gp = &variable1;
+
+	void updater(void)
+	{
+		initialize_foo(&variable2);
+		rcu_assign_pointer(gp, &variable2);
+		/*
+		 * The above is the only store to gp in this translation unit,
+		 * and the address of gp is not exported in any way.
+		 */
+	}
+
+	int reader(void)
+	{
+		struct foo *p;
+
+		p = gp;
+		barrier();
+		if (p == &variable1)
+			return p->a; /* Must be variable1.a. */
+		else
+			return p->b; /* Must be variable2.b. */
+	}
+
+Because the compiler can see all stores to "gp", it knows that the only
+possible values of "gp" are "variable1" on the one hand and "variable2"
+on the other.  The comparison in reader() therefore tells the compiler
+the exact value of "p" even in the not-equals case.  This allows the
+compiler to make the return values independent of the load from "gp",
+in turn destroying the ordering between this load and the loads of the
+return values.  This can result in "p->b" returning pre-initialization
+garbage values.
+
+In short, rcu_dereference() is -not- optional when you are going to
+dereference the resulting pointer.
+
+
+WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE?
+------------------------------------------------------------
+
+First, please avoid using rcu_dereference_raw() and also please avoid
+using rcu_dereference_check() and rcu_dereference_protected() with a
+second argument with a constant value of 1 (or true, for that matter).
+With that caution out of the way, here is some guidance for which
+member of the rcu_dereference() to use in various situations:
+
+1.	If the access needs to be within an RCU read-side critical
+	section, use rcu_dereference().  With the new consolidated
+	RCU flavors, an RCU read-side critical section is entered
+	using rcu_read_lock(), anything that disables bottom halves,
+	anything that disables interrupts, or anything that disables
+	preemption.
+
+2.	If the access might be within an RCU read-side critical section
+	on the one hand, or protected by (say) my_lock on the other,
+	use rcu_dereference_check(), for example::
+
+		p1 = rcu_dereference_check(p->rcu_protected_pointer,
+					   lockdep_is_held(&my_lock));
+
+
+3.	If the access might be within an RCU read-side critical section
+	on the one hand, or protected by either my_lock or your_lock on
+	the other, again use rcu_dereference_check(), for example::
+
+		p1 = rcu_dereference_check(p->rcu_protected_pointer,
+					   lockdep_is_held(&my_lock) ||
+					   lockdep_is_held(&your_lock));
+
+4.	If the access is on the update side, so that it is always protected
+	by my_lock, use rcu_dereference_protected()::
+
+		p1 = rcu_dereference_protected(p->rcu_protected_pointer,
+					       lockdep_is_held(&my_lock));
+
+	This can be extended to handle multiple locks as in #3 above,
+	and both can be extended to check other conditions as well.
+
+5.	If the protection is supplied by the caller, and is thus unknown
+	to this code, that is the rare case when rcu_dereference_raw()
+	is appropriate.  In addition, rcu_dereference_raw() might be
+	appropriate when the lockdep expression would be excessively
+	complex, except that a better approach in that case might be to
+	take a long hard look at your synchronization design.  Still,
+	there are data-locking cases where any one of a very large number
+	of locks or reference counters suffices to protect the pointer,
+	so rcu_dereference_raw() does have its place.
+
+	However, its place is probably quite a bit smaller than one
+	might expect given the number of uses in the current kernel.
+	Ditto for its synonym, rcu_dereference_check( ... , 1), and
+	its close relative, rcu_dereference_protected(... , 1).
+
+
+SPARSE CHECKING OF RCU-PROTECTED POINTERS
+-----------------------------------------
+
+The sparse static-analysis tool checks for direct access to RCU-protected
+pointers, which can result in "interesting" bugs due to compiler
+optimizations involving invented loads and perhaps also load tearing.
+For example, suppose someone mistakenly does something like this::
+
+	p = q->rcu_protected_pointer;
+	do_something_with(p->a);
+	do_something_else_with(p->b);
+
+If register pressure is high, the compiler might optimize "p" out
+of existence, transforming the code to something like this::
+
+	do_something_with(q->rcu_protected_pointer->a);
+	do_something_else_with(q->rcu_protected_pointer->b);
+
+This could fatally disappoint your code if q->rcu_protected_pointer
+changed in the meantime.  Nor is this a theoretical problem:  Exactly
+this sort of bug cost Paul E. McKenney (and several of his innocent
+colleagues) a three-day weekend back in the early 1990s.
+
+Load tearing could of course result in dereferencing a mashup of a pair
+of pointers, which also might fatally disappoint your code.
+
+These problems could have been avoided simply by making the code instead
+read as follows::
+
+	p = rcu_dereference(q->rcu_protected_pointer);
+	do_something_with(p->a);
+	do_something_else_with(p->b);
+
+Unfortunately, these sorts of bugs can be extremely hard to spot during
+review.  This is where the sparse tool comes into play, along with the
+"__rcu" marker.  If you mark a pointer declaration, whether in a structure
+or as a formal parameter, with "__rcu", which tells sparse to complain if
+this pointer is accessed directly.  It will also cause sparse to complain
+if a pointer not marked with "__rcu" is accessed using rcu_dereference()
+and friends.  For example, ->rcu_protected_pointer might be declared as
+follows::
+
+	struct foo __rcu *rcu_protected_pointer;
+
+Use of "__rcu" is opt-in.  If you choose not to use it, then you should
+ignore the sparse warnings.
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
deleted file mode 100644
index bf699e8cfc75..000000000000
--- a/Documentation/RCU/rcu_dereference.txt
+++ /dev/null
@@ -1,456 +0,0 @@
-PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
-
-Most of the time, you can use values from rcu_dereference() or one of
-the similar primitives without worries.  Dereferencing (prefix "*"),
-field selection ("->"), assignment ("="), address-of ("&"), addition and
-subtraction of constants, and casts all work quite naturally and safely.
-
-It is nevertheless possible to get into trouble with other operations.
-Follow these rules to keep your RCU code working properly:
-
-o	You must use one of the rcu_dereference() family of primitives
-	to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
-	will complain.  Worse yet, your code can see random memory-corruption
-	bugs due to games that compilers and DEC Alpha can play.
-	Without one of the rcu_dereference() primitives, compilers
-	can reload the value, and won't your code have fun with two
-	different values for a single pointer!  Without rcu_dereference(),
-	DEC Alpha can load a pointer, dereference that pointer, and
-	return data preceding initialization that preceded the store of
-	the pointer.
-
-	In addition, the volatile cast in rcu_dereference() prevents the
-	compiler from deducing the resulting pointer value.  Please see
-	the section entitled "EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH"
-	for an example where the compiler can in fact deduce the exact
-	value of the pointer, and thus cause misordering.
-
-o	You are only permitted to use rcu_dereference on pointer values.
-	The compiler simply knows too much about integral values to
-	trust it to carry dependencies through integer operations.
-	There are a very few exceptions, namely that you can temporarily
-	cast the pointer to uintptr_t in order to:
-
-	o	Set bits and clear bits down in the must-be-zero low-order
-		bits of that pointer.  This clearly means that the pointer
-		must have alignment constraints, for example, this does
-		-not- work in general for char* pointers.
-
-	o	XOR bits to translate pointers, as is done in some
-		classic buddy-allocator algorithms.
-
-	It is important to cast the value back to pointer before
-	doing much of anything else with it.
-
-o	Avoid cancellation when using the "+" and "-" infix arithmetic
-	operators.  For example, for a given variable "x", avoid
-	"(x-(uintptr_t)x)" for char* pointers.	The compiler is within its
-	rights to substitute zero for this sort of expression, so that
-	subsequent accesses no longer depend on the rcu_dereference(),
-	again possibly resulting in bugs due to misordering.
-
-	Of course, if "p" is a pointer from rcu_dereference(), and "a"
-	and "b" are integers that happen to be equal, the expression
-	"p+a-b" is safe because its value still necessarily depends on
-	the rcu_dereference(), thus maintaining proper ordering.
-
-o	If you are using RCU to protect JITed functions, so that the
-	"()" function-invocation operator is applied to a value obtained
-	(directly or indirectly) from rcu_dereference(), you may need to
-	interact directly with the hardware to flush instruction caches.
-	This issue arises on some systems when a newly JITed function is
-	using the same memory that was used by an earlier JITed function.
-
-o	Do not use the results from relational operators ("==", "!=",
-	">", ">=", "<", or "<=") when dereferencing.  For example,
-	the following (quite strange) code is buggy:
-
-		int *p;
-		int *q;
-
-		...
-
-		p = rcu_dereference(gp)
-		q = &global_q;
-		q += p > &oom_p;
-		r1 = *q;  /* BUGGY!!! */
-
-	As before, the reason this is buggy is that relational operators
-	are often compiled using branches.  And as before, although
-	weak-memory machines such as ARM or PowerPC do order stores
-	after such branches, but can speculate loads, which can again
-	result in misordering bugs.
-
-o	Be very careful about comparing pointers obtained from
-	rcu_dereference() against non-NULL values.  As Linus Torvalds
-	explained, if the two pointers are equal, the compiler could
-	substitute the pointer you are comparing against for the pointer
-	obtained from rcu_dereference().  For example:
-
-		p = rcu_dereference(gp);
-		if (p == &default_struct)
-			do_default(p->a);
-
-	Because the compiler now knows that the value of "p" is exactly
-	the address of the variable "default_struct", it is free to
-	transform this code into the following:
-
-		p = rcu_dereference(gp);
-		if (p == &default_struct)
-			do_default(default_struct.a);
-
-	On ARM and Power hardware, the load from "default_struct.a"
-	can now be speculated, such that it might happen before the
-	rcu_dereference().  This could result in bugs due to misordering.
-
-	However, comparisons are OK in the following cases:
-
-	o	The comparison was against the NULL pointer.  If the
-		compiler knows that the pointer is NULL, you had better
-		not be dereferencing it anyway.  If the comparison is
-		non-equal, the compiler is none the wiser.  Therefore,
-		it is safe to compare pointers from rcu_dereference()
-		against NULL pointers.
-
-	o	The pointer is never dereferenced after being compared.
-		Since there are no subsequent dereferences, the compiler
-		cannot use anything it learned from the comparison
-		to reorder the non-existent subsequent dereferences.
-		This sort of comparison occurs frequently when scanning
-		RCU-protected circular linked lists.
-
-		Note that if checks for being within an RCU read-side
-		critical section are not required and the pointer is never
-		dereferenced, rcu_access_pointer() should be used in place
-		of rcu_dereference().
-
-	o	The comparison is against a pointer that references memory
-		that was initialized "a long time ago."  The reason
-		this is safe is that even if misordering occurs, the
-		misordering will not affect the accesses that follow
-		the comparison.  So exactly how long ago is "a long
-		time ago"?  Here are some possibilities:
-
-		o	Compile time.
-
-		o	Boot time.
-
-		o	Module-init time for module code.
-
-		o	Prior to kthread creation for kthread code.
-
-		o	During some prior acquisition of the lock that
-			we now hold.
-
-		o	Before mod_timer() time for a timer handler.
-
-		There are many other possibilities involving the Linux
-		kernel's wide array of primitives that cause code to
-		be invoked at a later time.
-
-	o	The pointer being compared against also came from
-		rcu_dereference().  In this case, both pointers depend
-		on one rcu_dereference() or another, so you get proper
-		ordering either way.
-
-		That said, this situation can make certain RCU usage
-		bugs more likely to happen.  Which can be a good thing,
-		at least if they happen during testing.  An example
-		of such an RCU usage bug is shown in the section titled
-		"EXAMPLE OF AMPLIFIED RCU-USAGE BUG".
-
-	o	All of the accesses following the comparison are stores,
-		so that a control dependency preserves the needed ordering.
-		That said, it is easy to get control dependencies wrong.
-		Please see the "CONTROL DEPENDENCIES" section of
-		Documentation/memory-barriers.txt for more details.
-
-	o	The pointers are not equal -and- the compiler does
-		not have enough information to deduce the value of the
-		pointer.  Note that the volatile cast in rcu_dereference()
-		will normally prevent the compiler from knowing too much.
-
-		However, please note that if the compiler knows that the
-		pointer takes on only one of two values, a not-equal
-		comparison will provide exactly the information that the
-		compiler needs to deduce the value of the pointer.
-
-o	Disable any value-speculation optimizations that your compiler
-	might provide, especially if you are making use of feedback-based
-	optimizations that take data collected from prior runs.  Such
-	value-speculation optimizations reorder operations by design.
-
-	There is one exception to this rule:  Value-speculation
-	optimizations that leverage the branch-prediction hardware are
-	safe on strongly ordered systems (such as x86), but not on weakly
-	ordered systems (such as ARM or Power).  Choose your compiler
-	command-line options wisely!
-
-
-EXAMPLE OF AMPLIFIED RCU-USAGE BUG
-
-Because updaters can run concurrently with RCU readers, RCU readers can
-see stale and/or inconsistent values.  If RCU readers need fresh or
-consistent values, which they sometimes do, they need to take proper
-precautions.  To see this, consider the following code fragment:
-
-	struct foo {
-		int a;
-		int b;
-		int c;
-	};
-	struct foo *gp1;
-	struct foo *gp2;
-
-	void updater(void)
-	{
-		struct foo *p;
-
-		p = kmalloc(...);
-		if (p == NULL)
-			deal_with_it();
-		p->a = 42;  /* Each field in its own cache line. */
-		p->b = 43;
-		p->c = 44;
-		rcu_assign_pointer(gp1, p);
-		p->b = 143;
-		p->c = 144;
-		rcu_assign_pointer(gp2, p);
-	}
-
-	void reader(void)
-	{
-		struct foo *p;
-		struct foo *q;
-		int r1, r2;
-
-		p = rcu_dereference(gp2);
-		if (p == NULL)
-			return;
-		r1 = p->b;  /* Guaranteed to get 143. */
-		q = rcu_dereference(gp1);  /* Guaranteed non-NULL. */
-		if (p == q) {
-			/* The compiler decides that q->c is same as p->c. */
-			r2 = p->c; /* Could get 44 on weakly order system. */
-		}
-		do_something_with(r1, r2);
-	}
-
-You might be surprised that the outcome (r1 == 143 && r2 == 44) is possible,
-but you should not be.  After all, the updater might have been invoked
-a second time between the time reader() loaded into "r1" and the time
-that it loaded into "r2".  The fact that this same result can occur due
-to some reordering from the compiler and CPUs is beside the point.
-
-But suppose that the reader needs a consistent view?
-
-Then one approach is to use locking, for example, as follows:
-
-	struct foo {
-		int a;
-		int b;
-		int c;
-		spinlock_t lock;
-	};
-	struct foo *gp1;
-	struct foo *gp2;
-
-	void updater(void)
-	{
-		struct foo *p;
-
-		p = kmalloc(...);
-		if (p == NULL)
-			deal_with_it();
-		spin_lock(&p->lock);
-		p->a = 42;  /* Each field in its own cache line. */
-		p->b = 43;
-		p->c = 44;
-		spin_unlock(&p->lock);
-		rcu_assign_pointer(gp1, p);
-		spin_lock(&p->lock);
-		p->b = 143;
-		p->c = 144;
-		spin_unlock(&p->lock);
-		rcu_assign_pointer(gp2, p);
-	}
-
-	void reader(void)
-	{
-		struct foo *p;
-		struct foo *q;
-		int r1, r2;
-
-		p = rcu_dereference(gp2);
-		if (p == NULL)
-			return;
-		spin_lock(&p->lock);
-		r1 = p->b;  /* Guaranteed to get 143. */
-		q = rcu_dereference(gp1);  /* Guaranteed non-NULL. */
-		if (p == q) {
-			/* The compiler decides that q->c is same as p->c. */
-			r2 = p->c; /* Locking guarantees r2 == 144. */
-		}
-		spin_unlock(&p->lock);
-		do_something_with(r1, r2);
-	}
-
-As always, use the right tool for the job!
-
-
-EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH
-
-If a pointer obtained from rcu_dereference() compares not-equal to some
-other pointer, the compiler normally has no clue what the value of the
-first pointer might be.  This lack of knowledge prevents the compiler
-from carrying out optimizations that otherwise might destroy the ordering
-guarantees that RCU depends on.  And the volatile cast in rcu_dereference()
-should prevent the compiler from guessing the value.
-
-But without rcu_dereference(), the compiler knows more than you might
-expect.  Consider the following code fragment:
-
-	struct foo {
-		int a;
-		int b;
-	};
-	static struct foo variable1;
-	static struct foo variable2;
-	static struct foo *gp = &variable1;
-
-	void updater(void)
-	{
-		initialize_foo(&variable2);
-		rcu_assign_pointer(gp, &variable2);
-		/*
-		 * The above is the only store to gp in this translation unit,
-		 * and the address of gp is not exported in any way.
-		 */
-	}
-
-	int reader(void)
-	{
-		struct foo *p;
-
-		p = gp;
-		barrier();
-		if (p == &variable1)
-			return p->a; /* Must be variable1.a. */
-		else
-			return p->b; /* Must be variable2.b. */
-	}
-
-Because the compiler can see all stores to "gp", it knows that the only
-possible values of "gp" are "variable1" on the one hand and "variable2"
-on the other.  The comparison in reader() therefore tells the compiler
-the exact value of "p" even in the not-equals case.  This allows the
-compiler to make the return values independent of the load from "gp",
-in turn destroying the ordering between this load and the loads of the
-return values.  This can result in "p->b" returning pre-initialization
-garbage values.
-
-In short, rcu_dereference() is -not- optional when you are going to
-dereference the resulting pointer.
-
-
-WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE?
-
-First, please avoid using rcu_dereference_raw() and also please avoid
-using rcu_dereference_check() and rcu_dereference_protected() with a
-second argument with a constant value of 1 (or true, for that matter).
-With that caution out of the way, here is some guidance for which
-member of the rcu_dereference() to use in various situations:
-
-1.	If the access needs to be within an RCU read-side critical
-	section, use rcu_dereference().  With the new consolidated
-	RCU flavors, an RCU read-side critical section is entered
-	using rcu_read_lock(), anything that disables bottom halves,
-	anything that disables interrupts, or anything that disables
-	preemption.
-
-2.	If the access might be within an RCU read-side critical section
-	on the one hand, or protected by (say) my_lock on the other,
-	use rcu_dereference_check(), for example:
-
-		p1 = rcu_dereference_check(p->rcu_protected_pointer,
-					   lockdep_is_held(&my_lock));
-
-
-3.	If the access might be within an RCU read-side critical section
-	on the one hand, or protected by either my_lock or your_lock on
-	the other, again use rcu_dereference_check(), for example:
-
-		p1 = rcu_dereference_check(p->rcu_protected_pointer,
-					   lockdep_is_held(&my_lock) ||
-					   lockdep_is_held(&your_lock));
-
-4.	If the access is on the update side, so that it is always protected
-	by my_lock, use rcu_dereference_protected():
-
-		p1 = rcu_dereference_protected(p->rcu_protected_pointer,
-					       lockdep_is_held(&my_lock));
-
-	This can be extended to handle multiple locks as in #3 above,
-	and both can be extended to check other conditions as well.
-
-5.	If the protection is supplied by the caller, and is thus unknown
-	to this code, that is the rare case when rcu_dereference_raw()
-	is appropriate.  In addition, rcu_dereference_raw() might be
-	appropriate when the lockdep expression would be excessively
-	complex, except that a better approach in that case might be to
-	take a long hard look at your synchronization design.  Still,
-	there are data-locking cases where any one of a very large number
-	of locks or reference counters suffices to protect the pointer,
-	so rcu_dereference_raw() does have its place.
-
-	However, its place is probably quite a bit smaller than one
-	might expect given the number of uses in the current kernel.
-	Ditto for its synonym, rcu_dereference_check( ... , 1), and
-	its close relative, rcu_dereference_protected(... , 1).
-
-
-SPARSE CHECKING OF RCU-PROTECTED POINTERS
-
-The sparse static-analysis tool checks for direct access to RCU-protected
-pointers, which can result in "interesting" bugs due to compiler
-optimizations involving invented loads and perhaps also load tearing.
-For example, suppose someone mistakenly does something like this:
-
-	p = q->rcu_protected_pointer;
-	do_something_with(p->a);
-	do_something_else_with(p->b);
-
-If register pressure is high, the compiler might optimize "p" out
-of existence, transforming the code to something like this:
-
-	do_something_with(q->rcu_protected_pointer->a);
-	do_something_else_with(q->rcu_protected_pointer->b);
-
-This could fatally disappoint your code if q->rcu_protected_pointer
-changed in the meantime.  Nor is this a theoretical problem:  Exactly
-this sort of bug cost Paul E. McKenney (and several of his innocent
-colleagues) a three-day weekend back in the early 1990s.
-
-Load tearing could of course result in dereferencing a mashup of a pair
-of pointers, which also might fatally disappoint your code.
-
-These problems could have been avoided simply by making the code instead
-read as follows:
-
-	p = rcu_dereference(q->rcu_protected_pointer);
-	do_something_with(p->a);
-	do_something_else_with(p->b);
-
-Unfortunately, these sorts of bugs can be extremely hard to spot during
-review.  This is where the sparse tool comes into play, along with the
-"__rcu" marker.  If you mark a pointer declaration, whether in a structure
-or as a formal parameter, with "__rcu", which tells sparse to complain if
-this pointer is accessed directly.  It will also cause sparse to complain
-if a pointer not marked with "__rcu" is accessed using rcu_dereference()
-and friends.  For example, ->rcu_protected_pointer might be declared as
-follows:
-
-	struct foo __rcu *rcu_protected_pointer;
-
-Use of "__rcu" is opt-in.  If you choose not to use it, then you should
-ignore the sparse warnings.
diff --git a/Documentation/RCU/rcubarrier.rst b/Documentation/RCU/rcubarrier.rst
new file mode 100644
index 000000000000..f64f4413a47c
--- /dev/null
+++ b/Documentation/RCU/rcubarrier.rst
@@ -0,0 +1,353 @@
+.. _rcu_barrier:
+
+RCU and Unloadable Modules
+==========================
+
+[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
+
+RCU (read-copy update) is a synchronization mechanism that can be thought
+of as a replacement for read-writer locking (among other things), but with
+very low-overhead readers that are immune to deadlock, priority inversion,
+and unbounded latency. RCU read-side critical sections are delimited
+by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
+kernels, generate no code whatsoever.
+
+This means that RCU writers are unaware of the presence of concurrent
+readers, so that RCU updates to shared data must be undertaken quite
+carefully, leaving an old version of the data structure in place until all
+pre-existing readers have finished. These old versions are needed because
+such readers might hold a reference to them. RCU updates can therefore be
+rather expensive, and RCU is thus best suited for read-mostly situations.
+
+How can an RCU writer possibly determine when all readers are finished,
+given that readers might well leave absolutely no trace of their
+presence? There is a synchronize_rcu() primitive that blocks until all
+pre-existing readers have completed. An updater wishing to delete an
+element p from a linked list might do the following, while holding an
+appropriate lock, of course::
+
+	list_del_rcu(p);
+	synchronize_rcu();
+	kfree(p);
+
+But the above code cannot be used in IRQ context -- the call_rcu()
+primitive must be used instead. This primitive takes a pointer to an
+rcu_head struct placed within the RCU-protected data structure and
+another pointer to a function that may be invoked later to free that
+structure. Code to delete an element p from the linked list from IRQ
+context might then be as follows::
+
+	list_del_rcu(p);
+	call_rcu(&p->rcu, p_callback);
+
+Since call_rcu() never blocks, this code can safely be used from within
+IRQ context. The function p_callback() might be defined as follows::
+
+	static void p_callback(struct rcu_head *rp)
+	{
+		struct pstruct *p = container_of(rp, struct pstruct, rcu);
+
+		kfree(p);
+	}
+
+
+Unloading Modules That Use call_rcu()
+-------------------------------------
+
+But what if p_callback is defined in an unloadable module?
+
+If we unload the module while some RCU callbacks are pending,
+the CPUs executing these callbacks are going to be severely
+disappointed when they are later invoked, as fancifully depicted at
+http://lwn.net/images/ns/kernel/rcu-drop.jpg.
+
+We could try placing a synchronize_rcu() in the module-exit code path,
+but this is not sufficient. Although synchronize_rcu() does wait for a
+grace period to elapse, it does not wait for the callbacks to complete.
+
+One might be tempted to try several back-to-back synchronize_rcu()
+calls, but this is still not guaranteed to work. If there is a very
+heavy RCU-callback load, then some of the callbacks might be deferred
+in order to allow other processing to proceed. Such deferral is required
+in realtime kernels in order to avoid excessive scheduling latencies.
+
+
+rcu_barrier()
+-------------
+
+We instead need the rcu_barrier() primitive.  Rather than waiting for
+a grace period to elapse, rcu_barrier() waits for all outstanding RCU
+callbacks to complete.  Please note that rcu_barrier() does **not** imply
+synchronize_rcu(), in particular, if there are no RCU callbacks queued
+anywhere, rcu_barrier() is within its rights to return immediately,
+without waiting for a grace period to elapse.
+
+Pseudo-code using rcu_barrier() is as follows:
+
+   1. Prevent any new RCU callbacks from being posted.
+   2. Execute rcu_barrier().
+   3. Allow the module to be unloaded.
+
+There is also an srcu_barrier() function for SRCU, and you of course
+must match the flavor of rcu_barrier() with that of call_rcu().  If your
+module uses multiple flavors of call_rcu(), then it must also use multiple
+flavors of rcu_barrier() when unloading that module.  For example, if
+it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
+srcu_struct_2, then the following three lines of code will be required
+when unloading::
+
+ 1 rcu_barrier();
+ 2 srcu_barrier(&srcu_struct_1);
+ 3 srcu_barrier(&srcu_struct_2);
+
+The rcutorture module makes use of rcu_barrier() in its exit function
+as follows::
+
+ 1  static void
+ 2  rcu_torture_cleanup(void)
+ 3  {
+ 4    int i;
+ 5
+ 6    fullstop = 1;
+ 7    if (shuffler_task != NULL) {
+ 8     VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
+ 9     kthread_stop(shuffler_task);
+ 10   }
+ 11   shuffler_task = NULL;
+ 12
+ 13   if (writer_task != NULL) {
+ 14     VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
+ 15     kthread_stop(writer_task);
+ 16   }
+ 17   writer_task = NULL;
+ 18
+ 19   if (reader_tasks != NULL) {
+ 20     for (i = 0; i < nrealreaders; i++) {
+ 21       if (reader_tasks[i] != NULL) {
+ 22         VERBOSE_PRINTK_STRING(
+ 23           "Stopping rcu_torture_reader task");
+ 24         kthread_stop(reader_tasks[i]);
+ 25       }
+ 26       reader_tasks[i] = NULL;
+ 27     }
+ 28     kfree(reader_tasks);
+ 29     reader_tasks = NULL;
+ 30   }
+ 31   rcu_torture_current = NULL;
+ 32
+ 33   if (fakewriter_tasks != NULL) {
+ 34     for (i = 0; i < nfakewriters; i++) {
+ 35       if (fakewriter_tasks[i] != NULL) {
+ 36         VERBOSE_PRINTK_STRING(
+ 37           "Stopping rcu_torture_fakewriter task");
+ 38         kthread_stop(fakewriter_tasks[i]);
+ 39       }
+ 40       fakewriter_tasks[i] = NULL;
+ 41     }
+ 42     kfree(fakewriter_tasks);
+ 43     fakewriter_tasks = NULL;
+ 44   }
+ 45
+ 46   if (stats_task != NULL) {
+ 47     VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
+ 48     kthread_stop(stats_task);
+ 49   }
+ 50   stats_task = NULL;
+ 51
+ 52   /* Wait for all RCU callbacks to fire. */
+ 53   rcu_barrier();
+ 54
+ 55   rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
+ 56
+ 57   if (cur_ops->cleanup != NULL)
+ 58     cur_ops->cleanup();
+ 59   if (atomic_read(&n_rcu_torture_error))
+ 60     rcu_torture_print_module_parms("End of test: FAILURE");
+ 61   else
+ 62     rcu_torture_print_module_parms("End of test: SUCCESS");
+ 63 }
+
+Line 6 sets a global variable that prevents any RCU callbacks from
+re-posting themselves. This will not be necessary in most cases, since
+RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
+module is an exception to this rule, and therefore needs to set this
+global variable.
+
+Lines 7-50 stop all the kernel tasks associated with the rcutorture
+module. Therefore, once execution reaches line 53, no more rcutorture
+RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
+for any pre-existing callbacks to complete.
+
+Then lines 55-62 print status and do operation-specific cleanup, and
+then return, permitting the module-unload operation to be completed.
+
+.. _rcubarrier_quiz_1:
+
+Quick Quiz #1:
+	Is there any other situation where rcu_barrier() might
+	be required?
+
+:ref:`Answer to Quick Quiz #1 <answer_rcubarrier_quiz_1>`
+
+Your module might have additional complications. For example, if your
+module invokes call_rcu() from timers, you will need to first cancel all
+the timers, and only then invoke rcu_barrier() to wait for any remaining
+RCU callbacks to complete.
+
+Of course, if you module uses call_rcu(), you will need to invoke
+rcu_barrier() before unloading.  Similarly, if your module uses
+call_srcu(), you will need to invoke srcu_barrier() before unloading,
+and on the same srcu_struct structure.  If your module uses call_rcu()
+**and** call_srcu(), then you will need to invoke rcu_barrier() **and**
+srcu_barrier().
+
+
+Implementing rcu_barrier()
+--------------------------
+
+Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
+that RCU callbacks are never reordered once queued on one of the per-CPU
+queues. His implementation queues an RCU callback on each of the per-CPU
+callback queues, and then waits until they have all started executing, at
+which point, all earlier RCU callbacks are guaranteed to have completed.
+
+The original code for rcu_barrier() was as follows::
+
+ 1  void rcu_barrier(void)
+ 2  {
+ 3    BUG_ON(in_interrupt());
+ 4    /* Take cpucontrol mutex to protect against CPU hotplug */
+ 5    mutex_lock(&rcu_barrier_mutex);
+ 6    init_completion(&rcu_barrier_completion);
+ 7    atomic_set(&rcu_barrier_cpu_count, 0);
+ 8    on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+ 9    wait_for_completion(&rcu_barrier_completion);
+ 10   mutex_unlock(&rcu_barrier_mutex);
+ 11 }
+
+Line 3 verifies that the caller is in process context, and lines 5 and 10
+use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
+global completion and counters at a time, which are initialized on lines
+6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
+shown below. Note that the final "1" in on_each_cpu()'s argument list
+ensures that all the calls to rcu_barrier_func() will have completed
+before on_each_cpu() returns. Line 9 then waits for the completion.
+
+This code was rewritten in 2008 and several times thereafter, but this
+still gives the general idea.
+
+The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
+to post an RCU callback, as follows::
+
+ 1  static void rcu_barrier_func(void *notused)
+ 2  {
+ 3    int cpu = smp_processor_id();
+ 4    struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ 5    struct rcu_head *head;
+ 6
+ 7    head = &rdp->barrier;
+ 8    atomic_inc(&rcu_barrier_cpu_count);
+ 9    call_rcu(head, rcu_barrier_callback);
+ 10 }
+
+Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
+which contains the struct rcu_head that needed for the later call to
+call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
+8 increments a global counter. This counter will later be decremented
+by the callback. Line 9 then registers the rcu_barrier_callback() on
+the current CPU's queue.
+
+The rcu_barrier_callback() function simply atomically decrements the
+rcu_barrier_cpu_count variable and finalizes the completion when it
+reaches zero, as follows::
+
+ 1 static void rcu_barrier_callback(struct rcu_head *notused)
+ 2 {
+ 3   if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+ 4     complete(&rcu_barrier_completion);
+ 5 }
+
+.. _rcubarrier_quiz_2:
+
+Quick Quiz #2:
+	What happens if CPU 0's rcu_barrier_func() executes
+	immediately (thus incrementing rcu_barrier_cpu_count to the
+	value one), but the other CPU's rcu_barrier_func() invocations
+	are delayed for a full grace period? Couldn't this result in
+	rcu_barrier() returning prematurely?
+
+:ref:`Answer to Quick Quiz #2 <answer_rcubarrier_quiz_2>`
+
+The current rcu_barrier() implementation is more complex, due to the need
+to avoid disturbing idle CPUs (especially on battery-powered systems)
+and the need to minimally disturb non-idle CPUs in real-time systems.
+However, the code above illustrates the concepts.
+
+
+rcu_barrier() Summary
+---------------------
+
+The rcu_barrier() primitive has seen relatively little use, since most
+code using RCU is in the core kernel rather than in modules. However, if
+you are using RCU from an unloadable module, you need to use rcu_barrier()
+so that your module may be safely unloaded.
+
+
+Answers to Quick Quizzes
+------------------------
+
+.. _answer_rcubarrier_quiz_1:
+
+Quick Quiz #1:
+	Is there any other situation where rcu_barrier() might
+	be required?
+
+Answer: Interestingly enough, rcu_barrier() was not originally
+	implemented for module unloading. Nikita Danilov was using
+	RCU in a filesystem, which resulted in a similar situation at
+	filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
+	in response, so that Nikita could invoke it during the
+	filesystem-unmount process.
+
+	Much later, yours truly hit the RCU module-unload problem when
+	implementing rcutorture, and found that rcu_barrier() solves
+	this problem as well.
+
+:ref:`Back to Quick Quiz #1 <rcubarrier_quiz_1>`
+
+.. _answer_rcubarrier_quiz_2:
+
+Quick Quiz #2:
+	What happens if CPU 0's rcu_barrier_func() executes
+	immediately (thus incrementing rcu_barrier_cpu_count to the
+	value one), but the other CPU's rcu_barrier_func() invocations
+	are delayed for a full grace period? Couldn't this result in
+	rcu_barrier() returning prematurely?
+
+Answer: This cannot happen. The reason is that on_each_cpu() has its last
+	argument, the wait flag, set to "1". This flag is passed through
+	to smp_call_function() and further to smp_call_function_on_cpu(),
+	causing this latter to spin until the cross-CPU invocation of
+	rcu_barrier_func() has completed. This by itself would prevent
+	a grace period from completing on non-CONFIG_PREEMPT kernels,
+	since each CPU must undergo a context switch (or other quiescent
+	state) before the grace period can complete. However, this is
+	of no use in CONFIG_PREEMPT kernels.
+
+	Therefore, on_each_cpu() disables preemption across its call
+	to smp_call_function() and also across the local call to
+	rcu_barrier_func(). This prevents the local CPU from context
+	switching, again preventing grace periods from completing. This
+	means that all CPUs have executed rcu_barrier_func() before
+	the first rcu_barrier_callback() can possibly execute, in turn
+	preventing rcu_barrier_cpu_count from prematurely reaching zero.
+
+	Currently, -rt implementations of RCU keep but a single global
+	queue for RCU callbacks, and thus do not suffer from this
+	problem. However, when the -rt RCU eventually does have per-CPU
+	callback queues, things will have to change. One simple change
+	is to add an rcu_read_lock() before line 8 of rcu_barrier()
+	and an rcu_read_unlock() after line 8 of this same function. If
+	you can think of a better change, please let me know!
+
+:ref:`Back to Quick Quiz #2 <rcubarrier_quiz_2>`
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
deleted file mode 100644
index a2782df69732..000000000000
--- a/Documentation/RCU/rcubarrier.txt
+++ /dev/null
@@ -1,325 +0,0 @@
-RCU and Unloadable Modules
-
-[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
-
-RCU (read-copy update) is a synchronization mechanism that can be thought
-of as a replacement for read-writer locking (among other things), but with
-very low-overhead readers that are immune to deadlock, priority inversion,
-and unbounded latency. RCU read-side critical sections are delimited
-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
-kernels, generate no code whatsoever.
-
-This means that RCU writers are unaware of the presence of concurrent
-readers, so that RCU updates to shared data must be undertaken quite
-carefully, leaving an old version of the data structure in place until all
-pre-existing readers have finished. These old versions are needed because
-such readers might hold a reference to them. RCU updates can therefore be
-rather expensive, and RCU is thus best suited for read-mostly situations.
-
-How can an RCU writer possibly determine when all readers are finished,
-given that readers might well leave absolutely no trace of their
-presence? There is a synchronize_rcu() primitive that blocks until all
-pre-existing readers have completed. An updater wishing to delete an
-element p from a linked list might do the following, while holding an
-appropriate lock, of course:
-
-	list_del_rcu(p);
-	synchronize_rcu();
-	kfree(p);
-
-But the above code cannot be used in IRQ context -- the call_rcu()
-primitive must be used instead. This primitive takes a pointer to an
-rcu_head struct placed within the RCU-protected data structure and
-another pointer to a function that may be invoked later to free that
-structure. Code to delete an element p from the linked list from IRQ
-context might then be as follows:
-
-	list_del_rcu(p);
-	call_rcu(&p->rcu, p_callback);
-
-Since call_rcu() never blocks, this code can safely be used from within
-IRQ context. The function p_callback() might be defined as follows:
-
-	static void p_callback(struct rcu_head *rp)
-	{
-		struct pstruct *p = container_of(rp, struct pstruct, rcu);
-
-		kfree(p);
-	}
-
-
-Unloading Modules That Use call_rcu()
-
-But what if p_callback is defined in an unloadable module?
-
-If we unload the module while some RCU callbacks are pending,
-the CPUs executing these callbacks are going to be severely
-disappointed when they are later invoked, as fancifully depicted at
-http://lwn.net/images/ns/kernel/rcu-drop.jpg.
-
-We could try placing a synchronize_rcu() in the module-exit code path,
-but this is not sufficient. Although synchronize_rcu() does wait for a
-grace period to elapse, it does not wait for the callbacks to complete.
-
-One might be tempted to try several back-to-back synchronize_rcu()
-calls, but this is still not guaranteed to work. If there is a very
-heavy RCU-callback load, then some of the callbacks might be deferred
-in order to allow other processing to proceed. Such deferral is required
-in realtime kernels in order to avoid excessive scheduling latencies.
-
-
-rcu_barrier()
-
-We instead need the rcu_barrier() primitive.  Rather than waiting for
-a grace period to elapse, rcu_barrier() waits for all outstanding RCU
-callbacks to complete.  Please note that rcu_barrier() does -not- imply
-synchronize_rcu(), in particular, if there are no RCU callbacks queued
-anywhere, rcu_barrier() is within its rights to return immediately,
-without waiting for a grace period to elapse.
-
-Pseudo-code using rcu_barrier() is as follows:
-
-   1. Prevent any new RCU callbacks from being posted.
-   2. Execute rcu_barrier().
-   3. Allow the module to be unloaded.
-
-There is also an srcu_barrier() function for SRCU, and you of course
-must match the flavor of rcu_barrier() with that of call_rcu().  If your
-module uses multiple flavors of call_rcu(), then it must also use multiple
-flavors of rcu_barrier() when unloading that module.  For example, if
-it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
-srcu_struct_2(), then the following three lines of code will be required
-when unloading:
-
- 1 rcu_barrier();
- 2 srcu_barrier(&srcu_struct_1);
- 3 srcu_barrier(&srcu_struct_2);
-
-The rcutorture module makes use of rcu_barrier() in its exit function
-as follows:
-
- 1 static void
- 2 rcu_torture_cleanup(void)
- 3 {
- 4   int i;
- 5
- 6   fullstop = 1;
- 7   if (shuffler_task != NULL) {
- 8     VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
- 9     kthread_stop(shuffler_task);
-10   }
-11   shuffler_task = NULL;
-12
-13   if (writer_task != NULL) {
-14     VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
-15     kthread_stop(writer_task);
-16   }
-17   writer_task = NULL;
-18
-19   if (reader_tasks != NULL) {
-20     for (i = 0; i < nrealreaders; i++) {
-21       if (reader_tasks[i] != NULL) {
-22         VERBOSE_PRINTK_STRING(
-23           "Stopping rcu_torture_reader task");
-24         kthread_stop(reader_tasks[i]);
-25       }
-26       reader_tasks[i] = NULL;
-27     }
-28     kfree(reader_tasks);
-29     reader_tasks = NULL;
-30   }
-31   rcu_torture_current = NULL;
-32
-33   if (fakewriter_tasks != NULL) {
-34     for (i = 0; i < nfakewriters; i++) {
-35       if (fakewriter_tasks[i] != NULL) {
-36         VERBOSE_PRINTK_STRING(
-37           "Stopping rcu_torture_fakewriter task");
-38         kthread_stop(fakewriter_tasks[i]);
-39       }
-40       fakewriter_tasks[i] = NULL;
-41     }
-42     kfree(fakewriter_tasks);
-43     fakewriter_tasks = NULL;
-44   }
-45
-46   if (stats_task != NULL) {
-47     VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
-48     kthread_stop(stats_task);
-49   }
-50   stats_task = NULL;
-51
-52   /* Wait for all RCU callbacks to fire. */
-53   rcu_barrier();
-54
-55   rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
-56
-57   if (cur_ops->cleanup != NULL)
-58     cur_ops->cleanup();
-59   if (atomic_read(&n_rcu_torture_error))
-60     rcu_torture_print_module_parms("End of test: FAILURE");
-61   else
-62     rcu_torture_print_module_parms("End of test: SUCCESS");
-63 }
-
-Line 6 sets a global variable that prevents any RCU callbacks from
-re-posting themselves. This will not be necessary in most cases, since
-RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
-module is an exception to this rule, and therefore needs to set this
-global variable.
-
-Lines 7-50 stop all the kernel tasks associated with the rcutorture
-module. Therefore, once execution reaches line 53, no more rcutorture
-RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
-for any pre-existing callbacks to complete.
-
-Then lines 55-62 print status and do operation-specific cleanup, and
-then return, permitting the module-unload operation to be completed.
-
-Quick Quiz #1: Is there any other situation where rcu_barrier() might
-	be required?
-
-Your module might have additional complications. For example, if your
-module invokes call_rcu() from timers, you will need to first cancel all
-the timers, and only then invoke rcu_barrier() to wait for any remaining
-RCU callbacks to complete.
-
-Of course, if you module uses call_rcu(), you will need to invoke
-rcu_barrier() before unloading.  Similarly, if your module uses
-call_srcu(), you will need to invoke srcu_barrier() before unloading,
-and on the same srcu_struct structure.  If your module uses call_rcu()
--and- call_srcu(), then you will need to invoke rcu_barrier() -and-
-srcu_barrier().
-
-
-Implementing rcu_barrier()
-
-Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
-that RCU callbacks are never reordered once queued on one of the per-CPU
-queues. His implementation queues an RCU callback on each of the per-CPU
-callback queues, and then waits until they have all started executing, at
-which point, all earlier RCU callbacks are guaranteed to have completed.
-
-The original code for rcu_barrier() was as follows:
-
- 1 void rcu_barrier(void)
- 2 {
- 3   BUG_ON(in_interrupt());
- 4   /* Take cpucontrol mutex to protect against CPU hotplug */
- 5   mutex_lock(&rcu_barrier_mutex);
- 6   init_completion(&rcu_barrier_completion);
- 7   atomic_set(&rcu_barrier_cpu_count, 0);
- 8   on_each_cpu(rcu_barrier_func, NULL, 0, 1);
- 9   wait_for_completion(&rcu_barrier_completion);
-10   mutex_unlock(&rcu_barrier_mutex);
-11 }
-
-Line 3 verifies that the caller is in process context, and lines 5 and 10
-use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
-global completion and counters at a time, which are initialized on lines
-6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
-shown below. Note that the final "1" in on_each_cpu()'s argument list
-ensures that all the calls to rcu_barrier_func() will have completed
-before on_each_cpu() returns. Line 9 then waits for the completion.
-
-This code was rewritten in 2008 and several times thereafter, but this
-still gives the general idea.
-
-The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
-to post an RCU callback, as follows:
-
- 1 static void rcu_barrier_func(void *notused)
- 2 {
- 3 int cpu = smp_processor_id();
- 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
- 5 struct rcu_head *head;
- 6
- 7 head = &rdp->barrier;
- 8 atomic_inc(&rcu_barrier_cpu_count);
- 9 call_rcu(head, rcu_barrier_callback);
-10 }
-
-Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
-which contains the struct rcu_head that needed for the later call to
-call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
-8 increments a global counter. This counter will later be decremented
-by the callback. Line 9 then registers the rcu_barrier_callback() on
-the current CPU's queue.
-
-The rcu_barrier_callback() function simply atomically decrements the
-rcu_barrier_cpu_count variable and finalizes the completion when it
-reaches zero, as follows:
-
- 1 static void rcu_barrier_callback(struct rcu_head *notused)
- 2 {
- 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
- 4 complete(&rcu_barrier_completion);
- 5 }
-
-Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
-	immediately (thus incrementing rcu_barrier_cpu_count to the
-	value one), but the other CPU's rcu_barrier_func() invocations
-	are delayed for a full grace period? Couldn't this result in
-	rcu_barrier() returning prematurely?
-
-The current rcu_barrier() implementation is more complex, due to the need
-to avoid disturbing idle CPUs (especially on battery-powered systems)
-and the need to minimally disturb non-idle CPUs in real-time systems.
-However, the code above illustrates the concepts.
-
-
-rcu_barrier() Summary
-
-The rcu_barrier() primitive has seen relatively little use, since most
-code using RCU is in the core kernel rather than in modules. However, if
-you are using RCU from an unloadable module, you need to use rcu_barrier()
-so that your module may be safely unloaded.
-
-
-Answers to Quick Quizzes
-
-Quick Quiz #1: Is there any other situation where rcu_barrier() might
-	be required?
-
-Answer: Interestingly enough, rcu_barrier() was not originally
-	implemented for module unloading. Nikita Danilov was using
-	RCU in a filesystem, which resulted in a similar situation at
-	filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
-	in response, so that Nikita could invoke it during the
-	filesystem-unmount process.
-
-	Much later, yours truly hit the RCU module-unload problem when
-	implementing rcutorture, and found that rcu_barrier() solves
-	this problem as well.
-
-Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
-	immediately (thus incrementing rcu_barrier_cpu_count to the
-	value one), but the other CPU's rcu_barrier_func() invocations
-	are delayed for a full grace period? Couldn't this result in
-	rcu_barrier() returning prematurely?
-
-Answer: This cannot happen. The reason is that on_each_cpu() has its last
-	argument, the wait flag, set to "1". This flag is passed through
-	to smp_call_function() and further to smp_call_function_on_cpu(),
-	causing this latter to spin until the cross-CPU invocation of
-	rcu_barrier_func() has completed. This by itself would prevent
-	a grace period from completing on non-CONFIG_PREEMPT kernels,
-	since each CPU must undergo a context switch (or other quiescent
-	state) before the grace period can complete. However, this is
-	of no use in CONFIG_PREEMPT kernels.
-
-	Therefore, on_each_cpu() disables preemption across its call
-	to smp_call_function() and also across the local call to
-	rcu_barrier_func(). This prevents the local CPU from context
-	switching, again preventing grace periods from completing. This
-	means that all CPUs have executed rcu_barrier_func() before
-	the first rcu_barrier_callback() can possibly execute, in turn
-	preventing rcu_barrier_cpu_count from prematurely reaching zero.
-
-	Currently, -rt implementations of RCU keep but a single global
-	queue for RCU callbacks, and thus do not suffer from this
-	problem. However, when the -rt RCU eventually does have per-CPU
-	callback queues, things will have to change. One simple change
-	is to add an rcu_read_lock() before line 8 of rcu_barrier()
-	and an rcu_read_unlock() after line 8 of this same function. If
-	you can think of a better change, please let me know!
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index f48f4621ccbc..a360a8796710 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -225,18 +225,13 @@ an estimate of the total number of RCU callbacks queued across all CPUs
 In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
 for each CPU:
 
-	0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 Nonlazy posted: ..D
+	0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1
 
 The "last_accelerate:" prints the low-order 16 bits (in hex) of the
 jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
 from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
-rcu_prepare_for_idle().  The "Nonlazy posted:" indicates lazy-callback
-status, so that an "l" indicates that all callbacks were lazy at the start
-of the last idle period and an "L" indicates that there are currently
-no non-lazy callbacks (in both cases, "." is printed otherwise, as
-shown above) and "D" indicates that dyntick-idle processing is enabled
-("." is printed otherwise, for example, if disabled via the "nohz="
-kernel boot parameter).
+rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle
+processing is enabled.
 
 If the grace period ends just as the stall warning starts printing,
 there will be a spurious stall-warning message, which will include
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
new file mode 100644
index 000000000000..c7f147b8034f
--- /dev/null
+++ b/Documentation/RCU/whatisRCU.rst
@@ -0,0 +1,1154 @@
+.. _whatisrcu_doc:
+
+What is RCU?  --  "Read, Copy, Update"
+======================================
+
+Please note that the "What is RCU?" LWN series is an excellent place
+to start learning about RCU:
+
+| 1.	What is RCU, Fundamentally?  http://lwn.net/Articles/262464/
+| 2.	What is RCU? Part 2: Usage   http://lwn.net/Articles/263130/
+| 3.	RCU part 3: the RCU API      http://lwn.net/Articles/264090/
+| 4.	The RCU API, 2010 Edition    http://lwn.net/Articles/418853/
+| 	2010 Big API Table           http://lwn.net/Articles/419086/
+| 5.	The RCU API, 2014 Edition    http://lwn.net/Articles/609904/
+|	2014 Big API Table           http://lwn.net/Articles/609973/
+
+
+What is RCU?
+
+RCU is a synchronization mechanism that was added to the Linux kernel
+during the 2.5 development effort that is optimized for read-mostly
+situations.  Although RCU is actually quite simple once you understand it,
+getting there can sometimes be a challenge.  Part of the problem is that
+most of the past descriptions of RCU have been written with the mistaken
+assumption that there is "one true way" to describe RCU.  Instead,
+the experience has been that different people must take different paths
+to arrive at an understanding of RCU.  This document provides several
+different paths, as follows:
+
+:ref:`1.	RCU OVERVIEW <1_whatisRCU>`
+
+:ref:`2.	WHAT IS RCU'S CORE API? <2_whatisRCU>`
+
+:ref:`3.	WHAT ARE SOME EXAMPLE USES OF CORE RCU API? <3_whatisRCU>`
+
+:ref:`4.	WHAT IF MY UPDATING THREAD CANNOT BLOCK? <4_whatisRCU>`
+
+:ref:`5.	WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU? <5_whatisRCU>`
+
+:ref:`6.	ANALOGY WITH READER-WRITER LOCKING <6_whatisRCU>`
+
+:ref:`7.	FULL LIST OF RCU APIs <7_whatisRCU>`
+
+:ref:`8.	ANSWERS TO QUICK QUIZZES <8_whatisRCU>`
+
+People who prefer starting with a conceptual overview should focus on
+Section 1, though most readers will profit by reading this section at
+some point.  People who prefer to start with an API that they can then
+experiment with should focus on Section 2.  People who prefer to start
+with example uses should focus on Sections 3 and 4.  People who need to
+understand the RCU implementation should focus on Section 5, then dive
+into the kernel source code.  People who reason best by analogy should
+focus on Section 6.  Section 7 serves as an index to the docbook API
+documentation, and Section 8 is the traditional answer key.
+
+So, start with the section that makes the most sense to you and your
+preferred method of learning.  If you need to know everything about
+everything, feel free to read the whole thing -- but if you are really
+that type of person, you have perused the source code and will therefore
+never need this document anyway.  ;-)
+
+.. _1_whatisRCU:
+
+1.  RCU OVERVIEW
+----------------
+
+The basic idea behind RCU is to split updates into "removal" and
+"reclamation" phases.  The removal phase removes references to data items
+within a data structure (possibly by replacing them with references to
+new versions of these data items), and can run concurrently with readers.
+The reason that it is safe to run the removal phase concurrently with
+readers is the semantics of modern CPUs guarantee that readers will see
+either the old or the new version of the data structure rather than a
+partially updated reference.  The reclamation phase does the work of reclaiming
+(e.g., freeing) the data items removed from the data structure during the
+removal phase.  Because reclaiming data items can disrupt any readers
+concurrently referencing those data items, the reclamation phase must
+not start until readers no longer hold references to those data items.
+
+Splitting the update into removal and reclamation phases permits the
+updater to perform the removal phase immediately, and to defer the
+reclamation phase until all readers active during the removal phase have
+completed, either by blocking until they finish or by registering a
+callback that is invoked after they finish.  Only readers that are active
+during the removal phase need be considered, because any reader starting
+after the removal phase will be unable to gain a reference to the removed
+data items, and therefore cannot be disrupted by the reclamation phase.
+
+So the typical RCU update sequence goes something like the following:
+
+a.	Remove pointers to a data structure, so that subsequent
+	readers cannot gain a reference to it.
+
+b.	Wait for all previous readers to complete their RCU read-side
+	critical sections.
+
+c.	At this point, there cannot be any readers who hold references
+	to the data structure, so it now may safely be reclaimed
+	(e.g., kfree()d).
+
+Step (b) above is the key idea underlying RCU's deferred destruction.
+The ability to wait until all readers are done allows RCU readers to
+use much lighter-weight synchronization, in some cases, absolutely no
+synchronization at all.  In contrast, in more conventional lock-based
+schemes, readers must use heavy-weight synchronization in order to
+prevent an updater from deleting the data structure out from under them.
+This is because lock-based updaters typically update data items in place,
+and must therefore exclude readers.  In contrast, RCU-based updaters
+typically take advantage of the fact that writes to single aligned
+pointers are atomic on modern CPUs, allowing atomic insertion, removal,
+and replacement of data items in a linked structure without disrupting
+readers.  Concurrent RCU readers can then continue accessing the old
+versions, and can dispense with the atomic operations, memory barriers,
+and communications cache misses that are so expensive on present-day
+SMP computer systems, even in absence of lock contention.
+
+In the three-step procedure shown above, the updater is performing both
+the removal and the reclamation step, but it is often helpful for an
+entirely different thread to do the reclamation, as is in fact the case
+in the Linux kernel's directory-entry cache (dcache).  Even if the same
+thread performs both the update step (step (a) above) and the reclamation
+step (step (c) above), it is often helpful to think of them separately.
+For example, RCU readers and updaters need not communicate at all,
+but RCU provides implicit low-overhead communication between readers
+and reclaimers, namely, in step (b) above.
+
+So how the heck can a reclaimer tell when a reader is done, given
+that readers are not doing any sort of synchronization operations???
+Read on to learn about how RCU's API makes this easy.
+
+.. _2_whatisRCU:
+
+2.  WHAT IS RCU'S CORE API?
+---------------------------
+
+The core RCU API is quite small:
+
+a.	rcu_read_lock()
+b.	rcu_read_unlock()
+c.	synchronize_rcu() / call_rcu()
+d.	rcu_assign_pointer()
+e.	rcu_dereference()
+
+There are many other members of the RCU API, but the rest can be
+expressed in terms of these five, though most implementations instead
+express synchronize_rcu() in terms of the call_rcu() callback API.
+
+The five core RCU APIs are described below, the other 18 will be enumerated
+later.  See the kernel docbook documentation for more info, or look directly
+at the function header comments.
+
+rcu_read_lock()
+^^^^^^^^^^^^^^^
+	void rcu_read_lock(void);
+
+	Used by a reader to inform the reclaimer that the reader is
+	entering an RCU read-side critical section.  It is illegal
+	to block while in an RCU read-side critical section, though
+	kernels built with CONFIG_PREEMPT_RCU can preempt RCU
+	read-side critical sections.  Any RCU-protected data structure
+	accessed during an RCU read-side critical section is guaranteed to
+	remain unreclaimed for the full duration of that critical section.
+	Reference counts may be used in conjunction with RCU to maintain
+	longer-term references to data structures.
+
+rcu_read_unlock()
+^^^^^^^^^^^^^^^^^
+	void rcu_read_unlock(void);
+
+	Used by a reader to inform the reclaimer that the reader is
+	exiting an RCU read-side critical section.  Note that RCU
+	read-side critical sections may be nested and/or overlapping.
+
+synchronize_rcu()
+^^^^^^^^^^^^^^^^^
+	void synchronize_rcu(void);
+
+	Marks the end of updater code and the beginning of reclaimer
+	code.  It does this by blocking until all pre-existing RCU
+	read-side critical sections on all CPUs have completed.
+	Note that synchronize_rcu() will **not** necessarily wait for
+	any subsequent RCU read-side critical sections to complete.
+	For example, consider the following sequence of events::
+
+	         CPU 0                  CPU 1                 CPU 2
+	     ----------------- ------------------------- ---------------
+	 1.  rcu_read_lock()
+	 2.                    enters synchronize_rcu()
+	 3.                                               rcu_read_lock()
+	 4.  rcu_read_unlock()
+	 5.                     exits synchronize_rcu()
+	 6.                                              rcu_read_unlock()
+
+	To reiterate, synchronize_rcu() waits only for ongoing RCU
+	read-side critical sections to complete, not necessarily for
+	any that begin after synchronize_rcu() is invoked.
+
+	Of course, synchronize_rcu() does not necessarily return
+	**immediately** after the last pre-existing RCU read-side critical
+	section completes.  For one thing, there might well be scheduling
+	delays.  For another thing, many RCU implementations process
+	requests in batches in order to improve efficiencies, which can
+	further delay synchronize_rcu().
+
+	Since synchronize_rcu() is the API that must figure out when
+	readers are done, its implementation is key to RCU.  For RCU
+	to be useful in all but the most read-intensive situations,
+	synchronize_rcu()'s overhead must also be quite small.
+
+	The call_rcu() API is a callback form of synchronize_rcu(),
+	and is described in more detail in a later section.  Instead of
+	blocking, it registers a function and argument which are invoked
+	after all ongoing RCU read-side critical sections have completed.
+	This callback variant is particularly useful in situations where
+	it is illegal to block or where update-side performance is
+	critically important.
+
+	However, the call_rcu() API should not be used lightly, as use
+	of the synchronize_rcu() API generally results in simpler code.
+	In addition, the synchronize_rcu() API has the nice property
+	of automatically limiting update rate should grace periods
+	be delayed.  This property results in system resilience in face
+	of denial-of-service attacks.  Code using call_rcu() should limit
+	update rate in order to gain this same sort of resilience.  See
+	checklist.txt for some approaches to limiting the update rate.
+
+rcu_assign_pointer()
+^^^^^^^^^^^^^^^^^^^^
+	void rcu_assign_pointer(p, typeof(p) v);
+
+	Yes, rcu_assign_pointer() **is** implemented as a macro, though it
+	would be cool to be able to declare a function in this manner.
+	(Compiler experts will no doubt disagree.)
+
+	The updater uses this function to assign a new value to an
+	RCU-protected pointer, in order to safely communicate the change
+	in value from the updater to the reader.  This macro does not
+	evaluate to an rvalue, but it does execute any memory-barrier
+	instructions required for a given CPU architecture.
+
+	Perhaps just as important, it serves to document (1) which
+	pointers are protected by RCU and (2) the point at which a
+	given structure becomes accessible to other CPUs.  That said,
+	rcu_assign_pointer() is most frequently used indirectly, via
+	the _rcu list-manipulation primitives such as list_add_rcu().
+
+rcu_dereference()
+^^^^^^^^^^^^^^^^^
+	typeof(p) rcu_dereference(p);
+
+	Like rcu_assign_pointer(), rcu_dereference() must be implemented
+	as a macro.
+
+	The reader uses rcu_dereference() to fetch an RCU-protected
+	pointer, which returns a value that may then be safely
+	dereferenced.  Note that rcu_dereference() does not actually
+	dereference the pointer, instead, it protects the pointer for
+	later dereferencing.  It also executes any needed memory-barrier
+	instructions for a given CPU architecture.  Currently, only Alpha
+	needs memory barriers within rcu_dereference() -- on other CPUs,
+	it compiles to nothing, not even a compiler directive.
+
+	Common coding practice uses rcu_dereference() to copy an
+	RCU-protected pointer to a local variable, then dereferences
+	this local variable, for example as follows::
+
+		p = rcu_dereference(head.next);
+		return p->data;
+
+	However, in this case, one could just as easily combine these
+	into one statement::
+
+		return rcu_dereference(head.next)->data;
+
+	If you are going to be fetching multiple fields from the
+	RCU-protected structure, using the local variable is of
+	course preferred.  Repeated rcu_dereference() calls look
+	ugly, do not guarantee that the same pointer will be returned
+	if an update happened while in the critical section, and incur
+	unnecessary overhead on Alpha CPUs.
+
+	Note that the value returned by rcu_dereference() is valid
+	only within the enclosing RCU read-side critical section [1]_.
+	For example, the following is **not** legal::
+
+		rcu_read_lock();
+		p = rcu_dereference(head.next);
+		rcu_read_unlock();
+		x = p->address;	/* BUG!!! */
+		rcu_read_lock();
+		y = p->data;	/* BUG!!! */
+		rcu_read_unlock();
+
+	Holding a reference from one RCU read-side critical section
+	to another is just as illegal as holding a reference from
+	one lock-based critical section to another!  Similarly,
+	using a reference outside of the critical section in which
+	it was acquired is just as illegal as doing so with normal
+	locking.
+
+	As with rcu_assign_pointer(), an important function of
+	rcu_dereference() is to document which pointers are protected by
+	RCU, in particular, flagging a pointer that is subject to changing
+	at any time, including immediately after the rcu_dereference().
+	And, again like rcu_assign_pointer(), rcu_dereference() is
+	typically used indirectly, via the _rcu list-manipulation
+	primitives, such as list_for_each_entry_rcu() [2]_.
+
+.. 	[1] The variant rcu_dereference_protected() can be used outside
+	of an RCU read-side critical section as long as the usage is
+	protected by locks acquired by the update-side code.  This variant
+	avoids the lockdep warning that would happen when using (for
+	example) rcu_dereference() without rcu_read_lock() protection.
+	Using rcu_dereference_protected() also has the advantage
+	of permitting compiler optimizations that rcu_dereference()
+	must prohibit.	The rcu_dereference_protected() variant takes
+	a lockdep expression to indicate which locks must be acquired
+	by the caller. If the indicated protection is not provided,
+	a lockdep splat is emitted.  See Documentation/RCU/Design/Requirements/Requirements.rst
+	and the API's code comments for more details and example usage.
+
+.. 	[2] If the list_for_each_entry_rcu() instance might be used by
+	update-side code as well as by RCU readers, then an additional
+	lockdep expression can be added to its list of arguments.
+	For example, given an additional "lock_is_held(&mylock)" argument,
+	the RCU lockdep code would complain only if this instance was
+	invoked outside of an RCU read-side critical section and without
+	the protection of mylock.
+
+The following diagram shows how each API communicates among the
+reader, updater, and reclaimer.
+::
+
+
+	    rcu_assign_pointer()
+	                            +--------+
+	    +---------------------->| reader |---------+
+	    |                       +--------+         |
+	    |                           |              |
+	    |                           |              | Protect:
+	    |                           |              | rcu_read_lock()
+	    |                           |              | rcu_read_unlock()
+	    |        rcu_dereference()  |              |
+	    +---------+                 |              |
+	    | updater |<----------------+              |
+	    +---------+                                V
+	    |                                    +-----------+
+	    +----------------------------------->| reclaimer |
+	                                         +-----------+
+	      Defer:
+	      synchronize_rcu() & call_rcu()
+
+
+The RCU infrastructure observes the time sequence of rcu_read_lock(),
+rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
+order to determine when (1) synchronize_rcu() invocations may return
+to their callers and (2) call_rcu() callbacks may be invoked.  Efficient
+implementations of the RCU infrastructure make heavy use of batching in
+order to amortize their overhead over many uses of the corresponding APIs.
+
+There are at least three flavors of RCU usage in the Linux kernel. The diagram
+above shows the most common one. On the updater side, the rcu_assign_pointer(),
+sychronize_rcu() and call_rcu() primitives used are the same for all three
+flavors. However for protection (on the reader side), the primitives used vary
+depending on the flavor:
+
+a.	rcu_read_lock() / rcu_read_unlock()
+	rcu_dereference()
+
+b.	rcu_read_lock_bh() / rcu_read_unlock_bh()
+	local_bh_disable() / local_bh_enable()
+	rcu_dereference_bh()
+
+c.	rcu_read_lock_sched() / rcu_read_unlock_sched()
+	preempt_disable() / preempt_enable()
+	local_irq_save() / local_irq_restore()
+	hardirq enter / hardirq exit
+	NMI enter / NMI exit
+	rcu_dereference_sched()
+
+These three flavors are used as follows:
+
+a.	RCU applied to normal data structures.
+
+b.	RCU applied to networking data structures that may be subjected
+	to remote denial-of-service attacks.
+
+c.	RCU applied to scheduler and interrupt/NMI-handler tasks.
+
+Again, most uses will be of (a).  The (b) and (c) cases are important
+for specialized uses, but are relatively uncommon.
+
+.. _3_whatisRCU:
+
+3.  WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
+-----------------------------------------------
+
+This section shows a simple use of the core RCU API to protect a
+global pointer to a dynamically allocated structure.  More-typical
+uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
+:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+::
+
+	struct foo {
+		int a;
+		char b;
+		long c;
+	};
+	DEFINE_SPINLOCK(foo_mutex);
+
+	struct foo __rcu *gbl_foo;
+
+	/*
+	 * Create a new struct foo that is the same as the one currently
+	 * pointed to by gbl_foo, except that field "a" is replaced
+	 * with "new_a".  Points gbl_foo to the new structure, and
+	 * frees up the old structure after a grace period.
+	 *
+	 * Uses rcu_assign_pointer() to ensure that concurrent readers
+	 * see the initialized version of the new structure.
+	 *
+	 * Uses synchronize_rcu() to ensure that any readers that might
+	 * have references to the old structure complete before freeing
+	 * the old structure.
+	 */
+	void foo_update_a(int new_a)
+	{
+		struct foo *new_fp;
+		struct foo *old_fp;
+
+		new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+		spin_lock(&foo_mutex);
+		old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
+		*new_fp = *old_fp;
+		new_fp->a = new_a;
+		rcu_assign_pointer(gbl_foo, new_fp);
+		spin_unlock(&foo_mutex);
+		synchronize_rcu();
+		kfree(old_fp);
+	}
+
+	/*
+	 * Return the value of field "a" of the current gbl_foo
+	 * structure.  Use rcu_read_lock() and rcu_read_unlock()
+	 * to ensure that the structure does not get deleted out
+	 * from under us, and use rcu_dereference() to ensure that
+	 * we see the initialized version of the structure (important
+	 * for DEC Alpha and for people reading the code).
+	 */
+	int foo_get_a(void)
+	{
+		int retval;
+
+		rcu_read_lock();
+		retval = rcu_dereference(gbl_foo)->a;
+		rcu_read_unlock();
+		return retval;
+	}
+
+So, to sum up:
+
+-	Use rcu_read_lock() and rcu_read_unlock() to guard RCU
+	read-side critical sections.
+
+-	Within an RCU read-side critical section, use rcu_dereference()
+	to dereference RCU-protected pointers.
+
+-	Use some solid scheme (such as locks or semaphores) to
+	keep concurrent updates from interfering with each other.
+
+-	Use rcu_assign_pointer() to update an RCU-protected pointer.
+	This primitive protects concurrent readers from the updater,
+	**not** concurrent updates from each other!  You therefore still
+	need to use locking (or something similar) to keep concurrent
+	rcu_assign_pointer() primitives from interfering with each other.
+
+-	Use synchronize_rcu() **after** removing a data element from an
+	RCU-protected data structure, but **before** reclaiming/freeing
+	the data element, in order to wait for the completion of all
+	RCU read-side critical sections that might be referencing that
+	data item.
+
+See checklist.txt for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
+<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
+<NMI_rcu_doc>`.
+
+.. _4_whatisRCU:
+
+4.  WHAT IF MY UPDATING THREAD CANNOT BLOCK?
+--------------------------------------------
+
+In the example above, foo_update_a() blocks until a grace period elapses.
+This is quite simple, but in some cases one cannot afford to wait so
+long -- there might be other high-priority work to be done.
+
+In such cases, one uses call_rcu() rather than synchronize_rcu().
+The call_rcu() API is as follows::
+
+	void call_rcu(struct rcu_head * head,
+		      void (*func)(struct rcu_head *head));
+
+This function invokes func(head) after a grace period has elapsed.
+This invocation might happen from either softirq or process context,
+so the function is not permitted to block.  The foo struct needs to
+have an rcu_head structure added, perhaps as follows::
+
+	struct foo {
+		int a;
+		char b;
+		long c;
+		struct rcu_head rcu;
+	};
+
+The foo_update_a() function might then be written as follows::
+
+	/*
+	 * Create a new struct foo that is the same as the one currently
+	 * pointed to by gbl_foo, except that field "a" is replaced
+	 * with "new_a".  Points gbl_foo to the new structure, and
+	 * frees up the old structure after a grace period.
+	 *
+	 * Uses rcu_assign_pointer() to ensure that concurrent readers
+	 * see the initialized version of the new structure.
+	 *
+	 * Uses call_rcu() to ensure that any readers that might have
+	 * references to the old structure complete before freeing the
+	 * old structure.
+	 */
+	void foo_update_a(int new_a)
+	{
+		struct foo *new_fp;
+		struct foo *old_fp;
+
+		new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+		spin_lock(&foo_mutex);
+		old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
+		*new_fp = *old_fp;
+		new_fp->a = new_a;
+		rcu_assign_pointer(gbl_foo, new_fp);
+		spin_unlock(&foo_mutex);
+		call_rcu(&old_fp->rcu, foo_reclaim);
+	}
+
+The foo_reclaim() function might appear as follows::
+
+	void foo_reclaim(struct rcu_head *rp)
+	{
+		struct foo *fp = container_of(rp, struct foo, rcu);
+
+		foo_cleanup(fp->a);
+
+		kfree(fp);
+	}
+
+The container_of() primitive is a macro that, given a pointer into a
+struct, the type of the struct, and the pointed-to field within the
+struct, returns a pointer to the beginning of the struct.
+
+The use of call_rcu() permits the caller of foo_update_a() to
+immediately regain control, without needing to worry further about the
+old version of the newly updated element.  It also clearly shows the
+RCU distinction between updater, namely foo_update_a(), and reclaimer,
+namely foo_reclaim().
+
+The summary of advice is the same as for the previous section, except
+that we are now using call_rcu() rather than synchronize_rcu():
+
+-	Use call_rcu() **after** removing a data element from an
+	RCU-protected data structure in order to register a callback
+	function that will be invoked after the completion of all RCU
+	read-side critical sections that might be referencing that
+	data item.
+
+If the callback for call_rcu() is not doing anything more than calling
+kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
+to avoid having to write your own callback::
+
+	kfree_rcu(old_fp, rcu);
+
+Again, see checklist.txt for additional rules governing the use of RCU.
+
+.. _5_whatisRCU:
+
+5.  WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
+------------------------------------------------
+
+One of the nice things about RCU is that it has extremely simple "toy"
+implementations that are a good first step towards understanding the
+production-quality implementations in the Linux kernel.  This section
+presents two such "toy" implementations of RCU, one that is implemented
+in terms of familiar locking primitives, and another that more closely
+resembles "classic" RCU.  Both are way too simple for real-world use,
+lacking both functionality and performance.  However, they are useful
+in getting a feel for how RCU works.  See kernel/rcu/update.c for a
+production-quality implementation, and see:
+
+	http://www.rdrop.com/users/paulmck/RCU
+
+for papers describing the Linux kernel RCU implementation.  The OLS'01
+and OLS'02 papers are a good introduction, and the dissertation provides
+more details on the current implementation as of early 2004.
+
+
+5A.  "TOY" IMPLEMENTATION #1: LOCKING
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+This section presents a "toy" RCU implementation that is based on
+familiar locking primitives.  Its overhead makes it a non-starter for
+real-life use, as does its lack of scalability.  It is also unsuitable
+for realtime use, since it allows scheduling latency to "bleed" from
+one read-side critical section to another.  It also assumes recursive
+reader-writer locks:  If you try this with non-recursive locks, and
+you allow nested rcu_read_lock() calls, you can deadlock.
+
+However, it is probably the easiest implementation to relate to, so is
+a good starting point.
+
+It is extremely simple::
+
+	static DEFINE_RWLOCK(rcu_gp_mutex);
+
+	void rcu_read_lock(void)
+	{
+		read_lock(&rcu_gp_mutex);
+	}
+
+	void rcu_read_unlock(void)
+	{
+		read_unlock(&rcu_gp_mutex);
+	}
+
+	void synchronize_rcu(void)
+	{
+		write_lock(&rcu_gp_mutex);
+		smp_mb__after_spinlock();
+		write_unlock(&rcu_gp_mutex);
+	}
+
+[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
+much.  But here are simplified versions anyway.  And whatever you do,
+don't forget about them when submitting patches making use of RCU!]::
+
+	#define rcu_assign_pointer(p, v) \
+	({ \
+		smp_store_release(&(p), (v)); \
+	})
+
+	#define rcu_dereference(p) \
+	({ \
+		typeof(p) _________p1 = READ_ONCE(p); \
+		(_________p1); \
+	})
+
+
+The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
+and release a global reader-writer lock.  The synchronize_rcu()
+primitive write-acquires this same lock, then releases it.  This means
+that once synchronize_rcu() exits, all RCU read-side critical sections
+that were in progress before synchronize_rcu() was called are guaranteed
+to have completed -- there is no way that synchronize_rcu() would have
+been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
+promotes synchronize_rcu() to a full memory barrier in compliance with
+the "Memory-Barrier Guarantees" listed in:
+
+	Documentation/RCU/Design/Requirements/Requirements.rst
+
+It is possible to nest rcu_read_lock(), since reader-writer locks may
+be recursively acquired.  Note also that rcu_read_lock() is immune
+from deadlock (an important property of RCU).  The reason for this is
+that the only thing that can block rcu_read_lock() is a synchronize_rcu().
+But synchronize_rcu() does not acquire any locks while holding rcu_gp_mutex,
+so there can be no deadlock cycle.
+
+.. _quiz_1:
+
+Quick Quiz #1:
+		Why is this argument naive?  How could a deadlock
+		occur when using this algorithm in a real-world Linux
+		kernel?  How could this deadlock be avoided?
+
+:ref:`Answers to Quick Quiz <8_whatisRCU>`
+
+5B.  "TOY" EXAMPLE #2: CLASSIC RCU
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+This section presents a "toy" RCU implementation that is based on
+"classic RCU".  It is also short on performance (but only for updates) and
+on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT
+kernels.  The definitions of rcu_dereference() and rcu_assign_pointer()
+are the same as those shown in the preceding section, so they are omitted.
+::
+
+	void rcu_read_lock(void) { }
+
+	void rcu_read_unlock(void) { }
+
+	void synchronize_rcu(void)
+	{
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			run_on(cpu);
+	}
+
+Note that rcu_read_lock() and rcu_read_unlock() do absolutely nothing.
+This is the great strength of classic RCU in a non-preemptive kernel:
+read-side overhead is precisely zero, at least on non-Alpha CPUs.
+And there is absolutely no way that rcu_read_lock() can possibly
+participate in a deadlock cycle!
+
+The implementation of synchronize_rcu() simply schedules itself on each
+CPU in turn.  The run_on() primitive can be implemented straightforwardly
+in terms of the sched_setaffinity() primitive.  Of course, a somewhat less
+"toy" implementation would restore the affinity upon completion rather
+than just leaving all tasks running on the last CPU, but when I said
+"toy", I meant **toy**!
+
+So how the heck is this supposed to work???
+
+Remember that it is illegal to block while in an RCU read-side critical
+section.  Therefore, if a given CPU executes a context switch, we know
+that it must have completed all preceding RCU read-side critical sections.
+Once **all** CPUs have executed a context switch, then **all** preceding
+RCU read-side critical sections will have completed.
+
+So, suppose that we remove a data item from its structure and then invoke
+synchronize_rcu().  Once synchronize_rcu() returns, we are guaranteed
+that there are no RCU read-side critical sections holding a reference
+to that data item, so we can safely reclaim it.
+
+.. _quiz_2:
+
+Quick Quiz #2:
+		Give an example where Classic RCU's read-side
+		overhead is **negative**.
+
+:ref:`Answers to Quick Quiz <8_whatisRCU>`
+
+.. _quiz_3:
+
+Quick Quiz #3:
+		If it is illegal to block in an RCU read-side
+		critical section, what the heck do you do in
+		PREEMPT_RT, where normal spinlocks can block???
+
+:ref:`Answers to Quick Quiz <8_whatisRCU>`
+
+.. _6_whatisRCU:
+
+6.  ANALOGY WITH READER-WRITER LOCKING
+--------------------------------------
+
+Although RCU can be used in many different ways, a very common use of
+RCU is analogous to reader-writer locking.  The following unified
+diff shows how closely related RCU and reader-writer locking can be.
+::
+
+	@@ -5,5 +5,5 @@ struct el {
+	 	int data;
+	 	/* Other data fields */
+	 };
+	-rwlock_t listmutex;
+	+spinlock_t listmutex;
+	 struct el head;
+
+	@@ -13,15 +14,15 @@
+		struct list_head *lp;
+		struct el *p;
+
+	-	read_lock(&listmutex);
+	-	list_for_each_entry(p, head, lp) {
+	+	rcu_read_lock();
+	+	list_for_each_entry_rcu(p, head, lp) {
+			if (p->key == key) {
+				*result = p->data;
+	-			read_unlock(&listmutex);
+	+			rcu_read_unlock();
+				return 1;
+			}
+		}
+	-	read_unlock(&listmutex);
+	+	rcu_read_unlock();
+		return 0;
+	 }
+
+	@@ -29,15 +30,16 @@
+	 {
+		struct el *p;
+
+	-	write_lock(&listmutex);
+	+	spin_lock(&listmutex);
+		list_for_each_entry(p, head, lp) {
+			if (p->key == key) {
+	-			list_del(&p->list);
+	-			write_unlock(&listmutex);
+	+			list_del_rcu(&p->list);
+	+			spin_unlock(&listmutex);
+	+			synchronize_rcu();
+				kfree(p);
+				return 1;
+			}
+		}
+	-	write_unlock(&listmutex);
+	+	spin_unlock(&listmutex);
+		return 0;
+	 }
+
+Or, for those who prefer a side-by-side listing::
+
+ 1 struct el {                          1 struct el {
+ 2   struct list_head list;             2   struct list_head list;
+ 3   long key;                          3   long key;
+ 4   spinlock_t mutex;                  4   spinlock_t mutex;
+ 5   int data;                          5   int data;
+ 6   /* Other data fields */            6   /* Other data fields */
+ 7 };                                   7 };
+ 8 rwlock_t listmutex;                  8 spinlock_t listmutex;
+ 9 struct el head;                      9 struct el head;
+
+::
+
+  1 int search(long key, int *result)    1 int search(long key, int *result)
+  2 {                                    2 {
+  3   struct list_head *lp;              3   struct list_head *lp;
+  4   struct el *p;                      4   struct el *p;
+  5                                      5
+  6   read_lock(&listmutex);             6   rcu_read_lock();
+  7   list_for_each_entry(p, head, lp) { 7   list_for_each_entry_rcu(p, head, lp) {
+  8     if (p->key == key) {             8     if (p->key == key) {
+  9       *result = p->data;             9       *result = p->data;
+ 10       read_unlock(&listmutex);      10       rcu_read_unlock();
+ 11       return 1;                     11       return 1;
+ 12     }                               12     }
+ 13   }                                 13   }
+ 14   read_unlock(&listmutex);          14   rcu_read_unlock();
+ 15   return 0;                         15   return 0;
+ 16 }                                   16 }
+
+::
+
+  1 int delete(long key)                 1 int delete(long key)
+  2 {                                    2 {
+  3   struct el *p;                      3   struct el *p;
+  4                                      4
+  5   write_lock(&listmutex);            5   spin_lock(&listmutex);
+  6   list_for_each_entry(p, head, lp) { 6   list_for_each_entry(p, head, lp) {
+  7     if (p->key == key) {             7     if (p->key == key) {
+  8       list_del(&p->list);            8       list_del_rcu(&p->list);
+  9       write_unlock(&listmutex);      9       spin_unlock(&listmutex);
+                                        10       synchronize_rcu();
+ 10       kfree(p);                     11       kfree(p);
+ 11       return 1;                     12       return 1;
+ 12     }                               13     }
+ 13   }                                 14   }
+ 14   write_unlock(&listmutex);         15   spin_unlock(&listmutex);
+ 15   return 0;                         16   return 0;
+ 16 }                                   17 }
+
+Either way, the differences are quite small.  Read-side locking moves
+to rcu_read_lock() and rcu_read_unlock, update-side locking moves from
+a reader-writer lock to a simple spinlock, and a synchronize_rcu()
+precedes the kfree().
+
+However, there is one potential catch: the read-side and update-side
+critical sections can now run concurrently.  In many cases, this will
+not be a problem, but it is necessary to check carefully regardless.
+For example, if multiple independent list updates must be seen as
+a single atomic update, converting to RCU will require special care.
+
+Also, the presence of synchronize_rcu() means that the RCU version of
+delete() can now block.  If this is a problem, there is a callback-based
+mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
+be used in place of synchronize_rcu().
+
+.. _7_whatisRCU:
+
+7.  FULL LIST OF RCU APIs
+-------------------------
+
+The RCU APIs are documented in docbook-format header comments in the
+Linux-kernel source code, but it helps to have a full list of the
+APIs, since there does not appear to be a way to categorize them
+in docbook.  Here is the list, by category.
+
+RCU list traversal::
+
+	list_entry_rcu
+	list_entry_lockless
+	list_first_entry_rcu
+	list_next_rcu
+	list_for_each_entry_rcu
+	list_for_each_entry_continue_rcu
+	list_for_each_entry_from_rcu
+	list_first_or_null_rcu
+	list_next_or_null_rcu
+	hlist_first_rcu
+	hlist_next_rcu
+	hlist_pprev_rcu
+	hlist_for_each_entry_rcu
+	hlist_for_each_entry_rcu_bh
+	hlist_for_each_entry_from_rcu
+	hlist_for_each_entry_continue_rcu
+	hlist_for_each_entry_continue_rcu_bh
+	hlist_nulls_first_rcu
+	hlist_nulls_for_each_entry_rcu
+	hlist_bl_first_rcu
+	hlist_bl_for_each_entry_rcu
+
+RCU pointer/list update::
+
+	rcu_assign_pointer
+	list_add_rcu
+	list_add_tail_rcu
+	list_del_rcu
+	list_replace_rcu
+	hlist_add_behind_rcu
+	hlist_add_before_rcu
+	hlist_add_head_rcu
+	hlist_add_tail_rcu
+	hlist_del_rcu
+	hlist_del_init_rcu
+	hlist_replace_rcu
+	list_splice_init_rcu
+	list_splice_tail_init_rcu
+	hlist_nulls_del_init_rcu
+	hlist_nulls_del_rcu
+	hlist_nulls_add_head_rcu
+	hlist_bl_add_head_rcu
+	hlist_bl_del_init_rcu
+	hlist_bl_del_rcu
+	hlist_bl_set_first_rcu
+
+RCU::
+
+	Critical sections	Grace period		Barrier
+
+	rcu_read_lock		synchronize_net		rcu_barrier
+	rcu_read_unlock		synchronize_rcu
+	rcu_dereference		synchronize_rcu_expedited
+	rcu_read_lock_held	call_rcu
+	rcu_dereference_check	kfree_rcu
+	rcu_dereference_protected
+
+bh::
+
+	Critical sections	Grace period		Barrier
+
+	rcu_read_lock_bh	call_rcu		rcu_barrier
+	rcu_read_unlock_bh	synchronize_rcu
+	[local_bh_disable]	synchronize_rcu_expedited
+	[and friends]
+	rcu_dereference_bh
+	rcu_dereference_bh_check
+	rcu_dereference_bh_protected
+	rcu_read_lock_bh_held
+
+sched::
+
+	Critical sections	Grace period		Barrier
+
+	rcu_read_lock_sched	call_rcu		rcu_barrier
+	rcu_read_unlock_sched	synchronize_rcu
+	[preempt_disable]	synchronize_rcu_expedited
+	[and friends]
+	rcu_read_lock_sched_notrace
+	rcu_read_unlock_sched_notrace
+	rcu_dereference_sched
+	rcu_dereference_sched_check
+	rcu_dereference_sched_protected
+	rcu_read_lock_sched_held
+
+
+SRCU::
+
+	Critical sections	Grace period		Barrier
+
+	srcu_read_lock		call_srcu		srcu_barrier
+	srcu_read_unlock	synchronize_srcu
+	srcu_dereference	synchronize_srcu_expedited
+	srcu_dereference_check
+	srcu_read_lock_held
+
+SRCU: Initialization/cleanup::
+
+	DEFINE_SRCU
+	DEFINE_STATIC_SRCU
+	init_srcu_struct
+	cleanup_srcu_struct
+
+All: lockdep-checked RCU-protected pointer access::
+
+	rcu_access_pointer
+	rcu_dereference_raw
+	RCU_LOCKDEP_WARN
+	rcu_sleep_check
+	RCU_NONIDLE
+
+See the comment headers in the source code (or the docbook generated
+from them) for more information.
+
+However, given that there are no fewer than four families of RCU APIs
+in the Linux kernel, how do you choose which one to use?  The following
+list can be helpful:
+
+a.	Will readers need to block?  If so, you need SRCU.
+
+b.	What about the -rt patchset?  If readers would need to block
+	in an non-rt kernel, you need SRCU.  If readers would block
+	in a -rt kernel, but not in a non-rt kernel, SRCU is not
+	necessary.  (The -rt patchset turns spinlocks into sleeplocks,
+	hence this distinction.)
+
+c.	Do you need to treat NMI handlers, hardirq handlers,
+	and code segments with preemption disabled (whether
+	via preempt_disable(), local_irq_save(), local_bh_disable(),
+	or some other mechanism) as if they were explicit RCU readers?
+	If so, RCU-sched is the only choice that will work for you.
+
+d.	Do you need RCU grace periods to complete even in the face
+	of softirq monopolization of one or more of the CPUs?  For
+	example, is your code subject to network-based denial-of-service
+	attacks?  If so, you should disable softirq across your readers,
+	for example, by using rcu_read_lock_bh().
+
+e.	Is your workload too update-intensive for normal use of
+	RCU, but inappropriate for other synchronization mechanisms?
+	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+	named SLAB_DESTROY_BY_RCU).  But please be careful!
+
+f.	Do you need read-side critical sections that are respected
+	even though they are in the middle of the idle loop, during
+	user-mode execution, or on an offlined CPU?  If so, SRCU is the
+	only choice that will work for you.
+
+g.	Otherwise, use RCU.
+
+Of course, this all assumes that you have determined that RCU is in fact
+the right tool for your job.
+
+.. _8_whatisRCU:
+
+8.  ANSWERS TO QUICK QUIZZES
+----------------------------
+
+Quick Quiz #1:
+		Why is this argument naive?  How could a deadlock
+		occur when using this algorithm in a real-world Linux
+		kernel?  [Referring to the lock-based "toy" RCU
+		algorithm.]
+
+Answer:
+		Consider the following sequence of events:
+
+		1.	CPU 0 acquires some unrelated lock, call it
+			"problematic_lock", disabling irq via
+			spin_lock_irqsave().
+
+		2.	CPU 1 enters synchronize_rcu(), write-acquiring
+			rcu_gp_mutex.
+
+		3.	CPU 0 enters rcu_read_lock(), but must wait
+			because CPU 1 holds rcu_gp_mutex.
+
+		4.	CPU 1 is interrupted, and the irq handler
+			attempts to acquire problematic_lock.
+
+		The system is now deadlocked.
+
+		One way to avoid this deadlock is to use an approach like
+		that of CONFIG_PREEMPT_RT, where all normal spinlocks
+		become blocking locks, and all irq handlers execute in
+		the context of special tasks.  In this case, in step 4
+		above, the irq handler would block, allowing CPU 1 to
+		release rcu_gp_mutex, avoiding the deadlock.
+
+		Even in the absence of deadlock, this RCU implementation
+		allows latency to "bleed" from readers to other
+		readers through synchronize_rcu().  To see this,
+		consider task A in an RCU read-side critical section
+		(thus read-holding rcu_gp_mutex), task B blocked
+		attempting to write-acquire rcu_gp_mutex, and
+		task C blocked in rcu_read_lock() attempting to
+		read_acquire rcu_gp_mutex.  Task A's RCU read-side
+		latency is holding up task C, albeit indirectly via
+		task B.
+
+		Realtime RCU implementations therefore use a counter-based
+		approach where tasks in RCU read-side critical sections
+		cannot be blocked by tasks executing synchronize_rcu().
+
+:ref:`Back to Quick Quiz #1 <quiz_1>`
+
+Quick Quiz #2:
+		Give an example where Classic RCU's read-side
+		overhead is **negative**.
+
+Answer:
+		Imagine a single-CPU system with a non-CONFIG_PREEMPT
+		kernel where a routing table is used by process-context
+		code, but can be updated by irq-context code (for example,
+		by an "ICMP REDIRECT" packet).	The usual way of handling
+		this would be to have the process-context code disable
+		interrupts while searching the routing table.  Use of
+		RCU allows such interrupt-disabling to be dispensed with.
+		Thus, without RCU, you pay the cost of disabling interrupts,
+		and with RCU you don't.
+
+		One can argue that the overhead of RCU in this
+		case is negative with respect to the single-CPU
+		interrupt-disabling approach.  Others might argue that
+		the overhead of RCU is merely zero, and that replacing
+		the positive overhead of the interrupt-disabling scheme
+		with the zero-overhead RCU scheme does not constitute
+		negative overhead.
+
+		In real life, of course, things are more complex.  But
+		even the theoretical possibility of negative overhead for
+		a synchronization primitive is a bit unexpected.  ;-)
+
+:ref:`Back to Quick Quiz #2 <quiz_2>`
+
+Quick Quiz #3:
+		If it is illegal to block in an RCU read-side
+		critical section, what the heck do you do in
+		PREEMPT_RT, where normal spinlocks can block???
+
+Answer:
+		Just as PREEMPT_RT permits preemption of spinlock
+		critical sections, it permits preemption of RCU
+		read-side critical sections.  It also permits
+		spinlocks blocking while in RCU read-side critical
+		sections.
+
+		Why the apparent inconsistency?  Because it is
+		possible to use priority boosting to keep the RCU
+		grace periods short if need be (for example, if running
+		short of memory).  In contrast, if blocking waiting
+		for (say) network reception, there is no way to know
+		what should be boosted.  Especially given that the
+		process we need to boost might well be a human being
+		who just went out for a pizza or something.  And although
+		a computer-operated cattle prod might arouse serious
+		interest, it might also provoke serious objections.
+		Besides, how does the computer know what pizza parlor
+		the human being went to???
+
+:ref:`Back to Quick Quiz #3 <quiz_3>`
+
+ACKNOWLEDGEMENTS
+
+My thanks to the people who helped make this human-readable, including
+Jon Walpole, Josh Triplett, Serge Hallyn, Suzanne Wood, and Alan Stern.
+
+
+For more information, see http://www.rdrop.com/users/paulmck/RCU.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
deleted file mode 100644
index 7e1a8721637a..000000000000
--- a/Documentation/RCU/whatisRCU.txt
+++ /dev/null
@@ -1,1071 +0,0 @@
-What is RCU?  --  "Read, Copy, Update"
-
-Please note that the "What is RCU?" LWN series is an excellent place
-to start learning about RCU:
-
-1.	What is RCU, Fundamentally?  http://lwn.net/Articles/262464/
-2.	What is RCU? Part 2: Usage   http://lwn.net/Articles/263130/
-3.	RCU part 3: the RCU API      http://lwn.net/Articles/264090/
-4.	The RCU API, 2010 Edition    http://lwn.net/Articles/418853/
-	2010 Big API Table           http://lwn.net/Articles/419086/
-5.	The RCU API, 2014 Edition    http://lwn.net/Articles/609904/
-	2014 Big API Table           http://lwn.net/Articles/609973/
-
-
-What is RCU?
-
-RCU is a synchronization mechanism that was added to the Linux kernel
-during the 2.5 development effort that is optimized for read-mostly
-situations.  Although RCU is actually quite simple once you understand it,
-getting there can sometimes be a challenge.  Part of the problem is that
-most of the past descriptions of RCU have been written with the mistaken
-assumption that there is "one true way" to describe RCU.  Instead,
-the experience has been that different people must take different paths
-to arrive at an understanding of RCU.  This document provides several
-different paths, as follows:
-
-1.	RCU OVERVIEW
-2.	WHAT IS RCU'S CORE API?
-3.	WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
-4.	WHAT IF MY UPDATING THREAD CANNOT BLOCK?
-5.	WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
-6.	ANALOGY WITH READER-WRITER LOCKING
-7.	FULL LIST OF RCU APIs
-8.	ANSWERS TO QUICK QUIZZES
-
-People who prefer starting with a conceptual overview should focus on
-Section 1, though most readers will profit by reading this section at
-some point.  People who prefer to start with an API that they can then
-experiment with should focus on Section 2.  People who prefer to start
-with example uses should focus on Sections 3 and 4.  People who need to
-understand the RCU implementation should focus on Section 5, then dive
-into the kernel source code.  People who reason best by analogy should
-focus on Section 6.  Section 7 serves as an index to the docbook API
-documentation, and Section 8 is the traditional answer key.
-
-So, start with the section that makes the most sense to you and your
-preferred method of learning.  If you need to know everything about
-everything, feel free to read the whole thing -- but if you are really
-that type of person, you have perused the source code and will therefore
-never need this document anyway.  ;-)
-
-
-1.  RCU OVERVIEW
-
-The basic idea behind RCU is to split updates into "removal" and
-"reclamation" phases.  The removal phase removes references to data items
-within a data structure (possibly by replacing them with references to
-new versions of these data items), and can run concurrently with readers.
-The reason that it is safe to run the removal phase concurrently with
-readers is the semantics of modern CPUs guarantee that readers will see
-either the old or the new version of the data structure rather than a
-partially updated reference.  The reclamation phase does the work of reclaiming
-(e.g., freeing) the data items removed from the data structure during the
-removal phase.  Because reclaiming data items can disrupt any readers
-concurrently referencing those data items, the reclamation phase must
-not start until readers no longer hold references to those data items.
-
-Splitting the update into removal and reclamation phases permits the
-updater to perform the removal phase immediately, and to defer the
-reclamation phase until all readers active during the removal phase have
-completed, either by blocking until they finish or by registering a
-callback that is invoked after they finish.  Only readers that are active
-during the removal phase need be considered, because any reader starting
-after the removal phase will be unable to gain a reference to the removed
-data items, and therefore cannot be disrupted by the reclamation phase.
-
-So the typical RCU update sequence goes something like the following:
-
-a.	Remove pointers to a data structure, so that subsequent
-	readers cannot gain a reference to it.
-
-b.	Wait for all previous readers to complete their RCU read-side
-	critical sections.
-
-c.	At this point, there cannot be any readers who hold references
-	to the data structure, so it now may safely be reclaimed
-	(e.g., kfree()d).
-
-Step (b) above is the key idea underlying RCU's deferred destruction.
-The ability to wait until all readers are done allows RCU readers to
-use much lighter-weight synchronization, in some cases, absolutely no
-synchronization at all.  In contrast, in more conventional lock-based
-schemes, readers must use heavy-weight synchronization in order to
-prevent an updater from deleting the data structure out from under them.
-This is because lock-based updaters typically update data items in place,
-and must therefore exclude readers.  In contrast, RCU-based updaters
-typically take advantage of the fact that writes to single aligned
-pointers are atomic on modern CPUs, allowing atomic insertion, removal,
-and replacement of data items in a linked structure without disrupting
-readers.  Concurrent RCU readers can then continue accessing the old
-versions, and can dispense with the atomic operations, memory barriers,
-and communications cache misses that are so expensive on present-day
-SMP computer systems, even in absence of lock contention.
-
-In the three-step procedure shown above, the updater is performing both
-the removal and the reclamation step, but it is often helpful for an
-entirely different thread to do the reclamation, as is in fact the case
-in the Linux kernel's directory-entry cache (dcache).  Even if the same
-thread performs both the update step (step (a) above) and the reclamation
-step (step (c) above), it is often helpful to think of them separately.
-For example, RCU readers and updaters need not communicate at all,
-but RCU provides implicit low-overhead communication between readers
-and reclaimers, namely, in step (b) above.
-
-So how the heck can a reclaimer tell when a reader is done, given
-that readers are not doing any sort of synchronization operations???
-Read on to learn about how RCU's API makes this easy.
-
-
-2.  WHAT IS RCU'S CORE API?
-
-The core RCU API is quite small:
-
-a.	rcu_read_lock()
-b.	rcu_read_unlock()
-c.	synchronize_rcu() / call_rcu()
-d.	rcu_assign_pointer()
-e.	rcu_dereference()
-
-There are many other members of the RCU API, but the rest can be
-expressed in terms of these five, though most implementations instead
-express synchronize_rcu() in terms of the call_rcu() callback API.
-
-The five core RCU APIs are described below, the other 18 will be enumerated
-later.  See the kernel docbook documentation for more info, or look directly
-at the function header comments.
-
-rcu_read_lock()
-
-	void rcu_read_lock(void);
-
-	Used by a reader to inform the reclaimer that the reader is
-	entering an RCU read-side critical section.  It is illegal
-	to block while in an RCU read-side critical section, though
-	kernels built with CONFIG_PREEMPT_RCU can preempt RCU
-	read-side critical sections.  Any RCU-protected data structure
-	accessed during an RCU read-side critical section is guaranteed to
-	remain unreclaimed for the full duration of that critical section.
-	Reference counts may be used in conjunction with RCU to maintain
-	longer-term references to data structures.
-
-rcu_read_unlock()
-
-	void rcu_read_unlock(void);
-
-	Used by a reader to inform the reclaimer that the reader is
-	exiting an RCU read-side critical section.  Note that RCU
-	read-side critical sections may be nested and/or overlapping.
-
-synchronize_rcu()
-
-	void synchronize_rcu(void);
-
-	Marks the end of updater code and the beginning of reclaimer
-	code.  It does this by blocking until all pre-existing RCU
-	read-side critical sections on all CPUs have completed.
-	Note that synchronize_rcu() will -not- necessarily wait for
-	any subsequent RCU read-side critical sections to complete.
-	For example, consider the following sequence of events:
-
-	         CPU 0                  CPU 1                 CPU 2
-	     ----------------- ------------------------- ---------------
-	 1.  rcu_read_lock()
-	 2.                    enters synchronize_rcu()
-	 3.                                               rcu_read_lock()
-	 4.  rcu_read_unlock()
-	 5.                     exits synchronize_rcu()
-	 6.                                              rcu_read_unlock()
-
-	To reiterate, synchronize_rcu() waits only for ongoing RCU
-	read-side critical sections to complete, not necessarily for
-	any that begin after synchronize_rcu() is invoked.
-
-	Of course, synchronize_rcu() does not necessarily return
-	-immediately- after the last pre-existing RCU read-side critical
-	section completes.  For one thing, there might well be scheduling
-	delays.  For another thing, many RCU implementations process
-	requests in batches in order to improve efficiencies, which can
-	further delay synchronize_rcu().
-
-	Since synchronize_rcu() is the API that must figure out when
-	readers are done, its implementation is key to RCU.  For RCU
-	to be useful in all but the most read-intensive situations,
-	synchronize_rcu()'s overhead must also be quite small.
-
-	The call_rcu() API is a callback form of synchronize_rcu(),
-	and is described in more detail in a later section.  Instead of
-	blocking, it registers a function and argument which are invoked
-	after all ongoing RCU read-side critical sections have completed.
-	This callback variant is particularly useful in situations where
-	it is illegal to block or where update-side performance is
-	critically important.
-
-	However, the call_rcu() API should not be used lightly, as use
-	of the synchronize_rcu() API generally results in simpler code.
-	In addition, the synchronize_rcu() API has the nice property
-	of automatically limiting update rate should grace periods
-	be delayed.  This property results in system resilience in face
-	of denial-of-service attacks.  Code using call_rcu() should limit
-	update rate in order to gain this same sort of resilience.  See
-	checklist.txt for some approaches to limiting the update rate.
-
-rcu_assign_pointer()
-
-	void rcu_assign_pointer(p, typeof(p) v);
-
-	Yes, rcu_assign_pointer() -is- implemented as a macro, though it
-	would be cool to be able to declare a function in this manner.
-	(Compiler experts will no doubt disagree.)
-
-	The updater uses this function to assign a new value to an
-	RCU-protected pointer, in order to safely communicate the change
-	in value from the updater to the reader.  This macro does not
-	evaluate to an rvalue, but it does execute any memory-barrier
-	instructions required for a given CPU architecture.
-
-	Perhaps just as important, it serves to document (1) which
-	pointers are protected by RCU and (2) the point at which a
-	given structure becomes accessible to other CPUs.  That said,
-	rcu_assign_pointer() is most frequently used indirectly, via
-	the _rcu list-manipulation primitives such as list_add_rcu().
-
-rcu_dereference()
-
-	typeof(p) rcu_dereference(p);
-
-	Like rcu_assign_pointer(), rcu_dereference() must be implemented
-	as a macro.
-
-	The reader uses rcu_dereference() to fetch an RCU-protected
-	pointer, which returns a value that may then be safely
-	dereferenced.  Note that rcu_dereference() does not actually
-	dereference the pointer, instead, it protects the pointer for
-	later dereferencing.  It also executes any needed memory-barrier
-	instructions for a given CPU architecture.  Currently, only Alpha
-	needs memory barriers within rcu_dereference() -- on other CPUs,
-	it compiles to nothing, not even a compiler directive.
-
-	Common coding practice uses rcu_dereference() to copy an
-	RCU-protected pointer to a local variable, then dereferences
-	this local variable, for example as follows:
-
-		p = rcu_dereference(head.next);
-		return p->data;
-
-	However, in this case, one could just as easily combine these
-	into one statement:
-
-		return rcu_dereference(head.next)->data;
-
-	If you are going to be fetching multiple fields from the
-	RCU-protected structure, using the local variable is of
-	course preferred.  Repeated rcu_dereference() calls look
-	ugly, do not guarantee that the same pointer will be returned
-	if an update happened while in the critical section, and incur
-	unnecessary overhead on Alpha CPUs.
-
-	Note that the value returned by rcu_dereference() is valid
-	only within the enclosing RCU read-side critical section [1].
-	For example, the following is -not- legal:
-
-		rcu_read_lock();
-		p = rcu_dereference(head.next);
-		rcu_read_unlock();
-		x = p->address;	/* BUG!!! */
-		rcu_read_lock();
-		y = p->data;	/* BUG!!! */
-		rcu_read_unlock();
-
-	Holding a reference from one RCU read-side critical section
-	to another is just as illegal as holding a reference from
-	one lock-based critical section to another!  Similarly,
-	using a reference outside of the critical section in which
-	it was acquired is just as illegal as doing so with normal
-	locking.
-
-	As with rcu_assign_pointer(), an important function of
-	rcu_dereference() is to document which pointers are protected by
-	RCU, in particular, flagging a pointer that is subject to changing
-	at any time, including immediately after the rcu_dereference().
-	And, again like rcu_assign_pointer(), rcu_dereference() is
-	typically used indirectly, via the _rcu list-manipulation
-	primitives, such as list_for_each_entry_rcu().
-
-	[1] The variant rcu_dereference_protected() can be used outside
-	of an RCU read-side critical section as long as the usage is
-	protected by locks acquired by the update-side code.  This variant
-	avoids the lockdep warning that would happen when using (for
-	example) rcu_dereference() without rcu_read_lock() protection.
-	Using rcu_dereference_protected() also has the advantage
-	of permitting compiler optimizations that rcu_dereference()
-	must prohibit.	The rcu_dereference_protected() variant takes
-	a lockdep expression to indicate which locks must be acquired
-	by the caller. If the indicated protection is not provided,
-	a lockdep splat is emitted.  See RCU/Design/Requirements/Requirements.html
-	and the API's code comments for more details and example usage.
-
-The following diagram shows how each API communicates among the
-reader, updater, and reclaimer.
-
-
-	    rcu_assign_pointer()
-	                            +--------+
-	    +---------------------->| reader |---------+
-	    |                       +--------+         |
-	    |                           |              |
-	    |                           |              | Protect:
-	    |                           |              | rcu_read_lock()
-	    |                           |              | rcu_read_unlock()
-	    |        rcu_dereference()  |              |
-	    +---------+                 |              |
-	    | updater |<----------------+              |
-	    +---------+                                V
-	    |                                    +-----------+
-	    +----------------------------------->| reclaimer |
-	                                         +-----------+
-	      Defer:
-	      synchronize_rcu() & call_rcu()
-
-
-The RCU infrastructure observes the time sequence of rcu_read_lock(),
-rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
-order to determine when (1) synchronize_rcu() invocations may return
-to their callers and (2) call_rcu() callbacks may be invoked.  Efficient
-implementations of the RCU infrastructure make heavy use of batching in
-order to amortize their overhead over many uses of the corresponding APIs.
-
-There are at least three flavors of RCU usage in the Linux kernel. The diagram
-above shows the most common one. On the updater side, the rcu_assign_pointer(),
-sychronize_rcu() and call_rcu() primitives used are the same for all three
-flavors. However for protection (on the reader side), the primitives used vary
-depending on the flavor:
-
-a.	rcu_read_lock() / rcu_read_unlock()
-	rcu_dereference()
-
-b.	rcu_read_lock_bh() / rcu_read_unlock_bh()
-	local_bh_disable() / local_bh_enable()
-	rcu_dereference_bh()
-
-c.	rcu_read_lock_sched() / rcu_read_unlock_sched()
-	preempt_disable() / preempt_enable()
-	local_irq_save() / local_irq_restore()
-	hardirq enter / hardirq exit
-	NMI enter / NMI exit
-	rcu_dereference_sched()
-
-These three flavors are used as follows:
-
-a.	RCU applied to normal data structures.
-
-b.	RCU applied to networking data structures that may be subjected
-	to remote denial-of-service attacks.
-
-c.	RCU applied to scheduler and interrupt/NMI-handler tasks.
-
-Again, most uses will be of (a).  The (b) and (c) cases are important
-for specialized uses, but are relatively uncommon.
-
-
-3.  WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
-
-This section shows a simple use of the core RCU API to protect a
-global pointer to a dynamically allocated structure.  More-typical
-uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
-
-	struct foo {
-		int a;
-		char b;
-		long c;
-	};
-	DEFINE_SPINLOCK(foo_mutex);
-
-	struct foo __rcu *gbl_foo;
-
-	/*
-	 * Create a new struct foo that is the same as the one currently
-	 * pointed to by gbl_foo, except that field "a" is replaced
-	 * with "new_a".  Points gbl_foo to the new structure, and
-	 * frees up the old structure after a grace period.
-	 *
-	 * Uses rcu_assign_pointer() to ensure that concurrent readers
-	 * see the initialized version of the new structure.
-	 *
-	 * Uses synchronize_rcu() to ensure that any readers that might
-	 * have references to the old structure complete before freeing
-	 * the old structure.
-	 */
-	void foo_update_a(int new_a)
-	{
-		struct foo *new_fp;
-		struct foo *old_fp;
-
-		new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
-		spin_lock(&foo_mutex);
-		old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
-		*new_fp = *old_fp;
-		new_fp->a = new_a;
-		rcu_assign_pointer(gbl_foo, new_fp);
-		spin_unlock(&foo_mutex);
-		synchronize_rcu();
-		kfree(old_fp);
-	}
-
-	/*
-	 * Return the value of field "a" of the current gbl_foo
-	 * structure.  Use rcu_read_lock() and rcu_read_unlock()
-	 * to ensure that the structure does not get deleted out
-	 * from under us, and use rcu_dereference() to ensure that
-	 * we see the initialized version of the structure (important
-	 * for DEC Alpha and for people reading the code).
-	 */
-	int foo_get_a(void)
-	{
-		int retval;
-
-		rcu_read_lock();
-		retval = rcu_dereference(gbl_foo)->a;
-		rcu_read_unlock();
-		return retval;
-	}
-
-So, to sum up:
-
-o	Use rcu_read_lock() and rcu_read_unlock() to guard RCU
-	read-side critical sections.
-
-o	Within an RCU read-side critical section, use rcu_dereference()
-	to dereference RCU-protected pointers.
-
-o	Use some solid scheme (such as locks or semaphores) to
-	keep concurrent updates from interfering with each other.
-
-o	Use rcu_assign_pointer() to update an RCU-protected pointer.
-	This primitive protects concurrent readers from the updater,
-	-not- concurrent updates from each other!  You therefore still
-	need to use locking (or something similar) to keep concurrent
-	rcu_assign_pointer() primitives from interfering with each other.
-
-o	Use synchronize_rcu() -after- removing a data element from an
-	RCU-protected data structure, but -before- reclaiming/freeing
-	the data element, in order to wait for the completion of all
-	RCU read-side critical sections that might be referencing that
-	data item.
-
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in listRCU.txt,
-arrayRCU.txt, and NMI-RCU.txt.
-
-
-4.  WHAT IF MY UPDATING THREAD CANNOT BLOCK?
-
-In the example above, foo_update_a() blocks until a grace period elapses.
-This is quite simple, but in some cases one cannot afford to wait so
-long -- there might be other high-priority work to be done.
-
-In such cases, one uses call_rcu() rather than synchronize_rcu().
-The call_rcu() API is as follows:
-
-	void call_rcu(struct rcu_head * head,
-		      void (*func)(struct rcu_head *head));
-
-This function invokes func(head) after a grace period has elapsed.
-This invocation might happen from either softirq or process context,
-so the function is not permitted to block.  The foo struct needs to
-have an rcu_head structure added, perhaps as follows:
-
-	struct foo {
-		int a;
-		char b;
-		long c;
-		struct rcu_head rcu;
-	};
-
-The foo_update_a() function might then be written as follows:
-
-	/*
-	 * Create a new struct foo that is the same as the one currently
-	 * pointed to by gbl_foo, except that field "a" is replaced
-	 * with "new_a".  Points gbl_foo to the new structure, and
-	 * frees up the old structure after a grace period.
-	 *
-	 * Uses rcu_assign_pointer() to ensure that concurrent readers
-	 * see the initialized version of the new structure.
-	 *
-	 * Uses call_rcu() to ensure that any readers that might have
-	 * references to the old structure complete before freeing the
-	 * old structure.
-	 */
-	void foo_update_a(int new_a)
-	{
-		struct foo *new_fp;
-		struct foo *old_fp;
-
-		new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
-		spin_lock(&foo_mutex);
-		old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
-		*new_fp = *old_fp;
-		new_fp->a = new_a;
-		rcu_assign_pointer(gbl_foo, new_fp);
-		spin_unlock(&foo_mutex);
-		call_rcu(&old_fp->rcu, foo_reclaim);
-	}
-
-The foo_reclaim() function might appear as follows:
-
-	void foo_reclaim(struct rcu_head *rp)
-	{
-		struct foo *fp = container_of(rp, struct foo, rcu);
-
-		foo_cleanup(fp->a);
-
-		kfree(fp);
-	}
-
-The container_of() primitive is a macro that, given a pointer into a
-struct, the type of the struct, and the pointed-to field within the
-struct, returns a pointer to the beginning of the struct.
-
-The use of call_rcu() permits the caller of foo_update_a() to
-immediately regain control, without needing to worry further about the
-old version of the newly updated element.  It also clearly shows the
-RCU distinction between updater, namely foo_update_a(), and reclaimer,
-namely foo_reclaim().
-
-The summary of advice is the same as for the previous section, except
-that we are now using call_rcu() rather than synchronize_rcu():
-
-o	Use call_rcu() -after- removing a data element from an
-	RCU-protected data structure in order to register a callback
-	function that will be invoked after the completion of all RCU
-	read-side critical sections that might be referencing that
-	data item.
-
-If the callback for call_rcu() is not doing anything more than calling
-kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
-to avoid having to write your own callback:
-
-	kfree_rcu(old_fp, rcu);
-
-Again, see checklist.txt for additional rules governing the use of RCU.
-
-
-5.  WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
-
-One of the nice things about RCU is that it has extremely simple "toy"
-implementations that are a good first step towards understanding the
-production-quality implementations in the Linux kernel.  This section
-presents two such "toy" implementations of RCU, one that is implemented
-in terms of familiar locking primitives, and another that more closely
-resembles "classic" RCU.  Both are way too simple for real-world use,
-lacking both functionality and performance.  However, they are useful
-in getting a feel for how RCU works.  See kernel/rcu/update.c for a
-production-quality implementation, and see:
-
-	http://www.rdrop.com/users/paulmck/RCU
-
-for papers describing the Linux kernel RCU implementation.  The OLS'01
-and OLS'02 papers are a good introduction, and the dissertation provides
-more details on the current implementation as of early 2004.
-
-
-5A.  "TOY" IMPLEMENTATION #1: LOCKING
-
-This section presents a "toy" RCU implementation that is based on
-familiar locking primitives.  Its overhead makes it a non-starter for
-real-life use, as does its lack of scalability.  It is also unsuitable
-for realtime use, since it allows scheduling latency to "bleed" from
-one read-side critical section to another.  It also assumes recursive
-reader-writer locks:  If you try this with non-recursive locks, and
-you allow nested rcu_read_lock() calls, you can deadlock.
-
-However, it is probably the easiest implementation to relate to, so is
-a good starting point.
-
-It is extremely simple:
-
-	static DEFINE_RWLOCK(rcu_gp_mutex);
-
-	void rcu_read_lock(void)
-	{
-		read_lock(&rcu_gp_mutex);
-	}
-
-	void rcu_read_unlock(void)
-	{
-		read_unlock(&rcu_gp_mutex);
-	}
-
-	void synchronize_rcu(void)
-	{
-		write_lock(&rcu_gp_mutex);
-		smp_mb__after_spinlock();
-		write_unlock(&rcu_gp_mutex);
-	}
-
-[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
-much.  But here are simplified versions anyway.  And whatever you do,
-don't forget about them when submitting patches making use of RCU!]
-
-	#define rcu_assign_pointer(p, v) \
-	({ \
-		smp_store_release(&(p), (v)); \
-	})
-
-	#define rcu_dereference(p) \
-	({ \
-		typeof(p) _________p1 = READ_ONCE(p); \
-		(_________p1); \
-	})
-
-
-The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
-and release a global reader-writer lock.  The synchronize_rcu()
-primitive write-acquires this same lock, then releases it.  This means
-that once synchronize_rcu() exits, all RCU read-side critical sections
-that were in progress before synchronize_rcu() was called are guaranteed
-to have completed -- there is no way that synchronize_rcu() would have
-been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
-promotes synchronize_rcu() to a full memory barrier in compliance with
-the "Memory-Barrier Guarantees" listed in:
-
-	Documentation/RCU/Design/Requirements/Requirements.html.
-
-It is possible to nest rcu_read_lock(), since reader-writer locks may
-be recursively acquired.  Note also that rcu_read_lock() is immune
-from deadlock (an important property of RCU).  The reason for this is
-that the only thing that can block rcu_read_lock() is a synchronize_rcu().
-But synchronize_rcu() does not acquire any locks while holding rcu_gp_mutex,
-so there can be no deadlock cycle.
-
-Quick Quiz #1:	Why is this argument naive?  How could a deadlock
-		occur when using this algorithm in a real-world Linux
-		kernel?  How could this deadlock be avoided?
-
-
-5B.  "TOY" EXAMPLE #2: CLASSIC RCU
-
-This section presents a "toy" RCU implementation that is based on
-"classic RCU".  It is also short on performance (but only for updates) and
-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT
-kernels.  The definitions of rcu_dereference() and rcu_assign_pointer()
-are the same as those shown in the preceding section, so they are omitted.
-
-	void rcu_read_lock(void) { }
-
-	void rcu_read_unlock(void) { }
-
-	void synchronize_rcu(void)
-	{
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			run_on(cpu);
-	}
-
-Note that rcu_read_lock() and rcu_read_unlock() do absolutely nothing.
-This is the great strength of classic RCU in a non-preemptive kernel:
-read-side overhead is precisely zero, at least on non-Alpha CPUs.
-And there is absolutely no way that rcu_read_lock() can possibly
-participate in a deadlock cycle!
-
-The implementation of synchronize_rcu() simply schedules itself on each
-CPU in turn.  The run_on() primitive can be implemented straightforwardly
-in terms of the sched_setaffinity() primitive.  Of course, a somewhat less
-"toy" implementation would restore the affinity upon completion rather
-than just leaving all tasks running on the last CPU, but when I said
-"toy", I meant -toy-!
-
-So how the heck is this supposed to work???
-
-Remember that it is illegal to block while in an RCU read-side critical
-section.  Therefore, if a given CPU executes a context switch, we know
-that it must have completed all preceding RCU read-side critical sections.
-Once -all- CPUs have executed a context switch, then -all- preceding
-RCU read-side critical sections will have completed.
-
-So, suppose that we remove a data item from its structure and then invoke
-synchronize_rcu().  Once synchronize_rcu() returns, we are guaranteed
-that there are no RCU read-side critical sections holding a reference
-to that data item, so we can safely reclaim it.
-
-Quick Quiz #2:	Give an example where Classic RCU's read-side
-		overhead is -negative-.
-
-Quick Quiz #3:  If it is illegal to block in an RCU read-side
-		critical section, what the heck do you do in
-		PREEMPT_RT, where normal spinlocks can block???
-
-
-6.  ANALOGY WITH READER-WRITER LOCKING
-
-Although RCU can be used in many different ways, a very common use of
-RCU is analogous to reader-writer locking.  The following unified
-diff shows how closely related RCU and reader-writer locking can be.
-
-	@@ -5,5 +5,5 @@ struct el {
-	 	int data;
-	 	/* Other data fields */
-	 };
-	-rwlock_t listmutex;
-	+spinlock_t listmutex;
-	 struct el head;
-
-	@@ -13,15 +14,15 @@
-		struct list_head *lp;
-		struct el *p;
-
-	-	read_lock(&listmutex);
-	-	list_for_each_entry(p, head, lp) {
-	+	rcu_read_lock();
-	+	list_for_each_entry_rcu(p, head, lp) {
-			if (p->key == key) {
-				*result = p->data;
-	-			read_unlock(&listmutex);
-	+			rcu_read_unlock();
-				return 1;
-			}
-		}
-	-	read_unlock(&listmutex);
-	+	rcu_read_unlock();
-		return 0;
-	 }
-
-	@@ -29,15 +30,16 @@
-	 {
-		struct el *p;
-
-	-	write_lock(&listmutex);
-	+	spin_lock(&listmutex);
-		list_for_each_entry(p, head, lp) {
-			if (p->key == key) {
-	-			list_del(&p->list);
-	-			write_unlock(&listmutex);
-	+			list_del_rcu(&p->list);
-	+			spin_unlock(&listmutex);
-	+			synchronize_rcu();
-				kfree(p);
-				return 1;
-			}
-		}
-	-	write_unlock(&listmutex);
-	+	spin_unlock(&listmutex);
-		return 0;
-	 }
-
-Or, for those who prefer a side-by-side listing:
-
- 1 struct el {                          1 struct el {
- 2   struct list_head list;             2   struct list_head list;
- 3   long key;                          3   long key;
- 4   spinlock_t mutex;                  4   spinlock_t mutex;
- 5   int data;                          5   int data;
- 6   /* Other data fields */            6   /* Other data fields */
- 7 };                                   7 };
- 8 rwlock_t listmutex;                  8 spinlock_t listmutex;
- 9 struct el head;                      9 struct el head;
-
- 1 int search(long key, int *result)    1 int search(long key, int *result)
- 2 {                                    2 {
- 3   struct list_head *lp;              3   struct list_head *lp;
- 4   struct el *p;                      4   struct el *p;
- 5                                      5
- 6   read_lock(&listmutex);             6   rcu_read_lock();
- 7   list_for_each_entry(p, head, lp) { 7   list_for_each_entry_rcu(p, head, lp) {
- 8     if (p->key == key) {             8     if (p->key == key) {
- 9       *result = p->data;             9       *result = p->data;
-10       read_unlock(&listmutex);      10       rcu_read_unlock();
-11       return 1;                     11       return 1;
-12     }                               12     }
-13   }                                 13   }
-14   read_unlock(&listmutex);          14   rcu_read_unlock();
-15   return 0;                         15   return 0;
-16 }                                   16 }
-
- 1 int delete(long key)                 1 int delete(long key)
- 2 {                                    2 {
- 3   struct el *p;                      3   struct el *p;
- 4                                      4
- 5   write_lock(&listmutex);            5   spin_lock(&listmutex);
- 6   list_for_each_entry(p, head, lp) { 6   list_for_each_entry(p, head, lp) {
- 7     if (p->key == key) {             7     if (p->key == key) {
- 8       list_del(&p->list);            8       list_del_rcu(&p->list);
- 9       write_unlock(&listmutex);      9       spin_unlock(&listmutex);
-                                       10       synchronize_rcu();
-10       kfree(p);                     11       kfree(p);
-11       return 1;                     12       return 1;
-12     }                               13     }
-13   }                                 14   }
-14   write_unlock(&listmutex);         15   spin_unlock(&listmutex);
-15   return 0;                         16   return 0;
-16 }                                   17 }
-
-Either way, the differences are quite small.  Read-side locking moves
-to rcu_read_lock() and rcu_read_unlock, update-side locking moves from
-a reader-writer lock to a simple spinlock, and a synchronize_rcu()
-precedes the kfree().
-
-However, there is one potential catch: the read-side and update-side
-critical sections can now run concurrently.  In many cases, this will
-not be a problem, but it is necessary to check carefully regardless.
-For example, if multiple independent list updates must be seen as
-a single atomic update, converting to RCU will require special care.
-
-Also, the presence of synchronize_rcu() means that the RCU version of
-delete() can now block.  If this is a problem, there is a callback-based
-mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
-be used in place of synchronize_rcu().
-
-
-7.  FULL LIST OF RCU APIs
-
-The RCU APIs are documented in docbook-format header comments in the
-Linux-kernel source code, but it helps to have a full list of the
-APIs, since there does not appear to be a way to categorize them
-in docbook.  Here is the list, by category.
-
-RCU list traversal:
-
-	list_entry_rcu
-	list_first_entry_rcu
-	list_next_rcu
-	list_for_each_entry_rcu
-	list_for_each_entry_continue_rcu
-	list_for_each_entry_from_rcu
-	hlist_first_rcu
-	hlist_next_rcu
-	hlist_pprev_rcu
-	hlist_for_each_entry_rcu
-	hlist_for_each_entry_rcu_bh
-	hlist_for_each_entry_from_rcu
-	hlist_for_each_entry_continue_rcu
-	hlist_for_each_entry_continue_rcu_bh
-	hlist_nulls_first_rcu
-	hlist_nulls_for_each_entry_rcu
-	hlist_bl_first_rcu
-	hlist_bl_for_each_entry_rcu
-
-RCU pointer/list update:
-
-	rcu_assign_pointer
-	list_add_rcu
-	list_add_tail_rcu
-	list_del_rcu
-	list_replace_rcu
-	hlist_add_behind_rcu
-	hlist_add_before_rcu
-	hlist_add_head_rcu
-	hlist_del_rcu
-	hlist_del_init_rcu
-	hlist_replace_rcu
-	list_splice_init_rcu()
-	hlist_nulls_del_init_rcu
-	hlist_nulls_del_rcu
-	hlist_nulls_add_head_rcu
-	hlist_bl_add_head_rcu
-	hlist_bl_del_init_rcu
-	hlist_bl_del_rcu
-	hlist_bl_set_first_rcu
-
-RCU:	Critical sections	Grace period		Barrier
-
-	rcu_read_lock		synchronize_net		rcu_barrier
-	rcu_read_unlock		synchronize_rcu
-	rcu_dereference		synchronize_rcu_expedited
-	rcu_read_lock_held	call_rcu
-	rcu_dereference_check	kfree_rcu
-	rcu_dereference_protected
-
-bh:	Critical sections	Grace period		Barrier
-
-	rcu_read_lock_bh	call_rcu		rcu_barrier
-	rcu_read_unlock_bh	synchronize_rcu
-	[local_bh_disable]	synchronize_rcu_expedited
-	[and friends]
-	rcu_dereference_bh
-	rcu_dereference_bh_check
-	rcu_dereference_bh_protected
-	rcu_read_lock_bh_held
-
-sched:	Critical sections	Grace period		Barrier
-
-	rcu_read_lock_sched	call_rcu		rcu_barrier
-	rcu_read_unlock_sched	synchronize_rcu
-	[preempt_disable]	synchronize_rcu_expedited
-	[and friends]
-	rcu_read_lock_sched_notrace
-	rcu_read_unlock_sched_notrace
-	rcu_dereference_sched
-	rcu_dereference_sched_check
-	rcu_dereference_sched_protected
-	rcu_read_lock_sched_held
-
-
-SRCU:	Critical sections	Grace period		Barrier
-
-	srcu_read_lock		call_srcu		srcu_barrier
-	srcu_read_unlock	synchronize_srcu
-	srcu_dereference	synchronize_srcu_expedited
-	srcu_dereference_check
-	srcu_read_lock_held
-
-SRCU:	Initialization/cleanup
-	DEFINE_SRCU
-	DEFINE_STATIC_SRCU
-	init_srcu_struct
-	cleanup_srcu_struct
-
-All:  lockdep-checked RCU-protected pointer access
-
-	rcu_access_pointer
-	rcu_dereference_raw
-	RCU_LOCKDEP_WARN
-	rcu_sleep_check
-	RCU_NONIDLE
-
-See the comment headers in the source code (or the docbook generated
-from them) for more information.
-
-However, given that there are no fewer than four families of RCU APIs
-in the Linux kernel, how do you choose which one to use?  The following
-list can be helpful:
-
-a.	Will readers need to block?  If so, you need SRCU.
-
-b.	What about the -rt patchset?  If readers would need to block
-	in an non-rt kernel, you need SRCU.  If readers would block
-	in a -rt kernel, but not in a non-rt kernel, SRCU is not
-	necessary.  (The -rt patchset turns spinlocks into sleeplocks,
-	hence this distinction.)
-
-c.	Do you need to treat NMI handlers, hardirq handlers,
-	and code segments with preemption disabled (whether
-	via preempt_disable(), local_irq_save(), local_bh_disable(),
-	or some other mechanism) as if they were explicit RCU readers?
-	If so, RCU-sched is the only choice that will work for you.
-
-d.	Do you need RCU grace periods to complete even in the face
-	of softirq monopolization of one or more of the CPUs?  For
-	example, is your code subject to network-based denial-of-service
-	attacks?  If so, you should disable softirq across your readers,
-	for example, by using rcu_read_lock_bh().
-
-e.	Is your workload too update-intensive for normal use of
-	RCU, but inappropriate for other synchronization mechanisms?
-	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
-	named SLAB_DESTROY_BY_RCU).  But please be careful!
-
-f.	Do you need read-side critical sections that are respected
-	even though they are in the middle of the idle loop, during
-	user-mode execution, or on an offlined CPU?  If so, SRCU is the
-	only choice that will work for you.
-
-g.	Otherwise, use RCU.
-
-Of course, this all assumes that you have determined that RCU is in fact
-the right tool for your job.
-
-
-8.  ANSWERS TO QUICK QUIZZES
-
-Quick Quiz #1:	Why is this argument naive?  How could a deadlock
-		occur when using this algorithm in a real-world Linux
-		kernel?  [Referring to the lock-based "toy" RCU
-		algorithm.]
-
-Answer:		Consider the following sequence of events:
-
-		1.	CPU 0 acquires some unrelated lock, call it
-			"problematic_lock", disabling irq via
-			spin_lock_irqsave().
-
-		2.	CPU 1 enters synchronize_rcu(), write-acquiring
-			rcu_gp_mutex.
-
-		3.	CPU 0 enters rcu_read_lock(), but must wait
-			because CPU 1 holds rcu_gp_mutex.
-
-		4.	CPU 1 is interrupted, and the irq handler
-			attempts to acquire problematic_lock.
-
-		The system is now deadlocked.
-
-		One way to avoid this deadlock is to use an approach like
-		that of CONFIG_PREEMPT_RT, where all normal spinlocks
-		become blocking locks, and all irq handlers execute in
-		the context of special tasks.  In this case, in step 4
-		above, the irq handler would block, allowing CPU 1 to
-		release rcu_gp_mutex, avoiding the deadlock.
-
-		Even in the absence of deadlock, this RCU implementation
-		allows latency to "bleed" from readers to other
-		readers through synchronize_rcu().  To see this,
-		consider task A in an RCU read-side critical section
-		(thus read-holding rcu_gp_mutex), task B blocked
-		attempting to write-acquire rcu_gp_mutex, and
-		task C blocked in rcu_read_lock() attempting to
-		read_acquire rcu_gp_mutex.  Task A's RCU read-side
-		latency is holding up task C, albeit indirectly via
-		task B.
-
-		Realtime RCU implementations therefore use a counter-based
-		approach where tasks in RCU read-side critical sections
-		cannot be blocked by tasks executing synchronize_rcu().
-
-Quick Quiz #2:	Give an example where Classic RCU's read-side
-		overhead is -negative-.
-
-Answer:		Imagine a single-CPU system with a non-CONFIG_PREEMPT
-		kernel where a routing table is used by process-context
-		code, but can be updated by irq-context code (for example,
-		by an "ICMP REDIRECT" packet).	The usual way of handling
-		this would be to have the process-context code disable
-		interrupts while searching the routing table.  Use of
-		RCU allows such interrupt-disabling to be dispensed with.
-		Thus, without RCU, you pay the cost of disabling interrupts,
-		and with RCU you don't.
-
-		One can argue that the overhead of RCU in this
-		case is negative with respect to the single-CPU
-		interrupt-disabling approach.  Others might argue that
-		the overhead of RCU is merely zero, and that replacing
-		the positive overhead of the interrupt-disabling scheme
-		with the zero-overhead RCU scheme does not constitute
-		negative overhead.
-
-		In real life, of course, things are more complex.  But
-		even the theoretical possibility of negative overhead for
-		a synchronization primitive is a bit unexpected.  ;-)
-
-Quick Quiz #3:  If it is illegal to block in an RCU read-side
-		critical section, what the heck do you do in
-		PREEMPT_RT, where normal spinlocks can block???
-
-Answer:		Just as PREEMPT_RT permits preemption of spinlock
-		critical sections, it permits preemption of RCU
-		read-side critical sections.  It also permits
-		spinlocks blocking while in RCU read-side critical
-		sections.
-
-		Why the apparent inconsistency?  Because it is
-		possible to use priority boosting to keep the RCU
-		grace periods short if need be (for example, if running
-		short of memory).  In contrast, if blocking waiting
-		for (say) network reception, there is no way to know
-		what should be boosted.  Especially given that the
-		process we need to boost might well be a human being
-		who just went out for a pizza or something.  And although
-		a computer-operated cattle prod might arouse serious
-		interest, it might also provoke serious objections.
-		Besides, how does the computer know what pizza parlor
-		the human being went to???
-
-
-ACKNOWLEDGEMENTS
-
-My thanks to the people who helped make this human-readable, including
-Jon Walpole, Josh Triplett, Serge Hallyn, Suzanne Wood, and Alan Stern.
-
-
-For more information, see http://www.rdrop.com/users/paulmck/RCU.
diff --git a/Documentation/admin-guide/LSM/SafeSetID.rst b/Documentation/admin-guide/LSM/SafeSetID.rst
index 212434ef65ad..7bff07ce4fdd 100644
--- a/Documentation/admin-guide/LSM/SafeSetID.rst
+++ b/Documentation/admin-guide/LSM/SafeSetID.rst
@@ -56,7 +56,7 @@ setid capabilities from the application completely and refactor the process
 spawning semantics in the application (e.g. by using a privileged helper program
 to do process spawning and UID/GID transitions). Unfortunately, there are a
 number of semantics around process spawning that would be affected by this, such
-as fork() calls where the program doesn???t immediately call exec() after the
+as fork() calls where the program doesn't immediately call exec() after the
 fork(), parent processes specifying custom environment variables or command line
 args for spawned child processes, or inheritance of file handles across a
 fork()/exec(). Because of this, as solution that uses a privileged helper in
@@ -72,7 +72,7 @@ own user namespace, and only approved UIDs/GIDs could be mapped back to the
 initial system user namespace, affectively preventing privilege escalation.
 Unfortunately, it is not generally feasible to use user namespaces in isolation,
 without pairing them with other namespace types, which is not always an option.
-Linux checks for capabilities based off of the user namespace that ???owns??? some
+Linux checks for capabilities based off of the user namespace that "owns" some
 entity. For example, Linux has the notion that network namespaces are owned by
 the user namespace in which they were created. A consequence of this is that
 capability checks for access to a given network namespace are done by checking
diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
new file mode 100644
index 000000000000..21d233ca50d8
--- /dev/null
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+ACPI Fan Performance States
+===========================
+
+When the optional _FPS object is present under an ACPI device representing a
+fan (for example, PNP0C0B or INT3404), the ACPI fan driver creates additional
+"state*" attributes in the sysfs directory of the ACPI device in question.
+These attributes list properties of fan performance states.
+
+For more information on _FPS refer to the ACPI specification at:
+
+http://uefi.org/specifications
+
+For instance, the contents of the INT3404 ACPI device sysfs directory
+may look as follows::
+
+ $ ls -l /sys/bus/acpi/devices/INT3404:00/
+ total 0
+...
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state0
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state1
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state10
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state11
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state2
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state3
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state4
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state5
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state6
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state7
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state8
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state9
+ -r--r--r-- 1 root root 4096 Dec 13 01:00 status
+ ...
+
+where each of the "state*" files represents one performance state of the fan
+and contains a colon-separated list of 5 integer numbers (fields) with the
+following interpretation::
+
+control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw
+
+* ``control_percent``: The percent value to be used to set the fan speed to a
+  specific level using the _FSL object (0-100).
+
+* ``trip_point_index``: The active cooling trip point number that corresponds
+  to this performance state (0-9).
+
+* ``speed_rpm``: Speed of the fan in rotations per minute.
+
+* ``noise_level_mdb``: Audible noise emitted by the fan in this state in
+  millidecibels.
+
+* ``power_mw``: Power draw of the fan in this state in milliwatts.
+
+For example::
+
+ $cat /sys/bus/acpi/devices/INT3404:00/state1
+ 25:0:3200:12500:1250
+
+When a given field is not populated or its value provided by the platform
+firmware is invalid, the "not-defined" string is shown instead of the value.
diff --git a/Documentation/admin-guide/acpi/index.rst b/Documentation/admin-guide/acpi/index.rst
index 4d13eeea1eca..71277689ad97 100644
--- a/Documentation/admin-guide/acpi/index.rst
+++ b/Documentation/admin-guide/acpi/index.rst
@@ -12,3 +12,4 @@ the Linux ACPI support.
    dsdt-override
    ssdt-overlays
    cppc_sysfs
+   fan_performance_states
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 6eccf13219ff..27c77d853028 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -1,15 +1,15 @@
 ========================================
-zram: Compressed RAM based block devices
+zram: Compressed RAM-based block devices
 ========================================
 
 Introduction
 ============
 
-The zram module creates RAM based block devices named /dev/zram<id>
+The zram module creates RAM-based block devices named /dev/zram<id>
 (<id> = 0, 1, ...). Pages written to these disks are compressed and stored
 in memory itself. These disks allow very fast I/O and compression provides
-good amounts of memory savings. Some of the usecases include /tmp storage,
-use as swap disks, various caches under /var and maybe many more :)
+good amounts of memory savings. Some of the use cases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more. :)
 
 Statistics for individual zram devices are exported through sysfs nodes at
 /sys/block/zram<id>/
@@ -43,17 +43,17 @@ The list of possible return codes:
 
 ========  =============================================================
 -EBUSY	  an attempt to modify an attribute that cannot be changed once
-	  the device has been initialised. Please reset device first;
+	  the device has been initialised. Please reset device first.
 -ENOMEM	  zram was not able to allocate enough memory to fulfil your
-	  needs;
+	  needs.
 -EINVAL	  invalid input has been provided.
 ========  =============================================================
 
-If you use 'echo', the returned value that is changed by 'echo' utility,
+If you use 'echo', the returned value is set by the 'echo' utility,
 and, in general case, something like::
 
 	echo 3 > /sys/block/zram0/max_comp_streams
-	if [ $? -ne 0 ];
+	if [ $? -ne 0 ]; then
 		handle_error
 	fi
 
@@ -65,7 +65,8 @@ should suffice.
 ::
 
 	modprobe zram num_devices=4
-	This creates 4 devices: /dev/zram{0,1,2,3}
+
+This creates 4 devices: /dev/zram{0,1,2,3}
 
 num_devices parameter is optional and tells zram how many devices should be
 pre-created. Default: 1.
@@ -73,12 +74,12 @@ pre-created. Default: 1.
 2) Set max number of compression streams
 ========================================
 
-Regardless the value passed to this attribute, ZRAM will always
-allocate multiple compression streams - one per online CPUs - thus
+Regardless of the value passed to this attribute, ZRAM will always
+allocate multiple compression streams - one per online CPU - thus
 allowing several concurrent compression operations. The number of
 allocated compression streams goes down when some of the CPUs
 become offline. There is no single-compression-stream mode anymore,
-unless you are running a UP system or has only 1 CPU online.
+unless you are running a UP system or have only 1 CPU online.
 
 To find out how many streams are currently available::
 
@@ -89,7 +90,7 @@ To find out how many streams are currently available::
 
 Using comp_algorithm device attribute one can see available and
 currently selected (shown in square brackets) compression algorithms,
-change selected compression algorithm (once the device is initialised
+or change the selected compression algorithm (once the device is initialised
 there is no way to change compression algorithm).
 
 Examples::
@@ -167,9 +168,9 @@ Examples::
 zram provides a control interface, which enables dynamic (on-demand) device
 addition and removal.
 
-In order to add a new /dev/zramX device, perform read operation on hot_add
-attribute. This will return either new device's device id (meaning that you
-can use /dev/zram<id>) or error code.
+In order to add a new /dev/zramX device, perform a read operation on the hot_add
+attribute. This will return either the new device's device id (meaning that you
+can use /dev/zram<id>) or an error code.
 
 Example::
 
@@ -186,8 +187,8 @@ execute::
 
 Per-device statistics are exported as various nodes under /sys/block/zram<id>/
 
-A brief description of exported device attributes. For more details please
-read Documentation/ABI/testing/sysfs-block-zram.
+A brief description of exported device attributes follows. For more details
+please read Documentation/ABI/testing/sysfs-block-zram.
 
 ======================  ======  ===============================================
 Name            	access            description
@@ -245,7 +246,7 @@ whitespace:
 
 File /sys/block/zram<id>/mm_stat
 
-The stat file represents device's mm statistics. It consists of a single
+The mm_stat file represents the device's mm statistics. It consists of a single
 line of text and contains the following stats separated by whitespace:
 
  ================ =============================================================
@@ -261,7 +262,7 @@ line of text and contains the following stats separated by whitespace:
                   Unit: bytes
  mem_limit        the maximum amount of memory ZRAM can use to store
                   the compressed data
- mem_used_max     the maximum amount of memory zram have consumed to
+ mem_used_max     the maximum amount of memory zram has consumed to
                   store the data
  same_pages       the number of same element filled pages written to this disk.
                   No memory is allocated for such pages.
@@ -271,7 +272,7 @@ line of text and contains the following stats separated by whitespace:
 
 File /sys/block/zram<id>/bd_stat
 
-The stat file represents device's backing device statistics. It consists of
+The bd_stat file represents a device's backing device statistics. It consists of
 a single line of text and contains the following stats separated by whitespace:
 
  ============== =============================================================
@@ -316,9 +317,9 @@ To use the feature, admin should set up backing device via::
 	echo /dev/sda5 > /sys/block/zramX/backing_dev
 
 before disksize setting. It supports only partition at this moment.
-If admin want to use incompressible page writeback, they could do via::
+If admin wants to use incompressible page writeback, they could do via::
 
-	echo huge > /sys/block/zramX/write
+	echo huge > /sys/block/zramX/writeback
 
 To use idle page writeback, first, user need to declare zram pages
 as idle::
@@ -326,7 +327,7 @@ as idle::
 	echo all > /sys/block/zramX/idle
 
 From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone request access of the block.
+will be removed until someone requests access of the block.
 IOW, unless there is access request, those pages are still idle pages.
 
 Admin can request writeback of those idle pages at right timing via::
@@ -341,16 +342,16 @@ to guarantee storage health for entire product life.
 
 To overcome the concern, zram supports "writeback_limit" feature.
 The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin want to apply writeback budget, he should
+any writeback. IOW, if admin wants to apply writeback budget, he should
 enable writeback_limit_enable via::
 
 	$ echo 1 > /sys/block/zramX/writeback_limit_enable
 
 Once writeback_limit_enable is set, zram doesn't allow any writeback
-until admin set the budget via /sys/block/zramX/writeback_limit.
+until admin sets the budget via /sys/block/zramX/writeback_limit.
 
 (If admin doesn't enable writeback_limit_enable, writeback_limit's value
-assigned via /sys/block/zramX/writeback_limit is meaninless.)
+assigned via /sys/block/zramX/writeback_limit is meaningless.)
 
 If admin want to limit writeback as per-day 400M, he could do it
 like below::
@@ -361,13 +362,13 @@ like below::
 		/sys/block/zram0/writeback_limit.
 	$ echo 1 > /sys/block/zram0/writeback_limit_enable
 
-If admin want to allow further write again once the bugdet is exausted,
+If admins want to allow further write again once the bugdet is exhausted,
 he could do it like below::
 
 	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
 		/sys/block/zram0/writeback_limit
 
-If admin want to see remaining writeback budget since he set::
+If admin wants to see remaining writeback budget since last set::
 
 	$ cat /sys/block/zramX/writeback_limit
 
@@ -375,12 +376,12 @@ If admin want to disable writeback limit, he could do::
 
 	$ echo 0 > /sys/block/zramX/writeback_limit_enable
 
-The writeback_limit count will reset whenever you reset zram(e.g.,
+The writeback_limit count will reset whenever you reset zram (e.g.,
 system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
 writeback happened until you reset the zram to allocate extra writeback
 budget in next setting is user's job.
 
-If admin want to measure writeback count in a certain period, he could
+If admin wants to measure writeback count in a certain period, he could
 know it via /sys/block/zram0/bd_stat's 3rd column.
 
 memory tracking
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 5361ebec3361..3f801461f0f3 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -61,6 +61,8 @@ v1 is available under Documentation/admin-guide/cgroup-v1/.
      5-6. Device
      5-7. RDMA
        5-7-1. RDMA Interface Files
+     5-8. HugeTLB
+       5.8-1. HugeTLB Interface Files
      5-8. Misc
        5-8-1. perf_event
      5-N. Non-normative information
@@ -1120,8 +1122,9 @@ PAGE_SIZE multiple when read back.
 
 	Best-effort memory protection.  If the memory usage of a
 	cgroup is within its effective low boundary, the cgroup's
-	memory won't be reclaimed unless memory can be reclaimed
-	from unprotected cgroups.  Above the effective low boundary (or
+	memory won't be reclaimed unless there is no reclaimable
+	memory available in unprotected cgroups.
+	Above the effective low	boundary (or 
 	effective min boundary if it is higher), pages are reclaimed
 	proportionally to the overage, reducing reclaim pressure for
 	smaller overages.
@@ -1288,7 +1291,12 @@ PAGE_SIZE multiple when read back.
 	  inactive_anon, active_anon, inactive_file, active_file, unevictable
 		Amount of memory, swap-backed and filesystem-backed,
 		on the internal memory management lists used by the
-		page reclaim algorithm
+		page reclaim algorithm.
+
+		As these represent internal list state (eg. shmem pages are on anon
+		memory management lists), inactive_foo + active_foo may not be equal to
+		the value for the foo counter, since the foo counter is type-based, not
+		list-based.
 
 	  slab_reclaimable
 		Part of "slab" that might be reclaimed, such as
@@ -1334,7 +1342,7 @@ PAGE_SIZE multiple when read back.
 
 	  pgdeactivate
 
-		Amount of pages moved to the inactive LRU lis
+		Amount of pages moved to the inactive LRU list
 
 	  pglazyfree
 
@@ -1920,7 +1928,7 @@ Cpuset Interface Files
 
         It accepts only the following input values when written to.
 
-        "root"   - a paritition root
+        "root"   - a partition root
         "member" - a non-root member of a partition
 
 	When set to be a partition root, the current cgroup is the
@@ -2050,6 +2058,33 @@ RDMA Interface Files
 	  mlx4_0 hca_handle=1 hca_object=20
 	  ocrdma1 hca_handle=1 hca_object=23
 
+HugeTLB
+-------
+
+The HugeTLB controller allows to limit the HugeTLB usage per control group and
+enforces the controller limit during page fault.
+
+HugeTLB Interface Files
+~~~~~~~~~~~~~~~~~~~~~~~
+
+  hugetlb.<hugepagesize>.current
+	Show current usage for "hugepagesize" hugetlb.  It exists for all
+	the cgroup except root.
+
+  hugetlb.<hugepagesize>.max
+	Set/show the hard limit of "hugepagesize" hugetlb usage.
+	The default value is "max".  It exists for all the cgroup except root.
+
+  hugetlb.<hugepagesize>.events
+	A read-only flat-keyed file which exists on non-root cgroups.
+
+	  max
+		The number of allocation failure due to HugeTLB limit
+
+  hugetlb.<hugepagesize>.events.local
+	Similar to hugetlb.<hugepagesize>.events but the fields in the file
+	are local to the cgroup i.e. not hierarchical. The file modified event
+	generated on this file reflects only the local events.
 
 Misc
 ----
diff --git a/Documentation/driver-api/dell_rbu.rst b/Documentation/admin-guide/dell_rbu.rst
index 5d1ce7bcd04d..8d70e1fc9f9d 100644
--- a/Documentation/driver-api/dell_rbu.rst
+++ b/Documentation/admin-guide/dell_rbu.rst
@@ -1,11 +1,11 @@
-=============================================================
-Usage of the new open sourced rbu (Remote BIOS Update) driver
-=============================================================
+=========================================
+Dell Remote BIOS Update driver (dell_rbu)
+=========================================
 
 Purpose
 =======
 
-Document demonstrating the use of the Dell Remote BIOS Update driver.
+Document demonstrating the use of the Dell Remote BIOS Update driver
 for updating BIOS images on Dell servers and desktops.
 
 Scope
@@ -37,7 +37,7 @@ maintains a link list of packets for reading them back.
 
 If the dell_rbu driver is unloaded all the allocated memory is freed.
 
-The rbu driver needs to have an application (as mentioned above)which will
+The rbu driver needs to have an application (as mentioned above) which will
 inform the BIOS to enable the update in the next system reboot.
 
 The user should not unload the rbu driver after downloading the BIOS image
@@ -71,7 +71,7 @@ be downloaded. It is done as below::
 	echo XXXX > /sys/devices/platform/dell_rbu/packet_size
 
 In the packet update mechanism, the user needs to create a new file having
-packets of data arranged back to back. It can be done as follows
+packets of data arranged back to back. It can be done as follows:
 The user creates packets header, gets the chunk of the BIOS image and
 places it next to the packetheader; now, the packetheader + BIOS image chunk
 added together should match the specified packet_size. This makes one
@@ -114,7 +114,7 @@ The entries can be recreated by doing the following::
 
 	echo init > /sys/devices/platform/dell_rbu/image_type
 
-.. note:: echoing init in image_type does not change it original value.
+.. note:: echoing init in image_type does not change its original value.
 
 Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
 read back the image downloaded.
diff --git a/Documentation/admin-guide/device-mapper/dm-dust.rst b/Documentation/admin-guide/device-mapper/dm-dust.rst
new file mode 100644
index 000000000000..b6e7e7ead831
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-dust.rst
@@ -0,0 +1,287 @@
+dm-dust
+=======
+
+This target emulates the behavior of bad sectors at arbitrary
+locations, and the ability to enable the emulation of the failures
+at an arbitrary time.
+
+This target behaves similarly to a linear target.  At a given time,
+the user can send a message to the target to start failing read
+requests on specific blocks (to emulate the behavior of a hard disk
+drive with bad sectors).
+
+When the failure behavior is enabled (i.e.: when the output of
+"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
+in the "bad block list" will fail with EIO ("Input/output error").
+
+Writes of blocks in the "bad block list will result in the following:
+
+1. Remove the block from the "bad block list".
+2. Successfully complete the write.
+
+This emulates the "remapped sector" behavior of a drive with bad
+sectors.
+
+Normally, a drive that is encountering bad sectors will most likely
+encounter more bad sectors, at an unknown time or location.
+With dm-dust, the user can use the "addbadblock" and "removebadblock"
+messages to add arbitrary bad blocks at new locations, and the
+"enable" and "disable" messages to modulate the state of whether the
+configured "bad blocks" will be treated as bad, or bypassed.
+This allows the pre-writing of test data and metadata prior to
+simulating a "failure" event where bad sectors start to appear.
+
+Table parameters
+----------------
+<device_path> <offset> <blksz>
+
+Mandatory parameters:
+    <device_path>:
+        Path to the block device.
+
+    <offset>:
+        Offset to data area from start of device_path
+
+    <blksz>:
+        Block size in bytes
+
+	     (minimum 512, maximum 1073741824, must be a power of 2)
+
+Usage instructions
+------------------
+
+First, find the size (in 512-byte sectors) of the device to be used::
+
+        $ sudo blockdev --getsz /dev/vdb1
+        33552384
+
+Create the dm-dust device:
+(For a device with a block size of 512 bytes)
+
+::
+
+        $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
+
+(For a device with a block size of 4096 bytes)
+
+::
+
+        $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
+
+Check the status of the read behavior ("bypass" indicates that all I/O
+will be passed through to the underlying device)::
+
+        $ sudo dmsetup status dust1
+        0 33552384 dust 252:17 bypass
+
+        $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
+        128+0 records in
+        128+0 records out
+
+        $ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+        128+0 records in
+        128+0 records out
+
+Adding and removing bad blocks
+------------------------------
+
+At any time (i.e.: whether the device has the "bad block" emulation
+enabled or disabled), bad blocks may be added or removed from the
+device via the "addbadblock" and "removebadblock" messages::
+
+        $ sudo dmsetup message dust1 0 addbadblock 60
+        kernel: device-mapper: dust: badblock added at block 60
+
+        $ sudo dmsetup message dust1 0 addbadblock 67
+        kernel: device-mapper: dust: badblock added at block 67
+
+        $ sudo dmsetup message dust1 0 addbadblock 72
+        kernel: device-mapper: dust: badblock added at block 72
+
+These bad blocks will be stored in the "bad block list".
+While the device is in "bypass" mode, reads and writes will succeed::
+
+        $ sudo dmsetup status dust1
+        0 33552384 dust 252:17 bypass
+
+Enabling block read failures
+----------------------------
+
+To enable the "fail read on bad block" behavior, send the "enable" message::
+
+        $ sudo dmsetup message dust1 0 enable
+        kernel: device-mapper: dust: enabling read failures on bad sectors
+
+        $ sudo dmsetup status dust1
+        0 33552384 dust 252:17 fail_read_on_bad_block
+
+With the device in "fail read on bad block" mode, attempting to read a
+block will encounter an "Input/output error"::
+
+        $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
+        dd: error reading '/dev/mapper/dust1': Input/output error
+        0+0 records in
+        0+0 records out
+        0 bytes copied, 0.00040651 s, 0.0 kB/s
+
+...and writing to the bad blocks will remove the blocks from the list,
+therefore emulating the "remap" behavior of hard disk drives::
+
+        $ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+        128+0 records in
+        128+0 records out
+
+        kernel: device-mapper: dust: block 60 removed from badblocklist by write
+        kernel: device-mapper: dust: block 67 removed from badblocklist by write
+        kernel: device-mapper: dust: block 72 removed from badblocklist by write
+        kernel: device-mapper: dust: block 87 removed from badblocklist by write
+
+Bad block add/remove error handling
+-----------------------------------
+
+Attempting to add a bad block that already exists in the list will
+result in an "Invalid argument" error, as well as a helpful message::
+
+        $ sudo dmsetup message dust1 0 addbadblock 88
+        device-mapper: message ioctl on dust1  failed: Invalid argument
+        kernel: device-mapper: dust: block 88 already in badblocklist
+
+Attempting to remove a bad block that doesn't exist in the list will
+result in an "Invalid argument" error, as well as a helpful message::
+
+        $ sudo dmsetup message dust1 0 removebadblock 87
+        device-mapper: message ioctl on dust1  failed: Invalid argument
+        kernel: device-mapper: dust: block 87 not found in badblocklist
+
+Counting the number of bad blocks in the bad block list
+-------------------------------------------------------
+
+To count the number of bad blocks configured in the device, run the
+following message command::
+
+        $ sudo dmsetup message dust1 0 countbadblocks
+
+A message will print with the number of bad blocks currently
+configured on the device::
+
+        kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
+
+Querying for specific bad blocks
+--------------------------------
+
+To find out if a specific block is in the bad block list, run the
+following message command::
+
+        $ sudo dmsetup message dust1 0 queryblock 72
+
+The following message will print if the block is in the list::
+
+        device-mapper: dust: queryblock: block 72 found in badblocklist
+
+The following message will print if the block is not in the list::
+
+        device-mapper: dust: queryblock: block 72 not found in badblocklist
+
+The "queryblock" message command will work in both the "enabled"
+and "disabled" modes, allowing the verification of whether a block
+will be treated as "bad" without having to issue I/O to the device,
+or having to "enable" the bad block emulation.
+
+Clearing the bad block list
+---------------------------
+
+To clear the bad block list (without needing to individually run
+a "removebadblock" message command for every block), run the
+following message command::
+
+        $ sudo dmsetup message dust1 0 clearbadblocks
+
+After clearing the bad block list, the following message will appear::
+
+        kernel: device-mapper: dust: clearbadblocks: badblocks cleared
+
+If there were no bad blocks to clear, the following message will
+appear::
+
+        kernel: device-mapper: dust: clearbadblocks: no badblocks found
+
+Message commands list
+---------------------
+
+Below is a list of the messages that can be sent to a dust device:
+
+Operations on blocks (requires a <blknum> argument)::
+
+        addbadblock <blknum>
+        queryblock <blknum>
+        removebadblock <blknum>
+
+...where <blknum> is a block number within range of the device
+(corresponding to the block size of the device.)
+
+Single argument message commands::
+
+        countbadblocks
+        clearbadblocks
+        disable
+        enable
+        quiet
+
+Device removal
+--------------
+
+When finished, remove the device via the "dmsetup remove" command::
+
+        $ sudo dmsetup remove dust1
+
+Quiet mode
+----------
+
+On test runs with many bad blocks, it may be desirable to avoid
+excessive logging (from bad blocks added, removed, or "remapped").
+This can be done by enabling "quiet mode" via the following message::
+
+        $ sudo dmsetup message dust1 0 quiet
+
+This will suppress log messages from add / remove / removed by write
+operations.  Log messages from "countbadblocks" or "queryblock"
+message commands will still print in quiet mode.
+
+The status of quiet mode can be seen by running "dmsetup status"::
+
+        $ sudo dmsetup status dust1
+        0 33552384 dust 252:17 fail_read_on_bad_block quiet
+
+To disable quiet mode, send the "quiet" message again::
+
+        $ sudo dmsetup message dust1 0 quiet
+
+        $ sudo dmsetup status dust1
+        0 33552384 dust 252:17 fail_read_on_bad_block verbose
+
+(The presence of "verbose" indicates normal logging.)
+
+"Why not...?"
+-------------
+
+scsi_debug has a "medium error" mode that can fail reads on one
+specified sector (sector 0x1234, hardcoded in the source code), but
+it uses RAM for the persistent storage, which drastically decreases
+the potential device size.
+
+dm-flakey fails all I/O from all block locations at a specified time
+frequency, and not a given point in time.
+
+When a bad sector occurs on a hard disk drive, reads to that sector
+are failed by the device, usually resulting in an error code of EIO
+("I/O error") or ENODATA ("No data available").  However, a write to
+the sector may succeed, and result in the sector becoming readable
+after the device controller no longer experiences errors reading the
+sector (or after a reallocation of the sector).  However, there may
+be bad sectors that occur on the device in the future, in a different,
+unpredictable location.
+
+This target seeks to provide a device that can exhibit the behavior
+of a bad sector at a known sector location, at a known time, based
+on a large storage device (at least tens of gigabytes, not occupying
+system memory).
diff --git a/Documentation/admin-guide/device-mapper/dm-dust.txt b/Documentation/admin-guide/device-mapper/dm-dust.txt
deleted file mode 100644
index 954d402a1f6a..000000000000
--- a/Documentation/admin-guide/device-mapper/dm-dust.txt
+++ /dev/null
@@ -1,272 +0,0 @@
-dm-dust
-=======
-
-This target emulates the behavior of bad sectors at arbitrary
-locations, and the ability to enable the emulation of the failures
-at an arbitrary time.
-
-This target behaves similarly to a linear target.  At a given time,
-the user can send a message to the target to start failing read
-requests on specific blocks (to emulate the behavior of a hard disk
-drive with bad sectors).
-
-When the failure behavior is enabled (i.e.: when the output of
-"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
-in the "bad block list" will fail with EIO ("Input/output error").
-
-Writes of blocks in the "bad block list will result in the following:
-
-1. Remove the block from the "bad block list".
-2. Successfully complete the write.
-
-This emulates the "remapped sector" behavior of a drive with bad
-sectors.
-
-Normally, a drive that is encountering bad sectors will most likely
-encounter more bad sectors, at an unknown time or location.
-With dm-dust, the user can use the "addbadblock" and "removebadblock"
-messages to add arbitrary bad blocks at new locations, and the
-"enable" and "disable" messages to modulate the state of whether the
-configured "bad blocks" will be treated as bad, or bypassed.
-This allows the pre-writing of test data and metadata prior to
-simulating a "failure" event where bad sectors start to appear.
-
-Table parameters:
------------------
-<device_path> <offset> <blksz>
-
-Mandatory parameters:
-    <device_path>: path to the block device.
-    <offset>: offset to data area from start of device_path
-    <blksz>: block size in bytes
-	     (minimum 512, maximum 1073741824, must be a power of 2)
-
-Usage instructions:
--------------------
-
-First, find the size (in 512-byte sectors) of the device to be used:
-
-$ sudo blockdev --getsz /dev/vdb1
-33552384
-
-Create the dm-dust device:
-(For a device with a block size of 512 bytes)
-$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
-
-(For a device with a block size of 4096 bytes)
-$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
-
-Check the status of the read behavior ("bypass" indicates that all I/O
-will be passed through to the underlying device):
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 bypass
-
-$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
-128+0 records in
-128+0 records out
-
-$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
-128+0 records in
-128+0 records out
-
-Adding and removing bad blocks:
--------------------------------
-
-At any time (i.e.: whether the device has the "bad block" emulation
-enabled or disabled), bad blocks may be added or removed from the
-device via the "addbadblock" and "removebadblock" messages:
-
-$ sudo dmsetup message dust1 0 addbadblock 60
-kernel: device-mapper: dust: badblock added at block 60
-
-$ sudo dmsetup message dust1 0 addbadblock 67
-kernel: device-mapper: dust: badblock added at block 67
-
-$ sudo dmsetup message dust1 0 addbadblock 72
-kernel: device-mapper: dust: badblock added at block 72
-
-These bad blocks will be stored in the "bad block list".
-While the device is in "bypass" mode, reads and writes will succeed:
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 bypass
-
-Enabling block read failures:
------------------------------
-
-To enable the "fail read on bad block" behavior, send the "enable" message:
-
-$ sudo dmsetup message dust1 0 enable
-kernel: device-mapper: dust: enabling read failures on bad sectors
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block
-
-With the device in "fail read on bad block" mode, attempting to read a
-block will encounter an "Input/output error":
-
-$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
-dd: error reading '/dev/mapper/dust1': Input/output error
-0+0 records in
-0+0 records out
-0 bytes copied, 0.00040651 s, 0.0 kB/s
-
-...and writing to the bad blocks will remove the blocks from the list,
-therefore emulating the "remap" behavior of hard disk drives:
-
-$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
-128+0 records in
-128+0 records out
-
-kernel: device-mapper: dust: block 60 removed from badblocklist by write
-kernel: device-mapper: dust: block 67 removed from badblocklist by write
-kernel: device-mapper: dust: block 72 removed from badblocklist by write
-kernel: device-mapper: dust: block 87 removed from badblocklist by write
-
-Bad block add/remove error handling:
-------------------------------------
-
-Attempting to add a bad block that already exists in the list will
-result in an "Invalid argument" error, as well as a helpful message:
-
-$ sudo dmsetup message dust1 0 addbadblock 88
-device-mapper: message ioctl on dust1  failed: Invalid argument
-kernel: device-mapper: dust: block 88 already in badblocklist
-
-Attempting to remove a bad block that doesn't exist in the list will
-result in an "Invalid argument" error, as well as a helpful message:
-
-$ sudo dmsetup message dust1 0 removebadblock 87
-device-mapper: message ioctl on dust1  failed: Invalid argument
-kernel: device-mapper: dust: block 87 not found in badblocklist
-
-Counting the number of bad blocks in the bad block list:
---------------------------------------------------------
-
-To count the number of bad blocks configured in the device, run the
-following message command:
-
-$ sudo dmsetup message dust1 0 countbadblocks
-
-A message will print with the number of bad blocks currently
-configured on the device:
-
-kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
-
-Querying for specific bad blocks:
----------------------------------
-
-To find out if a specific block is in the bad block list, run the
-following message command:
-
-$ sudo dmsetup message dust1 0 queryblock 72
-
-The following message will print if the block is in the list:
-device-mapper: dust: queryblock: block 72 found in badblocklist
-
-The following message will print if the block is in the list:
-device-mapper: dust: queryblock: block 72 not found in badblocklist
-
-The "queryblock" message command will work in both the "enabled"
-and "disabled" modes, allowing the verification of whether a block
-will be treated as "bad" without having to issue I/O to the device,
-or having to "enable" the bad block emulation.
-
-Clearing the bad block list:
-----------------------------
-
-To clear the bad block list (without needing to individually run
-a "removebadblock" message command for every block), run the
-following message command:
-
-$ sudo dmsetup message dust1 0 clearbadblocks
-
-After clearing the bad block list, the following message will appear:
-
-kernel: device-mapper: dust: clearbadblocks: badblocks cleared
-
-If there were no bad blocks to clear, the following message will
-appear:
-
-kernel: device-mapper: dust: clearbadblocks: no badblocks found
-
-Message commands list:
-----------------------
-
-Below is a list of the messages that can be sent to a dust device:
-
-Operations on blocks (requires a <blknum> argument):
-
-addbadblock <blknum>
-queryblock <blknum>
-removebadblock <blknum>
-
-...where <blknum> is a block number within range of the device
-  (corresponding to the block size of the device.)
-
-Single argument message commands:
-
-countbadblocks
-clearbadblocks
-disable
-enable
-quiet
-
-Device removal:
----------------
-
-When finished, remove the device via the "dmsetup remove" command:
-
-$ sudo dmsetup remove dust1
-
-Quiet mode:
------------
-
-On test runs with many bad blocks, it may be desirable to avoid
-excessive logging (from bad blocks added, removed, or "remapped").
-This can be done by enabling "quiet mode" via the following message:
-
-$ sudo dmsetup message dust1 0 quiet
-
-This will suppress log messages from add / remove / removed by write
-operations.  Log messages from "countbadblocks" or "queryblock"
-message commands will still print in quiet mode.
-
-The status of quiet mode can be seen by running "dmsetup status":
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block quiet
-
-To disable quiet mode, send the "quiet" message again:
-
-$ sudo dmsetup message dust1 0 quiet
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block verbose
-
-(The presence of "verbose" indicates normal logging.)
-
-"Why not...?"
--------------
-
-scsi_debug has a "medium error" mode that can fail reads on one
-specified sector (sector 0x1234, hardcoded in the source code), but
-it uses RAM for the persistent storage, which drastically decreases
-the potential device size.
-
-dm-flakey fails all I/O from all block locations at a specified time
-frequency, and not a given point in time.
-
-When a bad sector occurs on a hard disk drive, reads to that sector
-are failed by the device, usually resulting in an error code of EIO
-("I/O error") or ENODATA ("No data available").  However, a write to
-the sector may succeed, and result in the sector becoming readable
-after the device controller no longer experiences errors reading the
-sector (or after a reallocation of the sector).  However, there may
-be bad sectors that occur on the device in the future, in a different,
-unpredictable location.
-
-This target seeks to provide a device that can exhibit the behavior
-of a bad sector at a known sector location, at a known time, based
-on a large storage device (at least tens of gigabytes, not occupying
-system memory).
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
index a30aa91b5fbe..c00f9f11e3f3 100644
--- a/Documentation/admin-guide/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -144,7 +144,7 @@ journal_crypt:algorithm(:key)	(the key is optional)
 	Encrypt the journal using given algorithm to make sure that the
 	attacker can't read the journal. You can use a block cipher here
 	(such as "cbc(aes)") or a stream cipher (for example "chacha20",
-	"salsa20", "ctr(aes)" or "ecb(arc4)").
+	"salsa20" or "ctr(aes)").
 
 	The journal contains history of last writes to the block device,
 	an attacker reading the journal could see the last sector nubmers
@@ -177,6 +177,11 @@ bitmap_flush_interval:number
 	The bitmap flush interval in milliseconds. The metadata buffers
 	are synchronized when this interval expires.
 
+fix_padding
+	Use a smaller padding of the tag area that is more
+	space-efficient. If this option is not present, large padding is
+	used - that is for compatibility with older kernels.
+
 
 The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
 be changed when reloading the target (load an inactive table and swap the
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
index 2fe255b130fb..695a2ea1d1ae 100644
--- a/Documentation/admin-guide/device-mapper/dm-raid.rst
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -417,3 +417,7 @@ Version History
 	deadlock/potential data corruption.  Update superblock when
 	specific devices are requested via rebuild.  Fix RAID leg
 	rebuild errors.
+ 1.15.0 Fix size extensions not being synchronized in case of new MD bitmap
+        pages allocated;  also fix those not occuring after previous reductions
+ 1.15.1 Fix argument count and arguments for rebuild/write_mostly/journal_(dev|mode)
+        on the status line.
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index c77c58b8f67b..ec62fcc8eece 100644
--- a/Documentation/admin-guide/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -8,7 +8,9 @@ Device Mapper
     cache-policies
     cache
     delay
+    dm-clone
     dm-crypt
+    dm-dust
     dm-flakey
     dm-init
     dm-integrity
diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index 1c5d2281efc9..2a97aaec8b12 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -319,7 +319,7 @@
 		182 = /dev/perfctr	Performance-monitoring counters
 		183 = /dev/hwrng	Generic random number generator
 		184 = /dev/cpu/microcode CPU microcode update interface
-		186 = /dev/atomicps	Atomic shapshot of process state data
+		186 = /dev/atomicps	Atomic snapshot of process state data
 		187 = /dev/irnet	IrNET device
 		188 = /dev/smbusbios	SMBus BIOS
 		189 = /dev/ussp_ctl	User space serial port control
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 059ddcbe769d..9443fcef1876 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -92,6 +92,8 @@ Currently Available
 * efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
   the ordering)
 * Case-insensitive file name lookups
+* file-based encryption support (fscrypt)
+* file-based verity support (fsverity)
 
 [1] Filesystems with a block size of 1k may see a limit imposed by the
 directory hash tree having a maximum depth of two.
@@ -181,14 +183,17 @@ When mounting an ext4 filesystem, the following option are accepted:
         system after its metadata has been committed to the journal.
 
   commit=nrsec	(*)
-        Ext4 can be told to sync all its data and metadata every 'nrsec'
-        seconds. The default value is 5 seconds.  This means that if you lose
-        your power, you will lose as much as the latest 5 seconds of work (your
-        filesystem will not be damaged though, thanks to the journaling).  This
-        default value (or any low value) will hurt performance, but it's good
-        for data-safety.  Setting it to 0 will have the same effect as leaving
-        it at the default (5 seconds).  Setting it to very large values will
-        improve performance.
+        This setting limits the maximum age of the running transaction to
+        'nrsec' seconds.  The default value is 5 seconds.  This means that if
+        you lose your power, you will lose as much as the latest 5 seconds of
+        metadata changes (your filesystem will not be damaged though, thanks
+        to the journaling). This default value (or any low value) will hurt
+        performance, but it's good for data-safety.  Setting it to 0 will have
+        the same effect as leaving it at the default (5 seconds).  Setting it
+        to very large values will improve performance.  Note that due to
+        delayed allocation even older data can be lost on power failure since
+        writeback of those data begins only after time set in
+        /proc/sys/vm/dirty_expire_centisecs.
 
   barrier=<0|1(*)>, barrier(*), nobarrier
         This enables/disables the use of write barriers in the jbd code.
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
index e3a796c0d3a2..2d19c9f4c1fe 100644
--- a/Documentation/admin-guide/hw-vuln/mds.rst
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -265,8 +265,11 @@ time with the option "mds=". The valid arguments for this option are:
 
   ============  =============================================================
 
-Not specifying this option is equivalent to "mds=full".
-
+Not specifying this option is equivalent to "mds=full". For processors
+that are affected by both TAA (TSX Asynchronous Abort) and MDS,
+specifying just "mds=off" without an accompanying "tsx_async_abort=off"
+will have no effect as the same mitigation is used for both
+vulnerabilities.
 
 Mitigation selection guide
 --------------------------
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
index fddbd7579c53..af6865b822d2 100644
--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -174,7 +174,10 @@ the option "tsx_async_abort=". The valid arguments for this option are:
                 CPU is not vulnerable to cross-thread TAA attacks.
   ============  =============================================================
 
-Not specifying this option is equivalent to "tsx_async_abort=full".
+Not specifying this option is equivalent to "tsx_async_abort=full". For
+processors that are affected by both TAA and MDS, specifying just
+"tsx_async_abort=off" without an accompanying "mds=off" will have no
+effect as the same mitigation is used for both vulnerabilities.
 
 The kernel command line also allows to control the TSX feature using the
 parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 34cc20ee7f3a..4433f3929481 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -57,60 +57,62 @@ configure specific aspects of kernel behavior to your liking.
 .. toctree::
    :maxdepth: 1
 
-   initrd
-   cgroup-v2
-   cgroup-v1/index
-   serial-console
-   braille-console
-   parport
-   md
-   module-signing
-   rapidio
-   sysrq
-   unicode
-   vga-softcursor
-   binfmt-misc
-   mono
-   java
-   ras
-   bcache
-   blockdev/index
-   ext4
-   binderfs
-   cifs/index
-   xfs
-   jfs
-   ufs
-   pm/index
-   thunderbolt
-   LSM/index
-   mm/index
-   namespaces/index
-   perf-security
    acpi/index
    aoe/index
+   auxdisplay/index
+   bcache
+   binderfs
+   binfmt-misc
+   blockdev/index
+   braille-console
    btmrvl
+   cgroup-v1/index
+   cgroup-v2
+   cifs/index
    clearing-warn-once
    cpu-load
    cputopology
+   dell_rbu
    device-mapper/index
    efi-stub
+   ext4
+   nfs/index
    gpio/index
    highuid
    hw_random
+   initrd
    iostats
+   java
+   jfs
    kernel-per-CPU-kthreads
    laptops/index
-   auxdisplay/index
    lcd-panel-cgram
    ldm
    lockup-watchdogs
+   LSM/index
+   md
+   mm/index
+   module-signing
+   mono
+   namespaces/index
    numastat
+   parport
+   perf-security
+   pm/index
    pnp
+   rapidio
+   ras
    rtc
+   serial-console
    svga
-   wimax/index
+   sysrq
+   thunderbolt
+   ufs
+   unicode
+   vga-softcursor
    video-output
+   wimax/index
+   xfs
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 5d63b18bd6d1..df5b8345c41d 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -46,81 +46,91 @@ each snapshot of your disk statistics.
 In 2.4, the statistics fields are those after the device name. In
 the above example, the first field of statistics would be 446216.
 By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll
-find just the eleven fields, beginning with 446216.  If you look at
-``/proc/diskstats``, the eleven fields will be preceded by the major and
+find just the 15 fields, beginning with 446216.  If you look at
+``/proc/diskstats``, the 15 fields will be preceded by the major and
 minor device numbers, and device name.  Each of these formats provides
-eleven fields of statistics, each meaning exactly the same things.
+15 fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot.  Field 9 should
 go to zero as I/Os complete; all others only increase (unless they
-overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
-(native word size) numbers, and on a very busy or long-lived system they
-may wrap. Applications should be prepared to deal with that; unless
-your observations are measured in large numbers of minutes or hours,
-they should not wrap twice before you notice them.
+overflow and wrap). Wrapping might eventually occur on a very busy
+or long-lived system; so applications should be prepared to deal with
+it. Regarding wrapping, the types of the fields are either unsigned
+int (32 bit) or unsigned long (32-bit or 64-bit, depending on your
+machine) as noted per-field below. Unless your observations are very
+spread in time, these fields should not wrap twice before you notice it.
 
 Each set of stats only applies to the indicated device; if you want
 system-wide stats you'll have to find all the devices and sum them all up.
 
-Field  1 -- # of reads completed
+Field  1 -- # of reads completed (unsigned long)
     This is the total number of reads completed successfully.
 
-Field  2 -- # of reads merged, field 6 -- # of writes merged
+Field  2 -- # of reads merged, field 6 -- # of writes merged (unsigned long)
     Reads and writes which are adjacent to each other may be merged for
     efficiency.  Thus two 4K reads may become one 8K read before it is
     ultimately handed to the disk, and so it will be counted (and queued)
     as only one I/O.  This field lets you know how often this was done.
 
-Field  3 -- # of sectors read
+Field  3 -- # of sectors read (unsigned long)
     This is the total number of sectors read successfully.
 
-Field  4 -- # of milliseconds spent reading
+Field  4 -- # of milliseconds spent reading (unsigned int)
     This is the total number of milliseconds spent by all reads (as
     measured from __make_request() to end_that_request_last()).
 
-Field  5 -- # of writes completed
+Field  5 -- # of writes completed (unsigned long)
     This is the total number of writes completed successfully.
 
-Field  6 -- # of writes merged
+Field  6 -- # of writes merged  (unsigned long)
     See the description of field 2.
 
-Field  7 -- # of sectors written
+Field  7 -- # of sectors written (unsigned long)
     This is the total number of sectors written successfully.
 
-Field  8 -- # of milliseconds spent writing
+Field  8 -- # of milliseconds spent writing (unsigned int)
     This is the total number of milliseconds spent by all writes (as
     measured from __make_request() to end_that_request_last()).
 
-Field  9 -- # of I/Os currently in progress
+Field  9 -- # of I/Os currently in progress (unsigned int)
     The only field that should go to zero. Incremented as requests are
     given to appropriate struct request_queue and decremented as they finish.
 
-Field 10 -- # of milliseconds spent doing I/Os
+Field 10 -- # of milliseconds spent doing I/Os (unsigned int)
     This field increases so long as field 9 is nonzero.
 
     Since 5.0 this field counts jiffies when at least one request was
     started or completed. If request runs more than 2 jiffies then some
     I/O time will not be accounted unless there are other requests.
 
-Field 11 -- weighted # of milliseconds spent doing I/Os
+Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
     This field is incremented at each I/O start, I/O completion, I/O
     merge, or read of these stats by the number of I/Os in progress
     (field 9) times the number of milliseconds spent doing I/O since the
     last update of this field.  This can provide an easy measure of both
     I/O completion time and the backlog that may be accumulating.
 
-Field 12 -- # of discards completed
+Field 12 -- # of discards completed (unsigned long)
     This is the total number of discards completed successfully.
 
-Field 13 -- # of discards merged
+Field 13 -- # of discards merged (unsigned long)
     See the description of field 2
 
-Field 14 -- # of sectors discarded
+Field 14 -- # of sectors discarded (unsigned long)
     This is the total number of sectors discarded successfully.
 
-Field 15 -- # of milliseconds spent discarding
+Field 15 -- # of milliseconds spent discarding (unsigned int)
     This is the total number of milliseconds spent by all discards (as
     measured from __make_request() to end_that_request_last()).
 
+Field 16 -- # of flush requests completed
+    This is the total number of flush requests completed successfully.
+
+    Block layer combines flush requests and executes at most one at a time.
+    This counts flush requests executed by disk. Not tracked for partitions.
+
+Field 17 -- # of milliseconds spent flushing
+    This is the total number of milliseconds spent by all flush requests.
+
 To avoid introducing performance bottlenecks, no locks are held while
 modifying these counters.  This implies that minor inaccuracies may be
 introduced when changes collide, so (for instance) adding up all the
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index d05d531b4ec9..6d421694d98e 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -127,6 +127,7 @@ parameter is applicable::
 	NET	Appropriate network support is enabled.
 	NUMA	NUMA support is enabled.
 	NFS	Appropriate NFS support is enabled.
+	OF	Devicetree is enabled.
 	OSS	OSS sound support is enabled.
 	PV_OPS	A paravirtualized kernel is enabled.
 	PARIDE	The ParIDE (parallel port IDE) subsystem is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8dee8f68fe15..ec92120a7952 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -113,7 +113,7 @@
 			the GPE dispatcher.
 			This facility can be used to prevent such uncontrolled
 			GPE floodings.
-			Format: <int>
+			Format: <byte>
 
 	acpi_no_auto_serialize	[HW,ACPI]
 			Disable auto-serialization of AML methods
@@ -437,8 +437,6 @@
 			no delay (0).
 			Format: integer
 
-	bootmem_debug	[KNL] Enable bootmem allocator debug messages.
-
 	bert_disable	[ACPI]
 			Disable BERT OS support on buggy BIOSes.
 
@@ -513,7 +511,7 @@
 			1 -- check protection requested by application.
 			Default value is set via a kernel config option.
 			Value can be changed at runtime via
-				/selinux/checkreqprot.
+				/sys/fs/selinux/checkreqprot.
 
 	cio_ignore=	[S390]
 			See Documentation/s390/common_io.rst for details.
@@ -983,12 +981,10 @@
 
 	earlycon=	[KNL] Output early console device and options.
 
-			[ARM64] The early console is determined by the
-			stdout-path property in device tree's chosen node,
-			or determined by the ACPI SPCR table.
-
-			[X86] When used with no options the early console is
-			determined by the ACPI SPCR table.
+			When used with no options, the early console is
+			determined by stdout-path property in device tree's
+			chosen node or the ACPI SPCR table if supported by
+			the platform.
 
 		cdns,<addr>[,options]
 			Start an early, polled-mode console on a Cadence
@@ -1101,7 +1097,7 @@
 			mapped with the correct attributes.
 
 		linflex,<addr>
-			Use early console provided by Freescale LinFlex UART
+			Use early console provided by Freescale LINFlexD UART
 			serial driver for NXP S32V234 SoCs. A valid base
 			address must be provided, and the serial port must
 			already be setup and configured.
@@ -1168,15 +1164,26 @@
 			Format: {"off" | "on" | "skip[mbr]"}
 
 	efi=		[EFI]
-			Format: { "old_map", "nochunk", "noruntime", "debug" }
+			Format: { "old_map", "nochunk", "noruntime", "debug",
+				  "nosoftreserve", "disable_early_pci_dma",
+				  "no_disable_early_pci_dma" }
 			old_map [X86-64]: switch to the old ioremap-based EFI
-			runtime services mapping. 32-bit still uses this one by
-			default.
+			runtime services mapping. [Needs CONFIG_X86_UV=y]
 			nochunk: disable reading files in "chunks" in the EFI
 			boot stub, as chunking can cause problems with some
 			firmware implementations.
 			noruntime : disable EFI runtime services support
 			debug: enable misc debug output
+			nosoftreserve: The EFI_MEMORY_SP (Specific Purpose)
+			attribute may cause the kernel to reserve the
+			memory range for a memory mapping driver to
+			claim. Specify efi=nosoftreserve to disable this
+			reservation and treat the memory by its base type
+			(i.e. EFI_CONVENTIONAL_MEMORY / "System RAM").
+			disable_early_pci_dma: Disable the busmaster bit on all
+			PCI bridges while in the EFI boot stub
+			no_disable_early_pci_dma: Leave the busmaster bit set
+			on all PCI bridges while in the EFI boot stub
 
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
@@ -1189,15 +1196,21 @@
 			updating original EFI memory map.
 			Region of memory which aa attribute is added to is
 			from ss to ss+nn.
+
 			If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
 			is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
 			attribute is added to range 0x100000000-0x180000000 and
 			0x10a0000000-0x1120000000.
 
+			If efi_fake_mem=8G@9G:0x40000 is specified, the
+			EFI_MEMORY_SP(0x40000) attribute is added to
+			range 0x240000000-0x43fffffff.
+
 			Using this parameter you can do debugging of EFI memmap
-			related feature. For example, you can do debugging of
+			related features. For example, you can do debugging of
 			Address Range Mirroring feature even if your box
-			doesn't support it.
+			doesn't support it, or mark specific memory as
+			"soft reserved".
 
 	efivar_ssdt=	[EFI; X86] Name of an EFI variable that contains an SSDT
 			that is to be dynamically loaded by Linux. If there are
@@ -1236,7 +1249,8 @@
 			0 -- permissive (log only, no denials).
 			1 -- enforcing (deny and log).
 			Default value is 0.
-			Value can be changed at runtime via /selinux/enforce.
+			Value can be changed at runtime via
+			/sys/fs/selinux/enforce.
 
 	erst_disable	[ACPI]
 			Disable Error Record Serialization Table (ERST)
@@ -1924,9 +1938,31 @@
 			  <cpu number> begins at 0 and the maximum value is
 			  "number of CPUs in system - 1".
 
-			The format of <cpu-list> is described above.
-
+			managed_irq
+
+			  Isolate from being targeted by managed interrupts
+			  which have an interrupt mask containing isolated
+			  CPUs. The affinity of managed interrupts is
+			  handled by the kernel and cannot be changed via
+			  the /proc/irq/* interfaces.
+
+			  This isolation is best effort and only effective
+			  if the automatically assigned interrupt mask of a
+			  device queue contains isolated and housekeeping
+			  CPUs. If housekeeping CPUs are online then such
+			  interrupts are directed to the housekeeping CPU
+			  so that IO submitted on the housekeeping CPU
+			  cannot disturb the isolated CPU.
+
+			  If a queue's affinity mask contains only isolated
+			  CPUs then this parameter has no effect on the
+			  interrupt routing decision, though interrupts are
+			  only delivered when tasks running on those
+			  isolated CPUs submit IO. IO submitted on
+			  housekeeping CPUs has no influence on those
+			  queues.
 
+			The format of <cpu-list> is described above.
 
 	iucv=		[HW,NET]
 
@@ -2473,6 +2509,12 @@
 				     SMT on vulnerable CPUs
 			off        - Unconditionally disable MDS mitigation
 
+			On TAA-affected machines, mds=off can be prevented by
+			an active TAA mitigation as both vulnerabilities are
+			mitigated with the same mechanism so in order to disable
+			this mitigation, you need to specify tsx_async_abort=off
+			too.
+
 			Not specifying this option is equivalent to
 			mds=full.
 
@@ -3110,9 +3152,9 @@
 			[X86,PV_OPS] Disable paravirtualized VMware scheduler
 			clock and use the default one.
 
-	no-steal-acc	[X86,KVM] Disable paravirtualized steal time accounting.
-			steal time is computed, but won't influence scheduler
-			behaviour
+	no-steal-acc	[X86,KVM,ARM64] Disable paravirtualized steal time
+			accounting. steal time is computed, but won't
+			influence scheduler behaviour
 
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
 
@@ -3221,6 +3263,12 @@
 			This can be set from sysctl after boot.
 			See Documentation/admin-guide/sysctl/vm.rst for details.
 
+	of_devlink	[OF, KNL] Create device links between consumer and
+			supplier devices by scanning the devictree to infer the
+			consumer/supplier relationships.  A consumer device
+			will not be probed until all the supplier devices have
+			probed successfully.
+
 	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
 			See Documentation/debugging-via-ohci1394.txt for more
 			info.
@@ -3519,8 +3567,15 @@
 		hpiosize=nn[KMG]	The fixed amount of bus space which is
 				reserved for hotplug bridge's IO window.
 				Default size is 256 bytes.
+		hpmmiosize=nn[KMG]	The fixed amount of bus space which is
+				reserved for hotplug bridge's MMIO window.
+				Default size is 2 megabytes.
+		hpmmioprefsize=nn[KMG]	The fixed amount of bus space which is
+				reserved for hotplug bridge's MMIO_PREF window.
+				Default size is 2 megabytes.
 		hpmemsize=nn[KMG]	The fixed amount of bus space which is
-				reserved for hotplug bridge's memory window.
+				reserved for hotplug bridge's MMIO and
+				MMIO_PREF window.
 				Default size is 2 megabytes.
 		hpbussize=nn	The minimum amount of additional bus numbers
 				reserved for buses below a hotplug bridge.
@@ -3567,6 +3622,8 @@
 			even if the platform doesn't give the OS permission to
 			use them.  This may cause conflicts if the platform
 			also tries to use these services.
+		dpc-native	Use native PCIe service for DPC only.  May
+				cause conflicts if firmware uses AER or DPC.
 		compat	Disable native PCIe services (PME, AER, DPC, PCIe
 			hotplug).
 
@@ -3948,6 +4005,19 @@
 			test until boot completes in order to avoid
 			interference.
 
+	rcuperf.kfree_rcu_test= [KNL]
+			Set to measure performance of kfree_rcu() flooding.
+
+	rcuperf.kfree_nthreads= [KNL]
+			The number of threads running loops of kfree_rcu().
+
+	rcuperf.kfree_alloc_num= [KNL]
+			Number of allocations and frees done in an iteration.
+
+	rcuperf.kfree_loops= [KNL]
+			Number of loops doing rcuperf.kfree_alloc_num number
+			of allocations and frees.
+
 	rcuperf.nreaders= [KNL]
 			Set number of RCU readers.  The value -1 selects
 			N, where N is the number of CPUs.  A value
@@ -4318,9 +4388,7 @@
 			See security/selinux/Kconfig help text.
 			0 -- disable.
 			1 -- enable.
-			Default value is set via kernel config option.
-			If enabled at boot time, /selinux/disable can be used
-			later to disable prior to initial policy load.
+			Default value is 1.
 
 	apparmor=	[APPARMOR] Disable or enable AppArmor at boot time
 			Format: { "0" | "1" }
@@ -4931,6 +4999,11 @@
 				     vulnerable to cross-thread TAA attacks.
 			off        - Unconditionally disable TAA mitigation
 
+			On MDS-affected machines, tsx_async_abort=off can be
+			prevented by an active MDS mitigation as both vulnerabilities
+			are mitigated with the same mechanism so in order to disable
+			this mitigation, you need to specify mds=off too.
+
 			Not specifying this option is equivalent to
 			tsx_async_abort=full.  On CPUs which are MDS affected
 			and deploy MDS mitigation, TAA mitigation is not
@@ -5090,13 +5163,13 @@
 			Flags is a set of characters, each corresponding
 			to a common usb-storage quirk flag as follows:
 				a = SANE_SENSE (collect more than 18 bytes
-					of sense data);
+					of sense data, not on uas);
 				b = BAD_SENSE (don't collect more than 18
-					bytes of sense data);
+					bytes of sense data, not on uas);
 				c = FIX_CAPACITY (decrease the reported
 					device capacity by one sector);
 				d = NO_READ_DISC_INFO (don't use
-					READ_DISC_INFO command);
+					READ_DISC_INFO command, not on uas);
 				e = NO_READ_CAPACITY_16 (don't use
 					READ_CAPACITY_16 command);
 				f = NO_REPORT_OPCODES (don't use report opcodes
@@ -5111,17 +5184,18 @@
 				j = NO_REPORT_LUNS (don't use report luns
 					command, uas only);
 				l = NOT_LOCKABLE (don't try to lock and
-					unlock ejectable media);
+					unlock ejectable media, not on uas);
 				m = MAX_SECTORS_64 (don't transfer more
-					than 64 sectors = 32 KB at a time);
+					than 64 sectors = 32 KB at a time,
+					not on uas);
 				n = INITIAL_READ10 (force a retry of the
-					initial READ(10) command);
+					initial READ(10) command, not on uas);
 				o = CAPACITY_OK (accept the capacity
-					reported by the device);
+					reported by the device, not on uas);
 				p = WRITE_CACHE (the device cache is ON
-					by default);
+					by default, not on uas);
 				r = IGNORE_RESIDUE (the device reports
-					bogus residue values);
+					bogus residue values, not on uas);
 				s = SINGLE_LUN (the device has only one
 					Logical Unit);
 				t = NO_ATA_1X (don't allow ATA(12) and ATA(16)
@@ -5130,7 +5204,8 @@
 				w = NO_WP_DETECT (don't test whether the
 					medium is write-protected).
 				y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE
-					even if the device claims no cache)
+					even if the device claims no cache,
+					not on uas)
 			Example: quirks=0419:aaf5:rl,0421:0433:rc
 
 	user_debug=	[KNL,ARM]
diff --git a/Documentation/admin-guide/nfs/fault_injection.rst b/Documentation/admin-guide/nfs/fault_injection.rst
new file mode 100644
index 000000000000..eb029c0c15ce
--- /dev/null
+++ b/Documentation/admin-guide/nfs/fault_injection.rst
@@ -0,0 +1,70 @@
+===================
+NFS Fault Injection
+===================
+
+Fault injection is a method for forcing errors that may not normally occur, or
+may be difficult to reproduce.  Forcing these errors in a controlled environment
+can help the developer find and fix bugs before their code is shipped in a
+production system.  Injecting an error on the Linux NFS server will allow us to
+observe how the client reacts and if it manages to recover its state correctly.
+
+NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
+feature.
+
+
+Using Fault Injection
+=====================
+On the client, mount the fault injection server through NFS v4.0+ and do some
+work over NFS (open files, take locks, ...).
+
+On the server, mount the debugfs filesystem to <debug_dir> and ls
+<debug_dir>/nfsd.  This will show a list of files that will be used for
+injecting faults on the NFS server.  As root, write a number n to the file
+corresponding to the action you want the server to take.  The server will then
+process the first n items it finds.  So if you want to forget 5 locks, echo '5'
+to <debug_dir>/nfsd/forget_locks.  A value of 0 will tell the server to forget
+all corresponding items.  A log message will be created containing the number
+of items forgotten (check dmesg).
+
+Go back to work on the client and check if the client recovered from the error
+correctly.
+
+
+Available Faults
+================
+forget_clients:
+     The NFS server keeps a list of clients that have placed a mount call.  If
+     this list is cleared, the server will have no knowledge of who the client
+     is, forcing the client to reauthenticate with the server.
+
+forget_openowners:
+     The NFS server keeps a list of what files are currently opened and who
+     they were opened by.  Clearing this list will force the client to reopen
+     its files.
+
+forget_locks:
+     The NFS server keeps a list of what files are currently locked in the VFS.
+     Clearing this list will force the client to reclaim its locks (files are
+     unlocked through the VFS as they are cleared from this list).
+
+forget_delegations:
+     A delegation is used to assure the client that a file, or part of a file,
+     has not changed since the delegation was awarded.  Clearing this list will
+     force the client to reacquire its delegation before accessing the file
+     again.
+
+recall_delegations:
+     Delegations can be recalled by the server when another client attempts to
+     access a file.  This test will notify the client that its delegation has
+     been revoked, forcing the client to reacquire the delegation before using
+     the file again.
+
+
+tools/nfs/inject_faults.sh script
+=================================
+This script has been created to ease the fault injection process.  This script
+will detect the mounted debugfs directory and write to the files located there
+based on the arguments passed by the user.  For example, running
+`inject_faults.sh forget_locks 1` as root will instruct the server to forget
+one lock.  Running `inject_faults forget_locks` will instruct the server to
+forgetall locks.
diff --git a/Documentation/admin-guide/nfs/index.rst b/Documentation/admin-guide/nfs/index.rst
new file mode 100644
index 000000000000..6b5a3c90fac5
--- /dev/null
+++ b/Documentation/admin-guide/nfs/index.rst
@@ -0,0 +1,15 @@
+=============
+NFS
+=============
+
+.. toctree::
+    :maxdepth: 1
+
+    nfs-client
+    nfsroot
+    nfs-rdma
+    nfsd-admin-interfaces
+    nfs-idmapper
+    pnfs-block-server
+    pnfs-scsi-server
+    fault_injection
diff --git a/Documentation/admin-guide/nfs/nfs-client.rst b/Documentation/admin-guide/nfs/nfs-client.rst
new file mode 100644
index 000000000000..c4b777c7584b
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-client.rst
@@ -0,0 +1,141 @@
+==========
+NFS Client
+==========
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+special features of the NFS client that can be configured by system
+administrators.
+
+
+The nfs4_unique_id parameter
+============================
+
+NFSv4 requires clients to identify themselves to servers with a unique
+string.  File open and lock state shared between one client and one server
+is associated with this identity.  To support robust NFSv4 state recovery
+and transparent state migration, this identity string must not change
+across client reboots.
+
+Without any other intervention, the Linux client uses a string that contains
+the local system's node name.  System administrators, however, often do not
+take care to ensure that node names are fully qualified and do not change
+over the lifetime of a client system.  Node names can have other
+administrative requirements that require particular behavior that does not
+work well as part of an nfs_client_id4 string.
+
+The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
+used instead of a system's node name when an NFS client identifies itself to
+a server.  Thus, if the system's node name is not unique, or it changes, its
+nfs.nfs4_unique_id stays the same, preventing collision with other clients
+or loss of state during NFS reboot recovery or transparent state migration.
+
+The nfs.nfs4_unique_id string is typically a UUID, though it can contain
+anything that is believed to be unique across all NFS clients.  An
+nfs4_unique_id string should be chosen when a client system is installed,
+just as a system's root file system gets a fresh UUID in its label at
+install time.
+
+The string should remain fixed for the lifetime of the client.  It can be
+changed safely if care is taken that the client shuts down cleanly and all
+outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
+
+This string can be stored in an NFS client's grub.conf, or it can be provided
+via a net boot facility such as PXE.  It may also be specified as an nfs.ko
+module parameter.  Specifying a uniquifier string is not support for NFS
+clients running in containers.
+
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See `RFC3530 Section 6: Filesystem Migration and Replication`_ and
+`Implementation Guide for Referrals in NFSv4`_.
+
+.. _RFC3530 Section 6\: Filesystem Migration and Replication: http://tools.ietf.org/html/rfc3530#section-6
+.. _Implementation Guide for Referrals in NFSv4: http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+   (1) The process checks the dns_resolve cache to see if it contains a
+       valid entry. If so, it returns that entry and exits.
+
+   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+       is run, with two arguments:
+       - the cache name, "dns_resolve"
+       - the hostname to resolve
+
+   (3) After looking up the corresponding ip address, the helper script
+       writes the result into the rpc_pipefs pseudo-file
+       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+       in the following (text) format:
+
+		"<ip address> <hostname> <ttl>\n"
+
+       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+       <hostname> is identical to the second argument of the helper
+       script, and <ttl> is the 'time to live' of this cache entry (in
+       units of seconds).
+
+       .. note::
+            If <ip address> is invalid, say the string "0", then a negative
+            entry is created, which will cause the kernel to treat the hostname
+            as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+.. code-block:: sh
+
+    #!/bin/bash
+    #
+    ttl=600
+    #
+    cut=/usr/bin/cut
+    getent=/usr/bin/getent
+    rpc_pipefs=/var/lib/nfs/rpc_pipefs
+    #
+    die()
+    {
+        echo "Usage: $0 cache_name entry_name"
+        exit 1
+    }
+
+    [ $# -lt 2 ] && die
+    cachename="$1"
+    cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+    case "${cachename}" in
+        dns_resolve)
+            name="$2"
+            result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+            [ -z "${result}" ] && result="0"
+            ;;
+        *)
+            die
+            ;;
+    esac
+    echo "${result} ${name} ${ttl}" >${cache_path}
diff --git a/Documentation/admin-guide/nfs/nfs-idmapper.rst b/Documentation/admin-guide/nfs/nfs-idmapper.rst
new file mode 100644
index 000000000000..58b8e63412d5
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-idmapper.rst
@@ -0,0 +1,78 @@
+=============
+NFS ID Mapper
+=============
+
+Id mapper is used by NFS to translate user and group ids into names, and to
+translate user and group names into ids.  Part of this translation involves
+performing an upcall to userspace to request the information.  There are two
+ways NFS could obtain this information: placing a call to /sbin/request-key
+or by placing a call to the rpc.idmap daemon.
+
+NFS will attempt to call /sbin/request-key first.  If this succeeds, the
+result will be cached using the generic request-key cache.  This call should
+only fail if /etc/request-key.conf is not configured for the id_resolver key
+type, see the "Configuring" section below if you wish to use the request-key
+method.
+
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
+configured with the id_resolver key type), then the idmapper will ask the
+legacy rpc.idmap daemon for the id mapping.  This result will be stored
+in a custom NFS idmap cache.
+
+
+Configuring
+===========
+
+The file /etc/request-key.conf will need to be modified so /sbin/request-key can
+direct the upcall.  The following line should be added:
+
+``#OP	TYPE	DESCRIPTION	CALLOUT INFO	PROGRAM ARG1 ARG2 ARG3 ...``
+``#======	=======	===============	===============	===============================``
+``create	id_resolver	*	*		/usr/sbin/nfs.idmap %k %d 600``
+
+
+This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap.
+The last parameter, 600, defines how many seconds into the future the key will
+expire.  This parameter is optional for /usr/sbin/nfs.idmap.  When the timeout
+is not specified, nfs.idmap will default to 600 seconds.
+
+id mapper uses for key descriptions::
+
+	  uid:  Find the UID for the given user
+	  gid:  Find the GID for the given group
+	 user:  Find the user  name for the given UID
+	group:  Find the group name for the given GID
+
+You can handle any of these individually, rather than using the generic upcall
+program.  If you would like to use your own program for a uid lookup then you
+would edit your request-key.conf so it look similar to this:
+
+``#OP	TYPE	DESCRIPTION	CALLOUT INFO	PROGRAM ARG1 ARG2 ARG3 ...``
+``#======	=======	===============	===============	===============================``
+``create	id_resolver	uid:*	*		/some/other/program %k %d 600``
+``create	id_resolver	*	*		/usr/sbin/nfs.idmap %k %d 600``
+
+
+Notice that the new line was added above the line for the generic program.
+request-key will find the first matching line and corresponding program.  In
+this case, /some/other/program will handle all uid lookups and
+/usr/sbin/nfs.idmap will handle gid, user, and group lookups.
+
+See Documentation/security/keys/request-key.rst for more information
+about the request-key function.
+
+
+nfs.idmap
+=========
+
+nfs.idmap is designed to be called by request-key, and should not be run "by
+hand".  This program takes two arguments, a serialized key and a key
+description.  The serialized key is first converted into a key_serial_t, and
+then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
+
+The actual lookups are performed by functions found in nfsidmap.h.  nfs.idmap
+determines the correct function to call by looking at the first part of the
+description string.  For example, a uid lookup description will appear as
+"uid:user@domain".
+
+nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise.
diff --git a/Documentation/admin-guide/nfs/nfs-rdma.rst b/Documentation/admin-guide/nfs/nfs-rdma.rst
new file mode 100644
index 000000000000..ef0f3678b1fb
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-rdma.rst
@@ -0,0 +1,292 @@
+===================
+Setting up NFS/RDMA
+===================
+
+:Author:
+  NetApp and Open Grid Computing (May 29, 2008)
+
+.. warning::
+  This document is probably obsolete.
+
+Overview
+========
+
+This document describes how to install and setup the Linux NFS/RDMA client
+and server software.
+
+The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+was first included in the following release, Linux 2.6.25.
+
+In our testing, we have obtained excellent performance results (full 10Gbit
+wire bandwidth at minimal client CPU) under many workloads. The code passes
+the full Connectathon test suite and operates over both Infiniband and iWARP
+RDMA adapters.
+
+Getting Help
+============
+
+If you get stuck, you can ask questions on the
+nfs-rdma-devel@lists.sourceforge.net mailing list.
+
+Installation
+============
+
+These instructions are a step by step guide to building a machine for
+use with NFS/RDMA.
+
+- Install an RDMA device
+
+  Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+  Testing has been performed using several Mellanox-based IB cards, the
+  Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+- Install a Linux distribution and tools
+
+  The first kernel release to contain both the NFS/RDMA client and server was
+  Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
+  Linux kernel release should be installed.
+
+  The procedures described in this document have been tested with
+  distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+- Install nfs-utils-1.1.2 or greater on the client
+
+  An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+  nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+  version with support for NFS/RDMA mounts, but for various reasons we
+  recommend using nfs-utils-1.1.2 or greater). To see which version of
+  mount.nfs you are using, type:
+
+  .. code-block:: sh
+
+    $ /sbin/mount.nfs -V
+
+  If the version is less than 1.1.2 or the command does not exist,
+  you should install the latest version of nfs-utils.
+
+  Download the latest package from: http://www.kernel.org/pub/linux/utils/nfs
+
+  Uncompress the package and follow the installation instructions.
+
+  If you will not need the idmapper and gssd executables (you do not need
+  these to create an NFS/RDMA enabled mount command), the installation
+  process can be simplified by disabling these features when running
+  configure:
+
+  .. code-block:: sh
+
+    $ ./configure --disable-gss --disable-nfsv4
+
+  To build nfs-utils you will need the tcp_wrappers package installed. For
+  more information on this see the package's README and INSTALL files.
+
+  After building the nfs-utils package, there will be a mount.nfs binary in
+  the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+  or v4 mounts. To initiate a v4 mount, the binary must be called
+  mount.nfs4.  The standard technique is to create a symlink called
+  mount.nfs4 to mount.nfs.
+
+  This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+  .. code-block:: sh
+
+    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+  In this location, mount.nfs will be invoked automatically for NFS mounts
+  by the system mount command.
+
+    .. note::
+      mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
+      on the NFS client machine. You do not need this specific version of
+      nfs-utils on the server. Furthermore, only the mount.nfs command from
+      nfs-utils-1.1.2 is needed on the client.
+
+- Install a Linux kernel with NFS/RDMA
+
+  The NFS/RDMA client and server are both included in the mainline Linux
+  kernel version 2.6.25 and later. This and other versions of the Linux
+  kernel can be found at: https://www.kernel.org/pub/linux/kernel/
+
+  Download the sources and place them in an appropriate location.
+
+- Configure the RDMA stack
+
+  Make sure your kernel configuration has RDMA support enabled. Under
+  Device Drivers -> InfiniBand support, update the kernel configuration
+  to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+  InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+  Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+  iWARP adapter support (amso, cxgb3, etc.).
+
+  If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+- Configure the NFS client and server
+
+  Your kernel configuration must also have NFS file system support and/or
+  NFS server support enabled. These and other NFS related configuration
+  options can be found under File Systems -> Network File Systems.
+
+- Build, install, reboot
+
+  The NFS/RDMA code will be enabled automatically if NFS and RDMA
+  are turned on. The NFS/RDMA client and server are configured via the hidden
+  SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+  value of SUNRPC_XPRT_RDMA will be:
+
+    #. N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
+       and server will not be built
+
+    #. M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+       in this case the NFS/RDMA client and server will be built as modules
+
+    #. Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+       and server will be built into the kernel
+
+  Therefore, if you have followed the steps above and turned no NFS and RDMA,
+  the NFS/RDMA client and server will be built.
+
+  Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+========================
+
+Before configuring the NFS/RDMA software, it is a good idea to test
+your new kernel to ensure that the kernel is working correctly.
+In particular, it is a good idea to verify that the RDMA stack
+is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+is working properly.
+
+- Check RDMA Setup
+
+  If you built the RDMA components as modules, load them at
+  this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+  card:
+
+  .. code-block:: sh
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+
+  If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+  running on the network. If your IB switch has an embedded SM, you can
+  use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+  of your end nodes.
+
+  If an SM is running on your network, you should see the following:
+
+  .. code-block:: sh
+
+    $ cat /sys/class/infiniband/driverX/ports/1/state
+    4: ACTIVE
+
+  where driverX is mthca0, ipath5, ehca3, etc.
+
+  To further test the InfiniBand software stack, use IPoIB (this
+  assumes you have two IB hosts named host1 and host2):
+
+  .. code-block:: sh
+
+    host1$ ip link set dev ib0 up
+    host1$ ip address add dev ib0 a.b.c.x
+    host2$ ip link set dev ib0 up
+    host2$ ip address add dev ib0 a.b.c.y
+    host1$ ping a.b.c.y
+    host2$ ping a.b.c.x
+
+  For other device types, follow the appropriate procedures.
+
+- Check NFS Setup
+
+  For the NFS components enabled above (client and/or server),
+  test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+==============
+
+We recommend that you use two machines, one to act as the client and
+one to act as the server.
+
+One time configuration:
+-----------------------
+
+- On the server system, configure the /etc/exports file and start the NFS/RDMA server.
+
+  Exports entries with the following formats have been tested::
+
+  /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+  /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
+
+  The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+  HCA or the client's iWARP address(es) for an RNIC.
+
+  .. note::
+    The "insecure" option must be used because the NFS/RDMA client does
+    not use a reserved port.
+
+Each time a machine boots:
+--------------------------
+
+- Load and configure the RDMA drivers
+
+  For InfiniBand using a Mellanox adapter:
+
+  .. code-block:: sh
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+    $ ip li set dev ib0 up
+    $ ip addr add dev ib0 a.b.c.d
+
+  .. note::
+    Please use unique addresses for the client and server!
+
+- Start the NFS server
+
+  If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+  kernel config), load the RDMA transport module:
+
+  .. code-block:: sh
+
+    $ modprobe svcrdma
+
+  Regardless of how the server was built (module or built-in), start the
+  server:
+
+  .. code-block:: sh
+
+    $ /etc/init.d/nfs start
+
+  or
+
+  .. code-block:: sh
+
+    $ service nfs start
+
+  Instruct the server to listen on the RDMA transport:
+
+  .. code-block:: sh
+
+    $ echo rdma 20049 > /proc/fs/nfsd/portlist
+
+- On the client system
+
+  If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+  kernel config), load the RDMA client module:
+
+  .. code-block:: sh
+
+    $ modprobe xprtrdma.ko
+
+  Regardless of how the client was built (module or built-in), use this
+  command to mount the NFS/RDMA server:
+
+  .. code-block:: sh
+
+    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
+
+  To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+  the "proto" field for the given mount.
+
+  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst b/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst
new file mode 100644
index 000000000000..c05926f79054
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst
@@ -0,0 +1,40 @@
+==================================
+Administrative interfaces for nfsd
+==================================
+
+Note that normally these interfaces are used only by the utilities in
+nfs-utils.
+
+nfsd is controlled mainly by pseudofiles under the "nfsd" filesystem,
+which is normally mounted at /proc/fs/nfsd/.
+
+The server is always started by the first write of a nonzero value to
+nfsd/threads.
+
+Before doing that, NFSD can be told which sockets to listen on by
+writing to nfsd/portlist; that write may be:
+
+	-  an ascii-encoded file descriptor, which should refer to a
+	   bound (and listening, for tcp) socket, or
+	-  "transportname port", where transportname is currently either
+	   "udp", "tcp", or "rdma".
+
+If nfsd is started without doing any of these, then it will create one
+udp and one tcp listener at port 2049 (see nfsd_init_socks).
+
+On startup, nfsd and lockd grace periods start. nfsd is shut down by a write of
+0 to nfsd/threads.  All locks and state are thrown away at that point.
+
+Between startup and shutdown, the number of threads may be adjusted up
+or down by additional writes to nfsd/threads or by writes to
+nfsd/pool_threads.
+
+For more detail about files under nfsd/ and what they control, see
+fs/nfsd/nfsctl.c; most of them have detailed comments.
+
+Implementation notes
+====================
+
+Note that the rpc server requires the caller to serialize addition and
+removal of listening sockets, and startup and shutdown of the server.
+For nfsd this is done using nfsd_mutex.
diff --git a/Documentation/admin-guide/nfs/nfsroot.rst b/Documentation/admin-guide/nfs/nfsroot.rst
new file mode 100644
index 000000000000..82a4fda057f9
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfsroot.rst
@@ -0,0 +1,364 @@
+===============================================
+Mounting the root filesystem via NFS (nfsroot)
+===============================================
+
+:Authors:
+	Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
+
+	Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+
+	Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
+
+	Updated 2006 by Horms <horms@verge.net.au>
+
+	Updated 2018 by Chris Novakovic <chris@chrisn.me.uk>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server for
+example, it is necessary for the root filesystem to be present on a non-disk
+device. This may be an initramfs (see
+Documentation/filesystems/ramfs-rootfs-initramfs.txt), a ramdisk (see
+Documentation/admin-guide/initrd.rst) or a filesystem mounted via NFS. The
+following text describes on how to use NFS for the root filesystem. For the rest
+of this text 'client' means the diskless system, and 'server' means the NFS
+server.
+
+
+
+
+Enabling nfsroot capabilities
+=============================
+
+In order to use nfsroot, NFS client support needs to be selected as
+built-in during configuration. Once this has been selected, the nfsroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+
+
+Kernel command line
+===================
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of nfsroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+
+root=/dev/nfs
+  This is necessary to enable the pseudo-NFS-device. Note that it's not a
+  real device but just a synonym to tell the kernel to use NFS instead of
+  a real device.
+
+
+nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+  If the `nfsroot' parameter is NOT given on the command line,
+  the default ``"/tftpboot/%s"`` will be used.
+
+  <server-ip>	Specifies the IP address of the NFS server.
+		The default address is determined by the ip parameter
+		(see below). This parameter allows the use of different
+		servers for IP autoconfiguration and NFS.
+
+  <root-dir>	Name of the directory on the server to mount as root.
+		If there is a "%s" token in the string, it will be
+		replaced by the ASCII-representation of the client's
+		IP address.
+
+  <nfs-options>	Standard NFS options. All options are separated by commas.
+		The following defaults are used::
+
+			port		= as given by server portmap daemon
+			rsize		= 4096
+			wsize		= 4096
+			timeo		= 7
+			retrans		= 3
+			acregmin	= 3
+			acregmax	= 60
+			acdirmin	= 30
+			acdirmax	= 60
+			flags		= hard, nointr, noposix, cto, ac
+
+
+ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:<dns0-ip>:<dns1-ip>:<ntp0-ip>
+  This parameter tells the kernel how to configure IP addresses of devices
+  and also how to set up the IP routing table. It was originally called
+  nfsaddrs, but now the boot-time IP configuration works independently of
+  NFS, so it was renamed to ip and the old name remained as an alias for
+  compatibility reasons.
+
+  If this parameter is missing from the kernel command line, all fields are
+  assumed to be empty, and the defaults mentioned below apply. In general
+  this means that the kernel tries to configure everything using
+  autoconfiguration.
+
+  The <autoconf> parameter can appear alone as the value to the ip
+  parameter (without all the ':' characters before).  If the value is
+  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
+  autoconfiguration will take place.  The most common way to use this
+  is "ip=dhcp".
+
+  <client-ip>	IP address of the client.
+  		Default:  Determined using autoconfiguration.
+
+  <server-ip>	IP address of the NFS server.
+		If RARP is used to determine
+		the client address and this parameter is NOT empty only
+		replies from the specified server are accepted.
+
+		Only required for NFS root. That is autoconfiguration
+		will not be triggered if it is missing and NFS root is not
+		in operation.
+
+		Value is exported to /proc/net/pnp with the prefix "bootserver "
+		(see below).
+
+		Default: Determined using autoconfiguration.
+		The address of the autoconfiguration server is used.
+
+  <gw-ip>	IP address of a gateway if the server is on a different subnet.
+		Default: Determined using autoconfiguration.
+
+  <netmask>	Netmask for local network interface.
+		If unspecified the netmask is derived from the client IP address
+		assuming classful addressing.
+
+		Default:  Determined using autoconfiguration.
+
+  <hostname>	Name of the client.
+		If a '.' character is present, anything
+		before the first '.' is used as the client's hostname, and anything
+		after it is used as its NIS domain name. May be supplied by
+		autoconfiguration, but its absence will not trigger autoconfiguration.
+		If specified and DHCP is used, the user-provided hostname (and NIS
+		domain name, if present) will be carried in the DHCP request; this
+		may cause a DNS record to be created or updated for the client.
+
+  		Default: Client IP address is used in ASCII notation.
+
+  <device>	Name of network device to use.
+		Default: If the host only has one device, it is used.
+		Otherwise the device is determined using
+		autoconfiguration. This is done by sending
+		autoconfiguration requests out of all devices,
+		and using the device that received the first reply.
+
+  <autoconf>	Method to use for autoconfiguration.
+		In the case of options
+		which specify multiple autoconfiguration protocols,
+		requests are sent using all protocols, and the first one
+		to reply is used.
+
+		Only autoconfiguration protocols that have been compiled
+		into the kernel will be used, regardless of the value of
+		this option::
+
+                  off or none: don't use autoconfiguration
+				(do static IP assignment instead)
+		  on or any:   use any protocol available in the kernel
+			       (default)
+		  dhcp:        use DHCP
+		  bootp:       use BOOTP
+		  rarp:        use RARP
+		  both:        use both BOOTP and RARP but not DHCP
+		               (old option kept for backwards compatibility)
+
+		if dhcp is used, the client identifier can be used by following
+		format "ip=dhcp,client-id-type,client-id-value"
+
+                Default: any
+
+  <dns0-ip>	IP address of primary nameserver.
+		Value is exported to /proc/net/pnp with the prefix "nameserver "
+		(see below).
+
+		Default: None if not using autoconfiguration; determined
+		automatically if using autoconfiguration.
+
+  <dns1-ip>	IP address of secondary nameserver.
+		See <dns0-ip>.
+
+  <ntp0-ip>	IP address of a Network Time Protocol (NTP) server.
+		Value is exported to /proc/net/ipconfig/ntp_servers, but is
+		otherwise unused (see below).
+
+		Default: None if not using autoconfiguration; determined
+		automatically if using autoconfiguration.
+
+  After configuration (whether manual or automatic) is complete, two files
+  are created in the following format; lines are omitted if their respective
+  value is empty following configuration:
+
+  - /proc/net/pnp:
+
+	#PROTO: <DHCP|BOOTP|RARP|MANUAL>	(depending on configuration method)
+	domain <dns-domain>			(if autoconfigured, the DNS domain)
+	nameserver <dns0-ip>			(primary name server IP)
+	nameserver <dns1-ip>			(secondary name server IP)
+	nameserver <dns2-ip>			(tertiary name server IP)
+	bootserver <server-ip>			(NFS server IP)
+
+  - /proc/net/ipconfig/ntp_servers:
+
+	<ntp0-ip>				(NTP server IP)
+	<ntp1-ip>				(NTP server IP)
+	<ntp2-ip>				(NTP server IP)
+
+  <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip>
+  (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration;
+  they cannot be specified as part of the "ip=" kernel command line parameter.
+
+  Because the "domain" and "nameserver" options are recognised by DNS
+  resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems
+  that use an NFS root filesystem.
+
+  Note that the kernel will not synchronise the system time with any NTP
+  servers it discovers; this is the responsibility of a user space process
+  (e.g. an initrd/initramfs script that passes the IP addresses listed in
+  /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real
+  root filesystem if it is on NFS).
+
+
+nfsrootdebug
+  This parameter enables debugging messages to appear in the kernel
+  log at boot time so that administrators can verify that the correct
+  NFS mount options, server address, and root path are passed to the
+  NFS client.
+
+
+rdinit=<executable file>
+  To specify which file contains the program that starts system
+  initialization, administrators can use this command line parameter.
+  The default value of this parameter is "/init".  If the specified
+  file exists and the kernel can execute it, root filesystem related
+  kernel command line parameters, including 'nfsroot=', are ignored.
+
+  A description of the process of mounting the root file system can be
+  found in Documentation/driver-api/early-userspace/early_userspace_support.rst
+
+
+Boot Loader
+===========
+
+To get the kernel into memory different approaches can be used.
+They depend on various facilities being available:
+
+
+- Booting from a floppy using syslinux
+
+	When building kernels, an easy way to create a boot floppy that uses
+	syslinux is to use the zdisk or bzdisk make targets which use zimage
+      	and bzimage images respectively. Both targets accept the
+     	FDARGS parameter which can be used to set the kernel command line.
+
+	e.g::
+
+	   make bzdisk FDARGS="root=/dev/nfs"
+
+   	Note that the user running this command will need to have
+     	access to the floppy drive device, /dev/fd0
+
+     	For more information on syslinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+	.. note::
+		Previously it was possible to write a kernel directly to
+		a floppy using dd, configure the boot device using rdev, and
+		boot using the resulting floppy. Linux no longer supports this
+		method of booting.
+
+- Booting from a cdrom using isolinux
+
+     	When building kernels, an easy way to create a bootable cdrom that
+     	uses isolinux is to use the isoimage target which uses a bzimage
+     	image. Like zdisk and bzdisk, this target accepts the FDARGS
+     	parameter which can be used to set the kernel command line.
+
+	e.g::
+
+	  make isoimage FDARGS="root=/dev/nfs"
+
+     	The resulting iso image will be arch/<ARCH>/boot/image.iso
+     	This can be written to a cdrom using a variety of tools including
+     	cdrecord.
+
+	e.g::
+
+	  cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
+
+     	For more information on isolinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+- Using LILO
+
+	When using LILO all the necessary command line parameters may be
+	specified using the 'append=' directive in the LILO configuration
+	file.
+
+	However, to use the 'root=' directive you also need to create
+	a dummy root device, which may be removed after LILO is run.
+
+	e.g::
+
+	  mknod /dev/boot255 c 0 255
+
+	For information on configuring LILO, please refer to its documentation.
+
+- Using GRUB
+
+	When using GRUB, kernel parameter are simply appended after the kernel
+	specification: kernel <kernel> <parameters>
+
+- Using loadlin
+
+	loadlin may be used to boot Linux from a DOS command prompt without
+	requiring a local hard disk to mount as root. This has not been
+	thoroughly tested by the authors of this document, but in general
+	it should be possible configure the kernel command line similarly
+	to the configuration of LILO.
+
+	Please refer to the loadlin documentation for further information.
+
+- Using a boot ROM
+
+	This is probably the most elegant way of booting a diskless client.
+	With a boot ROM the kernel is loaded using the TFTP protocol. The
+	authors of this document are not aware of any no commercial boot
+	ROMs that support booting Linux over the network. However, there
+	are two free implementations of a boot ROM, netboot-nfs and
+	etherboot, both of which are available on sunsite.unc.edu, and both
+	of which contain everything you need to boot a diskless Linux client.
+
+- Using pxelinux
+
+	Pxelinux may be used to boot linux using the PXE boot loader
+	which is present on many modern network cards.
+
+	When using pxelinux, the kernel image is specified using
+	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
+	are passed to the kernel by adding them to the "append" line.
+	It is common to use serial console in conjunction with pxeliunx,
+	see Documentation/admin-guide/serial-console.rst for more information.
+
+	For more information on isolinux, including how to create bootdisks
+	for prebuilt kernels, see http://syslinux.zytor.com/
+
+
+
+
+Credits
+=======
+
+  The nfsroot code in the kernel and the RARP support have been written
+  by Gero Kuhlmann <gero@gkminix.han.de>.
+
+  The rest of the IP layer autoconfiguration code has been written
+  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
+
+  In order to write the initial version of nfsroot I would like to thank
+  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/admin-guide/nfs/pnfs-block-server.rst b/Documentation/admin-guide/nfs/pnfs-block-server.rst
new file mode 100644
index 000000000000..b00a2e705cc4
--- /dev/null
+++ b/Documentation/admin-guide/nfs/pnfs-block-server.rst
@@ -0,0 +1,42 @@
+===================================
+pNFS block layout server user guide
+===================================
+
+The Linux NFS server now supports the pNFS block layout extension.  In this
+case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition
+to handling all the metadata access to the NFS export also hands out layouts
+to the clients to directly access the underlying block devices that are
+shared with the client.
+
+To use pNFS block layouts with with the Linux NFS server the exported file
+system needs to support the pNFS block layouts (currently just XFS), and the
+file system must sit on shared storage (typically iSCSI) that is accessible
+to the clients in addition to the MDS.  As of now the file system needs to
+sit directly on the exported volume, striping or concatenation of
+volumes on the MDS and clients is not supported yet.
+
+On the server, pNFS block volume support is automatically if the file system
+support it.  On the client make sure the kernel has the CONFIG_PNFS_BLOCK
+option enabled, the blkmapd daemon from nfs-utils is running, and the
+file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client it calls
+/sbin/nfsd-recall-failed with the first argument set to the IP address of
+the client, and the second argument set to the device node without the /dev
+prefix for the file system to be fenced. Below is an example file that shows
+how to translate the device into a serial number from SCSI EVPD 0x80::
+
+	cat > /sbin/nfsd-recall-failed << EOF
+
+.. code-block:: sh
+
+	#!/bin/sh
+
+	CLIENT="$1"
+	DEV="/dev/$2"
+	EVPD=`sg_inq --page=0x80 ${DEV} | \
+		grep "Unit serial number:" | \
+		awk -F ': ' '{print $2}'`
+
+	echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
+	EOF
diff --git a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
new file mode 100644
index 000000000000..d2f6ee558071
--- /dev/null
+++ b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
@@ -0,0 +1,24 @@
+
+==================================
+pNFS SCSI layout server user guide
+==================================
+
+This document describes support for pNFS SCSI layouts in the Linux NFS server.
+With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
+which in addition to handling all the metadata access to the NFS export,
+also hands out layouts to the clients so that they can directly access the
+underlying SCSI LUNs that are shared with the client.
+
+To use pNFS SCSI layouts with with the Linux NFS server, the exported file
+system needs to support the pNFS SCSI layouts (currently just XFS), and the
+file system must sit on a SCSI LUN that is accessible to the clients in
+addition to the MDS.  As of now the file system needs to sit directly on the
+exported LUN, striping or concatenation of LUNs on the MDS and clients
+is not supported yet.
+
+On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
+automatically enabled if the file system is exported using the "pnfs"
+option and the underlying SCSI device support persistent reservations.
+On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
+enabled, and the file system is mounted using the NFSv4.1 protocol
+version (mount -o vers=4.1).
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 517a205abad6..3726a10a03ba 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -17,36 +17,54 @@ The "format" directory describes format of the config (event ID) and config1
 (AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
 devices/imx8_ddr0/format/. The "events" directory describes the events types
 hardware supported that can be used with perf tool, see /sys/bus/event_source/
-devices/imx8_ddr0/events/.
-  e.g.::
+devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
+in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
+
+    .. code-block:: bash
+
         perf stat -a -e imx8_ddr0/cycles/ cmd
         perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd
 
 AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
 to count reading or writing matches filter setting. Filter setting is various
 from different DRAM controller implementations, which is distinguished by quirks
-in the driver.
+in the driver. You also can dump info from userspace, filter in "caps" directory
+indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates
+whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and
+value 1 for supported.
 
-* With DDR_CAP_AXI_ID_FILTER quirk.
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0).
   Filter is defined with two configuration parts:
   --AXI_ID defines AxID matching value.
   --AXI_MASKING defines which bits of AxID are meaningful for the matching.
-        0：corresponding bit is masked.
-        1: corresponding bit is not masked, i.e. used to do the matching.
+
+      - 0: corresponding bit is masked.
+      - 1: corresponding bit is not masked, i.e. used to do the matching.
 
   AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
   When non-masked bits are matching corresponding AXI_ID bits then counter is
   incremented. Perf counter is incremented if
-          AxID && AXI_MASKING == AXI_ID && AXI_MASKING
+        AxID && AXI_MASKING == AXI_ID && AXI_MASKING
 
   This filter doesn't support filter different AXI ID for axid-read and axid-write
   event at the same time as this filter is shared between counters.
-  e.g.::
-        perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
-        perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
-
-  NOTE: axi_mask is inverted in userspace(i.e. set bits are bits to mask), and
-  it will be reverted in driver automatically. so that the user can just specify
-  axi_id to monitor a specific id, rather than having to specify axi_mask.
-  e.g.::
+
+  .. code-block:: bash
+
+      perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+      perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+
+  .. note::
+
+      axi_mask is inverted in userspace(i.e. set bits are bits to mask), and
+      it will be reverted in driver automatically. so that the user can just specify
+      axi_id to monitor a specific id, rather than having to specify axi_mask.
+
+  .. code-block:: bash
+
         perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
+
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1).
+  This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
+  counting the number of bytes (as opposed to the number of bursts) from DDR
+  read and write transactions concurrently with another set of data counters.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index ee4bfd2a740f..47c99f40cc16 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -8,6 +8,7 @@ Performance monitor support
    :maxdepth: 1
 
    hisi-pmu
+   imx-ddr
    qcom_l2_pmu
    qcom_l3_pmu
    arm-ccn
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index 08e33675853a..01f158238ae1 100644
--- a/Documentation/admin-guide/perf/thunderx2-pmu.rst
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -3,24 +3,26 @@ Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
 =============================================================
 
 The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
-PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
+PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and
+Cavium Coherent Processor Interconnect (CCPI2).
 
 The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
 Events are counted for the default channel (i.e. channel 0) and prorated
 to the total number of channels/tiles.
 
-The DMC and L3C support up to 4 counters. Counters are independently
-programmable and can be started and stopped individually. Each counter
-can be set to a different event. Counters are 32-bit and do not support
-an overflow interrupt; they are read every 2 seconds.
+The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8
+counters. Counters are independently programmable to different events and
+can be started and stopped individually. None of the counters support an
+overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds.
+The CCPI2 counters are 64-bit and assumed not to overflow in normal operation.
 
 PMU UNCORE (perf) driver:
 
 The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
-L3C devices.  Each PMU can be used to count up to 4 events
-simultaneously. The PMUs provide a description of their available events
-and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
+L3C devices.  Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
+(CCPI2) events simultaneously. The PMUs provide a description of their
+available events and configuration options under sysfs, see
+/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
 
 The driver does not support sampling, therefore "perf record" will not
 work. Per-task perf sessions are also not supported.
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index e70b365dbc60..311cd7cc2b75 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -506,6 +506,9 @@ object corresponding to it, as follows:
 ``disable``
 	Whether or not this idle state is disabled.
 
+``default_status``
+	The default status of this state, "enabled" or "disabled".
+
 ``latency``
 	Exit latency of the idle state in microseconds.
 
diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
new file mode 100644
index 000000000000..afbf778035f8
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -0,0 +1,246 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================================
+``intel_idle`` CPU Idle Time Management Driver
+==============================================
+
+:Copyright: |copy| 2020 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+General Information
+===================
+
+``intel_idle`` is a part of the
+:doc:`CPU idle time management subsystem <cpuidle>` in the Linux kernel
+(``CPUIdle``).  It is the default CPU idle time management driver for the
+Nehalem and later generations of Intel processors, but the level of support for
+a particular processor model in it depends on whether or not it recognizes that
+processor model and may also depend on information coming from the platform
+firmware.  [To understand ``intel_idle`` it is necessary to know how ``CPUIdle``
+works in general, so this is the time to get familiar with :doc:`cpuidle` if you
+have not done that yet.]
+
+``intel_idle`` uses the ``MWAIT`` instruction to inform the processor that the
+logical CPU executing it is idle and so it may be possible to put some of the
+processor's functional blocks into low-power states.  That instruction takes two
+arguments (passed in the ``EAX`` and ``ECX`` registers of the target CPU), the
+first of which, referred to as a *hint*, can be used by the processor to
+determine what can be done (for details refer to Intel Software Developer’s
+Manual [1]_).  Accordingly, ``intel_idle`` refuses to work with processors in
+which the support for the ``MWAIT`` instruction has been disabled (for example,
+via the platform firmware configuration menu) or which do not support that
+instruction at all.
+
+``intel_idle`` is not modular, so it cannot be unloaded, which means that the
+only way to pass early-configuration-time parameters to it is via the kernel
+command line.
+
+
+.. _intel-idle-enumeration-of-states:
+
+Enumeration of Idle States
+==========================
+
+Each ``MWAIT`` hint value is interpreted by the processor as a license to
+reconfigure itself in a certain way in order to save energy.  The processor
+configurations (with reduced power draw) resulting from that are referred to
+as C-states (in the ACPI terminology) or idle states.  The list of meaningful
+``MWAIT`` hint values and idle states (i.e. low-power configurations of the
+processor) corresponding to them depends on the processor model and it may also
+depend on the configuration of the platform.
+
+In order to create a list of available idle states required by the ``CPUIdle``
+subsystem (see :ref:`idle-states-representation` in :doc:`cpuidle`),
+``intel_idle`` can use two sources of information: static tables of idle states
+for different processor models included in the driver itself and the ACPI tables
+of the system.  The former are always used if the processor model at hand is
+recognized by ``intel_idle`` and the latter are used if that is required for
+the given processor model (which is the case for all server processor models
+recognized by ``intel_idle``) or if the processor model is not recognized.
+
+If the ACPI tables are going to be used for building the list of available idle
+states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI
+objects corresponding to the CPUs in the system (refer to the ACPI specification
+[2]_ for the description of ``_CST`` and its output package).  Because the
+``CPUIdle`` subsystem expects that the list of idle states supplied by the
+driver will be suitable for all of the CPUs handled by it and ``intel_idle`` is
+registered as the ``CPUIdle`` driver for all of the CPUs in the system, the
+driver looks for the first ``_CST`` object returning at least one valid idle
+state description and such that all of the idle states included in its return
+package are of the FFH (Functional Fixed Hardware) type, which means that the
+``MWAIT`` instruction is expected to be used to tell the processor that it can
+enter one of them.  The return package of that ``_CST`` is then assumed to be
+applicable to all of the other CPUs in the system and the idle state
+descriptions extracted from it are stored in a preliminary list of idle states
+coming from the ACPI tables.  [This step is skipped if ``intel_idle`` is
+configured to ignore the ACPI tables; see `below <intel-idle-parameters_>`_.]
+
+Next, the first (index 0) entry in the list of available idle states is
+initialized to represent a "polling idle state" (a pseudo-idle state in which
+the target CPU continuously fetches and executes instructions), and the
+subsequent (real) idle state entries are populated as follows.
+
+If the processor model at hand is recognized by ``intel_idle``, there is a
+(static) table of idle state descriptions for it in the driver.  In that case,
+the "internal" table is the primary source of information on idle states and the
+information from it is copied to the final list of available idle states.  If
+using the ACPI tables for the enumeration of idle states is not required
+(depending on the processor model), all of the listed idle state are enabled by
+default (so all of them will be taken into consideration by ``CPUIdle``
+governors during CPU idle state selection).  Otherwise, some of the listed idle
+states may not be enabled by default if there are no matching entries in the
+preliminary list of idle states coming from the ACPI tables.  In that case user
+space still can enable them later (on a per-CPU basis) with the help of
+the ``disable`` idle state attribute in ``sysfs`` (see
+:ref:`idle-states-representation` in :doc:`cpuidle`).  This basically means that
+the idle states "known" to the driver may not be enabled by default if they have
+not been exposed by the platform firmware (through the ACPI tables).
+
+If the given processor model is not recognized by ``intel_idle``, but it
+supports ``MWAIT``, the preliminary list of idle states coming from the ACPI
+tables is used for building the final list that will be supplied to the
+``CPUIdle`` core during driver registration.  For each idle state in that list,
+the description, ``MWAIT`` hint and exit latency are copied to the corresponding
+entry in the final list of idle states.  The name of the idle state represented
+by it (to be returned by the ``name`` idle state attribute in ``sysfs``) is
+"CX_ACPI", where X is the index of that idle state in the final list (note that
+the minimum value of X is 1, because 0 is reserved for the "polling" state), and
+its target residency is based on the exit latency value.  Specifically, for
+C1-type idle states the exit latency value is also used as the target residency
+(for compatibility with the majority of the "internal" tables of idle states for
+various processor models recognized by ``intel_idle``) and for the other idle
+state types (C2 and C3) the target residency value is 3 times the exit latency
+(again, that is because it reflects the target residency to exit latency ratio
+in the majority of cases for the processor models recognized by ``intel_idle``).
+All of the idle states in the final list are enabled by default in this case.
+
+
+.. _intel-idle-initialization:
+
+Initialization
+==============
+
+The initialization of ``intel_idle`` starts with checking if the kernel command
+line options forbid the use of the ``MWAIT`` instruction.  If that is the case,
+an error code is returned right away.
+
+The next step is to check whether or not the processor model is known to the
+driver, which determines the idle states enumeration method (see
+`above <intel-idle-enumeration-of-states_>`_), and whether or not the processor
+supports ``MWAIT`` (the initialization fails if that is not the case).  Then,
+the ``MWAIT`` support in the processor is enumerated through ``CPUID`` and the
+driver initialization fails if the level of support is not as expected (for
+example, if the total number of ``MWAIT`` substates returned is 0).
+
+Next, if the driver is not configured to ignore the ACPI tables (see
+`below <intel-idle-parameters_>`_), the idle states information provided by the
+platform firmware is extracted from them.
+
+Then, ``CPUIdle`` device objects are allocated for all CPUs and the list of
+available idle states is created as explained
+`above <intel-idle-enumeration-of-states_>`_.
+
+Finally, ``intel_idle`` is registered with the help of cpuidle_register_driver()
+as the ``CPUIdle`` driver for all CPUs in the system and a CPU online callback
+for configuring individual CPUs is registered via cpuhp_setup_state(), which
+(among other things) causes the callback routine to be invoked for all of the
+CPUs present in the system at that time (each CPU executes its own instance of
+the callback routine).  That routine registers a ``CPUIdle`` device for the CPU
+running it (which enables the ``CPUIdle`` subsystem to operate that CPU) and
+optionally performs some CPU-specific initialization actions that may be
+required for the given processor model.
+
+
+.. _intel-idle-parameters:
+
+Kernel Command Line Options and Module Parameters
+=================================================
+
+The *x86* architecture support code recognizes three kernel command line
+options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
+and ``idle=nomwait``.  If any of them is present in the kernel command line, the
+``MWAIT`` instruction is not allowed to be used, so the initialization of
+``intel_idle`` will fail.
+
+Apart from that there are two module parameters recognized by ``intel_idle``
+itself that can be set via the kernel command line (they cannot be updated via
+sysfs, so that is the only way to change their values).
+
+The ``max_cstate`` parameter value is the maximum idle state index in the list
+of idle states supplied to the ``CPUIdle`` core during the registration of the
+driver.  It is also the maximum number of regular (non-polling) idle states that
+can be used by ``intel_idle``, so the enumeration of idle states is terminated
+after finding that number of usable idle states (the other idle states that
+potentially might have been used if ``max_cstate`` had been greater are not
+taken into consideration at all).  Setting ``max_cstate`` can prevent
+``intel_idle`` from exposing idle states that are regarded as "too deep" for
+some reason to the ``CPUIdle`` core, but it does so by making them effectively
+invisible until the system is shut down and started again which may not always
+be desirable.  In practice, it is only really necessary to do that if the idle
+states in question cannot be enabled during system startup, because in the
+working state of the system the CPU power management quality of service (PM
+QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states
+even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`).
+Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail.
+
+The ``noacpi`` module parameter (which is recognized by ``intel_idle`` if the
+kernel has been configured with ACPI support), can be set to make the driver
+ignore the system's ACPI tables entirely (it is unset by default).
+
+
+.. _intel-idle-core-and-package-idle-states:
+
+Core and Package Levels of Idle States
+======================================
+
+Typically, in a processor supporting the ``MWAIT`` instruction there are (at
+least) two levels of idle states (or C-states).  One level, referred to as
+"core C-states", covers individual cores in the processor, whereas the other
+level, referred to as "package C-states", covers the entire processor package
+and it may also involve other components of the system (GPUs, memory
+controllers, I/O hubs etc.).
+
+Some of the ``MWAIT`` hint values allow the processor to use core C-states only
+(most importantly, that is the case for the ``MWAIT`` hint value corresponding
+to the ``C1`` idle state), but the majority of them give it a license to put
+the target core (i.e. the core containing the logical CPU executing ``MWAIT``
+with the given hint value) into a specific core C-state and then (if possible)
+to enter a specific package C-state at the deeper level.  For example, the
+``MWAIT`` hint value representing the ``C3`` idle state allows the processor to
+put the target core into the low-power state referred to as "core ``C3``" (or
+``CC3``), which happens if all of the logical CPUs (SMT siblings) in that core
+have executed ``MWAIT`` with the ``C3`` hint value (or with a hint value
+representing a deeper idle state), and in addition to that (in the majority of
+cases) it gives the processor a license to put the entire package (possibly
+including some non-CPU components such as a GPU or a memory controller) into the
+low-power state referred to as "package ``C3``" (or ``PC3``), which happens if
+all of the cores have gone into the ``CC3`` state and (possibly) some additional
+conditions are satisfied (for instance, if the GPU is covered by ``PC3``, it may
+be required to be in a certain GPU-specific low-power state for ``PC3`` to be
+reachable).
+
+As a rule, there is no simple way to make the processor use core C-states only
+if the conditions for entering the corresponding package C-states are met, so
+the logical CPU executing ``MWAIT`` with a hint value that is not core-level
+only (like for ``C1``) must always assume that this may cause the processor to
+enter a package C-state.  [That is why the exit latency and target residency
+values corresponding to the majority of ``MWAIT`` hint values in the "internal"
+tables of idle states in ``intel_idle`` reflect the properties of package
+C-states.]  If using package C-states is not desirable at all, either
+:ref:`PM QoS <cpu-pm-qos>` or the ``max_cstate`` module parameter of
+``intel_idle`` described `above <intel-idle-parameters_>`_ must be used to
+restrict the range of permissible idle states to the ones with core-level only
+``MWAIT`` hint values (like ``C1``).
+
+
+References
+==========
+
+.. [1] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2B*,
+       https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2b-manual.html
+
+.. [2] *Advanced Configuration and Power Interface (ACPI) Specification*,
+       https://uefi.org/specifications
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index fc298eb1234b..88f717e59a42 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -8,6 +8,7 @@ Working-State Power Management
    :maxdepth: 2
 
    cpuidle
+   intel_idle
    cpufreq
    intel_pstate
    intel_epb
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
index 2b20f5f7380d..0310db624964 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/ras.rst
@@ -330,9 +330,12 @@ There can be multiple csrows and multiple channels.
 
 .. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
   used to refer to a memory module, although there are other memory
-  packaging alternatives, like SO-DIMM, SIMM, etc. Along this document,
-  and inside the EDAC system, the term "dimm" is used for all memory
-  modules, even when they use a different kind of packaging.
+  packaging alternatives, like SO-DIMM, SIMM, etc. The UEFI
+  specification (Version 2.7) defines a memory module in the Common
+  Platform Error Record (CPER) section to be an SMBIOS Memory Device
+  (Type 17). Along this document, and inside the EDAC subsystem, the term
+  "dimm" is used for all memory modules, even when they use a
+  different kind of packaging.
 
 Memory controllers allow for several csrows, with 8 csrows being a
 typical value. Yet, the actual number of csrows depends on the layout of
@@ -349,12 +352,14 @@ controllers. The following example will assume 2 channels:
 	|            |  ``ch0``  |  ``ch1``  |
 	+============+===========+===========+
 	| ``csrow0`` |  DIMM_A0  |  DIMM_B0  |
-	+------------+           |           |
-	| ``csrow1`` |           |           |
+	|            |   rank0   |   rank0   |
+	+------------+     -     |     -     |
+	| ``csrow1`` |   rank1   |   rank1   |
 	+------------+-----------+-----------+
 	| ``csrow2`` |  DIMM_A1  | DIMM_B1   |
-	+------------+           |           |
-	| ``csrow3`` |           |           |
+	|            |   rank0   |   rank0   |
+	+------------+     -     |     -     |
+	| ``csrow3`` |   rank1   |   rank1   |
 	+------------+-----------+-----------+
 
 In the above example, there are 4 physical slots on the motherboard
@@ -374,11 +379,13 @@ which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
 Channel, the csrows cross both DIMMs.
 
 Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
-Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
-will have just one csrow (csrow0). csrow1 will be empty. On the other
-hand, when 2 dual ranked DIMMs are similarly placed, then both csrow0
-and csrow1 will be populated. The pattern repeats itself for csrow2 and
-csrow3.
+In the example above 2 dual ranked DIMMs are similarly placed. Thus,
+both csrow0 and csrow1 are populated. On the other hand, when 2 single
+ranked DIMMs are placed in slots DIMM_A0 and DIMM_B0, then they will
+have just one csrow (csrow0) and csrow1 will be empty. The pattern
+repeats itself for csrow2 and csrow3. Also note that some memory
+controllers don't have any logic to identify the memory module, see
+``rankX`` directories below.
 
 The representation of the above is reflected in the directory
 tree in EDAC's sysfs interface. Starting in directory
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 032c7cd3cede..def074807cee 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -831,8 +831,8 @@ printk_ratelimit:
 =================
 
 Some warning messages are rate limited. printk_ratelimit specifies
-the minimum length of time between these messages (in jiffies), by
-default we allow one every 5 seconds.
+the minimum length of time between these messages (in seconds).
+The default value is 5 seconds.
 
 A value of 0 will disable rate limiting.
 
@@ -845,6 +845,8 @@ seconds, we do allow a burst of messages to pass through.
 printk_ratelimit_burst specifies the number of messages we can
 send before ratelimiting kicks in.
 
+The default value is 10 messages.
+
 
 printk_devkmsg:
 ===============
@@ -1101,7 +1103,7 @@ During initialization the kernel sets this value such that even if the
 maximum number of threads is created, the thread structures occupy only
 a part (1/8th) of the available RAM pages.
 
-The minimum value that can be written to threads-max is 20.
+The minimum value that can be written to threads-max is 1.
 
 The maximum value that can be written to threads-max is given by the
 constant FUTEX_TID_MASK (0x3fffffff).
@@ -1109,10 +1111,6 @@ constant FUTEX_TID_MASK (0x3fffffff).
 If a value outside of this range is written to threads-max an error
 EINVAL occurs.
 
-The value written is checked against the available RAM pages. If the
-thread structures would occupy too much (more than 1/8th) of the
-available RAM pages threads-max is reduced accordingly.
-
 
 unknown_nmi_panic:
 ==================
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index 898ad78f3cc7..10c4f0ce2ad0 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -1,6 +1,28 @@
-=============
- Thunderbolt
-=============
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+ USB4 and Thunderbolt
+======================
+USB4 is the public specification based on Thunderbolt 3 protocol with
+some differences at the register level among other things. Connection
+manager is an entity running on the host router (host controller)
+responsible for enumerating routers and establishing tunnels. A
+connection manager can be implemented either in firmware or software.
+Typically PCs come with a firmware connection manager for Thunderbolt 3
+and early USB4 capable systems. Apple systems on the other hand use
+software connection manager and the later USB4 compliant devices follow
+the suit.
+
+The Linux Thunderbolt driver supports both and can detect at runtime which
+connection manager implementation is to be used. To be on the safe side the
+software connection manager in Linux also advertises security level
+``user`` which means PCIe tunneling is disabled by default. The
+documentation below applies to both implementations with the exception that
+the software connection manager only supports ``user`` security level and
+is expected to be accompanied with an IOMMU based DMA protection.
+
+Security levels and how to use them
+-----------------------------------
 The interface presented here is not meant for end users. Instead there
 should be a userspace tool that handles all the low-level details, keeps
 a database of the authorized devices and prompts users for new connections.
@@ -18,8 +40,6 @@ This will authorize all devices automatically when they appear. However,
 keep in mind that this bypasses the security levels and makes the system
 vulnerable to DMA attacks.
 
-Security levels and how to use them
------------------------------------
 Starting with Intel Falcon Ridge Thunderbolt controller there are 4
 security levels available. Intel Titan Ridge added one more security level
 (usbonly). The reason for these is the fact that the connected devices can
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index fb5b39f73059..ad911be5b5e9 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -253,7 +253,7 @@ The following sysctls are available for the XFS filesystem:
 	pool.
 
   fs.xfs.speculative_prealloc_lifetime
-		(Units: seconds   Min: 1  Default: 300  Max: 86400)
+	(Units: seconds   Min: 1  Default: 300  Max: 86400)
 	The interval at which the background scanning for inodes
 	with unused speculative preallocation runs. The scan
 	removes unused preallocation from clean inodes and releases
diff --git a/Documentation/arm/microchip.rst b/Documentation/arm/microchip.rst
index c9a44c98e868..1adf53dfc494 100644
--- a/Documentation/arm/microchip.rst
+++ b/Documentation/arm/microchip.rst
@@ -103,7 +103,7 @@ the Microchip website: http://www.microchip.com.
 
           * Datasheet
 
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet_B.pdf
 
     * ARM Cortex-A5 + NEON based SoCs
       - sama5d4 family
@@ -167,7 +167,7 @@ the Microchip website: http://www.microchip.com.
 
           * Datasheet
 
-          http://ww1.microchip.com/downloads/en/DeviceDoc/60001527A.pdf
+          http://ww1.microchip.com/downloads/en/DeviceDoc/SAM-E70-S70-V70-V71-Family-Data-Sheet-DS60001527D.pdf
 
 
 Linux kernel information
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index d3f3a60fbf25..5d78a6f5b0ae 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -213,6 +213,9 @@ Before jumping into the kernel, the following conditions must be met:
 
       - ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
       - ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+      - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
+        all CPUs the kernel is executing on, and must stay constant
+        for the lifetime of the kernel.
 
   - If the kernel is entered at EL1:
 
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
index 2955287e9acc..41937a8091aa 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -117,6 +117,8 @@ infrastructure:
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
+     | RNDR                         | [63-60] |    y    |
+     +------------------------------+---------+---------+
      | TS                           | [55-52] |    y    |
      +------------------------------+---------+---------+
      | FHM                          | [51-48] |    y    |
@@ -168,8 +170,15 @@ infrastructure:
      +------------------------------+---------+---------+
 
 
-  3) MIDR_EL1 - Main ID Register
+  3) ID_AA64PFR1_EL1 - Processor Feature Register 1
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | SSBS                         | [7-4]   |    y    |
+     +------------------------------+---------+---------+
+
 
+  4) MIDR_EL1 - Main ID Register
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
@@ -188,11 +197,21 @@ infrastructure:
    as available on the CPU where it is fetched and is not a system
    wide safe value.
 
-  4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+  5) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
 
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
+     | I8MM                         | [55-52] |    y    |
+     +------------------------------+---------+---------+
+     | DGH                          | [51-48] |    y    |
+     +------------------------------+---------+---------+
+     | BF16                         | [47-44] |    y    |
+     +------------------------------+---------+---------+
+     | SB                           | [39-36] |    y    |
+     +------------------------------+---------+---------+
+     | FRINTTS                      | [35-32] |    y    |
+     +------------------------------+---------+---------+
      | GPI                          | [31-28] |    y    |
      +------------------------------+---------+---------+
      | GPA                          | [27-24] |    y    |
@@ -210,7 +229,7 @@ infrastructure:
      | DPB                          | [3-0]   |    y    |
      +------------------------------+---------+---------+
 
-  5) ID_AA64MMFR2_EL1 - Memory model feature register 2
+  6) ID_AA64MMFR2_EL1 - Memory model feature register 2
 
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
@@ -218,15 +237,23 @@ infrastructure:
      | AT                           | [35-32] |    y    |
      +------------------------------+---------+---------+
 
-  6) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+  7) ID_AA64ZFR0_EL1 - SVE feature ID register 0
 
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
+     | F64MM                        | [59-56] |    y    |
+     +------------------------------+---------+---------+
+     | F32MM                        | [55-52] |    y    |
+     +------------------------------+---------+---------+
+     | I8MM                         | [47-44] |    y    |
+     +------------------------------+---------+---------+
      | SM4                          | [43-40] |    y    |
      +------------------------------+---------+---------+
      | SHA3                         | [35-32] |    y    |
      +------------------------------+---------+---------+
+     | BF16                         | [23-20] |    y    |
+     +------------------------------+---------+---------+
      | BitPerm                      | [19-16] |    y    |
      +------------------------------+---------+---------+
      | AES                          | [7-4]   |    y    |
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 91f79529c58c..7dfb97dfe416 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -119,10 +119,6 @@ HWCAP_LRCPC
 HWCAP_DCPOP
     Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
 
-HWCAP2_DCPODP
-
-    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
-
 HWCAP_SHA3
     Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
 
@@ -141,6 +137,41 @@ HWCAP_SHA512
 HWCAP_SVE
     Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
 
+HWCAP_ASIMDFHM
+   Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
+
+HWCAP_DIT
+    Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
+
+HWCAP_USCAT
+    Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
+
+HWCAP_ILRCPC
+    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
+
+HWCAP_FLAGM
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+
+HWCAP_SSBS
+    Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
+
+HWCAP_SB
+    Functionality implied by ID_AA64ISAR1_EL1.SB == 0b0001.
+
+HWCAP_PACA
+    Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
+    ID_AA64ISAR1_EL1.API == 0b0001, as described by
+    Documentation/arm64/pointer-authentication.rst.
+
+HWCAP_PACG
+    Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
+    ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
+    Documentation/arm64/pointer-authentication.rst.
+
+HWCAP2_DCPODP
+
+    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
+
 HWCAP2_SVE2
 
     Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
@@ -165,42 +196,45 @@ HWCAP2_SVESM4
 
     Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
 
-HWCAP_ASIMDFHM
-   Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
+HWCAP2_FLAGM2
 
-HWCAP_DIT
-    Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
 
-HWCAP_USCAT
-    Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
+HWCAP2_FRINT
 
-HWCAP_ILRCPC
-    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
+    Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
 
-HWCAP_FLAGM
-    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+HWCAP2_SVEI8MM
 
-HWCAP2_FLAGM2
+    Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
 
-    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+HWCAP2_SVEF32MM
 
-HWCAP_SSBS
-    Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
+    Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
 
-HWCAP_PACA
-    Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
-    ID_AA64ISAR1_EL1.API == 0b0001, as described by
-    Documentation/arm64/pointer-authentication.rst.
+HWCAP2_SVEF64MM
 
-HWCAP_PACG
-    Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
-    ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
-    Documentation/arm64/pointer-authentication.rst.
+    Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
 
-HWCAP2_FRINT
+HWCAP2_SVEBF16
 
-    Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+    Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
+
+HWCAP2_I8MM
+
+    Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
+
+HWCAP2_BF16
+
+    Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001.
+
+HWCAP2_DGH
+
+    Functionality implied by ID_AA64ISAR1_EL1.DGH == 0b0001.
+
+HWCAP2_RNG
 
+    Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001.
 
 4. Unused AT_HWCAP bits
 -----------------------
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 5a09661330fc..9120e59578dc 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -70,8 +70,12 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
@@ -84,10 +88,14 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A76      | #1463225        | ARM64_ERRATUM_1463225       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A55      | #1530923        | ARM64_ERRATUM_1530923       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1349291        | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N1     | #1542419        | ARM64_ERRATUM_1542419       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst
new file mode 100644
index 000000000000..32ea57483378
--- /dev/null
+++ b/Documentation/asm-annotations.rst
@@ -0,0 +1,217 @@
+Assembler Annotations
+=====================
+
+Copyright (c) 2017-2019 Jiri Slaby
+
+This document describes the new macros for annotation of data and code in
+assembly. In particular, it contains information about ``SYM_FUNC_START``,
+``SYM_FUNC_END``, ``SYM_CODE_START``, and similar.
+
+Rationale
+---------
+Some code like entries, trampolines, or boot code needs to be written in
+assembly. The same as in C, such code is grouped into functions and
+accompanied with data. Standard assemblers do not force users into precisely
+marking these pieces as code, data, or even specifying their length.
+Nevertheless, assemblers provide developers with such annotations to aid
+debuggers throughout assembly. On top of that, developers also want to mark
+some functions as *global* in order to be visible outside of their translation
+units.
+
+Over time, the Linux kernel has adopted macros from various projects (like
+``binutils``) to facilitate such annotations. So for historic reasons,
+developers have been using ``ENTRY``, ``END``, ``ENDPROC``, and other
+annotations in assembly.  Due to the lack of their documentation, the macros
+are used in rather wrong contexts at some locations. Clearly, ``ENTRY`` was
+intended to denote the beginning of global symbols (be it data or code).
+``END`` used to mark the end of data or end of special functions with
+*non-standard* calling convention. In contrast, ``ENDPROC`` should annotate
+only ends of *standard* functions.
+
+When these macros are used correctly, they help assemblers generate a nice
+object with both sizes and types set correctly. For example, the result of
+``arch/x86/lib/putuser.S``::
+
+   Num:    Value          Size Type    Bind   Vis      Ndx Name
+    25: 0000000000000000    33 FUNC    GLOBAL DEFAULT    1 __put_user_1
+    29: 0000000000000030    37 FUNC    GLOBAL DEFAULT    1 __put_user_2
+    32: 0000000000000060    36 FUNC    GLOBAL DEFAULT    1 __put_user_4
+    35: 0000000000000090    37 FUNC    GLOBAL DEFAULT    1 __put_user_8
+
+This is not only important for debugging purposes. When there are properly
+annotated objects like this, tools can be run on them to generate more useful
+information. In particular, on properly annotated objects, ``objtool`` can be
+run to check and fix the object if needed. Currently, ``objtool`` can report
+missing frame pointer setup/destruction in functions. It can also
+automatically generate annotations for :doc:`ORC unwinder <x86/orc-unwinder>`
+for most code. Both of these are especially important to support reliable
+stack traces which are in turn necessary for :doc:`Kernel live patching
+<livepatch/livepatch>`.
+
+Caveat and Discussion
+---------------------
+As one might realize, there were only three macros previously. That is indeed
+insufficient to cover all the combinations of cases:
+
+* standard/non-standard function
+* code/data
+* global/local symbol
+
+There was a discussion_ and instead of extending the current ``ENTRY/END*``
+macros, it was decided that brand new macros should be introduced instead::
+
+    So how about using macro names that actually show the purpose, instead
+    of importing all the crappy, historic, essentially randomly chosen
+    debug symbol macro names from the binutils and older kernels?
+
+.. _discussion: https://lkml.kernel.org/r/20170217104757.28588-1-jslaby@suse.cz
+
+Macros Description
+------------------
+
+The new macros are prefixed with the ``SYM_`` prefix and can be divided into
+three main groups:
+
+1. ``SYM_FUNC_*`` -- to annotate C-like functions. This means functions with
+   standard C calling conventions. For example, on x86, this means that the
+   stack contains a return address at the predefined place and a return from
+   the function can happen in a standard way. When frame pointers are enabled,
+   save/restore of frame pointer shall happen at the start/end of a function,
+   respectively, too.
+
+   Checking tools like ``objtool`` should ensure such marked functions conform
+   to these rules. The tools can also easily annotate these functions with
+   debugging information (like *ORC data*) automatically.
+
+2. ``SYM_CODE_*`` -- special functions called with special stack. Be it
+   interrupt handlers with special stack content, trampolines, or startup
+   functions.
+
+   Checking tools mostly ignore checking of these functions. But some debug
+   information still can be generated automatically. For correct debug data,
+   this code needs hints like ``UNWIND_HINT_REGS`` provided by developers.
+
+3. ``SYM_DATA*`` -- obviously data belonging to ``.data`` sections and not to
+   ``.text``. Data do not contain instructions, so they have to be treated
+   specially by the tools: they should not treat the bytes as instructions,
+   nor assign any debug information to them.
+
+Instruction Macros
+~~~~~~~~~~~~~~~~~~
+This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above.
+
+* ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the
+  most frequent markings**. They are used for functions with standard calling
+  conventions -- global and local. Like in C, they both align the functions to
+  architecture specific ``__ALIGN`` bytes. There are also ``_NOALIGN`` variants
+  for special cases where developers do not want this implicit alignment.
+
+  ``SYM_FUNC_START_WEAK`` and ``SYM_FUNC_START_WEAK_NOALIGN`` markings are
+  also offered as an assembler counterpart to the *weak* attribute known from
+  C.
+
+  All of these **shall** be coupled with ``SYM_FUNC_END``. First, it marks
+  the sequence of instructions as a function and computes its size to the
+  generated object file. Second, it also eases checking and processing such
+  object files as the tools can trivially find exact function boundaries.
+
+  So in most cases, developers should write something like in the following
+  example, having some asm instructions in between the macros, of course::
+
+    SYM_FUNC_START(memset)
+        ... asm insns ...
+    SYM_FUNC_END(memset)
+
+  In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
+  and ``ENDPROC`` macros.
+
+* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
+  who decided to have two or more names for one function. The typical use is::
+
+    SYM_FUNC_START_ALIAS(__memset)
+    SYM_FUNC_START(memset)
+        ... asm insns ...
+    SYM_FUNC_END(memset)
+    SYM_FUNC_END_ALIAS(__memset)
+
+  In this example, one can call ``__memset`` or ``memset`` with the same
+  result, except the debug information for the instructions is generated to
+  the object file only once -- for the non-``ALIAS`` case.
+
+* ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in
+  special cases -- if you know what you are doing. This is used exclusively
+  for interrupt handlers and similar where the calling convention is not the C
+  one. ``_NOALIGN`` variants exist too. The use is the same as for the ``FUNC``
+  category above::
+
+    SYM_CODE_START_LOCAL(bad_put_user)
+        ... asm insns ...
+    SYM_CODE_END(bad_put_user)
+
+  Again, every ``SYM_CODE_START*`` **shall** be coupled by ``SYM_CODE_END``.
+
+  To some extent, this category corresponds to deprecated ``ENTRY`` and
+  ``END``. Except ``END`` had several other meanings too.
+
+* ``SYM_INNER_LABEL*`` is used to denote a label inside some
+  ``SYM_{CODE,FUNC}_START`` and ``SYM_{CODE,FUNC}_END``.  They are very similar
+  to C labels, except they can be made global. An example of use::
+
+    SYM_CODE_START(ftrace_caller)
+        /* save_mcount_regs fills in first two parameters */
+        ...
+
+    SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
+        /* Load the ftrace_ops into the 3rd parameter */
+        ...
+
+    SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
+        call ftrace_stub
+        ...
+        retq
+    SYM_CODE_END(ftrace_caller)
+
+Data Macros
+~~~~~~~~~~~
+Similar to instructions, there is a couple of macros to describe data in the
+assembly.
+
+* ``SYM_DATA_START`` and ``SYM_DATA_START_LOCAL`` mark the start of some data
+  and shall be used in conjunction with either ``SYM_DATA_END``, or
+  ``SYM_DATA_END_LABEL``. The latter adds also a label to the end, so that
+  people can use ``lstack`` and (local) ``lstack_end`` in the following
+  example::
+
+    SYM_DATA_START_LOCAL(lstack)
+        .skip 4096
+    SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end)
+
+* ``SYM_DATA`` and ``SYM_DATA_LOCAL`` are variants for simple, mostly one-line
+  data::
+
+    SYM_DATA(HEAP,     .long rm_heap)
+    SYM_DATA(heap_end, .long rm_stack)
+
+  In the end, they expand to ``SYM_DATA_START`` with ``SYM_DATA_END``
+  internally.
+
+Support Macros
+~~~~~~~~~~~~~~
+All the above reduce themselves to some invocation of ``SYM_START``,
+``SYM_END``, or ``SYM_ENTRY`` at last. Normally, developers should avoid using
+these.
+
+Further, in the above examples, one could see ``SYM_L_LOCAL``. There are also
+``SYM_L_GLOBAL`` and ``SYM_L_WEAK``. All are intended to denote linkage of a
+symbol marked by them. They are used either in ``_LABEL`` variants of the
+earlier macros, or in ``SYM_START``.
+
+
+Overriding Macros
+~~~~~~~~~~~~~~~~~
+Architecture can also override any of the macros in their own
+``asm/linkage.h``, including macros specifying the type of a symbol
+(``SYM_T_FUNC``, ``SYM_T_OBJECT``, and ``SYM_T_NONE``).  As every macro
+described in this file is surrounded by ``#ifdef`` + ``#endif``, it is enough
+to define the macros differently in the aforementioned architecture-dependent
+header.
diff --git a/Documentation/block/biovecs.rst b/Documentation/block/biovecs.rst
index 86fa66c87172..ad303a2569d3 100644
--- a/Documentation/block/biovecs.rst
+++ b/Documentation/block/biovecs.rst
@@ -47,7 +47,7 @@ Having a real iterator, and making biovecs immutable, has a number of
 advantages:
 
  * Before, iterating over bios was very awkward when you weren't processing
-   exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
+   exactly one bvec at a time - for example, bio_copy_data() in block/bio.c,
    which copies the contents of one bio into another. Because the biovecs
    wouldn't necessarily be the same size, the old code was tricky convoluted -
    it had to walk two different bios at the same time, keeping both bi_idx and
diff --git a/Documentation/block/stat.rst b/Documentation/block/stat.rst
index 9c07bc22b0bc..77311335c08b 100644
--- a/Documentation/block/stat.rst
+++ b/Documentation/block/stat.rst
@@ -41,6 +41,8 @@ discard I/Os    requests      number of discard I/Os processed
 discard merges  requests      number of discard I/Os merged with in-queue I/O
 discard sectors sectors       number of sectors discarded
 discard ticks   milliseconds  total wait time for discard requests
+flush I/Os      requests      number of flush I/Os processed
+flush ticks     milliseconds  total wait time for flush requests
 =============== ============= =================================================
 
 read I/Os, write I/Os, discard I/0s
@@ -48,6 +50,14 @@ read I/Os, write I/Os, discard I/0s
 
 These values increment when an I/O request completes.
 
+flush I/Os
+==========
+
+These values increment when an flush I/O request completes.
+
+Block layer combines flush requests and executes at most one at a time.
+This counts flush requests executed by disk. Not tracked for partitions.
+
 read merges, write merges, discard merges
 =========================================
 
@@ -62,8 +72,8 @@ discarded from this block device.  The "sectors" in question are the
 standard UNIX 512-byte sectors, not any device- or filesystem-specific
 block size.  The counters are incremented when the I/O completes.
 
-read ticks, write ticks, discard ticks
-======================================
+read ticks, write ticks, discard ticks, flush ticks
+===================================================
 
 These values count the number of milliseconds that I/O requests have
 waited on this block device.  If there are multiple I/O requests waiting,
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 801a6ed3f2e5..4f5410b61441 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -47,6 +47,15 @@ Program types
    prog_flow_dissector
 
 
+Testing BPF
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   s390
+
+
 .. Links:
 .. _Documentation/networking/filter.txt: ../networking/filter.txt
 .. _man-pages: https://www.kernel.org/doc/man-pages/
diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst
index a78bf036cadd..4d86780ab0f1 100644
--- a/Documentation/bpf/prog_flow_dissector.rst
+++ b/Documentation/bpf/prog_flow_dissector.rst
@@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel
 C-based implementation can export. Notable example is single VLAN (802.1Q)
 and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
 for a set of information that's currently can be exported from the BPF context.
+
+When BPF flow dissector is attached to the root network namespace (machine-wide
+policy), users can't override it in their child network namespaces.
diff --git a/Documentation/bpf/s390.rst b/Documentation/bpf/s390.rst
new file mode 100644
index 000000000000..21ecb309daea
--- /dev/null
+++ b/Documentation/bpf/s390.rst
@@ -0,0 +1,205 @@
+===================
+Testing BPF on s390
+===================
+
+1. Introduction
+***************
+
+IBM Z are mainframe computers, which are descendants of IBM System/360 from
+year 1964. They are supported by the Linux kernel under the name "s390". This
+document describes how to test BPF in an s390 QEMU guest.
+
+2. One-time setup
+*****************
+
+The following is required to build and run the test suite:
+
+  * s390 GCC
+  * s390 development headers and libraries
+  * Clang with BPF support
+  * QEMU with s390 support
+  * Disk image with s390 rootfs
+
+Debian supports installing compiler and libraries for s390 out of the box.
+Users of other distros may use debootstrap in order to set up a Debian chroot::
+
+  sudo debootstrap \
+    --variant=minbase \
+    --include=sudo \
+    testing \
+    ./s390-toolchain
+  sudo mount --rbind /dev ./s390-toolchain/dev
+  sudo mount --rbind /proc ./s390-toolchain/proc
+  sudo mount --rbind /sys ./s390-toolchain/sys
+  sudo chroot ./s390-toolchain
+
+Once on Debian, the build prerequisites can be installed as follows::
+
+  sudo dpkg --add-architecture s390x
+  sudo apt-get update
+  sudo apt-get install \
+    bc \
+    bison \
+    cmake \
+    debootstrap \
+    dwarves \
+    flex \
+    g++ \
+    gcc \
+    g++-s390x-linux-gnu \
+    gcc-s390x-linux-gnu \
+    gdb-multiarch \
+    git \
+    make \
+    python3 \
+    qemu-system-misc \
+    qemu-utils \
+    rsync \
+    libcap-dev:s390x \
+    libelf-dev:s390x \
+    libncurses-dev
+
+Latest Clang targeting BPF can be installed as follows::
+
+  git clone https://github.com/llvm/llvm-project.git
+  ln -s ../../clang llvm-project/llvm/tools/
+  mkdir llvm-project-build
+  cd llvm-project-build
+  cmake \
+    -DLLVM_TARGETS_TO_BUILD=BPF \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
+    ../llvm-project/llvm
+  make
+  sudo make install
+  export PATH=/opt/clang-bpf/bin:$PATH
+
+The disk image can be prepared using a loopback mount and debootstrap::
+
+  qemu-img create -f raw ./s390.img 1G
+  sudo losetup -f ./s390.img
+  sudo mkfs.ext4 /dev/loopX
+  mkdir ./s390.rootfs
+  sudo mount /dev/loopX ./s390.rootfs
+  sudo debootstrap \
+    --foreign \
+    --arch=s390x \
+    --variant=minbase \
+    --include=" \
+      iproute2, \
+      iputils-ping, \
+      isc-dhcp-client, \
+      kmod, \
+      libcap2, \
+      libelf1, \
+      netcat, \
+      procps" \
+    testing \
+    ./s390.rootfs
+  sudo umount ./s390.rootfs
+  sudo losetup -d /dev/loopX
+
+3. Compilation
+**************
+
+In addition to the usual Kconfig options required to run the BPF test suite, it
+is also helpful to select::
+
+  CONFIG_NET_9P=y
+  CONFIG_9P_FS=y
+  CONFIG_NET_9P_VIRTIO=y
+  CONFIG_VIRTIO_PCI=y
+
+as that would enable a very easy way to share files with the s390 virtual
+machine.
+
+Compiling kernel, modules and testsuite, as well as preparing gdb scripts to
+simplify debugging, can be done using the following commands::
+
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
+  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
+    -C tools/testing/selftests \
+    TARGETS=bpf \
+    INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
+    install
+
+4. Running the test suite
+*************************
+
+The virtual machine can be started as follows::
+
+  qemu-system-s390x \
+    -cpu max,zpci=on \
+    -smp 2 \
+    -m 4G \
+    -kernel linux/arch/s390/boot/compressed/vmlinux \
+    -drive file=./s390.img,if=virtio,format=raw \
+    -nographic \
+    -append 'root=/dev/vda rw console=ttyS1' \
+    -virtfs local,path=./linux,security_model=none,mount_tag=linux \
+    -object rng-random,filename=/dev/urandom,id=rng0 \
+    -device virtio-rng-ccw,rng=rng0 \
+    -netdev user,id=net0 \
+    -device virtio-net-ccw,netdev=net0
+
+When using this on a real IBM Z, ``-enable-kvm`` may be added for better
+performance. When starting the virtual machine for the first time, disk image
+setup must be finalized using the following command::
+
+  /debootstrap/debootstrap --second-stage
+
+Directory with the code built on the host as well as ``/proc`` and ``/sys``
+need to be mounted as follows::
+
+  mkdir -p /linux
+  mount -t 9p linux /linux
+  mount -t proc proc /proc
+  mount -t sysfs sys /sys
+
+After that, the test suite can be run using the following commands::
+
+  cd /linux/tools/testing/selftests/kselftest_install
+  ./run_kselftest.sh
+
+As usual, tests can be also run individually::
+
+  cd /linux/tools/testing/selftests/bpf
+  ./test_verifier
+
+5. Debugging
+************
+
+It is possible to debug the s390 kernel using QEMU GDB stub, which is activated
+by passing ``-s`` to QEMU.
+
+It is preferable to turn KASLR off, so that gdb would know where to find the
+kernel image in memory, by building the kernel with::
+
+  RANDOMIZE_BASE=n
+
+GDB can then be attached using the following command::
+
+  gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux
+
+6. Network
+**********
+
+In case one needs to use the network in the virtual machine in order to e.g.
+install additional packages, it can be configured using::
+
+  dhclient eth0
+
+7. Links
+********
+
+This document is a compilation of techniques, whose more comprehensive
+descriptions can be found by following these links:
+
+- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
+- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
+- `Building LLVM <https://llvm.org/docs/CMake.html>`_
+- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
+- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
+- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
+- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
diff --git a/Documentation/conf.py b/Documentation/conf.py
index a8fe845832bc..3c7bdf4cd31f 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -37,7 +37,8 @@ needs_sphinx = '1.3'
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain',
-              'kfigure', 'sphinx.ext.ifconfig', 'automarkup']
+              'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
+              'maintainers_include']
 
 # The name of the math extension changed on Sphinx 1.4
 if (major == 1 and minor > 3) or (major > 1):
diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst
index 6b38a39fab24..a5af2cbf58a5 100644
--- a/Documentation/core-api/genalloc.rst
+++ b/Documentation/core-api/genalloc.rst
@@ -23,7 +23,7 @@ begins with the creation of a pool using one of:
 .. kernel-doc:: lib/genalloc.c
    :functions: devm_gen_pool_create
 
-A call to :c:func:`gen_pool_create` will create a pool.  The granularity of
+A call to gen_pool_create() will create a pool.  The granularity of
 allocations is set with min_alloc_order; it is a log-base-2 number like
 those used by the page allocator, but it refers to bytes rather than pages.
 So, if min_alloc_order is passed as 3, then all allocations will be a
@@ -32,7 +32,7 @@ required to track the memory in the pool.  The nid parameter specifies
 which NUMA node should be used for the allocation of the housekeeping
 structures; it can be -1 if the caller doesn't care.
 
-The "managed" interface :c:func:`devm_gen_pool_create` ties the pool to a
+The "managed" interface devm_gen_pool_create() ties the pool to a
 specific device.  Among other things, it will automatically clean up the
 pool when the given device is destroyed.
 
@@ -53,32 +53,32 @@ to the pool.  That can be done with one of:
    :functions: gen_pool_add
 
 .. kernel-doc:: lib/genalloc.c
-   :functions: gen_pool_add_virt
+   :functions: gen_pool_add_owner
 
-A call to :c:func:`gen_pool_add` will place the size bytes of memory
+A call to gen_pool_add() will place the size bytes of memory
 starting at addr (in the kernel's virtual address space) into the given
 pool, once again using nid as the node ID for ancillary memory allocations.
-The :c:func:`gen_pool_add_virt` variant associates an explicit physical
+The gen_pool_add_virt() variant associates an explicit physical
 address with the memory; this is only necessary if the pool will be used
 for DMA allocations.
 
 The functions for allocating memory from the pool (and putting it back)
 are:
 
-.. kernel-doc:: lib/genalloc.c
+.. kernel-doc:: include/linux/genalloc.h
    :functions: gen_pool_alloc
 
 .. kernel-doc:: lib/genalloc.c
    :functions: gen_pool_dma_alloc
 
 .. kernel-doc:: lib/genalloc.c
-   :functions: gen_pool_free
+   :functions: gen_pool_free_owner
 
-As one would expect, :c:func:`gen_pool_alloc` will allocate size< bytes
-from the given pool.  The :c:func:`gen_pool_dma_alloc` variant allocates
+As one would expect, gen_pool_alloc() will allocate size< bytes
+from the given pool.  The gen_pool_dma_alloc() variant allocates
 memory for use with DMA operations, returning the associated physical
 address in the space pointed to by dma.  This will only work if the memory
-was added with :c:func:`gen_pool_add_virt`.  Note that this function
+was added with gen_pool_add_virt().  Note that this function
 departs from the usual genpool pattern of using unsigned long values to
 represent kernel addresses; it returns a void * instead.
 
@@ -89,14 +89,14 @@ return.  If that sort of control is needed, the following functions will be
 of interest:
 
 .. kernel-doc:: lib/genalloc.c
-   :functions: gen_pool_alloc_algo
+   :functions: gen_pool_alloc_algo_owner
 
 .. kernel-doc:: lib/genalloc.c
    :functions: gen_pool_set_algo
 
-Allocations with :c:func:`gen_pool_alloc_algo` specify an algorithm to be
+Allocations with gen_pool_alloc_algo() specify an algorithm to be
 used to choose the memory to be allocated; the default algorithm can be set
-with :c:func:`gen_pool_set_algo`.  The data value is passed to the
+with gen_pool_set_algo().  The data value is passed to the
 algorithm; most ignore it, but it is occasionally needed.  One can,
 naturally, write a special-purpose algorithm, but there is a fair set
 already available:
@@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future.
    :functions: gen_pool_for_each_chunk
 
 .. kernel-doc:: lib/genalloc.c
-   :functions: addr_in_gen_pool
+   :functions: gen_pool_has_addr
 
 .. kernel-doc:: lib/genalloc.c
    :functions: gen_pool_avail
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 4da67b65cecf..8f06d885c310 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -26,7 +26,7 @@ Rationale
 =========
 
 The original implementation of interrupt handling in Linux uses the
-:c:func:`__do_IRQ` super-handler, which is able to deal with every type of
+__do_IRQ() super-handler, which is able to deal with every type of
 interrupt logic.
 
 Originally, Russell King identified different types of handlers to build
@@ -43,7 +43,7 @@ During the implementation we identified another type:
 
 -  Fast EOI type
 
-In the SMP world of the :c:func:`__do_IRQ` super-handler another type was
+In the SMP world of the __do_IRQ() super-handler another type was
 identified:
 
 -  Per CPU type
@@ -83,7 +83,7 @@ IRQ-flow implementation for 'level type' interrupts and add a
 (sub)architecture specific 'edge type' implementation.
 
 To make the transition to the new model easier and prevent the breakage
-of existing implementations, the :c:func:`__do_IRQ` super-handler is still
+of existing implementations, the __do_IRQ() super-handler is still
 available. This leads to a kind of duality for the time being. Over time
 the new model should be used in more and more architectures, as it
 enables smaller and cleaner IRQ subsystems. It's deprecated for three
@@ -116,7 +116,7 @@ status information and pointers to the interrupt flow method and the
 interrupt chip structure which are assigned to this interrupt.
 
 Whenever an interrupt triggers, the low-level architecture code calls
-into the generic interrupt code by calling :c:func:`desc->handle_irq`. This
+into the generic interrupt code by calling desc->handle_irq(). This
 high-level IRQ handling function only uses desc->irq_data.chip
 primitives referenced by the assigned chip descriptor structure.
 
@@ -125,27 +125,29 @@ High-level Driver API
 
 The high-level Driver API consists of following functions:
 
--  :c:func:`request_irq`
+-  request_irq()
 
--  :c:func:`free_irq`
+-  request_threaded_irq()
 
--  :c:func:`disable_irq`
+-  free_irq()
 
--  :c:func:`enable_irq`
+-  disable_irq()
 
--  :c:func:`disable_irq_nosync` (SMP only)
+-  enable_irq()
 
--  :c:func:`synchronize_irq` (SMP only)
+-  disable_irq_nosync() (SMP only)
 
--  :c:func:`irq_set_irq_type`
+-  synchronize_irq() (SMP only)
 
--  :c:func:`irq_set_irq_wake`
+-  irq_set_irq_type()
 
--  :c:func:`irq_set_handler_data`
+-  irq_set_irq_wake()
 
--  :c:func:`irq_set_chip`
+-  irq_set_handler_data()
 
--  :c:func:`irq_set_chip_data`
+-  irq_set_chip()
+
+-  irq_set_chip_data()
 
 See the autogenerated function documentation for details.
 
@@ -154,19 +156,19 @@ High-level IRQ flow handlers
 
 The generic layer provides a set of pre-defined irq-flow methods:
 
--  :c:func:`handle_level_irq`
+-  handle_level_irq()
 
--  :c:func:`handle_edge_irq`
+-  handle_edge_irq()
 
--  :c:func:`handle_fasteoi_irq`
+-  handle_fasteoi_irq()
 
--  :c:func:`handle_simple_irq`
+-  handle_simple_irq()
 
--  :c:func:`handle_percpu_irq`
+-  handle_percpu_irq()
 
--  :c:func:`handle_edge_eoi_irq`
+-  handle_edge_eoi_irq()
 
--  :c:func:`handle_bad_irq`
+-  handle_bad_irq()
 
 The interrupt flow handlers (either pre-defined or architecture
 specific) are assigned to specific interrupts by the architecture either
@@ -325,14 +327,14 @@ Delayed interrupt disable
 
 This per interrupt selectable feature, which was introduced by Russell
 King in the ARM interrupt implementation, does not mask an interrupt at
-the hardware level when :c:func:`disable_irq` is called. The interrupt is kept
+the hardware level when disable_irq() is called. The interrupt is kept
 enabled and is masked in the flow handler when an interrupt event
 happens. This prevents losing edge interrupts on hardware which does not
 store an edge interrupt event while the interrupt is disabled at the
 hardware level. When an interrupt arrives while the IRQ_DISABLED flag
 is set, then the interrupt is masked at the hardware level and the
 IRQ_PENDING bit is set. When the interrupt is re-enabled by
-:c:func:`enable_irq` the pending bit is checked and if it is set, the interrupt
+enable_irq() the pending bit is checked and if it is set, the interrupt
 is resent either via hardware or by a software resend mechanism. (It's
 necessary to enable CONFIG_HARDIRQS_SW_RESEND when you want to use
 the delayed interrupt disable feature and your hardware is not capable
@@ -369,7 +371,7 @@ handler(s) to use these basic units of low-level functionality.
 __do_IRQ entry point
 ====================
 
-The original implementation :c:func:`__do_IRQ` was an alternative entry point
+The original implementation __do_IRQ() was an alternative entry point
 for all types of interrupts. It no longer exists.
 
 This handler turned out to be not suitable for all interrupt hardware
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index ab0eae1c153a..bc0c727d7fd8 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -39,6 +39,8 @@ Core utilities
    ../RCU/index
    gcc-plugins
    symbol-namespaces
+   padata
+   ioctl
 
 
 Interfaces for kernel debugging
diff --git a/Documentation/core-api/ioctl.rst b/Documentation/core-api/ioctl.rst
new file mode 100644
index 000000000000..c455db0e1627
--- /dev/null
+++ b/Documentation/core-api/ioctl.rst
@@ -0,0 +1,253 @@
+======================
+ioctl based interfaces
+======================
+
+ioctl() is the most common way for applications to interface
+with device drivers. It is flexible and easily extended by adding new
+commands and can be passed through character devices, block devices as
+well as sockets and other special file descriptors.
+
+However, it is also very easy to get ioctl command definitions wrong,
+and hard to fix them later without breaking existing applications,
+so this documentation tries to help developers get it right.
+
+Command number definitions
+==========================
+
+The command number, or request number, is the second argument passed to
+the ioctl system call. While this can be any 32-bit number that uniquely
+identifies an action for a particular driver, there are a number of
+conventions around defining them.
+
+``include/uapi/asm-generic/ioctl.h`` provides four macros for defining
+ioctl commands that follow modern conventions: ``_IO``, ``_IOR``,
+``_IOW``, and ``_IOWR``. These should be used for all new commands,
+with the correct parameters:
+
+_IO/_IOR/_IOW/_IOWR
+   The macro name specifies how the argument will be used.  It may be a
+   pointer to data to be passed into the kernel (_IOW), out of the kernel
+   (_IOR), or both (_IOWR).  _IO can indicate either commands with no
+   argument or those passing an integer value instead of a pointer.
+   It is recommended to only use _IO for commands without arguments,
+   and use pointers for passing data.
+
+type
+   An 8-bit number, often a character literal, specific to a subsystem
+   or driver, and listed in :doc:`../userspace-api/ioctl/ioctl-number`
+
+nr
+  An 8-bit number identifying the specific command, unique for a give
+  value of 'type'
+
+data_type
+  The name of the data type pointed to by the argument, the command number
+  encodes the ``sizeof(data_type)`` value in a 13-bit or 14-bit integer,
+  leading to a limit of 8191 bytes for the maximum size of the argument.
+  Note: do not pass sizeof(data_type) type into _IOR/_IOW/IOWR, as that
+  will lead to encoding sizeof(sizeof(data_type)), i.e. sizeof(size_t).
+  _IO does not have a data_type parameter.
+
+
+Interface versions
+==================
+
+Some subsystems use version numbers in data structures to overload
+commands with different interpretations of the argument.
+
+This is generally a bad idea, since changes to existing commands tend
+to break existing applications.
+
+A better approach is to add a new ioctl command with a new number. The
+old command still needs to be implemented in the kernel for compatibility,
+but this can be a wrapper around the new implementation.
+
+Return code
+===========
+
+ioctl commands can return negative error codes as documented in errno(3);
+these get turned into errno values in user space. On success, the return
+code should be zero. It is also possible but not recommended to return
+a positive 'long' value.
+
+When the ioctl callback is called with an unknown command number, the
+handler returns either -ENOTTY or -ENOIOCTLCMD, which also results in
+-ENOTTY being returned from the system call. Some subsystems return
+-ENOSYS or -EINVAL here for historic reasons, but this is wrong.
+
+Prior to Linux 5.5, compat_ioctl handlers were required to return
+-ENOIOCTLCMD in order to use the fallback conversion into native
+commands. As all subsystems are now responsible for handling compat
+mode themselves, this is no longer needed, but it may be important to
+consider when backporting bug fixes to older kernels.
+
+Timestamps
+==========
+
+Traditionally, timestamps and timeout values are passed as ``struct
+timespec`` or ``struct timeval``, but these are problematic because of
+incompatible definitions of these structures in user space after the
+move to 64-bit time_t.
+
+The ``struct __kernel_timespec`` type can be used instead to be embedded
+in other data structures when separate second/nanosecond values are
+desired, or passed to user space directly. This is still not ideal though,
+as the structure matches neither the kernel's timespec64 nor the user
+space timespec exactly. The get_timespec64() and put_timespec64() helper
+functions can be used to ensure that the layout remains compatible with
+user space and the padding is treated correctly.
+
+As it is cheap to convert seconds to nanoseconds, but the opposite
+requires an expensive 64-bit division, a simple __u64 nanosecond value
+can be simpler and more efficient.
+
+Timeout values and timestamps should ideally use CLOCK_MONOTONIC time,
+as returned by ktime_get_ns() or ktime_get_ts64().  Unlike
+CLOCK_REALTIME, this makes the timestamps immune from jumping backwards
+or forwards due to leap second adjustments and clock_settime() calls.
+
+ktime_get_real_ns() can be used for CLOCK_REALTIME timestamps that
+need to be persistent across a reboot or between multiple machines.
+
+32-bit compat mode
+==================
+
+In order to support 32-bit user space running on a 64-bit machine, each
+subsystem or driver that implements an ioctl callback handler must also
+implement the corresponding compat_ioctl handler.
+
+As long as all the rules for data structures are followed, this is as
+easy as setting the .compat_ioctl pointer to a helper function such as
+compat_ptr_ioctl() or blkdev_compat_ptr_ioctl().
+
+compat_ptr()
+------------
+
+On the s390 architecture, 31-bit user space has ambiguous representations
+for data pointers, with the upper bit being ignored. When running such
+a process in compat mode, the compat_ptr() helper must be used to
+clear the upper bit of a compat_uptr_t and turn it into a valid 64-bit
+pointer.  On other architectures, this macro only performs a cast to a
+``void __user *`` pointer.
+
+In an compat_ioctl() callback, the last argument is an unsigned long,
+which can be interpreted as either a pointer or a scalar depending on
+the command. If it is a scalar, then compat_ptr() must not be used, to
+ensure that the 64-bit kernel behaves the same way as a 32-bit kernel
+for arguments with the upper bit set.
+
+The compat_ptr_ioctl() helper can be used in place of a custom
+compat_ioctl file operation for drivers that only take arguments that
+are pointers to compatible data structures.
+
+Structure layout
+----------------
+
+Compatible data structures have the same layout on all architectures,
+avoiding all problematic members:
+
+* ``long`` and ``unsigned long`` are the size of a register, so
+  they can be either 32-bit or 64-bit wide and cannot be used in portable
+  data structures. Fixed-length replacements are ``__s32``, ``__u32``,
+  ``__s64`` and ``__u64``.
+
+* Pointers have the same problem, in addition to requiring the
+  use of compat_ptr(). The best workaround is to use ``__u64``
+  in place of pointers, which requires a cast to ``uintptr_t`` in user
+  space, and the use of u64_to_user_ptr() in the kernel to convert
+  it back into a user pointer.
+
+* On the x86-32 (i386) architecture, the alignment of 64-bit variables
+  is only 32-bit, but they are naturally aligned on most other
+  architectures including x86-64. This means a structure like::
+
+    struct foo {
+        __u32 a;
+        __u64 b;
+        __u32 c;
+    };
+
+  has four bytes of padding between a and b on x86-64, plus another four
+  bytes of padding at the end, but no padding on i386, and it needs a
+  compat_ioctl conversion handler to translate between the two formats.
+
+  To avoid this problem, all structures should have their members
+  naturally aligned, or explicit reserved fields added in place of the
+  implicit padding. The ``pahole`` tool can be used for checking the
+  alignment.
+
+* On ARM OABI user space, structures are padded to multiples of 32-bit,
+  making some structs incompatible with modern EABI kernels if they
+  do not end on a 32-bit boundary.
+
+* On the m68k architecture, struct members are not guaranteed to have an
+  alignment greater than 16-bit, which is a problem when relying on
+  implicit padding.
+
+* Bitfields and enums generally work as one would expect them to,
+  but some properties of them are implementation-defined, so it is better
+  to avoid them completely in ioctl interfaces.
+
+* ``char`` members can be either signed or unsigned, depending on
+  the architecture, so the __u8 and __s8 types should be used for 8-bit
+  integer values, though char arrays are clearer for fixed-length strings.
+
+Information leaks
+=================
+
+Uninitialized data must not be copied back to user space, as this can
+cause an information leak, which can be used to defeat kernel address
+space layout randomization (KASLR), helping in an attack.
+
+For this reason (and for compat support) it is best to avoid any
+implicit padding in data structures.  Where there is implicit padding
+in an existing structure, kernel drivers must be careful to fully
+initialize an instance of the structure before copying it to user
+space.  This is usually done by calling memset() before assigning to
+individual members.
+
+Subsystem abstractions
+======================
+
+While some device drivers implement their own ioctl function, most
+subsystems implement the same command for multiple drivers.  Ideally the
+subsystem has an .ioctl() handler that copies the arguments from and
+to user space, passing them into subsystem specific callback functions
+through normal kernel pointers.
+
+This helps in various ways:
+
+* Applications written for one driver are more likely to work for
+  another one in the same subsystem if there are no subtle differences
+  in the user space ABI.
+
+* The complexity of user space access and data structure layout is done
+  in one place, reducing the potential for implementation bugs.
+
+* It is more likely to be reviewed by experienced developers
+  that can spot problems in the interface when the ioctl is shared
+  between multiple drivers than when it is only used in a single driver.
+
+Alternatives to ioctl
+=====================
+
+There are many cases in which ioctl is not the best solution for a
+problem. Alternatives include:
+
+* System calls are a better choice for a system-wide feature that
+  is not tied to a physical device or constrained by the file system
+  permissions of a character device node
+
+* netlink is the preferred way of configuring any network related
+  objects through sockets.
+
+* debugfs is used for ad-hoc interfaces for debugging functionality
+  that does not need to be exposed as a stable interface to applications.
+
+* sysfs is a good way to expose the state of an in-kernel object
+  that is not tied to a file descriptor.
+
+* configfs can be used for more complex configuration than sysfs
+
+* A custom file system can provide extra flexibility with a simple
+  user interface but adds a lot of complexity to the implementation.
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index f77de49b1d51..4ac53a1363f6 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -57,7 +57,13 @@ The Linux kernel provides more basic utility functions.
 Bit Operations
 --------------
 
-.. kernel-doc:: include/asm-generic/bitops-instrumented.h
+.. kernel-doc:: include/asm-generic/bitops/instrumented-atomic.h
+   :internal:
+
+.. kernel-doc:: include/asm-generic/bitops/instrumented-non-atomic.h
+   :internal:
+
+.. kernel-doc:: include/asm-generic/bitops/instrumented-lock.h
    :internal:
 
 Bitmap Operations
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index 939e3dfc86e9..4aa82ddd01b8 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -88,10 +88,11 @@ Selecting memory allocator
 ==========================
 
 The most straightforward way to allocate memory is to use a function
-from the :c:func:`kmalloc` family. And, to be on the safe size it's
-best to use routines that set memory to zero, like
-:c:func:`kzalloc`. If you need to allocate memory for an array, there
-are :c:func:`kmalloc_array` and :c:func:`kcalloc` helpers.
+from the kmalloc() family. And, to be on the safe side it's best to use
+routines that set memory to zero, like kzalloc(). If you need to
+allocate memory for an array, there are kmalloc_array() and kcalloc()
+helpers. The helpers struct_size(), array_size() and array3_size() can
+be used to safely calculate object sizes without overflowing.
 
 The maximal size of a chunk that can be allocated with `kmalloc` is
 limited. The actual limit depends on the hardware and the kernel
@@ -102,29 +103,26 @@ The address of a chunk allocated with `kmalloc` is aligned to at least
 ARCH_KMALLOC_MINALIGN bytes.  For sizes which are a power of two, the
 alignment is also guaranteed to be at least the respective size.
 
-For large allocations you can use :c:func:`vmalloc` and
-:c:func:`vzalloc`, or directly request pages from the page
-allocator. The memory allocated by `vmalloc` and related functions is
-not physically contiguous.
+For large allocations you can use vmalloc() and vzalloc(), or directly
+request pages from the page allocator. The memory allocated by `vmalloc`
+and related functions is not physically contiguous.
 
 If you are not sure whether the allocation size is too large for
-`kmalloc`, it is possible to use :c:func:`kvmalloc` and its
-derivatives. It will try to allocate memory with `kmalloc` and if the
-allocation fails it will be retried with `vmalloc`. There are
-restrictions on which GFP flags can be used with `kvmalloc`; please
-see :c:func:`kvmalloc_node` reference documentation. Note that
-`kvmalloc` may return memory that is not physically contiguous.
+`kmalloc`, it is possible to use kvmalloc() and its derivatives. It will
+try to allocate memory with `kmalloc` and if the allocation fails it
+will be retried with `vmalloc`. There are restrictions on which GFP
+flags can be used with `kvmalloc`; please see kvmalloc_node() reference
+documentation. Note that `kvmalloc` may return memory that is not
+physically contiguous.
 
 If you need to allocate many identical objects you can use the slab
-cache allocator. The cache should be set up with
-:c:func:`kmem_cache_create` or :c:func:`kmem_cache_create_usercopy`
-before it can be used. The second function should be used if a part of
-the cache might be copied to the userspace.  After the cache is
-created :c:func:`kmem_cache_alloc` and its convenience wrappers can
-allocate memory from that cache.
-
-When the allocated memory is no longer needed it must be freed. You
-can use :c:func:`kvfree` for the memory allocated with `kmalloc`,
-`vmalloc` and `kvmalloc`. The slab caches should be freed with
-:c:func:`kmem_cache_free`. And don't forget to destroy the cache with
-:c:func:`kmem_cache_destroy`.
+cache allocator. The cache should be set up with kmem_cache_create() or
+kmem_cache_create_usercopy() before it can be used. The second function
+should be used if a part of the cache might be copied to the userspace.
+After the cache is created kmem_cache_alloc() and its convenience
+wrappers can allocate memory from that cache.
+
+When the allocated memory is no longer needed it must be freed. You can
+use kvfree() for the memory allocated with `kmalloc`, `vmalloc` and
+`kvmalloc`. The slab caches should be freed with kmem_cache_free(). And
+don't forget to destroy the cache with kmem_cache_destroy().
diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 128e8a721c1e..be726986ff75 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -11,7 +11,7 @@ User Space Memory Access
 .. kernel-doc:: arch/x86/lib/usercopy_32.c
    :export:
 
-.. kernel-doc:: mm/util.c
+.. kernel-doc:: mm/gup.c
    :functions: get_user_pages_fast
 
 .. _mm-api-gfp-flags:
diff --git a/Documentation/core-api/padata.rst b/Documentation/core-api/padata.rst
new file mode 100644
index 000000000000..9a24c111781d
--- /dev/null
+++ b/Documentation/core-api/padata.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+The padata parallel execution mechanism
+=======================================
+
+:Date: December 2019
+
+Padata is a mechanism by which the kernel can farm jobs out to be done in
+parallel on multiple CPUs while retaining their ordering.  It was developed for
+use with the IPsec code, which needs to be able to perform encryption and
+decryption on large numbers of packets without reordering those packets.  The
+crypto developers made a point of writing padata in a sufficiently general
+fashion that it could be put to other uses as well.
+
+Usage
+=====
+
+Initializing
+------------
+
+The first step in using padata is to set up a padata_instance structure for
+overall control of how jobs are to be run::
+
+    #include <linux/padata.h>
+
+    struct padata_instance *padata_alloc_possible(const char *name);
+
+'name' simply identifies the instance.
+
+There are functions for enabling and disabling the instance::
+
+    int padata_start(struct padata_instance *pinst);
+    void padata_stop(struct padata_instance *pinst);
+
+These functions are setting or clearing the "PADATA_INIT" flag; if that flag is
+not set, other functions will refuse to work.  padata_start() returns zero on
+success (flag set) or -EINVAL if the padata cpumask contains no active CPU
+(flag not set).  padata_stop() clears the flag and blocks until the padata
+instance is unused.
+
+Finally, complete padata initialization by allocating a padata_shell::
+
+   struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
+
+A padata_shell is used to submit a job to padata and allows a series of such
+jobs to be serialized independently.  A padata_instance may have one or more
+padata_shells associated with it, each allowing a separate series of jobs.
+
+Modifying cpumasks
+------------------
+
+The CPUs used to run jobs can be changed in two ways, programatically with
+padata_set_cpumask() or via sysfs.  The former is defined::
+
+    int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+			   cpumask_var_t cpumask);
+
+Here cpumask_type is one of PADATA_CPU_PARALLEL or PADATA_CPU_SERIAL, where a
+parallel cpumask describes which processors will be used to execute jobs
+submitted to this instance in parallel and a serial cpumask defines which
+processors are allowed to be used as the serialization callback processor.
+cpumask specifies the new cpumask to use.
+
+There may be sysfs files for an instance's cpumasks.  For example, pcrypt's
+live in /sys/kernel/pcrypt/<instance-name>.  Within an instance's directory
+there are two files, parallel_cpumask and serial_cpumask, and either cpumask
+may be changed by echoing a bitmask into the file, for example::
+
+    echo f > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
+
+Reading one of these files shows the user-supplied cpumask, which may be
+different from the 'usable' cpumask.
+
+Padata maintains two pairs of cpumasks internally, the user-supplied cpumasks
+and the 'usable' cpumasks.  (Each pair consists of a parallel and a serial
+cpumask.)  The user-supplied cpumasks default to all possible CPUs on instance
+allocation and may be changed as above.  The usable cpumasks are always a
+subset of the user-supplied cpumasks and contain only the online CPUs in the
+user-supplied masks; these are the cpumasks padata actually uses.  So it is
+legal to supply a cpumask to padata that contains offline CPUs.  Once an
+offline CPU in the user-supplied cpumask comes online, padata is going to use
+it.
+
+Changing the CPU masks are expensive operations, so it should not be done with
+great frequency.
+
+Running A Job
+-------------
+
+Actually submitting work to the padata instance requires the creation of a
+padata_priv structure, which represents one job::
+
+    struct padata_priv {
+        /* Other stuff here... */
+	void                    (*parallel)(struct padata_priv *padata);
+	void                    (*serial)(struct padata_priv *padata);
+    };
+
+This structure will almost certainly be embedded within some larger
+structure specific to the work to be done.  Most of its fields are private to
+padata, but the structure should be zeroed at initialisation time, and the
+parallel() and serial() functions should be provided.  Those functions will
+be called in the process of getting the work done as we will see
+momentarily.
+
+The submission of the job is done with::
+
+    int padata_do_parallel(struct padata_shell *ps,
+		           struct padata_priv *padata, int *cb_cpu);
+
+The ps and padata structures must be set up as described above; cb_cpu
+points to the preferred CPU to be used for the final callback when the job is
+done; it must be in the current instance's CPU mask (if not the cb_cpu pointer
+is updated to point to the CPU actually chosen).  The return value from
+padata_do_parallel() is zero on success, indicating that the job is in
+progress. -EBUSY means that somebody, somewhere else is messing with the
+instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being in the
+serial cpumask, no online CPUs in the parallel or serial cpumasks, or a stopped
+instance.
+
+Each job submitted to padata_do_parallel() will, in turn, be passed to
+exactly one call to the above-mentioned parallel() function, on one CPU, so
+true parallelism is achieved by submitting multiple jobs.  parallel() runs with
+software interrupts disabled and thus cannot sleep.  The parallel()
+function gets the padata_priv structure pointer as its lone parameter;
+information about the actual work to be done is probably obtained by using
+container_of() to find the enclosing structure.
+
+Note that parallel() has no return value; the padata subsystem assumes that
+parallel() will take responsibility for the job from this point.  The job
+need not be completed during this call, but, if parallel() leaves work
+outstanding, it should be prepared to be called again with a new job before
+the previous one completes.
+
+Serializing Jobs
+----------------
+
+When a job does complete, parallel() (or whatever function actually finishes
+the work) should inform padata of the fact with a call to::
+
+    void padata_do_serial(struct padata_priv *padata);
+
+At some point in the future, padata_do_serial() will trigger a call to the
+serial() function in the padata_priv structure.  That call will happen on
+the CPU requested in the initial call to padata_do_parallel(); it, too, is
+run with local software interrupts disabled.
+Note that this call may be deferred for a while since the padata code takes
+pains to ensure that jobs are completed in the order in which they were
+submitted.
+
+Destroying
+----------
+
+Cleaning up a padata instance predictably involves calling the three free
+functions that correspond to the allocation in reverse::
+
+    void padata_free_shell(struct padata_shell *ps);
+    void padata_stop(struct padata_instance *pinst);
+    void padata_free(struct padata_instance *pinst);
+
+It is the user's responsibility to ensure all outstanding jobs are complete
+before any of the above are called.
+
+Interface
+=========
+
+.. kernel-doc:: include/linux/padata.h
+.. kernel-doc:: kernel/padata.c
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index ecbebf4ca8e7..8ebe46b1af39 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -79,6 +79,18 @@ has the added benefit of providing a unique identifier. On 64-bit machines
 the first 32 bits are zeroed. The kernel will print ``(ptrval)`` until it
 gathers enough entropy. If you *really* want the address see %px below.
 
+Error Pointers
+--------------
+
+::
+
+	%pe	-ENOSPC
+
+For printing error pointers (i.e. a pointer for which IS_ERR() is true)
+as a symbolic error name. Error values for which no symbolic name is
+known are printed in decimal, while a non-ERR_PTR passed as the
+argument to %pe gets treated as ordinary %p.
+
 Symbols/Function Pointers
 -------------------------
 
@@ -86,8 +98,6 @@ Symbols/Function Pointers
 
 	%pS	versatile_init+0x0/0x110
 	%ps	versatile_init
-	%pF	versatile_init+0x0/0x110
-	%pf	versatile_init
 	%pSR	versatile_init+0x9/0x110
 		(with __builtin_extract_return_addr() translation)
 	%pB	prev_fn_of_versatile_init+0x88/0x88
@@ -97,14 +107,6 @@ The ``S`` and ``s`` specifiers are used for printing a pointer in symbolic
 format. They result in the symbol name with (S) or without (s)
 offsets. If KALLSYMS are disabled then the symbol address is printed instead.
 
-Note, that the ``F`` and ``f`` specifiers are identical to ``S`` (``s``)
-and thus deprecated. We have ``F`` and ``f`` because on ia64, ppc64 and
-parisc64 function pointers are indirect and, in fact, are function
-descriptors, which require additional dereferencing before we can lookup
-the symbol. As of now, ``S`` and ``s`` perform dereferencing on those
-platforms (when needed), so ``F`` and ``f`` exist for compatibility
-reasons only.
-
 The ``B`` specifier results in the symbol name with offsets and should be
 used when printing stack backtraces. The specifier takes into
 consideration the effect of compiler optimisations which may occur
@@ -135,6 +137,20 @@ equivalent to %lx (or %lu). %px is preferred because it is more uniquely
 grep'able. If in the future we need to modify the way the kernel handles
 printing pointers we will be better equipped to find the call sites.
 
+Pointer Differences
+-------------------
+
+::
+
+	%td	2560
+	%tx	a00
+
+For printing the pointer differences, use the %t modifier for ptrdiff_t.
+
+Example::
+
+	printk("test: difference between pointers: %td\n", ptr2 - ptr1);
+
 Struct Resources
 ----------------
 
@@ -428,6 +444,30 @@ Examples::
 
 Passed by reference.
 
+Fwnode handles
+--------------
+
+::
+
+	%pfw[fP]
+
+For printing information on fwnode handles. The default is to print the full
+node name, including the path. The modifiers are functionally equivalent to
+%pOF above.
+
+	- f - full name of the node, including the path
+	- P - the name of the node including an address (if there is one)
+
+Examples (ACPI)::
+
+	%pfwf	\_SB.PCI0.CIO2.port@1.endpoint@0	- Full node name
+	%pfwP	endpoint@0				- Node name
+
+Examples (OF)::
+
+	%pfwf	/ocp@68000000/i2c@48072000/camera@10/port/endpoint - Full name
+	%pfwP	endpoint				- Node name
+
 Time and date (struct rtc_time)
 -------------------------------
 
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 976e85adffe8..79a009ce11df 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -35,7 +35,7 @@ atomics & refcounters only provide atomicity and
 program order (po) relation (on the same CPU). It guarantees that
 each ``atomic_*()`` and ``refcount_*()`` operation is atomic and instructions
 are executed in program order on a single CPU.
-This is implemented using :c:func:`READ_ONCE`/:c:func:`WRITE_ONCE` and
+This is implemented using READ_ONCE()/WRITE_ONCE() and
 compare-and-swap primitives.
 
 A strong (full) memory ordering guarantees that all prior loads and
@@ -44,7 +44,7 @@ before any po-later instruction is executed on the same CPU.
 It also guarantees that all po-earlier stores on the same CPU
 and all propagated stores from other CPUs must propagate to all
 other CPUs before any po-later instruction is executed on the original
-CPU (A-cumulative property). This is implemented using :c:func:`smp_mb`.
+CPU (A-cumulative property). This is implemented using smp_mb().
 
 A RELEASE memory ordering guarantees that all prior loads and
 stores (all po-earlier instructions) on the same CPU are completed
@@ -52,14 +52,14 @@ before the operation. It also guarantees that all po-earlier
 stores on the same CPU and all propagated stores from other CPUs
 must propagate to all other CPUs before the release operation
 (A-cumulative property). This is implemented using
-:c:func:`smp_store_release`.
+smp_store_release().
 
 An ACQUIRE memory ordering guarantees that all post loads and
 stores (all po-later instructions) on the same CPU are
 completed after the acquire operation. It also guarantees that all
 po-later stores on the same CPU must propagate to all other CPUs
 after the acquire operation executes. This is implemented using
-:c:func:`smp_acquire__after_ctrl_dep`.
+smp_acquire__after_ctrl_dep().
 
 A control dependency (on success) for refcounters guarantees that
 if a reference for an object was successfully obtained (reference
@@ -78,8 +78,8 @@ case 1) - non-"Read/Modify/Write" (RMW) ops
 
 Function changes:
 
- * :c:func:`atomic_set` --> :c:func:`refcount_set`
- * :c:func:`atomic_read` --> :c:func:`refcount_read`
+ * atomic_set() --> refcount_set()
+ * atomic_read() --> refcount_read()
 
 Memory ordering guarantee changes:
 
@@ -91,8 +91,8 @@ case 2) - increment-based ops that return no value
 
 Function changes:
 
- * :c:func:`atomic_inc` --> :c:func:`refcount_inc`
- * :c:func:`atomic_add` --> :c:func:`refcount_add`
+ * atomic_inc() --> refcount_inc()
+ * atomic_add() --> refcount_add()
 
 Memory ordering guarantee changes:
 
@@ -103,7 +103,7 @@ case 3) - decrement-based RMW ops that return no value
 
 Function changes:
 
- * :c:func:`atomic_dec` --> :c:func:`refcount_dec`
+ * atomic_dec() --> refcount_dec()
 
 Memory ordering guarantee changes:
 
@@ -115,8 +115,8 @@ case 4) - increment-based RMW ops that return a value
 
 Function changes:
 
- * :c:func:`atomic_inc_not_zero` --> :c:func:`refcount_inc_not_zero`
- * no atomic counterpart --> :c:func:`refcount_add_not_zero`
+ * atomic_inc_not_zero() --> refcount_inc_not_zero()
+ * no atomic counterpart --> refcount_add_not_zero()
 
 Memory ordering guarantees changes:
 
@@ -131,8 +131,8 @@ case 5) - generic dec/sub decrement-based RMW ops that return a value
 
 Function changes:
 
- * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
- * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
+ * atomic_dec_and_test() --> refcount_dec_and_test()
+ * atomic_sub_and_test() --> refcount_sub_and_test()
 
 Memory ordering guarantees changes:
 
@@ -144,14 +144,14 @@ case 6) other decrement-based RMW ops that return a value
 
 Function changes:
 
- * no atomic counterpart --> :c:func:`refcount_dec_if_one`
+ * no atomic counterpart --> refcount_dec_if_one()
  * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
 
 Memory ordering guarantees changes:
 
  * fully ordered --> RELEASE ordering + control dependency
 
-.. note:: :c:func:`atomic_add_unless` only provides full order on success.
+.. note:: atomic_add_unless() only provides full order on success.
 
 
 case 7) - lock-based RMW
@@ -159,10 +159,10 @@ case 7) - lock-based RMW
 
 Function changes:
 
- * :c:func:`atomic_dec_and_lock` --> :c:func:`refcount_dec_and_lock`
- * :c:func:`atomic_dec_and_mutex_lock` --> :c:func:`refcount_dec_and_mutex_lock`
+ * atomic_dec_and_lock() --> refcount_dec_and_lock()
+ * atomic_dec_and_mutex_lock() --> refcount_dec_and_mutex_lock()
 
 Memory ordering guarantees changes:
 
  * fully ordered --> RELEASE ordering + control dependency + hold
-   :c:func:`spin_lock` on success
+   spin_lock() on success
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 982ed7b568ac..9b76337f6756 100644
--- a/Documentation/core-api/symbol-namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
@@ -152,3 +152,6 @@ in-tree modules::
 	- notice the warning of modpost telling about a missing import
 	- run `make nsdeps` to add the import to the correct code location
 
+You can also run nsdeps for external module builds. A typical usage is::
+
+	$ make -C <path_to_kernel_src> M=$PWD nsdeps
diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index fcedc5349ace..640934b6f7b4 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -25,10 +25,6 @@ good performance with large indices.  If your index can be larger than
 ``ULONG_MAX`` then the XArray is not the data type for you.  The most
 important user of the XArray is the page cache.
 
-Each non-``NULL`` entry in the array has three bits associated with
-it called marks.  Each mark may be set or cleared independently of
-the others.  You can iterate over entries which are marked.
-
 Normal pointers may be stored in the XArray directly.  They must be 4-byte
 aligned, which is true for any pointer returned from kmalloc() and
 alloc_page().  It isn't true for arbitrary user-space pointers,
@@ -41,12 +37,11 @@ When you retrieve an entry from the XArray, you can check whether it is
 a value entry by calling xa_is_value(), and convert it back to
 an integer by calling xa_to_value().
 
-Some users want to store tagged pointers instead of using the marks
-described above.  They can call xa_tag_pointer() to create an
-entry with a tag, xa_untag_pointer() to turn a tagged entry
-back into an untagged pointer and xa_pointer_tag() to retrieve
-the tag of an entry.  Tagged pointers use the same bits that are used
-to distinguish value entries from normal pointers, so each user must
+Some users want to tag the pointers they store in the XArray.  You can
+call xa_tag_pointer() to create an entry with a tag, xa_untag_pointer()
+to turn a tagged entry back into an untagged pointer and xa_pointer_tag()
+to retrieve the tag of an entry.  Tagged pointers use the same bits that
+are used to distinguish value entries from normal pointers, so you must
 decide whether they want to store value entries or tagged pointers in
 any particular XArray.
 
@@ -56,10 +51,9 @@ conflict with value entries or internal entries.
 An unusual feature of the XArray is the ability to create entries which
 occupy a range of indices.  Once stored to, looking up any index in
 the range will return the same entry as looking up any other index in
-the range.  Setting a mark on one index will set it on all of them.
-Storing to any index will store to all of them.  Multi-index entries can
-be explicitly split into smaller entries, or storing ``NULL`` into any
-entry will cause the XArray to forget about the range.
+the range.  Storing to any index will store to all of them.  Multi-index
+entries can be explicitly split into smaller entries, or storing ``NULL``
+into any entry will cause the XArray to forget about the range.
 
 Normal API
 ==========
@@ -87,17 +81,11 @@ If you want to only store a new entry to an index if the current entry
 at that index is ``NULL``, you can use xa_insert() which
 returns ``-EBUSY`` if the entry is not empty.
 
-You can enquire whether a mark is set on an entry by using
-xa_get_mark().  If the entry is not ``NULL``, you can set a mark
-on it by using xa_set_mark() and remove the mark from an entry by
-calling xa_clear_mark().  You can ask whether any entry in the
-XArray has a particular mark set by calling xa_marked().
-
 You can copy entries out of the XArray into a plain array by calling
-xa_extract().  Or you can iterate over the present entries in
-the XArray by calling xa_for_each().  You may prefer to use
-xa_find() or xa_find_after() to move to the next present
-entry in the XArray.
+xa_extract().  Or you can iterate over the present entries in the XArray
+by calling xa_for_each(), xa_for_each_start() or xa_for_each_range().
+You may prefer to use xa_find() or xa_find_after() to move to the next
+present entry in the XArray.
 
 Calling xa_store_range() stores the same entry in a range
 of indices.  If you do this, some of the other operations will behave
@@ -124,6 +112,31 @@ xa_destroy().  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
 entries in the XArray using the xa_for_each() iterator.
 
+Search Marks
+------------
+
+Each entry in the array has three bits associated with it called marks.
+Each mark may be set or cleared independently of the others.  You can
+iterate over marked entries by using the xa_for_each_marked() iterator.
+
+You can enquire whether a mark is set on an entry by using
+xa_get_mark().  If the entry is not ``NULL``, you can set a mark on it
+by using xa_set_mark() and remove the mark from an entry by calling
+xa_clear_mark().  You can ask whether any entry in the XArray has a
+particular mark set by calling xa_marked().  Erasing an entry from the
+XArray causes all marks associated with that entry to be cleared.
+
+Setting or clearing a mark on any index of a multi-index entry will
+affect all indices covered by that entry.  Querying the mark on any
+index will return the same result.
+
+There is no way to iterate over entries which are not marked; the data
+structure does not allow this to be implemented efficiently.  There are
+not currently iterators to search for logical combinations of bits (eg
+iterate over all entries which have both ``XA_MARK_1`` and ``XA_MARK_2``
+set, or iterate over all entries which have ``XA_MARK_0`` or ``XA_MARK_2``
+set).  It would be possible to add these if a user arises.
+
 Allocating XArrays
 ------------------
 
@@ -180,6 +193,8 @@ No lock needed:
 Takes RCU read lock:
  * xa_load()
  * xa_for_each()
+ * xa_for_each_start()
+ * xa_for_each_range()
  * xa_find()
  * xa_find_after()
  * xa_extract()
@@ -419,10 +434,9 @@ you last processed.  If you have interrupts disabled while iterating,
 then it is good manners to pause the iteration and reenable interrupts
 every ``XA_CHECK_SCHED`` entries.
 
-The xas_get_mark(), xas_set_mark() and
-xas_clear_mark() functions require the xa_state cursor to have
-been moved to the appropriate location in the xarray; they will do
-nothing if you have called xas_pause() or xas_set()
+The xas_get_mark(), xas_set_mark() and xas_clear_mark() functions require
+the xa_state cursor to have been moved to the appropriate location in the
+XArray; they will do nothing if you have called xas_pause() or xas_set()
 immediately before.
 
 You can call xas_set_update() to have a callback function
diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst
index 20ba08dddf2e..1aaf8985894b 100644
--- a/Documentation/crypto/api-skcipher.rst
+++ b/Documentation/crypto/api-skcipher.rst
@@ -5,7 +5,7 @@ Block Cipher Algorithm Definitions
    :doc: Block Cipher Algorithm Definitions
 
 .. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg compress_alg
+   :functions: crypto_alg cipher_alg compress_alg
 
 Symmetric Key Cipher API
 ------------------------
@@ -33,30 +33,3 @@ Single Block Cipher API
 
 .. kernel-doc:: include/linux/crypto.h
    :functions: crypto_alloc_cipher crypto_free_cipher crypto_has_cipher crypto_cipher_blocksize crypto_cipher_setkey crypto_cipher_encrypt_one crypto_cipher_decrypt_one
-
-Asynchronous Block Cipher API - Deprecated
-------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
-   :doc: Asynchronous Block Cipher API
-
-.. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_free_ablkcipher crypto_has_ablkcipher crypto_ablkcipher_ivsize crypto_ablkcipher_blocksize crypto_ablkcipher_setkey crypto_ablkcipher_reqtfm crypto_ablkcipher_encrypt crypto_ablkcipher_decrypt
-
-Asynchronous Cipher Request Handle - Deprecated
------------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
-   :doc: Asynchronous Cipher Request Handle
-
-.. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_ablkcipher_reqsize ablkcipher_request_set_tfm ablkcipher_request_alloc ablkcipher_request_free ablkcipher_request_set_callback ablkcipher_request_set_crypt
-
-Synchronous Block Cipher API - Deprecated
------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
-   :doc: Synchronous Block Cipher API
-
-.. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst
index 3eae1ae7f798..646c3380a7ed 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -201,10 +201,6 @@ the aforementioned cipher types:
 -  CRYPTO_ALG_TYPE_AEAD Authenticated Encryption with Associated Data
    (MAC)
 
--  CRYPTO_ALG_TYPE_BLKCIPHER Synchronous multi-block cipher
-
--  CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher
-
 -  CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
    an ECDH or DH implementation
 
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
index 3baa23c2cd08..25cf9836c336 100644
--- a/Documentation/crypto/crypto_engine.rst
+++ b/Documentation/crypto/crypto_engine.rst
@@ -63,8 +63,6 @@ request by using:
 When your driver receives a crypto_request, you must to transfer it to
 the crypto engine via one of:
 
-* crypto_transfer_ablkcipher_request_to_engine()
-
 * crypto_transfer_aead_request_to_engine()
 
 * crypto_transfer_akcipher_request_to_engine()
@@ -75,8 +73,6 @@ the crypto engine via one of:
 
 At the end of the request process, a call to one of the following functions is needed:
 
-* crypto_finalize_ablkcipher_request()
-
 * crypto_finalize_aead_request()
 
 * crypto_finalize_akcipher_request()
diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst
index c45c6f400dbd..f225a953ab4b 100644
--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@@ -31,33 +31,23 @@ The counterparts to those functions are listed below.
 
 ::
 
-       int crypto_unregister_alg(struct crypto_alg *alg);
-       int crypto_unregister_algs(struct crypto_alg *algs, int count);
+       void crypto_unregister_alg(struct crypto_alg *alg);
+       void crypto_unregister_algs(struct crypto_alg *algs, int count);
 
 
-Notice that both registration and unregistration functions do return a
-value, so make sure to handle errors. A return code of zero implies
-success. Any return code < 0 implies an error.
+The registration functions return 0 on success, or a negative errno
+value on failure.  crypto_register_algs() succeeds only if it
+successfully registered all the given algorithms; if it fails partway
+through, then any changes are rolled back.
 
-The bulk registration/unregistration functions register/unregister each
-transformation in the given array of length count. They handle errors as
-follows:
-
--  crypto_register_algs() succeeds if and only if it successfully
-   registers all the given transformations. If an error occurs partway
-   through, then it rolls back successful registrations before returning
-   the error code. Note that if a driver needs to handle registration
-   errors for individual transformations, then it will need to use the
-   non-bulk function crypto_register_alg() instead.
-
--  crypto_unregister_algs() tries to unregister all the given
-   transformations, continuing on error. It logs errors and always
-   returns zero.
+The unregistration functions always succeed, so they don't have a
+return value.  Don't try to unregister algorithms that aren't
+currently registered.
 
 Single-Block Symmetric Ciphers [CIPHER]
 ---------------------------------------
 
-Example of transformations: aes, arc4, ...
+Example of transformations: aes, serpent, ...
 
 This section describes the simplest of all transformation
 implementations, that being the CIPHER type used for symmetric ciphers.
@@ -108,7 +98,7 @@ is also valid:
 Multi-Block Ciphers
 -------------------
 
-Example of transformations: cbc(aes), ecb(arc4), ...
+Example of transformations: cbc(aes), chacha20, ...
 
 This section describes the multi-block cipher transformation
 implementations. The multi-block ciphers are used for transformations
@@ -128,25 +118,20 @@ process requests that are unaligned. This implies, however, additional
 overhead as the kernel crypto API needs to perform the realignment of
 the data which may imply moving of data.
 
-Cipher Definition With struct blkcipher_alg and ablkcipher_alg
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Struct blkcipher_alg defines a synchronous block cipher whereas struct
-ablkcipher_alg defines an asynchronous block cipher.
-
-Please refer to the single block cipher description for schematics of
-the block cipher usage.
+Cipher Definition With struct skcipher_alg
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Specifics Of Asynchronous Multi-Block Cipher
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Struct skcipher_alg defines a multi-block cipher, or more generally, a
+length-preserving symmetric cipher algorithm.
 
-There are a couple of specifics to the asynchronous interface.
+Scatterlist handling
+~~~~~~~~~~~~~~~~~~~~
 
-First of all, some of the drivers will want to use the Generic
-ScatterWalk in case the hardware needs to be fed separate chunks of the
-scatterlist which contains the plaintext and will contain the
-ciphertext. Please refer to the ScatterWalk interface offered by the
-Linux kernel scatter / gather list implementation.
+Some drivers will want to use the Generic ScatterWalk in case the
+hardware needs to be fed separate chunks of the scatterlist which
+contains the plaintext and will contain the ciphertext. Please refer
+to the ScatterWalk interface offered by the Linux kernel scatter /
+gather list implementation.
 
 Hashing [HASH]
 --------------
@@ -174,10 +159,10 @@ are as follows:
 
 ::
 
-       int crypto_unregister_ahash(struct ahash_alg *alg);
+       void crypto_unregister_ahash(struct ahash_alg *alg);
 
-       int crypto_unregister_shash(struct shash_alg *alg);
-       int crypto_unregister_shashes(struct shash_alg *algs, int count);
+       void crypto_unregister_shash(struct shash_alg *alg);
+       void crypto_unregister_shashes(struct shash_alg *algs, int count);
 
 
 Cipher Definition With struct shash_alg and ahash_alg
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index b0522a4dd107..09dee10d2592 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -24,6 +24,7 @@ whole; patches welcome!
    gdb-kernel-debugging
    kgdb
    kselftest
+   kunit/index
 
 
 .. only::  subproject and html
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index 525296121d89..e4d66e7c50de 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -218,3 +218,66 @@ brk handler is used to print bug reports.
 A potential expansion of this mode is a hardware tag-based mode, which would
 use hardware memory tagging support instead of compiler instrumentation and
 manual shadow memory manipulation.
+
+What memory accesses are sanitised by KASAN?
+--------------------------------------------
+
+The kernel maps memory in a number of different parts of the address
+space. This poses something of a problem for KASAN, which requires
+that all addresses accessed by instrumented code have a valid shadow
+region.
+
+The range of kernel virtual addresses is large: there is not enough
+real memory to support a real shadow region for every address that
+could be accessed by the kernel.
+
+By default
+~~~~~~~~~~
+
+By default, architectures only map real memory over the shadow region
+for the linear mapping (and potentially other small areas). For all
+other areas - such as vmalloc and vmemmap space - a single read-only
+page is mapped over the shadow area. This read-only shadow page
+declares all memory accesses as permitted.
+
+This presents a problem for modules: they do not live in the linear
+mapping, but in a dedicated module space. By hooking in to the module
+allocator, KASAN can temporarily map real shadow memory to cover
+them. This allows detection of invalid accesses to module globals, for
+example.
+
+This also creates an incompatibility with ``VMAP_STACK``: if the stack
+lives in vmalloc space, it will be shadowed by the read-only page, and
+the kernel will fault when trying to set up the shadow data for stack
+variables.
+
+CONFIG_KASAN_VMALLOC
+~~~~~~~~~~~~~~~~~~~~
+
+With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
+cost of greater memory usage. Currently this is only supported on x86.
+
+This works by hooking into vmalloc and vmap, and dynamically
+allocating real shadow memory to back the mappings.
+
+Most mappings in vmalloc space are small, requiring less than a full
+page of shadow space. Allocating a full shadow page per mapping would
+therefore be wasteful. Furthermore, to ensure that different mappings
+use different shadow pages, mappings would have to be aligned to
+``KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE``.
+
+Instead, we share backing space across multiple mappings. We allocate
+a backing page when a mapping in vmalloc space uses a particular page
+of the shadow region. This page can be shared by other vmalloc
+mappings later on.
+
+We hook in to the vmap infrastructure to lazily clean up unused shadow
+memory.
+
+To avoid the difficulties around swapping mappings around, we expect
+that the part of the shadow region that covers the vmalloc space will
+not be covered by the early shadow page, but will be left
+unmapped. This will require changes in arch-specific code.
+
+This allows ``VMAP_STACK`` support on x86, and can simplify support of
+architectures that do not have a fixed module region.
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 42b612677799..1c4e1825d769 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted::
 
 Coverage collection
 -------------------
+
 The following program demonstrates coverage collection from within a test
 program using kcov:
 
@@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end).
 
 Comparison operands collection
 ------------------------------
+
 Comparison operands collection is similar to coverage collection:
 
 .. code-block:: c
@@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection:
 
 Note that the kcov modes (coverage collection or comparison operands) are
 mutually exclusive.
+
+Remote coverage collection
+--------------------------
+
+With KCOV_ENABLE coverage is collected only for syscalls that are issued
+from the current process. With KCOV_REMOTE_ENABLE it's possible to collect
+coverage for arbitrary parts of the kernel code, provided that those parts
+are annotated with kcov_remote_start()/kcov_remote_stop().
+
+This allows to collect coverage from two types of kernel background
+threads: the global ones, that are spawned during kernel boot in a limited
+number of instances (e.g. one USB hub_event() worker thread is spawned per
+USB HCD); and the local ones, that are spawned when a user interacts with
+some kernel interface (e.g. vhost workers).
+
+To enable collecting coverage from a global background thread, a unique
+global handle must be assigned and passed to the corresponding
+kcov_remote_start() call. Then a userspace process can pass a list of such
+handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the
+kcov_remote_arg struct. This will attach the used kcov device to the code
+sections, that are referenced by those handles.
+
+Since there might be many local background threads spawned from different
+userspace processes, we can't use a single global handle per annotation.
+Instead, the userspace process passes a non-zero handle through the
+common_handle field of the kcov_remote_arg struct. This common handle gets
+saved to the kcov_handle field in the current task_struct and needs to be
+passed to the newly spawned threads via custom annotations. Those threads
+should in turn be annotated with kcov_remote_start()/kcov_remote_stop().
+
+Internally kcov stores handles as u64 integers. The top byte of a handle
+is used to denote the id of a subsystem that this handle belongs to, and
+the lower 4 bytes are used to denote the id of a thread instance within
+that subsystem. A reserved value 0 is used as a subsystem id for common
+handles as they don't belong to a particular subsystem. The bytes 4-7 are
+currently reserved and must be zero. In the future the number of bytes
+used for the subsystem or handle ids might be increased.
+
+When a particular userspace proccess collects coverage by via a common
+handle, kcov will collect coverage for each code section that is annotated
+to use the common handle obtained as kcov_handle from the current
+task_struct. However non common handles allow to collect coverage
+selectively from different subsystems.
+
+.. code-block:: c
+
+    struct kcov_remote_arg {
+	__u32		trace_mode;
+	__u32		area_size;
+	__u32		num_handles;
+	__aligned_u64	common_handle;
+	__aligned_u64	handles[0];
+    };
+
+    #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+    #define KCOV_DISABLE			_IO('c', 101)
+    #define KCOV_REMOTE_ENABLE		_IOW('c', 102, struct kcov_remote_arg)
+
+    #define COVER_SIZE	(64 << 10)
+
+    #define KCOV_TRACE_PC	0
+
+    #define KCOV_SUBSYSTEM_COMMON	(0x00ull << 56)
+    #define KCOV_SUBSYSTEM_USB	(0x01ull << 56)
+
+    #define KCOV_SUBSYSTEM_MASK	(0xffull << 56)
+    #define KCOV_INSTANCE_MASK	(0xffffffffull)
+
+    static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+    {
+	if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+		return 0;
+	return subsys | inst;
+    }
+
+    #define KCOV_COMMON_ID	0x42
+    #define KCOV_USB_BUS_NUM	1
+
+    int main(int argc, char **argv)
+    {
+	int fd;
+	unsigned long *cover, n, i;
+	struct kcov_remote_arg *arg;
+
+	fd = open("/sys/kernel/debug/kcov", O_RDWR);
+	if (fd == -1)
+		perror("open"), exit(1);
+	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+		perror("ioctl"), exit(1);
+	cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+				     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if ((void*)cover == MAP_FAILED)
+		perror("mmap"), exit(1);
+
+	/* Enable coverage collection via common handle and from USB bus #1. */
+	arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
+	if (!arg)
+		perror("calloc"), exit(1);
+	arg->trace_mode = KCOV_TRACE_PC;
+	arg->area_size = COVER_SIZE;
+	arg->num_handles = 1;
+	arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON,
+							KCOV_COMMON_ID);
+	arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB,
+						KCOV_USB_BUS_NUM);
+	if (ioctl(fd, KCOV_REMOTE_ENABLE, arg))
+		perror("ioctl"), free(arg), exit(1);
+	free(arg);
+
+	/*
+	 * Here the user needs to trigger execution of a kernel code section
+	 * that is either annotated with the common handle, or to trigger some
+	 * activity on USB bus #1.
+	 */
+	sleep(2);
+
+	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+	for (i = 0; i < n; i++)
+		printf("0x%lx\n", cover[i + 1]);
+	if (ioctl(fd, KCOV_DISABLE, 0))
+		perror("ioctl"), exit(1);
+	if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+		perror("munmap"), exit(1);
+	if (close(fd))
+		perror("close"), exit(1);
+	return 0;
+    }
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index 3621cd5e1eef..3a289e8a1d12 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -69,7 +69,7 @@ the kernel command line.
 
 Memory may be allocated or freed before kmemleak is initialised and
 these actions are stored in an early log buffer. The size of this buffer
-is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option.
+is configured via the CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE option.
 
 If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
 disabled by default. Passing ``kmemleak=on`` on the kernel command
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index ecdfdc9d4b03..61ae13c44f91 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -203,12 +203,12 @@ Test Module
 Kselftest tests the kernel from userspace.  Sometimes things need
 testing from within the kernel, one method of doing this is to create a
 test module.  We can tie the module into the kselftest framework by
-using a shell script test runner.  ``kselftest_module.sh`` is designed
+using a shell script test runner.  ``kselftest/module.sh`` is designed
 to facilitate this process.  There is also a header file provided to
 assist writing kernel modules that are for use with kselftest:
 
 - ``tools/testing/kselftest/kselftest_module.h``
-- ``tools/testing/kselftest/kselftest_module.sh``
+- ``tools/testing/kselftest/kselftest/module.sh``
 
 How to use
 ----------
@@ -247,7 +247,7 @@ A bare bones test module might look like this:
 
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-   #include "../tools/testing/selftests/kselftest_module.h"
+   #include "../tools/testing/selftests/kselftest/module.h"
 
    KSTM_MODULE_GLOBALS();
 
@@ -276,7 +276,7 @@ Example test script
 
     #!/bin/bash
     # SPDX-License-Identifier: GPL-2.0+
-    $(dirname $0)/../kselftest_module.sh "foo" test_foo
+    $(dirname $0)/../kselftest/module.sh "foo" test_foo
 
 
 Test Harness
diff --git a/Documentation/dev-tools/kunit/api/index.rst b/Documentation/dev-tools/kunit/api/index.rst
new file mode 100644
index 000000000000..9b9bffe5d41a
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+API Reference
+=============
+.. toctree::
+
+	test
+
+This section documents the KUnit kernel testing API. It is divided into the
+following sections:
+
+================================= ==============================================
+:doc:`test`                       documents all of the standard testing API
+                                  excluding mocking or mocking related features.
+================================= ==============================================
diff --git a/Documentation/dev-tools/kunit/api/test.rst b/Documentation/dev-tools/kunit/api/test.rst
new file mode 100644
index 000000000000..aaa97f17e5b3
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/test.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+Test API
+========
+
+This file documents all of the standard testing API excluding mocking or mocking
+related features.
+
+.. kernel-doc:: include/kunit/test.h
+   :internal:
diff --git a/Documentation/dev-tools/kunit/faq.rst b/Documentation/dev-tools/kunit/faq.rst
new file mode 100644
index 000000000000..ea55b2467653
--- /dev/null
+++ b/Documentation/dev-tools/kunit/faq.rst
@@ -0,0 +1,63 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Frequently Asked Questions
+==========================
+
+How is this different from Autotest, kselftest, etc?
+====================================================
+KUnit is a unit testing framework. Autotest, kselftest (and some others) are
+not.
+
+A `unit test <https://martinfowler.com/bliki/UnitTest.html>`_ is supposed to
+test a single unit of code in isolation, hence the name. A unit test should be
+the finest granularity of testing and as such should allow all possible code
+paths to be tested in the code under test; this is only possible if the code
+under test is very small and does not have any external dependencies outside of
+the test's control like hardware.
+
+There are no testing frameworks currently available for the kernel that do not
+require installing the kernel on a test machine or in a VM and all require
+tests to be written in userspace and run on the kernel under test; this is true
+for Autotest, kselftest, and some others, disqualifying any of them from being
+considered unit testing frameworks.
+
+Does KUnit support running on architectures other than UML?
+===========================================================
+
+Yes, well, mostly.
+
+For the most part, the KUnit core framework (what you use to write the tests)
+can compile to any architecture; it compiles like just another part of the
+kernel and runs when the kernel boots, or when built as a module, when the
+module is loaded.  However, there is some infrastructure,
+like the KUnit Wrapper (``tools/testing/kunit/kunit.py``) that does not support
+other architectures.
+
+In short, this means that, yes, you can run KUnit on other architectures, but
+it might require more work than using KUnit on UML.
+
+For more information, see :ref:`kunit-on-non-uml`.
+
+What is the difference between a unit test and these other kinds of tests?
+==========================================================================
+Most existing tests for the Linux kernel would be categorized as an integration
+test, or an end-to-end test.
+
+- A unit test is supposed to test a single unit of code in isolation, hence the
+  name. A unit test should be the finest granularity of testing and as such
+  should allow all possible code paths to be tested in the code under test; this
+  is only possible if the code under test is very small and does not have any
+  external dependencies outside of the test's control like hardware.
+- An integration test tests the interaction between a minimal set of components,
+  usually just two or three. For example, someone might write an integration
+  test to test the interaction between a driver and a piece of hardware, or to
+  test the interaction between the userspace libraries the kernel provides and
+  the kernel itself; however, one of these tests would probably not test the
+  entire kernel along with hardware interactions and interactions with the
+  userspace.
+- An end-to-end test usually tests the entire system from the perspective of the
+  code under test. For example, someone might write an end-to-end test for the
+  kernel by installing a production configuration of the kernel on production
+  hardware with a production userspace and then trying to exercise some behavior
+  that depends on interactions between the hardware, the kernel, and userspace.
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
new file mode 100644
index 000000000000..d16a4d2c3a41
--- /dev/null
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -0,0 +1,83 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+KUnit - Unit Testing for the Linux Kernel
+=========================================
+
+.. toctree::
+	:maxdepth: 2
+
+	start
+	usage
+	kunit-tool
+	api/index
+	faq
+
+What is KUnit?
+==============
+
+KUnit is a lightweight unit testing and mocking framework for the Linux kernel.
+These tests are able to be run locally on a developer's workstation without a VM
+or special hardware.
+
+KUnit is heavily inspired by JUnit, Python's unittest.mock, and
+Googletest/Googlemock for C++. KUnit provides facilities for defining unit test
+cases, grouping related test cases into test suites, providing common
+infrastructure for running tests, and much more.
+
+Get started now: :doc:`start`
+
+Why KUnit?
+==========
+
+A unit test is supposed to test a single unit of code in isolation, hence the
+name. A unit test should be the finest granularity of testing and as such should
+allow all possible code paths to be tested in the code under test; this is only
+possible if the code under test is very small and does not have any external
+dependencies outside of the test's control like hardware.
+
+Outside of KUnit, there are no testing frameworks currently
+available for the kernel that do not require installing the kernel on a test
+machine or in a VM and all require tests to be written in userspace running on
+the kernel; this is true for Autotest, and kselftest, disqualifying
+any of them from being considered unit testing frameworks.
+
+KUnit addresses the problem of being able to run tests without needing a virtual
+machine or actual hardware with User Mode Linux. User Mode Linux is a Linux
+architecture, like ARM or x86; however, unlike other architectures it compiles
+to a standalone program that can be run like any other program directly inside
+of a host operating system; to be clear, it does not require any virtualization
+support; it is just a regular program.
+
+Alternatively, kunit and kunit tests can be built as modules and tests will
+run when the test module is loaded.
+
+KUnit is fast. Excluding build time, from invocation to completion KUnit can run
+several dozen tests in only 10 to 20 seconds; this might not sound like a big
+deal to some people, but having such fast and easy to run tests fundamentally
+changes the way you go about testing and even writing code in the first place.
+Linus himself said in his `git talk at Google
+<https://gist.github.com/lorn/1272686/revisions#diff-53c65572127855f1b003db4064a94573R874>`_:
+
+	"... a lot of people seem to think that performance is about doing the
+	same thing, just doing it faster, and that is not true. That is not what
+	performance is all about. If you can do something really fast, really
+	well, people will start using it differently."
+
+In this context Linus was talking about branching and merging,
+but this point also applies to testing. If your tests are slow, unreliable, are
+difficult to write, and require a special setup or special hardware to run,
+then you wait a lot longer to write tests, and you wait a lot longer to run
+tests; this means that tests are likely to break, unlikely to test a lot of
+things, and are unlikely to be rerun once they pass. If your tests are really
+fast, you run them all the time, every time you make a change, and every time
+someone sends you some code. Why trust that someone ran all their tests
+correctly on every change when you can just run them yourself in less time than
+it takes to read their test log?
+
+How do I use it?
+================
+
+*   :doc:`start` - for new users of KUnit
+*   :doc:`usage` - for a more detailed explanation of KUnit features
+*   :doc:`api/index` - for the list of KUnit APIs used for testing
diff --git a/Documentation/dev-tools/kunit/kunit-tool.rst b/Documentation/dev-tools/kunit/kunit-tool.rst
new file mode 100644
index 000000000000..50d46394e97e
--- /dev/null
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@@ -0,0 +1,57 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+kunit_tool How-To
+=================
+
+What is kunit_tool?
+===================
+
+kunit_tool is a script (``tools/testing/kunit/kunit.py``) that aids in building
+the Linux kernel as UML (`User Mode Linux
+<http://user-mode-linux.sourceforge.net/>`_), running KUnit tests, parsing
+the test results and displaying them in a user friendly manner.
+
+What is a kunitconfig?
+======================
+
+It's just a defconfig that kunit_tool looks for in the base directory.
+kunit_tool uses it to generate a .config as you might expect. In addition, it
+verifies that the generated .config contains the CONFIG options in the
+kunitconfig; the reason it does this is so that it is easy to be sure that a
+CONFIG that enables a test actually ends up in the .config.
+
+How do I use kunit_tool?
+========================
+
+If a kunitconfig is present at the root directory, all you have to do is:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run
+
+However, you most likely want to use it with the following options:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all`
+
+- ``--timeout`` sets a maximum amount of time to allow tests to run.
+- ``--jobs`` sets the number of threads to use to build the kernel.
+
+If you just want to use the defconfig that ships with the kernel, you can
+append the ``--defconfig`` flag as well:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all` --defconfig
+
+.. note::
+	This command is particularly helpful for getting started because it
+	just works. No kunitconfig needs to be present.
+
+For a list of all the flags supported by kunit_tool, you can run:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run --help
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
new file mode 100644
index 000000000000..4e1d24db6b13
--- /dev/null
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -0,0 +1,180 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Getting Started
+===============
+
+Installing dependencies
+=======================
+KUnit has the same dependencies as the Linux kernel. As long as you can build
+the kernel, you can run KUnit.
+
+KUnit Wrapper
+=============
+Included with KUnit is a simple Python wrapper that helps format the output to
+easily use and read KUnit output. It handles building and running the kernel, as
+well as formatting the output.
+
+The wrapper can be run with:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run --defconfig
+
+For more information on this wrapper (also called kunit_tool) checkout the
+:doc:`kunit-tool` page.
+
+Creating a .kunitconfig
+=======================
+The Python script is a thin wrapper around Kbuild. As such, it needs to be
+configured with a ``.kunitconfig`` file. This file essentially contains the
+regular Kernel config, with the specific test targets as well.
+
+.. code-block:: bash
+
+	cd $PATH_TO_LINUX_REPO
+	cp arch/um/configs/kunit_defconfig .kunitconfig
+
+Verifying KUnit Works
+---------------------
+
+To make sure that everything is set up correctly, simply invoke the Python
+wrapper from your kernel repo:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run
+
+.. note::
+   You may want to run ``make mrproper`` first.
+
+If everything worked correctly, you should see the following:
+
+.. code-block:: bash
+
+	Generating .config ...
+	Building KUnit Kernel ...
+	Starting KUnit Kernel ...
+
+followed by a list of tests that are run. All of them should be passing.
+
+.. note::
+	Because it is building a lot of sources for the first time, the
+	``Building KUnit kernel`` step may take a while.
+
+Writing your first test
+=======================
+
+In your kernel repo let's add some code that we can test. Create a file
+``drivers/misc/example.h`` with the contents:
+
+.. code-block:: c
+
+	int misc_example_add(int left, int right);
+
+create a file ``drivers/misc/example.c``:
+
+.. code-block:: c
+
+	#include <linux/errno.h>
+
+	#include "example.h"
+
+	int misc_example_add(int left, int right)
+	{
+		return left + right;
+	}
+
+Now add the following lines to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+	config MISC_EXAMPLE
+		bool "My example"
+
+and the following lines to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+	obj-$(CONFIG_MISC_EXAMPLE) += example.o
+
+Now we are ready to write the test. The test will be in
+``drivers/misc/example-test.c``:
+
+.. code-block:: c
+
+	#include <kunit/test.h>
+	#include "example.h"
+
+	/* Define the test cases. */
+
+	static void misc_example_add_test_basic(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, 1, misc_example_add(1, 0));
+		KUNIT_EXPECT_EQ(test, 2, misc_example_add(1, 1));
+		KUNIT_EXPECT_EQ(test, 0, misc_example_add(-1, 1));
+		KUNIT_EXPECT_EQ(test, INT_MAX, misc_example_add(0, INT_MAX));
+		KUNIT_EXPECT_EQ(test, -1, misc_example_add(INT_MAX, INT_MIN));
+	}
+
+	static void misc_example_test_failure(struct kunit *test)
+	{
+		KUNIT_FAIL(test, "This test never passes.");
+	}
+
+	static struct kunit_case misc_example_test_cases[] = {
+		KUNIT_CASE(misc_example_add_test_basic),
+		KUNIT_CASE(misc_example_test_failure),
+		{}
+	};
+
+	static struct kunit_suite misc_example_test_suite = {
+		.name = "misc-example",
+		.test_cases = misc_example_test_cases,
+	};
+	kunit_test_suite(misc_example_test_suite);
+
+Now add the following to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+	config MISC_EXAMPLE_TEST
+		bool "Test for my example"
+		depends on MISC_EXAMPLE && KUNIT
+
+and the following to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+	obj-$(CONFIG_MISC_EXAMPLE_TEST) += example-test.o
+
+Now add it to your ``.kunitconfig``:
+
+.. code-block:: none
+
+	CONFIG_MISC_EXAMPLE=y
+	CONFIG_MISC_EXAMPLE_TEST=y
+
+Now you can run the test:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run
+
+You should see the following failure:
+
+.. code-block:: none
+
+	...
+	[16:08:57] [PASSED] misc-example:misc_example_add_test_basic
+	[16:08:57] [FAILED] misc-example:misc_example_test_failure
+	[16:08:57] EXPECTATION FAILED at drivers/misc/example-test.c:17
+	[16:08:57] 	This test never passes.
+	...
+
+Congrats! You just wrote your first KUnit test!
+
+Next Steps
+==========
+*   Check out the :doc:`usage` page for a more
+    in-depth explanation of KUnit.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
new file mode 100644
index 000000000000..7cd56a1993b1
--- /dev/null
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -0,0 +1,592 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Using KUnit
+===========
+
+The purpose of this document is to describe what KUnit is, how it works, how it
+is intended to be used, and all the concepts and terminology that are needed to
+understand it. This guide assumes a working knowledge of the Linux kernel and
+some basic knowledge of testing.
+
+For a high level introduction to KUnit, including setting up KUnit for your
+project, see :doc:`start`.
+
+Organization of this document
+=============================
+
+This document is organized into two main sections: Testing and Isolating
+Behavior. The first covers what unit tests are and how to use KUnit to write
+them. The second covers how to use KUnit to isolate code and make it possible
+to unit test code that was otherwise un-unit-testable.
+
+Testing
+=======
+
+What is KUnit?
+--------------
+
+"K" is short for "kernel" so "KUnit" is the "(Linux) Kernel Unit Testing
+Framework." KUnit is intended first and foremost for writing unit tests; it is
+general enough that it can be used to write integration tests; however, this is
+a secondary goal. KUnit has no ambition of being the only testing framework for
+the kernel; for example, it does not intend to be an end-to-end testing
+framework.
+
+What is Unit Testing?
+---------------------
+
+A `unit test <https://martinfowler.com/bliki/UnitTest.html>`_ is a test that
+tests code at the smallest possible scope, a *unit* of code. In the C
+programming language that's a function.
+
+Unit tests should be written for all the publicly exposed functions in a
+compilation unit; so that is all the functions that are exported in either a
+*class* (defined below) or all functions which are **not** static.
+
+Writing Tests
+-------------
+
+Test Cases
+~~~~~~~~~~
+
+The fundamental unit in KUnit is the test case. A test case is a function with
+the signature ``void (*)(struct kunit *test)``. It calls a function to be tested
+and then sets *expectations* for what should happen. For example:
+
+.. code-block:: c
+
+	void example_test_success(struct kunit *test)
+	{
+	}
+
+	void example_test_failure(struct kunit *test)
+	{
+		KUNIT_FAIL(test, "This test never passes.");
+	}
+
+In the above example ``example_test_success`` always passes because it does
+nothing; no expectations are set, so all expectations pass. On the other hand
+``example_test_failure`` always fails because it calls ``KUNIT_FAIL``, which is
+a special expectation that logs a message and causes the test case to fail.
+
+Expectations
+~~~~~~~~~~~~
+An *expectation* is a way to specify that you expect a piece of code to do
+something in a test. An expectation is called like a function. A test is made
+by setting expectations about the behavior of a piece of code under test; when
+one or more of the expectations fail, the test case fails and information about
+the failure is logged. For example:
+
+.. code-block:: c
+
+	void add_test_basic(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+		KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+	}
+
+In the above example ``add_test_basic`` makes a number of assertions about the
+behavior of a function called ``add``; the first parameter is always of type
+``struct kunit *``, which contains information about the current test context;
+the second parameter, in this case, is what the value is expected to be; the
+last value is what the value actually is. If ``add`` passes all of these
+expectations, the test case, ``add_test_basic`` will pass; if any one of these
+expectations fail, the test case will fail.
+
+It is important to understand that a test case *fails* when any expectation is
+violated; however, the test will continue running, potentially trying other
+expectations until the test case ends or is otherwise terminated. This is as
+opposed to *assertions* which are discussed later.
+
+To learn about more expectations supported by KUnit, see :doc:`api/test`.
+
+.. note::
+   A single test case should be pretty short, pretty easy to understand,
+   focused on a single behavior.
+
+For example, if we wanted to properly test the add function above, we would
+create additional tests cases which would each test a different property that an
+add function should have like this:
+
+.. code-block:: c
+
+	void add_test_basic(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+		KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+	}
+
+	void add_test_negative(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, 0, add(-1, 1));
+	}
+
+	void add_test_max(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, INT_MAX, add(0, INT_MAX));
+		KUNIT_EXPECT_EQ(test, -1, add(INT_MAX, INT_MIN));
+	}
+
+	void add_test_overflow(struct kunit *test)
+	{
+		KUNIT_EXPECT_EQ(test, INT_MIN, add(INT_MAX, 1));
+	}
+
+Notice how it is immediately obvious what all the properties that we are testing
+for are.
+
+Assertions
+~~~~~~~~~~
+
+KUnit also has the concept of an *assertion*. An assertion is just like an
+expectation except the assertion immediately terminates the test case if it is
+not satisfied.
+
+For example:
+
+.. code-block:: c
+
+	static void mock_test_do_expect_default_return(struct kunit *test)
+	{
+		struct mock_test_context *ctx = test->priv;
+		struct mock *mock = ctx->mock;
+		int param0 = 5, param1 = -5;
+		const char *two_param_types[] = {"int", "int"};
+		const void *two_params[] = {&param0, &param1};
+		const void *ret;
+
+		ret = mock->do_expect(mock,
+				      "test_printk", test_printk,
+				      two_param_types, two_params,
+				      ARRAY_SIZE(two_params));
+		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ret);
+		KUNIT_EXPECT_EQ(test, -4, *((int *) ret));
+	}
+
+In this example, the method under test should return a pointer to a value, so
+if the pointer returned by the method is null or an errno, we don't want to
+bother continuing the test since the following expectation could crash the test
+case. `ASSERT_NOT_ERR_OR_NULL(...)` allows us to bail out of the test case if
+the appropriate conditions have not been satisfied to complete the test.
+
+Test Suites
+~~~~~~~~~~~
+
+Now obviously one unit test isn't very helpful; the power comes from having
+many test cases covering all of a unit's behaviors. Consequently it is common
+to have many *similar* tests; in order to reduce duplication in these closely
+related tests most unit testing frameworks - including KUnit - provide the
+concept of a *test suite*. A *test suite* is just a collection of test cases
+for a unit of code with a set up function that gets invoked before every test
+case and then a tear down function that gets invoked after every test case
+completes.
+
+Example:
+
+.. code-block:: c
+
+	static struct kunit_case example_test_cases[] = {
+		KUNIT_CASE(example_test_foo),
+		KUNIT_CASE(example_test_bar),
+		KUNIT_CASE(example_test_baz),
+		{}
+	};
+
+	static struct kunit_suite example_test_suite = {
+		.name = "example",
+		.init = example_test_init,
+		.exit = example_test_exit,
+		.test_cases = example_test_cases,
+	};
+	kunit_test_suite(example_test_suite);
+
+In the above example the test suite, ``example_test_suite``, would run the test
+cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``,
+each would have ``example_test_init`` called immediately before it and would
+have ``example_test_exit`` called immediately after it.
+``kunit_test_suite(example_test_suite)`` registers the test suite with the
+KUnit test framework.
+
+.. note::
+   A test case will only be run if it is associated with a test suite.
+
+For more information on these types of things see the :doc:`api/test`.
+
+Isolating Behavior
+==================
+
+The most important aspect of unit testing that other forms of testing do not
+provide is the ability to limit the amount of code under test to a single unit.
+In practice, this is only possible by being able to control what code gets run
+when the unit under test calls a function and this is usually accomplished
+through some sort of indirection where a function is exposed as part of an API
+such that the definition of that function can be changed without affecting the
+rest of the code base. In the kernel this primarily comes from two constructs,
+classes, structs that contain function pointers that are provided by the
+implementer, and architecture specific functions which have definitions selected
+at compile time.
+
+Classes
+-------
+
+Classes are not a construct that is built into the C programming language;
+however, it is an easily derived concept. Accordingly, pretty much every project
+that does not use a standardized object oriented library (like GNOME's GObject)
+has their own slightly different way of doing object oriented programming; the
+Linux kernel is no exception.
+
+The central concept in kernel object oriented programming is the class. In the
+kernel, a *class* is a struct that contains function pointers. This creates a
+contract between *implementers* and *users* since it forces them to use the
+same function signature without having to call the function directly. In order
+for it to truly be a class, the function pointers must specify that a pointer
+to the class, known as a *class handle*, be one of the parameters; this makes
+it possible for the member functions (also known as *methods*) to have access
+to member variables (more commonly known as *fields*) allowing the same
+implementation to have multiple *instances*.
+
+Typically a class can be *overridden* by *child classes* by embedding the
+*parent class* in the child class. Then when a method provided by the child
+class is called, the child implementation knows that the pointer passed to it is
+of a parent contained within the child; because of this, the child can compute
+the pointer to itself because the pointer to the parent is always a fixed offset
+from the pointer to the child; this offset is the offset of the parent contained
+in the child struct. For example:
+
+.. code-block:: c
+
+	struct shape {
+		int (*area)(struct shape *this);
+	};
+
+	struct rectangle {
+		struct shape parent;
+		int length;
+		int width;
+	};
+
+	int rectangle_area(struct shape *this)
+	{
+		struct rectangle *self = container_of(this, struct shape, parent);
+
+		return self->length * self->width;
+	};
+
+	void rectangle_new(struct rectangle *self, int length, int width)
+	{
+		self->parent.area = rectangle_area;
+		self->length = length;
+		self->width = width;
+	}
+
+In this example (as in most kernel code) the operation of computing the pointer
+to the child from the pointer to the parent is done by ``container_of``.
+
+Faking Classes
+~~~~~~~~~~~~~~
+
+In order to unit test a piece of code that calls a method in a class, the
+behavior of the method must be controllable, otherwise the test ceases to be a
+unit test and becomes an integration test.
+
+A fake just provides an implementation of a piece of code that is different than
+what runs in a production instance, but behaves identically from the standpoint
+of the callers; this is usually done to replace a dependency that is hard to
+deal with, or is slow.
+
+A good example for this might be implementing a fake EEPROM that just stores the
+"contents" in an internal buffer. For example, let's assume we have a class that
+represents an EEPROM:
+
+.. code-block:: c
+
+	struct eeprom {
+		ssize_t (*read)(struct eeprom *this, size_t offset, char *buffer, size_t count);
+		ssize_t (*write)(struct eeprom *this, size_t offset, const char *buffer, size_t count);
+	};
+
+And we want to test some code that buffers writes to the EEPROM:
+
+.. code-block:: c
+
+	struct eeprom_buffer {
+		ssize_t (*write)(struct eeprom_buffer *this, const char *buffer, size_t count);
+		int flush(struct eeprom_buffer *this);
+		size_t flush_count; /* Flushes when buffer exceeds flush_count. */
+	};
+
+	struct eeprom_buffer *new_eeprom_buffer(struct eeprom *eeprom);
+	void destroy_eeprom_buffer(struct eeprom *eeprom);
+
+We can easily test this code by *faking out* the underlying EEPROM:
+
+.. code-block:: c
+
+	struct fake_eeprom {
+		struct eeprom parent;
+		char contents[FAKE_EEPROM_CONTENTS_SIZE];
+	};
+
+	ssize_t fake_eeprom_read(struct eeprom *parent, size_t offset, char *buffer, size_t count)
+	{
+		struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+		count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+		memcpy(buffer, this->contents + offset, count);
+
+		return count;
+	}
+
+	ssize_t fake_eeprom_write(struct eeprom *parent, size_t offset, const char *buffer, size_t count)
+	{
+		struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+		count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+		memcpy(this->contents + offset, buffer, count);
+
+		return count;
+	}
+
+	void fake_eeprom_init(struct fake_eeprom *this)
+	{
+		this->parent.read = fake_eeprom_read;
+		this->parent.write = fake_eeprom_write;
+		memset(this->contents, 0, FAKE_EEPROM_CONTENTS_SIZE);
+	}
+
+We can now use it to test ``struct eeprom_buffer``:
+
+.. code-block:: c
+
+	struct eeprom_buffer_test {
+		struct fake_eeprom *fake_eeprom;
+		struct eeprom_buffer *eeprom_buffer;
+	};
+
+	static void eeprom_buffer_test_does_not_write_until_flush(struct kunit *test)
+	{
+		struct eeprom_buffer_test *ctx = test->priv;
+		struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+		struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+		char buffer[] = {0xff};
+
+		eeprom_buffer->flush_count = SIZE_MAX;
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 1);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 1);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0);
+
+		eeprom_buffer->flush(eeprom_buffer);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+	}
+
+	static void eeprom_buffer_test_flushes_after_flush_count_met(struct kunit *test)
+	{
+		struct eeprom_buffer_test *ctx = test->priv;
+		struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+		struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+		char buffer[] = {0xff};
+
+		eeprom_buffer->flush_count = 2;
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 1);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 1);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+	}
+
+	static void eeprom_buffer_test_flushes_increments_of_flush_count(struct kunit *test)
+	{
+		struct eeprom_buffer_test *ctx = test->priv;
+		struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+		struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+		char buffer[] = {0xff, 0xff};
+
+		eeprom_buffer->flush_count = 2;
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 1);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+		eeprom_buffer->write(eeprom_buffer, buffer, 2);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+		/* Should have only flushed the first two bytes. */
+		KUNIT_EXPECT_EQ(test, fake_eeprom->contents[2], 0);
+	}
+
+	static int eeprom_buffer_test_init(struct kunit *test)
+	{
+		struct eeprom_buffer_test *ctx;
+
+		ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+
+		ctx->fake_eeprom = kunit_kzalloc(test, sizeof(*ctx->fake_eeprom), GFP_KERNEL);
+		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->fake_eeprom);
+		fake_eeprom_init(ctx->fake_eeprom);
+
+		ctx->eeprom_buffer = new_eeprom_buffer(&ctx->fake_eeprom->parent);
+		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->eeprom_buffer);
+
+		test->priv = ctx;
+
+		return 0;
+	}
+
+	static void eeprom_buffer_test_exit(struct kunit *test)
+	{
+		struct eeprom_buffer_test *ctx = test->priv;
+
+		destroy_eeprom_buffer(ctx->eeprom_buffer);
+	}
+
+.. _kunit-on-non-uml:
+
+KUnit on non-UML architectures
+==============================
+
+By default KUnit uses UML as a way to provide dependencies for code under test.
+Under most circumstances KUnit's usage of UML should be treated as an
+implementation detail of how KUnit works under the hood. Nevertheless, there
+are instances where being able to run architecture specific code or test
+against real hardware is desirable. For these reasons KUnit supports running on
+other architectures.
+
+Running existing KUnit tests on non-UML architectures
+-----------------------------------------------------
+
+There are some special considerations when running existing KUnit tests on
+non-UML architectures:
+
+*   Hardware may not be deterministic, so a test that always passes or fails
+    when run under UML may not always do so on real hardware.
+*   Hardware and VM environments may not be hermetic. KUnit tries its best to
+    provide a hermetic environment to run tests; however, it cannot manage state
+    that it doesn't know about outside of the kernel. Consequently, tests that
+    may be hermetic on UML may not be hermetic on other architectures.
+*   Some features and tooling may not be supported outside of UML.
+*   Hardware and VMs are slower than UML.
+
+None of these are reasons not to run your KUnit tests on real hardware; they are
+only things to be aware of when doing so.
+
+The biggest impediment will likely be that certain KUnit features and
+infrastructure may not support your target environment. For example, at this
+time the KUnit Wrapper (``tools/testing/kunit/kunit.py``) does not work outside
+of UML. Unfortunately, there is no way around this. Using UML (or even just a
+particular architecture) allows us to make a lot of assumptions that make it
+possible to do things which might otherwise be impossible.
+
+Nevertheless, all core KUnit framework features are fully supported on all
+architectures, and using them is straightforward: all you need to do is to take
+your kunitconfig, your Kconfig options for the tests you would like to run, and
+merge them into whatever config your are using for your platform. That's it!
+
+For example, let's say you have the following kunitconfig:
+
+.. code-block:: none
+
+	CONFIG_KUNIT=y
+	CONFIG_KUNIT_EXAMPLE_TEST=y
+
+If you wanted to run this test on an x86 VM, you might add the following config
+options to your ``.config``:
+
+.. code-block:: none
+
+	CONFIG_KUNIT=y
+	CONFIG_KUNIT_EXAMPLE_TEST=y
+	CONFIG_SERIAL_8250=y
+	CONFIG_SERIAL_8250_CONSOLE=y
+
+All these new options do is enable support for a common serial console needed
+for logging.
+
+Next, you could build a kernel with these tests as follows:
+
+
+.. code-block:: bash
+
+	make ARCH=x86 olddefconfig
+	make ARCH=x86
+
+Once you have built a kernel, you could run it on QEMU as follows:
+
+.. code-block:: bash
+
+	qemu-system-x86_64 -enable-kvm \
+			   -m 1024 \
+			   -kernel arch/x86_64/boot/bzImage \
+			   -append 'console=ttyS0' \
+			   --nographic
+
+Interspersed in the kernel logs you might see the following:
+
+.. code-block:: none
+
+	TAP version 14
+		# Subtest: example
+		1..1
+		# example_simple_test: initializing
+		ok 1 - example_simple_test
+	ok 1 - example
+
+Congratulations, you just ran a KUnit test on the x86 architecture!
+
+In a similar manner, kunit and kunit tests can also be built as modules,
+so if you wanted to run tests in this way you might add the following config
+options to your ``.config``:
+
+.. code-block:: none
+
+	CONFIG_KUNIT=m
+	CONFIG_KUNIT_EXAMPLE_TEST=m
+
+Once the kernel is built and installed, a simple
+
+.. code-block:: bash
+	modprobe example-test
+
+...will run the tests.
+
+Writing new tests for other architectures
+-----------------------------------------
+
+The first thing you must do is ask yourself whether it is necessary to write a
+KUnit test for a specific architecture, and then whether it is necessary to
+write that test for a particular piece of hardware. In general, writing a test
+that depends on having access to a particular piece of hardware or software (not
+included in the Linux source repo) should be avoided at all costs.
+
+Even if you only ever plan on running your KUnit test on your hardware
+configuration, other people may want to run your tests and may not have access
+to your hardware. If you write your test to run on UML, then anyone can run your
+tests without knowing anything about your particular setup, and you can still
+run your tests on your hardware setup just by compiling for your architecture.
+
+.. important::
+   Always prefer tests that run on UML to tests that only run under a particular
+   architecture, and always prefer tests that run under QEMU or another easy
+   (and monetarily free) to obtain software environment to a specific piece of
+   hardware.
+
+Nevertheless, there are still valid reasons to write an architecture or hardware
+specific test: for example, you might want to test some code that really belongs
+in ``arch/some-arch/*``. Even so, try your best to write the test so that it
+does not depend on physical hardware: if some of your test cases don't need the
+hardware, only require the hardware for tests that actually need it.
+
+Now that you have narrowed down exactly what bits are hardware specific, the
+actual procedure for writing and running the tests is pretty much the same as
+writing normal KUnit tests. One special caveat is that you have to reset
+hardware state in between test cases; if this is not possible, you may only be
+able to run one test case per invocation.
+
+.. TODO(brendanhiggins@google.com): Add an actual example of an architecture
+   dependent KUnit test.
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 5138a2f6232a..646cb3525373 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -12,7 +12,6 @@ $(obj)/%.example.dts: $(src)/%.yaml FORCE
 	$(call if_changed,chk_binding)
 
 DT_TMP_SCHEMA := processed-schema.yaml
-extra-y += $(DT_TMP_SCHEMA)
 
 quiet_cmd_mk_schema = SCHEMA  $@
       cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(real-prereqs)
@@ -26,8 +25,12 @@ DT_DOCS = $(shell \
 
 DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
 
+ifeq ($(CHECK_DTBS),)
 extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
 extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
+endif
 
 $(obj)/$(DT_TMP_SCHEMA): $(DT_SCHEMA_FILES) FORCE
 	$(call if_changed,mk_schema)
+
+extra-y += $(DT_TMP_SCHEMA)
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index 99015cef8bb1..c6a443352ef8 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -94,7 +94,7 @@ properties:
               - amlogic,p212
               - hwacom,amazetv
               - khadas,vim
-              - libretech,cc
+              - libretech,aml-s905x-cc
               - nexbox,a95x
           - const: amlogic,s905x
           - const: amlogic,meson-gxl
@@ -147,6 +147,7 @@ properties:
           - enum:
               - hardkernel,odroid-n2
               - khadas,vim3
+              - ugoos,am6
           - const: amlogic,s922x
           - const: amlogic,g12b
 
@@ -156,4 +157,10 @@ properties:
               - seirobotics,sei610
               - khadas,vim3l
           - const: amlogic,sm1
+
+      - description: Boards with the Amlogic Meson A1 A113L SoC
+        items:
+          - enum:
+              - amlogic,ad401
+          - const: amlogic,a1
 ...
diff --git a/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt b/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt
deleted file mode 100644
index 3473ddaadfac..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Amlogic Meson8 and Meson8b SRAM for smp bringup:
-------------------------------------------------
-
-Amlogic's SMP-capable SoCs use part of the sram for the bringup of the cores.
-Once the core gets powered up it executes the code that is residing at a
-specific location.
-
-Therefore a reserved section sub-node has to be added to the mmio-sram
-declaration.
-
-Required sub-node properties:
-- compatible : depending on the SoC this should be one of:
-		"amlogic,meson8-smp-sram"
-		"amlogic,meson8b-smp-sram"
-
-The rest of the properties should follow the generic mmio-sram discription
-found in ../../misc/sram.txt
-
-Example:
-
-	sram: sram@d9000000 {
-		compatible = "mmio-sram";
-		reg = <0xd9000000 0x20000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0xd9000000 0x20000>;
-
-		smp-sram@1ff80 {
-			compatible = "amlogic,meson8b-smp-sram";
-			reg = <0x1ff80 0x8>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/arm/arm,scmi.txt b/Documentation/devicetree/bindings/arm/arm,scmi.txt
index 083dbf96ee00..f493d69e6194 100644
--- a/Documentation/devicetree/bindings/arm/arm,scmi.txt
+++ b/Documentation/devicetree/bindings/arm/arm,scmi.txt
@@ -100,7 +100,7 @@ Required sub-node properties:
 
 [0] http://infocenter.arm.com/help/topic/com.arm.doc.den0056a/index.html
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/power/power_domain.txt
+[2] Documentation/devicetree/bindings/power/power-domain.yaml
 [3] Documentation/devicetree/bindings/thermal/thermal.txt
 [4] Documentation/devicetree/bindings/sram/sram.txt
 [5] Documentation/devicetree/bindings/reset/reset.txt
diff --git a/Documentation/devicetree/bindings/arm/arm,scpi.txt b/Documentation/devicetree/bindings/arm/arm,scpi.txt
index 401831973638..7b83ef43b418 100644
--- a/Documentation/devicetree/bindings/arm/arm,scpi.txt
+++ b/Documentation/devicetree/bindings/arm/arm,scpi.txt
@@ -110,7 +110,7 @@ Required properties:
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
 [2] Documentation/devicetree/bindings/thermal/thermal.txt
 [3] Documentation/devicetree/bindings/sram/sram.txt
-[4] Documentation/devicetree/bindings/power/power_domain.txt
+[4] Documentation/devicetree/bindings/power/power-domain.yaml
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
index b2a9f9f8430b..96b1dad58253 100644
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -121,7 +121,7 @@ Required properties (in root node):
 Required nodes:
 
 - soc: some node of the RealView platforms must be the SoC
-  node that contain the SoC-specific devices, withe the compatible
+  node that contain the SoC-specific devices, with the compatible
   string set to one of these tuples:
    "arm,realview-eb-soc", "simple-bus"
    "arm,realview-pb1176-soc", "simple-bus"
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
index 6e168abcd4d1..6dd8be401673 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.yaml
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
@@ -45,6 +45,13 @@ properties:
           - const: atmel,at91sam9x5
           - const: atmel,at91sam9
 
+      - description: Overkiz kizbox3 board
+        items:
+          - const: overkiz,kizbox3-hs
+          - const: atmel,sama5d27
+          - const: atmel,sama5d2
+          - const: atmel,sama5
+
       - items:
           - const: atmel,sama5d27
           - const: atmel,sama5d2
@@ -73,6 +80,13 @@ properties:
           - const: atmel,sama5d3
           - const: atmel,sama5
 
+      - description: Overkiz kizbox2 board with two heads
+        items:
+          - const: overkiz,kizbox2-2
+          - const: atmel,sama5d31
+          - const: atmel,sama5d3
+          - const: atmel,sama5
+
       - items:
           - enum:
               - atmel,sama5d31
diff --git a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
index 9fbde401a090..e003a553b986 100644
--- a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
@@ -10,6 +10,12 @@ PIT Timer required properties:
 - interrupts: Should contain interrupt for the PIT which is the IRQ line
   shared across all System Controller members.
 
+PIT64B Timer required properties:
+- compatible: Should be "microchip,sam9x60-pit64b"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for PIT64B timer
+- clocks: Should contain the available clock sources for PIT64B timer.
+
 System Timer (ST) required properties:
 - compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
 - reg: Should contain registers location and length
diff --git a/Documentation/devicetree/bindings/arm/axentia.txt b/Documentation/devicetree/bindings/arm/axentia.txt
deleted file mode 100644
index de58f2463880..000000000000
--- a/Documentation/devicetree/bindings/arm/axentia.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Device tree bindings for Axentia ARM devices
-============================================
-
-Linea CPU module
-----------------
-
-Required root node properties:
-compatible = "axentia,linea",
-	     "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from atmel-at91.txt for a sama5d31 SoC.
-
-
-Nattis v2 board with Natte v2 power board
------------------------------------------
-
-Required root node properties:
-compatible = "axentia,nattis-2", "axentia,natte-2", "axentia,linea",
-	     "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from above for the axentia,linea CPU module.
-
-
-TSE-850 v3 board
-----------------
-
-Required root node properties:
-compatible = "axentia,tse850v3", "axentia,linea",
-	     "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from above for the axentia,linea CPU module.
diff --git a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
new file mode 100644
index 000000000000..dd52e29b0642
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/bcm2835.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2711/BCM2835 Platforms Device Tree Bindings
+
+maintainers:
+  - Eric Anholt <eric@anholt.net>
+  - Stefan Wahren <wahrenst@gmx.net>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: BCM2711 based Boards
+        items:
+          - enum:
+              - raspberrypi,4-model-b
+          - const: brcm,bcm2711
+
+      - description: BCM2835 based Boards
+        items:
+          - enum:
+              - raspberrypi,model-a
+              - raspberrypi,model-a-plus
+              - raspberrypi,model-b
+              - raspberrypi,model-b-i2c0  # Raspberry Pi Model B (no P5)
+              - raspberrypi,model-b-rev2
+              - raspberrypi,model-b-plus
+              - raspberrypi,compute-module
+              - raspberrypi,model-zero
+              - raspberrypi,model-zero-w
+          - const: brcm,bcm2835
+
+      - description: BCM2836 based Boards
+        items:
+          - enum:
+              - raspberrypi,2-model-b
+          - const: brcm,bcm2836
+
+      - description: BCM2837 based Boards
+        items:
+          - enum:
+              - raspberrypi,3-model-a-plus
+              - raspberrypi,3-model-b
+              - raspberrypi,3-model-b-plus
+              - raspberrypi,3-compute-module
+              - raspberrypi,3-compute-module-lite
+          - const: brcm,bcm2837
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
deleted file mode 100644
index 245328f36580..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-Broadcom BCM2835 device tree bindings
--------------------------------------------
-
-Raspberry Pi Model A
-Required root node properties:
-compatible = "raspberrypi,model-a", "brcm,bcm2835";
-
-Raspberry Pi Model A+
-Required root node properties:
-compatible = "raspberrypi,model-a-plus", "brcm,bcm2835";
-
-Raspberry Pi Model B
-Required root node properties:
-compatible = "raspberrypi,model-b", "brcm,bcm2835";
-
-Raspberry Pi Model B (no P5)
-early model B with I2C0 rather than I2C1 routed to the expansion header
-Required root node properties:
-compatible = "raspberrypi,model-b-i2c0", "brcm,bcm2835";
-
-Raspberry Pi Model B rev2
-Required root node properties:
-compatible = "raspberrypi,model-b-rev2", "brcm,bcm2835";
-
-Raspberry Pi Model B+
-Required root node properties:
-compatible = "raspberrypi,model-b-plus", "brcm,bcm2835";
-
-Raspberry Pi 2 Model B
-Required root node properties:
-compatible = "raspberrypi,2-model-b", "brcm,bcm2836";
-
-Raspberry Pi 3 Model A+
-Required root node properties:
-compatible = "raspberrypi,3-model-a-plus", "brcm,bcm2837";
-
-Raspberry Pi 3 Model B
-Required root node properties:
-compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
-
-Raspberry Pi 3 Model B+
-Required root node properties:
-compatible = "raspberrypi,3-model-b-plus", "brcm,bcm2837";
-
-Raspberry Pi Compute Module
-Required root node properties:
-compatible = "raspberrypi,compute-module", "brcm,bcm2835";
-
-Raspberry Pi Compute Module 3
-Required root node properties:
-compatible = "raspberrypi,3-compute-module", "brcm,bcm2837";
-
-Raspberry Pi Compute Module 3 Lite
-Required root node properties:
-compatible = "raspberrypi,3-compute-module-lite", "brcm,bcm2837";
-
-Raspberry Pi Zero
-Required root node properties:
-compatible = "raspberrypi,model-zero", "brcm,bcm2835";
-
-Raspberry Pi Zero W
-Required root node properties:
-compatible = "raspberrypi,model-zero-w", "brcm,bcm2835";
-
-Generic BCM2835 board
-Required root node properties:
-compatible = "brcm,bcm2835";
diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
index fcc3bacfd8bc..d02c42d21f2f 100644
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -87,6 +87,15 @@ its hardware characteristcs.
 
 	* port or ports: see "Graph bindings for Coresight" below.
 
+* Optional properties for all components:
+
+	* arm,coresight-loses-context-with-cpu : boolean. Indicates that the
+	  hardware will lose register context on CPU power down (e.g. CPUIdle).
+	  An example of where this may be needed are systems which contain a
+	  coresight component and CPU in the same power domain. When the CPU
+	  powers down the coresight component also powers down and loses its
+	  context. This property is currently only used for the ETM 4.x driver.
+
 * Optional properties for ETM/PTMs:
 
 	* arm,cp14: must be present if the system accesses ETM/PTM management
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index cb30895e3b67..c23c24ff7575 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -189,6 +189,7 @@ properties:
               - marvell,armada-390-smp
               - marvell,armada-xp-smp
               - marvell,98dx3236-smp
+              - marvell,mmp3-smp
               - mediatek,mt6589-smp
               - mediatek,mt81xx-tz-smp
               - qcom,gcc-msm8660
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
index c149fadc6f47..e07735a8c2c7 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
@@ -124,7 +124,7 @@ Required properties for Pinctrl sub nodes:
 			CONFIG settings.
 
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/power/power_domain.txt
+[2] Documentation/devicetree/bindings/power/power-domain.yaml
 [3] Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt
 
 RTC bindings based on SCU Message Protocol
@@ -157,6 +157,15 @@ Required properties:
 Optional properties:
 - timeout-sec: contains the watchdog timeout in seconds.
 
+SCU key bindings based on SCU Message Protocol
+------------------------------------------------------------
+
+Required properties:
+- compatible: should be:
+              "fsl,imx8qxp-sc-key"
+              followed by "fsl,imx-sc-key";
+- linux,keycodes: See Documentation/devicetree/bindings/input/keys.txt
+
 Example (imx8qxp):
 -------------
 aliases {
@@ -220,6 +229,11 @@ firmware {
 			compatible = "fsl,imx8qxp-sc-rtc";
 		};
 
+		scu_key: scu-key {
+			compatible = "fsl,imx8qxp-sc-key", "fsl,imx-sc-key";
+			linux,keycodes = <KEY_POWER>;
+		};
+
 		watchdog {
 			compatible = "fsl,imx8qxp-sc-wdt", "fsl,imx-sc-wdt";
 			timeout-sec = <60>;
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 1b4b4e6573b5..f79683a628f0 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -38,12 +38,16 @@ properties:
       - description: i.MX27 Product Development Kit
         items:
           - enum:
+              - armadeus,imx27-apf27      # APF27 SoM
+              - armadeus,imx27-apf27dev   # APF27 SoM on APF27Dev board
               - fsl,imx27-pdk
           - const: fsl,imx27
 
       - description: i.MX28 based Boards
         items:
           - enum:
+              - armadeus,imx28-apf28      # APF28 SoM
+              - armadeus,imx28-apf28dev   # APF28 SoM on APF28Dev board
               - fsl,imx28-evk
               - i2se,duckbill
               - i2se,duckbill-2
@@ -87,7 +91,8 @@ properties:
       - description: i.MX51 Babbage Board
         items:
           - enum:
-              - armadeus,imx51-apf51
+              - armadeus,imx51-apf51    # APF51 SoM
+              - armadeus,imx51-apf51dev # APF51 SoM on APF51Dev board
               - fsl,imx51-babbage
               - technologic,imx51-ts4800
           - const: fsl,imx51
@@ -106,6 +111,8 @@ properties:
       - description: i.MX6Q based Boards
         items:
           - enum:
+              - armadeus,imx6q-apf6       # APF6 (Quad/Dual) SoM
+              - armadeus,imx6q-apf6dev    # APF6 (Quad/Dual) SoM on APF6Dev board
               - emtrion,emcon-mx6         # emCON-MX6D or emCON-MX6Q SoM
               - emtrion,emcon-mx6-avari   # emCON-MX6D or emCON-MX6Q SoM on Avari Base
               - fsl,imx6q-arm2
@@ -114,6 +121,11 @@ properties:
               - fsl,imx6q-sabresd
               - technologic,imx6q-ts4900
               - technologic,imx6q-ts7970
+              - toradex,apalis_imx6q            # Apalis iMX6 Module
+              - toradex,apalis_imx6q-eval       # Apalis iMX6 Module on Apalis Evaluation Board
+              - toradex,apalis_imx6q-ixora      # Apalis iMX6 Module on Ixora
+              - toradex,apalis_imx6q-ixora-v1.1 # Apalis iMX6 Module on Ixora V1.1
+              - variscite,dt6customboard
           - const: fsl,imx6q
 
       - description: i.MX6QP based Boards
@@ -126,6 +138,8 @@ properties:
       - description: i.MX6DL based Boards
         items:
           - enum:
+              - armadeus,imx6dl-apf6      # APF6 (Solo) SoM
+              - armadeus,imx6dl-apf6dldev # APF6 (Solo) SoM on APF6Dev board
               - eckelmann,imx6dl-ci4x10
               - emtrion,emcon-mx6         # emCON-MX6S or emCON-MX6DL SoM
               - emtrion,emcon-mx6-avari   # emCON-MX6S or emCON-MX6DL SoM on Avari Base
@@ -133,6 +147,8 @@ properties:
               - fsl,imx6dl-sabresd        # i.MX6 DualLite SABRE Smart Device Board
               - technologic,imx6dl-ts4900
               - technologic,imx6dl-ts7970
+              - toradex,colibri_imx6dl          # Colibri iMX6 Module
+              - toradex,colibri_imx6dl-eval-v3  # Colibri iMX6 Module on Colibri Evaluation Board V3
               - ysoft,imx6dl-yapp4-draco  # i.MX6 DualLite Y Soft IOTA Draco board
               - ysoft,imx6dl-yapp4-hydra  # i.MX6 DualLite Y Soft IOTA Hydra board
               - ysoft,imx6dl-yapp4-ursa   # i.MX6 Solo Y Soft IOTA Ursa board
@@ -148,6 +164,7 @@ properties:
         items:
           - enum:
               - fsl,imx6sll-evk
+              - kobo,clarahd
           - const: fsl,imx6sll
 
       - description: i.MX6SX based Boards
@@ -160,8 +177,11 @@ properties:
       - description: i.MX6UL based Boards
         items:
           - enum:
+              - armadeus,imx6ul-opos6ul    # OPOS6UL (i.MX6UL) SoM
+              - armadeus,imx6ul-opos6uldev # OPOS6UL (i.MX6UL) SoM on OPOS6ULDev board
               - fsl,imx6ul-14x14-evk      # i.MX6 UltraLite 14x14 EVK Board
               - kontron,imx6ul-n6310-som  # Kontron N6310 SOM
+              - kontron,imx6ul-n6311-som  # Kontron N6311 SOM
           - const: fsl,imx6ul
 
       - description: Kontron N6310 S Board
@@ -170,6 +190,12 @@ properties:
           - const: kontron,imx6ul-n6310-som
           - const: fsl,imx6ul
 
+      - description: Kontron N6311 S Board
+        items:
+          - const: kontron,imx6ul-n6311-s
+          - const: kontron,imx6ul-n6311-som
+          - const: fsl,imx6ul
+
       - description: Kontron N6310 S 43 Board
         items:
           - const: kontron,imx6ul-n6310-s-43
@@ -180,7 +206,18 @@ properties:
       - description: i.MX6ULL based Boards
         items:
           - enum:
+              - armadeus,imx6ull-opos6ul    # OPOS6UL (i.MX6ULL) SoM
+              - armadeus,imx6ull-opos6uldev # OPOS6UL (i.MX6ULL) SoM on OPOS6ULDev board
               - fsl,imx6ull-14x14-evk     # i.MX6 UltraLiteLite 14x14 EVK Board
+              - kontron,imx6ull-n6411-som # Kontron N6411 SOM
+              - toradex,colibri-imx6ull-eval            # Colibri iMX6ULL Module on Colibri Evaluation Board
+              - toradex,colibri-imx6ull-wifi-eval       # Colibri iMX6ULL Wi-Fi / Bluetooth Module on Colibri Evaluation Board
+          - const: fsl,imx6ull
+
+      - description: Kontron N6411 S Board
+        items:
+          - const: kontron,imx6ull-n6411-s
+          - const: kontron,imx6ull-n6411-som
           - const: fsl,imx6ull
 
       - description: i.MX6ULZ based Boards
@@ -193,6 +230,8 @@ properties:
       - description: i.MX7S based Boards
         items:
           - enum:
+              - toradex,colibri-imx7s           # Colibri iMX7 Solo Module
+              - toradex,colibri-imx7s-eval-v3   # Colibri iMX7 Solo Module on Colibri Evaluation Board V3
               - tq,imx7s-mba7             # i.MX7S TQ MBa7 with TQMa7S SoM
           - const: fsl,imx7s
 
@@ -201,6 +240,10 @@ properties:
           - enum:
               - fsl,imx7d-sdb             # i.MX7 SabreSD Board
               - novtech,imx7d-meerkat96   # i.MX7 Meerkat96 Board
+              - toradex,colibri-imx7d                   # Colibri iMX7 Dual Module
+              - toradex,colibri-imx7d-emmc              # Colibri iMX7 Dual 1GB (eMMC) Module
+              - toradex,colibri-imx7d-emmc-eval-v3      # Colibri iMX7 Dual 1GB (eMMC) Module on Colibri Evaluation Board V3
+              - toradex,colibri-imx7d-eval-v3           # Colibri iMX7 Dual Module on Colibri Evaluation Board V3
               - tq,imx7d-mba7             # i.MX7D TQ MBa7 with TQMa7D SoM
               - zii,imx7d-rmu2            # ZII RMU2 Board
               - zii,imx7d-rpu2            # ZII RPU2 Board
@@ -233,6 +276,7 @@ properties:
         items:
           - enum:
               - fsl,imx8mn-ddr4-evk       # i.MX8MN DDR4 EVK Board
+              - fsl,imx8mn-evk            # i.MX8MN LPDDR4 EVK Board
           - const: fsl,imx8mn
 
       - description: i.MX8MQ based Boards
@@ -250,6 +294,8 @@ properties:
           - enum:
               - einfochips,imx8qxp-ai_ml  # i.MX8QXP AI_ML Board
               - fsl,imx8qxp-mek           # i.MX8QXP MEK Board
+              - toradex,colibri-imx8x         # Colibri iMX8X Module
+              - toradex,colibri-imx8x-eval-v3 # Colibri iMX8X Module on Colibri Evaluation Board V3
           - const: fsl,imx8qxp
 
       - description:
@@ -267,6 +313,10 @@ properties:
               - fsl,vf600
               - fsl,vf610
               - fsl,vf610m4
+              - toradex,vf500-colibri_vf50              # Colibri VF50 Module
+              - toradex,vf500-colibri_vf50-on-eval      # Colibri VF50 Module on Colibri Evaluation Board
+              - toradex,vf610-colibri_vf61              # Colibri VF61 Module
+              - toradex,vf610-colibri_vf61-on-eval      # Colibri VF61 Module on Colibri Evaluation Board
 
       - description: ZII's VF610 based Boards
         items:
@@ -335,4 +385,10 @@ properties:
               - fsl,ls2088a-rdb
           - const: fsl,ls2088a
 
+      - description: S32V234 based Boards
+        items:
+          - enum:
+              - fsl,s32v234-evb           # S32V234-EVB2 Customer Evaluation Board
+          - const: fsl,s32v234
+
 ...
diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
deleted file mode 100644
index 771f5d20ae18..000000000000
--- a/Documentation/devicetree/bindings/arm/idle-states.txt
+++ /dev/null
@@ -1,706 +0,0 @@
-==========================================
-ARM idle states binding description
-==========================================
-
-==========================================
-1 - Introduction
-==========================================
-
-ARM systems contain HW capable of managing power consumption dynamically,
-where cores can be put in different low-power states (ranging from simple
-wfi to power gating) according to OS PM policies. The CPU states representing
-the range of dynamic idle states that a processor can enter at run-time, can be
-specified through device tree bindings representing the parameters required
-to enter/exit specific idle states on a given processor.
-
-According to the Server Base System Architecture document (SBSA, [3]), the
-power states an ARM CPU can be put into are identified by the following list:
-
-- Running
-- Idle_standby
-- Idle_retention
-- Sleep
-- Off
-
-The power states described in the SBSA document define the basic CPU states on
-top of which ARM platforms implement power management schemes that allow an OS
-PM implementation to put the processor in different idle states (which include
-states listed above; "off" state is not an idle state since it does not have
-wake-up capabilities, hence it is not considered in this document).
-
-Idle state parameters (e.g. entry latency) are platform specific and need to be
-characterized with bindings that provide the required information to OS PM
-code so that it can build the required tables and use them at runtime.
-
-The device tree binding definition for ARM idle states is the subject of this
-document.
-
-===========================================
-2 - idle-states definitions
-===========================================
-
-Idle states are characterized for a specific system through a set of
-timing and energy related properties, that underline the HW behaviour
-triggered upon idle states entry and exit.
-
-The following diagram depicts the CPU execution phases and related timing
-properties required to enter and exit an idle state:
-
-..__[EXEC]__|__[PREP]__|__[ENTRY]__|__[IDLE]__|__[EXIT]__|__[EXEC]__..
-	    |          |           |          |          |
-
-	    |<------ entry ------->|
-	    |       latency        |
-					      |<- exit ->|
-					      |  latency |
-	    |<-------- min-residency -------->|
-		       |<-------  wakeup-latency ------->|
-
-		Diagram 1: CPU idle state execution phases
-
-EXEC:	Normal CPU execution.
-
-PREP:	Preparation phase before committing the hardware to idle mode
-	like cache flushing. This is abortable on pending wake-up
-	event conditions. The abort latency is assumed to be negligible
-	(i.e. less than the ENTRY + EXIT duration). If aborted, CPU
-	goes back to EXEC. This phase is optional. If not abortable,
-	this should be included in the ENTRY phase instead.
-
-ENTRY:	The hardware is committed to idle mode. This period must run
-	to completion up to IDLE before anything else can happen.
-
-IDLE:	This is the actual energy-saving idle period. This may last
-	between 0 and infinite time, until a wake-up event occurs.
-
-EXIT:	Period during which the CPU is brought back to operational
-	mode (EXEC).
-
-entry-latency: Worst case latency required to enter the idle state. The
-exit-latency may be guaranteed only after entry-latency has passed.
-
-min-residency: Minimum period, including preparation and entry, for a given
-idle state to be worthwhile energywise.
-
-wakeup-latency: Maximum delay between the signaling of a wake-up event and the
-CPU being able to execute normal code again. If not specified, this is assumed
-to be entry-latency + exit-latency.
-
-These timing parameters can be used by an OS in different circumstances.
-
-An idle CPU requires the expected min-residency time to select the most
-appropriate idle state based on the expected expiry time of the next IRQ
-(i.e. wake-up) that causes the CPU to return to the EXEC phase.
-
-An operating system scheduler may need to compute the shortest wake-up delay
-for CPUs in the system by detecting how long will it take to get a CPU out
-of an idle state, e.g.:
-
-wakeup-delay = exit-latency + max(entry-latency - (now - entry-timestamp), 0)
-
-In other words, the scheduler can make its scheduling decision by selecting
-(e.g. waking-up) the CPU with the shortest wake-up delay.
-The wake-up delay must take into account the entry latency if that period
-has not expired. The abortable nature of the PREP period can be ignored
-if it cannot be relied upon (e.g. the PREP deadline may occur much sooner than
-the worst case since it depends on the CPU operating conditions, i.e. caches
-state).
-
-An OS has to reliably probe the wakeup-latency since some devices can enforce
-latency constraint guarantees to work properly, so the OS has to detect the
-worst case wake-up latency it can incur if a CPU is allowed to enter an
-idle state, and possibly to prevent that to guarantee reliable device
-functioning.
-
-The min-residency time parameter deserves further explanation since it is
-expressed in time units but must factor in energy consumption coefficients.
-
-The energy consumption of a cpu when it enters a power state can be roughly
-characterised by the following graph:
-
-               |
-               |
-               |
-           e   |
-           n   |                                      /---
-           e   |                               /------
-           r   |                        /------
-           g   |                  /-----
-           y   |           /------
-               |       ----
-               |      /|
-               |     / |
-               |    /  |
-               |   /   |
-               |  /    |
-               | /     |
-               |/      |
-          -----|-------+----------------------------------
-              0|       1                              time(ms)
-
-		Graph 1: Energy vs time example
-
-The graph is split in two parts delimited by time 1ms on the X-axis.
-The graph curve with X-axis values = { x | 0 < x < 1ms } has a steep slope
-and denotes the energy costs incurred while entering and leaving the idle
-state.
-The graph curve in the area delimited by X-axis values = {x | x > 1ms } has
-shallower slope and essentially represents the energy consumption of the idle
-state.
-
-min-residency is defined for a given idle state as the minimum expected
-residency time for a state (inclusive of preparation and entry) after
-which choosing that state become the most energy efficient option. A good
-way to visualise this, is by taking the same graph above and comparing some
-states energy consumptions plots.
-
-For sake of simplicity, let's consider a system with two idle states IDLE1,
-and IDLE2:
-
-          |
-          |
-          |
-          |                                                  /-- IDLE1
-       e  |                                              /---
-       n  |                                         /----
-       e  |                                     /---
-       r  |                                /-----/--------- IDLE2
-       g  |                    /-------/---------
-       y  |        ------------    /---|
-          |       /           /----    |
-          |      /        /---         |
-          |     /    /----             |
-          |    / /---                  |
-          |   ---                      |
-          |  /                         |
-          | /                          |
-          |/                           |                  time
-       ---/----------------------------+------------------------
-          |IDLE1-energy < IDLE2-energy | IDLE2-energy < IDLE1-energy
-                                       |
-                                IDLE2-min-residency
-
-		Graph 2: idle states min-residency example
-
-In graph 2 above, that takes into account idle states entry/exit energy
-costs, it is clear that if the idle state residency time (i.e. time till next
-wake-up IRQ) is less than IDLE2-min-residency, IDLE1 is the better idle state
-choice energywise.
-
-This is mainly down to the fact that IDLE1 entry/exit energy costs are lower
-than IDLE2.
-
-However, the lower power consumption (i.e. shallower energy curve slope) of
-idle state IDLE2 implies that after a suitable time, IDLE2 becomes more energy
-efficient.
-
-The time at which IDLE2 becomes more energy efficient than IDLE1 (and other
-shallower states in a system with multiple idle states) is defined
-IDLE2-min-residency and corresponds to the time when energy consumption of
-IDLE1 and IDLE2 states breaks even.
-
-The definitions provided in this section underpin the idle states
-properties specification that is the subject of the following sections.
-
-===========================================
-3 - idle-states node
-===========================================
-
-ARM processor idle states are defined within the idle-states node, which is
-a direct child of the cpus node [1] and provides a container where the
-processor idle states, defined as device tree nodes, are listed.
-
-- idle-states node
-
-	Usage: Optional - On ARM systems, it is a container of processor idle
-			  states nodes. If the system does not provide CPU
-			  power management capabilities, or the processor just
-			  supports idle_standby, an idle-states node is not
-			  required.
-
-	Description: idle-states node is a container node, where its
-		     subnodes describe the CPU idle states.
-
-	Node name must be "idle-states".
-
-	The idle-states node's parent node must be the cpus node.
-
-	The idle-states node's child nodes can be:
-
-	- one or more state nodes
-
-	Any other configuration is considered invalid.
-
-	An idle-states node defines the following properties:
-
-	- entry-method
-		Value type: <stringlist>
-		Usage and definition depend on ARM architecture version.
-			# On ARM v8 64-bit this property is required and must
-			  be:
-			   - "psci"
-			# On ARM 32-bit systems this property is optional
-
-This assumes that the "enable-method" property is set to "psci" in the cpu
-node[6] that is responsible for setting up CPU idle management in the OS
-implementation.
-
-The nodes describing the idle states (state) can only be defined
-within the idle-states node, any other configuration is considered invalid
-and therefore must be ignored.
-
-===========================================
-4 - state node
-===========================================
-
-A state node represents an idle state description and must be defined as
-follows:
-
-- state node
-
-	Description: must be child of the idle-states node
-
-	The state node name shall follow standard device tree naming
-	rules ([5], 2.2.1 "Node names"), in particular state nodes which
-	are siblings within a single common parent must be given a unique name.
-
-	The idle state entered by executing the wfi instruction (idle_standby
-	SBSA,[3][4]) is considered standard on all ARM platforms and therefore
-	must not be listed.
-
-	With the definitions provided above, the following list represents
-	the valid properties for a state node:
-
-	- compatible
-		Usage: Required
-		Value type: <stringlist>
-		Definition: Must be "arm,idle-state".
-
-	- local-timer-stop
-		Usage: See definition
-		Value type: <none>
-		Definition: if present the CPU local timer control logic is
-			    lost on state entry, otherwise it is retained.
-
-	- entry-latency-us
-		Usage: Required
-		Value type: <prop-encoded-array>
-		Definition: u32 value representing worst case latency in
-			    microseconds required to enter the idle state.
-
-	- exit-latency-us
-		Usage: Required
-		Value type: <prop-encoded-array>
-		Definition: u32 value representing worst case latency
-			    in microseconds required to exit the idle state.
-			    The exit-latency-us duration may be guaranteed
-			    only after entry-latency-us has passed.
-
-	- min-residency-us
-		Usage: Required
-		Value type: <prop-encoded-array>
-		Definition: u32 value representing minimum residency duration
-			    in microseconds, inclusive of preparation and
-			    entry, for this idle state to be considered
-			    worthwhile energy wise (refer to section 2 of
-			    this document for a complete description).
-
-	- wakeup-latency-us:
-		Usage: Optional
-		Value type: <prop-encoded-array>
-		Definition: u32 value representing maximum delay between the
-			    signaling of a wake-up event and the CPU being
-			    able to execute normal code again. If omitted,
-			    this is assumed to be equal to:
-
-				entry-latency-us + exit-latency-us
-
-			    It is important to supply this value on systems
-			    where the duration of PREP phase (see diagram 1,
-			    section 2) is non-neglibigle.
-			    In such systems entry-latency-us + exit-latency-us
-			    will exceed wakeup-latency-us by this duration.
-
-	- status:
-		Usage: Optional
-		Value type: <string>
-		Definition: A standard device tree property [5] that indicates
-			    the operational status of an idle-state.
-			    If present, it shall be:
-			    "okay": to indicate that the idle state is
-				    operational.
-			    "disabled": to indicate that the idle state has
-					been disabled in firmware so it is not
-					operational.
-			    If the property is not present the idle-state must
-			    be considered operational.
-
-	- idle-state-name:
-		Usage: Optional
-		Value type: <string>
-		Definition: A string used as a descriptive name for the idle
-			    state.
-
-	In addition to the properties listed above, a state node may require
-	additional properties specific to the entry-method defined in the
-	idle-states node. Please refer to the entry-method bindings
-	documentation for properties definitions.
-
-===========================================
-4 - Examples
-===========================================
-
-Example 1 (ARM 64-bit, 16-cpu system, PSCI enable-method):
-
-cpus {
-	#size-cells = <0>;
-	#address-cells = <2>;
-
-	CPU0: cpu@0 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x0>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU1: cpu@1 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x1>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU2: cpu@100 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x100>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU3: cpu@101 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x101>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU4: cpu@10000 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x10000>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU5: cpu@10001 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x10001>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU6: cpu@10100 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x10100>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU7: cpu@10101 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a57";
-		reg = <0x0 0x10101>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
-				   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU8: cpu@100000000 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x0>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU9: cpu@100000001 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x1>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU10: cpu@100000100 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x100>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU11: cpu@100000101 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x101>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU12: cpu@100010000 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x10000>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU13: cpu@100010001 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x10001>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU14: cpu@100010100 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x10100>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU15: cpu@100010101 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a53";
-		reg = <0x1 0x10101>;
-		enable-method = "psci";
-		cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
-				   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
-	};
-
-	idle-states {
-		entry-method = "psci";
-
-		CPU_RETENTION_0_0: cpu-retention-0-0 {
-			compatible = "arm,idle-state";
-			arm,psci-suspend-param = <0x0010000>;
-			entry-latency-us = <20>;
-			exit-latency-us = <40>;
-			min-residency-us = <80>;
-		};
-
-		CLUSTER_RETENTION_0: cluster-retention-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x1010000>;
-			entry-latency-us = <50>;
-			exit-latency-us = <100>;
-			min-residency-us = <250>;
-			wakeup-latency-us = <130>;
-		};
-
-		CPU_SLEEP_0_0: cpu-sleep-0-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x0010000>;
-			entry-latency-us = <250>;
-			exit-latency-us = <500>;
-			min-residency-us = <950>;
-		};
-
-		CLUSTER_SLEEP_0: cluster-sleep-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x1010000>;
-			entry-latency-us = <600>;
-			exit-latency-us = <1100>;
-			min-residency-us = <2700>;
-			wakeup-latency-us = <1500>;
-		};
-
-		CPU_RETENTION_1_0: cpu-retention-1-0 {
-			compatible = "arm,idle-state";
-			arm,psci-suspend-param = <0x0010000>;
-			entry-latency-us = <20>;
-			exit-latency-us = <40>;
-			min-residency-us = <90>;
-		};
-
-		CLUSTER_RETENTION_1: cluster-retention-1 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x1010000>;
-			entry-latency-us = <50>;
-			exit-latency-us = <100>;
-			min-residency-us = <270>;
-			wakeup-latency-us = <100>;
-		};
-
-		CPU_SLEEP_1_0: cpu-sleep-1-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x0010000>;
-			entry-latency-us = <70>;
-			exit-latency-us = <100>;
-			min-residency-us = <300>;
-			wakeup-latency-us = <150>;
-		};
-
-		CLUSTER_SLEEP_1: cluster-sleep-1 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			arm,psci-suspend-param = <0x1010000>;
-			entry-latency-us = <500>;
-			exit-latency-us = <1200>;
-			min-residency-us = <3500>;
-			wakeup-latency-us = <1300>;
-		};
-	};
-
-};
-
-Example 2 (ARM 32-bit, 8-cpu system, two clusters):
-
-cpus {
-	#size-cells = <0>;
-	#address-cells = <1>;
-
-	CPU0: cpu@0 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a15";
-		reg = <0x0>;
-		cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU1: cpu@1 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a15";
-		reg = <0x1>;
-		cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU2: cpu@2 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a15";
-		reg = <0x2>;
-		cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU3: cpu@3 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a15";
-		reg = <0x3>;
-		cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
-	};
-
-	CPU4: cpu@100 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a7";
-		reg = <0x100>;
-		cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU5: cpu@101 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a7";
-		reg = <0x101>;
-		cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU6: cpu@102 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a7";
-		reg = <0x102>;
-		cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
-	};
-
-	CPU7: cpu@103 {
-		device_type = "cpu";
-		compatible = "arm,cortex-a7";
-		reg = <0x103>;
-		cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
-	};
-
-	idle-states {
-		CPU_SLEEP_0_0: cpu-sleep-0-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			entry-latency-us = <200>;
-			exit-latency-us = <100>;
-			min-residency-us = <400>;
-			wakeup-latency-us = <250>;
-		};
-
-		CLUSTER_SLEEP_0: cluster-sleep-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			entry-latency-us = <500>;
-			exit-latency-us = <1500>;
-			min-residency-us = <2500>;
-			wakeup-latency-us = <1700>;
-		};
-
-		CPU_SLEEP_1_0: cpu-sleep-1-0 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			entry-latency-us = <300>;
-			exit-latency-us = <500>;
-			min-residency-us = <900>;
-			wakeup-latency-us = <600>;
-		};
-
-		CLUSTER_SLEEP_1: cluster-sleep-1 {
-			compatible = "arm,idle-state";
-			local-timer-stop;
-			entry-latency-us = <800>;
-			exit-latency-us = <2000>;
-			min-residency-us = <6500>;
-			wakeup-latency-us = <2300>;
-		};
-	};
-
-};
-
-===========================================
-5 - References
-===========================================
-
-[1] ARM Linux Kernel documentation - CPUs bindings
-    Documentation/devicetree/bindings/arm/cpus.yaml
-
-[2] ARM Linux Kernel documentation - PSCI bindings
-    Documentation/devicetree/bindings/arm/psci.yaml
-
-[3] ARM Server Base System Architecture (SBSA)
-    http://infocenter.arm.com/help/index.jsp
-
-[4] ARM Architecture Reference Manuals
-    http://infocenter.arm.com/help/index.jsp
-
-[5] Devicetree Specification
-    https://www.devicetree.org/specifications/
-
-[6] ARM Linux Kernel documentation - Booting AArch64 Linux
-    Documentation/arm64/booting.rst
diff --git a/Documentation/devicetree/bindings/arm/idle-states.yaml b/Documentation/devicetree/bindings/arm/idle-states.yaml
new file mode 100644
index 000000000000..ea805c1e6b20
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/idle-states.yaml
@@ -0,0 +1,661 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/idle-states.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM idle states binding description
+
+maintainers:
+  - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: |+
+  ==========================================
+  1 - Introduction
+  ==========================================
+
+  ARM systems contain HW capable of managing power consumption dynamically,
+  where cores can be put in different low-power states (ranging from simple wfi
+  to power gating) according to OS PM policies. The CPU states representing the
+  range of dynamic idle states that a processor can enter at run-time, can be
+  specified through device tree bindings representing the parameters required to
+  enter/exit specific idle states on a given processor.
+
+  According to the Server Base System Architecture document (SBSA, [3]), the
+  power states an ARM CPU can be put into are identified by the following list:
+
+  - Running
+  - Idle_standby
+  - Idle_retention
+  - Sleep
+  - Off
+
+  The power states described in the SBSA document define the basic CPU states on
+  top of which ARM platforms implement power management schemes that allow an OS
+  PM implementation to put the processor in different idle states (which include
+  states listed above; "off" state is not an idle state since it does not have
+  wake-up capabilities, hence it is not considered in this document).
+
+  Idle state parameters (e.g. entry latency) are platform specific and need to
+  be characterized with bindings that provide the required information to OS PM
+  code so that it can build the required tables and use them at runtime.
+
+  The device tree binding definition for ARM idle states is the subject of this
+  document.
+
+  ===========================================
+  2 - idle-states definitions
+  ===========================================
+
+  Idle states are characterized for a specific system through a set of
+  timing and energy related properties, that underline the HW behaviour
+  triggered upon idle states entry and exit.
+
+  The following diagram depicts the CPU execution phases and related timing
+  properties required to enter and exit an idle state:
+
+  ..__[EXEC]__|__[PREP]__|__[ENTRY]__|__[IDLE]__|__[EXIT]__|__[EXEC]__..
+              |          |           |          |          |
+
+              |<------ entry ------->|
+              |       latency        |
+                                                |<- exit ->|
+                                                |  latency |
+              |<-------- min-residency -------->|
+                         |<-------  wakeup-latency ------->|
+
+      Diagram 1: CPU idle state execution phases
+
+  EXEC:  Normal CPU execution.
+
+  PREP:  Preparation phase before committing the hardware to idle mode
+    like cache flushing. This is abortable on pending wake-up
+    event conditions. The abort latency is assumed to be negligible
+    (i.e. less than the ENTRY + EXIT duration). If aborted, CPU
+    goes back to EXEC. This phase is optional. If not abortable,
+    this should be included in the ENTRY phase instead.
+
+  ENTRY:  The hardware is committed to idle mode. This period must run
+    to completion up to IDLE before anything else can happen.
+
+  IDLE:  This is the actual energy-saving idle period. This may last
+    between 0 and infinite time, until a wake-up event occurs.
+
+  EXIT:  Period during which the CPU is brought back to operational
+    mode (EXEC).
+
+  entry-latency: Worst case latency required to enter the idle state. The
+  exit-latency may be guaranteed only after entry-latency has passed.
+
+  min-residency: Minimum period, including preparation and entry, for a given
+  idle state to be worthwhile energywise.
+
+  wakeup-latency: Maximum delay between the signaling of a wake-up event and the
+  CPU being able to execute normal code again. If not specified, this is assumed
+  to be entry-latency + exit-latency.
+
+  These timing parameters can be used by an OS in different circumstances.
+
+  An idle CPU requires the expected min-residency time to select the most
+  appropriate idle state based on the expected expiry time of the next IRQ
+  (i.e. wake-up) that causes the CPU to return to the EXEC phase.
+
+  An operating system scheduler may need to compute the shortest wake-up delay
+  for CPUs in the system by detecting how long will it take to get a CPU out
+  of an idle state, e.g.:
+
+  wakeup-delay = exit-latency + max(entry-latency - (now - entry-timestamp), 0)
+
+  In other words, the scheduler can make its scheduling decision by selecting
+  (e.g. waking-up) the CPU with the shortest wake-up delay.
+  The wake-up delay must take into account the entry latency if that period
+  has not expired. The abortable nature of the PREP period can be ignored
+  if it cannot be relied upon (e.g. the PREP deadline may occur much sooner than
+  the worst case since it depends on the CPU operating conditions, i.e. caches
+  state).
+
+  An OS has to reliably probe the wakeup-latency since some devices can enforce
+  latency constraint guarantees to work properly, so the OS has to detect the
+  worst case wake-up latency it can incur if a CPU is allowed to enter an
+  idle state, and possibly to prevent that to guarantee reliable device
+  functioning.
+
+  The min-residency time parameter deserves further explanation since it is
+  expressed in time units but must factor in energy consumption coefficients.
+
+  The energy consumption of a cpu when it enters a power state can be roughly
+  characterised by the following graph:
+
+                 |
+                 |
+                 |
+             e   |
+             n   |                                      /---
+             e   |                               /------
+             r   |                        /------
+             g   |                  /-----
+             y   |           /------
+                 |       ----
+                 |      /|
+                 |     / |
+                 |    /  |
+                 |   /   |
+                 |  /    |
+                 | /     |
+                 |/      |
+            -----|-------+----------------------------------
+                0|       1                              time(ms)
+
+      Graph 1: Energy vs time example
+
+  The graph is split in two parts delimited by time 1ms on the X-axis.
+  The graph curve with X-axis values = { x | 0 < x < 1ms } has a steep slope
+  and denotes the energy costs incurred while entering and leaving the idle
+  state.
+  The graph curve in the area delimited by X-axis values = {x | x > 1ms } has
+  shallower slope and essentially represents the energy consumption of the idle
+  state.
+
+  min-residency is defined for a given idle state as the minimum expected
+  residency time for a state (inclusive of preparation and entry) after
+  which choosing that state become the most energy efficient option. A good
+  way to visualise this, is by taking the same graph above and comparing some
+  states energy consumptions plots.
+
+  For sake of simplicity, let's consider a system with two idle states IDLE1,
+  and IDLE2:
+
+            |
+            |
+            |
+            |                                                  /-- IDLE1
+         e  |                                              /---
+         n  |                                         /----
+         e  |                                     /---
+         r  |                                /-----/--------- IDLE2
+         g  |                    /-------/---------
+         y  |        ------------    /---|
+            |       /           /----    |
+            |      /        /---         |
+            |     /    /----             |
+            |    / /---                  |
+            |   ---                      |
+            |  /                         |
+            | /                          |
+            |/                           |                  time
+         ---/----------------------------+------------------------
+            |IDLE1-energy < IDLE2-energy | IDLE2-energy < IDLE1-energy
+                                         |
+                                  IDLE2-min-residency
+
+      Graph 2: idle states min-residency example
+
+  In graph 2 above, that takes into account idle states entry/exit energy
+  costs, it is clear that if the idle state residency time (i.e. time till next
+  wake-up IRQ) is less than IDLE2-min-residency, IDLE1 is the better idle state
+  choice energywise.
+
+  This is mainly down to the fact that IDLE1 entry/exit energy costs are lower
+  than IDLE2.
+
+  However, the lower power consumption (i.e. shallower energy curve slope) of
+  idle state IDLE2 implies that after a suitable time, IDLE2 becomes more energy
+  efficient.
+
+  The time at which IDLE2 becomes more energy efficient than IDLE1 (and other
+  shallower states in a system with multiple idle states) is defined
+  IDLE2-min-residency and corresponds to the time when energy consumption of
+  IDLE1 and IDLE2 states breaks even.
+
+  The definitions provided in this section underpin the idle states
+  properties specification that is the subject of the following sections.
+
+  ===========================================
+  3 - idle-states node
+  ===========================================
+
+  ARM processor idle states are defined within the idle-states node, which is
+  a direct child of the cpus node [1] and provides a container where the
+  processor idle states, defined as device tree nodes, are listed.
+
+  On ARM systems, it is a container of processor idle states nodes. If the
+  system does not provide CPU power management capabilities, or the processor
+  just supports idle_standby, an idle-states node is not required.
+
+  ===========================================
+  4 - References
+  ===========================================
+
+  [1] ARM Linux Kernel documentation - CPUs bindings
+      Documentation/devicetree/bindings/arm/cpus.yaml
+
+  [2] ARM Linux Kernel documentation - PSCI bindings
+      Documentation/devicetree/bindings/arm/psci.yaml
+
+  [3] ARM Server Base System Architecture (SBSA)
+      http://infocenter.arm.com/help/index.jsp
+
+  [4] ARM Architecture Reference Manuals
+      http://infocenter.arm.com/help/index.jsp
+
+  [6] ARM Linux Kernel documentation - Booting AArch64 Linux
+      Documentation/arm64/booting.rst
+
+properties:
+  $nodename:
+    const: idle-states
+
+  entry-method:
+    description: |
+      Usage and definition depend on ARM architecture version.
+
+      On ARM v8 64-bit this property is required.
+      On ARM 32-bit systems this property is optional
+
+      This assumes that the "enable-method" property is set to "psci" in the cpu
+      node[6] that is responsible for setting up CPU idle management in the OS
+      implementation.
+    const: psci
+
+patternProperties:
+  "^(cpu|cluster)-":
+    type: object
+    description: |
+      Each state node represents an idle state description and must be defined
+      as follows.
+
+      The idle state entered by executing the wfi instruction (idle_standby
+      SBSA,[3][4]) is considered standard on all ARM platforms and therefore
+      must not be listed.
+
+      In addition to the properties listed above, a state node may require
+      additional properties specific to the entry-method defined in the
+      idle-states node. Please refer to the entry-method bindings
+      documentation for properties definitions.
+
+    properties:
+      compatible:
+        const: arm,idle-state
+
+      local-timer-stop:
+        description:
+          If present the CPU local timer control logic is
+             lost on state entry, otherwise it is retained.
+        type: boolean
+
+      entry-latency-us:
+        description:
+          Worst case latency in microseconds required to enter the idle state.
+
+      exit-latency-us:
+        description:
+          Worst case latency in microseconds required to exit the idle state.
+          The exit-latency-us duration may be guaranteed only after
+          entry-latency-us has passed.
+
+      min-residency-us:
+        description:
+          Minimum residency duration in microseconds, inclusive of preparation
+          and entry, for this idle state to be considered worthwhile energy wise
+          (refer to section 2 of this document for a complete description).
+
+      wakeup-latency-us:
+        description: |
+          Maximum delay between the signaling of a wake-up event and the CPU
+          being able to execute normal code again. If omitted, this is assumed
+          to be equal to:
+
+            entry-latency-us + exit-latency-us
+
+          It is important to supply this value on systems where the duration of
+          PREP phase (see diagram 1, section 2) is non-neglibigle. In such
+          systems entry-latency-us + exit-latency-us will exceed
+          wakeup-latency-us by this duration.
+
+      idle-state-name:
+        $ref: /schemas/types.yaml#definitions/string
+        description:
+          A string used as a descriptive name for the idle state.
+
+    required:
+      - compatible
+      - entry-latency-us
+      - exit-latency-us
+      - min-residency-us
+
+additionalProperties: false
+
+examples:
+  - |
+
+    cpus {
+        #size-cells = <0>;
+        #address-cells = <2>;
+
+        cpu@0 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x0>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@1 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x1>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@100 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x100>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@101 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x101>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@10000 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x10000>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@10001 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x10001>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@10100 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x10100>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@10101 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a57";
+            reg = <0x0 0x10101>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+                   &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+        };
+
+        cpu@100000000 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x0>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100000001 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x1>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100000100 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x100>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100000101 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x101>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100010000 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x10000>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100010001 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x10001>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100010100 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x10100>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        cpu@100010101 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a53";
+            reg = <0x1 0x10101>;
+            enable-method = "psci";
+            cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+                   &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+        };
+
+        idle-states {
+            entry-method = "psci";
+
+            CPU_RETENTION_0_0: cpu-retention-0-0 {
+                compatible = "arm,idle-state";
+                arm,psci-suspend-param = <0x0010000>;
+                entry-latency-us = <20>;
+                exit-latency-us = <40>;
+                min-residency-us = <80>;
+            };
+
+            CLUSTER_RETENTION_0: cluster-retention-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x1010000>;
+                entry-latency-us = <50>;
+                exit-latency-us = <100>;
+                min-residency-us = <250>;
+                wakeup-latency-us = <130>;
+            };
+
+            CPU_SLEEP_0_0: cpu-sleep-0-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x0010000>;
+                entry-latency-us = <250>;
+                exit-latency-us = <500>;
+                min-residency-us = <950>;
+            };
+
+            CLUSTER_SLEEP_0: cluster-sleep-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x1010000>;
+                entry-latency-us = <600>;
+                exit-latency-us = <1100>;
+                min-residency-us = <2700>;
+                wakeup-latency-us = <1500>;
+            };
+
+            CPU_RETENTION_1_0: cpu-retention-1-0 {
+                compatible = "arm,idle-state";
+                arm,psci-suspend-param = <0x0010000>;
+                entry-latency-us = <20>;
+                exit-latency-us = <40>;
+                min-residency-us = <90>;
+            };
+
+            CLUSTER_RETENTION_1: cluster-retention-1 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x1010000>;
+                entry-latency-us = <50>;
+                exit-latency-us = <100>;
+                min-residency-us = <270>;
+                wakeup-latency-us = <100>;
+            };
+
+            CPU_SLEEP_1_0: cpu-sleep-1-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x0010000>;
+                entry-latency-us = <70>;
+                exit-latency-us = <100>;
+                min-residency-us = <300>;
+                wakeup-latency-us = <150>;
+            };
+
+            CLUSTER_SLEEP_1: cluster-sleep-1 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                arm,psci-suspend-param = <0x1010000>;
+                entry-latency-us = <500>;
+                exit-latency-us = <1200>;
+                min-residency-us = <3500>;
+                wakeup-latency-us = <1300>;
+            };
+        };
+    };
+
+  - |
+    // Example 2 (ARM 32-bit, 8-cpu system, two clusters):
+
+    cpus {
+        #size-cells = <0>;
+        #address-cells = <1>;
+
+        cpu@0 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a15";
+            reg = <0x0>;
+            cpu-idle-states = <&cpu_sleep_0_0 &cluster_sleep_0>;
+        };
+
+        cpu@1 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a15";
+            reg = <0x1>;
+            cpu-idle-states = <&cpu_sleep_0_0 &cluster_sleep_0>;
+        };
+
+        cpu@2 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a15";
+            reg = <0x2>;
+            cpu-idle-states = <&cpu_sleep_0_0 &cluster_sleep_0>;
+        };
+
+        cpu@3 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a15";
+            reg = <0x3>;
+            cpu-idle-states = <&cpu_sleep_0_0 &cluster_sleep_0>;
+        };
+
+        cpu@100 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a7";
+            reg = <0x100>;
+            cpu-idle-states = <&cpu_sleep_1_0 &cluster_sleep_1>;
+        };
+
+        cpu@101 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a7";
+            reg = <0x101>;
+            cpu-idle-states = <&cpu_sleep_1_0 &cluster_sleep_1>;
+        };
+
+        cpu@102 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a7";
+            reg = <0x102>;
+            cpu-idle-states = <&cpu_sleep_1_0 &cluster_sleep_1>;
+        };
+
+        cpu@103 {
+            device_type = "cpu";
+            compatible = "arm,cortex-a7";
+            reg = <0x103>;
+            cpu-idle-states = <&cpu_sleep_1_0 &cluster_sleep_1>;
+        };
+
+        idle-states {
+            cpu_sleep_0_0: cpu-sleep-0-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                entry-latency-us = <200>;
+                exit-latency-us = <100>;
+                min-residency-us = <400>;
+                wakeup-latency-us = <250>;
+            };
+
+            cluster_sleep_0: cluster-sleep-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                entry-latency-us = <500>;
+                exit-latency-us = <1500>;
+                min-residency-us = <2500>;
+                wakeup-latency-us = <1700>;
+            };
+
+            cpu_sleep_1_0: cpu-sleep-1-0 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                entry-latency-us = <300>;
+                exit-latency-us = <500>;
+                min-residency-us = <900>;
+                wakeup-latency-us = <600>;
+            };
+
+            cluster_sleep_1: cluster-sleep-1 {
+                compatible = "arm,idle-state";
+                local-timer-stop;
+                entry-latency-us = <800>;
+                exit-latency-us = <2000>;
+                min-residency-us = <6500>;
+                wakeup-latency-us = <2300>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt b/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
deleted file mode 100644
index 26410fbb85be..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
+++ /dev/null
@@ -1,177 +0,0 @@
-Marvell Armada AP806 System Controller
-======================================
-
-The AP806 is one of the two core HW blocks of the Marvell Armada 7K/8K
-SoCs. It contains system controllers, which provide several registers
-giving access to numerous features: clocks, pin-muxing and many other
-SoC configuration items. This DT binding allows to describe these
-system controllers.
-
-For the top level node:
- - compatible: must be: "syscon", "simple-mfd";
- - reg: register area of the AP806 system controller
-
-SYSTEM CONTROLLER 0
-===================
-
-Clocks:
--------
-
-
-The Device Tree node representing the AP806/AP807 system controller
-provides a number of clocks:
-
- - 0: reference clock of CPU cluster 0
- - 1: reference clock of CPU cluster 1
- - 2: fixed PLL at 1200 Mhz
- - 3: MSS clock, derived from the fixed PLL
-
-Required properties:
-
- - compatible: must be one of:
-   * "marvell,ap806-clock"
-   * "marvell,ap807-clock"
- - #clock-cells: must be set to 1
-
-Pinctrl:
---------
-
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt.
-
-Required properties:
-- compatible must be "marvell,ap806-pinctrl",
-
-Available mpp pins/groups and functions:
-Note: brackets (x) are not part of the mpp name for marvell,function and given
-only for more detailed description in this document.
-
-name	pins	functions
-================================================================================
-mpp0	0	gpio, sdio(clk), spi0(clk)
-mpp1	1	gpio, sdio(cmd), spi0(miso)
-mpp2	2	gpio, sdio(d0), spi0(mosi)
-mpp3	3	gpio, sdio(d1), spi0(cs0n)
-mpp4	4	gpio, sdio(d2), i2c0(sda)
-mpp5	5	gpio, sdio(d3), i2c0(sdk)
-mpp6	6	gpio, sdio(ds)
-mpp7	7	gpio, sdio(d4), uart1(rxd)
-mpp8	8	gpio, sdio(d5), uart1(txd)
-mpp9	9	gpio, sdio(d6), spi0(cs1n)
-mpp10	10	gpio, sdio(d7)
-mpp11	11	gpio, uart0(txd)
-mpp12	12	gpio, sdio(pw_off), sdio(hw_rst)
-mpp13	13	gpio
-mpp14	14	gpio
-mpp15	15	gpio
-mpp16	16	gpio
-mpp17	17	gpio
-mpp18	18	gpio
-mpp19	19	gpio, uart0(rxd), sdio(pw_off)
-
-GPIO:
------
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/gpio/gpio-mvebu.txt.
-
-Required properties:
-
-- compatible: "marvell,armada-8k-gpio"
-
-- offset: offset address inside the syscon block
-
-Example:
-ap_syscon: system-controller@6f4000 {
-	compatible = "syscon", "simple-mfd";
-	reg = <0x6f4000 0x1000>;
-
-	ap_clk: clock {
-		compatible = "marvell,ap806-clock";
-		#clock-cells = <1>;
-	};
-
-	ap_pinctrl: pinctrl {
-		compatible = "marvell,ap806-pinctrl";
-	};
-
-	ap_gpio: gpio {
-		compatible = "marvell,armada-8k-gpio";
-		offset = <0x1040>;
-		ngpios = <19>;
-		gpio-controller;
-		#gpio-cells = <2>;
-		gpio-ranges = <&ap_pinctrl 0 0 19>;
-	};
-};
-
-SYSTEM CONTROLLER 1
-===================
-
-Thermal:
---------
-
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/thermal/thermal.txt
-
-The thermal IP can probe the temperature all around the processor. It
-may feature several channels, each of them wired to one sensor.
-
-It is possible to setup an overheat interrupt by giving at least one
-critical point to any subnode of the thermal-zone node.
-
-Required properties:
-- compatible: must be one of:
-  * marvell,armada-ap806-thermal
-- reg: register range associated with the thermal functions.
-
-Optional properties:
-- interrupts: overheat interrupt handle. Should point to line 18 of the
-  SEI irqchip. See interrupt-controller/interrupts.txt
-- #thermal-sensor-cells: shall be <1> when thermal-zones subnodes refer
-  to this IP and represents the channel ID. There is one sensor per
-  channel. O refers to the thermal IP internal channel, while positive
-  IDs refer to each CPU.
-
-Example:
-ap_syscon1: system-controller@6f8000 {
-	compatible = "syscon", "simple-mfd";
-	reg = <0x6f8000 0x1000>;
-
-	ap_thermal: thermal-sensor@80 {
-		compatible = "marvell,armada-ap806-thermal";
-		reg = <0x80 0x10>;
-		interrupt-parent = <&sei>;
-		interrupts = <18>;
-		#thermal-sensor-cells = <1>;
-	};
-};
-
-Cluster clocks:
----------------
-
-Device Tree Clock bindings for cluster clock of Marvell
-AP806/AP807. Each cluster contain up to 2 CPUs running at the same
-frequency.
-
-Required properties:
- - compatible: must be one of:
-   * "marvell,ap806-cpu-clock"
-   * "marvell,ap807-cpu-clock"
-- #clock-cells : should be set to 1.
-
-- clocks : shall be the input parent clock(s) phandle for the clock
-           (one per cluster)
-
-- reg: register range associated with the cluster clocks
-
-ap_syscon1: system-controller@6f8000 {
-	compatible = "marvell,armada-ap806-syscon1", "syscon", "simple-mfd";
-	reg = <0x6f8000 0x1000>;
-
-	cpu_clk: clock-cpu@278 {
-		compatible = "marvell,ap806-cpu-clock";
-		clocks = <&ap_clk 0>, <&ap_clk 1>;
-		#clock-cells = <1>;
-		reg = <0x278 0xa30>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt b/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt
new file mode 100644
index 000000000000..098d932fc963
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt
@@ -0,0 +1,177 @@
+Marvell Armada AP80x System Controller
+======================================
+
+The AP806/AP807 is one of the two core HW blocks of the Marvell Armada
+7K/8K/931x SoCs. It contains system controllers, which provide several
+registers giving access to numerous features: clocks, pin-muxing and
+many other SoC configuration items. This DT binding allows to describe
+these system controllers.
+
+For the top level node:
+ - compatible: must be: "syscon", "simple-mfd";
+ - reg: register area of the AP80x system controller
+
+SYSTEM CONTROLLER 0
+===================
+
+Clocks:
+-------
+
+
+The Device Tree node representing the AP806/AP807 system controller
+provides a number of clocks:
+
+ - 0: reference clock of CPU cluster 0
+ - 1: reference clock of CPU cluster 1
+ - 2: fixed PLL at 1200 Mhz
+ - 3: MSS clock, derived from the fixed PLL
+
+Required properties:
+
+ - compatible: must be one of:
+   * "marvell,ap806-clock"
+   * "marvell,ap807-clock"
+ - #clock-cells: must be set to 1
+
+Pinctrl:
+--------
+
+For common binding part and usage, refer to
+Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt.
+
+Required properties:
+- compatible must be "marvell,ap806-pinctrl",
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name	pins	functions
+================================================================================
+mpp0	0	gpio, sdio(clk), spi0(clk)
+mpp1	1	gpio, sdio(cmd), spi0(miso)
+mpp2	2	gpio, sdio(d0), spi0(mosi)
+mpp3	3	gpio, sdio(d1), spi0(cs0n)
+mpp4	4	gpio, sdio(d2), i2c0(sda)
+mpp5	5	gpio, sdio(d3), i2c0(sdk)
+mpp6	6	gpio, sdio(ds)
+mpp7	7	gpio, sdio(d4), uart1(rxd)
+mpp8	8	gpio, sdio(d5), uart1(txd)
+mpp9	9	gpio, sdio(d6), spi0(cs1n)
+mpp10	10	gpio, sdio(d7)
+mpp11	11	gpio, uart0(txd)
+mpp12	12	gpio, sdio(pw_off), sdio(hw_rst)
+mpp13	13	gpio
+mpp14	14	gpio
+mpp15	15	gpio
+mpp16	16	gpio
+mpp17	17	gpio
+mpp18	18	gpio
+mpp19	19	gpio, uart0(rxd), sdio(pw_off)
+
+GPIO:
+-----
+For common binding part and usage, refer to
+Documentation/devicetree/bindings/gpio/gpio-mvebu.txt.
+
+Required properties:
+
+- compatible: "marvell,armada-8k-gpio"
+
+- offset: offset address inside the syscon block
+
+Example:
+ap_syscon: system-controller@6f4000 {
+	compatible = "syscon", "simple-mfd";
+	reg = <0x6f4000 0x1000>;
+
+	ap_clk: clock {
+		compatible = "marvell,ap806-clock";
+		#clock-cells = <1>;
+	};
+
+	ap_pinctrl: pinctrl {
+		compatible = "marvell,ap806-pinctrl";
+	};
+
+	ap_gpio: gpio {
+		compatible = "marvell,armada-8k-gpio";
+		offset = <0x1040>;
+		ngpios = <19>;
+		gpio-controller;
+		#gpio-cells = <2>;
+		gpio-ranges = <&ap_pinctrl 0 0 19>;
+	};
+};
+
+SYSTEM CONTROLLER 1
+===================
+
+Thermal:
+--------
+
+For common binding part and usage, refer to
+Documentation/devicetree/bindings/thermal/thermal.txt
+
+The thermal IP can probe the temperature all around the processor. It
+may feature several channels, each of them wired to one sensor.
+
+It is possible to setup an overheat interrupt by giving at least one
+critical point to any subnode of the thermal-zone node.
+
+Required properties:
+- compatible: must be one of:
+  * marvell,armada-ap806-thermal
+- reg: register range associated with the thermal functions.
+
+Optional properties:
+- interrupts: overheat interrupt handle. Should point to line 18 of the
+  SEI irqchip. See interrupt-controller/interrupts.txt
+- #thermal-sensor-cells: shall be <1> when thermal-zones subnodes refer
+  to this IP and represents the channel ID. There is one sensor per
+  channel. O refers to the thermal IP internal channel, while positive
+  IDs refer to each CPU.
+
+Example:
+ap_syscon1: system-controller@6f8000 {
+	compatible = "syscon", "simple-mfd";
+	reg = <0x6f8000 0x1000>;
+
+	ap_thermal: thermal-sensor@80 {
+		compatible = "marvell,armada-ap806-thermal";
+		reg = <0x80 0x10>;
+		interrupt-parent = <&sei>;
+		interrupts = <18>;
+		#thermal-sensor-cells = <1>;
+	};
+};
+
+Cluster clocks:
+---------------
+
+Device Tree Clock bindings for cluster clock of Marvell
+AP806/AP807. Each cluster contain up to 2 CPUs running at the same
+frequency.
+
+Required properties:
+ - compatible: must be one of:
+   * "marvell,ap806-cpu-clock"
+   * "marvell,ap807-cpu-clock"
+- #clock-cells : should be set to 1.
+
+- clocks : shall be the input parent clock(s) phandle for the clock
+           (one per cluster)
+
+- reg: register range associated with the cluster clocks
+
+ap_syscon1: system-controller@6f8000 {
+	compatible = "marvell,armada-ap806-syscon1", "syscon", "simple-mfd";
+	reg = <0x6f8000 0x1000>;
+
+	cpu_clk: clock-cpu@278 {
+		compatible = "marvell,ap806-cpu-clock";
+		clocks = <&ap_clk 0>, <&ap_clk 1>;
+		#clock-cells = <1>;
+		reg = <0x278 0xa30>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt
deleted file mode 100644
index df98a9c82a8c..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Marvell Armada 7K/8K Platforms Device Tree Bindings
----------------------------------------------------
-
-Boards using a SoC of the Marvell Armada 7K or 8K families must carry
-the following root node property:
-
- - compatible, with one of the following values:
-
-   - "marvell,armada7020", "marvell,armada-ap806-dual", "marvell,armada-ap806"
-      when the SoC being used is the Armada 7020
-
-   - "marvell,armada7040", "marvell,armada-ap806-quad", "marvell,armada-ap806"
-      when the SoC being used is the Armada 7040
-
-   - "marvell,armada8020", "marvell,armada-ap806-dual", "marvell,armada-ap806"
-      when the SoC being used is the Armada 8020
-
-   - "marvell,armada8040", "marvell,armada-ap806-quad", "marvell,armada-ap806"
-      when the SoC being used is the Armada 8040
-
-Example:
-
-compatible = "marvell,armada7040-db", "marvell,armada7040",
-             "marvell,armada-ap806-quad", "marvell,armada-ap806";
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
new file mode 100644
index 000000000000..a9828c50c0fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR X11)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/armada-7k-8k.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 7K/8K Platforms Device Tree Bindings
+
+maintainers:
+  - Gregory CLEMENT <gregory.clement@bootlin.com>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+
+      - description: Armada 7020 SoC
+        items:
+          - const: marvell,armada7020
+          - const: marvell,armada-ap806-dual
+          - const: marvell,armada-ap806
+
+      - description: Armada 7040 SoC
+        items:
+          - const: marvell,armada7040
+          - const: marvell,armada-ap806-quad
+          - const: marvell,armada-ap806
+
+      - description: Armada 8020 SoC
+        items:
+          - const: marvell,armada8020
+          - const: marvell,armada-ap806-dual
+          - const: marvell,armada-ap806
+
+      - description: Armada 8040 SoC
+        items:
+          - const: marvell,armada8040
+          - const: marvell,armada-ap806-quad
+          - const: marvell,armada-ap806
+
+      - description: Armada CN9130 SoC with no external CP
+        items:
+          - const: marvell,cn9130
+          - const: marvell,armada-ap807-quad
+          - const: marvell,armada-ap807
+
+      - description: Armada CN9131 SoC with one external CP
+        items:
+          - const: marvell,cn9131
+          - const: marvell,cn9130
+          - const: marvell,armada-ap807-quad
+          - const: marvell,armada-ap807
+
+      - description: Armada CN9132 SoC with two external CPs
+        items:
+          - const: marvell,cn9132
+          - const: marvell,cn9131
+          - const: marvell,cn9130
+          - const: marvell,armada-ap807-quad
+          - const: marvell,armada-ap807
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt b/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
deleted file mode 100644
index 951687528efb..000000000000
--- a/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Marvell Platforms Device Tree Bindings
-----------------------------------------------------
-
-PXA168 Aspenite Board
-Required root node properties:
-	- compatible = "mrvl,pxa168-aspenite", "mrvl,pxa168";
-
-PXA910 DKB Board
-Required root node properties:
-	- compatible = "mrvl,pxa910-dkb";
-
-MMP2 Brownstone Board
-Required root node properties:
-	- compatible = "mrvl,mmp2-brownstone", "mrvl,mmp2";
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
new file mode 100644
index 000000000000..818dfe6de512
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mrvl/mrvl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Platforms Device Tree Bindings
+
+maintainers:
+  - Lubomir Rintel <lkundrak@v3.sk>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: PXA168 Aspenite Board
+        items:
+          - enum:
+              - mrvl,pxa168-aspenite
+          - const: mrvl,pxa168
+      - description: PXA910 DKB Board
+        items:
+          - enum:
+              - mrvl,pxa910-dkb
+          - const: mrvl,pxa910
+      - description: MMP2 based boards
+        items:
+          - enum:
+              - mrvl,mmp2-brownstone
+          - const: mrvl,mmp2
+      - description: MMP3 based boards
+        items:
+          - const: mrvl,mmp3
+...
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt
deleted file mode 100644
index eaee06b2d8f2..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-== Introduction==
-
-LLCC (Last Level Cache Controller) provides last level of cache memory in SOC,
-that can be shared by multiple clients. Clients here are different cores in the
-SOC, the idea is to minimize the local caches at the clients and migrate to
-common pool of memory. Cache memory is divided into partitions called slices
-which are assigned to clients. Clients can query the slice details, activate
-and deactivate them.
-
-Properties:
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be "qcom,sdm845-llcc"
-
-- reg:
-	Usage: required
-	Value Type: <prop-encoded-array>
-	Definition: The first element specifies the llcc base start address and
-		    the size of the register region. The second element specifies
-		    the llcc broadcast base address and size of the register region.
-
-- reg-names:
-        Usage: required
-        Value Type: <stringlist>
-        Definition: Register region names. Must be "llcc_base", "llcc_broadcast_base".
-
-- interrupts:
-	Usage: required
-	Definition: The interrupt is associated with the llcc edac device.
-			It's used for llcc cache single and double bit error detection
-			and reporting.
-
-Example:
-
-	cache-controller@1100000 {
-		compatible = "qcom,sdm845-llcc";
-		reg = <0x1100000 0x200000>, <0x1300000 0x50000> ;
-		reg-names = "llcc_base", "llcc_broadcast_base";
-		interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml b/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
new file mode 100644
index 000000000000..558749065b97
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/msm/qcom,llcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Last Level Cache Controller
+
+maintainers:
+  - Rishabh Bhatnagar <rishabhb@codeaurora.org>
+  - Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
+
+description: |
+  LLCC (Last Level Cache Controller) provides last level of cache memory in SoC,
+  that can be shared by multiple clients. Clients here are different cores in the
+  SoC, the idea is to minimize the local caches at the clients and migrate to
+  common pool of memory. Cache memory is divided into partitions called slices
+  which are assigned to clients. Clients can query the slice details, activate
+  and deactivate them.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc7180-llcc
+      - qcom,sdm845-llcc
+
+  reg:
+    items:
+      - description: LLCC base register region
+      - description: LLCC broadcast base register region
+
+  reg-names:
+    items:
+      - const: llcc_base
+      - const: llcc_broadcast_base
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    cache-controller@1100000 {
+      compatible = "qcom,sdm845-llcc";
+      reg = <0x1100000 0x200000>, <0x1300000 0x50000> ;
+      reg-names = "llcc_base", "llcc_broadcast_base";
+      interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index b301f753ed2c..e77635c5422c 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -43,7 +43,7 @@ SoC Families:
 
 - OMAP2 generic - defaults to OMAP2420
   compatible = "ti,omap2"
-- OMAP3 generic - defaults to OMAP3430
+- OMAP3 generic
   compatible = "ti,omap3"
 - OMAP4 generic - defaults to OMAP4430
   compatible = "ti,omap4"
@@ -51,6 +51,8 @@ SoC Families:
   compatible = "ti,omap5"
 - DRA7 generic - defaults to DRA742
   compatible = "ti,dra7"
+- AM33x generic
+  compatible = "ti,am33xx"
 - AM43x generic - defaults to AM4372
   compatible = "ti,am43"
 
@@ -63,12 +65,14 @@ SoCs:
 
 - OMAP3430
   compatible = "ti,omap3430", "ti,omap3"
+  legacy: "ti,omap34xx" - please do not use any more
 - AM3517
   compatible = "ti,am3517", "ti,omap3"
 - OMAP3630
-  compatible = "ti,omap36xx", "ti,omap3"
-- AM33xx
-  compatible = "ti,am33xx", "ti,omap3"
+  compatible = "ti,omap3630", "ti,omap3"
+  legacy: "ti,omap36xx" - please do not use any more
+- AM335x
+  compatible = "ti,am33xx"
 
 - OMAP4430
   compatible = "ti,omap4430", "ti,omap4"
@@ -110,19 +114,19 @@ SoCs:
 - AM4372
   compatible = "ti,am4372", "ti,am43"
 
-Boards:
+Boards (incomplete list of examples):
 
 - OMAP3 BeagleBoard : Low cost community board
-  compatible = "ti,omap3-beagle", "ti,omap3"
+  compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
 
 - OMAP3 Tobi with Overo : Commercial expansion board with daughter board
-  compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3"
+  compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3"
 
 - OMAP4 SDP : Software Development Board
-  compatible = "ti,omap4-sdp", "ti,omap4430"
+  compatible = "ti,omap4-sdp", "ti,omap4430", "ti,omap4"
 
 - OMAP4 PandaBoard : Low cost community board
-  compatible = "ti,omap4-panda", "ti,omap4430"
+  compatible = "ti,omap4-panda", "ti,omap4430", "ti,omap4"
 
 - OMAP4 DuoVero with Parlor : Commercial expansion board with daughter board
   compatible = "gumstix,omap4-duovero-parlor", "gumstix,omap4-duovero", "ti,omap4430", "ti,omap4";
@@ -134,16 +138,16 @@ Boards:
   compatible = "variscite,var-dvk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
 
 - OMAP3 EVM : Software Development Board for OMAP35x, AM/DM37x
-  compatible = "ti,omap3-evm", "ti,omap3"
+  compatible = "ti,omap3-evm", "ti,omap3630", "ti,omap3"
 
 - AM335X EVM : Software Development Board for AM335x
-  compatible = "ti,am335x-evm", "ti,am33xx", "ti,omap3"
+  compatible = "ti,am335x-evm", "ti,am33xx"
 
 - AM335X Bone : Low cost community board
-  compatible = "ti,am335x-bone", "ti,am33xx", "ti,omap3"
+  compatible = "ti,am335x-bone", "ti,am33xx"
 
 - AM3359 ICEv2 : Low cost Industrial Communication Engine EVM.
-  compatible = "ti,am3359-icev2", "ti,am33xx", "ti,omap3"
+  compatible = "ti,am3359-icev2", "ti,am33xx"
 
 - AM335X OrionLXm : Substation Automation Platform
   compatible = "novatech,am335x-lxm", "ti,am33xx"
diff --git a/Documentation/devicetree/bindings/arm/omap/prm-inst.txt b/Documentation/devicetree/bindings/arm/omap/prm-inst.txt
new file mode 100644
index 000000000000..fcd3456afbbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/prm-inst.txt
@@ -0,0 +1,29 @@
+OMAP PRM instance bindings
+
+Power and Reset Manager is an IP block on OMAP family of devices which
+handle the power domains and their current state, and provide reset
+handling for the domains and/or separate IP blocks under the power domain
+hierarchy.
+
+Required properties:
+- compatible:	Must contain one of the following:
+		"ti,am3-prm-inst"
+		"ti,am4-prm-inst"
+		"ti,omap4-prm-inst"
+		"ti,omap5-prm-inst"
+		"ti,dra7-prm-inst"
+		and additionally must contain:
+		"ti,omap-prm-inst"
+- reg:		Contains PRM instance register address range
+		(base address and length)
+
+Optional properties:
+- #reset-cells:	Should be 1 if the PRM instance in question supports resets.
+
+Example:
+
+prm_dsp2: prm@1b00 {
+	compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+	reg = <0x1b00 0x40>;
+	#reset-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/realtek.yaml b/Documentation/devicetree/bindings/arm/realtek.yaml
index 3528b61963b4..ab59de17152d 100644
--- a/Documentation/devicetree/bindings/arm/realtek.yaml
+++ b/Documentation/devicetree/bindings/arm/realtek.yaml
@@ -13,11 +13,24 @@ properties:
   $nodename:
     const: '/'
   compatible:
-    # RTD1295 SoC based boards
-    items:
-      - enum:
-          - mele,v9
-          - probox2,ava
-          - zidoo,x9s
-      - const: realtek,rtd1295
+    oneOf:
+      # RTD1293 SoC based boards
+      - items:
+          - enum:
+              - synology,ds418j # Synology DiskStation DS418j
+          - const: realtek,rtd1293
+
+      # RTD1295 SoC based boards
+      - items:
+          - enum:
+              - mele,v9 # MeLE V9
+              - probox2,ava # ProBox2 AVA
+              - zidoo,x9s # Zidoo X9S
+          - const: realtek,rtd1295
+
+      # RTD1296 SoC based boards
+      - items:
+          - enum:
+              - synology,ds418 # Synology DiskStation DS418
+          - const: realtek,rtd1296
 ...
diff --git a/Documentation/devicetree/bindings/arm/renesas,prr.txt b/Documentation/devicetree/bindings/arm/renesas,prr.txt
deleted file mode 100644
index 08e482e953ca..000000000000
--- a/Documentation/devicetree/bindings/arm/renesas,prr.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Renesas Product Register
-
-Most Renesas ARM SoCs have a Product Register or Boundary Scan ID Register that
-allows to retrieve SoC product and revision information.  If present, a device
-node for this register should be added.
-
-Required properties:
-  - compatible: Must be one of:
-    "renesas,prr"
-    "renesas,bsid"
-  - reg: Base address and length of the register block.
-
-
-Examples
---------
-
-	prr: chipid@ff000044 {
-		compatible = "renesas,prr";
-		reg = <0 0xff000044 0 4>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/renesas,prr.yaml b/Documentation/devicetree/bindings/arm/renesas,prr.yaml
new file mode 100644
index 000000000000..7f8d17f33983
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/renesas,prr.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/renesas,prr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Product Register
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+  - Magnus Damm <magnus.damm@gmail.com>
+
+description: |
+  Most Renesas ARM SoCs have a Product Register or Boundary Scan ID
+  Register that allows to retrieve SoC product and revision information.
+  If present, a device node for this register should be added.
+
+properties:
+  compatible:
+    enum:
+      - renesas,prr
+      - renesas,bsid
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    prr: chipid@ff000044 {
+        compatible = "renesas,prr";
+        reg = <0 0xff000044 0 4>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/renesas.yaml b/Documentation/devicetree/bindings/arm/renesas.yaml
index 28eb458f761a..9436124c5809 100644
--- a/Documentation/devicetree/bindings/arm/renesas.yaml
+++ b/Documentation/devicetree/bindings/arm/renesas.yaml
@@ -116,6 +116,18 @@ properties:
           - const: hoperun,hihope-rzg2m
           - const: renesas,r8a774a1
 
+      - description: RZ/G2N (R8A774B1)
+        items:
+          - enum:
+              - hoperun,hihope-rzg2n # HopeRun HiHope RZ/G2N platform
+          - const: renesas,r8a774b1
+
+      - items:
+          - enum:
+              - hoperun,hihope-rzg2-ex # HopeRun expansion board for HiHope RZ/G2 platforms
+          - const: hoperun,hihope-rzg2n
+          - const: renesas,r8a774b1
+
       - description: RZ/G2E (R8A774C0)
         items:
           - enum:
@@ -193,15 +205,23 @@ properties:
               - renesas,salvator-xs # Salvator-XS (Salvator-X 2nd version, RTP0RC7796SIPB0012S)
           - const: renesas,r8a7796
 
+      - description: R-Car M3-W+ (R8A77961)
+        items:
+          - enum:
+              - renesas,salvator-xs # Salvator-XS (Salvator-X 2nd version, RTP0RC7796SIPB0012SA5A)
+          - const: renesas,r8a77961
+
       - description: Kingfisher (SBEV-RCAR-KF-M03)
         items:
           - const: shimafuji,kingfisher
           - enum:
               - renesas,h3ulcb
               - renesas,m3ulcb
+              - renesas,m3nulcb
           - enum:
               - renesas,r8a7795
               - renesas,r8a7796
+              - renesas,r8a77965
 
       - description: R-Car M3-N (R8A77965)
         items:
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index 9c7e70335ac0..d9847b306b83 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -40,6 +40,11 @@ properties:
           - const: asus,rk3288-tinker-s
           - const: rockchip,rk3288
 
+      - description: Beelink A1
+        items:
+          - const: azw,beelink-a1
+          - const: rockchip,rk3328
+
       - description: bq Curie 2 tablet
         items:
           - const: mundoreader,bq-curie2
@@ -82,6 +87,11 @@ properties:
           - const: firefly,firefly-rk3399
           - const: rockchip,rk3399
 
+      - description: Firefly ROC-RK3308-CC
+        items:
+          - const: firefly,roc-rk3308-cc
+          - const: rockchip,rk3308
+
       - description: Firefly roc-rk3328-cc
         items:
           - const: firefly,roc-rk3328-cc
@@ -89,7 +99,9 @@ properties:
 
       - description: Firefly ROC-RK3399-PC
         items:
-          - const: firefly,roc-rk3399-pc
+          - enum:
+              - firefly,roc-rk3399-pc
+              - firefly,roc-rk3399-pc-mezzanine
           - const: rockchip,rk3399
 
       - description: FriendlyElec NanoPi4 series boards
@@ -464,6 +476,11 @@ properties:
               - rockchip,rk3288-evb-rk808
           - const: rockchip,rk3288
 
+      - description: Rockchip RK3308 Evaluation board
+        items:
+          - const: rockchip,rk3308-evb
+          - const: rockchip,rk3308
+
       - description: Rockchip RK3328 Evaluation board
         items:
           - const: rockchip,rk3328-evb
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
deleted file mode 100644
index 85c5dfd4a720..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-SAMSUNG Exynos SoCs Chipid driver.
-
-Required properties:
-- compatible : Should at least contain "samsung,exynos4210-chipid".
-
-- reg: offset and length of the register set
-
-Example:
-	chipid@10000000 {
-		compatible = "samsung,exynos4210-chipid";
-		reg = <0x10000000 0x100>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.yaml b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.yaml
new file mode 100644
index 000000000000..afcd70803c12
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/exynos-chipid.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC series Chipid driver
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    items:
+      - const: samsung,exynos4210-chipid
+
+  reg:
+    maxItems: 1
+
+  samsung,asv-bin:
+    description:
+      Adaptive Supply Voltage bin selection. This can be used
+      to determine the ASV bin of an SoC if respective information
+      is missing in the CHIPID registers or in the OTP memory.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [ 0, 1, 2, 3 ]
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    chipid@10000000 {
+        compatible = "samsung,exynos4210-chipid";
+        reg = <0x10000000 0x100>;
+        samsung,asv-bin = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/samsung/pmu.txt b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
deleted file mode 100644
index 433bfd7593ac..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/pmu.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-SAMSUNG Exynos SoC series PMU Registers
-
-Properties:
- - compatible : should contain two values. First value must be one from following list:
-		   - "samsung,exynos3250-pmu" - for Exynos3250 SoC,
-		   - "samsung,exynos4210-pmu" - for Exynos4210 SoC,
-		   - "samsung,exynos4412-pmu" - for Exynos4412 SoC,
-		   - "samsung,exynos5250-pmu" - for Exynos5250 SoC,
-		   - "samsung,exynos5260-pmu" - for Exynos5260 SoC.
-		   - "samsung,exynos5410-pmu" - for Exynos5410 SoC,
-		   - "samsung,exynos5420-pmu" - for Exynos5420 SoC.
-		   - "samsung,exynos5433-pmu" - for Exynos5433 SoC.
-		   - "samsung,exynos7-pmu" - for Exynos7 SoC.
-		second value must be always "syscon".
-
- - reg : offset and length of the register set.
-
- - #clock-cells : must be <1>, since PMU requires once cell as clock specifier.
-		The single specifier cell is used as index to list of clocks
-		provided by PMU, which is currently:
-			0 : SoC clock output (CLKOUT pin)
-
- - clock-names : list of clock names for particular CLKOUT mux inputs in
-		following format:
-			"clkoutN", where N is a decimal number corresponding to
-			CLKOUT mux control bits value for given input, e.g.
-				"clkout0", "clkout7", "clkout15".
-
- - clocks : list of phandles and specifiers to all input clocks listed in
-		clock-names property.
-
-Optional properties:
-
-Some PMUs are capable of behaving as an interrupt controller (mostly
-to wake up a suspended PMU). In which case, they can have the
-following properties:
-
-- interrupt-controller: indicate that said PMU is an interrupt controller
-
-- #interrupt-cells: must be identical to the that of the parent interrupt
-  controller.
-
-
-Optional nodes:
-
-- nodes defining the restart and poweroff syscon children
-
-
-Example :
-pmu_system_controller: system-controller@10040000 {
-	compatible = "samsung,exynos5250-pmu", "syscon";
-	reg = <0x10040000 0x5000>;
-	interrupt-controller;
-	#interrupt-cells = <3>;
-	interrupt-parent = <&gic>;
-	#clock-cells = <1>;
-	clock-names = "clkout0", "clkout1", "clkout2", "clkout3",
-			"clkout4", "clkout8", "clkout9";
-	clocks = <&clock CLK_OUT_DMC>, <&clock CLK_OUT_TOP>,
-		<&clock CLK_OUT_LEFTBUS>, <&clock CLK_OUT_RIGHTBUS>,
-		<&clock CLK_OUT_CPU>, <&clock CLK_XXTI>,
-		<&clock CLK_XUSBXTI>;
-};
-
-Example of clock consumer :
-
-usb3503: usb3503@8 {
-	/* ... */
-	clock-names = "refclk";
-	clocks = <&pmu_system_controller 0>;
-	/* ... */
-};
diff --git a/Documentation/devicetree/bindings/arm/samsung/pmu.yaml b/Documentation/devicetree/bindings/arm/samsung/pmu.yaml
new file mode 100644
index 000000000000..73b56fc5bf58
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/pmu.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/pmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC series Power Management Unit (PMU)
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+# Custom select to avoid matching all nodes with 'syscon'
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - samsung,exynos3250-pmu
+          - samsung,exynos4210-pmu
+          - samsung,exynos4412-pmu
+          - samsung,exynos5250-pmu
+          - samsung,exynos5260-pmu
+          - samsung,exynos5410-pmu
+          - samsung,exynos5420-pmu
+          - samsung,exynos5433-pmu
+          - samsung,exynos7-pmu
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - samsung,exynos3250-pmu
+          - samsung,exynos4210-pmu
+          - samsung,exynos4412-pmu
+          - samsung,exynos5250-pmu
+          - samsung,exynos5260-pmu
+          - samsung,exynos5410-pmu
+          - samsung,exynos5420-pmu
+          - samsung,exynos5433-pmu
+          - samsung,exynos7-pmu
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+  clock-names:
+    description:
+      List of clock names for particular CLKOUT mux inputs
+    minItems: 1
+    maxItems: 32
+    items:
+      pattern: '^clkout([0-9]|[12][0-9]|3[0-1])$'
+
+  clocks:
+    minItems: 1
+    maxItems: 32
+
+  interrupt-controller:
+    description:
+      Some PMUs are capable of behaving as an interrupt controller (mostly
+      to wake up a suspended PMU).
+
+  '#interrupt-cells':
+    description:
+      Must be identical to the that of the parent interrupt controller.
+    const: 3
+
+  syscon-poweroff:
+    $ref: "../../power/reset/syscon-poweroff.yaml#"
+    type: object
+    description:
+      Node for power off method
+
+  syscon-reboot:
+    $ref: "../../power/reset/syscon-reboot.yaml#"
+    type: object
+    description:
+      Node for reboot method
+
+required:
+  - compatible
+  - reg
+  - '#clock-cells'
+  - clock-names
+  - clocks
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5250.h>
+
+    pmu_system_controller: system-controller@10040000 {
+        compatible = "samsung,exynos5250-pmu", "syscon";
+        reg = <0x10040000 0x5000>;
+        interrupt-controller;
+        #interrupt-cells = <3>;
+        interrupt-parent = <&gic>;
+        #clock-cells = <1>;
+        clock-names = "clkout16";
+        clocks = <&clock CLK_FIN_PLL>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
deleted file mode 100644
index 56021bf2a916..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-* Samsung's Exynos and S5P SoC based boards
-
-Required root node properties:
-    - compatible = should be one or more of the following.
-	- "samsung,aries"	- for S5PV210-based Samsung Aries board.
-	- "samsung,fascinate4g"	- for S5PV210-based Samsung Galaxy S Fascinate 4G (SGH-T959P) board.
-	- "samsung,galaxys"	- for S5PV210-based Samsung Galaxy S (i9000)  board.
-	- "samsung,artik5"	- for Exynos3250-based Samsung ARTIK5 module.
-	- "samsung,artik5-eval" - for Exynos3250-based Samsung ARTIK5 eval board.
-	- "samsung,monk"	- for Exynos3250-based Samsung Simband board.
-	- "samsung,rinato"	- for Exynos3250-based Samsung Gear2 board.
-	- "samsung,smdkv310"	- for Exynos4210-based Samsung SMDKV310 eval board.
-	- "samsung,trats"	- for Exynos4210-based Tizen Reference board.
-	- "samsung,universal_c210" - for Exynos4210-based Samsung board.
-	- "samsung,i9300"          - for Exynos4412-based Samsung GT-I9300 board.
-	- "samsung,i9305"          - for Exynos4412-based Samsung GT-I9305 board.
-	- "samsung,midas"       - for Exynos4412-based Samsung Midas board.
-	- "samsung,smdk4412",	- for Exynos4412-based Samsung SMDK4412 eval board.
-	- "samsung,n710x"          - for Exynos4412-based Samsung GT-N7100/GT-N7105 board.
-	- "samsung,trats2"	- for Exynos4412-based Tizen Reference board.
-	- "samsung,smdk5250"	- for Exynos5250-based Samsung SMDK5250 eval board.
-	- "samsung,xyref5260"	- for Exynos5260-based Samsung board.
-	- "samsung,smdk5410"	- for Exynos5410-based Samsung SMDK5410 eval board.
-	- "samsung,smdk5420"	- for Exynos5420-based Samsung SMDK5420 eval board.
-	- "samsung,tm2"		- for Exynos5433-based Samsung TM2 board.
-	- "samsung,tm2e"	- for Exynos5433-based Samsung TM2E board.
-
-* Other companies Exynos SoC based
-  * FriendlyARM
-	- "friendlyarm,tiny4412"  - for Exynos4412-based FriendlyARM
-				    TINY4412 board.
-  * TOPEET
-	- "topeet,itop4412-elite" - for Exynos4412-based TOPEET
-                                    Elite base board.
-
-  * Google
-	- "google,pi"		- for Exynos5800-based Google Peach Pi
-				  Rev 10+ board,
-	  also: "google,pi-rev16", "google,pi-rev15", "google,pi-rev14",
-		"google,pi-rev13", "google,pi-rev12", "google,pi-rev11",
-		"google,pi-rev10", "google,peach".
-
-	- "google,pit"		- for Exynos5420-based Google Peach Pit
-				  Rev 6+ (Exynos5420),
-	  also: "google,pit-rev16", "google,pit-rev15", "google,pit-rev14",
-		"google,pit-rev13", "google,pit-rev12", "google,pit-rev11",
-		"google,pit-rev10", "google,pit-rev9", "google,pit-rev8",
-		"google,pit-rev7", "google,pit-rev6", "google,peach".
-
-	- "google,snow-rev4"	- for Exynos5250-based Google Snow board,
-	  also: "google,snow"
-	- "google,snow-rev5"	- for Exynos5250-based Google Snow
-				  Rev 5+ board.
-	- "google,spring"	- for Exynos5250-based Google Spring board.
-
-  * Hardkernel
-	- "hardkernel,odroid-u3"  - for Exynos4412-based Hardkernel Odroid U3.
-	- "hardkernel,odroid-x"   - for Exynos4412-based Hardkernel Odroid X.
-	- "hardkernel,odroid-x2"  - for Exynos4412-based Hardkernel Odroid X2.
-	- "hardkernel,odroid-xu"  - for Exynos5410-based Hardkernel Odroid XU.
-	- "hardkernel,odroid-xu3" - for Exynos5422-based Hardkernel Odroid XU3.
-	- "hardkernel,odroid-xu3-lite" - for Exynos5422-based Hardkernel
-					 Odroid XU3 Lite board.
-	- "hardkernel,odroid-xu4" - for Exynos5422-based Hardkernel Odroid XU4.
-	- "hardkernel,odroid-hc1" - for Exynos5422-based Hardkernel Odroid HC1.
-
-  * Insignal
-	- "insignal,arndale"      - for Exynos5250-based Insignal Arndale board.
-	- "insignal,arndale-octa" - for Exynos5420-based Insignal Arndale
-				    Octa board.
-	- "insignal,origen"       - for Exynos4210-based Insignal Origen board.
-	- "insignal,origen4412"   - for Exynos4412-based Insignal Origen board.
-
-
-Optional nodes:
-    - firmware node, specifying presence and type of secure firmware:
-        - compatible: only "samsung,secure-firmware" is currently supported
-        - reg: address of non-secure SYSRAM used for communication with firmware
-
-	firmware@203f000 {
-		compatible = "samsung,secure-firmware";
-		reg = <0x0203F000 0x1000>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml
new file mode 100644
index 000000000000..63acd57c4799
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml
@@ -0,0 +1,181 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/samsung-boards.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos and S5P SoC based boards
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: S5PV210 based boards
+        items:
+          - enum:
+              - aesop,torbreck                  # aESOP Torbreck based on S5PV210
+              - samsung,aquila                  # Samsung Aquila based on S5PC110
+              - samsung,goni                    # Samsung Goni based on S5PC110
+              - yic,smdkc110                    # YIC System SMDKC110 based on S5PC110
+              - yic,smdkv210                    # YIC System SMDKV210 based on S5PV210
+          - const: samsung,s5pv210
+
+      - description: S5PV210 based Aries boards
+        items:
+          - enum:
+              - samsung,fascinate4g             # Samsung Galaxy S Fascinate 4G (SGH-T959P)
+              - samsung,galaxys                 # Samsung Galaxy S (i9000)
+          - const: samsung,aries
+          - const: samsung,s5pv210
+
+      - description: Exynos3250 based boards
+        items:
+          - enum:
+              - samsung,monk                    # Samsung Simband
+              - samsung,rinato                  # Samsung Gear2
+          - const: samsung,exynos3250
+          - const: samsung,exynos3
+
+      - description: Samsung ARTIK5 boards
+        items:
+          - enum:
+              - samsung,artik5-eval             # Samsung ARTIK5 eval board
+          - const: samsung,artik5               # Samsung ARTIK5 module
+          - const: samsung,exynos3250
+          - const: samsung,exynos3
+
+      - description: Exynos4210 based boards
+        items:
+          - enum:
+              - insignal,origen                 # Insignal Origen
+              - samsung,smdkv310                # Samsung SMDKV310 eval
+              - samsung,trats                   # Samsung Tizen Reference
+              - samsung,universal_c210          # Samsung C210
+          - const: samsung,exynos4210
+          - const: samsung,exynos4
+
+      - description: Exynos4412 based boards
+        items:
+          - enum:
+              - friendlyarm,tiny4412            # FriendlyARM TINY4412
+              - hardkernel,odroid-u3            # Hardkernel Odroid U3
+              - hardkernel,odroid-x             # Hardkernel Odroid X
+              - hardkernel,odroid-x2            # Hardkernel Odroid X2
+              - insignal,origen4412             # Insignal Origen
+              - samsung,smdk4412                # Samsung SMDK4412 eval
+              - topeet,itop4412-elite           # TOPEET Elite base
+          - const: samsung,exynos4412
+          - const: samsung,exynos4
+
+      - description: Samsung Midas family boards
+        items:
+          - enum:
+              - samsung,i9300                   # Samsung GT-I9300
+              - samsung,i9305                   # Samsung GT-I9305
+              - samsung,n710x                   # Samsung GT-N7100/GT-N7105
+              - samsung,trats2                  # Samsung Tizen Reference
+          - const: samsung,midas
+          - const: samsung,exynos4412
+          - const: samsung,exynos4
+
+      - description: Exynos5250 based boards
+        items:
+          - enum:
+              - google,snow-rev5                # Google Snow Rev 5+
+              - google,spring                   # Google Spring
+              - insignal,arndale                # Insignal Arndale
+              - samsung,smdk5250                # Samsung SMDK5250 eval
+          - const: samsung,exynos5250
+          - const: samsung,exynos5
+
+      - description: Google Snow Boards (Rev 4+)
+        items:
+          - const: google,snow-rev4
+          - const: google,snow
+          - const: samsung,exynos5250
+          - const: samsung,exynos5
+
+      - description: Exynos5260 based boards
+        items:
+          - enum:
+              - samsung,xyref5260               # Samsung Xyref5260 eval
+          - const: samsung,exynos5260
+          - const: samsung,exynos5
+
+      - description: Exynos5410 based boards
+        items:
+          - enum:
+              - hardkernel,odroid-xu            # Hardkernel Odroid XU
+              - samsung,smdk5410                # Samsung SMDK5410 eval
+          - const: samsung,exynos5410
+          - const: samsung,exynos5
+
+      - description: Exynos5420 based boards
+        items:
+          - enum:
+              - insignal,arndale-octa           # Insignal Arndale Octa
+              - samsung,smdk5420                # Samsung SMDK5420 eval
+          - const: samsung,exynos5420
+          - const: samsung,exynos5
+
+      - description: Google Peach Pit Boards (Rev 6+)
+        items:
+          - const: google,pit-rev16
+          - const: google,pit-rev15
+          - const: google,pit-rev14
+          - const: google,pit-rev13
+          - const: google,pit-rev12
+          - const: google,pit-rev11
+          - const: google,pit-rev10
+          - const: google,pit-rev9
+          - const: google,pit-rev8
+          - const: google,pit-rev7
+          - const: google,pit-rev6
+          - const: google,pit
+          - const: google,peach
+          - const: samsung,exynos5420
+          - const: samsung,exynos5
+
+      - description: Exynos5800 based boards
+        items:
+          - enum:
+              - hardkernel,odroid-xu3           # Hardkernel Odroid XU3
+              - hardkernel,odroid-xu3-lite      # Hardkernel Odroid XU3 Lite
+              - hardkernel,odroid-xu4           # Hardkernel Odroid XU4
+              - hardkernel,odroid-hc1           # Hardkernel Odroid HC1
+          - const: samsung,exynos5800
+          - const: samsung,exynos5
+
+      - description: Google Peach Pi Boards (Rev 10+)
+        items:
+          - const: google,pi-rev16
+          - const: google,pi-rev15
+          - const: google,pi-rev14
+          - const: google,pi-rev13
+          - const: google,pi-rev12
+          - const: google,pi-rev11
+          - const: google,pi-rev10
+          - const: google,pi
+          - const: google,peach
+          - const: samsung,exynos5800
+          - const: samsung,exynos5
+
+      - description: Exynos5433 based boards
+        items:
+          - enum:
+              - samsung,tm2                     # Samsung TM2
+              - samsung,tm2e                    # Samsung TM2E
+          - const: samsung,exynos5433
+
+      - description: Exynos7 based boards
+        items:
+          - enum:
+              - samsung,exynos7-espresso        # Samsung Exynos7 Espresso
+          - const: samsung,exynos7
+
+required:
+  - compatible
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml b/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml
new file mode 100644
index 000000000000..51d23b6f8a94
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/samsung-secure-firmware.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos Secure Firmware
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    items:
+      - const: samsung,secure-firmware
+
+  reg:
+    description:
+      Address of non-secure SYSRAM used for communication with firmware.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    firmware@203f000 {
+      compatible = "samsung,secure-firmware";
+      reg = <0x0203f000 0x1000>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/samsung/sysreg.txt b/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
deleted file mode 100644
index 4fced6e9d5e4..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-SAMSUNG S5P/Exynos SoC series System Registers (SYSREG)
-
-Properties:
- - compatible : should contain two values. First value must be one from following list:
-		- "samsung,exynos4-sysreg" - for Exynos4 based SoCs,
-		- "samsung,exynos5-sysreg" - for Exynos5 based SoCs.
-		second value must be always "syscon".
- - reg : offset and length of the register set.
-
-Example:
-	syscon@10010000 {
-		compatible = "samsung,exynos4-sysreg", "syscon";
-		reg = <0x10010000 0x400>;
-	};
-
-	syscon@10050000 {
-		compatible = "samsung,exynos5-sysreg", "syscon";
-		reg = <0x10050000 0x5000>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/samsung/sysreg.yaml b/Documentation/devicetree/bindings/arm/samsung/sysreg.yaml
new file mode 100644
index 000000000000..3b7811804cb4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/sysreg.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/sysreg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S5P/Exynos SoC series System Registers (SYSREG)
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+# Custom select to avoid matching all nodes with 'syscon'
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - samsung,exynos4-sysreg
+          - samsung,exynos5-sysreg
+  required:
+    - compatible
+
+properties:
+  compatible:
+    allOf:
+      - items:
+          - enum:
+              - samsung,exynos4-sysreg
+              - samsung,exynos5-sysreg
+          - const: syscon
+
+  reg:
+    maxItems: 1
+
+examples:
+  - |
+    syscon@10010000 {
+        compatible = "samsung,exynos4-sysreg", "syscon";
+        reg = <0x10010000 0x400>;
+    };
+
+    syscon@10050000 {
+        compatible = "samsung,exynos5-sysreg", "syscon";
+        reg = <0x10050000 0x5000>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/sprd.txt b/Documentation/devicetree/bindings/arm/sprd.txt
deleted file mode 100644
index 3df034b13e28..000000000000
--- a/Documentation/devicetree/bindings/arm/sprd.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Spreadtrum SoC Platforms Device Tree Bindings
-----------------------------------------------------
-
-SC9836 openphone Board
-Required root node properties:
-	- compatible = "sprd,sc9836-openphone", "sprd,sc9836";
-
-SC9860 SoC
-Required root node properties:
-	- compatible = "sprd,sc9860"
-
-SP9860G 3GFHD Board
-Required root node properties:
-	- compatible = "sprd,sp9860g-1h10", "sprd,sc9860";
diff --git a/Documentation/devicetree/bindings/arm/sprd.yaml b/Documentation/devicetree/bindings/arm/sprd.yaml
new file mode 100644
index 000000000000..c35fb845ccaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sprd.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2019 Unisoc Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sprd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Unisoc platforms device tree bindings
+
+maintainers:
+  - Orson Zhai <orsonzhai@gmail.com>
+  - Baolin Wang <baolin.wang7@gmail.com>
+  - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - sprd,sc9836-openphone
+          - const: sprd,sc9836
+      - items:
+          - enum:
+              - sprd,sp9860g-1h10
+          - const: sprd,sc9860
+      - items:
+          - enum:
+              - sprd,sp9863a-1h10
+          - const: sprd,sc9863a
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/mlahb.txt b/Documentation/devicetree/bindings/arm/stm32/mlahb.txt
deleted file mode 100644
index 25307aa1eb9b..000000000000
--- a/Documentation/devicetree/bindings/arm/stm32/mlahb.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-ML-AHB interconnect bindings
-
-These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
-a Cortex-M subsystem with dedicated memories.
-The MCU SRAM and RETRAM memory parts can be accessed through different addresses
-(see "RAM aliases" in [1]) using different buses (see [2]) : balancing the
-Cortex-M firmware accesses among those ports allows to tune the system
-performance.
-
-[1]: https://www.st.com/resource/en/reference_manual/dm00327659.pdf
-[2]: https://wiki.st.com/stm32mpu/wiki/STM32MP15_RAM_mapping
-
-Required properties:
-- compatible: should be "simple-bus"
-- dma-ranges: describes memory addresses translation between the local CPU and
-	   the remote Cortex-M processor. Each memory region, is declared with
-	   3 parameters:
-		 - param 1: device base address (Cortex-M processor address)
-		 - param 2: physical base address (local CPU address)
-		 - param 3: size of the memory region.
-
-The Cortex-M remote processor accessed via the mlahb interconnect is described
-by a child node.
-
-Example:
-mlahb {
-	compatible = "simple-bus";
-	#address-cells = <1>;
-	#size-cells = <1>;
-	dma-ranges = <0x00000000 0x38000000 0x10000>,
-		     <0x10000000 0x10000000 0x60000>,
-		     <0x30000000 0x30000000 0x60000>;
-
-	m4_rproc: m4@10000000 {
-		...
-	};
-};
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
new file mode 100644
index 000000000000..68917bb7c7e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/stm32/st,mlahb.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: STMicroelectronics STM32 ML-AHB interconnect bindings
+
+maintainers:
+  - Fabien Dessenne <fabien.dessenne@st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+
+description: |
+  These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
+  a Cortex-M subsystem with dedicated memories. The MCU SRAM and RETRAM memory
+  parts can be accessed through different addresses (see "RAM aliases" in [1])
+  using different buses (see [2]): balancing the Cortex-M firmware accesses
+  among those ports allows to tune the system performance.
+  [1]: https://www.st.com/resource/en/reference_manual/dm00327659.pdf
+  [2]: https://wiki.st.com/stm32mpu/wiki/STM32MP15_RAM_mapping
+
+allOf:
+ - $ref: /schemas/simple-bus.yaml#
+
+properties:
+  compatible:
+    contains:
+      enum:
+        - st,mlahb
+
+  dma-ranges:
+    description: |
+      Describe memory addresses translation between the local CPU and the
+      remote Cortex-M processor. Each memory region, is declared with
+      3 parameters:
+      - param 1: device base address (Cortex-M processor address)
+      - param 2: physical base address (local CPU address)
+      - param 3: size of the memory region.
+    maxItems: 3
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 1
+
+required:
+  - compatible
+  - '#address-cells'
+  - '#size-cells'
+  - dma-ranges
+
+examples:
+  - |
+    mlahb: ahb {
+      compatible = "st,mlahb", "simple-bus";
+      #address-cells = <1>;
+      #size-cells = <1>;
+      reg = <0x10000000 0x40000>;
+      ranges;
+      dma-ranges = <0x00000000 0x38000000 0x10000>,
+                   <0x10000000 0x10000000 0x60000>,
+                   <0x30000000 0x30000000 0x60000>;
+
+      m4_rproc: m4@10000000 {
+       reg = <0x10000000 0x40000>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
new file mode 100644
index 000000000000..0dedf94c8578
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/stm32/st,stm32-syscon.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: STMicroelectronics STM32 Platforms System Controller bindings
+
+maintainers:
+  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Christophe Roullier <christophe.roullier@st.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - st,stm32mp157-syscfg
+        - const: syscon
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    syscfg: syscon@50020000 {
+        compatible = "st,stm32mp157-syscfg", "syscon";
+        reg = <0x50020000 0x400>;
+        clocks = <&rcc SYSCFG>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt b/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
deleted file mode 100644
index c92d411fd023..000000000000
--- a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-STMicroelectronics STM32 Platforms System Controller
-
-Properties:
-   - compatible : should contain two values. First value must be :
-                 - " st,stm32mp157-syscfg " - for stm32mp157 based SoCs,
-                 second value must be always "syscon".
-   - reg : offset and length of the register set.
-   - clocks: phandle to the syscfg clock
-
- Example:
-         syscfg: syscon@50020000 {
-                 compatible = "st,stm32mp157-syscfg", "syscon";
-                 reg = <0x50020000 0x400>;
-                 clocks = <&rcc SYSCFG>;
-         };
-
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
index 4d194f1eb03a..1fcf306bd2d1 100644
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -13,19 +13,38 @@ properties:
   compatible:
     oneOf:
       - items:
+          - enum:
+              - st,stm32f429i-disco
+              - st,stm32429i-eval
           - const: st,stm32f429
-
       - items:
+          - enum:
+              - st,stm32f469i-disco
           - const: st,stm32f469
-
       - items:
+          - enum:
+              - st,stm32f746-disco
+              - st,stm32746g-eval
           - const: st,stm32f746
-
       - items:
+          - enum:
+              - st,stm32f769-disco
+          - const: st,stm32f769
+      - items:
+          - enum:
+              - st,stm32h743i-disco
+              - st,stm32h743i-eval
           - const: st,stm32h743
-
       - items:
           - enum:
               - arrow,stm32mp157a-avenger96 # Avenger96
+              - st,stm32mp157c-ed1
+              - st,stm32mp157a-dk1
+              - st,stm32mp157c-dk2
+
+          - const: st,stm32mp157
+      - items:
+          - const: st,stm32mp157c-ev1
+          - const: st,stm32mp157c-ed1
           - const: st,stm32mp157
 ...
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 972b1e9ee804..cffe8bb0bad1 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -8,7 +8,7 @@ title: Allwinner platforms device tree bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   $nodename:
@@ -211,6 +211,11 @@ properties:
           - const: friendlyarm,nanopi-a64
           - const: allwinner,sun50i-a64
 
+      - description: FriendlyARM NanoPi Duo2
+        items:
+          - const: friendlyarm,nanopi-duo2
+          - const: allwinner,sun8i-h3
+
       - description: FriendlyARM NanoPi M1
         items:
           - const: friendlyarm,nanopi-m1
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml
new file mode 100644
index 000000000000..9370e64992dd
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun4i-a10-mbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Memory Bus (MBUS) controller
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The MBUS controller drives the MBUS that other devices in the SoC
+  will use to perform DMA. It also has a register interface that
+  allows to monitor and control the bandwidth and priorities for
+  masters on that bus.
+
+  Each device having to perform their DMA through the MBUS must have
+  the interconnects and interconnect-names properties set to the MBUS
+  controller and with "dma-mem" as the interconnect name.
+
+properties:
+  "#interconnect-cells":
+    const: 1
+    description:
+      The content of the cell is the MBUS ID.
+
+  compatible:
+    enum:
+      - allwinner,sun5i-a13-mbus
+      - allwinner,sun8i-h3-mbus
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  dma-ranges:
+    description:
+      See section 2.3.9 of the DeviceTree Specification.
+
+required:
+  - "#interconnect-cells"
+  - compatible
+  - reg
+  - clocks
+  - dma-ranges
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun5i-ccu.h>
+
+    mbus: dram-controller@1c01000 {
+        compatible = "allwinner,sun5i-a13-mbus";
+        reg = <0x01c01000 0x1000>;
+        clocks = <&ccu CLK_MBUS>;
+        dma-ranges = <0x00000000 0x40000000 0x20000000>;
+        #interconnect-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt b/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
deleted file mode 100644
index 082e6a9382d3..000000000000
--- a/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Allwinner SRAM for smp bringup:
-------------------------------------------------
-
-Allwinner's A80 SoC uses part of the secure sram for hotplugging of the
-primary core (cpu0). Once the core gets powered up it checks if a magic
-value is set at a specific location. If it is then the BROM will jump
-to the software entry address, instead of executing a standard boot.
-
-Therefore a reserved section sub-node has to be added to the mmio-sram
-declaration.
-
-Note that this is separate from the Allwinner SRAM controller found in
-../../sram/sunxi-sram.txt. This SRAM is secure only and not mappable to
-any device.
-
-Also there are no "secure-only" properties. The implementation should
-check if this SRAM is usable first.
-
-Required sub-node properties:
-- compatible : depending on the SoC this should be one of:
-		"allwinner,sun9i-a80-smp-sram"
-
-The rest of the properties should follow the generic mmio-sram discription
-found in ../../misc/sram.txt
-
-Example:
-
-	sram_b: sram@20000 {
-		/* 256 KiB secure SRAM at 0x20000 */
-		compatible = "mmio-sram";
-		reg = <0x00020000 0x40000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0x00020000 0x40000>;
-
-		smp-sram@1000 {
-			/*
-			 * This is checked by BROM to determine if
-			 * cpu0 should jump to SMP entry vector
-			 */
-			compatible = "allwinner,sun9i-a80-smp-sram";
-			reg = <0x1000 0x8>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/arm/sunxi/sunxi-mbus.txt b/Documentation/devicetree/bindings/arm/sunxi/sunxi-mbus.txt
deleted file mode 100644
index 1464a4713553..000000000000
--- a/Documentation/devicetree/bindings/arm/sunxi/sunxi-mbus.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Allwinner Memory Bus (MBUS) controller
-
-The MBUS controller drives the MBUS that other devices in the SoC will
-use to perform DMA. It also has a register interface that allows to
-monitor and control the bandwidth and priorities for masters on that
-bus.
-
-Required properties:
- - compatible: Must be one of:
-	- allwinner,sun5i-a13-mbus
- - reg: Offset and length of the register set for the controller
- - clocks: phandle to the clock driving the controller
- - dma-ranges: See section 2.3.9 of the DeviceTree Specification
- - #interconnect-cells: Must be one, with the argument being the MBUS
-   port ID
-
-Each device having to perform their DMA through the MBUS must have the
-interconnects and interconnect-names properties set to the MBUS
-controller and with "dma-mem" as the interconnect name.
-
-Example:
-
-mbus: dram-controller@1c01000 {
-	compatible = "allwinner,sun5i-a13-mbus";
-	reg = <0x01c01000 0x1000>;
-	clocks = <&ccu CLK_MBUS>;
-	dma-ranges = <0x00000000 0x40000000 0x20000000>;
-	#interconnect-cells = <1>;
-};
-
-fe0: display-frontend@1e00000 {
-	compatible = "allwinner,sun5i-a13-display-frontend";
-	...
-	interconnects = <&mbus 19>;
-	interconnect-names = "dma-mem";
-};
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index 55c6fab1b373..77091a277642 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -9,8 +9,6 @@ PHYs.
 
 Required properties:
 - compatible        : compatible string, one of:
-  - "allwinner,sun4i-a10-ahci"
-  - "allwinner,sun8i-r40-ahci"
   - "brcm,iproc-ahci"
   - "hisilicon,hisi-ahci"
   - "cavium,octeon-7130-ahci"
@@ -45,8 +43,6 @@ Required properties when using sub-nodes:
 - #address-cells    : number of cells to encode an address
 - #size-cells       : number of cells representing the size of an address
 
-For allwinner,sun8i-r40-ahci, the reset property must be present.
-
 Sub-nodes required properties:
 - reg		    : the port number
 And at least one of the following properties:
@@ -60,14 +56,6 @@ Examples:
 		interrupts = <115>;
         };
 
-	ahci: sata@1c18000 {
-		compatible = "allwinner,sun4i-a10-ahci";
-		reg = <0x01c18000 0x1000>;
-		interrupts = <56>;
-		clocks = <&pll6 0>, <&ahb_gates 25>;
-		target-supply = <&reg_ahci_5v>;
-	};
-
 With sub-nodes:
 	sata@f7e90000 {
 		compatible = "marvell,berlin2q-achi", "generic-ahci";
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
new file mode 100644
index 000000000000..cb530b46beff
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/allwinner,sun4i-a10-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AHCI SATA Controller bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    const: allwinner,sun4i-a10-ahci
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: AHCI Bus Clock
+      - description: AHCI Module Clock
+
+  interrupts:
+    maxItems: 1
+
+  target-supply:
+    description: Regulator for SATA target power
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    ahci: sata@1c18000 {
+        compatible = "allwinner,sun4i-a10-ahci";
+        reg = <0x01c18000 0x1000>;
+        interrupts = <56>;
+        clocks = <&pll6 0>, <&ahb_gates 25>;
+        target-supply = <&reg_ahci_5v>;
+    };
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
new file mode 100644
index 000000000000..e6b42a113ff1
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/allwinner,sun8i-r40-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner R40 AHCI SATA Controller bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    const: allwinner,sun8i-r40-ahci
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: AHCI Bus Clock
+      - description: AHCI Module Clock
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    const: ahci
+
+  ahci-supply:
+    description: Regulator for the AHCI controller
+
+  phy-supply:
+    description: Regulator for the SATA PHY power
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun8i-r40-ccu.h>
+    #include <dt-bindings/reset/sun8i-r40-ccu.h>
+
+    ahci: sata@1c18000 {
+        compatible = "allwinner,sun8i-r40-ahci";
+        reg = <0x01c18000 0x1000>;
+        interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
+        resets = <&ccu RST_BUS_SATA>;
+        reset-names = "ahci";
+        ahci-supply = <&reg_dldo4>;
+        phy-supply = <&reg_eldo3>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt
index 7713a413c6a7..b9ae4ce4a0a0 100644
--- a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt
+++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt
@@ -5,6 +5,7 @@ Each SATA controller should have its own node.
 
 Required properties:
 - compatible         : should be one or more of
+			"brcm,bcm7216-ahci"
 			"brcm,bcm7425-ahci"
 			"brcm,bcm7445-ahci"
 			"brcm,bcm-nsp-ahci"
@@ -14,6 +15,12 @@ Required properties:
 - reg-names          : "ahci" and "top-ctrl"
 - interrupts         : interrupt mapping for SATA IRQ
 
+Optional properties:
+
+- reset: for "brcm,bcm7216-ahci" must be a valid reset phandle
+  pointing to the RESCAL reset controller provider node.
+- reset-names: for "brcm,bcm7216-ahci", must be "rescal".
+
 Also see ahci-platform.txt.
 
 Example:
diff --git a/Documentation/devicetree/bindings/ata/faraday,ftide010.txt b/Documentation/devicetree/bindings/ata/faraday,ftide010.txt
deleted file mode 100644
index a0c64a29104d..000000000000
--- a/Documentation/devicetree/bindings/ata/faraday,ftide010.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Faraday Technology FTIDE010 PATA controller
-
-This controller is the first Faraday IDE interface block, used in the
-StorLink SL2312 and SL3516, later known as the Cortina Systems Gemini
-platform. The controller can do PIO modes 0 through 4, Multi-word DMA
-(MWDM)modes 0 through 2 and Ultra DMA modes 0 through 6.
-
-On the Gemini platform, this PATA block is accompanied by a PATA to
-SATA bridge in order to support SATA. This is why a phandle to that
-controller is compulsory on that platform.
-
-The timing properties are unique per-SoC, not per-board.
-
-Required properties:
-- compatible: should be one of
-  "cortina,gemini-pata", "faraday,ftide010"
-  "faraday,ftide010"
-- interrupts: interrupt for the block
-- reg: registers and size for the block
-
-Optional properties:
-- clocks: a SoC clock running the peripheral.
-- clock-names: should be set to "PCLK" for the peripheral clock.
-
-Required properties for "cortina,gemini-pata" compatible:
-- sata: a phande to the Gemini PATA to SATA bridge, see
-  cortina,gemini-sata-bridge.txt for details.
-
-Example:
-
-ata@63000000 {
-	compatible = "cortina,gemini-pata", "faraday,ftide010";
-	reg = <0x63000000 0x100>;
-	interrupts = <4 IRQ_TYPE_EDGE_RISING>;
-	clocks = <&gcc GEMINI_CLK_GATE_IDE>;
-	clock-names = "PCLK";
-	sata = <&sata>;
-};
diff --git a/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
new file mode 100644
index 000000000000..bfc6357476fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/faraday,ftide010.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Faraday Technology FTIDE010 PATA controller
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  This controller is the first Faraday IDE interface block, used in the
+  StorLink SL3512 and SL3516, later known as the Cortina Systems Gemini
+  platform. The controller can do PIO modes 0 through 4, Multi-word DMA
+  (MWDM) modes 0 through 2 and Ultra DMA modes 0 through 6.
+
+  On the Gemini platform, this PATA block is accompanied by a PATA to
+  SATA bridge in order to support SATA. This is why a phandle to that
+  controller is compulsory on that platform.
+
+  The timing properties are unique per-SoC, not per-board.
+
+properties:
+  compatible:
+    oneOf:
+      - const: faraday,ftide010
+      - items:
+        - const: cortina,gemini-pata
+        - const: faraday,ftide010
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+
+  clock-names:
+    const: PCLK
+
+  sata:
+    description:
+      phandle to the Gemini PATA to SATA bridge, if available
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+allOf:
+  - $ref: pata-common.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: cortina,gemini-pata
+
+    then:
+      required:
+        - sata
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/clock/cortina,gemini-clock.h>
+
+    ide@63000000 {
+      compatible = "cortina,gemini-pata", "faraday,ftide010";
+      reg = <0x63000000 0x100>;
+      interrupts = <4 IRQ_TYPE_EDGE_RISING>;
+      clocks = <&gcc GEMINI_CLK_GATE_IDE>;
+      clock-names = "PCLK";
+      sata = <&sata>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+      ide-port@0 {
+        reg = <0>;
+      };
+      ide-port@1 {
+        reg = <1>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/pata-common.yaml b/Documentation/devicetree/bindings/ata/pata-common.yaml
new file mode 100644
index 000000000000..fc5ebbe7108d
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/pata-common.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/pata-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Parallel AT attachment (PATA) controllers
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  This document defines device tree properties common to most Parallel
+  ATA (PATA, also known as IDE) AT attachment storage devices.
+  It doesn't constitue a device tree binding specification by itself but is
+  meant to be referenced by device tree bindings.
+
+  The PATA (IDE) controller-specific device tree bindings are responsible for
+  defining whether each property is required or optional.
+
+properties:
+  $nodename:
+    pattern: "^ide(@.*)?$"
+    description:
+      Specifies the host controller node. PATA host controller nodes are named
+      "ide".
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^ide-port@[0-1]$":
+    description: |
+      DT nodes for ports connected on the PATA host. The master drive will have
+      ID number 0 and the slave drive will have ID number 1. The PATA port
+      nodes will be named "ide-port".
+    type: object
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 1
+        description:
+          The ID number of the drive port, 0 for the master port and 1 for the
+          slave port.
+
+...
diff --git a/Documentation/devicetree/bindings/ata/sata-common.yaml b/Documentation/devicetree/bindings/ata/sata-common.yaml
new file mode 100644
index 000000000000..6783a4dec6b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/sata-common.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/sata-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Serial AT attachment (SATA) controllers
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  This document defines device tree properties common to most Serial
+  AT attachment (SATA) storage devices. It doesn't constitute a device tree
+  binding specification by itself but is meant to be referenced by device
+  tree bindings.
+
+  The SATA controller-specific device tree bindings are responsible for
+  defining whether each property is required or optional.
+
+properties:
+  $nodename:
+    pattern: "^sata(@.*)?$"
+    description:
+      Specifies the host controller node. SATA host controller nodes are named
+      "sata"
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^sata-port@[0-9a-e]$":
+    description: |
+      DT nodes for ports connected on the SATA host. The SATA port
+      nodes will be named "sata-port".
+    type: object
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 14
+        description:
+          The ID number of the drive port SATA can potentially use a port
+          multiplier making it possible to connect up to 15 disks to a single
+          SATA port.
+
+...
diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt
index 4268e17d2411..a2fbdc91570d 100644
--- a/Documentation/devicetree/bindings/ata/sata_rcar.txt
+++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt
@@ -2,6 +2,7 @@
 
 Required properties:
 - compatible		: should contain one or more of the following:
+			  - "renesas,sata-r8a774b1" for RZ/G2N
 			  - "renesas,sata-r8a7779" for R-Car H1
 			  - "renesas,sata-r8a7790-es1" for R-Car H2 ES1
 			  - "renesas,sata-r8a7790" for R-Car H2 other than ES1
@@ -9,8 +10,10 @@ Required properties:
 			  - "renesas,sata-r8a7793" for R-Car M2-N
 			  - "renesas,sata-r8a7795" for R-Car H3
 			  - "renesas,sata-r8a77965" for R-Car M3-N
-			  - "renesas,rcar-gen2-sata" for a generic R-Car Gen2 compatible device
-			  - "renesas,rcar-gen3-sata" for a generic R-Car Gen3 compatible device
+			  - "renesas,rcar-gen2-sata" for a generic R-Car Gen2
+			     compatible device
+			  - "renesas,rcar-gen3-sata" for a generic R-Car Gen3 or
+			     RZ/G2 compatible device
 			  - "renesas,rcar-sata" is deprecated
 
 			  When compatible with the generic version nodes
diff --git a/Documentation/devicetree/bindings/board/fsl-board.txt b/Documentation/devicetree/bindings/board/fsl-board.txt
index eb52f6b35159..9cde57015921 100644
--- a/Documentation/devicetree/bindings/board/fsl-board.txt
+++ b/Documentation/devicetree/bindings/board/fsl-board.txt
@@ -47,36 +47,6 @@ Example (LS2080A-RDB):
                 reg = <0x3 0 0x10000>;
         };
 
-* Freescale BCSR GPIO banks
-
-Some BCSR registers act as simple GPIO controllers, each such
-register can be represented by the gpio-controller node.
-
-Required properities:
-- compatible : Should be "fsl,<board>-bcsr-gpio".
-- reg : Should contain the address and the length of the GPIO bank
-  register.
-- #gpio-cells : Should be two. The first cell is the pin number and the
-  second cell is used to specify optional parameters (currently unused).
-- gpio-controller : Marks the port as GPIO controller.
-
-Example:
-
-	bcsr@1,0 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "fsl,mpc8360mds-bcsr";
-		reg = <1 0 0x8000>;
-		ranges = <0 1 0 0x8000>;
-
-		bcsr13: gpio-controller@d {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8360mds-bcsr-gpio";
-			reg = <0xd 1>;
-			gpio-controller;
-		};
-	};
-
 * Freescale on-board FPGA connected on I2C bus
 
 Some Freescale boards like BSC9132QDS have on board FPGA connected on
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
index d2a872286437..f0b3d30fbb76 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 Display Engine Bus Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
index be32f087c529..9fe11ceecdba 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -8,7 +8,7 @@ title: Allwinner A23 RSB Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells":
diff --git a/Documentation/devicetree/bindings/bus/renesas,bsc.txt b/Documentation/devicetree/bindings/bus/renesas,bsc.txt
deleted file mode 100644
index 90e947269437..000000000000
--- a/Documentation/devicetree/bindings/bus/renesas,bsc.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-Renesas Bus State Controller (BSC)
-==================================
-
-The Renesas Bus State Controller (BSC, sometimes called "LBSC within Bus
-Bridge", or "External Bus Interface") can be found in several Renesas ARM SoCs.
-It provides an external bus for connecting multiple external devices to the
-SoC, driving several chip select lines, for e.g. NOR FLASH, Ethernet and USB.
-
-While the BSC is a fairly simple memory-mapped bus, it may be part of a PM
-domain, and may have a gateable functional clock.
-Before a device connected to the BSC can be accessed, the PM domain
-containing the BSC must be powered on, and the functional clock
-driving the BSC must be enabled.
-
-The bindings for the BSC extend the bindings for "simple-pm-bus".
-
-
-Required properties
-  - compatible: Must contain an SoC-specific value, and "renesas,bsc" and
-		"simple-pm-bus" as fallbacks.
-                SoC-specific values can be:
-		"renesas,bsc-r8a73a4" for R-Mobile APE6 (r8a73a4)
-		"renesas,bsc-sh73a0" for SH-Mobile AG5 (sh73a0)
-  - #address-cells, #size-cells, ranges: Must describe the mapping between
-		parent address and child address spaces.
-  - reg: Must contain the base address and length to access the bus controller.
-
-Optional properties:
-  - interrupts: Must contain a reference to the BSC interrupt, if available.
-  - clocks: Must contain a reference to the functional clock, if available.
-  - power-domains: Must contain a reference to the PM domain, if available.
-
-
-Example:
-
-	bsc: bus@fec10000 {
-		compatible = "renesas,bsc-sh73a0", "renesas,bsc",
-			     "simple-pm-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0 0x20000000>;
-		reg = <0xfec10000 0x400>;
-		interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&zb_clk>;
-		power-domains = <&pd_a4s>;
-	};
diff --git a/Documentation/devicetree/bindings/bus/renesas,bsc.yaml b/Documentation/devicetree/bindings/bus/renesas,bsc.yaml
new file mode 100644
index 000000000000..7d10b62a52d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/renesas,bsc.yaml
@@ -0,0 +1,60 @@
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/renesas,bsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Bus State Controller (BSC)
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+  The Renesas Bus State Controller (BSC, sometimes called "LBSC within Bus
+  Bridge", or "External Bus Interface") can be found in several Renesas ARM
+  SoCs.  It provides an external bus for connecting multiple external
+  devices to the SoC, driving several chip select lines, for e.g. NOR
+  FLASH, Ethernet and USB.
+
+  While the BSC is a fairly simple memory-mapped bus, it may be part of a
+  PM domain, and may have a gateable functional clock.  Before a device
+  connected to the BSC can be accessed, the PM domain containing the BSC
+  must be powered on, and the functional clock driving the BSC must be
+  enabled.
+
+  The bindings for the BSC extend the bindings for "simple-pm-bus".
+
+allOf:
+  - $ref: simple-pm-bus.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,bsc-r8a73a4  # R-Mobile APE6 (r8a73a4)
+          - renesas,bsc-sh73a0   # SH-Mobile AG5 (sh73a0)
+      - const: renesas,bsc
+      - {} # simple-pm-bus, but not listed here to avoid false select
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    bsc: bus@fec10000 {
+        compatible = "renesas,bsc-sh73a0", "renesas,bsc", "simple-pm-bus";
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0 0x20000000>;
+        reg = <0xfec10000 0x400>;
+        interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&zb_clk>;
+        power-domains = <&pd_a4s>;
+    };
diff --git a/Documentation/devicetree/bindings/bus/simple-pm-bus.txt b/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
deleted file mode 100644
index 6f15037131ed..000000000000
--- a/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Simple Power-Managed Bus
-========================
-
-A Simple Power-Managed Bus is a transparent bus that doesn't need a real
-driver, as it's typically initialized by the boot loader.
-
-However, its bus controller is part of a PM domain, or under the control of a
-functional clock.  Hence, the bus controller's PM domain and/or clock must be
-enabled for child devices connected to the bus (either on-SoC or externally)
-to function.
-
-While "simple-pm-bus" follows the "simple-bus" set of properties, as specified
-in the Devicetree Specification, it is not an extension of "simple-bus".
-
-
-Required properties:
-  - compatible: Must contain at least "simple-pm-bus".
-		Must not contain "simple-bus".
-		It's recommended to let this be preceded by one or more
-		vendor-specific compatible values.
-  - #address-cells, #size-cells, ranges: Must describe the mapping between
-		parent address and child address spaces.
-
-Optional platform-specific properties for clock or PM domain control (at least
-one of them is required):
-  - clocks: Must contain a reference to the functional clock(s),
-  - power-domains: Must contain a reference to the PM domain.
-Please refer to the binding documentation for the clock and/or PM domain
-providers for more details.
-
-
-Example:
-
-	bsc: bus@fec10000 {
-		compatible = "renesas,bsc-sh73a0", "renesas,bsc",
-			     "simple-pm-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0 0x20000000>;
-		reg = <0xfec10000 0x400>;
-		interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&zb_clk>;
-		power-domains = <&pd_a4s>;
-	};
diff --git a/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml b/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml
new file mode 100644
index 000000000000..33326ffdb266
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/simple-pm-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple Power-Managed Bus
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+  A Simple Power-Managed Bus is a transparent bus that doesn't need a real
+  driver, as it's typically initialized by the boot loader.
+
+  However, its bus controller is part of a PM domain, or under the control
+  of a functional clock.  Hence, the bus controller's PM domain and/or
+  clock must be enabled for child devices connected to the bus (either
+  on-SoC or externally) to function.
+
+  While "simple-pm-bus" follows the "simple-bus" set of properties, as
+  specified in the Devicetree Specification, it is not an extension of
+  "simple-bus".
+
+properties:
+  $nodename:
+    pattern: "^bus(@[0-9a-f]+)?$"
+
+  compatible:
+    contains:
+      const: simple-pm-bus
+    description:
+      Shall contain "simple-pm-bus" in addition to a optional bus-specific
+      compatible strings defined in individual pm-bus bindings.
+
+  '#address-cells':
+    enum: [ 1, 2 ]
+
+  '#size-cells':
+    enum: [ 1, 2 ]
+
+  ranges: true
+
+  clocks: true
+    # Functional clocks
+    # Required if power-domains is absent, optional otherwise
+
+  power-domains:
+    # Required if clocks is absent, optional otherwise
+    minItems: 1
+
+required:
+  - compatible
+  - '#address-cells'
+  - '#size-cells'
+  - ranges
+
+anyOf:
+  - required:
+      - clocks
+  - required:
+      - power-domains
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-msm8996.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    bus {
+        power-domains = <&gcc AGGRE0_NOC_GDSC>;
+        compatible = "simple-pm-bus";
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+    };
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml
new file mode 100644
index 000000000000..558db4b6ed17
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AHB Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-ahb-clk
+      - allwinner,sun6i-a31-ahb1-clk
+      - allwinner,sun8i-h3-ahb2-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun4i-a10-ahb-clk
+
+    then:
+      properties:
+        clocks:
+          maxItems: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun6i-a31-ahb1-clk
+
+    then:
+      properties:
+        clocks:
+          maxItems: 4
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-h3-ahb2-clk
+
+    then:
+      properties:
+        clocks:
+          maxItems: 2
+
+examples:
+  - |
+    ahb@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-ahb-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&axi>;
+        clock-output-names = "ahb";
+    };
+
+  - |
+    ahb1@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun6i-a31-ahb1-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&osc32k>, <&osc24M>, <&axi>, <&pll6 0>;
+        clock-output-names = "ahb1";
+    };
+
+  - |
+    ahb2_clk@1c2005c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun8i-h3-ahb2-clk";
+        reg = <0x01c2005c 0x4>;
+        clocks = <&ahb1>, <&pll6d2>;
+        clock-output-names = "ahb2";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml
new file mode 100644
index 000000000000..b1e3d739beb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-apb0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 APB0 Bus Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-apb0-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    apb0@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-apb0-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&ahb>;
+        clock-output-names = "apb0";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml
new file mode 100644
index 000000000000..51b7a6d4ea54
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-apb1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 APB1 Bus Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-apb1-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20058 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-apb1-clk";
+        reg = <0x01c20058 0x4>;
+        clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
+        clock-output-names = "apb1";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml
new file mode 100644
index 000000000000..d801158e15de
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-axi-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AXI Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-axi-clk
+      - allwinner,sun8i-a23-axi-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    axi@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-axi-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&cpu>;
+        clock-output-names = "axi";
+    };
+
+  - |
+    axi_clk@1c20050 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun8i-a23-axi-clk";
+        reg = <0x01c20050 0x4>;
+        clocks = <&cpu>;
+        clock-output-names = "axi";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
index 64938fdaea55..4d382128b711 100644
--- a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
@@ -8,7 +8,7 @@ title: Allwinner Clock Control Unit Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#clock-cells":
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml
new file mode 100644
index 000000000000..0dfafba1a168
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-cpu-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 CPU Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-cpu-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    cpu@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-cpu-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&osc32k>, <&osc24M>, <&pll1>, <&dummy>;
+        clock-output-names = "cpu";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml
new file mode 100644
index 000000000000..7484a7ab7dea
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-display-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  "#reset-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-display-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20104 {
+        #clock-cells = <0>;
+        #reset-cells = <0>;
+        compatible = "allwinner,sun4i-a10-display-clk";
+        reg = <0x01c20104 0x4>;
+        clocks = <&pll3>, <&pll7>, <&pll5 1>;
+        clock-output-names = "de-be";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml
new file mode 100644
index 000000000000..ed1b2126a81b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-gates-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Bus Gates Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      This additional argument passed to that clock is the offset of
+      the bit controlling this particular gate in the register.
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-gates-clk
+      - const: allwinner,sun4i-a10-axi-gates-clk
+      - const: allwinner,sun4i-a10-ahb-gates-clk
+      - const: allwinner,sun5i-a10s-ahb-gates-clk
+      - const: allwinner,sun5i-a13-ahb-gates-clk
+      - const: allwinner,sun7i-a20-ahb-gates-clk
+      - const: allwinner,sun6i-a31-ahb1-gates-clk
+      - const: allwinner,sun8i-a23-ahb1-gates-clk
+      - const: allwinner,sun9i-a80-ahb0-gates-clk
+      - const: allwinner,sun9i-a80-ahb1-gates-clk
+      - const: allwinner,sun9i-a80-ahb2-gates-clk
+      - const: allwinner,sun4i-a10-apb0-gates-clk
+      - const: allwinner,sun5i-a10s-apb0-gates-clk
+      - const: allwinner,sun5i-a13-apb0-gates-clk
+      - const: allwinner,sun7i-a20-apb0-gates-clk
+      - const: allwinner,sun9i-a80-apb0-gates-clk
+      - const: allwinner,sun8i-a83t-apb0-gates-clk
+      - const: allwinner,sun4i-a10-apb1-gates-clk
+      - const: allwinner,sun5i-a13-apb1-gates-clk
+      - const: allwinner,sun5i-a10s-apb1-gates-clk
+      - const: allwinner,sun6i-a31-apb1-gates-clk
+      - const: allwinner,sun7i-a20-apb1-gates-clk
+      - const: allwinner,sun8i-a23-apb1-gates-clk
+      - const: allwinner,sun9i-a80-apb1-gates-clk
+      - const: allwinner,sun6i-a31-apb2-gates-clk
+      - const: allwinner,sun8i-a23-apb2-gates-clk
+      - const: allwinner,sun8i-a83t-bus-gates-clk
+      - const: allwinner,sun9i-a80-apbs-gates-clk
+      - const: allwinner,sun4i-a10-dram-gates-clk
+
+      - items:
+        - const: allwinner,sun5i-a13-dram-gates-clk
+        - const: allwinner,sun4i-a10-gates-clk
+
+      - items:
+        - const: allwinner,sun8i-h3-apb0-gates-clk
+        - const: allwinner,sun4i-a10-gates-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-indices:
+    minItems: 1
+    maxItems: 64
+
+  clock-output-names:
+    minItems: 1
+    maxItems: 64
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-indices
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c2005c {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-axi-gates-clk";
+        reg = <0x01c2005c 0x4>;
+        clocks = <&axi>;
+        clock-indices = <0>;
+        clock-output-names = "axi_dram";
+    };
+
+  - |
+    clk@1c20060 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-ahb-gates-clk";
+        reg = <0x01c20060 0x8>;
+        clocks = <&ahb>;
+        clock-indices = <0>, <1>,
+                        <2>, <3>,
+                        <4>, <5>, <6>,
+                        <7>, <8>, <9>,
+                        <10>, <11>, <12>,
+                        <13>, <14>, <16>,
+                        <17>, <18>, <20>,
+                        <21>, <22>, <23>,
+                        <24>, <25>, <26>,
+                        <32>, <33>, <34>,
+                        <35>, <36>, <37>,
+                        <40>, <41>, <43>,
+                        <44>, <45>,
+                        <46>, <47>,
+                        <50>, <52>;
+        clock-output-names = "ahb_usb0", "ahb_ehci0",
+                             "ahb_ohci0", "ahb_ehci1",
+                             "ahb_ohci1", "ahb_ss", "ahb_dma",
+                             "ahb_bist", "ahb_mmc0", "ahb_mmc1",
+                             "ahb_mmc2", "ahb_mmc3", "ahb_ms",
+                             "ahb_nand", "ahb_sdram", "ahb_ace",
+                             "ahb_emac", "ahb_ts", "ahb_spi0",
+                             "ahb_spi1", "ahb_spi2", "ahb_spi3",
+                             "ahb_pata", "ahb_sata", "ahb_gps",
+                             "ahb_ve", "ahb_tvd", "ahb_tve0",
+                             "ahb_tve1", "ahb_lcd0", "ahb_lcd1",
+                             "ahb_csi0", "ahb_csi1", "ahb_hdmi",
+                             "ahb_de_be0", "ahb_de_be1",
+                             "ahb_de_fe0", "ahb_de_fe1",
+                             "ahb_mp", "ahb_mali400";
+    };
+
+
+  - |
+    clk@1c20068 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-apb0-gates-clk";
+        reg = <0x01c20068 0x4>;
+        clocks = <&apb0>;
+        clock-indices = <0>, <1>,
+                        <2>, <3>,
+                        <5>, <6>,
+                        <7>, <10>;
+        clock-output-names = "apb0_codec", "apb0_spdif",
+                             "apb0_ac97", "apb0_iis",
+                             "apb0_pio", "apb0_ir0",
+                             "apb0_ir1", "apb0_keypad";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml
new file mode 100644
index 000000000000..18f131e262b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mbus-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 MBUS Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun5i-a13-mbus-clk
+      - allwinner,sun8i-a23-mbus-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c2015c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun5i-a13-mbus-clk";
+        reg = <0x01c2015c 0x4>;
+        clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+        clock-output-names = "mbus";
+    };
+
+  - |
+    clk@1c2015c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun8i-a23-mbus-clk";
+        reg = <0x01c2015c 0x4>;
+        clocks = <&osc24M>, <&pll6 1>, <&pll5>;
+        clock-output-names = "mbus";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml
new file mode 100644
index 000000000000..5199285a661a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mmc-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 1 Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      There is three different outputs: the main clock, with the ID 0,
+      and the output and sample clocks, with the IDs 1 and 2,
+      respectively.
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-mmc-clk
+      - allwinner,sun9i-a80-mmc-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 3
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: allwinner,sun4i-a10-mmc-clk
+
+then:
+  properties:
+    clocks:
+      maxItems: 3
+
+else:
+  properties:
+    clocks:
+      maxItems: 2
+
+examples:
+  - |
+    clk@1c20088 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-mmc-clk";
+        reg = <0x01c20088 0x4>;
+        clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+        clock-output-names = "mmc0",
+                             "mmc0_output",
+                             "mmc0_sample";
+    };
+
+  - |
+    clk@6000410 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun9i-a80-mmc-clk";
+        reg = <0x06000410 0x4>;
+        clocks = <&osc24M>, <&pll4>;
+        clock-output-names = "mmc0", "mmc0_output",
+                             "mmc0_sample";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml
new file mode 100644
index 000000000000..3e2abe3e67c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mod0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 0 Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun4i-a10-mod0-clk
+          - allwinner,sun9i-a80-mod0-clk
+
+  # The PRCM on the A31 and A23 will have the reg property missing,
+  # since it's set at the upper level node, and will be validated by
+  # PRCM's schema. Make sure we only validate standalone nodes.
+  required:
+    - compatible
+    - reg
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-mod0-clk
+      - allwinner,sun9i-a80-mod0-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    # On the A80, the PRCM mod0 clocks have 2 parents.
+    minItems: 2
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20080 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-mod0-clk";
+        reg = <0x01c20080 0x4>;
+        clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+        clock-output-names = "nand";
+    };
+
+  - |
+    clk@8001454 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-mod0-clk";
+        reg = <0x08001454 0x4>;
+        clocks = <&osc32k>, <&osc24M>;
+        clock-output-names = "r_ir";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml
new file mode 100644
index 000000000000..7ddb55c75cff
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mod1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 1 Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-mod1-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun4i-a10-pll2.h>
+
+    clk@1c200c0 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-mod1-clk";
+        reg = <0x01c200c0 0x4>;
+        clocks = <&pll2 SUN4I_A10_PLL2_8X>,
+                 <&pll2 SUN4I_A10_PLL2_4X>,
+                 <&pll2 SUN4I_A10_PLL2_2X>,
+                 <&pll2 SUN4I_A10_PLL2_1X>;
+        clock-output-names = "spdif";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
new file mode 100644
index 000000000000..69cfa4a3d562
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-osc-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Gatable Oscillator Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-osc-clk
+
+  reg:
+    maxItems: 1
+
+  clock-frequency:
+    description: >
+      Frequency of the main oscillator.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clock-frequency
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    osc24M: clk@01c20050 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-osc-clk";
+        reg = <0x01c20050 0x4>;
+        clock-frequency = <24000000>;
+        clock-output-names = "osc24M";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml
new file mode 100644
index 000000000000..e9c4cf834aa7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 CPU PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-pll1-clk
+      - allwinner,sun6i-a31-pll1-clk
+      - allwinner,sun8i-a23-pll1-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20000 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-pll1";
+        reg = <0x01c20000 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "osc24M";
+    };
+
+  - |
+    clk@1c20000 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun6i-a31-pll1-clk";
+        reg = <0x01c20000 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll1";
+    };
+
+  - |
+    clk@1c20000 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun8i-a23-pll1-clk";
+        reg = <0x01c20000 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll1";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml
new file mode 100644
index 000000000000..4b80a42fb3da
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll3-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Video PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-pll3-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20010 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-pll3-clk";
+        reg = <0x01c20010 0x4>;
+        clocks = <&osc3M>;
+        clock-output-names = "pll3";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml
new file mode 100644
index 000000000000..415bd77de53d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll5-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 DRAM PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The first output is the DRAM clock output, the second is meant
+      for peripherals on the SoC.
+
+  compatible:
+    const: allwinner,sun4i-a10-pll5-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 2
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20020 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-pll5-clk";
+        reg = <0x01c20020 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll5_ddr", "pll5_other";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml
new file mode 100644
index 000000000000..ec5652f76027
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll6-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Peripheral PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The first output is the SATA clock output, the second is the
+      regular PLL output, the third is a PLL output at twice the rate.
+
+  compatible:
+    const: allwinner,sun4i-a10-pll6-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 3
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20028 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun4i-a10-pll6-clk";
+        reg = <0x01c20028 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll6_sata", "pll6_other", "pll6";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml
new file mode 100644
index 000000000000..0a335c615efd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 TCON Channel 0 Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-tcon-ch0-clk
+      - allwinner,sun4i-a10-tcon-ch1-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: allwinner,sun4i-a10-tcon-ch0-clk
+
+then:
+  required:
+    - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20118 {
+        #clock-cells = <0>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun4i-a10-tcon-ch0-clk";
+        reg = <0x01c20118 0x4>;
+        clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
+        clock-output-names = "tcon-ch0-sclk";
+    };
+
+  - |
+    clk@1c2012c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun4i-a10-tcon-ch1-clk";
+        reg = <0x01c2012c 0x4>;
+        clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
+        clock-output-names = "tcon-ch1-sclk";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml
new file mode 100644
index 000000000000..cd95d25bfe7c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-usb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 USB Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The additional ID argument passed to the clock shall refer to
+      the index of the output.
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-usb-clk
+      - allwinner,sun5i-a13-usb-clk
+      - allwinner,sun6i-a31-usb-clk
+      - allwinner,sun8i-a23-usb-clk
+      - allwinner,sun8i-h3-usb-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    minItems: 2
+    maxItems: 8
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun4i-a10-usb-clk
+
+    then:
+      properties:
+        clock-output-names:
+          maxItems: 3
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun5i-a13-usb-clk
+
+    then:
+      properties:
+        clock-output-names:
+          maxItems: 2
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun6i-a31-usb-clk
+
+    then:
+      properties:
+        clock-output-names:
+          maxItems: 6
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-a23-usb-clk
+
+    then:
+      properties:
+        clock-output-names:
+          maxItems: 5
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-h3-usb-clk
+
+    then:
+      properties:
+        clock-output-names:
+          maxItems: 8
+
+examples:
+  - |
+    clk@1c200cc {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun4i-a10-usb-clk";
+        reg = <0x01c200cc 0x4>;
+        clocks = <&pll6 1>;
+        clock-output-names = "usb_ohci0", "usb_ohci1", "usb_phy";
+    };
+
+  - |
+    clk@1c200cc {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun5i-a13-usb-clk";
+        reg = <0x01c200cc 0x4>;
+        clocks = <&pll6 1>;
+        clock-output-names = "usb_ohci0", "usb_phy";
+    };
+
+  - |
+    clk@1c200cc {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun6i-a31-usb-clk";
+        reg = <0x01c200cc 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "usb_phy0", "usb_phy1", "usb_phy2",
+                             "usb_ohci0", "usb_ohci1",
+                             "usb_ohci2";
+    };
+
+  - |
+    clk@1c200cc {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun8i-a23-usb-clk";
+        reg = <0x01c200cc 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "usb_phy0", "usb_phy1", "usb_hsic",
+                             "usb_hsic_12M", "usb_ohci0";
+    };
+
+  - |
+    clk@1c200cc {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun8i-h3-usb-clk";
+        reg = <0x01c200cc 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "usb_phy0", "usb_phy1",
+                             "usb_phy2", "usb_phy3",
+                             "usb_ohci0", "usb_ohci1",
+                             "usb_ohci2", "usb_ohci3";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml
new file mode 100644
index 000000000000..5dfd0c1c27b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ve-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Video Engine Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  "#reset-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-ve-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c2013c {
+        #clock-cells = <0>;
+        #reset-cells = <0>;
+        compatible = "allwinner,sun4i-a10-ve-clk";
+        reg = <0x01c2013c 0x4>;
+        clocks = <&pll4>;
+        clock-output-names = "ve";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml
new file mode 100644
index 000000000000..99add7991c48
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun5i-a13-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A13 AHB Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun5i-a13-ahb-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    ahb@1c20054 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun5i-a13-ahb-clk";
+        reg = <0x01c20054 0x4>;
+        clocks = <&axi>, <&cpu>, <&pll6 1>;
+        clock-output-names = "ahb";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml
new file mode 100644
index 000000000000..5f377205af71
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun6i-a31-pll6-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 Peripheral PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The first output is the regular PLL output, the second is a PLL
+      output at twice the rate.
+
+  compatible:
+    const: allwinner,sun6i-a31-pll6-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 2
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20028 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun6i-a31-pll6-clk";
+        reg = <0x01c20028 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll6", "pll6x2";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml
new file mode 100644
index 000000000000..59e5dce1b65a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun7i-a20-gmac-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 GMAC TX Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun7i-a20-gmac-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+    description: >
+      The parent clocks shall be fixed rate dummy clocks at 25 MHz and
+      125 MHz, respectively.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20164 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun7i-a20-gmac-clk";
+        reg = <0x01c20164 0x4>;
+        clocks = <&mii_phy_tx_clk>, <&gmac_int_tx_clk>;
+        clock-output-names = "gmac_tx";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml
new file mode 100644
index 000000000000..c745733bcf04
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun7i-a20-out-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 Output Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun7i-a20-out-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c201f0 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun7i-a20-out-clk";
+        reg = <0x01c201f0 0x4>;
+        clocks = <&osc24M_32k>, <&osc32k>, <&osc24M>;
+        clock-output-names = "clk_out_a";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml
new file mode 100644
index 000000000000..3eb2bf65b230
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun8i-h3-bus-gates-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Bus Gates Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      This additional argument passed to that clock is the offset of
+      the bit controlling this particular gate in the register.
+
+  compatible:
+    const: allwinner,sun8i-h3-bus-gates-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-indices:
+    minItems: 1
+    maxItems: 64
+
+  clock-output-names:
+    minItems: 1
+    maxItems: 64
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-indices
+  - clock-names
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c20060 {
+        #clock-cells = <1>;
+        compatible = "allwinner,sun8i-h3-bus-gates-clk";
+        reg = <0x01c20060 0x14>;
+        clocks = <&ahb1>, <&ahb2>, <&apb1>, <&apb2>;
+        clock-names = "ahb1", "ahb2", "apb1", "apb2";
+        clock-indices = <5>, <6>, <8>,
+                        <9>, <10>, <13>,
+                        <14>, <17>, <18>,
+                        <19>, <20>,
+                        <21>, <23>,
+                        <24>, <25>,
+                        <26>, <27>,
+                        <28>, <29>,
+                        <30>, <31>, <32>,
+                        <35>, <36>, <37>,
+                        <40>, <41>, <43>,
+                        <44>, <52>, <53>,
+                        <54>, <64>,
+                        <65>, <69>, <72>,
+                        <76>, <77>, <78>,
+                        <96>, <97>, <98>,
+                        <112>, <113>,
+                        <114>, <115>,
+                        <116>, <128>, <135>;
+        clock-output-names = "bus_ce", "bus_dma", "bus_mmc0",
+                             "bus_mmc1", "bus_mmc2", "bus_nand",
+                             "bus_sdram", "bus_gmac", "bus_ts",
+                             "bus_hstimer", "bus_spi0",
+                             "bus_spi1", "bus_otg",
+                             "bus_otg_ehci0", "bus_ehci1",
+                             "bus_ehci2", "bus_ehci3",
+                             "bus_otg_ohci0", "bus_ohci1",
+                             "bus_ohci2", "bus_ohci3", "bus_ve",
+                             "bus_lcd0", "bus_lcd1", "bus_deint",
+                             "bus_csi", "bus_tve", "bus_hdmi",
+                             "bus_de", "bus_gpu", "bus_msgbox",
+                             "bus_spinlock", "bus_codec",
+                             "bus_spdif", "bus_pio", "bus_ths",
+                             "bus_i2s0", "bus_i2s1", "bus_i2s2",
+                             "bus_i2c0", "bus_i2c1", "bus_i2c2",
+                             "bus_uart0", "bus_uart1",
+                             "bus_uart2", "bus_uart3",
+                             "bus_scr", "bus_ephy", "bus_dbg";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml
new file mode 100644
index 000000000000..d178da90aaec
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 AHB Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun9i-a80-ahb-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@6000060 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun9i-a80-ahb-clk";
+        reg = <0x06000060 0x4>;
+        clocks = <&gt_clk>, <&pll4>, <&pll12>, <&pll12>;
+        clock-output-names = "ahb0";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml
new file mode 100644
index 000000000000..0351c79bd221
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-apb0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 APB0 Bus Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun9i-a80-apb0-clk
+      - allwinner,sun9i-a80-apb1-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@6000070 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun9i-a80-apb0-clk";
+        reg = <0x06000070 0x4>;
+        clocks = <&osc24M>, <&pll4>;
+        clock-output-names = "apb0";
+    };
+
+  - |
+    clk@6000074 {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun9i-a80-apb1-clk";
+        reg = <0x06000074 0x4>;
+        clocks = <&osc24M>, <&pll4>;
+        clock-output-names = "apb1";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml
new file mode 100644
index 000000000000..24d5b2f1a314
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-cpus-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 CPUS Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun9i-a80-cpus-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@8001410 {
+        compatible = "allwinner,sun9i-a80-cpus-clk";
+        reg = <0x08001410 0x4>;
+        #clock-cells = <0>;
+        clocks = <&osc32k>, <&osc24M>, <&pll4>, <&pll3>;
+        clock-output-names = "cpus";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
new file mode 100644
index 000000000000..07f38def7dc3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-gt-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 GT Bus Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun9i-a80-gt-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+    description: >
+      The parent order must match the hardware programming order.
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@0600005c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun9i-a80-gt-clk";
+        reg = <0x0600005c 0x4>;
+        clocks = <&osc24M>, <&pll4>, <&pll12>, <&pll12>;
+        clock-output-names = "gt";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml
new file mode 100644
index 000000000000..20dc115fa211
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-mmc-config-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 MMC Configuration Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+description: >
+  There is one clock/reset output per mmc controller. The number of
+  outputs is determined by the size of the address block, which is
+  related to the overall mmc block.
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The additional ID argument passed to the clock shall refer to
+      the index of the output.
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun9i-a80-mmc-config-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 4
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@1c13000 {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun9i-a80-mmc-config-clk";
+        reg = <0x01c13000 0x10>;
+        clocks = <&ahb0_gates 8>;
+        resets = <&ahb0_resets 8>;
+        clock-output-names = "mmc0_config", "mmc1_config",
+                             "mmc2_config", "mmc3_config";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml
new file mode 100644
index 000000000000..b76bab6a30e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-pll4-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 Peripheral PLL Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun9i-a80-pll4-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@600000c {
+        #clock-cells = <0>;
+        compatible = "allwinner,sun9i-a80-pll4-clk";
+        reg = <0x0600000c 0x4>;
+        clocks = <&osc24M>;
+        clock-output-names = "pll4";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml
new file mode 100644
index 000000000000..15218d10e78e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-usb-mod-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB Module Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The additional ID argument passed to the clock shall refer to
+      the index of the output.
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun9i-a80-usb-mod-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 6
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@a08000 {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun9i-a80-usb-mod-clk";
+        reg = <0x00a08000 0x4>;
+        clocks = <&ahb1_gates 1>;
+        clock-output-names = "usb0_ahb", "usb_ohci0",
+                             "usb1_ahb", "usb_ohci1",
+                             "usb2_ahb", "usb_ohci2";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml
new file mode 100644
index 000000000000..2569041684e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-usb-phy-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB PHY Clock Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  "#clock-cells":
+    const: 1
+    description: >
+      The additional ID argument passed to the clock shall refer to
+      the index of the output.
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun9i-a80-usb-phy-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    maxItems: 6
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    clk@a08004 {
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        compatible = "allwinner,sun9i-a80-usb-phy-clk";
+        reg = <0x00a08004 0x4>;
+        clocks = <&ahb1_gates 1>;
+        clock-output-names = "usb_phy0", "usb_hsic1_480M",
+                             "usb_phy1", "usb_hsic2_480M",
+                             "usb_phy2", "usb_hsic_12M";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
index b3957d10d241..3a8948c04bc9 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
@@ -7,7 +7,8 @@ devices.
 Required Properties:
 
 - compatible	: should be "amlogic,axg-audio-clkc" for the A113X and A113D,
-		  "amlogic,g12a-audio-clkc" for G12A.
+		  "amlogic,g12a-audio-clkc" for G12A,
+		  "amlogic,sm1-audio-clkc" for S905X3.
 - reg		: physical base address of the clock controller and length of
 		  memory mapped region.
 - clocks	: a list of phandle + clock-specifier pairs for the clocks listed
diff --git a/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
index 1e3370ba189f..fbf58c443c04 100644
--- a/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
+++ b/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
@@ -9,7 +9,7 @@ bridge.
 The peripheral clock consumer should specify the desired clock by
 having the clock ID in its "clocks" phandle cell.
 
-The following is a list of provided IDs for Armada 370 North bridge clocks:
+The following is a list of provided IDs for Armada 3700 North bridge clocks:
 ID	Clock name	Description
 -----------------------------------
 0	mmc		MMC controller
@@ -30,7 +30,7 @@ ID	Clock name	Description
 15	eip97		EIP 97
 16	cpu		CPU
 
-The following is a list of provided IDs for Armada 370 South bridge clocks:
+The following is a list of provided IDs for Armada 3700 South bridge clocks:
 ID	Clock name	Description
 -----------------------------------
 0	gbe-50		50 MHz parent clock for Gigabit Ethernet
@@ -46,6 +46,7 @@ ID	Clock name	Description
 10	sdio		SDIO
 11	usb32-sub2-sys	USB 2 clock
 12	usb32-ss-sys	USB 3 clock
+13	pcie		PCIe controller
 
 Required properties:
 
diff --git a/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml b/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml
new file mode 100644
index 000000000000..e63827399c1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/clock/bitmain,bm1880-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bitmain BM1880 Clock Controller
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+  The Bitmain BM1880 clock controller generates and supplies clock to
+  various peripherals within the SoC.
+
+  This binding uses common clock bindings
+  [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+properties:
+  compatible:
+    const: bitmain,bm1880-clk
+
+  reg:
+    items:
+      - description: pll registers
+      - description: system registers
+
+  reg-names:
+    items:
+      - const: pll
+      - const: sys
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: osc
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - clock-names
+  - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+  # Clock controller node:
+  - |
+    clk: clock-controller@e8 {
+        compatible = "bitmain,bm1880-clk";
+        reg = <0xe8 0x0c>, <0x800 0xb0>;
+        reg-names = "pll", "sys";
+        clocks = <&osc>;
+        clock-names = "osc";
+        #clock-cells = <1>;
+    };
+
+  # Example UART controller node that consumes clock generated by the clock controller:
+  - |
+    uart0: serial@58018000 {
+         compatible = "snps,dw-apb-uart";
+         reg = <0x0 0x58018000 0x0 0x2000>;
+         clocks = <&clk 45>, <&clk 46>;
+         clock-names = "baudclk", "apb_pclk";
+         interrupts = <0 9 4>;
+         reg-shift = <2>;
+         reg-io-width = <4>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-clock.txt b/Documentation/devicetree/bindings/clock/imx7ulp-clock.txt
index a4f8cd478f92..93d89adb7afe 100644
--- a/Documentation/devicetree/bindings/clock/imx7ulp-clock.txt
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-clock.txt
@@ -82,7 +82,6 @@ pcc2: pcc2@403f0000 {
 		 <&scg1 IMX7ULP_CLK_APLL_PFD0>,
 		 <&scg1 IMX7ULP_CLK_UPLL>,
 		 <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>,
-		 <&scg1 IMX7ULP_CLK_MIPI_PLL>,
 		 <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>,
 		 <&scg1 IMX7ULP_CLK_ROSC>,
 		 <&scg1 IMX7ULP_CLK_SPLL_BUS_CLK>;
diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.txt b/Documentation/devicetree/bindings/clock/ingenic,cgu.txt
index ba5a442026b7..75598e655067 100644
--- a/Documentation/devicetree/bindings/clock/ingenic,cgu.txt
+++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.txt
@@ -11,6 +11,7 @@ Required properties:
   * ingenic,jz4725b-cgu
   * ingenic,jz4770-cgu
   * ingenic,jz4780-cgu
+  * ingenic,x1000-cgu
 - reg : The address & length of the CGU registers.
 - clocks : List of phandle & clock specifiers for clocks external to the CGU.
   Two such external clocks should be specified - first the external crystal
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
deleted file mode 100644
index d14362ad4132..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-Qualcomm Global Clock & Reset Controller Binding
-------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
-
-			"qcom,gcc-apq8064"
-			"qcom,gcc-apq8084"
-			"qcom,gcc-ipq8064"
-			"qcom,gcc-ipq4019"
-			"qcom,gcc-ipq8074"
-			"qcom,gcc-msm8660"
-			"qcom,gcc-msm8916"
-			"qcom,gcc-msm8960"
-			"qcom,gcc-msm8974"
-			"qcom,gcc-msm8974pro"
-			"qcom,gcc-msm8974pro-ac"
-			"qcom,gcc-msm8994"
-			"qcom,gcc-msm8996"
-			"qcom,gcc-msm8998"
-			"qcom,gcc-mdm9615"
-			"qcom,gcc-qcs404"
-			"qcom,gcc-sdm630"
-			"qcom,gcc-sdm660"
-			"qcom,gcc-sdm845"
-			"qcom,gcc-sm8150"
-
-- reg : shall contain base register location and length
-- #clock-cells : shall contain 1
-- #reset-cells : shall contain 1
-
-Optional properties :
-- #power-domain-cells : shall contain 1
-- Qualcomm TSENS (thermal sensor device) on some devices can
-be part of GCC and hence the TSENS properties can also be
-part of the GCC/clock-controller node.
-For more details on the TSENS properties please refer
-Documentation/devicetree/bindings/thermal/qcom-tsens.txt
-- protected-clocks : Protected clock specifier list as per common clock
- binding.
-
-For SM8150 only:
-       - clocks: a list of phandles and clock-specifier pairs,
-                 one for each entry in clock-names.
-       - clock-names: "bi_tcxo" (required)
-                      "sleep_clk" (optional)
-                      "aud_ref_clock" (optional)
-
-Example:
-	clock-controller@900000 {
-		compatible = "qcom,gcc-msm8960";
-		reg = <0x900000 0x4000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-		#power-domain-cells = <1>;
-	};
-
-Example of GCC with TSENS properties:
-	clock-controller@900000 {
-		compatible = "qcom,gcc-apq8064";
-		reg = <0x00900000 0x4000>;
-		nvmem-cells = <&tsens_calib>, <&tsens_backup>;
-		nvmem-cell-names = "calib", "calib_backup";
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-		#thermal-sensor-cells = <1>;
-	};
-
-Example of GCC with protected-clocks properties:
-	clock-controller@100000 {
-		compatible = "qcom,gcc-sdm845";
-		reg = <0x100000 0x1f0000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-		#power-domain-cells = <1>;
-		protected-clocks = <GCC_QSPI_CORE_CLK>,
-				   <GCC_QSPI_CORE_CLK_SRC>,
-				   <GCC_QSPI_CNOC_PERIPH_AHB_CLK>,
-				   <GCC_LPASS_Q6_AXI_CLK>,
-				   <GCC_LPASS_SWAY_CLK>;
-	};
-
-Example of GCC with clocks
-	gcc: clock-controller@100000 {
-		compatible = "qcom,gcc-sm8150";
-		reg = <0x00100000 0x1f0000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-		#power-domain-cells = <1>;
-		clock-names = "bi_tcxo",
-		              "sleep_clk";
-		clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
-			 <&sleep_clk>;
-	};
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
new file mode 100644
index 000000000000..e73a56fb60ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/clock/qcom,gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller Binding
+
+maintainers:
+  - Stephen Boyd <sboyd@kernel.org>
+  - Taniya Das <tdas@codeaurora.org>
+
+description: |
+  Qualcomm global clock control module which supports the clocks, resets and
+  power domains.
+
+properties:
+  compatible :
+    enum:
+       - qcom,gcc-apq8064
+       - qcom,gcc-apq8084
+       - qcom,gcc-ipq8064
+       - qcom,gcc-ipq4019
+       - qcom,gcc-ipq8074
+       - qcom,gcc-msm8660
+       - qcom,gcc-msm8916
+       - qcom,gcc-msm8960
+       - qcom,gcc-msm8974
+       - qcom,gcc-msm8974pro
+       - qcom,gcc-msm8974pro-ac
+       - qcom,gcc-msm8994
+       - qcom,gcc-msm8996
+       - qcom,gcc-msm8998
+       - qcom,gcc-mdm9615
+       - qcom,gcc-qcs404
+       - qcom,gcc-sc7180
+       - qcom,gcc-sdm630
+       - qcom,gcc-sdm660
+       - qcom,gcc-sdm845
+       - qcom,gcc-sm8150
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+    items:
+      - description: Board XO source
+      - description: Board active XO source
+      - description: Sleep clock source
+
+  clock-names:
+    minItems: 1
+    maxItems: 3
+    items:
+      - const: bi_tcxo
+      - const: bi_tcxo_ao
+      - const: sleep_clk
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+  nvmem-cells:
+    minItems: 1
+    maxItems: 2
+    description:
+      Qualcomm TSENS (thermal sensor device) on some devices can
+      be part of GCC and hence the TSENS properties can also be part
+      of the GCC/clock-controller node.
+      For more details on the TSENS properties please refer
+      Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+
+  nvmem-cell-names:
+    minItems: 1
+    maxItems: 2
+    description:
+      Names for each nvmem-cells specified.
+    items:
+      - const: calib
+      - const: calib_backup
+
+  'thermal-sensor-cells':
+    const: 1
+
+  protected-clocks:
+    description:
+       Protected clock specifier list as per common clock binding
+
+required:
+  - compatible
+  - reg
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: qcom,gcc-apq8064
+
+then:
+  required:
+    - nvmem-cells
+    - nvmem-cell-names
+    - '#thermal-sensor-cells'
+
+else:
+  if:
+    properties:
+      compatible:
+        contains:
+          enum:
+            - qcom,gcc-sm8150
+            - qcom,gcc-sc7180
+  then:
+    required:
+       - clocks
+       - clock-names
+
+
+examples:
+  # Example for GCC for MSM8960:
+  - |
+    clock-controller@900000 {
+      compatible = "qcom,gcc-msm8960";
+      reg = <0x900000 0x4000>;
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+
+
+  # Example of GCC with TSENS properties:
+  - |
+    clock-controller@900000 {
+      compatible = "qcom,gcc-apq8064";
+      reg = <0x00900000 0x4000>;
+      nvmem-cells = <&tsens_calib>, <&tsens_backup>;
+      nvmem-cell-names = "calib", "calib_backup";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+      #thermal-sensor-cells = <1>;
+    };
+
+  # Example of GCC with protected-clocks properties:
+  - |
+    clock-controller@100000 {
+      compatible = "qcom,gcc-sdm845";
+      reg = <0x100000 0x1f0000>;
+      protected-clocks = <187>, <188>, <189>, <190>, <191>;
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+
+  # Example of GCC with clock node properties for SM8150:
+  - |
+    clock-controller@100000 {
+      compatible = "qcom,gcc-sm8150";
+      reg = <0x00100000 0x1f0000>;
+      clocks = <&rpmhcc 0>, <&rpmhcc 1>, <&sleep_clk>;
+      clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+     };
+
+  # Example of GCC with clock nodes properties for SC7180:
+  - |
+    clock-controller@100000 {
+      compatible = "qcom,gcc-sc7180";
+      reg = <0x100000 0x1f0000>;
+      clocks = <&rpmhcc 0>, <&rpmhcc 1>;
+      clock-names = "bi_tcxo", "bi_tcxo_ao";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml b/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml
new file mode 100644
index 000000000000..bbaaf1e2a203
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,q6sstopcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Q6SSTOP clock Controller
+
+maintainers:
+  - Govind Singh <govinds@codeaurora.org>
+
+properties:
+  compatible:
+    const: "qcom,qcs404-q6sstopcc"
+
+  reg:
+    items:
+      - description: Q6SSTOP clocks register region
+      - description: Q6SSTOP_TCSR register region
+
+  clocks:
+    items:
+      - description: ahb clock for the q6sstopCC
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    q6sstopcc: clock-controller@7500000 {
+      compatible = "qcom,qcs404-q6sstopcc";
+      reg = <0x07500000 0x4e000>, <0x07550000 0x10000>;
+      clocks = <&gcc 141>;
+      #clock-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
deleted file mode 100644
index 365bbde599b1..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Qualcomm Technologies, Inc. RPMh Clocks
--------------------------------------------------------
-
-Resource Power Manager Hardened (RPMh) manages shared resources on
-some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
-other hardware subsystems via RSC to control clocks.
-
-Required properties :
-- compatible : must be one of:
-	       "qcom,sdm845-rpmh-clk"
-	       "qcom,sm8150-rpmh-clk"
-
-- #clock-cells : must contain 1
-- clocks: a list of phandles and clock-specifier pairs,
-	  one for each entry in clock-names.
-- clock-names: Parent board clock: "xo".
-
-Example :
-
-#include <dt-bindings/clock/qcom,rpmh.h>
-
-	&apps_rsc {
-		rpmhcc: clock-controller {
-			compatible = "qcom,sdm845-rpmh-clk";
-			#clock-cells = <1>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
new file mode 100644
index 000000000000..94e2f14eb967
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/clock/qcom,rpmhcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. RPMh Clocks Bindings
+
+maintainers:
+  - Taniya Das <tdas@codeaurora.org>
+
+description: |
+  Resource Power Manager Hardened (RPMh) manages shared resources on
+  some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+  other hardware subsystems via RSC to control clocks.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc7180-rpmh-clk
+      - qcom,sdm845-rpmh-clk
+      - qcom,sm8150-rpmh-clk
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: xo
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - '#clock-cells'
+
+examples:
+  # Example for GCC for SDM845: The below node should be defined inside
+  # &apps_rsc node.
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    rpmhcc: clock-controller {
+      compatible = "qcom,sdm845-rpmh-clk";
+      clocks = <&xo_board>;
+      clock-names = "xo";
+      #clock-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
index 916a601b76a7..c7674d0267a3 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
@@ -19,6 +19,7 @@ Required Properties:
       - "renesas,r8a7745-cpg-mssr" for the r8a7745 SoC (RZ/G1E)
       - "renesas,r8a77470-cpg-mssr" for the r8a77470 SoC (RZ/G1C)
       - "renesas,r8a774a1-cpg-mssr" for the r8a774a1 SoC (RZ/G2M)
+      - "renesas,r8a774b1-cpg-mssr" for the r8a774a1 SoC (RZ/G2N)
       - "renesas,r8a774c0-cpg-mssr" for the r8a774c0 SoC (RZ/G2E)
       - "renesas,r8a7790-cpg-mssr" for the r8a7790 SoC (R-Car H2)
       - "renesas,r8a7791-cpg-mssr" for the r8a7791 SoC (R-Car M2-W)
@@ -26,7 +27,8 @@ Required Properties:
       - "renesas,r8a7793-cpg-mssr" for the r8a7793 SoC (R-Car M2-N)
       - "renesas,r8a7794-cpg-mssr" for the r8a7794 SoC (R-Car E2)
       - "renesas,r8a7795-cpg-mssr" for the r8a7795 SoC (R-Car H3)
-      - "renesas,r8a7796-cpg-mssr" for the r8a7796 SoC (R-Car M3-W)
+      - "renesas,r8a7796-cpg-mssr" for the r8a77960 SoC (R-Car M3-W)
+      - "renesas,r8a77961-cpg-mssr" for the r8a77961 SoC (R-Car M3-W+)
       - "renesas,r8a77965-cpg-mssr" for the r8a77965 SoC (R-Car M3-N)
       - "renesas,r8a77970-cpg-mssr" for the r8a77970 SoC (R-Car V3M)
       - "renesas,r8a77980-cpg-mssr" for the r8a77980 SoC (R-Car V3H)
@@ -40,10 +42,11 @@ Required Properties:
     clock-names
   - clock-names: List of external parent clock names. Valid names are:
       - "extal" (r7s9210, r8a7743, r8a7744, r8a7745, r8a77470, r8a774a1,
-		 r8a774c0, r8a7790, r8a7791, r8a7792, r8a7793, r8a7794,
-		 r8a7795, r8a7796, r8a77965, r8a77970, r8a77980, r8a77990,
-		 r8a77995)
-      - "extalr" (r8a774a1, r8a7795, r8a7796, r8a77965, r8a77970, r8a77980)
+		 r8a774b1, r8a774c0, r8a7790, r8a7791, r8a7792, r8a7793,
+		 r8a7794, r8a7795, r8a77960, r8a77961, r8a77965, r8a77970,
+		 r8a77980, r8a77990, r8a77995)
+      - "extalr" (r8a774a1, r8a774b1, r8a7795, r8a77960, r8a77961, r8a77965,
+		  r8a77970, r8a77980)
       - "usb_extal" (r8a7743, r8a7744, r8a7745, r8a77470, r8a7790, r8a7791,
 		     r8a7793, r8a7794)
 
@@ -59,7 +62,7 @@ Required Properties:
 	power-managed through Module Standby should refer to the CPG device
 	node in their "power-domains" property, as documented by the generic PM
 	Domain bindings in
-	Documentation/devicetree/bindings/power/power_domain.txt.
+	Documentation/devicetree/bindings/power/power-domain.yaml.
 
   - #reset-cells: Must be 1
       - The single reset specifier cell must be the module number, as defined
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
deleted file mode 100644
index f8c05bb4116e..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-* Renesas R-Car Gen2 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R-Car Gen2 SoCs. It includes three PLLs
-and several fixed ratio dividers.
-The CPG also provides a Clock Domain for SoC devices, in combination with the
-CPG Module Stop (MSTP) Clocks.
-
-Required Properties:
-
-  - compatible: Must be one of
-    - "renesas,r8a7790-cpg-clocks" for the r8a7790 CPG
-    - "renesas,r8a7791-cpg-clocks" for the r8a7791 CPG
-    - "renesas,r8a7792-cpg-clocks" for the r8a7792 CPG
-    - "renesas,r8a7793-cpg-clocks" for the r8a7793 CPG
-    - "renesas,r8a7794-cpg-clocks" for the r8a7794 CPG
-    and "renesas,rcar-gen2-cpg-clocks" as a fallback.
-
-  - reg: Base address and length of the memory resource used by the CPG
-
-  - clocks: References to the parent clocks: first to the EXTAL clock, second
-    to the USB_EXTAL clock
-  - #clock-cells: Must be 1
-  - clock-output-names: The names of the clocks. Supported clocks are "main",
-    "pll0", "pll1", "pll3", "lb", "qspi", "sdh", "sd0", "sd1", "z", "rcan", and
-    "adsp"
-  - #power-domain-cells: Must be 0
-
-SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
-through an MSTP clock should refer to the CPG device node in their
-"power-domains" property, as documented by the generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
-
-
-Examples
---------
-
-  - CPG device node:
-
-	cpg_clocks: cpg_clocks@e6150000 {
-		compatible = "renesas,r8a7790-cpg-clocks",
-			     "renesas,rcar-gen2-cpg-clocks";
-		reg = <0 0xe6150000 0 0x1000>;
-		clocks = <&extal_clk &usb_extal_clk>;
-		#clock-cells = <1>;
-		clock-output-names = "main", "pll0, "pll1", "pll3",
-				     "lb", "qspi", "sdh", "sd0", "sd1", "z",
-				     "rcan", "adsp";
-		#power-domain-cells = <0>;
-	};
-
-
-  - CPG/MSTP Clock Domain member device node:
-
-	thermal@e61f0000 {
-		compatible = "renesas,thermal-r8a7790", "renesas,rcar-thermal";
-		reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>;
-		interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp5_clks R8A7790_CLK_THERMAL>;
-		power-domains = <&cpg_clocks>;
-	};
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
index e96e085271c1..83f6c6a7c41c 100644
--- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
@@ -46,7 +46,7 @@ Required properties:
 Example (R-Car H3):
 
 	usb2_clksel: clock-controller@e6590630 {
-		compatible = "renesas,r8a77950-rcar-usb2-clock-sel",
+		compatible = "renesas,r8a7795-rcar-usb2-clock-sel",
 			     "renesas,rcar-gen3-usb2-clock-sel";
 		reg = <0 0xe6590630 0 0x02>;
 		clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>;
diff --git a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
index 39f0c1ac84ee..55e78cddec8c 100644
--- a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
+++ b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
@@ -10,6 +10,11 @@ Required Properties:
 - compatible: CRU should be "rockchip,px30-cru"
 - reg: physical base address of the controller and length of memory mapped
   region.
+- clocks: A list of phandle + clock-specifier pairs for the clocks listed
+          in clock-names
+- clock-names: Should contain the following:
+  - "xin24m" for both PMUCRU and CRU
+  - "gpll" for CRU (sourced from PMUCRU)
 - #clock-cells: should be 1.
 - #reset-cells: should be 1.
 
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
deleted file mode 100644
index fb9495ea582c..000000000000
--- a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-STMicroelectronics STM32 Peripheral Reset Clock Controller
-==========================================================
-
-The RCC IP is both a reset and a clock controller.
-
-RCC makes also power management (resume/supend and wakeup interrupt).
-
-Please also refer to reset.txt for common reset controller binding usage.
-
-Please also refer to clock-bindings.txt for common clock controller
-binding usage.
-
-
-Required properties:
-- compatible: "st,stm32mp1-rcc", "syscon"
-- reg: should be register base and length as documented in the datasheet
-- #clock-cells: 1, device nodes should specify the clock in their
-  "clocks" property, containing a phandle to the clock device node,
-  an index specifying the clock to use.
-- #reset-cells: Shall be 1
-- interrupts: Should contain a general interrupt line and a interrupt line
-  to the wake-up of processor (CSTOP).
-
-Example:
-	rcc: rcc@50000000 {
-		compatible = "st,stm32mp1-rcc", "syscon";
-		reg = <0x50000000 0x1000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-		interrupts = <GIC_SPI 5 IRQ_TYPE_NONE>,
-			     <GIC_SPI 145 IRQ_TYPE_NONE>;
-	};
-
-Specifying clocks
-=================
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/stm32mp1-clks.h header and can be used in device
-tree sources.
-
-Specifying softreset control of devices
-=======================================
-
-Device nodes should specify the reset channel required in their "resets"
-property, containing a phandle to the reset device node and an index specifying
-which channel to use.
-The index is the bit number within the RCC registers bank, starting from RCC
-base address.
-It is calculated as: index = register_offset / 4 * 32 + bit_offset.
-Where bit_offset is the bit offset within the register.
-
-For example on STM32MP1, for LTDC reset:
- ltdc = APB4_RSTSETR_offset / 4 * 32 + LTDC_bit_offset
-      = 0x180 / 4 * 32 + 0 = 3072
-
-The list of valid indices for STM32MP1 is available in:
-include/dt-bindings/reset-controller/stm32mp1-resets.h
-
-This file implements defines like:
-#define LTDC_R	3072
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
new file mode 100644
index 000000000000..b8f91e444d2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/clock/st,stm32mp1-rcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Reset Clock Controller Binding
+
+maintainers:
+  - Gabriel Fernandez <gabriel.fernandez@st.com>
+
+description: |
+  The RCC IP is both a reset and a clock controller.
+  RCC makes also power management (resume/supend and wakeup interrupt).
+  Please also refer to reset.txt for common reset controller binding usage.
+
+  This binding uses common clock bindings
+  Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+  Specifying clocks
+  =================
+
+  All available clocks are defined as preprocessor macros in
+  dt-bindings/clock/stm32mp1-clks.h header and can be used in device
+  tree sources.
+
+  Specifying softreset control of devices
+  =======================================
+
+  Device nodes should specify the reset channel required in their "resets"
+  property, containing a phandle to the reset device node and an index specifying
+  which channel to use.
+  The index is the bit number within the RCC registers bank, starting from RCC
+  base address.
+  It is calculated as: index = register_offset / 4 * 32 + bit_offset.
+  Where bit_offset is the bit offset within the register.
+
+  For example on STM32MP1, for LTDC reset:
+     ltdc = APB4_RSTSETR_offset / 4 * 32 + LTDC_bit_offset
+          = 0x180 / 4 * 32 + 0 = 3072
+
+  The list of valid indices for STM32MP1 is available in:
+  include/dt-bindings/reset-controller/stm32mp1-resets.h
+
+  This file implements defines like:
+  #define LTDC_R	3072
+
+properties:
+  "#clock-cells":
+    const: 1
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    items:
+      - const: st,stm32mp1-rcc
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    rcc: rcc@50000000 {
+        compatible = "st,stm32mp1-rcc", "syscon";
+        reg = <0x50000000 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
deleted file mode 100644
index 1a042e20b115..000000000000
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ /dev/null
@@ -1,225 +0,0 @@
-Device Tree Clock bindings for arch-sunxi
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
-	"allwinner,sun4i-a10-osc-clk" - for a gatable oscillator
-	"allwinner,sun4i-a10-pll1-clk" - for the main PLL clock and PLL4
-	"allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
-	"allwinner,sun8i-a23-pll1-clk" - for the main PLL clock on A23
-	"allwinner,sun4i-a10-pll3-clk" - for the video PLL clock on A10
-	"allwinner,sun9i-a80-pll4-clk" - for the peripheral PLLs on A80
-	"allwinner,sun4i-a10-pll5-clk" - for the PLL5 clock
-	"allwinner,sun4i-a10-pll6-clk" - for the PLL6 clock
-	"allwinner,sun6i-a31-pll6-clk" - for the PLL6 clock on A31
-	"allwinner,sun9i-a80-gt-clk" - for the GT bus clock on A80
-	"allwinner,sun4i-a10-cpu-clk" - for the CPU multiplexer clock
-	"allwinner,sun4i-a10-axi-clk" - for the AXI clock
-	"allwinner,sun8i-a23-axi-clk" - for the AXI clock on A23
-	"allwinner,sun4i-a10-gates-clk" - for generic gates on all compatible SoCs
-	"allwinner,sun4i-a10-axi-gates-clk" - for the AXI gates
-	"allwinner,sun4i-a10-ahb-clk" - for the AHB clock
-	"allwinner,sun5i-a13-ahb-clk" - for the AHB clock on A13
-	"allwinner,sun9i-a80-ahb-clk" - for the AHB bus clocks on A80
-	"allwinner,sun4i-a10-ahb-gates-clk" - for the AHB gates on A10
-	"allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
-	"allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
-	"allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20
-	"allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31
-	"allwinner,sun9i-a80-cpus-clk" - for the CPUS on A80
-	"allwinner,sun6i-a31-ahb1-clk" - for the AHB1 clock on A31
-	"allwinner,sun8i-h3-ahb2-clk" - for the AHB2 clock on H3
-	"allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
-	"allwinner,sun8i-a23-ahb1-gates-clk" - for the AHB1 gates on A23
-	"allwinner,sun9i-a80-ahb0-gates-clk" - for the AHB0 gates on A80
-	"allwinner,sun9i-a80-ahb1-gates-clk" - for the AHB1 gates on A80
-	"allwinner,sun9i-a80-ahb2-gates-clk" - for the AHB2 gates on A80
-	"allwinner,sun4i-a10-apb0-clk" - for the APB0 clock
-	"allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
-	"allwinner,sun8i-a23-apb0-clk" - for the APB0 clock on A23
-	"allwinner,sun9i-a80-apb0-clk" - for the APB0 bus clock on A80
-	"allwinner,sun8i-a83t-apb0-gates-clk" - for the APB0 gates on A83T
-	"allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
-	"allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
-	"allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
-	"allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
-	"allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
-	"allwinner,sun8i-a23-apb0-gates-clk" - for the APB0 gates on A23
-	"allwinner,sun8i-h3-apb0-gates-clk" - for the APB0 gates on H3
-	"allwinner,sun9i-a80-apb0-gates-clk" - for the APB0 gates on A80
-	"allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
-	"allwinner,sun9i-a80-apb1-clk" - for the APB1 bus clock on A80
-	"allwinner,sun4i-a10-apb1-gates-clk" - for the APB1 gates on A10
-	"allwinner,sun5i-a13-apb1-gates-clk" - for the APB1 gates on A13
-	"allwinner,sun5i-a10s-apb1-gates-clk" - for the APB1 gates on A10s
-	"allwinner,sun6i-a31-apb1-gates-clk" - for the APB1 gates on A31
-	"allwinner,sun7i-a20-apb1-gates-clk" - for the APB1 gates on A20
-	"allwinner,sun8i-a23-apb1-gates-clk" - for the APB1 gates on A23
-	"allwinner,sun9i-a80-apb1-gates-clk" - for the APB1 gates on A80
-	"allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
-	"allwinner,sun8i-a23-apb2-gates-clk" - for the APB2 gates on A23
-	"allwinner,sun8i-a83t-bus-gates-clk" - for the bus gates on A83T
-	"allwinner,sun8i-h3-bus-gates-clk" - for the bus gates on H3
-	"allwinner,sun9i-a80-apbs-gates-clk" - for the APBS gates on A80
-	"allwinner,sun4i-a10-display-clk" - for the display clocks on the A10
-	"allwinner,sun4i-a10-dram-gates-clk" - for the DRAM gates on A10
-	"allwinner,sun5i-a13-dram-gates-clk" - for the DRAM gates on A13
-	"allwinner,sun5i-a13-mbus-clk" - for the MBUS clock on A13
-	"allwinner,sun4i-a10-mmc-clk" - for the MMC clock
-	"allwinner,sun9i-a80-mmc-clk" - for mmc module clocks on A80
-	"allwinner,sun9i-a80-mmc-config-clk" - for mmc gates + resets on A80
-	"allwinner,sun4i-a10-mod0-clk" - for the module 0 family of clocks
-	"allwinner,sun9i-a80-mod0-clk" - for module 0 (storage) clocks on A80
-	"allwinner,sun8i-a23-mbus-clk" - for the MBUS clock on A23
-	"allwinner,sun7i-a20-out-clk" - for the external output clocks
-	"allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31
-	"allwinner,sun4i-a10-tcon-ch0-clk" - for the TCON channel 0 clock on the A10
-	"allwinner,sun4i-a10-tcon-ch1-clk" - for the TCON channel 1 clock on the A10
-	"allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20
-	"allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13
-	"allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31
-	"allwinner,sun8i-a23-usb-clk" - for usb gates + resets on A23
-	"allwinner,sun8i-h3-usb-clk" - for usb gates + resets on H3
-	"allwinner,sun9i-a80-usb-mod-clk" - for usb gates + resets on A80
-	"allwinner,sun9i-a80-usb-phy-clk" - for usb phy gates + resets on A80
-	"allwinner,sun4i-a10-ve-clk" - for the Video Engine clock
-	"allwinner,sun6i-a31-display-clk" - for the display clocks
-
-Required properties for all clocks:
-- reg : shall be the control register address for the clock.
-- clocks : shall be the input parent clock(s) phandle for the clock. For
-	multiplexed clocks, the list order must match the hardware
-	programming order.
-- #clock-cells : from common clock binding; shall be set to 0 except for
-	the following compatibles where it shall be set to 1:
-	"allwinner,*-gates-clk", "allwinner,sun4i-pll5-clk",
-	"allwinner,sun4i-pll6-clk", "allwinner,sun6i-a31-pll6-clk",
-	"allwinner,*-usb-clk", "allwinner,*-mmc-clk",
-	"allwinner,*-mmc-config-clk"
-- clock-output-names : shall be the corresponding names of the outputs.
-	If the clock module only has one output, the name shall be the
-	module name.
-
-And "allwinner,*-usb-clk" clocks also require:
-- reset-cells : shall be set to 1
-
-The "allwinner,sun4i-a10-ve-clk" clock also requires:
-- reset-cells : shall be set to 0
-
-The "allwinner,sun9i-a80-mmc-config-clk" clock also requires:
-- #reset-cells : shall be set to 1
-- resets : shall be the reset control phandle for the mmc block.
-
-For "allwinner,sun7i-a20-gmac-clk", the parent clocks shall be fixed rate
-dummy clocks at 25 MHz and 125 MHz, respectively. See example.
-
-Clock consumers should specify the desired clocks they use with a
-"clocks" phandle cell. Consumers that are using a gated clock should
-provide an additional ID in their clock property. This ID is the
-offset of the bit controlling this particular gate in the register.
-For the other clocks with "#clock-cells" = 1, the additional ID shall
-refer to the index of the output.
-
-For "allwinner,sun6i-a31-pll6-clk", there are 2 outputs. The first output
-is the normal PLL6 output, or "pll6". The second output is rate doubled
-PLL6, or "pll6x2".
-
-The "allwinner,*-mmc-clk" clocks have three different outputs: the
-main clock, with the ID 0, and the output and sample clocks, with the
-IDs 1 and 2, respectively.
-
-The "allwinner,sun9i-a80-mmc-config-clk" clock has one clock/reset output
-per mmc controller. The number of outputs is determined by the size of
-the address block, which is related to the overall mmc block.
-
-For example:
-
-osc24M: clk@1c20050 {
-	#clock-cells = <0>;
-	compatible = "allwinner,sun4i-a10-osc-clk";
-	reg = <0x01c20050 0x4>;
-	clocks = <&osc24M_fixed>;
-	clock-output-names = "osc24M";
-};
-
-pll1: clk@1c20000 {
-	#clock-cells = <0>;
-	compatible = "allwinner,sun4i-a10-pll1-clk";
-	reg = <0x01c20000 0x4>;
-	clocks = <&osc24M>;
-	clock-output-names = "pll1";
-};
-
-pll5: clk@1c20020 {
-	#clock-cells = <1>;
-	compatible = "allwinner,sun4i-pll5-clk";
-	reg = <0x01c20020 0x4>;
-	clocks = <&osc24M>;
-	clock-output-names = "pll5_ddr", "pll5_other";
-};
-
-pll6: clk@1c20028 {
-	#clock-cells = <1>;
-	compatible = "allwinner,sun6i-a31-pll6-clk";
-	reg = <0x01c20028 0x4>;
-	clocks = <&osc24M>;
-	clock-output-names = "pll6", "pll6x2";
-};
-
-cpu: cpu@1c20054 {
-	#clock-cells = <0>;
-	compatible = "allwinner,sun4i-a10-cpu-clk";
-	reg = <0x01c20054 0x4>;
-	clocks = <&osc32k>, <&osc24M>, <&pll1>;
-	clock-output-names = "cpu";
-};
-
-mmc0_clk: clk@1c20088 {
-	#clock-cells = <1>;
-	compatible = "allwinner,sun4i-a10-mmc-clk";
-	reg = <0x01c20088 0x4>;
-	clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-	clock-output-names = "mmc0", "mmc0_output", "mmc0_sample";
-};
-
-mii_phy_tx_clk: clk@2 {
-	#clock-cells = <0>;
-	compatible = "fixed-clock";
-	clock-frequency = <25000000>;
-	clock-output-names = "mii_phy_tx";
-};
-
-gmac_int_tx_clk: clk@3 {
-	#clock-cells = <0>;
-	compatible = "fixed-clock";
-	clock-frequency = <125000000>;
-	clock-output-names = "gmac_int_tx";
-};
-
-gmac_clk: clk@1c20164 {
-	#clock-cells = <0>;
-	compatible = "allwinner,sun7i-a20-gmac-clk";
-	reg = <0x01c20164 0x4>;
-	/*
-	 * The first clock must be fixed at 25MHz;
-	 * the second clock must be fixed at 125MHz
-	 */
-	clocks = <&mii_phy_tx_clk>, <&gmac_int_tx_clk>;
-	clock-output-names = "gmac";
-};
-
-mmc_config_clk: clk@1c13000 {
-	compatible = "allwinner,sun9i-a80-mmc-config-clk";
-	reg = <0x01c13000 0x10>;
-	clocks = <&ahb0_gates 8>;
-	clock-names = "ahb";
-	resets = <&ahb0_resets 8>;
-	reset-names = "ahb";
-	#clock-cells = <1>;
-	#reset-cells = <1>;
-	clock-output-names = "mmc0_config", "mmc1_config",
-			     "mmc2_config", "mmc3_config";
-};
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
index dae4ad8e198c..5f746ebf7a2c 100644
--- a/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
@@ -67,5 +67,5 @@ Examples:
 
 Also see:
 - Documentation/devicetree/bindings/clock/clock-bindings.txt
-- Documentation/devicetree/bindings/power/power_domain.txt
+- Documentation/devicetree/bindings/power/power-domain.yaml
 - Documentation/devicetree/bindings/reset/reset.txt
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.txt b/Documentation/devicetree/bindings/connector/usb-connector.txt
index d357987181ee..88578ac1a8a7 100644
--- a/Documentation/devicetree/bindings/connector/usb-connector.txt
+++ b/Documentation/devicetree/bindings/connector/usb-connector.txt
@@ -1,8 +1,8 @@
 USB Connector
 =============
 
-USB connector node represents physical USB connector. It should be
-a child of USB interface controller.
+A USB connector node represents a physical USB connector. It should be
+a child of a USB interface controller.
 
 Required properties:
 - compatible: describes type of the connector, must be one of:
diff --git a/Documentation/devicetree/bindings/counter/stm32-lptimer-cnt.txt b/Documentation/devicetree/bindings/counter/stm32-lptimer-cnt.txt
deleted file mode 100644
index e90bc47f752a..000000000000
--- a/Documentation/devicetree/bindings/counter/stm32-lptimer-cnt.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-STMicroelectronics STM32 Low-Power Timer quadrature encoder and counter
-
-STM32 Low-Power Timer provides several counter modes. It can be used as:
-- quadrature encoder to detect angular position and direction of rotary
-  elements, from IN1 and IN2 input signals.
-- simple counter from IN1 input signal.
-
-Must be a sub-node of an STM32 Low-Power Timer device tree node.
-See ../mfd/stm32-lptimer.txt for details about the parent node.
-
-Required properties:
-- compatible:		Must be "st,stm32-lptimer-counter".
-- pinctrl-names: 	Set to "default". An additional "sleep" state can be
-			defined to set pins in sleep state.
-- pinctrl-n: 		List of phandles pointing to pin configuration nodes,
-			to set IN1/IN2 pins in mode of operation for Low-Power
-			Timer input on external pin.
-
-Example:
-	timer@40002400 {
-		compatible = "st,stm32-lptimer";
-		...
-		counter {
-			compatible = "st,stm32-lptimer-counter";
-			pinctrl-names = "default", "sleep";
-			pinctrl-0 = <&lptim1_in_pins>;
-			pinctrl-1 = <&lptim1_sleep_in_pins>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/counter/stm32-timer-cnt.txt b/Documentation/devicetree/bindings/counter/stm32-timer-cnt.txt
deleted file mode 100644
index c52fcdd4bf6c..000000000000
--- a/Documentation/devicetree/bindings/counter/stm32-timer-cnt.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-STMicroelectronics STM32 Timer quadrature encoder
-
-STM32 Timer provides quadrature encoder to detect
-angular position and direction of rotary elements,
-from IN1 and IN2 input signals.
-
-Must be a sub-node of an STM32 Timer device tree node.
-See ../mfd/stm32-timers.txt for details about the parent node.
-
-Required properties:
-- compatible:		Must be "st,stm32-timer-counter".
-- pinctrl-names: 	Set to "default".
-- pinctrl-0: 		List of phandles pointing to pin configuration nodes,
-			to set CH1/CH2 pins in mode of operation for STM32
-			Timer input on external pin.
-
-Example:
-	timers@40010000 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "st,stm32-timers";
-		reg = <0x40010000 0x400>;
-		clocks = <&rcc 0 160>;
-		clock-names = "int";
-
-		counter {
-			compatible = "st,stm32-timer-counter";
-			pinctrl-names = "default";
-			pinctrl-0 = <&tim1_in_pins>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/counter/ti-eqep.yaml b/Documentation/devicetree/bindings/counter/ti-eqep.yaml
new file mode 100644
index 000000000000..85f1ff83afe7
--- /dev/null
+++ b/Documentation/devicetree/bindings/counter/ti-eqep.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/counter/ti-eqep.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments Enhanced Quadrature Encoder Pulse (eQEP) Module
+
+maintainers:
+  - David Lechner <david@lechnology.com>
+
+properties:
+  compatible:
+    const: ti,am3352-eqep
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description: The eQEP event interrupt
+    maxItems: 1
+
+  clocks:
+    description: The clock that determines the SYSCLKOUT rate for the eQEP
+      peripheral.
+    maxItems: 1
+
+  clock-names:
+    const: sysclkout
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    eqep0: counter@180 {
+        compatible = "ti,am3352-eqep";
+        reg = <0x180 0x80>;
+        clocks = <&l4ls_gclk>;
+        clock-names = "sysclkout";
+        interrupts = <79>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/cpu/cpu-topology.txt b/Documentation/devicetree/bindings/cpu/cpu-topology.txt
index 99918189403c..9bd530a35d14 100644
--- a/Documentation/devicetree/bindings/cpu/cpu-topology.txt
+++ b/Documentation/devicetree/bindings/cpu/cpu-topology.txt
@@ -549,5 +549,5 @@ Example 3: HiFive Unleashed (RISC-V 64 bit, 4 core system)
 [2] Devicetree NUMA binding description
     Documentation/devicetree/bindings/numa.txt
 [3] RISC-V Linux kernel documentation
-    Documentation/devicetree/bindings/riscv/cpus.txt
+    Documentation/devicetree/bindings/riscv/cpus.yaml
 [4] https://www.devicetree.org/specifications/
diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
index 0c38e4b8fc51..1758051798fe 100644
--- a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
+++ b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
@@ -15,12 +15,16 @@ In 'cpus' nodes:
 
 In 'operating-points-v2' table:
 - compatible: Should be
-	- 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx SoCs
+	- 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx,
+	  omap34xx, omap36xx and am3517 SoCs
 - syscon: A phandle pointing to a syscon node representing the control module
 	  register space of the SoC.
 
 Optional properties:
 --------------------
+- "vdd-supply", "vbb-supply": to define two regulators for dra7xx
+- "cpu0-supply", "vbb-supply": to define two regulators for omap36xx
+
 For each opp entry in 'operating-points-v2' table:
 - opp-supported-hw: Two bitfields indicating:
 	1. Which revision of the SoC the OPP is supported by
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
index 80b3e7350a73..33c7842917f6 100644
--- a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Security System Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml
new file mode 100644
index 000000000000..2c459b8c76ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/allwinner,sun8i-ce.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Crypto Engine driver
+
+maintainers:
+  - Corentin Labbe <clabbe.montjoie@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun8i-h3-crypto
+      - allwinner,sun8i-r40-crypto
+      - allwinner,sun50i-a64-crypto
+      - allwinner,sun50i-h5-crypto
+      - allwinner,sun50i-h6-crypto
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus clock
+      - description: Module clock
+      - description: MBus clock
+    minItems: 2
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+      - const: ram
+    minItems: 2
+    maxItems: 3
+
+  resets:
+    maxItems: 1
+
+if:
+  properties:
+    compatible:
+      items:
+        const: allwinner,sun50i-h6-crypto
+then:
+  properties:
+      clocks:
+        minItems: 3
+      clock-names:
+        minItems: 3
+else:
+  properties:
+      clocks:
+        maxItems: 2
+      clock-names:
+        maxItems: 2
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun50i-a64-ccu.h>
+    #include <dt-bindings/reset/sun50i-a64-ccu.h>
+
+    crypto: crypto@1c15000 {
+      compatible = "allwinner,sun8i-h3-crypto";
+      reg = <0x01c15000 0x1000>;
+      interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+      clock-names = "bus", "mod";
+      resets = <&ccu RST_BUS_CE>;
+    };
+
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml
new file mode 100644
index 000000000000..8a29d36edf26
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/allwinner,sun8i-ss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Security System v2 driver
+
+maintainers:
+  - Corentin Labbe <corentin.labbe@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun8i-a83t-crypto
+      - allwinner,sun9i-a80-crypto
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus clock
+      - description: Module clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun8i-a83t-ccu.h>
+    #include <dt-bindings/reset/sun8i-a83t-ccu.h>
+
+    crypto: crypto@1c15000 {
+      compatible = "allwinner,sun8i-a83t-crypto";
+      reg = <0x01c15000 0x1000>;
+      interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+      resets = <&ccu RST_BUS_SS>;
+      clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
+      clock-names = "bus", "mod";
+    };
diff --git a/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml b/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml
new file mode 100644
index 000000000000..5becc60a0e28
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/amlogic,gxl-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic GXL Cryptographic Offloader
+
+maintainers:
+  - Corentin Labbe <clabbe@baylibre.com>
+
+properties:
+  compatible:
+    items:
+    - const: amlogic,gxl-crypto
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: "Interrupt for flow 0"
+      - description: "Interrupt for flow 1"
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: blkmv
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/gxbb-clkc.h>
+
+    crypto: crypto-engine@c883e000 {
+        compatible = "amlogic,gxl-crypto";
+        reg = <0x0 0xc883e000 0x0 0x36>;
+        interrupts = <GIC_SPI 188 IRQ_TYPE_EDGE_RISING>, <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
+        clocks = <&clkc CLKID_BLKMV>;
+        clock-names = "blkmv";
+    };
diff --git a/Documentation/devicetree/bindings/crypto/samsung-slimsss.txt b/Documentation/devicetree/bindings/crypto/samsung-slimsss.txt
deleted file mode 100644
index 7ec9a5a7727a..000000000000
--- a/Documentation/devicetree/bindings/crypto/samsung-slimsss.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Samsung SoC SlimSSS (Slim Security SubSystem) module
-
-The SlimSSS module in Exynos5433 SoC supports the following:
--- Feeder (FeedCtrl)
--- Advanced Encryption Standard (AES) with ECB,CBC,CTR,XTS and (CBC/XTS)/CTS
--- SHA-1/SHA-256 and (SHA-1/SHA-256)/HMAC
-
-Required properties:
-
-- compatible : Should contain entry for slimSSS version:
-  - "samsung,exynos5433-slim-sss" for Exynos5433 SoC.
-- reg : Offset and length of the register set for the module
-- interrupts : interrupt specifiers of SlimSSS module interrupts (one feed
-		control interrupt).
-
-- clocks : list of clock phandle and specifier pairs for all clocks listed in
-		clock-names property.
-- clock-names : list of device clock input names; should contain "pclk" and
-		"aclk" for slim-sss in Exynos5433.
diff --git a/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml
new file mode 100644
index 000000000000..04fe5dfa794a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/samsung-slimsss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC SlimSSS (Slim Security SubSystem) module
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+  - Kamil Konieczny <k.konieczny@partner.samsung.com>
+
+description: |+
+  The SlimSSS module in Exynos5433 SoC supports the following:
+  -- Feeder (FeedCtrl)
+  -- Advanced Encryption Standard (AES) with ECB,CBC,CTR,XTS and (CBC/XTS)/CTS
+  -- SHA-1/SHA-256 and (SHA-1/SHA-256)/HMAC
+
+properties:
+  compatible:
+    items:
+      - const: samsung,exynos5433-slim-ss
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: pclk
+      - const: aclk
+
+  interrupts:
+    description: One feed control interrupt.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clock-names
+  - clocks
+  - interrupts
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.txt b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
deleted file mode 100644
index 7a5ca56683cc..000000000000
--- a/Documentation/devicetree/bindings/crypto/samsung-sss.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Samsung SoC SSS (Security SubSystem) module
-
-The SSS module in S5PV210 SoC supports the following:
--- Feeder (FeedCtrl)
--- Advanced Encryption Standard (AES)
--- Data Encryption Standard (DES)/3DES
--- Public Key Accelerator (PKA)
--- SHA-1/SHA-256/MD5/HMAC (SHA-1/SHA-256/MD5)/PRNG
--- PRNG: Pseudo Random Number Generator
-
-The SSS module in Exynos4 (Exynos4210) and
-Exynos5 (Exynos5420 and Exynos5250) SoCs
-supports the following also:
--- ARCFOUR (ARC4)
--- True Random Number Generator (TRNG)
--- Secure Key Manager
-
-Required properties:
-
-- compatible : Should contain entries for this and backward compatible
-  SSS versions:
-  - "samsung,s5pv210-secss" for S5PV210 SoC.
-  - "samsung,exynos4210-secss" for Exynos4210, Exynos4212, Exynos4412, Exynos5250,
-		Exynos5260 and Exynos5420 SoCs.
-- reg : Offset and length of the register set for the module
-- interrupts : interrupt specifiers of SSS module interrupts (one feed
-		control interrupt).
-
-- clocks : list of clock phandle and specifier pairs for all clocks  listed in
-		clock-names property.
-- clock-names : list of device clock input names; should contain one entry
-		"secss".
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.yaml b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml
new file mode 100644
index 000000000000..cf1c47a81d7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/samsung-sss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC SSS (Security SubSystem) module
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+  - Kamil Konieczny <k.konieczny@partner.samsung.com>
+
+description: |+
+  The SSS module in S5PV210 SoC supports the following:
+  -- Feeder (FeedCtrl)
+  -- Advanced Encryption Standard (AES)
+  -- Data Encryption Standard (DES)/3DES
+  -- Public Key Accelerator (PKA)
+  -- SHA-1/SHA-256/MD5/HMAC (SHA-1/SHA-256/MD5)/PRNG
+  -- PRNG: Pseudo Random Number Generator
+
+  The SSS module in Exynos4 (Exynos4210) and Exynos5 (Exynos5420 and Exynos5250)
+  SoCs supports the following also:
+  -- ARCFOUR (ARC4)
+  -- True Random Number Generator (TRNG)
+  -- Secure Key Manager
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - samsung,s5pv210-secss           # for S5PV210
+          - samsung,exynos4210-secss        # for Exynos4210, Exynos4212,
+                                            # Exynos4412, Exynos5250,
+                                            # Exynos5260 and Exynos5420
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: secss
+
+  interrupts:
+    description: One feed control interrupt.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clock-names
+  - clocks
+  - interrupts
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt b/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt
deleted file mode 100644
index 3ba92a5e9b36..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* STMicroelectronics STM32 CRC
-
-Required properties:
-- compatible: Should be "st,stm32f7-crc".
-- reg: The address and length of the peripheral registers space
-- clocks: The input clock of the CRC instance
-
-Optional properties: none
-
-Example:
-
-crc: crc@40023000 {
-	compatible = "st,stm32f7-crc";
-	reg = <0x40023000 0x400>;
-	clocks = <&rcc 0 12>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
new file mode 100644
index 000000000000..cee624c14f07
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-crc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 CRC bindings
+
+maintainers:
+  - Lionel Debieve <lionel.debieve@st.com>
+
+properties:
+  compatible:
+    const: st,stm32f7-crc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    crc@40023000 {
+      compatible = "st,stm32f7-crc";
+      reg = <0x40023000 0x400>;
+      clocks = <&rcc 0 12>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
deleted file mode 100644
index 970487fa40b8..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* STMicroelectronics STM32 CRYP
-
-Required properties:
-- compatible: Should be "st,stm32f756-cryp".
-- reg: The address and length of the peripheral registers space
-- clocks: The input clock of the CRYP instance
-- interrupts: The CRYP interrupt
-
-Optional properties:
-- resets: The input reset of the CRYP instance
-
-Example:
-crypto@50060000 {
-	compatible = "st,stm32f756-cryp";
-	reg = <0x50060000 0x400>;
-	interrupts = <79>;
-	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
-	resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
new file mode 100644
index 000000000000..a4574552502a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-cryp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 CRYP bindings
+
+maintainers:
+  - Lionel Debieve <lionel.debieve@st.com>
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f756-cryp
+      - st,stm32mp1-cryp
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    cryp@54001000 {
+      compatible = "st,stm32mp1-cryp";
+      reg = <0x54001000 0x400>;
+      interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&rcc CRYP1>;
+      resets = <&rcc CRYP1_R>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
deleted file mode 100644
index 04fc246f02f7..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* STMicroelectronics STM32 HASH
-
-Required properties:
-- compatible: Should contain entries for this and backward compatible
-  HASH versions:
-  - "st,stm32f456-hash" for stm32 F456.
-  - "st,stm32f756-hash" for stm32 F756.
-- reg: The address and length of the peripheral registers space
-- interrupts: the interrupt specifier for the HASH
-- clocks: The input clock of the HASH instance
-
-Optional properties:
-- resets: The input reset of the HASH instance
-- dmas: DMA specifiers for the HASH. See the DMA client binding,
-	 Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request name. Should be "in" if a dma is present.
-- dma-maxburst: Set number of maximum dma burst supported
-
-Example:
-
-hash1: hash@50060400 {
-	compatible = "st,stm32f756-hash";
-	reg = <0x50060400 0x400>;
-	interrupts = <80>;
-	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
-	resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
-	dmas = <&dma2 7 2 0x400 0x0>;
-	dma-names = "in";
-	dma-maxburst = <0>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
new file mode 100644
index 000000000000..57ae1c0b6d18
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-hash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 HASH bindings
+
+maintainers:
+  - Lionel Debieve <lionel.debieve@st.com>
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f456-hash
+      - st,stm32f756-hash
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    maxItems: 1
+
+  dma-names:
+    items:
+      - const: in
+
+  dma-maxburst:
+    description: Set number of maximum dma burst supported
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 0
+      - maximum: 2
+      - default: 0
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    hash@54002000 {
+      compatible = "st,stm32f756-hash";
+      reg = <0x54002000 0x400>;
+      interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&rcc HASH1>;
+      resets = <&rcc HASH1_R>;
+      dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>;
+      dma-names = "in";
+      dma-maxburst = <2>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt b/Documentation/devicetree/bindings/ddr/lpddr2-timings.txt
index 9ceb19e0c7fd..9ceb19e0c7fd 100644
--- a/Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt
+++ b/Documentation/devicetree/bindings/ddr/lpddr2-timings.txt
diff --git a/Documentation/devicetree/bindings/lpddr2/lpddr2.txt b/Documentation/devicetree/bindings/ddr/lpddr2.txt
index 58354a075e13..ddd40121e6f6 100644
--- a/Documentation/devicetree/bindings/lpddr2/lpddr2.txt
+++ b/Documentation/devicetree/bindings/ddr/lpddr2.txt
@@ -36,7 +36,7 @@ Child nodes:
   "lpddr2-timings" provides AC timing parameters of the device for
   a given speed-bin. The user may provide the timings for as many
   speed-bins as is required. Please see Documentation/devicetree/
-  bindings/lpddr2/lpddr2-timings.txt for more information on "lpddr2-timings"
+  bindings/ddr/lpddr2-timings.txt for more information on "lpddr2-timings"
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/ddr/lpddr3-timings.txt b/Documentation/devicetree/bindings/ddr/lpddr3-timings.txt
new file mode 100644
index 000000000000..84705e50a3fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/ddr/lpddr3-timings.txt
@@ -0,0 +1,58 @@
+* AC timing parameters of LPDDR3 memories for a given speed-bin.
+
+The structures are based on LPDDR2 and extended where needed.
+
+Required properties:
+- compatible : Should be "jedec,lpddr3-timings"
+- min-freq : minimum DDR clock frequency for the speed-bin. Type is <u32>
+- reg : maximum DDR clock frequency for the speed-bin. Type is <u32>
+
+Optional properties:
+
+The following properties represent AC timing parameters from the memory
+data-sheet of the device for a given speed-bin. All these properties are
+of type <u32> and the default unit is ps (pico seconds).
+- tRFC
+- tRRD
+- tRPab
+- tRPpb
+- tRCD
+- tRC
+- tRAS
+- tWTR
+- tWR
+- tRTP
+- tW2W-C2C
+- tR2R-C2C
+- tFAW
+- tXSR
+- tXP
+- tCKE
+- tCKESR
+- tMRD
+
+Example:
+
+timings_samsung_K3QF2F20DB_800mhz: lpddr3-timings@800000000 {
+	compatible	= "jedec,lpddr3-timings";
+	reg		= <800000000>; /* workaround: it shows max-freq */
+	min-freq	= <100000000>;
+	tRFC		= <65000>;
+	tRRD		= <6000>;
+	tRPab		= <12000>;
+	tRPpb		= <12000>;
+	tRCD		= <10000>;
+	tRC		= <33750>;
+	tRAS		= <23000>;
+	tWTR		= <3750>;
+	tWR		= <7500>;
+	tRTP		= <3750>;
+	tW2W-C2C	= <0>;
+	tR2R-C2C	= <0>;
+	tFAW		= <25000>;
+	tXSR		= <70000>;
+	tXP		= <3750>;
+	tCKE		= <3750>;
+	tCKESR		= <3750>;
+	tMRD		= <7000>;
+};
diff --git a/Documentation/devicetree/bindings/ddr/lpddr3.txt b/Documentation/devicetree/bindings/ddr/lpddr3.txt
new file mode 100644
index 000000000000..a0eda35a86ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/ddr/lpddr3.txt
@@ -0,0 +1,101 @@
+* LPDDR3 SDRAM memories compliant to JEDEC JESD209-3C
+
+Required properties:
+- compatible : Should be "<vendor>,<type>", and generic value "jedec,lpddr3".
+  Example "<vendor>,<type>" values:
+    "samsung,K3QF2F20DB"
+
+- density  : <u32> representing density in Mb (Mega bits)
+- io-width : <u32> representing bus width. Possible values are 8, 16, 32, 64
+- #address-cells: Must be set to 1
+- #size-cells: Must be set to 0
+
+Optional properties:
+
+The following optional properties represent the minimum value of some AC
+timing parameters of the DDR device in terms of number of clock cycles.
+These values shall be obtained from the device data-sheet.
+- tRFC-min-tck
+- tRRD-min-tck
+- tRPab-min-tck
+- tRPpb-min-tck
+- tRCD-min-tck
+- tRC-min-tck
+- tRAS-min-tck
+- tWTR-min-tck
+- tWR-min-tck
+- tRTP-min-tck
+- tW2W-C2C-min-tck
+- tR2R-C2C-min-tck
+- tWL-min-tck
+- tDQSCK-min-tck
+- tRL-min-tck
+- tFAW-min-tck
+- tXSR-min-tck
+- tXP-min-tck
+- tCKE-min-tck
+- tCKESR-min-tck
+- tMRD-min-tck
+
+Child nodes:
+- The lpddr3 node may have one or more child nodes of type "lpddr3-timings".
+  "lpddr3-timings" provides AC timing parameters of the device for
+  a given speed-bin. Please see Documentation/devicetree/
+  bindings/ddr/lpddr3-timings.txt for more information on "lpddr3-timings"
+
+Example:
+
+samsung_K3QF2F20DB: lpddr3 {
+	compatible	= "samsung,K3QF2F20DB", "jedec,lpddr3";
+	density		= <16384>;
+	io-width	= <32>;
+	#address-cells	= <1>;
+	#size-cells	= <0>;
+
+	tRFC-min-tck		= <17>;
+	tRRD-min-tck		= <2>;
+	tRPab-min-tck		= <2>;
+	tRPpb-min-tck		= <2>;
+	tRCD-min-tck		= <3>;
+	tRC-min-tck		= <6>;
+	tRAS-min-tck		= <5>;
+	tWTR-min-tck		= <2>;
+	tWR-min-tck		= <7>;
+	tRTP-min-tck		= <2>;
+	tW2W-C2C-min-tck	= <0>;
+	tR2R-C2C-min-tck	= <0>;
+	tWL-min-tck		= <8>;
+	tDQSCK-min-tck		= <5>;
+	tRL-min-tck		= <14>;
+	tFAW-min-tck		= <5>;
+	tXSR-min-tck		= <12>;
+	tXP-min-tck		= <2>;
+	tCKE-min-tck		= <2>;
+	tCKESR-min-tck		= <2>;
+	tMRD-min-tck		= <5>;
+
+	timings_samsung_K3QF2F20DB_800mhz: lpddr3-timings@800000000 {
+		compatible	= "jedec,lpddr3-timings";
+		/* workaround: 'reg' shows max-freq */
+		reg		= <800000000>;
+		min-freq	= <100000000>;
+		tRFC		= <65000>;
+		tRRD		= <6000>;
+		tRPab		= <12000>;
+		tRPpb		= <12000>;
+		tRCD		= <10000>;
+		tRC		= <33750>;
+		tRAS		= <23000>;
+		tWTR		= <3750>;
+		tWR		= <7500>;
+		tRTP		= <3750>;
+		tW2W-C2C	= <0>;
+		tR2R-C2C	= <0>;
+		tFAW		= <25000>;
+		tXSR		= <70000>;
+		tXP		= <3750>;
+		tCKE		= <3750>;
+		tCKESR		= <3750>;
+		tMRD		= <7000>;
+	};
+}
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
index 3e36c1d11386..fb46b491791c 100644
--- a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
@@ -10,14 +10,23 @@ The Exynos PPMU driver uses the devfreq-event class to provide event data
 to various devfreq devices. The devfreq devices would use the event data when
 derterming the current state of each IP.
 
-Required properties:
+Required properties for PPMU device:
 - compatible: Should be "samsung,exynos-ppmu" or "samsung,exynos-ppmu-v2.
 - reg: physical base address of each PPMU and length of memory mapped region.
 
-Optional properties:
+Optional properties for PPMU device:
 - clock-names : the name of clock used by the PPMU, "ppmu"
 - clocks : phandles for clock specified in "clock-names" property
 
+Required properties for 'events' child node of PPMU device:
+- event-name : the unique event name among PPMU device
+Optional properties for 'events' child node of PPMU device:
+- event-data-type : Define the type of data which shell be counted
+by the counter. You can check include/dt-bindings/pmu/exynos_ppmu.h for
+all possible type, i.e. count read requests, count write data in bytes,
+etc. This field is optional and when it is missing, the driver code
+will use default data type.
+
 Example1 : PPMUv1 nodes in exynos3250.dtsi are listed below.
 
 		ppmu_dmc0: ppmu_dmc0@106a0000 {
@@ -145,3 +154,16 @@ Example3 : PPMUv2 nodes in exynos5433.dtsi are listed below.
 			reg = <0x104d0000 0x2000>;
 			status = "disabled";
 		};
+
+Example4 : 'event-data-type' in exynos4412-ppmu-common.dtsi are listed below.
+
+	&ppmu_dmc0 {
+		status = "okay";
+		events {
+			ppmu_dmc0_3: ppmu-event3-dmc0 {
+			event-name = "ppmu-event3-dmc0";
+			event-data-type = <(PPMU_RO_DATA_CNT |
+					PPMU_WO_DATA_CNT)>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
index f8e946471a58..e71f752cc18f 100644
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -50,8 +50,6 @@ Required properties only for passive bus device:
 Optional properties only for parent bus device:
 - exynos,saturation-ratio: the percentage value which is used to calibrate
 			the performance count against total cycle count.
-- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
-			which is used to calculate the max voltage.
 
 Detailed correlation between sub-blocks and power line according to Exynos SoC:
 - In case of Exynos3250, there are two power line as following:
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml
new file mode 100644
index 000000000000..86057d541065
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml
@@ -0,0 +1,291 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-backend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Backend Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The display engine backend exposes layers and sprites to the system.
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-display-backend
+      - allwinner,sun5i-a13-display-backend
+      - allwinner,sun6i-a31-display-backend
+      - allwinner,sun7i-a20-display-backend
+      - allwinner,sun8i-a23-display-backend
+      - allwinner,sun8i-a33-display-backend
+      - allwinner,sun9i-a80-display-backend
+
+  reg:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: Display Backend registers
+      - description: SAT registers
+
+  reg-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: be
+      - const: sat
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    maxItems: 4
+    items:
+      - description: The backend interface clock
+      - description: The backend module clock
+      - description: The backend DRAM clock
+      - description: The SAT clock
+
+  clock-names:
+    minItems: 3
+    maxItems: 4
+    items:
+      - const: ahb
+      - const: mod
+      - const: ram
+      - const: sat
+
+  resets:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: The Backend reset line
+      - description: The SAT reset line
+
+  reset-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: be
+      - const: sat
+
+  # FIXME: This should be made required eventually once every SoC will
+  # have the MBUS declared.
+  interconnects:
+    maxItems: 1
+
+  # FIXME: This should be made required eventually once every SoC will
+  # have the MBUS declared.
+  interconnect-names:
+    const: dma-mem
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: allwinner,sun8i-a33-display-backend
+
+then:
+  properties:
+    reg:
+      minItems: 2
+
+    reg-names:
+      minItems: 2
+
+    clocks:
+      minItems: 4
+
+    clock-names:
+      minItems: 4
+
+    resets:
+      minItems: 2
+
+    reset-names:
+      minItems: 2
+
+  required:
+    - reg-names
+    - reset-names
+
+else:
+  properties:
+    reg:
+      maxItems: 1
+
+    reg-names:
+      maxItems: 1
+
+    clocks:
+      maxItems: 3
+
+    clock-names:
+      maxItems: 3
+
+    resets:
+      maxItems: 1
+
+    reset-names:
+      maxItems: 1
+
+examples:
+  - |
+    /*
+     * This comes from the clock/sun4i-a10-ccu.h and
+     * reset/sun4i-a10-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_AHB_DE_BE0	42
+    #define CLK_DRAM_DE_BE0	140
+    #define CLK_DE_BE0		144
+    #define RST_DE_BE0		5
+
+    display-backend@1e60000 {
+        compatible = "allwinner,sun4i-a10-display-backend";
+        reg = <0x01e60000 0x10000>;
+        interrupts = <47>;
+        clocks = <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_DE_BE0>,
+                 <&ccu CLK_DRAM_DE_BE0>;
+        clock-names = "ahb", "mod",
+                      "ram";
+        resets = <&ccu RST_DE_BE0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&fe0_out_be0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&fe1_out_be0>;
+                };
+            };
+
+            port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&tcon0_in_be0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon1_in_be0>;
+                };
+            };
+        };
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun8i-a23-a33-ccu.h and
+     * reset/sun8i-a23-a33-ccu.h headers, but we can't include them
+     * since it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_BUS_DE_BE	40
+    #define CLK_BUS_SAT		46
+    #define CLK_DRAM_DE_BE	84
+    #define CLK_DE_BE		85
+    #define RST_BUS_DE_BE	21
+    #define RST_BUS_SAT		27
+
+    display-backend@1e60000 {
+        compatible = "allwinner,sun8i-a33-display-backend";
+        reg = <0x01e60000 0x10000>, <0x01e80000 0x1000>;
+        reg-names = "be", "sat";
+        interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_DE_BE>, <&ccu CLK_DE_BE>,
+                 <&ccu CLK_DRAM_DE_BE>, <&ccu CLK_BUS_SAT>;
+        clock-names = "ahb", "mod",
+                      "ram", "sat";
+        resets = <&ccu RST_BUS_DE_BE>, <&ccu RST_BUS_SAT>;
+        reset-names = "be", "sat";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+
+                endpoint {
+                    remote-endpoint = <&fe0_out_be0>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+
+                endpoint {
+                    remote-endpoint = <&drc0_in_be0>;
+                };
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
new file mode 100644
index 000000000000..944ff2f1cf93
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Pipeline Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The display engine pipeline (and its entry point, since it can be
+  either directly the backend or the frontend) is represented as an
+  extra node.
+
+  The Allwinner A10 Display pipeline is composed of several components
+  that are going to be documented below:
+
+  For all connections between components up to the TCONs in the
+  display pipeline, when there are multiple components of the same
+  type at the same depth, the local endpoint ID must be the same as
+  the remote component's index. For example, if the remote endpoint is
+  Frontend 1, then the local endpoint ID must be 1.
+
+  Frontend 0  [0] ------- [0]  Backend 0  [0] ------- [0]  TCON 0
+              [1] --   -- [1]             [1] --   -- [1]
+                    \ /                         \ /
+                     X                           X
+                    / \                         / \
+              [0] --   -- [0]             [0] --   -- [0]
+  Frontend 1  [1] ------- [1]  Backend 1  [1] ------- [1]  TCON 1
+
+  For a two pipeline system such as the one depicted above, the lines
+  represent the connections between the components, while the numbers
+  within the square brackets corresponds to the ID of the local endpoint.
+
+  The same rule also applies to DE 2.0 mixer-TCON connections:
+
+  Mixer 0  [0] ----------- [0]  TCON 0
+           [1] ----   ---- [1]
+                   \ /
+                    X
+                   / \
+           [0] ----   ---- [0]
+  Mixer 1  [1] ----------- [1]  TCON 1
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-display-engine
+      - allwinner,sun5i-a10s-display-engine
+      - allwinner,sun5i-a13-display-engine
+      - allwinner,sun6i-a31-display-engine
+      - allwinner,sun6i-a31s-display-engine
+      - allwinner,sun7i-a20-display-engine
+      - allwinner,sun8i-a23-display-engine
+      - allwinner,sun8i-a33-display-engine
+      - allwinner,sun8i-a83t-display-engine
+      - allwinner,sun8i-h3-display-engine
+      - allwinner,sun8i-r40-display-engine
+      - allwinner,sun8i-v3s-display-engine
+      - allwinner,sun9i-a80-display-engine
+      - allwinner,sun50i-a64-display-engine
+      - allwinner,sun50i-h6-display-engine
+
+  allwinner,pipelines:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle-array
+      - minItems: 1
+        maxItems: 2
+    description: |
+      Available display engine frontends (DE 1.0) or mixers (DE
+      2.0/3.0) available.
+
+required:
+  - compatible
+  - allwinner,pipelines
+
+additionalProperties: false
+
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun4i-a10-display-engine
+          - allwinner,sun6i-a31-display-engine
+          - allwinner,sun6i-a31s-display-engine
+          - allwinner,sun7i-a20-display-engine
+          - allwinner,sun8i-a83t-display-engine
+          - allwinner,sun8i-r40-display-engine
+          - allwinner,sun9i-a80-display-engine
+          - allwinner,sun50i-a64-display-engine
+
+then:
+  properties:
+    allwinner,pipelines:
+      minItems: 2
+
+else:
+  properties:
+    allwinner,pipelines:
+      maxItems: 1
+
+examples:
+  - |
+      de: display-engine {
+          compatible = "allwinner,sun4i-a10-display-engine";
+          allwinner,pipelines = <&fe0>, <&fe1>;
+      };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml
new file mode 100644
index 000000000000..3eb1c2bbf4e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-frontend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Frontend Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The display engine frontend does formats conversion, scaling,
+  deinterlacing and color space conversion.
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-display-frontend
+      - allwinner,sun5i-a13-display-frontend
+      - allwinner,sun6i-a31-display-frontend
+      - allwinner,sun7i-a20-display-frontend
+      - allwinner,sun8i-a23-display-frontend
+      - allwinner,sun8i-a33-display-frontend
+      - allwinner,sun9i-a80-display-frontend
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: The frontend interface clock
+      - description: The frontend module clock
+      - description: The frontend DRAM clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+      - const: ram
+
+  # FIXME: This should be made required eventually once every SoC will
+  # have the MBUS declared.
+  interconnects:
+    maxItems: 1
+
+  # FIXME: This should be made required eventually once every SoC will
+  # have the MBUS declared.
+  interconnect-names:
+    const: dma-mem
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun4i-a10-ccu.h>
+    #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+    fe0: display-frontend@1e00000 {
+        compatible = "allwinner,sun4i-a10-display-frontend";
+        reg = <0x01e00000 0x20000>;
+        interrupts = <47>;
+        clocks = <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_FE0>,
+                 <&ccu CLK_DRAM_DE_FE0>;
+        clock-names = "ahb", "mod",
+                      "ram";
+        resets = <&ccu RST_DE_FE0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            fe0_out: port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                fe0_out_be0: endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&be0_in_fe0>;
+                };
+
+                fe0_out_be1: endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&be1_in_fe0>;
+                };
+            };
+        };
+    };
+
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml
new file mode 100644
index 000000000000..5d4915aed1e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 HDMI Controller Device Tree Bindings
+
+description: |
+  The HDMI Encoder supports the HDMI video and audio outputs, and does
+  CEC. It is one end of the pipeline.
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-hdmi
+      - const: allwinner,sun5i-a10s-hdmi
+      - const: allwinner,sun6i-a31-hdmi
+      - items:
+        - const: allwinner,sun7i-a20-hdmi
+        - const: allwinner,sun5i-a10s-hdmi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    oneOf:
+      - items:
+        - description: The HDMI interface clock
+        - description: The HDMI module clock
+        - description: The first video PLL
+        - description: The second video PLL
+
+      - items:
+        - description: The HDMI interface clock
+        - description: The HDMI module clock
+        - description: The HDMI DDC clock
+        - description: The first video PLL
+        - description: The second video PLL
+
+  clock-names:
+    oneOf:
+      - items:
+        - const: ahb
+        - const: mod
+        - const: pll-0
+        - const: pll-1
+
+      - items:
+        - const: ahb
+        - const: mod
+        - const: ddc
+        - const: pll-0
+        - const: pll-1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: DDC Transmission DMA Channel
+      - description: DDC Reception DMA Channel
+      - description: Audio Transmission DMA Channel
+
+  dma-names:
+    items:
+      - const: ddc-tx
+      - const: ddc-rx
+      - const: audio-tx
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller. Usually an HDMI
+          connector.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: allwinner,sun6i-a31-hdmi
+
+then:
+  properties:
+    clocks:
+      minItems: 5
+
+    clock-names:
+      minItems: 5
+
+  required:
+    - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun4i-a10-ccu.h>
+    #include <dt-bindings/dma/sun4i-a10.h>
+    #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+    hdmi: hdmi@1c16000 {
+        compatible = "allwinner,sun4i-a10-hdmi";
+        reg = <0x01c16000 0x1000>;
+        interrupts = <58>;
+        clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
+                 <&ccu CLK_PLL_VIDEO0_2X>,
+                 <&ccu CLK_PLL_VIDEO1_2X>;
+        clock-names = "ahb", "mod", "pll-0", "pll-1";
+        dmas = <&dma SUN4I_DMA_NORMAL 16>,
+               <&dma SUN4I_DMA_NORMAL 16>,
+               <&dma SUN4I_DMA_DEDICATED 24>;
+        dma-names = "ddc-tx", "ddc-rx", "audio-tx";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            hdmi_in: port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                hdmi_in_tcon0: endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&tcon0_out_hdmi>;
+                };
+
+                hdmi_in_tcon1: endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon1_out_hdmi>;
+                };
+            };
+
+            hdmi_out: port@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
new file mode 100644
index 000000000000..86ad617d2327
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
@@ -0,0 +1,676 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-tcon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Timings Controller (TCON) Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The TCON acts as a timing controller for RGB, LVDS and TV
+  interfaces.
+
+properties:
+  "#clock-cells":
+    const: 0
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-tcon
+      - const: allwinner,sun5i-a13-tcon
+      - const: allwinner,sun6i-a31-tcon
+      - const: allwinner,sun6i-a31s-tcon
+      - const: allwinner,sun7i-a20-tcon
+      - const: allwinner,sun8i-a23-tcon
+      - const: allwinner,sun8i-a33-tcon
+      - const: allwinner,sun8i-a83t-tcon-lcd
+      - const: allwinner,sun8i-a83t-tcon-tv
+      - const: allwinner,sun8i-r40-tcon-tv
+      - const: allwinner,sun8i-v3s-tcon
+      - const: allwinner,sun9i-a80-tcon-lcd
+      - const: allwinner,sun9i-a80-tcon-tv
+
+      - items:
+        - enum:
+            - allwinner,sun50i-a64-tcon-lcd
+        - const: allwinner,sun8i-a83t-tcon-lcd
+
+      - items:
+        - enum:
+          - allwinner,sun8i-h3-tcon-tv
+          - allwinner,sun50i-a64-tcon-tv
+          - allwinner,sun50i-h6-tcon-tv
+        - const: allwinner,sun8i-a83t-tcon-tv
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 4
+
+  clock-names:
+    minItems: 1
+    maxItems: 4
+
+  clock-output-names:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/string-array
+      - maxItems: 1
+    description:
+      Name of the LCD pixel clock created.
+
+  dmas:
+    maxItems: 1
+
+  resets:
+    anyOf:
+      - items:
+        - description: TCON Reset Line
+
+      - items:
+        - description: TCON Reset Line
+        - description: TCON LVDS Reset Line
+
+      - items:
+        - description: TCON Reset Line
+        - description: TCON eDP Reset Line
+
+      - items:
+        - description: TCON Reset Line
+        - description: TCON eDP Reset Line
+        - description: TCON LVDS Reset Line
+
+  reset-names:
+    oneOf:
+      - const: lcd
+
+      - items:
+        - const: lcd
+        - const: lvds
+
+      - items:
+        - const: lcd
+        - const: edp
+
+      - items:
+        - const: lcd
+        - const: edp
+        - const: lvds
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+        patternProperties:
+          "^endpoint(@[0-9])$":
+            type: object
+
+            properties:
+              allwinner,tcon-channel:
+                $ref: /schemas/types.yaml#/definitions/uint32
+                description: |
+                  TCON can have 1 or 2 channels, usually with the
+                  first channel being used for the panels interfaces
+                  (RGB, LVDS, etc.), and the second being used for the
+                  outputs that require another controller (TV Encoder,
+                  HDMI, etc.).
+
+                  If that property is present, specifies the TCON
+                  channel the endpoint is associated to. If that
+                  property is not present, the endpoint number will be
+                  used as the channel number.
+
+            unevaluatedProperties: true
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun4i-a10-tcon
+              - allwinner,sun5i-a13-tcon
+              - allwinner,sun7i-a20-tcon
+
+    then:
+      properties:
+        clocks:
+          minItems: 3
+
+        clock-names:
+          items:
+            - const: ahb
+            - const: tcon-ch0
+            - const: tcon-ch1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun6i-a31-tcon
+              - allwinner,sun6i-a31s-tcon
+
+    then:
+      properties:
+        clocks:
+          minItems: 4
+
+        clock-names:
+          items:
+            - const: ahb
+            - const: tcon-ch0
+            - const: tcon-ch1
+            - const: lvds-alt
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-a23-tcon
+              - allwinner,sun8i-a33-tcon
+
+    then:
+      properties:
+        clocks:
+          minItems: 3
+
+        clock-names:
+          items:
+            - const: ahb
+            - const: tcon-ch0
+            - const: lvds-alt
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-a83t-tcon-lcd
+              - allwinner,sun8i-v3s-tcon
+              - allwinner,sun9i-a80-tcon-lcd
+
+    then:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          items:
+            - const: ahb
+            - const: tcon-ch0
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-a83t-tcon-tv
+              - allwinner,sun8i-r40-tcon-tv
+              - allwinner,sun9i-a80-tcon-tv
+
+    then:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          items:
+            - const: ahb
+            - const: tcon-ch1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun5i-a13-tcon
+              - allwinner,sun6i-a31-tcon
+              - allwinner,sun6i-a31s-tcon
+              - allwinner,sun7i-a20-tcon
+              - allwinner,sun8i-a23-tcon
+              - allwinner,sun8i-a33-tcon
+              - allwinner,sun8i-v3s-tcon
+              - allwinner,sun9i-a80-tcon-lcd
+              - allwinner,sun4i-a10-tcon
+              - allwinner,sun8i-a83t-tcon-lcd
+
+    then:
+      required:
+        - "#clock-cells"
+        - clock-output-names
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun6i-a31-tcon
+              - allwinner,sun6i-a31s-tcon
+              - allwinner,sun8i-a23-tcon
+              - allwinner,sun8i-a33-tcon
+              - allwinner,sun8i-a83t-tcon-lcd
+
+    then:
+      properties:
+        resets:
+          minItems: 2
+
+        reset-names:
+          items:
+            - const: lcd
+            - const: lvds
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun9i-a80-tcon-lcd
+
+    then:
+      properties:
+        resets:
+          minItems: 3
+
+        reset-names:
+          items:
+            - const: lcd
+            - const: edp
+            - const: lvds
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun9i-a80-tcon-tv
+
+    then:
+      properties:
+        resets:
+          minItems: 2
+
+        reset-names:
+          items:
+            - const: lcd
+            - const: edp
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun4i-a10-tcon
+              - allwinner,sun5i-a13-tcon
+              - allwinner,sun6i-a31-tcon
+              - allwinner,sun6i-a31s-tcon
+              - allwinner,sun7i-a20-tcon
+              - allwinner,sun8i-a23-tcon
+              - allwinner,sun8i-a33-tcon
+
+    then:
+      required:
+        - dmas
+
+examples:
+  - |
+    #include <dt-bindings/dma/sun4i-a10.h>
+
+    /*
+     * This comes from the clock/sun4i-a10-ccu.h and
+     * reset/sun4i-a10-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_AHB_LCD0	56
+    #define CLK_TCON0_CH0	149
+    #define CLK_TCON0_CH1	155
+    #define RST_TCON0		11
+
+    lcd-controller@1c0c000 {
+        compatible = "allwinner,sun4i-a10-tcon";
+        reg = <0x01c0c000 0x1000>;
+        interrupts = <44>;
+        resets = <&ccu RST_TCON0>;
+        reset-names = "lcd";
+        clocks = <&ccu CLK_AHB_LCD0>,
+                 <&ccu CLK_TCON0_CH0>,
+                 <&ccu CLK_TCON0_CH1>;
+        clock-names = "ahb",
+                      "tcon-ch0",
+                      "tcon-ch1";
+        clock-output-names = "tcon0-pixel-clock";
+        #clock-cells = <0>;
+        dmas = <&dma SUN4I_DMA_DEDICATED 14>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&be0_out_tcon0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&be1_out_tcon0>;
+                };
+            };
+
+            port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&hdmi_in_tcon0>;
+                    allwinner,tcon-channel = <1>;
+                };
+            };
+        };
+    };
+
+    #undef CLK_AHB_LCD0
+    #undef CLK_TCON0_CH0
+    #undef CLK_TCON0_CH1
+    #undef RST_TCON0
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun6i-a31-ccu.h and
+     * reset/sun6i-a31-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_PLL_MIPI	15
+    #define CLK_AHB1_LCD0	47
+    #define CLK_LCD0_CH0	127
+    #define CLK_LCD0_CH1	129
+    #define RST_AHB1_LCD0	27
+    #define RST_AHB1_LVDS	41
+
+    lcd-controller@1c0c000 {
+        compatible = "allwinner,sun6i-a31-tcon";
+        reg = <0x01c0c000 0x1000>;
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+        dmas = <&dma 11>;
+        resets = <&ccu RST_AHB1_LCD0>, <&ccu RST_AHB1_LVDS>;
+        reset-names = "lcd", "lvds";
+        clocks = <&ccu CLK_AHB1_LCD0>,
+                 <&ccu CLK_LCD0_CH0>,
+                 <&ccu CLK_LCD0_CH1>,
+                 <&ccu CLK_PLL_MIPI>;
+        clock-names = "ahb",
+                      "tcon-ch0",
+                      "tcon-ch1",
+                      "lvds-alt";
+        clock-output-names = "tcon0-pixel-clock";
+        #clock-cells = <0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&drc0_out_tcon0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&drc1_out_tcon0>;
+                };
+            };
+
+            port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&hdmi_in_tcon0>;
+                    allwinner,tcon-channel = <1>;
+                };
+            };
+        };
+    };
+
+    #undef CLK_PLL_MIPI
+    #undef CLK_AHB1_LCD0
+    #undef CLK_LCD0_CH0
+    #undef CLK_LCD0_CH1
+    #undef RST_AHB1_LCD0
+    #undef RST_AHB1_LVDS
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun9i-a80-ccu.h and
+     * reset/sun9i-a80-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_BUS_LCD0	102
+    #define CLK_LCD0		58
+    #define RST_BUS_LCD0	22
+    #define RST_BUS_EDP		24
+    #define RST_BUS_LVDS	25
+
+    lcd-controller@3c00000 {
+        compatible = "allwinner,sun9i-a80-tcon-lcd";
+        reg = <0x03c00000 0x10000>;
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_LCD0>, <&ccu CLK_LCD0>;
+        clock-names = "ahb", "tcon-ch0";
+        resets = <&ccu RST_BUS_LCD0>, <&ccu RST_BUS_EDP>, <&ccu RST_BUS_LVDS>;
+        reset-names = "lcd", "edp", "lvds";
+        clock-output-names = "tcon0-pixel-clock";
+        #clock-cells = <0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+
+                endpoint {
+                    remote-endpoint = <&drc0_out_tcon0>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+    #undef CLK_BUS_TCON0
+    #undef CLK_TCON0
+    #undef RST_BUS_TCON0
+    #undef RST_BUS_EDP
+    #undef RST_BUS_LVDS
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun8i-a83t-ccu.h and
+     * reset/sun8i-a83t-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_BUS_TCON0	36
+    #define CLK_TCON0		85
+    #define RST_BUS_TCON0	22
+    #define RST_BUS_LVDS	31
+
+    lcd-controller@1c0c000 {
+        compatible = "allwinner,sun8i-a83t-tcon-lcd";
+        reg = <0x01c0c000 0x1000>;
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_TCON0>, <&ccu CLK_TCON0>;
+        clock-names = "ahb", "tcon-ch0";
+        clock-output-names = "tcon-pixel-clock";
+        #clock-cells = <0>;
+        resets = <&ccu RST_BUS_TCON0>, <&ccu RST_BUS_LVDS>;
+        reset-names = "lcd", "lvds";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&mixer0_out_tcon0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&mixer1_out_tcon0>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+    #undef CLK_BUS_TCON0
+    #undef CLK_TCON0
+    #undef RST_BUS_TCON0
+    #undef RST_BUS_LVDS
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun8i-r40-ccu.h and
+     * reset/sun8i-r40-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+
+    #define CLK_BUS_TCON_TV0	73
+    #define RST_BUS_TCON_TV0	49
+
+    tcon_tv0: lcd-controller@1c73000 {
+        compatible = "allwinner,sun8i-r40-tcon-tv";
+        reg = <0x01c73000 0x1000>;
+        interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_TCON_TV0>, <&tcon_top 0>;
+        clock-names = "ahb", "tcon-ch1";
+        resets = <&ccu RST_BUS_TCON_TV0>;
+        reset-names = "lcd";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0>;
+
+                endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&tcon_top_mixer0_out_tcon_tv0>;
+                };
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon_top_mixer1_out_tcon_tv0>;
+                };
+            };
+
+            tcon_tv0_out: port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon_top_hdmi_in_tcon_tv0>;
+                };
+            };
+        };
+    };
+
+    #undef CLK_BUS_TCON_TV0
+    #undef RST_BUS_TCON_TV0
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
new file mode 100644
index 000000000000..5d5d39665119
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-tv-encoder.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 TV Encoder Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    const: allwinner,sun4i-a10-tv-encoder
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  port:
+    type: object
+    description:
+      A port node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt. The
+      first port should be the input endpoint, usually coming from the
+      associated TCON.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - resets
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    tve0: tv-encoder@1c0a000 {
+        compatible = "allwinner,sun4i-a10-tv-encoder";
+        reg = <0x01c0a000 0x1000>;
+        clocks = <&ahb_gates 34>;
+        resets = <&tcon_ch0_clk 0>;
+
+        port {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            tve0_in_tcon0: endpoint@0 {
+                reg = <0>;
+                remote-endpoint = <&tcon0_out_tve0>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml
new file mode 100644
index 000000000000..0c1ce55940e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-drc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 Dynamic Range Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The DRC (Dynamic Range Controller) allows to dynamically adjust
+  pixel brightness/contrast based on histogram measurements for LCD
+  content adaptive backlight control.
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun6i-a31-drc
+      - allwinner,sun6i-a31s-drc
+      - allwinner,sun8i-a23-drc
+      - allwinner,sun8i-a33-drc
+      - allwinner,sun9i-a80-drc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: The DRC interface clock
+      - description: The DRC module clock
+      - description: The DRC DRAM clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+      - const: ram
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    #include <dt-bindings/clock/sun6i-a31-ccu.h>
+    #include <dt-bindings/reset/sun6i-a31-ccu.h>
+
+    drc0: drc@1e70000 {
+        compatible = "allwinner,sun6i-a31-drc";
+        reg = <0x01e70000 0x10000>;
+        interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_AHB1_DRC0>, <&ccu CLK_IEP_DRC0>,
+                 <&ccu CLK_DRAM_DRC0>;
+        clock-names = "ahb", "mod",
+                      "ram";
+        resets = <&ccu RST_AHB1_DRC0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            drc0_in: port@0 {
+                reg = <0>;
+
+                drc0_in_be0: endpoint {
+                    remote-endpoint = <&be0_out_drc0>;
+                };
+            };
+
+            drc0_out: port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                drc0_out_tcon0: endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&tcon0_in_drc0>;
+                };
+
+                drc0_out_tcon1: endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon1_in_drc0>;
+                };
+            };
+        };
+    };
+
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
index 47950fced28d..9e90c2b00960 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
@@ -8,14 +8,16 @@ title: Allwinner A31 MIPI-DSI Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells": true
   "#size-cells": true
 
   compatible:
-    const: allwinner,sun6i-a31-mipi-dsi
+    enum:
+      - allwinner,sun6i-a31-mipi-dsi
+      - allwinner,sun50i-a64-mipi-dsi
 
   reg:
     maxItems: 1
@@ -24,6 +26,8 @@ properties:
     maxItems: 1
 
   clocks:
+    minItems: 1
+    maxItems: 2
     items:
       - description: Bus Clock
       - description: Module Clock
@@ -36,6 +40,9 @@ properties:
   resets:
     maxItems: 1
 
+  vcc-dsi-supply:
+    description: VCC-DSI power supply of the DSI encoder
+
   phys:
     maxItems: 1
 
@@ -60,12 +67,38 @@ required:
   - reg
   - interrupts
   - clocks
-  - clock-names
   - phys
   - phy-names
   - resets
+  - vcc-dsi-supply
   - port
 
+allOf:
+  - if:
+      properties:
+         compatible:
+           contains:
+             const: allwinner,sun6i-a31-mipi-dsi
+
+    then:
+        properties:
+          clocks:
+            minItems: 2
+
+        required:
+          - clock-names
+
+  - if:
+      properties:
+         compatible:
+           contains:
+             const: allwinner,sun50i-a64-mipi-dsi
+
+    then:
+        properties:
+          clocks:
+            minItems: 1
+
 additionalProperties: false
 
 examples:
@@ -79,6 +112,7 @@ examples:
         resets = <&ccu 4>;
         phys = <&dphy0>;
         phy-names = "dphy";
+        vcc-dsi-supply = <&reg_dcdc1>;
         #address-cells = <1>;
         #size-cells = <0>;
 
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml
new file mode 100644
index 000000000000..1dee641e3ea1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-de2-mixer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Display Engine 2.0 Mixer Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun8i-a83t-de2-mixer-0
+      - allwinner,sun8i-a83t-de2-mixer-1
+      - allwinner,sun8i-h3-de2-mixer-0
+      - allwinner,sun8i-r40-de2-mixer-0
+      - allwinner,sun8i-r40-de2-mixer-1
+      - allwinner,sun8i-v3s-de2-mixer
+      - allwinner,sun50i-a64-de2-mixer-0
+      - allwinner,sun50i-a64-de2-mixer-1
+      - allwinner,sun50i-h6-de3-mixer-0
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: The mixer interface clock
+      - description: The mixer module clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun8i-de2.h>
+    #include <dt-bindings/reset/sun8i-de2.h>
+
+    mixer0: mixer@1100000 {
+        compatible = "allwinner,sun8i-a83t-de2-mixer-0";
+        reg = <0x01100000 0x100000>;
+        clocks = <&display_clocks CLK_BUS_MIXER0>,
+                 <&display_clocks CLK_MIXER0>;
+        clock-names = "bus",
+                      "mod";
+        resets = <&display_clocks RST_MIXER0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            mixer0_out: port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                mixer0_out_tcon0: endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&tcon0_in_mixer0>;
+                };
+
+                mixer0_out_tcon1: endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&tcon1_in_mixer0>;
+                };
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml
new file mode 100644
index 000000000000..4d6795690ac3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml
@@ -0,0 +1,273 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-dw-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t DWC HDMI TX Encoder Device Tree Bindings
+
+description: |
+  The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller
+  IP with Allwinner\'s own PHY IP. It supports audio and video outputs
+  and CEC.
+
+  These DT bindings follow the Synopsys DWC HDMI TX bindings defined
+  in Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt with
+  the following device-specific properties.
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 0
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun8i-a83t-dw-hdmi
+      - const: allwinner,sun50i-h6-dw-hdmi
+
+      - items:
+        - enum:
+          - allwinner,sun8i-h3-dw-hdmi
+          - allwinner,sun8i-r40-dw-hdmi
+          - allwinner,sun50i-a64-dw-hdmi
+        - const: allwinner,sun8i-a83t-dw-hdmi
+
+  reg:
+    maxItems: 1
+
+  reg-io-width:
+    const: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    maxItems: 6
+    items:
+      - description: Bus Clock
+      - description: Register Clock
+      - description: TMDS Clock
+      - description: HDMI CEC Clock
+      - description: HDCP Clock
+      - description: HDCP Bus Clock
+
+  clock-names:
+    minItems: 3
+    maxItems: 6
+    items:
+      - const: iahb
+      - const: isfr
+      - const: tmds
+      - const: cec
+      - const: hdcp
+      - const: hdcp-bus
+
+  resets:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: HDMI Controller Reset
+      - description: HDCP Reset
+
+  reset-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: ctrl
+      - const: hdcp
+
+  phys:
+    maxItems: 1
+    description:
+      Phandle to the DWC HDMI PHY.
+
+  phy-names:
+    const: phy
+
+  hvcc-supply:
+    description:
+      The VCC power supply of the controller
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller. Usually the associated
+          TCON.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller. Usually an HDMI
+          connector.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - reg-io-width
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - phys
+  - phy-names
+  - ports
+
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun50i-h6-dw-hdmi
+
+then:
+  properties:
+    clocks:
+      minItems: 6
+
+    clock-names:
+      minItems: 6
+
+    resets:
+      minItems: 2
+
+    reset-names:
+      minItems: 2
+
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun8i-a83t-ccu.h and
+     * reset/sun8i-a83t-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+    #define CLK_BUS_HDMI	39
+    #define CLK_HDMI		93
+    #define CLK_HDMI_SLOW	94
+    #define RST_BUS_HDMI1	26
+
+    hdmi@1ee0000 {
+        compatible = "allwinner,sun8i-a83t-dw-hdmi";
+        reg = <0x01ee0000 0x10000>;
+        reg-io-width = <1>;
+        interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>,
+                 <&ccu CLK_HDMI>;
+        clock-names = "iahb", "isfr", "tmds";
+        resets = <&ccu RST_BUS_HDMI1>;
+        reset-names = "ctrl";
+        phys = <&hdmi_phy>;
+        phy-names = "phy";
+        pinctrl-names = "default";
+        pinctrl-0 = <&hdmi_pins>;
+        status = "disabled";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+
+                endpoint {
+                    remote-endpoint = <&tcon1_out_hdmi>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+    /* Cleanup after ourselves */
+    #undef CLK_BUS_HDMI
+    #undef CLK_HDMI
+    #undef CLK_HDMI_SLOW
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    /*
+     * This comes from the clock/sun50i-h6-ccu.h and
+     * reset/sun50i-h6-ccu.h headers, but we can't include them since
+     * it would trigger a bunch of warnings for redefinitions of
+     * symbols with the other example.
+     */
+    #define CLK_BUS_HDMI	126
+    #define CLK_BUS_HDCP	137
+    #define CLK_HDMI		123
+    #define CLK_HDMI_SLOW	124
+    #define CLK_HDMI_CEC	125
+    #define CLK_HDCP		136
+    #define RST_BUS_HDMI_SUB	57
+    #define RST_BUS_HDCP	62
+
+    hdmi@6000000 {
+        compatible = "allwinner,sun50i-h6-dw-hdmi";
+        reg = <0x06000000 0x10000>;
+        reg-io-width = <1>;
+        interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>,
+                 <&ccu CLK_HDMI>, <&ccu CLK_HDMI_CEC>,
+                 <&ccu CLK_HDCP>, <&ccu CLK_BUS_HDCP>;
+        clock-names = "iahb", "isfr", "tmds", "cec", "hdcp",
+                      "hdcp-bus";
+        resets = <&ccu RST_BUS_HDMI_SUB>, <&ccu RST_BUS_HDCP>;
+        reset-names = "ctrl", "hdcp";
+        phys = <&hdmi_phy>;
+        phy-names = "phy";
+        pinctrl-names = "default";
+        pinctrl-0 = <&hdmi_pins>;
+        status = "disabled";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+
+                endpoint {
+                    remote-endpoint = <&tcon_top_hdmi_out_hdmi>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml
new file mode 100644
index 000000000000..501cec16168c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-hdmi-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t HDMI PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun8i-a83t-hdmi-phy
+      - allwinner,sun8i-h3-hdmi-phy
+      - allwinner,sun8i-r40-hdmi-phy
+      - allwinner,sun50i-a64-hdmi-phy
+      - allwinner,sun50i-h6-hdmi-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 4
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+      - description: Parent of the PHY clock
+      - description: Second possible parent of the PHY clock
+
+  clock-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      - const: bus
+      - const: mod
+      - const: pll-0
+      - const: pll-1
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    const: phy
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun8i-r40-hdmi-phy
+
+then:
+  properties:
+    clocks:
+      minItems: 4
+
+    clock-names:
+      minItems: 4
+
+else:
+  if:
+    properties:
+      compatible:
+        contains:
+          enum:
+            - allwinner,sun8i-h3-hdmi-phy
+            - allwinner,sun50i-a64-hdmi-phy
+
+  then:
+    properties:
+      clocks:
+        minItems: 3
+
+      clock-names:
+        minItems: 3
+
+  else:
+    properties:
+      clocks:
+        maxItems: 2
+
+      clock-names:
+        maxItems: 2
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun8i-a83t-ccu.h>
+    #include <dt-bindings/reset/sun8i-a83t-ccu.h>
+
+    hdmi_phy: hdmi-phy@1ef0000 {
+        compatible = "allwinner,sun8i-a83t-hdmi-phy";
+        reg = <0x01ef0000 0x10000>;
+        clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>;
+        clock-names = "bus", "mod";
+        resets = <&ccu RST_BUS_HDMI0>;
+        reset-names = "phy";
+        #phy-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml
new file mode 100644
index 000000000000..b98ca609824b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml
@@ -0,0 +1,382 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-r40-tcon-top.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner R40 TCON TOP Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  TCON TOPs main purpose is to configure whole display pipeline. It
+  determines relationships between mixers and TCONs, selects source
+  TCON for HDMI, muxes LCD and TV encoder GPIO output, selects TV
+  encoder clock source and contains additional TV TCON and DSI gates.
+
+  It allows display pipeline to be configured in very different ways:
+
+                                  / LCD0/LVDS0
+                   / [0] TCON-LCD0
+                   |              \ MIPI DSI
+   mixer0          |
+          \        / [1] TCON-LCD1 - LCD1/LVDS1
+           TCON-TOP
+          /        \ [2] TCON-TV0 [0] - TVE0/RGB
+   mixer1          |                  \
+                   |                   TCON-TOP - HDMI
+                   |                  /
+                   \ [3] TCON-TV1 [1] - TVE1/RGB
+
+  Note that both TCON TOP references same physical unit. Both mixers
+  can be connected to any TCON. Not all TCON TOP variants support all
+  features.
+
+properties:
+  "#clock-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun8i-r40-tcon-top
+      - allwinner,sun50i-h6-tcon-top
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 6
+    items:
+      - description: The TCON TOP interface clock
+      - description: The TCON TOP TV0 clock
+      - description: The TCON TOP TVE0 clock
+      - description: The TCON TOP TV1 clock
+      - description: The TCON TOP TVE1 clock
+      - description: The TCON TOP MIPI DSI clock
+
+  clock-names:
+    minItems: 2
+    maxItems: 6
+    items:
+      - const: bus
+      - const: tcon-tv0
+      - const: tve0
+      - const: tcon-tv1
+      - const: tve1
+      - const: dsi
+
+  clock-output-names:
+    minItems: 1
+    maxItems: 3
+    description: >
+      The first item is the name of the clock created for the TV0
+      channel, the second item is the name of the TCON TV1 channel
+      clock and the third one is the name of the DSI channel clock.
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+      All ports should have only one endpoint connected to
+      remote endpoint.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoint for Mixer 0 mux.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoint for Mixer 0 mux
+
+        properties:
+          "#address-cells":
+            const: 1
+
+          "#size-cells":
+            const: 0
+
+          reg: true
+
+        patternProperties:
+          "^endpoint@[0-9]$":
+            type: object
+
+            properties:
+              reg:
+                description: |
+                  ID of the target TCON
+
+            required:
+              - reg
+
+        required:
+          - "#address-cells"
+          - "#size-cells"
+
+        additionalProperties: false
+
+      port@2:
+        type: object
+        description: |
+          Input endpoint for Mixer 1 mux.
+
+      port@3:
+        type: object
+        description: |
+          Output endpoint for Mixer 1 mux
+
+        properties:
+          "#address-cells":
+            const: 1
+
+          "#size-cells":
+            const: 0
+
+          reg: true
+
+        patternProperties:
+          "^endpoint@[0-9]$":
+            type: object
+
+            properties:
+              reg:
+                description: |
+                  ID of the target TCON
+
+            required:
+              - reg
+
+        required:
+          - "#address-cells"
+          - "#size-cells"
+
+        additionalProperties: false
+
+      port@4:
+        type: object
+        description: |
+          Input endpoint for HDMI mux.
+
+        properties:
+          "#address-cells":
+            const: 1
+
+          "#size-cells":
+            const: 0
+
+          reg: true
+
+        patternProperties:
+          "^endpoint@[0-9]$":
+            type: object
+
+            properties:
+              reg:
+                description: |
+                  ID of the target TCON
+
+            required:
+              - reg
+
+        required:
+          - "#address-cells"
+          - "#size-cells"
+
+        additionalProperties: false
+
+      port@5:
+        type: object
+        description: |
+          Output endpoint for HDMI mux
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+      - port@4
+      - port@5
+
+    additionalProperties: false
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - clock-output-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: allwinner,sun50i-h6-tcon-top
+
+then:
+  properties:
+    clocks:
+      maxItems: 2
+
+    clock-output-names:
+      maxItems: 1
+
+else:
+  properties:
+    clocks:
+      minItems: 6
+
+    clock-output-names:
+      minItems: 3
+
+    ports:
+      required:
+        - port@2
+        - port@3
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    #include <dt-bindings/clock/sun8i-r40-ccu.h>
+    #include <dt-bindings/reset/sun8i-r40-ccu.h>
+
+      tcon_top: tcon-top@1c70000 {
+          compatible = "allwinner,sun8i-r40-tcon-top";
+          reg = <0x01c70000 0x1000>;
+          clocks = <&ccu CLK_BUS_TCON_TOP>,
+                   <&ccu CLK_TCON_TV0>,
+                   <&ccu CLK_TVE0>,
+                   <&ccu CLK_TCON_TV1>,
+                   <&ccu CLK_TVE1>,
+                   <&ccu CLK_DSI_DPHY>;
+          clock-names = "bus",
+                        "tcon-tv0",
+                        "tve0",
+                        "tcon-tv1",
+                        "tve1",
+                        "dsi";
+          clock-output-names = "tcon-top-tv0",
+                               "tcon-top-tv1",
+                               "tcon-top-dsi";
+          resets = <&ccu RST_BUS_TCON_TOP>;
+          #clock-cells = <1>;
+
+          ports {
+              #address-cells = <1>;
+              #size-cells = <0>;
+
+              tcon_top_mixer0_in: port@0 {
+                  reg = <0>;
+
+                  tcon_top_mixer0_in_mixer0: endpoint {
+                      remote-endpoint = <&mixer0_out_tcon_top>;
+                  };
+              };
+
+              tcon_top_mixer0_out: port@1 {
+                  #address-cells = <1>;
+                  #size-cells = <0>;
+                  reg = <1>;
+
+                  tcon_top_mixer0_out_tcon_lcd0: endpoint@0 {
+                      reg = <0>;
+                  };
+
+                  tcon_top_mixer0_out_tcon_lcd1: endpoint@1 {
+                      reg = <1>;
+                  };
+
+                  tcon_top_mixer0_out_tcon_tv0: endpoint@2 {
+                      reg = <2>;
+                      remote-endpoint = <&tcon_tv0_in_tcon_top_mixer0>;
+                  };
+
+                  tcon_top_mixer0_out_tcon_tv1: endpoint@3 {
+                      reg = <3>;
+                      remote-endpoint = <&tcon_tv1_in_tcon_top_mixer0>;
+                  };
+              };
+
+              tcon_top_mixer1_in: port@2 {
+                  #address-cells = <1>;
+                  #size-cells = <0>;
+                  reg = <2>;
+
+                  tcon_top_mixer1_in_mixer1: endpoint@1 {
+                      reg = <1>;
+                      remote-endpoint = <&mixer1_out_tcon_top>;
+                  };
+              };
+
+              tcon_top_mixer1_out: port@3 {
+                  #address-cells = <1>;
+                  #size-cells = <0>;
+                  reg = <3>;
+
+                  tcon_top_mixer1_out_tcon_lcd0: endpoint@0 {
+                      reg = <0>;
+                  };
+
+                  tcon_top_mixer1_out_tcon_lcd1: endpoint@1 {
+                      reg = <1>;
+                  };
+
+                  tcon_top_mixer1_out_tcon_tv0: endpoint@2 {
+                      reg = <2>;
+                      remote-endpoint = <&tcon_tv0_in_tcon_top_mixer1>;
+                  };
+
+                  tcon_top_mixer1_out_tcon_tv1: endpoint@3 {
+                      reg = <3>;
+                      remote-endpoint = <&tcon_tv1_in_tcon_top_mixer1>;
+                  };
+              };
+
+              tcon_top_hdmi_in: port@4 {
+                  #address-cells = <1>;
+                  #size-cells = <0>;
+                  reg = <4>;
+
+                  tcon_top_hdmi_in_tcon_tv0: endpoint@0 {
+                      reg = <0>;
+                      remote-endpoint = <&tcon_tv0_out_tcon_top>;
+                  };
+
+                  tcon_top_hdmi_in_tcon_tv1: endpoint@1 {
+                      reg = <1>;
+                      remote-endpoint = <&tcon_tv1_out_tcon_top>;
+                  };
+              };
+
+              tcon_top_hdmi_out: port@5 {
+                  reg = <5>;
+
+                  tcon_top_hdmi_out_hdmi: endpoint {
+                      remote-endpoint = <&hdmi_in_tcon_top>;
+                  };
+              };
+          };
+      };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml b/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml
new file mode 100644
index 000000000000..96de41d32b3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun9i-a80-deu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 Detail Enhancement Unit Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  The DEU (Detail Enhancement Unit), found in the Allwinner A80 SoC,
+  can sharpen the display content in both luma and chroma channels.
+
+properties:
+  compatible:
+    const: allwinner,sun9i-a80-deu
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: The DEU interface clock
+      - description: The DEU module clock
+      - description: The DEU DRAM clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+      - const: ram
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description: |
+      A ports node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: |
+          Input endpoints of the controller.
+
+      port@1:
+        type: object
+        description: |
+          Output endpoints of the controller.
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+      - port@1
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    #include <dt-bindings/clock/sun9i-a80-de.h>
+    #include <dt-bindings/reset/sun9i-a80-de.h>
+
+    deu0: deu@3300000 {
+        compatible = "allwinner,sun9i-a80-deu";
+        reg = <0x03300000 0x40000>;
+        interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&de_clocks CLK_BUS_DEU0>,
+                 <&de_clocks CLK_IEP_DEU0>,
+                 <&de_clocks CLK_DRAM_DEU0>;
+        clock-names = "ahb",
+                      "mod",
+                      "ram";
+        resets = <&de_clocks RST_DEU0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            deu0_in: port@0 {
+                reg = <0>;
+
+                deu0_in_fe0: endpoint {
+                    remote-endpoint = <&fe0_out_deu0>;
+                };
+            };
+
+            deu0_out: port@1 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <1>;
+
+                deu0_out_be0: endpoint@0 {
+                    reg = <0>;
+                    remote-endpoint = <&be0_in_deu0>;
+                };
+
+                deu0_out_be1: endpoint@1 {
+                    reg = <1>;
+                    remote-endpoint = <&be1_in_deu0>;
+                };
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
index fb747682006d..0da42ab8fd3a 100644
--- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
@@ -79,8 +79,6 @@ properties:
 
   hdmi-supply:
     description: phandle to an external 5V regulator to power the HDMI logic
-    allOf:
-      - $ref: /schemas/types.yaml#/definitions/phandle
 
   port@0:
     type: object
diff --git a/Documentation/devicetree/bindings/display/arm,malidp.txt b/Documentation/devicetree/bindings/display/arm,malidp.txt
index 2f7870983ef1..7a97a2b48c2a 100644
--- a/Documentation/devicetree/bindings/display/arm,malidp.txt
+++ b/Documentation/devicetree/bindings/display/arm,malidp.txt
@@ -37,6 +37,8 @@ Optional properties:
     Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
     to be used for the framebuffer; if not present, the framebuffer may
     be located anywhere in memory.
+  - arm,malidp-arqos-high-level: integer of u32 value describing the ARQoS
+    levels of DP500's QoS signaling.
 
 
 Example:
@@ -54,6 +56,7 @@ Example:
 		clocks = <&oscclk2>, <&fpgaosc0>, <&fpgaosc1>, <&fpgaosc1>;
 		clock-names = "pxlclk", "mclk", "aclk", "pclk";
 		arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
+		arm,malidp-arqos-high-level = <0xd000d000>;
 		port {
 			dp0_output: endpoint {
 				remote-endpoint = <&tda998x_2_input>;
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
new file mode 100644
index 000000000000..6d72b3d11fbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/anx6345.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analogix ANX6345 eDP Transmitter Device Tree Bindings
+
+maintainers:
+  - Torsten Duwe <duwe@lst.de>
+
+description: |
+  The ANX6345 is an ultra-low power Full-HD eDP transmitter designed for
+  portable devices.
+
+properties:
+  compatible:
+    const: analogix,anx6345
+
+  reg:
+    maxItems: 1
+    description: base I2C address of the device
+
+  reset-gpios:
+    maxItems: 1
+    description: GPIO connected to active low reset
+
+  dvdd12-supply:
+    maxItems: 1
+    description: Regulator for 1.2V digital core power.
+
+  dvdd25-supply:
+    maxItems: 1
+    description: Regulator for 2.5V digital core power.
+
+  ports:
+    type: object
+
+    properties:
+      port@0:
+        type: object
+        description: |
+          Video port for LVTTL input
+
+      port@1:
+        type: object
+        description: |
+          Video port for eDP output (panel or connector).
+          May be omitted if EDID works reliably.
+
+    required:
+      - port@0
+
+required:
+  - compatible
+  - reg
+  - reset-gpios
+  - dvdd12-supply
+  - dvdd25-supply
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      anx6345: anx6345@38 {
+        compatible = "analogix,anx6345";
+        reg = <0x38>;
+        reset-gpios = <&pio42 1 /* GPIO_ACTIVE_LOW */>;
+        dvdd25-supply = <&reg_dldo2>;
+        dvdd12-supply = <&reg_fldo1>;
+
+        ports {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          anx6345_in: port@0 {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            reg = <0>;
+            anx6345_in_tcon0: endpoint@0 {
+              reg = <0>;
+              remote-endpoint = <&tcon0_out_anx6345>;
+            };
+          };
+
+          anx6345_out: port@1 {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            reg = <1>;
+            anx6345_out_panel: endpoint@0 {
+              reg = <0>;
+              remote-endpoint = <&panel_in_edp>;
+            };
+          };
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/display/bridge/anx7814.txt b/Documentation/devicetree/bindings/display/bridge/anx7814.txt
index dbd7c84ee584..17258747fff6 100644
--- a/Documentation/devicetree/bindings/display/bridge/anx7814.txt
+++ b/Documentation/devicetree/bindings/display/bridge/anx7814.txt
@@ -6,7 +6,11 @@ designed for portable devices.
 
 Required properties:
 
- - compatible		: "analogix,anx7814"
+ - compatible		: Must be one of:
+			  "analogix,anx7808"
+			  "analogix,anx7812"
+			  "analogix,anx7814"
+			  "analogix,anx7818"
  - reg			: I2C address of the device
  - interrupts		: Should contain the INTP interrupt
  - hpd-gpios		: Which GPIO to use for hpd
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
new file mode 100644
index 000000000000..8f373029f5d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/lvds-codec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Transparent LVDS encoders and decoders
+
+maintainers:
+  - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+  This binding supports transparent LVDS encoders and decoders that don't
+  require any configuration.
+
+  LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
+  incompatible data link layers have been used over time to transmit image data
+  to LVDS panels. This binding targets devices compatible with the following
+  specifications only.
+
+  [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
+  1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
+  [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
+  Semiconductor
+  [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
+  Electronics Standards Association (VESA)
+
+  Those devices have been marketed under the FPD-Link and FlatLink brand names
+  among others.
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - ti,ds90c185       # For the TI DS90C185 FPD-Link Serializer
+          - ti,ds90c187       # For the TI DS90C187 FPD-Link Serializer
+          - ti,sn75lvds83     # For the TI SN75LVDS83 FlatLink transmitter
+        - const: lvds-encoder # Generic LVDS encoder compatible fallback
+      - items:
+        - enum:
+          - ti,ds90cf384a     # For the DS90CF384A FPD-Link LVDS Receiver
+        - const: lvds-decoder # Generic LVDS decoders compatible fallback
+      - enum:
+        - thine,thc63lvdm83d  # For the THC63LVDM83D LVDS serializer
+
+  ports:
+    type: object
+    description: |
+      This device has two video ports. Their connections are modeled using the
+      OF graph bindings specified in Documentation/devicetree/bindings/graph.txt
+    properties:
+      port@0:
+        type: object
+        description: |
+          For LVDS encoders, port 0 is the parallel input
+          For LVDS decoders, port 0 is the LVDS input
+
+      port@1:
+        type: object
+        description: |
+          For LVDS encoders, port 1 is the LVDS output
+          For LVDS decoders, port 1 is the parallel output
+
+    required:
+      - port@0
+      - port@1
+
+  powerdown-gpios:
+    description:
+      The GPIO used to control the power down line of this device.
+    maxItems: 1
+
+required:
+  - compatible
+  - ports
+
+examples:
+  - |
+    lvds-encoder {
+      compatible = "ti,ds90c185", "lvds-encoder";
+
+      ports {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        port@0 {
+          reg = <0>;
+
+          lvds_enc_in: endpoint {
+            remote-endpoint = <&display_out_rgb>;
+          };
+        };
+
+        port@1 {
+          reg = <1>;
+
+          lvds_enc_out: endpoint {
+            remote-endpoint = <&lvds_panel_in>;
+          };
+        };
+      };
+    };
+
+  - |
+    lvds-decoder {
+      compatible = "ti,ds90cf384a", "lvds-decoder";
+
+      ports {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        port@0 {
+          reg = <0>;
+
+          lvds_dec_in: endpoint {
+            remote-endpoint = <&display_out_lvds>;
+          };
+        };
+
+        port@1 {
+          reg = <1>;
+
+          lvds_dec_out: endpoint {
+            remote-endpoint = <&rgb_panel_in>;
+          };
+        };
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt b/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt
deleted file mode 100644
index 60091db5dfa5..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Parallel to LVDS Encoder
-------------------------
-
-This binding supports the parallel to LVDS encoders that don't require any
-configuration.
-
-LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
-incompatible data link layers have been used over time to transmit image data
-to LVDS panels. This binding targets devices compatible with the following
-specifications only.
-
-[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
-1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
-[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
-Semiconductor
-[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
-Electronics Standards Association (VESA)
-
-Those devices have been marketed under the FPD-Link and FlatLink brand names
-among others.
-
-
-Required properties:
-
-- compatible: Must be "lvds-encoder"
-
-  Any encoder compatible with this generic binding, but with additional
-  properties not listed here, must list a device specific compatible first
-  followed by this generic compatible.
-
-Required nodes:
-
-This device has two video ports. Their connections are modeled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for parallel input
-- Video port 1 for LVDS output
-
-
-Example
--------
-
-lvds-encoder {
-	compatible = "lvds-encoder";
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		port@0 {
-			reg = <0>;
-
-			lvds_enc_in: endpoint {
-				remote-endpoint = <&display_out_rgb>;
-			};
-		};
-
-		port@1 {
-			reg = <1>;
-
-			lvds_enc_out: endpoint {
-				remote-endpoint = <&lvds_panel_in>;
-			};
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
index db680413e89c..819f3e31013c 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
@@ -13,6 +13,7 @@ Required properties:
 
 - compatible : Shall contain one or more of
   - "renesas,r8a774a1-hdmi" for R8A774A1 (RZ/G2M) compatible HDMI TX
+  - "renesas,r8a774b1-hdmi" for R8A774B1 (RZ/G2N) compatible HDMI TX
   - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
   - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
   - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
index c6a196d0b075..c62ce2494ed9 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
@@ -10,6 +10,7 @@ Required properties:
   - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
   - "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
   - "renesas,r8a774a1-lvds" for R8A774A1 (RZ/G2M) compatible LVDS encoders
+  - "renesas,r8a774b1-lvds" for R8A774B1 (RZ/G2N) compatible LVDS encoders
   - "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
   - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
   - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt
deleted file mode 100644
index fee3c88e1a17..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-THine Electronics THC63LVDM83D LVDS serializer
-----------------------------------------------
-
-The THC63LVDM83D is an LVDS serializer designed to support pixel data
-transmission between a host and a flat panel.
-
-Required properties:
-
-- compatible: Should be "thine,thc63lvdm83d"
-
-Optional properties:
-
-- powerdown-gpios: Power down control GPIO (the /PWDN pin, active low).
-
-Required nodes:
-
-The THC63LVDM83D has two video ports. Their connections are modeled using the
-OFgraph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for CMOS/TTL input
-- Video port 1 for LVDS output
-
-
-Example
--------
-
-	lvds_enc: encoder@0 {
-		compatible = "thine,thc63lvdm83d";
-
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			port@0 {
-				reg = <0>;
-
-				lvds_enc_in: endpoint@0 {
-					remote-endpoint = <&rgb_out>;
-				};
-			};
-
-			port@1 {
-				reg = <1>;
-
-				lvds_enc_out: endpoint@0 {
-					remote-endpoint = <&panel_in>;
-				};
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ds90c185.txt b/Documentation/devicetree/bindings/display/bridge/ti,ds90c185.txt
deleted file mode 100644
index e575f996959a..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ti,ds90c185.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Texas Instruments FPD-Link (LVDS) Serializer
---------------------------------------------
-
-The DS90C185 and DS90C187 are low-power serializers for portable
-battery-powered applications that reduces the size of the RGB
-interface between the host GPU and the display.
-
-Required properties:
-
-- compatible: Should be
-  "ti,ds90c185", "lvds-encoder"  for the TI DS90C185 FPD-Link Serializer
-  "ti,ds90c187", "lvds-encoder"  for the TI DS90C187 FPD-Link Serializer
-
-Optional properties:
-
-- powerdown-gpios: Power down control GPIO (the PDB pin, active-low)
-
-Required nodes:
-
-The devices have two video ports. Their connections are modeled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for parallel input
-- Video port 1 for LVDS output
-
-
-Example
--------
-
-lvds-encoder {
-	compatible = "ti,ds90c185", "lvds-encoder";
-
-	powerdown-gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		port@0 {
-			reg = <0>;
-
-			lvds_enc_in: endpoint {
-				remote-endpoint = <&lcdc_out_rgb>;
-			};
-		};
-
-		port@1 {
-			reg = <1>;
-
-			lvds_enc_out: endpoint {
-				remote-endpoint = <&lvds_panel_in>;
-			};
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
index 0a3fbb53a16e..8ec4a7f2623a 100644
--- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
+++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
@@ -21,7 +21,7 @@ Optional properties:
 - #gpio-cells    : Should be two. The first cell is the pin number and
                    the second cell is used to specify flags.
                    See ../../gpio/gpio.txt for more information.
-- #pwm-cells : Should be one. See ../../pwm/pwm.txt for description of
+- #pwm-cells : Should be one. See ../../pwm/pwm.yaml for description of
                the cell formats.
 
 - clock-names: should be "refclk"
diff --git a/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt b/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
index b0e506610400..0ab5f0663611 100644
--- a/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
+++ b/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
@@ -27,11 +27,11 @@ Example:
 
 	display: display {
 		model = "320x240x4";
-		native-mode = <&timing0>;
 		bits-per-pixel = <4>;
 		ac-prescale = <17>;
 
 		display-timings {
+			native-mode = <&timing0>;
 			timing0: 320x240 {
 				hactive = <320>;
 				hback-porch = <0>;
diff --git a/Documentation/devicetree/bindings/display/dsi-controller.yaml b/Documentation/devicetree/bindings/display/dsi-controller.yaml
new file mode 100644
index 000000000000..fd986c36c737
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/dsi-controller.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/dsi-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for DSI Display Panels
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  This document defines device tree properties common to DSI, Display
+  Serial Interface controllers and attached panels. It doesn't constitute
+  a device tree binding specification by itself but is meant to be referenced
+  by device tree bindings.
+
+  When referenced from panel device tree bindings the properties defined in
+  this document are defined as follows. The panel device tree bindings are
+  responsible for defining whether each property is required or optional.
+
+  Notice: this binding concerns DSI panels connected directly to a master
+  without any intermediate port graph to the panel. Each DSI master
+  can control one to four virtual channels to one panel. Each virtual
+  channel should have a node "panel" for their virtual channel with their
+  reg-property set to the virtual channel number, usually there is just
+  one virtual channel, number 0.
+
+properties:
+  $nodename:
+    pattern: "^dsi-controller(@.*)?$"
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^panel@[0-3]$":
+    description: Panels connected to the DSI link
+    type: object
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 3
+        description:
+          The virtual channel number of a DSI peripheral. Must be in the range
+          from 0 to 3, as DSI uses a 2-bit addressing scheme. Some DSI
+          peripherals respond to more than a single virtual channel. In that
+          case the reg property can take multiple entries, one for each virtual
+          channel that the peripheral responds to.
+
+      clock-master:
+        type: boolean
+        description:
+           Should be enabled if the host is being used in conjunction with
+           another DSI host to drive the same peripheral. Hardware supporting
+           such a configuration generally requires the data on both the busses
+           to be driven by the same clock. Only the DSI host instance
+           controlling this clock should contain this property.
+
+      enforce-video-mode:
+        type: boolean
+        description:
+          The best option is usually to run a panel in command mode, as this
+          gives better control over the panel hardware. However for different
+          reasons like broken hardware, missing features or testing, it may be
+          useful to be able to force a command mode-capable panel into video
+          mode.
+
+    required:
+      - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    dsi-controller@a0351000 {
+        reg = <0xa0351000 0x1000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        panel@0 {
+            compatible = "sony,acx424akp";
+            reg = <0>;
+            vddi-supply = <&ab8500_ldo_aux1_reg>;
+            reset-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
index e5a8b363d829..f4df9e83bcd2 100644
--- a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
@@ -38,10 +38,10 @@ Example:
 
 	display0: display0 {
 		model = "Primeview-PD050VL1";
-		native-mode = <&timing_disp0>;
 		bits-per-pixel = <16>;
 		fsl,pcr = <0xf0c88080>;	/* non-standard but required */
 		display-timings {
+			native-mode = <&timing_disp0>;
 			timing_disp0: 640x480 {
 				hactive = <640>;
 				vactive = <480>;
diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.txt b/Documentation/devicetree/bindings/display/ingenic,lcd.txt
index 7b536c8c6dde..01e3261defb6 100644
--- a/Documentation/devicetree/bindings/display/ingenic,lcd.txt
+++ b/Documentation/devicetree/bindings/display/ingenic,lcd.txt
@@ -4,6 +4,7 @@ Required properties:
 - compatible: one of:
   * ingenic,jz4740-lcd
   * ingenic,jz4725b-lcd
+  * ingenic,jz4770-lcd
 - reg: LCD registers location and length
 - clocks: LCD pixclock and device clock specifiers.
 	   The device clock is only required on the JZ4740.
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt
index 8469de510001..b91e709db7a4 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt
@@ -27,19 +27,22 @@ Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt.
 
 Required properties (all function blocks):
 - compatible: "mediatek,<chip>-disp-<function>", one of
-	"mediatek,<chip>-disp-ovl"   - overlay (4 layers, blending, csc)
-	"mediatek,<chip>-disp-rdma"  - read DMA / line buffer
-	"mediatek,<chip>-disp-wdma"  - write DMA
-	"mediatek,<chip>-disp-color" - color processor
-	"mediatek,<chip>-disp-aal"   - adaptive ambient light controller
-	"mediatek,<chip>-disp-gamma" - gamma correction
-	"mediatek,<chip>-disp-merge" - merge streams from two RDMA sources
-	"mediatek,<chip>-disp-split" - split stream to two encoders
-	"mediatek,<chip>-disp-ufoe"  - data compression engine
-	"mediatek,<chip>-dsi"        - DSI controller, see mediatek,dsi.txt
-	"mediatek,<chip>-dpi"        - DPI controller, see mediatek,dpi.txt
-	"mediatek,<chip>-disp-mutex" - display mutex
-	"mediatek,<chip>-disp-od"    - overdrive
+	"mediatek,<chip>-disp-ovl"   		- overlay (4 layers, blending, csc)
+	"mediatek,<chip>-disp-ovl-2l"           - overlay (2 layers, blending, csc)
+	"mediatek,<chip>-disp-rdma"  		- read DMA / line buffer
+	"mediatek,<chip>-disp-wdma"  		- write DMA
+	"mediatek,<chip>-disp-ccorr"            - color correction
+	"mediatek,<chip>-disp-color" 		- color processor
+	"mediatek,<chip>-disp-dither"           - dither
+	"mediatek,<chip>-disp-aal"   		- adaptive ambient light controller
+	"mediatek,<chip>-disp-gamma" 		- gamma correction
+	"mediatek,<chip>-disp-merge" 		- merge streams from two RDMA sources
+	"mediatek,<chip>-disp-split" 		- split stream to two encoders
+	"mediatek,<chip>-disp-ufoe"  		- data compression engine
+	"mediatek,<chip>-dsi"        		- DSI controller, see mediatek,dsi.txt
+	"mediatek,<chip>-dpi"        		- DPI controller, see mediatek,dpi.txt
+	"mediatek,<chip>-disp-mutex" 		- display mutex
+	"mediatek,<chip>-disp-od"    		- overdrive
   the supported chips are mt2701, mt2712 and mt8173.
 - reg: Physical base address and length of the function block register space
 - interrupts: The interrupt signal from the function block (required, except for
@@ -49,6 +52,7 @@ Required properties (all function blocks):
   For most function blocks this is just a single clock input. Only the DSI and
   DPI controller nodes have multiple clock inputs. These are documented in
   mediatek,dsi.txt and mediatek,dpi.txt, respectively.
+  An exception is that the mt8183 mutex is always free running with no clocks property.
 
 Required properties (DMA function blocks):
 - compatible: Should be one of
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
index fadf327c7cdf..a19a6cc375ed 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
@@ -7,7 +7,7 @@ channel output.
 
 Required properties:
 - compatible: "mediatek,<chip>-dsi"
-  the supported chips are mt2701 and mt8173.
+  the supported chips are mt2701, mt8173 and mt8183.
 - reg: Physical base address and length of the controller's registers
 - interrupts: The interrupt signal from the function block.
 - clocks: device clocks
@@ -26,7 +26,7 @@ The MIPI TX configuration module controls the MIPI D-PHY.
 
 Required properties:
 - compatible: "mediatek,<chip>-mipi-tx"
-  the supported chips are mt2701 and mt8173.
+  the supported chips are mt2701, mt8173 and mt8183.
 - reg: Physical base address and length of the controller's registers
 - clocks: PLL reference clock
 - clock-output-names: name of the output clock line to the DSI encoder
diff --git a/Documentation/devicetree/bindings/display/msm/dpu.txt b/Documentation/devicetree/bindings/display/msm/dpu.txt
index a61dd40f3792..551ae26f60da 100644
--- a/Documentation/devicetree/bindings/display/msm/dpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/dpu.txt
@@ -8,7 +8,7 @@ The DPU display controller is found in SDM845 SoC.
 
 MDSS:
 Required properties:
-- compatible: "qcom,sdm845-mdss"
+- compatible:  "qcom,sdm845-mdss", "qcom,sc7180-mdss"
 - reg: physical base address and length of contoller's registers.
 - reg-names: register region names. The following region is required:
   * "mdss"
@@ -41,7 +41,7 @@ Optional properties:
 
 MDP:
 Required properties:
-- compatible: "qcom,sdm845-dpu"
+- compatible: "qcom,sdm845-dpu", "qcom,sc7180-dpu"
 - reg: physical base address and length of controller's registers.
 - reg-names : register region names. The following region is required:
   * "mdp"
diff --git a/Documentation/devicetree/bindings/display/msm/gmu.txt b/Documentation/devicetree/bindings/display/msm/gmu.txt
index 90af5b0a56a9..bf9c7a2a495c 100644
--- a/Documentation/devicetree/bindings/display/msm/gmu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gmu.txt
@@ -31,6 +31,10 @@ Required properties:
 - iommus: phandle to the adreno iommu
 - operating-points-v2: phandle to the OPP operating points
 
+Optional properties:
+- sram: phandle to the On Chip Memory (OCMEM) that's present on some Snapdragon
+        SoCs. See Documentation/devicetree/bindings/sram/qcom,ocmem.yaml.
+
 Example:
 
 / {
@@ -63,3 +67,50 @@ Example:
 		operating-points-v2 = <&gmu_opp_table>;
 	};
 };
+
+a3xx example with OCMEM support:
+
+/ {
+	...
+
+	gpu: adreno@fdb00000 {
+		compatible = "qcom,adreno-330.2",
+		             "qcom,adreno";
+		reg = <0xfdb00000 0x10000>;
+		reg-names = "kgsl_3d0_reg_memory";
+		interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "kgsl_3d0_irq";
+		clock-names = "core",
+		              "iface",
+		              "mem_iface";
+		clocks = <&mmcc OXILI_GFX3D_CLK>,
+		         <&mmcc OXILICX_AHB_CLK>,
+		         <&mmcc OXILICX_AXI_CLK>;
+		sram = <&gmu_sram>;
+		power-domains = <&mmcc OXILICX_GDSC>;
+		operating-points-v2 = <&gpu_opp_table>;
+		iommus = <&gpu_iommu 0>;
+	};
+
+	ocmem@fdd00000 {
+		compatible = "qcom,msm8974-ocmem";
+
+		reg = <0xfdd00000 0x2000>,
+		      <0xfec00000 0x180000>;
+		reg-names = "ctrl",
+		             "mem";
+
+		clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>,
+		         <&mmcc OCMEMCX_OCMEMNOC_CLK>;
+		clock-names = "core",
+		              "iface";
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		gmu_sram: gmu-sram@0 {
+			reg = <0x0 0x100000>;
+			ranges = <0 0 0xfec00000 0x100000>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 2b8fd26c43b0..7edc298a15f2 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -23,13 +23,18 @@ Required properties:
 - iommus: optional phandle to an adreno iommu instance
 - operating-points-v2: optional phandle to the OPP operating points
 - interconnects: optional phandle to an interconnect provider.  See
-  ../interconnect/interconnect.txt for details.
+  ../interconnect/interconnect.txt for details. Some A3xx and all A4xx platforms
+  will have two paths; all others will have one path.
+- interconnect-names: The names of the interconnect paths that correspond to the
+  interconnects property. Values must be gfx-mem and ocmem.
 - qcom,gmu: For GMU attached devices a phandle to the GMU device that will
   control the power for the GPU. Applicable targets:
     - qcom,adreno-630.2
 - zap-shader: For a5xx and a6xx devices this node contains a memory-region that
   points to reserved memory to store the zap shader that can be used to help
   bring the GPU out of secure mode.
+- firmware-name: optional property of the 'zap-shader' node, listing the
+  relative path of the device specific zap firmware.
 
 Example 3xx/4xx/a5xx:
 
@@ -76,11 +81,13 @@ Example a6xx (with GMU):
 		operating-points-v2 = <&gpu_opp_table>;
 
 		interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>;
+		interconnect-names = "gfx-mem";
 
 		qcom,gmu = <&gmu>;
 
 		zap-shader {
 			memory-region = <&zap_shader_region>;
+			firmware-name = "qcom/LENOVO/81JL/qcdxkmsuc850.mbn"
 		};
 	};
 };
diff --git a/Documentation/devicetree/bindings/display/msm/mdp5.txt b/Documentation/devicetree/bindings/display/msm/mdp5.txt
index 4e11338548aa..43d11279c925 100644
--- a/Documentation/devicetree/bindings/display/msm/mdp5.txt
+++ b/Documentation/devicetree/bindings/display/msm/mdp5.txt
@@ -76,6 +76,8 @@ Required properties:
 Optional properties:
 - clock-names: the following clocks are optional:
   * "lut"
+  * "tbu"
+  * "tbu_rt"
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml b/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml
deleted file mode 100644
index c6e33e7f36d0..000000000000
--- a/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/display/panel/ampire,am-480272h3tmqw-t01h.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel
-
-maintainers:
-  - Yannick Fertre <yannick.fertre@st.com>
-  - Thierry Reding <treding@nvidia.com>
-
-allOf:
-  - $ref: panel-common.yaml#
-
-properties:
-  compatible:
-    const: ampire,am-480272h3tmqw-t01h
-
-  power-supply: true
-  enable-gpios: true
-  backlight: true
-  port: true
-
-required:
-  - compatible
-
-additionalProperties: false
-
-examples:
-  - |
-    panel_rgb: panel {
-      compatible = "ampire,am-480272h3tmqw-t01h";
-      enable-gpios = <&gpioa 8 1>;
-      port {
-        panel_in_rgb: endpoint {
-          remote-endpoint = <&controller_out_rgb>;
-        };
-      };
-    };
-
-...
diff --git a/Documentation/devicetree/bindings/display/panel/ampire,am800480r3tmqwa1h.txt b/Documentation/devicetree/bindings/display/panel/ampire,am800480r3tmqwa1h.txt
deleted file mode 100644
index 83e2cae1cc1b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/ampire,am800480r3tmqwa1h.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Ampire AM-800480R3TMQW-A1H 7.0" WVGA TFT LCD panel
-
-Required properties:
-- compatible: should be "ampire,am800480r3tmqwa1h"
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt b/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt
deleted file mode 100644
index 3dab52f92c26..000000000000
--- a/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-GiantPlus 3.0" (320x240 pixels) 24-bit TFT LCD panel
-
-Required properties:
-- compatible: should be "giantplus,gpm940b0"
-- power-supply: as specified in the base binding
-
-Optional properties:
-- backlight: as specified in the base binding
-- enable-gpios: as specified in the base binding
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml
new file mode 100644
index 000000000000..4ebcea7d0c63
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/leadtek,ltk500hd1829.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Leadtek LTK500HD1829 5.0in 720x1280 DSI panel
+
+maintainers:
+  - Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: leadtek,ltk500hd1829
+  reg: true
+  backlight: true
+  reset-gpios: true
+  iovcc-supply:
+     description: regulator that supplies the iovcc voltage
+  vcc-supply:
+     description: regulator that supplies the vcc voltage
+
+required:
+  - compatible
+  - reg
+  - backlight
+  - iovcc-supply
+  - vcc-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    dsi@ff450000 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        panel@0 {
+            compatible = "leadtek,ltk500hd1829";
+            reg = <0>;
+            backlight = <&backlight>;
+            iovcc-supply = <&vcc_1v8>;
+            vcc-supply = <&vcc_2v8>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/logicpd,type28.yaml b/Documentation/devicetree/bindings/display/panel/logicpd,type28.yaml
new file mode 100644
index 000000000000..2834287b8d88
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/logicpd,type28.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/logicpd,type28.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Logic PD Type 28 4.3" WQVGA TFT LCD panel
+
+maintainers:
+  - Adam Ford <aford173@gmail.com>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: logicpd,type28
+
+  power-supply: true
+  enable-gpios: true
+  backlight: true
+  port: true
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    lcd0: display {
+      compatible = "logicpd,type28";
+      enable-gpios = <&gpio5 27 0>;
+      backlight = <&backlight>;
+      port {
+        lcd_in: endpoint {
+          remote-endpoint = <&dpi_out>;
+        };
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
new file mode 100644
index 000000000000..8fe60ee2531c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/panel-simple.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple panels with one power supply
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+  - Sam Ravnborg <sam@ravnborg.org>
+
+description: |
+  This binding file is a collection of the simple (dumb) panels that
+  requires only a single power-supply.
+  There are optionally a backlight and an enable GPIO.
+  The panel may use an OF graph binding for the association to the display,
+  or it may be a direct child node of the display.
+
+  If the panel is more advanced a dedicated binding file is required.
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+
+  compatible:
+    enum:
+    # compatible must be listed in alphabetical order, ordered by compatible.
+    # The description in the comment is mandatory for each compatible.
+
+        # Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel
+      - ampire,am-480272h3tmqw-t01h
+        # Ampire AM-800480R3TMQW-A1H 7.0" WVGA TFT LCD panel
+      - ampire,am800480r3tmqwa1h
+        # AUO B116XAK01 eDP TFT LCD panel
+      - auo,b116xa01
+        # BOE NV140FHM-N49 14.0" FHD a-Si FT panel
+      - boe,nv140fhmn49
+        # GiantPlus GPM940B0 3.0" QVGA TFT LCD panel
+      - giantplus,gpm940b0
+        # Satoz SAT050AT40H12R2 5.0" WVGA TFT LCD panel
+      - satoz,sat050at40h12r2
+        # Sharp LS020B1DD01D 2.0" HQVGA TFT LCD panel
+      - sharp,ls020b1dd01d
+
+  backlight: true
+  enable-gpios: true
+  port: true
+  power-supply: true
+
+additionalProperties: false
+
+required:
+  - compatible
+  - power-supply
+
+examples:
+  - |
+    panel_rgb: panel-rgb {
+      compatible = "ampire,am-480272h3tmqw-t01h";
+      power-supply = <&vcc_lcd_reg>;
+
+      port {
+        panel_in_rgb: endpoint {
+          remote-endpoint = <&ltdc_out_rgb>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml b/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
index 0e7987f1cdb7..d67617f6f74a 100644
--- a/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Ronbo RB070D30 DSI Display Panel
 
 maintainers:
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt b/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt
deleted file mode 100644
index fd9cf39bde77..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Sharp LD-D5116Z01B 12.3" WUXGA+ eDP panel
-
-Required properties:
-- compatible: should be "sharp,ld-d5116z01b"
-- power-supply: regulator to provide the VCC supply voltage (3.3 volts)
-
-This binding is compatible with the simple-panel binding.
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in [1]. This
-node should describe panel's video bus.
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
-	panel: panel {
-		compatible = "sharp,ld-d5116z01b";
-		power-supply = <&vlcd_3v3>;
-
-		port {
-			panel_ep: endpoint {
-				remote-endpoint = <&bridge_out_ep>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.yaml b/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.yaml
new file mode 100644
index 000000000000..fbb647eb33c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sharp,ld-d5116z01b.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sharp LD-D5116Z01B 12.3" WUXGA+ eDP panel
+
+maintainers:
+  - Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: sharp,ld-d5116z01b
+
+  power-supply: true
+  backlight: true
+  port: true
+  no-hpd: true
+
+additionalProperties: false
+
+required:
+  - compatible
+  - power-supply
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt b/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt
deleted file mode 100644
index e45edbc565a3..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Sharp 2.0" (240x160 pixels) 16-bit TFT LCD panel
-
-Required properties:
-- compatible: should be "sharp,ls020b1dd01d"
-- power-supply: as specified in the base binding
-
-Optional properties:
-- backlight: as specified in the base binding
-- enable-gpios: as specified in the base binding
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
new file mode 100644
index 000000000000..185dcc8fd1f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sony,acx424akp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sony ACX424AKP 4" 480x864 AMOLED panel
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: sony,acx424akp
+  reg: true
+  reset-gpios: true
+  vddi-supply:
+     description: regulator that supplies the vddi voltage
+  enforce-video-mode: true
+
+required:
+  - compatible
+  - reg
+  - reset-gpios
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    dsi-controller@a0351000 {
+        compatible = "ste,mcde-dsi";
+        reg = <0xa0351000 0x1000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        panel@0 {
+            compatible = "sony,acx424akp";
+            reg = <0>;
+            vddi-supply = <&foo>;
+            reset-gpios = <&foo_gpio 0 GPIO_ACTIVE_LOW>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
new file mode 100644
index 000000000000..186e5e1c8fa3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/xinpeng,xpp055c272.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xinpeng XPP055C272 5.5in 720x1280 DSI panel
+
+maintainers:
+  - Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: xinpeng,xpp055c272
+  reg: true
+  backlight: true
+  reset-gpios: true
+  iovcc-supply:
+     description: regulator that supplies the iovcc voltage
+  vci-supply:
+     description: regulator that supplies the vci voltage
+
+required:
+  - compatible
+  - reg
+  - backlight
+  - iovcc-supply
+  - vci-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    dsi@ff450000 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        panel@0 {
+            compatible = "xinpeng,xpp055c272";
+            reg = <0>;
+            backlight = <&backlight>;
+            iovcc-supply = <&vcc_1v8>;
+            vci-supply = <&vcc3v3_lcd>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/renesas,cmm.yaml b/Documentation/devicetree/bindings/display/renesas,cmm.yaml
new file mode 100644
index 000000000000..a57037b9e9ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/renesas,cmm.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/renesas,cmm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car Color Management Module (CMM)
+
+maintainers:
+  - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+  - Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
+  - Jacopo Mondi <jacopo+renesas@jmondi.org>
+
+description: |+
+  Renesas R-Car color management module connected to R-Car DU video channels.
+  It provides image enhancement functions such as 1-D look-up tables (LUT),
+  3-D look-up tables (CLU), 1D-histogram generation (HGO), and color
+  space conversion (CSC).
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - renesas,r8a7795-cmm
+          - renesas,r8a7796-cmm
+          - renesas,r8a77965-cmm
+          - renesas,r8a77990-cmm
+          - renesas,r8a77995-cmm
+        - const: renesas,rcar-gen3-cmm
+      - items:
+        - const: renesas,rcar-gen2-cmm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - resets
+  - power-domains
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7796-cpg-mssr.h>
+    #include <dt-bindings/power/r8a7796-sysc.h>
+
+    cmm0: cmm@fea40000 {
+         compatible = "renesas,r8a7796-cmm",
+                      "renesas,rcar-gen3-cmm";
+         reg = <0 0xfea40000 0 0x1000>;
+         power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
+         clocks = <&cpg CPG_MOD 711>;
+         resets = <&cpg 711>;
+    };
diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt
index c97dfacad281..eb4ae41fe41f 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.txt
+++ b/Documentation/devicetree/bindings/display/renesas,du.txt
@@ -8,6 +8,7 @@ Required Properties:
     - "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
     - "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
     - "renesas,du-r8a774a1" for R8A774A1 (RZ/G2M) compatible DU
+    - "renesas,du-r8a774b1" for R8A774B1 (RZ/G2N) compatible DU
     - "renesas,du-r8a774c0" for R8A774C0 (RZ/G2E) compatible DU
     - "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
     - "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
@@ -40,10 +41,14 @@ Required Properties:
       supplied they must be named "dclkin.x" with "x" being the input clock
       numerical index.
 
-  - vsps: A list of phandle and channel index tuples to the VSPs that handle
-    the memory interfaces for the DU channels. The phandle identifies the VSP
-    instance that serves the DU channel, and the channel index identifies the
-    LIF instance in that VSP.
+  - renesas,cmms: A list of phandles to the CMM instances present in the SoC,
+    one for each available DU channel. The property shall not be specified for
+    SoCs that do not provide any CMM (such as V3M and V3H).
+
+  - renesas,vsps: A list of phandle and channel index tuples to the VSPs that
+    handle the memory interfaces for the DU channels. The phandle identifies the
+    VSP instance that serves the DU channel, and the channel index identifies
+    the LIF instance in that VSP.
 
 Required nodes:
 
@@ -60,6 +65,7 @@ corresponding to each DU output.
  R8A7745 (RZ/G1E)       DPAD 0         DPAD 1         -              -
  R8A77470 (RZ/G1C)      DPAD 0         DPAD 1         LVDS 0         -
  R8A774A1 (RZ/G2M)      DPAD 0         HDMI 0         LVDS 0         -
+ R8A774B1 (RZ/G2N)      DPAD 0         HDMI 0         LVDS 0         -
  R8A774C0 (RZ/G2E)      DPAD 0         LVDS 0         LVDS 1         -
  R8A7779 (R-Car H1)     DPAD 0         DPAD 1         -              -
  R8A7790 (R-Car H2)     DPAD 0         LVDS 0         LVDS 1         -
@@ -90,7 +96,8 @@ Example: R8A7795 (R-Car H3) ES2.0 DU
 			 <&cpg CPG_MOD 722>,
 			 <&cpg CPG_MOD 721>;
 		clock-names = "du.0", "du.1", "du.2", "du.3";
-		vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
+		renesas,cmms = <&cmm0>, <&cmm1>, <&cmm2>, <&cmm3>;
+		renesas,vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
 
 		ports {
 			#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt b/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt
index ce4c1fc9116c..151be3bba06f 100644
--- a/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt
+++ b/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt
@@ -4,13 +4,16 @@ Rockchip specific extensions to the Synopsys Designware MIPI DSI
 Required properties:
 - #address-cells: Should be <1>.
 - #size-cells: Should be <0>.
-- compatible: "rockchip,rk3288-mipi-dsi", "snps,dw-mipi-dsi".
-	      "rockchip,rk3399-mipi-dsi", "snps,dw-mipi-dsi".
+- compatible: one of
+	"rockchip,px30-mipi-dsi", "snps,dw-mipi-dsi"
+	"rockchip,rk3288-mipi-dsi", "snps,dw-mipi-dsi"
+	"rockchip,rk3399-mipi-dsi", "snps,dw-mipi-dsi"
 - reg: Represent the physical address range of the controller.
 - interrupts: Represent the controller's interrupt to the CPU(s).
 - clocks, clock-names: Phandles to the controller's pll reference
-  clock(ref) and APB clock(pclk). For RK3399, a phy config clock
-  (phy_cfg) and a grf clock(grf) are required. As described in [1].
+  clock(ref) when using an internal dphy and APB clock(pclk).
+  For RK3399, a phy config clock (phy_cfg) and a grf clock(grf)
+  are required. As described in [1].
 - rockchip,grf: this soc should set GRF regs to mux vopl/vopb.
 - ports: contain a port node with endpoint definitions as defined in [2].
   For vopb,set the reg = <0> and set the reg = <1> for vopl.
@@ -18,6 +21,8 @@ Required properties:
 - video port 1 for either a panel or subsequent encoder
 
 Optional properties:
+- phys: from general PHY binding: the phandle for the PHY device.
+- phy-names: Should be "dphy" if phys references an external phy.
 - power-domains: a phandle to mipi dsi power domain node.
 - resets: list of phandle + reset specifier pairs, as described in [3].
 - reset-names: string reset name, must be "apb".
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt
index 7849ff039229..aaf8c44cf90f 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt
@@ -4,6 +4,7 @@ Rockchip RK3288 LVDS interface
 Required properties:
 - compatible: matching the soc type, one of
 	- "rockchip,rk3288-lvds";
+	- "rockchip,px30-lvds";
 
 - reg: physical base address of the controller and length
 	of memory mapped region.
@@ -18,6 +19,9 @@ Required properties:
 - rockchip,grf: phandle to the general register files syscon
 - rockchip,output: "rgb", "lvds" or "duallvds", This describes the output interface
 
+- phys: LVDS/DSI DPHY (px30 only)
+- phy-names: name of the PHY, must be "dphy" (px30 only)
+
 Optional properties:
 - pinctrl-names: must contain a "lcdc" entry.
 - pinctrl-0: pin control group to be used for this controller.
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
index 4f58c5a2d195..8b3a5f514205 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
@@ -20,6 +20,10 @@ Required properties:
 		"rockchip,rk3228-vop";
 		"rockchip,rk3328-vop";
 
+- reg: Must contain one entry corresponding to the base address and length
+	of the register space. Can optionally contain a second entry
+	corresponding to the CRTC gamma LUT address.
+
 - interrupts: should contain a list of all VOP IP block interrupts in the
 		 order: VSYNC, LCD_SYSTEM. The interrupt specifier
 		 format depends on the interrupt controller used.
@@ -48,7 +52,7 @@ Example:
 SoC specific DT entry:
 	vopb: vopb@ff930000 {
 		compatible = "rockchip,rk3288-vop";
-		reg = <0xff930000 0x19c>;
+		reg = <0x0 0xff930000 0x0 0x19c>, <0x0 0xff931000 0x0 0x1000>;
 		interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru ACLK_VOP0>, <&cru DCLK_VOP0>, <&cru HCLK_VOP0>;
 		clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
diff --git a/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
new file mode 100644
index 000000000000..3be76d15bf6c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/st,stm32-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 DSI host controller
+
+maintainers:
+  - Philippe Cornu <philippe.cornu@st.com>
+  - Yannick Fertre <yannick.fertre@st.com>
+
+description:
+  The STMicroelectronics STM32 DSI controller uses the Synopsys DesignWare MIPI-DSI host controller.
+
+properties:
+  compatible:
+    const: st,stm32-dsi
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Module Clock
+      - description: DSI bus clock
+      - description: Pixel clock
+    minItems: 2
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: pclk
+      - const: ref
+      - const: px_clk
+    minItems: 2
+    maxItems: 3
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: apb
+
+  phy-dsi-supply:
+    description:
+        Phandle of the regulator that provides the supply voltage.
+
+  ports:
+    type: object
+    description:
+      A node containing DSI input & output port nodes with endpoint
+      definitions as documented in
+      Documentation/devicetree/bindings/media/video-interfaces.txt
+      Documentation/devicetree/bindings/graph.txt
+    properties:
+      port@0:
+        type: object
+        description:
+          DSI input port node, connected to the ltdc rgb output port.
+
+      port@1:
+        type: object
+        description:
+          DSI output port node, connected to a panel or a bridge input port"
+
+patternProperties:
+  "^(panel|panel-dsi)@[0-9]$":
+    type: object
+    description:
+      A node containing the panel or bridge description as documented in
+      Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
+    properties:
+      port:
+        type: object
+        description:
+          Panel or bridge port node, connected to the DSI output port (port@1)
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    #include <dt-bindings/gpio/gpio.h>
+    dsi: dsi@5a000000 {
+        compatible = "st,stm32-dsi";
+        reg = <0x5a000000 0x800>;
+        clocks = <&rcc DSI_K>, <&clk_hse>, <&rcc DSI_PX>;
+        clock-names = "pclk", "ref", "px_clk";
+        resets = <&rcc DSI_R>;
+        reset-names = "apb";
+        phy-dsi-supply = <&reg18>;
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ports {
+              #address-cells = <1>;
+              #size-cells = <0>;
+
+              port@0 {
+                    reg = <0>;
+                    dsi_in: endpoint {
+                        remote-endpoint = <&ltdc_ep1_out>;
+                    };
+              };
+
+              port@1 {
+                    reg = <1>;
+                    dsi_out: endpoint {
+                        remote-endpoint = <&panel_in>;
+                    };
+              };
+        };
+
+        panel-dsi@0 {
+              compatible = "orisetech,otm8009a";
+              reg = <0>;
+              reset-gpios = <&gpioe 4 GPIO_ACTIVE_LOW>;
+              power-supply = <&v3v3>;
+
+              port {
+                    panel_in: endpoint {
+                        remote-endpoint = <&dsi_out>;
+                    };
+              };
+        };
+    };
+
+...
+
+
diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
deleted file mode 100644
index 60c54da4e526..000000000000
--- a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-* STMicroelectronics STM32 lcd-tft display controller
-
-- ltdc: lcd-tft display controller host
-  Required properties:
-  - compatible: "st,stm32-ltdc"
-  - reg: Physical base address of the IP registers and length of memory mapped region.
-  - clocks: A list of phandle + clock-specifier pairs, one for each
-    entry in 'clock-names'.
-  - clock-names: A list of clock names. For ltdc it should contain:
-      - "lcd" for the clock feeding the output pixel clock & IP clock.
-  - resets: reset to be used by the device (defined by use of RCC macro).
-  Required nodes:
-  - Video port for DPI RGB output: ltdc has one video port with up to 2
-    endpoints:
-      - for external dpi rgb panel or bridge, using gpios.
-      - for internal dpi input of the MIPI DSI host controller.
-      Note: These 2 endpoints cannot be activated simultaneously.
-
-* STMicroelectronics STM32 DSI controller specific extensions to Synopsys
-  DesignWare MIPI DSI host controller
-
-The STMicroelectronics STM32 DSI controller uses the Synopsys DesignWare MIPI
-DSI host controller. For all mandatory properties & nodes, please refer
-to the related documentation in [5].
-
-Mandatory properties specific to STM32 DSI:
-- #address-cells: Should be <1>.
-- #size-cells: Should be <0>.
-- compatible: "st,stm32-dsi".
-- clock-names:
-  - phy pll reference clock string name, must be "ref".
-- resets: see [5].
-- reset-names: see [5].
-
-Mandatory nodes specific to STM32 DSI:
-- ports: A node containing DSI input & output port nodes with endpoint
-  definitions as documented in [3] & [4].
-  - port@0: DSI input port node, connected to the ltdc rgb output port.
-  - port@1: DSI output port node, connected to a panel or a bridge input port.
-- panel or bridge node: A node containing the panel or bridge description as
-  documented in [6].
-  - port: panel or bridge port node, connected to the DSI output port (port@1).
-Optional properties:
-- phy-dsi-supply: phandle of the regulator that provides the supply voltage.
-
-Note: You can find more documentation in the following references
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/reset/reset.txt
-[3] Documentation/devicetree/bindings/media/video-interfaces.txt
-[4] Documentation/devicetree/bindings/graph.txt
-[5] Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt
-[6] Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
-
-Example 1: RGB panel
-/ {
-	...
-	soc {
-	...
-		ltdc: display-controller@40016800 {
-			compatible = "st,stm32-ltdc";
-			reg = <0x40016800 0x200>;
-			interrupts = <88>, <89>;
-			resets = <&rcc STM32F4_APB2_RESET(LTDC)>;
-			clocks = <&rcc 1 CLK_LCD>;
-			clock-names = "lcd";
-
-			port {
-				ltdc_out_rgb: endpoint {
-				};
-			};
-		};
-	};
-};
-
-Example 2: DSI panel
-
-/ {
-	...
-	soc {
-	...
-		ltdc: display-controller@40016800 {
-			compatible = "st,stm32-ltdc";
-			reg = <0x40016800 0x200>;
-			interrupts = <88>, <89>;
-			resets = <&rcc STM32F4_APB2_RESET(LTDC)>;
-			clocks = <&rcc 1 CLK_LCD>;
-			clock-names = "lcd";
-
-			port {
-				ltdc_out_dsi: endpoint {
-					remote-endpoint = <&dsi_in>;
-				};
-			};
-		};
-
-
-		dsi: dsi@40016c00 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "st,stm32-dsi";
-			reg = <0x40016c00 0x800>;
-			clocks = <&rcc 1 CLK_F469_DSI>, <&clk_hse>;
-			clock-names = "pclk", "ref";
-			resets = <&rcc STM32F4_APB2_RESET(DSI)>;
-			reset-names = "apb";
-			phy-dsi-supply = <&reg18>;
-
-			ports {
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				port@0 {
-					reg = <0>;
-					dsi_in: endpoint {
-						remote-endpoint = <&ltdc_out_dsi>;
-					};
-				};
-
-				port@1 {
-					reg = <1>;
-					dsi_out: endpoint {
-						remote-endpoint = <&dsi_in_panel>;
-					};
-				};
-
-			};
-
-			panel-dsi@0 {
-				reg = <0>; /* dsi virtual channel (0..3) */
-				compatible = ...;
-				enable-gpios = ...;
-
-				port {
-					dsi_in_panel: endpoint {
-						remote-endpoint = <&dsi_out>;
-					};
-				};
-
-			};
-
-		};
-
-	};
-};
diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml b/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
new file mode 100644
index 000000000000..bf8ad916e9b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/st,stm32-ltdc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 lcd-tft display controller
+
+maintainers:
+  - Philippe Cornu <philippe.cornu@st.com>
+  - Yannick Fertre <yannick.fertre@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-ltdc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: events interrupt line.
+      - description: errors interrupt line.
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: lcd
+
+  resets:
+    maxItems: 1
+
+  port:
+    type: object
+    description:
+      "Video port for DPI RGB output.
+      ltdc has one video port with up to 2 endpoints:
+      - for external dpi rgb panel or bridge, using gpios.
+      - for internal dpi input of the MIPI DSI host controller.
+      Note: These 2 endpoints cannot be activated simultaneously.
+      Please refer to the bindings defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt."
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    ltdc: display-controller@40016800 {
+        compatible = "st,stm32-ltdc";
+        reg = <0x5a001000 0x400>;
+        interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&rcc LTDC_PX>;
+        clock-names = "lcd";
+        resets = <&rcc LTDC_R>;
+
+        port {
+             ltdc_out_dsi: endpoint {
+                     remote-endpoint = <&dsi_in>;
+             };
+        };
+    };
+
+...
+
diff --git a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
deleted file mode 100644
index 31ab72cba3d4..000000000000
--- a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
+++ /dev/null
@@ -1,637 +0,0 @@
-Allwinner A10 Display Pipeline
-==============================
-
-The Allwinner A10 Display pipeline is composed of several components
-that are going to be documented below:
-
-For all connections between components up to the TCONs in the display
-pipeline, when there are multiple components of the same type at the
-same depth, the local endpoint ID must be the same as the remote
-component's index. For example, if the remote endpoint is Frontend 1,
-then the local endpoint ID must be 1.
-
-    Frontend 0  [0] ------- [0]  Backend 0  [0] ------- [0]  TCON 0
-		[1] --   -- [1]             [1] --   -- [1]
-		      \ /                         \ /
-		       X                           X
-		      / \                         / \
-		[0] --   -- [0]             [0] --   -- [0]
-    Frontend 1  [1] ------- [1]  Backend 1  [1] ------- [1]  TCON 1
-
-For a two pipeline system such as the one depicted above, the lines
-represent the connections between the components, while the numbers
-within the square brackets corresponds to the ID of the local endpoint.
-
-The same rule also applies to DE 2.0 mixer-TCON connections:
-
-    Mixer 0  [0] ----------- [0]  TCON 0
-	     [1] ----   ---- [1]
-		     \ /
-		      X
-		     / \
-	     [0] ----   ---- [0]
-    Mixer 1  [1] ----------- [1]  TCON 1
-
-HDMI Encoder
-------------
-
-The HDMI Encoder supports the HDMI video and audio outputs, and does
-CEC. It is one end of the pipeline.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun4i-a10-hdmi
-    * allwinner,sun5i-a10s-hdmi
-    * allwinner,sun6i-a31-hdmi
-  - reg: base address and size of memory-mapped region
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the HDMI encoder
-    * ahb: the HDMI interface clock
-    * mod: the HDMI module clock
-    * ddc: the HDMI ddc clock (A31 only)
-    * pll-0: the first video PLL
-    * pll-1: the second video PLL
-  - clock-names: the clock names mentioned above
-  - resets: phandle to the reset control for the HDMI encoder (A31 only)
-  - dmas: phandles to the DMA channels used by the HDMI encoder
-    * ddc-tx: The channel for DDC transmission
-    * ddc-rx: The channel for DDC reception
-    * audio-tx: The channel used for audio transmission
-  - dma-names: the channel names mentioned above
-
-  - ports: A ports node with endpoint definitions as defined in
-    Documentation/devicetree/bindings/media/video-interfaces.txt. The
-    first port should be the input endpoint. The second should be the
-    output, usually to an HDMI connector.
-
-DWC HDMI TX Encoder
--------------------
-
-The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
-with Allwinner's own PHY IP. It supports audio and video outputs and CEC.
-
-These DT bindings follow the Synopsys DWC HDMI TX bindings defined in
-Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt with the
-following device-specific properties.
-
-Required properties:
-
-  - compatible: value must be one of:
-    * "allwinner,sun8i-a83t-dw-hdmi"
-    * "allwinner,sun50i-a64-dw-hdmi", "allwinner,sun8i-a83t-dw-hdmi"
-    * "allwinner,sun50i-h6-dw-hdmi"
-  - reg: base address and size of memory-mapped region
-  - reg-io-width: See dw_hdmi.txt. Shall be 1.
-  - interrupts: HDMI interrupt number
-  - clocks: phandles to the clocks feeding the HDMI encoder
-    * iahb: the HDMI bus clock
-    * isfr: the HDMI register clock
-    * tmds: TMDS clock
-    * cec: HDMI CEC clock (H6 only)
-    * hdcp: HDCP clock (H6 only)
-    * hdcp-bus: HDCP bus clock (H6 only)
-  - clock-names: the clock names mentioned above
-  - resets:
-    * ctrl: HDMI controller reset
-    * hdcp: HDCP reset (H6 only)
-  - reset-names: reset names mentioned above
-  - phys: phandle to the DWC HDMI PHY
-  - phy-names: must be "phy"
-
-  - ports: A ports node with endpoint definitions as defined in
-    Documentation/devicetree/bindings/media/video-interfaces.txt. The
-    first port should be the input endpoint. The second should be the
-    output, usually to an HDMI connector.
-
-Optional properties:
-  - hvcc-supply: the VCC power supply of the controller
-
-DWC HDMI PHY
-------------
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun8i-a83t-hdmi-phy
-    * allwinner,sun8i-h3-hdmi-phy
-    * allwinner,sun8i-r40-hdmi-phy
-    * allwinner,sun50i-a64-hdmi-phy
-    * allwinner,sun50i-h6-hdmi-phy
-  - reg: base address and size of memory-mapped region
-  - clocks: phandles to the clocks feeding the HDMI PHY
-    * bus: the HDMI PHY interface clock
-    * mod: the HDMI PHY module clock
-  - clock-names: the clock names mentioned above
-  - resets: phandle to the reset controller driving the PHY
-  - reset-names: must be "phy"
-
-H3, A64 and R40 HDMI PHY require additional clocks:
-  - pll-0: parent of phy clock
-  - pll-1: second possible phy clock parent (A64/R40 only)
-
-TV Encoder
-----------
-
-The TV Encoder supports the composite and VGA output. It is one end of
-the pipeline.
-
-Required properties:
- - compatible: value should be "allwinner,sun4i-a10-tv-encoder".
- - reg: base address and size of memory-mapped region
- - clocks: the clocks driving the TV encoder
- - resets: phandle to the reset controller driving the encoder
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoint.
-
-TCON
-----
-
-The TCON acts as a timing controller for RGB, LVDS and TV interfaces.
-
-Required properties:
- - compatible: value must be either:
-   * allwinner,sun4i-a10-tcon
-   * allwinner,sun5i-a13-tcon
-   * allwinner,sun6i-a31-tcon
-   * allwinner,sun6i-a31s-tcon
-   * allwinner,sun7i-a20-tcon
-   * allwinner,sun8i-a23-tcon
-   * allwinner,sun8i-a33-tcon
-   * allwinner,sun8i-a83t-tcon-lcd
-   * allwinner,sun8i-a83t-tcon-tv
-   * allwinner,sun8i-r40-tcon-tv
-   * allwinner,sun8i-v3s-tcon
-   * allwinner,sun9i-a80-tcon-lcd
-   * allwinner,sun9i-a80-tcon-tv
-   * "allwinner,sun50i-a64-tcon-lcd", "allwinner,sun8i-a83t-tcon-lcd"
-   * "allwinner,sun50i-a64-tcon-tv", "allwinner,sun8i-a83t-tcon-tv"
-   * allwinner,sun50i-h6-tcon-tv, allwinner,sun8i-r40-tcon-tv
- - reg: base address and size of memory-mapped region
- - interrupts: interrupt associated to this IP
- - clocks: phandles to the clocks feeding the TCON.
-   - 'ahb': the interface clocks
-   - 'tcon-ch0': The clock driving the TCON channel 0, if supported
- - resets: phandles to the reset controllers driving the encoder
-   - "lcd": the reset line for the TCON
-   - "edp": the reset line for the eDP block (A80 only)
-
- - clock-names: the clock names mentioned above
- - reset-names: the reset names mentioned above
- - clock-output-names: Name of the pixel clock created, if TCON supports
-   channel 0.
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoint, the second one the output
-
-  The output may have multiple endpoints. TCON can have 1 or 2 channels,
-  usually with the first channel being used for the panels interfaces
-  (RGB, LVDS, etc.), and the second being used for the outputs that
-  require another controller (TV Encoder, HDMI, etc.). The endpoints
-  will take an extra property, allwinner,tcon-channel, to specify the
-  channel the endpoint is associated to. If that property is not
-  present, the endpoint number will be used as the channel number.
-
-For TCONs with channel 0, there is one more clock required:
-   - 'tcon-ch0': The clock driving the TCON channel 0
-For TCONs with channel 1, there is one more clock required:
-   - 'tcon-ch1': The clock driving the TCON channel 1
-
-When TCON support LVDS (all TCONs except TV TCONs on A83T, R40 and those found
-in A13, H3, H5 and V3s SoCs), you need one more reset line:
-   - 'lvds': The reset line driving the LVDS logic
-
-And on the A23, A31, A31s and A33, you need one more clock line:
-   - 'lvds-alt': An alternative clock source, separate from the TCON channel 0
-                 clock, that can be used to drive the LVDS clock
-
-TCON TOP
---------
-
-TCON TOPs main purpose is to configure whole display pipeline. It determines
-relationships between mixers and TCONs, selects source TCON for HDMI, muxes
-LCD and TV encoder GPIO output, selects TV encoder clock source and contains
-additional TV TCON and DSI gates.
-
-It allows display pipeline to be configured in very different ways:
-
-                                / LCD0/LVDS0
-                 / [0] TCON-LCD0
-                 |              \ MIPI DSI
- mixer0          |
-        \        / [1] TCON-LCD1 - LCD1/LVDS1
-         TCON-TOP
-        /        \ [2] TCON-TV0 [0] - TVE0/RGB
- mixer1          |                  \
-                 |                   TCON-TOP - HDMI
-                 |                  /
-                 \ [3] TCON-TV1 [1] - TVE1/RGB
-
-Note that both TCON TOP references same physical unit. Both mixers can be
-connected to any TCON. Not all TCON TOP variants support all features.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun8i-r40-tcon-top
-    * allwinner,sun50i-h6-tcon-top
-  - reg: base address and size of the memory-mapped region.
-  - clocks: phandle to the clocks feeding the TCON TOP
-    * bus: TCON TOP interface clock
-    * tcon-tv0: TCON TV0 clock
-    * tve0: TVE0 clock (R40 only)
-    * tcon-tv1: TCON TV1 clock (R40 only)
-    * tve1: TVE0 clock (R40 only)
-    * dsi: MIPI DSI clock (R40 only)
-  - clock-names: clock name mentioned above
-  - resets: phandle to the reset line driving the TCON TOP
-  - #clock-cells : must contain 1
-  - clock-output-names: Names of clocks created for TCON TV0 channel clock,
-    TCON TV1 channel clock (R40 only) and DSI channel clock (R40 only), in
-    that order.
-
-- ports: A ports node with endpoint definitions as defined in
-    Documentation/devicetree/bindings/media/video-interfaces.txt. 6 ports should
-    be defined:
-    * port 0 is input for mixer0 mux
-    * port 1 is output for mixer0 mux
-    * port 2 is input for mixer1 mux
-    * port 3 is output for mixer1 mux
-    * port 4 is input for HDMI mux
-    * port 5 is output for HDMI mux
-    All output endpoints for mixer muxes and input endpoints for HDMI mux should
-    have reg property with the id of the target TCON, as shown in above graph
-    (0-3 for mixer muxes and 0-1 for HDMI mux). All ports should have only one
-    endpoint connected to remote endpoint.
-
-DRC
----
-
-The DRC (Dynamic Range Controller), found in the latest Allwinner SoCs
-(A31, A23, A33, A80), allows to dynamically adjust pixel
-brightness/contrast based on histogram measurements for LCD content
-adaptive backlight control.
-
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun6i-a31-drc
-    * allwinner,sun6i-a31s-drc
-    * allwinner,sun8i-a23-drc
-    * allwinner,sun8i-a33-drc
-    * allwinner,sun9i-a80-drc
-  - reg: base address and size of the memory-mapped region.
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the DRC
-    * ahb: the DRC interface clock
-    * mod: the DRC module clock
-    * ram: the DRC DRAM clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset line driving the DRC
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoints, the second one the outputs
-
-Display Engine Backend
-----------------------
-
-The display engine backend exposes layers and sprites to the
-system.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun4i-a10-display-backend
-    * allwinner,sun5i-a13-display-backend
-    * allwinner,sun6i-a31-display-backend
-    * allwinner,sun7i-a20-display-backend
-    * allwinner,sun8i-a23-display-backend
-    * allwinner,sun8i-a33-display-backend
-    * allwinner,sun9i-a80-display-backend
-  - reg: base address and size of the memory-mapped region.
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the frontend and backend
-    * ahb: the backend interface clock
-    * mod: the backend module clock
-    * ram: the backend DRAM clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset controllers driving the backend
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoints, the second one the output
-
-On the A33, some additional properties are required:
-  - reg needs to have an additional region corresponding to the SAT
-  - reg-names need to be set, with "be" and "sat"
-  - clocks and clock-names need to have a phandle to the SAT bus
-    clocks, whose name will be "sat"
-  - resets and reset-names need to have a phandle to the SAT bus
-    resets, whose name will be "sat"
-
-DEU
----
-
-The DEU (Detail Enhancement Unit), found in the Allwinner A80 SoC,
-can sharpen the display content in both luma and chroma channels.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun9i-a80-deu
-  - reg: base address and size of the memory-mapped region.
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the DEU
-    * ahb: the DEU interface clock
-    * mod: the DEU module clock
-    * ram: the DEU DRAM clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset line driving the DEU
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoints, the second one the outputs
-
-Display Engine Frontend
------------------------
-
-The display engine frontend does formats conversion, scaling,
-deinterlacing and color space conversion.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun4i-a10-display-frontend
-    * allwinner,sun5i-a13-display-frontend
-    * allwinner,sun6i-a31-display-frontend
-    * allwinner,sun7i-a20-display-frontend
-    * allwinner,sun8i-a23-display-frontend
-    * allwinner,sun8i-a33-display-frontend
-    * allwinner,sun9i-a80-display-frontend
-  - reg: base address and size of the memory-mapped region.
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the frontend and backend
-    * ahb: the backend interface clock
-    * mod: the backend module clock
-    * ram: the backend DRAM clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset controllers driving the backend
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoints, the second one the outputs
-
-Display Engine 2.0 Mixer
-------------------------
-
-The DE2 mixer have many functionalities, currently only layer blending is
-supported.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun8i-a83t-de2-mixer-0
-    * allwinner,sun8i-a83t-de2-mixer-1
-    * allwinner,sun8i-h3-de2-mixer-0
-    * allwinner,sun8i-r40-de2-mixer-0
-    * allwinner,sun8i-r40-de2-mixer-1
-    * allwinner,sun8i-v3s-de2-mixer
-    * allwinner,sun50i-a64-de2-mixer-0
-    * allwinner,sun50i-a64-de2-mixer-1
-    * allwinner,sun50i-h6-de3-mixer-0
-  - reg: base address and size of the memory-mapped region.
-  - clocks: phandles to the clocks feeding the mixer
-    * bus: the mixer interface clock
-    * mod: the mixer module clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset controllers driving the mixer
-
-- ports: A ports node with endpoint definitions as defined in
-  Documentation/devicetree/bindings/media/video-interfaces.txt. The
-  first port should be the input endpoints, the second one the output
-
-
-Display Engine Pipeline
------------------------
-
-The display engine pipeline (and its entry point, since it can be
-either directly the backend or the frontend) is represented as an
-extra node.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun4i-a10-display-engine
-    * allwinner,sun5i-a10s-display-engine
-    * allwinner,sun5i-a13-display-engine
-    * allwinner,sun6i-a31-display-engine
-    * allwinner,sun6i-a31s-display-engine
-    * allwinner,sun7i-a20-display-engine
-    * allwinner,sun8i-a23-display-engine
-    * allwinner,sun8i-a33-display-engine
-    * allwinner,sun8i-a83t-display-engine
-    * allwinner,sun8i-h3-display-engine
-    * allwinner,sun8i-r40-display-engine
-    * allwinner,sun8i-v3s-display-engine
-    * allwinner,sun9i-a80-display-engine
-    * allwinner,sun50i-a64-display-engine
-    * allwinner,sun50i-h6-display-engine
-
-  - allwinner,pipelines: list of phandle to the display engine
-    frontends (DE 1.0) or mixers (DE 2.0/3.0) available.
-
-Example:
-
-panel: panel {
-	compatible = "olimex,lcd-olinuxino-43-ts";
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	port {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		panel_input: endpoint {
-			remote-endpoint = <&tcon0_out_panel>;
-		};
-	};
-};
-
-connector {
-	compatible = "hdmi-connector";
-	type = "a";
-
-	port {
-		hdmi_con_in: endpoint {
-			remote-endpoint = <&hdmi_out_con>;
-		};
-	};
-};
-
-hdmi: hdmi@1c16000 {
-	compatible = "allwinner,sun5i-a10s-hdmi";
-	reg = <0x01c16000 0x1000>;
-	interrupts = <58>;
-	clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
-		 <&ccu CLK_PLL_VIDEO0_2X>,
-		 <&ccu CLK_PLL_VIDEO1_2X>;
-	clock-names = "ahb", "mod", "pll-0", "pll-1";
-	dmas = <&dma SUN4I_DMA_NORMAL 16>,
-	       <&dma SUN4I_DMA_NORMAL 16>,
-	       <&dma SUN4I_DMA_DEDICATED 24>;
-	dma-names = "ddc-tx", "ddc-rx", "audio-tx";
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		port@0 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0>;
-
-			hdmi_in_tcon0: endpoint {
-				remote-endpoint = <&tcon0_out_hdmi>;
-			};
-		};
-
-		port@1 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <1>;
-
-			hdmi_out_con: endpoint {
-				remote-endpoint = <&hdmi_con_in>;
-			};
-		};
-	};
-};
-
-tve0: tv-encoder@1c0a000 {
-	compatible = "allwinner,sun4i-a10-tv-encoder";
-	reg = <0x01c0a000 0x1000>;
-	clocks = <&ahb_gates 34>;
-	resets = <&tcon_ch0_clk 0>;
-
-	port {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		tve0_in_tcon0: endpoint@0 {
-			reg = <0>;
-			remote-endpoint = <&tcon0_out_tve0>;
-		};
-	};
-};
-
-tcon0: lcd-controller@1c0c000 {
-	compatible = "allwinner,sun5i-a13-tcon";
-	reg = <0x01c0c000 0x1000>;
-	interrupts = <44>;
-	resets = <&tcon_ch0_clk 1>;
-	reset-names = "lcd";
-	clocks = <&ahb_gates 36>,
-		 <&tcon_ch0_clk>,
-		 <&tcon_ch1_clk>;
-	clock-names = "ahb",
-		      "tcon-ch0",
-		      "tcon-ch1";
-	clock-output-names = "tcon-pixel-clock";
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		tcon0_in: port@0 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0>;
-
-			tcon0_in_be0: endpoint@0 {
-				reg = <0>;
-				remote-endpoint = <&be0_out_tcon0>;
-			};
-		};
-
-		tcon0_out: port@1 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <1>;
-
-			tcon0_out_panel: endpoint@0 {
-				reg = <0>;
-				remote-endpoint = <&panel_input>;
-			};
-
-			tcon0_out_tve0: endpoint@1 {
-				reg = <1>;
-				remote-endpoint = <&tve0_in_tcon0>;
-			};
-		};
-	};
-};
-
-fe0: display-frontend@1e00000 {
-	compatible = "allwinner,sun5i-a13-display-frontend";
-	reg = <0x01e00000 0x20000>;
-	interrupts = <47>;
-	clocks = <&ahb_gates 46>, <&de_fe_clk>,
-		 <&dram_gates 25>;
-	clock-names = "ahb", "mod",
-		      "ram";
-	resets = <&de_fe_clk>;
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		fe0_out: port@1 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <1>;
-
-			fe0_out_be0: endpoint {
-				remote-endpoint = <&be0_in_fe0>;
-			};
-		};
-	};
-};
-
-be0: display-backend@1e60000 {
-	compatible = "allwinner,sun5i-a13-display-backend";
-	reg = <0x01e60000 0x10000>;
-	interrupts = <47>;
-	clocks = <&ahb_gates 44>, <&de_be_clk>,
-		 <&dram_gates 26>;
-	clock-names = "ahb", "mod",
-		      "ram";
-	resets = <&de_be_clk>;
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		be0_in: port@0 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0>;
-
-			be0_in_fe0: endpoint@0 {
-				reg = <0>;
-				remote-endpoint = <&fe0_out_be0>;
-			};
-		};
-
-		be0_out: port@1 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <1>;
-
-			be0_out_tcon0: endpoint@0 {
-				reg = <0>;
-				remote-endpoint = <&tcon0_in_be0>;
-			};
-		};
-	};
-};
-
-display-engine {
-	compatible = "allwinner,sun5i-a13-display-engine";
-	allwinner,pipelines = <&fe0>;
-};
diff --git a/Documentation/devicetree/bindings/display/tilcdc/tfp410.txt b/Documentation/devicetree/bindings/display/tilcdc/tfp410.txt
deleted file mode 100644
index a58ae7756fc6..000000000000
--- a/Documentation/devicetree/bindings/display/tilcdc/tfp410.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Device-Tree bindings for tilcdc DRM TFP410 output driver
-
-Required properties:
- - compatible: value should be "ti,tilcdc,tfp410".
- - i2c: the phandle for the i2c device to use for DDC
-
-Recommended properties:
- - pinctrl-names, pinctrl-0: the pincontrol settings to configure
-   muxing properly for pins that connect to TFP410 device
- - powerdn-gpio: the powerdown GPIO, pulled low to power down the
-   TFP410 device (for DPMS_OFF)
-
-Example:
-
-	dvicape {
-		compatible = "ti,tilcdc,tfp410";
-		i2c = <&i2c2>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&bone_dvi_cape_dvi_00A1_pins>;
-		powerdn-gpio = <&gpio2 31 0>;
-	};
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
index 15abc0f9429f..83808199657b 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 DMA Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
index 4cb9d6b93138..9e53472be194 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 DMA Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "dma-controller.yaml#"
@@ -68,9 +68,7 @@ else:
     clocks:
       maxItems: 1
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
index 740b7f9b535b..c1676b96daac 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 DMA Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml
index ed0a49a6f020..02a34ba2b49b 100644
--- a/Documentation/devicetree/bindings/dma/dma-common.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-common.yaml
@@ -25,11 +25,18 @@ properties:
       Used to provide DMA controller specific information.
 
   dma-channel-mask:
-    $ref: /schemas/types.yaml#definitions/uint32
     description:
       Bitmask of available DMA channels in ascending order that are
       not reserved by firmware and are available to the
       kernel. i.e. first channel corresponds to LSB.
+      The first item in the array is for channels 0-31, the second is for
+      channels 32-63, etc.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    items:
+      minItems: 1
+      # Should be enough
+      maxItems: 255
 
   dma-channels:
     $ref: /schemas/types.yaml#definitions/uint32
diff --git a/Documentation/devicetree/bindings/dma/fsl-edma.txt b/Documentation/devicetree/bindings/dma/fsl-edma.txt
index 29dd3ccb1235..e77b08ebcd06 100644
--- a/Documentation/devicetree/bindings/dma/fsl-edma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-edma.txt
@@ -10,6 +10,7 @@ Required properties:
 - compatible :
 	- "fsl,vf610-edma" for eDMA used similar to that on Vybrid vf610 SoC
 	- "fsl,imx7ulp-edma" for eDMA2 used similar to that on i.mx7ulp
+	- "fsl,fsl,ls1028a-edma" for eDMA used similar to that on Vybrid vf610 SoC
 - reg : Specifies base physical address(s) and size of the eDMA registers.
 	The 1st region is eDMA control register's address and size.
 	The 2nd and the 3rd regions are programmable channel multiplexing
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 9d8bbac27d8b..c9e97409e853 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -10,6 +10,9 @@ Required properties:
       "fsl,imx6q-sdma"
       "fsl,imx7d-sdma"
       "fsl,imx8mq-sdma"
+      "fsl,imx8mm-sdma"
+      "fsl,imx8mn-sdma"
+      "fsl,imx8mp-sdma"
   The -to variants should be preferred since they allow to determine the
   correct ROM script addresses needed for the driver to work without additional
   firmware.
diff --git a/Documentation/devicetree/bindings/dma/jz4780-dma.txt b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
index 636fcb26b164..3459e77be294 100644
--- a/Documentation/devicetree/bindings/dma/jz4780-dma.txt
+++ b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
@@ -1,4 +1,4 @@
-* Ingenic JZ4780 DMA Controller
+* Ingenic XBurst DMA Controller
 
 Required properties:
 
@@ -7,10 +7,13 @@ Required properties:
   * ingenic,jz4725b-dma
   * ingenic,jz4770-dma
   * ingenic,jz4780-dma
+  * ingenic,x1000-dma
+  * ingenic,x1830-dma
 - reg: Should contain the DMA channel registers location and length, followed
   by the DMA controller registers location and length.
 - interrupts: Should contain the interrupt specifier of the DMA controller.
-- clocks: Should contain a clock specifier for the JZ4780 PDMA clock.
+- clocks: Should contain a clock specifier for the JZ4780/X1000/X1830 PDMA
+  clock.
 - #dma-cells: Must be <2>. Number of integer cells in the dmas property of
   DMA clients (see below).
 
diff --git a/Documentation/devicetree/bindings/dma/milbeaut-m10v-hdmac.txt b/Documentation/devicetree/bindings/dma/milbeaut-m10v-hdmac.txt
new file mode 100644
index 000000000000..1f0875bd5abc
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/milbeaut-m10v-hdmac.txt
@@ -0,0 +1,32 @@
+* Milbeaut AHB DMA Controller
+
+Milbeaut AHB DMA controller has transfer capability below.
+ - device to memory transfer
+ - memory to device transfer
+
+Required property:
+- compatible:       Should be  "socionext,milbeaut-m10v-hdmac"
+- reg:              Should contain DMA registers location and length.
+- interrupts:       Should contain all of the per-channel DMA interrupts.
+                     Number of channels is configurable - 2, 4 or 8, so
+                     the number of interrupts specified should be {2,4,8}.
+- #dma-cells:       Should be 1. Specify the ID of the slave.
+- clocks:           Phandle to the clock used by the HDMAC module.
+
+
+Example:
+
+	hdmac1: dma-controller@1e110000 {
+		compatible = "socionext,milbeaut-m10v-hdmac";
+		reg = <0x1e110000 0x10000>;
+		interrupts = <0 132 4>,
+			     <0 133 4>,
+			     <0 134 4>,
+			     <0 135 4>,
+			     <0 136 4>,
+			     <0 137 4>,
+			     <0 138 4>,
+			     <0 139 4>;
+		#dma-cells = <1>;
+		clocks = <&dummy_clk>;
+	};
diff --git a/Documentation/devicetree/bindings/dma/milbeaut-m10v-xdmac.txt b/Documentation/devicetree/bindings/dma/milbeaut-m10v-xdmac.txt
new file mode 100644
index 000000000000..305791804062
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/milbeaut-m10v-xdmac.txt
@@ -0,0 +1,24 @@
+* Milbeaut AXI DMA Controller
+
+Milbeaut AXI DMA controller has only memory to memory transfer capability.
+
+* DMA controller
+
+Required property:
+- compatible: 	Should be  "socionext,milbeaut-m10v-xdmac"
+- reg:		Should contain DMA registers location and length.
+- interrupts: 	Should contain all of the per-channel DMA interrupts.
+                Number of channels is configurable - 2, 4 or 8, so
+                the number of interrupts specified should be {2,4,8}.
+- #dma-cells: 	Should be 1.
+
+Example:
+	xdmac0: dma-controller@1c250000 {
+		compatible = "socionext,milbeaut-m10v-xdmac";
+		reg = <0x1c250000 0x1000>;
+		interrupts = <0 17 0x4>,
+			     <0 18 0x4>,
+			     <0 19 0x4>,
+			     <0 20 0x4>;
+		#dma-cells = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index 5a512c5ea76a..b7f81c63be8b 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -21,6 +21,7 @@ Required Properties:
 		- "renesas,dmac-r8a7745" (RZ/G1E)
 		- "renesas,dmac-r8a77470" (RZ/G1C)
 		- "renesas,dmac-r8a774a1" (RZ/G2M)
+		- "renesas,dmac-r8a774b1" (RZ/G2N)
 		- "renesas,dmac-r8a774c0" (RZ/G2E)
 		- "renesas,dmac-r8a7790" (R-Car H2)
 		- "renesas,dmac-r8a7791" (R-Car M2-W)
@@ -29,6 +30,7 @@ Required Properties:
 		- "renesas,dmac-r8a7794" (R-Car E2)
 		- "renesas,dmac-r8a7795" (R-Car H3)
 		- "renesas,dmac-r8a7796" (R-Car M3-W)
+		- "renesas,dmac-r8a77961" (R-Car M3-W+)
 		- "renesas,dmac-r8a77965" (R-Car M3-N)
 		- "renesas,dmac-r8a77970" (R-Car V3M)
 		- "renesas,dmac-r8a77980" (R-Car V3H)
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
index 372f0eeb5a2a..f1f95f678739 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -8,6 +8,7 @@ Required Properties:
 	  - "renesas,r8a7745-usb-dmac" (RZ/G1E)
 	  - "renesas,r8a77470-usb-dmac" (RZ/G1C)
 	  - "renesas,r8a774a1-usb-dmac" (RZ/G2M)
+	  - "renesas,r8a774b1-usb-dmac" (RZ/G2N)
 	  - "renesas,r8a774c0-usb-dmac" (RZ/G2E)
 	  - "renesas,r8a7790-usb-dmac" (R-Car H2)
 	  - "renesas,r8a7791-usb-dmac" (R-Car M2-W)
diff --git a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
new file mode 100644
index 000000000000..2ca3ddbe1ff4
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/sifive,fu540-c000-pdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive Unleashed Rev C000 Platform DMA
+
+maintainers:
+  - Green Wan <green.wan@sifive.com>
+  - Palmer Debbelt <palmer@sifive.com>
+  - Paul Walmsley <paul.walmsley@sifive.com>
+
+description: |
+  Platform DMA is a DMA engine of SiFive Unleashed. It supports 4
+  channels. Each channel has 2 interrupts. One is for DMA done and
+  the other is for DME error.
+
+  In different SoC, DMA could be attached to different IRQ line.
+  DT file need to be changed to meet the difference. For technical
+  doc,
+
+  https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+
+properties:
+  compatible:
+    items:
+      - const: sifive,fu540-c000-pdma
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 8
+
+  '#dma-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#dma-cells'
+
+examples:
+  - |
+    dma@3000000 {
+      compatible = "sifive,fu540-c000-pdma";
+      reg = <0x0 0x3000000 0x0 0x8000>;
+      interrupts = <23 24 25 26 27 28 29 30>;
+      #dma-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
new file mode 100644
index 000000000000..0c0ac11ad55f
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/st,stm32-dma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 DMA Controller bindings
+
+description: |
+  The STM32 DMA is a general-purpose direct memory access controller capable of
+  supporting 8 independent DMA channels. Each channel can have up to 8 requests.
+  DMA clients connected to the STM32 DMA controller must use the format
+  described in the dma.txt file, using a four-cell specifier for each
+  channel: a phandle to the DMA controller plus the following four integer cells:
+    1. The channel id
+    2. The request line number
+    3. A 32bit mask specifying the DMA channel configuration which are device
+      dependent:
+        -bit 9: Peripheral Increment Address
+          0x0: no address increment between transfers
+          0x1: increment address between transfers
+        -bit 10: Memory Increment Address
+          0x0: no address increment between transfers
+          0x1: increment address between transfers
+        -bit 15: Peripheral Increment Offset Size
+          0x0: offset size is linked to the peripheral bus width
+          0x1: offset size is fixed to 4 (32-bit alignment)
+        -bit 16-17: Priority level
+          0x0: low
+          0x1: medium
+          0x2: high
+          0x3: very high
+    4. A 32bit bitfield value specifying DMA features which are device dependent:
+       -bit 0-1: DMA FIFO threshold selection
+         0x0: 1/4 full FIFO
+         0x1: 1/2 full FIFO
+         0x2: 3/4 full FIFO
+         0x3: full FIFO
+
+maintainers:
+  - Amelie Delaunay <amelie.delaunay@st.com>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  "#dma-cells":
+    const: 4
+
+  compatible:
+    const: st,stm32-dma
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 8
+    description: Should contain all of the per-channel DMA
+      interrupts in ascending order with respect to the
+      DMA channel index.
+
+  resets:
+    maxItems: 1
+
+  st,mem2mem:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: if defined, it indicates that the controller
+      supports memory-to-memory transfer
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    dma-controller@40026400 {
+      compatible = "st,stm32-dma";
+      reg = <0x40026400 0x400>;
+      interrupts = <56>,
+                   <57>,
+                   <58>,
+                   <59>,
+                   <60>,
+                   <68>,
+                   <69>,
+                   <70>;
+      clocks = <&clk_hclk>;
+      #dma-cells = <4>;
+      st,mem2mem;
+      resets = <&rcc 150>;
+      dma-requests = <8>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
new file mode 100644
index 000000000000..915bc4af9568
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/st,stm32-dmamux.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 DMA MUX (DMA request router) bindings
+
+maintainers:
+  - Amelie Delaunay <amelie.delaunay@st.com>
+
+allOf:
+  - $ref: "dma-router.yaml#"
+
+properties:
+  "#dma-cells":
+    const: 3
+
+  compatible:
+    const: st,stm32h7-dmamux
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - dma-masters
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    dma-router@40020800 {
+      compatible = "st,stm32h7-dmamux";
+      reg = <0x40020800 0x3c>;
+      #dma-cells = <3>;
+      dma-requests = <128>;
+      dma-channels = <16>;
+      dma-masters = <&dma1 &dma2>;
+      clocks = <&timer_clk>;
+    };
+
+...
+
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
new file mode 100644
index 000000000000..c66543d0c267
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/st,stm32-mdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 MDMA Controller bindings
+
+description: |
+  The STM32 MDMA is a general-purpose direct memory access controller capable of
+  supporting 64 independent DMA channels with 256 HW requests.
+  DMA clients connected to the STM32 MDMA controller must use the format
+  described in the dma.txt file, using a five-cell specifier for each channel:
+  a phandle to the MDMA controller plus the following five integer cells:
+    1. The request line number
+    2. The priority level
+      0x0: Low
+      0x1: Medium
+      0x2: High
+      0x3: Very high
+    3. A 32bit mask specifying the DMA channel configuration
+      -bit 0-1: Source increment mode
+        0x0: Source address pointer is fixed
+        0x2: Source address pointer is incremented after each data transfer
+        0x3: Source address pointer is decremented after each data transfer
+      -bit 2-3: Destination increment mode
+        0x0: Destination address pointer is fixed
+        0x2: Destination address pointer is incremented after each data transfer
+        0x3: Destination address pointer is decremented after each data transfer
+      -bit 8-9: Source increment offset size
+        0x0: byte (8bit)
+        0x1: half-word (16bit)
+        0x2: word (32bit)
+        0x3: double-word (64bit)
+      -bit 10-11: Destination increment offset size
+        0x0: byte (8bit)
+        0x1: half-word (16bit)
+        0x2: word (32bit)
+        0x3: double-word (64bit)
+      -bit 25-18: The number of bytes to be transferred in a single transfer
+                  (min = 1 byte, max = 128 bytes)
+      -bit 29:28: Trigger Mode
+        0x00: Each MDMA request triggers a buffer transfer (max 128 bytes)
+        0x1: Each MDMA request triggers a block transfer (max 64K bytes)
+        0x2: Each MDMA request triggers a repeated block transfer
+        0x3: Each MDMA request triggers a linked list transfer
+    4. A 32bit value specifying the register to be used to acknowledge the request
+       if no HW ack signal is used by the MDMA client
+    5. A 32bit mask specifying the value to be written to acknowledge the request
+       if no HW ack signal is used by the MDMA client
+
+maintainers:
+  - Amelie Delaunay <amelie.delaunay@st.com>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  "#dma-cells":
+    const: 5
+
+  compatible:
+    const: st,stm32h7-mdma
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  st,ahb-addr-masks:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: Array of u32 mask to list memory devices addressed via AHB bus.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    dma-controller@52000000 {
+      compatible = "st,stm32h7-mdma";
+      reg = <0x52000000 0x1000>;
+      interrupts = <122>;
+      clocks = <&timer_clk>;
+      resets = <&rcc 992>;
+      #dma-cells = <5>;
+      dma-channels = <16>;
+      dma-requests = <32>;
+      st,ahb-addr-masks = <0x20000000>, <0x00000000>;
+    };
+
+...
+
diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
deleted file mode 100644
index c5f519097204..000000000000
--- a/Documentation/devicetree/bindings/dma/stm32-dma.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-* STMicroelectronics STM32 DMA controller
-
-The STM32 DMA is a general-purpose direct memory access controller capable of
-supporting 8 independent DMA channels. Each channel can have up to 8 requests.
-
-Required properties:
-- compatible: Should be "st,stm32-dma"
-- reg: Should contain DMA registers location and length. This should include
-  all of the per-channel registers.
-- interrupts: Should contain all of the per-channel DMA interrupts in
-  ascending order with respect to the DMA channel index.
-- clocks: Should contain the input clock of the DMA instance.
-- #dma-cells : Must be <4>. See DMA client paragraph for more details.
-
-Optional properties:
-- dma-requests : Number of DMA requests supported.
-- resets: Reference to a reset controller asserting the DMA controller
-- st,mem2mem: boolean; if defined, it indicates that the controller supports
-  memory-to-memory transfer
-
-Example:
-
-	dma2: dma-controller@40026400 {
-		compatible = "st,stm32-dma";
-		reg = <0x40026400 0x400>;
-		interrupts = <56>,
-			     <57>,
-			     <58>,
-			     <59>,
-			     <60>,
-			     <68>,
-			     <69>,
-			     <70>;
-		clocks = <&clk_hclk>;
-		#dma-cells = <4>;
-		st,mem2mem;
-		resets = <&rcc 150>;
-		dma-requests = <8>;
-	};
-
-* DMA client
-
-DMA clients connected to the STM32 DMA controller must use the format
-described in the dma.txt file, using a four-cell specifier for each
-channel: a phandle to the DMA controller plus the following four integer cells:
-
-1. The channel id
-2. The request line number
-3. A 32bit mask specifying the DMA channel configuration which are device
-   dependent:
-  -bit 9: Peripheral Increment Address
-	0x0: no address increment between transfers
-	0x1: increment address between transfers
- -bit 10: Memory Increment Address
-	0x0: no address increment between transfers
-	0x1: increment address between transfers
- -bit 15: Peripheral Increment Offset Size
-	0x0: offset size is linked to the peripheral bus width
-	0x1: offset size is fixed to 4 (32-bit alignment)
- -bit 16-17: Priority level
-	0x0: low
-	0x1: medium
-	0x2: high
-	0x3: very high
-4. A 32bit bitfield value specifying DMA features which are device dependent:
- -bit 0-1: DMA FIFO threshold selection
-	0x0: 1/4 full FIFO
-	0x1: 1/2 full FIFO
-	0x2: 3/4 full FIFO
-	0x3: full FIFO
-
-
-Example:
-
-	usart1: serial@40011000 {
-		compatible = "st,stm32-uart";
-		reg = <0x40011000 0x400>;
-		interrupts = <37>;
-		clocks = <&clk_pclk2>;
-		dmas = <&dma2 2 4 0x10400 0x3>,
-		       <&dma2 7 5 0x10200 0x3>;
-		dma-names = "rx", "tx";
-	};
diff --git a/Documentation/devicetree/bindings/dma/stm32-dmamux.txt b/Documentation/devicetree/bindings/dma/stm32-dmamux.txt
deleted file mode 100644
index 1b893b235507..000000000000
--- a/Documentation/devicetree/bindings/dma/stm32-dmamux.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-STM32 DMA MUX (DMA request router)
-
-Required properties:
-- compatible:	"st,stm32h7-dmamux"
-- reg:		Memory map for accessing module
-- #dma-cells:	Should be set to <3>.
-		First parameter is request line number.
-		Second is DMA channel configuration
-		Third is Fifo threshold
-		For more details about the three cells, please see
-		stm32-dma.txt documentation binding file
-- dma-masters:	Phandle pointing to the DMA controllers.
-		Several controllers are allowed. Only "st,stm32-dma" DMA
-		compatible are supported.
-
-Optional properties:
-- dma-channels : Number of DMA requests supported.
-- dma-requests : Number of DMAMUX requests supported.
-- resets: Reference to a reset controller asserting the DMA controller
-- clocks: Input clock of the DMAMUX instance.
-
-Example:
-
-/* DMA controller 1 */
-dma1: dma-controller@40020000 {
-	compatible = "st,stm32-dma";
-	reg = <0x40020000 0x400>;
-	interrupts = <11>,
-		     <12>,
-		     <13>,
-		     <14>,
-		     <15>,
-		     <16>,
-		     <17>,
-		     <47>;
-	clocks = <&timer_clk>;
-	#dma-cells = <4>;
-	st,mem2mem;
-	resets = <&rcc 150>;
-	dma-channels = <8>;
-	dma-requests = <8>;
-};
-
-/* DMA controller 1 */
-dma2: dma@40020400 {
-	compatible = "st,stm32-dma";
-	reg = <0x40020400 0x400>;
-	interrupts = <56>,
-		     <57>,
-		     <58>,
-		     <59>,
-		     <60>,
-		     <68>,
-		     <69>,
-		     <70>;
-	clocks = <&timer_clk>;
-	#dma-cells = <4>;
-	st,mem2mem;
-	resets = <&rcc 150>;
-	dma-channels = <8>;
-	dma-requests = <8>;
-};
-
-/* DMA mux */
-dmamux1: dma-router@40020800 {
-	compatible = "st,stm32h7-dmamux";
-	reg = <0x40020800 0x3c>;
-	#dma-cells = <3>;
-	dma-requests = <128>;
-	dma-channels = <16>;
-	dma-masters = <&dma1 &dma2>;
-	clocks = <&timer_clk>;
-};
-
-/* DMA client */
-usart1: serial@40011000 {
-	compatible = "st,stm32-usart", "st,stm32-uart";
-	reg = <0x40011000 0x400>;
-	interrupts = <37>;
-	clocks = <&timer_clk>;
-	dmas = <&dmamux1 41 0x414 0>,
-	       <&dmamux1 42 0x414 0>;
-	dma-names = "rx", "tx";
-};
diff --git a/Documentation/devicetree/bindings/dma/stm32-mdma.txt b/Documentation/devicetree/bindings/dma/stm32-mdma.txt
deleted file mode 100644
index d18772d6bc65..000000000000
--- a/Documentation/devicetree/bindings/dma/stm32-mdma.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-* STMicroelectronics STM32 MDMA controller
-
-The STM32 MDMA is a general-purpose direct memory access controller capable of
-supporting 64 independent DMA channels with 256 HW requests.
-
-Required properties:
-- compatible: Should be "st,stm32h7-mdma"
-- reg: Should contain MDMA registers location and length. This should include
-  all of the per-channel registers.
-- interrupts: Should contain the MDMA interrupt.
-- clocks: Should contain the input clock of the DMA instance.
-- resets: Reference to a reset controller asserting the DMA controller.
-- #dma-cells : Must be <5>. See DMA client paragraph for more details.
-
-Optional properties:
-- dma-channels: Number of DMA channels supported by the controller.
-- dma-requests: Number of DMA request signals supported by the controller.
-- st,ahb-addr-masks: Array of u32 mask to list memory devices addressed via
-  AHB bus.
-
-Example:
-
-	mdma1: dma@52000000 {
-		compatible = "st,stm32h7-mdma";
-		reg = <0x52000000 0x1000>;
-		interrupts = <122>;
-		clocks = <&timer_clk>;
-		resets = <&rcc 992>;
-		#dma-cells = <5>;
-		dma-channels = <16>;
-		dma-requests = <32>;
-		st,ahb-addr-masks = <0x20000000>, <0x00000000>;
-	};
-
-* DMA client
-
-DMA clients connected to the STM32 MDMA controller must use the format
-described in the dma.txt file, using a five-cell specifier for each channel:
-a phandle to the MDMA controller plus the following five integer cells:
-
-1. The request line number
-2. The priority level
-	0x00: Low
-	0x01: Medium
-	0x10: High
-	0x11: Very high
-3. A 32bit mask specifying the DMA channel configuration
- -bit 0-1: Source increment mode
-	0x00: Source address pointer is fixed
-	0x10: Source address pointer is incremented after each data transfer
-	0x11: Source address pointer is decremented after each data transfer
- -bit 2-3: Destination increment mode
-	0x00: Destination address pointer is fixed
-	0x10: Destination address pointer is incremented after each data
-	transfer
-	0x11: Destination address pointer is decremented after each data
-	transfer
- -bit 8-9: Source increment offset size
-	0x00: byte (8bit)
-	0x01: half-word (16bit)
-	0x10: word (32bit)
-	0x11: double-word (64bit)
- -bit 10-11: Destination increment offset size
-	0x00: byte (8bit)
-	0x01: half-word (16bit)
-	0x10: word (32bit)
-	0x11: double-word (64bit)
--bit 25-18: The number of bytes to be transferred in a single transfer
-	(min = 1 byte, max = 128 bytes)
--bit 29:28: Trigger Mode
-	0x00: Each MDMA request triggers a buffer transfer (max 128 bytes)
-	0x01: Each MDMA request triggers a block transfer (max 64K bytes)
-	0x10: Each MDMA request triggers a repeated block transfer
-	0x11: Each MDMA request triggers a linked list transfer
-4. A 32bit value specifying the register to be used to acknowledge the request
-   if no HW ack signal is used by the MDMA client
-5. A 32bit mask specifying the value to be written to acknowledge the request
-   if no HW ack signal is used by the MDMA client
-
-Example:
-
-	i2c4: i2c@5c002000 {
-		compatible = "st,stm32f7-i2c";
-		reg = <0x5c002000 0x400>;
-		interrupts = <95>,
-			     <96>;
-		clocks = <&timer_clk>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		dmas = <&mdma1 36 0x0 0x40008 0x0 0x0>,
-		       <&mdma1 37 0x0 0x40002 0x0 0x0>;
-		dma-names = "rx", "tx";
-		status = "disabled";
-	};
diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt
index 4bbc94d829c8..0e1398f93aa2 100644
--- a/Documentation/devicetree/bindings/dma/ti-edma.txt
+++ b/Documentation/devicetree/bindings/dma/ti-edma.txt
@@ -42,6 +42,11 @@ Optional properties:
 - ti,edma-reserved-slot-ranges: PaRAM slot ranges which should not be used by
 		the driver, they are allocated to be used by for example the
 		DSP. See example.
+- dma-channel-mask: Mask of usable channels.
+		Single uint32 for EDMA with 32 channels, array of two uint32 for
+		EDMA with 64 channels. See example and
+		Documentation/devicetree/bindings/dma/dma-common.yaml
+
 
 ------------------------------------------------------------------------------
 eDMA3 Transfer Controller
@@ -91,6 +96,9 @@ edma: edma@49000000 {
 	ti,edma-memcpy-channels = <20 21>;
 	/* The following PaRAM slots are reserved: 35-44 and 100-109 */
 	ti,edma-reserved-slot-ranges = <35 10>, <100 10>;
+	/* The following channels are reserved: 35-44 */
+	dma-channel-mask = <0xffffffff /* Channel 0-31 */
+			    0xffffe007>; /* Channel 32-63 */
 };
 
 edma_tptc0: tptc@49800000 {
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
new file mode 100644
index 000000000000..8b5c346f23f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings
+
+maintainers:
+  - Peter Ujfalusi <peter.ujfalusi@ti.com>
+
+description: |
+  The UDMA-P is intended to perform similar (but significantly upgraded)
+  functions as the packet-oriented DMA used on previous SoC devices. The UDMA-P
+  module supports the transmission and reception of various packet types.
+  The UDMA-P architecture facilitates the segmentation and reassembly of SoC DMA
+  data structure compliant packets to/from smaller data blocks that are natively
+  compatible with the specific requirements of each connected peripheral.
+  Multiple Tx and Rx channels are provided within the DMA which allow multiple
+  segmentation or reassembly operations to be ongoing. The DMA controller
+  maintains state information for each of the channels which allows packet
+  segmentation and reassembly operations to be time division multiplexed between
+  channels in order to share the underlying DMA hardware. An external DMA
+  scheduler is used to control the ordering and rate at which this multiplexing
+  occurs for Transmit operations. The ordering and rate of Receive operations
+  is indirectly controlled by the order in which blocks are pushed into the DMA
+  on the Rx PSI-L interface.
+
+  The UDMA-P also supports acting as both a UTC and UDMA-C for its internal
+  channels. Channels in the UDMA-P can be configured to be either Packet-Based
+  or Third-Party channels on a channel by channel basis.
+
+  All transfers within NAVSS is done between PSI-L source and destination
+  threads.
+  The peripherals serviced by UDMA can be PSI-L native (sa2ul, cpsw, etc) or
+  legacy, non PSI-L native peripherals. In the later case a special, small PDMA
+  is tasked to act as a bridge between the PSI-L fabric and the legacy
+  peripheral.
+
+  PDMAs can be configured via UDMAP peer registers to match with the
+  configuration of the legacy peripheral.
+
+allOf:
+  - $ref: "../dma-controller.yaml#"
+
+properties:
+  "#dma-cells":
+    const: 1
+    description: |
+      The cell is the PSI-L  thread ID of the remote (to UDMAP) end.
+      Valid ranges for thread ID depends on the data movement direction:
+      for source thread IDs (rx): 0 - 0x7fff
+      for destination thread IDs (tx): 0x8000 - 0xffff
+
+      Please refer to the device documentation for the PSI-L thread map and also
+      the PSI-L peripheral chapter for the correct thread ID.
+
+  compatible:
+    enum:
+      - ti,am654-navss-main-udmap
+      - ti,am654-navss-mcu-udmap
+      - ti,j721e-navss-main-udmap
+      - ti,j721e-navss-mcu-udmap
+
+  reg:
+    maxItems: 3
+
+  reg-names:
+   items:
+     - const: gcfg
+     - const: rchanrt
+     - const: tchanrt
+
+  msi-parent: true
+
+  ti,sci:
+    description: phandle to TI-SCI compatible System controller node
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle
+
+  ti,sci-dev-id:
+    description: TI-SCI device id of UDMAP
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+
+  ti,ringacc:
+    description: phandle to the ring accelerator node
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle
+
+  ti,sci-rm-range-tchan:
+    description: |
+      Array of UDMA tchan resource subtypes for resource allocation for this
+      host
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    # Should be enough
+    maxItems: 255
+
+  ti,sci-rm-range-rchan:
+    description: |
+      Array of UDMA rchan resource subtypes for resource allocation for this
+      host
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    # Should be enough
+    maxItems: 255
+
+  ti,sci-rm-range-rflow:
+    description: |
+      Array of UDMA rflow resource subtypes for resource allocation for this
+      host
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    # Should be enough
+    maxItems: 255
+
+required:
+  - compatible
+  - "#dma-cells"
+  - reg
+  - reg-names
+  - msi-parent
+  - ti,sci
+  - ti,sci-dev-id
+  - ti,ringacc
+  - ti,sci-rm-range-tchan
+  - ti,sci-rm-range-rchan
+  - ti,sci-rm-range-rflow
+
+examples:
+  - |+
+    cbass_main {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        cbass_main_navss: navss@30800000 {
+            compatible = "simple-mfd";
+            #address-cells = <2>;
+            #size-cells = <2>;
+            dma-coherent;
+            dma-ranges;
+            ranges;
+
+            ti,sci-dev-id = <118>;
+
+            main_udmap: dma-controller@31150000 {
+                compatible = "ti,am654-navss-main-udmap";
+                reg = <0x0 0x31150000 0x0 0x100>,
+                      <0x0 0x34000000 0x0 0x100000>,
+                      <0x0 0x35000000 0x0 0x100000>;
+                reg-names = "gcfg", "rchanrt", "tchanrt";
+                #dma-cells = <1>;
+
+                ti,ringacc = <&ringacc>;
+
+                msi-parent = <&inta_main_udmass>;
+
+                ti,sci = <&dmsc>;
+                ti,sci-dev-id = <188>;
+
+                ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */
+                                        <0x2>; /* TX_CHAN */
+                ti,sci-rm-range-rchan = <0x4>, /* RX_HCHAN */
+                                        <0x5>; /* RX_CHAN */
+                ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */
+            };
+        };
+
+        mcasp0: mcasp@02B00000 {
+            dmas = <&main_udmap 0xc400>, <&main_udmap 0x4400>;
+            dma-names = "tx", "rx";
+        };
+
+        crypto: crypto@4E00000 {
+            compatible = "ti,sa2ul-crypto";
+
+            dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, <&main_udmap 0x4001>;
+            dma-names = "tx", "rx1", "rx2";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
index 93b6d961dd4f..325aca52cd43 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
@@ -11,9 +11,16 @@ is to receive from the device.
 Xilinx AXI CDMA engine, it does transfers between memory-mapped source
 address and a memory-mapped destination address.
 
+Xilinx AXI MCDMA engine, it does transfer between memory and AXI4 stream
+target devices. It can be configured to have up to 16 independent transmit
+and receive channels.
+
 Required properties:
-- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or
-	      "xlnx,axi-cdma-1.00.a""
+- compatible: Should be one of-
+		"xlnx,axi-vdma-1.00.a"
+		"xlnx,axi-dma-1.00.a"
+		"xlnx,axi-cdma-1.00.a"
+		"xlnx,axi-mcdma-1.00.a"
 - #dma-cells: Should be <1>, see "dmas" property below
 - reg: Should contain VDMA registers location and length.
 - xlnx,addrwidth: Should be the vdma addressing size in bits(ex: 32 bits).
@@ -29,7 +36,7 @@ Required properties:
 			   "m_axis_mm2s_aclk", "s_axis_s2mm_aclk"
 	For CDMA:
 	Required elements: "s_axi_lite_aclk", "m_axi_aclk"
-	FOR AXIDMA:
+	For AXIDMA and MCDMA:
 	Required elements: "s_axi_lite_aclk"
 	Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
 			   "m_axi_sg_aclk"
@@ -37,12 +44,11 @@ Required properties:
 Required properties for VDMA:
 - xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
 
-Optional properties for AXI DMA:
+Optional properties for AXI DMA and MCDMA:
 - xlnx,sg-length-width: Should be set to the width in bits of the length
 	register as configured in h/w. Takes values {8...26}. If the property
 	is missing or invalid then the default value 23 is used. This is the
 	maximum value that is supported by all IP versions.
-- xlnx,mcdma: Tells whether configured for multi-channel mode in the hardware.
 Optional properties for VDMA:
 - xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
 	It takes following values:
@@ -55,8 +61,8 @@ Required child node properties:
 	For VDMA: It should be either "xlnx,axi-vdma-mm2s-channel" or
 	"xlnx,axi-vdma-s2mm-channel".
 	For CDMA: It should be "xlnx,axi-cdma-channel".
-	For AXIDMA: It should be either "xlnx,axi-dma-mm2s-channel" or
-	"xlnx,axi-dma-s2mm-channel".
+	For AXIDMA and MCDMA: It should be either "xlnx,axi-dma-mm2s-channel"
+	or "xlnx,axi-dma-s2mm-channel".
 - interrupts: Should contain per channel VDMA interrupts.
 - xlnx,datawidth: Should contain the stream data width, take values
 	{32,64...1024}.
@@ -69,8 +75,8 @@ Optional child node properties for VDMA:
 	enabled/disabled in hardware.
 - xlnx,enable-vert-flip: Tells vertical flip is
 	enabled/disabled in hardware(S2MM path).
-Optional child node properties for AXI DMA:
--dma-channels: Number of dma channels in child node.
+Optional child node properties for MCDMA:
+- dma-channels: Number of dma channels in child node.
 
 Example:
 ++++++++
diff --git a/Documentation/devicetree/bindings/eeprom/at24.txt b/Documentation/devicetree/bindings/eeprom/at24.txt
index 22aead844d0f..c94acbb8cb0c 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.txt
+++ b/Documentation/devicetree/bindings/eeprom/at24.txt
@@ -1,89 +1 @@
-EEPROMs (I2C)
-
-Required properties:
-
-  - compatible: Must be a "<manufacturer>,<model>" pair. The following <model>
-                values are supported (assuming "atmel" as manufacturer):
-
-                "atmel,24c00",
-                "atmel,24c01",
-                "atmel,24cs01",
-                "atmel,24c02",
-                "atmel,24cs02",
-                "atmel,24mac402",
-                "atmel,24mac602",
-                "atmel,spd",
-                "atmel,24c04",
-                "atmel,24cs04",
-                "atmel,24c08",
-                "atmel,24cs08",
-                "atmel,24c16",
-                "atmel,24cs16",
-                "atmel,24c32",
-                "atmel,24cs32",
-                "atmel,24c64",
-                "atmel,24cs64",
-                "atmel,24c128",
-                "atmel,24c256",
-                "atmel,24c512",
-                "atmel,24c1024",
-                "atmel,24c2048",
-
-                If <manufacturer> is not "atmel", then a fallback must be used
-                with the same <model> and "atmel" as manufacturer.
-
-                Example:
-                        compatible = "microchip,24c128", "atmel,24c128";
-
-                Supported manufacturers are:
-
-                "catalyst",
-                "microchip",
-                "nxp",
-                "ramtron",
-                "renesas",
-                "rohm",
-                "st",
-
-                Some vendors use different model names for chips which are just
-                variants of the above. Known such exceptions are listed below:
-
-                "nxp,se97b" - the fallback is "atmel,24c02",
-                "renesas,r1ex24002" - the fallback is "atmel,24c02"
-                "renesas,r1ex24016" - the fallback is "atmel,24c16"
-                "renesas,r1ex24128" - the fallback is "atmel,24c128"
-                "rohm,br24t01" - the fallback is "atmel,24c01"
-
-  - reg: The I2C address of the EEPROM.
-
-Optional properties:
-
-  - pagesize: The length of the pagesize for writing. Please consult the
-              manual of your device, that value varies a lot. A wrong value
-              may result in data loss! If not specified, a safety value of
-              '1' is used which will be very slow.
-
-  - read-only: This parameterless property disables writes to the eeprom.
-
-  - size: Total eeprom size in bytes.
-
-  - no-read-rollover: This parameterless property indicates that the
-                      multi-address eeprom does not automatically roll over
-                      reads to the next slave address. Please consult the
-                      manual of your device.
-
-  - wp-gpios: GPIO to which the write-protect pin of the chip is connected.
-
-  - address-width: number of address bits (one of 8, 16).
-
-  - num-addresses: total number of i2c slave addresses this device takes
-
-Example:
-
-eeprom@52 {
-	compatible = "atmel,24c32";
-	reg = <0x52>;
-	pagesize = <32>;
-	wp-gpios = <&gpio1 3 0>;
-	num-addresses = <8>;
-};
+This file has been moved to at24.yaml.
diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml
new file mode 100644
index 000000000000..e8778560d966
--- /dev/null
+++ b/Documentation/devicetree/bindings/eeprom/at24.yaml
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright 2019 BayLibre SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/eeprom/at24.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: I2C EEPROMs compatible with Atmel's AT24
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+select:
+  properties:
+    compatible:
+      contains:
+        pattern: "^atmel,(24(c|cs|mac)[0-9]+|spd)$"
+  required:
+    - compatible
+
+properties:
+  $nodename:
+    pattern: "^eeprom@[0-9a-f]{1,2}$"
+
+  # There are multiple known vendors who manufacture EEPROM chips compatible
+  # with Atmel's AT24. The compatible string requires either a single item
+  # if the memory comes from Atmel (in which case the vendor part must be
+  # 'atmel') or two items with the same 'model' part where the vendor part of
+  # the first one is the actual manufacturer and the second item is the
+  # corresponding 'atmel,<model>' from Atmel.
+  compatible:
+    oneOf:
+      - allOf:
+          - minItems: 1
+            maxItems: 2
+            items:
+              - pattern: "^(atmel|catalyst|microchip|nxp|ramtron|renesas|rohm|st),(24(c|cs|mac)[0-9]+|spd)$"
+              - pattern: "^atmel,(24(c|cs|mac)[0-9]+|spd)$"
+          - oneOf:
+              - items:
+                  pattern: c00$
+              - items:
+                  pattern: c01$
+              - items:
+                  pattern: cs01$
+              - items:
+                  pattern: c02$
+              - items:
+                  pattern: cs02$
+              - items:
+                  pattern: mac402$
+              - items:
+                  pattern: mac602$
+              - items:
+                  pattern: c04$
+              - items:
+                  pattern: cs04$
+              - items:
+                  pattern: c08$
+              - items:
+                  pattern: cs08$
+              - items:
+                  pattern: c16$
+              - items:
+                  pattern: cs16$
+              - items:
+                  pattern: c32$
+              - items:
+                  pattern: cs32$
+              - items:
+                  pattern: c64$
+              - items:
+                  pattern: cs64$
+              - items:
+                  pattern: c128$
+              - items:
+                  pattern: cs128$
+              - items:
+                  pattern: c256$
+              - items:
+                  pattern: cs256$
+              - items:
+                  pattern: c512$
+              - items:
+                  pattern: cs512$
+              - items:
+                  pattern: c1024$
+              - items:
+                  pattern: cs1024$
+              - items:
+                  pattern: c2048$
+              - items:
+                  pattern: cs2048$
+              - items:
+                  pattern: spd$
+      # These are special cases that don't conform to the above pattern.
+      # Each requires a standard at24 model as fallback.
+      - items:
+          - const: rohm,br24t01
+          - const: atmel,24c01
+      - items:
+          - const: nxp,se97b
+          - const: atmel,24c02
+      - items:
+          - const: renesas,r1ex24002
+          - const: atmel,24c02
+      - items:
+          - const: renesas,r1ex24016
+          - const: atmel,24c16
+      - items:
+          - const: giantec,gt24c32a
+          - const: atmel,24c32
+      - items:
+          - const: renesas,r1ex24128
+          - const: atmel,24c128
+
+  reg:
+    maxItems: 1
+
+  pagesize:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The length of the pagesize for writing. Please consult the
+      manual of your device, that value varies a lot. A wrong value
+      may result in data loss! If not specified, a safety value of
+      '1' is used which will be very slow.
+    enum: [ 1, 8, 16, 32, 64, 128, 258 ]
+    default: 1
+
+  read-only:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Disables writes to the eeprom.
+
+  size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Total eeprom size in bytes.
+
+  no-read-rollover:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Indicates that the multi-address eeprom does not automatically roll
+      over reads to the next slave address. Please consult the manual of
+      your device.
+
+  wp-gpios:
+    description:
+      GPIO to which the write-protect pin of the chip is connected.
+    maxItems: 1
+
+  address-width:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Number of address bits.
+    default: 8
+    enum: [ 8, 16 ]
+
+  num-addresses:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Total number of i2c slave addresses this device takes.
+    default: 1
+    minimum: 1
+    maximum: 8
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      eeprom@52 {
+          compatible = "microchip,24c32", "atmel,24c32";
+          reg = <0x52>;
+          pagesize = <32>;
+          wp-gpios = <&gpio1 3 0>;
+          num-addresses = <8>;
+      };
+    };
+...
diff --git a/Documentation/devicetree/bindings/example-schema.yaml b/Documentation/devicetree/bindings/example-schema.yaml
index c43819c2783a..4ddcf709cc3c 100644
--- a/Documentation/devicetree/bindings/example-schema.yaml
+++ b/Documentation/devicetree/bindings/example-schema.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 # Copyright 2018 Linaro Ltd.
 %YAML 1.2
 ---
@@ -71,7 +71,7 @@ properties:
     # minItems/maxItems equal to 2 is implied
 
   reg-names:
-    # The core schema enforces this is a string array
+    # The core schema enforces this (*-names) is a string array
     items:
       - const: core
       - const: aux
@@ -79,7 +79,8 @@ properties:
   clocks:
     # Cases that have only a single entry just need to express that with maxItems
     maxItems: 1
-    description: bus clock
+    description: bus clock. A description is only needed for a single item if
+      there's something unique to add.
 
   clock-names:
     items:
@@ -127,6 +128,14 @@ properties:
     maxItems: 1
     description: A connection of the 'foo' gpio line.
 
+  # *-supply is always a single phandle, so nothing more to define.
+  foo-supply: true
+
+  # Vendor specific properties
+  #
+  # Vendor specific properties have slightly different schema requirements than
+  # common properties. They must have at least a type definition and
+  # 'description'.
   vendor,int-property:
     description: Vendor specific properties must have a description
     # 'allOf' is the json-schema way of subclassing a schema. Here the base
@@ -137,9 +146,9 @@ properties:
       - enum: [2, 4, 6, 8, 10]
 
   vendor,bool-property:
-    description: Vendor specific properties must have a description
-    # boolean properties is one case where the json-schema 'type' keyword
-    # can be used directly
+    description: Vendor specific properties must have a description. Boolean
+      properties are one case where the json-schema 'type' keyword can be used
+      directly.
     type: boolean
 
   vendor,string-array-property:
@@ -151,14 +160,72 @@ properties:
           - enum: [ foo, bar ]
           - enum: [ baz, boo ]
 
+  vendor,property-in-standard-units-microvolt:
+    description: Vendor specific properties having a standard unit suffix
+      don't need a type.
+    enum: [ 100, 200, 300 ]
+
+  child-node:
+    description: Child nodes are just another property from a json-schema
+      perspective.
+    type: object  # DT nodes are json objects
+    properties:
+      vendor,a-child-node-property:
+        description: Child node properties have all the same schema
+          requirements.
+        type: boolean
+
+    required:
+      - vendor,a-child-node-property
+
+# Describe the relationship between different properties
+dependencies:
+  # 'vendor,bool-property' is only allowed when 'vendor,string-array-property'
+  # is present
+  vendor,bool-property: [ vendor,string-array-property ]
+  # Expressing 2 properties in both orders means all of the set of properties
+  # must be present or none of them.
+  vendor,string-array-property: [ vendor,bool-property ]
+
 required:
   - compatible
   - reg
   - interrupts
   - interrupt-controller
 
+# if/then schema can be used to handle conditions on a property affecting
+# another property. A typical case is a specific 'compatible' value changes the
+# constraints on other properties.
+#
+# For multiple 'if' schema, group them under an 'allOf'.
+#
+# If the conditionals become too unweldy, then it may be better to just split
+# the binding into separate schema documents.
+if:
+  properties:
+    compatible:
+      contains:
+        const: vendor,soc2-ip
+then:
+  required:
+    - foo-supply
+
+# Ideally, the schema should have this line otherwise any other properties
+# present are allowed. There's a few common properties such as 'status' and
+# 'pinctrl-*' which are added automatically by the tooling.
+#
+# This can't be used in cases where another schema is referenced
+# (i.e. allOf: [{$ref: ...}]).
+additionalProperties: false
+
 examples:
-  # Examples are now compiled with dtc
+  # Examples are now compiled with dtc and validated against the schemas
+  #
+  # Examples have a default #address-cells and #size-cells value of 1. This can
+  # be overridden or an appropriate parent bus node should be shown (such as on
+  # i2c buses).
+  #
+  # Any includes used have to be explicitly included.
   - |
     node@1000 {
           compatible = "vendor,soc4-ip", "vendor,soc1-ip";
diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
index 4f0db8ee226a..878a2079ebb6 100644
--- a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
+++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
@@ -25,8 +25,6 @@ properties:
           - const: intel,ixp4xx-network-processing-engine
 
   reg:
-    minItems: 3
-    maxItems: 3
     items:
       - description: NPE0 register range
       - description: NPE1 register range
diff --git a/Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.txt b/Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.txt
index ff380dadb5f9..e44a13bc06ed 100644
--- a/Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.txt
+++ b/Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.txt
@@ -32,7 +32,7 @@ implemented by this node:
 
 - .../clock/clock-bindings.txt
 - <dt-bindings/clock/tegra186-clock.h>
-- ../power/power_domain.txt
+- ../power/power-domain.yaml
 - <dt-bindings/power/tegra186-powergate.h>
 - .../reset/reset.txt
 - <dt-bindings/reset/tegra186-reset.h>
diff --git a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
index a4fe136be2ba..18c3aea90df2 100644
--- a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
+++ b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
@@ -11,7 +11,9 @@ power management service, FPGA service and other platform management
 services.
 
 Required properties:
- - compatible:	Must contain:	"xlnx,zynqmp-firmware"
+ - compatible:	Must contain any of below:
+		"xlnx,zynqmp-firmware" for Zynq Ultrascale+ MPSoC
+		"xlnx,versal-firmware" for Versal
  - method:	The method of calling the PM-API firmware layer.
 		Permitted values are:
 		  - "smc" : SMC #0, following the SMCCC
@@ -21,6 +23,8 @@ Required properties:
 Example
 -------
 
+Zynq Ultrascale+ MPSoC
+----------------------
 firmware {
 	zynqmp_firmware: zynqmp-firmware {
 		compatible = "xlnx,zynqmp-firmware";
@@ -28,3 +32,13 @@ firmware {
 		...
 	};
 };
+
+Versal
+------
+firmware {
+	versal_firmware: versal-firmware {
+		compatible = "xlnx,versal-firmware";
+		method = "smc";
+		...
+	};
+};
diff --git a/Documentation/devicetree/bindings/fsi/fsi-master-aspeed.txt b/Documentation/devicetree/bindings/fsi/fsi-master-aspeed.txt
new file mode 100644
index 000000000000..b758f91914f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/fsi/fsi-master-aspeed.txt
@@ -0,0 +1,24 @@
+Device-tree bindings for AST2600 FSI master
+-------------------------------------------
+
+The AST2600 contains two identical FSI masters. They share a clock and have a
+separate interrupt line and output pins.
+
+Required properties:
+ - compatible: "aspeed,ast2600-fsi-master"
+ - reg: base address and length
+ - clocks: phandle and clock number
+ - interrupts: platform dependent interrupt description
+ - pinctrl-0: phandle to pinctrl node
+ - pinctrl-names: pinctrl state
+
+Examples:
+
+    fsi-master {
+        compatible = "aspeed,ast2600-fsi-master", "fsi-master";
+        reg = <0x1e79b000 0x94>;
+	interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_fsi1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_FSICLK>;
+    };
diff --git a/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml b/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml
new file mode 100644
index 000000000000..64e279a4bc10
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/brcm,xgs-iproc-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom XGS iProc GPIO controller
+
+maintainers:
+  - Chris Packham <chris.packham@alliedtelesis.co.nz>
+
+description: |
+  This controller is the Chip Common A GPIO present on a number of Broadcom
+  switch ASICs with integrated SoCs.
+
+properties:
+  compatible:
+    const: brcm,iproc-gpio-cca
+
+  reg:
+    items:
+      - description: the I/O address containing the GPIO controller
+                     registers.
+      - description: the I/O address containing the Chip Common A interrupt
+                     registers.
+
+  gpio-controller: true
+
+  '#gpio-cells':
+      const: 2
+
+  ngpios:
+    minimum: 0
+    maximum: 32
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - "#gpio-cells"
+  - gpio-controller
+
+dependencies:
+  interrupt-controller: [ interrupts ]
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    gpio@18000060 {
+        compatible = "brcm,iproc-gpio-cca";
+        #gpio-cells = <2>;
+        reg = <0x18000060 0x50>,
+              <0x18000000 0x50>;
+        ngpios = <12>;
+        gpio-controller;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+
+...
diff --git a/Documentation/devicetree/bindings/gpio/gpio-rda.yaml b/Documentation/devicetree/bindings/gpio/gpio-rda.yaml
new file mode 100644
index 000000000000..6ece555f074f
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-rda.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/gpio-rda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RDA Micro GPIO controller
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+  compatible:
+    const: rda,8810pl-gpio
+
+  reg:
+    maxItems: 1
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  ngpios:
+    description:
+      Number of available gpios in a bank.
+    minimum: 1
+    maximum: 32
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+  - ngpios
+  - interrupt-controller
+  - "#interrupt-cells"
+  - interrupts
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/gpio/qcom,wcd934x-gpio.yaml b/Documentation/devicetree/bindings/gpio/qcom,wcd934x-gpio.yaml
new file mode 100644
index 000000000000..32a566ec3558
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/qcom,wcd934x-gpio.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/qcom,wcd934x-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: WCD9340/WCD9341 GPIO controller
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+  Qualcomm Technologies Inc WCD9340/WCD9341 Audio Codec has integrated
+  gpio controller to control 5 gpios on the chip.
+
+properties:
+  compatible:
+    enum:
+      - qcom,wcd9340-gpio
+      - qcom,wcd9341-gpio
+
+  reg:
+    maxItems: 1
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    wcdgpio: gpio@42 {
+        compatible = "qcom,wcd9340-gpio";
+        reg = <0x042 0x2>;
+        gpio-controller;
+        #gpio-cells = <2>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
index f3f2c468c1b6..10dce84b1545 100644
--- a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
+++ b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
@@ -8,6 +8,7 @@ Required Properties:
     - "renesas,gpio-r8a7745": for R8A7745 (RZ/G1E) compatible GPIO controller.
     - "renesas,gpio-r8a77470": for R8A77470 (RZ/G1C) compatible GPIO controller.
     - "renesas,gpio-r8a774a1": for R8A774A1 (RZ/G2M) compatible GPIO controller.
+    - "renesas,gpio-r8a774b1": for R8A774B1 (RZ/G2N) compatible GPIO controller.
     - "renesas,gpio-r8a774c0": for R8A774C0 (RZ/G2E) compatible GPIO controller.
     - "renesas,gpio-r8a7778": for R8A7778 (R-Car M1) compatible GPIO controller.
     - "renesas,gpio-r8a7779": for R8A7779 (R-Car H1) compatible GPIO controller.
@@ -17,7 +18,8 @@ Required Properties:
     - "renesas,gpio-r8a7793": for R8A7793 (R-Car M2-N) compatible GPIO controller.
     - "renesas,gpio-r8a7794": for R8A7794 (R-Car E2) compatible GPIO controller.
     - "renesas,gpio-r8a7795": for R8A7795 (R-Car H3) compatible GPIO controller.
-    - "renesas,gpio-r8a7796": for R8A7796 (R-Car M3-W) compatible GPIO controller.
+    - "renesas,gpio-r8a7796": for R8A77960 (R-Car M3-W) compatible GPIO controller.
+    - "renesas,gpio-r8a77961": for R8A77961 (R-Car M3-W+) compatible GPIO controller.
     - "renesas,gpio-r8a77965": for R8A77965 (R-Car M3-N) compatible GPIO controller.
     - "renesas,gpio-r8a77970": for R8A77970 (R-Car V3M) compatible GPIO controller.
     - "renesas,gpio-r8a77980": for R8A77980 (R-Car V3H) compatible GPIO controller.
diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
new file mode 100644
index 000000000000..418e8381e07c
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/sifive,gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive GPIO controller
+
+maintainers:
+  - Yash Shah <yash.shah@sifive.com>
+  - Paul Walmsley <paul.walmsley@sifive.com>
+
+properties:
+  compatible:
+    items:
+      - const: sifive,fu540-c000-gpio
+      - const: sifive,gpio0
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      interrupt mapping one per GPIO. Maximum 16 GPIOs.
+    minItems: 1
+    maxItems: 16
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+  clocks:
+    maxItems: 1
+
+  "#gpio-cells":
+    const: 2
+
+  gpio-controller: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-controller
+  - "#interrupt-cells"
+  - clocks
+  - "#gpio-cells"
+  - gpio-controller
+
+additionalProperties: false
+
+examples:
+  - |
+      #include <dt-bindings/clock/sifive-fu540-prci.h>
+      gpio@10060000 {
+        compatible = "sifive,fu540-c000-gpio", "sifive,gpio0";
+        interrupt-parent = <&plic>;
+        interrupts = <7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22>;
+        reg = <0x0 0x10060000 0x0 0x1000>;
+        clocks = <&tlclk PRCI_CLK_TLCLK>;
+        gpio-controller;
+        #gpio-cells = <2>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+      };
+
+...
diff --git a/Documentation/devicetree/bindings/gpio/xylon,logicvc-gpio.yaml b/Documentation/devicetree/bindings/gpio/xylon,logicvc-gpio.yaml
new file mode 100644
index 000000000000..d102888c1be7
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/xylon,logicvc-gpio.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Bootlin
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/gpio/xylon,logicvc-gpio.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Xylon LogiCVC GPIO controller
+
+maintainers:
+  - Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+
+description: |
+  The LogiCVC GPIO describes the GPIO block included in the LogiCVC display
+  controller. These are meant to be used for controlling display-related
+  signals.
+
+  The controller exposes GPIOs from the display and power control registers,
+  which are mapped by the driver as follows:
+  - GPIO[4:0] (display control) mapped to index 0-4
+  - EN_BLIGHT (power control) mapped to index 5
+  - EN_VDD (power control) mapped to index 6
+  - EN_VEE (power control) mapped to index 7
+  - V_EN (power control) mapped to index 8
+
+properties:
+  $nodename:
+    pattern: "^gpio@[0-9a-f]+$"
+
+  compatible:
+    enum:
+      - xylon,logicvc-3.02.a-gpio
+
+  reg:
+    maxItems: 1
+
+  "#gpio-cells":
+    const: 2
+
+  gpio-controller: true
+
+  gpio-line-names:
+    minItems: 1
+    maxItems: 9
+
+required:
+  - compatible
+  - reg
+  - "#gpio-cells"
+  - gpio-controller
+
+examples:
+  - |
+    logicvc: logicvc@43c00000 {
+      compatible = "xylon,logicvc-3.02.a", "syscon", "simple-mfd";
+      reg = <0x43c00000 0x6000>;
+
+      #address-cells = <1>;
+      #size-cells = <1>;
+
+      logicvc_gpio: gpio@40 {
+        compatible = "xylon,logicvc-3.02.a-gpio";
+        reg = <0x40 0x40>;
+        gpio-controller;
+        #gpio-cells = <2>;
+        gpio-line-names = "GPIO0", "GPIO1", "GPIO2", "GPIO3", "GPIO4",
+               "EN_BLIGHT", "EN_VDD", "EN_VEE", "V_EN";
+      };
+    };
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
index 5f1fd6d7ee0f..0c426e371e71 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
@@ -17,6 +17,7 @@ properties:
     items:
       - enum:
           - amlogic,meson-g12a-mali
+          - realtek,rtd1619-mali
       - const: arm,mali-bifrost # Mali Bifrost GPU model/revision is fully discoverable
 
   reg:
@@ -37,8 +38,7 @@ properties:
   clocks:
     maxItems: 1
 
-  mali-supply:
-    maxItems: 1
+  mali-supply: true
 
   operating-points-v2: true
 
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
index 47bc1ac36426..36f59b3ade71 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
@@ -16,31 +16,35 @@ properties:
     oneOf:
       - items:
           - enum:
+             - samsung,exynos5250-mali
+          - const: arm,mali-t604
+      - items:
+          - enum:
+             - samsung,exynos5420-mali
+          - const: arm,mali-t628
+      - items:
+          - enum:
              - allwinner,sun50i-h6-mali
           - const: arm,mali-t720
       - items:
           - enum:
              - amlogic,meson-gxm-mali
+             - realtek,rtd1295-mali
           - const: arm,mali-t820
       - items:
           - enum:
+             - arm,juno-mali
+          - const: arm,mali-t624
+      - items:
+          - enum:
              - rockchip,rk3288-mali
+             - samsung,exynos5433-mali
           - const: arm,mali-t760
       - items:
           - enum:
              - rockchip,rk3399-mali
           - const: arm,mali-t860
-      - items:
-          - enum:
-             - samsung,exynos5250-mali
-          - const: arm,mali-t604
-      - items:
-          - enum:
-             - samsung,exynos5433-mali
-          - const: arm,mali-t760
 
-          # "arm,mali-t624"
-          # "arm,mali-t628"
           # "arm,mali-t830"
           # "arm,mali-t880"
 
@@ -69,8 +73,7 @@ properties:
       - const: core
       - const: bus
 
-  mali-supply:
-    maxItems: 1
+  mali-supply: true
 
   resets:
     minItems: 1
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
index c5d93c5839d3..afde81be3c29 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
@@ -97,8 +97,7 @@ properties:
 
   memory-region: true
 
-  mali-supply:
-    maxItems: 1
+  mali-supply: true
 
   power-domains:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/gpu/samsung-g2d.txt b/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
deleted file mode 100644
index 1e7959332dbc..000000000000
--- a/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Samsung 2D Graphics Accelerator
-
-Required properties:
-  - compatible : value should be one among the following:
-	(a) "samsung,s5pv210-g2d" for G2D IP present in S5PV210 & Exynos4210 SoC
-	(b) "samsung,exynos4212-g2d" for G2D IP present in Exynos4x12 SoCs
-	(c) "samsung,exynos5250-g2d" for G2D IP present in Exynos5250 SoC
-
-  - reg : Physical base address of the IP registers and length of memory
-	  mapped region.
-
-  - interrupts : G2D interrupt number to the CPU.
-  - clocks : from common clock binding: handle to G2D clocks.
-  - clock-names : names of clocks listed in clocks property, in the same
-		  order, depending on SoC type:
-		  - for S5PV210 and Exynos4 based SoCs: "fimg2d" and
-		    "sclk_fimg2d"
-		  - for Exynos5250 SoC: "fimg2d".
-
-Example:
-	g2d@12800000 {
-		compatible = "samsung,s5pv210-g2d";
-		reg = <0x12800000 0x1000>;
-		interrupts = <0 89 0>;
-		clocks = <&clock 177>, <&clock 277>;
-		clock-names = "sclk_fimg2d", "fimg2d";
-	};
diff --git a/Documentation/devicetree/bindings/gpu/samsung-g2d.yaml b/Documentation/devicetree/bindings/gpu/samsung-g2d.yaml
new file mode 100644
index 000000000000..e7daae862578
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-g2d.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/samsung-g2d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC 2D Graphics Accelerator
+
+maintainers:
+  - Inki Dae <inki.dae@samsung.com>
+
+properties:
+  compatible:
+    enum:
+      - samsung,s5pv210-g2d    # in S5PV210 & Exynos4210 SoC
+      - samsung,exynos4212-g2d # in Exynos4x12 SoCs
+      - samsung,exynos5250-g2d
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks: {}
+  clock-names: {}
+  iommus: {}
+  power-domains: {}
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: samsung,exynos5250-g2d
+
+then:
+  properties:
+    clocks:
+      items:
+        - description: fimg2d clock
+    clock-names:
+      items:
+        - const: fimg2d
+
+else:
+  properties:
+    clocks:
+      items:
+        - description: sclk_fimg2d clock
+        - description: fimg2d clock
+    clock-names:
+      items:
+        - const: sclk_fimg2d
+        - const: fimg2d
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    g2d@12800000 {
+        compatible = "samsung,s5pv210-g2d";
+        reg = <0x12800000 0x1000>;
+        interrupts = <0 89 0>;
+        clocks = <&clock 177>, <&clock 277>;
+        clock-names = "sclk_fimg2d", "fimg2d";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/gpu/samsung-rotator.txt b/Documentation/devicetree/bindings/gpu/samsung-rotator.txt
deleted file mode 100644
index 3aca2578da0b..000000000000
--- a/Documentation/devicetree/bindings/gpu/samsung-rotator.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Samsung Image Rotator
-
-Required properties:
-  - compatible : value should be one of the following:
-	* "samsung,s5pv210-rotator" for Rotator IP in S5PV210
-	* "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
-	* "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
-	* "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
-
-  - reg : Physical base address of the IP registers and length of memory
-	  mapped region.
-
-  - interrupts : Interrupt specifier for rotator interrupt, according to format
-		 specific to interrupt parent.
-
-  - clocks : Clock specifier for rotator clock, according to generic clock
-	     bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
-
-  - clock-names : Names of clocks. For exynos rotator, it should be "rotator".
-
-Example:
-	rotator@12810000 {
-		compatible = "samsung,exynos4210-rotator";
-		reg = <0x12810000 0x1000>;
-		interrupts = <0 83 0>;
-		clocks = <&clock 278>;
-		clock-names = "rotator";
-	};
diff --git a/Documentation/devicetree/bindings/gpu/samsung-rotator.yaml b/Documentation/devicetree/bindings/gpu/samsung-rotator.yaml
new file mode 100644
index 000000000000..f4dfa6fc724c
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-rotator.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/samsung-rotator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC Image Rotator
+
+maintainers:
+  - Inki Dae <inki.dae@samsung.com>
+
+properties:
+  compatible:
+    enum:
+      - "samsung,s5pv210-rotator"
+      - "samsung,exynos4210-rotator"
+      - "samsung,exynos4212-rotator"
+      - "samsung,exynos5250-rotator"
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: rotator
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    rotator@12810000 {
+        compatible = "samsung,exynos4210-rotator";
+        reg = <0x12810000 0x1000>;
+        interrupts = <0 83 0>;
+        clocks = <&clock 278>;
+        clock-names = "rotator";
+    };
+
diff --git a/Documentation/devicetree/bindings/gpu/samsung-scaler.txt b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt
deleted file mode 100644
index 9c3d98105dfd..000000000000
--- a/Documentation/devicetree/bindings/gpu/samsung-scaler.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Samsung Exynos Image Scaler
-
-Required properties:
-  - compatible : value should be one of the following:
-	(a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420
-	(b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433
-
-  - reg : Physical base address of the IP registers and length of memory
-	  mapped region.
-
-  - interrupts : Interrupt specifier for scaler interrupt, according to format
-		 specific to interrupt parent.
-
-  - clocks : Clock specifier for scaler clock, according to generic clock
-	     bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
-
-  - clock-names : Names of clocks. For exynos scaler, it should be "mscl"
-		  on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433.
-
-Example:
-	scaler@12800000 {
-		compatible = "samsung,exynos5420-scaler";
-		reg = <0x12800000 0x1294>;
-		interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&clock CLK_MSCL0>;
-		clock-names = "mscl";
-	};
diff --git a/Documentation/devicetree/bindings/gpu/samsung-scaler.yaml b/Documentation/devicetree/bindings/gpu/samsung-scaler.yaml
new file mode 100644
index 000000000000..5317ac64426a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-scaler.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/samsung-scaler.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Image Scaler
+
+maintainers:
+  - Inki Dae <inki.dae@samsung.com>
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos5420-scaler
+      - samsung,exynos5433-scaler
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks: {}
+  clock-names: {}
+  iommus: {}
+  power-domains: {}
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: samsung,exynos5420-scaler
+
+then:
+  properties:
+    clocks:
+      items:
+        - description: mscl clock
+
+    clock-names:
+      items:
+        - const: mscl
+
+else:
+  properties:
+    clocks:
+      items:
+        - description: pclk clock
+        - description: aclk clock
+        - description: aclk_xiu clock
+
+    clock-names:
+      items:
+        - const: pclk
+        - const: aclk
+        - const: aclk_xiu
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5420.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    scaler@12800000 {
+        compatible = "samsung,exynos5420-scaler";
+        reg = <0x12800000 0x1294>;
+        interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clock CLK_MSCL0>;
+        clock-names = "mscl";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.txt
deleted file mode 100644
index adf4f000ea3d..000000000000
--- a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-STM32 Hardware Spinlock Device Binding
--------------------------------------
-
-Required properties :
-- compatible : should be "st,stm32-hwspinlock".
-- reg : the register address of hwspinlock.
-- #hwlock-cells : hwlock users only use the hwlock id to represent a specific
-	hwlock, so the number of cells should be <1> here.
-- clock-names : Must contain "hsem".
-- clocks : Must contain a phandle entry for the clock in clock-names, see the
-	common clock bindings.
-
-Please look at the generic hwlock binding for usage information for consumers,
-"Documentation/devicetree/bindings/hwlock/hwlock.txt"
-
-Example of hwlock provider:
-	hwspinlock@4c000000 {
-		compatible = "st,stm32-hwspinlock";
-		#hwlock-cells = <1>;
-		reg = <0x4c000000 0x400>;
-		clocks = <&rcc HSEM>;
-		clock-names = "hsem";
-	};
diff --git a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
new file mode 100644
index 000000000000..47cf9c8d97e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwlock/st,stm32-hwspinlock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Hardware Spinlock bindings
+
+maintainers:
+  - Benjamin Gaignard <benjamin.gaignard@st.com>
+  - Fabien Dessenne <fabien.dessenne@st.com>
+
+properties:
+  "#hwlock-cells":
+    const: 1
+
+  compatible:
+    const: st,stm32-hwspinlock
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: hsem
+
+required:
+  - "#hwlock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    hwspinlock@4c000000 {
+        compatible = "st,stm32-hwspinlock";
+        #hwlock-cells = <1>;
+        reg = <0x4c000000 0x400>;
+        clocks = <&rcc HSEM>;
+        clock-names = "hsem";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml b/Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml
new file mode 100644
index 000000000000..2a9822075b36
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/adi,adm1177.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADM1177 Hot Swap Controller and Digital Power Monitor
+
+maintainers:
+  - Michael Hennerich <michael.hennerich@analog.com>
+  - Beniamin Bia <beniamin.bia@analog.com>
+
+description: |
+  Analog Devices ADM1177 Hot Swap Controller and Digital Power Monitor
+  https://www.analog.com/media/en/technical-documentation/data-sheets/ADM1177.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,adm1177
+
+  reg:
+    maxItems: 1
+
+  avcc-supply:
+    description:
+      Phandle to the Avcc power supply
+
+  shunt-resistor-micro-ohms:
+    description:
+      The value of curent sense resistor in microohms. If not provided,
+      the current reading and overcurrent alert is disabled.
+
+  adi,shutdown-threshold-microamp:
+    description:
+      Specifies the current level at which an over current alert occurs.
+      If not provided, the overcurrent alert is configured to max ADC range
+      based on shunt-resistor-micro-ohms.
+
+  adi,vrange-high-enable:
+    description:
+      Specifies which internal voltage divider to be used. A 1 selects
+      a 7:2 voltage divider while a 0 selects a 14:1 voltage divider.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pwmon@5a {
+                compatible = "adi,adm1177";
+                reg = <0x5a>;
+                shunt-resistor-micro-ohms = <50000>; /* 50 mOhm */
+                adi,shutdown-threshold-microamp = <1059000>; /* 1.059 A */
+                adi,vrange-high-enable;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml b/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml
new file mode 100644
index 000000000000..ae04903f34bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/hwmon/adi,ltc2947.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices LTC2947 high precision power and energy monitor
+
+maintainers:
+  - Nuno Sá <nuno.sa@analog.com>
+
+description: |
+  Analog Devices LTC2947 high precision power and energy monitor over SPI or I2C.
+
+  https://www.analog.com/media/en/technical-documentation/data-sheets/LTC2947.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ltc2947
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description:
+      The LTC2947 uses either a trimmed internal oscillator or an external clock
+      as the time base for determining the integration period to represent time,
+      charge and energy. When an external clock is used, this property must be
+      set accordingly.
+    maxItems: 1
+
+  adi,accumulator-ctl-pol:
+    description:
+      This property controls the polarity of current that is accumulated to
+      calculate charge and energy so that, they can be only accumulated for
+      positive current for example. Since there are two sets of registers for
+      the accumulated values, this entry can also have two items which sets
+      energy1/charge1 and energy2/charger2 respectively. Check table 12 of the
+      datasheet for more information on the supported options.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - minItems: 2
+        maxItems: 2
+        items:
+          enum: [0, 1, 2, 3]
+          default: 0
+
+  adi,accumulation-deadband-microamp:
+    description:
+      This property controls the Accumulation Dead band which allows to set the
+      level of current below which no accumulation takes place.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    maximum: 255
+    default: 0
+
+  adi,gpio-out-pol:
+    description:
+      This property controls the GPIO polarity. Setting it to one makes the GPIO
+      active high, setting it to zero makets it active low. When this property
+      is present, the GPIO is automatically configured as output and set to
+      control a fan as a function of measured temperature.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    default: 0
+
+  adi,gpio-in-accum:
+    description:
+      When set, this property sets the GPIO as input. It is then used to control
+      the accumulation of charge, energy and time. This function can be
+      enabled/configured separately for each of the two sets of accumulation
+      registers. Check table 13 of the datasheet for more information on the
+      supported options. This property cannot be used together with
+      adi,gpio-out-pol.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - minItems: 2
+        maxItems: 2
+        items:
+          enum: [0, 1, 2]
+          default: 0
+
+required:
+  - compatible
+  - reg
+
+
+examples:
+  - |
+    spi {
+           #address-cells = <1>;
+           #size-cells = <0>;
+
+           ltc2947_spi: ltc2947@0 {
+                   compatible = "adi,ltc2947";
+                   reg = <0>;
+                   /* accumulation takes place always for energ1/charge1. */
+                   /* accumulation only on positive current for energy2/charge2. */
+                   adi,accumulator-ctl-pol = <0 1>;
+           };
+    };
+...
diff --git a/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt b/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
index 1036f65fb778..d9a2719f9243 100644
--- a/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
+++ b/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
@@ -5,6 +5,9 @@ Required properties:
  - compatible				: Must be one of the following:
 						"ibm,cffps1"
 						"ibm,cffps2"
+						or "ibm,cffps" if the system
+						must support any version of the
+						power supply
  - reg = < I2C bus address >;		: Address of the power supply on the
 					  I2C bus.
 
diff --git a/Documentation/devicetree/bindings/hwmon/pmbus/ti,ucd90320.yaml b/Documentation/devicetree/bindings/hwmon/pmbus/ti,ucd90320.yaml
new file mode 100644
index 000000000000..5d42e1304202
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/pmbus/ti,ucd90320.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/pmbus/ti,ucd90320.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UCD90320 power sequencer
+
+maintainers:
+  - Jim Wright <wrightj@linux.vnet.ibm.com>
+
+description: |
+  The UCD90320 is a 32-rail PMBus/I2C addressable power-supply sequencer and
+  monitor. The 24 integrated ADC channels (AMONx) monitor the power supply
+  voltage, current, and temperature. Of the 84 GPIO pins, 8 can be used as
+  digital monitors (DMONx), 32 to enable the power supply (ENx), 24 for
+  margining (MARx), 16 for logical GPO, and 32 GPIs for cascading, and system
+  function.
+
+  http://focus.ti.com/lit/ds/symlink/ucd90320.pdf
+
+properties:
+  compatible:
+    enum:
+      - ti,ucd90320
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ucd90320@11 {
+            compatible = "ti,ucd90320";
+            reg = <0x11>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml
new file mode 100644
index 000000000000..168235ad5d81
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/ti,tmp513.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TMP513/512 system monitor sensor
+
+maintainers:
+  - Eric Tremblay <etremblay@distech-controls.com>
+
+description: |
+  The TMP512 (dual-channel) and TMP513 (triple-channel) are system monitors
+  that include remote sensors, a local temperature sensor, and a high-side
+  current shunt monitor. These system monitors have the capability of measuring
+  remote temperatures, on-chip temperatures, and system voltage/power/current
+  consumption.
+
+  Datasheets:
+  http://www.ti.com/lit/gpn/tmp513
+  http://www.ti.com/lit/gpn/tmp512
+
+
+properties:
+  compatible:
+    enum:
+      - ti,tmp512
+      - ti,tmp513
+
+  reg:
+    maxItems: 1
+
+  shunt-resistor-micro-ohms:
+    description: |
+      If 0, the calibration process will be skiped and the current and power
+      measurement engine will not work. Temperature and voltage measurement
+      will continue to work. The shunt value also need to respect:
+      rshunt <= pga-gain * 40 * 1000 * 1000.
+      If not, it's not possible to compute a valid calibration value.
+    default: 1000
+
+  ti,pga-gain:
+    description: |
+      The gain value for the PGA function. This is 8, 4, 2 or 1.
+      The PGA gain affect the shunt voltage range.
+      The range will be equal to: pga-gain * 40mV
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [1, 2, 4, 8]
+    default: 8
+
+  ti,bus-range-microvolt:
+    description: |
+      This is the operating range of the bus voltage in microvolt
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [16000000, 32000000]
+    default: 32000000
+
+  ti,nfactor:
+    description: |
+      Array of three(TMP513) or two(TMP512) n-Factor value for each remote
+      temperature channel.
+      See datasheet Table 11 for n-Factor range list and value interpretation.
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint32-array
+      - minItems: 2
+        maxItems: 3
+        items:
+          default: 0x00
+          minimum: 0x00
+          maximum: 0xFF
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          tmp513@5c {
+              compatible = "ti,tmp513";
+              reg = <0x5C>;
+              shunt-resistor-micro-ohms = <330000>;
+              ti,bus-range-microvolt = <32000000>;
+              ti,pga-gain = <8>;
+              ti,nfactor = <0x1 0xF3 0x00>;
+          };
+    };
diff --git a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
index f9d526b7da01..6097e8ac46c1 100644
--- a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
+++ b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 P2WI (Push/Pull 2 Wires Interface) Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: /schemas/i2c/i2c-controller.yaml#
@@ -40,9 +40,7 @@ required:
   - clocks
   - resets
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml
new file mode 100644
index 000000000000..49cad273c8e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/i2c/amlogic,meson6-i2c.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Meson I2C Controller
+
+maintainers:
+  - Neil Armstrong <narmstrong@baylibre.com>
+  - Beniamino Galvani <b.galvani@gmail.com>
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson6-i2c # Meson6, Meson8 and compatible SoCs
+      - amlogic,meson-gxbb-i2c # GXBB and compatible SoCs
+      - amlogic,meson-axg-i2c # AXG and compatible SoCs
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+examples:
+  - |
+    i2c@c8100500 {
+        compatible = "amlogic,meson6-i2c";
+        reg = <0xc8100500 0x20>;
+        interrupts = <92>;
+        clocks = <&clk81>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        eeprom@52 {
+            compatible = "atmel,24c32";
+            reg = <0x52>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
index 8fbd8633a387..b47f6ccb196a 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
@@ -1,4 +1,4 @@
-Device tree configuration for the I2C busses on the AST24XX and AST25XX SoCs.
+Device tree configuration for the I2C busses on the AST24XX, AST25XX, and AST26XX SoCs.
 
 Required Properties:
 - #address-cells	: should be 1
@@ -6,6 +6,7 @@ Required Properties:
 - reg			: address offset and range of bus
 - compatible		: should be "aspeed,ast2400-i2c-bus"
 			  or "aspeed,ast2500-i2c-bus"
+			  or "aspeed,ast2600-i2c-bus"
 - clocks		: root clock of bus, should reference the APB
 			  clock in the second cell
 - resets		: phandle to reset controller with the reset number in
diff --git a/Documentation/devicetree/bindings/i2c/i2c-at91.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt
index b7cec17c3daf..8347b1e7c080 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-at91.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt
@@ -3,7 +3,8 @@ I2C for Atmel platforms
 Required properties :
 - compatible : Must be "atmel,at91rm9200-i2c", "atmel,at91sam9261-i2c",
      "atmel,at91sam9260-i2c", "atmel,at91sam9g20-i2c", "atmel,at91sam9g10-i2c",
-     "atmel,at91sam9x5-i2c", "atmel,sama5d4-i2c" or "atmel,sama5d2-i2c"
+     "atmel,at91sam9x5-i2c", "atmel,sama5d4-i2c", "atmel,sama5d2-i2c" or
+     "microchip,sam9x60-i2c"
 - reg: physical base address of the controller and length of memory mapped
      region.
 - interrupts: interrupt number to the cpu.
@@ -17,8 +18,10 @@ Optional properties:
 - dma-names: should contain "tx" and "rx".
 - atmel,fifo-size: maximum number of data the RX and TX FIFOs can store for FIFO
   capable I2C controllers.
-- i2c-sda-hold-time-ns: TWD hold time, only available for "atmel,sama5d4-i2c"
-  and "atmel,sama5d2-i2c".
+- i2c-sda-hold-time-ns: TWD hold time, only available for:
+	"atmel,sama5d4-i2c",
+	"atmel,sama5d2-i2c",
+	"microchip,sam9x60-i2c".
 - Child nodes conforming to i2c bus binding
 
 Examples :
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
index b245363d6d60..f0c072ff9eca 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
@@ -4,6 +4,7 @@ Required properties:
 - compatible :
   - "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc
   - "fsl,imx8qxp-lpi2c" for LPI2C compatible with the one integrated on i.MX8QXP soc
+  - "fsl,imx8qm-lpi2c" for LPI2C compatible with the one integrated on i.MX8QM soc
 - reg : address and length of the lpi2c master registers
 - interrupts : lpi2c interrupt
 - clocks : lpi2c clock specifier
diff --git a/Documentation/devicetree/bindings/i2c/i2c-meson.txt b/Documentation/devicetree/bindings/i2c/i2c-meson.txt
deleted file mode 100644
index 13d410de077c..000000000000
--- a/Documentation/devicetree/bindings/i2c/i2c-meson.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Amlogic Meson I2C controller
-
-Required properties:
- - compatible: must be:
-	"amlogic,meson6-i2c" for Meson8 and compatible SoCs
-	"amlogic,meson-gxbb-i2c" for GXBB and compatible SoCs
-	"amlogic,meson-axg-i2c"for AXG and compatible SoCs
-
- - reg: physical address and length of the device registers
- - interrupts: a single interrupt specifier
- - clocks: clock for the device
- - #address-cells: should be <1>
- - #size-cells: should be <0>
-
-For details regarding the following core I2C bindings see also i2c.txt.
-
-Optional properties:
-- clock-frequency: the desired I2C bus clock frequency in Hz; in
-  absence of this property the default value is used (100 kHz).
-
-Examples:
-
-	i2c@c8100500 {
-		compatible = "amlogic,meson6-i2c";
-		reg = <0xc8100500 0x20>;
-		interrupts = <0 92 1>;
-		clocks = <&clk81>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-stm32.txt b/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
deleted file mode 100644
index ce3df2fff6c8..000000000000
--- a/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-* I2C controller embedded in STMicroelectronics STM32 I2C platform
-
-Required properties:
-- compatible: Must be one of the following
-  - "st,stm32f4-i2c"
-  - "st,stm32f7-i2c"
-- reg: Offset and length of the register set for the device
-- interrupts: Must contain the interrupt id for I2C event and then the
-  interrupt id for I2C error.
-- resets: Must contain the phandle to the reset controller.
-- clocks: Must contain the input clock of the I2C instance.
-- A pinctrl state named "default" must be defined to set pins in mode of
-  operation for I2C transfer
-- #address-cells = <1>;
-- #size-cells = <0>;
-
-Optional properties:
-- clock-frequency: Desired I2C bus clock frequency in Hz. If not specified,
-  the default 100 kHz frequency will be used.
-  For STM32F4 SoC Standard-mode and Fast-mode are supported, possible values are
-  100000 and 400000.
-  For STM32F7, STM32H7 and STM32MP1 SoCs, Standard-mode, Fast-mode and Fast-mode
-  Plus are supported, possible values are 100000, 400000 and 1000000.
-- dmas: List of phandles to rx and tx DMA channels. Refer to stm32-dma.txt.
-- dma-names: List of dma names. Valid names are: "rx" and "tx".
-- i2c-scl-rising-time-ns: I2C SCL Rising time for the board (default: 25)
-  For STM32F7, STM32H7 and STM32MP1 only.
-- i2c-scl-falling-time-ns: I2C SCL Falling time for the board (default: 10)
-  For STM32F7, STM32H7 and STM32MP1 only.
-  I2C Timings are derived from these 2 values
-- st,syscfg-fmp: Use to set Fast Mode Plus bit within SYSCFG when Fast Mode
-  Plus speed is selected by slave.
-	1st cell: phandle to syscfg
-	2nd cell: register offset within SYSCFG
-	3rd cell: register bitmask for FMP bit
-  For STM32F7, STM32H7 and STM32MP1 only.
-
-Example:
-
-	i2c@40005400 {
-		compatible = "st,stm32f4-i2c";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0x40005400 0x400>;
-		interrupts = <31>,
-			     <32>;
-		resets = <&rcc 277>;
-		clocks = <&rcc 0 149>;
-		pinctrl-0 = <&i2c1_sda_pin>, <&i2c1_scl_pin>;
-		pinctrl-names = "default";
-	};
-
-	i2c@40005400 {
-		compatible = "st,stm32f7-i2c";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0x40005400 0x400>;
-		interrupts = <31>,
-			     <32>;
-		resets = <&rcc STM32F7_APB1_RESET(I2C1)>;
-		clocks = <&rcc 1 CLK_I2C1>;
-		pinctrl-0 = <&i2c1_sda_pin>, <&i2c1_scl_pin>;
-		pinctrl-names = "default";
-		st,syscfg-fmp = <&syscfg 0x4 0x1>;
-	};
diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt
index 44efafdfd7f5..9a53df4243c6 100644
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -55,6 +55,24 @@ wants to support one of the below features, it should adapt the bindings below.
 	Number of nanoseconds the SDA signal takes to fall; t(f) in the I2C
 	specification.
 
+- i2c-analog-filter
+	Enable analog filter for i2c lines.
+
+- i2c-digital-filter
+	Enable digital filter for i2c lines.
+
+- i2c-digital-filter-width-ns
+	Width of spikes which can be filtered by digital filter
+	(i2c-digital-filter). This width is specified in nanoseconds.
+
+- i2c-analog-filter-cutoff-frequency
+	Frequency that the analog filter (i2c-analog-filter) uses to distinguish
+	which signal to filter. Signal with higher frequency than specified will
+	be filtered out. Only lower frequency will pass (this is applicable to
+	a low-pass analog filter). Typical value should be above the normal
+	i2c bus clock frequency (clock-frequency).
+	Specified in Hz.
+
 - interrupts
 	interrupts used by the device.
 
diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
index c779000515d6..2ceb05ba2df5 100644
--- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
@@ -93,9 +93,7 @@ allOf:
       required:
         - resets
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
index 3ee5e8f6ee01..0660a3eb2547 100644
--- a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
@@ -7,6 +7,7 @@ Required properties:
 	"renesas,i2c-r8a7745" if the device is a part of a R8A7745 SoC.
 	"renesas,i2c-r8a77470" if the device is a part of a R8A77470 SoC.
 	"renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC.
+	"renesas,i2c-r8a774b1" if the device is a part of a R8A774B1 SoC.
 	"renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC.
 	"renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC.
 	"renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC.
diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt
index 202602e6e837..64d11ffb07c4 100644
--- a/Documentation/devicetree/bindings/i2c/renesas,iic.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt
@@ -8,6 +8,7 @@ Required properties:
 			- "renesas,iic-r8a7744" (RZ/G1N)
 			- "renesas,iic-r8a7745" (RZ/G1E)
 			- "renesas,iic-r8a774a1" (RZ/G2M)
+			- "renesas,iic-r8a774b1" (RZ/G2N)
 			- "renesas,iic-r8a774c0" (RZ/G2E)
 			- "renesas,iic-r8a7790" (R-Car H2)
 			- "renesas,iic-r8a7791" (R-Car M2-W)
diff --git a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
new file mode 100644
index 000000000000..900ec1ab6a47
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/st,stm32-i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: I2C controller embedded in STMicroelectronics STM32 I2C platform
+
+maintainers:
+  - Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - st,stm32f7-i2c
+    then:
+      properties:
+        i2c-scl-rising-time-ns:
+          default: 25
+
+        i2c-scl-falling-time-ns:
+          default: 10
+
+        st,syscfg-fmp:
+          description: Use to set Fast Mode Plus bit within SYSCFG when
+                       Fast Mode Plus speed is selected by slave.
+                       Format is phandle to syscfg / register offset within
+                       syscfg / register bitmask for FMP bit.
+          allOf:
+            - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+            - items:
+                minItems: 3
+                maxItems: 3
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - st,stm32f4-i2c
+    then:
+      properties:
+        clock-frequency:
+          enum: [100000, 400000]
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f4-i2c
+      - st,stm32f7-i2c
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: interrupt ID for I2C event
+      - description: interrupt ID for I2C error
+
+  resets:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: RX DMA Channel phandle
+      - description: TX DMA Channel phandle
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  clock-frequency:
+    description: Desired I2C bus clock frequency in Hz. If not specified,
+                 the default 100 kHz frequency will be used.
+                 For STM32F7, STM32H7 and STM32MP1 SoCs, Standard-mode,
+                 Fast-mode and Fast-mode Plus are supported, possible
+                 values are 100000, 400000 and 1000000.
+    default: 100000
+    enum: [100000, 400000, 1000000]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - resets
+  - clocks
+
+examples:
+  - |
+    #include <dt-bindings/mfd/stm32f7-rcc.h>
+    #include <dt-bindings/clock/stm32fx-clock.h>
+    //Example 1 (with st,stm32f4-i2c compatible)
+      i2c@40005400 {
+          compatible = "st,stm32f4-i2c";
+          #address-cells = <1>;
+          #size-cells = <0>;
+          reg = <0x40005400 0x400>;
+          interrupts = <31>,
+                       <32>;
+          resets = <&rcc 277>;
+          clocks = <&rcc 0 149>;
+      };
+
+    //Example 2 (with st,stm32f7-i2c compatible)
+      i2c@40005800 {
+          compatible = "st,stm32f7-i2c";
+          #address-cells = <1>;
+          #size-cells = <0>;
+          reg = <0x40005800 0x400>;
+          interrupts = <31>,
+                       <32>;
+          resets = <&rcc STM32F7_APB1_RESET(I2C1)>;
+          clocks = <&rcc 1 CLK_I2C1>;
+      };
+
+    //Example 3 (with st,stm32f7-i2c compatible on stm32mp)
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+      i2c@40013000 {
+          compatible = "st,stm32f7-i2c";
+          #address-cells = <1>;
+          #size-cells = <0>;
+          reg = <0x40013000 0x400>;
+          interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                       <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+          clocks = <&rcc I2C2_K>;
+          resets = <&rcc I2C2_R>;
+          i2c-scl-rising-time-ns = <185>;
+          i2c-scl-falling-time-ns = <20>;
+          st,syscfg-fmp = <&syscfg 0x4 0x2>;
+      };
+...
diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adis16240.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adis16240.yaml
new file mode 100644
index 000000000000..4147f02b5e3c
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/adi,adis16240.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/accel/adi,adis16240.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ADIS16240 Programmable Impact Sensor and Recorder driver
+
+maintainers:
+  - Alexandru Ardelean <alexandru.ardelean@analog.com>
+
+description: |
+  ADIS16240 Programmable Impact Sensor and Recorder driver that supports
+  SPI interface.
+    https://www.analog.com/en/products/adis16240.html
+
+properties:
+  compatible:
+    enum:
+      - adi,adis16240
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        /* Example for a SPI device node */
+        accelerometer@0 {
+            compatible = "adi,adis16240";
+            reg = <0>;
+            spi-max-frequency = <2500000>;
+            interrupt-parent = <&gpio0>;
+            interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/iio/accel/bma180.txt b/Documentation/devicetree/bindings/iio/accel/bma180.txt
index 3b25b4c4d446..f53237270b32 100644
--- a/Documentation/devicetree/bindings/iio/accel/bma180.txt
+++ b/Documentation/devicetree/bindings/iio/accel/bma180.txt
@@ -1,11 +1,14 @@
-* Bosch BMA180 / BMA250 triaxial acceleration sensor
+* Bosch BMA180 / BMA25x triaxial acceleration sensor
 
 http://omapworld.com/BMA180_111_1002839.pdf
 http://ae-bst.resource.bosch.com/media/products/dokumente/bma250/bst-bma250-ds002-05.pdf
 
 Required properties:
 
-  - compatible : should be "bosch,bma180" or "bosch,bma250"
+  - compatible : should be one of:
+    "bosch,bma180"
+    "bosch,bma250"
+    "bosch,bma254"
   - reg : the I2C address of the sensor
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
new file mode 100644
index 000000000000..c1c6d6f223cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/accel/bosch,bma400.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bosch BMA400 triaxial acceleration sensor
+
+maintainers:
+  - Dan Robertson <dan@dlrobertson.com>
+
+description: |
+  Acceleration and temperature iio sensors with an i2c interface
+
+  Specifications about the sensor can be found at:
+    https://ae-bst.resource.bosch.com/media/_tech/media/datasheets/BST-BMA400-DS000.pdf
+
+properties:
+  compatible:
+    enum:
+      - bosch,bma400
+
+  reg:
+    maxItems: 1
+
+  vdd-supply:
+    description: phandle to the regulator that provides power to the accelerometer
+
+  vddio-supply:
+    description: phandle to the regulator that provides power to the sensor's IO
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      accelerometer@14 {
+        compatible = "bosch,bma400";
+        reg = <0x14>;
+        vdd-supply = <&vdd>;
+        vddio-supply = <&vddio>;
+        interrupt-parent = <&gpio0>;
+        interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.txt b/Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.txt
index eb76a02e2a82..ce950e162d5d 100644
--- a/Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.txt
+++ b/Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.txt
@@ -9,9 +9,16 @@ Required properties:
     "kionix,kxtf9"
  - reg: i2c slave address
 
+Optional properties:
+
+  - mount-matrix: an optional 3x3 mounting rotation matrix
+
 Example:
 
 kxtf9@f {
 	compatible = "kionix,kxtf9";
 	reg = <0x0F>;
+	mount-matrix =	"0", "1", "0",
+			"1", "0", "0",
+			"0", "0", "1";
 };
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml
new file mode 100644
index 000000000000..31ffa275f5fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/adi,ad7091r5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD7091R5 4-Channel 12-Bit ADC
+
+maintainers:
+  - Beniamin Bia <beniamin.bia@analog.com>
+
+description: |
+  Analog Devices AD7091R5 4-Channel 12-Bit ADC
+  https://www.analog.com/media/en/technical-documentation/data-sheets/ad7091r-5.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ad7091r5
+
+  reg:
+    maxItems: 1
+
+  vref-supply:
+    description:
+      Phandle to the vref power supply
+
+  interrupts:
+    maxItems: 1
+
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        adc@2f {
+                compatible = "adi,ad7091r5";
+                reg = <0x2f>;
+
+                interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
+                interrupt-parent = <&gpio>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
index 9692b7f719f5..e932d5aed02f 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
@@ -45,15 +45,12 @@ properties:
 
   refin1-supply:
     description: refin1 supply can be used as reference for conversion.
-    maxItems: 1
 
   refin2-supply:
     description: refin2 supply can be used as reference for conversion.
-    maxItems: 1
 
   avdd-supply:
     description: avdd supply can be used as reference for conversion.
-    maxItems: 1
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
new file mode 100644
index 000000000000..e1f6d64bdccd
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/adi,ad7292.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD7292 10-Bit Monitor and Control System
+
+maintainers:
+  - Marcelo Schmitt <marcelo.schmitt1@gmail.com>
+
+description: |
+  Analog Devices AD7292 10-Bit Monitor and Control System with ADC, DACs,
+  Temperature Sensor, and GPIOs
+
+  Specifications about the part can be found at:
+    https://www.analog.com/media/en/technical-documentation/data-sheets/ad7292.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ad7292
+
+  reg:
+    maxItems: 1
+
+  vref-supply:
+    description: |
+      The regulator supply for ADC and DAC reference voltage.
+
+  spi-cpha: true
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - spi-cpha
+
+patternProperties:
+  "^channel@[0-7]$":
+    type: object
+    description: |
+      Represents the external channels which are connected to the ADC.
+      See Documentation/devicetree/bindings/iio/adc/adc.txt.
+
+    properties:
+      reg:
+        description: |
+          The channel number. It can have up to 8 channels numbered from 0 to 7.
+        items:
+          - minimum: 0
+            maximum: 7
+
+      diff-channels:
+        description: see Documentation/devicetree/bindings/iio/adc/adc.txt
+        maxItems: 1
+
+    required:
+      - reg
+
+examples:
+  - |
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      ad7292: adc@0 {
+        compatible = "adi,ad7292";
+        reg = <0>;
+        spi-max-frequency = <25000000>;
+        vref-supply = <&adc_vref>;
+        spi-cpha;
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        channel@0 {
+          reg = <0>;
+          diff-channels = <0 1>;
+        };
+        channel@2 {
+          reg = <2>;
+        };
+        channel@3 {
+          reg = <3>;
+        };
+        channel@4 {
+          reg = <4>;
+        };
+        channel@5 {
+          reg = <5>;
+        };
+        channel@6 {
+          reg = <6>;
+        };
+        channel@7 {
+          reg = <7>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
index cc544fdc38be..5117ad68a584 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
@@ -31,10 +31,7 @@ properties:
 
   spi-cpha: true
 
-  avcc-supply:
-    description:
-      Phandle to the Avcc power supply
-    maxItems: 1
+  avcc-supply: true
 
   interrupts:
     maxItems: 1
@@ -85,7 +82,7 @@ properties:
       Must be the device tree identifier of the over-sampling
       mode pins. As the line is active high, it should be marked
       GPIO_ACTIVE_HIGH.
-    maxItems: 1
+    maxItems: 3
 
   adi,sw-mode:
     description:
@@ -128,9 +125,9 @@ examples:
                 adi,conversion-start-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>;
                 reset-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
                 adi,first-data-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
-                adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH
-                                                &gpio 23 GPIO_ACTIVE_HIGH
-                                                &gpio 26 GPIO_ACTIVE_HIGH>;
+                adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>,
+                                               <&gpio 23 GPIO_ACTIVE_HIGH>,
+                                               <&gpio 26 GPIO_ACTIVE_HIGH>;
                 standby-gpios = <&gpio 24 GPIO_ACTIVE_LOW>;
                 adi,sw-mode;
         };
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
index d1109416963c..9acde6d2e2d9 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
@@ -39,7 +39,6 @@ properties:
   avdd-supply:
     description:
       The regulator supply for the ADC reference voltage.
-    maxItems: 1
 
   powerdown-gpios:
     description:
diff --git a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
index d74962c0f5ae..15c514b83583 100644
--- a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
@@ -8,7 +8,7 @@ title: Allwinner A33 Thermal Sensor Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#io-channel-cells":
diff --git a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
index d76ece97c76c..91ab9c842273 100644
--- a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
@@ -41,7 +41,6 @@ properties:
   avdd-supply:
     description:
       Definition of the regulator used as analog supply
-    maxItems: 1
 
   clock-frequency:
     minimum: 20000
diff --git a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.txt b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.txt
index f01159f20d87..cd9048cf9dcf 100644
--- a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.txt
@@ -5,6 +5,7 @@ Required properties:
 - compatible: Should be one of:
   * ingenic,jz4725b-adc
   * ingenic,jz4740-adc
+  * ingenic,jz4770-adc
 - reg: ADC controller registers location and length.
 - clocks: phandle to the SoC's ADC clock.
 - clock-names: Must be set to "adc".
diff --git a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml
new file mode 100644
index 000000000000..59009997dca0
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/lltc,ltc2496.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Linear Technology / Analog Devices LTC2496 ADC
+
+maintainers:
+ - Lars-Peter Clausen <lars@metafoo.de>
+ - Michael Hennerich <Michael.Hennerich@analog.com>
+ - Stefan Popa <stefan.popa@analog.com>
+
+properties:
+  compatible:
+    enum:
+      - lltc,ltc2496
+
+  vref-supply:
+    description: phandle to an external regulator providing the reference voltage
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle
+
+  reg:
+    description: spi chipselect number according to the usual spi bindings
+
+  spi-max-frequency:
+    description: maximal spi bus frequency supported
+
+required:
+  - compatible
+  - vref-supply
+  - reg
+
+examples:
+  - |
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        adc@0 {
+            compatible = "lltc,ltc2496";
+            reg = <0>;
+            vref-supply = <&ltc2496_reg>;
+            spi-max-frequency = <2000000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt b/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt
deleted file mode 100644
index e680c61dfb84..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* Maxim 1027/1029/1031 Analog to Digital Converter (ADC)
-
-Required properties:
-  - compatible: Should be "maxim,max1027" or "maxim,max1029" or "maxim,max1031"
-  - reg: SPI chip select number for the device
-  - interrupts: IRQ line for the ADC
-  see: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-
-Recommended properties:
-- spi-max-frequency: Definition as per
-                     Documentation/devicetree/bindings/spi/spi-bus.txt
-
-Example:
-adc@0 {
-	compatible = "maxim,max1027";
-	reg = <0>;
-	interrupt-parent = <&gpio5>;
-	interrupts = <15 IRQ_TYPE_EDGE_RISING>;
-	spi-max-frequency = <1000000>;
-};
diff --git a/Documentation/devicetree/bindings/iio/adc/mcp3911.txt b/Documentation/devicetree/bindings/iio/adc/mcp3911.txt
deleted file mode 100644
index 3071f48fb30b..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/mcp3911.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Microchip MCP3911 Dual channel analog front end (ADC)
-
-Required properties:
- - compatible: Should be "microchip,mcp3911"
- - reg: SPI chip select number for the device
-
-Recommended properties:
- - spi-max-frequency: Definition as per
-	 Documentation/devicetree/bindings/spi/spi-bus.txt.
-	 Max frequency for this chip is 20MHz.
-
-Optional properties:
- - clocks: Phandle and clock identifier for sampling clock
- - interrupt-parent: Phandle to the parent interrupt controller
- - interrupts: IRQ line for the ADC
- - microchip,device-addr: Device address when multiple MCP3911 chips are present on the
-	same SPI bus. Valid values are 0-3. Defaults to 0.
- - vref-supply: Phandle to the external reference voltage supply.
-
-Example:
-adc@0 {
-	compatible = "microchip,mcp3911";
-	reg = <0>;
-	interrupt-parent = <&gpio5>;
-	interrupts = <15 IRQ_TYPE_EDGE_RISING>;
-	spi-max-frequency = <20000000>;
-	microchip,device-addr = <0>;
-	vref-supply = <&vref_reg>;
-	clocks = <&xtal>;
-};
diff --git a/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml
new file mode 100644
index 000000000000..881059b80d61
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2019 Marcus Folkesson <marcus.folkesson@gmail.com>
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/bindings/iio/adc/microchip,mcp3911.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Microchip MCP3911 Dual channel analog front end (ADC)
+
+maintainers:
+  - Marcus Folkesson <marcus.folkesson@gmail.com>
+  - Kent Gustavsson <nedo80@gmail.com>
+
+description: |
+  Bindings for the Microchip MCP3911 Dual channel ADC device. Datasheet can be
+  found here: https://ww1.microchip.com/downloads/en/DeviceDoc/20002286C.pdf
+
+properties:
+  compatible:
+    enum:
+      - microchip,mcp3911
+
+  reg:
+    maxItems: 1
+
+  spi-max-frequency:
+    maximum: 20000000
+
+  clocks:
+    description: |
+      Phandle and clock identifier for external sampling clock.
+      If not specified, the internal crystal oscillator will be used.
+    maxItems: 1
+
+  interrupts:
+    description: IRQ line of the ADC
+    maxItems: 1
+
+  microchip,device-addr:
+    description: Device address when multiple MCP3911 chips are present on the same SPI bus.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [0, 1, 2, 3]
+      - default: 0
+
+  vref-supply:
+    description: |
+      Phandle to the external reference voltage supply.
+      If not specified, the internal voltage reference (1.2V) will be used.
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      adc@0 {
+        compatible = "microchip,mcp3911";
+        reg = <0>;
+        interrupt-parent = <&gpio5>;
+        interrupts = <15 2>;
+        spi-max-frequency = <20000000>;
+        microchip,device-addr = <0>;
+        vref-supply = <&vref_reg>;
+        clocks = <&xtal>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.txt b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.txt
deleted file mode 100644
index e1fe02f3e3e9..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-Samsung Exynos Analog to Digital Converter bindings
-
-The devicetree bindings are for the new ADC driver written for
-Exynos4 and upward SoCs from Samsung.
-
-New driver handles the following
-1. Supports ADC IF found on EXYNOS4412/EXYNOS5250
-   and future SoCs from Samsung
-2. Add ADC driver under iio/adc framework
-3. Also adds the Documentation for device tree bindings
-
-Required properties:
-- compatible:		Must be "samsung,exynos-adc-v1"
-				for Exynos5250 controllers.
-			Must be "samsung,exynos-adc-v2" for
-				future controllers.
-			Must be "samsung,exynos3250-adc" for
-				controllers compatible with ADC of Exynos3250.
-			Must be "samsung,exynos4212-adc" for
-				controllers compatible with ADC of Exynos4212 and Exynos4412.
-			Must be "samsung,exynos7-adc" for
-				the ADC in Exynos7 and compatibles
-			Must be "samsung,s3c2410-adc" for
-				the ADC in s3c2410 and compatibles
-			Must be "samsung,s3c2416-adc" for
-				the ADC in s3c2416 and compatibles
-			Must be "samsung,s3c2440-adc" for
-				the ADC in s3c2440 and compatibles
-			Must be "samsung,s3c2443-adc" for
-				the ADC in s3c2443 and compatibles
-			Must be "samsung,s3c6410-adc" for
-				the ADC in s3c6410 and compatibles
-			Must be "samsung,s5pv210-adc" for
-				the ADC in s5pv210 and compatibles
-- reg:			List of ADC register address range
-			- The base address and range of ADC register
-			- The base address and range of ADC_PHY register (every
-			  SoC except for s3c24xx/s3c64xx ADC)
-- interrupts: 		Contains the interrupt information for the timer. The
-			format is being dependent on which interrupt controller
-			the Samsung device uses.
-- #io-channel-cells = <1>; As ADC has multiple outputs
-- clocks		From common clock bindings: handles to clocks specified
-			in "clock-names" property, in the same order.
-- clock-names		From common clock bindings: list of clock input names
-			used by ADC block:
-			- "adc" : ADC bus clock
-			- "sclk" : ADC special clock (only for Exynos3250 and
-				   compatible ADC block)
-- vdd-supply		VDD input supply.
-
-- samsung,syscon-phandle Contains the PMU system controller node
-			(To access the ADC_PHY register on Exynos5250/5420/5800/3250)
-Optional properties:
-- has-touchscreen:	If present, indicates that a touchscreen is
-			connected an usable.
-
-Note: child nodes can be added for auto probing from device tree.
-
-Example: adding device info in dtsi file
-
-adc: adc@12d10000 {
-	compatible = "samsung,exynos-adc-v1";
-	reg = <0x12D10000 0x100>;
-	interrupts = <0 106 0>;
-	#io-channel-cells = <1>;
-	io-channel-ranges;
-
-	clocks = <&clock 303>;
-	clock-names = "adc";
-
-	vdd-supply = <&buck5_reg>;
-	samsung,syscon-phandle = <&pmu_system_controller>;
-};
-
-Example: adding device info in dtsi file for Exynos3250 with additional sclk
-
-adc: adc@126c0000 {
-	compatible = "samsung,exynos3250-adc", "samsung,exynos-adc-v2;
-	reg = <0x126C0000 0x100>;
-	interrupts = <0 137 0>;
-	#io-channel-cells = <1>;
-	io-channel-ranges;
-
-	clocks = <&cmu CLK_TSADC>, <&cmu CLK_SCLK_TSADC>;
-	clock-names = "adc", "sclk";
-
-	vdd-supply = <&buck5_reg>;
-	samsung,syscon-phandle = <&pmu_system_controller>;
-};
-
-Example: Adding child nodes in dts file
-
-adc@12d10000 {
-
-	/* NTC thermistor is a hwmon device */
-	ncp15wb473@0 {
-		compatible = "murata,ncp15wb473";
-		pullup-uv = <1800000>;
-		pullup-ohm = <47000>;
-		pulldown-ohm = <0>;
-		io-channels = <&adc 4>;
-	};
-};
-
-Note: Does not apply to ADC driver under arch/arm/plat-samsung/
-Note: The child node can be added under the adc node or separately.
diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml
new file mode 100644
index 000000000000..f46de17c0878
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml
@@ -0,0 +1,151 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/samsung,exynos-adc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos Analog to Digital Converter (ADC)
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos-adc-v1                 # Exynos5250
+      - samsung,exynos-adc-v2
+      - samsung,exynos3250-adc
+      - samsung,exynos4212-adc                # Exynos4212 and Exynos4412
+      - samsung,exynos7-adc
+      - samsung,s3c2410-adc
+      - samsung,s3c2416-adc
+      - samsung,s3c2440-adc
+      - samsung,s3c2443-adc
+      - samsung,s3c6410-adc
+      - samsung,s5pv210-adc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description:
+      Phandle to ADC bus clock. For Exynos3250 additional clock is needed.
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    description:
+      Must contain clock names (adc, sclk) matching phandles in clocks
+      property.
+    minItems: 1
+    maxItems: 2
+
+  interrupts:
+    maxItems: 1
+
+  "#io-channel-cells":
+    const: 1
+
+  vdd-supply: true
+
+  samsung,syscon-phandle:
+    $ref: '/schemas/types.yaml#/definitions/phandle'
+    description:
+      Phandle to the PMU system controller node (to access the ADC_PHY
+      register on Exynos3250/4x12/5250/5420/5800).
+
+  has-touchscreen:
+    description:
+      If present, indicates that a touchscreen is connected and usable.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - "#io-channel-cells"
+  - vdd-supply
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos-adc-v1
+              - samsung,exynos-adc-v2
+              - samsung,exynos3250-adc
+              - samsung,exynos4212-adc
+              - samsung,s5pv210-adc
+    then:
+      required:
+        - samsung,syscon-phandle
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos3250-adc
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          items:
+            - const: adc
+            - const: sclk
+    else:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 1
+        clock-names:
+          items:
+            - const: adc
+
+examples:
+  - |
+    adc: adc@12d10000 {
+        compatible = "samsung,exynos-adc-v1";
+        reg = <0x12d10000 0x100>;
+        interrupts = <0 106 0>;
+        #io-channel-cells = <1>;
+        io-channel-ranges;
+
+        clocks = <&clock 303>;
+        clock-names = "adc";
+
+        vdd-supply = <&buck5_reg>;
+        samsung,syscon-phandle = <&pmu_system_controller>;
+
+        /* NTC thermistor is a hwmon device */
+        ncp15wb473@0 {
+            compatible = "murata,ncp15wb473";
+            pullup-uv = <1800000>;
+            pullup-ohm = <47000>;
+            pulldown-ohm = <0>;
+            io-channels = <&adc 4>;
+          };
+    };
+
+  - |
+    #include <dt-bindings/clock/exynos3250.h>
+
+    adc@126c0000 {
+        compatible = "samsung,exynos3250-adc";
+        reg = <0x126C0000 0x100>;
+        interrupts = <0 137 0>;
+        #io-channel-cells = <1>;
+        io-channel-ranges;
+
+        clocks = <&cmu CLK_TSADC>,
+                 <&cmu CLK_SCLK_TSADC>;
+        clock-names = "adc", "sclk";
+
+        vdd-supply = <&buck5_reg>;
+        samsung,syscon-phandle = <&pmu_system_controller>;
+    };
diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt
deleted file mode 100644
index 59b92cd32552..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Device-Tree bindings for sigma delta modulator
-
-Required properties:
-- compatible: should be "ads1201", "sd-modulator". "sd-modulator" can be use
-	as a generic SD modulator if modulator not specified in compatible list.
-- #io-channel-cells = <0>: See the IIO bindings section "IIO consumers".
-
-Example node:
-
-	ads1202: adc {
-		compatible = "sd-modulator";
-		#io-channel-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
new file mode 100644
index 000000000000..a390343d0c2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/sigma-delta-modulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Device-Tree bindings for sigma delta modulator
+
+maintainers:
+  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+
+properties:
+  compatible:
+    description: |
+      "sd-modulator" can be used as a generic SD modulator,
+      if the modulator is not specified in the compatible list.
+    enum:
+      - sd-modulator
+      - ads1201
+
+  '#io-channel-cells':
+    const: 0
+
+required:
+  - compatible
+  - '#io-channel-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    ads1202: adc {
+      compatible = "sd-modulator";
+      #io-channel-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
index 4c0da8c74bb2..8de933146771 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
@@ -53,6 +53,8 @@ Optional properties:
   analog input switches on stm32mp1.
 - st,syscfg: Phandle to system configuration controller. It can be used to
   control the analog circuitry on stm32mp1.
+- st,max-clk-rate-hz: Allow to specify desired max clock rate used by analog
+  circuitry.
 
 Contents of a stm32 adc child node:
 -----------------------------------
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt
deleted file mode 100644
index 75ba25d062e1..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-STMicroelectronics STM32 DFSDM ADC device driver
-
-
-STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to
-interface external sigma delta modulators to STM32 micro controllers.
-It is mainly targeted for:
-- Sigma delta modulators (motor control, metering...)
-- PDM microphones (audio digital microphone)
-
-It features up to 8 serial digital interfaces (SPI or Manchester) and
-up to 4 filters on stm32h7 or 6 filters on stm32mp1.
-
-Each child node match with a filter instance.
-
-Contents of a STM32 DFSDM root node:
-------------------------------------
-Required properties:
-- compatible: Should be one of:
-  "st,stm32h7-dfsdm"
-  "st,stm32mp1-dfsdm"
-- reg: Offset and length of the DFSDM block register set.
-- clocks: IP and serial interfaces clocking. Should be set according
-		to rcc clock ID and "clock-names".
-- clock-names: Input clock name "dfsdm" must be defined,
-		"audio" is optional. If defined CLKOUT is based on the audio
-		clock, else "dfsdm" is used.
-- #interrupt-cells = <1>;
-- #address-cells = <1>;
-- #size-cells = <0>;
-
-Optional properties:
-- spi-max-frequency: Requested only for SPI master mode.
-		  SPI clock OUT frequency (Hz). This clock must be set according
-		  to "clock" property. Frequency must be a multiple of the rcc
-		  clock frequency. If not, SPI CLKOUT frequency will not be
-		  accurate.
-- pinctrl-names:	Set to "default".
-- pinctrl-0:		List of phandles pointing to pin configuration
-			nodes to set pins in mode of operation for dfsdm
-			on external pin.
-
-Contents of a STM32 DFSDM child nodes:
---------------------------------------
-
-Required properties:
-- compatible: Must be:
-	"st,stm32-dfsdm-adc" for sigma delta ADCs
-	"st,stm32-dfsdm-dmic" for audio digital microphone.
-- reg: Specifies the DFSDM filter instance used.
-	Valid values are from 0 to 3 on stm32h7, 0 to 5 on stm32mp1.
-- interrupts: IRQ lines connected to each DFSDM filter instance.
-- st,adc-channels:	List of single-ended channels muxed for this ADC.
-			valid values:
-				"st,stm32h7-dfsdm" compatibility: 0 to 7.
-- st,adc-channel-names:	List of single-ended channel names.
-- st,filter-order:  SinC filter order from 0 to 5.
-			0: FastSinC
-			[1-5]: order 1 to 5.
-			For audio purpose it is recommended to use order 3 to 5.
-- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers".
-
-Required properties for "st,stm32-dfsdm-adc" compatibility:
-- io-channels: From common IIO binding. Used to pipe external sigma delta
-		modulator or internal ADC output to DFSDM channel.
-		This is not required for "st,stm32-dfsdm-pdm" compatibility as
-		PDM microphone is binded in Audio DT node.
-
-Required properties for "st,stm32-dfsdm-pdm" compatibility:
-- #sound-dai-cells: Must be set to 0.
-- dma: DMA controller phandle and DMA request line associated to the
-		filter instance (specified by the field "reg")
-- dma-names: Must be "rx"
-
-Optional properties:
-- st,adc-channel-types:	Single-ended channel input type.
-			- "SPI_R": SPI with data on rising edge (default)
-			- "SPI_F": SPI with data on falling edge
-			- "MANCH_R": manchester codec, rising edge = logic 0, falling edge = logic 1
-			- "MANCH_F": manchester codec, rising edge = logic 1, falling edge = logic 0
-- st,adc-channel-clk-src: Conversion clock source.
-			  - "CLKIN": external SPI clock (CLKIN x)
-			  - "CLKOUT": internal SPI clock (CLKOUT) (default)
-			  - "CLKOUT_F": internal SPI clock divided by 2 (falling edge).
-			  - "CLKOUT_R": internal SPI clock divided by 2 (rising edge).
-
-- st,adc-alt-channel: Must be defined if two sigma delta modulator are
-			  connected on same SPI input.
-			  If not set, channel n is connected to SPI input n.
-			  If set, channel n is connected to SPI input n + 1.
-
-- st,filter0-sync: Set to 1 to synchronize with DFSDM filter instance 0.
-		   Used for multi microphones synchronization.
-
-Example of a sigma delta adc connected on DFSDM SPI port 0
-and a pdm microphone connected on DFSDM SPI port 1:
-
-	ads1202: simple_sd_adc@0 {
-		compatible = "ads1202";
-		#io-channel-cells = <1>;
-	};
-
-	dfsdm: dfsdm@40017000 {
-		compatible = "st,stm32h7-dfsdm";
-		reg = <0x40017000 0x400>;
-		clocks = <&rcc DFSDM1_CK>;
-		clock-names = "dfsdm";
-		#interrupt-cells = <1>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		dfsdm_adc0: filter@0 {
-			compatible = "st,stm32-dfsdm-adc";
-			#io-channel-cells = <1>;
-			reg = <0>;
-			interrupts = <110>;
-			st,adc-channels = <0>;
-			st,adc-channel-names = "sd_adc0";
-			st,adc-channel-types = "SPI_F";
-			st,adc-channel-clk-src = "CLKOUT";
-			io-channels = <&ads1202 0>;
-			st,filter-order = <3>;
-		};
-		dfsdm_pdm1: filter@1 {
-			compatible = "st,stm32-dfsdm-dmic";
-			reg = <1>;
-			interrupts = <111>;
-			dmas = <&dmamux1 102 0x400 0x00>;
-			dma-names = "rx";
-			st,adc-channels = <1>;
-			st,adc-channel-names = "dmic1";
-			st,adc-channel-types = "SPI_R";
-			st,adc-channel-clk-src = "CLKOUT";
-			st,filter-order = <5>;
-		};
-	}
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
new file mode 100644
index 000000000000..c91407081aa5
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
@@ -0,0 +1,332 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/st,stm32-dfsdm-adc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 DFSDM ADC device driver
+
+maintainers:
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Olivier Moysan <olivier.moysan@st.com>
+
+description: |
+  STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to
+  interface external sigma delta modulators to STM32 micro controllers.
+  It is mainly targeted for:
+  - Sigma delta modulators (motor control, metering...)
+  - PDM microphones (audio digital microphone)
+
+  It features up to 8 serial digital interfaces (SPI or Manchester) and
+  up to 4 filters on stm32h7 or 6 filters on stm32mp1.
+
+  Each child node matches with a filter instance.
+
+properties:
+  compatible:
+    enum:
+      - st,stm32h7-dfsdm
+      - st,stm32mp1-dfsdm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description:
+          Internal clock used for DFSDM digital processing and control blocks.
+          dfsdm clock can also feed CLKOUT, when CLKOUT is used.
+      - description: audio clock can be used as an alternate to feed CLKOUT.
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: dfsdm
+      - const: audio
+    minItems: 1
+    maxItems: 2
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  spi-max-frequency:
+    description:
+      SPI clock OUT frequency (Hz). Requested only for SPI master mode.
+      This clock must be set according to the "clock" property.
+      Frequency must be a multiple of the rcc clock frequency.
+      If not, SPI CLKOUT frequency will not be accurate.
+    maximum: 20000000
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - "#address-cells"
+  - "#size-cells"
+
+patternProperties:
+  "^filter@[0-9]+$":
+    type: object
+    description: child node
+
+    properties:
+      compatible:
+        enum:
+          - st,stm32-dfsdm-adc
+          - st,stm32-dfsdm-dmic
+
+      reg:
+        description: Specifies the DFSDM filter instance used.
+        maxItems: 1
+
+      interrupts:
+        maxItems: 1
+
+      st,adc-channels:
+        description: |
+          List of single-ended channels muxed for this ADC.
+          On stm32h7 and stm32mp1:
+          - For st,stm32-dfsdm-adc: up to 8 channels numbered from 0 to 7.
+          - For st,stm32-dfsdm-dmic: 1 channel numbered from 0 to 7.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32-array
+          - items:
+              minimum: 0
+              maximum: 7
+
+      st,adc-channel-names:
+        description: List of single-ended channel names.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/string-array
+
+      st,filter-order:
+        description: |
+          SinC filter order from 0 to 5.
+          - 0: FastSinC
+          - [1-5]: order 1 to 5.
+          For audio purpose it is recommended to use order 3 to 5.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - items:
+              minimum: 0
+              maximum: 5
+
+      "#io-channel-cells":
+        const: 1
+
+      st,adc-channel-types:
+        description: |
+          Single-ended channel input type.
+          - "SPI_R": SPI with data on rising edge (default)
+          - "SPI_F": SPI with data on falling edge
+          - "MANCH_R": manchester codec, rising edge = logic 0, falling edge = logic 1
+          - "MANCH_F": manchester codec, rising edge = logic 1, falling edge = logic 0
+        items:
+          enum: [ SPI_R, SPI_F, MANCH_R, MANCH_F ]
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+
+      st,adc-channel-clk-src:
+        description: |
+          Conversion clock source.
+          - "CLKIN": external SPI clock (CLKIN x)
+          - "CLKOUT": internal SPI clock (CLKOUT) (default)
+          - "CLKOUT_F": internal SPI clock divided by 2 (falling edge).
+          - "CLKOUT_R": internal SPI clock divided by 2 (rising edge).
+        items:
+          enum: [ CLKIN, CLKOUT, CLKOUT_F, CLKOUT_R ]
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+
+      st,adc-alt-channel:
+        description:
+          Must be defined if two sigma delta modulators are
+          connected on same SPI input.
+          If not set, channel n is connected to SPI input n.
+          If set, channel n is connected to SPI input n + 1.
+        type: boolean
+
+      st,filter0-sync:
+        description:
+          Set to 1 to synchronize with DFSDM filter instance 0.
+          Used for multi microphones synchronization.
+        type: boolean
+
+      dmas:
+        maxItems: 1
+
+      dma-names:
+        items:
+          - const: rx
+
+    required:
+      - compatible
+      - reg
+      - interrupts
+      - st,adc-channels
+      - st,adc-channel-names
+      - st,filter-order
+      - "#io-channel-cells"
+
+    allOf:
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: st,stm32-dfsdm-adc
+
+      - then:
+          properties:
+            st,adc-channels:
+              minItems: 1
+              maxItems: 8
+
+            st,adc-channel-names:
+              minItems: 1
+              maxItems: 8
+
+            st,adc-channel-types:
+              minItems: 1
+              maxItems: 8
+
+            st,adc-channel-clk-src:
+              minItems: 1
+              maxItems: 8
+
+            io-channels:
+              description:
+                From common IIO binding. Used to pipe external sigma delta
+                modulator or internal ADC output to DFSDM channel.
+                This is not required for "st,stm32-dfsdm-pdm" compatibility as
+                PDM microphone is binded in Audio DT node.
+
+          required:
+            - io-channels
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: st,stm32-dfsdm-dmic
+
+      - then:
+          properties:
+            st,adc-channels:
+              maxItems: 1
+
+            st,adc-channel-names:
+              maxItems: 1
+
+            st,adc-channel-types:
+              maxItems: 1
+
+            st,adc-channel-clk-src:
+              maxItems: 1
+
+          required:
+            - dmas
+            - dma-names
+
+          patternProperties:
+            "^dfsdm-dai+$":
+              type: object
+              description: child node
+
+              properties:
+                "#sound-dai-cells":
+                  const: 0
+
+                io-channels:
+                  description:
+                    From common IIO binding. Used to pipe external sigma delta
+                    modulator or internal ADC output to DFSDM channel.
+
+              required:
+                - "#sound-dai-cells"
+                - io-channels
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32h7-dfsdm
+
+  - then:
+      patternProperties:
+        "^filter@[0-9]+$":
+          properties:
+            reg:
+              items:
+                minimum: 0
+                maximum: 3
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32mp1-dfsdm
+
+  - then:
+      patternProperties:
+        "^filter@[0-9]+$":
+          properties:
+            reg:
+              items:
+                minimum: 0
+                maximum: 5
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    dfsdm: dfsdm@4400d000 {
+      compatible = "st,stm32mp1-dfsdm";
+      reg = <0x4400d000 0x800>;
+      clocks = <&rcc DFSDM_K>, <&rcc ADFSDM_K>;
+      clock-names = "dfsdm", "audio";
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      dfsdm0: filter@0 {
+        compatible = "st,stm32-dfsdm-dmic";
+        reg = <0>;
+        interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+        dmas = <&dmamux1 101 0x400 0x01>;
+        dma-names = "rx";
+        #io-channel-cells = <1>;
+        st,adc-channels = <1>;
+        st,adc-channel-names = "dmic0";
+        st,adc-channel-types = "SPI_R";
+        st,adc-channel-clk-src = "CLKOUT";
+        st,filter-order = <5>;
+
+        asoc_pdm0: dfsdm-dai {
+          compatible = "st,stm32h7-dfsdm-dai";
+          #sound-dai-cells = <0>;
+          io-channels = <&dfsdm0 0>;
+        };
+      };
+
+      dfsdm_pdm1: filter@1 {
+        compatible = "st,stm32-dfsdm-adc";
+        reg = <1>;
+        interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+        dmas = <&dmamux1 102 0x400 0x01>;
+        dma-names = "rx";
+        #io-channel-cells = <1>;
+        st,adc-channels = <2 3>;
+        st,adc-channel-names = "in2", "in3";
+        st,adc-channel-types = "SPI_R", "SPI_R";
+        st,adc-channel-clk-src = "CLKOUT_F", "CLKOUT_F";
+        io-channels = <&sd_adc2 &sd_adc3>;
+        st,filter-order = <1>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml b/Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml
index a551d3101f93..19e53930ebf6 100644
--- a/Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml
+++ b/Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml
@@ -25,7 +25,6 @@ properties:
 
   vcc-supply:
     description: regulator that provides power to the sensor
-    maxItems: 1
 
   plantower,set-gpios:
     description: GPIO connected to the SET line
diff --git a/Documentation/devicetree/bindings/iio/dac/lltc,ltc1660.yaml b/Documentation/devicetree/bindings/iio/dac/lltc,ltc1660.yaml
new file mode 100644
index 000000000000..13d005b68931
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/dac/lltc,ltc1660.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2019 Marcus Folkesson <marcus.folkesson@gmail.com>
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/bindings/iio/dac/lltc,ltc1660.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Linear Technology Micropower octal 8-Bit and 10-Bit DACs
+
+maintainers:
+  - Marcus Folkesson <marcus.folkesson@gmail.com>
+
+description: |
+  Bindings for the Linear Technology Micropower octal 8-Bit and 10-Bit DAC.
+  Datasheet can be found here: https://www.analog.com/media/en/technical-documentation/data-sheets/166560fa.pdf
+
+properties:
+  compatible:
+    enum:
+      - lltc,ltc1660
+      - lltc,ltc1665
+
+  reg:
+    maxItems: 1
+
+  spi-max-frequency:
+    maximum: 5000000
+
+  vref-supply:
+    description: Phandle to the external reference voltage supply.
+
+required:
+  - compatible
+  - reg
+  - vref-supply
+
+examples:
+  - |
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+  
+      dac@0 {
+        compatible = "lltc,ltc1660";
+        reg = <0>;
+        spi-max-frequency = <5000000>;
+        vref-supply = <&vref_reg>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/iio/dac/ltc1660.txt b/Documentation/devicetree/bindings/iio/dac/ltc1660.txt
deleted file mode 100644
index c5b5f22d6c64..000000000000
--- a/Documentation/devicetree/bindings/iio/dac/ltc1660.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-* Linear Technology Micropower octal 8-Bit and 10-Bit DACs
-
-Required properties:
- - compatible: Must be one of the following:
-		"lltc,ltc1660"
-		"lltc,ltc1665"
- - reg: SPI chip select number for the device
- - vref-supply: Phandle to the voltage reference supply
-
-Recommended properties:
- - spi-max-frequency: Definition as per
-	 Documentation/devicetree/bindings/spi/spi-bus.txt.
-	 Max frequency for this chip is 5 MHz.
-
-Example:
-dac@0 {
-	compatible = "lltc,ltc1660";
-	reg = <0>;
-	spi-max-frequency = <5000000>;
-	vref-supply = <&vref_reg>;
-};
diff --git a/Documentation/devicetree/bindings/iio/iio-bindings.txt b/Documentation/devicetree/bindings/iio/iio-bindings.txt
index 68d6f8ce063b..af33267727f4 100644
--- a/Documentation/devicetree/bindings/iio/iio-bindings.txt
+++ b/Documentation/devicetree/bindings/iio/iio-bindings.txt
@@ -18,12 +18,17 @@ Required properties:
 		   with a single IIO output and 1 for nodes with multiple
 		   IIO outputs.
 
+Optional properties:
+label:		   A symbolic name for the device.
+
+
 Example for a simple configuration with no trigger:
 
 	adc: voltage-sensor@35 {
 		compatible = "maxim,max1139";
 		reg = <0x35>;
 		#io-channel-cells = <1>;
+		label = "voltage_feedback_group1";
 	};
 
 Example for a configuration with trigger:
diff --git a/Documentation/devicetree/bindings/iio/imu/adi,adis16480.txt b/Documentation/devicetree/bindings/iio/imu/adi,adis16480.txt
index ed7783f45233..cd903a1d880d 100644
--- a/Documentation/devicetree/bindings/iio/imu/adi,adis16480.txt
+++ b/Documentation/devicetree/bindings/iio/imu/adi,adis16480.txt
@@ -8,6 +8,7 @@ Required properties for the ADIS16480:
 	* "adi,adis16480"
 	* "adi,adis16485"
 	* "adi,adis16488"
+	* "adi,adis16490"
 	* "adi,adis16495-1"
 	* "adi,adis16495-2"
 	* "adi,adis16495-3"
diff --git a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt
index 268bf7568e19..c5ee8a20af9f 100644
--- a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt
+++ b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt
@@ -21,6 +21,7 @@ Required properties:
   bindings.
 
 Optional properties:
+ - vdd-supply: regulator phandle for VDD supply
  - vddio-supply: regulator phandle for VDDIO supply
  - mount-matrix: an optional 3x3 mounting rotation matrix
  - i2c-gate node.  These devices also support an auxiliary i2c bus.  This is
diff --git a/Documentation/devicetree/bindings/iio/imu/nxp,fxos8700.yaml b/Documentation/devicetree/bindings/iio/imu/nxp,fxos8700.yaml
new file mode 100644
index 000000000000..63bcb73ae309
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/imu/nxp,fxos8700.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/imu/nxp,fxos8700.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale FXOS8700 Inertial Measurement Unit
+
+maintainers:
+  - Robert Jones <rjones@gateworks.com>
+
+description: |
+  Accelerometer and magnetometer combo device with an i2c and SPI interface.
+  https://www.nxp.com/products/sensors/motion-sensors/6-axis/digital-motion-sensor-3d-accelerometer-2g-4g-8g-plus-3d-magnetometer:FXOS8700CQ
+
+properties:
+  compatible:
+    enum:
+      - nxp,fxos8700
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      enum:
+        - INT1
+        - INT2
+
+  drive-open-drain:
+    type: boolean
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        fxos8700@1e {
+          compatible = "nxp,fxos8700";
+          reg = <0x1e>;
+
+          interrupt-parent = <&gpio2>;
+          interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+          interrupt-names = "INT1";
+        };
+    };
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        fxos8700@0 {
+          compatible = "nxp,fxos8700";
+          reg = <0>;
+
+          spi-max-frequency = <1000000>;
+          interrupt-parent = <&gpio1>;
+          interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+          interrupt-names = "INT2";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt b/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt
index 6d0c050d89fe..cef4bc16fce1 100644
--- a/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt
+++ b/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt
@@ -14,6 +14,8 @@ Required properties:
   "st,lsm6ds3tr-c"
   "st,ism330dhcx"
   "st,lsm9ds1-imu"
+  "st,lsm6ds0"
+  "st,lsm6dsrx"
 - reg: i2c address of the sensor / spi cs line
 
 Optional properties:
@@ -31,6 +33,7 @@ Optional properties:
 - interrupts: interrupt mapping for IRQ. It should be configured with
   flags IRQ_TYPE_LEVEL_HIGH, IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_LOW or
   IRQ_TYPE_EDGE_FALLING.
+- wakeup-source: Enables wake up of host system on event.
 
   Refer to interrupt-controller/interrupts.txt for generic interrupt
   client node bindings.
diff --git a/Documentation/devicetree/bindings/iio/light/adux1020.yaml b/Documentation/devicetree/bindings/iio/light/adux1020.yaml
new file mode 100644
index 000000000000..69bd5c06319d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/adux1020.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/adux1020.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADUX1020 Photometric sensor
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+  Photometric sensor over an i2c interface.
+  https://www.analog.com/media/en/technical-documentation/data-sheets/ADUX1020.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,adux1020
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        adux1020@64 {
+                compatible = "adi,adux1020";
+                reg = <0x64>;
+                interrupt-parent = <&msmgpio>;
+                interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/iio/light/bh1750.txt b/Documentation/devicetree/bindings/iio/light/bh1750.txt
deleted file mode 100644
index 1e7685797d7a..000000000000
--- a/Documentation/devicetree/bindings/iio/light/bh1750.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-ROHM BH1750 - ALS, Ambient light sensor
-
-Required properties:
-
-- compatible: Must be one of:
-    "rohm,bh1710"
-    "rohm,bh1715"
-    "rohm,bh1721"
-    "rohm,bh1750"
-    "rohm,bh1751"
-- reg: the I2C address of the sensor
-
-Example:
-
-light-sensor@23 {
-	compatible = "rohm,bh1750";
-	reg = <0x23>;
-};
diff --git a/Documentation/devicetree/bindings/iio/light/bh1750.yaml b/Documentation/devicetree/bindings/iio/light/bh1750.yaml
new file mode 100644
index 000000000000..1cc60d7ecfa0
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/bh1750.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/bh1750.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ROHM BH1750 ambient light sensor
+
+maintainers:
+  - Tomasz Duszynski <tduszyns@gmail.com>
+
+description: |
+  Ambient light sensor with an i2c interface.
+
+properties:
+  compatible:
+    enum:
+      - rohm,bh1710
+      - rohm,bh1715
+      - rohm,bh1721
+      - rohm,bh1750
+      - rohm,bh1751
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      light-sensor@23 {
+        compatible = "rohm,bh1750";
+        reg = <0x23>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/iio/light/veml6030.yaml b/Documentation/devicetree/bindings/iio/light/veml6030.yaml
new file mode 100644
index 000000000000..0ff9b11f9d18
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/veml6030.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/veml6030.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: VEML6030 Ambient Light Sensor (ALS)
+
+maintainers:
+  - Rishi Gupta <gupt21@gmail.com>
+
+description: |
+  Bindings for the ambient light sensor veml6030 from Vishay
+  Semiconductors over an i2c interface.
+
+  Irrespective of whether interrupt is used or not, application
+  can get the ALS and White channel reading from IIO raw interface.
+
+  If the interrupts are used, application will receive an IIO event
+  whenever configured threshold is crossed.
+
+  Specifications about the sensor can be found at:
+    https://www.vishay.com/docs/84366/veml6030.pdf
+
+properties:
+  compatible:
+    enum:
+      - vishay,veml6030
+
+  reg:
+    description:
+      I2C address of the device.
+    enum:
+      - 0x10 # ADDR pin pulled down
+      - 0x48 # ADDR pin pulled up
+
+  interrupts:
+    description:
+      interrupt mapping for IRQ. Configure with IRQ_TYPE_LEVEL_LOW.
+      Refer to interrupt-controller/interrupts.txt for generic
+      interrupt client node bindings.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        light-sensor@10 {
+                compatible = "vishay,veml6030";
+                reg = <0x10>;
+                interrupts = <12 IRQ_TYPE_LEVEL_LOW>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/iio/pressure/asc,dlhl60d.yaml b/Documentation/devicetree/bindings/iio/pressure/asc,dlhl60d.yaml
new file mode 100644
index 000000000000..9f5ca9c42025
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/pressure/asc,dlhl60d.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/pressure/asc,dlhl60d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: All Sensors DLH series low voltage digital pressure sensors
+
+maintainers:
+  - Tomislav Denis <tomislav.denis@avl.com>
+
+description: |
+  Bindings for the All Sensors DLH series pressure sensors.
+
+  Specifications about the sensors can be found at:
+    http://www.allsensors.com/cad/DS-0355_Rev_B.PDF
+
+properties:
+  compatible:
+    enum:
+      - asc,dlhl60d
+      - asc,dlhl60g
+
+  reg:
+    description: I2C device address
+    maxItems: 1
+
+  interrupts:
+    description: interrupt mapping for EOC(data ready) pin
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pressure@29 {
+            compatible = "asc,dlhl60d";
+            reg = <0x29>;
+            interrupt-parent = <&gpio0>;
+            interrupts = <10 IRQ_TYPE_EDGE_RISING>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/iio/pressure/bmp085.yaml b/Documentation/devicetree/bindings/iio/pressure/bmp085.yaml
index c6721a7e8938..519137e5c170 100644
--- a/Documentation/devicetree/bindings/iio/pressure/bmp085.yaml
+++ b/Documentation/devicetree/bindings/iio/pressure/bmp085.yaml
@@ -28,12 +28,10 @@ properties:
   vddd-supply:
     description:
       digital voltage regulator (see regulator/regulator.txt)
-    maxItems: 1
 
   vdda-supply:
     description:
       analog voltage regulator (see regulator/regulator.txt)
-    maxItems: 1
 
   reset-gpios:
     description:
diff --git a/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.txt b/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.txt
deleted file mode 100644
index dd1058fbe9c3..000000000000
--- a/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* MaxBotix I2CXL-MaxSonar ultrasonic distance sensor of type  mb1202,
-  mb1212, mb1222, mb1232, mb1242, mb7040 or mb7137 using the i2c interface
-  for ranging
-
-Required properties:
- - compatible:		"maxbotix,mb1202",
-			"maxbotix,mb1212",
-			"maxbotix,mb1222",
-			"maxbotix,mb1232",
-			"maxbotix,mb1242",
-			"maxbotix,mb7040" or
-			"maxbotix,mb7137"
-
- - reg:			i2c address of the device, see also i2c/i2c.txt
-
-Optional properties:
- - interrupts:		Interrupt used to announce the preceding reading
-			request has finished and that data is available.
-			If no interrupt is specified the device driver
-			falls back to wait a fixed amount of time until
-			data can be retrieved.
-
-Example:
-proximity@70 {
-	compatible = "maxbotix,mb1232";
-	reg = <0x70>;
-	interrupt-parent = <&gpio2>;
-	interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
-};
diff --git a/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml b/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml
new file mode 100644
index 000000000000..3eac248f291d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/proximity/maxbotix,mb1232.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MaxBotix I2CXL-MaxSonar ultrasonic distance sensor
+
+maintainers:
+  - Andreas Klinger <ak@it-klinger.de>
+
+description: |
+  MaxBotix I2CXL-MaxSonar ultrasonic distance sensor of type  mb1202,
+  mb1212, mb1222, mb1232, mb1242, mb7040 or mb7137 using the i2c interface
+  for ranging
+
+  Specifications about the devices can be found at:
+  https://www.maxbotix.com/documents/I2CXL-MaxSonar-EZ_Datasheet.pdf
+
+properties:
+  compatible:
+    enum:
+      - maxbotix,mb1202
+      - maxbotix,mb1212
+      - maxbotix,mb1222
+      - maxbotix,mb1232
+      - maxbotix,mb1242
+      - maxbotix,mb7040
+      - maxbotix,mb7137
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      Interrupt used to announce the preceding reading request has finished
+      and that data is available.  If no interrupt is specified the device
+      driver falls back to wait a fixed amount of time until data can be
+      retrieved.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      proximity@70 {
+        compatible = "maxbotix,mb1232";
+        reg = <0x70>;
+        interrupt-parent = <&gpio2>;
+        interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/iio/proximity/parallax-ping.yaml b/Documentation/devicetree/bindings/iio/proximity/parallax-ping.yaml
new file mode 100644
index 000000000000..a079c9921af6
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/proximity/parallax-ping.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/proximity/parallax-ping.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Parallax PING))) and LaserPING range finder
+
+maintainers:
+  - Andreas Klinger <ak@it-klinger.de>
+
+description: |
+  Bit-banging driver using one GPIO:
+  - ping-gpios is raised by the driver to start measurement
+  - direction of ping-gpio is then switched into input with an interrupt
+    for receiving distance value as PWM signal
+
+  Specifications about the devices can be found at:
+  http://parallax.com/sites/default/files/downloads/28041-LaserPING-2m-Rangefinder-Guide.pdf
+  http://parallax.com/sites/default/files/downloads/28015-PING-Documentation-v1.6.pdf
+
+properties:
+  compatible:
+    enum:
+      - parallax,ping
+      - parallax,laserping
+
+  ping-gpios:
+    description:
+      Definition of the GPIO for the triggering and echo (output and input)
+      This GPIO is set for about 5 us by the driver to tell the device it
+      should initiate the measurement cycle. Afterwards the GPIO is switched
+      to input direction with an interrupt. The device sets it and the
+      length of the input signal corresponds to the measured distance.
+      It needs to be an GPIO which is able to deliver an interrupt because
+      the time between two interrupts is measured in the driver.
+      See Documentation/devicetree/bindings/gpio/gpio.txt for information
+      on how to specify a consumer gpio.
+    maxItems: 1
+
+required:
+  - compatible
+  - ping-gpios
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    proximity {
+        compatible = "parallax,laserping";
+        ping-gpios = <&gpio0 26 GPIO_ACTIVE_HIGH>;
+    };
diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
new file mode 100644
index 000000000000..d4922f9f0376
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -0,0 +1,480 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/temperature/adi,ltc2983.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices LTC2983 Multi-sensor Temperature system
+
+maintainers:
+  - Nuno Sá <nuno.sa@analog.com>
+
+description: |
+  Analog Devices LTC2983 Multi-Sensor Digital Temperature Measurement System
+  https://www.analog.com/media/en/technical-documentation/data-sheets/2983fc.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ltc2983
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  adi,mux-delay-config-us:
+    description:
+      The LTC2983 performs 2 or 3 internal conversion cycles per temperature
+      result. Each conversion cycle is performed with different excitation and
+      input multiplexer configurations. Prior to each conversion, these
+      excitation circuits and input switch configurations are changed and an
+      internal 1ms delay ensures settling prior to the conversion cycle in most
+      cases. An extra delay can be configured using this property. The value is
+      rounded to nearest 100us.
+    maximum: 255
+
+  adi,filter-notch-freq:
+    description:
+      Set's the default setting of the digital filter. The default is
+      simultaneous 50/60Hz rejection.
+      0 - 50/60Hz rejection
+      1 - 60Hz rejection
+      2 - 50Hz rejection
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 0
+        maximum: 2
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+patternProperties:
+  "@([1-9]|1[0-9]|20)$":
+    type: object
+
+    properties:
+      reg:
+        description:
+          The channel number. It can be connected to one of the 20 channels of
+          the device.
+        minimum: 1
+        maximum: 20
+
+      adi,sensor-type:
+        description: Identifies the type of sensor connected to the device.
+        $ref: /schemas/types.yaml#/definitions/uint32
+
+    required:
+      - reg
+      - adi,sensor-type
+
+  "^thermocouple@":
+    type: object
+    description:
+      Represents a thermocouple sensor which is connected to one of the device
+      channels.
+
+    properties:
+      adi,sensor-type:
+        description: |
+          1 - Type J Thermocouple
+          2 - Type K Thermocouple
+          3 - Type E Thermocouple
+          4 - Type N Thermocouple
+          5 - Type R Thermocouple
+          6 - Type S Thermocouple
+          7 - Type T Thermocouple
+          8 - Type B Thermocouple
+          9 - Custom Thermocouple
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 9
+
+      adi,single-ended:
+        description:
+          Boolean property which set's the thermocouple as single-ended.
+        type: boolean
+
+      adi,sensor-oc-current-microamp:
+        description:
+          This property set's the pulsed current value applied during
+          open-circuit detect.
+        enum: [10, 100, 500, 1000]
+
+      adi,cold-junction-handle:
+        description:
+          Phandle which points to a sensor object responsible for measuring
+          the thermocouple cold junction temperature.
+        $ref: "/schemas/types.yaml#/definitions/phandle"
+
+      adi,custom-thermocouple:
+        description:
+          This is a table, where each entry should be a pair of
+          voltage(mv)-temperature(K). The entries must be given in nv and uK
+          so that, the original values must be multiplied by 1000000. For
+          more details look at table 69 and 70.
+          Note should be signed, but dtc doesn't currently maintain the
+          sign.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint64-matrix
+        items:
+          minItems: 3
+          maxItems: 64
+          items:
+            minItems: 2
+            maxItems: 2
+
+  "^diode@":
+    type: object
+    description:
+      Represents a diode sensor which is connected to one of the device
+      channels.
+
+    properties:
+      adi,sensor-type:
+        description: Identifies the sensor as a diode.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        const: 28
+
+      adi,single-ended:
+        description: Boolean property which set's the diode as single-ended.
+        type: boolean
+
+      adi,three-conversion-cycles:
+        description:
+          Boolean property which set's three conversion cycles removing
+          parasitic resistance effects between the LTC2983 and the diode.
+        type: boolean
+
+      adi,average-on:
+        description:
+          Boolean property which enables a running average of the diode
+          temperature reading. This reduces the noise when the diode is used
+          as a cold junction temperature element on an isothermal block
+          where temperatures change slowly.
+        type: boolean
+
+      adi,excitation-current-microamp:
+        description:
+          This property controls the magnitude of the excitation current
+          applied to the diode. Depending on the number of conversions
+          cycles, this property will assume different predefined values on
+          each cycle. Just set the value of the first cycle (1l).
+        enum: [10, 20, 40, 80]
+
+      adi,ideal-factor-value:
+        description:
+          This property sets the diode ideality factor. The real value must
+          be multiplied by 1000000 to remove the fractional part. For more
+          information look at table 20 of the datasheet.
+        $ref: /schemas/types.yaml#/definitions/uint32
+
+  "^rtd@":
+    type: object
+    description:
+      Represents a rtd sensor which is connected to one of the device channels.
+
+    properties:
+      reg:
+        minimum: 2
+        maximum: 20
+
+      adi,sensor-type:
+        description: |
+          10 - RTD PT-10
+          11 - RTD PT-50
+          12 - RTD PT-100
+          13 - RTD PT-200
+          14 - RTD PT-500
+          15 - RTD PT-1000
+          16 - RTD PT-1000 (0.00375)
+          17 - RTD NI-120
+          18 - RTD Custom
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 10
+        maximum: 18
+
+      adi,rsense-handle:
+        description:
+          Phandle pointing to a rsense object associated with this RTD.
+        $ref: "/schemas/types.yaml#/definitions/phandle"
+
+      adi,number-of-wires:
+        description:
+          Identifies the number of wires used by the RTD. Setting this
+          property to 5 means 4 wires with Kelvin Rsense.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [2, 3, 4, 5]
+
+      adi,rsense-share:
+        description:
+          Boolean property which enables Rsense sharing, where one sense
+          resistor is used for multiple 2-, 3-, and/or 4-wire RTDs.
+        type: boolean
+
+      adi,current-rotate:
+        description:
+          Boolean property which enables excitation current rotation to
+          automatically remove parasitic thermocouple effects. Note that
+          this property is not allowed for 2- and 3-wire RTDs.
+        type: boolean
+
+      adi,excitation-current-microamp:
+        description:
+          This property controls the magnitude of the excitation current
+          applied to the RTD.
+        enum: [5, 10, 25, 50, 100, 250, 500, 1000]
+
+      adi,rtd-curve:
+        description:
+          This property set the RTD curve used and the corresponding
+          Callendar-VanDusen constants. Look at table 30 of the datasheet.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - minimum: 0
+            maximum: 3
+
+      adi,custom-rtd:
+        description:
+          This is a table, where each entry should be a pair of
+          resistance(ohm)-temperature(K). The entries added here are in uohm
+          and uK. For more details values look at table 74 and 75.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint64-matrix
+        items:
+          minItems: 3
+          maxItems: 64
+          items:
+            minItems: 2
+            maxItems: 2
+
+    required:
+      - adi,rsense-handle
+
+    dependencies:
+      adi,current-rotate: [ adi,rsense-share ]
+
+  "^thermistor@":
+    type: object
+    description:
+      Represents a thermistor sensor which is connected to one of the device
+      channels.
+
+    properties:
+      adi,sensor-type:
+        description:
+          19 - Thermistor 44004/44033 2.252kohm at 25°C
+          20 - Thermistor 44005/44030 3kohm at 25°C
+          21 - Thermistor 44007/44034 5kohm at 25°C
+          22 - Thermistor 44006/44031 10kohm at 25°C
+          23 - Thermistor 44008/44032 30kohm at 25°C
+          24 - Thermistor YSI 400 2.252kohm at 25°C
+          25 - Thermistor Spectrum 1003k 1kohm
+          26 - Thermistor Custom Steinhart-Hart
+          27 - Custom Thermistor
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 19
+        maximum: 27
+
+      adi,rsense-handle:
+        description:
+          Phandle pointing to a rsense object associated with this
+          thermistor.
+        $ref: "/schemas/types.yaml#/definitions/phandle"
+
+      adi,single-ended:
+        description:
+          Boolean property which set's the thermistor as single-ended.
+        type: boolean
+
+      adi,rsense-share:
+        description:
+          Boolean property which enables Rsense sharing, where one sense
+          resistor is used for multiple thermistors. Note that this property
+          is ignored if adi,single-ended is set.
+        type: boolean
+
+      adi,current-rotate:
+        description:
+          Boolean property which enables excitation current rotation to
+          automatically remove parasitic thermocouple effects.
+        type: boolean
+
+      adi,excitation-current-nanoamp:
+        description:
+          This property controls the magnitude of the excitation current
+          applied to the thermistor. Value 0 set's the sensor in auto-range
+          mode.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000,
+                   250000, 500000, 1000000]
+
+      adi,custom-thermistor:
+        description:
+          This is a table, where each entry should be a pair of
+          resistance(ohm)-temperature(K). The entries added here are in uohm
+          and uK only for custom thermistors. For more details look at table
+          78 and 79.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint64-matrix
+        items:
+          minItems: 3
+          maxItems: 64
+          items:
+            minItems: 2
+            maxItems: 2
+
+      adi,custom-steinhart:
+        description:
+          Steinhart-Hart coefficients are also supported and can
+          be programmed into the device memory using this property. For
+          Steinhart sensors the coefficients are given in the raw
+          format. Look at table 82 for more information.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32-array
+        items:
+          minItems: 6
+          maxItems: 6
+
+    required:
+      - adi,rsense-handle
+
+    dependencies:
+      adi,current-rotate: [ adi,rsense-share ]
+
+  "^adc@":
+    type: object
+    description: Represents a channel which is being used as a direct adc.
+
+    properties:
+      adi,sensor-type:
+        description: Identifies the sensor as a direct adc.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        const: 30
+
+      adi,single-ended:
+        description: Boolean property which set's the adc as single-ended.
+        type: boolean
+
+  "^rsense@":
+    type: object
+    description:
+      Represents a rsense which is connected to one of the device channels.
+      Rsense are used by thermistors and RTD's.
+
+    properties:
+      reg:
+        minimum: 2
+        maximum: 20
+
+      adi,sensor-type:
+        description: Identifies the sensor as a rsense.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+        const: 29
+
+      adi,rsense-val-milli-ohms:
+        description:
+          Sets the value of the sense resistor. Look at table 20 of the
+          datasheet for information.
+
+    required:
+      - adi,rsense-val-milli-ohms
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        sensor_ltc2983: ltc2983@0 {
+                compatible = "adi,ltc2983";
+                reg = <0>;
+
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                interrupts = <20 IRQ_TYPE_EDGE_RISING>;
+                interrupt-parent = <&gpio>;
+
+                thermocouple@18 {
+                        reg = <18>;
+                        adi,sensor-type = <8>; //Type B
+                        adi,sensor-oc-current-microamp = <10>;
+                        adi,cold-junction-handle = <&diode5>;
+                };
+
+                diode5: diode@5 {
+                        reg = <5>;
+                        adi,sensor-type = <28>;
+                };
+
+                rsense2: rsense@2 {
+                        reg = <2>;
+                        adi,sensor-type = <29>;
+                        adi,rsense-val-milli-ohms = <1200000>; //1.2Kohms
+                };
+
+                rtd@14 {
+                        reg = <14>;
+                        adi,sensor-type = <15>; //PT1000
+                        /*2-wire, internal gnd, no current rotation*/
+                        adi,number-of-wires = <2>;
+                        adi,rsense-share;
+                        adi,excitation-current-microamp = <500>;
+                        adi,rsense-handle = <&rsense2>;
+                };
+
+                adc@10 {
+                        reg = <10>;
+                        adi,sensor-type = <30>;
+                        adi,single-ended;
+                };
+
+                thermistor@12 {
+                        reg = <12>;
+                        adi,sensor-type = <26>; //Steinhart
+                        adi,rsense-handle = <&rsense2>;
+                        adi,custom-steinhart = <0x00F371EC 0x12345678
+                                        0x2C0F8733 0x10018C66 0xA0FEACCD
+                                        0x90021D99>; //6 entries
+                };
+
+                thermocouple@20 {
+                        reg = <20>;
+                        adi,sensor-type = <9>; //custom thermocouple
+                        adi,single-ended;
+                        adi,custom-thermocouple = /bits/ 64
+                                 <(-50220000) 0
+                                  (-30200000) 99100000
+                                  (-5300000) 135400000
+                                  0 273150000
+                                  40200000 361200000
+                                  55300000 522100000
+                                  88300000 720300000
+                                  132200000 811200000
+                                  188700000 922500000
+                                  460400000 1000000000>; //10 pairs
+               };
+
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/iio/temperature/maxim_thermocouple.txt b/Documentation/devicetree/bindings/iio/temperature/maxim_thermocouple.txt
index 28bc5c4d965b..bb85cd0e039c 100644
--- a/Documentation/devicetree/bindings/iio/temperature/maxim_thermocouple.txt
+++ b/Documentation/devicetree/bindings/iio/temperature/maxim_thermocouple.txt
@@ -5,7 +5,10 @@ Maxim thermocouple support
 
 Required properties:
 
-	- compatible: must be "maxim,max31855" or "maxim,max6675"
+	- compatible: must be "maxim,max6675" or one of the following:
+	   "maxim,max31855k", "maxim,max31855j", "maxim,max31855n",
+	   "maxim,max31855s", "maxim,max31855t", "maxim,max31855e",
+	   "maxim,max31855r"; the generic "max,max31855" is deprecated.
 	- reg: SPI chip select number for the device
 	- spi-max-frequency: must be 4300000
 	- spi-cpha: must be defined for max6675 to enable SPI mode 1
@@ -15,7 +18,7 @@ Required properties:
 Example:
 
 	max31855@0 {
-		compatible = "maxim,max31855";
+		compatible = "maxim,max31855k";
 		reg = <0>;
 		spi-max-frequency = <4300000>;
 	};
diff --git a/Documentation/devicetree/bindings/iio/timer/stm32-lptimer-trigger.txt b/Documentation/devicetree/bindings/iio/timer/stm32-lptimer-trigger.txt
deleted file mode 100644
index 85e6806b17d7..000000000000
--- a/Documentation/devicetree/bindings/iio/timer/stm32-lptimer-trigger.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-STMicroelectronics STM32 Low-Power Timer Trigger
-
-STM32 Low-Power Timer provides trigger source (LPTIM output) that can be used
-by STM32 internal ADC and/or DAC.
-
-Must be a sub-node of an STM32 Low-Power Timer device tree node.
-See ../mfd/stm32-lptimer.txt for details about the parent node.
-
-Required properties:
-- compatible:		Must be "st,stm32-lptimer-trigger".
-- reg:			Identify trigger hardware block. Must be 0, 1 or 2
-			respectively for lptimer1, lptimer2 or lptimer3
-			trigger output.
-
-Example:
-	timer@40002400 {
-		compatible = "st,stm32-lptimer";
-		...
-		trigger@0 {
-			compatible = "st,stm32-lptimer-trigger";
-			reg = <0>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/iio/timer/stm32-timer-trigger.txt b/Documentation/devicetree/bindings/iio/timer/stm32-timer-trigger.txt
deleted file mode 100644
index b8e8c769d434..000000000000
--- a/Documentation/devicetree/bindings/iio/timer/stm32-timer-trigger.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-STMicroelectronics STM32 Timers IIO timer bindings
-
-Must be a sub-node of an STM32 Timers device tree node.
-See ../mfd/stm32-timers.txt for details about the parent node.
-
-Required parameters:
-- compatible:	Must be one of:
-		"st,stm32-timer-trigger"
-		"st,stm32h7-timer-trigger"
-- reg:		Identify trigger hardware block.
-
-Example:
-	timers@40010000 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "st,stm32-timers";
-		reg = <0x40010000 0x400>;
-		clocks = <&rcc 0 160>;
-		clock-names = "int";
-
-		timer@0 {
-			compatible = "st,stm32-timer-trigger";
-			reg = <0>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
index b3bd8ef7fbd6..5b3b71c9c018 100644
--- a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
+++ b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 LRADC Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml
new file mode 100644
index 000000000000..5b37be0be4e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/fsl,mpr121-touchkey.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale MPR121 capacitive touch sensor controller
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+description: |
+  The MPR121 supports up to 12 completely independent electrodes/capacitance
+  sensing inputs in which 8 are multifunctional for LED driving and GPIO.
+  https://www.nxp.com/docs/en/data-sheet/MPR121.pdf
+
+allOf:
+  - $ref: input.yaml#
+
+anyOf:
+  - required: [ interrupts ]
+  - required: [ poll-interval ]
+
+properties:
+  compatible:
+    const: fsl,mpr121-touchkey
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  vdd-supply:
+    maxItems: 1
+
+  linux,keycodes:
+    minItems: 1
+    maxItems: 12
+
+  wakeup-source:
+    description: Use any event on keypad as wakeup event.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+  - vdd-supply
+  - linux,keycodes
+
+examples:
+  - |
+    // Example with interrupts
+    #include "dt-bindings/input/input.h"
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        mpr121@5a {
+            compatible = "fsl,mpr121-touchkey";
+            reg = <0x5a>;
+            interrupt-parent = <&gpio1>;
+            interrupts = <28 2>;
+            autorepeat;
+            vdd-supply = <&ldo4_reg>;
+            linux,keycodes = <KEY_0>, <KEY_1>, <KEY_2>, <KEY_3>,
+                             <KEY_4>, <KEY_5>, <KEY_6>, <KEY_7>,
+                             <KEY_8>, <KEY_9>, <KEY_A>, <KEY_B>;
+        };
+    };
+
+  - |
+    // Example with polling
+    #include "dt-bindings/input/input.h"
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        mpr121@5a {
+            compatible = "fsl,mpr121-touchkey";
+            reg = <0x5a>;
+            poll-interval = <20>;
+            autorepeat;
+            vdd-supply = <&ldo4_reg>;
+            linux,keycodes = <KEY_0>, <KEY_1>, <KEY_2>, <KEY_3>,
+                             <KEY_4>, <KEY_5>, <KEY_6>, <KEY_7>,
+                             <KEY_8>, <KEY_9>, <KEY_A>, <KEY_B>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt b/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
index b2a76301e632..dc194b2c151a 100644
--- a/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
+++ b/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
@@ -1,8 +1,9 @@
-Ilitek ILI210x/ILI251x touchscreen controller
+Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
 
 Required properties:
 - compatible:
     ilitek,ili210x for ILI210x
+    ilitek,ili2117 for ILI2117
     ilitek,ili251x for ILI251x
 
 - reg: The I2C address of the device
diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml
new file mode 100644
index 000000000000..6d519046b3af
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/input.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/input.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common input schema binding
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+properties:
+  autorepeat:
+    description: Enable autorepeat when key is pressed and held down.
+    type: boolean
+
+  linux,keycodes:
+    description:
+      Specifies an array of numeric keycode values to be used for reporting
+      button presses.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - items:
+          minimum: 0
+          maximum: 0xff
+
+  poll-interval:
+    description: Poll interval time in milliseconds.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  power-off-time-sec:
+    description:
+      Duration in seconds which the key should be kept pressed for device to
+      power off automatically. Device with key pressed shutdown feature can
+      specify this property.
+    $ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/input/keys.txt b/Documentation/devicetree/bindings/input/keys.txt
deleted file mode 100644
index f5a5ddde53f1..000000000000
--- a/Documentation/devicetree/bindings/input/keys.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-General Keys Properties:
-
-Optional properties for Keys:
-- power-off-time-sec: Duration in seconds which the key should be kept
-	pressed for device to power off automatically. Device with key pressed
-	shutdown feature can specify this property.
-- linux,keycodes: Specifies the numeric keycode values to be used for
-	reporting key presses.
diff --git a/Documentation/devicetree/bindings/input/max77650-onkey.txt b/Documentation/devicetree/bindings/input/max77650-onkey.txt
deleted file mode 100644
index 477dc74f452a..000000000000
--- a/Documentation/devicetree/bindings/input/max77650-onkey.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Onkey driver for MAX77650 PMIC from Maxim Integrated.
-
-This module is part of the MAX77650 MFD device. For more details
-see Documentation/devicetree/bindings/mfd/max77650.txt.
-
-The onkey controller is represented as a sub-node of the PMIC node on
-the device tree.
-
-Required properties:
---------------------
-- compatible:		Must be "maxim,max77650-onkey".
-
-Optional properties:
-- linux,code:		The key-code to be reported when the key is pressed.
-			Defaults to KEY_POWER.
-- maxim,onkey-slide:	The system's button is a slide switch, not the default
-			push button.
-
-Example:
---------
-
-	onkey {
-		compatible = "maxim,max77650-onkey";
-		linux,code = <KEY_END>;
-		maxim,onkey-slide;
-	};
diff --git a/Documentation/devicetree/bindings/input/max77650-onkey.yaml b/Documentation/devicetree/bindings/input/max77650-onkey.yaml
new file mode 100644
index 000000000000..2f2e0b6ebbbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/max77650-onkey.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/max77650-onkey.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Onkey driver for MAX77650 PMIC from Maxim Integrated.
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description: |
+  This module is part of the MAX77650 MFD device. For more details
+  see Documentation/devicetree/bindings/mfd/max77650.yaml.
+
+  The onkey controller is represented as a sub-node of the PMIC node on
+  the device tree.
+
+properties:
+  compatible:
+    const: maxim,max77650-onkey
+
+  linux,code:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The key-code to be reported when the key is pressed. Defaults
+      to KEY_POWER.
+
+  maxim,onkey-slide:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The system's button is a slide switch, not the default push button.
+
+required:
+  - compatible
diff --git a/Documentation/devicetree/bindings/input/mpr121-touchkey.txt b/Documentation/devicetree/bindings/input/mpr121-touchkey.txt
deleted file mode 100644
index b7c61ee5841b..000000000000
--- a/Documentation/devicetree/bindings/input/mpr121-touchkey.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Freescale MPR121 Controllor
-
-Required Properties:
-- compatible:		Should be "fsl,mpr121-touchkey"
-- reg:			The I2C slave address of the device.
-- interrupts:		The interrupt number to the cpu.
-- vdd-supply:		Phandle to the Vdd power supply.
-- linux,keycodes:	Specifies an array of numeric keycode values to
-			be used for reporting button presses. The array can
-			contain up to 12 entries.
-
-Optional Properties:
-- wakeup-source:	Use any event on keypad as wakeup event.
-- autorepeat:		Enable autorepeat feature.
-
-Example:
-
-#include "dt-bindings/input/input.h"
-
-	touchkey: mpr121@5a {
-		compatible = "fsl,mpr121-touchkey";
-		reg = <0x5a>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <28 2>;
-		autorepeat;
-		vdd-supply = <&ldo4_reg>;
-		linux,keycodes = <KEY_0>, <KEY_1>, <KEY_2>, <KEY_3>,
-				<KEY_4> <KEY_5>, <KEY_6>, <KEY_7>,
-				<KEY_8>, <KEY_9>, <KEY_A>, <KEY_B>;
-	};
diff --git a/Documentation/devicetree/bindings/input/mtk-pmic-keys.txt b/Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
index 2888d07c2ef0..535d92885372 100644
--- a/Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
+++ b/Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
@@ -10,13 +10,13 @@ Documentation/devicetree/bindings/mfd/mt6397.txt
 
 Required properties:
 - compatible: "mediatek,mt6397-keys" or "mediatek,mt6323-keys"
-- linux,keycodes: See Documentation/devicetree/bindings/input/keys.txt
+- linux,keycodes: See Documentation/devicetree/bindings/input/input.yaml
 
 Optional Properties:
 - wakeup-source: See Documentation/devicetree/bindings/power/wakeup-source.txt
 - mediatek,long-press-mode: Long press key shutdown setting, 1 for
 	pwrkey only, 2 for pwrkey/homekey together, others for disabled.
-- power-off-time-sec: See Documentation/devicetree/bindings/input/keys.txt
+- power-off-time-sec: See Documentation/devicetree/bindings/input/input.yaml
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/input/st,stpmic1-onkey.txt b/Documentation/devicetree/bindings/input/st,stpmic1-onkey.txt
index 4494613ae7ad..eb8e83736c02 100644
--- a/Documentation/devicetree/bindings/input/st,stpmic1-onkey.txt
+++ b/Documentation/devicetree/bindings/input/st,stpmic1-onkey.txt
@@ -15,7 +15,7 @@ Optional properties:
 - st,onkey-pu-inactive: onkey pull up is not active
 - power-off-time-sec: Duration in seconds which the key should be kept
         pressed for device to power off automatically (from 1 to 16 seconds).
-        see See Documentation/devicetree/bindings/input/keys.txt
+        see See Documentation/devicetree/bindings/input/input.yaml
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt b/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt
index cdd743a1f2d5..afa38dc069f0 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt
@@ -38,7 +38,7 @@ Optional properties:
 
 Example:
 
-	ad7879@2c {
+	touchscreen0@2c {
 		compatible = "adi,ad7879-1";
 		reg = <0x2c>;
 		interrupt-parent = <&gpio1>;
@@ -52,7 +52,7 @@ Example:
 		adi,conversion-interval = /bits/ 8 <255>;
 	};
 
-	ad7879@1 {
+	touchscreen1@1 {
 		compatible = "adi,ad7879";
 		spi-max-frequency = <5000000>;
 		reg = <1>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
index 870b8c5cce9b..0f6950073d6f 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
@@ -30,6 +30,7 @@ Required properties:
 Optional properties:
  - reset-gpios: GPIO specification for the RESET input
  - wake-gpios:  GPIO specification for the WAKE input
+ - vcc-supply:  Regulator that supplies the touchscreen
 
  - pinctrl-names: should be "default"
  - pinctrl-0:   a phandle pointing to the pin settings for the
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8916.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8916.yaml
new file mode 100644
index 000000000000..4107e60cab12
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8916.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interconnect/qcom,msm8916.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm MSM8916 Network-On-Chip interconnect
+
+maintainers:
+  - Georgi Djakov <georgi.djakov@linaro.org>
+
+description: |
+   The Qualcomm MSM8916 interconnect providers support adjusting the
+   bandwidth requirements between the various NoC fabrics.
+
+properties:
+  compatible:
+    enum:
+      - qcom,msm8916-bimc
+      - qcom,msm8916-pcnoc
+      - qcom,msm8916-snoc
+
+  reg:
+    maxItems: 1
+
+  '#interconnect-cells':
+    const: 1
+
+  clock-names:
+    items:
+      - const: bus
+      - const: bus_a
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Bus A Clock
+
+required:
+  - compatible
+  - reg
+  - '#interconnect-cells'
+  - clock-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+      #include <dt-bindings/clock/qcom,rpmcc.h>
+
+      bimc: interconnect@400000 {
+              compatible = "qcom,msm8916-bimc";
+              reg = <0x00400000 0x62000>;
+              #interconnect-cells = <1>;
+              clock-names = "bus", "bus_a";
+              clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+                       <&rpmcc RPM_SMD_BIMC_A_CLK>;
+      };
+
+      pcnoc: interconnect@500000 {
+              compatible = "qcom,msm8916-pcnoc";
+              reg = <0x00500000 0x11000>;
+              #interconnect-cells = <1>;
+              clock-names = "bus", "bus_a";
+              clocks = <&rpmcc RPM_SMD_PCNOC_CLK>,
+                       <&rpmcc RPM_SMD_PCNOC_A_CLK>;
+      };
+
+      snoc: interconnect@580000 {
+              compatible = "qcom,msm8916-snoc";
+              reg = <0x00580000 0x14000>;
+              #interconnect-cells = <1>;
+              clock-names = "bus", "bus_a";
+              clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
+                       <&rpmcc RPM_SMD_SNOC_A_CLK>;
+      };
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8974.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8974.yaml
new file mode 100644
index 000000000000..9af3c6e59cff
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8974.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interconnect/qcom,msm8974.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm MSM8974 Network-On-Chip Interconnect
+
+maintainers:
+  - Brian Masney <masneyb@onstation.org>
+
+description: |
+   The Qualcomm MSM8974 interconnect providers support setting system
+   bandwidth requirements between various network-on-chip fabrics.
+
+properties:
+  reg:
+    maxItems: 1
+
+  compatible:
+    enum:
+      - qcom,msm8974-bimc
+      - qcom,msm8974-cnoc
+      - qcom,msm8974-mmssnoc
+      - qcom,msm8974-ocmemnoc
+      - qcom,msm8974-pnoc
+      - qcom,msm8974-snoc
+
+  '#interconnect-cells':
+    const: 1
+
+  clock-names:
+    items:
+      - const: bus
+      - const: bus_a
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Bus A Clock
+
+required:
+  - compatible
+  - reg
+  - '#interconnect-cells'
+  - clock-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+      #include <dt-bindings/clock/qcom,rpmcc.h>
+
+      bimc: interconnect@fc380000 {
+              reg = <0xfc380000 0x6a000>;
+              compatible = "qcom,msm8974-bimc";
+              #interconnect-cells = <1>;
+              clock-names = "bus", "bus_a";
+              clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+                       <&rpmcc RPM_SMD_BIMC_A_CLK>;
+      };
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt
deleted file mode 100644
index c07d89812b73..000000000000
--- a/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-Qualcomm QCS404 Network-On-Chip interconnect driver binding
------------------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
-			"qcom,qcs404-bimc"
-			"qcom,qcs404-pcnoc"
-			"qcom,qcs404-snoc"
-- #interconnect-cells : should contain 1
-
-reg : specifies the physical base address and size of registers
-clocks : list of phandles and specifiers to all interconnect bus clocks
-clock-names : clock names should include both "bus" and "bus_a"
-
-Example:
-
-soc {
-	...
-	bimc: interconnect@400000 {
-		reg = <0x00400000 0x80000>;
-		compatible = "qcom,qcs404-bimc";
-		#interconnect-cells = <1>;
-		clock-names = "bus", "bus_a";
-		clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
-			<&rpmcc RPM_SMD_BIMC_A_CLK>;
-	};
-
-	pnoc: interconnect@500000 {
-		reg = <0x00500000 0x15080>;
-		compatible = "qcom,qcs404-pcnoc";
-		#interconnect-cells = <1>;
-		clock-names = "bus", "bus_a";
-		clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
-			<&rpmcc RPM_SMD_PNOC_A_CLK>;
-	};
-
-	snoc: interconnect@580000 {
-		reg = <0x00580000 0x23080>;
-		compatible = "qcom,qcs404-snoc";
-		#interconnect-cells = <1>;
-		clock-names = "bus", "bus_a";
-		clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
-			<&rpmcc RPM_SMD_SNOC_A_CLK>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml
new file mode 100644
index 000000000000..8d65c5f80679
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interconnect/qcom,qcs404.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QCS404 Network-On-Chip interconnect
+
+maintainers:
+  - Georgi Djakov <georgi.djakov@linaro.org>
+
+description: |
+   The Qualcomm QCS404 interconnect providers support adjusting the
+   bandwidth requirements between the various NoC fabrics.
+
+properties:
+  reg:
+    maxItems: 1
+
+  compatible:
+    enum:
+      - qcom,qcs404-bimc
+      - qcom,qcs404-pcnoc
+      - qcom,qcs404-snoc
+
+  '#interconnect-cells':
+    const: 1
+
+  clock-names:
+    items:
+      - const: bus
+      - const: bus_a
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Bus A Clock
+
+required:
+  - compatible
+  - reg
+  - '#interconnect-cells'
+  - clock-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+      #include <dt-bindings/clock/qcom,rpmcc.h>
+
+      bimc: interconnect@400000 {
+              reg = <0x00400000 0x80000>;
+              compatible = "qcom,qcs404-bimc";
+              #interconnect-cells = <1>;
+              clock-names = "bus", "bus_a";
+              clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+                       <&rpmcc RPM_SMD_BIMC_A_CLK>;
+      };
+
+      pnoc: interconnect@500000 {
+             reg = <0x00500000 0x15080>;
+             compatible = "qcom,qcs404-pcnoc";
+             #interconnect-cells = <1>;
+             clock-names = "bus", "bus_a";
+             clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
+                      <&rpmcc RPM_SMD_PNOC_A_CLK>;
+      };
+
+      snoc: interconnect@580000 {
+            reg = <0x00580000 0x23080>;
+            compatible = "qcom,qcs404-snoc";
+            #interconnect-cells = <1>;
+            clock-names = "bus", "bus_a";
+            clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
+                     <&rpmcc RPM_SMD_SNOC_A_CLK>;
+      };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
index 23a202d24e43..953d875b5e74 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Interrupt Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: /schemas/interrupt-controller.yaml#
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
index 0eccf5551786..cf09055da78b 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A20 Non-Maskable Interrupt Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: /schemas/interrupt-controller.yaml#
@@ -52,9 +52,7 @@ required:
   - interrupts
   - interrupt-controller
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 684bb1cd75ec..23b18b92c558 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -17,6 +17,7 @@ Required properties:
     "amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
     "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
     "amlogic,meson-sm1-gpio-intc" for SM1 SoCs (S905D3, S905X3, S905Y3)
+    "amlogic,meson-a1-gpio-intc" for A1 SoCs (A113L)
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller.
 - #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index 1fe147daca4c..66aacd106503 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -138,6 +138,7 @@ properties:
       containing a set of sub-nodes.
     patternProperties:
       "^interrupt-partition-[0-9]+$":
+        type: object
         properties:
           affinity:
             $ref: /schemas/types.yaml#/definitions/phandle-array
diff --git a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2xxx-scu-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2xxx-scu-ic.txt
new file mode 100644
index 000000000000..251ed44171db
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2xxx-scu-ic.txt
@@ -0,0 +1,23 @@
+Aspeed AST25XX and AST26XX SCU Interrupt Controller
+
+Required Properties:
+ - #interrupt-cells		: must be 1
+ - compatible			: must be "aspeed,ast2500-scu-ic",
+				  "aspeed,ast2600-scu-ic0" or
+				  "aspeed,ast2600-scu-ic1"
+ - interrupts			: interrupt from the parent controller
+ - interrupt-controller		: indicates that the controller receives and
+				  fires new interrupts for child busses
+
+Example:
+
+    syscon@1e6e2000 {
+        ranges = <0 0x1e6e2000 0x1a8>;
+
+        scu_ic: interrupt-controller@18 {
+            #interrupt-cells = <1>;
+            compatible = "aspeed,ast2500-scu-ic";
+            interrupts = <21>;
+            interrupt-controller;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt
index 2117d4ac1ae5..5ddef1dc0c1a 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt
@@ -31,6 +31,17 @@ Required properties:
 - interrupts: specifies the interrupt line(s) in the interrupt-parent controller
   node; valid values depend on the type of parent interrupt controller
 
+Optional properties:
+
+- brcm,irq-can-wake: If present, this means the L1 controller can be used as a
+  wakeup source for system suspend/resume.
+
+Optional properties:
+
+- brcm,int-fwd-mask: if present, a bit mask to indicate which interrupts
+  have already been configured by the firmware and should be left unmanaged.
+  This should have one 32-bit word per status/set/clear/mask group.
+
 If multiple reg ranges and interrupt-parent entries are present on an SMP
 system, the driver will allow IRQ SMP affinity to be set up through the
 /proc/irq/ interface.  In the simplest possible configuration, only one
diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml b/Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml
new file mode 100644
index 000000000000..43c6effbb5bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/fsl,intmux.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale INTMUX interrupt multiplexer
+
+maintainers:
+  - Joakim Zhang <qiangqing.zhang@nxp.com>
+
+properties:
+  compatible:
+    const: fsl,imx-intmux
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 8
+    description: |
+      Should contain the parent interrupt lines (up to 8) used to multiplex
+      the input interrupts.
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+    description: |
+      The 1st cell is hw interrupt number, the 2nd cell is channel index.
+
+  clocks:
+    description: ipg clock.
+
+  clock-names:
+    const: ipg
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-controller
+  - '#interrupt-cells'
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    interrupt-controller@37400000 {
+        compatible = "fsl,imx-intmux";
+        reg = <0x37400000 0x1000>;
+        interrupts = <0 16 4>,
+                     <0 17 4>,
+                     <0 18 4>,
+                     <0 19 4>,
+                     <0 20 4>,
+                     <0 21 4>,
+                     <0 22 4>,
+                     <0 23 4>;
+        interrupt-controller;
+        interrupt-parent = <&gic>;
+        #interrupt-cells = <2>;
+        clocks = <&clk>;
+        clock-names = "ipg";
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-extirq.txt b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-extirq.txt
new file mode 100644
index 000000000000..f0ad7801e8cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-extirq.txt
@@ -0,0 +1,49 @@
+* Freescale Layerscape external IRQs
+
+Some Layerscape SOCs (LS1021A, LS1043A, LS1046A) support inverting
+the polarity of certain external interrupt lines.
+
+The device node must be a child of the node representing the
+Supplemental Configuration Unit (SCFG).
+
+Required properties:
+- compatible: should be "fsl,<soc-name>-extirq", e.g. "fsl,ls1021a-extirq".
+- #interrupt-cells: Must be 2. The first element is the index of the
+  external interrupt line. The second element is the trigger type.
+- #address-cells: Must be 0.
+- interrupt-controller: Identifies the node as an interrupt controller
+- reg: Specifies the Interrupt Polarity Control Register (INTPCR) in
+  the SCFG.
+- interrupt-map: Specifies the mapping from external interrupts to GIC
+  interrupts.
+- interrupt-map-mask: Must be <0xffffffff 0>.
+
+Example:
+	scfg: scfg@1570000 {
+		compatible = "fsl,ls1021a-scfg", "syscon";
+		reg = <0x0 0x1570000 0x0 0x10000>;
+		big-endian;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x0 0x0 0x1570000 0x10000>;
+
+		extirq: interrupt-controller@1ac {
+			compatible = "fsl,ls1021a-extirq";
+			#interrupt-cells = <2>;
+			#address-cells = <0>;
+			interrupt-controller;
+			reg = <0x1ac 4>;
+			interrupt-map =
+				<0 0 &gic GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>,
+				<1 0 &gic GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>,
+				<2 0 &gic GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>,
+				<3 0 &gic GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>,
+				<4 0 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
+				<5 0 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-map-mask = <0xffffffff 0x0>;
+		};
+	};
+
+
+	interrupts-extended = <&gic GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>,
+			      <&extirq 1 IRQ_TYPE_LEVEL_LOW>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
index 4a3ee253f7f0..4ebfa0008781 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
@@ -108,3 +108,15 @@ commonly used:
 			sensitivity = <7>;
 		};
 	};
+
+3) Interrupt wakeup parent
+--------------------------
+
+Some interrupt controllers in a SoC, are always powered on and have a select
+interrupts routed to them, so that they can wakeup the SoC from suspend. These
+interrupt controllers do not fall into the category of a parent interrupt
+controller and can be specified by the "wakeup-parent" property and contain a
+single phandle referring to the wakeup capable interrupt controller.
+
+   Example:
+	wakeup-parent = <&pdc_intc>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.txt b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.txt
index 608fee15a4cf..a0ed02725a9d 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.txt
@@ -1,13 +1,17 @@
 * Marvell MMP Interrupt controller
 
 Required properties:
-- compatible : Should be "mrvl,mmp-intc", "mrvl,mmp2-intc" or
-  "mrvl,mmp2-mux-intc"
+- compatible : Should be
+               "mrvl,mmp-intc" on Marvel MMP,
+               "mrvl,mmp2-intc" along with "mrvl,mmp2-mux-intc" on MMP2 or
+               "marvell,mmp3-intc" with "mrvl,mmp2-mux-intc" on MMP3
 - reg : Address and length of the register set of the interrupt controller.
   If the interrupt controller is intc, address and length means the range
-  of the whole interrupt controller. If the interrupt controller is mux-intc,
-  address and length means one register. Since address of mux-intc is in the
-  range of intc. mux-intc is secondary interrupt controller.
+  of the whole interrupt controller. The "marvell,mmp3-intc" controller
+  also has a secondary range for the second CPU core.  If the interrupt
+  controller is mux-intc, address and length means one register. Since
+  address of mux-intc is in the range of intc. mux-intc is secondary
+  interrupt controller.
 - reg-names : Name of the register set of the interrupt controller. It's
   only required in mux-intc interrupt controller.
 - interrupts : Should be the port interrupt shared by mux interrupts. It's
diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt
index 8e0797cb1487..1df293953327 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt
@@ -17,7 +17,8 @@ Properties:
 - compatible:
 	Usage: required
 	Value type: <string>
-	Definition: Should contain "qcom,<soc>-pdc"
+	Definition: Should contain "qcom,<soc>-pdc" and "qcom,pdc"
+		    - "qcom,sc7180-pdc": For SC7180
 		    - "qcom,sdm845-pdc": For SDM845
 
 - reg:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
deleted file mode 100644
index f977ea7617f6..000000000000
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-DT bindings for the R-Mobile/R-Car/RZ/G interrupt controller
-
-Required properties:
-
-- compatible: must be "renesas,irqc-<soctype>" or "renesas,intc-ex-<soctype>",
-	      and "renesas,irqc" as fallback.
-  Examples with soctypes are:
-    - "renesas,irqc-r8a73a4" (R-Mobile APE6)
-    - "renesas,irqc-r8a7743" (RZ/G1M)
-    - "renesas,irqc-r8a7744" (RZ/G1N)
-    - "renesas,irqc-r8a7745" (RZ/G1E)
-    - "renesas,irqc-r8a77470" (RZ/G1C)
-    - "renesas,irqc-r8a7790" (R-Car H2)
-    - "renesas,irqc-r8a7791" (R-Car M2-W)
-    - "renesas,irqc-r8a7792" (R-Car V2H)
-    - "renesas,irqc-r8a7793" (R-Car M2-N)
-    - "renesas,irqc-r8a7794" (R-Car E2)
-    - "renesas,intc-ex-r8a774a1" (RZ/G2M)
-    - "renesas,intc-ex-r8a774c0" (RZ/G2E)
-    - "renesas,intc-ex-r8a7795" (R-Car H3)
-    - "renesas,intc-ex-r8a7796" (R-Car M3-W)
-    - "renesas,intc-ex-r8a77965" (R-Car M3-N)
-    - "renesas,intc-ex-r8a77970" (R-Car V3M)
-    - "renesas,intc-ex-r8a77980" (R-Car V3H)
-    - "renesas,intc-ex-r8a77990" (R-Car E3)
-    - "renesas,intc-ex-r8a77995" (R-Car D3)
-- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
-  interrupts.txt in this directory
-- clocks: Must contain a reference to the functional clock.
-
-Optional properties:
-
-- any properties, listed in interrupts.txt, and any standard resource allocation
-  properties
-
-Example:
-
-	irqc0: interrupt-controller@e61c0000 {
-		compatible = "renesas,irqc-r8a7790", "renesas,irqc";
-		#interrupt-cells = <2>;
-		interrupt-controller;
-		reg = <0 0xe61c0000 0 0x200>;
-		interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
-			     <0 1 IRQ_TYPE_LEVEL_HIGH>,
-			     <0 2 IRQ_TYPE_LEVEL_HIGH>,
-			     <0 3 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp4_clks R8A7790_CLK_IRQC>;
-	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
new file mode 100644
index 000000000000..ee5273b6c5a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/renesas,irqc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DT bindings for the R-Mobile/R-Car/RZ/G interrupt controller
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,irqc-r8a73a4        # R-Mobile APE6
+          - renesas,irqc-r8a7743        # RZ/G1M
+          - renesas,irqc-r8a7744        # RZ/G1N
+          - renesas,irqc-r8a7745        # RZ/G1E
+          - renesas,irqc-r8a77470       # RZ/G1C
+          - renesas,irqc-r8a7790        # R-Car H2
+          - renesas,irqc-r8a7791        # R-Car M2-W
+          - renesas,irqc-r8a7792        # R-Car V2H
+          - renesas,irqc-r8a7793        # R-Car M2-N
+          - renesas,irqc-r8a7794        # R-Car E2
+          - renesas,intc-ex-r8a774a1    # RZ/G2M
+          - renesas,intc-ex-r8a774b1    # RZ/G2N
+          - renesas,intc-ex-r8a774c0    # RZ/G2E
+          - renesas,intc-ex-r8a7795     # R-Car H3
+          - renesas,intc-ex-r8a7796     # R-Car M3-W
+          - renesas,intc-ex-r8a77965    # R-Car M3-N
+          - renesas,intc-ex-r8a77970    # R-Car V3M
+          - renesas,intc-ex-r8a77980    # R-Car V3H
+          - renesas,intc-ex-r8a77990    # R-Car E3
+          - renesas,intc-ex-r8a77995    # R-Car D3
+      - const: renesas,irqc
+
+  '#interrupt-cells':
+    # an interrupt index and flags, as defined in interrupts.txt in
+    # this directory
+    const: 2
+
+  interrupt-controller: true
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 32
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - '#interrupt-cells'
+  - interrupt-controller
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7790-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    irqc0: interrupt-controller@e61c0000 {
+        compatible = "renesas,irqc-r8a7790", "renesas,irqc";
+        #interrupt-cells = <2>;
+        interrupt-controller;
+        reg = <0 0xe61c0000 0 0x200>;
+        interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&cpg CPG_MOD 407>;
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
deleted file mode 100644
index cd01b2292ec6..000000000000
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-STM32 External Interrupt Controller
-
-Required properties:
-
-- compatible: Should be:
-    "st,stm32-exti"
-    "st,stm32h7-exti"
-    "st,stm32mp1-exti"
-- reg: Specifies base physical address and size of the registers
-- interrupt-controller: Indentifies the node as an interrupt controller
-- #interrupt-cells: Specifies the number of cells to encode an interrupt
-  specifier, shall be 2
-- interrupts: interrupts references to primary interrupt controller
-  (only needed for exti controller with multiple exti under
-  same parent interrupt: st,stm32-exti and st,stm32h7-exti)
-
-Optional properties:
-
-- hwlocks: reference to a phandle of a hardware spinlock provider node.
-
-Example:
-
-exti: interrupt-controller@40013c00 {
-	compatible = "st,stm32-exti";
-	interrupt-controller;
-	#interrupt-cells = <2>;
-	reg = <0x40013C00 0x400>;
-	interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
-};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
new file mode 100644
index 000000000000..9e5c6608b4e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/st,stm32-exti.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32 External Interrupt Controller Device Tree Bindings
+
+maintainers:
+  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Ludovic Barre <ludovic.barre@st.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - st,stm32-exti
+          - st,stm32h7-exti
+      - items:
+        - enum:
+          - st,stm32mp1-exti
+        - const: syscon
+
+  "#interrupt-cells":
+    const: 2
+
+  reg:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  hwlocks:
+    maxItems: 1
+    description:
+      Reference to a phandle of a hardware spinlock provider node.
+
+  interrupts:
+    description:
+      Interrupts references to primary interrupt controller
+
+required:
+  - "#interrupt-cells"
+  - compatible
+  - reg
+  - interrupt-controller
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - st,stm32-exti
+    then:
+      properties:
+        interrupts:
+          minItems: 1
+          maxItems: 32
+      required:
+        - interrupts
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - st,stm32h7-exti
+    then:
+      properties:
+        interrupts:
+          minItems: 1
+          maxItems: 96
+      required:
+        - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    //Example 1
+    exti1: interrupt-controller@5000d000 {
+        compatible = "st,stm32mp1-exti", "syscon";
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        reg = <0x5000d000 0x400>;
+    };
+
+    //Example 2
+    exti2: interrupt-controller@40013c00 {
+        compatible = "st,stm32-exti";
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        reg = <0x40013C00 0x400>;
+        interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
deleted file mode 100644
index c9abbf3e4f68..000000000000
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-* ARM SMMUv3 Architecture Implementation
-
-The SMMUv3 architecture is a significant departure from previous
-revisions, replacing the MMIO register interface with in-memory command
-and event queues and adding support for the ATS and PRI components of
-the PCIe specification.
-
-** SMMUv3 required properties:
-
-- compatible        : Should include:
-
-                      * "arm,smmu-v3" for any SMMUv3 compliant
-                        implementation. This entry should be last in the
-                        compatible list.
-
-- reg               : Base address and size of the SMMU.
-
-- interrupts        : Non-secure interrupt list describing the wired
-                      interrupt sources corresponding to entries in
-                      interrupt-names. If no wired interrupts are
-                      present then this property may be omitted.
-
-- interrupt-names   : When the interrupts property is present, should
-                      include the following:
-                      * "eventq"    - Event Queue not empty
-                      * "priq"      - PRI Queue not empty
-                      * "cmdq-sync" - CMD_SYNC complete
-                      * "gerror"    - Global Error activated
-                      * "combined"  - The combined interrupt is optional,
-				      and should only be provided if the
-				      hardware supports just a single,
-				      combined interrupt line.
-				      If provided, then the combined interrupt
-				      will be used in preference to any others.
-
-- #iommu-cells      : See the generic IOMMU binding described in
-                        devicetree/bindings/pci/pci-iommu.txt
-                      for details. For SMMUv3, must be 1, with each cell
-                      describing a single stream ID. All possible stream
-                      IDs which a device may emit must be described.
-
-** SMMUv3 optional properties:
-
-- dma-coherent      : Present if DMA operations made by the SMMU (page
-                      table walks, stream table accesses etc) are cache
-                      coherent with the CPU.
-
-                      NOTE: this only applies to the SMMU itself, not
-                      masters connected upstream of the SMMU.
-
-- msi-parent        : See the generic MSI binding described in
-                        devicetree/bindings/interrupt-controller/msi.txt
-                      for a description of the msi-parent property.
-
-- hisilicon,broken-prefetch-cmd
-                    : Avoid sending CMD_PREFETCH_* commands to the SMMU.
-
-- cavium,cn9900-broken-page1-regspace
-                    : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
-		      PRIQ_PROD/CONS register access with page 0 offsets.
-		      Set for Cavium ThunderX2 silicon that doesn't support
-		      SMMU page1 register space.
-
-** Example
-
-        smmu@2b400000 {
-                compatible = "arm,smmu-v3";
-                reg = <0x0 0x2b400000 0x0 0x20000>;
-                interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
-                             <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
-                             <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
-                             <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
-                interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
-                dma-coherent;
-                #iommu-cells = <1>;
-                msi-parent = <&its 0xff0000>;
-        };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml
new file mode 100644
index 000000000000..5951c6f98c74
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/arm,smmu-v3.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM SMMUv3 Architecture Implementation
+
+maintainers:
+  - Will Deacon <will@kernel.org>
+  - Robin Murphy <Robin.Murphy@arm.com>
+
+description: |+
+  The SMMUv3 architecture is a significant departure from previous
+  revisions, replacing the MMIO register interface with in-memory command
+  and event queues and adding support for the ATS and PRI components of
+  the PCIe specification.
+
+properties:
+  $nodename:
+    pattern: "^iommu@[0-9a-f]*"
+  compatible:
+    const: arm,smmu-v3
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 4
+
+  interrupt-names:
+    oneOf:
+      - const: combined
+        description:
+          The combined interrupt is optional, and should only be provided if the
+          hardware supports just a single, combined interrupt line.
+          If provided, then the combined interrupt will be used in preference to
+          any others.
+      - minItems: 2
+        maxItems: 4
+        items:
+          - const: eventq     # Event Queue not empty
+          - const: gerror     # Global Error activated
+          - const: priq       # PRI Queue not empty
+          - const: cmdq-sync  # CMD_SYNC complete
+
+  '#iommu-cells':
+    const: 1
+
+  dma-coherent:
+    description: |
+      Present if page table walks made by the SMMU are cache coherent with the
+      CPU.
+
+      NOTE: this only applies to the SMMU itself, not masters connected
+      upstream of the SMMU.
+
+  msi-parent: true
+
+  hisilicon,broken-prefetch-cmd:
+    type: boolean
+    description: Avoid sending CMD_PREFETCH_* commands to the SMMU.
+
+  cavium,cn9900-broken-page1-regspace:
+    type: boolean
+    description:
+      Replaces all page 1 offsets used for EVTQ_PROD/CONS, PRIQ_PROD/CONS
+      register access with page 0 offsets. Set for Cavium ThunderX2 silicon that
+      doesn't support SMMU page1 register space.
+
+required:
+  - compatible
+  - reg
+  - '#iommu-cells'
+
+additionalProperties: false
+
+examples:
+  - |+
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    iommu@2b400000 {
+            compatible = "arm,smmu-v3";
+            reg = <0x2b400000 0x20000>;
+            interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
+                         <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
+                         <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
+                         <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
+            interrupt-names = "eventq", "gerror", "priq", "cmdq-sync";
+            dma-coherent;
+            #iommu-cells = <1>;
+            msi-parent = <&its 0xff0000>;
+    };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
deleted file mode 100644
index 3133f3ba7567..000000000000
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ /dev/null
@@ -1,182 +0,0 @@
-* ARM System MMU Architecture Implementation
-
-ARM SoCs may contain an implementation of the ARM System Memory
-Management Unit Architecture, which can be used to provide 1 or 2 stages
-of address translation to bus masters external to the CPU.
-
-The SMMU may also raise interrupts in response to various fault
-conditions.
-
-** System MMU required properties:
-
-- compatible    : Should be one of:
-
-                        "arm,smmu-v1"
-                        "arm,smmu-v2"
-                        "arm,mmu-400"
-                        "arm,mmu-401"
-                        "arm,mmu-500"
-                        "cavium,smmu-v2"
-                        "qcom,smmu-v2"
-
-                  depending on the particular implementation and/or the
-                  version of the architecture implemented.
-
-                  Qcom SoCs must contain, as below, SoC-specific compatibles
-                  along with "qcom,smmu-v2":
-                  "qcom,msm8996-smmu-v2", "qcom,smmu-v2",
-                  "qcom,sdm845-smmu-v2", "qcom,smmu-v2".
-
-                  Qcom SoCs implementing "arm,mmu-500" must also include,
-                  as below, SoC-specific compatibles:
-                  "qcom,sdm845-smmu-500", "arm,mmu-500"
-
-- reg           : Base address and size of the SMMU.
-
-- #global-interrupts : The number of global interrupts exposed by the
-                       device.
-
-- interrupts    : Interrupt list, with the first #global-irqs entries
-                  corresponding to the global interrupts and any
-                  following entries corresponding to context interrupts,
-                  specified in order of their indexing by the SMMU.
-
-                  For SMMUv2 implementations, there must be exactly one
-                  interrupt per context bank. In the case of a single,
-                  combined interrupt, it must be listed multiple times.
-
-- #iommu-cells  : See Documentation/devicetree/bindings/iommu/iommu.txt
-                  for details. With a value of 1, each IOMMU specifier
-                  represents a distinct stream ID emitted by that device
-                  into the relevant SMMU.
-
-                  SMMUs with stream matching support and complex masters
-                  may use a value of 2, where the second cell of the
-                  IOMMU specifier represents an SMR mask to combine with
-                  the ID in the first cell.  Care must be taken to ensure
-                  the set of matched IDs does not result in conflicts.
-
-** System MMU optional properties:
-
-- dma-coherent  : Present if page table walks made by the SMMU are
-                  cache coherent with the CPU.
-
-                  NOTE: this only applies to the SMMU itself, not
-                  masters connected upstream of the SMMU.
-
-- calxeda,smmu-secure-config-access : Enable proper handling of buggy
-                  implementations that always use secure access to
-                  SMMU configuration registers. In this case non-secure
-                  aliases of secure registers have to be used during
-                  SMMU configuration.
-
-- stream-match-mask : For SMMUs supporting stream matching and using
-                  #iommu-cells = <1>, specifies a mask of bits to ignore
-		  when matching stream IDs (e.g. this may be programmed
-		  into the SMRn.MASK field of every stream match register
-		  used). For cases where it is desirable to ignore some
-                  portion of every Stream ID (e.g. for certain MMU-500
-                  configurations given globally unique input IDs). This
-                  property is not valid for SMMUs using stream indexing,
-                  or using stream matching with #iommu-cells = <2>, and
-                  may be ignored if present in such cases.
-
-- clock-names:    List of the names of clocks input to the device. The
-                  required list depends on particular implementation and
-                  is as follows:
-                  - for "qcom,smmu-v2":
-                    - "bus": clock required for downstream bus access and
-                             for the smmu ptw,
-                    - "iface": clock required to access smmu's registers
-                               through the TCU's programming interface.
-                  - unspecified for other implementations.
-
-- clocks:         Specifiers for all clocks listed in the clock-names property,
-                  as per generic clock bindings.
-
-- power-domains:  Specifiers for power domains required to be powered on for
-                  the SMMU to operate, as per generic power domain bindings.
-
-** Deprecated properties:
-
-- mmu-masters (deprecated in favour of the generic "iommus" binding) :
-                  A list of phandles to device nodes representing bus
-                  masters for which the SMMU can provide a translation
-                  and their corresponding Stream IDs. Each device node
-                  linked from this list must have a "#stream-id-cells"
-                  property, indicating the number of Stream ID
-                  arguments associated with its phandle.
-
-** Examples:
-
-        /* SMMU with stream matching or stream indexing */
-        smmu1: iommu {
-                compatible = "arm,smmu-v1";
-                reg = <0xba5e0000 0x10000>;
-                #global-interrupts = <2>;
-                interrupts = <0 32 4>,
-                             <0 33 4>,
-                             <0 34 4>, /* This is the first context interrupt */
-                             <0 35 4>,
-                             <0 36 4>,
-                             <0 37 4>;
-                #iommu-cells = <1>;
-        };
-
-        /* device with two stream IDs, 0 and 7 */
-        master1 {
-                iommus = <&smmu1 0>,
-                         <&smmu1 7>;
-        };
-
-
-        /* SMMU with stream matching */
-        smmu2: iommu {
-                ...
-                #iommu-cells = <2>;
-        };
-
-        /* device with stream IDs 0 and 7 */
-        master2 {
-                iommus = <&smmu2 0 0>,
-                         <&smmu2 7 0>;
-        };
-
-        /* device with stream IDs 1, 17, 33 and 49 */
-        master3 {
-                iommus = <&smmu2 1 0x30>;
-        };
-
-
-        /* ARM MMU-500 with 10-bit stream ID input configuration */
-        smmu3: iommu {
-                compatible = "arm,mmu-500", "arm,smmu-v2";
-                ...
-                #iommu-cells = <1>;
-                /* always ignore appended 5-bit TBU number */
-                stream-match-mask = 0x7c00;
-        };
-
-        bus {
-                /* bus whose child devices emit one unique 10-bit stream
-                   ID each, but may master through multiple SMMU TBUs */
-                iommu-map = <0 &smmu3 0 0x400>;
-                ...
-        };
-
-	/* Qcom's arm,smmu-v2 implementation */
-	smmu4: iommu@d00000 {
-		compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
-		reg = <0xd00000 0x10000>;
-
-		#global-interrupts = <1>;
-		interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
-		#iommu-cells = <1>;
-		power-domains = <&mmcc MDSS_GDSC>;
-
-		clocks = <&mmcc SMMU_MDP_AXI_CLK>,
-			 <&mmcc SMMU_MDP_AHB_CLK>;
-		clock-names = "bus", "iface";
-	};
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
new file mode 100644
index 000000000000..6515dbe47508
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -0,0 +1,230 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/arm,smmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM System MMU Architecture Implementation
+
+maintainers:
+  - Will Deacon <will@kernel.org>
+  - Robin Murphy <Robin.Murphy@arm.com>
+
+description: |+
+  ARM SoCs may contain an implementation of the ARM System Memory
+  Management Unit Architecture, which can be used to provide 1 or 2 stages
+  of address translation to bus masters external to the CPU.
+
+  The SMMU may also raise interrupts in response to various fault
+  conditions.
+
+properties:
+  $nodename:
+    pattern: "^iommu@[0-9a-f]*"
+  compatible:
+    oneOf:
+      - description: Qcom SoCs implementing "arm,smmu-v2"
+        items:
+          - enum:
+              - qcom,msm8996-smmu-v2
+              - qcom,msm8998-smmu-v2
+              - qcom,sdm845-smmu-v2
+          - const: qcom,smmu-v2
+
+      - description: Qcom SoCs implementing "arm,mmu-500"
+        items:
+          - enum:
+              - qcom,sc7180-smmu-500
+              - qcom,sdm845-smmu-500
+          - const: arm,mmu-500
+      - items:
+          - const: arm,mmu-500
+          - const: arm,smmu-v2
+      - items:
+          - const: arm,mmu-401
+          - const: arm,smmu-v1
+      - enum:
+          - arm,smmu-v1
+          - arm,smmu-v2
+          - arm,mmu-400
+          - arm,mmu-401
+          - arm,mmu-500
+          - cavium,smmu-v2
+
+  reg:
+    maxItems: 1
+
+  '#global-interrupts':
+    description: The number of global interrupts exposed by the device.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 260   # 2 secure, 2 non-secure, and up to 256 perf counters
+
+  '#iommu-cells':
+    enum: [ 1, 2 ]
+    description: |
+      See Documentation/devicetree/bindings/iommu/iommu.txt for details. With a
+      value of 1, each IOMMU specifier represents a distinct stream ID emitted
+      by that device into the relevant SMMU.
+
+      SMMUs with stream matching support and complex masters may use a value of
+      2, where the second cell of the IOMMU specifier represents an SMR mask to
+      combine with the ID in the first cell.  Care must be taken to ensure the
+      set of matched IDs does not result in conflicts.
+
+  interrupts:
+    minItems: 1
+    maxItems: 388   # 260 plus 128 contexts
+    description: |
+      Interrupt list, with the first #global-interrupts entries corresponding to
+      the global interrupts and any following entries corresponding to context
+      interrupts, specified in order of their indexing by the SMMU.
+
+      For SMMUv2 implementations, there must be exactly one interrupt per
+      context bank. In the case of a single, combined interrupt, it must be
+      listed multiple times.
+
+  dma-coherent:
+    description: |
+      Present if page table walks made by the SMMU are cache coherent with the
+      CPU.
+
+      NOTE: this only applies to the SMMU itself, not masters connected
+      upstream of the SMMU.
+
+  calxeda,smmu-secure-config-access:
+    type: boolean
+    description:
+      Enable proper handling of buggy implementations that always use secure
+      access to SMMU configuration registers. In this case non-secure aliases of
+      secure registers have to be used during SMMU configuration.
+
+  stream-match-mask:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      For SMMUs supporting stream matching and using #iommu-cells = <1>,
+      specifies a mask of bits to ignore when matching stream IDs (e.g. this may
+      be programmed into the SMRn.MASK field of every stream match register
+      used). For cases where it is desirable to ignore some portion of every
+      Stream ID (e.g. for certain MMU-500 configurations given globally unique
+      input IDs). This property is not valid for SMMUs using stream indexing, or
+      using stream matching with #iommu-cells = <2>, and may be ignored if
+      present in such cases.
+
+  clock-names:
+    items:
+      - const: bus
+      - const: iface
+
+  clocks:
+    items:
+      - description: bus clock required for downstream bus access and for the
+          smmu ptw
+      - description: interface clock required to access smmu's registers
+          through the TCU's programming interface.
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - '#global-interrupts'
+  - '#iommu-cells'
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |+
+    /* SMMU with stream matching or stream indexing */
+    smmu1: iommu@ba5e0000 {
+            compatible = "arm,smmu-v1";
+            reg = <0xba5e0000 0x10000>;
+            #global-interrupts = <2>;
+            interrupts = <0 32 4>,
+                         <0 33 4>,
+                         <0 34 4>, /* This is the first context interrupt */
+                         <0 35 4>,
+                         <0 36 4>,
+                         <0 37 4>;
+            #iommu-cells = <1>;
+    };
+
+    /* device with two stream IDs, 0 and 7 */
+    master1 {
+            iommus = <&smmu1 0>,
+                     <&smmu1 7>;
+    };
+
+
+    /* SMMU with stream matching */
+    smmu2: iommu@ba5f0000 {
+            compatible = "arm,smmu-v1";
+            reg = <0xba5f0000 0x10000>;
+            #global-interrupts = <2>;
+            interrupts = <0 38 4>,
+                         <0 39 4>,
+                         <0 40 4>, /* This is the first context interrupt */
+                         <0 41 4>,
+                         <0 42 4>,
+                         <0 43 4>;
+            #iommu-cells = <2>;
+    };
+
+    /* device with stream IDs 0 and 7 */
+    master2 {
+            iommus = <&smmu2 0 0>,
+                     <&smmu2 7 0>;
+    };
+
+    /* device with stream IDs 1, 17, 33 and 49 */
+    master3 {
+            iommus = <&smmu2 1 0x30>;
+    };
+
+
+    /* ARM MMU-500 with 10-bit stream ID input configuration */
+    smmu3: iommu@ba600000 {
+            compatible = "arm,mmu-500", "arm,smmu-v2";
+            reg = <0xba600000 0x10000>;
+            #global-interrupts = <2>;
+            interrupts = <0 44 4>,
+                         <0 45 4>,
+                         <0 46 4>, /* This is the first context interrupt */
+                         <0 47 4>,
+                         <0 48 4>,
+                         <0 49 4>;
+            #iommu-cells = <1>;
+            /* always ignore appended 5-bit TBU number */
+            stream-match-mask = <0x7c00>;
+    };
+
+    bus {
+            /* bus whose child devices emit one unique 10-bit stream
+               ID each, but may master through multiple SMMU TBUs */
+            iommu-map = <0 &smmu3 0 0x400>;
+
+
+    };
+
+  - |+
+    /* Qcom's arm,smmu-v2 implementation */
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    smmu4: iommu@d00000 {
+      compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+      reg = <0xd00000 0x10000>;
+
+      #global-interrupts = <1>;
+      interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
+             <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
+             <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
+      #iommu-cells = <1>;
+      power-domains = <&mmcc 0>;
+
+      clocks = <&mmcc 123>,
+        <&mmcc 124>;
+      clock-names = "bus", "iface";
+    };
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index b6bfbec3a849..020d6f226efb 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -15,6 +15,7 @@ Required Properties:
     - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
     - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
     - "renesas,ipmmu-r8a774a1" for the R8A774A1 (RZ/G2M) IPMMU.
+    - "renesas,ipmmu-r8a774b1" for the R8A774B1 (RZ/G2N) IPMMU.
     - "renesas,ipmmu-r8a774c0" for the R8A774C0 (RZ/G2E) IPMMU.
     - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
     - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
diff --git a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt
deleted file mode 100644
index 525ec82615a6..000000000000
--- a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-Samsung Exynos IOMMU H/W, System MMU (System Memory Management Unit)
-
-Samsung's Exynos architecture contains System MMUs that enables scattered
-physical memory chunks visible as a contiguous region to DMA-capable peripheral
-devices like MFC, FIMC, FIMD, GScaler, FIMC-IS and so forth.
-
-System MMU is an IOMMU and supports identical translation table format to
-ARMv7 translation tables with minimum set of page properties including access
-permissions, shareability and security protection. In addition, System MMU has
-another capabilities like L2 TLB or block-fetch buffers to minimize translation
-latency.
-
-System MMUs are in many to one relation with peripheral devices, i.e. single
-peripheral device might have multiple System MMUs (usually one for each bus
-master), but one System MMU can handle transactions from only one peripheral
-device. The relation between a System MMU and the peripheral device needs to be
-defined in device node of the peripheral device.
-
-MFC in all Exynos SoCs and FIMD, M2M Scalers and G2D in Exynos5420 has 2 System
-MMUs.
-* MFC has one System MMU on its left and right bus.
-* FIMD in Exynos5420 has one System MMU for window 0 and 4, the other system MMU
-  for window 1, 2 and 3.
-* M2M Scalers and G2D in Exynos5420 has one System MMU on the read channel and
-  the other System MMU on the write channel.
-
-For information on assigning System MMU controller to its peripheral devices,
-see generic IOMMU bindings.
-
-Required properties:
-- compatible: Should be "samsung,exynos-sysmmu"
-- reg: A tuple of base address and size of System MMU registers.
-- #iommu-cells: Should be <0>.
-- interrupts: An interrupt specifier for interrupt signal of System MMU,
-	      according to the format defined by a particular interrupt
-	      controller.
-- clock-names: Should be "sysmmu" or a pair of "aclk" and "pclk" to gate
-	       SYSMMU core clocks.
-	       Optional "master" if the clock to the System MMU is gated by
-	       another gate clock other core  (usually main gate clock
-	       of peripheral device this SYSMMU belongs to).
-- clocks: Phandles for respective clocks described by clock-names.
-- power-domains: Required if the System MMU is needed to gate its power.
-	  Please refer to the following document:
-	  Documentation/devicetree/bindings/power/pd-samsung.txt
-
-Examples:
-	gsc_0: gsc@13e00000 {
-		compatible = "samsung,exynos5-gsc";
-		reg = <0x13e00000 0x1000>;
-		interrupts = <0 85 0>;
-		power-domains = <&pd_gsc>;
-		clocks = <&clock CLK_GSCL0>;
-		clock-names = "gscl";
-		iommus = <&sysmmu_gsc0>;
-	};
-
-	sysmmu_gsc0: sysmmu@13e80000 {
-		compatible = "samsung,exynos-sysmmu";
-		reg = <0x13E80000 0x1000>;
-		interrupt-parent = <&combiner>;
-		interrupts = <2 0>;
-		clock-names = "sysmmu", "master";
-		clocks = <&clock CLK_SMMU_GSCL0>, <&clock CLK_GSCL0>;
-		power-domains = <&pd_gsc>;
-		#iommu-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml
new file mode 100644
index 000000000000..7cdd3aaa2ba4
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/samsung,sysmmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos IOMMU H/W, System MMU (System Memory Management Unit)
+
+maintainers:
+  - Marek Szyprowski <m.szyprowski@samsung.com>
+
+description: |+
+  Samsung's Exynos architecture contains System MMUs that enables scattered
+  physical memory chunks visible as a contiguous region to DMA-capable peripheral
+  devices like MFC, FIMC, FIMD, GScaler, FIMC-IS and so forth.
+
+  System MMU is an IOMMU and supports identical translation table format to
+  ARMv7 translation tables with minimum set of page properties including access
+  permissions, shareability and security protection. In addition, System MMU has
+  another capabilities like L2 TLB or block-fetch buffers to minimize translation
+  latency.
+
+  System MMUs are in many to one relation with peripheral devices, i.e. single
+  peripheral device might have multiple System MMUs (usually one for each bus
+  master), but one System MMU can handle transactions from only one peripheral
+  device. The relation between a System MMU and the peripheral device needs to be
+  defined in device node of the peripheral device.
+
+  MFC in all Exynos SoCs and FIMD, M2M Scalers and G2D in Exynos5420 has 2 System
+  MMUs.
+  * MFC has one System MMU on its left and right bus.
+  * FIMD in Exynos5420 has one System MMU for window 0 and 4, the other system MMU
+    for window 1, 2 and 3.
+  * M2M Scalers and G2D in Exynos5420 has one System MMU on the read channel and
+    the other System MMU on the write channel.
+
+  For information on assigning System MMU controller to its peripheral devices,
+  see generic IOMMU bindings.
+
+properties:
+  compatible:
+    const: samsung,exynos-sysmmu
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    oneOf:
+      - items:
+        - const: sysmmu
+      - items:
+        - const: sysmmu
+        - const: master
+      - items:
+        - const: aclk
+        - const: pclk
+
+  "#iommu-cells":
+    const: 0
+
+  power-domains:
+    description: |
+      Required if the System MMU is needed to gate its power.
+      Please refer to the following document:
+      Documentation/devicetree/bindings/power/pd-samsung.yaml
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#iommu-cells"
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5250.h>
+
+    gsc_0: scaler@13e00000 {
+      compatible = "samsung,exynos5-gsc";
+      reg = <0x13e00000 0x1000>;
+      interrupts = <0 85 0>;
+      power-domains = <&pd_gsc>;
+      clocks = <&clock CLK_GSCL0>;
+      clock-names = "gscl";
+      iommus = <&sysmmu_gsc0>;
+    };
+
+    sysmmu_gsc0: iommu@13e80000 {
+      compatible = "samsung,exynos-sysmmu";
+      reg = <0x13E80000 0x1000>;
+      interrupt-parent = <&combiner>;
+      interrupts = <2 0>;
+      clock-names = "sysmmu", "master";
+      clocks = <&clock CLK_SMMU_GSCL0>,
+               <&clock CLK_GSCL0>;
+      power-domains = <&pd_gsc>;
+      #iommu-cells = <0>;
+    };
+
diff --git a/Documentation/devicetree/bindings/leds/backlight/led-backlight.txt b/Documentation/devicetree/bindings/leds/backlight/led-backlight.txt
new file mode 100644
index 000000000000..4c7dfbe7f67a
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/backlight/led-backlight.txt
@@ -0,0 +1,28 @@
+led-backlight bindings
+
+This binding is used to describe a basic backlight device made of LEDs.
+It can also be used to describe a backlight device controlled by the output of
+a LED driver.
+
+Required properties:
+  - compatible: "led-backlight"
+  - leds: a list of LEDs
+
+Optional properties:
+  - brightness-levels: Array of distinct brightness levels. The levels must be
+                       in the range accepted by the underlying LED devices.
+                       This is used to translate a backlight brightness level
+                       into a LED brightness level. If it is not provided, the
+                       identity mapping is used.
+
+  - default-brightness-level: The default brightness level.
+
+Example:
+
+	backlight {
+		compatible = "led-backlight";
+
+		leds = <&led1>, <&led2>;
+		brightness-levels = <0 4 8 16 32 64 128 255>;
+		default-brightness-level = <6>;
+	};
diff --git a/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
index dc129d9a329e..08fe5cf8614a 100644
--- a/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
@@ -29,6 +29,10 @@ properties:
   '#size-cells':
     const: 0
 
+  enable-gpios:
+    description: GPIO to use to enable/disable the backlight (HWEN pin).
+    maxItems: 1
+
 required:
   - compatible
   - reg
@@ -89,6 +93,7 @@ additionalProperties: false
 
 examples:
   - |
+    #include <dt-bindings/gpio/gpio.h>
     i2c {
         #address-cells = <1>;
         #size-cells = <0>;
@@ -96,6 +101,7 @@ examples:
         led-controller@38 {
                 compatible = "ti,lm3630a";
                 reg = <0x38>;
+                enable-gpios = <&gpio2 5 GPIO_ACTIVE_HIGH>;
 
                 #address-cells = <1>;
                 #size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/leds/backlight/pm8941-wled.txt b/Documentation/devicetree/bindings/leds/backlight/pm8941-wled.txt
deleted file mode 100644
index e5b294dafc58..000000000000
--- a/Documentation/devicetree/bindings/leds/backlight/pm8941-wled.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-Binding for Qualcomm PM8941 WLED driver
-
-Required properties:
-- compatible: should be "qcom,pm8941-wled"
-- reg: slave address
-
-Optional properties:
-- default-brightness: brightness value on boot, value from: 0-4095
-	default: 2048
-- label: The name of the backlight device
-- qcom,cs-out: bool; enable current sink output
-- qcom,cabc: bool; enable content adaptive backlight control
-- qcom,ext-gen: bool; use externally generated modulator signal to dim
-- qcom,current-limit: mA; per-string current limit; value from 0 to 25
-	default: 20mA
-- qcom,current-boost-limit: mA; boost current limit; one of:
-	105, 385, 525, 805, 980, 1260, 1400, 1680
-	default: 805mA
-- qcom,switching-freq: kHz; switching frequency; one of:
-	600, 640, 685, 738, 800, 872, 960, 1066, 1200, 1371,
-	1600, 1920, 2400, 3200, 4800, 9600,
-	default: 1600kHz
-- qcom,ovp: V; Over-voltage protection limit; one of:
-	27, 29, 32, 35
-	default: 29V
-- qcom,num-strings: #; number of led strings attached; value from 1 to 3
-	default: 2
-
-Example:
-
-pm8941-wled@d800 {
-	compatible = "qcom,pm8941-wled";
-	reg = <0xd800>;
-	label = "backlight";
-
-	qcom,cs-out;
-	qcom,current-limit = <20>;
-	qcom,current-boost-limit = <805>;
-	qcom,switching-freq = <1600>;
-	qcom,ovp = <29>;
-	qcom,num-strings = <2>;
-};
diff --git a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.txt b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.txt
new file mode 100644
index 000000000000..c06863badfbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.txt
@@ -0,0 +1,154 @@
+Binding for Qualcomm Technologies, Inc. WLED driver
+
+WLED (White Light Emitting Diode) driver is used for controlling display
+backlight that is part of PMIC on Qualcomm Technologies, Inc. reference
+platforms. The PMIC is connected to the host processor via SPMI bus.
+
+- compatible
+	Usage:        required
+	Value type:   <string>
+	Definition:   should be one of:
+			"qcom,pm8941-wled"
+			"qcom,pmi8998-wled"
+			"qcom,pm660l-wled"
+
+- reg
+	Usage:        required
+	Value type:   <prop encoded array>
+	Definition:   Base address of the WLED modules.
+
+- default-brightness
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   brightness value on boot, value from: 0-4095.
+		      Default: 2048
+
+- label
+	Usage:        required
+	Value type:   <string>
+	Definition:   The name of the backlight device
+
+- qcom,cs-out
+	Usage:        optional
+	Value type:   <bool>
+	Definition:   enable current sink output.
+		      This property is supported only for PM8941.
+
+- qcom,cabc
+	Usage:        optional
+	Value type:   <bool>
+	Definition:   enable content adaptive backlight control.
+
+- qcom,ext-gen
+	Usage:        optional
+	Value type:   <bool>
+	Definition:   use externally generated modulator signal to dim.
+		      This property is supported only for PM8941.
+
+- qcom,current-limit
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   mA; per-string current limit; value from 0 to 25 with
+		      1 mA step. Default 20 mA.
+		      This property is supported only for pm8941.
+
+- qcom,current-limit-microamp
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   uA; per-string current limit; value from 0 to 30000 with
+		      2500 uA step. Default 25 mA.
+
+- qcom,current-boost-limit
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   mA; boost current limit.
+		      For pm8941: one of: 105, 385, 525, 805, 980, 1260, 1400,
+		      1680. Default: 805 mA.
+		      For pmi8998: one of: 105, 280, 450, 620, 970, 1150, 1300,
+		      1500. Default: 970 mA.
+
+- qcom,switching-freq
+	Usage:        optional
+	Value type:   <u32>
+	 Definition:   kHz; switching frequency; one of: 600, 640, 685, 738,
+		       800, 872, 960, 1066, 1200, 1371, 1600, 1920, 2400, 3200,
+		       4800, 9600.
+		       Default: for pm8941: 1600 kHz
+				for pmi8998: 800 kHz
+
+- qcom,ovp
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   V; Over-voltage protection limit; one of:
+		      27, 29, 32, 35. Default: 29V
+		      This property is supported only for PM8941.
+
+- qcom,ovp-millivolt
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   mV; Over-voltage protection limit;
+		      For pmi8998: one of 18100, 19600, 29600, 31100.
+		      Default 29600 mV.
+		      If this property is not specified for PM8941, it
+		      falls back to "qcom,ovp" property.
+
+- qcom,num-strings
+	Usage:        optional
+	Value type:   <u32>
+	Definition:   #; number of led strings attached;
+		      value: For PM8941 from 1 to 3. Default: 2
+			     For PMI8998 from 1 to 4.
+
+- interrupts
+	Usage:        optional
+	Value type:   <prop encoded array>
+	Definition:   Interrupts associated with WLED. This should be
+		      "short" and "ovp" interrupts. Interrupts can be
+		      specified as per the encoding listed under
+		      Documentation/devicetree/bindings/spmi/
+		      qcom,spmi-pmic-arb.txt.
+
+- interrupt-names
+	Usage:        optional
+	Value type:   <string>
+	Definition:   Interrupt names associated with the interrupts.
+		      Must be "short" and "ovp". The short circuit detection
+		      is not supported for PM8941.
+
+- qcom,enabled-strings
+	Usage:        optional
+	Value tyoe:   <u32 array>
+	Definition:   Array of the WLED strings numbered from 0 to 3. Each
+		      string of leds are operated individually. Specify the
+		      list of strings used by the device. Any combination of
+		      led strings can be used.
+
+- qcom,external-pfet
+	Usage:        optional
+	Value type:   <bool>
+	Definition:   Specify if external PFET control for short circuit
+		      protection is used. This property is supported only
+		      for PMI8998.
+
+- qcom,auto-string-detection
+	Usage:        optional
+	Value type:   <bool>
+	Definition:   Enables auto-detection of the WLED string configuration.
+		      This feature is not supported for PM8941.
+
+
+Example:
+
+pm8941-wled@d800 {
+	compatible = "qcom,pm8941-wled";
+	reg = <0xd800>;
+	label = "backlight";
+
+	qcom,cs-out;
+	qcom,current-limit = <20>;
+	qcom,current-boost-limit = <805>;
+	qcom,switching-freq = <1600>;
+	qcom,ovp = <29>;
+	qcom,num-strings = <2>;
+	qcom,enabled-strings = <0 1>;
+};
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
index 9fa6f9795d50..26d770ef3601 100644
--- a/Documentation/devicetree/bindings/leds/common.txt
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -1,173 +1 @@
-* Common leds properties.
-
-LED and flash LED devices provide the same basic functionality as current
-regulators, but extended with LED and flash LED specific features like
-blinking patterns, flash timeout, flash faults and external flash strobe mode.
-
-Many LED devices expose more than one current output that can be connected
-to one or more discrete LED component. Since the arrangement of connections
-can influence the way of the LED device initialization, the LED components
-have to be tightly coupled with the LED device binding. They are represented
-by child nodes of the parent LED device binding.
-
-
-Optional properties for child nodes:
-- led-sources : List of device current outputs the LED is connected to. The
-		outputs are identified by the numbers that must be defined
-		in the LED device binding documentation.
-
-- function: LED functon. Use one of the LED_FUNCTION_* prefixed definitions
-	    from the header include/dt-bindings/leds/common.h.
-	    If there is no matching LED_FUNCTION available, add a new one.
-
-- color : Color of the LED. Use one of the LED_COLOR_ID_* prefixed definitions
-	  from the header include/dt-bindings/leds/common.h.
-	  If there is no matching LED_COLOR_ID available, add a new one.
-
-- function-enumerator: Integer to be used when more than one instance
-                       of the same function is needed, differing only with
-		       an ordinal number.
-
-- label : The label for this LED. If omitted, the label is taken from the node
-	  name (excluding the unit address). It has to uniquely identify
-	  a device, i.e. no other LED class device can be assigned the same
-	  label. This property is deprecated - use 'function' and 'color'
-	  properties instead. function-enumerator has no effect when this
-	  property is present.
-
-- default-state : The initial state of the LED. Valid values are "on", "off",
-  and "keep". If the LED is already on or off and the default-state property is
-  set the to same value, then no glitch should be produced where the LED
-  momentarily turns off (or on). The "keep" setting will keep the LED at
-  whatever its current state is, without producing a glitch.  The default is
-  off if this property is not present.
-
-- linux,default-trigger :  This parameter, if present, is a
-    string defining the trigger assigned to the LED.  Current triggers are:
-     "backlight" - LED will act as a back-light, controlled by the framebuffer
-		   system
-     "default-on" - LED will turn on (but for leds-gpio see "default-state"
-		    property in Documentation/devicetree/bindings/leds/leds-gpio.txt)
-     "heartbeat" - LED "double" flashes at a load average based rate
-     "disk-activity" - LED indicates disk activity
-     "ide-disk" - LED indicates IDE disk activity (deprecated),
-                  in new implementations use "disk-activity"
-     "timer" - LED flashes at a fixed, configurable rate
-     "pattern" - LED alters the brightness for the specified duration with one
-                 software timer (requires "led-pattern" property)
-
-- led-pattern : Array of integers with default pattern for certain triggers.
-                Each trigger may parse this property differently:
-                - one-shot : two numbers specifying delay on and delay off (in ms),
-                - timer : two numbers specifying delay on and delay off (in ms),
-                - pattern : the pattern is given by a series of tuples, of
-                  brightness and duration (in ms).  The exact format is
-                  described in:
-                  Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
-
-
-- led-max-microamp : Maximum LED supply current in microamperes. This property
-                     can be made mandatory for the board configurations
-                     introducing a risk of hardware damage in case an excessive
-                     current is set.
-                     For flash LED controllers with configurable current this
-                     property is mandatory for the LEDs in the non-flash modes
-                     (e.g. torch or indicator).
-
-- panic-indicator : This property specifies that the LED should be used,
-		    if at all possible, as a panic indicator.
-
-- trigger-sources : List of devices which should be used as a source triggering
-		    this LED activity. Some LEDs can be related to a specific
-		    device and should somehow indicate its state. E.g. USB 2.0
-		    LED may react to device(s) in a USB 2.0 port(s).
-		    Another common example is switch or router with multiple
-		    Ethernet ports each of them having its own LED assigned
-		    (assuming they are not hardwired). In such cases this
-		    property should contain phandle(s) of related source
-		    device(s).
-		    In many cases LED can be related to more than one device
-		    (e.g. one USB LED vs. multiple USB ports). Each source
-		    should be represented by a node in the device tree and be
-		    referenced by a phandle and a set of phandle arguments. A
-		    length of arguments should be specified by the
-		    #trigger-source-cells property in the source node.
-
-Required properties for flash LED child nodes:
-- flash-max-microamp : Maximum flash LED supply current in microamperes.
-- flash-max-timeout-us : Maximum timeout in microseconds after which the flash
-                         LED is turned off.
-
-For controllers that have no configurable current the flash-max-microamp
-property can be omitted.
-For controllers that have no configurable timeout the flash-max-timeout-us
-property can be omitted.
-
-* Trigger source providers
-
-Each trigger source should be represented by a device tree node. It may be e.g.
-a USB port or an Ethernet device.
-
-Required properties for trigger source:
-- #trigger-source-cells : Number of cells in a source trigger. Typically 0 for
-			  nodes of simple trigger sources (e.g. a specific USB
-			  port).
-
-* Examples
-
-#include <dt-bindings/leds/common.h>
-
-led-controller@0 {
-	compatible = "gpio-leds";
-
-	led0 {
-		function = LED_FUNCTION_STATUS;
-		linux,default-trigger = "heartbeat";
-		gpios = <&gpio0 0 GPIO_ACTIVE_HIGH>;
-	};
-
-	led1 {
-		function = LED_FUNCTION_USB;
-		gpios = <&gpio0 1 GPIO_ACTIVE_HIGH>;
-		trigger-sources = <&ohci_port1>, <&ehci_port1>;
-	};
-};
-
-led-controller@0 {
-	compatible = "maxim,max77693-led";
-
-	led {
-		function = LED_FUNCTION_FLASH;
-		color = <LED_COLOR_ID_WHITE>;
-		led-sources = <0>, <1>;
-		led-max-microamp = <50000>;
-		flash-max-microamp = <320000>;
-		flash-max-timeout-us = <500000>;
-	};
-};
-
-led-controller@30 {
-        compatible = "panasonic,an30259a";
-        reg = <0x30>;
-        #address-cells = <1>;
-        #size-cells = <0>;
-
-        led@1 {
-		reg = <1>;
-		linux,default-trigger = "heartbeat";
-		function = LED_FUNCTION_INDICATOR;
-		function-enumerator = <1>;
-        };
-
-        led@2 {
-		reg = <2>;
-		function = LED_FUNCTION_INDICATOR;
-		function-enumerator = <2>;
-        };
-
-        led@3 {
-		reg = <3>;
-		function = LED_FUNCTION_INDICATOR;
-		function-enumerator = <3>;
-        };
-};
+This file has moved to ./common.yaml.
diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
new file mode 100644
index 000000000000..d97d099b87e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -0,0 +1,228 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common leds properties
+
+maintainers:
+  - Jacek Anaszewski <jacek.anaszewski@gmail.com>
+  - Pavel Machek <pavel@ucw.cz>
+
+description:
+  LED and flash LED devices provide the same basic functionality as current
+  regulators, but extended with LED and flash LED specific features like
+  blinking patterns, flash timeout, flash faults and external flash strobe mode.
+
+  Many LED devices expose more than one current output that can be connected
+  to one or more discrete LED component. Since the arrangement of connections
+  can influence the way of the LED device initialization, the LED components
+  have to be tightly coupled with the LED device binding. They are represented
+  by child nodes of the parent LED device binding.
+
+properties:
+  led-sources:
+    description:
+      List of device current outputs the LED is connected to. The outputs are
+      identified by the numbers that must be defined in the LED device binding
+      documentation.
+    $ref: /schemas/types.yaml#definitions/uint32-array
+
+  function:
+    description:
+      LED function. Use one of the LED_FUNCTION_* prefixed definitions
+      from the header include/dt-bindings/leds/common.h. If there is no
+      matching LED_FUNCTION available, add a new one.
+    $ref: /schemas/types.yaml#definitions/string
+
+  color:
+    description:
+      Color of the LED. Use one of the LED_COLOR_ID_* prefixed definitions from
+      the header include/dt-bindings/leds/common.h. If there is no matching
+      LED_COLOR_ID available, add a new one.
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint32
+    minimum: 0
+    maximum: 8
+
+  function-enumerator:
+    description:
+      Integer to be used when more than one instance of the same function is
+      needed, differing only with an ordinal number.
+    $ref: /schemas/types.yaml#definitions/uint32
+
+  label:
+    description:
+      The label for this LED. If omitted, the label is taken from the node name
+      (excluding the unit address). It has to uniquely identify a device, i.e.
+      no other LED class device can be assigned the same label. This property is
+      deprecated - use 'function' and 'color' properties instead.
+      function-enumerator has no effect when this property is present.
+
+  default-state:
+    description:
+      The initial state of the LED. If the LED is already on or off and the
+      default-state property is set the to same value, then no glitch should be
+      produced where the LED momentarily turns off (or on). The "keep" setting
+      will keep the LED at whatever its current state is, without producing a
+      glitch.
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/string
+    enum:
+      - on
+      - off
+      - keep
+    default: off
+
+  linux,default-trigger:
+    description:
+      This parameter, if present, is a string defining the trigger assigned to
+      the LED.
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/string
+    enum:
+        # LED will act as a back-light, controlled by the framebuffer system
+      - backlight
+        # LED will turn on (but for leds-gpio see "default-state" property in
+        # Documentation/devicetree/bindings/leds/leds-gpio.txt)
+      - default-on
+        # LED "double" flashes at a load average based rate
+      - heartbeat
+        # LED indicates disk activity
+      - disk-activity
+        # LED indicates IDE disk activity (deprecated), in new implementations
+        # use "disk-activity"
+      - ide-disk
+        # LED flashes at a fixed, configurable rate
+      - timer
+        # LED alters the brightness for the specified duration with one software
+        # timer (requires "led-pattern" property)
+      - pattern
+
+  led-pattern:
+    description: |
+      Array of integers with default pattern for certain triggers.
+
+      Each trigger may parse this property differently:
+        - one-shot : two numbers specifying delay on and delay off (in ms),
+        - timer : two numbers specifying delay on and delay off (in ms),
+        - pattern : the pattern is given by a series of tuples, of
+          brightness and duration (in ms).  The exact format is
+          described in:
+          Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint32-matrix
+    items:
+      minItems: 2
+      maxItems: 2
+
+  led-max-microamp:
+    description:
+      Maximum LED supply current in microamperes. This property can be made
+      mandatory for the board configurations introducing a risk of hardware
+      damage in case an excessive current is set.
+      For flash LED controllers with configurable current this property is
+      mandatory for the LEDs in the non-flash modes (e.g. torch or indicator).
+
+  panic-indicator:
+    description:
+      This property specifies that the LED should be used, if at all possible,
+      as a panic indicator.
+    type: boolean
+
+  trigger-sources:
+    description: |
+      List of devices which should be used as a source triggering this LED
+      activity. Some LEDs can be related to a specific device and should somehow
+      indicate its state. E.g. USB 2.0 LED may react to device(s) in a USB 2.0
+      port(s).
+      Another common example is switch or router with multiple Ethernet ports
+      each of them having its own LED assigned (assuming they are not
+      hardwired). In such cases this property should contain phandle(s) of
+      related source device(s).
+      In many cases LED can be related to more than one device (e.g. one USB LED
+      vs. multiple USB ports). Each source should be represented by a node in
+      the device tree and be referenced by a phandle and a set of phandle
+      arguments. A length of arguments should be specified by the
+      #trigger-source-cells property in the source node.
+    $ref: /schemas/types.yaml#definitions/phandle-array
+
+  # Required properties for flash LED child nodes:
+  flash-max-microamp:
+    description:
+      Maximum flash LED supply current in microamperes. Required for flash LED
+      nodes with configurable current.
+
+  flash-max-timeout-us:
+    description:
+      Maximum timeout in microseconds after which the flash LED is turned off.
+      Required for flash LED nodes with configurable timeout.
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/leds/common.h>
+
+    led-controller {
+        compatible = "gpio-leds";
+
+        led0 {
+            function = LED_FUNCTION_STATUS;
+            linux,default-trigger = "heartbeat";
+            gpios = <&gpio0 0 GPIO_ACTIVE_HIGH>;
+        };
+
+        led1 {
+            function = LED_FUNCTION_USB;
+            gpios = <&gpio0 1 GPIO_ACTIVE_HIGH>;
+            trigger-sources = <&ohci_port1>, <&ehci_port1>;
+        };
+    };
+
+    led-controller@0 {
+        compatible = "maxim,max77693-led";
+        reg = <0 0x100>;
+
+        led {
+            function = LED_FUNCTION_FLASH;
+            color = <LED_COLOR_ID_WHITE>;
+            led-sources = <0>, <1>;
+            led-max-microamp = <50000>;
+            flash-max-microamp = <320000>;
+            flash-max-timeout-us = <500000>;
+        };
+    };
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led-controller@30 {
+            compatible = "panasonic,an30259a";
+            reg = <0x30>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            led@1 {
+                reg = <1>;
+                linux,default-trigger = "heartbeat";
+                function = LED_FUNCTION_INDICATOR;
+                function-enumerator = <1>;
+            };
+
+            led@2 {
+                reg = <2>;
+                function = LED_FUNCTION_INDICATOR;
+                function-enumerator = <2>;
+            };
+
+            led@3 {
+                reg = <3>;
+                function = LED_FUNCTION_INDICATOR;
+                function-enumerator = <3>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/leds/irled/spi-ir-led.txt b/Documentation/devicetree/bindings/leds/irled/spi-ir-led.txt
index 21882c8d4b0c..83ff1b4d70a6 100644
--- a/Documentation/devicetree/bindings/leds/irled/spi-ir-led.txt
+++ b/Documentation/devicetree/bindings/leds/irled/spi-ir-led.txt
@@ -8,7 +8,7 @@ Required properties:
 	- compatible: should be "ir-spi-led".
 
 Optional properties:
-	- duty-cycle: 8 bit balue that represents the percentage of one period
+	- duty-cycle: 8 bit value that represents the percentage of one period
 	  in which the signal is active.  It can be 50, 60, 70, 75, 80 or 90.
 	- led-active-low: boolean value that specifies whether the output is
 	  negated with a NOT gate.
diff --git a/Documentation/devicetree/bindings/leds/leds-el15203000.txt b/Documentation/devicetree/bindings/leds/leds-el15203000.txt
new file mode 100644
index 000000000000..182f0035ed28
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-el15203000.txt
@@ -0,0 +1,69 @@
+Crane Merchandising System - EL15203000 LED driver
+--------------------------------------------------
+
+This LED Board (aka RED LEDs board) is widely used in
+coffee vending machines produced by Crane Merchandising Systems.
+The board manages 3 LEDs and supports predefined blinking patterns
+for specific leds.
+
+Vending area LED encoded with symbol 'V' (hex code 0x56).
+Doesn't have any hardware blinking pattern.
+
+Screen light tube LED which surrounds vending machine screen and
+encoded with symbol 'S' (hex code 0x53). Supports blinking breathing pattern.
+
+Water Pipe LED encoded with symbol 'P' (hex code 0x50) and
+actually consists of 5 LEDs that exposed by protocol like one LED.
+Supports next patterns:
+- cascade pattern
+- inversed cascade pattern
+- bounce pattern
+- inversed bounce pattern
+
+Required properties:
+- compatible : "crane,el15203000"
+- #address-cells : must be 1
+- #size-cells : must be 0
+
+Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt
+apply. In particular, "reg" and "spi-max-frequency" properties must be given.
+
+Optional LED sub-node properties:
+- function:
+	see Documentation/devicetree/bindings/leds/common.txt
+- color:
+	see Documentation/devicetree/bindings/leds/common.txt
+
+Example
+-------
+
+#include <dt-bindings/leds/common.h>
+
+led-controller@0 {
+	compatible = "crane,el15203000";
+	reg = <0>;
+	spi-max-frequency = <50000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	/* water pipe */
+	led@50 {
+		reg = <0x50>;
+		function = "pipe";
+		color = <LED_COLOR_ID_RED>;
+	};
+
+	/* screen frame */
+	led@53 {
+		reg = <0x53>;
+		function = "screen";
+		color = <LED_COLOR_ID_RED>;
+	};
+
+	/* vending area */
+	led@56 {
+		reg = <0x56>;
+		function = "vend";
+		color = <LED_COLOR_ID_RED>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
deleted file mode 100644
index d21281b63d38..000000000000
--- a/Documentation/devicetree/bindings/leds/leds-gpio.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-LEDs connected to GPIO lines
-
-Required properties:
-- compatible : should be "gpio-leds".
-
-Each LED is represented as a sub-node of the gpio-leds device.  Each
-node's name represents the name of the corresponding LED.
-
-LED sub-node properties:
-- gpios :  Should specify the LED's GPIO, see "gpios property" in
-  Documentation/devicetree/bindings/gpio/gpio.txt.  Active low LEDs should be
-  indicated using flags in the GPIO specifier.
-- function :  (optional)
-  see Documentation/devicetree/bindings/leds/common.txt
-- color :  (optional)
-  see Documentation/devicetree/bindings/leds/common.txt
-- label :  (optional)
-  see Documentation/devicetree/bindings/leds/common.txt (deprecated)
-- linux,default-trigger :  (optional)
-  see Documentation/devicetree/bindings/leds/common.txt
-- default-state:  (optional) The initial state of the LED.
-  see Documentation/devicetree/bindings/leds/common.txt
-- retain-state-suspended: (optional) The suspend state can be retained.Such
-  as charge-led gpio.
-- retain-state-shutdown: (optional) Retain the state of the LED on shutdown.
-  Useful in BMC systems, for example when the BMC is rebooted while the host
-  remains up.
-- panic-indicator : (optional)
-  see Documentation/devicetree/bindings/leds/common.txt
-
-Examples:
-
-#include <dt-bindings/gpio/gpio.h>
-#include <dt-bindings/leds/common.h>
-
-leds {
-	compatible = "gpio-leds";
-	led0 {
-		gpios = <&mcu_pio 0 GPIO_ACTIVE_LOW>;
-		linux,default-trigger = "disk-activity";
-		function = LED_FUNCTION_DISK;
-	};
-
-	led1 {
-		gpios = <&mcu_pio 1 GPIO_ACTIVE_HIGH>;
-		/* Keep LED on if BIOS detected hardware fault */
-		default-state = "keep";
-		function = LED_FUNCTION_FAULT;
-	};
-};
-
-run-control {
-	compatible = "gpio-leds";
-	led0 {
-		gpios = <&mpc8572 6 GPIO_ACTIVE_HIGH>;
-		color = <LED_COLOR_ID_RED>;
-		default-state = "off";
-	};
-	led1 {
-		gpios = <&mpc8572 7 GPIO_ACTIVE_HIGH>;
-		color = <LED_COLOR_ID_GREEN>;
-		default-state = "on";
-	};
-};
-
-leds {
-	compatible = "gpio-leds";
-
-	led0 {
-		gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
-		linux,default-trigger = "max8903-charger-charging";
-		retain-state-suspended;
-		function = LED_FUNCTION_CHARGE;
-	};
-};
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.yaml b/Documentation/devicetree/bindings/leds/leds-gpio.yaml
new file mode 100644
index 000000000000..0e75b185dd19
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LEDs connected to GPIO lines
+
+maintainers:
+  - Jacek Anaszewski <jacek.anaszewski@gmail.com>
+  - Pavel Machek <pavel@ucw.cz>
+
+description:
+  Each LED is represented as a sub-node of the gpio-leds device.  Each
+  node's name represents the name of the corresponding LED.
+
+properties:
+  compatible:
+    const: gpio-leds
+
+patternProperties:
+  # The first form is preferred, but fall back to just 'led' anywhere in the
+  # node name to at least catch some child nodes.
+  "(^led-[0-9a-f]$|led)":
+    type: object
+
+    allOf:
+      - $ref: common.yaml#
+
+    properties:
+      gpios:
+        maxItems: 1
+
+      retain-state-suspended:
+        description:
+          The suspend state can be retained.Such as charge-led gpio.
+        type: boolean
+
+      retain-state-shutdown:
+        description:
+          Retain the state of the LED on shutdown. Useful in BMC systems, for
+          example when the BMC is rebooted while the host remains up.
+        type: boolean
+
+    required:
+      - gpios
+
+additionalProperties: false
+
+examples:
+  - |
+
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/leds/common.h>
+
+    leds {
+        compatible = "gpio-leds";
+        led-0 {
+            gpios = <&mcu_pio 0 GPIO_ACTIVE_LOW>;
+            linux,default-trigger = "disk-activity";
+            function = LED_FUNCTION_DISK;
+        };
+
+        led-1 {
+            gpios = <&mcu_pio 1 GPIO_ACTIVE_HIGH>;
+            /* Keep LED on if BIOS detected hardware fault */
+            default-state = "keep";
+            function = LED_FUNCTION_FAULT;
+        };
+    };
+
+    run-control {
+        compatible = "gpio-leds";
+        led-0 {
+            gpios = <&mpc8572 6 GPIO_ACTIVE_HIGH>;
+            color = <LED_COLOR_ID_RED>;
+            default-state = "off";
+        };
+        led-1 {
+            gpios = <&mpc8572 7 GPIO_ACTIVE_HIGH>;
+            color = <LED_COLOR_ID_GREEN>;
+            default-state = "on";
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/leds/leds-max77650.txt b/Documentation/devicetree/bindings/leds/leds-max77650.txt
deleted file mode 100644
index 3a67115cc1da..000000000000
--- a/Documentation/devicetree/bindings/leds/leds-max77650.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-LED driver for MAX77650 PMIC from Maxim Integrated.
-
-This module is part of the MAX77650 MFD device. For more details
-see Documentation/devicetree/bindings/mfd/max77650.txt.
-
-The LED controller is represented as a sub-node of the PMIC node on
-the device tree.
-
-This device has three current sinks.
-
-Required properties:
---------------------
-- compatible:		Must be "maxim,max77650-led"
-- #address-cells:	Must be <1>.
-- #size-cells:		Must be <0>.
-
-Each LED is represented as a sub-node of the LED-controller node. Up to
-three sub-nodes can be defined.
-
-Required properties of the sub-node:
-------------------------------------
-
-- reg:			Must be <0>, <1> or <2>.
-
-Optional properties of the sub-node:
-------------------------------------
-
-- label:		See Documentation/devicetree/bindings/leds/common.txt
-- linux,default-trigger: See Documentation/devicetree/bindings/leds/common.txt
-
-For more details, please refer to the generic GPIO DT binding document
-<devicetree/bindings/gpio/gpio.txt>.
-
-Example:
---------
-
-	leds {
-		compatible = "maxim,max77650-led";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		led@0 {
-			reg = <0>;
-			label = "blue:usr0";
-		};
-
-		led@1 {
-			reg = <1>;
-			label = "red:usr1";
-			linux,default-trigger = "heartbeat";
-		};
-
-		led@2 {
-			reg = <2>;
-			label = "green:usr2";
-		};
-	};
diff --git a/Documentation/devicetree/bindings/leds/leds-max77650.yaml b/Documentation/devicetree/bindings/leds/leds-max77650.yaml
new file mode 100644
index 000000000000..8c43f1e1bf7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-max77650.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-max77650.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LED driver for MAX77650 PMIC from Maxim Integrated.
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description: |
+  This module is part of the MAX77650 MFD device. For more details
+  see Documentation/devicetree/bindings/mfd/max77650.yaml.
+
+  The LED controller is represented as a sub-node of the PMIC node on
+  the device tree.
+
+  This device has three current sinks.
+
+properties:
+  compatible:
+    const: maxim,max77650-led
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^led@[0-2]$":
+    type: object
+    description: |
+      Properties for a single LED.
+
+    properties:
+      reg:
+        description:
+          Index of the LED.
+        minimum: 0
+        maximum: 2
+
+      label: true
+
+      linux,default-trigger: true
+
+required:
+  - compatible
+  - "#address-cells"
+  - "#size-cells"
diff --git a/Documentation/devicetree/bindings/leds/trigger-source.yaml b/Documentation/devicetree/bindings/leds/trigger-source.yaml
new file mode 100644
index 000000000000..0618003e40bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/trigger-source.yaml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/trigger-source.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Trigger source providers
+
+maintainers:
+  - Jacek Anaszewski <jacek.anaszewski@gmail.com>
+  - Pavel Machek <pavel@ucw.cz>
+
+description:
+  Each trigger source provider should be represented by a device tree node. It
+  may be e.g. a USB port or an Ethernet device.
+
+properties:
+  '#trigger-source-cells':
+    description:
+      Number of cells in a source trigger. Typically 0 for nodes of simple
+      trigger sources (e.g. a specific USB port).
+    enum: [ 0, 1 ]
+
+...
diff --git a/Documentation/devicetree/bindings/mailbox/fsl,mu.txt b/Documentation/devicetree/bindings/mailbox/fsl,mu.txt
index f3cf77eb5ab4..9c43357c5924 100644
--- a/Documentation/devicetree/bindings/mailbox/fsl,mu.txt
+++ b/Documentation/devicetree/bindings/mailbox/fsl,mu.txt
@@ -21,6 +21,8 @@ Required properties:
 		imx6sx, imx7s, imx8qxp, imx8qm.
 		The "fsl,imx6sx-mu" compatible is seen as generic and should
 		be included together with SoC specific compatible.
+		There is a version 1.0 MU on imx7ulp, use "fsl,imx7ulp-mu"
+		compatible to support it.
 - reg :		Should contain the registers location and length
 - interrupts :	Interrupt number. The interrupt specifier format depends
 		on the interrupt controller parent.
diff --git a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
new file mode 100644
index 000000000000..5b13d6672996
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mailbox/st,stm32-ipcc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: STMicroelectronics STM32 IPC controller bindings
+
+description:
+  The IPCC block provides a non blocking signaling mechanism to post and
+  retrieve messages in an atomic way between two processors.
+  It provides the signaling for N bidirectionnal channels. The number of
+  channels (N) can be read from a dedicated register.
+
+maintainers:
+  - Fabien Dessenne <fabien.dessenne@st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+
+properties:
+  compatible:
+    const: st,stm32mp1-ipcc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+     maxItems: 1
+
+  interrupts:
+    items:
+      - description: rx channel occupied
+      - description: tx channel free
+      - description: wakeup source
+    minItems: 2
+    maxItems: 3
+
+  interrupt-names:
+    items:
+      - const: rx
+      - const: tx
+      - const: wakeup
+    minItems: 2
+    maxItems: 3
+
+  wakeup-source: true
+
+  "#mbox-cells":
+    const: 1
+
+  st,proc-id:
+    description: Processor id using the mailbox (0 or 1)
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [ 0, 1 ]
+
+required:
+  - compatible
+  - reg
+  - st,proc-id
+  - clocks
+  - interrupt-names
+  - "#mbox-cells"
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    ipcc: mailbox@4c001000 {
+      compatible = "st,stm32mp1-ipcc";
+      #mbox-cells = <1>;
+      reg = <0x4c001000 0x400>;
+      st,proc-id = <0>;
+      interrupts-extended = <&intc GIC_SPI 100 IRQ_TYPE_NONE>,
+      		      <&intc GIC_SPI 101 IRQ_TYPE_NONE>,
+      		      <&aiec 62 1>;
+      interrupt-names = "rx", "tx", "wakeup";
+      clocks = <&rcc_clk IPCC>;
+      wakeup-source;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mailbox/stm32-ipcc.txt b/Documentation/devicetree/bindings/mailbox/stm32-ipcc.txt
deleted file mode 100644
index 1d2b7fee7b85..000000000000
--- a/Documentation/devicetree/bindings/mailbox/stm32-ipcc.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* STMicroelectronics STM32 IPCC (Inter-Processor Communication Controller)
-
-The IPCC block provides a non blocking signaling mechanism to post and
-retrieve messages in an atomic way between two processors.
-It provides the signaling for N bidirectionnal channels. The number of channels
-(N) can be read from a dedicated register.
-
-Required properties:
-- compatible:   Must be "st,stm32mp1-ipcc"
-- reg:          Register address range (base address and length)
-- st,proc-id:   Processor id using the mailbox (0 or 1)
-- clocks:       Input clock
-- interrupt-names: List of names for the interrupts described by the interrupt
-                   property. Must contain the following entries:
-                   - "rx"
-                   - "tx"
-                   - "wakeup"
-- interrupts:   Interrupt specifiers for "rx channel occupied", "tx channel
-                free" and "system wakeup".
-- #mbox-cells:  Number of cells required for the mailbox specifier. Must be 1.
-                The data contained in the mbox specifier of the "mboxes"
-                property in the client node is the mailbox channel index.
-
-Optional properties:
-- wakeup-source: Flag to indicate whether this device can wake up the system
-
-
-
-Example:
-	ipcc: mailbox@4c001000 {
-		compatible = "st,stm32mp1-ipcc";
-		#mbox-cells = <1>;
-		reg = <0x4c001000 0x400>;
-		st,proc-id = <0>;
-		interrupts-extended = <&intc GIC_SPI 100 IRQ_TYPE_NONE>,
-				      <&intc GIC_SPI 101 IRQ_TYPE_NONE>,
-				      <&aiec 62 1>;
-		interrupt-names = "rx", "tx", "wakeup";
-		clocks = <&rcc_clk IPCC>;
-		wakeup-source;
-	}
-
-Client:
-	mbox_test {
-		...
-		mboxes = <&ipcc 0>, <&ipcc 1>;
-	};
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
index d3e423fcb6c2..0f6374ceaa69 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 CMOS Sensor Interface (CSI) Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 description: |-
   The Allwinner A10 and later has a CMOS Sensor Interface to retrieve
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
index 98c1bdde9a86..7838804700d6 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Infrared Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "rc.yaml#"
@@ -60,9 +60,7 @@ required:
   - clocks
   - clock-names
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-video-engine.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-video-engine.yaml
new file mode 100644
index 000000000000..526593c8c614
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-video-engine.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/allwinner,sun4i-a10-video-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Video Engine Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-video-engine
+      - allwinner,sun5i-a13-video-engine
+      - allwinner,sun7i-a20-video-engine
+      - allwinner,sun8i-a33-video-engine
+      - allwinner,sun8i-h3-video-engine
+      - allwinner,sun50i-a64-video-engine
+      - allwinner,sun50i-h5-video-engine
+      - allwinner,sun50i-h6-video-engine
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+      - description: RAM Clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+      - const: ram
+
+  resets:
+    maxItems: 1
+
+  allwinner,sram:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description: Phandle to the device SRAM
+
+  memory-region:
+    description:
+      CMA pool to use for buffers allocation instead of the default
+      CMA pool.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - allwinner,sram
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun7i-a20-ccu.h>
+    #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+    video-codec@1c0e000 {
+        compatible = "allwinner,sun7i-a20-video-engine";
+        reg = <0x01c0e000 0x1000>;
+        interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_AHB_VE>, <&ccu CLK_VE>,
+                 <&ccu CLK_DRAM_VE>;
+        clock-names = "ahb", "mod", "ram";
+        resets = <&ccu RST_VE>;
+        allwinner,sram = <&ve_sram 1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
new file mode 100644
index 000000000000..1fd9b5532a21
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/allwinner,sun6i-a31-csi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 CMOS Sensor Interface (CSI) Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun6i-a31-csi
+      - allwinner,sun8i-a83t-csi
+      - allwinner,sun8i-h3-csi
+      - allwinner,sun8i-v3s-csi
+      - allwinner,sun50i-a64-csi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+      - description: DRAM Clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+      - const: ram
+
+  resets:
+    maxItems: 1
+
+  # See ./video-interfaces.txt for details
+  port:
+    type: object
+
+    properties:
+      endpoint:
+        type: object
+
+        properties:
+          remote-endpoint: true
+
+          bus-width:
+            enum: [ 8, 10, 12, 16 ]
+
+          pclk-sample: true
+          hsync-active: true
+          vsync-active: true
+
+        required:
+          - bus-width
+          - remote-endpoint
+
+    required:
+      - endpoint
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun8i-v3s-ccu.h>
+    #include <dt-bindings/reset/sun8i-v3s-ccu.h>
+
+    csi1: csi@1cb4000 {
+        compatible = "allwinner,sun8i-v3s-csi";
+        reg = <0x01cb4000 0x1000>;
+        interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_BUS_CSI>,
+                 <&ccu CLK_CSI1_SCLK>,
+                 <&ccu CLK_DRAM_CSI>;
+        clock-names = "bus",
+                      "mod",
+                      "ram";
+        resets = <&ccu RST_BUS_CSI>;
+
+        port {
+            /* Parallel bus endpoint */
+            csi1_ep: endpoint {
+                remote-endpoint = <&adv7611_ep>;
+                bus-width = <16>;
+
+                /*
+                 * If hsync-active/vsync-active are missing,
+                 * embedded BT.656 sync is used.
+                 */
+                 hsync-active = <0>; /* Active low */
+                 vsync-active = <0>; /* Active low */
+                 pclk-sample = <1>;  /* Rising */
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml
new file mode 100644
index 000000000000..2e40f700e84f
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/allwinner,sun8i-h3-deinterlace.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner H3 Deinterlace Device Tree Bindings
+
+maintainers:
+  - Jernej Skrabec <jernej.skrabec@siol.net>
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |-
+  The Allwinner H3 and later has a deinterlace core used for
+  deinterlacing interlaced video content.
+
+properties:
+  compatible:
+    const: allwinner,sun8i-h3-deinterlace
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Deinterlace interface clock
+      - description: Deinterlace module clock
+      - description: Deinterlace DRAM clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+      - const: ram
+
+  resets:
+    maxItems: 1
+
+  interconnects:
+    maxItems: 1
+
+  interconnect-names:
+    const: dma-mem
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun8i-h3-ccu.h>
+    #include <dt-bindings/reset/sun8i-h3-ccu.h>
+
+    deinterlace: deinterlace@1400000 {
+        compatible = "allwinner,sun8i-h3-deinterlace";
+        reg = <0x01400000 0x20000>;
+        clocks = <&ccu CLK_BUS_DEINTERLACE>,
+                 <&ccu CLK_DEINTERLACE>,
+                 <&ccu CLK_DRAM_DEINTERLACE>;
+        clock-names = "bus", "mod", "ram";
+        resets = <&ccu RST_BUS_DEINTERLACE>;
+        interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+        interconnects = <&mbus 9>;
+        interconnect-names = "dma-mem";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/media/amlogic,meson-gx-ao-cec.yaml b/Documentation/devicetree/bindings/media/amlogic,meson-gx-ao-cec.yaml
new file mode 100644
index 000000000000..41197578f19a
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/amlogic,meson-gx-ao-cec.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/media/amlogic,meson-gx-ao-cec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Meson AO-CEC Controller
+
+maintainers:
+  - Neil Armstrong <narmstrong@baylibre.com>
+
+description: |
+  The Amlogic Meson AO-CEC module is present is Amlogic SoCs and its purpose is
+  to handle communication between HDMI connected devices over the CEC bus.
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson-gx-ao-cec # GXBB, GXL, GXM, G12A and SM1 AO_CEC_A module
+      - amlogic,meson-g12a-ao-cec # G12A AO_CEC_B module
+      - amlogic,meson-sm1-ao-cec # SM1 AO_CEC_B module
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  hdmi-phandle:
+    description: phandle to the HDMI controller
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - amlogic,meson-gx-ao-cec
+
+    then:
+      properties:
+        clocks:
+          items:
+            - description: AO-CEC clock
+
+        clock-names:
+          maxItems: 1
+          items:
+            - const: core
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - amlogic,meson-g12a-ao-cec
+              - amlogic,meson-sm1-ao-cec
+
+    then:
+      properties:
+        clocks:
+          items:
+            - description: AO-CEC clock generator source
+
+        clock-names:
+          maxItems: 1
+          items:
+            - const: oscin
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - hdmi-phandle
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    cec_AO: cec@100 {
+        compatible = "amlogic,meson-gx-ao-cec";
+        reg = <0x0 0x00100 0x0 0x14>;
+        interrupts = <199>;
+        clocks = <&clkc_cec>;
+        clock-names = "core";
+        hdmi-phandle = <&hdmi_tx>;
+    };
+
diff --git a/Documentation/devicetree/bindings/media/cedrus.txt b/Documentation/devicetree/bindings/media/cedrus.txt
deleted file mode 100644
index 20c82fb0c343..000000000000
--- a/Documentation/devicetree/bindings/media/cedrus.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Device-tree bindings for the VPU found in Allwinner SoCs, referred to as the
-Video Engine (VE) in Allwinner literature.
-
-The VPU can only access the first 256 MiB of DRAM, that are DMA-mapped starting
-from the DRAM base. This requires specific memory allocation and handling.
-
-Required properties:
-- compatible		: must be one of the following compatibles:
-			- "allwinner,sun4i-a10-video-engine"
-			- "allwinner,sun5i-a13-video-engine"
-			- "allwinner,sun7i-a20-video-engine"
-			- "allwinner,sun8i-a33-video-engine"
-			- "allwinner,sun8i-h3-video-engine"
-			- "allwinner,sun50i-a64-video-engine"
-			- "allwinner,sun50i-h5-video-engine"
-			- "allwinner,sun50i-h6-video-engine"
-- reg			: register base and length of VE;
-- clocks		: list of clock specifiers, corresponding to entries in
-			  the clock-names property;
-- clock-names		: should contain "ahb", "mod" and "ram" entries;
-- resets		: phandle for reset;
-- interrupts		: VE interrupt number;
-- allwinner,sram	: SRAM region to use with the VE.
-
-Optional properties:
-- memory-region		: CMA pool to use for buffers allocation instead of the
-			  default CMA pool.
-
-Example:
-
-reserved-memory {
-	#address-cells = <1>;
-	#size-cells = <1>;
-	ranges;
-
-	/* Address must be kept in the lower 256 MiBs of DRAM for VE. */
-	cma_pool: default-pool {
-		compatible = "shared-dma-pool";
-		size = <0x6000000>;
-		alloc-ranges = <0x4a000000 0x6000000>;
-		reusable;
-		linux,cma-default;
-	};
-};
-
-video-codec@1c0e000 {
-	compatible = "allwinner,sun7i-a20-video-engine";
-	reg = <0x01c0e000 0x1000>;
-
-	clocks = <&ccu CLK_AHB_VE>, <&ccu CLK_VE>,
-		 <&ccu CLK_DRAM_VE>;
-	clock-names = "ahb", "mod", "ram";
-
-	resets = <&ccu RST_VE>;
-	interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
-	allwinner,sram = <&ve_sram 1>;
-};
diff --git a/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt b/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt
index 38941db23dd2..ce9a22689e53 100644
--- a/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt
+++ b/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt
@@ -1,4 +1,4 @@
-Samsung S5P/EXYNOS SoC series JPEG codec
+Samsung S5P/Exynos SoC series JPEG codec
 
 Required properties:
 
diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
index bc963a6d305a..1872688fa408 100644
--- a/Documentation/devicetree/bindings/media/exynos5-gsc.txt
+++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
@@ -1,6 +1,6 @@
 * Samsung Exynos5 G-Scaler device
 
-G-Scaler is used for scaling and color space conversion on EXYNOS5 SoCs.
+G-Scaler is used for scaling and color space conversion on Exynos5 SoCs.
 
 Required properties:
 - compatible: should be one of
diff --git a/Documentation/devicetree/bindings/media/i2c/ad5820.txt b/Documentation/devicetree/bindings/media/i2c/ad5820.txt
index 5940ca11c021..5764cbedf9b7 100644
--- a/Documentation/devicetree/bindings/media/i2c/ad5820.txt
+++ b/Documentation/devicetree/bindings/media/i2c/ad5820.txt
@@ -2,12 +2,20 @@
 
 Required Properties:
 
-  - compatible: Must contain "adi,ad5820"
+  - compatible: Must contain one of:
+		- "adi,ad5820"
+		- "adi,ad5821"
+		- "adi,ad5823"
 
   - reg: I2C slave address
 
   - VANA-supply: supply of voltage for VANA pin
 
+Optional properties:
+
+   - enable-gpios : GPIO spec for the XSHUTDOWN pin. The XSHUTDOWN signal is
+active low, a high level on the pin enables the device.
+
 Example:
 
        ad5820: coil@c {
@@ -15,5 +23,6 @@ Example:
                reg = <0x0c>;
 
                VANA-supply = <&vaux4>;
+               enable-gpios = <&msmgpio 26 GPIO_ACTIVE_HIGH>;
        };
 
diff --git a/Documentation/devicetree/bindings/media/i2c/imx290.txt b/Documentation/devicetree/bindings/media/i2c/imx290.txt
new file mode 100644
index 000000000000..a3cc21410f7c
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/imx290.txt
@@ -0,0 +1,57 @@
+* Sony IMX290 1/2.8-Inch CMOS Image Sensor
+
+The Sony IMX290 is a 1/2.8-Inch CMOS Solid-state image sensor with
+Square Pixel for Color Cameras. It is programmable through I2C and 4-wire
+interfaces. The sensor output is available via CMOS logic parallel SDR output,
+Low voltage LVDS DDR output and CSI-2 serial data output. The CSI-2 bus is the
+default. No bindings have been defined for the other busses.
+
+Required Properties:
+- compatible: Should be "sony,imx290"
+- reg: I2C bus address of the device
+- clocks: Reference to the xclk clock.
+- clock-names: Should be "xclk".
+- clock-frequency: Frequency of the xclk clock in Hz.
+- vdddo-supply: Sensor digital IO regulator.
+- vdda-supply: Sensor analog regulator.
+- vddd-supply: Sensor digital core regulator.
+
+Optional Properties:
+- reset-gpios: Sensor reset GPIO
+
+The imx290 device node should contain one 'port' child node with
+an 'endpoint' subnode. For further reading on port node refer to
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Required Properties on endpoint:
+- data-lanes: check ../video-interfaces.txt
+- link-frequencies: check ../video-interfaces.txt
+- remote-endpoint: check ../video-interfaces.txt
+
+Example:
+	&i2c1 {
+		...
+		imx290: camera-sensor@1a {
+			compatible = "sony,imx290";
+			reg = <0x1a>;
+
+			reset-gpios = <&msmgpio 35 GPIO_ACTIVE_LOW>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&camera_rear_default>;
+
+			clocks = <&gcc GCC_CAMSS_MCLK0_CLK>;
+			clock-names = "xclk";
+			clock-frequency = <37125000>;
+
+			vdddo-supply = <&camera_vdddo_1v8>;
+			vdda-supply = <&camera_vdda_2v8>;
+			vddd-supply = <&camera_vddd_1v5>;
+
+			port {
+				imx290_ep: endpoint {
+					data-lanes = <1 2 3 4>;
+					link-frequencies = /bits/ 64 <445500000>;
+					remote-endpoint = <&csiphy0_ep>;
+				};
+			};
+		};
diff --git a/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt b/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
index c3c3479233c4..10ece8108081 100644
--- a/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
+++ b/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
@@ -27,8 +27,6 @@ Mandatory properties
 Optional properties
 -------------------
 
-- nokia,nvm-size: The size of the NVM, in bytes. If the size is not given,
-  the NVM contents will not be read.
 - reset-gpios: XSHUTDOWN GPIO
 - flash-leds: See ../video-interfaces.txt
 - lens-focus: See ../video-interfaces.txt
diff --git a/Documentation/devicetree/bindings/media/i2c/ov2659.txt b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
index cabc7d827dfb..92989a619f29 100644
--- a/Documentation/devicetree/bindings/media/i2c/ov2659.txt
+++ b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
@@ -12,6 +12,12 @@ Required Properties:
 - clock-names: should be "xvclk".
 - link-frequencies: target pixel clock frequency.
 
+Optional Properties:
+- powerdown-gpios: reference to the GPIO connected to the pwdn pin, if any.
+  Active high with internal pull down resistor.
+- reset-gpios: reference to the GPIO connected to the resetb pin, if any.
+  Active low with internal pull up resistor.
+
 For further reading on port node refer to
 Documentation/devicetree/bindings/media/video-interfaces.txt.
 
@@ -27,6 +33,9 @@ Example:
 			clocks = <&clk_ov2659 0>;
 			clock-names = "xvclk";
 
+			powerdown-gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>;
+			reset-gpios = <&gpio6 15 GPIO_ACTIVE_LOW>;
+
 			port {
 				ov2659_0: endpoint {
 					remote-endpoint = <&vpfe_ep>;
diff --git a/Documentation/devicetree/bindings/media/meson-ao-cec.txt b/Documentation/devicetree/bindings/media/meson-ao-cec.txt
deleted file mode 100644
index ad92ee41c0dd..000000000000
--- a/Documentation/devicetree/bindings/media/meson-ao-cec.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-* Amlogic Meson AO-CEC driver
-
-The Amlogic Meson AO-CEC module is present is Amlogic SoCs and its purpose is
-to handle communication between HDMI connected devices over the CEC bus.
-
-Required properties:
-  - compatible : value should be following depending on the SoC :
-	For GXBB, GXL, GXM, G12A and SM1 (AO_CEC_A module) :
-	"amlogic,meson-gx-ao-cec"
-	For G12A (AO_CEC_B module) :
-	"amlogic,meson-g12a-ao-cec"
-	For SM1 (AO_CEC_B module) :
-	"amlogic,meson-sm1-ao-cec"
-
-  - reg : Physical base address of the IP registers and length of memory
-	  mapped region.
-
-  - interrupts : AO-CEC interrupt number to the CPU.
-  - clocks : from common clock binding: handle to AO-CEC clock.
-  - clock-names : from common clock binding, must contain :
-		For GXBB, GXL, GXM, G12A and SM1 (AO_CEC_A module) :
-		- "core"
-		For G12A, SM1 (AO_CEC_B module) :
-		- "oscin"
-		corresponding to entry in the clocks property.
-  - hdmi-phandle: phandle to the HDMI controller
-
-Example:
-
-cec_AO: cec@100 {
-	compatible = "amlogic,meson-gx-ao-cec";
-	reg = <0x0 0x00100 0x0 0x14>;
-	interrupts = <GIC_SPI 199 IRQ_TYPE_EDGE_RISING>;
-	clocks = <&clkc_AO CLKID_AO_CEC_32K>;
-	clock-names = "core";
-	hdmi-phandle = <&hdmi_tx>;
-};
diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml
index 9054555e6608..d11380794ff4 100644
--- a/Documentation/devicetree/bindings/media/rc.yaml
+++ b/Documentation/devicetree/bindings/media/rc.yaml
@@ -39,6 +39,7 @@ properties:
           - rc-avermedia-rm-ks
           - rc-avertv-303
           - rc-azurewave-ad-tu700
+          - rc-beelink-gs1
           - rc-behold
           - rc-behold-columbus
           - rc-budget-ci-old
@@ -82,6 +83,7 @@ properties:
           - rc-it913x-v1
           - rc-it913x-v2
           - rc-kaiomy
+          - rc-khadas
           - rc-kworld-315u
           - rc-kworld-pc150u
           - rc-kworld-plus-tv-analog
@@ -99,6 +101,7 @@ properties:
           - rc-nec-terratec-cinergy-xs
           - rc-norwood
           - rc-npgtech
+          - rc-odroid
           - rc-pctv-sedna
           - rc-pinnacle-color
           - rc-pinnacle-grey
@@ -119,6 +122,7 @@ properties:
           - rc-streamzap
           - rc-su3000
           - rc-tango
+          - rc-tanix-tx3mini
           - rc-tbs-nec
           - rc-technisat-ts35
           - rc-technisat-usb2
@@ -138,7 +142,10 @@ properties:
           - rc-videomate-k100
           - rc-videomate-s350
           - rc-videomate-tv-pvr
+          - rc-wetek-hub
+          - rc-wetek-play2
           - rc-winfast
           - rc-winfast-usbii-deluxe
+          - rc-x96max
           - rc-xbox-dvd
           - rc-zx-irdec
diff --git a/Documentation/devicetree/bindings/media/renesas,ceu.txt b/Documentation/devicetree/bindings/media/renesas,ceu.txt
deleted file mode 100644
index 3e2a2652eb19..000000000000
--- a/Documentation/devicetree/bindings/media/renesas,ceu.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-Renesas Capture Engine Unit (CEU)
-----------------------------------------------
-
-The Capture Engine Unit is the image capture interface found in the Renesas
-SH Mobile, R-Mobile and RZ SoCs.
-
-The interface supports a single parallel input with data bus width of 8 or 16
-bits.
-
-Required properties:
-- compatible: Shall be one of the following values:
-	"renesas,r7s72100-ceu" for CEU units found in RZ/A1H and RZ/A1M SoCs
-	"renesas,r8a7740-ceu" for CEU units found in R-Mobile A1 R8A7740 SoCs
-- reg: Registers address base and size.
-- interrupts: The interrupt specifier.
-
-The CEU supports a single parallel input and should contain a single 'port'
-subnode with a single 'endpoint'. Connection to input devices are modeled
-according to the video interfaces OF bindings specified in:
-[1] Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Optional endpoint properties applicable to parallel input bus described in
-the above mentioned "video-interfaces.txt" file are supported.
-
-- hsync-active: See [1] for description. If property is not present,
-  default is active high.
-- vsync-active: See [1] for description. If property is not present,
-  default is active high.
-- bus-width: See [1] for description. Accepted values are '8' and '16'.
-  If property is not present, default is '8'.
-- field-even-active: See [1] for description. If property is not present,
-  an even field is identified by a logic 0 (active-low signal).
-
-Example:
-
-The example describes the connection between the Capture Engine Unit and an
-OV7670 image sensor connected to i2c1 interface.
-
-ceu: ceu@e8210000 {
-	reg = <0xe8210000 0x209c>;
-	compatible = "renesas,r7s72100-ceu";
-	interrupts = <GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH>;
-
-	pinctrl-names = "default";
-	pinctrl-0 = <&vio_pins>;
-
-	status = "okay";
-
-	port {
-		ceu_in: endpoint {
-			remote-endpoint = <&ov7670_out>;
-
-			hsync-active = <1>;
-			vsync-active = <0>;
-		};
-	};
-};
-
-i2c1: i2c@fcfee400 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&i2c1_pins>;
-
-	status = "okay";
-
-	clock-frequency = <100000>;
-
-	ov7670: camera@21 {
-		compatible = "ovti,ov7670";
-		reg = <0x21>;
-
-		pinctrl-names = "default";
-		pinctrl-0 = <&vio_pins>;
-
-		reset-gpios = <&port3 11 GPIO_ACTIVE_LOW>;
-		powerdown-gpios = <&port3 12 GPIO_ACTIVE_HIGH>;
-
-		port {
-			ov7670_out: endpoint {
-				remote-endpoint = <&ceu_in>;
-
-				hsync-active = <1>;
-				vsync-active = <0>;
-			};
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/media/renesas,ceu.yaml b/Documentation/devicetree/bindings/media/renesas,ceu.yaml
new file mode 100644
index 000000000000..8e9251a0f9ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/renesas,ceu.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/renesas,ceu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Capture Engine Unit (CEU) Bindings
+
+maintainers:
+  - Jacopo Mondi <jacopo+renesas@jmondi.org>
+  - linux-renesas-soc@vger.kernel.org
+
+description: |+
+  The Capture Engine Unit is the image capture interface found in the Renesas SH
+  Mobile, R-Mobile and RZ SoCs. The interface supports a single parallel input
+  with data bus width of 8 or 16 bits.
+
+properties:
+  compatible:
+    enum:
+      - renesas,r7s72100-ceu
+      - renesas,r8a7740-ceu
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  port:
+    type: object
+    additionalProperties: false
+
+    properties:
+       endpoint:
+         type: object
+         additionalProperties: false
+
+         # Properties described in
+         # Documentation/devicetree/bindings/media/video-interfaces.txt
+         properties:
+           remote-endpoint: true
+           hsync-active: true
+           vsync-active: true
+           field-even-active: false
+           bus-width:
+             enum: [8, 16]
+             default: 8
+
+         required:
+           - remote-endpoint
+
+    required:
+      - endpoint
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - port
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    ceu: ceu@e8210000 {
+        reg = <0xe8210000 0x209c>;
+        compatible = "renesas,r7s72100-ceu";
+        interrupts = <GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH>;
+
+        port {
+            ceu_in: endpoint {
+                remote-endpoint = <&ov7670_out>;
+                hsync-active = <1>;
+                vsync-active = <0>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/media/renesas,csi2.txt b/Documentation/devicetree/bindings/media/renesas,csi2.txt
deleted file mode 100644
index 331409259752..000000000000
--- a/Documentation/devicetree/bindings/media/renesas,csi2.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-Renesas R-Car MIPI CSI-2
-------------------------
-
-The R-Car CSI-2 receiver device provides MIPI CSI-2 capabilities for the
-Renesas R-Car and RZ/G2 family of devices. It is used in conjunction with the
-R-Car VIN module, which provides the video capture capabilities.
-
-Mandatory properties
---------------------
- - compatible: Must be one or more of the following
-   - "renesas,r8a774a1-csi2" for the R8A774A1 device.
-   - "renesas,r8a774c0-csi2" for the R8A774C0 device.
-   - "renesas,r8a7795-csi2" for the R8A7795 device.
-   - "renesas,r8a7796-csi2" for the R8A7796 device.
-   - "renesas,r8a77965-csi2" for the R8A77965 device.
-   - "renesas,r8a77970-csi2" for the R8A77970 device.
-   - "renesas,r8a77980-csi2" for the R8A77980 device.
-   - "renesas,r8a77990-csi2" for the R8A77990 device.
-
- - reg: the register base and size for the device registers
- - interrupts: the interrupt for the device
- - clocks: A phandle + clock specifier for the module clock
- - resets: A phandle + reset specifier for the module reset
-
-The device node shall contain two 'port' child nodes according to the
-bindings defined in Documentation/devicetree/bindings/media/
-video-interfaces.txt. port@0 shall connect to the CSI-2 source. port@1
-shall connect to all the R-Car VIN modules that have a hardware
-connection to the CSI-2 receiver.
-
-- port@0- Video source (mandatory)
-	- endpoint@0 - sub-node describing the endpoint that is the video source
-
-- port@1 - VIN instances (optional)
-	- One endpoint sub-node for every R-Car VIN instance which is connected
-	  to the R-Car CSI-2 receiver.
-
-Example:
-
-	csi20: csi2@fea80000 {
-		compatible = "renesas,r8a7796-csi2";
-		reg = <0 0xfea80000 0 0x10000>;
-		interrupts = <0 184 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 714>;
-		power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
-		resets = <&cpg 714>;
-
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			port@0 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				reg = <0>;
-
-				csi20_in: endpoint@0 {
-					reg = <0>;
-					clock-lanes = <0>;
-					data-lanes = <1>;
-					remote-endpoint = <&adv7482_txb>;
-				};
-			};
-
-			port@1 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				reg = <1>;
-
-				csi20vin0: endpoint@0 {
-					reg = <0>;
-					remote-endpoint = <&vin0csi20>;
-				};
-				csi20vin1: endpoint@1 {
-					reg = <1>;
-					remote-endpoint = <&vin1csi20>;
-				};
-				csi20vin2: endpoint@2 {
-					reg = <2>;
-					remote-endpoint = <&vin2csi20>;
-				};
-				csi20vin3: endpoint@3 {
-					reg = <3>;
-					remote-endpoint = <&vin3csi20>;
-				};
-				csi20vin4: endpoint@4 {
-					reg = <4>;
-					remote-endpoint = <&vin4csi20>;
-				};
-				csi20vin5: endpoint@5 {
-					reg = <5>;
-					remote-endpoint = <&vin5csi20>;
-				};
-				csi20vin6: endpoint@6 {
-					reg = <6>;
-					remote-endpoint = <&vin6csi20>;
-				};
-				csi20vin7: endpoint@7 {
-					reg = <7>;
-					remote-endpoint = <&vin7csi20>;
-				};
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/media/renesas,csi2.yaml b/Documentation/devicetree/bindings/media/renesas,csi2.yaml
new file mode 100644
index 000000000000..408442a0c389
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/renesas,csi2.yaml
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2020 Renesas Electronics Corp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/renesas,csi2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car MIPI CSI-2 receiver
+
+maintainers:
+  - Niklas Söderlund <niklas.soderlund@ragnatech.se>
+
+description:
+  The R-Car CSI-2 receiver device provides MIPI CSI-2 capabilities for the
+  Renesas R-Car and RZ/G2 family of devices. It is used in conjunction with the
+  R-Car VIN module, which provides the video capture capabilities.
+
+properties:
+  compatible:
+    items:
+      - enum:
+        - renesas,r8a774a1-csi2 # RZ/G2M
+        - renesas,r8a774b1-csi2 # RZ/G2N
+        - renesas,r8a774c0-csi2 # RZ/G2E
+        - renesas,r8a7795-csi2  # R-Car H3
+        - renesas,r8a7796-csi2  # R-Car M3-W
+        - renesas,r8a77965-csi2 # R-Car M3-N
+        - renesas,r8a77970-csi2 # R-Car V3M
+        - renesas,r8a77980-csi2 # R-Car V3H
+        - renesas,r8a77990-csi2 # R-Car E3
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  ports:
+    type: object
+    description:
+      A node containing input and output port nodes with endpoint definitions
+      as documented in
+      Documentation/devicetree/bindings/media/video-interfaces.txt
+
+    properties:
+      port@0:
+        type: object
+        description:
+          Input port node, single endpoint describing the CSI-2 transmitter.
+
+        properties:
+          reg:
+            const: 0
+
+          endpoint:
+            type: object
+
+            properties:
+              clock-lanes:
+                maxItems: 1
+
+              data-lanes:
+                maxItems: 1
+
+              remote-endpoint: true
+
+            required:
+              - clock-lanes
+              - data-lanes
+              - remote-endpoint
+
+            additionalProperties: false
+
+        additionalProperties: false
+
+      port@1:
+        type: object
+        description:
+          Output port node, multiple endpoints describing all the R-Car VIN
+          modules connected the CSI-2 receiver.
+
+        properties:
+          '#address-cells':
+            const: 1
+
+          '#size-cells':
+            const: 0
+
+          reg:
+            const: 1
+
+        patternProperties:
+          "^endpoint@[0-9a-f]$":
+            type: object
+
+            properties:
+              reg:
+                maxItems: 1
+
+              remote-endpoint: true
+
+            required:
+              - reg
+              - remote-endpoint
+
+            additionalProperties: false
+
+        additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - power-domains
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7796-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a7796-sysc.h>
+
+    csi20: csi2@fea80000 {
+            compatible = "renesas,r8a7796-csi2";
+            reg = <0 0xfea80000 0 0x10000>;
+            interrupts = <0 184 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&cpg CPG_MOD 714>;
+            power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
+            resets = <&cpg 714>;
+
+            ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                            reg = <0>;
+
+                            csi20_in: endpoint {
+                                    clock-lanes = <0>;
+                                    data-lanes = <1>;
+                                    remote-endpoint = <&adv7482_txb>;
+                            };
+                    };
+
+                    port@1 {
+                            #address-cells = <1>;
+                            #size-cells = <0>;
+
+                            reg = <1>;
+
+                            csi20vin0: endpoint@0 {
+                                    reg = <0>;
+                                    remote-endpoint = <&vin0csi20>;
+                            };
+                            csi20vin1: endpoint@1 {
+                                    reg = <1>;
+                                    remote-endpoint = <&vin1csi20>;
+                            };
+                            csi20vin2: endpoint@2 {
+                                    reg = <2>;
+                                    remote-endpoint = <&vin2csi20>;
+                            };
+                            csi20vin3: endpoint@3 {
+                                    reg = <3>;
+                                    remote-endpoint = <&vin3csi20>;
+                            };
+                            csi20vin4: endpoint@4 {
+                                    reg = <4>;
+                                    remote-endpoint = <&vin4csi20>;
+                            };
+                            csi20vin5: endpoint@5 {
+                                    reg = <5>;
+                                    remote-endpoint = <&vin5csi20>;
+                            };
+                            csi20vin6: endpoint@6 {
+                                    reg = <6>;
+                                    remote-endpoint = <&vin6csi20>;
+                            };
+                            csi20vin7: endpoint@7 {
+                                    reg = <7>;
+                                    remote-endpoint = <&vin7csi20>;
+                            };
+                    };
+            };
+    };
diff --git a/Documentation/devicetree/bindings/media/renesas,vin.txt b/Documentation/devicetree/bindings/media/renesas,vin.txt
index aa217b096279..e30b0d4eefdd 100644
--- a/Documentation/devicetree/bindings/media/renesas,vin.txt
+++ b/Documentation/devicetree/bindings/media/renesas,vin.txt
@@ -14,6 +14,7 @@ on Gen3 and RZ/G2 platforms to a CSI-2 receiver.
    - "renesas,vin-r8a7744" for the R8A7744 device
    - "renesas,vin-r8a7745" for the R8A7745 device
    - "renesas,vin-r8a774a1" for the R8A774A1 device
+   - "renesas,vin-r8a774b1" for the R8A774B1 device
    - "renesas,vin-r8a774c0" for the R8A774C0 device
    - "renesas,vin-r8a7778" for the R8A7778 device
    - "renesas,vin-r8a7779" for the R8A7779 device
@@ -43,7 +44,7 @@ on Gen3 and RZ/G2 platforms to a CSI-2 receiver.
 Additionally, an alias named vinX will need to be created to specify
 which video input device this is.
 
-The per-board settings Gen2 platforms:
+The per-board settings for Gen2 and RZ/G1 platforms:
 
 - port - sub-node describing a single endpoint connected to the VIN
   from external SoC pins as described in video-interfaces.txt[1].
@@ -63,7 +64,7 @@ The per-board settings Gen2 platforms:
     - data-enable-active: polarity of CLKENB signal, see [1] for
       description. Default is active high.
 
-The per-board settings Gen3 and RZ/G2 platforms:
+The per-board settings for Gen3 and RZ/G2 platforms:
 
 Gen3 and RZ/G2 platforms can support both a single connected parallel input
 source from external SoC pins (port@0) and/or multiple parallel input sources
diff --git a/Documentation/devicetree/bindings/media/samsung-fimc.txt b/Documentation/devicetree/bindings/media/samsung-fimc.txt
index 48c599dacbdf..f91b9dc80eb3 100644
--- a/Documentation/devicetree/bindings/media/samsung-fimc.txt
+++ b/Documentation/devicetree/bindings/media/samsung-fimc.txt
@@ -1,4 +1,4 @@
-Samsung S5P/EXYNOS SoC Camera Subsystem (FIMC)
+Samsung S5P/Exynos SoC Camera Subsystem (FIMC)
 ----------------------------------------------
 
 The S5P/Exynos SoC Camera subsystem comprises of multiple sub-devices
diff --git a/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt b/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt
index be45f0b1a449..a4149c9434ea 100644
--- a/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt
+++ b/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt
@@ -1,4 +1,4 @@
-Samsung S5P/EXYNOS SoC series MIPI CSI-2 receiver (MIPI CSIS)
+Samsung S5P/Exynos SoC series MIPI CSI-2 receiver (MIPI CSIS)
 -------------------------------------------------------------
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
deleted file mode 100644
index cfa4ffada8ae..000000000000
--- a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Bindings, specific for the sh_mobile_ceu_camera.c driver:
- - compatible: Should be "renesas,sh-mobile-ceu"
- - reg: register base and size
- - interrupts: the interrupt number
- - renesas,max-width: maximum image width, supported on this SoC
- - renesas,max-height: maximum image height, supported on this SoC
-
-Example:
-
-ceu0: ceu@fe910000 {
-	compatible = "renesas,sh-mobile-ceu";
-	reg = <0xfe910000 0xa0>;
-	interrupt-parent = <&intcs>;
-	interrupts = <0x880>;
-	renesas,max-width = <8188>;
-	renesas,max-height = <8188>;
-};
diff --git a/Documentation/devicetree/bindings/media/st,stm32-cec.txt b/Documentation/devicetree/bindings/media/st,stm32-cec.txt
deleted file mode 100644
index 6be2381c180d..000000000000
--- a/Documentation/devicetree/bindings/media/st,stm32-cec.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-STMicroelectronics STM32 CEC driver
-
-Required properties:
- - compatible : value should be "st,stm32-cec"
- - reg : Physical base address of the IP registers and length of memory
-	 mapped region.
- - clocks : from common clock binding: handle to CEC clocks
- - clock-names : from common clock binding: must be "cec" and "hdmi-cec".
- - interrupts : CEC interrupt number to the CPU.
-
-Example for stm32f746:
-
-cec: cec@40006c00 {
-	compatible = "st,stm32-cec";
-	reg = <0x40006C00 0x400>;
-	interrupts = <94>;
-	clocks = <&rcc 0 STM32F7_APB1_CLOCK(CEC)>, <&rcc 1 CLK_HDMI_CEC>;
-	clock-names = "cec", "hdmi-cec";
-};
diff --git a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
new file mode 100644
index 000000000000..d75019c093a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/st,stm32-cec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 CEC bindings
+
+maintainers:
+  - Benjamin Gaignard <benjamin.gaignard@st.com>
+  - Yannick Fertre <yannick.fertre@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-cec
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Module Clock
+      - description: Bus Clock
+
+  clock-names:
+    items:
+      - const: cec
+      - const: hdmi-cec
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    cec: cec@40006c00 {
+        compatible = "st,stm32-cec";
+        reg = <0x40006c00 0x400>;
+        interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&rcc CEC_K>, <&clk_lse>;
+        clock-names = "cec", "hdmi-cec";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt b/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
deleted file mode 100644
index 3122ded82eb4..000000000000
--- a/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-STMicroelectronics STM32 Digital Camera Memory Interface (DCMI)
-
-Required properties:
-- compatible: "st,stm32-dcmi"
-- reg: physical base address and length of the registers set for the device
-- interrupts: should contain IRQ line for the DCMI
-- resets: reference to a reset controller,
-          see Documentation/devicetree/bindings/reset/st,stm32-rcc.txt
-- clocks: list of clock specifiers, corresponding to entries in
-          the clock-names property
-- clock-names: must contain "mclk", which is the DCMI peripherial clock
-- pinctrl: the pincontrol settings to configure muxing properly
-           for pins that connect to DCMI device.
-           See Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml.
-- dmas: phandle to DMA controller node,
-        see Documentation/devicetree/bindings/dma/stm32-dma.txt
-- dma-names: must contain "tx", which is the transmit channel from DCMI to DMA
-
-DCMI supports a single port node with parallel bus. It should contain one
-'port' child node with child 'endpoint' node. Please refer to the bindings
-defined in Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-
-	dcmi: dcmi@50050000 {
-		compatible = "st,stm32-dcmi";
-		reg = <0x50050000 0x400>;
-		interrupts = <78>;
-		resets = <&rcc STM32F4_AHB2_RESET(DCMI)>;
-		clocks = <&rcc 0 STM32F4_AHB2_CLOCK(DCMI)>;
-		clock-names = "mclk";
-		pinctrl-names = "default";
-		pinctrl-0 = <&dcmi_pins>;
-		dmas = <&dma2 1 1 0x414 0x3>;
-		dma-names = "tx";
-		port {
-			dcmi_0: endpoint {
-				remote-endpoint = <...>;
-				bus-width = <8>;
-				hsync-active = <0>;
-				vsync-active = <0>;
-				pclk-sample = <1>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
new file mode 100644
index 000000000000..3fe778cb5cc3
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/st,stm32-dcmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) binding
+
+maintainers:
+  - Hugues Fruchet <hugues.fruchet@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-dcmi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: mclk
+
+  dmas:
+    maxItems: 1
+
+  dma-names:
+    items:
+      - const: tx
+
+  resets:
+    maxItems: 1
+
+  port:
+    type: object
+    description:
+      DCMI supports a single port node with parallel bus. It should contain
+      one 'port' child node with child 'endpoint' node. Please refer to the
+      bindings defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - dmas
+  - dma-names
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    dcmi: dcmi@4c006000 {
+        compatible = "st,stm32-dcmi";
+        reg = <0x4c006000 0x400>;
+        interrupts = <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>;
+        resets = <&rcc CAMITF_R>;
+        clocks = <&rcc DCMI>;
+        clock-names = "mclk";
+        dmas = <&dmamux1 75 0x400 0x0d>;
+        dma-names = "tx";
+
+        port {
+             dcmi_0: endpoint {
+                   remote-endpoint = <&ov5640_0>;
+                   bus-width = <8>;
+                   hsync-active = <0>;
+                   vsync-active = <0>;
+                   pclk-sample = <1>;
+             };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/media/sun6i-csi.txt b/Documentation/devicetree/bindings/media/sun6i-csi.txt
deleted file mode 100644
index a2e3e56f0257..000000000000
--- a/Documentation/devicetree/bindings/media/sun6i-csi.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-Allwinner V3s Camera Sensor Interface
--------------------------------------
-
-Allwinner V3s SoC features a CSI module(CSI1) with parallel interface.
-
-Required properties:
-  - compatible: value must be one of:
-    * "allwinner,sun6i-a31-csi"
-    * "allwinner,sun8i-a83t-csi"
-    * "allwinner,sun8i-h3-csi"
-    * "allwinner,sun8i-v3s-csi"
-    * "allwinner,sun50i-a64-csi"
-  - reg: base address and size of the memory-mapped region.
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the CSI
-    * bus: the CSI interface clock
-    * mod: the CSI module clock
-    * ram: the CSI DRAM clock
-  - clock-names: the clock names mentioned above
-  - resets: phandles to the reset line driving the CSI
-
-The CSI node should contain one 'port' child node with one child 'endpoint'
-node, according to the bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Endpoint node properties for CSI
----------------------------------
-See the video-interfaces.txt for a detailed description of these properties.
-- remote-endpoint	: (required) a phandle to the bus receiver's endpoint
-			   node
-- bus-width:		: (required) must be 8, 10, 12 or 16
-- pclk-sample		: (optional) (default: sample on falling edge)
-- hsync-active		: (required; parallel-only)
-- vsync-active		: (required; parallel-only)
-
-Example:
-
-csi1: csi@1cb4000 {
-	compatible = "allwinner,sun8i-v3s-csi";
-	reg = <0x01cb4000 0x1000>;
-	interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&ccu CLK_BUS_CSI>,
-		 <&ccu CLK_CSI1_SCLK>,
-		 <&ccu CLK_DRAM_CSI>;
-	clock-names = "bus", "mod", "ram";
-	resets = <&ccu RST_BUS_CSI>;
-
-	port {
-		/* Parallel bus endpoint */
-		csi1_ep: endpoint {
-			remote-endpoint = <&adv7611_ep>;
-			bus-width = <16>;
-
-			/* If hsync-active/vsync-active are missing,
-			   embedded BT.656 sync is used */
-			hsync-active = <0>; /* Active low */
-			vsync-active = <0>; /* Active low */
-			pclk-sample = <1>;  /* Rising */
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/media/ti,vpe.yaml b/Documentation/devicetree/bindings/media/ti,vpe.yaml
new file mode 100644
index 000000000000..f3a8a350e85f
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/ti,vpe.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/ti,vpe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments DRA7x Video Processing Engine (VPE) Device Tree Bindings
+
+maintainers:
+  - Benoit Parrot <bparrot@ti.com>
+
+description: |-
+  The Video Processing Engine (VPE) is a key component for image post
+  processing applications. VPE consist of a single memory to memory
+  path which can perform chroma up/down sampling, deinterlacing,
+  scaling and color space conversion.
+
+properties:
+  compatible:
+      const: ti,dra7-vpe
+
+  reg:
+    items:
+      - description: The VPE main register region
+      - description: Scaler (SC) register region
+      - description: Color Space Conversion (CSC) register region
+      - description: Video Port Direct Memory Access (VPDMA) register region
+
+  reg-names:
+    items:
+      - const: vpe_top
+      - const: sc
+      - const: csc
+      - const: vpdma
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    vpe: vpe@489d0000 {
+        compatible = "ti,dra7-vpe";
+        reg = <0x489d0000 0x120>,
+              <0x489d0700 0x80>,
+              <0x489d5700 0x18>,
+              <0x489dd000 0x400>;
+        reg-names = "vpe_top",
+                    "sc",
+                    "csc",
+                    "vpdma";
+        interrupts = <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt b/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt
deleted file mode 100644
index f633b5d0f8ca..000000000000
--- a/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-SAMSUNG Exynos SoCs SROM Controller driver.
-
-Required properties:
-- compatible : Should contain "samsung,exynos4210-srom".
-
-- reg: offset and length of the register set
-
-Optional properties:
-The SROM controller can be used to attach external peripherals. In this case
-extra properties, describing the bus behind it, should be specified as below:
-
-- #address-cells: Must be set to 2 to allow device address translation.
-		  Address is specified as (bank#, offset).
-
-- #size-cells: Must be set to 1 to allow device size passing
-
-- ranges: Must be set up to reflect the memory layout with four integer values
-	  per bank:
-		<bank-number> 0 <parent address of bank> <size>
-
-Sub-nodes:
-The actual device nodes should be added as subnodes to the SROMc node. These
-subnodes, in addition to regular device specification, should contain the following
-properties, describing configuration of the relevant SROM bank:
-
-Required properties:
-- reg: bank number, base address (relative to start of the bank) and size of
-       the memory mapped for the device. Note that base address will be
-       typically 0 as this is the start of the bank.
-
-- samsung,srom-timing : array of 6 integers, specifying bank timings in the
-                        following order: Tacp, Tcah, Tcoh, Tacc, Tcos, Tacs.
-                        Each value is specified in cycles and has the following
-                        meaning and valid range:
-                        Tacp : Page mode access cycle at Page mode (0 - 15)
-                        Tcah : Address holding time after CSn (0 - 15)
-                        Tcoh : Chip selection hold on OEn (0 - 15)
-                        Tacc : Access cycle (0 - 31, the actual time is N + 1)
-                        Tcos : Chip selection set-up before OEn (0 - 15)
-                        Tacs : Address set-up before CSn (0 - 15)
-
-Optional properties:
-- reg-io-width : data width in bytes (1 or 2). If omitted, default of 1 is used.
-
-- samsung,srom-page-mode : if page mode is set, 4 data page mode will be configured,
-			   else normal (1 data) page mode will be set.
-
-Example: basic definition, no banks are configured
-	memory-controller@12570000 {
-		compatible = "samsung,exynos4210-srom";
-		reg = <0x12570000 0x14>;
-	};
-
-Example: SROMc with SMSC911x ethernet chip on bank 3
-	memory-controller@12570000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		ranges = <0 0 0x04000000 0x20000   // Bank0
-			  1 0 0x05000000 0x20000   // Bank1
-			  2 0 0x06000000 0x20000   // Bank2
-			  3 0 0x07000000 0x20000>; // Bank3
-
-		compatible = "samsung,exynos4210-srom";
-		reg = <0x12570000 0x14>;
-
-		ethernet@3,0 {
-			compatible = "smsc,lan9115";
-			reg = <3 0 0x10000>;	   // Bank 3, offset = 0
-			phy-mode = "mii";
-			interrupt-parent = <&gpx0>;
-			interrupts = <5 8>;
-			reg-io-width = <2>;
-			smsc,irq-push-pull;
-			smsc,force-internal-phy;
-
-			samsung,srom-page-mode;
-			samsung,srom-timing = <9 12 1 9 1 1>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/memory-controllers/exynos-srom.yaml b/Documentation/devicetree/bindings/memory-controllers/exynos-srom.yaml
new file mode 100644
index 000000000000..cdfe3f7f0ea9
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/exynos-srom.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/exynos-srom.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC SROM Controller driver
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+  The SROM controller can be used to attach external peripherals. In this case
+  extra properties, describing the bus behind it, should be specified.
+
+properties:
+  compatible:
+    items:
+      - const: samsung,exynos4210-srom
+
+  reg:
+    maxItems: 1
+
+  "#address-cells":
+    const: 2
+
+  "#size-cells":
+    const: 1
+
+  ranges:
+    description: |
+      Reflects the memory layout with four integer values per bank. Format:
+      <bank-number> 0 <parent address of bank> <size>
+      Up to four banks are supported.
+
+patternProperties:
+  "^.*@[0-3],[a-f0-9]+$":
+    type: object
+    description:
+      The actual device nodes should be added as subnodes to the SROMc node.
+      These subnodes, in addition to regular device specification, should
+      contain the following properties, describing configuration
+      of the relevant SROM bank.
+
+    properties:
+      reg:
+        description:
+          Bank number, base address (relative to start of the bank) and size
+          of the memory mapped for the device. Note that base address will be
+          typically 0 as this is the start of the bank.
+        maxItems: 1
+
+      reg-io-width:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [1, 2]
+        description:
+          Data width in bytes (1 or 2). If omitted, default of 1 is used.
+
+      samsung,srom-page-mode:
+        description:
+          If page mode is set, 4 data page mode will be configured,
+          else normal (1 data) page mode will be set.
+        type: boolean
+
+      samsung,srom-timing:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32-array
+          - items:
+              minItems: 6
+              maxItems: 6
+        description: |
+          Array of 6 integers, specifying bank timings in the following order:
+          Tacp, Tcah, Tcoh, Tacc, Tcos, Tacs.
+          Each value is specified in cycles and has the following meaning
+          and valid range:
+          Tacp: Page mode access cycle at Page mode (0 - 15)
+          Tcah: Address holding time after CSn (0 - 15)
+          Tcoh: Chip selection hold on OEn (0 - 15)
+          Tacc: Access cycle (0 - 31, the actual time is N + 1)
+          Tcos: Chip selection set-up before OEn (0 - 15)
+          Tacs: Address set-up before CSn (0 - 15)
+
+    required:
+      - reg
+      - samsung,srom-timing
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    // Example: basic definition, no banks are configured
+    memory-controller@12560000 {
+        compatible = "samsung,exynos4210-srom";
+        reg = <0x12560000 0x14>;
+    };
+
+  - |
+    // Example: SROMc with SMSC911x ethernet chip on bank 3
+    memory-controller@12570000 {
+        #address-cells = <2>;
+        #size-cells = <1>;
+        ranges = <0 0 0x04000000 0x20000   // Bank0
+                  1 0 0x05000000 0x20000   // Bank1
+                  2 0 0x06000000 0x20000   // Bank2
+                  3 0 0x07000000 0x20000>; // Bank3
+
+        compatible = "samsung,exynos4210-srom";
+        reg = <0x12570000 0x14>;
+
+        ethernet@3,0 {
+            compatible = "smsc,lan9115";
+            reg = <3 0 0x10000>;     // Bank 3, offset = 0
+            phy-mode = "mii";
+            interrupt-parent = <&gpx0>;
+            interrupts = <5 8>;
+            reg-io-width = <2>;
+            smsc,irq-push-pull;
+            smsc,force-internal-phy;
+
+            samsung,srom-page-mode;
+            samsung,srom-timing = <9 12 1 9 1 1>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/memory-controllers/exynos5422-dmc.txt b/Documentation/devicetree/bindings/memory-controllers/exynos5422-dmc.txt
new file mode 100644
index 000000000000..02e4a1f862f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/exynos5422-dmc.txt
@@ -0,0 +1,84 @@
+* Exynos5422 frequency and voltage scaling for Dynamic Memory Controller device
+
+The Samsung Exynos5422 SoC has DMC (Dynamic Memory Controller) to which the DRAM
+memory chips are connected. The driver is to monitor the controller in runtime
+and switch frequency and voltage. To monitor the usage of the controller in
+runtime, the driver uses the PPMU (Platform Performance Monitoring Unit), which
+is able to measure the current load of the memory.
+When 'userspace' governor is used for the driver, an application is able to
+switch the DMC and memory frequency.
+
+Required properties for DMC device for Exynos5422:
+- compatible: Should be "samsung,exynos5422-dmc".
+- clocks : list of clock specifiers, must contain an entry for each
+  required entry in clock-names for CLK_FOUT_SPLL, CLK_MOUT_SCLK_SPLL,
+  CLK_FF_DOUT_SPLL2, CLK_FOUT_BPLL, CLK_MOUT_BPLL, CLK_SCLK_BPLL,
+  CLK_MOUT_MX_MSPLL_CCORE, CLK_MOUT_MX_MSPLL_CCORE_PHY, CLK_MOUT_MCLK_CDREX,
+- clock-names : should include "fout_spll", "mout_sclk_spll", "ff_dout_spll2",
+  "fout_bpll", "mout_bpll", "sclk_bpll", "mout_mx_mspll_ccore",
+  "mout_mclk_cdrex"  entries
+- devfreq-events : phandles for PPMU devices connected to this DMC.
+- vdd-supply : phandle for voltage regulator which is connected.
+- reg : registers of two CDREX controllers.
+- operating-points-v2 : phandle for OPPs described in v2 definition.
+- device-handle : phandle of the connected DRAM memory device. For more
+	information please refer to documentation file:
+	Documentation/devicetree/bindings/ddr/lpddr3.txt
+- devfreq-events : phandles of the PPMU events used by the controller.
+- samsung,syscon-clk : phandle of the clock register set used by the controller,
+	these registers are used for enabling a 'pause' feature and are not
+	exposed by clock framework but they must be used in a safe way.
+	The register offsets are in the driver code and specyfic for this SoC
+	type.
+
+Optional properties for DMC device for Exynos5422:
+- interrupt-parent : The parent interrupt controller.
+- interrupts : Contains the IRQ line numbers for the DMC internal performance
+  event counters in DREX0 and DREX1 channels. Align with specification of the
+  interrupt line(s) in the interrupt-parent controller.
+- interrupt-names : IRQ names "drex_0" and "drex_1", the order should be the
+  same as in the 'interrupts' list above.
+
+Example:
+
+	ppmu_dmc0_0: ppmu@10d00000 {
+		compatible = "samsung,exynos-ppmu";
+		reg = <0x10d00000 0x2000>;
+		clocks = <&clock CLK_PCLK_PPMU_DREX0_0>;
+		clock-names = "ppmu";
+		events {
+			ppmu_event_dmc0_0: ppmu-event3-dmc0_0 {
+				event-name = "ppmu-event3-dmc0_0";
+			};
+		};
+	};
+
+	dmc: memory-controller@10c20000 {
+		compatible = "samsung,exynos5422-dmc";
+		reg = <0x10c20000 0x10000>, <0x10c30000 0x10000>;
+		clocks = <&clock CLK_FOUT_SPLL>,
+			 <&clock CLK_MOUT_SCLK_SPLL>,
+			 <&clock CLK_FF_DOUT_SPLL2>,
+			 <&clock CLK_FOUT_BPLL>,
+			 <&clock CLK_MOUT_BPLL>,
+			 <&clock CLK_SCLK_BPLL>,
+			 <&clock CLK_MOUT_MX_MSPLL_CCORE>,
+			 <&clock CLK_MOUT_MCLK_CDREX>;
+		clock-names = "fout_spll",
+			      "mout_sclk_spll",
+			      "ff_dout_spll2",
+			      "fout_bpll",
+			      "mout_bpll",
+			      "sclk_bpll",
+			      "mout_mx_mspll_ccore",
+			      "mout_mclk_cdrex";
+		operating-points-v2 = <&dmc_opp_table>;
+		devfreq-events = <&ppmu_event3_dmc0_0>,	<&ppmu_event3_dmc0_1>,
+				 <&ppmu_event3_dmc1_0>, <&ppmu_event3_dmc1_1>;
+		device-handle = <&samsung_K3QF2F20DB>;
+		vdd-supply = <&buck1_reg>;
+		samsung,syscon-clk = <&clock>;
+		interrupt-parent = <&combiner>;
+		interrupts = <16 0>, <16 1>;
+		interrupt-names = "drex_0", "drex_1";
+	};
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/imx8m-ddrc.yaml b/Documentation/devicetree/bindings/memory-controllers/fsl/imx8m-ddrc.yaml
new file mode 100644
index 000000000000..c9e6c22cb5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/imx8m-ddrc.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/fsl/imx8m-ddrc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: i.MX8M DDR Controller
+
+maintainers:
+  - Leonard Crestez <leonard.crestez@nxp.com>
+
+description:
+  The DDRC block is integrated in i.MX8M for interfacing with DDR based
+  memories.
+
+  It supports switching between different frequencies at runtime but during
+  this process RAM itself becomes briefly inaccessible so actual frequency
+  switching is implemented by TF-A code which runs from a SRAM area.
+
+  The Linux driver for the DDRC doesn't even map registers (they're included
+  for the sake of "describing hardware"), it mostly just exposes firmware
+  capabilities through standard Linux mechanism like devfreq and OPP tables.
+
+properties:
+  compatible:
+    items:
+      - enum:
+        - fsl,imx8mn-ddrc
+        - fsl,imx8mm-ddrc
+        - fsl,imx8mq-ddrc
+      - const: fsl,imx8m-ddrc
+
+  reg:
+    maxItems: 1
+    description:
+      Base address and size of DDRC CTL area.
+      This is not currently mapped by the imx8m-ddrc driver.
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: core
+      - const: pll
+      - const: alt
+      - const: apb
+
+  operating-points-v2: true
+  opp-table: true
+
+required:
+  - reg
+  - compatible
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/imx8mm-clock.h>
+    ddrc: memory-controller@3d400000 {
+        compatible = "fsl,imx8mm-ddrc", "fsl,imx8m-ddrc";
+        reg = <0x3d400000 0x400000>;
+        clock-names = "core", "pll", "alt", "apb";
+        clocks = <&clk IMX8MM_CLK_DRAM_CORE>,
+                 <&clk IMX8MM_DRAM_PLL>,
+                 <&clk IMX8MM_CLK_DRAM_ALT>,
+                 <&clk IMX8MM_CLK_DRAM_APB>;
+        operating-points-v2 = <&ddrc_opp_table>;
+    };
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml
new file mode 100644
index 000000000000..22a94b6fdbde
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: (GPL-2.0)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/nvidia,tegra124-mc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra124 SoC Memory Controller
+
+maintainers:
+  - Jon Hunter <jonathanh@nvidia.com>
+  - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+  Tegra124 SoC features a hybrid 2x32-bit / 1x64-bit memory controller.
+  These are interleaved to provide high performance with the load shared across
+  two memory channels. The Tegra124 Memory Controller handles memory requests
+  from internal clients and arbitrates among them to allocate memory bandwidth
+  for DDR3L and LPDDR3 SDRAMs.
+
+properties:
+  compatible:
+    const: nvidia,tegra124-mc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: mc
+
+  interrupts:
+    maxItems: 1
+
+  "#reset-cells":
+    const: 1
+
+  "#iommu-cells":
+    const: 1
+
+patternProperties:
+  "^emc-timings-[0-9]+$":
+    type: object
+    properties:
+      nvidia,ram-code:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Value of RAM_CODE this timing set is used for.
+
+    patternProperties:
+      "^timing-[0-9]+$":
+        type: object
+        properties:
+          clock-frequency:
+            description:
+              Memory clock rate in Hz.
+            minimum: 1000000
+            maximum: 1066000000
+
+          nvidia,emem-configuration:
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32-array
+            description: |
+              Values to be written to the EMEM register block. See section
+              "15.6.1 MC Registers" in the TRM.
+            items:
+              - description: MC_EMEM_ARB_CFG
+              - description: MC_EMEM_ARB_OUTSTANDING_REQ
+              - description: MC_EMEM_ARB_TIMING_RCD
+              - description: MC_EMEM_ARB_TIMING_RP
+              - description: MC_EMEM_ARB_TIMING_RC
+              - description: MC_EMEM_ARB_TIMING_RAS
+              - description: MC_EMEM_ARB_TIMING_FAW
+              - description: MC_EMEM_ARB_TIMING_RRD
+              - description: MC_EMEM_ARB_TIMING_RAP2PRE
+              - description: MC_EMEM_ARB_TIMING_WAP2PRE
+              - description: MC_EMEM_ARB_TIMING_R2R
+              - description: MC_EMEM_ARB_TIMING_W2W
+              - description: MC_EMEM_ARB_TIMING_R2W
+              - description: MC_EMEM_ARB_TIMING_W2R
+              - description: MC_EMEM_ARB_DA_TURNS
+              - description: MC_EMEM_ARB_DA_COVERS
+              - description: MC_EMEM_ARB_MISC0
+              - description: MC_EMEM_ARB_MISC1
+              - description: MC_EMEM_ARB_RING1_THROTTLE
+
+        required:
+          - clock-frequency
+          - nvidia,emem-configuration
+
+        additionalProperties: false
+
+    required:
+      - nvidia,ram-code
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#reset-cells"
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    memory-controller@70019000 {
+        compatible = "nvidia,tegra124-mc";
+        reg = <0x0 0x70019000 0x0 0x1000>;
+        clocks = <&tegra_car 32>;
+        clock-names = "mc";
+
+        interrupts = <0 77 4>;
+
+        #iommu-cells = <1>;
+        #reset-cells = <1>;
+
+        emc-timings-3 {
+            nvidia,ram-code = <3>;
+
+            timing-12750000 {
+                clock-frequency = <12750000>;
+
+                nvidia,emem-configuration = <
+                    0x40040001 /* MC_EMEM_ARB_CFG */
+                    0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
+                    0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+                    0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+                    0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+                    0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+                    0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+                    0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+                    0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+                    0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+                    0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+                    0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+                    0x77e30303 /* MC_EMEM_ARB_MISC0 */
+                    0x70000f03 /* MC_EMEM_ARB_MISC1 */
+                    0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+                >;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml
new file mode 100644
index 000000000000..e4135bac6957
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml
@@ -0,0 +1,339 @@
+# SPDX-License-Identifier: (GPL-2.0)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/nvidia,tegra30-emc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra30 SoC External Memory Controller
+
+maintainers:
+  - Dmitry Osipenko <digetx@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+  - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+  The EMC interfaces with the off-chip SDRAM to service the request stream
+  sent from Memory Controller. The EMC also has various performance-affecting
+  settings beyond the obvious SDRAM configuration parameters and initialization
+  settings. Tegra30 EMC supports multiple JEDEC standard protocols: LPDDR2,
+  LPDDR3, and DDR3.
+
+properties:
+  compatible:
+    const: nvidia,tegra30-emc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  nvidia,memory-controller:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle of the Memory Controller node.
+
+patternProperties:
+  "^emc-timings-[0-9]+$":
+    type: object
+    properties:
+      nvidia,ram-code:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Value of RAM_CODE this timing set is used for.
+
+    patternProperties:
+      "^timing-[0-9]+$":
+        type: object
+        properties:
+          clock-frequency:
+            description:
+              Memory clock rate in Hz.
+            minimum: 1000000
+            maximum: 900000000
+
+          nvidia,emc-auto-cal-interval:
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32
+            description:
+              Pad calibration interval in microseconds.
+            minimum: 0
+            maximum: 2097151
+
+          nvidia,emc-mode-1:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            description:
+              Mode Register 1.
+
+          nvidia,emc-mode-2:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            description:
+              Mode Register 2.
+
+          nvidia,emc-mode-reset:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            description:
+              Mode Register 0.
+
+          nvidia,emc-zcal-cnt-long:
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32
+            description:
+              Number of EMC clocks to wait before issuing any commands after
+              sending ZCAL_MRW_CMD.
+            minimum: 0
+            maximum: 1023
+
+          nvidia,emc-cfg-dyn-self-ref:
+            type: boolean
+            description:
+              Dynamic self-refresh enabled.
+
+          nvidia,emc-cfg-periodic-qrst:
+            type: boolean
+            description:
+              FBIO "read" FIFO periodic resetting enabled.
+
+          nvidia,emc-configuration:
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32-array
+            description:
+              EMC timing characterization data. These are the registers
+              (see section "18.13.2 EMC Registers" in the TRM) whose values
+              need to be specified, according to the board documentation.
+            items:
+              - description: EMC_RC
+              - description: EMC_RFC
+              - description: EMC_RAS
+              - description: EMC_RP
+              - description: EMC_R2W
+              - description: EMC_W2R
+              - description: EMC_R2P
+              - description: EMC_W2P
+              - description: EMC_RD_RCD
+              - description: EMC_WR_RCD
+              - description: EMC_RRD
+              - description: EMC_REXT
+              - description: EMC_WEXT
+              - description: EMC_WDV
+              - description: EMC_QUSE
+              - description: EMC_QRST
+              - description: EMC_QSAFE
+              - description: EMC_RDV
+              - description: EMC_REFRESH
+              - description: EMC_BURST_REFRESH_NUM
+              - description: EMC_PRE_REFRESH_REQ_CNT
+              - description: EMC_PDEX2WR
+              - description: EMC_PDEX2RD
+              - description: EMC_PCHG2PDEN
+              - description: EMC_ACT2PDEN
+              - description: EMC_AR2PDEN
+              - description: EMC_RW2PDEN
+              - description: EMC_TXSR
+              - description: EMC_TXSRDLL
+              - description: EMC_TCKE
+              - description: EMC_TFAW
+              - description: EMC_TRPAB
+              - description: EMC_TCLKSTABLE
+              - description: EMC_TCLKSTOP
+              - description: EMC_TREFBW
+              - description: EMC_QUSE_EXTRA
+              - description: EMC_FBIO_CFG6
+              - description: EMC_ODT_WRITE
+              - description: EMC_ODT_READ
+              - description: EMC_FBIO_CFG5
+              - description: EMC_CFG_DIG_DLL
+              - description: EMC_CFG_DIG_DLL_PERIOD
+              - description: EMC_DLL_XFORM_DQS0
+              - description: EMC_DLL_XFORM_DQS1
+              - description: EMC_DLL_XFORM_DQS2
+              - description: EMC_DLL_XFORM_DQS3
+              - description: EMC_DLL_XFORM_DQS4
+              - description: EMC_DLL_XFORM_DQS5
+              - description: EMC_DLL_XFORM_DQS6
+              - description: EMC_DLL_XFORM_DQS7
+              - description: EMC_DLL_XFORM_QUSE0
+              - description: EMC_DLL_XFORM_QUSE1
+              - description: EMC_DLL_XFORM_QUSE2
+              - description: EMC_DLL_XFORM_QUSE3
+              - description: EMC_DLL_XFORM_QUSE4
+              - description: EMC_DLL_XFORM_QUSE5
+              - description: EMC_DLL_XFORM_QUSE6
+              - description: EMC_DLL_XFORM_QUSE7
+              - description: EMC_DLI_TRIM_TXDQS0
+              - description: EMC_DLI_TRIM_TXDQS1
+              - description: EMC_DLI_TRIM_TXDQS2
+              - description: EMC_DLI_TRIM_TXDQS3
+              - description: EMC_DLI_TRIM_TXDQS4
+              - description: EMC_DLI_TRIM_TXDQS5
+              - description: EMC_DLI_TRIM_TXDQS6
+              - description: EMC_DLI_TRIM_TXDQS7
+              - description: EMC_DLL_XFORM_DQ0
+              - description: EMC_DLL_XFORM_DQ1
+              - description: EMC_DLL_XFORM_DQ2
+              - description: EMC_DLL_XFORM_DQ3
+              - description: EMC_XM2CMDPADCTRL
+              - description: EMC_XM2DQSPADCTRL2
+              - description: EMC_XM2DQPADCTRL2
+              - description: EMC_XM2CLKPADCTRL
+              - description: EMC_XM2COMPPADCTRL
+              - description: EMC_XM2VTTGENPADCTRL
+              - description: EMC_XM2VTTGENPADCTRL2
+              - description: EMC_XM2QUSEPADCTRL
+              - description: EMC_XM2DQSPADCTRL3
+              - description: EMC_CTT_TERM_CTRL
+              - description: EMC_ZCAL_INTERVAL
+              - description: EMC_ZCAL_WAIT_CNT
+              - description: EMC_MRS_WAIT_CNT
+              - description: EMC_AUTO_CAL_CONFIG
+              - description: EMC_CTT
+              - description: EMC_CTT_DURATION
+              - description: EMC_DYN_SELF_REF_CONTROL
+              - description: EMC_FBIO_SPARE
+              - description: EMC_CFG_RSV
+
+        required:
+          - clock-frequency
+          - nvidia,emc-auto-cal-interval
+          - nvidia,emc-mode-1
+          - nvidia,emc-mode-2
+          - nvidia,emc-mode-reset
+          - nvidia,emc-zcal-cnt-long
+          - nvidia,emc-configuration
+
+        additionalProperties: false
+
+    required:
+      - nvidia,ram-code
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - nvidia,memory-controller
+
+additionalProperties: false
+
+examples:
+  - |
+    external-memory-controller@7000f400 {
+        compatible = "nvidia,tegra30-emc";
+        reg = <0x7000f400 0x400>;
+        interrupts = <0 78 4>;
+        clocks = <&tegra_car 57>;
+
+        nvidia,memory-controller = <&mc>;
+
+        emc-timings-1 {
+            nvidia,ram-code = <1>;
+
+            timing-667000000 {
+                clock-frequency = <667000000>;
+
+                nvidia,emc-auto-cal-interval = <0x001fffff>;
+                nvidia,emc-mode-1 = <0x80100002>;
+                nvidia,emc-mode-2 = <0x80200018>;
+                nvidia,emc-mode-reset = <0x80000b71>;
+                nvidia,emc-zcal-cnt-long = <0x00000040>;
+                nvidia,emc-cfg-periodic-qrst;
+
+                nvidia,emc-configuration = <
+                    0x00000020 /* EMC_RC */
+                    0x0000006a /* EMC_RFC */
+                    0x00000017 /* EMC_RAS */
+                    0x00000007 /* EMC_RP */
+                    0x00000005 /* EMC_R2W */
+                    0x0000000c /* EMC_W2R */
+                    0x00000003 /* EMC_R2P */
+                    0x00000011 /* EMC_W2P */
+                    0x00000007 /* EMC_RD_RCD */
+                    0x00000007 /* EMC_WR_RCD */
+                    0x00000002 /* EMC_RRD */
+                    0x00000001 /* EMC_REXT */
+                    0x00000000 /* EMC_WEXT */
+                    0x00000007 /* EMC_WDV */
+                    0x0000000a /* EMC_QUSE */
+                    0x00000009 /* EMC_QRST */
+                    0x0000000b /* EMC_QSAFE */
+                    0x00000011 /* EMC_RDV */
+                    0x00001412 /* EMC_REFRESH */
+                    0x00000000 /* EMC_BURST_REFRESH_NUM */
+                    0x00000504 /* EMC_PRE_REFRESH_REQ_CNT */
+                    0x00000002 /* EMC_PDEX2WR */
+                    0x0000000e /* EMC_PDEX2RD */
+                    0x00000001 /* EMC_PCHG2PDEN */
+                    0x00000000 /* EMC_ACT2PDEN */
+                    0x0000000c /* EMC_AR2PDEN */
+                    0x00000016 /* EMC_RW2PDEN */
+                    0x00000072 /* EMC_TXSR */
+                    0x00000200 /* EMC_TXSRDLL */
+                    0x00000005 /* EMC_TCKE */
+                    0x00000015 /* EMC_TFAW */
+                    0x00000000 /* EMC_TRPAB */
+                    0x00000006 /* EMC_TCLKSTABLE */
+                    0x00000007 /* EMC_TCLKSTOP */
+                    0x00001453 /* EMC_TREFBW */
+                    0x0000000b /* EMC_QUSE_EXTRA */
+                    0x00000006 /* EMC_FBIO_CFG6 */
+                    0x00000000 /* EMC_ODT_WRITE */
+                    0x00000000 /* EMC_ODT_READ */
+                    0x00005088 /* EMC_FBIO_CFG5 */
+                    0xf00b0191 /* EMC_CFG_DIG_DLL */
+                    0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+                    0x00000008 /* EMC_DLL_XFORM_DQS0 */
+                    0x00000008 /* EMC_DLL_XFORM_DQS1 */
+                    0x00000008 /* EMC_DLL_XFORM_DQS2 */
+                    0x00000008 /* EMC_DLL_XFORM_DQS3 */
+                    0x0000000a /* EMC_DLL_XFORM_DQS4 */
+                    0x0000000a /* EMC_DLL_XFORM_DQS5 */
+                    0x0000000a /* EMC_DLL_XFORM_DQS6 */
+                    0x0000000a /* EMC_DLL_XFORM_DQS7 */
+                    0x00018000 /* EMC_DLL_XFORM_QUSE0 */
+                    0x00018000 /* EMC_DLL_XFORM_QUSE1 */
+                    0x00018000 /* EMC_DLL_XFORM_QUSE2 */
+                    0x00018000 /* EMC_DLL_XFORM_QUSE3 */
+                    0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+                    0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+                    0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+                    0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+                    0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+                    0x0000000a /* EMC_DLL_XFORM_DQ0 */
+                    0x0000000a /* EMC_DLL_XFORM_DQ1 */
+                    0x0000000a /* EMC_DLL_XFORM_DQ2 */
+                    0x0000000a /* EMC_DLL_XFORM_DQ3 */
+                    0x000002a0 /* EMC_XM2CMDPADCTRL */
+                    0x0800013d /* EMC_XM2DQSPADCTRL2 */
+                    0x22220000 /* EMC_XM2DQPADCTRL2 */
+                    0x77fff884 /* EMC_XM2CLKPADCTRL */
+                    0x01f1f501 /* EMC_XM2COMPPADCTRL */
+                    0x07077404 /* EMC_XM2VTTGENPADCTRL */
+                    0x54000000 /* EMC_XM2VTTGENPADCTRL2 */
+                    0x080001e8 /* EMC_XM2QUSEPADCTRL */
+                    0x0c000021 /* EMC_XM2DQSPADCTRL3 */
+                    0x00000802 /* EMC_CTT_TERM_CTRL */
+                    0x00020000 /* EMC_ZCAL_INTERVAL */
+                    0x00000100 /* EMC_ZCAL_WAIT_CNT */
+                    0x0155000c /* EMC_MRS_WAIT_CNT */
+                    0xa0f10000 /* EMC_AUTO_CAL_CONFIG */
+                    0x00000000 /* EMC_CTT */
+                    0x00000000 /* EMC_CTT_DURATION */
+                    0x800028a5 /* EMC_DYN_SELF_REF_CONTROL */
+                    0xe8000000 /* EMC_FBIO_SPARE */
+                    0xff00ff49 /* EMC_CFG_RSV */
+                >;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt
deleted file mode 100644
index a878b5908a4d..000000000000
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-NVIDIA Tegra Memory Controller device tree bindings
-===================================================
-
-memory-controller node
-----------------------
-
-Required properties:
-- compatible: Should be "nvidia,tegra<chip>-mc"
-- reg: Physical base address and length of the controller's registers.
-- clocks: Must contain an entry for each entry in clock-names.
-  See ../clocks/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
-  - mc: the module's clock input
-- interrupts: The interrupt outputs from the controller.
-- #reset-cells : Should be 1. This cell represents memory client module ID.
-  The assignments may be found in header file <dt-bindings/memory/tegra30-mc.h>
-  or in the TRM documentation.
-
-Required properties for Tegra30, Tegra114, Tegra124, Tegra132 and Tegra210:
-- #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
-  the SWGROUP of the master.
-
-This device implements an IOMMU that complies with the generic IOMMU binding.
-See ../iommu/iommu.txt for details.
-
-emc-timings subnode
--------------------
-
-The node should contain a "emc-timings" subnode for each supported RAM type (see field RAM_CODE in
-register PMC_STRAPPING_OPT_A).
-
-Required properties for "emc-timings" nodes :
-- nvidia,ram-code : Should contain the value of RAM_CODE this timing set is used for.
-
-timing subnode
---------------
-
-Each "emc-timings" node should contain a subnode for every supported EMC clock rate.
-
-Required properties for timing nodes :
-- clock-frequency : Should contain the memory clock rate in Hz.
-- nvidia,emem-configuration : Values to be written to the EMEM register block. For the Tegra124 SoC
-(see section "15.6.1 MC Registers" in the TRM), these are the registers whose values need to be
-specified, according to the board documentation:
-
-	MC_EMEM_ARB_CFG
-	MC_EMEM_ARB_OUTSTANDING_REQ
-	MC_EMEM_ARB_TIMING_RCD
-	MC_EMEM_ARB_TIMING_RP
-	MC_EMEM_ARB_TIMING_RC
-	MC_EMEM_ARB_TIMING_RAS
-	MC_EMEM_ARB_TIMING_FAW
-	MC_EMEM_ARB_TIMING_RRD
-	MC_EMEM_ARB_TIMING_RAP2PRE
-	MC_EMEM_ARB_TIMING_WAP2PRE
-	MC_EMEM_ARB_TIMING_R2R
-	MC_EMEM_ARB_TIMING_W2W
-	MC_EMEM_ARB_TIMING_R2W
-	MC_EMEM_ARB_TIMING_W2R
-	MC_EMEM_ARB_DA_TURNS
-	MC_EMEM_ARB_DA_COVERS
-	MC_EMEM_ARB_MISC0
-	MC_EMEM_ARB_MISC1
-	MC_EMEM_ARB_RING1_THROTTLE
-
-Example SoC include file:
-
-/ {
-	mc: memory-controller@70019000 {
-		compatible = "nvidia,tegra124-mc";
-		reg = <0x0 0x70019000 0x0 0x1000>;
-		clocks = <&tegra_car TEGRA124_CLK_MC>;
-		clock-names = "mc";
-
-		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
-
-		#iommu-cells = <1>;
-		#reset-cells = <1>;
-	};
-
-	sdhci@700b0000 {
-		compatible = "nvidia,tegra124-sdhci";
-		...
-		iommus = <&mc TEGRA_SWGROUP_SDMMC1A>;
-		resets = <&mc TEGRA124_MC_RESET_SDMMC1>;
-	};
-};
-
-Example board file:
-
-/ {
-	memory-controller@70019000 {
-		emc-timings-3 {
-			nvidia,ram-code = <3>;
-
-			timing-12750000 {
-				clock-frequency = <12750000>;
-
-				nvidia,emem-configuration = <
-					0x40040001 /* MC_EMEM_ARB_CFG */
-					0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
-					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
-					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
-					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
-					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
-					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
-					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
-					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
-					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
-					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
-					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
-					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
-					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
-					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
-					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
-					0x77e30303 /* MC_EMEM_ARB_MISC0 */
-					0x70000f03 /* MC_EMEM_ARB_MISC1 */
-					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
-				>;
-			};
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml
new file mode 100644
index 000000000000..4b9196c83291
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: (GPL-2.0)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/nvidia,tegra30-mc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra30 SoC Memory Controller
+
+maintainers:
+  - Dmitry Osipenko <digetx@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+  - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+  Tegra30 Memory Controller architecturally consists of the following parts:
+
+    Arbitration Domains, which can handle a single request or response per
+    clock from a group of clients. Typically, a system has a single Arbitration
+    Domain, but an implementation may divide the client space into multiple
+    Arbitration Domains to increase the effective system bandwidth.
+
+    Protocol Arbiter, which manage a related pool of memory devices. A system
+    may have a single Protocol Arbiter or multiple Protocol Arbiters.
+
+    Memory Crossbar, which routes request and responses between Arbitration
+    Domains and Protocol Arbiters. In the simplest version of the system, the
+    Memory Crossbar is just a pass through between a single Arbitration Domain
+    and a single Protocol Arbiter.
+
+    Global Resources, which include things like configuration registers which
+    are shared across the Memory Subsystem.
+
+  The Tegra30 Memory Controller handles memory requests from internal clients
+  and arbitrates among them to allocate memory bandwidth for DDR3L and LPDDR2
+  SDRAMs.
+
+properties:
+  compatible:
+    const: nvidia,tegra30-mc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: mc
+
+  interrupts:
+    maxItems: 1
+
+  "#reset-cells":
+    const: 1
+
+  "#iommu-cells":
+    const: 1
+
+patternProperties:
+  "^emc-timings-[0-9]+$":
+    type: object
+    properties:
+      nvidia,ram-code:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Value of RAM_CODE this timing set is used for.
+
+    patternProperties:
+      "^timing-[0-9]+$":
+        type: object
+        properties:
+          clock-frequency:
+            description:
+              Memory clock rate in Hz.
+            minimum: 1000000
+            maximum: 900000000
+
+          nvidia,emem-configuration:
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32-array
+            description: |
+              Values to be written to the EMEM register block. See section
+              "18.13.1 MC Registers" in the TRM.
+            items:
+              - description: MC_EMEM_ARB_CFG
+              - description: MC_EMEM_ARB_OUTSTANDING_REQ
+              - description: MC_EMEM_ARB_TIMING_RCD
+              - description: MC_EMEM_ARB_TIMING_RP
+              - description: MC_EMEM_ARB_TIMING_RC
+              - description: MC_EMEM_ARB_TIMING_RAS
+              - description: MC_EMEM_ARB_TIMING_FAW
+              - description: MC_EMEM_ARB_TIMING_RRD
+              - description: MC_EMEM_ARB_TIMING_RAP2PRE
+              - description: MC_EMEM_ARB_TIMING_WAP2PRE
+              - description: MC_EMEM_ARB_TIMING_R2R
+              - description: MC_EMEM_ARB_TIMING_W2W
+              - description: MC_EMEM_ARB_TIMING_R2W
+              - description: MC_EMEM_ARB_TIMING_W2R
+              - description: MC_EMEM_ARB_DA_TURNS
+              - description: MC_EMEM_ARB_DA_COVERS
+              - description: MC_EMEM_ARB_MISC0
+              - description: MC_EMEM_ARB_RING1_THROTTLE
+
+        required:
+          - clock-frequency
+          - nvidia,emem-configuration
+
+        additionalProperties: false
+
+    required:
+      - nvidia,ram-code
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#reset-cells"
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    memory-controller@7000f000 {
+        compatible = "nvidia,tegra30-mc";
+        reg = <0x7000f000 0x400>;
+        clocks = <&tegra_car 32>;
+        clock-names = "mc";
+
+        interrupts = <0 77 4>;
+
+        #iommu-cells = <1>;
+        #reset-cells = <1>;
+
+        emc-timings-1 {
+            nvidia,ram-code = <1>;
+
+            timing-667000000 {
+                clock-frequency = <667000000>;
+
+                nvidia,emem-configuration = <
+                    0x0000000a /* MC_EMEM_ARB_CFG */
+                    0xc0000079 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+                    0x00000003 /* MC_EMEM_ARB_TIMING_RCD */
+                    0x00000004 /* MC_EMEM_ARB_TIMING_RP */
+                    0x00000010 /* MC_EMEM_ARB_TIMING_RC */
+                    0x0000000b /* MC_EMEM_ARB_TIMING_RAS */
+                    0x0000000a /* MC_EMEM_ARB_TIMING_FAW */
+                    0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+                    0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+                    0x0000000b /* MC_EMEM_ARB_TIMING_WAP2PRE */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+                    0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+                    0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+                    0x00000008 /* MC_EMEM_ARB_TIMING_W2R */
+                    0x08040202 /* MC_EMEM_ARB_DA_TURNS */
+                    0x00130b10 /* MC_EMEM_ARB_DA_COVERS */
+                    0x70ea1f11 /* MC_EMEM_ARB_MISC0 */
+                    0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+                >;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt
index cd9e90c5d171..b6bc30d7777e 100644
--- a/Documentation/devicetree/bindings/mfd/ab8500.txt
+++ b/Documentation/devicetree/bindings/mfd/ab8500.txt
@@ -69,6 +69,18 @@ Required child device properties:
 - compatible             : "stericsson,ab8500-[bm|btemp|charger|fg|gpadc|gpio|ponkey|
                                                pwm|regulator|rtc|sysctrl|usb]";
 
+  A few child devices require ADC channels from the GPADC node. Those follow the
+  standard bindings from iio/iio-bindings.txt and iio/adc/adc.txt
+
+  abx500-temp		 : io-channels "aux1" and "aux2" for measuring external
+			   temperatures.
+  ab8500-fg		 : io-channel "main_bat_v" for measuring main battery voltage,
+  ab8500-btemp		 : io-channels "btemp_ball" and "bat_ctrl" for measuring the
+			   battery voltage.
+  ab8500-charger	 : io-channels "main_charger_v", "main_charger_c", "vbus_v",
+			   "usb_charger_c" for measuring voltage and current of the
+			   different charging supplies.
+
 Optional child device properties:
 - interrupts             : contains the device IRQ(s) using the 2-cell format (see above)
 - interrupt-names        : contains names of IRQ resource in the order in which they were
@@ -102,8 +114,115 @@ ab8500 {
                               39 0x4>;
                 interrupt-names = "HW_CONV_END", "SW_CONV_END";
                 vddadc-supply = <&ab8500_ldo_tvout_reg>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#io-channel-cells = <1>;
+
+		/* GPADC channels */
+		bat_ctrl: channel@1 {
+			reg = <0x01>;
+		};
+		btemp_ball: channel@2 {
+			reg = <0x02>;
+		};
+		main_charger_v: channel@3 {
+			reg = <0x03>;
+		};
+		acc_detect1: channel@4 {
+			reg = <0x04>;
+		};
+		acc_detect2: channel@5 {
+			reg = <0x05>;
+		};
+		adc_aux1: channel@6 {
+			reg = <0x06>;
+		};
+		adc_aux2: channel@7 {
+			reg = <0x07>;
+		};
+		main_batt_v: channel@8 {
+			reg = <0x08>;
+		};
+		vbus_v: channel@9 {
+			reg = <0x09>;
+		};
+		main_charger_c: channel@a {
+			reg = <0x0a>;
+		};
+		usb_charger_c: channel@b {
+			reg = <0x0b>;
+		};
+		bk_bat_v: channel@c {
+			reg = <0x0c>;
+		};
+		die_temp: channel@d {
+			reg = <0x0d>;
+		};
+		usb_id: channel@e {
+			reg = <0x0e>;
+		};
+		xtal_temp: channel@12 {
+			reg = <0x12>;
+		};
+		vbat_true_meas: channel@13 {
+			reg = <0x13>;
+		};
+		bat_ctrl_and_ibat: channel@1c {
+			reg = <0x1c>;
+		};
+		vbat_meas_and_ibat: channel@1d {
+			reg = <0x1d>;
+		};
+		vbat_true_meas_and_ibat: channel@1e {
+			reg = <0x1e>;
+		};
+		bat_temp_and_ibat: channel@1f {
+			reg = <0x1f>;
+		};
         };
 
+	ab8500_temp {
+		compatible = "stericsson,abx500-temp";
+		io-channels = <&gpadc 0x06>,
+			      <&gpadc 0x07>;
+		io-channel-name = "aux1", "aux2";
+	};
+
+	ab8500_battery: ab8500_battery {
+		stericsson,battery-type = "LIPO";
+		thermistor-on-batctrl;
+	};
+
+	ab8500_fg {
+		compatible = "stericsson,ab8500-fg";
+		battery	   = <&ab8500_battery>;
+		io-channels = <&gpadc 0x08>;
+		io-channel-name = "main_bat_v";
+	};
+
+	ab8500_btemp {
+		compatible = "stericsson,ab8500-btemp";
+		battery	   = <&ab8500_battery>;
+		io-channels = <&gpadc 0x02>,
+			      <&gpadc 0x01>;
+		io-channel-name = "btemp_ball",
+				"bat_ctrl";
+	};
+
+	ab8500_charger {
+		compatible	= "stericsson,ab8500-charger";
+		battery		= <&ab8500_battery>;
+		vddadc-supply	= <&ab8500_ldo_tvout_reg>;
+		io-channels = <&gpadc 0x03>,
+			      <&gpadc 0x0a>,
+			      <&gpadc 0x09>,
+			      <&gpadc 0x0b>;
+		io-channel-name = "main_charger_v",
+				"main_charger_c",
+				"vbus_v",
+				"usb_charger_c";
+	};
+
         ab8500-usb {
                 compatible = "stericsson,ab8500-usb";
                 interrupts = < 90 0x4
diff --git a/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml b/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
index 4b1a09acb98b..39afacc447b2 100644
--- a/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
+++ b/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Resistive Touchscreen Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#thermal-sensor-cells":
diff --git a/Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml b/Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml
new file mode 100644
index 000000000000..d131759ccaf3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/allwinner,sun6i-a31-prcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 PRCM Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  compatible:
+    const: allwinner,sun6i-a31-prcm
+
+  reg:
+    maxItems: 1
+
+patternProperties:
+  "^.*_(clk|rst)$":
+    type: object
+
+    properties:
+      compatible:
+        enum:
+          - allwinner,sun4i-a10-mod0-clk
+          - allwinner,sun6i-a31-apb0-clk
+          - allwinner,sun6i-a31-apb0-gates-clk
+          - allwinner,sun6i-a31-ar100-clk
+          - allwinner,sun6i-a31-clock-reset
+          - fixed-factor-clock
+
+    allOf:
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun6i-a31-apb0-clk
+
+        then:
+          properties:
+            "#clock-cells":
+              const: 0
+
+            # Already checked in the main schema
+            compatible: true
+
+            clocks:
+              maxItems: 1
+
+            clock-output-names:
+              maxItems: 1
+
+            phandle: true
+
+          required:
+            - "#clock-cells"
+            - compatible
+            - clocks
+            - clock-output-names
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun6i-a31-apb0-gates-clk
+
+        then:
+          properties:
+            "#clock-cells":
+              const: 1
+              description: >
+                This additional argument passed to that clock is the
+                offset of the bit controlling this particular gate in
+                the register.
+
+            # Already checked in the main schema
+            compatible: true
+
+            clocks:
+              maxItems: 1
+
+            clock-output-names:
+              minItems: 1
+              maxItems: 32
+
+            phandle: true
+
+          required:
+            - "#clock-cells"
+            - compatible
+            - clocks
+            - clock-output-names
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun6i-a31-ar100-clk
+
+        then:
+          properties:
+            "#clock-cells":
+              const: 0
+
+            # Already checked in the main schema
+            compatible: true
+
+            clocks:
+              maxItems: 4
+              description: >
+                The parent order must match the hardware programming
+                order.
+
+            clock-output-names:
+              maxItems: 1
+
+            phandle: true
+
+          required:
+            - "#clock-cells"
+            - compatible
+            - clocks
+            - clock-output-names
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun6i-a31-clock-reset
+
+        then:
+          properties:
+            "#reset-cells":
+              const: 1
+
+            # Already checked in the main schema
+            compatible: true
+
+            phandle: true
+
+          required:
+            - "#reset-cells"
+            - compatible
+
+          additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun6i-a31-ccu.h>
+
+    prcm@1f01400 {
+        compatible = "allwinner,sun6i-a31-prcm";
+        reg = <0x01f01400 0x200>;
+
+        ar100: ar100_clk {
+            compatible = "allwinner,sun6i-a31-ar100-clk";
+            #clock-cells = <0>;
+            clocks = <&rtc 0>, <&osc24M>,
+                     <&ccu CLK_PLL_PERIPH>,
+                     <&ccu CLK_PLL_PERIPH>;
+            clock-output-names = "ar100";
+        };
+
+        ahb0: ahb0_clk {
+            compatible = "fixed-factor-clock";
+            #clock-cells = <0>;
+            clock-div = <1>;
+            clock-mult = <1>;
+            clocks = <&ar100>;
+            clock-output-names = "ahb0";
+        };
+
+        apb0: apb0_clk {
+            compatible = "allwinner,sun6i-a31-apb0-clk";
+            #clock-cells = <0>;
+            clocks = <&ahb0>;
+            clock-output-names = "apb0";
+        };
+
+        apb0_gates: apb0_gates_clk {
+            compatible = "allwinner,sun6i-a31-apb0-gates-clk";
+            #clock-cells = <1>;
+            clocks = <&apb0>;
+            clock-output-names = "apb0_pio", "apb0_ir",
+                                 "apb0_timer", "apb0_p2wi",
+                                 "apb0_uart", "apb0_1wire",
+                                 "apb0_i2c";
+        };
+
+        ir_clk: ir_clk {
+            #clock-cells = <0>;
+            compatible = "allwinner,sun4i-a10-mod0-clk";
+            clocks = <&rtc 0>, <&osc24M>;
+            clock-output-names = "ir";
+        };
+
+        apb0_rst: apb0_rst {
+            compatible = "allwinner,sun6i-a31-clock-reset";
+            #reset-cells = <1>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/allwinner,sun8i-a23-prcm.yaml b/Documentation/devicetree/bindings/mfd/allwinner,sun8i-a23-prcm.yaml
new file mode 100644
index 000000000000..aa5e683b236c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/allwinner,sun8i-a23-prcm.yaml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/allwinner,sun8i-a23-prcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A23 PRCM Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+  compatible:
+    const: allwinner,sun8i-a23-prcm
+
+  reg:
+    maxItems: 1
+
+patternProperties:
+  "^.*(clk|rst|codec).*$":
+    type: object
+
+    properties:
+      compatible:
+        enum:
+          - fixed-factor-clock
+          - allwinner,sun8i-a23-apb0-clk
+          - allwinner,sun8i-a23-apb0-gates-clk
+          - allwinner,sun6i-a31-clock-reset
+          - allwinner,sun8i-a23-codec-analog
+
+    required:
+      - compatible
+
+    allOf:
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun8i-a23-apb0-clk
+
+        then:
+          properties:
+            "#clock-cells":
+              const: 0
+
+            # Already checked in the main schema
+            compatible: true
+
+            clocks:
+              maxItems: 1
+
+            clock-output-names:
+              maxItems: 1
+
+            phandle: true
+
+          required:
+            - "#clock-cells"
+            - compatible
+            - clocks
+            - clock-output-names
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun8i-a23-apb0-gates-clk
+
+        then:
+          properties:
+            "#clock-cells":
+              const: 1
+              description: >
+                This additional argument passed to that clock is the
+                offset of the bit controlling this particular gate in
+                the register.
+
+            # Already checked in the main schema
+            compatible: true
+
+            clocks:
+              maxItems: 1
+
+            clock-output-names:
+              minItems: 1
+              maxItems: 32
+
+            phandle: true
+
+          required:
+            - "#clock-cells"
+            - compatible
+            - clocks
+            - clock-output-names
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun6i-a31-clock-reset
+
+        then:
+          properties:
+            "#reset-cells":
+              const: 1
+
+            # Already checked in the main schema
+            compatible: true
+
+            phandle: true
+
+          required:
+            - "#reset-cells"
+            - compatible
+
+          additionalProperties: false
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: allwinner,sun8i-a23-codec-analog
+
+        then:
+          properties:
+            # Already checked in the main schema
+            compatible: true
+
+            phandle: true
+
+          required:
+            - compatible
+
+          additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    prcm@1f01400 {
+        compatible = "allwinner,sun8i-a23-prcm";
+        reg = <0x01f01400 0x200>;
+
+        ar100: ar100_clk {
+            compatible = "fixed-factor-clock";
+            #clock-cells = <0>;
+            clock-div = <1>;
+            clock-mult = <1>;
+            clocks = <&osc24M>;
+            clock-output-names = "ar100";
+        };
+
+        ahb0: ahb0_clk {
+            compatible = "fixed-factor-clock";
+            #clock-cells = <0>;
+            clock-div = <1>;
+            clock-mult = <1>;
+            clocks = <&ar100>;
+            clock-output-names = "ahb0";
+        };
+
+        apb0: apb0_clk {
+            compatible = "allwinner,sun8i-a23-apb0-clk";
+            #clock-cells = <0>;
+            clocks = <&ahb0>;
+            clock-output-names = "apb0";
+        };
+
+        apb0_gates: apb0_gates_clk {
+            compatible = "allwinner,sun8i-a23-apb0-gates-clk";
+            #clock-cells = <1>;
+            clocks = <&apb0>;
+            clock-output-names = "apb0_pio", "apb0_timer",
+                                 "apb0_rsb", "apb0_uart",
+                                 "apb0_i2c";
+        };
+
+        apb0_rst: apb0_rst {
+            compatible = "allwinner,sun6i-a31-clock-reset";
+            #reset-cells = <1>;
+        };
+
+        codec_analog: codec-analog {
+            compatible = "allwinner,sun8i-a23-codec-analog";
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/da9062.txt b/Documentation/devicetree/bindings/mfd/da9062.txt
index edca653a5777..bc4b59de6a55 100644
--- a/Documentation/devicetree/bindings/mfd/da9062.txt
+++ b/Documentation/devicetree/bindings/mfd/da9062.txt
@@ -66,6 +66,9 @@ Sub-nodes:
   details of individual regulator device can be found in:
   Documentation/devicetree/bindings/regulator/regulator.txt
 
+  regulator-initial-mode may be specified for buck regulators using mode values
+  from include/dt-bindings/regulator/dlg,da9063-regulator.h.
+
 - rtc : This node defines settings required for the Real-Time Clock associated
   with the DA9062. There are currently no entries in this binding, however
   compatible = "dlg,da9062-rtc" should be added if a node is created.
@@ -96,6 +99,7 @@ Example:
 				regulator-max-microvolt = <1570000>;
 				regulator-min-microamp = <500000>;
 				regulator-max-microamp = <2000000>;
+				regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
 				regulator-boot-on;
 			};
 			DA9062_LDO1: ldo1 {
diff --git a/Documentation/devicetree/bindings/mfd/madera.txt b/Documentation/devicetree/bindings/mfd/madera.txt
index cad0f2800502..47e2b8bc6051 100644
--- a/Documentation/devicetree/bindings/mfd/madera.txt
+++ b/Documentation/devicetree/bindings/mfd/madera.txt
@@ -67,6 +67,14 @@ Optional properties:
     As defined in bindings/gpio.txt.
     Although optional, it is strongly recommended to use a hardware reset
 
+  - clocks: Should reference the clocks supplied on MCLK1, MCLK2 and MCLK3
+  - clock-names: May contain up to three strings:
+      "mclk1" for the clock supplied on MCLK1, recommended to be a high
+      quality audio reference clock
+      "mclk2" for the clock supplied on MCLK2, required to be an always on
+      32k clock
+      "mclk3" for the clock supplied on MCLK3
+
   - MICBIASx : Initial data for the MICBIAS regulators, as covered in
     Documentation/devicetree/bindings/regulator/regulator.txt.
     One for each MICBIAS generator (MICBIAS1, MICBIAS2, ...)
diff --git a/Documentation/devicetree/bindings/mfd/max77650.txt b/Documentation/devicetree/bindings/mfd/max77650.txt
deleted file mode 100644
index b529d8d19335..000000000000
--- a/Documentation/devicetree/bindings/mfd/max77650.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-MAX77650 ultra low-power PMIC from Maxim Integrated.
-
-Required properties:
--------------------
-- compatible:		Must be "maxim,max77650"
-- reg:			I2C device address.
-- interrupts:		The interrupt on the parent the controller is
-			connected to.
-- interrupt-controller: Marks the device node as an interrupt controller.
-- #interrupt-cells:	Must be <2>.
-
-- gpio-controller:	Marks the device node as a gpio controller.
-- #gpio-cells:		Must be <2>. The first cell is the pin number and
-			the second cell is used to specify the gpio active
-			state.
-
-Optional properties:
---------------------
-gpio-line-names:	Single string containing the name of the GPIO line.
-
-The GPIO-controller module is represented as part of the top-level PMIC
-node. The device exposes a single GPIO line.
-
-For device-tree bindings of other sub-modules (regulator, power supply,
-LEDs and onkey) refer to the binding documents under the respective
-sub-system directories.
-
-For more details on GPIO bindings, please refer to the generic GPIO DT
-binding document <devicetree/bindings/gpio/gpio.txt>.
-
-Example:
---------
-
-	pmic@48 {
-		compatible = "maxim,max77650";
-		reg = <0x48>;
-
-		interrupt-controller;
-		interrupt-parent = <&gpio2>;
-		#interrupt-cells = <2>;
-		interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
-
-		gpio-controller;
-		#gpio-cells = <2>;
-		gpio-line-names = "max77650-charger";
-	};
diff --git a/Documentation/devicetree/bindings/mfd/max77650.yaml b/Documentation/devicetree/bindings/mfd/max77650.yaml
new file mode 100644
index 000000000000..4a70f875a6eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max77650.yaml
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/max77650.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MAX77650 ultra low-power PMIC from Maxim Integrated.
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description: |
+  MAX77650 is an ultra-low power PMIC providing battery charging and power
+  supply for low-power IoT and wearable applications.
+
+  The GPIO-controller module is represented as part of the top-level PMIC
+  node. The device exposes a single GPIO line.
+
+  For device-tree bindings of other sub-modules (regulator, power supply,
+  LEDs and onkey) refer to the binding documents under the respective
+  sub-system directories.
+
+properties:
+  compatible:
+    const: maxim,max77650
+
+  reg:
+    description:
+      I2C device address.
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+    description:
+      The first cell is the IRQ number, the second cell is the trigger type.
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+    description:
+      The first cell is the pin number and the second cell is used to specify
+      the gpio active state.
+
+  gpio-line-names:
+    maxItems: 1
+    description:
+      Single string containing the name of the GPIO line.
+
+  regulators:
+    $ref: ../regulator/max77650-regulator.yaml
+
+  charger:
+    $ref: ../power/supply/max77650-charger.yaml
+
+  leds:
+    $ref: ../leds/leds-max77650.yaml
+
+  onkey:
+    $ref: ../input/max77650-onkey.yaml
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-controller
+  - "#interrupt-cells"
+  - gpio-controller
+  - "#gpio-cells"
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/input/linux-event-codes.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@48 {
+            compatible = "maxim,max77650";
+            reg = <0x48>;
+
+            interrupt-controller;
+            interrupt-parent = <&gpio2>;
+            #interrupt-cells = <2>;
+            interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+
+            gpio-controller;
+            #gpio-cells = <2>;
+            gpio-line-names = "max77650-charger";
+
+            regulators {
+                compatible = "maxim,max77650-regulator";
+
+                max77650_ldo: regulator@0 {
+                    regulator-compatible = "ldo";
+                    regulator-name = "max77650-ldo";
+                    regulator-min-microvolt = <1350000>;
+                    regulator-max-microvolt = <2937500>;
+                };
+
+                max77650_sbb0: regulator@1 {
+                    regulator-compatible = "sbb0";
+                    regulator-name = "max77650-sbb0";
+                    regulator-min-microvolt = <800000>;
+                    regulator-max-microvolt = <1587500>;
+                };
+            };
+
+            charger {
+                compatible = "maxim,max77650-charger";
+                input-voltage-min-microvolt = <4200000>;
+                input-current-limit-microamp = <285000>;
+            };
+
+            leds {
+                compatible = "maxim,max77650-led";
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                led@0 {
+                    reg = <0>;
+                    label = "blue:usr0";
+                };
+
+                led@1 {
+                    reg = <1>;
+                    label = "red:usr1";
+                    linux,default-trigger = "heartbeat";
+                };
+
+                led@2 {
+                    reg = <2>;
+                    label = "green:usr2";
+                };
+            };
+
+            onkey {
+                compatible = "maxim,max77650-onkey";
+                linux,code = <KEY_END>;
+                maxim,onkey-slide;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mfd/max77693.txt b/Documentation/devicetree/bindings/mfd/max77693.txt
index a3c60a7a3be1..0ced96e16c16 100644
--- a/Documentation/devicetree/bindings/mfd/max77693.txt
+++ b/Documentation/devicetree/bindings/mfd/max77693.txt
@@ -175,6 +175,7 @@ Example:
 			maxim,thermal-regulation-celsius = <75>;
 			maxim,battery-overcurrent-microamp = <3000000>;
 			maxim,charge-input-threshold-microvolt = <4300000>;
+		};
 
 		led {
 			compatible = "maxim,max77693-led";
diff --git a/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt
index 143706222a51..fffc8fde3302 100644
--- a/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt
+++ b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt
@@ -29,6 +29,8 @@ Required properties:
                    "qcom,pm8916",
                    "qcom,pm8004",
                    "qcom,pm8909",
+                   "qcom,pm8950",
+                   "qcom,pmi8950",
                    "qcom,pm8998",
                    "qcom,pmi8998",
                    "qcom,pm8005",
diff --git a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt
index d759da606f75..30ea27c3936d 100644
--- a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt
+++ b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt
@@ -18,7 +18,7 @@ an optional sub-node. For "samsung,exynos5433-lpass" compatible this includes:
 UART, SLIMBUS, PCM, I2S, DMAC, Timers 0...4, VIC, WDT 0...1 devices.
 
 Bindings of the sub-nodes are described in:
-  ../serial/samsung_uart.txt
+  ../serial/samsung_uart.yaml
   ../sound/samsung-i2s.txt
   ../dma/arm-pl330.txt
 
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
new file mode 100644
index 000000000000..1a4cc5f3fb33
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/st,stm32-lptimer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Low-Power Timers bindings
+
+description: |
+  The STM32 Low-Power Timer (LPTIM) is a 16-bit timer that provides several
+  functions
+   - PWM output (with programmable prescaler, configurable polarity)
+   - Trigger source for STM32 ADC/DAC (LPTIM_OUT)
+   - Several counter modes:
+     - quadrature encoder to detect angular position and direction of rotary
+       elements, from IN1 and IN2 input signals.
+     - simple counter from IN1 input signal.
+
+maintainers:
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-lptimer
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: mux
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  pwm:
+    type: object
+
+    properties:
+      compatible:
+        const: st,stm32-pwm-lp
+
+      "#pwm-cells":
+        const: 3
+
+    required:
+      - "#pwm-cells"
+      - compatible
+
+patternProperties:
+  "^trigger@[0-9]+$":
+    type: object
+
+    properties:
+      compatible:
+        const: st,stm32-lptimer-trigger
+
+      reg:
+        description: Identify trigger hardware block.
+        items:
+         minimum: 0
+         maximum: 2
+
+    required:
+      - compatible
+      - reg
+
+  counter:
+    type: object
+
+    properties:
+      compatible:
+        const: st,stm32-lptimer-counter
+
+    required:
+      - compatible
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    timer@40002400 {
+      compatible = "st,stm32-lptimer";
+      reg = <0x40002400 0x400>;
+      clocks = <&timer_clk>;
+      clock-names = "mux";
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      pwm {
+        compatible = "st,stm32-pwm-lp";
+        #pwm-cells = <3>;
+      };
+
+      trigger@0 {
+        compatible = "st,stm32-lptimer-trigger";
+        reg = <0>;
+      };
+
+      counter {
+        compatible = "st,stm32-lptimer-counter";
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
new file mode 100644
index 000000000000..590849ee9f32
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/st,stm32-timers.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Timers bindings
+
+description: |
+  This hardware block provides 3 types of timer along with PWM functionality:
+    - advanced-control timers consist of a 16-bit auto-reload counter driven
+      by a programmable prescaler, break input feature, PWM outputs and
+      complementary PWM outputs channels.
+    - general-purpose timers consist of a 16-bit or 32-bit auto-reload counter
+      driven by a programmable prescaler and PWM outputs.
+    - basic timers consist of a 16-bit auto-reload counter driven by a
+      programmable prescaler.
+
+maintainers:
+  - Benjamin Gaignard <benjamin.gaignard@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-timers
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: int
+
+  reset:
+    maxItems: 1
+
+  dmas:
+    minItems: 1
+    maxItems: 7
+
+  dma-names:
+    items:
+      enum: [ ch1, ch2, ch3, ch4, up, trig, com ]
+    minItems: 1
+    maxItems: 7
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  pwm:
+    type: object
+
+    properties:
+      compatible:
+        const: st,stm32-pwm
+
+      "#pwm-cells":
+        const: 3
+
+      st,breakinput:
+        description:
+          One or two <index level filter> to describe break input
+          configurations.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32-matrix
+          - items:
+              items:
+                - description: |
+                    "index" indicates on which break input (0 or 1) the
+                    configuration should be applied.
+                  enum: [ 0 , 1]
+                - description: |
+                    "level" gives the active level (0=low or 1=high) of the
+                    input signal for this configuration
+                  enum: [ 0, 1 ]
+                - description: |
+                    "filter" gives the filtering value (up to 15) to be applied.
+                  maximum: 15
+            minItems: 1
+            maxItems: 2
+
+    required:
+      - "#pwm-cells"
+      - compatible
+
+patternProperties:
+  "^timer@[0-9]+$":
+    type: object
+
+    properties:
+      compatible:
+        enum:
+          - st,stm32-timer-trigger
+          - st,stm32h7-timer-trigger
+
+      reg:
+        description: Identify trigger hardware block.
+        items:
+         minimum: 0
+         maximum: 16
+
+    required:
+      - compatible
+      - reg
+
+  counter:
+    type: object
+
+    properties:
+      compatible:
+        const: st,stm32-timer-counter
+
+    required:
+      - compatible
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    timers2: timers@40000000 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      compatible = "st,stm32-timers";
+      reg = <0x40000000 0x400>;
+      clocks = <&rcc TIM2_K>;
+      clock-names = "int";
+      dmas = <&dmamux1 18 0x400 0x1>,
+             <&dmamux1 19 0x400 0x1>,
+             <&dmamux1 20 0x400 0x1>,
+             <&dmamux1 21 0x400 0x1>,
+             <&dmamux1 22 0x400 0x1>;
+      dma-names = "ch1", "ch2", "ch3", "ch4", "up";
+      pwm {
+        compatible = "st,stm32-pwm";
+        #pwm-cells = <3>;
+        st,breakinput = <0 1 5>;
+      };
+      timer@0 {
+        compatible = "st,stm32-timer-trigger";
+        reg = <0>;
+      };
+      counter {
+        compatible = "st,stm32-timer-counter";
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/stm32-lptimer.txt b/Documentation/devicetree/bindings/mfd/stm32-lptimer.txt
deleted file mode 100644
index fb54e4dad5b3..000000000000
--- a/Documentation/devicetree/bindings/mfd/stm32-lptimer.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-STMicroelectronics STM32 Low-Power Timer
-
-The STM32 Low-Power Timer (LPTIM) is a 16-bit timer that provides several
-functions:
-- PWM output (with programmable prescaler, configurable polarity)
-- Quadrature encoder, counter
-- Trigger source for STM32 ADC/DAC (LPTIM_OUT)
-
-Required properties:
-- compatible:		Must be "st,stm32-lptimer".
-- reg:			Offset and length of the device's register set.
-- clocks:		Phandle to the clock used by the LP Timer module.
-- clock-names:		Must be "mux".
-- #address-cells:	Should be '<1>'.
-- #size-cells:		Should be '<0>'.
-
-Optional subnodes:
-- pwm:			See ../pwm/pwm-stm32-lp.txt
-- counter:		See ../counter/stm32-lptimer-cnt.txt
-- trigger:		See ../iio/timer/stm32-lptimer-trigger.txt
-
-Example:
-
-	timer@40002400 {
-		compatible = "st,stm32-lptimer";
-		reg = <0x40002400 0x400>;
-		clocks = <&timer_clk>;
-		clock-names = "mux";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		pwm {
-			compatible = "st,stm32-pwm-lp";
-			pinctrl-names = "default";
-			pinctrl-0 = <&lppwm1_pins>;
-		};
-
-		trigger@0 {
-			compatible = "st,stm32-lptimer-trigger";
-			reg = <0>;
-		};
-
-		counter {
-			compatible = "st,stm32-lptimer-counter";
-			pinctrl-names = "default";
-			pinctrl-0 = <&lptim1_in_pins>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
deleted file mode 100644
index 15c3b87f51d9..000000000000
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-STM32 Timers driver bindings
-
-This IP provides 3 types of timer along with PWM functionality:
-- advanced-control timers consist of a 16-bit auto-reload counter driven by a programmable
-  prescaler, break input feature, PWM outputs and complementary PWM ouputs channels.
-- general-purpose timers consist of a 16-bit or 32-bit auto-reload counter driven by a
-  programmable prescaler and PWM outputs.
-- basic timers consist of a 16-bit auto-reload counter driven by a programmable prescaler.
-
-Required parameters:
-- compatible: must be "st,stm32-timers"
-
-- reg:			Physical base address and length of the controller's
-			registers.
-- clock-names:		Set to "int".
-- clocks: 		Phandle to the clock used by the timer module.
-			For Clk properties, please refer to ../clock/clock-bindings.txt
-
-Optional parameters:
-- resets:		Phandle to the parent reset controller.
-			See ../reset/st,stm32-rcc.txt
-- dmas:			List of phandle to dma channels that can be used for
-			this timer instance. There may be up to 7 dma channels.
-- dma-names:		List of dma names. Must match 'dmas' property. Valid
-			names are: "ch1", "ch2", "ch3", "ch4", "up", "trig",
-			"com".
-
-Optional subnodes:
-- pwm:			See ../pwm/pwm-stm32.txt
-- timer:		See ../iio/timer/stm32-timer-trigger.txt
-- counter:		See ../counter/stm32-timer-cnt.txt
-
-Example:
-	timers@40010000 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "st,stm32-timers";
-		reg = <0x40010000 0x400>;
-		clocks = <&rcc 0 160>;
-		clock-names = "int";
-
-		pwm {
-			compatible = "st,stm32-pwm";
-			pinctrl-0	= <&pwm1_pins>;
-			pinctrl-names	= "default";
-		};
-
-		timer@0 {
-			compatible = "st,stm32-timer-trigger";
-			reg = <0>;
-		};
-
-		counter {
-			compatible = "st,stm32-timer-counter";
-			pinctrl-names = "default";
-			pinctrl-0 = <&tim1_in_pins>;
-		};
-	};
-
-Example with all dmas:
-	timer@40010000 {
-		...
-		dmas = <&dmamux1 11 0x400 0x0>,
-		       <&dmamux1 12 0x400 0x0>,
-		       <&dmamux1 13 0x400 0x0>,
-		       <&dmamux1 14 0x400 0x0>,
-		       <&dmamux1 15 0x400 0x0>,
-		       <&dmamux1 16 0x400 0x0>,
-		       <&dmamux1 17 0x400 0x0>;
-		dma-names = "ch1", "ch2", "ch3", "ch4", "up", "trig", "com";
-		...
-		child nodes...
-	};
diff --git a/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt b/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt
deleted file mode 100644
index daa091c2e67b..000000000000
--- a/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-* Allwinner PRCM (Power/Reset/Clock Management) Multi-Functional Device
-
-PRCM is an MFD device exposing several Power Management related devices
-(like clks and reset controllers).
-
-Required properties:
- - compatible: "allwinner,sun6i-a31-prcm" or "allwinner,sun8i-a23-prcm"
- - reg: The PRCM registers range
-
-The prcm node may contain several subdevices definitions:
- - see Documentation/devicetree/bindings/clock/sunxi.txt for clock devices
- - see Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt for reset
-   controller devices
-
-
-Example:
-
-	prcm: prcm@1f01400 {
-		compatible = "allwinner,sun6i-a31-prcm";
-		reg = <0x01f01400 0x200>;
-
-		/* Put subdevices here */
-		ar100: ar100_clk {
-			compatible = "allwinner,sun6i-a31-ar100-clk";
-			#clock-cells = <0>;
-			clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>;
-		};
-
-		ahb0: ahb0_clk {
-			compatible = "fixed-factor-clock";
-			#clock-cells = <0>;
-			clock-div = <1>;
-			clock-mult = <1>;
-			clocks = <&ar100_div>;
-			clock-output-names = "ahb0";
-		};
-
-		apb0: apb0_clk {
-			compatible = "allwinner,sun6i-a31-apb0-clk";
-			#clock-cells = <0>;
-			clocks = <&ahb0>;
-			clock-output-names = "apb0";
-		};
-
-		apb0_gates: apb0_gates_clk {
-			compatible = "allwinner,sun6i-a31-apb0-gates-clk";
-			#clock-cells = <1>;
-			clocks = <&apb0>;
-			clock-output-names = "apb0_pio", "apb0_ir",
-					"apb0_timer01", "apb0_p2wi",
-					"apb0_uart", "apb0_1wire",
-					"apb0_i2c";
-		};
-
-		apb0_rst: apb0_rst {
-			compatible = "allwinner,sun6i-a31-clock-reset";
-			#reset-cells = <1>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/mfd/syscon.txt b/Documentation/devicetree/bindings/mfd/syscon.txt
deleted file mode 100644
index 25d9e9c2fd53..000000000000
--- a/Documentation/devicetree/bindings/mfd/syscon.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-* System Controller Registers R/W driver
-
-System controller node represents a register region containing a set
-of miscellaneous registers. The registers are not cohesive enough to
-represent as any specific type of device. The typical use-case is for
-some other node's driver, or platform-specific code, to acquire a
-reference to the syscon node (e.g. by phandle, node path, or search
-using a specific compatible value), interrogate the node (or associated
-OS driver) to determine the location of the registers, and access the
-registers directly.
-
-Required properties:
-- compatible: Should contain "syscon".
-- reg: the register region can be accessed from syscon
-
-Optional property:
-- reg-io-width: the size (in bytes) of the IO accesses that should be
-  performed on the device.
-- hwlocks: reference to a phandle of a hardware spinlock provider node.
-
-Examples:
-gpr: iomuxc-gpr@20e0000 {
-	compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
-	reg = <0x020e0000 0x38>;
-	hwlocks = <&hwlock1 1>;
-};
-
-hwlock1: hwspinlock@40500000 {
-	...
-	reg = <0x40500000 0x1000>;
-	#hwlock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml
new file mode 100644
index 000000000000..39375e4313d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/syscon.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/syscon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: System Controller Registers R/W Device Tree Bindings
+
+description: |
+  System controller node represents a register region containing a set
+  of miscellaneous registers. The registers are not cohesive enough to
+  represent as any specific type of device. The typical use-case is
+  for some other node's driver, or platform-specific code, to acquire
+  a reference to the syscon node (e.g. by phandle, node path, or
+  search using a specific compatible value), interrogate the node (or
+  associated OS driver) to determine the location of the registers,
+  and access the registers directly.
+
+maintainers:
+  - Lee Jones <lee.jones@linaro.org>
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - syscon
+
+  required:
+    - compatible
+
+properties:
+  compatible:
+    anyOf:
+      - items:
+        - enum:
+          - allwinner,sun8i-a83t-system-controller
+          - allwinner,sun8i-h3-system-controller
+          - allwinner,sun8i-v3s-system-controller
+          - allwinner,sun50i-a64-system-controller
+
+        - const: syscon
+
+      - contains:
+          const: syscon
+        additionalItems: true
+
+  reg:
+    maxItems: 1
+
+  reg-io-width:
+    description: |
+      The size (in bytes) of the IO accesses that should be performed
+      on the device.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [ 1, 2, 4, 8 ]
+
+  hwlocks:
+    maxItems: 1
+    description:
+      Reference to a phandle of a hardware spinlock provider node.
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    syscon: syscon@1c00000 {
+        compatible = "allwinner,sun8i-h3-system-controller", "syscon";
+        reg = <0x01c00000 0x1000>;
+    };
+
+  - |
+    gpr: iomuxc-gpr@20e0000 {
+        compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
+        reg = <0x020e0000 0x38>;
+        hwlocks = <&hwlock1 1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml b/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml
new file mode 100644
index 000000000000..abc9937506e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Bootlin
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mfd/xylon,logicvc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Xylon LogiCVC multi-function device
+
+maintainers:
+  - Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+
+description: |
+  The LogiCVC is a display controller that also contains a GPIO controller.
+  As a result, a multi-function device is exposed as parent of the display
+  and GPIO blocks.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - xylon,logicvc-3.02.a
+      - const: syscon
+      - const: simple-mfd
+
+  reg:
+    maxItems: 1
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - xylon,logicvc-3.02.a
+
+  required:
+    - compatible
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    logicvc: logicvc@43c00000 {
+      compatible = "xylon,logicvc-3.02.a", "syscon", "simple-mfd";
+      reg = <0x43c00000 0x6000>;
+      #address-cells = <1>;
+      #size-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/mips/ralink.txt b/Documentation/devicetree/bindings/mips/ralink.txt
index a16e8d7fe56c..8cc0ab41578c 100644
--- a/Documentation/devicetree/bindings/mips/ralink.txt
+++ b/Documentation/devicetree/bindings/mips/ralink.txt
@@ -16,3 +16,17 @@ value must be one of the following values:
   ralink,mt7620a-soc
   ralink,mt7620n-soc
   ralink,mt7628a-soc
+  ralink,mt7688a-soc
+
+2. Boards
+
+GARDENA smart Gateway (MT7688)
+
+This board is based on the MediaTek MT7688 and equipped with 128 MiB
+of DDR and 8 MiB of flash (SPI NOR) and additional 128MiB SPI NAND
+storage.
+
+------------------------------
+Required root node properties:
+- compatible = "gardena,smart-gateway-mt7688", "ralink,mt7688a-soc",
+		"ralink,mt7628a-soc";
diff --git a/Documentation/devicetree/bindings/misc/allwinner,syscon.txt b/Documentation/devicetree/bindings/misc/allwinner,syscon.txt
deleted file mode 100644
index 31494a24fe69..000000000000
--- a/Documentation/devicetree/bindings/misc/allwinner,syscon.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* Allwinner sun8i system controller
-
-This file describes the bindings for the system controller present in
-Allwinner SoC H3, A83T and A64.
-The principal function of this syscon is to control EMAC PHY choice and
-config.
-
-Required properties for the system controller:
-- reg: address and length of the register for the device.
-- compatible: should be "syscon" and one of the following string:
-		"allwinner,sun8i-h3-system-controller"
-		"allwinner,sun8i-v3s-system-controller"
-		"allwinner,sun50i-a64-system-controller"
-		"allwinner,sun8i-a83t-system-controller"
-
-Example:
-syscon: syscon@1c00000 {
-	compatible = "allwinner,sun8i-h3-system-controller", "syscon";
-	reg = <0x01c00000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
index d2d4308596b8..e82c9a07b6fb 100644
--- a/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells": true
@@ -85,6 +85,8 @@ required:
   - clocks
   - clock-names
 
+unevaluatedProperties: false
+
 examples:
   - |
     mmc0: mmc@1c0f000 {
@@ -97,8 +99,4 @@ examples:
         cd-gpios = <&pio 7 1 0>;
     };
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
-
 ...
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
index 7ca0aa7ccc0b..428685eb2ded 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
@@ -15,10 +15,15 @@ Required Properties:
     - "arasan,sdhci-5.1": generic Arasan SDHCI 5.1 PHY
     - "rockchip,rk3399-sdhci-5.1", "arasan,sdhci-5.1": rk3399 eMMC PHY
       For this device it is strongly suggested to include arasan,soc-ctl-syscon.
+    - "xlnx,zynqmp-8.9a": ZynqMP SDHCI 8.9a PHY
+      For this device it is strongly suggested to include clock-output-names and
+      #clock-cells.
     - "ti,am654-sdhci-5.1", "arasan,sdhci-5.1": TI AM654 MMC PHY
 	Note: This binding has been deprecated and moved to [5].
     - "intel,lgm-sdhci-5.1-emmc", "arasan,sdhci-5.1": Intel LGM eMMC PHY
       For this device it is strongly suggested to include arasan,soc-ctl-syscon.
+    - "intel,lgm-sdhci-5.1-sdxc", "arasan,sdhci-5.1": Intel LGM SDXC PHY
+      For this device it is strongly suggested to include arasan,soc-ctl-syscon.
 
   [5] Documentation/devicetree/bindings/mmc/sdhci-am654.txt
 
@@ -38,15 +43,19 @@ Optional Properties:
   - clock-output-names: If specified, this will be the name of the card clock
     which will be exposed by this device.  Required if #clock-cells is
     specified.
-  - #clock-cells: If specified this should be the value <0>.  With this property
-    in place we will export a clock representing the Card Clock.  This clock
-    is expected to be consumed by our PHY.  You must also specify
+  - #clock-cells: If specified this should be the value <0> or <1>. With this
+    property in place we will export one or two clocks representing the Card
+    Clock. These clocks are expected to be consumed by our PHY.
   - xlnx,fails-without-test-cd: when present, the controller doesn't work when
     the CD line is not connected properly, and the line is not connected
     properly. Test mode can be used to force the controller to function.
   - xlnx,int-clock-stable-broken: when present, the controller always reports
     that the internal clock is stable even when it is not.
 
+  - xlnx,mio-bank: When specified, this will indicate the MIO bank number in
+    which the command and data lines are configured. If not specified, driver
+    will assume this as 0.
+
 Example:
 	sdhci@e0100000 {
 		compatible = "arasan,sdhci-8.9a";
@@ -83,6 +92,18 @@ Example:
 		#clock-cells = <0>;
 	};
 
+	sdhci: mmc@ff160000 {
+		compatible = "xlnx,zynqmp-8.9a", "arasan,sdhci-8.9a";
+		interrupt-parent = <&gic>;
+		interrupts = <0 48 4>;
+		reg = <0x0 0xff160000 0x0 0x1000>;
+		clocks = <&clk200>, <&clk200>;
+		clock-names = "clk_xin", "clk_ahb";
+		clock-output-names = "clk_out_sd0", "clk_in_sd0";
+		#clock-cells = <1>;
+		clk-phase-sd-hs = <63>, <72>;
+	};
+
 	emmc: sdhci@ec700000 {
 		compatible = "intel,lgm-sdhci-5.1-emmc", "arasan,sdhci-5.1";
 		reg = <0xec700000 0x300>;
@@ -97,3 +118,18 @@ Example:
 		phy-names = "phy_arasan";
 		arasan,soc-ctl-syscon = <&sysconf>;
 	};
+
+	sdxc: sdhci@ec600000 {
+		compatible = "arasan,sdhci-5.1", "intel,lgm-sdhci-5.1-sdxc";
+		reg = <0xec600000 0x300>;
+		interrupt-parent = <&ioapic1>;
+		interrupts = <43 1>;
+		clocks = <&cgu0 LGM_CLK_SDIO>, <&cgu0 LGM_CLK_NGI>,
+			 <&cgu0 LGM_GCLK_SDXC>;
+		clock-names = "clk_xin", "clk_ahb", "gate";
+		clock-output-names = "sdxc_cardclock";
+		#clock-cells = <0>;
+		phys = <&sdxc_phy>;
+		phy-names = "phy_arasan";
+		arasan,soc-ctl-syscon = <&sysconf>;
+	};
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.txt
index 733b64a4d8eb..ae2074184528 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.txt
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.txt
@@ -11,28 +11,43 @@ Required properties:
 - compatible: should be one of the following
   - "brcm,bcm7425-sdhci"
   - "brcm,bcm7445-sdhci"
+  - "brcm,bcm7216-sdhci"
 
 Refer to clocks/clock-bindings.txt for generic clock consumer properties.
 
 Example:
 
-	sdhci@f03e0100 {
-		compatible = "brcm,bcm7425-sdhci";
-		reg = <0xf03e0000 0x100>;
-		interrupts = <0x0 0x26 0x0>;
-		sdhci,auto-cmd12;
-		clocks = <&sw_sdio>;
+	sdhci@84b0000 {
 		sd-uhs-sdr50;
 		sd-uhs-ddr50;
+		sd-uhs-sdr104;
+		sdhci,auto-cmd12;
+		compatible = "brcm,bcm7216-sdhci",
+			   "brcm,bcm7445-sdhci",
+			   "brcm,sdhci-brcmstb";
+		reg = <0x84b0000 0x260 0x84b0300 0x200>;
+		reg-names = "host", "cfg";
+		interrupts = <0x0 0x26 0x4>;
+		interrupt-names = "sdio0_0";
+		clocks = <&scmi_clk 245>;
+		clock-names = "sw_sdio";
 	};
 
-	sdhci@f03e0300 {
+	sdhci@84b1000 {
+		mmc-ddr-1_8v;
+		mmc-hs200-1_8v;
+		mmc-hs400-1_8v;
+		mmc-hs400-enhanced-strobe;
+		supports-cqe;
 		non-removable;
 		bus-width = <0x8>;
-		compatible = "brcm,bcm7425-sdhci";
-		reg = <0xf03e0200 0x100>;
-		interrupts = <0x0 0x27 0x0>;
-		sdhci,auto-cmd12;
-		clocks = <sw_sdio>;
-		mmc-hs200-1_8v;
+		compatible = "brcm,bcm7216-sdhci",
+			   "brcm,bcm7445-sdhci",
+			   "brcm,sdhci-brcmstb";
+		reg = <0x84b1000 0x260 0x84b1300 0x200>;
+		reg-names = "host", "cfg";
+		interrupts = <0x0 0x27 0x4>;
+		interrupt-names = "sdio1_0";
+		clocks = <&scmi_clk 245>;
+		clock-names = "sw_sdio";
 	};
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
index f707b8bee304..0f97d711444e 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
@@ -18,6 +18,11 @@ Required properties:
 	       "fsl,imx6ull-usdhc"
 	       "fsl,imx7d-usdhc"
 	       "fsl,imx7ulp-usdhc"
+	       "fsl,imx8mq-usdhc"
+	       "fsl,imx8mm-usdhc"
+	       "fsl,imx8mn-usdhc"
+	       "fsl,imx8mp-usdhc"
+	       "fsl,imx8qm-usdhc"
 	       "fsl,imx8qxp-usdhc"
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/mmc/jz4740.txt b/Documentation/devicetree/bindings/mmc/jz4740.txt
index 8a6f87f13114..453d3b9d145d 100644
--- a/Documentation/devicetree/bindings/mmc/jz4740.txt
+++ b/Documentation/devicetree/bindings/mmc/jz4740.txt
@@ -1,14 +1,16 @@
-* Ingenic JZ47xx MMC controllers
+* Ingenic XBurst MMC controllers
 
 This file documents the device tree properties used for the MMC controller in
-Ingenic JZ4740/JZ4780 SoCs. These are in addition to the core MMC properties
-described in mmc.txt.
+Ingenic JZ4740/JZ4760/JZ4780/X1000 SoCs. These are in addition to the core MMC
+properties described in mmc.txt.
 
 Required properties:
 - compatible: Should be one of the following:
   - "ingenic,jz4740-mmc" for the JZ4740
   - "ingenic,jz4725b-mmc" for the JZ4725B
+  - "ingenic,jz4760-mmc" for the JZ4760
   - "ingenic,jz4780-mmc" for the JZ4780
+  - "ingenic,x1000-mmc" for the X1000
 - reg: Should contain the MMC controller registers location and length.
 - interrupts: Should contain the interrupt specifier of the MMC controller.
 - clocks: Clock for the MMC controller.
diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index 080754e0ef35..3c0df4016a12 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -96,11 +96,10 @@ properties:
     description:
       When set, no physical write-protect line is present. This
       property should only be specified when the controller has a
-      dedicated write-protect detection logic. If a GPIO is always
-      used for the write-protect detection. If a GPIO is always used
+      dedicated write-protect detection logic. If a GPIO is always used
       for the write-protect detection logic, it is sufficient to not
       specify the wp-gpios property in the absence of a write-protect
-      line.
+      line. Not used in combination with eMMC or SDIO.
 
   wp-gpios:
     description:
@@ -333,6 +332,19 @@ patternProperties:
     required:
       - reg
 
+  "^clk-phase-(legacy|sd-hs|mmc-(hs|hs[24]00|ddr52)|uhs-(sdr(12|25|50|104)|ddr50))$":
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 2
+    maxItems: 2
+    items:
+      minimum: 0
+      maximum: 359
+      description:
+        Set the clock (phase) delays which are to be configured in the
+        controller while switching to particular speed mode. These values
+        are in pair of degrees.
+
 dependencies:
   cd-debounce-delay-ms: [ cd-gpios ]
   fixed-emmc-driver-type: [ non-removable ]
@@ -351,6 +363,7 @@ examples:
         keep-power-in-suspend;
         wakeup-source;
         mmc-pwrseq = <&sdhci0_pwrseq>;
+        clk-phase-sd-hs = <63>, <72>;
     };
 
   - |
diff --git a/Documentation/devicetree/bindings/mmc/owl-mmc.yaml b/Documentation/devicetree/bindings/mmc/owl-mmc.yaml
new file mode 100644
index 000000000000..12b40213426d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/owl-mmc.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/owl-mmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi Owl SoCs SD/MMC/SDIO controller
+
+allOf:
+  - $ref: "mmc-controller.yaml"
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+  compatible:
+    const: actions,owl-mmc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    maxItems: 1
+
+  dma-names:
+    const: mmc
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+  - dmas
+  - dma-names
+
+examples:
+  - |
+    mmc0: mmc@e0330000 {
+        compatible = "actions,owl-mmc";
+        reg = <0x0 0xe0330000 0x0 0x4000>;
+        interrupts = <0 42 4>;
+        clocks = <&cmu 56>;
+        resets = <&cmu 23>;
+        dmas = <&dma 2>;
+        dma-names = "mmc";
+        bus-width = <4>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
index dd08d038a65c..e6cc47844207 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
+++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
@@ -11,6 +11,7 @@ Required properties:
 		"renesas,sdhi-r8a7744" - SDHI IP on R8A7744 SoC
 		"renesas,sdhi-r8a7745" - SDHI IP on R8A7745 SoC
 		"renesas,sdhi-r8a774a1" - SDHI IP on R8A774A1 SoC
+		"renesas,sdhi-r8a774b1" - SDHI IP on R8A774B1 SoC
 		"renesas,sdhi-r8a774c0" - SDHI IP on R8A774C0 SoC
 		"renesas,sdhi-r8a77470" - SDHI IP on R8A77470 SoC
 		"renesas,sdhi-mmc-r8a77470" - SDHI/MMC IP on R8A77470 SoC
@@ -22,7 +23,8 @@ Required properties:
 		"renesas,sdhi-r8a7793" - SDHI IP on R8A7793 SoC
 		"renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
 		"renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC
-		"renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC
+		"renesas,sdhi-r8a7796" - SDHI IP on R8A77960 SoC
+		"renesas,sdhi-r8a77961" - SDHI IP on R8A77961 SoC
 		"renesas,sdhi-r8a77965" - SDHI IP on R8A77965 SoC
 		"renesas,sdhi-r8a77970" - SDHI IP on R8A77970 SoC
 		"renesas,sdhi-r8a77980" - SDHI IP on R8A77980 SoC
diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
deleted file mode 100644
index 6f629b12bd69..000000000000
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* Rockchip specific extensions to the Synopsys Designware Mobile
-  Storage Host Controller
-
-The Synopsys designware mobile storage host controller is used to interface
-a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
-differences between the core Synopsys dw mshc controller properties described
-by synopsys-dw-mshc.txt and the properties used by the Rockchip specific
-extensions to the Synopsys Designware Mobile Storage Host Controller.
-
-Required Properties:
-
-* compatible: should be
-	- "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following,
-							before RK3288
-	- "rockchip,rk3288-dw-mshc": for Rockchip RK3288
-	- "rockchip,rv1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RV1108
-	- "rockchip,px30-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip PX30
-	- "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036
-	- "rockchip,rk3228-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK322x
-	- "rockchip,rk3328-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3328
-	- "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368
-	- "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399
-
-Optional Properties:
-* clocks: from common clock binding: if ciu-drive and ciu-sample are
-  specified in clock-names, should contain handles to these clocks.
-
-* clock-names: Apart from the clock-names described in synopsys-dw-mshc.txt
-  two more clocks "ciu-drive" and "ciu-sample" are supported. They are used
-  to control the clock phases, "ciu-sample" is required for tuning high-
-  speed modes.
-
-* rockchip,default-sample-phase: The default phase to set ciu-sample at
-  probing, low speeds or in case where all phases work at tuning time.
-  If not specified 0 deg will be used.
-
-* rockchip,desired-num-phases: The desired number of times that the host
-  execute tuning when needed. If not specified, the host will do tuning
-  for 360 times, namely tuning for each degree.
-
-Example:
-
-	rkdwmmc0@12200000 {
-		compatible = "rockchip,rk3288-dw-mshc";
-		reg = <0x12200000 0x1000>;
-		interrupts = <0 75 0>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
new file mode 100644
index 000000000000..89c3edd6a728
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/rockchip-dw-mshc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip designware mobile storage host controller device tree bindings
+
+description:
+  Rockchip uses the Synopsys designware mobile storage host controller
+  to interface a SoC with storage medium such as eMMC or SD/MMC cards.
+  This file documents the combined properties for the core Synopsys dw mshc
+  controller that are not already included in the synopsys-dw-mshc-common.yaml
+  file and the Rockchip specific extensions.
+
+allOf:
+  - $ref: "synopsys-dw-mshc-common.yaml#"
+
+maintainers:
+  - Heiko Stuebner <heiko@sntech.de>
+
+# Everything else is described in the common file
+properties:
+  compatible:
+    oneOf:
+      # for Rockchip RK2928 and before RK3288
+      - const: rockchip,rk2928-dw-mshc
+      # for Rockchip RK3288
+      - const: rockchip,rk3288-dw-mshc
+      - items:
+          - enum:
+            # for Rockchip PX30
+            - rockchip,px30-dw-mshc
+            # for Rockchip RK3036
+            - rockchip,rk3036-dw-mshc
+            # for Rockchip RK322x
+            - rockchip,rk3228-dw-mshc
+            # for Rockchip RK3308
+            - rockchip,rk3308-dw-mshc
+            # for Rockchip RK3328
+            - rockchip,rk3328-dw-mshc
+            # for Rockchip RK3368
+            - rockchip,rk3368-dw-mshc
+            # for Rockchip RK3399
+            - rockchip,rk3399-dw-mshc
+            # for Rockchip RV1108
+            - rockchip,rv1108-dw-mshc
+          - const: rockchip,rk3288-dw-mshc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 4
+    description:
+      Handle to "biu" and "ciu" clocks for the bus interface unit clock and
+      the card interface unit clock. If "ciu-drive" and "ciu-sample" are
+      specified in clock-names, it should also contain
+      handles to these clocks.
+
+  clock-names:
+    minItems: 2
+    items:
+      - const: biu
+      - const: ciu
+      - const: ciu-drive
+      - const: ciu-sample
+    description:
+      Apart from the clock-names "biu" and "ciu" two more clocks
+      "ciu-drive" and "ciu-sample" are supported. They are used
+      to control the clock phases, "ciu-sample" is required for tuning
+      high speed modes.
+
+  rockchip,default-sample-phase:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 360
+    default: 0
+    description:
+      The default phase to set "ciu-sample" at probing,
+      low speeds or in case where all phases work at tuning time.
+      If not specified 0 deg will be used.
+
+  rockchip,desired-num-phases:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 360
+    default: 360
+    description:
+      The desired number of times that the host execute tuning when needed.
+      If not specified, the host will do tuning for 360 times,
+      namely tuning for each degree.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/rk3288-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    sdmmc: mmc@ff0c0000 {
+      compatible = "rockchip,rk3288-dw-mshc";
+      reg = <0x0 0xff0c0000 0x0 0x4000>;
+      interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
+               <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
+      clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
+      resets = <&cru SRST_MMC0>;
+      reset-names = "reset";
+      fifo-depth = <0x100>;
+      max-frequency = <150000000>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt b/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
index 1b662d7171a0..69edfd4d3922 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
@@ -5,17 +5,29 @@ Documentation/devicetree/bindings/mmc/mmc.txt and the properties used by the
 sdhci-of-at91 driver.
 
 Required properties:
-- compatible:		Must be "atmel,sama5d2-sdhci".
+- compatible:		Must be "atmel,sama5d2-sdhci" or "microchip,sam9x60-sdhci".
 - clocks:		Phandlers to the clocks.
-- clock-names:		Must be "hclock", "multclk", "baseclk";
+- clock-names:		Must be "hclock", "multclk", "baseclk" for
+			"atmel,sama5d2-sdhci".
+			Must be "hclock", "multclk" for "microchip,sam9x60-sdhci".
 
+Optional properties:
+- assigned-clocks:	The same with "multclk".
+- assigned-clock-rates	The rate of "multclk" in order to not rely on the
+			gck configuration set by previous components.
+- microchip,sdcal-inverted: when present, polarity on the SDCAL SoC pin is
+  inverted. The default polarity for this signal is described in the datasheet.
+  For instance on SAMA5D2, the pin is usually tied to the GND with a resistor
+  and a capacitor (see "SDMMC I/O Calibration" chapter).
 
 Example:
 
-sdmmc0: sdio-host@a0000000 {
+mmc0: sdio-host@a0000000 {
 	compatible = "atmel,sama5d2-sdhci";
 	reg = <0xa0000000 0x300>;
 	interrupts = <31 IRQ_TYPE_LEVEL_HIGH 0>;
 	clocks = <&sdmmc0_hclk>, <&sdmmc0_gclk>, <&main>;
 	clock-names = "hclock", "multclk", "baseclk";
+	assigned-clocks = <&sdmmc0_gclk>;
+	assigned-clock-rates = <480000000>;
 };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-milbeaut.txt b/Documentation/devicetree/bindings/mmc/sdhci-milbeaut.txt
new file mode 100644
index 000000000000..627ee89c125b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-milbeaut.txt
@@ -0,0 +1,30 @@
+* SOCIONEXT Milbeaut SDHCI controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci_milbeaut driver.
+
+Required properties:
+- compatible: "socionext,milbeaut-m10v-sdhci-3.0"
+- clocks: Must contain an entry for each entry in clock-names. It is a
+  list of phandles and clock-specifier pairs.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Should contain the following two entries:
+	"iface" - clock used for sdhci interface
+	"core"  - core clock for sdhci controller
+
+Optional properties:
+- fujitsu,cmd-dat-delay-select: boolean property indicating that this host
+  requires the CMD_DAT_DELAY control to be enabled.
+
+Example:
+	sdhci3: mmc@1b010000 {
+		compatible = "socionext,milbeaut-m10v-sdhci-3.0";
+		reg = <0x1b010000 0x10000>;
+		interrupts = <0 265 0x4>;
+		voltage-ranges = <3300 3300>;
+		bus-width = <4>;
+		clocks = <&clk 7>, <&ahb_clk>;
+		clock-names = "core", "iface";
+		cap-sdio-irq;
+		fujitsu,cmd-dat-delay-select;
+	};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index da4edb146a98..7ee639b1af03 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -19,6 +19,7 @@ Required properties:
 		"qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
 		"qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
 		"qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
+		"qcom,sc7180-sdhci", "qcom,sdhci-msm-v5";
 	NOTE that some old device tree files may be floating around that only
 	have the string "qcom,sdhci-msm-v4" without the SoC compatible string
 	but doing that should be considered a deprecated practice.
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
index 72c4dec7e1db..aeb615ef672a 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
@@ -7,6 +7,8 @@ For UHS devices which require tuning, the device tree should have a "cpu_thermal
 Required properties:
 - compatible: Should be "ti,dra7-sdhci" for DRA7 and DRA72 controllers
 	      Should be "ti,k2g-sdhci" for K2G
+	      Should be "ti,am335-sdhci" for am335x controllers
+	      Should be "ti,am437-sdhci" for am437x controllers
 - ti,hwmods: Must be "mmc<n>", <n> is controller instance starting 1
 	     (Not required for K2G).
 - pinctrl-names: Should be subset of "default", "hs", "sdr12", "sdr25", "sdr50",
@@ -15,6 +17,13 @@ Required properties:
 		 "hs200_1_8v",
 - pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
 
+Optional properties:
+- dmas:		List of DMA specifiers with the controller specific format as described
+		in the generic DMA client binding. A tx and rx specifier is required.
+- dma-names:	List of DMA request names. These strings correspond 1:1 with the
+		DMA specifiers listed in dmas. The string naming is to be "tx"
+		and "rx" for TX and RX DMA requests, respectively.
+
 Example:
 	mmc1: mmc@4809c000 {
 		compatible = "ti,dra7-sdhci";
@@ -22,4 +31,6 @@ Example:
 		ti,hwmods = "mmc1";
 		bus-width = <4>;
 		vmmc-supply = <&vmmc>; /* phandle to regulator node */
+		dmas = <&sdma 61 &sdma 62>;
+		dma-names = "tx", "rx";
 	};
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc-common.yaml b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc-common.yaml
new file mode 100644
index 000000000000..890d47a87ac5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc-common.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/synopsys-dw-mshc-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys Designware Mobile Storage Host Controller Common Properties
+
+allOf:
+  - $ref: "mmc-controller.yaml#"
+
+maintainers:
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+# Everything else is described in the common file
+properties:
+  resets:
+    maxItems: 1
+
+  reset-names:
+    const: reset
+
+  clock-frequency:
+    description:
+      Should be the frequency (in Hz) of the ciu clock.  If this
+      is specified and the ciu clock is specified then we'll try to set the ciu
+      clock to this at probe time.
+
+  fifo-depth:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The maximum size of the tx/rx fifo's. If this property is not
+      specified, the default value of the fifo size is determined from the
+      controller registers.
+
+  card-detect-delay:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - default: 0
+    description:
+      Delay in milli-seconds before detecting card after card
+      insert event. The default value is 0.
+
+  data-addr:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Override fifo address with value provided by DT. The default FIFO reg
+      offset is assumed as 0x100 (version < 0x240A) and 0x200(version >= 0x240A)
+      by driver. If the controller does not follow this rule, please use
+      this property to set fifo address in device tree.
+
+  fifo-watermark-aligned:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Data done irq is expected if data length is less than
+      watermark in PIO mode. But fifo watermark is requested to be aligned
+      with data length in some SoC so that TX/RX irq can be generated with
+      data done irq. Add this watermark quirk to mark this requirement and
+      force fifo watermark setting accordingly.
+
+  dmas:
+    maxItems: 1
+
+  dma-names:
+    const: rx-tx
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
deleted file mode 100644
index 7e5e427a22ce..000000000000
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-* Synopsys Designware Mobile Storage Host Controller
-
-The Synopsys designware mobile storage host controller is used to interface
-a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
-differences between the core mmc properties described by mmc.txt and the
-properties used by the Synopsys Designware Mobile Storage Host Controller.
-
-Required Properties:
-
-* compatible: should be
-	- snps,dw-mshc: for controllers compliant with synopsys dw-mshc.
-* #address-cells: should be 1.
-* #size-cells: should be 0.
-
-# Slots (DEPRECATED): The slot specific information are contained within
-  child-nodes with each child-node representing a supported slot. There should
-  be atleast one child node representing a card slot. The name of the child node
-  representing the slot is recommended to be slot@n where n is the unique number
-  of the slot connected to the controller. The following are optional properties
-  which can be included in the slot child node.
-
-	* reg: specifies the physical slot number. The valid values of this
-	  property is 0 to (num-slots -1), where num-slots is the value
-	  specified by the num-slots property.
-
-	* bus-width: as documented in mmc core bindings.
-
-	* wp-gpios: specifies the write protect gpio line. The format of the
-	  gpio specifier depends on the gpio controller. If a GPIO is not used
-	  for write-protect, this property is optional.
-
-	* disable-wp: If the wp-gpios property isn't present then (by default)
-	  we'd assume that the write protect is hooked up directly to the
-	  controller's special purpose write protect line (accessible via
-	  the WRTPRT register).  However, it's possible that we simply don't
-	  want write protect.  In that case specify 'disable-wp'.
-	  NOTE: This property is not required for slots known to always
-	  connect to eMMC or SDIO cards.
-
-Optional properties:
-
-* resets: phandle + reset specifier pair, intended to represent hardware
-  reset signal present internally in some host controller IC designs.
-  See Documentation/devicetree/bindings/reset/reset.txt for details.
-
-* reset-names: request name for using "resets" property. Must be "reset".
-	(It will be used together with "resets" property.)
-
-* clocks: from common clock binding: handle to biu and ciu clocks for the
-  bus interface unit clock and the card interface unit clock.
-
-* clock-names: from common clock binding: Shall be "biu" and "ciu".
-  If the biu clock is missing we'll simply skip enabling it.  If the
-  ciu clock is missing we'll just assume that the clock is running at
-  clock-frequency.  It is an error to omit both the ciu clock and the
-  clock-frequency.
-
-* clock-frequency: should be the frequency (in Hz) of the ciu clock.  If this
-  is specified and the ciu clock is specified then we'll try to set the ciu
-  clock to this at probe time.
-
-* fifo-depth: The maximum size of the tx/rx fifo's. If this property is not
-  specified, the default value of the fifo size is determined from the
-  controller registers.
-
-* card-detect-delay: Delay in milli-seconds before detecting card after card
-  insert event. The default value is 0.
-
-* data-addr: Override fifo address with value provided by DT. The default FIFO reg
-  offset is assumed as 0x100 (version < 0x240A) and 0x200(version >= 0x240A) by
-  driver. If the controller does not follow this rule, please use this property
-  to set fifo address in device tree.
-
-* fifo-watermark-aligned: Data done irq is expected if data length is less than
-  watermark in PIO mode. But fifo watermark is requested to be aligned with data
-  length in some SoC so that TX/RX irq can be generated with data done irq. Add this
-  watermark quirk to mark this requirement and force fifo watermark setting
-  accordingly.
-
-* vmmc-supply: The phandle to the regulator to use for vmmc.  If this is
-  specified we'll defer probe until we can find this regulator.
-
-* dmas: List of DMA specifiers with the controller specific format as described
-  in the generic DMA client binding. Refer to dma.txt for details.
-
-* dma-names: request names for generic DMA client binding. Must be "rx-tx".
-  Refer to dma.txt for details.
-
-Aliases:
-
-- All the MSHC controller nodes should be represented in the aliases node using
-  the following format 'mshc{n}' where n is a unique number for the alias.
-
-Example:
-
-The MSHC controller node can be split into two portions, SoC specific and
-board specific portions as listed below.
-
-	dwmmc0@12200000 {
-		compatible = "snps,dw-mshc";
-		clocks = <&clock 351>, <&clock 132>;
-		clock-names = "biu", "ciu";
-		reg = <0x12200000 0x1000>;
-		interrupts = <0 75 0>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		data-addr = <0x200>;
-		fifo-watermark-aligned;
-		resets = <&rst 20>;
-		reset-names = "reset";
-	};
-
-[board specific internal DMA resources]
-
-	dwmmc0@12200000 {
-		clock-frequency = <400000000>;
-		clock-freq-min-max = <400000 200000000>;
-		broken-cd;
-		fifo-depth = <0x80>;
-		card-detect-delay = <200>;
-		vmmc-supply = <&buck8>;
-		bus-width = <8>;
-		cap-mmc-highspeed;
-		cap-sd-highspeed;
-	};
-
-[board specific generic DMA request binding]
-
-	dwmmc0@12200000 {
-		clock-frequency = <400000000>;
-		clock-freq-min-max = <400000 200000000>;
-		broken-cd;
-		fifo-depth = <0x80>;
-		card-detect-delay = <200>;
-		vmmc-supply = <&buck8>;
-		bus-width = <8>;
-		cap-mmc-highspeed;
-		cap-sd-highspeed;
-		dmas = <&pdma 12>;
-		dma-names = "rx-tx";
-	};
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml
new file mode 100644
index 000000000000..05f9f36dcb75
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/synopsys-dw-mshc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys Designware Mobile Storage Host Controller Binding
+
+allOf:
+  - $ref: "synopsys-dw-mshc-common.yaml#"
+
+maintainers:
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+# Everything else is described in the common file
+properties:
+  compatible:
+    const: snps,dw-mshc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 2
+    description:
+      Handle to "biu" and "ciu" clocks for the
+      bus interface unit clock and the card interface unit clock.
+
+  clock-names:
+    items:
+      - const: biu
+      - const: ciu
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    mmc@12200000 {
+      compatible = "snps,dw-mshc";
+      reg = <0x12200000 0x1000>;
+      interrupts = <0 75 0>;
+      clocks = <&clock 351>, <&clock 132>;
+      clock-names = "biu", "ciu";
+      dmas = <&pdma 12>;
+      dma-names = "rx-tx";
+      resets = <&rst 20>;
+      reset-names = "reset";
+      vmmc-supply = <&buck8>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+      broken-cd;
+      bus-width = <8>;
+      cap-mmc-highspeed;
+      cap-sd-highspeed;
+      card-detect-delay = <200>;
+      clock-freq-min-max = <400000 200000000>;
+      clock-frequency = <400000000>;
+      data-addr = <0x200>;
+      fifo-depth = <0x80>;
+      fifo-watermark-aligned;
+    };
diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
index b5b3cf5b1ac2..5d3fa412aabd 100644
--- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells": true
diff --git a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
new file mode 100644
index 000000000000..f3893c4d3c6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
@@ -0,0 +1,53 @@
+* Cadence NAND controller
+
+Required properties:
+  - compatible : "cdns,hp-nfc"
+  - reg : Contains two entries, each of which is a tuple consisting of a
+	  physical address and length. The first entry is the address and
+	  length of the controller register set. The second entry is the
+	  address and length of the Slave DMA data port.
+  - reg-names: should contain "reg" and "sdma"
+  - #address-cells: should be 1. The cell encodes the chip select connection.
+  - #size-cells : should be 0.
+  - interrupts : The interrupt number.
+  - clocks: phandle of the controller core clock (nf_clk).
+
+Optional properties:
+  - dmas: shall reference DMA channel associated to the NAND controller
+  - cdns,board-delay-ps : Estimated Board delay. The value includes the total
+    round trip delay for the signals and is used for deciding on values
+    associated with data read capture. The example formula for SDR mode is
+    the following:
+    board delay = RE#PAD delay + PCB trace to device + PCB trace from device
+    + DQ PAD delay
+
+Child nodes represent the available NAND chips.
+
+Required properties of NAND chips:
+  - reg: shall contain the native Chip Select ids from 0 to max supported by
+    the cadence nand flash controller
+
+See Documentation/devicetree/bindings/mtd/nand.txt for more details on
+generic bindings.
+
+Example:
+
+nand_controller: nand-controller@60000000 {
+	  compatible = "cdns,hp-nfc";
+	  #address-cells = <1>;
+	  #size-cells = <0>;
+	  reg = <0x60000000 0x10000>, <0x80000000 0x10000>;
+	  reg-names = "reg", "sdma";
+	  clocks = <&nf_clk>;
+	  cdns,board-delay-ps = <4830>;
+	  interrupts = <2 0>;
+	  nand@0 {
+	      reg = <0>;
+	      label = "nand-1";
+	  };
+	  nand@1 {
+	      reg = <1>;
+	      label = "nand-2";
+	  };
+
+};
diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index b32aed1db46d..98916a84bbf6 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -14,6 +14,11 @@ Required properties:
     interface clock, and the ECC circuit clock.
   - clock-names: should contain "nand", "nand_x", "ecc"
 
+Optional properties:
+  - resets: may contain phandles to the controller core reset, the register
+    reset
+  - reset-names: may contain "nand", "reg"
+
 Sub-nodes:
   Sub-nodes represent available NAND chips.
 
@@ -46,6 +51,8 @@ nand: nand@ff900000 {
 	reg-names = "nand_data", "denali_reg";
 	clocks = <&nand_clk>, <&nand_x_clk>, <&nand_ecc_clk>;
 	clock-names = "nand", "nand_x", "ecc";
+	resets = <&nand_rst>, <&nand_reg_rst>;
+	reset-names = "nand", "reg";
 	interrupts = <0 144 4>;
 
 	nand@0 {
diff --git a/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt b/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt
new file mode 100644
index 000000000000..4bdcb92ae381
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt
@@ -0,0 +1,22 @@
+Flash device on Intel IXP4xx SoC
+
+This flash is regular CFI compatible (Intel or AMD extended) flash chips with
+specific big-endian or mixed-endian memory access pattern.
+
+Required properties:
+- compatible : must be "intel,ixp4xx-flash", "cfi-flash";
+- reg : memory address for the flash chip
+- bank-width : width in bytes of flash interface, should be <2>
+
+For the rest of the properties, see mtd-physmap.txt.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+flash@50000000 {
+	compatible = "intel,ixp4xx-flash", "cfi-flash";
+	reg = <0x50000000 0x01000000>;
+	bank-width = <2>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
new file mode 100644
index 000000000000..b059267f6d20
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/st,stm32-fmc2-nand.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings
+
+maintainers:
+  - Christophe Kerello <christophe.kerello@st.com>
+
+allOf:
+  - $ref: "nand-controller.yaml#"
+
+properties:
+  compatible:
+    const: st,stm32mp15-fmc2
+
+  reg:
+    items:
+      - description: Registers
+      - description: Chip select 0 data
+      - description: Chip select 0 command
+      - description: Chip select 0 address space
+      - description: Chip select 1 data
+      - description: Chip select 1 command
+      - description: Chip select 1 address space
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: tx DMA channel
+      - description: rx DMA channel
+      - description: ecc DMA channel
+
+  dma-names:
+    items:
+      - const: tx
+      - const: rx
+      - const: ecc
+
+patternProperties:
+  "^nand@[a-f0-9]$":
+    type: object
+    properties:
+      nand-ecc-step-size:
+        const: 512
+
+      nand-ecc-strength:
+        enum: [1, 4 ,8 ]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    nand-controller@58002000 {
+      compatible = "st,stm32mp15-fmc2";
+      reg = <0x58002000 0x1000>,
+            <0x80000000 0x1000>,
+            <0x88010000 0x1000>,
+            <0x88020000 0x1000>,
+            <0x81000000 0x1000>,
+            <0x89010000 0x1000>,
+            <0x89020000 0x1000>;
+            interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+            dmas = <&mdma1 20 0x10 0x12000a02 0x0 0x0>,
+                   <&mdma1 20 0x10 0x12000a08 0x0 0x0>,
+                   <&mdma1 21 0x10 0x12000a0a 0x0 0x0>;
+            dma-names = "tx", "rx", "ecc";
+            clocks = <&rcc FMC_K>;
+            resets = <&rcc FMC_R>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      nand@0 {
+        reg = <0>;
+        nand-on-flash-bbt;
+        #address-cells = <1>;
+        #size-cells = <1>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mtd/stm32-fmc2-nand.txt b/Documentation/devicetree/bindings/mtd/stm32-fmc2-nand.txt
deleted file mode 100644
index e55895e8dae4..000000000000
--- a/Documentation/devicetree/bindings/mtd/stm32-fmc2-nand.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-STMicroelectronics Flexible Memory Controller 2 (FMC2)
-NAND Interface
-
-Required properties:
-- compatible: Should be one of:
-              * st,stm32mp15-fmc2
-- reg: NAND flash controller memory areas.
-       First region contains the register location.
-       Regions 2 to 4 respectively contain the data, command,
-       and address space for CS0.
-       Regions 5 to 7 contain the same areas for CS1.
-- interrupts: The interrupt number
-- pinctrl-0: Standard Pinctrl phandle (see: pinctrl/pinctrl-bindings.txt)
-- clocks: The clock needed by the NAND flash controller
-
-Optional properties:
-- resets: Reference to a reset controller asserting the FMC controller
-- dmas: DMA specifiers (see: dma/stm32-mdma.txt)
-- dma-names: Must be "tx", "rx" and "ecc"
-
-* NAND device bindings:
-
-Required properties:
-- reg: describes the CS lines assigned to the NAND device.
-
-Optional properties:
-- nand-on-flash-bbt: see nand-controller.yaml
-- nand-ecc-strength: see nand-controller.yaml
-- nand-ecc-step-size: see nand-controller.yaml
-
-The following ECC strength and step size are currently supported:
- - nand-ecc-strength = <1>, nand-ecc-step-size = <512> (Hamming)
- - nand-ecc-strength = <4>, nand-ecc-step-size = <512> (BCH4)
- - nand-ecc-strength = <8>, nand-ecc-step-size = <512> (BCH8) (default)
-
-Example:
-
-	fmc: nand-controller@58002000 {
-		compatible = "st,stm32mp15-fmc2";
-		reg = <0x58002000 0x1000>,
-		      <0x80000000 0x1000>,
-		      <0x88010000 0x1000>,
-		      <0x88020000 0x1000>,
-		      <0x81000000 0x1000>,
-		      <0x89010000 0x1000>,
-		      <0x89020000 0x1000>;
-		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&rcc FMC_K>;
-		resets = <&rcc FMC_R>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&fmc_pins_a>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		nand@0 {
-			reg = <0>;
-			nand-on-flash-bbt;
-			#address-cells = <1>;
-			#size-cells = <1>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
index 792196bf4abd..8d8560a67abf 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
@@ -38,6 +38,8 @@ required:
   - phy-handle
   - allwinner,sram
 
+unevaluatedProperties: false
+
 examples:
   - |
     emac: ethernet@1c0b000 {
@@ -49,8 +51,4 @@ examples:
         allwinner,sram = <&emac_sram 1>;
     };
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
-
 ...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
index df24d9d969f7..767193ec1d32 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 MDIO Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "mdio.yaml#"
@@ -49,6 +49,8 @@ required:
   - compatible
   - reg
 
+unevaluatedProperties: false
+
 examples:
   - |
     mdio@1c0b080 {
@@ -63,8 +65,4 @@ examples:
         };
     };
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
-
 ...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
index ef446ae166f3..703d0d886884 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
@@ -49,6 +49,8 @@ required:
   - clock-names
   - phy-mode
 
+unevaluatedProperties: false
+
 examples:
   - |
     gmac: ethernet@1c50000 {
@@ -61,8 +63,4 @@ examples:
         phy-mode = "mii";
     };
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
-
 ...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 3fb0714e761e..db36b4d86484 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -8,7 +8,7 @@ title: Allwinner A83t EMAC Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
@@ -184,6 +184,8 @@ allOf:
             - mdio-parent-bus
             - mdio@1
 
+unevaluatedProperties: false
+
 examples:
   - |
     ethernet@1c0b000 {
@@ -314,8 +316,4 @@ examples:
         };
     };
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
-
 ...
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index b7336b9d6a3c..48a7f916c5e4 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -44,6 +44,12 @@ Optional properties:
   Admission Control Block supports reporting the number of packets in-flight in a
   switch queue
 
+- resets: a single phandle and reset identifier pair. See
+  Documentation/devicetree/binding/reset/reset.txt for details.
+
+- reset-names: If the "reset" property is specified, this property should have
+  the value "switch" to denote the switch reset line.
+
 Port subnodes:
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
index 3956af1d30f3..33a0d67e4ce5 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
@@ -2,7 +2,7 @@
 
 Required properties:
 - compatible: should contain one of "brcm,genet-v1", "brcm,genet-v2",
-  "brcm,genet-v3", "brcm,genet-v4", "brcm,genet-v5".
+  "brcm,genet-v3", "brcm,genet-v4", "brcm,genet-v5", "brcm,bcm2711-genet-v5".
 - reg: address and length of the register set for the device
 - interrupts and/or interrupts-extended: must be two cells, the first cell
   is the general purpose interrupt line, while the second cell is the
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
index 4fa00e2eafcf..dd258674633c 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
@@ -11,16 +11,21 @@ Required properties:
 
  - compatible: should contain one of the following:
    * "brcm,bcm20702a1"
+   * "brcm,bcm4329-bt"
    * "brcm,bcm4330-bt"
    * "brcm,bcm43438-bt"
    * "brcm,bcm4345c5"
+   * "brcm,bcm43540-bt"
+   * "brcm,bcm4335a0"
 
 Optional properties:
 
  - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt
  - shutdown-gpios: GPIO specifier, used to enable the BT module
  - device-wakeup-gpios: GPIO specifier, used to wakeup the controller
- - host-wakeup-gpios: GPIO specifier, used to wakeup the host processor
+ - host-wakeup-gpios: GPIO specifier, used to wakeup the host processor.
+                      deprecated, replaced by interrupts and
+                      "host-wakeup" interrupt-names
  - clocks: 1 or 2 clocks as defined in clock-names below, in that order
  - clock-names: names for clock inputs, matching the clocks given
    - "extclk": deprecated, replaced by "txco"
@@ -28,7 +33,14 @@ Optional properties:
    - "lpo": external low power 32.768 kHz clock
  - vbat-supply: phandle to regulator supply for VBAT
  - vddio-supply: phandle to regulator supply for VDDIO
-
+ - brcm,bt-pcm-int-params: configure PCM parameters via a 5-byte array
+    - sco-routing: 0 = PCM, 1 = Transport, 2 = Codec, 3 = I2S
+    - pcm-interface-rate: 128KBps, 256KBps, 512KBps, 1024KBps, 2048KBps
+    - pcm-frame-type: short, long
+    - pcm-sync-mode: slave, master
+    - pcm-clock-mode: slave, master
+ - interrupts: must be one, used to wakeup the host processor
+ - interrupt-names: must be "host-wakeup"
 
 Example:
 
@@ -39,5 +51,6 @@ Example:
        bluetooth {
                compatible = "brcm,bcm43438-bt";
                max-speed = <921600>;
+               brcm,bt-pcm-int-params = [01 02 00 01 01];
        };
 };
diff --git a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
new file mode 100644
index 000000000000..a95960ee3feb
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/allwinner,sun4i-a10-can.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 CAN Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - const: allwinner,sun7i-a20-can
+          - const: allwinner,sun4i-a10-can
+      - const: allwinner,sun4i-a10-can
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun7i-a20-ccu.h>
+
+    can0: can@1c2bc00 {
+        compatible = "allwinner,sun7i-a20-can",
+                     "allwinner,sun4i-a10-can";
+        reg = <0x01c2bc00 0x400>;
+        interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_APB1_CAN>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/net/can/sun4i_can.txt b/Documentation/devicetree/bindings/net/can/sun4i_can.txt
deleted file mode 100644
index f69845e6feaf..000000000000
--- a/Documentation/devicetree/bindings/net/can/sun4i_can.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Allwinner A10/A20 CAN controller Device Tree Bindings
------------------------------------------------------
-
-Required properties:
-- compatible: "allwinner,sun4i-a10-can"
-- reg: physical base address and size of the Allwinner A10/A20 CAN register map.
-- interrupts: interrupt specifier for the sole interrupt.
-- clock: phandle and clock specifier.
-
-Example
--------
-
-SoC common .dtsi file:
-
-	can0_pins_a: can0@0 {
-		allwinner,pins = "PH20","PH21";
-		allwinner,function = "can";
-		allwinner,drive = <0>;
-		allwinner,pull = <0>;
-	};
-...
-	can0: can@1c2bc00 {
-		compatible = "allwinner,sun4i-a10-can";
-		reg = <0x01c2bc00 0x400>;
-		interrupts = <0 26 4>;
-		clocks = <&apb1_gates 4>;
-		status = "disabled";
-	};
-
-Board specific .dts file:
-
-	can0: can@1c2bc00 {
-		pinctrl-names = "default";
-		pinctrl-0 = <&can0_pins_a>;
-		status = "okay";
-	};
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index 27e1b4cebfbd..6bdcc3f84bd3 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -10,7 +10,6 @@ Required properties:
 	- #size-cells: 0
 	- spi-max-frequency: Maximum frequency of the SPI bus the chip can
 			     operate at should be less than or equal to 18 MHz.
-	- device-wake-gpios: Wake up GPIO to wake up the TCAN device.
 	- interrupt-parent: the phandle to the interrupt controller which provides
                     the interrupt.
 	- interrupts: interrupt specification for data-ready.
@@ -23,6 +22,7 @@ Optional properties:
 		       reset.
 	- device-state-gpios: Input GPIO that indicates if the device is in
 			      a sleep state or if the device is active.
+	- device-wake-gpios: Wake up GPIO to wake up the TCAN device.
 
 Example:
 tcan4x5x: tcan4x5x@0 {
@@ -36,5 +36,5 @@ tcan4x5x: tcan4x5x@0 {
 		interrupts = <14 GPIO_ACTIVE_LOW>;
 		device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
 		device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
-		reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
+		reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
 };
diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt
deleted file mode 100644
index e6527de80f10..000000000000
--- a/Documentation/devicetree/bindings/net/davinci-mdio.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-TI SoC Davinci/Keystone2 MDIO Controller Device Tree Bindings
----------------------------------------------------
-
-Required properties:
-- compatible		: Should be "ti,davinci_mdio"
-			  and "ti,keystone_mdio" for Keystone 2 SoCs
-			  and "ti,cpsw-mdio" for am335x, am472x, am57xx/dra7, dm814x SoCs
-			  and "ti,am4372-mdio" for am472x SoC
-- reg			: physical base address and size of the davinci mdio
-			  registers map
-- bus_freq		: Mdio Bus frequency
-
-Optional properties:
-- ti,hwmods		: Must be "davinci_mdio"
-
-Note: "ti,hwmods" field is used to fetch the base address and irq
-resources from TI, omap hwmod data base during device registration.
-Future plan is to migrate hwmod data base contents into device tree
-blob so that, all the required data will be used from device tree dts
-file.
-
-Examples:
-
-	mdio: davinci_mdio@4a101000 {
-		compatible = "ti,davinci_mdio";
-		reg = <0x4A101000 0x1000>;
-		bus_freq = <1000000>;
-	};
-
-(or)
-
-	mdio: davinci_mdio@4a101000 {
-		compatible = "ti,davinci_mdio";
-		ti,hwmods = "davinci_mdio";
-		bus_freq = <1000000>;
-	};
diff --git a/Documentation/devicetree/bindings/net/dsa/ar9331.txt b/Documentation/devicetree/bindings/net/dsa/ar9331.txt
new file mode 100644
index 000000000000..320607cbbb17
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/ar9331.txt
@@ -0,0 +1,148 @@
+Atheros AR9331 built-in switch
+=============================
+
+It is a switch built-in to Atheros AR9331 WiSoC and addressable over internal
+MDIO bus. All PHYs are built-in as well.
+
+Required properties:
+
+ - compatible: should be: "qca,ar9331-switch"
+ - reg: Address on the MII bus for the switch.
+ - resets : Must contain an entry for each entry in reset-names.
+ - reset-names : Must include the following entries: "switch"
+ - interrupt-parent: Phandle to the parent interrupt controller
+ - interrupts: IRQ line for the switch
+ - interrupt-controller: Indicates the switch is itself an interrupt
+   controller. This is used for the PHY interrupts.
+ - #interrupt-cells: must be 1
+ - mdio: Container of PHY and devices on the switches MDIO bus.
+
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
+required and optional properties.
+Examples:
+
+eth0: ethernet@19000000 {
+	compatible = "qca,ar9330-eth";
+	reg = <0x19000000 0x200>;
+	interrupts = <4>;
+
+	resets = <&rst 9>, <&rst 22>;
+	reset-names = "mac", "mdio";
+	clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+	clock-names = "eth", "mdio";
+
+	phy-mode = "mii";
+	phy-handle = <&phy_port4>;
+};
+
+eth1: ethernet@1a000000 {
+	compatible = "qca,ar9330-eth";
+	reg = <0x1a000000 0x200>;
+	interrupts = <5>;
+	resets = <&rst 13>, <&rst 23>;
+	reset-names = "mac", "mdio";
+	clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+	clock-names = "eth", "mdio";
+
+	phy-mode = "gmii";
+
+	fixed-link {
+		speed = <1000>;
+		full-duplex;
+	};
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		switch10: switch@10 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			compatible = "qca,ar9331-switch";
+			reg = <0x10>;
+			resets = <&rst 8>;
+			reset-names = "switch";
+
+			interrupt-parent = <&miscintc>;
+			interrupts = <12>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				switch_port0: port@0 {
+					reg = <0x0>;
+					label = "cpu";
+					ethernet = <&eth1>;
+
+					phy-mode = "gmii";
+
+					fixed-link {
+						speed = <1000>;
+						full-duplex;
+					};
+				};
+
+				switch_port1: port@1 {
+					reg = <0x1>;
+					phy-handle = <&phy_port0>;
+					phy-mode = "internal";
+				};
+
+				switch_port2: port@2 {
+					reg = <0x2>;
+					phy-handle = <&phy_port1>;
+					phy-mode = "internal";
+				};
+
+				switch_port3: port@3 {
+					reg = <0x3>;
+					phy-handle = <&phy_port2>;
+					phy-mode = "internal";
+				};
+
+				switch_port4: port@4 {
+					reg = <0x4>;
+					phy-handle = <&phy_port3>;
+					phy-mode = "internal";
+				};
+			};
+
+			mdio {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				interrupt-parent = <&switch10>;
+
+				phy_port0: phy@0 {
+					reg = <0x0>;
+					interrupts = <0>;
+				};
+
+				phy_port1: phy@1 {
+					reg = <0x1>;
+					interrupts = <0>;
+				};
+
+				phy_port2: phy@2 {
+					reg = <0x2>;
+					interrupts = <0>;
+				};
+
+				phy_port3: phy@3 {
+					reg = <0x3>;
+					interrupts = <0>;
+				};
+
+				phy_port4: phy@4 {
+					reg = <0x4>;
+					interrupts = <0>;
+				};
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 0e7c31794ae6..ac471b60ed6a 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -121,6 +121,11 @@ properties:
       and is useful for determining certain configuration settings
       such as flow control thresholds.
 
+  sfp:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Specifies a reference to a node representing a SFP cage.
+
   tx-fifo-depth:
     $ref: /schemas/types.yaml#definitions/uint32
     description:
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index f70f18ff821f..8927941c74bb 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -153,6 +153,11 @@ properties:
       Delay after the reset was deasserted in microseconds. If
       this property is missing the delay will be skipped.
 
+  sfp:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Specifies a reference to a node representing a SFP cage.
+
 required:
   - reg
 
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 299c0dcd67db..250f8d8cdce4 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -403,6 +403,19 @@ PROPERTIES
 		The settings and programming routines for internal/external
 		MDIO are different. Must be included for internal MDIO.
 
+- fsl,erratum-a011043
+		Usage: optional
+		Value type: <boolean>
+		Definition: Indicates the presence of the A011043 erratum
+		describing that the MDIO_CFG[MDIO_RD_ER] bit may be falsely
+		set when reading internal PCS registers. MDIO reads to
+		internal PCS registers may result in having the
+		MDIO_CFG[MDIO_RD_ER] bit set, even when there is no error and
+		read data (MDIO_DATA[MDIO_DATA]) is correct.
+		Software may get false read error when reading internal
+		PCS registers through MDIO. As a workaround, all internal
+		MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit.
+
 For internal PHY device on internal mdio bus, a PHY node should be created.
 See the definition of the PHY node in booting-without-of.txt for an
 example of how to define a PHY (Internal PHY has no interrupt line).
diff --git a/Documentation/devicetree/bindings/net/ftgmac100.txt b/Documentation/devicetree/bindings/net/ftgmac100.txt
index 72e7aaf7242e..f878c1103463 100644
--- a/Documentation/devicetree/bindings/net/ftgmac100.txt
+++ b/Documentation/devicetree/bindings/net/ftgmac100.txt
@@ -9,6 +9,7 @@ Required properties:
 
      - "aspeed,ast2400-mac"
      - "aspeed,ast2500-mac"
+     - "aspeed,ast2600-mac"
 
 - reg: Address and length of the register set for the device
 - interrupts: Should contain ethernet controller interrupt
@@ -23,6 +24,13 @@ Optional properties:
 - no-hw-checksum: Used to disable HW checksum support. Here for backward
   compatibility as the driver now should have correct defaults based on
   the SoC.
+- clocks: In accordance with the generic clock bindings. Must describe the MAC
+  IP clock, and optionally an RMII RCLK gate for the AST2500/AST2600. The
+  required MAC clock must be the first cell.
+- clock-names:
+
+      - "MACCLK": The MAC IP clock
+      - "RCLK": Clock gate for the RMII RCLK
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/net/lpc-eth.txt b/Documentation/devicetree/bindings/net/lpc-eth.txt
index b92e927808b6..cfe0e5991d46 100644
--- a/Documentation/devicetree/bindings/net/lpc-eth.txt
+++ b/Documentation/devicetree/bindings/net/lpc-eth.txt
@@ -10,6 +10,11 @@ Optional properties:
   absent, "rmii" is assumed.
 - use-iram: Use LPC32xx internal SRAM (IRAM) for DMA buffering
 
+Optional subnodes:
+- mdio : specifies the mdio bus, used as a container for phy nodes according to
+  phy.txt in the same directory
+
+
 Example:
 
 	mac: ethernet@31060000 {
diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
index 8a08621a5b54..afbcaebf062e 100644
--- a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
@@ -14,7 +14,7 @@ Required properties:
 	Should be "macirq" for the main MAC IRQ
 - clocks: Must contain a phandle for each entry in clock-names.
 - clock-names: The name of the clock listed in the clocks property. These are
-	"axi", "apb", "mac_main", "ptp_ref" for MT2712 SoC
+	"axi", "apb", "mac_main", "ptp_ref", "rmii_internal" for MT2712 SoC.
 - mac-address: See ethernet.txt in the same directory
 - phy-mode: See ethernet.txt in the same directory
 - mediatek,pericfg: A phandle to the syscon node that control ethernet
@@ -23,8 +23,10 @@ Required properties:
 Optional properties:
 - mediatek,tx-delay-ps: TX clock delay macro value. Default is 0.
 	It should be defined for RGMII/MII interface.
+	It should be defined for RMII interface when the reference clock is from MT2712 SoC.
 - mediatek,rx-delay-ps: RX clock delay macro value. Default is 0.
-	It should be defined for RGMII/MII/RMII interface.
+	It should be defined for RGMII/MII interface.
+	It should be defined for RMII interface.
 Both delay properties need to be a multiple of 170 for RGMII interface,
 or will round down. Range 0~31*170.
 Both delay properties need to be a multiple of 550 for MII/RMII interface,
@@ -34,13 +36,20 @@ or will round down. Range 0~31*550.
 	reference clock, which is from external PHYs, is connected to RXC pin
 	on MT2712 SoC.
 	Otherwise, is connected to TXC pin.
+- mediatek,rmii-clk-from-mac: boolean property, if present indicates that
+	MT2712 SoC provides the RMII reference clock, which outputs to TXC pin only.
 - mediatek,txc-inverse: boolean property, if present indicates that
 	1. tx clock will be inversed in MII/RGMII case,
 	2. tx clock inside MAC will be inversed relative to reference clock
 	   which is from external PHYs in RMII case, and it rarely happen.
+	3. the reference clock, which outputs to TXC pin will be inversed in RMII case
+	   when the reference clock is from MT2712 SoC.
 - mediatek,rxc-inverse: boolean property, if present indicates that
 	1. rx clock will be inversed in MII/RGMII case.
-	2. reference clock will be inversed when arrived at MAC in RMII case.
+	2. reference clock will be inversed when arrived at MAC in RMII case, when
+	   the reference clock is from external PHYs.
+	3. the inside clock, which be sent to MAC, will be inversed in RMII case when
+	   the reference clock is from MT2712 SoC.
 - assigned-clocks: mac_main and ptp_ref clocks
 - assigned-clock-parents: parent clocks of the assigned clocks
 
@@ -50,29 +59,33 @@ Example:
 		reg = <0 0x1101c000 0 0x1300>;
 		interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>;
 		interrupt-names = "macirq";
-		phy-mode ="rgmii";
+		phy-mode ="rgmii-rxid";
 		mac-address = [00 55 7b b5 7d f7];
 		clock-names = "axi",
 			      "apb",
 			      "mac_main",
 			      "ptp_ref",
-			      "ptp_top";
+			      "rmii_internal";
 		clocks = <&pericfg CLK_PERI_GMAC>,
 			 <&pericfg CLK_PERI_GMAC_PCLK>,
 			 <&topckgen CLK_TOP_ETHER_125M_SEL>,
-			 <&topckgen CLK_TOP_ETHER_50M_SEL>;
+			 <&topckgen CLK_TOP_ETHER_50M_SEL>,
+			 <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>;
 		assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>,
-				  <&topckgen CLK_TOP_ETHER_50M_SEL>;
+				  <&topckgen CLK_TOP_ETHER_50M_SEL>,
+				  <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>;
 		assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>,
-					 <&topckgen CLK_TOP_APLL1_D3>;
+					 <&topckgen CLK_TOP_APLL1_D3>,
+					 <&topckgen CLK_TOP_ETHERPLL_50M>;
+		power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>;
 		mediatek,pericfg = <&pericfg>;
 		mediatek,tx-delay-ps = <1530>;
 		mediatek,rx-delay-ps = <1530>;
 		mediatek,rmii-rxc;
 		mediatek,txc-inverse;
 		mediatek,rxc-inverse;
-		snps,txpbl = <32>;
-		snps,rxpbl = <32>;
+		snps,txpbl = <1>;
+		snps,rxpbl = <1>;
 		snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>;
 		snps,reset-active-low;
 	};
diff --git a/Documentation/devicetree/bindings/net/nfc/pn532.txt b/Documentation/devicetree/bindings/net/nfc/pn532.txt
new file mode 100644
index 000000000000..a5507dc499bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/pn532.txt
@@ -0,0 +1,46 @@
+* NXP Semiconductors PN532 NFC Controller
+
+Required properties:
+- compatible: Should be
+    - "nxp,pn532" Place a node with this inside the devicetree node of the bus
+                  where the NFC chip is connected to.
+                  Currently the kernel has phy bindings for uart and i2c.
+    - "nxp,pn532-i2c" (DEPRECATED) only works for the i2c binding.
+    - "nxp,pn533-i2c" (DEPRECATED) only works for the i2c binding.
+
+Required properties if connected on i2c:
+- clock-frequency: I²C work frequency.
+- reg: for the I²C bus address. This is fixed at 0x24 for the PN532.
+- interrupts: GPIO interrupt to which the chip is connected
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with PN532 on I2C2):
+
+&i2c2 {
+
+
+	pn532: nfc@24 {
+
+		compatible = "nxp,pn532";
+
+		reg = <0x24>;
+		clock-frequency = <400000>;
+
+		interrupt-parent = <&gpio1>;
+		interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
+
+	};
+};
+
+Example (for PN532 connected via uart):
+
+uart4: serial@49042000 {
+        compatible = "ti,omap3-uart";
+
+        pn532: nfc {
+                compatible = "nxp,pn532";
+        };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt b/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt
deleted file mode 100644
index 2efe3886b95b..000000000000
--- a/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* NXP Semiconductors PN532 NFC Controller
-
-Required properties:
-- compatible: Should be "nxp,pn532-i2c" or "nxp,pn533-i2c".
-- clock-frequency: I²C work frequency.
-- reg: address on the bus
-- interrupts: GPIO interrupt to which the chip is connected
-
-Optional SoC Specific Properties:
-- pinctrl-names: Contains only one value - "default".
-- pintctrl-0: Specifies the pin control groups used for this controller.
-
-Example (for ARM-based BeagleBone with PN532 on I2C2):
-
-&i2c2 {
-
-
-	pn532: pn532@24 {
-
-		compatible = "nxp,pn532-i2c";
-
-		reg = <0x24>;
-		clock-frequency = <400000>;
-
-		interrupt-parent = <&gpio1>;
-		interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
-
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
new file mode 100644
index 000000000000..5a6c9d20c0ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qca,ar803x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Atheros AR803x PHY
+
+maintainers:
+  - Andrew Lunn <andrew@lunn.ch>
+  - Florian Fainelli <f.fainelli@gmail.com>
+  - Heiner Kallweit <hkallweit1@gmail.com>
+
+description: |
+  Bindings for Qualcomm Atheros AR803x PHYs
+
+allOf:
+  - $ref: ethernet-phy.yaml#
+
+properties:
+  qca,clk-out-frequency:
+    description: Clock output frequency in Hertz.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [ 25000000, 50000000, 62500000, 125000000 ]
+
+  qca,clk-out-strength:
+    description: Clock output driver strength.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [ 0, 1, 2 ]
+
+  qca,keep-pll-enabled:
+    description: |
+      If set, keep the PLL enabled even if there is no link. Useful if you
+      want to use the clock output without an ethernet link.
+
+      Only supported on the AR8031.
+    type: boolean
+
+  vddio-supply:
+    description: |
+      RGMII I/O voltage regulator (see regulator/regulator.yaml).
+
+      The PHY supports RGMII I/O voltages of 1.5V, 1.8V and 2.5V. You can
+      either connect this to the vddio-regulator (1.5V / 1.8V) or the
+      vddh-regulator (2.5V).
+
+      Only supported on the AR8031.
+
+  vddio-regulator:
+    type: object
+    description:
+      Initial data for the VDDIO regulator. Set this to 1.5V or 1.8V.
+    allOf:
+      - $ref: /schemas/regulator/regulator.yaml
+
+  vddh-regulator:
+    type: object
+    description:
+      Dummy subnode to model the external connection of the PHY VDDH
+      regulator to VDDIO.
+    allOf:
+      - $ref: /schemas/regulator/regulator.yaml
+
+
+examples:
+  - |
+    #include <dt-bindings/net/qca-ar803x.h>
+
+    ethernet {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        phy-mode = "rgmii-id";
+
+        ethernet-phy@0 {
+            reg = <0>;
+
+            qca,clk-out-frequency = <125000000>;
+            qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
+
+            vddio-supply = <&vddio>;
+
+            vddio: vddio-regulator {
+                regulator-min-microvolt = <1800000>;
+                regulator-max-microvolt = <1800000>;
+            };
+        };
+    };
+  - |
+    #include <dt-bindings/net/qca-ar803x.h>
+
+    ethernet {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        phy-mode = "rgmii-id";
+
+        ethernet-phy@0 {
+            reg = <0>;
+
+            qca,clk-out-frequency = <50000000>;
+            qca,keep-pll-enabled;
+
+            vddio-supply = <&vddh>;
+
+            vddh: vddh-regulator {
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
index 68b67d9db63a..999aceadb985 100644
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
@@ -11,6 +11,7 @@ Required properties:
  - compatible: should contain one of the following:
    * "qcom,qca6174-bt"
    * "qcom,wcn3990-bt"
+   * "qcom,wcn3991-bt"
    * "qcom,wcn3998-bt"
 
 Optional properties for compatible string qcom,qca6174-bt:
diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml
new file mode 100644
index 000000000000..7f84df9790e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/renesas,ether.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Electronics SH EtherMAC
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+maintainers:
+  - Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - renesas,gether-r8a7740   # device is a part of R8A7740 SoC
+              - renesas,gether-r8a77980  # device is a part of R8A77980 SoC
+              - renesas,ether-r7s72100   # device is a part of R7S72100 SoC
+              - renesas,ether-r7s9210    # device is a part of R7S9210 SoC
+      - items:
+          - enum:
+              - renesas,ether-r8a7778    # device is a part of R8A7778 SoC
+              - renesas,ether-r8a7779    # device is a part of R8A7779 SoC
+          - enum:
+              - renesas,rcar-gen1-ether  # a generic R-Car Gen1 device
+      - items:
+          - enum:
+              - renesas,ether-r8a7745    # device is a part of R8A7745 SoC
+              - renesas,ether-r8a7743    # device is a part of R8A7743 SoC
+              - renesas,ether-r8a7790    # device is a part of R8A7790 SoC
+              - renesas,ether-r8a7791    # device is a part of R8A7791 SoC
+              - renesas,ether-r8a7793    # device is a part of R8A7793 SoC
+              - renesas,ether-r8a7794    # device is a part of R8A7794 SoC
+          - enum:
+              - renesas,rcar-gen2-ether  # a generic R-Car Gen2 or RZ/G1 device
+
+  reg:
+    items:
+       - description: E-DMAC/feLic registers
+       - description: TSU registers
+    minItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#address-cells':
+    description: number of address cells for the MDIO bus
+    const: 1
+
+  '#size-cells':
+    description: number of size cells on the MDIO bus
+    const: 0
+
+  clocks:
+    maxItems: 1
+
+  pinctrl-0: true
+
+  pinctrl-names: true
+
+  renesas,no-ether-link:
+    type: boolean
+    description:
+      specify when a board does not provide a proper Ether LINK signal
+
+  renesas,ether-link-active-low:
+    type: boolean
+    description:
+      specify when the Ether LINK signal is active-low instead of normal
+      active-high
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - phy-mode
+  - phy-handle
+  - '#address-cells'
+  - '#size-cells'
+  - clocks
+  - pinctrl-0
+
+examples:
+  # Lager board
+  - |
+    #include <dt-bindings/clock/r8a7790-clock.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    ethernet@ee700000 {
+        compatible = "renesas,ether-r8a7790", "renesas,rcar-gen2-ether";
+        reg = <0 0xee700000 0 0x400>;
+        interrupt-parent = <&gic>;
+        interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&mstp8_clks R8A7790_CLK_ETHER>;
+        phy-mode = "rmii";
+        phy-handle = <&phy1>;
+        pinctrl-0 = <&ether_pins>;
+        pinctrl-names = "default";
+        renesas,ether-link-active-low;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        phy1: ethernet-phy@1 {
+            reg = <1>;
+            interrupt-parent = <&irqc0>;
+            interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+            pinctrl-0 = <&phy1_pins>;
+            pinctrl-names = "default";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 5df4aa7f6811..87dad2dd8ca0 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -21,7 +21,8 @@ Required properties:
       - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC.
       - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC.
       - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
-      - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+      - "renesas,etheravb-r8a7796" for the R8A77960 SoC.
+      - "renesas,etheravb-r8a77961" for the R8A77961 SoC.
       - "renesas,etheravb-r8a77965" for the R8A77965 SoC.
       - "renesas,etheravb-r8a77970" for the R8A77970 SoC.
       - "renesas,etheravb-r8a77980" for the R8A77980 SoC.
@@ -37,8 +38,8 @@ Required properties:
 - reg: Offset and length of (1) the register block and (2) the stream buffer.
        The region for the register block is mandatory.
        The region for the stream buffer is optional, as it is only present on
-       R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A7796),
-       and M3-N (R8A77965).
+       R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A77960),
+       M3-W+ (R8A77961), and M3-N (R8A77965).
 - interrupts: A list of interrupt-specifiers, one for each entry in
 	      interrupt-names.
 	      If interrupt-names is not present, an interrupt specifier
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
deleted file mode 100644
index abc36274227c..000000000000
--- a/Documentation/devicetree/bindings/net/sh_eth.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-* Renesas Electronics SH EtherMAC
-
-This file provides information on what the device node for the SH EtherMAC
-interface contains.
-
-Required properties:
-- compatible: Must contain one or more of the following:
-	      "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
-	      "renesas,ether-r8a7743"  if the device is a part of R8A7743 SoC.
-	      "renesas,ether-r8a7745"  if the device is a part of R8A7745 SoC.
-	      "renesas,ether-r8a7778"  if the device is a part of R8A7778 SoC.
-	      "renesas,ether-r8a7779"  if the device is a part of R8A7779 SoC.
-	      "renesas,ether-r8a7790"  if the device is a part of R8A7790 SoC.
-	      "renesas,ether-r8a7791"  if the device is a part of R8A7791 SoC.
-	      "renesas,ether-r8a7793"  if the device is a part of R8A7793 SoC.
-	      "renesas,ether-r8a7794"  if the device is a part of R8A7794 SoC.
-	      "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC.
-	      "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
-	      "renesas,ether-r7s9210" if the device is a part of R7S9210 SoC.
-	      "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
-	      "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
-	                                device.
-
-	      When compatible with the generic version, nodes must list
-	      the SoC-specific version corresponding to the platform
-	      first followed by the generic version.
-
-- reg: offset and length of (1) the E-DMAC/feLic register block (required),
-       (2) the TSU register block (optional).
-- interrupts: interrupt specifier for the sole interrupt.
-- phy-mode: see ethernet.txt file in the same directory.
-- phy-handle: see ethernet.txt file in the same directory.
-- #address-cells: number of address cells for the MDIO bus, must be equal to 1.
-- #size-cells: number of size cells on the MDIO bus, must be equal to 0.
-- clocks: clock phandle and specifier pair.
-- pinctrl-0: phandle, referring to a default pin configuration node.
-
-Optional properties:
-- pinctrl-names: pin configuration state name ("default").
-- renesas,no-ether-link: boolean, specify when a board does not provide a proper
-			 Ether LINK signal.
-- renesas,ether-link-active-low: boolean, specify when the Ether LINK signal is
-				 active-low instead of normal active-high.
-
-Example (Lager board):
-
-	ethernet@ee700000 {
-		compatible = "renesas,ether-r8a7790",
-		             "renesas,rcar-gen2-ether";
-		reg = <0 0xee700000 0 0x400>;
-		interrupt-parent = <&gic>;
-		interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp8_clks R8A7790_CLK_ETHER>;
-		phy-mode = "rmii";
-		phy-handle = <&phy1>;
-		pinctrl-0 = <&ether_pins>;
-		pinctrl-names = "default";
-		renesas,ether-link-active-low;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		phy1: ethernet-phy@1 {
-			reg = <1>;
-			interrupt-parent = <&irqc0>;
-			interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
-			pinctrl-0 = <&phy1_pins>;
-			pinctrl-names = "default";
-		};
-	};
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 4845e29411e4..e08cd4c4d568 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -347,6 +347,7 @@ allOf:
               - st,spear600-gmac
 
     then:
+      properties:
         snps,tso:
           $ref: /schemas/types.yaml#definitions/flag
           description:
diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
new file mode 100644
index 000000000000..ac8c76369a86
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
@@ -0,0 +1,232 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ti,cpsw-switch.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI SoC Ethernet Switch Controller (CPSW) Device Tree Bindings
+
+maintainers:
+  - Grygorii Strashko <grygorii.strashko@ti.com>
+  - Sekhar Nori <nsekhar@ti.com>
+
+description:
+  The 3-port switch gigabit ethernet subsystem provides ethernet packet
+  communication and can be configured as an ethernet switch. It provides the
+  gigabit media independent interface (GMII),reduced gigabit media
+  independent interface (RGMII), reduced media independent interface (RMII),
+  the management data input output (MDIO) for physical layer device (PHY)
+  management.
+
+properties:
+  compatible:
+    oneOf:
+      - const: ti,cpsw-switch
+      - items:
+         - const: ti,am335x-cpsw-switch
+         - const: ti,cpsw-switch
+      - items:
+        - const: ti,am4372-cpsw-switch
+        - const: ti,cpsw-switch
+      - items:
+        - const: ti,dra7-cpsw-switch
+        - const: ti,cpsw-switch
+
+  reg:
+    maxItems: 1
+    description:
+       The physical base address and size of full the CPSW module IO range
+
+  ranges: true
+
+  clocks:
+    maxItems: 1
+    description: CPSW functional clock
+
+  clock-names:
+    items:
+      - const: fck
+
+  interrupts:
+    items:
+      - description: RX_THRESH interrupt
+      - description: RX interrupt
+      - description: TX interrupt
+      - description: MISC interrupt
+
+  interrupt-names:
+    items:
+      - const: "rx_thresh"
+      - const: "rx"
+      - const: "tx"
+      - const: "misc"
+
+  pinctrl-names: true
+
+  syscon:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Phandle to the system control device node which provides access to
+      efuse IO range with MAC addresses
+
+  ethernet-ports:
+    type: object
+    properties:
+      '#address-cells':
+        const: 1
+      '#size-cells':
+        const: 0
+
+    patternProperties:
+      "^port@[0-9]+$":
+          type: object
+          description: CPSW external ports
+
+          allOf:
+            - $ref: ethernet-controller.yaml#
+
+          properties:
+            reg:
+              items:
+                - enum: [1, 2]
+              description: CPSW port number
+
+            phys:
+              maxItems: 1
+              description:  phandle on phy-gmii-sel PHY
+
+            label:
+              description: label associated with this port
+
+            ti,dual-emac-pvid:
+              allOf:
+                - $ref: /schemas/types.yaml#/definitions/uint32
+              minimum: 1
+              maximum: 1024
+              description:
+                Specifies default PORT VID to be used to segregate
+                ports. Default value - CPSW port number.
+
+          required:
+            - reg
+            - phys
+
+  mdio:
+    type: object
+    allOf:
+      - $ref: "ti,davinci-mdio.yaml#"
+    description:
+      CPSW MDIO bus.
+
+  cpts:
+    type: object
+    description:
+      The Common Platform Time Sync (CPTS) module
+
+    properties:
+      clocks:
+        maxItems: 1
+        description: CPTS reference clock
+
+      clock-names:
+        items:
+          - const: cpts
+
+      cpts_clock_mult:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Numerator to convert input clock ticks into ns
+
+      cpts_clock_shift:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Denominator to convert input clock ticks into ns.
+          Mult and shift will be calculated basing on CPTS rftclk frequency if
+          both cpts_clock_shift and cpts_clock_mult properties are not provided.
+
+    required:
+      - clocks
+      - clock-names
+
+required:
+  - compatible
+  - reg
+  - ranges
+  - clocks
+  - clock-names
+  - interrupts
+  - interrupt-names
+  - '#address-cells'
+  - '#size-cells'
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/dra7.h>
+
+    mac_sw: switch@0 {
+        compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch";
+        reg = <0x0 0x4000>;
+        ranges = <0 0 0x4000>;
+        clocks = <&gmac_main_clk>;
+        clock-names = "fck";
+        #address-cells = <1>;
+        #size-cells = <1>;
+        syscon = <&scm_conf>;
+        inctrl-names = "default", "sleep";
+
+        interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "rx_thresh", "rx", "tx", "misc";
+
+        ethernet-ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                cpsw_port1: port@1 {
+                        reg = <1>;
+                        label = "port1";
+                        mac-address = [ 00 00 00 00 00 00 ];
+                        phys = <&phy_gmii_sel 1>;
+                        phy-handle = <&ethphy0_sw>;
+                        phy-mode = "rgmii";
+                        ti,dual-emac-pvid = <1>;
+                };
+
+                cpsw_port2: port@2 {
+                        reg = <2>;
+                        label = "wan";
+                        mac-address = [ 00 00 00 00 00 00 ];
+                        phys = <&phy_gmii_sel 2>;
+                        phy-handle = <&ethphy1_sw>;
+                        phy-mode = "rgmii";
+                        ti,dual-emac-pvid = <2>;
+                };
+        };
+
+        davinci_mdio_sw: mdio@1000 {
+                compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+                reg = <0x1000 0x100>;
+                clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 0>;
+                clock-names = "fck";
+                #address-cells = <1>;
+                #size-cells = <0>;
+                bus_freq = <1000000>;
+
+                ethphy0_sw: ethernet-phy@0 {
+                        reg = <0>;
+                };
+
+                ethphy1_sw: ethernet-phy@1 {
+                        reg = <1>;
+                };
+        };
+
+        cpts {
+                clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>;
+                clock-names = "cpts";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
new file mode 100644
index 000000000000..242ac4935a4b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ti,davinci-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI SoC Davinci/Keystone2 MDIO Controller
+
+maintainers:
+  - Grygorii Strashko <grygorii.strashko@ti.com>
+
+description:
+  TI SoC Davinci/Keystone2 MDIO Controller
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+properties:
+  compatible:
+    oneOf:
+       - const: ti,davinci_mdio
+       - items:
+         - const: ti,keystone_mdio
+         - const: ti,davinci_mdio
+       - items:
+         - const: ti,cpsw-mdio
+         - const: ti,davinci_mdio
+       - items:
+         - const: ti,am4372-mdio
+         - const: ti,cpsw-mdio
+         - const: ti,davinci_mdio
+
+  reg:
+    maxItems: 1
+
+  bus_freq:
+      maximum: 2500000
+      description:
+        MDIO Bus frequency
+
+  ti,hwmods:
+    description: TI hwmod name
+    deprecated: true
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/string-array
+      - items:
+          const: davinci_mdio
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: ti,davinci_mdio
+  required:
+    - bus_freq
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+
+examples:
+  - |
+    davinci_mdio: mdio@4a101000 {
+         compatible = "ti,davinci_mdio";
+         #address-cells = <1>;
+         #size-cells = <0>;
+         reg = <0x4a101000 0x1000>;
+         bus_freq = <1000000>;
+    };
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 388ff48f53ae..44e2a4fab29e 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -8,8 +8,6 @@ Required properties:
 	- ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
 		for applicable values. Required only if interface type is
 		PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID
-	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
-		for applicable values
 
 Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
       will be left at their default values, as set by the PHY's pin strapping.
@@ -42,6 +40,14 @@ Optional property:
 				    Some MACs work with differential SGMII clock.
 				    See data manual for details.
 
+	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
+		for applicable values (deprecated)
+
+	-tx-fifo-depth - As defined in the ethernet-controller.yaml.  Values for
+			 the depth can be found in dt-bindings/net/ti-dp83867.h
+	-rx-fifo-depth - As defined in the ethernet-controller.yaml.  Values for
+			 the depth can be found in dt-bindings/net/ti-dp83867.h
+
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance
       takes precedence.
@@ -55,7 +61,7 @@ Example:
 		reg = <0>;
 		ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
 		ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_75_NS>;
-		ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+		tx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
 	};
 
 Datasheet can be found:
diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
new file mode 100644
index 000000000000..6fe3e451da8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 Texas Instruments Incorporated
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/ti,dp83869.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI DP83869 ethernet PHY
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+  - Dan Murphy <dmurphy@ti.com>
+
+description: |
+  The DP83869HM device is a robust, fully-featured Gigabit (PHY) transceiver
+  with integrated PMD sublayers that supports 10BASE-Te, 100BASE-TX and
+  1000BASE-T Ethernet protocols. The DP83869 also supports 1000BASE-X and
+  100BASE-FX Fiber protocols.
+  This device interfaces to the MAC layer through Reduced GMII (RGMII) and
+  SGMII The DP83869HM supports Media Conversion in Managed mode. In this mode,
+  the DP83869HM can run 1000BASE-X-to-1000BASE-T and 100BASE-FX-to-100BASE-TX
+  conversions.  The DP83869HM can also support Bridge Conversion from RGMII to
+  SGMII and SGMII to RGMII.
+
+  Specifications about the charger can be found at:
+    http://www.ti.com/lit/ds/symlink/dp83869hm.pdf
+
+properties:
+  reg:
+    maxItems: 1
+
+  ti,min-output-impedance:
+    type: boolean
+    description: |
+       MAC Interface Impedance control to set the programmable output impedance
+       to a minimum value (35 ohms).
+
+  ti,max-output-impedance:
+    type: boolean
+    description: |
+       MAC Interface Impedance control to set the programmable output impedance
+       to a maximum value (70 ohms).
+
+  tx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Transmitt FIFO depth see dt-bindings/net/ti-dp83869.h for values
+
+  rx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Receive FIFO depth see dt-bindings/net/ti-dp83869.h for values
+
+  ti,clk-output-sel:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Muxing option for CLK_OUT pin see dt-bindings/net/ti-dp83869.h for values.
+
+  ti,op-mode:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Operational mode for the PHY.  If this is not set then the operational
+       mode is set by the straps. see dt-bindings/net/ti-dp83869.h for values
+
+required:
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/net/ti-dp83869.h>
+    mdio0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      ethphy0: ethernet-phy@0 {
+        reg = <0>;
+        tx-fifo-depth = <DP83869_PHYCR_FIFO_DEPTH_4_B_NIB>;
+        rx-fifo-depth = <DP83869_PHYCR_FIFO_DEPTH_4_B_NIB>;
+        ti,op-mode = <DP83869_RGMII_COPPER_ETHERNET>;
+        ti,max-output-impedance = "true";
+        ti,clk-output-sel = <DP83869_CLK_O_SEL_CHN_A_RCLK>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
index ae661e65354e..616c87746d6f 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
@@ -50,7 +50,7 @@ Optional properties:
           entry in clock-names.
 - clock-names: Should contain the clock names "wifi_wcss_cmd", "wifi_wcss_ref",
 	       "wifi_wcss_rtc" for "qcom,ipq4019-wifi" compatible target and
-	       "cxo_ref_clk_pin" for "qcom,wcn3990-wifi"
+	       "cxo_ref_clk_pin" and optionally "qdss" for "qcom,wcn3990-wifi"
 	       compatible target.
 - qcom,msi_addr: MSI interrupt address.
 - qcom,msi_base: Base value to add before writing MSI data into
@@ -81,6 +81,15 @@ Optional properties:
 	Definition: Name of external front end module used. Some valid FEM names
 		    for example: "microsemi-lx5586", "sky85703-11"
 		    and "sky85803" etc.
+- qcom,snoc-host-cap-8bit-quirk:
+	Usage: Optional
+	Value type: <empty>
+	Definition: Quirk specifying that the firmware expects the 8bit version
+		    of the host capability QMI request
+- qcom,xo-cal-data: xo cal offset to be configured in xo trim register.
+
+- qcom,msa-fixed-perm: Boolean context flag to disable SCM call for statically
+		       mapped msa region.
 
 Example (to supply PCI based wifi block details):
 
@@ -179,4 +188,5 @@ wifi@18000000 {
 		vdd-3.3-ch0-supply = <&vreg_l25a_3p3>;
 		memory-region = <&wifi_msa_mem>;
 		iommus = <&apps_smmu 0x0040 0x1>;
+		qcom,msa-fixed-perm;
 };
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
new file mode 100644
index 000000000000..a1717db36dba
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -0,0 +1,273 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/qcom,ath11k.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies ath11k wireless devices Generic Binding
+
+maintainers:
+  - Kalle Valo <kvalo@codeaurora.org>
+
+description: |
+  These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax
+  devices, for example like AHB based IPQ8074.
+
+properties:
+  compatible:
+    const: qcom,ipq8074-wifi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: misc-pulse1 interrupt events
+      - description: misc-latch interrupt events
+      - description: sw exception interrupt events
+      - description: watchdog interrupt events
+      - description: interrupt event for ring CE0
+      - description: interrupt event for ring CE1
+      - description: interrupt event for ring CE2
+      - description: interrupt event for ring CE3
+      - description: interrupt event for ring CE4
+      - description: interrupt event for ring CE5
+      - description: interrupt event for ring CE6
+      - description: interrupt event for ring CE7
+      - description: interrupt event for ring CE8
+      - description: interrupt event for ring CE9
+      - description: interrupt event for ring CE10
+      - description: interrupt event for ring CE11
+      - description: interrupt event for ring host2wbm-desc-feed
+      - description: interrupt event for ring host2reo-re-injection
+      - description: interrupt event for ring host2reo-command
+      - description: interrupt event for ring host2rxdma-monitor-ring3
+      - description: interrupt event for ring host2rxdma-monitor-ring2
+      - description: interrupt event for ring host2rxdma-monitor-ring1
+      - description: interrupt event for ring reo2ost-exception
+      - description: interrupt event for ring wbm2host-rx-release
+      - description: interrupt event for ring reo2host-status
+      - description: interrupt event for ring reo2host-destination-ring4
+      - description: interrupt event for ring reo2host-destination-ring3
+      - description: interrupt event for ring reo2host-destination-ring2
+      - description: interrupt event for ring reo2host-destination-ring1
+      - description: interrupt event for ring rxdma2host-monitor-destination-mac3
+      - description: interrupt event for ring rxdma2host-monitor-destination-mac2
+      - description: interrupt event for ring rxdma2host-monitor-destination-mac1
+      - description: interrupt event for ring ppdu-end-interrupts-mac3
+      - description: interrupt event for ring ppdu-end-interrupts-mac2
+      - description: interrupt event for ring ppdu-end-interrupts-mac1
+      - description: interrupt event for ring rxdma2host-monitor-status-ring-mac3
+      - description: interrupt event for ring rxdma2host-monitor-status-ring-mac2
+      - description: interrupt event for ring rxdma2host-monitor-status-ring-mac1
+      - description: interrupt event for ring host2rxdma-host-buf-ring-mac3
+      - description: interrupt event for ring host2rxdma-host-buf-ring-mac2
+      - description: interrupt event for ring host2rxdma-host-buf-ring-mac1
+      - description: interrupt event for ring rxdma2host-destination-ring-mac3
+      - description: interrupt event for ring rxdma2host-destination-ring-mac2
+      - description: interrupt event for ring rxdma2host-destination-ring-mac1
+      - description: interrupt event for ring host2tcl-input-ring4
+      - description: interrupt event for ring host2tcl-input-ring3
+      - description: interrupt event for ring host2tcl-input-ring2
+      - description: interrupt event for ring host2tcl-input-ring1
+      - description: interrupt event for ring wbm2host-tx-completions-ring3
+      - description: interrupt event for ring wbm2host-tx-completions-ring2
+      - description: interrupt event for ring wbm2host-tx-completions-ring1
+      - description: interrupt event for ring tcl2host-status-ring
+
+
+  interrupt-names:
+    items:
+      - const: misc-pulse1
+      - const: misc-latch
+      - const: sw-exception
+      - const: watchdog
+      - const: ce0
+      - const: ce1
+      - const: ce2
+      - const: ce3
+      - const: ce4
+      - const: ce5
+      - const: ce6
+      - const: ce7
+      - const: ce8
+      - const: ce9
+      - const: ce10
+      - const: ce11
+      - const: host2wbm-desc-feed
+      - const: host2reo-re-injection
+      - const: host2reo-command
+      - const: host2rxdma-monitor-ring3
+      - const: host2rxdma-monitor-ring2
+      - const: host2rxdma-monitor-ring1
+      - const: reo2ost-exception
+      - const: wbm2host-rx-release
+      - const: reo2host-status
+      - const: reo2host-destination-ring4
+      - const: reo2host-destination-ring3
+      - const: reo2host-destination-ring2
+      - const: reo2host-destination-ring1
+      - const: rxdma2host-monitor-destination-mac3
+      - const: rxdma2host-monitor-destination-mac2
+      - const: rxdma2host-monitor-destination-mac1
+      - const: ppdu-end-interrupts-mac3
+      - const: ppdu-end-interrupts-mac2
+      - const: ppdu-end-interrupts-mac1
+      - const: rxdma2host-monitor-status-ring-mac3
+      - const: rxdma2host-monitor-status-ring-mac2
+      - const: rxdma2host-monitor-status-ring-mac1
+      - const: host2rxdma-host-buf-ring-mac3
+      - const: host2rxdma-host-buf-ring-mac2
+      - const: host2rxdma-host-buf-ring-mac1
+      - const: rxdma2host-destination-ring-mac3
+      - const: rxdma2host-destination-ring-mac2
+      - const: rxdma2host-destination-ring-mac1
+      - const: host2tcl-input-ring4
+      - const: host2tcl-input-ring3
+      - const: host2tcl-input-ring2
+      - const: host2tcl-input-ring1
+      - const: wbm2host-tx-completions-ring3
+      - const: wbm2host-tx-completions-ring2
+      - const: wbm2host-tx-completions-ring1
+      - const: tcl2host-status-ring
+
+  qcom,rproc:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      DT entry of q6v5-wcss remoteproc driver.
+      Phandle to a node that can contain the following properties
+        * compatible
+        * reg
+        * reg-names
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - qcom,rproc
+
+additionalProperties: false
+
+examples:
+  - |
+
+    q6v5_wcss: q6v5_wcss@CD00000 {
+        compatible = "qcom,ipq8074-wcss-pil";
+        reg = <0xCD00000 0x4040>,
+              <0x4AB000 0x20>;
+        reg-names = "qdsp6",
+                    "rmb";
+    };
+
+    wifi0: wifi@c000000 {
+        compatible = "qcom,ipq8074-wifi";
+        reg = <0xc000000 0x2000000>;
+        interrupts = <0 320 1>,
+                     <0 319 1>,
+                     <0 318 1>,
+                     <0 317 1>,
+                     <0 316 1>,
+                     <0 315 1>,
+                     <0 314 1>,
+                     <0 311 1>,
+                     <0 310 1>,
+                     <0 411 1>,
+                     <0 410 1>,
+                     <0 40 1>,
+                     <0 39 1>,
+                     <0 302 1>,
+                     <0 301 1>,
+                     <0 37 1>,
+                     <0 36 1>,
+                     <0 296 1>,
+                     <0 295 1>,
+                     <0 294 1>,
+                     <0 293 1>,
+                     <0 292 1>,
+                     <0 291 1>,
+                     <0 290 1>,
+                     <0 289 1>,
+                     <0 288 1>,
+                     <0 239 1>,
+                     <0 236 1>,
+                     <0 235 1>,
+                     <0 234 1>,
+                     <0 233 1>,
+                     <0 232 1>,
+                     <0 231 1>,
+                     <0 230 1>,
+                     <0 229 1>,
+                     <0 228 1>,
+                     <0 224 1>,
+                     <0 223 1>,
+                     <0 203 1>,
+                     <0 183 1>,
+                     <0 180 1>,
+                     <0 179 1>,
+                     <0 178 1>,
+                     <0 177 1>,
+                     <0 176 1>,
+                     <0 163 1>,
+                     <0 162 1>,
+                     <0 160 1>,
+                     <0 159 1>,
+                     <0 158 1>,
+                     <0 157 1>,
+                     <0 156 1>;
+        interrupt-names = "misc-pulse1",
+                          "misc-latch",
+                          "sw-exception",
+                          "watchdog",
+                          "ce0",
+                          "ce1",
+                          "ce2",
+                          "ce3",
+                          "ce4",
+                          "ce5",
+                          "ce6",
+                          "ce7",
+                          "ce8",
+                          "ce9",
+                          "ce10",
+                          "ce11",
+                          "host2wbm-desc-feed",
+                          "host2reo-re-injection",
+                          "host2reo-command",
+                          "host2rxdma-monitor-ring3",
+                          "host2rxdma-monitor-ring2",
+                          "host2rxdma-monitor-ring1",
+                          "reo2ost-exception",
+                          "wbm2host-rx-release",
+                          "reo2host-status",
+                          "reo2host-destination-ring4",
+                          "reo2host-destination-ring3",
+                          "reo2host-destination-ring2",
+                          "reo2host-destination-ring1",
+                          "rxdma2host-monitor-destination-mac3",
+                          "rxdma2host-monitor-destination-mac2",
+                          "rxdma2host-monitor-destination-mac1",
+                          "ppdu-end-interrupts-mac3",
+                          "ppdu-end-interrupts-mac2",
+                          "ppdu-end-interrupts-mac1",
+                          "rxdma2host-monitor-status-ring-mac3",
+                          "rxdma2host-monitor-status-ring-mac2",
+                          "rxdma2host-monitor-status-ring-mac1",
+                          "host2rxdma-host-buf-ring-mac3",
+                          "host2rxdma-host-buf-ring-mac2",
+                          "host2rxdma-host-buf-ring-mac1",
+                          "rxdma2host-destination-ring-mac3",
+                          "rxdma2host-destination-ring-mac2",
+                          "rxdma2host-destination-ring-mac1",
+                          "host2tcl-input-ring4",
+                          "host2tcl-input-ring3",
+                          "host2tcl-input-ring2",
+                          "host2tcl-input-ring1",
+                          "wbm2host-tx-completions-ring3",
+                          "wbm2host-tx-completions-ring2",
+                          "wbm2host-tx-completions-ring1",
+                          "tcl2host-status-ring";
+        qcom,rproc = <&q6v5_wcss>;
+    };
diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt
index bb2fcde6f7ff..f38950560982 100644
--- a/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt
+++ b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt
@@ -35,3 +35,29 @@ Examples:
 		ti,power-gpio = <&gpio3 23 GPIO_ACTIVE_HIGH>; /* 87 */
 	};
 };
+
+&mmc3 {
+	vmmc-supply = <&wlan_en>;
+
+	bus-width = <4>;
+	non-removable;
+	ti,non-removable;
+	cap-power-off-card;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc3_pins>;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	wlan: wifi@1 {
+		compatible = "ti,wl1251";
+
+		reg = <1>;
+
+		interrupt-parent = <&gpio1>;
+		interrupts = <21 IRQ_TYPE_LEVEL_HIGH>;	/* GPIO_21 */
+
+		ti,wl1251-has-eeprom;
+	};
+};
diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
index 1084e9d2917d..daf1321d76ad 100644
--- a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
+++ b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Security ID Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 allOf:
   - $ref: "nvmem.yaml#"
@@ -31,9 +31,7 @@ required:
   - compatible
   - reg
 
-# FIXME: We should set it, but it would report all the generic
-# properties as additional properties.
-# additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/nvmem/amlogic-efuse.txt b/Documentation/devicetree/bindings/nvmem/amlogic-efuse.txt
index 2e0723ab3384..f7b3ed74db54 100644
--- a/Documentation/devicetree/bindings/nvmem/amlogic-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/amlogic-efuse.txt
@@ -4,6 +4,7 @@ Required properties:
 - compatible: should be "amlogic,meson-gxbb-efuse"
 - clocks: phandle to the efuse peripheral clock provided by the
 	  clock controller.
+- secure-monitor: phandle to the secure-monitor node
 
 = Data cells =
 Are child nodes of eFuse, bindings of which as described in
@@ -16,6 +17,7 @@ Example:
 		clocks = <&clkc CLKID_EFUSE>;
 		#address-cells = <1>;
 		#size-cells = <1>;
+		secure-monitor = <&sm>;
 
 		sn: sn@14 {
 			reg = <0x14 0x10>;
@@ -30,6 +32,10 @@ Example:
 		};
 	};
 
+	sm: secure-monitor {
+		compatible = "amlogic,meson-gxbb-sm";
+	};
+
 = Data consumers =
 Are device nodes which consume nvmem data cells.
 
diff --git a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
index 904dadf3d07b..6e346d5cddcf 100644
--- a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
+++ b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
@@ -2,7 +2,7 @@ Freescale i.MX6 On-Chip OTP Controller (OCOTP) device tree bindings
 
 This binding represents the on-chip eFuse OTP controller found on
 i.MX6Q/D, i.MX6DL/S, i.MX6SL, i.MX6SX, i.MX6UL, i.MX6ULL/ULZ, i.MX6SLL,
-i.MX7D/S, i.MX7ULP, i.MX8MQ, i.MX8MM and i.MX8MN SoCs.
+i.MX7D/S, i.MX7ULP, i.MX8MQ, i.MX8MM, i.MX8MN and i.MX8MP SoCs.
 
 Required properties:
 - compatible: should be one of
@@ -17,6 +17,7 @@ Required properties:
 	"fsl,imx8mq-ocotp" (i.MX8MQ),
 	"fsl,imx8mm-ocotp" (i.MX8MM),
 	"fsl,imx8mn-ocotp" (i.MX8MN),
+	"fsl,imx8mp-ocotp" (i.MX8MP),
 	followed by "syscon".
 - #address-cells : Should be 1
 - #size-cells : Should be 1
diff --git a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml
new file mode 100644
index 000000000000..7bbd4e62044e
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/qcom,spmi-sdam.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. SPMI SDAM DT bindings
+
+maintainers:
+  - Shyam Kumar Thella <sthella@codeaurora.org>
+
+description: |
+  The SDAM provides scratch register space for the PMIC clients. This
+  memory can be used by software to store information or communicate
+  to/from the PBUS.
+
+allOf:
+  - $ref: "nvmem.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - qcom,spmi-sdam
+
+  reg:
+    maxItems: 1
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  ranges: true
+
+required:
+  - compatible
+  - reg
+  - ranges
+
+patternProperties:
+  "^.*@[0-9a-f]+$":
+    type: object
+
+    properties:
+      reg:
+        maxItems: 1
+        description:
+          Offset and size in bytes within the storage device.
+
+      bits:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        maxItems: 1
+        items:
+          items:
+            - minimum: 0
+              maximum: 7
+              description:
+                Offset in bit within the address range specified by reg.
+            - minimum: 1
+              description:
+                Size in bit within the address range specified by reg.
+
+    required:
+      - reg
+
+    additionalProperties: false
+
+examples:
+  - |
+      sdam_1: nvram@b000 {
+          #address-cells = <1>;
+          #size-cells = <1>;
+          compatible = "qcom,spmi-sdam";
+          reg = <0xb000 0x100>;
+          ranges = <0 0xb000 0x100>;
+
+          /* Data cells */
+          restart_reason: restart@50 {
+              reg = <0x50 0x1>;
+              bits = <6 2>;
+          };
+      };
+...
diff --git a/Documentation/devicetree/bindings/nvmem/rockchip-otp.txt b/Documentation/devicetree/bindings/nvmem/rockchip-otp.txt
new file mode 100644
index 000000000000..40f649f7c2e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/rockchip-otp.txt
@@ -0,0 +1,25 @@
+Rockchip internal OTP (One Time Programmable) memory device tree bindings
+
+Required properties:
+- compatible: Should be one of the following.
+  - "rockchip,px30-otp" - for PX30 SoCs.
+  - "rockchip,rk3308-otp" - for RK3308 SoCs.
+- reg: Should contain the registers location and size
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Should be "otp", "apb_pclk" and "phy".
+- resets: Must contain an entry for each entry in reset-names.
+  See ../../reset/reset.txt for details.
+- reset-names: Should be "phy".
+
+See nvmem.txt for more information.
+
+Example:
+	otp: otp@ff290000 {
+		compatible = "rockchip,px30-otp";
+		reg = <0x0 0xff290000 0x0 0x4000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		clocks = <&cru SCLK_OTP_USR>, <&cru PCLK_OTP_NS>,
+			 <&cru PCLK_OTP_PHY>;
+		clock-names = "otp", "apb_pclk", "phy";
+	};
diff --git a/Documentation/devicetree/bindings/nvmem/sprd-efuse.txt b/Documentation/devicetree/bindings/nvmem/sprd-efuse.txt
new file mode 100644
index 000000000000..96b6feec27f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/sprd-efuse.txt
@@ -0,0 +1,39 @@
+= Spreadtrum eFuse device tree bindings =
+
+Required properties:
+- compatible: Should be "sprd,ums312-efuse".
+- reg: Specify the address offset of efuse controller.
+- clock-names: Should be "enable".
+- clocks: The phandle and specifier referencing the controller's clock.
+- hwlocks: Reference to a phandle of a hwlock provider node.
+
+= Data cells =
+Are child nodes of eFuse, bindings of which as described in
+bindings/nvmem/nvmem.txt
+
+Example:
+
+	ap_efuse: efuse@32240000 {
+		compatible = "sprd,ums312-efuse";
+		reg = <0 0x32240000 0 0x10000>;
+		clock-names = "enable";
+		hwlocks = <&hwlock 8>;
+		clocks = <&aonapb_gate CLK_EFUSE_EB>;
+
+		/* Data cells */
+		thermal_calib: calib@10 {
+			reg = <0x10 0x2>;
+		};
+	};
+
+= Data consumers =
+Are device nodes which consume nvmem data cells.
+
+Example:
+
+	thermal {
+		...
+
+		nvmem-cells = <&thermal_calib>;
+		nvmem-cell-names = "calibration";
+	};
diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.txt b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.txt
deleted file mode 100644
index 142a51d5a9be..000000000000
--- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-STMicroelectronics STM32 Factory-programmed data device tree bindings
-
-This represents STM32 Factory-programmed read only non-volatile area: locked
-flash, OTP, read-only HW regs... This contains various information such as:
-analog calibration data for temperature sensor (e.g. TS_CAL1, TS_CAL2),
-internal vref (VREFIN_CAL), unique device ID...
-
-Required properties:
-- compatible:		Should be one of:
-			"st,stm32f4-otp"
-			"st,stm32mp15-bsec"
-- reg:			Offset and length of factory-programmed area.
-- #address-cells:	Should be '<1>'.
-- #size-cells:		Should be '<1>'.
-
-Optional Data cells:
-- Must be child nodes as described in nvmem.txt.
-
-Example on stm32f4:
-	romem: nvmem@1fff7800 {
-		compatible = "st,stm32f4-otp";
-		reg = <0x1fff7800 0x400>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-
-		/* Data cells: ts_cal1 at 0x1fff7a2c */
-		ts_cal1: calib@22c {
-			reg = <0x22c 0x2>;
-		};
-		...
-	};
diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
new file mode 100644
index 000000000000..d84deb4774a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/st,stm32-romem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Factory-programmed data bindings
+
+description: |
+  This represents STM32 Factory-programmed read only non-volatile area: locked
+  flash, OTP, read-only HW regs... This contains various information such as:
+  analog calibration data for temperature sensor (e.g. TS_CAL1, TS_CAL2),
+  internal vref (VREFIN_CAL), unique device ID...
+
+maintainers:
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+allOf:
+  - $ref: "nvmem.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f4-otp
+      - st,stm32mp15-bsec
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+
+examples:
+  - |
+    efuse@1fff7800 {
+      compatible = "st,stm32f4-otp";
+      reg = <0x1fff7800 0x400>;
+      #address-cells = <1>;
+      #size-cells = <1>;
+
+      calib@22c {
+        reg = <0x22c 0x2>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
new file mode 100644
index 000000000000..aef87a33a7c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/allwinner,sun50i-h6-operating-points.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner H6 CPU OPP Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description: |
+  For some SoCs, the CPU frequency subset and voltage value of each
+  OPP varies based on the silicon variant in use. Allwinner Process
+  Voltage Scaling Tables defines the voltage and frequency value based
+  on the speedbin blown in the efuse combination. The
+  sun50i-cpufreq-nvmem driver reads the efuse value from the SoC to
+  provide the OPP framework with required information.
+
+properties:
+  compatible:
+    const: allwinner,sun50i-h6-operating-points
+
+  nvmem-cells:
+    description: |
+      A phandle pointing to a nvmem-cells node representing the efuse
+      registers that has information about the speedbin that is used
+      to select the right frequency/voltage value pair. Please refer
+      the for nvmem-cells bindings
+      Documentation/devicetree/bindings/nvmem/nvmem.txt and also
+      examples below.
+
+required:
+  - compatible
+  - nvmem-cells
+
+patternProperties:
+  "opp-[0-9]+":
+    type: object
+
+    properties:
+      opp-hz: true
+
+    patternProperties:
+      "opp-microvolt-.*": true
+
+    required:
+      - opp-hz
+      - opp-microvolt-speed0
+      - opp-microvolt-speed1
+      - opp-microvolt-speed2
+
+    unevaluatedProperties: false
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    cpu_opp_table: opp-table {
+        compatible = "allwinner,sun50i-h6-operating-points";
+        nvmem-cells = <&speedbin_efuse>;
+        opp-shared;
+
+        opp-480000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <480000000>;
+
+            opp-microvolt-speed0 = <880000>;
+            opp-microvolt-speed1 = <820000>;
+            opp-microvolt-speed2 = <800000>;
+        };
+
+        opp-720000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <720000000>;
+
+            opp-microvolt-speed0 = <880000>;
+            opp-microvolt-speed1 = <820000>;
+            opp-microvolt-speed2 = <800000>;
+        };
+
+        opp-816000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <816000000>;
+
+            opp-microvolt-speed0 = <880000>;
+            opp-microvolt-speed1 = <820000>;
+            opp-microvolt-speed2 = <800000>;
+        };
+
+        opp-888000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <888000000>;
+
+            opp-microvolt-speed0 = <940000>;
+            opp-microvolt-speed1 = <820000>;
+            opp-microvolt-speed2 = <800000>;
+        };
+
+        opp-1080000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <1080000000>;
+
+            opp-microvolt-speed0 = <1060000>;
+            opp-microvolt-speed1 = <880000>;
+            opp-microvolt-speed2 = <840000>;
+        };
+
+        opp-1320000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <1320000000>;
+
+            opp-microvolt-speed0 = <1160000>;
+            opp-microvolt-speed1 = <940000>;
+            opp-microvolt-speed2 = <900000>;
+        };
+
+        opp-1488000000 {
+            clock-latency-ns = <244144>; /* 8 32k periods */
+            opp-hz = /bits/ 64 <1488000000>;
+
+            opp-microvolt-speed0 = <1160000>;
+            opp-microvolt-speed1 = <1000000>;
+            opp-microvolt-speed2 = <960000>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt b/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
deleted file mode 100644
index 7deae57a587b..000000000000
--- a/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-Allwinner Technologies, Inc. NVMEM CPUFreq and OPP bindings
-===================================
-
-For some SoCs, the CPU frequency subset and voltage value of each OPP
-varies based on the silicon variant in use. Allwinner Process Voltage
-Scaling Tables defines the voltage and frequency value based on the
-speedbin blown in the efuse combination. The sun50i-cpufreq-nvmem driver
-reads the efuse value from the SoC to provide the OPP framework with
-required information.
-
-Required properties:
---------------------
-In 'cpus' nodes:
-- operating-points-v2: Phandle to the operating-points-v2 table to use.
-
-In 'operating-points-v2' table:
-- compatible: Should be
-	- 'allwinner,sun50i-h6-operating-points'.
-- nvmem-cells: A phandle pointing to a nvmem-cells node representing the
-		efuse registers that has information about the speedbin
-		that is used to select the right frequency/voltage value
-		pair. Please refer the for nvmem-cells bindings
-		Documentation/devicetree/bindings/nvmem/nvmem.txt and
-		also examples below.
-
-In every OPP node:
-- opp-microvolt-<name>: Voltage in micro Volts.
-			At runtime, the platform can pick a <name> and
-			matching opp-microvolt-<name> property.
-			[See: opp.txt]
-			HW:		<name>:
-			sun50i-h6	speed0 speed1 speed2
-
-Example 1:
----------
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu0: cpu@0 {
-			compatible = "arm,cortex-a53";
-			device_type = "cpu";
-			reg = <0>;
-			enable-method = "psci";
-			clocks = <&ccu CLK_CPUX>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-			operating-points-v2 = <&cpu_opp_table>;
-			#cooling-cells = <2>;
-		};
-
-		cpu1: cpu@1 {
-			compatible = "arm,cortex-a53";
-			device_type = "cpu";
-			reg = <1>;
-			enable-method = "psci";
-			clocks = <&ccu CLK_CPUX>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-			operating-points-v2 = <&cpu_opp_table>;
-			#cooling-cells = <2>;
-		};
-
-		cpu2: cpu@2 {
-			compatible = "arm,cortex-a53";
-			device_type = "cpu";
-			reg = <2>;
-			enable-method = "psci";
-			clocks = <&ccu CLK_CPUX>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-			operating-points-v2 = <&cpu_opp_table>;
-			#cooling-cells = <2>;
-		};
-
-		cpu3: cpu@3 {
-			compatible = "arm,cortex-a53";
-			device_type = "cpu";
-			reg = <3>;
-			enable-method = "psci";
-			clocks = <&ccu CLK_CPUX>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-			operating-points-v2 = <&cpu_opp_table>;
-			#cooling-cells = <2>;
-		};
-        };
-
-        cpu_opp_table: opp_table {
-                compatible = "allwinner,sun50i-h6-operating-points";
-                nvmem-cells = <&speedbin_efuse>;
-                opp-shared;
-
-                opp@480000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <480000000>;
-
-                        opp-microvolt-speed0 = <880000>;
-                        opp-microvolt-speed1 = <820000>;
-                        opp-microvolt-speed2 = <800000>;
-                };
-
-                opp@720000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <720000000>;
-
-                        opp-microvolt-speed0 = <880000>;
-                        opp-microvolt-speed1 = <820000>;
-                        opp-microvolt-speed2 = <800000>;
-                };
-
-                opp@816000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <816000000>;
-
-                        opp-microvolt-speed0 = <880000>;
-                        opp-microvolt-speed1 = <820000>;
-                        opp-microvolt-speed2 = <800000>;
-                };
-
-                opp@888000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <888000000>;
-
-                        opp-microvolt-speed0 = <940000>;
-                        opp-microvolt-speed1 = <820000>;
-                        opp-microvolt-speed2 = <800000>;
-                };
-
-                opp@1080000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <1080000000>;
-
-                        opp-microvolt-speed0 = <1060000>;
-                        opp-microvolt-speed1 = <880000>;
-                        opp-microvolt-speed2 = <840000>;
-                };
-
-                opp@1320000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <1320000000>;
-
-                        opp-microvolt-speed0 = <1160000>;
-                        opp-microvolt-speed1 = <940000>;
-                        opp-microvolt-speed2 = <900000>;
-                };
-
-                opp@1488000000 {
-                        clock-latency-ns = <244144>; /* 8 32k periods */
-                        opp-hz = /bits/ 64 <1488000000>;
-
-                        opp-microvolt-speed0 = <1160000>;
-                        opp-microvolt-speed1 = <1000000>;
-                        opp-microvolt-speed2 = <960000>;
-                };
-        };
-....
-soc {
-....
-	sid: sid@3006000 {
-		compatible = "allwinner,sun50i-h6-sid";
-		reg = <0x03006000 0x400>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		....
-		speedbin_efuse: speed@1c {
-			reg = <0x1c 4>;
-		};
-        };
-};
diff --git a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
index efa2c8b9b85a..84fdc422792e 100644
--- a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
@@ -9,13 +9,16 @@ Additional properties are described here:
 
 Required properties:
 - compatible:
-	should contain "amlogic,axg-pcie" to identify the core.
+	should contain :
+	- "amlogic,axg-pcie" for AXG SoC Family
+	- "amlogic,g12a-pcie" for G12A SoC Family
+	to identify the core.
 - reg:
 	should contain the configuration address space.
 - reg-names: Must be
 	- "elbi"	External local bus interface registers
 	- "cfg"		Meson specific registers
-	- "phy"		Meson PCIE PHY registers
+	- "phy"		Meson PCIE PHY registers for AXG SoC Family
 	- "config"	PCIe configuration space
 - reset-gpios: The GPIO to generate PCIe PERST# assert and deassert signal.
 - clocks: Must contain an entry for each entry in clock-names.
@@ -23,12 +26,13 @@ Required properties:
 	- "pclk"       PCIe GEN 100M PLL clock
 	- "port"       PCIe_x(A or B) RC clock gate
 	- "general"    PCIe Phy clock
-	- "mipi"       PCIe_x(A or B) 100M ref clock gate
+	- "mipi"       PCIe_x(A or B) 100M ref clock gate for AXG SoC Family
 - resets: phandle to the reset lines.
 - reset-names: must contain "phy" "port" and "apb"
-       - "phy"         Share PHY reset
+       - "phy"         Share PHY reset for AXG SoC Family
        - "port"        Port A or B reset
        - "apb"         Share APB reset
+- phys: should contain a phandle to the shared phy for G12A SoC Family
 - device_type:
 	should be "pci". As specified in designware-pcie.txt
 
diff --git a/Documentation/devicetree/bindings/pci/arm,juno-r1-pcie.txt b/Documentation/devicetree/bindings/pci/arm,juno-r1-pcie.txt
deleted file mode 100644
index f7514c170a32..000000000000
--- a/Documentation/devicetree/bindings/pci/arm,juno-r1-pcie.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-* ARM Juno R1 PCIe interface
-
-This PCIe host controller is based on PLDA XpressRICH3-AXI IP
-and thus inherits all the common properties defined in plda,xpressrich3-axi.txt
-as well as the base properties defined in host-generic-pci.txt.
-
-Required properties:
- - compatible: "arm,juno-r1-pcie"
- - dma-coherent: The host controller bridges the AXI transactions into PCIe bus
-   in a manner that makes the DMA operations to appear coherent to the CPUs.
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie-ecam.txt b/Documentation/devicetree/bindings/pci/designware-pcie-ecam.txt
deleted file mode 100644
index 515b2f9542e5..000000000000
--- a/Documentation/devicetree/bindings/pci/designware-pcie-ecam.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-* Synopsys DesignWare PCIe root complex in ECAM shift mode
-
-In some cases, firmware may already have configured the Synopsys DesignWare
-PCIe controller in RC mode with static ATU window mappings that cover all
-config, MMIO and I/O spaces in a [mostly] ECAM compatible fashion.
-In this case, there is no need for the OS to perform any low level setup
-of clocks, PHYs or device registers, nor is there any reason for the driver
-to reconfigure ATU windows for config and/or IO space accesses at runtime.
-
-In cases where the IP was synthesized with a minimum ATU window size of
-64 KB, it cannot be supported by the generic ECAM driver, because it
-requires special config space accessors that filter accesses to device #1
-and beyond on the first bus.
-
-Required properties:
-- compatible: "marvell,armada8k-pcie-ecam" or
-              "socionext,synquacer-pcie-ecam" or
-              "snps,dw-pcie-ecam" (must be preceded by a more specific match)
-
-Please refer to the binding document of "pci-host-ecam-generic" in the
-file host-generic-pci.txt for a description of the remaining required
-and optional properties.
-
-Example:
-
-    pcie1: pcie@7f000000 {
-        compatible = "socionext,synquacer-pcie-ecam", "snps,dw-pcie-ecam";
-        device_type = "pci";
-        reg = <0x0 0x7f000000 0x0 0xf00000>;
-        bus-range = <0x0 0xe>;
-        #address-cells = <3>;
-        #size-cells = <2>;
-        ranges = <0x1000000 0x00 0x00010000 0x00 0x7ff00000 0x0 0x00010000>,
-                 <0x2000000 0x00 0x70000000 0x00 0x70000000 0x0 0x0f000000>,
-                 <0x3000000 0x3f 0x00000000 0x3f 0x00000000 0x1 0x00000000>;
-
-        #interrupt-cells = <0x1>;
-        interrupt-map-mask = <0x0 0x0 0x0 0x0>;
-        interrupt-map = <0x0 0x0 0x0 0x0 &gic 0x0 0x0 0x0 182 0x4>;
-        msi-map = <0x0 &its 0x0 0x10000>;
-        dma-coherent;
-    };
diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
index 0dcb87d6554f..d6796ef54ea1 100644
--- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@ -41,45 +41,3 @@ Hip05 Example (note that Hip06 is the same except compatible):
 				 0x0 0 0 3 &mbigen_pcie 3 12
 				 0x0 0 0 4 &mbigen_pcie 4 13>;
 	};
-
-HiSilicon Hip06/Hip07 PCIe host bridge DT (almost-ECAM) description.
-
-Some BIOSes place the host controller in a mode where it is ECAM
-compliant for all devices other than the root complex. In such cases,
-the host controller should be described as below.
-
-The properties and their meanings are identical to those described in
-host-generic-pci.txt except as listed below.
-
-Properties of the host controller node that differ from
-host-generic-pci.txt:
-
-- compatible     : Must be "hisilicon,hip06-pcie-ecam", or
-		   "hisilicon,hip07-pcie-ecam"
-
-- reg            : Two entries: First the ECAM configuration space for any
-		   other bus underneath the root bus. Second, the base
-		   and size of the HiSilicon host bridge registers include
-		   the RC's own config space.
-
-Example:
-	pcie0: pcie@a0090000 {
-		compatible = "hisilicon,hip06-pcie-ecam";
-		reg = <0 0xb0000000 0 0x2000000>,  /*  ECAM configuration space */
-		      <0 0xa0090000 0 0x10000>; /* host bridge registers */
-		bus-range = <0  31>;
-		msi-map = <0x0000 &its_dsa 0x0000 0x2000>;
-		msi-map-mask = <0xffff>;
-		#address-cells = <3>;
-		#size-cells = <2>;
-		device_type = "pci";
-		dma-coherent;
-		ranges = <0x02000000 0 0xb2000000 0x0 0xb2000000 0 0x5ff0000
-			  0x01000000 0 0 0 0xb7ff0000 0 0x10000>;
-		#interrupt-cells = <1>;
-		interrupt-map-mask = <0xf800 0 0 7>;
-		interrupt-map = <0x0 0 0 1 &mbigen_pcie0 650 4
-				 0x0 0 0 2 &mbigen_pcie0 650 4
-				 0x0 0 0 3 &mbigen_pcie0 650 4
-				 0x0 0 0 4 &mbigen_pcie0 650 4>;
-	};
diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.txt b/Documentation/devicetree/bindings/pci/host-generic-pci.txt
deleted file mode 100644
index 614b594f4e72..000000000000
--- a/Documentation/devicetree/bindings/pci/host-generic-pci.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-* Generic PCI host controller
-
-Firmware-initialised PCI host controllers and PCI emulations, such as the
-virtio-pci implementations found in kvmtool and other para-virtualised
-systems, do not require driver support for complexities such as regulator
-and clock management. In fact, the controller may not even require the
-configuration of a control interface by the operating system, instead
-presenting a set of fixed windows describing a subset of IO, Memory and
-Configuration Spaces.
-
-Such a controller can be described purely in terms of the standardized device
-tree bindings communicated in pci.txt:
-
-
-Properties of the host controller node:
-
-- compatible     : Must be "pci-host-cam-generic" or "pci-host-ecam-generic"
-                   depending on the layout of configuration space (CAM vs
-                   ECAM respectively).
-
-- device_type    : Must be "pci".
-
-- ranges         : As described in IEEE Std 1275-1994, but must provide
-                   at least a definition of non-prefetchable memory. One
-                   or both of prefetchable Memory and IO Space may also
-                   be provided.
-
-- bus-range      : Optional property (also described in IEEE Std 1275-1994)
-                   to indicate the range of bus numbers for this controller.
-                   If absent, defaults to <0 255> (i.e. all buses).
-
-- #address-cells : Must be 3.
-
-- #size-cells    : Must be 2.
-
-- reg            : The Configuration Space base address and size, as accessed
-                   from the parent bus.  The base address corresponds to
-                   the first bus in the "bus-range" property.  If no
-                   "bus-range" is specified, this will be bus 0 (the default).
-
-Properties of the /chosen node:
-
-- linux,pci-probe-only
-                 : Optional property which takes a single-cell argument.
-                   If '0', then Linux will assign devices in its usual manner,
-                   otherwise it will not try to assign devices and instead use
-                   them as they are configured already.
-
-Configuration Space is assumed to be memory-mapped (as opposed to being
-accessed via an ioport) and laid out with a direct correspondence to the
-geography of a PCI bus address by concatenating the various components to
-form an offset.
-
-For CAM, this 24-bit offset is:
-
-        cfg_offset(bus, device, function, register) =
-                   bus << 16 | device << 11 | function << 8 | register
-
-While ECAM extends this by 4 bits to accommodate 4k of function space:
-
-        cfg_offset(bus, device, function, register) =
-                   bus << 20 | device << 15 | function << 12 | register
-
-Interrupt mapping is exactly as described in `Open Firmware Recommended
-Practice: Interrupt Mapping' and requires the following properties:
-
-- #interrupt-cells   : Must be 1
-
-- interrupt-map      : <see aforementioned specification>
-
-- interrupt-map-mask : <see aforementioned specification>
-
-
-Example:
-
-pci {
-    compatible = "pci-host-cam-generic"
-    device_type = "pci";
-    #address-cells = <3>;
-    #size-cells = <2>;
-    bus-range = <0x0 0x1>;
-
-    // CPU_PHYSICAL(2)  SIZE(2)
-    reg = <0x0 0x40000000  0x0 0x1000000>;
-
-    // BUS_ADDRESS(3)  CPU_PHYSICAL(2)  SIZE(2)
-    ranges = <0x01000000 0x0 0x01000000  0x0 0x01000000  0x0 0x00010000>,
-             <0x02000000 0x0 0x41000000  0x0 0x41000000  0x0 0x3f000000>;
-
-
-    #interrupt-cells = <0x1>;
-
-    // PCI_DEVICE(3)  INT#(1)  CONTROLLER(PHANDLE)  CONTROLLER_DATA(3)
-    interrupt-map = <  0x0 0x0 0x0  0x1  &gic  0x0 0x4 0x1
-                     0x800 0x0 0x0  0x1  &gic  0x0 0x5 0x1
-                    0x1000 0x0 0x0  0x1  &gic  0x0 0x6 0x1
-                    0x1800 0x0 0x0  0x1  &gic  0x0 0x7 0x1>;
-
-    // PCI_DEVICE(3)  INT#(1)
-    interrupt-map-mask = <0xf800 0x0 0x0  0x7>;
-}
diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml
new file mode 100644
index 000000000000..47353d0cd394
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml
@@ -0,0 +1,172 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/host-generic-pci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic PCI host controller
+
+maintainers:
+  - Will Deacon <will@kernel.org>
+
+description: |
+  Firmware-initialised PCI host controllers and PCI emulations, such as the
+  virtio-pci implementations found in kvmtool and other para-virtualised
+  systems, do not require driver support for complexities such as regulator
+  and clock management. In fact, the controller may not even require the
+  configuration of a control interface by the operating system, instead
+  presenting a set of fixed windows describing a subset of IO, Memory and
+  Configuration Spaces.
+
+  Configuration Space is assumed to be memory-mapped (as opposed to being
+  accessed via an ioport) and laid out with a direct correspondence to the
+  geography of a PCI bus address by concatenating the various components to
+  form an offset.
+
+  For CAM, this 24-bit offset is:
+
+          cfg_offset(bus, device, function, register) =
+                     bus << 16 | device << 11 | function << 8 | register
+
+  While ECAM extends this by 4 bits to accommodate 4k of function space:
+
+          cfg_offset(bus, device, function, register) =
+                     bus << 20 | device << 15 | function << 12 | register
+
+properties:
+  compatible:
+    description: Depends on the layout of configuration space (CAM vs ECAM
+      respectively). May also have more specific compatibles.
+    oneOf:
+      - description:
+          PCIe host controller in Arm Juno based on PLDA XpressRICH3-AXI IP
+        items:
+          - const: arm,juno-r1-pcie
+          - const: plda,xpressrich3-axi
+          - const: pci-host-ecam-generic
+      - description: |
+          ThunderX PCI host controller for pass-1.x silicon
+
+          Firmware-initialized PCI host controller to on-chip devices found on
+          some Cavium ThunderX processors.  These devices have ECAM-based config
+          access, but the BARs are all at fixed addresses.  We handle the fixed
+          addresses by synthesizing Enhanced Allocation (EA) capabilities for
+          these devices.
+        const: cavium,pci-host-thunder-ecam
+      - description:
+          Cavium ThunderX PEM firmware-initialized PCIe host controller
+        const: cavium,pci-host-thunder-pem
+      - description:
+          HiSilicon Hip06/Hip07 PCIe host bridge in almost-ECAM mode. Some
+          firmware places the host controller in a mode where it is ECAM
+          compliant for all devices other than the root complex.
+        enum:
+          - hisilicon,hip06-pcie-ecam
+          - hisilicon,hip07-pcie-ecam
+      - description: |
+          In some cases, firmware may already have configured the Synopsys
+          DesignWare PCIe controller in RC mode with static ATU window mappings
+          that cover all config, MMIO and I/O spaces in a [mostly] ECAM
+          compatible fashion. In this case, there is no need for the OS to
+          perform any low level setup of clocks, PHYs or device registers, nor
+          is there any reason for the driver to reconfigure ATU windows for
+          config and/or IO space accesses at runtime.
+
+          In cases where the IP was synthesized with a minimum ATU window size
+          of 64 KB, it cannot be supported by the generic ECAM driver, because
+          it requires special config space accessors that filter accesses to
+          device #1 and beyond on the first bus.
+        items:
+          - enum:
+              - marvell,armada8k-pcie-ecam
+              - socionext,synquacer-pcie-ecam
+          - const: snps,dw-pcie-ecam
+      - description:
+          CAM or ECAM compliant PCI host controllers without any quirks
+        enum:
+          - pci-host-cam-generic
+          - pci-host-ecam-generic
+
+  reg:
+    description:
+      The Configuration Space base address and size, as accessed from the parent
+      bus. The base address corresponds to the first bus in the "bus-range"
+      property. If no "bus-range" is specified, this will be bus 0 (the
+      default). Some host controllers have a 2nd non-compliant address range,
+      so 2 entries are allowed.
+    minItems: 1
+    maxItems: 2
+
+  ranges:
+    description:
+      As described in IEEE Std 1275-1994, but must provide at least a
+      definition of non-prefetchable memory. One or both of prefetchable Memory
+      and IO Space may also be provided.
+    minItems: 1
+    maxItems: 3
+
+  dma-coherent: true
+
+required:
+  - compatible
+  - reg
+  - ranges
+
+allOf:
+  - $ref: /schemas/pci/pci-bus.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: arm,juno-r1-pcie
+    then:
+      required:
+        - dma-coherent
+
+  - if:
+      properties:
+        compatible:
+          not:
+            contains:
+              enum:
+                - cavium,pci-host-thunder-pem
+                - hisilicon,hip06-pcie-ecam
+                - hisilicon,hip07-pcie-ecam
+    then:
+      properties:
+        reg:
+          maxItems: 1
+
+examples:
+  - |
+
+    bus {
+        #address-cells = <2>;
+        #size-cells = <2>;
+        pcie@40000000 {
+            compatible = "pci-host-cam-generic";
+            device_type = "pci";
+            #address-cells = <3>;
+            #size-cells = <2>;
+            bus-range = <0x0 0x1>;
+
+            // CPU_PHYSICAL(2)  SIZE(2)
+            reg = <0x0 0x40000000  0x0 0x1000000>;
+
+            // BUS_ADDRESS(3)  CPU_PHYSICAL(2)  SIZE(2)
+            ranges = <0x01000000 0x0 0x01000000  0x0 0x01000000  0x0 0x00010000>,
+                     <0x02000000 0x0 0x41000000  0x0 0x41000000  0x0 0x3f000000>;
+
+            #interrupt-cells = <0x1>;
+
+            // PCI_DEVICE(3)  INT#(1)  CONTROLLER(PHANDLE)  CONTROLLER_DATA(3)
+            interrupt-map = <   0x0 0x0 0x0  0x1  &gic  0x0 0x4 0x1>,
+                            < 0x800 0x0 0x0  0x1  &gic  0x0 0x5 0x1>,
+                            <0x1000 0x0 0x0  0x1  &gic  0x0 0x6 0x1>,
+                            <0x1800 0x0 0x0  0x1  &gic  0x0 0x7 0x1>;
+
+            // PCI_DEVICE(3)  INT#(1)
+            interrupt-map-mask = <0xf800 0x0 0x0  0x7>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
index e20ceaab9b38..99a386ea691c 100644
--- a/Documentation/devicetree/bindings/pci/layerscape-pci.txt
+++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
@@ -21,6 +21,7 @@ Required properties:
         "fsl,ls1046a-pcie"
         "fsl,ls1043a-pcie"
         "fsl,ls1012a-pcie"
+        "fsl,ls1028a-pcie"
   EP mode:
 	"fsl,ls1046a-pcie-ep", "fsl,ls-pcie-ep"
 - reg: base addresses and lengths of the PCIe controller register blocks.
diff --git a/Documentation/devicetree/bindings/pci/pci-thunder-ecam.txt b/Documentation/devicetree/bindings/pci/pci-thunder-ecam.txt
deleted file mode 100644
index f478874b79ce..000000000000
--- a/Documentation/devicetree/bindings/pci/pci-thunder-ecam.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* ThunderX PCI host controller for pass-1.x silicon
-
-Firmware-initialized PCI host controller to on-chip devices found on
-some Cavium ThunderX processors.  These devices have ECAM-based config
-access, but the BARs are all at fixed addresses.  We handle the fixed
-addresses by synthesizing Enhanced Allocation (EA) capabilities for
-these devices.
-
-The properties and their meanings are identical to those described in
-host-generic-pci.txt except as listed below.
-
-Properties of the host controller node that differ from
-host-generic-pci.txt:
-
-- compatible     : Must be "cavium,pci-host-thunder-ecam"
-
-Example:
-
-	pcie@84b000000000 {
-		compatible = "cavium,pci-host-thunder-ecam";
-		device_type = "pci";
-		msi-parent = <&its>;
-		msi-map = <0 &its 0x30000 0x10000>;
-		bus-range = <0 31>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		#stream-id-cells = <1>;
-		reg = <0x84b0 0x00000000 0 0x02000000>;  /* Configuration space */
-		ranges = <0x03000000 0x8180 0x00000000 0x8180 0x00000000 0x80 0x00000000>; /* mem ranges */
-	};
diff --git a/Documentation/devicetree/bindings/pci/pci-thunder-pem.txt b/Documentation/devicetree/bindings/pci/pci-thunder-pem.txt
deleted file mode 100644
index f131faea3b7c..000000000000
--- a/Documentation/devicetree/bindings/pci/pci-thunder-pem.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-* ThunderX PEM PCIe host controller
-
-Firmware-initialized PCI host controller found on some Cavium
-ThunderX processors.
-
-The properties and their meanings are identical to those described in
-host-generic-pci.txt except as listed below.
-
-Properties of the host controller node that differ from
-host-generic-pci.txt:
-
-- compatible     : Must be "cavium,pci-host-thunder-pem"
-
-- reg            : Two entries: First the configuration space for down
-                   stream devices base address and size, as accessed
-                   from the parent bus. Second, the register bank of
-                   the PEM device PCIe bridge.
-
-Example:
-
-    pci@87e0,c2000000 {
-	compatible = "cavium,pci-host-thunder-pem";
-	device_type = "pci";
-	msi-parent = <&its>;
-	msi-map = <0 &its 0x10000 0x10000>;
-	bus-range = <0x8f 0xc7>;
-	#size-cells = <2>;
-	#address-cells = <3>;
-
-	reg = <0x8880 0x8f000000 0x0 0x39000000>,  /* Configuration space */
-	      <0x87e0 0xc2000000 0x0 0x00010000>; /* PEM space */
-	ranges = <0x01000000 0x00 0x00020000 0x88b0 0x00020000 0x00 0x00010000>, /* I/O */
-		 <0x03000000 0x00 0x10000000 0x8890 0x10000000 0x0f 0xf0000000>, /* mem64 */
-		 <0x43000000 0x10 0x00000000 0x88a0 0x00000000 0x10 0x00000000>, /* mem64-pref */
-		 <0x03000000 0x87e0 0xc2f00000 0x87e0 0xc2000000 0x00 0x00100000>; /* mem64 PEM BAR4 */
-
-	#interrupt-cells = <1>;
-	interrupt-map-mask = <0 0 0 7>;
-	interrupt-map = <0 0 0 1 &gic0 0 0 0 24 4>, /* INTA */
-			<0 0 0 2 &gic0 0 0 0 25 4>, /* INTB */
-			<0 0 0 3 &gic0 0 0 0 26 4>, /* INTC */
-			<0 0 0 4 &gic0 0 0 0 27 4>; /* INTD */
-    };
diff --git a/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi.txt b/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi.txt
deleted file mode 100644
index f3f75bfb42bc..000000000000
--- a/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-* PLDA XpressRICH3-AXI host controller
-
-The PLDA XpressRICH3-AXI host controller can be configured in a manner that
-makes it compliant with the SBSA[1] standard published by ARM Ltd. For those
-scenarios, the host-generic-pci.txt bindings apply with the following additions
-to the compatible property:
-
-Required properties:
- - compatible: should contain "plda,xpressrich3-axi" to identify the IP used.
-
-
-[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0029a/
diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
index 45bba9f88a51..12702c8c46ce 100644
--- a/Documentation/devicetree/bindings/pci/rcar-pci.txt
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -4,6 +4,7 @@ Required properties:
 compatible: "renesas,pcie-r8a7743" for the R8A7743 SoC;
 	    "renesas,pcie-r8a7744" for the R8A7744 SoC;
 	    "renesas,pcie-r8a774a1" for the R8A774A1 SoC;
+	    "renesas,pcie-r8a774b1" for the R8A774B1 SoC;
 	    "renesas,pcie-r8a774c0" for the R8A774C0 SoC;
 	    "renesas,pcie-r8a7779" for the R8A7779 SoC;
 	    "renesas,pcie-r8a7790" for the R8A7790 SoC;
diff --git a/Documentation/devicetree/bindings/pci/versatile.txt b/Documentation/devicetree/bindings/pci/versatile.txt
deleted file mode 100644
index 0a702b13d2ac..000000000000
--- a/Documentation/devicetree/bindings/pci/versatile.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-* ARM Versatile Platform Baseboard PCI interface
-
-PCI host controller found on the ARM Versatile PB board's FPGA.
-
-Required properties:
-- compatible: should contain "arm,versatile-pci" to identify the Versatile PCI
-  controller.
-- reg: base addresses and lengths of the PCI controller. There must be 3
-  entries:
-	- Versatile-specific registers
-	- Self Config space
-	- Config space
-- #address-cells: set to <3>
-- #size-cells: set to <2>
-- device_type: set to "pci"
-- bus-range: set to <0 0xff>
-- ranges: ranges for the PCI memory and I/O regions
-- #interrupt-cells: set to <1>
-- interrupt-map-mask and interrupt-map: standard PCI properties to define
-	the mapping of the PCI interface to interrupt numbers.
-
-Example:
-
-pci-controller@10001000 {
-	compatible = "arm,versatile-pci";
-	device_type = "pci";
-	reg = <0x10001000 0x1000
-	       0x41000000 0x10000
-	       0x42000000 0x100000>;
-	bus-range = <0 0xff>;
-	#address-cells = <3>;
-	#size-cells = <2>;
-	#interrupt-cells = <1>;
-
-	ranges = <0x01000000 0 0x00000000 0x43000000 0 0x00010000   /* downstream I/O */
-		  0x02000000 0 0x50000000 0x50000000 0 0x10000000   /* non-prefetchable memory */
-		  0x42000000 0 0x60000000 0x60000000 0 0x10000000>; /* prefetchable memory */
-
-	interrupt-map-mask = <0x1800 0 0 7>;
-	interrupt-map = <0x1800 0 0 1 &sic 28
-			 0x1800 0 0 2 &sic 29
-			 0x1800 0 0 3 &sic 30
-			 0x1800 0 0 4 &sic 27
-
-			 0x1000 0 0 1 &sic 27
-			 0x1000 0 0 2 &sic 28
-			 0x1000 0 0 3 &sic 29
-			 0x1000 0 0 4 &sic 30
-
-			 0x0800 0 0 1 &sic 30
-			 0x0800 0 0 2 &sic 27
-			 0x0800 0 0 3 &sic 28
-			 0x0800 0 0 4 &sic 29
-
-			 0x0000 0 0 1 &sic 29
-			 0x0000 0 0 2 &sic 30
-			 0x0000 0 0 3 &sic 27
-			 0x0000 0 0 4 &sic 28>;
-};
diff --git a/Documentation/devicetree/bindings/pci/versatile.yaml b/Documentation/devicetree/bindings/pci/versatile.yaml
new file mode 100644
index 000000000000..07a48c27db1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/versatile.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/versatile.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Platform Baseboard PCI interface
+
+maintainers:
+  - Rob Herring <robh@kernel.org>
+
+description: |+
+  PCI host controller found on the ARM Versatile PB board's FPGA.
+
+allOf:
+  - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+  compatible:
+    const: arm,versatile-pci
+
+  reg:
+    items:
+      - description: Versatile-specific registers
+      - description: Self Config space
+      - description: Config space
+
+  ranges:
+    maxItems: 3
+
+  "#interrupt-cells": true
+
+  interrupt-map:
+    maxItems: 16
+
+  interrupt-map-mask:
+    items:
+      - const: 0x1800
+      - const: 0
+      - const: 0
+      - const: 7
+
+required:
+  - compatible
+  - reg
+  - ranges
+  - "#interrupt-cells"
+  - interrupt-map
+  - interrupt-map-mask
+
+examples:
+  - |
+    pci@10001000 {
+      compatible = "arm,versatile-pci";
+      device_type = "pci";
+      reg = <0x10001000 0x1000>,
+            <0x41000000 0x10000>,
+            <0x42000000 0x100000>;
+      bus-range = <0 0xff>;
+      #address-cells = <3>;
+      #size-cells = <2>;
+      #interrupt-cells = <1>;
+
+      ranges =
+          <0x01000000 0 0x00000000 0x43000000 0 0x00010000>,  /* downstream I/O */
+          <0x02000000 0 0x50000000 0x50000000 0 0x10000000>,  /* non-prefetchable memory */
+          <0x42000000 0 0x60000000 0x60000000 0 0x10000000>;  /* prefetchable memory */
+
+      interrupt-map-mask = <0x1800 0 0 7>;
+      interrupt-map = <0x1800 0 0 1 &sic 28>,
+          <0x1800 0 0 2 &sic 29>,
+          <0x1800 0 0 3 &sic 30>,
+          <0x1800 0 0 4 &sic 27>,
+
+          <0x1000 0 0 1 &sic 27>,
+          <0x1000 0 0 2 &sic 28>,
+          <0x1000 0 0 3 &sic 29>,
+          <0x1000 0 0 4 &sic 30>,
+
+          <0x0800 0 0 1 &sic 30>,
+          <0x0800 0 0 2 &sic 27>,
+          <0x0800 0 0 3 &sic 28>,
+          <0x0800 0 0 4 &sic 29>,
+
+          <0x0000 0 0 1 &sic 29>,
+          <0x0000 0 0 2 &sic 30>,
+          <0x0000 0 0 3 &sic 27>,
+          <0x0000 0 0 4 &sic 28>;
+    };
+
+
+...
diff --git a/Documentation/devicetree/bindings/perf/arm-ccn.txt b/Documentation/devicetree/bindings/perf/arm-ccn.txt
index 43b5a71a5a9d..1c53b5aa3317 100644
--- a/Documentation/devicetree/bindings/perf/arm-ccn.txt
+++ b/Documentation/devicetree/bindings/perf/arm-ccn.txt
@@ -6,6 +6,7 @@ Required properties:
 	"arm,ccn-502"
 	"arm,ccn-504"
 	"arm,ccn-508"
+	"arm,ccn-512"
 
 - reg: (standard registers property) physical address and size
 	(16MB) of the configuration registers block
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
index d77e3f26f9e6..7822a806ea0a 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -5,6 +5,7 @@ Required properties:
 - compatible: should be one of:
 	"fsl,imx8-ddr-pmu"
 	"fsl,imx8m-ddr-pmu"
+	"fsl,imx8mp-ddr-pmu"
 
 - reg: physical address and size
 
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml
new file mode 100644
index 000000000000..020ef9e4c411
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun4i-a10-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-usb-phy
+      - allwinner,sun7i-a20-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU1 registers
+      - description: PHY PMU2 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu1
+      - const: pmu2
+
+  clocks:
+    maxItems: 1
+    description: USB PHY bus clock
+
+  clock-names:
+    const: usb_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+      - description: USB Host 2 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+      - const: usb2_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+  usb2_vbus-supply:
+    description: Regulator controlling USB2 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun4i-a10-ccu.h>
+    #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+    usbphy: phy@01c13400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun4i-a10-usb-phy";
+        reg = <0x01c13400 0x10>, <0x01c14800 0x4>, <0x01c1c800 0x4>;
+        reg-names = "phy_ctrl", "pmu1", "pmu2";
+        clocks = <&ccu CLK_USB_PHY>;
+        clock-names = "usb_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>,
+                 <&ccu RST_USB_PHY2>;
+        reset-names = "usb0_reset", "usb1_reset", "usb2_reset";
+        usb0_id_det-gpios = <&pio 7 19 GPIO_ACTIVE_HIGH>;
+        usb0_vbus_det-gpios = <&pio 7 22 GPIO_ACTIVE_HIGH>;
+        usb0_vbus-supply = <&reg_usb0_vbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+        usb2_vbus-supply = <&reg_usb2_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml
new file mode 100644
index 000000000000..fd6e126fcf18
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun50i-a64-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A64 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun50i-a64-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU0 registers
+      - description: PHY PMU1 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu0
+      - const: pmu1
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun50i-a64-ccu.h>
+    #include <dt-bindings/reset/sun50i-a64-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun50i-a64-usb-phy";
+        reg = <0x01c19400 0x14>,
+              <0x01c1a800 0x4>,
+              <0x01c1b800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu0",
+                    "pmu1";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>;
+        clock-names = "usb0_phy",
+                      "usb1_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>;
+        reset-names = "usb0_reset",
+                      "usb1_reset";
+        usb0_id_det-gpios = <&pio 7 9 GPIO_ACTIVE_HIGH>; /* PH9 */
+        usb0_vbus_power-supply = <&usb_power_supply>;
+        usb0_vbus-supply = <&reg_drivevbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb-phy.yaml
new file mode 100644
index 000000000000..7670411002c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb-phy.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun50i-h6-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner H6 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun50i-h6-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU0 registers
+      - description: PHY PMU3 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu0
+      - const: pmu3
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb3_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb3_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb3_vbus-supply:
+    description: Regulator controlling USB3 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun50i-h6-ccu.h>
+    #include <dt-bindings/reset/sun50i-h6-ccu.h>
+
+    phy@5100400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun50i-h6-usb-phy";
+        reg = <0x05100400 0x24>,
+              <0x05101800 0x4>,
+              <0x05311800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu0",
+                    "pmu3";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY3>;
+        clock-names = "usb0_phy",
+                      "usb3_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY3>;
+        reset-names = "usb0_reset",
+                      "usb3_reset";
+        usb0_id_det-gpios = <&pio 2 6 GPIO_ACTIVE_HIGH>; /* PC6 */
+        usb0_vbus-supply = <&reg_vcc5v>;
+        usb3_vbus-supply = <&reg_vcc5v>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb3-phy.yaml
new file mode 100644
index 000000000000..e5922b427342
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb3-phy.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Ondrej Jirman <megous@megous.com>
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/allwinner,sun50i-h6-usb3-phy.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Allwinner H6 USB3 PHY
+
+maintainers:
+  - Ondrej Jirman <megous@megous.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun50i-h6-usb3-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  "#phy-cells":
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - resets
+  - "#phy-cells"
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun50i-h6-ccu.h>
+    #include <dt-bindings/reset/sun50i-h6-ccu.h>
+    phy@5210000 {
+          compatible = "allwinner,sun50i-h6-usb3-phy";
+          reg = <0x5210000 0x10000>;
+          clocks = <&ccu CLK_USB_PHY1>;
+          resets = <&ccu RST_USB_PHY1>;
+          #phy-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun5i-a13-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun5i-a13-usb-phy.yaml
new file mode 100644
index 000000000000..9b319381d1ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun5i-a13-usb-phy.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun5i-a13-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A13 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun5i-a13-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU1 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu1
+
+  clocks:
+    maxItems: 1
+    description: USB OTG PHY bus clock
+
+  clock-names:
+    const: usb_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun5i-ccu.h>
+    #include <dt-bindings/reset/sun5i-ccu.h>
+
+    phy@1c13400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun5i-a13-usb-phy";
+        reg = <0x01c13400 0x10>, <0x01c14800 0x4>;
+        reg-names = "phy_ctrl", "pmu1";
+        clocks = <&ccu CLK_USB_PHY0>;
+        clock-names = "usb_phy";
+        resets = <&ccu RST_USB_PHY0>, <&ccu RST_USB_PHY1>;
+        reset-names = "usb0_reset", "usb1_reset";
+        usb0_id_det-gpios = <&pio 6 2 (GPIO_ACTIVE_HIGH | GPIO_PULL_UP)>; /* PG2 */
+        usb0_vbus_det-gpios = <&pio 6 1 (GPIO_ACTIVE_HIGH | GPIO_PULL_DOWN)>; /* PG1 */
+        usb0_vbus-supply = <&reg_usb0_vbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
index fa46670de299..d0b541a461f3 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
@@ -8,14 +8,18 @@ title: Allwinner A31 MIPI D-PHY Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#phy-cells":
     const: 0
 
   compatible:
-    const: allwinner,sun6i-a31-mipi-dphy
+    oneOf:
+      - const: allwinner,sun6i-a31-mipi-dphy
+      - items:
+          - const: allwinner,sun50i-a64-mipi-dphy
+          - const: allwinner,sun6i-a31-mipi-dphy
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-usb-phy.yaml
new file mode 100644
index 000000000000..b0ed01bbf3db
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-usb-phy.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun6i-a31-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun6i-a31-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU1 registers
+      - description: PHY PMU2 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu1
+      - const: pmu2
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+      - description: USB Host 1 PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+      - const: usb2_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+      - description: USB Host 2 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+      - const: usb2_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+  usb2_vbus-supply:
+    description: Regulator controlling USB2 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun6i-a31-ccu.h>
+    #include <dt-bindings/reset/sun6i-a31-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun6i-a31-usb-phy";
+        reg = <0x01c19400 0x10>,
+              <0x01c1a800 0x4>,
+              <0x01c1b800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu1",
+                    "pmu2";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>,
+                 <&ccu CLK_USB_PHY2>;
+        clock-names = "usb0_phy",
+                      "usb1_phy",
+                      "usb2_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>,
+                 <&ccu RST_USB_PHY2>;
+        reset-names = "usb0_reset",
+                      "usb1_reset",
+                      "usb2_reset";
+        usb0_id_det-gpios = <&pio 0 15 GPIO_ACTIVE_HIGH>; /* PA15 */
+        usb0_vbus_det-gpios = <&pio 0 16 GPIO_ACTIVE_HIGH>; /* PA16 */
+        usb0_vbus_power-supply = <&usb_power_supply>;
+        usb0_vbus-supply = <&reg_drivevbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+        usb2_vbus-supply = <&reg_usb2_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-a23-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-a23-usb-phy.yaml
new file mode 100644
index 000000000000..b0674406f8aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-a23-usb-phy.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun8i-a23-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A23 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun8i-a23-usb-phy
+      - allwinner,sun8i-a33-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU1 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu1
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun8i-a23-a33-ccu.h>
+    #include <dt-bindings/reset/sun8i-a23-a33-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun8i-a23-usb-phy";
+        reg = <0x01c19400 0x10>, <0x01c1a800 0x4>;
+        reg-names = "phy_ctrl", "pmu1";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>;
+        clock-names = "usb0_phy",
+                      "usb1_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>;
+        reset-names = "usb0_reset",
+                      "usb1_reset";
+        usb0_id_det-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
+        usb0_vbus_power-supply = <&usb_power_supply>;
+        usb0_vbus-supply = <&reg_drivevbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-a83t-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-a83t-usb-phy.yaml
new file mode 100644
index 000000000000..48dc9c834a9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-a83t-usb-phy.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun8i-a83t-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun8i-a83t-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU1 registers
+      - description: PHY PMU2 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu1
+      - const: pmu2
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+      - description: USB Host 1 PHY bus clock
+      - description: USB HSIC 12MHz clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+      - const: usb2_phy
+      - const: usb2_hsic_12M
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+      - description: USB Host 2 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+      - const: usb2_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+  usb2_vbus-supply:
+    description: Regulator controlling USB2 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun8i-a83t-ccu.h>
+    #include <dt-bindings/reset/sun8i-a83t-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun8i-a83t-usb-phy";
+        reg = <0x01c19400 0x10>,
+              <0x01c1a800 0x14>,
+              <0x01c1b800 0x14>;
+        reg-names = "phy_ctrl",
+                    "pmu1",
+                    "pmu2";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>,
+                 <&ccu CLK_USB_HSIC>,
+                 <&ccu CLK_USB_HSIC_12M>;
+        clock-names = "usb0_phy",
+                      "usb1_phy",
+                      "usb2_phy",
+                      "usb2_hsic_12M";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>,
+                 <&ccu RST_USB_HSIC>;
+        reset-names = "usb0_reset",
+                      "usb1_reset",
+                      "usb2_reset";
+        usb0_id_det-gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */
+        usb0_vbus_power-supply = <&usb_power_supply>;
+        usb0_vbus-supply = <&reg_drivevbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+        usb2_vbus-supply = <&reg_usb2_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml
new file mode 100644
index 000000000000..60c344585276
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun8i-h3-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner H3 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun8i-h3-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU0 registers
+      - description: PHY PMU1 registers
+      - description: PHY PMU2 registers
+      - description: PHY PMU3 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu0
+      - const: pmu1
+      - const: pmu2
+      - const: pmu3
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+      - description: USB Host 1 PHY bus clock
+      - description: USB Host 2 PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+      - const: usb2_phy
+      - const: usb3_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+      - description: USB Host 2 Controller reset
+      - description: USB Host 3 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+      - const: usb2_reset
+      - const: usb3_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+  usb2_vbus-supply:
+    description: Regulator controlling USB2 Host controller
+
+  usb3_vbus-supply:
+    description: Regulator controlling USB3 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun8i-h3-ccu.h>
+    #include <dt-bindings/reset/sun8i-h3-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun8i-h3-usb-phy";
+        reg = <0x01c19400 0x2c>,
+              <0x01c1a800 0x4>,
+              <0x01c1b800 0x4>,
+              <0x01c1c800 0x4>,
+              <0x01c1d800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu0",
+                    "pmu1",
+                    "pmu2",
+                    "pmu3";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>,
+                 <&ccu CLK_USB_PHY2>,
+                 <&ccu CLK_USB_PHY3>;
+        clock-names = "usb0_phy",
+                      "usb1_phy",
+                      "usb2_phy",
+                      "usb3_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>,
+                 <&ccu RST_USB_PHY2>,
+                 <&ccu RST_USB_PHY3>;
+        reset-names = "usb0_reset",
+                      "usb1_reset",
+                      "usb2_reset",
+                      "usb3_reset";
+        usb0_id_det-gpios = <&pio 6 12 GPIO_ACTIVE_HIGH>; /* PG12 */
+        usb0_vbus-supply = <&reg_usb0_vbus>;
+        usb1_vbus-supply = <&reg_usb1_vbus>;
+        usb2_vbus-supply = <&reg_usb2_vbus>;
+        usb3_vbus-supply = <&reg_usb3_vbus>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-r40-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-r40-usb-phy.yaml
new file mode 100644
index 000000000000..a2bb36790fbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-r40-usb-phy.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun8i-r40-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner R40 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun8i-r40-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU0 registers
+      - description: PHY PMU1 registers
+      - description: PHY PMU2 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu0
+      - const: pmu1
+      - const: pmu2
+
+  clocks:
+    items:
+      - description: USB OTG PHY bus clock
+      - description: USB Host 0 PHY bus clock
+      - description: USB Host 1 PHY bus clock
+
+  clock-names:
+    items:
+      - const: usb0_phy
+      - const: usb1_phy
+      - const: usb2_phy
+
+  resets:
+    items:
+      - description: USB OTG reset
+      - description: USB Host 1 Controller reset
+      - description: USB Host 2 Controller reset
+
+  reset-names:
+    items:
+      - const: usb0_reset
+      - const: usb1_reset
+      - const: usb2_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+  usb1_vbus-supply:
+    description: Regulator controlling USB1 Host controller
+
+  usb2_vbus-supply:
+    description: Regulator controlling USB2 Host controller
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun8i-r40-ccu.h>
+    #include <dt-bindings/reset/sun8i-r40-ccu.h>
+
+    phy@1c13400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun8i-r40-usb-phy";
+        reg = <0x01c13400 0x14>,
+              <0x01c14800 0x4>,
+              <0x01c19800 0x4>,
+              <0x01c1c800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu0",
+                    "pmu1",
+                    "pmu2";
+        clocks = <&ccu CLK_USB_PHY0>,
+                 <&ccu CLK_USB_PHY1>,
+                 <&ccu CLK_USB_PHY2>;
+        clock-names = "usb0_phy",
+                      "usb1_phy",
+                      "usb2_phy";
+        resets = <&ccu RST_USB_PHY0>,
+                 <&ccu RST_USB_PHY1>,
+                 <&ccu RST_USB_PHY2>;
+        reset-names = "usb0_reset",
+                      "usb1_reset",
+                      "usb2_reset";
+        usb1_vbus-supply = <&reg_vcc5v0>;
+        usb2_vbus-supply = <&reg_vcc5v0>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-v3s-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-v3s-usb-phy.yaml
new file mode 100644
index 000000000000..eadfd0c9493c
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-v3s-usb-phy.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun8i-v3s-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner V3s USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 1
+
+  compatible:
+    const: allwinner,sun8i-v3s-usb-phy
+
+  reg:
+    items:
+      - description: PHY Control registers
+      - description: PHY PMU0 registers
+
+  reg-names:
+    items:
+      - const: phy_ctrl
+      - const: pmu0
+
+  clocks:
+    maxItems: 1
+    description: USB OTG PHY bus clock
+
+  clock-names:
+    const: usb0_phy
+
+  resets:
+    maxItems: 1
+    description: USB OTG reset
+
+  reset-names:
+    const: usb0_reset
+
+  usb0_id_det-gpios:
+    description: GPIO to the USB OTG ID pin
+
+  usb0_vbus_det-gpios:
+    description: GPIO to the USB OTG VBUS detect pin
+
+  usb0_vbus_power-supply:
+    description: Power supply to detect the USB OTG VBUS
+
+  usb0_vbus-supply:
+    description: Regulator controlling USB OTG VBUS
+
+required:
+  - "#phy-cells"
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - reg-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/clock/sun8i-v3s-ccu.h>
+    #include <dt-bindings/reset/sun8i-v3s-ccu.h>
+
+    phy@1c19400 {
+        #phy-cells = <1>;
+        compatible = "allwinner,sun8i-v3s-usb-phy";
+        reg = <0x01c19400 0x2c>,
+              <0x01c1a800 0x4>;
+        reg-names = "phy_ctrl",
+                    "pmu0";
+        clocks = <&ccu CLK_USB_PHY0>;
+        clock-names = "usb0_phy";
+        resets = <&ccu RST_USB_PHY0>;
+        reset-names = "usb0_reset";
+        usb0_id_det-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun9i-a80-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun9i-a80-usb-phy.yaml
new file mode 100644
index 000000000000..ded7d6f0a119
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun9i-a80-usb-phy.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun9i-a80-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB PHY Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#phy-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun9i-a80-usb-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    anyOf:
+      - description: Main PHY Clock
+
+      - items:
+          - description: Main PHY clock
+          - description: HSIC 12MHz clock
+          - description: HSIC 480MHz clock
+
+  clock-names:
+    oneOf:
+      - const: phy
+
+      - items:
+          - const: phy
+          - const: hsic_12M
+          - const: hsic_480M
+
+  resets:
+    anyOf:
+      - description: Normal USB PHY reset
+
+      - items:
+          - description: Normal USB PHY reset
+          - description: HSIC Reset
+
+  reset-names:
+    oneOf:
+      - const: phy
+
+      - items:
+          - const: phy
+          - const: hsic
+
+  phy_type:
+    const: hsic
+    description:
+      When absent, the PHY type will be assumed to be normal USB.
+
+  phy-supply:
+    description:
+      Regulator that powers VBUS
+
+required:
+  - "#phy-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+if:
+  properties:
+    phy_type:
+      const: hsic
+
+  required:
+    - phy_type
+
+then:
+  properties:
+    clocks:
+      maxItems: 3
+
+    clock-names:
+      maxItems: 3
+
+    resets:
+      maxItems: 2
+
+    reset-names:
+      maxItems: 2
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun9i-a80-usb.h>
+    #include <dt-bindings/reset/sun9i-a80-usb.h>
+
+    usbphy1: phy@a00800 {
+        compatible = "allwinner,sun9i-a80-usb-phy";
+        reg = <0x00a00800 0x4>;
+        clocks = <&usb_clocks CLK_USB0_PHY>;
+        clock-names = "phy";
+        resets = <&usb_clocks RST_USB0_PHY>;
+        reset-names = "phy";
+        phy-supply = <&reg_usb1_vbus>;
+        #phy-cells = <0>;
+    };
+
+  - |
+    #include <dt-bindings/clock/sun9i-a80-usb.h>
+    #include <dt-bindings/reset/sun9i-a80-usb.h>
+
+    usbphy3: phy@a02800 {
+        compatible = "allwinner,sun9i-a80-usb-phy";
+        reg = <0x00a02800 0x4>;
+        clocks = <&usb_clocks CLK_USB2_PHY>,
+                 <&usb_clocks CLK_USB_HSIC>,
+                 <&usb_clocks CLK_USB2_HSIC>;
+        clock-names = "phy",
+                      "hsic_12M",
+                      "hsic_480M";
+        resets = <&usb_clocks RST_USB2_PHY>,
+                 <&usb_clocks RST_USB2_HSIC>;
+        reset-names = "phy",
+                      "hsic";
+        phy_type = "hsic";
+        phy-supply = <&reg_usb3_vbus>;
+        #phy-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
index 51254b4e65dd..57d8603076bd 100644
--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
@@ -36,7 +36,6 @@ properties:
     const: 0
 
   phy-supply:
-     maxItems: 1
      description:
        Phandle to a regulator that provides power to the PHY. This
        regulator will be managed during the PHY power on/off sequence.
diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt
index 24a0d06acd1d..698aacbdcfc4 100644
--- a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt
@@ -1,30 +1,49 @@
 Broadcom STB USB PHY
 
 Required properties:
- - compatible: brcm,brcmstb-usb-phy
- - reg: two offset and length pairs.
-	The first pair specifies a manditory set of memory mapped
-	registers used for general control of the PHY.
-	The second pair specifies optional registers used by some of
-	the SoCs that support USB 3.x
- - #phy-cells: Shall be 1 as it expects one argument for setting
-	       the type of the PHY. Possible values are:
-	       - PHY_TYPE_USB2 for USB1.1/2.0 PHY
-	       - PHY_TYPE_USB3 for USB3.x PHY
+- compatible: should be one of
+	"brcm,brcmstb-usb-phy"
+	"brcm,bcm7216-usb-phy"
+	"brcm,bcm7211-usb-phy"
+
+- reg and reg-names properties requirements are specific to the
+  compatible string.
+  "brcm,brcmstb-usb-phy":
+    - reg: 1 or 2 offset and length pairs. One for the base CTRL registers
+           and an optional pair for systems with USB 3.x support
+    - reg-names: not specified
+  "brcm,bcm7216-usb-phy":
+    - reg: 3 offset and length pairs for CTRL, XHCI_EC and XHCI_GBL
+           registers
+    - reg-names: "ctrl", "xhci_ec", "xhci_gbl"
+  "brcm,bcm7211-usb-phy":
+    - reg: 5 offset and length pairs for CTRL, XHCI_EC, XHCI_GBL,
+           USB_PHY and USB_MDIO registers and an optional pair
+	   for the BDC registers
+    - reg-names: "ctrl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec"
+
+- #phy-cells: Shall be 1 as it expects one argument for setting
+	      the type of the PHY. Possible values are:
+	      - PHY_TYPE_USB2 for USB1.1/2.0 PHY
+	      - PHY_TYPE_USB3 for USB3.x PHY
 
 Optional Properties:
 - clocks : clock phandles.
 - clock-names: String, clock name.
+- interrupts: wakeup interrupt
+- interrupt-names: "wakeup"
 - brcm,ipp: Boolean, Invert Port Power.
   Possible values are: 0 (Don't invert), 1 (Invert)
 - brcm,ioc: Boolean, Invert Over Current detection.
   Possible values are: 0 (Don't invert), 1 (Invert)
-NOTE: one or both of the following two properties must be set
-- brcm,has-xhci: Boolean indicating the phy has an XHCI phy.
-- brcm,has-eohci: Boolean indicating the phy has an EHCI/OHCI phy.
 - dr_mode: String, PHY Device mode.
   Possible values are: "host", "peripheral ", "drd" or "typec-pd"
   If this property is not defined, the phy will default to "host" mode.
+- brcm,syscon-piarbctl: phandle to syscon for handling config registers
+NOTE: one or both of the following two properties must be set
+- brcm,has-xhci: Boolean indicating the phy has an XHCI phy.
+- brcm,has-eohci: Boolean indicating the phy has an EHCI/OHCI phy.
+
 
 Example:
 
@@ -41,3 +60,27 @@ usbphy_0: usb-phy@f0470200 {
 	clocks = <&usb20>, <&usb30>;
 	clock-names = "sw_usb", "sw_usb3";
 };
+
+usb-phy@29f0200 {
+	reg = <0x29f0200 0x200>,
+		<0x29c0880 0x30>,
+		<0x29cc100 0x534>,
+		<0x2808000 0x24>,
+		<0x2980080 0x8>;
+	reg-names = "ctrl",
+		"xhci_ec",
+		"xhci_gbl",
+		"usb_phy",
+		"usb_mdio";
+	brcm,ioc = <0x0>;
+	brcm,ipp = <0x0>;
+	compatible = "brcm,bcm7211-usb-phy";
+	interrupts = <0x30>;
+	interrupt-parent = <&vpu_intr1_nosec_intc>;
+	interrupt-names = "wake";
+	#phy-cells = <0x1>;
+	brcm,has-xhci;
+	syscon-piarbctl = <&syscon_piarbctl>;
+	clocks = <&scmi_clk 256>;
+	clock-names = "sw_usb";
+};
diff --git a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
index b640845fec67..c03ad2198410 100644
--- a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
+++ b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
@@ -2,6 +2,7 @@
 
 Required properties:
 - compatible: should be one or more of
+     "brcm,bcm7216-sata-phy"
      "brcm,bcm7425-sata-phy"
      "brcm,bcm7445-sata-phy"
      "brcm,iproc-ns2-sata-phy"
diff --git a/Documentation/devicetree/bindings/phy/intel,lgm-emmc-phy.yaml b/Documentation/devicetree/bindings/phy/intel,lgm-emmc-phy.yaml
new file mode 100644
index 000000000000..ff7959c21af0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/intel,lgm-emmc-phy.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/intel,lgm-emmc-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain(LGM) eMMC PHY Device Tree Bindings
+
+maintainers:
+  - Ramuthevar Vadivel Murugan <vadivel.muruganx.ramuthevar@linux.intel.com>
+
+description: |+
+  Bindings for eMMC PHY on Intel's Lightning Mountain SoC, syscon
+  node is used to reference the base address of eMMC phy registers.
+
+  The eMMC PHY node should be the child of a syscon node with the
+  required property:
+
+  - compatible:         Should be one of the following:
+                        "intel,lgm-syscon", "syscon"
+  - reg:
+      maxItems: 1
+
+properties:
+  compatible:
+      const: intel,lgm-emmc-phy
+
+  "#phy-cells":
+    const: 0
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - "#phy-cells"
+  - compatible
+  - reg
+  - clocks
+
+examples:
+  - |
+    sysconf: chiptop@e0200000 {
+      compatible = "intel,lgm-syscon", "syscon";
+      reg = <0xe0200000 0x100>;
+
+      emmc-phy: emmc-phy@a8 {
+        compatible = "intel,lgm-emmc-phy";
+        reg = <0x00a8 0x10>;
+        clocks = <&emmc>;
+        #phy-cells = <0>;
+      };
+    };
+...
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
index 6e1b47bfce43..03f5939d3d19 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
@@ -2,21 +2,24 @@ Cadence Sierra PHY
 -----------------------
 
 Required properties:
-- compatible:	cdns,sierra-phy-t0
-- clocks:	Must contain an entry in clock-names.
-		See ../clocks/clock-bindings.txt for details.
-- clock-names:	Must be "phy_clk"
+- compatible:	Must be "cdns,sierra-phy-t0" for Sierra in Cadence platform
+		Must be "ti,sierra-phy-t0" for Sierra in TI's J721E SoC.
 - resets:	Must contain an entry for each in reset-names.
 		See ../reset/reset.txt for details.
 - reset-names:	Must include "sierra_reset" and "sierra_apb".
 		"sierra_reset" must control the reset line to the PHY.
 		"sierra_apb" must control the reset line to the APB PHY
-		interface.
+		interface ("sierra_apb" is optional).
 - reg:		register range for the PHY.
 - #address-cells: Must be 1
 - #size-cells:	Must be 0
 
 Optional properties:
+- clocks:		Must contain an entry in clock-names.
+			See ../clocks/clock-bindings.txt for details.
+- clock-names:		Must contain "cmn_refclk_dig_div" and
+			"cmn_refclk1_dig_div" for configuring the frequency of
+			the clock to the lanes. "phy_clk" is deprecated.
 - cdns,autoconf:	A boolean property whose presence indicates that the
 			PHY registers will be configured by hardware. If not
 			present, all sub-node optional properties must be
diff --git a/Documentation/devicetree/bindings/phy/phy-mmp3-usb.txt b/Documentation/devicetree/bindings/phy/phy-mmp3-usb.txt
new file mode 100644
index 000000000000..7183b9102f91
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-mmp3-usb.txt
@@ -0,0 +1,13 @@
+Marvell MMP3 USB PHY
+--------------------
+
+Required properties:
+- compatible: must be "marvell,mmp3-usb-phy"
+- #phy-cells: must be 0
+
+Example:
+	usb-phy: usb-phy@d4207000 {
+		compatible = "marvell,mmp3-usb-phy";
+		reg = <0xd4207000 0x40>;
+		#phy-cells = <0>;
+	};
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt
index 00639baae74a..541f5298827c 100644
--- a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt
@@ -2,6 +2,7 @@ ROCKCHIP USB2.0 PHY WITH INNO IP BLOCK
 
 Required properties (phy (parent) node):
  - compatible : should be one of the listed compatibles:
+	* "rockchip,px30-usb2phy"
 	* "rockchip,rk3228-usb2phy"
 	* "rockchip,rk3328-usb2phy"
 	* "rockchip,rk3366-usb2phy"
diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index 085fbd676cfc..eac9ad3cbbc8 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@ -14,7 +14,8 @@ Required properties:
 	       "qcom,msm8998-qmp-pcie-phy" for PCIe QMP phy on msm8998,
 	       "qcom,sdm845-qmp-usb3-phy" for USB3 QMP V3 phy on sdm845,
 	       "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845,
-	       "qcom,sdm845-qmp-ufs-phy" for UFS QMP phy on sdm845.
+	       "qcom,sdm845-qmp-ufs-phy" for UFS QMP phy on sdm845,
+	       "qcom,sm8150-qmp-ufs-phy" for UFS QMP phy on sm8150.
 
 - reg:
   - index 0: address and length of register set for PHY's common
@@ -57,6 +58,8 @@ Required properties:
 			"aux", "cfg_ahb", "ref", "com_aux".
 		For "qcom,sdm845-qmp-ufs-phy" must contain:
 			"ref", "ref_aux".
+		For "qcom,sm8150-qmp-ufs-phy" must contain:
+			"ref", "ref_aux".
 
  - resets: a list of phandles and reset controller specifier pairs,
 	   one for each entry in reset-names.
@@ -83,6 +86,8 @@ Required properties:
 			"phy", "common".
 		For "qcom,sdm845-qmp-ufs-phy": must contain:
 			"ufsphy".
+		For "qcom,sm8150-qmp-ufs-phy": must contain:
+			"ufsphy".
 
  - vdda-phy-supply: Phandle to a regulator supply to PHY core block.
  - vdda-pll-supply: Phandle to 1.8V regulator supply to PHY refclk pll block.
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
index 503a8cfb3184..7734b219d9aa 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
@@ -10,6 +10,8 @@ Required properties:
 	      SoC.
 	      "renesas,usb2-phy-r8a774a1" if the device is a part of an R8A774A1
 	      SoC.
+	      "renesas,usb2-phy-r8a774b1" if the device is a part of an R8A774B1
+	      SoC.
 	      "renesas,usb2-phy-r8a774c0" if the device is a part of an R8A774C0
 	      SoC.
 	      "renesas,usb2-phy-r8a7795" if the device is a part of an R8A7795
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
index 9d9826609c2f..0fe433b9a592 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
@@ -9,6 +9,8 @@ need this driver.
 Required properties:
 - compatible: "renesas,r8a774a1-usb3-phy" if the device is a part of an R8A774A1
 	      SoC.
+	      "renesas,r8a774b1-usb3-phy" if the device is a part of an R8A774B1
+	      SoC.
 	      "renesas,r8a7795-usb3-phy" if the device is a part of an R8A7795
 	      SoC.
 	      "renesas,r8a7796-usb3-phy" if the device is a part of an R8A7796
diff --git a/Documentation/devicetree/bindings/phy/rockchip,px30-dsi-dphy.yaml b/Documentation/devicetree/bindings/phy/rockchip,px30-dsi-dphy.yaml
new file mode 100644
index 000000000000..72aca81e8959
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rockchip,px30-dsi-dphy.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/rockchip,px30-dsi-dphy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip MIPI DPHY with additional LVDS/TTL modes
+
+maintainers:
+  - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+  "#phy-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - rockchip,px30-dsi-dphy
+      - rockchip,rk3128-dsi-dphy
+      - rockchip,rk3368-dsi-dphy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: PLL reference clock
+      - description: Module clock
+
+  clock-names:
+    items:
+      - const: ref
+      - const: pclk
+
+  power-domains:
+    maxItems: 1
+    description: phandle to the associated power domain
+
+  resets:
+    items:
+      - description: exclusive PHY reset line
+
+  reset-names:
+    items:
+      - const: apb
+
+required:
+  - "#phy-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    dsi_dphy: phy@ff2e0000 {
+        compatible = "rockchip,px30-dsi-dphy";
+        reg = <0x0 0xff2e0000 0x0 0x10000>;
+        clocks = <&pmucru 13>, <&cru 12>;
+        clock-names = "ref", "pclk";
+        resets = <&cru 12>;
+        reset-names = "apb";
+        #phy-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/phy/samsung-phy.txt b/Documentation/devicetree/bindings/phy/samsung-phy.txt
index 1c40ccd40ce4..7510830a79bd 100644
--- a/Documentation/devicetree/bindings/phy/samsung-phy.txt
+++ b/Documentation/devicetree/bindings/phy/samsung-phy.txt
@@ -1,4 +1,4 @@
-Samsung S5P/EXYNOS SoC series MIPI CSIS/DSIM DPHY
+Samsung S5P/Exynos SoC series MIPI CSIS/DSIM DPHY
 -------------------------------------------------
 
 Required properties:
@@ -27,7 +27,7 @@ the PHY specifier identifies the PHY and its meaning is as follows:
 supports additional fifth PHY:
   4 - MIPI CSIS 2.
 
-Samsung EXYNOS SoC series Display Port PHY
+Samsung Exynos SoC series Display Port PHY
 -------------------------------------------------
 
 Required properties:
@@ -38,7 +38,7 @@ Required properties:
 		      control pmu registers for power isolation.
 - #phy-cells : from the generic PHY bindings, must be 0;
 
-Samsung S5P/EXYNOS SoC series USB PHY
+Samsung S5P/Exynos SoC series USB PHY
 -------------------------------------------------
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
deleted file mode 100644
index f2e120af17f0..000000000000
--- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-Allwinner sun4i USB PHY
------------------------
-
-Required properties:
-- compatible : should be one of
-  * allwinner,sun4i-a10-usb-phy
-  * allwinner,sun5i-a13-usb-phy
-  * allwinner,sun6i-a31-usb-phy
-  * allwinner,sun7i-a20-usb-phy
-  * allwinner,sun8i-a23-usb-phy
-  * allwinner,sun8i-a33-usb-phy
-  * allwinner,sun8i-a83t-usb-phy
-  * allwinner,sun8i-h3-usb-phy
-  * allwinner,sun8i-r40-usb-phy
-  * allwinner,sun8i-v3s-usb-phy
-  * allwinner,sun50i-a64-usb-phy
-  * allwinner,sun50i-h6-usb-phy
-- reg : a list of offset + length pairs
-- reg-names :
-  * "phy_ctrl"
-  * "pmu0" for H3, V3s, A64 or H6
-  * "pmu1"
-  * "pmu2" for sun4i, sun6i, sun7i, sun8i-a83t or sun8i-h3
-  * "pmu3" for sun8i-h3 or sun50i-h6
-- #phy-cells : from the generic phy bindings, must be 1
-- clocks : phandle + clock specifier for the phy clocks
-- clock-names :
-  * "usb_phy" for sun4i, sun5i or sun7i
-  * "usb0_phy", "usb1_phy" and "usb2_phy" for sun6i
-  * "usb0_phy", "usb1_phy" for sun8i
-  * "usb0_phy", "usb1_phy", "usb2_phy" and "usb2_hsic_12M" for sun8i-a83t
-  * "usb0_phy", "usb1_phy", "usb2_phy" and "usb3_phy" for sun8i-h3
-  * "usb0_phy" and "usb3_phy" for sun50i-h6
-- resets : a list of phandle + reset specifier pairs
-- reset-names :
-  * "usb0_reset"
-  * "usb1_reset"
-  * "usb2_reset" for sun4i, sun6i, sun7i, sun8i-a83t or sun8i-h3
-  * "usb3_reset" for sun8i-h3 and sun50i-h6
-
-Optional properties:
-- usb0_id_det-gpios : gpio phandle for reading the otg id pin value
-- usb0_vbus_det-gpios : gpio phandle for detecting the presence of usb0 vbus
-- usb0_vbus_power-supply: power-supply phandle for usb0 vbus presence detect
-- usb0_vbus-supply : regulator phandle for controller usb0 vbus
-- usb1_vbus-supply : regulator phandle for controller usb1 vbus
-- usb2_vbus-supply : regulator phandle for controller usb2 vbus
-- usb3_vbus-supply : regulator phandle for controller usb3 vbus
-
-Example:
-	usbphy: phy@01c13400 {
-		#phy-cells = <1>;
-		compatible = "allwinner,sun4i-a10-usb-phy";
-		/* phy base regs, phy1 pmu reg, phy2 pmu reg */
-		reg = <0x01c13400 0x10 0x01c14800 0x4 0x01c1c800 0x4>;
-		reg-names = "phy_ctrl", "pmu1", "pmu2";
-		clocks = <&usb_clk 8>;
-		clock-names = "usb_phy";
-		resets = <&usb_clk 0>, <&usb_clk 1>, <&usb_clk 2>;
-		reset-names = "usb0_reset", "usb1_reset", "usb2_reset";
-		pinctrl-names = "default";
-		pinctrl-0 = <&usb0_id_detect_pin>, <&usb0_vbus_detect_pin>;
-		usb0_id_det-gpios = <&pio 7 19 GPIO_ACTIVE_HIGH>; /* PH19 */
-		usb0_vbus_det-gpios = <&pio 7 22 GPIO_ACTIVE_HIGH>; /* PH22 */
-		usb0_vbus-supply = <&reg_usb0_vbus>;
-		usb1_vbus-supply = <&reg_usb1_vbus>;
-		usb2_vbus-supply = <&reg_usb2_vbus>;
-	};
diff --git a/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt
deleted file mode 100644
index 64f7109aea1f..000000000000
--- a/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Allwinner sun9i USB PHY
------------------------
-
-Required properties:
-- compatible : should be one of
-  * allwinner,sun9i-a80-usb-phy
-- reg : a list of offset + length pairs
-- #phy-cells : from the generic phy bindings, must be 0
-- phy_type : "hsic" for HSIC usage;
-	     other values or absence of this property indicates normal USB
-- clocks : phandle + clock specifier for the phy clocks
-- clock-names : depending on the "phy_type" property,
-  * "phy" for normal USB
-  * "hsic_480M", "hsic_12M" for HSIC
-- resets : a list of phandle + reset specifier pairs
-- reset-names : depending on the "phy_type" property,
-  * "phy" for normal USB
-  * "hsic" for HSIC
-
-Optional Properties:
-- phy-supply : from the generic phy bindings, a phandle to a regulator that
-	       provides power to VBUS.
-
-It is recommended to list all clocks and resets available.
-The driver will only use those matching the phy_type.
-
-Example:
-	usbphy1: phy@a01800 {
-		compatible = "allwinner,sun9i-a80-usb-phy";
-		reg = <0x00a01800 0x4>;
-		clocks = <&usb_phy_clk 2>, <&usb_phy_clk 10>,
-		       <&usb_phy_clk 3>;
-		clock-names = "hsic_480M", "hsic_12M", "phy";
-		resets = <&usb_phy_clk 18>, <&usb_phy_clk 19>;
-		reset-names = "hsic", "phy";
-		#phy-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
new file mode 100644
index 000000000000..452cee1aed32
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
@@ -0,0 +1,221 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/ti,phy-j721e-wiz.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI J721E WIZ (SERDES Wrapper)
+
+maintainers:
+  - Kishon Vijay Abraham I <kishon@ti.com>
+
+properties:
+  compatible:
+    enum:
+      - ti,j721e-wiz-16g
+      - ti,j721e-wiz-10g
+
+  power-domains:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+    description: clock-specifier to represent input to the WIZ
+
+  clock-names:
+    items:
+      - const: fck
+      - const: core_ref_clk
+      - const: ext_ref_clk
+
+  num-lanes:
+    minimum: 1
+    maximum: 4
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  "#reset-cells":
+    const: 1
+
+  ranges: true
+
+  assigned-clocks:
+    maxItems: 2
+
+  assigned-clock-parents:
+    maxItems: 2
+
+  typec-dir-gpios:
+    maxItems: 1
+    description:
+      GPIO to signal Type-C cable orientation for lane swap.
+      If GPIO is active, lane 0 and lane 1 of SERDES will be swapped to
+      achieve the funtionality of an external type-C plug flip mux.
+
+  typec-dir-debounce-ms:
+    minimum: 100
+    maximum: 1000
+    default: 100
+    description:
+      Number of milliseconds to wait before sampling typec-dir-gpio.
+      If not specified, the default debounce of 100ms will be used.
+      Type-C spec states minimum CC pin debounce of 100 ms and maximum
+      of 200 ms. However, some solutions might need more than 200 ms.
+
+patternProperties:
+  "^pll[0|1]-refclk$":
+    type: object
+    description: |
+      WIZ node should have subnodes for each of the PLLs present in
+      the SERDES.
+    properties:
+      clocks:
+        maxItems: 2
+        description: Phandle to clock nodes representing the two inputs to PLL.
+
+      "#clock-cells":
+        const: 0
+
+      assigned-clocks:
+        maxItems: 1
+
+      assigned-clock-parents:
+        maxItems: 1
+
+    required:
+      - clocks
+      - "#clock-cells"
+      - assigned-clocks
+      - assigned-clock-parents
+
+  "^cmn-refclk1?-dig-div$":
+    type: object
+    description:
+      WIZ node should have subnodes for each of the PMA common refclock
+      provided by the SERDES.
+    properties:
+      clocks:
+        maxItems: 1
+        description: Phandle to the clock node representing the input to the
+          divider clock.
+
+      "#clock-cells":
+        const: 0
+
+    required:
+      - clocks
+      - "#clock-cells"
+
+  "^refclk-dig$":
+    type: object
+    description: |
+      WIZ node should have subnode for refclk_dig to select the reference
+      clock source for the reference clock used in the PHY and PMA digital
+      logic.
+    properties:
+      clocks:
+        maxItems: 4
+        description: Phandle to four clock nodes representing the inputs to
+          refclk_dig
+
+      "#clock-cells":
+        const: 0
+
+      assigned-clocks:
+        maxItems: 1
+
+      assigned-clock-parents:
+        maxItems: 1
+
+    required:
+      - clocks
+      - "#clock-cells"
+      - assigned-clocks
+      - assigned-clock-parents
+
+  "^serdes@[0-9a-f]+$":
+    type: object
+    description: |
+      WIZ node should have '1' subnode for the SERDES. It could be either
+      Sierra SERDES or Torrent SERDES. Sierra SERDES should follow the
+      bindings specified in
+      Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
+      Torrent SERDES should follow the bindings specified in
+      Documentation/devicetree/bindings/phy/phy-cadence-dp.txt
+
+required:
+  - compatible
+  - power-domains
+  - clocks
+  - clock-names
+  - num-lanes
+  - "#address-cells"
+  - "#size-cells"
+  - "#reset-cells"
+  - ranges
+
+examples:
+  - |
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+    wiz@5000000 {
+           compatible = "ti,j721e-wiz-16g";
+           #address-cells = <1>;
+           #size-cells = <1>;
+           power-domains = <&k3_pds 292 TI_SCI_PD_EXCLUSIVE>;
+           clocks = <&k3_clks 292 5>, <&k3_clks 292 11>, <&dummy_cmn_refclk>;
+           clock-names = "fck", "core_ref_clk", "ext_ref_clk";
+           assigned-clocks = <&k3_clks 292 11>, <&k3_clks 292 0>;
+           assigned-clock-parents = <&k3_clks 292 15>, <&k3_clks 292 4>;
+           num-lanes = <2>;
+           #reset-cells = <1>;
+           ranges = <0x5000000 0x5000000 0x10000>;
+
+           pll0-refclk {
+                  clocks = <&k3_clks 293 13>, <&dummy_cmn_refclk>;
+                  #clock-cells = <0>;
+                  assigned-clocks = <&wiz1_pll0_refclk>;
+                  assigned-clock-parents = <&k3_clks 293 13>;
+           };
+
+           pll1-refclk {
+                  clocks = <&k3_clks 293 0>, <&dummy_cmn_refclk1>;
+                  #clock-cells = <0>;
+                  assigned-clocks = <&wiz1_pll1_refclk>;
+                  assigned-clock-parents = <&k3_clks 293 0>;
+           };
+
+           cmn-refclk-dig-div {
+                  clocks = <&wiz1_refclk_dig>;
+                  #clock-cells = <0>;
+           };
+
+           cmn-refclk1-dig-div {
+                  clocks = <&wiz1_pll1_refclk>;
+                  #clock-cells = <0>;
+           };
+
+           refclk-dig {
+                  clocks = <&k3_clks 292 11>, <&k3_clks 292 0>, <&dummy_cmn_refclk>, <&dummy_cmn_refclk1>;
+                  #clock-cells = <0>;
+                  assigned-clocks = <&wiz0_refclk_dig>;
+                  assigned-clock-parents = <&k3_clks 292 11>;
+           };
+
+           serdes@5000000 {
+                  compatible = "cdns,ti,sierra-phy-t0";
+                  reg-names = "serdes";
+                  reg = <0x5000000 0x10000>;
+                  #address-cells = <1>;
+                  #size-cells = <0>;
+                  resets = <&serdes_wiz0 0>;
+                  reset-names = "sierra_reset";
+                  clocks = <&wiz0_cmn_refclk_dig_div>, <&wiz0_cmn_refclk1_dig_div>;
+                  clock-names = "cmn_refclk_dig_div", "cmn_refclk1_dig_div";
+           };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml
new file mode 100644
index 000000000000..bfefd09d8c1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml
@@ -0,0 +1,243 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/allwinner,sun4i-a10-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Pin Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#gpio-cells":
+    const: 3
+    description:
+      GPIO consumers must use three arguments, first the number of the
+      bank, then the pin number inside that bank, and finally the GPIO
+      flags.
+
+  "#interrupt-cells":
+    const: 3
+    description:
+      Interrupts consumers must use three arguments, first the number
+      of the bank, then the pin number inside that bank, and finally
+      the interrupts flags.
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-pinctrl
+      - allwinner,sun5i-a10s-pinctrl
+      - allwinner,sun5i-a13-pinctrl
+      - allwinner,sun6i-a31-pinctrl
+      - allwinner,sun6i-a31-r-pinctrl
+      - allwinner,sun6i-a31s-pinctrl
+      - allwinner,sun7i-a20-pinctrl
+      - allwinner,sun8i-a23-pinctrl
+      - allwinner,sun8i-a23-r-pinctrl
+      - allwinner,sun8i-a33-pinctrl
+      - allwinner,sun8i-a83t-pinctrl
+      - allwinner,sun8i-a83t-r-pinctrl
+      - allwinner,sun8i-h3-pinctrl
+      - allwinner,sun8i-h3-r-pinctrl
+      - allwinner,sun8i-r40-pinctrl
+      - allwinner,sun8i-v3-pinctrl
+      - allwinner,sun8i-v3s-pinctrl
+      - allwinner,sun9i-a80-pinctrl
+      - allwinner,sun9i-a80-r-pinctrl
+      - allwinner,sun50i-a64-pinctrl
+      - allwinner,sun50i-a64-r-pinctrl
+      - allwinner,sun50i-h5-pinctrl
+      - allwinner,sun50i-h6-pinctrl
+      - allwinner,sun50i-h6-r-pinctrl
+      - allwinner,suniv-f1c100s-pinctrl
+      - nextthing,gr8-pinctrl
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 5
+    description:
+      One interrupt per external interrupt bank supported on the
+      controller, sorted by bank number ascending order.
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: High Frequency Oscillator
+      - description: Low Frequency Oscillator
+
+  clock-names:
+    items:
+      - const: apb
+      - const: hosc
+      - const: losc
+
+  resets:
+    maxItems: 1
+
+  gpio-controller: true
+  interrupt-controller: true
+  gpio-line-names: true
+
+  input-debounce:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - minItems: 1
+        maxItems: 5
+    description:
+      Debouncing periods in microseconds, one period per interrupt
+      bank found in the controller
+
+patternProperties:
+  # It's pretty scary, but the basic idea is that:
+  #   - One node name can start with either s- or r- for PRCM nodes,
+  #   - Then, the name itself can be any repetition of <string>- (to
+  #     accomodate with nodes like uart4-rts-cts-pins), where each
+  #     string can be either starting with 'p' but in a string longer
+  #     than 3, or something that doesn't start with 'p',
+  #   - Then, the bank name is optional and will be between pa and pg,
+  #     pl or pm. Some pins groups that have several options will have
+  #     the pin numbers then,
+  #   - Finally, the name will end with either -pin or pins.
+
+  "^([rs]-)?(([a-z0-9]{3,}|[a-oq-z][a-z0-9]*?)?-)+?(p[a-ilm][0-9]*?-)??pins?$":
+    type: object
+
+    properties:
+      pins: true
+      function: true
+      bias-disable: true
+      bias-pull-up: true
+      bias-pull-down: true
+
+      drive-strength:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [ 10, 20, 30, 40 ]
+
+    required:
+      - pins
+      - function
+
+    additionalProperties: false
+
+  "^vcc-p[a-hlm]-supply$":
+    description:
+      Power supplies for pin banks.
+
+required:
+  - "#gpio-cells"
+  - "#interrupt-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - gpio-controller
+  - interrupt-controller
+
+allOf:
+  # FIXME: We should have the pin bank supplies here, but not a lot of
+  # boards are defining it at the moment so it would generate a lot of
+  # warnings.
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun9i-a80-pinctrl
+
+    then:
+      properties:
+        interrupts:
+          minItems: 5
+          maxItems: 5
+
+    else:
+      if:
+        properties:
+          compatible:
+            enum:
+              - allwinner,sun6i-a31-pinctrl
+              - allwinner,sun6i-a31s-pinctrl
+              - allwinner,sun50i-h6-pinctrl
+
+      then:
+        properties:
+          interrupts:
+            minItems: 4
+            maxItems: 4
+
+      else:
+        if:
+          properties:
+            compatible:
+              enum:
+                - allwinner,sun8i-a23-pinctrl
+                - allwinner,sun8i-a83t-pinctrl
+                - allwinner,sun50i-a64-pinctrl
+                - allwinner,sun50i-h5-pinctrl
+                - allwinner,suniv-f1c100s-pinctrl
+
+        then:
+          properties:
+            interrupts:
+              minItems: 3
+              maxItems: 3
+
+        else:
+          if:
+            properties:
+              compatible:
+                enum:
+                  - allwinner,sun6i-a31-r-pinctrl
+                  - allwinner,sun8i-a33-pinctrl
+                  - allwinner,sun8i-h3-pinctrl
+                  - allwinner,sun8i-v3-pinctrl
+                  - allwinner,sun8i-v3s-pinctrl
+                  - allwinner,sun9i-a80-r-pinctrl
+                  - allwinner,sun50i-h6-r-pinctrl
+
+          then:
+            properties:
+              interrupts:
+                minItems: 2
+                maxItems: 2
+
+          else:
+            properties:
+              interrupts:
+                minItems: 1
+                maxItems: 1
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sun5i-ccu.h>
+
+    pio: pinctrl@1c20800 {
+        compatible = "allwinner,sun5i-a13-pinctrl";
+        reg = <0x01c20800 0x400>;
+        interrupts = <28>;
+        clocks = <&ccu CLK_APB0_PIO>, <&osc24M>, <&osc32k>;
+        clock-names = "apb", "hosc", "losc";
+        gpio-controller;
+        interrupt-controller;
+        #interrupt-cells = <3>;
+        #gpio-cells = <3>;
+
+        uart1_pe_pins: uart1-pe-pins {
+            pins = "PE10", "PE11";
+            function = "uart1";
+        };
+
+        uart1_pg_pins: uart1-pg-pins {
+            pins = "PG3", "PG4";
+            function = "uart1";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
deleted file mode 100644
index 328585c6da58..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
+++ /dev/null
@@ -1,164 +0,0 @@
-* Allwinner A1X Pin Controller
-
-The pins controlled by sunXi pin controller are organized in banks,
-each bank has 32 pins.  Each pin has 7 multiplexing functions, with
-the first two functions being GPIO in and out. The configuration on
-the pins includes drive strength and pull-up.
-
-Required properties:
-- compatible: Should be one of the following (depending on your SoC):
-  "allwinner,sun4i-a10-pinctrl"
-  "allwinner,sun5i-a10s-pinctrl"
-  "allwinner,sun5i-a13-pinctrl"
-  "allwinner,sun6i-a31-pinctrl"
-  "allwinner,sun6i-a31s-pinctrl"
-  "allwinner,sun6i-a31-r-pinctrl"
-  "allwinner,sun7i-a20-pinctrl"
-  "allwinner,sun8i-a23-pinctrl"
-  "allwinner,sun8i-a23-r-pinctrl"
-  "allwinner,sun8i-a33-pinctrl"
-  "allwinner,sun9i-a80-pinctrl"
-  "allwinner,sun9i-a80-r-pinctrl"
-  "allwinner,sun8i-a83t-pinctrl"
-  "allwinner,sun8i-a83t-r-pinctrl"
-  "allwinner,sun8i-h3-pinctrl"
-  "allwinner,sun8i-h3-r-pinctrl"
-  "allwinner,sun8i-r40-pinctrl"
-  "allwinner,sun8i-v3-pinctrl"
-  "allwinner,sun8i-v3s-pinctrl"
-  "allwinner,sun50i-a64-pinctrl"
-  "allwinner,sun50i-a64-r-pinctrl"
-  "allwinner,sun50i-h5-pinctrl"
-  "allwinner,sun50i-h6-pinctrl"
-  "allwinner,sun50i-h6-r-pinctrl"
-  "allwinner,suniv-f1c100s-pinctrl"
-  "nextthing,gr8-pinctrl"
-
-- reg: Should contain the register physical address and length for the
-  pin controller.
-
-- clocks: phandle to the clocks feeding the pin controller:
-  - "apb": the gated APB parent clock
-  - "hosc": the high frequency oscillator in the system
-  - "losc": the low frequency oscillator in the system
-
-Note: For backward compatibility reasons, the hosc and losc clocks are only
-required if you need to use the optional input-debounce property. Any new
-device tree should set them.
-
-Each pin bank, depending on the SoC, can have an associated regulator:
-
-- vcc-pa-supply: for the A10, A20, A31, A31s, A80 and R40 SoCs
-- vcc-pb-supply: for the A31, A31s, A80 and V3s SoCs
-- vcc-pc-supply: for the A10, A20, A31, A31s, A64, A80, H5, R40 and V3s SoCs
-- vcc-pd-supply: for the A23, A31, A31s, A64, A80, A83t, H3, H5 and R40 SoCs
-- vcc-pe-supply: for the A10, A20, A31, A31s, A64, A80, R40 and V3s SoCs
-- vcc-pf-supply: for the A10, A20, A31, A31s, A80, R40 and V3s SoCs
-- vcc-pg-supply: for the A10, A20, A31, A31s, A64, A80, H3, H5, R40 and V3s SoCs
-- vcc-ph-supply: for the A31, A31s and A80 SoCs
-- vcc-pl-supply: for the r-pinctrl of the A64, A80 and A83t SoCs
-- vcc-pm-supply: for the r-pinctrl of the A31, A31s and A80 SoCs
-
-Optional properties:
-  - input-debounce: Array of debouncing periods in microseconds. One period per
-    irq bank found in the controller. 0 if no setup required.
-
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices.
-
-A pinctrl node should contain at least one subnodes representing the
-pinctrl groups available on the machine. Each subnode will list the
-pins it needs, and how they should be configured, with regard to muxer
-configuration, drive strength and pullups. If one of these options is
-not set, its actual value will be unspecified.
-
-Allwinner A1X Pin Controller supports the generic pin multiplexing and
-configuration bindings. For details on each properties, you can refer to
- ./pinctrl-bindings.txt.
-
-Required sub-node properties:
-  - pins
-  - function
-
-Optional sub-node properties:
-  - bias-disable
-  - bias-pull-up
-  - bias-pull-down
-  - drive-strength
-
-*** Deprecated pin configuration and multiplexing binding
-
-Required subnode-properties:
-
-- allwinner,pins: List of strings containing the pin name.
-- allwinner,function: Function to mux the pins listed above to.
-
-Optional subnode-properties:
-- allwinner,drive: Integer. Represents the current sent to the pin
-    0: 10 mA
-    1: 20 mA
-    2: 30 mA
-    3: 40 mA
-- allwinner,pull: Integer.
-    0: No resistor
-    1: Pull-up resistor
-    2: Pull-down resistor
-
-Examples:
-
-pio: pinctrl@1c20800 {
-	compatible = "allwinner,sun5i-a13-pinctrl";
-	reg = <0x01c20800 0x400>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	uart1_pins_a: uart1@0 {
-		allwinner,pins = "PE10", "PE11";
-		allwinner,function = "uart1";
-		allwinner,drive = <0>;
-		allwinner,pull = <0>;
-	};
-
-	uart1_pins_b: uart1@1 {
-		allwinner,pins = "PG3", "PG4";
-		allwinner,function = "uart1";
-		allwinner,drive = <0>;
-		allwinner,pull = <0>;
-	};
-};
-
-
-GPIO and interrupt controller
------------------------------
-
-This hardware also acts as a GPIO controller and an interrupt
-controller.
-
-Consumers that would want to refer to one or the other (or both)
-should provide through the usual *-gpios and interrupts properties a
-cell with 3 arguments, first the number of the bank, then the pin
-inside that bank, and finally the flags for the GPIO/interrupts.
-
-Example:
-
-xio: gpio@38 {
-	compatible = "nxp,pcf8574a";
-	reg = <0x38>;
-
-	gpio-controller;
-	#gpio-cells = <2>;
-
-	interrupt-parent = <&pio>;
-	interrupts = <6 0 IRQ_TYPE_EDGE_FALLING>;
-	interrupt-controller;
-	#interrupt-cells = <2>;
-};
-
-reg_usb1_vbus: usb1-vbus {
-	compatible = "regulator-fixed";
-	regulator-name = "usb1-vbus";
-	regulator-min-microvolt = <5000000>;
-	regulator-max-microvolt = <5000000>;
-	gpio = <&pio 7 6 GPIO_ACTIVE_HIGH>;
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
index 064b7dfc4252..3749fa233e87 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
@@ -54,8 +54,9 @@ patternProperties:
               TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
               TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
               THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13,
-              UART6, UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2,
-              WDTRST3, WDTRST4, ]
+              UART6, UART7, UART8, UART9, USBAD, USBADP, USB2AH, USB2AHP,
+              USB2BD, USB2BH, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
+              WDTRST4, ]
         groups:
           allOf:
             - $ref: "/schemas/types.yaml#/definitions/string"
@@ -85,8 +86,8 @@ patternProperties:
               TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
               TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
               THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12G0,
-              UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, VB,
-              VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ]
+              UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, USBA,
+              USBB, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ]
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml
new file mode 100644
index 000000000000..2e31e120395e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/fsl,imx8mp-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale IMX8MP IOMUX Controller
+
+maintainers:
+  - Anson Huang <Anson.Huang@nxp.com>
+
+description:
+  Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory
+  for common binding part and usage.
+
+properties:
+  compatible:
+    const: fsl,imx8mp-iomuxc
+
+  reg:
+    maxItems: 1
+
+# Client device subnode's properties
+patternProperties:
+  'grp$':
+    type: object
+    description:
+      Pinctrl node's client devices use subnodes for desired pin configuration.
+      Client device subnodes use below standard properties.
+
+    properties:
+      fsl,pins:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32-array
+        description:
+          each entry consists of 6 integers and represents the mux and config
+          setting for one pin. The first 5 integers <mux_reg conf_reg input_reg
+          mux_val input_val> are specified using a PIN_FUNC_ID macro, which can
+          be found in <arch/arm64/boot/dts/freescale/imx8mp-pinfunc.h>. The last
+          integer CONFIG is the pad setting value like pull-up on this pin. Please
+          refer to i.MX8M Plus Reference Manual for detailed CONFIG settings.
+
+    required:
+      - fsl,pins
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  # Pinmux controller node
+  - |
+    iomuxc: pinctrl@30330000 {
+        compatible = "fsl,imx8mp-iomuxc";
+        reg = <0x30330000 0x10000>;
+
+        pinctrl_uart2: uart2grp {
+            fsl,pins = <
+                0x228 0x488 0x5F0 0x0 0x6	0x49
+                0x228 0x488 0x000 0x0 0x0	0x49
+            >;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.txt
index 0014d9899797..d9b2100c98e8 100644
--- a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.txt
@@ -10,9 +10,9 @@ GPIO port configuration registers and it is typical to refer to pins using the
 naming scheme "PxN" where x is a character identifying the GPIO port with
 which the pin is associated and N is an integer from 0 to 31 identifying the
 pin within that GPIO port. For example PA0 is the first pin in GPIO port A, and
-PB31 is the last pin in GPIO port B. The jz4740 and the x1000 contains 4 GPIO
-ports, PA to PD, for a total of 128 pins. The jz4760, the jz4770 and the jz4780
-contains 6 GPIO ports, PA to PF, for a total of 192 pins.
+PB31 is the last pin in GPIO port B. The jz4740, the x1000 and the x1830
+contains 4 GPIO ports, PA to PD, for a total of 128 pins. The jz4760, the
+jz4770 and the jz4780 contains 6 GPIO ports, PA to PF, for a total of 192 pins.
 
 
 Required properties:
@@ -28,6 +28,7 @@ Required properties:
     - "ingenic,x1000-pinctrl"
     - "ingenic,x1000e-pinctrl"
     - "ingenic,x1500-pinctrl"
+    - "ingenic,x1830-pinctrl"
  - reg: Address range of the pinctrl registers.
 
 
@@ -40,6 +41,7 @@ Required properties for sub-nodes (GPIO chips):
     - "ingenic,jz4770-gpio"
     - "ingenic,jz4780-gpio"
     - "ingenic,x1000-gpio"
+    - "ingenic,x1830-gpio"
  - reg: The GPIO bank number.
  - interrupt-controller: Marks the device node as an interrupt controller.
  - interrupts: Interrupt specifier for the controllers interrupt.
diff --git a/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml
new file mode 100644
index 000000000000..cd2b436350ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/intel,lgm-io.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain SoC pinmux & GPIO controller binding
+
+maintainers:
+  - Rahul Tanwar <rahul.tanwar@linux.intel.com>
+
+description: |
+  Pinmux & GPIO controller controls pin multiplexing & configuration including
+  GPIO function selection & GPIO attributes configuration.
+
+properties:
+  compatible:
+    const: intel,lgm-io
+
+  reg:
+    maxItems: 1
+
+# Client device subnode's properties
+patternProperties:
+  '-pins$':
+    type: object
+    allOf:
+      - $ref: pincfg-node.yaml#
+      - $ref: pinmux-node.yaml#
+    description:
+      Pinctrl node's client devices use subnodes for desired pin configuration.
+      Client device subnodes use below standard properties.
+
+    properties:
+      function: true
+      groups: true
+      pins: true
+      pinmux: true
+      bias-pull-up: true
+      bias-pull-down: true
+      drive-strength: true
+      slew-rate: true
+      drive-open-drain: true
+      output-enable: true
+
+    required:
+      - function
+      - groups
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  # Pinmux controller node
+  - |
+    pinctrl: pinctrl@e2880000 {
+        compatible = "intel,lgm-io";
+        reg = <0xe2880000 0x100000>;
+
+        uart0-pins {
+             pins = <64>, /* UART_RX0 */
+                    <65>; /* UART_TX0 */
+             function = "CONSOLE_UART0";
+             pinmux = <1>,
+                      <1>;
+             groups = "CONSOLE_UART0";
+          };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
index 10dc4f7176ca..0aff1f28495c 100644
--- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -15,6 +15,7 @@ Required properties for the root node:
 		      "amlogic,meson-axg-aobus-pinctrl"
 		      "amlogic,meson-g12a-periphs-pinctrl"
 		      "amlogic,meson-g12a-aobus-pinctrl"
+		      "amlogic,meson-a1-periphs-pinctrl"
  - reg: address and size of registers controlling irq functionality
 
 === GPIO sub-nodes ===
diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
new file mode 100644
index 000000000000..13b7ab9dd6d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/pincfg-node.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic pin configuration node schema
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description:
+  Many data items that are represented in a pin configuration node are common
+  and generic. Pin control bindings should use the properties defined below
+  where they are applicable; not all of these properties are relevant or useful
+  for all hardware or binding structures. Each individual binding document
+  should state which of these generic properties, if any, are used, and the
+  structure of the DT nodes that contain these properties.
+
+properties:
+  bias-disable:
+    type: boolean
+    description: disable any pin bias
+
+  bias-high-impedance:
+    type: boolean
+    description: high impedance mode ("third-state", "floating")
+
+  bias-bus-hold:
+    type: boolean
+    description: latch weakly
+
+  bias-pull-up:
+    oneOf:
+      - type: boolean
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description: pull up the pin. Takes as optional argument on hardware
+      supporting it the pull strength in Ohm.
+
+  bias-pull-down:
+    oneOf:
+      - type: boolean
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description: pull down the pin. Takes as optional argument on hardware
+      supporting it the pull strength in Ohm.
+
+  bias-pull-pin-default:
+    oneOf:
+      - type: boolean
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    description: use pin-default pull state. Takes as optional argument on
+      hardware supporting it the pull strength in Ohm.
+
+  drive-push-pull:
+    type: boolean
+    description: drive actively high and low
+
+  drive-open-drain:
+    type: boolean
+    description: drive with open drain
+
+  drive-open-source:
+    type: boolean
+    description: drive with open source
+
+  drive-strength:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: sink or source at most X mA
+
+  drive-strength-microamp:
+    description: sink or source at most X uA
+
+  input-enable:
+    type: boolean
+    description: enable input on pin (no effect on output, such as
+      enabling an input buffer)
+
+  input-disable:
+    type: boolean
+    description: disable input on pin (no effect on output, such as
+      disabling an input buffer)
+
+  input-schmitt-enable:
+    type: boolean
+    description: enable schmitt-trigger mode
+
+  input-schmitt-disable:
+    type: boolean
+    description: disable schmitt-trigger mode
+
+  input-debounce:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Takes the debounce time in usec as argument or 0 to disable
+      debouncing
+
+  power-source:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: select between different power supplies
+
+  low-power-enable:
+    type: boolean
+    description: enable low power mode
+
+  low-power-disable:
+    type: boolean
+    description: disable low power mode
+
+  output-disable:
+    type: boolean
+    description: disable output on a pin (such as disable an output buffer)
+
+  output-enable:
+    type: boolean
+    description: enable output on a pin without actively driving it
+      (such as enabling an output buffer)
+
+  output-low:
+    type: boolean
+    description: set the pin to output mode with low level
+
+  output-high:
+    type: boolean
+    description: set the pin to output mode with high level
+
+  sleep-hardware-state:
+    type: boolean
+    description: indicate this is sleep related state which will be
+      programmed into the registers for the sleep state.
+
+  slew-rate:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: set the slew rate
+
+  skew-delay:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      this affects the expected clock skew on input pins
+      and the delay before latching a value to an output
+      pin. Typically indicates how many double-inverters are
+      used to delay the signal.
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index fcd37e93ed4d..4613bb17ace3 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -141,196 +141,8 @@ controller device.
 
 == Generic pin multiplexing node content ==
 
-pin multiplexing nodes:
-
-function		- the mux function to select
-groups			- the list of groups to select with this function
-			  (either this or "pins" must be specified)
-pins			- the list of pins to select with this function (either
-			  this or "groups" must be specified)
-
-Example:
-
-state_0_node_a {
-	uart0 {
-		function = "uart0";
-		groups = "u0rxtx", "u0rtscts";
-	};
-};
-state_1_node_a {
-	spi0 {
-		function = "spi0";
-		groups = "spi0pins";
-	};
-};
-state_2_node_a {
-	function = "i2c0";
-	pins = "mfio29", "mfio30";
-};
-
-Optionally an alternative binding can be used if more suitable depending on the
-pin controller hardware. For hardware where there is a large number of identical
-pin controller instances, naming each pin and function can easily become
-unmaintainable. This is especially the case if the same controller is used for
-different pins and functions depending on the SoC revision and packaging.
-
-For cases like this, the pin controller driver may use pinctrl-pin-array helper
-binding with a hardware based index and a number of pin configuration values:
-
-pincontroller {
-	... /* Standard DT properties for the device itself elided */
-	#pinctrl-cells = <2>;
-
-	state_0_node_a {
-		pinctrl-pin-array = <
-			0 A_DELAY_PS(0) G_DELAY_PS(120)
-			4 A_DELAY_PS(0) G_DELAY_PS(360)
-			...
-		>;
-	};
-	...
-};
-
-Above #pinctrl-cells specifies the number of value cells in addition to the
-index of the registers. This is similar to the interrupts-extended binding with
-one exception. There is no need to specify the phandle for each entry as that
-is already known as the defined pins are always children of the pin controller
-node. Further having the phandle pointing to another pin controller would not
-currently work as the pinctrl framework uses named modes to group pins for each
-pin control device.
-
-The index for pinctrl-pin-array must relate to the hardware for the pinctrl
-registers, and must not be a virtual index of pin instances. The reason for
-this is to avoid mapping of the index in the dts files and the pin controller
-driver as it can change.
-
-For hardware where pin multiplexing configurations have to be specified for
-each single pin the number of required sub-nodes containing "pin" and
-"function" properties can quickly escalate and become hard to write and
-maintain.
-
-For cases like this, the pin controller driver may use the pinmux helper
-property, where the pin identifier is provided with mux configuration settings
-in a pinmux group. A pinmux group consists of the pin identifier and mux
-settings represented as a single integer or an array of integers.
-
-The pinmux property accepts an array of pinmux groups, each of them describing
-a single pin multiplexing configuration.
-
-pincontroller {
-	state_0_node_a {
-		pinmux = <PINMUX_GROUP>, <PINMUX_GROUP>, ...;
-	};
-};
-
-Each individual pin controller driver bindings documentation shall specify
-how pin IDs and pin multiplexing configuration are defined and assembled
-together in a pinmux group.
+See pinmux-node.yaml
 
 == Generic pin configuration node content ==
 
-Many data items that are represented in a pin configuration node are common
-and generic. Pin control bindings should use the properties defined below
-where they are applicable; not all of these properties are relevant or useful
-for all hardware or binding structures. Each individual binding document
-should state which of these generic properties, if any, are used, and the
-structure of the DT nodes that contain these properties.
-
-Supported generic properties are:
-
-pins			- the list of pins that properties in the node
-			  apply to (either this, "group" or "pinmux" has to be
-			  specified)
-group			- the group to apply the properties to, if the driver
-			  supports configuration of whole groups rather than
-			  individual pins (either this, "pins" or "pinmux" has
-			  to be specified)
-pinmux			- the list of numeric pin ids and their mux settings
-			  that properties in the node apply to (either this,
-			  "pins" or "groups" have to be specified)
-bias-disable		- disable any pin bias
-bias-high-impedance	- high impedance mode ("third-state", "floating")
-bias-bus-hold		- latch weakly
-bias-pull-up		- pull up the pin
-bias-pull-down		- pull down the pin
-bias-pull-pin-default	- use pin-default pull state
-drive-push-pull		- drive actively high and low
-drive-open-drain	- drive with open drain
-drive-open-source	- drive with open source
-drive-strength		- sink or source at most X mA
-drive-strength-microamp	- sink or source at most X uA
-input-enable		- enable input on pin (no effect on output, such as
-			  enabling an input buffer)
-input-disable		- disable input on pin (no effect on output, such as
-			  disabling an input buffer)
-input-schmitt-enable	- enable schmitt-trigger mode
-input-schmitt-disable	- disable schmitt-trigger mode
-input-debounce		- debounce mode with debound time X
-power-source		- select between different power supplies
-low-power-enable	- enable low power mode
-low-power-disable	- disable low power mode
-output-disable		- disable output on a pin (such as disable an output
-			  buffer)
-output-enable		- enable output on a pin without actively driving it
-			  (such as enabling an output buffer)
-output-low		- set the pin to output mode with low level
-output-high		- set the pin to output mode with high level
-sleep-hardware-state	- indicate this is sleep related state which will be programmed
-			  into the registers for the sleep state.
-slew-rate		- set the slew rate
-skew-delay		- this affects the expected clock skew on input pins
-			  and the delay before latching a value to an output
-			  pin. Typically indicates how many double-inverters are
-			  used to delay the signal.
-
-For example:
-
-state_0_node_a {
-	cts_rxd {
-		pins = "GPIO0_AJ5", "GPIO2_AH4"; /* CTS+RXD */
-		bias-pull-up;
-	};
-};
-state_1_node_a {
-	rts_txd {
-		pins = "GPIO1_AJ3", "GPIO3_AH3"; /* RTS+TXD */
-		output-high;
-	};
-};
-state_2_node_a {
-	foo {
-		group = "foo-group";
-		bias-pull-up;
-	};
-};
-state_3_node_a {
-	mux {
-		pinmux = <GPIOx_PINm_MUXn>, <GPIOx_PINj_MUXk)>;
-		input-enable;
-	};
-};
-
-Some of the generic properties take arguments. For those that do, the
-arguments are described below.
-
-- pins takes a list of pin names or IDs as a required argument. The specific
-  binding for the hardware defines:
-  - Whether the entries are integers or strings, and their meaning.
-
-- pinmux takes a list of pin IDs and mux settings as required argument. The
-  specific bindings for the hardware defines:
-  - How pin IDs and mux settings are defined and assembled together in a single
-    integer or an array of integers.
-
-- bias-pull-up, -down and -pin-default take as optional argument on hardware
-  supporting it the pull strength in Ohm. bias-disable will disable the pull.
-
-- drive-strength takes as argument the target strength in mA.
-
-- drive-strength-microamp takes as argument the target strength in uA.
-
-- input-debounce takes the debounce time in usec as argument
-  or 0 to disable debouncing
-
-More in-depth documentation on these parameters can be found in
-<include/linux/pinctrl/pinconf-generic.h>
+See pincfg-node.yaml
diff --git a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
new file mode 100644
index 000000000000..732d9075560b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/pinmux-node.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic pin multiplexing node schema
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  The contents of the pin configuration child nodes are defined by the binding
+  for the individual pin controller device. The pin configuration nodes need not
+  be direct children of the pin controller device; they may be grandchildren,
+  for example. Whether this is legal, and whether there is any interaction
+  between the child and intermediate parent nodes, is again defined entirely by
+  the binding for the individual pin controller device.
+
+  While not required to be used, there are 3 generic forms of pin muxing nodes
+  which pin controller devices can use.
+
+  pin multiplexing nodes:
+
+  Example:
+
+  state_0_node_a {
+    uart0 {
+      function = "uart0";
+      groups = "u0rxtx", "u0rtscts";
+    };
+  };
+  state_1_node_a {
+    spi0 {
+      function = "spi0";
+      groups = "spi0pins";
+    };
+  };
+  state_2_node_a {
+    function = "i2c0";
+    pins = "mfio29", "mfio30";
+  };
+
+  Optionally an alternative binding can be used if more suitable depending on the
+  pin controller hardware. For hardware where there is a large number of identical
+  pin controller instances, naming each pin and function can easily become
+  unmaintainable. This is especially the case if the same controller is used for
+  different pins and functions depending on the SoC revision and packaging.
+
+  For cases like this, the pin controller driver may use pinctrl-pin-array helper
+  binding with a hardware based index and a number of pin configuration values:
+
+  pincontroller {
+    ... /* Standard DT properties for the device itself elided */
+    #pinctrl-cells = <2>;
+
+    state_0_node_a {
+      pinctrl-pin-array = <
+        0 A_DELAY_PS(0) G_DELAY_PS(120)
+        4 A_DELAY_PS(0) G_DELAY_PS(360)
+        ...
+        >;
+    };
+    ...
+  };
+
+  Above #pinctrl-cells specifies the number of value cells in addition to the
+  index of the registers. This is similar to the interrupts-extended binding with
+  one exception. There is no need to specify the phandle for each entry as that
+  is already known as the defined pins are always children of the pin controller
+  node. Further having the phandle pointing to another pin controller would not
+  currently work as the pinctrl framework uses named modes to group pins for each
+  pin control device.
+
+  The index for pinctrl-pin-array must relate to the hardware for the pinctrl
+  registers, and must not be a virtual index of pin instances. The reason for
+  this is to avoid mapping of the index in the dts files and the pin controller
+  driver as it can change.
+
+  For hardware where pin multiplexing configurations have to be specified for
+  each single pin the number of required sub-nodes containing "pin" and
+  "function" properties can quickly escalate and become hard to write and
+  maintain.
+
+  For cases like this, the pin controller driver may use the pinmux helper
+  property, where the pin identifier is provided with mux configuration settings
+  in a pinmux group. A pinmux group consists of the pin identifier and mux
+  settings represented as a single integer or an array of integers.
+
+  The pinmux property accepts an array of pinmux groups, each of them describing
+  a single pin multiplexing configuration.
+
+  pincontroller {
+    state_0_node_a {
+      pinmux = <PINMUX_GROUP>, <PINMUX_GROUP>, ...;
+    };
+  };
+
+  Each individual pin controller driver bindings documentation shall specify
+  how pin IDs and pin multiplexing configuration are defined and assembled
+  together in a pinmux group.
+
+properties:
+  function:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: The mux function to select
+
+  pins:
+    oneOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - $ref: /schemas/types.yaml#/definitions/string-array
+    description:
+      The list of pin identifiers that properties in the node apply to. The
+      specific binding for the hardware defines whether the entries are integers
+      or strings, and their meaning.
+
+  groups:
+    $ref: /schemas/types.yaml#/definitions/string-array
+    description:
+      the group to apply the properties to, if the driver supports
+      configuration of whole groups rather than individual pins (either
+      this, "pins" or "pinmux" has to be specified)
+
+  pinmux:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+    description:
+      The list of numeric pin ids and their mux settings that properties in the
+      node apply to (either this, "pins" or "groups" have to be specified)
+
+  pinctrl-pin-array:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8976-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8976-pinctrl.txt
new file mode 100644
index 000000000000..70d04d12f136
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8976-pinctrl.txt
@@ -0,0 +1,183 @@
+Qualcomm MSM8976 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+MSM8956 and MSM8976 platforms.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be "qcom,msm8976-pinctrl"
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: the base address and size of the TLMM register space.
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+	Usage: required
+	Value type: <none>
+	Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: must be 2. Specifying the pin number and flags, as defined
+		    in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+	Usage: required
+	Value type: <none>
+	Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: must be 2. Specifying the pin number and flags, as defined
+		    in <dt-bindings/gpio/gpio.h>
+
+- gpio-ranges:
+	Usage: required
+	Definition:  see ../gpio/gpio.txt
+
+- gpio-reserved-ranges:
+	Usage: optional
+	Definition: see ../gpio/gpio.txt
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+	Usage: required
+	Value type: <string-array>
+	Definition: List of gpio pins affected by the properties specified in
+		    this subnode.
+
+		    Valid pins are:
+		      gpio0-gpio145
+		        Supports mux, bias and drive-strength
+
+		      sdc1_clk, sdc1_cmd, sdc1_data,
+		      sdc2_clk, sdc2_cmd, sdc2_data,
+		      sdc3_clk, sdc3_cmd, sdc3_data
+		        Supports bias and drive-strength
+
+- function:
+	Usage: required
+	Value type: <string>
+	Definition: Specify the alternative function to be configured for the
+		    specified pins. Functions are only valid for gpio pins.
+		    Valid values are:
+
+		    gpio, blsp_uart1, blsp_spi1, smb_int, blsp_i2c1, blsp_spi2,
+		    blsp_uart2, blsp_i2c2, gcc_gp1_clk_b, blsp_spi3,
+		    qdss_tracedata_b, blsp_i2c3, gcc_gp2_clk_b, gcc_gp3_clk_b,
+		    blsp_spi4, cap_int, blsp_i2c4, blsp_spi5, blsp_uart5,
+		    qdss_traceclk_a, m_voc, blsp_i2c5, qdss_tracectl_a,
+		    qdss_tracedata_a, blsp_spi6, blsp_uart6, qdss_tracectl_b,
+		    blsp_i2c6, qdss_traceclk_b, mdp_vsync, pri_mi2s_mclk_a,
+		    sec_mi2s_mclk_a, cam_mclk, cci0_i2c, cci1_i2c, blsp1_spi,
+		    blsp3_spi, gcc_gp1_clk_a, gcc_gp2_clk_a, gcc_gp3_clk_a,
+		    uim_batt, sd_write, uim1_data, uim1_clk, uim1_reset,
+		    uim1_present, uim2_data, uim2_clk, uim2_reset,
+		    uim2_present, ts_xvdd, mipi_dsi0, us_euro, ts_resout,
+		    ts_sample, sec_mi2s_mclk_b, pri_mi2s, codec_reset,
+		    cdc_pdm0, us_emitter, pri_mi2s_mclk_b, pri_mi2s_mclk_c,
+		    lpass_slimbus, lpass_slimbus0, lpass_slimbus1, codec_int1,
+		    codec_int2, wcss_bt, sdc3, wcss_wlan2, wcss_wlan1,
+		    wcss_wlan0, wcss_wlan, wcss_fm, key_volp, key_snapshot,
+		    key_focus, key_home, pwr_down, dmic0_clk, hdmi_int,
+		    dmic0_data, wsa_vi, wsa_en, blsp_spi8, wsa_irq, blsp_i2c8,
+		    pa_indicator, modem_tsync, ssbi_wtr1, gsm1_tx, gsm0_tx,
+		    sdcard_det, sec_mi2s, ss_switch,
+
+- bias-disable:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configured as no pull.
+
+- bias-pull-down:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configured as pull down.
+
+- bias-pull-up:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configured as pull up.
+
+- output-high:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins are configured in output mode, driven
+		    high.
+		    Not valid for sdc pins.
+
+- output-low:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins are configured in output mode, driven
+		    low.
+		    Not valid for sdc pins.
+
+- drive-strength:
+	Usage: optional
+	Value type: <u32>
+	Definition: Selects the drive strength for the specified pins, in mA.
+		    Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+	tlmm: pinctrl@1000000 {
+		compatible = "qcom,msm8976-pinctrl";
+		reg = <0x1000000 0x300000>;
+		interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
+		gpio-controller;
+		#gpio-cells = <2>;
+		gpio-ranges = <&tlmm 0 0 145>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+
+		blsp1_uart2_active: blsp1_uart2_active {
+			mux {
+				pins = "gpio4", "gpio5", "gpio6", "gpio7";
+				function = "blsp_uart2";
+			};
+
+			config {
+				pins = "gpio4", "gpio5", "gpio6", "gpio7";
+				drive-strength = <2>;
+				bias-disable;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
index c32bf3237545..7be5de8d253f 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
@@ -15,14 +15,18 @@ PMIC's from Qualcomm.
 		    "qcom,pm8917-gpio"
 		    "qcom,pm8921-gpio"
 		    "qcom,pm8941-gpio"
+		    "qcom,pm8950-gpio"
 		    "qcom,pm8994-gpio"
 		    "qcom,pm8998-gpio"
 		    "qcom,pma8084-gpio"
+		    "qcom,pmi8950-gpio"
 		    "qcom,pmi8994-gpio"
 		    "qcom,pmi8998-gpio"
 		    "qcom,pms405-gpio"
 		    "qcom,pm8150-gpio"
 		    "qcom,pm8150b-gpio"
+		    "qcom,pm6150-gpio"
+		    "qcom,pm6150l-gpio"
 
 		    And must contain either "qcom,spmi-gpio" or "qcom,ssbi-gpio"
 		    if the device is on an spmi bus or an ssbi bus respectively
@@ -91,15 +95,19 @@ to specify in a pin configuration subnode:
 		    gpio1-gpio38 for pm8917
 		    gpio1-gpio44 for pm8921
 		    gpio1-gpio36 for pm8941
+		    gpio1-gpio8 for pm8950 (hole on gpio3)
 		    gpio1-gpio22 for pm8994
 		    gpio1-gpio26 for pm8998
 		    gpio1-gpio22 for pma8084
+		    gpio1-gpio2 for pmi8950
 		    gpio1-gpio10 for pmi8994
 		    gpio1-gpio12 for pms405 (holes on gpio1, gpio9 and gpio10)
 		    gpio1-gpio10 for pm8150 (holes on gpio2, gpio5, gpio7
 					     and gpio8)
 		    gpio1-gpio12 for pm8150b (holes on gpio3, gpio4, gpio7)
 		    gpio1-gpio12 for pm8150l (hole on gpio7)
+		    gpio1-gpio10 for pm6150
+		    gpio1-gpio12 for pm6150l
 
 - function:
 	Usage: required
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
index 2ab95bc26066..448d36a85730 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
@@ -16,6 +16,8 @@ of PMIC's from Qualcomm.
 		    "qcom,pm8917-mpp",
 		    "qcom,pm8921-mpp",
 		    "qcom,pm8941-mpp",
+		    "qcom,pm8950-mpp",
+		    "qcom,pmi8950-mpp",
 		    "qcom,pm8994-mpp",
 		    "qcom,pma8084-mpp",
 
@@ -80,6 +82,8 @@ to specify in a pin configuration subnode:
 		    mpp1-mpp4 for pm8841
 		    mpp1-mpp4 for pm8916
 		    mpp1-mpp8 for pm8941
+		    mpp1-mpp4 for pm8950
+		    mpp1-mpp4 for pmi8950
 		    mpp1-mpp4 for pma8084
 
 - function:
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.txt
index b5767ee82ee6..6ffeac9801df 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.txt
@@ -125,8 +125,9 @@ to specify in a pin configuration subnode:
 		    mi2s_1, mi2s_2, mss_lte, m_voc, pa_indicator, phase_flag,
 		    PLL_BIST, pll_bypassnl, pll_reset, prng_rosc, qdss,
 		    qdss_cti, qlink_enable, qlink_request, qspi_clk, qspi_cs,
-		    qspi_data, qup00, qup01, qup02, qup03, qup04, qup05,
-		    qup10, qup11, qup12, qup13, qup14, qup15, sdc1_tb,
+		    qspi_data, qup00, qup01, qup02_i2c, qup02_uart, qup03,
+		    qup04_i2c, qup04_uart, qup05, qup10, qup11_i2c, qup11_uart,
+		    qup12, qup13_i2c, qup13_uart, qup14, qup15, sdc1_tb,
 		    sdc2_tb, sd_write, sp_cmu, tgu_ch0, tgu_ch1, tgu_ch2,
 		    tgu_ch3, tsense_pwm1, tsense_pwm2, uim1, uim2, uim_batt,
 		    usb_phy, vfr_1, _V_GPIO, _V_PPS_IN, _V_PPS_OUT,
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
index 3902efa18fd0..6eada23eaa31 100644
--- a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
@@ -18,6 +18,7 @@ Required Properties:
     - "renesas,pfc-r8a7745": for R8A7745 (RZ/G1E) compatible pin-controller.
     - "renesas,pfc-r8a77470": for R8A77470 (RZ/G1C) compatible pin-controller.
     - "renesas,pfc-r8a774a1": for R8A774A1 (RZ/G2M) compatible pin-controller.
+    - "renesas,pfc-r8a774b1": for R8A774B1 (RZ/G2N) compatible pin-controller.
     - "renesas,pfc-r8a774c0": for R8A774C0 (RZ/G2E) compatible pin-controller.
     - "renesas,pfc-r8a7778": for R8A7778 (R-Car M1) compatible pin-controller.
     - "renesas,pfc-r8a7779": for R8A7779 (R-Car H1) compatible pin-controller.
@@ -27,7 +28,8 @@ Required Properties:
     - "renesas,pfc-r8a7793": for R8A7793 (R-Car M2-N) compatible pin-controller.
     - "renesas,pfc-r8a7794": for R8A7794 (R-Car E2) compatible pin-controller.
     - "renesas,pfc-r8a7795": for R8A7795 (R-Car H3) compatible pin-controller.
-    - "renesas,pfc-r8a7796": for R8A7796 (R-Car M3-W) compatible pin-controller.
+    - "renesas,pfc-r8a7796": for R8A77960 (R-Car M3-W) compatible pin-controller.
+    - "renesas,pfc-r8a77961": for R8A77961 (R-Car M3-W+) compatible pin-controller.
     - "renesas,pfc-r8a77965": for R8A77965 (R-Car M3-N) compatible pin-controller.
     - "renesas,pfc-r8a77970": for R8A77970 (R-Car V3M) compatible pin-controller.
     - "renesas,pfc-r8a77980": for R8A77980 (R-Car V3H) compatible pin-controller.
diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
index 0919db294c17..2113cfaa26e6 100644
--- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
@@ -29,6 +29,7 @@ Required properties for iomux controller:
 		"rockchip,rk3188-pinctrl":  for Rockchip RK3188
 		"rockchip,rk3228-pinctrl":  for Rockchip RK3228
 		"rockchip,rk3288-pinctrl":  for Rockchip RK3288
+		"rockchip,rk3308-pinctrl":  for Rockchip RK3308
 		"rockchip,rk3328-pinctrl":  for Rockchip RK3328
 		"rockchip,rk3368-pinctrl":  for Rockchip RK3368
 		"rockchip,rk3399-pinctrl":  for Rockchip RK3399
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 400df2da018a..754ea7ab040a 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -40,10 +40,9 @@ properties:
     allOf:
       - $ref: "/schemas/types.yaml#/definitions/phandle-array"
     description: Should be phandle/offset/mask
-    items:
-      - description: Phandle to the syscon node which includes IRQ mux selection.
-      - description: The offset of the IRQ mux selection register.
-      - description: The field mask of IRQ mux, needed if different of 0xf.
+      - Phandle to the syscon node which includes IRQ mux selection.
+      - The offset of the IRQ mux selection register.
+      - The field mask of IRQ mux, needed if different of 0xf.
 
   st,package:
     allOf:
diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt b/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
index 0fdc3dd1125e..99b5b10cda31 100644
--- a/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
+++ b/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
@@ -10,7 +10,7 @@ The Video Processing Unit power domain is controlled by this power controller,
 but the domain requires some external resources to meet the correct power
 sequences.
 The bindings must respect the power domain bindings as described in the file
-power_domain.txt
+power-domain.yaml
 
 Device Tree Bindings:
 ---------------------
diff --git a/Documentation/devicetree/bindings/power/avs/qcom,cpr.txt b/Documentation/devicetree/bindings/power/avs/qcom,cpr.txt
new file mode 100644
index 000000000000..ab0d5ebbad4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/avs/qcom,cpr.txt
@@ -0,0 +1,130 @@
+QCOM CPR (Core Power Reduction)
+
+CPR (Core Power Reduction) is a technology to reduce core power on a CPU
+or other device. Each OPP of a device corresponds to a "corner" that has
+a range of valid voltages for a particular frequency. While the device is
+running at a particular frequency, CPR monitors dynamic factors such as
+temperature, etc. and suggests adjustments to the voltage to save power
+and meet silicon characteristic requirements.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: should be "qcom,qcs404-cpr", "qcom,cpr" for qcs404
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: base address and size of the rbcpr register region
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the CPR interrupt
+
+- clocks:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: phandle to the reference clock
+
+- clock-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "ref"
+
+- vdd-apc-supply:
+	Usage: required
+	Value type: <phandle>
+	Definition: phandle to the vdd-apc-supply regulator
+
+- #power-domain-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: should be 0
+
+- operating-points-v2:
+	Usage: required
+	Value type: <phandle>
+	Definition: A phandle to the OPP table containing the
+		    performance states supported by the CPR
+		    power domain
+
+- acc-syscon:
+	Usage: optional
+	Value type: <phandle>
+	Definition: phandle to syscon for writing ACC settings
+
+- nvmem-cells:
+	Usage: required
+	Value type: <phandle>
+	Definition: phandle to nvmem cells containing the data
+		    that makes up a fuse corner, for each fuse corner.
+		    As well as the CPR fuse revision.
+
+- nvmem-cell-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: should be "cpr_quotient_offset1", "cpr_quotient_offset2",
+		    "cpr_quotient_offset3", "cpr_init_voltage1",
+		    "cpr_init_voltage2", "cpr_init_voltage3", "cpr_quotient1",
+		    "cpr_quotient2", "cpr_quotient3", "cpr_ring_osc1",
+		    "cpr_ring_osc2", "cpr_ring_osc3", "cpr_fuse_revision"
+		    for qcs404.
+
+Example:
+
+	cpr_opp_table: cpr-opp-table {
+		compatible = "operating-points-v2-qcom-level";
+
+		cpr_opp1: opp1 {
+			opp-level = <1>;
+			qcom,opp-fuse-level = <1>;
+		};
+		cpr_opp2: opp2 {
+			opp-level = <2>;
+			qcom,opp-fuse-level = <2>;
+		};
+		cpr_opp3: opp3 {
+			opp-level = <3>;
+			qcom,opp-fuse-level = <3>;
+		};
+	};
+
+	power-controller@b018000 {
+		compatible = "qcom,qcs404-cpr", "qcom,cpr";
+		reg = <0x0b018000 0x1000>;
+		interrupts = <0 15 IRQ_TYPE_EDGE_RISING>;
+		clocks = <&xo_board>;
+		clock-names = "ref";
+		vdd-apc-supply = <&pms405_s3>;
+		#power-domain-cells = <0>;
+		operating-points-v2 = <&cpr_opp_table>;
+		acc-syscon = <&tcsr>;
+
+		nvmem-cells = <&cpr_efuse_quot_offset1>,
+			<&cpr_efuse_quot_offset2>,
+			<&cpr_efuse_quot_offset3>,
+			<&cpr_efuse_init_voltage1>,
+			<&cpr_efuse_init_voltage2>,
+			<&cpr_efuse_init_voltage3>,
+			<&cpr_efuse_quot1>,
+			<&cpr_efuse_quot2>,
+			<&cpr_efuse_quot3>,
+			<&cpr_efuse_ring1>,
+			<&cpr_efuse_ring2>,
+			<&cpr_efuse_ring3>,
+			<&cpr_efuse_revision>;
+		nvmem-cell-names = "cpr_quotient_offset1",
+			"cpr_quotient_offset2",
+			"cpr_quotient_offset3",
+			"cpr_init_voltage1",
+			"cpr_init_voltage2",
+			"cpr_init_voltage3",
+			"cpr_quotient1",
+			"cpr_quotient2",
+			"cpr_quotient3",
+			"cpr_ring_osc1",
+			"cpr_ring_osc2",
+			"cpr_ring_osc3",
+			"cpr_fuse_revision";
+	};
diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt
index 726ec2875223..f0f5553a9e74 100644
--- a/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt
+++ b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt
@@ -19,7 +19,7 @@ Required properties:
   - ipg
 
 The power domains are generic power domain providers as documented in
-Documentation/devicetree/bindings/power/power_domain.txt. They are described as
+Documentation/devicetree/bindings/power/power-domain.yaml. They are described as
 subnodes of the power gating controller 'pgc' node of the GPC and should
 contain the following:
 
diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt
index 7c7e972aaa42..61649202f6f5 100644
--- a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt
+++ b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt
@@ -17,7 +17,7 @@ Required properties:
 
 Power domains contained within GPC node are generic power domain
 providers, documented in
-Documentation/devicetree/bindings/power/power_domain.txt, which are
+Documentation/devicetree/bindings/power/power-domain.yaml, which are
 described as subnodes of the power gating controller 'pgc' node,
 which, in turn, is expected to contain the following:
 
diff --git a/Documentation/devicetree/bindings/power/pd-samsung.txt b/Documentation/devicetree/bindings/power/pd-samsung.txt
deleted file mode 100644
index 92ef355e8f64..000000000000
--- a/Documentation/devicetree/bindings/power/pd-samsung.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-* Samsung Exynos Power Domains
-
-Exynos processors include support for multiple power domains which are used
-to gate power to one or more peripherals on the processor.
-
-Required Properties:
-- compatible: should be one of the following.
-    * samsung,exynos4210-pd - for exynos4210 type power domain.
-    * samsung,exynos5433-pd - for exynos5433 type power domain.
-- reg: physical base address of the controller and length of memory mapped
-    region.
-- #power-domain-cells: number of cells in power domain specifier;
-    must be 0.
-
-Optional Properties:
-- label: Human readable string with domain name. Will be visible in userspace
-	to let user to distinguish between multiple domains in SoC.
-- power-domains: phandle pointing to the parent power domain, for more details
-		 see Documentation/devicetree/bindings/power/power_domain.txt
-
-Deprecated Properties:
-- clocks
-- clock-names
-
-Node of a device using power domains must have a power-domains property
-defined with a phandle to respective power domain.
-
-Example:
-
-	lcd0: power-domain-lcd0 {
-		compatible = "samsung,exynos4210-pd";
-		reg = <0x10023C00 0x10>;
-		#power-domain-cells = <0>;
-		label = "LCD0";
-	};
-
-	mfc_pd: power-domain@10044060 {
-		compatible = "samsung,exynos4210-pd";
-		reg = <0x10044060 0x20>;
-		#power-domain-cells = <0>;
-		label = "MFC";
-	};
-
-See Documentation/devicetree/bindings/power/power_domain.txt for description
-of consumer-side bindings.
diff --git a/Documentation/devicetree/bindings/power/pd-samsung.yaml b/Documentation/devicetree/bindings/power/pd-samsung.yaml
new file mode 100644
index 000000000000..09bdd96c1ec1
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/pd-samsung.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/pd-samsung.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Power Domains
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+  Exynos processors include support for multiple power domains which are used
+  to gate power to one or more peripherals on the processor.
+
+allOf:
+  - $ref: power-domain.yaml#
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos4210-pd
+      - samsung,exynos5433-pd
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    deprecated: true
+    maxItems: 1
+
+  clock-names:
+    deprecated: true
+    maxItems: 1
+
+  label:
+    description:
+      Human readable string with domain name. Will be visible in userspace
+      to let user to distinguish between multiple domains in SoC.
+
+  "#power-domain-cells":
+    const: 0
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - "#power-domain-cells"
+  - reg
+
+examples:
+  - |
+    lcd0_pd: power-domain@10023c80 {
+        compatible = "samsung,exynos4210-pd";
+        reg = <0x10023c80 0x20>;
+        #power-domain-cells = <0>;
+        label = "LCD0";
+    };
+
+    mfc_pd: power-domain@10044060 {
+        compatible = "samsung,exynos4210-pd";
+        reg = <0x10044060 0x20>;
+        #power-domain-cells = <0>;
+        label = "MFC";
+    };
diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml
new file mode 100644
index 000000000000..455b573293ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/power-domain.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/power-domain.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic PM domains
+
+maintainers:
+  - Rafael J. Wysocki <rjw@rjwysocki.net>
+  - Kevin Hilman <khilman@kernel.org>
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+description: |+
+  System on chip designs are often divided into multiple PM domains that can be
+  used for power gating of selected IP blocks for power saving by reduced leakage
+  current.
+
+  This device tree binding can be used to bind PM domain consumer devices with
+  their PM domains provided by PM domain providers. A PM domain provider can be
+  represented by any node in the device tree and can provide one or more PM
+  domains. A consumer node can refer to the provider by a phandle and a set of
+  phandle arguments (so called PM domain specifiers) of length specified by the
+  \#power-domain-cells property in the PM domain provider node.
+
+properties:
+  $nodename:
+    pattern: "^(power-controller|power-domain)(@.*)?$"
+
+  domain-idle-states:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      A phandle of an idle-state that shall be soaked into a generic domain
+      power state. The idle state definitions are compatible with
+      domain-idle-state specified in
+      Documentation/devicetree/bindings/power/domain-idle-state.txt
+      phandles that are not compatible with domain-idle-state will be ignored.
+      The domain-idle-state property reflects the idle state of this PM domain
+      and not the idle states of the devices or sub-domains in the PM domain.
+      Devices and sub-domains have their own idle-states independent
+      of the parent domain's idle states. In the absence of this property,
+      the domain would be considered as capable of being powered-on
+      or powered-off.
+
+  operating-points-v2:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      Phandles to the OPP tables of power domains provided by a power domain
+      provider. If the provider provides a single power domain only or all
+      the power domains provided by the provider have identical OPP tables,
+      then this shall contain a single phandle. Refer to ../opp/opp.txt
+      for more information.
+
+  "#power-domain-cells":
+    description:
+      Number of cells in a PM domain specifier. Typically 0 for nodes
+      representing a single PM domain and 1 for nodes providing multiple PM
+      domains (e.g. power controllers), but can be any value as specified
+      by device tree binding documentation of particular provider.
+
+  power-domains:
+    description:
+       A phandle and PM domain specifier as defined by bindings of the power
+       controller specified by phandle. Some power domains might be powered
+       from another power domain (or have other hardware specific
+       dependencies). For representing such dependency a standard PM domain
+       consumer binding is used. When provided, all domains created
+       by the given provider should be subdomains of the domain specified
+       by this binding.
+
+required:
+  - "#power-domain-cells"
+
+examples:
+  - |
+    power: power-controller@12340000 {
+        compatible = "foo,power-controller";
+        reg = <0x12340000 0x1000>;
+        #power-domain-cells = <1>;
+    };
+
+    // The node above defines a power controller that is a PM domain provider and
+    // expects one cell as its phandle argument.
+
+  - |
+    parent2: power-controller@12340000 {
+        compatible = "foo,power-controller";
+        reg = <0x12340000 0x1000>;
+        #power-domain-cells = <1>;
+    };
+
+    child2: power-controller@12341000 {
+        compatible = "foo,power-controller";
+        reg = <0x12341000 0x1000>;
+        power-domains = <&parent2 0>;
+        #power-domain-cells = <1>;
+    };
+
+    // The nodes above define two power controllers: 'parent' and 'child'.
+    // Domains created by the 'child' power controller are subdomains of '0' power
+    // domain provided by the 'parent' power controller.
+
+  - |
+    parent3: power-controller@12340000 {
+        compatible = "foo,power-controller";
+        reg = <0x12340000 0x1000>;
+        #power-domain-cells = <0>;
+        domain-idle-states = <&DOMAIN_RET>, <&DOMAIN_PWR_DN>;
+    };
+
+    child3: power-controller@12341000 {
+        compatible = "foo,power-controller";
+        reg = <0x12341000 0x1000>;
+        power-domains = <&parent3>;
+        #power-domain-cells = <0>;
+        domain-idle-states = <&DOMAIN_PWR_DN>;
+    };
+
+    DOMAIN_RET: state@0 {
+        compatible = "domain-idle-state";
+        reg = <0x0 0x0>;
+        entry-latency-us = <1000>;
+        exit-latency-us = <2000>;
+        min-residency-us = <10000>;
+    };
+
+    DOMAIN_PWR_DN: state@1 {
+        compatible = "domain-idle-state";
+        reg = <0x1 0x0>;
+        entry-latency-us = <5000>;
+        exit-latency-us = <8000>;
+        min-residency-us = <7000>;
+    };
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 8f8b25a24b8f..5b09b2deb483 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -13,100 +13,7 @@ phandle arguments (so called PM domain specifiers) of length specified by the
 
 ==PM domain providers==
 
-Required properties:
- - #power-domain-cells : Number of cells in a PM domain specifier;
-   Typically 0 for nodes representing a single PM domain and 1 for nodes
-   providing multiple PM domains (e.g. power controllers), but can be any value
-   as specified by device tree binding documentation of particular provider.
-
-Optional properties:
- - power-domains : A phandle and PM domain specifier as defined by bindings of
-                   the power controller specified by phandle.
-   Some power domains might be powered from another power domain (or have
-   other hardware specific dependencies). For representing such dependency
-   a standard PM domain consumer binding is used. When provided, all domains
-   created by the given provider should be subdomains of the domain
-   specified by this binding. More details about power domain specifier are
-   available in the next section.
-
-- domain-idle-states : A phandle of an idle-state that shall be soaked into a
-                generic domain power state. The idle state definitions are
-                compatible with domain-idle-state specified in [1]. phandles
-                that are not compatible with domain-idle-state will be
-                ignored.
-  The domain-idle-state property reflects the idle state of this PM domain and
-  not the idle states of the devices or sub-domains in the PM domain. Devices
-  and sub-domains have their own idle-states independent of the parent
-  domain's idle states. In the absence of this property, the domain would be
-  considered as capable of being powered-on or powered-off.
-
-- operating-points-v2 : Phandles to the OPP tables of power domains provided by
-  a power domain provider. If the provider provides a single power domain only
-  or all the power domains provided by the provider have identical OPP tables,
-  then this shall contain a single phandle. Refer to ../opp/opp.txt for more
-  information.
-
-Example:
-
-	power: power-controller@12340000 {
-		compatible = "foo,power-controller";
-		reg = <0x12340000 0x1000>;
-		#power-domain-cells = <1>;
-	};
-
-The node above defines a power controller that is a PM domain provider and
-expects one cell as its phandle argument.
-
-Example 2:
-
-	parent: power-controller@12340000 {
-		compatible = "foo,power-controller";
-		reg = <0x12340000 0x1000>;
-		#power-domain-cells = <1>;
-	};
-
-	child: power-controller@12341000 {
-		compatible = "foo,power-controller";
-		reg = <0x12341000 0x1000>;
-		power-domains = <&parent 0>;
-		#power-domain-cells = <1>;
-	};
-
-The nodes above define two power controllers: 'parent' and 'child'.
-Domains created by the 'child' power controller are subdomains of '0' power
-domain provided by the 'parent' power controller.
-
-Example 3:
-	parent: power-controller@12340000 {
-		compatible = "foo,power-controller";
-		reg = <0x12340000 0x1000>;
-		#power-domain-cells = <0>;
-		domain-idle-states = <&DOMAIN_RET>, <&DOMAIN_PWR_DN>;
-	};
-
-	child: power-controller@12341000 {
-		compatible = "foo,power-controller";
-		reg = <0x12341000 0x1000>;
-		power-domains = <&parent>;
-		#power-domain-cells = <0>;
-		domain-idle-states = <&DOMAIN_PWR_DN>;
-	};
-
-	DOMAIN_RET: state@0 {
-		compatible = "domain-idle-state";
-		reg = <0x0>;
-		entry-latency-us = <1000>;
-		exit-latency-us = <2000>;
-		min-residency-us = <10000>;
-	};
-
-	DOMAIN_PWR_DN: state@1 {
-		compatible = "domain-idle-state";
-		reg = <0x1>;
-		entry-latency-us = <5000>;
-		exit-latency-us = <8000>;
-		min-residency-us = <7000>;
-	};
+See power-domain.yaml.
 
 ==PM domain consumers==
 
diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
index eb35b22f9e23..bc75bf49cdae 100644
--- a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
@@ -5,6 +5,7 @@ which then translates it into a corresponding voltage on a rail
 
 Required Properties:
  - compatible: Should be one of the following
+	* qcom,msm8976-rpmpd: RPM Power domain for the msm8976 family of SoC
 	* qcom,msm8996-rpmpd: RPM Power domain for the msm8996 family of SoC
 	* qcom,msm8998-rpmpd: RPM Power domain for the msm8998 family of SoC
 	* qcom,qcs404-rpmpd: RPM Power domain for the qcs404 family of SoC
diff --git a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt
index eae2a880155a..acb41fade926 100644
--- a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt
+++ b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt
@@ -12,6 +12,7 @@ Required properties:
       - "renesas,r8a7745-sysc" (RZ/G1E)
       - "renesas,r8a77470-sysc" (RZ/G1C)
       - "renesas,r8a774a1-sysc" (RZ/G2M)
+      - "renesas,r8a774b1-sysc" (RZ/G2N)
       - "renesas,r8a774c0-sysc" (RZ/G2E)
       - "renesas,r8a7779-sysc" (R-Car H1)
       - "renesas,r8a7790-sysc" (R-Car H2)
@@ -21,6 +22,7 @@ Required properties:
       - "renesas,r8a7794-sysc" (R-Car E2)
       - "renesas,r8a7795-sysc" (R-Car H3)
       - "renesas,r8a7796-sysc" (R-Car M3-W)
+      - "renesas,r8a77961-sysc" (R-Car M3-W+)
       - "renesas,r8a77965-sysc" (R-Car M3-N)
       - "renesas,r8a77970-sysc" (R-Car V3M)
       - "renesas,r8a77980-sysc" (R-Car V3H)
diff --git a/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt b/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt
index beda7d2efc30..49aba15dff8b 100644
--- a/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt
+++ b/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt
@@ -29,7 +29,7 @@ Optional nodes:
 
 Each of the PM domain nodes represents a PM domain, as documented by the
 generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
+Documentation/devicetree/bindings/power/power-domain.yaml.
 
 The nodes should be named by the real power area names, and thus their names
 should be unique.
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
deleted file mode 100644
index 022ed1f3bc80..000000000000
--- a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Generic SYSCON mapped register poweroff driver
-
-This is a generic poweroff driver using syscon to map the poweroff register.
-The poweroff is generally performed with a write to the poweroff register
-defined by the register map pointed by syscon reference plus the offset
-with the value and mask defined in the poweroff node.
-
-Required properties:
-- compatible: should contain "syscon-poweroff"
-- regmap: this is phandle to the register map node
-- offset: offset in the register map for the poweroff register (in bytes)
-- value: the poweroff value written to the poweroff register (32 bit access)
-
-Optional properties:
-- mask: update only the register bits defined by the mask (32 bit)
-
-Legacy usage:
-If a node doesn't contain a value property but contains a mask property, the
-mask property is used as the value.
-
-Default will be little endian mode, 32 bit access only.
-
-Examples:
-
-	poweroff {
-	   compatible = "syscon-poweroff";
-	   regmap = <&regmapnode>;
-	   offset = <0x0>;
-	   mask = <0x7a>;
-	};
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml
new file mode 100644
index 000000000000..520e07e6f21b
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/syscon-poweroff.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic SYSCON mapped register poweroff driver
+
+maintainers:
+  - Sebastian Reichel <sre@kernel.org>
+
+description: |+
+  This is a generic poweroff driver using syscon to map the poweroff register.
+  The poweroff is generally performed with a write to the poweroff register
+  defined by the register map pointed by syscon reference plus the offset
+  with the value and mask defined in the poweroff node.
+  Default will be little endian mode, 32 bit access only.
+
+properties:
+  compatible:
+    const: syscon-poweroff
+
+  mask:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Update only the register bits defined by the mask (32 bit).
+
+  offset:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Offset in the register map for the poweroff register (in bytes).
+
+  regmap:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: Phandle to the register map node.
+
+  value:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: The poweroff value written to the poweroff register (32 bit access).
+
+required:
+  - compatible
+  - regmap
+  - offset
+
+allOf:
+  - if:
+      not:
+        required:
+          - mask
+    then:
+      required:
+        - value
+
+examples:
+  - |
+    poweroff {
+        compatible = "syscon-poweroff";
+        regmap = <&regmapnode>;
+        offset = <0x0>;
+        mask = <0x7a>;
+    };
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt b/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
deleted file mode 100644
index e23dea8344f8..000000000000
--- a/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Generic SYSCON mapped register reset driver
-
-This is a generic reset driver using syscon to map the reset register.
-The reset is generally performed with a write to the reset register
-defined by the register map pointed by syscon reference plus the offset
-with the value and mask defined in the reboot node.
-
-Required properties:
-- compatible: should contain "syscon-reboot"
-- regmap: this is phandle to the register map node
-- offset: offset in the register map for the reboot register (in bytes)
-- value: the reset value written to the reboot register (32 bit access)
-
-Optional properties:
-- mask: update only the register bits defined by the mask (32 bit)
-
-Legacy usage:
-If a node doesn't contain a value property but contains a mask property, the
-mask property is used as the value.
-
-Default will be little endian mode, 32 bit access only.
-
-Examples:
-
-	reboot {
-	   compatible = "syscon-reboot";
-	   regmap = <&regmapnode>;
-	   offset = <0x0>;
-	   mask = <0x1>;
-	};
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml b/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml
new file mode 100644
index 000000000000..d38006b1f1f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/syscon-reboot.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic SYSCON mapped register reset driver
+
+maintainers:
+  - Sebastian Reichel <sre@kernel.org>
+
+description: |+
+  This is a generic reset driver using syscon to map the reset register.
+  The reset is generally performed with a write to the reset register
+  defined by the register map pointed by syscon reference plus the offset
+  with the value and mask defined in the reboot node.
+  Default will be little endian mode, 32 bit access only.
+
+properties:
+  compatible:
+    const: syscon-reboot
+
+  mask:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Update only the register bits defined by the mask (32 bit).
+
+  offset:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Offset in the register map for the reboot register (in bytes).
+
+  regmap:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: Phandle to the register map node.
+
+  value:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: The reset value written to the reboot register (32 bit access).
+
+required:
+  - compatible
+  - regmap
+  - offset
+
+allOf:
+  - if:
+      not:
+        required:
+          - mask
+    then:
+      required:
+        - value
+
+examples:
+  - |
+    reboot {
+        compatible = "syscon-reboot";
+        regmap = <&regmapnode>;
+        offset = <0x0>;
+        mask = <0x1>;
+    };
diff --git a/Documentation/devicetree/bindings/power/supply/battery.txt b/Documentation/devicetree/bindings/power/supply/battery.txt
index 5c913d4cf36c..3049cf88bdcf 100644
--- a/Documentation/devicetree/bindings/power/supply/battery.txt
+++ b/Documentation/devicetree/bindings/power/supply/battery.txt
@@ -35,6 +35,10 @@ Optional Properties:
    for each of the battery capacity lookup table. The first temperature value
    specifies the OCV table 0, and the second temperature value specifies the
    OCV table 1, and so on.
+ - resistance-temp-table: An array providing the temperature in degree Celsius
+   and corresponding battery internal resistance percent, which is used to look
+   up the resistance percent according to current temperature to get a accurate
+   batterty internal resistance in different temperatures.
 
 Battery properties are named, where possible, for the corresponding
 elements in enum power_supply_property, defined in
@@ -61,6 +65,7 @@ Example:
 		ocv-capacity-table-0 = <4185000 100>, <4113000 95>, <4066000 90>, ...;
 		ocv-capacity-table-1 = <4200000 100>, <4185000 95>, <4113000 90>, ...;
 		ocv-capacity-table-2 = <4250000 100>, <4200000 95>, <4185000 90>, ...;
+		resistance-temp-table = <20 100>, <10 90>, <0 80>, <(-10) 60>;
 	};
 
 	charger: charger@11 {
diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.txt b/Documentation/devicetree/bindings/power/supply/bq25890.txt
index dc0568933359..dc9c8f76e06c 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.txt
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.txt
@@ -1,11 +1,14 @@
 Binding for TI bq25890 Li-Ion Charger
 
-This driver will support the bq25896 and the bq25890. There are other ICs
-in the same family but those have not been tested.
+This driver will support the bq25892, the bq25896 and the bq25890. There are
+other ICs in the same family but those have not been tested.
 
 Required properties:
 - compatible: Should contain one of the following:
     * "ti,bq25890"
+    * "ti,bq25892"
+    * "ti,bq25895"
+    * "ti,bq25896"
 - reg: integer, i2c address of the device.
 - ti,battery-regulation-voltage: integer, maximum charging voltage (in uV);
 - ti,charge-current: integer, maximum charging current (in uA);
diff --git a/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt
index 80bd873c3b1d..6048f636783f 100644
--- a/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt
+++ b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt
@@ -5,7 +5,8 @@ Required properties:
 - interrupts: Interrupt specifier for each name in interrupt-names
 - interrupt-names: Should contain the following entries:
 		   "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
-		   "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb"
+		   "rvrs_mode", "chrgcurr2", "chrgcurr1", "vbusvld",
+		   "battdetb"
 - io-channels: IIO ADC channel specifier for each name in io-channel-names
 - io-channel-names: Should contain the following entries:
 		    "battdetb", "battp", "vbus", "chg_isense", "batti"
@@ -21,11 +22,13 @@ cpcap_charger: charger {
 	compatible = "motorola,mapphone-cpcap-charger";
 	interrupts-extended = <
 		&cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0
-		&cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0
+		&cpcap 22 0 &cpcap 21 0 &cpcap 20 0 &cpcap 19 0
+		&cpcap 54 0
 	>;
 	interrupt-names =
 		"chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
-		"rvrs_mode", "chrgcurr1", "vbusvld", "battdetb";
+		"rvrs_mode", "chrgcurr2", "chrgcurr1", "vbusvld",
+		"battdetb";
 	mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW
 		      &gpio3 23 GPIO_ACTIVE_LOW>;
 	io-channels = <&cpcap_adc 0 &cpcap_adc 1
diff --git a/Documentation/devicetree/bindings/power/supply/max17040_battery.txt b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
new file mode 100644
index 000000000000..4e0186b8380f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
@@ -0,0 +1,33 @@
+max17040_battery
+~~~~~~~~~~~~~~~~
+
+Required properties :
+ - compatible : "maxim,max17040" or "maxim,max77836-battery"
+ - reg: i2c slave address
+
+Optional properties :
+- maxim,alert-low-soc-level :	The alert threshold that sets the state of
+				charge level (%) where an interrupt is
+				generated. Can be configured from 1 up to 32
+				(%). If skipped the power up default value of
+				4 (%) will be used.
+- interrupts : 			Interrupt line see Documentation/devicetree/
+				bindings/interrupt-controller/interrupts.txt
+- wakeup-source :		This device has wakeup capabilities. Use this
+				property to use alert low SOC level interrupt
+				as wake up source.
+
+Optional properties support interrupt functionality for alert low state of
+charge level, present in some ICs in the same family, and should be used with
+compatible "maxim,max77836-battery".
+
+Example:
+
+	battery-fuel-gauge@36 {
+		compatible = "maxim,max77836-battery";
+		reg = <0x36>;
+		maxim,alert-low-soc-level = <10>;
+		interrupt-parent = <&gpio7>;
+		interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
+		wakeup-source;
+	};
diff --git a/Documentation/devicetree/bindings/power/supply/max17042_battery.txt b/Documentation/devicetree/bindings/power/supply/max17042_battery.txt
index 3f3894aaeebc..f34c5daae9af 100644
--- a/Documentation/devicetree/bindings/power/supply/max17042_battery.txt
+++ b/Documentation/devicetree/bindings/power/supply/max17042_battery.txt
@@ -2,7 +2,11 @@ max17042_battery
 ~~~~~~~~~~~~~~~~
 
 Required properties :
- - compatible : "maxim,max17042"
+ - compatible : one of the following
+ * "maxim,max17042"
+ * "maxim,max17047"
+ * "maxim,max17050"
+ * "maxim,max17055"
 
 Optional properties :
  - maxim,rsns-microohm : Resistance of rsns resistor in micro Ohms
diff --git a/Documentation/devicetree/bindings/power/supply/max77650-charger.txt b/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
deleted file mode 100644
index e6d0fb6ff94e..000000000000
--- a/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Battery charger driver for MAX77650 PMIC from Maxim Integrated.
-
-This module is part of the MAX77650 MFD device. For more details
-see Documentation/devicetree/bindings/mfd/max77650.txt.
-
-The charger is represented as a sub-node of the PMIC node on the device tree.
-
-Required properties:
---------------------
-- compatible:		Must be "maxim,max77650-charger"
-
-Optional properties:
---------------------
-- input-voltage-min-microvolt:	Minimum CHGIN regulation voltage. Must be one
-				of: 4000000, 4100000, 4200000, 4300000,
-				4400000, 4500000, 4600000, 4700000.
-- input-current-limit-microamp:	CHGIN input current limit (in microamps). Must
-				be one of: 95000, 190000, 285000, 380000,
-				475000.
-
-Example:
---------
-
-	charger {
-		compatible = "maxim,max77650-charger";
-		input-voltage-min-microvolt = <4200000>;
-		input-current-limit-microamp = <285000>;
-	};
diff --git a/Documentation/devicetree/bindings/power/supply/max77650-charger.yaml b/Documentation/devicetree/bindings/power/supply/max77650-charger.yaml
new file mode 100644
index 000000000000..deef010ec535
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/max77650-charger.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/max77650-charger.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Battery charger driver for MAX77650 PMIC from Maxim Integrated.
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description: |
+  This module is part of the MAX77650 MFD device. For more details
+  see Documentation/devicetree/bindings/mfd/max77650.yaml.
+
+  The charger is represented as a sub-node of the PMIC node on the device tree.
+
+properties:
+  compatible:
+    const: maxim,max77650-charger
+
+  input-voltage-min-microvolt:
+    description:
+      Minimum CHGIN regulation voltage.
+    enum: [ 4000000, 4100000, 4200000, 4300000,
+            4400000, 4500000, 4600000, 4700000 ]
+
+  input-current-limit-microamp:
+    description:
+      CHGIN input current limit (in microamps).
+    enum: [ 95000, 190000, 285000, 380000, 475000 ]
+
+required:
+  - compatible
diff --git a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.txt b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.txt
index 0a5705b8b592..b6359b590383 100644
--- a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.txt
+++ b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.txt
@@ -13,6 +13,8 @@ Required properties:
 - io-channel-names: Should be "bat-temp" or "charge-vol".
 - nvmem-cells: A phandle to the calibration cells provided by eFuse device.
 - nvmem-cell-names: Should be "fgu_calib".
+- sprd,calib-resistance-micro-ohms: Specify the real resistance of coulomb counter
+  chip in micro Ohms.
 - monitored-battery: Phandle of battery characteristics devicetree node.
   See Documentation/devicetree/bindings/power/supply/battery.txt
 
@@ -52,5 +54,6 @@ Example:
 			nvmem-cells = <&fgu_calib>;
 			nvmem-cell-names = "fgu_calib";
 			monitored-battery = <&bat>;
+			sprd,calib-resistance-micro-ohms = <21500>;
 		};
 	};
diff --git a/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt b/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
index 8d1b8200ebd0..54b9f9d0f90f 100644
--- a/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
+++ b/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
@@ -4,7 +4,7 @@ Device Tree Bindings for the Xilinx Zynq MPSoC PM domains
 The binding for zynqmp-power-controller follow the common
 generic PM domain binding[1].
 
-[1] Documentation/devicetree/bindings/power/power_domain.txt
+[1] Documentation/devicetree/bindings/power/power-domain.yaml
 
 == Zynq MPSoC Generic PM Domain Node ==
 
diff --git a/Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml b/Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml
new file mode 100644
index 000000000000..9e21b83d717e
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ptp/ptp-idtcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: IDT ClockMatrix (TM) PTP Clock Device Tree Bindings
+
+maintainers:
+  - Vincent Cheng <vincent.cheng.xh@renesas.com>
+
+properties:
+  compatible:
+    enum:
+      # For System Synchronizer
+      - idt,8a34000
+      - idt,8a34001
+      - idt,8a34002
+      - idt,8a34003
+      - idt,8a34004
+      - idt,8a34005
+      - idt,8a34006
+      - idt,8a34007
+      - idt,8a34008
+      - idt,8a34009
+      # For Port Synchronizer
+      - idt,8a34010
+      - idt,8a34011
+      - idt,8a34012
+      - idt,8a34013
+      - idt,8a34014
+      - idt,8a34015
+      - idt,8a34016
+      - idt,8a34017
+      - idt,8a34018
+      - idt,8a34019
+      # For Universal Frequency Translator (UFT)
+      - idt,8a34040
+      - idt,8a34041
+      - idt,8a34042
+      - idt,8a34043
+      - idt,8a34044
+      - idt,8a34045
+      - idt,8a34046
+      - idt,8a34047
+      - idt,8a34048
+      - idt,8a34049
+
+  reg:
+    maxItems: 1
+    description:
+      I2C slave address of the device.
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c@1 {
+        compatible = "abc,acme-1234";
+        reg = <0x01 0x400>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        phc@5b {
+            compatible = "idt,8a34000";
+            reg = <0x5b>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/ptp/ptp-ines.txt b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
new file mode 100644
index 000000000000..4c242bd1ce9c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
@@ -0,0 +1,35 @@
+ZHAW InES PTP time stamping IP core
+
+The IP core needs two different kinds of nodes.  The control node
+lives somewhere in the memory map and specifies the address of the
+control registers.  There can be up to three port handles placed as
+attributes of PHY nodes.  These associate a particular MII bus with a
+port index within the IP core.
+
+Required properties of the control node:
+
+- compatible:		"ines,ptp-ctrl"
+- reg:			physical address and size of the register bank
+
+Required format of the port handle within the PHY node:
+
+- timestamper:		provides control node reference and
+			the port channel within the IP core
+
+Example:
+
+	tstamper: timestamper@60000000 {
+		compatible = "ines,ptp-ctrl";
+		reg = <0x60000000 0x80>;
+	};
+
+	ethernet@80000000 {
+		...
+		mdio {
+			...
+			ethernet-phy@3 {
+				...
+				timestamper = <&tstamper 0>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/ptp/timestamper.txt b/Documentation/devicetree/bindings/ptp/timestamper.txt
new file mode 100644
index 000000000000..fc550ce4d4ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/timestamper.txt
@@ -0,0 +1,42 @@
+Time stamps from MII bus snooping devices
+
+This binding supports non-PHY devices that snoop the MII bus and
+provide time stamps.  In contrast to PHY time stamping drivers (which
+can simply attach their interface directly to the PHY instance), stand
+alone MII time stamping drivers use this binding to specify the
+connection between the snooping device and a given network interface.
+
+Non-PHY MII time stamping drivers typically talk to the control
+interface over another bus like I2C, SPI, UART, or via a memory mapped
+peripheral.  This controller device is associated with one or more
+time stamping channels, each of which snoops on a MII bus.
+
+The "timestamper" property lives in a phy node and links a time
+stamping channel from the controller device to that phy's MII bus.
+
+Example:
+
+	tstamper: timestamper@10000000 {
+		compatible = "ines,ptp-ctrl";
+		reg = <0x10000000 0x80>;
+	};
+
+	ethernet@20000000 {
+		mdio {
+			ethernet-phy@1 {
+				timestamper = <&tstamper 0>;
+			};
+		};
+	};
+
+	ethernet@30000000 {
+		mdio {
+			ethernet-phy@2 {
+				timestamper = <&tstamper 1>;
+			};
+		};
+	};
+
+In this example, time stamps from the MII bus attached to phy@1 will
+appear on time stamp channel 0 (zero), and those from phy@2 appear on
+channel 1.
diff --git a/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
index 0ac52f83a58c..4a21fe77ee1d 100644
--- a/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 PWM Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#pwm-cells":
diff --git a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
index cfda0d57d302..afa501bf7f94 100644
--- a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
@@ -10,7 +10,7 @@ Required properties:
  - pinctrl-0: should contain the pinctrl states described by pinctrl
    default.
  - #pwm-cells: should be set to 3. This PWM chip use the default 3 cells
-   bindings defined in pwm.txt in this directory.
+   bindings defined in pwm.yaml in this directory.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
index 591ecdd39c7b..fbb5325be1f0 100644
--- a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
@@ -7,7 +7,7 @@ Required properties:
     - "atmel,sama5d2-pwm"
     - "microchip,sam9x60-pwm"
   - reg: physical base address and length of the controller's registers
-  - #pwm-cells: Should be 3. See pwm.txt in this directory for a
+  - #pwm-cells: Should be 3. See pwm.yaml in this directory for a
     description of the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
index 8031148bcf85..985fcc65f8c4 100644
--- a/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
@@ -2,7 +2,7 @@ Atmel TCB PWM controller
 
 Required properties:
 - compatible: should be "atmel,tcb-pwm"
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
   the cells format. The only third cell flag supported by this binding is
   PWM_POLARITY_INVERTED.
 - tc-block: The Timer Counter block to use as a PWM chip.
diff --git a/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt
index d9254a6da5ed..0e662d7f6bd1 100644
--- a/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt
@@ -4,7 +4,7 @@ Required properties:
 
 - compatible: must be "brcm,bcm7038-pwm"
 - reg: physical base address and length for this controller
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description
   of the cells format
 - clocks: a phandle to the reference clock for this block which is fed through
   its internal variable clock frequency generator
diff --git a/Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.txt
index 21f75bbd6dae..655f6cd4ef46 100644
--- a/Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.txt
@@ -6,7 +6,7 @@ Required Properties :
 - compatible: must be "brcm,iproc-pwm"
 - reg: physical base address and length of the controller's registers
 - clocks: phandle + clock specifier pair for the external clock
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a
   description of the cells format.
 
 Refer to clocks/clock-bindings.txt for generic clock consumer properties.
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
index 8eae9fe7841c..c42eecfc81ed 100644
--- a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
@@ -6,7 +6,7 @@ Required Properties :
 - compatible: should contain "brcm,kona-pwm"
 - reg: physical base address and length of the controller's registers
 - clocks: phandle + clock specifier pair for the external clock
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a
   description of the cells format.
 
 Refer to clocks/clock-bindings.txt for generic clock consumer properties.
diff --git a/Documentation/devicetree/bindings/pwm/img-pwm.txt b/Documentation/devicetree/bindings/pwm/img-pwm.txt
index fade5f26fcac..9db6de97317d 100644
--- a/Documentation/devicetree/bindings/pwm/img-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/img-pwm.txt
@@ -8,7 +8,7 @@ Required properties:
   - clock-names: Must include the following entries.
     - pwm: PWM operating clock.
     - sys: PWM system interface clock.
-  - #pwm-cells: Should be 2. See pwm.txt in this directory for the
+  - #pwm-cells: Should be 2. See pwm.yaml in this directory for the
 	description of the cells format.
   - img,cr-periph: Must contain a phandle to the peripheral control
 	syscon node which contains PWM control registers.
diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-pwm.txt
index c61bdf8cd41b..22f1c3d8b773 100644
--- a/Documentation/devicetree/bindings/pwm/imx-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/imx-pwm.txt
@@ -6,7 +6,7 @@ Required properties:
   - "fsl,imx1-pwm" for PWM compatible with the one integrated on i.MX1
   - "fsl,imx27-pwm" for PWM compatible with the one integrated on i.MX27
 - reg: physical base address and length of the controller's registers
-- #pwm-cells: 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.txt
+- #pwm-cells: 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.yaml
   in this directory for a description of the cells format.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
diff --git a/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt
index 3ba958d764ff..5bf20950a24e 100644
--- a/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt
@@ -3,7 +3,7 @@ Freescale i.MX TPM PWM controller
 Required properties:
 - compatible : Should be "fsl,imx7ulp-pwm".
 - reg: Physical base address and length of the controller's registers.
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of the cells format.
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of the cells format.
 - clocks : The clock provided by the SoC to drive the PWM.
 - interrupts: The interrupt for the PWM controller.
 
diff --git a/Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt b/Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt
index 36e49d4325cd..43d9f4f08a2e 100644
--- a/Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt
@@ -7,7 +7,7 @@ Required properties:
     See ../clock/clock-bindings.txt for details.
   - clock-names: Must include the following entries.
     - pwm: PWM operating clock.
-  - #pwm-cells: Should be 3. See pwm.txt in this directory for the description
+  - #pwm-cells: Should be 3. See pwm.yaml in this directory for the description
     of the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.txt b/Documentation/devicetree/bindings/pwm/mxs-pwm.txt
index 96cdde5f6208..1b06f86a7091 100644
--- a/Documentation/devicetree/bindings/pwm/mxs-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.txt
@@ -3,7 +3,7 @@ Freescale MXS PWM controller
 Required properties:
 - compatible: should be "fsl,imx23-pwm"
 - reg: physical base address and length of the controller's registers
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 - fsl,pwm-number: the number of PWM devices
 
diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
index c57e11b8d937..0a69eadf44ce 100644
--- a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
@@ -10,7 +10,7 @@ Required properties:
   - "nvidia,tegra210-pwm", "nvidia,tegra20-pwm": for Tegra210
   - "nvidia,tegra186-pwm": for Tegra186
 - reg: physical base address and length of the controller's registers
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 - clocks: Must contain one entry, for the module clock.
   See ../clocks/clock-bindings.txt for details.
diff --git a/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt b/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt
index f84ec9d291ea..f21b55c95738 100644
--- a/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt
@@ -3,7 +3,7 @@ NXP PCA9685 16-channel 12-bit PWM LED controller
 
 Required properties:
   - compatible: "nxp,pca9685-pwm"
-  - #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+  - #pwm-cells: Should be 2. See pwm.yaml in this directory for a description of
     the cells format.
     The index 16 is the ALLCALL channel, that sets all PWM channels at the same
     time.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
index 8cf87d1bfca5..f5753b3f79df 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
@@ -6,7 +6,7 @@ Required properties:
 - clocks: This clock defines the base clock frequency of the PWM hardware
   system, the period and the duty_cycle of the PWM signal is a multiple of
   the base period.
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 
 Examples:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-berlin.txt b/Documentation/devicetree/bindings/pwm/pwm-berlin.txt
index 82cbe16fcbbc..f01e993a498a 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-berlin.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-berlin.txt
@@ -4,7 +4,7 @@ Required properties:
 - compatible: should be "marvell,berlin-pwm"
 - reg: physical base address and length of the controller's registers
 - clocks: phandle to the input clock
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
index 576ad002bc83..36532cd5ab25 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
@@ -21,7 +21,7 @@ Required properties:
   - "fsl,vf610-ftm-pwm" for PWM compatible with the one integrated on VF610
   - "fsl,imx8qm-ftm-pwm" for PWM compatible with the one integrated on i.MX8QM
 - reg: Physical base address and length of the controller's registers
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 - clock-names: Should include the following module clock source entries:
     "ftm_sys" (module clock, also can be used as counter clock),
diff --git a/Documentation/devicetree/bindings/pwm/pwm-hibvt.txt b/Documentation/devicetree/bindings/pwm/pwm-hibvt.txt
index daedfef09bb6..54dbc2a0e648 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-hibvt.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-hibvt.txt
@@ -10,7 +10,7 @@ Required properties:
 - reg: physical base address and length of the controller's registers.
 - clocks: phandle and clock specifier of the PWM reference clock.
 - resets: phandle and reset specifier for the PWM controller reset.
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt b/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
index 7bd9d3b12ce1..f214305a8f5e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
@@ -2,7 +2,7 @@ TI/National Semiconductor LP3943 PWM controller
 
 Required properties:
   - compatible: "ti,lp3943-pwm"
-  - #pwm-cells: Should be 2. See pwm.txt in this directory for a
+  - #pwm-cells: Should be 2. See pwm.yaml in this directory for a
                 description of the cells format.
                 Note that this hardware limits the period length to the
                 range 6250~1600000.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
index c8501530173c..95536d83c5f2 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
@@ -6,10 +6,10 @@ Required properties:
    - "mediatek,mt7622-pwm": found on mt7622 SoC.
    - "mediatek,mt7623-pwm": found on mt7623 SoC.
    - "mediatek,mt7628-pwm": found on mt7628 SoC.
-   - "mediatek,mt7629-pwm", "mediatek,mt7622-pwm": found on mt7629 SoC.
+   - "mediatek,mt7629-pwm": found on mt7629 SoC.
    - "mediatek,mt8516-pwm": found on mt8516 SoC.
  - reg: physical base address and length of the controller's registers.
- - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
+ - #pwm-cells: must be 2. See pwm.yaml in this directory for a description of
    the cell format.
  - clocks: phandle and clock specifier of the PWM reference clock.
  - clock-names: must contain the following, except for MT7628 which
diff --git a/Documentation/devicetree/bindings/pwm/pwm-meson.txt b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
index 891632354065..bd02b0a1496f 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-meson.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
@@ -10,7 +10,7 @@ Required properties:
                          or "amlogic,meson-g12a-ee-pwm"
                          or "amlogic,meson-g12a-ao-pwm-ab"
                          or "amlogic,meson-g12a-ao-pwm-cd"
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
index 6f8af2bcc7b7..0521957c253f 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
@@ -6,7 +6,7 @@ Required properties:
    - "mediatek,mt6595-disp-pwm": found on mt6595 SoC.
    - "mediatek,mt8173-disp-pwm": found on mt8173 SoC.
  - reg: physical base address and length of the controller's registers.
- - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
+ - #pwm-cells: must be 2. See pwm.yaml in this directory for a description of
    the cell format.
  - clocks: phandle and clock specifier of the PWM reference clock.
  - clock-names: must contain the following:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt b/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
index 5ccfcc82da08..d722ae3be363 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt
@@ -4,7 +4,7 @@ Required properties:
 - compatible: Shall contain "ti,omap-dmtimer-pwm".
 - ti,timers: phandle to PWM capable OMAP timer. See timer/ti,timer.txt for info
   about these timers.
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
   the cells format.
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
index 2c5e52a5bede..f70956dea77b 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
@@ -14,7 +14,7 @@ Required properties:
    - For newer hardware (rk3328 and future socs): specified by name
      - "pwm": This is used to derive the functional clock.
      - "pclk": This is the APB bus clock.
- - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory
+ - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.yaml in this directory
    for a description of the cell format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-samsung.txt b/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
deleted file mode 100644
index 5538de9c2007..000000000000
--- a/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-* Samsung PWM timers
-
-Samsung SoCs contain PWM timer blocks which can be used for system clock source
-and clock event timers, as well as to drive SoC outputs with PWM signal. Each
-PWM timer block provides 5 PWM channels (not all of them can drive physical
-outputs - see SoC and board manual).
-
-Be aware that the clocksource driver supports only uniprocessor systems.
-
-Required properties:
-- compatible : should be one of following:
-    samsung,s3c2410-pwm - for 16-bit timers present on S3C24xx SoCs
-    samsung,s3c6400-pwm - for 32-bit timers present on S3C64xx SoCs
-    samsung,s5p6440-pwm - for 32-bit timers present on S5P64x0 SoCs
-    samsung,s5pc100-pwm - for 32-bit timers present on S5PC100, S5PV210,
-			  Exynos4210 rev0 SoCs
-    samsung,exynos4210-pwm - for 32-bit timers present on Exynos4210,
-                          Exynos4x12, Exynos5250 and Exynos5420 SoCs
-- reg: base address and size of register area
-- interrupts: list of timer interrupts (one interrupt per timer, starting at
-  timer 0)
-- clock-names: should contain all following required clock names:
-    - "timers" - PWM base clock used to generate PWM signals,
-  and any subset of following optional clock names:
-    - "pwm-tclk0" - first external PWM clock source,
-    - "pwm-tclk1" - second external PWM clock source.
-  Note that not all IP variants allow using all external clock sources.
-  Refer to SoC documentation to learn which clock source configurations
-  are available.
-- clocks: should contain clock specifiers of all clocks, which input names
-  have been specified in clock-names property, in same order.
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
-  the cells format. The only third cell flag supported by this binding is
-  PWM_POLARITY_INVERTED.
-
-Optional properties:
-- samsung,pwm-outputs: list of PWM channels used as PWM outputs on particular
-    platform - an array of up to 5 elements being indices of PWM channels
-    (from 0 to 4), the order does not matter.
-
-Example:
-	pwm@7f006000 {
-		compatible = "samsung,s3c6400-pwm";
-		reg = <0x7f006000 0x1000>;
-		interrupt-parent = <&vic0>;
-		interrupts = <23>, <24>, <25>, <27>, <28>;
-		clocks = <&clock 67>;
-		clock-names = "timers";
-		samsung,pwm-outputs = <0>, <1>;
-		#pwm-cells = <3>;
-	}
diff --git a/Documentation/devicetree/bindings/pwm/pwm-samsung.yaml b/Documentation/devicetree/bindings/pwm/pwm-samsung.yaml
new file mode 100644
index 000000000000..ea7f32905172
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-samsung.yaml
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/pwm-samsung.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC PWM timers
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+  Samsung SoCs contain PWM timer blocks which can be used for system clock source
+  and clock event timers, as well as to drive SoC outputs with PWM signal. Each
+  PWM timer block provides 5 PWM channels (not all of them can drive physical
+  outputs - see SoC and board manual).
+
+  Be aware that the clocksource driver supports only uniprocessor systems.
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    enum:
+      - samsung,s3c2410-pwm             # 16-bit, S3C24xx
+      - samsung,s3c6400-pwm             # 32-bit, S3C64xx
+      - samsung,s5p6440-pwm             # 32-bit, S5P64x0
+      - samsung,s5pc100-pwm             # 32-bit, S5PC100, S5PV210, Exynos4210 rev0 SoCs
+      - samsung,exynos4210-pwm          # 32-bit, Exynos
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    description: |
+      Should contain all following required clock names:
+      - "timers" - PWM base clock used to generate PWM signals,
+      and any subset of following optional clock names:
+      - "pwm-tclk0" - first external PWM clock source,
+      - "pwm-tclk1" - second external PWM clock source.
+      Note that not all IP variants allow using all external clock sources.
+      Refer to SoC documentation to learn which clock source configurations
+      are available.
+    oneOf:
+      - items:
+        - const: timers
+      - items:
+        - const: timers
+        - const: pwm-tclk0
+      - items:
+        - const: timers
+        - const: pwm-tclk1
+      - items:
+        - const: timers
+        - const: pwm-tclk0
+        - const: pwm-tclk1
+
+  interrupts:
+    description:
+      One interrupt per timer, starting at timer 0.
+    minItems: 1
+    maxItems: 5
+
+  "#pwm-cells":
+    description:
+      The only third cell flag supported by this binding
+      is PWM_POLARITY_INVERTED.
+    const: 3
+
+  samsung,pwm-outputs:
+    description:
+      A list of PWM channels used as PWM outputs on particular platform.
+      It is an array of up to 5 elements being indices of PWM channels
+      (from 0 to 4), the order does not matter.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - uniqueItems: true
+      - items:
+          minimum: 0
+          maximum: 4
+
+required:
+  - clocks
+  - clock-names
+  - compatible
+  - interrupts
+  - "#pwm-cells"
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pwm@7f006000 {
+        compatible = "samsung,s3c6400-pwm";
+        reg = <0x7f006000 0x1000>;
+        interrupt-parent = <&vic0>;
+        interrupts = <23>, <24>, <25>, <27>, <28>;
+        clocks = <&clock 67>;
+        clock-names = "timers";
+        samsung,pwm-outputs = <0>, <1>;
+        #pwm-cells = <3>;
+    };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.txt b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
index 36447e3c9378..3d1dd7b06efc 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
@@ -17,7 +17,7 @@ Required properties:
   Please refer to sifive-blocks-ip-versioning.txt for details.
 - reg: physical base address and length of the controller's registers
 - clocks: Should contain a clock identifier for the PWM's parent clock.
-- #pwm-cells: Should be 3. See pwm.txt in this directory
+- #pwm-cells: Should be 3. See pwm.yaml in this directory
   for a description of the cell format.
 - interrupts: one interrupt per PWM channel
 
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt
index 16fa5a096206..87b206fd0618 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt
@@ -9,7 +9,7 @@ Required properties:
 - clock-names: Should contain following entries:
   "pwmn": used to derive the functional clock for PWM channel n (n range: 0 ~ 3).
   "enablen": for PWM channel n enable clock (n range: 0 ~ 3).
-- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: Should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
deleted file mode 100644
index 6521bc44a74e..000000000000
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-STMicroelectronics STM32 Low-Power Timer PWM
-
-STM32 Low-Power Timer provides single channel PWM.
-
-Must be a sub-node of an STM32 Low-Power Timer device tree node.
-See ../mfd/stm32-lptimer.txt for details about the parent node.
-
-Required parameters:
-- compatible:		Must be "st,stm32-pwm-lp".
-- #pwm-cells:		Should be set to 3. This PWM chip uses the default 3 cells
-			bindings defined in pwm.txt.
-
-Optional properties:
-- pinctrl-names: 	Set to "default". An additional "sleep" state can be
-			defined to set pins in sleep state when in low power.
-- pinctrl-n: 		Phandle(s) pointing to pin configuration node for PWM,
-			respectively for "default" and "sleep" states.
-
-Example:
-	timer@40002400 {
-		compatible = "st,stm32-lptimer";
-		...
-		pwm {
-			compatible = "st,stm32-pwm-lp";
-			#pwm-cells = <3>;
-			pinctrl-names = "default", "sleep";
-			pinctrl-0 = <&lppwm1_pins>;
-			pinctrl-1 = <&lppwm1_sleep_pins>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
deleted file mode 100644
index a8690bfa5e1f..000000000000
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-STMicroelectronics STM32 Timers PWM bindings
-
-Must be a sub-node of an STM32 Timers device tree node.
-See ../mfd/stm32-timers.txt for details about the parent node.
-
-Required parameters:
-- compatible:		Must be "st,stm32-pwm".
-- pinctrl-names: 	Set to "default".
-- pinctrl-0: 		List of phandles pointing to pin configuration nodes for PWM module.
-			For Pinctrl properties see ../pinctrl/pinctrl-bindings.txt
-- #pwm-cells:		Should be set to 3. This PWM chip uses the default 3 cells
-			bindings defined in pwm.txt.
-
-Optional parameters:
-- st,breakinput:	One or two <index level filter> to describe break input configurations.
-			"index" indicates on which break input (0 or 1) the configuration
-			should be applied.
-			"level" gives the active level (0=low or 1=high) of the input signal
-			for this configuration.
-			"filter" gives the filtering value to be applied.
-
-Example:
-	timers@40010000 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "st,stm32-timers";
-		reg = <0x40010000 0x400>;
-		clocks = <&rcc 0 160>;
-		clock-names = "int";
-
-		pwm {
-			compatible = "st,stm32-pwm";
-			#pwm-cells = <3>;
-			pinctrl-0	= <&pwm1_pins>;
-			pinctrl-names	= "default";
-			st,breakinput = <0 1 5>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
index b9a1d7402128..c7c4347a769a 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -8,7 +8,7 @@ Required properties:
   for dra746 - compatible = "ti,dra746-ecap", "ti,am3352-ecap";
   for 66ak2g - compatible = "ti,k2g-ecap", "ti,am3352-ecap";
   for am654  - compatible = "ti,am654-ecap", "ti,am3352-ecap";
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
   the cells format. The PWM channel index ranges from 0 to 4. The only third
   cell flag supported by this binding is PWM_POLARITY_INVERTED.
 - reg: physical base address and size of the registers map.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
index 31c4577157dd..c7e28f6d28be 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
@@ -7,7 +7,7 @@ Required properties:
   for am654   - compatible = "ti,am654-ehrpwm", "ti-am3352-ehrpwm";
   for da850   - compatible = "ti,da850-ehrpwm", "ti-am3352-ehrpwm", "ti,am33xx-ehrpwm";
   for dra746 - compatible = "ti,dra746-ehrpwm", "ti-am3352-ehrpwm";
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
   the cells format. The only third cell flag supported by this binding is
   PWM_POLARITY_INVERTED.
 - reg: physical base address and size of the registers map.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-zx.txt b/Documentation/devicetree/bindings/pwm/pwm-zx.txt
index a6bcc75c9164..3c8fe7aa8269 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-zx.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-zx.txt
@@ -7,7 +7,7 @@ Required properties:
  - clock-names: "pclk" for PCLK, "wclk" for WCLK to the PWM controller.  The
    PCLK is for register access, while WCLK is the reference clock for
    calculating period and duty cycles.
- - #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+ - #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
    the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt
index 8556263b8502..084886bd721e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm.txt
@@ -57,13 +57,4 @@ Example with optional PWM specifier for inverse polarity
 2) PWM controller nodes
 -----------------------
 
-PWM controller nodes must specify the number of cells used for the
-specifier using the '#pwm-cells' property.
-
-An example PWM controller might look like this:
-
-	pwm: pwm@7000a000 {
-		compatible = "nvidia,tegra20-pwm";
-		reg = <0x7000a000 0x100>;
-		#pwm-cells = <2>;
-	};
+See pwm.yaml.
diff --git a/Documentation/devicetree/bindings/pwm/pwm.yaml b/Documentation/devicetree/bindings/pwm/pwm.yaml
new file mode 100644
index 000000000000..fa4f9de92090
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: PWM controllers (providers)
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+
+properties:
+  $nodename:
+    pattern: "^pwm(@.*|-[0-9a-f])*$"
+
+  "#pwm-cells":
+    description:
+      Number of cells in a PWM specifier.
+
+required:
+  - "#pwm-cells"
+
+examples:
+  - |
+    pwm: pwm@7000a000 {
+        compatible = "nvidia,tegra20-pwm";
+        reg = <0x7000a000 0x100>;
+        #pwm-cells = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
deleted file mode 100644
index fbd6a4f943ce..000000000000
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-* Renesas R-Car PWM Timer Controller
-
-Required Properties:
-- compatible: should be "renesas,pwm-rcar" and one of the following.
- - "renesas,pwm-r8a7743": for RZ/G1M
- - "renesas,pwm-r8a7744": for RZ/G1N
- - "renesas,pwm-r8a7745": for RZ/G1E
- - "renesas,pwm-r8a774a1": for RZ/G2M
- - "renesas,pwm-r8a774c0": for RZ/G2E
- - "renesas,pwm-r8a7778": for R-Car M1A
- - "renesas,pwm-r8a7779": for R-Car H1
- - "renesas,pwm-r8a7790": for R-Car H2
- - "renesas,pwm-r8a7791": for R-Car M2-W
- - "renesas,pwm-r8a7794": for R-Car E2
- - "renesas,pwm-r8a7795": for R-Car H3
- - "renesas,pwm-r8a7796": for R-Car M3-W
- - "renesas,pwm-r8a77965": for R-Car M3-N
- - "renesas,pwm-r8a77970": for R-Car V3M
- - "renesas,pwm-r8a77980": for R-Car V3H
- - "renesas,pwm-r8a77990": for R-Car E3
- - "renesas,pwm-r8a77995": for R-Car D3
-- reg: base address and length of the registers block for the PWM.
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
-  the cells format.
-- clocks: clock phandle and specifier pair.
-- pinctrl-0: phandle, referring to a default pin configuration node.
-- pinctrl-names: Set to "default".
-
-Example: R8A7743 (RZ/G1M) PWM Timer node
-
-	pwm0: pwm@e6e30000 {
-		compatible = "renesas,pwm-r8a7743", "renesas,pwm-rcar";
-		reg = <0 0xe6e30000 0 0x8>;
-		clocks = <&cpg CPG_MOD 523>;
-		power-domains = <&sysc R8A7743_PD_ALWAYS_ON>;
-		resets = <&cpg 523>;
-		#pwm-cells = <2>;
-		pinctrl-0 = <&pwm0_pins>;
-		pinctrl-names = "default";
-	};
diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.yaml b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.yaml
new file mode 100644
index 000000000000..945c14e1be35
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/renesas,pwm-rcar.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car PWM Timer Controller
+
+maintainers:
+  - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,pwm-r8a7743   # RZ/G1M
+          - renesas,pwm-r8a7744   # RZ/G1N
+          - renesas,pwm-r8a7745   # RZ/G1E
+          - renesas,pwm-r8a77470  # RZ/G1C
+          - renesas,pwm-r8a774a1  # RZ/G2M
+          - renesas,pwm-r8a774b1  # RZ/G2N
+          - renesas,pwm-r8a774c0  # RZ/G2E
+          - renesas,pwm-r8a7778   # R-Car M1A
+          - renesas,pwm-r8a7779   # R-Car H1
+          - renesas,pwm-r8a7790   # R-Car H2
+          - renesas,pwm-r8a7791   # R-Car M2-W
+          - renesas,pwm-r8a7794   # R-Car E2
+          - renesas,pwm-r8a7795   # R-Car H3
+          - renesas,pwm-r8a7796   # R-Car M3-W
+          - renesas,pwm-r8a77965  # R-Car M3-N
+          - renesas,pwm-r8a77970  # R-Car V3M
+          - renesas,pwm-r8a77980  # R-Car V3H
+          - renesas,pwm-r8a77990  # R-Car E3
+          - renesas,pwm-r8a77995  # R-Car D3
+      - const: renesas,pwm-rcar
+
+  reg:
+    # base address and length of the registers block for the PWM.
+    maxItems: 1
+
+  '#pwm-cells':
+    # should be 2. See pwm.yaml in this directory for a description of
+    # the cells format.
+    const: 2
+
+  clocks:
+    # clock phandle and specifier pair.
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - '#pwm-cells'
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7743-cpg-mssr.h>
+    #include <dt-bindings/power/r8a7743-sysc.h>
+
+    pwm0: pwm@e6e30000 {
+        compatible = "renesas,pwm-r8a7743", "renesas,pwm-rcar";
+        reg = <0 0xe6e30000 0 0x8>;
+        clocks = <&cpg CPG_MOD 523>;
+        power-domains = <&sysc R8A7743_PD_ALWAYS_ON>;
+        resets = <&cpg 523>;
+        #pwm-cells = <2>;
+        pinctrl-0 = <&pwm0_pins>;
+        pinctrl-names = "default";
+    };
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
deleted file mode 100644
index 848a92b53d81..000000000000
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-* Renesas R-Car Timer Pulse Unit PWM Controller
-
-Required Properties:
-
-  - compatible: must contain one or more of the following:
-    - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
-    - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
-    - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
-    - "renesas,tpu-r8a7744": for R8A7744 (RZ/G1N) compatible PWM controller.
-    - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
-    - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
-    - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM
-			      controller.
-    - "renesas,tpu-r8a77980": for R8A77980 (R-Car V3H) compatible PWM
-			      controller.
-    - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for
-		     the entries listed above.
-
-  - reg: Base address and length of each memory resource used by the PWM
-    controller hardware module.
-
-  - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
-    the cells format. The only third cell flag supported by this binding is
-    PWM_POLARITY_INVERTED.
-
-Please refer to pwm.txt in this directory for details of the common PWM bindings
-used by client devices.
-
-Example: R8A7740 (R-Mobile A1) TPU controller node
-
-	tpu: pwm@e6600000 {
-		compatible = "renesas,tpu-r8a7740", "renesas,tpu";
-		reg = <0xe6600000 0x148>;
-		#pwm-cells = <3>;
-	};
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
new file mode 100644
index 000000000000..4969a954993c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/renesas,tpu-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car Timer Pulse Unit PWM Controller
+
+maintainers:
+  - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,tpu-r8a73a4   # R-Mobile APE6
+          - renesas,tpu-r8a7740   # R-Mobile A1
+          - renesas,tpu-r8a7743   # RZ/G1M
+          - renesas,tpu-r8a7744   # RZ/G1N
+          - renesas,tpu-r8a7745   # RZ/G1E
+          - renesas,tpu-r8a7790   # R-Car H2
+          - renesas,tpu-r8a7795   # R-Car H3
+          - renesas,tpu-r8a7796   # R-Car M3-W
+          - renesas,tpu-r8a77965  # R-Car M3-N
+          - renesas,tpu-r8a77970  # R-Car V3M
+          - renesas,tpu-r8a77980  # R-Car V3H
+      - const: renesas,tpu
+
+  reg:
+    # Base address and length of each memory resource used by the PWM
+    # controller hardware module.
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#pwm-cells':
+    # should be 3. See pwm.yaml in this directory for a description of
+    # the cells format. The only third cell flag supported by this binding is
+    # PWM_POLARITY_INVERTED.
+    const: 3
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - '#pwm-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7740-clock.h>
+
+    tpu: pwm@e6600000 {
+        compatible = "renesas,tpu-r8a7740", "renesas,tpu";
+        reg = <0xe6600000 0x148>;
+        clocks = <&mstp3_clks R8A7740_CLK_TPU0>;
+        power-domains = <&pd_a3sp>;
+        #pwm-cells = <3>;
+    };
diff --git a/Documentation/devicetree/bindings/pwm/spear-pwm.txt b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
index b486de2c3fe3..95894128b62f 100644
--- a/Documentation/devicetree/bindings/pwm/spear-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
@@ -5,7 +5,7 @@ Required properties:
   - "st,spear320-pwm"
   - "st,spear1340-pwm"
 - reg: physical base address and length of the controller's registers
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/st,stmpe-pwm.txt b/Documentation/devicetree/bindings/pwm/st,stmpe-pwm.txt
index cb209646bf13..f401316e0248 100644
--- a/Documentation/devicetree/bindings/pwm/st,stmpe-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/st,stmpe-pwm.txt
@@ -7,7 +7,7 @@ subdevices of the STMPE MFD device.
 Required properties:
 - compatible: should be:
   - "st,stmpe-pwm"
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
index 4e32bee11201..d97ca1964e94 100644
--- a/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
@@ -6,7 +6,7 @@ On TWL6030 series: PWM0 and PWM1
 
 Required properties:
 - compatible: "ti,twl4030-pwm" or "ti,twl6030-pwm"
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
index 9f4b46090782..31ca1b032ef0 100644
--- a/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
@@ -6,7 +6,7 @@ On TWL6030 series: LED PWM (mainly used as charging indicator LED)
 
 Required properties:
 - compatible: "ti,twl4030-pwmled" or "ti,twl6030-pwmled"
-- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 2. See pwm.yaml in this directory for a description of
   the cells format.
 
 Example:
diff --git a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
index a76390e6df2e..4fba93ce1985 100644
--- a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
@@ -3,7 +3,7 @@ VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
 Required properties:
 - compatible: should be "via,vt8500-pwm"
 - reg: physical base address and length of the controller's registers
-- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
   the cells format. The only third cell flag supported by this binding is
   PWM_POLARITY_INVERTED.
 - clocks: phandle to the PWM source clock
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
index f32416968197..3dbb9cf86f15 100644
--- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
@@ -50,6 +50,10 @@ properties:
     description: startup time in microseconds
     $ref: /schemas/types.yaml#/definitions/uint32
 
+  off-on-delay-us:
+    description: off delay time in microseconds
+    $ref: /schemas/types.yaml#/definitions/uint32
+
   enable-active-high:
     description:
       Polarity of GPIO is Active high. If this property is missing,
@@ -64,7 +68,6 @@ properties:
 
   vin-supply:
     description: Input supply phandle.
-    $ref: /schemas/types.yaml#/definitions/phandle
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/regulator/max77650-regulator.txt b/Documentation/devicetree/bindings/regulator/max77650-regulator.txt
deleted file mode 100644
index f1cbe813c30f..000000000000
--- a/Documentation/devicetree/bindings/regulator/max77650-regulator.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Regulator driver for MAX77650 PMIC from Maxim Integrated.
-
-This module is part of the MAX77650 MFD device. For more details
-see Documentation/devicetree/bindings/mfd/max77650.txt.
-
-The regulator controller is represented as a sub-node of the PMIC node
-on the device tree.
-
-The device has a single LDO regulator and a SIMO buck-boost regulator with
-three independent power rails.
-
-Required properties:
---------------------
-- compatible:		Must be "maxim,max77650-regulator"
-
-Each rail must be instantiated under the regulators subnode of the top PMIC
-node. Up to four regulators can be defined. For standard regulator properties
-refer to Documentation/devicetree/bindings/regulator/regulator.txt.
-
-Available regulator compatible strings are: "ldo", "sbb0", "sbb1", "sbb2".
-
-Example:
---------
-
-	regulators {
-		compatible = "maxim,max77650-regulator";
-
-		max77650_ldo: regulator@0 {
-			regulator-compatible = "ldo";
-			regulator-name = "max77650-ldo";
-			regulator-min-microvolt = <1350000>;
-			regulator-max-microvolt = <2937500>;
-		};
-
-		max77650_sbb0: regulator@1 {
-			regulator-compatible = "sbb0";
-			regulator-name = "max77650-sbb0";
-			regulator-min-microvolt = <800000>;
-			regulator-max-microvolt = <1587500>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/regulator/max77650-regulator.yaml b/Documentation/devicetree/bindings/regulator/max77650-regulator.yaml
new file mode 100644
index 000000000000..7d724159f890
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max77650-regulator.yaml
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/max77650-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Regulator driver for MAX77650 PMIC from Maxim Integrated.
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description: |
+  This module is part of the MAX77650 MFD device. For more details
+  see Documentation/devicetree/bindings/mfd/max77650.yaml.
+
+  The regulator controller is represented as a sub-node of the PMIC node
+  on the device tree.
+
+  The device has a single LDO regulator and a SIMO buck-boost regulator with
+  three independent power rails.
+
+properties:
+  compatible:
+    const: maxim,max77650-regulator
+
+patternProperties:
+  "^regulator@[0-3]$":
+    $ref: "regulator.yaml#"
+
+required:
+  - compatible
diff --git a/Documentation/devicetree/bindings/regulator/mp8859.txt b/Documentation/devicetree/bindings/regulator/mp8859.txt
new file mode 100644
index 000000000000..74ad69730989
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/mp8859.txt
@@ -0,0 +1,22 @@
+Monolithic Power Systems MP8859 voltage regulator
+
+Required properties:
+- compatible: "mps,mp8859";
+- reg: I2C slave address.
+
+Optional subnode for regulator: "mp8859_dcdc", using common regulator
+bindings given in <Documentation/devicetree/bindings/regulator/regulator.txt>.
+
+Example:
+
+	mp8859: regulator@66 {
+		compatible = "mps,mp8859";
+		reg = <0x66>;
+		dc_12v: mp8859_dcdc {
+			regulator-name = "dc_12v";
+			regulator-min-microvolt = <12000000>;
+			regulator-max-microvolt = <12000000>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
new file mode 100644
index 000000000000..a682af0dc67e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/mps,mpq7920.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Monolithic Power System MPQ7920 PMIC
+
+maintainers:
+  - Saravanan Sekar <sravanhome@gmail.com>
+
+properties:
+  $nodename:
+    pattern: "pmic@[0-9a-f]{1,2}"
+  compatible:
+    enum:
+      - mps,mpq7920
+
+  reg:
+    maxItems: 1
+
+  regulators:
+    type: object
+    allOf:
+      - $ref: regulator.yaml#
+    description: |
+      list of regulators provided by this controller, must be named
+      after their hardware counterparts BUCK[1-4], one LDORTC, and LDO[2-5]
+
+    properties:
+      mps,switch-freq:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint8"
+        enum: [ 0, 1, 2, 3 ]
+        default: 2
+        description: |
+          switching frequency must be one of following corresponding value
+          1.1MHz, 1.65MHz, 2.2MHz, 2.75MHz
+
+    patternProperties:
+      "^ldo[1-4]$":
+        type: object
+        allOf:
+          - $ref: regulator.yaml#
+
+      "^ldortc$":
+        type: object
+        allOf:
+          - $ref: regulator.yaml#
+
+      "^buck[1-4]$":
+        type: object
+        allOf:
+          - $ref: regulator.yaml#
+
+        properties:
+          mps,buck-softstart:
+            allOf:
+              - $ref: "/schemas/types.yaml#/definitions/uint8"
+            enum: [ 0, 1, 2, 3 ]
+            description: |
+              defines the soft start time of this buck, must be one of the following
+              corresponding values 150us, 300us, 610us, 920us
+
+          mps,buck-phase-delay:
+            allOf:
+              - $ref: "/schemas/types.yaml#/definitions/uint8"
+            enum: [ 0, 1, 2, 3 ]
+            description: |
+              defines the phase delay of this buck, must be one of the following
+              corresponding values 0deg, 90deg, 180deg, 270deg
+
+          mps,buck-ovp-disable:
+            type: boolean
+            description: |
+              disables over voltage protection of this buck
+
+      additionalProperties: false
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@69 {
+          compatible = "mps,mpq7920";
+          reg = <0x69>;
+
+          regulators {
+            mps,switch-freq = /bits/ 8 <1>;
+
+            buck1 {
+             regulator-name = "buck1";
+             regulator-min-microvolt = <400000>;
+             regulator-max-microvolt = <3587500>;
+             regulator-min-microamp  = <460000>;
+             regulator-max-microamp  = <7600000>;
+             regulator-boot-on;
+             mps,buck-ovp-disable;
+             mps,buck-phase-delay = /bits/ 8 <2>;
+             mps,buck-softstart = /bits/ 8 <1>;
+            };
+
+            ldo2 {
+             regulator-name = "ldo2";
+             regulator-min-microvolt = <650000>;
+             regulator-max-microvolt = <3587500>;
+            };
+         };
+       };
+     };
+...
diff --git a/Documentation/devicetree/bindings/regulator/nvidia,tegra-regulators-coupling.txt b/Documentation/devicetree/bindings/regulator/nvidia,tegra-regulators-coupling.txt
new file mode 100644
index 000000000000..4bf2dbf7c6cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/nvidia,tegra-regulators-coupling.txt
@@ -0,0 +1,65 @@
+NVIDIA Tegra Regulators Coupling
+================================
+
+NVIDIA Tegra SoC's have a mandatory voltage-coupling between regulators.
+Thus on Tegra20 there are 3 coupled regulators and on NVIDIA Tegra30
+there are 2.
+
+Tegra20 voltage coupling
+------------------------
+
+On Tegra20 SoC's there are 3 coupled regulators: CORE, RTC and CPU.
+The CORE and RTC voltages shall be in a range of 170mV from each other
+and they both shall be higher than the CPU voltage by at least 120mV.
+
+Tegra30 voltage coupling
+------------------------
+
+On Tegra30 SoC's there are 2 coupled regulators: CORE and CPU. The CORE
+and CPU voltages shall be in a range of 300mV from each other and CORE
+voltage shall be higher than the CPU by N mV, where N depends on the CPU
+voltage.
+
+Required properties:
+- nvidia,tegra-core-regulator: Boolean property that designates regulator
+  as the "Core domain" voltage regulator.
+- nvidia,tegra-rtc-regulator: Boolean property that designates regulator
+  as the "RTC domain" voltage regulator.
+- nvidia,tegra-cpu-regulator: Boolean property that designates regulator
+  as the "CPU domain" voltage regulator.
+
+Example:
+
+	pmic {
+		regulators {
+			core_vdd_reg: core {
+				regulator-name = "vdd_core";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1300000>;
+				regulator-coupled-with = <&rtc_vdd_reg &cpu_vdd_reg>;
+				regulator-coupled-max-spread = <170000 550000>;
+
+				nvidia,tegra-core-regulator;
+			};
+
+			rtc_vdd_reg: rtc {
+				regulator-name = "vdd_rtc";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1300000>;
+				regulator-coupled-with = <&core_vdd_reg &cpu_vdd_reg>;
+				regulator-coupled-max-spread = <170000 550000>;
+
+				nvidia,tegra-rtc-regulator;
+			};
+
+			cpu_vdd_reg: cpu {
+				regulator-name = "vdd_cpu";
+				regulator-min-microvolt = <750000>;
+				regulator-max-microvolt = <1125000>;
+				regulator-coupled-with = <&core_vdd_reg &rtc_vdd_reg>;
+				regulator-coupled-max-spread = <550000 550000>;
+
+				nvidia,tegra-cpu-regulator;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
index bab9f71140b8..97c3e0b7611c 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -28,6 +28,8 @@ Supported regulator node names:
 	PM8150L:	smps1 - smps8, ldo1 - ldo11, bob, flash, rgb
 	PM8998:		smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
 	PMI8998:	bob
+	PM6150:         smps1 - smps5, ldo1 - ldo19
+	PM6150L:        smps1 - smps8, ldo1 - ldo11, bob
 
 ========================
 First Level Nodes - PMIC
@@ -43,6 +45,8 @@ First Level Nodes - PMIC
 		    "qcom,pm8150l-rpmh-regulators"
 		    "qcom,pm8998-rpmh-regulators"
 		    "qcom,pmi8998-rpmh-regulators"
+		    "qcom,pm6150-rpmh-regulators"
+		    "qcom,pm6150l-rpmh-regulators"
 
 - qcom,pmic-id
 	Usage:      required
diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt
index 45025b5b67f6..d126df043403 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt
@@ -22,6 +22,7 @@ Regulator nodes are identified by their compatible:
 		    "qcom,rpm-pm8841-regulators"
 		    "qcom,rpm-pm8916-regulators"
 		    "qcom,rpm-pm8941-regulators"
+		    "qcom,rpm-pm8950-regulators"
 		    "qcom,rpm-pm8994-regulators"
 		    "qcom,rpm-pm8998-regulators"
 		    "qcom,rpm-pma8084-regulators"
@@ -57,6 +58,26 @@ Regulator nodes are identified by their compatible:
 - vdd_s1-supply:
 - vdd_s2-supply:
 - vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_l1_l19-supply:
+- vdd_l2_l23-supply:
+- vdd_l3-supply:
+- vdd_l4_l5_l6_l7_l16-supply:
+- vdd_l8_l11_l12_l17_l22-supply:
+- vdd_l9_l10_l13_l14_l15_l18-supply:
+- vdd_l20-supply:
+- vdd_l21-supply:
+	Usage: optional (pm8950 only)
+	Value type: <phandle>
+	Definition: reference to regulator supplying the input pin, as
+		    described in the data sheet
+
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
 - vdd_l1_l3-supply:
 - vdd_l2_lvs1_2_3-supply:
 - vdd_l4_l11-supply:
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index 430b8622bda1..f5cdac8b2847 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -4,10 +4,12 @@ Qualcomm SPMI Regulators
 	Usage: required
 	Value type: <string>
 	Definition: must be one of:
+			"qcom,pm8004-regulators"
 			"qcom,pm8005-regulators"
 			"qcom,pm8841-regulators"
 			"qcom,pm8916-regulators"
 			"qcom,pm8941-regulators"
+			"qcom,pm8950-regulators"
 			"qcom,pm8994-regulators"
 			"qcom,pmi8994-regulators"
 			"qcom,pms405-regulators"
@@ -76,6 +78,26 @@ Qualcomm SPMI Regulators
 - vdd_s2-supply:
 - vdd_s3-supply:
 - vdd_s4-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_l1_l19-supply:
+- vdd_l2_l23-supply:
+- vdd_l3-supply:
+- vdd_l4_l5_l6_l7_l16-supply:
+- vdd_l8_l11_l12_l17_l22-supply:
+- vdd_l9_l10_l13_l14_l15_l18-supply:
+- vdd_l20-supply:
+- vdd_l21-supply:
+	Usage: optional (pm8950 only)
+	Value type: <phandle>
+	Definition: reference to regulator supplying the input pin, as
+		    described in the data sheet
+
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
 - vdd_s5-supply:
 - vdd_s6-supply:
 - vdd_s7-supply:
@@ -140,6 +162,9 @@ sub-node is identified using the node's name, with valid values listed for each
 of the PMICs below.
 
 pm8005:
+	s2, s5
+
+pm8005:
 	s1, s2, s3, s4
 
 pm8841:
diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml
index 02c3043ce419..92ff2e8ad572 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -38,7 +38,12 @@ properties:
     type: boolean
 
   regulator-boot-on:
-    description: bootloader/firmware enabled regulator
+    description: bootloader/firmware enabled regulator.
+      It's expected that this regulator was left on by the bootloader.
+      If the bootloader didn't leave it on then OS should turn it on
+      at boot but shouldn't prevent it from being turned off later.
+      This property is intended to only be used for regulators where
+      software cannot read the state of the regulator.
     type: boolean
 
   regulator-allow-bypass:
diff --git a/Documentation/devicetree/bindings/regulator/rohm,bd71828-regulator.yaml b/Documentation/devicetree/bindings/regulator/rohm,bd71828-regulator.yaml
new file mode 100644
index 000000000000..71ce032b8cf8
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/rohm,bd71828-regulator.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/rohm,bd71828-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ROHM BD71828 Power Management Integrated Circuit regulators
+
+maintainers:
+  - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+
+description: |
+  This module is part of the ROHM BD71828 MFD device. For more details
+  see Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml.
+
+  The regulator controller is represented as a sub-node of the PMIC node
+  on the device tree.
+
+  Regulator nodes should be named to BUCK_<number> and LDO_<number>.
+  The valid names for BD71828 regulator nodes are
+  BUCK1, BUCK2, BUCK3, BUCK4, BUCK5, BUCK6, BUCK7
+  LDO1, LDO2, LDO3, LDO4, LDO5, LDO6, LDO7
+
+patternProperties:
+  "^LDO[1-7]$":
+    type: object
+    allOf:
+      - $ref: regulator.yaml#
+    description:
+      Properties for single LDO regulator.
+
+    properties:
+      regulator-name:
+        pattern: "^ldo[1-7]$"
+        description:
+          should be "ldo1", ..., "ldo7"
+
+  "^BUCK[1-7]$":
+    type: object
+    allOf:
+      - $ref: regulator.yaml#
+    description:
+      Properties for single BUCK regulator.
+
+    properties:
+      regulator-name:
+        pattern: "^buck[1-7]$"
+        description:
+          should be "buck1", ..., "buck7"
+
+      rohm,dvs-run-voltage:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 3300000
+        description:
+          PMIC default "RUN" state voltage in uV. See below table for
+          bucks which support this. 0 means disabled.
+
+      rohm,dvs-idle-voltage:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 3300000
+        description:
+          PMIC default "IDLE" state voltage in uV. See below table for
+          bucks which support this. 0 means disabled.
+
+      rohm,dvs-suspend-voltage:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 3300000
+        description:
+          PMIC default "SUSPEND" state voltage in uV. See below table for
+          bucks which support this. 0 means disabled.
+
+      rohm,dvs-lpsr-voltage:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 3300000
+        description:
+          PMIC default "LPSR" state voltage in uV. See below table for
+          bucks which support this. 0 means disabled.
+
+        # Supported default DVS states:
+        #     buck       |    run     |   idle    | suspend  | lpsr
+        #--------------------------------------------------------------
+        # 1, 2, 6, and 7 | supported  | supported | supported (*)
+        #--------------------------------------------------------------
+        # 3, 4, and 5    |                    supported (**)
+        #--------------------------------------------------------------
+        #
+        #(*)  LPSR and SUSPEND states use same voltage but both states have own
+        #     enable /
+        #     disable settings. Voltage 0 can be specified for a state to make
+        #     regulator disabled on that state.
+        #
+        #(**) All states use same voltage but have own enable / disable
+        #     settings. Voltage 0 can be specified for a state to make
+        #     regulator disabled on that state.
+
+    required:
+      - regulator-name
+  additionalProperties: false
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt b/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
deleted file mode 100644
index 479ad4c8758e..000000000000
--- a/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-STM32 BOOSTER - Booster for ADC analog input switches
-
-Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
-to supply ADC analog input switches.
-
-Required properties:
-- compatible: Should be one of:
-  "st,stm32h7-booster"
-  "st,stm32mp1-booster"
-- st,syscfg: Phandle to system configuration controller.
-- vdda-supply: Phandle to the vdda input analog voltage.
-
-Example:
-	booster: regulator-booster {
-		compatible = "st,stm32mp1-booster";
-		st,syscfg = <&syscfg>;
-		vdda-supply = <&vdda>;
-	};
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
new file mode 100644
index 000000000000..64f1183ce841
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/st,stm32-booster.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 booster for ADC analog input switches bindings
+
+maintainers:
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+description: |
+  Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
+  to supply ADC analog input switches.
+
+allOf:
+  - $ref: "regulator.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - st,stm32h7-booster
+      - st,stm32mp1-booster
+
+  st,syscfg:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    description: phandle to system configuration controller.
+
+  vdda-supply:
+    description: phandle to the vdda input analog voltage.
+
+required:
+  - compatible
+  - st,syscfg
+  - vdda-supply
+
+examples:
+  - |
+    regulator-booster {
+      compatible = "st,stm32mp1-booster";
+      st,syscfg = <&syscfg>;
+      vdda-supply = <&vdda>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.txt b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.txt
deleted file mode 100644
index 5ddb8500a929..000000000000
--- a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-STM32 VREFBUF - Voltage reference buffer
-
-Some STM32 devices embed a voltage reference buffer which can be used as
-voltage reference for ADCs, DACs and also as voltage reference for external
-components through the dedicated VREF+ pin.
-
-Required properties:
-- compatible:		Must be "st,stm32-vrefbuf".
-- reg:			Offset and length of VREFBUF register set.
-- clocks:		Must contain an entry for peripheral clock.
-
-Example:
-	vrefbuf: regulator@58003c00 {
-		compatible = "st,stm32-vrefbuf";
-		reg = <0x58003C00 0x8>;
-		clocks = <&rcc VREF_CK>;
-		regulator-min-microvolt = <1500000>;
-		regulator-max-microvolt = <2500000>;
-		vdda-supply = <&vdda>;
-	};
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
new file mode 100644
index 000000000000..33cdaeb25aee
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/st,stm32-vrefbuf.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Voltage reference buffer bindings
+
+description: |
+  Some STM32 devices embed a voltage reference buffer which can be used as
+  voltage reference for ADCs, DACs and also as voltage reference for external
+  components through the dedicated VREF+ pin.
+
+maintainers:
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+allOf:
+  - $ref: "regulator.yaml#"
+
+properties:
+  compatible:
+    const: st,stm32-vrefbuf
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  vdda-supply:
+    description: phandle to the vdda input analog voltage.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - vdda-supply
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    vrefbuf@50025000 {
+      compatible = "st,stm32-vrefbuf";
+      reg = <0x50025000 0x8>;
+      regulator-min-microvolt = <1500000>;
+      regulator-max-microvolt = <2500000>;
+      clocks = <&rcc VREF>;
+      vdda-supply = <&vdda>;
+    };
+
+...
+
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.txt b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.txt
deleted file mode 100644
index e372dd3f0c8a..000000000000
--- a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-STM32MP1 PWR Regulators
------------------------
-
-Available Regulators in STM32MP1 PWR block are:
-  - reg11 for regulator 1V1
-  - reg18 for regulator 1V8
-  - usb33 for the swtich USB3V3
-
-Required properties:
-- compatible: Must be "st,stm32mp1,pwr-reg"
-- list of child nodes that specify the regulator reg11, reg18 or usb33
-  initialization data for defined regulators. The definition for each of
-  these nodes is defined using the standard binding for regulators found at
-  Documentation/devicetree/bindings/regulator/regulator.txt.
-- vdd-supply: phandle to the parent supply/regulator node for vdd input
-- vdd_3v3_usbfs-supply: phandle to the parent supply/regulator node for usb33
-
-Example:
-
-pwr_regulators: pwr@50001000 {
-	compatible = "st,stm32mp1,pwr-reg";
-	reg = <0x50001000 0x10>;
-	vdd-supply = <&vdd>;
-	vdd_3v3_usbfs-supply = <&vdd_usb>;
-
-	reg11: reg11 {
-		regulator-name = "reg11";
-		regulator-min-microvolt = <1100000>;
-		regulator-max-microvolt = <1100000>;
-	};
-
-	reg18: reg18 {
-		regulator-name = "reg18";
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <1800000>;
-	};
-
-	usb33: usb33 {
-		regulator-name = "usb33";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
new file mode 100644
index 000000000000..8d8f38fe85dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/st,stm32mp1-pwr-reg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32MP1 PWR voltage regulators
+
+maintainers:
+  - Pascal Paillet <p.paillet@st.com>
+
+properties:
+  compatible:
+    const: st,stm32mp1,pwr-reg
+
+  reg:
+    maxItems: 1
+
+  vdd-supply:
+    description: Input supply phandle(s) for vdd input
+
+  vdd_3v3_usbfs-supply:
+    description: Input supply phandle(s) for vdd_3v3_usbfs input
+
+patternProperties:
+  "^(reg11|reg18|usb33)$":
+    type: object
+
+    allOf:
+      - $ref: "regulator.yaml#"
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pwr@50001000 {
+      compatible = "st,stm32mp1,pwr-reg";
+      reg = <0x50001000 0x10>;
+      vdd-supply = <&vdd>;
+      vdd_3v3_usbfs-supply = <&vdd_usb>;
+
+      reg11 {
+        regulator-name = "reg11";
+        regulator-min-microvolt = <1100000>;
+        regulator-max-microvolt = <1100000>;
+      };
+
+      reg18 {
+        regulator-name = "reg18";
+        regulator-min-microvolt = <1800000>;
+        regulator-max-microvolt = <1800000>;
+      };
+
+      usb33 {
+        regulator-name = "usb33";
+        regulator-min-microvolt = <3300000>;
+        regulator-max-microvolt = <3300000>;
+      };
+    };
+...
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
index 41ca5df5be5a..c416746f93cf 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
@@ -12,6 +12,7 @@ on the Qualcomm Hexagon core.
 		    "qcom,msm8916-mss-pil",
 		    "qcom,msm8974-mss-pil"
 		    "qcom,msm8996-mss-pil"
+		    "qcom,msm8998-mss-pil"
 		    "qcom,sdm845-mss-pil"
 
 - reg:
@@ -41,6 +42,7 @@ on the Qualcomm Hexagon core.
 	qcom,msm8974-mss-pil:
 		    must be "wdog", "fatal", "ready", "handover", "stop-ack"
 	qcom,msm8996-mss-pil:
+	qcom,msm8998-mss-pil:
 	qcom,sdm845-mss-pil:
 		    must be "wdog", "fatal", "ready", "handover", "stop-ack",
 		    "shutdown-ack"
@@ -70,6 +72,9 @@ on the Qualcomm Hexagon core.
 	qcom,msm8996-mss-pil:
 		    must be "iface", "bus", "mem", "xo", "gpll0_mss",
 		    "snoc_axi", "mnoc_axi", "pnoc", "qdss"
+	qcom,msm8998-mss-pil:
+		    must be "iface", "bus", "mem", "xo", "gpll0_mss",
+		    "snoc_axi", "mnoc_axi", "qdss"
 	qcom,sdm845-mss-pil:
 		    must be "iface", "bus", "mem", "xo", "gpll0_mss",
 		    "snoc_axi", "mnoc_axi", "prng"
@@ -137,6 +142,7 @@ For the compatible string below the following supplies are required:
 	qcom,msm8974-mss-pil:
 		    no power-domain names required
 	qcom,msm8996-mss-pil:
+	qcom,msm8998-mss-pil:
 		    must be "cx", "mx"
 	qcom,sdm845-mss-pil:
 		    must be "cx", "mx", "mss", "load_state"
diff --git a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
new file mode 100644
index 000000000000..c0d83865e933
--- /dev/null
+++ b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/remoteproc/st,stm32-rproc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: STMicroelectronics STM32 remote processor controller bindings
+
+description:
+  This document defines the binding for the remoteproc component that loads and
+  boots firmwares on the ST32MP family chipset.
+
+maintainers:
+  - Fabien Dessenne <fabien.dessenne@st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+
+properties:
+  compatible:
+    const: st,stm32mp1-m4
+
+  reg:
+    description:
+      Address ranges of the RETRAM and MCU SRAM memories used by the remote
+      processor.
+    maxItems: 3
+
+  resets:
+     maxItems: 1
+
+  st,syscfg-holdboot:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    description: remote processor reset hold boot
+      - Phandle of syscon block.
+      - The offset of the hold boot setting register.
+      - The field mask of the hold boot.
+    maxItems: 1
+
+  st,syscfg-tz:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    description:
+      Reference to the system configuration which holds the RCC trust zone mode
+      - Phandle of syscon block.
+      - The offset of the RCC trust zone mode register.
+      - The field mask of the RCC trust zone mode.
+    maxItems: 1
+
+  interrupts:
+    description: Should contain the WWDG1 watchdog reset interrupt
+    maxItems: 1
+
+  wakeup-source: true
+
+  mboxes:
+    description:
+      This property is required only if the rpmsg/virtio functionality is used.
+    items:
+      - description: |
+          A channel (a) used to communicate through virtqueues with the
+          remote proc.
+          Bi-directional channel:
+            - from local to remote = send message
+            - from remote to local = send message ack
+      - description: |
+          A channel (b) working the opposite direction of channel (a)
+      - description: |
+          A channel (c) used by the local proc to notify the remote proc that it
+          is about to be shut down.
+          Unidirectional channel:
+            - from local to remote, where ACK from the remote means that it is
+              ready for shutdown
+    minItems: 1
+    maxItems: 3
+
+  mbox-names:
+    items:
+      - const: vq0
+      - const: vq1
+      - const: shutdown
+    minItems: 1
+    maxItems: 3
+
+  memory-region:
+    description:
+      List of phandles to the reserved memory regions associated with the
+      remoteproc device. This is variable and describes the memories shared with
+      the remote processor (e.g. remoteproc firmware and carveouts, rpmsg
+      vrings, ...).
+      (see ../reserved-memory/reserved-memory.txt)
+
+  st,syscfg-pdds:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    description: |
+      Reference to the system configuration which holds the remote
+        1st cell: phandle to syscon block
+        2nd cell: register offset containing the deep sleep setting
+        3rd cell: register bitmask for the deep sleep bit
+    maxItems: 1
+
+  st,auto-boot:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      If defined, when remoteproc is probed, it loads the default firmware and
+      starts the remote processor.
+
+required:
+  - compatible
+  - reg
+  - resets
+  - st,syscfg-holdboot
+  - st,syscfg-tz
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    m4_rproc: m4@10000000 {
+      compatible = "st,stm32mp1-m4";
+      reg = <0x10000000 0x40000>,
+            <0x30000000 0x40000>,
+            <0x38000000 0x10000>;
+      resets = <&rcc MCU_R>;
+      st,syscfg-holdboot = <&rcc 0x10C 0x1>;
+      st,syscfg-tz = <&rcc 0x000 0x1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt b/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt
deleted file mode 100644
index 5fa915a4b736..000000000000
--- a/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-STMicroelectronics STM32 Remoteproc
------------------------------------
-This document defines the binding for the remoteproc component that loads and
-boots firmwares on the ST32MP family chipset.
-
-Required properties:
-- compatible:	Must be "st,stm32mp1-m4"
-- reg:		Address ranges of the RETRAM and MCU SRAM memories used by the
-		remote processor.
-- resets:	Reference to a reset controller asserting the remote processor.
-- st,syscfg-holdboot: Reference to the system configuration which holds the
-		remote processor reset hold boot
-	1st cell: phandle of syscon block
-	2nd cell: register offset containing the hold boot setting
-	3rd cell: register bitmask for the hold boot field
-- st,syscfg-tz: Reference to the system configuration which holds the RCC trust
-		zone mode
-	1st cell: phandle to syscon block
-	2nd cell: register offset containing the RCC trust zone mode setting
-	3rd cell: register bitmask for the RCC trust zone mode bit
-
-Optional properties:
-- interrupts:	Should contain the watchdog interrupt
-- mboxes:	This property is required only if the rpmsg/virtio functionality
-		is used. List of phandle and mailbox channel specifiers:
-		- a channel (a) used to communicate through virtqueues with the
-		  remote proc.
-		  Bi-directional channel:
-		      - from local to remote = send message
-		      - from remote to local = send message ack
-		- a channel (b) working the opposite direction of channel (a)
-		- a channel (c) used by the local proc to notify the remote proc
-		  that it is about to be shut down.
-		  Unidirectional channel:
-		      - from local to remote, where ACK from the remote means
-		        that it is ready for shutdown
-- mbox-names:	This property is required if the mboxes property is used.
-		- must be "vq0" for channel (a)
-		- must be "vq1" for channel (b)
-		- must be "shutdown" for channel (c)
-- memory-region: List of phandles to the reserved memory regions associated with
-		the remoteproc device. This is variable and describes the
-		memories shared with the remote processor (eg: remoteproc
-		firmware and carveouts, rpmsg vrings, ...).
-		(see ../reserved-memory/reserved-memory.txt)
-- st,syscfg-pdds: Reference to the system configuration which holds the remote
-		processor deep sleep setting
-	1st cell: phandle to syscon block
-	2nd cell: register offset containing the deep sleep setting
-	3rd cell: register bitmask for the deep sleep bit
-- st,auto-boot:	If defined, when remoteproc is probed, it loads the default
-		firmware and starts the remote processor.
-
-Example:
-	m4_rproc: m4@10000000 {
-		compatible = "st,stm32mp1-m4";
-		reg = <0x10000000 0x40000>,
-		      <0x30000000 0x40000>,
-		      <0x38000000 0x10000>;
-		resets = <&rcc MCU_R>;
-		st,syscfg-holdboot = <&rcc 0x10C 0x1>;
-		st,syscfg-tz = <&rcc 0x000 0x1>;
-	};
diff --git a/Documentation/devicetree/bindings/reset/allwinner,sun6i-a31-clock-reset.yaml b/Documentation/devicetree/bindings/reset/allwinner,sun6i-a31-clock-reset.yaml
new file mode 100644
index 000000000000..001c0d2a8c1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/allwinner,sun6i-a31-clock-reset.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/allwinner,sun6i-a31-clock-reset.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 Peripheral Reset Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun6i-a31-ahb1-reset
+          - allwinner,sun6i-a31-clock-reset
+
+  # The PRCM on the A31 and A23 will have the reg property missing,
+  # since it's set at the upper level node, and will be validated by
+  # PRCM's schema. Make sure we only validate standalone nodes.
+  required:
+    - compatible
+    - reg
+
+properties:
+  "#reset-cells":
+    const: 1
+    description: >
+      This additional argument passed to that reset controller is the
+      offset of the bit controlling this particular reset line in the
+      register.
+
+  compatible:
+    enum:
+      - allwinner,sun6i-a31-ahb1-reset
+      - allwinner,sun6i-a31-clock-reset
+
+  reg:
+    maxItems: 1
+
+required:
+  - "#reset-cells"
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    ahb1_rst: reset@1c202c0 {
+        #reset-cells = <1>;
+        compatible = "allwinner,sun6i-a31-ahb1-reset";
+        reg = <0x01c202c0 0xc>;
+    };
+
+  - |
+    apbs_rst: reset@80014b0 {
+        #reset-cells = <1>;
+        compatible = "allwinner,sun6i-a31-clock-reset";
+        reg = <0x080014b0 0x4>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt b/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt
deleted file mode 100644
index 4ca66c96fe97..000000000000
--- a/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Allwinner sunxi Peripheral Reset Controller
-===========================================
-
-Please also refer to reset.txt in this directory for common reset
-controller binding usage.
-
-Required properties:
-- compatible: Should be one of the following:
-  "allwinner,sun6i-a31-ahb1-reset"
-  "allwinner,sun6i-a31-clock-reset"
-- reg: should be register base and length as documented in the
-  datasheet
-- #reset-cells: 1, see below
-
-example:
-
-ahb1_rst: reset@1c202c0 {
-	#reset-cells = <1>;
-	compatible = "allwinner,sun6i-a31-ahb1-reset";
-	reg = <0x01c202c0 0xc>;
-};
diff --git a/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt b/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
index 26e542eb96df..43e580ef64ba 100644
--- a/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
+++ b/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
@@ -4,7 +4,8 @@ The Amlogic Audio ARB is a simple device which enables or
 disables the access of Audio FIFOs to DDR on AXG based SoC.
 
 Required properties:
-- compatible: 'amlogic,meson-axg-audio-arb'
+- compatible: 'amlogic,meson-axg-audio-arb' or
+	      'amlogic,meson-sm1-audio-arb'
 - reg: physical base address of the controller and length of memory
        mapped region.
 - clocks: phandle to the fifo peripheral clock provided by the audio
diff --git a/Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml b/Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml
index 00917d868d58..b3f57d81f007 100644
--- a/Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml
@@ -16,6 +16,7 @@ properties:
       - amlogic,meson8b-reset # Reset Controller on Meson8b and compatible SoCs
       - amlogic,meson-gxbb-reset # Reset Controller on GXBB and compatible SoCs
       - amlogic,meson-axg-reset # Reset Controller on AXG and compatible SoCs
+      - amlogic,meson-a1-reset # Reset Controller on A1 and compatible SoCs
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
index 6e5341b4f891..ee59409640f2 100644
--- a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
+++ b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
@@ -22,6 +22,6 @@ Example:
 	};
 
 	&ethernet_switch {
-		resets = <&reset>;
+		resets = <&reset 26>;
 		reset-names = "switch";
 	};
diff --git a/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt b/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt
deleted file mode 100644
index 510c748656ec..000000000000
--- a/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-Qualcomm AOSS Reset Controller
-======================================
-
-This binding describes a reset-controller found on AOSS-CC (always on subsystem)
-for Qualcomm SDM845 SoCs.
-
-Required properties:
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be:
-		    "qcom,sdm845-aoss-cc"
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must specify the base address and size of the register
-	            space.
-
-- #reset-cells:
-	Usage: required
-	Value type: <uint>
-	Definition: must be 1; cell entry represents the reset index.
-
-Example:
-
-aoss_reset: reset-controller@c2a0000 {
-	compatible = "qcom,sdm845-aoss-cc";
-	reg = <0xc2a0000 0x31000>;
-	#reset-cells = <1>;
-};
-
-Specifying reset lines connected to IP modules
-==============================================
-
-Device nodes that need access to reset lines should
-specify them as a reset phandle in their corresponding node as
-specified in reset.txt.
-
-For list of all valid reset indicies see
-<dt-bindings/reset/qcom,sdm845-aoss.h>
-
-Example:
-
-modem-pil@4080000 {
-	...
-
-	resets = <&aoss_reset AOSS_CC_MSS_RESTART>;
-	reset-names = "mss_restart";
-
-	...
-};
diff --git a/Documentation/devicetree/bindings/reset/qcom,aoss-reset.yaml b/Documentation/devicetree/bindings/reset/qcom,aoss-reset.yaml
new file mode 100644
index 000000000000..e2d85a1e1d63
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/qcom,aoss-reset.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/qcom,aoss-reset.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm AOSS Reset Controller
+
+maintainers:
+  - Sibi Sankar <sibis@codeaurora.org>
+
+description:
+  The bindings describe the reset-controller found on AOSS-CC (always on
+  subsystem) for Qualcomm Technologies Inc SoCs.
+
+properties:
+  compatible:
+    oneOf:
+      - description: on SC7180 SoCs the following compatibles must be specified
+        items:
+          - const: "qcom,sc7180-aoss-cc"
+          - const: "qcom,sdm845-aoss-cc"
+
+      - description: on SDM845 SoCs the following compatibles must be specified
+        items:
+          - const: "qcom,sdm845-aoss-cc"
+
+  reg:
+    maxItems: 1
+
+  '#reset-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    aoss_reset: reset-controller@c2a0000 {
+      compatible = "qcom,sdm845-aoss-cc";
+      reg = <0xc2a0000 0x31000>;
+      #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
deleted file mode 100644
index a62a492843e7..000000000000
--- a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-PDC Global
-======================================
-
-This binding describes a reset-controller found on PDC-Global (Power Domain
-Controller) block for Qualcomm Technologies Inc SDM845 SoCs.
-
-Required properties:
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be:
-		    "qcom,sdm845-pdc-global"
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must specify the base address and size of the register
-	            space.
-
-- #reset-cells:
-	Usage: required
-	Value type: <uint>
-	Definition: must be 1; cell entry represents the reset index.
-
-Example:
-
-pdc_reset: reset-controller@b2e0000 {
-	compatible = "qcom,sdm845-pdc-global";
-	reg = <0xb2e0000 0x20000>;
-	#reset-cells = <1>;
-};
-
-PDC reset clients
-======================================
-
-Device nodes that need access to reset lines should
-specify them as a reset phandle in their corresponding node as
-specified in reset.txt.
-
-For a list of all valid reset indices see
-<dt-bindings/reset/qcom,sdm845-pdc.h>
-
-Example:
-
-modem-pil@4080000 {
-	...
-
-	resets = <&pdc_reset PDC_MODEM_SYNC_RESET>;
-	reset-names = "pdc_reset";
-
-	...
-};
diff --git a/Documentation/devicetree/bindings/reset/qcom,pdc-global.yaml b/Documentation/devicetree/bindings/reset/qcom,pdc-global.yaml
new file mode 100644
index 000000000000..d7d8cec9419f
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/qcom,pdc-global.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/qcom,pdc-global.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PDC Global
+
+maintainers:
+  - Sibi Sankar <sibis@codeaurora.org>
+
+description:
+  The bindings describes the reset-controller found on PDC-Global (Power Domain
+  Controller) block for Qualcomm Technologies Inc SoCs.
+
+properties:
+  compatible:
+    oneOf:
+      - description: on SC7180 SoCs the following compatibles must be specified
+        items:
+          - const: "qcom,sc7180-pdc-global"
+          - const: "qcom,sdm845-pdc-global"
+
+      - description: on SDM845 SoCs the following compatibles must be specified
+        items:
+          - const: "qcom,sdm845-pdc-global"
+
+  reg:
+    maxItems: 1
+
+  '#reset-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    pdc_reset: reset-controller@b2e0000 {
+      compatible = "qcom,sdm845-pdc-global";
+      reg = <0xb2e0000 0x20000>;
+      #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/reset/renesas,rst.txt b/Documentation/devicetree/bindings/reset/renesas,rst.txt
index b03c48a1150e..de7f06ccd003 100644
--- a/Documentation/devicetree/bindings/reset/renesas,rst.txt
+++ b/Documentation/devicetree/bindings/reset/renesas,rst.txt
@@ -20,6 +20,7 @@ Required properties:
 		  - "renesas,r8a7745-rst" (RZ/G1E)
 		  - "renesas,r8a77470-rst" (RZ/G1C)
 		  - "renesas,r8a774a1-rst" (RZ/G2M)
+		  - "renesas,r8a774b1-rst" (RZ/G2N)
 		  - "renesas,r8a774c0-rst" (RZ/G2E)
 		  - "renesas,r8a7778-reset-wdt" (R-Car M1A)
 		  - "renesas,r8a7779-reset-wdt" (R-Car H1)
@@ -30,6 +31,7 @@ Required properties:
 		  - "renesas,r8a7794-rst" (R-Car E2)
 		  - "renesas,r8a7795-rst" (R-Car H3)
 		  - "renesas,r8a7796-rst" (R-Car M3-W)
+		  - "renesas,r8a77961-rst" (R-Car M3-W+)
 		  - "renesas,r8a77965-rst" (R-Car M3-N)
 		  - "renesas,r8a77970-rst" (R-Car V3M)
 		  - "renesas,r8a77980-rst" (R-Car V3H)
diff --git a/Documentation/devicetree/bindings/reset/uniphier-reset.txt b/Documentation/devicetree/bindings/reset/uniphier-reset.txt
index ea005177d20a..e320a8cc9e4d 100644
--- a/Documentation/devicetree/bindings/reset/uniphier-reset.txt
+++ b/Documentation/devicetree/bindings/reset/uniphier-reset.txt
@@ -130,6 +130,7 @@ this layer. These clocks and resets should be described in each property.
 Required properties:
 - compatible: Should be
     "socionext,uniphier-pro4-usb3-reset" - for Pro4 SoC USB3
+    "socionext,uniphier-pro5-usb3-reset" - for Pro5 SoC USB3
     "socionext,uniphier-pxs2-usb3-reset" - for PXs2 SoC USB3
     "socionext,uniphier-ld20-usb3-reset" - for LD20 SoC USB3
     "socionext,uniphier-pxs3-usb3-reset" - for PXs3 SoC USB3
@@ -141,12 +142,12 @@ Required properties:
 - clocks: A list of phandles to the clock gate for the glue layer.
 	According to the clock-names, appropriate clocks are required.
 - clock-names: Should contain
-    "gio", "link" - for Pro4 SoC
+    "gio", "link" - for Pro4 and Pro5 SoCs
     "link"        - for others
 - resets: A list of phandles to the reset control for the glue layer.
 	According to the reset-names, appropriate resets are required.
 - reset-names: Should contain
-    "gio", "link" - for Pro4 SoC
+    "gio", "link" - for Pro4 and Pro5 SoCs
     "link"        - for others
 
 Example:
diff --git a/Documentation/devicetree/bindings/rng/atmel-trng.txt b/Documentation/devicetree/bindings/rng/atmel-trng.txt
index 4ac5aaa2d024..3900ee4f3532 100644
--- a/Documentation/devicetree/bindings/rng/atmel-trng.txt
+++ b/Documentation/devicetree/bindings/rng/atmel-trng.txt
@@ -1,7 +1,7 @@
 Atmel TRNG (True Random Number Generator) block
 
 Required properties:
-- compatible : Should be "atmel,at91sam9g45-trng"
+- compatible : Should be "atmel,at91sam9g45-trng" or "microchip,sam9x60-trng"
 - reg : Offset and length of the register set of this block
 - interrupts : the interrupt number for the TRNG block
 - clocks: should contain the TRNG clk source
diff --git a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
index c223e54452da..802523196ee5 100644
--- a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
@@ -2,6 +2,7 @@ HWRNG support for the iproc-rng200 driver
 
 Required properties:
 - compatible : Must be one of:
+	       "brcm,bcm2711-rng200"
 	       "brcm,bcm7211-rng200"
 	       "brcm,bcm7278-rng200"
 	       "brcm,iproc-rng200"
diff --git a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.txt b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.txt
new file mode 100644
index 000000000000..65c04172fc8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.txt
@@ -0,0 +1,12 @@
+NPCM SoC Random Number Generator
+
+Required properties:
+- compatible  : "nuvoton,npcm750-rng" for the NPCM7XX BMC.
+- reg         : Specifies physical base address and size of the registers.
+
+Example:
+
+rng: rng@f000b000 {
+	compatible = "nuvoton,npcm750-rng";
+	reg = <0xf000b000 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt
new file mode 100644
index 000000000000..f315c9723bd2
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt
@@ -0,0 +1,27 @@
+OMAP ROM RNG driver binding
+
+Secure SoCs may provide RNG via secure ROM calls like Nokia N900 does. The
+implementation can depend on the SoC secure ROM used.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be "nokia,n900-rom-rng"
+
+- clocks:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: reference to the the RNG interface clock
+
+- clock-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "ick"
+
+Example:
+
+	rom_rng: rng {
+		compatible = "nokia,n900-rom-rng";
+		clocks = <&rng_ick>;
+		clock-names = "ick";
+	};
diff --git a/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.txt b/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.txt
deleted file mode 100644
index a13fbdb4bd88..000000000000
--- a/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Exynos Pseudo Random Number Generator
-
-Required properties:
-
-- compatible  : One of:
-                - "samsung,exynos4-rng" for Exynos4210 and Exynos4412
-                - "samsung,exynos5250-prng" for Exynos5250+
-- reg         : Specifies base physical address and size of the registers map.
-- clocks      : Phandle to clock-controller plus clock-specifier pair.
-- clock-names : "secss" as a clock name.
-
-Example:
-
-	rng@10830400 {
-		compatible = "samsung,exynos4-rng";
-		reg = <0x10830400 0x200>;
-		clocks = <&clock CLK_SSS>;
-		clock-names = "secss";
-	};
diff --git a/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.yaml b/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.yaml
new file mode 100644
index 000000000000..3362cb1213c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/samsung,exynos4-rng.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rng/samsung,exynos4-rng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Pseudo Random Number Generator
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos4-rng                   # for Exynos4210 and Exynos4412
+      - samsung,exynos5250-prng               # for Exynos5250+
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: secss
+
+required:
+  - compatible
+  - reg
+  - clock-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos4.h>
+
+    rng@10830400 {
+        compatible = "samsung,exynos4-rng";
+        reg = <0x10830400 0x200>;
+        clocks = <&clock CLK_SSS>;
+        clock-names = "secss";
+    };
diff --git a/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt b/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
new file mode 100644
index 000000000000..5a613a4ec780
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
@@ -0,0 +1,17 @@
+Exynos True Random Number Generator
+
+Required properties:
+
+- compatible  : Should be "samsung,exynos5250-trng".
+- reg         : Specifies base physical address and size of the registers map.
+- clocks      : Phandle to clock-controller plus clock-specifier pair.
+- clock-names : "secss" as a clock name.
+
+Example:
+
+	rng@10830600 {
+		compatible = "samsung,exynos5250-trng";
+		reg = <0x10830600 0x100>;
+		clocks = <&clock CLK_SSS>;
+		clock-names = "secss";
+	};
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.txt b/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
deleted file mode 100644
index 1dfa7d51e006..000000000000
--- a/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-STMicroelectronics STM32 HW RNG
-===============================
-
-The STM32 hardware random number generator is a simple fixed purpose IP and
-is fully separated from other crypto functions.
-
-Required properties:
-
-- compatible : Should be "st,stm32-rng"
-- reg : Should be register base and length as documented in the datasheet
-- interrupts : The designated IRQ line for the RNG
-- clocks : The clock needed to enable the RNG
-
-Optional properties:
-- resets : The reset to properly start RNG
-- clock-error-detect : Enable the clock detection management
-
-Example:
-
-	rng: rng@50060800 {
-		compatible = "st,stm32-rng";
-		reg = <0x50060800 0x400>;
-		interrupts = <80>;
-		clocks = <&rcc 0 38>;
-	};
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
new file mode 100644
index 000000000000..82bb2e97e889
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rng/st,stm32-rng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 RNG bindings
+
+description: |
+  The STM32 hardware random number generator is a simple fixed purpose
+  IP and is fully separated from other crypto functions.
+
+maintainers:
+  - Lionel Debieve <lionel.debieve@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-rng
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  clock-error-detect:
+    description: If set enable the clock detection management
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    rng@54003000 {
+      compatible = "st,stm32-rng";
+      reg = <0x54003000 0x400>;
+      clocks = <&rcc RNG1_K>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
index 46d69c32b89b..478b0234e8fa 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
index d7a57ec4a640..37c2a601c3fa 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 RTC Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#clock-cells":
diff --git a/Documentation/devicetree/bindings/rtc/renesas,sh-rtc.yaml b/Documentation/devicetree/bindings/rtc/renesas,sh-rtc.yaml
new file mode 100644
index 000000000000..dcff573cbdb1
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/renesas,sh-rtc.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/renesas,sh-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Real Time Clock for Renesas SH and ARM SoCs
+
+maintainers:
+  - Chris Brandt <chris.brandt@renesas.com>
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+properties:
+  compatible:
+    items:
+      - const: renesas,r7s72100-rtc  # RZ/A1H
+      - const: renesas,sh-rtc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 3
+
+  interrupt-names:
+    items:
+      - const: alarm
+      - const: period
+      - const: carry
+
+  clocks:
+    # The functional clock source for the RTC controller must be listed
+    # first (if it exists). Additionally, potential clock counting sources
+    # are to be listed.
+    minItems: 1
+    maxItems: 4
+
+  clock-names:
+    # The functional clock must be labeled as "fck". Other clocks
+    # may be named in accordance to the SoC hardware manuals.
+    minItems: 1
+    maxItems: 4
+    items:
+      enum: [ fck, rtc_x1, rtc_x3, extal ]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/r7s72100-clock.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    rtc: rtc@fcff1000 {
+        compatible = "renesas,r7s72100-rtc", "renesas,sh-rtc";
+        reg = <0xfcff1000 0x2e>;
+        interrupts = <GIC_SPI 276 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 277 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 278 IRQ_TYPE_EDGE_RISING>;
+        interrupt-names = "alarm", "period", "carry";
+        clocks = <&mstp6_clks R7S72100_CLK_RTC>, <&rtc_x1_clk>,
+                 <&rtc_x3_clk>, <&extal_clk>;
+        clock-names = "fck", "rtc_x1", "rtc_x3", "extal";
+    };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-mt6397.txt b/Documentation/devicetree/bindings/rtc/rtc-mt6397.txt
new file mode 100644
index 000000000000..55a0c8874c03
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-mt6397.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for MediaTek PMIC based RTC
+
+MediaTek PMIC based RTC is an independent function of MediaTek PMIC that works
+as a type of multi-function device (MFD). The RTC can be configured and set up
+with PMIC wrapper bus which is a common resource shared with the other
+functions found on the same PMIC.
+
+For MediaTek PMIC MFD bindings, see:
+../mfd/mt6397.txt
+
+For MediaTek PMIC wrapper bus bindings, see:
+../soc/mediatek/pwrap.txt
+
+Required properties:
+- compatible: Should be one of follows
+       "mediatek,mt6323-rtc": for MT6323 PMIC
+       "mediatek,mt6397-rtc": for MT6397 PMIC
+
+Example:
+
+       pmic {
+               compatible = "mediatek,mt6323";
+
+               ...
+
+               rtc {
+                       compatible = "mediatek,mt6323-rtc";
+               };
+       };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-sh.txt b/Documentation/devicetree/bindings/rtc/rtc-sh.txt
deleted file mode 100644
index 7676c7d28874..000000000000
--- a/Documentation/devicetree/bindings/rtc/rtc-sh.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Real Time Clock for Renesas SH and ARM SoCs
-
-Required properties:
-- compatible: Should be "renesas,r7s72100-rtc" and "renesas,sh-rtc" as a
-  fallback.
-- reg: physical base address and length of memory mapped region.
-- interrupts: 3 interrupts for alarm, period, and carry.
-- interrupt-names: The interrupts should be labeled as "alarm", "period", and
-  "carry".
-- clocks: The functional clock source for the RTC controller must be listed
-  first (if exists). Additionally, potential clock counting sources are to be
-  listed.
-- clock-names: The functional clock must be labeled as "fck". Other clocks
-  may be named in accordance to the SoC hardware manuals.
-
-
-Example:
-rtc: rtc@fcff1000 {
-	compatible = "renesas,r7s72100-rtc", "renesas,sh-rtc";
-	reg = <0xfcff1000 0x2e>;
-	interrupts = <GIC_SPI 276 IRQ_TYPE_EDGE_RISING
-		      GIC_SPI 277 IRQ_TYPE_EDGE_RISING
-		      GIC_SPI 278 IRQ_TYPE_EDGE_RISING>;
-	interrupt-names = "alarm", "period", "carry";
-	clocks = <&mstp6_clks R7S72100_CLK_RTC>, <&rtc_x1_clk>,
-		 <&rtc_x3_clk>, <&extal_clk>;
-	clock-names = "fck", "rtc_x1", "rtc_x3", "extal";
-};
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
deleted file mode 100644
index fdde63a5419c..000000000000
--- a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-* Samsung's S3C Real Time Clock controller
-
-Required properties:
-- compatible: should be one of the following.
-    * "samsung,s3c2410-rtc" - for controllers compatible with s3c2410 rtc.
-    * "samsung,s3c2416-rtc" - for controllers compatible with s3c2416 rtc.
-    * "samsung,s3c2443-rtc" - for controllers compatible with s3c2443 rtc.
-    * "samsung,s3c6410-rtc" - for controllers compatible with s3c6410 rtc.
-    * "samsung,exynos3250-rtc" - (deprecated) for controllers compatible with
-                                 exynos3250 rtc (use "samsung,s3c6410-rtc").
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- interrupts: Two interrupt numbers to the cpu should be specified. First
-  interrupt number is the rtc alarm interrupt and second interrupt number
-  is the rtc tick interrupt. The number of cells representing a interrupt
-  depends on the parent interrupt controller.
-- clocks: Must contain a list of phandle and clock specifier for the rtc
-          clock and in the case of a s3c6410 compatible controller, also
-          a source clock.
-- clock-names: Must contain "rtc" and for a s3c6410 compatible controller,
-               a "rtc_src" sorted in the same order as the clocks property.
-
-Example:
-
-	rtc@10070000 {
-		compatible = "samsung,s3c6410-rtc";
-		reg = <0x10070000 0x100>;
-		interrupts = <44 0 45 0>;
-		clocks = <&clock CLK_RTC>, <&s2mps11_osc S2MPS11_CLK_AP>;
-		clock-names = "rtc", "rtc_src";
-	};
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml b/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml
new file mode 100644
index 000000000000..76bbf8b7555b
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/s3c-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C, S5P and Exynos Real Time Clock controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - samsung,s3c2410-rtc
+          - samsung,s3c2416-rtc
+          - samsung,s3c2443-rtc
+          - samsung,s3c6410-rtc
+      - const: samsung,exynos3250-rtc
+        deprecated: true
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description:
+      Must contain a list of phandle and clock specifier for the rtc
+      clock and in the case of a s3c6410 compatible controller, also
+      a source clock.
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    description:
+      Must contain "rtc" and for a s3c6410 compatible controller
+      also "rtc_src".
+    minItems: 1
+    maxItems: 2
+
+  interrupts:
+    description:
+      Two interrupt numbers to the cpu should be specified. First
+      interrupt number is the rtc alarm interrupt and second interrupt number
+      is the rtc tick interrupt. The number of cells representing a interrupt
+      depends on the parent interrupt controller.
+    minItems: 2
+    maxItems: 2
+
+allOf:
+  - $ref: rtc.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,s3c6410-rtc
+              - samsung,exynos3250-rtc
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          items:
+            - const: rtc
+            - const: rtc_src
+    else:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 1
+        clock-names:
+          items:
+            - const: rtc
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5420.h>
+    #include <dt-bindings/clock/samsung,s2mps11.h>
+
+    rtc@10070000 {
+        compatible = "samsung,s3c6410-rtc";
+        reg = <0x10070000 0x100>;
+        interrupts = <0 44 4>, <0 45 4>;
+        clocks = <&clock CLK_RTC>,
+                 <&s2mps11_osc S2MPS11_CLK_AP>;
+        clock-names = "rtc", "rtc_src";
+    };
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
deleted file mode 100644
index 130ca5b98253..000000000000
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-STM32 Real Time Clock
-
-Required properties:
-- compatible: can be one of the following:
-  - "st,stm32-rtc" for devices compatible with stm32(f4/f7).
-  - "st,stm32h7-rtc" for devices compatible with stm32h7.
-  - "st,stm32mp1-rtc" for devices compatible with stm32mp1.
-- reg: address range of rtc register set.
-- clocks: can use up to two clocks, depending on part used:
-  - "rtc_ck": RTC clock source.
-  - "pclk": RTC APB interface clock.
-    It is not present on stm32(f4/f7).
-    It is required on stm32(h7/mp1).
-- clock-names: must be "rtc_ck" and "pclk".
-    It is required on stm32(h7/mp1).
-- interrupts: rtc alarm interrupt. On stm32mp1, a second interrupt is required
-  for rtc alarm wakeup interrupt.
-- st,syscfg: phandle/offset/mask triplet. The phandle to pwrcfg used to
-  access control register at offset, and change the dbp (Disable Backup
-  Protection) bit represented by the mask, mandatory to disable/enable backup
-  domain (RTC registers) write protection.
-    It is required on stm32(f4/f7/h7).
-
-Optional properties (to override default rtc_ck parent clock on stm32(f4/f7/h7):
-- assigned-clocks: reference to the rtc_ck clock entry.
-- assigned-clock-parents: phandle of the new parent clock of rtc_ck.
-
-Example:
-
-	rtc: rtc@40002800 {
-		compatible = "st,stm32-rtc";
-		reg = <0x40002800 0x400>;
-		clocks = <&rcc 1 CLK_RTC>;
-		assigned-clocks = <&rcc 1 CLK_RTC>;
-		assigned-clock-parents = <&rcc 1 CLK_LSE>;
-		interrupt-parent = <&exti>;
-		interrupts = <17 1>;
-		st,syscfg = <&pwrcfg 0x00 0x100>;
-	};
-
-	rtc: rtc@58004000 {
-		compatible = "st,stm32h7-rtc";
-		reg = <0x58004000 0x400>;
-		clocks = <&rcc RTCAPB_CK>, <&rcc RTC_CK>;
-		clock-names = "pclk", "rtc_ck";
-		assigned-clocks = <&rcc RTC_CK>;
-		assigned-clock-parents = <&rcc LSE_CK>;
-		interrupt-parent = <&exti>;
-		interrupts = <17 1>;
-		interrupt-names = "alarm";
-		st,syscfg = <&pwrcfg 0x00 0x100>;
-	};
-
-	rtc: rtc@5c004000 {
-		compatible = "st,stm32mp1-rtc";
-		reg = <0x5c004000 0x400>;
-		clocks = <&rcc RTCAPB>, <&rcc RTC>;
-		clock-names = "pclk", "rtc_ck";
-		interrupts-extended = <&intc GIC_SPI 3 IRQ_TYPE_NONE>,
-				      <&exti 19 1>;
-	};
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
new file mode 100644
index 000000000000..0a54296d7218
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/st,stm32-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Real Time Clock Bindings
+
+maintainers:
+  - Gabriel Fernandez <gabriel.fernandez@st.com>
+
+properties:
+  compatible:
+    enum:
+      - st,stm32-rtc
+      - st,stm32h7-rtc
+      - st,stm32mp1-rtc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: pclk
+      - const: rtc_ck
+
+  interrupts:
+    maxItems: 1
+
+  st,syscfg:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
+      - items:
+          minItems: 3
+          maxItems: 3
+    description: |
+       Phandle/offset/mask triplet. The phandle to pwrcfg used to
+       access control register at offset, and change the dbp (Disable Backup
+       Protection) bit represented by the mask, mandatory to disable/enable backup
+       domain (RTC registers) write protection.
+
+  assigned-clocks:
+    description: |
+      override default rtc_ck parent clock reference to the rtc_ck clock entry
+    maxItems: 1
+
+  assigned-clock-parents:
+    description: |
+      override default rtc_ck parent clock phandle of the new parent clock of rtc_ck
+    maxItems: 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32-rtc
+
+    then:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 1
+
+        clock-names: false
+
+      required:
+        - st,syscfg
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32h7-rtc
+
+    then:
+       properties:
+         clocks:
+           minItems: 2
+           maxItems: 2
+
+       required:
+         - clock-names
+         - st,syscfg
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32mp1-rtc
+
+    then:
+       properties:
+         clocks:
+           minItems: 2
+           maxItems: 2
+
+         assigned-clocks: false
+         assigned-clock-parents: false
+
+       required:
+         - clock-names
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/mfd/stm32f4-rcc.h>
+    #include <dt-bindings/clock/stm32fx-clock.h>
+    rtc@40002800 {
+      compatible = "st,stm32-rtc";
+      reg = <0x40002800 0x400>;
+      clocks = <&rcc 1 CLK_RTC>;
+      assigned-clocks = <&rcc 1 CLK_RTC>;
+      assigned-clock-parents = <&rcc 1 CLK_LSE>;
+      interrupt-parent = <&exti>;
+      interrupts = <17 1>;
+      st,syscfg = <&pwrcfg 0x00 0x100>;
+    };
+
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    rtc@5c004000 {
+      compatible = "st,stm32mp1-rtc";
+      reg = <0x5c004000 0x400>;
+      clocks = <&rcc RTCAPB>, <&rcc RTC>;
+      clock-names = "pclk", "rtc_ck";
+      interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/security/tpm/google,cr50.txt b/Documentation/devicetree/bindings/security/tpm/google,cr50.txt
new file mode 100644
index 000000000000..cd69c2efdd37
--- /dev/null
+++ b/Documentation/devicetree/bindings/security/tpm/google,cr50.txt
@@ -0,0 +1,19 @@
+* H1 Secure Microcontroller with Cr50 Firmware on SPI Bus.
+
+H1 Secure Microcontroller running Cr50 firmware provides several
+functions, including TPM-like functionality. It communicates over
+SPI using the FIFO protocol described in the PTP Spec, section 6.
+
+Required properties:
+- compatible: Should be "google,cr50".
+- spi-max-frequency: Maximum SPI frequency.
+
+Example:
+
+&spi0 {
+	tpm@0 {
+		compatible = "google,cr50";
+		reg = <0>;
+		spi-max-frequency = <800000>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
index 20d351f268ef..55700f20f6ee 100644
--- a/Documentation/devicetree/bindings/serial/8250.txt
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -56,6 +56,11 @@ Optional properties:
 - {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
   line respectively. It will use specified GPIO instead of the peripheral
   function pin for the UART feature. If unsure, don't specify this property.
+- aspeed,sirq-polarity-sense: Only applicable to aspeed,ast2500-vuart.
+  phandle to aspeed,ast2500-scu compatible syscon alongside register offset
+  and bit number to identify how the SIRQ polarity should be configured.
+  One possible data source is the LPC/eSPI mode bit.
+  Example: aspeed,sirq-polarity-sense = <&syscon 0x70 25>
 
 Note:
 * fsl,ns16550:
diff --git a/Documentation/devicetree/bindings/serial/fsl-lpuart.txt b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
index 3495eee81d53..c904e2e68332 100644
--- a/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
+++ b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
@@ -10,6 +10,8 @@ Required properties:
     on i.MX7ULP SoC with 32-bit little-endian register organization
   - "fsl,imx8qxp-lpuart" for lpuart compatible with the one integrated
     on i.MX8QXP SoC with 32-bit little-endian register organization
+  - "fsl,imx8qm-lpuart" for lpuart compatible with the one integrated
+    on i.MX8QM SoC with 32-bit little-endian register organization
 - reg : Address and length of the register set for the device
 - interrupts : Should contain uart interrupt
 - clocks : phandle + clock specifier pairs, one for each entry in clock-names
@@ -21,8 +23,7 @@ Required properties:
 Optional properties:
 - dmas: A list of two dma specifiers, one for each entry in dma-names.
 - dma-names: should contain "tx" and "rx".
-- rs485-rts-delay, rs485-rts-active-low, rs485-rx-during-tx,
-  linux,rs485-enabled-at-boot-time: see rs485.txt
+- rs485-rts-active-low, linux,rs485-enabled-at-boot-time: see rs485.txt
 
 Note: Optional properties for DMA support. Write them both or both not.
 
diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
index b143d9a21b2d..a5edf4b70c7a 100644
--- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
+++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
@@ -54,8 +54,10 @@ Required properties:
     - "renesas,hscif-r8a7794" for R8A7794 (R-Car E2) HSCIF compatible UART.
     - "renesas,scif-r8a7795" for R8A7795 (R-Car H3) SCIF compatible UART.
     - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART.
-    - "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART.
-    - "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART.
+    - "renesas,scif-r8a7796" for R8A77960 (R-Car M3-W) SCIF compatible UART.
+    - "renesas,hscif-r8a7796" for R8A77960 (R-Car M3-W) HSCIF compatible UART.
+    - "renesas,scif-r8a77961" for R8A77961 (R-Car M3-W+) SCIF compatible UART.
+    - "renesas,hscif-r8a77961" for R8A77961 (R-Car M3-W+) HSCIF compatible UART.
     - "renesas,scif-r8a77965" for R8A77965 (R-Car M3-N) SCIF compatible UART.
     - "renesas,hscif-r8a77965" for R8A77965 (R-Car M3-N) HSCIF compatible UART.
     - "renesas,scif-r8a77970" for R8A77970 (R-Car V3M) SCIF compatible UART.
diff --git a/Documentation/devicetree/bindings/serial/rs485.txt b/Documentation/devicetree/bindings/serial/rs485.txt
index b92592dff6dd..a7fe93efc4a5 100644
--- a/Documentation/devicetree/bindings/serial/rs485.txt
+++ b/Documentation/devicetree/bindings/serial/rs485.txt
@@ -1,31 +1 @@
-* RS485 serial communications
-
-The RTS signal is capable of automatically controlling line direction for
-the built-in half-duplex mode.
-The properties described hereafter shall be given to a half-duplex capable
-UART node.
-
-Optional properties:
-- rs485-rts-delay: prop-encoded-array <a b> where:
-  * a is the delay between rts signal and beginning of data sent in milliseconds.
-      it corresponds to the delay before sending data.
-  * b is the delay between end of data sent and rts signal in milliseconds
-      it corresponds to the delay after sending data and actual release of the line.
-  If this property is not specified, <0 0> is assumed.
-- rs485-rts-active-low: drive RTS low when sending (default is high).
-- linux,rs485-enabled-at-boot-time: empty property telling to enable the rs485
-  feature at boot time. It can be disabled later with proper ioctl.
-- rs485-rx-during-tx: empty property that enables the receiving of data even
-  while sending data.
-
-RS485 example for Atmel USART:
-	usart0: serial@fff8c000 {
-		compatible = "atmel,at91sam9260-usart";
-		reg = <0xfff8c000 0x4000>;
-		interrupts = <7>;
-		atmel,use-dma-rx;
-		atmel,use-dma-tx;
-		linux,rs485-enabled-at-boot-time;
-		rs485-rts-delay = <0 200>;		// in milliseconds
-	};
-
+See rs485.yaml
diff --git a/Documentation/devicetree/bindings/serial/rs485.yaml b/Documentation/devicetree/bindings/serial/rs485.yaml
new file mode 100644
index 000000000000..d4beaf11222d
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/rs485.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serial/rs485.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RS485 serial communications Bindings
+
+description: The RTS signal is capable of automatically controlling
+             line direction for the built-in half-duplex mode.
+             The properties described hereafter shall be given to a
+             half-duplex capable UART node.
+
+maintainers:
+  -  Rob Herring <robh@kernel.org>
+
+properties:
+  rs485-rts-delay:
+    description: prop-encoded-array <a b>
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - items:
+          items:
+            - description:
+                Delay between rts signal and beginning of data sent in milliseconds.
+                It corresponds to the delay before sending data.
+              default: 0
+              maximum: 1000
+            - description:
+                Delay between end of data sent and rts signal in milliseconds.
+                It corresponds to the delay after sending data and actual release of the line.
+              default: 0
+              maximum: 1000
+
+  rs485-rts-active-low:
+    description: drive RTS low when sending (default is high).
+    $ref: /schemas/types.yaml#/definitions/flag
+
+  linux,rs485-enabled-at-boot-time:
+    description: enables the rs485 feature at boot time. It can be disabled later with proper ioctl.
+    $ref: /schemas/types.yaml#/definitions/flag
+
+  rs485-rx-during-tx:
+   description: enables the receiving of data even while sending data.
+   $ref: /schemas/types.yaml#/definitions/flag
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.txt b/Documentation/devicetree/bindings/serial/samsung_uart.txt
deleted file mode 100644
index e85f37ec33f0..000000000000
--- a/Documentation/devicetree/bindings/serial/samsung_uart.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-* Samsung's UART Controller
-
-The Samsung's UART controller is used for interfacing SoC with serial
-communicaion devices.
-
-Required properties:
-- compatible: should be one of following:
-  - "samsung,exynos4210-uart" -  Exynos4210 SoC,
-  - "samsung,s3c2410-uart" - compatible with ports present on S3C2410 SoC,
-  - "samsung,s3c2412-uart" - compatible with ports present on S3C2412 SoC,
-  - "samsung,s3c2440-uart" - compatible with ports present on S3C2440 SoC,
-  - "samsung,s3c6400-uart" - compatible with ports present on S3C6400 SoC,
-  - "samsung,s5pv210-uart" - compatible with ports present on S5PV210 SoC.
-
-- reg: base physical address of the controller and length of memory mapped
-  region.
-
-- interrupts: a single interrupt signal to SoC interrupt controller,
-  according to interrupt bindings documentation [1].
-
-- clock-names: input names of clocks used by the controller:
-  - "uart" - controller bus clock,
-  - "clk_uart_baudN" - Nth baud base clock input (N = 0, 1, ...),
-    according to SoC User's Manual (only N = 0 is allowedfor SoCs without
-    internal baud clock mux).
-- clocks: phandles and specifiers for all clocks specified in "clock-names"
-  property, in the same order, according to clock bindings documentation [2].
-
-[1] Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-[2] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Optional properties:
-- samsung,uart-fifosize: The fifo size supported by the UART channel
-
-Note: Each Samsung UART should have an alias correctly numbered in the
-"aliases" node, according to serialN format, where N is the port number
-(non-negative decimal integer) as specified by User's Manual of respective
-SoC.
-
-Example:
-	aliases {
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-	};
-
-Example:
-	uart1: serial@7f005400 {
-		compatible = "samsung,s3c6400-uart";
-		reg = <0x7f005400 0x100>;
-		interrupt-parent = <&vic1>;
-		interrupts = <6>;
-		clock-names = "uart", "clk_uart_baud2",
-				"clk_uart_baud3";
-		clocks = <&clocks PCLK_UART1>, <&clocks PCLK_UART1>,
-				<&clocks SCLK_UART>;
-		samsung,uart-fifosize = <16>;
-	};
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml
new file mode 100644
index 000000000000..9d2ce347875b
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serial/samsung_uart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C, S5P and Exynos SoC UART Controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+  - Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+description: |+
+  Each Samsung UART should have an alias correctly numbered in the "aliases"
+  node, according to serialN format, where N is the port number (non-negative
+  decimal integer) as specified by User's Manual of respective SoC.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - samsung,s3c2410-uart
+          - samsung,s3c2412-uart
+          - samsung,s3c2440-uart
+          - samsung,s3c6400-uart
+          - samsung,s5pv210-uart
+          - samsung,exynos4210-uart
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 5
+
+  clock-names:
+    description: N = 0 is allowed for SoCs without internal baud clock mux.
+    minItems: 2
+    maxItems: 5
+    items:
+      - const: uart
+      - pattern: '^clk_uart_baud[0-3]$'
+      - pattern: '^clk_uart_baud[0-3]$'
+      - pattern: '^clk_uart_baud[0-3]$'
+      - pattern: '^clk_uart_baud[0-3]$'
+
+  interrupts:
+    description: RX interrupt and optionally TX interrupt.
+    minItems: 1
+    maxItems: 2
+
+  samsung,uart-fifosize:
+    description: The fifo size supported by the UART channel.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [16, 64, 256]
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - interrupts
+  - reg
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,s3c2410-uart
+              - samsung,s5pv210-uart
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 3
+        clock-names:
+          minItems: 2
+          maxItems: 3
+          items:
+            - const: uart
+            - pattern: '^clk_uart_baud[0-1]$'
+            - pattern: '^clk_uart_baud[0-1]$'
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos4210-uart
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          minItems: 2
+          maxItems: 2
+          items:
+            - const: uart
+            - const: clk_uart_baud0
+
+examples:
+  - |
+    #include <dt-bindings/clock/samsung,s3c64xx-clock.h>
+
+    uart0: serial@7f005000 {
+        compatible = "samsung,s3c6400-uart";
+        reg = <0x7f005000 0x100>;
+        interrupt-parent = <&vic1>;
+        interrupts = <5>;
+        clock-names = "uart", "clk_uart_baud2",
+                      "clk_uart_baud3";
+        clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
+                 <&clocks SCLK_UART>;
+        samsung,uart-fifosize = <16>;
+    };
diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.txt b/Documentation/devicetree/bindings/serial/sprd-uart.txt
deleted file mode 100644
index 9607dc616205..000000000000
--- a/Documentation/devicetree/bindings/serial/sprd-uart.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-* Spreadtrum serial UART
-
-Required properties:
-- compatible: must be one of:
-  * "sprd,sc9836-uart"
-  * "sprd,sc9860-uart", "sprd,sc9836-uart"
-
-- reg: offset and length of the register set for the device
-- interrupts: exactly one interrupt specifier
-- clock-names: Should contain following entries:
-  "enable" for UART module enable clock,
-  "uart" for UART clock,
-  "source" for UART source (parent) clock.
-- clocks: Should contain a clock specifier for each entry in clock-names.
-  UART clock and source clock are optional properties, but enable clock
-  is required.
-
-Optional properties:
-- dma-names: Should contain "rx" for receive and "tx" for transmit channels.
-- dmas: A list of dma specifiers, one for each entry in dma-names.
-
-Example:
-	uart0: serial@0 {
-		compatible = "sprd,sc9860-uart",
-			     "sprd,sc9836-uart";
-		reg = <0x0 0x100>;
-		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
-		dma-names = "rx", "tx";
-		dmas = <&ap_dma 19>, <&ap_dma 20>;
-		clock-names = "enable", "uart", "source";
-		clocks = <&clk_ap_apb_gates 9>, <&clk_uart0>, <&ext_26m>;
-	};
diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.yaml b/Documentation/devicetree/bindings/serial/sprd-uart.yaml
new file mode 100644
index 000000000000..e66b2e92a7fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/sprd-uart.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2019 Unisoc Inc.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/serial/sprd-uart.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Spreadtrum serial UART
+
+maintainers:
+  - Orson Zhai <orsonzhai@gmail.com>
+  - Baolin Wang <baolin.wang7@gmail.com>
+  - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - sprd,sc9860-uart
+              - sprd,sc9863a-uart
+          - const: sprd,sc9836-uart
+      - const: sprd,sc9836-uart
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    description: |
+      "enable" for UART module enable clock, "uart" for UART clock, "source" 
+      for UART source (parent) clock.
+    items:
+      - const: enable
+      - const: uart
+      - const: source
+
+  dmas:
+    minItems: 1
+    maxItems: 2
+
+  dma-names:
+    minItems: 1
+    items:
+      - const: rx
+      - const: tx
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    serial@0 {
+      compatible = "sprd,sc9860-uart", "sprd,sc9836-uart";
+      reg = <0x0 0x100>;
+      interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+      dma-names = "rx", "tx";
+      dmas = <&ap_dma 19>, <&ap_dma 20>;
+      clock-names = "enable", "uart", "source";
+      clocks = <&clk_ap_apb_gates 9>, <&clk_uart0>, <&ext_26m>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
new file mode 100644
index 000000000000..238c44192d31
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serial/st,stm32-uart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+maintainers:
+  - Erwan Le Ray <erwan.leray@st.com>
+
+title: STMicroelectronics STM32 USART bindings
+
+allOf:
+  - $ref: rs485.yaml
+
+properties:
+  compatible:
+    enum:
+      - st,stm32-uart
+      - st,stm32f7-uart
+      - st,stm32h7-uart
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  label:
+    description: label associated with this uart
+
+  st,hw-flow-ctrl:
+    description: enable hardware flow control
+    $ref: /schemas/types.yaml#/definitions/flag
+
+  dmas:
+    minItems: 1
+    maxItems: 2
+
+  dma-names:
+    items:
+      enum: [ rx, tx ]
+    minItems: 1
+    maxItems: 2
+
+  wakeup-source: true
+
+  rs485-rts-delay: true
+  rs485-rts-active-low: true
+  linux,rs485-enabled-at-boot-time: true
+  rs485-rx-during-tx: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    usart1: serial@40011000 {
+      compatible = "st,stm32-uart";
+      reg = <0x40011000 0x400>;
+      interrupts = <37>;
+      clocks = <&rcc 0 164>;
+      dmas = <&dma2 2 4 0x414 0x0>,
+             <&dma2 7 4 0x414 0x0>;
+      dma-names = "rx", "tx";
+      rs485-rts-active-low;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
deleted file mode 100644
index 8620f7fcbd50..000000000000
--- a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-* STMicroelectronics STM32 USART
-
-Required properties:
-- compatible: can be either:
-  - "st,stm32-uart",
-  - "st,stm32f7-uart",
-  - "st,stm32h7-uart".
-  depending is compatible with stm32(f4), stm32f7 or stm32h7.
-- reg: The address and length of the peripheral registers space
-- interrupts:
-  - The interrupt line for the USART instance,
-  - An optional wake-up interrupt.
-- clocks: The input clock of the USART instance
-
-Optional properties:
-- resets: Must contain the phandle to the reset controller.
-- pinctrl: The reference on the pins configuration
-- st,hw-flow-ctrl: bool flag to enable hardware flow control.
-- rs485-rts-delay, rs485-rx-during-tx, rs485-rts-active-low,
-  linux,rs485-enabled-at-boot-time: see rs485.txt.
-- dmas: phandle(s) to DMA controller node(s). Refer to stm32-dma.txt
-- dma-names: "rx" and/or "tx"
-- wakeup-source: bool flag to indicate this device has wakeup capabilities
-- interrupt-names, if optional wake-up interrupt is used, should be:
-  - "event": the name for the interrupt line of the USART instance
-  - "wakeup" the name for the optional wake-up interrupt
-
-
-Examples:
-usart4: serial@40004c00 {
-	compatible = "st,stm32-uart";
-	reg = <0x40004c00 0x400>;
-	interrupts = <52>;
-	clocks = <&clk_pclk1>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usart4>;
-};
-
-usart2: serial@40004400 {
-	compatible = "st,stm32-uart";
-	reg = <0x40004400 0x400>;
-	interrupts = <38>;
-	clocks = <&clk_pclk1>;
-	st,hw-flow-ctrl;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usart2 &pinctrl_usart2_rtscts>;
-};
-
-usart1: serial@40011000 {
-	compatible = "st,stm32-uart";
-	reg = <0x40011000 0x400>;
-	interrupts = <37>;
-	clocks = <&rcc 0 164>;
-	dmas = <&dma2 2 4 0x414 0x0>,
-	       <&dma2 7 4 0x414 0x0>;
-	dma-names = "rx", "tx";
-};
diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml b/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml
new file mode 100644
index 000000000000..2ecab8ed702a
--- /dev/null
+++ b/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serio/allwinner,sun4i-a10-ps2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 PS2 Host Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description:
+  A20 PS2 is dual role controller (PS2 host and PS2 device). These
+  bindings for PS2 A10/A20 host controller. IBM compliant IBM PS2 and
+  AT-compatible keyboard and mouse can be connected.
+
+properties:
+  compatible:
+    const: allwinner,sun4i-a10-ps2
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/sun7i-a20-ccu.h>
+
+    ps20: ps2@1c2a000 {
+        compatible = "allwinner,sun4i-a10-ps2";
+        reg = <0x01c2a000 0x400>;
+        interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_APB1_PS20>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
deleted file mode 100644
index 75996b6111bb..000000000000
--- a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-* Device tree bindings for Allwinner A10, A20 PS2 host controller
-
-A20 PS2 is dual role controller (PS2 host and PS2 device). These bindings are
-for PS2 A10/A20 host controller. IBM compliant IBM PS2 and AT-compatible keyboard
-and mouse can be connected.
-
-Required properties:
-
- - reg             : Offset and length of the register set for the device.
- - compatible      : Should be as of the following:
-                     - "allwinner,sun4i-a10-ps2"
- - interrupts      : The interrupt line connected to the PS2.
- - clocks          : The gate clk connected to the PS2.
-
-
-Example:
-	ps20: ps2@01c2a000 {
-		compatible = "allwinner,sun4i-a10-ps2";
-		reg = <0x01c2a000 0x400>;
-		interrupts = <0 62 4>;
-		clocks = <&apb1_gates 6>;
-	};
diff --git a/Documentation/devicetree/bindings/slimbus/bus.txt b/Documentation/devicetree/bindings/slimbus/bus.txt
index 52fa6426388c..bbe871f82a8b 100644
--- a/Documentation/devicetree/bindings/slimbus/bus.txt
+++ b/Documentation/devicetree/bindings/slimbus/bus.txt
@@ -32,6 +32,10 @@ Required property for SLIMbus child node if it is present:
 	 	  Product Code, shall be in lower case hexadecimal with leading
 		  zeroes suppressed
 
+Optional property for SLIMbus child node if it is present:
+- slim-ifc-dev	- Should be phandle to SLIMBus Interface device.
+		  Required for devices which deal with streams.
+
 SLIMbus example for Qualcomm's slimbus manager component:
 
 	slim@28080000 {
@@ -43,8 +47,14 @@ SLIMbus example for Qualcomm's slimbus manager component:
 		#address-cells = <2>;
 		#size-cell = <0>;
 
+		codec_ifd: ifd@0,0{
+			compatible = "slim217,60";
+			reg = <0 0>;
+		};
+
 		codec: wcd9310@1,0{
 			compatible = "slim217,60";
 			reg = <1 0>;
+			slim-ifc-dev  = <&codec_ifd>;
 		};
 	};
diff --git a/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt b/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt
deleted file mode 100644
index e876f3ce54f6..000000000000
--- a/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Amlogic Canvas
-================================
-
-A canvas is a collection of metadata that describes a pixel buffer.
-Those metadata include: width, height, phyaddr, wrapping and block mode.
-Starting with GXBB the endianness can also be described.
-
-Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data
-rather than use the phy addresses directly. For instance, this is the case for
-the video decoders and the display.
-
-Amlogic SoCs have 256 canvas.
-
-Device Tree Bindings:
----------------------
-
-Video Lookup Table
---------------------------
-
-Required properties:
-- compatible: has to be one of:
-		- "amlogic,meson8-canvas", "amlogic,canvas" on Meson8
-		- "amlogic,meson8b-canvas", "amlogic,canvas" on Meson8b
-		- "amlogic,meson8m2-canvas", "amlogic,canvas" on Meson8m2
-		- "amlogic,canvas" on GXBB and newer
-- reg: Base physical address and size of the canvas registers.
-
-Example:
-
-canvas: video-lut@48 {
-	compatible = "amlogic,canvas";
-	reg = <0x0 0x48 0x0 0x14>;
-};
diff --git a/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.yaml b/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.yaml
new file mode 100644
index 000000000000..f548594d020b
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/soc/amlogic/amlogic,canvas.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Canvas Video Lookup Table
+
+maintainers:
+  - Neil Armstrong <narmstrong@baylibre.com>
+  - Maxime Jourdan <mjourdan@baylibre.com>
+
+description: |
+  A canvas is a collection of metadata that describes a pixel buffer.
+  Those metadata include: width, height, phyaddr, wrapping and block mode.
+  Starting with GXBB the endianness can also be described.
+
+  Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data
+  rather than use the phy addresses directly. For instance, this is the case for
+  the video decoders and the display.
+
+  Amlogic SoCs have 256 canvas.
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - amlogic,meson8-canvas
+          - amlogic,meson8b-canvas
+          - amlogic,meson8m2-canvas
+        - const: amlogic,canvas
+      - const: amlogic,canvas # GXBB and newer SoCs
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    canvas: video-lut@48 {
+        compatible = "amlogic,canvas";
+        reg = <0x48 0x14>;
+    };
+
diff --git a/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.txt b/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.txt
index 3b7d32956391..72ff033565e5 100644
--- a/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.txt
+++ b/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.txt
@@ -26,7 +26,7 @@ Optional properties:
     system power.  This node follows the power controller bindings[3].
 
 [1] Documentation/devicetree/bindings/reset/reset.txt
-[2] Documentation/devicetree/bindings/power/power_domain.txt
+[2] Documentation/devicetree/bindings/power/power-domain.yaml
 [3] Documentation/devicetree/bindings/power/power-controller.txt
 
 Example:
diff --git a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
index e284e4e1ccd5..5a33619d881d 100644
--- a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
@@ -5,7 +5,7 @@ and power management.
 
 Required properites:
   - reg : Offset and length of the register set of the RCPM block.
-  - fsl,#rcpm-wakeup-cells : The number of IPPDEXPCR register cells in the
+  - #fsl,rcpm-wakeup-cells : The number of IPPDEXPCR register cells in the
 	fsl,rcpm-wakeup property.
   - compatible : Must contain a chip-specific RCPM block compatible string
 	and (if applicable) may contain a chassis-version RCPM compatible
@@ -20,6 +20,7 @@ Required properites:
 	* "fsl,qoriq-rcpm-1.0": for chassis 1.0 rcpm
 	* "fsl,qoriq-rcpm-2.0": for chassis 2.0 rcpm
 	* "fsl,qoriq-rcpm-2.1": for chassis 2.1 rcpm
+	* "fsl,qoriq-rcpm-2.1+": for chassis 2.1+ rcpm
 
 All references to "1.0" and "2.0" refer to the QorIQ chassis version to
 which the chip complies.
@@ -27,14 +28,19 @@ Chassis Version		Example Chips
 ---------------		-------------------------------
 1.0				p4080, p5020, p5040, p2041, p3041
 2.0				t4240, b4860, b4420
-2.1				t1040, ls1021
+2.1				t1040,
+2.1+				ls1021a, ls1012a, ls1043a, ls1046a
+
+Optional properties:
+ - little-endian : RCPM register block is Little Endian. Without it RCPM
+   will be Big Endian (default case).
 
 Example:
 The RCPM node for T4240:
 	rcpm: global-utilities@e2000 {
 		compatible = "fsl,t4240-rcpm", "fsl,qoriq-rcpm-2.0";
 		reg = <0xe2000 0x1000>;
-		fsl,#rcpm-wakeup-cells = <2>;
+		#fsl,rcpm-wakeup-cells = <2>;
 	};
 
 * Freescale RCPM Wakeup Source Device Tree Bindings
@@ -44,7 +50,7 @@ can be used as a wakeup source.
 
   - fsl,rcpm-wakeup: Consists of a phandle to the rcpm node and the IPPDEXPCR
 	register cells. The number of IPPDEXPCR register cells is defined in
-	"fsl,#rcpm-wakeup-cells" in the rcpm node. The first register cell is
+	"#fsl,rcpm-wakeup-cells" in the rcpm node. The first register cell is
 	the bit mask that should be set in IPPDEXPCR0, and the second register
 	cell is for IPPDEXPCR1, and so on.
 
diff --git a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
index 876693a7ada5..8f469d85833b 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
+++ b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
@@ -8,7 +8,7 @@ The System Power Manager (SPM) inside the SCPSYS is for the MTCMOS power
 domain control.
 
 The driver implements the Generic PM domain bindings described in
-power/power_domain.txt. It provides the power domains defined in
+power/power-domain.yaml. It provides the power domains defined in
 - include/dt-bindings/power/mt8173-power.h
 - include/dt-bindings/power/mt6797-power.h
 - include/dt-bindings/power/mt2701-power.h
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.txt
index f3fa313963d5..616fddcd09fd 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.txt
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.txt
@@ -22,6 +22,7 @@ resources.
 		    "qcom,rpm-apq8084"
 		    "qcom,rpm-msm8916"
 		    "qcom,rpm-msm8974"
+		    "qcom,rpm-msm8976"
 		    "qcom,rpm-msm8998"
 		    "qcom,rpm-sdm660"
 		    "qcom,rpm-qcs404"
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.txt b/Documentation/devicetree/bindings/soc/rockchip/grf.txt
index 46e27cd69f18..f96511aa3897 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/grf.txt
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.txt
@@ -10,6 +10,12 @@ From RK3368 SoCs, the GRF is divided into two sections,
 
 On RK3328 SoCs, the GRF adds a section for USB2PHYGRF,
 
+ON RK3308 SoC, the GRF is divided into four sections:
+- GRF, used for general non-secure system,
+- SGRF, used for general secure system,
+- DETECTGRF, used for audio codec system,
+- COREGRF, used for pvtm,
+
 Required Properties:
 
 - compatible: GRF should be one of the following:
@@ -19,19 +25,25 @@ Required Properties:
    - "rockchip,rk3188-grf", "syscon": for rk3188
    - "rockchip,rk3228-grf", "syscon": for rk3228
    - "rockchip,rk3288-grf", "syscon": for rk3288
+   - "rockchip,rk3308-grf", "syscon": for rk3308
    - "rockchip,rk3328-grf", "syscon": for rk3328
    - "rockchip,rk3368-grf", "syscon": for rk3368
    - "rockchip,rk3399-grf", "syscon": for rk3399
    - "rockchip,rv1108-grf", "syscon": for rv1108
+- compatible: DETECTGRF should be one of the following:
+   - "rockchip,rk3308-detect-grf", "syscon": for rk3308
+- compatilbe: COREGRF should be one of the following:
+   - "rockchip,rk3308-core-grf", "syscon": for rk3308
 - compatible: PMUGRF should be one of the following:
    - "rockchip,px30-pmugrf", "syscon": for px30
    - "rockchip,rk3368-pmugrf", "syscon": for rk3368
    - "rockchip,rk3399-pmugrf", "syscon": for rk3399
-- compatible: SGRF should be one of the following
+- compatible: SGRF should be one of the following:
    - "rockchip,rk3288-sgrf", "syscon": for rk3288
-- compatible: USB2PHYGRF should be one of the followings
+- compatible: USB2PHYGRF should be one of the following:
+   - "rockchip,px30-usb2phy-grf", "syscon": for px30
    - "rockchip,rk3328-usb2phy-grf", "syscon": for rk3328
-- compatible: USBGRF should be one of the following
+- compatible: USBGRF should be one of the following:
    - "rockchip,rv1108-usbgrf", "syscon": for rv1108
 - reg: physical base address of the controller and length of memory mapped
   region.
diff --git a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.txt b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.txt
new file mode 100644
index 000000000000..59758ccce809
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.txt
@@ -0,0 +1,59 @@
+* Texas Instruments K3 NavigatorSS Ring Accelerator
+
+The Ring Accelerator (RA) is a machine which converts read/write accesses
+from/to a constant address into corresponding read/write accesses from/to a
+circular data structure in memory. The RA eliminates the need for each DMA
+controller which needs to access ring elements from having to know the current
+state of the ring (base address, current offset). The DMA controller
+performs a read or write access to a specific address range (which maps to the
+source interface on the RA) and the RA replaces the address for the transaction
+with a new address which corresponds to the head or tail element of the ring
+(head for reads, tail for writes).
+
+The Ring Accelerator is a hardware module that is responsible for accelerating
+management of the packet queues. The K3 SoCs can have more than one RA instances
+
+Required properties:
+- compatible	: Must be "ti,am654-navss-ringacc";
+- reg		: Should contain register location and length of the following
+		  named register regions.
+- reg-names	: should be
+		  "rt" - The RA Ring Real-time Control/Status Registers
+		  "fifos" - The RA Queues Registers
+		  "proxy_gcfg" - The RA Proxy Global Config Registers
+		  "proxy_target" - The RA Proxy Datapath Registers
+- ti,num-rings	: Number of rings supported by RA
+- ti,sci-rm-range-gp-rings : TI-SCI RM subtype for GP ring range
+- ti,sci	: phandle on TI-SCI compatible System controller node
+- ti,sci-dev-id	: TI-SCI device id of the ring accelerator
+- msi-parent	: phandle for "ti,sci-inta" interrupt controller
+
+Optional properties:
+ -- ti,dma-ring-reset-quirk : enable ringacc / udma ring state interoperability
+		  issue software w/a
+
+Example:
+
+ringacc: ringacc@3c000000 {
+	compatible = "ti,am654-navss-ringacc";
+	reg =	<0x0 0x3c000000 0x0 0x400000>,
+		<0x0 0x38000000 0x0 0x400000>,
+		<0x0 0x31120000 0x0 0x100>,
+		<0x0 0x33000000 0x0 0x40000>;
+	reg-names = "rt", "fifos",
+		    "proxy_gcfg", "proxy_target";
+	ti,num-rings = <818>;
+	ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */
+	ti,dma-ring-reset-quirk;
+	ti,sci = <&dmsc>;
+	ti,sci-dev-id = <187>;
+	msi-parent = <&inta_main_udmass>;
+};
+
+client:
+
+dma_ipx: dma_ipx@<addr> {
+	...
+	ti,ringacc = <&ringacc>;
+	...
+}
diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
index f541d1f776a2..6217e64309de 100644
--- a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
+++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
@@ -12,7 +12,7 @@ PM Domain Node
 ==============
 The PM domain node represents the global PM domain managed by the PMMC, which
 in this case is the implementation as documented by the generic PM domain
-bindings in Documentation/devicetree/bindings/power/power_domain.txt.  Because
+bindings in Documentation/devicetree/bindings/power/power-domain.yaml.  Because
 this relies on the TI SCI protocol to communicate with the PMMC it must be a
 child of the pmmc node.
 
diff --git a/Documentation/devicetree/bindings/sound/adi,adau7118.yaml b/Documentation/devicetree/bindings/sound/adi,adau7118.yaml
new file mode 100644
index 000000000000..75e0cbe6be70
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,adau7118.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/adi,adau7118.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+
+title: Analog Devices ADAU7118 8 Channel PDM to I2S/TDM Converter
+
+maintainers:
+  - Nuno Sá <nuno.sa@analog.com>
+
+description: |
+  Analog Devices ADAU7118 8 Channel PDM to I2S/TDM Converter over I2C or HW
+  standalone mode.
+  https://www.analog.com/media/en/technical-documentation/data-sheets/ADAU7118.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,adau7118
+
+  reg:
+    maxItems: 1
+
+  "#sound-dai-cells":
+    const: 0
+
+  iovdd-supply:
+    description: Digital Input/Output Power Supply.
+
+  dvdd-supply:
+    description: Internal Core Digital Power Supply.
+
+  adi,decimation-ratio:
+    description: |
+      This property set's the decimation ratio of PDM to PCM audio data.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [64, 32, 16]
+        default: 64
+
+  adi,pdm-clk-map:
+    description: |
+      The ADAU7118 has two PDM clocks for the four Inputs. Each input must be
+      assigned to one of these two clocks. This property set's the mapping
+      between the clocks and the inputs.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - minItems: 4
+        maxItems: 4
+        items:
+          maximum: 1
+        default: [0, 0, 1, 1]
+
+required:
+  - "#sound-dai-cells"
+  - compatible
+  - iovdd-supply
+  - dvdd-supply
+
+examples:
+  - |
+    i2c {
+        /* example with i2c support */
+        #address-cells = <1>;
+        #size-cells = <0>;
+        adau7118_codec: audio-codec@14 {
+                compatible = "adi,adau7118";
+                reg = <0x14>;
+                #sound-dai-cells = <0>;
+                iovdd-supply = <&supply>;
+                dvdd-supply = <&supply>;
+                adi,pdm-clk-map = <1 1 0 0>;
+                adi,decimation-ratio = <16>;
+        };
+    };
+
+    /* example with hw standalone mode */
+    adau7118_codec_hw: adau7118-codec-hw {
+            compatible = "adi,adau7118";
+            #sound-dai-cells = <0>;
+            iovdd-supply = <&supply>;
+            dvdd-supply = <&supply>;
+    };
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
new file mode 100644
index 000000000000..ea1d2efb2aaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
@@ -0,0 +1,267 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/allwinner,sun4i-a10-codec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Codec Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  "#sound-dai-cells":
+    const: 0
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-codec
+      - allwinner,sun6i-a31-codec
+      - allwinner,sun7i-a20-codec
+      - allwinner,sun8i-a23-codec
+      - allwinner,sun8i-h3-codec
+      - allwinner,sun8i-v3s-codec
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: apb
+      - const: codec
+
+  dmas:
+    items:
+      - description: RX DMA Channel
+      - description: TX DMA Channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  resets:
+    maxItems: 1
+
+  allwinner,audio-routing:
+    description: |-
+      A list of the connections between audio components.  Each entry
+      is a pair of strings, the first being the connection's sink, the
+      second being the connection's source.
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/non-unique-string-array
+      - minItems: 2
+        maxItems: 18
+        items:
+          enum:
+            # Audio Pins on the SoC
+            - HP
+            - HPCOM
+            - LINEIN
+            - LINEOUT
+            - MIC1
+            - MIC2
+            - MIC3
+
+            # Microphone Biases from the SoC
+            - HBIAS
+            - MBIAS
+
+            # Board Connectors
+            - Headphone
+            - Headset Mic
+            - Line In
+            - Line Out
+            - Mic
+            - Speaker
+
+  allwinner,codec-analog-controls:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: Phandle to the codec analog controls in the PRCM
+
+  allwinner,pa-gpios:
+    description: GPIO to enable the external amplifier
+
+required:
+  - "#sound-dai-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun6i-a31-codec
+            - allwinner,sun8i-a23-codec
+            - allwinner,sun8i-h3-codec
+            - allwinner,sun8i-v3s-codec
+
+    then:
+      if:
+        properties:
+          compatible:
+            const: allwinner,sun6i-a31-codec
+
+      then:
+        required:
+          - resets
+          - allwinner,audio-routing
+
+      else:
+        required:
+          - resets
+          - allwinner,audio-routing
+          - allwinner,codec-analog-controls
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun6i-a31-codec
+
+    then:
+      properties:
+        allwinner,audio-routing:
+          items:
+            enum:
+              - HP
+              - HPCOM
+              - LINEIN
+              - LINEOUT
+              - MIC1
+              - MIC2
+              - MIC3
+              - HBIAS
+              - MBIAS
+              - Headphone
+              - Headset Mic
+              - Line In
+              - Line Out
+              - Mic
+              - Speaker
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun8i-a23-codec
+
+    then:
+      properties:
+        allwinner,audio-routing:
+          items:
+            enum:
+              - HP
+              - HPCOM
+              - LINEIN
+              - MIC1
+              - MIC2
+              - HBIAS
+              - MBIAS
+              - Headphone
+              - Headset Mic
+              - Line In
+              - Line Out
+              - Mic
+              - Speaker
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun8i-h3-codec
+
+    then:
+      properties:
+        allwinner,audio-routing:
+          items:
+            enum:
+              - HP
+              - HPCOM
+              - LINEIN
+              - LINEOUT
+              - MIC1
+              - MIC2
+              - HBIAS
+              - MBIAS
+              - Headphone
+              - Headset Mic
+              - Line In
+              - Line Out
+              - Mic
+              - Speaker
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - allwinner,sun8i-v3s-codec
+
+    then:
+      properties:
+        allwinner,audio-routing:
+          items:
+            enum:
+              - HP
+              - HPCOM
+              - MIC1
+              - HBIAS
+              - Headphone
+              - Headset Mic
+              - Line In
+              - Line Out
+              - Mic
+              - Speaker
+
+additionalProperties: false
+
+examples:
+  - |
+    codec@1c22c00 {
+        #sound-dai-cells = <0>;
+        compatible = "allwinner,sun7i-a20-codec";
+        reg = <0x01c22c00 0x40>;
+        interrupts = <0 30 4>;
+        clocks = <&apb0_gates 0>, <&codec_clk>;
+        clock-names = "apb", "codec";
+        dmas = <&dma 0 19>, <&dma 0 19>;
+        dma-names = "rx", "tx";
+    };
+
+  - |
+    codec@1c22c00 {
+        #sound-dai-cells = <0>;
+        compatible = "allwinner,sun6i-a31-codec";
+        reg = <0x01c22c00 0x98>;
+        interrupts = <0 29 4>;
+        clocks = <&ccu 61>, <&ccu 135>;
+        clock-names = "apb", "codec";
+        resets = <&ccu 42>;
+        dmas = <&dma 15>, <&dma 15>;
+        dma-names = "rx", "tx";
+        allwinner,audio-routing =
+            "Headphone", "HP",
+            "Speaker", "LINEOUT",
+            "LINEIN", "Line In",
+            "MIC1", "MBIAS",
+            "MIC1", "Mic",
+            "MIC2", "HBIAS",
+            "MIC2", "Headset Mic";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
index eb3992138eec..112ae00d63c1 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 I2S Controller Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
index 38d4cede0860..444a432912bb 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -10,7 +10,7 @@ maintainers:
   - Chen-Yu Tsai <wens@csie.org>
   - Liam Girdwood <lgirdwood@gmail.com>
   - Mark Brown <broonie@kernel.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
index f290eb72a878..3b764415c9ab 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 Analog Codec Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml
new file mode 100644
index 000000000000..9718358826ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/allwinner,sun8i-a23-codec-analog.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A23 Analog Codec Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      # FIXME: This is documented in the PRCM binding, but needs to be
+      # migrated here at some point
+      # - allwinner,sun8i-a23-codec-analog
+      - allwinner,sun8i-h3-codec-analog
+      - allwinner,sun8i-v3s-codec-analog
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    codec_analog: codec-analog@1f015c0 {
+      compatible = "allwinner,sun8i-h3-codec-analog";
+      reg = <0x01f015c0 0x4>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
index 5e7cc05bbff1..55d28268d2f4 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
@@ -8,7 +8,7 @@ title: Allwinner A33 Codec Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/amlogic,axg-fifo.txt b/Documentation/devicetree/bindings/sound/amlogic,axg-fifo.txt
index 3080979350a0..fa4545ed81ca 100644
--- a/Documentation/devicetree/bindings/sound/amlogic,axg-fifo.txt
+++ b/Documentation/devicetree/bindings/sound/amlogic,axg-fifo.txt
@@ -17,6 +17,9 @@ Required properties:
   * "arb" : memory ARB line (required)
   * "rst" : dedicated device reset line (optional)
 - #sound-dai-cells: must be 0.
+- amlogic,fifo-depth: The size of the controller's fifo in bytes. This
+  		      is useful for determining certain configuration such
+		      as the flush threshold of the fifo
 
 Example of FRDDR A on the A113 SoC:
 
@@ -27,4 +30,5 @@ frddr_a: audio-controller@1c0 {
 	interrupts = <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>;
 	clocks = <&clkc_audio AUD_CLKID_FRDDR_A>;
 	resets = <&arb AXG_ARB_FRDDR_A>;
+	fifo-depth = <512>;
 };
diff --git a/Documentation/devicetree/bindings/sound/arndale.txt b/Documentation/devicetree/bindings/sound/arndale.txt
index 0e76946385ae..17530120ccfc 100644
--- a/Documentation/devicetree/bindings/sound/arndale.txt
+++ b/Documentation/devicetree/bindings/sound/arndale.txt
@@ -1,8 +1,9 @@
 Audio Binding for Arndale boards
 
 Required properties:
-- compatible : Can be the following,
-			"samsung,arndale-rt5631"
+- compatible : Can be one of the following:
+		"samsung,arndale-rt5631",
+		"samsung,arndale-wm1811"
 
 - samsung,audio-cpu: The phandle of the Samsung I2S controller
 - samsung,audio-codec: The phandle of the audio codec
diff --git a/Documentation/devicetree/bindings/sound/fsl,asrc.txt b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
index 1d4d9f938689..cb9a25165503 100644
--- a/Documentation/devicetree/bindings/sound/fsl,asrc.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
@@ -8,7 +8,12 @@ three substreams within totally 10 channels.
 
 Required properties:
 
-  - compatible		: Contains "fsl,imx35-asrc" or "fsl,imx53-asrc".
+  - compatible		: Compatible list, should contain one of the following
+			  compatibles:
+			  "fsl,imx35-asrc",
+			  "fsl,imx53-asrc",
+			  "fsl,imx8qm-asrc",
+			  "fsl,imx8qxp-asrc",
 
   - reg			: Offset and length of the register set for the device.
 
@@ -35,6 +40,11 @@ Required properties:
 
    - fsl,asrc-width	: Defines a mutual sample width used by DPCM Back Ends.
 
+   - fsl,asrc-clk-map   : Defines clock map used in driver. which is required
+			  by imx8qm/imx8qxp platform
+			  <0> - select the map for asrc0 in imx8qm/imx8qxp
+			  <1> - select the map for asrc1 in imx8qm/imx8qxp
+
 Optional properties:
 
    - big-endian		: If this property is absent, the little endian mode
diff --git a/Documentation/devicetree/bindings/sound/fsl,mqs.txt b/Documentation/devicetree/bindings/sound/fsl,mqs.txt
new file mode 100644
index 000000000000..40353fc30255
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,mqs.txt
@@ -0,0 +1,36 @@
+fsl,mqs audio CODEC
+
+Required properties:
+  - compatible : Must contain one of "fsl,imx6sx-mqs", "fsl,codec-mqs"
+		"fsl,imx8qm-mqs", "fsl,imx8qxp-mqs".
+  - clocks : A list of phandles + clock-specifiers, one for each entry in
+	     clock-names
+  - clock-names : "mclk" - must required.
+		  "core" - required if compatible is "fsl,imx8qm-mqs", it
+		           is for register access.
+  - gpr : A phandle of General Purpose Registers in IOMUX Controller.
+	  Required if compatible is "fsl,imx6sx-mqs".
+
+Required if compatible is "fsl,imx8qm-mqs":
+  - power-domains: A phandle of PM domain provider node.
+  - reg: Offset and length of the register set for the device.
+
+Example:
+
+mqs: mqs {
+	compatible = "fsl,imx6sx-mqs";
+	gpr = <&gpr>;
+	clocks = <&clks IMX6SX_CLK_SAI1>;
+	clock-names = "mclk";
+	status = "disabled";
+};
+
+mqs: mqs@59850000 {
+	compatible = "fsl,imx8qm-mqs";
+	reg = <0x59850000 0x10000>;
+	clocks = <&clk IMX8QM_AUD_MQS_IPG>,
+		 <&clk IMX8QM_AUD_MQS_HMCLK>;
+	clock-names = "core", "mclk";
+	power-domains = <&pd_mqs0>;
+	status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt
index 1084f7f22eea..8ca52dcc5572 100644
--- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt
+++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt
@@ -1,4 +1,4 @@
-* Audio codec controlled by ChromeOS EC
+Audio codec controlled by ChromeOS EC
 
 Google's ChromeOS EC codec is a digital mic codec provided by the
 Embedded Controller (EC) and is controlled via a host-command interface.
@@ -9,10 +9,27 @@ Documentation/devicetree/bindings/mfd/cros-ec.txt).
 Required properties:
 - compatible: Must contain "google,cros-ec-codec"
 - #sound-dai-cells: Should be 1. The cell specifies number of DAIs.
-- max-dmic-gain: A number for maximum gain in dB on digital microphone.
+
+Optional properties:
+- reg: Pysical base address and length of shared memory region from EC.
+       It contains 3 unsigned 32-bit integer.  The first 2 integers
+       combine to become an unsigned 64-bit physical address.  The last
+       one integer is length of the shared memory.
+- memory-region: Shared memory region to EC.  A "shared-dma-pool".  See
+                 ../reserved-memory/reserved-memory.txt for details.
 
 Example:
 
+{
+	...
+
+	reserved_mem: reserved_mem {
+		compatible = "shared-dma-pool";
+		reg = <0 0x52800000 0 0x100000>;
+		no-map;
+	};
+}
+
 cros-ec@0 {
 	compatible = "google,cros-ec-spi";
 
@@ -21,6 +38,7 @@ cros-ec@0 {
 	cros_ec_codec: ec-codec {
 		compatible = "google,cros-ec-codec";
 		#sound-dai-cells = <1>;
-		max-dmic-gain = <43>;
+		reg = <0x0 0x10500000 0x80000>;
+		memory-region = <&reserved_mem>;
 	};
 };
diff --git a/Documentation/devicetree/bindings/sound/gtm601.txt b/Documentation/devicetree/bindings/sound/gtm601.txt
index 5efc8c068de0..efa32a486c4a 100644
--- a/Documentation/devicetree/bindings/sound/gtm601.txt
+++ b/Documentation/devicetree/bindings/sound/gtm601.txt
@@ -1,10 +1,16 @@
 GTM601 UMTS modem audio interface CODEC
 
-This device has no configuration interface. Sample rate is fixed - 8kHz.
+This device has no configuration interface. The sample rate and channels are
+based on the compatible string
+	"option,gtm601" = 8kHz mono
+	"broadmobi,bm818" = 48KHz stereo
 
 Required properties:
 
-  - compatible : "option,gtm601"
+  - compatible : one of
+	"option,gtm601"
+	"broadmobi,bm818"
+
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
new file mode 100644
index 000000000000..eb4be86464bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/ingenic,codec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ingenic JZ47xx internal codec DT bindings
+
+maintainers:
+  - Paul Cercueil <paul@crapouillou.net>
+
+properties:
+  $nodename:
+    pattern: '^audio-codec@.*'
+
+  compatible:
+    oneOf:
+      - const: ingenic,jz4770-codec
+      - const: ingenic,jz4725b-codec
+      - const: ingenic,jz4740-codec
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: aic
+
+  '#sound-dai-cells':
+    const: 0
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#sound-dai-cells'
+
+examples:
+  - |
+    #include <dt-bindings/clock/jz4740-cgu.h>
+    codec: audio-codec@10020080 {
+      compatible = "ingenic,jz4740-codec";
+      reg = <0x10020080 0x8>;
+      #sound-dai-cells = <0>;
+      clocks = <&cgu JZ4740_CLK_AIC>;
+      clock-names = "aic";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/ingenic,jz4725b-codec.txt b/Documentation/devicetree/bindings/sound/ingenic,jz4725b-codec.txt
deleted file mode 100644
index 05adc0d47b13..000000000000
--- a/Documentation/devicetree/bindings/sound/ingenic,jz4725b-codec.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Ingenic JZ4725B codec controller
-
-Required properties:
-- compatible : "ingenic,jz4725b-codec"
-- reg : codec registers location and length
-- clocks : phandle to the AIC clock.
-- clock-names: must be set to "aic".
-- #sound-dai-cells: Must be set to 0.
-
-Example:
-
-codec: audio-codec@100200a4 {
-	compatible = "ingenic,jz4725b-codec";
-	reg = <0x100200a4 0x8>;
-
-	#sound-dai-cells = <0>;
-
-	clocks = <&cgu JZ4725B_CLK_AIC>;
-	clock-names = "aic";
-};
diff --git a/Documentation/devicetree/bindings/sound/ingenic,jz4740-codec.txt b/Documentation/devicetree/bindings/sound/ingenic,jz4740-codec.txt
deleted file mode 100644
index 1ffcade87e7b..000000000000
--- a/Documentation/devicetree/bindings/sound/ingenic,jz4740-codec.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Ingenic JZ4740 codec controller
-
-Required properties:
-- compatible : "ingenic,jz4740-codec"
-- reg : codec registers location and length
-- clocks : phandle to the AIC clock.
-- clock-names: must be set to "aic".
-- #sound-dai-cells: Must be set to 0.
-
-Example:
-
-codec: audio-codec@10020080 {
-	compatible = "ingenic,jz4740-codec";
-	reg = <0x10020080 0x8>;
-
-	#sound-dai-cells = <0>;
-
-	clocks = <&cgu JZ4740_CLK_AIC>;
-	clock-names = "aic";
-};
diff --git a/Documentation/devicetree/bindings/sound/mt8183-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt8183-afe-pcm.txt
index 396ba38619f6..1f1cba4152ce 100644
--- a/Documentation/devicetree/bindings/sound/mt8183-afe-pcm.txt
+++ b/Documentation/devicetree/bindings/sound/mt8183-afe-pcm.txt
@@ -4,6 +4,10 @@ Required properties:
 - compatible = "mediatek,mt68183-audio";
 - reg: register location and size
 - interrupts: should contain AFE interrupt
+- resets: Must contain an entry for each entry in reset-names
+  See ../reset/reset.txt for details.
+- reset-names: should have these reset names:
+		"audiosys";
 - power-domains: should define the power domain
 - clocks: Must contain an entry for each entry in clock-names
 - clock-names: should have these clock names:
@@ -20,6 +24,8 @@ Example:
 		compatible = "mediatek,mt8183-audio";
 		reg = <0 0x11220000 0 0x1000>;
 		interrupts = <GIC_SPI 161 IRQ_TYPE_LEVEL_LOW>;
+		resets = <&watchdog MT8183_TOPRGU_AUDIO_SW_RST>;
+		reset-names = "audiosys";
 		power-domains = <&scpsys MT8183_POWER_DOMAIN_AUDIO>;
 		clocks = <&infrasys CLK_INFRA_AUDIO>,
 			 <&infrasys CLK_INFRA_AUDIO_26M_BCLK>,
diff --git a/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt b/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt
index d6d5207fa996..decaa013a07e 100644
--- a/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt
+++ b/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt
@@ -2,14 +2,19 @@ MT8183 with MT6358, TS3A227 and MAX98357 CODECS
 
 Required properties:
 - compatible : "mediatek,mt8183_mt6358_ts3a227_max98357"
-- mediatek,headset-codec: the phandles of ts3a227 codecs
 - mediatek,platform: the phandle of MT8183 ASoC platform
 
+Optional properties:
+- mediatek,headset-codec: the phandles of ts3a227 codecs
+- mediatek,ec-codec: the phandle of EC codecs.
+                     See google,cros-ec-codec.txt for more details.
+
 Example:
 
 	sound {
 		compatible = "mediatek,mt8183_mt6358_ts3a227_max98357";
 		mediatek,headset-codec = <&ts3a227>;
+		mediatek,ec-codec = <&ec_codec>;
 		mediatek,platform = <&afe>;
 	};
 
diff --git a/Documentation/devicetree/bindings/sound/qcom,sdm845.txt b/Documentation/devicetree/bindings/sound/qcom,sdm845.txt
index 408c4837e6d5..ca8c89e88bfa 100644
--- a/Documentation/devicetree/bindings/sound/qcom,sdm845.txt
+++ b/Documentation/devicetree/bindings/sound/qcom,sdm845.txt
@@ -5,7 +5,10 @@ This binding describes the SDM845 sound card, which uses qdsp for audio.
 - compatible:
 	Usage: required
 	Value type: <stringlist>
-	Definition: must be "qcom,sdm845-sndcard"
+	Definition: must be one of this
+			"qcom,sdm845-sndcard"
+			"qcom,db845c-sndcard"
+			"lenovo,yoga-c630-sndcard"
 
 - audio-routing:
 	Usage: Optional
diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml
new file mode 100644
index 000000000000..38eaf0c028f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/qcom,wcd934x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bindings for Qualcomm WCD9340/WCD9341 Audio Codec
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+  Qualcomm WCD9340/WCD9341 Codec is a standalone Hi-Fi audio codec IC.
+  It has in-built Soundwire controller, pin controller, interrupt mux and
+  supports both I2S/I2C and SLIMbus audio interfaces.
+
+properties:
+  compatible:
+    const: slim217,250
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  reset-gpios:
+    description: GPIO spec for reset line to use
+    maxItems: 1
+
+  slim-ifc-dev: true
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: extclk
+
+  vdd-buck-supply:
+    description: A reference to the 1.8V buck supply
+
+  vdd-buck-sido-supply:
+    description: A reference to the 1.8V SIDO buck supply
+
+  vdd-rx-supply:
+    description: A reference to the 1.8V rx supply
+
+  vdd-tx-supply:
+    description: A reference to the 1.8V tx supply
+
+  vdd-vbat-supply:
+    description: A reference to the vbat supply
+
+  vdd-io-supply:
+    description: A reference to the 1.8V I/O supply
+
+  vdd-micbias-supply:
+    description: A reference to the micbias supply
+
+  qcom,micbias1-microvolt:
+    description: micbias1 voltage
+    minimum: 1800000
+    maximum: 2850000
+
+  qcom,micbias2-microvolt:
+    description: micbias2 voltage
+    minimum: 1800000
+    maximum: 2850000
+
+  qcom,micbias3-microvolt:
+    description: micbias3 voltage
+    minimum: 1800000
+    maximum: 2850000
+
+  qcom,micbias4-microvolt:
+    description: micbias4 voltage
+    minimum: 1800000
+    maximum: 2850000
+
+  clock-output-names:
+    const: mclk
+
+  clock-frequency:
+    description: Clock frequency of output clk in Hz
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 1
+
+  '#clock-cells':
+    const: 0
+
+  '#sound-dai-cells':
+    const: 1
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  gpio@42:
+    type: object
+    allOf:
+      - $ref: ../gpio/qcom,wcd934x-gpio.yaml#
+
+patternProperties:
+  "^.*@[0-9a-f]+$":
+    type: object
+    description: |
+      WCD934x subnode for each slave devices. Bindings of each subnodes
+      depends on the specific driver providing the functionality and
+      documented in their respective bindings.
+
+    properties:
+      reg:
+        maxItems: 1
+
+    required:
+      - reg
+
+required:
+  - compatible
+  - reg
+  - reset-gpios
+  - slim-ifc-dev
+  - interrupts
+  - interrupt-controller
+  - clock-frequency
+  - clock-output-names
+  - qcom,micbias1-microvolt
+  - qcom,micbias2-microvolt
+  - qcom,micbias3-microvolt
+  - qcom,micbias4-microvolt
+  - "#interrupt-cells"
+  - "#clock-cells"
+  - "#sound-dai-cells"
+  - "#address-cells"
+  - "#size-cells"
+
+examples:
+  - |
+    codec@1,0{
+        compatible = "slim217,250";
+        reg  = <1 0>;
+        reset-gpios = <&tlmm 64 0>;
+        slim-ifc-dev  = <&wcd9340_ifd>;
+        #sound-dai-cells = <1>;
+        interrupt-parent = <&tlmm>;
+        interrupts = <54 4>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+        #clock-cells = <0>;
+        clock-frequency = <9600000>;
+        clock-output-names = "mclk";
+        qcom,micbias1-microvolt = <1800000>;
+        qcom,micbias2-microvolt = <1800000>;
+        qcom,micbias3-microvolt = <1800000>;
+        qcom,micbias4-microvolt = <1800000>;
+        clock-names = "extclk";
+        clocks = <&rpmhcc 2>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        gpio@42 {
+            compatible = "qcom,wcd9340-gpio";
+            reg = <0x42 0x2>;
+            gpio-controller;
+            #gpio-cells = <2>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml
new file mode 100644
index 000000000000..ea44d03e58ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/qcom,wsa881x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bindings for Qualcomm WSA8810/WSA8815 Class-D Smart Speaker Amplifier
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+  WSA8810 is a class-D smart speaker amplifier and WSA8815
+  is a high-output power class-D smart speaker amplifier.
+  Their primary operating mode uses a SoundWire digital audio
+  interface. This binding is for SoundWire interface.
+
+properties:
+  compatible:
+    const: sdw10217201000
+
+  reg:
+    maxItems: 1
+
+  powerdown-gpios:
+    description: GPIO spec for Powerdown/Shutdown line to use
+    maxItems: 1
+
+  '#thermal-sensor-cells':
+    const: 0
+
+  '#sound-dai-cells':
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - powerdown-gpios
+  - "#thermal-sensor-cells"
+  - "#sound-dai-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    soundwire@c2d0000 {
+        #address-cells = <2>;
+        #size-cells = <0>;
+        reg = <0x0c2d0000 0x2000>;
+
+        speaker@0,1 {
+            compatible = "sdw10217201000";
+            reg = <0 1>;
+            powerdown-gpios = <&wcdpinctrl 2 0>;
+            #thermal-sensor-cells = <0>;
+            #sound-dai-cells = <0>;
+        };
+
+        speaker@0,2 {
+            compatible = "sdw10217201000";
+            reg = <0 2>;
+            powerdown-gpios = <&wcdpinctrl 2 0>;
+            #thermal-sensor-cells = <0>;
+            #sound-dai-cells = <0>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/renesas,fsi.txt b/Documentation/devicetree/bindings/sound/renesas,fsi.txt
deleted file mode 100644
index 0cf0f819b823..000000000000
--- a/Documentation/devicetree/bindings/sound/renesas,fsi.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Renesas FSI
-
-Required properties:
-- compatible			: "renesas,fsi2-<soctype>",
-				  "renesas,sh_fsi2" or "renesas,sh_fsi" as
-				  fallback.
-				  Examples with soctypes are:
-				    - "renesas,fsi2-r8a7740" (R-Mobile A1)
-				    - "renesas,fsi2-sh73a0" (SH-Mobile AG5)
-- reg				: Should contain the register physical address and length
-- interrupts			: Should contain FSI interrupt
-
-- fsia,spdif-connection		: FSI is connected by S/PDIF
-- fsia,stream-mode-support	: FSI supports 16bit stream mode.
-- fsia,use-internal-clock	: FSI uses internal clock when master mode.
-
-- fsib,spdif-connection		: same as fsia
-- fsib,stream-mode-support	: same as fsia
-- fsib,use-internal-clock	: same as fsia
-
-Example:
-
-sh_fsi2: sh_fsi2@ec230000 {
-	compatible = "renesas,sh_fsi2";
-	reg = <0xec230000 0x400>;
-	interrupts = <0 146 0x4>;
-
-	fsia,spdif-connection;
-	fsia,stream-mode-support;
-	fsia,use-internal-clock;
-};
diff --git a/Documentation/devicetree/bindings/sound/renesas,fsi.yaml b/Documentation/devicetree/bindings/sound/renesas,fsi.yaml
new file mode 100644
index 000000000000..140a37fc3c0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/renesas,fsi.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/renesas,fsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas FSI Sound Driver Device Tree Bindings
+
+maintainers:
+  - Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+
+properties:
+  $nodename:
+    pattern: "^sound@.*"
+
+  compatible:
+    oneOf:
+      # for FSI2 SoC
+      - items:
+        - enum:
+          - renesas,fsi2-sh73a0
+          - renesas,fsi2-r8a7740
+        - enum:
+          - renesas,sh_fsi2
+      # for Generic
+      - items:
+        - enum:
+          - renesas,sh_fsi
+          - renesas,sh_fsi2
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  fsia,spdif-connection:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: FSI is connected by S/PDIF
+
+  fsia,stream-mode-support:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: FSI supports 16bit stream mode
+
+  fsia,use-internal-clock:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: FSI uses internal clock when master mode
+
+  fsib,spdif-connection:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: same as fsia
+
+  fsib,stream-mode-support:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: same as fsia
+
+  fsib,use-internal-clock:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: same as fsia
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    sh_fsi2: sound@ec230000 {
+            compatible = "renesas,fsi2-r8a7740", "renesas,sh_fsi2";
+            reg = <0xec230000 0x400>;
+            interrupts = <0 146 0x4>;
+
+            fsia,spdif-connection;
+            fsia,stream-mode-support;
+            fsia,use-internal-clock;
+    };
diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
index 5c52182f7dcf..797fd035434c 100644
--- a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
+++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
@@ -268,6 +268,7 @@ Required properties:
 				    - "renesas,rcar_sound-r8a7745" (RZ/G1E)
 				    - "renesas,rcar_sound-r8a77470" (RZ/G1C)
 				    - "renesas,rcar_sound-r8a774a1" (RZ/G2M)
+				    - "renesas,rcar_sound-r8a774b1" (RZ/G2N)
 				    - "renesas,rcar_sound-r8a774c0" (RZ/G2E)
 				    - "renesas,rcar_sound-r8a7778" (R-Car M1A)
 				    - "renesas,rcar_sound-r8a7779" (R-Car H1)
diff --git a/Documentation/devicetree/bindings/sound/rockchip-max98090.txt b/Documentation/devicetree/bindings/sound/rockchip-max98090.txt
index a805aa99ad75..e9c58b204399 100644
--- a/Documentation/devicetree/bindings/sound/rockchip-max98090.txt
+++ b/Documentation/devicetree/bindings/sound/rockchip-max98090.txt
@@ -5,15 +5,38 @@ Required properties:
 - rockchip,model: The user-visible name of this sound complex
 - rockchip,i2s-controller: The phandle of the Rockchip I2S controller that's
   connected to the CODEC
-- rockchip,audio-codec: The phandle of the MAX98090 audio codec
-- rockchip,headset-codec: The phandle of Ext chip for jack detection
+
+Optional properties:
+- rockchip,audio-codec: The phandle of the MAX98090 audio codec.
+- rockchip,headset-codec: The phandle of Ext chip for jack detection. This is
+                          required if there is rockchip,audio-codec.
+- rockchip,hdmi-codec: The phandle of HDMI device for HDMI codec.
 
 Example:
 
+/* For max98090-only board. */
+sound {
+	compatible = "rockchip,rockchip-audio-max98090";
+	rockchip,model = "ROCKCHIP-I2S";
+	rockchip,i2s-controller = <&i2s>;
+	rockchip,audio-codec = <&max98090>;
+	rockchip,headset-codec = <&headsetcodec>;
+};
+
+/* For HDMI-only board. */
+sound {
+	compatible = "rockchip,rockchip-audio-max98090";
+	rockchip,model = "ROCKCHIP-I2S";
+	rockchip,i2s-controller = <&i2s>;
+	rockchip,hdmi-codec = <&hdmi>;
+};
+
+/* For max98090 plus HDMI board. */
 sound {
 	compatible = "rockchip,rockchip-audio-max98090";
 	rockchip,model = "ROCKCHIP-I2S";
 	rockchip,i2s-controller = <&i2s>;
 	rockchip,audio-codec = <&max98090>;
 	rockchip,headset-codec = <&headsetcodec>;
+	rockchip,hdmi-codec = <&hdmi>;
 };
diff --git a/Documentation/devicetree/bindings/sound/rt1011.txt b/Documentation/devicetree/bindings/sound/rt1011.txt
index 35a23e60d679..02d53b9aa247 100644
--- a/Documentation/devicetree/bindings/sound/rt1011.txt
+++ b/Documentation/devicetree/bindings/sound/rt1011.txt
@@ -20,6 +20,14 @@ Required properties:
         |     1     |    1     |   0x3b     |
         -------------------------------------
 
+Optional properties:
+
+- realtek,temperature_calib
+  u32. The temperature was measured while doing the calibration. Units: Celsius degree
+
+- realtek,r0_calib
+  u32. This is r0 calibration data which was measured in factory mode.
+
 Pins on the device (for linking into audio routes) for RT1011:
 
   * SPO
@@ -29,4 +37,6 @@ Example:
 rt1011: codec@38 {
 	compatible = "realtek,rt1011";
 	reg = <0x38>;
+	realtek,temperature_calib = <25>;
+	realtek,r0_calib = <0x224050>;
 };
diff --git a/Documentation/devicetree/bindings/sound/rt1015.txt b/Documentation/devicetree/bindings/sound/rt1015.txt
new file mode 100644
index 000000000000..fcfd02d8d32f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt1015.txt
@@ -0,0 +1,17 @@
+RT1015 Mono Class D Audio Amplifier
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt1015".
+
+- reg : The I2C address of the device.
+
+
+Example:
+
+rt1015: codec@28 {
+	compatible = "realtek,rt1015";
+	reg = <0x28>;
+};
diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt
index a03f9a872a71..41a62fd2ae1f 100644
--- a/Documentation/devicetree/bindings/sound/rt5645.txt
+++ b/Documentation/devicetree/bindings/sound/rt5645.txt
@@ -10,6 +10,10 @@ Required properties:
 
 - interrupts : The CODEC's interrupt output.
 
+- avdd-supply: Power supply for AVDD, providing 1.8V.
+
+- cpvdd-supply: Power supply for CPVDD, providing 3.5V.
+
 Optional properties:
 
 - hp-detect-gpios:
diff --git a/Documentation/devicetree/bindings/sound/rt5682.txt b/Documentation/devicetree/bindings/sound/rt5682.txt
index 312e9a129530..30e927a28369 100644
--- a/Documentation/devicetree/bindings/sound/rt5682.txt
+++ b/Documentation/devicetree/bindings/sound/rt5682.txt
@@ -27,6 +27,11 @@ Optional properties:
 
 - realtek,ldo1-en-gpios : The GPIO that controls the CODEC's LDO1_EN pin.
 
+- realtek,btndet-delay
+  The debounce delay for push button.
+  The delay time is realtek,btndet-delay value multiple of 8.192 ms.
+  If absent, the default is 16.
+
 Pins on the device (for linking into audio routes) for RT5682:
 
   * DMIC L1
@@ -47,4 +52,5 @@ rt5682 {
 	realtek,dmic1-data-pin = <1>;
 	realtek,dmic1-clk-pin = <1>;
 	realtek,jd-src = <1>;
+	realtek,btndet-delay = <16>;
 };
diff --git a/Documentation/devicetree/bindings/sound/samsung,odroid.txt b/Documentation/devicetree/bindings/sound/samsung,odroid.txt
deleted file mode 100644
index e9da2200e173..000000000000
--- a/Documentation/devicetree/bindings/sound/samsung,odroid.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Samsung Exynos Odroid XU3/XU4 audio complex with MAX98090 codec
-
-Required properties:
-
- - compatible - "hardkernel,odroid-xu3-audio" - for Odroid XU3 board,
-		"hardkernel,odroid-xu4-audio" - for Odroid XU4 board (deprecated),
-		"samsung,odroid-xu3-audio" - for Odroid XU3 board (deprecated),
-		"samsung,odroid-xu4-audio" - for Odroid XU4 board (deprecated)
- - model - the user-visible name of this sound complex
- - clocks - should contain entries matching clock names in the clock-names
-    property
- - samsung,audio-widgets - this property specifies off-codec audio elements
-   like headphones or speakers, for details see widgets.txt
- - samsung,audio-routing - a list of the connections between audio
-   components;  each entry is a pair of strings, the first being the
-   connection's sink, the second being the connection's source;
-   valid names for sources and sinks are the MAX98090's pins (as
-   documented in its binding), and the jacks on the board
-
-   For Odroid X2:
-     "Headphone Jack", "Mic Jack", "DMIC"
-
-   For Odroid U3, XU3:
-     "Headphone Jack", "Speakers"
-
-   For Odroid XU4:
-     no entries
-
-Required sub-nodes:
-
- - 'cpu' subnode with a 'sound-dai' property containing the phandle of the I2S
-    controller
- - 'codec' subnode with a 'sound-dai' property containing list of phandles
-    to the CODEC nodes, first entry must be corresponding to the MAX98090
-    CODEC and the second entry must be the phandle of the HDMI IP block node
-
-Example:
-
-sound {
-	compatible = "hardkernel,odroid-xu3-audio";
-	model = "Odroid-XU3";
-	samsung,audio-routing =
-		"Headphone Jack", "HPL",
-		"Headphone Jack", "HPR",
-		"IN1", "Mic Jack",
-		"Mic Jack", "MICBIAS";
-
-	cpu {
-		sound-dai = <&i2s0 0>;
-	};
-	codec {
-		sound-dai = <&hdmi>, <&max98090>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/sound/samsung,odroid.yaml b/Documentation/devicetree/bindings/sound/samsung,odroid.yaml
new file mode 100644
index 000000000000..c6b244352d05
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/samsung,odroid.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/samsung,odroid.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos Odroid XU3/XU4 audio complex with MAX98090 codec
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: hardkernel,odroid-xu3-audio
+
+      - const: hardkernel,odroid-xu4-audio
+        deprecated: true
+
+      - const: samsung,odroid-xu3-audio
+        deprecated: true
+
+      - const: samsung,odroid-xu4-audio
+        deprecated: true
+
+  model:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: The user-visible name of this sound complex.
+
+  cpu:
+    type: object
+    properties:
+      sound-dai:
+        $ref: /schemas/types.yaml#/definitions/phandle-array
+        description: phandles to the I2S controllers
+
+  codec:
+    type: object
+    properties:
+      sound-dai:
+        $ref: /schemas/types.yaml#/definitions/phandle-array
+        description: |
+          List of phandles to the CODEC nodes,
+          first entry must be corresponding to the MAX98090 CODEC and
+          the second entry must be the phandle of the HDMI IP block node.
+
+  samsung,audio-routing:
+    $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+    description: |
+      List of the connections between audio
+      components;  each entry is a pair of strings, the first being the
+      connection's sink, the second being the connection's source;
+      valid names for sources and sinks are the MAX98090's pins (as
+      documented in its binding), and the jacks on the board.
+      For Odroid X2: "Headphone Jack", "Mic Jack", "DMIC"
+      For Odroid U3, XU3: "Headphone Jack", "Speakers"
+      For Odroid XU4: no entries
+
+  samsung,audio-widgets:
+    $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+    description: |
+      This property specifies off-codec audio elements
+      like headphones or speakers, for details see widgets.txt
+
+required:
+  - compatible
+  - model
+  - cpu
+  - codec
+
+examples:
+  - |
+    sound {
+        compatible = "hardkernel,odroid-xu3-audio";
+        model = "Odroid-XU3";
+        samsung,audio-routing =
+                "Headphone Jack", "HPL",
+                "Headphone Jack", "HPR",
+                "IN1", "Mic Jack",
+                "Mic Jack", "MICBIAS";
+
+        cpu {
+            sound-dai = <&i2s0 0>;
+        };
+
+        codec {
+            sound-dai = <&hdmi>, <&max98090>;
+        };
+    };
+
diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.txt b/Documentation/devicetree/bindings/sound/samsung-i2s.txt
deleted file mode 100644
index a88cb00fa096..000000000000
--- a/Documentation/devicetree/bindings/sound/samsung-i2s.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-* Samsung I2S controller
-
-Required SoC Specific Properties:
-
-- compatible : should be one of the following.
-   - samsung,s3c6410-i2s: for 8/16/24bit stereo I2S.
-   - samsung,s5pv210-i2s: for 8/16/24bit multichannel(5.1) I2S with
-     secondary fifo, s/w reset control and internal mux for root clk src.
-   - samsung,exynos5420-i2s: for 8/16/24bit multichannel(5.1) I2S for
-     playback, stereo channel capture, secondary fifo using internal
-     or external dma, s/w reset control, internal mux for root clk src
-     and 7.1 channel TDM support for playback. TDM (Time division multiplexing)
-     is to allow transfer of multiple channel audio data on single data line.
-   - samsung,exynos7-i2s: with all the available features of exynos5 i2s,
-     exynos7 I2S has 7.1 channel TDM support for capture, secondary fifo
-     with only external dma and more no.of root clk sampling frequencies.
-   - samsung,exynos7-i2s1: I2S1 on previous samsung platforms supports
-     stereo channels. exynos7 i2s1 upgraded to 5.1 multichannel with
-     slightly modified bit offsets.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- dmas: list of DMA controller phandle and DMA request line ordered pairs.
-- dma-names: identifier string for each DMA request line in the dmas property.
-  These strings correspond 1:1 with the ordered pairs in dmas.
-- clocks: Handle to iis clock and RCLK source clk.
-- clock-names:
-  i2s0 uses some base clocks from CMU and some are from audio subsystem internal
-  clock controller. The clock names for i2s0 should be "iis", "i2s_opclk0" and
-  "i2s_opclk1" as shown in the example below.
-  i2s1 and i2s2 uses clocks from CMU. The clock names for i2s1 and i2s2 should
-  be "iis" and "i2s_opclk0".
-  "iis" is the i2s bus clock and i2s_opclk0, i2s_opclk1 are sources of the root
-  clk. i2s0 has internal mux to select the source of root clk and i2s1 and i2s2
-  doesn't have any such mux.
-- #clock-cells: should be 1, this property must be present if the I2S device
-  is a clock provider in terms of the common clock bindings, described in
-  ../clock/clock-bindings.txt.
-- clock-output-names (deprecated): from the common clock bindings, names of
-  the CDCLK I2S output clocks, suggested values are "i2s_cdclk0", "i2s_cdclk1",
-  "i2s_cdclk3" for the I2S0, I2S1, I2S2 devices respectively.
-
-There are following clocks available at the I2S device nodes:
- CLK_I2S_CDCLK    - the CDCLK (CODECLKO) gate clock,
- CLK_I2S_RCLK_PSR - the RCLK prescaler divider clock (corresponding to the
-		    IISPSR register),
- CLK_I2S_RCLK_SRC - the RCLKSRC mux clock (corresponding to RCLKSRC bit in
-		    IISMOD register).
-
-Refer to the SoC datasheet for availability of the above clocks.
-The CLK_I2S_RCLK_PSR and CLK_I2S_RCLK_SRC clocks are usually only available
-in the IIS Multi Audio Interface.
-
-Note: Old DTs may not have the #clock-cells property and then not use the I2S
-node as a clock supplier.
-
-Optional SoC Specific Properties:
-
-- samsung,idma-addr: Internal DMA register base address of the audio
-  sub system(used in secondary sound source).
-- pinctrl-0: Should specify pin control groups used for this controller.
-- pinctrl-names: Should contain only one value - "default".
-- #sound-dai-cells: should be 1.
-
-
-Example:
-
-i2s0: i2s@3830000 {
-	compatible = "samsung,s5pv210-i2s";
-	reg = <0x03830000 0x100>;
-	dmas = <&pdma0 10
-		&pdma0 9
-		&pdma0 8>;
-	dma-names = "tx", "rx", "tx-sec";
-	clocks = <&clock_audss EXYNOS_I2S_BUS>,
-		<&clock_audss EXYNOS_I2S_BUS>,
-		<&clock_audss EXYNOS_SCLK_I2S>;
-	clock-names = "iis", "i2s_opclk0", "i2s_opclk1";
-	#clock-cells = <1>;
-	samsung,idma-addr = <0x03000000>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&i2s0_bus>;
-	#sound-dai-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
new file mode 100644
index 000000000000..53e3bad4178c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/samsung-i2s.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC I2S controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+
+properties:
+  compatible:
+    description: |
+      samsung,s3c6410-i2s: for 8/16/24bit stereo I2S.
+
+      samsung,s5pv210-i2s: for 8/16/24bit multichannel (5.1) I2S with
+      secondary FIFO, s/w reset control and internal mux for root clock
+      source.
+
+      samsung,exynos5420-i2s: for 8/16/24bit multichannel (5.1) I2S for
+      playback, stereo channel capture, secondary FIFO using internal
+      or external DMA, s/w reset control, internal mux for root clock
+      source and 7.1 channel TDM support for playback; TDM (Time division
+      multiplexing) is to allow transfer of multiple channel audio data on
+      single data line.
+
+      samsung,exynos7-i2s: with all the available features of Exynos5 I2S.
+      Exynos7 I2S has 7.1 channel TDM support for capture, secondary FIFO
+      with only external DMA and more number of root clock sampling
+      frequencies.
+
+      samsung,exynos7-i2s1: I2S1 on previous samsung platforms supports
+      stereo channels. Exynos7 I2S1 upgraded to 5.1 multichannel with
+      slightly modified bit offsets.
+    enum:
+      - samsung,s3c6410-i2s
+      - samsung,s5pv210-i2s
+      - samsung,exynos5420-i2s
+      - samsung,exynos7-i2s
+      - samsung,exynos7-i2s1
+
+  reg:
+    maxItems: 1
+
+  dmas:
+    minItems: 2
+    maxItems: 3
+
+  dma-names:
+    oneOf:
+      - items:
+          - const: tx
+          - const: rx
+      - items:
+          - const: tx
+          - const: rx
+          - const: tx-sec
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    oneOf:
+      - items:
+          - const: iis
+      - items: # for I2S0
+          - const: iis
+          - const: i2s_opclk0
+          - const: i2s_opclk1
+      - items: # for I2S1 and I2S2
+          - const: iis
+          - const: i2s_opclk0
+    description: |
+      "iis" is the I2S bus clock and i2s_opclk0, i2s_opclk1 are sources
+      of the root clock. I2S0 has internal mux to select the source
+      of root clock and I2S1 and I2S2 doesn't have any such mux.
+
+  "#clock-cells":
+    const: 1
+
+  clock-output-names:
+    deprecated: true
+    oneOf:
+      - items: # for I2S0
+          - const: i2s_cdclk0
+      - items: # for I2S1
+          - const: i2s_cdclk1
+      - items: # for I2S2
+          - const: i2s_cdclk2
+    description: Names of the CDCLK I2S output clocks.
+
+  samsung,idma-addr:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Internal DMA register base address of the audio
+      subsystem (used in secondary sound source).
+
+  pinctrl-0:
+    description: Should specify pin control groups used for this controller.
+
+  pinctrl-names:
+    const: default
+
+  "#sound-dai-cells":
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - dmas
+  - dma-names
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos-audss-clk.h>
+
+    i2s0: i2s@3830000 {
+        compatible = "samsung,s5pv210-i2s";
+        reg = <0x03830000 0x100>;
+        dmas = <&pdma0 10>,
+                <&pdma0 9>,
+                <&pdma0 8>;
+        dma-names = "tx", "rx", "tx-sec";
+        clocks = <&clock_audss EXYNOS_I2S_BUS>,
+                <&clock_audss EXYNOS_I2S_BUS>,
+                <&clock_audss EXYNOS_SCLK_I2S>;
+        clock-names = "iis", "i2s_opclk0", "i2s_opclk1";
+        #clock-cells = <1>;
+        samsung,idma-addr = <0x03000000>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&i2s0_bus>;
+        #sound-dai-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/sound/sun4i-codec.txt b/Documentation/devicetree/bindings/sound/sun4i-codec.txt
deleted file mode 100644
index 66579bbd3294..000000000000
--- a/Documentation/devicetree/bindings/sound/sun4i-codec.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-* Allwinner A10 Codec
-
-Required properties:
-- compatible: must be one of the following compatibles:
-		- "allwinner,sun4i-a10-codec"
-		- "allwinner,sun6i-a31-codec"
-		- "allwinner,sun7i-a20-codec"
-		- "allwinner,sun8i-a23-codec"
-		- "allwinner,sun8i-h3-codec"
-		- "allwinner,sun8i-v3s-codec"
-- reg: must contain the registers location and length
-- interrupts: must contain the codec interrupt
-- dmas: DMA channels for tx and rx dma. See the DMA client binding,
-	Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: should include "tx" and "rx".
-- clocks: a list of phandle + clock-specifer pairs, one for each entry
-  in clock-names.
-- clock-names: should contain the following:
-   - "apb": the parent APB clock for this controller
-   - "codec": the parent module clock
-
-Optional properties:
-- allwinner,pa-gpios: gpio to enable external amplifier
-
-Required properties for the following compatibles:
-		- "allwinner,sun6i-a31-codec"
-		- "allwinner,sun8i-a23-codec"
-		- "allwinner,sun8i-h3-codec"
-		- "allwinner,sun8i-v3s-codec"
-- resets: phandle to the reset control for this device
-- allwinner,audio-routing: A list of the connections between audio components.
-			   Each entry is a pair of strings, the first being the
-			   connection's sink, the second being the connection's
-			   source. Valid names include:
-
-			   Audio pins on the SoC:
-			   "HP"
-			   "HPCOM"
-			   "LINEIN"	(not on sun8i-v3s)
-			   "LINEOUT"	(not on sun8i-a23 or sun8i-v3s)
-			   "MIC1"
-			   "MIC2"	(not on sun8i-v3s)
-			   "MIC3"	(sun6i-a31 only)
-
-			   Microphone biases from the SoC:
-			   "HBIAS"
-			   "MBIAS"	(not on sun8i-v3s)
-
-			   Board connectors:
-			   "Headphone"
-			   "Headset Mic"
-			   "Line In"
-			   "Line Out"
-			   "Mic"
-			   "Speaker"
-
-Required properties for the following compatibles:
-		- "allwinner,sun8i-a23-codec"
-		- "allwinner,sun8i-h3-codec"
-		- "allwinner,sun8i-v3s-codec"
-- allwinner,codec-analog-controls: A phandle to the codec analog controls
-				   block in the PRCM.
-
-Example:
-codec: codec@1c22c00 {
-	#sound-dai-cells = <0>;
-	compatible = "allwinner,sun7i-a20-codec";
-	reg = <0x01c22c00 0x40>;
-	interrupts = <0 30 4>;
-	clocks = <&apb0_gates 0>, <&codec_clk>;
-	clock-names = "apb", "codec";
-	dmas = <&dma 0 19>, <&dma 0 19>;
-	dma-names = "rx", "tx";
-};
-
-codec: codec@1c22c00 {
-	#sound-dai-cells = <0>;
-	compatible = "allwinner,sun6i-a31-codec";
-	reg = <0x01c22c00 0x98>;
-	interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&ccu CLK_APB1_CODEC>, <&ccu CLK_CODEC>;
-	clock-names = "apb", "codec";
-	resets = <&ccu RST_APB1_CODEC>;
-	dmas = <&dma 15>, <&dma 15>;
-	dma-names = "rx", "tx";
-	allwinner,audio-routing =
-		"Headphone", "HP",
-		"Speaker", "LINEOUT",
-		"LINEIN", "Line In",
-		"MIC1",	"MBIAS",
-		"MIC1", "Mic",
-		"MIC2", "HBIAS",
-		"MIC2", "Headset Mic";
-};
diff --git a/Documentation/devicetree/bindings/sound/sun8i-codec-analog.txt b/Documentation/devicetree/bindings/sound/sun8i-codec-analog.txt
deleted file mode 100644
index 07356758bd91..000000000000
--- a/Documentation/devicetree/bindings/sound/sun8i-codec-analog.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* Allwinner Codec Analog Controls
-
-Required properties:
-- compatible: must be one of the following compatibles:
-		- "allwinner,sun8i-a23-codec-analog"
-		- "allwinner,sun8i-h3-codec-analog"
-		- "allwinner,sun8i-v3s-codec-analog"
-
-Required properties if not a sub-node of the PRCM node:
-- reg: must contain the registers location and length
-
-Example:
-prcm: prcm@1f01400 {
-	codec_analog: codec-analog {
-		compatible = "allwinner,sun8i-a23-codec-analog";
-	};
-};
diff --git a/Documentation/devicetree/bindings/sound/tas2562.txt b/Documentation/devicetree/bindings/sound/tas2562.txt
new file mode 100644
index 000000000000..658e1fb18a99
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tas2562.txt
@@ -0,0 +1,34 @@
+Texas Instruments TAS2562 Smart PA
+
+The TAS2562 is a mono, digital input Class-D audio amplifier optimized for
+efficiently driving high peak power into small loudspeakers.
+Integrated speaker voltage and current sense provides for
+real time monitoring of loudspeaker behavior.
+
+Required properties:
+ - #address-cells  - Should be <1>.
+ - #size-cells     - Should be <0>.
+ - compatible:	   - Should contain "ti,tas2562".
+ - reg:		   - The i2c address. Should be 0x4c, 0x4d, 0x4e or 0x4f.
+ - ti,imon-slot-no:- TDM TX current sense time slot.
+
+Optional properties:
+- interrupt-parent: phandle to the interrupt controller which provides
+                    the interrupt.
+- interrupts: (GPIO) interrupt to which the chip is connected.
+- shut-down: GPIO used to control the state of the device.
+
+Examples:
+tas2562@4c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        compatible = "ti,tas2562";
+        reg = <0x4c>;
+
+        interrupt-parent = <&gpio1>;
+        interrupts = <14>;
+
+	shut-down = <&gpio1 15 0>;
+        ti,imon-slot-no = <0>;
+};
+
diff --git a/Documentation/devicetree/bindings/sound/tas2770.txt b/Documentation/devicetree/bindings/sound/tas2770.txt
new file mode 100644
index 000000000000..ede6bb3d9637
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tas2770.txt
@@ -0,0 +1,37 @@
+Texas Instruments TAS2770 Smart PA
+
+The TAS2770 is a mono, digital input Class-D audio amplifier optimized for
+efficiently driving high peak power into small loudspeakers.
+Integrated speaker voltage and current sense provides for
+real time monitoring of loudspeaker behavior.
+
+Required properties:
+
+ - compatible:	   - Should contain "ti,tas2770".
+ - reg:		       - The i2c address. Should contain <0x4c>, <0x4d>,<0x4e>, or <0x4f>.
+ - #address-cells  - Should be <1>.
+ - #size-cells     - Should be <0>.
+ - ti,asi-format:  - Sets TDM RX capture edge. 0->Rising; 1->Falling.
+ - ti,imon-slot-no:- TDM TX current sense time slot.
+ - ti,vmon-slot-no:- TDM TX voltage sense time slot.
+
+Optional properties:
+
+- interrupt-parent: the phandle to the interrupt controller which provides
+                     the interrupt.
+- interrupts: interrupt specification for data-ready.
+
+Examples:
+
+    tas2770@4c {
+                compatible = "ti,tas2770";
+                reg = <0x4c>;
+                #address-cells = <1>;
+                #size-cells = <0>;
+                interrupt-parent = <&msm_gpio>;
+                interrupts = <97 0>;
+                ti,asi-format = <0>;
+                ti,imon-slot-no = <0>;
+                ti,vmon-slot-no = <2>;
+        };
+
diff --git a/Documentation/devicetree/bindings/sound/ti,pcm3168a.txt b/Documentation/devicetree/bindings/sound/ti,pcm3168a.txt
index 5d9cb84c661d..a02ecaab5183 100644
--- a/Documentation/devicetree/bindings/sound/ti,pcm3168a.txt
+++ b/Documentation/devicetree/bindings/sound/ti,pcm3168a.txt
@@ -25,6 +25,13 @@ Required properties:
 
 For required properties on SPI/I2C, consult SPI/I2C device tree documentation
 
+Optional properties:
+
+  - reset-gpios : Optional reset gpio line connected to RST pin of the codec.
+		  The RST line is low active:
+		  RST = low: device power-down
+		  RST = high: device is enabled
+
 Examples:
 
 i2c0: i2c0@0 {
@@ -34,6 +41,7 @@ i2c0: i2c0@0 {
 	pcm3168a: audio-codec@44 {
 		compatible = "ti,pcm3168a";
 		reg = <0x44>;
+		reset-gpios = <&gpio0 4 GPIO_ACTIVE_LOW>;
 		clocks = <&clk_core CLK_AUDIO>;
 		clock-names = "scki";
 		VDD1-supply = <&supply3v3>;
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
index 5b3c33bb99e5..e372303697dc 100644
--- a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+++ b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
@@ -29,6 +29,11 @@ Optional properties:
         3 or MICBIAS_AVDD - MICBIAS output is connected to AVDD
 	If this node is not mentioned or if the value is unknown, then
 	micbias	is set to 2.0V.
+- ai31xx-ocmv - output common-mode voltage setting
+        0 - 1.35V,
+        1 - 1.5V,
+        2 - 1.65V,
+        3 - 1.8V
 
 Deprecated properties:
 
diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
new file mode 100644
index 000000000000..436547f3b155
--- /dev/null
+++ b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
@@ -0,0 +1,167 @@
+Qualcomm SoundWire Controller Bindings
+
+
+This binding describes the Qualcomm SoundWire Controller along with its
+board specific bus parameters.
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,soundwire-v<MAJOR>.<MINOR>.<STEP>",
+		    Example:
+			"qcom,soundwire-v1.3.0"
+			"qcom,soundwire-v1.5.0"
+			"qcom,soundwire-v1.6.0"
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: the base address and size of SoundWire controller
+		    address space.
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the SoundWire Controller IRQ
+
+- clock-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: should be "iface" for SoundWire Controller interface clock
+
+- clocks:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the SoundWire Controller interface clock
+
+- #sound-dai-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: must be 1 for digital audio interfaces on the controller.
+
+- qcom,dout-ports:
+	Usage: required
+	Value type: <u32>
+	Definition: must be count of data out ports
+
+- qcom,din-ports:
+	Usage: required
+	Value type: <u32>
+	Definition: must be count of data in ports
+
+- qcom,ports-offset1:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify payload transport window offset1 of each
+		    data port. Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-offset2:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify payload transport window offset2 of each
+		    data port. Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-sinterval-low:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should be sample interval low of each data port.
+		    Out ports followed by In ports. Used for Sample Interval
+		    calculation.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-word-length:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be size of payload channel sample.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-block-pack-mode:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be 0 or 1 to indicate the block packing mode.
+		    0 to indicate Blocks are per Channel
+		    1 to indicate Blocks are per Port.
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-block-group-count:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be in range 1 to 4 to indicate how many sample
+		    intervals are combined into a payload.
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-lane-control:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be in range 0 to 7 to identify which	data lane
+		    the data port uses.
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-hstart:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be number identifying lowerst numbered coloum in
+		    SoundWire Frame, i.e. left edge of the Transport sub-frame
+		    for each port. Values between 0 and 15 are valid.
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,ports-hstop:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be number identifying highest numbered coloum in
+		    SoundWire Frame, i.e. the right edge of the Transport
+		    sub-frame for each port. Values between 0 and 15 are valid.
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+- qcom,dports-type:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: should be one of the following types
+		    0 for reduced port
+		    1 for simple ports
+		    2 for full port
+		    Out ports followed by In ports.
+		    More info in MIPI Alliance SoundWire 1.0 Specifications.
+
+Note:
+	More Information on detail of encoding of these fields can be
+found in MIPI Alliance SoundWire 1.0 Specifications.
+
+= SoundWire devices
+Each subnode of the bus represents SoundWire device attached to it.
+The properties of these nodes are defined by the individual bindings.
+
+= EXAMPLE
+The following example represents a SoundWire controller on DB845c board
+which has controller integrated inside WCD934x codec on SDM845 SoC.
+
+soundwire: soundwire@c85 {
+	compatible = "qcom,soundwire-v1.3.0";
+	reg = <0xc85 0x20>;
+	interrupts = <20 IRQ_TYPE_EDGE_RISING>;
+	clocks = <&wcc>;
+	clock-names = "iface";
+	#sound-dai-cells = <1>;
+	qcom,dports-type = <0>;
+	qcom,dout-ports	= <6>;
+	qcom,din-ports	= <2>;
+	qcom,ports-sinterval-low = /bits/ 8  <0x07 0x1F 0x3F 0x7 0x1F 0x3F 0x0F 0x0F>;
+	qcom,ports-offset1 = /bits/ 8 <0x01 0x02 0x0C 0x6 0x12 0x0D 0x07 0x0A >;
+	qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x1F 0x00 0x00 0x1F 0x00 0x00>;
+
+	/* Left Speaker */
+	left{
+		....
+	};
+
+	/* Right Speaker */
+	right{
+		....
+	};
+};
diff --git a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
index 1b43993bccdb..330924b8618e 100644
--- a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
+++ b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
@@ -69,6 +69,7 @@ examples:
             reg = <0 1>;
             powerdown-gpios = <&wcdpinctrl 2 0>;
             #thermal-sensor-cells = <0>;
+            #sound-dai-cells = <0>;
         };
 
         speaker@0,2 {
@@ -76,6 +77,7 @@ examples:
             reg = <0 2>;
             powerdown-gpios = <&wcdpinctrl 2 0>;
             #thermal-sensor-cells = <0>;
+            #sound-dai-cells = <0>;
         };
     };
 
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
index 6d1329c28170..8036499112f5 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells": true
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
index f36c46d236d7..0565dc49e449 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   "#address-cells": true
diff --git a/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt b/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt
index 1fd9a4406a1d..b98203ca656d 100644
--- a/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt
+++ b/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt
@@ -12,6 +12,7 @@ Required properties:
  - clock-names: Should be "clk_apb5".
  - pinctrl-names : a pinctrl state named "default" must be defined.
  - pinctrl-0 : phandle referencing pin configuration of the device.
+ - resets : phandle to the reset control for this device.
  - cs-gpios: Specifies the gpio pins to be used for chipselects.
             See: Documentation/devicetree/bindings/spi/spi-bus.txt
 
@@ -19,16 +20,6 @@ Optional properties:
 - clock-frequency : Input clock frequency to the PSPI block in Hz.
 		    Default is 25000000 Hz.
 
-Aliases:
-- All the SPI controller nodes should be represented in the aliases node using
-  the following format 'spi{n}' withe the correct numbered in "aliases" node.
-
-Example:
-
-aliases {
-	spi0 = &spi0;
-};
-
 spi0: spi@f0200000 {
 	compatible = "nuvoton,npcm750-pspi";
 	reg = <0xf0200000 0x1000>;
@@ -39,5 +30,6 @@ spi0: spi@f0200000 {
 	interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
 	clocks = <&clk NPCM7XX_CLK_APB5>;
 	clock-names = "clk_apb5";
+	resets = <&rstc NPCM7XX_RESET_IPSRST2 NPCM7XX_RESET_PSPI1>
 	cs-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>;
 };
diff --git a/Documentation/devicetree/bindings/spi/renesas,hspi.yaml b/Documentation/devicetree/bindings/spi/renesas,hspi.yaml
new file mode 100644
index 000000000000..c429cf4bea5b
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/renesas,hspi.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/renesas,hspi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas HSPI
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+allOf:
+  - $ref: spi-controller.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+        - renesas,hspi-r8a7778 # R-Car M1A
+        - renesas,hspi-r8a7779 # R-Car H1
+      - const: renesas,hspi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - '#address-cells'
+  - '#size-cells'
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7778-clock.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    hspi0: spi@fffc7000 {
+        compatible = "renesas,hspi-r8a7778", "renesas,hspi";
+        reg = <0xfffc7000 0x18>;
+        interrupts = <0 63 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&mstp0_clks R8A7778_CLK_HSPI>;
+        power-domains = <&cpg_clocks>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
+
diff --git a/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt b/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt
new file mode 100644
index 000000000000..fb1a6728638d
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt
@@ -0,0 +1,11 @@
+Renesas RZ/N1 SPI Controller
+
+This controller is based on the Synopsys DW Synchronous Serial Interface and
+inherits all properties defined in snps,dw-apb-ssi.txt except for the
+compatible property.
+
+Required properties:
+- compatible : The device specific string followed by the generic RZ/N1 string.
+   Therefore it must be one of:
+   "renesas,r9a06g032-spi", "renesas,rzn1-spi"
+   "renesas,r9a06g033-spi", "renesas,rzn1-spi"
diff --git a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
new file mode 100644
index 000000000000..b6c1dd2a9c5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/renesas,sh-msiof.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas MSIOF SPI controller
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+
+allOf:
+  - $ref: spi-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - const: renesas,msiof-sh73a0     # SH-Mobile AG5
+          - const: renesas,sh-mobile-msiof  # generic SH-Mobile compatible
+                                            # device
+      - items:
+          - enum:
+              - renesas,msiof-r8a7743       # RZ/G1M
+              - renesas,msiof-r8a7744       # RZ/G1N
+              - renesas,msiof-r8a7745       # RZ/G1E
+              - renesas,msiof-r8a77470      # RZ/G1C
+              - renesas,msiof-r8a7790       # R-Car H2
+              - renesas,msiof-r8a7791       # R-Car M2-W
+              - renesas,msiof-r8a7792       # R-Car V2H
+              - renesas,msiof-r8a7793       # R-Car M2-N
+              - renesas,msiof-r8a7794       # R-Car E2
+          - const: renesas,rcar-gen2-msiof  # generic R-Car Gen2 and RZ/G1
+                                            # compatible device
+      - items:
+          - enum:
+              - renesas,msiof-r8a774a1      # RZ/G2M
+              - renesas,msiof-r8a774b1      # RZ/G2N
+              - renesas,msiof-r8a774c0      # RZ/G2E
+              - renesas,msiof-r8a7795       # R-Car H3
+              - renesas,msiof-r8a7796       # R-Car M3-W
+              - renesas,msiof-r8a77965      # R-Car M3-N
+              - renesas,msiof-r8a77970      # R-Car V3M
+              - renesas,msiof-r8a77980      # R-Car V3H
+              - renesas,msiof-r8a77990      # R-Car E3
+              - renesas,msiof-r8a77995      # R-Car D3
+          - const: renesas,rcar-gen3-msiof  # generic R-Car Gen3 and RZ/G2
+                                            # compatible device
+      - items:
+          - const: renesas,sh-msiof  # deprecated
+
+  reg:
+    minItems: 1
+    maxItems: 2
+    oneOf:
+      - items:
+          - description: CPU and DMA engine registers
+      - items:
+          - description: CPU registers
+          - description: DMA engine registers
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  num-cs:
+    description: |
+      Total number of chip selects (default is 1).
+      Up to 3 native chip selects are supported:
+        0: MSIOF_SYNC
+        1: MSIOF_SS1
+        2: MSIOF_SS2
+      Hardware limitations related to chip selects:
+        - Native chip selects are always deasserted in between transfers
+          that are part of the same message.  Use cs-gpios to work around
+          this.
+        - All slaves using native chip selects must use the same spi-cs-high
+          configuration.  Use cs-gpios to work around this.
+        - When using GPIO chip selects, at least one native chip select must
+          be left unused, as it will be driven anyway.
+    minimum: 1
+    maximum: 3
+    default: 1
+
+  dmas:
+    minItems: 2
+    maxItems: 4
+
+  dma-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      enum: [ tx, rx ]
+
+  renesas,dtdl:
+    description: delay sync signal (setup) in transmit mode.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum:
+          - 0    # no bit delay
+          - 50   # 0.5-clock-cycle delay
+          - 100  # 1-clock-cycle delay
+          - 150  # 1.5-clock-cycle delay
+          - 200  # 2-clock-cycle delay
+
+  renesas,syncdl:
+    description: delay sync signal (hold) in transmit mode
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum:
+          - 0    # no bit delay
+          - 50   # 0.5-clock-cycle delay
+          - 100  # 1-clock-cycle delay
+          - 150  # 1.5-clock-cycle delay
+          - 200  # 2-clock-cycle delay
+          - 300  # 3-clock-cycle delay
+
+  renesas,tx-fifo-size:
+    # deprecated for soctype-specific bindings
+    description: |
+      Override the default TX fifo size.  Unit is words.  Ignored if 0.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - maxItems: 1
+    default: 64
+
+  renesas,rx-fifo-size:
+    # deprecated for soctype-specific bindings
+    description: |
+      Override the default RX fifo size.  Unit is words.  Ignored if 0.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - maxItems: 1
+    default: 64
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#address-cells'
+  - '#size-cells'
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7791-clock.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    msiof0: spi@e6e20000 {
+        compatible = "renesas,msiof-r8a7791", "renesas,rcar-gen2-msiof";
+        reg = <0 0xe6e20000 0 0x0064>;
+        interrupts = <0 156 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&mstp0_clks R8A7791_CLK_MSIOF0>;
+        dmas = <&dmac0 0x51>, <&dmac0 0x52>;
+        dma-names = "tx", "rx";
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/spi/sh-hspi.txt b/Documentation/devicetree/bindings/spi/sh-hspi.txt
deleted file mode 100644
index b9d1e4d11a77..000000000000
--- a/Documentation/devicetree/bindings/spi/sh-hspi.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Renesas HSPI.
-
-Required properties:
-- compatible       : "renesas,hspi-<soctype>", "renesas,hspi" as fallback.
-		     Examples with soctypes are:
-		       - "renesas,hspi-r8a7778" (R-Car M1)
-		       - "renesas,hspi-r8a7779" (R-Car H1)
-- reg              : Offset and length of the register set for the device
-- interrupts       : Interrupt specifier
-- #address-cells   : Must be <1>
-- #size-cells      : Must be <0>
-
-Pinctrl properties might be needed, too.  See
-Documentation/devicetree/bindings/pinctrl/renesas,*.
-
-Example:
-
-	hspi0: spi@fffc7000 {
-		compatible = "renesas,hspi-r8a7778", "renesas,hspi";
-		reg = <0xfffc7000 0x18>;
-		interrupt-parent = <&gic>;
-		interrupts = <0 63 IRQ_TYPE_LEVEL_HIGH>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
-
diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
deleted file mode 100644
index 18e14ee257b2..000000000000
--- a/Documentation/devicetree/bindings/spi/sh-msiof.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-Renesas MSIOF spi controller
-
-Required properties:
-- compatible           : "renesas,msiof-r8a7743" (RZ/G1M)
-			 "renesas,msiof-r8a7744" (RZ/G1N)
-			 "renesas,msiof-r8a7745" (RZ/G1E)
-			 "renesas,msiof-r8a77470" (RZ/G1C)
-			 "renesas,msiof-r8a774a1" (RZ/G2M)
-			 "renesas,msiof-r8a774c0" (RZ/G2E)
-			 "renesas,msiof-r8a7790" (R-Car H2)
-			 "renesas,msiof-r8a7791" (R-Car M2-W)
-			 "renesas,msiof-r8a7792" (R-Car V2H)
-			 "renesas,msiof-r8a7793" (R-Car M2-N)
-			 "renesas,msiof-r8a7794" (R-Car E2)
-			 "renesas,msiof-r8a7795" (R-Car H3)
-			 "renesas,msiof-r8a7796" (R-Car M3-W)
-			 "renesas,msiof-r8a77965" (R-Car M3-N)
-			 "renesas,msiof-r8a77970" (R-Car V3M)
-			 "renesas,msiof-r8a77980" (R-Car V3H)
-			 "renesas,msiof-r8a77990" (R-Car E3)
-			 "renesas,msiof-r8a77995" (R-Car D3)
-			 "renesas,msiof-sh73a0" (SH-Mobile AG5)
-			 "renesas,sh-mobile-msiof" (generic SH-Mobile compatibile device)
-			 "renesas,rcar-gen2-msiof" (generic R-Car Gen2 and RZ/G1 compatible device)
-			 "renesas,rcar-gen3-msiof" (generic R-Car Gen3 and RZ/G2 compatible device)
-			 "renesas,sh-msiof"      (deprecated)
-
-			 When compatible with the generic version, nodes
-			 must list the SoC-specific version corresponding
-			 to the platform first followed by the generic
-			 version.
-
-- reg                  : A list of offsets and lengths of the register sets for
-			 the device.
-			 If only one register set is present, it is to be used
-			 by both the CPU and the DMA engine.
-			 If two register sets are present, the first is to be
-			 used by the CPU, and the second is to be used by the
-			 DMA engine.
-- interrupts           : Interrupt specifier
-- #address-cells       : Must be <1>
-- #size-cells          : Must be <0>
-
-Optional properties:
-- clocks               : Must contain a reference to the functional clock.
-- num-cs               : Total number of chip selects (default is 1).
-			 Up to 3 native chip selects are supported:
-			   0: MSIOF_SYNC
-			   1: MSIOF_SS1
-			   2: MSIOF_SS2
-			 Hardware limitations related to chip selects:
-			   - Native chip selects are always deasserted in
-			     between transfers that are part of the same
-			     message.  Use cs-gpios to work around this.
-			   - All slaves using native chip selects must use the
-			     same spi-cs-high configuration.  Use cs-gpios to
-			     work around this.
-			   - When using GPIO chip selects, at least one native
-			     chip select must be left unused, as it will be
-			     driven anyway.
-- dmas                 : Must contain a list of two references to DMA
-			 specifiers, one for transmission, and one for
-			 reception.
-- dma-names            : Must contain a list of two DMA names, "tx" and "rx".
-- spi-slave            : Empty property indicating the SPI controller is used
-			 in slave mode.
-- renesas,dtdl         : delay sync signal (setup) in transmit mode.
-			 Must contain one of the following values:
-			 0   (no bit delay)
-			 50  (0.5-clock-cycle delay)
-			 100 (1-clock-cycle delay)
-			 150 (1.5-clock-cycle delay)
-			 200 (2-clock-cycle delay)
-
-- renesas,syncdl       : delay sync signal (hold) in transmit mode.
-			 Must contain one of the following values:
-			 0   (no bit delay)
-			 50  (0.5-clock-cycle delay)
-			 100 (1-clock-cycle delay)
-			 150 (1.5-clock-cycle delay)
-			 200 (2-clock-cycle delay)
-			 300 (3-clock-cycle delay)
-
-Optional properties, deprecated for soctype-specific bindings:
-- renesas,tx-fifo-size : Overrides the default tx fifo size given in words
-			 (default is 64)
-- renesas,rx-fifo-size : Overrides the default rx fifo size given in words
-			 (default is 64)
-
-Pinctrl properties might be needed, too.  See
-Documentation/devicetree/bindings/pinctrl/renesas,*.
-
-Example:
-
-	msiof0: spi@e6e20000 {
-		compatible = "renesas,msiof-r8a7791",
-			     "renesas,rcar-gen2-msiof";
-		reg = <0 0xe6e20000 0 0x0064>;
-		interrupts = <0 156 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp0_clks R8A7791_CLK_MSIOF0>;
-		dmas = <&dmac0 0x51>, <&dmac0 0x52>;
-		dma-names = "tx", "rx";
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
index f54c8c36395e..3ed08ee9feba 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
@@ -16,7 +16,8 @@ Required properties:
 Optional properties:
 - clock-names : Contains the names of the clocks:
     "ssi_clk", for the core clock used to generate the external SPI clock.
-    "pclk", the interface clock, required for register access.
+    "pclk", the interface clock, required for register access. If a clock domain
+     used to enable this clock then it should be named "pclk_clkdomain".
 - cs-gpios : Specifies the gpio pins to be used for chipselects.
 - num-cs : The number of chipselects. If omitted, this will default to 4.
 - reg-io-width : The I/O register width (in bytes) implemented by this
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 732339275848..1e0ca6ccf64b 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -111,7 +111,7 @@ patternProperties:
       spi-rx-bus-width:
         allOf:
           - $ref: /schemas/types.yaml#/definitions/uint32
-          - enum: [ 1, 2, 4 ]
+          - enum: [ 1, 2, 4, 8 ]
           - default: 1
         description:
           Bus width to the SPI bus used for MISO.
@@ -123,7 +123,7 @@ patternProperties:
       spi-tx-bus-width:
         allOf:
           - $ref: /schemas/types.yaml#/definitions/uint32
-          - enum: [ 1, 2, 4 ]
+          - enum: [ 1, 2, 4, 8 ]
           - default: 1
         description:
           Bus width to the SPI bus used for MOSI.
diff --git a/Documentation/devicetree/bindings/spi/spi-sifive.txt b/Documentation/devicetree/bindings/spi/spi-sifive.txt
deleted file mode 100644
index 3f5c6e438972..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-sifive.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-SiFive SPI controller Device Tree Bindings
-------------------------------------------
-
-Required properties:
-- compatible		: Should be "sifive,<chip>-spi" and "sifive,spi<version>".
-			  Supported compatible strings are:
-			  "sifive,fu540-c000-spi" for the SiFive SPI v0 as integrated
-			  onto the SiFive FU540 chip, and "sifive,spi0" for the SiFive
-			  SPI v0 IP block with no chip integration tweaks.
-			  Please refer to sifive-blocks-ip-versioning.txt for details
-- reg			: Physical base address and size of SPI registers map
-			  A second (optional) range can indicate memory mapped flash
-- interrupts		: Must contain one entry
-- interrupt-parent	: Must be core interrupt controller
-- clocks		: Must reference the frequency given to the controller
-- #address-cells	: Must be '1', indicating which CS to use
-- #size-cells		: Must be '0'
-
-Optional properties:
-- sifive,fifo-depth		: Depth of hardware queues; defaults to 8
-- sifive,max-bits-per-word	: Maximum bits per word; defaults to 8
-
-SPI RTL that corresponds to the IP block version numbers can be found here:
-https://github.com/sifive/sifive-blocks/tree/master/src/main/scala/devices/spi
-
-Example:
-	spi: spi@10040000 {
-		compatible = "sifive,fu540-c000-spi", "sifive,spi0";
-		reg = <0x0 0x10040000 0x0 0x1000 0x0 0x20000000 0x0 0x10000000>;
-		interrupt-parent = <&plic>;
-		interrupts = <51>;
-		clocks = <&tlclk>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		sifive,fifo-depth = <8>;
-		sifive,max-bits-per-word = <8>;
-	};
diff --git a/Documentation/devicetree/bindings/spi/spi-sifive.yaml b/Documentation/devicetree/bindings/spi/spi-sifive.yaml
new file mode 100644
index 000000000000..140e4351a19f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sifive.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-sifive.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive SPI controller
+
+maintainers:
+  - Pragnesh Patel <pragnesh.patel@sifive.com>
+  - Paul Walmsley  <paul.walmsley@sifive.com>
+  - Palmer Dabbelt <palmer@sifive.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    items:
+      - const: sifive,fu540-c000-spi
+      - const: sifive,spi0
+
+    description:
+      Should be "sifive,<chip>-spi" and "sifive,spi<version>".
+      Supported compatible strings are -
+      "sifive,fu540-c000-spi" for the SiFive SPI v0 as integrated
+      onto the SiFive FU540 chip, and "sifive,spi0" for the SiFive
+      SPI v0 IP block with no chip integration tweaks.
+      Please refer to sifive-blocks-ip-versioning.txt for details
+
+      SPI RTL that corresponds to the IP block version numbers can be found here -
+      https://github.com/sifive/sifive-blocks/tree/master/src/main/scala/devices/spi
+
+  reg:
+    maxItems: 1
+
+    description:
+      Physical base address and size of SPI registers map
+      A second (optional) range can indicate memory mapped flash
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+    description:
+      Must reference the frequency given to the controller
+
+  sifive,fifo-depth:
+    description:
+      Depth of hardware queues; defaults to 8
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/uint32"
+      - enum: [ 8 ]
+      - default: 8
+
+  sifive,max-bits-per-word:
+    description:
+      Maximum bits per word; defaults to 8
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/uint32"
+      - enum: [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ]
+      - default: 8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+examples:
+  - |
+    spi: spi@10040000 {
+      compatible = "sifive,fu540-c000-spi", "sifive,spi0";
+      reg = <0x0 0x10040000 0x0 0x1000 0x0 0x20000000 0x0 0x10000000>;
+      interrupt-parent = <&plic>;
+      interrupts = <51>;
+      clocks = <&tlclk>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+      sifive,fifo-depth = <8>;
+      sifive,max-bits-per-word = <8>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt b/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt
deleted file mode 100644
index bfc038b9478d..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* STMicroelectronics Quad Serial Peripheral Interface(QSPI)
-
-Required properties:
-- compatible: should be "st,stm32f469-qspi"
-- reg: the first contains the register location and length.
-       the second contains the memory mapping address and length
-- reg-names: should contain the reg names "qspi" "qspi_mm"
-- interrupts: should contain the interrupt for the device
-- clocks: the phandle of the clock needed by the QSPI controller
-- A pinctrl must be defined to set pins in mode of operation for QSPI transfer
-
-Optional properties:
-- resets: must contain the phandle to the reset controller.
-
-A spi flash (NOR/NAND) must be a child of spi node and could have some
-properties. Also see jedec,spi-nor.txt.
-
-Required properties:
-- reg: chip-Select number (QSPI controller may connect 2 flashes)
-- spi-max-frequency: max frequency of spi bus
-
-Optional properties:
-- spi-rx-bus-width: see ./spi-bus.txt for the description
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-Documentation/devicetree/bindings/dma/dma.txt.
-- dma-names: DMA request names should include "tx" and "rx" if present.
-
-Example:
-
-qspi: spi@a0001000 {
-	compatible = "st,stm32f469-qspi";
-	reg = <0xa0001000 0x1000>, <0x90000000 0x10000000>;
-	reg-names = "qspi", "qspi_mm";
-	interrupts = <91>;
-	resets = <&rcc STM32F4_AHB3_RESET(QSPI)>;
-	clocks = <&rcc 0 STM32F4_AHB3_CLOCK(QSPI)>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_qspi0>;
-
-	flash@0 {
-		compatible = "jedec,spi-nor";
-		reg = <0>;
-		spi-rx-bus-width = <4>;
-		spi-max-frequency = <108000000>;
-		...
-	};
-};
diff --git a/Documentation/devicetree/bindings/spi/spi-stm32.txt b/Documentation/devicetree/bindings/spi/spi-stm32.txt
deleted file mode 100644
index d82755c63eaf..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-stm32.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-STMicroelectronics STM32 SPI Controller
-
-The STM32 SPI controller is used to communicate with external devices using
-the Serial Peripheral Interface. It supports full-duplex, half-duplex and
-simplex synchronous serial communication with external devices. It supports
-from 4 to 32-bit data size. Although it can be configured as master or slave,
-only master is supported by the driver.
-
-Required properties:
-- compatible: Should be one of:
-  "st,stm32h7-spi"
-  "st,stm32f4-spi"
-- reg: Offset and length of the device's register set.
-- interrupts: Must contain the interrupt id.
-- clocks: Must contain an entry for spiclk (which feeds the internal clock
-	  generator).
-- #address-cells:  Number of cells required to define a chip select address.
-- #size-cells: Should be zero.
-
-Optional properties:
-- resets: Must contain the phandle to the reset controller.
-- A pinctrl state named "default" may be defined to set pins in mode of
-  operation for SPI transfer.
-- dmas: DMA specifiers for tx and rx dma. DMA fifo mode must be used. See the
-  STM32 DMA bindings, Documentation/devicetree/bindings/dma/stm32-dma.txt.
-- dma-names: DMA request names should include "tx" and "rx" if present.
-- cs-gpios: list of GPIO chip selects. See the SPI bus bindings,
-  Documentation/devicetree/bindings/spi/spi-bus.txt
-
-
-Child nodes represent devices on the SPI bus
-  See ../spi/spi-bus.txt
-
-Optional properties:
-- st,spi-midi-ns: Only for STM32H7, (Master Inter-Data Idleness) minimum time
-		  delay in nanoseconds inserted between two consecutive data
-		  frames.
-
-
-Example:
-	spi2: spi@40003800 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "st,stm32h7-spi";
-		reg = <0x40003800 0x400>;
-		interrupts = <36>;
-		clocks = <&rcc SPI2_CK>;
-		resets = <&rcc 1166>;
-		dmas = <&dmamux1 0 39 0x400 0x01>,
-		       <&dmamux1 1 40 0x400 0x01>;
-		dma-names = "rx", "tx";
-		pinctrl-0 = <&spi2_pins_b>;
-		pinctrl-names = "default";
-		cs-gpios = <&gpioa 11 0>;
-
-		aardvark@0 {
-			compatible = "totalphase,aardvark";
-			reg = <0>;
-			spi-max-frequency = <4000000>;
-			st,spi-midi-ns = <4000>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.txt b/Documentation/devicetree/bindings/spi/spi-xilinx.txt
index dc924a5f71db..5f4ed3e5c994 100644
--- a/Documentation/devicetree/bindings/spi/spi-xilinx.txt
+++ b/Documentation/devicetree/bindings/spi/spi-xilinx.txt
@@ -8,7 +8,8 @@ Required properties:
 			  number.
 
 Optional properties:
-- xlnx,num-ss-bits	: Number of chip selects used.
+- xlnx,num-ss-bits	 : Number of chip selects used.
+- xlnx,num-transfer-bits : Number of bits per transfer. This will be 8 if not specified
 
 Example:
 	axi_quad_spi@41e00000 {
@@ -17,5 +18,6 @@ Example:
 			interrupts = <0 31 1>;
 			reg = <0x41e00000 0x10000>;
 			xlnx,num-ss-bits = <0x1>;
+			xlnx,num-transfer-bits = <32>;
 	};
 
diff --git a/Documentation/devicetree/bindings/spi/spi_atmel.txt b/Documentation/devicetree/bindings/spi/spi_atmel.txt
index f99c733d75c1..5bb4a8f1df7a 100644
--- a/Documentation/devicetree/bindings/spi/spi_atmel.txt
+++ b/Documentation/devicetree/bindings/spi/spi_atmel.txt
@@ -1,7 +1,7 @@
 Atmel SPI device
 
 Required properties:
-- compatible : should be "atmel,at91rm9200-spi".
+- compatible : should be "atmel,at91rm9200-spi" or "microchip,sam9x60-spi".
 - reg: Address and length of the register set for the device
 - interrupts: Should contain spi interrupt
 - cs-gpios: chipselects (optional for SPI controller version >= 2 with the
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
new file mode 100644
index 000000000000..3665a5fe6b7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/st,stm32-qspi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI) bindings
+
+maintainers:
+  - Christophe Kerello <christophe.kerello@st.com>
+  - Patrice Chotard <patrice.chotard@st.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    const: st,stm32f469-qspi
+
+  reg:
+    items:
+      - description: registers
+      - description: memory mapping
+
+  reg-names:
+    items:
+     - const: qspi
+     - const: qspi_mm
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: tx DMA channel
+      - description: rx DMA channel
+
+  dma-names:
+    items:
+      - const: tx
+      - const: rx
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    spi@58003000 {
+      compatible = "st,stm32f469-qspi";
+      reg = <0x58003000 0x1000>, <0x70000000 0x10000000>;
+      reg-names = "qspi", "qspi_mm";
+      interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+      dmas = <&mdma1 22 0x10 0x100002 0x0 0x0>,
+             <&mdma1 22 0x10 0x100008 0x0 0x0>;
+      dma-names = "tx", "rx";
+      clocks = <&rcc QSPI_K>;
+      resets = <&rcc QSPI_R>;
+
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      flash@0 {
+        compatible = "jedec,spi-nor";
+        reg = <0>;
+        spi-rx-bus-width = <4>;
+        spi-max-frequency = <108000000>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
new file mode 100644
index 000000000000..f0d979664f07
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/st,stm32-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 SPI Controller bindings
+
+description: |
+  The STM32 SPI controller is used to communicate with external devices using
+  the Serial Peripheral Interface. It supports full-duplex, half-duplex and
+  simplex synchronous serial communication with external devices. It supports
+  from 4 to 32-bit data size.
+
+maintainers:
+  - Erwan Leray <erwan.leray@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@st.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32f4-spi
+
+    then:
+      properties:
+        st,spi-midi-ns: false
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f4-spi
+      - st,stm32h7-spi
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    description: |
+      DMA specifiers for tx and rx dma. DMA fifo mode must be used. See
+      the STM32 DMA bindings Documentation/devicetree/bindings/dma/stm32-dma.txt.
+    items:
+      - description: rx DMA channel
+      - description: tx DMA channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+patternProperties:
+  "^[a-zA-Z][a-zA-Z0-9,+\\-._]{0,63}@[0-9a-f]+$":
+    type: object
+    # SPI slave nodes must be children of the SPI master node and can
+    # contain the following properties.
+    properties:
+      st,spi-midi-ns:
+        description: |
+          Only for STM32H7, (Master Inter-Data Idleness) minimum time
+          delay in nanoseconds inserted between two consecutive data frames.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    #include <dt-bindings/reset/stm32mp1-resets.h>
+    spi@4000b000 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      compatible = "st,stm32h7-spi";
+      reg = <0x4000b000 0x400>;
+      interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&rcc SPI2_K>;
+      resets = <&rcc SPI2_R>;
+      dmas = <&dmamux1 0 39 0x400 0x05>,
+             <&dmamux1 1 40 0x400 0x05>;
+      dma-names = "rx", "tx";
+      cs-gpios = <&gpioa 11 0>;
+
+      aardvark@0 {
+        compatible = "totalphase,aardvark";
+        reg = <0>;
+        spi-max-frequency = <4000000>;
+        st,spi-midi-ns = <4000>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
new file mode 100644
index 000000000000..80bac7a182d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sram/allwinner,sun4i-a10-system-control.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 System Control Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+description:
+  The SRAM controller found on most Allwinner devices is represented
+  by a regular node for the SRAM controller itself, with sub-nodes
+  representing the SRAM handled by the SRAM controller.
+
+properties:
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-sram-controller
+        deprecated: true
+      - const: allwinner,sun4i-a10-system-control
+      - const: allwinner,sun5i-a13-system-control
+      - items:
+        - const: allwinner,sun7i-a20-system-control
+        - const: allwinner,sun4i-a10-system-control
+      - const: allwinner,sun8i-a23-system-control
+      - const: allwinner,sun8i-h3-system-control
+      - const: allwinner,sun50i-a64-sram-controller
+        deprecated: true
+      - const: allwinner,sun50i-a64-system-control
+      - const: allwinner,sun50i-h5-system-control
+      - items:
+        - const: allwinner,sun50i-h6-system-control
+        - const: allwinner,sun50i-a64-system-control
+      - items:
+        - const: allwinner,suniv-f1c100s-system-control
+        - const: allwinner,sun4i-a10-system-control
+
+  reg:
+    maxItems: 1
+
+  ranges: true
+
+patternProperties:
+  "^sram@[a-z0-9]+":
+    type: object
+
+    properties:
+      compatible:
+        const: mmio-sram
+
+    patternProperties:
+      "^sram-section?@[a-f0-9]+$":
+        type: object
+
+        properties:
+          compatible:
+            oneOf:
+              - const: allwinner,sun4i-a10-sram-a3-a4
+              - const: allwinner,sun4i-a10-sram-c1
+              - const: allwinner,sun4i-a10-sram-d
+              - const: allwinner,sun50i-a64-sram-c
+              - items:
+                - const: allwinner,sun5i-a13-sram-a3-a4
+                - const: allwinner,sun4i-a10-sram-a3-a4
+              - items:
+                - const: allwinner,sun7i-a20-sram-a3-a4
+                - const: allwinner,sun4i-a10-sram-a3-a4
+              - items:
+                - const: allwinner,sun5i-a13-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun7i-a20-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun8i-a23-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun8i-h3-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun50i-a64-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun50i-h5-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun50i-h6-sram-c1
+                - const: allwinner,sun4i-a10-sram-c1
+              - items:
+                - const: allwinner,sun5i-a13-sram-d
+                - const: allwinner,sun4i-a10-sram-d
+              - items:
+                - const: allwinner,sun7i-a20-sram-d
+                - const: allwinner,sun4i-a10-sram-d
+              - items:
+                - const: allwinner,suniv-f1c100s-sram-d
+                - const: allwinner,sun4i-a10-sram-d
+              - items:
+                - const: allwinner,sun50i-h6-sram-c
+                - const: allwinner,sun50i-a64-sram-c
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    system-control@1c00000 {
+      compatible = "allwinner,sun4i-a10-system-control";
+      reg = <0x01c00000 0x30>;
+      #address-cells = <1>;
+      #size-cells = <1>;
+      ranges;
+
+      sram_a: sram@00000000 {
+        compatible = "mmio-sram";
+        reg = <0x00000000 0xc000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0x00000000 0xc000>;
+
+        emac_sram: sram-section@8000 {
+          compatible = "allwinner,sun4i-a10-sram-a3-a4";
+          reg = <0x8000 0x4000>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/sram/milbeaut-smp-sram.txt b/Documentation/devicetree/bindings/sram/milbeaut-smp-sram.txt
deleted file mode 100644
index 194f6a3c1c1e..000000000000
--- a/Documentation/devicetree/bindings/sram/milbeaut-smp-sram.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Milbeaut SRAM for smp bringup
-
-Milbeaut SoCs use a part of the sram for the bringup of the secondary cores.
-Once they get powered up in the bootloader, they stay at the specific part
-of the sram.
-Therefore the part needs to be added as the sub-node of mmio-sram.
-
-Required sub-node properties:
-- compatible : should be "socionext,milbeaut-smp-sram"
-
-Example:
-
-        sram: sram@0 {
-                compatible = "mmio-sram";
-                reg = <0x0 0x10000>;
-                #address-cells = <1>;
-                #size-cells = <1>;
-                ranges = <0 0x0 0x10000>;
-
-                smp-sram@f100 {
-                        compatible = "socionext,milbeaut-smp-sram";
-                        reg = <0xf100 0x20>;
-                };
-        };
diff --git a/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml b/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml
new file mode 100644
index 000000000000..222990f9923c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sram/qcom,ocmem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: On Chip Memory (OCMEM) that is present on some Qualcomm Snapdragon SoCs.
+
+maintainers:
+  - Brian Masney <masneyb@onstation.org>
+
+description: |
+  The On Chip Memory (OCMEM) is typically used by the GPU, camera/video, and
+  audio components on some Snapdragon SoCs.
+
+properties:
+  compatible:
+    const: qcom,msm8974-ocmem
+
+  reg:
+    items:
+      - description: Control registers
+      - description: OCMEM address range
+
+  reg-names:
+    items:
+      - const: ctrl
+      - const: mem
+
+  clocks:
+    items:
+      - description: Core clock
+      - description: Interface clock
+
+  clock-names:
+    items:
+      - const: core
+      - const: iface
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - clock-names
+  - '#address-cells'
+  - '#size-cells'
+
+patternProperties:
+  "^.+-sram$":
+    type: object
+    description: A region of reserved memory.
+
+    properties:
+      reg:
+        maxItems: 1
+
+      ranges:
+        maxItems: 1
+
+    required:
+      - reg
+      - ranges
+
+examples:
+  - |
+      #include <dt-bindings/clock/qcom,rpmcc.h>
+      #include <dt-bindings/clock/qcom,mmcc-msm8974.h>
+
+      ocmem: ocmem@fdd00000 {
+        compatible = "qcom,msm8974-ocmem";
+
+        reg = <0xfdd00000 0x2000>,
+              <0xfec00000 0x180000>;
+        reg-names = "ctrl",
+                    "mem";
+
+        clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>,
+                 <&mmcc OCMEMCX_OCMEMNOC_CLK>;
+        clock-names = "core",
+                      "iface";
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        gmu-sram@0 {
+                reg = <0x0 0x100000>;
+                ranges = <0 0 0xfec00000 0x100000>;
+        };
+      };
diff --git a/Documentation/devicetree/bindings/sram/renesas,smp-sram.txt b/Documentation/devicetree/bindings/sram/renesas,smp-sram.txt
deleted file mode 100644
index 712d05e3e15e..000000000000
--- a/Documentation/devicetree/bindings/sram/renesas,smp-sram.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Renesas SMP SRAM
-
-Renesas R-Car Gen2 and RZ/G1 SoCs need a small piece of SRAM for the jump stub
-for secondary CPU bringup and CPU hotplug.
-This memory is reserved by adding a child node to a "mmio-sram" node, cfr.
-Documentation/devicetree/bindings/sram/sram.txt.
-
-Required child node properties:
-  - compatible: Must be "renesas,smp-sram",
-  - reg: Address and length of the reserved SRAM.
-    The full physical (bus) address must be aligned to a 256 KiB boundary.
-
-
-Example:
-
-	icram1:	sram@e63c0000 {
-		compatible = "mmio-sram";
-		reg = <0 0xe63c0000 0 0x1000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0 0xe63c0000 0x1000>;
-
-		smp-sram@0 {
-			compatible = "renesas,smp-sram";
-			reg = <0 0x10>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt b/Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt
deleted file mode 100644
index 800701ecffca..000000000000
--- a/Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Rockchip SRAM for smp bringup:
-------------------------------
-
-Rockchip's smp-capable SoCs use the first part of the sram for the bringup
-of the cores. Once the core gets powered up it executes the code that is
-residing at the very beginning of the sram.
-
-Therefore a reserved section sub-node has to be added to the mmio-sram
-declaration.
-
-Required sub-node properties:
-- compatible : should be "rockchip,rk3066-smp-sram"
-
-The rest of the properties should follow the generic mmio-sram discription
-found in Documentation/devicetree/bindings/sram/sram.txt
-
-Example:
-
-	sram: sram@10080000 {
-		compatible = "mmio-sram";
-		reg = <0x10080000 0x10000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		smp-sram@10080000 {
-			compatible = "rockchip,rk3066-smp-sram";
-			reg = <0x10080000 0x50>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/sram/samsung-sram.txt b/Documentation/devicetree/bindings/sram/samsung-sram.txt
deleted file mode 100644
index 61a9bbed303d..000000000000
--- a/Documentation/devicetree/bindings/sram/samsung-sram.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Samsung Exynos SYSRAM for SMP bringup:
-------------------------------------
-
-Samsung SMP-capable Exynos SoCs use part of the SYSRAM for the bringup
-of the secondary cores. Once the core gets powered up it executes the
-code that is residing at some specific location of the SYSRAM.
-
-Therefore reserved section sub-nodes have to be added to the mmio-sram
-declaration. These nodes are of two types depending upon secure or
-non-secure execution environment.
-
-Required sub-node properties:
-- compatible : depending upon boot mode, should be
-		"samsung,exynos4210-sysram" : for Secure SYSRAM
-		"samsung,exynos4210-sysram-ns" : for Non-secure SYSRAM
-
-The rest of the properties should follow the generic mmio-sram discription
-found in Documentation/devicetree/bindings/sram/sram.txt
-
-Example:
-
-	sysram@2020000 {
-		compatible = "mmio-sram";
-		reg = <0x02020000 0x54000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0x02020000 0x54000>;
-
-		smp-sysram@0 {
-			compatible = "samsung,exynos4210-sysram";
-			reg = <0x0 0x1000>;
-		};
-
-		smp-sysram@53000 {
-			compatible = "samsung,exynos4210-sysram-ns";
-			reg = <0x53000 0x1000>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/sram/sram.txt b/Documentation/devicetree/bindings/sram/sram.txt
deleted file mode 100644
index e98908bd4227..000000000000
--- a/Documentation/devicetree/bindings/sram/sram.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-Generic on-chip SRAM
-
-Simple IO memory regions to be managed by the genalloc API.
-
-Required properties:
-
-- compatible : mmio-sram or atmel,sama5d2-securam
-
-- reg : SRAM iomem address range
-
-Reserving sram areas:
----------------------
-
-Each child of the sram node specifies a region of reserved memory. Each
-child node should use a 'reg' property to specify a specific range of
-reserved memory.
-
-Following the generic-names recommended practice, node names should
-reflect the purpose of the node. Unit address (@<address>) should be
-appended to the name.
-
-Required properties in the sram node:
-
-- #address-cells, #size-cells : should use the same values as the root node
-- ranges : standard definition, should translate from local addresses
-           within the sram to bus addresses
-
-Optional properties in the sram node:
-
-- no-memory-wc : the flag indicating, that SRAM memory region has not to
-                 be remapped as write combining. WC is used by default.
-
-Required properties in the area nodes:
-
-- reg : iomem address range, relative to the SRAM range
-
-Optional properties in the area nodes:
-
-- compatible : standard definition, should contain a vendor specific string
-               in the form <vendor>,[<device>-]<usage>
-- pool : indicates that the particular reserved SRAM area is addressable
-         and in use by another device or devices
-- export : indicates that the reserved SRAM area may be accessed outside
-           of the kernel, e.g. by bootloader or userspace
-- protect-exec : Same as 'pool' above but with the additional
-		 constraint that code wil be run from the region and
-		 that the memory is maintained as read-only, executable
-		 during code execution. NOTE: This region must be page
-		 aligned on start and end in order to properly allow
-		 manipulation of the page attributes.
-- label : the name for the reserved partition, if omitted, the label
-          is taken from the node name excluding the unit address.
-- clocks : a list of phandle and clock specifier pair that controls the
-	   single SRAM clock.
-
-Example:
-
-sram: sram@5c000000 {
-	compatible = "mmio-sram";
-	reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */
-
-	#address-cells = <1>;
-	#size-cells = <1>;
-	ranges = <0 0x5c000000 0x40000>;
-
-	smp-sram@100 {
-		compatible = "socvendor,smp-sram";
-		reg = <0x100 0x50>;
-	};
-
-	device-sram@1000 {
-		reg = <0x1000 0x1000>;
-		pool;
-	};
-
-	exported@20000 {
-		reg = <0x20000 0x20000>;
-		export;
-	};
-};
diff --git a/Documentation/devicetree/bindings/sram/sram.yaml b/Documentation/devicetree/bindings/sram/sram.yaml
new file mode 100644
index 000000000000..7b83cc6c9bfa
--- /dev/null
+++ b/Documentation/devicetree/bindings/sram/sram.yaml
@@ -0,0 +1,262 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sram/sram.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic on-chip SRAM
+
+maintainers:
+  - Rob Herring <robh@kernel.org>
+
+description: |+
+  Simple IO memory regions to be managed by the genalloc API.
+
+  Each child of the sram node specifies a region of reserved memory. Each
+  child node should use a 'reg' property to specify a specific range of
+  reserved memory.
+
+  Following the generic-names recommended practice, node names should
+  reflect the purpose of the node. Unit address (@<address>) should be
+  appended to the name.
+
+properties:
+  $nodename:
+    pattern: "^sram(@.*)?"
+
+  compatible:
+    contains:
+      enum:
+        - mmio-sram
+        - atmel,sama5d2-securam
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description:
+      A list of phandle and clock specifier pair that controls the single
+      SRAM clock.
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  ranges:
+    description:
+      Should translate from local addresses within the sram to bus addresses.
+
+  no-memory-wc:
+    description:
+      The flag indicating, that SRAM memory region has not to be remapped
+      as write combining. WC is used by default.
+    type: boolean
+
+patternProperties:
+  "^([a-z]*-)?sram(-section)?@[a-f0-9]+$":
+    type: object
+    description:
+      Each child of the sram node specifies a region of reserved memory.
+    properties:
+      compatible:
+        description:
+          Should contain a vendor specific string in the form
+          <vendor>,[<device>-]<usage>
+        contains:
+          enum:
+            - allwinner,sun4i-a10-sram-a3-a4
+            - allwinner,sun4i-a10-sram-c1
+            - allwinner,sun4i-a10-sram-d
+            - allwinner,sun9i-a80-smp-sram
+            - allwinner,sun50i-a64-sram-c
+            - amlogic,meson8-smp-sram
+            - amlogic,meson8b-smp-sram
+            - renesas,smp-sram
+            - rockchip,rk3066-smp-sram
+            - samsung,exynos4210-sysram
+            - samsung,exynos4210-sysram-ns
+            - socionext,milbeaut-smp-sram
+
+      reg:
+        description:
+          IO mem address range, relative to the SRAM range.
+        maxItems: 1
+
+      pool:
+        description:
+          Indicates that the particular reserved SRAM area is addressable
+          and in use by another device or devices.
+        type: boolean
+
+      export:
+        description:
+          Indicates that the reserved SRAM area may be accessed outside
+          of the kernel, e.g. by bootloader or userspace.
+        type: boolean
+
+      protect-exec:
+        description: |
+          Same as 'pool' above but with the additional constraint that code
+          will be run from the region and that the memory is maintained as
+          read-only, executable during code execution. NOTE: This region must
+          be page aligned on start and end in order to properly allow
+          manipulation of the page attributes.
+        type: boolean
+
+      label:
+        description:
+          The name for the reserved partition, if omitted, the label is taken
+          from the node name excluding the unit address.
+
+    required:
+      - reg
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+  - ranges
+
+additionalProperties: false
+
+examples:
+  - |
+    sram@5c000000 {
+        compatible = "mmio-sram";
+        reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0x5c000000 0x40000>;
+
+        smp-sram@100 {
+            reg = <0x100 0x50>;
+        };
+
+        device-sram@1000 {
+            reg = <0x1000 0x1000>;
+            pool;
+        };
+
+        exported-sram@20000 {
+            reg = <0x20000 0x20000>;
+            export;
+        };
+    };
+
+  - |
+    // Samsung SMP-capable Exynos SoCs use part of the SYSRAM for the bringup
+    // of the secondary cores. Once the core gets powered up it executes the
+    // code that is residing at some specific location of the SYSRAM.
+    //
+    // Therefore reserved section sub-nodes have to be added to the mmio-sram
+    // declaration. These nodes are of two types depending upon secure or
+    // non-secure execution environment.
+    sram@2020000 {
+        compatible = "mmio-sram";
+        reg = <0x02020000 0x54000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0x02020000 0x54000>;
+
+        smp-sram@0 {
+            compatible = "samsung,exynos4210-sysram";
+            reg = <0x0 0x1000>;
+        };
+
+        smp-sram@53000 {
+            compatible = "samsung,exynos4210-sysram-ns";
+            reg = <0x53000 0x1000>;
+        };
+    };
+
+  - |
+    // Amlogic's SMP-capable SoCs use part of the sram for the bringup of the cores.
+    // Once the core gets powered up it executes the code that is residing at a
+    // specific location.
+    //
+    // Therefore a reserved section sub-node has to be added to the mmio-sram
+    // declaration.
+    sram@d9000000 {
+        compatible = "mmio-sram";
+        reg = <0xd9000000 0x20000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0xd9000000 0x20000>;
+
+        smp-sram@1ff80 {
+            compatible = "amlogic,meson8b-smp-sram";
+            reg = <0x1ff80 0x8>;
+        };
+    };
+
+  - |
+    sram@e63c0000 {
+        compatible = "mmio-sram";
+        reg = <0xe63c0000 0x1000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0xe63c0000 0x1000>;
+
+        smp-sram@0 {
+            compatible = "renesas,smp-sram";
+            reg = <0 0x10>;
+        };
+    };
+
+  - |
+    sram@10080000 {
+        compatible = "mmio-sram";
+        reg = <0x10080000 0x10000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        smp-sram@10080000 {
+            compatible = "rockchip,rk3066-smp-sram";
+            reg = <0x10080000 0x50>;
+        };
+    };
+
+  - |
+    // Allwinner's A80 SoC uses part of the secure sram for hotplugging of the
+    // primary core (cpu0). Once the core gets powered up it checks if a magic
+    // value is set at a specific location. If it is then the BROM will jump
+    // to the software entry address, instead of executing a standard boot.
+    //
+    // Also there are no "secure-only" properties. The implementation should
+    // check if this SRAM is usable first.
+    sram@20000 {
+        // 256 KiB secure SRAM at 0x20000
+        compatible = "mmio-sram";
+        reg = <0x00020000 0x40000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0x00020000 0x40000>;
+
+        smp-sram@1000 {
+            // This is checked by BROM to determine if
+            // cpu0 should jump to SMP entry vector
+            compatible = "allwinner,sun9i-a80-smp-sram";
+            reg = <0x1000 0x8>;
+        };
+    };
+
+  - |
+    sram@0 {
+        compatible = "mmio-sram";
+        reg = <0x0 0x10000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0 0x0 0x10000>;
+
+        smp-sram@f100 {
+            compatible = "socionext,milbeaut-smp-sram";
+            reg = <0xf100 0x20>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/sram/sunxi-sram.txt b/Documentation/devicetree/bindings/sram/sunxi-sram.txt
deleted file mode 100644
index 380246a805f2..000000000000
--- a/Documentation/devicetree/bindings/sram/sunxi-sram.txt
+++ /dev/null
@@ -1,113 +0,0 @@
-Allwinnner SoC SRAM controllers
------------------------------------------------------
-
-The SRAM controller found on most Allwinner devices is represented by
-a regular node for the SRAM controller itself, with sub-nodes
-reprensenting the SRAM handled by the SRAM controller.
-
-Controller Node
----------------
-
-Required properties:
-- compatible : should be:
-    - "allwinner,sun4i-a10-sram-controller" (deprecated)
-    - "allwinner,sun4i-a10-system-control"
-    - "allwinner,sun5i-a13-system-control"
-    - "allwinner,sun7i-a20-system-control", "allwinner,sun4i-a10-system-control"
-    - "allwinner,sun8i-a23-system-control"
-    - "allwinner,sun8i-h3-system-control"
-    - "allwinner,sun50i-a64-sram-controller" (deprecated)
-    - "allwinner,sun50i-a64-system-control"
-    - "allwinner,sun50i-h5-system-control"
-    - "allwinner,sun50i-h6-system-control", "allwinner,sun50i-a64-system-control"
-    - "allwinner,suniv-f1c100s-system-control", "allwinner,sun4i-a10-system-control"
-- reg : sram controller register offset + length
-
-SRAM nodes
-----------
-
-Each SRAM is described using the mmio-sram bindings documented in
-Documentation/devicetree/bindings/sram/sram.txt
-
-Each SRAM will have SRAM sections that are going to be handled by the
-SRAM controller as subnodes. These sections are represented following
-once again the representation described in the mmio-sram binding.
-
-The valid sections compatible for A10 are:
-    - allwinner,sun4i-a10-sram-a3-a4
-    - allwinner,sun4i-a10-sram-c1
-    - allwinner,sun4i-a10-sram-d
-
-The valid sections compatible for A13 are:
-    - allwinner,sun5i-a13-sram-a3-a4, allwinner,sun4i-a10-sram-a3-a4
-    - allwinner,sun5i-a13-sram-c1, allwinner,sun4i-a10-sram-c1
-    - allwinner,sun5i-a13-sram-d, allwinner,sun4i-a10-sram-d
-
-The valid sections compatible for A20 are:
-    - allwinner,sun7i-a20-sram-a3-a4, allwinner,sun4i-a10-sram-a3-a4
-    - allwinner,sun7i-a20-sram-c1, allwinner,sun4i-a10-sram-c1
-    - allwinner,sun7i-a20-sram-d, allwinner,sun4i-a10-sram-d
-
-The valid sections compatible for A23/A33 are:
-    - allwinner,sun8i-a23-sram-c1, allwinner,sun4i-a10-sram-c1
-
-The valid sections compatible for H3 are:
-    - allwinner,sun8i-h3-sram-c1, allwinner,sun4i-a10-sram-c1
-
-The valid sections compatible for A64 are:
-    - allwinner,sun50i-a64-sram-c
-    - allwinner,sun50i-a64-sram-c1, allwinner,sun4i-a10-sram-c1
-
-The valid sections compatible for H5 are:
-    - allwinner,sun50i-h5-sram-c1, allwinner,sun4i-a10-sram-c1
-
-The valid sections compatible for H6 are:
-    - allwinner,sun50i-h6-sram-c, allwinner,sun50i-a64-sram-c
-    - allwinner,sun50i-h6-sram-c1, allwinner,sun4i-a10-sram-c1
-
-The valid sections compatible for F1C100s are:
-    - allwinner,suniv-f1c100s-sram-d, allwinner,sun4i-a10-sram-d
-
-Devices using SRAM sections
----------------------------
-
-Some devices need to request to the SRAM controller to map an SRAM for
-their exclusive use.
-
-The relationship between such a device and an SRAM section is
-expressed through the allwinner,sram property, that will take a
-phandle and an argument.
-
-This valid values for this argument are:
-  - 0: CPU
-  - 1: Device
-
-Example
--------
-system-control@1c00000 {
-	compatible = "allwinner,sun4i-a10-system-control";
-	reg = <0x01c00000 0x30>;
-	#address-cells = <1>;
-	#size-cells = <1>;
-	ranges;
-
-	sram_a: sram@00000000 {
-		compatible = "mmio-sram";
-		reg = <0x00000000 0xc000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0x00000000 0xc000>;
-
-		emac_sram: sram-section@8000 {
-			compatible = "allwinner,sun4i-a10-sram-a3-a4";
-			reg = <0x8000 0x4000>;
-		};
-	};
-};
-
-emac: ethernet@1c0b000 {
-	compatible = "allwinner,sun4i-a10-emac";
-	...
-
-	allwinner,sram = <&emac_sram 1>;
-};
diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt
index de0d6090c0fd..98bee6240b65 100644
--- a/Documentation/devicetree/bindings/submitting-patches.txt
+++ b/Documentation/devicetree/bindings/submitting-patches.txt
@@ -15,17 +15,28 @@ I. For patch submitters
      use "Documentation" or "doc" because that is implied. All bindings are
      docs. Repeating "binding" again should also be avoided.
 
-  2) Submit the entire series to the devicetree mailinglist at
+  2) DT binding files are written in DT schema format using json-schema
+     vocabulary and YAML file format. The DT binding files must pass validation
+     by running:
+
+       make dt_binding_check
+
+     See ../writing-schema.rst for more details about schema and tools setup.
+
+  3) DT binding files should be dual licensed. The preferred license tag is
+     (GPL-2.0-only OR BSD-2-Clause).
+
+  4) Submit the entire series to the devicetree mailinglist at
 
        devicetree@vger.kernel.org
 
      and Cc: the DT maintainers. Use scripts/get_maintainer.pl to identify
      all of the DT maintainers.
 
-  3) The Documentation/ portion of the patch should come in the series before
+  5) The Documentation/ portion of the patch should come in the series before
      the code implementing the binding.
 
-  4) Any compatible strings used in a chip or board DTS file must be
+  6) Any compatible strings used in a chip or board DTS file must be
      previously documented in the corresponding DT binding text file
      in Documentation/devicetree/bindings.  This rule applies even if
      the Linux device driver does not yet match on the compatible
@@ -33,7 +44,7 @@ I. For patch submitters
      followed as of commit bff5da4335256513497cc8c79f9a9d1665e09864
      ("checkpatch: add DT compatible string documentation checks"). ]
 
-  5) The wildcard "<chip>" may be used in compatible strings, as in
+  7) The wildcard "<chip>" may be used in compatible strings, as in
      the following example:
 
          - compatible: Must contain '"nvidia,<chip>-pcie",
@@ -42,7 +53,7 @@ I. For patch submitters
      As in the above example, the known values of "<chip>" should be
      documented if it is used.
 
-  6) If a documented compatible string is not yet matched by the
+  8) If a documented compatible string is not yet matched by the
      driver, the documentation should also include a compatible
      string that is matched by the driver (as in the "nvidia,tegra20-pcie"
      example above).
diff --git a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
new file mode 100644
index 000000000000..87369264feb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
@@ -0,0 +1,160 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/allwinner,sun8i-a83t-ths.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner SUN8I Thermal Controller Device Tree Bindings
+
+maintainers:
+  - Vasily Khoruzhick <anarsoul@gmail.com>
+  - Yangtao Li <tiny.windzz@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun8i-a83t-ths
+      - allwinner,sun8i-h3-ths
+      - allwinner,sun8i-r40-ths
+      - allwinner,sun50i-a64-ths
+      - allwinner,sun50i-h5-ths
+      - allwinner,sun50i-h6-ths
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: bus
+      - const: mod
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  nvmem-cells:
+    maxItems: 1
+    description: Calibration data for thermal sensors
+
+  nvmem-cell-names:
+    const: calibration
+
+  # See ./thermal.txt for details
+  "#thermal-sensor-cells":
+    enum:
+      - 0
+      - 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun50i-h6-ths
+
+    then:
+      properties:
+        clocks:
+          maxItems: 1
+
+        clock-names:
+          maxItems: 1
+
+    else:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          minItems: 2
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-h3-ths
+
+    then:
+      properties:
+        "#thermal-sensor-cells":
+          const: 0
+
+    else:
+      properties:
+        "#thermal-sensor-cells":
+          const: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - const: allwinner,sun8i-h3-ths
+              - const: allwinner,sun8i-r40-ths
+              - const: allwinner,sun50i-a64-ths
+              - const: allwinner,sun50i-h5-ths
+              - const: allwinner,sun50i-h6-ths
+
+    then:
+      required:
+        - clocks
+        - clock-names
+        - resets
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#thermal-sensor-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    thermal-sensor@1f04000 {
+         compatible = "allwinner,sun8i-a83t-ths";
+         reg = <0x01f04000 0x100>;
+         interrupts = <0 31 0>;
+         nvmem-cells = <&ths_calibration>;
+         nvmem-cell-names = "calibration";
+         #thermal-sensor-cells = <1>;
+    };
+
+  - |
+    thermal-sensor@1c25000 {
+         compatible = "allwinner,sun8i-h3-ths";
+         reg = <0x01c25000 0x400>;
+         clocks = <&ccu 0>, <&ccu 1>;
+         clock-names = "bus", "mod";
+         resets = <&ccu 2>;
+         interrupts = <0 31 0>;
+         nvmem-cells = <&ths_calibration>;
+         nvmem-cell-names = "calibration";
+         #thermal-sensor-cells = <0>;
+    };
+
+  - |
+    thermal-sensor@5070400 {
+         compatible = "allwinner,sun50i-h6-ths";
+         reg = <0x05070400 0x100>;
+         clocks = <&ccu 0>;
+         clock-names = "bus";
+         resets = <&ccu 2>;
+         interrupts = <0 15 0>;
+         nvmem-cells = <&ths_calibration>;
+         nvmem-cell-names = "calibration";
+         #thermal-sensor-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml b/Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml
new file mode 100644
index 000000000000..f761681e4c0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/amlogic,thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Thermal
+
+maintainers:
+  - Guillaume La Roque <glaroque@baylibre.com>
+
+description: Binding for Amlogic Thermal
+
+properties:
+  compatible:
+      items:
+        - enum:
+            - amlogic,g12a-cpu-thermal
+            - amlogic,g12a-ddr-thermal
+        - const: amlogic,g12a-thermal
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  amlogic,ao-secure:
+    description: phandle to the ao-secure syscon
+    $ref: '/schemas/types.yaml#/definitions/phandle'
+
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - amlogic,ao-secure
+
+examples:
+  - |
+        cpu_temp: temperature-sensor@ff634800 {
+                compatible = "amlogic,g12a-cpu-thermal",
+                             "amlogic,g12a-thermal";
+                reg = <0xff634800 0x50>;
+                interrupts = <0x0 0x24 0x0>;
+                clocks = <&clk 164>;
+                #thermal-sensor-cells = <0>;
+                amlogic,ao-secure = <&sec_AO>;
+        };
+...
diff --git a/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml
new file mode 100644
index 000000000000..d9fdf4809a49
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/brcm,avs-ro-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom AVS ring oscillator thermal
+
+maintainers:
+  - Stefan Wahren <wahrenst@gmx.net>
+
+description: |+
+  The thermal node should be the child of a syscon node with the
+  required property:
+
+  - compatible: Should be one of the following:
+                "brcm,bcm2711-avs-monitor", "syscon", "simple-mfd"
+
+  Refer to the the bindings described in
+  Documentation/devicetree/bindings/mfd/syscon.txt
+
+properties:
+  compatible:
+    const: brcm,bcm2711-thermal
+
+  # See ./thermal.txt for details
+  "#thermal-sensor-cells":
+    const: 0
+
+required:
+  - compatible
+  - '#thermal-sensor-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+        avs-monitor@7d5d2000 {
+                compatible = "brcm,bcm2711-avs-monitor",
+                             "syscon", "simple-mfd";
+                reg = <0x7d5d2000 0xf00>;
+
+                thermal: thermal {
+                        compatible = "brcm,bcm2711-thermal";
+                        #thermal-sensor-cells = <0>;
+                };
+        };
+...
diff --git a/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt b/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt
index 43a9ed545944..74a9ef09db8b 100644
--- a/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt
+++ b/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt
@@ -3,9 +3,13 @@
 Thermal management core, provided by the AVS TMON hardware block.
 
 Required properties:
-- compatible: must be "brcm,avs-tmon" and/or "brcm,avs-tmon-bcm7445"
+- compatible: must be one of:
+	"brcm,avs-tmon-bcm7216"
+	"brcm,avs-tmon-bcm7445"
+	"brcm,avs-tmon"
 - reg: address range for the AVS TMON registers
-- interrupts: temperature monitor interrupt, for high/low threshold triggers
+- interrupts: temperature monitor interrupt, for high/low threshold triggers,
+	      required except for "brcm,avs-tmon-bcm7216"
 - interrupt-names: should be "tmon"
 
 Example:
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.txt b/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
deleted file mode 100644
index 673cc1831ee9..000000000000
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* QCOM SoC Temperature Sensor (TSENS)
-
-Required properties:
-- compatible:
-  Must be one of the following:
-    - "qcom,msm8916-tsens" (MSM8916)
-    - "qcom,msm8974-tsens" (MSM8974)
-    - "qcom,msm8996-tsens" (MSM8996)
-    - "qcom,qcs404-tsens", "qcom,tsens-v1" (QCS404)
-    - "qcom,msm8998-tsens", "qcom,tsens-v2" (MSM8998)
-    - "qcom,sdm845-tsens", "qcom,tsens-v2" (SDM845)
-  The generic "qcom,tsens-v2" property must be used as a fallback for any SoC
-  with version 2 of the TSENS IP. MSM8996 is the only exception because the
-  generic property did not exist when support was added.
-  Similarly, the generic "qcom,tsens-v1" property must be used as a fallback for
-  any SoC with version 1 of the TSENS IP.
-
-- reg: Address range of the thermal registers.
-  New platforms containing v2.x.y of the TSENS IP must specify the SROT and TM
-  register spaces separately, with order being TM before SROT.
-  See Example 2, below.
-
-- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description.
-- #qcom,sensors: Number of sensors in tsens block
-- Refer to Documentation/devicetree/bindings/nvmem/nvmem.txt to know how to specify
-nvmem cells
-
-Example 1 (legacy support before a fallback tsens-v2 property was introduced):
-tsens: thermal-sensor@900000 {
-		compatible = "qcom,msm8916-tsens";
-		reg = <0x4a8000 0x2000>;
-		nvmem-cells = <&tsens_caldata>, <&tsens_calsel>;
-		nvmem-cell-names = "caldata", "calsel";
-		#thermal-sensor-cells = <1>;
-	};
-
-Example 2 (for any platform containing v2 of the TSENS IP):
-tsens0: thermal-sensor@c263000 {
-		compatible = "qcom,sdm845-tsens", "qcom,tsens-v2";
-		reg = <0xc263000 0x1ff>, /* TM */
-			<0xc222000 0x1ff>; /* SROT */
-		#qcom,sensors = <13>;
-		#thermal-sensor-cells = <1>;
-	};
-
-Example 3 (for any platform containing v1 of the TSENS IP):
-tsens: thermal-sensor@4a9000 {
-		compatible = "qcom,qcs404-tsens", "qcom,tsens-v1";
-		reg = <0x004a9000 0x1000>, /* TM */
-		      <0x004a8000 0x1000>; /* SROT */
-		nvmem-cells = <&tsens_caldata>;
-		nvmem-cell-names = "calib";
-		#qcom,sensors = <10>;
-		#thermal-sensor-cells = <1>;
-	};
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
new file mode 100644
index 000000000000..eef13b9446a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+# Copyright 2019 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/qcom-tsens.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: QCOM SoC Temperature Sensor (TSENS)
+
+maintainers:
+  - Amit Kucheria <amit.kucheria@linaro.org>
+
+description: |
+  QCOM SoCs have TSENS IP to allow temperature measurement. There are currently
+  three distinct major versions of the IP that is supported by a single driver.
+  The IP versions are named v0.1, v1 and v2 in the driver, where v0.1 captures
+  everything before v1 when there was no versioning information.
+
+properties:
+  compatible:
+    oneOf:
+      - description: v0.1 of TSENS
+        items:
+          - enum:
+              - qcom,msm8916-tsens
+              - qcom,msm8974-tsens
+          - const: qcom,tsens-v0_1
+
+      - description: v1 of TSENS
+        items:
+          - enum:
+              - qcom,msm8976-tsens
+              - qcom,qcs404-tsens
+          - const: qcom,tsens-v1
+
+      - description: v2 of TSENS
+        items:
+          - enum:
+              - qcom,msm8996-tsens
+              - qcom,msm8998-tsens
+              - qcom,sdm845-tsens
+          - const: qcom,tsens-v2
+
+  reg:
+    maxItems: 2
+    items:
+      - description: TM registers
+      - description: SROT registers
+
+  nvmem-cells:
+    minItems: 1
+    maxItems: 2
+    description:
+      Reference to an nvmem node for the calibration data
+
+  nvmem-cells-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - enum:
+        - caldata
+        - calsel
+
+  "#qcom,sensors":
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 1
+      - maximum: 16
+    description:
+      Number of sensors enabled on this platform
+
+  "#thermal-sensor-cells":
+    const: 1
+    description:
+      Number of cells required to uniquely identify the thermal sensors. Since
+      we have multiple sensors this is set to 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,msm8916-tsens
+              - qcom,msm8974-tsens
+              - qcom,msm8976-tsens
+              - qcom,qcs404-tsens
+              - qcom,tsens-v0_1
+              - qcom,tsens-v1
+    then:
+      properties:
+        interrupts:
+          items:
+            - description: Combined interrupt if upper or lower threshold crossed
+        interrupt-names:
+          items:
+            - const: uplow
+
+    else:
+      properties:
+        interrupts:
+          items:
+            - description: Combined interrupt if upper or lower threshold crossed
+            - description: Interrupt if critical threshold crossed
+        interrupt-names:
+          items:
+            - const: uplow
+            - const: critical
+
+required:
+  - compatible
+  - reg
+  - "#qcom,sensors"
+  - interrupts
+  - interrupt-names
+  - "#thermal-sensor-cells"
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    // Example 1 (legacy: for pre v1 IP):
+    tsens1: thermal-sensor@900000 {
+           compatible = "qcom,msm8916-tsens", "qcom,tsens-v0_1";
+           reg = <0x4a9000 0x1000>, /* TM */
+                 <0x4a8000 0x1000>; /* SROT */
+
+           nvmem-cells = <&tsens_caldata>, <&tsens_calsel>;
+           nvmem-cell-names = "caldata", "calsel";
+
+           interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+           interrupt-names = "uplow";
+
+           #qcom,sensors = <5>;
+           #thermal-sensor-cells = <1>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    // Example 2 (for any platform containing v1 of the TSENS IP):
+    tsens2: thermal-sensor@4a9000 {
+          compatible = "qcom,qcs404-tsens", "qcom,tsens-v1";
+          reg = <0x004a9000 0x1000>, /* TM */
+                <0x004a8000 0x1000>; /* SROT */
+
+          nvmem-cells = <&tsens_caldata>;
+          nvmem-cell-names = "calib";
+
+          interrupts = <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>;
+          interrupt-names = "uplow";
+
+          #qcom,sensors = <10>;
+          #thermal-sensor-cells = <1>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    // Example 3 (for any platform containing v2 of the TSENS IP):
+    tsens3: thermal-sensor@c263000 {
+           compatible = "qcom,sdm845-tsens", "qcom,tsens-v2";
+           reg = <0xc263000 0x1ff>,
+                 <0xc222000 0x1ff>;
+
+           interrupts = <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>,
+                        <GIC_SPI 508 IRQ_TYPE_LEVEL_HIGH>;
+           interrupt-names = "uplow", "critical";
+
+           #qcom,sensors = <13>;
+           #thermal-sensor-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
index b6ab60f6abbf..12c740b975f7 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
@@ -8,6 +8,7 @@ Required properties:
 - compatible		: "renesas,<soctype>-thermal",
 			  Examples with soctypes are:
 			    - "renesas,r8a774a1-thermal" (RZ/G2M)
+			    - "renesas,r8a774b1-thermal" (RZ/G2N)
 			    - "renesas,r8a7795-thermal" (R-Car H3)
 			    - "renesas,r8a7796-thermal" (R-Car M3-W)
 			    - "renesas,r8a77965-thermal" (R-Car M3-N)
diff --git a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
new file mode 100644
index 000000000000..c0f59c56003d
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/st,stm32-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 digital thermal sensor (DTS) binding
+
+maintainers:
+  - David Hernandez Sanchez <david.hernandezsanchez@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-thermal
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: pclk
+
+  "#thermal-sensor-cells":
+    const: 0
+
+required:
+  - "#thermal-sensor-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    dts: thermal@50028000 {
+        compatible = "st,stm32-thermal";
+        reg = <0x50028000 0x100>;
+        clocks = <&rcc TMPSENS>;
+        clock-names = "pclk";
+        #thermal-sensor-cells = <0>;
+        interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+    thermal-zones {
+        cpu_thermal: cpu-thermal {
+            polling-delay-passive = <0>;
+            polling-delay = <0>;
+
+            thermal-sensors = <&dts>;
+            trips {
+                cpu_alert1: cpu-alert1 {
+                    temperature = <85000>;
+                    hysteresis = <0>;
+                    type = "passive";
+                };
+
+                cpu_crit: cpu-crit {
+                    temperature = <120000>;
+                    hysteresis = <0>;
+                    type = "critical";
+                };
+            };
+
+            cooling-maps {
+            };
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/thermal/stm32-thermal.txt b/Documentation/devicetree/bindings/thermal/stm32-thermal.txt
deleted file mode 100644
index 8c0d5a4d8031..000000000000
--- a/Documentation/devicetree/bindings/thermal/stm32-thermal.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-Binding for Thermal Sensor for STMicroelectronics STM32 series of SoCs.
-
-On STM32 SoCs, the Digital Temperature Sensor (DTS) is in charge of managing an
-analog block which delivers a frequency depending on the internal SoC's
-temperature. By using a reference frequency, DTS is able to provide a sample
-number which can be translated into a temperature by the user.
-
-DTS provides interrupt notification mechanism by threshold. This mechanism
-offers two temperature trip points: passive and critical. The first is intended
-for passive cooling notification while the second is used for over-temperature
-reset.
-
-Required parameters:
--------------------
-
-compatible: 	Should be "st,stm32-thermal"
-reg: 		This should be the physical base address and length of the
-		sensor's registers.
-clocks: 	Phandle of the clock used by the thermal sensor.
-		  See: Documentation/devicetree/bindings/clock/clock-bindings.txt
-clock-names: 	Should be "pclk" for register access clock and reference clock.
-		  See: Documentation/devicetree/bindings/resource-names.txt
-#thermal-sensor-cells: Should be 0. See ./thermal.txt for a description.
-interrupts:	Standard way to define interrupt number.
-
-Example:
-
-	thermal-zones {
-		cpu_thermal: cpu-thermal {
-			polling-delay-passive = <0>;
-			polling-delay = <0>;
-
-			thermal-sensors = <&thermal>;
-
-			trips {
-				cpu_alert1: cpu-alert1 {
-					temperature = <85000>;
-					hysteresis = <0>;
-					type = "passive";
-				};
-
-				cpu-crit: cpu-crit {
-					temperature = <120000>;
-					hysteresis = <0>;
-					type = "critical";
-				};
-			};
-
-			cooling-maps {
-			};
-		};
-	};
-
-	thermal: thermal@50028000 {
-		compatible = "st,stm32-thermal";
-		reg = <0x50028000 0x100>;
-		clocks = <&rcc TMPSENS>;
-		clock-names = "pclk";
-		#thermal-sensor-cells = <0>;
-		interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
-	};
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
index 20adc1c8e9cc..23e989e09766 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Timer Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
index dfa0c41fd261..40fc4bcb3145 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
@@ -8,7 +8,7 @@ title: Allwinner A13 High-Speed Timer Device Tree Bindings
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
index b3f0fe96ff0d..102f319833d9 100644
--- a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
+++ b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
@@ -99,22 +99,22 @@ examples:
       compatible = "arm,armv7-timer-mem";
       #address-cells = <1>;
       #size-cells = <1>;
-      ranges;
+      ranges = <0 0xf0001000 0x1000>;
       reg = <0xf0000000 0x1000>;
       clock-frequency = <50000000>;
 
-      frame@f0001000 {
+      frame@0 {
         frame-number = <0>;
         interrupts = <0 13 0x8>,
                <0 14 0x8>;
-        reg = <0xf0001000 0x1000>,
-              <0xf0002000 0x1000>;
+        reg = <0x0000 0x1000>,
+              <0x1000 0x1000>;
       };
 
-      frame@f0003000 {
+      frame@2000 {
         frame-number = <1>;
         interrupts = <0 15 0x8>;
-        reg = <0xf0003000 0x1000>;
+        reg = <0x2000 0x1000>;
       };
     };
 
diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.txt b/Documentation/devicetree/bindings/timer/ingenic,tcu.txt
index 5a4b9ddd9470..0b63cebc5f45 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,tcu.txt
+++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.txt
@@ -2,7 +2,7 @@ Ingenic JZ47xx SoCs Timer/Counter Unit devicetree bindings
 ==========================================================
 
 For a description of the TCU hardware and drivers, have a look at
-Documentation/mips/ingenic-tcu.txt.
+Documentation/mips/ingenic-tcu.rst.
 
 Required properties:
 
@@ -42,7 +42,7 @@ Required properties:
 - compatible: Must be one of:
   * ingenic,jz4740-pwm
   * ingenic,jz4725b-pwm
-- #pwm-cells: Should be 3. See ../pwm/pwm.txt for a description of the cell
+- #pwm-cells: Should be 3. See ../pwm/pwm.yaml for a description of the cell
   format.
 - clocks: List of phandle & clock specifiers for the TCU clocks.
 - clock-names: List of name strings for the TCU clocks.
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
index 74c3eadad844..0d256486f886 100644
--- a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
+++ b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
@@ -21,6 +21,7 @@ Required properties:
 	* "mediatek,mt6577-timer" for MT6577 and all above compatible timers (GPT)
 
 	For those SoCs that use SYST
+	* "mediatek,mt8183-timer" for MT8183 compatible timers (SYST)
 	* "mediatek,mt7629-timer" for MT7629 compatible timers (SYST)
 	* "mediatek,mt6765-timer" for MT6765 and all above compatible timers (SYST)
 
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
index a444cfc5852a..a747fabab7d3 100644
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -29,6 +29,8 @@ Required Properties:
     - "renesas,r8a77470-cmt1" for the 48-bit CMT1 device included in r8a77470.
     - "renesas,r8a774a1-cmt0" for the 32-bit CMT0 device included in r8a774a1.
     - "renesas,r8a774a1-cmt1" for the 48-bit CMT devices included in r8a774a1.
+    - "renesas,r8a774b1-cmt0" for the 32-bit CMT0 device included in r8a774b1.
+    - "renesas,r8a774b1-cmt1" for the 48-bit CMT devices included in r8a774b1.
     - "renesas,r8a774c0-cmt0" for the 32-bit CMT0 device included in r8a774c0.
     - "renesas,r8a774c0-cmt1" for the 48-bit CMT devices included in r8a774c0.
     - "renesas,r8a7790-cmt0" for the 32-bit CMT0 device included in r8a7790.
diff --git a/Documentation/devicetree/bindings/timer/renesas,tmu.txt b/Documentation/devicetree/bindings/timer/renesas,tmu.txt
index 13ad07416bdd..9dff7e5cae6a 100644
--- a/Documentation/devicetree/bindings/timer/renesas,tmu.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,tmu.txt
@@ -10,6 +10,7 @@ Required Properties:
 
   - compatible: must contain one or more of the following:
     - "renesas,tmu-r8a7740" for the r8a7740 TMU
+    - "renesas,tmu-r8a774a1" for the r8a774A1 TMU
     - "renesas,tmu-r8a774c0" for the r8a774C0 TMU
     - "renesas,tmu-r8a7778" for the r8a7778 TMU
     - "renesas,tmu-r8a7779" for the r8a7779 TMU
diff --git a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt
deleted file mode 100644
index 8f78640ad64c..000000000000
--- a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-Samsung's Multi Core Timer (MCT)
-
-The Samsung's Multi Core Timer (MCT) module includes two main blocks, the
-global timer and CPU local timers. The global timer is a 64-bit free running
-up-counter and can generate 4 interrupts when the counter reaches one of the
-four preset counter values. The CPU local timers are 32-bit free running
-down-counters and generate an interrupt when the counter expires. There is
-one CPU local timer instantiated in MCT for every CPU in the system.
-
-Required properties:
-
-- compatible: should be "samsung,exynos4210-mct".
-  (a) "samsung,exynos4210-mct", for mct compatible with Exynos4210 mct.
-  (b) "samsung,exynos4412-mct", for mct compatible with Exynos4412 mct.
-
-- reg: base address of the mct controller and length of the address space
-  it occupies.
-
-- interrupts: the list of interrupts generated by the controller. The following
-  should be the order of the interrupts specified. The local timer interrupts
-  should be specified after the four global timer interrupts have been
-  specified.
-
-	0: Global Timer Interrupt 0
-	1: Global Timer Interrupt 1
-	2: Global Timer Interrupt 2
-	3: Global Timer Interrupt 3
-	4: Local Timer Interrupt 0
-	5: Local Timer Interrupt 1
-	6: ..
-	7: ..
-	i: Local Timer Interrupt n
-
-  For MCT block that uses a per-processor interrupt for local timers, such
-  as ones compatible with "samsung,exynos4412-mct", only one local timer
-  interrupt might be specified, meaning that all local timers use the same
-  per processor interrupt.
-
-Example 1: In this example, the IP contains two local timers, using separate
-	   interrupts, so two local timer interrupts have been specified,
-	   in addition to four global timer interrupts.
-
-	mct@10050000 {
-		compatible = "samsung,exynos4210-mct";
-		reg = <0x10050000 0x800>;
-		interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
-			     <0 42 0>, <0 48 0>;
-	};
-
-Example 2: In this example, the timer interrupts are connected to two separate
-	   interrupt controllers. Hence, an interrupt-map is created to map
-	   the interrupts to the respective interrupt controllers.
-
-	mct@101c0000 {
-		compatible = "samsung,exynos4210-mct";
-		reg = <0x101C0000 0x800>;
-		interrupt-parent = <&mct_map>;
-		interrupts = <0>, <1>, <2>, <3>, <4>, <5>;
-
-		mct_map: mct-map {
-			#interrupt-cells = <1>;
-			#address-cells = <0>;
-			#size-cells = <0>;
-			interrupt-map = <0 &gic 0 57 0>,
-					<1 &gic 0 69 0>,
-					<2 &combiner 12 6>,
-					<3 &combiner 12 7>,
-					<4 &gic 0 42 0>,
-					<5 &gic 0 48 0>;
-		};
-	};
-
-Example 3: In this example, the IP contains four local timers, but using
-	   a per-processor interrupt to handle them. Either all the local
-	   timer interrupts can be specified, with the same interrupt specifier
-	   value or just the first one.
-
-	mct@10050000 {
-		compatible = "samsung,exynos4412-mct";
-		reg = <0x10050000 0x800>;
-
-		/* Both ways are possible in this case. Either: */
-		interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
-			     <0 42 0>;
-		/* or: */
-		interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
-			     <0 42 0>, <0 42 0>, <0 42 0>, <0 42 0>;
-	};
diff --git a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml
new file mode 100644
index 000000000000..273e359854dd
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/samsung,exynos4210-mct.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Multi Core Timer (MCT)
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+  The Samsung's Multi Core Timer (MCT) module includes two main blocks, the
+  global timer and CPU local timers. The global timer is a 64-bit free running
+  up-counter and can generate 4 interrupts when the counter reaches one of the
+  four preset counter values. The CPU local timers are 32-bit free running
+  down-counters and generate an interrupt when the counter expires. There is
+  one CPU local timer instantiated in MCT for every CPU in the system.
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos4210-mct
+      - samsung,exynos4412-mct
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description: |
+      Interrupts should be put in specific order. This is, the local timer
+      interrupts should be specified after the four global timer interrupts
+      have been specified:
+      0: Global Timer Interrupt 0
+      1: Global Timer Interrupt 1
+      2: Global Timer Interrupt 2
+      3: Global Timer Interrupt 3
+      4: Local Timer Interrupt 0
+      5: Local Timer Interrupt 1
+      6: ..
+      7: ..
+      i: Local Timer Interrupt n
+      For MCT block that uses a per-processor interrupt for local timers, such
+      as ones compatible with "samsung,exynos4412-mct", only one local timer
+      interrupt might be specified, meaning that all local timers use the same
+      per processor interrupt.
+    minItems: 5               # 4 Global + 1 local
+    maxItems: 20              # 4 Global + 16 local
+
+required:
+  - compatible
+  - interrupts
+  - reg
+
+examples:
+  - |
+    // In this example, the IP contains two local timers, using separate
+    // interrupts, so two local timer interrupts have been specified,
+    // in addition to four global timer interrupts.
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    timer@10050000 {
+        compatible = "samsung,exynos4210-mct";
+        reg = <0x10050000 0x800>;
+        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+  - |
+    // In this example, the timer interrupts are connected to two separate
+    // interrupt controllers. Hence, an interrupts-extended is needed.
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    timer@101c0000 {
+        compatible = "samsung,exynos4210-mct";
+        reg = <0x101C0000 0x800>;
+        interrupts-extended = <&gic GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                              <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+                              <&combiner 12 6>,
+                              <&combiner 12 7>,
+                              <&gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                              <&gic GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+  - |
+    // In this example, the IP contains four local timers, but using
+    // a per-processor interrupt to handle them. Only one first local
+    // interrupt is specified.
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    timer@10050000 {
+        compatible = "samsung,exynos4412-mct";
+        reg = <0x10050000 0x800>;
+
+        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_PPI 42 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+  - |
+    // In this example, the IP contains four local timers, but using
+    // a per-processor interrupt to handle them. All the local timer
+    // interrupts are specified.
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    timer@10050000 {
+        compatible = "samsung,exynos4412-mct";
+        reg = <0x10050000 0x800>;
+
+        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_PPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_PPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_PPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_PPI 42 IRQ_TYPE_LEVEL_HIGH>;
+    };
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.txt b/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
deleted file mode 100644
index 8ef28e70d6e8..000000000000
--- a/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-. STMicroelectronics STM32 timer
-
-The STM32 MCUs family has several general-purpose 16 and 32 bits timers.
-
-Required properties:
-- compatible : Should be "st,stm32-timer"
-- reg : Address and length of the register set
-- clocks : Reference on the timer input clock
-- interrupts : Reference to the timer interrupt
-
-Optional properties:
-- resets: Reference to a reset controller asserting the timer
-
-Example:
-
-timer5: timer@40000c00 {
-	compatible = "st,stm32-timer";
-	reg = <0x40000c00 0x400>;
-	interrupts = <50>;
-	resets = <&rrc 259>;
-	clocks = <&clk_pmtr1>;
-};
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
new file mode 100644
index 000000000000..176aa3c9baf8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/st,stm32-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers bindings
+
+maintainers:
+  - Benjamin Gaignard <benjamin.gaignard@st.com>
+
+properties:
+  compatible:
+    const: st,stm32-timer
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    timer: timer@40000c00 {
+        compatible = "st,stm32-timer";
+        reg = <0x40000c00 0x400>;
+        interrupts = <50>;
+        clocks = <&clk_pmtr1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 870ac52d2225..978de7d37c66 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -104,6 +104,10 @@ properties:
           - infineon,slb9645tt
             # Infineon TLV493D-A1B6 I2C 3D Magnetic Sensor
           - infineon,tlv493d-a1b6
+            # Infineon Multi-phase Digital VR Controller xdpe12254
+          - infineon,xdpe12254
+            # Infineon Multi-phase Digital VR Controller xdpe12284
+          - infineon,xdpe12284
             # Inspur Power System power supply unit version 1
           - inspur,ipsps1
             # Intersil ISL29028 Ambient Light and Proximity Sensor
@@ -114,12 +118,26 @@ properties:
           - isil,isl68137
             # 5 Bit Programmable, Pulse-Width Modulator
           - maxim,ds1050
+            # 10-bit 8 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1027
+            # 10-bit 12 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1029
+            # 10-bit 16 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1031
+            # 12-bit 8 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1227
+            # 12-bit 12 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1229
+            # 12-bit 16 channels 300ks/s SPI ADC with temperature sensor
+          - maxim,max1231
             # Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs
           - maxim,max1237
             # PECI-to-I2C translator for PECI-to-SMBus/I2C protocol conversion
           - maxim,max6621
             # 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface
           - maxim,max6625
+            # 3-Channel Remote Temperature Sensor
+          - maxim,max31730
             # mCube 3-axis 8-bit digital accelerometer
           - mcube,mc3230
             # MEMSIC 2-axis 8-bit digital accelerometer
@@ -342,6 +360,10 @@ properties:
           - ti,tmp103
             # Digital Temperature Sensor
           - ti,tmp275
+            # TI Dual channel DCAP+ multiphase controller TPS53679
+          - ti,tps53679
+            # TI Dual channel DCAP+ multiphase controller TPS53688
+          - ti,tps53688
             # Winbond/Nuvoton H/W Monitor
           - winbond,w83793
             # i2c trusted platform module (TPM)
diff --git a/Documentation/devicetree/bindings/ufs/ti,j721e-ufs.yaml b/Documentation/devicetree/bindings/ufs/ti,j721e-ufs.yaml
new file mode 100644
index 000000000000..c8a2a92074df
--- /dev/null
+++ b/Documentation/devicetree/bindings/ufs/ti,j721e-ufs.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ufs/ti,j721e-ufs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI J721e UFS Host Controller Glue Driver
+
+maintainers:
+  - Vignesh Raghavendra <vigneshr@ti.com>
+
+properties:
+  compatible:
+    items:
+      - const: ti,j721e-ufs
+
+  reg:
+    maxItems: 1
+    description: address of TI UFS glue registers
+
+  clocks:
+    maxItems: 1
+    description: phandle to the M-PHY clock
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - power-domains
+
+patternProperties:
+  "^ufs@[0-9a-f]+$":
+    type: object
+    description: |
+      Cadence UFS controller node must be the child node. Refer
+      Documentation/devicetree/bindings/ufs/cdns,ufshc.txt for binding
+      documentation of child node
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    ufs_wrapper: ufs-wrapper@4e80000 {
+       compatible = "ti,j721e-ufs";
+       reg = <0x0 0x4e80000 0x0 0x100>;
+       power-domains = <&k3_pds 277>;
+       clocks = <&k3_clks 277 1>;
+       assigned-clocks = <&k3_clks 277 1>;
+       assigned-clock-parents = <&k3_clks 277 4>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       ufs@4e84000 {
+          compatible = "cdns,ufshc-m31-16nm", "jedec,ufs-2.0";
+          reg = <0x0 0x4e84000 0x0 0x10000>;
+          interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+          freq-table-hz = <19200000 19200000>;
+          power-domains = <&k3_pds 277>;
+          clocks = <&k3_clks 277 1>;
+          assigned-clocks = <&k3_clks 277 1>;
+          assigned-clock-parents = <&k3_clks 277 4>;
+          clock-names = "core_clk";
+       };
+    };
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index d78ef63935f9..415ccdd7442d 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -13,6 +13,7 @@ Required properties:
 			    "qcom,msm8996-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
 			    "qcom,msm8998-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
 			    "qcom,sdm845-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
+			    "qcom,sm8150-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
 - interrupts        : <interrupt mapping for UFS host controller IRQ>
 - reg               : <registers mapping>
 
diff --git a/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.txt b/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.txt
deleted file mode 100644
index 50abb20fe319..000000000000
--- a/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Allwinner sun4i A10 musb DRC/OTG controller
--------------------------------------------
-
-Required properties:
- - compatible      : "allwinner,sun4i-a10-musb", "allwinner,sun6i-a31-musb",
-                     "allwinner,sun8i-a33-musb" or "allwinner,sun8i-h3-musb"
- - reg             : mmio address range of the musb controller
- - clocks          : clock specifier for the musb controller ahb gate clock
- - reset           : reset specifier for the ahb reset (A31 and newer only)
- - interrupts      : interrupt to which the musb controller is connected
- - interrupt-names : must be "mc"
- - phys            : phy specifier for the otg phy
- - phy-names       : must be "usb"
- - dr_mode         : Dual-Role mode must be "host" or "otg"
- - extcon          : extcon specifier for the otg phy
-
-Example:
-
-	usb_otg: usb@1c13000 {
-		compatible = "allwinner,sun4i-a10-musb";
-		reg = <0x01c13000 0x0400>;
-		clocks = <&ahb_gates 0>;
-		interrupts = <38>;
-		interrupt-names = "mc";
-		phys = <&usbphy 0>;
-		phy-names = "usb";
-		extcon = <&usbphy 0>;
-	};
diff --git a/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml b/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml
new file mode 100644
index 000000000000..d9207bf9d894
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/allwinner,sun4i-a10-musb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 mUSB OTG Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <mripard@kernel.org>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-musb
+      - const: allwinner,sun6i-a31-musb
+      - const: allwinner,sun8i-a33-musb
+      - const: allwinner,sun8i-h3-musb
+      - items:
+          - enum:
+              - allwinner,sun8i-a83t-musb
+              - allwinner,sun50i-h6-musb
+          - const: allwinner,sun8i-a33-musb
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    const: mc
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  phys:
+    description: PHY specifier for the OTG PHY
+
+  phy-names:
+    const: usb
+
+  extcon:
+    description: Extcon specifier for the OTG PHY
+
+  dr_mode:
+    enum:
+      - host
+      - otg
+      - peripheral
+
+  allwinner,sram:
+    description: Phandle to the device SRAM
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - phys
+  - phy-names
+  - dr_mode
+  - extcon
+
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - allwinner,sun6i-a31-musb
+          - allwinner,sun8i-a33-musb
+          - allwinner,sun8i-h3-musb
+
+then:
+  required:
+    - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    usb_otg: usb@1c13000 {
+      compatible = "allwinner,sun4i-a10-musb";
+      reg = <0x01c13000 0x0400>;
+      clocks = <&ahb_gates 0>;
+      interrupts = <38>;
+      interrupt-names = "mc";
+      phys = <&usbphy 0>;
+      phy-names = "usb";
+      extcon = <&usbphy 0>;
+      dr_mode = "peripheral";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
index 6ffb09be7a76..9a8b631904fd 100644
--- a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
@@ -40,91 +40,3 @@ Example device nodes:
 				phy-names = "usb2-phy", "usb3-phy";
 			};
 		};
-
-Amlogic Meson G12A DWC3 USB SoC Controller Glue
-
-The Amlogic G12A embeds a DWC3 USB IP Core configured for USB2 and USB3
-in host-only mode, and a DWC2 IP Core configured for USB2 peripheral mode
-only.
-
-A glue connects the DWC3 core to USB2 PHYs and optionnaly to an USB3 PHY.
-
-One of the USB2 PHY can be re-routed in peripheral mode to a DWC2 USB IP.
-
-The DWC3 Glue controls the PHY routing and power, an interrupt line is
-connected to the Glue to serve as OTG ID change detection.
-
-Required properties:
-- compatible:	Should be "amlogic,meson-g12a-usb-ctrl"
-- clocks:	a handle for the "USB" clock
-- resets:	a handle for the shared "USB" reset line
-- reg:		The base address and length of the registers
-- interrupts:	the interrupt specifier for the OTG detection
-- phys: 	handle to used PHYs on the system
-	- a <0> phandle can be used if a PHY is not used
-- phy-names:	names of the used PHYs on the system :
-	- "usb2-phy0" for USB2 PHY0 if USBHOST_A port is used
-	- "usb2-phy1" for USB2 PHY1 if USBOTG_B port is used
-	- "usb3-phy0" for USB3 PHY if USB3_0 is used
-- dr_mode:	should be "host", "peripheral", or "otg" depending on
-	the usage and configuration of the OTG Capable port.
-	- "host" and "peripheral" means a fixed Host or Device only connection
-	- "otg" means the port can be used as both Host or Device and
-	  be switched automatically using the OTG ID pin.
-
-Optional properties:
-- vbus-supply:	should be a phandle to the regulator controlling the VBUS
-		power supply when used in OTG switchable mode
-
-Required child nodes:
-
-A child node must exist to represent the core DWC3 IP block. The name of
-the node is not important. The content of the node is defined in dwc3.txt.
-
-A child node must exist to represent the core DWC2 IP block. The name of
-the node is not important. The content of the node is defined in dwc2.txt.
-
-PHY documentation is provided in the following places:
-- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
-- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
-
-Example device nodes:
-	usb: usb@ffe09000 {
-			compatible = "amlogic,meson-g12a-usb-ctrl";
-			reg = <0x0 0xffe09000 0x0 0xa0>;
-			interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
-			#address-cells = <2>;
-			#size-cells = <2>;
-			ranges;
-
-			clocks = <&clkc CLKID_USB>;
-			resets = <&reset RESET_USB>;
-
-			dr_mode = "otg";
-
-			phys = <&usb2_phy0>, <&usb2_phy1>,
-			       <&usb3_pcie_phy PHY_TYPE_USB3>;
-			phy-names = "usb2-phy0", "usb2-phy1", "usb3-phy0";
-
-			dwc2: usb@ff400000 {
-				compatible = "amlogic,meson-g12a-usb", "snps,dwc2";
-				reg = <0x0 0xff400000 0x0 0x40000>;
-				interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clkc CLKID_USB1_DDR_BRIDGE>;
-				clock-names = "ddr";
-				phys = <&usb2_phy1>;
-				dr_mode = "peripheral";
-				g-rx-fifo-size = <192>;
-				g-np-tx-fifo-size = <128>;
-				g-tx-fifo-size = <128 128 16 16 16>;
-			};
-
-			dwc3: usb@ff500000 {
-				compatible = "snps,dwc3";
-				reg = <0x0 0xff500000 0x0 0x100000>;
-				interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-				dr_mode = "host";
-				snps,dis_u2_susphy_quirk;
-				snps,quirk-frame-length-adjustment;
-			};
-	};
diff --git a/Documentation/devicetree/bindings/usb/amlogic,meson-g12a-usb-ctrl.yaml b/Documentation/devicetree/bindings/usb/amlogic,meson-g12a-usb-ctrl.yaml
new file mode 100644
index 000000000000..267fce165994
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/amlogic,meson-g12a-usb-ctrl.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/usb/amlogic,meson-g12a-usb-ctrl.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Meson G12A DWC3 USB SoC Controller Glue
+
+maintainers:
+  - Neil Armstrong <narmstrong@baylibre.com>
+
+description: |
+  The Amlogic G12A embeds a DWC3 USB IP Core configured for USB2 and USB3
+  in host-only mode, and a DWC2 IP Core configured for USB2 peripheral mode
+  only.
+
+  A glue connects the DWC3 core to USB2 PHYs and optionally to an USB3 PHY.
+
+  One of the USB2 PHYs can be re-routed in peripheral mode to a DWC2 USB IP.
+
+  The DWC3 Glue controls the PHY routing and power, an interrupt line is
+  connected to the Glue to serve as OTG ID change detection.
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson-g12a-usb-ctrl
+
+  ranges: true
+
+  "#address-cells":
+    enum: [ 1, 2 ]
+
+  "#size-cells":
+    enum: [ 1, 2 ]
+
+  clocks:
+    minItems: 1
+
+  resets:
+    minItems: 1
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  phy-names:
+    items:
+      - const: usb2-phy0 # USB2 PHY0 if USBHOST_A port is used
+      - const: usb2-phy1 # USB2 PHY1 if USBOTG_B port is used
+      - const: usb3-phy0 # USB3 PHY if USB3_0 is used
+
+  phys:
+    minItems: 1
+    maxItems: 3
+
+  dr_mode: true
+
+  power-domains:
+    maxItems: 1
+
+  vbus-supply:
+    description: VBUS power supply when used in OTG switchable mode
+
+patternProperties:
+  "^usb@[0-9a-f]+$":
+    type: object
+
+additionalProperties: false
+
+required:
+  - compatible
+  - "#address-cells"
+  - "#size-cells"
+  - ranges
+  - clocks
+  - resets
+  - reg
+  - interrupts
+  - phy-names
+  - phys
+  - dr_mode
+
+examples:
+  - |
+    usb: usb@ffe09000 {
+          compatible = "amlogic,meson-g12a-usb-ctrl";
+          reg = <0x0 0xffe09000 0x0 0xa0>;
+          interrupts = <16>;
+          #address-cells = <1>;
+          #size-cells = <1>;
+          ranges;
+
+          clocks = <&clkc_usb>;
+          resets = <&reset_usb>;
+
+          dr_mode = "otg";
+
+          phys = <&usb2_phy0>, <&usb2_phy1>, <&usb3_phy0>;
+          phy-names = "usb2-phy0", "usb2-phy1", "usb3-phy0";
+
+          dwc2: usb@ff400000 {
+              compatible = "amlogic,meson-g12a-usb", "snps,dwc2";
+              reg = <0xff400000 0x40000>;
+              interrupts = <31>;
+              clocks = <&clkc_usb1>;
+              clock-names = "otg";
+              phys = <&usb2_phy1>;
+              dr_mode = "peripheral";
+              g-rx-fifo-size = <192>;
+              g-np-tx-fifo-size = <128>;
+              g-tx-fifo-size = <128 128 16 16 16>;
+          };
+
+          dwc3: usb@ff500000 {
+              compatible = "snps,dwc3";
+              reg = <0xff500000 0x100000>;
+              interrupts = <30>;
+              dr_mode = "host";
+              snps,dis_u2_susphy_quirk;
+              snps,quirk-frame-length-adjustment;
+          };
+    };
+
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
index cfc9f40ab641..51376cbe5f3d 100644
--- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
@@ -15,6 +15,10 @@ Required properties:
 	"qcom,ci-hdrc"
 	"chipidea,usb2"
 	"xlnx,zynq-usb-2.20a"
+	"nvidia,tegra20-udc"
+	"nvidia,tegra30-udc"
+	"nvidia,tegra114-udc"
+	"nvidia,tegra124-udc"
 - reg: base address and length of the registers
 - interrupts: interrupt for the USB controller
 
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
deleted file mode 100644
index aafff3a6904d..000000000000
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-Platform DesignWare HS OTG USB 2.0 controller
------------------------------------------------------
-
-Required properties:
-- compatible : One of:
-  - brcm,bcm2835-usb: The DWC2 USB controller instance in the BCM2835 SoC.
-  - hisilicon,hi6220-usb: The DWC2 USB controller instance in the hi6220 SoC.
-  - rockchip,rk3066-usb: The DWC2 USB controller instance in the rk3066 Soc;
-  - "rockchip,px30-usb", "rockchip,rk3066-usb", "snps,dwc2": for px30 Soc;
-  - "rockchip,rk3188-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3188 Soc;
-  - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
-  - "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs;
-  - "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs;
-  - "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs;
-  - "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs;
-  - "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs;
-  - "amlogic,meson-g12a-usb": The DWC2 USB controller instance in Amlogic G12A SoCs;
-  - "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs;
-  - snps,dwc2: A generic DWC2 USB controller with default parameters.
-  - "st,stm32f4x9-fsotg": The DWC2 USB FS/HS controller instance in STM32F4x9 SoCs
-  configured in FS mode;
-  - "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs
-  configured in HS mode;
-  - "st,stm32f7-hsotg": The DWC2 USB HS controller instance in STM32F7 SoCs
-    configured in HS mode;
-- reg : Should contain 1 register range (address and length)
-- interrupts : Should contain 1 interrupt
-- clocks: clock provider specifier
-- clock-names: shall be "otg"
-Refer to clk/clock-bindings.txt for generic clock consumer properties
-
-Optional properties:
-- phys: phy provider specifier
-- phy-names: shall be "usb2-phy"
-- vbus-supply: reference to the VBUS regulator. Depending on the current mode
-  this is enabled (in "host" mode") or disabled (in "peripheral" mode). The
-  regulator is updated if the controller is configured in "otg" mode and the
-  status changes between "host" and "peripheral".
-Refer to phy/phy-bindings.txt for generic phy consumer properties
-- dr_mode: shall be one of "host", "peripheral" and "otg"
-  Refer to usb/generic.txt
-- g-rx-fifo-size: size of rx fifo size in gadget mode.
-- g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
-- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
-- snps,need-phy-for-wake: If present indicates that the phy needs to be left
-                          on for remote wakeup during suspend.
-- snps,reset-phy-on-wake: If present indicates that we need to reset the PHY when
-                          we detect a wakeup.  This is due to a hardware errata.
-
-Deprecated properties:
-- g-use-dma: gadget DMA mode is automatically detected
-
-Example:
-
-        usb@101c0000 {
-                compatible = "ralink,rt3050-usb, snps,dwc2";
-                reg = <0x101c0000 40000>;
-                interrupts = <18>;
-		clocks = <&usb_otg_ahb_clk>;
-		clock-names = "otg";
-		phys = <&usbphy>;
-		phy-names = "usb2-phy";
-		snps,need-phy-for-wake;
-        };
diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml
new file mode 100644
index 000000000000..71cf7ba32237
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/dwc2.yaml
@@ -0,0 +1,151 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/dwc2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DesignWare HS OTG USB 2.0 controller Bindings
+
+maintainers:
+  - Rob Herring <robh@kernel.org>
+
+properties:
+  compatible:
+    oneOf:
+      - const: brcm,bcm2835-usb
+      - const: hisilicon,hi6220-usb
+      - items:
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,px30-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,rk3036-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,rv1108-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,rk3188-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,rk3228-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - items:
+          - const: rockchip,rk3288-usb
+          - const: rockchip,rk3066-usb
+          - const: snps,dwc2
+      - const: lantiq,arx100-usb
+      - const: lantiq,xrx200-usb
+      - items:
+          - const: amlogic,meson8-usb
+          - const: snps,dwc2
+      - items:
+          - const: amlogic,meson8b-usb
+          - const: snps,dwc2
+      - const: amlogic,meson-gxbb-usb
+      - items:
+          - const: amlogic,meson-g12a-usb
+          - const: snps,dwc2
+      - const: amcc,dwc-otg
+      - const: snps,dwc2
+      - const: st,stm32f4x9-fsotg
+      - const: st,stm32f4x9-hsotg
+      - const: st,stm32f7-hsotg
+      - const: samsung,s3c6400-hsotg
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: otg
+
+  resets:
+    items:
+     - description: common reset
+     - description: ecc reset
+    minItems: 1
+
+  reset-names:
+    items:
+     - const: dwc2
+     - const: dwc2-ecc
+    minItems: 1
+
+  phys:
+    maxItems: 1
+
+  phy-names:
+    const: usb2-phy
+
+  vbus-supply:
+    description: reference to the VBUS regulator. Depending on the current mode
+      this is enabled (in "host" mode") or disabled (in "peripheral" mode). The
+      regulator is updated if the controller is configured in "otg" mode and the
+      status changes between "host" and "peripheral".
+
+  vusb_d-supply:
+    description: phandle to voltage regulator of digital section,
+
+  vusb_a-supply:
+    description: phandle to voltage regulator of analog section.
+
+  dr_mode:
+    enum: [host, peripheral, otg]
+
+  g-rx-fifo-size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: size of rx fifo size in gadget mode.
+
+  g-np-tx-fifo-size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: size of non-periodic tx fifo size in gadget mode.
+
+  g-tx-fifo-size:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
+
+  snps,need-phy-for-wake:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: If present indicates that the phy needs to be left on for remote wakeup during suspend.
+
+  snps,reset-phy-on-wake:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: If present indicates that we need to reset the PHY when we detect a wakeup.
+                 This is due to a hardware errata.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+      usb@101c0000 {
+        compatible = "rockchip,rk3066-usb", "snps,dwc2";
+        reg = <0x10180000 0x40000>;
+        interrupts = <18>;
+        clocks = <&usb_otg_ahb_clk>;
+        clock-names = "otg";
+        phys = <&usbphy>;
+        phy-names = "usb2-phy";
+      };
+
+...
diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
index 1ca64c85191a..10edd05872ea 100644
--- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml
+++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
@@ -63,6 +63,11 @@ properties:
     description:
       Set this flag to force EHCI reset after resume.
 
+  companion:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+     Phandle of a companion.
+
   phys:
     description: PHY specifier for the USB PHY
 
diff --git a/Documentation/devicetree/bindings/usb/generic.txt b/Documentation/devicetree/bindings/usb/generic.txt
index cf5a1ad456e6..e6790d2a4da9 100644
--- a/Documentation/devicetree/bindings/usb/generic.txt
+++ b/Documentation/devicetree/bindings/usb/generic.txt
@@ -2,10 +2,11 @@ Generic USB Properties
 
 Optional properties:
  - maximum-speed: tells USB controllers we want to work up to a certain
-			speed. Valid arguments are "super-speed", "high-speed",
-			"full-speed" and "low-speed". In case this isn't passed
-			via DT, USB controllers should default to their maximum
-			HW capability.
+			speed. Valid arguments are "super-speed-plus",
+			"super-speed", "high-speed", "full-speed" and
+			"low-speed". In case this isn't passed via DT, USB
+			controllers should default to their maximum HW
+			capability.
  - dr_mode: tells Dual-Role USB controllers that we want to work on a
 			particular mode. Valid arguments are "host",
 			"peripheral" and "otg". In case this attribute isn't
diff --git a/Documentation/devicetree/bindings/usb/mediatek,musb.txt b/Documentation/devicetree/bindings/usb/mediatek,musb.txt
new file mode 100644
index 000000000000..2b8a87c90d9e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/mediatek,musb.txt
@@ -0,0 +1,57 @@
+MediaTek musb DRD/OTG controller
+-------------------------------------------
+
+Required properties:
+ - compatible      : should be one of:
+                     "mediatek,mt2701-musb"
+                     ...
+                     followed by "mediatek,mtk-musb"
+ - reg             : specifies physical base address and size of
+                     the registers
+ - interrupts      : interrupt used by musb controller
+ - interrupt-names : must be "mc"
+ - phys            : PHY specifier for the OTG phy
+ - dr_mode         : should be one of "host", "peripheral" or "otg",
+                     refer to usb/generic.txt
+ - clocks          : a list of phandle + clock-specifier pairs, one for
+                     each entry in clock-names
+ - clock-names     : must contain "main", "mcu", "univpll"
+                     for clocks of controller
+
+Optional properties:
+ - power-domains   : a phandle to USB power domain node to control USB's
+                     MTCMOS
+
+Required child nodes:
+ usb connector node as defined in bindings/connector/usb-connector.txt
+Optional properties:
+ - id-gpios        : input GPIO for USB ID pin.
+ - vbus-gpios      : input GPIO for USB VBUS pin.
+ - vbus-supply     : reference to the VBUS regulator, needed when supports
+                     dual-role mode
+ - usb-role-switch : use USB Role Switch to support dual-role switch, see
+                     usb/generic.txt.
+
+Example:
+
+usb2: usb@11200000 {
+	compatible = "mediatek,mt2701-musb",
+		     "mediatek,mtk-musb";
+	reg = <0 0x11200000 0 0x1000>;
+	interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_LOW>;
+	interrupt-names = "mc";
+	phys = <&u2port2 PHY_TYPE_USB2>;
+	dr_mode = "otg";
+	clocks = <&pericfg CLK_PERI_USB0>,
+		 <&pericfg CLK_PERI_USB0_MCU>,
+		 <&pericfg CLK_PERI_USB_SLV>;
+	clock-names = "main","mcu","univpll";
+	power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>;
+	usb-role-switch;
+	connector{
+		compatible = "gpio-usb-b-connector", "usb-b-connector";
+		type = "micro";
+		id-gpios = <&pio 44 GPIO_ACTIVE_HIGH>;
+		vbus-supply = <&usb_vbus>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/usb/renesas,usb3-peri.txt b/Documentation/devicetree/bindings/usb/renesas,usb3-peri.txt
deleted file mode 100644
index 35039e720515..000000000000
--- a/Documentation/devicetree/bindings/usb/renesas,usb3-peri.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Renesas Electronics USB3.0 Peripheral driver
-
-Required properties:
-  - compatible: Must contain one of the following:
-	- "renesas,r8a774a1-usb3-peri"
-	- "renesas,r8a774c0-usb3-peri"
-	- "renesas,r8a7795-usb3-peri"
-	- "renesas,r8a7796-usb3-peri"
-	- "renesas,r8a77965-usb3-peri"
-	- "renesas,r8a77990-usb3-peri"
-	- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 or RZ/G2
-	  compatible device
-
-    When compatible with the generic version, nodes must list the
-    SoC-specific version corresponding to the platform first
-    followed by the generic version.
-
-  - reg: Base address and length of the register for the USB3.0 Peripheral
-  - interrupts: Interrupt specifier for the USB3.0 Peripheral
-  - clocks: clock phandle and specifier pair
-
-Optional properties:
-  - phys: phandle + phy specifier pair
-  - phy-names: must be "usb"
-
-Example of R-Car H3 ES1.x:
-	usb3_peri0: usb@ee020000 {
-		compatible = "renesas,r8a7795-usb3-peri",
-			     "renesas,rcar-gen3-usb3-peri";
-		reg = <0 0xee020000 0 0x400>;
-		interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 328>;
-	};
-
-	usb3_peri1: usb@ee060000 {
-		compatible = "renesas,r8a7795-usb3-peri",
-			     "renesas,rcar-gen3-usb3-peri";
-		reg = <0 0xee060000 0 0x400>;
-		interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 327>;
-	};
diff --git a/Documentation/devicetree/bindings/usb/renesas,usb3-peri.yaml b/Documentation/devicetree/bindings/usb/renesas,usb3-peri.yaml
new file mode 100644
index 000000000000..92d8631b9aa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/renesas,usb3-peri.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/renesas,usb3-peri.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas USB 3.0 Peripheral controller
+
+maintainers:
+  - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r8a774a1-usb3-peri # RZ/G2M
+          - renesas,r8a774b1-usb3-peri # RZ/G2N
+          - renesas,r8a774c0-usb3-peri # RZ/G2E
+          - renesas,r8a7795-usb3-peri  # R-Car H3
+          - renesas,r8a7796-usb3-peri  # R-Car M3-W
+          - renesas,r8a77965-usb3-peri # R-Car M3-N
+          - renesas,r8a77990-usb3-peri # R-Car E3
+      - const: renesas,rcar-gen3-usb3-peri
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  phys:
+    maxItems: 1
+
+  phy-names:
+    const: usb
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  usb-role-switch:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: Support role switch.
+
+  companion:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: phandle of a companion.
+
+  port:
+    description: |
+      any connector to the data bus of this controller should be modelled
+      using the OF graph bindings specified, if the "usb-role-switch"
+      property is used.
+
+required:
+  - compatible
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a774c0-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a774c0-sysc.h>
+
+    usb3_peri0: usb@ee020000 {
+        compatible = "renesas,r8a774c0-usb3-peri", "renesas,rcar-gen3-usb3-peri";
+        reg = <0 0xee020000 0 0x400>;
+        interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&cpg CPG_MOD 328>;
+        companion = <&xhci0>;
+        usb-role-switch;
+
+        port {
+            usb3_role_switch: endpoint {
+                remote-endpoint = <&hd3ss3220_ep>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.txt b/Documentation/devicetree/bindings/usb/renesas,usbhs.txt
deleted file mode 100644
index e39255ea6e4f..000000000000
--- a/Documentation/devicetree/bindings/usb/renesas,usbhs.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Renesas Electronics USBHS driver
-
-Required properties:
-  - compatible: Must contain one or more of the following:
-
-	- "renesas,usbhs-r8a7743" for r8a7743 (RZ/G1M) compatible device
-	- "renesas,usbhs-r8a7744" for r8a7744 (RZ/G1N) compatible device
-	- "renesas,usbhs-r8a7745" for r8a7745 (RZ/G1E) compatible device
-	- "renesas,usbhs-r8a77470" for r8a77470 (RZ/G1C) compatible device
-	- "renesas,usbhs-r8a774a1" for r8a774a1 (RZ/G2M) compatible device
-	- "renesas,usbhs-r8a774c0" for r8a774c0 (RZ/G2E) compatible device
-	- "renesas,usbhs-r8a7790" for r8a7790 (R-Car H2) compatible device
-	- "renesas,usbhs-r8a7791" for r8a7791 (R-Car M2-W) compatible device
-	- "renesas,usbhs-r8a7792" for r8a7792 (R-Car V2H) compatible device
-	- "renesas,usbhs-r8a7793" for r8a7793 (R-Car M2-N) compatible device
-	- "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
-	- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
-	- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
-	- "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
-	- "renesas,usbhs-r8a77990" for r8a77990 (R-Car E3) compatible device
-	- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
-	- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
-	- "renesas,usbhs-r7s9210" for r7s9210 (RZ/A2) compatible device
-	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
-	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 or RZ/G2 compatible devices
-	- "renesas,rza1-usbhs" for RZ/A1 compatible device
-	- "renesas,rza2-usbhs" for RZ/A2 compatible device
-
-	When compatible with the generic version, nodes must list the
-	SoC-specific version corresponding to the platform first followed
-	by the generic version.
-
-  - reg: Base address and length of the register for the USBHS
-  - interrupts: Interrupt specifier for the USBHS
-  - clocks: A list of phandle + clock specifier pairs.
-	    - In case of "renesas,rcar-gen3-usbhs", two clocks are required.
-	      First clock should be peripheral and second one should be host.
-	    - In case of except above, one clock is required. First clock
-	      should be peripheral.
-
-Optional properties:
-  - renesas,buswait: Integer to use BUSWAIT register
-  - renesas,enable-gpio: A gpio specifier to check GPIO determining if USB
-			 function should be enabled
-  - phys: phandle + phy specifier pair
-  - phy-names: must be "usb"
-  - dmas: Must contain a list of references to DMA specifiers.
-  - dma-names : named "ch%d", where %d is the channel number ranging from zero
-                to the number of channels (DnFIFOs) minus one.
-
-Example:
-	usbhs: usb@e6590000 {
-		compatible = "renesas,usbhs-r8a7790", "renesas,rcar-gen2-usbhs";
-		reg = <0 0xe6590000 0 0x100>;
-		interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp7_clks R8A7790_CLK_HSUSB>;
-	};
diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml
new file mode 100644
index 000000000000..469affa872d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/renesas,usbhs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas USBHS (HS-USB) controller
+
+maintainers:
+  - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - const: renesas,usbhs-r7s72100 # RZ/A1
+          - const: renesas,rza1-usbhs
+
+      - items:
+          - const: renesas,usbhs-r7s9210 # RZ/A2
+          - const: renesas,rza2-usbhs
+
+      - items:
+          - enum:
+              - renesas,usbhs-r8a7743  # RZ/G1M
+              - renesas,usbhs-r8a7744  # RZ/G1N
+              - renesas,usbhs-r8a7745  # RZ/G1E
+              - renesas,usbhs-r8a77470 # RZ/G1C
+              - renesas,usbhs-r8a7790  # R-Car H2
+              - renesas,usbhs-r8a7791  # R-Car M2-W
+              - renesas,usbhs-r8a7792  # R-Car V2H
+              - renesas,usbhs-r8a7793  # R-Car M2-N
+              - renesas,usbhs-r8a7794  # R-Car E2
+          - const: renesas,rcar-gen2-usbhs
+
+      - items:
+          - enum:
+              - renesas,usbhs-r8a774a1 # RZ/G2M
+              - renesas,usbhs-r8a774b1 # RZ/G2N
+              - renesas,usbhs-r8a774c0 # RZ/G2E
+              - renesas,usbhs-r8a7795  # R-Car H3
+              - renesas,usbhs-r8a7796  # R-Car M3-W
+              - renesas,usbhs-r8a77965 # R-Car M3-N
+              - renesas,usbhs-r8a77990 # R-Car E3
+              - renesas,usbhs-r8a77995 # R-Car D3
+          - const: renesas,rcar-gen3-usbhs
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+    items:
+      - description: USB 2.0 host
+      - description: USB 2.0 peripheral
+      - description: USB 2.0 clock selector
+
+  interrupts:
+    maxItems: 1
+
+  renesas,buswait:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Integer to use BUSWAIT register.
+
+  renesas,enable-gpio:
+    description: |
+      gpio specifier to check GPIO determining if USB function should be
+      enabled.
+
+  phys:
+    maxItems: 1
+    items:
+      - description: phandle + phy specifier pair.
+
+  phy-names:
+    maxItems: 1
+    items:
+      - const: usb
+
+  dmas:
+    minItems: 2
+    maxItems: 4
+
+  dma-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      - const: ch0
+      - const: ch1
+      - const: ch2
+      - const: ch3
+
+  dr_mode: true
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: USB 2.0 host
+      - description: USB 2.0 peripheral
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7790-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a7790-sysc.h>
+
+    usbhs: usb@e6590000 {
+        compatible = "renesas,usbhs-r8a7790", "renesas,rcar-gen2-usbhs";
+        reg = <0 0xe6590000 0 0x100>;
+        interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&cpg CPG_MOD 704>;
+    };
diff --git a/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt b/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt
index d4cf53c071d9..e3fc57e605ed 100644
--- a/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt
+++ b/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt
@@ -6,10 +6,39 @@ Required properties:
  - interrupts : <a b> where a is the interrupt number and b represents an
    encoding of the sense and level information for the interrupt.
 
+Required sub-node:
+- connector: The "usb-c-connector" attached to the tcpci chip, the bindings
+  of connector node are specified in
+  Documentation/devicetree/bindings/connector/usb-connector.txt
+
 Example :
 rt1711h@4e {
 	compatible = "richtek,rt1711h";
 	reg = <0x4e>;
 	interrupt-parent = <&gpio26>;
 	interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+
+	usb_con: connector {
+		compatible = "usb-c-connector";
+		label = "USB-C";
+		data-role = "dual";
+		power-role = "dual";
+		try-power-role = "sink";
+		source-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)>;
+		sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
+			     PDO_VAR(5000, 12000, 2000)>;
+		op-sink-microwatt = <10000000>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@1 {
+				reg = <1>;
+				usb_con_ss: endpoint {
+					remote-endpoint = <&usb3_data_ss>;
+				};
+			};
+		};
+	};
 };
diff --git a/Documentation/devicetree/bindings/usb/ti,hd3ss3220.txt b/Documentation/devicetree/bindings/usb/ti,hd3ss3220.txt
new file mode 100644
index 000000000000..25780e945b15
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ti,hd3ss3220.txt
@@ -0,0 +1,38 @@
+TI HD3SS3220 TypeC DRP Port Controller.
+
+Required properties:
+ - compatible: Must be "ti,hd3ss3220".
+ - reg: I2C slave address, must be 0x47 or 0x67 based on ADDR pin.
+ - interrupts: An interrupt specifier.
+
+Required sub-node:
+ - connector: The "usb-c-connector" attached to the hd3ss3220 chip. The
+   bindings of the connector node are specified in:
+
+	Documentation/devicetree/bindings/connector/usb-connector.txt
+
+Example:
+hd3ss3220@47 {
+	compatible = "ti,hd3ss3220";
+	reg = <0x47>;
+	interrupt-parent = <&gpio6>;
+	interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+
+	connector {
+		compatible = "usb-c-connector";
+		label = "USB-C";
+		data-role = "dual";
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@1 {
+				reg = <1>;
+				hd3ss3220_ep: endpoint {
+					remote-endpoint = <&usb3_role_switch>;
+				};
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
new file mode 100644
index 000000000000..5f5264b2e9ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/usb/ti,j721e-usb.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller
+
+maintainers:
+  - Roger Quadros <rogerq@ti.com>
+
+properties:
+  compatible:
+    items:
+      - const: ti,j721e-usb
+
+  reg:
+    description: module registers
+
+  power-domains:
+    description:
+       PM domain provider node and an args specifier containing
+       the USB device id value. See,
+       Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
+
+  clocks:
+    description: Clock phandles to usb2_refclk and lpm_clk
+    minItems: 2
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: ref
+      - const: lpm
+
+  ti,usb2-only:
+    description:
+      If present, it restricts the controller to USB2.0 mode of
+      operation. Must be present if USB3 PHY is not available
+      for USB.
+    type: boolean
+
+  ti,vbus-divider:
+    description:
+      Should be present if USB VBUS line is connected to the
+      VBUS pin of the SoC via a 1/3 voltage divider.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    cdns_usb@4104000 {
+          compatible = "ti,j721e-usb";
+          reg = <0x00 0x4104000 0x00 0x100>;
+          power-domains = <&k3_pds 288 TI_SCI_PD_EXCLUSIVE>;
+          clocks = <&k3_clks 288 15>, <&k3_clks 288 3>;
+          clock-names = "ref", "lpm";
+          assigned-clocks = <&k3_clks 288 15>;	/* USB2_REFCLK */
+          assigned-clock-parents = <&k3_clks 288 16>; /* HFOSC0 */
+          #address-cells = <2>;
+          #size-cells = <2>;
+
+          usb@6000000 {
+                compatible = "cdns,usb3";
+                reg = <0x00 0x6000000 0x00 0x10000>,
+                      <0x00 0x6010000 0x00 0x10000>,
+                      <0x00 0x6020000 0x00 0x10000>;
+                reg-names = "otg", "xhci", "dev";
+                interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,	/* irq.0 */
+                             <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,	/* irq.6 */
+                             <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;	/* otgirq.0 */
+                interrupt-names = "host",
+                                  "peripheral",
+                                  "otg";
+                maximum-speed = "super-speed";
+                dr_mode = "otg";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index b49b819571f9..3f378951d624 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -10,6 +10,7 @@ Required properties:
     - "renesas,xhci-r8a7743" for r8a7743 SoC
     - "renesas,xhci-r8a7744" for r8a7744 SoC
     - "renesas,xhci-r8a774a1" for r8a774a1 SoC
+    - "renesas,xhci-r8a774b1" for r8a774b1 SoC
     - "renesas,xhci-r8a774c0" for r8a774c0 SoC
     - "renesas,xhci-r8a7790" for r8a7790 SoC
     - "renesas,xhci-r8a7791" for r8a7791 SoC
diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt
index 17915f64b8ee..1a934eab175e 100644
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ b/Documentation/devicetree/bindings/usb/usb251xb.txt
@@ -7,11 +7,12 @@ Required properties :
  - compatible : Should be "microchip,usb251xb" or one of the specific types:
 	"microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b",
 	"microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi",
-	"microchip,usb2517", "microchip,usb2517i"
+	"microchip,usb2517", "microchip,usb2517i", "microchip,usb2422"
  - reg : I2C address on the selected bus (default is <0x2C>)
 
 Optional properties :
  - reset-gpios : Should specify the gpio for hub reset
+ - vdd-supply : Should specify the phandle to the regulator supplying vdd
  - skip-config : Skip Hub configuration, but only send the USB-Attach command
  - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424)
  - product-id : Set USB Product ID of the hub (16 bit, default depends on type)
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 967e78c5ec0a..1bc7b3c4b591 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -16,7 +16,7 @@ properties: {}
 patternProperties:
   # Prefixes which are not vendors, but followed the pattern
   # DO NOT ADD NEW PROPERTIES TO THIS LIST
-  "^(at25|devbus|dmacap|dsa|exynos|gpio-fan|gpio|gpmc|hdmi|i2c-gpio),.*": true
+  "^(at25|devbus|dmacap|dsa|exynos|fsi[ab]|gpio-fan|gpio|gpmc|hdmi|i2c-gpio),.*": true
   "^(keypad|m25p|max8952|max8997|max8998|mpmc),.*": true
   "^(pinctrl-single|#pinctrl-single|PowerPC),.*": true
   "^(pl022|pxa-mmc|rcar_sound|rotary-encoder|s5m8767|sdhci),.*": true
@@ -109,6 +109,8 @@ patternProperties:
     description: Artesyn Embedded Technologies Inc.
   "^asahi-kasei,.*":
     description: Asahi Kasei Corp.
+  "^asc,.*":
+    description: All Sensors Corporation
   "^aspeed,.*":
     description: ASPEED Technology Inc.
   "^asus,.*":
@@ -149,6 +151,8 @@ patternProperties:
     description: Bosch Sensortec GmbH
   "^boundary,.*":
     description: Boundary Devices Inc.
+  "^broadmobi,.*":
+    description: Shanghai Broadmobi Communication Technology Co.,Ltd.
   "^brcm,.*":
     description: Broadcom Corporation
   "^buffalo,.*":
@@ -157,6 +161,8 @@ patternProperties:
     description: B&R Industrial Automation GmbH
   "^bticino,.*":
     description: Bticino International
+  "^calaosystems,.*":
+    description: CALAO Systems SAS
   "^calxeda,.*":
     description: Calxeda
   "^capella,.*":
@@ -343,6 +349,8 @@ patternProperties:
     description: Freescale Semiconductor
   "^fujitsu,.*":
     description: Fujitsu Ltd.
+  "^gardena,.*":
+    description: GARDENA GmbH
   "^gateworks,.*":
     description: Gateworks Corporation
   "^gcw,.*":
@@ -473,6 +481,8 @@ patternProperties:
     description: Shenzhen Jesurun Electronics Business Dept.
   "^jianda,.*":
     description: Jiandangjing Technology Co., Ltd.
+  "^kam,.*":
+    description: Kamstrup A/S
   "^karo,.*":
     description: Ka-Ro electronics GmbH
   "^keithkoep,.*":
@@ -511,6 +521,8 @@ patternProperties:
     description: Lantiq Semiconductor
   "^lattice,.*":
     description: Lattice Semiconductor
+  "^leadtek,.*":
+    description: Shenzhen Leadtek Technology Co., Ltd.
   "^leez,.*":
     description: Leez
   "^lego,.*":
@@ -611,6 +623,8 @@ patternProperties:
     description: Moxa Inc.
   "^mpl,.*":
     description: MPL AG
+  "^mps,.*":
+    description: Monolithic Power Systems Inc.
   "^mqmaker,.*":
     description: mqmaker Inc.
   "^mscc,.*":
@@ -705,6 +719,8 @@ patternProperties:
     description: Ortus Technology Co., Ltd.
   "^osddisplays,.*":
     description: OSD Displays
+  "^overkiz,.*":
+    description: Overkiz SAS
   "^ovti,.*":
     description: OmniVision Technologies
   "^oxsemi,.*":
@@ -713,6 +729,8 @@ patternProperties:
     description: Panasonic Corporation
   "^parade,.*":
     description: Parade Technologies Inc.
+  "^parallax,.*":
+    description: Parallax Inc.
   "^pda,.*":
     description: Precision Design Associates, Inc.
   "^pericom,.*":
@@ -819,6 +837,8 @@ patternProperties:
     description: Sancloud Ltd
   "^sandisk,.*":
     description: Sandisk Corporation
+  "^satoz,.*":
+    description: Satoz International Co., Ltd
   "^sbs,.*":
     description: Smart Battery System
   "^schindler,.*":
@@ -907,8 +927,12 @@ patternProperties:
     description: Startek
   "^ste,.*":
     description: ST-Ericsson
+    deprecated: true
   "^stericsson,.*":
     description: ST-Ericsson
+  "^st-ericsson,.*":
+    description: ST-Ericsson
+    deprecated: true
   "^summit,.*":
     description: Summit microelectronics
   "^sunchip,.*":
@@ -988,6 +1012,8 @@ patternProperties:
     description: Ubiquiti Networks
   "^udoo,.*":
     description: Udoo
+  "^ugoos,.*":
+    description: Ugoos Industrial Co., Ltd.
   "^uniwest,.*":
     description: United Western Technologies Corp (UniWest)
   "^upisemi,.*":
@@ -1050,10 +1076,14 @@ patternProperties:
     description: Extreme Engineering Solutions (X-ES)
   "^xillybus,.*":
     description: Xillybus Ltd.
+  "^xinpeng,.*":
+    description: Shenzhen Xinpeng Technology Co., Ltd
   "^xlnx,.*":
     description: Xilinx
   "^xunlong,.*":
     description: Shenzhen Xunlong Software CO.,Limited
+  "^xylon,.*":
+    description: Xylon
   "^yones-toptech,.*":
     description: Yones Toptech Co., Ltd.
   "^ysoft,.*":
diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
index 3a54f58683a0..e8f226376108 100644
--- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
@@ -11,7 +11,7 @@ allOf:
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
-  - Maxime Ripard <maxime.ripard@bootlin.com>
+  - Maxime Ripard <mripard@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
index d7352f709b37..4ddae6feef3b 100644
--- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
@@ -10,6 +10,9 @@ title: Meson GXBB SoCs Watchdog timer
 maintainers:
   - Neil Armstrong <narmstrong@baylibre.com>
 
+allOf:
+  - $ref: watchdog.yaml#
+
 properties:
   compatible:
     enum:
diff --git a/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt
index 4fec1e3725b4..44727fcc2729 100644
--- a/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt
@@ -1,7 +1,7 @@
 * Atmel SAMA5D4 Watchdog Timer (WDT) Controller
 
 Required properties:
-- compatible: "atmel,sama5d4-wdt"
+- compatible: "atmel,sama5d4-wdt" or "microchip,sam9x60-wdt"
 - reg: base physical address and length of memory mapped region.
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
index 9f365c1a3399..79b3c62f183d 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
@@ -10,6 +10,7 @@ Required properties:
 		 - "renesas,r8a7745-wdt" (RZ/G1E)
 		 - "renesas,r8a77470-wdt" (RZ/G1C)
 		 - "renesas,r8a774a1-wdt" (RZ/G2M)
+		 - "renesas,r8a774b1-wdt" (RZ/G2N)
 		 - "renesas,r8a774c0-wdt" (RZ/G2E)
 	         - "renesas,r8a7790-wdt" (R-Car H2)
 	         - "renesas,r8a7791-wdt" (R-Car M2-W)
@@ -18,6 +19,7 @@ Required properties:
 	         - "renesas,r8a7794-wdt" (R-Car E2)
 	         - "renesas,r8a7795-wdt" (R-Car H3)
 	         - "renesas,r8a7796-wdt" (R-Car M3-W)
+	         - "renesas,r8a77961-wdt" (R-Car M3-W+)
 		 - "renesas,r8a77965-wdt" (R-Car M3-N)
 	         - "renesas,r8a77970-wdt" (R-Car V3M)
 	         - "renesas,r8a77990-wdt" (R-Car E3)
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
deleted file mode 100644
index 46dcb48e75b4..000000000000
--- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-* Samsung's Watchdog Timer Controller
-
-The Samsung's Watchdog controller is used for resuming system operation
-after a preset amount of time during which the WDT reset event has not
-occurred.
-
-Required properties:
-- compatible : should be one among the following
-	- "samsung,s3c2410-wdt" for S3C2410
-	- "samsung,s3c6410-wdt" for S3C6410, S5PV210 and Exynos4
-	- "samsung,exynos5250-wdt" for Exynos5250
-	- "samsung,exynos5420-wdt" for Exynos5420
-	- "samsung,exynos7-wdt" for Exynos7
-
-- reg : base physical address of the controller and length of memory mapped
-	region.
-- interrupts : interrupt number to the cpu.
-- samsung,syscon-phandle : reference to syscon node (This property required only
-	in case of compatible being "samsung,exynos5250-wdt" or "samsung,exynos5420-wdt".
-	In case of Exynos5250 and 5420 this property points to syscon node holding the PMU
-	base address)
-
-Optional properties:
-- timeout-sec : contains the watchdog timeout in seconds.
-
-Example:
-
-watchdog@101d0000 {
-	compatible = "samsung,exynos5250-wdt";
-	reg = <0x101D0000 0x100>;
-	interrupts = <0 42 0>;
-	clocks = <&clock 336>;
-	clock-names = "watchdog";
-	samsung,syscon-phandle = <&pmu_syscon>;
-};
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
new file mode 100644
index 000000000000..2fa40d8864b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/samsung-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC Watchdog Timer Controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+  The Samsung's Watchdog controller is used for resuming system operation
+  after a preset amount of time during which the WDT reset event has not
+  occurred.
+
+properties:
+  compatible:
+    enum:
+      - samsung,s3c2410-wdt                   # for S3C2410
+      - samsung,s3c6410-wdt                   # for S3C6410, S5PV210 and Exynos4
+      - samsung,exynos5250-wdt                # for Exynos5250
+      - samsung,exynos5420-wdt                # for Exynos5420
+      - samsung,exynos7-wdt                   # for Exynos7
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: watchdog
+
+  interrupts:
+    maxItems: 1
+
+  samsung,syscon-phandle:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the PMU system controller node (in case of Exynos5250
+      and Exynos5420).
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - interrupts
+  - reg
+
+allOf:
+  - $ref: watchdog.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos5250-wdt
+              - samsung,exynos5420-wdt
+    then:
+      required:
+        - samsung,syscon-phandle
+
+examples:
+  - |
+    watchdog@101d0000 {
+        compatible = "samsung,exynos5250-wdt";
+        reg = <0x101D0000 0x100>;
+        interrupts = <0 42 0>;
+        clocks = <&clock 336>;
+        clock-names = "watchdog";
+        samsung,syscon-phandle = <&pmu_syscon>;
+    };
diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt
deleted file mode 100644
index d8f4430b0a13..000000000000
--- a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-STM32 Independent WatchDoG (IWDG)
----------------------------------
-
-Required properties:
-- compatible: Should be either:
-  - "st,stm32-iwdg"
-  - "st,stm32mp1-iwdg"
-- reg: Physical base address and length of the registers set for the device
-- clocks: Reference to the clock entry lsi. Additional pclk clock entry
-  is required only for st,stm32mp1-iwdg.
-- clock-names: Name of the clocks used.
-  "lsi" for st,stm32-iwdg
-  "lsi", "pclk" for st,stm32mp1-iwdg
-
-Optional Properties:
-- timeout-sec: Watchdog timeout value in seconds.
-
-Example:
-
-iwdg: watchdog@40003000 {
-	compatible = "st,stm32-iwdg";
-	reg = <0x40003000 0x400>;
-	clocks = <&clk_lsi>;
-	clock-names = "lsi";
-	timeout-sec = <32>;
-};
diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
new file mode 100644
index 000000000000..a27c504e2e4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/st,stm32-iwdg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Independent WatchDoG (IWDG) bindings
+
+maintainers:
+  - Yannick Fertre <yannick.fertre@st.com>
+  - Christophe Roullier <christophe.roullier@st.com>
+
+allOf:
+  - $ref: "watchdog.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - st,stm32-iwdg
+      - st,stm32mp1-iwdg
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Low speed clock
+      - description: Optional peripheral clock
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    items:
+      enum:
+        - lsi
+        - pclk
+    minItems: 1
+    maxItems: 2
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32mp1-clks.h>
+    watchdog@5a002000 {
+      compatible = "st,stm32mp1-iwdg";
+      reg = <0x5a002000 0x400>;
+      clocks = <&rcc IWDG2>, <&rcc CK_LSI>;
+      clock-names = "pclk", "lsi";
+      timeout-sec = <32>;
+    };
+
+...
diff --git a/Documentation/devicetree/writing-schema.rst b/Documentation/devicetree/writing-schema.rst
index f4a638072262..7635ab230456 100644
--- a/Documentation/devicetree/writing-schema.rst
+++ b/Documentation/devicetree/writing-schema.rst
@@ -117,8 +117,17 @@ project can be installed with pip::
 
     pip3 install git+https://github.com/devicetree-org/dt-schema.git@master
 
+Several executables (dt-doc-validate, dt-mk-schema, dt-validate) will be
+installed. Ensure they are in your PATH (~/.local/bin by default).
+
 dtc must also be built with YAML output support enabled. This requires that
-libyaml and its headers be installed on the host system.
+libyaml and its headers be installed on the host system. For some distributions
+that involves installing the development package, such as:
+
+Debian:
+  apt-get install libyaml-dev
+Fedora:
+  dnf -y install libyaml-devel
 
 Running checks
 ~~~~~~~~~~~~~~
@@ -130,11 +139,13 @@ binding schema. All of the DT binding documents can be validated using the
 
     make dt_binding_check
 
-In order to perform validation of DT source files, use the `dtbs_check` target::
+In order to perform validation of DT source files, use the ``dtbs_check`` target::
 
     make dtbs_check
 
-This will first run the `dt_binding_check` which generates the processed schema.
+Note that ``dtbs_check`` will skip any binding schema files with errors. It is
+necessary to use ``dt_binding_check`` to get all the validation errors in the
+binding schema files.
 
 It is also possible to run checks with a single schema file by setting the
 ``DT_SCHEMA_FILES`` variable to a specific schema file.
diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst
new file mode 100644
index 000000000000..10956583d22e
--- /dev/null
+++ b/Documentation/doc-guide/contributing.rst
@@ -0,0 +1,294 @@
+.. SPDX-License-Identifier: GPL-2.0
+How to help improve kernel documentation
+========================================
+
+Documentation is an important part of any software-development project.
+Good documentation helps to bring new developers in and helps established
+developers work more effectively.  Without top-quality documentation, a lot
+of time is wasted in reverse-engineering the code and making avoidable
+mistakes.
+
+Unfortunately, the kernel's documentation currently falls far short of what
+it needs to be to support a project of this size and importance.
+
+This guide is for contributors who would like to improve that situation.
+Kernel documentation improvements can be made by developers at a variety of
+skill levels; they are a relatively easy way to learn the kernel process in
+general and find a place in the community.  The bulk of what follows is the
+documentation maintainer's list of tasks that most urgently need to be
+done.
+
+The documentation TODO list
+---------------------------
+
+There is an endless list of tasks that need to be carried out to get our
+documentation to where it should be.  This list contains a number of
+important items, but is far from exhaustive; if you see a different way to
+improve the documentation, please do not hold back!
+
+Addressing warnings
+~~~~~~~~~~~~~~~~~~~
+
+The documentation build currently spews out an unbelievable number of
+warnings.  When you have that many, you might as well have none at all;
+people ignore them, and they will never notice when their work adds new
+ones.  For this reason, eliminating warnings is one of the highest-priority
+tasks on the documentation TODO list.  The task itself is reasonably
+straightforward, but it must be approached in the right way to be
+successful.
+
+Warnings issued by a compiler for C code can often be dismissed as false
+positives, leading to patches aimed at simply shutting the compiler up.
+Warnings from the documentation build almost always point at a real
+problem; making those warnings go away requires understanding the problem
+and fixing it at its source.  For this reason, patches fixing documentation
+warnings should probably not say "fix a warning" in the changelog title;
+they should indicate the real problem that has been fixed.
+
+Another important point is that documentation warnings are often created by
+problems in kerneldoc comments in C code.  While the documentation
+maintainer appreciates being copied on fixes for these warnings, the
+documentation tree is often not the right one to actually carry those
+fixes; they should go to the maintainer of the subsystem in question.
+
+For example, in a documentation build I grabbed a pair of warnings nearly
+at random::
+
+  ./drivers/devfreq/devfreq.c:1818: warning: bad line:
+  	- Resource-managed devfreq_register_notifier()
+  ./drivers/devfreq/devfreq.c:1854: warning: bad line:
+	- Resource-managed devfreq_unregister_notifier()
+
+(The lines were split for readability).
+
+A quick look at the source file named above turned up a couple of kerneldoc
+comments that look like this::
+
+  /**
+   * devm_devfreq_register_notifier()
+	  - Resource-managed devfreq_register_notifier()
+   * @dev:	The devfreq user device. (parent of devfreq)
+   * @devfreq:	The devfreq object.
+   * @nb:		The notifier block to be unregistered.
+   * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+   */
+
+The problem is the missing "*", which confuses the build system's
+simplistic idea of what C comment blocks look like.  This problem had been
+present since that comment was added in 2016 — a full four years.  Fixing
+it was a matter of adding the missing asterisks.  A quick look at the
+history for that file showed what the normal format for subject lines is,
+and ``scripts/get_maintainer.pl`` told me who should receive it.  The
+resulting patch looked like this::
+
+  [PATCH] PM / devfreq: Fix two malformed kerneldoc comments
+
+  Two kerneldoc comments in devfreq.c fail to adhere to the required format,
+  resulting in these doc-build warnings:
+
+    ./drivers/devfreq/devfreq.c:1818: warning: bad line:
+  	  - Resource-managed devfreq_register_notifier()
+    ./drivers/devfreq/devfreq.c:1854: warning: bad line:
+	  - Resource-managed devfreq_unregister_notifier()
+
+  Add a couple of missing asterisks and make kerneldoc a little happier.
+
+  Signed-off-by: Jonathan Corbet <corbet@lwn.net>
+  ---
+   drivers/devfreq/devfreq.c | 4 ++--
+   1 file changed, 2 insertions(+), 2 deletions(-)
+
+  diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
+  index 57f6944d65a6..00c9b80b3d33 100644
+  --- a/drivers/devfreq/devfreq.c
+  +++ b/drivers/devfreq/devfreq.c
+  @@ -1814,7 +1814,7 @@ static void devm_devfreq_notifier_release(struct device *dev, void *res)
+
+   /**
+    * devm_devfreq_register_notifier()
+  -	- Resource-managed devfreq_register_notifier()
+  + *	- Resource-managed devfreq_register_notifier()
+    * @dev:	The devfreq user device. (parent of devfreq)
+    * @devfreq:	The devfreq object.
+    * @nb:		The notifier block to be unregistered.
+  @@ -1850,7 +1850,7 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier);
+
+   /**
+    * devm_devfreq_unregister_notifier()
+  -	- Resource-managed devfreq_unregister_notifier()
+  + *	- Resource-managed devfreq_unregister_notifier()
+    * @dev:	The devfreq user device. (parent of devfreq)
+    * @devfreq:	The devfreq object.
+    * @nb:		The notifier block to be unregistered.
+  --
+  2.24.1
+
+The entire process only took a few minutes.  Of course, I then found that
+somebody else had fixed it in a separate tree, highlighting another lesson:
+always check linux-next to see if a problem has been fixed before you dig
+into it.
+
+Other fixes will take longer, especially those relating to structure
+members or function parameters that lack documentation.  In such cases, it
+is necessary to work out what the role of those members or parameters is
+and describe them correctly.  Overall, this task gets a little tedious at
+times, but it's highly important.  If we can actually eliminate warnings
+from the documentation build, then we can start expecting developers to
+avoid adding new ones.
+
+Languishing kerneldoc comments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers are encouraged to write kerneldoc comments for their code, but
+many of those comments are never pulled into the docs build.  That makes
+this information harder to find and, for example, makes Sphinx unable to
+generate links to that documentation.  Adding ``kernel-doc`` directives to
+the documentation to bring those comments in can help the community derive
+the full value of the work that has gone into creating them.
+
+The ``scripts/find-unused-docs.sh`` tool can be used to find these
+overlooked comments.
+
+Note that the most value comes from pulling in the documentation for
+exported functions and data structures.  Many subsystems also have
+kerneldoc comments for internal use; those should not be pulled into the
+documentation build unless they are placed in a document that is
+specifically aimed at developers working within the relevant subsystem.
+
+
+Typo fixes
+~~~~~~~~~~
+
+Fixing typographical or formatting errors in the documentation is a quick
+way to figure out how to create and send patches, and it is a useful
+service.  I am always willing to accept such patches.  That said, once you
+have fixed a few, please consider moving on to more advanced tasks, leaving
+some typos for the next beginner to address.
+
+Please note that some things are *not* typos and should not be "fixed":
+
+ - Both American and British English spellings are allowed within the
+   kernel documentation.  There is no need to fix one by replacing it with
+   the other.
+
+ - The question of whether a period should be followed by one or two spaces
+   is not to be debated in the context of kernel documentation.  Other
+   areas of rational disagreement, such as the "Oxford comma", are also
+   off-topic here.
+
+As with any patch to any project, please consider whether your change is
+really making things better.
+
+Ancient documentation
+~~~~~~~~~~~~~~~~~~~~~
+
+Some kernel documentation is current, maintained, and useful.  Some
+documentation is ... not.  Dusty, old, and inaccurate documentation can
+mislead readers and casts doubt on our documentation as a whole.  Anything
+that can be done to address such problems is more than welcome.
+
+Whenever you are working with a document, please consider whether it is
+current, whether it needs updating, or whether it should perhaps be removed
+altogether.  There are a number of warning signs that you can pay attention
+to here:
+
+ - References to 2.x kernels
+ - Pointers to SourceForge repositories
+ - Nothing but typo fixes in the history for several years
+ - Discussion of pre-Git workflows
+
+The best thing to do, of course, would be to bring the documentation
+current, adding whatever information is needed.  Such work often requires
+the cooperation of developers familiar with the subsystem in question, of
+course.  Developers are often more than willing to cooperate with people
+working to improve the documentation when asked nicely, and when their
+answers are listened to and acted upon.
+
+Some documentation is beyond hope; we occasionally find documents that
+refer to code that was removed from the kernel long ago, for example.
+There is surprising resistance to removing obsolete documentation, but we
+should do that anyway.  Extra cruft in our documentation helps nobody.
+
+In cases where there is perhaps some useful information in a badly outdated
+document, and you are unable to update it, the best thing to do may be to
+add a warning at the beginning.  The following text is recommended::
+
+  .. warning ::
+  	This document is outdated and in need of attention.  Please use
+	this information with caution, and please consider sending patches
+	to update it.
+
+That way, at least our long-suffering readers have been warned that the
+document may lead them astray.
+
+Documentation coherency
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The old-timers around here will remember the Linux books that showed up on
+the shelves in the 1990s.  They were simply collections of documentation
+files scrounged from various locations on the net.  The books have (mostly)
+improved since then, but the kernel's documentation is still mostly built
+on that model.  It is thousands of files, almost each of which was written
+in isolation from all of the others.  We don't have a coherent body of
+kernel documentation; we have thousands of individual documents.
+
+We have been trying to improve the situation through the creation of
+a set of "books" that group documentation for specific readers.  These
+include:
+
+ - :doc:`../admin-guide/index`
+ - :doc:`../core-api/index`
+ - :doc:`../driver-api/index`
+ - :doc:`../userspace-api/index`
+
+As well as this book on documentation itself.
+
+Moving documents into the appropriate books is an important task and needs
+to continue.  There are a couple of challenges associated with this work,
+though.  Moving documentation files creates short-term pain for the people
+who work with those files; they are understandably unenthusiastic about
+such changes.  Usually the case can be made to move a document once; we
+really don't want to keep shifting them around, though.
+
+Even when all documents are in the right place, though, we have only
+managed to turn a big pile into a group of smaller piles.  The work of
+trying to knit all of those documents together into a single whole has not
+yet begun.  If you have bright ideas on how we could proceed on that front,
+we would be more than happy to hear them.
+
+Stylesheet improvements
+~~~~~~~~~~~~~~~~~~~~~~~
+
+With the adoption of Sphinx we have much nicer-looking HTML output than we
+once did.  But it could still use a lot of improvement; Donald Knuth and
+Edward Tufte would be unimpressed.  That requires tweaking our stylesheets
+to create more typographically sound, accessible, and readable output.
+
+Be warned: if you take on this task you are heading into classic bikeshed
+territory.  Expect a lot of opinions and discussion for even relatively
+obvious changes.  That is, alas, the nature of the world we live in.
+
+Non-LaTeX PDF build
+~~~~~~~~~~~~~~~~~~~
+
+This is a decidedly nontrivial task for somebody with a lot of time and
+Python skills.  The Sphinx toolchain is relatively small and well
+contained; it is easy to add to a development system.  But building PDF or
+EPUB output requires installing LaTeX, which is anything but small or well
+contained.  That would be a nice thing to eliminate.
+
+The original hope had been to use the rst2pdf tool (https://rst2pdf.org/)
+for PDF generation, but it turned out to not be up to the task.
+Development work on rst2pdf seems to have picked up again in recent times,
+though, which is a hopeful sign.  If a suitably motivated developer were to
+work with that project to make rst2pdf work with the kernel documentation
+build, the world would be eternally grateful.
+
+Write more documentation
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Naturally, there are massive parts of the kernel that are severely
+underdocumented.  If you have the knowledge to document a specific kernel
+subsystem and the desire to do so, please do not hesitate to do some
+writing and contribute the result to the kernel.  Untold numbers of kernel
+developers and users will thank you.
diff --git a/Documentation/doc-guide/index.rst b/Documentation/doc-guide/index.rst
index 603f3ff55d5a..7c7d97784626 100644
--- a/Documentation/doc-guide/index.rst
+++ b/Documentation/doc-guide/index.rst
@@ -10,6 +10,8 @@ How to write kernel documentation
    sphinx
    kernel-doc
    parse-headers
+   contributing
+   maintainer-profile
 
 .. only::  subproject and html
 
diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index 192c36af39e2..fff6604631ea 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -476,6 +476,22 @@ internal: *[source-pattern ...]*
     .. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
        :internal:
 
+identifiers: *[ function/type ...]*
+  Include documentation for each *function* and *type* in *source*.
+  If no *function* is specified, the documentation for all functions
+  and types in the *source* will be included.
+
+  Examples::
+
+    .. kernel-doc:: lib/bitmap.c
+       :identifiers: bitmap_parselist bitmap_parselist_user
+
+    .. kernel-doc:: lib/idr.c
+       :identifiers:
+
+functions: *[ function/type ...]*
+  This is an alias of the 'identifiers' directive and deprecated.
+
 doc: *title*
   Include documentation for the ``DOC:`` paragraph identified by *title* in
   *source*. Spaces are allowed in *title*; do not quote the *title*. The *title*
@@ -488,19 +504,6 @@ doc: *title*
     .. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
        :doc: High Definition Audio over HDMI and Display Port
 
-functions: *[ function ...]*
-  Include documentation for each *function* in *source*.
-  If no *function* is specified, the documentation for all functions
-  and types in the *source* will be included.
-
-  Examples::
-
-    .. kernel-doc:: lib/bitmap.c
-       :functions: bitmap_parselist bitmap_parselist_user
-
-    .. kernel-doc:: lib/idr.c
-       :functions:
-
 Without options, the kernel-doc directive includes all documentation comments
 from the source file.
 
diff --git a/Documentation/doc-guide/maintainer-profile.rst b/Documentation/doc-guide/maintainer-profile.rst
new file mode 100644
index 000000000000..aee2f508cc89
--- /dev/null
+++ b/Documentation/doc-guide/maintainer-profile.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+Documentation subsystem maintainer entry profile
+================================================
+
+The documentation "subsystem" is the central coordinating point for the
+kernel's documentation and associated infrastructure.  It covers the
+hierarchy under Documentation/ (with the exception of
+Documentation/device-tree), various utilities under scripts/ and, at least
+some of the time, LICENSES/.
+
+It's worth noting, though, that the boundaries of this subsystem are rather
+fuzzier than normal.  Many other subsystem maintainers like to keep control
+of portions of Documentation/, and many more freely apply changes there
+when it is convenient.  Beyond that, much of the kernel's documentation is
+found in the source as kerneldoc comments; those are usually (but not
+always) maintained by the relevant subsystem maintainer.
+
+The mailing list for documentation is linux-doc@vger.kernel.org.  Patches
+should be made against the docs-next tree whenever possible.
+
+Submit checklist addendum
+-------------------------
+
+When making documentation changes, you should actually build the
+documentation and ensure that no new errors or warnings have been
+introduced.  Generating HTML documents and looking at the result will help
+to avoid unsightly misunderstandings about how things will be rendered.
+
+Key cycle dates
+---------------
+
+Patches can be sent anytime, but response will be slower than usual during
+the merge window.  The docs tree tends to close late before the merge
+window opens, since the risk of regressions from documentation patches is
+low.
+
+Review cadence
+--------------
+
+I am the sole maintainer for the documentation subsystem, and I am doing
+the work on my own time, so the response to patches will occasionally be
+slow.  I try to always send out a notification when a patch is merged (or
+when I decide that one cannot be).  Do not hesitate to send a ping if you
+have not heard back within a week of sending a patch.
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 9f4392876099..72fc2e9e2b63 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -179,6 +179,7 @@ mkutf8data
 modpost
 modules.builtin
 modules.builtin.modinfo
+modules.nsdeps
 modules.order
 modversions.h*
 nconf
diff --git a/Documentation/driver-api/devfreq.rst b/Documentation/driver-api/devfreq.rst
new file mode 100644
index 000000000000..4a0bf87a3b13
--- /dev/null
+++ b/Documentation/driver-api/devfreq.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Device Frequency Scaling
+========================
+
+Introduction
+------------
+
+This framework provides a standard kernel interface for Dynamic Voltage and
+Frequency Switching on arbitrary devices.
+
+It exposes controls for adjusting frequency through sysfs files which are
+similar to the cpufreq subsystem.
+
+Devices for which current usage can be measured can have their frequency
+automatically adjusted by governors.
+
+API
+---
+
+Device drivers need to initialize a :c:type:`devfreq_profile` and call the
+:c:func:`devfreq_add_device` function to create a :c:type:`devfreq` instance.
+
+.. kernel-doc:: include/linux/devfreq.h
+.. kernel-doc:: include/linux/devfreq-event.h
+.. kernel-doc:: drivers/devfreq/devfreq.c
+        :export:
+.. kernel-doc:: drivers/devfreq/devfreq-event.c
+        :export:
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index 1b5020ec6517..bc2d89af88ce 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -281,7 +281,8 @@ State machine
   :c:func:`driver_bound()`.)
 
 * Before a consumer device is probed, presence of supplier drivers is
-  verified by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
+  verified by checking the consumer device is not in the wait_for_suppliers
+  list and by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
   state.  The state of the links is updated to ``DL_STATE_CONSUMER_PROBE``.
   (Call to :c:func:`device_links_check_suppliers()` from
   :c:func:`really_probe()`.)
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index b541e97c7ab1..c78db28519f7 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -118,13 +118,13 @@ Kernel Functions and Structures Reference
 Reservation Objects
 -------------------
 
-.. kernel-doc:: drivers/dma-buf/reservation.c
+.. kernel-doc:: drivers/dma-buf/dma-resv.c
    :doc: Reservation Object Overview
 
-.. kernel-doc:: drivers/dma-buf/reservation.c
+.. kernel-doc:: drivers/dma-buf/dma-resv.c
    :export:
 
-.. kernel-doc:: include/linux/reservation.h
+.. kernel-doc:: include/linux/dma-resv.h
    :internal:
 
 DMA Fences
diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst
index 45953f171500..a9a7a3c84c63 100644
--- a/Documentation/driver-api/dmaengine/client.rst
+++ b/Documentation/driver-api/dmaengine/client.rst
@@ -151,6 +151,93 @@ The details of these operations are:
      Note that callbacks will always be invoked from the DMA
      engines tasklet, never from interrupt context.
 
+  Optional: per descriptor metadata
+  ---------------------------------
+  DMAengine provides two ways for metadata support.
+
+  DESC_METADATA_CLIENT
+
+    The metadata buffer is allocated/provided by the client driver and it is
+    attached to the descriptor.
+
+  .. code-block:: c
+
+     int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc,
+				   void *data, size_t len);
+
+  DESC_METADATA_ENGINE
+
+    The metadata buffer is allocated/managed by the DMA driver. The client
+    driver can ask for the pointer, maximum size and the currently used size of
+    the metadata and can directly update or read it.
+
+    Becasue the DMA driver manages the memory area containing the metadata,
+    clients must make sure that they do not try to access or get the pointer
+    after their transfer completion callback has run for the descriptor.
+    If no completion callback has been defined for the transfer, then the
+    metadata must not be accessed after issue_pending.
+    In other words: if the aim is to read back metadata after the transfer is
+    completed, then the client must use completion callback.
+
+  .. code-block:: c
+
+     void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+		size_t *payload_len, size_t *max_len);
+
+     int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc,
+		size_t payload_len);
+
+  Client drivers can query if a given mode is supported with:
+
+  .. code-block:: c
+
+     bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan,
+		enum dma_desc_metadata_mode mode);
+
+  Depending on the used mode client drivers must follow different flow.
+
+  DESC_METADATA_CLIENT
+
+    - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+      1. prepare the descriptor (dmaengine_prep_*)
+         construct the metadata in the client's buffer
+      2. use dmaengine_desc_attach_metadata() to attach the buffer to the
+         descriptor
+      3. submit the transfer
+    - DMA_DEV_TO_MEM:
+      1. prepare the descriptor (dmaengine_prep_*)
+      2. use dmaengine_desc_attach_metadata() to attach the buffer to the
+         descriptor
+      3. submit the transfer
+      4. when the transfer is completed, the metadata should be available in the
+         attached buffer
+
+  DESC_METADATA_ENGINE
+
+    - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+      1. prepare the descriptor (dmaengine_prep_*)
+      2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the
+         engine's metadata area
+      3. update the metadata at the pointer
+      4. use dmaengine_desc_set_metadata_len()  to tell the DMA engine the
+         amount of data the client has placed into the metadata buffer
+      5. submit the transfer
+    - DMA_DEV_TO_MEM:
+      1. prepare the descriptor (dmaengine_prep_*)
+      2. submit the transfer
+      3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get
+         the pointer to the engine's metadata area
+      4. read out the metadata from the pointer
+
+  .. note::
+
+     When DESC_METADATA_ENGINE mode is used the metadata area for the descriptor
+     is no longer valid after the transfer has been completed (valid up to the
+     point when the completion callback returns if used).
+
+     Mixed use of DESC_METADATA_CLIENT / DESC_METADATA_ENGINE is not allowed,
+     client drivers must use either of the modes per descriptor.
+
 4. Submit the transaction
 
    Once the descriptor has been prepared and the callback information
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index dfc4486b5743..790a15089f1f 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -247,6 +247,54 @@ after each transfer. In case of a ring buffer, they may loop
 (DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO)
 are typically fixed.
 
+Per descriptor metadata support
+-------------------------------
+Some data movement architecture (DMA controller and peripherals) uses metadata
+associated with a transaction. The DMA controller role is to transfer the
+payload and the metadata alongside.
+The metadata itself is not used by the DMA engine itself, but it contains
+parameters, keys, vectors, etc for peripheral or from the peripheral.
+
+The DMAengine framework provides a generic ways to facilitate the metadata for
+descriptors. Depending on the architecture the DMA driver can implement either
+or both of the methods and it is up to the client driver to choose which one
+to use.
+
+- DESC_METADATA_CLIENT
+
+  The metadata buffer is allocated/provided by the client driver and it is
+  attached (via the dmaengine_desc_attach_metadata() helper to the descriptor.
+
+  From the DMA driver the following is expected for this mode:
+  - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM
+    The data from the provided metadata buffer should be prepared for the DMA
+    controller to be sent alongside of the payload data. Either by copying to a
+    hardware descriptor, or highly coupled packet.
+  - DMA_DEV_TO_MEM
+    On transfer completion the DMA driver must copy the metadata to the client
+    provided metadata buffer before notifying the client about the completion.
+    After the transfer completion, DMA drivers must not touch the metadata
+    buffer provided by the client.
+
+- DESC_METADATA_ENGINE
+
+  The metadata buffer is allocated/managed by the DMA driver. The client driver
+  can ask for the pointer, maximum size and the currently used size of the
+  metadata and can directly update or read it. dmaengine_desc_get_metadata_ptr()
+  and dmaengine_desc_set_metadata_len() is provided as helper functions.
+
+  From the DMA driver the following is expected for this mode:
+  - get_metadata_ptr
+    Should return a pointer for the metadata buffer, the maximum size of the
+    metadata buffer and the currently used / valid (if any) bytes in the buffer.
+  - set_metadata_len
+    It is called by the clients after it have placed the metadata to the buffer
+    to let the DMA driver know the number of valid bytes provided.
+
+  Note: since the client will ask for the metadata pointer in the completion
+  callback (in DMA_DEV_TO_MEM case) the DMA driver must ensure that the
+  descriptor is not freed up prior the callback is called.
+
 Device operations
 -----------------
 
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index a100bef54952..46c13780994c 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -267,6 +267,8 @@ DRM
 
 GPIO
   devm_gpiod_get()
+  devm_gpiod_get_array()
+  devm_gpiod_get_array_optional()
   devm_gpiod_get_index()
   devm_gpiod_get_index_optional()
   devm_gpiod_get_optional()
@@ -313,9 +315,13 @@ IOMAP
   devm_ioport_map()
   devm_ioport_unmap()
   devm_ioremap()
-  devm_ioremap_nocache()
+  devm_ioremap_uc()
   devm_ioremap_wc()
   devm_ioremap_resource() : checks resource, requests memory region, ioremaps
+  devm_ioremap_resource_wc()
+  devm_platform_ioremap_resource() : calls devm_ioremap_resource() for platform device
+  devm_platform_ioremap_resource_wc()
+  devm_platform_ioremap_resource_byname()
   devm_iounmap()
   pcim_iomap()
   pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst
index 11d281506a04..baa6a85c8287 100644
--- a/Documentation/driver-api/driver-model/driver.rst
+++ b/Documentation/driver-api/driver-model/driver.rst
@@ -169,6 +169,49 @@ A driver's probe() may return a negative errno value to indicate that
 the driver did not bind to this device, in which case it should have
 released all resources it allocated::
 
+	void (*sync_state)(struct device *dev);
+
+sync_state is called only once for a device. It's called when all the consumer
+devices of the device have successfully probed. The list of consumers of the
+device is obtained by looking at the device links connecting that device to its
+consumer devices.
+
+The first attempt to call sync_state() is made during late_initcall_sync() to
+give firmware and drivers time to link devices to each other. During the first
+attempt at calling sync_state(), if all the consumers of the device at that
+point in time have already probed successfully, sync_state() is called right
+away. If there are no consumers of the device during the first attempt, that
+too is considered as "all consumers of the device have probed" and sync_state()
+is called right away.
+
+If during the first attempt at calling sync_state() for a device, there are
+still consumers that haven't probed successfully, the sync_state() call is
+postponed and reattempted in the future only when one or more consumers of the
+device probe successfully. If during the reattempt, the driver core finds that
+there are one or more consumers of the device that haven't probed yet, then
+sync_state() call is postponed again.
+
+A typical use case for sync_state() is to have the kernel cleanly take over
+management of devices from the bootloader. For example, if a device is left on
+and at a particular hardware configuration by the bootloader, the device's
+driver might need to keep the device in the boot configuration until all the
+consumers of the device have probed. Once all the consumers of the device have
+probed, the device's driver can synchronize the hardware state of the device to
+match the aggregated software state requested by all the consumers. Hence the
+name sync_state().
+
+While obvious examples of resources that can benefit from sync_state() include
+resources such as regulator, sync_state() can also be useful for complex
+resources like IOMMUs. For example, IOMMUs with multiple consumers (devices
+whose addresses are remapped by the IOMMU) might need to keep their mappings
+fixed at (or additive to) the boot configuration until all its consumers have
+probed.
+
+While the typical use case for sync_state() is to have the kernel cleanly take
+over management of devices from the bootloader, the usage of sync_state() is
+not restricted to that. Use it whenever it makes sense to take an action after
+all the consumers of a device have probed.
+
 	int 	(*remove)	(struct device *dev);
 
 remove is called to unbind a driver from a device. This may be
diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst
index 8382f01a53e3..e622f8f6e56a 100644
--- a/Documentation/driver-api/generic-counter.rst
+++ b/Documentation/driver-api/generic-counter.rst
@@ -7,7 +7,7 @@ Generic Counter Interface
 Introduction
 ============
 
-Counter devices are prevalent within a diverse spectrum of industries.
+Counter devices are prevalent among a diverse spectrum of industries.
 The ubiquitous presence of these devices necessitates a common interface
 and standard of interaction and exposure. This driver API attempts to
 resolve the issue of duplicate code found among existing counter device
@@ -26,23 +26,72 @@ the Generic Counter interface.
 
 There are three core components to a counter:
 
-* Count:
-  Count data for a set of Signals.
-
 * Signal:
-  Input data that is evaluated by the counter to determine the count
-  data.
+  Stream of data to be evaluated by the counter.
 
 * Synapse:
-  The association of a Signal with a respective Count.
+  Association of a Signal, and evaluation trigger, with a Count.
+
+* Count:
+  Accumulation of the effects of connected Synapses.
+
+SIGNAL
+------
+A Signal represents a stream of data. This is the input data that is
+evaluated by the counter to determine the count data; e.g. a quadrature
+signal output line of a rotary encoder. Not all counter devices provide
+user access to the Signal data, so exposure is optional for drivers.
+
+When the Signal data is available for user access, the Generic Counter
+interface provides the following available signal values:
+
+* SIGNAL_LOW:
+  Signal line is in a low state.
+
+* SIGNAL_HIGH:
+  Signal line is in a high state.
+
+A Signal may be associated with one or more Counts.
+
+SYNAPSE
+-------
+A Synapse represents the association of a Signal with a Count. Signal
+data affects respective Count data, and the Synapse represents this
+relationship.
+
+The Synapse action mode specifies the Signal data condition that
+triggers the respective Count's count function evaluation to update the
+count data. The Generic Counter interface provides the following
+available action modes:
+
+* None:
+  Signal does not trigger the count function. In Pulse-Direction count
+  function mode, this Signal is evaluated as Direction.
+
+* Rising Edge:
+  Low state transitions to high state.
+
+* Falling Edge:
+  High state transitions to low state.
+
+* Both Edges:
+  Any state transition.
+
+A counter is defined as a set of input signals associated with count
+data that are generated by the evaluation of the state of the associated
+input signals as defined by the respective count functions. Within the
+context of the Generic Counter interface, a counter consists of Counts
+each associated with a set of Signals, whose respective Synapse
+instances represent the count function update conditions for the
+associated Counts.
+
+A Synapse associates one Signal with one Count.
 
 COUNT
 -----
-A Count represents the count data for a set of Signals. The Generic
-Counter interface provides the following available count data types:
-
-* COUNT_POSITION:
-  Unsigned integer value representing position.
+A Count represents the accumulation of the effects of connected
+Synapses; i.e. the count data for a set of Signals. The Generic
+Counter interface represents the count data as a natural number.
 
 A Count has a count function mode which represents the update behavior
 for the count data. The Generic Counter interface provides the following
@@ -86,60 +135,7 @@ available count function modes:
     Any state transition on either quadrature pair signals updates the
     respective count. Quadrature encoding determines the direction.
 
-A Count has a set of one or more associated Signals.
-
-SIGNAL
-------
-A Signal represents a counter input data; this is the input data that is
-evaluated by the counter to determine the count data; e.g. a quadrature
-signal output line of a rotary encoder. Not all counter devices provide
-user access to the Signal data.
-
-The Generic Counter interface provides the following available signal
-data types for when the Signal data is available for user access:
-
-* SIGNAL_LEVEL:
-  Signal line state level. The following states are possible:
-
-  - SIGNAL_LEVEL_LOW:
-    Signal line is in a low state.
-
-  - SIGNAL_LEVEL_HIGH:
-    Signal line is in a high state.
-
-A Signal may be associated with one or more Counts.
-
-SYNAPSE
--------
-A Synapse represents the association of a Signal with a respective
-Count. Signal data affects respective Count data, and the Synapse
-represents this relationship.
-
-The Synapse action mode specifies the Signal data condition which
-triggers the respective Count's count function evaluation to update the
-count data. The Generic Counter interface provides the following
-available action modes:
-
-* None:
-  Signal does not trigger the count function. In Pulse-Direction count
-  function mode, this Signal is evaluated as Direction.
-
-* Rising Edge:
-  Low state transitions to high state.
-
-* Falling Edge:
-  High state transitions to low state.
-
-* Both Edges:
-  Any state transition.
-
-A counter is defined as a set of input signals associated with count
-data that are generated by the evaluation of the state of the associated
-input signals as defined by the respective count functions. Within the
-context of the Generic Counter interface, a counter consists of Counts
-each associated with a set of Signals, whose respective Synapse
-instances represent the count function update conditions for the
-associated Counts.
+A Count has a set of one or more associated Synapses.
 
 Paradigm
 ========
@@ -286,10 +282,36 @@ if device memory-managed registration is desired.
 Extension sysfs attributes can be created for auxiliary functionality
 and data by passing in defined counter_device_ext, counter_count_ext,
 and counter_signal_ext structures. In these cases, the
-counter_device_ext structure is used for global configuration of the
-respective Counter device, while the counter_count_ext and
-counter_signal_ext structures allow for auxiliary exposure and
-configuration of a specific Count or Signal respectively.
+counter_device_ext structure is used for global/miscellaneous exposure
+and configuration of the respective Counter device, while the
+counter_count_ext and counter_signal_ext structures allow for auxiliary
+exposure and configuration of a specific Count or Signal respectively.
+
+Determining the type of extension to create is a matter of scope.
+
+* Signal extensions are attributes that expose information/control
+  specific to a Signal. These types of attributes will exist under a
+  Signal's directory in sysfs.
+
+  For example, if you have an invert feature for a Signal, you can have
+  a Signal extension called "invert" that toggles that feature:
+  /sys/bus/counter/devices/counterX/signalY/invert
+
+* Count extensions are attributes that expose information/control
+  specific to a Count. These type of attributes will exist under a
+  Count's directory in sysfs.
+
+  For example, if you want to pause/unpause a Count from updating, you
+  can have a Count extension called "enable" that toggles such:
+  /sys/bus/counter/devices/counterX/countY/enable
+
+* Device extensions are attributes that expose information/control
+  non-specific to a particular Count or Signal. This is where you would
+  put your global features or other miscellanous functionality.
+
+  For example, if your device has an overtemp sensor, you can report the
+  chip overheated via a device extension called "error_overtemp":
+  /sys/bus/counter/devices/counterX/error_overtemp
 
 Architecture
 ============
diff --git a/Documentation/driver-api/bt8xxgpio.rst b/Documentation/driver-api/gpio/bt8xxgpio.rst
index a845feb074de..d7e75f1234e7 100644
--- a/Documentation/driver-api/bt8xxgpio.rst
+++ b/Documentation/driver-api/gpio/bt8xxgpio.rst
@@ -2,7 +2,7 @@
 A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio)
 ===================================================================
 
-For advanced documentation, see http://www.bu3sch.de/btgpio.php
+For advanced documentation, see https://bues.ch/cms/unmaintained/btgpio.html
 
 A generic digital 24-port PCI GPIO card can be built out of an ordinary
 Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 3fdb32422f8a..871922529332 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -5,7 +5,7 @@ GPIO Driver Interface
 This document serves as a guide for writers of GPIO chip drivers.
 
 Each GPIO controller driver needs to include the following header, which defines
-the structures used to define a GPIO driver:
+the structures used to define a GPIO driver::
 
 	#include <linux/gpio/driver.h>
 
@@ -398,12 +398,15 @@ provided. A big portion of overhead code will be managed by gpiolib,
 under the assumption that your interrupts are 1-to-1-mapped to the
 GPIO line index:
 
-  GPIO line offset   Hardware IRQ
-  0                  0
-  1                  1
-  2                  2
-  ...                ...
-  ngpio-1            ngpio-1
+.. csv-table::
+    :header: GPIO line offset, Hardware IRQ
+
+    0,0
+    1,1
+    2,2
+    ...,...
+    ngpio-1, ngpio-1
+
 
 If some GPIO lines do not have corresponding IRQs, the bitmask valid_mask
 and the flag need_valid_mask in gpio_irq_chip can be used to mask off some
@@ -413,7 +416,9 @@ The preferred way to set up the helpers is to fill in the
 struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
-is a typical example of a cascaded interrupt handler using gpio_irq_chip:
+is a typical example of a cascaded interrupt handler using gpio_irq_chip::
+
+.. code-block:: c
 
   /* Typical state container with dynamic irqchip */
   struct my_gpio {
@@ -448,7 +453,9 @@ is a typical example of a cascaded interrupt handler using gpio_irq_chip:
   return devm_gpiochip_add_data(dev, &g->gc, g);
 
 The helper support using hierarchical interrupt controllers as well.
-In this case the typical set-up will look like this:
+In this case the typical set-up will look like this::
+
+.. code-block:: c
 
   /* Typical state container with dynamic irqchip */
   struct my_gpio {
@@ -493,18 +500,13 @@ available but we try to move away from this:
   gpiochip. It will pass the struct gpio_chip* for the chip to all IRQ
   callbacks, so the callbacks need to embed the gpio_chip in its state
   container and obtain a pointer to the container using container_of().
-  (See Documentation/driver-model/design-patterns.txt)
+  (See Documentation/driver-api/driver-model/design-patterns.rst)
 
 - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
   as discussed above regarding different types of cascaded irqchips. The
   cascaded irq has to be handled by a threaded interrupt handler.
   Apart from that it works exactly like the chained irqchip.
 
-- DEPRECATED: gpiochip_set_chained_irqchip(): sets up a chained cascaded irq
-  handler for a gpio_chip from a parent IRQ and passes the struct gpio_chip*
-  as handler data. Notice that we pass is as the handler data, since the
-  irqchip data is likely used by the parent irqchip.
-
 - gpiochip_set_nested_irqchip(): sets up a nested cascaded irq handler for a
   gpio_chip from a parent IRQ. As the parent IRQ has usually been
   explicitly requested by the driver, this does very little more than
diff --git a/Documentation/driver-api/gpio/drivers-on-gpio.rst b/Documentation/driver-api/gpio/drivers-on-gpio.rst
index f3a189320e11..820b403d50f6 100644
--- a/Documentation/driver-api/gpio/drivers-on-gpio.rst
+++ b/Documentation/driver-api/gpio/drivers-on-gpio.rst
@@ -95,7 +95,7 @@ to emulate MCTRL (modem control) signals CTS/RTS by using two GPIO lines. The
 MTD NOR flash has add-ons for extra GPIO lines too, though the address bus is
 usually connected directly to the flash.
 
-Use those instead of talking directly to the GPIOs using sysfs; they integrate
-with kernel frameworks better than your userspace code could. Needless to say,
-just using the appropriate kernel drivers will simplify and speed up your
-embedded hacking in particular by providing ready-made components.
+Use those instead of talking directly to the GPIOs from userspace; they
+integrate with kernel frameworks better than your userspace code could.
+Needless to say, just using the appropriate kernel drivers will simplify and
+speed up your embedded hacking in particular by providing ready-made components.
diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst
index c5b8467f9104..1d48fe248f05 100644
--- a/Documentation/driver-api/gpio/index.rst
+++ b/Documentation/driver-api/gpio/index.rst
@@ -8,11 +8,13 @@ Contents:
    :maxdepth: 2
 
    intro
+   using-gpio
    driver
    consumer
    board
    drivers-on-gpio
    legacy
+   bt8xxgpio
 
 Core
 ====
diff --git a/Documentation/driver-api/gpio/using-gpio.rst b/Documentation/driver-api/gpio/using-gpio.rst
new file mode 100644
index 000000000000..dda069444032
--- /dev/null
+++ b/Documentation/driver-api/gpio/using-gpio.rst
@@ -0,0 +1,50 @@
+=========================
+Using GPIO Lines in Linux
+=========================
+
+The Linux kernel exists to abstract and present hardware to users. GPIO lines
+as such are normally not user facing abstractions. The most obvious, natural
+and preferred way to use GPIO lines is to let kernel hardware drivers deal
+with them.
+
+For examples of already existing generic drivers that will also be good
+examples for any other kernel drivers you want to author, refer to
+:doc:`drivers-on-gpio`
+
+For any kind of mass produced system you want to support, such as servers,
+laptops, phones, tablets, routers, and any consumer or office or business goods
+using appropriate kernel drivers is paramount. Submit your code for inclusion
+in the upstream Linux kernel when you feel it is mature enough and you will get
+help to refine it, see :doc:`../../process/submitting-patches`.
+
+In Linux GPIO lines also have a userspace ABI.
+
+The userspace ABI is intended for one-off deployments. Examples are prototypes,
+factory lines, maker community projects, workshop specimen, production tools,
+industrial automation, PLC-type use cases, door controllers, in short a piece
+of specialized equipment that is not produced by the numbers, requiring
+operators to have a deep knowledge of the equipment and knows about the
+software-hardware interface to be set up. They should not have a natural fit
+to any existing kernel subsystem and not be a good fit for an operating system,
+because of not being reusable or abstract enough, or involving a lot of non
+computer hardware related policy.
+
+Applications that have a good reason to use the industrial I/O (IIO) subsystem
+from userspace will likely be a good fit for using GPIO lines from userspace as
+well.
+
+Do not under any circumstances abuse the GPIO userspace ABI to cut corners in
+any product development projects. If you use it for prototyping, then do not
+productify the prototype: rewrite it using proper kernel drivers. Do not under
+any circumstances deploy any uniform products using GPIO from userspace.
+
+The userspace ABI is a character device for each GPIO hardware unit (GPIO chip).
+These devices will appear on the system as ``/dev/gpiochip0`` thru
+``/dev/gpiochipN``. Examples of how to directly use the userspace ABI can be
+found in the kernel tree ``tools/gpio`` subdirectory.
+
+For structured and managed applications, we recommend that you make use of the
+libgpiod_ library. This provides helper abstractions, command line utlities
+and arbitration for multiple simultaneous consumers on the same GPIO chip.
+
+.. _libgpiod: https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 38e638abe3eb..0ebe205efd0c 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -26,6 +26,7 @@ available subsections can be seen below.
    device_link
    component
    message-based
+   infiniband
    sound
    frame-buffer
    regulator
@@ -39,6 +40,7 @@ available subsections can be seen below.
    ipmb
    i3c/index
    interconnect
+   devfreq
    hsi
    edac
    scsi
@@ -69,11 +71,9 @@ available subsections can be seen below.
    fpga/index
    acpi/index
    backlight/lp855x-driver.rst
-   bt8xxgpio
    connector
    console
    dcdbas
-   dell_rbu
    edid
    eisa
    ipmb
@@ -93,7 +93,6 @@ available subsections can be seen below.
    pwm
    rfkill
    serial/index
-   sgi-ioc4
    sm501
    smsc_ece1099
    switchtec
diff --git a/Documentation/driver-api/infiniband.rst b/Documentation/driver-api/infiniband.rst
new file mode 100644
index 000000000000..1a3116f32ff0
--- /dev/null
+++ b/Documentation/driver-api/infiniband.rst
@@ -0,0 +1,127 @@
+===========================================
+InfiniBand and Remote DMA (RDMA) Interfaces
+===========================================
+
+Introduction and Overview
+=========================
+
+TBD
+
+InfiniBand core interfaces
+==========================
+
+.. kernel-doc:: drivers/infiniband/core/iwpm_util.h
+    :internal:
+
+.. kernel-doc:: drivers/infiniband/core/cq.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/cm.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/rw.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/device.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/verbs.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/packer.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/sa_query.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/ud_header.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/fmr_pool.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/umem.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/core/umem_odp.c
+    :export:
+
+RDMA Verbs transport library
+============================
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/mr.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/rc.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/ah.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/vt.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/cq.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/qp.c
+    :export:
+
+.. kernel-doc:: drivers/infiniband/sw/rdmavt/mcast.c
+    :export:
+
+Upper Layer Protocols
+=====================
+
+iSCSI Extensions for RDMA (iSER)
+--------------------------------
+
+.. kernel-doc:: drivers/infiniband/ulp/iser/iscsi_iser.h
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/iser/iscsi_iser.c
+   :functions: iscsi_iser_pdu_alloc iser_initialize_task_headers \
+	iscsi_iser_task_init iscsi_iser_mtask_xmit iscsi_iser_task_xmit \
+	iscsi_iser_cleanup_task iscsi_iser_check_protection \
+	iscsi_iser_conn_create iscsi_iser_conn_bind \
+	iscsi_iser_conn_start iscsi_iser_conn_stop \
+	iscsi_iser_session_destroy iscsi_iser_session_create \
+	iscsi_iser_set_param iscsi_iser_ep_connect iscsi_iser_ep_poll \
+	iscsi_iser_ep_disconnect
+
+.. kernel-doc:: drivers/infiniband/ulp/iser/iser_initiator.c
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/iser/iser_verbs.c
+   :internal:
+
+Omni-Path (OPA) Virtual NIC support
+-----------------------------------
+
+.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+   :internal:
+
+InfiniBand SCSI RDMA protocol target support
+--------------------------------------------
+
+.. kernel-doc:: drivers/infiniband/ulp/srpt/ib_srpt.h
+   :internal:
+
+.. kernel-doc:: drivers/infiniband/ulp/srpt/ib_srpt.c
+   :internal:
+
+iSCSI Extensions for RDMA (iSER) target support
+-----------------------------------------------
+
+.. kernel-doc:: drivers/infiniband/ulp/isert/ib_isert.c
+   :internal:
+
diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst
index 6172f3cc3d0b..06d98c4526df 100644
--- a/Documentation/driver-api/infrastructure.rst
+++ b/Documentation/driver-api/infrastructure.rst
@@ -49,9 +49,6 @@ Device Drivers Base
 Device Drivers DMA Management
 -----------------------------
 
-.. kernel-doc:: kernel/dma/coherent.c
-   :export:
-
 .. kernel-doc:: kernel/dma/mapping.c
    :export:
 
diff --git a/Documentation/driver-api/interconnect.rst b/Documentation/driver-api/interconnect.rst
index c3e004893796..5ed4f57a6bac 100644
--- a/Documentation/driver-api/interconnect.rst
+++ b/Documentation/driver-api/interconnect.rst
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0
 
 =====================================
-GENERIC SYSTEM INTERCONNECT SUBSYSTEM
+Generic System Interconnect Subsystem
 =====================================
 
 Introduction
@@ -91,3 +91,25 @@ Interconnect consumers are the clients which use the interconnect APIs to
 get paths between endpoints and set their bandwidth/latency/QoS requirements
 for these interconnect paths.  These interfaces are not currently
 documented.
+
+Interconnect debugfs interfaces
+-------------------------------
+
+Like several other subsystems interconnect will create some files for debugging
+and introspection. Files in debugfs are not considered ABI so application
+software shouldn't rely on format details change between kernel versions.
+
+``/sys/kernel/debug/interconnect/interconnect_summary``:
+
+Show all interconnect nodes in the system with their aggregated bandwidth
+request. Indented under each node show bandwidth requests from each device.
+
+``/sys/kernel/debug/interconnect/interconnect_graph``:
+
+Show the interconnect graph in the graphviz dot format. It shows all
+interconnect nodes and links in the system and groups together nodes from the
+same provider as subgraphs. The format is human-readable and can also be piped
+through dot to generate diagrams in many graphical formats::
+
+        $ cat /sys/kernel/debug/interconnect/interconnect_graph | \
+                dot -Tsvg > interconnect_graph.svg
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index 70e180e6b93d..207f0d24de69 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -250,23 +250,23 @@ High-level taskfile hooks
 
 ::
 
-    void (*qc_prep) (struct ata_queued_cmd *qc);
+    enum ata_completion_errors (*qc_prep) (struct ata_queued_cmd *qc);
     int (*qc_issue) (struct ata_queued_cmd *qc);
 
 
-Higher-level hooks, these two hooks can potentially supercede several of
+Higher-level hooks, these two hooks can potentially supersede several of
 the above taskfile/DMA engine hooks. ``->qc_prep`` is called after the
 buffers have been DMA-mapped, and is typically used to populate the
-hardware's DMA scatter-gather table. Most drivers use the standard
-:c:func:`ata_qc_prep` helper function, but more advanced drivers roll their
-own.
+hardware's DMA scatter-gather table. Some drivers use the standard
+:c:func:`ata_bmdma_qc_prep` and :c:func:`ata_bmdma_dumb_qc_prep` helper
+functions, but more advanced drivers roll their own.
 
 ``->qc_issue`` is used to make a command active, once the hardware and S/G
 tables have been prepared. IDE BMDMA drivers use the helper function
-:c:func:`ata_qc_issue_prot` for taskfile protocol-based dispatch. More
+:c:func:`ata_sff_qc_issue` for taskfile protocol-based dispatch. More
 advanced drivers implement their own ``->qc_issue``.
 
-:c:func:`ata_qc_issue_prot` calls ``->tf_load()``, ``->bmdma_setup()``, and
+:c:func:`ata_sff_qc_issue` calls ``->sff_tf_load()``, ``->bmdma_setup()``, and
 ``->bmdma_start()`` as necessary to initiate a transfer.
 
 Exception and probe handling (EH)
diff --git a/Documentation/driver-api/nvmem.rst b/Documentation/driver-api/nvmem.rst
index d9d958d5c824..287e86819640 100644
--- a/Documentation/driver-api/nvmem.rst
+++ b/Documentation/driver-api/nvmem.rst
@@ -129,6 +129,8 @@ To facilitate such consumers NVMEM framework provides below apis::
   struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
   struct nvmem_device *devm_nvmem_device_get(struct device *dev,
 					   const char *name);
+  struct nvmem_device *nvmem_device_find(void *data,
+			int (*match)(struct device *dev, const void *data));
   void nvmem_device_put(struct nvmem_device *nvmem);
   int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
 		      size_t bytes, void *buf);
diff --git a/Documentation/driver-api/pti_intel_mid.rst b/Documentation/driver-api/pti_intel_mid.rst
index 20f1cff42d5f..bacc2a4ee89f 100644
--- a/Documentation/driver-api/pti_intel_mid.rst
+++ b/Documentation/driver-api/pti_intel_mid.rst
@@ -49,7 +49,9 @@ but is not just blindly executing as 'root'. Keep in mind
 the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
 and n_tracesink line discpline drivers but is a generic
 operation for a program to use a line discpline driver
-on a tty port other than the default n_tty::
+on a tty port other than the default n_tty:
+
+.. code-block:: c
 
   /////////// To hook up n_tracerouter and n_tracesink /////////
 
diff --git a/Documentation/driver-api/thermal/cpu-idle-cooling.rst b/Documentation/driver-api/thermal/cpu-idle-cooling.rst
new file mode 100644
index 000000000000..e4f0859486c7
--- /dev/null
+++ b/Documentation/driver-api/thermal/cpu-idle-cooling.rst
@@ -0,0 +1,189 @@
+
+Situation:
+----------
+
+Under certain circumstances a SoC can reach a critical temperature
+limit and is unable to stabilize the temperature around a temperature
+control. When the SoC has to stabilize the temperature, the kernel can
+act on a cooling device to mitigate the dissipated power. When the
+critical temperature is reached, a decision must be taken to reduce
+the temperature, that, in turn impacts performance.
+
+Another situation is when the silicon temperature continues to
+increase even after the dynamic leakage is reduced to its minimum by
+clock gating the component. This runaway phenomenon can continue due
+to the static leakage. The only solution is to power down the
+component, thus dropping the dynamic and static leakage that will
+allow the component to cool down.
+
+Last but not least, the system can ask for a specific power budget but
+because of the OPP density, we can only choose an OPP with a power
+budget lower than the requested one and under-utilize the CPU, thus
+losing performance. In other words, one OPP under-utilizes the CPU
+with a power less than the requested power budget and the next OPP
+exceeds the power budget. An intermediate OPP could have been used if
+it were present.
+
+Solutions:
+----------
+
+If we can remove the static and the dynamic leakage for a specific
+duration in a controlled period, the SoC temperature will
+decrease. Acting on the idle state duration or the idle cycle
+injection period, we can mitigate the temperature by modulating the
+power budget.
+
+The Operating Performance Point (OPP) density has a great influence on
+the control precision of cpufreq, however different vendors have a
+plethora of OPP density, and some have large power gap between OPPs,
+that will result in loss of performance during thermal control and
+loss of power in other scenarios.
+
+At a specific OPP, we can assume that injecting idle cycle on all CPUs
+belong to the same cluster, with a duration greater than the cluster
+idle state target residency, we lead to dropping the static and the
+dynamic leakage for this period (modulo the energy needed to enter
+this state). So the sustainable power with idle cycles has a linear
+relation with the OPP’s sustainable power and can be computed with a
+coefficient similar to:
+
+	    Power(IdleCycle) = Coef x Power(OPP)
+
+Idle Injection:
+---------------
+
+The base concept of the idle injection is to force the CPU to go to an
+idle state for a specified time each control cycle, it provides
+another way to control CPU power and heat in addition to
+cpufreq. Ideally, if all CPUs belonging to the same cluster, inject
+their idle cycles synchronously, the cluster can reach its power down
+state with a minimum power consumption and reduce the static leakage
+to almost zero.  However, these idle cycles injection will add extra
+latencies as the CPUs will have to wakeup from a deep sleep state.
+
+We use a fixed duration of idle injection that gives an acceptable
+performance penalty and a fixed latency. Mitigation can be increased
+or decreased by modulating the duty cycle of the idle injection.
+
+     ^
+     |
+     |
+     |-------                         -------
+     |_______|_______________________|_______|___________
+
+     <------>
+       idle  <---------------------->
+                    running
+
+      <----------------------------->
+              duty cycle 25%
+
+
+The implementation of the cooling device bases the number of states on
+the duty cycle percentage. When no mitigation is happening the cooling
+device state is zero, meaning the duty cycle is 0%.
+
+When the mitigation begins, depending on the governor's policy, a
+starting state is selected. With a fixed idle duration and the duty
+cycle (aka the cooling device state), the running duration can be
+computed.
+
+The governor will change the cooling device state thus the duty cycle
+and this variation will modulate the cooling effect.
+
+     ^
+     |
+     |
+     |-------                 -------
+     |_______|_______________|_______|___________
+
+     <------>
+       idle  <-------------->
+                running
+
+      <----------------------------->
+              duty cycle 33%
+
+
+     ^
+     |
+     |
+     |-------         -------
+     |_______|_______|_______|___________
+
+     <------>
+       idle  <------>
+              running
+
+      <------------->
+       duty cycle 50%
+
+The idle injection duration value must comply with the constraints:
+
+- It is less than or equal to the latency we tolerate when the
+  mitigation begins. It is platform dependent and will depend on the
+  user experience, reactivity vs performance trade off we want. This
+  value should be specified.
+
+- It is greater than the idle state’s target residency we want to go
+  for thermal mitigation, otherwise we end up consuming more energy.
+
+Power considerations
+--------------------
+
+When we reach the thermal trip point, we have to sustain a specified
+power for a specific temperature but at this time we consume:
+
+ Power = Capacitance x Voltage^2 x Frequency x Utilisation
+
+... which is more than the sustainable power (or there is something
+wrong in the system setup). The ‘Capacitance’ and ‘Utilisation’ are a
+fixed value, ‘Voltage’ and the ‘Frequency’ are fixed artificially
+because we don’t want to change the OPP. We can group the
+‘Capacitance’ and the ‘Utilisation’ into a single term which is the
+‘Dynamic Power Coefficient (Cdyn)’ Simplifying the above, we have:
+
+ Pdyn = Cdyn x Voltage^2 x Frequency
+
+The power allocator governor will ask us somehow to reduce our power
+in order to target the sustainable power defined in the device
+tree. So with the idle injection mechanism, we want an average power
+(Ptarget) resulting in an amount of time running at full power on a
+specific OPP and idle another amount of time. That could be put in a
+equation:
+
+ P(opp)target = ((Trunning x (P(opp)running) + (Tidle x P(opp)idle)) /
+			(Trunning + Tidle)
+  ...
+
+ Tidle = Trunning x ((P(opp)running / P(opp)target) - 1)
+
+At this point if we know the running period for the CPU, that gives us
+the idle injection we need. Alternatively if we have the idle
+injection duration, we can compute the running duration with:
+
+ Trunning = Tidle / ((P(opp)running / P(opp)target) - 1)
+
+Practically, if the running power is less than the targeted power, we
+end up with a negative time value, so obviously the equation usage is
+bound to a power reduction, hence a higher OPP is needed to have the
+running power greater than the targeted power.
+
+However, in this demonstration we ignore three aspects:
+
+ * The static leakage is not defined here, we can introduce it in the
+   equation but assuming it will be zero most of the time as it is
+   difficult to get the values from the SoC vendors
+
+ * The idle state wake up latency (or entry + exit latency) is not
+   taken into account, it must be added in the equation in order to
+   rigorously compute the idle injection
+
+ * The injected idle duration must be greater than the idle state
+   target residency, otherwise we end up consuming more energy and
+   potentially invert the mitigation effect
+
+So the final equation is:
+
+ Trunning = (Tidle - Twakeup ) x
+		(((P(opp)dyn + P(opp)static ) - P(opp)target) / P(opp)target )
diff --git a/Documentation/driver-api/thermal/exynos_thermal.rst b/Documentation/driver-api/thermal/exynos_thermal.rst
index 5bd556566c70..764df4ab584d 100644
--- a/Documentation/driver-api/thermal/exynos_thermal.rst
+++ b/Documentation/driver-api/thermal/exynos_thermal.rst
@@ -4,7 +4,7 @@ Kernel driver exynos_tmu
 
 Supported chips:
 
-* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
+* ARM Samsung Exynos4, Exynos5 series of SoC
 
   Datasheet: Not publicly available
 
@@ -14,7 +14,7 @@ Authors: Amit Daniel <amit.daniel@samsung.com>
 TMU controller Description:
 ---------------------------
 
-This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
+This driver allows to read temperature inside Samsung Exynos4/5 series of SoC.
 
 The chip only exposes the measured 8-bit temperature code value
 through a register.
@@ -43,7 +43,7 @@ The three equations are:
        Trimming info for 85 degree Celsius (stored at TRIMINFO register)
        Temperature code measured at 85 degree Celsius which is unchanged
 
-TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
+TMU(Thermal Management Unit) in Exynos4/5 generates interrupt
 when temperature exceeds pre-defined levels.
 The maximum number of configurable threshold is five.
 The threshold levels are defined as follows::
@@ -67,7 +67,7 @@ TMU driver description:
 The exynos thermal driver is structured as::
 
 					Kernel Core thermal framework
-				(thermal_core.c, step_wise.c, cpu_cooling.c)
+				(thermal_core.c, step_wise.c, cpufreq_cooling.c)
 								^
 								|
 								|
diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst
index fab2c9b36d08..b40b1f839148 100644
--- a/Documentation/driver-api/thermal/sysfs-api.rst
+++ b/Documentation/driver-api/thermal/sysfs-api.rst
@@ -725,24 +725,10 @@ method, the sys I/F structure will be built like this::
     |---temp1_input:		37000
     |---temp1_crit:		100000
 
-4. Event Notification
+4. Export Symbol APIs
 =====================
 
-The framework includes a simple notification mechanism, in the form of a
-netlink event. Netlink socket initialization is done during the _init_
-of the framework. Drivers which intend to use the notification mechanism
-just need to call thermal_generate_netlink_event() with two arguments viz
-(originator, event). The originator is a pointer to struct thermal_zone_device
-from where the event has been originated. An integer which represents the
-thermal zone device will be used in the message to identify the zone. The
-event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
-THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
-crosses any of the configured thresholds.
-
-5. Export Symbol APIs
-=====================
-
-5.1. get_tz_trend
+4.1. get_tz_trend
 -----------------
 
 This function returns the trend of a thermal zone, i.e the rate of change
@@ -751,14 +737,14 @@ are supposed to implement the callback. If they don't, the thermal
 framework calculated the trend by comparing the previous and the current
 temperature values.
 
-5.2. get_thermal_instance
+4.2. get_thermal_instance
 -------------------------
 
 This function returns the thermal_instance corresponding to a given
 {thermal_zone, cooling_device, trip_point} combination. Returns NULL
 if such an instance does not exist.
 
-5.3. thermal_notify_framework
+4.3. thermal_notify_framework
 -----------------------------
 
 This function handles the trip events from sensor drivers. It starts
@@ -768,14 +754,14 @@ and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
 The throttling policy is based on the configured platform data; if no
 platform data is provided, this uses the step_wise throttling policy.
 
-5.4. thermal_cdev_update
+4.4. thermal_cdev_update
 ------------------------
 
 This function serves as an arbitrator to set the state of a cooling
 device. It sets the cooling device to the deepest cooling state if
 possible.
 
-6. thermal_emergency_poweroff
+5. thermal_emergency_poweroff
 =============================
 
 On an event of critical trip temperature crossing. Thermal framework
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index ebca41785abe..e57a3d1d085a 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -127,7 +127,7 @@ C. Boot options
 	is typically located on the same video card.  Thus, the consoles that
 	are controlled by the VGA console will be garbled.
 
-4. fbcon=rotate:<n>
+5. fbcon=rotate:<n>
 
 	This option changes the orientation angle of the console display. The
 	value 'n' accepts the following:
@@ -152,21 +152,21 @@ C. Boot options
 	Actually, the underlying fb driver is totally ignorant of console
 	rotation.
 
-5. fbcon=margin:<color>
+6. fbcon=margin:<color>
 
 	This option specifies the color of the margins. The margins are the
 	leftover area at the right and the bottom of the screen that are not
 	used by text. By default, this area will be black. The 'color' value
 	is an integer number that depends on the framebuffer driver being used.
 
-6. fbcon=nodefer
+7. fbcon=nodefer
 
 	If the kernel is compiled with deferred fbcon takeover support, normally
 	the framebuffer contents, left in place by the firmware/bootloader, will
 	be preserved until there actually is some text is output to the console.
 	This option causes fbcon to bind immediately to the fbdev device.
 
-7. fbcon=logo-pos:<location>
+8. fbcon=logo-pos:<location>
 
 	The only possible 'location' is 'center' (without quotes), and when
 	given, the bootup logo is moved from the default top-left corner
@@ -174,6 +174,11 @@ C. Boot options
 	displayed due to multiple CPUs, the collected line of logos is moved
 	as a whole.
 
+9. fbcon=logo-count:<n>
+
+	The value 'n' overrides the number of bootup logos. 0 disables the
+	logo, and -1 gives the default which is the number of online CPUs.
+
 C. Attaching, Detaching and Unloading
 
 Before going on to how to attach, detach and unload the framebuffer console, an
diff --git a/Documentation/fb/modedb.rst b/Documentation/fb/modedb.rst
index 9c4e3fd39e6d..624d08fd2856 100644
--- a/Documentation/fb/modedb.rst
+++ b/Documentation/fb/modedb.rst
@@ -65,6 +65,9 @@ Valid options are::
   - reflect_y (boolean): Perform an axial symmetry on the Y axis
   - rotate (integer): Rotate the initial framebuffer by x
     degrees. Valid values are 0, 90, 180 and 270.
+  - panel_orientation, one of "normal", "upside_down", "left_side_up", or
+    "right_side_up". For KMS drivers only, this sets the "panel orientation"
+    property on the kms connector as hint for kms users.
 
 
 -----------------------------------------------------------------------------
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index d344b99aae1e..964667052eda 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -30,5 +30,5 @@
     |          um: | TODO |
     |   unicore32: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 059d58a549c7..6fb2b0671994 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -23,7 +23,7 @@
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: | TODO |
diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.txt
index 5949766353f7..0baa8e8c1fc1 100644
--- a/Documentation/filesystems/adfs.txt
+++ b/Documentation/filesystems/adfs.txt
@@ -1,3 +1,27 @@
+Filesystems supported by ADFS
+-----------------------------
+
+The ADFS module supports the following Filecore formats which have:
+
+- new maps
+- new directories or big directories
+
+In terms of the named formats, this means we support:
+
+- E and E+, with or without boot block
+- F and F+
+
+We fully support reading files from these filesystems, and writing to
+existing files within their existing allocation.  Essentially, we do
+not support changing any of the filesystem metadata.
+
+This is intended to support loopback mounted Linux native filesystems
+on a RISC OS Filecore filesystem, but will allow the data within files
+to be changed.
+
+If write support (ADFS_FS_RW) is configured, we allow rudimentary
+directory updates, specifically updating the access mode and timestamp.
+
 Mount options for ADFS
 ----------------------
 
diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst
new file mode 100644
index 000000000000..681c6a492bc0
--- /dev/null
+++ b/Documentation/filesystems/autofs.rst
@@ -0,0 +1,580 @@
+=====================
+autofs - how it works
+=====================
+
+Purpose
+=======
+
+The goal of autofs is to provide on-demand mounting and race free
+automatic unmounting of various other filesystems.  This provides two
+key advantages:
+
+1. There is no need to delay boot until all filesystems that
+   might be needed are mounted.  Processes that try to access those
+   slow filesystems might be delayed but other processes can
+   continue freely.  This is particularly important for
+   network filesystems (e.g. NFS) or filesystems stored on
+   media with a media-changing robot.
+
+2. The names and locations of filesystems can be stored in
+   a remote database and can change at any time.  The content
+   in that data base at the time of access will be used to provide
+   a target for the access.  The interpretation of names in the
+   filesystem can even be programmatic rather than database-backed,
+   allowing wildcards for example, and can vary based on the user who
+   first accessed a name.
+
+Context
+=======
+
+The "autofs" filesystem module is only one part of an autofs system.
+There also needs to be a user-space program which looks up names
+and mounts filesystems.  This will often be the "automount" program,
+though other tools including "systemd" can make use of "autofs".
+This document describes only the kernel module and the interactions
+required with any user-space program.  Subsequent text refers to this
+as the "automount daemon" or simply "the daemon".
+
+"autofs" is a Linux kernel module with provides the "autofs"
+filesystem type.  Several "autofs" filesystems can be mounted and they
+can each be managed separately, or all managed by the same daemon.
+
+Content
+=======
+
+An autofs filesystem can contain 3 sorts of objects: directories,
+symbolic links and mount traps.  Mount traps are directories with
+extra properties as described in the next section.
+
+Objects can only be created by the automount daemon: symlinks are
+created with a regular `symlink` system call, while directories and
+mount traps are created with `mkdir`.  The determination of whether a
+directory should be a mount trap is based on a master map. This master
+map is consulted by autofs to determine which directories are mount
+points. Mount points can be *direct*/*indirect*/*offset*.
+On most systems, the default master map is located at */etc/auto.master*.
+
+If neither the *direct* or *offset* mount options are given (so the
+mount is considered to be *indirect*), then the root directory is
+always a regular directory, otherwise it is a mount trap when it is
+empty and a regular directory when not empty.  Note that *direct* and
+*offset* are treated identically so a concise summary is that the root
+directory is a mount trap only if the filesystem is mounted *direct*
+and the root is empty.
+
+Directories created in the root directory are mount traps only if the
+filesystem is mounted *indirect* and they are empty.
+
+Directories further down the tree depend on the *maxproto* mount
+option and particularly whether it is less than five or not.
+When *maxproto* is five, no directories further down the
+tree are ever mount traps, they are always regular directories.  When
+the *maxproto* is four (or three), these directories are mount traps
+precisely when they are empty.
+
+So: non-empty (i.e. non-leaf) directories are never mount traps. Empty
+directories are sometimes mount traps, and sometimes not depending on
+where in the tree they are (root, top level, or lower), the *maxproto*,
+and whether the mount was *indirect* or not.
+
+Mount Traps
+===========
+
+A core element of the implementation of autofs is the Mount Traps
+which are provided by the Linux VFS.  Any directory provided by a
+filesystem can be designated as a trap.  This involves two separate
+features that work together to allow autofs to do its job.
+
+**DCACHE_NEED_AUTOMOUNT**
+
+If a dentry has the DCACHE_NEED_AUTOMOUNT flag set (which gets set if
+the inode has S_AUTOMOUNT set, or can be set directly) then it is
+(potentially) a mount trap.  Any access to this directory beyond a
+"`stat`" will (normally) cause the `d_op->d_automount()` dentry operation
+to be called. The task of this method is to find the filesystem that
+should be mounted on the directory and to return it.  The VFS is
+responsible for actually mounting the root of this filesystem on the
+directory.
+
+autofs doesn't find the filesystem itself but sends a message to the
+automount daemon asking it to find and mount the filesystem.  The
+autofs `d_automount` method then waits for the daemon to report that
+everything is ready.  It will then return "`NULL`" indicating that the
+mount has already happened.  The VFS doesn't try to mount anything but
+follows down the mount that is already there.
+
+This functionality is sufficient for some users of mount traps such
+as NFS which creates traps so that mountpoints on the server can be
+reflected on the client.  However it is not sufficient for autofs.  As
+mounting onto a directory is considered to be "beyond a `stat`", the
+automount daemon would not be able to mount a filesystem on the 'trap'
+directory without some way to avoid getting caught in the trap.  For
+that purpose there is another flag.
+
+**DCACHE_MANAGE_TRANSIT**
+
+If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
+related behaviours are invoked, both using the `d_op->d_manage()`
+dentry operation.
+
+Firstly, before checking to see if any filesystem is mounted on the
+directory, d_manage() will be called with the `rcu_walk` parameter set
+to `false`.  It may return one of three things:
+
+-  A return value of zero indicates that there is nothing special
+   about this dentry and normal checks for mounts and automounts
+   should proceed.
+
+   autofs normally returns zero, but first waits for any
+   expiry (automatic unmounting of the mounted filesystem) to
+   complete.  This avoids races.
+
+-  A return value of `-EISDIR` tells the VFS to ignore any mounts
+   on the directory and to not consider calling `->d_automount()`.
+   This effectively disables the **DCACHE_NEED_AUTOMOUNT** flag
+   causing the directory not be a mount trap after all.
+
+   autofs returns this if it detects that the process performing the
+   lookup is the automount daemon and that the mount has been
+   requested but has not yet completed.  How it determines this is
+   discussed later.  This allows the automount daemon not to get
+   caught in the mount trap.
+
+   There is a subtlety here.  It is possible that a second autofs
+   filesystem can be mounted below the first and for both of them to
+   be managed by the same daemon.  For the daemon to be able to mount
+   something on the second it must be able to "walk" down past the
+   first.  This means that d_manage cannot *always* return -EISDIR for
+   the automount daemon.  It must only return it when a mount has
+   been requested, but has not yet completed.
+
+   `d_manage` also returns `-EISDIR` if the dentry shouldn't be a
+   mount trap, either because it is a symbolic link or because it is
+   not empty.
+
+-  Any other negative value is treated as an error and returned
+   to the caller.
+
+   autofs can return
+
+   - -ENOENT if the automount daemon failed to mount anything,
+   - -ENOMEM if it ran out of memory,
+   - -EINTR if a signal arrived while waiting for expiry to
+     complete
+   - or any other error sent down by the automount daemon.
+
+
+The second use case only occurs during an "RCU-walk" and so `rcu_walk`
+will be set.
+
+An RCU-walk is a fast and lightweight process for walking down a
+filename path (i.e. it is like running on tip-toes).  RCU-walk cannot
+cope with all situations so when it finds a difficulty it falls back
+to "REF-walk", which is slower but more robust.
+
+RCU-walk will never call `->d_automount`; the filesystems must already
+be mounted or RCU-walk cannot handle the path.
+To determine if a mount-trap is safe for RCU-walk mode it calls
+`->d_manage()` with `rcu_walk` set to `true`.
+
+In this case `d_manage()` must avoid blocking and should avoid taking
+spinlocks if at all possible.  Its sole purpose is to determine if it
+would be safe to follow down into any mounted directory and the only
+reason that it might not be is if an expiry of the mount is
+underway.
+
+In the `rcu_walk` case, `d_manage()` cannot return -EISDIR to tell the
+VFS that this is a directory that doesn't require d_automount.  If
+`rcu_walk` sees a dentry with DCACHE_NEED_AUTOMOUNT set but nothing
+mounted, it *will* fall back to REF-walk.  `d_manage()` cannot make the
+VFS remain in RCU-walk mode, but can only tell it to get out of
+RCU-walk mode by returning `-ECHILD`.
+
+So `d_manage()`, when called with `rcu_walk` set, should either return
+-ECHILD if there is any reason to believe it is unsafe to enter the
+mounted filesystem, otherwise it should return 0.
+
+autofs will return `-ECHILD` if an expiry of the filesystem has been
+initiated or is being considered, otherwise it returns 0.
+
+
+Mountpoint expiry
+=================
+
+The VFS has a mechanism for automatically expiring unused mounts,
+much as it can expire any unused dentry information from the dcache.
+This is guided by the MNT_SHRINKABLE flag.  This only applies to
+mounts that were created by `d_automount()` returning a filesystem to be
+mounted.  As autofs doesn't return such a filesystem but leaves the
+mounting to the automount daemon, it must involve the automount daemon
+in unmounting as well.  This also means that autofs has more control
+over expiry.
+
+The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
+the `umount` system call.  Unmounting with MNT_EXPIRE will fail unless
+a previous attempt had been made, and the filesystem has been inactive
+and untouched since that previous attempt.  autofs does not depend on
+this but has its own internal tracking of whether filesystems were
+recently used.  This allows individual names in the autofs directory
+to expire separately.
+
+With version 4 of the protocol, the automount daemon can try to
+unmount any filesystems mounted on the autofs filesystem or remove any
+symbolic links or empty directories any time it likes.  If the unmount
+or removal is successful the filesystem will be returned to the state
+it was before the mount or creation, so that any access of the name
+will trigger normal auto-mount processing.  In particular, `rmdir` and
+`unlink` do not leave negative entries in the dcache as a normal
+filesystem would, so an attempt to access a recently-removed object is
+passed to autofs for handling.
+
+With version 5, this is not safe except for unmounting from top-level
+directories.  As lower-level directories are never mount traps, other
+processes will see an empty directory as soon as the filesystem is
+unmounted.  So it is generally safest to use the autofs expiry
+protocol described below.
+
+Normally the daemon only wants to remove entries which haven't been
+used for a while.  For this purpose autofs maintains a "`last_used`"
+time stamp on each directory or symlink.  For symlinks it genuinely
+does record the last time the symlink was "used" or followed to find
+out where it points to.  For directories the field is used slightly
+differently.  The field is updated at mount time and during expire
+checks if it is found to be in use (ie. open file descriptor or
+process working directory) and during path walks. The update done
+during path walks prevents frequent expire and immediate mount of
+frequently accessed automounts. But in the case where a GUI continually
+access or an application frequently scans an autofs directory tree
+there can be an accumulation of mounts that aren't actually being
+used. To cater for this case the "`strictexpire`" autofs mount option
+can be used to avoid the "`last_used`" update on path walk thereby
+preventing this apparent inability to expire mounts that aren't
+really in use.
+
+The daemon is able to ask autofs if anything is due to be expired,
+using an `ioctl` as discussed later.  For a *direct* mount, autofs
+considers if the entire mount-tree can be unmounted or not.  For an
+*indirect* mount, autofs considers each of the names in the top level
+directory to determine if any of those can be unmounted and cleaned
+up.
+
+There is an option with indirect mounts to consider each of the leaves
+that has been mounted on instead of considering the top-level names.
+This was originally intended for compatibility with version 4 of autofs
+and should be considered as deprecated for Sun Format automount maps.
+However, it may be used again for amd format mount maps (which are
+generally indirect maps) because the amd automounter allows for the
+setting of an expire timeout for individual mounts. But there are
+some difficulties in making the needed changes for this.
+
+When autofs considers a directory it checks the `last_used` time and
+compares it with the "timeout" value set when the filesystem was
+mounted, though this check is ignored in some cases. It also checks if
+the directory or anything below it is in use.  For symbolic links,
+only the `last_used` time is ever considered.
+
+If both appear to support expiring the directory or symlink, an action
+is taken.
+
+There are two ways to ask autofs to consider expiry.  The first is to
+use the **AUTOFS_IOC_EXPIRE** ioctl.  This only works for indirect
+mounts.  If it finds something in the root directory to expire it will
+return the name of that thing.  Once a name has been returned the
+automount daemon needs to unmount any filesystems mounted below the
+name normally.  As described above, this is unsafe for non-toplevel
+mounts in a version-5 autofs.  For this reason the current `automount(8)`
+does not use this ioctl.
+
+The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
+the **AUTOFS_IOC_EXPIRE_MULTI** ioctl.  This will work for both direct and
+indirect mounts.  If it selects an object to expire, it will notify
+the daemon using the notification mechanism described below.  This
+will block until the daemon acknowledges the expiry notification.
+This implies that the "`EXPIRE`" ioctl must be sent from a different
+thread than the one which handles notification.
+
+While the ioctl is blocking, the entry is marked as "expiring" and
+`d_manage` will block until the daemon affirms that the unmount has
+completed (together with removing any directories that might have been
+necessary), or has been aborted.
+
+Communicating with autofs: detecting the daemon
+===============================================
+
+There are several forms of communication between the automount daemon
+and the filesystem.  As we have already seen, the daemon can create and
+remove directories and symlinks using normal filesystem operations.
+autofs knows whether a process requesting some operation is the daemon
+or not based on its process-group id number (see getpgid(1)).
+
+When an autofs filesystem is mounted the pgid of the mounting
+processes is recorded unless the "pgrp=" option is given, in which
+case that number is recorded instead.  Any request arriving from a
+process in that process group is considered to come from the daemon.
+If the daemon ever has to be stopped and restarted a new pgid can be
+provided through an ioctl as will be described below.
+
+Communicating with autofs: the event pipe
+=========================================
+
+When an autofs filesystem is mounted, the 'write' end of a pipe must
+be passed using the 'fd=' mount option.  autofs will write
+notification messages to this pipe for the daemon to respond to.
+For version 5, the format of the message is::
+
+	struct autofs_v5_packet {
+		struct autofs_packet_hdr hdr;
+		autofs_wqt_t wait_queue_token;
+		__u32 dev;
+		__u64 ino;
+		__u32 uid;
+		__u32 gid;
+		__u32 pid;
+		__u32 tgid;
+		__u32 len;
+		char name[NAME_MAX+1];
+        };
+
+And the format of the header is::
+
+	struct autofs_packet_hdr {
+		int proto_version;		/* Protocol version */
+		int type;			/* Type of packet */
+	};
+
+where the type is one of ::
+
+	autofs_ptype_missing_indirect
+	autofs_ptype_expire_indirect
+	autofs_ptype_missing_direct
+	autofs_ptype_expire_direct
+
+so messages can indicate that a name is missing (something tried to
+access it but it isn't there) or that it has been selected for expiry.
+
+The pipe will be set to "packet mode" (equivalent to passing
+`O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
+most one packet, and any unread portion of a packet will be discarded.
+
+The `wait_queue_token` is a unique number which can identify a
+particular request to be acknowledged.  When a message is sent over
+the pipe the affected dentry is marked as either "active" or
+"expiring" and other accesses to it block until the message is
+acknowledged using one of the ioctls below with the relevant
+`wait_queue_token`.
+
+Communicating with autofs: root directory ioctls
+================================================
+
+The root directory of an autofs filesystem will respond to a number of
+ioctls.  The process issuing the ioctl must have the CAP_SYS_ADMIN
+capability, or must be the automount daemon.
+
+The available ioctl commands are:
+
+- **AUTOFS_IOC_READY**:
+	a notification has been handled.  The argument
+	to the ioctl command is the "wait_queue_token" number
+	corresponding to the notification being acknowledged.
+- **AUTOFS_IOC_FAIL**:
+	similar to above, but indicates failure with
+	the error code `ENOENT`.
+- **AUTOFS_IOC_CATATONIC**:
+	Causes the autofs to enter "catatonic"
+	mode meaning that it stops sending notifications to the daemon.
+	This mode is also entered if a write to the pipe fails.
+- **AUTOFS_IOC_PROTOVER**:
+	This returns the protocol version in use.
+- **AUTOFS_IOC_PROTOSUBVER**:
+	Returns the protocol sub-version which
+	is really a version number for the implementation.
+- **AUTOFS_IOC_SETTIMEOUT**:
+	This passes a pointer to an unsigned
+	long.  The value is used to set the timeout for expiry, and
+	the current timeout value is stored back through the pointer.
+- **AUTOFS_IOC_ASKUMOUNT**:
+	Returns, in the pointed-to `int`, 1 if
+	the filesystem could be unmounted.  This is only a hint as
+	the situation could change at any instant.  This call can be
+	used to avoid a more expensive full unmount attempt.
+- **AUTOFS_IOC_EXPIRE**:
+	as described above, this asks if there is
+	anything suitable to expire.  A pointer to a packet::
+
+		struct autofs_packet_expire_multi {
+			struct autofs_packet_hdr hdr;
+			autofs_wqt_t wait_queue_token;
+			int len;
+			char name[NAME_MAX+1];
+		};
+
+	is required.  This is filled in with the name of something
+	that can be unmounted or removed.  If nothing can be expired,
+	`errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
+	is present in the structure, no "wait queue" is established
+	and no acknowledgment is needed.
+- **AUTOFS_IOC_EXPIRE_MULTI**:
+	This is similar to
+	**AUTOFS_IOC_EXPIRE** except that it causes notification to be
+	sent to the daemon, and it blocks until the daemon acknowledges.
+	The argument is an integer which can contain two different flags.
+
+	**AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
+	and objects are expired if the are not in use.
+
+	**AUTOFS_EXP_FORCED** causes the in use status to be ignored
+	and objects are expired ieven if they are in use. This assumes
+	that the daemon has requested this because it is capable of
+	performing the umount.
+
+	**AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
+	name to expire.  This is only safe when *maxproto* is 4.
+
+Communicating with autofs: char-device ioctls
+=============================================
+
+It is not always possible to open the root of an autofs filesystem,
+particularly a *direct* mounted filesystem.  If the automount daemon
+is restarted there is no way for it to regain control of existing
+mounts using any of the above communication channels.  To address this
+need there is a "miscellaneous" character device (major 10, minor 235)
+which can be used to communicate directly with the autofs filesystem.
+It requires CAP_SYS_ADMIN for access.
+
+The 'ioctl's that can be used on this device are described in a separate
+document `autofs-mount-control.txt`, and are summarised briefly here.
+Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure::
+
+        struct autofs_dev_ioctl {
+                __u32 ver_major;
+                __u32 ver_minor;
+                __u32 size;             /* total size of data passed in
+                                         * including this struct */
+                __s32 ioctlfd;          /* automount command fd */
+
+		/* Command parameters */
+		union {
+			struct args_protover		protover;
+			struct args_protosubver		protosubver;
+			struct args_openmount		openmount;
+			struct args_ready		ready;
+			struct args_fail		fail;
+			struct args_setpipefd		setpipefd;
+			struct args_timeout		timeout;
+			struct args_requester		requester;
+			struct args_expire		expire;
+			struct args_askumount		askumount;
+			struct args_ismountpoint	ismountpoint;
+		};
+
+                char path[0];
+        };
+
+For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
+filesystem is identified by the `path`.  All other commands identify
+the filesystem by the `ioctlfd` which is a file descriptor open on the
+root, and which can be returned by **OPEN_MOUNT**.
+
+The `ver_major` and `ver_minor` are in/out parameters which check that
+the requested version is supported, and report the maximum version
+that the kernel module can support.
+
+Commands are:
+
+- **AUTOFS_DEV_IOCTL_VERSION_CMD**:
+	does nothing, except validate and
+	set version numbers.
+- **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**:
+	return an open file descriptor
+	on the root of an autofs filesystem.  The filesystem is identified
+	by name and device number, which is stored in `openmount.devid`.
+	Device numbers for existing filesystems can be found in
+	`/proc/self/mountinfo`.
+- **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**:
+	same as `close(ioctlfd)`.
+- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**:
+	if the filesystem is in
+	catatonic mode, this can provide the write end of a new pipe
+	in `setpipefd.pipefd` to re-establish communication with a daemon.
+	The process group of the calling process is used to identify the
+	daemon.
+- **AUTOFS_DEV_IOCTL_REQUESTER_CMD**:
+	`path` should be a
+	name within the filesystem that has been auto-mounted on.
+	On successful return, `requester.uid` and `requester.gid` will be
+	the UID and GID of the process which triggered that mount.
+- **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**:
+	Check if path is a
+	mountpoint of a particular type - see separate documentation for
+	details.
+
+- **AUTOFS_DEV_IOCTL_PROTOVER_CMD**
+- **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**
+- **AUTOFS_DEV_IOCTL_READY_CMD**
+- **AUTOFS_DEV_IOCTL_FAIL_CMD**
+- **AUTOFS_DEV_IOCTL_CATATONIC_CMD**
+- **AUTOFS_DEV_IOCTL_TIMEOUT_CMD**
+- **AUTOFS_DEV_IOCTL_EXPIRE_CMD**
+- **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**
+
+These all have the same
+function as the similarly named **AUTOFS_IOC** ioctls, except
+that **FAIL** can be given an explicit error number in `fail.status`
+instead of assuming `ENOENT`, and this **EXPIRE** command
+corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
+
+Catatonic mode
+==============
+
+As mentioned, an autofs mount can enter "catatonic" mode.  This
+happens if a write to the notification pipe fails, or if it is
+explicitly requested by an `ioctl`.
+
+When entering catatonic mode, the pipe is closed and any pending
+notifications are acknowledged with the error `ENOENT`.
+
+Once in catatonic mode attempts to access non-existing names will
+result in `ENOENT` while attempts to access existing directories will
+be treated in the same way as if they came from the daemon, so mount
+traps will not fire.
+
+When the filesystem is mounted a _uid_ and _gid_ can be given which
+set the ownership of directories and symbolic links.  When the
+filesystem is in catatonic mode, any process with a matching UID can
+create directories or symlinks in the root directory, but not in other
+directories.
+
+Catatonic mode can only be left via the
+**AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
+
+The "ignore" mount option
+=========================
+
+The "ignore" mount option can be used to provide a generic indicator
+to applications that the mount entry should be ignored when displaying
+mount information.
+
+In other OSes that provide autofs and that provide a mount list to user
+space based on the kernel mount list a no-op mount option ("ignore" is
+the one use on the most common OSes) is allowed so that autofs file
+system users can optionally use it.
+
+This is intended to be used by user space programs to exclude autofs
+mounts from consideration when reading the mounts list.
+
+autofs, name spaces, and shared mounts
+======================================
+
+With bind mounts and name spaces it is possible for an autofs
+filesystem to appear at multiple places in one or more filesystem
+name spaces.  For this to work sensibly, the autofs filesystem should
+always be mounted "shared". e.g. ::
+
+	mount --make-shared /autofs/mount/point
+
+The automount daemon is only able to manage a single mount location for
+an autofs filesystem and if mounts on that are not 'shared', other
+locations will not behave as expected.  In particular access to those
+other locations will likely result in the `ELOOP` error ::
+
+	Too many levels of symbolic links
diff --git a/Documentation/filesystems/autofs.txt b/Documentation/filesystems/autofs.txt
deleted file mode 100644
index 3af38c7fd26d..000000000000
--- a/Documentation/filesystems/autofs.txt
+++ /dev/null
@@ -1,559 +0,0 @@
-<head>
-<style> p { max-width:50em} ol, ul {max-width: 40em}</style>
-</head>
-
-autofs - how it works
-=====================
-
-Purpose
--------
-
-The goal of autofs is to provide on-demand mounting and race free
-automatic unmounting of various other filesystems.  This provides two
-key advantages:
-
-1. There is no need to delay boot until all filesystems that
-   might be needed are mounted.  Processes that try to access those
-   slow filesystems might be delayed but other processes can
-   continue freely.  This is particularly important for
-   network filesystems (e.g. NFS) or filesystems stored on
-   media with a media-changing robot.
-
-2. The names and locations of filesystems can be stored in
-   a remote database and can change at any time.  The content
-   in that data base at the time of access will be used to provide
-   a target for the access.  The interpretation of names in the
-   filesystem can even be programmatic rather than database-backed,
-   allowing wildcards for example, and can vary based on the user who
-   first accessed a name.
-
-Context
--------
-
-The "autofs" filesystem module is only one part of an autofs system.
-There also needs to be a user-space program which looks up names
-and mounts filesystems.  This will often be the "automount" program,
-though other tools including "systemd" can make use of "autofs".
-This document describes only the kernel module and the interactions
-required with any user-space program.  Subsequent text refers to this
-as the "automount daemon" or simply "the daemon".
-
-"autofs" is a Linux kernel module with provides the "autofs"
-filesystem type.  Several "autofs" filesystems can be mounted and they
-can each be managed separately, or all managed by the same daemon.
-
-Content
--------
-
-An autofs filesystem can contain 3 sorts of objects: directories,
-symbolic links and mount traps.  Mount traps are directories with
-extra properties as described in the next section.
-
-Objects can only be created by the automount daemon: symlinks are
-created with a regular `symlink` system call, while directories and
-mount traps are created with `mkdir`.  The determination of whether a
-directory should be a mount trap or not is quite _ad hoc_, largely for
-historical reasons, and is determined in part by the
-*direct*/*indirect*/*offset* mount options, and the *maxproto* mount option.
-
-If neither the *direct* or *offset* mount options are given (so the
-mount is considered to be *indirect*), then the root directory is
-always a regular directory, otherwise it is a mount trap when it is
-empty and a regular directory when not empty.  Note that *direct* and
-*offset* are treated identically so a concise summary is that the root
-directory is a mount trap only if the filesystem is mounted *direct*
-and the root is empty.
-
-Directories created in the root directory are mount traps only if the
-filesystem is mounted *indirect* and they are empty.
-
-Directories further down the tree depend on the *maxproto* mount
-option and particularly whether it is less than five or not.
-When *maxproto* is five, no directories further down the
-tree are ever mount traps, they are always regular directories.  When
-the *maxproto* is four (or three), these directories are mount traps
-precisely when they are empty.
-
-So: non-empty (i.e. non-leaf) directories are never mount traps. Empty
-directories are sometimes mount traps, and sometimes not depending on
-where in the tree they are (root, top level, or lower), the *maxproto*,
-and whether the mount was *indirect* or not.
-
-Mount Traps
----------------
-
-A core element of the implementation of autofs is the Mount Traps
-which are provided by the Linux VFS.  Any directory provided by a
-filesystem can be designated as a trap.  This involves two separate
-features that work together to allow autofs to do its job.
-
-**DCACHE_NEED_AUTOMOUNT**
-
-If a dentry has the DCACHE_NEED_AUTOMOUNT flag set (which gets set if
-the inode has S_AUTOMOUNT set, or can be set directly) then it is
-(potentially) a mount trap.  Any access to this directory beyond a
-"`stat`" will (normally) cause the `d_op->d_automount()` dentry operation
-to be called. The task of this method is to find the filesystem that
-should be mounted on the directory and to return it.  The VFS is
-responsible for actually mounting the root of this filesystem on the
-directory.
-
-autofs doesn't find the filesystem itself but sends a message to the
-automount daemon asking it to find and mount the filesystem.  The
-autofs `d_automount` method then waits for the daemon to report that
-everything is ready.  It will then return "`NULL`" indicating that the
-mount has already happened.  The VFS doesn't try to mount anything but
-follows down the mount that is already there.
-
-This functionality is sufficient for some users of mount traps such
-as NFS which creates traps so that mountpoints on the server can be
-reflected on the client.  However it is not sufficient for autofs.  As
-mounting onto a directory is considered to be "beyond a `stat`", the
-automount daemon would not be able to mount a filesystem on the 'trap'
-directory without some way to avoid getting caught in the trap.  For
-that purpose there is another flag.
-
-**DCACHE_MANAGE_TRANSIT**
-
-If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
-related behaviours are invoked, both using the `d_op->d_manage()`
-dentry operation.
-
-Firstly, before checking to see if any filesystem is mounted on the
-directory, d_manage() will be called with the `rcu_walk` parameter set
-to `false`.  It may return one of three things:
-
--  A return value of zero indicates that there is nothing special
-   about this dentry and normal checks for mounts and automounts
-   should proceed.
-
-   autofs normally returns zero, but first waits for any
-   expiry (automatic unmounting of the mounted filesystem) to
-   complete.  This avoids races.
-
--  A return value of `-EISDIR` tells the VFS to ignore any mounts
-   on the directory and to not consider calling `->d_automount()`.
-   This effectively disables the **DCACHE_NEED_AUTOMOUNT** flag
-   causing the directory not be a mount trap after all.
-
-   autofs returns this if it detects that the process performing the
-   lookup is the automount daemon and that the mount has been
-   requested but has not yet completed.  How it determines this is
-   discussed later.  This allows the automount daemon not to get
-   caught in the mount trap.
-
-   There is a subtlety here.  It is possible that a second autofs
-   filesystem can be mounted below the first and for both of them to
-   be managed by the same daemon.  For the daemon to be able to mount
-   something on the second it must be able to "walk" down past the
-   first.  This means that d_manage cannot *always* return -EISDIR for
-   the automount daemon.  It must only return it when a mount has
-   been requested, but has not yet completed.
-
-   `d_manage` also returns `-EISDIR` if the dentry shouldn't be a
-   mount trap, either because it is a symbolic link or because it is
-   not empty.
-
--  Any other negative value is treated as an error and returned
-   to the caller.
-
-   autofs can return
-
-   - -ENOENT if the automount daemon failed to mount anything,
-   - -ENOMEM if it ran out of memory,
-   - -EINTR if a signal arrived while waiting for expiry to
-     complete
-   - or any other error sent down by the automount daemon.
-
-
-The second use case only occurs during an "RCU-walk" and so `rcu_walk`
-will be set.
-
-An RCU-walk is a fast and lightweight process for walking down a
-filename path (i.e. it is like running on tip-toes).  RCU-walk cannot
-cope with all situations so when it finds a difficulty it falls back
-to "REF-walk", which is slower but more robust.
-
-RCU-walk will never call `->d_automount`; the filesystems must already
-be mounted or RCU-walk cannot handle the path.
-To determine if a mount-trap is safe for RCU-walk mode it calls
-`->d_manage()` with `rcu_walk` set to `true`.
-
-In this case `d_manage()` must avoid blocking and should avoid taking
-spinlocks if at all possible.  Its sole purpose is to determine if it
-would be safe to follow down into any mounted directory and the only
-reason that it might not be is if an expiry of the mount is
-underway.
-
-In the `rcu_walk` case, `d_manage()` cannot return -EISDIR to tell the
-VFS that this is a directory that doesn't require d_automount.  If
-`rcu_walk` sees a dentry with DCACHE_NEED_AUTOMOUNT set but nothing
-mounted, it *will* fall back to REF-walk.  `d_manage()` cannot make the
-VFS remain in RCU-walk mode, but can only tell it to get out of
-RCU-walk mode by returning `-ECHILD`.
-
-So `d_manage()`, when called with `rcu_walk` set, should either return
--ECHILD if there is any reason to believe it is unsafe to enter the
-mounted filesystem, otherwise it should return 0.
-
-autofs will return `-ECHILD` if an expiry of the filesystem has been
-initiated or is being considered, otherwise it returns 0.
-
-
-Mountpoint expiry
------------------
-
-The VFS has a mechanism for automatically expiring unused mounts,
-much as it can expire any unused dentry information from the dcache.
-This is guided by the MNT_SHRINKABLE flag.  This only applies to
-mounts that were created by `d_automount()` returning a filesystem to be
-mounted.  As autofs doesn't return such a filesystem but leaves the
-mounting to the automount daemon, it must involve the automount daemon
-in unmounting as well.  This also means that autofs has more control
-over expiry.
-
-The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
-the `umount` system call.  Unmounting with MNT_EXPIRE will fail unless
-a previous attempt had been made, and the filesystem has been inactive
-and untouched since that previous attempt.  autofs does not depend on
-this but has its own internal tracking of whether filesystems were
-recently used.  This allows individual names in the autofs directory
-to expire separately.
-
-With version 4 of the protocol, the automount daemon can try to
-unmount any filesystems mounted on the autofs filesystem or remove any
-symbolic links or empty directories any time it likes.  If the unmount
-or removal is successful the filesystem will be returned to the state
-it was before the mount or creation, so that any access of the name
-will trigger normal auto-mount processing.  In particular, `rmdir` and
-`unlink` do not leave negative entries in the dcache as a normal
-filesystem would, so an attempt to access a recently-removed object is
-passed to autofs for handling.
-
-With version 5, this is not safe except for unmounting from top-level
-directories.  As lower-level directories are never mount traps, other
-processes will see an empty directory as soon as the filesystem is
-unmounted.  So it is generally safest to use the autofs expiry
-protocol described below.
-
-Normally the daemon only wants to remove entries which haven't been
-used for a while.  For this purpose autofs maintains a "`last_used`"
-time stamp on each directory or symlink.  For symlinks it genuinely
-does record the last time the symlink was "used" or followed to find
-out where it points to.  For directories the field is used slightly
-differently.  The field is updated at mount time and during expire
-checks if it is found to be in use (ie. open file descriptor or
-process working directory) and during path walks. The update done
-during path walks prevents frequent expire and immediate mount of
-frequently accessed automounts. But in the case where a GUI continually
-access or an application frequently scans an autofs directory tree
-there can be an accumulation of mounts that aren't actually being
-used. To cater for this case the "`strictexpire`" autofs mount option
-can be used to avoid the "`last_used`" update on path walk thereby
-preventing this apparent inability to expire mounts that aren't
-really in use.
-
-The daemon is able to ask autofs if anything is due to be expired,
-using an `ioctl` as discussed later.  For a *direct* mount, autofs
-considers if the entire mount-tree can be unmounted or not.  For an
-*indirect* mount, autofs considers each of the names in the top level
-directory to determine if any of those can be unmounted and cleaned
-up.
-
-There is an option with indirect mounts to consider each of the leaves
-that has been mounted on instead of considering the top-level names.
-This was originally intended for compatibility with version 4 of autofs
-and should be considered as deprecated for Sun Format automount maps.
-However, it may be used again for amd format mount maps (which are
-generally indirect maps) because the amd automounter allows for the
-setting of an expire timeout for individual mounts. But there are
-some difficulties in making the needed changes for this.
-
-When autofs considers a directory it checks the `last_used` time and
-compares it with the "timeout" value set when the filesystem was
-mounted, though this check is ignored in some cases. It also checks if
-the directory or anything below it is in use.  For symbolic links,
-only the `last_used` time is ever considered.
-
-If both appear to support expiring the directory or symlink, an action
-is taken.
-
-There are two ways to ask autofs to consider expiry.  The first is to
-use the **AUTOFS_IOC_EXPIRE** ioctl.  This only works for indirect
-mounts.  If it finds something in the root directory to expire it will
-return the name of that thing.  Once a name has been returned the
-automount daemon needs to unmount any filesystems mounted below the
-name normally.  As described above, this is unsafe for non-toplevel
-mounts in a version-5 autofs.  For this reason the current `automount(8)`
-does not use this ioctl.
-
-The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
-the **AUTOFS_IOC_EXPIRE_MULTI** ioctl.  This will work for both direct and
-indirect mounts.  If it selects an object to expire, it will notify
-the daemon using the notification mechanism described below.  This
-will block until the daemon acknowledges the expiry notification.
-This implies that the "`EXPIRE`" ioctl must be sent from a different
-thread than the one which handles notification.
-
-While the ioctl is blocking, the entry is marked as "expiring" and
-`d_manage` will block until the daemon affirms that the unmount has
-completed (together with removing any directories that might have been
-necessary), or has been aborted.
-
-Communicating with autofs: detecting the daemon
------------------------------------------------
-
-There are several forms of communication between the automount daemon
-and the filesystem.  As we have already seen, the daemon can create and
-remove directories and symlinks using normal filesystem operations.
-autofs knows whether a process requesting some operation is the daemon
-or not based on its process-group id number (see getpgid(1)).
-
-When an autofs filesystem is mounted the pgid of the mounting
-processes is recorded unless the "pgrp=" option is given, in which
-case that number is recorded instead.  Any request arriving from a
-process in that process group is considered to come from the daemon.
-If the daemon ever has to be stopped and restarted a new pgid can be
-provided through an ioctl as will be described below.
-
-Communicating with autofs: the event pipe
------------------------------------------
-
-When an autofs filesystem is mounted, the 'write' end of a pipe must
-be passed using the 'fd=' mount option.  autofs will write
-notification messages to this pipe for the daemon to respond to.
-For version 5, the format of the message is:
-
-        struct autofs_v5_packet {
-                int proto_version;                /* Protocol version */
-                int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_token;
-                __u32 dev;
-                __u64 ino;
-                __u32 uid;
-                __u32 gid;
-                __u32 pid;
-                __u32 tgid;
-                __u32 len;
-                char name[NAME_MAX+1];
-        };
-
-where the type is one of
-
-        autofs_ptype_missing_indirect
-        autofs_ptype_expire_indirect
-        autofs_ptype_missing_direct
-        autofs_ptype_expire_direct
-
-so messages can indicate that a name is missing (something tried to
-access it but it isn't there) or that it has been selected for expiry.
-
-The pipe will be set to "packet mode" (equivalent to passing
-`O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
-most one packet, and any unread portion of a packet will be discarded.
-
-The `wait_queue_token` is a unique number which can identify a
-particular request to be acknowledged.  When a message is sent over
-the pipe the affected dentry is marked as either "active" or
-"expiring" and other accesses to it block until the message is
-acknowledged using one of the ioctls below with the relevant
-`wait_queue_token`.
-
-Communicating with autofs: root directory ioctls
-------------------------------------------------
-
-The root directory of an autofs filesystem will respond to a number of
-ioctls.  The process issuing the ioctl must have the CAP_SYS_ADMIN
-capability, or must be the automount daemon.
-
-The available ioctl commands are:
-
-- **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_token" number
-    corresponding to the notification being acknowledged.
-- **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
-    the error code `ENOENT`.
-- **AUTOFS_IOC_CATATONIC**: Causes the autofs to enter "catatonic"
-    mode meaning that it stops sending notifications to the daemon.
-    This mode is also entered if a write to the pipe fails.
-- **AUTOFS_IOC_PROTOVER**:  This returns the protocol version in use.
-- **AUTOFS_IOC_PROTOSUBVER**: Returns the protocol sub-version which
-    is really a version number for the implementation.
-- **AUTOFS_IOC_SETTIMEOUT**:  This passes a pointer to an unsigned
-    long.  The value is used to set the timeout for expiry, and
-    the current timeout value is stored back through the pointer.
-- **AUTOFS_IOC_ASKUMOUNT**:  Returns, in the pointed-to `int`, 1 if
-    the filesystem could be unmounted.  This is only a hint as
-    the situation could change at any instant.  This call can be
-    used to avoid a more expensive full unmount attempt.
-- **AUTOFS_IOC_EXPIRE**: as described above, this asks if there is
-    anything suitable to expire.  A pointer to a packet:
-
-        struct autofs_packet_expire_multi {
-                int proto_version;              /* Protocol version */
-                int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_token;
-                int len;
-                char name[NAME_MAX+1];
-        };
-
-     is required.  This is filled in with the name of something
-     that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
-     is present in the structure, no "wait queue" is established
-     and no acknowledgment is needed.
-- **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
-     **AUTOFS_IOC_EXPIRE** except that it causes notification to be
-     sent to the daemon, and it blocks until the daemon acknowledges.
-     The argument is an integer which can contain two different flags.
-
-     **AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
-     and objects are expired if the are not in use.
-
-     **AUTOFS_EXP_FORCED** causes the in use status to be ignored
-     and objects are expired ieven if they are in use. This assumes
-     that the daemon has requested this because it is capable of
-     performing the umount.
-
-     **AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
-     name to expire.  This is only safe when *maxproto* is 4.
-
-Communicating with autofs: char-device ioctls
----------------------------------------------
-
-It is not always possible to open the root of an autofs filesystem,
-particularly a *direct* mounted filesystem.  If the automount daemon
-is restarted there is no way for it to regain control of existing
-mounts using any of the above communication channels.  To address this
-need there is a "miscellaneous" character device (major 10, minor 235)
-which can be used to communicate directly with the autofs filesystem.
-It requires CAP_SYS_ADMIN for access.
-
-The `ioctl`s that can be used on this device are described in a separate
-document `autofs-mount-control.txt`, and are summarised briefly here.
-Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
-
-        struct autofs_dev_ioctl {
-                __u32 ver_major;
-                __u32 ver_minor;
-                __u32 size;             /* total size of data passed in
-                                         * including this struct */
-                __s32 ioctlfd;          /* automount command fd */
-
-		/* Command parameters */
-		union {
-			struct args_protover		protover;
-			struct args_protosubver		protosubver;
-			struct args_openmount		openmount;
-			struct args_ready		ready;
-			struct args_fail		fail;
-			struct args_setpipefd		setpipefd;
-			struct args_timeout		timeout;
-			struct args_requester		requester;
-			struct args_expire		expire;
-			struct args_askumount		askumount;
-			struct args_ismountpoint	ismountpoint;
-		};
-
-                char path[0];
-        };
-
-For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
-filesystem is identified by the `path`.  All other commands identify
-the filesystem by the `ioctlfd` which is a file descriptor open on the
-root, and which can be returned by **OPEN_MOUNT**.
-
-The `ver_major` and `ver_minor` are in/out parameters which check that
-the requested version is supported, and report the maximum version
-that the kernel module can support.
-
-Commands are:
-
-- **AUTOFS_DEV_IOCTL_VERSION_CMD**: does nothing, except validate and
-    set version numbers.
-- **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**: return an open file descriptor
-    on the root of an autofs filesystem.  The filesystem is identified
-    by name and device number, which is stored in `openmount.devid`.
-    Device numbers for existing filesystems can be found in
-    `/proc/self/mountinfo`.
-- **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
-- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in
-    catatonic mode, this can provide the write end of a new pipe
-    in `setpipefd.pipefd` to re-establish communication with a daemon.
-    The process group of the calling process is used to identify the
-    daemon.
-- **AUTOFS_DEV_IOCTL_REQUESTER_CMD**: `path` should be a
-    name within the filesystem that has been auto-mounted on.
-    On successful return, `requester.uid` and `requester.gid` will be
-    the UID and GID of the process which triggered that mount.
-- **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**: Check if path is a
-    mountpoint of a particular type - see separate documentation for
-    details.
-- **AUTOFS_DEV_IOCTL_PROTOVER_CMD**:
-- **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**:
-- **AUTOFS_DEV_IOCTL_READY_CMD**:
-- **AUTOFS_DEV_IOCTL_FAIL_CMD**:
-- **AUTOFS_DEV_IOCTL_CATATONIC_CMD**:
-- **AUTOFS_DEV_IOCTL_TIMEOUT_CMD**:
-- **AUTOFS_DEV_IOCTL_EXPIRE_CMD**:
-- **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**:  These all have the same
-    function as the similarly named **AUTOFS_IOC** ioctls, except
-    that **FAIL** can be given an explicit error number in `fail.status`
-    instead of assuming `ENOENT`, and this **EXPIRE** command
-    corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
-
-Catatonic mode
---------------
-
-As mentioned, an autofs mount can enter "catatonic" mode.  This
-happens if a write to the notification pipe fails, or if it is
-explicitly requested by an `ioctl`.
-
-When entering catatonic mode, the pipe is closed and any pending
-notifications are acknowledged with the error `ENOENT`.
-
-Once in catatonic mode attempts to access non-existing names will
-result in `ENOENT` while attempts to access existing directories will
-be treated in the same way as if they came from the daemon, so mount
-traps will not fire.
-
-When the filesystem is mounted a _uid_ and _gid_ can be given which
-set the ownership of directories and symbolic links.  When the
-filesystem is in catatonic mode, any process with a matching UID can
-create directories or symlinks in the root directory, but not in other
-directories.
-
-Catatonic mode can only be left via the
-**AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
-
-The "ignore" mount option
--------------------------
-
-The "ignore" mount option can be used to provide a generic indicator
-to applications that the mount entry should be ignored when displaying
-mount information.
-
-In other OSes that provide autofs and that provide a mount list to user
-space based on the kernel mount list a no-op mount option ("ignore" is
-the one use on the most common OSes) is allowed so that autofs file
-system users can optionally use it.
-
-This is intended to be used by user space programs to exclude autofs
-mounts from consideration when reading the mounts list.
-
-autofs, name spaces, and shared mounts
---------------------------------------
-
-With bind mounts and name spaces it is possible for an autofs
-filesystem to appear at multiple places in one or more filesystem
-name spaces.  For this to work sensibly, the autofs filesystem should
-always be mounted "shared". e.g.
-
-> `mount --make-shared /autofs/mount/point`
-
-The automount daemon is only able to manage a single mount location for
-an autofs filesystem and if mounts on that are not 'shared', other
-locations will not behave as expected.  In particular access to those
-other locations will likely result in the `ELOOP` error
-
-> Too many levels of symbolic links
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index b0afd3d55eaf..7d9f82607562 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -9,7 +9,7 @@ also be requested by userspace.
 IN-KERNEL AUTOMOUNTING
 ======================
 
-See section "Mount Traps" of  Documentation/filesystems/autofs.txt
+See section "Mount Traps" of  Documentation/filesystems/autofs.rst
 
 Then from userspace, you can just do something like:
 
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 9e27c843d00e..dc497b96fa4f 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -68,41 +68,49 @@ actually necessary; the debugfs code provides a number of helper functions
 for simple situations.  Files containing a single integer value can be
 created with any of:
 
-    struct dentry *debugfs_create_u8(const char *name, umode_t mode,
-				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_u16(const char *name, umode_t mode,
-				      struct dentry *parent, u16 *value);
+    void debugfs_create_u8(const char *name, umode_t mode,
+			   struct dentry *parent, u8 *value);
+    void debugfs_create_u16(const char *name, umode_t mode,
+			    struct dentry *parent, u16 *value);
     struct dentry *debugfs_create_u32(const char *name, umode_t mode,
 				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_u64(const char *name, umode_t mode,
-				      struct dentry *parent, u64 *value);
+    void debugfs_create_u64(const char *name, umode_t mode,
+			    struct dentry *parent, u64 *value);
 
 These files support both reading and writing the given value; if a specific
 file should not be written to, simply set the mode bits accordingly.  The
 values in these files are in decimal; if hexadecimal is more appropriate,
 the following functions can be used instead:
 
-    struct dentry *debugfs_create_x8(const char *name, umode_t mode,
-				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_x16(const char *name, umode_t mode,
-				      struct dentry *parent, u16 *value);
-    struct dentry *debugfs_create_x32(const char *name, umode_t mode,
-				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_x64(const char *name, umode_t mode,
-				      struct dentry *parent, u64 *value);
+    void debugfs_create_x8(const char *name, umode_t mode,
+			   struct dentry *parent, u8 *value);
+    void debugfs_create_x16(const char *name, umode_t mode,
+			    struct dentry *parent, u16 *value);
+    void debugfs_create_x32(const char *name, umode_t mode,
+			    struct dentry *parent, u32 *value);
+    void debugfs_create_x64(const char *name, umode_t mode,
+			    struct dentry *parent, u64 *value);
 
 These functions are useful as long as the developer knows the size of the
 value to be exported.  Some types can have different widths on different
-architectures, though, complicating the situation somewhat.  There is a
-function meant to help out in one special case:
+architectures, though, complicating the situation somewhat.  There are
+functions meant to help out in such special cases:
 
-    struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
-				         struct dentry *parent, 
-					 size_t *value);
+    void debugfs_create_size_t(const char *name, umode_t mode,
+			       struct dentry *parent, size_t *value);
 
 As might be expected, this function will create a debugfs file to represent
 a variable of type size_t.
 
+Similarly, there are helpers for variables of type unsigned long, in decimal
+and hexadecimal:
+
+    struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
+					struct dentry *parent,
+					unsigned long *value);
+    void debugfs_create_xul(const char *name, umode_t mode,
+			    struct dentry *parent, unsigned long *value);
+
 Boolean values can be placed in debugfs with:
 
     struct dentry *debugfs_create_bool(const char *name, umode_t mode,
@@ -114,8 +122,8 @@ lower-case values, or 1 or 0.  Any other input will be silently ignored.
 
 Also, atomic_t values can be placed in debugfs with:
 
-    struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode,
-				struct dentry *parent, atomic_t *value)
+    void debugfs_create_atomic_t(const char *name, umode_t mode,
+				 struct dentry *parent, atomic_t *value)
 
 A read of this file will get atomic_t values, and a write of this file
 will set atomic_t values.
diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.txt
index b0c085326e2e..db6d39c3ae71 100644
--- a/Documentation/filesystems/erofs.txt
+++ b/Documentation/filesystems/erofs.txt
@@ -24,11 +24,11 @@ Here is the main features of EROFS:
  - Metadata & data could be mixed by design;
 
  - 2 inode versions for different requirements:
-                          v1            v2
+                          compact (v1)  extended (v2)
    Inode metadata size:   32 bytes      64 bytes
    Max file size:         4 GB          16 EB (also limited by max. vol size)
    Max uids/gids:         65536         4294967296
-   File creation time:    no            yes (64 + 32-bit timestamp)
+   File change time:      no            yes (64 + 32-bit timestamp)
    Max hardlinks:         65536         4294967296
    Metadata reserved:     4 bytes       14 bytes
 
@@ -39,7 +39,7 @@ Here is the main features of EROFS:
  - Support POSIX.1e ACLs by using xattrs;
 
  - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-output compression for high performance;
+   LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
 
 The following git tree provides the file system user-space tools under
 development (ex, formatting tool mkfs.erofs):
@@ -85,7 +85,7 @@ All data areas should be aligned with the block size, but metadata areas
 may not. All metadatas can be now observed in two different spaces (views):
  1. Inode metadata space
     Each valid inode should be aligned with an inode slot, which is a fixed
-    value (32 bytes) and designed to be kept in line with v1 inode size.
+    value (32 bytes) and designed to be kept in line with compact inode size.
 
     Each inode can be directly found with the following formula:
          inode offset = meta_blkaddr * block_size + 32 * nid
@@ -117,10 +117,10 @@ may not. All metadatas can be now observed in two different spaces (views):
                                                        |-> aligned with 4B
 
     Inode could be 32 or 64 bytes, which can be distinguished from a common
-    field which all inode versions have -- i_advise:
+    field which all inode versions have -- i_format:
 
         __________________               __________________
-       |     i_advise     |             |     i_advise     |
+       |     i_format     |             |     i_format     |
        |__________________|             |__________________|
        |        ...       |             |        ...       |
        |                  |             |                  |
@@ -129,12 +129,13 @@ may not. All metadatas can be now observed in two different spaces (views):
                                         |__________________| 64 bytes
 
     Xattrs, extents, data inline are followed by the corresponding inode with
-    proper alignes, and they could be optional for different data mappings,
-    _currently_ there are totally 3 valid data mappings supported:
+    proper alignment, and they could be optional for different data mappings.
+    _currently_ total 4 valid data mappings are supported:
 
-     1) flat file data without data inline (no extent);
-     2) fixed-output size data compression (must have extents);
-     3) flat file data with tail-end data inline (no extent);
+     0  flat file data without data inline (no extent);
+     1  fixed-sized output data compression (with non-compacted indexes);
+     2  flat file data with tail packing data inline (no extent);
+     3  fixed-sized output data compression (with compacted indexes, v5.3+).
 
     The size of the optional xattrs is indicated by i_xattr_count in inode
     header. Large xattrs or xattrs shared by many different files can be
@@ -182,8 +183,8 @@ introduce another on-disk field at all.
 
 Compression
 -----------
-Currently, EROFS supports 4KB fixed-output clustersize transparent file
-compression, as illustrated below:
+Currently, EROFS supports 4KB fixed-sized output transparent file compression,
+as illustrated below:
 
          |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
          clusterofs                      clusterofs            clusterofs
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 7e1991328473..4eb3e2ddd00e 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -235,6 +235,17 @@ checkpoint=%s[:%u[%]]     Set to "disable" to turn off checkpointing. Set to "en
                        hide up to all remaining free space. The actual space that
                        would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
                        This space is reclaimed once checkpoint=enable.
+compress_algorithm=%s  Control compress algorithm, currently f2fs supports "lzo"
+                       and "lz4" algorithm.
+compress_log_size=%u   Support configuring compress cluster size, the size will
+                       be 4KB * (1 << %u), 16KB is minimum size, also it's
+                       default size.
+compress_extension=%s  Support adding specified extension, so that f2fs can enable
+                       compression on those corresponding files, e.g. if all files
+                       with '.ext' has high compression rate, we can set the '.ext'
+                       on compression extension list and enable compression on
+                       these file by default rather than to enable it via ioctl.
+                       For other files, we can still enable compression via ioctl.
 
 ================================================================================
 DEBUGFS ENTRIES
@@ -259,167 +270,6 @@ The files in each per-device directory are shown in table below.
 
 Files in /sys/fs/f2fs/<devname>
 (see also Documentation/ABI/testing/sysfs-fs-f2fs)
-..............................................................................
- File                         Content
-
- gc_urgent_sleep_time         This parameter controls sleep time for gc_urgent.
-                              500 ms is set by default. See above gc_urgent.
-
- gc_min_sleep_time            This tuning parameter controls the minimum sleep
-                              time for the garbage collection thread. Time is
-                              in milliseconds.
-
- gc_max_sleep_time            This tuning parameter controls the maximum sleep
-                              time for the garbage collection thread. Time is
-                              in milliseconds.
-
- gc_no_gc_sleep_time          This tuning parameter controls the default sleep
-                              time for the garbage collection thread. Time is
-                              in milliseconds.
-
- gc_idle                      This parameter controls the selection of victim
-                              policy for garbage collection. Setting gc_idle = 0
-                              (default) will disable this option. Setting
-                              gc_idle = 1 will select the Cost Benefit approach
-                              & setting gc_idle = 2 will select the greedy approach.
-
- gc_urgent                    This parameter controls triggering background GCs
-                              urgently or not. Setting gc_urgent = 0 [default]
-                              makes back to default behavior, while if it is set
-                              to 1, background thread starts to do GC by given
-                              gc_urgent_sleep_time interval.
-
- reclaim_segments             This parameter controls the number of prefree
-                              segments to be reclaimed. If the number of prefree
-			      segments is larger than the number of segments
-			      in the proportion to the percentage over total
-			      volume size, f2fs tries to conduct checkpoint to
-			      reclaim the prefree segments to free segments.
-			      By default, 5% over total # of segments.
-
- max_small_discards	      This parameter controls the number of discard
-			      commands that consist small blocks less than 2MB.
-			      The candidates to be discarded are cached until
-			      checkpoint is triggered, and issued during the
-			      checkpoint. By default, it is disabled with 0.
-
- discard_granularity	      This parameter controls the granularity of discard
-			      command size. It will issue discard commands iif
-			      the size is larger than given granularity. Its
-			      unit size is 4KB, and 4 (=16KB) is set by default.
-			      The maximum value is 128 (=512KB).
-
- reserved_blocks	      This parameter indicates the number of blocks that
-			      f2fs reserves internally for root.
-
- batched_trim_sections	      This parameter controls the number of sections
-                              to be trimmed out in batch mode when FITRIM
-                              conducts. 32 sections is set by default.
-
- ipu_policy                   This parameter controls the policy of in-place
-                              updates in f2fs. There are five policies:
-                               0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
-                               0x04: F2FS_IPU_UTIL,  0x08: F2FS_IPU_SSR_UTIL,
-                               0x10: F2FS_IPU_FSYNC.
-
- min_ipu_util                 This parameter controls the threshold to trigger
-                              in-place-updates. The number indicates percentage
-                              of the filesystem utilization, and used by
-                              F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
-
- min_fsync_blocks             This parameter controls the threshold to trigger
-                              in-place-updates when F2FS_IPU_FSYNC mode is set.
-			      The number indicates the number of dirty pages
-			      when fsync needs to flush on its call path. If
-			      the number is less than this value, it triggers
-			      in-place-updates.
-
- min_seq_blocks		      This parameter controls the threshold to serialize
-			      write IOs issued by multiple threads in parallel.
-
- min_hot_blocks		      This parameter controls the threshold to allocate
-			      a hot data log for pending data blocks to write.
-
- min_ssr_sections	      This parameter adds the threshold when deciding
-			      SSR block allocation. If this is large, SSR mode
-			      will be enabled early.
-
- ram_thresh                   This parameter controls the memory footprint used
-			      by free nids and cached nat entries. By default,
-			      10 is set, which indicates 10 MB / 1 GB RAM.
-
- ra_nid_pages		      When building free nids, F2FS reads NAT blocks
-			      ahead for speed up. Default is 0.
-
- dirty_nats_ratio	      Given dirty ratio of cached nat entries, F2FS
-			      determines flushing them in background.
-
- max_victim_search	      This parameter controls the number of trials to
-			      find a victim segment when conducting SSR and
-			      cleaning operations. The default value is 4096
-			      which covers 8GB block address range.
-
- migration_granularity	      For large-sized sections, F2FS can stop GC given
-			      this granularity instead of reclaiming entire
-			      section.
-
- dir_level                    This parameter controls the directory level to
-			      support large directory. If a directory has a
-			      number of files, it can reduce the file lookup
-			      latency by increasing this dir_level value.
-			      Otherwise, it needs to decrease this value to
-			      reduce the space overhead. The default value is 0.
-
- cp_interval		      F2FS tries to do checkpoint periodically, 60 secs
-			      by default.
-
- idle_interval		      F2FS detects system is idle, if there's no F2FS
-			      operations during given interval, 5 secs by
-			      default.
-
- discard_idle_interval	      F2FS detects the discard thread is idle, given
-			      time interval. Default is 5 secs.
-
- gc_idle_interval	      F2FS detects the GC thread is idle, given time
-			      interval. Default is 5 secs.
-
- umount_discard_timeout       When unmounting the disk, F2FS waits for finishing
-			      queued discard commands which can take huge time.
-			      This gives time out for it, 5 secs by default.
-
- iostat_enable		      This controls to enable/disable iostat in F2FS.
-
- readdir_ra		      This enables/disabled readahead of inode blocks
-			      in readdir, and default is enabled.
-
- gc_pin_file_thresh	      This indicates how many GC can be failed for the
-			      pinned file. If it exceeds this, F2FS doesn't
-			      guarantee its pinning state. 2048 trials is set
-			      by default.
-
- extension_list		      This enables to change extension_list for hot/cold
-			      files in runtime.
-
- inject_rate		      This controls injection rate of arbitrary faults.
-
- inject_type		      This controls injection type of arbitrary faults.
-
- dirty_segments 	      This shows # of dirty segments.
-
- lifetime_write_kbytes	      This shows # of data written to the disk.
-
- features		      This shows current features enabled on F2FS.
-
- current_reserved_blocks      This shows # of blocks currently reserved.
-
- unusable                     If checkpoint=disable, this shows the number of
-                              blocks that are unusable.
-                              If checkpoint=enable it shows the number of blocks
-                              that would be unusable if checkpoint=disable were
-                              to be set.
-
-encoding 	              This shows the encoding used for casefolding.
-                              If casefolding is not enabled, returns (none)
 
 ================================================================================
 USAGE
@@ -837,3 +687,44 @@ zero or random data, which is useful to the below scenario where:
  4. address = fibmap(fd, offset)
  5. open(blkdev)
  6. write(blkdev, address)
+
+Compression implementation
+--------------------------
+
+- New term named cluster is defined as basic unit of compression, file can
+be divided into multiple clusters logically. One cluster includes 4 << n
+(n >= 0) logical pages, compression size is also cluster size, each of
+cluster can be compressed or not.
+
+- In cluster metadata layout, one special block address is used to indicate
+cluster is compressed one or normal one, for compressed cluster, following
+metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
+stores data including compress header and compressed data.
+
+- In order to eliminate write amplification during overwrite, F2FS only
+support compression on write-once file, data can be compressed only when
+all logical blocks in file are valid and cluster compress ratio is lower
+than specified threshold.
+
+- To enable compression on regular inode, there are three ways:
+* chattr +c file
+* chattr +c dir; touch dir/file
+* mount w/ -o compress_extension=ext; touch file.ext
+
+Compress metadata layout:
+                             [Dnode Structure]
+             +-----------------------------------------------+
+             | cluster 1 | cluster 2 | ......... | cluster N |
+             +-----------------------------------------------+
+             .           .                       .           .
+       .                       .                .                      .
+  .         Compressed Cluster       .        .        Normal Cluster            .
++----------+---------+---------+---------+  +---------+---------+---------+---------+
+|compr flag| block 1 | block 2 | block 3 |  | block 1 | block 2 | block 3 | block 4 |
++----------+---------+---------+---------+  +---------+---------+---------+---------+
+           .                             .
+         .                                           .
+       .                                                           .
+      +-------------+-------------+----------+----------------------------+
+      | data length | data chksum | reserved |      compressed data       |
+      +-------------+-------------+----------+----------------------------+
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 8a0700af9596..bd9932344804 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -234,8 +234,8 @@ HKDF is more flexible, is nonreversible, and evenly distributes
 entropy from the master key.  HKDF is also standardized and widely
 used by other software, whereas the AES-128-ECB based KDF is ad-hoc.
 
-Per-file keys
--------------
+Per-file encryption keys
+------------------------
 
 Since each master key can protect many files, it is necessary to
 "tweak" the encryption of each file so that the same plaintext in two
@@ -256,13 +256,8 @@ alternative master keys or to support rotating master keys.  Instead,
 the master keys may be wrapped in userspace, e.g. as is done by the
 `fscrypt <https://github.com/google/fscrypt>`_ tool.
 
-Including the inode number in the IVs was considered.  However, it was
-rejected as it would have prevented ext4 filesystems from being
-resized, and by itself still wouldn't have been sufficient to prevent
-the same key from being directly reused for both XTS and CTS-CBC.
-
-DIRECT_KEY and per-mode keys
-----------------------------
+DIRECT_KEY policies
+-------------------
 
 The Adiantum encryption mode (see `Encryption modes and usage`_) is
 suitable for both contents and filenames encryption, and it accepts
@@ -273,9 +268,9 @@ is greater than that of an AES-256-XTS key.
 Therefore, to improve performance and save memory, for Adiantum a
 "direct key" configuration is supported.  When the user has enabled
 this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy,
-per-file keys are not used.  Instead, whenever any data (contents or
-filenames) is encrypted, the file's 16-byte nonce is included in the
-IV.  Moreover:
+per-file encryption keys are not used.  Instead, whenever any data
+(contents or filenames) is encrypted, the file's 16-byte nonce is
+included in the IV.  Moreover:
 
 - For v1 encryption policies, the encryption is done directly with the
   master key.  Because of this, users **must not** use the same master
@@ -285,6 +280,21 @@ IV.  Moreover:
   key derived using the KDF.  Users may use the same master key for
   other v2 encryption policies.
 
+IV_INO_LBLK_64 policies
+-----------------------
+
+When FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 is set in the fscrypt policy,
+the encryption keys are derived from the master key, encryption mode
+number, and filesystem UUID.  This normally results in all files
+protected by the same master key sharing a single contents encryption
+key and a single filenames encryption key.  To still encrypt different
+files' data differently, inode numbers are included in the IVs.
+Consequently, shrinking the filesystem may not be allowed.
+
+This format is optimized for use with inline encryption hardware
+compliant with the UFS or eMMC standards, which support only 64 IV
+bits per I/O request and may have only a small number of keyslots.
+
 Key identifiers
 ---------------
 
@@ -292,6 +302,16 @@ For master keys used for v2 encryption policies, a unique 16-byte "key
 identifier" is also derived using the KDF.  This value is stored in
 the clear, since it is needed to reliably identify the key itself.
 
+Dirhash keys
+------------
+
+For directories that are indexed using a secret-keyed dirhash over the
+plaintext filenames, the KDF is also used to derive a 128-bit
+SipHash-2-4 key per directory in order to hash filenames.  This works
+just like deriving a per-file encryption key, except that a different
+KDF context is used.  Currently, only casefolded ("case-insensitive")
+encrypted directories use this style of hashing.
+
 Encryption modes and usage
 ==========================
 
@@ -308,17 +328,18 @@ If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
 
 AES-128-CBC was added only for low-powered embedded devices with
 crypto accelerators such as CAAM or CESA that do not support XTS.  To
-use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
-implementation) must be enabled so that ESSIV can be used.
+use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or
+another SHA-256 implementation) must be enabled so that ESSIV can be
+used.
 
 Adiantum is a (primarily) stream cipher-based mode that is fast even
 on CPUs without dedicated crypto instructions.  It's also a true
 wide-block mode, unlike XTS.  It can also eliminate the need to derive
-per-file keys.  However, it depends on the security of two primitives,
-XChaCha12 and AES-256, rather than just one.  See the paper
-"Adiantum: length-preserving encryption for entry-level processors"
-(https://eprint.iacr.org/2018/720.pdf) for more details.  To use
-Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
+per-file encryption keys.  However, it depends on the security of two
+primitives, XChaCha12 and AES-256, rather than just one.  See the
+paper "Adiantum: length-preserving encryption for entry-level
+processors" (https://eprint.iacr.org/2018/720.pdf) for more details.
+To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
 implementations of ChaCha and NHPoly1305 should be enabled, e.g.
 CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
 
@@ -331,8 +352,8 @@ Contents encryption
 -------------------
 
 For file contents, each filesystem block is encrypted independently.
-Currently, only the case where the filesystem block size is equal to
-the system's page size (usually 4096 bytes) is supported.
+Starting from Linux kernel 5.5, encryption of filesystems with block
+size less than system's page size is supported.
 
 Each block's IV is set to the logical block number within the file as
 a little endian number, except that:
@@ -341,10 +362,16 @@ a little endian number, except that:
   is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
   of the file's data encryption key.
 
-- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
-  set in the fscrypt_policy), the file's nonce is also appended to the
-  IV.  Currently this is only allowed with the Adiantum encryption
-  mode.
+- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV.
+  Currently this is only allowed with the Adiantum encryption mode.
+
+- With `IV_INO_LBLK_64 policies`_, the logical block number is limited
+  to 32 bits and is placed in bits 0-31 of the IV.  The inode number
+  (which is also limited to 32 bits) is placed in bits 32-63.
+
+Note that because file logical block numbers are included in the IVs,
+filesystems must enforce that blocks are never shifted around within
+encrypted files, e.g. via "collapse range" or "insert range".
 
 Filenames encryption
 --------------------
@@ -354,10 +381,10 @@ the requirements to retain support for efficient directory lookups and
 filenames of up to 255 bytes, the same IV is used for every filename
 in a directory.
 
-However, each encrypted directory still uses a unique key; or
-alternatively (for the "direct key" configuration) has the file's
-nonce included in the IVs.  Thus, IV reuse is limited to within a
-single directory.
+However, each encrypted directory still uses a unique key, or
+alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
+inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
+Thus, IV reuse is limited to within a single directory.
 
 With CTS-CBC, the IV reuse means that when the plaintext filenames
 share a common prefix at least as long as the cipher block size (16
@@ -431,12 +458,15 @@ This structure must be initialized as follows:
   (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
   (4) for ``filenames_encryption_mode``.
 
-- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
-  identifies the amount of NUL-padding to use when encrypting
-  filenames.  If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).
-  Additionally, if the encryption modes are both
-  FSCRYPT_MODE_ADIANTUM, this can contain
-  FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_.
+- ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
+
+  - FSCRYPT_POLICY_FLAGS_PAD_*: The amount of NUL padding to use when
+    encrypting filenames.  If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32
+    (0x3).
+  - FSCRYPT_POLICY_FLAG_DIRECT_KEY: See `DIRECT_KEY policies`_.
+  - FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64: See `IV_INO_LBLK_64
+    policies`_.  This is mutually exclusive with DIRECT_KEY and is not
+    supported on v1 policies.
 
 - For v2 encryption policies, ``__reserved`` must be zeroed.
 
@@ -493,7 +523,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
 - ``EEXIST``: the file is already encrypted with an encryption policy
   different from the one specified
 - ``EINVAL``: an invalid encryption policy was specified (invalid
-  version, mode(s), or flags; or reserved bits were set)
+  version, mode(s), or flags; or reserved bits were set); or a v1
+  encryption policy was specified but the directory has the casefold
+  flag enabled (casefolding is incompatible with v1 policies).
 - ``ENOKEY``: a v2 encryption policy was specified, but the key with
   the specified ``master_key_identifier`` has not been added, nor does
   the process have the CAP_FOWNER capability in the initial user
@@ -618,7 +650,8 @@ follows::
     struct fscrypt_add_key_arg {
             struct fscrypt_key_specifier key_spec;
             __u32 raw_size;
-            __u32 __reserved[9];
+            __u32 key_id;
+            __u32 __reserved[8];
             __u8 raw[];
     };
 
@@ -635,6 +668,12 @@ follows::
             } u;
     };
 
+    struct fscrypt_provisioning_key_payload {
+            __u32 type;
+            __u32 __reserved;
+            __u8 raw[];
+    };
+
 :c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized
 as follows:
 
@@ -657,9 +696,26 @@ as follows:
   ``Documentation/security/keys/core.rst``).
 
 - ``raw_size`` must be the size of the ``raw`` key provided, in bytes.
+  Alternatively, if ``key_id`` is nonzero, this field must be 0, since
+  in that case the size is implied by the specified Linux keyring key.
+
+- ``key_id`` is 0 if the raw key is given directly in the ``raw``
+  field.  Otherwise ``key_id`` is the ID of a Linux keyring key of
+  type "fscrypt-provisioning" whose payload is a :c:type:`struct
+  fscrypt_provisioning_key_payload` whose ``raw`` field contains the
+  raw key and whose ``type`` field matches ``key_spec.type``.  Since
+  ``raw`` is variable-length, the total size of this key's payload
+  must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the
+  raw key size.  The process must have Search permission on this key.
+
+  Most users should leave this 0 and specify the raw key directly.
+  The support for specifying a Linux keyring key is intended mainly to
+  allow re-adding keys after a filesystem is unmounted and re-mounted,
+  without having to store the raw keys in userspace memory.
 
 - ``raw`` is a variable-length field which must contain the actual
-  key, ``raw_size`` bytes long.
+  key, ``raw_size`` bytes long.  Alternatively, if ``key_id`` is
+  nonzero, then this field is unused.
 
 For v2 policy keys, the kernel keeps track of which user (identified
 by effective user ID) added the key, and only allows the key to be
@@ -681,11 +737,16 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors:
 
 - ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the
   caller does not have the CAP_SYS_ADMIN capability in the initial
-  user namespace
+  user namespace; or the raw key was specified by Linux key ID but the
+  process lacks Search permission on the key.
 - ``EDQUOT``: the key quota for this user would be exceeded by adding
   the key
 - ``EINVAL``: invalid key size or key specifier type, or reserved bits
   were set
+- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the
+  key has the wrong type
+- ``ENOKEY``: the raw key was specified by Linux key ID, but no key
+  exists with that ID
 - ``ENOTTY``: this type of filesystem does not implement encryption
 - ``EOPNOTSUPP``: the kernel was not configured with encryption
   support for this filesystem, or the filesystem superblock has not
@@ -955,9 +1016,9 @@ astute users may notice some differences in behavior:
 - Direct I/O is not supported on encrypted files.  Attempts to use
   direct I/O on such files will fall back to buffered I/O.
 
-- The fallocate operations FALLOC_FL_COLLAPSE_RANGE,
-  FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported
-  on encrypted files and will fail with EOPNOTSUPP.
+- The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
+  FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
+  fail with EOPNOTSUPP.
 
 - Online defragmentation of encrypted files is not supported.  The
   EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with
@@ -1088,8 +1149,8 @@ The context structs contain the same information as the corresponding
 policy structs (see `Setting an encryption policy`_), except that the
 context structs also contain a nonce.  The nonce is randomly generated
 by the kernel and is used as KDF input or as a tweak to cause
-different files to be encrypted differently; see `Per-file keys`_ and
-`DIRECT_KEY and per-mode keys`_.
+different files to be encrypted differently; see `Per-file encryption
+keys`_ and `DIRECT_KEY policies`_.
 
 Data path changes
 -----------------
@@ -1141,7 +1202,7 @@ filesystem-specific hash(es) needed for directory lookups.  This
 allows the filesystem to still, with a high degree of confidence, map
 the filename given in ->lookup() back to a particular directory entry
 that was previously listed by readdir().  See :c:type:`struct
-fscrypt_digested_name` in the source for more details.
+fscrypt_nokey_name` in the source for more details.
 
 Note that the precise way that filenames are presented to userspace
 without the key is subject to change in the future.  It is only meant
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index 42a0b6dd9e0b..a95536b6443c 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -226,6 +226,14 @@ To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
 The verity flag is not settable via FS_IOC_SETFLAGS.  You must use
 FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
 
+statx
+-----
+
+Since Linux v5.5, the statx() system call sets STATX_ATTR_VERITY if
+the file has fs-verity enabled.  This can perform better than
+FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require
+opening the file, and opening verity files can be expensive.
+
 Accessing verity files
 ======================
 
@@ -398,7 +406,7 @@ pages have been read into the pagecache.  (See `Verifying data`_.)
 ext4
 ----
 
-ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2.
+ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
 
 To create verity files on an ext4 filesystem, the filesystem must have
 been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
@@ -434,7 +442,7 @@ also only supports extent-based files.
 f2fs
 ----
 
-f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0.
+f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
 
 To create verity files on an f2fs filesystem, the filesystem must have
 been formatted with ``-O verity``.
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 2c3a9f761205..824a3ecbb0ca 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -46,4 +46,7 @@ Documentation for filesystem implementations.
 .. toctree::
    :maxdepth: 2
 
+   autofs
+   overlayfs
    virtiofs
+   vfat
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index fc3a0704553c..5057e4d9dcd1 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -105,7 +105,7 @@ getattr:	no
 listxattr:	no
 fiemap:		no
 update_time:	no
-atomic_open:	exclusive
+atomic_open:	shared (exclusive if O_CREAT is set in open flags)
 tmpfile:	no
 ============	=============================================
 
diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt
deleted file mode 100644
index f3a5b0a8ac05..000000000000
--- a/Documentation/filesystems/nfs/fault_injection.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-
-Fault Injection
-===============
-Fault injection is a method for forcing errors that may not normally occur, or
-may be difficult to reproduce.  Forcing these errors in a controlled environment
-can help the developer find and fix bugs before their code is shipped in a
-production system.  Injecting an error on the Linux NFS server will allow us to
-observe how the client reacts and if it manages to recover its state correctly.
-
-NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
-feature.
-
-
-Using Fault Injection
-=====================
-On the client, mount the fault injection server through NFS v4.0+ and do some
-work over NFS (open files, take locks, ...).
-
-On the server, mount the debugfs filesystem to <debug_dir> and ls
-<debug_dir>/nfsd.  This will show a list of files that will be used for
-injecting faults on the NFS server.  As root, write a number n to the file
-corresponding to the action you want the server to take.  The server will then
-process the first n items it finds.  So if you want to forget 5 locks, echo '5'
-to <debug_dir>/nfsd/forget_locks.  A value of 0 will tell the server to forget
-all corresponding items.  A log message will be created containing the number
-of items forgotten (check dmesg).
-
-Go back to work on the client and check if the client recovered from the error
-correctly.
-
-
-Available Faults
-================
-forget_clients:
-     The NFS server keeps a list of clients that have placed a mount call.  If
-     this list is cleared, the server will have no knowledge of who the client
-     is, forcing the client to reauthenticate with the server.
-
-forget_openowners:
-     The NFS server keeps a list of what files are currently opened and who
-     they were opened by.  Clearing this list will force the client to reopen
-     its files.
-
-forget_locks:
-     The NFS server keeps a list of what files are currently locked in the VFS.
-     Clearing this list will force the client to reclaim its locks (files are
-     unlocked through the VFS as they are cleared from this list).
-
-forget_delegations:
-     A delegation is used to assure the client that a file, or part of a file,
-     has not changed since the delegation was awarded.  Clearing this list will
-     force the client to reacquire its delegation before accessing the file
-     again.
-
-recall_delegations:
-     Delegations can be recalled by the server when another client attempts to
-     access a file.  This test will notify the client that its delegation has
-     been revoked, forcing the client to reacquire the delegation before using
-     the file again.
-
-
-tools/nfs/inject_faults.sh script
-=================================
-This script has been created to ease the fault injection process.  This script
-will detect the mounted debugfs directory and write to the files located there
-based on the arguments passed by the user.  For example, running
-`inject_faults.sh forget_locks 1` as root will instruct the server to forget
-one lock.  Running `inject_faults forget_locks` will instruct the server to
-forgetall locks.
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt
deleted file mode 100644
index b86831acd583..000000000000
--- a/Documentation/filesystems/nfs/idmapper.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-
-=========
-ID Mapper
-=========
-Id mapper is used by NFS to translate user and group ids into names, and to
-translate user and group names into ids.  Part of this translation involves
-performing an upcall to userspace to request the information.  There are two
-ways NFS could obtain this information: placing a call to /sbin/request-key
-or by placing a call to the rpc.idmap daemon.
-
-NFS will attempt to call /sbin/request-key first.  If this succeeds, the
-result will be cached using the generic request-key cache.  This call should
-only fail if /etc/request-key.conf is not configured for the id_resolver key
-type, see the "Configuring" section below if you wish to use the request-key
-method.
-
-If the call to /sbin/request-key fails (if /etc/request-key.conf is not
-configured with the id_resolver key type), then the idmapper will ask the
-legacy rpc.idmap daemon for the id mapping.  This result will be stored
-in a custom NFS idmap cache.
-
-
-===========
-Configuring
-===========
-The file /etc/request-key.conf will need to be modified so /sbin/request-key can
-direct the upcall.  The following line should be added:
-
-#OP	TYPE	DESCRIPTION	CALLOUT INFO	PROGRAM ARG1 ARG2 ARG3 ...
-#======	=======	===============	===============	===============================
-create	id_resolver	*	*		/usr/sbin/nfs.idmap %k %d 600
-
-This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap.
-The last parameter, 600, defines how many seconds into the future the key will
-expire.  This parameter is optional for /usr/sbin/nfs.idmap.  When the timeout
-is not specified, nfs.idmap will default to 600 seconds.
-
-id mapper uses for key descriptions:
-	  uid:  Find the UID for the given user
-	  gid:  Find the GID for the given group
-	 user:  Find the user  name for the given UID
-	group:  Find the group name for the given GID
-
-You can handle any of these individually, rather than using the generic upcall
-program.  If you would like to use your own program for a uid lookup then you
-would edit your request-key.conf so it look similar to this:
-
-#OP	TYPE	DESCRIPTION	CALLOUT INFO	PROGRAM ARG1 ARG2 ARG3 ...
-#======	=======	===============	===============	===============================
-create	id_resolver	uid:*	*		/some/other/program %k %d 600
-create	id_resolver	*	*		/usr/sbin/nfs.idmap %k %d 600
-
-Notice that the new line was added above the line for the generic program.
-request-key will find the first matching line and corresponding program.  In
-this case, /some/other/program will handle all uid lookups and
-/usr/sbin/nfs.idmap will handle gid, user, and group lookups.
-
-See <file:Documentation/security/keys/request-key.rst> for more information
-about the request-key function.
-
-
-=========
-nfs.idmap
-=========
-nfs.idmap is designed to be called by request-key, and should not be run "by
-hand".  This program takes two arguments, a serialized key and a key
-description.  The serialized key is first converted into a key_serial_t, and
-then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
-
-The actual lookups are performed by functions found in nfsidmap.h.  nfs.idmap
-determines the correct function to call by looking at the first part of the
-description string.  For example, a uid lookup description will appear as
-"uid:user@domain".
-
-nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise.
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
deleted file mode 100644
index 22dc0dd6889c..000000000000
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ /dev/null
@@ -1,274 +0,0 @@
-################################################################################
-#									       #
-#				NFS/RDMA README				       #
-#									       #
-################################################################################
-
- Author: NetApp and Open Grid Computing
- Date: May 29, 2008
-
-Table of Contents
-~~~~~~~~~~~~~~~~~
- - Overview
- - Getting Help
- - Installation
- - Check RDMA and NFS Setup
- - NFS/RDMA Setup
-
-Overview
-~~~~~~~~
-
-  This document describes how to install and setup the Linux NFS/RDMA client
-  and server software.
-
-  The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
-  was first included in the following release, Linux 2.6.25.
-
-  In our testing, we have obtained excellent performance results (full 10Gbit
-  wire bandwidth at minimal client CPU) under many workloads. The code passes
-  the full Connectathon test suite and operates over both Infiniband and iWARP
-  RDMA adapters.
-
-Getting Help
-~~~~~~~~~~~~
-
-  If you get stuck, you can ask questions on the
-
-                nfs-rdma-devel@lists.sourceforge.net
-
-  mailing list.
-
-Installation
-~~~~~~~~~~~~
-
-  These instructions are a step by step guide to building a machine for
-  use with NFS/RDMA.
-
-  - Install an RDMA device
-
-    Any device supported by the drivers in drivers/infiniband/hw is acceptable.
-
-    Testing has been performed using several Mellanox-based IB cards, the
-    Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
-
-  - Install a Linux distribution and tools
-
-    The first kernel release to contain both the NFS/RDMA client and server was
-    Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
-    Linux kernel release should be installed.
-
-    The procedures described in this document have been tested with
-    distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
-
-  - Install nfs-utils-1.1.2 or greater on the client
-
-    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
-    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
-    version with support for NFS/RDMA mounts, but for various reasons we
-    recommend using nfs-utils-1.1.2 or greater). To see which version of
-    mount.nfs you are using, type:
-
-    $ /sbin/mount.nfs -V
-
-    If the version is less than 1.1.2 or the command does not exist,
-    you should install the latest version of nfs-utils.
-
-    Download the latest package from:
-
-    http://www.kernel.org/pub/linux/utils/nfs
-
-    Uncompress the package and follow the installation instructions.
-
-    If you will not need the idmapper and gssd executables (you do not need
-    these to create an NFS/RDMA enabled mount command), the installation
-    process can be simplified by disabling these features when running
-    configure:
-
-    $ ./configure --disable-gss --disable-nfsv4
-
-    To build nfs-utils you will need the tcp_wrappers package installed. For
-    more information on this see the package's README and INSTALL files.
-
-    After building the nfs-utils package, there will be a mount.nfs binary in
-    the utils/mount directory. This binary can be used to initiate NFS v2, v3,
-    or v4 mounts. To initiate a v4 mount, the binary must be called
-    mount.nfs4.  The standard technique is to create a symlink called
-    mount.nfs4 to mount.nfs.
-
-    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
-
-    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
-
-    In this location, mount.nfs will be invoked automatically for NFS mounts
-    by the system mount command.
-
-    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
-    on the NFS client machine. You do not need this specific version of
-    nfs-utils on the server. Furthermore, only the mount.nfs command from
-    nfs-utils-1.1.2 is needed on the client.
-
-  - Install a Linux kernel with NFS/RDMA
-
-    The NFS/RDMA client and server are both included in the mainline Linux
-    kernel version 2.6.25 and later. This and other versions of the Linux
-    kernel can be found at:
-
-    https://www.kernel.org/pub/linux/kernel/
-
-    Download the sources and place them in an appropriate location.
-
-  - Configure the RDMA stack
-
-    Make sure your kernel configuration has RDMA support enabled. Under
-    Device Drivers -> InfiniBand support, update the kernel configuration
-    to enable InfiniBand support [NOTE: the option name is misleading. Enabling
-    InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
-
-    Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
-    iWARP adapter support (amso, cxgb3, etc.).
-
-    If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
-
-  - Configure the NFS client and server
-
-    Your kernel configuration must also have NFS file system support and/or
-    NFS server support enabled. These and other NFS related configuration
-    options can be found under File Systems -> Network File Systems.
-
-  - Build, install, reboot
-
-    The NFS/RDMA code will be enabled automatically if NFS and RDMA
-    are turned on. The NFS/RDMA client and server are configured via the hidden
-    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
-    value of SUNRPC_XPRT_RDMA will be:
-
-     - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
-       and server will not be built
-     - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
-       in this case the NFS/RDMA client and server will be built as modules
-     - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
-       and server will be built into the kernel
-
-    Therefore, if you have followed the steps above and turned no NFS and RDMA,
-    the NFS/RDMA client and server will be built.
-
-    Build a new kernel, install it, boot it.
-
-Check RDMA and NFS Setup
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-    Before configuring the NFS/RDMA software, it is a good idea to test
-    your new kernel to ensure that the kernel is working correctly.
-    In particular, it is a good idea to verify that the RDMA stack
-    is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
-    is working properly.
-
-  - Check RDMA Setup
-
-    If you built the RDMA components as modules, load them at
-    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
-    card:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-
-    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
-    running on the network. If your IB switch has an embedded SM, you can
-    use it. Otherwise, you will need to run an SM, such as OpenSM, on one
-    of your end nodes.
-
-    If an SM is running on your network, you should see the following:
-
-    $ cat /sys/class/infiniband/driverX/ports/1/state
-    4: ACTIVE
-
-    where driverX is mthca0, ipath5, ehca3, etc.
-
-    To further test the InfiniBand software stack, use IPoIB (this
-    assumes you have two IB hosts named host1 and host2):
-
-    host1$ ip link set dev ib0 up
-    host1$ ip address add dev ib0 a.b.c.x
-    host2$ ip link set dev ib0 up
-    host2$ ip address add dev ib0 a.b.c.y
-    host1$ ping a.b.c.y
-    host2$ ping a.b.c.x
-
-    For other device types, follow the appropriate procedures.
-
-  - Check NFS Setup
-
-    For the NFS components enabled above (client and/or server),
-    test their functionality over standard Ethernet using TCP/IP or UDP/IP.
-
-NFS/RDMA Setup
-~~~~~~~~~~~~~~
-
-  We recommend that you use two machines, one to act as the client and
-  one to act as the server.
-
-  One time configuration:
-
-  - On the server system, configure the /etc/exports file and
-    start the NFS/RDMA server.
-
-    Exports entries with the following formats have been tested:
-
-    /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
-    /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
-
-    The IP address(es) is(are) the client's IPoIB address for an InfiniBand
-    HCA or the client's iWARP address(es) for an RNIC.
-
-    NOTE: The "insecure" option must be used because the NFS/RDMA client does
-    not use a reserved port.
-
- Each time a machine boots:
-
-  - Load and configure the RDMA drivers
-
-    For InfiniBand using a Mellanox adapter:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-    $ ip li set dev ib0 up
-    $ ip addr add dev ib0 a.b.c.d
-
-    NOTE: use unique addresses for the client and server
-
-  - Start the NFS server
-
-    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA transport module:
-
-    $ modprobe svcrdma
-
-    Regardless of how the server was built (module or built-in), start the
-    server:
-
-    $ /etc/init.d/nfs start
-
-    or
-
-    $ service nfs start
-
-    Instruct the server to listen on the RDMA transport:
-
-    $ echo rdma 20049 > /proc/fs/nfsd/portlist
-
-  - On the client system
-
-    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA client module:
-
-    $ modprobe xprtrdma.ko
-
-    Regardless of how the client was built (module or built-in), use this
-    command to mount the NFS/RDMA server:
-
-    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
-
-    To verify that the mount is using RDMA, run "cat /proc/mounts" and check
-    the "proto" field for the given mount.
-
-  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
deleted file mode 100644
index f2571c8bef74..000000000000
--- a/Documentation/filesystems/nfs/nfs.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-
-The NFS client
-==============
-
-The NFS version 2 protocol was first documented in RFC1094 (March 1989).
-Since then two more major releases of NFS have been published, with NFSv3
-being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
-2003).
-
-The Linux NFS client currently supports all the above published versions,
-and work is in progress on adding support for minor version 1 of the NFSv4
-protocol.
-
-The purpose of this document is to provide information on some of the
-special features of the NFS client that can be configured by system
-administrators.
-
-
-The nfs4_unique_id parameter
-============================
-
-NFSv4 requires clients to identify themselves to servers with a unique
-string.  File open and lock state shared between one client and one server
-is associated with this identity.  To support robust NFSv4 state recovery
-and transparent state migration, this identity string must not change
-across client reboots.
-
-Without any other intervention, the Linux client uses a string that contains
-the local system's node name.  System administrators, however, often do not
-take care to ensure that node names are fully qualified and do not change
-over the lifetime of a client system.  Node names can have other
-administrative requirements that require particular behavior that does not
-work well as part of an nfs_client_id4 string.
-
-The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
-used instead of a system's node name when an NFS client identifies itself to
-a server.  Thus, if the system's node name is not unique, or it changes, its
-nfs.nfs4_unique_id stays the same, preventing collision with other clients
-or loss of state during NFS reboot recovery or transparent state migration.
-
-The nfs.nfs4_unique_id string is typically a UUID, though it can contain
-anything that is believed to be unique across all NFS clients.  An
-nfs4_unique_id string should be chosen when a client system is installed,
-just as a system's root file system gets a fresh UUID in its label at
-install time.
-
-The string should remain fixed for the lifetime of the client.  It can be
-changed safely if care is taken that the client shuts down cleanly and all
-outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
-
-This string can be stored in an NFS client's grub.conf, or it can be provided
-via a net boot facility such as PXE.  It may also be specified as an nfs.ko
-module parameter.  Specifying a uniquifier string is not support for NFS
-clients running in containers.
-
-
-The DNS resolver
-================
-
-NFSv4 allows for one server to refer the NFS client to data that has been
-migrated onto another server by means of the special "fs_locations"
-attribute. See
-	http://tools.ietf.org/html/rfc3530#section-6
-and
-	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
-
-The fs_locations information can take the form of either an ip address and
-a path, or a DNS hostname and a path. The latter requires the NFS client to
-do a DNS lookup in order to mount the new volume, and hence the need for an
-upcall to allow userland to provide this service.
-
-Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
-/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
-
-   (1) The process checks the dns_resolve cache to see if it contains a
-       valid entry. If so, it returns that entry and exits.
-
-   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
-       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
-       is run, with two arguments:
-		- the cache name, "dns_resolve"
-		- the hostname to resolve
-
-   (3) After looking up the corresponding ip address, the helper script
-       writes the result into the rpc_pipefs pseudo-file
-       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
-       in the following (text) format:
-
-		"<ip address> <hostname> <ttl>\n"
-
-       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
-       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
-       <hostname> is identical to the second argument of the helper
-       script, and <ttl> is the 'time to live' of this cache entry (in
-       units of seconds).
-
-       Note: If <ip address> is invalid, say the string "0", then a negative
-       entry is created, which will cause the kernel to treat the hostname
-       as having no valid DNS translation.
-
-
-
-
-A basic sample /sbin/nfs_cache_getent
-=====================================
-
-#!/bin/bash
-#
-ttl=600
-#
-cut=/usr/bin/cut
-getent=/usr/bin/getent
-rpc_pipefs=/var/lib/nfs/rpc_pipefs
-#
-die()
-{
-	echo "Usage: $0 cache_name entry_name"
-	exit 1
-}
-
-[ $# -lt 2 ] && die
-cachename="$1"
-cache_path=${rpc_pipefs}/cache/${cachename}/channel
-
-case "${cachename}" in
-	dns_resolve)
-		name="$2"
-		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
-		[ -z "${result}" ] && result="0"
-		;;
-	*)
-		die
-		;;
-esac
-echo "${result} ${name} ${ttl}" >${cache_path}
-
diff --git a/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt b/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt
deleted file mode 100644
index 56a96fb08a73..000000000000
--- a/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Administrative interfaces for nfsd
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Note that normally these interfaces are used only by the utilities in
-nfs-utils.
-
-nfsd is controlled mainly by pseudofiles under the "nfsd" filesystem,
-which is normally mounted at /proc/fs/nfsd/.
-
-The server is always started by the first write of a nonzero value to
-nfsd/threads.
-
-Before doing that, NFSD can be told which sockets to listen on by
-writing to nfsd/portlist; that write may be:
-
-	- an ascii-encoded file descriptor, which should refer to a
-	  bound (and listening, for tcp) socket, or
-	- "transportname port", where transportname is currently either
-	  "udp", "tcp", or "rdma".
-
-If nfsd is started without doing any of these, then it will create one
-udp and one tcp listener at port 2049 (see nfsd_init_socks).
-
-On startup, nfsd and lockd grace periods start.
-
-nfsd is shut down by a write of 0 to nfsd/threads.  All locks and state
-are thrown away at that point.
-
-Between startup and shutdown, the number of threads may be adjusted up
-or down by additional writes to nfsd/threads or by writes to
-nfsd/pool_threads.
-
-For more detail about files under nfsd/ and what they control, see
-fs/nfsd/nfsctl.c; most of them have detailed comments.
-
-Implementation notes
-^^^^^^^^^^^^^^^^^^^^
-
-Note that the rpc server requires the caller to serialize addition and
-removal of listening sockets, and startup and shutdown of the server.
-For nfsd this is done using nfsd_mutex.
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
deleted file mode 100644
index ae4332464560..000000000000
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ /dev/null
@@ -1,355 +0,0 @@
-Mounting the root filesystem via NFS (nfsroot)
-===============================================
-
-Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
-Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
-Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
-Updated 2006 by Horms <horms@verge.net.au>
-Updated 2018 by Chris Novakovic <chris@chrisn.me.uk>
-
-
-
-In order to use a diskless system, such as an X-terminal or printer server
-for example, it is necessary for the root filesystem to be present on a
-non-disk device. This may be an initramfs (see Documentation/filesystems/
-ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/admin-guide/initrd.rst) or a
-filesystem mounted via NFS. The following text describes on how to use NFS
-for the root filesystem. For the rest of this text 'client' means the
-diskless system, and 'server' means the NFS server.
-
-
-
-
-1.) Enabling nfsroot capabilities
-    -----------------------------
-
-In order to use nfsroot, NFS client support needs to be selected as
-built-in during configuration. Once this has been selected, the nfsroot
-option will become available, which should also be selected.
-
-In the networking options, kernel level autoconfiguration can be selected,
-along with the types of autoconfiguration to support. Selecting all of
-DHCP, BOOTP and RARP is safe.
-
-
-
-
-2.) Kernel command line
-    -------------------
-
-When the kernel has been loaded by a boot loader (see below) it needs to be
-told what root fs device to use. And in the case of nfsroot, where to find
-both the server and the name of the directory on the server to mount as root.
-This can be established using the following kernel command line parameters:
-
-
-root=/dev/nfs
-
-  This is necessary to enable the pseudo-NFS-device. Note that it's not a
-  real device but just a synonym to tell the kernel to use NFS instead of
-  a real device.
-
-
-nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
-
-  If the `nfsroot' parameter is NOT given on the command line,
-  the default "/tftpboot/%s" will be used.
-
-  <server-ip>	Specifies the IP address of the NFS server.
-		The default address is determined by the `ip' parameter
-		(see below). This parameter allows the use of different
-		servers for IP autoconfiguration and NFS.
-
-  <root-dir>	Name of the directory on the server to mount as root.
-		If there is a "%s" token in the string, it will be
-		replaced by the ASCII-representation of the client's
-		IP address.
-
-  <nfs-options>	Standard NFS options. All options are separated by commas.
-		The following defaults are used:
-			port		= as given by server portmap daemon
-			rsize		= 4096
-			wsize		= 4096
-			timeo		= 7
-			retrans		= 3
-			acregmin	= 3
-			acregmax	= 60
-			acdirmin	= 30
-			acdirmax	= 60
-			flags		= hard, nointr, noposix, cto, ac
-
-
-ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
-   <dns0-ip>:<dns1-ip>:<ntp0-ip>
-
-  This parameter tells the kernel how to configure IP addresses of devices
-  and also how to set up the IP routing table. It was originally called
-  `nfsaddrs', but now the boot-time IP configuration works independently of
-  NFS, so it was renamed to `ip' and the old name remained as an alias for
-  compatibility reasons.
-
-  If this parameter is missing from the kernel command line, all fields are
-  assumed to be empty, and the defaults mentioned below apply. In general
-  this means that the kernel tries to configure everything using
-  autoconfiguration.
-
-  The <autoconf> parameter can appear alone as the value to the `ip'
-  parameter (without all the ':' characters before).  If the value is
-  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
-  autoconfiguration will take place.  The most common way to use this
-  is "ip=dhcp".
-
-  <client-ip>	IP address of the client.
-
-  		Default:  Determined using autoconfiguration.
-
-  <server-ip>	IP address of the NFS server. If RARP is used to determine
-		the client address and this parameter is NOT empty only
-		replies from the specified server are accepted.
-
-		Only required for NFS root. That is autoconfiguration
-		will not be triggered if it is missing and NFS root is not
-		in operation.
-
-		Value is exported to /proc/net/pnp with the prefix "bootserver "
-		(see below).
-
-		Default: Determined using autoconfiguration.
-		         The address of the autoconfiguration server is used.
-
-  <gw-ip>	IP address of a gateway if the server is on a different subnet.
-
-		Default: Determined using autoconfiguration.
-
-  <netmask>	Netmask for local network interface. If unspecified
-		the netmask is derived from the client IP address assuming
-		classful addressing.
-
-		Default:  Determined using autoconfiguration.
-
-  <hostname>	Name of the client. If a '.' character is present, anything
-		before the first '.' is used as the client's hostname, and anything
-		after it is used as its NIS domain name. May be supplied by
-		autoconfiguration, but its absence will not trigger autoconfiguration.
-		If specified and DHCP is used, the user-provided hostname (and NIS
-		domain name, if present) will be carried in the DHCP request; this
-		may cause a DNS record to be created or updated for the client.
-
-  		Default: Client IP address is used in ASCII notation.
-
-  <device>	Name of network device to use.
-
-		Default: If the host only has one device, it is used.
-			 Otherwise the device is determined using
-			 autoconfiguration. This is done by sending
-			 autoconfiguration requests out of all devices,
-			 and using the device that received the first reply.
-
-  <autoconf>	Method to use for autoconfiguration. In the case of options
-                which specify multiple autoconfiguration protocols,
-		requests are sent using all protocols, and the first one
-		to reply is used.
-
-		Only autoconfiguration protocols that have been compiled
-		into the kernel will be used, regardless of the value of
-		this option.
-
-                  off or none: don't use autoconfiguration
-				(do static IP assignment instead)
-		  on or any:   use any protocol available in the kernel
-			       (default)
-		  dhcp:        use DHCP
-		  bootp:       use BOOTP
-		  rarp:        use RARP
-		  both:        use both BOOTP and RARP but not DHCP
-		               (old option kept for backwards compatibility)
-
-		if dhcp is used, the client identifier can be used by following
-		format "ip=dhcp,client-id-type,client-id-value"
-
-                Default: any
-
-  <dns0-ip>	IP address of primary nameserver.
-		Value is exported to /proc/net/pnp with the prefix "nameserver "
-		(see below).
-
-		Default: None if not using autoconfiguration; determined
-		automatically if using autoconfiguration.
-
-  <dns1-ip>	IP address of secondary nameserver.
-		See <dns0-ip>.
-
-  <ntp0-ip>	IP address of a Network Time Protocol (NTP) server.
-		Value is exported to /proc/net/ipconfig/ntp_servers, but is
-		otherwise unused (see below).
-
-		Default: None if not using autoconfiguration; determined
-		automatically if using autoconfiguration.
-
-  After configuration (whether manual or automatic) is complete, two files
-  are created in the following format; lines are omitted if their respective
-  value is empty following configuration:
-
-  - /proc/net/pnp:
-
-	#PROTO: <DHCP|BOOTP|RARP|MANUAL>	(depending on configuration method)
-	domain <dns-domain>			(if autoconfigured, the DNS domain)
-	nameserver <dns0-ip>			(primary name server IP)
-	nameserver <dns1-ip>			(secondary name server IP)
-	nameserver <dns2-ip>			(tertiary name server IP)
-	bootserver <server-ip>			(NFS server IP)
-
-  - /proc/net/ipconfig/ntp_servers:
-
-	<ntp0-ip>				(NTP server IP)
-	<ntp1-ip>				(NTP server IP)
-	<ntp2-ip>				(NTP server IP)
-
-  <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip>
-  (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration;
-  they cannot be specified as part of the "ip=" kernel command line parameter.
-
-  Because the "domain" and "nameserver" options are recognised by DNS
-  resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems
-  that use an NFS root filesystem.
-
-  Note that the kernel will not synchronise the system time with any NTP
-  servers it discovers; this is the responsibility of a user space process
-  (e.g. an initrd/initramfs script that passes the IP addresses listed in
-  /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real
-  root filesystem if it is on NFS).
-
-
-nfsrootdebug
-
-  This parameter enables debugging messages to appear in the kernel
-  log at boot time so that administrators can verify that the correct
-  NFS mount options, server address, and root path are passed to the
-  NFS client.
-
-
-rdinit=<executable file>
-
-  To specify which file contains the program that starts system
-  initialization, administrators can use this command line parameter.
-  The default value of this parameter is "/init".  If the specified
-  file exists and the kernel can execute it, root filesystem related
-  kernel command line parameters, including `nfsroot=', are ignored.
-
-  A description of the process of mounting the root file system can be
-  found in:
-
-    Documentation/driver-api/early-userspace/early_userspace_support.rst
-
-
-
-
-3.) Boot Loader
-    ----------
-
-To get the kernel into memory different approaches can be used.
-They depend on various facilities being available:
-
-
-3.1)  Booting from a floppy using syslinux
-
-	When building kernels, an easy way to create a boot floppy that uses
-	syslinux is to use the zdisk or bzdisk make targets which use zimage
-      	and bzimage images respectively. Both targets accept the
-     	FDARGS parameter which can be used to set the kernel command line.
-
-	e.g.
-	   make bzdisk FDARGS="root=/dev/nfs"
-
-   	Note that the user running this command will need to have
-     	access to the floppy drive device, /dev/fd0
-
-     	For more information on syslinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-	N.B: Previously it was possible to write a kernel directly to
-	     a floppy using dd, configure the boot device using rdev, and
-	     boot using the resulting floppy. Linux no longer supports this
-	     method of booting.
-
-3.2) Booting from a cdrom using isolinux
-
-     	When building kernels, an easy way to create a bootable cdrom that
-     	uses isolinux is to use the isoimage target which uses a bzimage
-     	image. Like zdisk and bzdisk, this target accepts the FDARGS
-     	parameter which can be used to set the kernel command line.
-
-	e.g.
-	  make isoimage FDARGS="root=/dev/nfs"
-
-     	The resulting iso image will be arch/<ARCH>/boot/image.iso
-     	This can be written to a cdrom using a variety of tools including
-     	cdrecord.
-
-	e.g.
-	  cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
-
-     	For more information on isolinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-3.2) Using LILO
-	When using LILO all the necessary command line parameters may be
-	specified using the 'append=' directive in the LILO configuration
-	file.
-
-	However, to use the 'root=' directive you also need to create
-	a dummy root device, which may be removed after LILO is run.
-
-	mknod /dev/boot255 c 0 255
-
-	For information on configuring LILO, please refer to its documentation.
-
-3.3) Using GRUB
-	When using GRUB, kernel parameter are simply appended after the kernel
-	specification: kernel <kernel> <parameters>
-
-3.4) Using loadlin
-	loadlin may be used to boot Linux from a DOS command prompt without
-	requiring a local hard disk to mount as root. This has not been
-	thoroughly tested by the authors of this document, but in general
-	it should be possible configure the kernel command line similarly
-	to the configuration of LILO.
-
-	Please refer to the loadlin documentation for further information.
-
-3.5) Using a boot ROM
-	This is probably the most elegant way of booting a diskless client.
-	With a boot ROM the kernel is loaded using the TFTP protocol. The
-	authors of this document are not aware of any no commercial boot
-	ROMs that support booting Linux over the network. However, there
-	are two free implementations of a boot ROM, netboot-nfs and
-	etherboot, both of which are available on sunsite.unc.edu, and both
-	of which contain everything you need to boot a diskless Linux client.
-
-3.6) Using pxelinux
-	Pxelinux may be used to boot linux using the PXE boot loader
-	which is present on many modern network cards.
-
-	When using pxelinux, the kernel image is specified using
-	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
-	are passed to the kernel by adding them to the "append" line.
-	It is common to use serial console in conjunction with pxeliunx,
-	see Documentation/admin-guide/serial-console.rst for more information.
-
-	For more information on isolinux, including how to create bootdisks
-	for prebuilt kernels, see http://syslinux.zytor.com/
-
-
-
-
-4.) Credits
-    -------
-
-  The nfsroot code in the kernel and the RARP support have been written
-  by Gero Kuhlmann <gero@gkminix.han.de>.
-
-  The rest of the IP layer autoconfiguration code has been written
-  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
-
-  In order to write the initial version of nfsroot I would like to thank
-  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/nfs/pnfs-block-server.txt b/Documentation/filesystems/nfs/pnfs-block-server.txt
deleted file mode 100644
index 2143673cf154..000000000000
--- a/Documentation/filesystems/nfs/pnfs-block-server.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-pNFS block layout server user guide
-
-The Linux NFS server now supports the pNFS block layout extension.  In this
-case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition
-to handling all the metadata access to the NFS export also hands out layouts
-to the clients to directly access the underlying block devices that are
-shared with the client.
-
-To use pNFS block layouts with with the Linux NFS server the exported file
-system needs to support the pNFS block layouts (currently just XFS), and the
-file system must sit on shared storage (typically iSCSI) that is accessible
-to the clients in addition to the MDS.  As of now the file system needs to
-sit directly on the exported volume, striping or concatenation of
-volumes on the MDS and clients is not supported yet.
-
-On the server, pNFS block volume support is automatically if the file system
-support it.  On the client make sure the kernel has the CONFIG_PNFS_BLOCK
-option enabled, the blkmapd daemon from nfs-utils is running, and the
-file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1).
-
-If the nfsd server needs to fence a non-responding client it calls
-/sbin/nfsd-recall-failed with the first argument set to the IP address of
-the client, and the second argument set to the device node without the /dev
-prefix for the file system to be fenced. Below is an example file that shows
-how to translate the device into a serial number from SCSI EVPD 0x80:
-
-cat > /sbin/nfsd-recall-failed << EOF
-#!/bin/sh
-
-CLIENT="$1"
-DEV="/dev/$2"
-EVPD=`sg_inq --page=0x80 ${DEV} | \
-	grep "Unit serial number:" | \
-	awk -F ': ' '{print $2}'`
-
-echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
-EOF
diff --git a/Documentation/filesystems/nfs/pnfs-scsi-server.txt b/Documentation/filesystems/nfs/pnfs-scsi-server.txt
deleted file mode 100644
index 5bef7268bd9f..000000000000
--- a/Documentation/filesystems/nfs/pnfs-scsi-server.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
-pNFS SCSI layout server user guide
-==================================
-
-This document describes support for pNFS SCSI layouts in the Linux NFS server.
-With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
-which in addition to handling all the metadata access to the NFS export,
-also hands out layouts to the clients so that they can directly access the
-underlying SCSI LUNs that are shared with the client.
-
-To use pNFS SCSI layouts with with the Linux NFS server, the exported file
-system needs to support the pNFS SCSI layouts (currently just XFS), and the
-file system must sit on a SCSI LUN that is accessible to the clients in
-addition to the MDS.  As of now the file system needs to sit directly on the
-exported LUN, striping or concatenation of LUNs on the MDS and clients
-is not supported yet.
-
-On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
-automatically enabled if the file system is exported using the "pnfs"
-option and the underlying SCSI device support persistent reservations.
-On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
-enabled, and the file system is mounted using the NFSv4.1 protocol
-version (mount -o vers=4.1).
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
new file mode 100644
index 000000000000..e443be7928db
--- /dev/null
+++ b/Documentation/filesystems/overlayfs.rst
@@ -0,0 +1,497 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Written by: Neil Brown
+Please see MAINTAINERS file for where to send questions.
+
+Overlay Filesystem
+==================
+
+This document describes a prototype for a new approach to providing
+overlay-filesystem functionality in Linux (sometimes referred to as
+union-filesystems).  An overlay-filesystem tries to present a
+filesystem which is the result over overlaying one filesystem on top
+of the other.
+
+
+Overlay objects
+---------------
+
+The overlay filesystem approach is 'hybrid', because the objects that
+appear in the filesystem do not always appear to belong to that filesystem.
+In many cases, an object accessed in the union will be indistinguishable
+from accessing the corresponding object from the original filesystem.
+This is most obvious from the 'st_dev' field returned by stat(2).
+
+While directories will report an st_dev from the overlay-filesystem,
+non-directory objects may report an st_dev from the lower filesystem or
+upper filesystem that is providing the object.  Similarly st_ino will
+only be unique when combined with st_dev, and both of these can change
+over the lifetime of a non-directory object.  Many applications and
+tools ignore these values and will not be affected.
+
+In the special case of all overlay layers on the same underlying
+filesystem, all objects will report an st_dev from the overlay
+filesystem and st_ino from the underlying filesystem.  This will
+make the overlay mount more compliant with filesystem scanners and
+overlay objects will be distinguishable from the corresponding
+objects in the original filesystem.
+
+On 64bit systems, even if all overlay layers are not on the same
+underlying filesystem, the same compliant behavior could be achieved
+with the "xino" feature.  The "xino" feature composes a unique object
+identifier from the real object st_ino and an underlying fsid index.
+If all underlying filesystems support NFS file handles and export file
+handles with 32bit inode number encoding (e.g. ext4), overlay filesystem
+will use the high inode number bits for fsid.  Even when the underlying
+filesystem uses 64bit inode numbers, users can still enable the "xino"
+feature with the "-o xino=on" overlay mount option.  That is useful for the
+case of underlying filesystems like xfs and tmpfs, which use 64bit inode
+numbers, but are very unlikely to use the high inode number bit.
+
+
+Upper and Lower
+---------------
+
+An overlay filesystem combines two filesystems - an 'upper' filesystem
+and a 'lower' filesystem.  When a name exists in both filesystems, the
+object in the 'upper' filesystem is visible while the object in the
+'lower' filesystem is either hidden or, in the case of directories,
+merged with the 'upper' object.
+
+It would be more correct to refer to an upper and lower 'directory
+tree' rather than 'filesystem' as it is quite possible for both
+directory trees to be in the same filesystem and there is no
+requirement that the root of a filesystem be given for either upper or
+lower.
+
+The lower filesystem can be any filesystem supported by Linux and does
+not need to be writable.  The lower filesystem can even be another
+overlayfs.  The upper filesystem will normally be writable and if it
+is it must support the creation of trusted.* extended attributes, and
+must provide valid d_type in readdir responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any
+filesystem type.
+
+Directories
+-----------
+
+Overlaying mainly involves directories.  If a given name appears in both
+upper and lower filesystems and refers to a non-directory in either,
+then the lower object is hidden - the name refers only to the upper
+object.
+
+Where both upper and lower objects are directories, a merged directory
+is formed.
+
+At mount time, the two directories given as mount options "lowerdir" and
+"upperdir" are combined into a merged directory:
+
+  mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\
+  workdir=/work /merged
+
+The "workdir" needs to be an empty directory on the same filesystem
+as upperdir.
+
+Then whenever a lookup is requested in such a merged directory, the
+lookup is performed in each actual directory and the combined result
+is cached in the dentry belonging to the overlay filesystem.  If both
+actual lookups find directories, both are stored and a merged
+directory is created, otherwise only one is stored: the upper if it
+exists, else the lower.
+
+Only the lists of names from directories are merged.  Other content
+such as metadata and extended attributes are reported for the upper
+directory only.  These attributes of the lower directory are hidden.
+
+whiteouts and opaque directories
+--------------------------------
+
+In order to support rm and rmdir without changing the lower
+filesystem, an overlay filesystem needs to record in the upper filesystem
+that files have been removed.  This is done using whiteouts and opaque
+directories (non-directories are always opaque).
+
+A whiteout is created as a character device with 0/0 device number.
+When a whiteout is found in the upper level of a merged directory, any
+matching name in the lower level is ignored, and the whiteout itself
+is also hidden.
+
+A directory is made opaque by setting the xattr "trusted.overlay.opaque"
+to "y".  Where the upper filesystem contains an opaque directory, any
+directory in the lower filesystem with the same name is ignored.
+
+readdir
+-------
+
+When a 'readdir' request is made on a merged directory, the upper and
+lower directories are each read and the name lists merged in the
+obvious way (upper is read first, then lower - entries that already
+exist are not re-added).  This merged name list is cached in the
+'struct file' and so remains as long as the file is kept open.  If the
+directory is opened and read by two processes at the same time, they
+will each have separate caches.  A seekdir to the start of the
+directory (offset 0) followed by a readdir will cause the cache to be
+discarded and rebuilt.
+
+This means that changes to the merged directory do not appear while a
+directory is being read.  This is unlikely to be noticed by many
+programs.
+
+seek offsets are assigned sequentially when the directories are read.
+Thus if
+
+  - read part of a directory
+  - remember an offset, and close the directory
+  - re-open the directory some time later
+  - seek to the remembered offset
+
+there may be little correlation between the old and new locations in
+the list of filenames, particularly if anything has changed in the
+directory.
+
+Readdir on directories that are not merged is simply handled by the
+underlying directory (upper or lower).
+
+renaming directories
+--------------------
+
+When renaming a directory that is on the lower layer or merged (i.e. the
+directory was not created on the upper layer to start with) overlayfs can
+handle it in two different ways:
+
+1. return EXDEV error: this error is returned by rename(2) when trying to
+   move a file or directory across filesystem boundaries.  Hence
+   applications are usually prepared to hande this error (mv(1) for example
+   recursively copies the directory tree).  This is the default behavior.
+
+2. If the "redirect_dir" feature is enabled, then the directory will be
+   copied up (but not the contents).  Then the "trusted.overlay.redirect"
+   extended attribute is set to the path of the original location from the
+   root of the overlay.  Finally the directory is moved to the new
+   location.
+
+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by  default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration.  Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
+When the NFS export feature is enabled, every copied up directory is
+indexed by the file handle of the lower inode and a file handle of the
+upper directory is stored in a "trusted.overlay.upper" extended attribute
+on the index entry.  On lookup of a merged directory, if the upper
+directory does not match the file handle stores in the index, that is an
+indication that multiple upper directories may be redirected to the same
+lower directory.  In that case, lookup returns an error and warns about
+a possible inconsistency.
+
+Because lower layer redirects cannot be verified with the index, enabling
+NFS export support on an overlay filesystem with no upper layer requires
+turning off redirect follow (e.g. "redirect_dir=nofollow").
+
+
+Non-directories
+---------------
+
+Objects that are not directories (files, symlinks, device-special
+files etc.) are presented either from the upper or lower filesystem as
+appropriate.  When a file in the lower filesystem is accessed in a way
+the requires write-access, such as opening for write access, changing
+some metadata etc., the file is first copied from the lower filesystem
+to the upper filesystem (copy_up).  Note that creating a hard-link
+also requires copy_up, though of course creation of a symlink does
+not.
+
+The copy_up may turn out to be unnecessary, for example if the file is
+opened for read-write but the data is not modified.
+
+The copy_up process first makes sure that the containing directory
+exists in the upper filesystem - creating it and any parents as
+necessary.  It then creates the object with the same metadata (owner,
+mode, mtime, symlink-target etc.) and then if the object is a file, the
+data is copied from the lower to the upper filesystem.  Finally any
+extended attributes are copied up.
+
+Once the copy_up is complete, the overlay filesystem simply
+provides direct access to the newly created file in the upper
+filesystem - future operations on the file are barely noticed by the
+overlay filesystem (though an operation on the name of the file such as
+rename or unlink will of course be noticed and handled).
+
+
+Multiple lower layers
+---------------------
+
+Multiple lower layers can now be given using the the colon (":") as a
+separator character between the directory names.  For example:
+
+  mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
+
+As the example shows, "upperdir=" and "workdir=" may be omitted.  In
+that case the overlay will be read-only.
+
+The specified lower directories will be stacked beginning from the
+rightmost one and going left.  In the above example lower1 will be the
+top, lower2 the middle and lower3 the bottom layer.
+
+
+Metadata only copy up
+---------------------
+
+When metadata only copy up feature is enabled, overlayfs will only copy
+up metadata (as opposed to whole file), when a metadata specific operation
+like chown/chmod is performed. Full file will be copied up later when
+file is opened for WRITE operation.
+
+In other words, this is delayed data copy up operation and data is copied
+up when there is a need to actually modify data.
+
+There are multiple ways to enable/disable this feature. A config option
+CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature
+by default. Or one can enable/disable it at module load time with module
+parameter metacopy=on/off. Lastly, there is also a per mount option
+metacopy=on/off to enable/disable this feature per mount.
+
+Do not use metacopy=on with untrusted upper/lower directories. Otherwise
+it is possible that an attacker can create a handcrafted file with
+appropriate REDIRECT and METACOPY xattrs, and gain access to file on lower
+pointed by REDIRECT. This should not be possible on local system as setting
+"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
+for untrusted layers like from a pen drive.
+
+Note: redirect_dir={off|nofollow|follow[*]} conflicts with metacopy=on, and
+results in an error.
+
+[*] redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
+given.
+
+Sharing and copying layers
+--------------------------
+
+Lower layers may be shared among several overlay mounts and that is indeed
+a very common practice.  An overlay mount may use the same lower layer
+path as another overlay mount and it may use a lower layer path that is
+beneath or above the path of another overlay lower layer path.
+
+Using an upper layer path and/or a workdir path that are already used by
+another overlay mount is not allowed and may fail with EBUSY.  Using
+partially overlapping paths is not allowed and may fail with EBUSY.
+If files are accessed from two overlayfs mounts which share or overlap the
+upper layer and/or workdir path the behavior of the overlay is undefined,
+though it will not result in a crash or deadlock.
+
+Mounting an overlay using an upper layer path, where the upper layer path
+was previously used by another mounted overlay in combination with a
+different lower layer path, is allowed, unless the "inodes index" feature
+or "metadata only copy up" feature is enabled.
+
+With the "inodes index" feature, on the first time mount, an NFS file
+handle of the lower layer root directory, along with the UUID of the lower
+filesystem, are encoded and stored in the "trusted.overlay.origin" extended
+attribute on the upper layer root directory.  On subsequent mount attempts,
+the lower root directory file handle and lower filesystem UUID are compared
+to the stored origin in upper root directory.  On failure to verify the
+lower root origin, mount will fail with ESTALE.  An overlayfs mount with
+"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
+does not support NFS export, lower filesystem does not have a valid UUID or
+if the upper filesystem does not support extended attributes.
+
+For "metadata only copy up" feature there is no verification mechanism at
+mount time. So if same upper is mounted with different set of lower, mount
+probably will succeed but expect the unexpected later on. So don't do it.
+
+It is quite a common practice to copy overlay layers to a different
+directory tree on the same or different underlying filesystem, and even
+to a different machine.  With the "inodes index" feature, trying to mount
+the copied layers will fail the verification of the lower root file handle.
+
+
+Non-standard behavior
+---------------------
+
+Current version of overlayfs can act as a mostly POSIX compliant
+filesystem.
+
+This is the list of cases that overlayfs doesn't currently handle:
+
+a) POSIX mandates updating st_atime for reads.  This is currently not
+done in the case when the file resides on a lower layer.
+
+b) If a file residing on a lower layer is opened for read-only and then
+memory mapped with MAP_SHARED, then subsequent changes to the file are not
+reflected in the memory mapping.
+
+The following options allow overlayfs to act more like a standards
+compliant filesystem:
+
+1) "redirect_dir"
+
+Enabled with the mount option or module option: "redirect_dir=on" or with
+the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y.
+
+If this feature is disabled, then rename(2) on a lower or merged directory
+will fail with EXDEV ("Invalid cross-device link").
+
+2) "inode index"
+
+Enabled with the mount option or module option "index=on" or with the
+kernel config option CONFIG_OVERLAY_FS_INDEX=y.
+
+If this feature is disabled and a file with multiple hard links is copied
+up, then this will "break" the link.  Changes will not be propagated to
+other names referring to the same inode.
+
+3) "xino"
+
+Enabled with the mount option "xino=auto" or "xino=on", with the module
+option "xino_auto=on" or with the kernel config option
+CONFIG_OVERLAY_FS_XINO_AUTO=y.  Also implicitly enabled by using the same
+underlying filesystem for all layers making up the overlay.
+
+If this feature is disabled or the underlying filesystem doesn't have
+enough free bits in the inode number, then overlayfs will not be able to
+guarantee that the values of st_ino and st_dev returned by stat(2) and the
+value of d_ino returned by readdir(3) will act like on a normal filesystem.
+E.g. the value of st_dev may be different for two objects in the same
+overlay filesystem and the value of st_ino for directory objects may not be
+persistent and could change even while the overlay filesystem is mounted.
+
+
+Changes to underlying filesystems
+---------------------------------
+
+Offline changes, when the overlay is not mounted, are allowed to either
+the upper or the lower trees.
+
+Changes to the underlying filesystems while part of a mounted overlay
+filesystem are not allowed.  If the underlying filesystem is changed,
+the behavior of the overlay is undefined, though it will not result in
+a crash or deadlock.
+
+When the overlay NFS export feature is enabled, overlay filesystems
+behavior on offline changes of the underlying lower layer is different
+than the behavior when NFS export is disabled.
+
+On every copy_up, an NFS file handle of the lower inode, along with the
+UUID of the lower filesystem, are encoded and stored in an extended
+attribute "trusted.overlay.origin" on the upper inode.
+
+When the NFS export feature is enabled, a lookup of a merged directory,
+that found a lower directory at the lookup path or at the path pointed
+to by the "trusted.overlay.redirect" extended attribute, will verify
+that the found lower directory file handle and lower filesystem UUID
+match the origin file handle that was stored at copy_up time.  If a
+found lower directory does not match the stored origin, that directory
+will not be merged with the upper directory.
+
+
+
+NFS export
+----------
+
+When the underlying filesystems supports NFS export and the "nfs_export"
+feature is enabled, an overlay filesystem may be exported to NFS.
+
+With the "nfs_export" feature, on copy_up of any lower object, an index
+entry is created under the index directory.  The index entry name is the
+hexadecimal representation of the copy up origin file handle.  For a
+non-directory object, the index entry is a hard link to the upper inode.
+For a directory object, the index entry has an extended attribute
+"trusted.overlay.upper" with an encoded file handle of the upper
+directory inode.
+
+When encoding a file handle from an overlay filesystem object, the
+following rules apply:
+
+1. For a non-upper object, encode a lower file handle from lower inode
+2. For an indexed object, encode a lower file handle from copy_up origin
+3. For a pure-upper object and for an existing non-indexed upper object,
+   encode an upper file handle from upper inode
+
+The encoded overlay file handle includes:
+ - Header including path type information (e.g. lower/upper)
+ - UUID of the underlying filesystem
+ - Underlying filesystem encoding of underlying inode
+
+This encoding format is identical to the encoding format file handles that
+are stored in extended attribute "trusted.overlay.origin".
+
+When decoding an overlay file handle, the following steps are followed:
+
+1. Find underlying layer by UUID and path type information.
+2. Decode the underlying filesystem file handle to underlying dentry.
+3. For a lower file handle, lookup the handle in index directory by name.
+4. If a whiteout is found in index, return ESTALE. This represents an
+   overlay object that was deleted after its file handle was encoded.
+5. For a non-directory, instantiate a disconnected overlay dentry from the
+   decoded underlying dentry, the path type and index inode, if found.
+6. For a directory, use the connected underlying decoded dentry, path type
+   and index, to lookup a connected overlay dentry.
+
+Decoding a non-directory file handle may return a disconnected dentry.
+copy_up of that disconnected dentry will create an upper index entry with
+no upper alias.
+
+When overlay filesystem has multiple lower layers, a middle layer
+directory may have a "redirect" to lower directory.  Because middle layer
+"redirects" are not indexed, a lower file handle that was encoded from the
+"redirect" origin directory, cannot be used to find the middle or upper
+layer directory.  Similarly, a lower file handle that was encoded from a
+descendant of the "redirect" origin directory, cannot be used to
+reconstruct a connected overlay path.  To mitigate the cases of
+directories that cannot be decoded from a lower file handle, these
+directories are copied up on encode and encoded as an upper file handle.
+On an overlay filesystem with no upper layer this mitigation cannot be
+used NFS export in this setup requires turning off redirect follow (e.g.
+"redirect_dir=nofollow").
+
+The overlay filesystem does not support non-directory connectable file
+handles, so exporting with the 'subtree_check' exportfs configuration will
+cause failures to lookup files over NFS.
+
+When the NFS export feature is enabled, all directory index entries are
+verified on mount time to check that upper file handles are not stale.
+This verification may cause significant overhead in some cases.
+
+
+Testsuite
+---------
+
+There's a testsuite originally developed by David Howells and currently
+maintained by Amir Goldstein at:
+
+  https://github.com/amir73il/unionmount-testsuite.git
+
+Run as root:
+
+  # cd unionmount-testsuite
+  # ./run --ov --verify
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
deleted file mode 100644
index 845d689e0fd7..000000000000
--- a/Documentation/filesystems/overlayfs.txt
+++ /dev/null
@@ -1,495 +0,0 @@
-Written by: Neil Brown
-Please see MAINTAINERS file for where to send questions.
-
-Overlay Filesystem
-==================
-
-This document describes a prototype for a new approach to providing
-overlay-filesystem functionality in Linux (sometimes referred to as
-union-filesystems).  An overlay-filesystem tries to present a
-filesystem which is the result over overlaying one filesystem on top
-of the other.
-
-
-Overlay objects
----------------
-
-The overlay filesystem approach is 'hybrid', because the objects that
-appear in the filesystem do not always appear to belong to that filesystem.
-In many cases, an object accessed in the union will be indistinguishable
-from accessing the corresponding object from the original filesystem.
-This is most obvious from the 'st_dev' field returned by stat(2).
-
-While directories will report an st_dev from the overlay-filesystem,
-non-directory objects may report an st_dev from the lower filesystem or
-upper filesystem that is providing the object.  Similarly st_ino will
-only be unique when combined with st_dev, and both of these can change
-over the lifetime of a non-directory object.  Many applications and
-tools ignore these values and will not be affected.
-
-In the special case of all overlay layers on the same underlying
-filesystem, all objects will report an st_dev from the overlay
-filesystem and st_ino from the underlying filesystem.  This will
-make the overlay mount more compliant with filesystem scanners and
-overlay objects will be distinguishable from the corresponding
-objects in the original filesystem.
-
-On 64bit systems, even if all overlay layers are not on the same
-underlying filesystem, the same compliant behavior could be achieved
-with the "xino" feature.  The "xino" feature composes a unique object
-identifier from the real object st_ino and an underlying fsid index.
-If all underlying filesystems support NFS file handles and export file
-handles with 32bit inode number encoding (e.g. ext4), overlay filesystem
-will use the high inode number bits for fsid.  Even when the underlying
-filesystem uses 64bit inode numbers, users can still enable the "xino"
-feature with the "-o xino=on" overlay mount option.  That is useful for the
-case of underlying filesystems like xfs and tmpfs, which use 64bit inode
-numbers, but are very unlikely to use the high inode number bit.
-
-
-Upper and Lower
----------------
-
-An overlay filesystem combines two filesystems - an 'upper' filesystem
-and a 'lower' filesystem.  When a name exists in both filesystems, the
-object in the 'upper' filesystem is visible while the object in the
-'lower' filesystem is either hidden or, in the case of directories,
-merged with the 'upper' object.
-
-It would be more correct to refer to an upper and lower 'directory
-tree' rather than 'filesystem' as it is quite possible for both
-directory trees to be in the same filesystem and there is no
-requirement that the root of a filesystem be given for either upper or
-lower.
-
-The lower filesystem can be any filesystem supported by Linux and does
-not need to be writable.  The lower filesystem can even be another
-overlayfs.  The upper filesystem will normally be writable and if it
-is it must support the creation of trusted.* extended attributes, and
-must provide valid d_type in readdir responses, so NFS is not suitable.
-
-A read-only overlay of two read-only filesystems may use any
-filesystem type.
-
-Directories
------------
-
-Overlaying mainly involves directories.  If a given name appears in both
-upper and lower filesystems and refers to a non-directory in either,
-then the lower object is hidden - the name refers only to the upper
-object.
-
-Where both upper and lower objects are directories, a merged directory
-is formed.
-
-At mount time, the two directories given as mount options "lowerdir" and
-"upperdir" are combined into a merged directory:
-
-  mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\
-  workdir=/work /merged
-
-The "workdir" needs to be an empty directory on the same filesystem
-as upperdir.
-
-Then whenever a lookup is requested in such a merged directory, the
-lookup is performed in each actual directory and the combined result
-is cached in the dentry belonging to the overlay filesystem.  If both
-actual lookups find directories, both are stored and a merged
-directory is created, otherwise only one is stored: the upper if it
-exists, else the lower.
-
-Only the lists of names from directories are merged.  Other content
-such as metadata and extended attributes are reported for the upper
-directory only.  These attributes of the lower directory are hidden.
-
-whiteouts and opaque directories
---------------------------------
-
-In order to support rm and rmdir without changing the lower
-filesystem, an overlay filesystem needs to record in the upper filesystem
-that files have been removed.  This is done using whiteouts and opaque
-directories (non-directories are always opaque).
-
-A whiteout is created as a character device with 0/0 device number.
-When a whiteout is found in the upper level of a merged directory, any
-matching name in the lower level is ignored, and the whiteout itself
-is also hidden.
-
-A directory is made opaque by setting the xattr "trusted.overlay.opaque"
-to "y".  Where the upper filesystem contains an opaque directory, any
-directory in the lower filesystem with the same name is ignored.
-
-readdir
--------
-
-When a 'readdir' request is made on a merged directory, the upper and
-lower directories are each read and the name lists merged in the
-obvious way (upper is read first, then lower - entries that already
-exist are not re-added).  This merged name list is cached in the
-'struct file' and so remains as long as the file is kept open.  If the
-directory is opened and read by two processes at the same time, they
-will each have separate caches.  A seekdir to the start of the
-directory (offset 0) followed by a readdir will cause the cache to be
-discarded and rebuilt.
-
-This means that changes to the merged directory do not appear while a
-directory is being read.  This is unlikely to be noticed by many
-programs.
-
-seek offsets are assigned sequentially when the directories are read.
-Thus if
-
-  - read part of a directory
-  - remember an offset, and close the directory
-  - re-open the directory some time later
-  - seek to the remembered offset
-
-there may be little correlation between the old and new locations in
-the list of filenames, particularly if anything has changed in the
-directory.
-
-Readdir on directories that are not merged is simply handled by the
-underlying directory (upper or lower).
-
-renaming directories
---------------------
-
-When renaming a directory that is on the lower layer or merged (i.e. the
-directory was not created on the upper layer to start with) overlayfs can
-handle it in two different ways:
-
-1. return EXDEV error: this error is returned by rename(2) when trying to
-   move a file or directory across filesystem boundaries.  Hence
-   applications are usually prepared to hande this error (mv(1) for example
-   recursively copies the directory tree).  This is the default behavior.
-
-2. If the "redirect_dir" feature is enabled, then the directory will be
-   copied up (but not the contents).  Then the "trusted.overlay.redirect"
-   extended attribute is set to the path of the original location from the
-   root of the overlay.  Finally the directory is moved to the new
-   location.
-
-There are several ways to tune the "redirect_dir" feature.
-
-Kernel config options:
-
-- OVERLAY_FS_REDIRECT_DIR:
-    If this is enabled, then redirect_dir is turned on by  default.
-- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
-    If this is enabled, then redirects are always followed by default. Enabling
-    this results in a less secure configuration.  Enable this option only when
-    worried about backward compatibility with kernels that have the redirect_dir
-    feature and follow redirects even if turned off.
-
-Module options (can also be changed through /sys/module/overlay/parameters/*):
-
-- "redirect_dir=BOOL":
-    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
-- "redirect_always_follow=BOOL":
-    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
-- "redirect_max=NUM":
-    The maximum number of bytes in an absolute redirect (default is 256).
-
-Mount options:
-
-- "redirect_dir=on":
-    Redirects are enabled.
-- "redirect_dir=follow":
-    Redirects are not created, but followed.
-- "redirect_dir=off":
-    Redirects are not created and only followed if "redirect_always_follow"
-    feature is enabled in the kernel/module config.
-- "redirect_dir=nofollow":
-    Redirects are not created and not followed (equivalent to "redirect_dir=off"
-    if "redirect_always_follow" feature is not enabled).
-
-When the NFS export feature is enabled, every copied up directory is
-indexed by the file handle of the lower inode and a file handle of the
-upper directory is stored in a "trusted.overlay.upper" extended attribute
-on the index entry.  On lookup of a merged directory, if the upper
-directory does not match the file handle stores in the index, that is an
-indication that multiple upper directories may be redirected to the same
-lower directory.  In that case, lookup returns an error and warns about
-a possible inconsistency.
-
-Because lower layer redirects cannot be verified with the index, enabling
-NFS export support on an overlay filesystem with no upper layer requires
-turning off redirect follow (e.g. "redirect_dir=nofollow").
-
-
-Non-directories
----------------
-
-Objects that are not directories (files, symlinks, device-special
-files etc.) are presented either from the upper or lower filesystem as
-appropriate.  When a file in the lower filesystem is accessed in a way
-the requires write-access, such as opening for write access, changing
-some metadata etc., the file is first copied from the lower filesystem
-to the upper filesystem (copy_up).  Note that creating a hard-link
-also requires copy_up, though of course creation of a symlink does
-not.
-
-The copy_up may turn out to be unnecessary, for example if the file is
-opened for read-write but the data is not modified.
-
-The copy_up process first makes sure that the containing directory
-exists in the upper filesystem - creating it and any parents as
-necessary.  It then creates the object with the same metadata (owner,
-mode, mtime, symlink-target etc.) and then if the object is a file, the
-data is copied from the lower to the upper filesystem.  Finally any
-extended attributes are copied up.
-
-Once the copy_up is complete, the overlay filesystem simply
-provides direct access to the newly created file in the upper
-filesystem - future operations on the file are barely noticed by the
-overlay filesystem (though an operation on the name of the file such as
-rename or unlink will of course be noticed and handled).
-
-
-Multiple lower layers
----------------------
-
-Multiple lower layers can now be given using the the colon (":") as a
-separator character between the directory names.  For example:
-
-  mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
-
-As the example shows, "upperdir=" and "workdir=" may be omitted.  In
-that case the overlay will be read-only.
-
-The specified lower directories will be stacked beginning from the
-rightmost one and going left.  In the above example lower1 will be the
-top, lower2 the middle and lower3 the bottom layer.
-
-
-Metadata only copy up
---------------------
-
-When metadata only copy up feature is enabled, overlayfs will only copy
-up metadata (as opposed to whole file), when a metadata specific operation
-like chown/chmod is performed. Full file will be copied up later when
-file is opened for WRITE operation.
-
-In other words, this is delayed data copy up operation and data is copied
-up when there is a need to actually modify data.
-
-There are multiple ways to enable/disable this feature. A config option
-CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature
-by default. Or one can enable/disable it at module load time with module
-parameter metacopy=on/off. Lastly, there is also a per mount option
-metacopy=on/off to enable/disable this feature per mount.
-
-Do not use metacopy=on with untrusted upper/lower directories. Otherwise
-it is possible that an attacker can create a handcrafted file with
-appropriate REDIRECT and METACOPY xattrs, and gain access to file on lower
-pointed by REDIRECT. This should not be possible on local system as setting
-"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
-for untrusted layers like from a pen drive.
-
-Note: redirect_dir={off|nofollow|follow(*)} conflicts with metacopy=on, and
-results in an error.
-
-(*) redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
-given.
-
-Sharing and copying layers
---------------------------
-
-Lower layers may be shared among several overlay mounts and that is indeed
-a very common practice.  An overlay mount may use the same lower layer
-path as another overlay mount and it may use a lower layer path that is
-beneath or above the path of another overlay lower layer path.
-
-Using an upper layer path and/or a workdir path that are already used by
-another overlay mount is not allowed and may fail with EBUSY.  Using
-partially overlapping paths is not allowed and may fail with EBUSY.
-If files are accessed from two overlayfs mounts which share or overlap the
-upper layer and/or workdir path the behavior of the overlay is undefined,
-though it will not result in a crash or deadlock.
-
-Mounting an overlay using an upper layer path, where the upper layer path
-was previously used by another mounted overlay in combination with a
-different lower layer path, is allowed, unless the "inodes index" feature
-or "metadata only copy up" feature is enabled.
-
-With the "inodes index" feature, on the first time mount, an NFS file
-handle of the lower layer root directory, along with the UUID of the lower
-filesystem, are encoded and stored in the "trusted.overlay.origin" extended
-attribute on the upper layer root directory.  On subsequent mount attempts,
-the lower root directory file handle and lower filesystem UUID are compared
-to the stored origin in upper root directory.  On failure to verify the
-lower root origin, mount will fail with ESTALE.  An overlayfs mount with
-"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
-does not support NFS export, lower filesystem does not have a valid UUID or
-if the upper filesystem does not support extended attributes.
-
-For "metadata only copy up" feature there is no verification mechanism at
-mount time. So if same upper is mounted with different set of lower, mount
-probably will succeed but expect the unexpected later on. So don't do it.
-
-It is quite a common practice to copy overlay layers to a different
-directory tree on the same or different underlying filesystem, and even
-to a different machine.  With the "inodes index" feature, trying to mount
-the copied layers will fail the verification of the lower root file handle.
-
-
-Non-standard behavior
----------------------
-
-Current version of overlayfs can act as a mostly POSIX compliant
-filesystem.
-
-This is the list of cases that overlayfs doesn't currently handle:
-
-a) POSIX mandates updating st_atime for reads.  This is currently not
-done in the case when the file resides on a lower layer.
-
-b) If a file residing on a lower layer is opened for read-only and then
-memory mapped with MAP_SHARED, then subsequent changes to the file are not
-reflected in the memory mapping.
-
-The following options allow overlayfs to act more like a standards
-compliant filesystem:
-
-1) "redirect_dir"
-
-Enabled with the mount option or module option: "redirect_dir=on" or with
-the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y.
-
-If this feature is disabled, then rename(2) on a lower or merged directory
-will fail with EXDEV ("Invalid cross-device link").
-
-2) "inode index"
-
-Enabled with the mount option or module option "index=on" or with the
-kernel config option CONFIG_OVERLAY_FS_INDEX=y.
-
-If this feature is disabled and a file with multiple hard links is copied
-up, then this will "break" the link.  Changes will not be propagated to
-other names referring to the same inode.
-
-3) "xino"
-
-Enabled with the mount option "xino=auto" or "xino=on", with the module
-option "xino_auto=on" or with the kernel config option
-CONFIG_OVERLAY_FS_XINO_AUTO=y.  Also implicitly enabled by using the same
-underlying filesystem for all layers making up the overlay.
-
-If this feature is disabled or the underlying filesystem doesn't have
-enough free bits in the inode number, then overlayfs will not be able to
-guarantee that the values of st_ino and st_dev returned by stat(2) and the
-value of d_ino returned by readdir(3) will act like on a normal filesystem.
-E.g. the value of st_dev may be different for two objects in the same
-overlay filesystem and the value of st_ino for directory objects may not be
-persistent and could change even while the overlay filesystem is mounted.
-
-
-Changes to underlying filesystems
----------------------------------
-
-Offline changes, when the overlay is not mounted, are allowed to either
-the upper or the lower trees.
-
-Changes to the underlying filesystems while part of a mounted overlay
-filesystem are not allowed.  If the underlying filesystem is changed,
-the behavior of the overlay is undefined, though it will not result in
-a crash or deadlock.
-
-When the overlay NFS export feature is enabled, overlay filesystems
-behavior on offline changes of the underlying lower layer is different
-than the behavior when NFS export is disabled.
-
-On every copy_up, an NFS file handle of the lower inode, along with the
-UUID of the lower filesystem, are encoded and stored in an extended
-attribute "trusted.overlay.origin" on the upper inode.
-
-When the NFS export feature is enabled, a lookup of a merged directory,
-that found a lower directory at the lookup path or at the path pointed
-to by the "trusted.overlay.redirect" extended attribute, will verify
-that the found lower directory file handle and lower filesystem UUID
-match the origin file handle that was stored at copy_up time.  If a
-found lower directory does not match the stored origin, that directory
-will not be merged with the upper directory.
-
-
-
-NFS export
-----------
-
-When the underlying filesystems supports NFS export and the "nfs_export"
-feature is enabled, an overlay filesystem may be exported to NFS.
-
-With the "nfs_export" feature, on copy_up of any lower object, an index
-entry is created under the index directory.  The index entry name is the
-hexadecimal representation of the copy up origin file handle.  For a
-non-directory object, the index entry is a hard link to the upper inode.
-For a directory object, the index entry has an extended attribute
-"trusted.overlay.upper" with an encoded file handle of the upper
-directory inode.
-
-When encoding a file handle from an overlay filesystem object, the
-following rules apply:
-
-1. For a non-upper object, encode a lower file handle from lower inode
-2. For an indexed object, encode a lower file handle from copy_up origin
-3. For a pure-upper object and for an existing non-indexed upper object,
-   encode an upper file handle from upper inode
-
-The encoded overlay file handle includes:
- - Header including path type information (e.g. lower/upper)
- - UUID of the underlying filesystem
- - Underlying filesystem encoding of underlying inode
-
-This encoding format is identical to the encoding format file handles that
-are stored in extended attribute "trusted.overlay.origin".
-
-When decoding an overlay file handle, the following steps are followed:
-
-1. Find underlying layer by UUID and path type information.
-2. Decode the underlying filesystem file handle to underlying dentry.
-3. For a lower file handle, lookup the handle in index directory by name.
-4. If a whiteout is found in index, return ESTALE. This represents an
-   overlay object that was deleted after its file handle was encoded.
-5. For a non-directory, instantiate a disconnected overlay dentry from the
-   decoded underlying dentry, the path type and index inode, if found.
-6. For a directory, use the connected underlying decoded dentry, path type
-   and index, to lookup a connected overlay dentry.
-
-Decoding a non-directory file handle may return a disconnected dentry.
-copy_up of that disconnected dentry will create an upper index entry with
-no upper alias.
-
-When overlay filesystem has multiple lower layers, a middle layer
-directory may have a "redirect" to lower directory.  Because middle layer
-"redirects" are not indexed, a lower file handle that was encoded from the
-"redirect" origin directory, cannot be used to find the middle or upper
-layer directory.  Similarly, a lower file handle that was encoded from a
-descendant of the "redirect" origin directory, cannot be used to
-reconstruct a connected overlay path.  To mitigate the cases of
-directories that cannot be decoded from a lower file handle, these
-directories are copied up on encode and encoded as an upper file handle.
-On an overlay filesystem with no upper layer this mitigation cannot be
-used NFS export in this setup requires turning off redirect follow (e.g.
-"redirect_dir=nofollow").
-
-The overlay filesystem does not support non-directory connectable file
-handles, so exporting with the 'subtree_check' exportfs configuration will
-cause failures to lookup files over NFS.
-
-When the NFS export feature is enabled, all directory index entries are
-verified on mount time to check that upper file handles are not stale.
-This verification may cause significant overhead in some cases.
-
-
-Testsuite
----------
-
-There's a testsuite originally developed by David Howells and currently
-maintained by Amir Goldstein at:
-
-  https://github.com/amir73il/unionmount-testsuite.git
-
-Run as root:
-
-  # cd unionmount-testsuite
-  # ./run --ov --verify
diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 434a07b0002b..a3216979298b 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -13,6 +13,7 @@ It has subsequently been updated to reflect changes in the kernel
 including:
 
 - per-directory parallel name lookup.
+- ``openat2()`` resolution restriction flags.
 
 Introduction to pathname lookup
 ===============================
@@ -235,6 +236,13 @@ renamed.  If ``d_lookup`` finds that a rename happened while it
 unsuccessfully scanned a chain in the hash table, it simply tries
 again.
 
+``rename_lock`` is also used to detect and defend against potential attacks
+against ``LOOKUP_BENEATH`` and ``LOOKUP_IN_ROOT`` when resolving ".." (where
+the parent directory is moved outside the root, bypassing the ``path_equal()``
+check). If ``rename_lock`` is updated during the lookup and the path encounters
+a "..", a potential attack occurred and ``handle_dots()`` will bail out with
+``-EAGAIN``.
+
 inode->i_rwsem
 ~~~~~~~~~~~~~~
 
@@ -348,6 +356,13 @@ any changes to any mount points while stepping up.  This locking is
 needed to stabilize the link to the mounted-on dentry, which the
 refcount on the mount itself doesn't ensure.
 
+``mount_lock`` is also used to detect and defend against potential attacks
+against ``LOOKUP_BENEATH`` and ``LOOKUP_IN_ROOT`` when resolving ".." (where
+the parent directory is moved outside the root, bypassing the ``path_equal()``
+check). If ``mount_lock`` is updated during the lookup and the path encounters
+a "..", a potential attack occurred and ``handle_dots()`` will bail out with
+``-EAGAIN``.
+
 RCU
 ~~~
 
@@ -405,6 +420,10 @@ is requested.  Keeping a reference in the ``nameidata`` ensures that
 only one root is in effect for the entire path walk, even if it races
 with a ``chroot()`` system call.
 
+It should be noted that in the case of ``LOOKUP_IN_ROOT`` or
+``LOOKUP_BENEATH``, the effective root becomes the directory file descriptor
+passed to ``openat2()`` (which exposes these ``LOOKUP_`` flags).
+
 The root is needed when either of two conditions holds: (1) either the
 pathname or a symbolic link starts with a "'/'", or (2) a "``..``"
 component is being handled, since "``..``" from the root must always stay
@@ -1149,7 +1168,7 @@ so ``NULL`` is returned to indicate that the symlink can be released and
 the stack frame discarded.
 
 The other case involves things in ``/proc`` that look like symlinks but
-aren't really::
+aren't really (and are therefore commonly referred to as "magic-links")::
 
      $ ls -l /proc/self/fd/1
      lrwx------ 1 neilb neilb 64 Jun 13 10:19 /proc/self/fd/1 -> /dev/pts/4
@@ -1286,7 +1305,9 @@ A few flags
 A suitable way to wrap up this tour of pathname walking is to list
 the various flags that can be stored in the ``nameidata`` to guide the
 lookup process.  Many of these are only meaningful on the final
-component, others reflect the current state of the pathname lookup.
+component, others reflect the current state of the pathname lookup, and some
+apply restrictions to all path components encountered in the path lookup.
+
 And then there is ``LOOKUP_EMPTY``, which doesn't fit conceptually with
 the others.  If this is not set, an empty pathname causes an error
 very early on.  If it is set, empty pathnames are not considered to be
@@ -1310,13 +1331,48 @@ longer needed.
 ``LOOKUP_JUMPED`` means that the current dentry was chosen not because
 it had the right name but for some other reason.  This happens when
 following "``..``", following a symlink to ``/``, crossing a mount point
-or accessing a "``/proc/$PID/fd/$FD``" symlink.  In this case the
-filesystem has not been asked to revalidate the name (with
-``d_revalidate()``).  In such cases the inode may still need to be
-revalidated, so ``d_op->d_weak_revalidate()`` is called if
+or accessing a "``/proc/$PID/fd/$FD``" symlink (also known as a "magic
+link"). In this case the filesystem has not been asked to revalidate the
+name (with ``d_revalidate()``).  In such cases the inode may still need
+to be revalidated, so ``d_op->d_weak_revalidate()`` is called if
 ``LOOKUP_JUMPED`` is set when the look completes - which may be at the
 final component or, when creating, unlinking, or renaming, at the penultimate component.
 
+Resolution-restriction flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to allow userspace to protect itself against certain race conditions
+and attack scenarios involving changing path components, a series of flags are
+available which apply restrictions to all path components encountered during
+path lookup. These flags are exposed through ``openat2()``'s ``resolve`` field.
+
+``LOOKUP_NO_SYMLINKS`` blocks all symlink traversals (including magic-links).
+This is distinctly different from ``LOOKUP_FOLLOW``, because the latter only
+relates to restricting the following of trailing symlinks.
+
+``LOOKUP_NO_MAGICLINKS`` blocks all magic-link traversals. Filesystems must
+ensure that they return errors from ``nd_jump_link()``, because that is how
+``LOOKUP_NO_MAGICLINKS`` and other magic-link restrictions are implemented.
+
+``LOOKUP_NO_XDEV`` blocks all ``vfsmount`` traversals (this includes both
+bind-mounts and ordinary mounts). Note that the ``vfsmount`` which contains the
+lookup is determined by the first mountpoint the path lookup reaches --
+absolute paths start with the ``vfsmount`` of ``/``, and relative paths start
+with the ``dfd``'s ``vfsmount``. Magic-links are only permitted if the
+``vfsmount`` of the path is unchanged.
+
+``LOOKUP_BENEATH`` blocks any path components which resolve outside the
+starting point of the resolution. This is done by blocking ``nd_jump_root()``
+as well as blocking ".." if it would jump outside the starting point.
+``rename_lock`` and ``mount_lock`` are used to detect attacks against the
+resolution of "..". Magic-links are also blocked.
+
+``LOOKUP_IN_ROOT`` resolves all path components as though the starting point
+were the filesystem root. ``nd_jump_root()`` brings the resolution back to to
+the starting point, and ".." at the starting point will act as a no-op. As with
+``LOOKUP_BENEATH``, ``rename_lock`` and ``mount_lock`` are used to detect
+attacks against ".." resolution. Magic-links are also blocked.
+
 Final-component flags
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/Documentation/filesystems/vfat.rst b/Documentation/filesystems/vfat.rst
new file mode 100644
index 000000000000..e85d74e91295
--- /dev/null
+++ b/Documentation/filesystems/vfat.rst
@@ -0,0 +1,387 @@
+====
+VFAT
+====
+
+USING VFAT
+==========
+
+To use the vfat filesystem, use the filesystem type 'vfat'.  i.e.::
+
+  mount -t vfat /dev/fd0 /mnt
+
+
+No special partition formatter is required,
+'mkdosfs' will work fine if you want to format from within Linux.
+
+VFAT MOUNT OPTIONS
+==================
+
+**uid=###**
+	Set the owner of all files on this filesystem.
+	The default is the uid of current process.
+
+**gid=###**
+	Set the group of all files on this filesystem.
+	The default is the gid of current process.
+
+**umask=###**
+	The permission mask (for files and directories, see *umask(1)*).
+	The default is the umask of current process.
+
+**dmask=###**
+	The permission mask for the directory.
+	The default is the umask of current process.
+
+**fmask=###**
+	The permission mask for files.
+	The default is the umask of current process.
+
+**allow_utime=###**
+	This option controls the permission check of mtime/atime.
+
+		**-20**: If current process is in group of file's group ID,
+                you can change timestamp.
+
+		**-2**: Other users can change timestamp.
+
+	The default is set from dmask option. If the directory is
+	writable, utime(2) is also allowed. i.e. ~dmask & 022.
+
+	Normally utime(2) checks current process is owner of
+	the file, or it has CAP_FOWNER capability. But FAT
+	filesystem doesn't have uid/gid on disk, so normal
+	check is too unflexible. With this option you can
+	relax it.
+
+**codepage=###**
+	Sets the codepage number for converting to shortname
+	characters on FAT filesystem.
+	By default, FAT_DEFAULT_CODEPAGE setting is used.
+
+**iocharset=<name>**
+	Character set to use for converting between the
+	encoding is used for user visible filename and 16 bit
+	Unicode characters. Long filenames are stored on disk
+	in Unicode format, but Unix for the most part doesn't
+	know how to deal with Unicode.
+	By default, FAT_DEFAULT_IOCHARSET setting is used.
+
+	There is also an option of doing UTF-8 translations
+	with the utf8 option.
+
+.. note:: ``iocharset=utf8`` is not recommended. If unsure, you should consider
+	  the utf8 option instead.
+
+**utf8=<bool>**
+	UTF-8 is the filesystem safe version of Unicode that
+	is used by the console. It can be enabled or disabled
+	for the filesystem with this option.
+	If 'uni_xlate' gets set, UTF-8 gets disabled.
+	By default, FAT_DEFAULT_UTF8 setting is used.
+
+**uni_xlate=<bool>**
+	Translate unhandled Unicode characters to special
+	escaped sequences.  This would let you backup and
+	restore filenames that are created with any Unicode
+	characters.  Until Linux supports Unicode for real,
+	this gives you an alternative.  Without this option,
+	a '?' is used when no translation is possible.  The
+	escape character is ':' because it is otherwise
+	illegal on the vfat filesystem.  The escape sequence
+	that gets used is ':' and the four digits of hexadecimal
+	unicode.
+
+**nonumtail=<bool>**
+	When creating 8.3 aliases, normally the alias will
+	end in '~1' or tilde followed by some number.  If this
+	option is set, then if the filename is
+	"longfilename.txt" and "longfile.txt" does not
+	currently exist in the directory, longfile.txt will
+	be the short alias instead of longfi~1.txt.
+
+**usefree**
+	Use the "free clusters" value stored on FSINFO. It will
+	be used to determine number of free clusters without
+	scanning disk. But it's not used by default, because
+	recent Windows don't update it correctly in some
+	case. If you are sure the "free clusters" on FSINFO is
+	correct, by this option you can avoid scanning disk.
+
+**quiet**
+	Stops printing certain warning messages.
+
+**check=s|r|n**
+	Case sensitivity checking setting.
+
+	**s**: strict, case sensitive
+
+	**r**: relaxed, case insensitive
+
+	**n**: normal, default setting, currently case insensitive
+
+**nocase**
+	This was deprecated for vfat. Use ``shortname=win95`` instead.
+
+**shortname=lower|win95|winnt|mixed**
+	Shortname display/create setting.
+
+	**lower**: convert to lowercase for display,
+	emulate the Windows 95 rule for create.
+
+	**win95**: emulate the Windows 95 rule for display/create.
+
+	**winnt**: emulate the Windows NT rule for display/create.
+
+	**mixed**: emulate the Windows NT rule for display,
+	emulate the Windows 95 rule for create.
+
+	Default setting is `mixed`.
+
+**tz=UTC**
+	Interpret timestamps as UTC rather than local time.
+	This option disables the conversion of timestamps
+	between local time (as used by Windows on FAT) and UTC
+	(which Linux uses internally).  This is particularly
+	useful when mounting devices (like digital cameras)
+	that are set to UTC in order to avoid the pitfalls of
+	local time.
+
+**time_offset=minutes**
+	Set offset for conversion of timestamps from local time
+	used by FAT to UTC. I.e. <minutes> minutes will be subtracted
+	from each timestamp to convert it to UTC used internally by
+	Linux. This is useful when time zone set in ``sys_tz`` is
+	not the time zone used by the filesystem. Note that this
+	option still does not provide correct time stamps in all
+	cases in presence of DST - time stamps in a different DST
+	setting will be off by one hour.
+
+**showexec**
+	If set, the execute permission bits of the file will be
+	allowed only if the extension part of the name is .EXE,
+	.COM, or .BAT. Not set by default.
+
+**debug**
+	Can be set, but unused by the current implementation.
+
+**sys_immutable**
+	If set, ATTR_SYS attribute on FAT is handled as
+	IMMUTABLE flag on Linux. Not set by default.
+
+**flush**
+	If set, the filesystem will try to flush to disk more
+	early than normal. Not set by default.
+
+**rodir**
+	FAT has the ATTR_RO (read-only) attribute. On Windows,
+	the ATTR_RO of the directory will just be ignored,
+	and is used only by applications as a flag (e.g. it's set
+	for the customized folder).
+
+	If you want to use ATTR_RO as read-only flag even for
+	the directory, set this option.
+
+**errors=panic|continue|remount-ro**
+	specify FAT behavior on critical errors: panic, continue
+	without doing anything or remount the partition in
+	read-only mode (default behavior).
+
+**discard**
+	If set, issues discard/TRIM commands to the block
+	device when blocks are freed. This is useful for SSD devices
+	and sparse/thinly-provisoned LUNs.
+
+**nfs=stale_rw|nostale_ro**
+	Enable this only if you want to export the FAT filesystem
+	over NFS.
+
+		**stale_rw**: This option maintains an index (cache) of directory
+		*inodes* by *i_logstart* which is used by the nfs-related code to
+		improve look-ups. Full file operations (read/write) over NFS is
+		supported but with cache eviction at NFS server, this could
+		result in ESTALE issues.
+
+		**nostale_ro**: This option bases the *inode* number and filehandle
+		on the on-disk location of a file in the MS-DOS directory entry.
+		This ensures that ESTALE will not be returned after a file is
+		evicted from the inode cache. However, it means that operations
+		such as rename, create and unlink could cause filehandles that
+		previously pointed at one file to point at a different file,
+		potentially causing data corruption. For this reason, this
+		option also mounts the filesystem readonly.
+
+	To maintain backward compatibility, ``'-o nfs'`` is also accepted,
+	defaulting to "stale_rw".
+
+**dos1xfloppy  <bool>: 0,1,yes,no,true,false**
+	If set, use a fallback default BIOS Parameter Block
+	configuration, determined by backing device size. These static
+	parameters match defaults assumed by DOS 1.x for 160 kiB,
+	180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+
+
+
+LIMITATION
+==========
+
+The fallocated region of file is discarded at umount/evict time
+when using fallocate with FALLOC_FL_KEEP_SIZE.
+So, User should assume that fallocated region can be discarded at
+last close if there is memory pressure resulting in eviction of
+the inode from the memory. As a result, for any dependency on
+the fallocated region, user should make sure to recheck fallocate
+after reopening the file.
+
+TODO
+====
+Need to get rid of the raw scanning stuff.  Instead, always use
+a get next directory entry approach.  The only thing left that uses
+raw scanning is the directory renaming code.
+
+
+POSSIBLE PROBLEMS
+=================
+
+- vfat_valid_longname does not properly checked reserved names.
+- When a volume name is the same as a directory name in the root
+  directory of the filesystem, the directory name sometimes shows
+  up as an empty file.
+- autoconv option does not work correctly.
+
+
+TEST SUITE
+==========
+If you plan to make any modifications to the vfat filesystem, please
+get the test suite that comes with the vfat distribution at
+
+`<http://web.archive.org/web/*/http://bmrc.berkeley.edu/people/chaffee/vfat.html>`_
+
+This tests quite a few parts of the vfat filesystem and additional
+tests for new features or untested features would be appreciated.
+
+NOTES ON THE STRUCTURE OF THE VFAT FILESYSTEM
+=============================================
+This documentation was provided by Galen C. Hunt gchunt@cs.rochester.edu and
+lightly annotated by Gordon Chaffee.
+
+This document presents a very rough, technical overview of my
+knowledge of the extended FAT file system used in Windows NT 3.5 and
+Windows 95.  I don't guarantee that any of the following is correct,
+but it appears to be so.
+
+The extended FAT file system is almost identical to the FAT
+file system used in DOS versions up to and including *6.223410239847*
+:-).  The significant change has been the addition of long file names.
+These names support up to 255 characters including spaces and lower
+case characters as opposed to the traditional 8.3 short names.
+
+Here is the description of the traditional FAT entry in the current
+Windows 95 filesystem::
+
+        struct directory { // Short 8.3 names
+                unsigned char name[8];          // file name
+                unsigned char ext[3];           // file extension
+                unsigned char attr;             // attribute byte
+		unsigned char lcase;		// Case for base and extension
+		unsigned char ctime_ms;		// Creation time, milliseconds
+		unsigned char ctime[2];		// Creation time
+		unsigned char cdate[2];		// Creation date
+		unsigned char adate[2];		// Last access date
+		unsigned char reserved[2];	// reserved values (ignored)
+                unsigned char time[2];          // time stamp
+                unsigned char date[2];          // date stamp
+                unsigned char start[2];         // starting cluster number
+                unsigned char size[4];          // size of the file
+        };
+
+
+The lcase field specifies if the base and/or the extension of an 8.3
+name should be capitalized.  This field does not seem to be used by
+Windows 95 but it is used by Windows NT.  The case of filenames is not
+completely compatible from Windows NT to Windows 95.  It is not completely
+compatible in the reverse direction, however.  Filenames that fit in
+the 8.3 namespace and are written on Windows NT to be lowercase will
+show up as uppercase on Windows 95.
+
+.. note:: Note that the ``start`` and ``size`` values are actually little
+          endian integer values.  The descriptions of the fields in this
+          structure are public knowledge and can be found elsewhere.
+
+With the extended FAT system, Microsoft has inserted extra
+directory entries for any files with extended names.  (Any name which
+legally fits within the old 8.3 encoding scheme does not have extra
+entries.)  I call these extra entries slots.  Basically, a slot is a
+specially formatted directory entry which holds up to 13 characters of
+a file's extended name.  Think of slots as additional labeling for the
+directory entry of the file to which they correspond.  Microsoft
+prefers to refer to the 8.3 entry for a file as its alias and the
+extended slot directory entries as the file name.
+
+The C structure for a slot directory entry follows::
+
+        struct slot { // Up to 13 characters of a long name
+                unsigned char id;               // sequence number for slot
+                unsigned char name0_4[10];      // first 5 characters in name
+                unsigned char attr;             // attribute byte
+                unsigned char reserved;         // always 0
+                unsigned char alias_checksum;   // checksum for 8.3 alias
+                unsigned char name5_10[12];     // 6 more characters in name
+                unsigned char start[2];         // starting cluster number
+                unsigned char name11_12[4];     // last 2 characters in name
+        };
+
+
+If the layout of the slots looks a little odd, it's only
+because of Microsoft's efforts to maintain compatibility with old
+software.  The slots must be disguised to prevent old software from
+panicking.  To this end, a number of measures are taken:
+
+        1) The attribute byte for a slot directory entry is always set
+           to 0x0f.  This corresponds to an old directory entry with
+           attributes of "hidden", "system", "read-only", and "volume
+           label".  Most old software will ignore any directory
+           entries with the "volume label" bit set.  Real volume label
+           entries don't have the other three bits set.
+
+        2) The starting cluster is always set to 0, an impossible
+           value for a DOS file.
+
+Because the extended FAT system is backward compatible, it is
+possible for old software to modify directory entries.  Measures must
+be taken to ensure the validity of slots.  An extended FAT system can
+verify that a slot does in fact belong to an 8.3 directory entry by
+the following:
+
+        1) Positioning.  Slots for a file always immediately proceed
+           their corresponding 8.3 directory entry.  In addition, each
+           slot has an id which marks its order in the extended file
+           name.  Here is a very abbreviated view of an 8.3 directory
+           entry and its corresponding long name slots for the file
+           "My Big File.Extension which is long"::
+
+                <proceeding files...>
+                <slot #3, id = 0x43, characters = "h is long">
+                <slot #2, id = 0x02, characters = "xtension whic">
+                <slot #1, id = 0x01, characters = "My Big File.E">
+                <directory entry, name = "MYBIGFIL.EXT">
+
+
+           .. note:: Note that the slots are stored from last to first.  Slots
+		     are numbered from 1 to N.  The Nth slot is ``or'ed`` with
+		     0x40 to mark it as the last one.
+
+        2) Checksum.  Each slot has an alias_checksum value.  The
+           checksum is calculated from the 8.3 name using the
+           following algorithm::
+
+                for (sum = i = 0; i < 11; i++) {
+                        sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + name[i]
+                }
+
+
+	3) If there is free space in the final slot, a Unicode ``NULL (0x0000)``
+	   is stored after the final character.  After that, all unused
+	   characters in the final slot are set to Unicode 0xFFFF.
+
+Finally, note that the extended name is stored in Unicode.  Each Unicode
+character takes either two or four bytes, UTF-16LE encoded.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
deleted file mode 100644
index 91031298beb1..000000000000
--- a/Documentation/filesystems/vfat.txt
+++ /dev/null
@@ -1,347 +0,0 @@
-USING VFAT
-----------------------------------------------------------------------
-To use the vfat filesystem, use the filesystem type 'vfat'.  i.e.
-  mount -t vfat /dev/fd0 /mnt
-
-No special partition formatter is required.  mkdosfs will work fine
-if you want to format from within Linux.
-
-VFAT MOUNT OPTIONS
-----------------------------------------------------------------------
-uid=###       -- Set the owner of all files on this filesystem.
-		 The default is the uid of current process.
-
-gid=###       -- Set the group of all files on this filesystem.
-		 The default is the gid of current process.
-
-umask=###     -- The permission mask (for files and directories, see umask(1)).
-                 The default is the umask of current process.
-
-dmask=###     -- The permission mask for the directory.
-                 The default is the umask of current process.
-
-fmask=###     -- The permission mask for files.
-                 The default is the umask of current process.
-
-allow_utime=### -- This option controls the permission check of mtime/atime.
-
-                  20 - If current process is in group of file's group ID,
-                       you can change timestamp.
-                   2 - Other users can change timestamp.
-
-                 The default is set from `dmask' option. (If the directory is
-                 writable, utime(2) is also allowed. I.e. ~dmask & 022)
-
-                 Normally utime(2) checks current process is owner of
-                 the file, or it has CAP_FOWNER capability.  But FAT
-                 filesystem doesn't have uid/gid on disk, so normal
-                 check is too unflexible. With this option you can
-                 relax it.
-
-codepage=###  -- Sets the codepage number for converting to shortname
-		 characters on FAT filesystem.
-		 By default, FAT_DEFAULT_CODEPAGE setting is used.
-
-iocharset=<name> -- Character set to use for converting between the
-		 encoding is used for user visible filename and 16 bit
-		 Unicode characters. Long filenames are stored on disk
-		 in Unicode format, but Unix for the most part doesn't
-		 know how to deal with Unicode.
-		 By default, FAT_DEFAULT_IOCHARSET setting is used.
-
-		 There is also an option of doing UTF-8 translations
-		 with the utf8 option.
-
-		 NOTE: "iocharset=utf8" is not recommended. If unsure,
-		 you should consider the following option instead.
-
-utf8=<bool>   -- UTF-8 is the filesystem safe version of Unicode that
-		 is used by the console. It can be enabled or disabled
-		 for the filesystem with this option.
-		 If 'uni_xlate' gets set, UTF-8 gets disabled.
-		 By default, FAT_DEFAULT_UTF8 setting is used.
-
-uni_xlate=<bool> -- Translate unhandled Unicode characters to special
-		 escaped sequences.  This would let you backup and
-		 restore filenames that are created with any Unicode
-		 characters.  Until Linux supports Unicode for real,
-		 this gives you an alternative.  Without this option,
-		 a '?' is used when no translation is possible.  The
-		 escape character is ':' because it is otherwise
-		 illegal on the vfat filesystem.  The escape sequence
-		 that gets used is ':' and the four digits of hexadecimal
-		 unicode.
-
-nonumtail=<bool> -- When creating 8.3 aliases, normally the alias will
-                 end in '~1' or tilde followed by some number.  If this
-                 option is set, then if the filename is 
-                 "longfilename.txt" and "longfile.txt" does not
-                 currently exist in the directory, 'longfile.txt' will
-                 be the short alias instead of 'longfi~1.txt'. 
-                  
-usefree       -- Use the "free clusters" value stored on FSINFO. It'll
-                 be used to determine number of free clusters without
-                 scanning disk. But it's not used by default, because
-                 recent Windows don't update it correctly in some
-                 case. If you are sure the "free clusters" on FSINFO is
-                 correct, by this option you can avoid scanning disk.
-
-quiet         -- Stops printing certain warning messages.
-
-check=s|r|n   -- Case sensitivity checking setting.
-                 s: strict, case sensitive
-                 r: relaxed, case insensitive
-                 n: normal, default setting, currently case insensitive
-
-nocase        -- This was deprecated for vfat. Use shortname=win95 instead.
-
-shortname=lower|win95|winnt|mixed
-	      -- Shortname display/create setting.
-		 lower: convert to lowercase for display,
-			emulate the Windows 95 rule for create.
-		 win95: emulate the Windows 95 rule for display/create.
-		 winnt: emulate the Windows NT rule for display/create.
-		 mixed: emulate the Windows NT rule for display,
-			emulate the Windows 95 rule for create.
-		 Default setting is `mixed'.
-
-tz=UTC        -- Interpret timestamps as UTC rather than local time.
-                 This option disables the conversion of timestamps
-                 between local time (as used by Windows on FAT) and UTC
-                 (which Linux uses internally).  This is particularly
-                 useful when mounting devices (like digital cameras)
-                 that are set to UTC in order to avoid the pitfalls of
-                 local time.
-time_offset=minutes
-	      -- Set offset for conversion of timestamps from local time
-		 used by FAT to UTC. I.e. <minutes> minutes will be subtracted
-		 from each timestamp to convert it to UTC used internally by
-		 Linux. This is useful when time zone set in sys_tz is
-		 not the time zone used by the filesystem. Note that this
-		 option still does not provide correct time stamps in all
-		 cases in presence of DST - time stamps in a different DST
-		 setting will be off by one hour.
-
-showexec      -- If set, the execute permission bits of the file will be
-		 allowed only if the extension part of the name is .EXE,
-		 .COM, or .BAT. Not set by default.
-
-debug         -- Can be set, but unused by the current implementation.
-
-sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as
-		 IMMUTABLE flag on Linux. Not set by default.
-
-flush         -- If set, the filesystem will try to flush to disk more
-		 early than normal. Not set by default.
-
-rodir	      -- FAT has the ATTR_RO (read-only) attribute. On Windows,
-		 the ATTR_RO of the directory will just be ignored,
-		 and is used only by applications as a flag (e.g. it's set
-		 for the customized folder).
-
-		 If you want to use ATTR_RO as read-only flag even for
-		 the directory, set this option.
-
-errors=panic|continue|remount-ro
-	      -- specify FAT behavior on critical errors: panic, continue
-		 without doing anything or remount the partition in
-		 read-only mode (default behavior).
-
-discard       -- If set, issues discard/TRIM commands to the block
-		 device when blocks are freed. This is useful for SSD devices
-		 and sparse/thinly-provisoned LUNs.
-
-nfs=stale_rw|nostale_ro
-		Enable this only if you want to export the FAT filesystem
-		over NFS.
-
-		stale_rw: This option maintains an index (cache) of directory
-		inodes by i_logstart which is used by the nfs-related code to
-		improve look-ups. Full file operations (read/write) over NFS is
-		supported but with cache eviction at NFS server, this could
-		result in ESTALE issues.
-
-		nostale_ro: This option bases the inode number and filehandle
-		on the on-disk location of a file in the MS-DOS directory entry.
-		This ensures that ESTALE will not be returned after a file is
-		evicted from the inode cache. However, it means that operations
-		such as rename, create and unlink could cause filehandles that
-		previously pointed at one file to point at a different file,
-		potentially causing data corruption. For this reason, this
-		option also mounts the filesystem readonly.
-
-		To maintain backward compatibility, '-o nfs' is also accepted,
-		defaulting to stale_rw
-
-dos1xfloppy  -- If set, use a fallback default BIOS Parameter Block
-		configuration, determined by backing device size. These static
-		parameters match defaults assumed by DOS 1.x for 160 kiB,
-		180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
-
-
-<bool>: 0,1,yes,no,true,false
-
-LIMITATION
----------------------------------------------------------------------
-* The fallocated region of file is discarded at umount/evict time
-  when using fallocate with FALLOC_FL_KEEP_SIZE.
-  So, User should assume that fallocated region can be discarded at
-  last close if there is memory pressure resulting in eviction of
-  the inode from the memory. As a result, for any dependency on
-  the fallocated region, user should make sure to recheck fallocate
-  after reopening the file.
-
-TODO
-----------------------------------------------------------------------
-* Need to get rid of the raw scanning stuff.  Instead, always use
-  a get next directory entry approach.  The only thing left that uses
-  raw scanning is the directory renaming code.
-
-
-POSSIBLE PROBLEMS
-----------------------------------------------------------------------
-* vfat_valid_longname does not properly checked reserved names.
-* When a volume name is the same as a directory name in the root
-  directory of the filesystem, the directory name sometimes shows
-  up as an empty file.
-* autoconv option does not work correctly.
-
-BUG REPORTS
-----------------------------------------------------------------------
-If you have trouble with the VFAT filesystem, mail bug reports to
-chaffee@bmrc.cs.berkeley.edu.  Please specify the filename
-and the operation that gave you trouble.
-
-TEST SUITE
-----------------------------------------------------------------------
-If you plan to make any modifications to the vfat filesystem, please
-get the test suite that comes with the vfat distribution at
-
-  http://web.archive.org/web/*/http://bmrc.berkeley.edu/
-  people/chaffee/vfat.html
-
-This tests quite a few parts of the vfat filesystem and additional
-tests for new features or untested features would be appreciated.
-
-NOTES ON THE STRUCTURE OF THE VFAT FILESYSTEM
-----------------------------------------------------------------------
-(This documentation was provided by Galen C. Hunt <gchunt@cs.rochester.edu>
- and lightly annotated by Gordon Chaffee).
-
-This document presents a very rough, technical overview of my
-knowledge of the extended FAT file system used in Windows NT 3.5 and
-Windows 95.  I don't guarantee that any of the following is correct,
-but it appears to be so.
-
-The extended FAT file system is almost identical to the FAT
-file system used in DOS versions up to and including 6.223410239847
-:-).  The significant change has been the addition of long file names.
-These names support up to 255 characters including spaces and lower
-case characters as opposed to the traditional 8.3 short names.
-
-Here is the description of the traditional FAT entry in the current
-Windows 95 filesystem:
-
-        struct directory { // Short 8.3 names 
-                unsigned char name[8];          // file name 
-                unsigned char ext[3];           // file extension 
-                unsigned char attr;             // attribute byte 
-		unsigned char lcase;		// Case for base and extension
-		unsigned char ctime_ms;		// Creation time, milliseconds
-		unsigned char ctime[2];		// Creation time
-		unsigned char cdate[2];		// Creation date
-		unsigned char adate[2];		// Last access date
-		unsigned char reserved[2];	// reserved values (ignored) 
-                unsigned char time[2];          // time stamp 
-                unsigned char date[2];          // date stamp 
-                unsigned char start[2];         // starting cluster number 
-                unsigned char size[4];          // size of the file 
-        };
-
-The lcase field specifies if the base and/or the extension of an 8.3
-name should be capitalized.  This field does not seem to be used by
-Windows 95 but it is used by Windows NT.  The case of filenames is not
-completely compatible from Windows NT to Windows 95.  It is not completely
-compatible in the reverse direction, however.  Filenames that fit in
-the 8.3 namespace and are written on Windows NT to be lowercase will
-show up as uppercase on Windows 95.
-
-Note that the "start" and "size" values are actually little
-endian integer values.  The descriptions of the fields in this
-structure are public knowledge and can be found elsewhere.
-
-With the extended FAT system, Microsoft has inserted extra
-directory entries for any files with extended names.  (Any name which
-legally fits within the old 8.3 encoding scheme does not have extra
-entries.)  I call these extra entries slots.  Basically, a slot is a
-specially formatted directory entry which holds up to 13 characters of
-a file's extended name.  Think of slots as additional labeling for the
-directory entry of the file to which they correspond.  Microsoft
-prefers to refer to the 8.3 entry for a file as its alias and the
-extended slot directory entries as the file name. 
-
-The C structure for a slot directory entry follows:
-
-        struct slot { // Up to 13 characters of a long name 
-                unsigned char id;               // sequence number for slot 
-                unsigned char name0_4[10];      // first 5 characters in name 
-                unsigned char attr;             // attribute byte
-                unsigned char reserved;         // always 0 
-                unsigned char alias_checksum;   // checksum for 8.3 alias 
-                unsigned char name5_10[12];     // 6 more characters in name
-                unsigned char start[2];         // starting cluster number
-                unsigned char name11_12[4];     // last 2 characters in name
-        };
-
-If the layout of the slots looks a little odd, it's only
-because of Microsoft's efforts to maintain compatibility with old
-software.  The slots must be disguised to prevent old software from
-panicking.  To this end, a number of measures are taken:
-
-        1) The attribute byte for a slot directory entry is always set
-           to 0x0f.  This corresponds to an old directory entry with
-           attributes of "hidden", "system", "read-only", and "volume
-           label".  Most old software will ignore any directory
-           entries with the "volume label" bit set.  Real volume label
-           entries don't have the other three bits set.
-
-        2) The starting cluster is always set to 0, an impossible
-           value for a DOS file.
-
-Because the extended FAT system is backward compatible, it is
-possible for old software to modify directory entries.  Measures must
-be taken to ensure the validity of slots.  An extended FAT system can
-verify that a slot does in fact belong to an 8.3 directory entry by
-the following:
-
-        1) Positioning.  Slots for a file always immediately proceed
-           their corresponding 8.3 directory entry.  In addition, each
-           slot has an id which marks its order in the extended file
-           name.  Here is a very abbreviated view of an 8.3 directory
-           entry and its corresponding long name slots for the file
-           "My Big File.Extension which is long":
-
-                <proceeding files...>
-                <slot #3, id = 0x43, characters = "h is long">
-                <slot #2, id = 0x02, characters = "xtension whic">
-                <slot #1, id = 0x01, characters = "My Big File.E">
-                <directory entry, name = "MYBIGFIL.EXT">
-
-           Note that the slots are stored from last to first.  Slots
-           are numbered from 1 to N.  The Nth slot is or'ed with 0x40
-           to mark it as the last one.
-
-        2) Checksum.  Each slot has an "alias_checksum" value.  The
-           checksum is calculated from the 8.3 name using the
-           following algorithm:
-
-                for (sum = i = 0; i < 11; i++) {
-                        sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + name[i]
-                }
-
-	3) If there is free space in the final slot, a Unicode NULL (0x0000) 
-	   is stored after the final character.  After that, all unused 
-	   characters in the final slot are set to Unicode 0xFFFF.
-
-Finally, note that the extended name is stored in Unicode.  Each Unicode
-character takes either two or four bytes, UTF-16LE encoded.
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 0a72b6321f5f..c13fee8b02ba 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -71,8 +71,8 @@ DMA support
 DMA controllers enumerated via ACPI should be registered in the system to
 provide generic access to their resources. For example, a driver that would
 like to be accessible to slave devices via generic API call
-dma_request_slave_channel() must register itself at the end of the probe
-function like this::
+dma_request_chan() must register itself at the end of the probe function like
+this::
 
 	err = devm_acpi_dma_controller_register(dev, xlate_func, dw);
 	/* Handle the error if it's not a case of !CONFIG_ACPI */
@@ -112,15 +112,15 @@ could look like::
 	}
 	#endif
 
-dma_request_slave_channel() will call xlate_func() for each registered DMA
-controller. In the xlate function the proper channel must be chosen based on
+dma_request_chan() will call xlate_func() for each registered DMA controller.
+In the xlate function the proper channel must be chosen based on
 information in struct acpi_dma_spec and the properties of the controller
 provided by struct acpi_dma.
 
-Clients must call dma_request_slave_channel() with the string parameter that
-corresponds to a specific FixedDMA resource. By default "tx" means the first
-entry of the FixedDMA resource array, "rx" means the second entry. The table
-below shows a layout::
+Clients must call dma_request_chan() with the string parameter that corresponds
+to a specific FixedDMA resource. By default "tx" means the first entry of the
+FixedDMA resource array, "rx" means the second entry. The table below shows a
+layout::
 
 	Device (I2C0)
 	{
diff --git a/Documentation/firmware-guide/acpi/namespace.rst b/Documentation/firmware-guide/acpi/namespace.rst
index 835521baeb89..3eb763d6656d 100644
--- a/Documentation/firmware-guide/acpi/namespace.rst
+++ b/Documentation/firmware-guide/acpi/namespace.rst
@@ -261,7 +261,7 @@ Description Tables contain information used for the creation of the
 struct acpi_device objects represented by the given row (xSDT means DSDT
 or SSDT).
 
-The forth column of the above table indicates the 'bus_id' generation
+The fourth column of the above table indicates the 'bus_id' generation
 rule of the struct acpi_device object:
 
    _HID:
diff --git a/Documentation/fpga/dfl.rst b/Documentation/fpga/dfl.rst
index 6fa483fc823e..094fc8aacd8e 100644
--- a/Documentation/fpga/dfl.rst
+++ b/Documentation/fpga/dfl.rst
@@ -108,6 +108,16 @@ More functions are exposed through sysfs
      error reporting sysfs interfaces allow user to read errors detected by the
      hardware, and clear the logged errors.
 
+ Power management (dfl_fme_power hwmon)
+     power management hwmon sysfs interfaces allow user to read power management
+     information (power consumption, thresholds, threshold status, limits, etc.)
+     and configure power thresholds for different throttling levels.
+
+ Thermal management (dfl_fme_thermal hwmon)
+     thermal management hwmon sysfs interfaces allow user to read thermal
+     management information (current temperature, thresholds, threshold status,
+     etc.).
+
 
 FIU - PORT
 ==========
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 5acdd1842ea2..0efede580039 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -79,16 +79,71 @@ AMDGPU XGMI Support
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
    :internal:
 
-AMDGPU RAS debugfs control interface
-====================================
+AMDGPU RAS Support
+==================
+
+The AMDGPU RAS interfaces are exposed via sysfs (for informational queries) and
+debugfs (for error injection).
+
+RAS debugfs/sysfs Control and Error Injection Interfaces
+--------------------------------------------------------
 
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
    :doc: AMDGPU RAS debugfs control interface
 
+RAS Reboot Behavior for Unrecoverable Errors
+--------------------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+
+RAS Error Count sysfs Interface
+-------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS sysfs Error Count Interface
+
+RAS EEPROM debugfs Interface
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS debugfs EEPROM table reset interface
+
+RAS VRAM Bad Pages sysfs Interface
+----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
 
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
    :internal:
 
+Sample Code
+-----------
+Sample code for testing error injection can be found here:
+https://cgit.freedesktop.org/mesa/drm/tree/tests/amdgpu/ras_tests.c
+
+This is part of the libdrm amdgpu unit tests which cover several areas of the GPU.
+There are four sets of tests:
+
+RAS Basic Test
+
+The test verifies the RAS feature enabled status and makes sure the necessary sysfs and debugfs files
+are present.
+
+RAS Query Test
+
+This test checks the RAS availability and enablement status for each supported IP block as well as
+the error counts.
+
+RAS Inject Test
+
+This test injects errors for each IP.
+
+RAS Disable Test
+
+This test tests disabling of RAS features for each IP block.
+
 
 GPU Power/Thermal Controls and Monitoring
 =========================================
@@ -130,11 +185,11 @@ pp_od_clk_voltage
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
    :doc: pp_od_clk_voltage
 
-pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+pp_dpm_*
+~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
-   :doc: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+   :doc: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie
 
 pp_power_profile_mode
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
index 966bd2d9f0cc..a73320576ca9 100644
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@@ -24,9 +24,9 @@ Driver Initialization
 At the core of every DRM driver is a :c:type:`struct drm_driver
 <drm_driver>` structure. Drivers typically statically initialize
 a drm_driver structure, and then pass it to
-:c:func:`drm_dev_alloc()` to allocate a device instance. After the
+drm_dev_alloc() to allocate a device instance. After the
 device instance is fully initialized it can be registered (which makes
-it accessible from userspace) using :c:func:`drm_dev_register()`.
+it accessible from userspace) using drm_dev_register().
 
 The :c:type:`struct drm_driver <drm_driver>` structure
 contains static information that describes the driver and features it
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index 3868008db8a9..9668a7fe2408 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -77,9 +77,6 @@ Atomic State Reset and Initialization
 Atomic State Helper Reference
 -----------------------------
 
-.. kernel-doc:: include/drm/drm_atomic_state_helper.h
-   :internal:
-
 .. kernel-doc:: drivers/gpu/drm/drm_atomic_state_helper.c
    :export:
 
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 23a3c986ef6d..906771e03103 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -3,7 +3,7 @@ Kernel Mode Setting (KMS)
 =========================
 
 Drivers must initialize the mode setting core by calling
-:c:func:`drm_mode_config_init()` on the DRM device. The function
+drm_mode_config_init() on the DRM device. The function
 initializes the :c:type:`struct drm_device <drm_device>`
 mode_config field and never fails. Once done, mode configuration must
 be setup by initializing the following fields.
@@ -181,8 +181,7 @@ Setting`_). The somewhat surprising part here is that properties are not
 directly instantiated on each object, but free-standing mode objects themselves,
 represented by :c:type:`struct drm_property <drm_property>`, which only specify
 the type and value range of a property. Any given property can be attached
-multiple times to different objects using :c:func:`drm_object_attach_property()
-<drm_object_attach_property>`.
+multiple times to different objects using drm_object_attach_property().
 
 .. kernel-doc:: include/drm/drm_mode_object.h
    :internal:
@@ -260,7 +259,8 @@ Taken all together there's two consequences for the atomic design:
   drm_connector_state <drm_connector_state>` for connectors. These are the only
   objects with userspace-visible and settable state. For internal state drivers
   can subclass these structures through embeddeding, or add entirely new state
-  structures for their globally shared hardware functions.
+  structures for their globally shared hardware functions, see :c:type:`struct
+  drm_private_state<drm_private_state>`.
 
 - An atomic update is assembled and validated as an entirely free-standing pile
   of structures within the :c:type:`drm_atomic_state <drm_atomic_state>`
@@ -269,6 +269,14 @@ Taken all together there's two consequences for the atomic design:
   to the driver and modeset objects. This way rolling back an update boils down
   to releasing memory and unreferencing objects like framebuffers.
 
+Locking of atomic state structures is internally using :c:type:`struct
+drm_modeset_lock <drm_modeset_lock>`. As a general rule the locking shouldn't be
+exposed to drivers, instead the right locks should be automatically acquired by
+any function that duplicates or peeks into a state, like e.g.
+drm_atomic_get_crtc_state().  Locking only protects the software data
+structure, ordering of committing state changes to hardware is sequenced using
+:c:type:`struct drm_crtc_commit <drm_crtc_commit>`.
+
 Read on in this chapter, and also in :ref:`drm_atomic_helper` for more detailed
 coverage of specific topics.
 
@@ -479,6 +487,9 @@ Color Management Properties
 .. kernel-doc:: drivers/gpu/drm/drm_color_mgmt.c
    :export:
 
+.. kernel-doc:: include/drm/drm_color_mgmt.h
+   :internal:
+
 Tile Group Property
 -------------------
 
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index b664f054c259..c77b32601260 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -149,19 +149,19 @@ struct :c:type:`struct drm_gem_object <drm_gem_object>`.
 To create a GEM object, a driver allocates memory for an instance of its
 specific GEM object type and initializes the embedded struct
 :c:type:`struct drm_gem_object <drm_gem_object>` with a call
-to :c:func:`drm_gem_object_init()`. The function takes a pointer
+to drm_gem_object_init(). The function takes a pointer
 to the DRM device, a pointer to the GEM object and the buffer object
 size in bytes.
 
 GEM uses shmem to allocate anonymous pageable memory.
-:c:func:`drm_gem_object_init()` will create an shmfs file of the
+drm_gem_object_init() will create an shmfs file of the
 requested size and store it into the struct :c:type:`struct
 drm_gem_object <drm_gem_object>` filp field. The memory is
 used as either main storage for the object when the graphics hardware
 uses system memory directly or as a backing store otherwise.
 
 Drivers are responsible for the actual physical pages allocation by
-calling :c:func:`shmem_read_mapping_page_gfp()` for each page.
+calling shmem_read_mapping_page_gfp() for each page.
 Note that they can decide to allocate pages when initializing the GEM
 object, or to delay allocation until the memory is needed (for instance
 when a page fault occurs as a result of a userspace memory access or
@@ -170,20 +170,18 @@ when the driver needs to start a DMA transfer involving the memory).
 Anonymous pageable memory allocation is not always desired, for instance
 when the hardware requires physically contiguous system memory as is
 often the case in embedded devices. Drivers can create GEM objects with
-no shmfs backing (called private GEM objects) by initializing them with
-a call to :c:func:`drm_gem_private_object_init()` instead of
-:c:func:`drm_gem_object_init()`. Storage for private GEM objects
-must be managed by drivers.
+no shmfs backing (called private GEM objects) by initializing them with a call
+to drm_gem_private_object_init() instead of drm_gem_object_init(). Storage for
+private GEM objects must be managed by drivers.
 
 GEM Objects Lifetime
 --------------------
 
 All GEM objects are reference-counted by the GEM core. References can be
-acquired and release by :c:func:`calling drm_gem_object_get()` and
-:c:func:`drm_gem_object_put()` respectively. The caller must hold the
-:c:type:`struct drm_device <drm_device>` struct_mutex lock when calling
-:c:func:`drm_gem_object_get()`. As a convenience, GEM provides
-:c:func:`drm_gem_object_put_unlocked()` functions that can be called without
+acquired and release by calling drm_gem_object_get() and drm_gem_object_put()
+respectively. The caller must hold the :c:type:`struct drm_device <drm_device>`
+struct_mutex lock when calling drm_gem_object_get(). As a convenience, GEM
+provides drm_gem_object_put_unlocked() functions that can be called without
 holding the lock.
 
 When the last reference to a GEM object is released the GEM core calls
@@ -194,7 +192,7 @@ free the GEM object and all associated resources.
 void (\*gem_free_object) (struct drm_gem_object \*obj); Drivers are
 responsible for freeing all GEM object resources. This includes the
 resources created by the GEM core, which need to be released with
-:c:func:`drm_gem_object_release()`.
+drm_gem_object_release().
 
 GEM Objects Naming
 ------------------
@@ -210,13 +208,11 @@ to the GEM object in other standard or driver-specific ioctls. Closing a
 DRM file handle frees all its GEM handles and dereferences the
 associated GEM objects.
 
-To create a handle for a GEM object drivers call
-:c:func:`drm_gem_handle_create()`. The function takes a pointer
-to the DRM file and the GEM object and returns a locally unique handle.
-When the handle is no longer needed drivers delete it with a call to
-:c:func:`drm_gem_handle_delete()`. Finally the GEM object
-associated with a handle can be retrieved by a call to
-:c:func:`drm_gem_object_lookup()`.
+To create a handle for a GEM object drivers call drm_gem_handle_create(). The
+function takes a pointer to the DRM file and the GEM object and returns a
+locally unique handle.  When the handle is no longer needed drivers delete it
+with a call to drm_gem_handle_delete(). Finally the GEM object associated with a
+handle can be retrieved by a call to drm_gem_object_lookup().
 
 Handles don't take ownership of GEM objects, they only take a reference
 to the object that will be dropped when the handle is destroyed. To
@@ -258,7 +254,7 @@ The mmap system call can't be used directly to map GEM objects, as they
 don't have their own file handle. Two alternative methods currently
 co-exist to map GEM objects to userspace. The first method uses a
 driver-specific ioctl to perform the mapping operation, calling
-:c:func:`do_mmap()` under the hood. This is often considered
+do_mmap() under the hood. This is often considered
 dubious, seems to be discouraged for new GEM-enabled drivers, and will
 thus not be described here.
 
@@ -267,23 +263,22 @@ The second method uses the mmap system call on the DRM file handle. void
 offset); DRM identifies the GEM object to be mapped by a fake offset
 passed through the mmap offset argument. Prior to being mapped, a GEM
 object must thus be associated with a fake offset. To do so, drivers
-must call :c:func:`drm_gem_create_mmap_offset()` on the object.
+must call drm_gem_create_mmap_offset() on the object.
 
 Once allocated, the fake offset value must be passed to the application
 in a driver-specific way and can then be used as the mmap offset
 argument.
 
-The GEM core provides a helper method :c:func:`drm_gem_mmap()` to
+The GEM core provides a helper method drm_gem_mmap() to
 handle object mapping. The method can be set directly as the mmap file
 operation handler. It will look up the GEM object based on the offset
 value and set the VMA operations to the :c:type:`struct drm_driver
-<drm_driver>` gem_vm_ops field. Note that
-:c:func:`drm_gem_mmap()` doesn't map memory to userspace, but
-relies on the driver-provided fault handler to map pages individually.
+<drm_driver>` gem_vm_ops field. Note that drm_gem_mmap() doesn't map memory to
+userspace, but relies on the driver-provided fault handler to map pages
+individually.
 
-To use :c:func:`drm_gem_mmap()`, drivers must fill the struct
-:c:type:`struct drm_driver <drm_driver>` gem_vm_ops field
-with a pointer to VM operations.
+To use drm_gem_mmap(), drivers must fill the struct :c:type:`struct drm_driver
+<drm_driver>` gem_vm_ops field with a pointer to VM operations.
 
 The VM operations is a :c:type:`struct vm_operations_struct <vm_operations_struct>`
 made up of several fields, the more interesting ones being:
@@ -298,9 +293,8 @@ made up of several fields, the more interesting ones being:
 
 
 The open and close operations must update the GEM object reference
-count. Drivers can use the :c:func:`drm_gem_vm_open()` and
-:c:func:`drm_gem_vm_close()` helper functions directly as open
-and close handlers.
+count. Drivers can use the drm_gem_vm_open() and drm_gem_vm_close() helper
+functions directly as open and close handlers.
 
 The fault operation handler is responsible for mapping individual pages
 to userspace when a page fault occurs. Depending on the memory
@@ -312,12 +306,12 @@ Drivers that want to map the GEM object upfront instead of handling page
 faults can implement their own mmap file operation handler.
 
 For platforms without MMU the GEM core provides a helper method
-:c:func:`drm_gem_cma_get_unmapped_area`. The mmap() routines will call
-this to get a proposed address for the mapping.
+drm_gem_cma_get_unmapped_area(). The mmap() routines will call this to get a
+proposed address for the mapping.
 
-To use :c:func:`drm_gem_cma_get_unmapped_area`, drivers must fill the
-struct :c:type:`struct file_operations <file_operations>` get_unmapped_area
-field with a pointer on :c:func:`drm_gem_cma_get_unmapped_area`.
+To use drm_gem_cma_get_unmapped_area(), drivers must fill the struct
+:c:type:`struct file_operations <file_operations>` get_unmapped_area field with
+a pointer on drm_gem_cma_get_unmapped_area().
 
 More detailed information about get_unmapped_area can be found in
 Documentation/nommu-mmap.txt
@@ -400,16 +394,13 @@ GEM VRAM Helper Functions Reference
 .. kernel-doc:: drivers/gpu/drm/drm_gem_vram_helper.c
    :export:
 
-VRAM MM Helper Functions Reference
-----------------------------------
+GEM TTM Helper Functions Reference
+-----------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
+.. kernel-doc:: drivers/gpu/drm/drm_gem_ttm_helper.c
    :doc: overview
 
-.. kernel-doc:: include/drm/drm_vram_mm_helper.h
-   :internal:
-
-.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
+.. kernel-doc:: drivers/gpu/drm/drm_gem_ttm_helper.c
    :export:
 
 VMA Offset Manager
diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index 94f90521f58c..56fec6ed1ad8 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -254,36 +254,45 @@ Validating changes with IGT
 There's a collection of tests that aims to cover the whole functionality of
 DRM drivers and that can be used to check that changes to DRM drivers or the
 core don't regress existing functionality. This test suite is called IGT and
-its code can be found in https://cgit.freedesktop.org/drm/igt-gpu-tools/.
+its code and instructions to build and run can be found in
+https://gitlab.freedesktop.org/drm/igt-gpu-tools/.
 
-To build IGT, start by installing its build dependencies. In Debian-based
-systems::
+Using VKMS to test DRM API
+--------------------------
 
-	# apt-get build-dep intel-gpu-tools
+VKMS is a software-only model of a KMS driver that is useful for testing
+and for running compositors. VKMS aims to enable a virtual display without
+the need for a hardware display capability. These characteristics made VKMS
+a perfect tool for validating the DRM core behavior and also support the
+compositor developer. VKMS makes it possible to test DRM functions in a
+virtual machine without display, simplifying the validation of some of the
+core changes.
 
-And in Fedora-based systems::
+To Validate changes in DRM API with VKMS, start setting the kernel: make
+sure to enable VKMS module; compile the kernel with the VKMS enabled and
+install it in the target machine. VKMS can be run in a Virtual Machine
+(QEMU, virtme or similar). It's recommended the use of KVM with the minimum
+of 1GB of RAM and four cores.
 
-	# dnf builddep intel-gpu-tools
+It's possible to run the IGT-tests in a VM in two ways:
 
-Then clone the repository::
+	1. Use IGT inside a VM
+	2. Use IGT from the host machine and write the results in a shared directory.
 
-	$ git clone git://anongit.freedesktop.org/drm/igt-gpu-tools
+As follow, there is an example of using a VM with a shared directory with
+the host machine to run igt-tests. As an example it's used virtme::
 
-Configure the build system and start the build::
+	$ virtme-run --rwdir /path/for/shared_dir --kdir=path/for/kernel/directory --mods=auto
 
-	$ cd igt-gpu-tools && ./autogen.sh && make -j6
+Run the igt-tests in the guest machine, as example it's ran the 'kms_flip'
+tests::
 
-Download the piglit dependency::
+	$ /path/for/igt-gpu-tools/scripts/run-tests.sh -p -s -t "kms_flip.*" -v
 
-	$ ./scripts/run-tests.sh -d
-
-And run the tests::
-
-	$ ./scripts/run-tests.sh -t kms -t core -s
-
-run-tests.sh is a wrapper around piglit that will execute the tests matching
-the -t options. A report in HTML format will be available in
-./results/html/index.html. Results can be compared with piglit.
+In this example, instead of build the igt_runner, Piglit is used
+(-p option); it's created html summary of the tests results and it's saved
+in the folder "igt-gpu-tools/results"; it's executed only the igt-tests
+matching the -t option.
 
 Display CRC Support
 -------------------
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 3415255ad3dc..e539c42a3e78 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -246,6 +246,15 @@ Display PLLs
 .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.h
    :internal:
 
+Display State Buffer
+--------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c
+   :doc: DSB
+
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c
+   :internal:
+
 Memory Management and Command Submission
 ========================================
 
@@ -358,15 +367,6 @@ Batchbuffer Parsing
 .. kernel-doc:: drivers/gpu/drm/i915/i915_cmd_parser.c
    :internal:
 
-Batchbuffer Pools
------------------
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
-   :doc: batch pool
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
-   :internal:
-
 User Batchbuffer Execution
 --------------------------
 
@@ -415,44 +415,77 @@ Object Tiling IOCTLs
 .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
    :doc: buffer object tiling
 
+Microcontrollers
+================
+
+Starting from gen9, three microcontrollers are available on the HW: the
+graphics microcontroller (GuC), the HEVC/H.265 microcontroller (HuC) and the
+display microcontroller (DMC). The driver is responsible for loading the
+firmwares on the microcontrollers; the GuC and HuC firmwares are transferred
+to WOPCM using the DMA engine, while the DMC firmware is written through MMIO.
+
 WOPCM
-=====
+-----
 
 WOPCM Layout
-------------
+~~~~~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c
    :doc: WOPCM Layout
 
 GuC
-===
+---
 
-Firmware Layout
--------------------
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+   :doc: GuC
+
+GuC Firmware Layout
+~~~~~~~~~~~~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
    :doc: Firmware Layout
 
+GuC Memory Management
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+   :doc: GuC Memory Management
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+   :functions: intel_guc_allocate_vma
+
+
 GuC-specific firmware loader
-----------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
    :internal:
 
 GuC-based command submission
-----------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
    :doc: GuC-based command submission
 
-.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
-   :internal:
+HuC
+---
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+   :doc: HuC
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+   :functions: intel_huc_auth
 
-GuC Address Space
------------------
+HuC Memory Management
+~~~~~~~~~~~~~~~~~~~~~
 
-.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
-   :doc: GuC Address Space
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+   :doc: HuC Memory Management
+
+HuC Firmware Layout
+~~~~~~~~~~~~~~~~~~~
+The HuC FW layout is the same as the GuC one, see `GuC Firmware Layout`_
+
+DMC
+---
+See `CSR firmware support for DMC`_
 
 Tracing
 =======
@@ -514,9 +547,9 @@ i915 Perf Stream
 This section covers the stream-semantics-agnostic structures and functions
 for representing an i915 perf stream FD and associated file operations.
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
+.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h
    :functions: i915_perf_stream
-.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
+.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h
    :functions: i915_perf_stream_ops
 
 .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
@@ -541,7 +574,7 @@ for representing an i915 perf stream FD and associated file operations.
 i915 Perf Observation Architecture Stream
 -----------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
+.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h
    :functions: i915_oa_ops
 
 .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
diff --git a/Documentation/gpu/mcde.rst b/Documentation/gpu/mcde.rst
index c69e977defda..dd43dde379e0 100644
--- a/Documentation/gpu/mcde.rst
+++ b/Documentation/gpu/mcde.rst
@@ -5,4 +5,4 @@
 =======================================================
 
 .. kernel-doc:: drivers/gpu/drm/mcde/mcde_drv.c
-   :doc: ST-Ericsson MCDE DRM Driver
+   :doc: ST-Ericsson MCDE Driver
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 32787acff0a8..bc869b23fc39 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -7,6 +7,22 @@ TODO list
 This section contains a list of smaller janitorial tasks in the kernel DRM
 graphics subsystem useful as newbie projects. Or for slow rainy days.
 
+Difficulty
+----------
+
+To make it easier task are categorized into different levels:
+
+Starter: Good tasks to get started with the DRM subsystem.
+
+Intermediate: Tasks which need some experience with working in the DRM
+subsystem, or some specific GPU/display graphics knowledge. For debugging issue
+it's good to have the relevant hardware (or a virtual driver set up) available
+for testing.
+
+Advanced: Tricky tasks that need fairly good understanding of the DRM subsystem
+and graphics topics. Generally need the relevant hardware for development and
+testing.
+
 Subsystem-wide refactorings
 ===========================
 
@@ -20,6 +36,8 @@ implementations), and then remove it.
 
 Contact: Daniel Vetter, respective driver maintainers
 
+Level: Intermediate
+
 Convert existing KMS drivers to atomic modesetting
 --------------------------------------------------
 
@@ -38,6 +56,8 @@ do by directly using the new atomic helper driver callbacks.
 
 Contact: Daniel Vetter, respective driver maintainers
 
+Level: Advanced
+
 Clean up the clipped coordination confusion around planes
 ---------------------------------------------------------
 
@@ -50,6 +70,8 @@ helpers.
 
 Contact: Ville Syrjälä, Daniel Vetter, driver maintainers
 
+Level: Advanced
+
 Convert early atomic drivers to async commit helpers
 ----------------------------------------------------
 
@@ -63,6 +85,8 @@ events for atomic commits correctly. But fixing these bugs is good anyway.
 
 Contact: Daniel Vetter, respective driver maintainers
 
+Level: Advanced
+
 Fallout from atomic KMS
 -----------------------
 
@@ -91,6 +115,8 @@ interfaces to fix these issues:
 
 Contact: Daniel Vetter
 
+Level: Intermediate
+
 Get rid of dev->struct_mutex from GEM drivers
 ---------------------------------------------
 
@@ -114,14 +140,16 @@ fine-grained per-buffer object and per-context lockings scheme. Currently only t
 
 Contact: Daniel Vetter, respective driver maintainers
 
-Convert instances of dev_info/dev_err/dev_warn to their DRM_DEV_* equivalent
-----------------------------------------------------------------------------
+Level: Advanced
+
+Convert logging to drm_* functions with drm_device paramater
+------------------------------------------------------------
 
 For drivers which could have multiple instances, it is necessary to
 differentiate between which is which in the logs. Since DRM_INFO/WARN/ERROR
 don't do this, drivers used dev_info/warn/err to make this differentiation. We
-now have DRM_DEV_* variants of the drm print macros, so we can start to convert
-those drivers back to using drm-formwatted specific log messages.
+now have drm_* variants of the drm print functions, so we can start to convert
+those drivers back to using drm-formatted specific log messages.
 
 Before you start this conversion please contact the relevant maintainers to make
 sure your work will be merged - not everyone agrees that the DRM dmesg macros
@@ -129,6 +157,8 @@ are better.
 
 Contact: Sean Paul, Maintainer of the driver you plan to convert
 
+Level: Starter
+
 Convert drivers to use simple modeset suspend/resume
 ----------------------------------------------------
 
@@ -139,23 +169,44 @@ of the atomic suspend/resume code in older atomic modeset drivers.
 
 Contact: Maintainer of the driver you plan to convert
 
-Convert drivers to use drm_fb_helper_fbdev_setup/teardown()
------------------------------------------------------------
+Level: Intermediate
 
-Most drivers can use drm_fb_helper_fbdev_setup() except maybe:
+Convert drivers to use drm_fbdev_generic_setup()
+------------------------------------------------
 
-- amdgpu which has special logic to decide whether to call
-  drm_helper_disable_unused_functions()
+Most drivers can use drm_fbdev_generic_setup(). Driver have to implement
+atomic modesetting and GEM vmap support. Current generic fbdev emulation
+expects the framebuffer in system memory (or system-like memory).
 
-- armada which isn't atomic and doesn't call
-  drm_helper_disable_unused_functions()
+Contact: Maintainer of the driver you plan to convert
 
-- i915 which calls drm_fb_helper_initial_config() in a worker
+Level: Intermediate
 
-Drivers that use drm_framebuffer_remove() to clean up the fbdev framebuffer can
-probably use drm_fb_helper_fbdev_teardown().
+drm_framebuffer_funcs and drm_mode_config_funcs.fb_create cleanup
+-----------------------------------------------------------------
 
-Contact: Maintainer of the driver you plan to convert
+A lot more drivers could be switched over to the drm_gem_framebuffer helpers.
+Various hold-ups:
+
+- Need to switch over to the generic dirty tracking code using
+  drm_atomic_helper_dirtyfb first (e.g. qxl).
+
+- Need to switch to drm_fbdev_generic_setup(), otherwise a lot of the custom fb
+  setup code can't be deleted.
+
+- Many drivers wrap drm_gem_fb_create() only to check for valid formats. For
+  atomic drivers we could check for valid formats by calling
+  drm_plane_check_pixel_format() against all planes, and pass if any plane
+  supports the format. For non-atomic that's not possible since like the format
+  list for the primary plane is fake and we'd therefor reject valid formats.
+
+- Many drivers subclass drm_framebuffer, we'd need a embedding compatible
+  version of the varios drm_gem_fb_create functions. Maybe called
+  drm_gem_fb_create/_with_dirty/_with_funcs as needed.
+
+Contact: Daniel Vetter
+
+Level: Intermediate
 
 Clean up mmap forwarding
 ------------------------
@@ -166,14 +217,16 @@ There's drm_gem_prime_mmap() for this now, but still needs to be rolled out.
 
 Contact: Daniel Vetter
 
+Level: Intermediate
+
 Generic fbdev defio support
 ---------------------------
 
 The defio support code in the fbdev core has some very specific requirements,
-which means drivers need to have a special framebuffer for fbdev. Which prevents
-us from using the generic fbdev emulation code everywhere. The main issue is
-that it uses some fields in struct page itself, which breaks shmem gem objects
-(and other things).
+which means drivers need to have a special framebuffer for fbdev. The main
+issue is that it uses some fields in struct page itself, which breaks shmem
+gem objects (and other things). To support defio, affected drivers require
+the use of a shadow buffer, which may add CPU and memory overhead.
 
 Possible solution would be to write our own defio mmap code in the drm fbdev
 emulation. It would need to fully wrap the existing mmap ops, forwarding
@@ -196,6 +249,8 @@ Might be good to also have some igt testcases for this.
 
 Contact: Daniel Vetter, Noralf Tronnes
 
+Level: Advanced
+
 idr_init_base()
 ---------------
 
@@ -206,6 +261,8 @@ efficient.
 
 Contact: Daniel Vetter
 
+Level: Starter
+
 struct drm_gem_object_funcs
 ---------------------------
 
@@ -216,6 +273,8 @@ We also need a 2nd version of the CMA define that doesn't require the
 vmapping to be present (different hook for prime importing). Plus this needs to
 be rolled out to all drivers using their own implementations, too.
 
+Level: Intermediate
+
 Use DRM_MODESET_LOCK_ALL_* helpers instead of boilerplate
 ---------------------------------------------------------
 
@@ -231,6 +290,8 @@ As a reference, take a look at the conversions already completed in drm core.
 
 Contact: Sean Paul, respective driver maintainers
 
+Level: Starter
+
 Rename CMA helpers to DMA helpers
 ---------------------------------
 
@@ -241,6 +302,9 @@ no one knows what that means) since underneath they just use dma_alloc_coherent.
 
 Contact: Laurent Pinchart, Daniel Vetter
 
+Level: Intermediate (mostly because it is a huge tasks without good partial
+milestones, not technically itself that challenging)
+
 Convert direct mode.vrefresh accesses to use drm_mode_vrefresh()
 ----------------------------------------------------------------
 
@@ -259,6 +323,8 @@ drm_display_mode to avoid future use.
 
 Contact: Sean Paul
 
+Level: Starter
+
 Remove drm_display_mode.hsync
 -----------------------------
 
@@ -269,6 +335,8 @@ it to use drm_mode_hsync() instead.
 
 Contact: Sean Paul
 
+Level: Starter
+
 drm_fb_helper tasks
 -------------------
 
@@ -277,28 +345,49 @@ drm_fb_helper tasks
   these igt tests need to be fixed: kms_fbcon_fbt@psr and
   kms_fbcon_fbt@psr-suspend.
 
-- The max connector argument for drm_fb_helper_init() and
-  drm_fb_helper_fbdev_setup() isn't used anymore and can be removed.
+- The max connector argument for drm_fb_helper_init() isn't used anymore and
+  can be removed.
 
 - The helper doesn't keep an array of connectors anymore so these can be
   removed: drm_fb_helper_single_add_all_connectors(),
   drm_fb_helper_add_one_connector() and drm_fb_helper_remove_one_connector().
 
-Core refactorings
-=================
+Level: Intermediate
 
-Clean up the DRM header mess
-----------------------------
+connector register/unregister fixes
+-----------------------------------
+
+- For most connectors it's a no-op to call drm_connector_register/unregister
+  directly from driver code, drm_dev_register/unregister take care of this
+  already. We can remove all of them.
+
+- For dp drivers it's a bit more a mess, since we need the connector to be
+  registered when calling drm_dp_aux_register. Fix this by instead calling
+  drm_dp_aux_init, and moving the actual registering into a late_register
+  callback as recommended in the kerneldoc.
 
-The DRM subsystem originally had only one huge global header, ``drmP.h``. This
-is now split up, but many source files still include it. The remaining part of
-the cleanup work here is to replace any ``#include <drm/drmP.h>`` by only the
-headers needed (and fixing up any missing pre-declarations in the headers).
+Level: Intermediate
 
-In the end no .c file should need to include ``drmP.h`` anymore.
+Remove load/unload callbacks from all non-DRIVER_LEGACY drivers
+---------------------------------------------------------------
+
+The load/unload callbacks in struct &drm_driver are very much midlayers, plus
+for historical reasons they get the ordering wrong (and we can't fix that)
+between setting up the &drm_driver structure and calling drm_dev_register().
+
+- Rework drivers to no longer use the load/unload callbacks, directly coding the
+  load/unload sequence into the driver's probe function.
+
+- Once all non-DRIVER_LEGACY drivers are converted, disallow the load/unload
+  callbacks for all modern drivers.
 
 Contact: Daniel Vetter
 
+Level: Intermediate
+
+Core refactorings
+=================
+
 Make panic handling work
 ------------------------
 
@@ -338,6 +427,8 @@ This is a really varied tasks with lots of little bits and pieces:
 
 Contact: Daniel Vetter
 
+Level: Advanced
+
 Clean up the debugfs support
 ----------------------------
 
@@ -367,6 +458,8 @@ There's a bunch of issues with it:
 
 Contact: Daniel Vetter
 
+Level: Intermediate
+
 KMS cleanups
 ------------
 
@@ -382,6 +475,8 @@ Some of these date from the very introduction of KMS in 2008 ...
   end, for which we could add drm_*_cleanup_kfree(). And then there's the (for
   historical reasons) misnamed drm_primary_helper_destroy() function.
 
+Level: Intermediate
+
 Better Testing
 ==============
 
@@ -390,6 +485,8 @@ Enable trinity for DRM
 
 And fix up the fallout. Should be really interesting ...
 
+Level: Advanced
+
 Make KMS tests in i-g-t generic
 -------------------------------
 
@@ -403,6 +500,8 @@ converting things over. For modeset tests we also first need a bit of
 infrastructure to use dumb buffers for untiled buffers, to be able to run all
 the non-i915 specific modeset tests.
 
+Level: Advanced
+
 Extend virtual test driver (VKMS)
 ---------------------------------
 
@@ -412,6 +511,8 @@ fit the available time.
 
 Contact: Daniel Vetter
 
+Level: See details
+
 Backlight Refactoring
 ---------------------
 
@@ -425,6 +526,8 @@ Plan to fix this:
 
 Contact: Daniel Vetter
 
+Level: Intermediate
+
 Driver Specific
 ===============
 
@@ -438,13 +541,6 @@ See drivers/gpu/drm/amd/display/TODO for tasks.
 
 Contact: Harry Wentland, Alex Deucher
 
-i915
-----
-
-- Our early/late pm callbacks could be removed in favour of using
-  device_link_add to model the dependency between i915 and snd_had. See
-  https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
-
 Bootsplash
 ==========
 
@@ -460,5 +556,36 @@ for fbdev.
 
 Contact: Sam Ravnborg
 
+Level: Advanced
+
 Outside DRM
 ===========
+
+Convert fbdev drivers to DRM
+----------------------------
+
+There are plenty of fbdev drivers for older hardware. Some hwardware has
+become obsolete, but some still provides good(-enough) framebuffers. The
+drivers that are still useful should be converted to DRM and afterwards
+removed from fbdev.
+
+Very simple fbdev drivers can best be converted by starting with a new
+DRM driver. Simple KMS helpers and SHMEM should be able to handle any
+existing hardware. The new driver's call-back functions are filled from
+existing fbdev code.
+
+More complex fbdev drivers can be refactored step-by-step into a DRM
+driver with the help of the DRM fbconv helpers. [1] These helpers provide
+the transition layer between the DRM core infrastructure and the fbdev
+driver interface. Create a new DRM driver on top of the fbconv helpers,
+copy over the fbdev driver, and hook it up to the DRM code. Examples for
+several fbdev drivers are available at [1] and a tutorial of this process
+available at [2]. The result is a primitive DRM driver that can run X11
+and Weston.
+
+ - [1] https://gitlab.freedesktop.org/tzimmermann/linux/tree/fbconv
+ - [2] https://gitlab.freedesktop.org/tzimmermann/linux/blob/fbconv/drivers/gpu/drm/drm_fbconv_helper.c
+
+Contact: Thomas Zimmermann <tzimmermann@suse.de>
+
+Level: Advanced
diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst
new file mode 100644
index 000000000000..c81e0b4abd28
--- /dev/null
+++ b/Documentation/hwmon/adm1177.rst
@@ -0,0 +1,36 @@
+Kernel driver adm1177
+=====================
+
+Supported chips:
+  * Analog Devices ADM1177
+    Prefix: 'adm1177'
+    Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/ADM1177.pdf
+
+Author: Beniamin Bia <beniamin.bia@analog.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for Analog Devices ADM1177
+Hot-Swap Controller and Digital Power Monitors with Soft Start Pin.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Current maxim attribute
+is read-write, all other attributes are read-only.
+
+in0_input		Measured voltage in microvolts.
+
+curr1_input		Measured current in microamperes.
+curr1_max_alarm		Overcurrent alarm in microamperes.
diff --git a/Documentation/hwmon/bel-pfe.rst b/Documentation/hwmon/bel-pfe.rst
new file mode 100644
index 000000000000..4b4a7d67854c
--- /dev/null
+++ b/Documentation/hwmon/bel-pfe.rst
@@ -0,0 +1,112 @@
+Kernel driver bel-pfe
+======================
+
+Supported chips:
+
+  * BEL PFE1100
+
+    Prefixes: 'pfe1100'
+
+    Addresses scanned: -
+
+    Datasheet: https://www.belfuse.com/resources/datasheets/powersolutions/ds-bps-pfe1100-12-054xa.pdf
+
+  * BEL PFE3000
+
+    Prefixes: 'pfe3000'
+
+    Addresses scanned: -
+
+    Datasheet: https://www.belfuse.com/resources/datasheets/powersolutions/ds-bps-pfe3000-series.pdf
+
+Author: Tao Ren <rentao.bupt@gmail.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for below power supply devices
+which support PMBus Protocol:
+
+  * BEL PFE1100
+
+    1100 Watt AC to DC power-factor-corrected (PFC) power supply.
+    PMBus Communication Manual is not publicly available.
+
+  * BEL PFE3000
+
+    3000 Watt AC/DC power-factor-corrected (PFC) and DC-DC power supply.
+    PMBus Communication Manual is not publicly available.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+Example: the following will load the driver for an PFE3000 at address 0x20
+on I2C bus #1::
+
+	$ modprobe bel-pfe
+	$ echo pfe3000 0x20 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+======================= =======================================================
+curr1_label		"iin"
+curr1_input		Measured input current
+curr1_max               Input current max value
+curr1_max_alarm         Input current max alarm
+
+curr[2-3]_label		"iout[1-2]"
+curr[2-3]_input		Measured output current
+curr[2-3]_max           Output current max value
+curr[2-3]_max_alarm     Output current max alarm
+
+fan[1-2]_input          Fan 1 and 2 speed in RPM
+fan1_target             Set fan speed reference for both fans
+
+in1_label		"vin"
+in1_input		Measured input voltage
+in1_crit		Input voltage critical max value
+in1_crit_alarm		Input voltage critical max alarm
+in1_lcrit               Input voltage critical min value
+in1_lcrit_alarm         Input voltage critical min alarm
+in1_max                 Input voltage max value
+in1_max_alarm           Input voltage max alarm
+
+in2_label               "vcap"
+in2_input               Hold up capacitor voltage
+
+in[3-8]_label		"vout[1-3,5-7]"
+in[3-8]_input		Measured output voltage
+in[3-4]_alarm           vout[1-2] output voltage alarm
+
+power[1-2]_label	"pin[1-2]"
+power[1-2]_input        Measured input power
+power[1-2]_alarm	Input power high alarm
+
+power[3-4]_label	"pout[1-2]"
+power[3-4]_input	Measured output power
+
+temp[1-3]_input		Measured temperature
+temp[1-3]_alarm         Temperature alarm
+======================= =======================================================
+
+.. note::
+
+    - curr3, fan2, vout[2-7], vcap, pin2, pout2 and temp3 attributes only
+      exist for PFE3000.
diff --git a/Documentation/hwmon/dell-smm-hwmon.rst b/Documentation/hwmon/dell-smm-hwmon.rst
new file mode 100644
index 000000000000..3bf77a5df995
--- /dev/null
+++ b/Documentation/hwmon/dell-smm-hwmon.rst
@@ -0,0 +1,164 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+.. include:: <isonum.txt>
+
+Kernel driver dell-smm-hwmon
+============================
+
+:Copyright: |copy| 2002-2005 Massimo Dal Zotto <dz@debian.org>
+:Copyright: |copy| 2019 Giovanni Mascellani <gio@debian.org>
+
+Description
+-----------
+
+On many Dell laptops the System Management Mode (SMM) BIOS can be
+queried for the status of fans and temperature sensors.  Userspace
+utilities like ``sensors`` can be used to return the readings. The
+userspace suite `i8kutils`__ can also be used to read the sensors and
+automatically adjust fan speed (please notice that it currently uses
+the deprecated ``/proc/i8k`` interface).
+
+ __ https://github.com/vitorafsr/i8kutils
+
+``sysfs`` interface
+-------------------
+
+Temperature sensors and fans can be queried and set via the standard
+``hwmon`` interface on ``sysfs``, under the directory
+``/sys/class/hwmon/hwmonX`` for some value of ``X`` (search for the
+``X`` such that ``/sys/class/hwmon/hwmonX/name`` has content
+``dell_smm``). A number of other attributes can be read or written:
+
+=============================== ======= =======================================
+Name				Perm	Description
+=============================== ======= =======================================
+fan[1-3]_input                  RO      Fan speed in RPM.
+fan[1-3]_label                  RO      Fan label.
+pwm[1-3]                        RW      Control the fan PWM duty-cycle.
+pwm1_enable                     WO      Enable or disable automatic BIOS fan
+                                        control (not supported on all laptops,
+                                        see below for details).
+temp[1-10]_input                RO      Temperature reading in milli-degrees
+                                        Celsius.
+temp[1-10]_label                RO      Temperature sensor label.
+=============================== ======= =======================================
+
+Disabling automatic BIOS fan control
+------------------------------------
+
+On some laptops the BIOS automatically sets fan speed every few
+seconds. Therefore the fan speed set by mean of this driver is quickly
+overwritten.
+
+There is experimental support for disabling automatic BIOS fan
+control, at least on laptops where the corresponding SMM command is
+known, by writing the value ``1`` in the attribute ``pwm1_enable``
+(writing ``2`` enables automatic BIOS control again). Even if you have
+more than one fan, all of them are set to either enabled or disabled
+automatic fan control at the same time and, notwithstanding the name,
+``pwm1_enable`` sets automatic control for all fans.
+
+If ``pwm1_enable`` is not available, then it means that SMM codes for
+enabling and disabling automatic BIOS fan control are not whitelisted
+for your hardware. It is possible that codes that work for other
+laptops actually work for yours as well, or that you have to discover
+new codes.
+
+Check the list ``i8k_whitelist_fan_control`` in file
+``drivers/hwmon/dell-smm-hwmon.c`` in the kernel tree: as a first
+attempt you can try to add your machine and use an already-known code
+pair. If, after recompiling the kernel, you see that ``pwm1_enable``
+is present and works (i.e., you can manually control the fan speed),
+then please submit your finding as a kernel patch, so that other users
+can benefit from it. Please see
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
+for information on submitting patches.
+
+If no known code works on your machine, you need to resort to do some
+probing, because unfortunately Dell does not publish datasheets for
+its SMM. You can experiment with the code in `this repository`__ to
+probe the BIOS on your machine and discover the appropriate codes.
+
+ __ https://github.com/clopez/dellfan/
+
+Again, when you find new codes, we'd be happy to have your patches!
+
+Module parameters
+-----------------
+
+* force:bool
+                   Force loading without checking for supported
+                   models. (default: 0)
+
+* ignore_dmi:bool
+                   Continue probing hardware even if DMI data does not
+                   match. (default: 0)
+
+* restricted:bool
+                   Allow fan control only to processes with the
+                   ``CAP_SYS_ADMIN`` capability set or processes run
+                   as root when using the legacy ``/proc/i8k``
+                   interface. In this case normal users will be able
+                   to read temperature and fan status but not to
+                   control the fan.  If your notebook is shared with
+                   other users and you don't trust them you may want
+                   to use this option. (default: 1, only available
+                   with ``CONFIG_I8K``)
+
+* power_status:bool
+                   Report AC status in ``/proc/i8k``. (default: 0,
+                   only available with ``CONFIG_I8K``)
+
+* fan_mult:uint
+                   Factor to multiply fan speed with. (default:
+                   autodetect)
+
+* fan_max:uint
+                   Maximum configurable fan speed. (default:
+                   autodetect)
+
+Legacy ``/proc`` interface
+--------------------------
+
+.. warning:: This interface is obsolete and deprecated and should not
+             used in new applications. This interface is only
+             available when kernel is compiled with option
+             ``CONFIG_I8K``.
+
+The information provided by the kernel driver can be accessed by
+simply reading the ``/proc/i8k`` file. For example::
+
+    $ cat /proc/i8k
+    1.0 A17 2J59L02 52 2 1 8040 6420 1 2
+
+The fields read from ``/proc/i8k`` are::
+
+    1.0 A17 2J59L02 52 2 1 8040 6420 1 2
+    |   |   |       |  | | |    |    | |
+    |   |   |       |  | | |    |    | +------- 10. buttons status
+    |   |   |       |  | | |    |    +--------- 9.  AC status
+    |   |   |       |  | | |    +-------------- 8.  fan0 RPM
+    |   |   |       |  | | +------------------- 7.  fan1 RPM
+    |   |   |       |  | +--------------------- 6.  fan0 status
+    |   |   |       |  +----------------------- 5.  fan1 status
+    |   |   |       +-------------------------- 4.  temp0 reading (Celsius)
+    |   |   +---------------------------------- 3.  Dell service tag (later known as 'serial number')
+    |   +-------------------------------------- 2.  BIOS version
+    +------------------------------------------ 1.  /proc/i8k format version
+
+A negative value, for example -22, indicates that the BIOS doesn't
+return the corresponding information. This is normal on some
+models/BIOSes.
+
+For performance reasons the ``/proc/i8k`` doesn't report by default
+the AC status since this SMM call takes a long time to execute and is
+not really needed.  If you want to see the ac status in ``/proc/i8k``
+you must explictitly enable this option by passing the
+``power_status=1`` parameter to insmod. If AC status is not
+available -1 is printed instead.
+
+The driver provides also an ioctl interface which can be used to
+obtain the same information and to control the fan status. The ioctl
+interface can be accessed from C programs or from shell using the
+i8kctl utility. See the source file of ``i8kutils`` for more
+information on how to use the ioctl interface.
diff --git a/Documentation/hwmon/drivetemp.rst b/Documentation/hwmon/drivetemp.rst
new file mode 100644
index 000000000000..2d37d049247f
--- /dev/null
+++ b/Documentation/hwmon/drivetemp.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver drivetemp
+=======================
+
+
+References
+----------
+
+ANS T13/1699-D
+Information technology - AT Attachment 8 - ATA/ATAPI Command Set (ATA8-ACS)
+
+ANS Project T10/BSR INCITS 513
+Information technology - SCSI Primary Commands - 4 (SPC-4)
+
+ANS Project INCITS 557
+Information technology - SCSI / ATA Translation - 5 (SAT-5)
+
+
+Description
+-----------
+
+This driver supports reporting the temperature of disk and solid state
+drives with temperature sensors.
+
+If supported, it uses the ATA SCT Command Transport feature to read
+the current drive temperature and, if available, temperature limits
+as well as historic minimum and maximum temperatures. If SCT Command
+Transport is not supported, the driver uses SMART attributes to read
+the drive temperature.
+
+
+Sysfs entries
+-------------
+
+Only the temp1_input attribute is always available. Other attributes are
+available only if reported by the drive. All temperatures are reported in
+milli-degrees Celsius.
+
+=======================	=====================================================
+temp1_input		Current drive temperature
+temp1_lcrit		Minimum temperature limit. Operating the device below
+			this temperature may cause physical damage to the
+			device.
+temp1_min		Minimum recommended continuous operating limit
+temp1_max		Maximum recommended continuous operating temperature
+temp1_crit		Maximum temperature limit. Operating the device above
+			this temperature may cause physical damage to the
+			device.
+temp1_lowest		Minimum temperature seen this power cycle
+temp1_highest		Maximum temperature seen this power cycle
+=======================	=====================================================
diff --git a/Documentation/hwmon/ina3221.rst b/Documentation/hwmon/ina3221.rst
index f6007ae8f4e2..297f7323b441 100644
--- a/Documentation/hwmon/ina3221.rst
+++ b/Documentation/hwmon/ina3221.rst
@@ -41,6 +41,18 @@ curr[123]_max           Warning alert current(mA) setting, activates the
 			average is above this value.
 curr[123]_max_alarm     Warning alert current limit exceeded
 in[456]_input           Shunt voltage(uV) for channels 1, 2, and 3 respectively
+in7_input               Sum of shunt voltage(uV) channels
+in7_label               Channel label for sum of shunt voltage
+curr4_input             Sum of current(mA) measurement channels,
+                        (only available when all channels use the same resistor
+                        value for their shunt resistors)
+curr4_crit              Critical alert current(mA) setting for sum of current
+                        measurements, activates the corresponding alarm
+                        when the respective current is above this value
+                        (only effective when all channels use the same resistor
+                        value for their shunt resistors)
+curr4_crit_alarm        Critical alert current limit exceeded for sum of
+                        current measurements.
 samples                 Number of samples using in the averaging mode.
 
                         Supports the list of number of samples:
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 230ad59b462b..b24adb67ddca 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -29,6 +29,7 @@ Hardware Monitoring Kernel Drivers
    adm1025
    adm1026
    adm1031
+   adm1177
    adm1275
    adm9240
    ads7828
@@ -41,10 +42,13 @@ Hardware Monitoring Kernel Drivers
    asb100
    asc7621
    aspeed-pwm-tacho
+   bel-pfe
    coretemp
    da9052
    da9055
+   dell-smm-hwmon
    dme1737
+   drivetemp
    ds1621
    ds620
    emc1403
@@ -90,6 +94,7 @@ Hardware Monitoring Kernel Drivers
    lm95245
    lochnagar
    ltc2945
+   ltc2947
    ltc2978
    ltc2990
    ltc3815
@@ -103,8 +108,10 @@ Hardware Monitoring Kernel Drivers
    max1619
    max1668
    max197
+   max20730
    max20751
    max31722
+   max31730
    max31785
    max31790
    max34440
@@ -153,6 +160,7 @@ Hardware Monitoring Kernel Drivers
    tmp108
    tmp401
    tmp421
+   tmp513
    tps40422
    twl4030-madc-hwmon
    ucd9000
@@ -173,6 +181,7 @@ Hardware Monitoring Kernel Drivers
    wm831x
    wm8350
    xgene-hwmon
+   xdpe12284
    zl6100
 
 .. only::  subproject and html
diff --git a/Documentation/hwmon/inspur-ipsps1.rst b/Documentation/hwmon/inspur-ipsps1.rst
index 292c0c26bdd1..4825046ecb25 100644
--- a/Documentation/hwmon/inspur-ipsps1.rst
+++ b/Documentation/hwmon/inspur-ipsps1.rst
@@ -17,7 +17,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 Sysfs entries
diff --git a/Documentation/hwmon/ltc2947.rst b/Documentation/hwmon/ltc2947.rst
new file mode 100644
index 000000000000..419fc84fe934
--- /dev/null
+++ b/Documentation/hwmon/ltc2947.rst
@@ -0,0 +1,100 @@
+Kernel drivers ltc2947-i2c and ltc2947-spi
+==========================================
+
+Supported chips:
+
+  * Analog Devices LTC2947
+
+    Prefix: 'ltc2947'
+
+    Addresses scanned: -
+
+    Datasheet:
+
+        https://www.analog.com/media/en/technical-documentation/data-sheets/LTC2947.pdf
+
+Author: Nuno Sá <nuno.sa@analog.com>
+
+Description
+___________
+
+The LTC2947 is a high precision power and energy monitor that measures current,
+voltage, power, temperature, charge and energy. The device supports both SPI
+and I2C depending on the chip configuration.
+The device also measures accumulated quantities as energy. It has two banks of
+register's to read/set energy related values. These banks can be configured
+independently to have setups like: energy1 accumulates always and enrgy2 only
+accumulates if current is positive (to check battery charging efficiency for
+example). The device also supports a GPIO pin that can be configured as output
+to control a fan as a function of measured temperature. Then, the GPIO becomes
+active as soon as a temperature reading is higher than a defined threshold. The
+temp2 channel is used to control this thresholds and to read the respective
+alarms.
+
+Sysfs entries
+_____________
+
+The following attributes are supported. Limits are read-write, reset_history
+is write-only and all the other attributes are read-only.
+
+======================= ==========================================
+in0_input		VP-VM voltage (mV).
+in0_min			Undervoltage threshold
+in0_max			Overvoltage threshold
+in0_lowest		Lowest measured voltage
+in0_highest		Highest measured voltage
+in0_reset_history	Write 1 to reset in1 history
+in0_min_alarm		Undervoltage alarm
+in0_max_alarm		Overvoltage alarm
+in0_label		Channel label (VP-VM)
+
+in1_input		DVCC voltage (mV)
+in1_min			Undervoltage threshold
+in1_max			Overvoltage threshold
+in1_lowest		Lowest measured voltage
+in1_highest		Highest measured voltage
+in1_reset_history	Write 1 to reset in2 history
+in1_min_alarm		Undervoltage alarm
+in1_max_alarm		Overvoltage alarm
+in1_label		Channel label (DVCC)
+
+curr1_input		IP-IM Sense current (mA)
+curr1_min		Undercurrent threshold
+curr1_max		Overcurrent threshold
+curr1_lowest		Lowest measured current
+curr1_highest		Highest measured current
+curr1_reset_history	Write 1 to reset curr1 history
+curr1_min_alarm		Undercurrent alarm
+curr1_max_alarm		Overcurrent alarm
+curr1_label		Channel label (IP-IM)
+
+power1_input		Power (in uW)
+power1_min		Low power threshold
+power1_max		High power threshold
+power1_input_lowest	Historical minimum power use
+power1_input_highest	Historical maximum power use
+power1_reset_history	Write 1 to reset power1 history
+power1_min_alarm	Low power alarm
+power1_max_alarm	High power alarm
+power1_label		Channel label (Power)
+
+temp1_input		Chip Temperature (in milliC)
+temp1_min		Low temperature threshold
+temp1_max		High temperature threshold
+temp1_input_lowest	Historical minimum temperature use
+temp1_input_highest	Historical maximum temperature use
+temp1_reset_history	Write 1 to reset temp1 history
+temp1_min_alarm		Low temperature alarm
+temp1_max_alarm		High temperature alarm
+temp1_label		Channel label (Ambient)
+
+temp2_min		Low temperature threshold for fan control
+temp2_max		High temperature threshold for fan control
+temp2_min_alarm		Low temperature fan control alarm
+temp2_max_alarm		High temperature fan control alarm
+temp2_label		Channel label (TEMPFAN)
+
+energy1_input		Measured energy over time (in microJoule)
+
+energy2_input		Measured energy over time (in microJoule)
+======================= ==========================================
diff --git a/Documentation/hwmon/max20730.rst b/Documentation/hwmon/max20730.rst
new file mode 100644
index 000000000000..cea7ae58c2f7
--- /dev/null
+++ b/Documentation/hwmon/max20730.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver max20730
+======================
+
+Supported chips:
+
+  * Maxim MAX20730
+
+    Prefix: 'max20730'
+
+    Addresses scanned: -
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX20730.pdf
+
+  * Maxim MAX20734
+
+    Prefix: 'max20734'
+
+    Addresses scanned: -
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX20734.pdf
+
+  * Maxim MAX20743
+
+    Prefix: 'max20743'
+
+    Addresses scanned: -
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX20743.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver implements support for Maxim MAX20730, MAX20734, and MAX20743
+Integrated, Step-Down Switching Regulators with PMBus support.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+
+Sysfs entries
+-------------
+
+=================== ===== =======================================================
+curr1_crit          RW/RO Critical output current. Please see datasheet for
+                          supported limits. Read-only if the chip is
+                          write protected; read-write otherwise.
+curr1_crit_alarm    RO    Output current critical alarm
+curr1_input         RO    Output current
+curr1_label         RO    'iout1'
+in1_alarm           RO    Input voltage alarm
+in1_input           RO    Input voltage
+in1_label           RO    'vin'
+in2_alarm           RO    Output voltage alarm
+in2_input           RO    Output voltage
+in2_label           RO    'vout1'
+temp1_crit          RW/RO Critical temeperature. Supported values are 130 or 150
+                          degrees C. Read-only if the chip is write protected;
+                          read-write otherwise.
+temp1_crit_alarm    RO    Temperature critical alarm
+temp1_input         RO    Chip temperature
+=================== ===== =======================================================
diff --git a/Documentation/hwmon/max31730.rst b/Documentation/hwmon/max31730.rst
new file mode 100644
index 000000000000..def0de19dbd2
--- /dev/null
+++ b/Documentation/hwmon/max31730.rst
@@ -0,0 +1,44 @@
+Kernel driver max31790
+======================
+
+Supported chips:
+
+  * Maxim MAX31730
+
+    Prefix: 'max31730'
+
+    Addresses scanned: 0x1c, 0x1d, 0x1e, 0x1f, 0x4c, 0x4d, 0x4e, 0x4f
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31730.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver implements support for Maxim MAX31730.
+
+The MAX31730 temperature sensor monitors its own temperature and the
+temperatures of three external diode-connected transistors. The operating
+supply voltage is from 3.0V to 3.6V. Resistance cancellation compensates
+for high series resistance in circuit-board traces and the external thermal
+diode, while beta compensation corrects for temperature-measurement
+errors due to low-beta sensing transistors.
+
+
+Sysfs entries
+-------------
+
+=================== == =======================================================
+temp[1-4]_enable    RW Temperature enable/disable
+                       Set to 0 to enable channel, 0 to disable
+temp[1-4]_input     RO Temperature input
+temp[2-4]_fault     RO Fault indicator for remote channels
+temp[1-4]_max       RW Maximum temperature
+temp[1-4]_max_alarm RW Maximum temperature alarm
+temp[1-4]_min       RW Minimum temperature. Common for all channels.
+                       Only temp1_min is writeable.
+temp[1-4]_min_alarm RO Minimum temperature alarm
+temp[2-4]_offset    RW Temperature offset for remote channels
+=================== == =======================================================
diff --git a/Documentation/hwmon/pmbus.rst b/Documentation/hwmon/pmbus.rst
index abfb9dd4857d..f787984e88a9 100644
--- a/Documentation/hwmon/pmbus.rst
+++ b/Documentation/hwmon/pmbus.rst
@@ -63,6 +63,16 @@ Supported chips:
 
 	http://www.ti.com/lit/gpn/tps544c25
 
+  * Maxim MAX20796
+
+    Prefix: 'max20796'
+
+    Addresses scanned: -
+
+    Datasheet:
+
+	Not published
+
   * Generic PMBus devices
 
     Prefix: 'pmbus'
diff --git a/Documentation/hwmon/tmp513.rst b/Documentation/hwmon/tmp513.rst
new file mode 100644
index 000000000000..6c8fae4b1a75
--- /dev/null
+++ b/Documentation/hwmon/tmp513.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver tmp513
+====================
+
+Supported chips:
+
+  * Texas Instruments TMP512
+
+    Prefix: 'tmp512'
+
+    Datasheet: http://www.ti.com/lit/ds/symlink/tmp512.pdf
+
+  * Texas Instruments TMP513
+
+    Prefix: 'tmp513'
+
+    Datasheet: http://www.ti.com/lit/ds/symlink/tmp513.pdf
+
+Authors:
+
+	Eric Tremblay <etremblay@distech-controls.com>
+
+Description
+-----------
+
+This driver implements support for Texas Instruments TMP512, and TMP513.
+The TMP512 (dual-channel) and TMP513 (triple-channel) are system monitors
+that include remote sensors, a local temperature sensor, and a high-side current
+shunt monitor. These system monitors have the capability of measuring remote
+temperatures, on-chip temperatures, and system voltage/power/current
+consumption.
+
+The temperatures are measured in degrees Celsius with a range of
+-40 to + 125 degrees with a resolution of 0.0625 degree C.
+
+For hysteresis value, only the first channel is writable. Writing to it
+will affect all other values since each channels are sharing the same
+hysteresis value. The hysteresis is in degrees Celsius with a range of
+0 to 127.5 degrees with a resolution of 0.5 degree.
+
+The driver exports the temperature values via the following sysfs files:
+
+**temp[1-4]_input**
+
+**temp[1-4]_crit**
+
+**temp[1-4]_crit_alarm**
+
+**temp[1-4]_crit_hyst**
+
+The driver read the shunt voltage from the chip and convert it to current.
+The readable range depends on the "ti,pga-gain" property (default to 8) and the
+shunt resistor value. The value resolution will be equal to 10uV/Rshunt.
+
+The driver exports the shunt currents values via the following sysFs files:
+
+**curr1_input**
+
+**curr1_lcrit**
+
+**curr1_lcrit_alarm**
+
+**curr1_crit**
+
+**curr1_crit_alarm**
+
+The bus voltage range is read from the chip with a resolution of 4mV. The chip
+can be configurable in two different range (32V or 16V) using the
+ti,bus-range-microvolt property in the devicetree.
+
+The driver exports the bus voltage values via the following sysFs files:
+
+**in0_input**
+
+**in0_lcrit**
+
+**in0_lcrit_alarm**
+
+**in0_crit**
+
+**in0_crit_alarm**
+
+The bus power and bus currents range and resolution depends on the calibration
+register value. Those values are calculate by the hardware using those
+formulas:
+
+Current = (ShuntVoltage * CalibrationRegister) / 4096
+Power   = (Current * BusVoltage) / 5000
+
+The driver exports the bus current and bus power values via the following
+sysFs files:
+
+**curr2_input**
+
+**power1_input**
+
+**power1_crit**
+
+**power1_crit_alarm**
+
+The calibration process follow the procedure of the datasheet (without overflow)
+and depend on the shunt resistor value and the pga_gain value.
diff --git a/Documentation/hwmon/ucd9000.rst b/Documentation/hwmon/ucd9000.rst
index 746f21fcb48c..704f0cbd95d3 100644
--- a/Documentation/hwmon/ucd9000.rst
+++ b/Documentation/hwmon/ucd9000.rst
@@ -3,9 +3,10 @@ Kernel driver ucd9000
 
 Supported chips:
 
-  * TI UCD90120, UCD90124, UCD90160, UCD9090, and UCD90910
+  * TI UCD90120, UCD90124, UCD90160, UCD90320, UCD9090, and UCD90910
 
-    Prefixes: 'ucd90120', 'ucd90124', 'ucd90160', 'ucd9090', 'ucd90910'
+    Prefixes: 'ucd90120', 'ucd90124', 'ucd90160', 'ucd90320', 'ucd9090',
+              'ucd90910'
 
     Addresses scanned: -
 
@@ -14,6 +15,7 @@ Supported chips:
 	- http://focus.ti.com/lit/ds/symlink/ucd90120.pdf
 	- http://focus.ti.com/lit/ds/symlink/ucd90124.pdf
 	- http://focus.ti.com/lit/ds/symlink/ucd90160.pdf
+	- http://focus.ti.com/lit/ds/symlink/ucd90320.pdf
 	- http://focus.ti.com/lit/ds/symlink/ucd9090.pdf
 	- http://focus.ti.com/lit/ds/symlink/ucd90910.pdf
 
@@ -45,6 +47,12 @@ power-on reset signals, external interrupts, cascading, or other system
 functions. Twelve of these pins offer PWM functionality. Using these pins, the
 UCD90160 offers support for margining, and general-purpose PWM functions.
 
+The UCD90320 is a 32-rail PMBus/I2C addressable power-supply sequencer and
+monitor. The 24 integrated ADC channels (AMONx) monitor the power supply
+voltage, current, and temperature. Of the 84 GPIO pins, 8 can be used as
+digital monitors (DMONx), 32 to enable the power supply (ENx), 24 for margining
+(MARx), 16 for logical GPO, and 32 GPIs for cascading, and system function.
+
 The UCD9090 is a 10-rail PMBus/I2C addressable power-supply sequencer and
 monitor. The device integrates a 12-bit ADC for monitoring up to 10 power-supply
 voltage inputs. Twenty-three GPIO pins can be used for power supply enables,
diff --git a/Documentation/hwmon/xdpe12284.rst b/Documentation/hwmon/xdpe12284.rst
new file mode 100644
index 000000000000..6b7ae98cc536
--- /dev/null
+++ b/Documentation/hwmon/xdpe12284.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver xdpe122
+=====================
+
+Supported chips:
+
+  * Infineon XDPE12254
+
+    Prefix: 'xdpe12254'
+
+  * Infineon XDPE12284
+
+    Prefix: 'xdpe12284'
+
+Authors:
+
+	Vadim Pasternak <vadimp@mellanox.com>
+
+Description
+-----------
+
+This driver implements support for Infineon Multi-phase XDPE122 family
+dual loop voltage regulators.
+The family includes XDPE12284 and XDPE12254 devices.
+The devices from this family complaint with:
+- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC
+  converter specification.
+- Intel SVID rev 1.9. protocol.
+- PMBus rev 1.3 interface.
+
+Devices support linear format for reading input voltage, input and output current,
+input and output power and temperature.
+Device supports VID format for reading output voltage. The below modes are
+supported:
+- VR12.0 mode, 5-mV DAC - 0x01.
+- VR12.5 mode, 10-mV DAC - 0x02.
+- IMVP9 mode, 5-mV DAC - 0x03.
+- AMD mode 6.25mV - 0x10.
+
+Devices support two pages for telemetry.
+
+The driver provides for current: input, maximum and critical thresholds
+and maximum and critical alarms. Critical thresholds and critical alarm are
+supported only for current output.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "iin" and 3, 4 for "iout":
+
+**curr[3-4]_crit**
+
+**curr[3-4]_crit_alarm**
+
+**curr[1-4]_input**
+
+**curr[1-4]_label**
+
+**curr[1-4]_max**
+
+**curr[1-4]_max_alarm**
+
+The driver provides for voltage: input, critical and low critical thresholds
+and critical and low critical alarms.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "vin" and 3, 4 for "vout":
+
+**in[1-4]_crit**
+
+**in[1-4_crit_alarm**
+
+**in[1-4]_input**
+
+**in[1-4_label**
+
+**in[1-4]_lcrit**
+
+**in[1-41_lcrit_alarm**
+
+The driver provides for power: input and alarms. Power alarm is supported only
+for power input.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "pin" and 3, 4 for "pout":
+
+**power[1-2]_alarm**
+
+**power[1-4]_input**
+
+**power[1-4]_label**
+
+The driver provides for temperature: input, maximum and critical thresholds
+and maximum and critical alarms.
+The driver exports the following attributes for via the sysfs files:
+
+**temp[1-2]_crit**
+
+**temp[1-2]_crit_alarm**
+
+**temp[1-2]_input**
+
+**temp[1-2]_max**
+
+**temp[1-2]_max_alarm**
diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst
index 2a570c214880..b83da0e94184 100644
--- a/Documentation/i2c/busses/i2c-i801.rst
+++ b/Documentation/i2c/busses/i2c-i801.rst
@@ -42,6 +42,7 @@ Supported adapters:
   * Intel Comet Lake (PCH)
   * Intel Elkhart Lake (PCH)
   * Intel Tiger Lake (PCH)
+  * Intel Jasper Lake (SOC)
 
    Datasheets: Publicly available at the Intel website
 
diff --git a/Documentation/i2c/busses/index.rst b/Documentation/i2c/busses/index.rst
index 97ca4d510816..2a26e251a335 100644
--- a/Documentation/i2c/busses/index.rst
+++ b/Documentation/i2c/busses/index.rst
@@ -1,4 +1,4 @@
-. SPDX-License-Identifier: GPL-2.0
+.. SPDX-License-Identifier: GPL-2.0
 
 ===============
 I2C Bus Drivers
diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst
index cd8d020f7ac5..a0fbaf6d0675 100644
--- a/Documentation/i2c/index.rst
+++ b/Documentation/i2c/index.rst
@@ -1,4 +1,4 @@
-. SPDX-License-Identifier: GPL-2.0
+.. SPDX-License-Identifier: GPL-2.0
 
 ===================
 I2C/SMBus Subsystem
diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst
index 1238f1fa3382..875ebe9e78e3 100644
--- a/Documentation/i2c/instantiating-devices.rst
+++ b/Documentation/i2c/instantiating-devices.rst
@@ -123,7 +123,7 @@ present or not (for example for an optional feature which is not present
 on cheap variants of a board but you have no way to tell them apart), or
 it may have different addresses from one board to the next (manufacturer
 changing its design without notice). In this case, you can call
-i2c_new_probed_device() instead of i2c_new_device().
+i2c_new_scanned_device() instead of i2c_new_device().
 
 Example (from the nxp OHCI driver)::
 
@@ -139,8 +139,8 @@ Example (from the nxp OHCI driver)::
 	i2c_adap = i2c_get_adapter(2);
 	memset(&i2c_info, 0, sizeof(struct i2c_board_info));
 	strscpy(i2c_info.type, "isp1301_nxp", sizeof(i2c_info.type));
-	isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
-						   normal_i2c, NULL);
+	isp1301_i2c_client = i2c_new_scanned_device(i2c_adap, &i2c_info,
+						    normal_i2c, NULL);
 	i2c_put_adapter(i2c_adap);
 	(...)
   }
@@ -153,14 +153,14 @@ simply gives up.
 The driver which instantiated the I2C device is responsible for destroying
 it on cleanup. This is done by calling i2c_unregister_device() on the
 pointer that was earlier returned by i2c_new_device() or
-i2c_new_probed_device().
+i2c_new_scanned_device().
 
 
 Method 3: Probe an I2C bus for certain devices
 ----------------------------------------------
 
 Sometimes you do not have enough information about an I2C device, not even
-to call i2c_new_probed_device(). The typical case is hardware monitoring
+to call i2c_new_scanned_device(). The typical case is hardware monitoring
 chips on PC mainboards. There are several dozen models, which can live
 at 25 different addresses. Given the huge number of mainboards out there,
 it is next to impossible to build an exhaustive list of the hardware
diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst
index dddf0a14ab7c..ced309b5e0cc 100644
--- a/Documentation/i2c/writing-clients.rst
+++ b/Documentation/i2c/writing-clients.rst
@@ -185,14 +185,14 @@ Sometimes you know that a device is connected to a given I2C bus, but you
 don't know the exact address it uses.  This happens on TV adapters for
 example, where the same driver supports dozens of slightly different
 models, and I2C device addresses change from one model to the next.  In
-that case, you can use the i2c_new_probed_device() variant, which is
+that case, you can use the i2c_new_scanned_device() variant, which is
 similar to i2c_new_device(), except that it takes an additional list of
 possible I2C addresses to probe.  A device is created for the first
 responsive address in the list.  If you expect more than one device to be
-present in the address range, simply call i2c_new_probed_device() that
+present in the address range, simply call i2c_new_scanned_device() that
 many times.
 
-The call to i2c_new_device() or i2c_new_probed_device() typically happens
+The call to i2c_new_device() or i2c_new_scanned_device() typically happens
 in the I2C bus driver. You may want to save the returned i2c_client
 reference for later use.
 
@@ -237,7 +237,7 @@ Device Deletion
 ---------------
 
 Each I2C device which has been created using i2c_new_device() or
-i2c_new_probed_device() can be unregistered by calling
+i2c_new_scanned_device() can be unregistered by calling
 i2c_unregister_device().  If you don't call it explicitly, it will be
 called automatically before the underlying I2C bus itself is removed, as a
 device can't survive its parent in the device driver model.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index b843e313d2f2..e99d0bd2589d 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -57,7 +57,6 @@ the kernel interface as seen by application developers.
    :maxdepth: 2
 
    userspace-api/index
-   ioctl/index
 
 
 Introduction to kernel development
@@ -135,6 +134,14 @@ needed).
    mic/index
    scheduler/index
 
+Architecture-agnostic documentation
+-----------------------------------
+
+.. toctree::
+   :maxdepth: 2
+
+   asm-annotations
+
 Architecture-specific documentation
 -----------------------------------
 
diff --git a/Documentation/ioctl/index.rst b/Documentation/ioctl/index.rst
deleted file mode 100644
index 0f0a857f6615..000000000000
--- a/Documentation/ioctl/index.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-======
-IOCTLs
-======
-
-.. toctree::
-   :maxdepth: 1
-
-   ioctl-number
-
-   botching-up-ioctls
-   ioctl-decoding
-
-   cdrom
-   hdio
diff --git a/Documentation/isdn/avmb1.rst b/Documentation/isdn/avmb1.rst
deleted file mode 100644
index de3961e67553..000000000000
--- a/Documentation/isdn/avmb1.rst
+++ /dev/null
@@ -1,246 +0,0 @@
-================================
-Driver for active AVM Controller
-================================
-
-The driver provides a kernel capi2.0 Interface (kernelcapi) and
-on top of this a User-Level-CAPI2.0-interface (capi)
-and a driver to connect isdn4linux with CAPI2.0 (capidrv).
-The lowlevel interface can be used to implement a CAPI2.0
-also for passive cards since July 1999.
-
-The author can be reached at calle@calle.in-berlin.de.
-The command avmcapictrl is part of the isdn4k-utils.
-t4-files can be found at ftp://ftp.avm.de/cardware/b1/linux/firmware
-
-Currently supported cards:
-
-	- B1 ISA (all versions)
-	- B1 PCI
-	- T1/T1B (HEMA card)
-	- M1
-	- M2
-	- B1 PCMCIA
-
-Installing
-----------
-
-You need at least /dev/capi20 to load the firmware.
-
-::
-
-    mknod /dev/capi20 c 68 0
-    mknod /dev/capi20.00 c 68 1
-    mknod /dev/capi20.01 c 68 2
-    .
-    .
-    .
-    mknod /dev/capi20.19 c 68 20
-
-Running
--------
-
-To use the card you need the t4-files to download the firmware.
-AVM GmbH provides several t4-files for the different D-channel
-protocols (b1.t4 for Euro-ISDN). Install these file in /lib/isdn.
-
-if you configure as modules load the modules this way::
-
-    insmod /lib/modules/current/misc/capiutil.o
-    insmod /lib/modules/current/misc/b1.o
-    insmod /lib/modules/current/misc/kernelcapi.o
-    insmod /lib/modules/current/misc/capidrv.o
-    insmod /lib/modules/current/misc/capi.o
-
-if you have an B1-PCI card load the module b1pci.o::
-
-    insmod /lib/modules/current/misc/b1pci.o
-
-and load the firmware with::
-
-    avmcapictrl load /lib/isdn/b1.t4 1
-
-if you have an B1-ISA card load the module b1isa.o
-and add the card by calling::
-
-    avmcapictrl add 0x150 15
-
-and load the firmware by calling::
-
-    avmcapictrl load /lib/isdn/b1.t4 1
-
-if you have an T1-ISA card load the module t1isa.o
-and add the card by calling::
-
-    avmcapictrl add 0x450 15 T1 0
-
-and load the firmware by calling::
-
-    avmcapictrl load /lib/isdn/t1.t4 1
-
-if you have an PCMCIA card (B1/M1/M2) load the module b1pcmcia.o
-before you insert the card.
-
-Leased Lines with B1
---------------------
-
-Init card and load firmware.
-
-For an D64S use "FV: 1" as phone number
-
-For an D64S2 use "FV: 1" and "FV: 2" for multilink
-or "FV: 1,2" to use CAPI channel bundling.
-
-/proc-Interface
------------------
-
-/proc/capi::
-
-  dr-xr-xr-x   2 root     root            0 Jul  1 14:03 .
-  dr-xr-xr-x  82 root     root            0 Jun 30 19:08 ..
-  -r--r--r--   1 root     root            0 Jul  1 14:03 applications
-  -r--r--r--   1 root     root            0 Jul  1 14:03 applstats
-  -r--r--r--   1 root     root            0 Jul  1 14:03 capi20
-  -r--r--r--   1 root     root            0 Jul  1 14:03 capidrv
-  -r--r--r--   1 root     root            0 Jul  1 14:03 controller
-  -r--r--r--   1 root     root            0 Jul  1 14:03 contrstats
-  -r--r--r--   1 root     root            0 Jul  1 14:03 driver
-  -r--r--r--   1 root     root            0 Jul  1 14:03 ncci
-  -r--r--r--   1 root     root            0 Jul  1 14:03 users
-
-/proc/capi/applications:
-   applid level3cnt datablkcnt datablklen ncci-cnt recvqueuelen
-	level3cnt:
-	    capi_register parameter
-	datablkcnt:
-	    capi_register parameter
-	ncci-cnt:
-	    current number of nccis (connections)
-	recvqueuelen:
-	    number of messages on receive queue
-
-   for example::
-
-	1 -2 16 2048 1 0
-	2 2 7 2048 1 0
-
-/proc/capi/applstats:
-   applid recvctlmsg nrecvdatamsg nsentctlmsg nsentdatamsg
-	recvctlmsg:
-	    capi messages received without DATA_B3_IND
-	recvdatamsg:
-	    capi DATA_B3_IND received
-	sentctlmsg:
-	    capi messages sent without DATA_B3_REQ
-	sentdatamsg:
-	    capi DATA_B3_REQ sent
-
-   for example::
-
-	1 2057 1699 1721 1699
-
-/proc/capi/capi20: statistics of capi.o (/dev/capi20)
-    minor nopen nrecvdropmsg nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
-	minor:
-	    minor device number of capi device
-	nopen:
-	    number of calls to devices open
-	nrecvdropmsg:
-	    capi messages dropped (messages in recvqueue in close)
-	nrecvctlmsg:
-	    capi messages received without DATA_B3_IND
-	nrecvdatamsg:
-	    capi DATA_B3_IND received
-	nsentctlmsg:
-	    capi messages sent without DATA_B3_REQ
-	nsentdatamsg:
-	    capi DATA_B3_REQ sent
-
-   for example::
-
-	1 2 18 0 16 2
-
-/proc/capi/capidrv: statistics of capidrv.o (capi messages)
-    nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
-	nrecvctlmsg:
-	    capi messages received without DATA_B3_IND
-	nrecvdatamsg:
-	    capi DATA_B3_IND received
-	nsentctlmsg:
-	    capi messages sent without DATA_B3_REQ
-	nsentdatamsg:
-	    capi DATA_B3_REQ sent
-
-   for example:
-	2780 2226 2256 2226
-
-/proc/capi/controller:
-   controller drivername state cardname   controllerinfo
-
-   for example::
-
-	1 b1pci      running  b1pci-e000       B1 3.07-01 0xe000 19
-	2 t1isa      running  t1isa-450        B1 3.07-01 0x450 11 0
-	3 b1pcmcia   running  m2-150           B1 3.07-01 0x150 5
-
-/proc/capi/contrstats:
-    controller nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
-	nrecvctlmsg:
-	    capi messages received without DATA_B3_IND
-	nrecvdatamsg:
-	    capi DATA_B3_IND received
-	nsentctlmsg:
-	    capi messages sent without DATA_B3_REQ
-	nsentdatamsg:
-	    capi DATA_B3_REQ sent
-
-   for example::
-
-	1 2845 2272 2310 2274
-	2 2 0 2 0
-	3 2 0 2 0
-
-/proc/capi/driver:
-   drivername ncontroller
-
-   for example::
-
-	b1pci                            1
-	t1isa                            1
-	b1pcmcia                         1
-	b1isa                            0
-
-/proc/capi/ncci:
-   apllid ncci winsize sendwindow
-
-   for example::
-
-	1 0x10101 8 0
-
-/proc/capi/users: kernelmodules that use the kernelcapi.
-   name
-
-   for example::
-
-	capidrv
-	capi20
-
-Questions
----------
-
-Check out the FAQ (ftp.isdn4linux.de) or subscribe to the
-linux-avmb1@calle.in-berlin.de mailing list by sending
-a mail to majordomo@calle.in-berlin.de with
-subscribe linux-avmb1
-in the body.
-
-German documentation and several scripts can be found at
-ftp://ftp.avm.de/cardware/b1/linux/
-
-Bugs
-----
-
-If you find any please let me know.
-
-Enjoy,
-
-Carsten Paeth (calle@calle.in-berlin.de)
diff --git a/Documentation/isdn/gigaset.rst b/Documentation/isdn/gigaset.rst
deleted file mode 100644
index 98b4ec521c51..000000000000
--- a/Documentation/isdn/gigaset.rst
+++ /dev/null
@@ -1,465 +0,0 @@
-==========================
-GigaSet 307x Device Driver
-==========================
-
-1.   Requirements
-=================
-
-1.1. Hardware
--------------
-
-     This driver supports the connection of the Gigaset 307x/417x family of
-     ISDN DECT bases via Gigaset M101 Data, Gigaset M105 Data or direct USB
-     connection. The following devices are reported to be compatible:
-
-     Bases:
-       - Siemens Gigaset 3070/3075 isdn
-       - Siemens Gigaset 4170/4175 isdn
-       - Siemens Gigaset SX205/255
-       - Siemens Gigaset SX353
-       - T-Com Sinus 45 [AB] isdn
-       - T-Com Sinus 721X[A] [SE]
-       - Vox Chicago 390 ISDN (KPN Telecom)
-
-     RS232 data boxes:
-       - Siemens Gigaset M101 Data
-       - T-Com Sinus 45 Data 1
-
-     USB data boxes:
-       - Siemens Gigaset M105 Data
-       - Siemens Gigaset USB Adapter DECT
-       - T-Com Sinus 45 Data 2
-       - T-Com Sinus 721 data
-       - Chicago 390 USB (KPN)
-
-     See also http://www.erbze.info/sinus_gigaset.htm
-       (archived at https://web.archive.org/web/20100717020421/http://www.erbze.info:80/sinus_gigaset.htm ) and
-	http://gigaset307x.sourceforge.net/
-
-     We had also reports from users of Gigaset M105 who could use the drivers
-     with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.5.)
-     If you have another device that works with our driver, please let us know.
-
-     Chances of getting an USB device to work are good if the output of::
-
-	lsusb
-
-     at the command line contains one of the following::
-
-	ID 0681:0001
-	ID 0681:0002
-	ID 0681:0009
-	ID 0681:0021
-	ID 0681:0022
-
-1.2. Software
--------------
-
-     The driver works with the Kernel CAPI subsystem and can be used with any
-     software which is able to use CAPI 2.0 for ISDN connections (voice or data).
-
-     There are some user space tools available at
-     https://sourceforge.net/projects/gigaset307x/
-     which provide access to additional device specific functions like SMS,
-     phonebook or call journal.
-
-
-2.   How to use the driver
-==========================
-
-2.1. Modules
-------------
-
-     For the devices to work, the proper kernel modules have to be loaded.
-     This normally happens automatically when the system detects the USB
-     device (base, M105) or when the line discipline is attached (M101). It
-     can also be triggered manually using the modprobe(8) command, for example
-     for troubleshooting or to pass module parameters.
-
-     The module ser_gigaset provides a serial line discipline N_GIGASET_M101
-     which uses the regular serial port driver to access the device, and must
-     therefore be attached to the serial device to which the M101 is connected.
-     The ldattach(8) command (included in util-linux-ng release 2.14 or later)
-     can be used for that purpose, for example::
-
-	ldattach GIGASET_M101 /dev/ttyS1
-
-     This will open the device file, attach the line discipline to it, and
-     then sleep in the background, keeping the device open so that the line
-     discipline remains active. To deactivate it, kill the daemon, for example
-     with::
-
-	killall ldattach
-
-     before disconnecting the device. To have this happen automatically at
-     system startup/shutdown on an LSB compatible system, create and activate
-     an appropriate LSB startup script /etc/init.d/gigaset. (The init name
-     'gigaset' is officially assigned to this project by LANANA.)
-     Alternatively, just add the 'ldattach' command line to /etc/rc.local.
-
-     The modules accept the following parameters:
-
-	=============== ========== ==========================================
-	Module		Parameter  Meaning
-
-	gigaset		debug	   debug level (see section 3.2.)
-
-			startmode  initial operation mode (see section 2.5.):
-	bas_gigaset )		   1=CAPI (default), 0=Unimodem
-	ser_gigaset )
-	usb_gigaset )	cidmode    initial Call-ID mode setting (see section
-				   2.5.): 1=on (default), 0=off
-
-	=============== ========== ==========================================
-
-     Depending on your distribution you may want to create a separate module
-     configuration file like /etc/modprobe.d/gigaset.conf for these.
-
-2.2. Device nodes for user space programs
------------------------------------------
-
-     The device can be accessed from user space (eg. by the user space tools
-     mentioned in 1.2.) through the device nodes:
-
-     - /dev/ttyGS0 for M101 (RS232 data boxes)
-     - /dev/ttyGU0 for M105 (USB data boxes)
-     - /dev/ttyGB0 for the base driver (direct USB connection)
-
-     If you connect more than one device of a type, they will get consecutive
-     device nodes, eg. /dev/ttyGU1 for a second M105.
-
-     You can also set a "default device" for the user space tools to use when
-     no device node is given as parameter, by creating a symlink /dev/ttyG to
-     one of them, eg.::
-
-	ln -s /dev/ttyGB0 /dev/ttyG
-
-     The devices accept the following device specific ioctl calls
-     (defined in gigaset_dev.h):
-
-     ``ioctl(int fd, GIGASET_REDIR, int *cmd);``
-
-     If cmd==1, the device is set to be controlled exclusively through the
-     character device node; access from the ISDN subsystem is blocked.
-
-     If cmd==0, the device is set to be used from the ISDN subsystem and does
-     not communicate through the character device node.
-
-     ``ioctl(int fd, GIGASET_CONFIG, int *cmd);``
-
-     (ser_gigaset and usb_gigaset only)
-
-     If cmd==1, the device is set to adapter configuration mode where commands
-     are interpreted by the M10x DECT adapter itself instead of being
-     forwarded to the base station. In this mode, the device accepts the
-     commands described in Siemens document "AT-Kommando Alignment M10x Data"
-     for setting the operation mode, associating with a base station and
-     querying parameters like field strengh and signal quality.
-
-     Note that there is no ioctl command for leaving adapter configuration
-     mode and returning to regular operation. In order to leave adapter
-     configuration mode, write the command ATO to the device.
-
-     ``ioctl(int fd, GIGASET_BRKCHARS, unsigned char brkchars[6]);``
-
-     (usb_gigaset only)
-
-     Set the break characters on an M105's internal serial adapter to the six
-     bytes stored in brkchars[]. Unused bytes should be set to zero.
-
-     ioctl(int fd, GIGASET_VERSION, unsigned version[4]);
-     Retrieve version information from the driver. version[0] must be set to
-     one of:
-
-     - GIGVER_DRIVER: retrieve driver version
-     - GIGVER_COMPAT: retrieve interface compatibility version
-     - GIGVER_FWBASE: retrieve the firmware version of the base
-
-     Upon return, version[] is filled with the requested version information.
-
-2.3. CAPI
----------
-
-     The devices will show up as CAPI controllers as soon as the
-     corresponding driver module is loaded, and can then be used with
-     CAPI 2.0 kernel and user space applications. For user space access,
-     the module capi.ko must be loaded.
-
-     Most distributions handle loading and unloading of the various CAPI
-     modules automatically via the command capiinit(1) from the capi4k-utils
-     package or a similar mechanism. Note that capiinit(1) cannot unload the
-     Gigaset drivers because it doesn't support more than one module per
-     driver.
-
-2.5. Unimodem mode
-------------------
-
-     In this mode the device works like a modem connected to a serial port
-     (the /dev/ttyGU0, ... mentioned above) which understands the commands::
-
-	 ATZ                 init, reset
-	     => OK or ERROR
-	 ATD
-	 ATDT                dial
-	     => OK, CONNECT,
-		BUSY,
-		NO DIAL TONE,
-		NO CARRIER,
-		NO ANSWER
-	 <pause>+++<pause>   change to command mode when connected
-	 ATH                 hangup
-
-     You can use some configuration tool of your distribution to configure this
-     "modem" or configure pppd/wvdial manually. There are some example ppp
-     configuration files and chat scripts in the gigaset-VERSION/ppp directory
-     in the driver packages from https://sourceforge.net/projects/gigaset307x/.
-     Please note that the USB drivers are not able to change the state of the
-     control lines. This means you must use "Stupid Mode" if you are using
-     wvdial or you should use the nocrtscts option of pppd.
-     You must also assure that the ppp_async module is loaded with the parameter
-     flag_time=0. You can do this e.g. by adding a line like::
-
-	options ppp_async flag_time=0
-
-     to an appropriate module configuration file, like::
-
-	/etc/modprobe.d/gigaset.conf.
-
-     Unimodem mode is needed for making some devices [e.g. SX100] work which
-     do not support the regular Gigaset command set. If debug output (see
-     section 3.2.) shows something like this when dialing::
-
-	 CMD Received: ERROR
-	 Available Params: 0
-	 Connection State: 0, Response: -1
-	 gigaset_process_response: resp_code -1 in ConState 0 !
-	 Timeout occurred
-
-     then switching to unimodem mode may help.
-
-     If you have installed the command line tool gigacontr, you can enter
-     unimodem mode using::
-
-	 gigacontr --mode unimodem
-
-     You can switch back using::
-
-	 gigacontr --mode isdn
-
-     You can also put the driver directly into Unimodem mode when it's loaded,
-     by passing the module parameter startmode=0 to the hardware specific
-     module, e.g.::
-
-	modprobe usb_gigaset startmode=0
-
-     or by adding a line like::
-
-	options usb_gigaset startmode=0
-
-     to an appropriate module configuration file, like::
-
-	/etc/modprobe.d/gigaset.conf
-
-2.6. Call-ID (CID) mode
------------------------
-
-     Call-IDs are numbers used to tag commands to, and responses from, the
-     Gigaset base in order to support the simultaneous handling of multiple
-     ISDN calls. Their use can be enabled ("CID mode") or disabled ("Unimodem
-     mode"). Without Call-IDs (in Unimodem mode), only a very limited set of
-     functions is available. It allows outgoing data connections only, but
-     does not signal incoming calls or other base events.
-
-     DECT cordless data devices (M10x) permanently occupy the cordless
-     connection to the base while Call-IDs are activated. As the Gigaset
-     bases only support one DECT data connection at a time, this prevents
-     other DECT cordless data devices from accessing the base.
-
-     During active operation, the driver switches to the necessary mode
-     automatically. However, for the reasons above, the mode chosen when
-     the device is not in use (idle) can be selected by the user.
-
-     - If you want to receive incoming calls, you can use the default
-       settings (CID mode).
-     - If you have several DECT data devices (M10x) which you want to use
-       in turn, select Unimodem mode by passing the parameter "cidmode=0" to
-       the appropriate driver module (ser_gigaset or usb_gigaset).
-
-     If you want both of these at once, you are out of luck.
-
-     You can also use the tty class parameter "cidmode" of the device to
-     change its CID mode while the driver is loaded, eg.::
-
-	echo 0 > /sys/class/tty/ttyGU0/cidmode
-
-2.7. Dialing Numbers
---------------------
-provided by an application for dialing out must
-     be a public network number according to the local dialing plan, without
-     any dial prefix for getting an outside line.
-
-     Internal calls can be made by providing an internal extension number
-     prefixed with ``**`` (two asterisks) as the called party number. So to dial
-     eg. the first registered DECT handset, give ``**11`` as the called party
-     number. Dialing ``***`` (three asterisks) calls all extensions
-     simultaneously (global call).
-
-     Unimodem mode does not support internal calls.
-
-2.8. Unregistered Wireless Devices (M101/M105)
-----------------------------------------------
-
-     The main purpose of the ser_gigaset and usb_gigaset drivers is to allow
-     the M101 and M105 wireless devices to be used as ISDN devices for ISDN
-     connections through a Gigaset base. Therefore they assume that the device
-     is registered to a DECT base.
-
-     If the M101/M105 device is not registered to a base, initialization of
-     the device fails, and a corresponding error message is logged by the
-     driver. In that situation, a restricted set of functions is available
-     which includes, in particular, those necessary for registering the device
-     to a base or for switching it between Fixed Part and Portable Part
-     modes. See the gigacontr(8) manpage for details.
-
-3.   Troubleshooting
-====================
-
-3.1. Solutions to frequently reported problems
-----------------------------------------------
-
-     Problem:
-	You have a slow provider and isdn4linux gives up dialing too early.
-     Solution:
-	Load the isdn module using the dialtimeout option. You can do this e.g.
-	by adding a line like::
-
-	   options isdn dialtimeout=15
-
-	to /etc/modprobe.d/gigaset.conf or a similar file.
-
-     Problem:
-	The isdnlog program emits error messages or just doesn't work.
-     Solution:
-	Isdnlog supports only the HiSax driver. Do not attempt to use it with
-	other drivers such as Gigaset.
-
-     Problem:
-	You have two or more DECT data adapters (M101/M105) and only the
-	first one you turn on works.
-     Solution:
-	Select Unimodem mode for all DECT data adapters. (see section 2.5.)
-
-     Problem:
-	Messages like this::
-
-	    usb_gigaset 3-2:1.0: Could not initialize the device.
-
-	appear in your syslog.
-     Solution:
-	Check whether your M10x wireless device is correctly registered to the
-	Gigaset base. (see section 2.7.)
-
-3.2. Telling the driver to provide more information
----------------------------------------------------
-     Building the driver with the "Gigaset debugging" kernel configuration
-     option (CONFIG_GIGASET_DEBUG) gives it the ability to produce additional
-     information useful for debugging.
-
-     You can control the amount of debugging information the driver produces by
-     writing an appropriate value to /sys/module/gigaset/parameters/debug,
-     e.g.::
-
-	echo 0 > /sys/module/gigaset/parameters/debug
-
-     switches off debugging output completely,
-
-     ::
-
-	echo 0x302020 > /sys/module/gigaset/parameters/debug
-
-     enables a reasonable set of debugging output messages. These values are
-     bit patterns where every bit controls a certain type of debugging output.
-     See the constants DEBUG_* in the source file gigaset.h for details.
-
-     The initial value can be set using the debug parameter when loading the
-     module "gigaset", e.g. by adding a line::
-
-	options gigaset debug=0
-
-     to your module configuration file, eg. /etc/modprobe.d/gigaset.conf
-
-     Generated debugging information can be found
-     - as output of the command::
-
-	 dmesg
-
-     - in system log files written by your syslog daemon, usually
-       in /var/log/, e.g. /var/log/messages.
-
-3.3. Reporting problems and bugs
---------------------------------
-     If you can't solve problems with the driver on your own, feel free to
-     use one of the forums, bug trackers, or mailing lists on
-
-	 https://sourceforge.net/projects/gigaset307x
-
-     or write an electronic mail to the maintainers.
-
-     Try to provide as much information as possible, such as
-
-     - distribution
-     - kernel version (uname -r)
-     - gcc version (gcc --version)
-     - hardware architecture (uname -m, ...)
-     - type and firmware version of your device (base and wireless module,
-       if any)
-     - output of "lsusb -v" (if using an USB device)
-     - error messages
-     - relevant system log messages (it would help if you activate debug
-       output as described in 3.2.)
-
-     For help with general configuration problems not specific to our driver,
-     such as isdn4linux and network configuration issues, please refer to the
-     appropriate forums and newsgroups.
-
-3.4. Reporting problem solutions
---------------------------------
-     If you solved a problem with our drivers, wrote startup scripts for your
-     distribution, ... feel free to contact us (using one of the places
-     mentioned in 3.3.). We'd like to add scripts, hints, documentation
-     to the driver and/or the project web page.
-
-
-4.   Links, other software
-==========================
-
-     - Sourceforge project developing this driver and associated tools
-	 https://sourceforge.net/projects/gigaset307x
-     - Yahoo! Group on the Siemens Gigaset family of devices
-	 https://de.groups.yahoo.com/group/Siemens-Gigaset
-     - Siemens Gigaset/T-Sinus compatibility table
-	 http://www.erbze.info/sinus_gigaset.htm
-	    (archived at https://web.archive.org/web/20100717020421/http://www.erbze.info:80/sinus_gigaset.htm )
-
-
-5.   Credits
-============
-
-     Thanks to
-
-     Karsten Keil
-	for his help with isdn4linux
-     Deti Fliegl
-	for his base driver code
-     Dennis Dietrich
-	for his kernel 2.6 patches
-     Andreas Rummel
-	for his work and logs to get unimodem mode working
-     Andreas Degert
-	for his logs and patches to get cx 100 working
-     Dietrich Feist
-	for his generous donation of one M105 and two M101 cordless adapters
-     Christoph Schweers
-	for his generous donation of a M34 device
-
-     and all the other people who sent logs and other information.
diff --git a/Documentation/isdn/hysdn.rst b/Documentation/isdn/hysdn.rst
deleted file mode 100644
index 0a168d1cbffc..000000000000
--- a/Documentation/isdn/hysdn.rst
+++ /dev/null
@@ -1,196 +0,0 @@
-============
-Hysdn Driver
-============
-
-The hysdn driver has been written by
-Werner Cornelius (werner@isdn4linux.de or werner@titro.de)
-for Hypercope GmbH Aachen Germany. Hypercope agreed to publish this driver
-under the GNU General Public License.
-
-The CAPI 2.0-support was added by Ulrich Albrecht (ualbrecht@hypercope.de)
-for Hypercope GmbH Aachen, Germany.
-
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-.. Table of contents
-
-    1. About the driver
-
-    2. Loading/Unloading the driver
-
-    3. Entries in the /proc filesystem
-
-    4. The /proc/net/hysdn/cardconfX file
-
-    5. The /proc/net/hysdn/cardlogX file
-
-    6. Where to get additional info and help
-
-
-1. About the driver
-===================
-
-   The drivers/isdn/hysdn subdir contains a driver for HYPERCOPEs active
-   PCI isdn cards Champ, Ergo and Metro. To enable support for this cards
-   enable ISDN support in the kernel config and support for HYSDN cards in
-   the active cards submenu. The driver may only be compiled and used if
-   support for loadable modules and the process filesystem have been enabled.
-
-   These cards provide two different interfaces to the kernel. Without the
-   optional CAPI 2.0 support, they register as ethernet card. IP-routing
-   to a ISDN-destination is performed on the card itself. All necessary
-   handlers for various protocols like ppp and others as well as config info
-   and firmware may be fetched from Hypercopes WWW-Site www.hypercope.de.
-
-   With CAPI 2.0 support enabled, the card can also be used as a CAPI 2.0
-   compliant devices with either CAPI 2.0 applications
-   (check isdn4k-utils) or -using the capidrv module- as a regular
-   isdn4linux device. This is done via the same mechanism as with the
-   active AVM cards and in fact uses the same module.
-
-
-2. Loading/Unloading the driver
-===============================
-
-   The module has no command line parameters and auto detects up to 10 cards
-   in the id-range 0-9.
-   If a loaded driver shall be unloaded all open files in the /proc/net/hysdn
-   subdir need to be closed and all ethernet interfaces allocated by this
-   driver must be shut down. Otherwise the module counter will avoid a module
-   unload.
-
-   If you are using the CAPI 2.0-interface, make sure to load/modprobe the
-   kernelcapi-module first.
-
-   If you plan to use the capidrv-link to isdn4linux, make sure to load
-   capidrv.o after all modules using this driver (i.e. after hysdn and
-   any avm-specific modules).
-
-3. Entries in the /proc filesystem
-==================================
-
-   When the module has been loaded it adds the directory hysdn in the
-   /proc/net tree. This directory contains exactly 2 file entries for each
-   card. One is called cardconfX and the other cardlogX, where X is the
-   card id number from 0 to 9.
-   The cards are numbered in the order found in the PCI config data.
-
-4. The /proc/net/hysdn/cardconfX file
-=====================================
-
-   This file may be read to get by everyone to get info about the cards type,
-   actual state, available features and used resources.
-   The first 3 entries (id, bus and slot) are PCI info fields, the following
-   type field gives the information about the cards type:
-
-   - 4 -> Ergo card (server card with 2 b-chans)
-   - 5 -> Metro card (server card with 4 or 8 b-chans)
-   - 6 -> Champ card (client card with 2 b-chans)
-
-   The following 3 fields show the hardware assignments for irq, iobase and the
-   dual ported memory (dp-mem).
-
-   The fields b-chans and fax-chans announce the available card resources of
-   this types for the user.
-
-   The state variable indicates the actual drivers state for this card with the
-   following assignments.
-
-   - 0 -> card has not been booted since driver load
-   - 1 -> card booting is actually in progess
-   - 2 -> card is in an error state due to a previous boot failure
-   - 3 -> card is booted and active
-
-   And the last field (device) shows the name of the ethernet device assigned
-   to this card. Up to the first successful boot this field only shows a -
-   to tell that no net device has been allocated up to now. Once a net device
-   has been allocated it remains assigned to this card, even if a card is
-   rebooted and an boot error occurs.
-
-   Writing to the cardconfX file boots the card or transfers config lines to
-   the cards firmware. The type of data is automatically detected when the
-   first data is written. Only root has write access to this file.
-   The firmware boot files are normally called hyclient.pof for client cards
-   and hyserver.pof for server cards.
-   After successfully writing the boot file, complete config files or single
-   config lines may be copied to this file.
-   If an error occurs the return value given to the writing process has the
-   following additional codes (decimal):
-
-   ==== ============================================
-   1000 Another process is currently bootng the card
-   1001 Invalid firmware header
-   1002 Boards dual-port RAM test failed
-   1003 Internal firmware handler error
-   1004 Boot image size invalid
-   1005 First boot stage (bootstrap loader) failed
-   1006 Second boot stage failure
-   1007 Timeout waiting for card ready during boot
-   1008 Operation only allowed in booted state
-   1009 Config line too long
-   1010 Invalid channel number
-   1011 Timeout sending config data
-   ==== ============================================
-
-   Additional info about error reasons may be fetched from the log output.
-
-5. The /proc/net/hysdn/cardlogX file
-====================================
-
-   The cardlogX file entry may be opened multiple for reading by everyone to
-   get the cards and drivers log data. Card messages always start with the
-   keyword LOG. All other lines are output from the driver.
-   The driver log data may be redirected to the syslog by selecting the
-   appropriate bitmask. The cards log messages will always be send to this
-   interface but never to the syslog.
-
-   A root user may write a decimal or hex (with 0x) value t this file to select
-   desired output options. As mentioned above the cards log dat is always
-   written to the cardlog file independent of the following options only used
-   to check and debug the driver itself:
-
-   For example::
-
-	echo "0x34560078" > /proc/net/hysdn/cardlog0
-
-   to output the hex log mask 34560078 for card 0.
-
-   The written value is regarded as an unsigned 32-Bit value, bit ored for
-   desired output. The following bits are already assigned:
-
-   ==========   ============================================================
-   0x80000000   All driver log data is alternatively via syslog
-   0x00000001   Log memory allocation errors
-   0x00000010   Firmware load start and close are logged
-   0x00000020   Log firmware record parser
-   0x00000040   Log every firmware write actions
-   0x00000080   Log all card related boot messages
-   0x00000100   Output all config data sent for debugging purposes
-   0x00000200   Only non comment config lines are shown wth channel
-   0x00000400   Additional conf log output
-   0x00001000   Log the asynchronous scheduler actions (config and log)
-   0x00100000   Log all open and close actions to /proc/net/hysdn/card files
-   0x00200000   Log all actions from /proc file entries
-   0x00010000   Log network interface init and deinit
-   ==========   ============================================================
-
-6. Where to get additional info and help
-========================================
-
-   If you have any problems concerning the driver or configuration contact
-   the Hypercope support team (support@hypercope.de) and or the authors
-   Werner Cornelius (werner@isdn4linux or cornelius@titro.de) or
-   Ulrich Albrecht (ualbrecht@hypercope.de).
diff --git a/Documentation/isdn/index.rst b/Documentation/isdn/index.rst
index 407e74b78372..9622939fa526 100644
--- a/Documentation/isdn/index.rst
+++ b/Documentation/isdn/index.rst
@@ -9,9 +9,6 @@ ISDN
 
    interface_capi
 
-   avmb1
-   gigaset
-   hysdn
    m_isdn
 
    credits
diff --git a/Documentation/isdn/interface_capi.rst b/Documentation/isdn/interface_capi.rst
index 01a4b5ade9a4..fe2421444b76 100644
--- a/Documentation/isdn/interface_capi.rst
+++ b/Documentation/isdn/interface_capi.rst
@@ -26,13 +26,6 @@ This standard is freely available from https://www.capi.org.
 2. Driver and Device Registration
 =================================
 
-CAPI drivers optionally register themselves with Kernel CAPI by calling the
-Kernel CAPI function register_capi_driver() with a pointer to a struct
-capi_driver. This structure must be filled with the name and revision of the
-driver, and optionally a pointer to a callback function, add_card(). The
-registration can be revoked by calling the function unregister_capi_driver()
-with a pointer to the same struct capi_driver.
-
 CAPI drivers must register each of the ISDN devices they control with Kernel
 CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a
 struct capi_ctr before they can be used. This structure must be filled with
@@ -89,9 +82,6 @@ register_capi_driver():
 	the name of the driver, as a zero-terminated ASCII string
 ``char revision[32]``
 	the revision number of the driver, as a zero-terminated ASCII string
-``int (*add_card)(struct capi_driver *driver, capicardparams *data)``
-	a callback function pointer (may be NULL)
-
 
 4.2 struct capi_ctr
 -------------------
@@ -178,12 +168,6 @@ to be set by the driver before calling attach_capi_ctr():
 	pointer to a callback function returning the entry for the device in
 	the CAPI controller info table, /proc/capi/controller
 
-``const struct file_operations *proc_fops``
-	pointers to callback functions for the device's proc file
-	system entry, /proc/capi/controllers/<n>; pointer to the device's
-	capi_ctr structure is available from struct proc_dir_entry::data
-	which is available from struct inode.
-
 Note:
   Callback functions except send_message() are never called in interrupt
   context.
@@ -267,25 +251,10 @@ _cmstruct   alternative representation for CAPI parameters of type 'struct'
 	    _cmsg structure members.
 =========== =================================================================
 
-Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
-messages between their transport encoding described in the CAPI 2.0 standard
-and their _cmsg structure representation. Note that capi_cmsg2message() does
-not know or check the size of its destination buffer. The caller must make
-sure it is big enough to accommodate the resulting CAPI message.
-
 
 5. Lower Layer Interface Functions
 ==================================
 
-(declared in <linux/isdn/capilli.h>)
-
-::
-
-  void register_capi_driver(struct capi_driver *drvr)
-  void unregister_capi_driver(struct capi_driver *drvr)
-
-register/unregister a driver with Kernel CAPI
-
 ::
 
   int attach_capi_ctr(struct capi_ctr *ctrlr)
@@ -302,13 +271,6 @@ signal controller ready/not ready
 
 ::
 
-  void capi_ctr_suspend_output(struct capi_ctr *ctrlr)
-  void capi_ctr_resume_output(struct capi_ctr *ctrlr)
-
-signal suspend/resume
-
-::
-
   void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid,
 			       struct sk_buff *skb)
 
@@ -319,21 +281,6 @@ for forwarding to the specified application
 6. Helper Functions and Macros
 ==============================
 
-Library functions (from <linux/isdn/capilli.h>):
-
-::
-
-  void capilib_new_ncci(struct list_head *head, u16 applid,
-			u32 ncci, u32 winsize)
-  void capilib_free_ncci(struct list_head *head, u16 applid, u32 ncci)
-  void capilib_release_appl(struct list_head *head, u16 applid)
-  void capilib_release(struct list_head *head)
-  void capilib_data_b3_conf(struct list_head *head, u16 applid,
-			u32 ncci, u16 msgid)
-  u16  capilib_data_b3_req(struct list_head *head, u16 applid,
-			u32 ncci, u16 msgid)
-
-
 Macros to extract/set element values from/in a CAPI message header
 (from <linux/isdn/capiutil.h>):
 
@@ -357,24 +304,6 @@ CAPIMSG_DATALEN(m)	CAPIMSG_SETDATALEN(m, len)	Data Length (u16)
 Library functions for working with _cmsg structures
 (from <linux/isdn/capiutil.h>):
 
-``unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg)``
-	Assembles a CAPI 2.0 message from the parameters in ``*cmsg``,
-	storing the result in ``*msg``.
-
-``unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg)``
-	Disassembles the CAPI 2.0 message in ``*msg``, storing the parameters
-	in ``*cmsg``.
-
-``unsigned capi_cmsg_header(_cmsg *cmsg, u16 ApplId, u8 Command, u8 Subcommand, u16 Messagenumber, u32 Controller)``
-	Fills the header part and address field of the _cmsg structure ``*cmsg``
-	with the given values, zeroing the remainder of the structure so only
-	parameters with non-default values need to be changed before sending
-	the message.
-
-``void capi_cmsg_answer(_cmsg *cmsg)``
-	Sets the low bit of the Subcommand field in ``*cmsg``, thereby
-	converting ``_REQ`` to ``_CONF`` and ``_IND`` to ``_RESP``.
-
 ``char *capi_cmd2str(u8 Command, u8 Subcommand)``
 	Returns the CAPI 2.0 message name corresponding to the given command
 	and subcommand values, as a static ASCII string. The return value may
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 74bef19f69f0..231e6a64957f 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -196,14 +196,11 @@ applicable everywhere (see syntax).
   or equal to the first symbol and smaller than or equal to the second
   symbol.
 
-- help text: "help" or "---help---"
+- help text: "help"
 
   This defines a help text. The end of the help text is determined by
   the indentation level, this means it ends at the first line which has
   a smaller indentation than the first line of the help text.
-  "---help---" and "help" do not differ in behaviour, "---help---" is
-  used to help visually separate configuration logic from help within
-  the file as an aid to developers.
 
 - misc options: "option" <symbol>[=<value>]
 
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index b89c88168d6a..d7e6534a8505 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -297,9 +297,19 @@ more details, with real examples.
 	If CONFIG_EXT2_FS is set to either 'y' (built-in) or 'm' (modular)
 	the corresponding obj- variable will be set, and kbuild will descend
 	down in the ext2 directory.
-	Kbuild only uses this information to decide that it needs to visit
-	the directory, it is the Makefile in the subdirectory that
-	specifies what is modular and what is built-in.
+
+	Kbuild uses this information not only to decide that it needs to visit
+	the directory, but also to decide whether or not to link objects from
+	the directory into vmlinux.
+
+	When Kbuild descends into the directory with 'y', all built-in objects
+	from that directory are combined into the built-in.a, which will be
+	eventually linked into vmlinux.
+
+	When Kbuild descends into the directory with 'm', in contrast, nothing
+	from that directory will be linked into vmlinux. If the Makefile in
+	that directory specifies obj-y, those objects will be left orphan.
+	It is very likely a bug of the Makefile or of dependencies in Kconfig.
 
 	It is good practice to use a `CONFIG_` variable when assigning directory
 	names. This allows kbuild to totally skip the directory if the
@@ -1115,23 +1125,6 @@ When kbuild executes, the following steps are followed (roughly):
 	In this example, extra-y is used to list object files that
 	shall be built, but shall not be linked as part of built-in.a.
 
-    header-test-y
-
-	header-test-y specifies headers (`*.h`) in the current directory that
-	should be compile tested to ensure they are self-contained,
-	i.e. compilable as standalone units. If CONFIG_HEADER_TEST is enabled,
-	this builds them as part of extra-y.
-
-    header-test-pattern-y
-
-	This works as a weaker version of header-test-y, and accepts wildcard
-	patterns. The typical usage is::
-
-		header-test-pattern-y += *.h
-
-	This specifies all the files that matches to `*.h` in the current
-	directory, but the files in 'header-test-' are excluded.
-
 6.7 Commands useful for building a boot image
 ---------------------------------------------
 
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index 774a998dcf37..69fa48ee93d6 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -492,11 +492,8 @@ build.
 	to the symbols from the kernel to check if all external symbols
 	are defined. This is done in the MODPOST step. modpost obtains
 	the symbols by reading Module.symvers from the kernel source
-	tree. If a Module.symvers file is present in the directory
-	where the external module is being built, this file will be
-	read too. During the MODPOST step, a new Module.symvers file
-	will be written containing all exported symbols that were not
-	defined in the kernel.
+	tree. During the MODPOST step, a new Module.symvers file will be
+	written containing all exported symbols from that external module.
 
 6.3 Symbols From Another External Module
 ----------------------------------------
@@ -504,7 +501,7 @@ build.
 	Sometimes, an external module uses exported symbols from
 	another external module. Kbuild needs to have full knowledge of
 	all symbols to avoid spitting out warnings about undefined
-	symbols. Three solutions exist for this situation.
+	symbols. Two solutions exist for this situation.
 
 	NOTE: The method with a top-level kbuild file is recommended
 	but may be impractical in certain situations.
@@ -544,8 +541,8 @@ build.
 		all symbols defined and not part of the kernel.
 
 	Use "make" variable KBUILD_EXTRA_SYMBOLS
-		If it is impractical to copy Module.symvers from
-		another module, you can assign a space separated list
+		If it is impractical to add a top-level kbuild file,
+		you can assign a space separated list
 		of files to KBUILD_EXTRA_SYMBOLS in your build file.
 		These files will be loaded by modpost during the
 		initialization of its symbol tables.
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index a3ddb213a5e1..d62aacb2822a 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -601,7 +601,7 @@ Defined in ``include/linux/export.h``
 
 This is the variant of `EXPORT_SYMBOL()` that allows specifying a symbol
 namespace. Symbol Namespaces are documented in
-``Documentation/kbuild/namespaces.rst``.
+``Documentation/core-api/symbol-namespaces.rst``.
 
 :c:func:`EXPORT_SYMBOL_NS_GPL()`
 --------------------------------
@@ -610,7 +610,7 @@ Defined in ``include/linux/export.h``
 
 This is the variant of `EXPORT_SYMBOL_GPL()` that allows specifying a symbol
 namespace. Symbol Namespaces are documented in
-``Documentation/kbuild/namespaces.rst``.
+``Documentation/core-api/symbol-namespaces.rst``.
 
 Routines and Conventions
 ========================
diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst
index 17674a9e21b2..525944063be7 100644
--- a/Documentation/livepatch/index.rst
+++ b/Documentation/livepatch/index.rst
@@ -12,6 +12,7 @@ Kernel Livepatching
     cumulative-patches
     module-elf-format
     shadow-vars
+    system-state
 
 .. only::  subproject and html
 
diff --git a/Documentation/livepatch/system-state.rst b/Documentation/livepatch/system-state.rst
new file mode 100644
index 000000000000..c6d127c2d9aa
--- /dev/null
+++ b/Documentation/livepatch/system-state.rst
@@ -0,0 +1,167 @@
+====================
+System State Changes
+====================
+
+Some users are really reluctant to reboot a system. This brings the need
+to provide more livepatches and maintain some compatibility between them.
+
+Maintaining more livepatches is much easier with cumulative livepatches.
+Each new livepatch completely replaces any older one. It can keep,
+add, and even remove fixes. And it is typically safe to replace any version
+of the livepatch with any other one thanks to the atomic replace feature.
+
+The problems might come with shadow variables and callbacks. They might
+change the system behavior or state so that it is no longer safe to
+go back and use an older livepatch or the original kernel code. Also
+any new livepatch must be able to detect what changes have already been
+done by the already installed livepatches.
+
+This is where the livepatch system state tracking gets useful. It
+allows to:
+
+  - store data needed to manipulate and restore the system state
+
+  - define compatibility between livepatches using a change id
+    and version
+
+
+1. Livepatch system state API
+=============================
+
+The state of the system might get modified either by several livepatch callbacks
+or by the newly used code. Also it must be possible to find changes done by
+already installed livepatches.
+
+Each modified state is described by struct klp_state, see
+include/linux/livepatch.h.
+
+Each livepatch defines an array of struct klp_states. They mention
+all states that the livepatch modifies.
+
+The livepatch author must define the following two fields for each
+struct klp_state:
+
+  - *id*
+
+    - Non-zero number used to identify the affected system state.
+
+  - *version*
+
+    - Number describing the variant of the system state change that
+      is supported by the given livepatch.
+
+The state can be manipulated using two functions:
+
+  - *klp_get_state(patch, id)*
+
+    - Get struct klp_state associated with the given livepatch
+      and state id.
+
+  - *klp_get_prev_state(id)*
+
+    - Get struct klp_state associated with the given feature id and
+      already installed livepatches.
+
+2. Livepatch compatibility
+==========================
+
+The system state version is used to prevent loading incompatible livepatches.
+The check is done when the livepatch is enabled. The rules are:
+
+  - Any completely new system state modification is allowed.
+
+  - System state modifications with the same or higher version are allowed
+    for already modified system states.
+
+  - Cumulative livepatches must handle all system state modifications from
+    already installed livepatches.
+
+  - Non-cumulative livepatches are allowed to touch already modified
+    system states.
+
+3. Supported scenarios
+======================
+
+Livepatches have their life-cycle and the same is true for the system
+state changes. Every compatible livepatch has to support the following
+scenarios:
+
+  - Modify the system state when the livepatch gets enabled and the state
+    has not been already modified by a livepatches that are being
+    replaced.
+
+  - Take over or update the system state modification when is has already
+    been done by a livepatch that is being replaced.
+
+  - Restore the original state when the livepatch is disabled.
+
+  - Restore the previous state when the transition is reverted.
+    It might be the original system state or the state modification
+    done by livepatches that were being replaced.
+
+  - Remove any already made changes when error occurs and the livepatch
+    cannot get enabled.
+
+4. Expected usage
+=================
+
+System states are usually modified by livepatch callbacks. The expected
+role of each callback is as follows:
+
+*pre_patch()*
+
+  - Allocate *state->data* when necessary. The allocation might fail
+    and *pre_patch()* is the only callback that could stop loading
+    of the livepatch. The allocation is not needed when the data
+    are already provided by previously installed livepatches.
+
+  - Do any other preparatory action that is needed by
+    the new code even before the transition gets finished.
+    For example, initialize *state->data*.
+
+    The system state itself is typically modified in *post_patch()*
+    when the entire system is able to handle it.
+
+  - Clean up its own mess in case of error. It might be done by a custom
+    code or by calling *post_unpatch()* explicitly.
+
+*post_patch()*
+
+  - Copy *state->data* from the previous livepatch when they are
+    compatible.
+
+  - Do the actual system state modification. Eventually allow
+    the new code to use it.
+
+  - Make sure that *state->data* has all necessary information.
+
+  - Free *state->data* from replaces livepatches when they are
+    not longer needed.
+
+*pre_unpatch()*
+
+  - Prevent the code, added by the livepatch, relying on the system
+    state change.
+
+  - Revert the system state modification..
+
+*post_unpatch()*
+
+  - Distinguish transition reverse and livepatch disabling by
+    checking *klp_get_prev_state()*.
+
+  - In case of transition reverse, restore the previous system
+    state. It might mean doing nothing.
+
+  - Remove any not longer needed setting or data.
+
+.. note::
+
+   *pre_unpatch()* typically does symmetric operations to *post_patch()*.
+   Except that it is called only when the livepatch is being disabled.
+   Therefore it does not need to care about any previously installed
+   livepatch.
+
+   *post_unpatch()* typically does symmetric operations to *pre_patch()*.
+   It might be called also during the transition reverse. Therefore it
+   has to handle the state of the previously installed livepatches.
diff --git a/Documentation/locking/locktorture.rst b/Documentation/locking/locktorture.rst
index e79eeeca3ac6..5bcb99ba7bd9 100644
--- a/Documentation/locking/locktorture.rst
+++ b/Documentation/locking/locktorture.rst
@@ -103,8 +103,7 @@ stat_interval
 		  Number of seconds between statistics-related printk()s.
 		  By default, locktorture will report stats every 60 seconds.
 		  Setting the interval to zero causes the statistics to
-		  be printed -only- when the module is unloaded, and this
-		  is the default.
+		  be printed -only- when the module is unloaded.
 
 stutter
 		  The length of time to run the test before pausing for this
diff --git a/Documentation/maintainer/configure-git.rst b/Documentation/maintainer/configure-git.rst
index 78bbbb0d2c84..80ae5030a590 100644
--- a/Documentation/maintainer/configure-git.rst
+++ b/Documentation/maintainer/configure-git.rst
@@ -32,3 +32,33 @@ You may also like to tell ``gpg`` which ``tty`` to use (add to your shell rc fil
 ::
 
 	export GPG_TTY=$(tty)
+
+
+Creating commit links to lore.kernel.org
+----------------------------------------
+
+The web site http://lore.kernel.org is meant as a grand archive of all mail
+list traffic concerning or influencing the kernel development. Storing archives
+of patches here is a recommended practice, and when a maintainer applies a
+patch to a subsystem tree, it is a good idea to provide a Link: tag with a
+reference back to the lore archive so that people that browse the commit
+history can find related discussions and rationale behind a certain change.
+The link tag will look like this:
+
+    Link: https://lore.kernel.org/r/<message-id>
+
+This can be configured to happen automatically any time you issue ``git am``
+by adding the following hook into your git:
+
+.. code-block:: none
+
+	$ git config am.messageid true
+	$ cat >.git/hooks/applypatch-msg <<'EOF'
+	#!/bin/sh
+	. git-sh-setup
+	perl -pi -e 's|^Message-Id:\s*<?([^>]+)>?$|Link: https://lore.kernel.org/r/$1|g;' "$1"
+	test -x "$GIT_DIR/hooks/commit-msg" &&
+		exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
+	:
+	EOF
+	$ chmod a+x .git/hooks/applypatch-msg
diff --git a/Documentation/maintainer/index.rst b/Documentation/maintainer/index.rst
index 56e2c09dfa39..d904e74e1159 100644
--- a/Documentation/maintainer/index.rst
+++ b/Documentation/maintainer/index.rst
@@ -12,4 +12,5 @@ additions to this manual.
    configure-git
    rebasing-and-merging
    pull-requests
+   maintainer-entry-profile
 
diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst
new file mode 100644
index 000000000000..11ebe3682771
--- /dev/null
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -0,0 +1,103 @@
+.. _maintainerentryprofile:
+
+Maintainer Entry Profile
+========================
+
+The Maintainer Entry Profile supplements the top-level process documents
+(submitting-patches, submitting drivers...) with
+subsystem/device-driver-local customs as well as details about the patch
+submission life-cycle. A contributor uses this document to level set
+their expectations and avoid common mistakes, maintainers may use these
+profiles to look across subsystems for opportunities to converge on
+common practices.
+
+
+Overview
+--------
+Provide an introduction to how the subsystem operates. While MAINTAINERS
+tells the contributor where to send patches for which files, it does not
+convey other subsystem-local infrastructure and mechanisms that aid
+development.
+
+Example questions to consider:
+
+- Are there notifications when patches are applied to the local tree, or
+  merged upstream?
+- Does the subsystem have a patchwork instance? Are patchwork state
+  changes notified?
+- Any bots or CI infrastructure that watches the list, or automated
+  testing feedback that the subsystem gates acceptance?
+- Git branches that are pulled into -next?
+- What branch should contributors submit against?
+- Links to any other Maintainer Entry Profiles? For example a
+  device-driver may point to an entry for its parent subsystem. This makes
+  the contributor aware of obligations a maintainer may have have for
+  other maintainers in the submission chain.
+
+
+Submit Checklist Addendum
+-------------------------
+List mandatory and advisory criteria, beyond the common "submit-checklist",
+for a patch to be considered healthy enough for maintainer attention.
+For example: "pass checkpatch.pl with no errors, or warning. Pass the
+unit test detailed at $URI".
+
+The Submit Checklist Addendum can also include details about the status
+of related hardware specifications. For example, does the subsystem
+require published specifications at a certain revision before patches
+will be considered.
+
+
+Key Cycle Dates
+---------------
+One of the common misunderstandings of submitters is that patches can be
+sent at any time before the merge window closes and can still be
+considered for the next -rc1. The reality is that most patches need to
+be settled in soaking in linux-next in advance of the merge window
+opening. Clarify for the submitter the key dates (in terms rc release
+week) that patches might considered for merging and when patches need to
+wait for the next -rc. At a minimum:
+
+- Last -rc for new feature submissions:
+  New feature submissions targeting the next merge window should have
+  their first posting for consideration before this point. Patches that
+  are submitted after this point should be clear that they are targeting
+  the NEXT+1 merge window, or should come with sufficient justification
+  why they should be considered on an expedited schedule. A general
+  guideline is to set expectation with contributors that new feature
+  submissions should appear before -rc5.
+
+- Last -rc to merge features: Deadline for merge decisions
+  Indicate to contributors the point at which an as yet un-applied patch
+  set will need to wait for the NEXT+1 merge window. Of course there is no
+  obligation to ever except any given patchset, but if the review has not
+  concluded by this point the expectation the contributor should wait and
+  resubmit for the following merge window.
+
+Optional:
+
+- First -rc at which the development baseline branch, listed in the
+  overview section, should be considered ready for new submissions.
+
+
+Review Cadence
+--------------
+One of the largest sources of contributor angst is how soon to ping
+after a patchset has been posted without receiving any feedback. In
+addition to specifying how long to wait before a resubmission this
+section can also indicate a preferred style of update like, resend the
+full series, or privately send a reminder email. This section might also
+list how review works for this code area and methods to get feedback
+that are not directly from the maintainer.
+
+Existing profiles
+-----------------
+
+For now, existing maintainer profiles are listed here; we will likely want
+to do something different in the near future.
+
+.. toctree::
+   :maxdepth: 1
+
+   ../doc-guide/maintainer-profile
+   ../nvdimm/maintainer-entry-profile
diff --git a/Documentation/media/cec.h.rst.exceptions b/Documentation/media/cec.h.rst.exceptions
index 014816d04b9e..d83790ccac8e 100644
--- a/Documentation/media/cec.h.rst.exceptions
+++ b/Documentation/media/cec.h.rst.exceptions
@@ -335,6 +335,95 @@ ignore define CEC_OP_MENU_STATE_DEACTIVATED
 
 ignore define CEC_MSG_USER_CONTROL_PRESSED
 
+ignore define CEC_OP_UI_CMD_SELECT
+ignore define CEC_OP_UI_CMD_UP
+ignore define CEC_OP_UI_CMD_DOWN
+ignore define CEC_OP_UI_CMD_LEFT
+ignore define CEC_OP_UI_CMD_RIGHT
+ignore define CEC_OP_UI_CMD_RIGHT_UP
+ignore define CEC_OP_UI_CMD_RIGHT_DOWN
+ignore define CEC_OP_UI_CMD_LEFT_UP
+ignore define CEC_OP_UI_CMD_LEFT_DOWN
+ignore define CEC_OP_UI_CMD_DEVICE_ROOT_MENU
+ignore define CEC_OP_UI_CMD_DEVICE_SETUP_MENU
+ignore define CEC_OP_UI_CMD_CONTENTS_MENU
+ignore define CEC_OP_UI_CMD_FAVORITE_MENU
+ignore define CEC_OP_UI_CMD_BACK
+ignore define CEC_OP_UI_CMD_MEDIA_TOP_MENU
+ignore define CEC_OP_UI_CMD_MEDIA_CONTEXT_SENSITIVE_MENU
+ignore define CEC_OP_UI_CMD_NUMBER_ENTRY_MODE
+ignore define CEC_OP_UI_CMD_NUMBER_11
+ignore define CEC_OP_UI_CMD_NUMBER_12
+ignore define CEC_OP_UI_CMD_NUMBER_0_OR_NUMBER_10
+ignore define CEC_OP_UI_CMD_NUMBER_1
+ignore define CEC_OP_UI_CMD_NUMBER_2
+ignore define CEC_OP_UI_CMD_NUMBER_3
+ignore define CEC_OP_UI_CMD_NUMBER_4
+ignore define CEC_OP_UI_CMD_NUMBER_5
+ignore define CEC_OP_UI_CMD_NUMBER_6
+ignore define CEC_OP_UI_CMD_NUMBER_7
+ignore define CEC_OP_UI_CMD_NUMBER_8
+ignore define CEC_OP_UI_CMD_NUMBER_9
+ignore define CEC_OP_UI_CMD_DOT
+ignore define CEC_OP_UI_CMD_ENTER
+ignore define CEC_OP_UI_CMD_CLEAR
+ignore define CEC_OP_UI_CMD_NEXT_FAVORITE
+ignore define CEC_OP_UI_CMD_CHANNEL_UP
+ignore define CEC_OP_UI_CMD_CHANNEL_DOWN
+ignore define CEC_OP_UI_CMD_PREVIOUS_CHANNEL
+ignore define CEC_OP_UI_CMD_SOUND_SELECT
+ignore define CEC_OP_UI_CMD_INPUT_SELECT
+ignore define CEC_OP_UI_CMD_DISPLAY_INFORMATION
+ignore define CEC_OP_UI_CMD_HELP
+ignore define CEC_OP_UI_CMD_PAGE_UP
+ignore define CEC_OP_UI_CMD_PAGE_DOWN
+ignore define CEC_OP_UI_CMD_POWER
+ignore define CEC_OP_UI_CMD_VOLUME_UP
+ignore define CEC_OP_UI_CMD_VOLUME_DOWN
+ignore define CEC_OP_UI_CMD_MUTE
+ignore define CEC_OP_UI_CMD_PLAY
+ignore define CEC_OP_UI_CMD_STOP
+ignore define CEC_OP_UI_CMD_PAUSE
+ignore define CEC_OP_UI_CMD_RECORD
+ignore define CEC_OP_UI_CMD_REWIND
+ignore define CEC_OP_UI_CMD_FAST_FORWARD
+ignore define CEC_OP_UI_CMD_EJECT
+ignore define CEC_OP_UI_CMD_SKIP_FORWARD
+ignore define CEC_OP_UI_CMD_SKIP_BACKWARD
+ignore define CEC_OP_UI_CMD_STOP_RECORD
+ignore define CEC_OP_UI_CMD_PAUSE_RECORD
+ignore define CEC_OP_UI_CMD_ANGLE
+ignore define CEC_OP_UI_CMD_SUB_PICTURE
+ignore define CEC_OP_UI_CMD_VIDEO_ON_DEMAND
+ignore define CEC_OP_UI_CMD_ELECTRONIC_PROGRAM_GUIDE
+ignore define CEC_OP_UI_CMD_TIMER_PROGRAMMING
+ignore define CEC_OP_UI_CMD_INITIAL_CONFIGURATION
+ignore define CEC_OP_UI_CMD_SELECT_BROADCAST_TYPE
+ignore define CEC_OP_UI_CMD_SELECT_SOUND_PRESENTATION
+ignore define CEC_OP_UI_CMD_AUDIO_DESCRIPTION
+ignore define CEC_OP_UI_CMD_INTERNET
+ignore define CEC_OP_UI_CMD_3D_MODE
+ignore define CEC_OP_UI_CMD_PLAY_FUNCTION
+ignore define CEC_OP_UI_CMD_PAUSE_PLAY_FUNCTION
+ignore define CEC_OP_UI_CMD_RECORD_FUNCTION
+ignore define CEC_OP_UI_CMD_PAUSE_RECORD_FUNCTION
+ignore define CEC_OP_UI_CMD_STOP_FUNCTION
+ignore define CEC_OP_UI_CMD_MUTE_FUNCTION
+ignore define CEC_OP_UI_CMD_RESTORE_VOLUME_FUNCTION
+ignore define CEC_OP_UI_CMD_TUNE_FUNCTION
+ignore define CEC_OP_UI_CMD_SELECT_MEDIA_FUNCTION
+ignore define CEC_OP_UI_CMD_SELECT_AV_INPUT_FUNCTION
+ignore define CEC_OP_UI_CMD_SELECT_AUDIO_INPUT_FUNCTION
+ignore define CEC_OP_UI_CMD_POWER_TOGGLE_FUNCTION
+ignore define CEC_OP_UI_CMD_POWER_OFF_FUNCTION
+ignore define CEC_OP_UI_CMD_POWER_ON_FUNCTION
+ignore define CEC_OP_UI_CMD_F1_BLUE
+ignore define CEC_OP_UI_CMD_F2_RED
+ignore define CEC_OP_UI_CMD_F3_GREEN
+ignore define CEC_OP_UI_CMD_F4_YELLOW
+ignore define CEC_OP_UI_CMD_F5
+ignore define CEC_OP_UI_CMD_DATA
+
 ignore define CEC_OP_UI_BCAST_TYPE_TOGGLE_ALL
 ignore define CEC_OP_UI_BCAST_TYPE_TOGGLE_DIG_ANA
 ignore define CEC_OP_UI_BCAST_TYPE_ANALOGUE
diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index ebe2a55908be..b20800cae3f2 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -140,6 +140,15 @@ Menu controls with a driver specific menu are added by calling
                        const struct v4l2_ctrl_ops *ops, u32 id, s32 max,
                        s32 skip_mask, s32 def, const char * const *qmenu);
 
+Standard compound controls can be added by calling
+:c:func:`v4l2_ctrl_new_std_compound`:
+
+.. code-block:: c
+
+       struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl,
+                       const struct v4l2_ctrl_ops *ops, u32 id,
+                       const union v4l2_ctrl_ptr p_def);
+
 Integer menu controls with a driver specific menu can be added by calling
 :c:func:`v4l2_ctrl_new_int_menu`:
 
diff --git a/Documentation/media/uapi/cec/cec-funcs.rst b/Documentation/media/uapi/cec/cec-funcs.rst
index 620590b168c9..dc6da9c639a8 100644
--- a/Documentation/media/uapi/cec/cec-funcs.rst
+++ b/Documentation/media/uapi/cec/cec-funcs.rst
@@ -24,6 +24,7 @@ Function Reference
     cec-ioc-adap-g-caps
     cec-ioc-adap-g-log-addrs
     cec-ioc-adap-g-phys-addr
+    cec-ioc-adap-g-conn-info
     cec-ioc-dqevent
     cec-ioc-g-mode
     cec-ioc-receive
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
index 0c44f31a9b59..76761a98c312 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
@@ -135,8 +135,12 @@ returns the information to the application. The ioctl never fails.
       - The CEC hardware can monitor CEC pin changes from low to high voltage
         and vice versa. When in pin monitoring mode the application will
 	receive ``CEC_EVENT_PIN_CEC_LOW`` and ``CEC_EVENT_PIN_CEC_HIGH`` events.
+    * .. _`CEC-CAP-CONNECTOR-INFO`:
 
-
+      - ``CEC_CAP_CONNECTOR_INFO``
+      - 0x00000100
+      - If this capability is set, then :ref:`CEC_ADAP_G_CONNECTOR_INFO` can
+        be used.
 
 Return Value
 ============
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst
new file mode 100644
index 000000000000..a21659d55c6b
--- /dev/null
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst
@@ -0,0 +1,105 @@
+.. SPDX-License-Identifier: GPL-2.0
+..
+.. Copyright 2019 Google LLC
+..
+.. _CEC_ADAP_G_CONNECTOR_INFO:
+
+*******************************
+ioctl CEC_ADAP_G_CONNECTOR_INFO
+*******************************
+
+Name
+====
+
+CEC_ADAP_G_CONNECTOR_INFO - Query HDMI connector information
+
+Synopsis
+========
+
+.. c:function:: int ioctl( int fd, CEC_ADAP_G_CONNECTOR_INFO, struct cec_connector_info *argp )
+    :name: CEC_ADAP_G_CONNECTOR_INFO
+
+Arguments
+=========
+
+``fd``
+    File descriptor returned by :c:func:`open() <cec-open>`.
+
+``argp``
+
+
+Description
+===========
+
+Using this ioctl an application can learn which HDMI connector this CEC
+device corresponds to. While calling this ioctl the application should
+provide a pointer to a cec_connector_info struct which will be populated
+by the kernel with the info provided by the adapter's driver. This ioctl
+is only available if the ``CEC_CAP_CONNECTOR_INFO`` capability is set.
+
+.. tabularcolumns:: |p{1.0cm}|p{4.4cm}|p{2.5cm}|p{9.6cm}|
+
+.. c:type:: cec_connector_info
+
+.. flat-table:: struct cec_connector_info
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 1 8
+
+    * - __u32
+      - ``type``
+      - The type of connector this adapter is associated with.
+    * - union
+      - ``(anonymous)``
+      -
+    * -
+      - ``struct cec_drm_connector_info``
+      - drm
+      - :ref:`cec-drm-connector-info`
+
+
+.. tabularcolumns:: |p{4.4cm}|p{2.5cm}|p{10.6cm}|
+
+.. _connector-type:
+
+.. flat-table:: Connector types
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 8
+
+    * .. _`CEC-CONNECTOR-TYPE-NO-CONNECTOR`:
+
+      - ``CEC_CONNECTOR_TYPE_NO_CONNECTOR``
+      - 0
+      - No connector is associated with the adapter/the information is not
+        provided by the driver.
+    * .. _`CEC-CONNECTOR-TYPE-DRM`:
+
+      - ``CEC_CONNECTOR_TYPE_DRM``
+      - 1
+      - Indicates that a DRM connector is associated with this adapter.
+        Information about the connector can be found in
+	:ref:`cec-drm-connector-info`.
+
+.. tabularcolumns:: |p{4.4cm}|p{2.5cm}|p{10.6cm}|
+
+.. c:type:: cec_drm_connector_info
+
+.. _cec-drm-connector-info:
+
+.. flat-table:: struct cec_drm_connector_info
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 8
+
+    * .. _`CEC-DRM-CONNECTOR-TYPE-CARD-NO`:
+
+      - __u32
+      - ``card_no``
+      - DRM card number: the number from a card's path, e.g. 0 in case of
+        /dev/card0.
+    * .. _`CEC-DRM-CONNECTOR-TYPE-CONNECTOR_ID`:
+
+      - __u32
+      - ``connector_id``
+      - DRM connector ID.
diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
index 46a1c99a595e..5e21b1fbfc01 100644
--- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
@@ -70,6 +70,14 @@ it is guaranteed that the state did change in between the two events.
         addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``.
 	If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device
 	has the unregistered logical address. In that case all other bits are 0.
+    * - __u16
+      - ``have_conn_info``
+      - If non-zero, then HDMI connector information is available.
+        This field is only valid if ``CEC_CAP_CONNECTOR_INFO`` is set. If that
+        capability is set and ``have_conn_info`` is zero, then that indicates
+        that the HDMI connector device is not instantiated, either because
+        the HDMI driver is still configuring the device or because the HDMI
+        device was unbound.
 
 
 .. c:type:: cec_event_lost_msgs
diff --git a/Documentation/media/uapi/mediactl/request-api.rst b/Documentation/media/uapi/mediactl/request-api.rst
index a74c82d95609..01abe8103bdd 100644
--- a/Documentation/media/uapi/mediactl/request-api.rst
+++ b/Documentation/media/uapi/mediactl/request-api.rst
@@ -53,8 +53,8 @@ with different configurations in advance, knowing that the configuration will be
 applied when needed to get the expected result. Configuration values at the time
 of request completion are also available for reading.
 
-Usage
-=====
+General Usage
+-------------
 
 The Request API extends the Media Controller API and cooperates with
 subsystem-specific APIs to support request usage. At the Media Controller
diff --git a/Documentation/media/uapi/v4l/biblio.rst b/Documentation/media/uapi/v4l/biblio.rst
index ad2ff258afa8..8095f57d3d75 100644
--- a/Documentation/media/uapi/v4l/biblio.rst
+++ b/Documentation/media/uapi/v4l/biblio.rst
@@ -131,6 +131,15 @@ ITU-T Rec. H.264 Specification (04/2017 Edition)
 
 :author:    International Telecommunication Union (http://www.itu.ch)
 
+.. _hevc:
+
+ITU H.265/HEVC
+==============
+
+:title:     ITU-T Rec. H.265 | ISO/IEC 23008-2 "High Efficiency Video Coding"
+
+:author:    International Telecommunication Union (http://www.itu.ch), International Organisation for Standardisation (http://www.iso.ch)
+
 .. _jfif:
 
 JFIF
diff --git a/Documentation/media/uapi/v4l/buffer.rst b/Documentation/media/uapi/v4l/buffer.rst
index 1cbd9cde57f3..9149b57728e5 100644
--- a/Documentation/media/uapi/v4l/buffer.rst
+++ b/Documentation/media/uapi/v4l/buffer.rst
@@ -607,6 +607,19 @@ Buffer Flags
 	applications shall use this flag for output buffers if the data in
 	this buffer has not been created by the CPU but by some
 	DMA-capable unit, in which case caches have not been used.
+    * .. _`V4L2-BUF-FLAG-M2M-HOLD-CAPTURE-BUF`:
+
+      - ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF``
+      - 0x00000200
+      - Only valid if ``V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF`` is
+	set. It is typically used with stateless decoders where multiple
+	output buffers each decode to a slice of the decoded frame.
+	Applications can set this flag when queueing the output buffer
+	to prevent the driver from dequeueing the capture buffer after
+	the output buffer has been decoded (i.e. the capture buffer is
+	'held'). If the timestamp of this output buffer differs from that
+	of the previous output buffer, then that indicates the start of a
+	new frame and the previously held capture buffer is dequeued.
     * .. _`V4L2-BUF-FLAG-LAST`:
 
       - ``V4L2_BUF_FLAG_LAST``
diff --git a/Documentation/media/uapi/v4l/dev-mem2mem.rst b/Documentation/media/uapi/v4l/dev-mem2mem.rst
index caa05f5f6380..70953958cee6 100644
--- a/Documentation/media/uapi/v4l/dev-mem2mem.rst
+++ b/Documentation/media/uapi/v4l/dev-mem2mem.rst
@@ -46,3 +46,4 @@ devices are given in the following sections.
     :maxdepth: 1
 
     dev-decoder
+    dev-stateless-decoder
diff --git a/Documentation/media/uapi/v4l/dev-stateless-decoder.rst b/Documentation/media/uapi/v4l/dev-stateless-decoder.rst
new file mode 100644
index 000000000000..4a26646eeec5
--- /dev/null
+++ b/Documentation/media/uapi/v4l/dev-stateless-decoder.rst
@@ -0,0 +1,424 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _stateless_decoder:
+
+**************************************************
+Memory-to-memory Stateless Video Decoder Interface
+**************************************************
+
+A stateless decoder is a decoder that works without retaining any kind of state
+between processed frames. This means that each frame is decoded independently
+of any previous and future frames, and that the client is responsible for
+maintaining the decoding state and providing it to the decoder with each
+decoding request. This is in contrast to the stateful video decoder interface,
+where the hardware and driver maintain the decoding state and all the client
+has to do is to provide the raw encoded stream and dequeue decoded frames in
+display order.
+
+This section describes how user-space ("the client") is expected to communicate
+with stateless decoders in order to successfully decode an encoded stream.
+Compared to stateful codecs, the decoder/client sequence is simpler, but the
+cost of this simplicity is extra complexity in the client which is responsible
+for maintaining a consistent decoding state.
+
+Stateless decoders make use of the :ref:`media-request-api`. A stateless
+decoder must expose the ``V4L2_BUF_CAP_SUPPORTS_REQUESTS`` capability on its
+``OUTPUT`` queue when :c:func:`VIDIOC_REQBUFS` or :c:func:`VIDIOC_CREATE_BUFS`
+are invoked.
+
+Depending on the encoded formats supported by the decoder, a single decoded
+frame may be the result of several decode requests (for instance, H.264 streams
+with multiple slices per frame). Decoders that support such formats must also
+expose the ``V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF`` capability on their
+``OUTPUT`` queue.
+
+Querying capabilities
+=====================
+
+1. To enumerate the set of coded formats supported by the decoder, the client
+   calls :c:func:`VIDIOC_ENUM_FMT` on the ``OUTPUT`` queue.
+
+   * The driver must always return the full set of supported ``OUTPUT`` formats,
+     irrespective of the format currently set on the ``CAPTURE`` queue.
+
+   * Simultaneously, the driver must restrain the set of values returned by
+     codec-specific capability controls (such as H.264 profiles) to the set
+     actually supported by the hardware.
+
+2. To enumerate the set of supported raw formats, the client calls
+   :c:func:`VIDIOC_ENUM_FMT` on the ``CAPTURE`` queue.
+
+   * The driver must return only the formats supported for the format currently
+     active on the ``OUTPUT`` queue.
+
+   * Depending on the currently set ``OUTPUT`` format, the set of supported raw
+     formats may depend on the value of some codec-dependent controls.
+     The client is responsible for making sure that these controls are set
+     before querying the ``CAPTURE`` queue. Failure to do so will result in the
+     default values for these controls being used, and a returned set of formats
+     that may not be usable for the media the client is trying to decode.
+
+3. The client may use :c:func:`VIDIOC_ENUM_FRAMESIZES` to detect supported
+   resolutions for a given format, passing desired pixel format in
+   :c:type:`v4l2_frmsizeenum`'s ``pixel_format``.
+
+4. Supported profiles and levels for the current ``OUTPUT`` format, if
+   applicable, may be queried using their respective controls via
+   :c:func:`VIDIOC_QUERYCTRL`.
+
+Initialization
+==============
+
+1. Set the coded format on the ``OUTPUT`` queue via :c:func:`VIDIOC_S_FMT`.
+
+   * **Required fields:**
+
+     ``type``
+         a ``V4L2_BUF_TYPE_*`` enum appropriate for ``OUTPUT``.
+
+     ``pixelformat``
+         a coded pixel format.
+
+     ``width``, ``height``
+         coded width and height parsed from the stream.
+
+     other fields
+         follow standard semantics.
+
+   .. note::
+
+      Changing the ``OUTPUT`` format may change the currently set ``CAPTURE``
+      format. The driver will derive a new ``CAPTURE`` format from the
+      ``OUTPUT`` format being set, including resolution, colorimetry
+      parameters, etc. If the client needs a specific ``CAPTURE`` format,
+      it must adjust it afterwards.
+
+2. Call :c:func:`VIDIOC_S_EXT_CTRLS` to set all the controls (parsed headers,
+   etc.) required by the ``OUTPUT`` format to enumerate the ``CAPTURE`` formats.
+
+3. Call :c:func:`VIDIOC_G_FMT` for ``CAPTURE`` queue to get the format for the
+   destination buffers parsed/decoded from the bytestream.
+
+   * **Required fields:**
+
+     ``type``
+         a ``V4L2_BUF_TYPE_*`` enum appropriate for ``CAPTURE``.
+
+   * **Returned fields:**
+
+     ``width``, ``height``
+         frame buffer resolution for the decoded frames.
+
+     ``pixelformat``
+         pixel format for decoded frames.
+
+     ``num_planes`` (for _MPLANE ``type`` only)
+         number of planes for pixelformat.
+
+     ``sizeimage``, ``bytesperline``
+         as per standard semantics; matching frame buffer format.
+
+   .. note::
+
+      The value of ``pixelformat`` may be any pixel format supported for the
+      ``OUTPUT`` format, based on the hardware capabilities. It is suggested
+      that the driver chooses the preferred/optimal format for the current
+      configuration. For example, a YUV format may be preferred over an RGB
+      format, if an additional conversion step would be required for RGB.
+
+4. *[optional]* Enumerate ``CAPTURE`` formats via :c:func:`VIDIOC_ENUM_FMT` on
+   the ``CAPTURE`` queue. The client may use this ioctl to discover which
+   alternative raw formats are supported for the current ``OUTPUT`` format and
+   select one of them via :c:func:`VIDIOC_S_FMT`.
+
+   .. note::
+
+      The driver will return only formats supported for the currently selected
+      ``OUTPUT`` format and currently set controls, even if more formats may be
+      supported by the decoder in general.
+
+      For example, a decoder may support YUV and RGB formats for
+      resolutions 1920x1088 and lower, but only YUV for higher resolutions (due
+      to hardware limitations). After setting a resolution of 1920x1088 or lower
+      as the ``OUTPUT`` format, :c:func:`VIDIOC_ENUM_FMT` may return a set of
+      YUV and RGB pixel formats, but after setting a resolution higher than
+      1920x1088, the driver will not return RGB pixel formats, since they are
+      unsupported for this resolution.
+
+5. *[optional]* Choose a different ``CAPTURE`` format than suggested via
+   :c:func:`VIDIOC_S_FMT` on ``CAPTURE`` queue. It is possible for the client to
+   choose a different format than selected/suggested by the driver in
+   :c:func:`VIDIOC_G_FMT`.
+
+    * **Required fields:**
+
+      ``type``
+          a ``V4L2_BUF_TYPE_*`` enum appropriate for ``CAPTURE``.
+
+      ``pixelformat``
+          a raw pixel format.
+
+      ``width``, ``height``
+         frame buffer resolution of the decoded stream; typically unchanged from
+         what was returned with :c:func:`VIDIOC_G_FMT`, but it may be different
+         if the hardware supports composition and/or scaling.
+
+   After performing this step, the client must perform step 3 again in order
+   to obtain up-to-date information about the buffers size and layout.
+
+6. Allocate source (bytestream) buffers via :c:func:`VIDIOC_REQBUFS` on
+   ``OUTPUT`` queue.
+
+    * **Required fields:**
+
+      ``count``
+          requested number of buffers to allocate; greater than zero.
+
+      ``type``
+          a ``V4L2_BUF_TYPE_*`` enum appropriate for ``OUTPUT``.
+
+      ``memory``
+          follows standard semantics.
+
+    * **Return fields:**
+
+      ``count``
+          actual number of buffers allocated.
+
+    * If required, the driver will adjust ``count`` to be equal or bigger to the
+      minimum of required number of ``OUTPUT`` buffers for the given format and
+      requested count. The client must check this value after the ioctl returns
+      to get the actual number of buffers allocated.
+
+7. Allocate destination (raw format) buffers via :c:func:`VIDIOC_REQBUFS` on the
+   ``CAPTURE`` queue.
+
+    * **Required fields:**
+
+      ``count``
+          requested number of buffers to allocate; greater than zero. The client
+          is responsible for deducing the minimum number of buffers required
+          for the stream to be properly decoded (taking e.g. reference frames
+          into account) and pass an equal or bigger number.
+
+      ``type``
+          a ``V4L2_BUF_TYPE_*`` enum appropriate for ``CAPTURE``.
+
+      ``memory``
+          follows standard semantics. ``V4L2_MEMORY_USERPTR`` is not supported
+          for ``CAPTURE`` buffers.
+
+    * **Return fields:**
+
+      ``count``
+          adjusted to allocated number of buffers, in case the codec requires
+          more buffers than requested.
+
+    * The driver must adjust count to the minimum of required number of
+      ``CAPTURE`` buffers for the current format, stream configuration and
+      requested count. The client must check this value after the ioctl
+      returns to get the number of buffers allocated.
+
+8. Allocate requests (likely one per ``OUTPUT`` buffer) via
+    :c:func:`MEDIA_IOC_REQUEST_ALLOC` on the media device.
+
+9. Start streaming on both ``OUTPUT`` and ``CAPTURE`` queues via
+    :c:func:`VIDIOC_STREAMON`.
+
+Decoding
+========
+
+For each frame, the client is responsible for submitting at least one request to
+which the following is attached:
+
+* The amount of encoded data expected by the codec for its current
+  configuration, as a buffer submitted to the ``OUTPUT`` queue. Typically, this
+  corresponds to one frame worth of encoded data, but some formats may allow (or
+  require) different amounts per unit.
+* All the metadata needed to decode the submitted encoded data, in the form of
+  controls relevant to the format being decoded.
+
+The amount of data and contents of the source ``OUTPUT`` buffer, as well as the
+controls that must be set on the request, depend on the active coded pixel
+format and might be affected by codec-specific extended controls, as stated in
+documentation of each format.
+
+If there is a possibility that the decoded frame will require one or more
+decode requests after the current one in order to be produced, then the client
+must set the ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag on the ``OUTPUT``
+buffer. This will result in the (potentially partially) decoded ``CAPTURE``
+buffer not being made available for dequeueing, and reused for the next decode
+request if the timestamp of the next ``OUTPUT`` buffer has not changed.
+
+A typical frame would thus be decoded using the following sequence:
+
+1. Queue an ``OUTPUT`` buffer containing one unit of encoded bytestream data for
+   the decoding request, using :c:func:`VIDIOC_QBUF`.
+
+    * **Required fields:**
+
+      ``index``
+          index of the buffer being queued.
+
+      ``type``
+          type of the buffer.
+
+      ``bytesused``
+          number of bytes taken by the encoded data frame in the buffer.
+
+      ``flags``
+          the ``V4L2_BUF_FLAG_REQUEST_FD`` flag must be set. Additionally, if
+          we are not sure that the current decode request is the last one needed
+          to produce a fully decoded frame, then
+          ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` must also be set.
+
+      ``request_fd``
+          must be set to the file descriptor of the decoding request.
+
+      ``timestamp``
+          must be set to a unique value per frame. This value will be propagated
+          into the decoded frame's buffer and can also be used to use this frame
+          as the reference of another. If using multiple decode requests per
+          frame, then the timestamps of all the ``OUTPUT`` buffers for a given
+          frame must be identical. If the timestamp changes, then the currently
+          held ``CAPTURE`` buffer will be made available for dequeuing and the
+          current request will work on a new ``CAPTURE`` buffer.
+
+2. Set the codec-specific controls for the decoding request, using
+   :c:func:`VIDIOC_S_EXT_CTRLS`.
+
+    * **Required fields:**
+
+      ``which``
+          must be ``V4L2_CTRL_WHICH_REQUEST_VAL``.
+
+      ``request_fd``
+          must be set to the file descriptor of the decoding request.
+
+      other fields
+          other fields are set as usual when setting controls. The ``controls``
+          array must contain all the codec-specific controls required to decode
+          a frame.
+
+   .. note::
+
+      It is possible to specify the controls in different invocations of
+      :c:func:`VIDIOC_S_EXT_CTRLS`, or to overwrite a previously set control, as
+      long as ``request_fd`` and ``which`` are properly set. The controls state
+      at the moment of request submission is the one that will be considered.
+
+   .. note::
+
+      The order in which steps 1 and 2 take place is interchangeable.
+
+3. Submit the request by invoking :c:func:`MEDIA_REQUEST_IOC_QUEUE` on the
+   request FD.
+
+    If the request is submitted without an ``OUTPUT`` buffer, or if some of the
+    required controls are missing from the request, then
+    :c:func:`MEDIA_REQUEST_IOC_QUEUE` will return ``-ENOENT``. If more than one
+    ``OUTPUT`` buffer is queued, then it will return ``-EINVAL``.
+    :c:func:`MEDIA_REQUEST_IOC_QUEUE` returning non-zero means that no
+    ``CAPTURE`` buffer will be produced for this request.
+
+``CAPTURE`` buffers must not be part of the request, and are queued
+independently. They are returned in decode order (i.e. the same order as coded
+frames were submitted to the ``OUTPUT`` queue).
+
+Runtime decoding errors are signaled by the dequeued ``CAPTURE`` buffers
+carrying the ``V4L2_BUF_FLAG_ERROR`` flag. If a decoded reference frame has an
+error, then all following decoded frames that refer to it also have the
+``V4L2_BUF_FLAG_ERROR`` flag set, although the decoder will still try to
+produce (likely corrupted) frames.
+
+Buffer management while decoding
+================================
+Contrary to stateful decoders, a stateless decoder does not perform any kind of
+buffer management: it only guarantees that dequeued ``CAPTURE`` buffers can be
+used by the client for as long as they are not queued again. "Used" here
+encompasses using the buffer for compositing or display.
+
+A dequeued capture buffer can also be used as the reference frame of another
+buffer.
+
+A frame is specified as reference by converting its timestamp into nanoseconds,
+and storing it into the relevant member of a codec-dependent control structure.
+The :c:func:`v4l2_timeval_to_ns` function must be used to perform that
+conversion. The timestamp of a frame can be used to reference it as soon as all
+its units of encoded data are successfully submitted to the ``OUTPUT`` queue.
+
+A decoded buffer containing a reference frame must not be reused as a decoding
+target until all the frames referencing it have been decoded. The safest way to
+achieve this is to refrain from queueing a reference buffer until all the
+decoded frames referencing it have been dequeued. However, if the driver can
+guarantee that buffers queued to the ``CAPTURE`` queue are processed in queued
+order, then user-space can take advantage of this guarantee and queue a
+reference buffer when the following conditions are met:
+
+1. All the requests for frames affected by the reference frame have been
+   queued, and
+
+2. A sufficient number of ``CAPTURE`` buffers to cover all the decoded
+   referencing frames have been queued.
+
+When queuing a decoding request, the driver will increase the reference count of
+all the resources associated with reference frames. This means that the client
+can e.g. close the DMABUF file descriptors of reference frame buffers if it
+won't need them afterwards.
+
+Seeking
+=======
+In order to seek, the client just needs to submit requests using input buffers
+corresponding to the new stream position. It must however be aware that
+resolution may have changed and follow the dynamic resolution change sequence in
+that case. Also depending on the codec used, picture parameters (e.g. SPS/PPS
+for H.264) may have changed and the client is responsible for making sure that a
+valid state is sent to the decoder.
+
+The client is then free to ignore any returned ``CAPTURE`` buffer that comes
+from the pre-seek position.
+
+Pausing
+=======
+
+In order to pause, the client can just cease queuing buffers onto the ``OUTPUT``
+queue. Without source bytestream data, there is no data to process and the codec
+will remain idle.
+
+Dynamic resolution change
+=========================
+
+If the client detects a resolution change in the stream, it will need to perform
+the initialization sequence again with the new resolution:
+
+1. If the last submitted request resulted in a ``CAPTURE`` buffer being
+   held by the use of the ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag, then the
+   last frame is not available on the ``CAPTURE`` queue. In this case, a
+   ``V4L2_DEC_CMD_FLUSH`` command shall be sent. This will make the driver
+   dequeue the held ``CAPTURE`` buffer.
+
+2. Wait until all submitted requests have completed and dequeue the
+   corresponding output buffers.
+
+3. Call :c:func:`VIDIOC_STREAMOFF` on both the ``OUTPUT`` and ``CAPTURE``
+   queues.
+
+4. Free all ``CAPTURE`` buffers by calling :c:func:`VIDIOC_REQBUFS` on the
+   ``CAPTURE`` queue with a buffer count of zero.
+
+5. Perform the initialization sequence again (minus the allocation of
+   ``OUTPUT`` buffers), with the new resolution set on the ``OUTPUT`` queue.
+   Note that due to resolution constraints, a different format may need to be
+   picked on the ``CAPTURE`` queue.
+
+Drain
+=====
+
+If the last submitted request resulted in a ``CAPTURE`` buffer being
+held by the use of the ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag, then the
+last frame is not available on the ``CAPTURE`` queue. In this case, a
+``V4L2_DEC_CMD_FLUSH`` command shall be sent. This will make the driver
+dequeue the held ``CAPTURE`` buffer.
+
+After that, in order to drain the stream on a stateless decoder, the client
+just needs to wait until all the submitted requests are completed.
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index bc5dd8e76567..28313c0f4e7c 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -1713,10 +1713,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 
     * - __u8
       - ``scaling_list_4x4[6][16]``
-      -
+      - Scaling matrix after applying the inverse scanning process.
+        Expected list order is Intra Y, Intra Cb, Intra Cr, Inter Y,
+        Inter Cb, Inter Cr.
     * - __u8
       - ``scaling_list_8x8[6][64]``
-      -
+      - Scaling matrix after applying the inverse scanning process.
+        Expected list order is Intra Y, Inter Y, Intra Cb, Inter Cb,
+        Intra Cr, Inter Cr.
 
 ``V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (struct)``
     Specifies the slice parameters (as extracted from the bitstream)
@@ -1796,7 +1800,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       -
     * - __u32
       - ``dec_ref_pic_marking_bit_size``
-      -
+      - Size in bits of the dec_ref_pic_marking() syntax element.
     * - __u32
       - ``pic_order_cnt_bit_size``
       -
@@ -1820,10 +1824,12 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       -
     * - __u8
       - ``num_ref_idx_l0_active_minus1``
-      -
+      - If num_ref_idx_active_override_flag is not set, this field must be
+        set to the value of num_ref_idx_l0_default_active_minus1.
     * - __u8
       - ``num_ref_idx_l1_active_minus1``
-      -
+      - If num_ref_idx_active_override_flag is not set, this field must be
+        set to the value of num_ref_idx_l1_default_active_minus1.
     * - __u32
       - ``slice_group_change_cycle``
       -
@@ -1983,9 +1989,9 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       - ``reference_ts``
       - Timestamp of the V4L2 capture buffer to use as reference, used
         with B-coded and P-coded frames. The timestamp refers to the
-	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
-	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
-	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+        ``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+        :c:func:`v4l2_timeval_to_ns()` function to convert the struct
+        :c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
     * - __u16
       - ``frame_num``
       -
@@ -3693,3 +3699,550 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     Indicates whether to generate SPS and PPS at every IDR. Setting it to 0
     disables generating SPS and PPS at every IDR. Setting it to one enables
     generating SPS and PPS at every IDR.
+
+.. _v4l2-mpeg-hevc:
+
+``V4L2_CID_MPEG_VIDEO_HEVC_SPS (struct)``
+    Specifies the Sequence Parameter Set fields (as extracted from the
+    bitstream) for the associated HEVC slice data.
+    These bitstream parameters are defined according to :ref:`hevc`.
+    They are described in section 7.4.3.2 "Sequence parameter set RBSP
+    semantics" of the specification.
+
+.. c:type:: v4l2_ctrl_hevc_sps
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_hevc_sps
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u16
+      - ``pic_width_in_luma_samples``
+      -
+    * - __u16
+      - ``pic_height_in_luma_samples``
+      -
+    * - __u8
+      - ``bit_depth_luma_minus8``
+      -
+    * - __u8
+      - ``bit_depth_chroma_minus8``
+      -
+    * - __u8
+      - ``log2_max_pic_order_cnt_lsb_minus4``
+      -
+    * - __u8
+      - ``sps_max_dec_pic_buffering_minus1``
+      -
+    * - __u8
+      - ``sps_max_num_reorder_pics``
+      -
+    * - __u8
+      - ``sps_max_latency_increase_plus1``
+      -
+    * - __u8
+      - ``log2_min_luma_coding_block_size_minus3``
+      -
+    * - __u8
+      - ``log2_diff_max_min_luma_coding_block_size``
+      -
+    * - __u8
+      - ``log2_min_luma_transform_block_size_minus2``
+      -
+    * - __u8
+      - ``log2_diff_max_min_luma_transform_block_size``
+      -
+    * - __u8
+      - ``max_transform_hierarchy_depth_inter``
+      -
+    * - __u8
+      - ``max_transform_hierarchy_depth_intra``
+      -
+    * - __u8
+      - ``pcm_sample_bit_depth_luma_minus1``
+      -
+    * - __u8
+      - ``pcm_sample_bit_depth_chroma_minus1``
+      -
+    * - __u8
+      - ``log2_min_pcm_luma_coding_block_size_minus3``
+      -
+    * - __u8
+      - ``log2_diff_max_min_pcm_luma_coding_block_size``
+      -
+    * - __u8
+      - ``num_short_term_ref_pic_sets``
+      -
+    * - __u8
+      - ``num_long_term_ref_pics_sps``
+      -
+    * - __u8
+      - ``chroma_format_idc``
+      -
+    * - __u64
+      - ``flags``
+      - See :ref:`Sequence Parameter Set Flags <hevc_sps_flags>`
+
+.. _hevc_sps_flags:
+
+``Sequence Parameter Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE``
+      - 0x00000001
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED``
+      - 0x00000002
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_AMP_ENABLED``
+      - 0x00000004
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET``
+      - 0x00000008
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_PCM_ENABLED``
+      - 0x00000010
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED``
+      - 0x00000020
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT``
+      - 0x00000040
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED``
+      - 0x00000080
+      -
+    * - ``V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED``
+      - 0x00000100
+      -
+
+``V4L2_CID_MPEG_VIDEO_HEVC_PPS (struct)``
+    Specifies the Picture Parameter Set fields (as extracted from the
+    bitstream) for the associated HEVC slice data.
+    These bitstream parameters are defined according to :ref:`hevc`.
+    They are described in section 7.4.3.3 "Picture parameter set RBSP
+    semantics" of the specification.
+
+.. c:type:: v4l2_ctrl_hevc_pps
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_hevc_pps
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``num_extra_slice_header_bits``
+      -
+    * - __s8
+      - ``init_qp_minus26``
+      -
+    * - __u8
+      - ``diff_cu_qp_delta_depth``
+      -
+    * - __s8
+      - ``pps_cb_qp_offset``
+      -
+    * - __s8
+      - ``pps_cr_qp_offset``
+      -
+    * - __u8
+      - ``num_tile_columns_minus1``
+      -
+    * - __u8
+      - ``num_tile_rows_minus1``
+      -
+    * - __u8
+      - ``column_width_minus1[20]``
+      -
+    * - __u8
+      - ``row_height_minus1[22]``
+      -
+    * - __s8
+      - ``pps_beta_offset_div2``
+      -
+    * - __s8
+      - ``pps_tc_offset_div2``
+      -
+    * - __u8
+      - ``log2_parallel_merge_level_minus2``
+      -
+    * - __u8
+      - ``padding[4]``
+      - Applications and drivers must set this to zero.
+    * - __u64
+      - ``flags``
+      - See :ref:`Picture Parameter Set Flags <hevc_pps_flags>`
+
+.. _hevc_pps_flags:
+
+``Picture Parameter Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT``
+      - 0x00000001
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT``
+      - 0x00000002
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED``
+      - 0x00000004
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT``
+      - 0x00000008
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED``
+      - 0x00000010
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED``
+      - 0x00000020
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED``
+      - 0x00000040
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT``
+      - 0x00000080
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED``
+      - 0x00000100
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED``
+      - 0x00000200
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED``
+      - 0x00000400
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_TILES_ENABLED``
+      - 0x00000800
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED``
+      - 0x00001000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED``
+      - 0x00002000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED``
+      - 0x00004000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED``
+      - 0x00008000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER``
+      - 0x00010000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT``
+      - 0x00020000
+      -
+    * - ``V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT``
+      - 0x00040000
+      -
+
+``V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (struct)``
+    Specifies various slice-specific parameters, especially from the NAL unit
+    header, general slice segment header and weighted prediction parameter
+    parts of the bitstream.
+    These bitstream parameters are defined according to :ref:`hevc`.
+    They are described in section 7.4.7 "General slice segment header
+    semantics" of the specification.
+
+.. c:type:: v4l2_ctrl_hevc_slice_params
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_hevc_slice_params
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u32
+      - ``bit_size``
+      - Size (in bits) of the current slice data.
+    * - __u32
+      - ``data_bit_offset``
+      - Offset (in bits) to the video data in the current slice data.
+    * - __u8
+      - ``nal_unit_type``
+      -
+    * - __u8
+      - ``nuh_temporal_id_plus1``
+      -
+    * - __u8
+      - ``slice_type``
+      -
+	(V4L2_HEVC_SLICE_TYPE_I, V4L2_HEVC_SLICE_TYPE_P or
+	V4L2_HEVC_SLICE_TYPE_B).
+    * - __u8
+      - ``colour_plane_id``
+      -
+    * - __u16
+      - ``slice_pic_order_cnt``
+      -
+    * - __u8
+      - ``num_ref_idx_l0_active_minus1``
+      -
+    * - __u8
+      - ``num_ref_idx_l1_active_minus1``
+      -
+    * - __u8
+      - ``collocated_ref_idx``
+      -
+    * - __u8
+      - ``five_minus_max_num_merge_cand``
+      -
+    * - __s8
+      - ``slice_qp_delta``
+      -
+    * - __s8
+      - ``slice_cb_qp_offset``
+      -
+    * - __s8
+      - ``slice_cr_qp_offset``
+      -
+    * - __s8
+      - ``slice_act_y_qp_offset``
+      -
+    * - __s8
+      - ``slice_act_cb_qp_offset``
+      -
+    * - __s8
+      - ``slice_act_cr_qp_offset``
+      -
+    * - __s8
+      - ``slice_beta_offset_div2``
+      -
+    * - __s8
+      - ``slice_tc_offset_div2``
+      -
+    * - __u8
+      - ``pic_struct``
+      -
+    * - __u8
+      - ``num_active_dpb_entries``
+      - The number of entries in ``dpb``.
+    * - __u8
+      - ``ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - The list of L0 reference elements as indices in the DPB.
+    * - __u8
+      - ``ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - The list of L1 reference elements as indices in the DPB.
+    * - __u8
+      - ``num_rps_poc_st_curr_before``
+      - The number of reference pictures in the short-term set that come before
+        the current frame.
+    * - __u8
+      - ``num_rps_poc_st_curr_after``
+      - The number of reference pictures in the short-term set that come after
+        the current frame.
+    * - __u8
+      - ``num_rps_poc_lt_curr``
+      - The number of reference pictures in the long-term set.
+    * - __u8
+      - ``padding[7]``
+      - Applications and drivers must set this to zero.
+    * - struct :c:type:`v4l2_hevc_dpb_entry`
+      - ``dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - The decoded picture buffer, for meta-data about reference frames.
+    * - struct :c:type:`v4l2_hevc_pred_weight_table`
+      - ``pred_weight_table``
+      - The prediction weight coefficients for inter-picture prediction.
+    * - __u64
+      - ``flags``
+      - See :ref:`Slice Parameters Flags <hevc_slice_params_flags>`
+
+.. _hevc_slice_params_flags:
+
+``Slice Parameters Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA``
+      - 0x00000001
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA``
+      - 0x00000002
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED``
+      - 0x00000004
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO``
+      - 0x00000008
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT``
+      - 0x00000010
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0``
+      - 0x00000020
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV``
+      - 0x00000040
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED``
+      - 0x00000080
+      -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED``
+      - 0x00000100
+      -
+
+.. c:type:: v4l2_hevc_dpb_entry
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_hevc_dpb_entry
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u64
+      - ``timestamp``
+      - Timestamp of the V4L2 capture buffer to use as reference, used
+        with B-coded and P-coded frames. The timestamp refers to the
+	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
+	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+    * - __u8
+      - ``rps``
+      - The reference set for the reference frame
+        (V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE,
+        V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER or
+        V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR)
+    * - __u8
+      - ``field_pic``
+      - Whether the reference is a field picture or a frame.
+    * - __u16
+      - ``pic_order_cnt[2]``
+      - The picture order count of the reference. Only the first element of the
+        array is used for frame pictures, while the first element identifies the
+        top field and the second the bottom field in field-coded pictures.
+    * - __u8
+      - ``padding[2]``
+      - Applications and drivers must set this to zero.
+
+.. c:type:: v4l2_hevc_pred_weight_table
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_hevc_pred_weight_table
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``luma_log2_weight_denom``
+      -
+    * - __s8
+      - ``delta_chroma_log2_weight_denom``
+      -
+    * - __s8
+      - ``delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      -
+    * - __s8
+      - ``luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      -
+    * - __s8
+      - ``delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
+      -
+    * - __s8
+      - ``chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
+      -
+    * - __s8
+      - ``delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      -
+    * - __s8
+      - ``luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      -
+    * - __s8
+      - ``delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
+      -
+    * - __s8
+      - ``chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
+      -
+    * - __u8
+      - ``padding[6]``
+      - Applications and drivers must set this to zero.
+
+``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (enum)``
+    Specifies the decoding mode to use. Currently exposes slice-based and
+    frame-based decoding but new modes might be added later on.
+    This control is used as a modifier for V4L2_PIX_FMT_HEVC_SLICE
+    pixel format. Applications that support V4L2_PIX_FMT_HEVC_SLICE
+    are required to set this control in order to specify the decoding mode
+    that is expected for the buffer.
+    Drivers may expose a single or multiple decoding modes, depending
+    on what they can support.
+
+    .. note::
+
+       This menu control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_mpeg_video_hevc_decode_mode
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED``
+      - 0
+      - Decoding is done at the slice granularity.
+        The OUTPUT buffer must contain a single slice.
+    * - ``V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED``
+      - 1
+      - Decoding is done at the frame granularity.
+        The OUTPUT buffer must contain all slices needed to decode the
+        frame. The OUTPUT buffer must also contain both fields.
+
+``V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (enum)``
+    Specifies the HEVC slice start code expected for each slice.
+    This control is used as a modifier for V4L2_PIX_FMT_HEVC_SLICE
+    pixel format. Applications that support V4L2_PIX_FMT_HEVC_SLICE
+    are required to set this control in order to specify the start code
+    that is expected for the buffer.
+    Drivers may expose a single or multiple start codes, depending
+    on what they can support.
+
+    .. note::
+
+       This menu control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_mpeg_video_hevc_start_code
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE``
+      - 0
+      - Selecting this value specifies that HEVC slices are passed
+        to the driver without any start code.
+    * - ``V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B``
+      - 1
+      - Selecting this value specifies that HEVC slices are expected
+        to be prefixed by Annex B start codes. According to :ref:`hevc`
+        valid start codes can be 3-bytes 0x000001 or 4-bytes 0x00000001.
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-flash.rst b/Documentation/media/uapi/v4l/ext-ctrls-flash.rst
index eff056b17167..b9a6b08fbf32 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-flash.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-flash.rst
@@ -98,7 +98,7 @@ Flash Control IDs
 	V4L2_CID_FLASH_STROBE control.
     * - ``V4L2_FLASH_STROBE_SOURCE_EXTERNAL``
       - The flash strobe is triggered by an external source. Typically
-	this is a sensor, which makes it possible to synchronises the
+	this is a sensor, which makes it possible to synchronise the
 	flash strobe start to exposure start.
 
 
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-image-source.rst b/Documentation/media/uapi/v4l/ext-ctrls-image-source.rst
index 2c3ab5796d76..2d3e2b83d6dd 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-image-source.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-image-source.rst
@@ -55,3 +55,13 @@ Image Source Control IDs
 
 ``V4L2_CID_TEST_PATTERN_GREENB (integer)``
     Test pattern green (next to blue) colour component.
+
+``V4L2_CID_UNIT_CELL_SIZE (struct)``
+    This control returns the unit cell size in nanometers. The struct
+    :c:type:`v4l2_area` provides the width and the height in separate
+    fields to take into consideration asymmetric pixels.
+    This control does not take into consideration any possible hardware
+    binning.
+    The unit cell consists of the whole area of the pixel, sensitive and
+    non-sensitive.
+    This control is required for automatic calibration of sensors/cameras.
diff --git a/Documentation/media/uapi/v4l/meta-formats.rst b/Documentation/media/uapi/v4l/meta-formats.rst
index b10ca9ee3968..74c8659ee9d6 100644
--- a/Documentation/media/uapi/v4l/meta-formats.rst
+++ b/Documentation/media/uapi/v4l/meta-formats.rst
@@ -24,3 +24,4 @@ These formats are used for the :ref:`metadata` interface only.
     pixfmt-meta-uvc
     pixfmt-meta-vsp1-hgo
     pixfmt-meta-vsp1-hgt
+    pixfmt-meta-vivid
diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index 292fdc116c77..561bda112809 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -61,10 +61,10 @@ Compressed Formats
 
       - ``V4L2_PIX_FMT_H264_SLICE``
       - 'S264'
-      - H264 parsed slice data, without the start code and as
-	extracted from the H264 bitstream.  This format is adapted for
-	stateless video decoders that implement an H264 pipeline
-	(using the :ref:`mem2mem` and :ref:`media-request-api`).
+      - H264 parsed slice data, including slice headers, either with or
+	without the start code, as extracted from the H264 bitstream.
+	This format is adapted for stateless video decoders that implement an
+	H264 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
 	This pixelformat has two modifiers that must be set at least once
 	through the ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE``
         and ``V4L2_CID_MPEG_VIDEO_H264_START_CODE`` controls.
@@ -80,6 +80,10 @@ Compressed Formats
 	appropriate number of macroblocks to decode a full
 	corresponding frame to the matching capture buffer.
 
+	The syntax for this format is documented in :ref:`h264`, section
+	7.3.2.8 "Slice layer without partitioning RBSP syntax" and the following
+	sections.
+
 	.. note::
 
 	   This format is not yet part of the public kernel API and it
@@ -188,6 +192,29 @@ Compressed Formats
 	If :ref:`VIDIOC_ENUM_FMT` reports ``V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM``
 	then the decoder has no	requirements since it can parse all the
 	information from the raw bytestream.
+    * .. _V4L2-PIX-FMT-HEVC-SLICE:
+
+      - ``V4L2_PIX_FMT_HEVC_SLICE``
+      - 'S265'
+      - HEVC parsed slice data, as extracted from the HEVC bitstream.
+	This format is adapted for stateless video decoders that implement a
+	HEVC pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
+	This pixelformat has two modifiers that must be set at least once
+	through the ``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE``
+        and ``V4L2_CID_MPEG_VIDEO_HEVC_START_CODE`` controls.
+	Metadata associated with the frame to decode is required to be passed
+	through the following controls :
+        * ``V4L2_CID_MPEG_VIDEO_HEVC_SPS``
+        * ``V4L2_CID_MPEG_VIDEO_HEVC_PPS``
+        * ``V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS``
+	See the :ref:`associated Codec Control IDs <v4l2-mpeg-hevc>`.
+	Buffers associated with this pixel format must contain the appropriate
+	number of macroblocks to decode a full corresponding frame.
+
+	.. note::
+
+	   This format is not yet part of the public kernel API and it
+	   is expected to change.
     * .. _V4L2-PIX-FMT-FWHT:
 
       - ``V4L2_PIX_FMT_FWHT``
diff --git a/Documentation/media/uapi/v4l/pixfmt-meta-vivid.rst b/Documentation/media/uapi/v4l/pixfmt-meta-vivid.rst
new file mode 100644
index 000000000000..eed20eaefe24
--- /dev/null
+++ b/Documentation/media/uapi/v4l/pixfmt-meta-vivid.rst
@@ -0,0 +1,60 @@
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL 2.0 or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation version 2 of
+..    the License.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+
+.. _v4l2-meta-fmt-vivid:
+
+*******************************
+V4L2_META_FMT_VIVID ('VIVD')
+*******************************
+
+VIVID Metadata Format
+
+
+Description
+===========
+
+This describes metadata format used by the vivid driver.
+
+It sets Brightness, Saturation, Contrast and Hue, each of which maps to
+corresponding controls of the vivid driver with respect to the range and default values.
+
+It contains the following fields:
+
+.. flat-table:: VIVID Metadata
+    :widths: 1 4
+    :header-rows:  1
+    :stub-columns: 0
+
+    * - Field
+      - Description
+    * - u16 brightness;
+      - Image brightness, the value is in the range 0 to 255, with the default value as 128.
+    * - u16 contrast;
+      - Image contrast, the value is in the range 0 to 255, with the default value as 128.
+    * - u16 saturation;
+      - Image color saturation, the value is in the range 0 to 255, with the default value as 128.
+    * - s16 hue;
+      - Image color balance, the value is in the range -128 to 128, with the default value as 0.
diff --git a/Documentation/media/uapi/v4l/v4l2-selection-targets.rst b/Documentation/media/uapi/v4l/v4l2-selection-targets.rst
index f74f239b0510..aae0c0013eb1 100644
--- a/Documentation/media/uapi/v4l/v4l2-selection-targets.rst
+++ b/Documentation/media/uapi/v4l/v4l2-selection-targets.rst
@@ -38,8 +38,10 @@ of the two interfaces they are used.
     * - ``V4L2_SEL_TGT_CROP_DEFAULT``
       - 0x0001
       - Suggested cropping rectangle that covers the "whole picture".
+        This includes only active pixels and excludes other non-active
+        pixels such as black pixels.
+      - Yes
       - Yes
-      - No
     * - ``V4L2_SEL_TGT_CROP_BOUNDS``
       - 0x0002
       - Bounds of the crop rectangle. All valid crop rectangles fit inside
diff --git a/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst b/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst
index 57f0066f4cff..f1a504836f31 100644
--- a/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst
+++ b/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst
@@ -208,7 +208,15 @@ introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decod
 	been started yet, the driver will return an ``EPERM`` error code. When
 	the decoder is already running, this command does nothing. No
 	flags are defined for this command.
-
+    * - ``V4L2_DEC_CMD_FLUSH``
+      - 4
+      - Flush any held capture buffers. Only valid for stateless decoders.
+	This command is typically used when the application reached the
+	end of the stream and the last output buffer had the
+	``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag set. This would prevent
+	dequeueing the capture buffer containing the last decoded frame.
+	So this command can be used to explicitly flush that final decoded
+	frame. This command does nothing if there are no held capture buffers.
 
 Return Value
 ============
diff --git a/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst b/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst
index 13dc1a986249..271cac18afbb 100644
--- a/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst
+++ b/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst
@@ -199,6 +199,11 @@ still cause this situation.
       - A pointer to a matrix control of unsigned 32-bit values. Valid if
 	this control is of type ``V4L2_CTRL_TYPE_U32``.
     * -
+      - :c:type:`v4l2_area` *
+      - ``p_area``
+      - A pointer to a struct :c:type:`v4l2_area`. Valid if this control is
+        of type ``V4L2_CTRL_TYPE_AREA``.
+    * -
       - void *
       - ``ptr``
       - A pointer to a compound type which can be an N-dimensional array
diff --git a/Documentation/media/uapi/v4l/vidioc-g-fbuf.rst b/Documentation/media/uapi/v4l/vidioc-g-fbuf.rst
index 7b6179627803..2d197e6bba8f 100644
--- a/Documentation/media/uapi/v4l/vidioc-g-fbuf.rst
+++ b/Documentation/media/uapi/v4l/vidioc-g-fbuf.rst
@@ -63,7 +63,7 @@ EINVAL error code when overlays are not supported.
 
 To set the parameters for a *Video Output Overlay*, applications must
 initialize the ``flags`` field of a struct
-struct :c:type:`v4l2_framebuffer`. Since the framebuffer is
+:c:type:`v4l2_framebuffer`. Since the framebuffer is
 implemented on the TV card all other parameters are determined by the
 driver. When an application calls :ref:`VIDIOC_S_FBUF <VIDIOC_G_FBUF>` with a pointer to
 this structure, the driver prepares for the overlay and returns the
diff --git a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
index a3d56ffbf4cc..6690928e657b 100644
--- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
+++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
@@ -443,6 +443,12 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_mpeg2_quantization`, containing MPEG-2
 	quantization matrices for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_AREA``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_area`, containing the width and the height
+        of a rectangular area. Units depend on the use case.
     * - ``V4L2_CTRL_TYPE_H264_SPS``
       - n/a
       - n/a
@@ -473,6 +479,24 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_h264_decode_params`, containing H264
 	decode parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_HEVC_SPS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_hevc_sps`, containing HEVC Sequence
+	Parameter Set for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_HEVC_PPS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_hevc_pps`, containing HEVC Picture
+	Parameter Set for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_hevc_slice_params`, containing HEVC
+	slice parameters for stateless video decoders.
 
 .. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
 
diff --git a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
index d7faef10e39b..d0c643db477a 100644
--- a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
+++ b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
@@ -125,6 +125,7 @@ aborting or finishing any DMA in progress, an implicit
 .. _V4L2-BUF-CAP-SUPPORTS-DMABUF:
 .. _V4L2-BUF-CAP-SUPPORTS-REQUESTS:
 .. _V4L2-BUF-CAP-SUPPORTS-ORPHANED-BUFS:
+.. _V4L2-BUF-CAP-SUPPORTS-M2M-HOLD-CAPTURE-BUF:
 
 .. cssclass:: longtable
 
@@ -150,6 +151,11 @@ aborting or finishing any DMA in progress, an implicit
       - The kernel allows calling :ref:`VIDIOC_REQBUFS` while buffers are still
         mapped or exported via DMABUF. These orphaned buffers will be freed
         when they are unmapped or when the exported DMABUF fds are closed.
+    * - ``V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF``
+      - 0x00000020
+      - Only valid for stateless decoders. If set, then userspace can set the
+        ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag to hold off on returning the
+	capture buffer until the OUTPUT timestamp changes.
 
 Return Value
 ============
diff --git a/Documentation/media/v4l-drivers/imx.rst b/Documentation/media/v4l-drivers/imx.rst
index 1d7eb8c7bd5c..1246573c1019 100644
--- a/Documentation/media/v4l-drivers/imx.rst
+++ b/Documentation/media/v4l-drivers/imx.rst
@@ -515,10 +515,10 @@ Streaming can then begin independently on the capture device nodes
 be used to select any supported YUV pixelformat on the capture device
 nodes, including planar.
 
-SabreAuto with ADV7180 decoder
-------------------------------
+i.MX6Q SabreAuto with ADV7180 decoder
+-------------------------------------
 
-On the SabreAuto, an on-board ADV7180 SD decoder is connected to the
+On the i.MX6Q SabreAuto, an on-board ADV7180 SD decoder is connected to the
 parallel bus input on the internal video mux to IPU1 CSI0.
 
 The following example configures a pipeline to capture from the ADV7180
@@ -547,8 +547,6 @@ This example configures a pipeline to capture from the ADV7180
 video decoder, assuming PAL 720x576 input signals, with Motion
 Compensated de-interlacing. The adv7180 must output sequential or
 alternating fields (field type 'seq-tb' for PAL, or 'alternate').
-$outputfmt can be any format supported by the ipu1_ic_prpvf entity
-at its output pad:
 
 .. code-block:: none
 
@@ -565,11 +563,70 @@ at its output pad:
    media-ctl -V "'ipu1_csi0':1 [fmt:AYUV32/720x576]"
    media-ctl -V "'ipu1_vdic':2 [fmt:AYUV32/720x576 field:none]"
    media-ctl -V "'ipu1_ic_prp':2 [fmt:AYUV32/720x576 field:none]"
-   media-ctl -V "'ipu1_ic_prpvf':1 [fmt:$outputfmt field:none]"
+   media-ctl -V "'ipu1_ic_prpvf':1 [fmt:AYUV32/720x576 field:none]"
+   # Configure "ipu1_ic_prpvf capture" interface (assumed at /dev/video2)
+   v4l2-ctl -d2 --set-fmt-video=field=none
+
+Streaming can then begin on /dev/video2. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video2.
+
+This platform accepts Composite Video analog inputs to the ADV7180 on
+Ain1 (connector J42).
+
+i.MX6DL SabreAuto with ADV7180 decoder
+--------------------------------------
+
+On the i.MX6DL SabreAuto, an on-board ADV7180 SD decoder is connected to the
+parallel bus input on the internal video mux to IPU1 CSI0.
+
+The following example configures a pipeline to capture from the ADV7180
+video decoder, assuming NTSC 720x480 input signals, using simple
+interweave (unconverted and without motion compensation). The adv7180
+must output sequential or alternating fields (field type 'seq-bt' for
+NTSC, or 'alternate'):
+
+.. code-block:: none
+
+   # Setup links
+   media-ctl -l "'adv7180 4-0021':0 -> 'ipu1_csi0_mux':4[1]"
+   media-ctl -l "'ipu1_csi0_mux':5 -> 'ipu1_csi0':0[1]"
+   media-ctl -l "'ipu1_csi0':2 -> 'ipu1_csi0 capture':0[1]"
+   # Configure pads
+   media-ctl -V "'adv7180 4-0021':0 [fmt:UYVY2X8/720x480 field:seq-bt]"
+   media-ctl -V "'ipu1_csi0_mux':5 [fmt:UYVY2X8/720x480]"
+   media-ctl -V "'ipu1_csi0':2 [fmt:AYUV32/720x480]"
+   # Configure "ipu1_csi0 capture" interface (assumed at /dev/video0)
+   v4l2-ctl -d0 --set-fmt-video=field=interlaced_bt
+
+Streaming can then begin on /dev/video0. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video0.
+
+This example configures a pipeline to capture from the ADV7180
+video decoder, assuming PAL 720x576 input signals, with Motion
+Compensated de-interlacing. The adv7180 must output sequential or
+alternating fields (field type 'seq-tb' for PAL, or 'alternate').
+
+.. code-block:: none
+
+   # Setup links
+   media-ctl -l "'adv7180 4-0021':0 -> 'ipu1_csi0_mux':4[1]"
+   media-ctl -l "'ipu1_csi0_mux':5 -> 'ipu1_csi0':0[1]"
+   media-ctl -l "'ipu1_csi0':1 -> 'ipu1_vdic':0[1]"
+   media-ctl -l "'ipu1_vdic':2 -> 'ipu1_ic_prp':0[1]"
+   media-ctl -l "'ipu1_ic_prp':2 -> 'ipu1_ic_prpvf':0[1]"
+   media-ctl -l "'ipu1_ic_prpvf':1 -> 'ipu1_ic_prpvf capture':0[1]"
+   # Configure pads
+   media-ctl -V "'adv7180 4-0021':0 [fmt:UYVY2X8/720x576 field:seq-tb]"
+   media-ctl -V "'ipu1_csi0_mux':5 [fmt:UYVY2X8/720x576]"
+   media-ctl -V "'ipu1_csi0':1 [fmt:AYUV32/720x576]"
+   media-ctl -V "'ipu1_vdic':2 [fmt:AYUV32/720x576 field:none]"
+   media-ctl -V "'ipu1_ic_prp':2 [fmt:AYUV32/720x576 field:none]"
+   media-ctl -V "'ipu1_ic_prpvf':1 [fmt:AYUV32/720x576 field:none]"
+   # Configure "ipu1_ic_prpvf capture" interface (assumed at /dev/video2)
+   v4l2-ctl -d2 --set-fmt-video=field=none
 
-Streaming can then begin on the capture device node at
-"ipu1_ic_prpvf capture". The v4l2-ctl tool can be used to select any
-supported YUV or RGB pixelformat on the capture device node.
+Streaming can then begin on /dev/video2. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video2.
 
 This platform accepts Composite Video analog inputs to the ADV7180 on
 Ain1 (connector J42).
diff --git a/Documentation/media/v4l-drivers/ipu3.rst b/Documentation/media/v4l-drivers/ipu3.rst
index c9f780404eee..e4904ab44e60 100644
--- a/Documentation/media/v4l-drivers/ipu3.rst
+++ b/Documentation/media/v4l-drivers/ipu3.rst
@@ -265,19 +265,56 @@ below.
 
 yavta -w "0x009819A1 1" /dev/v4l-subdev7
 
-RAW Bayer frames go through the following ImgU pipeline HW blocks to have the
+Certain hardware blocks in ImgU pipeline can change the frame resolution by
+cropping or scaling, these hardware blocks include Input Feeder(IF), Bayer Down
+Scaler (BDS) and Geometric Distortion Correction (GDC).
+There is also a block which can change the frame resolution - YUV Scaler, it is
+only applicable to the secondary output.
+
+RAW Bayer frames go through these ImgU pipeline hardware blocks and the final
 processed image output to the DDR memory.
 
-RAW Bayer frame -> Input Feeder -> Bayer Down Scaling (BDS) -> Geometric
-Distortion Correction (GDC) -> DDR
+.. kernel-figure::  ipu3_rcb.svg
+   :alt: ipu3 resolution blocks image
 
-The ImgU V4L2 subdev has to be configured with the supported resolutions in all
-the above HW blocks, for a given input resolution.
+   IPU3 resolution change hardware blocks
+
+**Input Feeder**
+
+Input Feeder gets the Bayer frame data from the sensor, it can enable cropping
+of lines and columns from the frame and then store pixels into device's internal
+pixel buffer which are ready to readout by following blocks.
+
+**Bayer Down Scaler**
+
+Bayer Down Scaler is capable of performing image scaling in Bayer domain, the
+downscale factor can be configured from 1X to 1/4X in each axis with
+configuration steps of 0.03125 (1/32).
 
+**Geometric Distortion Correction**
+
+Geometric Distortion Correction is used to performe correction of distortions
+and image filtering. It needs some extra filter and envelop padding pixels to
+work, so the input resolution of GDC should be larger than the output
+resolution.
+
+**YUV Scaler**
+
+YUV Scaler which similar with BDS, but it is mainly do image down scaling in
+YUV domain, it can support up to 1/12X down scaling, but it can not be applied
+to the main output.
+
+The ImgU V4L2 subdev has to be configured with the supported resolutions in all
+the above hardware blocks, for a given input resolution.
 For a given supported resolution for an input frame, the Input Feeder, Bayer
-Down Scaling and GDC blocks should be configured with the supported resolutions.
-This information can be obtained by looking at the following IPU3 ImgU
-configuration table.
+Down Scaler and GDC blocks should be configured with the supported resolutions
+as each hardware block has its own alignment requirement.
+
+You must configure the output resolution of the hardware blocks smartly to meet
+the hardware requirement along with keeping the maximum field of view.
+The intermediate resolutions can be generated by specific tool and this
+information can be obtained by looking at the following IPU3 ImgU configuration
+table.
 
 https://chromium.googlesource.com/chromiumos/overlays/board-overlays/+/master
 
diff --git a/Documentation/media/v4l-drivers/ipu3_rcb.svg b/Documentation/media/v4l-drivers/ipu3_rcb.svg
new file mode 100644
index 000000000000..d878421b42a0
--- /dev/null
+++ b/Documentation/media/v4l-drivers/ipu3_rcb.svg
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="774pt" height="152pt" viewBox="0 0 774 152" version="1.1">
+<defs>
+<g>
+<symbol overflow="visible" id="glyph0-0">
+<path style="stroke:none;" d="M 1 0 L 1 -15 L 9 -15 L 9 0 Z M 8 -1 L 8 -14 L 2 -14 L 2 -1 Z M 8 -1 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-1">
+<path style="stroke:none;" d="M 4.6875 -1.15625 C 5.519531 -1.15625 6.15625 -1.316406 6.59375 -1.640625 C 7.039062 -1.960938 7.265625 -2.441406 7.265625 -3.078125 C 7.265625 -3.460938 7.179688 -3.789062 7.015625 -4.0625 C 6.859375 -4.34375 6.644531 -4.582031 6.375 -4.78125 C 6.113281 -4.988281 5.816406 -5.171875 5.484375 -5.328125 C 5.148438 -5.484375 4.804688 -5.628906 4.453125 -5.765625 C 4.054688 -5.921875 3.675781 -6.097656 3.3125 -6.296875 C 2.945312 -6.492188 2.617188 -6.726562 2.328125 -7 C 2.046875 -7.269531 1.820312 -7.582031 1.65625 -7.9375 C 1.488281 -8.300781 1.40625 -8.726562 1.40625 -9.21875 C 1.40625 -10.300781 1.742188 -11.144531 2.421875 -11.75 C 3.097656 -12.351562 4.046875 -12.65625 5.265625 -12.65625 C 5.597656 -12.65625 5.925781 -12.628906 6.25 -12.578125 C 6.570312 -12.535156 6.875 -12.476562 7.15625 -12.40625 C 7.4375 -12.34375 7.6875 -12.265625 7.90625 -12.171875 C 8.125 -12.085938 8.300781 -12 8.4375 -11.90625 L 7.921875 -10.515625 C 7.648438 -10.679688 7.28125 -10.84375 6.8125 -11 C 6.351562 -11.15625 5.835938 -11.234375 5.265625 -11.234375 C 4.660156 -11.234375 4.140625 -11.082031 3.703125 -10.78125 C 3.265625 -10.488281 3.046875 -10.039062 3.046875 -9.4375 C 3.046875 -9.09375 3.109375 -8.800781 3.234375 -8.5625 C 3.359375 -8.320312 3.53125 -8.109375 3.75 -7.921875 C 3.96875 -7.742188 4.222656 -7.582031 4.515625 -7.4375 C 4.804688 -7.289062 5.128906 -7.144531 5.484375 -7 C 5.984375 -6.789062 6.441406 -6.578125 6.859375 -6.359375 C 7.285156 -6.148438 7.648438 -5.894531 7.953125 -5.59375 C 8.253906 -5.300781 8.488281 -4.953125 8.65625 -4.546875 C 8.820312 -4.148438 8.90625 -3.664062 8.90625 -3.09375 C 8.90625 -2.019531 8.539062 -1.191406 7.8125 -0.609375 C 7.082031 -0.0234375 6.039062 0.265625 4.6875 0.265625 C 4.238281 0.265625 3.820312 0.234375 3.4375 0.171875 C 3.050781 0.109375 2.707031 0.03125 2.40625 -0.0625 C 2.101562 -0.15625 1.835938 -0.25 1.609375 -0.34375 C 1.390625 -0.4375 1.21875 -0.519531 1.09375 -0.59375 L 1.59375 -1.953125 C 1.863281 -1.804688 2.257812 -1.632812 2.78125 -1.4375 C 3.300781 -1.25 3.9375 -1.15625 4.6875 -1.15625 Z M 4.6875 -1.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-2">
+<path style="stroke:none;" d="M 5.1875 -9.5 C 6.4375 -9.5 7.398438 -9.109375 8.078125 -8.328125 C 8.753906 -7.546875 9.09375 -6.363281 9.09375 -4.78125 L 9.09375 -4.203125 L 2.453125 -4.203125 C 2.523438 -3.242188 2.84375 -2.515625 3.40625 -2.015625 C 3.976562 -1.515625 4.773438 -1.265625 5.796875 -1.265625 C 6.390625 -1.265625 6.890625 -1.3125 7.296875 -1.40625 C 7.710938 -1.5 8.023438 -1.597656 8.234375 -1.703125 L 8.453125 -0.296875 C 8.253906 -0.191406 7.894531 -0.0820312 7.375 0.03125 C 6.851562 0.15625 6.269531 0.21875 5.625 0.21875 C 4.820312 0.21875 4.113281 0.0976562 3.5 -0.140625 C 2.894531 -0.390625 2.394531 -0.726562 2 -1.15625 C 1.601562 -1.582031 1.300781 -2.09375 1.09375 -2.6875 C 0.894531 -3.28125 0.796875 -3.925781 0.796875 -4.625 C 0.796875 -5.445312 0.921875 -6.164062 1.171875 -6.78125 C 1.429688 -7.394531 1.765625 -7.898438 2.171875 -8.296875 C 2.585938 -8.703125 3.054688 -9.003906 3.578125 -9.203125 C 4.097656 -9.398438 4.632812 -9.5 5.1875 -9.5 Z M 7.421875 -5.546875 C 7.421875 -6.328125 7.210938 -6.945312 6.796875 -7.40625 C 6.390625 -7.863281 5.84375 -8.09375 5.15625 -8.09375 C 4.769531 -8.09375 4.421875 -8.019531 4.109375 -7.875 C 3.796875 -7.726562 3.523438 -7.535156 3.296875 -7.296875 C 3.066406 -7.054688 2.882812 -6.78125 2.75 -6.46875 C 2.625 -6.164062 2.539062 -5.859375 2.5 -5.546875 Z M 7.421875 -5.546875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-3">
+<path style="stroke:none;" d="M 1.421875 -9.015625 C 2.015625 -9.160156 2.609375 -9.273438 3.203125 -9.359375 C 3.796875 -9.441406 4.351562 -9.484375 4.875 -9.484375 C 6.113281 -9.484375 7.050781 -9.160156 7.6875 -8.515625 C 8.320312 -7.878906 8.640625 -6.851562 8.640625 -5.4375 L 8.640625 0 L 7 0 L 7 -5.140625 C 7 -5.742188 6.945312 -6.226562 6.84375 -6.59375 C 6.738281 -6.96875 6.585938 -7.257812 6.390625 -7.46875 C 6.191406 -7.675781 5.957031 -7.816406 5.6875 -7.890625 C 5.414062 -7.972656 5.117188 -8.015625 4.796875 -8.015625 C 4.535156 -8.015625 4.253906 -8 3.953125 -7.96875 C 3.648438 -7.9375 3.359375 -7.894531 3.078125 -7.84375 L 3.078125 0 L 1.421875 0 Z M 1.421875 -9.015625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-4">
+<path style="stroke:none;" d="M 7.015625 -2.3125 C 7.015625 -2.644531 6.878906 -2.914062 6.609375 -3.125 C 6.335938 -3.34375 6 -3.53125 5.59375 -3.6875 C 5.1875 -3.851562 4.742188 -4.015625 4.265625 -4.171875 C 3.785156 -4.328125 3.335938 -4.515625 2.921875 -4.734375 C 2.515625 -4.960938 2.175781 -5.242188 1.90625 -5.578125 C 1.632812 -5.910156 1.5 -6.34375 1.5 -6.875 C 1.5 -7.625 1.800781 -8.25 2.40625 -8.75 C 3.007812 -9.25 3.960938 -9.5 5.265625 -9.5 C 5.765625 -9.5 6.285156 -9.460938 6.828125 -9.390625 C 7.367188 -9.316406 7.832031 -9.21875 8.21875 -9.09375 L 7.921875 -7.625 C 7.816406 -7.675781 7.671875 -7.726562 7.484375 -7.78125 C 7.296875 -7.84375 7.082031 -7.894531 6.84375 -7.9375 C 6.601562 -7.988281 6.34375 -8.023438 6.0625 -8.046875 C 5.789062 -8.078125 5.53125 -8.09375 5.28125 -8.09375 C 3.84375 -8.09375 3.125 -7.703125 3.125 -6.921875 C 3.125 -6.640625 3.257812 -6.398438 3.53125 -6.203125 C 3.800781 -6.015625 4.144531 -5.835938 4.5625 -5.671875 C 4.976562 -5.515625 5.425781 -5.351562 5.90625 -5.1875 C 6.382812 -5.019531 6.828125 -4.816406 7.234375 -4.578125 C 7.648438 -4.335938 7.992188 -4.046875 8.265625 -3.703125 C 8.546875 -3.367188 8.6875 -2.941406 8.6875 -2.421875 C 8.6875 -1.578125 8.359375 -0.925781 7.703125 -0.46875 C 7.046875 -0.0078125 6.007812 0.21875 4.59375 0.21875 C 3.957031 0.21875 3.375 0.164062 2.84375 0.0625 C 2.3125 -0.0390625 1.800781 -0.203125 1.3125 -0.421875 L 1.640625 -1.921875 C 2.109375 -1.703125 2.597656 -1.523438 3.109375 -1.390625 C 3.617188 -1.253906 4.171875 -1.1875 4.765625 -1.1875 C 6.265625 -1.1875 7.015625 -1.5625 7.015625 -2.3125 Z M 7.015625 -2.3125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-5">
+<path style="stroke:none;" d="M 9.203125 -4.640625 C 9.203125 -3.910156 9.097656 -3.25 8.890625 -2.65625 C 8.679688 -2.0625 8.390625 -1.550781 8.015625 -1.125 C 7.640625 -0.695312 7.191406 -0.363281 6.671875 -0.125 C 6.160156 0.101562 5.597656 0.21875 4.984375 0.21875 C 4.378906 0.21875 3.820312 0.101562 3.3125 -0.125 C 2.800781 -0.363281 2.359375 -0.695312 1.984375 -1.125 C 1.609375 -1.550781 1.316406 -2.0625 1.109375 -2.65625 C 0.898438 -3.25 0.796875 -3.910156 0.796875 -4.640625 C 0.796875 -5.367188 0.898438 -6.035156 1.109375 -6.640625 C 1.316406 -7.242188 1.609375 -7.753906 1.984375 -8.171875 C 2.359375 -8.585938 2.800781 -8.910156 3.3125 -9.140625 C 3.820312 -9.378906 4.378906 -9.5 4.984375 -9.5 C 5.597656 -9.5 6.160156 -9.378906 6.671875 -9.140625 C 7.191406 -8.910156 7.640625 -8.585938 8.015625 -8.171875 C 8.390625 -7.753906 8.679688 -7.242188 8.890625 -6.640625 C 9.097656 -6.035156 9.203125 -5.367188 9.203125 -4.640625 Z M 7.5 -4.640625 C 7.5 -5.691406 7.269531 -6.519531 6.8125 -7.125 C 6.363281 -7.738281 5.753906 -8.046875 4.984375 -8.046875 C 4.222656 -8.046875 3.617188 -7.738281 3.171875 -7.125 C 2.722656 -6.519531 2.5 -5.691406 2.5 -4.640625 C 2.5 -3.597656 2.722656 -2.773438 3.171875 -2.171875 C 3.617188 -1.566406 4.222656 -1.265625 4.984375 -1.265625 C 5.753906 -1.265625 6.363281 -1.566406 6.8125 -2.171875 C 7.269531 -2.773438 7.5 -3.597656 7.5 -4.640625 Z M 7.5 -4.640625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-6">
+<path style="stroke:none;" d="M 2.140625 0 L 2.140625 -8.78125 C 3.503906 -9.25 4.878906 -9.484375 6.265625 -9.484375 C 6.691406 -9.484375 7.097656 -9.460938 7.484375 -9.421875 C 7.867188 -9.390625 8.296875 -9.320312 8.765625 -9.21875 L 8.453125 -7.765625 C 8.023438 -7.878906 7.648438 -7.953125 7.328125 -7.984375 C 7.003906 -8.023438 6.648438 -8.046875 6.265625 -8.046875 C 5.453125 -8.046875 4.625 -7.929688 3.78125 -7.703125 L 3.78125 0 Z M 2.140625 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-7">
+<path style="stroke:none;" d="M 5.8125 -10.984375 L 5.8125 -1.40625 L 8.21875 -1.40625 L 8.21875 0 L 1.78125 0 L 1.78125 -1.40625 L 4.1875 -1.40625 L 4.1875 -10.984375 L 1.78125 -10.984375 L 1.78125 -12.375 L 8.21875 -12.375 L 8.21875 -10.984375 Z M 5.8125 -10.984375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-8">
+<path style="stroke:none;" d="M 1.8125 0 L 1.8125 -12.375 L 8.84375 -12.375 L 8.84375 -10.984375 L 3.453125 -10.984375 L 3.453125 -7.125 L 8.203125 -7.125 L 8.203125 -5.734375 L 3.453125 -5.734375 L 3.453125 0 Z M 1.8125 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-9">
+<path style="stroke:none;" d="M 4.078125 0.09375 C 3.878906 0.09375 3.644531 0.0859375 3.375 0.078125 C 3.113281 0.0664062 2.847656 0.0507812 2.578125 0.03125 C 2.316406 0.0078125 2.050781 -0.0195312 1.78125 -0.0625 C 1.507812 -0.101562 1.273438 -0.148438 1.078125 -0.203125 L 1.078125 -12.203125 C 1.273438 -12.253906 1.503906 -12.300781 1.765625 -12.34375 C 2.023438 -12.382812 2.289062 -12.410156 2.5625 -12.421875 C 2.84375 -12.441406 3.113281 -12.457031 3.375 -12.46875 C 3.632812 -12.488281 3.867188 -12.5 4.078125 -12.5 C 4.691406 -12.5 5.265625 -12.445312 5.796875 -12.34375 C 6.328125 -12.238281 6.789062 -12.054688 7.1875 -11.796875 C 7.582031 -11.546875 7.890625 -11.210938 8.109375 -10.796875 C 8.328125 -10.390625 8.4375 -9.878906 8.4375 -9.265625 C 8.4375 -8.960938 8.390625 -8.675781 8.296875 -8.40625 C 8.203125 -8.132812 8.070312 -7.878906 7.90625 -7.640625 C 7.738281 -7.398438 7.546875 -7.1875 7.328125 -7 C 7.109375 -6.820312 6.875 -6.6875 6.625 -6.59375 C 7.300781 -6.40625 7.867188 -6.0625 8.328125 -5.5625 C 8.785156 -5.0625 9.015625 -4.414062 9.015625 -3.625 C 9.015625 -2.394531 8.617188 -1.46875 7.828125 -0.84375 C 7.046875 -0.21875 5.796875 0.09375 4.078125 0.09375 Z M 2.71875 -5.78125 L 2.71875 -1.359375 C 2.75 -1.347656 2.898438 -1.332031 3.171875 -1.3125 C 3.441406 -1.289062 3.785156 -1.28125 4.203125 -1.28125 C 4.609375 -1.28125 5 -1.3125 5.375 -1.375 C 5.757812 -1.445312 6.097656 -1.570312 6.390625 -1.75 C 6.691406 -1.925781 6.929688 -2.160156 7.109375 -2.453125 C 7.285156 -2.753906 7.375 -3.132812 7.375 -3.59375 C 7.375 -4.007812 7.289062 -4.359375 7.125 -4.640625 C 6.957031 -4.921875 6.738281 -5.144531 6.46875 -5.3125 C 6.195312 -5.476562 5.878906 -5.597656 5.515625 -5.671875 C 5.160156 -5.742188 4.789062 -5.78125 4.40625 -5.78125 Z M 2.71875 -7.140625 L 4.015625 -7.140625 C 4.347656 -7.140625 4.679688 -7.171875 5.015625 -7.234375 C 5.347656 -7.304688 5.644531 -7.414062 5.90625 -7.5625 C 6.175781 -7.707031 6.390625 -7.90625 6.546875 -8.15625 C 6.710938 -8.414062 6.796875 -8.738281 6.796875 -9.125 C 6.796875 -9.476562 6.722656 -9.78125 6.578125 -10.03125 C 6.429688 -10.289062 6.238281 -10.5 6 -10.65625 C 5.757812 -10.820312 5.484375 -10.9375 5.171875 -11 C 4.859375 -11.0625 4.53125 -11.09375 4.1875 -11.09375 C 3.832031 -11.09375 3.523438 -11.085938 3.265625 -11.078125 C 3.003906 -11.078125 2.820312 -11.066406 2.71875 -11.046875 Z M 2.71875 -7.140625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-10">
+<path style="stroke:none;" d="M 9.203125 -6.203125 C 9.203125 -5.054688 9.054688 -4.082031 8.765625 -3.28125 C 8.484375 -2.476562 8.09375 -1.828125 7.59375 -1.328125 C 7.09375 -0.828125 6.5 -0.460938 5.8125 -0.234375 C 5.125 -0.015625 4.378906 0.09375 3.578125 0.09375 C 2.753906 0.09375 1.921875 -0.00390625 1.078125 -0.203125 L 1.078125 -12.203125 C 1.921875 -12.398438 2.753906 -12.5 3.578125 -12.5 C 4.378906 -12.5 5.125 -12.382812 5.8125 -12.15625 C 6.5 -11.925781 7.09375 -11.554688 7.59375 -11.046875 C 8.09375 -10.546875 8.484375 -9.894531 8.765625 -9.09375 C 9.054688 -8.300781 9.203125 -7.335938 9.203125 -6.203125 Z M 2.71875 -1.375 C 3.050781 -1.332031 3.390625 -1.3125 3.734375 -1.3125 C 4.335938 -1.3125 4.875 -1.398438 5.34375 -1.578125 C 5.8125 -1.765625 6.203125 -2.054688 6.515625 -2.453125 C 6.835938 -2.847656 7.082031 -3.351562 7.25 -3.96875 C 7.425781 -4.59375 7.515625 -5.335938 7.515625 -6.203125 C 7.515625 -7.878906 7.191406 -9.109375 6.546875 -9.890625 C 5.898438 -10.679688 4.945312 -11.078125 3.6875 -11.078125 C 3.507812 -11.078125 3.335938 -11.070312 3.171875 -11.0625 C 3.003906 -11.0625 2.851562 -11.046875 2.71875 -11.015625 Z M 2.71875 -1.375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-11">
+<path style="stroke:none;" d="M 7.453125 -6.09375 L 9.09375 -6.09375 L 9.09375 -0.296875 C 8.84375 -0.203125 8.4375 -0.0859375 7.875 0.046875 C 7.320312 0.191406 6.664062 0.265625 5.90625 0.265625 C 5.15625 0.265625 4.472656 0.125 3.859375 -0.15625 C 3.242188 -0.445312 2.71875 -0.863281 2.28125 -1.40625 C 1.851562 -1.957031 1.519531 -2.632812 1.28125 -3.4375 C 1.039062 -4.25 0.921875 -5.171875 0.921875 -6.203125 C 0.921875 -7.242188 1.050781 -8.160156 1.3125 -8.953125 C 1.582031 -9.753906 1.945312 -10.425781 2.40625 -10.96875 C 2.863281 -11.519531 3.398438 -11.9375 4.015625 -12.21875 C 4.628906 -12.507812 5.289062 -12.65625 6 -12.65625 C 6.457031 -12.65625 6.859375 -12.617188 7.203125 -12.546875 C 7.546875 -12.484375 7.835938 -12.40625 8.078125 -12.3125 C 8.328125 -12.226562 8.53125 -12.132812 8.6875 -12.03125 C 8.851562 -11.925781 8.976562 -11.847656 9.0625 -11.796875 L 8.515625 -10.421875 C 8.210938 -10.660156 7.847656 -10.851562 7.421875 -11 C 7.003906 -11.15625 6.5625 -11.234375 6.09375 -11.234375 C 5.59375 -11.234375 5.125 -11.113281 4.6875 -10.875 C 4.257812 -10.632812 3.890625 -10.296875 3.578125 -9.859375 C 3.273438 -9.421875 3.035156 -8.890625 2.859375 -8.265625 C 2.679688 -7.648438 2.59375 -6.960938 2.59375 -6.203125 C 2.59375 -5.453125 2.671875 -4.769531 2.828125 -4.15625 C 2.984375 -3.539062 3.207031 -3.015625 3.5 -2.578125 C 3.789062 -2.140625 4.148438 -1.796875 4.578125 -1.546875 C 5.015625 -1.304688 5.515625 -1.1875 6.078125 -1.1875 C 6.460938 -1.1875 6.757812 -1.210938 6.96875 -1.265625 C 7.1875 -1.316406 7.347656 -1.367188 7.453125 -1.421875 Z M 7.453125 -6.09375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-12">
+<path style="stroke:none;" d="M 9.203125 -0.515625 C 8.734375 -0.253906 8.234375 -0.0625 7.703125 0.0625 C 7.179688 0.195312 6.617188 0.265625 6.015625 0.265625 C 5.285156 0.265625 4.609375 0.132812 3.984375 -0.125 C 3.367188 -0.382812 2.832031 -0.773438 2.375 -1.296875 C 1.925781 -1.828125 1.570312 -2.5 1.3125 -3.3125 C 1.050781 -4.132812 0.921875 -5.097656 0.921875 -6.203125 C 0.921875 -7.253906 1.054688 -8.179688 1.328125 -8.984375 C 1.597656 -9.785156 1.96875 -10.457031 2.4375 -11 C 2.90625 -11.539062 3.453125 -11.953125 4.078125 -12.234375 C 4.703125 -12.515625 5.367188 -12.65625 6.078125 -12.65625 C 6.566406 -12.65625 7.066406 -12.585938 7.578125 -12.453125 C 8.097656 -12.328125 8.601562 -12.109375 9.09375 -11.796875 L 8.625 -10.4375 C 7.738281 -10.945312 6.910156 -11.203125 6.140625 -11.203125 C 5.585938 -11.203125 5.09375 -11.082031 4.65625 -10.84375 C 4.226562 -10.613281 3.859375 -10.28125 3.546875 -9.84375 C 3.242188 -9.40625 3.007812 -8.878906 2.84375 -8.265625 C 2.675781 -7.648438 2.59375 -6.960938 2.59375 -6.203125 C 2.59375 -5.347656 2.679688 -4.609375 2.859375 -3.984375 C 3.046875 -3.359375 3.296875 -2.835938 3.609375 -2.421875 C 3.929688 -2.003906 4.316406 -1.695312 4.765625 -1.5 C 5.210938 -1.300781 5.695312 -1.203125 6.21875 -1.203125 C 6.601562 -1.203125 7.007812 -1.25 7.4375 -1.34375 C 7.863281 -1.445312 8.304688 -1.625 8.765625 -1.875 Z M 9.203125 -0.515625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-0">
+<path style="stroke:none;" d="M 0.59375 0 L 0.59375 -9 L 5.40625 -9 L 5.40625 0 Z M 4.796875 -0.59375 L 4.796875 -8.40625 L 1.203125 -8.40625 L 1.203125 -0.59375 Z M 4.796875 -0.59375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-1">
+<path style="stroke:none;" d="M 2.515625 0 L 2.515625 -2.765625 C 2.023438 -3.554688 1.582031 -4.332031 1.1875 -5.09375 C 0.789062 -5.851562 0.445312 -6.628906 0.15625 -7.421875 L 1.265625 -7.421875 C 1.492188 -6.753906 1.757812 -6.113281 2.0625 -5.5 C 2.363281 -4.882812 2.6875 -4.253906 3.03125 -3.609375 C 3.394531 -4.285156 3.71875 -4.929688 4 -5.546875 C 4.28125 -6.160156 4.539062 -6.785156 4.78125 -7.421875 L 5.859375 -7.421875 C 5.554688 -6.640625 5.207031 -5.875 4.8125 -5.125 C 4.414062 -4.382812 3.976562 -3.601562 3.5 -2.78125 L 3.5 0 Z M 2.515625 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-2">
+<path style="stroke:none;" d="M 3 0.15625 C 2.5625 0.15625 2.1875 0.09375 1.875 -0.03125 C 1.570312 -0.164062 1.320312 -0.347656 1.125 -0.578125 C 0.9375 -0.804688 0.796875 -1.085938 0.703125 -1.421875 C 0.617188 -1.765625 0.578125 -2.144531 0.578125 -2.5625 L 0.578125 -7.421875 L 1.5625 -7.421875 L 1.5625 -2.65625 C 1.5625 -2.28125 1.59375 -1.96875 1.65625 -1.71875 C 1.726562 -1.46875 1.828125 -1.265625 1.953125 -1.109375 C 2.078125 -0.960938 2.222656 -0.859375 2.390625 -0.796875 C 2.566406 -0.734375 2.769531 -0.703125 3 -0.703125 C 3.226562 -0.703125 3.425781 -0.734375 3.59375 -0.796875 C 3.769531 -0.859375 3.921875 -0.960938 4.046875 -1.109375 C 4.171875 -1.265625 4.265625 -1.46875 4.328125 -1.71875 C 4.398438 -1.96875 4.4375 -2.28125 4.4375 -2.65625 L 4.4375 -7.421875 L 5.421875 -7.421875 L 5.421875 -2.5625 C 5.421875 -2.144531 5.375 -1.765625 5.28125 -1.421875 C 5.195312 -1.085938 5.054688 -0.804688 4.859375 -0.578125 C 4.671875 -0.347656 4.421875 -0.164062 4.109375 -0.03125 C 3.804688 0.09375 3.4375 0.15625 3 0.15625 Z M 3 0.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-3">
+<path style="stroke:none;" d="M 1.21875 -7.421875 C 1.320312 -6.921875 1.445312 -6.375 1.59375 -5.78125 C 1.738281 -5.1875 1.890625 -4.585938 2.046875 -3.984375 C 2.210938 -3.390625 2.378906 -2.820312 2.546875 -2.28125 C 2.722656 -1.738281 2.882812 -1.265625 3.03125 -0.859375 C 3.15625 -1.265625 3.300781 -1.742188 3.46875 -2.296875 C 3.644531 -2.847656 3.816406 -3.421875 3.984375 -4.015625 C 4.148438 -4.609375 4.304688 -5.203125 4.453125 -5.796875 C 4.609375 -6.390625 4.734375 -6.929688 4.828125 -7.421875 L 5.859375 -7.421875 C 5.796875 -7.109375 5.691406 -6.679688 5.546875 -6.140625 C 5.398438 -5.597656 5.226562 -4.992188 5.03125 -4.328125 C 4.832031 -3.660156 4.609375 -2.953125 4.359375 -2.203125 C 4.117188 -1.453125 3.863281 -0.71875 3.59375 0 L 2.375 0 C 2.125 -0.71875 1.878906 -1.445312 1.640625 -2.1875 C 1.410156 -2.9375 1.195312 -3.644531 1 -4.3125 C 0.800781 -4.976562 0.628906 -5.582031 0.484375 -6.125 C 0.335938 -6.675781 0.226562 -7.109375 0.15625 -7.421875 Z M 1.21875 -7.421875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-4">
+<path style="stroke:none;" d=""/>
+</symbol>
+<symbol overflow="visible" id="glyph1-5">
+<path style="stroke:none;" d="M 5.515625 -3.71875 C 5.515625 -3.03125 5.425781 -2.445312 5.25 -1.96875 C 5.082031 -1.488281 4.847656 -1.097656 4.546875 -0.796875 C 4.253906 -0.492188 3.898438 -0.273438 3.484375 -0.140625 C 3.078125 -0.00390625 2.628906 0.0625 2.140625 0.0625 C 1.648438 0.0625 1.148438 0 0.640625 -0.125 L 0.640625 -7.3125 C 1.148438 -7.4375 1.648438 -7.5 2.140625 -7.5 C 2.628906 -7.5 3.078125 -7.429688 3.484375 -7.296875 C 3.898438 -7.160156 4.253906 -6.941406 4.546875 -6.640625 C 4.847656 -6.335938 5.082031 -5.941406 5.25 -5.453125 C 5.425781 -4.972656 5.515625 -4.394531 5.515625 -3.71875 Z M 1.625 -0.828125 C 1.832031 -0.804688 2.039062 -0.796875 2.25 -0.796875 C 2.601562 -0.796875 2.921875 -0.847656 3.203125 -0.953125 C 3.484375 -1.054688 3.71875 -1.226562 3.90625 -1.46875 C 4.101562 -1.707031 4.253906 -2.007812 4.359375 -2.375 C 4.460938 -2.75 4.515625 -3.195312 4.515625 -3.71875 C 4.515625 -4.726562 4.316406 -5.46875 3.921875 -5.9375 C 3.535156 -6.40625 2.960938 -6.640625 2.203125 -6.640625 C 2.097656 -6.640625 1.992188 -6.640625 1.890625 -6.640625 C 1.796875 -6.640625 1.707031 -6.628906 1.625 -6.609375 Z M 1.625 -0.828125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-6">
+<path style="stroke:none;" d="M 5.515625 -2.78125 C 5.515625 -2.34375 5.453125 -1.945312 5.328125 -1.59375 C 5.203125 -1.238281 5.023438 -0.929688 4.796875 -0.671875 C 4.578125 -0.410156 4.3125 -0.210938 4 -0.078125 C 3.695312 0.0546875 3.359375 0.125 2.984375 0.125 C 2.628906 0.125 2.296875 0.0546875 1.984375 -0.078125 C 1.679688 -0.210938 1.414062 -0.410156 1.1875 -0.671875 C 0.96875 -0.929688 0.796875 -1.238281 0.671875 -1.59375 C 0.546875 -1.945312 0.484375 -2.34375 0.484375 -2.78125 C 0.484375 -3.21875 0.546875 -3.617188 0.671875 -3.984375 C 0.796875 -4.347656 0.96875 -4.65625 1.1875 -4.90625 C 1.414062 -5.15625 1.679688 -5.347656 1.984375 -5.484375 C 2.296875 -5.628906 2.628906 -5.703125 2.984375 -5.703125 C 3.359375 -5.703125 3.695312 -5.628906 4 -5.484375 C 4.3125 -5.347656 4.578125 -5.15625 4.796875 -4.90625 C 5.023438 -4.65625 5.203125 -4.347656 5.328125 -3.984375 C 5.453125 -3.617188 5.515625 -3.21875 5.515625 -2.78125 Z M 4.5 -2.78125 C 4.5 -3.414062 4.363281 -3.914062 4.09375 -4.28125 C 3.820312 -4.644531 3.453125 -4.828125 2.984375 -4.828125 C 2.523438 -4.828125 2.160156 -4.644531 1.890625 -4.28125 C 1.628906 -3.914062 1.5 -3.414062 1.5 -2.78125 C 1.5 -2.15625 1.628906 -1.660156 1.890625 -1.296875 C 2.160156 -0.929688 2.523438 -0.75 2.984375 -0.75 C 3.453125 -0.75 3.820312 -0.929688 4.09375 -1.296875 C 4.363281 -1.660156 4.5 -2.15625 4.5 -2.78125 Z M 4.5 -2.78125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-7">
+<path style="stroke:none;" d="M 4.109375 0 C 3.992188 -0.269531 3.890625 -0.515625 3.796875 -0.734375 C 3.710938 -0.960938 3.628906 -1.1875 3.546875 -1.40625 C 3.460938 -1.632812 3.378906 -1.867188 3.296875 -2.109375 C 3.210938 -2.359375 3.113281 -2.640625 3 -2.953125 C 2.882812 -2.640625 2.78125 -2.359375 2.6875 -2.109375 C 2.601562 -1.867188 2.519531 -1.632812 2.4375 -1.40625 C 2.351562 -1.1875 2.265625 -0.960938 2.171875 -0.734375 C 2.085938 -0.515625 1.984375 -0.269531 1.859375 0 L 1.109375 0 C 0.890625 -0.976562 0.707031 -1.953125 0.5625 -2.921875 C 0.414062 -3.890625 0.304688 -4.769531 0.234375 -5.5625 L 1.15625 -5.5625 C 1.1875 -5.25 1.210938 -4.941406 1.234375 -4.640625 C 1.265625 -4.347656 1.300781 -4.035156 1.34375 -3.703125 C 1.382812 -3.378906 1.429688 -3.023438 1.484375 -2.640625 C 1.535156 -2.253906 1.59375 -1.820312 1.65625 -1.34375 C 1.78125 -1.664062 1.882812 -1.945312 1.96875 -2.1875 C 2.0625 -2.425781 2.144531 -2.648438 2.21875 -2.859375 C 2.289062 -3.078125 2.359375 -3.296875 2.421875 -3.515625 C 2.492188 -3.742188 2.570312 -4 2.65625 -4.28125 L 3.390625 -4.28125 C 3.472656 -4 3.546875 -3.742188 3.609375 -3.515625 C 3.671875 -3.296875 3.738281 -3.078125 3.8125 -2.859375 C 3.882812 -2.648438 3.957031 -2.425781 4.03125 -2.1875 C 4.113281 -1.945312 4.21875 -1.671875 4.34375 -1.359375 C 4.414062 -1.796875 4.476562 -2.203125 4.53125 -2.578125 C 4.59375 -2.953125 4.640625 -3.304688 4.671875 -3.640625 C 4.710938 -3.972656 4.75 -4.296875 4.78125 -4.609375 C 4.820312 -4.921875 4.851562 -5.238281 4.875 -5.5625 L 5.765625 -5.5625 C 5.734375 -5.164062 5.6875 -4.738281 5.625 -4.28125 C 5.570312 -3.820312 5.503906 -3.351562 5.421875 -2.875 C 5.335938 -2.394531 5.25 -1.910156 5.15625 -1.421875 C 5.0625 -0.929688 4.960938 -0.457031 4.859375 0 Z M 4.109375 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-8">
+<path style="stroke:none;" d="M 0.859375 -5.40625 C 1.210938 -5.5 1.566406 -5.566406 1.921875 -5.609375 C 2.273438 -5.660156 2.609375 -5.6875 2.921875 -5.6875 C 3.671875 -5.6875 4.234375 -5.492188 4.609375 -5.109375 C 4.992188 -4.722656 5.1875 -4.109375 5.1875 -3.265625 L 5.1875 0 L 4.203125 0 L 4.203125 -3.078125 C 4.203125 -3.441406 4.171875 -3.734375 4.109375 -3.953125 C 4.046875 -4.179688 3.953125 -4.359375 3.828125 -4.484375 C 3.710938 -4.609375 3.570312 -4.691406 3.40625 -4.734375 C 3.25 -4.785156 3.070312 -4.8125 2.875 -4.8125 C 2.71875 -4.8125 2.546875 -4.800781 2.359375 -4.78125 C 2.179688 -4.757812 2.007812 -4.734375 1.84375 -4.703125 L 1.84375 0 L 0.859375 0 Z M 0.859375 -5.40625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-9">
+<path style="stroke:none;" d="M 4.21875 -1.390625 C 4.21875 -1.585938 4.132812 -1.75 3.96875 -1.875 C 3.800781 -2.007812 3.59375 -2.125 3.34375 -2.21875 C 3.101562 -2.3125 2.835938 -2.40625 2.546875 -2.5 C 2.265625 -2.59375 2 -2.707031 1.75 -2.84375 C 1.507812 -2.976562 1.304688 -3.144531 1.140625 -3.34375 C 0.984375 -3.539062 0.90625 -3.800781 0.90625 -4.125 C 0.90625 -4.570312 1.082031 -4.945312 1.4375 -5.25 C 1.800781 -5.550781 2.375 -5.703125 3.15625 -5.703125 C 3.457031 -5.703125 3.769531 -5.675781 4.09375 -5.625 C 4.414062 -5.582031 4.695312 -5.523438 4.9375 -5.453125 L 4.75 -4.578125 C 4.6875 -4.609375 4.597656 -4.640625 4.484375 -4.671875 C 4.367188 -4.710938 4.238281 -4.742188 4.09375 -4.765625 C 3.957031 -4.796875 3.804688 -4.816406 3.640625 -4.828125 C 3.472656 -4.847656 3.316406 -4.859375 3.171875 -4.859375 C 2.304688 -4.859375 1.875 -4.625 1.875 -4.15625 C 1.875 -3.988281 1.953125 -3.84375 2.109375 -3.71875 C 2.273438 -3.601562 2.484375 -3.5 2.734375 -3.40625 C 2.984375 -3.3125 3.25 -3.210938 3.53125 -3.109375 C 3.820312 -3.015625 4.09375 -2.894531 4.34375 -2.75 C 4.59375 -2.601562 4.796875 -2.425781 4.953125 -2.21875 C 5.117188 -2.019531 5.203125 -1.765625 5.203125 -1.453125 C 5.203125 -0.953125 5.003906 -0.5625 4.609375 -0.28125 C 4.222656 -0.0078125 3.609375 0.125 2.765625 0.125 C 2.378906 0.125 2.023438 0.09375 1.703125 0.03125 C 1.378906 -0.03125 1.078125 -0.125 0.796875 -0.25 L 0.984375 -1.15625 C 1.265625 -1.019531 1.554688 -0.910156 1.859375 -0.828125 C 2.171875 -0.742188 2.503906 -0.703125 2.859375 -0.703125 C 3.765625 -0.703125 4.21875 -0.929688 4.21875 -1.390625 Z M 4.21875 -1.390625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-10">
+<path style="stroke:none;" d="M 0.59375 -2.765625 C 0.59375 -3.273438 0.671875 -3.710938 0.828125 -4.078125 C 0.984375 -4.441406 1.203125 -4.742188 1.484375 -4.984375 C 1.765625 -5.234375 2.09375 -5.414062 2.46875 -5.53125 C 2.84375 -5.644531 3.238281 -5.703125 3.65625 -5.703125 C 3.925781 -5.703125 4.195312 -5.679688 4.46875 -5.640625 C 4.738281 -5.609375 5.023438 -5.546875 5.328125 -5.453125 L 5.09375 -4.59375 C 4.832031 -4.6875 4.59375 -4.75 4.375 -4.78125 C 4.15625 -4.8125 3.929688 -4.828125 3.703125 -4.828125 C 3.421875 -4.828125 3.148438 -4.785156 2.890625 -4.703125 C 2.640625 -4.628906 2.414062 -4.507812 2.21875 -4.34375 C 2.03125 -4.1875 1.878906 -3.976562 1.765625 -3.71875 C 1.660156 -3.457031 1.609375 -3.140625 1.609375 -2.765625 C 1.609375 -2.421875 1.660156 -2.117188 1.765625 -1.859375 C 1.867188 -1.609375 2.015625 -1.398438 2.203125 -1.234375 C 2.390625 -1.078125 2.613281 -0.957031 2.875 -0.875 C 3.144531 -0.789062 3.4375 -0.75 3.75 -0.75 C 4.007812 -0.75 4.253906 -0.765625 4.484375 -0.796875 C 4.722656 -0.828125 4.984375 -0.890625 5.265625 -0.984375 L 5.40625 -0.15625 C 5.125 -0.0507812 4.835938 0.0195312 4.546875 0.0625 C 4.265625 0.101562 3.957031 0.125 3.625 0.125 C 3.175781 0.125 2.765625 0.0664062 2.390625 -0.046875 C 2.023438 -0.171875 1.707031 -0.351562 1.4375 -0.59375 C 1.164062 -0.832031 0.957031 -1.132812 0.8125 -1.5 C 0.664062 -1.863281 0.59375 -2.285156 0.59375 -2.765625 Z M 0.59375 -2.765625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-11">
+<path style="stroke:none;" d="M 3.0625 -0.703125 C 3.3125 -0.703125 3.53125 -0.707031 3.71875 -0.71875 C 3.914062 -0.738281 4.082031 -0.765625 4.21875 -0.796875 L 4.21875 -2.453125 C 4.082031 -2.492188 3.925781 -2.523438 3.75 -2.546875 C 3.570312 -2.566406 3.382812 -2.578125 3.1875 -2.578125 C 3 -2.578125 2.816406 -2.5625 2.640625 -2.53125 C 2.460938 -2.507812 2.304688 -2.460938 2.171875 -2.390625 C 2.035156 -2.316406 1.921875 -2.222656 1.828125 -2.109375 C 1.742188 -1.992188 1.703125 -1.847656 1.703125 -1.671875 C 1.703125 -1.304688 1.820312 -1.050781 2.0625 -0.90625 C 2.3125 -0.769531 2.644531 -0.703125 3.0625 -0.703125 Z M 2.96875 -5.703125 C 3.382812 -5.703125 3.734375 -5.648438 4.015625 -5.546875 C 4.296875 -5.441406 4.523438 -5.296875 4.703125 -5.109375 C 4.878906 -4.929688 5.003906 -4.707031 5.078125 -4.4375 C 5.148438 -4.175781 5.1875 -3.890625 5.1875 -3.578125 L 5.1875 -0.09375 C 4.957031 -0.0507812 4.648438 -0.00390625 4.265625 0.046875 C 3.890625 0.0976562 3.5 0.125 3.09375 0.125 C 2.789062 0.125 2.492188 0.0976562 2.203125 0.046875 C 1.921875 -0.00390625 1.664062 -0.09375 1.4375 -0.21875 C 1.21875 -0.351562 1.039062 -0.535156 0.90625 -0.765625 C 0.769531 -0.992188 0.703125 -1.289062 0.703125 -1.65625 C 0.703125 -1.976562 0.769531 -2.25 0.90625 -2.46875 C 1.039062 -2.6875 1.21875 -2.863281 1.4375 -3 C 1.664062 -3.132812 1.921875 -3.234375 2.203125 -3.296875 C 2.484375 -3.359375 2.769531 -3.390625 3.0625 -3.390625 C 3.445312 -3.390625 3.832031 -3.34375 4.21875 -3.25 L 4.21875 -3.53125 C 4.21875 -3.695312 4.195312 -3.859375 4.15625 -4.015625 C 4.125 -4.171875 4.054688 -4.3125 3.953125 -4.4375 C 3.847656 -4.5625 3.707031 -4.660156 3.53125 -4.734375 C 3.363281 -4.816406 3.144531 -4.859375 2.875 -4.859375 C 2.53125 -4.859375 2.226562 -4.832031 1.96875 -4.78125 C 1.71875 -4.738281 1.523438 -4.691406 1.390625 -4.640625 L 1.265625 -5.453125 C 1.398438 -5.523438 1.625 -5.582031 1.9375 -5.625 C 2.257812 -5.675781 2.601562 -5.703125 2.96875 -5.703125 Z M 2.96875 -5.703125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-12">
+<path style="stroke:none;" d="M 4.0625 0.125 C 3.707031 0.125 3.410156 0.078125 3.171875 -0.015625 C 2.941406 -0.109375 2.757812 -0.25 2.625 -0.4375 C 2.488281 -0.632812 2.390625 -0.875 2.328125 -1.15625 C 2.273438 -1.4375 2.25 -1.765625 2.25 -2.140625 L 2.25 -7.421875 L 0.640625 -7.421875 L 0.640625 -8.25 L 3.234375 -8.25 L 3.234375 -2.140625 C 3.234375 -1.867188 3.25 -1.644531 3.28125 -1.46875 C 3.320312 -1.289062 3.378906 -1.144531 3.453125 -1.03125 C 3.535156 -0.925781 3.628906 -0.851562 3.734375 -0.8125 C 3.847656 -0.769531 3.984375 -0.75 4.140625 -0.75 C 4.367188 -0.75 4.582031 -0.773438 4.78125 -0.828125 C 4.988281 -0.890625 5.144531 -0.953125 5.25 -1.015625 L 5.40625 -0.1875 C 5.351562 -0.15625 5.28125 -0.117188 5.1875 -0.078125 C 5.101562 -0.046875 5 -0.015625 4.875 0.015625 C 4.757812 0.046875 4.628906 0.0703125 4.484375 0.09375 C 4.347656 0.113281 4.207031 0.125 4.0625 0.125 Z M 4.0625 0.125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-13">
+<path style="stroke:none;" d="M 2.515625 -6.4375 C 2.304688 -6.4375 2.125 -6.503906 1.96875 -6.640625 C 1.8125 -6.785156 1.734375 -6.984375 1.734375 -7.234375 C 1.734375 -7.484375 1.8125 -7.679688 1.96875 -7.828125 C 2.125 -7.972656 2.304688 -8.046875 2.515625 -8.046875 C 2.722656 -8.046875 2.898438 -7.972656 3.046875 -7.828125 C 3.203125 -7.679688 3.28125 -7.484375 3.28125 -7.234375 C 3.28125 -6.984375 3.203125 -6.785156 3.046875 -6.640625 C 2.898438 -6.503906 2.722656 -6.4375 2.515625 -6.4375 Z M 2.25 -4.734375 L 0.640625 -4.734375 L 0.640625 -5.5625 L 3.234375 -5.5625 L 3.234375 -2.140625 C 3.234375 -1.585938 3.3125 -1.21875 3.46875 -1.03125 C 3.625 -0.84375 3.851562 -0.75 4.15625 -0.75 C 4.382812 -0.75 4.597656 -0.773438 4.796875 -0.828125 C 4.992188 -0.890625 5.144531 -0.953125 5.25 -1.015625 L 5.40625 -0.1875 C 5.351562 -0.15625 5.28125 -0.117188 5.1875 -0.078125 C 5.101562 -0.046875 5.003906 -0.015625 4.890625 0.015625 C 4.773438 0.046875 4.644531 0.0703125 4.5 0.09375 C 4.363281 0.113281 4.21875 0.125 4.0625 0.125 C 3.71875 0.125 3.425781 0.078125 3.1875 -0.015625 C 2.957031 -0.109375 2.769531 -0.25 2.625 -0.4375 C 2.488281 -0.632812 2.390625 -0.875 2.328125 -1.15625 C 2.273438 -1.4375 2.25 -1.765625 2.25 -2.140625 Z M 2.25 -4.734375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-14">
+<path style="stroke:none;" d="M 4.15625 -0.515625 C 4.039062 -0.453125 3.863281 -0.382812 3.625 -0.3125 C 3.394531 -0.238281 3.128906 -0.203125 2.828125 -0.203125 C 2.503906 -0.203125 2.195312 -0.253906 1.90625 -0.359375 C 1.625 -0.472656 1.378906 -0.640625 1.171875 -0.859375 C 0.960938 -1.078125 0.796875 -1.351562 0.671875 -1.6875 C 0.546875 -2.03125 0.484375 -2.4375 0.484375 -2.90625 C 0.484375 -3.3125 0.539062 -3.679688 0.65625 -4.015625 C 0.769531 -4.359375 0.9375 -4.65625 1.15625 -4.90625 C 1.375 -5.15625 1.644531 -5.347656 1.96875 -5.484375 C 2.289062 -5.628906 2.65625 -5.703125 3.0625 -5.703125 C 3.539062 -5.703125 3.945312 -5.664062 4.28125 -5.59375 C 4.625 -5.53125 4.910156 -5.46875 5.140625 -5.40625 L 5.140625 -0.4375 C 5.140625 0.425781 4.921875 1.050781 4.484375 1.4375 C 4.054688 1.820312 3.398438 2.015625 2.515625 2.015625 C 2.160156 2.015625 1.835938 1.984375 1.546875 1.921875 C 1.253906 1.867188 0.992188 1.804688 0.765625 1.734375 L 0.953125 0.859375 C 1.160156 0.941406 1.394531 1.007812 1.65625 1.0625 C 1.925781 1.125 2.222656 1.15625 2.546875 1.15625 C 3.117188 1.15625 3.53125 1.035156 3.78125 0.796875 C 4.03125 0.566406 4.15625 0.191406 4.15625 -0.328125 Z M 4.15625 -4.6875 C 4.0625 -4.71875 3.925781 -4.75 3.75 -4.78125 C 3.582031 -4.8125 3.359375 -4.828125 3.078125 -4.828125 C 2.554688 -4.828125 2.160156 -4.648438 1.890625 -4.296875 C 1.628906 -3.941406 1.5 -3.472656 1.5 -2.890625 C 1.5 -2.566406 1.535156 -2.289062 1.609375 -2.0625 C 1.691406 -1.84375 1.796875 -1.65625 1.921875 -1.5 C 2.054688 -1.351562 2.207031 -1.242188 2.375 -1.171875 C 2.539062 -1.109375 2.722656 -1.078125 2.921875 -1.078125 C 3.160156 -1.078125 3.390625 -1.113281 3.609375 -1.1875 C 3.835938 -1.257812 4.019531 -1.34375 4.15625 -1.4375 Z M 4.15625 -4.6875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-15">
+<path style="stroke:none;" d="M 2.8125 -0.703125 C 3.3125 -0.703125 3.691406 -0.796875 3.953125 -0.984375 C 4.222656 -1.171875 4.359375 -1.457031 4.359375 -1.84375 C 4.359375 -2.082031 4.304688 -2.28125 4.203125 -2.4375 C 4.109375 -2.601562 3.984375 -2.75 3.828125 -2.875 C 3.671875 -3 3.488281 -3.109375 3.28125 -3.203125 C 3.082031 -3.296875 2.878906 -3.378906 2.671875 -3.453125 C 2.429688 -3.546875 2.203125 -3.648438 1.984375 -3.765625 C 1.765625 -3.890625 1.566406 -4.03125 1.390625 -4.1875 C 1.222656 -4.351562 1.085938 -4.546875 0.984375 -4.765625 C 0.890625 -4.984375 0.84375 -5.238281 0.84375 -5.53125 C 0.84375 -6.175781 1.046875 -6.679688 1.453125 -7.046875 C 1.859375 -7.410156 2.425781 -7.59375 3.15625 -7.59375 C 3.351562 -7.59375 3.550781 -7.578125 3.75 -7.546875 C 3.945312 -7.523438 4.128906 -7.492188 4.296875 -7.453125 C 4.460938 -7.410156 4.609375 -7.359375 4.734375 -7.296875 C 4.867188 -7.242188 4.976562 -7.191406 5.0625 -7.140625 L 4.75 -6.3125 C 4.59375 -6.40625 4.375 -6.5 4.09375 -6.59375 C 3.8125 -6.695312 3.5 -6.75 3.15625 -6.75 C 2.789062 -6.75 2.476562 -6.65625 2.21875 -6.46875 C 1.957031 -6.289062 1.828125 -6.019531 1.828125 -5.65625 C 1.828125 -5.457031 1.863281 -5.285156 1.9375 -5.140625 C 2.007812 -4.992188 2.113281 -4.863281 2.25 -4.75 C 2.382812 -4.644531 2.535156 -4.546875 2.703125 -4.453125 C 2.878906 -4.367188 3.070312 -4.285156 3.28125 -4.203125 C 3.59375 -4.078125 3.875 -3.945312 4.125 -3.8125 C 4.375 -3.6875 4.585938 -3.535156 4.765625 -3.359375 C 4.953125 -3.179688 5.09375 -2.972656 5.1875 -2.734375 C 5.289062 -2.492188 5.34375 -2.203125 5.34375 -1.859375 C 5.34375 -1.210938 5.125 -0.710938 4.6875 -0.359375 C 4.25 -0.015625 3.625 0.15625 2.8125 0.15625 C 2.539062 0.15625 2.289062 0.132812 2.0625 0.09375 C 1.832031 0.0625 1.625 0.0195312 1.4375 -0.03125 C 1.257812 -0.09375 1.101562 -0.148438 0.96875 -0.203125 C 0.832031 -0.253906 0.726562 -0.304688 0.65625 -0.359375 L 0.953125 -1.171875 C 1.117188 -1.085938 1.359375 -0.988281 1.671875 -0.875 C 1.984375 -0.757812 2.363281 -0.703125 2.8125 -0.703125 Z M 2.8125 -0.703125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-16">
+<path style="stroke:none;" d="M 3.109375 -5.703125 C 3.859375 -5.703125 4.4375 -5.46875 4.84375 -5 C 5.25 -4.53125 5.453125 -3.820312 5.453125 -2.875 L 5.453125 -2.515625 L 1.46875 -2.515625 C 1.507812 -1.941406 1.703125 -1.503906 2.046875 -1.203125 C 2.390625 -0.898438 2.867188 -0.75 3.484375 -0.75 C 3.835938 -0.75 4.132812 -0.773438 4.375 -0.828125 C 4.625 -0.890625 4.8125 -0.953125 4.9375 -1.015625 L 5.078125 -0.1875 C 4.953125 -0.113281 4.734375 -0.046875 4.421875 0.015625 C 4.109375 0.0859375 3.757812 0.125 3.375 0.125 C 2.894531 0.125 2.472656 0.0507812 2.109375 -0.09375 C 1.742188 -0.238281 1.441406 -0.4375 1.203125 -0.6875 C 0.960938 -0.945312 0.78125 -1.253906 0.65625 -1.609375 C 0.539062 -1.960938 0.484375 -2.347656 0.484375 -2.765625 C 0.484375 -3.265625 0.554688 -3.695312 0.703125 -4.0625 C 0.859375 -4.4375 1.0625 -4.742188 1.3125 -4.984375 C 1.5625 -5.222656 1.835938 -5.398438 2.140625 -5.515625 C 2.453125 -5.640625 2.773438 -5.703125 3.109375 -5.703125 Z M 4.453125 -3.328125 C 4.453125 -3.796875 4.328125 -4.164062 4.078125 -4.4375 C 3.828125 -4.71875 3.5 -4.859375 3.09375 -4.859375 C 2.863281 -4.859375 2.65625 -4.8125 2.46875 -4.71875 C 2.28125 -4.632812 2.117188 -4.519531 1.984375 -4.375 C 1.847656 -4.226562 1.738281 -4.0625 1.65625 -3.875 C 1.570312 -3.695312 1.519531 -3.515625 1.5 -3.328125 Z M 4.453125 -3.328125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-17">
+<path style="stroke:none;" d="M 4.15625 -4.390625 C 4.039062 -4.492188 3.875 -4.59375 3.65625 -4.6875 C 3.445312 -4.78125 3.222656 -4.828125 2.984375 -4.828125 C 2.722656 -4.828125 2.5 -4.773438 2.3125 -4.671875 C 2.125 -4.566406 1.96875 -4.421875 1.84375 -4.234375 C 1.726562 -4.054688 1.640625 -3.84375 1.578125 -3.59375 C 1.523438 -3.34375 1.5 -3.070312 1.5 -2.78125 C 1.5 -2.132812 1.648438 -1.632812 1.953125 -1.28125 C 2.253906 -0.925781 2.648438 -0.75 3.140625 -0.75 C 3.390625 -0.75 3.597656 -0.757812 3.765625 -0.78125 C 3.941406 -0.8125 4.070312 -0.835938 4.15625 -0.859375 Z M 4.15625 -8.140625 L 5.140625 -8.3125 L 5.140625 -0.15625 C 4.929688 -0.09375 4.65625 -0.03125 4.3125 0.03125 C 3.976562 0.09375 3.585938 0.125 3.140625 0.125 C 2.742188 0.125 2.378906 0.0546875 2.046875 -0.078125 C 1.722656 -0.210938 1.441406 -0.40625 1.203125 -0.65625 C 0.972656 -0.90625 0.796875 -1.207031 0.671875 -1.5625 C 0.546875 -1.925781 0.484375 -2.332031 0.484375 -2.78125 C 0.484375 -3.21875 0.535156 -3.613281 0.640625 -3.96875 C 0.742188 -4.320312 0.898438 -4.625 1.109375 -4.875 C 1.316406 -5.132812 1.566406 -5.335938 1.859375 -5.484375 C 2.148438 -5.628906 2.488281 -5.703125 2.875 -5.703125 C 3.164062 -5.703125 3.421875 -5.664062 3.640625 -5.59375 C 3.867188 -5.519531 4.039062 -5.441406 4.15625 -5.359375 Z M 4.15625 -8.140625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-18">
+<path style="stroke:none;" d="M 1.28125 0 L 1.28125 -5.265625 C 2.101562 -5.546875 2.925781 -5.6875 3.75 -5.6875 C 4.007812 -5.6875 4.253906 -5.675781 4.484375 -5.65625 C 4.722656 -5.632812 4.976562 -5.59375 5.25 -5.53125 L 5.078125 -4.65625 C 4.816406 -4.726562 4.585938 -4.773438 4.390625 -4.796875 C 4.203125 -4.816406 3.988281 -4.828125 3.75 -4.828125 C 3.269531 -4.828125 2.773438 -4.757812 2.265625 -4.625 L 2.265625 0 Z M 1.28125 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-19">
+<path style="stroke:none;" d="M 0.609375 1.046875 C 0.679688 1.085938 0.78125 1.117188 0.90625 1.140625 C 1.039062 1.160156 1.164062 1.171875 1.28125 1.171875 C 1.675781 1.171875 1.984375 1.082031 2.203125 0.90625 C 2.421875 0.738281 2.625 0.460938 2.8125 0.078125 C 2.363281 -0.773438 1.941406 -1.6875 1.546875 -2.65625 C 1.148438 -3.625 0.828125 -4.59375 0.578125 -5.5625 L 1.65625 -5.5625 C 1.738281 -5.25 1.832031 -4.90625 1.9375 -4.53125 C 2.039062 -4.15625 2.160156 -3.769531 2.296875 -3.375 C 2.441406 -2.988281 2.585938 -2.597656 2.734375 -2.203125 C 2.890625 -1.804688 3.054688 -1.425781 3.234375 -1.0625 C 3.367188 -1.4375 3.488281 -1.800781 3.59375 -2.15625 C 3.707031 -2.519531 3.8125 -2.882812 3.90625 -3.25 C 4.007812 -3.613281 4.109375 -3.984375 4.203125 -4.359375 C 4.296875 -4.742188 4.394531 -5.144531 4.5 -5.5625 L 5.53125 -5.5625 C 5.269531 -4.53125 4.984375 -3.523438 4.671875 -2.546875 C 4.359375 -1.566406 4.019531 -0.660156 3.65625 0.171875 C 3.519531 0.484375 3.375 0.753906 3.21875 0.984375 C 3.0625 1.222656 2.890625 1.414062 2.703125 1.5625 C 2.523438 1.71875 2.316406 1.832031 2.078125 1.90625 C 1.847656 1.976562 1.585938 2.015625 1.296875 2.015625 C 1.140625 2.015625 0.972656 1.992188 0.796875 1.953125 C 0.617188 1.910156 0.5 1.875 0.4375 1.84375 Z M 0.609375 1.046875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-20">
+<path style="stroke:none;" d="M 0.34375 -3.71875 C 0.34375 -4.382812 0.40625 -4.960938 0.53125 -5.453125 C 0.664062 -5.941406 0.847656 -6.34375 1.078125 -6.65625 C 1.304688 -6.96875 1.582031 -7.203125 1.90625 -7.359375 C 2.238281 -7.515625 2.601562 -7.59375 3 -7.59375 C 3.394531 -7.59375 3.753906 -7.515625 4.078125 -7.359375 C 4.410156 -7.203125 4.691406 -6.96875 4.921875 -6.65625 C 5.148438 -6.34375 5.328125 -5.941406 5.453125 -5.453125 C 5.585938 -4.960938 5.65625 -4.382812 5.65625 -3.71875 C 5.65625 -3.050781 5.585938 -2.472656 5.453125 -1.984375 C 5.328125 -1.503906 5.148438 -1.101562 4.921875 -0.78125 C 4.691406 -0.457031 4.410156 -0.21875 4.078125 -0.0625 C 3.753906 0.0820312 3.394531 0.15625 3 0.15625 C 2.601562 0.15625 2.238281 0.0820312 1.90625 -0.0625 C 1.582031 -0.21875 1.304688 -0.457031 1.078125 -0.78125 C 0.847656 -1.101562 0.664062 -1.503906 0.53125 -1.984375 C 0.40625 -2.472656 0.34375 -3.050781 0.34375 -3.71875 Z M 1.359375 -3.71875 C 1.359375 -2.738281 1.488281 -1.988281 1.75 -1.46875 C 2.007812 -0.957031 2.414062 -0.703125 2.96875 -0.703125 C 3.53125 -0.703125 3.953125 -0.957031 4.234375 -1.46875 C 4.515625 -1.988281 4.65625 -2.738281 4.65625 -3.71875 C 4.65625 -4.695312 4.515625 -5.445312 4.234375 -5.96875 C 3.953125 -6.488281 3.53125 -6.75 2.96875 -6.75 C 2.414062 -6.75 2.007812 -6.488281 1.75 -5.96875 C 1.488281 -5.445312 1.359375 -4.695312 1.359375 -3.71875 Z M 1.359375 -3.71875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-21">
+<path style="stroke:none;" d="M 5.140625 -0.15625 C 4.929688 -0.101562 4.644531 -0.046875 4.28125 0.015625 C 3.925781 0.0859375 3.507812 0.125 3.03125 0.125 C 2.613281 0.125 2.265625 0.0625 1.984375 -0.0625 C 1.703125 -0.1875 1.472656 -0.363281 1.296875 -0.59375 C 1.117188 -0.820312 0.992188 -1.09375 0.921875 -1.40625 C 0.847656 -1.71875 0.8125 -2.0625 0.8125 -2.4375 L 0.8125 -5.5625 L 1.796875 -5.5625 L 1.796875 -2.65625 C 1.796875 -1.96875 1.894531 -1.476562 2.09375 -1.1875 C 2.300781 -0.894531 2.644531 -0.75 3.125 -0.75 C 3.226562 -0.75 3.332031 -0.753906 3.4375 -0.765625 C 3.550781 -0.773438 3.65625 -0.785156 3.75 -0.796875 C 3.851562 -0.804688 3.9375 -0.816406 4 -0.828125 C 4.070312 -0.847656 4.125 -0.859375 4.15625 -0.859375 L 4.15625 -5.5625 L 5.140625 -5.5625 Z M 5.140625 -0.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-22">
+<path style="stroke:none;" d="M 2.921875 -5.5625 L 5.265625 -5.5625 L 5.265625 -4.734375 L 2.921875 -4.734375 L 2.921875 -2.140625 C 2.921875 -1.867188 2.9375 -1.644531 2.96875 -1.46875 C 3.007812 -1.289062 3.078125 -1.144531 3.171875 -1.03125 C 3.265625 -0.925781 3.382812 -0.851562 3.53125 -0.8125 C 3.675781 -0.769531 3.851562 -0.75 4.0625 -0.75 C 4.34375 -0.75 4.570312 -0.773438 4.75 -0.828125 C 4.925781 -0.878906 5.09375 -0.941406 5.25 -1.015625 L 5.40625 -0.1875 C 5.289062 -0.132812 5.109375 -0.0703125 4.859375 0 C 4.617188 0.0820312 4.316406 0.125 3.953125 0.125 C 3.546875 0.125 3.207031 0.078125 2.9375 -0.015625 C 2.675781 -0.109375 2.46875 -0.25 2.3125 -0.4375 C 2.164062 -0.632812 2.066406 -0.875 2.015625 -1.15625 C 1.960938 -1.4375 1.9375 -1.765625 1.9375 -2.140625 L 1.9375 -4.734375 L 0.75 -4.734375 L 0.75 -5.5625 L 1.9375 -5.5625 L 1.9375 -7.125 L 2.921875 -7.296875 Z M 2.921875 -5.5625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-23">
+<path style="stroke:none;" d="M 4.5 -2.765625 C 4.5 -3.421875 4.347656 -3.925781 4.046875 -4.28125 C 3.742188 -4.632812 3.347656 -4.8125 2.859375 -4.8125 C 2.585938 -4.8125 2.375 -4.796875 2.21875 -4.765625 C 2.0625 -4.742188 1.9375 -4.71875 1.84375 -4.6875 L 1.84375 -1.171875 C 1.957031 -1.066406 2.117188 -0.96875 2.328125 -0.875 C 2.546875 -0.789062 2.773438 -0.75 3.015625 -0.75 C 3.273438 -0.75 3.5 -0.800781 3.6875 -0.90625 C 3.875 -1.007812 4.023438 -1.148438 4.140625 -1.328125 C 4.265625 -1.515625 4.351562 -1.726562 4.40625 -1.96875 C 4.46875 -2.21875 4.5 -2.484375 4.5 -2.765625 Z M 5.515625 -2.765625 C 5.515625 -2.347656 5.460938 -1.957031 5.359375 -1.59375 C 5.253906 -1.238281 5.097656 -0.929688 4.890625 -0.671875 C 4.679688 -0.421875 4.429688 -0.222656 4.140625 -0.078125 C 3.847656 0.0546875 3.507812 0.125 3.125 0.125 C 2.832031 0.125 2.570312 0.0859375 2.34375 0.015625 C 2.125 -0.046875 1.957031 -0.125 1.84375 -0.21875 L 1.84375 1.984375 L 0.859375 1.984375 L 0.859375 -5.40625 C 1.066406 -5.46875 1.34375 -5.53125 1.6875 -5.59375 C 2.03125 -5.65625 2.421875 -5.6875 2.859375 -5.6875 C 3.253906 -5.6875 3.613281 -5.617188 3.9375 -5.484375 C 4.269531 -5.347656 4.550781 -5.15625 4.78125 -4.90625 C 5.019531 -4.65625 5.203125 -4.347656 5.328125 -3.984375 C 5.453125 -3.617188 5.515625 -3.210938 5.515625 -2.765625 Z M 5.515625 -2.765625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-24">
+<path style="stroke:none;" d="M 3.015625 -3.734375 L 4.15625 -7.421875 L 5.09375 -7.421875 C 5.25 -6.253906 5.359375 -5.054688 5.421875 -3.828125 C 5.492188 -2.609375 5.554688 -1.332031 5.609375 0 L 4.65625 0 C 4.644531 -0.425781 4.632812 -0.890625 4.625 -1.390625 C 4.625 -1.898438 4.613281 -2.421875 4.59375 -2.953125 C 4.582031 -3.492188 4.570312 -4.039062 4.5625 -4.59375 C 4.550781 -5.144531 4.539062 -5.679688 4.53125 -6.203125 L 3.4375 -2.8125 L 2.578125 -2.8125 L 1.46875 -6.203125 C 1.46875 -5.679688 1.457031 -5.144531 1.4375 -4.59375 C 1.425781 -4.050781 1.414062 -3.507812 1.40625 -2.96875 C 1.394531 -2.425781 1.382812 -1.898438 1.375 -1.390625 C 1.363281 -0.890625 1.351562 -0.425781 1.34375 0 L 0.390625 0 C 0.410156 -0.601562 0.4375 -1.222656 0.46875 -1.859375 C 0.5 -2.503906 0.535156 -3.144531 0.578125 -3.78125 C 0.617188 -4.414062 0.671875 -5.039062 0.734375 -5.65625 C 0.796875 -6.269531 0.863281 -6.859375 0.9375 -7.421875 L 1.84375 -7.421875 Z M 3.015625 -3.734375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-0">
+<path style="stroke:none;" d="M 0.640625 2.296875 L 0.640625 -9.171875 L 7.140625 -9.171875 L 7.140625 2.296875 Z M 1.375 1.578125 L 6.421875 1.578125 L 6.421875 -8.4375 L 1.375 -8.4375 Z M 1.375 1.578125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-1">
+<path style="stroke:none;" d="M 6.34375 -6.84375 L 6.34375 -5.75 C 6.007812 -5.925781 5.675781 -6.0625 5.34375 -6.15625 C 5.007812 -6.25 4.675781 -6.296875 4.34375 -6.296875 C 3.582031 -6.296875 2.992188 -6.050781 2.578125 -5.5625 C 2.160156 -5.082031 1.953125 -4.410156 1.953125 -3.546875 C 1.953125 -2.679688 2.160156 -2.007812 2.578125 -1.53125 C 2.992188 -1.050781 3.582031 -0.8125 4.34375 -0.8125 C 4.675781 -0.8125 5.007812 -0.851562 5.34375 -0.9375 C 5.675781 -1.03125 6.007812 -1.171875 6.34375 -1.359375 L 6.34375 -0.265625 C 6.019531 -0.117188 5.679688 -0.0078125 5.328125 0.0625 C 4.984375 0.144531 4.613281 0.1875 4.21875 0.1875 C 3.144531 0.1875 2.289062 -0.144531 1.65625 -0.8125 C 1.03125 -1.488281 0.71875 -2.398438 0.71875 -3.546875 C 0.71875 -4.703125 1.035156 -5.613281 1.671875 -6.28125 C 2.304688 -6.945312 3.179688 -7.28125 4.296875 -7.28125 C 4.648438 -7.28125 5 -7.242188 5.34375 -7.171875 C 5.6875 -7.097656 6.019531 -6.988281 6.34375 -6.84375 Z M 6.34375 -6.84375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-2">
+<path style="stroke:none;" d="M 5.34375 -6.015625 C 5.207031 -6.085938 5.0625 -6.140625 4.90625 -6.171875 C 4.757812 -6.210938 4.59375 -6.234375 4.40625 -6.234375 C 3.75 -6.234375 3.242188 -6.019531 2.890625 -5.59375 C 2.535156 -5.164062 2.359375 -4.550781 2.359375 -3.75 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.597656 -6.4375 2.914062 -6.757812 3.3125 -6.96875 C 3.707031 -7.175781 4.1875 -7.28125 4.75 -7.28125 C 4.832031 -7.28125 4.921875 -7.273438 5.015625 -7.265625 C 5.109375 -7.253906 5.21875 -7.238281 5.34375 -7.21875 Z M 5.34375 -6.015625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-3">
+<path style="stroke:none;" d="M 3.984375 -6.296875 C 3.359375 -6.296875 2.863281 -6.050781 2.5 -5.5625 C 2.132812 -5.070312 1.953125 -4.398438 1.953125 -3.546875 C 1.953125 -2.691406 2.128906 -2.019531 2.484375 -1.53125 C 2.847656 -1.050781 3.347656 -0.8125 3.984375 -0.8125 C 4.597656 -0.8125 5.085938 -1.054688 5.453125 -1.546875 C 5.816406 -2.035156 6 -2.703125 6 -3.546875 C 6 -4.390625 5.816406 -5.054688 5.453125 -5.546875 C 5.085938 -6.046875 4.597656 -6.296875 3.984375 -6.296875 Z M 3.984375 -7.28125 C 4.992188 -7.28125 5.789062 -6.945312 6.375 -6.28125 C 6.957031 -5.625 7.25 -4.710938 7.25 -3.546875 C 7.25 -2.378906 6.957031 -1.460938 6.375 -0.796875 C 5.789062 -0.140625 4.992188 0.1875 3.984375 0.1875 C 2.960938 0.1875 2.160156 -0.140625 1.578125 -0.796875 C 1.003906 -1.460938 0.71875 -2.378906 0.71875 -3.546875 C 0.71875 -4.710938 1.003906 -5.625 1.578125 -6.28125 C 2.160156 -6.945312 2.960938 -7.28125 3.984375 -7.28125 Z M 3.984375 -7.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-4">
+<path style="stroke:none;" d="M 2.359375 -1.0625 L 2.359375 2.703125 L 1.1875 2.703125 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6.03125 C 2.597656 -6.457031 2.90625 -6.769531 3.28125 -6.96875 C 3.65625 -7.175781 4.101562 -7.28125 4.625 -7.28125 C 5.488281 -7.28125 6.191406 -6.9375 6.734375 -6.25 C 7.273438 -5.5625 7.546875 -4.660156 7.546875 -3.546875 C 7.546875 -2.429688 7.273438 -1.53125 6.734375 -0.84375 C 6.191406 -0.15625 5.488281 0.1875 4.625 0.1875 C 4.101562 0.1875 3.65625 0.0820312 3.28125 -0.125 C 2.90625 -0.332031 2.597656 -0.644531 2.359375 -1.0625 Z M 6.328125 -3.546875 C 6.328125 -4.410156 6.148438 -5.082031 5.796875 -5.5625 C 5.441406 -6.050781 4.957031 -6.296875 4.34375 -6.296875 C 3.726562 -6.296875 3.242188 -6.050781 2.890625 -5.5625 C 2.535156 -5.082031 2.359375 -4.410156 2.359375 -3.546875 C 2.359375 -2.691406 2.535156 -2.019531 2.890625 -1.53125 C 3.242188 -1.039062 3.726562 -0.796875 4.34375 -0.796875 C 4.957031 -0.796875 5.441406 -1.039062 5.796875 -1.53125 C 6.148438 -2.019531 6.328125 -2.691406 6.328125 -3.546875 Z M 6.328125 -3.546875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-5">
+<path style="stroke:none;" d="M 5.75 -6.90625 L 5.75 -5.796875 C 5.425781 -5.960938 5.085938 -6.085938 4.734375 -6.171875 C 4.378906 -6.253906 4.007812 -6.296875 3.625 -6.296875 C 3.039062 -6.296875 2.601562 -6.207031 2.3125 -6.03125 C 2.03125 -5.851562 1.890625 -5.585938 1.890625 -5.234375 C 1.890625 -4.960938 1.988281 -4.75 2.1875 -4.59375 C 2.394531 -4.445312 2.816406 -4.300781 3.453125 -4.15625 L 3.84375 -4.0625 C 4.675781 -3.882812 5.265625 -3.632812 5.609375 -3.3125 C 5.960938 -2.988281 6.140625 -2.539062 6.140625 -1.96875 C 6.140625 -1.300781 5.878906 -0.773438 5.359375 -0.390625 C 4.835938 -0.00390625 4.117188 0.1875 3.203125 0.1875 C 2.816406 0.1875 2.414062 0.148438 2 0.078125 C 1.59375 0.00390625 1.160156 -0.109375 0.703125 -0.265625 L 0.703125 -1.46875 C 1.140625 -1.238281 1.566406 -1.066406 1.984375 -0.953125 C 2.398438 -0.847656 2.8125 -0.796875 3.21875 -0.796875 C 3.769531 -0.796875 4.191406 -0.890625 4.484375 -1.078125 C 4.785156 -1.265625 4.9375 -1.53125 4.9375 -1.875 C 4.9375 -2.1875 4.828125 -2.425781 4.609375 -2.59375 C 4.398438 -2.769531 3.9375 -2.9375 3.21875 -3.09375 L 2.8125 -3.1875 C 2.082031 -3.34375 1.554688 -3.578125 1.234375 -3.890625 C 0.910156 -4.203125 0.75 -4.632812 0.75 -5.1875 C 0.75 -5.851562 0.984375 -6.367188 1.453125 -6.734375 C 1.929688 -7.097656 2.609375 -7.28125 3.484375 -7.28125 C 3.910156 -7.28125 4.316406 -7.25 4.703125 -7.1875 C 5.085938 -7.125 5.4375 -7.03125 5.75 -6.90625 Z M 5.75 -6.90625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-6">
+<path style="stroke:none;" d="M 4.453125 -3.578125 C 3.515625 -3.578125 2.863281 -3.46875 2.5 -3.25 C 2.132812 -3.03125 1.953125 -2.660156 1.953125 -2.140625 C 1.953125 -1.734375 2.085938 -1.40625 2.359375 -1.15625 C 2.628906 -0.914062 3 -0.796875 3.46875 -0.796875 C 4.113281 -0.796875 4.632812 -1.023438 5.03125 -1.484375 C 5.425781 -1.941406 5.625 -2.550781 5.625 -3.3125 L 5.625 -3.578125 Z M 6.78125 -4.0625 L 6.78125 0 L 5.625 0 L 5.625 -1.078125 C 5.351562 -0.648438 5.019531 -0.332031 4.625 -0.125 C 4.226562 0.0820312 3.738281 0.1875 3.15625 0.1875 C 2.425781 0.1875 1.847656 -0.015625 1.421875 -0.421875 C 0.992188 -0.835938 0.78125 -1.382812 0.78125 -2.0625 C 0.78125 -2.863281 1.046875 -3.46875 1.578125 -3.875 C 2.117188 -4.28125 2.921875 -4.484375 3.984375 -4.484375 L 5.625 -4.484375 L 5.625 -4.609375 C 5.625 -5.140625 5.445312 -5.550781 5.09375 -5.84375 C 4.738281 -6.144531 4.238281 -6.296875 3.59375 -6.296875 C 3.1875 -6.296875 2.789062 -6.242188 2.40625 -6.140625 C 2.019531 -6.046875 1.648438 -5.898438 1.296875 -5.703125 L 1.296875 -6.78125 C 1.722656 -6.945312 2.140625 -7.070312 2.546875 -7.15625 C 2.953125 -7.238281 3.34375 -7.28125 3.71875 -7.28125 C 4.75 -7.28125 5.515625 -7.015625 6.015625 -6.484375 C 6.523438 -5.953125 6.78125 -5.144531 6.78125 -4.0625 Z M 6.78125 -4.0625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-7">
+<path style="stroke:none;" d="M 1.21875 -9.875 L 2.390625 -9.875 L 2.390625 0 L 1.21875 0 Z M 1.21875 -9.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-8">
+<path style="stroke:none;" d="M 7.3125 -3.84375 L 7.3125 -3.28125 L 1.9375 -3.28125 C 1.988281 -2.46875 2.226562 -1.851562 2.65625 -1.4375 C 3.09375 -1.019531 3.695312 -0.8125 4.46875 -0.8125 C 4.914062 -0.8125 5.347656 -0.863281 5.765625 -0.96875 C 6.191406 -1.082031 6.613281 -1.25 7.03125 -1.46875 L 7.03125 -0.359375 C 6.613281 -0.179688 6.179688 -0.046875 5.734375 0.046875 C 5.296875 0.140625 4.851562 0.1875 4.40625 0.1875 C 3.269531 0.1875 2.367188 -0.140625 1.703125 -0.796875 C 1.046875 -1.460938 0.71875 -2.359375 0.71875 -3.484375 C 0.71875 -4.648438 1.03125 -5.570312 1.65625 -6.25 C 2.289062 -6.9375 3.140625 -7.28125 4.203125 -7.28125 C 5.160156 -7.28125 5.914062 -6.972656 6.46875 -6.359375 C 7.03125 -5.742188 7.3125 -4.90625 7.3125 -3.84375 Z M 6.140625 -4.1875 C 6.128906 -4.820312 5.945312 -5.332031 5.59375 -5.71875 C 5.25 -6.101562 4.789062 -6.296875 4.21875 -6.296875 C 3.5625 -6.296875 3.035156 -6.109375 2.640625 -5.734375 C 2.253906 -5.367188 2.03125 -4.851562 1.96875 -4.1875 Z M 6.140625 -4.1875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-9">
+<path style="stroke:none;" d="M 6.328125 -3.546875 C 6.328125 -4.410156 6.148438 -5.082031 5.796875 -5.5625 C 5.441406 -6.050781 4.957031 -6.296875 4.34375 -6.296875 C 3.726562 -6.296875 3.242188 -6.050781 2.890625 -5.5625 C 2.535156 -5.082031 2.359375 -4.410156 2.359375 -3.546875 C 2.359375 -2.691406 2.535156 -2.019531 2.890625 -1.53125 C 3.242188 -1.039062 3.726562 -0.796875 4.34375 -0.796875 C 4.957031 -0.796875 5.441406 -1.039062 5.796875 -1.53125 C 6.148438 -2.019531 6.328125 -2.691406 6.328125 -3.546875 Z M 2.359375 -6.03125 C 2.597656 -6.457031 2.90625 -6.769531 3.28125 -6.96875 C 3.65625 -7.175781 4.101562 -7.28125 4.625 -7.28125 C 5.488281 -7.28125 6.191406 -6.9375 6.734375 -6.25 C 7.273438 -5.5625 7.546875 -4.660156 7.546875 -3.546875 C 7.546875 -2.429688 7.273438 -1.53125 6.734375 -0.84375 C 6.191406 -0.15625 5.488281 0.1875 4.625 0.1875 C 4.101562 0.1875 3.65625 0.0820312 3.28125 -0.125 C 2.90625 -0.332031 2.597656 -0.644531 2.359375 -1.0625 L 2.359375 0 L 1.1875 0 L 1.1875 -9.875 L 2.359375 -9.875 Z M 2.359375 -6.03125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-10">
+<path style="stroke:none;" d="M 1.21875 -7.109375 L 2.390625 -7.109375 L 2.390625 0 L 1.21875 0 Z M 1.21875 -9.875 L 2.390625 -9.875 L 2.390625 -8.390625 L 1.21875 -8.390625 Z M 1.21875 -9.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-11">
+<path style="stroke:none;" d="M 7.140625 -4.296875 L 7.140625 0 L 5.96875 0 L 5.96875 -4.25 C 5.96875 -4.925781 5.835938 -5.429688 5.578125 -5.765625 C 5.316406 -6.097656 4.921875 -6.265625 4.390625 -6.265625 C 3.765625 -6.265625 3.269531 -6.0625 2.90625 -5.65625 C 2.539062 -5.257812 2.359375 -4.710938 2.359375 -4.015625 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.640625 -6.425781 2.96875 -6.742188 3.34375 -6.953125 C 3.71875 -7.171875 4.15625 -7.28125 4.65625 -7.28125 C 5.46875 -7.28125 6.082031 -7.023438 6.5 -6.515625 C 6.925781 -6.015625 7.140625 -5.273438 7.140625 -4.296875 Z M 7.140625 -4.296875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-12">
+<path style="stroke:none;" d="M 5.90625 -3.640625 C 5.90625 -4.484375 5.726562 -5.132812 5.375 -5.59375 C 5.03125 -6.0625 4.539062 -6.296875 3.90625 -6.296875 C 3.28125 -6.296875 2.789062 -6.0625 2.4375 -5.59375 C 2.09375 -5.132812 1.921875 -4.484375 1.921875 -3.640625 C 1.921875 -2.796875 2.09375 -2.140625 2.4375 -1.671875 C 2.789062 -1.210938 3.28125 -0.984375 3.90625 -0.984375 C 4.539062 -0.984375 5.03125 -1.210938 5.375 -1.671875 C 5.726562 -2.140625 5.90625 -2.796875 5.90625 -3.640625 Z M 7.078125 -0.875 C 7.078125 0.332031 6.804688 1.226562 6.265625 1.8125 C 5.722656 2.40625 4.898438 2.703125 3.796875 2.703125 C 3.390625 2.703125 3.003906 2.671875 2.640625 2.609375 C 2.273438 2.546875 1.921875 2.453125 1.578125 2.328125 L 1.578125 1.1875 C 1.921875 1.375 2.257812 1.507812 2.59375 1.59375 C 2.925781 1.6875 3.265625 1.734375 3.609375 1.734375 C 4.378906 1.734375 4.953125 1.535156 5.328125 1.140625 C 5.710938 0.742188 5.90625 0.140625 5.90625 -0.671875 L 5.90625 -1.25 C 5.664062 -0.832031 5.351562 -0.519531 4.96875 -0.3125 C 4.59375 -0.101562 4.144531 0 3.625 0 C 2.75 0 2.046875 -0.332031 1.515625 -1 C 0.984375 -1.664062 0.71875 -2.546875 0.71875 -3.640625 C 0.71875 -4.734375 0.984375 -5.613281 1.515625 -6.28125 C 2.046875 -6.945312 2.75 -7.28125 3.625 -7.28125 C 4.144531 -7.28125 4.59375 -7.175781 4.96875 -6.96875 C 5.351562 -6.757812 5.664062 -6.445312 5.90625 -6.03125 L 5.90625 -7.109375 L 7.078125 -7.109375 Z M 7.078125 -0.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-13">
+<path style="stroke:none;" d="M 5.125 -8.609375 C 4.1875 -8.609375 3.441406 -8.257812 2.890625 -7.5625 C 2.347656 -6.875 2.078125 -5.929688 2.078125 -4.734375 C 2.078125 -3.535156 2.347656 -2.585938 2.890625 -1.890625 C 3.441406 -1.203125 4.1875 -0.859375 5.125 -0.859375 C 6.050781 -0.859375 6.785156 -1.203125 7.328125 -1.890625 C 7.878906 -2.585938 8.15625 -3.535156 8.15625 -4.734375 C 8.15625 -5.929688 7.878906 -6.875 7.328125 -7.5625 C 6.785156 -8.257812 6.050781 -8.609375 5.125 -8.609375 Z M 5.125 -9.65625 C 6.445312 -9.65625 7.503906 -9.207031 8.296875 -8.3125 C 9.097656 -7.414062 9.5 -6.222656 9.5 -4.734375 C 9.5 -3.234375 9.097656 -2.035156 8.296875 -1.140625 C 7.503906 -0.253906 6.445312 0.1875 5.125 0.1875 C 3.789062 0.1875 2.722656 -0.253906 1.921875 -1.140625 C 1.128906 -2.035156 0.734375 -3.234375 0.734375 -4.734375 C 0.734375 -6.222656 1.128906 -7.414062 1.921875 -8.3125 C 2.722656 -9.207031 3.789062 -9.65625 5.125 -9.65625 Z M 5.125 -9.65625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-14">
+<path style="stroke:none;" d="M 1.109375 -2.8125 L 1.109375 -7.109375 L 2.265625 -7.109375 L 2.265625 -2.84375 C 2.265625 -2.175781 2.394531 -1.671875 2.65625 -1.328125 C 2.925781 -0.992188 3.320312 -0.828125 3.84375 -0.828125 C 4.476562 -0.828125 4.976562 -1.023438 5.34375 -1.421875 C 5.707031 -1.828125 5.890625 -2.378906 5.890625 -3.078125 L 5.890625 -7.109375 L 7.0625 -7.109375 L 7.0625 0 L 5.890625 0 L 5.890625 -1.09375 C 5.609375 -0.65625 5.28125 -0.332031 4.90625 -0.125 C 4.53125 0.0820312 4.09375 0.1875 3.59375 0.1875 C 2.78125 0.1875 2.160156 -0.0664062 1.734375 -0.578125 C 1.316406 -1.085938 1.109375 -1.832031 1.109375 -2.8125 Z M 4.046875 -7.28125 Z M 4.046875 -7.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-15">
+<path style="stroke:none;" d="M 2.375 -9.125 L 2.375 -7.109375 L 4.78125 -7.109375 L 4.78125 -6.203125 L 2.375 -6.203125 L 2.375 -2.34375 C 2.375 -1.757812 2.453125 -1.382812 2.609375 -1.21875 C 2.773438 -1.0625 3.101562 -0.984375 3.59375 -0.984375 L 4.78125 -0.984375 L 4.78125 0 L 3.59375 0 C 2.6875 0 2.0625 -0.164062 1.71875 -0.5 C 1.375 -0.84375 1.203125 -1.457031 1.203125 -2.34375 L 1.203125 -6.203125 L 0.34375 -6.203125 L 0.34375 -7.109375 L 1.203125 -7.109375 L 1.203125 -9.125 Z M 2.375 -9.125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-16">
+<path style="stroke:none;" d=""/>
+</symbol>
+<symbol overflow="visible" id="glyph2-17">
+<path style="stroke:none;" d="M 1.28125 -9.484375 L 6.71875 -9.484375 L 6.71875 -8.390625 L 2.5625 -8.390625 L 2.5625 -5.609375 L 6.3125 -5.609375 L 6.3125 -4.53125 L 2.5625 -4.53125 L 2.5625 0 L 1.28125 0 Z M 1.28125 -9.484375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-18">
+<path style="stroke:none;" d="M 6.765625 -5.75 C 7.054688 -6.269531 7.40625 -6.65625 7.8125 -6.90625 C 8.21875 -7.15625 8.695312 -7.28125 9.25 -7.28125 C 9.988281 -7.28125 10.554688 -7.019531 10.953125 -6.5 C 11.359375 -5.976562 11.5625 -5.242188 11.5625 -4.296875 L 11.5625 0 L 10.390625 0 L 10.390625 -4.25 C 10.390625 -4.9375 10.265625 -5.441406 10.015625 -5.765625 C 9.773438 -6.097656 9.410156 -6.265625 8.921875 -6.265625 C 8.316406 -6.265625 7.835938 -6.0625 7.484375 -5.65625 C 7.128906 -5.257812 6.953125 -4.710938 6.953125 -4.015625 L 6.953125 0 L 5.78125 0 L 5.78125 -4.25 C 5.78125 -4.9375 5.660156 -5.441406 5.421875 -5.765625 C 5.179688 -6.097656 4.804688 -6.265625 4.296875 -6.265625 C 3.703125 -6.265625 3.226562 -6.0625 2.875 -5.65625 C 2.53125 -5.25 2.359375 -4.703125 2.359375 -4.015625 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.617188 -6.4375 2.9375 -6.757812 3.3125 -6.96875 C 3.6875 -7.175781 4.128906 -7.28125 4.640625 -7.28125 C 5.160156 -7.28125 5.597656 -7.148438 5.953125 -6.890625 C 6.316406 -6.628906 6.585938 -6.25 6.765625 -5.75 Z M 6.765625 -5.75 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-19">
+<path style="stroke:none;" d="M 6.953125 -9.171875 L 6.953125 -7.921875 C 6.472656 -8.148438 6.015625 -8.320312 5.578125 -8.4375 C 5.148438 -8.550781 4.734375 -8.609375 4.328125 -8.609375 C 3.628906 -8.609375 3.085938 -8.472656 2.703125 -8.203125 C 2.328125 -7.929688 2.140625 -7.546875 2.140625 -7.046875 C 2.140625 -6.628906 2.265625 -6.3125 2.515625 -6.09375 C 2.773438 -5.882812 3.253906 -5.710938 3.953125 -5.578125 L 4.734375 -5.421875 C 5.679688 -5.234375 6.382812 -4.910156 6.84375 -4.453125 C 7.300781 -3.992188 7.53125 -3.378906 7.53125 -2.609375 C 7.53125 -1.691406 7.222656 -0.992188 6.609375 -0.515625 C 5.992188 -0.046875 5.085938 0.1875 3.890625 0.1875 C 3.441406 0.1875 2.960938 0.132812 2.453125 0.03125 C 1.953125 -0.0703125 1.429688 -0.222656 0.890625 -0.421875 L 0.890625 -1.734375 C 1.410156 -1.441406 1.921875 -1.222656 2.421875 -1.078125 C 2.921875 -0.929688 3.410156 -0.859375 3.890625 -0.859375 C 4.628906 -0.859375 5.195312 -1 5.59375 -1.28125 C 5.988281 -1.570312 6.1875 -1.984375 6.1875 -2.515625 C 6.1875 -2.984375 6.039062 -3.347656 5.75 -3.609375 C 5.46875 -3.867188 5.003906 -4.066406 4.359375 -4.203125 L 3.578125 -4.359375 C 2.617188 -4.546875 1.925781 -4.84375 1.5 -5.25 C 1.070312 -5.65625 0.859375 -6.21875 0.859375 -6.9375 C 0.859375 -7.78125 1.148438 -8.441406 1.734375 -8.921875 C 2.328125 -9.410156 3.144531 -9.65625 4.1875 -9.65625 C 4.625 -9.65625 5.070312 -9.613281 5.53125 -9.53125 C 6 -9.445312 6.472656 -9.328125 6.953125 -9.171875 Z M 6.953125 -9.171875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-20">
+<path style="stroke:none;" d="M 4.1875 0.65625 C 3.851562 1.507812 3.53125 2.0625 3.21875 2.3125 C 2.90625 2.570312 2.488281 2.703125 1.96875 2.703125 L 1.03125 2.703125 L 1.03125 1.734375 L 1.71875 1.734375 C 2.039062 1.734375 2.289062 1.65625 2.46875 1.5 C 2.644531 1.34375 2.835938 0.984375 3.046875 0.421875 L 3.265625 -0.109375 L 0.390625 -7.109375 L 1.625 -7.109375 L 3.84375 -1.546875 L 6.0625 -7.109375 L 7.3125 -7.109375 Z M 4.1875 0.65625 "/>
+</symbol>
+</g>
+</defs>
+<g id="surface268880">
+<rect x="0" y="0" width="774" height="152" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 21.75297 10.408118 L 26.433829 10.408118 L 26.433829 12.281165 L 21.75297 12.281165 Z M 21.75297 10.408118 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 29.079728 10.51222 L 32.829728 10.51222 L 32.829728 12.149915 L 29.079728 12.149915 Z M 29.079728 10.51222 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph0-1" x="20.171875" y="57.705621"/>
+  <use xlink:href="#glyph0-2" x="30.171875" y="57.705621"/>
+  <use xlink:href="#glyph0-3" x="40.171875" y="57.705621"/>
+  <use xlink:href="#glyph0-4" x="50.171875" y="57.705621"/>
+  <use xlink:href="#glyph0-5" x="60.171875" y="57.705621"/>
+  <use xlink:href="#glyph0-6" x="70.171875" y="57.705621"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph0-7" x="174.203125" y="60.053277"/>
+  <use xlink:href="#glyph0-8" x="184.203125" y="60.053277"/>
+</g>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 40.925236 10.544446 L 44.675236 10.544446 L 44.675236 12.090345 L 40.925236 12.090345 Z M 40.925236 10.544446 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 34.883439 10.536634 L 38.633439 10.536634 L 38.633439 12.120032 L 34.883439 12.120032 Z M 34.883439 10.536634 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 47.084806 10.484876 L 52.045743 10.484876 L 52.045743 12.130774 L 47.084806 12.130774 Z M 47.084806 10.484876 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.980118 10.376868 L 59.866642 10.376868 L 59.866642 12.279603 L 53.980118 12.279603 Z M 53.980118 10.376868 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 54.048478 13.825501 L 59.868009 13.825501 L 59.868009 15.490345 L 54.048478 15.490345 Z M 54.048478 13.825501 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 26.481876 11.338001 L 28.593009 11.332337 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 28.968009 11.33136 L 28.468595 11.582728 L 28.593009 11.332337 L 28.467228 11.082728 Z M 28.968009 11.33136 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 32.876798 11.329798 L 34.396525 11.328626 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 34.771525 11.328431 L 34.27172 11.578821 L 34.396525 11.328626 L 34.271329 11.078821 Z M 34.771525 11.328431 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 38.633439 11.328431 L 40.438517 11.319642 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 40.813517 11.317884 L 40.314689 11.570228 L 40.438517 11.319642 L 40.312345 11.070423 Z M 40.813517 11.317884 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 44.675236 11.317298 L 46.597892 11.309876 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 46.972892 11.308313 L 46.473868 11.560267 L 46.597892 11.309876 L 46.471915 11.060267 Z M 46.972892 11.308313 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 52.045743 11.307923 L 53.4934 11.323157 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.8684 11.327063 L 53.365861 11.57179 L 53.4934 11.323157 L 53.370939 11.07179 Z M 53.8684 11.327063 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph0-9" x="286.757813" y="60.611871"/>
+  <use xlink:href="#glyph0-10" x="296.757813" y="60.611871"/>
+  <use xlink:href="#glyph0-1" x="306.757813" y="60.611871"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph0-11" x="405.660156" y="59.904839"/>
+  <use xlink:href="#glyph0-10" x="415.660156" y="59.904839"/>
+  <use xlink:href="#glyph0-12" x="425.660156" y="59.904839"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph1-1" x="511.308594" y="58.064616"/>
+  <use xlink:href="#glyph1-2" x="517.308757" y="58.064616"/>
+  <use xlink:href="#glyph1-3" x="523.308919" y="58.064616"/>
+  <use xlink:href="#glyph1-4" x="529.309082" y="58.064616"/>
+  <use xlink:href="#glyph1-5" x="535.309245" y="58.064616"/>
+  <use xlink:href="#glyph1-6" x="541.309408" y="58.064616"/>
+  <use xlink:href="#glyph1-7" x="547.30957" y="58.064616"/>
+  <use xlink:href="#glyph1-8" x="553.309733" y="58.064616"/>
+  <use xlink:href="#glyph1-9" x="559.309896" y="58.064616"/>
+  <use xlink:href="#glyph1-10" x="565.310059" y="58.064616"/>
+  <use xlink:href="#glyph1-11" x="571.310221" y="58.064616"/>
+  <use xlink:href="#glyph1-12" x="577.310384" y="58.064616"/>
+  <use xlink:href="#glyph1-13" x="583.310547" y="58.064616"/>
+  <use xlink:href="#glyph1-8" x="589.31071" y="58.064616"/>
+  <use xlink:href="#glyph1-14" x="595.310872" y="58.064616"/>
+</g>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 45.671915 11.342298 L 45.655704 11.342298 L 45.655704 14.657923 L 53.561759 14.657923 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.936759 14.657923 L 53.436759 14.907923 L 53.561759 14.657923 L 53.436759 14.407923 Z M 53.936759 14.657923 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph1-15" x="657.078125" y="57.724772"/>
+  <use xlink:href="#glyph1-16" x="663.078288" y="57.724772"/>
+  <use xlink:href="#glyph1-10" x="669.078451" y="57.724772"/>
+  <use xlink:href="#glyph1-6" x="675.078613" y="57.724772"/>
+  <use xlink:href="#glyph1-8" x="681.078776" y="57.724772"/>
+  <use xlink:href="#glyph1-17" x="687.078939" y="57.724772"/>
+  <use xlink:href="#glyph1-11" x="693.079102" y="57.724772"/>
+  <use xlink:href="#glyph1-18" x="699.079264" y="57.724772"/>
+  <use xlink:href="#glyph1-19" x="705.079427" y="57.724772"/>
+  <use xlink:href="#glyph1-4" x="711.07959" y="57.724772"/>
+  <use xlink:href="#glyph1-20" x="717.079753" y="57.724772"/>
+  <use xlink:href="#glyph1-21" x="723.079915" y="57.724772"/>
+  <use xlink:href="#glyph1-22" x="729.080078" y="57.724772"/>
+  <use xlink:href="#glyph1-23" x="735.080241" y="57.724772"/>
+  <use xlink:href="#glyph1-21" x="741.080404" y="57.724772"/>
+  <use xlink:href="#glyph1-22" x="747.080566" y="57.724772"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph1-24" x="673.335938" y="124.170085"/>
+  <use xlink:href="#glyph1-11" x="679.3361" y="124.170085"/>
+  <use xlink:href="#glyph1-13" x="685.336263" y="124.170085"/>
+  <use xlink:href="#glyph1-8" x="691.336426" y="124.170085"/>
+  <use xlink:href="#glyph1-4" x="697.336589" y="124.170085"/>
+  <use xlink:href="#glyph1-20" x="703.336751" y="124.170085"/>
+  <use xlink:href="#glyph1-21" x="709.336914" y="124.170085"/>
+  <use xlink:href="#glyph1-22" x="715.337077" y="124.170085"/>
+  <use xlink:href="#glyph1-23" x="721.33724" y="124.170085"/>
+  <use xlink:href="#glyph1-21" x="727.337402" y="124.170085"/>
+  <use xlink:href="#glyph1-22" x="733.337565" y="124.170085"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph2-1" x="168.71875" y="31.959093"/>
+  <use xlink:href="#glyph2-2" x="175.866102" y="31.959093"/>
+  <use xlink:href="#glyph2-3" x="180.92551" y="31.959093"/>
+  <use xlink:href="#glyph2-4" x="188.879069" y="31.959093"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph2-5" x="288.109375" y="31.681749"/>
+  <use xlink:href="#glyph2-1" x="294.882378" y="31.681749"/>
+  <use xlink:href="#glyph2-6" x="302.029731" y="31.681749"/>
+  <use xlink:href="#glyph2-7" x="309.996039" y="31.681749"/>
+  <use xlink:href="#glyph2-8" x="313.607964" y="31.681749"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph2-5" x="535.988281" y="33.365343"/>
+  <use xlink:href="#glyph2-1" x="542.761285" y="33.365343"/>
+  <use xlink:href="#glyph2-6" x="549.908637" y="33.365343"/>
+  <use xlink:href="#glyph2-7" x="557.874946" y="33.365343"/>
+  <use xlink:href="#glyph2-8" x="561.486871" y="33.365343"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph2-9" x="26.695313" y="32.365343"/>
+  <use xlink:href="#glyph2-10" x="34.947266" y="32.365343"/>
+  <use xlink:href="#glyph2-11" x="38.559191" y="32.365343"/>
+  <use xlink:href="#glyph2-11" x="46.798394" y="32.365343"/>
+  <use xlink:href="#glyph2-10" x="55.037598" y="32.365343"/>
+  <use xlink:href="#glyph2-11" x="58.649523" y="32.365343"/>
+  <use xlink:href="#glyph2-12" x="66.888726" y="32.365343"/>
+</g>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-dasharray:0.14,0.14;stroke-miterlimit:10;" d="M 45.300431 9.486438 L 60.373478 9.486438 L 60.373478 16.175696 L 45.300431 16.175696 Z M 45.300431 9.486438 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph2-13" x="532.003906" y="11.904405"/>
+  <use xlink:href="#glyph2-14" x="542.236382" y="11.904405"/>
+  <use xlink:href="#glyph2-15" x="550.475586" y="11.904405"/>
+  <use xlink:href="#glyph2-4" x="555.5727" y="11.904405"/>
+  <use xlink:href="#glyph2-14" x="563.824653" y="11.904405"/>
+  <use xlink:href="#glyph2-15" x="572.063856" y="11.904405"/>
+  <use xlink:href="#glyph2-16" x="577.16097" y="11.904405"/>
+  <use xlink:href="#glyph2-17" x="581.293186" y="11.904405"/>
+  <use xlink:href="#glyph2-3" x="588.307346" y="11.904405"/>
+  <use xlink:href="#glyph2-2" x="596.260905" y="11.904405"/>
+  <use xlink:href="#glyph2-18" x="601.377279" y="11.904405"/>
+  <use xlink:href="#glyph2-6" x="614.040853" y="11.904405"/>
+  <use xlink:href="#glyph2-15" x="622.007161" y="11.904405"/>
+  <use xlink:href="#glyph2-15" x="627.104275" y="11.904405"/>
+  <use xlink:href="#glyph2-8" x="632.201389" y="11.904405"/>
+  <use xlink:href="#glyph2-2" x="640.199436" y="11.904405"/>
+  <use xlink:href="#glyph2-16" x="645.544217" y="11.904405"/>
+  <use xlink:href="#glyph2-19" x="649.676432" y="11.904405"/>
+  <use xlink:href="#glyph2-20" x="657.928385" y="11.904405"/>
+  <use xlink:href="#glyph2-5" x="665.621799" y="11.904405"/>
+  <use xlink:href="#glyph2-15" x="672.394803" y="11.904405"/>
+  <use xlink:href="#glyph2-8" x="677.491916" y="11.904405"/>
+  <use xlink:href="#glyph2-18" x="685.489963" y="11.904405"/>
+</g>
+</g>
+</svg>
diff --git a/Documentation/media/v4l-drivers/meye.rst b/Documentation/media/v4l-drivers/meye.rst
index a572996cdbf6..dc57a6a91b43 100644
--- a/Documentation/media/v4l-drivers/meye.rst
+++ b/Documentation/media/v4l-drivers/meye.rst
@@ -95,7 +95,7 @@ so all video4linux tools (like xawtv) should work with this driver.
 
 Besides the video4linux interface, the driver has a private interface
 for accessing the Motion Eye extended parameters (camera sharpness,
-agc, video framerate), the shapshot and the MJPEG capture facilities.
+agc, video framerate), the snapshot and the MJPEG capture facilities.
 
 This interface consists of several ioctls (prototypes and structures
 can be found in include/linux/meye.h):
diff --git a/Documentation/media/v4l-drivers/vimc.rst b/Documentation/media/v4l-drivers/vimc.rst
index 406417680db5..8f5d7f8d83bb 100644
--- a/Documentation/media/v4l-drivers/vimc.rst
+++ b/Documentation/media/v4l-drivers/vimc.rst
@@ -76,27 +76,19 @@ vimc-capture:
 	* 1 Pad sink
 	* 1 Pad source
 
-Module options
----------------
 
-Vimc has a few module parameters to configure the driver. You should pass
-those arguments to each subdevice, not to the vimc module. For example::
+Module options
+--------------
 
-        vimc_subdevice.param=value
+Vimc has a module parameter to configure the driver.
 
-* ``vimc_scaler.sca_mult=<unsigned int>``
+* ``sca_mult=<unsigned int>``
 
         Image size multiplier factor to be used to multiply both width and
         height, so the image size will be ``sca_mult^2`` bigger than the
         original one. Currently, only supports scaling up (the default value
         is 3).
 
-* ``vimc_debayer.deb_mean_win_size=<unsigned int>``
-
-        Window size to calculate the mean. Note: the window size needs to be an
-        odd number, as the main pixel stays in the center of the window,
-        otherwise the next odd number is considered (the default value is 3).
-
 Source code documentation
 -------------------------
 
diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions
index adeb6b7a15cb..cb6ccf91776e 100644
--- a/Documentation/media/videodev2.h.rst.exceptions
+++ b/Documentation/media/videodev2.h.rst.exceptions
@@ -141,6 +141,10 @@ replace symbol V4L2_CTRL_TYPE_H264_PPS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SCALING_MATRIX :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_DECODE_PARAMS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_HEVC_SPS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_HEVC_PPS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_AREA :c:type:`v4l2_ctrl_type`
 
 # V4L2 capability defines
 replace define V4L2_CAP_VIDEO_CAPTURE device-capabilities
@@ -434,6 +438,7 @@ replace define V4L2_DEC_CMD_START decoder-cmds
 replace define V4L2_DEC_CMD_STOP decoder-cmds
 replace define V4L2_DEC_CMD_PAUSE decoder-cmds
 replace define V4L2_DEC_CMD_RESUME decoder-cmds
+replace define V4L2_DEC_CMD_FLUSH decoder-cmds
 
 replace define V4L2_DEC_CMD_START_MUTE_AUDIO decoder-cmds
 replace define V4L2_DEC_CMD_PAUSE_TO_BLACK decoder-cmds
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 1adbb8a371c7..ec3b5865c1be 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -63,7 +63,6 @@ CONTENTS
 
      - Compiler barrier.
      - CPU memory barriers.
-     - MMIO write barrier.
 
  (*) Implicit kernel memory barriers.
 
@@ -75,7 +74,6 @@ CONTENTS
  (*) Inter-CPU acquiring barrier effects.
 
      - Acquires vs memory accesses.
-     - Acquires vs I/O accesses.
 
  (*) Where are memory barriers needed?
 
@@ -492,10 +490,9 @@ And a couple of implicit varieties:
      happen before it completes.
 
      The use of ACQUIRE and RELEASE operations generally precludes the need
-     for other sorts of memory barrier (but note the exceptions mentioned in
-     the subsection "MMIO write barrier").  In addition, a RELEASE+ACQUIRE
-     pair is -not- guaranteed to act as a full memory barrier.  However, after
-     an ACQUIRE on a given variable, all memory accesses preceding any prior
+     for other sorts of memory barrier.  In addition, a RELEASE+ACQUIRE pair is
+     -not- guaranteed to act as a full memory barrier.  However, after an
+     ACQUIRE on a given variable, all memory accesses preceding any prior
      RELEASE on that same variable are guaranteed to be visible.  In other
      words, within a given variable's critical section, all accesses of all
      previous critical sections for that variable are guaranteed to have
@@ -1512,8 +1509,6 @@ levels:
 
   (*) CPU memory barriers.
 
-  (*) MMIO write barrier.
-
 
 COMPILER BARRIER
 ----------------
diff --git a/Documentation/mips/ingenic-tcu.rst b/Documentation/mips/ingenic-tcu.rst
index c4ef4c45aade..c5a646b14450 100644
--- a/Documentation/mips/ingenic-tcu.rst
+++ b/Documentation/mips/ingenic-tcu.rst
@@ -68,4 +68,4 @@ and frameworks can be controlled from the same registers, all of these
 drivers access their registers through the same regmap.
 
 For more information regarding the devicetree bindings of the TCU drivers,
-have a look at Documentation/devicetree/bindings/mfd/ingenic,tcu.txt.
+have a look at Documentation/devicetree/bindings/timer/ingenic,tcu.txt.
diff --git a/Documentation/misc-devices/xilinx_sdfec.rst b/Documentation/misc-devices/xilinx_sdfec.rst
new file mode 100644
index 000000000000..7a47075c171c
--- /dev/null
+++ b/Documentation/misc-devices/xilinx_sdfec.rst
@@ -0,0 +1,292 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+====================
+Xilinx SD-FEC Driver
+====================
+
+Overview
+========
+
+This driver supports SD-FEC Integrated Block for Zynq |Ultrascale+ (TM)| RFSoCs.
+
+.. |Ultrascale+ (TM)| unicode:: Ultrascale+ U+2122
+   .. with trademark sign
+
+For a full description of SD-FEC core features, see the `SD-FEC Product Guide (PG256) <https://www.xilinx.com/cgi-bin/docs/ipdoc?c=sd_fec;v=latest;d=pg256-sdfec-integrated-block.pdf>`_
+
+This driver supports the following features:
+
+  - Retrieval of the Integrated Block configuration and status information
+  - Configuration of LDPC codes
+  - Configuration of Turbo decoding
+  - Monitoring errors
+
+Missing features, known issues, and limitations of the SD-FEC driver are as
+follows:
+
+  - Only allows a single open file handler to any instance of the driver at any time
+  - Reset of the SD-FEC Integrated Block is not controlled by this driver
+  - Does not support shared LDPC code table wraparound
+
+The device tree entry is described in:
+`linux-xlnx/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt <https://github.com/Xilinx/linux-xlnx/blob/master/Documentation/devicetree/bindings/misc/xlnx%2Csd-fec.txt>`_
+
+
+Modes of Operation
+------------------
+
+The driver works with the SD-FEC core in two modes of operation:
+
+  - Run-time configuration
+  - Programmable Logic (PL) initialization
+
+
+Run-time Configuration
+~~~~~~~~~~~~~~~~~~~~~~
+
+For Run-time configuration the role of driver is to allow the software application to do the following:
+
+	- Load the configuration parameters for either Turbo decode or LDPC encode or decode
+	- Activate the SD-FEC core
+	- Monitor the SD-FEC core for errors
+	- Retrieve the status and configuration of the SD-FEC core
+
+Programmable Logic (PL) Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For PL initialization, supporting logic loads configuration parameters for either
+the Turbo decode or LDPC encode or decode.  The role of the driver is to allow
+the software application to do the following:
+
+	- Activate the SD-FEC core
+	- Monitor the SD-FEC core for errors
+	- Retrieve the status and configuration of the SD-FEC core
+
+
+Driver Structure
+================
+
+The driver provides a platform device where the ``probe`` and ``remove``
+operations are provided.
+
+  - probe: Updates configuration register with device-tree entries plus determines the current activate state of the core, for example, is the core bypassed or has the core been started.
+
+
+The driver defines the following driver file operations to provide user
+application interfaces:
+
+  - open: Implements restriction that only a single file descriptor can be open per SD-FEC instance at any time
+  - release: Allows another file descriptor to be open, that is after current file descriptor is closed
+  - poll: Provides a method to monitor for SD-FEC Error events
+  - unlocked_ioctl: Provides the the following ioctl commands that allows the application configure the SD-FEC core:
+
+		- :c:macro:`XSDFEC_START_DEV`
+		- :c:macro:`XSDFEC_STOP_DEV`
+		- :c:macro:`XSDFEC_GET_STATUS`
+		- :c:macro:`XSDFEC_SET_IRQ`
+		- :c:macro:`XSDFEC_SET_TURBO`
+		- :c:macro:`XSDFEC_ADD_LDPC_CODE_PARAMS`
+		- :c:macro:`XSDFEC_GET_CONFIG`
+		- :c:macro:`XSDFEC_SET_ORDER`
+		- :c:macro:`XSDFEC_SET_BYPASS`
+		- :c:macro:`XSDFEC_IS_ACTIVE`
+		- :c:macro:`XSDFEC_CLEAR_STATS`
+		- :c:macro:`XSDFEC_SET_DEFAULT_CONFIG`
+
+
+Driver Usage
+============
+
+
+Overview
+--------
+
+After opening the driver, the user should find out what operations need to be
+performed to configure and activate the SD-FEC core and determine the
+configuration of the driver.
+The following outlines the flow the user should perform:
+
+  - Determine Configuration
+  - Set the order, if not already configured as desired
+  - Set Turbo decode, LPDC encode or decode parameters, depending on how the
+    SD-FEC core is configured plus if the SD-FEC has not been configured for PL
+    initialization
+  - Enable interrupts, if not already enabled
+  - Bypass the SD-FEC core, if required
+  - Start the SD-FEC core if not already started
+  - Get the SD-FEC core status
+  - Monitor for interrupts
+  - Stop the SD-FEC core
+
+
+Note: When monitoring for interrupts if a critical error is detected where a reset is required, the driver will be required to load the default configuration.
+
+
+Determine Configuration
+-----------------------
+
+Determine the configuration of the SD-FEC core by using the ioctl
+:c:macro:`XSDFEC_GET_CONFIG`.
+
+Set the Order
+-------------
+
+Setting the order determines how the order of Blocks can change from input to output.
+
+Setting the order is done by using the ioctl :c:macro:`XSDFEC_SET_ORDER`
+
+Setting the order can only be done if the following restrictions are met:
+
+	- The ``state`` member of struct :c:type:`xsdfec_status <xsdfec_status>` filled by the ioctl :c:macro:`XSDFEC_GET_STATUS` indicates the SD-FEC core has not STARTED
+
+
+Add LDPC Codes
+--------------
+
+The following steps indicate how to add LDPC codes to the SD-FEC core:
+
+	- Use the auto-generated parameters to fill the :c:type:`struct xsdfec_ldpc_params <xsdfec_ldpc_params>` for the desired LDPC code.
+	- Set the SC, QA, and LA table offsets for the LPDC parameters and the parameters in the structure :c:type:`struct xsdfec_ldpc_params <xsdfec_ldpc_params>`
+	- Set the desired Code Id value in the structure :c:type:`struct xsdfec_ldpc_params <xsdfec_ldpc_params>`
+	- Add the LPDC Code Parameters using the ioctl :c:macro:`XSDFEC_ADD_LDPC_CODE_PARAMS`
+	- For the applied LPDC Code Parameter use the function :c:func:`xsdfec_calculate_shared_ldpc_table_entry_size` to calculate the size of shared LPDC code tables. This allows the user to determine the shared table usage so when selecting the table offsets for the next LDPC code parameters unused table areas can be selected.
+	- Repeat for each LDPC code parameter.
+
+Adding LDPC codes can only be done if the following restrictions are met:
+
+	- The ``code`` member of :c:type:`struct xsdfec_config <xsdfec_config>` filled by the ioctl :c:macro:`XSDFEC_GET_CONFIG` indicates the SD-FEC core is configured as LDPC
+	- The ``code_wr_protect`` of :c:type:`struct xsdfec_config <xsdfec_config>` filled by the ioctl :c:macro:`XSDFEC_GET_CONFIG` indicates that write protection is not enabled
+	- The ``state`` member of struct :c:type:`xsdfec_status <xsdfec_status>` filled by the ioctl :c:macro:`XSDFEC_GET_STATUS` indicates the SD-FEC core has not started
+
+Set Turbo Decode
+----------------
+
+Configuring the Turbo decode parameters is done by using the ioctl :c:macro:`XSDFEC_SET_TURBO` using auto-generated parameters to fill the :c:type:`struct xsdfec_turbo <xsdfec_turbo>` for the desired Turbo code.
+
+Adding Turbo decode can only be done if the following restrictions are met:
+
+	- The ``code`` member of :c:type:`struct xsdfec_config <xsdfec_config>` filled by the ioctl :c:macro:`XSDFEC_GET_CONFIG` indicates the SD-FEC core is configured as TURBO
+	- The ``state`` member of struct :c:type:`xsdfec_status <xsdfec_status>` filled by the ioctl :c:macro:`XSDFEC_GET_STATUS` indicates the SD-FEC core has not STARTED
+
+Enable Interrupts
+-----------------
+
+Enabling or disabling interrupts is done by using the ioctl :c:macro:`XSDFEC_SET_IRQ`. The members of the parameter passed, :c:type:`struct xsdfec_irq <xsdfec_irq>`, to the ioctl are used to set and clear different categories of interrupts. The category of interrupt is controlled as following:
+
+  - ``enable_isr`` controls the ``tlast`` interrupts
+  - ``enable_ecc_isr`` controls the ECC interrupts
+
+If the ``code`` member of :c:type:`struct xsdfec_config <xsdfec_config>` filled by the ioctl :c:macro:`XSDFEC_GET_CONFIG` indicates the SD-FEC core is configured as TURBO then the enabling ECC errors is not required.
+
+Bypass the SD-FEC
+-----------------
+
+Bypassing the SD-FEC is done by using the ioctl :c:macro:`XSDFEC_SET_BYPASS`
+
+Bypassing the SD-FEC can only be done if the following restrictions are met:
+
+	- The ``state`` member of :c:type:`struct xsdfec_status <xsdfec_status>` filled by the ioctl :c:macro:`XSDFEC_GET_STATUS` indicates the SD-FEC core has not STARTED
+
+Start the SD-FEC core
+---------------------
+
+Start the SD-FEC core by using the ioctl :c:macro:`XSDFEC_START_DEV`
+
+Get SD-FEC Status
+-----------------
+
+Get the SD-FEC status of the device by using the ioctl :c:macro:`XSDFEC_GET_STATUS`, which will fill the :c:type:`struct xsdfec_status <xsdfec_status>`
+
+Monitor for Interrupts
+----------------------
+
+	- Use the poll system call to monitor for an interrupt. The poll system call waits for an interrupt to wake it up or times out if no interrupt occurs.
+	- On return Poll ``revents`` will indicate whether stats and/or state have been updated
+		- ``POLLPRI`` indicates a critical error and the user should use :c:macro:`XSDFEC_GET_STATUS` and :c:macro:`XSDFEC_GET_STATS` to confirm
+		- ``POLLRDNORM`` indicates a non-critical error has occurred and the user should use  :c:macro:`XSDFEC_GET_STATS` to confirm
+	- Get stats by using the ioctl :c:macro:`XSDFEC_GET_STATS`
+		- For critical error the ``isr_err_count`` or ``uecc_count`` member  of :c:type:`struct xsdfec_stats <xsdfec_stats>` is non-zero
+		- For non-critical errors the ``cecc_count`` member of :c:type:`struct xsdfec_stats <xsdfec_stats>` is non-zero
+	- Get state by using the ioctl :c:macro:`XSDFEC_GET_STATUS`
+		- For a critical error the ``state`` of :c:type:`xsdfec_status <xsdfec_status>` will indicate a Reset Is Required
+	- Clear stats by using the ioctl :c:macro:`XSDFEC_CLEAR_STATS`
+
+If a critical error is detected where a reset is required. The application is required to call the ioctl :c:macro:`XSDFEC_SET_DEFAULT_CONFIG`, after the reset and it is not required to call the ioctl :c:macro:`XSDFEC_STOP_DEV`
+
+Note: Using poll system call prevents busy looping using :c:macro:`XSDFEC_GET_STATS` and :c:macro:`XSDFEC_GET_STATUS`
+
+Stop the SD-FEC Core
+---------------------
+
+Stop the device by using the ioctl :c:macro:`XSDFEC_STOP_DEV`
+
+Set the Default Configuration
+-----------------------------
+
+Load default configuration by using the ioctl :c:macro:`XSDFEC_SET_DEFAULT_CONFIG` to restore the driver.
+
+Limitations
+-----------
+
+Users should not duplicate SD-FEC device file handlers, for example fork() or dup() a process that has a created an SD-FEC file handler.
+
+Driver IOCTLs
+==============
+
+.. c:macro:: XSDFEC_START_DEV
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_START_DEV
+
+.. c:macro:: XSDFEC_STOP_DEV
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_STOP_DEV
+
+.. c:macro:: XSDFEC_GET_STATUS
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_GET_STATUS
+
+.. c:macro:: XSDFEC_SET_IRQ
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_SET_IRQ
+
+.. c:macro:: XSDFEC_SET_TURBO
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_SET_TURBO
+
+.. c:macro:: XSDFEC_ADD_LDPC_CODE_PARAMS
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_ADD_LDPC_CODE_PARAMS
+
+.. c:macro:: XSDFEC_GET_CONFIG
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_GET_CONFIG
+
+.. c:macro:: XSDFEC_SET_ORDER
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_SET_ORDER
+
+.. c:macro:: XSDFEC_SET_BYPASS
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_SET_BYPASS
+
+.. c:macro:: XSDFEC_IS_ACTIVE
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_IS_ACTIVE
+
+.. c:macro:: XSDFEC_CLEAR_STATS
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_CLEAR_STATS
+
+.. c:macro:: XSDFEC_GET_STATS
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_GET_STATS
+
+.. c:macro:: XSDFEC_SET_DEFAULT_CONFIG
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :doc: XSDFEC_SET_DEFAULT_CONFIG
+
+Driver Type Definitions
+=======================
+
+.. kernel-doc:: include/uapi/misc/xilinx_sdfec.h
+   :internal:
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 83f7ae5fc045..5bc55a4e3bce 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -40,13 +40,13 @@ allocates memory for this UMEM using whatever means it feels is most
 appropriate (malloc, mmap, huge pages, etc). This memory area is then
 registered with the kernel using the new setsockopt XDP_UMEM_REG. The
 UMEM also has two rings: the FILL ring and the COMPLETION ring. The
-fill ring is used by the application to send down addr for the kernel
+FILL ring is used by the application to send down addr for the kernel
 to fill in with RX packet data. References to these frames will then
 appear in the RX ring once each packet has been received. The
-completion ring, on the other hand, contains frame addr that the
+COMPLETION ring, on the other hand, contains frame addr that the
 kernel has transmitted completely and can now be used again by user
 space, for either TX or RX. Thus, the frame addrs appearing in the
-completion ring are addrs that were previously transmitted using the
+COMPLETION ring are addrs that were previously transmitted using the
 TX ring. In summary, the RX and FILL rings are used for the RX path
 and the TX and COMPLETION rings are used for the TX path.
 
@@ -91,11 +91,16 @@ Concepts
 ========
 
 In order to use an AF_XDP socket, a number of associated objects need
-to be setup.
+to be setup. These objects and their options are explained in the
+following sections.
 
-Jonathan Corbet has also written an excellent article on LWN,
-"Accelerating networking with AF_XDP". It can be found at
-https://lwn.net/Articles/750845/.
+For an overview on how AF_XDP works, you can also take a look at the
+Linux Plumbers paper from 2018 on the subject:
+http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf. Do
+NOT consult the paper from 2017 on "AF_PACKET v4", the first attempt
+at AF_XDP. Nearly everything changed since then. Jonathan Corbet has
+also written an excellent article on LWN, "Accelerating networking
+with AF_XDP". It can be found at https://lwn.net/Articles/750845/.
 
 UMEM
 ----
@@ -113,22 +118,22 @@ the next socket B can do this by setting the XDP_SHARED_UMEM flag in
 struct sockaddr_xdp member sxdp_flags, and passing the file descriptor
 of A to struct sockaddr_xdp member sxdp_shared_umem_fd.
 
-The UMEM has two single-producer/single-consumer rings, that are used
+The UMEM has two single-producer/single-consumer rings that are used
 to transfer ownership of UMEM frames between the kernel and the
 user-space application.
 
 Rings
 -----
 
-There are a four different kind of rings: Fill, Completion, RX and
+There are a four different kind of rings: FILL, COMPLETION, RX and
 TX. All rings are single-producer/single-consumer, so the user-space
 application need explicit synchronization of multiple
 processes/threads are reading/writing to them.
 
-The UMEM uses two rings: Fill and Completion. Each socket associated
+The UMEM uses two rings: FILL and COMPLETION. Each socket associated
 with the UMEM must have an RX queue, TX queue or both. Say, that there
 is a setup with four sockets (all doing TX and RX). Then there will be
-one Fill ring, one Completion ring, four TX rings and four RX rings.
+one FILL ring, one COMPLETION ring, four TX rings and four RX rings.
 
 The rings are head(producer)/tail(consumer) based rings. A producer
 writes the data ring at the index pointed out by struct xdp_ring
@@ -146,7 +151,7 @@ The size of the rings need to be of size power of two.
 UMEM Fill Ring
 ~~~~~~~~~~~~~~
 
-The Fill ring is used to transfer ownership of UMEM frames from
+The FILL ring is used to transfer ownership of UMEM frames from
 user-space to kernel-space. The UMEM addrs are passed in the ring. As
 an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has
 16 chunks and can pass addrs between 0 and 64k.
@@ -164,8 +169,8 @@ chunks mode, then the incoming addr will be left untouched.
 UMEM Completion Ring
 ~~~~~~~~~~~~~~~~~~~~
 
-The Completion Ring is used transfer ownership of UMEM frames from
-kernel-space to user-space. Just like the Fill ring, UMEM indicies are
+The COMPLETION Ring is used transfer ownership of UMEM frames from
+kernel-space to user-space. Just like the FILL ring, UMEM indices are
 used.
 
 Frames passed from the kernel to user-space are frames that has been
@@ -181,7 +186,7 @@ The RX ring is the receiving side of a socket. Each entry in the ring
 is a struct xdp_desc descriptor. The descriptor contains UMEM offset
 (addr) and the length of the data (len).
 
-If no frames have been passed to kernel via the Fill ring, no
+If no frames have been passed to kernel via the FILL ring, no
 descriptors will (or can) appear on the RX ring.
 
 The user application consumes struct xdp_desc descriptors from this
@@ -199,8 +204,24 @@ be relaxed in the future.
 The user application produces struct xdp_desc descriptors to this
 ring.
 
+Libbpf
+======
+
+Libbpf is a helper library for eBPF and XDP that makes using these
+technologies a lot simpler. It also contains specific helper functions
+in tools/lib/bpf/xsk.h for facilitating the use of AF_XDP. It
+contains two types of functions: those that can be used to make the
+setup of AF_XDP socket easier and ones that can be used in the data
+plane to access the rings safely and quickly. To see an example on how
+to use this API, please take a look at the sample application in
+samples/bpf/xdpsock_usr.c which uses libbpf for both setup and data
+plane operations.
+
+We recommend that you use this library unless you have become a power
+user. It will make your program a lot simpler.
+
 XSKMAP / BPF_MAP_TYPE_XSKMAP
-----------------------------
+============================
 
 On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that
 is used in conjunction with bpf_redirect_map() to pass the ingress
@@ -216,21 +237,202 @@ queue 17. Only the XDP program executing for eth0 and queue 17 will
 successfully pass data to the socket. Please refer to the sample
 application (samples/bpf/) in for an example.
 
+Configuration Flags and Socket Options
+======================================
+
+These are the various configuration flags that can be used to control
+and monitor the behavior of AF_XDP sockets.
+
+XDP_COPY and XDP_ZERO_COPY bind flags
+-------------------------------------
+
+When you bind to a socket, the kernel will first try to use zero-copy
+copy. If zero-copy is not supported, it will fall back on using copy
+mode, i.e. copying all packets out to user space. But if you would
+like to force a certain mode, you can use the following flags. If you
+pass the XDP_COPY flag to the bind call, the kernel will force the
+socket into copy mode. If it cannot use copy mode, the bind call will
+fail with an error. Conversely, the XDP_ZERO_COPY flag will force the
+socket into zero-copy mode or fail.
+
+XDP_SHARED_UMEM bind flag
+-------------------------
+
+This flag enables you to bind multiple sockets to the same UMEM, but
+only if they share the same queue id. In this mode, each socket has
+their own RX and TX rings, but the UMEM (tied to the fist socket
+created) only has a single FILL ring and a single COMPLETION
+ring. To use this mode, create the first socket and bind it in the normal
+way. Create a second socket and create an RX and a TX ring, or at
+least one of them, but no FILL or COMPLETION rings as the ones from
+the first socket will be used. In the bind call, set he
+XDP_SHARED_UMEM option and provide the initial socket's fd in the
+sxdp_shared_umem_fd field. You can attach an arbitrary number of extra
+sockets this way.
+
+What socket will then a packet arrive on? This is decided by the XDP
+program. Put all the sockets in the XSK_MAP and just indicate which
+index in the array you would like to send each packet to. A simple
+round-robin example of distributing packets is shown below:
+
+.. code-block:: c
+
+   #include <linux/bpf.h>
+   #include "bpf_helpers.h"
+
+   #define MAX_SOCKS 16
+
+   struct {
+        __uint(type, BPF_MAP_TYPE_XSKMAP);
+        __uint(max_entries, MAX_SOCKS);
+        __uint(key_size, sizeof(int));
+        __uint(value_size, sizeof(int));
+   } xsks_map SEC(".maps");
+
+   static unsigned int rr;
+
+   SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+   {
+	rr = (rr + 1) & (MAX_SOCKS - 1);
+
+	return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
+   }
+
+Note, that since there is only a single set of FILL and COMPLETION
+rings, and they are single producer, single consumer rings, you need
+to make sure that multiple processes or threads do not use these rings
+concurrently. There are no synchronization primitives in the
+libbpf code that protects multiple users at this point in time.
+
+Libbpf uses this mode if you create more than one socket tied to the
+same umem. However, note that you need to supply the
+XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf_flag with the
+xsk_socket__create calls and load your own XDP program as there is no
+built in one in libbpf that will route the traffic for you.
+
+XDP_USE_NEED_WAKEUP bind flag
+-----------------------------
+
+This option adds support for a new flag called need_wakeup that is
+present in the FILL ring and the TX ring, the rings for which user
+space is a producer. When this option is set in the bind call, the
+need_wakeup flag will be set if the kernel needs to be explicitly
+woken up by a syscall to continue processing packets. If the flag is
+zero, no syscall is needed.
+
+If the flag is set on the FILL ring, the application needs to call
+poll() to be able to continue to receive packets on the RX ring. This
+can happen, for example, when the kernel has detected that there are no
+more buffers on the FILL ring and no buffers left on the RX HW ring of
+the NIC. In this case, interrupts are turned off as the NIC cannot
+receive any packets (as there are no buffers to put them in), and the
+need_wakeup flag is set so that user space can put buffers on the
+FILL ring and then call poll() so that the kernel driver can put these
+buffers on the HW ring and start to receive packets.
+
+If the flag is set for the TX ring, it means that the application
+needs to explicitly notify the kernel to send any packets put on the
+TX ring. This can be accomplished either by a poll() call, as in the
+RX path, or by calling sendto().
+
+An example of how to use this flag can be found in
+samples/bpf/xdpsock_user.c. An example with the use of libbpf helpers
+would look like this for the TX path:
+
+.. code-block:: c
+
+   if (xsk_ring_prod__needs_wakeup(&my_tx_ring))
+      sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+I.e., only use the syscall if the flag is set.
+
+We recommend that you always enable this mode as it usually leads to
+better performance especially if you run the application and the
+driver on the same core, but also if you use different cores for the
+application and the kernel driver, as it reduces the number of
+syscalls needed for the TX path.
+
+XDP_{RX|TX|UMEM_FILL|UMEM_COMPLETION}_RING setsockopts
+------------------------------------------------------
+
+These setsockopts sets the number of descriptors that the RX, TX,
+FILL, and COMPLETION rings respectively should have. It is mandatory
+to set the size of at least one of the RX and TX rings. If you set
+both, you will be able to both receive and send traffic from your
+application, but if you only want to do one of them, you can save
+resources by only setting up one of them. Both the FILL ring and the
+COMPLETION ring are mandatory as you need to have a UMEM tied to your
+socket. But if the XDP_SHARED_UMEM flag is used, any socket after the
+first one does not have a UMEM and should in that case not have any
+FILL or COMPLETION rings created as the ones from the shared umem will
+be used. Note, that the rings are single-producer single-consumer, so
+do not try to access them from multiple processes at the same
+time. See the XDP_SHARED_UMEM section.
+
+In libbpf, you can create Rx-only and Tx-only sockets by supplying
+NULL to the rx and tx arguments, respectively, to the
+xsk_socket__create function.
+
+If you create a Tx-only socket, we recommend that you do not put any
+packets on the fill ring. If you do this, drivers might think you are
+going to receive something when you in fact will not, and this can
+negatively impact performance.
+
+XDP_UMEM_REG setsockopt
+-----------------------
+
+This setsockopt registers a UMEM to a socket. This is the area that
+contain all the buffers that packet can recide in. The call takes a
+pointer to the beginning of this area and the size of it. Moreover, it
+also has parameter called chunk_size that is the size that the UMEM is
+divided into. It can only be 2K or 4K at the moment. If you have an
+UMEM area that is 128K and a chunk size of 2K, this means that you
+will be able to hold a maximum of 128K / 2K = 64 packets in your UMEM
+area and that your largest packet size can be 2K.
+
+There is also an option to set the headroom of each single buffer in
+the UMEM. If you set this to N bytes, it means that the packet will
+start N bytes into the buffer leaving the first N bytes for the
+application to use. The final option is the flags field, but it will
+be dealt with in separate sections for each UMEM flag.
+
+XDP_STATISTICS getsockopt
+-------------------------
+
+Gets drop statistics of a socket that can be useful for debug
+purposes. The supported statistics are shown below:
+
+.. code-block:: c
+
+   struct xdp_statistics {
+	  __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+	  __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+	  __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+   };
+
+XDP_OPTIONS getsockopt
+----------------------
+
+Gets options from an XDP socket. The only one supported so far is
+XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not.
+
 Usage
 =====
 
-In order to use AF_XDP sockets there are two parts needed. The
+In order to use AF_XDP sockets two parts are needed. The
 user-space application and the XDP program. For a complete setup and
 usage example, please refer to the sample application. The user-space
 side is xdpsock_user.c and the XDP side is part of libbpf.
 
-The XDP code sample included in tools/lib/bpf/xsk.c is the following::
+The XDP code sample included in tools/lib/bpf/xsk.c is the following:
+
+.. code-block:: c
 
    SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
    {
        int index = ctx->rx_queue_index;
 
-       // A set entry here means that the correspnding queue_id
+       // A set entry here means that the corresponding queue_id
        // has an active AF_XDP socket bound to it.
        if (bpf_map_lookup_elem(&xsks_map, &index))
            return bpf_redirect_map(&xsks_map, index, 0);
@@ -238,7 +440,10 @@ The XDP code sample included in tools/lib/bpf/xsk.c is the following::
        return XDP_PASS;
    }
 
-Naive ring dequeue and enqueue could look like this::
+A simple but not so performance ring dequeue and enqueue could look
+like this:
+
+.. code-block:: c
 
     // struct xdp_rxtx_ring {
     // 	__u32 *producer;
@@ -287,17 +492,16 @@ Naive ring dequeue and enqueue could look like this::
         return 0;
     }
 
-
-For a more optimized version, please refer to the sample application.
+But please use the libbpf functions as they are optimized and ready to
+use. Will make your life easier.
 
 Sample application
 ==================
 
 There is a xdpsock benchmarking/test application included that
-demonstrates how to use AF_XDP sockets with both private and shared
-UMEMs. Say that you would like your UDP traffic from port 4242 to end
-up in queue 16, that we will enable AF_XDP on. Here, we use ethtool
-for this::
+demonstrates how to use AF_XDP sockets with private UMEMs. Say that
+you would like your UDP traffic from port 4242 to end up in queue 16,
+that we will enable AF_XDP on. Here, we use ethtool for this::
 
       ethtool -N p3p2 rx-flow-hash udp4 fn
       ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \
@@ -311,13 +515,18 @@ using::
 For XDP_SKB mode, use the switch "-S" instead of "-N" and all options
 can be displayed with "-h", as usual.
 
+This sample application uses libbpf to make the setup and usage of
+AF_XDP simpler. If you want to know how the raw uapi of AF_XDP is
+really used to make something more advanced, take a look at the libbpf
+code in tools/lib/bpf/xsk.[ch].
+
 FAQ
 =======
 
 Q: I am not seeing any traffic on the socket. What am I doing wrong?
 
 A: When a netdev of a physical NIC is initialized, Linux usually
-   allocates one Rx and Tx queue pair per core. So on a 8 core system,
+   allocates one RX and TX queue pair per core. So on a 8 core system,
    queue ids 0 to 7 will be allocated, one per core. In the AF_XDP
    bind call or the xsk_socket__create libbpf function call, you
    specify a specific queue id to bind to and it is only the traffic
@@ -343,9 +552,21 @@ A: When a netdev of a physical NIC is initialized, Linux usually
      sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
      4242 action 2
 
-   A number of other ways are possible all up to the capabilitites of
+   A number of other ways are possible all up to the capabilities of
    the NIC you have.
 
+Q: Can I use the XSKMAP to implement a switch betwen different umems
+   in copy mode?
+
+A: The short answer is no, that is not supported at the moment. The
+   XSKMAP can only be used to switch traffic coming in on queue id X
+   to sockets bound to the same queue id X. The XSKMAP can contain
+   sockets bound to different queue ids, for example X and Y, but only
+   traffic goming in from queue id Y can be directed to sockets bound
+   to the same queue id Y. In zero-copy mode, you should use the
+   switch, or other distribution mechanism, in your NIC to direct
+   traffic to the correct queue id and socket.
+
 Credits
 =======
 
diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.txt b/Documentation/networking/device_drivers/aquantia/atlantic.txt
index d235cbaeccc6..2013fcedc2da 100644
--- a/Documentation/networking/device_drivers/aquantia/atlantic.txt
+++ b/Documentation/networking/device_drivers/aquantia/atlantic.txt
@@ -1,5 +1,5 @@
-aQuantia AQtion Driver for the aQuantia Multi-Gigabit PCI Express Family of
-Ethernet Adapters
+Marvell(Aquantia) AQtion Driver for the aQuantia Multi-Gigabit PCI Express
+Family of Ethernet Adapters
 =============================================================================
 
 Contents
@@ -325,6 +325,46 @@ Supported ethtool options
  Example:
  ethtool -N eth0 flow-type udp4 action 0 loc 32
 
+ UDP GSO hardware offload
+ ---------------------------------
+ UDP GSO allows to boost UDP tx rates by offloading UDP headers allocation
+ into hardware. A special userspace socket option is required for this,
+ could be validated with /kernel/tools/testing/selftests/net/
+
+    udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100
+
+ Will cause sending out of 100 byte sized UDP packets formed from single
+ 6300 bytes user buffer.
+
+ UDP GSO is configured by:
+
+    ethtool -K eth0 tx-udp-segmentation on
+
+ Private flags (testing)
+ ---------------------------------
+
+ Atlantic driver supports private flags for hardware custom features:
+
+	$ ethtool --show-priv-flags ethX
+
+	Private flags for ethX:
+	DMASystemLoopback  : off
+	PKTSystemLoopback  : off
+	DMANetworkLoopback : off
+	PHYInternalLoopback: off
+	PHYExternalLoopback: off
+
+ Example:
+
+	$ ethtool --set-priv-flags ethX DMASystemLoopback on
+
+ DMASystemLoopback:   DMA Host loopback.
+ PKTSystemLoopback:   Packet buffer host loopback.
+ DMANetworkLoopback:  Network side loopback on DMA block.
+ PHYInternalLoopback: Internal loopback on Phy.
+ PHYExternalLoopback: External loopback on Phy (with loopback ethernet cable).
+
+
 Command Line Parameters
 =======================
 The following command line parameters are available on atlantic driver:
@@ -426,7 +466,7 @@ Support
 
 If an issue is identified with the released source code on the supported
 kernel with a supported adapter, email the specific information related
-to the issue to support@aquantia.com
+to the issue to aqn_support@marvell.com
 
 License
 =======
diff --git a/Documentation/networking/device_drivers/freescale/dpaa.txt b/Documentation/networking/device_drivers/freescale/dpaa.txt
index f88194f71c54..b06601ff9200 100644
--- a/Documentation/networking/device_drivers/freescale/dpaa.txt
+++ b/Documentation/networking/device_drivers/freescale/dpaa.txt
@@ -129,9 +129,9 @@ CONFIG_AQUANTIA_PHY=y
 DPAA Ethernet Frame Processing
 ==============================
 
-On Rx, buffers for the incoming frames are retrieved from one of the three
-existing buffers pools. The driver initializes and seeds these, each with
-buffers of different sizes: 1KB, 2KB and 4KB.
+On Rx, buffers for the incoming frames are retrieved from the buffers found
+in the dedicated interface buffer pool. The driver initializes and seeds these
+with one page buffers.
 
 On Tx, all transmitted frames are returned to the driver through Tx
 confirmation frame queues. The driver is then responsible for freeing the
@@ -254,7 +254,7 @@ The following statistics are exported for each interface through ethtool:
 The driver also exports the following information in sysfs:
 
 	- the FQ IDs for each FQ type
-	/sys/devices/platform/dpaa-ethernet.0/net/<int>/fqids
+	/sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/fqids
 
-	- the IDs of the buffer pools in use
-	/sys/devices/platform/dpaa-ethernet.0/net/<int>/bpids
+	- the ID of the buffer pool in use
+	/sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/bpids
diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/index.rst b/Documentation/networking/device_drivers/freescale/dpaa2/index.rst
index 67bd87fe6c53..ee40fcc5ddff 100644
--- a/Documentation/networking/device_drivers/freescale/dpaa2/index.rst
+++ b/Documentation/networking/device_drivers/freescale/dpaa2/index.rst
@@ -8,3 +8,4 @@ DPAA2 Documentation
    overview
    dpio-driver
    ethernet-driver
+   mac-phy-support
diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst b/Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
new file mode 100644
index 000000000000..51e6624fb774
--- /dev/null
+++ b/Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
@@ -0,0 +1,191 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=======================
+DPAA2 MAC / PHY support
+=======================
+
+:Copyright: |copy| 2019 NXP
+
+Overview
+--------
+
+The DPAA2 MAC / PHY support consists of a set of APIs that help DPAA2 network
+drivers (dpaa2-eth, dpaa2-ethsw) interract with the PHY library.
+
+DPAA2 Software Architecture
+---------------------------
+
+Among other DPAA2 objects, the fsl-mc bus exports DPNI objects (abstracting a
+network interface) and DPMAC objects (abstracting a MAC). The dpaa2-eth driver
+probes on the DPNI object and connects to and configures a DPMAC object with
+the help of phylink.
+
+Data connections may be established between a DPNI and a DPMAC, or between two
+DPNIs. Depending on the connection type, the netif_carrier_[on/off] is handled
+directly by the dpaa2-eth driver or by phylink.
+
+.. code-block:: none
+
+  Sources of abstracted link state information presented by the MC firmware
+
+                                               +--------------------------------------+
+  +------------+                  +---------+  |                           xgmac_mdio |
+  | net_device |                  | phylink |--|  +-----+  +-----+  +-----+  +-----+  |
+  +------------+                  +---------+  |  | PHY |  | PHY |  | PHY |  | PHY |  |
+        |                             |        |  +-----+  +-----+  +-----+  +-----+  |
+      +------------------------------------+   |                    External MDIO bus |
+      |            dpaa2-eth               |   +--------------------------------------+
+      +------------------------------------+
+        |                             |                                           Linux
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        |                             |                                     MC firmware
+        |              /|             V
+  +----------+        / |       +----------+
+  |          |       /  |       |          |
+  |          |       |  |       |          |
+  |   DPNI   |<------|  |<------|   DPMAC  |
+  |          |       |  |       |          |
+  |          |       \  |<---+  |          |
+  +----------+        \ |    |  +----------+
+                       \|    |
+                             |
+           +--------------------------------------+
+           | MC firmware polling MAC PCS for link |
+           |  +-----+  +-----+  +-----+  +-----+  |
+           |  | PCS |  | PCS |  | PCS |  | PCS |  |
+           |  +-----+  +-----+  +-----+  +-----+  |
+           |                    Internal MDIO bus |
+           +--------------------------------------+
+
+
+Depending on an MC firmware configuration setting, each MAC may be in one of two modes:
+
+- DPMAC_LINK_TYPE_FIXED: the link state management is handled exclusively by
+  the MC firmware by polling the MAC PCS. Without the need to register a
+  phylink instance, the dpaa2-eth driver will not bind to the connected dpmac
+  object at all.
+
+- DPMAC_LINK_TYPE_PHY: The MC firmware is left waiting for link state update
+  events, but those are in fact passed strictly between the dpaa2-mac (based on
+  phylink) and its attached net_device driver (dpaa2-eth, dpaa2-ethsw),
+  effectively bypassing the firmware.
+
+Implementation
+--------------
+
+At probe time or when a DPNI's endpoint is dynamically changed, the dpaa2-eth
+is responsible to find out if the peer object is a DPMAC and if this is the
+case, to integrate it with PHYLINK using the dpaa2_mac_connect() API, which
+will do the following:
+
+ - look up the device tree for PHYLINK-compatible of binding (phy-handle)
+ - will create a PHYLINK instance associated with the received net_device
+ - connect to the PHY using phylink_of_phy_connect()
+
+The following phylink_mac_ops callback are implemented:
+
+ - .validate() will populate the supported linkmodes with the MAC capabilities
+   only when the phy_interface_t is RGMII_* (at the moment, this is the only
+   link type supported by the driver).
+
+ - .mac_config() will configure the MAC in the new configuration using the
+   dpmac_set_link_state() MC firmware API.
+
+ - .mac_link_up() / .mac_link_down() will update the MAC link using the same
+   API described above.
+
+At driver unbind() or when the DPNI object is disconnected from the DPMAC, the
+dpaa2-eth driver calls dpaa2_mac_disconnect() which will, in turn, disconnect
+from the PHY and destroy the PHYLINK instance.
+
+In case of a DPNI-DPMAC connection, an 'ip link set dev eth0 up' would start
+the following sequence of operations:
+
+(1) phylink_start() called from .dev_open().
+(2) The .mac_config() and .mac_link_up() callbacks are called by PHYLINK.
+(3) In order to configure the HW MAC, the MC Firmware API
+    dpmac_set_link_state() is called.
+(4) The firmware will eventually setup the HW MAC in the new configuration.
+(5) A netif_carrier_on() call is made directly from PHYLINK on the associated
+    net_device.
+(6) The dpaa2-eth driver handles the LINK_STATE_CHANGE irq in order to
+    enable/disable Rx taildrop based on the pause frame settings.
+
+.. code-block:: none
+
+  +---------+               +---------+
+  | PHYLINK |-------------->|  eth0   |
+  +---------+           (5) +---------+
+  (1) ^  |
+      |  |
+      |  v (2)
+  +-----------------------------------+
+  |             dpaa2-eth             |
+  +-----------------------------------+
+         |                    ^ (6)
+         |                    |
+         v (3)                |
+  +---------+---------------+---------+
+  |  DPMAC  |               |  DPNI   |
+  +---------+               +---------+
+  |            MC Firmware            |
+  +-----------------------------------+
+         |
+         |
+         v (4)
+  +-----------------------------------+
+  |             HW MAC                |
+  +-----------------------------------+
+
+In case of a DPNI-DPNI connection, a usual sequence of operations looks like
+the following:
+
+(1) ip link set dev eth0 up
+(2) The dpni_enable() MC API called on the associated fsl_mc_device.
+(3) ip link set dev eth1 up
+(4) The dpni_enable() MC API called on the associated fsl_mc_device.
+(5) The LINK_STATE_CHANGED irq is received by both instances of the dpaa2-eth
+    driver because now the operational link state is up.
+(6) The netif_carrier_on() is called on the exported net_device from
+    link_state_update().
+
+.. code-block:: none
+
+  +---------+               +---------+
+  |  eth0   |               |  eth1   |
+  +---------+               +---------+
+      |  ^                     ^  |
+      |  |                     |  |
+  (1) v  | (6)             (6) |  v (3)
+  +---------+               +---------+
+  |dpaa2-eth|               |dpaa2-eth|
+  +---------+               +---------+
+      |  ^                     ^  |
+      |  |                     |  |
+  (2) v  | (5)             (5) |  v (4)
+  +---------+---------------+---------+
+  |  DPNI   |               |  DPNI   |
+  +---------+               +---------+
+  |            MC Firmware            |
+  +-----------------------------------+
+
+
+Exported API
+------------
+
+Any DPAA2 driver that drivers endpoints of DPMAC objects should service its
+_EVENT_ENDPOINT_CHANGED irq and connect/disconnect from the associated DPMAC
+when necessary using the below listed API::
+
+ - int dpaa2_mac_connect(struct dpaa2_mac *mac);
+ - void dpaa2_mac_disconnect(struct dpaa2_mac *mac);
+
+A phylink integration is necessary only when the partner DPMAC is not of TYPE_FIXED.
+One can check for this condition using the below API::
+
+ - bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,struct fsl_mc_io *mc_io);
+
+Before connection to a MAC, the caller must allocate and populate the
+dpaa2_mac structure with the associated net_device, a pointer to the MC portal
+to be used and the actual fsl_mc_device structure of the DPMAC.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index c1f7f75e5fd9..a191faaf97de 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -22,9 +22,11 @@ Contents:
    intel/iavf
    intel/ice
    google/gve
+   marvell/octeontx2
    mellanox/mlx5
    netronome/nfp
    pensando/ionic
+   stmicro/stmmac
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/marvell/octeontx2.rst b/Documentation/networking/device_drivers/marvell/octeontx2.rst
new file mode 100644
index 000000000000..88f508338c5f
--- /dev/null
+++ b/Documentation/networking/device_drivers/marvell/octeontx2.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+====================================
+Marvell OcteonTx2 RVU Kernel Drivers
+====================================
+
+Copyright (c) 2020 Marvell International Ltd.
+
+Contents
+========
+
+- `Overview`_
+- `Drivers`_
+- `Basic packet flow`_
+
+Overview
+========
+
+Resource virtualization unit (RVU) on Marvell's OcteonTX2 SOC maps HW
+resources from the network, crypto and other functional blocks into
+PCI-compatible physical and virtual functions. Each functional block
+again has multiple local functions (LFs) for provisioning to PCI devices.
+RVU supports multiple PCIe SRIOV physical functions (PFs) and virtual
+functions (VFs). PF0 is called the administrative / admin function (AF)
+and has privileges to provision RVU functional block's LFs to each of the
+PF/VF.
+
+RVU managed networking functional blocks
+ - Network pool or buffer allocator (NPA)
+ - Network interface controller (NIX)
+ - Network parser CAM (NPC)
+ - Schedule/Synchronize/Order unit (SSO)
+ - Loopback interface (LBK)
+
+RVU managed non-networking functional blocks
+ - Crypto accelerator (CPT)
+ - Scheduled timers unit (TIM)
+ - Schedule/Synchronize/Order unit (SSO)
+   Used for both networking and non networking usecases
+
+Resource provisioning examples
+ - A PF/VF with NIX-LF & NPA-LF resources works as a pure network device
+ - A PF/VF with CPT-LF resource works as a pure crypto offload device.
+
+RVU functional blocks are highly configurable as per software requirements.
+
+Firmware setups following stuff before kernel boots
+ - Enables required number of RVU PFs based on number of physical links.
+ - Number of VFs per PF are either static or configurable at compile time.
+   Based on config, firmware assigns VFs to each of the PFs.
+ - Also assigns MSIX vectors to each of PF and VFs.
+ - These are not changed after kernel boot.
+
+Drivers
+=======
+
+Linux kernel will have multiple drivers registering to different PF and VFs
+of RVU. Wrt networking there will be 3 flavours of drivers.
+
+Admin Function driver
+---------------------
+
+As mentioned above RVU PF0 is called the admin function (AF), this driver
+supports resource provisioning and configuration of functional blocks.
+Doesn't handle any I/O. It sets up few basic stuff but most of the
+funcionality is achieved via configuration requests from PFs and VFs.
+
+PF/VFs communicates with AF via a shared memory region (mailbox). Upon
+receiving requests AF does resource provisioning and other HW configuration.
+AF is always attached to host kernel, but PFs and their VFs may be used by host
+kernel itself, or attached to VMs or to userspace applications like
+DPDK etc. So AF has to handle provisioning/configuration requests sent
+by any device from any domain.
+
+AF driver also interacts with underlying firmware to
+ - Manage physical ethernet links ie CGX LMACs.
+ - Retrieve information like speed, duplex, autoneg etc
+ - Retrieve PHY EEPROM and stats.
+ - Configure FEC, PAM modes
+ - etc
+
+From pure networking side AF driver supports following functionality.
+ - Map a physical link to a RVU PF to which a netdev is registered.
+ - Attach NIX and NPA block LFs to RVU PF/VF which provide buffer pools, RQs, SQs
+   for regular networking functionality.
+ - Flow control (pause frames) enable/disable/config.
+ - HW PTP timestamping related config.
+ - NPC parser profile config, basically how to parse pkt and what info to extract.
+ - NPC extract profile config, what to extract from the pkt to match data in MCAM entries.
+ - Manage NPC MCAM entries, upon request can frame and install requested packet forwarding rules.
+ - Defines receive side scaling (RSS) algorithms.
+ - Defines segmentation offload algorithms (eg TSO)
+ - VLAN stripping, capture and insertion config.
+ - SSO and TIM blocks config which provide packet scheduling support.
+ - Debugfs support, to check current resource provising, current status of
+   NPA pools, NIX RQ, SQ and CQs, various stats etc which helps in debugging issues.
+ - And many more.
+
+Physical Function driver
+------------------------
+
+This RVU PF handles IO, is mapped to a physical ethernet link and this
+driver registers a netdev. This supports SR-IOV. As said above this driver
+communicates with AF with a mailbox. To retrieve information from physical
+links this driver talks to AF and AF gets that info from firmware and responds
+back ie cannot talk to firmware directly.
+
+Supports ethtool for configuring links, RSS, queue count, queue size,
+flow control, ntuple filters, dump PHY EEPROM, config FEC etc.
+
+Virtual Function driver
+-----------------------
+
+There are two types VFs, VFs that share the physical link with their parent
+SR-IOV PF and the VFs which work in pairs using internal HW loopback channels (LBK).
+
+Type1:
+ - These VFs and their parent PF share a physical link and used for outside communication.
+ - VFs cannot communicate with AF directly, they send mbox message to PF and PF
+   forwards that to AF. AF after processing, responds back to PF and PF forwards
+   the reply to VF.
+ - From functionality point of view there is no difference between PF and VF as same type
+   HW resources are attached to both. But user would be able to configure few stuff only
+   from PF as PF is treated as owner/admin of the link.
+
+Type2:
+ - RVU PF0 ie admin function creates these VFs and maps them to loopback block's channels.
+ - A set of two VFs (VF0 & VF1, VF2 & VF3 .. so on) works as a pair ie pkts sent out of
+   VF0 will be received by VF1 and viceversa.
+ - These VFs can be used by applications or virtual machines to communicate between them
+   without sending traffic outside. There is no switch present in HW, hence the support
+   for loopback VFs.
+ - These communicate directly with AF (PF0) via mbox.
+
+Except for the IO channels or links used for packet reception and transmission there is
+no other difference between these VF types. AF driver takes care of IO channel mapping,
+hence same VF driver works for both types of devices.
+
+Basic packet flow
+=================
+
+Ingress
+-------
+
+1. CGX LMAC receives packet.
+2. Forwards the packet to the NIX block.
+3. Then submitted to NPC block for parsing and then MCAM lookup to get the destination RVU device.
+4. NIX LF attached to the destination RVU device allocates a buffer from RQ mapped buffer pool of NPA block LF.
+5. RQ may be selected by RSS or by configuring MCAM rule with a RQ number.
+6. Packet is DMA'ed and driver is notified.
+
+Egress
+------
+
+1. Driver prepares a send descriptor and submits to SQ for transmission.
+2. The SQ is already configured (by AF) to transmit on a specific link/channel.
+3. The SQ descriptor ring is maintained in buffers allocated from SQ mapped pool of NPA block LF.
+4. NIX block transmits the pkt on the designated channel.
+5. NPC MCAM entries can be installed to divert pkt onto a different channel.
diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index d071c6b49e1f..f575a49790e8 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -154,6 +154,27 @@ User command examples:
       values:
          cmode runtime value smfs
 
+enable_roce: RoCE enablement state
+----------------------------------
+RoCE enablement state controls driver support for RoCE traffic.
+When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well known UDP RoCE port is handled as raw ethernet traffic.
+
+To change RoCE enablement state a user must change the driverinit cmode value and run devlink reload.
+
+User command examples:
+
+- Disable RoCE::
+
+    $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit
+    $ devlink dev reload pci/0000:06:00.0
+
+- Read RoCE enablement state::
+
+    $ devlink dev param show pci/0000:06:00.0 name enable_roce
+      pci/0000:06:00.0:
+      name enable_roce type generic
+      values:
+         cmode driverinit value true
 
 Devlink health reporters
 ========================
@@ -258,7 +279,7 @@ mlx5 tracepoints
 ================
 
 mlx5 driver provides internal trace points for tracking and debugging using
-kernel tracepoints interfaces (refer to Documentation/trace/ftrase.rst).
+kernel tracepoints interfaces (refer to Documentation/trace/ftrace.rst).
 
 For the list of support mlx5 events check /sys/kernel/debug/tracing/events/mlx5/
 
diff --git a/Documentation/networking/device_drivers/microsoft/netvsc.txt b/Documentation/networking/device_drivers/microsoft/netvsc.txt
index 3bfa635bbbd5..cd63556b27a0 100644
--- a/Documentation/networking/device_drivers/microsoft/netvsc.txt
+++ b/Documentation/networking/device_drivers/microsoft/netvsc.txt
@@ -82,3 +82,24 @@ Features
   contain one or more packets. The send buffer is an optimization, the driver
   will use slower method to handle very large packets or if the send buffer
   area is exhausted.
+
+  XDP support
+  -----------
+  XDP (eXpress Data Path) is a feature that runs eBPF bytecode at the early
+  stage when packets arrive at a NIC card. The goal is to increase performance
+  for packet processing, reducing the overhead of SKB allocation and other
+  upper network layers.
+
+  hv_netvsc supports XDP in native mode, and transparently sets the XDP
+  program on the associated VF NIC as well.
+
+  Setting / unsetting XDP program on synthetic NIC (netvsc) propagates to
+  VF NIC automatically. Setting / unsetting XDP program on VF NIC directly
+  is not recommended, also not propagated to synthetic NIC, and may be
+  overwritten by setting of synthetic NIC.
+
+  XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO
+  before running XDP:
+	ethtool -K eth0 lro off
+
+  XDP_REDIRECT action is not yet supported.
diff --git a/Documentation/networking/device_drivers/netronome/nfp.rst b/Documentation/networking/device_drivers/netronome/nfp.rst
index 6c08ac8b5147..ada611fb427c 100644
--- a/Documentation/networking/device_drivers/netronome/nfp.rst
+++ b/Documentation/networking/device_drivers/netronome/nfp.rst
@@ -131,3 +131,119 @@ abi_drv_reset
 abi_drv_load_ifc
     Defines a list of PF devices allowed to load FW on the device.
     This variable is not currently user configurable.
+
+Statistics
+==========
+
+Following device statistics are available through the ``ethtool -S`` interface:
+
+.. flat-table:: NFP device statistics
+   :header-rows: 1
+   :widths: 3 1 11
+
+   * - Name
+     - ID
+     - Meaning
+
+   * - dev_rx_discards
+     - 1
+     - Packet can be discarded on the RX path for one of the following reasons:
+
+        * The NIC is not in promisc mode, and the destination MAC address
+          doesn't match the interfaces' MAC address.
+        * The received packet is larger than the max buffer size on the host.
+          I.e. it exceeds the Layer 3 MRU.
+        * There is no freelist descriptor available on the host for the packet.
+          It is likely that the NIC couldn't cache one in time.
+        * A BPF program discarded the packet.
+        * The datapath drop action was executed.
+        * The MAC discarded the packet due to lack of ingress buffer space
+          on the NIC.
+
+   * - dev_rx_errors
+     - 2
+     - A packet can be counted (and dropped) as RX error for the following
+       reasons:
+
+       * A problem with the VEB lookup (only when SR-IOV is used).
+       * A physical layer problem that causes Ethernet errors, like FCS or
+         alignment errors. The cause is usually faulty cables or SFPs.
+
+   * - dev_rx_bytes
+     - 3
+     - Total number of bytes received.
+
+   * - dev_rx_uc_bytes
+     - 4
+     - Unicast bytes received.
+
+   * - dev_rx_mc_bytes
+     - 5
+     - Multicast bytes received.
+
+   * - dev_rx_bc_bytes
+     - 6
+     - Broadcast bytes received.
+
+   * - dev_rx_pkts
+     - 7
+     - Total number of packets received.
+
+   * - dev_rx_mc_pkts
+     - 8
+     - Multicast packets received.
+
+   * - dev_rx_bc_pkts
+     - 9
+     - Broadcast packets received.
+
+   * - dev_tx_discards
+     - 10
+     - A packet can be discarded in the TX direction if the MAC is
+       being flow controlled and the NIC runs out of TX queue space.
+
+   * - dev_tx_errors
+     - 11
+     - A packet can be counted as TX error (and dropped) for one for the
+       following reasons:
+
+       * The packet is an LSO segment, but the Layer 3 or Layer 4 offset
+         could not be determined. Therefore LSO could not continue.
+       * An invalid packet descriptor was received over PCIe.
+       * The packet Layer 3 length exceeds the device MTU.
+       * An error on the MAC/physical layer. Usually due to faulty cables or
+         SFPs.
+       * A CTM buffer could not be allocated.
+       * The packet offset was incorrect and could not be fixed by the NIC.
+
+   * - dev_tx_bytes
+     - 12
+     - Total number of bytes transmitted.
+
+   * - dev_tx_uc_bytes
+     - 13
+     - Unicast bytes transmitted.
+
+   * - dev_tx_mc_bytes
+     - 14
+     - Multicast bytes transmitted.
+
+   * - dev_tx_bc_bytes
+     - 15
+     - Broadcast bytes transmitted.
+
+   * - dev_tx_pkts
+     - 16
+     - Total number of packets transmitted.
+
+   * - dev_tx_mc_pkts
+     - 17
+     - Multicast packets transmitted.
+
+   * - dev_tx_bc_pkts
+     - 18
+     - Broadcast packets transmitted.
+
+Note that statistics unknown to the driver will be displayed as
+``dev_unknown_stat$ID``, where ``$ID`` refers to the second column
+above.
diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.rst b/Documentation/networking/device_drivers/stmicro/stmmac.rst
new file mode 100644
index 000000000000..c34bab3d2df0
--- /dev/null
+++ b/Documentation/networking/device_drivers/stmicro/stmmac.rst
@@ -0,0 +1,697 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==============================================================
+Linux Driver for the Synopsys(R) Ethernet Controllers "stmmac"
+==============================================================
+
+Authors: Giuseppe Cavallaro <peppe.cavallaro@st.com>,
+Alexandre Torgue <alexandre.torgue@st.com>, Jose Abreu <joabreu@synopsys.com>
+
+Contents
+========
+
+- In This Release
+- Feature List
+- Kernel Configuration
+- Command Line Parameters
+- Driver Information and Notes
+- Debug Information
+- Support
+
+In This Release
+===============
+
+This file describes the stmmac Linux Driver for all the Synopsys(R) Ethernet
+Controllers.
+
+Currently, this network device driver is for all STi embedded MAC/GMAC
+(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XILINX XC2V3000
+FF1152AMT0221 D1215994A VIRTEX FPGA board. The Synopsys Ethernet QoS 5.0 IPK
+is also supported.
+
+DesignWare(R) Cores Ethernet MAC 10/100/1000 Universal version 3.70a
+(and older) and DesignWare(R) Cores Ethernet Quality-of-Service version 4.0
+(and upper) have been used for developing this driver as well as
+DesignWare(R) Cores XGMAC - 10G Ethernet MAC.
+
+This driver supports both the platform bus and PCI.
+
+This driver includes support for the following Synopsys(R) DesignWare(R)
+Cores Ethernet Controllers and corresponding minimum and maximum versions:
+
++-------------------------------+--------------+--------------+--------------+
+| Controller Name               | Min. Version | Max. Version | Abbrev. Name |
++===============================+==============+==============+==============+
+| Ethernet MAC Universal        | N/A          | 3.73a        | GMAC         |
++-------------------------------+--------------+--------------+--------------+
+| Ethernet Quality-of-Service   | 4.00a        | N/A          | GMAC4+       |
++-------------------------------+--------------+--------------+--------------+
+| XGMAC - 10G Ethernet MAC      | 2.10a        | N/A          | XGMAC2+      |
++-------------------------------+--------------+--------------+--------------+
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Ethernet adapter. All hardware requirements listed apply
+to use with Linux.
+
+Feature List
+============
+
+The following features are available in this driver:
+ - GMII/MII/RGMII/SGMII/RMII/XGMII Interface
+ - Half-Duplex / Full-Duplex Operation
+ - Energy Efficient Ethernet (EEE)
+ - IEEE 802.3x PAUSE Packets (Flow Control)
+ - RMON/MIB Counters
+ - IEEE 1588 Timestamping (PTP)
+ - Pulse-Per-Second Output (PPS)
+ - MDIO Clause 22 / Clause 45 Interface
+ - MAC Loopback
+ - ARP Offloading
+ - Automatic CRC / PAD Insertion and Checking
+ - Checksum Offload for Received and Transmitted Packets
+ - Standard or Jumbo Ethernet Packets
+ - Source Address Insertion / Replacement
+ - VLAN TAG Insertion / Replacement / Deletion / Filtering (HASH and PERFECT)
+ - Programmable TX and RX Watchdog and Coalesce Settings
+ - Destination Address Filtering (PERFECT)
+ - HASH Filtering (Multicast)
+ - Layer 3 / Layer 4 Filtering
+ - Remote Wake-Up Detection
+ - Receive Side Scaling (RSS)
+ - Frame Preemption for TX and RX
+ - Programmable Burst Length, Threshold, Queue Size
+ - Multiple Queues (up to 8)
+ - Multiple Scheduling Algorithms (TX: WRR, DWRR, WFQ, SP, CBS, EST, TBS;
+   RX: WRR, SP)
+ - Flexible RX Parser
+ - TCP / UDP Segmentation Offload (TSO, USO)
+ - Split Header (SPH)
+ - Safety Features (ECC Protection, Data Parity Protection)
+ - Selftests using Ethtool
+
+Kernel Configuration
+====================
+
+The kernel configuration option is ``CONFIG_STMMAC_ETH``:
+ - ``CONFIG_STMMAC_PLATFORM``: is to enable the platform driver.
+ - ``CONFIG_STMMAC_PCI``: is to enable the pci driver.
+
+Command Line Parameters
+=======================
+
+If the driver is built as a module the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax (e.g. for PCI module)::
+
+    modprobe stmmac_pci [<option>=<VAL1>,<VAL2>,...]
+
+Driver parameters can be also passed in command line by using::
+
+    stmmaceth=watchdog:100,chain_mode=1
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+watchdog
+--------
+:Valid Range: 5000-None
+:Default Value: 5000
+
+This parameter overrides the transmit timeout in milliseconds.
+
+debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system
+logs.
+
+phyaddr
+-------
+:Valid Range: 0-31
+:Default Value: -1
+
+This parameter overrides the physical address of the PHY device.
+
+flow_ctrl
+---------
+:Valid Range: 0-3 (0=off,1=rx,2=tx,3=rx/tx)
+:Default Value: 3
+
+This parameter changes the default Flow Control ability.
+
+pause
+-----
+:Valid Range: 0-65535
+:Default Value: 65535
+
+This parameter changes the default Flow Control Pause time.
+
+tc
+--
+:Valid Range: 64-256
+:Default Value: 64
+
+This parameter changes the default HW FIFO Threshold control value.
+
+buf_sz
+------
+:Valid Range: 1536-16384
+:Default Value: 1536
+
+This parameter changes the default RX DMA packet buffer size.
+
+eee_timer
+---------
+:Valid Range: 0-None
+:Default Value: 1000
+
+This parameter changes the default LPI TX Expiration time in milliseconds.
+
+chain_mode
+----------
+:Valid Range: 0-1 (0=off,1=on)
+:Default Value: 0
+
+This parameter changes the default mode of operation from Ring Mode to
+Chain Mode.
+
+Driver Information and Notes
+============================
+
+Transmit Process
+----------------
+
+The xmit method is invoked when the kernel needs to transmit a packet; it sets
+the descriptors in the ring and informs the DMA engine that there is a packet
+ready to be transmitted.
+
+By default, the driver sets the ``NETIF_F_SG`` bit in the features field of
+the ``net_device`` structure, enabling the scatter-gather feature. This is
+true on chips and configurations where the checksum can be done in hardware.
+
+Once the controller has finished transmitting the packet, timer will be
+scheduled to release the transmit resources.
+
+Receive Process
+---------------
+
+When one or more packets are received, an interrupt happens. The interrupts
+are not queued, so the driver has to scan all the descriptors in the ring
+during the receive process.
+
+This is based on NAPI, so the interrupt handler signals only if there is work
+to be done, and it exits. Then the poll method will be scheduled at some
+future point.
+
+The incoming packets are stored, by the DMA, in a list of pre-allocated socket
+buffers in order to avoid the memcpy (zero-copy).
+
+Interrupt Mitigation
+--------------------
+
+The driver is able to mitigate the number of its DMA interrupts using NAPI for
+the reception on chips older than the 3.50. New chips have an HW RX Watchdog
+used for this mitigation.
+
+Mitigation parameters can be tuned by ethtool.
+
+WoL
+---
+
+Wake up on Lan feature through Magic and Unicast frames are supported for the
+GMAC, GMAC4/5 and XGMAC core.
+
+DMA Descriptors
+---------------
+
+Driver handles both normal and alternate descriptors. The latter has been only
+tested on DesignWare(R) Cores Ethernet MAC Universal version 3.41a and later.
+
+stmmac supports DMA descriptor to operate both in dual buffer (RING) and
+linked-list(CHAINED) mode. In RING each descriptor points to two data buffer
+pointers whereas in CHAINED mode they point to only one data buffer pointer.
+RING mode is the default.
+
+In CHAINED mode each descriptor will have pointer to next descriptor in the
+list, hence creating the explicit chaining in the descriptor itself, whereas
+such explicit chaining is not possible in RING mode.
+
+Extended Descriptors
+--------------------
+
+The extended descriptors give us information about the Ethernet payload when
+it is carrying PTP packets or TCP/UDP/ICMP over IP. These are not available on
+GMAC Synopsys(R) chips older than the 3.50. At probe time the driver will
+decide if these can be actually used. This support also is mandatory for PTPv2
+because the extra descriptors are used for saving the hardware timestamps and
+Extended Status.
+
+Ethtool Support
+---------------
+
+Ethtool is supported. For example, driver statistics (including RMON),
+internal errors can be taken using::
+
+    ethtool -S ethX
+
+Ethtool selftests are also supported. This allows to do some early sanity
+checks to the HW using MAC and PHY loopback mechanisms::
+
+    ethtool -t ethX
+
+Jumbo and Segmentation Offloading
+---------------------------------
+
+Jumbo frames are supported and tested for the GMAC. The GSO has been also
+added but it's performed in software. LRO is not supported.
+
+TSO Support
+-----------
+
+TSO (TCP Segmentation Offload) feature is supported by GMAC > 4.x and XGMAC
+chip family. When a packet is sent through TCP protocol, the TCP stack ensures
+that the SKB provided to the low level driver (stmmac in our case) matches
+with the maximum frame len (IP header + TCP header + payload <= 1500 bytes
+(for MTU set to 1500)). It means that if an application using TCP want to send
+a packet which will have a length (after adding headers) > 1514 the packet
+will be split in several TCP packets: The data payload is split and headers
+(TCP/IP ..) are added. It is done by software.
+
+When TSO is enabled, the TCP stack doesn't care about the maximum frame length
+and provide SKB packet to stmmac as it is. The GMAC IP will have to perform
+the segmentation by it self to match with maximum frame length.
+
+This feature can be enabled in device tree through ``snps,tso`` entry.
+
+Energy Efficient Ethernet
+-------------------------
+
+Energy Efficient Ethernet (EEE) enables IEEE 802.3 MAC sublayer along with a
+family of Physical layer to operate in the Low Power Idle (LPI) mode. The EEE
+mode supports the IEEE 802.3 MAC operation at 100Mbps, 1000Mbps and 1Gbps.
+
+The LPI mode allows power saving by switching off parts of the communication
+device functionality when there is no data to be transmitted & received.
+The system on both the side of the link can disable some functionalities and
+save power during the period of low-link utilization. The MAC controls whether
+the system should enter or exit the LPI mode and communicate this to PHY.
+
+As soon as the interface is opened, the driver verifies if the EEE can be
+supported. This is done by looking at both the DMA HW capability register and
+the PHY devices MCD registers.
+
+To enter in TX LPI mode the driver needs to have a software timer that enable
+and disable the LPI mode when there is nothing to be transmitted.
+
+Precision Time Protocol (PTP)
+-----------------------------
+
+The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP), which
+enables precise synchronization of clocks in measurement and control systems
+implemented with technologies such as network communication.
+
+In addition to the basic timestamp features mentioned in IEEE 1588-2002
+Timestamps, new GMAC cores support the advanced timestamp features.
+IEEE 1588-2008 can be enabled when configuring the Kernel.
+
+SGMII/RGMII Support
+-------------------
+
+New GMAC devices provide own way to manage RGMII/SGMII. This information is
+available at run-time by looking at the HW capability register. This means
+that the stmmac can manage auto-negotiation and link status w/o using the
+PHYLIB stuff. In fact, the HW provides a subset of extended registers to
+restart the ANE, verify Full/Half duplex mode and Speed. Thanks to these
+registers, it is possible to look at the Auto-negotiated Link Parter Ability.
+
+Physical
+--------
+
+The driver is compatible with Physical Abstraction Layer to be connected with
+PHY and GPHY devices.
+
+Platform Information
+--------------------
+
+Several information can be passed through the platform and device-tree.
+
+::
+
+    struct plat_stmmacenet_data {
+
+1) Bus identifier::
+
+        int bus_id;
+
+2) PHY Physical Address. If set to -1 the driver will pick the first PHY it
+finds::
+
+        int phy_addr;
+
+3) PHY Device Interface::
+
+        int interface;
+
+4) Specific platform fields for the MDIO bus::
+
+        struct stmmac_mdio_bus_data *mdio_bus_data;
+
+5) Internal DMA parameters::
+
+        struct stmmac_dma_cfg *dma_cfg;
+
+6) Fixed CSR Clock Range selection::
+
+        int clk_csr;
+
+7) HW uses the GMAC core::
+
+        int has_gmac;
+
+8) If set the MAC will use Enhanced Descriptors::
+
+        int enh_desc;
+
+9) Core is able to perform TX Checksum and/or RX Checksum in HW::
+
+        int tx_coe;
+        int rx_coe;
+
+11) Some HWs are not able to perform the csum in HW for over-sized frames due
+to limited buffer sizes. Setting this flag the csum will be done in SW on
+JUMBO frames::
+
+        int bugged_jumbo;
+
+12) Core has the embedded power module::
+
+        int pmt;
+
+13) Force DMA to use the Store and Forward mode or Threshold mode::
+
+        int force_sf_dma_mode;
+        int force_thresh_dma_mode;
+
+15) Force to disable the RX Watchdog feature and switch to NAPI mode::
+
+        int riwt_off;
+
+16) Limit the maximum operating speed and MTU::
+
+        int max_speed;
+        int maxmtu;
+
+18) Number of Multicast/Unicast filters::
+
+        int multicast_filter_bins;
+        int unicast_filter_entries;
+
+20) Limit the maximum TX and RX FIFO size::
+
+        int tx_fifo_size;
+        int rx_fifo_size;
+
+21) Use the specified number of TX and RX Queues::
+
+        u32 rx_queues_to_use;
+        u32 tx_queues_to_use;
+
+22) Use the specified TX and RX scheduling algorithm::
+
+        u8 rx_sched_algorithm;
+        u8 tx_sched_algorithm;
+
+23) Internal TX and RX Queue parameters::
+
+        struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
+        struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
+
+24) This callback is used for modifying some syscfg registers (on ST SoCs)
+according to the link speed negotiated by the physical layer::
+
+        void (*fix_mac_speed)(void *priv, unsigned int speed);
+
+25) Callbacks used for calling a custom initialization; This is sometimes
+necessary on some platforms (e.g. ST boxes) where the HW needs to have set
+some PIO lines or system cfg registers. init/exit callbacks should not use
+or modify platform data::
+
+        int (*init)(struct platform_device *pdev, void *priv);
+        void (*exit)(struct platform_device *pdev, void *priv);
+
+26) Perform HW setup of the bus. For example, on some ST platforms this field
+is used to configure the AMBA bridge to generate more efficient STBus traffic::
+
+        struct mac_device_info *(*setup)(void *priv);
+        void *bsp_priv;
+
+27) Internal clocks and rates::
+
+        struct clk *stmmac_clk;
+        struct clk *pclk;
+        struct clk *clk_ptp_ref;
+        unsigned int clk_ptp_rate;
+        unsigned int clk_ref_rate;
+        s32 ptp_max_adj;
+
+28) Main reset::
+
+        struct reset_control *stmmac_rst;
+
+29) AXI Internal Parameters::
+
+        struct stmmac_axi *axi;
+
+30) HW uses GMAC>4 cores::
+
+        int has_gmac4;
+
+31) HW is sun8i based::
+
+        bool has_sun8i;
+
+32) Enables TSO feature::
+
+        bool tso_en;
+
+33) Enables Receive Side Scaling (RSS) feature::
+
+        int rss_en;
+
+34) MAC Port selection::
+
+        int mac_port_sel_speed;
+
+35) Enables TX LPI Clock Gating::
+
+        bool en_tx_lpi_clockgating;
+
+36) HW uses XGMAC>2.10 cores::
+
+        int has_xgmac;
+
+::
+
+    }
+
+For MDIO bus data, we have:
+
+::
+
+    struct stmmac_mdio_bus_data {
+
+1) PHY mask passed when MDIO bus is registered::
+
+        unsigned int phy_mask;
+
+2) List of IRQs, one per PHY::
+
+        int *irqs;
+
+3) If IRQs is NULL, use this for probed PHY::
+
+        int probed_phy_irq;
+
+4) Set to true if PHY needs reset::
+
+        bool needs_reset;
+
+::
+
+    }
+
+For DMA engine configuration, we have:
+
+::
+
+    struct stmmac_dma_cfg {
+
+1) Programmable Burst Length (TX and RX)::
+
+        int pbl;
+
+2) If set, DMA TX / RX will use this value rather than pbl::
+
+        int txpbl;
+        int rxpbl;
+
+3) Enable 8xPBL::
+
+        bool pblx8;
+
+4) Enable Fixed or Mixed burst::
+
+        int fixed_burst;
+        int mixed_burst;
+
+5) Enable Address Aligned Beats::
+
+        bool aal;
+
+6) Enable Enhanced Addressing (> 32 bits)::
+
+        bool eame;
+
+::
+
+    }
+
+For DMA AXI parameters, we have:
+
+::
+
+    struct stmmac_axi {
+
+1) Enable AXI LPI::
+
+        bool axi_lpi_en;
+        bool axi_xit_frm;
+
+2) Set AXI Write / Read maximum outstanding requests::
+
+        u32 axi_wr_osr_lmt;
+        u32 axi_rd_osr_lmt;
+
+3) Set AXI 4KB bursts::
+
+        bool axi_kbbe;
+
+4) Set AXI maximum burst length map::
+
+        u32 axi_blen[AXI_BLEN];
+
+5) Set AXI Fixed burst / mixed burst::
+
+        bool axi_fb;
+        bool axi_mb;
+
+6) Set AXI rebuild incrx mode::
+
+        bool axi_rb;
+
+::
+
+    }
+
+For the RX Queues configuration, we have:
+
+::
+
+    struct stmmac_rxq_cfg {
+
+1) Mode to use (DCB or AVB)::
+
+        u8 mode_to_use;
+
+2) DMA channel to use::
+
+        u32 chan;
+
+3) Packet routing, if applicable::
+
+        u8 pkt_route;
+
+4) Use priority routing, and priority to route::
+
+        bool use_prio;
+        u32 prio;
+
+::
+
+    }
+
+For the TX Queues configuration, we have:
+
+::
+
+    struct stmmac_txq_cfg {
+
+1) Queue weight in scheduler::
+
+        u32 weight;
+
+2) Mode to use (DCB or AVB)::
+
+        u8 mode_to_use;
+
+3) Credit Base Shaper Parameters::
+
+        u32 send_slope;
+        u32 idle_slope;
+        u32 high_credit;
+        u32 low_credit;
+
+4) Use priority scheduling, and priority::
+
+        bool use_prio;
+        u32 prio;
+
+::
+
+    }
+
+Device Tree Information
+-----------------------
+
+Please refer to the following document:
+Documentation/devicetree/bindings/net/snps,dwmac.yaml
+
+HW Capabilities
+---------------
+
+Note that, starting from new chips, where it is available the HW capability
+register, many configurations are discovered at run-time for example to
+understand if EEE, HW csum, PTP, enhanced descriptor etc are actually
+available. As strategy adopted in this driver, the information from the HW
+capability register can replace what has been passed from the platform.
+
+Debug Information
+=================
+
+The driver exports many information i.e. internal statistics, debug
+information, MAC and DMA registers etc.
+
+These can be read in several ways depending on the type of the information
+actually needed.
+
+For example a user can be use the ethtool support to get statistics: e.g.
+using: ``ethtool -S ethX`` (that shows the Management counters (MMC) if
+supported) or sees the MAC/DMA registers: e.g. using: ``ethtool -d ethX``
+
+Compiling the Kernel with ``CONFIG_DEBUG_FS`` the driver will export the
+following debugfs entries:
+
+ - ``descriptors_status``: To show the DMA TX/RX descriptor rings
+ - ``dma_cap``: To show the HW Capabilities
+
+Developer can also use the ``debug`` module parameter to get further debug
+information (please see: NETIF Msg Level).
+
+Support
+=======
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the
+issue to netdev@vger.kernel.org
diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.txt b/Documentation/networking/device_drivers/stmicro/stmmac.txt
deleted file mode 100644
index 1ae979fd90d2..000000000000
--- a/Documentation/networking/device_drivers/stmicro/stmmac.txt
+++ /dev/null
@@ -1,401 +0,0 @@
-       STMicroelectronics 10/100/1000 Synopsys Ethernet driver
-
-Copyright (C) 2007-2015  STMicroelectronics Ltd
-Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
-
-This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
-(Synopsys IP blocks).
-
-Currently this network device driver is for all STi embedded MAC/GMAC
-(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000
-FF1152AMT0221 D1215994A VIRTEX FPGA board.
-
-DWC Ether MAC 10/100/1000 Universal version 3.70a (and older) and DWC Ether
-MAC 10/100 Universal version 4.0 have been used for developing this driver.
-
-This driver supports both the platform bus and PCI.
-
-Please, for more information also visit: www.stlinux.com
-
-1) Kernel Configuration
-The kernel configuration option is STMMAC_ETH:
- Device Drivers ---> Network device support ---> Ethernet (1000 Mbit) --->
- STMicroelectronics 10/100/1000 Ethernet driver (STMMAC_ETH)
-
-CONFIG_STMMAC_PLATFORM: is to enable the platform driver.
-CONFIG_STMMAC_PCI: is to enable the pci driver.
-
-2) Driver parameters list:
-	debug: message level (0: no output, 16: all);
-	phyaddr: to manually provide the physical address to the PHY device;
-	buf_sz: DMA buffer size;
-	tc: control the HW FIFO threshold;
-	watchdog: transmit timeout (in milliseconds);
-	flow_ctrl: Flow control ability [on/off];
-	pause: Flow Control Pause Time;
-	eee_timer: tx EEE timer;
-	chain_mode: select chain mode instead of ring.
-
-3) Command line options
-Driver parameters can be also passed in command line by using:
-	stmmaceth=watchdog:100,chain_mode=1
-
-4) Driver information and notes
-
-4.1) Transmit process
-The xmit method is invoked when the kernel needs to transmit a packet; it sets
-the descriptors in the ring and informs the DMA engine, that there is a packet
-ready to be transmitted.
-By default, the driver sets the NETIF_F_SG bit in the features field of the
-net_device structure, enabling the scatter-gather feature. This is true on
-chips and configurations where the checksum can be done in hardware.
-Once the controller has finished transmitting the packet, timer will be
-scheduled to release the transmit resources.
-
-4.2) Receive process
-When one or more packets are received, an interrupt happens. The interrupts
-are not queued, so the driver has to scan all the descriptors in the ring during
-the receive process.
-This is based on NAPI, so the interrupt handler signals only if there is work
-to be done, and it exits.
-Then the poll method will be scheduled at some future point.
-The incoming packets are stored, by the DMA, in a list of pre-allocated socket
-buffers in order to avoid the memcpy (zero-copy).
-
-4.3) Interrupt mitigation
-The driver is able to mitigate the number of its DMA interrupts
-using NAPI for the reception on chips older than the 3.50.
-New chips have an HW RX-Watchdog used for this mitigation.
-Mitigation parameters can be tuned by ethtool.
-
-4.4) WOL
-Wake up on Lan feature through Magic and Unicast frames are supported for the
-GMAC core.
-
-4.5) DMA descriptors
-Driver handles both normal and alternate descriptors. The latter has been only
-tested on DWC Ether MAC 10/100/1000 Universal version 3.41a and later.
-
-STMMAC supports DMA descriptor to operate both in dual buffer (RING)
-and linked-list(CHAINED) mode. In RING each descriptor points to two
-data buffer pointers whereas in CHAINED mode they point to only one data
-buffer pointer. RING mode is the default.
-
-In CHAINED mode each descriptor will have pointer to next descriptor in
-the list, hence creating the explicit chaining in the descriptor itself,
-whereas such explicit chaining is not possible in RING mode.
-
-4.5.1) Extended descriptors
-The extended descriptors give us information about the Ethernet payload
-when it is carrying PTP packets or TCP/UDP/ICMP over IP.
-These are not available on GMAC Synopsys chips older than the 3.50.
-At probe time the driver will decide if these can be actually used.
-This support also is mandatory for PTPv2 because the extra descriptors
-are used for saving the hardware timestamps and Extended Status.
-
-4.6) Ethtool support
-Ethtool is supported.
-
-For example, driver statistics (including RMON), internal errors can be taken
-using:
-  # ethtool -S ethX
-command
-
-4.7) Jumbo and Segmentation Offloading
-Jumbo frames are supported and tested for the GMAC.
-The GSO has been also added but it's performed in software.
-LRO is not supported.
-
-4.8) Physical
-The driver is compatible with Physical Abstraction Layer to be connected with
-PHY and GPHY devices.
-
-4.9) Platform information
-Several information can be passed through the platform and device-tree.
-
-struct plat_stmmacenet_data {
-	char *phy_bus_name;
-	int bus_id;
-	int phy_addr;
-	int interface;
-	struct stmmac_mdio_bus_data *mdio_bus_data;
-	struct stmmac_dma_cfg *dma_cfg;
-	int clk_csr;
-	int has_gmac;
-	int enh_desc;
-	int tx_coe;
-	int rx_coe;
-	int bugged_jumbo;
-	int pmt;
-	int force_sf_dma_mode;
-	int force_thresh_dma_mode;
-	int riwt_off;
-	int max_speed;
-	int maxmtu;
-	void (*fix_mac_speed)(void *priv, unsigned int speed);
-	void (*bus_setup)(void __iomem *ioaddr);
-	int (*init)(struct platform_device *pdev, void *priv);
-	void (*exit)(struct platform_device *pdev, void *priv);
-	void *bsp_priv;
-	int has_gmac4;
-	bool tso_en;
-};
-
-Where:
- o phy_bus_name: phy bus name to attach to the stmmac.
- o bus_id: bus identifier.
- o phy_addr: the physical address can be passed from the platform.
-	    If it is set to -1 the driver will automatically
-	    detect it at run-time by probing all the 32 addresses.
- o interface: PHY device's interface.
- o mdio_bus_data: specific platform fields for the MDIO bus.
- o dma_cfg: internal DMA parameters
-   o pbl: the Programmable Burst Length is maximum number of beats to
-       be transferred in one DMA transaction.
-       GMAC also enables the 4xPBL by default. (8xPBL for GMAC 3.50 and newer)
-   o txpbl/rxpbl: GMAC and newer supports independent DMA pbl for tx/rx.
-   o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
-   o fixed_burst/mixed_burst/aal
- o clk_csr: fixed CSR Clock range selection.
- o has_gmac: uses the GMAC core.
- o enh_desc: if sets the MAC will use the enhanced descriptor structure.
- o tx_coe: core is able to perform the tx csum in HW.
- o rx_coe: the supports three check sum offloading engine types:
-	   type_1, type_2 (full csum) and no RX coe.
- o bugged_jumbo: some HWs are not able to perform the csum in HW for
-		over-sized frames due to limited buffer sizes.
-		Setting this flag the csum will be done in SW on
-		JUMBO frames.
- o pmt: core has the embedded power module (optional).
- o force_sf_dma_mode: force DMA to use the Store and Forward mode
-		     instead of the Threshold.
- o force_thresh_dma_mode: force DMA to use the Threshold mode other than
-		     the Store and Forward mode.
- o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
- o fix_mac_speed: this callback is used for modifying some syscfg registers
-		 (on ST SoCs) according to the link speed negotiated by the
-		 physical layer .
- o bus_setup: perform HW setup of the bus. For example, on some ST platforms
-	     this field is used to configure the AMBA  bridge to generate more
-	     efficient STBus traffic.
- o init/exit: callbacks used for calling a custom initialization;
-	     this is sometime necessary on some platforms (e.g. ST boxes)
-	     where the HW needs to have set some PIO lines or system cfg
-	     registers.  init/exit callbacks should not use or modify
-	     platform data.
- o bsp_priv: another private pointer.
- o has_gmac4: uses GMAC4 core.
- o tso_en: Enables TSO (TCP Segmentation Offload) feature.
-
-For MDIO bus The we have:
-
- struct stmmac_mdio_bus_data {
-	int (*phy_reset)(void *priv);
-	unsigned int phy_mask;
-	int *irqs;
-	int probed_phy_irq;
- };
-
-Where:
- o phy_reset: hook to reset the phy device attached to the bus.
- o phy_mask: phy mask passed when register the MDIO bus within the driver.
- o irqs: list of IRQs, one per PHY.
- o probed_phy_irq: if irqs is NULL, use this for probed PHY.
-
-For DMA engine we have the following internal fields that should be
-tuned according to the HW capabilities.
-
-struct stmmac_dma_cfg {
-	int pbl;
-	int txpbl;
-	int rxpbl;
-	bool pblx8;
-	int fixed_burst;
-	int mixed_burst;
-	bool aal;
-};
-
-Where:
- o pbl: Programmable Burst Length (tx and rx)
- o txpbl: Transmit Programmable Burst Length. Only for GMAC and newer.
-	 If set, DMA tx will use this value rather than pbl.
- o rxpbl: Receive Programmable Burst Length. Only for GMAC and newer.
-	 If set, DMA rx will use this value rather than pbl.
- o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
- o fixed_burst: program the DMA to use the fixed burst mode
- o mixed_burst: program the DMA to use the mixed burst mode
- o aal: Address-Aligned Beats
-
----
-
-Below an example how the structures above are using on ST platforms.
-
- static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
-	.has_gmac = 0,
-	.enh_desc = 0,
-	.fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
-				|
-				|-> to write an internal syscfg
-				|   on this platform when the
-				|   link speed changes from 10 to
-				|   100 and viceversa
-	.init = &stmmac_claim_resource,
-				|
-				|-> On ST SoC this calls own "PAD"
-				|   manager framework to claim
-				|   all the resources necessary
-				|   (GPIO ...). The .custom_cfg field
-				|   is used to pass a custom config.
-};
-
-Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
-there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
-with fixed_link support.
-
-static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
-	.phy_reset = phy_reset;
-		|
-		|-> function to provide the phy_reset on this board
-	.phy_mask = 0,
-};
-
-static struct fixed_phy_status stmmac0_fixed_phy_status = {
-	.link = 1,
-	.speed = 100,
-	.duplex = 1,
-};
-
-During the board's device_init we can configure the first
-MAC for fixed_link by calling:
-  fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
-and the second one, with a real PHY device attached to the bus,
-by using the stmmac_mdio_bus_data structure (to provide the id, the
-reset procedure etc).
-
-Note that, starting from new chips, where it is available the HW capability
-register, many configurations are discovered at run-time for example to
-understand if EEE, HW csum, PTP, enhanced descriptor etc are actually
-available. As strategy adopted in this driver, the information from the HW
-capability register can replace what has been passed from the platform.
-
-4.10) Device-tree support.
-
-Please see the following document:
-	Documentation/devicetree/bindings/net/stmmac.txt
-
-4.11) This is a summary of the content of some relevant files:
- o stmmac_main.c: implements the main network device driver;
- o stmmac_mdio.c: provides MDIO functions;
- o stmmac_pci: this is the PCI driver;
- o stmmac_platform.c: this the platform driver (OF supported);
- o stmmac_ethtool.c: implements the ethtool support;
- o stmmac.h: private driver structure;
- o common.h: common definitions and VFTs;
- o mmc_core.c/mmc.h: Management MAC Counters;
- o stmmac_hwtstamp.c: HW timestamp support for PTP;
- o stmmac_ptp.c: PTP 1588 clock;
- o stmmac_pcs.h: Physical Coding Sublayer common implementation;
- o dwmac-<XXX>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
-   for STMicroelectronics SoCs.
-
-- GMAC 3.x
- o descs.h: descriptor structure definitions;
- o dwmac1000_core.c: dwmac GiGa core functions;
- o dwmac1000_dma.c: dma functions for the GMAC chip;
- o dwmac1000.h: specific header file for the dwmac GiGa;
- o dwmac100_core: dwmac 100 core code;
- o dwmac100_dma.c: dma functions for the dwmac 100 chip;
- o dwmac1000.h: specific header file for the MAC;
- o dwmac_lib.c: generic DMA functions;
- o enh_desc.c: functions for handling enhanced descriptors;
- o norm_desc.c: functions for handling normal descriptors;
- o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes;
-
-- GMAC4.x generation
- o dwmac4_core.c: dwmac GMAC4.x core functions;
- o dwmac4_desc.c: functions for handling GMAC4.x descriptors;
- o dwmac4_descs.h: descriptor definitions;
- o dwmac4_dma.c: dma functions for the GMAC4.x chip;
- o dwmac4_dma.h: dma definitions for the GMAC4.x chip;
- o dwmac4.h: core definitions for the GMAC4.x chip;
- o dwmac4_lib.c: generic GMAC4.x functions;
-
-4.12) TSO support (GMAC4.x)
-
-TSO (Tcp Segmentation Offload) feature is supported by GMAC 4.x chip family.
-When a packet is sent through TCP protocol, the TCP stack ensures that
-the SKB provided to the low level driver (stmmac in our case) matches with
-the maximum frame len (IP header + TCP header + payload <= 1500 bytes (for
-MTU set to 1500)). It means that if an application using TCP want to send a
-packet which will have a length (after adding headers) > 1514 the packet
-will be split in several TCP packets: The data payload is split and headers
-(TCP/IP ..) are added. It is done by software.
-
-When TSO is enabled, the TCP stack doesn't care about the maximum frame
-length and provide SKB packet to stmmac as it is. The GMAC IP will have to
-perform the segmentation by it self to match with maximum frame length.
-
-This feature can be enabled in device tree through "snps,tso" entry.
-
-5) Debug Information
-
-The driver exports many information i.e. internal statistics,
-debug information, MAC and DMA registers etc.
-
-These can be read in several ways depending on the
-type of the information actually needed.
-
-For example a user can be use the ethtool support
-to get statistics: e.g. using: ethtool -S ethX
-(that shows the Management counters (MMC) if supported)
-or sees the MAC/DMA registers: e.g. using: ethtool -d ethX
-
-Compiling the Kernel with CONFIG_DEBUG_FS the driver will export the following
-debugfs entries:
-
-/sys/kernel/debug/stmmaceth/descriptors_status
-  To show the DMA TX/RX descriptor rings
-
-Developer can also use the "debug" module parameter to get further debug
-information (please see: NETIF Msg Level).
-
-6) Energy Efficient Ethernet
-
-Energy Efficient Ethernet(EEE) enables IEEE 802.3 MAC sublayer along
-with a family of Physical layer to operate in the Low power Idle(LPI)
-mode. The EEE mode supports the IEEE 802.3 MAC operation at 100Mbps,
-1000Mbps & 10Gbps.
-
-The LPI mode allows power saving by switching off parts of the
-communication device functionality when there is no data to be
-transmitted & received. The system on both the side of the link can
-disable some functionalities & save power during the period of low-link
-utilization. The MAC controls whether the system should enter or exit
-the LPI mode & communicate this to PHY.
-
-As soon as the interface is opened, the driver verifies if the EEE can
-be supported. This is done by looking at both the DMA HW capability
-register and the PHY devices MCD registers.
-To enter in Tx LPI mode the driver needs to have a software timer
-that enable and disable the LPI mode when there is nothing to be
-transmitted.
-
-7) Precision Time Protocol (PTP)
-The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP),
-which enables precise synchronization of clocks in measurement and
-control systems implemented with technologies such as network
-communication.
-
-In addition to the basic timestamp features mentioned in IEEE 1588-2002
-Timestamps, new GMAC cores support the advanced timestamp features.
-IEEE 1588-2008 that can be enabled when configure the Kernel.
-
-8) SGMII/RGMII support
-New GMAC devices provide own way to manage RGMII/SGMII.
-This information is available at run-time by looking at the
-HW capability register. This means that the stmmac can manage
-auto-negotiation and link status w/o using the PHYLIB stuff.
-In fact, the HW provides a subset of extended registers to
-restart the ANE, verify Full/Half duplex mode and Speed.
-Thanks to these registers, it is possible to look at the
-Auto-negotiated Link Parter Ability.
diff --git a/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt b/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
new file mode 100644
index 000000000000..12855ab268b8
--- /dev/null
+++ b/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
@@ -0,0 +1,209 @@
+* Texas Instruments CPSW switchdev based ethernet driver 2.0
+
+- Port renaming
+On older udev versions renaming of ethX to swXpY will not be automatically
+supported
+In order to rename via udev:
+ip -d link show dev sw0p1 | grep switchid
+
+SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}==<switchid>, \
+        ATTR{phys_port_name}!="", NAME="sw0$attr{phys_port_name}"
+
+
+====================
+# Dual mac mode
+====================
+- The new (cpsw_new.c) driver is operating in dual-emac mode by default, thus
+working as 2 individual network interfaces. Main differences from legacy CPSW
+driver are:
+ - optimized promiscuous mode: The P0_UNI_FLOOD (both ports) is enabled in
+addition to ALLMULTI (current port) instead of ALE_BYPASS.
+So, Ports in promiscuous mode will keep possibility of mcast and vlan filtering,
+which is provides significant benefits when ports are joined to the same bridge,
+but without enabling "switch" mode, or to different bridges.
+ - learning disabled on ports as it make not too much sense for
+   segregated ports - no forwarding in HW.
+ - enabled basic support for devlink.
+
+	devlink dev show
+		platform/48484000.switch
+
+	devlink dev param show
+	platform/48484000.switch:
+	name switch_mode type driver-specific
+	values:
+		cmode runtime value false
+	name ale_bypass type driver-specific
+	values:
+		cmode runtime value false
+
+Devlink configuration parameters
+====================
+See Documentation/networking/devlink/ti-cpsw-switch.rst
+
+====================
+# Bridging in dual mac mode
+====================
+The dual_mac mode requires two vids to be reserved for internal purposes,
+which, by default, equal CPSW Port numbers. As result, bridge has to be
+configured in vlan unaware mode or default_pvid has to be adjusted.
+
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge vlan_filtering 0
+	echo 0 > /sys/class/net/br0/bridge/default_pvid
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+ - or -
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge vlan_filtering 0
+	echo 100 > /sys/class/net/br0/bridge/default_pvid
+	ip link set dev br0 type bridge vlan_filtering 1
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+
+====================
+# Enabling "switch"
+====================
+The Switch mode can be enabled by configuring devlink driver parameter
+"switch_mode" to 1/true:
+	devlink dev param set platform/48484000.switch \
+	name switch_mode value 1 cmode runtime
+
+This can be done regardless of the state of Port's netdev devices - UP/DOWN, but
+Port's netdev devices have to be in UP before joining to the bridge to avoid
+overwriting of bridge configuration as CPSW switch driver copletly reloads its
+configuration when first Port changes its state to UP.
+
+When the both interfaces joined the bridge - CPSW switch driver will enable
+marking packets with offload_fwd_mark flag unless "ale_bypass=0"
+
+All configuration is implemented via switchdev API.
+
+====================
+# Bridge setup
+====================
+	devlink dev param set platform/48484000.switch \
+	name switch_mode value 1 cmode runtime
+
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge ageing_time 1000
+	ip link set dev sw0p1 up
+	ip link set dev sw0p2 up
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+	[*] bridge vlan add dev br0 vid 1 pvid untagged self
+
+[*] if vlan_filtering=1. where default_pvid=1
+
+=================
+# On/off STP
+=================
+ip link set dev BRDEV type bridge stp_state 1/0
+
+Note. Steps [*] are mandatory.
+
+====================
+# VLAN configuration
+====================
+bridge vlan add dev br0 vid 1 pvid untagged self <---- add cpu port to VLAN 1
+
+Note. This step is mandatory for bridge/default_pvid.
+
+=================
+# Add extra VLANs
+=================
+ 1. untagged:
+    bridge vlan add dev sw0p1 vid 100 pvid untagged master
+    bridge vlan add dev sw0p2 vid 100 pvid untagged master
+    bridge vlan add dev br0 vid 100 pvid untagged self <---- Add cpu port to VLAN100
+
+ 2. tagged:
+    bridge vlan add dev sw0p1 vid 100 master
+    bridge vlan add dev sw0p2 vid 100 master
+    bridge vlan add dev br0 vid 100 pvid tagged self <---- Add cpu port to VLAN100
+
+====
+FDBs
+====
+FDBs are automatically added on the appropriate switch port upon detection
+
+Manually adding FDBs:
+bridge fdb add aa:bb:cc:dd:ee:ff dev sw0p1 master vlan 100
+bridge fdb add aa:bb:cc:dd:ee:fe dev sw0p2 master <---- Add on all VLANs
+
+====
+MDBs
+====
+MDBs are automatically added on the appropriate switch port upon detection
+
+Manually adding MDBs:
+bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent vid 100
+bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent <---- Add on all VLANs
+
+==================
+Multicast flooding
+==================
+CPU port mcast_flooding is always on
+
+Turning flooding on/off on swithch ports:
+bridge link set dev sw0p1 mcast_flood on/off
+
+==================
+Access and Trunk port
+==================
+ bridge vlan add dev sw0p1 vid 100 pvid untagged master
+ bridge vlan add dev sw0p2 vid 100 master
+
+
+ bridge vlan add dev br0 vid 100 self
+ ip link add link br0 name br0.100 type vlan id 100
+
+ Note. Setting PVID on Bridge device itself working only for
+ default VLAN (default_pvid).
+
+=====================
+ NFS
+=====================
+The only way for NFS to work is by chrooting to a minimal environment when
+switch configuration that will affect connectivity is needed.
+Assuming you are booting NFS with eth1 interface(the script is hacky and
+it's just there to prove NFS is doable).
+
+setup.sh:
+#!/bin/sh
+mkdir proc
+mount -t proc none /proc
+ifconfig br0  > /dev/null
+if [ $? -ne 0 ]; then
+        echo "Setting up bridge"
+        ip link add name br0 type bridge
+        ip link set dev br0 type bridge ageing_time 1000
+        ip link set dev br0 type bridge vlan_filtering 1
+
+        ip link set eth1 down
+        ip link set eth1 name sw0p1
+        ip link set dev sw0p1 up
+        ip link set dev sw0p2 up
+        ip link set dev sw0p2 master br0
+        ip link set dev sw0p1 master br0
+        bridge vlan add dev br0 vid 1 pvid untagged self
+        ifconfig sw0p1 0.0.0.0
+        udhchc -i br0
+fi
+umount /proc
+
+run_nfs.sh:
+#!/bin/sh
+mkdir /tmp/root/bin -p
+mkdir /tmp/root/lib -p
+
+cp -r /lib/ /tmp/root/
+cp -r /bin/ /tmp/root/
+cp /sbin/ip /tmp/root/bin
+cp /sbin/bridge /tmp/root/bin
+cp /sbin/ifconfig /tmp/root/bin
+cp /sbin/udhcpc /tmp/root/bin
+cp /path/to/setup.sh /tmp/root/bin
+chroot /tmp/root/ busybox sh /bin/setup.sh
+
+run ./run_nfs.sh
diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
deleted file mode 100644
index 1db3fbea0831..000000000000
--- a/Documentation/networking/devlink-health.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-The health mechanism is targeted for Real Time Alerting, in order to know when
-something bad had happened to a PCI device
-- Provide alert debug information
-- Self healing
-- If problem needs vendor support, provide a way to gather all needed debugging
-  information.
-
-The main idea is to unify and centralize driver health reports in the
-generic devlink instance and allow the user to set different
-attributes of the health reporting and recovery procedures.
-
-The devlink health reporter:
-Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
-or unknown (driver specific).
-For each registered health reporter a driver can issue error/health reports
-asynchronously. All health reports handling is done by devlink.
-Device driver can provide specific callbacks for each "health reporter", e.g.
- - Recovery procedures
- - Diagnostics and object dump procedures
- - OOB initial parameters
-Different parts of the driver can register different types of health reporters
-with different handlers.
-
-Once an error is reported, devlink health will do the following actions:
-  * A log is being send to the kernel trace events buffer
-  * Health status and statistics are being updated for the reporter instance
-  * Object dump is being taken and saved at the reporter instance (as long as
-    there is no other dump which is already stored)
-  * Auto recovery attempt is being done. Depends on:
-    - Auto-recovery configuration
-    - Grace period vs. time passed since last recover
-
-The user interface:
-User can access/change each reporter's parameters and driver specific callbacks
-via devlink, e.g per error type (per health reporter)
- - Configure reporter's generic parameters (like: disable/enable auto recovery)
- - Invoke recovery procedure
- - Run diagnostics
- - Object dump
-
-The devlink health interface (via netlink):
-DEVLINK_CMD_HEALTH_REPORTER_GET
-  Retrieves status and configuration info per DEV and reporter.
-DEVLINK_CMD_HEALTH_REPORTER_SET
-  Allows reporter-related configuration setting.
-DEVLINK_CMD_HEALTH_REPORTER_RECOVER
-  Triggers a reporter's recovery procedure.
-DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
-  Retrieves diagnostics data from a reporter on a device.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
-  Retrieves the last stored dump. Devlink health
-  saves a single dump. If an dump is not already stored by the devlink
-  for this reporter, devlink generates a new dump.
-  dump output is defined by the reporter.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
-  Clears the last saved dump file for the specified reporter.
-
-
-                                               netlink
-                                      +--------------------------+
-                                      |                          |
-                                      |            +             |
-                                      |            |             |
-                                      +--------------------------+
-                                                   |request for ops
-                                                   |(diagnose,
- mlx5_core                             devlink     |recover,
-                                                   |dump)
-+--------+                            +--------------------------+
-|        |                            |    reporter|             |
-|        |                            |  +---------v----------+  |
-|        |   ops execution            |  |                    |  |
-|     <----------------------------------+                    |  |
-|        |                            |  |                    |  |
-|        |                            |  + ^------------------+  |
-|        |                            |    | request for ops     |
-|        |                            |    | (recover, dump)     |
-|        |                            |    |                     |
-|        |                            |  +-+------------------+  |
-|        |     health report          |  | health handler     |  |
-|        +------------------------------->                    |  |
-|        |                            |  +--------------------+  |
-|        |     health reporter create |                          |
-|        +---------------------------->                          |
-+--------+                            +--------------------------+
diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
deleted file mode 100644
index 4914f581b1fd..000000000000
--- a/Documentation/networking/devlink-info-versions.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-
-=====================
-Devlink info versions
-=====================
-
-board.id
-========
-
-Unique identifier of the board design.
-
-board.rev
-=========
-
-Board design revision.
-
-asic.id
-=======
-
-ASIC design identifier.
-
-asic.rev
-========
-
-ASIC design revision.
-
-board.manufacture
-=================
-
-An identifier of the company or the facility which produced the part.
-
-fw
-==
-
-Overall firmware version, often representing the collection of
-fw.mgmt, fw.app, etc.
-
-fw.mgmt
-=======
-
-Control unit firmware version. This firmware is responsible for house
-keeping tasks, PHY control etc. but not the packet-by-packet data path
-operation.
-
-fw.app
-======
-
-Data path microcode controlling high-speed packet processing.
-
-fw.undi
-=======
-
-UNDI software, may include the UEFI driver, firmware or both.
-
-fw.ncsi
-=======
-
-Version of the software responsible for supporting/handling the
-Network Controller Sideband Interface.
-
-fw.psid
-=======
-
-Unique identifier of the firmware parameter set.
diff --git a/Documentation/networking/devlink-params-bnxt.txt b/Documentation/networking/devlink-params-bnxt.txt
deleted file mode 100644
index 481aa303d5b4..000000000000
--- a/Documentation/networking/devlink-params-bnxt.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-enable_sriov		[DEVICE, GENERIC]
-			Configuration mode: Permanent
-
-ignore_ari		[DEVICE, GENERIC]
-			Configuration mode: Permanent
-
-msix_vec_per_pf_max	[DEVICE, GENERIC]
-			Configuration mode: Permanent
-
-msix_vec_per_pf_min	[DEVICE, GENERIC]
-			Configuration mode: Permanent
-
-gre_ver_check		[DEVICE, DRIVER-SPECIFIC]
-			Generic Routing Encapsulation (GRE) version check will
-			be enabled in the device. If disabled, device skips
-			version checking for incoming packets.
-			Type: Boolean
-			Configuration mode: Permanent
diff --git a/Documentation/networking/devlink-params-mlxsw.txt b/Documentation/networking/devlink-params-mlxsw.txt
deleted file mode 100644
index c63ea9fc7009..000000000000
--- a/Documentation/networking/devlink-params-mlxsw.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-fw_load_policy		[DEVICE, GENERIC]
-			Configuration mode: driverinit
-
-acl_region_rehash_interval	[DEVICE, DRIVER-SPECIFIC]
-			Sets an interval for periodic ACL region rehashes.
-			The value is in milliseconds, minimal value is "3000".
-			Value "0" disables the periodic work.
-			The first rehash will be run right after value is set.
-			Type: u32
-			Configuration mode: runtime
diff --git a/Documentation/networking/devlink-params-nfp.txt b/Documentation/networking/devlink-params-nfp.txt
deleted file mode 100644
index 43e4d4034865..000000000000
--- a/Documentation/networking/devlink-params-nfp.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-fw_load_policy		[DEVICE, GENERIC]
-			Configuration mode: permanent
-
-reset_dev_on_drv_probe	[DEVICE, GENERIC]
-			Configuration mode: permanent
diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
deleted file mode 100644
index ddba3e9b55b1..000000000000
--- a/Documentation/networking/devlink-params.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-Devlink configuration parameters
-================================
-Following is the list of configuration parameters via devlink interface.
-Each parameter can be generic or driver specific and are device level
-parameters.
-
-Note that the driver-specific files should contain the generic params
-they support to, with supported config modes.
-
-Each parameter can be set in different configuration modes:
-	runtime		- set while driver is running, no reset required.
-	driverinit	- applied while driver initializes, requires restart
-			driver by devlink reload command.
-	permanent	- written to device's non-volatile memory, hard reset
-			required.
-
-Following is the list of parameters:
-====================================
-enable_sriov		[DEVICE, GENERIC]
-			Enable Single Root I/O Virtualisation (SRIOV) in
-			the device.
-			Type: Boolean
-
-ignore_ari		[DEVICE, GENERIC]
-			Ignore Alternative Routing-ID Interpretation (ARI)
-			capability. If enabled, adapter will ignore ARI
-			capability even when platforms has the support
-			enabled and creates same number of partitions when
-			platform does not support ARI.
-			Type: Boolean
-
-msix_vec_per_pf_max	[DEVICE, GENERIC]
-			Provides the maximum number of MSIX interrupts that
-			a device can create. Value is same across all
-			physical functions (PFs) in the device.
-			Type: u32
-
-msix_vec_per_pf_min	[DEVICE, GENERIC]
-			Provides the minimum number of MSIX interrupts required
-			for the device initialization. Value is same across all
-			physical functions (PFs) in the device.
-			Type: u32
-
-fw_load_policy		[DEVICE, GENERIC]
-			Controls the device's firmware loading policy.
-			Valid values:
-			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER (0)
-			  Load firmware version preferred by the driver.
-			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH (1)
-			  Load firmware currently stored in flash.
-			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
-			  Load firmware currently available on host's disk.
-			Type: u8
-
-reset_dev_on_drv_probe	[DEVICE, GENERIC]
-			Controls the device's reset policy on driver probe.
-			Valid values:
-			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN (0)
-			  Unknown or invalid value.
-			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS (1)
-			  Always reset device on driver probe.
-			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER (2)
-			  Never reset device on driver probe.
-			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK (3)
-			  Reset only if device firmware can be found in the
-			  filesystem.
-			Type: u8
diff --git a/Documentation/networking/devlink-trap-netdevsim.rst b/Documentation/networking/devlink-trap-netdevsim.rst
deleted file mode 100644
index b721c9415473..000000000000
--- a/Documentation/networking/devlink-trap-netdevsim.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-======================
-Devlink Trap netdevsim
-======================
-
-Driver-specific Traps
-=====================
-
-.. list-table:: List of Driver-specific Traps Registered by ``netdevsim``
-   :widths: 5 5 90
-
-   * - Name
-     - Type
-     - Description
-   * - ``fid_miss``
-     - ``exception``
-     - When a packet enters the device it is classified to a filtering
-       indentifier (FID) based on the ingress port and VLAN. This trap is used
-       to trap packets for which a FID could not be found
diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink-trap.rst
deleted file mode 100644
index 8e90a85f3bd5..000000000000
--- a/Documentation/networking/devlink-trap.rst
+++ /dev/null
@@ -1,209 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-============
-Devlink Trap
-============
-
-Background
-==========
-
-Devices capable of offloading the kernel's datapath and perform functions such
-as bridging and routing must also be able to send specific packets to the
-kernel (i.e., the CPU) for processing.
-
-For example, a device acting as a multicast-aware bridge must be able to send
-IGMP membership reports to the kernel for processing by the bridge module.
-Without processing such packets, the bridge module could never populate its
-MDB.
-
-As another example, consider a device acting as router which has received an IP
-packet with a TTL of 1. Upon routing the packet the device must send it to the
-kernel so that it will route it as well and generate an ICMP Time Exceeded
-error datagram. Without letting the kernel route such packets itself, utilities
-such as ``traceroute`` could never work.
-
-The fundamental ability of sending certain packets to the kernel for processing
-is called "packet trapping".
-
-Overview
-========
-
-The ``devlink-trap`` mechanism allows capable device drivers to register their
-supported packet traps with ``devlink`` and report trapped packets to
-``devlink`` for further analysis.
-
-Upon receiving trapped packets, ``devlink`` will perform a per-trap packets and
-bytes accounting and potentially report the packet to user space via a netlink
-event along with all the provided metadata (e.g., trap reason, timestamp, input
-port). This is especially useful for drop traps (see :ref:`Trap-Types`)
-as it allows users to obtain further visibility into packet drops that would
-otherwise be invisible.
-
-The following diagram provides a general overview of ``devlink-trap``::
-
-                                    Netlink event: Packet w/ metadata
-                                                   Or a summary of recent drops
-                                  ^
-                                  |
-         Userspace                |
-        +---------------------------------------------------+
-         Kernel                   |
-                                  |
-                          +-------+--------+
-                          |                |
-                          |  drop_monitor  |
-                          |                |
-                          +-------^--------+
-                                  |
-                                  |
-                                  |
-                             +----+----+
-                             |         |      Kernel's Rx path
-                             | devlink |      (non-drop traps)
-                             |         |
-                             +----^----+      ^
-                                  |           |
-                                  +-----------+
-                                  |
-                          +-------+-------+
-                          |               |
-                          | Device driver |
-                          |               |
-                          +-------^-------+
-         Kernel                   |
-        +---------------------------------------------------+
-         Hardware                 |
-                                  | Trapped packet
-                                  |
-                               +--+---+
-                               |      |
-                               | ASIC |
-                               |      |
-                               +------+
-
-.. _Trap-Types:
-
-Trap Types
-==========
-
-The ``devlink-trap`` mechanism supports the following packet trap types:
-
-  * ``drop``: Trapped packets were dropped by the underlying device. Packets
-    are only processed by ``devlink`` and not injected to the kernel's Rx path.
-    The trap action (see :ref:`Trap-Actions`) can be changed.
-  * ``exception``: Trapped packets were not forwarded as intended by the
-    underlying device due to an exception (e.g., TTL error, missing neighbour
-    entry) and trapped to the control plane for resolution. Packets are
-    processed by ``devlink`` and injected to the kernel's Rx path. Changing the
-    action of such traps is not allowed, as it can easily break the control
-    plane.
-
-.. _Trap-Actions:
-
-Trap Actions
-============
-
-The ``devlink-trap`` mechanism supports the following packet trap actions:
-
-  * ``trap``: The sole copy of the packet is sent to the CPU.
-  * ``drop``: The packet is dropped by the underlying device and a copy is not
-    sent to the CPU.
-
-Generic Packet Traps
-====================
-
-Generic packet traps are used to describe traps that trap well-defined packets
-or packets that are trapped due to well-defined conditions (e.g., TTL error).
-Such traps can be shared by multiple device drivers and their description must
-be added to the following table:
-
-.. list-table:: List of Generic Packet Traps
-   :widths: 5 5 90
-
-   * - Name
-     - Type
-     - Description
-   * - ``source_mac_is_multicast``
-     - ``drop``
-     - Traps incoming packets that the device decided to drop because of a
-       multicast source MAC
-   * - ``vlan_tag_mismatch``
-     - ``drop``
-     - Traps incoming packets that the device decided to drop in case of VLAN
-       tag mismatch: The ingress bridge port is not configured with a PVID and
-       the packet is untagged or prio-tagged
-   * - ``ingress_vlan_filter``
-     - ``drop``
-     - Traps incoming packets that the device decided to drop in case they are
-       tagged with a VLAN that is not configured on the ingress bridge port
-   * - ``ingress_spanning_tree_filter``
-     - ``drop``
-     - Traps incoming packets that the device decided to drop in case the STP
-       state of the ingress bridge port is not "forwarding"
-   * - ``port_list_is_empty``
-     - ``drop``
-     - Traps packets that the device decided to drop in case they need to be
-       flooded (e.g., unknown unicast, unregistered multicast) and there are
-       no ports the packets should be flooded to
-   * - ``port_loopback_filter``
-     - ``drop``
-     - Traps packets that the device decided to drop in case after layer 2
-       forwarding the only port from which they should be transmitted through
-       is the port from which they were received
-   * - ``blackhole_route``
-     - ``drop``
-     - Traps packets that the device decided to drop in case they hit a
-       blackhole route
-   * - ``ttl_value_is_too_small``
-     - ``exception``
-     - Traps unicast packets that should be forwarded by the device whose TTL
-       was decremented to 0 or less
-   * - ``tail_drop``
-     - ``drop``
-     - Traps packets that the device decided to drop because they could not be
-       enqueued to a transmission queue which is full
-
-Driver-specific Packet Traps
-============================
-
-Device drivers can register driver-specific packet traps, but these must be
-clearly documented. Such traps can correspond to device-specific exceptions and
-help debug packet drops caused by these exceptions. The following list includes
-links to the description of driver-specific traps registered by various device
-drivers:
-
-  * :doc:`/devlink-trap-netdevsim`
-
-Generic Packet Trap Groups
-==========================
-
-Generic packet trap groups are used to aggregate logically related packet
-traps. These groups allow the user to batch operations such as setting the trap
-action of all member traps. In addition, ``devlink-trap`` can report aggregated
-per-group packets and bytes statistics, in case per-trap statistics are too
-narrow. The description of these groups must be added to the following table:
-
-.. list-table:: List of Generic Packet Trap Groups
-   :widths: 10 90
-
-   * - Name
-     - Description
-   * - ``l2_drops``
-     - Contains packet traps for packets that were dropped by the device during
-       layer 2 forwarding (i.e., bridge)
-   * - ``l3_drops``
-     - Contains packet traps for packets that were dropped by the device or hit
-       an exception (e.g., TTL error) during layer 3 forwarding
-   * - ``buffer_drops``
-     - Contains packet traps for packets that were dropped by the device due to
-       an enqueue decision
-
-Testing
-=======
-
-See ``tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh`` for a
-test covering the core infrastructure. Test cases should be added for any new
-functionality.
-
-Device drivers should focus their tests on device-specific functionality, such
-as the triggering of supported packet traps.
diff --git a/Documentation/networking/devlink/bnxt.rst b/Documentation/networking/devlink/bnxt.rst
new file mode 100644
index 000000000000..82ef9ec46707
--- /dev/null
+++ b/Documentation/networking/devlink/bnxt.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+bnxt devlink support
+====================
+
+This document describes the devlink features implemented by the ``bnxt``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``enable_sriov``
+     - Permanent
+   * - ``ignore_ari``
+     - Permanent
+   * - ``msix_vec_per_pf_max``
+     - Permanent
+   * - ``msix_vec_per_pf_min``
+     - Permanent
+
+The ``bnxt`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``gre_ver_check``
+     - Boolean
+     - Permanent
+     - Generic Routing Encapsulation (GRE) version check will be enabled in
+       the device. If disabled, the device will skip the version check for
+       incoming packets.
+
+Info versions
+=============
+
+The ``bnxt_en`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+      :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``asic.id``
+     - fixed
+     - ASIC design identifier
+   * - ``asic.rev``
+     - fixed
+     - ASIC design revision
+   * - ``fw.psid``
+     - stored, running
+     - Firmware parameter set version of the board
+   * - ``fw``
+     - stored, running
+     - Overall board firmware version
+   * - ``fw.app``
+     - stored, running
+     - Data path firmware version
+   * - ``fw.mgmt``
+     - stored, running
+     - Management firmware version
+   * - ``fw.roce``
+     - stored, running
+     - RoCE management firmware version
diff --git a/Documentation/networking/devlink/devlink-dpipe.rst b/Documentation/networking/devlink/devlink-dpipe.rst
new file mode 100644
index 000000000000..468fe1001b74
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-dpipe.rst
@@ -0,0 +1,252 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Devlink DPIPE
+=============
+
+Background
+==========
+
+While performing the hardware offloading process, much of the hardware
+specifics cannot be presented. These details are useful for debugging, and
+``devlink-dpipe`` provides a standardized way to provide visibility into the
+offloading process.
+
+For example, the routing longest prefix match (LPM) algorithm used by the
+Linux kernel may differ from the hardware implementation. The pipeline debug
+API (DPIPE) is aimed at providing the user visibility into the ASIC's
+pipeline in a generic way.
+
+The hardware offload process is expected to be done in a way that the user
+should not be able to distinguish between the hardware vs. software
+implementation. In this process, hardware specifics are neglected. In
+reality those details can have lots of meaning and should be exposed in some
+standard way.
+
+This problem is made even more complex when one wishes to offload the
+control path of the whole networking stack to a switch ASIC. Due to
+differences in the hardware and software models some processes cannot be
+represented correctly.
+
+One example is the kernel's LPM algorithm which in many cases differs
+greatly to the hardware implementation. The configuration API is the same,
+but one cannot rely on the Forward Information Base (FIB) to look like the
+Level Path Compression trie (LPC-trie) in hardware.
+
+In many situations trying to analyze systems failure solely based on the
+kernel's dump may not be enough. By combining this data with complementary
+information about the underlying hardware, this debugging can be made
+easier; additionally, the information can be useful when debugging
+performance issues.
+
+Overview
+========
+
+The ``devlink-dpipe`` interface closes this gap. The hardware's pipeline is
+modeled as a graph of match/action tables. Each table represents a specific
+hardware block. This model is not new, first being used by the P4 language.
+
+Traditionally it has been used as an alternative model for hardware
+configuration, but the ``devlink-dpipe`` interface uses it for visibility
+purposes as a standard complementary tool. The system's view from
+``devlink-dpipe`` should change according to the changes done by the
+standard configuration tools.
+
+For example, it’s quiet common to  implement Access Control Lists (ACL)
+using Ternary Content Addressable Memory (TCAM). The TCAM memory can be
+divided into TCAM regions. Complex TC filters can have multiple rules with
+different priorities and different lookup keys. On the other hand hardware
+TCAM regions have a predefined lookup key. Offloading the TC filter rules
+using TCAM engine can result in multiple TCAM regions being interconnected
+in a chain (which may affect the data path latency). In response to a new TC
+filter new tables should be created describing those regions.
+
+Model
+=====
+
+The ``DPIPE`` model introduces several objects:
+
+  * headers
+  * tables
+  * entries
+
+A ``header`` describes packet formats and provides names for fields within
+the packet. A ``table`` describes hardware blocks. An ``entry`` describes
+the actual content of a specific table.
+
+The hardware pipeline is not port specific, but rather describes the whole
+ASIC. Thus it is tied to the top of the ``devlink`` infrastructure.
+
+Drivers can register and unregister tables at run time, in order to support
+dynamic behavior. This dynamic behavior is mandatory for describing hardware
+blocks like TCAM regions which can be allocated and freed dynamically.
+
+``devlink-dpipe`` generally is not intended for configuration. The exception
+is hardware counting for a specific table.
+
+The following commands are used to obtain the ``dpipe`` objects from
+userspace:
+
+  * ``table_get``: Receive a table's description.
+  * ``headers_get``: Receive a device's supported headers.
+  * ``entries_get``: Receive a table's current entries.
+  * ``counters_set``: Enable or disable counters on a table.
+
+Table
+-----
+
+The driver should implement the following operations for each table:
+
+  * ``matches_dump``: Dump the supported matches.
+  * ``actions_dump``: Dump the supported actions.
+  * ``entries_dump``: Dump the actual content of the table.
+  * ``counters_set_update``: Synchronize hardware with counters enabled or
+    disabled.
+
+Header/Field
+------------
+
+In a similar way to P4 headers and fields are used to describe a table's
+behavior. There is a slight difference between the standard protocol headers
+and specific ASIC metadata. The protocol headers should be declared in the
+``devlink`` core API. On the other hand ASIC meta data is driver specific
+and should be defined in the driver. Additionally, each driver-specific
+devlink documentation file should document the driver-specific ``dpipe``
+headers it implements. The headers and fields are identified by enumeration.
+
+In order to provide further visibility some ASIC metadata fields could be
+mapped to kernel objects. For example, internal router interface indexes can
+be directly mapped to the net device ifindex. FIB table indexes used by
+different Virtual Routing and Forwarding (VRF) tables can be mapped to
+internal routing table indexes.
+
+Match
+-----
+
+Matches are kept primitive and close to hardware operation. Match types like
+LPM are not supported due to the fact that this is exactly a process we wish
+to describe in full detail. Example of matches:
+
+  * ``field_exact``: Exact match on a specific field.
+  * ``field_exact_mask``: Exact match on a specific field after masking.
+  * ``field_range``: Match on a specific range.
+
+The id's of the header and the field should be specified in order to
+identify the specific field. Furthermore, the header index should be
+specified in order to distinguish multiple headers of the same type in a
+packet (tunneling).
+
+Action
+------
+
+Similar to match, the actions are kept primitive and close to hardware
+operation. For example:
+
+  * ``field_modify``: Modify the field value.
+  * ``field_inc``: Increment the field value.
+  * ``push_header``: Add a header.
+  * ``pop_header``: Remove a header.
+
+Entry
+-----
+
+Entries of a specific table can be dumped on demand. Each eentry is
+identified with an index and its properties are described by a list of
+match/action values and specific counter. By dumping the tables content the
+interactions between tables can be resolved.
+
+Abstraction Example
+===================
+
+The following is an example of the abstraction model of the L3 part of
+Mellanox Spectrum ASIC. The blocks are described in the order they appear in
+the pipeline. The table sizes in the following examples are not real
+hardware sizes and are provided for demonstration purposes.
+
+LPM
+---
+
+The LPM algorithm can be implemented as a list of hash tables. Each hash
+table contains routes with the same prefix length. The root of the list is
+/32, and in case of a miss the hardware will continue to the next hash
+table. The depth of the search will affect the data path latency.
+
+In case of a hit the entry contains information about the next stage of the
+pipeline which resolves the MAC address. The next stage can be either local
+host table for directly connected routes, or adjacency table for next-hops.
+The ``meta.lpm_prefix`` field is used to connect two LPM tables.
+
+.. code::
+
+    table lpm_prefix_16 {
+      size: 4096,
+      counters_enabled: true,
+      match: { meta.vr_id: exact,
+               ipv4.dst_addr: exact_mask,
+               ipv6.dst_addr: exact_mask,
+               meta.lpm_prefix: exact },
+      action: { meta.adj_index: set,
+                meta.adj_group_size: set,
+                meta.rif_port: set,
+                meta.lpm_prefix: set },
+    }
+
+Local Host
+----------
+
+In the case of local routes the LPM lookup already resolves the egress
+router interface (RIF), yet the exact MAC address is not known. The local
+host table is a hash table combining the output interface id with
+destination IP address as a key. The result is the MAC address.
+
+.. code::
+
+    table local_host {
+      size: 4096,
+      counters_enabled: true,
+      match: { meta.rif_port: exact,
+               ipv4.dst_addr: exact},
+      action: { ethernet.daddr: set }
+    }
+
+Adjacency
+---------
+
+In case of remote routes this table does the ECMP. The LPM lookup results in
+ECMP group size and index that serves as a global offset into this table.
+Concurrently a hash of the packet is generated. Based on the ECMP group size
+and the packet's hash a local offset is generated. Multiple LPM entries can
+point to the same adjacency group.
+
+.. code::
+
+    table adjacency {
+      size: 4096,
+      counters_enabled: true,
+      match: { meta.adj_index: exact,
+               meta.adj_group_size: exact,
+               meta.packet_hash_index: exact },
+      action: { ethernet.daddr: set,
+                meta.erif: set }
+    }
+
+ERIF
+----
+
+In case the egress RIF and destination MAC have been resolved by previous
+tables this table does multiple operations like TTL decrease and MTU check.
+Then the decision of forward/drop is taken and the port L3 statistics are
+updated based on the packet's type (broadcast, unicast, multicast).
+
+.. code::
+
+    table erif {
+      size: 800,
+      counters_enabled: true,
+      match: { meta.rif_port: exact,
+               meta.is_l3_unicast: exact,
+               meta.is_l3_broadcast: exact,
+               meta.is_l3_multicast, exact },
+      action: { meta.l3_drop: set,
+                meta.l3_forward: set }
+    }
diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
new file mode 100644
index 000000000000..0c99b11f05f9
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -0,0 +1,114 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Health
+==============
+
+Background
+==========
+
+The ``devlink`` health mechanism is targeted for Real Time Alerting, in
+order to know when something bad happened to a PCI device.
+
+  * Provide alert debug information.
+  * Self healing.
+  * If problem needs vendor support, provide a way to gather all needed
+    debugging information.
+
+Overview
+========
+
+The main idea is to unify and centralize driver health reports in the
+generic ``devlink`` instance and allow the user to set different
+attributes of the health reporting and recovery procedures.
+
+The ``devlink`` health reporter:
+Device driver creates a "health reporter" per each error/health type.
+Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
+or unknown (driver specific).
+For each registered health reporter a driver can issue error/health reports
+asynchronously. All health reports handling is done by ``devlink``.
+Device driver can provide specific callbacks for each "health reporter", e.g.:
+
+  * Recovery procedures
+  * Diagnostics procedures
+  * Object dump procedures
+  * OOB initial parameters
+
+Different parts of the driver can register different types of health reporters
+with different handlers.
+
+Actions
+=======
+
+Once an error is reported, devlink health will perform the following actions:
+
+  * A log is being send to the kernel trace events buffer
+  * Health status and statistics are being updated for the reporter instance
+  * Object dump is being taken and saved at the reporter instance (as long as
+    there is no other dump which is already stored)
+  * Auto recovery attempt is being done. Depends on:
+    - Auto-recovery configuration
+    - Grace period vs. time passed since last recover
+
+User Interface
+==============
+
+User can access/change each reporter's parameters and driver specific callbacks
+via ``devlink``, e.g per error type (per health reporter):
+
+  * Configure reporter's generic parameters (like: disable/enable auto recovery)
+  * Invoke recovery procedure
+  * Run diagnostics
+  * Object dump
+
+.. list-table:: List of devlink health interfaces
+   :widths: 10 90
+
+   * - Name
+     - Description
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_GET``
+     - Retrieves status and configuration info per DEV and reporter.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_SET``
+     - Allows reporter-related configuration setting.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_RECOVER``
+     - Triggers a reporter's recovery procedure.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE``
+     - Retrieves diagnostics data from a reporter on a device.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET``
+     - Retrieves the last stored dump. Devlink health
+       saves a single dump. If an dump is not already stored by the devlink
+       for this reporter, devlink generates a new dump.
+       dump output is defined by the reporter.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR``
+     - Clears the last saved dump file for the specified reporter.
+
+The following diagram provides a general overview of ``devlink-health``::
+
+                                                   netlink
+                                          +--------------------------+
+                                          |                          |
+                                          |            +             |
+                                          |            |             |
+                                          +--------------------------+
+                                                       |request for ops
+                                                       |(diagnose,
+     mlx5_core                             devlink     |recover,
+                                                       |dump)
+    +--------+                            +--------------------------+
+    |        |                            |    reporter|             |
+    |        |                            |  +---------v----------+  |
+    |        |   ops execution            |  |                    |  |
+    |     <----------------------------------+                    |  |
+    |        |                            |  |                    |  |
+    |        |                            |  + ^------------------+  |
+    |        |                            |    | request for ops     |
+    |        |                            |    | (recover, dump)     |
+    |        |                            |    |                     |
+    |        |                            |  +-+------------------+  |
+    |        |     health report          |  | health handler     |  |
+    |        +------------------------------->                    |  |
+    |        |                            |  +--------------------+  |
+    |        |     health reporter create |                          |
+    |        +---------------------------->                          |
+    +--------+                            +--------------------------+
diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst
new file mode 100644
index 000000000000..70981dd1b981
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-info.rst
@@ -0,0 +1,100 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+============
+Devlink Info
+============
+
+The ``devlink-info`` mechanism enables device drivers to report device
+information in a generic fashion. It is extensible, and enables exporting
+even device or driver specific information.
+
+devlink supports representing the following types of versions
+
+.. list-table:: List of version types
+   :widths: 5 95
+
+   * - Type
+     - Description
+   * - ``fixed``
+     - Represents fixed versions, which cannot change. For example,
+       component identifiers or the board version reported in the PCI VPD.
+   * - ``running``
+     - Represents the version of the currently running component. For
+       example the running version of firmware. These versions generally
+       only update after a reboot.
+   * - ``stored``
+     - Represents the version of a component as stored, such as after a
+       flash update. Stored values should update to reflect changes in the
+       flash even if a reboot has not yet occurred.
+
+Generic Versions
+================
+
+It is expected that drivers use the following generic names for exporting
+version information. Other information may be exposed using driver-specific
+names, but these should be documented in the driver-specific file.
+
+board.id
+--------
+
+Unique identifier of the board design.
+
+board.rev
+---------
+
+Board design revision.
+
+asic.id
+-------
+
+ASIC design identifier.
+
+asic.rev
+--------
+
+ASIC design revision.
+
+board.manufacture
+-----------------
+
+An identifier of the company or the facility which produced the part.
+
+fw
+--
+
+Overall firmware version, often representing the collection of
+fw.mgmt, fw.app, etc.
+
+fw.mgmt
+-------
+
+Control unit firmware version. This firmware is responsible for house
+keeping tasks, PHY control etc. but not the packet-by-packet data path
+operation.
+
+fw.app
+------
+
+Data path microcode controlling high-speed packet processing.
+
+fw.undi
+-------
+
+UNDI software, may include the UEFI driver, firmware or both.
+
+fw.ncsi
+-------
+
+Version of the software responsible for supporting/handling the
+Network Controller Sideband Interface.
+
+fw.psid
+-------
+
+Unique identifier of the firmware parameter set.
+
+fw.roce
+-------
+
+RoCE firmware version which is responsible for handling roce
+management.
diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
new file mode 100644
index 000000000000..da2f85c0fa21
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-params.rst
@@ -0,0 +1,108 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Params
+==============
+
+``devlink`` provides capability for a driver to expose device parameters for low
+level device functionality. Since devlink can operate at the device-wide
+level, it can be used to provide configuration that may affect multiple
+ports on a single device.
+
+This document describes a number of generic parameters that are supported
+across multiple drivers. Each driver is also free to add their own
+parameters. Each driver must document the specific parameters they support,
+whether generic or not.
+
+Configuration modes
+===================
+
+Parameters may be set in different configuration modes.
+
+.. list-table:: Possible configuration modes
+   :widths: 5 90
+
+   * - Name
+     - Description
+   * - ``runtime``
+     - set while the driver is running, and takes effect immediately. No
+       reset is required.
+   * - ``driverinit``
+     - applied while the driver initializes. Requires the user to restart
+       the driver using the ``devlink`` reload command.
+   * - ``permanent``
+     - written to the device's non-volatile memory. A hard reset is required
+       for it to take effect.
+
+Reloading
+---------
+
+In order for ``driverinit`` parameters to take effect, the driver must
+support reloading via the ``devlink-reload`` command. This command will
+request a reload of the device driver.
+
+Generic configuration parameters
+================================
+The following is a list of generic configuration parameters that drivers may
+add. Use of generic parameters is preferred over each driver creating their
+own name.
+
+.. list-table:: List of generic parameters
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``enable_sriov``
+     - Boolean
+     - Enable Single Root I/O Virtualization (SRIOV) in the device.
+   * - ``ignore_ari``
+     - Boolean
+     - Ignore Alternative Routing-ID Interpretation (ARI) capability. If
+       enabled, the adapter will ignore ARI capability even when the
+       platform has support enabled. The device will create the same number
+       of partitions as when the platform does not support ARI.
+   * - ``msix_vec_per_pf_max``
+     - u32
+     - Provides the maximum number of MSI-X interrupts that a device can
+       create. Value is the same across all physical functions (PFs) in the
+       device.
+   * - ``msix_vec_per_pf_min``
+     - u32
+     - Provides the minimum number of MSI-X interrupts required for the
+       device to initialize. Value is the same across all physical functions
+       (PFs) in the device.
+   * - ``fw_load_policy``
+     - u8
+     - Control the device's firmware loading policy.
+        - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER`` (0)
+          Load firmware version preferred by the driver.
+        - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH`` (1)
+          Load firmware currently stored in flash.
+        - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK`` (2)
+          Load firmware currently available on host's disk.
+   * - ``reset_dev_on_drv_probe``
+     - u8
+     - Controls the device's reset policy on driver probe.
+        - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN`` (0)
+          Unknown or invalid value.
+        - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS`` (1)
+          Always reset device on driver probe.
+        - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER`` (2)
+          Never reset device on driver probe.
+        - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK`` (3)
+          Reset the device only if firmware can be found in the filesystem.
+   * - ``enable_roce``
+     - Boolean
+     - Enable handling of RoCE traffic in the device.
+   * - ``internal_err_reset``
+     - Boolean
+     - When enabled, the device driver will reset the device on internal
+       errors.
+   * - ``max_macs``
+     - u32
+     - Specifies the maximum number of MAC addresses per ethernet port of
+       this device.
+   * - ``region_snapshot_enable``
+     - Boolean
+     - Enable capture of ``devlink-region`` snapshots.
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
new file mode 100644
index 000000000000..1a7683e7acb2
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Region
+==============
+
+``devlink`` regions enable access to driver defined address regions using
+devlink.
+
+Each device can create and register its own supported address regions. The
+region can then be accessed via the devlink region interface.
+
+Region snapshots are collected by the driver, and can be accessed via read
+or dump commands. This allows future analysis on the created snapshots.
+Regions may optionally support triggering snapshots on demand.
+
+The major benefit to creating a region is to provide access to internal
+address regions that are otherwise inaccessible to the user.
+
+Regions may also be used to provide an additional way to debug complex error
+states, but see also :doc:`devlink-health`
+
+example usage
+-------------
+
+.. code:: shell
+
+    $ devlink region help
+    $ devlink region show [ DEV/REGION ]
+    $ devlink region del DEV/REGION snapshot SNAPSHOT_ID
+    $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
+    $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ]
+            address ADDRESS length length
+
+    # Show all of the exposed regions with region sizes:
+    $ devlink region show
+    pci/0000:00:05.0/cr-space: size 1048576 snapshot [1 2]
+    pci/0000:00:05.0/fw-health: size 64 snapshot [1 2]
+
+    # Delete a snapshot using:
+    $ devlink region del pci/0000:00:05.0/cr-space snapshot 1
+
+    # Trigger (request) a snapshot be taken:
+    $ devlink region trigger pci/0000:00:05.0/cr-space
+
+    # Dump a snapshot:
+    $ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
+    0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
+    0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8
+    0000000000000020 0016 0bb8 0016 1720 0000 0000 c00f 3ffc
+    0000000000000030 bada cce5 bada cce5 bada cce5 bada cce5
+
+    # Read a specific part of a snapshot:
+    $ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0
+            length 16
+    0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
+
+As regions are likely very device or driver specific, no generic regions are
+defined. See the driver-specific documentation files for information on the
+specific regions a driver supports.
diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst
new file mode 100644
index 000000000000..93e92d2f0752
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-resource.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Devlink Resource
+================
+
+``devlink`` provides the ability for drivers to register resources, which
+can allow administrators to see the device restrictions for a given
+resource, as well as how much of the given resource is currently
+in use. Additionally, these resources can optionally have configurable size.
+This could enable the administrator to limit the number of resources that
+are used.
+
+For example, the ``netdevsim`` driver enables ``/IPv4/fib`` and
+``/IPv4/fib-rules`` as resources to limit the number of IPv4 FIB entries and
+rules for a given device.
+
+Resource Ids
+============
+
+Each resource is represented by an id, and contains information about its
+current size and related sub resources. To access a sub resource, you
+specify the path of the resource. For example ``/IPv4/fib`` is the id for
+the ``fib`` sub-resource under the ``IPv4`` resource.
+
+example usage
+-------------
+
+The resources exposed by the driver can be observed, for example:
+
+.. code:: shell
+
+    $devlink resource show pci/0000:03:00.0
+    pci/0000:03:00.0:
+      name kvd size 245760 unit entry
+        resources:
+          name linear size 98304 occ 0 unit entry size_min 0 size_max 147456 size_gran 128
+          name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128
+          name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128
+
+Some resource's size can be changed. Examples:
+
+.. code:: shell
+
+    $devlink resource set pci/0000:03:00.0 path /kvd/hash_single size 73088
+    $devlink resource set pci/0000:03:00.0 path /kvd/hash_double size 74368
+
+The changes do not apply immediately, this can be validated by the 'size_new'
+attribute, which represents the pending change in size. For example:
+
+.. code:: shell
+
+    $devlink resource show pci/0000:03:00.0
+    pci/0000:03:00.0:
+      name kvd size 245760 unit entry size_valid false
+      resources:
+        name linear size 98304 size_new 147456 occ 0 unit entry size_min 0 size_max 147456 size_gran 128
+        name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128
+        name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128
+
+Note that changes in resource size may require a device reload to properly
+take effect.
diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
new file mode 100644
index 000000000000..47a429bb8658
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -0,0 +1,289 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Devlink Trap
+============
+
+Background
+==========
+
+Devices capable of offloading the kernel's datapath and perform functions such
+as bridging and routing must also be able to send specific packets to the
+kernel (i.e., the CPU) for processing.
+
+For example, a device acting as a multicast-aware bridge must be able to send
+IGMP membership reports to the kernel for processing by the bridge module.
+Without processing such packets, the bridge module could never populate its
+MDB.
+
+As another example, consider a device acting as router which has received an IP
+packet with a TTL of 1. Upon routing the packet the device must send it to the
+kernel so that it will route it as well and generate an ICMP Time Exceeded
+error datagram. Without letting the kernel route such packets itself, utilities
+such as ``traceroute`` could never work.
+
+The fundamental ability of sending certain packets to the kernel for processing
+is called "packet trapping".
+
+Overview
+========
+
+The ``devlink-trap`` mechanism allows capable device drivers to register their
+supported packet traps with ``devlink`` and report trapped packets to
+``devlink`` for further analysis.
+
+Upon receiving trapped packets, ``devlink`` will perform a per-trap packets and
+bytes accounting and potentially report the packet to user space via a netlink
+event along with all the provided metadata (e.g., trap reason, timestamp, input
+port). This is especially useful for drop traps (see :ref:`Trap-Types`)
+as it allows users to obtain further visibility into packet drops that would
+otherwise be invisible.
+
+The following diagram provides a general overview of ``devlink-trap``::
+
+                                    Netlink event: Packet w/ metadata
+                                                   Or a summary of recent drops
+                                  ^
+                                  |
+         Userspace                |
+        +---------------------------------------------------+
+         Kernel                   |
+                                  |
+                          +-------+--------+
+                          |                |
+                          |  drop_monitor  |
+                          |                |
+                          +-------^--------+
+                                  |
+                                  |
+                                  |
+                             +----+----+
+                             |         |      Kernel's Rx path
+                             | devlink |      (non-drop traps)
+                             |         |
+                             +----^----+      ^
+                                  |           |
+                                  +-----------+
+                                  |
+                          +-------+-------+
+                          |               |
+                          | Device driver |
+                          |               |
+                          +-------^-------+
+         Kernel                   |
+        +---------------------------------------------------+
+         Hardware                 |
+                                  | Trapped packet
+                                  |
+                               +--+---+
+                               |      |
+                               | ASIC |
+                               |      |
+                               +------+
+
+.. _Trap-Types:
+
+Trap Types
+==========
+
+The ``devlink-trap`` mechanism supports the following packet trap types:
+
+  * ``drop``: Trapped packets were dropped by the underlying device. Packets
+    are only processed by ``devlink`` and not injected to the kernel's Rx path.
+    The trap action (see :ref:`Trap-Actions`) can be changed.
+  * ``exception``: Trapped packets were not forwarded as intended by the
+    underlying device due to an exception (e.g., TTL error, missing neighbour
+    entry) and trapped to the control plane for resolution. Packets are
+    processed by ``devlink`` and injected to the kernel's Rx path. Changing the
+    action of such traps is not allowed, as it can easily break the control
+    plane.
+
+.. _Trap-Actions:
+
+Trap Actions
+============
+
+The ``devlink-trap`` mechanism supports the following packet trap actions:
+
+  * ``trap``: The sole copy of the packet is sent to the CPU.
+  * ``drop``: The packet is dropped by the underlying device and a copy is not
+    sent to the CPU.
+
+Generic Packet Traps
+====================
+
+Generic packet traps are used to describe traps that trap well-defined packets
+or packets that are trapped due to well-defined conditions (e.g., TTL error).
+Such traps can be shared by multiple device drivers and their description must
+be added to the following table:
+
+.. list-table:: List of Generic Packet Traps
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``source_mac_is_multicast``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop because of a
+       multicast source MAC
+   * - ``vlan_tag_mismatch``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case of VLAN
+       tag mismatch: The ingress bridge port is not configured with a PVID and
+       the packet is untagged or prio-tagged
+   * - ``ingress_vlan_filter``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case they are
+       tagged with a VLAN that is not configured on the ingress bridge port
+   * - ``ingress_spanning_tree_filter``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case the STP
+       state of the ingress bridge port is not "forwarding"
+   * - ``port_list_is_empty``
+     - ``drop``
+     - Traps packets that the device decided to drop in case they need to be
+       flooded (e.g., unknown unicast, unregistered multicast) and there are
+       no ports the packets should be flooded to
+   * - ``port_loopback_filter``
+     - ``drop``
+     - Traps packets that the device decided to drop in case after layer 2
+       forwarding the only port from which they should be transmitted through
+       is the port from which they were received
+   * - ``blackhole_route``
+     - ``drop``
+     - Traps packets that the device decided to drop in case they hit a
+       blackhole route
+   * - ``ttl_value_is_too_small``
+     - ``exception``
+     - Traps unicast packets that should be forwarded by the device whose TTL
+       was decremented to 0 or less
+   * - ``tail_drop``
+     - ``drop``
+     - Traps packets that the device decided to drop because they could not be
+       enqueued to a transmission queue which is full
+   * - ``non_ip``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to
+       undergo a layer 3 lookup, but are not IP or MPLS packets
+   * - ``uc_dip_over_mc_dmac``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and they have a unicast destination IP and a multicast destination
+       MAC
+   * - ``dip_is_loopback_address``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and their destination IP is the loopback address (i.e., 127.0.0.0/8
+       and ::1/128)
+   * - ``sip_is_mc``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and their source IP is multicast (i.e., 224.0.0.0/8 and ff::/8)
+   * - ``sip_is_loopback_address``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and their source IP is the loopback address (i.e., 127.0.0.0/8 and ::1/128)
+   * - ``ip_header_corrupted``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and their IP header is corrupted: wrong checksum, wrong IP version
+       or too short Internet Header Length (IHL)
+   * - ``ipv4_sip_is_limited_bc``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed and their source IP is limited broadcast (i.e., 255.255.255.255/32)
+   * - ``ipv6_mc_dip_reserved_scope``
+     - ``drop``
+     - Traps IPv6 packets that the device decided to drop because they need to
+       be routed and their IPv6 multicast destination IP has a reserved scope
+       (i.e., ffx0::/16)
+   * - ``ipv6_mc_dip_interface_local_scope``
+     - ``drop``
+     - Traps IPv6 packets that the device decided to drop because they need to
+       be routed and their IPv6 multicast destination IP has an interface-local scope
+       (i.e., ffx1::/16)
+   * - ``mtu_value_is_too_small``
+     - ``exception``
+     - Traps packets that should have been routed by the device, but were bigger
+       than the MTU of the egress interface
+   * - ``unresolved_neigh``
+     - ``exception``
+     - Traps packets that did not have a matching IP neighbour after routing
+   * - ``mc_reverse_path_forwarding``
+     - ``exception``
+     - Traps multicast IP packets that failed reverse-path forwarding (RPF)
+       check during multicast routing
+   * - ``reject_route``
+     - ``exception``
+     - Traps packets that hit reject routes (i.e., "unreachable", "prohibit")
+   * - ``ipv4_lpm_miss``
+     - ``exception``
+     - Traps unicast IPv4 packets that did not match any route
+   * - ``ipv6_lpm_miss``
+     - ``exception``
+     - Traps unicast IPv6 packets that did not match any route
+   * - ``non_routable_packet``
+     - ``drop``
+     - Traps packets that the device decided to drop because they are not
+       supposed to be routed. For example, IGMP queries can be flooded by the
+       device in layer 2 and reach the router. Such packets should not be
+       routed and instead dropped
+   * - ``decap_error``
+     - ``exception``
+     - Traps NVE and IPinIP packets that the device decided to drop because of
+       failure during decapsulation (e.g., packet being too short, reserved
+       bits set in VXLAN header)
+   * - ``overlay_smac_is_mc``
+     - ``drop``
+     - Traps NVE packets that the device decided to drop because their overlay
+       source MAC is multicast
+
+Driver-specific Packet Traps
+============================
+
+Device drivers can register driver-specific packet traps, but these must be
+clearly documented. Such traps can correspond to device-specific exceptions and
+help debug packet drops caused by these exceptions. The following list includes
+links to the description of driver-specific traps registered by various device
+drivers:
+
+  * :doc:`netdevsim`
+  * :doc:`mlxsw`
+
+Generic Packet Trap Groups
+==========================
+
+Generic packet trap groups are used to aggregate logically related packet
+traps. These groups allow the user to batch operations such as setting the trap
+action of all member traps. In addition, ``devlink-trap`` can report aggregated
+per-group packets and bytes statistics, in case per-trap statistics are too
+narrow. The description of these groups must be added to the following table:
+
+.. list-table:: List of Generic Packet Trap Groups
+   :widths: 10 90
+
+   * - Name
+     - Description
+   * - ``l2_drops``
+     - Contains packet traps for packets that were dropped by the device during
+       layer 2 forwarding (i.e., bridge)
+   * - ``l3_drops``
+     - Contains packet traps for packets that were dropped by the device or hit
+       an exception (e.g., TTL error) during layer 3 forwarding
+   * - ``buffer_drops``
+     - Contains packet traps for packets that were dropped by the device due to
+       an enqueue decision
+   * - ``tunnel_drops``
+     - Contains packet traps for packets that were dropped by the device during
+       tunnel encapsulation / decapsulation
+
+Testing
+=======
+
+See ``tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh`` for a
+test covering the core infrastructure. Test cases should be added for any new
+functionality.
+
+Device drivers should focus their tests on device-specific functionality, such
+as the triggering of supported packet traps.
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
new file mode 100644
index 000000000000..087ff54d53fc
--- /dev/null
+++ b/Documentation/networking/devlink/index.rst
@@ -0,0 +1,42 @@
+Linux Devlink Documentation
+===========================
+
+devlink is an API to expose device information and resources not directly
+related to any device class, such as chip-wide/switch-ASIC-wide configuration.
+
+Interface documentation
+-----------------------
+
+The following pages describe various interfaces available through devlink in
+general.
+
+.. toctree::
+   :maxdepth: 1
+
+   devlink-dpipe
+   devlink-health
+   devlink-info
+   devlink-params
+   devlink-region
+   devlink-resource
+   devlink-trap
+
+Driver-specific documentation
+-----------------------------
+
+Each driver that implements ``devlink`` is expected to document what
+parameters, info versions, and other features it supports.
+
+.. toctree::
+   :maxdepth: 1
+
+   bnxt
+   ionic
+   mlx4
+   mlx5
+   mlxsw
+   mv88e6xxx
+   netdevsim
+   nfp
+   qed
+   ti-cpsw-switch
diff --git a/Documentation/networking/devlink/ionic.rst b/Documentation/networking/devlink/ionic.rst
new file mode 100644
index 000000000000..48da9c92d584
--- /dev/null
+++ b/Documentation/networking/devlink/ionic.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+ionic devlink support
+=====================
+
+This document describes the devlink features implemented by the ``ionic``
+device driver.
+
+Info versions
+=============
+
+The ``ionic`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``fw``
+     - running
+     - Version of firmware running on the device
+   * - ``asic.id``
+     - fixed
+     - The ASIC type for this device
+   * - ``asic.rev``
+     - fixed
+     - The revision of the ASIC for this device
diff --git a/Documentation/networking/devlink/mlx4.rst b/Documentation/networking/devlink/mlx4.rst
new file mode 100644
index 000000000000..7b2d17ea5471
--- /dev/null
+++ b/Documentation/networking/devlink/mlx4.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+mlx4 devlink support
+====================
+
+This document describes the devlink features implemented by the ``mlx4``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``internal_err_reset``
+     - driverinit, runtime
+   * - ``max_macs``
+     - driverinit
+   * - ``region_snapshot_enable``
+     - driverinit, runtime
+
+The ``mlx4`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``enable_64b_cqe_eqe``
+     - Boolean
+     - driverinit
+     - Enable 64 byte CQEs/EQEs, if the FW supports it.
+   * - ``enable_4k_uar``
+     - Boolean
+     - driverinit
+     - Enable using the 4k UAR.
+
+The ``mlx4`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Regions
+=======
+
+The ``mlx4`` driver supports dumping the firmware PCI crspace and health
+buffer during a critical firmware issue.
+
+In case a firmware command times out, firmware getting stuck, or a non zero
+value on the catastrophic buffer, a snapshot will be taken by the driver.
+
+The ``cr-space`` region will contain the firmware PCI crspace contents. The
+``fw-health`` region will contain the device firmware's health buffer.
+Snapshots for both of these regions are taken on the same event triggers.
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
new file mode 100644
index 000000000000..629a6e69c036
--- /dev/null
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+mlx5 devlink support
+====================
+
+This document describes the devlink features implemented by the ``mlx5``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``enable_roce``
+     - driverinit
+
+The ``mlx5`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``flow_steering_mode``
+     - string
+     - runtime
+     - Controls the flow steering mode of the driver
+
+       * ``dmfs`` Device managed flow steering. In DMFS mode, the HW
+         steering entities are created and managed through firmware.
+       * ``smfs`` Software managed flow steering. In SMFS mode, the HW
+         steering entities are created and manage through the driver without
+         firmware intervention.
+
+The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Info versions
+=============
+
+The ``mlx5`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``fw.psid``
+     - fixed
+     - Used to represent the board id of the device.
+   * - ``fw.version``
+     - stored, running
+     - Three digit major.minor.subminor firmware version number.
diff --git a/Documentation/networking/devlink/mlxsw.rst b/Documentation/networking/devlink/mlxsw.rst
new file mode 100644
index 000000000000..cf857cb4ba8f
--- /dev/null
+++ b/Documentation/networking/devlink/mlxsw.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+mlxsw devlink support
+=====================
+
+This document describes the devlink features implemented by the ``mlxsw``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``fw_load_policy``
+     - driverinit
+
+The ``mlxsw`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``acl_region_rehash_interval``
+     - u32
+     - runtime
+     - Sets an interval for periodic ACL region rehashes. The value is
+       specified in milliseconds, with a minimum of ``3000``. The value of
+       ``0`` disables periodic work entirely. The first rehash will be run
+       immediately after the value is set.
+
+The ``mlxsw`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Info versions
+=============
+
+The ``mlxsw`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``hw.revision``
+     - fixed
+     - The hardware revision for this board
+   * - ``fw.psid``
+     - fixed
+     - Firmware PSID
+   * - ``fw.version``
+     - running
+     - Three digit firmware version
+
+Driver-specific Traps
+=====================
+
+.. list-table:: List of Driver-specific Traps Registered by ``mlxsw``
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``irif_disabled``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed from a disabled router interface (RIF). This can happen during
+       RIF dismantle, when the RIF is first disabled before being removed
+       completely
+   * - ``erif_disabled``
+     - ``drop``
+     - Traps packets that the device decided to drop because they need to be
+       routed through a disabled router interface (RIF). This can happen during
+       RIF dismantle, when the RIF is first disabled before being removed
+       completely
diff --git a/Documentation/networking/devlink/mv88e6xxx.rst b/Documentation/networking/devlink/mv88e6xxx.rst
new file mode 100644
index 000000000000..c621212a47a1
--- /dev/null
+++ b/Documentation/networking/devlink/mv88e6xxx.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+mv88e6xxx devlink support
+=========================
+
+This document describes the devlink features implemented by the ``mv88e6xxx``
+device driver.
+
+Parameters
+==========
+
+The ``mv88e6xxx`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``ATU_hash``
+     - u8
+     - runtime
+     - Select one of four possible hashing algorithms for MAC addresses in
+       the Address Translation Unit. A value of 3 may work better than the
+       default of 1 when many MAC addresses have the same OUI. Only the
+       values 0 to 3 are valid for this parameter.
diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst
new file mode 100644
index 000000000000..2a266b7e7b38
--- /dev/null
+++ b/Documentation/networking/devlink/netdevsim.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+netdevsim devlink support
+=========================
+
+This document describes the ``devlink`` features supported by the
+``netdevsim`` device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``max_macs``
+     - driverinit
+
+The ``netdevsim`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``test1``
+     - Boolean
+     - driverinit
+     - Test parameter used to show how a driver-specific devlink parameter
+       can be implemented.
+
+The ``netdevsim`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Regions
+=======
+
+The ``netdevsim`` driver exposes a ``dummy`` region as an example of how the
+devlink-region interfaces work. A snapshot is taken whenever the
+``take_snapshot`` debugfs file is written to.
+
+Resources
+=========
+
+The ``netdevsim`` driver exposes resources to control the number of FIB
+entries and FIB rule entries that the driver will allow.
+
+.. code:: shell
+
+    $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib size 96
+    $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16
+    $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64
+    $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16
+    $ devlink dev reload netdevsim/netdevsim0
+
+Driver-specific Traps
+=====================
+
+.. list-table:: List of Driver-specific Traps Registered by ``netdevsim``
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``fid_miss``
+     - ``exception``
+     - When a packet enters the device it is classified to a filtering
+       indentifier (FID) based on the ingress port and VLAN. This trap is used
+       to trap packets for which a FID could not be found
diff --git a/Documentation/networking/devlink/nfp.rst b/Documentation/networking/devlink/nfp.rst
new file mode 100644
index 000000000000..a1717db0dfcc
--- /dev/null
+++ b/Documentation/networking/devlink/nfp.rst
@@ -0,0 +1,65 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+nfp devlink support
+===================
+
+This document describes the devlink features implemented by the ``nfp``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+   * - Name
+     - Mode
+   * - ``fw_load_policy``
+     - permanent
+   * - ``reset_dev_on_drv_probe``
+     - permanent
+
+Info versions
+=============
+
+The ``nfp`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``board.id``
+     - fixed
+     - Part number identifying the board design
+   * - ``board.rev``
+     - fixed
+     - Revision of the board design
+   * - ``board.manufacture``
+     - fixed
+     - Vendor of the board design
+   * - ``board.model``
+     - fixed
+     - Model name of the board design
+   * - ``fw.bundle_id``
+     - stored, running
+     - Firmware bundle id
+   * - ``fw.mgmt``
+     - stored, running
+     - Version of the management firmware
+   * - ``fw.cpld``
+     - stored, running
+     - The CPLD firmware component version
+   * - ``fw.app``
+     - stored, running
+     - The APP firmware component version
+   * - ``fw.undi``
+     - stored, running
+     - The UNDI firmware component version
+   * - ``fw.ncsi``
+     - stored, running
+     - The NSCI firmware component version
+   * - ``chip.init``
+     - stored, running
+     - The CFGR firmware component version
diff --git a/Documentation/networking/devlink/qed.rst b/Documentation/networking/devlink/qed.rst
new file mode 100644
index 000000000000..805c6f63621a
--- /dev/null
+++ b/Documentation/networking/devlink/qed.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+qed devlink support
+===================
+
+This document describes the devlink features implemented by the ``qed`` core
+device driver.
+
+Parameters
+==========
+
+The ``qed`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``iwarp_cmt``
+     - Boolean
+     - runtime
+     - Enable iWARP functionality for 100g devices. Note that this impacts
+       L2 performance, and is therefore not enabled by default.
diff --git a/Documentation/networking/devlink/ti-cpsw-switch.rst b/Documentation/networking/devlink/ti-cpsw-switch.rst
new file mode 100644
index 000000000000..dc399e32abaa
--- /dev/null
+++ b/Documentation/networking/devlink/ti-cpsw-switch.rst
@@ -0,0 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+ti-cpsw-switch devlink support
+==============================
+
+This document describes the devlink features implemented by the ``ti-cpsw-switch``
+device driver.
+
+Parameters
+==========
+
+The ``ti-cpsw-switch`` driver implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``ale_bypass``
+     - Boolean
+     - runtime
+     - Enables ALE_CONTROL(4).BYPASS mode for debugging purposes. In this
+       mode, all packets will be sent to the host port only.
+   * - ``switch_mode``
+     - Boolean
+     - runtime
+     - Enable switch mode
diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index eef20d0bcf7c..64553d8d91cb 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -230,12 +230,6 @@ simultaneously on two ports. The driver checks the consistency of the schedules
 against this restriction and errors out when appropriate. Schedule analysis is
 needed to avoid this, which is outside the scope of the document.
 
-At the moment, the time-aware scheduler can only be triggered based on a
-standalone clock and not based on PTP time. This means the base-time argument
-from tc-taprio is ignored and the schedule starts right away. It also means it
-is more difficult to phase-align the scheduler with the other devices in the
-network.
-
 Device Tree bindings and board design
 =====================================
 
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
new file mode 100644
index 000000000000..f1f868479ceb
--- /dev/null
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -0,0 +1,618 @@
+=============================
+Netlink interface for ethtool
+=============================
+
+
+Basic information
+=================
+
+Netlink interface for ethtool uses generic netlink family ``ethtool``
+(userspace application should use macros ``ETHTOOL_GENL_NAME`` and
+``ETHTOOL_GENL_VERSION`` defined in ``<linux/ethtool_netlink.h>`` uapi
+header). This family does not use a specific header, all information in
+requests and replies is passed using netlink attributes.
+
+The ethtool netlink interface uses extended ACK for error and warning
+reporting, userspace application developers are encouraged to make these
+messages available to user in a suitable way.
+
+Requests can be divided into three categories: "get" (retrieving information),
+"set" (setting parameters) and "action" (invoking an action).
+
+All "set" and "action" type requests require admin privileges
+(``CAP_NET_ADMIN`` in the namespace). Most "get" type requests are allowed for
+anyone but there are exceptions (where the response contains sensitive
+information). In some cases, the request as such is allowed for anyone but
+unprivileged users have attributes with sensitive information (e.g.
+wake-on-lan password) omitted.
+
+
+Conventions
+===========
+
+Attributes which represent a boolean value usually use NLA_U8 type so that we
+can distinguish three states: "on", "off" and "not present" (meaning the
+information is not available in "get" requests or value is not to be changed
+in "set" requests). For these attributes, the "true" value should be passed as
+number 1 but any non-zero value should be understood as "true" by recipient.
+In the tables below, "bool" denotes NLA_U8 attributes interpreted in this way.
+
+In the message structure descriptions below, if an attribute name is suffixed
+with "+", parent nest can contain multiple attributes of the same type. This
+implements an array of entries.
+
+
+Request header
+==============
+
+Each request or reply message contains a nested attribute with common header.
+Structure of this header is
+
+  ==============================  ======  =============================
+  ``ETHTOOL_A_HEADER_DEV_INDEX``  u32     device ifindex
+  ``ETHTOOL_A_HEADER_DEV_NAME``   string  device name
+  ``ETHTOOL_A_HEADER_FLAGS``      u32     flags common for all requests
+  ==============================  ======  =============================
+
+``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the
+device message relates to. One of them is sufficient in requests, if both are
+used, they must identify the same device. Some requests, e.g. global string
+sets, do not require device identification. Most ``GET`` requests also allow
+dump requests without device identification to query the same information for
+all devices providing it (each device in a separate message).
+
+``ETHTOOL_A_HEADER_FLAGS`` is a bitmap of request flags common for all request
+types. The interpretation of these flags is the same for all request types but
+the flags may not apply to requests. Recognized flags are:
+
+  =================================  ===================================
+  ``ETHTOOL_FLAG_COMPACT_BITSETS``   use compact format bitsets in reply
+  ``ETHTOOL_FLAG_OMIT_REPLY``        omit optional reply (_SET and _ACT)
+  =================================  ===================================
+
+New request flags should follow the general idea that if the flag is not set,
+the behaviour is backward compatible, i.e. requests from old clients not aware
+of the flag should be interpreted the way the client expects. A client must
+not set flags it does not understand.
+
+
+Bit sets
+========
+
+For short bitmaps of (reasonably) fixed length, standard ``NLA_BITFIELD32``
+type is used. For arbitrary length bitmaps, ethtool netlink uses a nested
+attribute with contents of one of two forms: compact (two binary bitmaps
+representing bit values and mask of affected bits) and bit-by-bit (list of
+bits identified by either index or name).
+
+Verbose (bit-by-bit) bitsets allow sending symbolic names for bits together
+with their values which saves a round trip (when the bitset is passed in a
+request) or at least a second request (when the bitset is in a reply). This is
+useful for one shot applications like traditional ethtool command. On the
+other hand, long running applications like ethtool monitor (displaying
+notifications) or network management daemons may prefer fetching the names
+only once and using compact form to save message size. Notifications from
+ethtool netlink interface always use compact form for bitsets.
+
+A bitset can represent either a value/mask pair (``ETHTOOL_A_BITSET_NOMASK``
+not set) or a single bitmap (``ETHTOOL_A_BITSET_NOMASK`` set). In requests
+modifying a bitmap, the former changes the bit set in mask to values set in
+value and preserves the rest; the latter sets the bits set in the bitmap and
+clears the rest.
+
+Compact form: nested (bitset) atrribute contents:
+
+  ============================  ======  ============================
+  ``ETHTOOL_A_BITSET_NOMASK``   flag    no mask, only a list
+  ``ETHTOOL_A_BITSET_SIZE``     u32     number of significant bits
+  ``ETHTOOL_A_BITSET_VALUE``    binary  bitmap of bit values
+  ``ETHTOOL_A_BITSET_MASK``     binary  bitmap of valid bits
+  ============================  ======  ============================
+
+Value and mask must have length at least ``ETHTOOL_A_BITSET_SIZE`` bits
+rounded up to a multiple of 32 bits. They consist of 32-bit words in host byte
+order, words ordered from least significant to most significant (i.e. the same
+way as bitmaps are passed with ioctl interface).
+
+For compact form, ``ETHTOOL_A_BITSET_SIZE`` and ``ETHTOOL_A_BITSET_VALUE`` are
+mandatory. ``ETHTOOL_A_BITSET_MASK`` attribute is mandatory if
+``ETHTOOL_A_BITSET_NOMASK`` is not set (bitset represents a value/mask pair);
+if ``ETHTOOL_A_BITSET_NOMASK`` is not set, ``ETHTOOL_A_BITSET_MASK`` is not
+allowed (bitset represents a single bitmap.
+
+Kernel bit set length may differ from userspace length if older application is
+used on newer kernel or vice versa. If userspace bitmap is longer, an error is
+issued only if the request actually tries to set values of some bits not
+recognized by kernel.
+
+Bit-by-bit form: nested (bitset) attribute contents:
+
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_NOMASK``        | flag   | no mask, only a list        |
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_SIZE``          | u32    | number of significant bits  |
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_BITS``          | nested | array of bits               |
+ +-+----------------------------------+--------+-----------------------------+
+ | | ``ETHTOOL_A_BITSET_BITS_BIT+``   | nested | one bit                     |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_INDEX`` | u32    | bit index (0 for LSB)       |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_NAME``  | string | bit name                    |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_VALUE`` | flag   | present if bit is set       |
+ +-+-+--------------------------------+--------+-----------------------------+
+
+Bit size is optional for bit-by-bit form. ``ETHTOOL_A_BITSET_BITS`` nest can
+only contain ``ETHTOOL_A_BITSET_BITS_BIT`` attributes but there can be an
+arbitrary number of them.  A bit may be identified by its index or by its
+name. When used in requests, listed bits are set to 0 or 1 according to
+``ETHTOOL_A_BITSET_BIT_VALUE``, the rest is preserved. A request fails if
+index exceeds kernel bit length or if name is not recognized.
+
+When ``ETHTOOL_A_BITSET_NOMASK`` flag is present, bitset is interpreted as
+a simple bitmap. ``ETHTOOL_A_BITSET_BIT_VALUE`` attributes are not used in
+such case. Such bitset represents a bitmap with listed bits set and the rest
+zero.
+
+In requests, application can use either form. Form used by kernel in reply is
+determined by ``ETHTOOL_FLAG_COMPACT_BITSETS`` flag in flags field of request
+header. Semantics of value and mask depends on the attribute.
+
+
+List of message types
+=====================
+
+All constants identifying message types use ``ETHTOOL_CMD_`` prefix and suffix
+according to message purpose:
+
+  ==============    ======================================
+  ``_GET``          userspace request to retrieve data
+  ``_SET``          userspace request to set data
+  ``_ACT``          userspace request to perform an action
+  ``_GET_REPLY``    kernel reply to a ``GET`` request
+  ``_SET_REPLY``    kernel reply to a ``SET`` request
+  ``_ACT_REPLY``    kernel reply to an ``ACT`` request
+  ``_NTF``          kernel notification
+  ==============    ======================================
+
+Userspace to kernel:
+
+  ===================================== ================================
+  ``ETHTOOL_MSG_STRSET_GET``            get string set
+  ``ETHTOOL_MSG_LINKINFO_GET``          get link settings
+  ``ETHTOOL_MSG_LINKINFO_SET``          set link settings
+  ``ETHTOOL_MSG_LINKMODES_GET``         get link modes info
+  ``ETHTOOL_MSG_LINKMODES_SET``         set link modes info
+  ``ETHTOOL_MSG_LINKSTATE_GET``         get link state
+  ``ETHTOOL_MSG_DEBUG_GET``             get debugging settings
+  ``ETHTOOL_MSG_DEBUG_SET``             set debugging settings
+  ``ETHTOOL_MSG_WOL_GET``               get wake-on-lan settings
+  ``ETHTOOL_MSG_WOL_SET``               set wake-on-lan settings
+  ===================================== ================================
+
+Kernel to userspace:
+
+  ===================================== =================================
+  ``ETHTOOL_MSG_STRSET_GET_REPLY``      string set contents
+  ``ETHTOOL_MSG_LINKINFO_GET_REPLY``    link settings
+  ``ETHTOOL_MSG_LINKINFO_NTF``          link settings notification
+  ``ETHTOOL_MSG_LINKMODES_GET_REPLY``   link modes info
+  ``ETHTOOL_MSG_LINKMODES_NTF``         link modes notification
+  ``ETHTOOL_MSG_LINKSTATE_GET_REPLY``   link state info
+  ``ETHTOOL_MSG_DEBUG_GET_REPLY``       debugging settings
+  ``ETHTOOL_MSG_DEBUG_NTF``             debugging settings notification
+  ``ETHTOOL_MSG_WOL_GET_REPLY``         wake-on-lan settings
+  ``ETHTOOL_MSG_WOL_NTF``               wake-on-lan settings notification
+  ===================================== =================================
+
+``GET`` requests are sent by userspace applications to retrieve device
+information. They usually do not contain any message specific attributes.
+Kernel replies with corresponding "GET_REPLY" message. For most types, ``GET``
+request with ``NLM_F_DUMP`` and no device identification can be used to query
+the information for all devices supporting the request.
+
+If the data can be also modified, corresponding ``SET`` message with the same
+layout as corresponding ``GET_REPLY`` is used to request changes. Only
+attributes where a change is requested are included in such request (also, not
+all attributes may be changed). Replies to most ``SET`` request consist only
+of error code and extack; if kernel provides additional data, it is sent in
+the form of corresponding ``SET_REPLY`` message which can be suppressed by
+setting ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header.
+
+Data modification also triggers sending a ``NTF`` message with a notification.
+These usually bear only a subset of attributes which was affected by the
+change. The same notification is issued if the data is modified using other
+means (mostly ioctl ethtool interface). Unlike notifications from ethtool
+netlink code which are only sent if something actually changed, notifications
+triggered by ioctl interface may be sent even if the request did not actually
+change any data.
+
+``ACT`` messages request kernel (driver) to perform a specific action. If some
+information is reported by kernel (which can be suppressed by setting
+``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header), the reply takes form of
+an ``ACT_REPLY`` message. Performing an action also triggers a notification
+(``NTF`` message).
+
+Later sections describe the format and semantics of these messages.
+
+
+STRSET_GET
+==========
+
+Requests contents of a string set as provided by ioctl commands
+``ETHTOOL_GSSET_INFO`` and ``ETHTOOL_GSTRINGS.`` String sets are not user
+writeable so that the corresponding ``STRSET_SET`` message is only used in
+kernel replies. There are two types of string sets: global (independent of
+a device, e.g. device feature names) and device specific (e.g. device private
+flags).
+
+Request contents:
+
+ +---------------------------------------+--------+------------------------+
+ | ``ETHTOOL_A_STRSET_HEADER``           | nested | request header         |
+ +---------------------------------------+--------+------------------------+
+ | ``ETHTOOL_A_STRSET_STRINGSETS``       | nested | string set to request  |
+ +-+-------------------------------------+--------+------------------------+
+ | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set         |
+ +-+-+-----------------------------------+--------+------------------------+
+ | | | ``ETHTOOL_A_STRINGSET_ID``        | u32    | set id                 |
+ +-+-+-----------------------------------+--------+------------------------+
+
+Kernel response contents:
+
+ +---------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_HEADER``           | nested | reply header          |
+ +---------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_STRINGSETS``       | nested | array of string sets  |
+ +-+-------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set        |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_ID``        | u32    | set id                |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_COUNT``     | u32    | number of strings     |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_STRINGS``   | nested | array of strings      |
+ +-+-+-+---------------------------------+--------+-----------------------+
+ | | | | ``ETHTOOL_A_STRINGS_STRING+``   | nested | one string            |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | | | | | ``ETHTOOL_A_STRING_INDEX``    | u32    | string index          |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | | | | | ``ETHTOOL_A_STRING_VALUE``    | string | string value          |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_COUNTS_ONLY``      | flag   | return only counts    |
+ +---------------------------------------+--------+-----------------------+
+
+Device identification in request header is optional. Depending on its presence
+a and ``NLM_F_DUMP`` flag, there are three type of ``STRSET_GET`` requests:
+
+ - no ``NLM_F_DUMP,`` no device: get "global" stringsets
+ - no ``NLM_F_DUMP``, with device: get string sets related to the device
+ - ``NLM_F_DUMP``, no device: get device related string sets for all devices
+
+If there is no ``ETHTOOL_A_STRSET_STRINGSETS`` array, all string sets of
+requested type are returned, otherwise only those specified in the request.
+Flag ``ETHTOOL_A_STRSET_COUNTS_ONLY`` tells kernel to only return string
+counts of the sets, not the actual strings.
+
+
+LINKINFO_GET
+============
+
+Requests link settings as provided by ``ETHTOOL_GLINKSETTINGS`` except for
+link modes and autonegotiation related information. The request does not use
+any attributes.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKINFO_HEADER``         nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKINFO_HEADER``         nested  reply header
+  ``ETHTOOL_A_LINKINFO_PORT``           u8      physical port
+  ``ETHTOOL_A_LINKINFO_PHYADDR``        u8      phy MDIO address
+  ``ETHTOOL_A_LINKINFO_TP_MDIX``        u8      MDI(-X) status
+  ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL``   u8      MDI(-X) control
+  ``ETHTOOL_A_LINKINFO_TRANSCEIVER``    u8      transceiver
+  ====================================  ======  ==========================
+
+Attributes and their values have the same meaning as matching members of the
+corresponding ioctl structures.
+
+``LINKINFO_GET`` allows dump requests (kernel returns reply message for all
+devices supporting the request).
+
+
+LINKINFO_SET
+============
+
+``LINKINFO_SET`` request allows setting some of the attributes reported by
+``LINKINFO_GET``.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKINFO_HEADER``         nested  request header
+  ``ETHTOOL_A_LINKINFO_PORT``           u8      physical port
+  ``ETHTOOL_A_LINKINFO_PHYADDR``        u8      phy MDIO address
+  ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL``   u8      MDI(-X) control
+  ====================================  ======  ==========================
+
+MDI(-X) status and transceiver cannot be set, request with the corresponding
+attributes is rejected.
+
+
+LINKMODES_GET
+=============
+
+Requests link modes (supported, advertised and peer advertised) and related
+information (autonegotiation status, link speed and duplex) as provided by
+``ETHTOOL_GLINKSETTINGS``. The request does not use any attributes.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKMODES_HEADER``        nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKMODES_HEADER``        nested  reply header
+  ``ETHTOOL_A_LINKMODES_AUTONEG``       u8      autonegotiation status
+  ``ETHTOOL_A_LINKMODES_OURS``          bitset  advertised link modes
+  ``ETHTOOL_A_LINKMODES_PEER``          bitset  partner link modes
+  ``ETHTOOL_A_LINKMODES_SPEED``         u32     link speed (Mb/s)
+  ``ETHTOOL_A_LINKMODES_DUPLEX``        u8      duplex mode
+  ====================================  ======  ==========================
+
+For ``ETHTOOL_A_LINKMODES_OURS``, value represents advertised modes and mask
+represents supported modes. ``ETHTOOL_A_LINKMODES_PEER`` in the reply is a bit
+list.
+
+``LINKMODES_GET`` allows dump requests (kernel returns reply messages for all
+devices supporting the request).
+
+
+LINKMODES_SET
+=============
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKMODES_HEADER``        nested  request header
+  ``ETHTOOL_A_LINKMODES_AUTONEG``       u8      autonegotiation status
+  ``ETHTOOL_A_LINKMODES_OURS``          bitset  advertised link modes
+  ``ETHTOOL_A_LINKMODES_PEER``          bitset  partner link modes
+  ``ETHTOOL_A_LINKMODES_SPEED``         u32     link speed (Mb/s)
+  ``ETHTOOL_A_LINKMODES_DUPLEX``        u8      duplex mode
+  ====================================  ======  ==========================
+
+``ETHTOOL_A_LINKMODES_OURS`` bit set allows setting advertised link modes. If
+autonegotiation is on (either set now or kept from before), advertised modes
+are not changed (no ``ETHTOOL_A_LINKMODES_OURS`` attribute) and at least one
+of speed and duplex is specified, kernel adjusts advertised modes to all
+supported modes matching speed, duplex or both (whatever is specified). This
+autoselection is done on ethtool side with ioctl interface, netlink interface
+is supposed to allow requesting changes without knowing what exactly kernel
+supports.
+
+
+LINKSTATE_GET
+=============
+
+Requests link state information. At the moment, only link up/down flag (as
+provided by ``ETHTOOL_GLINK`` ioctl command) is provided but some future
+extensions are planned (e.g. link down reason). This request does not have any
+attributes.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKSTATE_HEADER``        nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKSTATE_HEADER``        nested  reply header
+  ``ETHTOOL_A_LINKSTATE_LINK``          bool    link state (up/down)
+  ====================================  ======  ==========================
+
+For most NIC drivers, the value of ``ETHTOOL_A_LINKSTATE_LINK`` returns
+carrier flag provided by ``netif_carrier_ok()`` but there are drivers which
+define their own handler.
+
+``LINKSTATE_GET`` allows dump requests (kernel returns reply messages for all
+devices supporting the request).
+
+
+DEBUG_GET
+=========
+
+Requests debugging settings of a device. At the moment, only message mask is
+provided.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_DEBUG_HEADER``            nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_DEBUG_HEADER``            nested  reply header
+  ``ETHTOOL_A_DEBUG_MSGMASK``           bitset  message mask
+  ====================================  ======  ==========================
+
+The message mask (``ETHTOOL_A_DEBUG_MSGMASK``) is equal to message level as
+provided by ``ETHTOOL_GMSGLVL`` and set by ``ETHTOOL_SMSGLVL`` in ioctl
+interface. While it is called message level there for historical reasons, most
+drivers and almost all newer drivers use it as a mask of enabled message
+classes (represented by ``NETIF_MSG_*`` constants); therefore netlink
+interface follows its actual use in practice.
+
+``DEBUG_GET`` allows dump requests (kernel returns reply messages for all
+devices supporting the request).
+
+
+DEBUG_SET
+=========
+
+Set or update debugging settings of a device. At the moment, only message mask
+is supported.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_DEBUG_HEADER``            nested  request header
+  ``ETHTOOL_A_DEBUG_MSGMASK``           bitset  message mask
+  ====================================  ======  ==========================
+
+``ETHTOOL_A_DEBUG_MSGMASK`` bit set allows setting or modifying mask of
+enabled debugging message types for the device.
+
+
+WOL_GET
+=======
+
+Query device wake-on-lan settings. Unlike most "GET" type requests,
+``ETHTOOL_MSG_WOL_GET`` requires (netns) ``CAP_NET_ADMIN`` privileges as it
+(potentially) provides SecureOn(tm) password which is confidential.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_WOL_HEADER``              nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_WOL_HEADER``              nested  reply header
+  ``ETHTOOL_A_WOL_MODES``               bitset  mask of enabled WoL modes
+  ``ETHTOOL_A_WOL_SOPASS``              binary  SecureOn(tm) password
+  ====================================  ======  ==========================
+
+In reply, ``ETHTOOL_A_WOL_MODES`` mask consists of modes supported by the
+device, value of modes which are enabled. ``ETHTOOL_A_WOL_SOPASS`` is only
+included in reply if ``WAKE_MAGICSECURE`` mode is supported.
+
+
+WOL_SET
+=======
+
+Set or update wake-on-lan settings.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_WOL_HEADER``              nested  request header
+  ``ETHTOOL_A_WOL_MODES``               bitset  enabled WoL modes
+  ``ETHTOOL_A_WOL_SOPASS``              binary  SecureOn(tm) password
+  ====================================  ======  ==========================
+
+``ETHTOOL_A_WOL_SOPASS`` is only allowed for devices supporting
+``WAKE_MAGICSECURE`` mode.
+
+
+Request translation
+===================
+
+The following table maps ioctl commands to netlink commands providing their
+functionality. Entries with "n/a" in right column are commands which do not
+have their netlink replacement yet.
+
+  =================================== =====================================
+  ioctl command                       netlink command
+  =================================== =====================================
+  ``ETHTOOL_GSET``                    ``ETHTOOL_MSG_LINKINFO_GET``
+                                      ``ETHTOOL_MSG_LINKMODES_GET``
+  ``ETHTOOL_SSET``                    ``ETHTOOL_MSG_LINKINFO_SET``
+                                      ``ETHTOOL_MSG_LINKMODES_SET``
+  ``ETHTOOL_GDRVINFO``                n/a
+  ``ETHTOOL_GREGS``                   n/a
+  ``ETHTOOL_GWOL``                    ``ETHTOOL_MSG_WOL_GET``
+  ``ETHTOOL_SWOL``                    ``ETHTOOL_MSG_WOL_SET``
+  ``ETHTOOL_GMSGLVL``                 ``ETHTOOL_MSG_DEBUG_GET``
+  ``ETHTOOL_SMSGLVL``                 ``ETHTOOL_MSG_DEBUG_SET``
+  ``ETHTOOL_NWAY_RST``                n/a
+  ``ETHTOOL_GLINK``                   ``ETHTOOL_MSG_LINKSTATE_GET``
+  ``ETHTOOL_GEEPROM``                 n/a
+  ``ETHTOOL_SEEPROM``                 n/a
+  ``ETHTOOL_GCOALESCE``               n/a
+  ``ETHTOOL_SCOALESCE``               n/a
+  ``ETHTOOL_GRINGPARAM``              n/a
+  ``ETHTOOL_SRINGPARAM``              n/a
+  ``ETHTOOL_GPAUSEPARAM``             n/a
+  ``ETHTOOL_SPAUSEPARAM``             n/a
+  ``ETHTOOL_GRXCSUM``                 n/a
+  ``ETHTOOL_SRXCSUM``                 n/a
+  ``ETHTOOL_GTXCSUM``                 n/a
+  ``ETHTOOL_STXCSUM``                 n/a
+  ``ETHTOOL_GSG``                     n/a
+  ``ETHTOOL_SSG``                     n/a
+  ``ETHTOOL_TEST``                    n/a
+  ``ETHTOOL_GSTRINGS``                ``ETHTOOL_MSG_STRSET_GET``
+  ``ETHTOOL_PHYS_ID``                 n/a
+  ``ETHTOOL_GSTATS``                  n/a
+  ``ETHTOOL_GTSO``                    n/a
+  ``ETHTOOL_STSO``                    n/a
+  ``ETHTOOL_GPERMADDR``               rtnetlink ``RTM_GETLINK``
+  ``ETHTOOL_GUFO``                    n/a
+  ``ETHTOOL_SUFO``                    n/a
+  ``ETHTOOL_GGSO``                    n/a
+  ``ETHTOOL_SGSO``                    n/a
+  ``ETHTOOL_GFLAGS``                  n/a
+  ``ETHTOOL_SFLAGS``                  n/a
+  ``ETHTOOL_GPFLAGS``                 n/a
+  ``ETHTOOL_SPFLAGS``                 n/a
+  ``ETHTOOL_GRXFH``                   n/a
+  ``ETHTOOL_SRXFH``                   n/a
+  ``ETHTOOL_GGRO``                    n/a
+  ``ETHTOOL_SGRO``                    n/a
+  ``ETHTOOL_GRXRINGS``                n/a
+  ``ETHTOOL_GRXCLSRLCNT``             n/a
+  ``ETHTOOL_GRXCLSRULE``              n/a
+  ``ETHTOOL_GRXCLSRLALL``             n/a
+  ``ETHTOOL_SRXCLSRLDEL``             n/a
+  ``ETHTOOL_SRXCLSRLINS``             n/a
+  ``ETHTOOL_FLASHDEV``                n/a
+  ``ETHTOOL_RESET``                   n/a
+  ``ETHTOOL_SRXNTUPLE``               n/a
+  ``ETHTOOL_GRXNTUPLE``               n/a
+  ``ETHTOOL_GSSET_INFO``              ``ETHTOOL_MSG_STRSET_GET``
+  ``ETHTOOL_GRXFHINDIR``              n/a
+  ``ETHTOOL_SRXFHINDIR``              n/a
+  ``ETHTOOL_GFEATURES``               n/a
+  ``ETHTOOL_SFEATURES``               n/a
+  ``ETHTOOL_GCHANNELS``               n/a
+  ``ETHTOOL_SCHANNELS``               n/a
+  ``ETHTOOL_SET_DUMP``                n/a
+  ``ETHTOOL_GET_DUMP_FLAG``           n/a
+  ``ETHTOOL_GET_DUMP_DATA``           n/a
+  ``ETHTOOL_GET_TS_INFO``             n/a
+  ``ETHTOOL_GMODULEINFO``             n/a
+  ``ETHTOOL_GMODULEEEPROM``           n/a
+  ``ETHTOOL_GEEE``                    n/a
+  ``ETHTOOL_SEEE``                    n/a
+  ``ETHTOOL_GRSSH``                   n/a
+  ``ETHTOOL_SRSSH``                   n/a
+  ``ETHTOOL_GTUNABLE``                n/a
+  ``ETHTOOL_STUNABLE``                n/a
+  ``ETHTOOL_GPHYSTATS``               n/a
+  ``ETHTOOL_PERQUEUE``                n/a
+  ``ETHTOOL_GLINKSETTINGS``           ``ETHTOOL_MSG_LINKINFO_GET``
+                                      ``ETHTOOL_MSG_LINKMODES_GET``
+  ``ETHTOOL_SLINKSETTINGS``           ``ETHTOOL_MSG_LINKINFO_SET``
+                                      ``ETHTOOL_MSG_LINKMODES_SET``
+  ``ETHTOOL_PHY_GTUNABLE``            n/a
+  ``ETHTOOL_PHY_STUNABLE``            n/a
+  ``ETHTOOL_GFECPARAM``               n/a
+  ``ETHTOOL_SFECPARAM``               n/a
+  =================================== =====================================
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 319e5e041f38..c4a328f2d57a 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -770,10 +770,10 @@ Some core changes of the new internal format:
     callq foo
     mov %rax,%r13
     mov %rbx,%rdi
-    mov $0x2,%esi
-    mov $0x3,%edx
-    mov $0x4,%ecx
-    mov $0x5,%r8d
+    mov $0x6,%esi
+    mov $0x7,%edx
+    mov $0x8,%ecx
+    mov $0x9,%r8d
     callq bar
     add %r13,%rax
     mov -0x228(%rbp),%rbx
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index d4dca42910d0..d07d9855dcd3 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -13,9 +13,8 @@ Contents:
    can_ucan_protocol
    device_drivers/index
    dsa/index
-   devlink-info-versions
-   devlink-trap
-   devlink-trap-netdevsim
+   devlink/index
+   ethtool-netlink
    ieee802154
    j1939
    kapi
@@ -33,6 +32,7 @@ Contents:
    scaling
    tls
    tls-offload
+   nfc
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 8d4ad1d1ae26..5f53faff4e25 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,10 @@ tcp_no_metrics_save - BOOLEAN
 	degradation.  If set, TCP will not cache metrics on closing
 	connections.
 
+tcp_no_ssthresh_metrics_save - BOOLEAN
+	Controls whether TCP saves ssthresh metrics in the route cache.
+	Default is 1, which disables ssthresh metrics.
+
 tcp_orphan_retries - INTEGER
 	This value influences the timeout of a locally closed TCP connection,
 	when RTO retransmissions remain unacknowledged.
@@ -603,7 +607,7 @@ tcp_synack_retries - INTEGER
 	with the current initial RTO of 1second. With this the final timeout
 	for a passive TCP connection will happen after 63seconds.
 
-tcp_syncookies - BOOLEAN
+tcp_syncookies - INTEGER
 	Only valid when the kernel was compiled with CONFIG_SYN_COOKIES
 	Send out syncookies when the syn backlog queue of a socket
 	overflows. This is to prevent against the common 'SYN flood attack'
@@ -904,8 +908,9 @@ ip_local_port_range - 2 INTEGERS
 	Defines the local port range that is used by TCP and UDP to
 	choose the local port. The first number is the first, the
 	second the last local port number.
-	If possible, it is better these numbers have different parity.
-	(one even and one odd values)
+	If possible, it is better these numbers have different parity
+	(one even and one odd value).
+	Must be greater than or equal to ip_unprivileged_port_start.
 	The default values are 32768 and 60999 respectively.
 
 ip_local_reserved_ports - list of comma separated ranges
@@ -943,8 +948,8 @@ ip_unprivileged_port_start - INTEGER
 	This is a per-namespace sysctl.  It defines the first
 	unprivileged port in the network namespace.  Privileged ports
 	require root or CAP_NET_BIND_SERVICE in order to bind to them.
-	To disable all privileged ports, set this to 0.  It may not
-	overlap with the ip_local_reserved_ports range.
+	To disable all privileged ports, set this to 0.  They must not
+	overlap with the ip_local_port_range.
 
 	Default: 1024
 
@@ -2091,6 +2096,28 @@ pf_enable - INTEGER
 
 	Default: 1
 
+pf_expose - INTEGER
+	Unset or enable/disable pf (pf is short for potentially failed) state
+	exposure.  Applications can control the exposure of the PF path state
+	in the SCTP_PEER_ADDR_CHANGE event and the SCTP_GET_PEER_ADDR_INFO
+	sockopt.   When it's unset, no SCTP_PEER_ADDR_CHANGE event with
+	SCTP_ADDR_PF state will be sent and a SCTP_PF-state transport info
+	can be got via SCTP_GET_PEER_ADDR_INFO sockopt;  When it's enabled,
+	a SCTP_PEER_ADDR_CHANGE event will be sent for a transport becoming
+	SCTP_PF state and a SCTP_PF-state transport info can be got via
+	SCTP_GET_PEER_ADDR_INFO sockopt;  When it's diabled, no
+	SCTP_PEER_ADDR_CHANGE event will be sent and it returns -EACCES when
+	trying to get a SCTP_PF-state transport info via SCTP_GET_PEER_ADDR_INFO
+	sockopt.
+
+	0: Unset pf state exposure, Compatible with old applications.
+
+	1: Disable pf state exposure.
+
+	2: Enable pf state exposure.
+
+	Default: 0
+
 addip_noauth_enable - BOOLEAN
 	Dynamic Address Reconfiguration (ADD-IP) requires the use of
 	authentication to protect the operations of adding or removing new
@@ -2173,6 +2200,18 @@ pf_retrans - INTEGER
 
 	Default: 0
 
+ps_retrans - INTEGER
+	Primary.Switchover.Max.Retrans (PSMR), it's a tunable parameter coming
+	from section-5 "Primary Path Switchover" in rfc7829.  The primary path
+	will be changed to another active path when the path error counter on
+	the old primary path exceeds PSMR, so that "the SCTP sender is allowed
+	to continue data transmission on a new working path even when the old
+	primary destination address becomes active again".   Note this feature
+	is disabled by initializing 'ps_retrans' per netns as 0xffff by default,
+	and its value can't be less than 'pf_retrans' when changing by sysctl.
+
+	Default: 0xffff
+
 rto_initial - INTEGER
 	The initial round trip timeout value in milliseconds that will be used
 	in calculating round trip times.  This is the initial time interval
diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst
index dc60b13fcd09..f5be243d250a 100644
--- a/Documentation/networking/j1939.rst
+++ b/Documentation/networking/j1939.rst
@@ -339,7 +339,7 @@ To claim an address following code example can be used:
 			.pgn = J1939_PGN_ADDRESS_CLAIMED,
 			.pgn_mask = J1939_PGN_PDU1_MAX,
 		}, {
-			.pgn = J1939_PGN_ADDRESS_REQUEST,
+			.pgn = J1939_PGN_REQUEST,
 			.pgn_mask = J1939_PGN_PDU1_MAX,
 		}, {
 			.pgn = J1939_PGN_ADDRESS_COMMANDED,
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 642fa963be3c..d5c9320901c3 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -34,8 +34,8 @@ the names, the ``net`` tree is for fixes to existing code already in the
 mainline tree from Linus, and ``net-next`` is where the new code goes
 for the future release.  You can find the trees here:
 
-- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 
 Q: How often do changes from these trees make it to the mainline Linus tree?
 ----------------------------------------------------------------------------
diff --git a/Documentation/networking/nfc.rst b/Documentation/networking/nfc.rst
new file mode 100644
index 000000000000..9aab3a88c9b2
--- /dev/null
+++ b/Documentation/networking/nfc.rst
@@ -0,0 +1,130 @@
+===================
+Linux NFC subsystem
+===================
+
+The Near Field Communication (NFC) subsystem is required to standardize the
+NFC device drivers development and to create an unified userspace interface.
+
+This document covers the architecture overview, the device driver interface
+description and the userspace interface description.
+
+Architecture overview
+=====================
+
+The NFC subsystem is responsible for:
+      - NFC adapters management;
+      - Polling for targets;
+      - Low-level data exchange;
+
+The subsystem is divided in some parts. The 'core' is responsible for
+providing the device driver interface. On the other side, it is also
+responsible for providing an interface to control operations and low-level
+data exchange.
+
+The control operations are available to userspace via generic netlink.
+
+The low-level data exchange interface is provided by the new socket family
+PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
+
+.. code-block:: none
+
+        +--------------------------------------+
+        |              USER SPACE              |
+        +--------------------------------------+
+            ^                       ^
+            | low-level             | control
+            | data exchange         | operations
+            |                       |
+            |                       v
+            |                  +-----------+
+            | AF_NFC           |  netlink  |
+            | socket           +-----------+
+            | raw                   ^
+            |                       |
+            v                       v
+        +---------+            +-----------+
+        | rawsock | <--------> |   core    |
+        +---------+            +-----------+
+                                    ^
+                                    |
+                                    v
+                               +-----------+
+                               |  driver   |
+                               +-----------+
+
+Device Driver Interface
+=======================
+
+When registering on the NFC subsystem, the device driver must inform the core
+of the set of supported NFC protocols and the set of ops callbacks. The ops
+callbacks that must be implemented are the following:
+
+* start_poll - setup the device to poll for targets
+* stop_poll - stop on progress polling operation
+* activate_target - select and initialize one of the targets found
+* deactivate_target - deselect and deinitialize the selected target
+* data_exchange - send data and receive the response (transceive operation)
+
+Userspace interface
+===================
+
+The userspace interface is divided in control operations and low-level data
+exchange operation.
+
+CONTROL OPERATIONS:
+
+Generic netlink is used to implement the interface to the control operations.
+The operations are composed by commands and events, all listed below:
+
+* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
+* NFC_CMD_START_POLL - setup a specific device to polling for targets
+* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
+* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
+
+* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
+* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
+* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
+  are found
+
+The user must call START_POLL to poll for NFC targets, passing the desired NFC
+protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
+state until it finds any target. However, the user can stop the polling
+operation by calling STOP_POLL command. In this case, it will be checked if
+the requester of STOP_POLL is the same of START_POLL.
+
+If the polling operation finds one or more targets, the event TARGETS_FOUND is
+sent (including the device id). The user must call GET_TARGET to get the list of
+all targets found by such device. Each reply message has target attributes with
+relevant information such as the supported NFC protocols.
+
+All polling operations requested through one netlink socket are stopped when
+it's closed.
+
+LOW-LEVEL DATA EXCHANGE:
+
+The userspace must use PF_NFC sockets to perform any data communication with
+targets. All NFC sockets use AF_NFC::
+
+        struct sockaddr_nfc {
+               sa_family_t sa_family;
+               __u32 dev_idx;
+               __u32 target_idx;
+               __u32 nfc_protocol;
+        };
+
+To establish a connection with one target, the user must create an
+NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
+struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
+netlink event. As a target can support more than one NFC protocol, the user
+must inform which protocol it wants to use.
+
+Internally, 'connect' will result in an activate_target call to the driver.
+When the socket is closed, the target is deactivated.
+
+The data format exchanged through the sockets is NFC protocol dependent. For
+instance, when communicating with MIFARE tags, the data exchanged are MIFARE
+commands and their responses.
+
+The first received package is the response to the first sent package and so
+on. In order to allow valid "empty" responses, every data received has a NULL
+header of 1 byte.
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt
deleted file mode 100644
index b24c29bdae27..000000000000
--- a/Documentation/networking/nfc.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-Linux NFC subsystem
-===================
-
-The Near Field Communication (NFC) subsystem is required to standardize the
-NFC device drivers development and to create an unified userspace interface.
-
-This document covers the architecture overview, the device driver interface
-description and the userspace interface description.
-
-Architecture overview
----------------------
-
-The NFC subsystem is responsible for:
-      - NFC adapters management;
-      - Polling for targets;
-      - Low-level data exchange;
-
-The subsystem is divided in some parts. The 'core' is responsible for
-providing the device driver interface. On the other side, it is also
-responsible for providing an interface to control operations and low-level
-data exchange.
-
-The control operations are available to userspace via generic netlink.
-
-The low-level data exchange interface is provided by the new socket family
-PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
-
-
-             +--------------------------------------+
-             |              USER SPACE              |
-             +--------------------------------------+
-                 ^                       ^
-                 | low-level             | control
-                 | data exchange         | operations
-                 |                       |
-                 |                       v
-                 |                  +-----------+
-                 | AF_NFC           |  netlink  |
-                 | socket           +-----------+
-                 | raw                   ^
-                 |                       |
-                 v                       v
-             +---------+            +-----------+
-             | rawsock | <--------> |   core    |
-             +---------+            +-----------+
-                                         ^
-                                         |
-                                         v
-                                    +-----------+
-                                    |  driver   |
-                                    +-----------+
-
-Device Driver Interface
------------------------
-
-When registering on the NFC subsystem, the device driver must inform the core
-of the set of supported NFC protocols and the set of ops callbacks. The ops
-callbacks that must be implemented are the following:
-
-* start_poll - setup the device to poll for targets
-* stop_poll - stop on progress polling operation
-* activate_target - select and initialize one of the targets found
-* deactivate_target - deselect and deinitialize the selected target
-* data_exchange - send data and receive the response (transceive operation)
-
-Userspace interface
---------------------
-
-The userspace interface is divided in control operations and low-level data
-exchange operation.
-
-CONTROL OPERATIONS:
-
-Generic netlink is used to implement the interface to the control operations.
-The operations are composed by commands and events, all listed below:
-
-* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
-* NFC_CMD_START_POLL - setup a specific device to polling for targets
-* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
-* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
-
-* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
-* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
-* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
-are found
-
-The user must call START_POLL to poll for NFC targets, passing the desired NFC
-protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
-state until it finds any target. However, the user can stop the polling
-operation by calling STOP_POLL command. In this case, it will be checked if
-the requester of STOP_POLL is the same of START_POLL.
-
-If the polling operation finds one or more targets, the event TARGETS_FOUND is
-sent (including the device id). The user must call GET_TARGET to get the list of
-all targets found by such device. Each reply message has target attributes with
-relevant information such as the supported NFC protocols.
-
-All polling operations requested through one netlink socket are stopped when
-it's closed.
-
-LOW-LEVEL DATA EXCHANGE:
-
-The userspace must use PF_NFC sockets to perform any data communication with
-targets. All NFC sockets use AF_NFC:
-
-struct sockaddr_nfc {
-       sa_family_t sa_family;
-       __u32 dev_idx;
-       __u32 target_idx;
-       __u32 nfc_protocol;
-};
-
-To establish a connection with one target, the user must create an
-NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
-struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
-netlink event. As a target can support more than one NFC protocol, the user
-must inform which protocol it wants to use.
-
-Internally, 'connect' will result in an activate_target call to the driver.
-When the socket is closed, the target is deactivated.
-
-The data format exchanged through the sockets is NFC protocol dependent. For
-instance, when communicating with MIFARE tags, the data exchanged are MIFARE
-commands and their responses.
-
-The first received package is the response to the first sent package and so
-on. In order to allow valid "empty" responses, every data received has a NULL
-header of 1 byte.
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index a689966bc4be..1e4735cc0553 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -73,7 +73,7 @@ The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
 electrical signal interface using a synchronous 125Mhz clock signal and several
 data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
 between the clock line (RXC or TXC) and the data lines to let the PHY (clock
-sink) have enough setup and hold times to sample the data lines correctly. The
+sink) have a large enough setup and hold time to sample the data lines correctly. The
 PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
 the PHY driver and optionally the MAC driver, implement the required delay. The
 values of phy_interface_t must be understood from the perspective of the PHY
@@ -267,6 +267,24 @@ Some of the interface modes are described below:
     duplex, pause or other settings.  This is dependent on the MAC and/or
     PHY behaviour.
 
+``PHY_INTERFACE_MODE_10GBASER``
+    This is the IEEE 802.3 Clause 49 defined 10GBASE-R protocol used with
+    various different mediums. Please refer to the IEEE standard for a
+    definition of this.
+
+    Note: 10GBASE-R is just one protocol that can be used with XFI and SFI.
+    XFI and SFI permit multiple protocols over a single SERDES lane, and
+    also defines the electrical characteristics of the signals with a host
+    compliance board plugged into the host XFP/SFP connector. Therefore,
+    XFI and SFI are not PHY interface types in their own right.
+
+``PHY_INTERFACE_MODE_10GKR``
+    This is the IEEE 802.3 Clause 49 defined 10GBASE-R with Clause 73
+    autonegotiation. Please refer to the IEEE standard for further
+    information.
+
+    Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
+    use of this definition.
 
 Pause frames / flow control
 ===========================
@@ -352,7 +370,8 @@ Fills the phydev structure with up-to-date information about the current
 settings in the PHY.
 ::
 
- int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_ksettings_set(struct phy_device *phydev,
+                               const struct ethtool_link_ksettings *cmd);
 
 Ethtool convenience functions.
 ::
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt
index 61daf4b39600..fd563aff5fc9 100644
--- a/Documentation/networking/ppp_generic.txt
+++ b/Documentation/networking/ppp_generic.txt
@@ -378,6 +378,8 @@ an interface unit are:
   CONFIG_PPP_FILTER option is enabled, the set of packets which reset
   the transmit and receive idle timers is restricted to those which
   pass the `active' packet filter.
+  Two versions of this command exist, to deal with user space
+  expecting times as either 32-bit or 64-bit time_t seconds.
 
 * PPPIOCSMAXCID sets the maximum connection-ID parameter (and thus the
   number of connection slots) for the TCP header compressor and
diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index a5e00a159d21..d753a309f9d1 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -251,7 +251,8 @@ this documentation.
 	phylink_mac_change(priv->phylink, link_is_up);
 
     where ``link_is_up`` is true if the link is currently up or false
-    otherwise.
+    otherwise. If a MAC is unable to provide these interrupts, then
+    it should set ``priv->phylink_config.pcs_poll = true;`` in step 9.
 
 11. Verify that the driver does not call::
 
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index 5bcbf75e2025..8cb2cd4e2a80 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -213,3 +213,29 @@ A patchset to OpenSSL to use ktls as the record layer is
 of calling send directly after a handshake using gnutls.
 Since it doesn't implement a full record layer, control
 messages are not supported.
+
+Statistics
+==========
+
+TLS implementation exposes the following per-namespace statistics
+(``/proc/net/tls_stat``):
+
+- ``TlsCurrTxSw``, ``TlsCurrRxSw`` -
+  number of TX and RX sessions currently installed where host handles
+  cryptography
+
+- ``TlsCurrTxDevice``, ``TlsCurrRxDevice`` -
+  number of TX and RX sessions currently installed where NIC handles
+  cryptography
+
+- ``TlsTxSw``, ``TlsRxSw`` -
+  number of TX and RX sessions opened with host cryptography
+
+- ``TlsTxDevice``, ``TlsRxDevice`` -
+  number of TX and RX sessions opened with NIC cryptography
+
+- ``TlsDecryptError`` -
+  record decryption failed (e.g. due to incorrect authentication tag)
+
+- ``TlsDeviceRxResync`` -
+  number of RX resyncs sent to NICs handling cryptography
diff --git a/Documentation/nvdimm/maintainer-entry-profile.rst b/Documentation/nvdimm/maintainer-entry-profile.rst
new file mode 100644
index 000000000000..efe37adadcea
--- /dev/null
+++ b/Documentation/nvdimm/maintainer-entry-profile.rst
@@ -0,0 +1,60 @@
+LIBNVDIMM Maintainer Entry Profile
+==================================
+
+Overview
+--------
+The libnvdimm subsystem manages persistent memory across multiple
+architectures. The mailing list, is tracked by patchwork here:
+https://patchwork.kernel.org/project/linux-nvdimm/list/
+...and that instance is configured to give feedback to submitters on
+patch acceptance and upstream merge. Patches are merged to either the
+'libnvdimm-fixes', or 'libnvdimm-for-next' branch. Those branches are
+available here:
+https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git/
+
+In general patches can be submitted against the latest -rc, however if
+the incoming code change is dependent on other pending changes then the
+patch should be based on the libnvdimm-for-next branch. However, since
+persistent memory sits at the intersection of storage and memory there
+are cases where patches are more suitable to be merged through a
+Filesystem or the Memory Management tree. When in doubt copy the nvdimm
+list and the maintainers will help route.
+
+Submissions will be exposed to the kbuild robot for compile regression
+testing. It helps to get a success notification from that infrastructure
+before submitting, but it is not required.
+
+
+Submit Checklist Addendum
+-------------------------
+There are unit tests for the subsystem via the ndctl utility:
+https://github.com/pmem/ndctl
+Those tests need to be passed before the patches go upstream, but not
+necessarily before initial posting. Contact the list if you need help
+getting the test environment set up.
+
+ACPI Device Specific Methods (_DSM)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Before patches enabling for a new _DSM family will be considered it must
+be assigned a format-interface-code from the NVDIMM Sub-team of the ACPI
+Specification Working Group. In general, the stance of the subsystem is
+to push back on the proliferation of NVDIMM command sets, do strongly
+consider implementing support for an existing command set. See
+drivers/acpi/nfit/nfit.h for the set of support command sets.
+
+
+Key Cycle Dates
+---------------
+New submissions can be sent at any time, but if they intend to hit the
+next merge window they should be sent before -rc4, and ideally
+stabilized in the libnvdimm-for-next branch by -rc6. Of course if a
+patch set requires more than 2 weeks of review -rc4 is already too late
+and some patches may require multiple development cycles to review.
+
+
+Review Cadence
+--------------
+In general, please wait up to one week before pinging for feedback. A
+private mail reminder is preferred. Alternatively ask for other
+developers that have Reviewed-by tags for libnvdimm changes to take a
+look and offer their opinion.
diff --git a/Documentation/padata.txt b/Documentation/padata.txt
deleted file mode 100644
index b37ba1eaace3..000000000000
--- a/Documentation/padata.txt
+++ /dev/null
@@ -1,163 +0,0 @@
-=======================================
-The padata parallel execution mechanism
-=======================================
-
-:Last updated: for 2.6.36
-
-Padata is a mechanism by which the kernel can farm work out to be done in
-parallel on multiple CPUs while retaining the ordering of tasks.  It was
-developed for use with the IPsec code, which needs to be able to perform
-encryption and decryption on large numbers of packets without reordering
-those packets.  The crypto developers made a point of writing padata in a
-sufficiently general fashion that it could be put to other uses as well.
-
-The first step in using padata is to set up a padata_instance structure for
-overall control of how tasks are to be run::
-
-    #include <linux/padata.h>
-
-    struct padata_instance *padata_alloc(const char *name,
-					 const struct cpumask *pcpumask,
-					 const struct cpumask *cbcpumask);
-
-'name' simply identifies the instance.
-
-The pcpumask describes which processors will be used to execute work
-submitted to this instance in parallel. The cbcpumask defines which
-processors are allowed to be used as the serialization callback processor.
-The workqueue wq is where the work will actually be done; it should be
-a multithreaded queue, naturally.
-
-To allocate a padata instance with the cpu_possible_mask for both
-cpumasks this helper function can be used::
-
-    struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq);
-
-Note: Padata maintains two kinds of cpumasks internally. The user supplied
-cpumasks, submitted by padata_alloc/padata_alloc_possible and the 'usable'
-cpumasks. The usable cpumasks are always a subset of active CPUs in the
-user supplied cpumasks; these are the cpumasks padata actually uses. So
-it is legal to supply a cpumask to padata that contains offline CPUs.
-Once an offline CPU in the user supplied cpumask comes online, padata
-is going to use it.
-
-There are functions for enabling and disabling the instance::
-
-    int padata_start(struct padata_instance *pinst);
-    void padata_stop(struct padata_instance *pinst);
-
-These functions are setting or clearing the "PADATA_INIT" flag;
-if that flag is not set, other functions will refuse to work.
-padata_start returns zero on success (flag set) or -EINVAL if the
-padata cpumask contains no active CPU (flag not set).
-padata_stop clears the flag and blocks until the padata instance
-is unused.
-
-The list of CPUs to be used can be adjusted with these functions::
-
-    int padata_set_cpumasks(struct padata_instance *pinst,
-			    cpumask_var_t pcpumask,
-			    cpumask_var_t cbcpumask);
-    int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
-			   cpumask_var_t cpumask);
-    int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
-    int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
-
-Changing the CPU masks are expensive operations, though, so it should not be
-done with great frequency.
-
-It's possible to change both cpumasks of a padata instance with
-padata_set_cpumasks by specifying the cpumasks for parallel execution (pcpumask)
-and for the serial callback function (cbcpumask). padata_set_cpumask is used to
-change just one of the cpumasks. Here cpumask_type is one of PADATA_CPU_SERIAL,
-PADATA_CPU_PARALLEL and cpumask specifies the new cpumask to use.
-To simply add or remove one CPU from a certain cpumask the functions
-padata_add_cpu/padata_remove_cpu are used. cpu specifies the CPU to add or
-remove and mask is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL.
-
-If a user is interested in padata cpumask changes, he can register to
-the padata cpumask change notifier::
-
-    int padata_register_cpumask_notifier(struct padata_instance *pinst,
-					 struct notifier_block *nblock);
-
-To unregister from that notifier::
-
-    int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
-					   struct notifier_block *nblock);
-
-The padata cpumask change notifier notifies about changes of the usable
-cpumasks, i.e. the subset of active CPUs in the user supplied cpumask.
-
-Padata calls the notifier chain with::
-
-    blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
-				 notification_mask,
-				 &pd_new->cpumask);
-
-Here cpumask_change_notifier is registered notifier, notification_mask
-is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL and cpumask is a pointer
-to a struct padata_cpumask that contains the new cpumask information.
-
-Actually submitting work to the padata instance requires the creation of a
-padata_priv structure::
-
-    struct padata_priv {
-        /* Other stuff here... */
-	void                    (*parallel)(struct padata_priv *padata);
-	void                    (*serial)(struct padata_priv *padata);
-    };
-
-This structure will almost certainly be embedded within some larger
-structure specific to the work to be done.  Most of its fields are private to
-padata, but the structure should be zeroed at initialisation time, and the
-parallel() and serial() functions should be provided.  Those functions will
-be called in the process of getting the work done as we will see
-momentarily.
-
-The submission of work is done with::
-
-    int padata_do_parallel(struct padata_instance *pinst,
-		           struct padata_priv *padata, int cb_cpu);
-
-The pinst and padata structures must be set up as described above; cb_cpu
-specifies which CPU will be used for the final callback when the work is
-done; it must be in the current instance's CPU mask.  The return value from
-padata_do_parallel() is zero on success, indicating that the work is in
-progress. -EBUSY means that somebody, somewhere else is messing with the
-instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being
-in that CPU mask or about a not running instance.
-
-Each task submitted to padata_do_parallel() will, in turn, be passed to
-exactly one call to the above-mentioned parallel() function, on one CPU, so
-true parallelism is achieved by submitting multiple tasks.  parallel() runs with
-software interrupts disabled and thus cannot sleep.  The parallel()
-function gets the padata_priv structure pointer as its lone parameter;
-information about the actual work to be done is probably obtained by using
-container_of() to find the enclosing structure.
-
-Note that parallel() has no return value; the padata subsystem assumes that
-parallel() will take responsibility for the task from this point.  The work
-need not be completed during this call, but, if parallel() leaves work
-outstanding, it should be prepared to be called again with a new job before
-the previous one completes.  When a task does complete, parallel() (or
-whatever function actually finishes the job) should inform padata of the
-fact with a call to::
-
-    void padata_do_serial(struct padata_priv *padata);
-
-At some point in the future, padata_do_serial() will trigger a call to the
-serial() function in the padata_priv structure.  That call will happen on
-the CPU requested in the initial call to padata_do_parallel(); it, too, is
-run with local software interrupts disabled.
-Note that this call may be deferred for a while since the padata code takes
-pains to ensure that tasks are completed in the order in which they were
-submitted.
-
-The one remaining function in the padata API should be called to clean up
-when a padata instance is no longer needed::
-
-    void padata_free(struct padata_instance *pinst);
-
-This function will busy-wait while any remaining tasks are completed, so it
-might be best not to call it while there is work outstanding.
diff --git a/Documentation/power/drivers-testing.rst b/Documentation/power/drivers-testing.rst
index e53f1999fc39..d77d2894f9fe 100644
--- a/Documentation/power/drivers-testing.rst
+++ b/Documentation/power/drivers-testing.rst
@@ -39,9 +39,10 @@ c) Compile the driver directly into the kernel and try the test modes of
 d) Attempt to hibernate with the driver compiled directly into the kernel
    in the "reboot", "shutdown" and "platform" modes.
 
-e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.rst,
-   2).  [As far as the STR tests are concerned, it should not matter whether or
-   not the driver is built as a module.]
+e) Try the test modes of suspend (see:
+   Documentation/power/basic-pm-debugging.rst, 2).  [As far as the STR tests are
+   concerned, it should not matter whether or not the driver is built as a
+   module.]
 
 f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
    (see: Documentation/power/basic-pm-debugging.rst, 2).
diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst
index ef110fe55e82..8bd693399834 100644
--- a/Documentation/power/freezing-of-tasks.rst
+++ b/Documentation/power/freezing-of-tasks.rst
@@ -215,30 +215,31 @@ VI. Are there any precautions to be taken to prevent freezing failures?
 
 Yes, there are.
 
-First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
-from system-wide sleep such as suspend/hibernation is not encouraged.
-If possible, that piece of code must instead hook onto the suspend/hibernation
-notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
-(kernel/cpu.c) for an example.
-
-However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
-that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
-to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
-state. As a consequence, the freezer would not be able to freeze that task,
-leading to freezing failure.
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a
+piece of code from system-wide sleep such as suspend/hibernation is not
+encouraged.  If possible, that piece of code must instead hook onto the
+suspend/hibernation notifiers to achieve mutual exclusion. Look at the
+CPU-Hotplug code (kernel/cpu.c) for an example.
+
+However, if that is not feasible, and grabbing 'system_transition_mutex' is
+deemed necessary, it is strongly discouraged to directly call
+mutex_[un]lock(&system_transition_mutex) since that could lead to freezing
+failures, because if the suspend/hibernate code successfully acquired the
+'system_transition_mutex' lock, and hence that other entity failed to acquire
+the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE state. As a
+consequence, the freezer would not be able to freeze that task, leading to
+freezing failure.
 
 However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
 since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
-only after the entire suspend/hibernation sequence is complete.
-So, to summarize, use [un]lock_system_sleep() instead of directly using
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be
+released only after the entire suspend/hibernation sequence is complete.  So, to
+summarize, use [un]lock_system_sleep() instead of directly using
 mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
 
 V. Miscellaneous
 ================
 
 /sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
-all user space processes or all freezable kernel threads, in unit of millisecond.
-The default value is 20000, with range of unsigned integer.
+all user space processes or all freezable kernel threads, in unit of
+millisecond.  The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst
index 209c7613f5a4..e3cc4f349ea8 100644
--- a/Documentation/power/opp.rst
+++ b/Documentation/power/opp.rst
@@ -73,19 +73,21 @@ factors. Example usage: Thermal management or other exceptional situations where
 SoC framework might choose to disable a higher frequency OPP to safely continue
 operations until that OPP could be re-enabled if possible.
 
-OPP library facilitates this concept in it's implementation. The following
+OPP library facilitates this concept in its implementation. The following
 operational functions operate only on available opps:
-opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
+opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq,
+dev_pm_opp_get_opp_count
 
-dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
-be used for dev_pm_opp_enable/disable functions to make an opp available as required.
+dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer
+which can then be used for dev_pm_opp_enable/disable functions to make an
+opp available as required.
 
 WARNING: Users of OPP library should refresh their availability count using
-get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
-exact mechanism to trigger these or the notification mechanism to other
-dependent subsystems such as cpufreq are left to the discretion of the SoC
-specific framework which uses the OPP library. Similar care needs to be taken
-care to refresh the cpufreq table in cases of these operations.
+get_opp_count if dev_pm_opp_enable/disable functions are invoked for a
+device, the exact mechanism to trigger these or the notification mechanism
+to other dependent subsystems such as cpufreq are left to the discretion of
+the SoC specific framework which uses the OPP library. Similar care needs
+to be taken care to refresh the cpufreq table in cases of these operations.
 
 2. Initial OPP List Registration
 ================================
@@ -99,11 +101,11 @@ OPPs dynamically using the dev_pm_opp_enable / disable functions.
 dev_pm_opp_add
 	Add a new OPP for a specific domain represented by the device pointer.
 	The OPP is defined using the frequency and voltage. Once added, the OPP
-	is assumed to be available and control of it's availability can be done
-	with the dev_pm_opp_enable/disable functions. OPP library internally stores
-	and manages this information in the opp struct. This function may be
-	used by SoC framework to define a optimal list as per the demands of
-	SoC usage environment.
+	is assumed to be available and control of its availability can be done
+	with the dev_pm_opp_enable/disable functions. OPP library
+	internally stores and manages this information in the opp struct.
+	This function may be used by SoC framework to define a optimal list
+	as per the demands of SoC usage environment.
 
 	WARNING:
 		Do not use this function in interrupt context.
@@ -354,7 +356,7 @@ struct dev_pm_opp
 
 struct device
 	This is used to identify a domain to the OPP layer. The
-	nature of the device and it's implementation is left to the user of
+	nature of the device and its implementation is left to the user of
 	OPP library such as the SoC framework.
 
 Overall, in a simplistic view, the data structure operations is represented as
diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
index 0e2ef7429304..0924d29636ad 100644
--- a/Documentation/power/pci.rst
+++ b/Documentation/power/pci.rst
@@ -130,8 +130,8 @@ a full power-on reset sequence and the power-on defaults are restored to the
 device by hardware just as at initial power up.
 
 PCI devices supporting the PCI PM Spec can be programmed to generate PMEs
-while in a low-power state (D1-D3), but they are not required to be capable
-of generating PMEs from all supported low-power states.  In particular, the
+while in any power state (D0-D3), but they are not required to be capable
+of generating PMEs from all supported power states.  In particular, the
 capability of generating PMEs from D3cold is optional and depends on the
 presence of additional voltage (3.3Vaux) allowing the device to remain
 sufficiently active to generate a wakeup signal.
@@ -426,12 +426,12 @@ pm->runtime_idle() callback.
 2.4. System-Wide Power Transitions
 ----------------------------------
 There are a few different types of system-wide power transitions, described in
-Documentation/driver-api/pm/devices.rst.  Each of them requires devices to be handled
-in a specific way and the PM core executes subsystem-level power management
-callbacks for this purpose.  They are executed in phases such that each phase
-involves executing the same subsystem-level callback for every device belonging
-to the given subsystem before the next phase begins.  These phases always run
-after tasks have been frozen.
+Documentation/driver-api/pm/devices.rst.  Each of them requires devices to be
+handled in a specific way and the PM core executes subsystem-level power
+management callbacks for this purpose.  They are executed in phases such that
+each phase involves executing the same subsystem-level callback for every device
+belonging to the given subsystem before the next phase begins.  These phases
+always run after tasks have been frozen.
 
 2.4.1. System Suspend
 ^^^^^^^^^^^^^^^^^^^^^
@@ -600,17 +600,17 @@ using the following PCI bus type's callbacks::
 
 respectively.
 
-The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(),
-but it doesn't put the device into the full power state and doesn't attempt to
-restore its standard configuration registers.  It also executes the device
-driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq().
+The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq().
+It puts the device into the full power state and restores its standard
+configuration registers.  It also executes the device driver's pm->thaw_noirq()
+callback, if defined, instead of pm->resume_noirq().
 
 The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device
 driver's pm->thaw() callback instead of pm->resume().  It is executed
 asynchronously for different PCI devices that don't depend on each other in a
 known way.
 
-The complete phase it the same as for system resume.
+The complete phase is the same as for system resume.
 
 After saving the image, devices need to be powered down before the system can
 enter the target sleep state (ACPI S4 for ACPI-based systems).  This is done in
@@ -636,12 +636,12 @@ System restore requires a hibernation image to be loaded into memory and the
 pre-hibernation memory contents to be restored before the pre-hibernation system
 activity can be resumed.
 
-As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded
-into memory by a fresh instance of the kernel, called the boot kernel, which in
-turn is loaded and run by a boot loader in the usual way.  After the boot kernel
-has loaded the image, it needs to replace its own code and data with the code
-and data of the "hibernated" kernel stored within the image, called the image
-kernel.  For this purpose all devices are frozen just like before creating
+As described in Documentation/driver-api/pm/devices.rst, the hibernation image
+is loaded into memory by a fresh instance of the kernel, called the boot kernel,
+which in turn is loaded and run by a boot loader in the usual way.  After the
+boot kernel has loaded the image, it needs to replace its own code and data with
+the code and data of the "hibernated" kernel stored within the image, called the
+image kernel.  For this purpose all devices are frozen just like before creating
 the image during hibernation, in the
 
 	prepare, freeze, freeze_noirq
@@ -691,12 +691,12 @@ controlling the runtime power management of their devices.
 
 At the time of this writing there are two ways to define power management
 callbacks for a PCI device driver, the recommended one, based on using a
-dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the
-"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
-.resume() callbacks from struct pci_driver are used.  The legacy approach,
-however, doesn't allow one to define runtime power management callbacks and is
-not really suitable for any new drivers.  Therefore it is not covered by this
-document (refer to the source code to learn more about it).
+dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and
+the "legacy" one, in which the .suspend() and .resume() callbacks from struct
+pci_driver are used.  The legacy approach, however, doesn't allow one to define
+runtime power management callbacks and is not really suitable for any new
+drivers.  Therefore it is not covered by this document (refer to the source code
+to learn more about it).
 
 It is recommended that all PCI device drivers define a struct dev_pm_ops object
 containing pointers to power management (PM) callbacks that will be executed by
diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst
index 3097694fba69..0d62d506caf0 100644
--- a/Documentation/power/pm_qos_interface.rst
+++ b/Documentation/power/pm_qos_interface.rst
@@ -8,8 +8,8 @@ one of the parameters.
 
 Two different PM QoS frameworks are available:
 1. PM QoS classes for cpu_dma_latency
-2. the per-device PM QoS framework provides the API to manage the per-device latency
-constraints and PM QoS flags.
+2. The per-device PM QoS framework provides the API to manage the
+   per-device latency constraints and PM QoS flags.
 
 Each parameters have defined units:
 
@@ -47,14 +47,14 @@ void pm_qos_add_request(handle, param_class, target_value):
   pm_qos API functions.
 
 void pm_qos_update_request(handle, new_target_value):
-  Will update the list element pointed to by the handle with the new target value
-  and recompute the new aggregated target, calling the notification tree if the
-  target is changed.
+  Will update the list element pointed to by the handle with the new target
+  value and recompute the new aggregated target, calling the notification tree
+  if the target is changed.
 
 void pm_qos_remove_request(handle):
-  Will remove the element.  After removal it will update the aggregate target and
-  call the notification tree if the target was changed as a result of removing
-  the request.
+  Will remove the element.  After removal it will update the aggregate target
+  and call the notification tree if the target was changed as a result of
+  removing the request.
 
 int pm_qos_request(param_class):
   Returns the aggregated value for a given PM QoS class.
@@ -167,9 +167,9 @@ int dev_pm_qos_expose_flags(device, value)
   change the value of the PM_QOS_FLAG_NO_POWER_OFF flag.
 
 void dev_pm_qos_hide_flags(device)
-  Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
-  of flags and remove sysfs attribute pm_qos_no_power_off from the device's power
-  directory.
+  Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS
+  list of flags and remove sysfs attribute pm_qos_no_power_off from the device's
+  power directory.
 
 Notification mechanisms:
 
@@ -179,8 +179,8 @@ int dev_pm_qos_add_notifier(device, notifier, type):
   Adds a notification callback function for the device for a particular request
   type.
 
-  The callback is called when the aggregated value of the device constraints list
-  is changed.
+  The callback is called when the aggregated value of the device constraints
+  list is changed.
 
 int dev_pm_qos_remove_notifier(device, notifier, type):
   Removes the notification callback function for the device.
diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index 2c2ec99b5088..ab8406c84254 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -268,8 +268,8 @@ defined in include/linux/pm.h:
   `unsigned int runtime_auto;`
     - if set, indicates that the user space has allowed the device driver to
       power manage the device at run time via the /sys/devices/.../power/control
-      `interface;` it may only be modified with the help of the pm_runtime_allow()
-      and pm_runtime_forbid() helper functions
+      `interface;` it may only be modified with the help of the
+      pm_runtime_allow() and pm_runtime_forbid() helper functions
 
   `unsigned int no_callbacks;`
     - indicates that the device does not use the runtime PM callbacks (see
diff --git a/Documentation/power/suspend-and-cpuhotplug.rst b/Documentation/power/suspend-and-cpuhotplug.rst
index 7ac8e1f549f4..572d968c5375 100644
--- a/Documentation/power/suspend-and-cpuhotplug.rst
+++ b/Documentation/power/suspend-and-cpuhotplug.rst
@@ -106,8 +106,8 @@ execution during resume):
 * Release system_transition_mutex lock.
 
 
-It is to be noted here that the system_transition_mutex lock is acquired at the very
-beginning, when we are just starting out to suspend, and then released only
+It is to be noted here that the system_transition_mutex lock is acquired at the
+very beginning, when we are just starting out to suspend, and then released only
 after the entire cycle is complete (i.e., suspend + resume).
 
 ::
@@ -165,7 +165,8 @@ Important files and functions/entry points:
 
 - kernel/power/process.c : freeze_processes(), thaw_processes()
 - kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
-- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
+- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](),
+  [disable|enable]_nonboot_cpus()
 
 
 
diff --git a/Documentation/power/swsusp.rst b/Documentation/power/swsusp.rst
index d000312f6965..8524f079e05c 100644
--- a/Documentation/power/swsusp.rst
+++ b/Documentation/power/swsusp.rst
@@ -118,7 +118,8 @@ In a really perfect world::
 
   echo 1 > /proc/acpi/sleep       # for standby
   echo 2 > /proc/acpi/sleep       # for suspend to ram
-  echo 3 > /proc/acpi/sleep       # for suspend to ram, but with more power conservative
+  echo 3 > /proc/acpi/sleep       # for suspend to ram, but with more power
+                                  # conservative
   echo 4 > /proc/acpi/sleep       # for suspend to disk
   echo 5 > /proc/acpi/sleep       # for shutdown unfriendly the system
 
@@ -192,8 +193,8 @@ Q:
 
 A:
   The freezing of tasks is a mechanism by which user space processes and some
-  kernel threads are controlled during hibernation or system-wide suspend (on some
-  architectures).  See freezing-of-tasks.txt for details.
+  kernel threads are controlled during hibernation or system-wide suspend (on
+  some architectures).  See freezing-of-tasks.txt for details.
 
 Q:
   What is the difference between "platform" and "shutdown"?
@@ -282,7 +283,8 @@ A:
       suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
       with state snapshot
 
-      state snapshot: copy of whole used memory is taken with interrupts disabled
+      state snapshot: copy of whole used memory is taken with interrupts
+      disabled
 
       resume(): devices are woken up so that we can write image to swap
 
@@ -353,8 +355,8 @@ Q:
 
 A:
   Generally, yes, you can.  However, it requires you to use the "resume=" and
-  "resume_offset=" kernel command line parameters, so the resume from a swap file
-  cannot be initiated from an initrd or initramfs image.  See
+  "resume_offset=" kernel command line parameters, so the resume from a swap
+  file cannot be initiated from an initrd or initramfs image.  See
   swsusp-and-swap-files.txt for details.
 
 Q:
diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index db7b6a880f52..ba5edb3211c0 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -19,6 +19,7 @@ powerpc
     firmware-assisted-dump
     hvcs
     isa-versions
+    kaslr-booke32
     mpc52xx
     pci_iov_resource_on_powernv
     pmu-ebb
diff --git a/Documentation/powerpc/kaslr-booke32.rst b/Documentation/powerpc/kaslr-booke32.rst
new file mode 100644
index 000000000000..8b259fdfdf03
--- /dev/null
+++ b/Documentation/powerpc/kaslr-booke32.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+KASLR for Freescale BookE32
+===========================
+
+The word KASLR stands for Kernel Address Space Layout Randomization.
+
+This document tries to explain the implementation of the KASLR for
+Freescale BookE32. KASLR is a security feature that deters exploit
+attempts relying on knowledge of the location of kernel internals.
+
+Since CONFIG_RELOCATABLE has already supported, what we need to do is
+map or copy kernel to a proper place and relocate. Freescale Book-E
+parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
+entries are not suitable to map the kernel directly in a randomized
+region, so we chose to copy the kernel to a proper place and restart to
+relocate.
+
+Entropy is derived from the banner and timer base, which will change every
+build and boot. This not so much safe so additionally the bootloader may
+pass entropy via the /chosen/kaslr-seed node in device tree.
+
+We will use the first 512M of the low memory to randomize the kernel
+image. The memory will be split in 64M zones. We will use the lower 8
+bit of the entropy to decide the index of the 64M zone. Then we chose a
+16K aligned offset inside the 64M zone to put the kernel in::
+
+    KERNELBASE
+
+        |-->   64M   <--|
+        |               |
+        +---------------+    +----------------+---------------+
+        |               |....|    |kernel|    |               |
+        +---------------+    +----------------+---------------+
+        |                         |
+        |----->   offset    <-----|
+
+                              kernstart_virt_addr
+
+To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enable and you
+want to disable it at runtime, add "nokaslr" to the kernel cmdline.
diff --git a/Documentation/ioctl/botching-up-ioctls.rst b/Documentation/process/botching-up-ioctls.rst
index ac697fef3545..2d4829b2fb09 100644
--- a/Documentation/ioctl/botching-up-ioctls.rst
+++ b/Documentation/process/botching-up-ioctls.rst
@@ -46,7 +46,7 @@ will need to add a 32-bit compat layer:
    conversion or worse, fiddle the raw __u64 through your code since that
    diminishes the checking tools like sparse can provide. The macro
    u64_to_user_ptr can be used in the kernel to avoid warnings about integers
-   and pointres of different sizes.
+   and pointers of different sizes.
 
 
 Basics
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index ada573b7d703..edb296c52f61 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -988,7 +988,7 @@ Similarly, if you need to calculate the size of some structure member, use
 
 .. code-block:: c
 
-	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+	#define sizeof_field(t, f) (sizeof(((t*)0)->f))
 
 There are also min() and max() macros that do strict type checking if you
 need them.  Feel free to peruse that header file to see what else is already
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index a3c3349046c4..33edae654599 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -1,3 +1,5 @@
+.. _embargoed_hardware_issues:
+
 Embargoed hardware issues
 =========================
 
@@ -36,7 +38,10 @@ issue according to our documented process.
 The list is encrypted and email to the list can be sent by either PGP or
 S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME
 certificate. The list's PGP key and S/MIME certificate are available from
-https://www.kernel.org/....
+the following URLs:
+
+  - PGP: https://www.kernel.org/static/files/hardware-security.asc
+  - S/MIME: https://www.kernel.org/static/files/hardware-security.crt
 
 While hardware security issues are often handled by the affected hardware
 vendor, we welcome contact from researchers or individuals who have
@@ -55,14 +60,14 @@ Operation of mailing-lists
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The encrypted mailing-lists which are used in our process are hosted on
-Linux Foundation's IT infrastructure. By providing this service Linux
-Foundation's director of IT Infrastructure security technically has the
-ability to access the embargoed information, but is obliged to
-confidentiality by his employment contract. Linux Foundation's director of
-IT Infrastructure security is also responsible for the kernel.org
-infrastructure.
-
-The Linux Foundation's current director of IT Infrastructure security is
+Linux Foundation's IT infrastructure. By providing this service, members
+of Linux Foundation's IT operations personnel technically have the
+ability to access the embargoed information, but are obliged to
+confidentiality by their employment contract. Linux Foundation IT
+personnel are also responsible for operating and managing the rest of
+kernel.org infrastructure.
+
+The Linux Foundation's current director of IT Project infrastructure is
 Konstantin Ryabitsev.
 
 
@@ -240,7 +245,7 @@ an involved disclosed party. The current ambassadors list:
 
   ============= ========================================================
   ARM
-  AMD
+  AMD		Tom Lendacky <tom.lendacky@amd.com>
   IBM
   Intel		Tony Luck <tony.luck@intel.com>
   Qualcomm	Trilok Soni <tsoni@codeaurora.org>
@@ -255,7 +260,7 @@ an involved disclosed party. The current ambassadors list:
   Red Hat	Josh Poimboeuf <jpoimboe@redhat.com>
   SUSE		Jiri Kosina <jkosina@suse.cz>
 
-  Amazon
+  Amazon	Peter Bowen <pzb@amzn.com>
   Google	Kees Cook <keescook@chromium.org>
   ============= ========================================================
 
@@ -274,7 +279,7 @@ software decrypts the email and re-encrypts it individually for each
 subscriber with the subscriber's PGP key or S/MIME certificate. Details
 about the mailing-list software and the setup which is used to ensure the
 security of the lists and protection of the data can be found here:
-https://www.kernel.org/....
+https://korg.wiki.kernel.org/userdoc/remail.
 
 List keys
 ^^^^^^^^^
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index e2c9ffc682c5..6399d92f0b21 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -46,6 +46,7 @@ Other guides to the community that are of interest to most developers are:
    kernel-docs
    deprecated
    embargoed-hardware-issues
+   maintainers
 
 These are some overall technical guides that have been put here for now for
 lack of a better place.
@@ -57,7 +58,9 @@ lack of a better place.
    adding-syscalls
    magic-number
    volatile-considered-harmful
+   botching-up-ioctls
    clang-format
+   ../riscv/patch-acceptance
 
 .. only::  subproject and html
 
diff --git a/Documentation/process/magic-number.rst b/Documentation/process/magic-number.rst
index 547bbf28e615..eee9b44553b3 100644
--- a/Documentation/process/magic-number.rst
+++ b/Documentation/process/magic-number.rst
@@ -81,7 +81,6 @@ FF_MAGIC              0x4646           fc_info                  ``drivers/net/ip
 ISICOM_MAGIC          0x4d54           isi_port                 ``include/linux/isicom.h``
 PTY_MAGIC             0x5001                                    ``drivers/char/pty.c``
 PPP_MAGIC             0x5002           ppp                      ``include/linux/if_pppvar.h``
-SERIAL_MAGIC          0x5301           async_struct             ``include/linux/serial.h``
 SSTATE_MAGIC          0x5302           serial_state             ``include/linux/serial.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
 STRIP_MAGIC           0x5303           strip                    ``drivers/net/strip.c``
diff --git a/Documentation/process/maintainers.rst b/Documentation/process/maintainers.rst
new file mode 100644
index 000000000000..6174cfb4138f
--- /dev/null
+++ b/Documentation/process/maintainers.rst
@@ -0,0 +1 @@
+.. maintainers-include::
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index fb56297f70dc..ba5e944c7a63 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -782,7 +782,58 @@ helpful, you can use the https://lkml.kernel.org/ redirector (e.g., in
 the cover email text) to link to an earlier version of the patch series.
 
 
-16) Sending ``git pull`` requests
+16) Providing base tree information
+-----------------------------------
+
+When other developers receive your patches and start the review process,
+it is often useful for them to know where in the tree history they
+should place your work. This is particularly useful for automated CI
+processes that attempt to run a series of tests in order to establish
+the quality of your submission before the maintainer starts the review.
+
+If you are using ``git format-patch`` to generate your patches, you can
+automatically include the base tree information in your submission by
+using the ``--base`` flag. The easiest and most convenient way to use
+this option is with topical branches::
+
+    $ git checkout -t -b my-topical-branch master
+    Branch 'my-topical-branch' set up to track local branch 'master'.
+    Switched to a new branch 'my-topical-branch'
+
+    [perform your edits and commits]
+
+    $ git format-patch --base=auto --cover-letter -o outgoing/ master
+    outgoing/0000-cover-letter.patch
+    outgoing/0001-First-Commit.patch
+    outgoing/...
+
+When you open ``outgoing/0000-cover-letter.patch`` for editing, you will
+notice that it will have the ``base-commit:`` trailer at the very
+bottom, which provides the reviewer and the CI tools enough information
+to properly perform ``git am`` without worrying about conflicts::
+
+    $ git checkout -b patch-review [base-commit-id]
+    Switched to a new branch 'patch-review'
+    $ git am patches.mbox
+    Applying: First Commit
+    Applying: ...
+
+Please see ``man git-format-patch`` for more information about this
+option.
+
+.. note::
+
+    The ``--base`` feature was introduced in git version 2.9.0.
+
+If you are not using git to format your patches, you can still include
+the same ``base-commit`` trailer to indicate the commit hash of the tree
+on which your work is based. You should add it either in the cover
+letter or in the first patch of the series and it should be placed
+either below the ``---`` line or at the very bottom of all other
+content, right before your email signature.
+
+
+17) Sending ``git pull`` requests
 ---------------------------------
 
 If you have a series of patches, it may be most convenient to have the
diff --git a/Documentation/riscv/boot-image-header.rst b/Documentation/riscv/boot-image-header.rst
index 7b4d1d747585..d7752533865f 100644
--- a/Documentation/riscv/boot-image-header.rst
+++ b/Documentation/riscv/boot-image-header.rst
@@ -21,8 +21,8 @@ The following 64-byte header is present in decompressed Linux kernel image::
 	u32 res1 = 0;		  /* Reserved */
 	u64 res2 = 0;		  /* Reserved */
 	u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */
-	u32 magic2 = 0x56534905;  /* Magic number 2, little endian, "RSC\x05" */
-	u32 res4;		  /* Reserved for PE COFF offset */
+	u32 magic2 = 0x05435352;  /* Magic number 2, little endian, "RSC\x05" */
+	u32 res3;		  /* Reserved for PE COFF offset */
 
 This header format is compliant with PE/COFF header and largely inspired from
 ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common
@@ -34,7 +34,7 @@ Notes
 - This header can also be reused to support EFI stub for RISC-V in future. EFI
   specification needs PE/COFF image header in the beginning of the kernel image
   in order to load it as an EFI application. In order to support EFI stub,
-  code0 should be replaced with "MZ" magic string and res5(at offset 0x3c) should
+  code0 should be replaced with "MZ" magic string and res3(at offset 0x3c) should
   point to the rest of the PE/COFF header.
 
 - version field indicate header version number
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index 215fd3c1f2d5..fa33bffd8992 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -7,6 +7,7 @@ RISC-V architecture
 
     boot-image-header
     pmu
+    patch-acceptance
 
 .. only::  subproject and html
 
diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
new file mode 100644
index 000000000000..dfe0ac5624fb
--- /dev/null
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+arch/riscv maintenance guidelines for developers
+================================================
+
+Overview
+--------
+The RISC-V instruction set architecture is developed in the open:
+in-progress drafts are available for all to review and to experiment
+with implementations.  New module or extension drafts can change
+during the development process - sometimes in ways that are
+incompatible with previous drafts.  This flexibility can present a
+challenge for RISC-V Linux maintenance.  Linux maintainers disapprove
+of churn, and the Linux development process prefers well-reviewed and
+tested code over experimental code.  We wish to extend these same
+principles to the RISC-V-related code that will be accepted for
+inclusion in the kernel.
+
+Submit Checklist Addendum
+-------------------------
+We'll only accept patches for new modules or extensions if the
+specifications for those modules or extensions are listed as being
+"Frozen" or "Ratified" by the RISC-V Foundation.  (Developers may, of
+course, maintain their own Linux kernel trees that contain code for
+any draft extensions that they wish.)
+
+Additionally, the RISC-V specification allows implementors to create
+their own custom extensions.  These custom extensions aren't required
+to go through any review or ratification process by the RISC-V
+Foundation.  To avoid the maintenance complexity and potential
+performance impact of adding kernel code for implementor-specific
+RISC-V extensions, we'll only to accept patches for extensions that
+have been officially frozen or ratified by the RISC-V Foundation.
+(Implementors, may, of course, maintain their own Linux kernel trees
+containing code for any custom extensions that they wish.)
diff --git a/Documentation/scheduler/sched-stats.rst b/Documentation/scheduler/sched-stats.rst
index 0cb0aa714545..dd9b99a025f7 100644
--- a/Documentation/scheduler/sched-stats.rst
+++ b/Documentation/scheduler/sched-stats.rst
@@ -28,7 +28,7 @@ of these will need to start with a baseline observation and then calculate
 the change in the counters at each subsequent observation.  A perl script
 which does this for many of the fields is available at
 
-    http://eaglet.rain.com/rick/linux/schedstat/
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/
 
 Note that any such script will necessarily be version-specific, as the main
 reason to change versions is changes in the output format.  For those wishing
@@ -164,4 +164,4 @@ report on how well a particular process or set of processes is faring
 under the scheduler's policies.  A simple version of such a program is
 available at
 
-    http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index c1dd4939f4ae..2a4be1c3e6db 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1084,7 +1084,8 @@ of interest:
                    commands to the adapter.
     this_id      - scsi id of host (scsi initiator) or -1 if not known
     sg_tablesize - maximum scatter gather elements allowed by host.
-                   0 implies scatter gather not supported by host
+                   Set this to SG_ALL or less to avoid chained SG lists.
+                   Must be at least 1.
     max_sectors  - maximum number of sectors (usually 512 bytes) allowed
                    in a single SCSI command. The default value of 0 leads
                    to a setting of SCSI_DEFAULT_MAX_SECTORS (defined in
diff --git a/Documentation/scsi/smartpqi.txt b/Documentation/scsi/smartpqi.txt
index 201f80c7c050..df129f55ace5 100644
--- a/Documentation/scsi/smartpqi.txt
+++ b/Documentation/scsi/smartpqi.txt
@@ -29,7 +29,7 @@ smartpqi specific entries in /sys
   smartpqi host attributes:
   -------------------------
   /sys/class/scsi_host/host*/rescan
-  /sys/class/scsi_host/host*/version
+  /sys/class/scsi_host/host*/driver_version
 
   The host rescan attribute is a write only attribute. Writing to this
   attribute will trigger the driver to scan for new, changed, or removed
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index d6d8b0b756b6..d9b0b859018b 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1102,7 +1102,7 @@ payload contents" for more information.
     See also Documentation/security/keys/request-key.rst.
 
 
- *  To search for a key in a specific domain, call:
+ *  To search for a key in a specific domain, call::
 
 	struct key *request_key_tag(const struct key_type *type,
 				    const char *description,
diff --git a/Documentation/security/lsm.rst b/Documentation/security/lsm.rst
index ad4dfd020e0d..aadf47c808c0 100644
--- a/Documentation/security/lsm.rst
+++ b/Documentation/security/lsm.rst
@@ -56,7 +56,7 @@ the infrastructure to support security modules. The LSM kernel patch
 also moves most of the capabilities logic into an optional security
 module, with the system defaulting to the traditional superuser logic.
 This capabilities module is discussed further in
-`LSM Capabilities Module <#cap>`__.
+`LSM Capabilities Module`_.
 
 The LSM kernel patch adds security fields to kernel data structures and
 inserts calls to hook functions at critical points in the kernel code to
diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst
index 02aacd69ab96..392875a1b94e 100644
--- a/Documentation/sound/alsa-configuration.rst
+++ b/Documentation/sound/alsa-configuration.rst
@@ -495,7 +495,8 @@ Module for C-Media CMI8338/8738/8768/8770 PCI sound cards.
 mpu_port
     port address of MIDI interface (8338 only):
     0x300,0x310,0x320,0x330 = legacy port,
-    0 = disable (default)
+    1 = integrated PCI port (default on 8738),
+    0 = disable
 fm_port
     port address of OPL-3 FM synthesizer (8x38 only):
     0x388 = legacy port,
diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
index 132f5eb9b530..fa4968817696 100644
--- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
+++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
@@ -259,7 +259,7 @@ to details explained in the following section.
       {
               struct mychip *chip;
               int err;
-              static struct snd_device_ops ops = {
+              static const struct snd_device_ops ops = {
                      .dev_free = snd_mychip_dev_free,
               };
 
@@ -675,7 +675,7 @@ low-level device with a specified ``ops``,
 
 ::
 
-  static struct snd_device_ops ops = {
+  static const struct snd_device_ops ops = {
           .dev_free =        snd_mychip_dev_free,
   };
   ....
@@ -761,7 +761,7 @@ destructor and PCI entries. Example code is shown first, below.
       {
               struct mychip *chip;
               int err;
-              static struct snd_device_ops ops = {
+              static const struct snd_device_ops ops = {
                      .dev_free = snd_mychip_dev_free,
               };
 
@@ -805,6 +805,7 @@ destructor and PCI entries. Example code is shown first, below.
                       return -EBUSY;
               }
               chip->irq = pci->irq;
+              card->sync_irq = chip->irq;
 
               /* (2) initialization of the chip hardware */
               .... /*   (not implemented in this document) */
@@ -965,6 +966,15 @@ usually like the following:
           return IRQ_HANDLED;
   }
 
+After requesting the IRQ, you can passed it to ``card->sync_irq``
+field:
+::
+
+          card->irq = chip->irq;
+
+This allows PCM core automatically performing
+:c:func:`synchronize_irq()` at the necessary timing like ``hw_free``.
+See the later section `sync_stop callback`_ for details.
 
 Now let's write the corresponding destructor for the resources above.
 The role of destructor is simple: disable the hardware (if already
@@ -1048,7 +1058,7 @@ and the allocation would be like below:
           return err;
   }
   chip->iobase_phys = pci_resource_start(pci, 0);
-  chip->iobase_virt = ioremap_nocache(chip->iobase_phys,
+  chip->iobase_virt = ioremap(chip->iobase_phys,
                                       pci_resource_len(pci, 0));
 
 and the corresponding destructor would be:
@@ -1270,21 +1280,23 @@ shows only the skeleton, how to build up the PCM interfaces.
               /* the hardware-specific codes will be here */
               ....
               return 0;
-
       }
 
       /* hw_params callback */
       static int snd_mychip_pcm_hw_params(struct snd_pcm_substream *substream,
                                    struct snd_pcm_hw_params *hw_params)
       {
-              return snd_pcm_lib_malloc_pages(substream,
-                                         params_buffer_bytes(hw_params));
+              /* the hardware-specific codes will be here */
+              ....
+              return 0;
       }
 
       /* hw_free callback */
       static int snd_mychip_pcm_hw_free(struct snd_pcm_substream *substream)
       {
-              return snd_pcm_lib_free_pages(substream);
+              /* the hardware-specific codes will be here */
+              ....
+              return 0;
       }
 
       /* prepare callback */
@@ -1339,7 +1351,6 @@ shows only the skeleton, how to build up the PCM interfaces.
       static struct snd_pcm_ops snd_mychip_playback_ops = {
               .open =        snd_mychip_playback_open,
               .close =       snd_mychip_playback_close,
-              .ioctl =       snd_pcm_lib_ioctl,
               .hw_params =   snd_mychip_pcm_hw_params,
               .hw_free =     snd_mychip_pcm_hw_free,
               .prepare =     snd_mychip_pcm_prepare,
@@ -1351,7 +1362,6 @@ shows only the skeleton, how to build up the PCM interfaces.
       static struct snd_pcm_ops snd_mychip_capture_ops = {
               .open =        snd_mychip_capture_open,
               .close =       snd_mychip_capture_close,
-              .ioctl =       snd_pcm_lib_ioctl,
               .hw_params =   snd_mychip_pcm_hw_params,
               .hw_free =     snd_mychip_pcm_hw_free,
               .prepare =     snd_mychip_pcm_prepare,
@@ -1382,9 +1392,9 @@ shows only the skeleton, how to build up the PCM interfaces.
                               &snd_mychip_capture_ops);
               /* pre-allocation of buffers */
               /* NOTE: this may fail */
-              snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-                                                    snd_dma_pci_data(chip->pci),
-                                                    64*1024, 64*1024);
+              snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
+                                             &chip->pci->dev,
+                                             64*1024, 64*1024);
               return 0;
       }
 
@@ -1454,7 +1464,6 @@ The operators are defined typically like this:
   static struct snd_pcm_ops snd_mychip_playback_ops = {
           .open =        snd_mychip_pcm_open,
           .close =       snd_mychip_pcm_close,
-          .ioctl =       snd_pcm_lib_ioctl,
           .hw_params =   snd_mychip_pcm_hw_params,
           .hw_free =     snd_mychip_pcm_hw_free,
           .prepare =     snd_mychip_pcm_prepare,
@@ -1465,13 +1474,14 @@ The operators are defined typically like this:
 All the callbacks are described in the Operators_ subsection.
 
 After setting the operators, you probably will want to pre-allocate the
-buffer. For the pre-allocation, simply call the following:
+buffer and set up the managed allocation mode.
+For that, simply call the following:
 
 ::
 
-  snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-                                        snd_dma_pci_data(chip->pci),
-                                        64*1024, 64*1024);
+  snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
+                                 &chip->pci->dev,
+                                 64*1024, 64*1024);
 
 It will allocate a buffer up to 64kB as default. Buffer management
 details will be described in the later section `Buffer and Memory
@@ -1621,8 +1631,7 @@ For the operators (callbacks) of each sound driver, most of these
 records are supposed to be read-only. Only the PCM middle-layer changes
 / updates them. The exceptions are the hardware description (hw) DMA
 buffer information and the private data. Besides, if you use the
-standard buffer allocation method via
-:c:func:`snd_pcm_lib_malloc_pages()`, you don't need to set the
+standard managed buffer allocation mode, you don't need to set the
 DMA buffer information by yourself.
 
 In the sections below, important records are explained.
@@ -1776,8 +1785,8 @@ the physical address of the buffer. This field is specified only when
 the buffer is a linear buffer. ``dma_bytes`` holds the size of buffer
 in bytes. ``dma_private`` is used for the ALSA DMA allocator.
 
-If you use a standard ALSA function,
-:c:func:`snd_pcm_lib_malloc_pages()`, for allocating the buffer,
+If you use either the managed buffer allocation mode or the standard
+API function :c:func:`snd_pcm_lib_malloc_pages()` for allocating the buffer,
 these fields are set by the ALSA middle layer, and you should *not*
 change them by yourself. You can read them but not write them. On the
 other hand, if you want to allocate the buffer by yourself, you'll
@@ -1911,7 +1920,10 @@ ioctl callback
 ~~~~~~~~~~~~~~
 
 This is used for any special call to pcm ioctls. But usually you can
-pass a generic ioctl callback, :c:func:`snd_pcm_lib_ioctl()`.
+leave it as NULL, then PCM core calls the generic ioctl callback
+function :c:func:`snd_pcm_lib_ioctl()`.  If you need to deal with the
+unique setup of channel info or reset procedure, you can pass your own
+callback function here.
 
 hw_params callback
 ~~~~~~~~~~~~~~~~~~~
@@ -1929,8 +1941,12 @@ Many hardware setups should be done in this callback, including the
 allocation of buffers.
 
 Parameters to be initialized are retrieved by
-:c:func:`params_xxx()` macros. To allocate buffer, you can call a
-helper function,
+:c:func:`params_xxx()` macros.
+
+When you set up the managed buffer allocation mode for the substream,
+a buffer is already allocated before this callback gets
+called. Alternatively, you can call a helper function below for
+allocating the buffer, too.
 
 ::
 
@@ -1964,18 +1980,23 @@ hw_free callback
   static int snd_xxx_hw_free(struct snd_pcm_substream *substream);
 
 This is called to release the resources allocated via
-``hw_params``. For example, releasing the buffer via
-:c:func:`snd_pcm_lib_malloc_pages()` is done by calling the
-following:
-
-::
-
-  snd_pcm_lib_free_pages(substream);
+``hw_params``.
 
 This function is always called before the close callback is called.
 Also, the callback may be called multiple times, too. Keep track
 whether the resource was already released.
 
+When you have set up the managed buffer allocation mode for the PCM
+substream, the allocated PCM buffer will be automatically released
+after this callback gets called.  Otherwise you'll have to release the
+buffer manually.  Typically, when the buffer was allocated from the
+pre-allocated pool, you can use the standard API function
+:c:func:`snd_pcm_lib_malloc_pages()` like:
+
+::
+
+  snd_pcm_lib_free_pages(substream);
+
 prepare callback
 ~~~~~~~~~~~~~~~~
 
@@ -2048,6 +2069,37 @@ flag set, and you cannot call functions which may sleep. The
 triggering the DMA. The other stuff should be initialized
 ``hw_params`` and ``prepare`` callbacks properly beforehand.
 
+sync_stop callback
+~~~~~~~~~~~~~~~~~~
+
+::
+
+  static int snd_xxx_sync_stop(struct snd_pcm_substream *substream);
+
+This callback is optional, and NULL can be passed.  It's called after
+the PCM core stops the stream and changes the stream state
+``prepare``, ``hw_params`` or ``hw_free``.
+Since the IRQ handler might be still pending, we need to wait until
+the pending task finishes before moving to the next step; otherwise it
+might lead to a crash due to resource conflicts or access to the freed
+resources.  A typical behavior is to call a synchronization function
+like :c:func:`synchronize_irq()` here.
+
+For majority of drivers that need only a call of
+:c:func:`synchronize_irq()`, there is a simpler setup, too.
+While keeping NULL to ``sync_stop`` PCM callback, the driver can set
+``card->sync_irq`` field to store the valid interrupt number after
+requesting an IRQ, instead.   Then PCM core will look call
+:c:func:`synchronize_irq()` with the given IRQ appropriately.
+
+If the IRQ handler is released at the card destructor, you don't need
+to clear ``card->sync_irq``, as the card itself is being released.
+So, usually you'll need to add just a single line for assigning
+``card->sync_irq`` in the driver code unless the driver re-acquires
+the IRQ.  When the driver frees and re-acquires the IRQ dynamically
+(e.g. for suspend/resume), it needs to clear and re-set
+``card->sync_irq`` again appropriately.
+
 pointer callback
 ~~~~~~~~~~~~~~~~
 
@@ -2095,10 +2147,12 @@ This callback is atomic as default.
 page callback
 ~~~~~~~~~~~~~
 
-This callback is optional too. This callback is used mainly for
-non-contiguous buffers. The mmap calls this callback to get the page
-address. Some examples will be explained in the later section `Buffer
-and Memory Management`_, too.
+This callback is optional too. The mmap calls this callback to get the
+page fault address.
+
+Since the recent changes, you need no special callback any longer for
+the standard SG-buffer or vmalloc-buffer. Hence this callback should
+be rarely used.
 
 mmap calllback
 ~~~~~~~~~~~~~~
@@ -3512,7 +3566,7 @@ bus).
 ::
 
   snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-                                        snd_dma_pci_data(pci), size, max);
+                                        &pci->dev, size, max);
 
 where ``size`` is the byte size to be pre-allocated and the ``max`` is
 the maximum size to be changed via the ``prealloc`` proc file. The
@@ -3523,12 +3577,14 @@ The second argument (type) and the third argument (device pointer) are
 dependent on the bus. For normal devices, pass the device pointer
 (typically identical as ``card->dev``) to the third argument with
 ``SNDRV_DMA_TYPE_DEV`` type. For the continuous buffer unrelated to the
-bus can be pre-allocated with ``SNDRV_DMA_TYPE_CONTINUOUS`` type and the
-``snd_dma_continuous_data(GFP_KERNEL)`` device pointer, where
-``GFP_KERNEL`` is the kernel allocation flag to use. For the
-scatter-gather buffers, use ``SNDRV_DMA_TYPE_DEV_SG`` with the device
-pointer (see the `Non-Contiguous Buffers`_
-section).
+bus can be pre-allocated with ``SNDRV_DMA_TYPE_CONTINUOUS`` type.
+You can pass NULL to the device pointer in that case, which is the
+default mode implying to allocate with ``GFP_KRENEL`` flag.
+If you need a different GFP flag, you can pass it by encoding the flag
+into the device pointer via a special macro
+:c:func:`snd_dma_continuous_data()`.
+For the scatter-gather buffers, use ``SNDRV_DMA_TYPE_DEV_SG`` with the
+device pointer (see the `Non-Contiguous Buffers`_ section).
 
 Once the buffer is pre-allocated, you can use the allocator in the
 ``hw_params`` callback:
@@ -3539,6 +3595,25 @@ Once the buffer is pre-allocated, you can use the allocator in the
 
 Note that you have to pre-allocate to use this function.
 
+Most of drivers use, though, rather the newly introduced "managed
+buffer allocation mode" instead of the manual allocation or release.
+This is done by calling :c:func:`snd_pcm_set_managed_buffer_all()`
+instead of :c:func:`snd_pcm_lib_preallocate_pages_for_all()`.
+
+::
+
+  snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
+                                 &pci->dev, size, max);
+
+where passed arguments are identical in both functions.
+The difference in the managed mode is that PCM core will call
+:c:func:`snd_pcm_lib_malloc_pages()` internally already before calling
+the PCM ``hw_params`` callback, and call :c:func:`snd_pcm_lib_free_pages()`
+after the PCM ``hw_free`` callback automatically.  So the driver
+doesn't have to call these functions explicitly in its callback any
+longer.  This made many driver code having NULL ``hw_params`` and
+``hw_free`` entries.
+
 External Hardware Buffers
 -------------------------
 
@@ -3693,20 +3768,26 @@ provides an interface for handling SG-buffers. The API is provided in
 ``<sound/pcm.h>``.
 
 For creating the SG-buffer handler, call
-:c:func:`snd_pcm_lib_preallocate_pages()` or
-:c:func:`snd_pcm_lib_preallocate_pages_for_all()` with
+:c:func:`snd_pcm_set_managed_buffer()` or
+:c:func:`snd_pcm_set_managed_buffer_all()` with
 ``SNDRV_DMA_TYPE_DEV_SG`` in the PCM constructor like other PCI
-pre-allocator. You need to pass ``snd_dma_pci_data(pci)``, where pci is
+pre-allocator. You need to pass ``&pci->dev``, where pci is
 the :c:type:`struct pci_dev <pci_dev>` pointer of the chip as
-well. The ``struct snd_sg_buf`` instance is created as
-``substream->dma_private``. You can cast the pointer like:
+well.
+
+::
+
+  snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
+                                 &pci->dev, size, max);
+
+The ``struct snd_sg_buf`` instance is created as
+``substream->dma_private`` in turn. You can cast the pointer like:
 
 ::
 
   struct snd_sg_buf *sgbuf = (struct snd_sg_buf *)substream->dma_private;
 
-Then call :c:func:`snd_pcm_lib_malloc_pages()` in the ``hw_params``
-callback as well as in the case of normal PCI buffer. The SG-buffer
+Then in :c:func:`snd_pcm_lib_malloc_pages()` call, the common SG-buffer
 handler will allocate the non-contiguous kernel pages of the given size
 and map them onto the virtually contiguous memory. The virtual pointer
 is addressed in runtime->dma_area. The physical address
@@ -3715,41 +3796,40 @@ physically non-contiguous. The physical address table is set up in
 ``sgbuf->table``. You can get the physical address at a certain offset
 via :c:func:`snd_pcm_sgbuf_get_addr()`.
 
-When a SG-handler is used, you need to set
-:c:func:`snd_pcm_sgbuf_ops_page()` as the ``page`` callback. (See
-`page callback`_ section.)
-
-To release the data, call :c:func:`snd_pcm_lib_free_pages()` in
-the ``hw_free`` callback as usual.
+If you need to release the SG-buffer data explicitly, call the
+standard API function :c:func:`snd_pcm_lib_free_pages()` as usual.
 
 Vmalloc'ed Buffers
 ------------------
 
 It's possible to use a buffer allocated via :c:func:`vmalloc()`, for
-example, for an intermediate buffer. Since the allocated pages are not
-contiguous, you need to set the ``page`` callback to obtain the physical
-address at every offset.
+example, for an intermediate buffer. In the recent version of kernel,
+you can simply allocate it via standard
+:c:func:`snd_pcm_lib_malloc_pages()` and co after setting up the
+buffer preallocation with ``SNDRV_DMA_TYPE_VMALLOC`` type.
 
-The easiest way to achieve it would be to use
-:c:func:`snd_pcm_lib_alloc_vmalloc_buffer()` for allocating the buffer
-via :c:func:`vmalloc()`, and set :c:func:`snd_pcm_sgbuf_ops_page()` to
-the ``page`` callback.  At release, you need to call
-:c:func:`snd_pcm_lib_free_vmalloc_buffer()`.
+::
 
-If you want to implementation the ``page`` manually, it would be like
-this:
+  snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC,
+                                 NULL, 0, 0);
 
-::
+The NULL is passed to the device pointer argument, which indicates
+that the default pages (GFP_KERNEL and GFP_HIGHMEM) will be
+allocated.
 
-  #include <linux/vmalloc.h>
+Also, note that zero is passed to both the size and the max size
+arguments here.  Since each vmalloc call should succeed at any time,
+we don't need to pre-allocate the buffers like other continuous
+pages.
 
-  /* get the physical page pointer on the given offset */
-  static struct page *mychip_page(struct snd_pcm_substream *substream,
-                                  unsigned long offset)
-  {
-          void *pageptr = substream->runtime->dma_area + offset;
-          return vmalloc_to_page(pageptr);
-  }
+If you need the 32bit DMA allocation, pass the device pointer encoded
+by :c:func:`snd_dma_continuous_data()` with ``GFP_KERNEL|__GFP_DMA32``
+argument.
+
+::
+
+  snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC,
+          snd_dma_continuous_data(GFP_KERNEL | __GFP_DMA32), 0, 0);
 
 Proc Interface
 ==============
@@ -3832,7 +3912,7 @@ For a raw-data proc-file, set the attributes as follows:
 
 ::
 
-  static struct snd_info_entry_ops my_file_io_ops = {
+  static const struct snd_info_entry_ops my_file_io_ops = {
           .read = my_file_io_read,
   };
 
diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css
index e21e36cd6761..459ec5b29d68 100644
--- a/Documentation/sphinx-static/theme_overrides.css
+++ b/Documentation/sphinx-static/theme_overrides.css
@@ -53,6 +53,16 @@ div[class^="highlight"] pre {
     line-height: normal;
 }
 
+/* Keep fields from being strangely far apart due to inheirited table CSS. */
+.rst-content table.field-list th.field-name {
+    padding-top: 1px;
+    padding-bottom: 1px;
+}
+.rst-content table.field-list td.field-body {
+    padding-top: 1px;
+    padding-bottom: 1px;
+}
+
 @media screen {
 
     /* content column
diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py
index 5b6119ff69f4..b18236370742 100644
--- a/Documentation/sphinx/automarkup.py
+++ b/Documentation/sphinx/automarkup.py
@@ -5,8 +5,13 @@
 # has been done.
 #
 from docutils import nodes
+import sphinx
 from sphinx import addnodes
-from sphinx.environment import NoUri
+if sphinx.version_info[0] < 2 or \
+   sphinx.version_info[0] == 2 and sphinx.version_info[1] < 1:
+    from sphinx.environment import NoUri
+else:
+    from sphinx.errors import NoUri
 import re
 
 #
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 1159405cb920..4bcbd6ae01cd 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -59,9 +59,10 @@ class KernelDocDirective(Directive):
     optional_arguments = 4
     option_spec = {
         'doc': directives.unchanged_required,
-        'functions': directives.unchanged,
         'export': directives.unchanged,
         'internal': directives.unchanged,
+        'identifiers': directives.unchanged,
+        'functions': directives.unchanged,
     }
     has_content = False
 
@@ -77,6 +78,10 @@ class KernelDocDirective(Directive):
 
         tab_width = self.options.get('tab-width', self.state.document.settings.tab_width)
 
+        # 'function' is an alias of 'identifiers'
+        if 'functions' in self.options:
+            self.options['identifiers'] = self.options.get('functions')
+
         # FIXME: make this nicer and more robust against errors
         if 'export' in self.options:
             cmd += ['-export']
@@ -86,11 +91,11 @@ class KernelDocDirective(Directive):
             export_file_patterns = str(self.options.get('internal')).split()
         elif 'doc' in self.options:
             cmd += ['-function', str(self.options.get('doc'))]
-        elif 'functions' in self.options:
-            functions = self.options.get('functions').split()
-            if functions:
-                for f in functions:
-                    cmd += ['-function', f]
+        elif 'identifiers' in self.options:
+            identifiers = self.options.get('identifiers').split()
+            if identifiers:
+                for i in identifiers:
+                    cmd += ['-function', i]
             else:
                 cmd += ['-no-doc-sections']
 
diff --git a/Documentation/sphinx/maintainers_include.py b/Documentation/sphinx/maintainers_include.py
new file mode 100755
index 000000000000..dc8fed48d3c2
--- /dev/null
+++ b/Documentation/sphinx/maintainers_include.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8; mode: python -*-
+# pylint: disable=R0903, C0330, R0914, R0912, E0401
+
+u"""
+    maintainers-include
+    ~~~~~~~~~~~~~~~~~~~
+
+    Implementation of the ``maintainers-include`` reST-directive.
+
+    :copyright:  Copyright (C) 2019  Kees Cook <keescook@chromium.org>
+    :license:    GPL Version 2, June 1991 see linux/COPYING for details.
+
+    The ``maintainers-include`` reST-directive performs extensive parsing
+    specific to the Linux kernel's standard "MAINTAINERS" file, in an
+    effort to avoid needing to heavily mark up the original plain text.
+"""
+
+import sys
+import re
+import os.path
+
+from docutils import statemachine
+from docutils.utils.error_reporting import ErrorString
+from docutils.parsers.rst import Directive
+from docutils.parsers.rst.directives.misc import Include
+
+__version__  = '1.0'
+
+def setup(app):
+    app.add_directive("maintainers-include", MaintainersInclude)
+    return dict(
+        version = __version__,
+        parallel_read_safe = True,
+        parallel_write_safe = True
+    )
+
+class MaintainersInclude(Include):
+    u"""MaintainersInclude (``maintainers-include``) directive"""
+    required_arguments = 0
+
+    def parse_maintainers(self, path):
+        """Parse all the MAINTAINERS lines into ReST for human-readability"""
+
+        result = list()
+        result.append(".. _maintainers:")
+        result.append("")
+
+        # Poor man's state machine.
+        descriptions = False
+        maintainers = False
+        subsystems = False
+
+        # Field letter to field name mapping.
+        field_letter = None
+        fields = dict()
+
+        prev = None
+        field_prev = ""
+        field_content = ""
+
+        for line in open(path):
+            if sys.version_info.major == 2:
+                line = unicode(line, 'utf-8')
+            # Have we reached the end of the preformatted Descriptions text?
+            if descriptions and line.startswith('Maintainers'):
+                descriptions = False
+                # Ensure a blank line following the last "|"-prefixed line.
+                result.append("")
+
+            # Start subsystem processing? This is to skip processing the text
+            # between the Maintainers heading and the first subsystem name.
+            if maintainers and not subsystems:
+                if re.search('^[A-Z0-9]', line):
+                    subsystems = True
+
+            # Drop needless input whitespace.
+            line = line.rstrip()
+
+            # Linkify all non-wildcard refs to ReST files in Documentation/.
+            pat = '(Documentation/([^\s\?\*]*)\.rst)'
+            m = re.search(pat, line)
+            if m:
+                # maintainers.rst is in a subdirectory, so include "../".
+                line = re.sub(pat, ':doc:`%s <../%s>`' % (m.group(2), m.group(2)), line)
+
+            # Check state machine for output rendering behavior.
+            output = None
+            if descriptions:
+                # Escape the escapes in preformatted text.
+                output = "| %s" % (line.replace("\\", "\\\\"))
+                # Look for and record field letter to field name mappings:
+                #   R: Designated *reviewer*: FullName <address@domain>
+                m = re.search("\s(\S):\s", line)
+                if m:
+                    field_letter = m.group(1)
+                if field_letter and not field_letter in fields:
+                    m = re.search("\*([^\*]+)\*", line)
+                    if m:
+                        fields[field_letter] = m.group(1)
+            elif subsystems:
+                # Skip empty lines: subsystem parser adds them as needed.
+                if len(line) == 0:
+                    continue
+                # Subsystem fields are batched into "field_content"
+                if line[1] != ':':
+                    # Render a subsystem entry as:
+                    #   SUBSYSTEM NAME
+                    #   ~~~~~~~~~~~~~~
+
+                    # Flush pending field content.
+                    output = field_content + "\n\n"
+                    field_content = ""
+
+                    # Collapse whitespace in subsystem name.
+                    heading = re.sub("\s+", " ", line)
+                    output = output + "%s\n%s" % (heading, "~" * len(heading))
+                    field_prev = ""
+                else:
+                    # Render a subsystem field as:
+                    #   :Field: entry
+                    #           entry...
+                    field, details = line.split(':', 1)
+                    details = details.strip()
+
+                    # Mark paths (and regexes) as literal text for improved
+                    # readability and to escape any escapes.
+                    if field in ['F', 'N', 'X', 'K']:
+                        # But only if not already marked :)
+                        if not ':doc:' in details:
+                            details = '``%s``' % (details)
+
+                    # Comma separate email field continuations.
+                    if field == field_prev and field_prev in ['M', 'R', 'L']:
+                        field_content = field_content + ","
+
+                    # Do not repeat field names, so that field entries
+                    # will be collapsed together.
+                    if field != field_prev:
+                        output = field_content + "\n"
+                        field_content = ":%s:" % (fields.get(field, field))
+                    field_content = field_content + "\n\t%s" % (details)
+                    field_prev = field
+            else:
+                output = line
+
+            # Re-split on any added newlines in any above parsing.
+            if output != None:
+                for separated in output.split('\n'):
+                    result.append(separated)
+
+            # Update the state machine when we find heading separators.
+            if line.startswith('----------'):
+                if prev.startswith('Descriptions'):
+                    descriptions = True
+                if prev.startswith('Maintainers'):
+                    maintainers = True
+
+            # Retain previous line for state machine transitions.
+            prev = line
+
+        # Flush pending field contents.
+        if field_content != "":
+            for separated in field_content.split('\n'):
+                result.append(separated)
+
+        output = "\n".join(result)
+        # For debugging the pre-rendered results...
+        #print(output, file=open("/tmp/MAINTAINERS.rst", "w"))
+
+        self.state_machine.insert_input(
+          statemachine.string2lines(output), path)
+
+    def run(self):
+        """Include the MAINTAINERS file as part of this reST file."""
+        if not self.state.document.settings.file_insertion_enabled:
+            raise self.warning('"%s" directive disabled.' % self.name)
+
+        # Walk up source path directories to find Documentation/../
+        path = self.state_machine.document.attributes['source']
+        path = os.path.realpath(path)
+        tail = path
+        while tail != "Documentation" and tail != "":
+            (path, tail) = os.path.split(path)
+
+        # Append "MAINTAINERS"
+        path = os.path.join(path, "MAINTAINERS")
+
+        try:
+            self.state.document.settings.record_dependencies.add(path)
+            lines = self.parse_maintainers(path)
+        except IOError as error:
+            raise self.severe('Problems with "%s" directive path:\n%s.' %
+                      (self.name, ErrorString(error)))
+
+        return []
diff --git a/Documentation/sphinx/parallel-wrapper.sh b/Documentation/sphinx/parallel-wrapper.sh
new file mode 100644
index 000000000000..7daf5133bdd3
--- /dev/null
+++ b/Documentation/sphinx/parallel-wrapper.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Figure out if we should follow a specific parallelism from the make
+# environment (as exported by scripts/jobserver-exec), or fall back to
+# the "auto" parallelism when "-jN" is not specified at the top-level
+# "make" invocation.
+
+sphinx="$1"
+shift || true
+
+parallel="$PARALLELISM"
+if [ -z "$parallel" ] ; then
+	# If no parallelism is specified at the top-level make, then
+	# fall back to the expected "-jauto" mode that the "htmldocs"
+	# target has had.
+	auto=$(perl -e 'open IN,"'"$sphinx"' --version 2>&1 |";
+			while (<IN>) {
+				if (m/([\d\.]+)/) {
+					print "auto" if ($1 >= "1.7")
+				}
+			}
+			close IN')
+	if [ -n "$auto" ] ; then
+		parallel="$auto"
+	fi
+fi
+# Only if some parallelism has been determined do we add the -jN option.
+if [ -n "$parallel" ] ; then
+	parallel="-j$parallel"
+fi
+
+exec "$sphinx" "$parallel" "$@"
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
index afacdf2fd1de..c8fad81c4563 100644
--- a/Documentation/tee.txt
+++ b/Documentation/tee.txt
@@ -112,6 +112,83 @@ kernel are handled by the kernel driver. Other RPC messages will be forwarded to
 tee-supplicant without further involvement of the driver, except switching
 shared memory buffer representation.
 
+AMD-TEE driver
+==============
+
+The AMD-TEE driver handles the communication with AMD's TEE environment. The
+TEE environment is provided by AMD Secure Processor.
+
+The AMD Secure Processor (formerly called Platform Security Processor or PSP)
+is a dedicated processor that features ARM TrustZone technology, along with a
+software-based Trusted Execution Environment (TEE) designed to enable
+third-party Trusted Applications. This feature is currently enabled only for
+APUs.
+
+The following picture shows a high level overview of AMD-TEE::
+
+                                             |
+    x86                                      |
+                                             |
+ User space            (Kernel space)        |    AMD Secure Processor (PSP)
+ ~~~~~~~~~~            ~~~~~~~~~~~~~~        |    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+                                             |
+ +--------+                                  |       +-------------+
+ | Client |                                  |       | Trusted     |
+ +--------+                                  |       | Application |
+     /\                                      |       +-------------+
+     ||                                      |             /\
+     ||                                      |             ||
+     ||                                      |             \/
+     ||                                      |         +----------+
+     ||                                      |         |   TEE    |
+     ||                                      |         | Internal |
+     \/                                      |         |   API    |
+ +---------+           +-----------+---------+         +----------+
+ | TEE     |           | TEE       | AMD-TEE |         | AMD-TEE  |
+ | Client  |           | subsystem | driver  |         | Trusted  |
+ | API     |           |           |         |         |   OS     |
+ +---------+-----------+----+------+---------+---------+----------+
+ |   Generic TEE API        |      | ASP     |      Mailbox       |
+ |   IOCTL (TEE_IOC_*)      |      | driver  | Register Protocol  |
+ +--------------------------+      +---------+--------------------+
+
+At the lowest level (in x86), the AMD Secure Processor (ASP) driver uses the
+CPU to PSP mailbox regsister to submit commands to the PSP. The format of the
+command buffer is opaque to the ASP driver. It's role is to submit commands to
+the secure processor and return results to AMD-TEE driver. The interface
+between AMD-TEE driver and AMD Secure Processor driver can be found in [6].
+
+The AMD-TEE driver packages the command buffer payload for processing in TEE.
+The command buffer format for the different TEE commands can be found in [7].
+
+The TEE commands supported by AMD-TEE Trusted OS are:
+* TEE_CMD_ID_LOAD_TA          - loads a Trusted Application (TA) binary into
+                                TEE environment.
+* TEE_CMD_ID_UNLOAD_TA        - unloads TA binary from TEE environment.
+* TEE_CMD_ID_OPEN_SESSION     - opens a session with a loaded TA.
+* TEE_CMD_ID_CLOSE_SESSION    - closes session with loaded TA
+* TEE_CMD_ID_INVOKE_CMD       - invokes a command with loaded TA
+* TEE_CMD_ID_MAP_SHARED_MEM   - maps shared memory
+* TEE_CMD_ID_UNMAP_SHARED_MEM - unmaps shared memory
+
+AMD-TEE Trusted OS is the firmware running on AMD Secure Processor.
+
+The AMD-TEE driver registers itself with TEE subsystem and implements the
+following driver function callbacks:
+
+* get_version - returns the driver implementation id and capability.
+* open - sets up the driver context data structure.
+* release - frees up driver resources.
+* open_session - loads the TA binary and opens session with loaded TA.
+* close_session -  closes session with loaded TA and unloads it.
+* invoke_func - invokes a command with loaded TA.
+
+cancel_req driver callback is not supported by AMD-TEE.
+
+The GlobalPlatform TEE Client API [5] can be used by the user space (client) to
+talk to AMD's TEE. AMD's TEE provides a secure environment for loading, opening
+a session, invoking commands and clossing session with TA.
+
 References
 ==========
 
@@ -125,3 +202,7 @@ References
 
 [5] http://www.globalplatform.org/specificationsdevice.asp look for
     "TEE Client API Specification v1.0" and click download.
+
+[6] include/linux/psp-tee.h
+
+[7] drivers/tee/amdtee/amdtee_if.h
diff --git a/Documentation/trace/coresight-cpu-debug.rst b/Documentation/trace/coresight/coresight-cpu-debug.rst
index 993dd294b81b..993dd294b81b 100644
--- a/Documentation/trace/coresight-cpu-debug.rst
+++ b/Documentation/trace/coresight/coresight-cpu-debug.rst
diff --git a/Documentation/trace/coresight/coresight-etm4x-reference.rst b/Documentation/trace/coresight/coresight-etm4x-reference.rst
new file mode 100644
index 000000000000..b64d9a9c79df
--- /dev/null
+++ b/Documentation/trace/coresight/coresight-etm4x-reference.rst
@@ -0,0 +1,798 @@
+===============================================
+ETMv4 sysfs linux driver programming reference.
+===============================================
+
+    :Author:   Mike Leach <mike.leach@linaro.org>
+    :Date:     October 11th, 2019
+
+Supplement to existing ETMv4 driver documentation.
+
+Sysfs files and directories
+---------------------------
+
+Root: ``/sys/bus/coresight/devices/etm<N>``
+
+
+The following paragraphs explain the association between sysfs files and the
+ETMv4 registers that they effect. Note the register names are given without
+the ‘TRC’ prefix.
+
+----
+
+:File:            ``mode`` (rw)
+:Trace Registers: {CONFIGR + others}
+:Notes:
+    Bit select trace features. See ‘mode’ section below. Bits
+    in this will cause equivalent programming of trace config and
+    other registers to enable the features requested.
+
+:Syntax & eg:
+    ``echo bitfield > mode``
+
+    bitfield up to 32 bits setting trace features.
+
+:Example:
+    ``$> echo 0x012 > mode``
+
+----
+
+:File:            ``reset`` (wo)
+:Trace Registers: All
+:Notes:
+    Reset all programming to trace nothing / no logic programmed.
+
+:Syntax:
+    ``echo 1 > reset``
+
+----
+
+:File:            ``enable_source`` (wo)
+:Trace Registers: PRGCTLR, All hardware regs.
+:Notes:
+    - > 0 : Programs up the hardware with the current values held in the driver
+      and enables trace.
+
+    - = 0 : disable trace hardware.
+
+:Syntax:
+    ``echo 1 > enable_source``
+
+----
+
+:File:            ``cpu`` (ro)
+:Trace Registers: None.
+:Notes:
+    CPU ID that this ETM is attached to.
+
+:Example:
+    ``$> cat cpu``
+
+    ``$> 0``
+
+----
+
+:File:            ``addr_idx`` (rw)
+:Trace Registers: None.
+:Notes:
+    Virtual register to index address comparator and range
+    features. Set index for first of the pair in a range.
+
+:Syntax:
+    ``echo idx > addr_idx``
+
+    Where idx < nr_addr_cmp x 2
+
+----
+
+:File:            ``addr_range`` (rw)
+:Trace Registers: ACVR[idx, idx+1], VIIECTLR
+:Notes:
+    Pair of addresses for a range selected by addr_idx. Include
+    / exclude according to the optional parameter, or if omitted
+    uses the current ‘mode’ setting. Select comparator range in
+    control register. Error if index is odd value.
+
+:Depends: ``mode, addr_idx``
+:Syntax:
+   ``echo addr1 addr2 [exclude] > addr_range``
+
+   Where addr1 and addr2 define the range and addr1 < addr2.
+
+   Optional exclude value:-
+
+   - 0 for include
+   - 1 for exclude.
+:Example:
+   ``$> echo 0x0000 0x2000 0 > addr_range``
+
+----
+
+:File:            ``addr_single`` (rw)
+:Trace Registers: ACVR[idx]
+:Notes:
+    Set a single address comparator according to addr_idx. This
+    is used if the address comparator is used as part of event
+    generation logic etc.
+
+:Depends: ``addr_idx``
+:Syntax:
+   ``echo addr1 > addr_single``
+
+----
+
+:File:           ``addr_start`` (rw)
+:Trace Registers: ACVR[idx], VISSCTLR
+:Notes:
+    Set a trace start address comparator according to addr_idx.
+    Select comparator in control register.
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``echo addr1 > addr_start``
+
+----
+
+:File:            ``addr_stop`` (rw)
+:Trace Registers: ACVR[idx], VISSCTLR
+:Notes:
+    Set a trace stop address comparator according to addr_idx.
+    Select comparator in control register.
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``echo addr1 > addr_stop``
+
+----
+
+:File:            ``addr_context`` (rw)
+:Trace Registers: ACATR[idx,{6:4}]
+:Notes:
+    Link context ID comparator to address comparator addr_idx
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``echo ctxt_idx > addr_context``
+
+    Where ctxt_idx is the index of the linked context id / vmid
+    comparator.
+
+----
+
+:File:            ``addr_ctxtype`` (rw)
+:Trace Registers: ACATR[idx,{3:2}]
+:Notes:
+    Input value string. Set type for linked context ID comparator
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``echo type > addr_ctxtype``
+
+    Type one of {all, vmid, ctxid, none}
+:Example:
+    ``$> echo ctxid > addr_ctxtype``
+
+----
+
+:File:            ``addr_exlevel_s_ns`` (rw)
+:Trace Registers: ACATR[idx,{14:8}]
+:Notes:
+    Set the ELx secure and non-secure matching bits for the
+    selected address comparator
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``echo val > addr_exlevel_s_ns``
+
+    val is a 7 bit value for exception levels to exclude. Input
+    value shifted to correct bits in register.
+:Example:
+    ``$> echo 0x4F > addr_exlevel_s_ns``
+
+----
+
+:File:            ``addr_instdatatype`` (rw)
+:Trace Registers: ACATR[idx,{1:0}]
+:Notes:
+    Set the comparator address type for matching. Driver only
+    supports setting instruction address type.
+
+:Depends: ``addr_idx``
+
+----
+
+:File:            ``addr_cmp_view`` (ro)
+:Trace Registers: ACVR[idx, idx+1], ACATR[idx], VIIECTLR
+:Notes:
+    Read the currently selected address comparator. If part of
+    address range then display both addresses.
+
+:Depends: ``addr_idx``
+:Syntax:
+    ``cat addr_cmp_view``
+:Example:
+    ``$> cat addr_cmp_view``
+
+   ``addr_cmp[0] range 0x0 0xffffffffffffffff include ctrl(0x4b00)``
+
+----
+
+:File:            ``nr_addr_cmp`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of address comparator pairs
+
+----
+
+:File:            ``sshot_idx`` (rw)
+:Trace Registers: None
+:Notes:
+    Select single shot register set.
+
+----
+
+:File:            ``sshot_ctrl`` (rw)
+:Trace Registers: SSCCR[idx]
+:Notes:
+    Access a single shot comparator control register.
+
+:Depends: ``sshot_idx``
+:Syntax:
+    ``echo val > sshot_ctrl``
+
+    Writes val into the selected control register.
+
+----
+
+:File:            ``sshot_status`` (ro)
+:Trace Registers: SSCSR[idx]
+:Notes:
+    Read a single shot comparator status register
+
+:Depends: ``sshot_idx``
+:Syntax:
+    ``cat sshot_status``
+
+    Read status.
+:Example:
+    ``$> cat sshot_status``
+
+    ``0x1``
+
+----
+
+:File:            ``sshot_pe_ctrl`` (rw)
+:Trace Registers: SSPCICR[idx]
+:Notes:
+    Access a single shot PE comparator input control register.
+
+:Depends: ``sshot_idx``
+:Syntax:
+    ``echo val > sshot_pe_ctrl``
+
+    Writes val into the selected control register.
+
+----
+
+:File:            ``ns_exlevel_vinst`` (rw)
+:Trace Registers: VICTLR{23:20}
+:Notes:
+    Program non-secure exception level filters. Set / clear NS
+    exception filter bits. Setting ‘1’ excludes trace from the
+    exception level.
+
+:Syntax:
+    ``echo bitfield > ns_exlevel_viinst``
+
+    Where bitfield contains bits to set clear for EL0 to EL2
+:Example:
+    ``%> echo 0x4 > ns_exlevel_viinst``
+
+    Excludes EL2 NS trace.
+
+----
+
+:File:            ``vinst_pe_cmp_start_stop`` (rw)
+:Trace Registers: VIPCSSCTLR
+:Notes:
+    Access PE start stop comparator input control registers
+
+----
+
+:File:            ``bb_ctrl`` (rw)
+:Trace Registers: BBCTLR
+:Notes:
+    Define ranges that Branch Broadcast will operate in.
+    Default (0x0) is all addresses.
+
+:Depends: BB enabled.
+
+----
+
+:File:            ``cyc_threshold`` (rw)
+:Trace Registers: CCCTLR
+:Notes:
+    Set the threshold for which cycle counts will be emitted.
+    Error if attempt to set below minimum defined in IDR3, masked
+    to width of valid bits.
+
+:Depends: CC enabled.
+
+----
+
+:File:            ``syncfreq`` (rw)
+:Trace Registers: SYNCPR
+:Notes:
+    Set trace synchronisation period. Power of 2 value, 0 (off)
+    or 8-20. Driver defaults to 12 (every 4096 bytes).
+
+----
+
+:File:            ``cntr_idx`` (rw)
+:Trace Registers: none
+:Notes:
+    Select the counter to access
+
+:Syntax:
+    ``echo idx > cntr_idx``
+
+    Where idx < nr_cntr
+
+----
+
+:File:            ``cntr_ctrl`` (rw)
+:Trace Registers: CNTCTLR[idx]
+:Notes:
+    Set counter control value.
+
+:Depends: ``cntr_idx``
+:Syntax:
+    ``echo val > cntr_ctrl``
+
+    Where val is per ETMv4 spec.
+
+----
+
+:File:            ``cntrldvr`` (rw)
+:Trace Registers: CNTRLDVR[idx]
+:Notes:
+    Set counter reload value.
+
+:Depends: ``cntr_idx``
+:Syntax:
+    ``echo val > cntrldvr``
+
+    Where val is per ETMv4 spec.
+
+----
+
+:File:            ``nr_cntr`` (ro)
+:Trace Registers: From IDR5
+
+:Notes:
+    Number of counters implemented.
+
+----
+
+:File:            ``ctxid_idx`` (rw)
+:Trace Registers: None
+:Notes:
+    Select the context ID comparator to access
+
+:Syntax:
+    ``echo idx > ctxid_idx``
+
+    Where idx < numcidc
+
+----
+
+:File:            ``ctxid_pid`` (rw)
+:Trace Registers: CIDCVR[idx]
+:Notes:
+   Set the context ID comparator value
+
+:Depends: ``ctxid_idx``
+
+----
+
+:File: ``ctxid_masks`` (rw)
+:Trace Registers: CIDCCTLR0, CIDCCTLR1, CIDCVR<0-7>
+:Notes:
+    Pair of values to set the byte masks for 1-8 context ID
+    comparators. Automatically clears masked bytes to 0 in CID
+    value registers.
+
+:Syntax:
+    ``echo m3m2m1m0 [m7m6m5m4] > ctxid_masks``
+
+    32 bit values made up of mask bytes, where mN represents a
+    byte mask value for Context ID comparator N.
+
+    Second value not required on systems that have fewer than 4
+    context ID comparators
+
+----
+
+:File:            ``numcidc`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of Context ID comparators
+
+----
+
+:File:            ``vmid_idx`` (rw)
+:Trace Registers: None
+:Notes:
+    Select the VM ID comparator to access.
+
+:Syntax:
+    ``echo idx > vmid_idx``
+
+    Where idx <  numvmidc
+
+----
+
+:File:            ``vmid_val`` (rw)
+:Trace Registers: VMIDCVR[idx]
+:Notes:
+    Set the VM ID comparator value
+
+:Depends: ``vmid_idx``
+
+----
+
+:File:            ``vmid_masks`` (rw)
+:Trace Registers: VMIDCCTLR0, VMIDCCTLR1, VMIDCVR<0-7>
+:Notes:
+    Pair of values to set the byte masks for 1-8 VM ID comparators.
+    Automatically clears masked bytes to 0 in VMID value registers.
+
+:Syntax:
+    ``echo m3m2m1m0 [m7m6m5m4] > vmid_masks``
+
+    Where mN represents a byte mask value for VMID comparator N.
+    Second value not required on systems that have fewer than 4
+    VMID comparators.
+
+----
+
+:File:            ``numvmidc`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of VMID comparators
+
+----
+
+:File:            ``res_idx`` (rw)
+:Trace Registers: None.
+:Notes:
+    Select the resource selector control to access. Must be 2 or
+    higher as selectors 0 and 1 are hardwired.
+
+:Syntax:
+    ``echo idx > res_idx``
+
+    Where 2 <= idx < nr_resource x 2
+
+----
+
+:File:            ``res_ctrl`` (rw)
+:Trace Registers: RSCTLR[idx]
+:Notes:
+    Set resource selector control value. Value per ETMv4 spec.
+
+:Depends: ``res_idx``
+:Syntax:
+    ``echo val > res_cntr``
+
+    Where val is per ETMv4 spec.
+
+----
+
+:File:            ``nr_resource`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of resource selector pairs
+
+----
+
+:File:            ``event`` (rw)
+:Trace Registers: EVENTCTRL0R
+:Notes:
+    Set up to 4 implemented event fields.
+
+:Syntax:
+    ``echo ev3ev2ev1ev0 > event``
+
+    Where evN is an 8 bit event field. Up to 4 event fields make up the
+    32-bit input value. Number of valid fields is implementation dependent,
+    defined in IDR0.
+
+----
+
+:File: ``event_instren`` (rw)
+:Trace Registers: EVENTCTRL1R
+:Notes:
+    Choose events which insert event packets into trace stream.
+
+:Depends: EVENTCTRL0R
+:Syntax:
+    ``echo bitfield > event_instren``
+
+    Where bitfield is up to 4 bits according to number of event fields.
+
+----
+
+:File:            ``event_ts`` (rw)
+:Trace Registers: TSCTLR
+:Notes:
+    Set the event that will generate timestamp requests.
+
+:Depends: ``TS activated``
+:Syntax:
+    ``echo evfield > event_ts``
+
+    Where evfield is an 8 bit event selector.
+
+----
+
+:File:            ``seq_idx`` (rw)
+:Trace Registers: None
+:Notes:
+    Sequencer event register select - 0 to 2
+
+----
+
+:File:            ``seq_state`` (rw)
+:Trace Registers: SEQSTR
+:Notes:
+    Sequencer current state - 0 to 3.
+
+----
+
+:File:            ``seq_event`` (rw)
+:Trace Registers: SEQEVR[idx]
+:Notes:
+    State transition event registers
+
+:Depends: ``seq_idx``
+:Syntax:
+    ``echo evBevF > seq_event``
+
+    Where evBevF is a 16 bit value made up of two event selectors,
+
+    - evB : back
+    - evF : forwards.
+
+----
+
+:File:            ``seq_reset_event`` (rw)
+:Trace Registers: SEQRSTEVR
+:Notes:
+    Sequencer reset event
+
+:Syntax:
+    ``echo evfield > seq_reset_event``
+
+    Where evfield is an 8 bit event selector.
+
+----
+
+:File:            ``nrseqstate`` (ro)
+:Trace Registers: From IDR5
+:Notes:
+    Number of sequencer states (0 or 4)
+
+----
+
+:File:            ``nr_pe_cmp`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of PE comparator inputs
+
+----
+
+:File:            ``nr_ext_inp`` (ro)
+:Trace Registers: From IDR5
+:Notes:
+    Number of external inputs
+
+----
+
+:File:            ``nr_ss_cmp`` (ro)
+:Trace Registers: From IDR4
+:Notes:
+    Number of Single Shot control registers
+
+----
+
+*Note:* When programming any address comparator the driver will tag the
+comparator with a type used - i.e. RANGE, SINGLE, START, STOP. Once this tag
+is set, then only the values can be changed using the same sysfs file / type
+used to program it.
+
+Thus::
+
+  % echo 0 > addr_idx		; select address comparator 0
+  % echo 0x1000 0x5000 0 > addr_range ; set address range on comparators 0, 1.
+  % echo 0x2000 > addr_start    ; error as comparator 0 is a range comparator
+  % echo 2 > addr_idx		; select address comparator 2
+  % echo 0x2000 > addr_start	; this is OK as comparator 2 is unused.
+  % echo 0x3000 > addr_stop	; error as comparator 2 set as start address.
+  % echo 2 > addr_idx		; select address comparator 3
+  % echo 0x3000 > addr_stop	; this is OK
+
+To remove programming on all the comparators (and all the other hardware) use
+the reset parameter::
+
+  % echo 1 > reset
+
+
+
+The ‘mode’ sysfs parameter.
+---------------------------
+
+This is a bitfield selection parameter that sets the overall trace mode for the
+ETM. The table below describes the bits, using the defines from the driver
+source file, along with a description of the feature these represent. Many
+features are optional and therefore dependent on implementation in the
+hardware.
+
+Bit assignments shown below:-
+
+----
+
+**bit (0):**
+    ETM_MODE_EXCLUDE
+
+**description:**
+    This is the default value for the include / exclude function when
+    setting address ranges. Set 1 for exclude range. When the mode
+    parameter is set this value is applied to the currently indexed
+    address range.
+
+
+**bit (4):**
+    ETM_MODE_BB
+
+**description:**
+    Set to enable branch broadcast if supported in hardware [IDR0].
+
+
+**bit (5):**
+    ETMv4_MODE_CYCACC
+
+**description:**
+    Set to enable cycle accurate trace if supported [IDR0].
+
+
+**bit (6):**
+    ETMv4_MODE_CTXID
+
+**description:**
+    Set to enable context ID tracing if supported in hardware [IDR2].
+
+
+**bit (7):**
+    ETM_MODE_VMID
+
+**description:**
+    Set to enable virtual machine ID tracing if supported [IDR2].
+
+
+**bit (11):**
+    ETMv4_MODE_TIMESTAMP
+
+**description:**
+    Set to enable timestamp generation if supported [IDR0].
+
+
+**bit (12):**
+    ETM_MODE_RETURNSTACK
+**description:**
+    Set to enable trace return stack use if supported [IDR0].
+
+
+**bit (13-14):**
+    ETM_MODE_QELEM(val)
+
+**description:**
+    ‘val’ determines level of Q element support enabled if
+    implemented by the ETM [IDR0]
+
+
+**bit (19):**
+    ETM_MODE_ATB_TRIGGER
+
+**description:**
+    Set to enable the ATBTRIGGER bit in the event control register
+    [EVENTCTLR1] if supported [IDR5].
+
+
+**bit (20):**
+    ETM_MODE_LPOVERRIDE
+
+**description:**
+    Set to enable the LPOVERRIDE bit in the event control register
+    [EVENTCTLR1], if supported [IDR5].
+
+
+**bit (21):**
+    ETM_MODE_ISTALL_EN
+
+**description:**
+    Set to enable the ISTALL bit in the stall control register
+    [STALLCTLR]
+
+
+**bit (23):**
+    ETM_MODE_INSTPRIO
+
+**description:**
+	      Set to enable the INSTPRIORITY bit in the stall control register
+	      [STALLCTLR] , if supported [IDR0].
+
+
+**bit (24):**
+    ETM_MODE_NOOVERFLOW
+
+**description:**
+    Set to enable the NOOVERFLOW bit in the stall control register
+    [STALLCTLR], if supported [IDR3].
+
+
+**bit (25):**
+    ETM_MODE_TRACE_RESET
+
+**description:**
+    Set to enable the TRCRESET bit in the viewinst control register
+    [VICTLR] , if supported [IDR3].
+
+
+**bit (26):**
+    ETM_MODE_TRACE_ERR
+
+**description:**
+    Set to enable the TRCCTRL bit in the viewinst control register
+    [VICTLR].
+
+
+**bit (27):**
+    ETM_MODE_VIEWINST_STARTSTOP
+
+**description:**
+    Set the initial state value of the ViewInst start / stop logic
+    in the viewinst control register [VICTLR]
+
+
+**bit (30):**
+    ETM_MODE_EXCL_KERN
+
+**description:**
+    Set default trace setup to exclude kernel mode trace (see note a)
+
+
+**bit (31):**
+    ETM_MODE_EXCL_USER
+
+**description:**
+    Set default trace setup to exclude user space trace (see note a)
+
+----
+
+*Note a)* On startup the ETM is programmed to trace the complete address space
+using address range comparator 0. ‘mode’ bits 30 / 31 modify this setting to
+set EL exclude bits for NS state in either user space (EL0) or kernel space
+(EL1) in the address range comparator. (the default setting excludes all
+secure EL, and NS EL2)
+
+Once the reset parameter has been used, and/or custom programming has been
+implemented - using these bits will result in the EL bits for address
+comparator 0 being set in the same way.
+
+*Note b)* Bits 2-3, 8-10, 15-16, 18, 22, control features that only work with
+data trace. As A-profile data trace is architecturally prohibited in ETMv4,
+these have been omitted here. Possible uses could be where a kernel has
+support for control of R or M profile infrastructure as part of a heterogeneous
+system.
+
+Bits 17, 28-29 are unused.
diff --git a/Documentation/trace/coresight.rst b/Documentation/trace/coresight/coresight.rst
index 72f4b7ef1bad..a566719f8e7e 100644
--- a/Documentation/trace/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -489,7 +489,7 @@ interface provided for that purpose by the generic STM API::
     crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/stm0
     root@genericarmv8:~#
 
-Details on how to use the generic STM API can be found here [#second]_.
+Details on how to use the generic STM API can be found here:- :doc:`../stm` [#second]_.
 
 .. [#first] Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
 
diff --git a/Documentation/trace/coresight/index.rst b/Documentation/trace/coresight/index.rst
new file mode 100644
index 000000000000..8d31b155a87c
--- /dev/null
+++ b/Documentation/trace/coresight/index.rst
@@ -0,0 +1,9 @@
+==============================
+CoreSight - ARM Hardware Trace
+==============================
+
+.. toctree::
+   :maxdepth: 2
+   :glob:
+
+   *
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 1fbc69894eed..2a05e770618a 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -146,7 +146,7 @@ FTRACE_OPS_FL_RECURSION_SAFE
 	itself or any nested functions that those functions call.
 
 	If this flag is set, it is possible that the callback will also
-	be called with preemption enabled (when CONFIG_PREEMPT is set),
+	be called with preemption enabled (when CONFIG_PREEMPTION is set),
 	but this is not guaranteed.
 
 FTRACE_OPS_FL_IPMODIFY
@@ -170,6 +170,14 @@ FTRACE_OPS_FL_RCU
 	a callback may be executed and RCU synchronization will not protect
 	it.
 
+FTRACE_OPS_FL_PERMANENT
+        If this is set on any ftrace ops, then the tracing cannot disabled by
+        writing 0 to the proc sysctl ftrace_enabled. Equally, a callback with
+        the flag set cannot be registered if ftrace_enabled is 0.
+
+        Livepatch uses it not to lose the function redirection, so the system
+        stays protected.
+
 
 Filtering which functions to trace
 ==================================
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index e3060eedb22d..ff658e27d25b 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -95,7 +95,8 @@ of ftrace. Here is a list of some of the key files:
   current_tracer:
 
 	This is used to set or display the current tracer
-	that is configured.
+	that is configured. Changing the current tracer clears
+	the ring buffer content as well as the "snapshot" buffer.
 
   available_tracers:
 
@@ -126,7 +127,8 @@ of ftrace. Here is a list of some of the key files:
 	This file holds the output of the trace in a human
 	readable format (described below). Note, tracing is temporarily
 	disabled when the file is open for reading. Once all readers
-	are closed, tracing is re-enabled.
+	are closed, tracing is re-enabled. Opening this file for
+	writing with the O_TRUNC flag clears the ring buffer content.
 
   trace_pipe:
 
@@ -185,7 +187,8 @@ of ftrace. Here is a list of some of the key files:
 	CPU buffer and not total size of all buffers. The
 	trace buffers are allocated in pages (blocks of memory
 	that the kernel uses for allocation, usually 4 KB in size).
-	If the last page allocated has room for more bytes
+	A few extra pages may be allocated to accommodate buffer management
+	meta-data. If the last page allocated has room for more bytes
 	than requested, the rest of the page will be used,
 	making the actual allocation bigger than requested or shown.
 	( Note, the size may not be a multiple of the page size
@@ -235,7 +238,7 @@ of ftrace. Here is a list of some of the key files:
 	This interface also allows for commands to be used. See the
 	"Filter commands" section for more details.
 
-	As a speed up, since processing strings can't be quite expensive
+	As a speed up, since processing strings can be quite expensive
 	and requires a check of all functions registered to tracing, instead
 	an index can be written into this file. A number (starting with "1")
 	written will instead select the same corresponding at the line position
@@ -382,7 +385,7 @@ of ftrace. Here is a list of some of the key files:
 
 	By default, 128 comms are saved (see "saved_cmdlines" above). To
 	increase or decrease the amount of comms that are cached, echo
-	in a the number of comms to cache, into this file.
+	the number of comms to cache into this file.
 
   saved_tgids:
 
@@ -490,6 +493,9 @@ of ftrace. Here is a list of some of the key files:
 
 	  # echo global > trace_clock
 
+	Setting a clock clears the ring buffer content as well as the
+	"snapshot" buffer.
+
   trace_marker:
 
 	This is a very useful file for synchronizing user space
@@ -2976,7 +2982,9 @@ Note, the proc sysctl ftrace_enable is a big on/off switch for the
 function tracer. By default it is enabled (when function tracing is
 enabled in the kernel). If it is disabled, all function tracing is
 disabled. This includes not only the function tracers for ftrace, but
-also for any other uses (perf, kprobes, stack tracing, profiling, etc).
+also for any other uses (perf, kprobes, stack tracing, profiling, etc). It
+cannot be disabled if there is a callback with FTRACE_OPS_FL_PERMANENT set
+registered.
 
 Please disable this with care.
 
@@ -3322,7 +3330,7 @@ directories after it is created.
 
 As you can see, the new directory looks similar to the tracing directory
 itself. In fact, it is very similar, except that the buffer and
-events are agnostic from the main director, or from any other
+events are agnostic from the main directory, or from any other
 instances that are created.
 
 The files in the new directory work just like the files with the
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index b7891cb1ab4d..04acd277c5f6 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -23,5 +23,4 @@ Linux Tracing Technologies
    intel_th
    stm
    sys-t
-   coresight
-   coresight-cpu-debug
+   coresight/index
diff --git a/Documentation/trace/intel_th.rst b/Documentation/trace/intel_th.rst
index baa12eb09ef4..70b7126eaeeb 100644
--- a/Documentation/trace/intel_th.rst
+++ b/Documentation/trace/intel_th.rst
@@ -44,7 +44,8 @@ Documentation/trace/stm.rst for more information on that.
 
 MSU can be configured to collect trace data into a system memory
 buffer, which can later on be read from its device nodes via read() or
-mmap() interface.
+mmap() interface and directed to a "software sink" driver that will
+consume the data and/or relay it further.
 
 On the whole, Intel(R) Trace Hub does not require any special
 userspace software to function; everything can be configured, started
@@ -122,3 +123,28 @@ In order to enable the host mode, set the 'host_mode' parameter of the
 will show up on the intel_th bus. Also, trace configuration and
 capture controlling attribute groups of the 'gth' device will not be
 exposed. The 'sth' device will operate as usual.
+
+Software Sinks
+--------------
+
+The Memory Storage Unit (MSU) driver provides an in-kernel API for
+drivers to register themselves as software sinks for the trace data.
+Such drivers can further export the data via other devices, such as
+USB device controllers or network cards.
+
+The API has two main parts::
+ - notifying the software sink that a particular window is full, and
+   "locking" that window, that is, making it unavailable for the trace
+   collection; when this happens, the MSU driver will automatically
+   switch to the next window in the buffer if it is unlocked, or stop
+   the trace capture if it's not;
+ - tracking the "locked" state of windows and providing a way for the
+   software sink driver to notify the MSU driver when a window is
+   unlocked and can be used again to collect trace data.
+
+An example sink driver, msu-sink illustrates the implementation of a
+software sink. Functionally, it simply unlocks windows as soon as they
+are full, keeping the MSU running in a circular buffer mode. Unlike the
+"multi" mode, it will fill out all the windows in the buffer as opposed
+to just the first one. It can be enabled by writing "sink" to the "mode"
+file (assuming msu-sink.ko is loaded).
diff --git a/Documentation/trace/ring-buffer-design.txt b/Documentation/trace/ring-buffer-design.txt
index ff747b6fa39b..2d53c6f25b91 100644
--- a/Documentation/trace/ring-buffer-design.txt
+++ b/Documentation/trace/ring-buffer-design.txt
@@ -37,7 +37,7 @@ commit_page - a pointer to the page with the last finished non-nested write.
 
 cmpxchg - hardware-assisted atomic transaction that performs the following:
 
-   A = B iff previous A == C
+   A = B if previous A == C
 
    R = cmpxchg(A, C, B) is saying that we replace A with B if and only if
       current A is equal to C, and we put the old (current) A into R
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 8995d2d19f20..8725f2b9e960 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -1005,7 +1005,7 @@ struttura, usate
 
 .. code-block:: c
 
-	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+	#define sizeof_field(t, f) (sizeof(((t*)0)->f))
 
 Ci sono anche le macro min() e max() che, se vi serve, effettuano un controllo
 rigido sui tipi.  Sentitevi liberi di leggere attentamente questo file
diff --git a/Documentation/translations/it_IT/process/magic-number.rst b/Documentation/translations/it_IT/process/magic-number.rst
index ed1121d0ba84..783e0de314a0 100644
--- a/Documentation/translations/it_IT/process/magic-number.rst
+++ b/Documentation/translations/it_IT/process/magic-number.rst
@@ -87,7 +87,6 @@ FF_MAGIC              0x4646           fc_info                  ``drivers/net/ip
 ISICOM_MAGIC          0x4d54           isi_port                 ``include/linux/isicom.h``
 PTY_MAGIC             0x5001                                    ``drivers/char/pty.c``
 PPP_MAGIC             0x5002           ppp                      ``include/linux/if_pppvar.h``
-SERIAL_MAGIC          0x5301           async_struct             ``include/linux/serial.h``
 SSTATE_MAGIC          0x5302           serial_state             ``include/linux/serial.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
 STRIP_MAGIC           0x5303           strip                    ``drivers/net/strip.c``
diff --git a/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst b/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
index 118fb4153e8f..f3c8e8d377ee 100644
--- a/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
+++ b/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
@@ -455,7 +455,7 @@ soluzioni disponibili:
   `GnuK`_ della FSIJ. Questo è uno dei pochi dispositivi a supportare le chiavi
   ECC ED25519, ma offre meno funzionalità di sicurezza (come la resistenza
   alla manomissione o alcuni attacchi ad un canale laterale).
-- `Nitrokey Pro`_: è simile alla Nitrokey Start, ma è più resistente alla
+- `Nitrokey Pro 2`_: è simile alla Nitrokey Start, ma è più resistente alla
   manomissione e offre più funzionalità di sicurezza. La Pro 2 supporta la
   crittografia ECC (NISTP).
 - `Yubikey 5`_: l'hardware e il software sono proprietari, ma è più economica
diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst
index b3f51b19de7c..ae3ad897d2ae 100644
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@@ -240,21 +240,21 @@ ReST 마크업을 사용하는 문서들은 Documentation/output 에 생성된�
 서브시스템에 특화된 커널 브랜치들로 구성된다. 몇몇 다른 메인
 브랜치들은 다음과 같다.
 
-  - main 4.x 커널 트리
-  - 4.x.y - 안정된 커널 트리
-  - 서브시스템을 위한 커널 트리들과 패치들
-  - 4.x - 통합 테스트를 위한 next 커널 트리
+  - 리누스의 메인라인 트리
+  - 여러 메이저 넘버를 갖는 다양한 안정된 커널 트리들
+  - 서브시스템을 위한 커널 트리들
+  - 통합 테스트를 위한 linux-next 커널 트리
 
-4.x 커널 트리
+메인라인 트리
 ~~~~~~~~~~~~~
 
-4.x 커널들은 Linus Torvalds가 관리하며 https://kernel.org 의
-pub/linux/kernel/v4.x/ 디렉토리에서 참조될 수 있다.개발 프로세스는 다음과 같다.
+메인라인 트리는 Linus Torvalds가 관리하며 https://kernel.org  또는 소스
+저장소에서 참조될 수 있다.개발 프로세스는 다음과 같다.
 
   - 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간동은
     메인테이너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
-    몇 주 동안 -next 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
-    선호되는 방법은  git(커널의 소스 관리 툴, 더 많은 정보들은
+    몇 주 동안 linux-next 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는
+    데 선호되는 방법은  git(커널의 소스 관리 툴, 더 많은 정보들은
     https://git-scm.com/ 에서 참조할 수 있다)를 사용하는 것이지만 순수한
     패치파일의 형식으로 보내는 것도 무관하다.
   - 2주 후에 -rc1 커널이 릴리즈되며 여기서부터의 주안점은 새로운 커널을
@@ -281,28 +281,25 @@ Andrew Morton의 글이 있다.
         버그의 상황에 따라 배포되는 것이지 미리정해 놓은 시간에 따라
         배포되는 것은 아니기 때문이다."*
 
-4.x.y - 안정 커널 트리
-~~~~~~~~~~~~~~~~~~~~~~
+여러 메이저 넘버를 갖는 다양한 안정된 커널 트리들
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-3 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 4.x
-커널에서 발견된 큰 회귀들이나 보안 문제들 중 비교적 작고 중요한 수정들을
-포함한다.
+3 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 해당 메이저
+메인라인 릴리즈에서 발견된 큰 회귀들이나 보안 문제들 중 비교적 작고 중요한
+수정들을 포함하며, 앞의 두 버전 넘버는 같은 기반 버전을 의미한다.
 
 이것은 가장 최근의 안정적인 커널을 원하는 사용자에게 추천되는 브랜치이며,
 개발/실험적 버젼을 테스트하는 것을 돕고자 하는 사용자들과는 별로 관련이 없다.
 
-어떤 4.x.y 커널도 사용할 수 없다면 그때는 가장 높은 숫자의 4.x
-커널이 현재의 안정 커널이다.
-
-4.x.y는 "stable" 팀<stable@vger.kernel.org>에 의해 관리되며 거의 매번 격주로
-배포된다.
+-stable 트리들은 "stable" 팀<stable@vger.kernel.org>에 의해 관리되며 거의 매번
+격주로 배포된다.
 
 커널 트리 문서들 내의 :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
 파일은 어떤 종류의 변경들이 -stable 트리로 들어왔는지와
 배포 프로세스가 어떻게 진행되는지를 설명한다.
 
-서브시스템 커널 트리들과 패치들
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+서브시스템 커널 트리들
+~~~~~~~~~~~~~~~~~~~~~~
 
 다양한 커널 서브시스템의 메인테이너들 --- 그리고 많은 커널 서브시스템 개발자들
 --- 은 그들의 현재 개발 상태를 소스 저장소로 노출한다. 이를 통해 다른 사람들도
@@ -324,17 +321,18 @@ Andrew Morton의 글이 있다.
 대부분의 이러한 patchwork 사이트는 https://patchwork.kernel.org/ 또는
 http://patchwork.ozlabs.org/ 에 나열되어 있다.
 
-4.x - 통합 테스트를 위한 next 커널 트리
----------------------------------------
-서브시스템 트리들의 변경사항들은 mainline 4.x 트리로 들어오기 전에 통합
-테스트를 거쳐야 한다. 이런 목적으로, 모든 서브시스템 트리의 변경사항을 거의
-매일 받아가는 특수한 테스트 저장소가 존재한다:
+통합 테스트를 위한 linux-next 커널 트리
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+서브시스템 트리들의 변경사항들은 mainline 트리로 들어오기 전에 통합 테스트를
+거쳐야 한다. 이런 목적으로, 모든 서브시스템 트리의 변경사항을 거의 매일
+받아가는 특수한 테스트 저장소가 존재한다:
 
        https://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
 
-이런 식으로, -next 커널을 통해 다음 머지 기간에 메인라인 커널에 어떤 변경이
-가해질 것인지 간략히 알 수 있다. 모험심 강한 테스터라면 -next 커널에서 테스트를
-수행하는 것도 좋을 것이다.
+이런 식으로, linux-next 커널을 통해 다음 머지 기간에 메인라인 커널에 어떤
+변경이 가해질 것인지 간략히 알 수 있다. 모험심 강한 테스터라면 linux-next
+커널에서 테스트를 수행하는 것도 좋을 것이다.
 
 
 버그 보고
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index 0b695345abc7..27995c4233de 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -3,8 +3,8 @@
         \renewcommand\thesection*
         \renewcommand\thesubsection*
 
-Korean translations
-===================
+한국어 번역
+===========
 
 .. toctree::
    :maxdepth: 1
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 2774624ee843..2e831ece6e26 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -1907,21 +1907,6 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효�
      위해선 Documentation/DMA-API.txt 문서를 참고하세요.
 
 
-MMIO 쓰기 배리어
-----------------
-
-리눅스 커널은 또한 memory-mapped I/O 쓰기를 위한 특별한 배리어도 가지고
-있습니다:
-
-	mmiowb();
-
-이것은 mandatory 쓰기 배리어의 변종으로, 완화된 순서 규칙의 I/O 영역에으로의
-쓰기가 부분적으로 순서를 맞추도록 해줍니다.  이 함수는 CPU->하드웨어 사이를
-넘어서 실제 하드웨어에까지 일부 수준의 영향을 끼칩니다.
-
-더 많은 정보를 위해선 "Acquire vs I/O 액세스" 서브섹션을 참고하세요.
-
-
 =========================
 암묵적 커널 메모리 배리어
 =========================
@@ -2283,73 +2268,6 @@ ACQUIRE VS 메모리 액세스
 	*E, *F or *G following RELEASE Q
 
 
-
-ACQUIRE VS I/O 액세스
-----------------------
-
-특정한 (특히 NUMA 가 관련된) 환경 하에서 두개의 CPU 에서 동일한 스핀락으로
-보호되는 두개의 크리티컬 섹션 안의 I/O 액세스는 PCI 브릿지에 겹쳐진 I/O
-액세스로 보일 수 있는데, PCI 브릿지는 캐시 일관성 프로토콜과 합을 맞춰야 할
-의무가 없으므로, 필요한 읽기 메모리 배리어가 요청되지 않기 때문입니다.
-
-예를 들어서:
-
-	CPU 1				CPU 2
-	===============================	===============================
-	spin_lock(Q)
-	writel(0, ADDR)
-	writel(1, DATA);
-	spin_unlock(Q);
-					spin_lock(Q);
-					writel(4, ADDR);
-					writel(5, DATA);
-					spin_unlock(Q);
-
-는 PCI 브릿지에 다음과 같이 보일 수 있습니다:
-
-	STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
-
-이렇게 되면 하드웨어의 오동작을 일으킬 수 있습니다.
-
-
-이런 경우엔 잡아둔 스핀락을 내려놓기 전에 mmiowb() 를 수행해야 하는데, 예를
-들면 다음과 같습니다:
-
-	CPU 1				CPU 2
-	===============================	===============================
-	spin_lock(Q)
-	writel(0, ADDR)
-	writel(1, DATA);
-	mmiowb();
-	spin_unlock(Q);
-					spin_lock(Q);
-					writel(4, ADDR);
-					writel(5, DATA);
-					mmiowb();
-					spin_unlock(Q);
-
-이 코드는 CPU 1 에서 요청된 두개의 스토어가 PCI 브릿지에 CPU 2 에서 요청된
-스토어들보다 먼저 보여짐을 보장합니다.
-
-
-또한, 같은 디바이스에서 스토어를 이어 로드가 수행되면 이 로드는 로드가 수행되기
-전에 스토어가 완료되기를 강제하므로 mmiowb() 의 필요가 없어집니다:
-
-	CPU 1				CPU 2
-	===============================	===============================
-	spin_lock(Q)
-	writel(0, ADDR)
-	a = readl(DATA);
-	spin_unlock(Q);
-					spin_lock(Q);
-					writel(4, ADDR);
-					b = readl(DATA);
-					spin_unlock(Q);
-
-
-더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하세요.
-
-
 =========================
 메모리 배리어가 필요한 곳
 =========================
@@ -2494,14 +2412,9 @@ _않습니다_.
 리눅스 커널 내부에서, I/O 는 어떻게 액세스들을 적절히 순차적이게 만들 수 있는지
 알고 있는, - inb() 나 writel() 과 같은 - 적절한 액세스 루틴을 통해 이루어져야만
 합니다.  이것들은 대부분의 경우에는 명시적 메모리 배리어 와 함께 사용될 필요가
-없습니다만, 다음의 두가지 상황에서는 명시적 메모리 배리어가 필요할 수 있습니다:
-
- (1) 일부 시스템에서 I/O 스토어는 모든 CPU 에 일관되게 순서 맞춰지지 않는데,
-     따라서 _모든_ 일반적인 드라이버들에 락이 사용되어야만 하고 이 크리티컬
-     섹션을 빠져나오기 전에 mmiowb() 가 꼭 호출되어야 합니다.
-
- (2) 만약 액세스 함수들이 완화된 메모리 액세스 속성을 갖는 I/O 메모리 윈도우를
-     사용한다면, 순서를 강제하기 위해선 _mandatory_ 메모리 배리어가 필요합니다.
+없습니다만, 완화된 메모리 액세스 속성으로 I/O 메모리 윈도우로의 참조를 위해
+액세스 함수가 사용된다면 순서를 강제하기 위해 _mandatory_ 메모리 배리어가
+필요합니다.
 
 더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하십시오.
 
@@ -2545,10 +2458,9 @@ _않습니다_.
 인터럽트 내에서 일어난 액세스와 섞일 수 있다고 - 그리고 그 반대도 - 가정해야만
 합니다.
 
-그런 영역 안에서 일어나는 I/O 액세스들은 엄격한 순서 규칙의 I/O 레지스터에
-묵시적 I/O 배리어를 형성하는 동기적 (synchronous) 로드 오퍼레이션을 포함하기
-때문에 일반적으로는 이런게 문제가 되지 않습니다.  만약 이걸로는 충분치 않다면
-mmiowb() 가 명시적으로 사용될 필요가 있습니다.
+그런 영역 안에서 일어나는 I/O 액세스는 묵시적 I/O 배리어를 형성하는, 엄격한
+순서 규칙의 I/O 레지스터로의 로드 오퍼레이션을 포함하기 때문에 일반적으로는
+문제가 되지 않습니다.
 
 
 하나의 인터럽트 루틴과 별도의 CPU 에서 수행중이며 서로 통신을 하는 두 루틴
@@ -2560,67 +2472,102 @@ mmiowb() 가 명시적으로 사용될 필요가 있습니다.
 커널 I/O 배리어의 효과
 ======================
 
-I/O 메모리에 액세스할 때, 드라이버는 적절한 액세스 함수를 사용해야 합니다:
+I/O 액세스를 통한 주변장치와의 통신은 아키텍쳐와 기기에 매우 종속적입니다.
+따라서, 본질적으로 이식성이 없는 드라이버는 가능한 가장 적은 오버헤드로
+동기화를 하기 위해 각자의 타겟 시스템의 특정 동작에 의존할 겁니다.  다양한
+아키텍쳐와 버스 구현에 이식성을 가지려 하는 드라이버를 위해, 커널은 다양한
+정도의 순서 보장을 제공하는 일련의 액세스 함수를 제공합니다.
 
- (*) inX(), outX():
-
-     이것들은 메모리 공간보다는 I/O 공간에 이야기를 하려는 의도로
-     만들어졌습니다만, 그건 기본적으로 CPU 마다 다른 컨셉입니다.  i386 과
-     x86_64 프로세서들은 특별한 I/O 공간 액세스 사이클과 명령어를 실제로 가지고
-     있지만, 다른 많은 CPU 들에는 그런 컨셉이 존재하지 않습니다.
-
-     다른 것들 중에서도 PCI 버스가 I/O 공간 컨셉을 정의하는데, 이는 - i386 과
-     x86_64 같은 CPU 에서 - CPU 의 I/O 공간 컨셉으로 쉽게 매치됩니다.  하지만,
-     대체할 I/O 공간이 없는 CPU 에서는 CPU 의 메모리 맵의 가상 I/O 공간으로
-     매핑될 수도 있습니다.
-
-     이 공간으로의 액세스는 (i386 등에서는) 완전하게 동기화 됩니다만, 중간의
-     (PCI 호스트 브리지와 같은) 브리지들은 이를 완전히 보장하진 않을수도
-     있습니다.
+ (*) readX(), writeX():
 
-     이것들의 상호간의 순서는 완전하게 보장됩니다.
+	readX() 와 writeX() MMIO 액세스 함수는 접근되는 주변장치로의 포인터를
+	__iomem * 패러미터로 받습니다.  디폴트 I/O 기능으로 매핑되는 포인터
+	(예: ioremap() 으로 반환되는 것) 의 순서 보장은 다음과 같습니다:
+
+	1. 같은 주변장치로의 모든 readX() 와 writeX() 액세스는 각자에 대해
+	   순서지어집니다.  이는 같은 CPU 쓰레드에 의한 특정 디바이스로의 MMIO
+	   레지스터 액세스가 프로그램 순서대로 도착할 것을 보장합니다.
+
+	2. 한 스핀락을 잡은 CPU 쓰레드에 의한 writeX() 는 같은 스핀락을 나중에
+	   잡은 다른 CPU 쓰레드에 의해 같은 주변장치를 향해 호출된 writeX()
+	   앞으로 순서지어집니다.  이는 스핀락을 잡은 채 특정 디바이스를 향해
+	   호출된 MMIO 레지스터 쓰기는 해당 락의 획득에 일관적인 순서로 도달할
+	   것을 보장합니다.
+
+	3. 특정 주변장치를 향한 특정 CPU 쓰레드의 writeX() 는 먼저 해당
+	   쓰레드로 전파되는, 또는 해당 쓰레드에 의해 요청된 모든 앞선 메모리
+	   쓰기가 완료되기 전까지 먼저 기다립니다.  이는 dma_alloc_coherent()
+	   를 통해 할당된 전송용 DMA 버퍼로의 해당 CPU 의 쓰기가 이 CPU 가 이
+	   전송을 시작시키기 위해 MMIO 컨트롤 레지스터에 쓰기를 할 때 DMA
+	   엔진에 보여질 것을 보장합니다.
+
+	4. 특정 CPU 쓰레드에 의한 주변장치로의 readX() 는 같은 쓰레드에 의한
+	   모든 뒤따르는 메모리 읽기가 시작되기 전에 완료됩니다.  이는
+	   dma_alloc_coherent() 를 통해 할당된 수신용 DMA 버퍼로부터의 CPU 의
+	   읽기는 이 DMA 수신의 완료를 표시하는 DMA 엔진의 MMIO 상태 레지스터
+	   읽기 후에는 오염된 데이터를 읽지 않을 것을 보장합니다.
+
+	5. CPU 에 의한 주변장치로의 readX() 는 모든 뒤따르는 delay() 루프가
+	   수행을 시작하기 전에 완료됩니다.  이는 CPU 의 특정
+	   주변장치로의 두개의 MMIO 레지스터 쓰기가 행해지는데 첫번째 쓰기가
+	   readX() 를 통해 곧바로 읽어졌고 이어 두번째 writeX() 전에 udelay(1)
+	   이 호출되었다면 이 두개의 쓰기는 최소 1us 의 간격을 두고 행해질 것을
+	   보장합니다:
+
+		writel(42, DEVICE_REGISTER_0); // 디바이스에 도착함...
+		readl(DEVICE_REGISTER_0);
+		udelay(1);
+		writel(42, DEVICE_REGISTER_1); // ...이것보다 최소 1us 전에.
+
+	디폴트가 아닌 기능을 통해 얻어지는 __iomem 포인터 (예: ioremap_wc() 를
+	통해 리턴되는 것) 의 순서 속성은 실제 아키텍쳐에 의존적이어서 이런
+	종류의 매핑으로의 액세스는 앞서 설명된 보장사항에 의존할 수 없습니다.
 
-     다른 타입의 메모리 오퍼레이션, I/O 오퍼레이션에 대한 순서는 완전하게
-     보장되지는 않습니다.
+ (*) readX_relaxed(), writeX_relaxed()
 
- (*) readX(), writeX():
+	이것들은 readX() 와 writeX() 랑 비슷하지만, 더 완화된 메모리 순서
+	보장을 제공합니다.  구체적으로, 이것들은 일반적 메모리 액세스나 delay()
+	루프 (예:앞의 2-5 항목) 에 대해 순서를 보장하지 않습니다만 디폴트 I/O
+	기능으로 매핑된 __iomem 포인터에 대해 동작할 때, 같은 CPU 쓰레드에 의한
+	같은 주변장치로의 액세스에는 순서가 맞춰질 것이 보장됩니다.
 
-     이것들이 수행 요청되는 CPU 에서 서로에게 완전히 순서가 맞춰지고 독립적으로
-     수행되는지에 대한 보장 여부는 이들이 액세스 하는 메모리 윈도우에 정의된
-     특성에 의해 결정됩니다.  예를 들어, 최신의 i386 아키텍쳐 머신에서는 MTRR
-     레지스터로 이 특성이 조정됩니다.
+ (*) readsX(), writesX():
 
-     일반적으로는, 프리페치 (prefetch) 가능한 디바이스를 액세스 하는게
-     아니라면, 이것들은 완전히 순서가 맞춰지고 결합되지 않게 보장될 겁니다.
+	readsX() 와 writesX() MMIO 액세스 함수는 DMA 를 수행하는데 적절치 않은,
+	주변장치 내의 메모리 매핑된 레지스터 기반 FIFO 로의 액세스를 위해
+	설계되었습니다.  따라서, 이 기능들은 앞서 설명된 readX_relaxed() 와
+	writeX_relaxed() 의 순서 보장만을 제공합니다.
 
-     하지만, (PCI 브리지와 같은) 중간의 하드웨어는 자신이 원한다면 집행을
-     연기시킬 수 있습니다; 스토어 명령을 실제로 하드웨어로 내려보내기(flush)
-     위해서는 같은 위치로부터 로드를 하는 방법이 있습니다만[*], PCI 의 경우는
-     같은 디바이스나 환경 구성 영역에서의 로드만으로도 충분할 겁니다.
+ (*) inX(), outX():
 
-     [*] 주의! 쓰여진 것과 같은 위치로부터의 로드를 시도하는 것은 오동작을
-	 일으킬 수도 있습니다 - 예로 16650 Rx/Tx 시리얼 레지스터를 생각해
-	 보세요.
+	inX() 와 outX() 액세스 함수는 일부 아키텍쳐 (특히 x86) 에서는 특수한
+	명령어를 필요로 하며 포트에 매핑되는, 과거의 유산인 I/O 주변장치로의
+	접근을 위해 만들어졌습니다.
 
-     프리페치 가능한 I/O 메모리가 사용되면, 스토어 명령들이 순서를 지키도록
-     하기 위해 mmiowb() 배리어가 필요할 수 있습니다.
+	많은 CPU 아키텍쳐가 결국은 이런 주변장치를 내부의 가상 메모리 매핑을
+	통해 접근하기 때문에, inX() 와 outX() 가 제공하는 이식성 있는 순서
+	보장은 디폴트 I/O 기능을 통한 매핑을 접근할 때의 readX() 와 writeX() 에
+	의해 제공되는 것과 각각 동일합니다.
 
-     PCI 트랜잭션 사이의 상호작용에 대해 더 많은 정보를 위해선 PCI 명세서를
-     참고하시기 바랍니다.
+	디바이스 드라이버는 outX() 가 리턴하기 전에 해당 I/O 주변장치로부터의
+	완료 응답을 기다리는 쓰기 트랜잭션을 만들어 낸다고 기대할 수도
+	있습니다.  이는 모든 아키텍쳐에서 보장되지는 않고, 따라서 이식성 있는
+	순서 규칙의 일부분이 아닙니다.
 
- (*) readX_relaxed(), writeX_relaxed()
+ (*) insX(), outsX():
 
-     이것들은 readX() 와 writeX() 랑 비슷하지만, 더 완화된 메모리 순서 보장을
-     제공합니다.  구체적으로, 이것들은 일반적 메모리 액세스 (예: DMA 버퍼) 에도
-     LOCK 이나 UNLOCK 오퍼레이션들에도 순서를 보장하지 않습니다.  LOCK 이나
-     UNLOCK 오퍼레이션들에 맞춰지는 순서가 필요하다면, mmiowb() 배리어가 사용될
-     수 있습니다.  같은 주변 장치에의 완화된 액세스끼리는 순서가 지켜짐을 알아
-     두시기 바랍니다.
+	앞에서와 같이, insX() 와 outsX() 액세스 함수는 디폴트 I/O 기능을 통한
+	매핑을 접근할 때 각각 readX() 와 writeX() 와 같은 순서 보장을
+	제공합니다.
 
  (*) ioreadX(), iowriteX()
 
-     이것들은 inX()/outX() 나 readX()/writeX() 처럼 실제로 수행하는 액세스의
-     종류에 따라 적절하게 수행될 것입니다.
+	이것들은 inX()/outX() 나 readX()/writeX() 처럼 실제로 수행하는 액세스의
+	종류에 따라 적절하게 수행될 것입니다.
+
+String 액세스 함수 (insX(), outsX(), readsX() 그리고 writesX()) 의 예외를
+제외하고는, 앞의 모든 것이 아랫단의 주변장치가 little-endian 이라 가정하며,
+따라서 big-endian 아키텍쳐에서는 byte-swapping 오퍼레이션을 수행합니다.
 
 
 ===================================
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index 4f6237392e65..eae10bc7f86f 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -826,7 +826,7 @@ inline gcc 也可以自动使其内联。而且其他用户可能会要求移除
 
 .. code-block:: c
 
-	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+	#define sizeof_field(t, f) (sizeof(((t*)0)->f))
 
 还有可以做严格的类型检查的 min() 和 max() 宏，如果你需要可以使用它们。你可以
 自己看看那个头文件里还定义了什么你可以拿来用的东西，如果有定义的话，你就不应
diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
new file mode 100644
index 000000000000..b93f1af68261
--- /dev/null
+++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
@@ -0,0 +1,228 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/process/embargoed-hardware-issues.rst <embargoed_hardware_issues>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+
+被限制的硬件问题
+================
+
+范围
+----
+
+导致安全问题的硬件问题与只影响Linux内核的纯软件错误是不同的安全错误类别。
+
+必须区别对待诸如熔毁(Meltdown)、Spectre、L1TF等硬件问题，因为它们通常会影响
+所有操作系统（“OS”），因此需要在不同的OS供应商、发行版、硬件供应商和其他各方
+之间进行协调。对于某些问题，软件缓解可能依赖于微码或固件更新，这需要进一步的
+协调。
+
+.. _zh_Contact:
+
+接触
+----
+
+Linux内核硬件安全小组独立于普通的Linux内核安全小组。
+
+该小组只负责协调被限制的硬件安全问题。Linux内核中纯软件安全漏洞的报告不由该
+小组处理，报告者将被引导至常规Linux内核安全小组(:ref:`Documentation/admin-guide/
+<securitybugs>`)联系。
+
+可以通过电子邮件 <hardware-security@kernel.org> 与小组联系。这是一份私密的安全
+官名单，他们将帮助您根据我们的文档化流程协调问题。
+
+邮件列表是加密的，发送到列表的电子邮件可以通过PGP或S/MIME加密，并且必须使用报告
+者的PGP密钥或S/MIME证书签名。该列表的PGP密钥和S/MIME证书可从
+https://www.kernel.org/.... 获得。
+
+虽然硬件安全问题通常由受影响的硬件供应商处理，但我们欢迎发现潜在硬件缺陷的研究
+人员或个人与我们联系。
+
+硬件安全官
+^^^^^^^^^^
+
+目前的硬件安全官小组:
+
+  - Linus Torvalds（Linux基金会院士）
+  - Greg Kroah Hartman（Linux基金会院士）
+  - Thomas Gleixner（Linux基金会院士）
+
+邮件列表的操作
+^^^^^^^^^^^^^^
+
+处理流程中使用的加密邮件列表托管在Linux Foundation的IT基础设施上。通过提供这项
+服务，Linux基金会的IT基础设施安全总监在技术上有能力访问被限制的信息，但根据他
+的雇佣合同，他必须保密。Linux基金会的IT基础设施安全总监还负责 kernel.org 基础
+设施。
+
+Linux基金会目前的IT基础设施安全总监是 Konstantin Ryabitsev。
+
+保密协议
+--------
+
+Linux内核硬件安全小组不是正式的机构，因此无法签订任何保密协议。核心社区意识到
+这些问题的敏感性，并提供了一份谅解备忘录。
+
+谅解备忘录
+----------
+
+Linux内核社区深刻理解在不同操作系统供应商、发行商、硬件供应商和其他各方之间
+进行协调时，保持硬件安全问题处于限制状态的要求。
+
+Linux内核社区在过去已经成功地处理了硬件安全问题，并且有必要的机制允许在限制
+限制下进行符合社区的开发。
+
+Linux内核社区有一个专门的硬件安全小组负责初始联系，并监督在限制规则下处理
+此类问题的过程。
+
+硬件安全小组确定开发人员（领域专家），他们将组成特定问题的初始响应小组。最初
+的响应小组可以引入更多的开发人员（领域专家）以最佳的技术方式解决这个问题。
+
+所有相关开发商承诺遵守限制规定，并对收到的信息保密。违反承诺将导致立即从当前
+问题中排除，并从所有相关邮件列表中删除。此外，硬件安全小组还将把违反者排除在
+未来的问题之外。这一后果的影响在我们社区是一种非常有效的威慑。如果发生违规
+情况，硬件安全小组将立即通知相关方。如果您或任何人发现潜在的违规行为，请立即
+向硬件安全人员报告。
+
+流程
+^^^^
+
+由于Linux内核开发的全球分布式特性，面对面的会议几乎不可能解决硬件安全问题。
+由于时区和其他因素，电话会议很难协调，只能在绝对必要时使用。加密电子邮件已被
+证明是解决此类问题的最有效和最安全的通信方法。
+
+开始披露
+""""""""
+
+披露内容首先通过电子邮件联系Linux内核硬件安全小组。此初始联系人应包含问题的
+描述和任何已知受影响硬件的列表。如果您的组织制造或分发受影响的硬件，我们建议
+您也考虑哪些其他硬件可能会受到影响。
+
+硬件安全小组将提供一个特定于事件的加密邮件列表，用于与报告者进行初步讨论、
+进一步披露和协调。
+
+硬件安全小组将向披露方提供一份开发人员（领域专家）名单，在与开发人员确认他们
+将遵守本谅解备忘录和文件化流程后，应首先告知开发人员有关该问题的信息。这些开发
+人员组成初始响应小组，并在初始接触后负责处理问题。硬件安全小组支持响应小组，
+但不一定参与缓解开发过程。
+
+虽然个别开发人员可能通过其雇主受到保密协议的保护，但他们不能以Linux内核开发
+人员的身份签订个别保密协议。但是，他们将同意遵守这一书面程序和谅解备忘录。
+
+披露方应提供已经或应该被告知该问题的所有其他实体的联系人名单。这有几个目的:
+
+  - 披露的实体列表允许跨行业通信，例如其他操作系统供应商、硬件供应商等。
+
+  - 可联系已披露的实体，指定应参与缓解措施开发的专家。
+
+  - 如果需要处理某一问题的专家受雇于某一上市实体或某一上市实体的成员，则响应
+    小组可要求该实体披露该专家。这确保专家也是实体反应小组的一部分。
+
+披露
+""""
+
+披露方通过特定的加密邮件列表向初始响应小组提供详细信息。
+
+根据我们的经验，这些问题的技术文档通常是一个足够的起点，最好通过电子邮件进行
+进一步的技术澄清。
+
+缓解开发
+""""""""
+
+初始响应小组设置加密邮件列表，或在适当的情况下重新修改现有邮件列表。
+
+使用邮件列表接近于正常的Linux开发过程，并且在过去已经成功地用于为各种硬件安全
+问题开发缓解措施。
+
+邮件列表的操作方式与正常的Linux开发相同。发布、讨论和审查修补程序，如果同意，
+则应用于非公共git存储库，参与开发人员只能通过安全连接访问该存储库。存储库包含
+针对主线内核的主开发分支，并根据需要为稳定的内核版本提供向后移植分支。
+
+最初的响应小组将根据需要从Linux内核开发人员社区中确定更多的专家。引进专家可以
+在开发过程中的任何时候发生，需要及时处理。
+
+如果专家受雇于披露方提供的披露清单上的实体或其成员，则相关实体将要求其参与。
+
+否则，披露方将被告知专家参与的情况。谅解备忘录涵盖了专家，要求披露方确认参与。
+如果披露方有令人信服的理由提出异议，则必须在五个工作日内提出异议，并立即与事件
+小组解决。如果披露方在五个工作日内未作出回应，则视为默许。
+
+在确认或解决异议后，专家由事件小组披露，并进入开发过程。
+
+协调发布
+""""""""
+
+有关各方将协商限制结束的日期和时间。此时，准备好的缓解措施集成到相关的内核树中
+并发布。
+
+虽然我们理解硬件安全问题需要协调限制时间，但限制时间应限制在所有有关各方制定、
+测试和准备缓解措施所需的最短时间内。人为地延长限制时间以满足会议讨论日期或其他
+非技术原因，会给相关的开发人员和响应小组带来了更多的工作和负担，因为补丁需要
+保持最新，以便跟踪正在进行的上游内核开发，这可能会造成冲突的更改。
+
+CVE分配
+"""""""
+
+硬件安全小组和初始响应小组都不分配CVE，开发过程也不需要CVE。如果CVE是由披露方
+提供的，则可用于文档中。
+
+流程专使
+--------
+
+为了协助这一进程，我们在各组织设立了专使，他们可以回答有关报告流程和进一步处理
+的问题或提供指导。专使不参与特定问题的披露，除非响应小组或相关披露方提出要求。
+现任专使名单:
+
+  ============= ========================================================
+  ARM
+  AMD		Tom Lendacky <tom.lendacky@amd.com>
+  IBM
+  Intel		Tony Luck <tony.luck@intel.com>
+  Qualcomm	Trilok Soni <tsoni@codeaurora.org>
+
+  Microsoft	Sasha Levin <sashal@kernel.org>
+  VMware
+  Xen		Andrew Cooper <andrew.cooper3@citrix.com>
+
+  Canonical	Tyler Hicks <tyhicks@canonical.com>
+  Debian	Ben Hutchings <ben@decadent.org.uk>
+  Oracle	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+  Red Hat	Josh Poimboeuf <jpoimboe@redhat.com>
+  SUSE		Jiri Kosina <jkosina@suse.cz>
+
+  Amazon
+  Google	Kees Cook <keescook@chromium.org>
+  ============= ========================================================
+
+如果要将您的组织添加到专使名单中，请与硬件安全小组联系。被提名的专使必须完全
+理解和支持我们的过程，并且在Linux内核社区中很容易联系。
+
+加密邮件列表
+------------
+
+我们使用加密邮件列表进行通信。这些列表的工作原理是，发送到列表的电子邮件使用
+列表的PGP密钥或列表的/MIME证书进行加密。邮件列表软件对电子邮件进行解密，并
+使用订阅者的PGP密钥或S/MIME证书为每个订阅者分别对其进行重新加密。有关邮件列表
+软件和用于确保列表安全和数据保护的设置的详细信息，请访问:
+https://www.kernel.org/....
+
+关键点
+^^^^^^
+
+初次接触见 :ref:`zh_Contact`. 对于特定于事件的邮件列表，密钥和S/MIME证书通过
+特定列表发送的电子邮件传递给订阅者。
+
+订阅事件特定列表
+^^^^^^^^^^^^^^^^
+
+订阅由响应小组处理。希望参与通信的披露方将潜在订户的列表发送给响应组，以便
+响应组可以验证订阅请求。
+
+每个订户都需要通过电子邮件向响应小组发送订阅请求。电子邮件必须使用订阅服务器
+的PGP密钥或S/MIME证书签名。如果使用PGP密钥，则必须从公钥服务器获得该密钥，
+并且理想情况下该密钥连接到Linux内核的PGP信任网。另请参见:
+https://www.kernel.org/signature.html.
+
+响应小组验证订阅者，并将订阅者添加到列表中。订阅后，订阅者将收到来自邮件列表
+的电子邮件，该邮件列表使用列表的PGP密钥或列表的/MIME证书签名。订阅者的电子邮件
+客户端可以从签名中提取PGP密钥或S/MIME证书，以便订阅者可以向列表发送加密电子
+邮件。
diff --git a/Documentation/translations/zh_CN/process/index.rst b/Documentation/translations/zh_CN/process/index.rst
index be1e764a80d2..8051a7b322c5 100644
--- a/Documentation/translations/zh_CN/process/index.rst
+++ b/Documentation/translations/zh_CN/process/index.rst
@@ -31,6 +31,8 @@
    development-process
    email-clients
    license-rules
+   kernel-enforcement-statement
+   kernel-driver-statement
 
 其它大多数开发人员感兴趣的社区指南：
 
@@ -43,6 +45,7 @@
    stable-api-nonsense
    stable-kernel-rules
    management-style
+   embargoed-hardware-issues
 
 这些是一些总体技术指南，由于缺乏更好的地方，现在已经放在这里
 
diff --git a/Documentation/translations/zh_CN/process/kernel-driver-statement.rst b/Documentation/translations/zh_CN/process/kernel-driver-statement.rst
new file mode 100644
index 000000000000..2b3375bcccfd
--- /dev/null
+++ b/Documentation/translations/zh_CN/process/kernel-driver-statement.rst
@@ -0,0 +1,199 @@
+.. _cn_process_statement_driver:
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/process/kernel-driver-statement.rst <process_statement_driver>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+
+内核驱动声明
+------------
+
+关于Linux内核模块的立场声明
+===========================
+
+我们，以下署名的Linux内核开发人员，认为任何封闭源Linux内核模块或驱动程序都是
+有害的和不可取的。我们已经一再发现它们对Linux用户，企业和更大的Linux生态系统
+有害。这样的模块否定了Linux开发模型的开放性，稳定性，灵活性和可维护性，并使
+他们的用户无法使用Linux社区的专业知识。提供闭源内核模块的供应商迫使其客户
+放弃Linux的主要优势或选择新的供应商。因此，为了充分利用开源所提供的成本节省和
+共享支持优势，我们敦促供应商采取措施，以开源内核代码在Linux上为其客户提供支持。
+
+我们只为自己说话，而不是我们今天可能会为之工作，过去或将来会为之工作的任何公司。
+
+ - Dave Airlie
+ - Nick Andrew
+ - Jens Axboe
+ - Ralf Baechle
+ - Felipe Balbi
+ - Ohad Ben-Cohen
+ - Muli Ben-Yehuda
+ - Jiri Benc
+ - Arnd Bergmann
+ - Thomas Bogendoerfer
+ - Vitaly Bordug
+ - James Bottomley
+ - Josh Boyer
+ - Neil Brown
+ - Mark Brown
+ - David Brownell
+ - Michael Buesch
+ - Franck Bui-Huu
+ - Adrian Bunk
+ - François Cami
+ - Ralph Campbell
+ - Luiz Fernando N. Capitulino
+ - Mauro Carvalho Chehab
+ - Denis Cheng
+ - Jonathan Corbet
+ - Glauber Costa
+ - Alan Cox
+ - Magnus Damm
+ - Ahmed S. Darwish
+ - Robert P. J. Day
+ - Hans de Goede
+ - Arnaldo Carvalho de Melo
+ - Helge Deller
+ - Jean Delvare
+ - Mathieu Desnoyers
+ - Sven-Thorsten Dietrich
+ - Alexey Dobriyan
+ - Daniel Drake
+ - Alex Dubov
+ - Randy Dunlap
+ - Michael Ellerman
+ - Pekka Enberg
+ - Jan Engelhardt
+ - Mark Fasheh
+ - J. Bruce Fields
+ - Larry Finger
+ - Jeremy Fitzhardinge
+ - Mike Frysinger
+ - Kumar Gala
+ - Robin Getz
+ - Liam Girdwood
+ - Jan-Benedict Glaw
+ - Thomas Gleixner
+ - Brice Goglin
+ - Cyrill Gorcunov
+ - Andy Gospodarek
+ - Thomas Graf
+ - Krzysztof Halasa
+ - Harvey Harrison
+ - Stephen Hemminger
+ - Michael Hennerich
+ - Tejun Heo
+ - Benjamin Herrenschmidt
+ - Kristian Høgsberg
+ - Henrique de Moraes Holschuh
+ - Marcel Holtmann
+ - Mike Isely
+ - Takashi Iwai
+ - Olof Johansson
+ - Dave Jones
+ - Jesper Juhl
+ - Matthias Kaehlcke
+ - Kenji Kaneshige
+ - Jan Kara
+ - Jeremy Kerr
+ - Russell King
+ - Olaf Kirch
+ - Roel Kluin
+ - Hans-Jürgen Koch
+ - Auke Kok
+ - Peter Korsgaard
+ - Jiri Kosina
+ - Aaro Koskinen
+ - Mariusz Kozlowski
+ - Greg Kroah-Hartman
+ - Michael Krufky
+ - Aneesh Kumar
+ - Clemens Ladisch
+ - Christoph Lameter
+ - Gunnar Larisch
+ - Anders Larsen
+ - Grant Likely
+ - John W. Linville
+ - Yinghai Lu
+ - Tony Luck
+ - Pavel Machek
+ - Matt Mackall
+ - Paul Mackerras
+ - Roland McGrath
+ - Patrick McHardy
+ - Kyle McMartin
+ - Paul Menage
+ - Thierry Merle
+ - Eric Miao
+ - Akinobu Mita
+ - Ingo Molnar
+ - James Morris
+ - Andrew Morton
+ - Paul Mundt
+ - Oleg Nesterov
+ - Luca Olivetti
+ - S.Çağlar Onur
+ - Pierre Ossman
+ - Keith Owens
+ - Venkatesh Pallipadi
+ - Nick Piggin
+ - Nicolas Pitre
+ - Evgeniy Polyakov
+ - Richard Purdie
+ - Mike Rapoport
+ - Sam Ravnborg
+ - Gerrit Renker
+ - Stefan Richter
+ - David Rientjes
+ - Luis R. Rodriguez
+ - Stefan Roese
+ - Francois Romieu
+ - Rami Rosen
+ - Stephen Rothwell
+ - Maciej W. Rozycki
+ - Mark Salyzyn
+ - Yoshinori Sato
+ - Deepak Saxena
+ - Holger Schurig
+ - Amit Shah
+ - Yoshihiro Shimoda
+ - Sergei Shtylyov
+ - Kay Sievers
+ - Sebastian Siewior
+ - Rik Snel
+ - Jes Sorensen
+ - Alexey Starikovskiy
+ - Alan Stern
+ - Timur Tabi
+ - Hirokazu Takata
+ - Eliezer Tamir
+ - Eugene Teo
+ - Doug Thompson
+ - FUJITA Tomonori
+ - Dmitry Torokhov
+ - Marcelo Tosatti
+ - Steven Toth
+ - Theodore Tso
+ - Matthias Urlichs
+ - Geert Uytterhoeven
+ - Arjan van de Ven
+ - Ivo van Doorn
+ - Rik van Riel
+ - Wim Van Sebroeck
+ - Hans Verkuil
+ - Horst H. von Brand
+ - Dmitri Vorobiev
+ - Anton Vorontsov
+ - Daniel Walker
+ - Johannes Weiner
+ - Harald Welte
+ - Matthew Wilcox
+ - Dan J. Williams
+ - Darrick J. Wong
+ - David Woodhouse
+ - Chris Wright
+ - Bryan Wu
+ - Rafael J. Wysocki
+ - Herbert Xu
+ - Vlad Yasevich
+ - Peter Zijlstra
+ - Bartlomiej Zolnierkiewicz
diff --git a/Documentation/translations/zh_CN/process/kernel-enforcement-statement.rst b/Documentation/translations/zh_CN/process/kernel-enforcement-statement.rst
new file mode 100644
index 000000000000..75f7b7b9137c
--- /dev/null
+++ b/Documentation/translations/zh_CN/process/kernel-enforcement-statement.rst
@@ -0,0 +1,151 @@
+.. _cn_process_statement_kernel:
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/process/kernel-enforcement-statement.rst <process_statement_kernel>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+
+Linux 内核执行声明
+------------------
+
+作为Linux内核的开发人员，我们对如何使用我们的软件以及如何实施软件许可证有着
+浓厚的兴趣。遵守GPL-2.0的互惠共享义务对我们软件和社区的长期可持续性至关重要。
+
+虽然有权强制执行对我们社区的贡献中的单独版权权益，但我们有共同的利益，即确保
+个人强制执行行动的方式有利于我们的社区，不会对我们软件生态系统的健康和增长
+产生意外的负面影响。为了阻止无益的执法行动，我们同意代表我们自己和我们版权
+利益的任何继承人对Linux内核用户作出以下符合我们开发社区最大利益的承诺:
+
+    尽管有GPL-2.0的终止条款，我们同意，采用以下GPL-3.0条款作为我们许可证下的
+    附加许可，作为任何对许可证下权利的非防御性主张，这符合我们开发社区的最佳
+    利益。
+
+        但是，如果您停止所有违反本许可证的行为，则您从特定版权持有人处获得的
+        许可证将被恢复：（a）暂时恢复，除非版权持有人明确并最终终止您的许可证；
+        以及（b）永久恢复, 如果版权持有人未能在你终止违反后60天内以合理方式
+        通知您违反本许可证的行为，则永久恢复您的许可证。
+
+        此外，如果版权所有者以某种合理的方式通知您违反了本许可，这是您第一次
+        从该版权所有者处收到违反本许可的通知（对于任何作品），并且您在收到通知
+        后的30天内纠正违规行为。则您从特定版权所有者处获得的许可将永久恢复.
+
+我们提供这些保证的目的是鼓励更多地使用该软件。我们希望公司和个人使用、修改和
+分发此软件。我们希望以公开和透明的方式与用户合作，以消除我们对法规遵从性或强制
+执行的任何不确定性，这些不确定性可能会限制我们软件的采用。我们将法律行动视为
+最后手段，只有在其他社区努力未能解决这一问题时才采取行动。
+
+最后，一旦一个不合规问题得到解决，我们希望用户会感到欢迎，加入我们为之努力的
+这个项目。共同努力，我们会更强大。
+
+除了下面提到的以外，我们只为自己说话，而不是为今天、过去或将来可能为之工作的
+任何公司说话。
+
+  - Laura Abbott
+  - Bjorn Andersson (Linaro)
+  - Andrea Arcangeli
+  - Neil Armstrong
+  - Jens Axboe
+  - Pablo Neira Ayuso
+  - Khalid Aziz
+  - Ralf Baechle
+  - Felipe Balbi
+  - Arnd Bergmann
+  - Ard Biesheuvel
+  - Tim Bird
+  - Paolo Bonzini
+  - Christian Borntraeger
+  - Mark Brown (Linaro)
+  - Paul Burton
+  - Javier Martinez Canillas
+  - Rob Clark
+  - Kees Cook (Google)
+  - Jonathan Corbet
+  - Dennis Dalessandro
+  - Vivien Didelot (Savoir-faire Linux)
+  - Hans de Goede
+  - Mel Gorman (SUSE)
+  - Sven Eckelmann
+  - Alex Elder (Linaro)
+  - Fabio Estevam
+  - Larry Finger
+  - Bhumika Goyal
+  - Andy Gross
+  - Juergen Gross
+  - Shawn Guo
+  - Ulf Hansson
+  - Stephen Hemminger (Microsoft)
+  - Tejun Heo
+  - Rob Herring
+  - Masami Hiramatsu
+  - Michal Hocko
+  - Simon Horman
+  - Johan Hovold (Hovold Consulting AB)
+  - Christophe JAILLET
+  - Olof Johansson
+  - Lee Jones (Linaro)
+  - Heiner Kallweit
+  - Srinivas Kandagatla
+  - Jan Kara
+  - Shuah Khan (Samsung)
+  - David Kershner
+  - Jaegeuk Kim
+  - Namhyung Kim
+  - Colin Ian King
+  - Jeff Kirsher
+  - Greg Kroah-Hartman (Linux Foundation)
+  - Christian König
+  - Vinod Koul
+  - Krzysztof Kozlowski
+  - Viresh Kumar
+  - Aneesh Kumar K.V
+  - Julia Lawall
+  - Doug Ledford
+  - Chuck Lever (Oracle)
+  - Daniel Lezcano
+  - Shaohua Li
+  - Xin Long
+  - Tony Luck
+  - Catalin Marinas (Arm Ltd)
+  - Mike Marshall
+  - Chris Mason
+  - Paul E. McKenney
+  - Arnaldo Carvalho de Melo
+  - David S. Miller
+  - Ingo Molnar
+  - Kuninori Morimoto
+  - Trond Myklebust
+  - Martin K. Petersen (Oracle)
+  - Borislav Petkov
+  - Jiri Pirko
+  - Josh Poimboeuf
+  - Sebastian Reichel (Collabora)
+  - Guenter Roeck
+  - Joerg Roedel
+  - Leon Romanovsky
+  - Steven Rostedt (VMware)
+  - Frank Rowand
+  - Ivan Safonov
+  - Anna Schumaker
+  - Jes Sorensen
+  - K.Y. Srinivasan
+  - David Sterba (SUSE)
+  - Heiko Stuebner
+  - Jiri Kosina (SUSE)
+  - Willy Tarreau
+  - Dmitry Torokhov
+  - Linus Torvalds
+  - Thierry Reding
+  - Rik van Riel
+  - Luis R. Rodriguez
+  - Geert Uytterhoeven (Glider bvba)
+  - Eduardo Valentin (Amazon.com)
+  - Daniel Vetter
+  - Linus Walleij
+  - Richard Weinberger
+  - Dan Williams
+  - Rafael J. Wysocki
+  - Arvind Yadav
+  - Masahiro Yamada
+  - Wei Yongjun
+  - Lv Zheng
+  - Marc Zyngier (Arm Ltd)
diff --git a/Documentation/translations/zh_CN/process/magic-number.rst b/Documentation/translations/zh_CN/process/magic-number.rst
index 15c592518194..e4c225996af0 100644
--- a/Documentation/translations/zh_CN/process/magic-number.rst
+++ b/Documentation/translations/zh_CN/process/magic-number.rst
@@ -70,7 +70,6 @@ FF_MAGIC              0x4646           fc_info                  ``drivers/net/ip
 ISICOM_MAGIC          0x4d54           isi_port                 ``include/linux/isicom.h``
 PTY_MAGIC             0x5001                                    ``drivers/char/pty.c``
 PPP_MAGIC             0x5002           ppp                      ``include/linux/if_pppvar.h``
-SERIAL_MAGIC          0x5301           async_struct             ``include/linux/serial.h``
 SSTATE_MAGIC          0x5302           serial_state             ``include/linux/serial.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
 STRIP_MAGIC           0x5303           strip                    ``drivers/net/strip.c``
diff --git a/Documentation/usb/index.rst b/Documentation/usb/index.rst
index e55386a4abfb..36b6ebd9a9d9 100644
--- a/Documentation/usb/index.rst
+++ b/Documentation/usb/index.rst
@@ -22,11 +22,9 @@ USB support
     misc_usbsevseg
     mtouchusb
     ohci
-    rio
     usbip_protocol
     usbmon
     usb-serial
-    wusb-design-overview
 
     usb-help
     text_files
diff --git a/Documentation/usb/text_files.rst b/Documentation/usb/text_files.rst
index 6a8d3fcf64b6..1c18c05c3920 100644
--- a/Documentation/usb/text_files.rst
+++ b/Documentation/usb/text_files.rst
@@ -16,12 +16,6 @@ USB devfs drop permissions source
 .. literalinclude:: usbdevfs-drop-permissions.c
     :language: c
 
-WUSB command line script to manipulate auth credentials
--------------------------------------------------------
-
-.. literalinclude:: wusb-cbaf
-   :language: shell
-
 Credits
 -------
 
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index ad494da40009..e983488b48b1 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -21,6 +21,7 @@ place where this information is gathered.
    unshare
    spec_ctrl
    accelerators/ocxl
+   ioctl/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/ioctl/cdrom.rst b/Documentation/userspace-api/ioctl/cdrom.rst
index 3b4c0506de46..3b4c0506de46 100644
--- a/Documentation/ioctl/cdrom.rst
+++ b/Documentation/userspace-api/ioctl/cdrom.rst
diff --git a/Documentation/ioctl/hdio.rst b/Documentation/userspace-api/ioctl/hdio.rst
index e822e3dff176..e822e3dff176 100644
--- a/Documentation/ioctl/hdio.rst
+++ b/Documentation/userspace-api/ioctl/hdio.rst
diff --git a/Documentation/userspace-api/ioctl/index.rst b/Documentation/userspace-api/ioctl/index.rst
new file mode 100644
index 000000000000..475675eae086
--- /dev/null
+++ b/Documentation/userspace-api/ioctl/index.rst
@@ -0,0 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+IOCTLs
+======
+
+.. toctree::
+   :maxdepth: 1
+
+   ioctl-number
+
+   ioctl-decoding
+
+   cdrom
+   hdio
diff --git a/Documentation/ioctl/ioctl-decoding.rst b/Documentation/userspace-api/ioctl/ioctl-decoding.rst
index 380d6bb3e3ea..380d6bb3e3ea 100644
--- a/Documentation/ioctl/ioctl-decoding.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-decoding.rst
diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index bef79cd4c6b4..2e91370dc159 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -132,7 +132,6 @@ Code  Seq#    Include File                                           Comments
 'F'   80-8F  linux/arcfb.h                                           conflict!
 'F'   DD     video/sstfb.h                                           conflict!
 'G'   00-3F  drivers/misc/sgi-gru/grulib.h                           conflict!
-'G'   00-0F  linux/gigaset_dev.h                                     conflict!
 'H'   00-7F  linux/hiddev.h                                          conflict!
 'H'   00-0F  linux/hidraw.h                                          conflict!
 'H'   01     linux/mei.h                                             conflict!
@@ -233,6 +232,7 @@ Code  Seq#    Include File                                           Comments
 'f'   00-0F  fs/ext4/ext4.h                                          conflict!
 'f'   00-0F  linux/fs.h                                              conflict!
 'f'   00-0F  fs/ocfs2/ocfs2_fs.h                                     conflict!
+'f'   13-27  linux/fscrypt.h
 'f'   81-8F  linux/fsverity.h
 'g'   00-0F  linux/usb/gadgetfs.h
 'g'   20-2F  linux/usb/g_printer.h
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..ebb37b34dcfc 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -5,7 +5,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
 ----------------------
 
 The kvm API is a set of ioctls that are issued to control various aspects
-of a virtual machine.  The ioctls belong to three classes:
+of a virtual machine.  The ioctls belong to the following classes:
 
  - System ioctls: These query and set global attributes which affect the
    whole kvm subsystem.  In addition a system ioctl is used to create
@@ -1002,12 +1002,18 @@ Specifying exception.has_esr on a system that does not support it will return
 -EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
 will return -EINVAL.
 
+It is not possible to read back a pending external abort (injected via
+KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
+directly to the virtual CPU).
+
+
 struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
@@ -1051,9 +1057,23 @@ contain a valid state and shall be written into the VCPU.
 
 ARM/ARM64:
 
+User space may need to inject several types of events to the guest.
+
 Set the pending SError exception state for this VCPU. It is not possible to
 'cancel' an Serror that has been made pending.
 
+If the guest performed an access to I/O memory which could not be handled by
+userspace, for example because of missing instruction syndrome decode
+information or because there is no device mapped at the accessed IPA, then
+userspace can ask the kernel to inject an external abort using the address
+from the exiting fault on the VCPU. It is a programming error to set
+ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or
+KVM_EXIT_ARM_NISV. This feature is only available if the system supports
+KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in
+how userspace reports accesses for the above cases to guests, across different
+userspace implementations. Nevertheless, userspace can still emulate all Arm
+exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
+
 See KVM_GET_VCPU_EVENTS for the data structure.
 
 
@@ -2982,6 +3002,9 @@ can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
 KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
 indicating the number of supported registers.
 
+For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
+the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
+
 When debug events exit the main run loop with the reason
 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
 structure containing architecture specific debug information.
@@ -4126,6 +4149,24 @@ Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
+4.121 KVM_PPC_SVM_OFF
+
+Capability: basic
+Architectures: powerpc
+Type: vm ioctl
+Parameters: none
+Returns: 0 on successful completion,
+Errors:
+  EINVAL:    if ultravisor failed to terminate the secure guest
+  ENOMEM:    if hypervisor failed to allocate new radix page tables for guest
+
+This ioctl is used to turn off the secure mode of the guest or transition
+the guest from secure mode to normal mode. This is invoked when the guest
+is reset. This has no effect if called for a normal guest.
+
+This ioctl issues an ultravisor call to terminate the secure guest,
+unpins the VPA pages and releases all the device pages that are used to
+track the secure pages by hypervisor.
 
 5. The kvm_run structure
 ------------------------
@@ -4468,6 +4509,39 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
 event/message pages and to enable/disable SynIC messages/events processing
 in userspace.
 
+		/* KVM_EXIT_ARM_NISV */
+		struct {
+			__u64 esr_iss;
+			__u64 fault_ipa;
+		} arm_nisv;
+
+Used on arm and arm64 systems. If a guest accesses memory not in a memslot,
+KVM will typically return to userspace and ask it to do MMIO emulation on its
+behalf. However, for certain classes of instructions, no instruction decode
+(direction, length of memory access) is provided, and fetching and decoding
+the instruction from the VM is overly complicated to live in the kernel.
+
+Historically, when this situation occurred, KVM would print a warning and kill
+the VM. KVM assumed that if the guest accessed non-memslot memory, it was
+trying to do I/O, which just couldn't be emulated, and the warning message was
+phrased accordingly. However, what happened more often was that a guest bug
+caused access outside the guest memory areas which should lead to a more
+meaningful warning message and an external abort in the guest, if the access
+did not fall within an I/O window.
+
+Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable
+this capability at VM creation. Once this is done, these types of errors will
+instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from
+the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA
+in the fault_ipa field. Userspace can either fix up the access if it's
+actually an I/O access by decoding the instruction from guest memory (if it's
+very brave) and continue executing the guest, or it can decide to suspend,
+dump, or restart the guest.
+
+Note that KVM does not skip the faulting instruction as it does for
+KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
+if it decides to decode and emulate the instruction.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst
new file mode 100644
index 000000000000..2357dd2d8655
--- /dev/null
+++ b/Documentation/virt/kvm/arm/pvtime.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Paravirtualized time support for arm64
+======================================
+
+Arm specification DEN0057/A defines a standard for paravirtualised time
+support for AArch64 guests:
+
+https://developer.arm.com/docs/den0057/a
+
+KVM/arm64 implements the stolen time part of this specification by providing
+some hypervisor service calls to support a paravirtualized guest obtaining a
+view of the amount of time stolen from its execution.
+
+Two new SMCCC compatible hypercalls are defined:
+
+* PV_TIME_FEATURES: 0xC5000020
+* PV_TIME_ST:       0xC5000021
+
+These are only available in the SMC64/HVC64 calling convention as
+paravirtualized time is not available to 32 bit Arm guests. The existence of
+the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES
+mechanism before calling it.
+
+PV_TIME_FEATURES
+    ============= ========    ==========
+    Function ID:  (uint32)    0xC5000020
+    PV_call_id:   (uint32)    The function to query for support.
+                              Currently only PV_TIME_ST is supported.
+    Return value: (int64)     NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
+                              PV-time feature is supported by the hypervisor.
+    ============= ========    ==========
+
+PV_TIME_ST
+    ============= ========    ==========
+    Function ID:  (uint32)    0xC5000021
+    Return value: (int64)     IPA of the stolen time data structure for this
+                              VCPU. On failure:
+                              NOT_SUPPORTED (-1)
+    ============= ========    ==========
+
+The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory
+with inner and outer write back caching attributes, in the inner shareable
+domain. A total of 16 bytes from the IPA returned are guaranteed to be
+meaningfully filled by the hypervisor (see structure below).
+
+PV_TIME_ST returns the structure for the calling VCPU.
+
+Stolen Time
+-----------
+
+The structure pointed to by the PV_TIME_ST hypercall is as follows:
+
++-------------+-------------+-------------+----------------------------+
+| Field       | Byte Length | Byte Offset | Description                |
++=============+=============+=============+============================+
+| Revision    |      4      |      0      | Must be 0 for version 1.0  |
++-------------+-------------+-------------+----------------------------+
+| Attributes  |      4      |      4      | Must be 0                  |
++-------------+-------------+-------------+----------------------------+
+| Stolen time |      8      |      8      | Stolen time in unsigned    |
+|             |             |             | nanoseconds indicating how |
+|             |             |             | much time this VCPU thread |
+|             |             |             | was involuntarily not      |
+|             |             |             | running on a physical CPU. |
++-------------+-------------+-------------+----------------------------+
+
+All values in the structure are stored little-endian.
+
+The structure will be updated by the hypervisor prior to scheduling a VCPU. It
+will be present within a reserved region of the normal memory given to the
+guest. The guest should not attempt to write into this memory. There is a
+structure per VCPU of the guest.
+
+It is advisable that one or more 64k pages are set aside for the purpose of
+these structures and not used for other purposes, this enables the guest to map
+the region using 64k pages and avoids conflicting attributes with other memory.
+
+For the user space interface see Documentation/virt/kvm/devices/vcpu.txt
+section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
diff --git a/Documentation/virt/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt
index 2b5dab16c4f2..6f3bd64a05b0 100644
--- a/Documentation/virt/kvm/devices/vcpu.txt
+++ b/Documentation/virt/kvm/devices/vcpu.txt
@@ -60,3 +60,17 @@ time to use the number provided for a given timer, overwriting any previously
 configured values on other VCPUs.  Userspace should configure the interrupt
 numbers on at least one VCPU after creating all VCPUs and before running any
 VCPUs.
+
+3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
+Architectures: ARM64
+
+3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
+Parameters: 64-bit base address
+Returns: -ENXIO:  Stolen time not implemented
+         -EEXIST: Base address already set for this VCPU
+         -EINVAL: Base address not 64 byte aligned
+
+Specifies the base address of the stolen time structure for this VCPU. The
+base address must be 64 byte aligned and exist within a valid guest memory
+region. See Documentation/virt/kvm/arm/pvtime.txt for more information
+including the layout of the stolen time structure.
diff --git a/Documentation/virt/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.txt
index 42864935ac5d..423332dda7bc 100644
--- a/Documentation/virt/kvm/devices/xics.txt
+++ b/Documentation/virt/kvm/devices/xics.txt
@@ -3,9 +3,19 @@ XICS interrupt controller
 Device type supported: KVM_DEV_TYPE_XICS
 
 Groups:
-  KVM_DEV_XICS_SOURCES
+  1. KVM_DEV_XICS_GRP_SOURCES
   Attributes: One per interrupt source, indexed by the source number.
 
+  2. KVM_DEV_XICS_GRP_CTRL
+  Attributes:
+    2.1 KVM_DEV_XICS_NR_SERVERS (write only)
+  The kvm_device_attr.addr points to a __u32 value which is the number of
+  interrupt server numbers (ie, highest possible vcpu id plus one).
+  Errors:
+    -EINVAL: Value greater than KVM_MAX_VCPU_ID.
+    -EFAULT: Invalid user pointer for attr->addr.
+    -EBUSY:  A vcpu is already connected to the device.
+
 This device emulates the XICS (eXternal Interrupt Controller
 Specification) defined in PAPR.  The XICS has a set of interrupt
 sources, each identified by a 20-bit source number, and a set of
@@ -38,7 +48,7 @@ least-significant end of the word:
 
 Each source has 64 bits of state that can be read and written using
 the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
-KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being
 the interrupt source number.  The 64 bit state word has the following
 bitfields, starting from the least-significant end of the word:
 
diff --git a/Documentation/virt/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.txt
index 9a24a4525253..f5d1d6b5af61 100644
--- a/Documentation/virt/kvm/devices/xive.txt
+++ b/Documentation/virt/kvm/devices/xive.txt
@@ -78,6 +78,14 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
     migrating the VM.
     Errors: none
 
+    1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
+    The kvm_device_attr.addr points to a __u32 value which is the number of
+    interrupt server numbers (ie, highest possible vcpu id plus one).
+    Errors:
+      -EINVAL: Value greater than KVM_MAX_VCPU_ID.
+      -EFAULT: Invalid user pointer for attr->addr.
+      -EBUSY:  A vCPU is already connected to the device.
+
   2. KVM_DEV_XIVE_GRP_SOURCE (write only)
   Initializes a new source in the XIVE device and mask it.
   Attributes:
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 0a5960beccf7..95fec5968362 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -147,49 +147,16 @@ Address space mirroring implementation and API
 Address space mirroring's main objective is to allow duplication of a range of
 CPU page table into a device page table; HMM helps keep both synchronized. A
 device driver that wants to mirror a process address space must start with the
-registration of an hmm_mirror struct::
-
- int hmm_mirror_register(struct hmm_mirror *mirror,
-                         struct mm_struct *mm);
-
-The mirror struct has a set of callbacks that are used
-to propagate CPU page tables::
-
- struct hmm_mirror_ops {
-     /* release() - release hmm_mirror
-      *
-      * @mirror: pointer to struct hmm_mirror
-      *
-      * This is called when the mm_struct is being released.  The callback
-      * must ensure that all access to any pages obtained from this mirror
-      * is halted before the callback returns. All future access should
-      * fault.
-      */
-     void (*release)(struct hmm_mirror *mirror);
-
-     /* sync_cpu_device_pagetables() - synchronize page tables
-      *
-      * @mirror: pointer to struct hmm_mirror
-      * @update: update information (see struct mmu_notifier_range)
-      * Return: -EAGAIN if update.blockable false and callback need to
-      *         block, 0 otherwise.
-      *
-      * This callback ultimately originates from mmu_notifiers when the CPU
-      * page table is updated. The device driver must update its page table
-      * in response to this callback. The update argument tells what action
-      * to perform.
-      *
-      * The device driver must not return from this callback until the device
-      * page tables are completely updated (TLBs flushed, etc); this is a
-      * synchronous call.
-      */
-     int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-                                       const struct hmm_update *update);
- };
-
-The device driver must perform the update action to the range (mark range
-read only, or fully unmap, etc.). The device must complete the update before
-the driver callback returns.
+registration of a mmu_interval_notifier::
+
+ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
+				  struct mm_struct *mm, unsigned long start,
+				  unsigned long length,
+				  const struct mmu_interval_notifier_ops *ops);
+
+During the ops->invalidate() callback the device driver must perform the
+update action to the range (mark range read only, or fully unmap, etc.). The
+device must complete the update before the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
 use::
@@ -216,70 +183,46 @@ The usage pattern is::
       struct hmm_range range;
       ...
 
+      range.notifier = &interval_sub;
       range.start = ...;
       range.end = ...;
       range.pfns = ...;
       range.flags = ...;
       range.values = ...;
       range.pfn_shift = ...;
-      hmm_range_register(&range, mirror);
 
-      /*
-       * Just wait for range to be valid, safe to ignore return value as we
-       * will use the return value of hmm_range_fault() below under the
-       * mmap_sem to ascertain the validity of the range.
-       */
-      hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
+      if (!mmget_not_zero(interval_sub->notifier.mm))
+          return -EFAULT;
 
  again:
+      range.notifier_seq = mmu_interval_read_begin(&interval_sub);
       down_read(&mm->mmap_sem);
       ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
       if (ret) {
           up_read(&mm->mmap_sem);
-          if (ret == -EBUSY) {
-            /*
-             * No need to check hmm_range_wait_until_valid() return value
-             * on retry we will get proper error with hmm_range_fault()
-             */
-            hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
-            goto again;
-          }
-          hmm_range_unregister(&range);
+          if (ret == -EBUSY)
+                 goto again;
           return ret;
       }
+      up_read(&mm->mmap_sem);
+
       take_lock(driver->update);
-      if (!hmm_range_valid(&range)) {
+      if (mmu_interval_read_retry(&ni, range.notifier_seq) {
           release_lock(driver->update);
-          up_read(&mm->mmap_sem);
           goto again;
       }
 
-      // Use pfns array content to update device page table
+      /* Use pfns array content to update device page table,
+       * under the update lock */
 
-      hmm_range_unregister(&range);
       release_lock(driver->update);
-      up_read(&mm->mmap_sem);
       return 0;
  }
 
 The driver->update lock is the same lock that the driver takes inside its
-sync_cpu_device_pagetables() callback. That lock must be held before calling
-hmm_range_valid() to avoid any race with a concurrent CPU page table update.
-
-HMM implements all this on top of the mmu_notifier API because we wanted a
-simpler API and also to be able to perform optimizations latter on like doing
-concurrent device updates in multi-devices scenario.
-
-HMM also serves as an impedance mismatch between how CPU page table updates
-are done (by CPU write to the page table and TLB flushes) and how devices
-update their own page table. Device updates are a multi-step process. First,
-appropriate commands are written to a buffer, then this buffer is scheduled for
-execution on the device. It is only once the device has executed commands in
-the buffer that the update is done. Creating and scheduling the update command
-buffer can happen concurrently for multiple devices. Waiting for each device to
-report commands as executed is serialized (there is no point in doing this
-concurrently).
-
+invalidate() callback. That lock must be held before calling
+mmu_interval_read_retry() to avoid any race with a concurrent CPU page table
+update.
 
 Leverage default_flags and pfn_flags_mask
 =========================================
diff --git a/Documentation/w1/index.rst b/Documentation/w1/index.rst
index 57cba81865e2..156279f17553 100644
--- a/Documentation/w1/index.rst
+++ b/Documentation/w1/index.rst
@@ -1,4 +1,4 @@
-. SPDX-License-Identifier: GPL-2.0
+.. SPDX-License-Identifier: GPL-2.0
 
 ================
 1-Wire Subsystem
diff --git a/Documentation/w1/masters/omap-hdq.rst b/Documentation/w1/masters/omap-hdq.rst
index 345298a59e50..5347b5d9e90a 100644
--- a/Documentation/w1/masters/omap-hdq.rst
+++ b/Documentation/w1/masters/omap-hdq.rst
@@ -44,7 +44,7 @@ that the ID used should be same for both master and slave driver loading.
 e.g::
 
   insmod omap_hdq.ko W1_ID=2
-  inamod w1_bq27000.ko F_ID=2
+  insmod w1_bq27000.ko F_ID=2
 
 The driver also supports 1-wire mode. In this mode, there is no need to
 pass slave ID as parameter. The driver will auto-detect slaves connected
diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index 08a2f100c0e6..c9c201596c3e 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -68,8 +68,26 @@ Protocol 2.12	(Kernel 3.8) Added the xloadflags field and extension fields
 Protocol 2.13	(Kernel 3.14) Support 32- and 64-bit flags being set in
 		xloadflags to support booting a 64-bit kernel from 32-bit
 		EFI
+
+Protocol 2.14	BURNT BY INCORRECT COMMIT
+                ae7e1238e68f2a472a125673ab506d49158c1889
+		(x86/boot: Add ACPI RSDP address to setup_header)
+		DO NOT USE!!! ASSUME SAME AS 2.13.
+
+Protocol 2.15	(Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
 =============	============================================================
 
+.. note::
+     The protocol version number should be changed only if the setup header
+     is changed. There is no need to update the version number if boot_params
+     or kernel_info are changed. Additionally, it is recommended to use
+     xloadflags (in this case the protocol version number should not be
+     updated either) or kernel_info to communicate supported Linux kernel
+     features to the boot loader. Due to very limited space available in
+     the original setup header every update to it should be considered
+     with great care. Starting from the protocol 2.15 the primary way to
+     communicate things to the boot loader is the kernel_info.
+
 
 Memory Layout
 =============
@@ -207,6 +225,7 @@ Offset/Size	Proto		Name			Meaning
 0258/8		2.10+		pref_address		Preferred loading address
 0260/4		2.10+		init_size		Linear memory required during initialization
 0264/4		2.11+		handover_offset		Offset of handover entry point
+0268/4		2.15+		kernel_info_offset	Offset of the kernel_info
 ===========	========	=====================	============================================
 
 .. note::
@@ -233,7 +252,7 @@ setting fields in the header, you must make sure only to set fields
 supported by the protocol version in use.
 
 
-Details of Harder Fileds
+Details of Header Fields
 ========================
 
 For each field, some are information from the kernel to the bootloader
@@ -809,6 +828,47 @@ Protocol:	2.09+
   sure to consider the case where the linked list already contains
   entries.
 
+  The setup_data is a bit awkward to use for extremely large data objects,
+  both because the setup_data header has to be adjacent to the data object
+  and because it has a 32-bit length field. However, it is important that
+  intermediate stages of the boot process have a way to identify which
+  chunks of memory are occupied by kernel data.
+
+  Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
+  protocol 2.15::
+
+    struct setup_indirect {
+      __u32 type;
+      __u32 reserved;  /* Reserved, must be set to zero. */
+      __u64 len;
+      __u64 addr;
+    };
+
+  The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
+  SETUP_INDIRECT itself since making the setup_indirect a tree structure
+  could require a lot of stack space in something that needs to parse it
+  and stack space can be limited in boot contexts.
+
+  Let's give an example how to point to SETUP_E820_EXT data using setup_indirect.
+  In this case setup_data and setup_indirect will look like this::
+
+    struct setup_data {
+      __u64 next = 0 or <addr_of_next_setup_data_struct>;
+      __u32 type = SETUP_INDIRECT;
+      __u32 len = sizeof(setup_data);
+      __u8 data[sizeof(setup_indirect)] = struct setup_indirect {
+        __u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
+        __u32 reserved = 0;
+        __u64 len = <len_of_SETUP_E820_EXT_data>;
+        __u64 addr = <addr_of_SETUP_E820_EXT_data>;
+      }
+    }
+
+.. note::
+     SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
+     from SETUP_INDIRECT itself. So, this kind of objects cannot be provided
+     by the bootloaders.
+
 ============	============
 Field name:	pref_address
 Type:		read (reloc)
@@ -855,6 +915,121 @@ Offset/size:	0x264/4
 
   See EFI HANDOVER PROTOCOL below for more details.
 
+============	==================
+Field name:	kernel_info_offset
+Type:		read
+Offset/size:	0x268/4
+Protocol:	2.15+
+============	==================
+
+  This field is the offset from the beginning of the kernel image to the
+  kernel_info. The kernel_info structure is embedded in the Linux image
+  in the uncompressed protected mode region.
+
+
+The kernel_info
+===============
+
+The relationships between the headers are analogous to the various data
+sections:
+
+  setup_header = .data
+  boot_params/setup_data = .bss
+
+What is missing from the above list? That's right:
+
+  kernel_info = .rodata
+
+We have been (ab)using .data for things that could go into .rodata or .bss for
+a long time, for lack of alternatives and -- especially early on -- inertia.
+Also, the BIOS stub is responsible for creating boot_params, so it isn't
+available to a BIOS-based loader (setup_data is, though).
+
+setup_header is permanently limited to 144 bytes due to the reach of the
+2-byte jump field, which doubles as a length field for the structure, combined
+with the size of the "hole" in struct boot_params that a protected-mode loader
+or the BIOS stub has to copy it into. It is currently 119 bytes long, which
+leaves us with 25 very precious bytes. This isn't something that can be fixed
+without revising the boot protocol entirely, breaking backwards compatibility.
+
+boot_params proper is limited to 4096 bytes, but can be arbitrarily extended
+by adding setup_data entries. It cannot be used to communicate properties of
+the kernel image, because it is .bss and has no image-provided content.
+
+kernel_info solves this by providing an extensible place for information about
+the kernel image. It is readonly, because the kernel cannot rely on a
+bootloader copying its contents anywhere, but that is OK; if it becomes
+necessary it can still contain data items that an enabled bootloader would be
+expected to copy into a setup_data chunk.
+
+All kernel_info data should be part of this structure. Fixed size data have to
+be put before kernel_info_var_len_data label. Variable size data have to be put
+after kernel_info_var_len_data label. Each chunk of variable size data has to
+be prefixed with header/magic and its size, e.g.::
+
+  kernel_info:
+          .ascii  "LToP"          /* Header, Linux top (structure). */
+          .long   kernel_info_var_len_data - kernel_info
+          .long   kernel_info_end - kernel_info
+          .long   0x01234567      /* Some fixed size data for the bootloaders. */
+  kernel_info_var_len_data:
+  example_struct:                 /* Some variable size data for the bootloaders. */
+          .ascii  "0123"          /* Header/Magic. */
+          .long   example_struct_end - example_struct
+          .ascii  "Struct"
+          .long   0x89012345
+  example_struct_end:
+  example_strings:                /* Some variable size data for the bootloaders. */
+          .ascii  "ABCD"          /* Header/Magic. */
+          .long   example_strings_end - example_strings
+          .asciz  "String_0"
+          .asciz  "String_1"
+  example_strings_end:
+  kernel_info_end:
+
+This way the kernel_info is self-contained blob.
+
+.. note::
+     Each variable size data header/magic can be any 4-character string,
+     without \0 at the end of the string, which does not collide with
+     existing variable length data headers/magics.
+
+
+Details of the kernel_info Fields
+=================================
+
+============	========
+Field name:	header
+Offset/size:	0x0000/4
+============	========
+
+  Contains the magic number "LToP" (0x506f544c).
+
+============	========
+Field name:	size
+Offset/size:	0x0004/4
+============	========
+
+  This field contains the size of the kernel_info including kernel_info.header.
+  It does not count kernel_info.kernel_info_var_len_data size. This field should be
+  used by the bootloaders to detect supported fixed size fields in the kernel_info
+  and beginning of kernel_info.kernel_info_var_len_data.
+
+============	========
+Field name:	size_total
+Offset/size:	0x0008/4
+============	========
+
+  This field contains the size of the kernel_info including kernel_info.header
+  and kernel_info.kernel_info_var_len_data.
+
+============	==============
+Field name:	setup_type_max
+Offset/size:	0x000c/4
+============	==============
+
+  This field contains maximal allowed type for setup_data and setup_indirect structs.
+
 
 The Image Checksum
 ==================
diff --git a/Documentation/x86/pat.rst b/Documentation/x86/pat.rst
index 9a298fd97d74..5d901771016d 100644
--- a/Documentation/x86/pat.rst
+++ b/Documentation/x86/pat.rst
@@ -44,8 +44,6 @@ address range to avoid any aliasing.
 +------------------------+----------+--------------+------------------+
 | ioremap_uc             |    --    |    UC        |       UC         |
 +------------------------+----------+--------------+------------------+
-| ioremap_nocache        |    --    |    UC-       |       UC-        |
-+------------------------+----------+--------------+------------------+
 | ioremap_wc             |    --    |    --        |       WC         |
 +------------------------+----------+--------------+------------------+
 | ioremap_wt             |    --    |    --        |       WT         |
diff --git a/Documentation/x86/x86_64/mm.rst b/Documentation/x86/x86_64/mm.rst
index 267fc4808945..e5053404a1ae 100644
--- a/Documentation/x86/x86_64/mm.rst
+++ b/Documentation/x86/x86_64/mm.rst
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-================
-Memory Managment
-================
+=================
+Memory Management
+=================
 
 Complete virtual memory map with 4-level page tables
 ====================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 9d3a5c54a41d..dc833b88c8bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1,12 +1,14 @@
-
-
-	List of maintainers and how to submit kernel changes
+List of maintainers and how to submit kernel changes
+====================================================
 
 Please try to follow the guidelines below.  This will make things
 easier on the maintainers.  Not all of these guidelines matter for every
 trivial patch so apply some common sense.
 
-1.	Always _test_ your changes, however small, on at least 4 or
+Tips for patch submitters
+-------------------------
+
+1.	Always *test* your changes, however small, on at least 4 or
 	5 people, preferably many more.
 
 2.	Try to release a few ALPHA test versions to the net. Announce
@@ -25,7 +27,7 @@ trivial patch so apply some common sense.
 	testing and await feedback.
 
 5.	Make a patch available to the relevant maintainer in the list. Use
-	'diff -u' to make the patch easy to merge. Be prepared to get your
+	``diff -u`` to make the patch easy to merge. Be prepared to get your
 	changes sent back with seemingly silly requests about formatting
 	and variable names.  These aren't as silly as they seem. One
 	job the maintainers (and especially Linus) do is to keep things
@@ -38,7 +40,7 @@ trivial patch so apply some common sense.
 	See Documentation/process/coding-style.rst for guidance here.
 
 	PLEASE CC: the maintainers and mailing lists that are generated
-	by scripts/get_maintainer.pl.  The results returned by the
+	by ``scripts/get_maintainer.pl.`` The results returned by the
 	script will be best if you have git installed and are making
 	your changes in a branch derived from Linus' latest git tree.
 	See Documentation/process/submitting-patches.rst for details.
@@ -70,26 +72,27 @@ trivial patch so apply some common sense.
 	not represent an immediate threat and are better handled publicly,
 	and ideally, should come with a patch proposal. Please do not send
 	automated reports to this list either. Such bugs will be handled
-	better and faster in the usual public places.
+	better and faster in the usual public places. See
+	Documentation/admin-guide/security-bugs.rst for details.
 
 8.	Happy hacking.
 
-Descriptions of section entries:
+Descriptions of section entries
+-------------------------------
 
-	P: Person (obsolete)
-	M: Mail patches to: FullName <address@domain>
-	R: Designated reviewer: FullName <address@domain>
+	M: *Mail* patches to: FullName <address@domain>
+	R: Designated *Reviewer*: FullName <address@domain>
 	   These reviewers should be CCed on patches.
-	L: Mailing list that is relevant to this area
-	W: Web-page with status/info
-	B: URI for where to file bugs. A web-page with detailed bug
+	L: *Mailing list* that is relevant to this area
+	W: *Web-page* with status/info
+	B: URI for where to file *bugs*. A web-page with detailed bug
 	   filing info, a direct bug tracker link, or a mailto: URI.
-	C: URI for chat protocol, server and channel where developers
+	C: URI for *chat* protocol, server and channel where developers
 	   usually hang out, for example irc://server/channel.
-	Q: Patchwork web based patch tracking system site
-	T: SCM tree type and location.
+	Q: *Patchwork* web based patch tracking system site
+	T: *SCM* tree type and location.
 	   Type is one of: git, hg, quilt, stgit, topgit
-	S: Status, one of the following:
+	S: *Status*, one of the following:
 	   Supported:	Someone is actually paid to look after this.
 	   Maintained:	Someone actually looks after it.
 	   Odd Fixes:	It has a maintainer but they don't have time to do
@@ -99,13 +102,17 @@ Descriptions of section entries:
 	   Obsolete:	Old code. Something tagged obsolete generally means
 			it has been replaced by a better system and you
 			should be using that.
-	F: Files and directories with wildcard patterns.
+	P: Subsystem Profile document for more details submitting
+	   patches to the given subsystem. This is either an in-tree file,
+	   or a URI. See Documentation/maintainer/maintainer-entry-profile.rst
+	   for details.
+	F: *Files* and directories wildcard patterns.
 	   A trailing slash includes all files and subdirectory files.
 	   F:	drivers/net/	all files in and below drivers/net
 	   F:	drivers/net/*	all files in drivers/net, but not below
 	   F:	*/net/*		all files in "any top level directory"/net
 	   One pattern per line.  Multiple F: lines acceptable.
-	N: Files and directories with regex patterns.
+	N: Files and directories *Regex* patterns.
 	   N:	[^a-z]tegra	all files whose path contains the word tegra
 	   One pattern per line.  Multiple N: lines acceptable.
 	   scripts/get_maintainer.pl has different behavior for files that
@@ -113,14 +120,14 @@ Descriptions of section entries:
 	   get_maintainer will not look at git log history when an F: pattern
 	   match occurs.  When an N: match occurs, git log history is used
 	   to also notify the people that have git commit signatures.
-	X: Files and directories that are NOT maintained, same rules as F:
-	   Files exclusions are tested before file matches.
+	X: *Excluded* files and directories that are NOT maintained, same
+	   rules as F:. Files exclusions are tested before file matches.
 	   Can be useful for excluding a specific subdirectory, for instance:
 	   F:	net/
 	   X:	net/ipv6/
 	   matches all files in and below net excluding net/ipv6/
-	K: Keyword perl extended regex pattern to match content in a
-	   patch or file.  For instance:
+	K: *Content regex* (perl extended) pattern match in a patch or file.
+	   For instance:
 	   K: of_get_profile
 	      matches patches or files that contain "of_get_profile"
 	   K: \b(printk|pr_(info|err))\b
@@ -128,13 +135,12 @@ Descriptions of section entries:
 	      printk, pr_info or pr_err
 	   One regex pattern per line.  Multiple K: lines acceptable.
 
-Note: For the hard of thinking, this list is meant to remain in alphabetical
-order. If you could add yourselves to it in alphabetical order that would be
-so much easier [Ed]
+Maintainers List
+----------------
 
-Maintainers List (try to look for most precise areas first)
-
-		-----------------------------------
+.. note:: When reading this list, please look for the most precise areas
+          first. When adding to this list, please keep the entries in
+          alphabetical order.
 
 3C59X NETWORK DRIVER
 M:	Steffen Klassert <klassert@kernel.org>
@@ -295,7 +301,7 @@ S:	Maintained
 F:	drivers/net/ethernet/alteon/acenic*
 
 ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER
-M:	Peter Feuerer <peter@piie.net>
+M:	Peter Kaestle <peter@piie.net>
 L:	platform-driver-x86@vger.kernel.org
 W:	http://piie.net/?section=acerhdf
 S:	Maintained
@@ -339,7 +345,7 @@ F:	drivers/acpi/apei/
 
 ACPI COMPONENT ARCHITECTURE (ACPICA)
 M:	Robert Moore <robert.moore@intel.com>
-M:	Erik Schmauss <erik.schmauss@intel.com>
+M:	Erik Kaneda <erik.kaneda@intel.com>
 M:	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
 L:	linux-acpi@vger.kernel.org
 L:	devel@acpica.org
@@ -668,6 +674,14 @@ S:	Maintained
 F:	Documentation/i2c/busses/i2c-ali1563.rst
 F:	drivers/i2c/busses/i2c-ali1563.c
 
+ALL SENSORS DLH SERIES PRESSURE SENSORS DRIVER
+M:	Tomislav Denis <tomislav.denis@avl.com>
+W:	http://www.allsensors.com/
+S:	Maintained
+L:	linux-iio@vger.kernel.org
+F:	drivers/iio/pressure/dlhl60d.c
+F:	Documentation/devicetree/bindings/iio/pressure/asc,dlhl60d.yaml
+
 ALLEGRO DVT VIDEO IP CORE DRIVER
 M:	Michael Tretter <m.tretter@pengutronix.de>
 R:	Pengutronix Kernel Team <kernel@pengutronix.de>
@@ -682,11 +696,19 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
 F:	drivers/cpufreq/sun50i-cpufreq-nvmem.c
 
-ALLWINNER SECURITY SYSTEM
+ALLWINNER CRYPTO DRIVERS
 M:	Corentin Labbe <clabbe.montjoie@gmail.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
-F:	drivers/crypto/sunxi-ss/
+F:	drivers/crypto/allwinner/
+
+ALLWINNER THERMAL DRIVER
+M:	Vasily Khoruzhick <anarsoul@gmail.com>
+M:	Yangtao Li <tiny.windzz@gmail.com>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
+F:	drivers/thermal/sun8i_thermal.c
 
 ALLWINNER VPU DRIVER
 M:	Maxime Ripard <mripard@kernel.org>
@@ -714,13 +736,13 @@ F:	Documentation/devicetree/bindings/i2c/i2c-altera.txt
 F:	drivers/i2c/busses/i2c-altera.c
 
 ALTERA MAILBOX DRIVER
-M:	Ley Foon Tan <lftan@altera.com>
+M:	Ley Foon Tan <ley.foon.tan@intel.com>
 L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/mailbox/mailbox-altera.c
 
 ALTERA PIO DRIVER
-M:	Tien Hock Loh <thloh@altera.com>
+M:	Joyce Ooi <joyce.ooi@intel.com>
 L:	linux-gpio@vger.kernel.org
 S:	Maintained
 F:	drivers/gpio/gpio-altera.c
@@ -765,6 +787,8 @@ F:	drivers/thermal/thermal_mmio.c
 
 AMAZON ETHERNET DRIVERS
 M:	Netanel Belgazal <netanel@amazon.com>
+M:	Arthur Kiyanovski <akiyano@amazon.com>
+R:	Guy Tzalik <gtzalik@amazon.com>
 R:	Saeed Bishara <saeedb@amazon.com>
 R:	Zorik Machulsky <zorik@amazon.com>
 L:	netdev@vger.kernel.org
@@ -783,7 +807,6 @@ F:	include/uapi/rdma/efa-abi.h
 
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
 M:	Tom Lendacky <thomas.lendacky@amd.com>
-M:	Gary Hook <gary.hook@amd.com>
 L:	linux-crypto@vger.kernel.org
 S:	Supported
 F:	drivers/crypto/ccp/
@@ -817,7 +840,7 @@ S:	Orphan
 F:	drivers/usb/gadget/udc/amd5536udc.*
 
 AMD GEODE PROCESSOR/CHIPSET SUPPORT
-P:	Andres Salomon <dilinger@queued.net>
+M:	Andres Salomon <dilinger@queued.net>
 L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:	Supported
@@ -856,7 +879,6 @@ S:	Maintained
 F:	drivers/i2c/busses/i2c-amd-mp2*
 
 AMD POWERPLAY
-M:	Rex Zhu <rex.zhu@amd.com>
 M:	Evan Quan <evan.quan@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
@@ -893,6 +915,14 @@ S:	Supported
 F:	drivers/iio/dac/ad5758.c
 F:	Documentation/devicetree/bindings/iio/dac/ad5758.txt
 
+ANALOG DEVICES INC AD7091R5 DRIVER
+M:	Beniamin Bia <beniamin.bia@analog.com>
+L:	linux-iio@vger.kernel.org
+W:	http://ez.analog.com/community/linux-device-drivers
+S:	Supported
+F:	drivers/iio/adc/ad7091r5.c
+F:	Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml
+
 ANALOG DEVICES INC AD7124 DRIVER
 M:	Stefan Popa <stefan.popa@analog.com>
 L:	linux-iio@vger.kernel.org
@@ -901,6 +931,14 @@ S:	Supported
 F:	drivers/iio/adc/ad7124.c
 F:	Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
 
+ANALOG DEVICES INC AD7292 DRIVER
+M:	Marcelo Schmitt <marcelo.schmitt1@gmail.com>
+L:	linux-iio@vger.kernel.org
+W:	http://ez.analog.com/community/linux-device-drivers
+S:	Supported
+F:	drivers/iio/adc/ad7292.c
+F:	Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
+
 ANALOG DEVICES INC AD7606 DRIVER
 M:	Stefan Popa <stefan.popa@analog.com>
 M:	Beniamin Bia <beniamin.bia@analog.com>
@@ -962,6 +1000,15 @@ W:	http://ez.analog.com/community/linux-device-drivers
 F:	drivers/iio/imu/adis16460.c
 F:	Documentation/devicetree/bindings/iio/imu/adi,adis16460.yaml
 
+ANALOG DEVICES INC ADM1177 DRIVER
+M:	Beniamin Bia <beniamin.bia@analog.com>
+M:	Michael Hennerich <Michael.Hennerich@analog.com>
+L:	linux-hwmon@vger.kernel.org
+W:	http://ez.analog.com/community/linux-device-drivers
+S:	Supported
+F:	drivers/hwmon/adm1177.c
+F:	Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml
+
 ANALOG DEVICES INC ADP5061 DRIVER
 M:	Stefan Popa <stefan.popa@analog.com>
 L:	linux-pm@vger.kernel.org
@@ -1002,6 +1049,7 @@ F:	drivers/media/i2c/adv7842*
 
 ANALOG DEVICES INC ASOC CODEC DRIVERS
 M:	Lars-Peter Clausen <lars@metafoo.de>
+M:	Nuno Sá <nuno.sa@analog.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 W:	http://wiki.analog.com/
 W:	http://ez.analog.com/community/linux-device-drivers
@@ -1029,7 +1077,7 @@ S:	Supported
 F:	Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
 F:	Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4350
 F:	drivers/iio/*/ad*
-F:	drivers/iio/adc/ltc2497*
+F:	drivers/iio/adc/ltc249*
 X:	drivers/iio/*/adjd*
 F:	drivers/staging/iio/*/ad*
 
@@ -1040,6 +1088,7 @@ F:	drivers/clk/analogbits/*
 F:	include/linux/clk/analogbits*
 
 ANDES ARCHITECTURE
+M:	Nick Hu <nickhu@andestech.com>
 M:	Greentime Hu <green.hu@gmail.com>
 M:	Vincent Chen <deanbo422@gmail.com>
 T:	git https://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git
@@ -1182,14 +1231,21 @@ S:	Maintained
 F:	drivers/media/i2c/aptina-pll.*
 
 AQUANTIA ETHERNET DRIVER (atlantic)
-M:	Igor Russkikh <igor.russkikh@aquantia.com>
+M:	Igor Russkikh <irusskikh@marvell.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-W:	http://www.aquantia.com
+W:	https://www.marvell.com/
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/aquantia/atlantic/
 F:	Documentation/networking/device_drivers/aquantia/atlantic.txt
 
+AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM
+M:	Egor Pomozov <epomozov@marvell.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+W:	http://www.aquantia.com
+F:	drivers/net/ethernet/aquantia/atlantic/aq_ptp*
+
 ARC FRAMEBUFFER DRIVER
 M:	Jaya Kumar <jayalk@intworks.biz>
 S:	Maintained
@@ -1251,6 +1307,7 @@ F:	Documentation/devicetree/bindings/display/arm,hdlcd.txt
 ARM KOMEDA DRM-KMS DRIVER
 M:	James (Qian) Wang <james.qian.wang@arm.com>
 M:	Liviu Dudau <liviu.dudau@arm.com>
+M:	Mihail Atanassov <mihail.atanassov@arm.com>
 L:	Mali DP Maintainers <malidp@foss.arm.com>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -1272,6 +1329,8 @@ F:	Documentation/gpu/afbc.rst
 ARM MALI PANFROST DRM DRIVER
 M:	Rob Herring <robh@kernel.org>
 M:	Tomeu Vizoso <tomeu.vizoso@collabora.com>
+R:	Steven Price <steven.price@arm.com>
+R:	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -1380,7 +1439,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git
 
 ARM/ACTIONS SEMI ARCHITECTURE
 M:	Andreas Färber <afaerber@suse.de>
-R:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 N:	owl
@@ -1391,6 +1450,7 @@ F:	drivers/clk/actions/
 F:	drivers/clocksource/timer-owl*
 F:	drivers/dma/owl-dma.c
 F:	drivers/i2c/busses/i2c-owl.c
+F:	drivers/mmc/host/owl-mmc.c
 F:	drivers/pinctrl/actions/*
 F:	drivers/soc/actions/
 F:	include/dt-bindings/power/owl-*
@@ -1399,6 +1459,7 @@ F:	Documentation/devicetree/bindings/arm/actions.yaml
 F:	Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
 F:	Documentation/devicetree/bindings/dma/owl-dma.txt
 F:	Documentation/devicetree/bindings/i2c/i2c-owl.txt
+F:	Documentation/devicetree/bindings/mmc/owl-mmc.yaml
 F:	Documentation/devicetree/bindings/pinctrl/actions,s900-pinctrl.txt
 F:	Documentation/devicetree/bindings/power/actions,owl-sps.txt
 F:	Documentation/devicetree/bindings/timer/actions,owl-timer.txt
@@ -1470,6 +1531,14 @@ F:	drivers/soc/amlogic/
 F:	drivers/rtc/rtc-meson*
 N:	meson
 
+ARM/Amlogic Meson SoC Crypto Drivers
+M:	Corentin Labbe <clabbe@baylibre.com>
+L:	linux-crypto@vger.kernel.org
+L:	linux-amlogic@lists.infradead.org
+S:	Maintained
+F:	drivers/crypto/amlogic/
+F:	Documentation/devicetree/bindings/crypto/amlogic*
+
 ARM/Amlogic Meson SoC Sound Drivers
 M:	Jerome Brunet <jbrunet@baylibre.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -1529,8 +1598,10 @@ M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/bitmain/
+F:	drivers/clk/clk-bm1880.c
 F:	drivers/pinctrl/pinctrl-bm1880.c
 F:	Documentation/devicetree/bindings/arm/bitmain.yaml
+F:	Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml
 F:	Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
 
 ARM/CALXEDA HIGHBANK ARCHITECTURE
@@ -1547,8 +1618,8 @@ S:	Maintained
 F:	arch/arm/mach-cns3xxx/
 
 ARM/CAVIUM THUNDER NETWORK DRIVER
-M:	Sunil Goutham <sgoutham@cavium.com>
-M:	Robert Richter <rric@kernel.org>
+M:	Sunil Goutham <sgoutham@marvell.com>
+M:	Robert Richter <rrichter@marvell.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/net/ethernet/cavium/thunder/
@@ -1608,8 +1679,7 @@ R:	Suzuki K Poulose <suzuki.poulose@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/hwtracing/coresight/*
-F:	Documentation/trace/coresight.rst
-F:	Documentation/trace/coresight-cpu-debug.rst
+F:	Documentation/trace/coresight/*
 F:	Documentation/devicetree/bindings/arm/coresight.txt
 F:	Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
 F:	Documentation/ABI/testing/sysfs-bus-coresight-devices-*
@@ -1897,7 +1967,7 @@ F:	arch/arm/boot/dts/dove*
 F:	arch/arm/boot/dts/orion5x*
 T:	git git://git.infradead.org/linux-mvebu.git
 
-ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
+ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K, CN9130 SOC support
 M:	Jason Cooper <jason@lakedaemon.net>
 M:	Andrew Lunn <andrew@lunn.ch>
 M:	Gregory Clement <gregory.clement@bootlin.com>
@@ -1909,6 +1979,7 @@ F:	arch/arm/boot/dts/kirkwood*
 F:	arch/arm/configs/mvebu_*_defconfig
 F:	arch/arm/mach-mvebu/
 F:	arch/arm64/boot/dts/marvell/armada*
+F:	arch/arm64/boot/dts/marvell/cn913*
 F:	drivers/cpufreq/armada-37xx-cpufreq.c
 F:	drivers/cpufreq/armada-8k-cpufreq.c
 F:	drivers/cpufreq/mvebu-cpufreq.c
@@ -2005,6 +2076,7 @@ F:	drivers/dma/ste_dma40*
 F:	drivers/hwspinlock/u8500_hsem.c
 F:	drivers/i2c/busses/i2c-nomadik.c
 F:	drivers/i2c/busses/i2c-stu300.c
+F:	drivers/iio/adc/ab8500-gpadc.c
 F:	drivers/mfd/ab3100*
 F:	drivers/mfd/ab8500*
 F:	drivers/mfd/abx500*
@@ -2102,6 +2174,7 @@ S:	Maintained
 
 ARM/QUALCOMM SUPPORT
 M:	Andy Gross <agross@kernel.org>
+M:	Bjorn Andersson <bjorn.andersson@linaro.org>
 L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/soc/qcom/
@@ -2150,9 +2223,11 @@ L:	linux-unisoc@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/rda8810pl-*
 F:	drivers/clocksource/timer-rda.c
+F:	drivers/gpio/gpio-rda.c
 F:	drivers/irqchip/irq-rda-intc.c
 F:	drivers/tty/serial/rda-uart.c
 F:	Documentation/devicetree/bindings/arm/rda.yaml
+F:	Documentation/devicetree/bindings/gpio/gpio-rda.yaml
 F:	Documentation/devicetree/bindings/interrupt-controller/rda,8810pl-intc.txt
 F:	Documentation/devicetree/bindings/serial/rda,8810pl-uart.txt
 F:	Documentation/devicetree/bindings/timer/rda,8810pl-timer.txt
@@ -2160,6 +2235,7 @@ F:	Documentation/devicetree/bindings/timer/rda,8810pl-timer.txt
 ARM/REALTEK ARCHITECTURE
 M:	Andreas Färber <afaerber@suse.de>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-realtek-soc@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/realtek/
 F:	Documentation/devicetree/bindings/arm/realtek.yaml
@@ -2198,6 +2274,7 @@ L:	linux-rockchip@lists.infradead.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
+F:	Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
 F:	arch/arm/boot/dts/rk3*
 F:	arch/arm/boot/dts/rv1108*
 F:	arch/arm/mach-rockchip/
@@ -2230,11 +2307,11 @@ F:	drivers/*/*s3c64xx*
 F:	drivers/*/*s5pv210*
 F:	drivers/memory/samsung/
 F:	drivers/soc/samsung/
+F:	drivers/tty/serial/samsung*
 F:	include/linux/soc/samsung/
 F:	Documentation/arm/samsung/
 F:	Documentation/devicetree/bindings/arm/samsung/
-F:	Documentation/devicetree/bindings/sram/samsung-sram.txt
-F:	Documentation/devicetree/bindings/power/pd-samsung.txt
+F:	Documentation/devicetree/bindings/power/pd-samsung.yaml
 N:	exynos
 
 ARM/SAMSUNG MOBILE MACHINE SUPPORT
@@ -2488,10 +2565,10 @@ F:	drivers/reset/reset-uniphier.c
 F:	drivers/tty/serial/8250/8250_uniphier.c
 N:	uniphier
 
-ARM/Ux500 CLOCK FRAMEWORK SUPPORT
+Ux500 CLOCK DRIVERS
 M:	Ulf Hansson <ulf.hansson@linaro.org>
+L:	linux-clk@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:	git git://git.linaro.org/people/ulfh/clk.git
 S:	Maintained
 F:	drivers/clk/ux500/
 
@@ -2611,6 +2688,7 @@ S:	Maintained
 F:	arch/arm64/
 X:	arch/arm64/boot/dts/
 F:	Documentation/arm64/
+F:	tools/testing/selftests/arm64/
 
 AS3645A LED FLASH CONTROLLER DRIVER
 M:	Sakari Ailus <sakari.ailus@iki.fi>
@@ -2649,6 +2727,14 @@ S:	Maintained
 F:	drivers/pinctrl/aspeed/
 F:	Documentation/devicetree/bindings/pinctrl/aspeed,*
 
+ASPEED SCU INTERRUPT CONTROLLER DRIVER
+M:	Eddie James <eajames@linux.ibm.com>
+L:	linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2xxx-scu-ic.txt
+F:	drivers/irqchip/irq-aspeed-scu-ic.c
+F:	include/dt-bindings/interrupt-controller/aspeed-scu-ic.h
+
 ASPEED VIDEO ENGINE DRIVER
 M:	Eddie James <eajames@linux.ibm.com>
 L:	linux-media@vger.kernel.org
@@ -2697,7 +2783,7 @@ M:	Bartosz Golaszewski <bgolaszewski@baylibre.com>
 L:	linux-i2c@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
 S:	Maintained
-F:	Documentation/devicetree/bindings/eeprom/at24.txt
+F:	Documentation/devicetree/bindings/eeprom/at24.yaml
 F:	drivers/misc/eeprom/at24.c
 
 ATA OVER ETHERNET (AOE) DRIVER
@@ -2858,7 +2944,6 @@ AXENTIA ARM DEVICES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	Documentation/devicetree/bindings/arm/axentia.txt
 F:	arch/arm/boot/dts/at91-linea.dtsi
 F:	arch/arm/boot/dts/at91-natte.dtsi
 F:	arch/arm/boot/dts/at91-nattis-2-natte-2.dts
@@ -3047,6 +3132,13 @@ S:	Supported
 F:	drivers/net/bonding/
 F:	include/uapi/linux/if_bonding.h
 
+BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
+M:	Dan Robertson <dan@dlrobertson.com>
+L:	linux-iio@vger.kernel.org
+S:	Maintained
+F:	drivers/iio/accel/bma400*
+F:	Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
+
 BPF (Safe dynamic programs and tools)
 M:	Alexei Starovoitov <ast@kernel.org>
 M:	Daniel Borkmann <daniel@iogearbox.net>
@@ -3106,7 +3198,7 @@ S:	Maintained
 F:	arch/mips/net/
 
 BPF JIT for NFP NICs
-M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 L:	bpf@vger.kernel.org
 S:	Supported
@@ -3187,8 +3279,7 @@ N:	kona
 F:	arch/arm/mach-bcm/
 
 BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
-M:	Eric Anholt <eric@anholt.net>
-M:	Stefan Wahren <wahrenst@gmx.net>
+M:	Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
 L:	bcm-kernel-feedback-list@broadcom.com
 L:	linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -3532,7 +3623,7 @@ BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
 M:	Chanwoo Choi <cw00.choi@samsung.com>
 L:	linux-pm@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
 S:	Maintained
 F:	drivers/devfreq/exynos-bus.c
 F:	Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -3595,6 +3686,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/media/cdns,*.txt
 F:	drivers/media/platform/cadence/cdns-csi2*
 
+CADENCE NAND DRIVER
+M:	Piotr Sroka <piotrs@cadence.com>
+L:	linux-mtd@lists.infradead.org
+S:	Maintained
+F:	drivers/mtd/nand/raw/cadence-nand-controller.c
+F:	Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
+
 CADET FM/AM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -3626,16 +3724,6 @@ L:	cake@lists.bufferbloat.net (moderated for non-subscribers)
 S:	Maintained
 F:	net/sched/sch_cake.c
 
-CALGARY x86-64 IOMMU
-M:	Muli Ben-Yehuda <mulix@mulix.org>
-M:	Jon Mason <jdmason@kudzu.us>
-L:	iommu@lists.linux-foundation.org
-S:	Maintained
-F:	arch/x86/kernel/pci-calgary_64.c
-F:	arch/x86/kernel/tce_64.c
-F:	arch/x86/include/asm/calgary.h
-F:	arch/x86/include/asm/tce.h
-
 CAN NETWORK DRIVERS
 M:	Wolfgang Grandegger <wg@grandegger.com>
 M:	Marc Kleine-Budde <mkl@pengutronix.de>
@@ -3678,7 +3766,7 @@ M:	Oleksij Rempel <o.rempel@pengutronix.de>
 R:	Pengutronix Kernel Team <kernel@pengutronix.de>
 L:	linux-can@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/j1939.txt
+F:	Documentation/networking/j1939.rst
 F:	net/can/j1939/
 F:	include/uapi/linux/can/j1939.h
 
@@ -3704,9 +3792,8 @@ S:	Maintained
 F:	drivers/net/wireless/ath/carl9170/
 
 CAVIUM I2C DRIVER
-M:	Jan Glauber <jglauber@cavium.com>
-M:	David Daney <david.daney@cavium.com>
-W:	http://www.cavium.com
+M:	Robert Richter <rrichter@marvell.com>
+W:	http://www.marvell.com
 S:	Supported
 F:	drivers/i2c/busses/i2c-octeon*
 F:	drivers/i2c/busses/i2c-thunderx*
@@ -3716,27 +3803,25 @@ M:	Derek Chickles <dchickles@marvell.com>
 M:	Satanand Burla <sburla@marvell.com>
 M:	Felix Manlunas <fmanlunas@marvell.com>
 L:	netdev@vger.kernel.org
-W:	http://www.cavium.com
+W:	http://www.marvell.com
 S:	Supported
 F:	drivers/net/ethernet/cavium/liquidio/
 
 CAVIUM MMC DRIVER
-M:	Jan Glauber <jglauber@cavium.com>
-M:	David Daney <david.daney@cavium.com>
-M:	Steven J. Hill <Steven.Hill@cavium.com>
-W:	http://www.cavium.com
+M:	Robert Richter <rrichter@marvell.com>
+W:	http://www.marvell.com
 S:	Supported
 F:	drivers/mmc/host/cavium*
 
 CAVIUM OCTEON-TX CRYPTO DRIVER
-M:	George Cherian <george.cherian@cavium.com>
+M:	George Cherian <gcherian@marvell.com>
 L:	linux-crypto@vger.kernel.org
-W:	http://www.cavium.com
+W:	http://www.marvell.com
 S:	Supported
 F:	drivers/crypto/cavium/cpt/
 
 CAVIUM THUNDERX2 ARM64 SOC
-M:	Robert Richter <rrichter@cavium.com>
+M:	Robert Richter <rrichter@marvell.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
@@ -4269,14 +4354,13 @@ F:	include/linux/cpufreq.h
 F:	include/linux/sched/cpufreq.h
 F:	tools/testing/selftests/cpufreq/
 
-CPU FREQUENCY DRIVERS - ARM BIG LITTLE
+CPU FREQUENCY DRIVERS - VEXPRESS SPC ARM BIG LITTLE
 M:	Viresh Kumar <viresh.kumar@linaro.org>
 M:	Sudeep Holla <sudeep.holla@arm.com>
 L:	linux-pm@vger.kernel.org
 W:	http://www.arm.com/products/processors/technologies/biglittleprocessing.php
 S:	Maintained
-F:	drivers/cpufreq/arm_big_little.h
-F:	drivers/cpufreq/arm_big_little.c
+F:	drivers/cpufreq/vexpress-spc-cpufreq.c
 
 CPU POWER MONITORING SUBSYSTEM
 M:	Thomas Renninger <trenn@suse.com>
@@ -4455,14 +4539,6 @@ W:	http://www.chelsio.com
 S:	Supported
 F:	drivers/scsi/cxgbi/cxgb3i
 
-CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
-M:	Potnuri Bharat Teja <bharat@chelsio.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.openfabrics.org
-S:	Supported
-F:	drivers/infiniband/hw/cxgb3/
-F:	include/uapi/rdma/cxgb3-abi.h
-
 CXGB4 CRYPTO DRIVER (chcr)
 M:	Atul Gupta <atul.gupta@chelsio.com>
 L:	linux-crypto@vger.kernel.org
@@ -4637,6 +4713,14 @@ M:	"Maciej W. Rozycki" <macro@linux-mips.org>
 S:	Maintained
 F:	drivers/net/fddi/defxx.*
 
+DEINTERLACE DRIVERS FOR ALLWINNER H3
+M:	Jernej Skrabec <jernej.skrabec@siol.net>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/platform/sunxi/sun8i-di/
+F:	Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml
+
 DELL SMBIOS DRIVER
 M:	Pali Rohár <pali.rohar@gmail.com>
 M:	Mario Limonciello <mario.limonciello@dell.com>
@@ -4761,9 +4845,9 @@ F:	include/linux/devcoredump.h
 DEVICE FREQUENCY (DEVFREQ)
 M:	MyungJoo Ham <myungjoo.ham@samsung.com>
 M:	Kyungmin Park <kyungmin.park@samsung.com>
-R:	Chanwoo Choi <cw00.choi@samsung.com>
+M:	Chanwoo Choi <cw00.choi@samsung.com>
 L:	linux-pm@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
 S:	Maintained
 F:	drivers/devfreq/
 F:	include/linux/devfreq.h
@@ -4773,10 +4857,11 @@ F:	include/trace/events/devfreq.h
 DEVICE FREQUENCY EVENT (DEVFREQ-EVENT)
 M:	Chanwoo Choi <cw00.choi@samsung.com>
 L:	linux-pm@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
 S:	Supported
 F:	drivers/devfreq/event/
 F:	drivers/devfreq/devfreq-event.c
+F:	include/dt-bindings/pmu/exynos_ppmu.h
 F:	include/linux/devfreq-event.h
 F:	Documentation/devicetree/bindings/devfreq/event/
 
@@ -4811,6 +4896,7 @@ S:	Supported
 F:	net/core/devlink.c
 F:	include/net/devlink.h
 F:	include/uapi/linux/devlink.h
+F:	Documentation/networking/devlink
 
 DIALOG SEMICONDUCTOR DRIVERS
 M:	Support Opensource <support.opensource@diasemi.com>
@@ -4878,7 +4964,6 @@ F:	include/trace/events/fs_dax.h
 DEVICE DIRECT ACCESS (DAX)
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
-M:	Keith Busch <keith.busch@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-nvdimm@lists.01.org
 S:	Supported
@@ -4937,6 +5022,25 @@ F:	include/linux/dma-buf*
 F:	include/linux/reservation.h
 F:	include/linux/*fence.h
 F:	Documentation/driver-api/dma-buf.rst
+K:	dma_(buf|fence|resv)
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
+DMA-BUF HEAPS FRAMEWORK
+M:	Sumit Semwal <sumit.semwal@linaro.org>
+R:	Andrew F. Davis <afd@ti.com>
+R:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
+R:	Liam Mark <lmark@codeaurora.org>
+R:	Laura Abbott <labbott@redhat.com>
+R:	Brian Starkey <Brian.Starkey@arm.com>
+R:	John Stultz <john.stultz@linaro.org>
+S:	Maintained
+L:	linux-media@vger.kernel.org
+L:	dri-devel@lists.freedesktop.org
+L:	linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
+F:	include/uapi/linux/dma-heap.h
+F:	include/linux/dma-heap.h
+F:	drivers/dma-buf/dma-heap.c
+F:	drivers/dma-buf/heaps/*
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
@@ -4965,6 +5069,14 @@ F:	include/linux/dma-direct.h
 F:	include/linux/dma-mapping.h
 F:	include/linux/dma-noncoherent.h
 
+DMC FREQUENCY DRIVER FOR SAMSUNG EXYNOS5422
+M:	Lukasz Luba <lukasz.luba@arm.com>
+L:	linux-pm@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org
+S:	Maintained
+F:	drivers/memory/samsung/exynos5422-dmc.c
+F:	Documentation/devicetree/bindings/memory-controllers/exynos5422-dmc.txt
+
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
 L:	linux-hwmon@vger.kernel.org
@@ -5046,10 +5158,14 @@ M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
+F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-mac*
 F:	drivers/net/ethernet/freescale/dpaa2/dpni*
+F:	drivers/net/ethernet/freescale/dpaa2/dpmac*
 F:	drivers/net/ethernet/freescale/dpaa2/dpkg.h
 F:	drivers/net/ethernet/freescale/dpaa2/Makefile
 F:	drivers/net/ethernet/freescale/dpaa2/Kconfig
+F:	Documentation/networking/device_drivers/freescale/dpaa2/ethernet-driver.rst
+F:	Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
 
 DPAA2 ETHERNET SWITCH DRIVER
 M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
@@ -5132,6 +5248,12 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 S:	Maintained
 F:	drivers/gpu/drm/bochs/
 
+DRM DRIVER FOR BOE HIMAX8279D PANELS
+M:	Jerry Han <hanxu5@huaqin.corp-partner.google.com>
+S:	Maintained
+F:	drivers/gpu/drm/panel/panel-boe-himax8279d.c
+F:	Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt
+
 DRM DRIVER FOR FARADAY TVE200 TV ENCODER
 M:	Linus Walleij <linus.walleij@linaro.org>
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -5287,6 +5409,12 @@ S:	Maintained
 F:	drivers/gpu/drm/tiny/st7735r.c
 F:	Documentation/devicetree/bindings/display/sitronix,st7735r.txt
 
+DRM DRIVER FOR SONY ACX424AKP PANELS
+M:	Linus Walleij <linus.walleij@linaro.org>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Maintained
+F:	drivers/gpu/drm/panel/panel-sony-acx424akp.c
+
 DRM DRIVER FOR ST-ERICSSON MCDE
 M:	Linus Walleij <linus.walleij@linaro.org>
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -5359,7 +5487,6 @@ F:	include/linux/vga*
 DRM DRIVERS AND MISC GPU PATCHES
 M:	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
 M:	Maxime Ripard <mripard@kernel.org>
-M:	Sean Paul <sean@poorly.run>
 W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -5372,12 +5499,22 @@ F:	include/linux/vga*
 
 DRM DRIVERS FOR ALLWINNER A10
 M:	Maxime Ripard <mripard@kernel.org>
+M:	Chen-Yu Tsai <wens@csie.org>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 F:	drivers/gpu/drm/sun4i/
 F:	Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
+DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE
+M:	Maxime Ripard <mripard@kernel.org>
+M:	Chen-Yu Tsai <wens@csie.org>
+R:	Jernej Skrabec <jernej.skrabec@siol.net>
+L:	dri-devel@lists.freedesktop.org
+S:	Supported
+F:	drivers/gpu/drm/sun4i/sun8i*
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 DRM DRIVERS FOR AMLOGIC SOCS
 M:	Neil Armstrong <narmstrong@baylibre.com>
 L:	dri-devel@lists.freedesktop.org
@@ -5833,15 +5970,14 @@ F:	drivers/edac/highbank*
 
 EDAC-CAVIUM OCTEON
 M:	Ralf Baechle <ralf@linux-mips.org>
-M:	David Daney <david.daney@cavium.com>
+M:	Robert Richter <rrichter@marvell.com>
 L:	linux-edac@vger.kernel.org
 L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	drivers/edac/octeon_edac*
 
 EDAC-CAVIUM THUNDERX
-M:	David Daney <david.daney@cavium.com>
-M:	Jan Glauber <jglauber@cavium.com>
+M:	Robert Richter <rrichter@marvell.com>
 L:	linux-edac@vger.kernel.org
 S:	Supported
 F:	drivers/edac/thunderx_edac*
@@ -5971,6 +6107,7 @@ M:	Yash Shah <yash.shah@sifive.com>
 L:	linux-edac@vger.kernel.org
 S:	Supported
 F:	drivers/edac/sifive_edac.c
+F:	drivers/soc/sifive_l2_cache.c
 
 EDAC-SKYLAKE
 M:	Tony Luck <tony.luck@intel.com>
@@ -6002,14 +6139,14 @@ F:	sound/usb/misc/ua101.c
 EFI TEST DRIVER
 L:	linux-efi@vger.kernel.org
 M:	Ivan Hu <ivan.hu@canonical.com>
-M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+M:	Ard Biesheuvel <ardb@kernel.org>
 S:	Maintained
 F:	drivers/firmware/efi/test/
 
 EFI VARIABLE FILESYSTEM
 M:	Matthew Garrett <matthew.garrett@nebula.com>
 M:	Jeremy Kerr <jk@ozlabs.org>
-M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+M:	Ard Biesheuvel <ardb@kernel.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 L:	linux-efi@vger.kernel.org
 S:	Maintained
@@ -6096,6 +6233,12 @@ M:	Maxim Levitsky <maximlevitsky@gmail.com>
 S:	Maintained
 F:	drivers/media/rc/ene_ir.*
 
+EPAPR HYPERVISOR BYTE CHANNEL DEVICE DRIVER
+M:	Laurentiu Tudor <laurentiu.tudor@nxp.com>
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Maintained
+F:	drivers/tty/ehv_bytechan.c
+
 EPSON S1D13XXX FRAMEBUFFER DRIVER
 M:	Kristoffer Ericson <kristoffer.ericson@gmail.com>
 S:	Maintained
@@ -6138,15 +6281,18 @@ ETHERNET PHY LIBRARY
 M:	Andrew Lunn <andrew@lunn.ch>
 M:	Florian Fainelli <f.fainelli@gmail.com>
 M:	Heiner Kallweit <hkallweit1@gmail.com>
+R:	Russell King <linux@armlinux.org.uk>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-class-net-phydev
 F:	Documentation/devicetree/bindings/net/ethernet-phy.yaml
 F:	Documentation/devicetree/bindings/net/mdio*
+F:	Documentation/devicetree/bindings/net/qca,ar803x.yaml
 F:	Documentation/networking/phy.rst
 F:	drivers/net/phy/
 F:	drivers/of/of_mdio.c
 F:	drivers/of/of_net.c
+F:	include/dt-bindings/net/qca-ar803x.h
 F:	include/linux/*mdio*.h
 F:	include/linux/of_net.h
 F:	include/linux/phy.h
@@ -6159,6 +6305,7 @@ F:	include/uapi/linux/mii.h
 
 EXFAT FILE SYSTEM
 M:	Valdis Kletnieks <valdis.kletnieks@vt.edu>
+L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
 F:	drivers/staging/exfat/
 
@@ -6188,7 +6335,7 @@ S:	Supported
 F:	security/integrity/evm/
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
-M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+M:	Ard Biesheuvel <ardb@kernel.org>
 L:	linux-efi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 S:	Maintained
@@ -6340,6 +6487,7 @@ F:	fs/*
 F:	include/linux/fs.h
 F:	include/linux/fs_types.h
 F:	include/uapi/linux/fs.h
+F:	include/uapi/linux/openat2.h
 
 FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	Riku Voipio <riku.voipio@iki.fi>
@@ -6713,6 +6861,7 @@ FSNOTIFY: FILESYSTEM NOTIFICATION INFRASTRUCTURE
 M:	Jan Kara <jack@suse.cz>
 R:	Amir Goldstein <amir73il@gmail.com>
 L:	linux-fsdevel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git fsnotify
 S:	Maintained
 F:	fs/notify/
 F:	include/linux/fsnotify*.h
@@ -6882,7 +7031,7 @@ L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	drivers/base/power/domain*.c
 F:	include/linux/pm_domain.h
-F:	Documentation/devicetree/bindings/power/power_domain.txt
+F:	Documentation/devicetree/bindings/power/power?domain*
 
 GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER
 M:	Eugen Hristev <eugen.hristev@microchip.com>
@@ -6973,6 +7122,7 @@ L:	linux-acpi@vger.kernel.org
 S:	Maintained
 F:	Documentation/firmware-guide/acpi/gpio-properties.rst
 F:	drivers/gpio/gpiolib-acpi.c
+F:	drivers/gpio/gpiolib-acpi.h
 
 GPIO IR Transmitter
 M:	Sean Young <sean@mess.org>
@@ -7233,7 +7383,7 @@ M:	Ohad Ben-Cohen <ohad@wizery.com>
 M:	Bjorn Andersson <bjorn.andersson@linaro.org>
 L:	linux-remoteproc@vger.kernel.org
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/hwspinlock.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git hwspinlock-next
 F:	Documentation/devicetree/bindings/hwlock/
 F:	Documentation/hwspinlock.txt
 F:	drivers/hwspinlock/
@@ -7364,6 +7514,25 @@ F:	include/uapi/linux/if_hippi.h
 F:	net/802/hippi.c
 F:	drivers/net/hippi/
 
+HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
+M:	Zaibo Xu <xuzaibo@huawei.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/hisilicon/sec2/sec_crypto.c
+F:	drivers/crypto/hisilicon/sec2/sec_main.c
+F:	drivers/crypto/hisilicon/sec2/sec_crypto.h
+F:	drivers/crypto/hisilicon/sec2/sec.h
+F:	Documentation/ABI/testing/debugfs-hisi-sec
+
+HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
+M:	Zaibo Xu <xuzaibo@huawei.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/hisilicon/hpre/hpre_crypto.c
+F:	drivers/crypto/hisilicon/hpre/hpre_main.c
+F:	drivers/crypto/hisilicon/hpre/hpre.h
+F:	Documentation/ABI/testing/debugfs-hisi-hpre
+
 HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
 M:	Yisen Zhuang <yisen.zhuang@huawei.com>
 M:	Salil Mehta <salil.mehta@huawei.com>
@@ -7372,6 +7541,11 @@ W:	http://www.hisilicon.com
 S:	Maintained
 F:	drivers/net/ethernet/hisilicon/hns3/
 
+HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
+M:	Zaibo Xu <xuzaibo@huawei.com>
+S:	Maintained
+F:	drivers/char/hw_random/hisi-trng-v2.c
+
 HISILICON LPC BUS DRIVER
 M:	john.garry@huawei.com
 W:	http://www.hisilicon.com
@@ -7410,6 +7584,12 @@ S:	Supported
 F:	drivers/scsi/hisi_sas/
 F:	Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
 
+HISILICON V3XX SPI NOR FLASH Controller Driver
+M:	John Garry <john.garry@huawei.com>
+W:	http://www.hisilicon.com
+S:	Maintained
+F:	drivers/spi/spi-hisi-sfc-v3xx.c
+
 HISILICON QM AND ZIP Controller DRIVER
 M:	Zhou Wang <wangzhou1@hisilicon.com>
 L:	linux-crypto@vger.kernel.org
@@ -7417,7 +7597,6 @@ S:	Maintained
 F:	drivers/crypto/hisilicon/qm.c
 F:	drivers/crypto/hisilicon/qm.h
 F:	drivers/crypto/hisilicon/sgl.c
-F:	drivers/crypto/hisilicon/sgl.h
 F:	drivers/crypto/hisilicon/zip/
 F:	Documentation/ABI/testing/debugfs-hisi-zip
 
@@ -7443,8 +7622,8 @@ F:	drivers/platform/x86/tc1100-wmi.c
 
 HP100:	Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
 M:	Jaroslav Kysela <perex@perex.cz>
-S:	Maintained
-F:	drivers/net/ethernet/hp/hp100.*
+S:	Obsolete
+F:	drivers/staging/hp/hp100.*
 
 HPET:	High Precision Event Timers driver
 M:	Clemens Ladisch <clemens@ladisch.de>
@@ -7545,6 +7724,13 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	arch/x86/kernel/cpu/hygon.c
 
+HYNIX HI556 SENSOR DRIVER
+M:	Shawn Tu <shawnx.tu@intel.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/i2c/hi556.c
+
 Hyper-V CORE AND DRIVERS
 M:	"K. Y. Srinivasan" <kys@microsoft.com>
 M:	Haiyang Zhang <haiyangz@microsoft.com>
@@ -7577,6 +7763,7 @@ F:	include/uapi/linux/hyperv.h
 F:	include/asm-generic/mshyperv.h
 F:	tools/hv/
 F:	Documentation/ABI/stable/sysfs-bus-vmbus
+F:	Documentation/ABI/testing/debugfs-hyperv
 
 HYPERBUS SUPPORT
 M:	Vignesh Raghavendra <vigneshr@ti.com>
@@ -7729,7 +7916,7 @@ F:	drivers/i2c/i2c-stub.c
 
 I3C SUBSYSTEM
 M:	Boris Brezillon <bbrezillon@kernel.org>
-L:	linux-i3c@lists.infradead.org
+L:	linux-i3c@lists.infradead.org (moderated for non-subscribers)
 C:	irc://chat.freenode.net/linux-i3c
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git
 S:	Maintained
@@ -7745,6 +7932,12 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.txt
 F:	drivers/i3c/master/dw*
 
+I3C DRIVER FOR CADENCE I3C MASTER IP
+M:	Przemysław Gaj <pgaj@cadence.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
+F:	drivers/i3c/master/i3c-master-cdns.c
+
 IA64 (Itanium) PLATFORM
 M:	Tony Luck <tony.luck@intel.com>
 M:	Fenghua Yu <fenghua.yu@intel.com>
@@ -8243,13 +8436,12 @@ F:	Documentation/fb/intelfb.rst
 F:	drivers/video/fbdev/intelfb/
 
 INTEL GPIO DRIVERS
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+M:	Andy Shevchenko <andy@kernel.org>
 L:	linux-gpio@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
 F:	drivers/gpio/gpio-ich.c
 F:	drivers/gpio/gpio-intel-mid.c
-F:	drivers/gpio/gpio-lynxpoint.c
 F:	drivers/gpio/gpio-merrifield.c
 F:	drivers/gpio/gpio-ml-ioh.c
 F:	drivers/gpio/gpio-pch.c
@@ -8280,6 +8472,14 @@ Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:	Supported
 F:	drivers/dma/ioat*
 
+INTEL IADX DRIVER
+M:	Dave Jiang <dave.jiang@intel.com>
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/idxd/*
+F:	include/uapi/linux/idxd.h
+F:	include/linux/idxd.h
+
 INTEL IDLE DRIVER
 M:	Jacob Pan <jacob.jun.pan@linux.intel.com>
 M:	Len Brown <lenb@kernel.org>
@@ -8329,6 +8529,7 @@ S:	Maintained
 F:	drivers/staging/media/ipu3/
 F:	Documentation/media/uapi/v4l/pixfmt-meta-intel-ipu3.rst
 F:	Documentation/media/v4l-drivers/ipu3.rst
+F:	Documentation/media/v4l-drivers/ipu3_rcb.svg
 
 INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
 M:	Krzysztof Halasa <khalasa@piap.pl>
@@ -8396,7 +8597,7 @@ F:	arch/x86/include/asm/intel_pmc_ipc.h
 F:	arch/x86/include/asm/intel_punit_ipc.h
 
 INTEL PMIC GPIO DRIVERS
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+M:	Andy Shevchenko <andy@kernel.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
 F:	drivers/gpio/gpio-*cove.c
@@ -8460,6 +8661,12 @@ S:	Maintained
 F:	arch/x86/include/asm/intel_telemetry.h
 F:	drivers/platform/x86/intel_telemetry*
 
+INTEL UNCORE FREQUENCY CONTROL
+M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/intel-uncore-frequency.c
+
 INTEL VIRTUAL BUTTON DRIVER
 M:	AceLan Kao <acelan.kao@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -8467,7 +8674,7 @@ S:	Maintained
 F:	drivers/platform/x86/intel-vbtn.c
 
 INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy)
-M:	Stanislaw Gruszka <sgruszka@redhat.com>
+M:	Stanislaw Gruszka <stf_xl@wp.pl>
 L:	linux-wireless@vger.kernel.org
 S:	Supported
 F:	drivers/net/wireless/intel/iwlegacy/
@@ -8564,12 +8771,13 @@ F:	include/linux/iova.h
 
 IO_URING
 M:	Jens Axboe <axboe@kernel.dk>
-L:	linux-block@vger.kernel.org
-L:	linux-fsdevel@vger.kernel.org
+L:	io-uring@vger.kernel.org
 T:	git git://git.kernel.dk/linux-block
 T:	git git://git.kernel.dk/liburing
 S:	Maintained
 F:	fs/io_uring.c
+F:	fs/io-wq.c
+F:	fs/io-wq.h
 F:	include/uapi/linux/io_uring.h
 
 IPMI SUBSYSTEM
@@ -8667,6 +8875,7 @@ ISCSI
 M:	Lee Duncan <lduncan@suse.com>
 M:	Chris Leech <cleech@redhat.com>
 L:	open-iscsi@googlegroups.com
+L:	linux-scsi@vger.kernel.org
 W:	www.open-iscsi.com
 S:	Maintained
 F:	drivers/scsi/*iscsi*
@@ -8706,7 +8915,7 @@ S:	Maintained
 F:	drivers/isdn/mISDN
 F:	drivers/isdn/hardware
 
-ISDN/CAPI SUBSYSTEM
+ISDN/CMTP OVER BLUETOOTH
 M:	Karsten Keil <isdn@linux-pingi.de>
 L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 L:	netdev@vger.kernel.org
@@ -8714,7 +8923,6 @@ W:	http://www.isdn4linux.de
 S:	Odd Fixes
 F:	Documentation/isdn/
 F:	drivers/isdn/capi/
-F:	drivers/staging/isdn/
 F:	net/bluetooth/cmtp/
 F:	include/linux/isdn/
 F:	include/uapi/linux/isdn/
@@ -8842,7 +9050,7 @@ F:	mm/kasan/
 F:	scripts/Makefile.kasan
 
 KCONFIG
-M:	Masahiro Yamada <yamada.masahiro@socionext.com>
+M:	Masahiro Yamada <masahiroy@kernel.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
 L:	linux-kbuild@vger.kernel.org
 S:	Maintained
@@ -8874,7 +9082,7 @@ S:	Maintained
 F:	fs/autofs/
 
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
-M:	Masahiro Yamada <yamada.masahiro@socionext.com>
+M:	Masahiro Yamada <masahiroy@kernel.org>
 M:	Michal Marek <michal.lkml@markovi.net>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
 L:	linux-kbuild@vger.kernel.org
@@ -8920,6 +9128,17 @@ S:	Maintained
 F:	tools/testing/selftests/
 F:	Documentation/dev-tools/kselftest*
 
+KERNEL UNIT TESTING FRAMEWORK (KUnit)
+M:	Brendan Higgins <brendanhiggins@google.com>
+L:	linux-kselftest@vger.kernel.org
+L:	kunit-dev@googlegroups.com
+W:	https://google.github.io/kunit-docs/third_party/kernel/docs/
+S:	Maintained
+F:	Documentation/dev-tools/kunit/
+F:	include/kunit/
+F:	lib/kunit/
+F:	tools/testing/kunit/
+
 KERNEL USERMODE HELPER
 M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-kernel@vger.kernel.org
@@ -8929,7 +9148,6 @@ F:	include/linux/umh.h
 
 KERNEL VIRTUAL MACHINE (KVM)
 M:	Paolo Bonzini <pbonzini@redhat.com>
-M:	Radim Krčmář <rkrcmar@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org
 T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
@@ -8964,9 +9182,9 @@ F:	virt/kvm/arm/
 F:	include/kvm/arm_*
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
-M:	James Hogan <jhogan@kernel.org>
 L:	linux-mips@vger.kernel.org
-S:	Supported
+L:	kvm@vger.kernel.org
+S:	Orphan
 F:	arch/mips/include/uapi/asm/kvm*
 F:	arch/mips/include/asm/kvm*
 F:	arch/mips/kvm/
@@ -9001,7 +9219,6 @@ F:	tools/testing/selftests/kvm/*/s390x/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:	Paolo Bonzini <pbonzini@redhat.com>
-M:	Radim Krčmář <rkrcmar@redhat.com>
 R:	Sean Christopherson <sean.j.christopherson@intel.com>
 R:	Vitaly Kuznetsov <vkuznets@redhat.com>
 R:	Wanpeng Li <wanpengli@tencent.com>
@@ -9019,7 +9236,7 @@ F:	arch/x86/include/uapi/asm/svm.h
 F:	arch/x86/include/asm/kvm*
 F:	arch/x86/include/asm/pvclock-abi.h
 F:	arch/x86/include/asm/svm.h
-F:	arch/x86/include/asm/vmx.h
+F:	arch/x86/include/asm/vmx*.h
 F:	arch/x86/kernel/kvm.c
 F:	arch/x86/kernel/kvmclock.c
 
@@ -9303,6 +9520,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-nvdimm@lists.01.org
+P:	Documentation/nvdimm/maintainer-entry-profile.rst
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/blk.c
@@ -9313,6 +9531,7 @@ M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-nvdimm@lists.01.org
+P:	Documentation/nvdimm/maintainer-entry-profile.rst
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/btt*
@@ -9322,6 +9541,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-nvdimm@lists.01.org
+P:	Documentation/nvdimm/maintainer-entry-profile.rst
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem*
@@ -9338,9 +9558,9 @@ LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-M:	Keith Busch <keith.busch@intel.com>
 M:	Ira Weiny <ira.weiny@intel.com>
 L:	linux-nvdimm@lists.01.org
+P:	Documentation/nvdimm/maintainer-entry-profile.rst
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
 S:	Supported
@@ -9465,6 +9685,7 @@ LINUX KERNEL DUMP TEST MODULE (LKDTM)
 M:	Kees Cook <keescook@chromium.org>
 S:	Maintained
 F:	drivers/misc/lkdtm/*
+F:	tools/testing/selftests/lkdtm/*
 
 LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
 M:	Alan Stern <stern@rowland.harvard.edu>
@@ -9497,6 +9718,13 @@ F:	Documentation/misc-devices/lis3lv02d.rst
 F:	drivers/misc/lis3lv02d/
 F:	drivers/platform/x86/hp_accel.c
 
+LIST KUNIT TEST
+M:	David Gow <davidgow@google.com>
+L:	linux-kselftest@vger.kernel.org
+L:	kunit-dev@googlegroups.com
+S:	Maintained
+F:	lib/list-test.c
+
 LIVE PATCHING
 M:	Josh Poimboeuf <jpoimboe@redhat.com>
 M:	Jiri Kosina <jikos@kernel.org>
@@ -9601,6 +9829,13 @@ S:	Maintained
 F:	Documentation/admin-guide/ldm.rst
 F:	block/partitions/ldm.*
 
+LOGITECH HID GAMING KEYBOARDS
+M:	Hans de Goede <hdegoede@redhat.com>
+L:	linux-input@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
+S:	Maintained
+F:	drivers/hid/hid-lg-g15.c
+
 LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 M:	Sathya Prakash <sathya.prakash@broadcom.com>
 M:	Chaitra P B <chaitra.basappa@broadcom.com>
@@ -9622,9 +9857,17 @@ LTC1660 DAC DRIVER
 M:	Marcus Folkesson <marcus.folkesson@gmail.com>
 L:	linux-iio@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/iio/dac/ltc1660.txt
+F:	Documentation/devicetree/bindings/iio/dac/lltc,ltc1660.yaml
 F:	drivers/iio/dac/ltc1660.c
 
+LTC2983 IIO TEMPERATURE DRIVER
+M:	Nuno Sá <nuno.sa@analog.com>
+W:	http://ez.analog.com/community/linux-device-drivers
+L:	linux-iio@vger.kernel.org
+S:	Supported
+F:	drivers/iio/temperature/ltc2983.c
+F:	Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+
 LTC4261 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-hwmon@vger.kernel.org
@@ -9632,6 +9875,17 @@ S:	Maintained
 F:	Documentation/hwmon/ltc4261.rst
 F:	drivers/hwmon/ltc4261.c
 
+LTC2947 HARDWARE MONITOR DRIVER
+M:	Nuno Sá <nuno.sa@analog.com>
+W:	http://ez.analog.com/community/linux-device-drivers
+L:	linux-hwmon@vger.kernel.org
+S:	Supported
+F:	drivers/hwmon/ltc2947-core.c
+F:	drivers/hwmon/ltc2947-spi.c
+F:	drivers/hwmon/ltc2947-i2c.c
+F:	drivers/hwmon/ltc2947.h
+F:	Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml
+
 LTC4306 I2C MULTIPLEXER DRIVER
 M:	Michael Hennerich <michael.hennerich@analog.com>
 W:	http://ez.analog.com/community/linux-device-drivers
@@ -9740,6 +9994,7 @@ S:	Maintained
 F:	drivers/net/dsa/mv88e6xxx/
 F:	include/linux/platform_data/mv88e6xxx.h
 F:	Documentation/devicetree/bindings/net/dsa/marvell.txt
+F:	Documentation/networking/devlink/mv88e6xxx.rst
 
 MARVELL ARMADA DRM SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
@@ -9809,8 +10064,7 @@ F:	drivers/net/ethernet/marvell/mvneta.*
 
 MARVELL MWIFIEX WIRELESS DRIVER
 M:	Amitkumar Karwar <amitkarwar@gmail.com>
-M:	Nishant Sarmukadam <nishants@marvell.com>
-M:	Ganapathi Bhat <gbhat@marvell.com>
+M:	Ganapathi Bhat <ganapathi.bhat@nxp.com>
 M:	Xinming Hu <huxinming820@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
@@ -9849,6 +10103,16 @@ M:	Jerin Jacob <jerinj@marvell.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/marvell/octeontx2/af/
+F:	Documentation/networking/device_drivers/marvell/octeontx2.rst
+
+MARVELL OCTEONTX2 PHYSICAL FUNCTION DRIVER
+M:	Sunil Goutham <sgoutham@marvell.com>
+M:	Geetha sowjanya <gakula@marvell.com>
+M:	Subbaraya Sundeep <sbhatta@marvell.com>
+M:	hariprasad <hkelam@marvell.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/marvell/octeontx2/nic/
 
 MATROX FRAMEBUFFER DRIVER
 L:	linux-fbdev@vger.kernel.org
@@ -9864,7 +10128,7 @@ F:	Documentation/hwmon/max16065.rst
 F:	drivers/hwmon/max16065.c
 
 MAX2175 SDR TUNER DRIVER
-M:	Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
+M:	Ramesh Shanmugasundaram <rashanmu@gmail.com>
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 S:	Maintained
@@ -9906,8 +10170,8 @@ MAXIM MAX77650 PMIC MFD DRIVER
 M:	Bartosz Golaszewski <bgolaszewski@baylibre.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/*/*max77650.txt
-F:	Documentation/devicetree/bindings/*/max77650*.txt
+F:	Documentation/devicetree/bindings/*/*max77650.yaml
+F:	Documentation/devicetree/bindings/*/max77650*.yaml
 F:	include/linux/mfd/max77650.h
 F:	drivers/mfd/max77650.c
 F:	drivers/regulator/max77650-regulator.c
@@ -9961,6 +10225,16 @@ W:	https://linuxtv.org
 S:	Maintained
 F:	drivers/media/radio/radio-maxiradio*
 
+MCAN MMIO DEVICE DRIVER
+M:	Dan Murphy <dmurphy@ti.com>
+M:	Sriram Dash <sriram.dash@samsung.com>
+L:	linux-can@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/can/m_can.txt
+F:	drivers/net/can/m_can/m_can.c
+F:	drivers/net/can/m_can/m_can.h
+F:	drivers/net/can/m_can/m_can_platform.c
+
 MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-iio@vger.kernel.org
@@ -10121,12 +10395,12 @@ L:	linux-media@vger.kernel.org
 L:	linux-renesas-soc@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,ceu.txt
+F:	Documentation/devicetree/bindings/media/renesas,ceu.yaml
 F:	drivers/media/platform/renesas-ceu.c
 F:	include/media/drv-intf/renesas-ceu.h
 
 MEDIA DRIVERS FOR RENESAS - DRIF
-M:	Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
+M:	Ramesh Shanmugasundaram <rashanmu@gmail.com>
 L:	linux-media@vger.kernel.org
 L:	linux-renesas-soc@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
@@ -10159,7 +10433,7 @@ L:	linux-media@vger.kernel.org
 L:	linux-renesas-soc@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,csi2.txt
+F:	Documentation/devicetree/bindings/media/renesas,csi2.yaml
 F:	Documentation/devicetree/bindings/media/renesas,vin.txt
 F:	drivers/media/platform/rcar-vin/
 
@@ -10206,7 +10480,6 @@ F:	drivers/staging/media/tegra-vde/
 
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
 M:	Mauro Carvalho Chehab <mchehab@kernel.org>
-P:	LinuxTV.org Project
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 Q:	http://patchwork.kernel.org/project/linux-media/list/
@@ -10270,6 +10543,13 @@ S:	Maintained
 F:	drivers/net/dsa/mt7530.*
 F:	net/dsa/tag_mtk.c
 
+MEDIATEK BOARD LEVEL SHUTDOWN DRIVERS
+M:	Sean Wang <sean.wang@mediatek.com>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt
+F:	drivers/power/reset/mt6323-poweroff.c
+
 MEDIATEK JPEG DRIVER
 M:	Rick Chang <rick.chang@mediatek.com>
 M:	Bin Liu <bin.liu@mediatek.com>
@@ -10435,6 +10715,7 @@ M:	Darren Hart <dvhart@infradead.org>
 M:	Vadim Pasternak <vadimp@mellanox.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Supported
+F:	Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
 F:	drivers/platform/mellanox/
 F:	include/linux/platform_data/mlxreg.h
 
@@ -10536,15 +10817,13 @@ F:	include/linux/vmalloc.h
 F:	mm/
 
 MEMORY TECHNOLOGY DEVICES (MTD)
-M:	David Woodhouse <dwmw2@infradead.org>
-M:	Brian Norris <computersforpeace@gmail.com>
-M:	Marek Vasut <marek.vasut@gmail.com>
 M:	Miquel Raynal <miquel.raynal@bootlin.com>
 M:	Richard Weinberger <richard@nod.at>
 M:	Vignesh Raghavendra <vigneshr@ti.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
+C:	irc://irc.oftc.net/mtd
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/fixes
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next
 S:	Maintained
@@ -10589,7 +10868,7 @@ W:	http://linux-meson.com/
 S:	Supported
 F:	drivers/media/platform/meson/ao-cec.c
 F:	drivers/media/platform/meson/ao-cec-g12a.c
-F:	Documentation/devicetree/bindings/media/meson-ao-cec.txt
+F:	Documentation/devicetree/bindings/media/amlogic,meson-gx-ao-cec.yaml
 T:	git git://linuxtv.org/media_tree.git
 
 MESON NAND CONTROLLER DRIVER FOR AMLOGIC SOCS
@@ -10730,7 +11009,7 @@ M:	Kent Gustavsson <kent@minoris.se>
 L:	linux-iio@vger.kernel.org
 S:	Supported
 F:	drivers/iio/adc/mcp3911.c
-F:	Documentation/devicetree/bindings/iio/adc/mcp3911.txt
+F:	Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml
 
 MICROCHIP NAND DRIVER
 M:	Tudor Ambarus <tudor.ambarus@microchip.com>
@@ -10821,6 +11100,7 @@ M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/mscc/
+F:	include/soc/mscc/ocelot*
 
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
 M:	Chen Yu <yu.c.chen@intel.com>
@@ -10875,18 +11155,18 @@ F:	arch/mips/include/asm/mach-loongson32/
 F:	drivers/*/*loongson1*
 F:	drivers/*/*/*loongson1*
 
-MIPS/LOONGSON2 ARCHITECTURE
+MIPS/LOONGSON2EF ARCHITECTURE
 M:	Jiaxun Yang <jiaxun.yang@flygoat.com>
 L:	linux-mips@vger.kernel.org
 S:	Maintained
-F:	arch/mips/loongson64/fuloong-2e/
-F:	arch/mips/loongson64/lemote-2f/
-F:	arch/mips/include/asm/mach-loongson64/
+F:	arch/mips/loongson2ef/
+F:	arch/mips/include/asm/mach-loongson2ef/
 F:	drivers/*/*loongson2*
 F:	drivers/*/*/*loongson2*
 
-MIPS/LOONGSON3 ARCHITECTURE
+MIPS/LOONGSON64 ARCHITECTURE
 M:	Huacai Chen <chenhc@lemote.com>
+M:	Jiaxun Yang <jiaxun.yang@flygoat.com>
 L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/loongson64/
@@ -10913,9 +11193,18 @@ F:	drivers/media/radio/radio-miropcm20*
 MMP SUPPORT
 R:	Lubomir Rintel <lkundrak@v3.sk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lkundrak/linux-mmp.git
 S:	Odd Fixes
 F:	arch/arm/boot/dts/mmp*
 F:	arch/arm/mach-mmp/
+F:	linux/soc/mmp/
+
+MMP USB PHY DRIVERS
+R:	Lubomir Rintel <lkundrak@v3.sk>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/phy/marvell/phy-mmp3-usb.c
+F:	drivers/phy/marvell/phy-pxa-usb.c
 
 MMU GATHER AND TLB INVALIDATION
 M:	Will Deacon <will@kernel.org>
@@ -10968,6 +11257,13 @@ S:	Maintained
 F:	Documentation/driver-api/serial/moxa-smartio.rst
 F:	drivers/tty/mxser.*
 
+MONOLITHIC POWER SYSTEM PMIC DRIVER
+M:	Saravanan Sekar <sravanhome@gmail.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/regulator/mpq7920.yaml
+F:	drivers/regulator/mpq7920.c
+F:	drivers/regulator/mpq7920.h
+
 MR800 AVERMEDIA USB FM RADIO DRIVER
 M:	Alexey Klimov <klimov.linux@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -11211,7 +11507,7 @@ F:	Documentation/networking/net_failover.rst
 
 NETEM NETWORK EMULATOR
 M:	Stephen Hemminger <stephen@networkplumber.org>
-L:	netem@lists.linux-foundation.org (moderated for non-subscribers)
+L:	netdev@vger.kernel.org
 S:	Maintained
 F:	net/sched/sch_netem.c
 
@@ -11256,7 +11552,7 @@ F:	include/uapi/linux/netrom.h
 F:	net/netrom/
 
 NETRONOME ETHERNET DRIVERS
-M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+M:	Jakub Kicinski <kuba@kernel.org>
 L:	oss-drivers@netronome.com
 S:	Maintained
 F:	drivers/net/ethernet/netronome/
@@ -11285,8 +11581,8 @@ M:	"David S. Miller" <davem@davemloft.net>
 L:	netdev@vger.kernel.org
 W:	http://www.linuxfoundation.org/en/Net
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 S:	Odd Fixes
 F:	Documentation/devicetree/bindings/net/
 F:	drivers/net/
@@ -11324,11 +11620,12 @@ F:	drivers/net/dsa/
 
 NETWORKING [GENERAL]
 M:	"David S. Miller" <davem@davemloft.net>
+M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 W:	http://www.linuxfoundation.org/en/Net
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 B:	mailto:netdev@vger.kernel.org
 S:	Maintained
 F:	net/
@@ -11373,7 +11670,7 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 M:	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
 L:	netdev@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 S:	Maintained
 F:	net/ipv4/
 F:	net/ipv6/
@@ -11398,6 +11695,18 @@ F:	net/ipv6/calipso.c
 F:	net/netfilter/xt_CONNSECMARK.c
 F:	net/netfilter/xt_SECMARK.c
 
+NETWORKING [MPTCP]
+M:	Mat Martineau <mathew.j.martineau@linux.intel.com>
+M:	Matthieu Baerts <matthieu.baerts@tessares.net>
+L:	netdev@vger.kernel.org
+L:	mptcp@lists.01.org
+W:	https://github.com/multipath-tcp/mptcp_net-next/wiki
+B:	https://github.com/multipath-tcp/mptcp_net-next/issues
+S:	Maintained
+F:	include/net/mptcp.h
+F:	net/mptcp/
+F:	tools/testing/selftests/net/mptcp/
+
 NETWORKING [TCP]
 M:	Eric Dumazet <edumazet@google.com>
 L:	netdev@vger.kernel.org
@@ -11416,7 +11725,7 @@ M:	Boris Pismenny <borisp@mellanox.com>
 M:	Aviad Yehezkel <aviadye@mellanox.com>
 M:	John Fastabend <john.fastabend@gmail.com>
 M:	Daniel Borkmann <daniel@iogearbox.net>
-M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	net/tls/*
@@ -11428,7 +11737,7 @@ L:	linux-wireless@vger.kernel.org
 Q:	http://patchwork.kernel.org/project/linux-wireless/list/
 
 NETDEVSIM
-M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+M:	Jakub Kicinski <kuba@kernel.org>
 S:	Maintained
 F:	drivers/net/netdevsim/*
 
@@ -11505,7 +11814,7 @@ F:	Documentation/scsi/NinjaSCSI.txt
 F:	drivers/scsi/nsp32*
 
 NIOS2 ARCHITECTURE
-M:	Ley Foon Tan <lftan@altera.com>
+M:	Ley Foon Tan <ley.foon.tan@intel.com>
 L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git
 S:	Maintained
@@ -11637,6 +11946,7 @@ F:	drivers/nvme/target/fcloop.c
 NVM EXPRESS TARGET DRIVER
 M:	Christoph Hellwig <hch@lst.de>
 M:	Sagi Grimberg <sagi@grimberg.me>
+M:	Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
 L:	linux-nvme@lists.infradead.org
 T:	git://git.infradead.org/nvme.git
 W:	http://git.infradead.org/nvme.git
@@ -11761,6 +12071,8 @@ F:	arch/arm/boot/dts/*am3*
 F:	arch/arm/boot/dts/*am4*
 F:	arch/arm/boot/dts/*am5*
 F:	arch/arm/boot/dts/*dra7*
+F:	arch/arm/boot/dts/logicpd-som-lv*
+F:	arch/arm/boot/dts/logicpd-torpedo*
 
 OMAP DISPLAY SUBSYSTEM and FRAMEBUFFER SUPPORT (DSS2)
 L:	linux-omap@vger.kernel.org
@@ -12220,7 +12532,7 @@ L:	linux-unionfs@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
 S:	Supported
 F:	fs/overlayfs/
-F:	Documentation/filesystems/overlayfs.txt
+F:	Documentation/filesystems/overlayfs.rst
 
 P54 WIRELESS DRIVER
 M:	Christian Lamparter <chunkeey@googlemail.com>
@@ -12253,7 +12565,7 @@ L:	linux-crypto@vger.kernel.org
 S:	Maintained
 F:	kernel/padata.c
 F:	include/linux/padata.h
-F:	Documentation/padata.txt
+F:	Documentation/core-api/padata.rst
 
 PAGE POOL
 M:	Jesper Dangaard Brouer <hawk@kernel.org>
@@ -12269,6 +12581,13 @@ L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	drivers/platform/x86/panasonic-laptop.c
 
+PARALLAX PING IIO SENSOR DRIVER
+M:	Andreas Klinger <ak@it-klinger.de>
+L:	linux-iio@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/iio/proximity/parallax-ping.yaml
+F:	drivers/iio/proximity/ping.c
+
 PARALLEL LCD/KEYPAD PANEL DRIVER
 M:	Willy Tarreau <willy@haproxy.com>
 M:	Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
@@ -12386,7 +12705,7 @@ F:	Documentation/devicetree/bindings/pci/aardvark-pci.txt
 F:	drivers/pci/controller/pci-aardvark.c
 
 PCI DRIVER FOR ALTERA PCIE IP
-M:	Ley Foon Tan <lftan@altera.com>
+M:	Ley Foon Tan <ley.foon.tan@intel.com>
 L:	rfi@lists.rocketboards.org (moderated for non-subscribers)
 L:	linux-pci@vger.kernel.org
 S:	Supported
@@ -12406,7 +12725,7 @@ M:	Rob Herring <robh@kernel.org>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/pci/versatile.txt
+F:	Documentation/devicetree/bindings/pci/versatile.yaml
 F:	drivers/pci/controller/pci-versatile.c
 
 PCI DRIVER FOR ARMADA 8K
@@ -12439,7 +12758,7 @@ M:	Will Deacon <will@kernel.org>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	Documentation/devicetree/bindings/pci/host-generic-pci.txt
+F:	Documentation/devicetree/bindings/pci/host-generic-pci.yaml
 F:	drivers/pci/controller/pci-host-common.c
 F:	drivers/pci/controller/pci-host-generic.c
 
@@ -12453,7 +12772,6 @@ F:	Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
 F:	drivers/pci/controller/dwc/*imx6*
 
 PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
-M:	Keith Busch <keith.busch@intel.com>
 M:	Jonathan Derrick <jonathan.derrick@intel.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
@@ -12496,7 +12814,8 @@ F:	Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
 F:	drivers/pci/controller/pci-tegra.c
 
 PCI DRIVER FOR RENESAS R-CAR
-M:	Simon Horman <horms@verge.net.au>
+M:	Marek Vasut <marek.vasut+renesas@gmail.com>
+M:	Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
 L:	linux-pci@vger.kernel.org
 L:	linux-renesas-soc@vger.kernel.org
 S:	Maintained
@@ -12565,7 +12884,7 @@ S:	Supported
 F:	Documentation/PCI/pci-error-recovery.rst
 
 PCI MSI DRIVER FOR ALTERA MSI IP
-M:	Ley Foon Tan <lftan@altera.com>
+M:	Ley Foon Tan <ley.foon.tan@intel.com>
 L:	rfi@lists.rocketboards.org (moderated for non-subscribers)
 L:	linux-pci@vger.kernel.org
 S:	Supported
@@ -12631,7 +12950,7 @@ F:	Documentation/devicetree/bindings/pci/axis,artpec*
 F:	drivers/pci/controller/dwc/*artpec*
 
 PCIE DRIVER FOR CAVIUM THUNDERX
-M:	David Daney <david.daney@cavium.com>
+M:	Robert Richter <rrichter@marvell.com>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
@@ -12778,6 +13097,13 @@ F:	arch/*/events/*
 F:	arch/*/events/*/*
 F:	tools/perf/
 
+PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
+R:	John Garry <john.garry@huawei.com>
+R:	Will Deacon <will@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Supported
+F:	tools/perf/pmu-events/arch/arm64/
+
 PERSONALITY HANDLING
 M:	Christoph Hellwig <hch@infradead.org>
 L:	linux-abi-devel@lists.sourceforge.net
@@ -12835,6 +13161,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
 F:	samples/pidfd/
 F:	tools/testing/selftests/pidfd/
+F:	tools/testing/selftests/clone3/
 K:	(?i)pidfd
 K:	(?i)clone3
 K:	\b(clone_args|kernel_clone_args)\b
@@ -12870,7 +13197,7 @@ F:	Documentation/devicetree/bindings/pinctrl/fsl,*
 
 PIN CONTROLLER - INTEL
 M:	Mika Westerberg <mika.westerberg@linux.intel.com>
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+M:	Andy Shevchenko <andy@kernel.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
 S:	Maintained
 F:	drivers/pinctrl/intel/
@@ -12958,6 +13285,11 @@ S:	Maintained
 F:	drivers/iio/chemical/pms7003.c
 F:	Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml
 
+PLX DMA DRIVER
+M:	Logan Gunthorpe <logang@deltatee.com>
+S:	Maintained
+F:	drivers/dma/plx_dma.c
+
 PMBUS HARDWARE MONITORING DRIVERS
 M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-hwmon@vger.kernel.org
@@ -13000,6 +13332,15 @@ L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/pm8001/
 
+PM-GRAPH UTILITY
+M:	"Todd E Brandt" <todd.e.brandt@linux.intel.com>
+L:	linux-pm@vger.kernel.org
+W:	https://01.org/pm-graph
+B:	https://bugzilla.kernel.org/buglist.cgi?component=pm-graph&product=Tools
+T:	git git://github.com/intel/pm-graph
+S:	Supported
+F:	tools/power/pm-graph
+
 PNP SUPPORT
 M:	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
 S:	Maintained
@@ -13019,6 +13360,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Maintained
 F:	fs/timerfd.c
 F:	include/linux/timer*
+F:	include/linux/time_namespace.h
+F:	kernel/time_namespace.c
 F:	kernel/time/*timer*
 
 POWER MANAGEMENT CORE
@@ -13132,12 +13475,14 @@ F:	Documentation/filesystems/proc.txt
 PROC SYSCTL
 M:	Luis Chamberlain <mcgrof@kernel.org>
 M:	Kees Cook <keescook@chromium.org>
+M:	Iurii Zaikin <yzaikin@google.com>
 L:	linux-kernel@vger.kernel.org
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
 F:	fs/proc/proc_sysctl.c
 F:	include/linux/sysctl.h
 F:	kernel/sysctl.c
+F:	kernel/sysctl-test.c
 F:	tools/testing/selftests/sysctl/
 
 PS3 NETWORK SUPPORT
@@ -13452,6 +13797,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 S:	Supported
 F:	drivers/net/wireless/ath/ath10k/
 
+QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
+M:	Kalle Valo <kvalo@codeaurora.org>
+L:	ath11k@lists.infradead.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+S:	Supported
+F:	drivers/net/wireless/ath/ath11k/
+
 QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
 M:	QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
 L:	linux-wireless@vger.kernel.org
@@ -13474,6 +13826,14 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt
 F:	drivers/cpufreq/qcom-cpufreq-nvmem.c
 
+QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER
+M:	Niklas Cassel <nks@flawful.org>
+L:	linux-pm@vger.kernel.org
+L:	linux-arm-msm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/power/avs/qcom,cpr.txt
+F:	drivers/power/avs/qcom-cpr.c
+
 QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 M:	Timur Tabi <timur@kernel.org>
 L:	netdev@vger.kernel.org
@@ -13482,7 +13842,6 @@ F:	drivers/net/ethernet/qualcomm/emac/
 
 QUALCOMM ETHQOS ETHERNET DRIVER
 M:	Vinod Koul <vkoul@kernel.org>
-M:	Niklas Cassel <niklas.cassel@linaro.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -13516,12 +13875,22 @@ L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
 F:	drivers/iommu/qcom_iommu.c
 
+QUALCOMM RMNET DRIVER
+M:	Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+M:	Sean Tranchetti <stranche@codeaurora.org>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/qualcomm/rmnet/
+F:	Documentation/networking/device_drivers/qualcomm/rmnet.txt
+F:	include/linux/if_rmnet.h
+
 QUALCOMM TSENS THERMAL DRIVER
 M:	Amit Kucheria <amit.kucheria@linaro.org>
 L:	linux-pm@vger.kernel.org
 L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
 F:	drivers/thermal/qcom/
+F:	Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
 
 QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
 M:	Stanimir Varbanov <stanimir.varbanov@linaro.org>
@@ -13584,7 +13953,7 @@ F:	drivers/media/radio/radio-tea5777.c
 RADOS BLOCK DEVICE (RBD)
 M:	Ilya Dryomov <idryomov@gmail.com>
 M:	Sage Weil <sage@redhat.com>
-M:	Alex Elder <elder@kernel.org>
+R:	Dongsheng Yang <dongsheng.yang@easystack.cn>
 L:	ceph-devel@vger.kernel.org
 W:	http://ceph.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
@@ -13614,8 +13983,7 @@ S:	Maintained
 F:	arch/mips/ralink
 
 RALINK RT2X00 WIRELESS LAN DRIVER
-P:	rt2x00 project
-M:	Stanislaw Gruszka <sgruszka@redhat.com>
+M:	Stanislaw Gruszka <stf_xl@wp.pl>
 M:	Helmut Schaa <helmut.schaa@googlemail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
@@ -13821,7 +14189,7 @@ R:	Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
 L:	netdev@vger.kernel.org
 L:	linux-renesas-soc@vger.kernel.org
 F:	Documentation/devicetree/bindings/net/renesas,*.txt
-F:	Documentation/devicetree/bindings/net/sh_eth.txt
+F:	Documentation/devicetree/bindings/net/renesas,*.yaml
 F:	drivers/net/ethernet/renesas/
 F:	include/linux/sh_eth.h
 
@@ -13862,6 +14230,7 @@ F:	include/dt-bindings/reset/
 F:	include/linux/reset.h
 F:	include/linux/reset/
 F:	include/linux/reset-controller.h
+K:      \b(?:devm_|of_)?reset_control(?:ler_[a-z]+|_[a-z_]+)?\b
 
 RESTARTABLE SEQUENCES SUPPORT
 M:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
@@ -13914,6 +14283,7 @@ M:	Paul Walmsley <paul.walmsley@sifive.com>
 M:	Palmer Dabbelt <palmer@dabbelt.com>
 M:	Albert Ou <aou@eecs.berkeley.edu>
 L:	linux-riscv@lists.infradead.org
+P:	Documentation/riscv/patch-acceptance.rst
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
 S:	Supported
 F:	arch/riscv/
@@ -13950,7 +14320,6 @@ S:	Supported
 F:	drivers/net/ethernet/rocker/
 
 ROCKETPORT DRIVER
-P:	Comtrol Corp.
 W:	http://www.comtrol.com
 S:	Maintained
 F:	Documentation/driver-api/serial/rocket.rst
@@ -13962,6 +14331,12 @@ L:	linux-serial@vger.kernel.org
 S:	Odd Fixes
 F:	drivers/tty/serial/rp2.*
 
+ROHM BH1750 AMBIENT LIGHT SENSOR DRIVER
+M:	Tomasz Duszynski <tduszyns@gmail.com>
+S:	Maintained
+F:	drivers/iio/light/bh1750.c
+F:	Documentation/devicetree/bindings/iio/light/bh1750.yaml
+
 ROHM MULTIFUNCTION BD9571MWV-M PMIC DEVICE DRIVERS
 M:	Marek Vasut <marek.vasut+renesas@gmail.com>
 L:	linux-kernel@vger.kernel.org
@@ -14223,7 +14598,7 @@ L:	linux-crypto@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
 F:	drivers/crypto/exynos-rng.c
-F:	Documentation/devicetree/bindings/rng/samsung,exynos4-rng.txt
+F:	Documentation/devicetree/bindings/rng/samsung,exynos4-rng.yaml
 
 SAMSUNG EXYNOS TRUE RANDOM NUMBER GENERATOR (TRNG) DRIVER
 M:	Łukasz Stelmach <l.stelmach@samsung.com>
@@ -14294,12 +14669,12 @@ F:	drivers/media/i2c/s5k5baf.c
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
 M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Vladimir Zapolskiy <vz@mleia.com>
-M:	Kamil Konieczny <k.konieczny@partner.samsung.com>
+M:	Kamil Konieczny <k.konieczny@samsung.com>
 L:	linux-crypto@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/crypto/samsung-slimsss.txt
-F:	Documentation/devicetree/bindings/crypto/samsung-sss.txt
+F:	Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml
+F:	Documentation/devicetree/bindings/crypto/samsung-sss.yaml
 F:	drivers/crypto/s5p-sss.c
 
 SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS
@@ -14336,8 +14711,6 @@ F:	include/linux/platform_data/spi-s3c64xx.h
 
 SAMSUNG SXGBE DRIVERS
 M:	Byungho An <bh74.an@samsung.com>
-M:	Girish K S <ks.giri@samsung.com>
-M:	Vipul Pandya <vipul.pandya@samsung.com>
 S:	Supported
 L:	netdev@vger.kernel.org
 F:	drivers/net/ethernet/samsung/sxgbe/
@@ -14608,6 +14981,7 @@ F:	include/uapi/linux/selinux_netlink.h
 F:	security/selinux/
 F:	scripts/selinux/
 F:	Documentation/admin-guide/LSM/SELinux.rst
+F:	Documentation/ABI/obsolete/sysfs-selinux-disable
 
 SENSABLE PHANTOM
 M:	Jiri Slaby <jirislaby@gmail.com>
@@ -14787,6 +15161,12 @@ F:	drivers/media/usb/siano/
 F:	drivers/media/usb/siano/
 F:	drivers/media/mmc/siano/
 
+SIFIVE PDMA DRIVER
+M:	Green Wan <green.wan@sifive.com>
+S:	Maintained
+F:	drivers/dma/sf-pdma/
+F:	Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
+
 SIFIVE DRIVERS
 M:	Palmer Dabbelt <palmer@dabbelt.com>
 M:	Paul Walmsley <paul.walmsley@sifive.com>
@@ -14813,6 +15193,11 @@ S:	Maintained
 F:	drivers/input/touchscreen/silead.c
 F:	drivers/platform/x86/touchscreen_dmi.c
 
+SILICON LABS WIRELESS DRIVERS (for WFxxx series)
+M:	Jérôme Pouiller <jerome.pouiller@silabs.com>
+S:	Supported
+F:	drivers/staging/wfx/
+
 SILICON MOTION SM712 FRAME BUFFER DRIVER
 M:	Sudip Mukherjee <sudipm.mukherjee@gmail.com>
 M:	Teddy Wang <teddy.wang@siliconmotion.com>
@@ -14823,11 +15208,8 @@ F:	drivers/video/fbdev/sm712*
 F:	Documentation/fb/sm712fb.rst
 
 SIMPLE FIRMWARE INTERFACE (SFI)
-M:	Len Brown <lenb@kernel.org>
-L:	sfi-devel@simplefirmware.org
 W:	http://simplefirmware.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-sfi-2.6.git
-S:	Supported
+S:	Obsolete
 F:	arch/x86/platform/sfi/
 F:	drivers/sfi/
 F:	include/linux/sfi*.h
@@ -14841,15 +15223,13 @@ F:	drivers/video/fbdev/simplefb.c
 F:	include/linux/platform_data/simplefb.h
 
 SIMTEC EB110ATX (Chalice CATS)
-P:	Ben Dooks
-P:	Vincent Sanders <vince@simtec.co.uk>
+M:	Vincent Sanders <vince@simtec.co.uk>
 M:	Simtec Linux Team <linux@simtec.co.uk>
 W:	http://www.simtec.co.uk/products/EB110ATX/
 S:	Supported
 
 SIMTEC EB2410ITX (BAST)
-P:	Ben Dooks
-P:	Vincent Sanders <vince@simtec.co.uk>
+M:	Vincent Sanders <vince@simtec.co.uk>
 M:	Simtec Linux Team <linux@simtec.co.uk>
 W:	http://www.simtec.co.uk/products/EB2410ITX/
 S:	Supported
@@ -15012,7 +15392,7 @@ F:	include/media/soc_camera.h
 F:	drivers/staging/media/soc_camera/
 
 SOCIONEXT SYNQUACER I2C DRIVER
-M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+M:	Ard Biesheuvel <ardb@kernel.org>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-synquacer.c
@@ -15147,6 +15527,14 @@ S:	Maintained
 F:	drivers/media/i2c/imx274.c
 F:	Documentation/devicetree/bindings/media/i2c/imx274.txt
 
+SONY IMX290 SENSOR DRIVER
+M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/i2c/imx290.c
+F:	Documentation/devicetree/bindings/media/i2c/imx290.txt
+
 SONY IMX319 SENSOR DRIVER
 M:	Bingbu Cao <bingbu.cao@intel.com>
 L:	linux-media@vger.kernel.org
@@ -15294,7 +15682,6 @@ F:	arch/arm/boot/dts/spear*
 F:	arch/arm/mach-spear/
 
 SPI NOR SUBSYSTEM
-M:	Marek Vasut <marek.vasut@gmail.com>
 M:	Tudor Ambarus <tudor.ambarus@microchip.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
@@ -15471,6 +15858,14 @@ L:	linux-wireless@vger.kernel.org
 S:	Supported
 F:	drivers/staging/wilc1000/
 
+STAGING - SEPS525 LCD CONTROLLER DRIVERS
+M:	Michael Hennerich <michael.hennerich@analog.com>
+M:	Beniamin Bia <beniamin.bia@analog.com>
+L:	linux-fbdev@vger.kernel.org
+S:	Supported
+F:	drivers/staging/fbtft/fb_seps525.c
+F:	Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
+
 STAGING SUBSYSTEM
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
@@ -15534,6 +15929,7 @@ M:	Jose Abreu <joabreu@synopsys.com>
 L:	netdev@vger.kernel.org
 W:	http://www.stlinux.com
 S:	Supported
+F:	Documentation/networking/device_drivers/stmicro/
 F:	drivers/net/ethernet/stmicro/stmmac/
 
 SUN3/3X
@@ -15549,7 +15945,7 @@ SUN4I LOW RES ADC ATTACHED TABLET KEYS DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 L:	linux-input@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
+F:	Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
 F:	drivers/input/keyboard/sun4i-lradc-keys.c
 
 SUNDANCE NETWORK DRIVER
@@ -15761,6 +16157,13 @@ F:	drivers/hwtracing/stm/
 F:	include/linux/stm.h
 F:	include/uapi/linux/stm.h
 
+SYSTEM76 ACPI DRIVER
+M:	Jeremy Soller <jeremy@system76.com>
+M:	System76 Product Development <productdev@system76.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/system76_acpi.c
+
 SYSV FILESYSTEM
 M:	Christoph Hellwig <hch@infradead.org>
 S:	Maintained
@@ -16085,12 +16488,10 @@ F:	drivers/media/radio/radio-raremono.c
 
 THERMAL
 M:	Zhang Rui <rui.zhang@intel.com>
-M:	Eduardo Valentin <edubezval@gmail.com>
-R:	Daniel Lezcano <daniel.lezcano@linaro.org>
+M:	Daniel Lezcano <daniel.lezcano@linaro.org>
 R:	Amit Kucheria <amit.kucheria@verdurent.com>
 L:	linux-pm@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git
 Q:	https://patchwork.kernel.org/project/linux-pm/list/
 S:	Supported
 F:	drivers/thermal/
@@ -16101,14 +16502,26 @@ F:	Documentation/devicetree/bindings/thermal/
 
 THERMAL/CPU_COOLING
 M:	Amit Daniel Kachhap <amit.kachhap@gmail.com>
+M:	Daniel Lezcano <daniel.lezcano@linaro.org>
 M:	Viresh Kumar <viresh.kumar@linaro.org>
 M:	Javi Merino <javi.merino@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	Documentation/driver-api/thermal/cpu-cooling-api.rst
-F:	drivers/thermal/cpu_cooling.c
+F:	Documentation/driver-api/thermal/cpu-idle-cooling.rst
+F:	drivers/thermal/cpufreq_cooling.c
+F:	drivers/thermal/cpuidle_cooling.c
 F:	include/linux/cpu_cooling.h
 
+THERMAL DRIVER FOR AMLOGIC SOCS
+M:	Guillaume La Roque <glaroque@baylibre.com>
+L:	linux-pm@vger.kernel.org
+L:	linux-amlogic@lists.infradead.org
+W:	http://linux-meson.com/
+S:	Supported
+F:	drivers/thermal/amlogic_thermal.c
+F:	Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml
+
 THINKPAD ACPI EXTRAS DRIVER
 M:	Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
 L:	ibm-acpi-devel@lists.sourceforge.net
@@ -16124,6 +16537,7 @@ M:	Andreas Noever <andreas.noever@gmail.com>
 M:	Michael Jamet <michael.jamet@intel.com>
 M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 M:	Yehezkel Bernat <YehezkelShB@gmail.com>
+L:	linux-usb@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:	Maintained
 F:	Documentation/admin-guide/thunderbolt.rst
@@ -16139,7 +16553,7 @@ S:	Maintained
 F:	drivers/net/thunderbolt.c
 
 THUNDERX GPIO DRIVER
-M:	David Daney <david.daney@cavium.com>
+M:	Robert Richter <rrichter@marvell.com>
 S:	Maintained
 F:	drivers/gpio/gpio-thunderx.c
 
@@ -16213,6 +16627,12 @@ S:	Maintained
 F:	drivers/media/platform/davinci/
 F:	include/media/davinci/
 
+TI ENHANCED QUADRATURE ENCODER PULSE (eQEP) DRIVER
+R:	David Lechner <david@lechnology.com>
+L:	linux-iio@vger.kernel.org
+F:	Documentation/devicetree/bindings/counter/ti-eqep.yaml
+F:	drivers/counter/ti-eqep.c
+
 TI ETHERNET SWITCH DRIVER (CPSW)
 R:	Grygorii Strashko <grygorii.strashko@ti.com>
 L:	linux-omap@vger.kernel.org
@@ -16289,6 +16709,13 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Odd Fixes
 F:	sound/soc/codecs/tas571x*
 
+TI TCAN4X5X DEVICE DRIVER
+M:	Dan Murphy <dmurphy@ti.com>
+L:	linux-can@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+F:	drivers/net/can/m_can/tcan4x5x.c
+
 TI TRF7970A NFC DRIVER
 M:	Mark Greer <mgreer@animalcreek.com>
 L:	linux-wireless@vger.kernel.org
@@ -16310,6 +16737,7 @@ W:	http://linuxtv.org/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 S:	Maintained
 F:	drivers/media/platform/ti-vpe/
+F:	Documentation/devicetree/bindings/media/ti,vpe.yaml
 
 TI WILINK WIRELESS DRIVERS
 L:	linux-wireless@vger.kernel.org
@@ -16339,7 +16767,7 @@ F:	kernel/time/ntp.c
 F:	tools/testing/selftests/timers/
 
 TIPC NETWORK LAYER
-M:	Jon Maloy <jon.maloy@ericsson.com>
+M:	Jon Maloy <jmaloy@redhat.com>
 M:	Ying Xue <ying.xue@windriver.com>
 L:	netdev@vger.kernel.org (core kernel code)
 L:	tipc-discussion@lists.sourceforge.net (user apps, general discussion)
@@ -16380,6 +16808,13 @@ S:	Maintained
 F:	Documentation/hwmon/tmp401.rst
 F:	drivers/hwmon/tmp401.c
 
+TMP513 HARDWARE MONITOR DRIVER
+M:	Eric Tremblay <etremblay@distech-controls.com>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/tmp513.rst
+F:	drivers/hwmon/tmp513.c
+
 TMPFS (SHMEM FILESYSTEM)
 M:	Hugh Dickins <hughd@google.com>
 L:	linux-mm@kvack.org
@@ -16591,10 +17026,9 @@ F:	drivers/media/pci/tw686x/
 
 UBI FILE SYSTEM (UBIFS)
 M:	Richard Weinberger <richard@nod.at>
-M:	Artem Bityutskiy <dedekind1@gmail.com>
-M:	Adrian Hunter <adrian.hunter@intel.com>
 L:	linux-mtd@lists.infradead.org
-T:	git git://git.infradead.org/ubifs-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
 W:	http://www.linux-mtd.infradead.org/doc/ubifs.html
 S:	Supported
 F:	Documentation/filesystems/ubifs.txt
@@ -16709,11 +17143,11 @@ S:	Maintained
 F:	drivers/scsi/ufs/ufs-mediatek*
 
 UNSORTED BLOCK IMAGES (UBI)
-M:	Artem Bityutskiy <dedekind1@gmail.com>
 M:	Richard Weinberger <richard@nod.at>
 W:	http://www.linux-mtd.infradead.org/
 L:	linux-mtd@lists.infradead.org
-T:	git git://git.infradead.org/ubifs-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
 S:	Supported
 F:	drivers/mtd/ubi/
 F:	include/linux/mtd/ubi.h
@@ -17106,7 +17540,7 @@ F:	drivers/mtd/nand/raw/vf610_nfc.c
 VFAT/FAT/MSDOS FILESYSTEM
 M:	OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
 S:	Maintained
-F:	Documentation/filesystems/vfat.txt
+F:	Documentation/filesystems/vfat.rst
 F:	fs/fat/
 
 VFIO DRIVER
@@ -17200,6 +17634,7 @@ F:	include/media/videobuf2-*
 
 VIMC VIRTUAL MEDIA CONTROLLER DRIVER
 M:	Helen Koike <helen.koike@collabora.com>
+R:	Shuah Khan <skhan@linuxfoundation.org>
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 W:	https://linuxtv.org
@@ -17228,6 +17663,7 @@ F:	net/vmw_vsock/diag.c
 F:	net/vmw_vsock/af_vsock_tap.c
 F:	net/vmw_vsock/virtio_transport_common.c
 F:	net/vmw_vsock/virtio_transport.c
+F:	net/vmw_vsock/vsock_loopback.c
 F:	drivers/net/vsockmon.c
 F:	drivers/vhost/vsock.c
 F:	tools/testing/vsock/
@@ -17347,6 +17783,14 @@ S:	Maintained
 F:	drivers/input/serio/userio.c
 F:	include/uapi/linux/userio.h
 
+VITESSE FELIX ETHERNET SWITCH DRIVER
+M:	Vladimir Oltean <vladimir.oltean@nxp.com>
+M:	Claudiu Manoil <claudiu.manoil@nxp.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/dsa/ocelot/*
+F:	net/dsa/tag_ocelot.c
+
 VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -17447,6 +17891,18 @@ S:	Maintained
 F:	drivers/net/vrf.c
 F:	Documentation/networking/vrf.txt
 
+VSPRINTF
+M:	Petr Mladek <pmladek@suse.com>
+M:	Steven Rostedt <rostedt@goodmis.org>
+M:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+R:	Rasmus Villemoes <linux@rasmusvillemoes.dk>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
+S:	Maintained
+F:	lib/vsprintf.c
+F:	lib/test_printf.c
+F:	Documentation/core-api/printk-formats.rst
+
 VT1211 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
 L:	linux-hwmon@vger.kernel.org
@@ -17461,10 +17917,8 @@ S:	Maintained
 F:	drivers/hwmon/vt8231.c
 
 VUB300 USB to SDIO/SD/MMC bridge chip
-M:	Tony Olech <tony.olech@elandigitalsystems.com>
 L:	linux-mmc@vger.kernel.org
-L:	linux-usb@vger.kernel.org
-S:	Supported
+S:	Orphan
 F:	drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
@@ -17580,6 +18034,14 @@ L:	linux-gpio@vger.kernel.org
 S:	Maintained
 F:	drivers/gpio/gpio-ws16c48.c
 
+WIREGUARD SECURE NETWORK TUNNEL
+M:	Jason A. Donenfeld <Jason@zx2c4.com>
+S:	Maintained
+F:	drivers/net/wireguard/
+F:	tools/testing/selftests/wireguard/
+L:	wireguard@lists.zx2c4.com
+L:	netdev@vger.kernel.org
+
 WISTRON LAPTOP BUTTON DRIVER
 M:	Miloslav Trmac <mitr@volny.cz>
 S:	Maintained
@@ -17755,7 +18217,7 @@ XDP (eXpress Data Path)
 M:	Alexei Starovoitov <ast@kernel.org>
 M:	Daniel Borkmann <daniel@iogearbox.net>
 M:	David S. Miller <davem@davemloft.net>
-M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+M:	Jakub Kicinski <kuba@kernel.org>
 M:	Jesper Dangaard Brouer <hawk@kernel.org>
 M:	John Fastabend <john.fastabend@gmail.com>
 L:	netdev@vger.kernel.org
@@ -17878,6 +18340,14 @@ M:	Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
 S:	Maintained
 F:	drivers/net/ethernet/xilinx/xilinx_axienet*
 
+XILINX CAN DRIVER
+M:	Appana Durga Kedareswara rao <appana.durga.rao@xilinx.com>
+R:	Naga Sureshkumar Relli <naga.sureshkumar.relli@xilinx.com>
+L:	linux-can@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/can/xilinx_can.txt
+F:	drivers/net/can/xilinx_can.c
+
 XILINX UARTLITE SERIAL DRIVER
 M:	Peter Korsgaard <jacmet@sunsite.dk>
 L:	linux-serial@vger.kernel.org
@@ -17912,10 +18382,9 @@ S:	Supported
 F:	drivers/char/xillybus/
 
 XLP9XX I2C DRIVER
-M:	George Cherian <george.cherian@cavium.com>
-M:	Jan Glauber <jglauber@cavium.com>
+M:	George Cherian <gcherian@marvell.com>
 L:	linux-i2c@vger.kernel.org
-W:	http://www.cavium.com
+W:	http://www.marvell.com
 S:	Supported
 F:	Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
 F:	drivers/i2c/busses/i2c-xlp9xx.c
diff --git a/Makefile b/Makefile
index d4d36c61940b..6a01b073915e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 4
+PATCHLEVEL = 5
 SUBLEVEL = 0
 EXTRAVERSION =
 NAME = Kleptomaniac Octopus
@@ -414,6 +414,7 @@ STRIP		= $(CROSS_COMPILE)strip
 OBJCOPY		= $(CROSS_COMPILE)objcopy
 OBJDUMP		= $(CROSS_COMPILE)objdump
 OBJSIZE		= $(CROSS_COMPILE)size
+READELF		= $(CROSS_COMPILE)readelf
 PAHOLE		= pahole
 LEX		= flex
 YACC		= bison
@@ -472,7 +473,7 @@ GCC_PLUGINS_CFLAGS :=
 CLANG_FLAGS :=
 
 export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE PAHOLE LEX YACC AWK INSTALLKERNEL
+export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL
 export PERL PYTHON PYTHON2 PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
 export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
 
@@ -618,7 +619,6 @@ ifeq ($(KBUILD_EXTMOD),)
 init-y		:= init/
 drivers-y	:= drivers/ sound/
 drivers-$(CONFIG_SAMPLES) += samples/
-drivers-$(CONFIG_KERNEL_HEADER_TEST) += include/
 net-y		:= net/
 libs-y		:= lib/
 core-y		:= usr/
@@ -1011,6 +1011,7 @@ endif
 PHONY += prepare0
 
 export MODORDER := $(extmod-prefix)modules.order
+export MODULES_NSDEPS := $(extmod-prefix)modules.nsdeps
 
 ifeq ($(KBUILD_EXTMOD),)
 core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
@@ -1196,19 +1197,15 @@ headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi
 	$(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi
 
+# Deprecated. It is no-op now.
 PHONY += headers_check
-headers_check: headers
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi HDRCHECK=1
+headers_check:
+	@:
 
 ifdef CONFIG_HEADERS_INSTALL
 prepare: headers
 endif
 
-ifdef CONFIG_HEADERS_CHECK
-all: headers_check
-endif
-
 PHONY += scripts_unifdef
 scripts_unifdef: scripts_basic
 	$(Q)$(MAKE) $(build)=scripts scripts/unifdef
@@ -1360,7 +1357,7 @@ endif # CONFIG_MODULES
 
 # Directories & files removed with 'make clean'
 CLEAN_DIRS  += include/ksym
-CLEAN_FILES += modules.builtin.modinfo
+CLEAN_FILES += modules.builtin.modinfo modules.nsdeps
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config include/generated          \
@@ -1476,7 +1473,6 @@ help:
 	@echo  '  versioncheck    - Sanity check on version.h usage'
 	@echo  '  includecheck    - Check for duplicate included header files'
 	@echo  '  export_report   - List the usages of all exported symbols'
-	@echo  '  headers_check   - Sanity check on exported headers'
 	@echo  '  headerdep       - Detect inclusion cycles in headers'
 	@echo  '  coccicheck      - Check with Coccinelle'
 	@echo  ''
@@ -1515,7 +1511,7 @@ help:
 	@echo  ''
 	@$(if $(boards), \
 		$(foreach b, $(boards), \
-		printf "  %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \
+		printf "  %-27s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \
 		echo '')
 	@$(if $(board-dirs), \
 		$(foreach b, $(board-dirs), \
@@ -1526,7 +1522,8 @@ help:
 	@echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
 	@echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-	@echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
+	@echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK'
+	@echo  '                       (sparse by default)'
 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
 	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  '  make W=n   [targets] Enable extra build checks, n=1,2,3 where'
@@ -1622,7 +1619,7 @@ _emodinst_post: _emodinst_
 	$(call cmd,depmod)
 
 clean-dirs := $(KBUILD_EXTMOD)
-clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers
+clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps
 
 PHONY += /
 /:
@@ -1641,6 +1638,50 @@ help:
 PHONY += prepare
 endif # KBUILD_EXTMOD
 
+# Single targets
+# ---------------------------------------------------------------------------
+# To build individual files in subdirectories, you can do like this:
+#
+#   make foo/bar/baz.s
+#
+# The supported suffixes for single-target are listed in 'single-targets'
+#
+# To build only under specific subdirectories, you can do like this:
+#
+#   make foo/bar/baz/
+
+ifdef single-build
+
+# .ko is special because modpost is needed
+single-ko := $(sort $(filter %.ko, $(MAKECMDGOALS)))
+single-no-ko := $(sort $(patsubst %.ko,%.mod, $(MAKECMDGOALS)))
+
+$(single-ko): single_modpost
+	@:
+$(single-no-ko): descend
+	@:
+
+ifeq ($(KBUILD_EXTMOD),)
+# For the single build of in-tree modules, use a temporary file to avoid
+# the situation of modules_install installing an invalid modules.order.
+MODORDER := .modules.tmp
+endif
+
+PHONY += single_modpost
+single_modpost: $(single-no-ko)
+	$(Q){ $(foreach m, $(single-ko), echo $(extmod-prefix)$m;) } > $(MODORDER)
+	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+
+KBUILD_MODULES := 1
+
+export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod-prefix), $(single-no-ko))
+
+# trim unrelated directories
+build-dirs := $(foreach d, $(build-dirs), \
+			$(if $(filter $(d)/%, $(KBUILD_SINGLE_TARGETS)), $(d)))
+
+endif
+
 # Handle descending into subdirectories listed in $(build-dirs)
 # Preset locale variables to speed up the build process. Limit locale
 # tweaks to this spot to avoid wrong language settings when running
@@ -1649,7 +1690,9 @@ endif # KBUILD_EXTMOD
 PHONY += descend $(build-dirs)
 descend: $(build-dirs)
 $(build-dirs): prepare
-	$(Q)$(MAKE) $(build)=$@ single-build=$(single-build) need-builtin=1 need-modorder=1
+	$(Q)$(MAKE) $(build)=$@ \
+	single-build=$(if $(filter-out $@/, $(single-no-ko)),1) \
+	need-builtin=1 need-modorder=1
 
 clean-dirs := $(addprefix _clean_, $(clean-dirs))
 PHONY += $(clean-dirs) clean
@@ -1664,7 +1707,7 @@ clean: $(clean-dirs)
 		-o -name '*.ko.*' \
 		-o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
 		-o -name '*.dwo' -o -name '*.lst' \
-		-o -name '*.su' -o -name '*.mod' -o -name '*.ns_deps' \
+		-o -name '*.su' -o -name '*.mod' \
 		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
 		-o -name '*.lex.c' -o -name '*.tab.[ch]' \
 		-o -name '*.asn1.[ch]' \
@@ -1686,10 +1729,9 @@ tags TAGS cscope gtags: FORCE
 # ---------------------------------------------------------------------------
 
 PHONY += nsdeps
-
+nsdeps: export KBUILD_NSDEPS=1
 nsdeps: modules
-	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost nsdeps
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/$@
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/nsdeps
 
 # Scripts to check various things for consistency
 # ---------------------------------------------------------------------------
@@ -1753,50 +1795,6 @@ tools/%: FORCE
 	$(Q)mkdir -p $(objtree)/tools
 	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $*
 
-# Single targets
-# ---------------------------------------------------------------------------
-# To build individual files in subdirectories, you can do like this:
-#
-#   make foo/bar/baz.s
-#
-# The supported suffixes for single-target are listed in 'single-targets'
-#
-# To build only under specific subdirectories, you can do like this:
-#
-#   make foo/bar/baz/
-
-ifdef single-build
-
-single-all := $(filter $(single-targets), $(MAKECMDGOALS))
-
-# .ko is special because modpost is needed
-single-ko := $(sort $(filter %.ko, $(single-all)))
-single-no-ko := $(sort $(patsubst %.ko,%.mod, $(single-all)))
-
-$(single-ko): single_modpost
-	@:
-$(single-no-ko): descend
-	@:
-
-ifeq ($(KBUILD_EXTMOD),)
-# For the single build of in-tree modules, use a temporary file to avoid
-# the situation of modules_install installing an invalid modules.order.
-MODORDER := .modules.tmp
-endif
-
-PHONY += single_modpost
-single_modpost: $(single-no-ko)
-	$(Q){ $(foreach m, $(single-ko), echo $(extmod-prefix)$m;) } > $(MODORDER)
-	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
-
-KBUILD_MODULES := 1
-
-export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod-prefix), $(single-no-ko))
-
-single-build = $(if $(filter-out $@/, $(single-no-ko)),1)
-
-endif
-
 # FIXME Should go into a make.lib or something
 # ===========================================================================
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..48b5e103bdb0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -72,11 +72,11 @@ config KPROBES
 	  If in doubt, say "N".
 
 config JUMP_LABEL
-       bool "Optimize very unlikely/likely branches"
-       depends on HAVE_ARCH_JUMP_LABEL
-       depends on CC_HAS_ASM_GOTO
-       help
-         This option enables a transparent branch optimization that
+	bool "Optimize very unlikely/likely branches"
+	depends on HAVE_ARCH_JUMP_LABEL
+	depends on CC_HAS_ASM_GOTO
+	help
+	 This option enables a transparent branch optimization that
 	 makes certain almost-always-true or almost-always-false branch
 	 conditions even cheaper to execute within the kernel.
 
@@ -84,7 +84,7 @@ config JUMP_LABEL
 	 scheduler functionality, networking code and KVM have such
 	 branches and include support for this optimization technique.
 
-         If it is detected that the compiler has support for "asm goto",
+	 If it is detected that the compiler has support for "asm goto",
 	 the kernel will compile such branches with just a nop
 	 instruction. When the condition flag is toggled to true, the
 	 nop will be converted to a jump instruction to execute the
@@ -151,8 +151,8 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	  information on the topic of unaligned memory accesses.
 
 config ARCH_USE_BUILTIN_BSWAP
-       bool
-       help
+	bool
+	help
 	 Modern versions of GCC (since 4.4) have builtin functions
 	 for handling byte-swapping. Using these, instead of the old
 	 inline assembler that the architecture code provides in the
@@ -221,10 +221,10 @@ config HAVE_DMA_CONTIGUOUS
 	bool
 
 config GENERIC_SMP_IDLE_THREAD
-       bool
+	bool
 
 config GENERIC_IDLE_POLL_SETUP
-       bool
+	bool
 
 config ARCH_HAS_FORTIFY_SOURCE
 	bool
@@ -257,7 +257,7 @@ config ARCH_HAS_UNCACHED_SEGMENT
 
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
-       bool
+	bool
 
 # Select if arch has its private alloc_task_struct() function
 config ARCH_TASK_STRUCT_ALLOCATOR
@@ -796,16 +796,9 @@ config OLD_SIGACTION
 config COMPAT_OLD_SIGACTION
 	bool
 
-config 64BIT_TIME
-	def_bool y
-	help
-	  This should be selected by all architectures that need to support
-	  new system calls with a 64-bit time_t. This is relevant on all 32-bit
-	  architectures, and 64-bit architectures as part of compat syscall
-	  handling.
-
 config COMPAT_32BIT_TIME
-	def_bool !64BIT || COMPAT
+	bool "Provide system calls for 32-bit time_t"
+	default !64BIT || COMPAT
 	help
 	  This enables 32 bit time_t support in addition to 64 bit time_t support.
 	  This is relevant on all 32-bit architectures, and 64-bit architectures
@@ -843,16 +836,17 @@ config HAVE_ARCH_VMAP_STACK
 config VMAP_STACK
 	default y
 	bool "Use a virtually-mapped stack"
-	depends on HAVE_ARCH_VMAP_STACK && !KASAN
+	depends on HAVE_ARCH_VMAP_STACK
+	depends on !KASAN || KASAN_VMALLOC
 	---help---
 	  Enable this if you want the use virtually-mapped kernel stacks
 	  with guard pages.  This causes kernel stack overflows to be
 	  caught immediately rather than causing difficult-to-diagnose
 	  corruption.
 
-	  This is presently incompatible with KASAN because KASAN expects
-	  the stack to map directly to the KASAN shadow map using a formula
-	  that is incorrect if the stack is in vmalloc space.
+	  To use this with KASAN, the architecture must support backing
+	  virtual mappings with real shadow memory, and KASAN_VMALLOC must
+	  be enabled.
 
 config ARCH_OPTIONAL_KERNEL_RWX
 	def_bool n
@@ -892,27 +886,6 @@ config STRICT_MODULE_RWX
 config ARCH_HAS_PHYS_TO_DMA
 	bool
 
-config ARCH_HAS_REFCOUNT
-	bool
-	help
-	  An architecture selects this when it has implemented refcount_t
-	  using open coded assembly primitives that provide an optimized
-	  refcount_t implementation, possibly at the expense of some full
-	  refcount state checks of CONFIG_REFCOUNT_FULL=y.
-
-	  The refcount overflow check behavior, however, must be retained.
-	  Catching overflows is the primary security concern for protecting
-	  against bugs in reference counts.
-
-config REFCOUNT_FULL
-	bool "Perform full reference count validation at the expense of speed"
-	help
-	  Enabling this switches the refcounting infrastructure from a fast
-	  unchecked atomic_t implementation to a fully state checked
-	  implementation, which can be (slightly) slower but provides protections
-	  against various use-after-free conditions that can be used in
-	  security flaw exploits.
-
 config HAVE_ARCH_COMPILER_H
 	bool
 	help
@@ -960,6 +933,14 @@ config RELR
 config ARCH_HAS_MEM_ENCRYPT
 	bool
 
+config HAVE_SPARSE_SYSCALL_NR
+       bool
+       help
+          An architecture should select this if its syscall numbering is sparse
+	  to save space. For example, MIPS architecture has a syscall array with
+	  entries at 4000, 5000 and 6000 locations. This option turns on syscall
+	  related optimizations for a given architecture.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index af2c0063dc75..d1ed5a8133c5 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -283,20 +283,8 @@ static inline void __iomem *ioremap(unsigned long port, unsigned long size)
 	return IO_CONCAT(__IO_PREFIX,ioremap) (port, size);
 }
 
-static inline void __iomem *__ioremap(unsigned long port, unsigned long size,
-				      unsigned long flags)
-{
-	return ioremap(port, size);
-}
-
-static inline void __iomem * ioremap_nocache(unsigned long offset,
-					     unsigned long size)
-{
-	return ioremap(offset, size);
-}
-
-#define ioremap_wc ioremap_nocache
-#define ioremap_uc ioremap_nocache
+#define ioremap_wc ioremap
+#define ioremap_uc ioremap
 
 static inline void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 889b5d3ad825..7ee144f484f1 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -73,7 +73,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> 32))
-#define pgd_page(pgd)		(pfn_to_page(pgd_val(pgd) >> 32))
 #define pte_pfn(pte)		(pte_val(pte) >> 32)
 
 #define mk_pte(page, pgprot)						     \
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index eb91f1e85629..a1a29f60934c 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -27,9 +27,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 }
 
 static inline void
-pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pgd_set(pgd, pmd);
+	pud_set(pud, pmd);
 }
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 065b57f408c3..299791ce14b6 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -2,7 +2,7 @@
 #ifndef _ALPHA_PGTABLE_H
 #define _ALPHA_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 /*
  * This file contains the functions and defines necessary to modify and use
@@ -226,8 +226,8 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
 { pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
 
-extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
-{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
+extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
+{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
 
 
 extern inline unsigned long
@@ -238,11 +238,11 @@ pmd_page_vaddr(pmd_t pmd)
 
 #ifndef CONFIG_DISCONTIGMEM
 #define pmd_page(pmd)	(mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
-#define pgd_page(pgd)	(mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
+#define pud_page(pud)	(mem_map + ((pud_val(pud) & _PFN_MASK) >> 32))
 #endif
 
-extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
+extern inline unsigned long pud_page_vaddr(pud_t pgd)
+{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
 
 extern inline int pte_none(pte_t pte)		{ return !pte_val(pte); }
 extern inline int pte_present(pte_t pte)	{ return pte_val(pte) & _PAGE_VALID; }
@@ -256,10 +256,10 @@ extern inline int pmd_bad(pmd_t pmd)		{ return (pmd_val(pmd) & ~_PFN_MASK) != _P
 extern inline int pmd_present(pmd_t pmd)	{ return pmd_val(pmd) & _PAGE_VALID; }
 extern inline void pmd_clear(pmd_t * pmdp)	{ pmd_val(*pmdp) = 0; }
 
-extern inline int pgd_none(pgd_t pgd)		{ return !pgd_val(pgd); }
-extern inline int pgd_bad(pgd_t pgd)		{ return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; }
-extern inline int pgd_present(pgd_t pgd)	{ return pgd_val(pgd) & _PAGE_VALID; }
-extern inline void pgd_clear(pgd_t * pgdp)	{ pgd_val(*pgdp) = 0; }
+extern inline int pud_none(pud_t pud)		{ return !pud_val(pud); }
+extern inline int pud_bad(pud_t pud)		{ return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; }
+extern inline int pud_present(pud_t pud)	{ return pud_val(pud) & _PAGE_VALID; }
+extern inline void pud_clear(pud_t * pudp)	{ pud_val(*pudp) = 0; }
 
 /*
  * The following only work if pte_present() is true.
@@ -301,9 +301,9 @@ extern inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
  */
 
 /* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
 {
-	pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+	pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
 	smp_read_barrier_depends(); /* see above */
 	return ret;
 }
diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h
new file mode 100644
index 000000000000..0a9a366a4d34
--- /dev/null
+++ b/arch/alpha/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ALPHA_VMALLOC_H
+#define _ASM_ALPHA_VMALLOC_H
+
+#endif /* _ASM_ALPHA_VMALLOC_H */
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index bf497b8b0ec6..94e4cde8071a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -963,7 +963,7 @@ put_tv32(struct timeval32 __user *o, struct timespec64 *i)
 }
 
 static inline long
-put_tv_to_tv32(struct timeval32 __user *o, struct timeval *i)
+put_tv_to_tv32(struct timeval32 __user *o, struct __kernel_old_timeval *i)
 {
 	return copy_to_user(o, &(struct timeval32){
 				.tv_sec = i->tv_sec,
@@ -971,30 +971,6 @@ put_tv_to_tv32(struct timeval32 __user *o, struct timeval *i)
 			    sizeof(struct timeval32));
 }
 
-static inline long
-get_it32(struct itimerval *o, struct itimerval32 __user *i)
-{
-	struct itimerval32 itv;
-	if (copy_from_user(&itv, i, sizeof(struct itimerval32)))
-		return -EFAULT;
-	o->it_interval.tv_sec = itv.it_interval.tv_sec;
-	o->it_interval.tv_usec = itv.it_interval.tv_usec;
-	o->it_value.tv_sec = itv.it_value.tv_sec;
-	o->it_value.tv_usec = itv.it_value.tv_usec;
-	return 0;
-}
-
-static inline long
-put_it32(struct itimerval32 __user *o, struct itimerval *i)
-{
-	return copy_to_user(o, &(struct itimerval32){
-				.it_interval.tv_sec = o->it_interval.tv_sec,
-				.it_interval.tv_usec = o->it_interval.tv_usec,
-				.it_value.tv_sec = o->it_value.tv_sec,
-				.it_value.tv_usec = o->it_value.tv_usec},
-			    sizeof(struct itimerval32));
-}
-
 static inline void
 jiffies_to_timeval32(unsigned long jiffies, struct timeval32 *value)
 {
@@ -1039,47 +1015,6 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
 
 asmlinkage long sys_ni_posix_timers(void);
 
-SYSCALL_DEFINE2(osf_getitimer, int, which, struct itimerval32 __user *, it)
-{
-	struct itimerval kit;
-	int error;
-
-	if (!IS_ENABLED(CONFIG_POSIX_TIMERS))
-		return sys_ni_posix_timers();
-
-	error = do_getitimer(which, &kit);
-	if (!error && put_it32(it, &kit))
-		error = -EFAULT;
-
-	return error;
-}
-
-SYSCALL_DEFINE3(osf_setitimer, int, which, struct itimerval32 __user *, in,
-		struct itimerval32 __user *, out)
-{
-	struct itimerval kin, kout;
-	int error;
-
-	if (!IS_ENABLED(CONFIG_POSIX_TIMERS))
-		return sys_ni_posix_timers();
-
-	if (in) {
-		if (get_it32(&kin, in))
-			return -EFAULT;
-	} else
-		memset(&kin, 0, sizeof(kin));
-
-	error = do_setitimer(which, &kin, out ? &kout : NULL);
-	if (error || !out)
-		return error;
-
-	if (put_it32(out, &kout))
-		return -EFAULT;
-
-	return 0;
-
-}
-
 SYSCALL_DEFINE2(osf_utimes, const char __user *, filename,
 		struct timeval32 __user *, tvs)
 {
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index f94c732fedeb..0021580d79ad 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -71,10 +71,10 @@ static int pci_mmap_resource(struct kobject *kobj,
 	struct pci_bus_region bar;
 	int i;
 
-	for (i = 0; i < PCI_ROM_RESOURCE; i++)
+	for (i = 0; i < PCI_STD_NUM_BARS; i++)
 		if (res == &pdev->resource[i])
 			break;
-	if (i >= PCI_ROM_RESOURCE)
+	if (i >= PCI_STD_NUM_BARS)
 		return -ENODEV;
 
 	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
@@ -115,7 +115,7 @@ void pci_remove_resource_files(struct pci_dev *pdev)
 {
 	int i;
 
-	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		struct bin_attribute *res_attr;
 
 		res_attr = pdev->res_attr[i];
@@ -232,7 +232,7 @@ int pci_create_resource_files(struct pci_dev *pdev)
 	int retval;
 
 	/* Expose the PCI resources from this device as files */
-	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 
 		/* skip empty resources */
 		if (!pci_resource_len(pdev, i))
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 4341ccf5c0c4..e7a59d927d78 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 	if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
 		/* This should never occur! */
 		irq_err_count++;
-		pr_warning("PMI: silly index %ld\n", la_ptr);
+		pr_warn("PMI: silly index %ld\n", la_ptr);
 		wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
 		return;
 	}
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 	if (unlikely(!event)) {
 		/* This should never occur! */
 		irq_err_count++;
-		pr_warning("PMI: No event at index %d!\n", idx);
+		pr_warn("PMI: No event at index %d!\n", idx);
 		wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
 		return;
 	}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 5d4c76a77a9f..f19aa577354b 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -655,8 +655,6 @@ setup_arch(char **cmdline_p)
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 #endif
 #endif
 
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 728fe028c02c..36d42da7466a 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -89,10 +89,10 @@
 80	common	setgroups			sys_setgroups
 81	common	osf_old_getpgrp			sys_ni_syscall
 82	common	setpgrp				sys_setpgid
-83	common	osf_setitimer			sys_osf_setitimer
+83	common	osf_setitimer			compat_sys_setitimer
 84	common	osf_old_wait			sys_ni_syscall
 85	common	osf_table			sys_ni_syscall
-86	common	osf_getitimer			sys_osf_getitimer
+86	common	osf_getitimer			compat_sys_getitimer
 87	common	gethostname			sys_gethostname
 88	common	sethostname			sys_sethostname
 89	common	getdtablesize			sys_getdtablesize
@@ -475,3 +475,5 @@
 543	common	fspick				sys_fspick
 544	common	pidfd_open			sys_pidfd_open
 # 545 reserved for clone3
+547	common	openat2				sys_openat2
+548	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index c4b5ceceab52..bc6f727278fd 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,4 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 #include <asm/cache.h>
@@ -8,7 +12,7 @@
 OUTPUT_FORMAT("elf64-alpha")
 OUTPUT_ARCH(alpha)
 ENTRY(__start)
-PHDRS { kernel PT_LOAD; note PT_NOTE; }
+PHDRS { text PT_LOAD; note PT_NOTE; }
 jiffies = jiffies_64;
 SECTIONS
 {
@@ -27,17 +31,11 @@ SECTIONS
 		LOCK_TEXT
 		*(.fixup)
 		*(.gnu.warning)
-	} :kernel
+	} :text
 	swapper_pg_dir = SWAPPER_PGD;
 	_etext = .;	/* End of text section */
 
-	NOTES :kernel :note
-	.dummy : {
-		*(.dummy)
-	} :kernel
-
-	RODATA
-	EXCEPTION_TABLE(16)
+	RO_DATA(4096)
 
 	/* Will be freed after init */
 	__init_begin = ALIGN(PAGE_SIZE);
@@ -52,7 +50,7 @@ SECTIONS
 
 	_sdata = .;	/* Start of rw data section */
 	_data = .;
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
 	.got : {
 		*(.got)
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index e2cbec3789e8..12e218d3792a 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -146,6 +146,8 @@ callback_init(void * kernel_end)
 {
 	struct crb_struct * crb;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	void *two_pages;
 
@@ -184,8 +186,10 @@ callback_init(void * kernel_end)
 	memset(two_pages, 0, 2*PAGE_SIZE);
 
 	pgd = pgd_offset_k(VMALLOC_START);
-	pgd_set(pgd, (pmd_t *)two_pages);
-	pmd = pmd_offset(pgd, VMALLOC_START);
+	p4d = p4d_offset(pgd, VMALLOC_START);
+	pud = pud_offset(p4d, VMALLOC_START);
+	pud_set(pud, (pmd_t *)two_pages);
+	pmd = pmd_offset(pud, VMALLOC_START);
 	pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
 
 	if (alpha_using_srm) {
@@ -214,9 +218,9 @@ callback_init(void * kernel_end)
 				/* Newer consoles (especially on larger
 				   systems) may require more pages of
 				   PTEs. Grab additional pages as needed. */
-				if (pmd != pmd_offset(pgd, vaddr)) {
+				if (pmd != pmd_offset(pud, vaddr)) {
 					memset(kernel_end, 0, PAGE_SIZE);
-					pmd = pmd_offset(pgd, vaddr);
+					pmd = pmd_offset(pud, vaddr);
 					pmd_set(pmd, (pte_t *)kernel_end);
 					kernel_end += PAGE_SIZE;
 				}
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 8383155c8c82..5f448201955b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,7 +6,6 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
-	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SETUP_DMA_OPS
@@ -14,7 +13,7 @@ config ARC
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select ARCH_32BIT_OFF_T
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_DIRECT_REMAP
@@ -30,6 +29,7 @@ config ARC
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_IOREMAP_PROT
 	select HAVE_KERNEL_GZIP
@@ -46,6 +46,7 @@ config ARC
 	select OF_EARLY_FLATTREE
 	select PCI_SYSCALL if PCI
 	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
+	select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
 
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
@@ -525,6 +526,13 @@ config ARC_DW2_UNWIND
 config ARC_DBG_TLB_PARANOIA
 	bool "Paranoia Checks in Low Level TLB Handlers"
 
+config ARC_DBG_JUMP_LABEL
+	bool "Paranoid checks in Static Keys (jump labels) code"
+	depends on JUMP_LABEL
+	default y if STATIC_KEYS_SELFTEST
+	help
+	  Enable paranoid checks and self-test of both ARC-specific and generic
+	  part of static keys (jump labels) related code.
 endif
 
 config ARC_BUILTIN_DTB_NAME
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index f1c44cccf8d6..20e9ab6cc521 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -3,7 +3,7 @@
 # Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 #
 
-KBUILD_DEFCONFIG := nsim_hs_defconfig
+KBUILD_DEFCONFIG := haps_hs_smp_defconfig
 
 ifeq ($(CROSS_COMPILE),)
 CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-)
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 6ec1fcdfc0d7..79ec27c043c1 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -28,6 +28,12 @@
 			clock-frequency = <750000000>;
 		};
 
+		input_clk: input-clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <33333333>;
+		};
+
 		core_intc: arc700-intc@cpu {
 			compatible = "snps,arc700-intc";
 			interrupt-controller;
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
index 305a7f9658e0..c4cfc5f4f427 100644
--- a/arch/arc/boot/dts/axs101.dts
+++ b/arch/arc/boot/dts/axs101.dts
@@ -14,6 +14,6 @@
 	compatible = "snps,axs101", "snps,arc-sdp";
 
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1";
+		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 print-fatal-signals=1";
 	};
 };
diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts
index 46c9136cbf2b..a934b92a8c30 100644
--- a/arch/arc/boot/dts/axs103_idu.dts
+++ b/arch/arc/boot/dts/axs103_idu.dts
@@ -17,6 +17,6 @@
 	compatible = "snps,axs103", "snps,arc-sdp";
 
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 print-fatal-signals=1 consoleblank=0 video=1280x720@60";
+		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 print-fatal-signals=1 consoleblank=0";
 	};
 };
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 08bcfed6b80f..f9a5c9ddcae7 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -61,12 +61,13 @@
 				clock-frequency = <25000000>;
 				#clock-cells = <0>;
 			};
+		};
 
-			pguclk: pguclk {
-				#clock-cells = <0>;
-				compatible = "fixed-clock";
-				clock-frequency = <74250000>;
-			};
+		pguclk: pguclk@10080 {
+			compatible = "snps,axs10x-pgu-pll-clock";
+			reg = <0x10080 0x10>, <0x110 0x10>;
+			#clock-cells = <0>;
+			clocks = <&input_clk>;
 		};
 
 		gmac: ethernet@18000 {
diff --git a/arch/arc/boot/dts/haps_hs.dts b/arch/arc/boot/dts/haps_hs.dts
index 44bc522fdec8..60d578e2781f 100644
--- a/arch/arc/boot/dts/haps_hs.dts
+++ b/arch/arc/boot/dts/haps_hs.dts
@@ -9,13 +9,15 @@
 / {
 	model = "snps,zebu_hs";
 	compatible = "snps,zebu_hs";
-	#address-cells = <1>;
-	#size-cells = <1>;
+	#address-cells = <2>;
+	#size-cells = <2>;
 	interrupt-parent = <&core_intc>;
 
 	memory {
 		device_type = "memory";
-		reg = <0x80000000 0x20000000>;	/* 512 */
+		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
+		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MB low mem */
+		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
 	};
 
 	chosen {
@@ -31,8 +33,9 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 
-		/* child and parent address space 1:1 mapped */
-		ranges;
+		/* only perip space at end of low mem accessible
+			  bus addr,  parent bus addr, size    */
+		ranges = <0x80000000 0x0 0x80000000 0x80000000>;
 
 		core_clk: core_clk {
 			#clock-cells = <0>;
@@ -47,7 +50,7 @@
 		};
 
 		uart0: serial@f0000000 {
-			compatible = "ns8250";
+			compatible = "ns16550a";
 			reg = <0xf0000000 0x2000>;
 			interrupts = <24>;
 			clock-frequency = <50000000>;
diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts
index 4d6971cf5f9f..738c76cd07b3 100644
--- a/arch/arc/boot/dts/haps_hs_idu.dts
+++ b/arch/arc/boot/dts/haps_hs_idu.dts
@@ -54,7 +54,6 @@
 		};
 
 		uart0: serial@f0000000 {
-			/* compatible = "ns8250"; Doesn't use FIFOs */
 			compatible = "ns16550a";
 			reg = <0xf0000000 0x2000>;
 			interrupt-parent = <&idu_intc>;
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index 63dbaab1247d..f8832a15e174 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -14,11 +14,11 @@
 	interrupt-parent = <&core_intc>;
 
 	chosen {
-		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 print-fatal-signals=1";
 	};
 
 	aliases {
-		serial0 = &arcuart0;
+		serial0 = &uart0;
 	};
 
 	fpga {
@@ -41,29 +41,15 @@
 			#interrupt-cells = <1>;
 		};
 
-		arcuart0: serial@c0fc1000 {
-			compatible = "snps,arc-uart";
-			reg = <0xc0fc1000 0x100>;
-			interrupts = <5>;
-			clock-frequency = <80000000>;
-			current-speed = <115200>;
-			status = "okay";
-		};
-
-		ethernet@c0fc2000 {
-			compatible = "snps,arc-emac";
-			reg = <0xc0fc2000 0x3c>;
-			interrupts = <6>;
-			mac-address = [ 00 11 22 33 44 55 ];
-			clock-frequency = <80000000>;
-			max-speed = <100>;
-			phy = <&phy0>;
-
-			#address-cells = <1>;
-			#size-cells = <0>;
-			phy0: ethernet-phy@0 {
-				reg = <1>;
-			};
+		uart0: serial@f0000000 {
+			compatible = "ns16550a";
+			reg = <0xf0000000 0x2000>;
+			interrupts = <24>;
+			clock-frequency = <50000000>;
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			no-loopback-test = <1>;
 		};
 
 		arcpct0: pct {
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
deleted file mode 100644
index 851798a5f4e3..000000000000
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
- */
-/dts-v1/;
-
-/include/ "skeleton_hs.dtsi"
-
-/ {
-	model = "snps,nsim_hs";
-	compatible = "snps,nsim_hs";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&core_intc>;
-
-	memory {
-		device_type = "memory";
-		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
-		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MB low mem */
-		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
-	};
-
-	chosen {
-		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
-	};
-
-	aliases {
-		serial0 = &arcuart0;
-	};
-
-	fpga {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-
-		/* only perip space at end of low mem accessible
-			 bus addr,   parent bus addr, size */
-		ranges = <0x80000000 0x0 0x80000000 0x80000000>;
-
-		core_clk: core_clk {
-			#clock-cells = <0>;
-			compatible = "fixed-clock";
-			clock-frequency = <80000000>;
-		};
-
-		core_intc: core-interrupt-controller {
-			compatible = "snps,archs-intc";
-			interrupt-controller;
-			#interrupt-cells = <1>;
-		};
-
-		arcuart0: serial@c0fc1000 {
-			compatible = "snps,arc-uart";
-			reg = <0xc0fc1000 0x100>;
-			interrupts = <24>;
-			clock-frequency = <80000000>;
-			current-speed = <115200>;
-			status = "okay";
-		};
-
-		arcpct0: pct {
-			compatible = "snps,archs-pct";
-			#interrupt-cells = <1>;
-			interrupts = <20>;
-		};
-	};
-};
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
deleted file mode 100644
index 6c559a0bd1f5..000000000000
--- a/arch/arc/boot/dts/nsim_hs_idu.dts
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
- */
-/dts-v1/;
-
-/include/ "skeleton_hs_idu.dtsi"
-
-/ {
-	model = "snps,nsim_hs-smp";
-	compatible = "snps,nsim_hs";
-	interrupt-parent = <&core_intc>;
-
-	chosen {
-		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
-	};
-
-	aliases {
-		serial0 = &arcuart0;
-	};
-
-	fpga {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-
-		/* child and parent address space 1:1 mapped */
-		ranges;
-
-		core_clk: core_clk {
-			#clock-cells = <0>;
-			compatible = "fixed-clock";
-			clock-frequency = <80000000>;
-		};
-
-		core_intc: core-interrupt-controller {
-			compatible = "snps,archs-intc";
-			interrupt-controller;
-			#interrupt-cells = <1>;
-		};
-
-		idu_intc: idu-interrupt-controller {
-			compatible = "snps,archs-idu-intc";
-			interrupt-controller;
-			interrupt-parent = <&core_intc>;
-			#interrupt-cells = <1>;
-		};
-
-		arcuart0: serial@c0fc1000 {
-			compatible = "snps,arc-uart";
-			reg = <0xc0fc1000 0x100>;
-			interrupt-parent = <&idu_intc>;
-			interrupts = <0>;
-			clock-frequency = <80000000>;
-			current-speed = <115200>;
-			status = "okay";
-		};
-
-		arcpct0: pct {
-			compatible = "snps,archs-pct";
-			#interrupt-cells = <1>;
-			interrupts = <20>;
-		};
-	};
-};
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index 47ff8a97e42d..7337cdf4ffdd 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -4,6 +4,7 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_NAMESPACES=y
@@ -15,13 +16,9 @@ CONFIG_EXPERT=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
+CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs"
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ISA_ARCV2=y
-CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs"
-CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -30,9 +27,6 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -42,21 +36,12 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_NETDEVICES=y
 CONFIG_VIRTIO_NET=y
-# CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_VIA is not set
-# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_ETHERNET is not set
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
-CONFIG_MOUSE_PS2_TOUCHKIT=y
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_ARC_PS2=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
@@ -66,9 +51,6 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_VIRTIO_MMIO=y
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index 9685fd5f57a4..bc927221afc0 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -4,6 +4,7 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_NAMESPACES=y
@@ -16,15 +17,11 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu"
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ISA_ARCV2=y
-CONFIG_SMP=y
-CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu"
-CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -33,9 +30,6 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -43,21 +37,12 @@ CONFIG_DEVTMPFS=y
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-# CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_VIA is not set
-# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_ETHERNET is not set
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
-CONFIG_MOUSE_PS2_TOUCHKIT=y
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_ARC_PS2=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
@@ -67,9 +52,6 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 5978d4d7d5b0..07f26ed39f02 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -7,7 +7,6 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
 # CONFIG_TIMERFD is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 2b9b11474640..326f6cde7826 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -4,6 +4,7 @@ CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_NAMESPACES=y
@@ -17,13 +18,10 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_ISA_ARCOMPACT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
-CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -37,15 +35,18 @@ CONFIG_DEVTMPFS=y
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-CONFIG_ARC_EMAC=y
-CONFIG_LXT_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_ETHERNET is not set
+# CONFIG_WLAN is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_HID is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
deleted file mode 100644
index bab3dd255841..000000000000
--- a/arch/arc/configs/nsim_hs_defconfig
+++ /dev/null
@@ -1,60 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ISA_ARCV2=y
-CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-CONFIG_DEVTMPFS=y
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_BLK_DEV is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
deleted file mode 100644
index 90d2d50fb8dc..000000000000
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ /dev/null
@@ -1,58 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ISA_ARCV2=y
-CONFIG_SMP=y
-CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-CONFIG_DEVTMPFS=y
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_BLK_DEV is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 3a138f8c7299..a12656ec0072 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -15,7 +15,6 @@ CONFIG_INITRAMFS_ROOT_UID=2100
 CONFIG_INITRAMFS_ROOT_GID=501
 # CONFIG_RD_GZIP is not set
 CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_AIO is not set
 CONFIG_EMBEDDED=y
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 393d4f5e1450..1b505694691e 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += mmiowb.h
-generic-y += msi.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 918804c7c1a4..d8ece4292388 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -25,6 +25,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/build_bug.h>
+
 /* Uncached access macros */
 #define arc_read_uncached_32(ptr)	\
 ({					\
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 41b16f21beec..0b8b63d0bec1 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -162,7 +162,7 @@
 #endif
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
-	ST2	r58, r59, PT_sp + 12
+	ST2	r58, r59, PT_r58
 #endif
 
 .endm
@@ -172,8 +172,8 @@
 
 	LD2	gp, fp, PT_r26		; gp (r26), fp (r27)
 
-	ld	r12, [sp, PT_sp + 4]
-	ld	r30, [sp, PT_sp + 8]
+	ld	r12, [sp, PT_r12]
+	ld	r30, [sp, PT_r30]
 
 	; Restore SP (into AUX_USER_SP) only if returning to U mode
 	;  - for K mode, it will be implicitly restored as stack is unwound
@@ -190,7 +190,7 @@
 #endif
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
-	LD2	r58, r59, PT_sp + 12
+	LD2	r58, r59, PT_r58
 #endif
 .endm
 
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 66a292335ee6..c3aa775878dc 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -130,7 +130,7 @@
  * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro PROLOG_FREEUP_REG	reg, mem
-#ifdef CONFIG_SMP
+#ifndef ARC_USE_SCRATCH_REG
 	sr  \reg, [ARC_REG_SCRATCH_DATA0]
 #else
 	st  \reg, [\mem]
@@ -138,7 +138,7 @@
 .endm
 
 .macro PROLOG_RESTORE_REG	reg, mem
-#ifdef CONFIG_SMP
+#ifndef ARC_USE_SCRATCH_REG
 	lr  \reg, [ARC_REG_SCRATCH_DATA0]
 #else
 	ld  \reg, [\mem]
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 9a74ce71a767..30ac40fed2c5 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -8,7 +8,6 @@
 #define _ASM_ARC_HUGEPAGE_H
 
 #include <linux/types.h>
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 72f7929736f8..8f777d6441a5 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -34,10 +34,6 @@ static inline void ioport_unmap(void __iomem *addr)
 
 extern void iounmap(const void __iomem *addr);
 
-#define ioremap_nocache(phy, sz)	ioremap(phy, sz)
-#define ioremap_wc(phy, sz)		ioremap(phy, sz)
-#define ioremap_wt(phy, sz)		ioremap(phy, sz)
-
 /*
  * io{read,write}{16,32}be() macros
  */
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
new file mode 100644
index 000000000000..9d9618079739
--- /dev/null
+++ b/arch/arc/include/asm/jump_label.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARC_JUMP_LABEL_H
+#define _ASM_ARC_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+/*
+ * NOTE about '.balign 4':
+ *
+ * To make atomic update of patched instruction available we need to guarantee
+ * that this instruction doesn't cross L1 cache line boundary.
+ *
+ * As of today we simply align instruction which can be patched by 4 byte using
+ * ".balign 4" directive. In that case patched instruction is aligned with one
+ * 16-bit NOP_S if this is required.
+ * However 'align by 4' directive is much stricter than it actually required.
+ * It's enough that our 32-bit instruction don't cross L1 cache line boundary /
+ * L1 I$ fetch block boundary which can be achieved by using
+ * ".bundle_align_mode" assembler directive. That will save us from adding
+ * useless NOP_S padding in most of the cases.
+ *
+ * TODO: switch to ".bundle_align_mode" directive using whin it will be
+ * supported by ARC toolchain.
+ */
+
+static __always_inline bool arch_static_branch(struct static_key *key,
+					       bool branch)
+{
+	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
+		 "1:							\n"
+		 "nop							\n"
+		 ".pushsection __jump_table, \"aw\"			\n"
+		 ".word 1b, %l[l_yes], %c0				\n"
+		 ".popsection						\n"
+		 : : "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key,
+						    bool branch)
+{
+	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
+		 "1:							\n"
+		 "b %l[l_yes]						\n"
+		 ".pushsection __jump_table, \"aw\"			\n"
+		 ".word 1b, %l[l_yes], %c0				\n"
+		 ".popsection						\n"
+		 : : "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+typedef u32 jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif  /* __ASSEMBLY__ */
+#endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 98cadf1a09ac..26b731d32a2b 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -40,6 +40,10 @@
 #define ARC_REG_SCRATCH_DATA0	0x46c
 #endif
 
+#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
+#define	ARC_USE_SCRATCH_REG
+#endif
+
 /* Bits in MMU PID register */
 #define __TLB_ENABLE		(1 << 31)
 #define __PROG_ENABLE		(1 << 30)
@@ -63,6 +67,8 @@
 #if (CONFIG_ARC_MMU_VER >= 2)
 #define TLBWriteNI  0x5		/* write JTLB without inv uTLBs */
 #define TLBIVUTLB   0x6		/* explicitly inv uTLBs */
+#else
+#define TLBWriteNI  TLBWrite	/* Not present in hardware, fallback */
 #endif
 
 #if (CONFIG_ARC_MMU_VER >= 4)
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 035470816be5..3a5e6a5b9ed6 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -144,7 +144,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 */
 	cpumask_set_cpu(cpu, mm_cpumask(next));
 
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
 #endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 7addd0301c51..9019ed9f9c94 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -33,7 +33,6 @@
 #define _ASM_ARC_PGTABLE_H
 
 #include <linux/bits.h>
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <asm/page.h>
 #include <asm/mmu.h>	/* to propagate CONFIG_ARC_MMU_VER <n> */
@@ -351,7 +350,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
  * Thus use this macro only when you are certain that "current" is current
  * e.g. when dealing with signal frame setup code etc
  */
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 #define pgd_offset_fast(mm, addr)	\
 ({					\
 	pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0);  \
diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..973095aad665
--- /dev/null
+++ b/arch/arc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARC_VMALLOC_H
+#define _ASM_ARC_VMALLOC_H
+
+#endif /* _ASM_ARC_VMALLOC_H */
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index de6251132310..e784f5396dda 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED) 	+= unaligned.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_ARC_METAWARE_HLINK)	+= arc_hostlink.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
+obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 
 obj-$(CONFIG_ARC_FPU_SAVE_RESTORE)	+= fpu.o
 CFLAGS_fpu.o   += -mdpfp
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 1f621e416521..c783bcd35eb8 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -66,7 +66,15 @@ int main(void)
 
 	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
 	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-	DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
+#ifdef CONFIG_ISA_ARCV2
+	OFFSET(PT_r12, pt_regs, r12);
+	OFFSET(PT_r30, pt_regs, r30);
+#endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	OFFSET(PT_r58, pt_regs, r58);
+	OFFSET(PT_r59, pt_regs, r59);
+#endif
 
 	return 0;
 }
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 72be01270e24..1f6bb184a44d 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -337,11 +337,11 @@ resume_user_mode_begin:
 resume_kernel_mode:
 
 	; Disable Interrupts from this point on
-	; CONFIG_PREEMPT: This is a must for preempt_schedule_irq()
-	; !CONFIG_PREEMPT: To ensure restore_regs is intr safe
+	; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq()
+	; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe
 	IRQ_DISABLE	r9
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c
new file mode 100644
index 000000000000..b8600dc325b5
--- /dev/null
+++ b/arch/arc/kernel/jump_label.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+
+#include "asm/cacheflush.h"
+
+#define JUMPLABEL_ERR	"ARC: jump_label: ERROR: "
+
+/* Halt system on fatal error to make debug easier */
+#define arc_jl_fatal(format...)						\
+({									\
+	pr_err(JUMPLABEL_ERR format);					\
+	BUG();								\
+})
+
+static inline u32 arc_gen_nop(void)
+{
+	/* 1x 32bit NOP in middle endian */
+	return 0x7000264a;
+}
+
+/*
+ * Atomic update of patched instruction is only available if this
+ * instruction doesn't cross L1 cache line boundary. You can read about
+ * the way we achieve this in arc/include/asm/jump_label.h
+ */
+static inline void instruction_align_assert(void *addr, int len)
+{
+	unsigned long a = (unsigned long)addr;
+
+	if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT))
+		arc_jl_fatal("instruction (addr %px) cross L1 cache line border",
+			     addr);
+}
+
+/*
+ * ARCv2 'Branch unconditionally' instruction:
+ * 00000ssssssssss1SSSSSSSSSSNRtttt
+ * s S[n:0] lower bits signed immediate (number is bitfield size)
+ * S S[m:n+1] upper bits signed immediate (number is bitfield size)
+ * t S[24:21] upper bits signed immediate (branch unconditionally far)
+ * N N <.d> delay slot mode
+ * R R Reserved
+ */
+static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target)
+{
+	u32 instruction_l, instruction_r;
+	u32 pcl = pc & GENMASK(31, 2);
+	u32 u_offset = target - pcl;
+	u32 s, S, t;
+
+	/*
+	 * Offset in 32-bit branch instruction must to fit into s25.
+	 * Something is terribly broken if we get such huge offset within one
+	 * function.
+	 */
+	if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214)
+		arc_jl_fatal("gen branch with offset (%d) not fit in s25",
+			     (s32)u_offset);
+
+	/*
+	 * All instructions are aligned by 2 bytes so we should never get offset
+	 * here which is not 2 bytes aligned.
+	 */
+	if (u_offset & 0x1)
+		arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes",
+			     (s32)u_offset);
+
+	s = (u_offset >> 1)  & GENMASK(9, 0);
+	S = (u_offset >> 11) & GENMASK(9, 0);
+	t = (u_offset >> 21) & GENMASK(3, 0);
+
+	/* 00000ssssssssss1 */
+	instruction_l = (s << 1) | 0x1;
+	/* SSSSSSSSSSNRtttt */
+	instruction_r = (S << 6) | t;
+
+	return (instruction_r << 16) | (instruction_l & GENMASK(15, 0));
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	jump_label_t *instr_addr = (jump_label_t *)entry->code;
+	u32 instr;
+
+	instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE);
+
+	if (type == JUMP_LABEL_JMP)
+		instr = arc_gen_branch(entry->code, entry->target);
+	else
+		instr = arc_gen_nop();
+
+	WRITE_ONCE(*instr_addr, instr);
+	flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+	/*
+	 * We use only one NOP type (1x, 4 byte) in arch_static_branch, so
+	 * there's no need to patch an identical NOP over the top of it here.
+	 * The generic code calls 'arch_jump_label_transform' if the NOP needs
+	 * to be replaced by a branch, so 'arch_jump_label_transform_static' is
+	 * never called with type other than JUMP_LABEL_NOP.
+	 */
+	BUG_ON(type != JUMP_LABEL_NOP);
+}
+
+#ifdef CONFIG_ARC_DBG_JUMP_LABEL
+#define SELFTEST_MSG	"ARC: instruction generation self-test: "
+
+struct arc_gen_branch_testdata {
+	jump_label_t pc;
+	jump_label_t target_address;
+	u32 expected_instr;
+};
+
+static __init int branch_gen_test(const struct arc_gen_branch_testdata *test)
+{
+	u32 instr_got;
+
+	instr_got = arc_gen_branch(test->pc, test->target_address);
+	if (instr_got == test->expected_instr)
+		return 0;
+
+	pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n",
+	       test->pc, test->target_address,
+	       test->expected_instr, instr_got);
+
+	return -EFAULT;
+}
+
+/*
+ * Offset field in branch instruction is not continuous. Test all
+ * available offset field and sign combinations. Test data is generated
+ * from real working code.
+ */
+static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = {
+	{0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */
+	{0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */
+	{0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */
+	{0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */
+	{0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big  (-30892) offs */
+	{0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */
+	{0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */
+	{0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */
+	{0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */
+	{0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */
+	{0x9000bb38, 0x9003b340, 0x17c00009}, /* big  (+194568) offs */
+	{0x90008f44, 0x90578d80, 0xb7c2063d}  /* huge (+5701180) offs */
+};
+
+static __init int instr_gen_test(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++)
+		if (branch_gen_test(&arcgenbr_test_data[i]))
+			return -EFAULT;
+
+	pr_info(SELFTEST_MSG "OK\n");
+
+	return 0;
+}
+early_initcall(instr_gen_test);
+
+#endif /* CONFIG_ARC_DBG_JUMP_LABEL */
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 7ee89dc61f6e..e1c647490f00 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -572,10 +572,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	root_mountflags &= ~MS_RDONLY;
 
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
-
 	arc_unwind_init();
 }
 
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index dc05a63516f5..27ea64b1fa33 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -42,10 +42,10 @@ do {						\
 
 #define EXTRA_INFO(f) { \
 		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
-				% FIELD_SIZEOF(struct unwind_frame_info, f)) \
+				% sizeof_field(struct unwind_frame_info, f)) \
 				+ offsetof(struct unwind_frame_info, f) \
-				/ FIELD_SIZEOF(struct unwind_frame_info, f), \
-				FIELD_SIZEOF(struct unwind_frame_info, f) \
+				/ sizeof_field(struct unwind_frame_info, f), \
+				sizeof_field(struct unwind_frame_info, f) \
 	}
 #define PTREGS_INFO(f) EXTRA_INFO(regs.f)
 
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 6c693a9d29b6..54139a6f469b 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -95,13 +95,13 @@ SECTIONS
 	_etext = .;
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
+	RO_DATA(PAGE_SIZE)
 
 	/*
 	 * 1. this is .data essentially
 	 * 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned
 	 */
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
 	_edata = .;
 
@@ -118,8 +118,6 @@ SECTIONS
 	/DISCARD/ : {	*(.eh_frame) }
 #endif
 
-	NOTES
-
 	. = ALIGN(PAGE_SIZE);
 	_end = . ;
 
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 73a7e88a1e92..e947572a521e 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -48,8 +48,8 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
  * upper layer functions (in include/linux/dma-mapping.h)
  */
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_TO_DEVICE:
@@ -69,8 +69,8 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_TO_DEVICE:
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 3861543b66a0..fb86bc3e9b35 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -30,6 +30,7 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
 	 * with the 'reference' page table.
 	 */
 	pgd_t *pgd, *pgd_k;
+	p4d_t *p4d, *p4d_k;
 	pud_t *pud, *pud_k;
 	pmd_t *pmd, *pmd_k;
 
@@ -39,8 +40,13 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
 	if (!pgd_present(*pgd_k))
 		goto bad_area;
 
-	pud = pud_offset(pgd, address);
-	pud_k = pud_offset(pgd_k, address);
+	p4d = p4d_offset(pgd, address);
+	p4d_k = p4d_offset(pgd_k, address);
+	if (!p4d_present(*p4d_k))
+		goto bad_area;
+
+	pud = pud_offset(p4d, address);
+	pud_k = pud_offset(p4d_k, address);
 	if (!pud_present(*pud_k))
 		goto bad_area;
 
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index a4856bfaedf3..fc8849e4f72e 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -111,12 +111,14 @@ EXPORT_SYMBOL(__kunmap_atomic);
 static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 {
 	pgd_t *pgd_k;
+	p4d_t *p4d_k;
 	pud_t *pud_k;
 	pmd_t *pmd_k;
 	pte_t *pte_k;
 
 	pgd_k = pgd_offset_k(kvaddr);
-	pud_k = pud_offset(pgd_k, kvaddr);
+	p4d_k = p4d_offset(pgd_k, kvaddr);
+	pud_k = pud_offset(p4d_k, kvaddr);
 	pmd_k = pmd_offset(pud_k, kvaddr);
 
 	pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 10025e199353..c340acd989a0 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -118,6 +118,33 @@ static inline void __tlb_entry_erase(void)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
+static void utlb_invalidate(void)
+{
+#if (CONFIG_ARC_MMU_VER >= 2)
+
+#if (CONFIG_ARC_MMU_VER == 2)
+	/* MMU v2 introduced the uTLB Flush command.
+	 * There was however an obscure hardware bug, where uTLB flush would
+	 * fail when a prior probe for J-TLB (both totally unrelated) would
+	 * return lkup err - because the entry didn't exist in MMU.
+	 * The Workround was to set Index reg with some valid value, prior to
+	 * flush. This was fixed in MMU v3
+	 */
+	unsigned int idx;
+
+	/* make sure INDEX Reg is valid */
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/* If not write some dummy val */
+	if (unlikely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
+#endif
+
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
+#endif
+
+}
+
 #if (CONFIG_ARC_MMU_VER < 4)
 
 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
@@ -149,44 +176,6 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	}
 }
 
-/****************************************************************************
- * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs)
- *
- * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB
- *
- * utlb_invalidate ( )
- *  -For v2 MMU calls Flush uTLB Cmd
- *  -For v1 MMU does nothing (except for Metal Fix v1 MMU)
- *      This is because in v1 TLBWrite itself invalidate uTLBs
- ***************************************************************************/
-
-static void utlb_invalidate(void)
-{
-#if (CONFIG_ARC_MMU_VER >= 2)
-
-#if (CONFIG_ARC_MMU_VER == 2)
-	/* MMU v2 introduced the uTLB Flush command.
-	 * There was however an obscure hardware bug, where uTLB flush would
-	 * fail when a prior probe for J-TLB (both totally unrelated) would
-	 * return lkup err - because the entry didn't exist in MMU.
-	 * The Workround was to set Index reg with some valid value, prior to
-	 * flush. This was fixed in MMU v3 hence not needed any more
-	 */
-	unsigned int idx;
-
-	/* make sure INDEX Reg is valid */
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
-
-	/* If not write some dummy val */
-	if (unlikely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
-#endif
-
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
-#endif
-
-}
-
 static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
@@ -219,11 +208,6 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 
 #else	/* CONFIG_ARC_MMU_VER >= 4) */
 
-static void utlb_invalidate(void)
-{
-	/* No need since uTLB is always in sync with JTLB */
-}
-
 static void tlb_entry_erase(unsigned int vaddr_n_asid)
 {
 	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
@@ -267,7 +251,7 @@ noinline void local_flush_tlb_all(void)
 	for (entry = 0; entry < num_tlb; entry++) {
 		/* write this entry to the TLB */
 		write_aux_reg(ARC_REG_TLBINDEX, entry);
-		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
 	}
 
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
@@ -278,7 +262,7 @@ noinline void local_flush_tlb_all(void)
 
 		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
 			write_aux_reg(ARC_REG_TLBINDEX, entry);
-			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
 		}
 	}
 
@@ -355,8 +339,6 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 
-	utlb_invalidate();
-
 	local_irq_restore(flags);
 }
 
@@ -385,8 +367,6 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		start += PAGE_SIZE;
 	}
 
-	utlb_invalidate();
-
 	local_irq_restore(flags);
 }
 
@@ -407,7 +387,6 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 
 	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
-		utlb_invalidate();
 	}
 
 	local_irq_restore(flags);
@@ -868,7 +847,7 @@ void arc_mmu_init(void)
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
 	/* In smp we use this reg for interrupt 1 scratch */
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 	/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index c55d95dd2f39..2efaf6ca0c06 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -122,17 +122,27 @@ ex_saved_reg1:
 #else	/* ARCv2 */
 
 .macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_ARC_HAS_LL64
+	std   r0, [sp, -16]
+	std   r2, [sp, -8]
+#else
 	PUSH  r0
 	PUSH  r1
 	PUSH  r2
 	PUSH  r3
+#endif
 .endm
 
 .macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_ARC_HAS_LL64
+	ldd   r0, [sp, -16]
+	ldd   r2, [sp, -8]
+#else
 	POP   r3
 	POP   r2
 	POP   r1
 	POP   r0
+#endif
 .endm
 
 #endif
@@ -193,7 +203,7 @@ ex_saved_reg1:
 
 	lr  r2, [efa]
 
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 	lr  r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
 #else
 	GET_CURR_TASK_ON_CPU  r1
@@ -282,11 +292,7 @@ ex_saved_reg1:
 	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
 
 	/* Commit the Write */
-#if (CONFIG_ARC_MMU_VER >= 2)   /* introduced in v2 */
 	sr TLBWriteNI, [ARC_REG_TLBCOMMAND]
-#else
-	sr TLBWrite, [ARC_REG_TLBCOMMAND]
-#endif
 
 #else
 	sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
@@ -370,9 +376,7 @@ ENTRY(EV_TLBMissD)
 
 	;----------------------------------------------------------------
 	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
-	lr      r3, [ecr]
 	or      r0, r0, _PAGE_ACCESSED        ; Accessed bit always
-	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
 	or.nz   r0, r0, _PAGE_DIRTY           ; if Write, set Dirty bit as well
 	st_s    r0, [r1]                      ; Write back PTE
 
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index a376a50d3fea..a931d0a256d0 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -7,7 +7,7 @@
 menuconfig ARC_PLAT_EZNPS
 	bool "\"EZchip\" ARC dev platform"
 	select CPU_BIG_ENDIAN
-	select CLKSRC_NPS
+	select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
 	select EZNPS_GIC
 	select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
 	help
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
index 3765dedcd319..2bde2a6e336a 100644
--- a/arch/arc/plat-sim/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -21,7 +21,6 @@ static const char *simulation_compat[] __initconst = {
 	"snps,nsim",
 	"snps,nsimosci",
 #else
-	"snps,nsim_hs",
 	"snps,nsimosci_hs",
 	"snps,zebu_hs",
 #endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8a50efb559f3..0b1b1c66bce9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,7 +7,6 @@ config ARM
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
@@ -37,7 +36,7 @@ config ARM
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
-	select BUILDTIME_EXTABLE_SORT if MMU
+	select BUILDTIME_TABLE_SORT if MMU
 	select CLONE_BACKWARDS
 	select CPU_PM if SUSPEND || CPU_IDLE
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -73,6 +72,7 @@ config ARM
 	select HAVE_ARM_SMCCC if CPU_V7
 	select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
 	select HAVE_CONTEXT_TRACKING
+	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS if MMU
@@ -117,7 +117,6 @@ config ARM
 	select OLD_SIGSUSPEND3
 	select PCI_SYSCALL if PCI
 	select PERF_USE_VMALLOC
-	select REFCOUNT_FULL
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	# Above selects are sorted alphabetically; please add new ones
@@ -1020,7 +1019,7 @@ config ARM_ERRATA_775420
        depends on CPU_V7
        help
 	 This option enables the workaround for the 775420 Cortex-A9 (r2p2,
-	 r2p6,r2p8,r2p10,r3p0) erratum. In case a date cache maintenance
+	 r2p6,r2p8,r2p10,r3p0) erratum. In case a data cache maintenance
 	 operation aborts with MMU exception, it might cause the processor
 	 to deadlock. This workaround puts DSB before executing ISB if
 	 an abort may occur on cache maintenance.
@@ -1359,7 +1358,7 @@ config ARCH_NR_GPIO
 	int
 	default 2048 if ARCH_SOCFPGA
 	default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \
-		ARCH_ZYNQ
+		ARCH_ZYNQ || ARCH_ASPEED
 	default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
 		SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
 	default 416 if ARCH_SUNXI
diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S
index 5c476bd2b4ce..b562da2f7040 100644
--- a/arch/arm/boot/bootp/init.S
+++ b/arch/arm/boot/bootp/init.S
@@ -13,7 +13,7 @@
  *  size immediately following the kernel, we could build this into
  *  a binary blob, and concatenate the zImage using the cat command.
  */
-		.section .start,#alloc,#execinstr
+		.section .start, "ax"
 		.type	_start, #function
 		.globl	_start
 
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 9219389bbe61..a1e883c5e5c4 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -121,7 +121,7 @@ ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
-KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
+KBSS_SZ = $(shell echo $$(($$($(NM) $(obj)/../../../../vmlinux | \
 		sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
 		       -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
 LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
@@ -165,7 +165,7 @@ $(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S
 # The .data section is already discarded by the linker script so no need
 # to bother about it here.
 check_for_bad_syms = \
-bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
+bad_syms=$$($(NM) $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
 [ -z "$$bad_syms" ] || \
   ( echo "following symbols must have non local/private scope:" >&2; \
     echo "$$bad_syms" >&2; false )
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index 330cd3c2eae5..64c49747f8a3 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -19,7 +19,7 @@ static int node_offset(void *fdt, const char *node_path)
 }
 
 static int setprop(void *fdt, const char *node_path, const char *property,
-		   uint32_t *val_array, int size)
+		   void *val_array, int size)
 {
 	int offset = node_offset(fdt, node_path);
 	if (offset < 0)
@@ -60,7 +60,7 @@ static uint32_t get_cell_size(const void *fdt)
 {
 	int len;
 	uint32_t cell_size = 1;
-	const uint32_t *size_len =  getprop(fdt, "/", "#size-cells", &len);
+	const __be32 *size_len =  getprop(fdt, "/", "#size-cells", &len);
 
 	if (size_len)
 		cell_size = fdt32_to_cpu(*size_len);
@@ -129,7 +129,7 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 	struct tag *atag = atag_list;
 	/* In the case of 64 bits memory size, need to reserve 2 cells for
 	 * address and size for each bank */
-	uint32_t mem_reg_property[2 * 2 * NR_BANKS];
+	__be32 mem_reg_property[2 * 2 * NR_BANKS];
 	int memcount = 0;
 	int ret, memsize;
 
@@ -138,7 +138,7 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 		return 1;
 
 	/* if we get a DTB here we're done already */
-	if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC))
+	if (*(__be32 *)atag_list == cpu_to_fdt32(FDT_MAGIC))
 	       return 0;
 
 	/* validate the ATAG */
@@ -177,8 +177,8 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 				/* if memsize is 2, that means that
 				 * each data needs 2 cells of 32 bits,
 				 * so the data are 64 bits */
-				uint64_t *mem_reg_prop64 =
-					(uint64_t *)mem_reg_property;
+				__be64 *mem_reg_prop64 =
+					(__be64 *)mem_reg_property;
 				mem_reg_prop64[memcount++] =
 					cpu_to_fdt64(atag->u.mem.start);
 				mem_reg_prop64[memcount++] =
diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S
index 88e2a88d324b..0e092c36da2f 100644
--- a/arch/arm/boot/compressed/big-endian.S
+++ b/arch/arm/boot/compressed/big-endian.S
@@ -6,7 +6,7 @@
  *  Author: Nicolas Pitre
  */
 
-	.section ".start", #alloc, #execinstr
+	.section ".start", "ax"
 
 	mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 	orr	r0, r0, #(1 << 7)	@ enable big endian mode
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 93dffed0ac6e..ead21e5f2b80 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -140,7 +140,7 @@
 #endif
 		.endm
 
-		.section ".start", #alloc, #execinstr
+		.section ".start", "ax"
 /*
  * sort out different calling conventions
  */
@@ -1273,7 +1273,7 @@ iflush:
 __armv5tej_mmu_cache_flush:
 		tst	r4, #1
 		movne	pc, lr
-1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
+1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
 		bne	1b
 		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
 		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
index b36c0289a308..6a0f1f524466 100644
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -2,11 +2,13 @@
 #ifndef _ARM_LIBFDT_ENV_H
 #define _ARM_LIBFDT_ENV_H
 
+#include <linux/limits.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <asm/byteorder.h>
 
-#define INT_MAX			((int)(~0U>>1))
+#define INT32_MAX	S32_MAX
+#define UINT32_MAX	U32_MAX
 
 typedef __be16 fdt16_t;
 typedef __be32 fdt32_t;
diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S
index 0284f84dcf38..27577644ee72 100644
--- a/arch/arm/boot/compressed/piggy.S
+++ b/arch/arm/boot/compressed/piggy.S
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-	.section .piggydata,#alloc
+	.section .piggydata, "a"
 	.globl	input_data
 input_data:
 	.incbin	"arch/arm/boot/compressed/piggy_data"
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index b21b3a64641a..08011dc8c7a6 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -45,7 +45,8 @@ dtb-$(CONFIG_SOC_AT91SAM9) += \
 	at91sam9x25ek.dtb \
 	at91sam9x35ek.dtb
 dtb-$(CONFIG_SOC_SAM_V7) += \
-	at91-kizbox2.dtb \
+	at91-kizbox2-2.dtb \
+	at91-kizbox3-hs.dtb \
 	at91-nattis-2-natte-2.dtb \
 	at91-sama5d27_som1_ek.dtb \
 	at91-sama5d2_ptc_ek.dtb \
@@ -83,6 +84,7 @@ dtb-$(CONFIG_ARCH_BCM2835) += \
 	bcm2837-rpi-3-b.dtb \
 	bcm2837-rpi-3-b-plus.dtb \
 	bcm2837-rpi-cm3-io3.dtb \
+	bcm2711-rpi-4-b.dtb \
 	bcm2835-rpi-zero.dtb \
 	bcm2835-rpi-zero-w.dtb
 dtb-$(CONFIG_ARCH_BCM_5301X) += \
@@ -113,6 +115,7 @@ dtb-$(CONFIG_ARCH_BCM_5301X) += \
 	bcm47094-luxul-abr-4500.dtb \
 	bcm47094-luxul-xap-1610.dtb \
 	bcm47094-luxul-xbr-4500.dtb \
+	bcm47094-luxul-xwc-2000.dtb \
 	bcm47094-luxul-xwr-3100.dtb \
 	bcm47094-luxul-xwr-3150-v1.dtb \
 	bcm47094-netgear-r8500.dtb \
@@ -337,7 +340,8 @@ dtb-$(CONFIG_ARCH_MMP) += \
 	pxa168-aspenite.dtb \
 	pxa910-dkb.dtb \
 	mmp2-brownstone.dtb \
-	mmp2-olpc-xo-1-75.dtb
+	mmp2-olpc-xo-1-75.dtb \
+	mmp3-dell-ariel.dtb
 dtb-$(CONFIG_ARCH_MPS2) += \
 	mps2-an385.dtb \
 	mps2-an399.dtb
@@ -552,7 +556,8 @@ dtb-$(CONFIG_SOC_IMX6SL) += \
 	imx6sl-evk.dtb \
 	imx6sl-warp.dtb
 dtb-$(CONFIG_SOC_IMX6SLL) += \
-	imx6sll-evk.dtb
+	imx6sll-evk.dtb \
+	imx6sll-kobo-clarahd.dtb
 dtb-$(CONFIG_SOC_IMX6SX) += \
 	imx6sx-nitrogen6sx.dtb \
 	imx6sx-sabreauto.dtb \
@@ -583,6 +588,7 @@ dtb-$(CONFIG_SOC_IMX6UL) += \
 	imx6ull-14x14-evk.dtb \
 	imx6ull-colibri-eval-v3.dtb \
 	imx6ull-colibri-wifi-eval-v3.dtb \
+	imx6ull-opos6uldev.dtb \
 	imx6ull-phytec-segin-ff-rdk-nand.dtb \
 	imx6ull-phytec-segin-ff-rdk-emmc.dtb \
 	imx6ull-phytec-segin-lc-rdk-nand.dtb \
@@ -753,6 +759,9 @@ dtb-$(CONFIG_SOC_AM33XX) += \
 	am335x-moxa-uc-2101.dtb \
 	am335x-moxa-uc-8100-me-t.dtb \
 	am335x-nano.dtb \
+	am335x-netcan-plus-1xx.dtb \
+	am335x-netcom-plus-2xx.dtb \
+	am335x-netcom-plus-8xx.dtb \
 	am335x-pdu001.dtb \
 	am335x-pepper.dtb \
 	am335x-phycore-rdk.dtb \
@@ -765,6 +774,7 @@ dtb-$(CONFIG_SOC_AM33XX) += \
 	am335x-wega-rdk.dtb \
 	am335x-osd3358-sm-red.dtb
 dtb-$(CONFIG_ARCH_OMAP4) += \
+	omap4-droid-bionic-xt875.dtb \
 	omap4-droid4-xt894.dtb \
 	omap4-duovero-parlor.dtb \
 	omap4-kc1.dtb \
@@ -1105,6 +1115,7 @@ dtb-$(CONFIG_MACH_SUN8I) += \
 	sun8i-h3-beelink-x2.dtb \
 	sun8i-h3-libretech-all-h3-cc.dtb \
 	sun8i-h3-mapleboard-mp130.dtb \
+	sun8i-h3-nanopi-duo2.dtb \
 	sun8i-h3-nanopi-m1.dtb	\
 	sun8i-h3-nanopi-m1-plus.dtb \
 	sun8i-h3-nanopi-neo.dtb \
@@ -1288,6 +1299,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
 	aspeed-bmc-facebook-wedge40.dtb \
 	aspeed-bmc-facebook-wedge100.dtb \
 	aspeed-bmc-facebook-yamp.dtb \
+	aspeed-bmc-ibm-rainier.dtb \
 	aspeed-bmc-intel-s2600wf.dtb \
 	aspeed-bmc-inspur-fp5280g2.dtb \
 	aspeed-bmc-lenovo-hr630.dtb \
@@ -1298,6 +1310,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
 	aspeed-bmc-opp-palmetto.dtb \
 	aspeed-bmc-opp-romulus.dtb \
 	aspeed-bmc-opp-swift.dtb \
+	aspeed-bmc-opp-tacoma.dtb \
 	aspeed-bmc-opp-vesnin.dtb \
 	aspeed-bmc-opp-witherspoon.dtb \
 	aspeed-bmc-opp-zaius.dtb \
diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi
index ed235f263e29..05e7b5d4a95b 100644
--- a/arch/arm/boot/dts/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/am335x-baltos.dtsi
@@ -258,18 +258,6 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&cppi41dma  {
-	status = "okay";
-};
-
 #include "tps65910.dtsi"
 
 &tps {
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index 89b4cf2cb7f8..6c9187bc0f17 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -191,38 +191,16 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "peripheral";
 	interrupts-extended = <&intc 18 &tps 0>;
 	interrupt-names = "mc", "vbus";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &i2c0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0_pins>;
diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi
index 7ad079861efd..91f93bc89716 100644
--- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi
+++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi
@@ -131,6 +131,11 @@
 };
 
 / {
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>; /* 512 MB */
+	};
+
 	clk_mcasp0_fixed: clk_mcasp0_fixed {
 		#clock-cells = <0>;
 		compatible = "fixed-clock";
diff --git a/arch/arm/boot/dts/am335x-boneblue.dts b/arch/arm/boot/dts/am335x-boneblue.dts
index 2f6652ef9a15..5811fb8d4fdf 100644
--- a/arch/arm/boot/dts/am335x-boneblue.dts
+++ b/arch/arm/boot/dts/am335x-boneblue.dts
@@ -278,38 +278,16 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "peripheral";
 	interrupts-extended = <&intc 18 &tps 0>;
 	interrupt-names = "mc", "vbus";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &i2c0 {
 	baseboard_eeprom: baseboard_eeprom@50 {
 		compatible = "atmel,24c256";
diff --git a/arch/arm/boot/dts/am335x-chiliboard.dts b/arch/arm/boot/dts/am335x-chiliboard.dts
index 8cd81dc0cc72..b14a2759c69b 100644
--- a/arch/arm/boot/dts/am335x-chiliboard.dts
+++ b/arch/arm/boot/dts/am335x-chiliboard.dts
@@ -153,30 +153,12 @@
 };
 
 /* USB */
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&usb1_drvvbus>;
-
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 /* microSD */
 &mmc1 {
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/am335x-cm-t335.dts b/arch/arm/boot/dts/am335x-cm-t335.dts
index 1fe3b566ba3d..c6fe9db660e2 100644
--- a/arch/arm/boot/dts/am335x-cm-t335.dts
+++ b/arch/arm/boot/dts/am335x-cm-t335.dts
@@ -330,26 +330,6 @@ status = "okay";
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
-&cppi41dma  {
-	status = "okay";
-};
-
 &epwmss0 {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index a00145705c9b..6f0a6be93098 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -433,35 +433,10 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &i2c1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c1_pins>;
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index e28a5b82fdf3..a97f9df460c1 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -523,35 +523,10 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &epwmss2 {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/am335x-guardian.dts b/arch/arm/boot/dts/am335x-guardian.dts
index c9611ea4b884..81e0f63e94d3 100644
--- a/arch/arm/boot/dts/am335x-guardian.dts
+++ b/arch/arm/boot/dts/am335x-guardian.dts
@@ -115,10 +115,6 @@
 	};
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &elm {
 	status = "okay";
 };
@@ -328,30 +324,12 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
 &usb0 {
 	dr_mode = "peripheral";
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
 };
 
 &usb1 {
 	dr_mode = "host";
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
 };
 
 &am33xx_pinmux {
diff --git a/arch/arm/boot/dts/am335x-igep0033.dtsi b/arch/arm/boot/dts/am335x-igep0033.dtsi
index eabcc8b2e4ea..c9f354fc984a 100644
--- a/arch/arm/boot/dts/am335x-igep0033.dtsi
+++ b/arch/arm/boot/dts/am335x-igep0033.dtsi
@@ -217,35 +217,10 @@
 	pinctrl-0 = <&uart0_pins>;
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 #include "tps65910.dtsi"
 
 &tps {
diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts
index a8005e975ea2..fef582852820 100644
--- a/arch/arm/boot/dts/am335x-lxm.dts
+++ b/arch/arm/boot/dts/am335x-lxm.dts
@@ -283,36 +283,14 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "host";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &cpsw_emac0 {
 	phy-handle = <&ethphy0>;
 	phy-mode = "rmii";
diff --git a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi
index 671d4a5da9c4..6495a125c01f 100644
--- a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi
+++ b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi
@@ -111,27 +111,10 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 /* Power */
 &vbat {
 	regulator-name = "vbat";
diff --git a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts
index 783d411f2cef..244df9c5a537 100644
--- a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts
+++ b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts
@@ -290,36 +290,14 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "host";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 #include "tps65910.dtsi"
 
 &tps {
diff --git a/arch/arm/boot/dts/am335x-netcan-plus-1xx.dts b/arch/arm/boot/dts/am335x-netcan-plus-1xx.dts
new file mode 100644
index 000000000000..1e4dbc85c120
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-netcan-plus-1xx.dts
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+/*
+ * VScom OnRISC
+ * http://www.vscom.de
+ */
+
+/dts-v1/;
+
+#include "am335x-baltos.dtsi"
+#include "am335x-baltos-leds.dtsi"
+
+/ {
+	model = "NetCAN";
+
+	leds {
+		pinctrl-names = "default";
+		pinctrl-0 = <&user_leds_s0>;
+
+		compatible = "gpio-leds";
+
+		led@1 {
+			label = "can_data";
+			linux,default-trigger = "netdev";
+			gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+		led@2 {
+			label = "can_error";
+			gpios = <&gpio0 15 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+	};
+};
+
+&am33xx_pinmux {
+	user_leds_s0: user_leds_s0 {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_UART1_RXD, PIN_OUTPUT_PULLDOWN, MUX_MODE7)	/* CAN Data LED */
+			AM33XX_PADCONF(AM335X_PIN_UART1_TXD, PIN_OUTPUT_PULLDOWN, MUX_MODE7)	/* CAN Error LED */
+		>;
+	};
+
+	dcan1_pins: pinmux_dcan1_pins {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT, MUX_MODE2)	/* CAN TX */
+			AM33XX_PADCONF(AM335X_PIN_UART0_RTSN, PIN_INPUT, MUX_MODE2)	/* CAN RX */
+		>;
+	};
+};
+
+&usb0_phy {
+	status = "okay";
+};
+
+&usb0 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&davinci_mdio {
+	phy0: ethernet-phy@0 {
+		reg = <1>;
+	};
+};
+
+&cpsw_emac0 {
+	phy-mode = "rmii";
+	dual_emac_res_vlan = <1>;
+	phy-handle = <&phy0>;
+};
+
+&cpsw_emac1 {
+	phy-mode = "rgmii-id";
+	dual_emac_res_vlan = <2>;
+	phy-handle = <&phy1>;
+};
+
+&dcan1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&dcan1_pins>;
+
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/am335x-netcom-plus-2xx.dts b/arch/arm/boot/dts/am335x-netcom-plus-2xx.dts
new file mode 100644
index 000000000000..9a6cd8ef821f
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-netcom-plus-2xx.dts
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+/*
+ * VScom OnRISC
+ * http://www.vscom.de
+ */
+
+/dts-v1/;
+
+#include "am335x-baltos.dtsi"
+#include "am335x-baltos-leds.dtsi"
+
+/ {
+	model = "NetCom Plus";
+};
+
+&am33xx_pinmux {
+	uart1_pins: pinmux_uart1_pins {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_UART1_RXD, PIN_INPUT, MUX_MODE0)			/* RX */
+			AM33XX_PADCONF(AM335X_PIN_UART1_TXD, PIN_INPUT, MUX_MODE0)			/* TX */
+			AM33XX_PADCONF(AM335X_PIN_UART1_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE0)		/* CTS */
+			AM33XX_PADCONF(AM335X_PIN_UART1_RTSN, PIN_OUTPUT_PULLDOWN, MUX_MODE0)		/* RTS */
+			AM33XX_PADCONF(AM335X_PIN_LCD_VSYNC, PIN_OUTPUT_PULLDOWN, MUX_MODE7)		/* DTR */
+			AM33XX_PADCONF(AM335X_PIN_LCD_HSYNC, PIN_INPUT_PULLDOWN, MUX_MODE7)		/* DSR */
+			AM33XX_PADCONF(AM335X_PIN_LCD_PCLK, PIN_INPUT_PULLDOWN, MUX_MODE7)		/* DCD */
+			AM33XX_PADCONF(AM335X_PIN_LCD_AC_BIAS_EN, PIN_INPUT_PULLDOWN, MUX_MODE7)	/* RI */
+		>;
+	};
+
+	uart2_pins: pinmux_uart2_pins {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_SPI0_SCLK, PIN_INPUT, MUX_MODE1)		/* RX */
+			AM33XX_PADCONF(AM335X_PIN_SPI0_D0, PIN_OUTPUT, MUX_MODE1)      		/* TX */
+			AM33XX_PADCONF(AM335X_PIN_I2C0_SDA, PIN_INPUT_PULLDOWN, MUX_MODE2)	/* CTS */
+			AM33XX_PADCONF(AM335X_PIN_I2C0_SCL, PIN_OUTPUT_PULLDOWN, MUX_MODE2)	/* RTS */
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD12, PIN_OUTPUT_PULLDOWN, MUX_MODE7)	/* DTR */
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD13, PIN_INPUT_PULLDOWN, MUX_MODE7)	/* DSR */
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD14, PIN_INPUT_PULLDOWN, MUX_MODE7)	/* DCD */
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD15, PIN_INPUT_PULLDOWN, MUX_MODE7)	/* RI */
+		>;
+	};
+};
+
+&usb0_phy {
+	status = "okay";
+};
+
+&usb0 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>;
+	dtr-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>;
+	dsr-gpios = <&gpio2 23 GPIO_ACTIVE_LOW>;
+	dcd-gpios = <&gpio2 24 GPIO_ACTIVE_LOW>;
+	rng-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>;
+
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart2_pins>;
+	dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+	dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
+	dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
+	rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
+
+	status = "okay";
+};
+
+&davinci_mdio {
+	phy0: ethernet-phy@0 {
+		reg = <1>;
+	};
+};
+
+&cpsw_emac0 {
+	phy-mode = "rmii";
+	dual_emac_res_vlan = <1>;
+	phy-handle = <&phy0>;
+};
+
+&cpsw_emac1 {
+	phy-mode = "rgmii-id";
+	dual_emac_res_vlan = <2>;
+	phy-handle = <&phy1>;
+};
diff --git a/arch/arm/boot/dts/am335x-netcom-plus-8xx.dts b/arch/arm/boot/dts/am335x-netcom-plus-8xx.dts
new file mode 100644
index 000000000000..2298563f7334
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-netcom-plus-8xx.dts
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+/*
+ * VScom OnRISC
+ * http://www.vscom.de
+ */
+
+/dts-v1/;
+
+#include "am335x-baltos.dtsi"
+
+/ {
+	model = "NetCom Plus";
+};
+
+&am33xx_pinmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&dip_switches>;
+
+	dip_switches: pinmux_dip_switches {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD12, PIN_INPUT_PULLDOWN, MUX_MODE7)
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD13, PIN_INPUT_PULLDOWN, MUX_MODE7)
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD14, PIN_INPUT_PULLDOWN, MUX_MODE7)
+			AM33XX_PADCONF(AM335X_PIN_GPMC_AD15, PIN_INPUT_PULLDOWN, MUX_MODE7)
+		>;
+	};
+
+	tca6416_pins: pinmux_tca6416_pins {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_XDMA_EVENT_INTR1, PIN_INPUT_PULLUP, MUX_MODE7)
+		>;
+	};
+
+	i2c2_pins: pinmux_i2c2_pins {
+		pinctrl-single,pins = <
+			AM33XX_PADCONF(AM335X_PIN_UART1_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE3)
+			AM33XX_PADCONF(AM335X_PIN_UART1_RTSN, PIN_INPUT_PULLDOWN, MUX_MODE3)
+		>;
+	};
+};
+
+&usb0_phy {
+	status = "okay";
+};
+
+&usb1_phy {
+	status = "okay";
+};
+
+&usb0 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&usb1 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&i2c1 {
+	tca6416a: gpio@20 {
+		compatible = "ti,tca6416";
+		reg = <0x20>;
+		gpio-controller;
+		#gpio-cells = <2>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <20 IRQ_TYPE_EDGE_RISING>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&tca6416_pins>;
+	};
+};
+
+&i2c2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c2_pins>;
+
+	status = "okay";
+	clock-frequency = <400000>;
+
+	tca6416b: gpio@20 {
+		compatible = "ti,tca6416";
+		reg = <0x20>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
+
+	tca6416c: gpio@21 {
+		compatible = "ti,tca6416";
+		reg = <0x21>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
+};
+
+&davinci_mdio {
+	phy0: ethernet-phy@0 {
+		reg = <1>;
+	};
+};
+
+&cpsw_emac0 {
+	phy-mode = "rmii";
+	dual_emac_res_vlan = <1>;
+	phy-handle = <&phy0>;
+};
+
+&cpsw_emac1 {
+	phy-mode = "rgmii-id";
+	dual_emac_res_vlan = <2>;
+	phy-handle = <&phy1>;
+};
diff --git a/arch/arm/boot/dts/am335x-osd3358-sm-red.dts b/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
index f47cc9fea253..1d2902083483 100644
--- a/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
+++ b/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
@@ -384,38 +384,16 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "peripheral";
 	interrupts-extended = <&intc 18 &tps 0>;
 	interrupt-names = "mc", "vbus";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &i2c2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c2_pins>;
diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi
index 9bfa032bcada..6c547c83e5dd 100644
--- a/arch/arm/boot/dts/am335x-pcm-953.dtsi
+++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi
@@ -237,31 +237,6 @@
 };
 
 /* USB */
-&cppi41dma {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
-
-&usb1_phy {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/am335x-pdu001.dts b/arch/arm/boot/dts/am335x-pdu001.dts
index 3141255f72c2..e4dcfa087a1b 100644
--- a/arch/arm/boot/dts/am335x-pdu001.dts
+++ b/arch/arm/boot/dts/am335x-pdu001.dts
@@ -384,34 +384,6 @@
 	};
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
-&usb1 {
-	status = "okay";
-};
-
-&cppi41dma  {
-	status = "okay";
-};
-
 /*
  * Disable soc's rtc as we have no VBAT for it. This makes the board
  * rtc (Microchip MCP79400) the default rtc device 'rtc0'.
diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index e7764ecdf65f..6d7608d9377b 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts
@@ -552,38 +552,18 @@
 
 /* USB */
 &usb {
-	status = "okay";
-
 	pinctrl-names = "default";
 	pinctrl-0 = <&usb_pins>;
 };
 
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
         dr_mode = "host";
 };
 
 &usb1 {
-	status = "okay";
         dr_mode = "host";
 };
 
-&cppi41dma {
-	status = "okay";
-};
-
 &am33xx_pinmux {
 	usb_pins: pinmux_usb {
 		pinctrl-single,pins = <
diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts
index ff4f919d22f6..4da719098028 100644
--- a/arch/arm/boot/dts/am335x-pocketbeagle.dts
+++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts
@@ -206,32 +206,10 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "otg";
 };
 
-&usb1_phy {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
-
-&cppi41dma  {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/am335x-regor.dtsi b/arch/arm/boot/dts/am335x-regor.dtsi
index 5aff02a95766..6fbf4ac739e7 100644
--- a/arch/arm/boot/dts/am335x-regor.dtsi
+++ b/arch/arm/boot/dts/am335x-regor.dtsi
@@ -200,24 +200,3 @@
 	status = "okay";
 	linux,rs485-enabled-at-boot-time;
 };
-
-/* USB */
-&cppi41dma {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
index 8678e6e35493..e5fdb7abb0d5 100644
--- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts
+++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
@@ -108,7 +108,7 @@
 
 &cpsw_emac0 {
 	phy-handle = <&ethphy0>;
-	phy-mode = "rgmii-txid";
+	phy-mode = "rgmii-id";
 };
 
 &i2c0 {
diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts
index 5b0368504015..1eaa26533466 100644
--- a/arch/arm/boot/dts/am335x-shc.dts
+++ b/arch/arm/boot/dts/am335x-shc.dts
@@ -117,10 +117,6 @@
 	status = "okay";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &davinci_mdio {
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&davinci_mdio_default>;
@@ -358,20 +354,7 @@
 	status = "okay";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index 2f82095e7210..f4684c8eaffe 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts
@@ -512,36 +512,14 @@
 	status = "disabled";
 };
 
-&usb {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
-};
-
 &usb0 {
-	status = "okay";
 	dr_mode = "otg";
 };
 
 &usb1 {
-	status = "okay";
 	dr_mode = "host";
 };
 
-&cppi41dma  {
-	status = "okay";
-};
-
 &mmc1 {
 	status = "okay";
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/am335x-wega.dtsi b/arch/arm/boot/dts/am335x-wega.dtsi
index 61fc4cd2d164..1359bf8715e6 100644
--- a/arch/arm/boot/dts/am335x-wega.dtsi
+++ b/arch/arm/boot/dts/am335x-wega.dtsi
@@ -191,32 +191,6 @@
 	status = "okay";
 };
 
-/* USB */
-&cppi41dma {
-	status = "okay";
-};
-
-&usb_ctrl_mod {
-	status = "okay";
-};
-
-&usb {
-	status = "okay";
-};
-
-&usb0 {
-	status = "okay";
-};
-
-&usb0_phy {
-	status = "okay";
-};
-
 &usb1 {
 	dr_mode = "host";
-	status = "okay";
-};
-
-&usb1_phy {
-	status = "okay";
 };
diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
index 7a9eb2b0d45b..3a8a205c27b5 100644
--- a/arch/arm/boot/dts/am33xx-l4.dtsi
+++ b/arch/arm/boot/dts/am33xx-l4.dtsi
@@ -129,7 +129,6 @@
 
 		gpio0_target: target-module@7000 {	/* 0x44e07000, ap 14 20.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio1";
 			reg = <0x7000 0x4>,
 			      <0x7010 0x4>,
 			      <0x7114 0x4>;
@@ -163,7 +162,6 @@
 
 		target-module@9000 {			/* 0x44e09000, ap 16 04.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart1";
 			reg = <0x9050 0x4>,
 			      <0x9054 0x4>,
 			      <0x9058 0x4>;
@@ -195,7 +193,6 @@
 
 		target-module@b000 {			/* 0x44e0b000, ap 18 48.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c1";
 			reg = <0xb000 0x8>,
 			      <0xb010 0x8>,
 			      <0xb090 0x8>;
@@ -306,6 +303,13 @@
 					};
 				};
 
+				usb_ctrl_mod: control@620 {
+					compatible = "ti,am335x-usb-ctrl-module";
+					reg = <0x620 0x10>,
+					      <0x648 0x4>;
+					reg-names = "phy_ctrl", "wakeup";
+				};
+
 				wkup_m3_ipc: wkup_m3_ipc@1324 {
 					compatible = "ti,am3352-wkup-m3-ipc";
 					reg = <0x1324 0x24>;
@@ -368,7 +372,6 @@
 
 		target-module@35000 {			/* 0x44e35000, ap 29 50.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "wd_timer2";
 			reg = <0x35000 0x4>,
 			      <0x35010 0x4>,
 			      <0x35014 0x4>;
@@ -912,7 +915,6 @@
 
 		target-module@22000 {			/* 0x48022000, ap 10 12.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart2";
 			reg = <0x22050 0x4>,
 			      <0x22054 0x4>,
 			      <0x22058 0x4>;
@@ -944,7 +946,6 @@
 
 		target-module@24000 {			/* 0x48024000, ap 12 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart3";
 			reg = <0x24050 0x4>,
 			      <0x24054 0x4>,
 			      <0x24058 0x4>;
@@ -976,7 +977,6 @@
 
 		target-module@2a000 {			/* 0x4802a000, ap 14 2a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c2";
 			reg = <0x2a000 0x8>,
 			      <0x2a010 0x8>,
 			      <0x2a090 0x8>;
@@ -1046,7 +1046,6 @@
 
 		target-module@38000 {			/* 0x48038000, ap 16 02.0 */
 			compatible = "ti,sysc-omap4-simple", "ti,sysc";
-			ti,hwmods = "mcasp0";
 			reg = <0x38000 0x4>,
 			      <0x38004 0x4>;
 			reg-names = "rev", "sysc";
@@ -1077,7 +1076,6 @@
 
 		target-module@3c000 {			/* 0x4803c000, ap 20 32.0 */
 			compatible = "ti,sysc-omap4-simple", "ti,sysc";
-			ti,hwmods = "mcasp1";
 			reg = <0x3c000 0x4>,
 			      <0x3c004 0x4>;
 			reg-names = "rev", "sysc";
@@ -1270,7 +1268,6 @@
 
 		target-module@4c000 {			/* 0x4804c000, ap 32 36.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio2";
 			reg = <0x4c000 0x4>,
 			      <0x4c010 0x4>,
 			      <0x4c114 0x4>;
@@ -1312,7 +1309,6 @@
 
 		target-module@60000 {			/* 0x48060000, ap 36 0c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc1";
 			reg = <0x602fc 0x4>,
 			      <0x60110 0x4>,
 			      <0x60114 0x4>;
@@ -1385,7 +1381,6 @@
 
 		target-module@c8000 {			/* 0x480c8000, ap 87 06.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox";
 			reg = <0xc8000 0x4>,
 			      <0xc8010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1506,7 +1501,6 @@
 
 		target-module@9c000 {			/* 0x4819c000, ap 46 5a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c3";
 			reg = <0x9c000 0x8>,
 			      <0x9c010 0x8>,
 			      <0x9c090 0x8>;
@@ -1592,7 +1586,6 @@
 
 		target-module@a6000 {			/* 0x481a6000, ap 48 16.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart4";
 			reg = <0xa6050 0x4>,
 			      <0xa6054 0x4>,
 			      <0xa6058 0x4>;
@@ -1622,7 +1615,6 @@
 
 		target-module@a8000 {			/* 0x481a8000, ap 50 20.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart5";
 			reg = <0xa8050 0x4>,
 			      <0xa8054 0x4>,
 			      <0xa8058 0x4>;
@@ -1652,7 +1644,6 @@
 
 		target-module@aa000 {			/* 0x481aa000, ap 52 1a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart6";
 			reg = <0xaa050 0x4>,
 			      <0xaa054 0x4>,
 			      <0xaa058 0x4>;
@@ -1682,7 +1673,6 @@
 
 		target-module@ac000 {			/* 0x481ac000, ap 54 38.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio3";
 			reg = <0xac000 0x4>,
 			      <0xac010 0x4>,
 			      <0xac114 0x4>;
@@ -1716,7 +1706,6 @@
 
 		target-module@ae000 {			/* 0x481ae000, ap 56 3a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio4";
 			reg = <0xae000 0x4>,
 			      <0xae010 0x4>,
 			      <0xae114 0x4>;
@@ -1806,7 +1795,6 @@
 
 		target-module@d8000 {			/* 0x481d8000, ap 64 66.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc2";
 			reg = <0xd82fc 0x4>,
 			      <0xd8110 0x4>,
 			      <0xd8114 0x4>;
@@ -2061,7 +2049,6 @@
 
 		target-module@10000 {			/* 0x48310000, ap 76 4e.1 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "rng";
 			reg = <0x11fe0 0x4>,
 			      <0x11fe4 0x4>;
 			reg-names = "rev", "sysc";
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index fb6b8aa12cc5..646f11430dad 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -236,7 +236,6 @@
 
 		target-module@47810000 {
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc3";
 			reg = <0x478102fc 0x4>,
 			      <0x47810110 0x4>,
 			      <0x47810114 0x4>;
@@ -263,37 +262,38 @@
 			};
 		};
 
-		usb: usb@47400000 {
-			compatible = "ti,am33xx-usb";
-			reg = <0x47400000 0x1000>;
-			ranges;
+		usb: target-module@47400000 {
+			compatible = "ti,sysc-omap4", "ti,sysc";
+			reg = <0x47400000 0x4>,
+			      <0x47400010 0x4>;
+			reg-names = "rev", "sysc";
+			ti,sysc-mask = <(SYSC_OMAP4_FREEEMU |
+					 SYSC_OMAP2_SOFTRESET)>;
+			ti,sysc-midle = <SYSC_IDLE_FORCE>,
+					<SYSC_IDLE_NO>,
+					<SYSC_IDLE_SMART>;
+			ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+					<SYSC_IDLE_NO>,
+					<SYSC_IDLE_SMART>,
+					<SYSC_IDLE_SMART_WKUP>;
+			clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>;
+			clock-names = "fck";
 			#address-cells = <1>;
 			#size-cells = <1>;
-			ti,hwmods = "usb_otg_hs";
-			status = "disabled";
-
-			usb_ctrl_mod: control@44e10620 {
-				compatible = "ti,am335x-usb-ctrl-module";
-				reg = <0x44e10620 0x10
-					0x44e10648 0x4>;
-				reg-names = "phy_ctrl", "wakeup";
-				status = "disabled";
-			};
+			ranges = <0x0 0x47400000 0x5000>;
 
-			usb0_phy: usb-phy@47401300 {
+			usb0_phy: usb-phy@1300 {
 				compatible = "ti,am335x-usb-phy";
-				reg = <0x47401300 0x100>;
+				reg = <0x1300 0x100>;
 				reg-names = "phy";
-				status = "disabled";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
 				#phy-cells = <0>;
 			};
 
-			usb0: usb@47401000 {
+			usb0: usb@1400 {
 				compatible = "ti,musb-am33xx";
-				status = "disabled";
-				reg = <0x47401400 0x400
-					0x47401000 0x200>;
+				reg = <0x1400 0x400>,
+				      <0x1000 0x200>;
 				reg-names = "mc", "control";
 
 				interrupts = <18>;
@@ -329,20 +329,18 @@
 					"tx14", "tx15";
 			};
 
-			usb1_phy: usb-phy@47401b00 {
+			usb1_phy: usb-phy@1b00 {
 				compatible = "ti,am335x-usb-phy";
-				reg = <0x47401b00 0x100>;
+				reg = <0x1b00 0x100>;
 				reg-names = "phy";
-				status = "disabled";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
 				#phy-cells = <0>;
 			};
 
-			usb1: usb@47401800 {
+			usb1: usb@1800 {
 				compatible = "ti,musb-am33xx";
-				status = "disabled";
-				reg = <0x47401c00 0x400
-					0x47401800 0x200>;
+				reg = <0x1c00 0x400>,
+				      <0x1800 0x200>;
 				reg-names = "mc", "control";
 				interrupts = <19>;
 				interrupt-names = "mc";
@@ -377,36 +375,35 @@
 					"tx14", "tx15";
 			};
 
-			cppi41dma: dma-controller@47402000 {
+			cppi41dma: dma-controller@2000 {
 				compatible = "ti,am3359-cppi41";
-				reg =  <0x47400000 0x1000
-					0x47402000 0x1000
-					0x47403000 0x1000
-					0x47404000 0x4000>;
+				reg =  <0x0000 0x1000>,
+				       <0x2000 0x1000>,
+				       <0x3000 0x1000>,
+				       <0x4000 0x4000>;
 				reg-names = "glue", "controller", "scheduler", "queuemgr";
 				interrupts = <17>;
 				interrupt-names = "glue";
 				#dma-cells = <2>;
 				#dma-channels = <30>;
 				#dma-requests = <256>;
-				status = "disabled";
 			};
 		};
 
-		ocmcram: ocmcram@40300000 {
+		ocmcram: sram@40300000 {
 			compatible = "mmio-sram";
 			reg = <0x40300000 0x10000>; /* 64k */
 			ranges = <0x0 0x40300000 0x10000>;
 			#address-cells = <1>;
 			#size-cells = <1>;
 
-			pm_sram_code: pm-sram-code@0 {
+			pm_sram_code: pm-code-sram@0 {
 				compatible = "ti,sram";
 				reg = <0x0 0x1000>;
 				protect-exec;
 			};
 
-			pm_sram_data: pm-sram-data@1000 {
+			pm_sram_data: pm-data-sram@1000 {
 				compatible = "ti,sram";
 				reg = <0x1000 0x1000>;
 				pool;
@@ -465,3 +462,29 @@
 
 #include "am33xx-l4.dtsi"
 #include "am33xx-clocks.dtsi"
+
+&prcm {
+	prm_per: prm@c00 {
+		compatible = "ti,am3-prm-inst", "ti,omap-prm-inst";
+		reg = <0xc00 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_wkup: prm@d00 {
+		compatible = "ti,am3-prm-inst", "ti,omap-prm-inst";
+		reg = <0xd00 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_device: prm@f00 {
+		compatible = "ti,am3-prm-inst", "ti,omap-prm-inst";
+		reg = <0xf00 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_gfx: prm@1100 {
+		compatible = "ti,am3-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1100 0x100>;
+		#reset-cells = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index bf3002009b00..125379ecab2f 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -16,6 +16,37 @@
 		can = &hecc;
 	};
 
+	cpus {
+		cpu: cpu@0 {
+			/* Based on OMAP3630 variants OPP50 and OPP100 */
+			operating-points-v2 = <&cpu0_opp_table>;
+
+			clock-latency = <300000>; /* From legacy driver */
+		};
+	};
+
+	cpu0_opp_table: opp-table {
+		compatible = "operating-points-v2-ti-cpu";
+		syscon = <&scm_conf>;
+		/*
+		 * AM3517 TRM only lists 600MHz @ 1.2V, but omap36xx
+		 * appear to operate at 300MHz as well. Since AM3517 only
+		 * lists one operating voltage, it will remain fixed at 1.2V
+		 */
+		opp50-300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+			opp-microvolt = <1200000>;
+			opp-supported-hw = <0xffffffff 0xffffffff>;
+			opp-suspend;
+		};
+
+		opp100-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <1200000>;
+			opp-supported-hw = <0xffffffff 0xffffffff>;
+		};
+	};
+
 	ocp@68000000 {
 		am35x_otg_hs: am35x_otg_hs@5c040000 {
 			compatible = "ti,omap3-musb";
@@ -115,6 +146,12 @@
 	};
 };
 
+/* Not currently working, probably needs at least different clocks */
+&rng_target {
+	status = "disabled";
+	/delete-property/ clocks;
+};
+
 /* Table Table 5-79 of the TRM shows 480ab000 is reserved */
 &usb_otg_hs {
 	status = "disabled";
diff --git a/arch/arm/boot/dts/am3517_mt_ventoux.dts b/arch/arm/boot/dts/am3517_mt_ventoux.dts
index e507e4ae0d88..e7d7124a34ba 100644
--- a/arch/arm/boot/dts/am3517_mt_ventoux.dts
+++ b/arch/arm/boot/dts/am3517_mt_ventoux.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TeeJet Mt.Ventoux";
-	compatible = "teejet,mt_ventoux", "ti,omap3";
+	compatible = "teejet,mt_ventoux", "ti,am3517", "ti,omap3";
 
 	memory@80000000 {
 		device_type = "memory";
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 14bbc438055f..ca0aa3f26c0a 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -230,7 +230,6 @@
 
 		target-module@47810000 {
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc3";
 			reg = <0x478102fc 0x4>,
 			      <0x47810110 0x4>,
 			      <0x47810114 0x4>;
@@ -351,20 +350,20 @@
 			};
 		};
 
-		ocmcram: ocmcram@40300000 {
+		ocmcram: sram@40300000 {
 			compatible = "mmio-sram";
 			reg = <0x40300000 0x40000>; /* 256k */
 			ranges = <0x0 0x40300000 0x40000>;
 			#address-cells = <1>;
 			#size-cells = <1>;
 
-			pm_sram_code: pm-sram-code@0 {
+			pm_sram_code: pm-code-sram@0 {
 				compatible = "ti,sram";
 				reg = <0x0 0x1000>;
 				protect-exec;
 			};
 
-			pm_sram_data: pm-sram-data@1000 {
+			pm_sram_data: pm-data-sram@1000 {
 				compatible = "ti,sram";
 				reg = <0x1000 0x1000>;
 				pool;
@@ -375,3 +374,29 @@
 
 #include "am437x-l4.dtsi"
 #include "am43xx-clocks.dtsi"
+
+&prcm {
+	prm_gfx: prm@400 {
+		compatible = "ti,am4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x400 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_per: prm@800 {
+		compatible = "ti,am4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x800 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_wkup: prm@2000 {
+		compatible = "ti,am4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x2000 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_device: prm@4000 {
+		compatible = "ti,am4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x4000 0x100>;
+		#reset-cells = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index cae4500194fe..811c8cae315b 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -86,7 +86,7 @@
 		};
 
 	lcd0: display {
-		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
 		label = "lcd";
 
 		backlight = <&lcd_bl>;
diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi
index 59770dd3785e..0dd59ee14585 100644
--- a/arch/arm/boot/dts/am437x-l4.dtsi
+++ b/arch/arm/boot/dts/am437x-l4.dtsi
@@ -132,7 +132,6 @@
 
 		target-module@7000 {			/* 0x44e07000, ap 14 20.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio1";
 			reg = <0x7000 0x4>,
 			      <0x7010 0x4>,
 			      <0x7114 0x4>;
@@ -167,7 +166,6 @@
 
 		target-module@9000 {			/* 0x44e09000, ap 16 04.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart1";
 			reg = <0x9050 0x4>,
 			      <0x9054 0x4>,
 			      <0x9058 0x4>;
@@ -195,7 +193,6 @@
 
 		target-module@b000 {			/* 0x44e0b000, ap 18 48.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c1";
 			reg = <0xb000 0x8>,
 			      <0xb010 0x8>,
 			      <0xb090 0x8>;
@@ -373,7 +370,6 @@
 
 		target-module@35000 {			/* 0x44e35000, ap 28 50.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "wd_timer2";
 			reg = <0x35000 0x4>,
 			      <0x35010 0x4>,
 			      <0x35014 0x4>;
@@ -679,7 +675,6 @@
 
 		target-module@22000 {			/* 0x48022000, ap 8 0a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart2";
 			reg = <0x22050 0x4>,
 			      <0x22054 0x4>,
 			      <0x22058 0x4>;
@@ -708,7 +703,6 @@
 
 		target-module@24000 {			/* 0x48024000, ap 10 1c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart3";
 			reg = <0x24050 0x4>,
 			      <0x24054 0x4>,
 			      <0x24058 0x4>;
@@ -737,7 +731,6 @@
 
 		target-module@2a000 {			/* 0x4802a000, ap 12 22.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c2";
 			reg = <0x2a000 0x8>,
 			      <0x2a010 0x8>,
 			      <0x2a090 0x8>;
@@ -817,7 +810,6 @@
 
 		target-module@38000 {			/* 0x48038000, ap 14 04.0 */
 			compatible = "ti,sysc-omap4-simple", "ti,sysc";
-			ti,hwmods = "mcasp0";
 			reg = <0x38000 0x4>,
 			      <0x38004 0x4>;
 			reg-names = "rev", "sysc";
@@ -849,7 +841,6 @@
 
 		target-module@3c000 {			/* 0x4803c000, ap 16 2a.0 */
 			compatible = "ti,sysc-omap4-simple", "ti,sysc";
-			ti,hwmods = "mcasp1";
 			reg = <0x3c000 0x4>,
 			      <0x3c004 0x4>;
 			reg-names = "rev", "sysc";
@@ -1048,7 +1039,6 @@
 
 		target-module@4c000 {			/* 0x4804c000, ap 28 36.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio2";
 			reg = <0x4c000 0x4>,
 			      <0x4c010 0x4>,
 			      <0x4c114 0x4>;
@@ -1083,7 +1073,6 @@
 
 		target-module@60000 {			/* 0x48060000, ap 30 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc1";
 			reg = <0x602fc 0x4>,
 			      <0x60110 0x4>,
 			      <0x60114 0x4>;
@@ -1149,7 +1138,6 @@
 
 		target-module@c8000 {			/* 0x480c8000, ap 73 06.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox";
 			reg = <0xc8000 0x4>,
 			      <0xc8010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1262,7 +1250,6 @@
 
 		target-module@9c000 {			/* 0x4819c000, ap 38 52.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c3";
 			reg = <0x9c000 0x8>,
 			      <0x9c010 0x8>,
 			      <0x9c090 0x8>;
@@ -1388,7 +1375,6 @@
 
 		target-module@a6000 {			/* 0x481a6000, ap 40 16.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart4";
 			reg = <0xa6050 0x4>,
 			      <0xa6054 0x4>,
 			      <0xa6058 0x4>;
@@ -1417,7 +1403,6 @@
 
 		target-module@a8000 {			/* 0x481a8000, ap 42 20.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart5";
 			reg = <0xa8050 0x4>,
 			      <0xa8054 0x4>,
 			      <0xa8058 0x4>;
@@ -1446,7 +1431,6 @@
 
 		target-module@aa000 {			/* 0x481aa000, ap 44 12.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart6";
 			reg = <0xaa050 0x4>,
 			      <0xaa054 0x4>,
 			      <0xaa058 0x4>;
@@ -1475,7 +1459,6 @@
 
 		target-module@ac000 {			/* 0x481ac000, ap 46 30.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio3";
 			reg = <0xac000 0x4>,
 			      <0xac010 0x4>,
 			      <0xac114 0x4>;
@@ -1510,7 +1493,6 @@
 
 		target-module@ae000 {			/* 0x481ae000, ap 48 32.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio4";
 			reg = <0xae000 0x4>,
 			      <0xae010 0x4>,
 			      <0xae114 0x4>;
@@ -1614,7 +1596,6 @@
 
 		target-module@d8000 {			/* 0x481d8000, ap 54 5e.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mmc2";
 			reg = <0xd82fc 0x4>,
 			      <0xd8110 0x4>,
 			      <0xd8114 0x4>;
@@ -1999,7 +1980,6 @@
 
 		target-module@10000 {			/* 0x48310000, ap 64 4e.1 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "rng";
 			reg = <0x11fe0 0x4>,
 			      <0x11fe4 0x4>;
 			reg-names = "rev", "sysc";
@@ -2038,7 +2018,6 @@
 
 		target-module@20000 {			/* 0x48320000, ap 82 34.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio5";
 			reg = <0x20000 0x4>,
 			      <0x20010 0x4>,
 			      <0x20114 0x4>;
@@ -2073,7 +2052,6 @@
 
 		target-module@22000 {			/* 0x48322000, ap 116 64.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio6";
 			reg = <0x22000 0x4>,
 			      <0x22010 0x4>,
 			      <0x22114 0x4>;
@@ -2296,7 +2274,6 @@
 
 		target-module@47000 {			/* 0x48347000, ap 110 70.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "hdq1w";
 			reg = <0x47000 0x4>,
 			      <0x47014 0x4>,
 			      <0x47018 0x4>;
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 95314121d111..a6fbc088daa8 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -42,7 +42,7 @@
 	};
 
 	lcd0: display {
-		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
 		label = "lcd";
 
 		backlight = <&lcd_bl>;
@@ -848,6 +848,7 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&spi0_pins_default>;
 	pinctrl-1 = <&spi0_pins_sleep>;
+	ti,pindir-d0-out-d1-in = <1>;
 };
 
 &spi1 {
@@ -855,6 +856,7 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&spi1_pins_default>;
 	pinctrl-1 = <&spi1_pins_sleep>;
+	ti,pindir-d0-out-d1-in = <1>;
 };
 
 &usb2_phy1 {
diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 0aaacea1d887..669559c9c95b 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -167,11 +167,7 @@
 
 &pcie1_rc {
 	status = "okay";
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-};
-
-&pcie1_ep {
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+	gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>;
 };
 
 &mmc1 {
@@ -186,3 +182,30 @@
 	pinctrl-1 = <&mmc2_pins_hs>;
 	pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>;
 };
+
+&mac_sw {
+	pinctrl-names = "default", "sleep";
+	status = "okay";
+};
+
+&cpsw_port1 {
+	phy-handle = <&ethphy0_sw>;
+	phy-mode = "rgmii";
+	ti,dual-emac-pvid = <1>;
+};
+
+&cpsw_port2 {
+	phy-handle = <&ethphy1_sw>;
+	phy-mode = "rgmii";
+	ti,dual-emac-pvid = <2>;
+};
+
+&davinci_mdio_sw {
+	ethphy0_sw: ethernet-phy@0 {
+		reg = <0>;
+	};
+
+	ethphy1_sw: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/am572x-idk-common.dtsi b/arch/arm/boot/dts/am572x-idk-common.dtsi
index a064f13b3880..ddf123620e96 100644
--- a/arch/arm/boot/dts/am572x-idk-common.dtsi
+++ b/arch/arm/boot/dts/am572x-idk-common.dtsi
@@ -147,10 +147,6 @@
 	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
 };
 
-&pcie1_ep {
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-};
-
 &mailbox5 {
 	status = "okay";
 	mbox_ipu1_ipc3x: mbox_ipu1_ipc3x {
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index ea1c119feaa5..c3d966904d64 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -27,3 +27,8 @@
 	pinctrl-1 = <&mmc2_pins_hs>;
 	pinctrl-2 = <&mmc2_pins_ddr_rev20>;
 };
+
+&mac {
+	status = "okay";
+	dual_emac;
+};
diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts
index 7935d70874ce..fa0088025b2c 100644
--- a/arch/arm/boot/dts/am574x-idk.dts
+++ b/arch/arm/boot/dts/am574x-idk.dts
@@ -35,3 +35,8 @@
 	pinctrl-1 = <&mmc2_pins_default>;
 	pinctrl-2 = <&mmc2_pins_default>;
 };
+
+&mac {
+	status = "okay";
+	dual_emac;
+};
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index bc76f1705c0f..a813a0cf3ff3 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -29,6 +29,27 @@
 		reg = <0x0 0x80000000 0x0 0x80000000>;
 	};
 
+	main_12v0: fixedregulator-main_12v0 {
+		/* main supply */
+		compatible = "regulator-fixed";
+		regulator-name = "main_12v0";
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
+	evm_5v0: fixedregulator-evm_5v0 {
+		/* Output of TPS54531D */
+		compatible = "regulator-fixed";
+		regulator-name = "evm_5v0";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&main_12v0>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
 	vdd_3v3: fixedregulator-vdd_3v3 {
 		compatible = "regulator-fixed";
 		regulator-name = "vdd_3v3";
@@ -547,10 +568,6 @@
 	gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
 };
 
-&pcie1_ep {
-	gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
-};
-
 &mcasp3 {
 	#sound-dai-cells = <0>;
 	assigned-clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
index 7b113b52c3fb..39d1c4ff5749 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
@@ -24,7 +24,7 @@
 };
 
 &mmc2 {
-	pinctrl-names = "default", "hs", "ddr_1_8v";
+	pinctrl-names = "default", "hs", "ddr_3_3v";
 	pinctrl-0 = <&mmc2_pins_default>;
 	pinctrl-1 = <&mmc2_pins_hs>;
 	pinctrl-2 = <&mmc2_pins_ddr_3_3v_rev11 &mmc2_iodelay_ddr_3_3v_rev11_conf>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts b/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts
index 30c500b15b21..4187a9729f96 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts
@@ -24,7 +24,7 @@
 };
 
 &mmc2 {
-	pinctrl-names = "default", "hs", "ddr_1_8v";
+	pinctrl-names = "default", "hs", "ddr_3_3v";
 	pinctrl-0 = <&mmc2_pins_default>;
 	pinctrl-1 = <&mmc2_pins_hs>;
 	pinctrl-2 = <&mmc2_pins_ddr_rev20>;
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index 423855a2a2d6..398721c7201c 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -363,11 +363,6 @@
 	ext-clk-src;
 };
 
-&mac {
-	status = "okay";
-	dual_emac;
-};
-
 &cpsw_emac0 {
 	phy-handle = <&ethphy0>;
 	phy-mode = "rgmii";
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 3f4bb44d85f0..e038abc0c6b4 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -103,6 +103,11 @@
 			#size-cells = <1>;
 			ranges = <0 MBUS_ID(0xf0, 0x01) 0 0x100000>;
 
+			sdramc: sdramc@1400 {
+				compatible = "marvell,armada-xp-sdram-controller";
+				reg = <0x1400 0x500>;
+			};
+
 			L2: cache-controller@8000 {
 				compatible = "arm,pl310-cache";
 				reg = <0x8000 0x1000>;
diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 267d0c178e55..654648b05c7c 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -90,7 +90,7 @@
 		};
 
 		internal-regs {
-			sdramc@1400 {
+			sdramc: sdramc@1400 {
 				compatible = "marvell,armada-xp-sdram-controller";
 				reg = <0x1400 0x500>;
 			};
diff --git a/arch/arm/boot/dts/armada-xp-db-xc3-24g4xg.dts b/arch/arm/boot/dts/armada-xp-db-xc3-24g4xg.dts
index df048050615f..4ec0ae01b61d 100644
--- a/arch/arm/boot/dts/armada-xp-db-xc3-24g4xg.dts
+++ b/arch/arm/boot/dts/armada-xp-db-xc3-24g4xg.dts
@@ -33,6 +33,11 @@
 	};
 };
 
+&L2 {
+	arm,parity-enable;
+	marvell,ecc-enable;
+};
+
 &devbus_bootcs {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi
index ee15c77d3689..6c19984d668e 100644
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -36,7 +36,7 @@
 		};
 
 		internal-regs {
-			sdramc@1400 {
+			sdramc: sdramc@1400 {
 				compatible = "marvell,armada-xp-sdram-controller";
 				reg = <0x1400 0x500>;
 			};
diff --git a/arch/arm/boot/dts/aspeed-ast2500-evb.dts b/arch/arm/boot/dts/aspeed-ast2500-evb.dts
index c9d88c90135e..8bec21ed0de5 100644
--- a/arch/arm/boot/dts/aspeed-ast2500-evb.dts
+++ b/arch/arm/boot/dts/aspeed-ast2500-evb.dts
@@ -40,6 +40,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "bmc";
+		spi-max-frequency = <50000000>;
 #include "openbmc-flash-layout.dtsi"
 	};
 };
@@ -50,6 +51,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "pnor";
+		spi-max-frequency = <100000000>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts
index 9870553919b7..4afa8662c4e8 100644
--- a/arch/arm/boot/dts/aspeed-ast2600-evb.dts
+++ b/arch/arm/boot/dts/aspeed-ast2600-evb.dts
@@ -55,6 +55,9 @@
 
 	phy-mode = "rgmii";
 	phy-handle = <&ethphy1>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rgmii2_default>;
 };
 
 &mac2 {
@@ -62,6 +65,9 @@
 
 	phy-mode = "rgmii";
 	phy-handle = <&ethphy2>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rgmii3_default>;
 };
 
 &mac3 {
@@ -69,12 +75,141 @@
 
 	phy-mode = "rgmii";
 	phy-handle = <&ethphy3>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rgmii4_default>;
 };
 
-&emmc {
+&emmc_controller {
 	status = "okay";
 };
 
+&emmc {
+	non-removable;
+	bus-width = <4>;
+	max-frequency = <52000000>;
+};
+
 &rtc {
 	status = "okay";
 };
+
+&fmc {
+	status = "okay";
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "bmc";
+		spi-max-frequency = <50000000>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			u-boot@0 {
+				reg = <0x0 0xe0000>; // 896KB
+				label = "u-boot";
+			};
+
+			u-boot-env@e0000 {
+				reg = <0xe0000 0x20000>; // 128KB
+				label = "u-boot-env";
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x900000>; // 9MB
+				label = "kernel";
+			};
+
+			rofs@a00000 {
+				reg = <0xa00000 0x2000000>; // 32MB
+				label = "rofs";
+			};
+
+			rwfs@6000000 {
+				reg = <0x2a00000 0x1600000>; // 22MB
+				label = "rwfs";
+			};
+		};
+	};
+};
+
+&spi1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_spi1_default>;
+
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "pnor";
+		spi-max-frequency = <100000000>;
+	};
+};
+
+&uart5 {
+	// Workaround for A0
+	compatible = "snps,dw-apb-uart";
+};
+
+&i2c0 {
+	status = "okay";
+
+	temp@2e {
+		compatible = "adi,adt7490";
+		reg = <0x2e>;
+	};
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+};
+
+&i2c4 {
+	status = "okay";
+};
+
+&i2c5 {
+	status = "okay";
+};
+
+&i2c6 {
+	status = "okay";
+};
+
+&i2c7 {
+	status = "okay";
+};
+
+&i2c8 {
+	status = "okay";
+};
+
+&i2c9 {
+	status = "okay";
+};
+
+&i2c12 {
+	status = "okay";
+};
+
+&i2c13 {
+	status = "okay";
+};
+
+&i2c14 {
+	status = "okay";
+};
+
+&i2c15 {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/aspeed-bmc-arm-stardragon4800-rep2.dts b/arch/arm/boot/dts/aspeed-bmc-arm-stardragon4800-rep2.dts
index 521afbea2c5b..2c29ac037d32 100644
--- a/arch/arm/boot/dts/aspeed-bmc-arm-stardragon4800-rep2.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-arm-stardragon4800-rep2.dts
@@ -92,6 +92,9 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii2_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC2CLK>,
+		 <&syscon ASPEED_CLK_MAC2RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-facebook-cmm.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-cmm.dts
index d519d307aa2a..016bbcb99bb6 100644
--- a/arch/arm/boot/dts/aspeed-bmc-facebook-cmm.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-facebook-cmm.dts
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook Inc.
 /dts-v1/;
 
-#include "aspeed-g5.dtsi"
+#include "ast2500-facebook-netbmc-common.dtsi"
 
 / {
 	model = "Facebook Backpack CMM BMC";
@@ -53,10 +53,6 @@
 		bootargs = "console=ttyS1,9600n8 root=/dev/ram rw earlyprintk";
 	};
 
-	memory@80000000 {
-		reg = <0x80000000 0x20000000>;
-	};
-
 	ast-adc-hwmon {
 		compatible = "iio-hwmon";
 		io-channels = <&adc 0>, <&adc 1>, <&adc 2>, <&adc 3>,
@@ -64,39 +60,7 @@
 	};
 };
 
-&pinctrl {
-	aspeed,external-nodes = <&gfx &lhc>;
-};
-
-/*
- * Update reset type to "system" (full chip) to fix warm reboot hang issue
- * when reset type is set to default ("soc", gated by reset mask registers).
- */
-&wdt1 {
-	status = "okay";
-	aspeed,reset-type = "system";
-};
-
-/*
- * wdt2 is not used by Backpack CMM.
- */
-&wdt2 {
-	status = "disabled";
-};
-
-&fmc {
-	status = "okay";
-	flash@0 {
-		status = "okay";
-		m25p,fast-read;
-		label = "bmc";
-#include "facebook-bmc-flash-layout.dtsi"
-	};
-};
-
 &uart1 {
-	status = "okay";
-	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_txd1_default
 		     &pinctrl_rxd1_default
 		     &pinctrl_ncts1_default
@@ -107,8 +71,6 @@
 };
 
 &uart3 {
-	status = "okay";
-	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_txd3_default
 		     &pinctrl_rxd3_default
 		     &pinctrl_ncts3_default
@@ -123,17 +85,6 @@
 		     &pinctrl_rxd4_default>;
 };
 
-&uart5 {
-	status = "okay";
-};
-
-&mac1 {
-	status = "okay";
-	no-hw-checksum;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_rgmii2_default &pinctrl_mdio2_default>;
-};
-
 /*
  * I2C bus reserved for communication with COM-E.
  */
@@ -380,3 +331,18 @@
 &ehci1 {
 	status = "okay";
 };
+
+&vhub {
+	status = "disabled";
+};
+
+&sdhci0 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sd1_default>;
+};
+
+&sdhci1 {
+	status = "disabled";
+};
diff --git a/arch/arm/boot/dts/aspeed-bmc-facebook-minipack.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-minipack.dts
index c05478296446..88ce4ff9f47e 100644
--- a/arch/arm/boot/dts/aspeed-bmc-facebook-minipack.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-facebook-minipack.dts
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook Inc.
 /dts-v1/;
 
-#include "aspeed-g5.dtsi"
+#include "ast2500-facebook-netbmc-common.dtsi"
 
 / {
 	model = "Facebook Minipack 100 BMC";
@@ -76,35 +76,36 @@
 		stdout-path = &uart1;
 		bootargs = "debug console=ttyS1,9600n8 root=/dev/ram rw";
 	};
-
-	memory@80000000 {
-		reg = <0x80000000 0x20000000>;
-	};
 };
 
-&wdt1 {
+&wdt2 {
 	status = "okay";
 	aspeed,reset-type = "system";
 };
 
-&wdt2 {
-	status = "okay";
-	aspeed,reset-type = "system";
+/*
+ * Both firmware flashes are 64MB on Minipack BMC.
+ */
+&fmc_flash0 {
+	partitions {
+		data0@1c00000 {
+			reg = <0x1c00000 0x2400000>;
+		};
+		flash0@0 {
+			reg = <0x0 0x4000000>;
+		};
+	};
 };
 
-&fmc {
-	status = "okay";
-	flash@0 {
-		status = "okay";
-		m25p,fast-read;
-		label = "bmc";
-#include "facebook-bmc-flash-layout.dtsi"
+&fmc_flash1 {
+	partitions {
+		flash1@0 {
+			reg = <0x0 0x4000000>;
+		};
 	};
 };
 
 &uart1 {
-	status = "okay";
-	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_txd1_default
 		     &pinctrl_rxd1_default
 		     &pinctrl_ncts1_default
@@ -120,13 +121,6 @@
 		     &pinctrl_rxd2_default>;
 };
 
-&uart3 {
-	status = "okay";
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_txd3_default
-		     &pinctrl_rxd3_default>;
-};
-
 &uart4 {
 	status = "okay";
 	pinctrl-names = "default";
@@ -134,17 +128,6 @@
 		     &pinctrl_rxd4_default>;
 };
 
-&uart5 {
-	status = "okay";
-};
-
-&mac1 {
-	status = "okay";
-	no-hw-checksum;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_rgmii2_default &pinctrl_mdio2_default>;
-};
-
 &i2c0 {
 	status = "okay";
 	bus-frequency = <400000>;
@@ -423,7 +406,3 @@
 &i2c13 {
 	status = "okay";
 };
-
-&vhub {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts
index 682f729ea25e..5d7cbd9164d4 100644
--- a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts
@@ -126,6 +126,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-facebook-yamp.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-yamp.dts
index 4e09a9cf32b7..52933598aac6 100644
--- a/arch/arm/boot/dts/aspeed-bmc-facebook-yamp.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-facebook-yamp.dts
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook Inc.
 /dts-v1/;
 
-#include "aspeed-g5.dtsi"
+#include "ast2500-facebook-netbmc-common.dtsi"
 
 / {
 	model = "Facebook YAMP 100 BMC";
@@ -23,47 +23,6 @@
 		stdout-path = &uart5;
 		bootargs = "console=ttyS0,9600n8 root=/dev/ram rw";
 	};
-
-	memory@80000000 {
-		reg = <0x80000000 0x20000000>;
-	};
-};
-
-&pinctrl {
-	aspeed,external-nodes = <&gfx &lhc>;
-};
-
-/*
- * Update reset type to "system" (full chip) to fix warm reboot hang issue
- * when reset type is set to default ("soc", gated by reset mask registers).
- */
-&wdt1 {
-	status = "okay";
-	aspeed,reset-type = "system";
-};
-
-/*
- * wdt2 is not used by Yamp.
- */
-&wdt2 {
-	status = "disabled";
-};
-
-&fmc {
-	status = "okay";
-	flash@0 {
-		status = "okay";
-		m25p,fast-read;
-		label = "bmc";
-#include "facebook-bmc-flash-layout.dtsi"
-	};
-};
-
-&uart1 {
-	status = "okay";
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_txd1_default
-		     &pinctrl_rxd1_default>;
 };
 
 &uart2 {
@@ -73,23 +32,19 @@
 		     &pinctrl_rxd2_default>;
 };
 
-&uart3 {
-	status = "okay";
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_txd3_default
-		     &pinctrl_rxd3_default>;
-};
-
-&uart5 {
-	status = "okay";
-};
-
 &mac0 {
 	status = "okay";
 	use-ncsi;
 	no-hw-checksum;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
+};
+
+&mac1 {
+	status = "disabled";
 };
 
 &i2c0 {
@@ -154,7 +109,3 @@
 &i2c13 {
 	status = "okay";
 };
-
-&vhub {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
new file mode 100644
index 000000000000..13f7aefe045e
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2019 IBM Corp.
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+
+/ {
+	model = "Rainier";
+	compatible = "ibm,rainier-bmc", "aspeed,ast2600";
+
+	aliases {
+		serial4 = &uart5;
+	};
+
+	chosen {
+		stdout-path = &uart5;
+		bootargs = "console=ttyS4,115200n8";
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x40000000>;
+	};
+
+	reserved-memory {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		flash_memory: region@B8000000 {
+			no-map;
+			reg = <0xB8000000 0x04000000>; /* 64M */
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		ps0-presence {
+			label = "ps0-presence";
+			gpios = <&gpio0 ASPEED_GPIO(S, 0) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(S, 0)>;
+		};
+
+		ps1-presence {
+			label = "ps1-presence";
+			gpios = <&gpio0 ASPEED_GPIO(S, 1) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(S, 1)>;
+		};
+
+		ps2-presence {
+			label = "ps2-presence";
+			gpios = <&gpio0 ASPEED_GPIO(S, 2) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(S, 2)>;
+		};
+
+		ps3-presence {
+			label = "ps3-presence";
+			gpios = <&gpio0 ASPEED_GPIO(S, 3) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(S, 3)>;
+		};
+	};
+
+};
+
+&emmc_controller {
+	status = "okay";
+};
+
+&emmc {
+	status = "okay";
+};
+
+&ibt {
+	status = "okay";
+};
+
+&i2c0 {
+	status = "okay";
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+
+	power-supply@68 {
+		compatible = "ibm,cffps2";
+		reg = <0x68>;
+	};
+
+	power-supply@69 {
+		compatible = "ibm,cffps2";
+		reg = <0x69>;
+	};
+
+	power-supply@6a {
+		compatible = "ibm,cffps2";
+		reg = <0x6a>;
+	};
+
+	power-supply@6b {
+		compatible = "ibm,cffps2";
+		reg = <0x6b>;
+	};
+};
+
+&i2c4 {
+	status = "okay";
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	tmp275@49 {
+		compatible = "ti,tmp275";
+		reg = <0x49>;
+	};
+
+	tmp275@4a {
+		compatible = "ti,tmp275";
+		reg = <0x4a>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+
+	eeprom@52 {
+		compatible = "atmel,24c64";
+		reg = <0x52>;
+	};
+};
+
+&i2c5 {
+	status = "okay";
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	tmp275@49 {
+		compatible = "ti,tmp275";
+		reg = <0x49>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+};
+
+&i2c6 {
+	status = "okay";
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	tmp275@4a {
+		compatible = "ti,tmp275";
+		reg = <0x4a>;
+	};
+
+	tmp275@4b {
+		compatible = "ti,tmp275";
+		reg = <0x4b>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+
+	eeprom@52 {
+		compatible = "atmel,24c64";
+		reg = <0x52>;
+	};
+
+	eeprom@53 {
+		compatible = "atmel,24c64";
+		reg = <0x53>;
+	};
+};
+
+&i2c7 {
+	status = "okay";
+
+	si7021-a20@20 {
+		compatible = "silabs,si7020";
+		reg = <0x20>;
+	};
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	max31785@52 {
+		compatible = "maxim,max31785a";
+		reg = <0x52>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		fan@0 {
+			compatible = "pmbus-fan";
+			reg = <0>;
+			tach-pulses = <2>;
+		};
+
+		fan@1 {
+			compatible = "pmbus-fan";
+			reg = <1>;
+			tach-pulses = <2>;
+		};
+
+		fan@2 {
+			compatible = "pmbus-fan";
+			reg = <2>;
+			tach-pulses = <2>;
+		};
+
+		fan@3 {
+			compatible = "pmbus-fan";
+			reg = <3>;
+			tach-pulses = <2>;
+		};
+	};
+
+	pca0: pca9552@61 {
+		compatible = "nxp,pca9552";
+		reg = <0x61>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		gpio@0 {
+			reg = <0>;
+		};
+
+		gpio@1 {
+			reg = <1>;
+		};
+
+		gpio@2 {
+			reg = <2>;
+		};
+
+		gpio@3 {
+			reg = <3>;
+		};
+
+		gpio@4 {
+			reg = <4>;
+		};
+
+		gpio@5 {
+			reg = <5>;
+		};
+
+		gpio@6 {
+			reg = <6>;
+		};
+
+		gpio@7 {
+			reg = <7>;
+		};
+
+		gpio@8 {
+			reg = <8>;
+		};
+
+		gpio@9 {
+			reg = <9>;
+		};
+
+		gpio@10 {
+			reg = <10>;
+		};
+
+		gpio@11 {
+			reg = <11>;
+		};
+
+		gpio@12 {
+			reg = <12>;
+		};
+
+		gpio@13 {
+			reg = <13>;
+		};
+
+		gpio@14 {
+			reg = <14>;
+		};
+
+		gpio@15 {
+			reg = <15>;
+		};
+	};
+
+	dps: dps310@76 {
+		compatible = "infineon,dps310";
+		reg = <0x76>;
+		#io-channel-cells = <0>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+};
+
+&i2c8 {
+	status = "okay";
+
+	ucd90320@b {
+		compatible = "ti,ucd90160";
+		reg = <0x0b>;
+	};
+
+	ucd90320@c {
+		compatible = "ti,ucd90160";
+		reg = <0x0c>;
+	};
+
+	ucd90320@11 {
+		compatible = "ti,ucd90160";
+		reg = <0x11>;
+	};
+
+	rtc@32 {
+		compatible = "epson,rx8900";
+		reg = <0x32>;
+	};
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	tmp275@4a {
+		compatible = "ti,tmp275";
+		reg = <0x4a>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+};
+
+&i2c9 {
+	status = "okay";
+
+	ir35221@42 {
+		compatible = "infineon,ir35221";
+		reg = <0x42>;
+	};
+
+	ir35221@43 {
+		compatible = "infineon,ir35221";
+		reg = <0x43>;
+	};
+
+	ir35221@44 {
+		compatible = "infineon,ir35221";
+		reg = <0x44>;
+	};
+
+	tmp423a@4c {
+		compatible = "ti,tmp423";
+		reg = <0x4c>;
+	};
+
+	tmp423b@4d {
+		compatible = "ti,tmp423";
+		reg = <0x4d>;
+	};
+
+	ir35221@72 {
+		compatible = "infineon,ir35221";
+		reg = <0x72>;
+	};
+
+	ir35221@73 {
+		compatible = "infineon,ir35221";
+		reg = <0x73>;
+	};
+
+	ir35221@74 {
+		compatible = "infineon,ir35221";
+		reg = <0x74>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c128";
+		reg = <0x50>;
+	};
+};
+
+&i2c10 {
+	status = "okay";
+
+	ir35221@42 {
+		compatible = "infineon,ir35221";
+		reg = <0x42>;
+	};
+
+	ir35221@43 {
+		compatible = "infineon,ir35221";
+		reg = <0x43>;
+	};
+
+	ir35221@44 {
+		compatible = "infineon,ir35221";
+		reg = <0x44>;
+	};
+
+	tmp423a@4c {
+		compatible = "ti,tmp423";
+		reg = <0x4c>;
+	};
+
+	tmp423b@4d {
+		compatible = "ti,tmp423";
+		reg = <0x4d>;
+	};
+
+	ir35221@72 {
+		compatible = "infineon,ir35221";
+		reg = <0x72>;
+	};
+
+	ir35221@73 {
+		compatible = "infineon,ir35221";
+		reg = <0x73>;
+	};
+
+	ir35221@74 {
+		compatible = "infineon,ir35221";
+		reg = <0x74>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c128";
+		reg = <0x50>;
+	};
+};
+
+&i2c11 {
+	status = "okay";
+
+	tmp275@48 {
+		compatible = "ti,tmp275";
+		reg = <0x48>;
+	};
+
+	tmp275@49 {
+		compatible = "ti,tmp275";
+		reg = <0x49>;
+	};
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+};
+
+&i2c12 {
+	status = "okay";
+};
+
+&i2c13 {
+	status = "okay";
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+};
+
+&i2c14 {
+	status = "okay";
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+};
+
+&i2c15 {
+	status = "okay";
+
+	eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+	};
+};
+
+&vuart1 {
+	status = "okay";
+};
+
+&lpc_ctrl {
+	status = "okay";
+	memory-region = <&flash_memory>;
+};
+
+&mac2 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rmii3_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC3CLK>,
+		 <&syscon ASPEED_CLK_MAC3RCLK>;
+	clock-names = "MACCLK", "RCLK";
+	use-ncsi;
+};
+
+&mac3 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rmii4_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC4CLK>,
+		 <&syscon ASPEED_CLK_MAC4RCLK>;
+	clock-names = "MACCLK", "RCLK";
+	use-ncsi;
+};
+
+&fmc {
+	status = "okay";
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "bmc";
+		spi-max-frequency = <50000000>;
+#include "openbmc-flash-layout-128.dtsi"
+	};
+
+	flash@1 {
+		status = "okay";
+		m25p,fast-read;
+		label = "alt-bmc";
+		spi-max-frequency = <50000000>;
+	};
+};
+
+&spi1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_spi1_default>;
+
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "pnor";
+		spi-max-frequency = <100000000>;
+	};
+};
diff --git a/arch/arm/boot/dts/aspeed-bmc-inspur-fp5280g2.dts b/arch/arm/boot/dts/aspeed-bmc-inspur-fp5280g2.dts
index e9d714a46a60..c17bb7fce7ff 100644
--- a/arch/arm/boot/dts/aspeed-bmc-inspur-fp5280g2.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-inspur-fp5280g2.dts
@@ -148,14 +148,48 @@
 	};
 
 	leds {
-	    compatible = "gpio-leds";
+		compatible = "gpio-leds";
 
-	    power {
-		    label = "power";
-		    /* TODO: dummy gpio */
-		    gpios = <&gpio ASPEED_GPIO(R, 1) GPIO_ACTIVE_LOW>;
-	    };
+		power {
+			label = "power";
+			/* TODO: dummy gpio */
+			gpios = <&gpio ASPEED_GPIO(R, 1) GPIO_ACTIVE_LOW>;
+		};
+
+		init-ok {
+			label = "init-ok";
+			gpios = <&gpio ASPEED_GPIO(B, 7) GPIO_ACTIVE_LOW>;
+		};
+
+		front-memory {
+			label = "front-memory";
+			gpios = <&gpio ASPEED_GPIO(F, 4) GPIO_ACTIVE_LOW>;
+		};
+
+		front-syshot {
+			label = "front-syshot";
+			gpios = <&gpio ASPEED_GPIO(I, 1) GPIO_ACTIVE_LOW>;
+		};
+
+		front-syshealth {
+			label = "front-syshealth";
+			gpios = <&gpio ASPEED_GPIO(I, 0) GPIO_ACTIVE_LOW>;
+		};
 
+		front-fan {
+			label = "front-fan";
+			gpios = <&gpio ASPEED_GPIO(H, 4) GPIO_ACTIVE_LOW>;
+		};
+
+		front-psu {
+			label = "front-psu";
+			gpios = <&gpio ASPEED_GPIO(B, 2) GPIO_ACTIVE_LOW>;
+		};
+
+		identify {
+			label = "identify";
+			gpios = <&gpio ASPEED_GPIO(Z, 7) GPIO_ACTIVE_LOW>;
+		};
 	};
 
 	iio-hwmon-battery {
@@ -239,6 +273,9 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
@@ -749,15 +786,6 @@
 	aspeed,external-nodes = <&gfx &lhc>;
 };
 
-&gpio {
-	pin_gpio_b7 {
-		gpio-hog;
-		gpios = <ASPEED_GPIO(B,7) GPIO_ACTIVE_LOW>;
-		output-high;
-		line-name = "BMC_INIT_OK";
-	};
-};
-
 &wdt1 {
 	aspeed,reset-type = "none";
 	aspeed,external-signal;
diff --git a/arch/arm/boot/dts/aspeed-bmc-inspur-on5263m5.dts b/arch/arm/boot/dts/aspeed-bmc-inspur-on5263m5.dts
index 2337ee23f5c4..80c92e065a10 100644
--- a/arch/arm/boot/dts/aspeed-bmc-inspur-on5263m5.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-inspur-on5263m5.dts
@@ -77,6 +77,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts
index 22dade6393d0..1deb30ec912c 100644
--- a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts
@@ -69,6 +69,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-lenovo-hr630.dts b/arch/arm/boot/dts/aspeed-bmc-lenovo-hr630.dts
index d3695a32e8e0..c29e5f4d86ad 100644
--- a/arch/arm/boot/dts/aspeed-bmc-lenovo-hr630.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-lenovo-hr630.dts
@@ -133,6 +133,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-lenovo-hr855xg2.dts b/arch/arm/boot/dts/aspeed-bmc-lenovo-hr855xg2.dts
index 118eb8bbbf1b..084c455ad4cb 100644
--- a/arch/arm/boot/dts/aspeed-bmc-lenovo-hr855xg2.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-lenovo-hr855xg2.dts
@@ -139,6 +139,9 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-lanyang.dts b/arch/arm/boot/dts/aspeed-bmc-opp-lanyang.dts
index de95112e2a04..42b37a204241 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-lanyang.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-lanyang.dts
@@ -178,6 +178,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-mihawk.dts b/arch/arm/boot/dts/aspeed-bmc-opp-mihawk.dts
index e55cc454b17f..f7e935ede919 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-mihawk.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-mihawk.dts
@@ -449,6 +449,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-palmetto.dts b/arch/arm/boot/dts/aspeed-bmc-opp-palmetto.dts
index b0cb34ccb135..eb4e93a57ff4 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-palmetto.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-palmetto.dts
@@ -87,6 +87,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "bmc";
+		spi-max-frequency = <50000000>;
 #include "openbmc-flash-layout.dtsi"
 	};
 };
@@ -99,6 +100,7 @@
 	flash@0 {
 		status = "okay";
 		m25p,fast-read;
+		spi-max-frequency = <50000000>;
 		label = "pnor";
 	};
 };
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-romulus.dts b/arch/arm/boot/dts/aspeed-bmc-opp-romulus.dts
index 9628ecb879cf..edfa44fe1f75 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-romulus.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-romulus.dts
@@ -112,6 +112,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "bmc";
+		spi-max-frequency = <50000000>;
 #include "openbmc-flash-layout.dtsi"
 	};
 };
@@ -125,6 +126,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "pnor";
+		spi-max-frequency = <100000000>;
 	};
 };
 
@@ -160,6 +162,9 @@
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 };
 
 &i2c1 {
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-swift.dts b/arch/arm/boot/dts/aspeed-bmc-opp-swift.dts
index f67fef1ac5e1..b8fdd2a8a2c9 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-swift.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-swift.dts
@@ -322,6 +322,9 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
 	use-ncsi;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 };
 
 &i2c2 {
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts
new file mode 100644
index 000000000000..ff49ec76fa7c
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2019 IBM Corp.
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/leds/leds-pca955x.h>
+
+/ {
+	model = "Tacoma";
+	compatible = "ibm,tacoma-bmc", "aspeed,ast2600";
+
+	chosen {
+		stdout-path = &uart5;
+		bootargs = "console=ttyS4,115200n8";
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x40000000>;
+	};
+
+	reserved-memory {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		flash_memory: region@ba000000 {
+			no-map;
+			reg = <0xb8000000 0x4000000>; /* 64M */
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		air-water {
+			label = "air-water";
+			gpios = <&gpio0 ASPEED_GPIO(Q, 7) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(Q, 7)>;
+		};
+
+		checkstop {
+			label = "checkstop";
+			gpios = <&gpio0 ASPEED_GPIO(E, 3) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(E, 3)>;
+		};
+
+		ps0-presence {
+			label = "ps0-presence";
+			gpios = <&gpio0 ASPEED_GPIO(H, 3) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(H, 3)>;
+		};
+
+		ps1-presence {
+			label = "ps1-presence";
+			gpios = <&gpio0 ASPEED_GPIO(E, 5) GPIO_ACTIVE_LOW>;
+			linux,code = <ASPEED_GPIO(E, 5)>;
+		};
+	};
+
+	gpio-keys-polled {
+		compatible = "gpio-keys-polled";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		poll-interval = <1000>;
+
+		fan0-presence {
+			label = "fan0-presence";
+			gpios = <&pca0 4 GPIO_ACTIVE_LOW>;
+			linux,code = <4>;
+		};
+
+		fan1-presence {
+			label = "fan1-presence";
+			gpios = <&pca0 5 GPIO_ACTIVE_LOW>;
+			linux,code = <5>;
+		};
+
+		fan2-presence {
+			label = "fan2-presence";
+			gpios = <&pca0 6 GPIO_ACTIVE_LOW>;
+			linux,code = <6>;
+		};
+
+		fan3-presence {
+			label = "fan3-presence";
+			gpios = <&pca0 7 GPIO_ACTIVE_LOW>;
+			linux,code = <7>;
+		};
+	};
+};
+
+&fmc {
+	status = "okay";
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "bmc";
+		spi-max-frequency = <50000000>;
+#include "openbmc-flash-layout-128.dtsi"
+	};
+
+	flash@1 {
+		status = "okay";
+		m25p,fast-read;
+		label = "alt-bmc";
+		spi-max-frequency = <50000000>;
+	};
+};
+
+&spi1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_spi1_default>;
+
+	flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "pnor";
+		spi-max-frequency = <100000000>;
+	};
+};
+
+&mac2 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rmii3_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC3CLK>,
+		 <&syscon ASPEED_CLK_MAC3RCLK>;
+	clock-names = "MACCLK", "RCLK";
+	use-ncsi;
+};
+
+&emmc {
+	status = "okay";
+};
+
+&fsim0 {
+	status = "okay";
+
+	#address-cells = <2>;
+	#size-cells = <0>;
+
+	cfam@0,0 {
+		reg = <0 0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		chip-id = <0>;
+
+		scom@1000 {
+			compatible = "ibm,fsi2pib";
+			reg = <0x1000 0x400>;
+		};
+
+		i2c@1800 {
+			compatible = "ibm,fsi-i2c-master";
+			reg = <0x1800 0x400>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			cfam0_i2c0: i2c-bus@0 {
+				reg = <0>;
+			};
+
+			cfam0_i2c1: i2c-bus@1 {
+				reg = <1>;
+			};
+
+			cfam0_i2c2: i2c-bus@2 {
+				reg = <2>;
+			};
+
+			cfam0_i2c3: i2c-bus@3 {
+				reg = <3>;
+			};
+
+			cfam0_i2c4: i2c-bus@4 {
+				reg = <4>;
+			};
+
+			cfam0_i2c5: i2c-bus@5 {
+				reg = <5>;
+			};
+
+			cfam0_i2c6: i2c-bus@6 {
+				reg = <6>;
+			};
+
+			cfam0_i2c7: i2c-bus@7 {
+				reg = <7>;
+			};
+
+			cfam0_i2c8: i2c-bus@8 {
+				reg = <8>;
+			};
+
+			cfam0_i2c9: i2c-bus@9 {
+				reg = <9>;
+			};
+
+			cfam0_i2c10: i2c-bus@a {
+				reg = <10>;
+			};
+
+			cfam0_i2c11: i2c-bus@b {
+				reg = <11>;
+			};
+
+			cfam0_i2c12: i2c-bus@c {
+				reg = <12>;
+			};
+
+			cfam0_i2c13: i2c-bus@d {
+				reg = <13>;
+			};
+
+			cfam0_i2c14: i2c-bus@e {
+				reg = <14>;
+			};
+		};
+
+		sbefifo@2400 {
+			compatible = "ibm,p9-sbefifo";
+			reg = <0x2400 0x400>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			fsi_occ0: occ {
+				compatible = "ibm,p9-occ";
+			};
+		};
+
+		fsi_hub0: hub@3400 {
+			compatible = "fsi-master-hub";
+			reg = <0x3400 0x400>;
+			#address-cells = <2>;
+			#size-cells = <0>;
+
+			no-scan-on-init;
+		};
+	};
+};
+
+&fsi_hub0 {
+	cfam@1,0 {
+		reg = <1 0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		chip-id = <1>;
+
+		scom@1000 {
+			compatible = "ibm,fsi2pib";
+			reg = <0x1000 0x400>;
+		};
+
+		i2c@1800 {
+			compatible = "ibm,fsi-i2c-master";
+			reg = <0x1800 0x400>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			cfam1_i2c0: i2c-bus@0 {
+				reg = <0>;
+			};
+
+			cfam1_i2c1: i2c-bus@1 {
+				reg = <1>;
+			};
+
+			cfam1_i2c2: i2c-bus@2 {
+				reg = <2>;
+			};
+
+			cfam1_i2c3: i2c-bus@3 {
+				reg = <3>;
+			};
+
+			cfam1_i2c4: i2c-bus@4 {
+				reg = <4>;
+			};
+
+			cfam1_i2c5: i2c-bus@5 {
+				reg = <5>;
+			};
+
+			cfam1_i2c6: i2c-bus@6 {
+				reg = <6>;
+			};
+
+			cfam1_i2c7: i2c-bus@7 {
+				reg = <7>;
+			};
+
+			cfam1_i2c8: i2c-bus@8 {
+				reg = <8>;
+			};
+
+			cfam1_i2c9: i2c-bus@9 {
+				reg = <9>;
+			};
+
+			cfam1_i2c10: i2c-bus@a {
+				reg = <10>;
+			};
+
+			cfam1_i2c11: i2c-bus@b {
+				reg = <11>;
+			};
+
+			cfam1_i2c12: i2c-bus@c {
+				reg = <12>;
+			};
+
+			cfam1_i2c13: i2c-bus@d {
+				reg = <13>;
+			};
+
+			cfam1_i2c14: i2c-bus@e {
+				reg = <14>;
+			};
+		};
+
+		sbefifo@2400 {
+			compatible = "ibm,p9-sbefifo";
+			reg = <0x2400 0x400>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			fsi_occ1: occ {
+				compatible = "ibm,p9-occ";
+			};
+		};
+
+		fsi_hub1: hub@3400 {
+			compatible = "fsi-master-hub";
+			reg = <0x3400 0x400>;
+			#address-cells = <2>;
+			#size-cells = <0>;
+
+			no-scan-on-init;
+		};
+	};
+};
+
+/* Legacy OCC numbering (to get rid of when userspace is fixed) */
+&fsi_occ0 {
+	reg = <1>;
+};
+
+&fsi_occ1 {
+	reg = <2>;
+};
+
+/ {
+	aliases {
+		i2c100 = &cfam0_i2c0;
+		i2c101 = &cfam0_i2c1;
+		i2c102 = &cfam0_i2c2;
+		i2c103 = &cfam0_i2c3;
+		i2c104 = &cfam0_i2c4;
+		i2c105 = &cfam0_i2c5;
+		i2c106 = &cfam0_i2c6;
+		i2c107 = &cfam0_i2c7;
+		i2c108 = &cfam0_i2c8;
+		i2c109 = &cfam0_i2c9;
+		i2c110 = &cfam0_i2c10;
+		i2c111 = &cfam0_i2c11;
+		i2c112 = &cfam0_i2c12;
+		i2c113 = &cfam0_i2c13;
+		i2c114 = &cfam0_i2c14;
+		i2c200 = &cfam1_i2c0;
+		i2c201 = &cfam1_i2c1;
+		i2c202 = &cfam1_i2c2;
+		i2c203 = &cfam1_i2c3;
+		i2c204 = &cfam1_i2c4;
+		i2c205 = &cfam1_i2c5;
+		i2c206 = &cfam1_i2c6;
+		i2c207 = &cfam1_i2c7;
+		i2c208 = &cfam1_i2c8;
+		i2c209 = &cfam1_i2c9;
+		i2c210 = &cfam1_i2c10;
+		i2c211 = &cfam1_i2c11;
+		i2c212 = &cfam1_i2c12;
+		i2c213 = &cfam1_i2c13;
+		i2c214 = &cfam1_i2c14;
+	};
+
+};
+
+&i2c0 {
+	status = "okay";
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+
+	bmp: bmp280@77 {
+		compatible = "bosch,bmp280";
+		reg = <0x77>;
+		#io-channel-cells = <1>;
+	};
+
+	max31785@52 {
+		compatible = "maxim,max31785a";
+		reg = <0x52>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		fan@0 {
+			compatible = "pmbus-fan";
+			reg = <0>;
+			tach-pulses = <2>;
+			maxim,fan-rotor-input = "tach";
+			maxim,fan-pwm-freq = <25000>;
+			maxim,fan-dual-tach;
+			maxim,fan-no-watchdog;
+			maxim,fan-no-fault-ramp;
+			maxim,fan-ramp = <2>;
+			maxim,fan-fault-pin-mon;
+		};
+
+		fan@1 {
+			compatible = "pmbus-fan";
+			reg = <1>;
+			tach-pulses = <2>;
+			maxim,fan-rotor-input = "tach";
+			maxim,fan-pwm-freq = <25000>;
+			maxim,fan-dual-tach;
+			maxim,fan-no-watchdog;
+			maxim,fan-no-fault-ramp;
+			maxim,fan-ramp = <2>;
+			maxim,fan-fault-pin-mon;
+		};
+
+		fan@2 {
+			compatible = "pmbus-fan";
+			reg = <2>;
+			tach-pulses = <2>;
+			maxim,fan-rotor-input = "tach";
+			maxim,fan-pwm-freq = <25000>;
+			maxim,fan-dual-tach;
+			maxim,fan-no-watchdog;
+			maxim,fan-no-fault-ramp;
+			maxim,fan-ramp = <2>;
+			maxim,fan-fault-pin-mon;
+		};
+
+		fan@3 {
+			compatible = "pmbus-fan";
+			reg = <3>;
+			tach-pulses = <2>;
+			maxim,fan-rotor-input = "tach";
+			maxim,fan-pwm-freq = <25000>;
+			maxim,fan-dual-tach;
+			maxim,fan-no-watchdog;
+			maxim,fan-no-fault-ramp;
+			maxim,fan-ramp = <2>;
+			maxim,fan-fault-pin-mon;
+		};
+	};
+
+	dps: dps310@76 {
+		compatible = "infineon,dps310";
+		reg = <0x76>;
+		#io-channel-cells = <0>;
+	};
+
+	pca0: pca9552@60 {
+		compatible = "nxp,pca9552";
+		reg = <0x60>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		gpio@0 {
+			reg = <0>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@1 {
+			reg = <1>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@2 {
+			reg = <2>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@3 {
+			reg = <3>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@4 {
+			reg = <4>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@5 {
+			reg = <5>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@6 {
+			reg = <6>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@7 {
+			reg = <7>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@8 {
+			reg = <8>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@9 {
+			reg = <9>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@10 {
+			reg = <10>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@11 {
+			reg = <11>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@12 {
+			reg = <12>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@13 {
+			reg = <13>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@14 {
+			reg = <14>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@15 {
+			reg = <15>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+	};
+
+	power-supply@68 {
+		compatible = "ibm,cffps1";
+		reg = <0x68>;
+	};
+
+	power-supply@69 {
+		compatible = "ibm,cffps1";
+		reg = <0x69>;
+	};
+};
+
+&i2c4 {
+	status = "okay";
+
+	tmp423a@4c {
+		compatible = "ti,tmp423";
+		reg = <0x4c>;
+	};
+
+	ir35221@70 {
+		compatible = "infineon,ir35221";
+		reg = <0x70>;
+	};
+
+	ir35221@71 {
+		compatible = "infineon,ir35221";
+		reg = <0x71>;
+	};
+};
+
+&i2c5 {
+	status = "okay";
+
+	tmp423a@4c {
+		compatible = "ti,tmp423";
+		reg = <0x4c>;
+	};
+
+	ir35221@70 {
+		compatible = "infineon,ir35221";
+		reg = <0x70>;
+	};
+
+	ir35221@71 {
+		compatible = "infineon,ir35221";
+		reg = <0x71>;
+	};
+};
+
+&i2c7 {
+	status = "okay";
+};
+
+&i2c9 {
+	status = "okay";
+
+	tmp275@4a {
+		compatible = "ti,tmp275";
+		reg = <0x4a>;
+	};
+};
+
+&i2c10 {
+	status = "okay";
+};
+
+&i2c11 {
+	status = "okay";
+
+	pca9552: pca9552@60 {
+		compatible = "nxp,pca9552";
+		reg = <0x60>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		gpio-line-names = "PS_SMBUS_RESET_N", "APSS_RESET_N",
+			"GPU0_TH_OVERT_N_BUFF",	"GPU1_TH_OVERT_N_BUFF",
+			"GPU2_TH_OVERT_N_BUFF", "GPU3_TH_OVERT_N_BUFF",
+			"GPU4_TH_OVERT_N_BUFF",	"GPU5_TH_OVERT_N_BUFF",
+			"GPU0_PWR_GOOD_BUFF", "GPU1_PWR_GOOD_BUFF",
+			"GPU2_PWR_GOOD_BUFF", "GPU3_PWR_GOOD_BUFF",
+			"GPU4_PWR_GOOD_BUFF", "GPU5_PWR_GOOD_BUFF",
+			"12V_BREAKER_FLT_N", "THROTTLE_UNLATCHED_N";
+
+		gpio@0 {
+			reg = <0>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@1 {
+			reg = <1>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@2 {
+			reg = <2>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@3 {
+			reg = <3>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@4 {
+			reg = <4>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@5 {
+			reg = <5>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@6 {
+			reg = <6>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@7 {
+			reg = <7>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@8 {
+			reg = <8>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@9 {
+			reg = <9>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@10 {
+			reg = <10>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@11 {
+			reg = <11>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@12 {
+			reg = <12>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@13 {
+			reg = <13>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@14 {
+			reg = <14>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+
+		gpio@15 {
+			reg = <15>;
+			type = <PCA955X_TYPE_GPIO>;
+		};
+	};
+
+	rtc@32 {
+		compatible = "epson,rx8900";
+		reg = <0x32>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c64";
+		reg = <0x51>;
+	};
+
+	ucd90160@64 {
+		compatible = "ti,ucd90160";
+		reg = <0x64>;
+	};
+};
+
+&i2c12 {
+	status = "okay";
+};
+
+&i2c13 {
+	status = "okay";
+};
+
+&ibt {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+	// Workaround for A0
+	compatible = "snps,dw-apb-uart";
+};
+
+&uart5 {
+	// Workaround for A0
+	compatible = "snps,dw-apb-uart";
+};
+
+&vuart1 {
+	status = "okay";
+};
+
+&lpc_ctrl {
+	status = "okay";
+	memory-region = <&flash_memory>;
+	flash = <&spi1>;
+};
+
+&wdt1 {
+	aspeed,reset-type = "none";
+	aspeed,external-signal;
+	aspeed,ext-push-pull;
+	aspeed,ext-active-high;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdtrst1_default>;
+};
+
+&wdt2 {
+	status = "okay";
+};
+
+&pinctrl {
+	/* Hog these as no driver is probed for the entire LPC block */
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lpc_default>,
+		    <&pinctrl_lsirq_default>;
+};
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-vesnin.dts b/arch/arm/boot/dts/aspeed-bmc-opp-vesnin.dts
index a27c88d23056..affd2c8743b1 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-vesnin.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-vesnin.dts
@@ -43,6 +43,10 @@
 			gpios = <&gpio ASPEED_GPIO(N, 1) GPIO_ACTIVE_LOW>;
 		};
 
+		power_green {
+			gpios = <&gpio ASPEED_GPIO(F, 1) GPIO_ACTIVE_LOW>;
+		};
+
 		id_blue {
 			gpios = <&gpio ASPEED_GPIO(O, 0) GPIO_ACTIVE_LOW>;
 		};
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts b/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts
index 31ea34e14c79..569dad93e162 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts
@@ -200,6 +200,7 @@
 		status = "okay";
 		label = "bmc";
 		m25p,fast-read;
+		spi-max-frequency = <50000000>;
 
 		partitions {
 			#address-cells = < 1 >;
@@ -224,6 +225,7 @@
 		status = "okay";
 		label = "alt-bmc";
 		m25p,fast-read;
+		spi-max-frequency = <50000000>;
 
 		partitions {
 			#address-cells = < 1 >;
@@ -242,7 +244,6 @@
 				label = "alt-obmc-ubi";
 			};
 		};
-
 	};
 };
 
@@ -255,6 +256,7 @@
 		status = "okay";
 		label = "pnor";
 		m25p,fast-read;
+		spi-max-frequency = <100000000>;
 	};
 };
 
@@ -293,6 +295,9 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-zaius.dts b/arch/arm/boot/dts/aspeed-bmc-opp-zaius.dts
index 30624378316d..bc60ec291681 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-zaius.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-zaius.dts
@@ -130,6 +130,7 @@
 		status = "okay";
 		label = "bmc";
 		m25p,fast-read;
+		spi-max-frequency = <50000000>;
 #include "openbmc-flash-layout.dtsi"
 	};
 };
@@ -143,6 +144,7 @@
 		status = "okay";
 		label = "pnor";
 		m25p,fast-read;
+		spi-max-frequency = <100000000>;
 	};
 };
 
@@ -187,6 +189,9 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-bmc-portwell-neptune.dts b/arch/arm/boot/dts/aspeed-bmc-portwell-neptune.dts
index 33d704541de6..4a1ca8f5b6a7 100644
--- a/arch/arm/boot/dts/aspeed-bmc-portwell-neptune.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-portwell-neptune.dts
@@ -80,12 +80,18 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii1_default
 		     &pinctrl_mdio1_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>,
+		 <&syscon ASPEED_CLK_MAC1RCLK>;
+	clock-names = "MACCLK", "RCLK";
 };
 
 &mac1 {
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_rmii2_default>;
+	clocks = <&syscon ASPEED_CLK_GATE_MAC2CLK>,
+		 <&syscon ASPEED_CLK_MAC2RCLK>;
+	clock-names = "MACCLK", "RCLK";
 	use-ncsi;
 };
 
diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index dffb595d30e4..46c0891aac5a 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -65,6 +65,7 @@
 			flash@0 {
 				reg = < 0 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 			flash@1 {
@@ -100,6 +101,7 @@
 			flash@0 {
 				reg = < 0 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 		};
@@ -182,7 +184,7 @@
 				#reset-cells = <1>;
 
 				pinctrl: pinctrl {
-					compatible = "aspeed,g4-pinctrl";
+					compatible = "aspeed,ast2400-pinctrl";
 				};
 
 				p2a: p2a-control {
diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi
index e8feb8b66a2f..a259c63fff06 100644
--- a/arch/arm/boot/dts/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed-g5.dtsi
@@ -72,16 +72,19 @@
 			flash@0 {
 				reg = < 0 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 			flash@1 {
 				reg = < 1 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 			flash@2 {
 				reg = < 2 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 		};
@@ -97,11 +100,13 @@
 			flash@0 {
 				reg = < 0 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 			flash@1 {
 				reg = < 1 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 		};
@@ -117,11 +122,13 @@
 			flash@0 {
 				reg = < 0 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 			flash@1 {
 				reg = < 1 >;
 				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
 				status = "disabled";
 			};
 		};
@@ -215,7 +222,7 @@
 				#reset-cells = <1>;
 
 				pinctrl: pinctrl {
-					compatible = "aspeed,g5-pinctrl";
+					compatible = "aspeed,ast2500-pinctrl";
 					aspeed,external-nodes = <&gfx &lhc>;
 
 				};
@@ -299,7 +306,7 @@
 				#gpio-cells = <2>;
 				gpio-controller;
 				compatible = "aspeed,ast2500-gpio";
-				reg = <0x1e780000 0x1000>;
+				reg = <0x1e780000 0x200>;
 				interrupts = <20>;
 				gpio-ranges = <&pinctrl 0 0 232>;
 				clocks = <&syscon ASPEED_CLK_APB>;
@@ -307,6 +314,21 @@
 				#interrupt-cells = <2>;
 			};
 
+			sgpio: sgpio@1e780200 {
+				#gpio-cells = <2>;
+				compatible = "aspeed,ast2500-sgpio";
+				gpio-controller;
+				interrupts = <40>;
+				reg = <0x1e780200 0x0100>;
+				clocks = <&syscon ASPEED_CLK_APB>;
+				interrupt-controller;
+				ngpios = <8>;
+				bus-frequency = <12000000>;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_sgpm_default>;
+				status = "disabled";
+			};
+
 			rtc: rtc@1e781000 {
 				compatible = "aspeed,ast2500-rtc";
 				reg = <0x1e781000 0x18>;
@@ -379,6 +401,7 @@
 				interrupts = <8>;
 				clocks = <&syscon ASPEED_CLK_APB>;
 				no-loopback-test;
+				aspeed,sirq-polarity-sense = <&syscon 0x70 25>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
index 5b8bf58e89cb..045ce66ca876 100644
--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
@@ -852,14 +852,9 @@
 		groups = "SD2";
 	};
 
-	pinctrl_sd3_default: sd3_default {
-		function = "SD3";
-		groups = "SD3";
-	};
-
 	pinctrl_emmc_default: emmc_default {
-		function = "SD3";
-		groups = "EMMC";
+		function = "EMMC";
+		groups = "EMMCG4";
 	};
 
 	pinctrl_sgpm1_default: sgpm1_default {
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index 3a1422f7c49c..b72afbaadaf8 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -12,7 +12,29 @@
 	interrupt-parent = <&gic>;
 
 	aliases {
+		i2c0 = &i2c0;
+		i2c1 = &i2c1;
+		i2c2 = &i2c2;
+		i2c3 = &i2c3;
+		i2c4 = &i2c4;
+		i2c5 = &i2c5;
+		i2c6 = &i2c6;
+		i2c7 = &i2c7;
+		i2c8 = &i2c8;
+		i2c9 = &i2c9;
+		i2c10 = &i2c10;
+		i2c11 = &i2c11;
+		i2c12 = &i2c12;
+		i2c13 = &i2c13;
+		i2c14 = &i2c14;
+		i2c15 = &i2c15;
+		serial0 = &uart1;
+		serial1 = &uart2;
+		serial2 = &uart3;
+		serial3 = &uart4;
 		serial4 = &uart5;
+		serial5 = &vuart1;
+		serial6 = &vuart2;
 	};
 
 
@@ -64,12 +86,93 @@
 			    <0x40466000 0x2000>;
 			};
 
+		fmc: spi@1e620000 {
+			reg = < 0x1e620000 0xc4
+				0x20000000 0x10000000 >;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "aspeed,ast2600-fmc";
+			clocks = <&syscon ASPEED_CLK_AHB>;
+			status = "disabled";
+			interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+			flash@0 {
+				reg = < 0 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+			flash@1 {
+				reg = < 1 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+			flash@2 {
+				reg = < 2 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+		};
+
+		spi1: spi@1e630000 {
+			reg = < 0x1e630000 0xc4
+				0x30000000 0x10000000 >;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "aspeed,ast2600-spi";
+			clocks = <&syscon ASPEED_CLK_AHB>;
+			status = "disabled";
+			flash@0 {
+				reg = < 0 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+			flash@1 {
+				reg = < 1 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+		};
+
+		spi2: spi@1e631000 {
+			reg = < 0x1e631000 0xc4
+				0x50000000 0x10000000 >;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "aspeed,ast2600-spi";
+			clocks = <&syscon ASPEED_CLK_AHB>;
+			status = "disabled";
+			flash@0 {
+				reg = < 0 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+			flash@1 {
+				reg = < 1 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+			flash@2 {
+				reg = < 2 >;
+				compatible = "jedec,spi-nor";
+				spi-max-frequency = <50000000>;
+				status = "disabled";
+			};
+		};
+
 		mdio0: mdio@1e650000 {
 			compatible = "aspeed,ast2600-mdio";
 			reg = <0x1e650000 0x8>;
 			#address-cells = <1>;
 			#size-cells = <0>;
 			status = "disabled";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_mdio1_default>;
 		};
 
 		mdio1: mdio@1e650008 {
@@ -78,6 +181,8 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			status = "disabled";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_mdio2_default>;
 		};
 
 		mdio2: mdio@1e650010 {
@@ -86,6 +191,8 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			status = "disabled";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_mdio3_default>;
 		};
 
 		mdio3: mdio@1e650018 {
@@ -94,6 +201,8 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			status = "disabled";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_mdio4_default>;
 		};
 
 		mac0: ftgmac@1e660000 {
@@ -168,6 +277,32 @@
 				quality = <100>;
 			};
 
+			gpio0: gpio@1e780000 {
+				#gpio-cells = <2>;
+				gpio-controller;
+				compatible = "aspeed,ast2600-gpio";
+				reg = <0x1e780000 0x800>;
+				interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+				gpio-ranges = <&pinctrl 0 0 208>;
+				ngpios = <208>;
+				clocks = <&syscon ASPEED_CLK_APB2>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+			};
+
+			gpio1: gpio@1e780800 {
+				#gpio-cells = <2>;
+				gpio-controller;
+				compatible = "aspeed,ast2600-gpio";
+				reg = <0x1e780800 0x800>;
+				interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+				gpio-ranges = <&pinctrl 0 208 36>;
+				ngpios = <36>;
+				clocks = <&syscon ASPEED_CLK_APB1>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+			};
+
 			rtc: rtc@1e781000 {
 				compatible = "aspeed,ast2600-rtc";
 				reg = <0x1e781000 0x18>;
@@ -175,6 +310,35 @@
 				status = "disabled";
 			};
 
+			timer: timer@1e782000 {
+				compatible = "aspeed,ast2600-timer";
+				reg = <0x1e782000 0x90>;
+				interrupts-extended = <&gic  GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
+						<&gic  GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_APB1>;
+				clock-names = "PCLK";
+                        };
+
+			uart1: serial@1e783000 {
+				compatible = "ns16550a";
+				reg = <0x1e783000 0x20>;
+				reg-shift = <2>;
+				reg-io-width = <4>;
+				interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_GATE_UART1CLK>;
+				resets = <&lpc_reset 4>;
+				no-loopback-test;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_txd1_default &pinctrl_rxd1_default>;
+				status = "disabled";
+			};
+
 			uart5: serial@1e784000 {
 				compatible = "ns16550a";
 				reg = <0x1e784000 0x1000>;
@@ -207,6 +371,93 @@
 				status = "disabled";
 			};
 
+			lpc: lpc@1e789000 {
+				compatible = "aspeed,ast2600-lpc", "simple-mfd";
+				reg = <0x1e789000 0x1000>;
+
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x1e789000 0x1000>;
+
+				lpc_bmc: lpc-bmc@0 {
+					compatible = "aspeed,ast2600-lpc-bmc", "simple-mfd", "syscon";
+					reg = <0x0 0x80>;
+					reg-io-width = <4>;
+
+					#address-cells = <1>;
+					#size-cells = <1>;
+					ranges = <0x0 0x0 0x80>;
+
+					kcs1: kcs1@0 {
+						compatible = "aspeed,ast2600-kcs-bmc";
+						interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+						kcs_chan = <1>;
+						status = "disabled";
+					};
+					kcs2: kcs2@0 {
+						compatible = "aspeed,ast2600-kcs-bmc";
+						interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>;
+						kcs_chan = <2>;
+						status = "disabled";
+					};
+					kcs3: kcs3@0 {
+						compatible = "aspeed,ast2600-kcs-bmc";
+						interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
+						kcs_chan = <3>;
+						status = "disabled";
+					};
+				};
+
+				lpc_host: lpc-host@80 {
+					compatible = "aspeed,ast2600-lpc-host", "simple-mfd", "syscon";
+					reg = <0x80 0x1e0>;
+					reg-io-width = <4>;
+
+					#address-cells = <1>;
+					#size-cells = <1>;
+					ranges = <0x0 0x80 0x1e0>;
+
+					kcs4: kcs4@0 {
+						compatible = "aspeed,ast2600-kcs-bmc";
+						interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
+						kcs_chan = <4>;
+						status = "disabled";
+					};
+
+					lpc_ctrl: lpc-ctrl@0 {
+						compatible = "aspeed,ast2600-lpc-ctrl";
+						reg = <0x0 0x80>;
+						clocks = <&syscon ASPEED_CLK_GATE_LCLK>;
+						status = "disabled";
+					};
+
+					lpc_snoop: lpc-snoop@0 {
+						compatible = "aspeed,ast2600-lpc-snoop";
+						reg = <0x0 0x80>;
+						interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
+						status = "disabled";
+					};
+
+					lhc: lhc@20 {
+						compatible = "aspeed,ast2600-lhc";
+						reg = <0x20 0x24 0x48 0x8>;
+					};
+
+					lpc_reset: reset-controller@18 {
+						compatible = "aspeed,ast2600-lpc-reset";
+						reg = <0x18 0x4>;
+						#reset-cells = <1>;
+					};
+
+					ibt: ibt@c0 {
+						compatible = "aspeed,ast2600-ibt-bmc";
+						reg = <0xc0 0x18>;
+						interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+						status = "disabled";
+					};
+				};
+			};
+
 			sdc: sdc@1e740000 {
 				compatible = "aspeed,ast2600-sd-controller";
 				reg = <0x1e740000 0x100>;
@@ -235,7 +486,7 @@
 				};
 			};
 
-			emmc: sdc@1e750000 {
+			emmc_controller: sdc@1e750000 {
 				compatible = "aspeed,ast2600-sd-controller";
 				reg = <0x1e750000 0x100>;
 				#address-cells = <1>;
@@ -244,7 +495,7 @@
 				clocks = <&syscon ASPEED_CLK_GATE_EMMCCLK>;
 				status = "disabled";
 
-				sdhci@1e750100 {
+				emmc: sdhci@1e750100 {
 					compatible = "aspeed,ast2600-sdhci";
 					reg = <0x100 0x100>;
 					sdhci,auto-cmd12;
@@ -254,8 +505,339 @@
 					pinctrl-0 = <&pinctrl_emmc_default>;
 				};
 			};
+
+			vuart1: serial@1e787000 {
+				compatible = "aspeed,ast2500-vuart";
+				reg = <0x1e787000 0x40>;
+				reg-shift = <2>;
+				interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_APB1>;
+				no-loopback-test;
+				status = "disabled";
+			};
+
+			vuart2: serial@1e788000 {
+				compatible = "aspeed,ast2500-vuart";
+				reg = <0x1e788000 0x40>;
+				reg-shift = <2>;
+				interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_APB1>;
+				no-loopback-test;
+				status = "disabled";
+			};
+
+			uart2: serial@1e78d000 {
+				compatible = "ns16550a";
+				reg = <0x1e78d000 0x20>;
+				reg-shift = <2>;
+				reg-io-width = <4>;
+				interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_GATE_UART2CLK>;
+				resets = <&lpc_reset 5>;
+				no-loopback-test;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_txd2_default &pinctrl_rxd2_default>;
+				status = "disabled";
+			};
+
+			uart3: serial@1e78e000 {
+				compatible = "ns16550a";
+				reg = <0x1e78e000 0x20>;
+				reg-shift = <2>;
+				reg-io-width = <4>;
+				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_GATE_UART3CLK>;
+				resets = <&lpc_reset 6>;
+				no-loopback-test;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_txd3_default &pinctrl_rxd3_default>;
+				status = "disabled";
+			};
+
+			uart4: serial@1e78f000 {
+				compatible = "ns16550a";
+				reg = <0x1e78f000 0x20>;
+				reg-shift = <2>;
+				reg-io-width = <4>;
+				interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_GATE_UART4CLK>;
+				resets = <&lpc_reset 7>;
+				no-loopback-test;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_txd4_default &pinctrl_rxd4_default>;
+				status = "disabled";
+			};
+
+			i2c: bus@1e78a000 {
+				compatible = "simple-bus";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x1e78a000 0x1000>;
+			};
+
+			fsim0: fsi@1e79b000 {
+				compatible = "aspeed,ast2600-fsi-master", "fsi-master";
+				reg = <0x1e79b000 0x94>;
+				interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_fsi1_default>;
+				clocks = <&syscon ASPEED_CLK_GATE_FSICLK>;
+				status = "disabled";
+			};
+
+			fsim1: fsi@1e79b100 {
+				compatible = "aspeed,ast2600-fsi-master", "fsi-master";
+				reg = <0x1e79b100 0x94>;
+				interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_fsi2_default>;
+				clocks = <&syscon ASPEED_CLK_GATE_FSICLK>;
+				status = "disabled";
+			};
 		};
 	};
 };
 
 #include "aspeed-g6-pinctrl.dtsi"
+
+&i2c {
+	i2c0: i2c-bus@80 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x80 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c1_default>;
+		status = "disabled";
+	};
+
+	i2c1: i2c-bus@100 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x100 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c2_default>;
+		status = "disabled";
+	};
+
+	i2c2: i2c-bus@180 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x180 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c3_default>;
+		status = "disabled";
+	};
+
+	i2c3: i2c-bus@200 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x200 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c4_default>;
+		status = "disabled";
+	};
+
+	i2c4: i2c-bus@280 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x280 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c5_default>;
+		status = "disabled";
+	};
+
+	i2c5: i2c-bus@300 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x300 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c6_default>;
+		status = "disabled";
+	};
+
+	i2c6: i2c-bus@380 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x380 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c7_default>;
+		status = "disabled";
+	};
+
+	i2c7: i2c-bus@400 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x400 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c8_default>;
+		status = "disabled";
+	};
+
+	i2c8: i2c-bus@480 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x480 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c9_default>;
+		status = "disabled";
+	};
+
+	i2c9: i2c-bus@500 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x500 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c10_default>;
+		status = "disabled";
+	};
+
+	i2c10: i2c-bus@580 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x580 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c11_default>;
+		status = "disabled";
+	};
+
+	i2c11: i2c-bus@600 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x600 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c12_default>;
+		status = "disabled";
+	};
+
+	i2c12: i2c-bus@680 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x680 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c13_default>;
+		status = "disabled";
+	};
+
+	i2c13: i2c-bus@700 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x700 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c14_default>;
+		status = "disabled";
+	};
+
+	i2c14: i2c-bus@780 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x780 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c15_default>;
+		status = "disabled";
+	};
+
+	i2c15: i2c-bus@800 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x800 0x80>;
+		compatible = "aspeed,ast2600-i2c-bus";
+		clocks = <&syscon ASPEED_CLK_APB2>;
+		resets = <&syscon ASPEED_RESET_I2C>;
+		interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+		bus-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c16_default>;
+		status = "disabled";
+	};
+};
diff --git a/arch/arm/boot/dts/ast2500-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/ast2500-facebook-netbmc-common.dtsi
new file mode 100644
index 000000000000..7a395ba56512
--- /dev/null
+++ b/arch/arm/boot/dts/ast2500-facebook-netbmc-common.dtsi
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2019 Facebook Inc.
+
+#include "aspeed-g5.dtsi"
+
+/ {
+	memory@80000000 {
+		reg = <0x80000000 0x40000000>;
+	};
+};
+
+/*
+ * Update reset type to "system" (full chip) to fix warm reboot hang issue
+ * when reset type is set to default ("soc", gated by reset mask registers).
+ */
+&wdt1 {
+	status = "okay";
+	aspeed,reset-type = "system";
+};
+
+&wdt2 {
+	status = "disabled";
+};
+
+&uart1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_txd1_default
+		     &pinctrl_rxd1_default>;
+};
+
+&uart3 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_txd3_default
+		     &pinctrl_rxd3_default>;
+};
+
+&uart5 {
+	status = "okay";
+};
+
+&fmc {
+	status = "okay";
+
+	fmc_flash0: flash@0 {
+		status = "okay";
+		m25p,fast-read;
+		label = "spi0.0";
+
+#include "facebook-bmc-flash-layout.dtsi"
+	};
+
+	fmc_flash1: flash@1 {
+		status = "okay";
+		m25p,fast-read;
+		label = "spi0.1";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			flash1@0 {
+				reg = <0x0 0x2000000>;
+				label = "flash1";
+			};
+		};
+	};
+};
+
+&mac1 {
+	status = "okay";
+	no-hw-checksum;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_rgmii2_default &pinctrl_mdio2_default>;
+};
+
+&rtc {
+	status = "okay";
+};
+
+&vhub {
+	status = "okay";
+};
+
+&sdmmc {
+	status = "okay";
+};
+
+&sdhci1 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sd2_default>;
+};
diff --git a/arch/arm/boot/dts/at91-kizbox2-2.dts b/arch/arm/boot/dts/at91-kizbox2-2.dts
new file mode 100644
index 000000000000..cab8b3579efa
--- /dev/null
+++ b/arch/arm/boot/dts/at91-kizbox2-2.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * at91-kizbox2-2.dts - Device Tree file for the Kizbox2 with
+ * two head board
+ *
+ * Copyright (C) 2015 Overkiz SAS
+ *
+ * Authors: Antoine Aubert <a.aubert@overkiz.com>
+ *	    Kévin Raymond <k.raymond@overkiz.com>
+ */
+/dts-v1/;
+#include "at91-kizbox2-common.dtsi"
+
+/ {
+	model = "Overkiz Kizbox 2 with two heads";
+	compatible = "overkiz,kizbox2-2", "atmel,sama5d31",
+		     "atmel,sama5d3", "atmel,sama5";
+};
+
+&usart1 {
+	status = "okay";
+};
+
+&usart2 {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/at91-kizbox2-common.dtsi b/arch/arm/boot/dts/at91-kizbox2-common.dtsi
new file mode 100644
index 000000000000..af38253a6e7a
--- /dev/null
+++ b/arch/arm/boot/dts/at91-kizbox2-common.dtsi
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * at91-kizbox2_common.dtsi - Device Tree Include file for
+ * Overkiz Kizbox 2 family SoC
+ *
+ * Copyright (C) 2014-2018 Overkiz SAS
+ *
+ * Authors: Antoine Aubert <a.aubert@overkiz.com>
+ *          Gaël Portay <g.portay@overkiz.com>
+ *          Kévin Raymond <k.raymond@overkiz.com>
+ */
+#include "sama5d31.dtsi"
+
+/ {
+	chosen {
+		bootargs = "ubi.mtd=ubi";
+		stdout-path = &dbgu;
+	};
+
+	memory {
+		reg = <0x20000000 0x10000000>;
+	};
+
+	clocks {
+		slow_xtal {
+			clock-frequency = <32768>;
+		};
+
+		main_xtal {
+			clock-frequency = <12000000>;
+		};
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		prog {
+			label = "PB_PROG";
+			gpios = <&pioE 27 GPIO_ACTIVE_LOW>;
+			linux,code = <0x102>;
+			wakeup-source;
+		};
+
+		reset {
+			label = "PB_RST";
+			gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
+			linux,code = <0x100>;
+			wakeup-source;
+		};
+
+		user {
+			label = "PB_USER";
+			gpios = <&pioE 31 GPIO_ACTIVE_HIGH>;
+			linux,code = <0x101>;
+			wakeup-source;
+		};
+	};
+
+	pwm_leds {
+		compatible = "pwm-leds";
+
+		blue {
+			label = "pwm:blue:user";
+			pwms = <&pwm0 2 10000000 0>;
+			max-brightness = <255>;
+			linux,default-trigger = "none";
+		};
+
+		green {
+			label = "pwm:green:user";
+			pwms = <&pwm0 1 10000000 0>;
+			max-brightness = <255>;
+			linux,default-trigger = "default-on";
+		};
+
+		red {
+			label = "pwm:red:user";
+			pwms = <&pwm0 0 10000000 0>;
+			max-brightness = <255>;
+			linux,default-trigger = "default-on";
+		};
+	};
+};
+
+&i2c1 {
+	status = "okay";
+
+	pmic: act8865@5b {
+		compatible = "active-semi,act8865";
+		reg = <0x5b>;
+		status = "okay";
+
+		regulators {
+			vcc_1v8_reg: DCDC_REG1 {
+				regulator-name = "VCC_1V8";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			vcc_1v2_reg: DCDC_REG2 {
+				regulator-name = "VCC_1V2";
+				regulator-min-microvolt = <1200000>;
+				regulator-max-microvolt = <1200000>;
+				regulator-always-on;
+			};
+
+			vcc_3v3_reg: DCDC_REG3 {
+				regulator-name = "VCC_3V3";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			vddfuse_reg: LDO_REG1 {
+				regulator-name = "FUSE_2V5";
+				regulator-min-microvolt = <2500000>;
+				regulator-max-microvolt = <2500000>;
+			};
+
+			vddana_reg: LDO_REG2 {
+				regulator-name = "VDDANA";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			vled_reg: LDO_REG3 {
+				regulator-name = "VLED";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			v3v8_rf_reg: LDO_REG4 {
+				regulator-name = "V3V8_RF";
+				regulator-min-microvolt = <3800000>;
+				regulator-max-microvolt = <3800000>;
+				regulator-always-on;
+			};
+		};
+	};
+};
+
+&usart0 {
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&usart1 {
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&usart2 {
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&pwm0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm0_pwmh0_1
+		     &pinctrl_pwm0_pwmh1_1
+		     &pinctrl_pwm0_pwmh2_0>;
+	status = "okay";
+};
+
+&adc0 {
+	atmel,adc-vref = <3333>;
+	status = "okay";
+};
+
+&macb1 {
+	phy-mode = "rmii";
+	status = "okay";
+};
+
+&dbgu {
+	status = "okay";
+};
+
+&watchdog {
+	status = "okay";
+};
+
+&ebi {
+	pinctrl-0 = <&pinctrl_ebi_nand_addr>;
+	pinctrl-names = "default";
+	status = "okay";
+};
+
+&nand_controller {
+	status = "okay";
+
+	nand@3 {
+		reg = <0x3 0x0 0x2>;
+		atmel,rb = <0>;
+		nand-bus-width = <8>;
+		nand-ecc-mode = "hw";
+		nand-ecc-strength = <4>;
+		nand-ecc-step-size = <512>;
+		nand-on-flash-bbt;
+		label = "atmel_nand";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			bootstrap@0 {
+				label = "bootstrap";
+				reg = <0x0 0x20000>;
+			};
+
+			ubi@20000 {
+				label = "ubi";
+				reg = <0x20000 0x7fe0000>;
+			};
+		};
+	};
+};
+
+&usb1 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
+
+/* WMBUS (inverted with IO in the latest schematic) */
+&pinctrl_usart0 {
+	atmel,pins =
+		<AT91_PIOD 17 AT91_PERIPH_A AT91_PINCTRL_NONE
+		 AT91_PIOD 18 AT91_PERIPH_A AT91_PINCTRL_PULL_UP
+		 AT91_PIOE 2 AT91_PERIPH_GPIO AT91_PINCTRL_MULTI_DRIVE>;
+};
+
+/* RTS */
+&pinctrl_usart1 {
+	atmel,pins =
+		<AT91_PIOB 28 AT91_PERIPH_A AT91_PINCTRL_NONE
+		 AT91_PIOB 29 AT91_PERIPH_A AT91_PINCTRL_PULL_UP
+		 AT91_PIOE 7 AT91_PERIPH_GPIO AT91_PINCTRL_MULTI_DRIVE>;
+};
+
+/* IO (inverted with WMBUS in the latest schematic) */
+&pinctrl_usart2 {
+	atmel,pins =
+		<AT91_PIOE 25 AT91_PERIPH_B AT91_PINCTRL_NONE
+		 AT91_PIOE 26 AT91_PERIPH_B AT91_PINCTRL_PULL_UP
+		 AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_MULTI_DRIVE>;
+};
diff --git a/arch/arm/boot/dts/at91-kizbox2.dts b/arch/arm/boot/dts/at91-kizbox2.dts
deleted file mode 100644
index 86d821884bd4..000000000000
--- a/arch/arm/boot/dts/at91-kizbox2.dts
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * at91-kizbox2.dts - Device Tree file for Overkiz Kizbox 2 board
- *
- * Copyright (C) 2014 Gaël PORTAY <g.portay@overkiz.com>
- */
-/dts-v1/;
-#include "sama5d31.dtsi"
-#include <dt-bindings/pwm/pwm.h>
-
-/ {
-	model = "Overkiz Kizbox 2";
-	compatible = "overkiz,kizbox2", "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-
-	chosen {
-		bootargs = "ubi.mtd=ubi";
-		stdout-path = &dbgu;
-	};
-
-	memory {
-		reg = <0x20000000 0x10000000>;
-	};
-
-	clocks {
-		slow_xtal {
-			clock-frequency = <32768>;
-		};
-
-		main_xtal {
-			clock-frequency = <12000000>;
-		};
-	};
-
-	ahb {
-		apb {
-			i2c1: i2c@f0018000 {
-				status = "okay";
-
-				pmic: act8865@5b {
-					compatible = "active-semi,act8865";
-					reg = <0x5b>;
-					status = "okay";
-
-					regulators {
-						vcc_1v8_reg: DCDC_REG1 {
-							regulator-name = "VCC_1V8";
-							regulator-min-microvolt = <1800000>;
-							regulator-max-microvolt = <1800000>;
-							regulator-always-on;
-						};
-
-						vcc_1v2_reg: DCDC_REG2 {
-							regulator-name = "VCC_1V2";
-							regulator-min-microvolt = <1200000>;
-							regulator-max-microvolt = <1200000>;
-							regulator-always-on;
-						};
-
-						vcc_3v3_reg: DCDC_REG3 {
-							regulator-name = "VCC_3V3";
-							regulator-min-microvolt = <3300000>;
-							regulator-max-microvolt = <3300000>;
-							regulator-always-on;
-						};
-
-						vddfuse_reg: LDO_REG1 {
-							regulator-name = "FUSE_2V5";
-							regulator-min-microvolt = <2500000>;
-							regulator-max-microvolt = <2500000>;
-						};
-
-						vddana_reg: LDO_REG2 {
-							regulator-name = "VDDANA";
-							regulator-min-microvolt = <3300000>;
-							regulator-max-microvolt = <3300000>;
-							regulator-always-on;
-						};
-
-						vled_reg: LDO_REG3 {
-							regulator-name = "VLED";
-							regulator-min-microvolt = <3300000>;
-							regulator-max-microvolt = <3300000>;
-							regulator-always-on;
-						};
-
-						v3v8_rf_reg: LDO_REG4 {
-							regulator-name = "V3V8_RF";
-							regulator-min-microvolt = <3800000>;
-							regulator-max-microvolt = <3800000>;
-							regulator-always-on;
-						};
-					};
-				};
-			};
-
-			tcb0: timer@f0010000 {
-				timer@0 {
-					compatible = "atmel,tcb-timer";
-					reg = <0>;
-				};
-
-				timer@1 {
-					compatible = "atmel,tcb-timer";
-					reg = <1>;
-				};
-			};
-
-			usart0: serial@f001c000 {
-				status = "okay";
-			};
-
-			usart1: serial@f0020000 {
-				status = "okay";
-			};
-
-			pwm0: pwm@f002c000 {
-				pinctrl-names = "default";
-				pinctrl-0 = <&pinctrl_pwm0_pwmh0_1
-					     &pinctrl_pwm0_pwmh1_1
-					     &pinctrl_pwm0_pwmh2_0>;
-				status = "okay";
-			};
-
-			adc0: adc@f8018000 {
-				atmel,adc-vref = <3333>;
-				status = "okay";
-			};
-
-			usart2: serial@f8020000 {
-				status = "okay";
-			};
-
-			macb1: ethernet@f802c000 {
-				phy-mode = "rmii";
-				status = "okay";
-			};
-
-			dbgu: serial@ffffee00 {
-				status = "okay";
-			};
-
-			watchdog@fffffe40 {
-				status = "okay";
-			};
-		};
-
-		usb1: ohci@600000 {
-			status = "okay";
-		};
-
-		usb2: ehci@700000 {
-			status = "okay";
-		};
-
-		ebi: ebi@10000000 {
-			pinctrl-0 = <&pinctrl_ebi_nand_addr>;
-			pinctrl-names = "default";
-			status = "okay";
-
-			nand_controller: nand-controller {
-				status = "okay";
-
-				nand@3 {
-					reg = <0x3 0x0 0x2>;
-					atmel,rb = <0>;
-					nand-bus-width = <8>;
-					nand-ecc-mode = "hw";
-					nand-ecc-strength = <4>;
-					nand-ecc-step-size = <512>;
-					nand-on-flash-bbt;
-					label = "atmel_nand";
-
-					partitions {
-						compatible = "fixed-partitions";
-						#address-cells = <1>;
-						#size-cells = <1>;
-
-						bootstrap@0 {
-							label = "bootstrap";
-							reg = <0x0 0x20000>;
-						};
-
-						ubi@20000 {
-							label = "ubi";
-							reg = <0x20000 0x7fe0000>;
-						};
-					};
-				};
-			};
-		};
-	};
-
-	gpio_keys {
-		compatible = "gpio-keys";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		prog {
-			label = "PB_PROG";
-			gpios = <&pioE 27 GPIO_ACTIVE_LOW>;
-			linux,code = <0x102>;
-			wakeup-source;
-		};
-
-		reset {
-			label = "PB_RST";
-			gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
-			linux,code = <0x100>;
-			wakeup-source;
-		};
-
-		user {
-			label = "PB_USER";
-			gpios = <&pioE 31 GPIO_ACTIVE_HIGH>;
-			linux,code = <0x101>;
-			wakeup-source;
-		};
-	};
-
-	pwm_leds {
-		compatible = "pwm-leds";
-
-		blue {
-			label = "pwm:blue:user";
-			pwms = <&pwm0 2 10000000 0>;
-			max-brightness = <255>;
-			linux,default-trigger = "default-on";
-		};
-
-		green {
-			label = "pwm:green:user";
-			pwms = <&pwm0 1 10000000 0>;
-			max-brightness = <255>;
-			linux,default-trigger = "default-on";
-		};
-
-		red {
-			label = "pwm:red:user";
-			pwms = <&pwm0 0 10000000 0>;
-			max-brightness = <255>;
-			linux,default-trigger = "default-on";
-		};
-	};
-};
diff --git a/arch/arm/boot/dts/at91-kizbox3-hs.dts b/arch/arm/boot/dts/at91-kizbox3-hs.dts
new file mode 100644
index 000000000000..8734e7f8939e
--- /dev/null
+++ b/arch/arm/boot/dts/at91-kizbox3-hs.dts
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * at91-kizbox3-hs.dts - Device Tree file for Overkiz KIZBOX3-HS board
+ *
+ * Copyright (C) 2018 Overkiz SAS
+ *
+ * Authors: Dorian Rocipon <d.rocipon@overkiz.com>
+ *          Kevin Carli <k.carli@overkiz.com>
+ *          Mickael Gardet <m.gardet@overkiz.com>
+ */
+/dts-v1/;
+#include "at91-kizbox3_common.dtsi"
+
+/ {
+	model = "Overkiz KIZBOX3-HS";
+	compatible = "overkiz,kizbox3-hs", "atmel,sama5d2", "atmel,sama5";
+
+	pwm_leds {
+		status = "okay";
+
+		red {
+			status = "okay";
+		};
+
+		green {
+			status = "okay";
+		};
+
+		blue {
+			status = "okay";
+		};
+
+		white {
+			status = "okay";
+		};
+	};
+
+	leds  {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_led_red
+			     &pinctrl_led_white>;
+		status = "okay";
+
+		red {
+			label = "pio:red:user";
+			gpios = <&pioA PIN_PB1 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		white {
+			label = "pio:white:user";
+			gpios = <&pioA PIN_PB8 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default" , "default", "default",
+				"default", "default" ;
+		pinctrl-0 = <&pinctrl_key_gpio_default>;
+		pinctrl-1 = <&pinctrl_pio_rf &pinctrl_pio_wifi>;
+		pinctrl-2 = <&pinctrl_pio_io_boot
+			     &pinctrl_pio_io_reset
+			     &pinctrl_pio_io_test_radio>;
+		pinctrl-3 = <&pinctrl_pio_zbe_test_radio
+			     &pinctrl_pio_zbe_rst>;
+		pinctrl-4 = <&pinctrl_pio_input>;
+
+		SW1 {
+			label = "SW1";
+			gpios = <&pioA PIN_PA29 GPIO_ACTIVE_LOW>;
+			linux,code = <0x101>;
+			wakeup-source;
+		};
+
+		SW2 {
+			label = "SW2";
+			gpios = <&pioA PIN_PA18 GPIO_ACTIVE_LOW>;
+			linux,code = <0x102>;
+			wakeup-source;
+		};
+
+		SW3 {
+			label = "SW3";
+			gpios = <&pioA PIN_PA22 GPIO_ACTIVE_LOW>;
+			linux,code = <0x103>;
+			wakeup-source;
+		};
+
+		SW7 {
+			label = "SW7";
+			gpios = <&pioA PIN_PA26 GPIO_ACTIVE_LOW>;
+			linux,code = <0x107>;
+			wakeup-source;
+		};
+
+		SW8 {
+			label = "SW8";
+			gpios = <&pioA PIN_PA24 GPIO_ACTIVE_LOW>;
+			linux,code = <0x108>;
+			wakeup-source;
+		};
+	};
+
+	gpios {
+		compatible = "gpio";
+		status = "okay";
+
+		rf_on {
+			label = "rf on";
+			gpio = <&pioA PIN_PC19 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		wifi_on {
+			label = "wifi on";
+			gpio = <&pioA PIN_PC20 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		zbe_test_radio {
+			label = "zbe test radio";
+			gpio = <&pioA PIN_PB21 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		zbe_rst {
+			label = "zbe rst";
+			gpio = <&pioA PIN_PB25 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		io_reset {
+			label = "io reset";
+			gpio = <&pioA PIN_PB30 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		io_test_radio {
+			label = "io test radio";
+			gpio = <&pioA PIN_PC9 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		io_boot_0 {
+			label = "io boot 0";
+			gpio = <&pioA PIN_PC11 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		io_boot_1 {
+			label = "io boot 1";
+			gpio = <&pioA PIN_PC17 GPIO_ACTIVE_HIGH>;
+			output;
+			init-low;
+		};
+
+		verbose_bootloader {
+			label = "verbose bootloader";
+			gpio = <&pioA PIN_PB11 GPIO_ACTIVE_HIGH>;
+			input;
+		};
+
+		 nail_bed_detection  {
+			label = "nail bed detection";
+			gpio = <&pioA PIN_PB12 GPIO_ACTIVE_HIGH>;
+			input;
+		};
+
+		 id_usba {
+			label = "id usba";
+			gpio = <&pioA PIN_PC0 GPIO_ACTIVE_LOW>;
+			input;
+		};
+	};
+};
+
+&pioA {
+	pinctrl_key_gpio_default: key_gpio_default {
+		pinmux=  <PIN_PA22__GPIO>,
+		<PIN_PA24__GPIO>,
+		<PIN_PA26__GPIO>,
+		<PIN_PA29__GPIO>,
+		<PIN_PA18__GPIO>;
+		bias-disable;
+		};
+
+	pinctrl_gpio {
+		pinctrl_pio_rf: gpio_rf {
+			pinmux = <PIN_PC19__GPIO>;
+			bias-disable;
+		};
+		pinctrl_pio_wifi: gpio_wifi {
+			pinmux = <PIN_PC20__GPIO>;
+			bias-disable;
+		};
+		pinctrl_pio_io_boot: gpio_io_boot {
+			pinmux =
+			<PIN_PC11__GPIO>,
+			<PIN_PC17__GPIO>;
+			bias-disable;
+		};
+		pinctrl_pio_io_test_radio: gpio_io_test_radio {
+			pinmux = <PIN_PC9__GPIO>;
+			bias-disable;
+		};
+		pinctrl_pio_zbe_test_radio: gpio_zbe_test_radio {
+			pinmux = <PIN_PB21__GPIO>;
+			bias-disable;
+		};
+		pinctrl_pio_zbe_rst: gpio_zbe_rst {
+			pinmux = <PIN_PB25__GPIO>;
+			bias-disable;
+		};
+		/* stm32 reset must be open drain (internal pull up) */
+		pinctrl_pio_io_reset: gpio_io_reset {
+			pinmux = <PIN_PB30__GPIO>;
+			bias-disable;
+			drive-open-drain = <1>;
+			output-low;
+		};
+		pinctrl_pio_input: gpio_input {
+			pinmux =
+			<PIN_PB11__GPIO>,
+			<PIN_PB12__GPIO>,
+			<PIN_PC0__GPIO>;
+			bias-disable;
+		};
+	};
+
+	pinctrl_leds {
+		pinctrl_led_red: led_red {
+			pinmux = <PIN_PB1__GPIO>;
+			bias-disable;
+		};
+		pinctrl_led_white: led_white {
+			pinmux = <PIN_PB8__GPIO>;
+			bias-disable;
+		};
+	};
+};
+
+&adc {
+	status = "okay";
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart3 {
+	status = "okay";
+};
+
+&uart4 {
+	status = "okay";
+};
+
+&flx0 {
+	status = "okay";
+
+	uart5: serial@200  {
+			status = "okay";
+	};
+};
+
+&flx3 {
+	status = "okay";
+	uart6: serial@200 {
+		status = "okay";
+	};
+};
+
+&flx4 {
+	status = "okay";
+
+	i2c2: i2c@600 {
+		status = "okay";
+	};
+};
+
+&usb0 {
+	status = "okay";
+};
+
+&usb1 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/at91-kizbox3_common.dtsi b/arch/arm/boot/dts/at91-kizbox3_common.dtsi
new file mode 100644
index 000000000000..299e74d23184
--- /dev/null
+++ b/arch/arm/boot/dts/at91-kizbox3_common.dtsi
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * at91-kizbox3.dts - Device Tree Include file for Overkiz Kizbox 3
+ * family SoC boards
+ *
+ * Copyright (C) 2018 Overkiz SAS
+ *
+ * Authors: Dorian Rocipon <d.rocipon@overkiz.com>
+ *          Kevin Carli <k.carli@overkiz.com>
+ *          Mickael Gardet <m.gardet@overkiz.com>
+ */
+/dts-v1/;
+#include "sama5d2.dtsi"
+#include "sama5d2-pinfunc.h"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/mfd/atmel-flexcom.h>
+#include <dt-bindings/pinctrl/at91.h>
+#include <dt-bindings/pwm/pwm.h>
+
+/ {
+	model = "Overkiz Kizbox3";
+	compatible = "overkiz,kizbox3", "atmel,sama5d2", "atmel,sama5";
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+		serial4 = &uart4;
+		serial5 = &uart5;
+		serial6 = &uart6;
+	};
+
+	chosen {
+		bootargs = "ubi.mtd=ubi";
+		stdout-path = "serial1:115200n8";
+	};
+
+	clocks {
+		slow_xtal {
+			clock-frequency = <32768>;
+		};
+
+		main_xtal {
+			clock-frequency = <12000000>;
+		};
+	};
+
+	vdd_adc_vddana: supply_3v3_ana {
+		compatible = "regulator-fixed";
+		regulator-name = "adc-vddana";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+	};
+
+	vdd_adc_vref: supply_3v3_ref {
+		compatible = "regulator-fixed";
+		regulator-name = "adc-vref";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+	};
+
+	pwm_leds {
+		compatible = "pwm-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_pwm0_pwm_h0
+			     &pinctrl_pwm0_pwm_h1
+			     &pinctrl_pwm0_pwm_h2
+			     &pinctrl_pwm0_pwm_h3>;
+		status = "disabled";
+
+		red {
+			label = "pwm:red:user";
+			pwms = <&pwm0 0 10000000 0>;
+			max-brightness = <255>;
+			linux,default-trigger = "default-on";
+			status = "disabled";
+		};
+
+		green {
+			label = "pwm:green:user";
+			pwms = <&pwm0 1 10000000 0>;
+			max-brightness = <255>;
+			linux,default-trigger = "default-on";
+			status = "disabled";
+		};
+
+		blue {
+			label = "pwm:blue:user";
+			pwms = <&pwm0 2 10000000 0>;
+			max-brightness = <255>;
+			status = "disabled";
+		};
+
+		white {
+			label = "pwm:white:user";
+			pwms = <&pwm0 3 10000000 0>;
+			max-brightness = <255>;
+			status = "disabled";
+		};
+	};
+};
+
+&ebi {
+	status = "okay";
+};
+
+&nand_controller {
+	status = "okay";
+
+	nand@3 {
+		pinctrl-0 = <&pinctrl_ebi_nand_addr>;
+		pinctrl-names = "default";
+		reg = <0x3 0x0 0x800000>;
+
+		atmel,rb = <0>;
+		nand-bus-width = <8>;
+		nand-ecc-mode = "hw";
+		nand-ecc-strength = <4>;
+		nand-ecc-step-size = <512>;
+		nand-on-flash-bbt;
+		label = "atmel_nand";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			bootstrap@0 {
+				label = "bootstrap";
+				reg = <0x0 0x20000>;
+			};
+
+			u-boot@20000 {
+				label = "u-boot";
+				reg = <0x20000 0x140000>;
+			};
+
+			u-boot-factory@160000 {
+				label = "u-boot-factory";
+				reg = <0x160000 0x140000>;
+			};
+
+			ubi@2A0000 {
+				label = "ubi";
+				reg = <0x2A0000 0x7D60000>;
+			};
+		};
+
+	};
+};
+
+&rtc {
+	status = "okay";
+};
+
+&pioA {
+	pinctrl_ebi_nand_addr: ebi-addr-1 {
+		pinmux = <PIN_PA0__D0>,
+			<PIN_PA1__D1>,
+			<PIN_PA2__D2>,
+			<PIN_PA3__D3>,
+			<PIN_PA4__D4>,
+			<PIN_PA5__D5>,
+			<PIN_PA6__D6>,
+			<PIN_PA7__D7>,
+			<PIN_PA8__NWE_NANDWE>,
+			<PIN_PA9__NCS3>,
+			<PIN_PA10__A21_NANDALE>,
+			<PIN_PA11__A22_NANDCLE>,
+			<PIN_PA21__NANDRDY>;
+		bias-disable;
+	};
+
+	pinctrl_usart {
+		pinctrl_usart_0: usart0-0 {
+			pinmux = < PIN_PB26__URXD0>, <PIN_PB27__UTXD0>;
+			bias-disable;
+		};
+		pinctrl_usart_1: usart1-0 {
+			pinmux = < PIN_PD2__URXD1>, <PIN_PD3__UTXD1>;
+			bias-disable;
+		};
+		pinctrl_usart_2: usart2-0 {
+			pinmux = < PIN_PD4__URXD2>, <PIN_PD5__UTXD2>;
+			bias-disable;
+		};
+		pinctrl_usart_3: usart3-0 {
+			pinmux = < PIN_PC12__URXD3>, <PIN_PC13__UTXD3>;
+			bias-disable;
+		};
+		pinctrl_usart_4: usart4-0 {
+			pinmux = < PIN_PB3__URXD4>, <PIN_PB4__UTXD4>;
+			bias-disable;
+		};
+		pinctrl_flx0_default: flx0_usart_default {
+			pinmux = <PIN_PB28__FLEXCOM0_IO0>, //TX
+			<PIN_PB29__FLEXCOM0_IO1>; //RX
+			bias-disable;
+		};
+		pinctrl_flx3_default: flx3_usart_default {
+			pinmux = <PIN_PB22__FLEXCOM3_IO1>, //RX
+			<PIN_PB23__FLEXCOM3_IO0>; //TX
+			bias-disable;
+		};
+	};
+
+	pinctrl_flx4_default: flx4_i2c2_default {
+		pinmux = <PIN_PD12__FLEXCOM4_IO0>, //DATA
+		<PIN_PD13__FLEXCOM4_IO1>; //CLK
+		bias-disable;
+		drive-open-drain = <1>;
+	};
+
+	pinctrl_pwm0 {
+		pinctrl_pwm0_pwm_h0: pwm0_pwm_h0 {
+			pinmux = <PIN_PA30__PWMH0>;
+			bias-disable;
+		};
+		pinctrl_pwm0_pwm_h1: pwm0_pwmh1 {
+			pinmux = <PIN_PB0__PWMH1>;
+			bias-disable;
+		};
+		pinctrl_pwm0_pwm_h2: pwm0_pwm_h2 {
+			pinmux = <PIN_PB5__PWMH2>;
+			bias-disable;
+		};
+		pinctrl_pwm0_pwm_h3: pwm0_pwm_h3 {
+			pinmux = <PIN_PB7__PWMH3>;
+			bias-disable;
+		};
+	};
+
+	pinctrl_adc {
+		pinctrl_adc2: adc2 {
+			pinmux = <PIN_PD21__GPIO>;
+			bias-disable;
+		};
+		pinctrl_adc3: adc3 {
+			pinmux = <PIN_PD22__GPIO>;
+			bias-disable;
+		};
+		pinctrl_adc4: adc4 {
+			pinmux = <PIN_PD23__GPIO>;
+			bias-disable;
+		};
+		pinctrl_adc5: adc5 {
+			pinmux = <PIN_PD24__GPIO>;
+			bias-disable;
+		};
+	};
+};
+
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart_0>;
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+/* debug uart */
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart_1>;
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart_2>;
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&uart3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart_3>;
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart_4>;
+	atmel,use-dma-rx;
+	atmel,use-dma-tx;
+	status = "disabled";
+};
+
+&flx0 {
+	atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_USART>;
+	status = "disabled";
+
+	uart5: serial@200  {
+		compatible = "atmel,at91sam9260-usart";
+		reg = <0x200 0x400>;
+		interrupts = <19 IRQ_TYPE_LEVEL_HIGH 7>;
+		dmas = <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(11))>,
+		       <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(12))>;
+		dma-names = "tx", "rx";
+		clocks = <&pmc PMC_TYPE_PERIPHERAL 19>;
+		clock-names = "usart";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_flx0_default>;
+		atmel,fifo-size = <32>;
+		atmel,use-dma-rx;
+		atmel,use-dma-tx;
+		status = "disabled";
+	};
+};
+
+&flx3 {
+	atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_USART>;
+	status = "disabled";
+
+	uart6: serial@200 {
+		compatible = "atmel,at91sam9260-usart";
+		reg = <0x200 0x400>;
+		interrupts = <22 IRQ_TYPE_LEVEL_HIGH 7>;
+		dmas = <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(17))>,
+		       <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(18))>;
+		dma-names = "tx", "rx";
+		clocks = <&pmc PMC_TYPE_PERIPHERAL 22>;
+		clock-names = "usart";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_flx3_default>;
+		atmel,fifo-size = <32>;
+		atmel,use-dma-rx;
+		atmel,use-dma-tx;
+		status = "disabled";
+	};
+};
+
+&flx4 {
+	atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_TWI>;
+	status = "disabled";
+
+	i2c2: i2c@600 {
+		compatible = "atmel,sama5d2-i2c";
+		reg = <0x600 0x200>;
+		interrupts = <23 IRQ_TYPE_LEVEL_HIGH 7>;
+		dmas = <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(19))>,
+		       <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+			| AT91_XDMAC_DT_PERID(20))>;
+		dma-names = "tx", "rx";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&pmc PMC_TYPE_PERIPHERAL 23>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_flx4_default>;
+		atmel,fifo-size = <16>;
+		status = "disabled";
+	};
+};
+
+&pwm0 {
+	status = "okay";
+};
+
+&shutdown_controller {
+	atmel,shdwc-debouncer = <976>;
+	atmel,wakeup-rtc-timer;
+
+	input@0 {
+		reg = <0>;
+		atmel,wakeup-type = "low";
+	};
+};
+
+&watchdog {
+	status = "okay";
+};
+
+&adc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_adc2
+		     &pinctrl_adc3
+		     &pinctrl_adc4
+		     &pinctrl_adc5>;
+
+	vddana-supply = <&vdd_adc_vddana>;
+	vref-supply = <&vdd_adc_vref>;
+	status = "disabled";
+};
+
+&securam {
+	export;
+
+	/* export overkiz u-boot mode/version and factory */
+	uboot@1400 {
+		reg = <0x1400 0x20>;
+		export;
+	};
+};
diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
index 89f0c9979b89..fca5716ce44f 100644
--- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
@@ -53,6 +53,7 @@
 
 		sdmmc0: sdio-host@a0000000 {
 			bus-width = <8>;
+			mmc-ddr-3_3v;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_sdmmc0_default>;
 			status = "okay";
diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index 808e399fd39a..9d0a7fbea725 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -334,6 +334,9 @@
 					pinctrl-names = "default";
 					pinctrl-0 = <&pinctrl_flx4_default>;
 					atmel,fifo-size = <16>;
+					i2c-analog-filter;
+					i2c-digital-filter;
+					i2c-digital-filter-width-ns = <35>;
 					status = "okay";
 				};
 			};
@@ -342,6 +345,9 @@
 				dmas = <0>, <0>;
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_i2c1_default>;
+				i2c-analog-filter;
+				i2c-digital-filter;
+				i2c-digital-filter-width-ns = <35>;
 				status = "okay";
 
 				at24@54 {
diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index fdfc37d716e0..924d9491780d 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -49,6 +49,7 @@
 			};
 
 			i2c0: i2c@f8014000 {
+				i2c-digital-filter;
 				status = "okay";
 			};
 
diff --git a/arch/arm/boot/dts/atlas7-evb.dts b/arch/arm/boot/dts/atlas7-evb.dts
index e0c0291ac9fd..e0515043d145 100644
--- a/arch/arm/boot/dts/atlas7-evb.dts
+++ b/arch/arm/boot/dts/atlas7-evb.dts
@@ -119,7 +119,7 @@
 				label = "rearview key";
 				linux,code = <KEY_CAMERA>;
 				gpios = <&gpio_1 3 GPIO_ACTIVE_LOW>;
-				debounce_interval = <100>;
+				debounce-interval = <100>;
 			};
 		};
 
diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
index 2dac3efc7640..1bc45cfd5453 100644
--- a/arch/arm/boot/dts/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
@@ -174,8 +174,8 @@
 		mdio: mdio@18002000 {
 			compatible = "brcm,iproc-mdio";
 			reg = <0x18002000 0x8>;
-			#size-cells = <1>;
-			#address-cells = <0>;
+			#size-cells = <0>;
+			#address-cells = <1>;
 			status = "disabled";
 
 			gphy0: ethernet-phy@0 {
diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi
index e4d49731287f..6142c672811e 100644
--- a/arch/arm/boot/dts/bcm-hr2.dtsi
+++ b/arch/arm/boot/dts/bcm-hr2.dtsi
@@ -268,7 +268,7 @@
 			clock-frequency = <100000>;
 		};
 
-		watchdog@39000 {
+		watchdog: watchdog@39000 {
 			compatible = "arm,sp805", "arm,primecell";
 			reg = <0x39000 0x1000>;
 			interrupts = <GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
new file mode 100644
index 000000000000..1b5a835f66bd
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+#include "bcm2711.dtsi"
+#include "bcm2835-rpi.dtsi"
+#include "bcm283x-rpi-usb-peripheral.dtsi"
+
+/ {
+	compatible = "raspberrypi,4-model-b", "brcm,bcm2711";
+	model = "Raspberry Pi 4 Model B";
+
+	chosen {
+		/* 8250 auxiliary UART instead of pl011 */
+		stdout-path = "serial1:115200n8";
+	};
+
+	/* Will be filled by the bootloader */
+	memory@0 {
+		device_type = "memory";
+		reg = <0 0 0>;
+	};
+
+	aliases {
+		ethernet0 = &genet;
+	};
+
+	leds {
+		act {
+			gpios = <&gpio 42 GPIO_ACTIVE_HIGH>;
+		};
+
+		pwr {
+			label = "PWR";
+			gpios = <&expgpio 2 GPIO_ACTIVE_LOW>;
+		};
+	};
+
+	wifi_pwrseq: wifi-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
+	};
+
+	sd_io_1v8_reg: sd_io_1v8_reg {
+		compatible = "regulator-gpio";
+		regulator-name = "vdd-sd-io";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+		regulator-settling-time-us = <5000>;
+		gpios = <&expgpio 4 GPIO_ACTIVE_HIGH>;
+		states = <1800000 0x1
+			  3300000 0x0>;
+		status = "okay";
+	};
+};
+
+&firmware {
+	expgpio: gpio {
+		compatible = "raspberrypi,firmware-gpio";
+		gpio-controller;
+		#gpio-cells = <2>;
+		gpio-line-names = "BT_ON",
+				  "WL_ON",
+				  "PWR_LED_OFF",
+				  "GLOBAL_RESET",
+				  "VDD_SD_IO_SEL",
+				  "CAM_GPIO",
+				  "",
+				  "";
+		status = "okay";
+	};
+};
+
+&pwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pwm1_0_gpio40 &pwm1_1_gpio41>;
+	status = "okay";
+};
+
+/* SDHCI is used to control the SDIO for wireless */
+&sdhci {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_gpio34>;
+	bus-width = <4>;
+	non-removable;
+	mmc-pwrseq = <&wifi_pwrseq>;
+	status = "okay";
+
+	brcmf: wifi@1 {
+		reg = <1>;
+		compatible = "brcm,bcm4329-fmac";
+	};
+};
+
+/* EMMC2 is used to drive the SD card */
+&emmc2 {
+	vqmmc-supply = <&sd_io_1v8_reg>;
+	broken-cd;
+	status = "okay";
+};
+
+&genet {
+	phy-handle = <&phy1>;
+	phy-mode = "rgmii-rxid";
+	status = "okay";
+};
+
+&genet_mdio {
+	phy1: ethernet-phy@1 {
+		/* No PHY interrupt */
+		reg = <0x1>;
+	};
+};
+
+/* uart0 communicates with the BT module */
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart0_ctsrts_gpio30 &uart0_gpio32>;
+	uart-has-rtscts;
+	status = "okay";
+
+	bluetooth {
+		compatible = "brcm,bcm43438-bt";
+		max-speed = <2000000>;
+		shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>;
+	};
+};
+
+/* uart1 is mapped to the pin header */
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_gpio14>;
+	status = "okay";
+};
+
+&vchiq {
+	interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
new file mode 100644
index 000000000000..8eedd6eb409b
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -0,0 +1,902 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcm283x.dtsi"
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/soc/bcm2835-pm.h>
+
+/ {
+	compatible = "brcm,bcm2711";
+
+	#address-cells = <2>;
+	#size-cells = <1>;
+
+	interrupt-parent = <&gicv2>;
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+
+		/*
+		 * arm64 reserves the CMA by default somewhere in ZONE_DMA32,
+		 * that's not good enough for the BCM2711 as some devices can
+		 * only address the lower 1G of memory (ZONE_DMA).
+		 */
+		linux,cma {
+			compatible = "shared-dma-pool";
+			size = <0x2000000>; /* 32MB */
+			alloc-ranges = <0x0 0x00000000 0x40000000>;
+			reusable;
+			linux,cma-default;
+		};
+	};
+
+
+	soc {
+		/*
+		 * Defined ranges:
+		 *   Common BCM283x peripherals
+		 *   BCM2711-specific peripherals
+		 *   ARM-local peripherals
+		 */
+		ranges = <0x7e000000  0x0 0xfe000000  0x01800000>,
+			 <0x7c000000  0x0 0xfc000000  0x02000000>,
+			 <0x40000000  0x0 0xff800000  0x00800000>;
+		/* Emulate a contiguous 30-bit address range for DMA */
+		dma-ranges = <0xc0000000  0x0 0x00000000  0x40000000>;
+
+		/*
+		 * This node is the provider for the enable-method for
+		 * bringing up secondary cores.
+		 */
+		local_intc: local_intc@40000000 {
+			compatible = "brcm,bcm2836-l1-intc";
+			reg = <0x40000000 0x100>;
+		};
+
+		gicv2: interrupt-controller@40041000 {
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			compatible = "arm,gic-400";
+			reg =	<0x40041000 0x1000>,
+				<0x40042000 0x2000>,
+				<0x40044000 0x2000>,
+				<0x40046000 0x2000>;
+			interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) |
+						 IRQ_TYPE_LEVEL_HIGH)>;
+		};
+
+		avs_monitor: avs-monitor@7d5d2000 {
+			compatible = "brcm,bcm2711-avs-monitor",
+				     "syscon", "simple-mfd";
+			reg = <0x7d5d2000 0xf00>;
+
+			thermal: thermal {
+				compatible = "brcm,bcm2711-thermal";
+				#thermal-sensor-cells = <0>;
+			};
+		};
+
+		dma: dma@7e007000 {
+			compatible = "brcm,bcm2835-dma";
+			reg = <0x7e007000 0xb00>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+				     /* DMA lite 7 - 10 */
+				     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "dma0",
+					  "dma1",
+					  "dma2",
+					  "dma3",
+					  "dma4",
+					  "dma5",
+					  "dma6",
+					  "dma7",
+					  "dma8",
+					  "dma9",
+					  "dma10";
+			#dma-cells = <1>;
+			brcm,dma-channel-mask = <0x07f5>;
+		};
+
+		pm: watchdog@7e100000 {
+			compatible = "brcm,bcm2835-pm", "brcm,bcm2835-pm-wdt";
+			#power-domain-cells = <1>;
+			#reset-cells = <1>;
+			reg = <0x7e100000 0x114>,
+			      <0x7e00a000 0x24>,
+			      <0x7ec11000 0x20>;
+			clocks = <&clocks BCM2835_CLOCK_V3D>,
+				 <&clocks BCM2835_CLOCK_PERI_IMAGE>,
+				 <&clocks BCM2835_CLOCK_H264>,
+				 <&clocks BCM2835_CLOCK_ISP>;
+			clock-names = "v3d", "peri_image", "h264", "isp";
+			system-power-controller;
+		};
+
+		rng@7e104000 {
+			interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+
+			/* RNG is incompatible with brcm,bcm2835-rng */
+			status = "disabled";
+		};
+
+		uart2: serial@7e201400 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x7e201400 0x200>;
+			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_UART>,
+				 <&clocks BCM2835_CLOCK_VPU>;
+			clock-names = "uartclk", "apb_pclk";
+			arm,primecell-periphid = <0x00241011>;
+			status = "disabled";
+		};
+
+		uart3: serial@7e201600 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x7e201600 0x200>;
+			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_UART>,
+				 <&clocks BCM2835_CLOCK_VPU>;
+			clock-names = "uartclk", "apb_pclk";
+			arm,primecell-periphid = <0x00241011>;
+			status = "disabled";
+		};
+
+		uart4: serial@7e201800 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x7e201800 0x200>;
+			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_UART>,
+				 <&clocks BCM2835_CLOCK_VPU>;
+			clock-names = "uartclk", "apb_pclk";
+			arm,primecell-periphid = <0x00241011>;
+			status = "disabled";
+		};
+
+		uart5: serial@7e201a00 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x7e201a00 0x200>;
+			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_UART>,
+				 <&clocks BCM2835_CLOCK_VPU>;
+			clock-names = "uartclk", "apb_pclk";
+			arm,primecell-periphid = <0x00241011>;
+			status = "disabled";
+		};
+
+		spi3: spi@7e204600 {
+			compatible = "brcm,bcm2835-spi";
+			reg = <0x7e204600 0x0200>;
+			interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		spi4: spi@7e204800 {
+			compatible = "brcm,bcm2835-spi";
+			reg = <0x7e204800 0x0200>;
+			interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		spi5: spi@7e204a00 {
+			compatible = "brcm,bcm2835-spi";
+			reg = <0x7e204a00 0x0200>;
+			interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		spi6: spi@7e204c00 {
+			compatible = "brcm,bcm2835-spi";
+			reg = <0x7e204c00 0x0200>;
+			interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		i2c3: i2c@7e205600 {
+			compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+			reg = <0x7e205600 0x200>;
+			interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		i2c4: i2c@7e205800 {
+			compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+			reg = <0x7e205800 0x200>;
+			interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		i2c5: i2c@7e205a00 {
+			compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+			reg = <0x7e205a00 0x200>;
+			interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		i2c6: i2c@7e205c00 {
+			compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+			reg = <0x7e205c00 0x200>;
+			interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		pwm1: pwm@7e20c800 {
+			compatible = "brcm,bcm2835-pwm";
+			reg = <0x7e20c800 0x28>;
+			clocks = <&clocks BCM2835_CLOCK_PWM>;
+			assigned-clocks = <&clocks BCM2835_CLOCK_PWM>;
+			assigned-clock-rates = <10000000>;
+			#pwm-cells = <2>;
+			status = "disabled";
+		};
+
+		emmc2: emmc2@7e340000 {
+			compatible = "brcm,bcm2711-emmc2";
+			reg = <0x7e340000 0x100>;
+			interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clocks BCM2711_CLOCK_EMMC2>;
+			status = "disabled";
+		};
+
+		hvs@7e400000 {
+			interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+		};
+	};
+
+	arm-pmu {
+		compatible = "arm,cortex-a72-pmu", "arm,armv8-pmuv3";
+		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>;
+		/* This only applies to the ARMv7 stub */
+		arm,cpu-registers-not-fw-configured;
+	};
+
+	cpus: cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72";
+			reg = <0>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x000000d8>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72";
+			reg = <1>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x000000e0>;
+		};
+
+		cpu2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72";
+			reg = <2>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x000000e8>;
+		};
+
+		cpu3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72";
+			reg = <3>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x000000f0>;
+		};
+	};
+
+	scb {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+
+		ranges = <0x0 0x7c000000  0x0 0xfc000000  0x03800000>;
+
+		genet: ethernet@7d580000 {
+			compatible = "brcm,bcm2711-genet-v5";
+			reg = <0x0 0x7d580000 0x10000>;
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+
+			genet_mdio: mdio@e14 {
+				compatible = "brcm,genet-mdio-v5";
+				reg = <0xe14 0x8>;
+				reg-names = "mdio";
+				#address-cells = <0x0>;
+				#size-cells = <0x1>;
+			};
+		};
+	};
+};
+
+&clk_osc {
+	clock-frequency = <54000000>;
+};
+
+&clocks {
+	compatible = "brcm,bcm2711-cprman";
+};
+
+&cpu_thermal {
+	coefficients = <(-487) 410040>;
+	thermal-sensors = <&thermal>;
+};
+
+&dsi0 {
+	interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&dsi1 {
+	interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&gpio {
+	compatible = "brcm,bcm2711-gpio";
+	interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+
+	gpclk0_gpio49: gpclk0_gpio49 {
+		pin-gpclk {
+			pins = "gpio49";
+			function = "alt1";
+			bias-disable;
+		};
+	};
+	gpclk1_gpio50: gpclk1_gpio50 {
+		pin-gpclk {
+			pins = "gpio50";
+			function = "alt1";
+			bias-disable;
+		};
+	};
+	gpclk2_gpio51: gpclk2_gpio51 {
+		pin-gpclk {
+			pins = "gpio51";
+			function = "alt1";
+			bias-disable;
+		};
+	};
+
+	i2c0_gpio46: i2c0_gpio46 {
+		pin-sda {
+			function = "alt0";
+			pins = "gpio46";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt0";
+			pins = "gpio47";
+			bias-disable;
+		};
+	};
+	i2c1_gpio46: i2c1_gpio46 {
+		pin-sda {
+			function = "alt1";
+			pins = "gpio46";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt1";
+			pins = "gpio47";
+			bias-disable;
+		};
+	};
+	i2c3_gpio2: i2c3_gpio2 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio2";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio3";
+			bias-disable;
+		};
+	};
+	i2c3_gpio4: i2c3_gpio4 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio4";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio5";
+			bias-disable;
+		};
+	};
+	i2c4_gpio6: i2c4_gpio6 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio6";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio7";
+			bias-disable;
+		};
+	};
+	i2c4_gpio8: i2c4_gpio8 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio8";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio9";
+			bias-disable;
+		};
+	};
+	i2c5_gpio10: i2c5_gpio10 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio10";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio11";
+			bias-disable;
+		};
+	};
+	i2c5_gpio12: i2c5_gpio12 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio12";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio13";
+			bias-disable;
+		};
+	};
+	i2c6_gpio0: i2c6_gpio0 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio0";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio1";
+			bias-disable;
+		};
+	};
+	i2c6_gpio22: i2c6_gpio22 {
+		pin-sda {
+			function = "alt5";
+			pins = "gpio22";
+			bias-pull-up;
+		};
+		pin-scl {
+			function = "alt5";
+			pins = "gpio23";
+			bias-disable;
+		};
+	};
+	i2c_slave_gpio8: i2c_slave_gpio8 {
+		pins-i2c-slave {
+			pins = "gpio8",
+			       "gpio9",
+			       "gpio10",
+			       "gpio11";
+			function = "alt3";
+		};
+	};
+
+	jtag_gpio48: jtag_gpio48 {
+		pins-jtag {
+			pins = "gpio48",
+			       "gpio49",
+			       "gpio50",
+			       "gpio51",
+			       "gpio52",
+			       "gpio53";
+			function = "alt4";
+		};
+	};
+
+	mii_gpio28: mii_gpio28 {
+		pins-mii {
+			pins = "gpio28",
+			       "gpio29",
+			       "gpio30",
+			       "gpio31";
+			function = "alt4";
+		};
+	};
+	mii_gpio36: mii_gpio36 {
+		pins-mii {
+			pins = "gpio36",
+			       "gpio37",
+			       "gpio38",
+			       "gpio39";
+			function = "alt5";
+		};
+	};
+
+	pcm_gpio50: pcm_gpio50 {
+		pins-pcm {
+			pins = "gpio50",
+			       "gpio51",
+			       "gpio52",
+			       "gpio53";
+			function = "alt2";
+		};
+	};
+
+	pwm0_0_gpio12: pwm0_0_gpio12 {
+		pin-pwm {
+			pins = "gpio12";
+			function = "alt0";
+			bias-disable;
+		};
+	};
+	pwm0_0_gpio18: pwm0_0_gpio18 {
+		pin-pwm {
+			pins = "gpio18";
+			function = "alt5";
+			bias-disable;
+		};
+	};
+	pwm1_0_gpio40: pwm1_0_gpio40 {
+		pin-pwm {
+			pins = "gpio40";
+			function = "alt0";
+			bias-disable;
+		};
+	};
+	pwm0_1_gpio13: pwm0_1_gpio13 {
+		pin-pwm {
+			pins = "gpio13";
+			function = "alt0";
+			bias-disable;
+		};
+	};
+	pwm0_1_gpio19: pwm0_1_gpio19 {
+		pin-pwm {
+			pins = "gpio19";
+			function = "alt5";
+			bias-disable;
+		};
+	};
+	pwm1_1_gpio41: pwm1_1_gpio41 {
+		pin-pwm {
+			pins = "gpio41";
+			function = "alt0";
+			bias-disable;
+		};
+	};
+	pwm0_1_gpio45: pwm0_1_gpio45 {
+		pin-pwm {
+			pins = "gpio45";
+			function = "alt0";
+			bias-disable;
+		};
+	};
+	pwm0_0_gpio52: pwm0_0_gpio52 {
+		pin-pwm {
+			pins = "gpio52";
+			function = "alt1";
+			bias-disable;
+		};
+	};
+	pwm0_1_gpio53: pwm0_1_gpio53 {
+		pin-pwm {
+			pins = "gpio53";
+			function = "alt1";
+			bias-disable;
+		};
+	};
+
+	rgmii_gpio35: rgmii_gpio35 {
+		pin-start-stop {
+			pins = "gpio35";
+			function = "alt4";
+		};
+		pin-rx-ok {
+			pins = "gpio36";
+			function = "alt4";
+		};
+	};
+	rgmii_irq_gpio34: rgmii_irq_gpio34 {
+		pin-irq {
+			pins = "gpio34";
+			function = "alt5";
+		};
+	};
+	rgmii_irq_gpio39: rgmii_irq_gpio39 {
+		pin-irq {
+			pins = "gpio39";
+			function = "alt4";
+		};
+	};
+	rgmii_mdio_gpio28: rgmii_mdio_gpio28 {
+		pins-mdio {
+			pins = "gpio28",
+			       "gpio29";
+			function = "alt5";
+		};
+	};
+	rgmii_mdio_gpio37: rgmii_mdio_gpio37 {
+		pins-mdio {
+			pins = "gpio37",
+			       "gpio38";
+			function = "alt4";
+		};
+	};
+
+	spi0_gpio46: spi0_gpio46 {
+		pins-spi {
+			pins = "gpio46",
+			       "gpio47",
+			       "gpio48",
+			       "gpio49";
+			function = "alt2";
+		};
+	};
+	spi2_gpio46: spi2_gpio46 {
+		pins-spi {
+			pins = "gpio46",
+			       "gpio47",
+			       "gpio48",
+			       "gpio49",
+			       "gpio50";
+			function = "alt5";
+		};
+	};
+	spi3_gpio0: spi3_gpio0 {
+		pins-spi {
+			pins = "gpio0",
+			       "gpio1",
+			       "gpio2",
+			       "gpio3";
+			function = "alt3";
+		};
+	};
+	spi4_gpio4: spi4_gpio4 {
+		pins-spi {
+			pins = "gpio4",
+			       "gpio5",
+			       "gpio6",
+			       "gpio7";
+			function = "alt3";
+		};
+	};
+	spi5_gpio12: spi5_gpio12 {
+		pins-spi {
+			pins = "gpio12",
+			       "gpio13",
+			       "gpio14",
+			       "gpio15";
+			function = "alt3";
+		};
+	};
+	spi6_gpio18: spi6_gpio18 {
+		pins-spi {
+			pins = "gpio18",
+			       "gpio19",
+			       "gpio20",
+			       "gpio21";
+			function = "alt3";
+		};
+	};
+
+	uart2_gpio0: uart2_gpio0 {
+		pin-tx {
+			pins = "gpio0";
+			function = "alt4";
+			bias-disable;
+		};
+		pin-rx {
+			pins = "gpio1";
+			function = "alt4";
+			bias-pull-up;
+		};
+	};
+	uart2_ctsrts_gpio2: uart2_ctsrts_gpio2 {
+		pin-cts {
+			pins = "gpio2";
+			function = "alt4";
+			bias-pull-up;
+		};
+		pin-rts {
+			pins = "gpio3";
+			function = "alt4";
+			bias-disable;
+		};
+	};
+	uart3_gpio4: uart3_gpio4 {
+		pin-tx {
+			pins = "gpio4";
+			function = "alt4";
+			bias-disable;
+		};
+		pin-rx {
+			pins = "gpio5";
+			function = "alt4";
+			bias-pull-up;
+		};
+	};
+	uart3_ctsrts_gpio6: uart3_ctsrts_gpio6 {
+		pin-cts {
+			pins = "gpio6";
+			function = "alt4";
+			bias-pull-up;
+		};
+		pin-rts {
+			pins = "gpio7";
+			function = "alt4";
+			bias-disable;
+		};
+	};
+	uart4_gpio8: uart4_gpio8 {
+		pin-tx {
+			pins = "gpio8";
+			function = "alt4";
+			bias-disable;
+		};
+		pin-rx {
+			pins = "gpio9";
+			function = "alt4";
+			bias-pull-up;
+		};
+	};
+	uart4_ctsrts_gpio10: uart4_ctsrts_gpio10 {
+		pin-cts {
+			pins = "gpio10";
+			function = "alt4";
+			bias-pull-up;
+		};
+		pin-rts {
+			pins = "gpio11";
+			function = "alt4";
+			bias-disable;
+		};
+	};
+	uart5_gpio12: uart5_gpio12 {
+		pin-tx {
+			pins = "gpio12";
+			function = "alt4";
+			bias-disable;
+		};
+		pin-rx {
+			pins = "gpio13";
+			function = "alt4";
+			bias-pull-up;
+		};
+	};
+	uart5_ctsrts_gpio14: uart5_ctsrts_gpio14 {
+		pin-cts {
+			pins = "gpio14";
+			function = "alt4";
+			bias-pull-up;
+		};
+		pin-rts {
+			pins = "gpio15";
+			function = "alt4";
+			bias-disable;
+		};
+	};
+};
+
+&i2c0 {
+	compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+	interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&i2c1 {
+	compatible = "brcm,bcm2711-i2c", "brcm,bcm2835-i2c";
+	interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&mailbox {
+	interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&sdhci {
+	interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&sdhost {
+	interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&spi {
+	interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&spi1 {
+	interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&spi2 {
+	interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&system_timer {
+	interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&txp {
+	interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&uart0 {
+	interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&uart1 {
+	interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&usb {
+	interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&vec {
+	interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-common.dtsi b/arch/arm/boot/dts/bcm2835-common.dtsi
new file mode 100644
index 000000000000..fe1ab40c7f22
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2835-common.dtsi
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This include file covers the common peripherals and configuration between
+ * bcm2835, bcm2836 and bcm2837 implementations.
+ */
+
+/ {
+	interrupt-parent = <&intc>;
+
+	soc {
+		dma: dma@7e007000 {
+			compatible = "brcm,bcm2835-dma";
+			reg = <0x7e007000 0xf00>;
+			interrupts = <1 16>,
+				     <1 17>,
+				     <1 18>,
+				     <1 19>,
+				     <1 20>,
+				     <1 21>,
+				     <1 22>,
+				     <1 23>,
+				     <1 24>,
+				     <1 25>,
+				     <1 26>,
+				     /* dma channel 11-14 share one irq */
+				     <1 27>,
+				     <1 27>,
+				     <1 27>,
+				     <1 27>,
+				     /* unused shared irq for all channels */
+				     <1 28>;
+			interrupt-names = "dma0",
+					  "dma1",
+					  "dma2",
+					  "dma3",
+					  "dma4",
+					  "dma5",
+					  "dma6",
+					  "dma7",
+					  "dma8",
+					  "dma9",
+					  "dma10",
+					  "dma11",
+					  "dma12",
+					  "dma13",
+					  "dma14",
+					  "dma-shared-all";
+			#dma-cells = <1>;
+			brcm,dma-channel-mask = <0x7f35>;
+		};
+
+		intc: interrupt-controller@7e00b200 {
+			compatible = "brcm,bcm2835-armctrl-ic";
+			reg = <0x7e00b200 0x200>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		pm: watchdog@7e100000 {
+			compatible = "brcm,bcm2835-pm", "brcm,bcm2835-pm-wdt";
+			#power-domain-cells = <1>;
+			#reset-cells = <1>;
+			reg = <0x7e100000 0x114>,
+			      <0x7e00a000 0x24>;
+			clocks = <&clocks BCM2835_CLOCK_V3D>,
+				 <&clocks BCM2835_CLOCK_PERI_IMAGE>,
+				 <&clocks BCM2835_CLOCK_H264>,
+				 <&clocks BCM2835_CLOCK_ISP>;
+			clock-names = "v3d", "peri_image", "h264", "isp";
+			system-power-controller;
+		};
+
+		pixelvalve@7e206000 {
+			compatible = "brcm,bcm2835-pixelvalve0";
+			reg = <0x7e206000 0x100>;
+			interrupts = <2 13>; /* pwa0 */
+		};
+
+		pixelvalve@7e207000 {
+			compatible = "brcm,bcm2835-pixelvalve1";
+			reg = <0x7e207000 0x100>;
+			interrupts = <2 14>; /* pwa1 */
+		};
+
+		thermal: thermal@7e212000 {
+			compatible = "brcm,bcm2835-thermal";
+			reg = <0x7e212000 0x8>;
+			clocks = <&clocks BCM2835_CLOCK_TSENS>;
+			#thermal-sensor-cells = <0>;
+			status = "disabled";
+		};
+
+		i2c2: i2c@7e805000 {
+			compatible = "brcm,bcm2835-i2c";
+			reg = <0x7e805000 0x1000>;
+			interrupts = <2 21>;
+			clocks = <&clocks BCM2835_CLOCK_VPU>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "okay";
+		};
+
+		pixelvalve@7e807000 {
+			compatible = "brcm,bcm2835-pixelvalve2";
+			reg = <0x7e807000 0x100>;
+			interrupts = <2 10>; /* pixelvalve */
+		};
+
+		hdmi: hdmi@7e902000 {
+			compatible = "brcm,bcm2835-hdmi";
+			reg = <0x7e902000 0x600>,
+			      <0x7e808000 0x100>;
+			interrupts = <2 8>, <2 9>;
+			ddc = <&i2c2>;
+			clocks = <&clocks BCM2835_PLLH_PIX>,
+				 <&clocks BCM2835_CLOCK_HSM>;
+			clock-names = "pixel", "hdmi";
+			dmas = <&dma 17>;
+			dma-names = "audio-rx";
+			status = "disabled";
+		};
+
+		v3d: v3d@7ec00000 {
+			compatible = "brcm,bcm2835-v3d";
+			reg = <0x7ec00000 0x1000>;
+			interrupts = <1 10>;
+			power-domains = <&pm BCM2835_POWER_DOMAIN_GRAFX_V3D>;
+		};
+
+		vc4: gpu {
+			compatible = "brcm,bcm2835-vc4";
+		};
+	};
+};
+
+&cpu_thermal {
+	thermal-sensors = <&thermal>;
+};
+
+&gpio {
+	i2c_slave_gpio18: i2c_slave_gpio18 {
+		brcm,pins = <18 19 20 21>;
+		brcm,function = <BCM2835_FSEL_ALT3>;
+	};
+
+	jtag_gpio4: jtag_gpio4 {
+		brcm,pins = <4 5 6 12 13>;
+		brcm,function = <BCM2835_FSEL_ALT5>;
+	};
+
+	pwm0_gpio12: pwm0_gpio12 {
+		brcm,pins = <12>;
+		brcm,function = <BCM2835_FSEL_ALT0>;
+	};
+	pwm0_gpio18: pwm0_gpio18 {
+		brcm,pins = <18>;
+		brcm,function = <BCM2835_FSEL_ALT5>;
+	};
+	pwm0_gpio40: pwm0_gpio40 {
+		brcm,pins = <40>;
+		brcm,function = <BCM2835_FSEL_ALT0>;
+	};
+	pwm1_gpio13: pwm1_gpio13 {
+		brcm,pins = <13>;
+		brcm,function = <BCM2835_FSEL_ALT0>;
+	};
+	pwm1_gpio19: pwm1_gpio19 {
+		brcm,pins = <19>;
+		brcm,function = <BCM2835_FSEL_ALT5>;
+	};
+	pwm1_gpio41: pwm1_gpio41 {
+		brcm,pins = <41>;
+		brcm,function = <BCM2835_FSEL_ALT0>;
+	};
+	pwm1_gpio45: pwm1_gpio45 {
+		brcm,pins = <45>;
+		brcm,function = <BCM2835_FSEL_ALT0>;
+	};
+};
+
+&i2s {
+	dmas = <&dma 2>, <&dma 3>;
+	dma-names = "tx", "rx";
+};
+
+&sdhost {
+	dmas = <&dma 13>;
+	dma-names = "rx-tx";
+};
+
+&spi {
+	dmas = <&dma 6>, <&dma 7>;
+	dma-names = "tx", "rx";
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index 6c6a7f620d8b..394c8a71b13b 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -59,10 +59,6 @@
 	clock-frequency = <100000>;
 };
 
-&i2c2 {
-	status = "okay";
-};
-
 &usb {
 	power-domains = <&power RPI_POWER_DOMAIN_USB>;
 };
diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
index a5c3824c8056..53bf4579cc22 100644
--- a/arch/arm/boot/dts/bcm2835.dtsi
+++ b/arch/arm/boot/dts/bcm2835.dtsi
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcm283x.dtsi"
+#include "bcm2835-common.dtsi"
 
 / {
 	compatible = "brcm,bcm2835";
diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi
index c933e8413884..82d6c4662ae4 100644
--- a/arch/arm/boot/dts/bcm2836.dtsi
+++ b/arch/arm/boot/dts/bcm2836.dtsi
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcm283x.dtsi"
+#include "bcm2835-common.dtsi"
 
 / {
 	compatible = "brcm,bcm2836";
diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi
index beb6c502dadc..9e95fee78e19 100644
--- a/arch/arm/boot/dts/bcm2837.dtsi
+++ b/arch/arm/boot/dts/bcm2837.dtsi
@@ -1,4 +1,5 @@
 #include "bcm283x.dtsi"
+#include "bcm2835-common.dtsi"
 
 / {
 	compatible = "brcm,bcm2837";
diff --git a/arch/arm/boot/dts/bcm283x-rpi-usb-peripheral.dtsi b/arch/arm/boot/dts/bcm283x-rpi-usb-peripheral.dtsi
new file mode 100644
index 000000000000..0ff0e9e25327
--- /dev/null
+++ b/arch/arm/boot/dts/bcm283x-rpi-usb-peripheral.dtsi
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+&usb {
+	dr_mode = "peripheral";
+	g-rx-fifo-size = <256>;
+	g-np-tx-fifo-size = <32>;
+	g-tx-fifo-size = <256 256 512 512 512 768 768>;
+};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 2d191fcbc2cc..839491628e87 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -18,7 +18,6 @@
 / {
 	compatible = "brcm,bcm2835";
 	model = "BCM2835";
-	interrupt-parent = <&intc>;
 	#address-cells = <1>;
 	#size-cells = <1>;
 
@@ -36,11 +35,9 @@
 			polling-delay-passive = <0>;
 			polling-delay = <1000>;
 
-			thermal-sensors = <&thermal>;
-
 			trips {
 				cpu-crit {
-					temperature	= <80000>;
+					temperature	= <90000>;
 					hysteresis	= <0>;
 					type		= "critical";
 				};
@@ -56,7 +53,7 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 
-		timer@7e003000 {
+		system_timer: timer@7e003000 {
 			compatible = "brcm,bcm2835-system-timer";
 			reg = <0x7e003000 0x1000>;
 			interrupts = <1 0>, <1 1>, <1 2>, <1 3>;
@@ -67,74 +64,12 @@
 			clock-frequency = <1000000>;
 		};
 
-		txp@7e004000 {
+		txp: txp@7e004000 {
 			compatible = "brcm,bcm2835-txp";
 			reg = <0x7e004000 0x20>;
 			interrupts = <1 11>;
 		};
 
-		dma: dma@7e007000 {
-			compatible = "brcm,bcm2835-dma";
-			reg = <0x7e007000 0xf00>;
-			interrupts = <1 16>,
-				     <1 17>,
-				     <1 18>,
-				     <1 19>,
-				     <1 20>,
-				     <1 21>,
-				     <1 22>,
-				     <1 23>,
-				     <1 24>,
-				     <1 25>,
-				     <1 26>,
-				     /* dma channel 11-14 share one irq */
-				     <1 27>,
-				     <1 27>,
-				     <1 27>,
-				     <1 27>,
-				     /* unused shared irq for all channels */
-				     <1 28>;
-			interrupt-names = "dma0",
-					  "dma1",
-					  "dma2",
-					  "dma3",
-					  "dma4",
-					  "dma5",
-					  "dma6",
-					  "dma7",
-					  "dma8",
-					  "dma9",
-					  "dma10",
-					  "dma11",
-					  "dma12",
-					  "dma13",
-					  "dma14",
-					  "dma-shared-all";
-			#dma-cells = <1>;
-			brcm,dma-channel-mask = <0x7f35>;
-		};
-
-		intc: interrupt-controller@7e00b200 {
-			compatible = "brcm,bcm2835-armctrl-ic";
-			reg = <0x7e00b200 0x200>;
-			interrupt-controller;
-			#interrupt-cells = <2>;
-		};
-
-		pm: watchdog@7e100000 {
-			compatible = "brcm,bcm2835-pm", "brcm,bcm2835-pm-wdt";
-			#power-domain-cells = <1>;
-			#reset-cells = <1>;
-			reg = <0x7e100000 0x114>,
-			      <0x7e00a000 0x24>;
-			clocks = <&clocks BCM2835_CLOCK_V3D>,
-				 <&clocks BCM2835_CLOCK_PERI_IMAGE>,
-				 <&clocks BCM2835_CLOCK_H264>,
-				 <&clocks BCM2835_CLOCK_ISP>;
-			clock-names = "v3d", "peri_image", "h264", "isp";
-			system-power-controller;
-		};
-
 		clocks: cprman@7e101000 {
 			compatible = "brcm,bcm2835-cprman";
 			#clock-cells = <1>;
@@ -184,8 +119,7 @@
 			interrupt-controller;
 			#interrupt-cells = <2>;
 
-			/* Defines pin muxing groups according to
-			 * BCM2835-ARM-Peripherals.pdf page 102.
+			/* Defines common pin muxing groups
 			 *
 			 * While each pin can have its mux selected
 			 * for various functions individually, some
@@ -263,15 +197,7 @@
 				brcm,pins = <44 45>;
 				brcm,function = <BCM2835_FSEL_ALT2>;
 			};
-			i2c_slave_gpio18: i2c_slave_gpio18 {
-				brcm,pins = <18 19 20 21>;
-				brcm,function = <BCM2835_FSEL_ALT3>;
-			};
 
-			jtag_gpio4: jtag_gpio4 {
-				brcm,pins = <4 5 6 12 13>;
-				brcm,function = <BCM2835_FSEL_ALT5>;
-			};
 			jtag_gpio22: jtag_gpio22 {
 				brcm,pins = <22 23 24 25 26 27>;
 				brcm,function = <BCM2835_FSEL_ALT4>;
@@ -286,35 +212,6 @@
 				brcm,function = <BCM2835_FSEL_ALT2>;
 			};
 
-			pwm0_gpio12: pwm0_gpio12 {
-				brcm,pins = <12>;
-				brcm,function = <BCM2835_FSEL_ALT0>;
-			};
-			pwm0_gpio18: pwm0_gpio18 {
-				brcm,pins = <18>;
-				brcm,function = <BCM2835_FSEL_ALT5>;
-			};
-			pwm0_gpio40: pwm0_gpio40 {
-				brcm,pins = <40>;
-				brcm,function = <BCM2835_FSEL_ALT0>;
-			};
-			pwm1_gpio13: pwm1_gpio13 {
-				brcm,pins = <13>;
-				brcm,function = <BCM2835_FSEL_ALT0>;
-			};
-			pwm1_gpio19: pwm1_gpio19 {
-				brcm,pins = <19>;
-				brcm,function = <BCM2835_FSEL_ALT5>;
-			};
-			pwm1_gpio41: pwm1_gpio41 {
-				brcm,pins = <41>;
-				brcm,function = <BCM2835_FSEL_ALT0>;
-			};
-			pwm1_gpio45: pwm1_gpio45 {
-				brcm,pins = <45>;
-				brcm,function = <BCM2835_FSEL_ALT0>;
-			};
-
 			sdhost_gpio48: sdhost_gpio48 {
 				brcm,pins = <48 49 50 51 52 53>;
 				brcm,function = <BCM2835_FSEL_ALT0>;
@@ -396,7 +293,7 @@
 		};
 
 		uart0: serial@7e201000 {
-			compatible = "brcm,bcm2835-pl011", "arm,pl011", "arm,primecell";
+			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x7e201000 0x200>;
 			interrupts = <2 25>;
 			clocks = <&clocks BCM2835_CLOCK_UART>,
@@ -410,8 +307,6 @@
 			reg = <0x7e202000 0x100>;
 			interrupts = <2 24>;
 			clocks = <&clocks BCM2835_CLOCK_VPU>;
-			dmas = <&dma 13>;
-			dma-names = "rx-tx";
 			status = "disabled";
 		};
 
@@ -419,10 +314,6 @@
 			compatible = "brcm,bcm2835-i2s";
 			reg = <0x7e203000 0x24>;
 			clocks = <&clocks BCM2835_CLOCK_PCM>;
-
-			dmas = <&dma 2>,
-			       <&dma 3>;
-			dma-names = "tx", "rx";
 			status = "disabled";
 		};
 
@@ -431,8 +322,6 @@
 			reg = <0x7e204000 0x200>;
 			interrupts = <2 22>;
 			clocks = <&clocks BCM2835_CLOCK_VPU>;
-			dmas = <&dma 6>, <&dma 7>;
-			dma-names = "tx", "rx";
 			#address-cells = <1>;
 			#size-cells = <0>;
 			status = "disabled";
@@ -448,18 +337,6 @@
 			status = "disabled";
 		};
 
-		pixelvalve@7e206000 {
-			compatible = "brcm,bcm2835-pixelvalve0";
-			reg = <0x7e206000 0x100>;
-			interrupts = <2 13>; /* pwa0 */
-		};
-
-		pixelvalve@7e207000 {
-			compatible = "brcm,bcm2835-pixelvalve1";
-			reg = <0x7e207000 0x100>;
-			interrupts = <2 14>; /* pwa1 */
-		};
-
 		dpi: dpi@7e208000 {
 			compatible = "brcm,bcm2835-dpi";
 			reg = <0x7e208000 0x8c>;
@@ -490,14 +367,6 @@
 
 		};
 
-		thermal: thermal@7e212000 {
-			compatible = "brcm,bcm2835-thermal";
-			reg = <0x7e212000 0x8>;
-			clocks = <&clocks BCM2835_CLOCK_TSENS>;
-			#thermal-sensor-cells = <0>;
-			status = "disabled";
-		};
-
 		aux: aux@7e215000 {
 			compatible = "brcm,bcm2835-aux";
 			#clock-cells = <1>;
@@ -587,16 +456,6 @@
 			status = "disabled";
 		};
 
-		i2c2: i2c@7e805000 {
-			compatible = "brcm,bcm2835-i2c";
-			reg = <0x7e805000 0x1000>;
-			interrupts = <2 21>;
-			clocks = <&clocks BCM2835_CLOCK_VPU>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			status = "disabled";
-		};
-
 		vec: vec@7e806000 {
 			compatible = "brcm,bcm2835-vec";
 			reg = <0x7e806000 0x1000>;
@@ -605,26 +464,6 @@
 			status = "disabled";
 		};
 
-		pixelvalve@7e807000 {
-			compatible = "brcm,bcm2835-pixelvalve2";
-			reg = <0x7e807000 0x100>;
-			interrupts = <2 10>; /* pixelvalve */
-		};
-
-		hdmi: hdmi@7e902000 {
-			compatible = "brcm,bcm2835-hdmi";
-			reg = <0x7e902000 0x600>,
-			      <0x7e808000 0x100>;
-			interrupts = <2 8>, <2 9>;
-			ddc = <&i2c2>;
-			clocks = <&clocks BCM2835_PLLH_PIX>,
-				 <&clocks BCM2835_CLOCK_HSM>;
-			clock-names = "pixel", "hdmi";
-			dmas = <&dma 17>;
-			dma-names = "audio-rx";
-			status = "disabled";
-		};
-
 		usb: usb@7e980000 {
 			compatible = "brcm,bcm2835-usb";
 			reg = <0x7e980000 0x10000>;
@@ -636,36 +475,19 @@
 			phys = <&usbphy>;
 			phy-names = "usb2-phy";
 		};
-
-		v3d: v3d@7ec00000 {
-			compatible = "brcm,bcm2835-v3d";
-			reg = <0x7ec00000 0x1000>;
-			interrupts = <1 10>;
-			power-domains = <&pm BCM2835_POWER_DOMAIN_GRAFX_V3D>;
-		};
-
-		vc4: gpu {
-			compatible = "brcm,bcm2835-vc4";
-		};
 	};
 
 	clocks {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		/* The oscillator is the root of the clock tree. */
-		clk_osc: clock@3 {
+		clk_osc: clk-osc {
 			compatible = "fixed-clock";
-			reg = <3>;
 			#clock-cells = <0>;
 			clock-output-names = "osc";
 			clock-frequency = <19200000>;
 		};
 
-		clk_usb: clock@4 {
+		clk_usb: clk-usb {
 			compatible = "fixed-clock";
-			reg = <4>;
 			#clock-cells = <0>;
 			clock-output-names = "otg";
 			clock-frequency = <480000000>;
diff --git a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts
new file mode 100644
index 000000000000..334325390aed
--- /dev/null
+++ b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2019 Legrand AV Inc.
+ */
+
+/dts-v1/;
+
+#include "bcm47094.dtsi"
+#include "bcm5301x-nand-cs0-bch8.dtsi"
+
+/ {
+	compatible = "luxul,xwc-2000-v1", "brcm,bcm47094", "brcm,bcm4708";
+	model = "Luxul XWC-2000 V1";
+
+	chosen {
+		bootargs = "earlycon";
+	};
+
+	memory {
+		reg = <0x00000000 0x08000000
+		       0x88000000 0x18000000>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		status	{
+			label = "bcm53xx:green:status";
+			gpios = <&chipcommon 18 GPIO_ACTIVE_LOW>;
+			linux,default-trigger = "timer";
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		restart {
+			label = "Reset";
+			linux,code = <KEY_RESTART>;
+			gpios = <&chipcommon 19 GPIO_ACTIVE_LOW>;
+		};
+	};
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&spi_nor {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index 372dc1eb88a0..2d9b4dd05830 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -353,8 +353,8 @@
 	mdio: mdio@18003000 {
 		compatible = "brcm,iproc-mdio";
 		reg = <0x18003000 0x8>;
-		#size-cells = <1>;
-		#address-cells = <0>;
+		#size-cells = <0>;
+		#address-cells = <1>;
 	};
 
 	mdio-bus-mux@18003000 {
diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi
index 2e8a3977219f..3081b04e8c08 100644
--- a/arch/arm/boot/dts/dove.dtsi
+++ b/arch/arm/boot/dts/dove.dtsi
@@ -784,7 +784,7 @@
 				status = "disabled";
 			};
 
-			crypto_sram: sa-sram@ffffe000 {
+			crypto_sram: sram@ffffe000 {
 				compatible = "mmio-sram";
 				reg = <0xffffe000 0x800>;
 				clocks = <&gate_clk 15>;
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 5cac2dd58241..7e7aa101d8a4 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -442,7 +442,6 @@
 
 		target-module@f4000 {			/* 0x4a0f4000, ap 23 04.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox1";
 			reg = <0xf4000 0x4>,
 			      <0xf4010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1899,7 +1898,6 @@
 
 		target-module@90000 {			/* 0x48090000, ap 55 12.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "rng";
 			reg = <0x91fe0 0x4>,
 			      <0x91fe4 0x4>;
 			reg-names = "rev", "sysc";
@@ -2090,7 +2088,6 @@
 
 		target-module@b2000 {			/* 0x480b2000, ap 37 52.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "hdq1w";
 			reg = <0xb2000 0x4>,
 			      <0xb2014 0x4>,
 			      <0xb2018 0x4>;
@@ -3059,7 +3056,7 @@
 
 				davinci_mdio: mdio@1000 {
 					compatible = "ti,cpsw-mdio","ti,davinci_mdio";
-					clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 0>;
+					clocks = <&gmac_main_clk>;
 					clock-names = "fck";
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3079,6 +3076,58 @@
 					phys = <&phy_gmii_sel 2>;
 				};
 			};
+
+			mac_sw: switch@0 {
+				compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch";
+				reg = <0x0 0x4000>;
+				ranges = <0 0 0x4000>;
+				clocks = <&gmac_main_clk>;
+				clock-names = "fck";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				syscon = <&scm_conf>;
+				status = "disabled";
+
+				interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "rx_thresh", "rx", "tx", "misc";
+
+				ethernet-ports {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					cpsw_port1: port@1 {
+						reg = <1>;
+						label = "port1";
+						mac-address = [ 00 00 00 00 00 00 ];
+						phys = <&phy_gmii_sel 1>;
+					};
+
+					cpsw_port2: port@2 {
+						reg = <2>;
+						label = "port2";
+						mac-address = [ 00 00 00 00 00 00 ];
+						phys = <&phy_gmii_sel 2>;
+					};
+				};
+
+				davinci_mdio_sw: mdio@1000 {
+					compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+					clocks = <&gmac_main_clk>;
+					clock-names = "fck";
+					#address-cells = <1>;
+					#size-cells = <0>;
+					bus_freq = <1000000>;
+					reg = <0x1000 0x100>;
+				};
+
+				cpts {
+					clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>;
+					clock-names = "cpts";
+				};
+			};
 		};
 	};
 };
@@ -3199,7 +3248,6 @@
 
 		target-module@2000 {			/* 0x48802000, ap 95 7c.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox13";
 			reg = <0x2000 0x4>,
 			      <0x2010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3528,7 +3576,6 @@
 
 		target-module@3a000 {			/* 0x4883a000, ap 33 3e.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox2";
 			reg = <0x3a000 0x4>,
 			      <0x3a010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3559,7 +3606,6 @@
 
 		target-module@3c000 {			/* 0x4883c000, ap 35 3a.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox3";
 			reg = <0x3c000 0x4>,
 			      <0x3c010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3590,7 +3636,6 @@
 
 		target-module@3e000 {			/* 0x4883e000, ap 37 46.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox4";
 			reg = <0x3e000 0x4>,
 			      <0x3e010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3621,7 +3666,6 @@
 
 		target-module@40000 {			/* 0x48840000, ap 39 64.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox5";
 			reg = <0x40000 0x4>,
 			      <0x40010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3652,7 +3696,6 @@
 
 		target-module@42000 {			/* 0x48842000, ap 41 4e.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox6";
 			reg = <0x42000 0x4>,
 			      <0x42010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3683,7 +3726,6 @@
 
 		target-module@44000 {			/* 0x48844000, ap 43 42.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox7";
 			reg = <0x44000 0x4>,
 			      <0x44010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3714,7 +3756,6 @@
 
 		target-module@46000 {			/* 0x48846000, ap 45 48.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox8";
 			reg = <0x46000 0x4>,
 			      <0x46010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3833,7 +3874,6 @@
 
 		target-module@5e000 {			/* 0x4885e000, ap 69 6c.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox9";
 			reg = <0x5e000 0x4>,
 			      <0x5e010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3864,7 +3904,6 @@
 
 		target-module@60000 {			/* 0x48860000, ap 71 4a.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox10";
 			reg = <0x60000 0x4>,
 			      <0x60010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3895,7 +3934,6 @@
 
 		target-module@62000 {			/* 0x48862000, ap 73 74.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox11";
 			reg = <0x62000 0x4>,
 			      <0x62010 0x4>;
 			reg-names = "rev", "sysc";
@@ -3926,7 +3964,6 @@
 
 		target-module@64000 {			/* 0x48864000, ap 67 52.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox12";
 			reg = <0x64000 0x4>,
 			      <0x64010 0x4>;
 			reg-names = "rev", "sysc";
@@ -4301,7 +4338,6 @@
 
 		target-module@4000 {			/* 0x4ae14000, ap 7 28.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "wd_timer2";
 			reg = <0x4000 0x4>,
 			      <0x4010 0x4>,
 			      <0x4014 0x4>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 953f0ffce2a9..73e5011f531a 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -763,3 +763,54 @@
 
 #include "dra7-l4.dtsi"
 #include "dra7xx-clocks.dtsi"
+
+&prm {
+	prm_dsp1: prm@400 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x400 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_ipu: prm@500 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x500 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_core: prm@700 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x700 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_iva: prm@f00 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0xf00 0x100>;
+	};
+
+	prm_dsp2: prm@1b00 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1b00 0x40>;
+		#reset-cells = <1>;
+	};
+
+	prm_eve1: prm@1b40 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1b40 0x40>;
+	};
+
+	prm_eve2: prm@1b80 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1b80 0x40>;
+	};
+
+	prm_eve3: prm@1bc0 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1bc0 0x40>;
+	};
+
+	prm_eve4: prm@1c00 {
+		compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1c00 0x60>;
+	};
+};
diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi
new file mode 100644
index 000000000000..5a2c5320437d
--- /dev/null
+++ b/arch/arm/boot/dts/e60k02.dtsi
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Andreas Kemnade
+ * based on works
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ * and
+ * Copyright (C) 2014 Ricoh Electronic Devices Co., Ltd
+ *
+ * Netronix E60K02 board common.
+ * This board is equipped with different SoCs and
+ * found in ebook-readers like the Kobo Clara HD (with i.MX6SLL) and
+ * the Tolino Shine 3 (with i.MX6SL)
+ */
+#include <dt-bindings/input/input.h>
+
+/ {
+
+	chosen {
+		stdout-path = &uart1;
+	};
+
+	gpio_keys: gpio-keys {
+		compatible = "gpio-keys";
+
+		power {
+			label = "Power";
+			gpios = <&gpio5 8 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_POWER>;
+			wakeup-source;
+		};
+
+		cover {
+			label = "Cover";
+			gpios = <&gpio5 12 GPIO_ACTIVE_LOW>;
+			linux,code = <SW_LID>;
+			linux,input-type = <EV_SW>;
+			wakeup-source;
+		};
+	};
+
+	leds: leds {
+		compatible = "gpio-leds";
+
+		on {
+			label = "e60k02:white:on";
+			gpios = <&gpio5 7 GPIO_ACTIVE_LOW>;
+			linux,default-trigger = "timer";
+		};
+	};
+
+	memory {
+		reg = <0x80000000 0x20000000>;
+	};
+
+	reg_wifi: regulator-wifi {
+		compatible = "regulator-fixed";
+		regulator-name = "SD3_SPWR";
+		regulator-min-microvolt = <3000000>;
+		regulator-max-microvolt = <3000000>;
+		gpio = <&gpio4 29 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	wifi_pwrseq: wifi_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		post-power-on-delay-ms = <20>;
+		reset-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+	};
+};
+
+
+&i2c1 {
+	clock-frequency = <100000>;
+	status = "okay";
+
+	lm3630a: backlight@36 {
+		reg = <0x36>;
+		compatible = "ti,lm3630a";
+		enable-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		led@0 {
+			reg = <0>;
+			led-sources = <0>;
+			label = "backlight_warm";
+			default-brightness = <0>;
+			max-brightness = <255>;
+		};
+
+		led@1 {
+			reg = <1>;
+			led-sources = <1>;
+			label = "backlight_cold";
+			default-brightness = <0>;
+			max-brightness = <255>;
+		};
+	};
+};
+
+&i2c2 {
+	clock-frequency = <100000>;
+	status = "okay";
+
+	/* TODO: CYTTSP5 touch controller at 0x24 */
+
+	/* TODO: TPS65185 PMIC for E Ink at 0x68 */
+
+};
+
+&i2c3 {
+	clock-frequency = <100000>;
+	status = "okay";
+
+	ricoh619: pmic@32 {
+		compatible = "ricoh,rc5t619";
+		reg = <0x32>;
+		system-power-controller;
+
+		regulators {
+			dcdc1_reg: DCDC1 {
+				regulator-name = "DCDC1";
+				regulator-min-microvolt = <300000>;
+				regulator-max-microvolt = <1875000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <900000>;
+					regulator-suspend-min-microvolt = <900000>;
+				};
+			};
+
+			/* Core3_3V3 */
+			dcdc2_reg: DCDC2 {
+				regulator-name = "DCDC2";
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <3300000>;
+					regulator-suspend-min-microvolt = <3300000>;
+				};
+			};
+
+			dcdc3_reg: DCDC3 {
+				regulator-name = "DCDC3";
+				regulator-min-microvolt = <300000>;
+				regulator-max-microvolt = <1875000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <1140000>;
+					regulator-suspend-min-microvolt = <1140000>;
+				};
+			};
+
+			/* Core4_1V2 */
+			dcdc4_reg: DCDC4 {
+				regulator-name = "DCDC4";
+				regulator-min-microvolt = <1200000>;
+				regulator-max-microvolt = <1200000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <1140000>;
+					regulator-suspend-min-microvolt = <1140000>;
+				};
+			};
+
+			/* Core4_1V8 */
+			dcdc5_reg: DCDC5 {
+				regulator-name = "DCDC5";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <1700000>;
+					regulator-suspend-min-microvolt = <1700000>;
+				};
+			};
+
+			/* IR_3V3 */
+			ldo1_reg: LDO1  {
+				regulator-name = "LDO1";
+				regulator-boot-on;
+			};
+
+			/* Core1_3V3 */
+			ldo2_reg: LDO2  {
+				regulator-name = "LDO2";
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-max-microvolt = <3000000>;
+					regulator-suspend-min-microvolt = <3000000>;
+				};
+			};
+
+			/* Core5_1V2 */
+			ldo3_reg: LDO3  {
+				regulator-name = "LDO3";
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			ldo4_reg: LDO4 {
+				regulator-name = "LDO4";
+				regulator-boot-on;
+			};
+
+			/* SPD_3V3 */
+			ldo5_reg: LDO5 {
+				regulator-name = "LDO5";
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			/* DDR_0V6 */
+			ldo6_reg: LDO6 {
+				regulator-name = "LDO6";
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			/* VDD_PWM */
+			ldo7_reg: LDO7 {
+				regulator-name = "LDO7";
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			/* ldo_1v8 */
+			ldo8_reg: LDO8 {
+				regulator-name = "LDO8";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			ldo9_reg: LDO9 {
+				regulator-name = "LDO9";
+				regulator-boot-on;
+			};
+
+			ldo10_reg: LDO10 {
+				regulator-name = "LDO10";
+				regulator-boot-on;
+			};
+
+			ldortc1_reg: LDORTC1  {
+				regulator-name = "LDORTC1";
+				regulator-boot-on;
+			};
+		};
+	};
+};
+
+&snvs_rtc {
+	/* we are using the rtc in the pmic, not disabled in imx6sll.dtsi */
+	status = "disabled";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&usdhc2 {
+	non-removable;
+	status = "okay";
+};
+
+&usdhc3 {
+	vmmc-supply = <&reg_wifi>;
+	mmc-pwrseq = <&wifi_pwrseq>;
+	cap-power-off-card;
+	non-removable;
+	status = "okay";
+};
+
+&usbotg1 {
+	pinctrl-names = "default";
+	disable-over-current;
+	srp-disable;
+	hnp-disable;
+	adp-disable;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/emev2.dtsi b/arch/arm/boot/dts/emev2.dtsi
index 67d86012a85c..96678ddbb4e6 100644
--- a/arch/arm/boot/dts/emev2.dtsi
+++ b/arch/arm/boot/dts/emev2.dtsi
@@ -212,6 +212,7 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 	};
+
 	gpio1: gpio@e0050080 {
 		compatible = "renesas,em-gio";
 		reg = <0xe0050080 0x2c>, <0xe00500c0 0x20>;
@@ -224,6 +225,7 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 	};
+
 	gpio2: gpio@e0050100 {
 		compatible = "renesas,em-gio";
 		reg = <0xe0050100 0x2c>, <0xe0050140 0x20>;
@@ -236,6 +238,7 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 	};
+
 	gpio3: gpio@e0050180 {
 		compatible = "renesas,em-gio";
 		reg = <0xe0050180 0x2c>, <0xe00501c0 0x20>;
@@ -248,6 +251,7 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 	};
+
 	gpio4: gpio@e0050200 {
 		compatible = "renesas,em-gio";
 		reg = <0xe0050200 0x2c>, <0xe0050240 0x20>;
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index 784818490376..b016b0b68306 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -138,7 +138,7 @@
 		#size-cells = <1>;
 		ranges;
 
-		sysram@2020000 {
+		sram@2020000 {
 			compatible = "mmio-sram";
 			reg = <0x02020000 0x40000>;
 			#address-cells = <1>;
@@ -265,7 +265,7 @@
 					(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
 		};
 
-		mct@10050000 {
+		timer@10050000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x10050000 0x800>;
 			interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>,
@@ -314,8 +314,7 @@
 		sysmmu_jpeg: sysmmu@11a60000 {
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x11a60000 0x1000>;
-			interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
-				     <GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
 			clock-names = "sysmmu", "master";
 			clocks = <&cmu CLK_SMMUJPEG>, <&cmu CLK_JPEG>;
 			power-domains = <&pd_cam>;
@@ -355,8 +354,7 @@
 		sysmmu_fimd0: sysmmu@11e20000 {
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x11e20000 0x1000>;
-			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
-				     <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
 			clock-names = "sysmmu", "master";
 			clocks = <&cmu CLK_SMMUFIMD0>, <&cmu CLK_FIMD0>;
 			power-domains = <&pd_lcd0>;
@@ -507,8 +505,7 @@
 		sysmmu_mfc: sysmmu@13620000 {
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x13620000 0x1000>;
-			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,
-				     <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
 			clock-names = "sysmmu", "master";
 			clocks = <&cmu CLK_SMMUMFC_L>, <&cmu CLK_MFC>;
 			power-domains = <&pd_mfc>;
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index 433f109d97ca..d2779a790ce3 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -111,28 +111,28 @@
 			syscon = <&pmu_system_controller>;
 		};
 
-		pd_mfc: mfc-power-domain@10023c40 {
+		pd_mfc: power-domain@10023c40 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023C40 0x20>;
 			#power-domain-cells = <0>;
 			label = "MFC";
 		};
 
-		pd_g3d: g3d-power-domain@10023c60 {
+		pd_g3d: power-domain@10023c60 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023C60 0x20>;
 			#power-domain-cells = <0>;
 			label = "G3D";
 		};
 
-		pd_lcd0: lcd0-power-domain@10023c80 {
+		pd_lcd0: power-domain@10023c80 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023C80 0x20>;
 			#power-domain-cells = <0>;
 			label = "LCD0";
 		};
 
-		pd_tv: tv-power-domain@10023c20 {
+		pd_tv: power-domain@10023c20 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023C20 0x20>;
 			#power-domain-cells = <0>;
@@ -140,21 +140,21 @@
 			label = "TV";
 		};
 
-		pd_cam: cam-power-domain@10023c00 {
+		pd_cam: power-domain@10023c00 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023C00 0x20>;
 			#power-domain-cells = <0>;
 			label = "CAM";
 		};
 
-		pd_gps: gps-power-domain@10023ce0 {
+		pd_gps: power-domain@10023ce0 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023CE0 0x20>;
 			#power-domain-cells = <0>;
 			label = "GPS";
 		};
 
-		pd_gps_alive: gps-alive-power-domain@10023d00 {
+		pd_gps_alive: power-domain@10023d00 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023D00 0x20>;
 			#power-domain-cells = <0>;
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index f220716239db..554819ae1446 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -72,7 +72,7 @@
 	};
 
 	soc: soc {
-		sysram: sysram@2020000 {
+		sysram: sram@2020000 {
 			compatible = "mmio-sram";
 			reg = <0x02020000 0x20000>;
 			#address-cells = <1>;
@@ -90,7 +90,7 @@
 			};
 		};
 
-		pd_lcd1: lcd1-power-domain@10023ca0 {
+		pd_lcd1: power-domain@10023ca0 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023CA0 0x20>;
 			#power-domain-cells = <0>;
@@ -106,26 +106,17 @@
 			arm,data-latency = <2 2 1>;
 		};
 
-		mct: mct@10050000 {
+		mct: timer@10050000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x10050000 0x800>;
-			interrupt-parent = <&mct_map>;
-			interrupts = <0>, <1>, <2>, <3>, <4>, <5>;
 			clocks = <&clock CLK_FIN_PLL>, <&clock CLK_MCT>;
 			clock-names = "fin_pll", "mct";
-
-			mct_map: mct-map {
-				#interrupt-cells = <1>;
-				#address-cells = <0>;
-				#size-cells = <0>;
-				interrupt-map =
-					<0 &gic 0 57 IRQ_TYPE_LEVEL_HIGH>,
-					<1 &gic 0 69 IRQ_TYPE_LEVEL_HIGH>,
-					<2 &combiner 12 6>,
-					<3 &combiner 12 7>,
-					<4 &gic 0 42 IRQ_TYPE_LEVEL_HIGH>,
-					<5 &gic 0 48 IRQ_TYPE_LEVEL_HIGH>;
-			};
+			interrupts-extended = <&gic GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+					      <&combiner 12 6>,
+					      <&combiner 12 7>,
+					      <&gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		watchdog: watchdog@10060000 {
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index d20db2dfe8e2..5022aa574b26 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -188,7 +188,7 @@
 			interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
-		sysram@2020000 {
+		sram@2020000 {
 			compatible = "mmio-sram";
 			reg = <0x02020000 0x40000>;
 			#address-cells = <1>;
@@ -206,7 +206,7 @@
 			};
 		};
 
-		pd_isp: isp-power-domain@10023ca0 {
+		pd_isp: power-domain@10023ca0 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10023CA0 0x20>;
 			#power-domain-cells = <0>;
@@ -243,25 +243,16 @@
 			clock-names = "aclk200", "aclk400_mcuisp";
 		};
 
-		mct@10050000 {
+		timer@10050000 {
 			compatible = "samsung,exynos4412-mct";
 			reg = <0x10050000 0x800>;
-			interrupt-parent = <&mct_map>;
-			interrupts = <0>, <1>, <2>, <3>, <4>;
 			clocks = <&clock CLK_FIN_PLL>, <&clock CLK_MCT>;
 			clock-names = "fin_pll", "mct";
-
-			mct_map: mct-map {
-				#interrupt-cells = <1>;
-				#address-cells = <0>;
-				#size-cells = <0>;
-				interrupt-map =
-					<0 &gic 0 57 IRQ_TYPE_LEVEL_HIGH>,
-					<1 &combiner 12 5>,
-					<2 &combiner 12 6>,
-					<3 &combiner 12 7>,
-					<4 &gic 1 12 IRQ_TYPE_LEVEL_HIGH>;
-			};
+			interrupts-extended = <&gic GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+					      <&combiner 12 5>,
+					      <&combiner 12 6>,
+					      <&combiner 12 7>,
+					      <&gic GIC_PPI 12 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		watchdog: watchdog@10060000 {
diff --git a/arch/arm/boot/dts/exynos5.dtsi b/arch/arm/boot/dts/exynos5.dtsi
index 67f9b4504a42..4801ca759feb 100644
--- a/arch/arm/boot/dts/exynos5.dtsi
+++ b/arch/arm/boot/dts/exynos5.dtsi
@@ -35,8 +35,8 @@
 		#size-cells = <1>;
 		ranges;
 
-		chipid@10000000 {
-			compatible = "samsung,exynos4210-chipid";
+		chipid: chipid@10000000 {
+			compatible = "samsung,exynos4210-chipid", "syscon";
 			reg = <0x10000000 0x100>;
 		};
 
diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts
index 6fcb78a354fe..d6c85efdb465 100644
--- a/arch/arm/boot/dts/exynos5250-arndale.dts
+++ b/arch/arm/boot/dts/exynos5250-arndale.dts
@@ -11,6 +11,7 @@
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/input/input.h>
 #include <dt-bindings/clock/samsung,s2mps11.h>
+#include <dt-bindings/sound/samsung-i2s.h>
 #include "exynos5250.dtsi"
 
 / {
@@ -135,6 +136,12 @@
 		};
 	};
 
+	sound {
+		compatible = "samsung,arndale-wm1811";
+		samsung,audio-cpu = <&i2s0>;
+		samsung,audio-codec = <&wm1811>;
+	};
+
 	fixed-rate-clocks {
 		xxti {
 			compatible = "samsung,clock-xxti";
@@ -151,6 +158,16 @@
 	};
 };
 
+&clock {
+	assigned-clocks = <&clock CLK_FOUT_EPLL>;
+	assigned-clock-rates = <49152000>;
+};
+
+&clock_audss {
+	assigned-clocks = <&clock_audss EXYNOS_MOUT_AUDSS>;
+	assigned-clock-parents = <&clock CLK_FOUT_EPLL>;
+};
+
 &cpu0 {
 	cpu0-supply = <&buck2_reg>;
 };
@@ -502,9 +519,11 @@
 &i2c_3 {
 	status = "okay";
 
-	wm1811a@1a {
+	wm1811: codec@1a {
 		compatible = "wlf,wm1811";
 		reg = <0x1a>;
+		clocks = <&i2s0 CLK_I2S_CDCLK>;
+		clock-names = "MCLK1";
 
 		AVDD2-supply = <&main_dc_reg>;
 		CPVDD-supply = <&main_dc_reg>;
@@ -540,9 +559,15 @@
 };
 
 &i2s0 {
+	assigned-clocks = <&i2s0 CLK_I2S_RCLK_SRC>;
+	assigned-clock-parents = <&clock_audss EXYNOS_I2S_BUS>;
 	status = "okay";
 };
 
+&i2s0_bus {
+	samsung,pin-drv = <EXYNOS4_PIN_DRV_LV2>;
+};
+
 &mali {
 	mali-supply = <&buck4_reg>;
 	status = "okay";
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index fc966c10cf49..e1f0215e3985 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -164,7 +164,7 @@
 	};
 
 	soc: soc {
-		sysram@2020000 {
+		sram@2020000 {
 			compatible = "mmio-sram";
 			reg = <0x02020000 0x30000>;
 			#address-cells = <1>;
@@ -233,28 +233,17 @@
 			power-domains = <&pd_mau>;
 		};
 
-		mct@101c0000 {
+		timer@101c0000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x101C0000 0x800>;
-			interrupt-controller;
-			#interrupt-cells = <2>;
-			interrupt-parent = <&mct_map>;
-			interrupts = <0 0>, <1 0>, <2 0>, <3 0>,
-				     <4 0>, <5 0>;
 			clocks = <&clock CLK_FIN_PLL>, <&clock CLK_MCT>;
 			clock-names = "fin_pll", "mct";
-
-			mct_map: mct-map {
-				#interrupt-cells = <2>;
-				#address-cells = <0>;
-				#size-cells = <0>;
-				interrupt-map = <0x0 0 &combiner 23 3>,
-						<0x1 0 &combiner 23 4>,
-						<0x2 0 &combiner 25 2>,
-						<0x3 0 &combiner 25 3>,
-						<0x4 0 &gic 0 120 IRQ_TYPE_LEVEL_HIGH>,
-						<0x5 0 &gic 0 121 IRQ_TYPE_LEVEL_HIGH>;
-			};
+			interrupts-extended = <&combiner 23 3>,
+					      <&combiner 23 4>,
+					      <&combiner 25 2>,
+					      <&combiner 25 3>,
+					      <&gic GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		pinctrl_0: pinctrl@11400000 {
@@ -586,9 +575,9 @@
 			compatible = "samsung,s5pv210-i2s";
 			status = "disabled";
 			reg = <0x03830000 0x100>;
-			dmas = <&pdma0 10
-				&pdma0 9
-				&pdma0 8>;
+			dmas = <&pdma0 10>,
+				<&pdma0 9>,
+				<&pdma0 8>;
 			dma-names = "tx", "rx", "tx-sec";
 			clocks = <&clock_audss EXYNOS_I2S_BUS>,
 				<&clock_audss EXYNOS_I2S_BUS>,
@@ -606,8 +595,8 @@
 			compatible = "samsung,s3c6410-i2s";
 			status = "disabled";
 			reg = <0x12D60000 0x100>;
-			dmas = <&pdma1 12
-				&pdma1 11>;
+			dmas = <&pdma1 12>,
+				<&pdma1 11>;
 			dma-names = "tx", "rx";
 			clocks = <&clock CLK_I2S1>, <&clock CLK_DIV_I2S1>;
 			clock-names = "iis", "i2s_opclk0";
@@ -621,8 +610,8 @@
 			compatible = "samsung,s3c6410-i2s";
 			status = "disabled";
 			reg = <0x12D70000 0x100>;
-			dmas = <&pdma0 12
-				&pdma0 11>;
+			dmas = <&pdma0 12>,
+				<&pdma0 11>;
 			dma-names = "tx", "rx";
 			clocks = <&clock CLK_I2S2>, <&clock CLK_DIV_I2S2>;
 			clock-names = "iis", "i2s_opclk0";
diff --git a/arch/arm/boot/dts/exynos5260.dtsi b/arch/arm/boot/dts/exynos5260.dtsi
index 3581b57fbbf7..b0811dbbb362 100644
--- a/arch/arm/boot/dts/exynos5260.dtsi
+++ b/arch/arm/boot/dts/exynos5260.dtsi
@@ -180,7 +180,7 @@
 			reg = <0x10000000 0x100>;
 		};
 
-		mct: mct@100b0000 {
+		mct: timer@100b0000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x100B0000 0x1000>;
 			clocks = <&fin_pll>, <&clock_peri PERI_CLK_MCT>;
diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi
index e6f78b1cee7c..a4b03d4c3de5 100644
--- a/arch/arm/boot/dts/exynos5410.dtsi
+++ b/arch/arm/boot/dts/exynos5410.dtsi
@@ -222,9 +222,9 @@
 		audi2s0: i2s@3830000 {
 			compatible = "samsung,exynos5420-i2s";
 			reg = <0x03830000 0x100>;
-			dmas = <&pdma0 10
-				&pdma0 9
-				&pdma0 8>;
+			dmas = <&pdma0 10>,
+				<&pdma0 9>,
+				<&pdma0 8>;
 			dma-names = "tx", "rx", "tx-sec";
 			clocks = <&clock_audss EXYNOS_I2S_BUS>,
 				<&clock_audss EXYNOS_I2S_BUS>,
diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts
index 9eb48cabcca4..2bcbdf8a39bf 100644
--- a/arch/arm/boot/dts/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts
@@ -1065,6 +1065,10 @@
 	status = "okay";
 };
 
+&timer {
+	arm,cpu-registers-not-fw-configured;
+};
+
 &tmu_cpu0 {
 	vtmu-supply = <&ldo10_reg>;
 };
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 7d51e0f4ab79..d39907a41f78 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -175,7 +175,7 @@
 		};
 
 		clock: clock-controller@10010000 {
-			compatible = "samsung,exynos5420-clock";
+			compatible = "samsung,exynos5420-clock", "syscon";
 			reg = <0x10010000 0x30000>;
 			#clock-cells = <1>;
 		};
@@ -237,6 +237,32 @@
 			status = "disabled";
 		};
 
+		dmc: memory-controller@10c20000 {
+			compatible = "samsung,exynos5422-dmc";
+			reg = <0x10c20000 0x10000>, <0x10c30000 0x10000>;
+			interrupt-parent = <&combiner>;
+			interrupts = <16 0>, <16 1>;
+			interrupt-names = "drex_0", "drex_1";
+			clocks = <&clock CLK_FOUT_SPLL>,
+				 <&clock CLK_MOUT_SCLK_SPLL>,
+				 <&clock CLK_FF_DOUT_SPLL2>,
+				 <&clock CLK_FOUT_BPLL>,
+				 <&clock CLK_MOUT_BPLL>,
+				 <&clock CLK_SCLK_BPLL>,
+				 <&clock CLK_MOUT_MX_MSPLL_CCORE>,
+				 <&clock CLK_MOUT_MCLK_CDREX>;
+			clock-names = "fout_spll",
+				      "mout_sclk_spll",
+				      "ff_dout_spll2",
+				      "fout_bpll",
+				      "mout_bpll",
+				      "sclk_bpll",
+				      "mout_mx_mspll_ccore",
+				      "mout_mclk_cdrex";
+			samsung,syscon-clk = <&clock>;
+			status = "disabled";
+		};
+
 		nocp_mem0_0: nocp@10ca1000 {
 			compatible = "samsung,exynos5420-nocp";
 			reg = <0x10CA1000 0x200>;
@@ -273,6 +299,54 @@
 			status = "disabled";
 		};
 
+		ppmu_dmc0_0: ppmu@10d00000 {
+			compatible = "samsung,exynos-ppmu";
+			reg = <0x10d00000 0x2000>;
+			clocks = <&clock CLK_PCLK_PPMU_DREX0_0>;
+			clock-names = "ppmu";
+			events {
+				ppmu_event3_dmc0_0: ppmu-event3-dmc0_0 {
+					event-name = "ppmu-event3-dmc0_0";
+				};
+			};
+		};
+
+		ppmu_dmc0_1: ppmu@10d10000 {
+			compatible = "samsung,exynos-ppmu";
+			reg = <0x10d10000 0x2000>;
+			clocks = <&clock CLK_PCLK_PPMU_DREX0_1>;
+			clock-names = "ppmu";
+			events {
+				ppmu_event3_dmc0_1: ppmu-event3-dmc0_1 {
+					event-name = "ppmu-event3-dmc0_1";
+				};
+			};
+		};
+
+		ppmu_dmc1_0: ppmu@10d60000 {
+			compatible = "samsung,exynos-ppmu";
+			reg = <0x10d60000 0x2000>;
+			clocks = <&clock CLK_PCLK_PPMU_DREX1_0>;
+			clock-names = "ppmu";
+			events {
+				ppmu_event3_dmc1_0: ppmu-event3-dmc1_0 {
+					event-name = "ppmu-event3-dmc1_0";
+				};
+			};
+		};
+
+		ppmu_dmc1_1: ppmu@10d70000 {
+			compatible = "samsung,exynos-ppmu";
+			reg = <0x10d70000 0x2000>;
+			clocks = <&clock CLK_PCLK_PPMU_DREX1_1>;
+			clock-names = "ppmu";
+			events {
+				ppmu_event3_dmc1_1: ppmu-event3-dmc1_1 {
+					event-name = "ppmu-event3-dmc1_1";
+				};
+			};
+		};
+
 		gsc_pd: power-domain@10044000 {
 			compatible = "samsung,exynos4210-pd";
 			reg = <0x10044000 0x20>;
@@ -434,9 +508,9 @@
 		i2s0: i2s@3830000 {
 			compatible = "samsung,exynos5420-i2s";
 			reg = <0x03830000 0x100>;
-			dmas = <&adma 0
-				&adma 2
-				&adma 1>;
+			dmas = <&adma 0>,
+				<&adma 2>,
+				<&adma 1>;
 			dma-names = "tx", "rx", "tx-sec";
 			clocks = <&clock_audss EXYNOS_I2S_BUS>,
 				<&clock_audss EXYNOS_I2S_BUS>,
@@ -455,8 +529,8 @@
 		i2s1: i2s@12d60000 {
 			compatible = "samsung,exynos5420-i2s";
 			reg = <0x12D60000 0x100>;
-			dmas = <&pdma1 12
-				&pdma1 11>;
+			dmas = <&pdma1 12>,
+				<&pdma1 11>;
 			dma-names = "tx", "rx";
 			clocks = <&clock CLK_I2S1>, <&clock CLK_SCLK_I2S1>;
 			clock-names = "iis", "i2s_opclk0";
@@ -471,8 +545,8 @@
 		i2s2: i2s@12d70000 {
 			compatible = "samsung,exynos5420-i2s";
 			reg = <0x12D70000 0x100>;
-			dmas = <&pdma0 12
-				&pdma0 11>;
+			dmas = <&pdma0 12>,
+				<&pdma0 11>;
 			dma-names = "tx", "rx";
 			clocks = <&clock CLK_I2S2>, <&clock CLK_SCLK_I2S2>;
 			clock-names = "iis", "i2s_opclk0";
diff --git a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi
index 829147e320e0..059fa32d1a8f 100644
--- a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi
+++ b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi
@@ -34,6 +34,98 @@
 			clock-frequency = <24000000>;
 		};
 	};
+
+	dmc_opp_table: opp_table2 {
+		compatible = "operating-points-v2";
+
+		opp00 {
+			opp-hz = /bits/ 64 <165000000>;
+			opp-microvolt = <875000>;
+		};
+		opp01 {
+			opp-hz = /bits/ 64 <206000000>;
+			opp-microvolt = <875000>;
+		};
+		opp02 {
+			opp-hz = /bits/ 64 <275000000>;
+			opp-microvolt = <875000>;
+		};
+		opp03 {
+			opp-hz = /bits/ 64 <413000000>;
+			opp-microvolt = <887500>;
+		};
+		opp04 {
+			opp-hz = /bits/ 64 <543000000>;
+			opp-microvolt = <937500>;
+		};
+		opp05 {
+			opp-hz = /bits/ 64 <633000000>;
+			opp-microvolt = <1012500>;
+		};
+		opp06 {
+			opp-hz = /bits/ 64 <728000000>;
+			opp-microvolt = <1037500>;
+		};
+		opp07 {
+			opp-hz = /bits/ 64 <825000000>;
+			opp-microvolt = <1050000>;
+		};
+	};
+
+	samsung_K3QF2F20DB: lpddr3 {
+		compatible	= "samsung,K3QF2F20DB", "jedec,lpddr3";
+		density		= <16384>;
+		io-width	= <32>;
+		#address-cells  = <1>;
+		#size-cells     = <0>;
+
+		tRFC-min-tck		= <17>;
+		tRRD-min-tck		= <2>;
+		tRPab-min-tck		= <2>;
+		tRPpb-min-tck		= <2>;
+		tRCD-min-tck		= <3>;
+		tRC-min-tck		= <6>;
+		tRAS-min-tck		= <5>;
+		tWTR-min-tck		= <2>;
+		tWR-min-tck		= <7>;
+		tRTP-min-tck		= <2>;
+		tW2W-C2C-min-tck	= <0>;
+		tR2R-C2C-min-tck	= <0>;
+		tWL-min-tck		= <8>;
+		tDQSCK-min-tck		= <5>;
+		tRL-min-tck		= <14>;
+		tFAW-min-tck		= <5>;
+		tXSR-min-tck		= <12>;
+		tXP-min-tck		= <2>;
+		tCKE-min-tck		= <2>;
+		tCKESR-min-tck		= <2>;
+		tMRD-min-tck		= <5>;
+
+		timings_samsung_K3QF2F20DB_800mhz: lpddr3-timings@800000000 {
+			compatible	= "jedec,lpddr3-timings";
+			/* workaround: 'reg' shows max-freq */
+			reg		= <800000000>;
+			min-freq	= <100000000>;
+			tRFC		= <65000>;
+			tRRD		= <6000>;
+			tRPab		= <12000>;
+			tRPpb		= <12000>;
+			tRCD		= <10000>;
+			tRC		= <33750>;
+			tRAS		= <23000>;
+			tWTR		= <3750>;
+			tWR		= <7500>;
+			tRTP		= <3750>;
+			tW2W-C2C	= <0>;
+			tR2R-C2C	= <0>;
+			tFAW		= <25000>;
+			tXSR		= <70000>;
+			tXP		= <3750>;
+			tCKE		= <3750>;
+			tCKESR		= <3750>;
+			tMRD		= <7000>;
+		};
+	};
 };
 
 &adc {
@@ -132,6 +224,15 @@
 	cpu-supply = <&buck2_reg>;
 };
 
+&dmc {
+	devfreq-events = <&ppmu_event3_dmc0_0>,	<&ppmu_event3_dmc0_1>,
+			<&ppmu_event3_dmc1_0>, <&ppmu_event3_dmc1_1>;
+	device-handle = <&samsung_K3QF2F20DB>;
+	operating-points-v2 = <&dmc_opp_table>;
+	vdd-supply = <&buck1_reg>;
+	status = "okay";
+};
+
 &hsi2c_4 {
 	status = "okay";
 
@@ -634,6 +735,22 @@
 	};
 };
 
+&ppmu_dmc0_0 {
+	status = "okay";
+};
+
+&ppmu_dmc0_1 {
+	status = "okay";
+};
+
+&ppmu_dmc1_0 {
+	status = "okay";
+};
+
+&ppmu_dmc1_1 {
+	status = "okay";
+};
+
 &tmu_cpu0 {
 	vtmu-supply = <&ldo7_reg>;
 };
diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-lite.dts b/arch/arm/boot/dts/exynos5422-odroidxu3-lite.dts
index c19b5a51ca44..a31ca2ef750f 100644
--- a/arch/arm/boot/dts/exynos5422-odroidxu3-lite.dts
+++ b/arch/arm/boot/dts/exynos5422-odroidxu3-lite.dts
@@ -26,6 +26,10 @@
 	status = "disabled";
 };
 
+&chipid {
+	samsung,asv-bin = <2>;
+};
+
 &pwm {
 	/*
 	 * PWM 0 -- fan
diff --git a/arch/arm/boot/dts/exynos54xx.dtsi b/arch/arm/boot/dts/exynos54xx.dtsi
index 9c3b63b7cac6..f78dee801cd9 100644
--- a/arch/arm/boot/dts/exynos54xx.dtsi
+++ b/arch/arm/boot/dts/exynos54xx.dtsi
@@ -45,8 +45,17 @@
 		status = "disabled";
 	};
 
+	timer: timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		clock-frequency = <24000000>;
+	};
+
 	soc: soc {
-		sysram@2020000 {
+		sram@2020000 {
 			compatible = "mmio-sram";
 			reg = <0x02020000 0x54000>;
 			#address-cells = <1>;
@@ -64,30 +73,21 @@
 			};
 		};
 
-		mct: mct@101c0000 {
+		mct: timer@101c0000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x101c0000 0xb00>;
-			interrupt-parent = <&mct_map>;
-			interrupts = <0>, <1>, <2>, <3>, <4>, <5>, <6>, <7>,
-					<8>, <9>, <10>, <11>;
-
-			mct_map: mct-map {
-				#interrupt-cells = <1>;
-				#address-cells = <0>;
-				#size-cells = <0>;
-				interrupt-map = <0 &combiner 23 3>,
-						<1 &combiner 23 4>,
-						<2 &combiner 25 2>,
-						<3 &combiner 25 3>,
-						<4 &gic 0 120 IRQ_TYPE_LEVEL_HIGH>,
-						<5 &gic 0 121 IRQ_TYPE_LEVEL_HIGH>,
-						<6 &gic 0 122 IRQ_TYPE_LEVEL_HIGH>,
-						<7 &gic 0 123 IRQ_TYPE_LEVEL_HIGH>,
-						<8 &gic 0 128 IRQ_TYPE_LEVEL_HIGH>,
-						<9 &gic 0 129 IRQ_TYPE_LEVEL_HIGH>,
-						<10 &gic 0 130 IRQ_TYPE_LEVEL_HIGH>,
-						<11 &gic 0 131 IRQ_TYPE_LEVEL_HIGH>;
-			};
+			interrupts-extended = <&combiner 23 3>,
+					      <&combiner 23 4>,
+					      <&combiner 25 2>,
+					      <&combiner 25 3>,
+					      <&gic GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+					      <&gic GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		watchdog: watchdog@101d0000 {
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index 4398f2d1fe88..60ca3d685247 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -1034,6 +1034,10 @@
 	status = "okay";
 };
 
+&timer {
+	arm,cpu-registers-not-fw-configured;
+};
+
 &tmu_cpu0 {
 	vtmu-supply = <&ldo10_reg>;
 };
diff --git a/arch/arm/boot/dts/exynos5800.dtsi b/arch/arm/boot/dts/exynos5800.dtsi
index de639eecc5c9..16177d815ee4 100644
--- a/arch/arm/boot/dts/exynos5800.dtsi
+++ b/arch/arm/boot/dts/exynos5800.dtsi
@@ -17,7 +17,7 @@
 };
 
 &clock {
-	compatible = "samsung,exynos5800-clock";
+	compatible = "samsung,exynos5800-clock", "syscon";
 };
 
 &cluster_a15_opp_table {
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index 3652f5556b29..f3464cf52e49 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -585,7 +585,7 @@
 			status = "disabled";
 		};
 
-		iram: iram@ffff4c00 {
+		iram: sram@ffff4c00 {
 			compatible = "mmio-sram";
 			reg = <0xffff4c00 0xb400>;
 		};
diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi
index d7f6fb764997..6b62f0745b82 100644
--- a/arch/arm/boot/dts/imx31.dtsi
+++ b/arch/arm/boot/dts/imx31.dtsi
@@ -55,7 +55,7 @@
 		interrupt-parent = <&avic>;
 		ranges;
 
-		iram: iram@1fffc000 {
+		iram: sram@1fffc000 {
 			compatible = "mmio-sram";
 			reg = <0x1fffc000 0x4000>;
 			#address-cells = <1>;
diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi
index 0a4b9a5d9a9c..dea86b98e9c3 100644
--- a/arch/arm/boot/dts/imx51.dtsi
+++ b/arch/arm/boot/dts/imx51.dtsi
@@ -116,7 +116,7 @@
 		interrupt-parent = <&tzic>;
 		ranges;
 
-		iram: iram@1ffe0000 {
+		iram: sram@1ffe0000 {
 			compatible = "mmio-sram";
 			reg = <0x1ffe0000 0x20000>;
 		};
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index f00dda334976..9b4efcd82636 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -18,34 +18,28 @@
 
 	display0: disp0 {
 		compatible = "fsl,imx-parallel-display";
-		interface-pix-fmt = "rgb565";
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_ipu_disp0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
 		status = "disabled";
-		display-timings {
-			claawvga {
-				native-mode;
-				clock-frequency = <27000000>;
-				hactive = <800>;
-				vactive = <480>;
-				hback-porch = <40>;
-				hfront-porch = <60>;
-				vback-porch = <10>;
-				vfront-porch = <10>;
-				hsync-len = <20>;
-				vsync-len = <10>;
-				hsync-active = <0>;
-				vsync-active = <0>;
-				de-active = <1>;
-				pixelclk-active = <0>;
-			};
-		};
 
-		port {
+		port@0 {
+			reg = <0>;
+
 			display0_in: endpoint {
 				remote-endpoint = <&ipu_di0_disp0>;
 			};
 		};
+
+		port@1 {
+			reg = <1>;
+
+			display_out: endpoint {
+				remote-endpoint = <&panel_in>;
+			};
+		};
 	};
 
 	gpio-keys {
@@ -84,6 +78,16 @@
 		};
 	};
 
+	panel {
+		compatible = "sii,43wvf1g";
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&display_out>;
+			};
+		};
+	};
+
 	regulators {
 		compatible = "simple-bus";
 		#address-cells = <1>;
diff --git a/arch/arm/boot/dts/imx53-usbarmory.dts b/arch/arm/boot/dts/imx53-usbarmory.dts
index ee6263d1c2d3..f34993a490ee 100644
--- a/arch/arm/boot/dts/imx53-usbarmory.dts
+++ b/arch/arm/boot/dts/imx53-usbarmory.dts
@@ -120,7 +120,7 @@
 	};
 
 	/*
-	 * UART mode pin header configration
+	 * UART mode pin header configuration
 	 * 3 - GPIO5[26], pull-down 100K
 	 * 4 - GPIO5[27], pull-down 100K
 	 * 5 - TX, pull-up 100K
diff --git a/arch/arm/boot/dts/imx6dl-apf6dev.dts b/arch/arm/boot/dts/imx6dl-apf6dev.dts
index 6632e99fbb68..3dcce3454b08 100644
--- a/arch/arm/boot/dts/imx6dl-apf6dev.dts
+++ b/arch/arm/boot/dts/imx6dl-apf6dev.dts
@@ -1,49 +1,6 @@
-/*
- * Copyright 2015 Armadeus Systems
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+//
+// Copyright 2015 Armadeus Systems <support@armadeus.com>
 
 /dts-v1/;
 #include "imx6dl.dtsi"
diff --git a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
index 9a5d6c94cca4..cd075621de52 100644
--- a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
+++ b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
@@ -168,6 +168,21 @@
 &i2c3 {
 	status = "okay";
 
+	/*
+	 * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>,
+	 * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms
+	 */
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_pcap_1>;
+		reg = <0x4a>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <9 IRQ_TYPE_EDGE_FALLING>;		/* SODIMM 28 */
+		reset-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>;	/* SODIMM 30 */
+		status = "disabled";
+	};
+
 	/* M41T0M6 real time clock on carrier board */
 	rtc_i2c: rtc@68 {
 		compatible = "st,m41t0";
@@ -175,6 +190,30 @@
 	};
 };
 
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <
+		&pinctrl_weim_gpio_1 &pinctrl_weim_gpio_2
+		&pinctrl_weim_gpio_3 &pinctrl_weim_gpio_4
+		&pinctrl_weim_gpio_5 &pinctrl_weim_gpio_6
+		&pinctrl_usbh_oc_1 &pinctrl_usbc_id_1
+	>;
+
+	pinctrl_pcap_1: pcap1grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_9__GPIO1_IO09	0x1b0b0 /* SODIMM 28 */
+			MX6QDL_PAD_SD4_DAT2__GPIO2_IO10	0x1b0b0 /* SODIMM 30 */
+		>;
+	};
+
+	pinctrl_mxt_ts: mxttsgrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_CS1__GPIO2_IO24	0x130b0 /* SODIMM 107 */
+			MX6QDL_PAD_SD2_DAT1__GPIO1_IO14	0x130b0 /* SODIMM 106 */
+		>;
+	};
+};
+
 &ipu1_di0_disp0 {
 	remote-endpoint = <&lcd_display_in>;
 };
diff --git a/arch/arm/boot/dts/imx6dl-icore-mipi.dts b/arch/arm/boot/dts/imx6dl-icore-mipi.dts
index e43bccb78ab2..d8f3821a0ffd 100644
--- a/arch/arm/boot/dts/imx6dl-icore-mipi.dts
+++ b/arch/arm/boot/dts/imx6dl-icore-mipi.dts
@@ -8,7 +8,7 @@
 /dts-v1/;
 
 #include "imx6dl.dtsi"
-#include "imx6qdl-icore.dtsi"
+#include "imx6qdl-icore-1.5.dtsi"
 
 / {
 	model = "Engicam i.CoreM6 DualLite/Solo MIPI Starter Kit";
diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
index e8d800fec637..80ed5f16a76e 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
@@ -4,6 +4,7 @@
 
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/input/input.h>
 #include <dt-bindings/pwm/pwm.h>
 
 / {
@@ -308,7 +309,7 @@
 	clock-frequency = <100000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_i2c3>;
-	status = "disabled";
+	status = "okay";
 
 	oled: oled@3d {
 		compatible = "solomon,ssd1305fb-i2c";
@@ -330,6 +331,18 @@
 		vcc-supply = <&sw2_reg>;
 		status = "disabled";
 	};
+
+	touchkeys: keys@5a {
+		compatible = "fsl,mpr121-touchkey";
+		reg = <0x5a>;
+		vdd-supply = <&sw2_reg>;
+		autorepeat;
+		linux,keycodes = <KEY_1>, <KEY_2>, <KEY_3>, <KEY_4>, <KEY_5>,
+				<KEY_6>, <KEY_7>, <KEY_8>, <KEY_9>,
+				<KEY_BACKSPACE>, <KEY_0>, <KEY_ENTER>;
+		poll-interval = <50>;
+		status = "disabled";
+	};
 };
 
 &iomuxc {
@@ -447,6 +460,13 @@
 		>;
 	};
 
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_7__UART2_TX_DATA	0x1b098
+			MX6QDL_PAD_GPIO_8__UART2_RX_DATA	0x1b098
+		>;
+	};
+
 	pinctrl_usbh1: usbh1grp {
 		fsl,pins = <
 			MX6QDL_PAD_EIM_D30__USB_H1_OC	0x1b098
@@ -532,6 +552,12 @@
 	status = "okay";
 };
 
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	status = "okay";
+};
+
 &usbh1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usbh1>;
diff --git a/arch/arm/boot/dts/imx6dl-yapp4-hydra.dts b/arch/arm/boot/dts/imx6dl-yapp4-hydra.dts
index f97927064750..6010d3d872ab 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-hydra.dts
+++ b/arch/arm/boot/dts/imx6dl-yapp4-hydra.dts
@@ -25,10 +25,6 @@
 	status = "okay";
 };
 
-&i2c3 {
-	status = "okay";
-};
-
 &leds {
 	status = "okay";
 };
@@ -45,6 +41,10 @@
 	status = "okay";
 };
 
+&touchkeys {
+	status = "okay";
+};
+
 &usdhc3 {
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
index 2ed10310a7b7..008312ee0c31 100644
--- a/arch/arm/boot/dts/imx6dl.dtsi
+++ b/arch/arm/boot/dts/imx6dl.dtsi
@@ -64,6 +64,7 @@
 				396000	1175000
 			>;
 			clock-latency = <61036>; /* two CLK32 periods */
+			#cooling-cells = <2>;
 			clocks = <&clks IMX6QDL_CLK_ARM>,
 				 <&clks IMX6QDL_CLK_PLL2_PFD2_396M>,
 				 <&clks IMX6QDL_CLK_STEP>,
diff --git a/arch/arm/boot/dts/imx6q-apalis-eval.dts b/arch/arm/boot/dts/imx6q-apalis-eval.dts
index 0edd3043d9c1..4665e15b196d 100644
--- a/arch/arm/boot/dts/imx6q-apalis-eval.dts
+++ b/arch/arm/boot/dts/imx6q-apalis-eval.dts
@@ -167,6 +167,19 @@
 &i2c1 {
 	status = "okay";
 
+	/*
+	 * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>,
+	 * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms
+	 */
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		reg = <0x4a>;
+		interrupt-parent = <&gpio6>;
+		interrupts = <10 IRQ_TYPE_EDGE_FALLING>;
+		reset-gpios = <&gpio6 9 GPIO_ACTIVE_HIGH>; /* SODIMM 13 */
+		status = "disabled";
+	};
+
 	pcie-switch@58 {
 		compatible = "plx,pex8605";
 		reg = <0x58>;
diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.1.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.1.dts
index b94bb687be6b..a3fa04a97d81 100644
--- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.1.dts
+++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.1.dts
@@ -172,6 +172,19 @@
 &i2c1 {
 	status = "okay";
 
+	/*
+	 * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>,
+	 * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms
+	 */
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		reg = <0x4a>;
+		interrupt-parent = <&gpio6>;
+		interrupts = <10 IRQ_TYPE_EDGE_FALLING>;
+		reset-gpios = <&gpio6 9 GPIO_ACTIVE_HIGH>; /* SODIMM 13 */
+		status = "disabled";
+	};
+
 	/* M41T0M6 real time clock on carrier board */
 	rtc_i2c: rtc@68 {
 		compatible = "st,m41t0";
diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora.dts b/arch/arm/boot/dts/imx6q-apalis-ixora.dts
index 302fd6adc8a7..5ba49d0f4880 100644
--- a/arch/arm/boot/dts/imx6q-apalis-ixora.dts
+++ b/arch/arm/boot/dts/imx6q-apalis-ixora.dts
@@ -171,6 +171,19 @@
 &i2c1 {
 	status = "okay";
 
+	/*
+	 * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>,
+	 * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms
+	 */
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		reg = <0x4a>;
+		interrupt-parent = <&gpio6>;
+		interrupts = <10 IRQ_TYPE_EDGE_FALLING>;
+		reset-gpios = <&gpio6 9 GPIO_ACTIVE_HIGH>; /* SODIMM 13 */
+		status = "disabled";
+	};
+
 	eeprom@50 {
 		compatible = "atmel,24c02";
 		reg = <0x50>;
diff --git a/arch/arm/boot/dts/imx6q-apf6dev.dts b/arch/arm/boot/dts/imx6q-apf6dev.dts
index 07a36bb8075b..664b0af8f0bb 100644
--- a/arch/arm/boot/dts/imx6q-apf6dev.dts
+++ b/arch/arm/boot/dts/imx6q-apf6dev.dts
@@ -1,49 +1,6 @@
-/*
- * Copyright 2015 Armadeus Systems
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+//
+// Copyright 2015 Armadeus Systems <support@armadeus.com>
 
 /dts-v1/;
 #include "imx6q.dtsi"
diff --git a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts
index 9c61e3be2d9a..bb74fc62d913 100644
--- a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts
+++ b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts
@@ -43,6 +43,14 @@
 	status = "okay";
 };
 
+&can1 {
+	status = "okay";
+};
+
+&can2 {
+	status = "disabled";
+};
+
 &hdmi {
 	ddc-i2c-bus = <&i2c2>;
 	status = "okay";
@@ -55,7 +63,7 @@
 		#sound-dai-cells = <0>;
 		clocks = <&clk_ext_audio_codec>;
 		VDDA-supply = <&reg_3p3v>;
-		VDDIO-supply = <&reg_3p3v>;
+		VDDIO-supply = <&sw2_reg>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
index 387801dde02e..87f0aa897086 100644
--- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
@@ -51,13 +51,11 @@
 &can1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_flexcan1>;
-	status = "okay";
 };
 
 &can2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_flexcan2>;
-	status = "okay";
 };
 
 &ecspi1 {
@@ -206,7 +204,7 @@
 	};
 
 	rtc@56 {
-		compatible = "rv3029c2";
+		compatible = "microcrystal,rv3029";
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_rtc_hw300>;
 		reg = <0x56>;
diff --git a/arch/arm/boot/dts/imx6q-gw54xx.dts b/arch/arm/boot/dts/imx6q-gw54xx.dts
index ecc3989f607b..d5d46908cf6e 100644
--- a/arch/arm/boot/dts/imx6q-gw54xx.dts
+++ b/arch/arm/boot/dts/imx6q-gw54xx.dts
@@ -15,19 +15,16 @@
 	sound-digital {
 		compatible = "simple-audio-card";
 		simple-audio-card,name = "tda1997x-audio";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&sound_codec>;
+		simple-audio-card,frame-master = <&sound_codec>;
 
-		simple-audio-card,dai-link@0 {
-			format = "i2s";
-
-			cpu {
-				sound-dai = <&ssi2>;
-			};
+		sound_cpu: simple-audio-card,cpu {
+			sound-dai = <&ssi2>;
+		};
 
-			codec {
-				bitclock-master;
-				frame-master;
-				sound-dai = <&hdmi_receiver>;
-			};
+		sound_codec: simple-audio-card,codec {
+			sound-dai = <&hdmi_receiver>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index d038f4117024..9d3be1cc6b64 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -73,6 +73,7 @@
 				396000	1175000
 			>;
 			clock-latency = <61036>; /* two CLK32 periods */
+			#cooling-cells = <2>;
 			clocks = <&clks IMX6QDL_CLK_ARM>,
 				 <&clks IMX6QDL_CLK_PLL2_PFD2_396M>,
 				 <&clks IMX6QDL_CLK_STEP>,
@@ -107,6 +108,7 @@
 				396000	1175000
 			>;
 			clock-latency = <61036>; /* two CLK32 periods */
+			#cooling-cells = <2>;
 			clocks = <&clks IMX6QDL_CLK_ARM>,
 				 <&clks IMX6QDL_CLK_PLL2_PFD2_396M>,
 				 <&clks IMX6QDL_CLK_STEP>,
@@ -141,6 +143,7 @@
 				396000	1175000
 			>;
 			clock-latency = <61036>; /* two CLK32 periods */
+			#cooling-cells = <2>;
 			clocks = <&clks IMX6QDL_CLK_ARM>,
 				 <&clks IMX6QDL_CLK_PLL2_PFD2_396M>,
 				 <&clks IMX6QDL_CLK_STEP>,
diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
index 7c4ad541c3f5..ff1287e6b7ce 100644
--- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
@@ -148,14 +148,16 @@
 };
 
 &can1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_flexcan1>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&pinctrl_flexcan1_default>;
+	pinctrl-1 = <&pinctrl_flexcan1_sleep>;
 	status = "disabled";
 };
 
 &can2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_flexcan2>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&pinctrl_flexcan2_default>;
+	pinctrl-1 = <&pinctrl_flexcan2_sleep>;
 	status = "disabled";
 };
 
@@ -205,8 +207,11 @@
 /* I2C1_SDA/SCL on MXM3 209/211 (e.g. RTC on carrier board) */
 &i2c1 {
 	clock-frequency = <100000>;
-	pinctrl-names = "default";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c1>;
+	pinctrl-1 = <&pinctrl_i2c1_gpio>;
+	scl-gpios = <&gpio5 27 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+	sda-gpios = <&gpio5 26 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	status = "disabled";
 };
 
@@ -216,8 +221,11 @@
  */
 &i2c2 {
 	clock-frequency = <100000>;
-	pinctrl-names = "default";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c2>;
+	pinctrl-1 = <&pinctrl_i2c2_gpio>;
+	scl-gpios = <&gpio4 12 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+	sda-gpios = <&gpio4 13 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	status = "okay";
 
 	pmic: pfuze100@8 {
@@ -372,9 +380,9 @@
  */
 &i2c3 {
 	clock-frequency = <100000>;
-	pinctrl-names = "default", "recovery";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c3>;
-	pinctrl-1 = <&pinctrl_i2c3_recovery>;
+	pinctrl-1 = <&pinctrl_i2c3_gpio>;
 	scl-gpios = <&gpio3 17 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	sda-gpios = <&gpio3 18 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	status = "disabled";
@@ -599,19 +607,32 @@
 		>;
 	};
 
-	pinctrl_flexcan1: flexcan1grp {
+	pinctrl_flexcan1_default: flexcan1defgrp {
 		fsl,pins = <
 			MX6QDL_PAD_GPIO_7__FLEXCAN1_TX 0x1b0b0
 			MX6QDL_PAD_GPIO_8__FLEXCAN1_RX 0x1b0b0
 		>;
 	};
 
-	pinctrl_flexcan2: flexcan2grp {
+	pinctrl_flexcan1_sleep: flexcan1slpgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_7__GPIO1_IO07 0x0
+			MX6QDL_PAD_GPIO_8__GPIO1_IO08 0x0
+		>;
+	};
+
+	pinctrl_flexcan2_default: flexcan2defgrp {
 		fsl,pins = <
 			MX6QDL_PAD_KEY_COL4__FLEXCAN2_TX 0x1b0b0
 			MX6QDL_PAD_KEY_ROW4__FLEXCAN2_RX 0x1b0b0
 		>;
 	};
+	pinctrl_flexcan2_sleep: flexcan2slpgrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL4__GPIO4_IO14 0x0
+			MX6QDL_PAD_KEY_ROW4__GPIO4_IO15 0x0
+		>;
+	};
 
 	pinctrl_gpio_bl_on: gpioblon {
 		fsl,pins = <
@@ -646,6 +667,13 @@
 		>;
 	};
 
+	pinctrl_i2c1_gpio: i2c1gpiogrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT8__GPIO5_IO26 0x4001b8b1
+			MX6QDL_PAD_CSI0_DAT9__GPIO5_IO27 0x4001b8b1
+		>;
+	};
+
 	pinctrl_i2c2: i2c2grp {
 		fsl,pins = <
 			MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
@@ -653,6 +681,13 @@
 		>;
 	};
 
+	pinctrl_i2c2_gpio: i2c2gpiogrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__GPIO4_IO12 0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__GPIO4_IO13 0x4001b8b1
+		>;
+	};
+
 	pinctrl_i2c3: i2c3grp {
 		fsl,pins = <
 			MX6QDL_PAD_EIM_D17__I2C3_SCL 0x4001b8b1
@@ -660,7 +695,7 @@
 		>;
 	};
 
-	pinctrl_i2c3_recovery: i2c3recoverygrp {
+	pinctrl_i2c3_gpio: i2c3gpiogrp {
 		fsl,pins = <
 			MX6QDL_PAD_EIM_D17__GPIO3_IO17 0x4001b8b1
 			MX6QDL_PAD_EIM_D18__GPIO3_IO18 0x4001b8b1
diff --git a/arch/arm/boot/dts/imx6qdl-apf6.dtsi b/arch/arm/boot/dts/imx6qdl-apf6.dtsi
index 4738c3c1ab50..b78ed7974ea9 100644
--- a/arch/arm/boot/dts/imx6qdl-apf6.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apf6.dtsi
@@ -1,66 +1,56 @@
-/*
- * Copyright 2015 Armadeus Systems
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+//
+// Copyright 2015 Armadeus Systems <support@armadeus.com>
 
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 
+/ {
+	reg_1p8v: regulator-1p8v {
+		compatible = "regulator-fixed";
+		regulator-name = "1P8V";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		vin-supply = <&reg_3p3v>;
+	};
+
+	usdhc1_pwrseq: usdhc1-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		reset-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
+		post-power-on-delay-ms = <15>;
+		power-off-delay-us = <70>;
+	};
+};
+
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
 	phy-mode = "rgmii-id";
 	phy-reset-duration = <10>;
 	phy-reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>;
+	phy-handle = <&ethphy1>;
 	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy1: ethernet-phy@1 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <1>;
+			interrupt-parent = <&gpio1>;
+			interrupts = <28 IRQ_TYPE_LEVEL_LOW>;
+			status = "okay";
+		};
+	};
 };
 
 /* Bluetooth */
 &uart2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart2>;
+	uart-has-rtscts;
 	status = "okay";
 };
 
@@ -68,6 +58,12 @@
 &usdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
+	bus-width = <4>;
+	mmc-pwrseq = <&usdhc1_pwrseq>;
+	vmmc-supply = <&reg_3p3v>;
+	vqmmc-supply = <&reg_1p8v>;
+	cap-power-off-card;
+	keep-power-in-suspend;
 	non-removable;
 	status = "okay";
 
@@ -94,65 +90,63 @@
 };
 
 &iomuxc {
-	apf6 {
-		pinctrl_enet: enetgrp {
-			fsl,pins = <
-				MX6QDL_PAD_ENET_MDIO__ENET_MDIO		0x1b8b0
-				MX6QDL_PAD_ENET_MDC__ENET_MDC		0x1b0b0
-				MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK	0x1b0b0
-				MX6QDL_PAD_ENET_RX_ER__GPIO1_IO24	0x130b0
-				MX6QDL_PAD_ENET_TX_EN__GPIO1_IO28	0x130b0
-				MX6QDL_PAD_RGMII_TXC__RGMII_TXC		0x1b030
-				MX6QDL_PAD_RGMII_TD0__RGMII_TD0		0x1b030
-				MX6QDL_PAD_RGMII_TD1__RGMII_TD1		0x1b030
-				MX6QDL_PAD_RGMII_TD2__RGMII_TD2		0x1b030
-				MX6QDL_PAD_RGMII_TD3__RGMII_TD3		0x1b030
-				MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL	0x1b030
-				MX6QDL_PAD_RGMII_RXC__RGMII_RXC		0x13030
-				MX6QDL_PAD_RGMII_RD0__RGMII_RD0		0x1b030
-				MX6QDL_PAD_RGMII_RD1__RGMII_RD1		0x13030
-				MX6QDL_PAD_RGMII_RD2__RGMII_RD2		0x1f030
-				MX6QDL_PAD_RGMII_RD3__RGMII_RD3		0x1f030
-				MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL	0x13030
-			>;
-		};
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO		0x1b8b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC		0x1b0b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK	0x1b0b0
+			MX6QDL_PAD_ENET_RX_ER__GPIO1_IO24	0x130b0
+			MX6QDL_PAD_ENET_TX_EN__GPIO1_IO28	0x130b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC		0x1b030
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0		0x1b030
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1		0x1b030
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2		0x1b030
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3		0x1b030
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL	0x1b030
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC		0x13030
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0		0x1b030
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1		0x13030
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2		0x1f030
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3		0x1f030
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL	0x13030
+		>;
+	};
 
-		pinctrl_uart2: uart2grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT4__UART2_RX_DATA	0x1b0b0
-				MX6QDL_PAD_SD4_DAT5__UART2_RTS_B	0x1b0b0
-				MX6QDL_PAD_SD4_DAT6__UART2_CTS_B	0x1b0b0
-				MX6QDL_PAD_SD4_DAT7__UART2_TX_DATA	0x1b0b0
-				MX6QDL_PAD_SD4_DAT3__GPIO2_IO11		0x130b0 /* BT_EN */
-			>;
-		};
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT4__UART2_RX_DATA	0x1b0b0
+			MX6QDL_PAD_SD4_DAT5__UART2_RTS_B	0x1b0b0
+			MX6QDL_PAD_SD4_DAT6__UART2_CTS_B	0x1b0b0
+			MX6QDL_PAD_SD4_DAT7__UART2_TX_DATA	0x1b0b0
+			MX6QDL_PAD_SD4_DAT3__GPIO2_IO11		0x130b0 /* BT_EN */
+		>;
+	};
 
-		pinctrl_usdhc1: usdhc1grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD1_CMD__SD1_CMD	0x17059
-				MX6QDL_PAD_SD1_CLK__SD1_CLK	0x10059
-				MX6QDL_PAD_SD1_DAT0__SD1_DATA0	0x17059
-				MX6QDL_PAD_SD1_DAT1__SD1_DATA1	0x17059
-				MX6QDL_PAD_SD1_DAT2__SD1_DATA2	0x17059
-				MX6QDL_PAD_SD1_DAT3__SD1_DATA3	0x17059
-				MX6QDL_PAD_SD4_DAT0__GPIO2_IO08	0x1b0b0 /* WL_EN */
-				MX6QDL_PAD_SD4_DAT2__GPIO2_IO10	0x1b0b0 /* WL_IRQ */
-			>;
-		};
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_CMD__SD1_CMD	0x17059
+			MX6QDL_PAD_SD1_CLK__SD1_CLK	0x10059
+			MX6QDL_PAD_SD1_DAT0__SD1_DATA0	0x17059
+			MX6QDL_PAD_SD1_DAT1__SD1_DATA1	0x17059
+			MX6QDL_PAD_SD1_DAT2__SD1_DATA2	0x17059
+			MX6QDL_PAD_SD1_DAT3__SD1_DATA3	0x17059
+			MX6QDL_PAD_SD4_DAT0__GPIO2_IO08	0x130b0 /* WL_EN */
+			MX6QDL_PAD_SD4_DAT2__GPIO2_IO10	0x130b0 /* WL_IRQ */
+		>;
+	};
 
-		pinctrl_usdhc3: usdhc3grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD3_CMD__SD3_CMD	0x17059
-				MX6QDL_PAD_SD3_CLK__SD3_CLK	0x10059
-				MX6QDL_PAD_SD3_DAT0__SD3_DATA0	0x17059
-				MX6QDL_PAD_SD3_DAT1__SD3_DATA1	0x17059
-				MX6QDL_PAD_SD3_DAT2__SD3_DATA2	0x17059
-				MX6QDL_PAD_SD3_DAT3__SD3_DATA3	0x17059
-				MX6QDL_PAD_SD3_DAT4__SD3_DATA4	0x17059
-				MX6QDL_PAD_SD3_DAT5__SD3_DATA5	0x17059
-				MX6QDL_PAD_SD3_DAT6__SD3_DATA6	0x17059
-				MX6QDL_PAD_SD3_DAT7__SD3_DATA7	0x17059
-			>;
-		};
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD	0x17059
+			MX6QDL_PAD_SD3_CLK__SD3_CLK	0x10059
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0	0x17059
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1	0x17059
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2	0x17059
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3	0x17059
+			MX6QDL_PAD_SD3_DAT4__SD3_DATA4	0x17059
+			MX6QDL_PAD_SD3_DAT5__SD3_DATA5	0x17059
+			MX6QDL_PAD_SD3_DAT6__SD3_DATA6	0x17059
+			MX6QDL_PAD_SD3_DAT7__SD3_DATA7	0x17059
+		>;
 	};
 };
diff --git a/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi b/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi
index 9fc1fa449f64..b8e74ab3c993 100644
--- a/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi
@@ -1,49 +1,6 @@
-/*
- * Copyright 2015 Armadeus Systems
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+//
+// Copyright 2015 Armadeus Systems <support@armadeus.com>
 
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
@@ -54,35 +11,37 @@
 		stdout-path = &uart4;
 	};
 
+	backlight: backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm3 0 191000>;
+		brightness-levels = <0 4 8 16 32 64 128 255>;
+		default-brightness-level = <0>;
+		power-supply = <&reg_5v>;
+	};
+
 	disp0 {
 		compatible = "fsl,imx-parallel-display";
-		interface-pix-fmt = "bgr666";
 		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_ipu1_disp1>;
-
-		display-timings {
-			lw700 {
-				clock-frequency = <33000033>;
-				hactive = <800>;
-				vactive = <480>;
-				hback-porch = <96>;
-				hfront-porch = <96>;
-				vback-porch = <20>;
-				vfront-porch = <21>;
-				hsync-len = <64>;
-				vsync-len = <4>;
-				hsync-active = <1>;
-				vsync-active = <1>;
-				de-active = <1>;
-				pixelclk-active = <1>;
-			};
-		};
+		pinctrl-0 = <&pinctrl_ipu1_disp0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
 
-		port {
 			display_in: endpoint {
 				remote-endpoint = <&ipu1_di0_disp0>;
 			};
 		};
+
+		port@1 {
+			reg = <1>;
+
+			display_out: endpoint {
+				remote-endpoint = <&panel_in>;
+			};
+		};
 	};
 
 	gpio-keys {
@@ -111,17 +70,30 @@
 		};
 	};
 
+	panel {
+		compatible = "armadeus,st0700-adapt";
+		power-supply = <&reg_3p3v>;
+		backlight = <&backlight>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&display_out>;
+			};
+		};
+	};
+
 	reg_3p3v: regulator-3p3v {
 		compatible = "regulator-fixed";
 		regulator-name = "3P3V";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
 		regulator-always-on;
+		vin-supply = <&reg_5v>;
 	};
 
-	reg_usbh1_vbus: regulator-usb-h1-vbus {
+	reg_5v: regulator-5v {
 		compatible = "regulator-fixed";
-		regulator-name = "usb_h1_vbus";
+		regulator-name = "5V";
 		regulator-min-microvolt = <5000000>;
 		regulator-max-microvolt = <5000000>;
 		regulator-always-on;
@@ -166,6 +138,7 @@
 &can2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_flexcan2>;
+	xceiver-supply = <&reg_5v>;
 	status = "okay";
 };
 
@@ -212,6 +185,11 @@
 		VDDA-supply = <&reg_3p3v>;
 		VDDIO-supply = <&reg_3p3v>;
 	};
+
+	rtc@6f {
+		compatible = "microchip,mcp7940x";
+		reg = <0x6f>;
+	};
 };
 
 &i2c3 {
@@ -261,7 +239,7 @@
 };
 
 &usbh1 {
-	vbus-supply = <&reg_usbh1_vbus>;
+	vbus-supply = <&reg_5v>;
 	phy_type = "utmi";
 	status = "okay";
 };
@@ -297,178 +275,176 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_gpios>;
 
-	apf6dev {
-		pinctrl_audmux: audmuxgrp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT4__AUD3_TXC  0x1b0b0
-				MX6QDL_PAD_CSI0_DAT5__AUD3_TXD  0x1b0b0
-				MX6QDL_PAD_CSI0_DAT6__AUD3_TXFS 0x1b0b0
-				MX6QDL_PAD_CSI0_DAT7__AUD3_RXD  0x1b0b0
-				MX6QDL_PAD_GPIO_0__CCM_CLKO1    0x130b0
-			>;
-		};
+	pinctrl_audmux: audmuxgrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT4__AUD3_TXC  0x1b0b0
+			MX6QDL_PAD_CSI0_DAT5__AUD3_TXD  0x1b0b0
+			MX6QDL_PAD_CSI0_DAT6__AUD3_TXFS 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT7__AUD3_RXD  0x1b0b0
+			MX6QDL_PAD_GPIO_0__CCM_CLKO1    0x130b0
+		>;
+	};
 
-		pinctrl_ecspi1: ecspi1grp {
-			fsl,pins = <
-				MX6QDL_PAD_KEY_COL1__ECSPI1_MISO 0x100b1
-				MX6QDL_PAD_KEY_ROW0__ECSPI1_MOSI 0x100b1
-				MX6QDL_PAD_KEY_COL0__ECSPI1_SCLK 0x100b1
-				MX6QDL_PAD_KEY_ROW1__GPIO4_IO09  0x1b0b0
-				MX6QDL_PAD_KEY_ROW2__GPIO4_IO11  0x1b0b0
-				MX6QDL_PAD_KEY_COL2__GPIO4_IO10  0x1b0b0
-			>;
-		};
+	pinctrl_ecspi1: ecspi1grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL1__ECSPI1_MISO 0x100b1
+			MX6QDL_PAD_KEY_ROW0__ECSPI1_MOSI 0x100b1
+			MX6QDL_PAD_KEY_COL0__ECSPI1_SCLK 0x100b1
+			MX6QDL_PAD_KEY_ROW1__GPIO4_IO09  0x1b0b0
+			MX6QDL_PAD_KEY_ROW2__GPIO4_IO11  0x1b0b0
+			MX6QDL_PAD_KEY_COL2__GPIO4_IO10  0x1b0b0
+		>;
+	};
 
-		pinctrl_flexcan2: flexcan2grp {
-			fsl,pins = <
-				MX6QDL_PAD_KEY_COL4__FLEXCAN2_TX 0x1b0b0
-				MX6QDL_PAD_KEY_ROW4__FLEXCAN2_RX 0x1b0b0
-			>;
-		};
+	pinctrl_flexcan2: flexcan2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL4__FLEXCAN2_TX 0x1b0b0
+			MX6QDL_PAD_KEY_ROW4__FLEXCAN2_RX 0x1b0b0
+		>;
+	};
 
-		pinctrl_gpio_keys: gpiokeysgrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_9__GPIO1_IO09 0x1b0b0
-			>;
-		};
+	pinctrl_gpio_keys: gpiokeysgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_9__GPIO1_IO09 0x1b0b0
+		>;
+	};
 
-		pinctrl_gpio_leds: gpioledsgrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x130b0
-			>;
-		};
+	pinctrl_gpio_leds: gpioledsgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x130b0
+		>;
+	};
 
-		pinctrl_gpios: gpiosgrp {
-			fsl,pins = <
-				MX6QDL_PAD_DI0_PIN4__GPIO4_IO20		0x100b1
-				MX6QDL_PAD_DISP0_DAT18__GPIO5_IO12	0x100b1
-				MX6QDL_PAD_DISP0_DAT19__GPIO5_IO13	0x100b1
-				MX6QDL_PAD_DISP0_DAT20__GPIO5_IO14	0x100b1
-				MX6QDL_PAD_DISP0_DAT21__GPIO5_IO15	0x100b1
-				MX6QDL_PAD_DISP0_DAT22__GPIO5_IO16	0x100b1
-				MX6QDL_PAD_DISP0_DAT23__GPIO5_IO17	0x100b1
-				MX6QDL_PAD_CSI0_PIXCLK__GPIO5_IO18	0x100b1
-				MX6QDL_PAD_CSI0_VSYNC__GPIO5_IO21	0x100b1
-			>;
-		};
+	pinctrl_gpios: gpiosgrp {
+		fsl,pins = <
+			MX6QDL_PAD_DI0_PIN4__GPIO4_IO20		0x100b1
+			MX6QDL_PAD_DISP0_DAT18__GPIO5_IO12	0x100b1
+			MX6QDL_PAD_DISP0_DAT19__GPIO5_IO13	0x100b1
+			MX6QDL_PAD_DISP0_DAT20__GPIO5_IO14	0x100b1
+			MX6QDL_PAD_DISP0_DAT21__GPIO5_IO15	0x100b1
+			MX6QDL_PAD_DISP0_DAT22__GPIO5_IO16	0x100b1
+			MX6QDL_PAD_DISP0_DAT23__GPIO5_IO17	0x100b1
+			MX6QDL_PAD_CSI0_PIXCLK__GPIO5_IO18	0x100b1
+			MX6QDL_PAD_CSI0_VSYNC__GPIO5_IO21	0x100b1
+		>;
+	};
 
-		pinctrl_gsm: gsmgrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_4__GPIO1_IO04  0x130b0 /* GSM_POKIN */
-				MX6QDL_PAD_GPIO_18__GPIO7_IO13 0x130b0 /* GSM_PWR_EN */
-			>;
-		};
+	pinctrl_gsm: gsmgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_4__GPIO1_IO04  0x130b0 /* GSM_POKIN */
+			MX6QDL_PAD_GPIO_18__GPIO7_IO13 0x130b0 /* GSM_PWR_EN */
+		>;
+	};
 
-		pinctrl_i2c1: i2c1grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT8__I2C1_SDA 0x4001b8b1
-				MX6QDL_PAD_CSI0_DAT9__I2C1_SCL 0x4001b8b1
-			>;
-		};
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT8__I2C1_SDA 0x4001b8b1
+			MX6QDL_PAD_CSI0_DAT9__I2C1_SCL 0x4001b8b1
+		>;
+	};
 
-		pinctrl_i2c2: i2c2grp {
-			fsl,pins = <
-				MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
-				MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
-			>;
-		};
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
+		>;
+	};
 
-		pinctrl_i2c3: i2c3grp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_6__I2C3_SDA 0x4001b8b1
-				MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1
-			>;
-		};
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_6__I2C3_SDA 0x4001b8b1
+			MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1
+		>;
+	};
 
-		pinctrl_ipu1_disp1: ipu1disp1grp {
-			fsl,pins = <
-				MX6QDL_PAD_DI0_DISP_CLK__IPU1_DI0_DISP_CLK	0x100b1
-				MX6QDL_PAD_DI0_PIN15__IPU1_DI0_PIN15		0x100b1
-				MX6QDL_PAD_DI0_PIN2__IPU1_DI0_PIN02		0x100b1
-				MX6QDL_PAD_DI0_PIN3__IPU1_DI0_PIN03		0x100b1
-				MX6QDL_PAD_DISP0_DAT0__IPU1_DISP0_DATA00	0x100b1
-				MX6QDL_PAD_DISP0_DAT1__IPU1_DISP0_DATA01	0x100b1
-				MX6QDL_PAD_DISP0_DAT2__IPU1_DISP0_DATA02	0x100b1
-				MX6QDL_PAD_DISP0_DAT3__IPU1_DISP0_DATA03	0x100b1
-				MX6QDL_PAD_DISP0_DAT4__IPU1_DISP0_DATA04	0x100b1
-				MX6QDL_PAD_DISP0_DAT5__IPU1_DISP0_DATA05	0x100b1
-				MX6QDL_PAD_DISP0_DAT6__IPU1_DISP0_DATA06	0x100b1
-				MX6QDL_PAD_DISP0_DAT7__IPU1_DISP0_DATA07	0x100b1
-				MX6QDL_PAD_DISP0_DAT8__IPU1_DISP0_DATA08	0x100b1
-				MX6QDL_PAD_DISP0_DAT9__IPU1_DISP0_DATA09	0x100b1
-				MX6QDL_PAD_DISP0_DAT10__IPU1_DISP0_DATA10	0x100b1
-				MX6QDL_PAD_DISP0_DAT11__IPU1_DISP0_DATA11	0x100b1
-				MX6QDL_PAD_DISP0_DAT12__IPU1_DISP0_DATA12	0x100b1
-				MX6QDL_PAD_DISP0_DAT13__IPU1_DISP0_DATA13	0x100b1
-				MX6QDL_PAD_DISP0_DAT14__IPU1_DISP0_DATA14	0x100b1
-				MX6QDL_PAD_DISP0_DAT15__IPU1_DISP0_DATA15	0x100b1
-				MX6QDL_PAD_DISP0_DAT16__IPU1_DISP0_DATA16	0x100b1
-				MX6QDL_PAD_DISP0_DAT17__IPU1_DISP0_DATA17	0x100b1
-			>;
-		};
+	pinctrl_ipu1_disp0: ipu1disp0grp {
+		fsl,pins = <
+			MX6QDL_PAD_DI0_DISP_CLK__IPU1_DI0_DISP_CLK	0x100b1
+			MX6QDL_PAD_DI0_PIN15__IPU1_DI0_PIN15		0x100b1
+			MX6QDL_PAD_DI0_PIN2__IPU1_DI0_PIN02		0x100b1
+			MX6QDL_PAD_DI0_PIN3__IPU1_DI0_PIN03		0x100b1
+			MX6QDL_PAD_DISP0_DAT0__IPU1_DISP0_DATA00	0x100b1
+			MX6QDL_PAD_DISP0_DAT1__IPU1_DISP0_DATA01	0x100b1
+			MX6QDL_PAD_DISP0_DAT2__IPU1_DISP0_DATA02	0x100b1
+			MX6QDL_PAD_DISP0_DAT3__IPU1_DISP0_DATA03	0x100b1
+			MX6QDL_PAD_DISP0_DAT4__IPU1_DISP0_DATA04	0x100b1
+			MX6QDL_PAD_DISP0_DAT5__IPU1_DISP0_DATA05	0x100b1
+			MX6QDL_PAD_DISP0_DAT6__IPU1_DISP0_DATA06	0x100b1
+			MX6QDL_PAD_DISP0_DAT7__IPU1_DISP0_DATA07	0x100b1
+			MX6QDL_PAD_DISP0_DAT8__IPU1_DISP0_DATA08	0x100b1
+			MX6QDL_PAD_DISP0_DAT9__IPU1_DISP0_DATA09	0x100b1
+			MX6QDL_PAD_DISP0_DAT10__IPU1_DISP0_DATA10	0x100b1
+			MX6QDL_PAD_DISP0_DAT11__IPU1_DISP0_DATA11	0x100b1
+			MX6QDL_PAD_DISP0_DAT12__IPU1_DISP0_DATA12	0x100b1
+			MX6QDL_PAD_DISP0_DAT13__IPU1_DISP0_DATA13	0x100b1
+			MX6QDL_PAD_DISP0_DAT14__IPU1_DISP0_DATA14	0x100b1
+			MX6QDL_PAD_DISP0_DAT15__IPU1_DISP0_DATA15	0x100b1
+			MX6QDL_PAD_DISP0_DAT16__IPU1_DISP0_DATA16	0x100b1
+			MX6QDL_PAD_DISP0_DAT17__IPU1_DISP0_DATA17	0x100b1
+		>;
+	};
 
-		pinctrl_pcie: pciegrp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT16__GPIO6_IO02 0x130b0
-			>;
-		};
+	pinctrl_pcie: pciegrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT16__GPIO6_IO02 0x130b0
+		>;
+	};
 
-		pinctrl_pwm3: pwm3grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1
-			>;
-		};
+	pinctrl_pwm3: pwm3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1
+		>;
+	};
 
-		pinctrl_uart1: uart1grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA 0x1b0b0
-				MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b0
-			>;
-		};
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b0
+		>;
+	};
 
-		pinctrl_uart3: uart3grp {
-			fsl,pins = <
-				MX6QDL_PAD_EIM_D23__UART3_CTS_B   0x1b0b0
-				MX6QDL_PAD_EIM_D24__UART3_TX_DATA 0x1b0b0
-				MX6QDL_PAD_EIM_D25__UART3_RX_DATA 0x1b0b0
-				MX6QDL_PAD_EIM_D31__UART3_RTS_B   0x1b0b0
-			>;
-		};
+	pinctrl_uart3: uart3grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D23__UART3_CTS_B   0x1b0b0
+			MX6QDL_PAD_EIM_D24__UART3_TX_DATA 0x1b0b0
+			MX6QDL_PAD_EIM_D25__UART3_RX_DATA 0x1b0b0
+			MX6QDL_PAD_EIM_D31__UART3_RTS_B   0x1b0b0
+		>;
+	};
 
-		pinctrl_uart4: uart4grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT12__UART4_TX_DATA 0x1b0b0
-				MX6QDL_PAD_CSI0_DAT13__UART4_RX_DATA 0x1b0b0
-			>;
-		};
+	pinctrl_uart4: uart4grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT12__UART4_TX_DATA 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT13__UART4_RX_DATA 0x1b0b0
+		>;
+	};
 
-		pinctrl_usbotg: usbotggrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x1b0b0
-			>;
-		};
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x1b0b0
+		>;
+	};
 
-		pinctrl_usdhc2: usdhc2grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD2_CMD__SD2_CMD    0x17059
-				MX6QDL_PAD_SD2_CLK__SD2_CLK    0x10059
-				MX6QDL_PAD_SD2_DAT0__SD2_DATA0 0x17059
-				MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059
-				MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059
-				MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059
-			>;
-		};
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_CMD__SD2_CMD    0x17059
+			MX6QDL_PAD_SD2_CLK__SD2_CLK    0x10059
+			MX6QDL_PAD_SD2_DAT0__SD2_DATA0 0x17059
+			MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059
+			MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059
+			MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059
+		>;
+	};
 
-		pinctrl_spdif: spdifgrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_19__SPDIF_OUT 0x1b0b0
-			>;
-		};
+	pinctrl_spdif: spdifgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_19__SPDIF_OUT 0x1b0b0
+		>;
+	};
 
-		pinctrl_touchscreen: touchscreengrp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT17__GPIO6_IO03 0x1b0b0
-			>;
-		};
+	pinctrl_touchscreen: touchscreengrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT17__GPIO6_IO03 0x1b0b0
+		>;
 	};
 };
diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi
index 019dda6b88ad..d03dff23863d 100644
--- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi
@@ -166,8 +166,11 @@
  */
 &i2c2 {
 	clock-frequency = <100000>;
-	pinctrl-names = "default";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c2>;
+	pinctrl-0 = <&pinctrl_i2c2_gpio>;
+	scl-gpios = <&gpio2 30 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+	sda-gpios = <&gpio3 16 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	status = "okay";
 
 	pmic: pfuze100@8 {
@@ -312,9 +315,9 @@
  */
 &i2c3 {
 	clock-frequency = <100000>;
-	pinctrl-names = "default", "recovery";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c3>;
-	pinctrl-1 = <&pinctrl_i2c3_recovery>;
+	pinctrl-1 = <&pinctrl_i2c3_gpio>;
 	scl-gpios = <&gpio1 3 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	sda-gpios = <&gpio1 6 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 	status = "disabled";
@@ -426,6 +429,9 @@
 };
 
 &iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbh_oc_1>;
+
 	pinctrl_audmux: audmuxgrp {
 		fsl,pins = <
 			MX6QDL_PAD_KEY_COL0__AUD5_TXC	0x130b0
@@ -509,6 +515,13 @@
 		>;
 	};
 
+	pinctrl_i2c2_gpio: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_EB2__GPIO2_IO30 0x4001b8b1
+			MX6QDL_PAD_EIM_D16__GPIO3_IO16 0x4001b8b1
+		>;
+	};
+
 	pinctrl_i2c3: i2c3grp {
 		fsl,pins = <
 			MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1
@@ -516,7 +529,7 @@
 		>;
 	};
 
-	pinctrl_i2c3_recovery: i2c3recoverygrp {
+	pinctrl_i2c3_gpio: i2c3gpiogrp {
 		fsl,pins = <
 			MX6QDL_PAD_GPIO_3__GPIO1_IO03 0x4001b8b1
 			MX6QDL_PAD_GPIO_6__GPIO1_IO06 0x4001b8b1
@@ -615,6 +628,13 @@
 		>;
 	};
 
+	pinctrl_usbh_oc_1: usbhoc1grp {
+		fsl,pins = <
+			/* USBH_OC */
+			MX6QDL_PAD_EIM_D30__GPIO3_IO30		0x1b0b0
+		>;
+	};
+
 	pinctrl_spdif: spdifgrp {
 		fsl,pins = <
 			MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x1b0b0
@@ -681,6 +701,13 @@
 		>;
 	};
 
+	pinctrl_usbc_id_1: usbc_id-1 {
+		fsl,pins = <
+			/* USBC_ID */
+			MX6QDL_PAD_NANDF_D2__GPIO2_IO02		0x1b0b0
+		>;
+	};
+
 	pinctrl_usdhc1: usdhc1grp {
 		fsl,pins = <
 			MX6QDL_PAD_SD1_CMD__SD1_CMD	0x17071
diff --git a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
index c23ba229fd05..c38e86eedcc0 100644
--- a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi
@@ -105,19 +105,16 @@
 	sound-digital {
 		compatible = "simple-audio-card";
 		simple-audio-card,name = "tda1997x-audio";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&sound_codec>;
+		simple-audio-card,frame-master = <&sound_codec>;
 
-		simple-audio-card,dai-link@0 {
-			format = "i2s";
-
-			cpu {
-				sound-dai = <&ssi2>;
-			};
+		sound_cpu: simple-audio-card,cpu {
+			sound-dai = <&ssi2>;
+		};
 
-			codec {
-				bitclock-master;
-				frame-master;
-				sound-dai = <&hdmi_receiver>;
-			};
+		sound_codec: simple-audio-card,codec {
+			sound-dai = <&hdmi_receiver>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi
index 97f1659144ea..de514eb5aa99 100644
--- a/arch/arm/boot/dts/imx6qdl-rex.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi
@@ -132,6 +132,19 @@
 	pinctrl-0 = <&pinctrl_i2c2>;
 	status = "okay";
 
+	pca9535: gpio-expander@27 {
+		compatible = "nxp,pca9535";
+		reg = <0x27>;
+		gpio-controller;
+		#gpio-cells = <2>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_pca9535>;
+		interrupt-parent = <&gpio6>;
+		interrupts = <16 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
 	eeprom@57 {
 		compatible = "atmel,24c02";
 		reg = <0x57>;
@@ -237,6 +250,12 @@
 			>;
 		};
 
+		pinctrl_pca9535: pca9535grp {
+			fsl,pins = <
+				MX6QDL_PAD_NANDF_CS3__GPIO6_IO16	0x17059
+		   >;
+		};
+
 		pinctrl_uart1: uart1grp {
 			fsl,pins = <
 				MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA	0x1b0b1
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index 71ca76a5e4a5..fe59dde41b64 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -749,10 +749,6 @@
 	vin-supply = <&vgen5_reg>;
 };
 
-&reg_vdd3p0 {
-	vin-supply = <&sw2_reg>;
-};
-
 &reg_vdd2p5 {
 	vin-supply = <&vgen5_reg>;
 };
diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index 776bfc77f89d..828dd20cd27d 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -210,6 +210,14 @@
 			>;
 		};
 
+		pinctrl_usbotg: usbotg {
+			fsl,pins = <
+				MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x17059
+				MX6QDL_PAD_EIM_D22__USB_OTG_PWR 0x17059
+				MX6QDL_PAD_EIM_D21__USB_OTG_OC 0x17059
+			>;
+		};
+
 		pinctrl_usdhc3: usdhc3grp {
 			fsl,pins = <
 				MX6QDL_PAD_SD3_CMD__SD3_CMD		0x17059
@@ -287,6 +295,12 @@
 	status = "okay";
 };
 
+&usbotg {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	status = "okay";
+};
+
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
index 2cfb4112a467..c070893c509e 100644
--- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
@@ -279,8 +279,18 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
 	phy-mode = "rgmii-id";
+	phy-handle = <&ethphy>;
 	phy-reset-gpios = <&gpio3 29 GPIO_ACTIVE_LOW>;
 	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy: ethernet-phy@1 {
+			reg = <1>;
+		};
+	};
 };
 
 &mipi_csi {
diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
index 93be00a60c88..a2a4f33a3e3e 100644
--- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
@@ -358,8 +358,10 @@
 		compatible = "fsl,mma8451";
 		reg = <0x1c>;
 		interrupt-parent = <&gpio1>;
-		interrupt-names = "int1", "int2";
-		interrupts = <18 IRQ_TYPE_LEVEL_LOW>, <20 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-names = "INT2";
+		interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+		vdd-supply = <&reg_3p3v>;
+		vddio-supply = <&reg_3p3v>;
 	};
 
 	hpa2: amp@60 {
@@ -849,7 +851,6 @@
 &iomuxc {
 	pinctrl_accel: accelgrp {
 		fsl,pins = <
-			MX6QDL_PAD_SD1_CMD__GPIO1_IO18		0x4001b000
 			MX6QDL_PAD_SD1_CLK__GPIO1_IO20		0x4001b000
 		>;
 	};
diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts
index 4829aa682aeb..bc86cfaaa9c2 100644
--- a/arch/arm/boot/dts/imx6sl-evk.dts
+++ b/arch/arm/boot/dts/imx6sl-evk.dts
@@ -584,10 +584,6 @@
 	vin-supply = <&sw2_reg>;
 };
 
-&reg_vdd3p0 {
-	vin-supply = <&sw2_reg>;
-};
-
 &reg_vdd2p5 {
 	vin-supply = <&sw2_reg>;
 };
diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi
index 3a96b5538a2a..59c54e6ad09a 100644
--- a/arch/arm/boot/dts/imx6sl.dtsi
+++ b/arch/arm/boot/dts/imx6sl.dtsi
@@ -525,7 +525,7 @@
 			anatop: anatop@20c8000 {
 				compatible = "fsl,imx6sl-anatop",
 					     "fsl,imx6q-anatop",
-					     "syscon", "simple-bus";
+					     "syscon", "simple-mfd";
 				reg = <0x020c8000 0x1000>;
 				interrupts = <0 49 IRQ_TYPE_LEVEL_HIGH>,
 					     <0 54 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts
index 3e1d32fdf4b8..5ace9e6acf85 100644
--- a/arch/arm/boot/dts/imx6sll-evk.dts
+++ b/arch/arm/boot/dts/imx6sll-evk.dts
@@ -265,10 +265,6 @@
 	status = "okay";
 };
 
-&reg_3p0 {
-	vin-supply = <&sw2_reg>;
-};
-
 &snvs_poweroff {
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6sll-kobo-clarahd.dts b/arch/arm/boot/dts/imx6sll-kobo-clarahd.dts
new file mode 100644
index 000000000000..7214d1c98249
--- /dev/null
+++ b/arch/arm/boot/dts/imx6sll-kobo-clarahd.dts
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: (GPL-2.0)
+/*
+ * Device tree for the Kobo Clara HD ebook reader
+ *
+ * Name on mainboard is: 37NB-E60K00+4A4
+ * Serials start with: E60K02 (a number also seen in
+ * vendor kernel sources)
+ *
+ * This mainboard seems to be equipped with different SoCs.
+ * In the Kobo Clara HD ebook reader it is an i.MX6SLL
+ *
+ * Copyright 2019 Andreas Kemnade
+ * based on works
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/gpio/gpio.h>
+#include "imx6sll.dtsi"
+#include "e60k02.dtsi"
+
+/ {
+	model = "Kobo Clara HD";
+	compatible = "kobo,clarahd", "fsl,imx6sll";
+};
+
+&clks {
+	assigned-clocks = <&clks IMX6SLL_CLK_PLL4_AUDIO_DIV>;
+	assigned-clock-rates = <393216000>;
+};
+
+&cpu0 {
+	arm-supply = <&dcdc3_reg>;
+	soc-supply = <&dcdc1_reg>;
+};
+
+&gpio_keys {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_gpio_keys>;
+};
+
+&i2c1 {
+	pinctrl-names = "default","sleep";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	pinctrl-1 = <&pinctrl_i2c1_sleep>;
+};
+
+&i2c2 {
+	pinctrl-names = "default","sleep";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	pinctrl-1 = <&pinctrl_i2c2_sleep>;
+};
+
+&i2c3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hog>;
+
+	pinctrl_gpio_keys: gpio-keysgrp {
+		fsl,pins = <
+			MX6SLL_PAD_SD1_DATA1__GPIO5_IO08	0x17059	/* PWR_SW */
+			MX6SLL_PAD_SD1_DATA4__GPIO5_IO12	0x17059	/* HALL_EN */
+		>;
+	};
+
+	pinctrl_hog: hoggrp {
+		fsl,pins = <
+			MX6SLL_PAD_LCD_DATA00__GPIO2_IO20	0x79
+			MX6SLL_PAD_LCD_DATA01__GPIO2_IO21	0x79
+			MX6SLL_PAD_LCD_DATA02__GPIO2_IO22	0x79
+			MX6SLL_PAD_LCD_DATA03__GPIO2_IO23	0x79
+			MX6SLL_PAD_LCD_DATA04__GPIO2_IO24	0x79
+			MX6SLL_PAD_LCD_DATA05__GPIO2_IO25	0x79
+			MX6SLL_PAD_LCD_DATA06__GPIO2_IO26	0x79
+			MX6SLL_PAD_LCD_DATA07__GPIO2_IO27	0x79
+			MX6SLL_PAD_LCD_DATA08__GPIO2_IO28	0x79
+			MX6SLL_PAD_LCD_DATA09__GPIO2_IO29	0x79
+			MX6SLL_PAD_LCD_DATA10__GPIO2_IO30	0x79
+			MX6SLL_PAD_LCD_DATA11__GPIO2_IO31	0x79
+			MX6SLL_PAD_LCD_DATA12__GPIO3_IO00	0x79
+			MX6SLL_PAD_LCD_DATA13__GPIO3_IO01	0x79
+			MX6SLL_PAD_LCD_DATA14__GPIO3_IO02	0x79
+			MX6SLL_PAD_LCD_DATA15__GPIO3_IO03	0x79
+			MX6SLL_PAD_LCD_DATA16__GPIO3_IO04	0x79
+			MX6SLL_PAD_LCD_DATA17__GPIO3_IO05	0x79
+			MX6SLL_PAD_LCD_DATA18__GPIO3_IO06	0x79
+			MX6SLL_PAD_LCD_DATA19__GPIO3_IO07	0x79
+			MX6SLL_PAD_LCD_DATA20__GPIO3_IO08	0x79
+			MX6SLL_PAD_LCD_DATA21__GPIO3_IO09	0x79
+			MX6SLL_PAD_LCD_DATA22__GPIO3_IO10	0x79
+			MX6SLL_PAD_LCD_DATA23__GPIO3_IO11	0x79
+			MX6SLL_PAD_LCD_CLK__GPIO2_IO15		0x79
+			MX6SLL_PAD_LCD_ENABLE__GPIO2_IO16	0x79
+			MX6SLL_PAD_LCD_HSYNC__GPIO2_IO17	0x79
+			MX6SLL_PAD_LCD_VSYNC__GPIO2_IO18	0x79
+			MX6SLL_PAD_LCD_RESET__GPIO2_IO19	0x79
+			MX6SLL_PAD_KEY_COL3__GPIO3_IO30		0x79
+			MX6SLL_PAD_KEY_ROW7__GPIO4_IO07		0x79
+			MX6SLL_PAD_ECSPI2_MOSI__GPIO4_IO13	0x79
+			MX6SLL_PAD_KEY_COL5__GPIO4_IO02		0x79
+			MX6SLL_PAD_KEY_ROW6__GPIO4_IO05		0x79
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6SLL_PAD_I2C1_SCL__I2C1_SCL	0x4001f8b1
+			MX6SLL_PAD_I2C1_SDA__I2C1_SDA	0x4001f8b1
+		>;
+	};
+
+	pinctrl_i2c1_sleep: i2c1grp-sleep {
+		fsl,pins = <
+			MX6SLL_PAD_I2C1_SCL__I2C1_SCL	0x400108b1
+			MX6SLL_PAD_I2C1_SDA__I2C1_SDA	0x400108b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6SLL_PAD_I2C2_SCL__I2C2_SCL	0x4001f8b1
+			MX6SLL_PAD_I2C2_SDA__I2C2_SDA	0x4001f8b1
+		>;
+	};
+
+	pinctrl_i2c2_sleep: i2c2grp-sleep {
+		fsl,pins = <
+			MX6SLL_PAD_I2C2_SCL__I2C2_SCL	0x400108b1
+			MX6SLL_PAD_I2C2_SDA__I2C2_SDA	0x400108b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6SLL_PAD_REF_CLK_24M__I2C3_SCL 0x4001f8b1
+			MX6SLL_PAD_REF_CLK_32K__I2C3_SDA 0x4001f8b1
+		>;
+	};
+
+	pinctrl_led: ledgrp {
+		fsl,pins = <
+			MX6SLL_PAD_SD1_DATA6__GPIO5_IO07 0x17059
+		>;
+	};
+
+	pinctrl_lm3630a_bl_gpio: lm3630a-bl-gpiogrp {
+		fsl,pins = <
+			MX6SLL_PAD_EPDC_PWR_CTRL3__GPIO2_IO10	0x10059 /* HWEN */
+		>;
+	};
+
+	pinctrl_ricoh_gpio: ricoh-gpiogrp {
+		fsl,pins = <
+			MX6SLL_PAD_SD1_CLK__GPIO5_IO15	0x1b8b1 /* ricoh619 chg */
+			MX6SLL_PAD_SD1_DATA0__GPIO5_IO11 0x1b8b1 /* ricoh619 irq */
+			MX6SLL_PAD_KEY_COL2__GPIO3_IO28	0x1b8b1 /* ricoh619 bat_low_int */
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6SLL_PAD_UART1_TXD__UART1_DCE_TX 0x1b0b1
+			MX6SLL_PAD_UART1_RXD__UART1_DCE_RX 0x1b0b1
+		>;
+	};
+
+	pinctrl_usbotg1: usbotg1grp {
+		fsl,pins = <
+			MX6SLL_PAD_EPDC_PWR_COM__USB_OTG1_ID 0x17059
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_CMD__SD2_CMD		0x17059
+			MX6SLL_PAD_SD2_CLK__SD2_CLK		0x13059
+			MX6SLL_PAD_SD2_DATA0__SD2_DATA0		0x17059
+			MX6SLL_PAD_SD2_DATA1__SD2_DATA1		0x17059
+			MX6SLL_PAD_SD2_DATA2__SD2_DATA2		0x17059
+			MX6SLL_PAD_SD2_DATA3__SD2_DATA3		0x17059
+		>;
+	};
+
+	pinctrl_usdhc2_100mhz: usdhc2grp-100mhz {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_CMD__SD2_CMD		0x170b9
+			MX6SLL_PAD_SD2_CLK__SD2_CLK		0x130b9
+			MX6SLL_PAD_SD2_DATA0__SD2_DATA0		0x170b9
+			MX6SLL_PAD_SD2_DATA1__SD2_DATA1		0x170b9
+			MX6SLL_PAD_SD2_DATA2__SD2_DATA2		0x170b9
+			MX6SLL_PAD_SD2_DATA3__SD2_DATA3		0x170b9
+		>;
+	};
+
+	pinctrl_usdhc2_200mhz: usdhc2grp-200mhz {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_CMD__SD2_CMD		0x170f9
+			MX6SLL_PAD_SD2_CLK__SD2_CLK		0x130f9
+			MX6SLL_PAD_SD2_DATA0__SD2_DATA0		0x170f9
+			MX6SLL_PAD_SD2_DATA1__SD2_DATA1		0x170f9
+			MX6SLL_PAD_SD2_DATA2__SD2_DATA2		0x170f9
+			MX6SLL_PAD_SD2_DATA3__SD2_DATA3		0x170f9
+		>;
+	};
+
+	pinctrl_usdhc2_sleep: usdhc2grp-sleep {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_CMD__GPIO5_IO04		0x100f9
+			MX6SLL_PAD_SD2_CLK__GPIO5_IO05		0x100f9
+			MX6SLL_PAD_SD2_DATA0__GPIO5_IO01	0x100f9
+			MX6SLL_PAD_SD2_DATA1__GPIO4_IO30	0x100f9
+			MX6SLL_PAD_SD2_DATA2__GPIO5_IO03	0x100f9
+			MX6SLL_PAD_SD2_DATA3__GPIO4_IO28	0x100f9
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6SLL_PAD_SD3_CMD__SD3_CMD	0x11059
+			MX6SLL_PAD_SD3_CLK__SD3_CLK	0x11059
+			MX6SLL_PAD_SD3_DATA0__SD3_DATA0	0x11059
+			MX6SLL_PAD_SD3_DATA1__SD3_DATA1	0x11059
+			MX6SLL_PAD_SD3_DATA2__SD3_DATA2	0x11059
+			MX6SLL_PAD_SD3_DATA3__SD3_DATA3	0x11059
+		>;
+	};
+
+	pinctrl_usdhc3_100mhz: usdhc3grp-100mhz {
+		fsl,pins = <
+			MX6SLL_PAD_SD3_CMD__SD3_CMD	0x170b9
+			MX6SLL_PAD_SD3_CLK__SD3_CLK	0x170b9
+			MX6SLL_PAD_SD3_DATA0__SD3_DATA0	0x170b9
+			MX6SLL_PAD_SD3_DATA1__SD3_DATA1	0x170b9
+			MX6SLL_PAD_SD3_DATA2__SD3_DATA2	0x170b9
+			MX6SLL_PAD_SD3_DATA3__SD3_DATA3	0x170b9
+		>;
+	};
+
+	pinctrl_usdhc3_200mhz: usdhc3grp-200mhz {
+		fsl,pins = <
+			MX6SLL_PAD_SD3_CMD__SD3_CMD	0x170f9
+			MX6SLL_PAD_SD3_CLK__SD3_CLK	0x170f9
+			MX6SLL_PAD_SD3_DATA0__SD3_DATA0	0x170f9
+			MX6SLL_PAD_SD3_DATA1__SD3_DATA1	0x170f9
+			MX6SLL_PAD_SD3_DATA2__SD3_DATA2	0x170f9
+			MX6SLL_PAD_SD3_DATA3__SD3_DATA3	0x170f9
+		>;
+	};
+
+	pinctrl_usdhc3_sleep: usdhc3grp-sleep {
+		fsl,pins = <
+			MX6SLL_PAD_SD3_CMD__GPIO5_IO21	0x100c1
+			MX6SLL_PAD_SD3_CLK__GPIO5_IO18	0x100c1
+			MX6SLL_PAD_SD3_DATA0__GPIO5_IO19	0x100c1
+			MX6SLL_PAD_SD3_DATA1__GPIO5_IO20	0x100c1
+			MX6SLL_PAD_SD3_DATA2__GPIO5_IO16	0x100c1
+			MX6SLL_PAD_SD3_DATA3__GPIO5_IO17	0x100c1
+		>;
+	};
+
+	pinctrl_wifi_power: wifi-powergrp {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_DATA6__GPIO4_IO29	0x10059		/* WIFI_3V3_ON */
+		>;
+	};
+
+	pinctrl_wifi_reset: wifi-resetgrp {
+		fsl,pins = <
+			MX6SLL_PAD_SD2_DATA7__GPIO5_IO00	0x10059		/* WIFI_RST */
+		>;
+	};
+};
+
+&leds {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_led>;
+};
+
+&lm3630a {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lm3630a_bl_gpio>;
+};
+
+&reg_wifi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wifi_power>;
+};
+
+&ricoh619 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ricoh_gpio>;
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1>;
+};
+
+&usdhc2 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz","sleep";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc2_200mhz>;
+	pinctrl-3 = <&pinctrl_usdhc2_sleep>;
+};
+
+&usdhc3 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz","sleep";
+	pinctrl-0 = <&pinctrl_usdhc3>;
+	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+	pinctrl-3 = <&pinctrl_usdhc3_sleep>;
+};
+
+&wifi_pwrseq {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wifi_reset>;
+};
diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi
index 13c7ba7fa6bc..85aa8bb98528 100644
--- a/arch/arm/boot/dts/imx6sll.dtsi
+++ b/arch/arm/boot/dts/imx6sll.dtsi
@@ -507,7 +507,7 @@
 			anatop: anatop@20c8000 {
 				compatible = "fsl,imx6sll-anatop",
 					     "fsl,imx6q-anatop",
-					     "syscon", "simple-bus";
+					     "syscon", "simple-mfd";
 				reg = <0x020c8000 0x4000>;
 				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
index f1830ed387a5..91a7548fdb8d 100644
--- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
@@ -159,10 +159,6 @@
 	vin-supply = <&vgen6_reg>;
 };
 
-&reg_vdd3p0 {
-	vin-supply = <&sw2_reg>;
-};
-
 &reg_vdd2p5 {
 	vin-supply = <&vgen6_reg>;
 };
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
index a8ee7087af5a..5a63ca615722 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@@ -141,10 +141,6 @@
 	vin-supply = <&vgen6_reg>;
 };
 
-&reg_vdd3p0 {
-	vin-supply = <&sw2_reg>;
-};
-
 &reg_vdd2p5 {
 	vin-supply = <&vgen6_reg>;
 };
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 531a52c1e987..59bad60a47dc 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -594,7 +594,7 @@
 
 			anatop: anatop@20c8000 {
 				compatible = "fsl,imx6sx-anatop", "fsl,imx6q-anatop",
-					     "syscon", "simple-bus";
+					     "syscon", "simple-mfd";
 				reg = <0x020c8000 0x1000>;
 				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
index c2a9dd57e56a..212144511b66 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
@@ -30,6 +30,28 @@
 		enable-active-high;
 	};
 
+	reg_peri_3v3: regulator-peri-3v3 {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_peri_3v3>;
+		regulator-name = "VPERI_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		gpio = <&gpio5 2 GPIO_ACTIVE_LOW>;
+		/*
+		 * If you want to want to make this dynamic please
+		 * check schematics and test all affected peripherals:
+		 *
+		 * - sensors
+		 * - ethernet phy
+		 * - can
+		 * - bluetooth
+		 * - wm8960 audio codec
+		 * - ov5640 camera
+		 */
+		regulator-always-on;
+	};
+
 	reg_can_3v3: regulator-can-3v3 {
 		compatible = "regulator-fixed";
 		regulator-name = "can-3v3";
@@ -130,6 +152,7 @@
 	pinctrl-0 = <&pinctrl_enet1>;
 	phy-mode = "rmii";
 	phy-handle = <&ethphy0>;
+	phy-supply = <&reg_peri_3v3>;
 	status = "okay";
 };
 
@@ -138,6 +161,7 @@
 	pinctrl-0 = <&pinctrl_enet2>;
 	phy-mode = "rmii";
 	phy-handle = <&ethphy1>;
+	phy-supply = <&reg_peri_3v3>;
 	status = "okay";
 
 	mdio {
@@ -180,9 +204,11 @@
 	pinctrl-0 = <&pinctrl_i2c1>;
 	status = "okay";
 
-	mag3110@e {
+	magnetometer@e {
 		compatible = "fsl,mag3110";
 		reg = <0x0e>;
+		vdd-supply = <&reg_peri_3v3>;
+		vddio-supply = <&reg_peri_3v3>;
 	};
 };
 
@@ -215,7 +241,7 @@
 	flash0: n25q256a@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
-		compatible = "micron,n25q256a";
+		compatible = "micron,n25q256a", "jedec,spi-nor";
 		spi-max-frequency = <29000000>;
 		spi-rx-bus-width = <4>;
 		spi-tx-bus-width = <4>;
@@ -266,6 +292,8 @@
 
 &usbotg1 {
 	dr_mode = "otg";
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usb_otg1>;
 	status = "okay";
 };
 
@@ -448,6 +476,12 @@
 		>;
 	};
 
+	pinctrl_peri_3v3: peri3v3grp {
+		fsl,pins = <
+			MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x1b0b0
+		>;
+	};
+
 	pinctrl_pwm1: pwm1grp {
 		fsl,pins = <
 			MX6UL_PAD_GPIO1_IO08__PWM1_OUT   0x110b0
@@ -499,6 +533,12 @@
 		>;
 	};
 
+	pinctrl_usb_otg1: usbotg1grp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO00__ANATOP_OTG1_ID	0x17059
+		>;
+	};
+
 	pinctrl_usdhc1: usdhc1grp {
 		fsl,pins = <
 			MX6UL_PAD_SD1_CMD__USDHC1_CMD     	0x17059
diff --git a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi
new file mode 100644
index 000000000000..f2386dcb9ff2
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2019 Armadeus Systems <support@armadeus.com>
+
+/ {
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0>; /* will be filled by U-Boot */
+	};
+
+	reg_3v3: regulator-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	usdhc3_pwrseq: usdhc3-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		reset-gpios = <&gpio2 9 GPIO_ACTIVE_LOW>;
+	};
+};
+
+&fec1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet1>;
+	phy-mode = "rmii";
+	phy-reset-duration = <1>;
+	phy-reset-gpios = <&gpio4 2 GPIO_ACTIVE_LOW>;
+	phy-handle = <&ethphy1>;
+	phy-supply = <&reg_3v3>;
+	status = "okay";
+
+	mdio: mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy1: ethernet-phy@1 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <1>;
+			interrupt-parent = <&gpio4>;
+			interrupts = <16 IRQ_TYPE_LEVEL_LOW>;
+			status = "okay";
+		};
+	};
+};
+
+/* Bluetooth */
+&uart8 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart8>;
+	uart-has-rtscts;
+	status = "okay";
+};
+
+/* eMMC */
+&usdhc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	bus-width = <8>;
+	no-1-8-v;
+	non-removable;
+	status = "okay";
+};
+
+/* WiFi */
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	bus-width = <4>;
+	no-1-8-v;
+	non-removable;
+	mmc-pwrseq = <&usdhc3_pwrseq>;
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	brcmf: wifi@1 {
+		compatible = "brcm,bcm4329-fmac";
+		reg = <1>;
+		interrupt-parent = <&gpio2>;
+		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-names = "host-wake";
+	};
+};
+
+&iomuxc {
+	pinctrl_enet1: enet1grp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO06__ENET1_MDIO	0x1b0b0
+			MX6UL_PAD_GPIO1_IO07__ENET1_MDC		0x1b0b0
+			MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER	0x130b0
+			MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN	0x130b0
+			MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01	0x130b0
+			MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00	0x130b0
+			MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00	0x1b0b0
+			MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01	0x1b0b0
+			MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN	0x1b0b0
+			/* INT# */
+			MX6UL_PAD_NAND_DQS__GPIO4_IO16		0x1b0b0
+			/* RST# */
+			MX6UL_PAD_NAND_DATA00__GPIO4_IO02	0x130b0
+			MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1	0x4001b031
+		>;
+	};
+
+	pinctrl_uart8: uart8grp {
+		fsl,pins = <
+			MX6UL_PAD_ENET2_TX_EN__UART8_DCE_RX	0x1b0b0
+			MX6UL_PAD_ENET2_TX_DATA1__UART8_DCE_TX	0x1b0b0
+			MX6UL_PAD_ENET2_RX_ER__UART8_DCE_RTS	0x1b0b0
+			MX6UL_PAD_ENET2_TX_CLK__UART8_DCE_CTS	0x1b0b0
+			/* BT_REG_ON */
+			MX6UL_PAD_ENET2_RX_EN__GPIO2_IO10	0x130b0
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6UL_PAD_SD1_CMD__USDHC1_CMD		0x17059
+			MX6UL_PAD_SD1_CLK__USDHC1_CLK		0x10059
+			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0	0x17059
+			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1	0x17059
+			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2	0x17059
+			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3	0x17059
+			MX6UL_PAD_NAND_READY_B__USDHC1_DATA4	0x17059
+			MX6UL_PAD_NAND_CE0_B__USDHC1_DATA5	0x17059
+			MX6UL_PAD_NAND_CE1_B__USDHC1_DATA6	0x17059
+			MX6UL_PAD_NAND_CLE__USDHC1_DATA7	0x17059
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_DATA18__USDHC2_CMD	0x1b0b0
+			MX6UL_PAD_LCD_DATA19__USDHC2_CLK	0x100b0
+			MX6UL_PAD_LCD_DATA20__USDHC2_DATA0	0x1b0b0
+			MX6UL_PAD_LCD_DATA21__USDHC2_DATA1	0x1b0b0
+			MX6UL_PAD_LCD_DATA22__USDHC2_DATA2	0x1b0b0
+			MX6UL_PAD_LCD_DATA23__USDHC2_DATA3	0x1b0b0
+			/* WL_REG_ON */
+			MX6UL_PAD_ENET2_RX_DATA1__GPIO2_IO09	0x130b0
+			/* WL_IRQ */
+			MX6UL_PAD_ENET2_RX_DATA0__GPIO2_IO08	0x1b0b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ul-imx6ull-opos6uldev.dtsi b/arch/arm/boot/dts/imx6ul-imx6ull-opos6uldev.dtsi
new file mode 100644
index 000000000000..18966350bfd8
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-imx6ull-opos6uldev.dtsi
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2019 Armadeus Systems <support@armadeus.com>
+
+/ {
+	chosen {
+		stdout-path = &uart1;
+	};
+
+	backlight: backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm3 0 191000>;
+		brightness-levels = <0 4 8 16 32 64 128 255>;
+		default-brightness-level = <7>;
+		power-supply = <&reg_5v>;
+		status = "okay";
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpio_keys>;
+
+		user-button {
+			label = "User button";
+			gpios = <&gpio2 11 GPIO_ACTIVE_LOW>;
+			linux,code = <BTN_MISC>;
+			wakeup-source;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		user-led {
+			label = "User";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_led>;
+			gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "heartbeat";
+		};
+	};
+
+	onewire {
+		compatible = "w1-gpio";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_w1>;
+		gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
+	};
+
+	panel: panel {
+		compatible = "armadeus,st0700-adapt";
+		power-supply = <&reg_3v3>;
+		backlight = <&backlight>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&lcdif_out>;
+			};
+		};
+	};
+
+	reg_5v: regulator-5v {
+		compatible = "regulator-fixed";
+		regulator-name = "5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	reg_usbotg1_vbus: regulator-usbotg1vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usbotg1vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbotg1_vbus>;
+		gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	reg_usbotg2_vbus: regulator-usbotg2vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usbotg2vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbotg2_vbus>;
+		gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+};
+
+&adc1 {
+	vref-supply = <&reg_3v3>;
+	status = "okay";
+};
+
+&can1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan1>;
+	xceiver-supply = <&reg_5v>;
+	status = "okay";
+};
+
+&can2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan2>;
+	xceiver-supply = <&reg_5v>;
+	status = "okay";
+};
+
+&ecspi4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi4>;
+	cs-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>, <&gpio4 3 GPIO_ACTIVE_LOW>;
+	status = "okay";
+
+	spidev0: spi@0 {
+		compatible = "spidev";
+		reg = <0>;
+		spi-max-frequency = <5000000>;
+	};
+
+	spidev1: spi@1 {
+		compatible = "spidev";
+		reg = <1>;
+		spi-max-frequency = <5000000>;
+	};
+};
+
+&i2c1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	clock-frequency = <400000>;
+	status = "okay";
+};
+
+&i2c2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	clock-frequency = <400000>;
+	status = "okay";
+};
+
+&lcdif {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lcdif>;
+	status = "okay";
+
+	port {
+		lcdif_out: endpoint {
+			remote-endpoint = <&panel_in>;
+		};
+	};
+};
+
+&pwm3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm3>;
+	status = "okay";
+};
+
+&snvs_pwrkey {
+	status = "disabled";
+};
+
+&tsc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_tsc>;
+	xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+	measure-delay-time = <0xffff>;
+	pre-charge-time = <0xffff>;
+	status = "okay";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1>;
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	status = "okay";
+};
+
+&usbotg1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg1_id>;
+	vbus-supply = <&reg_usbotg1_vbus>;
+	dr_mode = "otg";
+	disable-over-current;
+	status = "okay";
+};
+
+&usbotg2 {
+	vbus-supply = <&reg_usbotg2_vbus>;
+	dr_mode = "host";
+	disable-over-current;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_gpios>;
+
+	pinctrl_ecspi4: ecspi4grp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_DATA04__ECSPI4_SCLK	0x1b0b0
+			MX6UL_PAD_NAND_DATA05__ECSPI4_MOSI	0x1b0b0
+			MX6UL_PAD_NAND_DATA06__ECSPI4_MISO	0x1b0b0
+			MX6UL_PAD_NAND_DATA01__GPIO4_IO03	0x1b0b0
+			MX6UL_PAD_NAND_DATA07__GPIO4_IO09	0x1b0b0
+		>;
+	};
+
+	pinctrl_flexcan1: flexcan1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART3_CTS_B__FLEXCAN1_TX	0x0b0b0
+			MX6UL_PAD_UART3_RTS_B__FLEXCAN1_RX	0x0b0b0
+		>;
+	};
+
+	pinctrl_flexcan2: flexcan2grp {
+		fsl,pins = <
+			MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX	0x0b0b0
+			MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX	0x0b0b0
+		>;
+	};
+
+	pinctrl_gpios: gpiosgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO09__GPIO1_IO09	0x0b0b0
+			MX6UL_PAD_UART3_RX_DATA__GPIO1_IO25	0x0b0b0
+			MX6UL_PAD_UART3_TX_DATA__GPIO1_IO24	0x0b0b0
+			MX6UL_PAD_NAND_RE_B__GPIO4_IO00		0x0b0b0
+			MX6UL_PAD_GPIO1_IO08__GPIO1_IO08	0x0b0b0
+			MX6UL_PAD_UART1_CTS_B__GPIO1_IO18	0x0b0b0
+			MX6UL_PAD_UART1_RTS_B__GPIO1_IO19	0x0b0b0
+			MX6UL_PAD_NAND_WE_B__GPIO4_IO01		0x0b0b0
+		>;
+	};
+
+	pinctrl_gpio_keys: gpiokeysgrp {
+		fsl,pins = <
+			MX6UL_PAD_ENET2_TX_DATA0__GPIO2_IO11	0x0b0b0
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART4_RX_DATA__I2C1_SDA	0x4001b8b0
+			MX6UL_PAD_UART4_TX_DATA__I2C1_SCL	0x4001b8b0
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6UL_PAD_UART5_RX_DATA__I2C2_SDA	0x4001b8b0
+			MX6UL_PAD_UART5_TX_DATA__I2C2_SCL	0x4001b8b0
+		>;
+	};
+
+	pinctrl_lcdif: lcdifgrp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_CLK__LCDIF_CLK	    0x100b1
+			MX6UL_PAD_LCD_ENABLE__LCDIF_ENABLE  0x100b1
+			MX6UL_PAD_LCD_HSYNC__LCDIF_HSYNC    0x100b1
+			MX6UL_PAD_LCD_VSYNC__LCDIF_VSYNC    0x100b1
+			MX6UL_PAD_LCD_DATA00__LCDIF_DATA00  0x100b1
+			MX6UL_PAD_LCD_DATA01__LCDIF_DATA01  0x100b1
+			MX6UL_PAD_LCD_DATA02__LCDIF_DATA02  0x100b1
+			MX6UL_PAD_LCD_DATA03__LCDIF_DATA03  0x100b1
+			MX6UL_PAD_LCD_DATA04__LCDIF_DATA04  0x100b1
+			MX6UL_PAD_LCD_DATA05__LCDIF_DATA05  0x100b1
+			MX6UL_PAD_LCD_DATA06__LCDIF_DATA06  0x100b1
+			MX6UL_PAD_LCD_DATA07__LCDIF_DATA07  0x100b1
+			MX6UL_PAD_LCD_DATA08__LCDIF_DATA08  0x100b1
+			MX6UL_PAD_LCD_DATA09__LCDIF_DATA09  0x100b1
+			MX6UL_PAD_LCD_DATA10__LCDIF_DATA10  0x100b1
+			MX6UL_PAD_LCD_DATA11__LCDIF_DATA11  0x100b1
+			MX6UL_PAD_LCD_DATA12__LCDIF_DATA12  0x100b1
+			MX6UL_PAD_LCD_DATA13__LCDIF_DATA13  0x100b1
+			MX6UL_PAD_LCD_DATA14__LCDIF_DATA14  0x100b1
+			MX6UL_PAD_LCD_DATA15__LCDIF_DATA15  0x100b1
+			MX6UL_PAD_LCD_DATA16__LCDIF_DATA16  0x100b1
+			MX6UL_PAD_LCD_DATA17__LCDIF_DATA17  0x100b1
+		>;
+	};
+
+	pinctrl_led: ledgrp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_RESET__GPIO3_IO04		0x0b0b0
+		>;
+	};
+
+	pinctrl_pwm3: pwm3grp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_ALE__PWM3_OUT		0x1b0b0
+		>;
+	};
+
+	pinctrl_tsc: tscgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO01__GPIO1_IO01       0xb0
+			MX6UL_PAD_GPIO1_IO02__GPIO1_IO02       0xb0
+			MX6UL_PAD_GPIO1_IO03__GPIO1_IO03       0xb0
+			MX6UL_PAD_GPIO1_IO04__GPIO1_IO04       0xb0
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX	0x1b0b1
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6UL_PAD_UART2_TX_DATA__UART2_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART2_RX_DATA__UART2_DCE_RX	0x1b0b1
+		>;
+	};
+
+	pinctrl_usbotg1_id: usbotg1idgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO00__ANATOP_OTG1_ID	0x1b0b0
+		>;
+	};
+
+	pinctrl_usbotg1_vbus: usbotg1vbusgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO05__GPIO1_IO05	0x1b0b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts b/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts
index 0205fd56d975..5a3e06d6219b 100644
--- a/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts
@@ -8,413 +8,10 @@
 /dts-v1/;
 
 #include "imx6ul-kontron-n6310-som.dtsi"
+#include "imx6ul-kontron-n6x1x-s.dtsi"
 
 / {
 	model = "Kontron N6310 S";
 	compatible = "kontron,imx6ul-n6310-s", "kontron,imx6ul-n6310-som",
 		     "fsl,imx6ul";
-
-	gpio-leds {
-		compatible = "gpio-leds";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_gpio_leds>;
-
-		led1 {
-			label = "debug-led1";
-			gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
-			default-state = "off";
-			linux,default-trigger = "heartbeat";
-		};
-
-		led2 {
-			label = "debug-led2";
-			gpios = <&gpio5 3 GPIO_ACTIVE_LOW>;
-			default-state = "off";
-		};
-
-		led3 {
-			label = "debug-led3";
-			gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
-			default-state = "off";
-		};
-	};
-
-	pwm-beeper {
-		compatible = "pwm-beeper";
-		pwms = <&pwm8 0 5000>;
-	};
-
-	reg_3v3: regulator-3v3 {
-		compatible = "regulator-fixed";
-		regulator-name = "3v3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-
-	reg_usb_otg1_vbus: regulator-usb-otg1-vbus {
-		compatible = "regulator-fixed";
-		regulator-name = "usb_otg1_vbus";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		gpio = <&gpio1 4 GPIO_ACTIVE_HIGH>;
-		enable-active-high;
-	};
-
-	reg_vref_adc: regulator-vref-adc {
-		compatible = "regulator-fixed";
-		regulator-name = "vref-adc";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-};
-
-&adc1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_adc1>;
-	num-channels = <3>;
-	vref-supply = <&reg_vref_adc>;
-	status = "okay";
-};
-
-&can2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_flexcan2>;
-	status = "okay";
-};
-
-&ecspi1 {
-	cs-gpios = <&gpio4 26 GPIO_ACTIVE_HIGH>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_ecspi1>;
-	status = "okay";
-
-	eeprom@0 {
-		compatible = "anvo,anv32e61w", "atmel,at25";
-		reg = <0>;
-		spi-max-frequency = <20000000>;
-		spi-cpha;
-		spi-cpol;
-		pagesize = <1>;
-		size = <8192>;
-		address-width = <16>;
-	};
-};
-
-&fec1 {
-	pinctrl-0 = <&pinctrl_enet1>;
-	/delete-node/ mdio;
-};
-
-&fec2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_enet2 &pinctrl_enet2_mdio>;
-	phy-mode = "rmii";
-	phy-handle = <&ethphy2>;
-	status = "okay";
-
-	mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		ethphy1: ethernet-phy@1 {
-			reg = <1>;
-			micrel,led-mode = <0>;
-			clocks = <&clks IMX6UL_CLK_ENET_REF>;
-			clock-names = "rmii-ref";
-		};
-
-		ethphy2: ethernet-phy@2 {
-			reg = <2>;
-			micrel,led-mode = <0>;
-			clocks = <&clks IMX6UL_CLK_ENET2_REF>;
-			clock-names = "rmii-ref";
-		};
-	};
-};
-
-&i2c1 {
-	clock-frequency = <100000>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_i2c1>;
-	status = "okay";
-};
-
-&i2c4 {
-	clock-frequency = <100000>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_i2c4>;
-	status = "okay";
-
-	rtc@32 {
-		compatible = "epson,rx8900";
-		reg = <0x32>;
-	};
-};
-
-&pwm8 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_pwm8>;
-	status = "okay";
-};
-
-&snvs_poweroff {
-	status = "okay";
-};
-
-&uart1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart1>;
-	status = "okay";
-};
-
-&uart2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart2>;
-	linux,rs485-enabled-at-boot-time;
-	rs485-rx-during-tx;
-	rs485-rts-active-low;
-	uart-has-rtscts;
-	status = "okay";
-};
-
-&uart3 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart3>;
-	fsl,uart-has-rtscts;
-	status = "okay";
-};
-
-&uart4 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart4>;
-	status = "okay";
-};
-
-&usbotg1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usbotg1>;
-	dr_mode = "otg";
-	srp-disable;
-	hnp-disable;
-	adp-disable;
-	vbus-supply = <&reg_usb_otg1_vbus>;
-	status = "okay";
-};
-
-&usbotg2 {
-	dr_mode = "host";
-	disable-over-current;
-	status = "okay";
-};
-
-&usdhc1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usdhc1>;
-	cd-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>;
-	keep-power-in-suspend;
-	wakeup-source;
-	vmmc-supply = <&reg_3v3>;
-	voltage-ranges = <3300 3300>;
-	no-1-8-v;
-	status = "okay";
-};
-
-&usdhc2 {
-	pinctrl-names = "default", "state_100mhz", "state_200mhz";
-	pinctrl-0 = <&pinctrl_usdhc2>;
-	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
-	pinctrl-2 = <&pinctrl_usdhc2_200mhz>;
-	non-removable;
-	keep-power-in-suspend;
-	wakeup-source;
-	vmmc-supply = <&reg_3v3>;
-	voltage-ranges = <3300 3300>;
-	no-1-8-v;
-	status = "okay";
-};
-
-&wdog1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_wdog>;
-	fsl,ext-reset-output;
-	status = "okay";
-};
-
-&iomuxc {
-	pinctrl-0 = <&pinctrl_reset_out &pinctrl_gpio>;
-
-	pinctrl_adc1: adc1grp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO02__GPIO1_IO02	0xb0
-			MX6UL_PAD_GPIO1_IO03__GPIO1_IO03	0xb0
-			MX6UL_PAD_GPIO1_IO08__GPIO1_IO08	0xb0
-		>;
-	};
-
-	/* FRAM */
-	pinctrl_ecspi1: ecspi1grp {
-		fsl,pins = <
-			MX6UL_PAD_CSI_DATA07__ECSPI1_MISO	0x100b1
-			MX6UL_PAD_CSI_DATA06__ECSPI1_MOSI	0x100b1
-			MX6UL_PAD_CSI_DATA04__ECSPI1_SCLK	0x100b1
-			MX6UL_PAD_CSI_DATA05__GPIO4_IO26	0x100b1	/* ECSPI1-CS1 */
-		>;
-	};
-
-	pinctrl_enet2: enet2grp {
-		fsl,pins = <
-			MX6UL_PAD_ENET2_RX_EN__ENET2_RX_EN	0x1b0b0
-			MX6UL_PAD_ENET2_RX_ER__ENET2_RX_ER	0x1b0b0
-			MX6UL_PAD_ENET2_RX_DATA0__ENET2_RDATA00	0x1b0b0
-			MX6UL_PAD_ENET2_RX_DATA1__ENET2_RDATA01	0x1b0b0
-			MX6UL_PAD_ENET2_TX_EN__ENET2_TX_EN	0x1b0b0
-			MX6UL_PAD_ENET2_TX_DATA0__ENET2_TDATA00	0x1b0b0
-			MX6UL_PAD_ENET2_TX_DATA1__ENET2_TDATA01	0x1b0b0
-			MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2	0x4001b009
-		>;
-	};
-
-	pinctrl_enet2_mdio: enet2mdiogrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO07__ENET2_MDC         0x1b0b0
-			MX6UL_PAD_GPIO1_IO06__ENET2_MDIO        0x1b0b0
-		>;
-	};
-
-	pinctrl_flexcan2: flexcan2grp{
-		fsl,pins = <
-			MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX	0x1b020
-			MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX	0x1b020
-		>;
-	};
-
-	pinctrl_gpio: gpiogrp {
-		fsl,pins = <
-			MX6UL_PAD_SNVS_TAMPER5__GPIO5_IO05	0x1b0b0 /* DOUT1 */
-			MX6UL_PAD_SNVS_TAMPER4__GPIO5_IO04	0x1b0b0 /* DIN1 */
-			MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01	0x1b0b0 /* DOUT2 */
-			MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00	0x1b0b0 /* DIN2 */
-		>;
-	};
-
-	pinctrl_gpio_leds: gpioledsgrp {
-		fsl,pins = <
-			MX6UL_PAD_UART5_TX_DATA__GPIO1_IO30	0x1b0b0	/* LED H14 */
-			MX6UL_PAD_SNVS_TAMPER3__GPIO5_IO03	0x1b0b0	/* LED H15 */
-			MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x1b0b0	/* LED H16 */
-		>;
-	};
-
-	pinctrl_i2c1: i2c1grp {
-		fsl,pins = <
-			MX6UL_PAD_CSI_PIXCLK__I2C1_SCL		0x4001b8b0
-			MX6UL_PAD_CSI_MCLK__I2C1_SDA		0x4001b8b0
-		>;
-	};
-
-	pinctrl_i2c4: i2c4grp {
-		fsl,pins = <
-			MX6UL_PAD_UART2_TX_DATA__I2C4_SCL	0x4001f8b0
-			MX6UL_PAD_UART2_RX_DATA__I2C4_SDA	0x4001f8b0
-		>;
-	};
-
-	pinctrl_pwm8: pwm8grp {
-		fsl,pins = <
-			MX6UL_PAD_CSI_HSYNC__PWM8_OUT		0x110b0
-		>;
-	};
-
-	pinctrl_uart1: uart1grp {
-		fsl,pins = <
-			MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX	0x1b0b1
-			MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX	0x1b0b1
-		>;
-	};
-
-	pinctrl_uart2: uart2grp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_DATA04__UART2_DCE_TX	0x1b0b1
-			MX6UL_PAD_NAND_DATA05__UART2_DCE_RX	0x1b0b1
-			MX6UL_PAD_NAND_DATA06__UART2_DCE_CTS	0x1b0b1
-			/*
-			 * mux unused RTS to make sure it doesn't cause
-			 * any interrupts when it is undefined
-			 */
-			MX6UL_PAD_NAND_DATA07__UART2_DCE_RTS	0x1b0b1
-		>;
-	};
-
-	pinctrl_uart3: uart3grp {
-		fsl,pins = <
-			MX6UL_PAD_UART3_TX_DATA__UART3_DCE_TX	0x1b0b1
-			MX6UL_PAD_UART3_RX_DATA__UART3_DCE_RX	0x1b0b1
-			MX6UL_PAD_UART3_CTS_B__UART3_DCE_CTS	0x1b0b1
-			MX6UL_PAD_UART3_RTS_B__UART3_DCE_RTS	0x1b0b1
-		>;
-	};
-
-	pinctrl_uart4: uart4grp {
-		fsl,pins = <
-			MX6UL_PAD_UART4_TX_DATA__UART4_DCE_TX	0x1b0b1
-			MX6UL_PAD_UART4_RX_DATA__UART4_DCE_RX	0x1b0b1
-		>;
-	};
-
-	pinctrl_usbotg1: usbotg1 {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO04__GPIO1_IO04	0x1b0b0
-		>;
-	};
-
-	pinctrl_usdhc1: usdhc1grp {
-		fsl,pins = <
-			MX6UL_PAD_SD1_CMD__USDHC1_CMD		0x17059
-			MX6UL_PAD_SD1_CLK__USDHC1_CLK		0x10059
-			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0	0x17059
-			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1	0x17059
-			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2	0x17059
-			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3	0x17059
-			MX6UL_PAD_UART1_RTS_B__GPIO1_IO19	0x100b1	/* SD1_CD */
-		>;
-	};
-
-	pinctrl_usdhc2: usdhc2grp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x10059
-			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x17059
-			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x17059
-			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x17059
-			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x17059
-			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x17059
-		>;
-	};
-
-	pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x100b9
-			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x170b9
-			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x170b9
-			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x170b9
-			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x170b9
-			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x170b9
-		>;
-	};
-
-	pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x100f9
-			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x170f9
-			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x170f9
-			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x170f9
-			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x170f9
-			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x170f9
-		>;
-	};
-
-	pinctrl_wdog: wdoggrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY	0x30b0
-		>;
-	};
 };
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
index a896b2348dd2..47d3ce5d255f 100644
--- a/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
@@ -6,7 +6,7 @@
  */
 
 #include "imx6ul.dtsi"
-#include <dt-bindings/gpio/gpio.h>
+#include "imx6ul-kontron-n6x1x-som-common.dtsi"
 
 / {
 	model = "Kontron N6310 SOM";
@@ -18,49 +18,7 @@
 	};
 };
 
-&ecspi2 {
-	cs-gpios = <&gpio4 22 GPIO_ACTIVE_HIGH>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_ecspi2>;
-	status = "okay";
-
-	spi-flash@0 {
-		compatible = "mxicy,mx25v8035f", "jedec,spi-nor";
-		spi-max-frequency = <50000000>;
-		reg = <0>;
-	};
-};
-
-&fec1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_enet1 &pinctrl_enet1_mdio>;
-	phy-mode = "rmii";
-	phy-handle = <&ethphy1>;
-	status = "okay";
-
-	mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		ethphy1: ethernet-phy@1 {
-			reg = <1>;
-			micrel,led-mode = <0>;
-			clocks = <&clks IMX6UL_CLK_ENET_REF>;
-			clock-names = "rmii-ref";
-		};
-	};
-};
-
-&fec2 {
-	phy-mode = "rmii";
-	status = "disabled";
-};
-
 &qspi {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_qspi>;
-	status = "okay";
-
 	spi-flash@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -81,54 +39,3 @@
 		};
 	};
 };
-
-&iomuxc {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_reset_out>;
-
-	pinctrl_ecspi2: ecspi2grp {
-		fsl,pins = <
-			MX6UL_PAD_CSI_DATA03__ECSPI2_MISO      0x100b1
-			MX6UL_PAD_CSI_DATA02__ECSPI2_MOSI      0x100b1
-			MX6UL_PAD_CSI_DATA00__ECSPI2_SCLK      0x100b1
-			MX6UL_PAD_CSI_DATA01__GPIO4_IO22       0x100b1
-		>;
-	};
-
-	pinctrl_enet1: enet1grp {
-		fsl,pins = <
-			MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN      0x1b0b0
-			MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER      0x1b0b0
-			MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00 0x1b0b0
-			MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01 0x1b0b0
-			MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN      0x1b0b0
-			MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00 0x1b0b0
-			MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01 0x1b0b0
-			MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1  0x4001b009
-		>;
-	};
-
-	pinctrl_enet1_mdio: enet1mdiogrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO07__ENET1_MDC         0x1b0b0
-			MX6UL_PAD_GPIO1_IO06__ENET1_MDIO        0x1b0b0
-		>;
-	};
-
-	pinctrl_qspi: qspigrp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_WP_B__QSPI_A_SCLK        0x70a1
-			MX6UL_PAD_NAND_READY_B__QSPI_A_DATA00   0x70a1
-			MX6UL_PAD_NAND_CE0_B__QSPI_A_DATA01     0x70a1
-			MX6UL_PAD_NAND_CE1_B__QSPI_A_DATA02     0x70a1
-			MX6UL_PAD_NAND_CLE__QSPI_A_DATA03       0x70a1
-			MX6UL_PAD_NAND_DQS__QSPI_A_SS0_B        0x70a1
-		>;
-	};
-
-	pinctrl_reset_out: rstoutgrp {
-		fsl,pins = <
-			MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09      0x1b0b0
-		>;
-	};
-};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6311-s.dts b/arch/arm/boot/dts/imx6ul-kontron-n6311-s.dts
new file mode 100644
index 000000000000..239a1af3aeaa
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6311-s.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ */
+
+/dts-v1/;
+
+#include "imx6ul-kontron-n6311-som.dtsi"
+#include "imx6ul-kontron-n6x1x-s.dtsi"
+
+/ {
+	model = "Kontron N6311 S";
+	compatible = "kontron,imx6ul-n6311-s", "kontron,imx6ul-n6311-som",
+		     "fsl,imx6ul";
+};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi
new file mode 100644
index 000000000000..a095a7654ac6
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ */
+
+#include "imx6ul.dtsi"
+#include "imx6ul-kontron-n6x1x-som-common.dtsi"
+
+/ {
+	model = "Kontron N6311 SOM";
+	compatible = "kontron,imx6ul-n6311-som", "fsl,imx6ul";
+
+	memory@80000000 {
+		reg = <0x80000000 0x20000000>;
+		device_type = "memory";
+	};
+};
+
+&qspi {
+	spi-flash@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "spi-nand";
+		spi-max-frequency = <104000000>;
+		spi-tx-bus-width = <4>;
+		spi-rx-bus-width = <4>;
+		reg = <0>;
+
+		partition@0 {
+			label = "ubi1";
+			reg = <0x00000000 0x08000000>;
+		};
+
+		partition@8000000 {
+			label = "ubi2";
+			reg = <0x08000000 0x18000000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi
new file mode 100644
index 000000000000..f05e91841202
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ * Copyright (c) 2019 Krzysztof Kozlowski <krzk@kernel.org>
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	gpio-leds {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpio_leds>;
+
+		led1 {
+			label = "debug-led1";
+			gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+			linux,default-trigger = "heartbeat";
+		};
+
+		led2 {
+			label = "debug-led2";
+			gpios = <&gpio5 3 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+
+		led3 {
+			label = "debug-led3";
+			gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+	};
+
+	pwm-beeper {
+		compatible = "pwm-beeper";
+		pwms = <&pwm8 0 5000>;
+	};
+
+	reg_3v3: regulator-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "3v3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	reg_5v: regulator-5v {
+		compatible = "regulator-fixed";
+		regulator-name = "5v";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	reg_usb_otg1_vbus: regulator-usb-otg1-vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_otg1_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio1 4 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	reg_vref_adc: regulator-vref-adc {
+		compatible = "regulator-fixed";
+		regulator-name = "vref-adc";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+};
+
+&adc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_adc1>;
+	num-channels = <3>;
+	vref-supply = <&reg_vref_adc>;
+	status = "okay";
+};
+
+&can2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan2>;
+	status = "okay";
+};
+
+&ecspi1 {
+	cs-gpios = <&gpio4 26 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi1>;
+	status = "okay";
+
+	eeprom@0 {
+		compatible = "anvo,anv32e61w", "atmel,at25";
+		reg = <0>;
+		spi-max-frequency = <20000000>;
+		spi-cpha;
+		spi-cpol;
+		pagesize = <1>;
+		size = <8192>;
+		address-width = <16>;
+	};
+};
+
+&fec1 {
+	pinctrl-0 = <&pinctrl_enet1>;
+	/delete-node/ mdio;
+};
+
+&fec2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet2 &pinctrl_enet2_mdio>;
+	phy-mode = "rmii";
+	phy-handle = <&ethphy2>;
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
+			micrel,led-mode = <0>;
+			clocks = <&clks IMX6UL_CLK_ENET_REF>;
+			clock-names = "rmii-ref";
+		};
+
+		ethphy2: ethernet-phy@2 {
+			reg = <2>;
+			micrel,led-mode = <0>;
+			clocks = <&clks IMX6UL_CLK_ENET2_REF>;
+			clock-names = "rmii-ref";
+		};
+	};
+};
+
+&i2c1 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	status = "okay";
+};
+
+&i2c4 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c4>;
+	status = "okay";
+
+	rtc@32 {
+		compatible = "epson,rx8900";
+		reg = <0x32>;
+	};
+};
+
+&pwm8 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm8>;
+	status = "okay";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1>;
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	linux,rs485-enabled-at-boot-time;
+	rs485-rx-during-tx;
+	rs485-rts-active-low;
+	uart-has-rtscts;
+	status = "okay";
+};
+
+&uart3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart3>;
+	fsl,uart-has-rtscts;
+	status = "okay";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart4>;
+	status = "okay";
+};
+
+&usbotg1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg1>;
+	dr_mode = "otg";
+	srp-disable;
+	hnp-disable;
+	adp-disable;
+	over-current-active-low;
+	vbus-supply = <&reg_usb_otg1_vbus>;
+	status = "okay";
+};
+
+&usbotg2 {
+	dr_mode = "host";
+	disable-over-current;
+	vbus-supply = <&reg_5v>;
+	status = "okay";
+};
+
+&usdhc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	cd-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>;
+	keep-power-in-suspend;
+	wakeup-source;
+	vmmc-supply = <&reg_3v3>;
+	voltage-ranges = <3300 3300>;
+	no-1-8-v;
+	status = "okay";
+};
+
+&usdhc2 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc2_200mhz>;
+	non-removable;
+	keep-power-in-suspend;
+	wakeup-source;
+	vmmc-supply = <&reg_3v3>;
+	voltage-ranges = <3300 3300>;
+	no-1-8-v;
+	status = "okay";
+};
+
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+	fsl,ext-reset-output;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-0 = <&pinctrl_reset_out &pinctrl_gpio>;
+
+	pinctrl_adc1: adc1grp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO02__GPIO1_IO02	0xb0
+			MX6UL_PAD_GPIO1_IO03__GPIO1_IO03	0xb0
+			MX6UL_PAD_GPIO1_IO08__GPIO1_IO08	0xb0
+		>;
+	};
+
+	pinctrl_ecspi1: ecspi1grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_DATA07__ECSPI1_MISO	0x100b1
+			MX6UL_PAD_CSI_DATA06__ECSPI1_MOSI	0x100b1
+			MX6UL_PAD_CSI_DATA04__ECSPI1_SCLK	0x100b1
+			MX6UL_PAD_CSI_DATA05__GPIO4_IO26	0x100b1	/* ECSPI1-CS1 */
+		>;
+	};
+
+	pinctrl_enet2: enet2grp {
+		fsl,pins = <
+			MX6UL_PAD_ENET2_RX_EN__ENET2_RX_EN	0x1b0b0
+			MX6UL_PAD_ENET2_RX_ER__ENET2_RX_ER	0x1b0b0
+			MX6UL_PAD_ENET2_RX_DATA0__ENET2_RDATA00	0x1b0b0
+			MX6UL_PAD_ENET2_RX_DATA1__ENET2_RDATA01	0x1b0b0
+			MX6UL_PAD_ENET2_TX_EN__ENET2_TX_EN	0x1b0b0
+			MX6UL_PAD_ENET2_TX_DATA0__ENET2_TDATA00	0x1b0b0
+			MX6UL_PAD_ENET2_TX_DATA1__ENET2_TDATA01	0x1b0b0
+			MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2	0x4001b009
+		>;
+	};
+
+	pinctrl_enet2_mdio: enet2mdiogrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO07__ENET2_MDC		0x1b0b0
+			MX6UL_PAD_GPIO1_IO06__ENET2_MDIO	0x1b0b0
+		>;
+	};
+
+	pinctrl_flexcan2: flexcan2grp{
+		fsl,pins = <
+			MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX	0x1b020
+			MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX	0x1b020
+		>;
+	};
+
+	pinctrl_gpio: gpiogrp {
+		fsl,pins = <
+			MX6UL_PAD_SNVS_TAMPER5__GPIO5_IO05	0x1b0b0	/* DOUT1 */
+			MX6UL_PAD_SNVS_TAMPER4__GPIO5_IO04	0x1b0b0	/* DIN1 */
+			MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01	0x1b0b0	/* DOUT2 */
+			MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00	0x1b0b0	/* DIN2 */
+		>;
+	};
+
+	pinctrl_gpio_leds: gpioledsgrp {
+		fsl,pins = <
+			MX6UL_PAD_UART5_TX_DATA__GPIO1_IO30	0x1b0b0	/* LED H14 */
+			MX6UL_PAD_SNVS_TAMPER3__GPIO5_IO03	0x1b0b0	/* LED H15 */
+			MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x1b0b0	/* LED H16 */
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_PIXCLK__I2C1_SCL		0x4001b8b0
+			MX6UL_PAD_CSI_MCLK__I2C1_SDA		0x4001b8b0
+		>;
+	};
+
+	pinctrl_i2c4: i2c4grp {
+		fsl,pins = <
+			MX6UL_PAD_UART2_TX_DATA__I2C4_SCL	0x4001f8b0
+			MX6UL_PAD_UART2_RX_DATA__I2C4_SDA	0x4001f8b0
+		>;
+	};
+
+	pinctrl_pwm8: pwm8grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_HSYNC__PWM8_OUT		0x110b0
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX	0x1b0b1
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_DATA04__UART2_DCE_TX	0x1b0b1
+			MX6UL_PAD_NAND_DATA05__UART2_DCE_RX	0x1b0b1
+			MX6UL_PAD_NAND_DATA06__UART2_DCE_CTS	0x1b0b1
+			/*
+			 * mux unused RTS to make sure it doesn't cause
+			 * any interrupts when it is undefined
+			 */
+			MX6UL_PAD_NAND_DATA07__UART2_DCE_RTS	0x1b0b1
+		>;
+	};
+
+	pinctrl_uart3: uart3grp {
+		fsl,pins = <
+			MX6UL_PAD_UART3_TX_DATA__UART3_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART3_RX_DATA__UART3_DCE_RX	0x1b0b1
+			MX6UL_PAD_UART3_CTS_B__UART3_DCE_CTS	0x1b0b1
+			MX6UL_PAD_UART3_RTS_B__UART3_DCE_RTS	0x1b0b1
+		>;
+	};
+
+	pinctrl_uart4: uart4grp {
+		fsl,pins = <
+			MX6UL_PAD_UART4_TX_DATA__UART4_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART4_RX_DATA__UART4_DCE_RX	0x1b0b1
+		>;
+	};
+
+	pinctrl_usbotg1: usbotg1 {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO04__GPIO1_IO04	0x1b0b0
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6UL_PAD_SD1_CMD__USDHC1_CMD		0x17059
+			MX6UL_PAD_SD1_CLK__USDHC1_CLK		0x10059
+			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0	0x17059
+			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1	0x17059
+			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2	0x17059
+			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3	0x17059
+			MX6UL_PAD_UART1_RTS_B__GPIO1_IO19	0x100b1	/* SD1_CD */
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x10059
+			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x17059
+			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x17059
+			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x17059
+			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x17059
+			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x17059
+		>;
+	};
+
+	pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x100b9
+			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x170b9
+			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x170b9
+			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x170b9
+			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x170b9
+			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x170b9
+		>;
+	};
+
+	pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_RE_B__USDHC2_CLK		0x100f9
+			MX6UL_PAD_NAND_WE_B__USDHC2_CMD		0x170f9
+			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0	0x170f9
+			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1	0x170f9
+			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2	0x170f9
+			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3	0x170f9
+		>;
+	};
+
+	pinctrl_wdog: wdoggrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY	0x30b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi
new file mode 100644
index 000000000000..a17af4d9bfdf
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ * Copyright (c) 2019 Krzysztof Kozlowski <krzk@kernel.org>
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	chosen {
+		stdout-path = &uart4;
+	};
+};
+
+&ecspi2 {
+	cs-gpios = <&gpio4 22 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi2>;
+	status = "okay";
+
+	spi-flash@0 {
+		compatible = "mxicy,mx25v8035f", "jedec,spi-nor";
+		spi-max-frequency = <50000000>;
+		reg = <0>;
+	};
+};
+
+&fec1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet1 &pinctrl_enet1_mdio>;
+	phy-mode = "rmii";
+	phy-handle = <&ethphy1>;
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
+			micrel,led-mode = <0>;
+			clocks = <&clks IMX6UL_CLK_ENET_REF>;
+			clock-names = "rmii-ref";
+		};
+	};
+};
+
+&fec2 {
+	phy-mode = "rmii";
+	status = "disabled";
+};
+
+&qspi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_qspi>;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_reset_out>;
+
+	pinctrl_ecspi2: ecspi2grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_DATA03__ECSPI2_MISO      0x100b1
+			MX6UL_PAD_CSI_DATA02__ECSPI2_MOSI      0x100b1
+			MX6UL_PAD_CSI_DATA00__ECSPI2_SCLK      0x100b1
+			MX6UL_PAD_CSI_DATA01__GPIO4_IO22       0x100b1
+		>;
+	};
+
+	pinctrl_enet1: enet1grp {
+		fsl,pins = <
+			MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN      0x1b0b0
+			MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER      0x1b0b0
+			MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00 0x1b0b0
+			MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01 0x1b0b0
+			MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN      0x1b0b0
+			MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00 0x1b0b0
+			MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01 0x1b0b0
+			MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1  0x4001b009
+		>;
+	};
+
+	pinctrl_enet1_mdio: enet1mdiogrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO07__ENET1_MDC         0x1b0b0
+			MX6UL_PAD_GPIO1_IO06__ENET1_MDIO        0x1b0b0
+		>;
+	};
+
+	pinctrl_qspi: qspigrp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_WP_B__QSPI_A_SCLK        0x70a1
+			MX6UL_PAD_NAND_READY_B__QSPI_A_DATA00   0x70a1
+			MX6UL_PAD_NAND_CE0_B__QSPI_A_DATA01     0x70a1
+			MX6UL_PAD_NAND_CE1_B__QSPI_A_DATA02     0x70a1
+			MX6UL_PAD_NAND_CLE__QSPI_A_DATA03       0x70a1
+			MX6UL_PAD_NAND_DQS__QSPI_A_SS0_B        0x70a1
+		>;
+	};
+
+	pinctrl_reset_out: rstoutgrp {
+		fsl,pins = <
+			MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09      0x1b0b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ul-opos6ul.dtsi b/arch/arm/boot/dts/imx6ul-opos6ul.dtsi
index cf7faf4b9c47..6ce84f92b027 100644
--- a/arch/arm/boot/dts/imx6ul-opos6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul-opos6ul.dtsi
@@ -1,193 +1,6 @@
-/*
- * Copyright 2017 Armadeus Systems <support@armadeus.com>
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2017 Armadeus Systems <support@armadeus.com>
 
 #include "imx6ul.dtsi"
-
-/ {
-	memory@80000000 {
-		device_type = "memory";
-		reg = <0x80000000 0>; /* will be filled by U-Boot */
-	};
-
-	reg_3v3: regulator-3v3 {
-		compatible = "regulator-fixed";
-		regulator-name = "3V3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-
-	usdhc3_pwrseq: usdhc3-pwrseq {
-		compatible = "mmc-pwrseq-simple";
-		reset-gpios = <&gpio2 9 GPIO_ACTIVE_LOW>;
-	};
-};
-
-&fec1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_enet1>;
-	phy-mode = "rmii";
-	phy-reset-duration = <1>;
-	phy-reset-gpios = <&gpio4 2 GPIO_ACTIVE_LOW>;
-	phy-handle = <&ethphy1>;
-	phy-supply = <&reg_3v3>;
-	status = "okay";
-
-	mdio: mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		ethphy1: ethernet-phy@1 {
-			compatible = "ethernet-phy-ieee802.3-c22";
-			reg = <1>;
-			interrupt-parent = <&gpio4>;
-			interrupts = <16 IRQ_TYPE_LEVEL_LOW>;
-			status = "okay";
-		};
-	};
-};
-
-/* Bluetooth */
-&uart8 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart8>;
-	uart-has-rtscts;
-	status = "okay";
-};
-
-/* eMMC */
-&usdhc1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usdhc1>;
-	bus-width = <8>;
-	no-1-8-v;
-	non-removable;
-	status = "okay";
-};
-
-/* WiFi */
-&usdhc2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usdhc2>;
-	bus-width = <4>;
-	no-1-8-v;
-	non-removable;
-	mmc-pwrseq = <&usdhc3_pwrseq>;
-	status = "okay";
-
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	brcmf: wifi@1 {
-		compatible = "brcm,bcm4329-fmac";
-		reg = <1>;
-		interrupt-parent = <&gpio2>;
-		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
-		interrupt-names = "host-wake";
-	};
-};
-
-&iomuxc {
-	pinctrl_enet1: enet1grp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO06__ENET1_MDIO	0x1b0b0
-			MX6UL_PAD_GPIO1_IO07__ENET1_MDC		0x1b0b0
-			MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER	0x130b0
-			MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN	0x130b0
-			MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01	0x130b0
-			MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00	0x130b0
-			MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00	0x1b0b0
-			MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01	0x1b0b0
-			MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN	0x1b0b0
-			/* INT# */
-			MX6UL_PAD_NAND_DQS__GPIO4_IO16		0x1b0b0
-			/* RST# */
-			MX6UL_PAD_NAND_DATA00__GPIO4_IO02	0x130b0
-			MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1	0x4001b031
-		>;
-	};
-
-	pinctrl_uart8: uart8grp {
-		fsl,pins = <
-			MX6UL_PAD_ENET2_TX_EN__UART8_DCE_RX	0x1b0b0
-			MX6UL_PAD_ENET2_TX_DATA1__UART8_DCE_TX	0x1b0b0
-			MX6UL_PAD_ENET2_RX_ER__UART8_DCE_RTS	0x1b0b0
-			MX6UL_PAD_ENET2_TX_CLK__UART8_DCE_CTS	0x1b0b0
-			/* BT_REG_ON */
-			MX6UL_PAD_ENET2_RX_EN__GPIO2_IO10	0x130b0
-		>;
-	};
-
-	pinctrl_usdhc1: usdhc1grp {
-		fsl,pins = <
-			MX6UL_PAD_SD1_CMD__USDHC1_CMD		0x17059
-			MX6UL_PAD_SD1_CLK__USDHC1_CLK		0x10059
-			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0	0x17059
-			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1	0x17059
-			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2	0x17059
-			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3	0x17059
-			MX6UL_PAD_NAND_READY_B__USDHC1_DATA4	0x17059
-			MX6UL_PAD_NAND_CE0_B__USDHC1_DATA5	0x17059
-			MX6UL_PAD_NAND_CE1_B__USDHC1_DATA6	0x17059
-			MX6UL_PAD_NAND_CLE__USDHC1_DATA7	0x17059
-		>;
-	};
-
-	pinctrl_usdhc2: usdhc2grp {
-		fsl,pins = <
-			MX6UL_PAD_LCD_DATA18__USDHC2_CMD	0x1b0b0
-			MX6UL_PAD_LCD_DATA19__USDHC2_CLK	0x100b0
-			MX6UL_PAD_LCD_DATA20__USDHC2_DATA0	0x1b0b0
-			MX6UL_PAD_LCD_DATA21__USDHC2_DATA1	0x1b0b0
-			MX6UL_PAD_LCD_DATA22__USDHC2_DATA2	0x1b0b0
-			MX6UL_PAD_LCD_DATA23__USDHC2_DATA3	0x1b0b0
-			/* WL_REG_ON */
-			MX6UL_PAD_ENET2_RX_DATA1__GPIO2_IO09	0x130b0
-			/* WL_IRQ */
-			MX6UL_PAD_ENET2_RX_DATA0__GPIO2_IO08	0x1b0b0
-		>;
-	};
-};
+#include "imx6ul-imx6ull-opos6ul.dtsi"
diff --git a/arch/arm/boot/dts/imx6ul-opos6uldev.dts b/arch/arm/boot/dts/imx6ul-opos6uldev.dts
index 8ecdb9ad2b2e..375b98d7205a 100644
--- a/arch/arm/boot/dts/imx6ul-opos6uldev.dts
+++ b/arch/arm/boot/dts/imx6ul-opos6uldev.dts
@@ -1,293 +1,21 @@
-/*
- * Copyright 2017 Armadeus Systems <support@armadeus.com>
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This file is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public
- *     License along with this file; if not, write to the Free
- *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- *     MA 02110-1301 USA
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- *     obtaining a copy of this software and associated documentation
- *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
- *     copy, modify, merge, publish, distribute, sublicense, and/or
- *     sell copies of the Software, and to permit persons to whom the
- *     Software is furnished to do so, subject to the following
- *     conditions:
- *
- *     The above copyright notice and this permission notice shall be
- *     included in all copies or substantial portions of the Software.
- *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- *     OTHER DEALINGS IN THE SOFTWARE.
- */
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2017 Armadeus Systems <support@armadeus.com>
 
 /dts-v1/;
 #include "imx6ul-opos6ul.dtsi"
+#include "imx6ul-imx6ull-opos6uldev.dtsi"
 
 / {
-	model = "Armadeus Systems OPOS6UL SoM on OPOS6ULDev board";
-	compatible = "armadeus,opos6uldev", "armadeus,opos6ul", "fsl,imx6ul";
-
-	chosen {
-		stdout-path = &uart1;
-	};
-
-	backlight: backlight {
-		compatible = "pwm-backlight";
-		pwms = <&pwm3 0 191000>;
-		brightness-levels = <0 4 8 16 32 64 128 255>;
-		default-brightness-level = <7>;
-		power-supply = <&reg_5v>;
-		status = "okay";
-	};
-
-	gpio-keys {
-		compatible = "gpio-keys";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_gpio_keys>;
-
-		user-button {
-			label = "User button";
-			gpios = <&gpio2 11 GPIO_ACTIVE_LOW>;
-			linux,code = <BTN_MISC>;
-			wakeup-source;
-		};
-	};
-
-	leds {
-		compatible = "gpio-leds";
-
-		user-led {
-			label = "User";
-			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_led>;
-			gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
-			linux,default-trigger = "heartbeat";
-		};
-	};
-
-	onewire {
-		compatible = "w1-gpio";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_w1>;
-		gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
-	};
-
-	panel: panel {
-		compatible = "armadeus,st0700-adapt";
-		power-supply = <&reg_3v3>;
-		backlight = <&backlight>;
-
-		port {
-			panel_in: endpoint {
-				remote-endpoint = <&lcdif_out>;
-			};
-		};
-	};
-
-	reg_5v: regulator-5v {
-		compatible = "regulator-fixed";
-		regulator-name = "5V";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-	};
-
-	reg_usbotg1_vbus: regulator-usbotg1vbus {
-		compatible = "regulator-fixed";
-		regulator-name = "usbotg1vbus";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_usbotg1_vbus>;
-		gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
-		enable-active-high;
-	};
-
-	reg_usbotg2_vbus: regulator-usbotg2vbus {
-		compatible = "regulator-fixed";
-		regulator-name = "usbotg2vbus";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_usbotg2_vbus>;
-		gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>;
-		enable-active-high;
-	};
-};
-
-&adc1 {
-	vref-supply = <&reg_3v3>;
-	status = "okay";
-};
-
-&can1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_flexcan1>;
-	xceiver-supply = <&reg_5v>;
-	status = "okay";
-};
-
-&can2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_flexcan2>;
-	xceiver-supply = <&reg_5v>;
-	status = "okay";
-};
-
-&ecspi4 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_ecspi4>;
-	cs-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>, <&gpio4 3 GPIO_ACTIVE_LOW>;
-	status = "okay";
-
-	spidev0: spi@0 {
-		compatible = "spidev";
-		reg = <0>;
-		spi-max-frequency = <5000000>;
-	};
-
-	spidev1: spi@1 {
-		compatible = "spidev";
-		reg = <1>;
-		spi-max-frequency = <5000000>;
-	};
-};
-
-&i2c1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_i2c1>;
-	clock_frequency = <400000>;
-	status = "okay";
-};
-
-&i2c2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_i2c2>;
-	clock_frequency = <400000>;
-	status = "okay";
-};
-
-&lcdif {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_lcdif>;
-	status = "okay";
-
-	port {
-		lcdif_out: endpoint {
-			remote-endpoint = <&panel_in>;
-		};
-	};
-};
-
-&pwm3 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_pwm3>;
-	status = "okay";
-};
-
-&snvs_pwrkey {
-	status = "disabled";
-};
-
-&tsc {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_tsc>;
-	xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
-	measure-delay-time = <0xffff>;
-	pre-charge-time = <0xffff>;
-	status = "okay";
-};
-
-&uart1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart1>;
-	status = "okay";
-};
-
-&uart2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart2>;
-	status = "okay";
-};
-
-&usbotg1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usbotg1_id>;
-	vbus-supply = <&reg_usbotg1_vbus>;
-	dr_mode = "otg";
-	disable-over-current;
-	status = "okay";
-};
-
-&usbotg2 {
-	vbus-supply = <&reg_usbotg2_vbus>;
-	dr_mode = "host";
-	disable-over-current;
-	status = "okay";
+	model = "Armadeus Systems OPOS6UL SoM (i.MX6UL) on OPOS6ULDev board";
+	compatible = "armadeus,imx6ul-opos6uldev", "armadeus,imx6ul-opos6ul", "fsl,imx6ul";
 };
 
 &iomuxc {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_gpios>;
+	pinctrl-0 = <&pinctrl_gpios>, <&pinctrl_tamper_gpios>;
 
-	pinctrl_ecspi4: ecspi4grp {
+	pinctrl_tamper_gpios: tampergpiosgrp {
 		fsl,pins = <
-			MX6UL_PAD_NAND_DATA04__ECSPI4_SCLK	0x1b0b0
-			MX6UL_PAD_NAND_DATA05__ECSPI4_MOSI	0x1b0b0
-			MX6UL_PAD_NAND_DATA06__ECSPI4_MISO	0x1b0b0
-			MX6UL_PAD_NAND_DATA01__GPIO4_IO03	0x1b0b0
-			MX6UL_PAD_NAND_DATA07__GPIO4_IO09	0x1b0b0
-		>;
-	};
-
-	pinctrl_flexcan1: flexcan1grp {
-		fsl,pins = <
-			MX6UL_PAD_UART3_CTS_B__FLEXCAN1_TX	0x0b0b0
-			MX6UL_PAD_UART3_RTS_B__FLEXCAN1_RX	0x0b0b0
-		>;
-	};
-
-	pinctrl_flexcan2: flexcan2grp {
-		fsl,pins = <
-			MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX	0x0b0b0
-			MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX	0x0b0b0
-		>;
-	};
-
-	pinctrl_gpios: gpiosgrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO09__GPIO1_IO09	0x0b0b0
-			MX6UL_PAD_UART3_RX_DATA__GPIO1_IO25	0x0b0b0
-			MX6UL_PAD_UART3_TX_DATA__GPIO1_IO24	0x0b0b0
-			MX6UL_PAD_NAND_RE_B__GPIO4_IO00		0x0b0b0
-			MX6UL_PAD_GPIO1_IO08__GPIO1_IO08	0x0b0b0
-			MX6UL_PAD_UART1_CTS_B__GPIO1_IO18	0x0b0b0
-			MX6UL_PAD_UART1_RTS_B__GPIO1_IO19	0x0b0b0
-			MX6UL_PAD_NAND_WE_B__GPIO4_IO01		0x0b0b0
 			MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00	0x0b0b0
 			MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x0b0b0
 			MX6UL_PAD_SNVS_TAMPER3__GPIO5_IO03	0x0b0b0
@@ -299,100 +27,6 @@
 		>;
 	};
 
-	pinctrl_gpio_keys: gpiokeysgrp {
-		fsl,pins = <
-			MX6UL_PAD_ENET2_TX_DATA0__GPIO2_IO11	0x0b0b0
-		>;
-	};
-
-	pinctrl_i2c1: i2c1grp {
-		fsl,pins = <
-			MX6UL_PAD_UART4_RX_DATA__I2C1_SDA	0x4001b8b0
-			MX6UL_PAD_UART4_TX_DATA__I2C1_SCL	0x4001b8b0
-		>;
-	};
-
-	pinctrl_i2c2: i2c2grp {
-		fsl,pins = <
-			MX6UL_PAD_UART5_RX_DATA__I2C2_SDA	0x4001b8b0
-			MX6UL_PAD_UART5_TX_DATA__I2C2_SCL	0x4001b8b0
-		>;
-	};
-
-	pinctrl_lcdif: lcdifgrp {
-		fsl,pins = <
-			MX6UL_PAD_LCD_CLK__LCDIF_CLK	    0x100b1
-			MX6UL_PAD_LCD_ENABLE__LCDIF_ENABLE  0x100b1
-			MX6UL_PAD_LCD_HSYNC__LCDIF_HSYNC    0x100b1
-			MX6UL_PAD_LCD_VSYNC__LCDIF_VSYNC    0x100b1
-			MX6UL_PAD_LCD_DATA00__LCDIF_DATA00  0x100b1
-			MX6UL_PAD_LCD_DATA01__LCDIF_DATA01  0x100b1
-			MX6UL_PAD_LCD_DATA02__LCDIF_DATA02  0x100b1
-			MX6UL_PAD_LCD_DATA03__LCDIF_DATA03  0x100b1
-			MX6UL_PAD_LCD_DATA04__LCDIF_DATA04  0x100b1
-			MX6UL_PAD_LCD_DATA05__LCDIF_DATA05  0x100b1
-			MX6UL_PAD_LCD_DATA06__LCDIF_DATA06  0x100b1
-			MX6UL_PAD_LCD_DATA07__LCDIF_DATA07  0x100b1
-			MX6UL_PAD_LCD_DATA08__LCDIF_DATA08  0x100b1
-			MX6UL_PAD_LCD_DATA09__LCDIF_DATA09  0x100b1
-			MX6UL_PAD_LCD_DATA10__LCDIF_DATA10  0x100b1
-			MX6UL_PAD_LCD_DATA11__LCDIF_DATA11  0x100b1
-			MX6UL_PAD_LCD_DATA12__LCDIF_DATA12  0x100b1
-			MX6UL_PAD_LCD_DATA13__LCDIF_DATA13  0x100b1
-			MX6UL_PAD_LCD_DATA14__LCDIF_DATA14  0x100b1
-			MX6UL_PAD_LCD_DATA15__LCDIF_DATA15  0x100b1
-			MX6UL_PAD_LCD_DATA16__LCDIF_DATA16  0x100b1
-			MX6UL_PAD_LCD_DATA17__LCDIF_DATA17  0x100b1
-		>;
-	};
-
-	pinctrl_led: ledgrp {
-		fsl,pins = <
-			MX6UL_PAD_LCD_RESET__GPIO3_IO04		0x0b0b0
-		>;
-	};
-
-	pinctrl_pwm3: pwm3grp {
-		fsl,pins = <
-			MX6UL_PAD_NAND_ALE__PWM3_OUT		0x1b0b0
-		>;
-	};
-
-	pinctrl_tsc: tscgrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO01__GPIO1_IO01       0xb0
-			MX6UL_PAD_GPIO1_IO02__GPIO1_IO02       0xb0
-			MX6UL_PAD_GPIO1_IO03__GPIO1_IO03       0xb0
-			MX6UL_PAD_GPIO1_IO04__GPIO1_IO04       0xb0
-		>;
-	};
-
-	pinctrl_uart1: uart1grp {
-		fsl,pins = <
-			MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX	0x1b0b1
-			MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX	0x1b0b1
-		>;
-	};
-
-	pinctrl_uart2: uart2grp {
-		fsl,pins = <
-			MX6UL_PAD_UART2_TX_DATA__UART2_DCE_TX	0x1b0b1
-			MX6UL_PAD_UART2_RX_DATA__UART2_DCE_RX	0x1b0b1
-		>;
-	};
-
-	pinctrl_usbotg1_id: usbotg1idgrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO00__ANATOP_OTG1_ID	0x1b0b0
-		>;
-	};
-
-	pinctrl_usbotg1_vbus: usbotg1vbusgrp {
-		fsl,pins = <
-			MX6UL_PAD_GPIO1_IO05__GPIO1_IO05	0x1b0b0
-		>;
-	};
-
 	pinctrl_usbotg2_vbus: usbotg2vbusgrp {
 		fsl,pins = <
 			MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09	0x1b0b0
diff --git a/arch/arm/boot/dts/imx6ul-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6ul-phytec-phycore-som.dtsi
index 41f3b7f62bbf..88f631c8fabb 100644
--- a/arch/arm/boot/dts/imx6ul-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/imx6ul-phytec-phycore-som.dtsi
@@ -20,7 +20,7 @@
 	 * Set the minimum memory size here and
 	 * let the bootloader set the real size.
 	 */
-	memory {
+	memory@80000000 {
 		device_type = "memory";
 		reg = <0x80000000 0x8000000>;
 	};
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index f008036e9294..d9fdca12819b 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -558,7 +558,7 @@
 
 			anatop: anatop@20c8000 {
 				compatible = "fsl,imx6ul-anatop", "fsl,imx6q-anatop",
-					     "syscon", "simple-bus";
+					     "syscon", "simple-mfd";
 				reg = <0x020c8000 0x1000>;
 				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
@@ -711,6 +711,7 @@
 				clocks = <&clks IMX6UL_CLK_GPT2_BUS>,
 					 <&clks IMX6UL_CLK_GPT2_SERIAL>;
 				clock-names = "ipg", "per";
+				status = "disabled";
 			};
 
 			sdma: sdma@20ec000 {
diff --git a/arch/arm/boot/dts/imx6ull-colibri-eval-v3.dtsi b/arch/arm/boot/dts/imx6ull-colibri-eval-v3.dtsi
index b6147c76d159..a78849fd2afa 100644
--- a/arch/arm/boot/dts/imx6ull-colibri-eval-v3.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri-eval-v3.dtsi
@@ -8,6 +8,20 @@
 		stdout-path = "serial0:115200n8";
 	};
 
+	gpio-keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_snvs_gpiokeys>;
+
+		power {
+			label = "Wake-Up";
+			gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
+			linux,code = <KEY_WAKEUP>;
+			debounce-interval = <10>;
+			wakeup-source;
+		};
+	};
+
 	/* fixed crystal dedicated to mcp2515 */
 	clk16m: clk16m {
 		compatible = "fixed-clock";
diff --git a/arch/arm/boot/dts/imx6ull-colibri-nonwifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-nonwifi.dtsi
index fb213bec4654..95a11b8bcbdb 100644
--- a/arch/arm/boot/dts/imx6ull-colibri-nonwifi.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri-nonwifi.dtsi
@@ -15,7 +15,7 @@
 &iomuxc {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3
-		&pinctrl_gpio4 &pinctrl_gpio5 &pinctrl_gpio6>;
+		&pinctrl_gpio4 &pinctrl_gpio5 &pinctrl_gpio6 &pinctrl_gpio7>;
 };
 
 &iomuxc_snvs {
diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
index 038d8c90f6df..a0545431b3dc 100644
--- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
@@ -26,7 +26,7 @@
 &iomuxc {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3
-		&pinctrl_gpio4 &pinctrl_gpio5>;
+		&pinctrl_gpio4 &pinctrl_gpio5 &pinctrl_gpio7>;
 
 };
 
diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi
index d56728f03c35..6d850d997e1e 100644
--- a/arch/arm/boot/dts/imx6ull-colibri.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi
@@ -54,6 +54,18 @@
 	vref-supply = <&reg_module_3v3_avdd>;
 };
 
+&can1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan1>;
+	status = "disabled";
+};
+
+&can2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan2>;
+	status = "disabled";
+};
+
 /* Colibri SPI */
 &ecspi1 {
 	cs-gpios = <&gpio3 26 GPIO_ACTIVE_HIGH>;
@@ -62,8 +74,9 @@
 };
 
 &fec2 {
-	pinctrl-names = "default";
+	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&pinctrl_enet2>;
+	pinctrl-1 = <&pinctrl_enet2_sleep>;
 	phy-mode = "rmii";
 	phy-handle = <&ethphy1>;
 	status = "okay";
@@ -198,6 +211,12 @@
 	assigned-clock-rates = <0>, <198000000>;
 };
 
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+	fsl,ext-reset-output;
+};
+
 &iomuxc {
 	pinctrl_can_int: canint-grp {
 		fsl,pins = <
@@ -220,6 +239,21 @@
 		>;
 	};
 
+	pinctrl_enet2_sleep: enet2sleepgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO06__GPIO1_IO06	0x0
+			MX6UL_PAD_GPIO1_IO07__GPIO1_IO07	0x0
+			MX6UL_PAD_ENET2_RX_DATA0__GPIO2_IO08	0x0
+			MX6UL_PAD_ENET2_RX_DATA1__GPIO2_IO09	0x0
+			MX6UL_PAD_ENET2_RX_EN__GPIO2_IO10	0x0
+			MX6UL_PAD_ENET2_RX_ER__GPIO2_IO15	0x0
+			MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2	0x4001b031
+			MX6UL_PAD_ENET2_TX_DATA0__GPIO2_IO11	0x0
+			MX6UL_PAD_ENET2_TX_DATA1__GPIO2_IO12	0x0
+			MX6UL_PAD_ENET2_TX_EN__GPIO2_IO13	0x0
+		>;
+	};
+
 	pinctrl_ecspi1_cs: ecspi1-cs-grp {
 		fsl,pins = <
 			MX6UL_PAD_LCD_DATA21__GPIO3_IO26	0x000a0
@@ -234,6 +268,13 @@
 		>;
 	};
 
+	pinctrl_flexcan1: flexcan1-grp {
+		fsl,pins = <
+			MX6UL_PAD_ENET1_RX_DATA0__FLEXCAN1_TX	0x1b020
+			MX6UL_PAD_ENET1_RX_DATA1__FLEXCAN1_RX	0x1b020
+		>;
+	};
+
 	pinctrl_flexcan2: flexcan2-grp {
 		fsl,pins = <
 			MX6UL_PAD_ENET1_TX_DATA0__FLEXCAN2_RX	0x1b020
@@ -249,8 +290,6 @@
 
 	pinctrl_gpio1: gpio1-grp {
 		fsl,pins = <
-			MX6UL_PAD_ENET1_RX_DATA0__GPIO2_IO00	0x74 /* SODIMM 55 */
-			MX6UL_PAD_ENET1_RX_DATA1__GPIO2_IO01	0x74 /* SODIMM 63 */
 			MX6UL_PAD_UART3_RX_DATA__GPIO1_IO25	0X14 /* SODIMM 77 */
 			MX6UL_PAD_JTAG_TCK__GPIO1_IO14		0x14 /* SODIMM 99 */
 			MX6UL_PAD_NAND_CE1_B__GPIO4_IO14	0x14 /* SODIMM 133 */
@@ -303,6 +342,13 @@
 		>;
 	};
 
+	pinctrl_gpio7: gpio7-grp { /* CAN1 */
+		fsl,pins = <
+			MX6UL_PAD_ENET1_RX_DATA0__GPIO2_IO00	0x74 /* SODIMM 55 */
+			MX6UL_PAD_ENET1_RX_DATA1__GPIO2_IO01	0x74 /* SODIMM 63 */
+		>;
+	};
+
 	pinctrl_gpmi_nand: gpmi-nand-grp {
 		fsl,pins = <
 			MX6UL_PAD_NAND_DATA00__RAWNAND_DATA00	0x100a9
@@ -490,6 +536,12 @@
 			MX6UL_PAD_GPIO1_IO03__OSC32K_32K_OUT	0x14
 		>;
 	};
+
+	pinctrl_wdog: wdog-grp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_RESET__WDOG1_WDOG_ANY    0x30b0
+		>;
+	};
 };
 
 &iomuxc_snvs {
@@ -517,19 +569,19 @@
 
 	pinctrl_snvs_ad7879_int: snvs-ad7879-int-grp { /* TOUCH Interrupt */
 		fsl,pins = <
-			MX6ULL_PAD_SNVS_TAMPER7__GPIO5_IO07	0x1b0b0
+			MX6ULL_PAD_SNVS_TAMPER7__GPIO5_IO07	0x100b0
 		>;
 	};
 
 	pinctrl_snvs_reg_sd: snvs-reg-sd-grp {
 		fsl,pins = <
-			MX6ULL_PAD_SNVS_TAMPER9__GPIO5_IO09	0x4001b8b0
+			MX6ULL_PAD_SNVS_TAMPER9__GPIO5_IO09	0x400100b0
 		>;
 	};
 
 	pinctrl_snvs_usbc_det: snvs-usbc-det-grp {
 		fsl,pins = <
-			MX6ULL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x1b0b0
+			MX6ULL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x130b0
 		>;
 	};
 
diff --git a/arch/arm/boot/dts/imx6ull-kontron-n6411-s.dts b/arch/arm/boot/dts/imx6ull-kontron-n6411-s.dts
new file mode 100644
index 000000000000..57588a5e1e34
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ull-kontron-n6411-s.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2019 Kontron Electronics GmbH
+ */
+
+/dts-v1/;
+
+#include "imx6ull-kontron-n6411-som.dtsi"
+#include "imx6ul-kontron-n6x1x-s.dtsi"
+
+/ {
+	model = "Kontron N6411 S";
+	compatible = "kontron,imx6ull-n6411-s", "kontron,imx6ull-n6411-som",
+		     "fsl,imx6ull";
+};
diff --git a/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi b/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi
new file mode 100644
index 000000000000..b7e984284e1a
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ */
+
+#include "imx6ull.dtsi"
+#include "imx6ul-kontron-n6x1x-som-common.dtsi"
+
+/ {
+	model = "Kontron N6411 SOM";
+	compatible = "kontron,imx6ull-n6311-som", "fsl,imx6ull";
+
+	memory@80000000 {
+		reg = <0x80000000 0x20000000>;
+		device_type = "memory";
+	};
+};
+
+&qspi {
+	spi-flash@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "spi-nand";
+		spi-max-frequency = <104000000>;
+		spi-tx-bus-width = <4>;
+		spi-rx-bus-width = <4>;
+		reg = <0>;
+
+		partition@0 {
+			label = "ubi1";
+			reg = <0x00000000 0x08000000>;
+		};
+
+		partition@8000000 {
+			label = "ubi2";
+			reg = <0x08000000 0x18000000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/imx6ull-opos6ul.dtsi
new file mode 100644
index 000000000000..155f941f2811
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ull-opos6ul.dtsi
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2019 Armadeus Systems <support@armadeus.com>
+
+#include "imx6ull.dtsi"
+#include "imx6ul-imx6ull-opos6ul.dtsi"
diff --git a/arch/arm/boot/dts/imx6ull-opos6uldev.dts b/arch/arm/boot/dts/imx6ull-opos6uldev.dts
new file mode 100644
index 000000000000..198fdb72641b
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ull-opos6uldev.dts
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+//
+// Copyright 2019 Armadeus Systems <support@armadeus.com>
+
+/dts-v1/;
+#include "imx6ull-opos6ul.dtsi"
+#include "imx6ul-imx6ull-opos6uldev.dtsi"
+
+/ {
+	model = "Armadeus Systems OPOS6UL SoM (i.MX6ULL) on OPOS6ULDev board";
+	compatible = "armadeus,imx6ull-opos6uldev", "armadeus,imx6ull-opos6ul", "fsl,imx6ull";
+};
+
+&iomuxc_snvs {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_tamper_gpios>;
+
+	pinctrl_tamper_gpios: tampergpiosgrp {
+		fsl,pins = <
+			MX6ULL_PAD_SNVS_TAMPER0__GPIO5_IO00	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER2__GPIO5_IO02	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER3__GPIO5_IO03	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER5__GPIO5_IO05	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER7__GPIO5_IO07	0x0b0b0
+			MX6ULL_PAD_SNVS_TAMPER8__GPIO5_IO08	0x0b0b0
+		>;
+	};
+
+	pinctrl_usbotg2_vbus: usbotg2vbusgrp {
+		fsl,pins = <
+			MX6ULL_PAD_SNVS_TAMPER9__GPIO5_IO09	0x1b0b0
+		>;
+	};
+
+	pinctrl_w1: w1grp {
+		fsl,pins = <
+			MX6ULL_PAD_SNVS_TAMPER1__GPIO5_IO01	0x0b0b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi b/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi
index 3f2746169181..6aa123cbdadb 100644
--- a/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi
+++ b/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi
@@ -52,6 +52,20 @@
 		clock-frequency = <16000000>;
 	};
 
+	gpio-keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpiokeys>;
+
+		power {
+			label = "Wake-Up";
+			gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>;
+			linux,code = <KEY_WAKEUP>;
+			debounce-interval = <10>;
+			wakeup-source;
+		};
+	};
+
 	panel: panel {
 		compatible = "edt,et057090dhu";
 		backlight = <&bl>;
@@ -131,6 +145,21 @@
 &i2c4 {
 	status = "okay";
 
+	/*
+	 * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>,
+	 * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms
+	 */
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpiotouch>;
+		reg = <0x4a>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <9 IRQ_TYPE_EDGE_FALLING>;		/* SODIMM 28 */
+		reset-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>;	/* SODIMM 30 */
+		status = "disabled";
+	};
+
 	/* M41T0M6 real time clock on carrier board */
 	rtc: m41t0m6@68 {
 		compatible = "st,m41t0";
@@ -186,3 +215,12 @@
 	vmmc-supply = <&reg_3v3>;
 	status = "okay";
 };
+
+&iomuxc {
+	pinctrl_gpiotouch: touchgpios {
+		fsl,pins = <
+			MX7D_PAD_GPIO1_IO09__GPIO1_IO9		0x74
+			MX7D_PAD_GPIO1_IO10__GPIO1_IO10		0x14
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi
index 917eb0b58b13..d05be3f0e2a7 100644
--- a/arch/arm/boot/dts/imx7-colibri.dtsi
+++ b/arch/arm/boot/dts/imx7-colibri.dtsi
@@ -322,7 +322,6 @@
 &usdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1 &pinctrl_cd_usdhc1>;
-	no-1-8-v;
 	cd-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
 	disable-wp;
 	vqmmc-supply = <&reg_LDO2>;
@@ -667,6 +666,28 @@
 		>;
 	};
 
+	pinctrl_usdhc1_100mhz: usdhc1grp_100mhz {
+		fsl,pins = <
+			MX7D_PAD_SD1_CMD__SD1_CMD	0x5a
+			MX7D_PAD_SD1_CLK__SD1_CLK	0x1a
+			MX7D_PAD_SD1_DATA0__SD1_DATA0	0x5a
+			MX7D_PAD_SD1_DATA1__SD1_DATA1	0x5a
+			MX7D_PAD_SD1_DATA2__SD1_DATA2	0x5a
+			MX7D_PAD_SD1_DATA3__SD1_DATA3	0x5a
+		>;
+	};
+
+	pinctrl_usdhc1_200mhz: usdhc1grp_200mhz {
+		fsl,pins = <
+			MX7D_PAD_SD1_CMD__SD1_CMD	0x5b
+			MX7D_PAD_SD1_CLK__SD1_CLK	0x1b
+			MX7D_PAD_SD1_DATA0__SD1_DATA0	0x5b
+			MX7D_PAD_SD1_DATA1__SD1_DATA1	0x5b
+			MX7D_PAD_SD1_DATA2__SD1_DATA2	0x5b
+			MX7D_PAD_SD1_DATA3__SD1_DATA3	0x5b
+		>;
+	};
+
 	pinctrl_usdhc3: usdhc3grp {
 		fsl,pins = <
 			MX7D_PAD_SD3_CMD__SD3_CMD		0x59
@@ -737,12 +758,17 @@
 
 	pinctrl_gpio_lpsr: gpio1-grp {
 		fsl,pins = <
-			MX7D_PAD_LPSR_GPIO1_IO01__GPIO1_IO1	0x59
 			MX7D_PAD_LPSR_GPIO1_IO02__GPIO1_IO2	0x59
 			MX7D_PAD_LPSR_GPIO1_IO03__GPIO1_IO3	0x59
 		>;
 	};
 
+	pinctrl_gpiokeys: gpiokeysgrp {
+		fsl,pins = <
+			MX7D_PAD_LPSR_GPIO1_IO01__GPIO1_IO1	0x19
+		>;
+	};
+
 	pinctrl_i2c1: i2c1-grp {
 		fsl,pins = <
 			MX7D_PAD_LPSR_GPIO1_IO05__I2C1_SDA	0x4000007f
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 9c8dd32cc035..d8acd7cc7918 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -22,6 +22,7 @@
 			reg = <1>;
 			clock-frequency = <996000000>;
 			operating-points-v2 = <&cpu0_opp_table>;
+			#cooling-cells = <2>;
 			cpu-idle-states = <&cpu_sleep_wait>;
 		};
 	};
@@ -43,7 +44,8 @@
 			opp-hz = /bits/ 64 <792000000>;
 			opp-microvolt = <1000000>;
 			clock-latency-ns = <150000>;
-			opp-supported-hw = <0xf>, <0xf>;
+			opp-supported-hw = <0xd>, <0xf>;
+			opp-suspend;
 		};
 
 		opp-996000000 {
@@ -51,6 +53,7 @@
 			opp-microvolt = <1100000>;
 			clock-latency-ns = <150000>;
 			opp-supported-hw = <0xc>, <0xf>;
+			opp-suspend;
 		};
 
 		opp-1200000000 {
@@ -58,6 +61,7 @@
 			opp-microvolt = <1225000>;
 			clock-latency-ns = <150000>;
 			opp-supported-hw = <0x8>, <0xf>;
+			opp-suspend;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx7s-colibri.dtsi b/arch/arm/boot/dts/imx7s-colibri.dtsi
index 1fb1ec5d3d70..6d16e32aed89 100644
--- a/arch/arm/boot/dts/imx7s-colibri.dtsi
+++ b/arch/arm/boot/dts/imx7s-colibri.dtsi
@@ -49,3 +49,7 @@
 		reg = <0x80000000 0x10000000>;
 	};
 };
+
+&gpmi {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index e2e604d6ba0b..1b812f4e7453 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -559,7 +559,7 @@
 
 			anatop: anatop@30360000 {
 				compatible = "fsl,imx7d-anatop", "fsl,imx6q-anatop",
-					"syscon", "simple-bus";
+					"syscon", "simple-mfd";
 				reg = <0x30360000 0x10000>;
 				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/imx7ulp-evk.dts b/arch/arm/boot/dts/imx7ulp-evk.dts
index 4245b33bb451..a863a2b337d6 100644
--- a/arch/arm/boot/dts/imx7ulp-evk.dts
+++ b/arch/arm/boot/dts/imx7ulp-evk.dts
@@ -77,6 +77,8 @@
 };
 
 &usdhc0 {
+	assigned-clocks = <&pcc2 IMX7ULP_CLK_USDHC0>;
+	assigned-clock-parents = <&scg1 IMX7ULP_CLK_APLL_PFD1>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc0>;
 	cd-gpios = <&gpio_ptc 10 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
index 6859a3a83750..ab91c98f2124 100644
--- a/arch/arm/boot/dts/imx7ulp.dtsi
+++ b/arch/arm/boot/dts/imx7ulp.dtsi
@@ -37,10 +37,10 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu0: cpu@0 {
+		cpu0: cpu@f00 {
 			compatible = "arm,cortex-a7";
 			device_type = "cpu";
-			reg = <0>;
+			reg = <0xf00>;
 		};
 	};
 
@@ -87,13 +87,6 @@
 		#clock-cells = <0>;
 	};
 
-	mpll: clock-mpll {
-		compatible = "fixed-clock";
-		clock-frequency = <480000000>;
-		clock-output-names = "mpll";
-		#clock-cells = <0>;
-	};
-
 	ahbbridge0: bus@40000000 {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -230,8 +223,6 @@
 				 <&scg1 IMX7ULP_CLK_NIC1_DIV>,
 				 <&pcc2 IMX7ULP_CLK_USDHC0>;
 			clock-names = "ipg", "ahb", "per";
-			assigned-clocks = <&pcc2 IMX7ULP_CLK_USDHC0>;
-			assigned-clock-parents = <&scg1 IMX7ULP_CLK_NIC1_DIV>;
 			bus-width = <4>;
 			fsl,tuning-start-tap = <20>;
 			fsl,tuning-step = <2>;
@@ -246,8 +237,6 @@
 				 <&scg1 IMX7ULP_CLK_NIC1_DIV>,
 				 <&pcc2 IMX7ULP_CLK_USDHC1>;
 			clock-names = "ipg", "ahb", "per";
-			assigned-clocks = <&pcc2 IMX7ULP_CLK_USDHC1>;
-			assigned-clock-parents = <&scg1 IMX7ULP_CLK_NIC1_DIV>;
 			bus-width = <4>;
 			fsl,tuning-start-tap = <20>;
 			fsl,tuning-step = <2>;
@@ -258,12 +247,22 @@
 			compatible = "fsl,imx7ulp-scg1";
 			reg = <0x403e0000 0x10000>;
 			clocks = <&rosc>, <&sosc>, <&sirc>,
-				 <&firc>, <&upll>, <&mpll>;
+				 <&firc>, <&upll>;
 			clock-names = "rosc", "sosc", "sirc",
-				      "firc", "upll", "mpll";
+				      "firc", "upll";
 			#clock-cells = <1>;
 		};
 
+		wdog1: watchdog@403d0000 {
+			compatible = "fsl,imx7ulp-wdt";
+			reg = <0x403d0000 0x10000>;
+			interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+			assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+			assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+			timeout-sec = <40>;
+		};
+
 		pcc2: clock-controller@403f0000 {
 			compatible = "fsl,imx7ulp-pcc2";
 			reg = <0x403f0000 0x10000>;
@@ -276,13 +275,12 @@
 				 <&scg1 IMX7ULP_CLK_APLL_PFD0>,
 				 <&scg1 IMX7ULP_CLK_UPLL>,
 				 <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>,
-				 <&scg1 IMX7ULP_CLK_MIPI_PLL>,
 				 <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>,
 				 <&scg1 IMX7ULP_CLK_ROSC>,
 				 <&scg1 IMX7ULP_CLK_SPLL_BUS_CLK>;
 			clock-names = "nic1_bus_clk", "nic1_clk", "ddr_clk",
 				      "apll_pfd2", "apll_pfd1", "apll_pfd0",
-				      "upll", "sosc_bus_clk", "mpll",
+				      "upll", "sosc_bus_clk",
 				      "firc_bus_clk", "rosc", "spll_bus_clk";
 			assigned-clocks = <&pcc2 IMX7ULP_CLK_LPTPM5>;
 			assigned-clock-parents = <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>;
@@ -309,13 +307,12 @@
 				 <&scg1 IMX7ULP_CLK_APLL_PFD0>,
 				 <&scg1 IMX7ULP_CLK_UPLL>,
 				 <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>,
-				 <&scg1 IMX7ULP_CLK_MIPI_PLL>,
 				 <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>,
 				 <&scg1 IMX7ULP_CLK_ROSC>,
 				 <&scg1 IMX7ULP_CLK_SPLL_BUS_CLK>;
 			clock-names = "nic1_bus_clk", "nic1_clk", "ddr_clk",
 				      "apll_pfd2", "apll_pfd1", "apll_pfd0",
-				      "upll", "sosc_bus_clk", "mpll",
+				      "upll", "sosc_bus_clk",
 				      "firc_bus_clk", "rosc", "spll_bus_clk";
 		};
 	};
diff --git a/arch/arm/boot/dts/keystone-clocks.dtsi b/arch/arm/boot/dts/keystone-clocks.dtsi
index 457515b0736a..0397c3423d2d 100644
--- a/arch/arm/boot/dts/keystone-clocks.dtsi
+++ b/arch/arm/boot/dts/keystone-clocks.dtsi
@@ -408,4 +408,31 @@ clocks {
 		reg-names = "control", "domain";
 		domain-id = <0>;
 	};
+
+	/*
+	 * Below are set of fixed, input clocks definitions,
+	 * for which real frequencies have to be defined in board files.
+	 * Those clocks can be used as reference clocks for some HW modules
+	 * (as cpts, for example) by configuring corresponding clock muxes.
+	 */
+	timi0: timi0 {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+		clock-output-names = "timi0";
+	};
+
+	timi1: timi1 {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+		clock-output-names = "timi1";
+	};
+
+	tsrefclk: tsrefclk {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+		clock-output-names = "tsrefclk";
+	};
 };
diff --git a/arch/arm/boot/dts/keystone-k2e-clocks.dtsi b/arch/arm/boot/dts/keystone-k2e-clocks.dtsi
index f7592155a740..cf30e007fea3 100644
--- a/arch/arm/boot/dts/keystone-k2e-clocks.dtsi
+++ b/arch/arm/boot/dts/keystone-k2e-clocks.dtsi
@@ -71,4 +71,24 @@ clocks {
 		reg-names = "control", "domain";
 		domain-id = <29>;
 	};
+
+	/*
+	 * Below are set of fixed, input clocks definitions,
+	 * for which real frequencies have to be defined in board files.
+	 * Those clocks can be used as reference clocks for some HW modules
+	 * (as cpts, for example) by configuring corresponding clock muxes.
+	 */
+	tsipclka: tsipclka {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+		clock-output-names = "tsipclka";
+	};
+
+	tsipclkb: tsipclkb {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+		clock-output-names = "tsipclkb";
+	};
 };
diff --git a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
index 1db17ec744b1..ad15e77874b1 100644
--- a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
@@ -135,8 +135,8 @@ netcp: netcp@24000000 {
 	/* NetCP address range */
 	ranges = <0 0x24000000 0x1000000>;
 
-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
-	clock-names = "pa_clk", "ethss_clk", "cpts";
+	clocks = <&clkpa>, <&clkcpgmac>;
+	clock-names = "pa_clk", "ethss_clk";
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 0>,
@@ -156,6 +156,23 @@ netcp: netcp@24000000 {
 			tx-queue = <896>;
 			tx-channel = "nettx";
 
+			cpts {
+				clocks = <&cpts_refclk_mux>;
+				clock-names = "cpts";
+
+				cpts_refclk_mux: cpts-refclk-mux {
+					#clock-cells = <0>;
+					clocks = <&chipclk12>, <&chipclk13>,
+						 <&timi0>, <&timi1>,
+						 <&tsipclka>, <&tsrefclk>,
+						 <&tsipclkb>;
+					ti,mux-tbl = <0x0>, <0x1>, <0x2>,
+						<0x3>, <0x4>, <0x8>, <0xC>;
+					assigned-clocks = <&cpts_refclk_mux>;
+					assigned-clock-parents = <&chipclk12>;
+				};
+			};
+
 			interfaces {
 				gbe0: interface-0 {
 					slave-port = <0>;
diff --git a/arch/arm/boot/dts/keystone-k2hk-netcp.dtsi b/arch/arm/boot/dts/keystone-k2hk-netcp.dtsi
index e203145acbea..d5a6c1f5633c 100644
--- a/arch/arm/boot/dts/keystone-k2hk-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2hk-netcp.dtsi
@@ -152,8 +152,8 @@ netcp: netcp@2000000 {
 	/* NetCP address range */
 	ranges  = <0 0x2000000 0x100000>;
 
-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
-	clock-names = "pa_clk", "ethss_clk", "cpts";
+	clocks = <&clkpa>, <&clkcpgmac>;
+	clock-names = "pa_clk", "ethss_clk";
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 22>,
@@ -175,6 +175,22 @@ netcp: netcp@2000000 {
 			tx-queue = <648>;
 			tx-channel = "nettx";
 
+			cpts {
+				clocks = <&cpts_refclk_mux>;
+				clock-names = "cpts";
+
+				cpts_refclk_mux: cpts-refclk-mux {
+					#clock-cells = <0>;
+					clocks = <&chipclk12>, <&chipclk13>,
+						 <&timi0>, <&timi1>,
+						 <&tsrefclk>;
+					ti,mux-tbl = <0x0>, <0x1>, <0x2>,
+						<0x3>, <0x8>;
+					assigned-clocks = <&cpts_refclk_mux>;
+					assigned-clock-parents = <&chipclk12>;
+				};
+			};
+
 			interfaces {
 				gbe0: interface-0 {
 					slave-port = <0>;
diff --git a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
index a2e47bad3307..c1f982604145 100644
--- a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
@@ -134,8 +134,8 @@ netcp: netcp@26000000 {
 	/* NetCP address range */
 	ranges = <0 0x26000000 0x1000000>;
 
-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
-	clock-names = "pa_clk", "ethss_clk", "cpts";
+	clocks = <&clkpa>, <&clkcpgmac>;
+	clock-names = "pa_clk", "ethss_clk";
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 0>,
@@ -155,6 +155,22 @@ netcp: netcp@26000000 {
 			tx-queue = <896>;
 			tx-channel = "nettx";
 
+			cpts {
+				clocks = <&cpts_refclk_mux>;
+				clock-names = "cpts";
+
+				cpts_refclk_mux: cpts-refclk-mux {
+					#clock-cells = <0>;
+					clocks = <&chipclk12>, <&chipclk13>,
+						 <&timi0>, <&timi1>,
+						 <&tsrefclk>;
+					ti,mux-tbl = <0x0>, <0x1>, <0x2>,
+						<0x3>, <0x8>;
+					assigned-clocks = <&cpts_refclk_mux>;
+					assigned-clock-parents = <&chipclk12>;
+				};
+			};
+
 			interfaces {
 				gbe0: interface-0 {
 					slave-port = <0>;
diff --git a/arch/arm/boot/dts/kirkwood-synology.dtsi b/arch/arm/boot/dts/kirkwood-synology.dtsi
index c97ed29a0a0b..217bd374e52b 100644
--- a/arch/arm/boot/dts/kirkwood-synology.dtsi
+++ b/arch/arm/boot/dts/kirkwood-synology.dtsi
@@ -244,7 +244,7 @@
 
 			rs5c372: rs5c372@32 {
 				status = "disabled";
-				compatible = "ricoh,rs5c372";
+				compatible = "ricoh,rs5c372a";
 				reg = <0x32>;
 			};
 
diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
index f7a841a28865..2a0a98fe67f0 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
@@ -9,5 +9,5 @@
 
 / {
 	model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
-	compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3";
+	compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
index 7675bc3fa868..57bae2aa910e 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
@@ -9,5 +9,5 @@
 
 / {
 	model = "LogicPD Zoom OMAP35xx Torpedo Development Kit";
-	compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3";
+	compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3430", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts
index 07ac99b9cda6..b5536132971f 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts
@@ -11,22 +11,5 @@
 #include "logicpd-torpedo-37xx-devkit.dts"
 
 &lcd0 {
-
-	label = "28";
-
-	panel-timing {
-		clock-frequency = <9000000>;
-		hactive = <480>;
-		vactive = <272>;
-		hfront-porch = <3>;
-		hback-porch = <2>;
-		hsync-len = <42>;
-		vback-porch = <3>;
-		vfront-porch = <2>;
-		vsync-len = <11>;
-		hsync-active = <1>;
-		vsync-active = <1>;
-		de-active = <1>;
-		pixelclk-active = <0>;
-	};
+	compatible = "logicpd,type28";
 };
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 18c27e85051f..5532db04046c 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -50,6 +50,20 @@
 	};
 };
 
+&uart2 {
+	/delete-property/dma-names;
+	bluetooth {
+		compatible = "ti,wl1283-st";
+		enable-gpios = <&gpio6 2 GPIO_ACTIVE_HIGH>; /* gpio 162 */
+		max-speed = <3000000>;
+	};
+};
+
+/* The DM3730 has a faster L3 than OMAP35, so increase pixel clock */
+&mt9p031_out {
+	pixel-clock-frequency = <90000000>;
+};
+
 &omap3_pmx_core {
 	mmc3_pins: pinmux_mm3_pins {
 		pinctrl-single,pins = <
diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
index 449cc7616da6..f7b82ced4080 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
@@ -101,6 +101,12 @@
 	};
 };
 
+&hdqw1w {
+	pinctrl-names = "default";
+	pinctrl-0 = <&hdq_pins>;
+};
+
+
 &vpll2 {
 	regulator-always-on;
 };
@@ -108,6 +114,7 @@
 &dss {
 	status = "ok";
 	vdds_dsi-supply = <&vpll2>;
+	vdda_video-supply = <&vpll2>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&dss_dpi_pins1>;
 	port {
@@ -125,7 +132,6 @@
 
 	lcd0: display {
 		/* This isn't the exact LCD, but the timings meet spec */
-		/* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */
 		compatible = "newhaven,nhd-4.3-480272ef-atxl";
 		label = "15";
 		pinctrl-names = "default";
@@ -168,6 +174,12 @@
 		>;
 	};
 
+	hdq_pins: hdq_pins {
+		pinctrl-single,pins = <
+			OMAP3_CORE1_IOPAD(0x21c6, PIN_INPUT_PULLUP | MUX_MODE0) /* hdq_sio */
+		>;
+	};
+
 	pwm_pins: pinmux_pwm_pins {
 		pinctrl-single,pins = <
 			OMAP3_CORE1_IOPAD(0x20B8, PIN_OUTPUT | PIN_OFF_OUTPUT_LOW | MUX_MODE3)       /* gpmc_ncs5.gpt_10_pwm_evt */
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 506b118e511a..3a5228562b0d 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -35,6 +35,11 @@
 	};
 };
 
+/* The Torpedo doesn't route the USB host pins */
+&usbhshost {
+	status = "disabled";
+};
+
 &gpmc {
 	ranges = <0 0 0x30000000 0x1000000>;	/* CS0: 16MB for NAND */
 
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
index 5a7e3e5caebe..3c534cd50ee3 100644
--- a/arch/arm/boot/dts/meson8.dtsi
+++ b/arch/arm/boot/dts/meson8.dtsi
@@ -253,7 +253,7 @@
 &aobus {
 	pmu: pmu@e0 {
 		compatible = "amlogic,meson8-pmu", "syscon";
-		reg = <0xe0 0x8>;
+		reg = <0xe0 0x18>;
 	};
 
 	pinctrl_aobus: pinctrl@84 {
diff --git a/arch/arm/boot/dts/mmp3-dell-ariel.dts b/arch/arm/boot/dts/mmp3-dell-ariel.dts
new file mode 100644
index 000000000000..c1947b5a688d
--- /dev/null
+++ b/arch/arm/boot/dts/mmp3-dell-ariel.dts
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Dell Wyse 3020 a.k.a. "Ariel" a.k.a. Tx0D (T00D, T10D)
+ *
+ * Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk>
+ */
+
+/dts-v1/;
+#include "mmp3.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	model = "Dell Ariel";
+	compatible = "dell,wyse-ariel", "marvell,mmp3";
+
+	aliases {
+		serial2 = &uart3;
+	};
+
+	chosen {
+		#address-cells = <0x1>;
+		#size-cells = <0x1>;
+		ranges;
+		bootargs = "earlyprintk=ttyS2,115200 console=ttyS2,115200";
+	};
+
+	memory@0 {
+		linux,usable-memory = <0x0 0x7f600000>;
+		available = <0x7f700000 0x7ff00000 0x00000000 0x7f600000>;
+		reg = <0x0 0x80000000>;
+		device_type = "memory";
+	};
+};
+
+&uart3 {
+	status = "okay";
+};
+
+&rtc {
+	status = "okay";
+};
+
+&usb_otg0 {
+	status = "okay";
+};
+
+&usb_otg_phy0 {
+	status = "okay";
+};
+
+&mmc3 {
+	status = "okay";
+	max-frequency = <50000000>;
+	status = "okay";
+	bus-width = <8>;
+	non-removable;
+	cap-mmc-highspeed;
+};
+
+&twsi1 {
+	status = "okay";
+
+	rtc@68 {
+		compatible = "dallas,ds1338";
+		reg = <0x68>;
+		status = "okay";
+	};
+};
+
+&twsi3 {
+	status = "okay";
+};
+
+&twsi4 {
+	status = "okay";
+};
+
+&ssp3 {
+	status = "okay";
+	cs-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
+
+	firmware-flash@0 {
+		compatible = "st,m25p80", "jedec,spi-nor";
+		reg = <0>;
+		spi-max-frequency = <40000000>;
+		m25p,fast-read;
+	};
+};
+
+&ssp4 {
+	cs-gpios = <&gpio 56 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi
new file mode 100644
index 000000000000..6f480827b94d
--- /dev/null
+++ b/arch/arm/boot/dts/mmp3.dtsi
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ *  Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk>
+ */
+
+#include <dt-bindings/clock/marvell,mmp2.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		enable-method = "marvell,mmp3-smp";
+
+		cpu@0 {
+			compatible = "marvell,pj4b";
+			device_type = "cpu";
+			next-level-cache = <&l2>;
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "marvell,pj4b";
+			device_type = "cpu";
+			next-level-cache = <&l2>;
+			reg = <1>;
+		};
+	};
+
+	soc {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		ranges;
+
+		axi@d4200000 {
+			compatible = "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0xd4200000 0x00200000>;
+			ranges;
+
+			interrupt-controller@d4282000 {
+				compatible = "marvell,mmp3-intc";
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0xd4282000 0x1000>,
+				      <0xd4284000 0x100>;
+				mrvl,intc-nr-irqs = <64>;
+			};
+
+			pmic_mux: interrupt-controller@d4282150 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x150 0x4>, <0x168 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <4>;
+			};
+
+			rtc_mux: interrupt-controller@d4282154 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x154 0x4>, <0x16c 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <2>;
+			};
+
+			hsi3_mux: interrupt-controller@d42821bc {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1bc 0x4>, <0x1a4 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <3>;
+			};
+
+			gpu_mux: interrupt-controller@d42821c0 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1c0 0x4>, <0x1a8 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <3>;
+			};
+
+			twsi_mux: interrupt-controller@d4282158 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x158 0x4>, <0x170 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <5>;
+			};
+
+			hsi2_mux: interrupt-controller@d42821c4 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1c4 0x4>, <0x1ac 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <2>;
+			};
+
+			dxo_mux: interrupt-controller@d42821c8 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1c8 0x4>, <0x1b0 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <2>;
+			};
+
+			misc1_mux: interrupt-controller@d428215c {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x15c 0x4>, <0x174 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <31>;
+			};
+
+			ci_mux: interrupt-controller@d42821cc {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1cc 0x4>, <0x1b4 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <2>;
+			};
+
+			ssp_mux: interrupt-controller@d4282160 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x160 0x4>, <0x178 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <2>;
+			};
+
+			hsi1_mux: interrupt-controller@d4282184 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x184 0x4>, <0x17c 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <4>;
+			};
+
+			misc2_mux: interrupt-controller@d4282188 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x188 0x4>, <0x180 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <20>;
+			};
+
+			hsi0_mux: interrupt-controller@d42821d0 {
+				compatible = "mrvl,mmp2-mux-intc";
+				interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				reg = <0x1d0 0x4>, <0x1b8 0x4>;
+				reg-names = "mux status", "mux mask";
+				mrvl,intc-nr-irqs = <5>;
+			};
+
+			usb_otg_phy0: usb-otg-phy@d4207000 {
+				compatible = "marvell,mmp3-usb-phy";
+				reg = <0xd4207000 0x40>;
+				#phy-cells = <0>;
+				status = "disabled";
+			};
+
+			usb_otg0: usb-otg@d4208000 {
+				compatible = "marvell,pxau2o-ehci";
+				reg = <0xd4208000 0x200>;
+				interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_USB>;
+				clock-names = "USBCLK";
+				phys = <&usb_otg_phy0>;
+				phy-names = "usb";
+				status = "disabled";
+			};
+
+			mmc1: mmc@d4280000 {
+				compatible = "mrvl,pxav3-mmc";
+				reg = <0xd4280000 0x120>;
+				clocks = <&soc_clocks MMP2_CLK_SDH0>;
+				clock-names = "io";
+				interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			mmc2: mmc@d4280800 {
+				compatible = "mrvl,pxav3-mmc";
+				reg = <0xd4280800 0x120>;
+				clocks = <&soc_clocks MMP2_CLK_SDH1>;
+				clock-names = "io";
+				interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			mmc3: mmc@d4281000 {
+				compatible = "mrvl,pxav3-mmc";
+				reg = <0xd4281000 0x120>;
+				clocks = <&soc_clocks MMP2_CLK_SDH2>;
+				clock-names = "io";
+				interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			mmc4: mmc@d4281800 {
+				compatible = "mrvl,pxav3-mmc";
+				reg = <0xd4281800 0x120>;
+				clocks = <&soc_clocks MMP2_CLK_SDH3>;
+				clock-names = "io";
+				interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			camera0: camera@d420a000 {
+				compatible = "marvell,mmp2-ccic";
+				reg = <0xd420a000 0x800>;
+				interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_CCIC0>;
+				clock-names = "axi";
+				#clock-cells = <0>;
+				clock-output-names = "mclk";
+				status = "disabled";
+			};
+
+			camera1: camera@d420a800 {
+				compatible = "marvell,mmp2-ccic";
+				reg = <0xd420a800 0x800>;
+				interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_CCIC1>;
+				clock-names = "axi";
+				#clock-cells = <0>;
+				clock-output-names = "mclk";
+				status = "disabled";
+			};
+		};
+
+		apb@d4000000 {
+			compatible = "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0xd4000000 0x00200000>;
+			ranges;
+
+			timer: timer@d4014000 {
+				compatible = "mrvl,mmp-timer";
+				reg = <0xd4014000 0x100>;
+				interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_TIMER>;
+			};
+
+			uart1: uart@d4030000 {
+				compatible = "mrvl,mmp-uart";
+				reg = <0xd4030000 0x1000>;
+				interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_UART0>;
+				resets = <&soc_clocks MMP2_CLK_UART0>;
+				reg-shift = <2>;
+				status = "disabled";
+			};
+
+			uart2: uart@d4017000 {
+				compatible = "mrvl,mmp-uart";
+				reg = <0xd4017000 0x1000>;
+				interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_UART1>;
+				resets = <&soc_clocks MMP2_CLK_UART1>;
+				reg-shift = <2>;
+				status = "disabled";
+			};
+
+			uart3: uart@d4018000 {
+				compatible = "mrvl,mmp-uart";
+				reg = <0xd4018000 0x1000>;
+				interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_UART2>;
+				resets = <&soc_clocks MMP2_CLK_UART2>;
+				reg-shift = <2>;
+				status = "disabled";
+			};
+
+			uart4: uart@d4016000 {
+				compatible = "mrvl,mmp-uart";
+				reg = <0xd4016000 0x1000>;
+				interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_UART3>;
+				resets = <&soc_clocks MMP2_CLK_UART3>;
+				reg-shift = <2>;
+				status = "disabled";
+			};
+
+			gpio: gpio@d4019000 {
+				compatible = "marvell,mmp2-gpio";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xd4019000 0x1000>;
+				gpio-controller;
+				#gpio-cells = <2>;
+				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "gpio_mux";
+				clocks = <&soc_clocks MMP2_CLK_GPIO>;
+				resets = <&soc_clocks MMP2_CLK_GPIO>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+				ranges;
+
+				gcb0: gpio@d4019000 {
+					reg = <0xd4019000 0x4>;
+				};
+
+				gcb1: gpio@d4019004 {
+					reg = <0xd4019004 0x4>;
+				};
+
+				gcb2: gpio@d4019008 {
+					reg = <0xd4019008 0x4>;
+				};
+
+				gcb3: gpio@d4019100 {
+					reg = <0xd4019100 0x4>;
+				};
+
+				gcb4: gpio@d4019104 {
+					reg = <0xd4019104 0x4>;
+				};
+
+				gcb5: gpio@d4019108 {
+					reg = <0xd4019108 0x4>;
+				};
+			};
+
+			twsi1: i2c@d4011000 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4011000 0x70>;
+				interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI0>;
+				resets = <&soc_clocks MMP2_CLK_TWSI0>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				mrvl,i2c-fast-mode;
+				status = "disabled";
+			};
+
+			twsi2: i2c@d4031000 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4031000 0x70>;
+				interrupt-parent = <&twsi_mux>;
+				interrupts = <0>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI1>;
+				resets = <&soc_clocks MMP2_CLK_TWSI1>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			twsi3: i2c@d4032000 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4032000 0x70>;
+				interrupt-parent = <&twsi_mux>;
+				interrupts = <1>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI2>;
+				resets = <&soc_clocks MMP2_CLK_TWSI2>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			twsi4: i2c@d4033000 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4033000 0x70>;
+				interrupt-parent = <&twsi_mux>;
+				interrupts = <2>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI3>;
+				resets = <&soc_clocks MMP2_CLK_TWSI3>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+
+			twsi5: i2c@d4033800 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4033800 0x70>;
+				interrupt-parent = <&twsi_mux>;
+				interrupts = <3>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI4>;
+				resets = <&soc_clocks MMP2_CLK_TWSI4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			twsi6: i2c@d4034000 {
+				compatible = "mrvl,mmp-twsi";
+				reg = <0xd4034000 0x70>;
+				interrupt-parent = <&twsi_mux>;
+				interrupts = <4>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI5>;
+				resets = <&soc_clocks MMP2_CLK_TWSI5>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			rtc: rtc@d4010000 {
+				compatible = "mrvl,mmp-rtc";
+				reg = <0xd4010000 0x1000>;
+				interrupts = <1 0>;
+				interrupt-names = "rtc 1Hz", "rtc alarm";
+				interrupt-parent = <&rtc_mux>;
+				clocks = <&soc_clocks MMP2_CLK_RTC>;
+				resets = <&soc_clocks MMP2_CLK_RTC>;
+				status = "disabled";
+			};
+
+			ssp1: spi@d4035000 {
+				compatible = "marvell,mmp2-ssp";
+				reg = <0xd4035000 0x1000>;
+				clocks = <&soc_clocks MMP2_CLK_SSP0>;
+				interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			ssp2: spi@d4036000 {
+				compatible = "marvell,mmp2-ssp";
+				reg = <0xd4036000 0x1000>;
+				clocks = <&soc_clocks MMP2_CLK_SSP1>;
+				interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			ssp3: spi@d4037000 {
+				compatible = "marvell,mmp2-ssp";
+				reg = <0xd4037000 0x1000>;
+				clocks = <&soc_clocks MMP2_CLK_SSP2>;
+				interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+
+			ssp4: spi@d4039000 {
+				compatible = "marvell,mmp2-ssp";
+				reg = <0xd4039000 0x1000>;
+				clocks = <&soc_clocks MMP2_CLK_SSP3>;
+				interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				status = "disabled";
+			};
+		};
+
+		l2: l2-cache-controller@d0020000 {
+			compatible = "marvell,tauros3-cache", "arm,pl310-cache";
+			reg = <0xd0020000 0x1000>;
+			cache-unified;
+			cache-level = <2>;
+		};
+
+		soc_clocks: clocks@d4050000 {
+			compatible = "marvell,mmp2-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+			#power-domain-cells = <1>;
+		};
+
+		snoop-control-unit@e0000000 {
+			compatible = "arm,arm11mp-scu";
+			reg = <0xe0000000 0x100>;
+		};
+
+		gic: interrupt-controller@e0001000 {
+			compatible = "arm,arm11mp-gic";
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			reg = <0xe0001000 0x1000>,
+			      <0xe0000100 0x100>;
+		};
+
+		local-timer@e0000600 {
+			compatible = "arm,arm11mp-twd-timer";
+			interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
+						  IRQ_TYPE_EDGE_RISING)>;
+			reg = <0xe0000600 0x20>;
+		};
+
+		watchdog@e0000620 {
+			compatible = "arm,arm11mp-twd-wdt";
+			reg = <0xe0000620 0x20>;
+			interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) |
+						  IRQ_TYPE_EDGE_RISING)>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
index d1eae47b83f6..936ad010c83a 100644
--- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
+++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
@@ -43,11 +43,13 @@
 			compatible = "motorola,mapphone-cpcap-charger";
 			interrupts-extended = <
 				&cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0
-				&cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0
+				&cpcap 22 0 &cpcap 21 0 &cpcap 20 0 &cpcap 19 0
+				&cpcap 54 0
 			>;
 			interrupt-names =
 				"chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
-				"rvrs_mode", "chrgcurr1", "vbusvld", "battdetb";
+				"rvrs_mode", "chrgcurr2", "chrgcurr1", "vbusvld",
+				"battdetb";
 			mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW
 				      &gpio3 23 GPIO_ACTIVE_LOW>;
 			io-channels = <&cpcap_adc 0 &cpcap_adc 1
@@ -160,12 +162,12 @@
 		regulator-enable-ramp-delay = <1000>;
 	};
 
-	/* Used by DSS */
+	/* Used by DSS and is the "zerov_regulator" trigger for SoC off mode */
 	vcsi: VCSI {
 		regulator-min-microvolt = <1800000>;
 		regulator-max-microvolt = <1800000>;
 		regulator-enable-ramp-delay = <1000>;
-		regulator-boot-on;
+		regulator-always-on;
 	};
 
 	vdac: VDAC {
diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
new file mode 100644
index 000000000000..da6b107da84a
--- /dev/null
+++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "omap443x.dtsi"
+#include "motorola-cpcap-mapphone.dtsi"
+
+/ {
+	chosen {
+		stdout-path = &uart3;
+	};
+
+	aliases {
+		display0 = &lcd0;
+		display1 = &hdmi0;
+	};
+
+	/*
+	 * We seem to have only 1021 MB accessible, 1021 - 1022 is locked,
+	 * then 1023 - 1024 seems to contain mbm.
+	 */
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x3fd00000>;	/* 1021 MB */
+	};
+
+	/* Poweroff GPIO probably connected to CPCAP */
+	gpio-poweroff {
+		compatible = "gpio-poweroff";
+		pinctrl-0 = <&poweroff_gpio>;
+		pinctrl-names = "default";
+		gpios = <&gpio2 18 GPIO_ACTIVE_LOW>;	/* gpio50 */
+	};
+
+	hdmi0: connector {
+		compatible = "hdmi-connector";
+		pinctrl-0 = <&hdmi_hpd_gpio>;
+		pinctrl-names = "default";
+		label = "hdmi";
+		type = "d";
+
+		hpd-gpios = <&gpio2 31 GPIO_ACTIVE_HIGH>;	/* gpio63 */
+
+		port {
+			hdmi_connector_in: endpoint {
+				remote-endpoint = <&hdmi_out>;
+			};
+		};
+	};
+
+	/*
+	 * HDMI 5V regulator probably sourced from battery. Let's keep
+	 * keep this as always enabled for HDMI to work until we've
+	 * figured what the encoder chip is.
+	 */
+	hdmi_regulator: regulator-hdmi {
+		compatible = "regulator-fixed";
+		regulator-name = "hdmi";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio2 27 GPIO_ACTIVE_HIGH>;	/* gpio59 */
+		enable-active-high;
+		regulator-always-on;
+	};
+
+	/* FS USB Host PHY on port 1 for mdm6600 */
+	fsusb1_phy: usb-phy@1 {
+		compatible = "motorola,mapphone-mdm6600";
+		pinctrl-0 = <&usb_mdm6600_pins>;
+		pinctrl-names = "default";
+		enable-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>;     /* gpio_95 */
+		power-gpios = <&gpio2 22 GPIO_ACTIVE_HIGH>;	/* gpio_54 */
+		reset-gpios = <&gpio2 17 GPIO_ACTIVE_HIGH>;	/* gpio_49 */
+		/* mode: gpio_148 gpio_149 */
+		motorola,mode-gpios = <&gpio5 20 GPIO_ACTIVE_HIGH>,
+				      <&gpio5 21 GPIO_ACTIVE_HIGH>;
+		/* cmd: gpio_103 gpio_104 gpio_142 */
+		motorola,cmd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>,
+				     <&gpio4 8 GPIO_ACTIVE_HIGH>,
+				     <&gpio5 14 GPIO_ACTIVE_HIGH>;
+		/* status: gpio_52 gpio_53 gpio_55 */
+		motorola,status-gpios = <&gpio2 20 GPIO_ACTIVE_HIGH>,
+					<&gpio2 21 GPIO_ACTIVE_HIGH>,
+					<&gpio2 23 GPIO_ACTIVE_HIGH>;
+		#phy-cells = <0>;
+	};
+
+	/* HS USB host TLL nop-phy on port 2 for w3glte */
+	hsusb2_phy: usb-phy@2 {
+		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
+	};
+
+	/* LCD regulator from sw5 source */
+	lcd_regulator: regulator-lcd {
+		compatible = "regulator-fixed";
+		regulator-name = "lcd";
+		regulator-min-microvolt = <5050000>;
+		regulator-max-microvolt = <5050000>;
+		gpio = <&gpio4 0 GPIO_ACTIVE_HIGH>;	/* gpio96 */
+		enable-active-high;
+		vin-supply = <&sw5>;
+	};
+
+	/* This is probably coming straight from the battery.. */
+	wl12xx_vmmc: regulator-wl12xx {
+		compatible = "regulator-fixed";
+		regulator-name = "vwl1271";
+		regulator-min-microvolt = <1650000>;
+		regulator-max-microvolt = <1650000>;
+		gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>;	/* gpio94 */
+		startup-delay-us = <70000>;
+		enable-active-high;
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+
+		volume_down {
+			label = "Volume Down";
+			gpios = <&gpio5 26 GPIO_ACTIVE_LOW>; /* gpio154 */
+			linux,code = <KEY_VOLUMEDOWN>;
+			linux,can-disable;
+			/* Value above 7.95ms for no GPIO hardware debounce */
+			debounce-interval = <10>;
+		};
+
+		slider {
+			label = "Keypad Slide";
+			gpios = <&gpio4 26 GPIO_ACTIVE_HIGH>; /* gpio122 */
+			linux,input-type = <EV_SW>;
+			linux,code = <SW_KEYPAD_SLIDE>;
+			linux,can-disable;
+			/* Value above 7.95ms for no GPIO hardware debounce */
+			debounce-interval = <10>;
+		};
+	};
+
+	soundcard {
+		compatible = "audio-graph-card";
+		label = "Droid 4 Audio";
+
+		simple-graph-card,widgets =
+			"Speaker", "Earpiece",
+			"Speaker", "Loudspeaker",
+			"Headphone", "Headphone Jack",
+			"Microphone", "Internal Mic";
+
+		simple-graph-card,routing =
+			"Earpiece", "EP",
+			"Loudspeaker", "SPKR",
+			"Headphone Jack", "HSL",
+			"Headphone Jack", "HSR",
+			"MICR", "Internal Mic";
+
+		dais = <&mcbsp2_port>, <&mcbsp3_port>;
+	};
+
+	pwm8: dmtimer-pwm-8 {
+		pinctrl-names = "default";
+		pinctrl-0 = <&vibrator_direction_pin>;
+
+		compatible = "ti,omap-dmtimer-pwm";
+		#pwm-cells = <3>;
+		ti,timers = <&timer8>;
+		ti,clock-source = <0x01>;
+	};
+
+	pwm9: dmtimer-pwm-9 {
+		pinctrl-names = "default";
+		pinctrl-0 = <&vibrator_enable_pin>;
+
+		compatible = "ti,omap-dmtimer-pwm";
+		#pwm-cells = <3>;
+		ti,timers = <&timer9>;
+		ti,clock-source = <0x01>;
+	};
+
+	vibrator {
+		compatible = "pwm-vibrator";
+		pwms = <&pwm9 0 10000000 0>, <&pwm8 0 10000000 0>;
+		pwm-names = "enable", "direction";
+		direction-duty-cycle-ns = <10000000>;
+	};
+};
+
+&dss {
+	status = "okay";
+};
+
+&dsi1 {
+	status = "okay";
+	vdd-supply = <&vcsi>;
+
+	port {
+		dsi1_out_ep: endpoint {
+			remote-endpoint = <&lcd0_in>;
+			lanes = <0 1 2 3 4 5>;
+		};
+	};
+
+	lcd0: display {
+		compatible = "panel-dsi-cm";
+		label = "lcd0";
+		vddi-supply = <&lcd_regulator>;
+		reset-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>;	/* gpio101 */
+
+		width-mm = <50>;
+		height-mm = <89>;
+
+		panel-timing {
+			clock-frequency = <0>;		/* Calculated by dsi */
+
+			hback-porch = <2>;
+			hactive = <540>;
+			hfront-porch = <0>;
+			hsync-len = <2>;
+
+			vback-porch = <1>;
+			vactive = <960>;
+			vfront-porch = <0>;
+			vsync-len = <1>;
+
+			hsync-active = <0>;
+			vsync-active = <0>;
+			de-active = <1>;
+			pixelclk-active = <1>;
+		};
+
+		port {
+			lcd0_in: endpoint {
+				remote-endpoint = <&dsi1_out_ep>;
+			};
+		};
+	};
+};
+
+&hdmi {
+	status = "okay";
+	pinctrl-0 = <&dss_hdmi_pins>;
+	pinctrl-names = "default";
+	vdda-supply = <&vdac>;
+
+	port {
+		hdmi_out: endpoint {
+			remote-endpoint = <&hdmi_connector_in>;
+			lanes = <1 0 3 2 5 4 7 6>;
+		};
+	};
+};
+
+&i2c1 {
+	tmp105@48 {
+		compatible = "ti,tmp105";
+		reg = <0x48>;
+		pinctrl-0 = <&tmp105_irq>;
+		pinctrl-names = "default";
+		/* kpd_row0.gpio_178 */
+		interrupts-extended = <&gpio6 18 IRQ_TYPE_EDGE_FALLING
+				       &omap4_pmx_core 0x14e>;
+		interrupt-names = "irq", "wakeup";
+		wakeup-source;
+	};
+};
+
+&keypad {
+	keypad,num-rows = <8>;
+	keypad,num-columns = <8>;
+	linux,keymap = <
+
+	/* Row 1 */
+	MATRIX_KEY(0, 2, KEY_1)
+	MATRIX_KEY(0, 6, KEY_2)
+	MATRIX_KEY(2, 3, KEY_3)
+	MATRIX_KEY(0, 7, KEY_4)
+	MATRIX_KEY(0, 4, KEY_5)
+	MATRIX_KEY(5, 5, KEY_6)
+	MATRIX_KEY(0, 1, KEY_7)
+	MATRIX_KEY(0, 5, KEY_8)
+	MATRIX_KEY(0, 0, KEY_9)
+	MATRIX_KEY(1, 6, KEY_0)
+
+	/* Row 2 */
+	MATRIX_KEY(3, 4, KEY_APOSTROPHE)
+	MATRIX_KEY(7, 6, KEY_Q)
+	MATRIX_KEY(7, 7, KEY_W)
+	MATRIX_KEY(7, 2, KEY_E)
+	MATRIX_KEY(1, 0, KEY_R)
+	MATRIX_KEY(4, 4, KEY_T)
+	MATRIX_KEY(1, 2, KEY_Y)
+	MATRIX_KEY(6, 7, KEY_U)
+	MATRIX_KEY(2, 2, KEY_I)
+	MATRIX_KEY(5, 6, KEY_O)
+	MATRIX_KEY(3, 7, KEY_P)
+	MATRIX_KEY(6, 5, KEY_BACKSPACE)
+
+	/* Row 3 */
+	MATRIX_KEY(5, 4, KEY_TAB)
+	MATRIX_KEY(5, 7, KEY_A)
+	MATRIX_KEY(2, 7, KEY_S)
+	MATRIX_KEY(7, 0, KEY_D)
+	MATRIX_KEY(2, 6, KEY_F)
+	MATRIX_KEY(6, 2, KEY_G)
+	MATRIX_KEY(6, 6, KEY_H)
+	MATRIX_KEY(1, 4, KEY_J)
+	MATRIX_KEY(3, 1, KEY_K)
+	MATRIX_KEY(2, 1, KEY_L)
+	MATRIX_KEY(4, 6, KEY_ENTER)
+
+	/* Row 4 */
+	MATRIX_KEY(3, 6, KEY_LEFTSHIFT)		/* KEY_CAPSLOCK */
+	MATRIX_KEY(6, 1, KEY_Z)
+	MATRIX_KEY(7, 4, KEY_X)
+	MATRIX_KEY(5, 1, KEY_C)
+	MATRIX_KEY(1, 7, KEY_V)
+	MATRIX_KEY(2, 4, KEY_B)
+	MATRIX_KEY(4, 1, KEY_N)
+	MATRIX_KEY(1, 1, KEY_M)
+	MATRIX_KEY(3, 5, KEY_COMMA)
+	MATRIX_KEY(5, 2, KEY_DOT)
+	MATRIX_KEY(6, 3, KEY_UP)
+	MATRIX_KEY(7, 3, KEY_OK)
+
+	/* Row 5 */
+	MATRIX_KEY(2, 5, KEY_LEFTCTRL)		/* KEY_LEFTSHIFT */
+	MATRIX_KEY(4, 5, KEY_LEFTALT)		/* SYM */
+	MATRIX_KEY(6, 0, KEY_MINUS)
+	MATRIX_KEY(4, 7, KEY_EQUAL)
+	MATRIX_KEY(1, 5, KEY_SPACE)
+	MATRIX_KEY(3, 2, KEY_SLASH)
+	MATRIX_KEY(4, 3, KEY_LEFT)
+	MATRIX_KEY(5, 3, KEY_DOWN)
+	MATRIX_KEY(3, 3, KEY_RIGHT)
+
+	/* Side buttons, KEY_VOLUMEDOWN and KEY_PWER are on CPCAP? */
+	MATRIX_KEY(5, 0, KEY_VOLUMEUP)
+	>;
+};
+
+&mmc1 {
+	vmmc-supply = <&vwlan2>;
+	bus-width = <4>;
+	cd-gpios = <&gpio6 16 GPIO_ACTIVE_LOW>;	/* gpio176 */
+};
+
+&mmc2 {
+	vmmc-supply = <&vsdio>;
+	bus-width = <8>;
+	ti,non-removable;
+};
+
+&mmc3 {
+	vmmc-supply = <&wl12xx_vmmc>;
+	/* uart2_tx.sdmmc3_dat1 pad as wakeirq */
+	interrupts-extended = <&wakeupgen GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH
+			       &omap4_pmx_core 0xde>;
+	interrupt-names = "irq", "wakeup";
+	non-removable;
+	bus-width = <4>;
+	cap-power-off-card;
+	keep-power-in-suspend;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@2 {
+		compatible = "ti,wl1285", "ti,wl1283";
+		reg = <2>;
+		/* gpio_100 with gpmc_wait2 pad as wakeirq */
+		interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>,
+				      <&omap4_pmx_core 0x4e>;
+		interrupt-names = "irq", "wakeup";
+		ref-clock-frequency = <26000000>;
+		tcxo-clock-frequency = <26000000>;
+	};
+};
+
+&i2c1 {
+	led-controller@38 {
+		compatible = "ti,lm3532";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x38>;
+
+		enable-gpios = <&gpio6 12 GPIO_ACTIVE_HIGH>;
+
+		ramp-up-us = <1024>;
+		ramp-down-us = <8193>;
+
+		led@0 {
+			reg = <0>;
+			led-sources = <2>;
+			ti,led-mode = <0>;
+			label = ":backlight";
+			linux,default-trigger = "backlight";
+		};
+
+		led@1 {
+			reg = <1>;
+			led-sources = <1>;
+			ti,led-mode = <0>;
+			label = ":kbd_backlight";
+		};
+	};
+};
+
+&i2c2 {
+	touchscreen@4a {
+		compatible = "atmel,maxtouch";
+		reg = <0x4a>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&touchscreen_pins>;
+
+		reset-gpios = <&gpio6 13 GPIO_ACTIVE_HIGH>; /* gpio173 */
+
+		/* gpio_183 with sys_nirq2 pad as wakeup */
+		interrupts-extended = <&gpio6 23 IRQ_TYPE_EDGE_FALLING>,
+				      <&omap4_pmx_core 0x160>;
+		interrupt-names = "irq", "wakeup";
+		wakeup-source;
+	};
+
+	isl29030@44 {
+		compatible = "isil,isl29030";
+		reg = <0x44>;
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&als_proximity_pins>;
+
+		interrupt-parent = <&gpio6>;
+		interrupts = <17 IRQ_TYPE_LEVEL_LOW>; /* gpio177 */
+	};
+};
+
+&omap4_pmx_core {
+
+	/* hdmi_hpd.gpio_63 */
+	hdmi_hpd_gpio: pinmux_hdmi_hpd_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x098, PIN_INPUT | MUX_MODE3)
+		>;
+	};
+
+	/* hdmi_cec.hdmi_cec, hdmi_scl.hdmi_scl, hdmi_sda.hdmi_sda */
+	dss_hdmi_pins: pinmux_dss_hdmi_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)
+		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)
+		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)
+		>;
+	};
+
+	/* gpmc_ncs0.gpio_50 */
+	poweroff_gpio: pinmux_poweroff_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x074, PIN_OUTPUT_PULLUP | MUX_MODE3)
+		>;
+	};
+
+	/* kpd_row0.gpio_178 */
+	tmp105_irq: pinmux_tmp105_irq {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x18e, PIN_INPUT_PULLUP | MUX_MODE3)
+		>;
+	};
+
+	usb_gpio_mux_sel1: pinmux_usb_gpio_mux_sel1_pins {
+		/* gpio_60 */
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x088, PIN_OUTPUT | MUX_MODE3)
+		>;
+	};
+
+	touchscreen_pins: pinmux_touchscreen_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x180, PIN_OUTPUT | MUX_MODE3)
+		OMAP4_IOPAD(0x1a0, PIN_INPUT_PULLUP | MUX_MODE3)
+		>;
+	};
+
+	als_proximity_pins: pinmux_als_proximity_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x18c, PIN_INPUT_PULLUP | MUX_MODE3)
+		>;
+	};
+
+	usb_mdm6600_pins: pinmux_usb_mdm6600_pins {
+		pinctrl-single,pins = <
+		/* enable 0x4a1000d8 usbb1_ulpitll_dat7.gpio_95 ag16 */
+		OMAP4_IOPAD(0x0d8, PIN_INPUT | MUX_MODE3)
+
+		/* power 0x4a10007c gpmc_nwp.gpio_54 c25 */
+		OMAP4_IOPAD(0x07c, PIN_OUTPUT | MUX_MODE3)
+
+		/* reset 0x4a100072 gpmc_a25.gpio_49 d20 */
+		OMAP4_IOPAD(0x072, PIN_OUTPUT | MUX_MODE3)
+
+		/* mode0/bpwake 0x4a10014e sdmmc5_dat1.gpio_148 af4 */
+		OMAP4_IOPAD(0x14e, PIN_OUTPUT | MUX_MODE3)
+
+		/* mode1/apwake 0x4a100150 sdmmc5_dat2.gpio_149 ag3 */
+		OMAP4_IOPAD(0x150, PIN_OFF_OUTPUT_LOW | PIN_INPUT | MUX_MODE3)
+
+		/* status0 0x4a10007e gpmc_clk.gpio_55 b22 */
+		OMAP4_IOPAD(0x07e, PIN_INPUT | MUX_MODE3)
+
+		/* status1 0x4a10007a gpmc_ncs3.gpio_53 c22 */
+		OMAP4_IOPAD(0x07a, PIN_INPUT | MUX_MODE3)
+
+		/* status2 0x4a100078 gpmc_ncs2.gpio_52 d21 */
+		OMAP4_IOPAD(0x078, PIN_INPUT | MUX_MODE3)
+
+		/* cmd0 0x4a100094 gpmc_ncs6.gpio_103 c24 */
+		OMAP4_IOPAD(0x094, PIN_OUTPUT | MUX_MODE3)
+
+		/* cmd1 0x4a100096 gpmc_ncs7.gpio_104 d24 */
+		OMAP4_IOPAD(0x096, PIN_OUTPUT | MUX_MODE3)
+
+		/* cmd2 0x4a100142 uart3_rts_sd.gpio_142 f28 */
+		OMAP4_IOPAD(0x142, PIN_OUTPUT | MUX_MODE3)
+		>;
+	};
+
+	usb_ulpi_pins: pinmux_usb_ulpi_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x196, MUX_MODE7)
+		OMAP4_IOPAD(0x198, MUX_MODE7)
+		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1ba, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1bc, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE0)
+		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE0)
+		>;
+	};
+
+	/* usb0_otg_dp and usb0_otg_dm */
+	usb_utmi_pins: pinmux_usb_utmi_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x196, PIN_INPUT | MUX_MODE0)
+		OMAP4_IOPAD(0x198, PIN_INPUT | MUX_MODE0)
+		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1ba, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1bc, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE7)
+		>;
+	};
+
+	/*
+	 * Note that the v3.0.8 stock userspace dynamically remuxes uart1
+	 * rts pin probably for PM purposes to PIN_INPUT_PULLUP | MUX_MODE7
+	 * when not used. If needed, we can add rts pin remux later based
+	 * on power measurements.
+	 */
+	uart1_pins: pinmux_uart1_pins {
+		pinctrl-single,pins = <
+		/* 0x4a10013c mcspi1_cs2.uart1_cts ag23 */
+		OMAP4_IOPAD(0x13c, PIN_INPUT_PULLUP | MUX_MODE1)
+
+		/* 0x4a10013e mcspi1_cs3.uart1_rts ah23 */
+		OMAP4_IOPAD(0x13e, MUX_MODE1)
+
+		/* 0x4a100140 uart3_cts_rctx.uart1_tx f27 */
+		OMAP4_IOPAD(0x140, PIN_OUTPUT | MUX_MODE1)
+
+		/* 0x4a1001ca dpm_emu14.uart1_rx aa3 */
+		OMAP4_IOPAD(0x1ca, PIN_INPUT_PULLUP | MUX_MODE2)
+		>;
+	};
+
+	/* uart3_tx_irtx and uart3_rx_irrx */
+	uart3_pins: pinmux_uart3_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x196, MUX_MODE7)
+		OMAP4_IOPAD(0x198, MUX_MODE7)
+		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1ba, MUX_MODE2)
+		OMAP4_IOPAD(0x1bc, PIN_INPUT | MUX_MODE2)
+		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE7)
+		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE7)
+		>;
+	};
+
+	uart4_pins: pinmux_uart4_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x15c, PIN_INPUT | MUX_MODE0)		/* uart4_rx */
+		OMAP4_IOPAD(0x15e, PIN_OUTPUT | MUX_MODE0)		/* uart4_tx */
+		OMAP4_IOPAD(0x110, PIN_INPUT_PULLUP | MUX_MODE5)	/* uart4_cts */
+		OMAP4_IOPAD(0x112, PIN_OUTPUT_PULLUP | MUX_MODE5)	/* uart4_rts */
+		>;
+	};
+
+	mcbsp2_pins: pinmux_mcbsp2_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x0f6, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_clkx */
+		OMAP4_IOPAD(0x0f8, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_dr */
+		OMAP4_IOPAD(0x0fa, PIN_OUTPUT | MUX_MODE0)	/* abe_mcbsp2_dx */
+		OMAP4_IOPAD(0x0fc, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_fsx */
+		>;
+	};
+
+	mcbsp3_pins: pinmux_mcbsp3_pins {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x106, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_dr */
+		OMAP4_IOPAD(0x108, PIN_OUTPUT | MUX_MODE1)	/* abe_mcbsp3_dx */
+		OMAP4_IOPAD(0x10a, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_clkx */
+		OMAP4_IOPAD(0x10c, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_fsx */
+		>;
+	};
+
+	vibrator_direction_pin: pinmux_vibrator_direction_pin {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x1ce, PIN_OUTPUT | MUX_MODE1)	/* dmtimer8_pwm_evt (gpio_27) */
+		>;
+	};
+
+	vibrator_enable_pin: pinmux_vibrator_enable_pin {
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0X1d0, PIN_OUTPUT | MUX_MODE1)	/* dmtimer9_pwm_evt (gpio_28) */
+		>;
+	};
+};
+
+&omap4_pmx_wkup {
+	usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins {
+		/* gpio_wk0 */
+		pinctrl-single,pins = <
+		OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3)
+		>;
+	};
+};
+
+/* Configure pwm clock source for timers 8 & 9 */
+&timer8 {
+	assigned-clocks = <&abe_clkctrl OMAP4_TIMER8_CLKCTRL 24>;
+	assigned-clock-parents = <&sys_clkin_ck>;
+};
+
+&timer9 {
+	assigned-clocks = <&l4_per_clkctrl OMAP4_TIMER9_CLKCTRL 24>;
+	assigned-clock-parents = <&sys_clkin_ck>;
+};
+
+/*
+ * As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for
+ * uart1 wakeirq.
+ */
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>;
+	interrupts-extended = <&wakeupgen GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH
+			       &omap4_pmx_core 0xfc>;
+};
+
+&uart3 {
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+			       &omap4_pmx_core 0x17c>;
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart4_pins>;
+
+	bluetooth {
+		compatible = "ti,wl1285-st";
+		enable-gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>; /* gpio 174 */
+		max-speed = <3686400>;
+	};
+};
+
+&usbhsohci {
+	phys = <&fsusb1_phy>;
+	phy-names = "usb";
+};
+
+&usbhsehci {
+	phys = <&hsusb2_phy>;
+};
+
+&usbhshost {
+	port1-mode = "ohci-phy-4pin-dpdm";
+	port2-mode = "ehci-tll";
+};
+
+/* Internal UTMI+ PHY used for OTG, CPCAP ULPI PHY for detection and charger */
+&usb_otg_hs {
+	interface-type = <1>;
+	mode = <3>;
+
+	/*
+	 * Max 300 mA steps based on similar PMIC MC13783UG.pdf "Table 10-4.
+	 * VBUS Regulator Main Characteristics". Binding uses 2 mA units.
+	 */
+	power = <150>;
+};
+
+&i2c4 {
+	ak8975: magnetometer@c {
+		compatible = "asahi-kasei,ak8975";
+		reg = <0x0c>;
+
+		vdd-supply = <&vhvio>;
+
+		interrupt-parent = <&gpio6>;
+		interrupts = <15 IRQ_TYPE_EDGE_RISING>; /* gpio175 */
+
+		rotation-matrix = "-1", "0", "0",
+				  "0", "1", "0",
+				  "0", "0", "-1";
+
+	};
+
+	lis3dh: accelerometer@18 {
+		compatible = "st,lis3dh-accel";
+		reg = <0x18>;
+
+		vdd-supply = <&vhvio>;
+
+		interrupt-parent = <&gpio2>;
+		interrupts = <2 IRQ_TYPE_EDGE_BOTH>; /* gpio34 */
+
+		rotation-matrix = "0", "-1", "0",
+				  "1", "0", "0",
+				  "0", "0", "1";
+	};
+};
+
+&mcbsp2 {
+	#sound-dai-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcbsp2_pins>;
+	status = "okay";
+
+	mcbsp2_port: port {
+		cpu_dai2: endpoint {
+			dai-format = "i2s";
+			remote-endpoint = <&cpcap_audio_codec0>;
+			frame-master = <&cpcap_audio_codec0>;
+			bitclock-master = <&cpcap_audio_codec0>;
+		};
+	};
+};
+
+&mcbsp3 {
+	#sound-dai-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcbsp3_pins>;
+	status = "okay";
+
+	mcbsp3_port: port {
+		cpu_dai3: endpoint {
+			dai-format = "dsp_a";
+			frame-master = <&cpcap_audio_codec1>;
+			bitclock-master = <&cpcap_audio_codec1>;
+			remote-endpoint = <&cpcap_audio_codec1>;
+		};
+	};
+};
+
+&cpcap_audio_codec0 {
+	remote-endpoint = <&cpu_dai2>;
+};
+
+&cpcap_audio_codec1 {
+	remote-endpoint = <&cpu_dai3>;
+};
diff --git a/arch/arm/boot/dts/mt6323.dtsi b/arch/arm/boot/dts/mt6323.dtsi
index ba397407c1dd..7fda40ab5fe8 100644
--- a/arch/arm/boot/dts/mt6323.dtsi
+++ b/arch/arm/boot/dts/mt6323.dtsi
@@ -238,5 +238,32 @@
 				regulator-enable-ramp-delay = <216>;
 			};
 		};
+
+		mt6323keys: mt6323keys {
+			compatible = "mediatek,mt6323-keys";
+			mediatek,long-press-mode = <1>;
+			power-off-time-sec = <0>;
+
+			power {
+				linux,keycodes = <116>;
+				wakeup-source;
+			};
+
+			home {
+				linux,keycodes = <114>;
+			};
+		};
+
+		codec: mt6397codec {
+			compatible = "mediatek,mt6397-codec";
+		};
+
+		power-controller {
+			compatible = "mediatek,mt6323-pwrc";
+		};
+
+		rtc {
+			compatible = "mediatek,mt6323-rtc";
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 1aa99fc1487a..125ed933ca75 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3 BeagleBoard xM";
-	compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3";
+	compatible = "ti,omap3-beagle-xm", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index e3df3c166902..4ed3f93f5841 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3 BeagleBoard";
-	compatible = "ti,omap3-beagle", "ti,omap3";
+	compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-cm-t3530.dts b/arch/arm/boot/dts/omap3-cm-t3530.dts
index 76e52c78cbb4..32dbaeaed147 100644
--- a/arch/arm/boot/dts/omap3-cm-t3530.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3530.dts
@@ -9,7 +9,7 @@
 
 / {
 	model = "CompuLab CM-T3530";
-	compatible = "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+	compatible = "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
 
 	/* Regulator to trigger the reset signal of the Wifi module */
 	mmc2_sdio_reset: regulator-mmc2-sdio-reset {
diff --git a/arch/arm/boot/dts/omap3-cm-t3730.dts b/arch/arm/boot/dts/omap3-cm-t3730.dts
index 6e944dfa0f3d..683819bf0915 100644
--- a/arch/arm/boot/dts/omap3-cm-t3730.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3730.dts
@@ -9,7 +9,7 @@
 
 / {
 	model = "CompuLab CM-T3730";
-	compatible = "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+	compatible = "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	wl12xx_vmmc2: wl12xx_vmmc2 {
 		compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
index a80fc60bc773..afed85078ad8 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
@@ -11,7 +11,7 @@
 #include "omap3-devkit8000-lcd-common.dtsi"
 / {
 	model = "TimLL OMAP3 Devkit8000 with 4.3'' LCD panel";
-	compatible = "timll,omap3-devkit8000", "ti,omap3";
+	compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
 
 	lcd0: display {
 		panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
index 0753776071f8..07c51a105c0d 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
@@ -11,7 +11,7 @@
 #include "omap3-devkit8000-lcd-common.dtsi"
 / {
 	model = "TimLL OMAP3 Devkit8000 with 7.0'' LCD panel";
-	compatible = "timll,omap3-devkit8000", "ti,omap3";
+	compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
 
 	lcd0: display {
 		panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index faafc48d8f61..162d0726b008 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -7,7 +7,7 @@
 #include "omap3-devkit8000-common.dtsi"
 / {
 	model = "TimLL OMAP3 Devkit8000";
-	compatible = "timll,omap3-devkit8000", "ti,omap3";
+	compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
 
 	aliases {
 		display1 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index b6ef1a7ac8a4..409a758c99f1 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -11,7 +11,7 @@
 
 / {
 	model = "OMAP3 GTA04";
-	compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3";
+	compatible = "ti,omap3-gta04", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-ha-lcd.dts b/arch/arm/boot/dts/omap3-ha-lcd.dts
index badb9b3c8897..c9ecbc45c8e2 100644
--- a/arch/arm/boot/dts/omap3-ha-lcd.dts
+++ b/arch/arm/boot/dts/omap3-ha-lcd.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3 HEAD acoustics LCD-baseboard with TAO3530 SOM";
-	compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+	compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
 };
 
 &omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-ha.dts b/arch/arm/boot/dts/omap3-ha.dts
index a5365252bfbe..35c4e15abeb7 100644
--- a/arch/arm/boot/dts/omap3-ha.dts
+++ b/arch/arm/boot/dts/omap3-ha.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3 HEAD acoustics baseboard with TAO3530 SOM";
-	compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+	compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
 };
 
 &omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
index 03dcd05fb8a0..567232584f08 100644
--- a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
+++ b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "IGEPv2 Rev. F (TI OMAP AM/DM37x)";
-	compatible = "isee,omap3-igep0020-rev-f", "ti,omap36xx", "ti,omap3";
+	compatible = "isee,omap3-igep0020-rev-f", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	/* Regulator to trigger the WL_EN signal of the Wifi module */
 	lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
@@ -49,3 +49,11 @@
 		interrupts = <17 IRQ_TYPE_EDGE_RISING>; /* gpio 177 */
 	};
 };
+
+&uart2 {
+	bluetooth {
+		compatible = "ti,wl1835-st";
+		enable-gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>; /* gpio 137 */
+		max-speed = <300000>;
+	};
+};
diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts
index 6d0519e3dfd0..e341535a7162 100644
--- a/arch/arm/boot/dts/omap3-igep0020.dts
+++ b/arch/arm/boot/dts/omap3-igep0020.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "IGEPv2 Rev. C (TI OMAP AM/DM37x)";
-	compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3";
+	compatible = "isee,omap3-igep0020", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	vmmcsdio_fixed: fixedregulator-mmcsdio {
 		compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
index 060acd1e803a..df6ba1219830 100644
--- a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
+++ b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "IGEP COM MODULE Rev. G (TI OMAP AM/DM37x)";
-	compatible = "isee,omap3-igep0030-rev-g", "ti,omap36xx", "ti,omap3";
+	compatible = "isee,omap3-igep0030-rev-g", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	/* Regulator to trigger the WL_EN signal of the Wifi module */
 	lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
@@ -71,3 +71,11 @@
 		interrupts = <8 IRQ_TYPE_EDGE_RISING>; /* gpio 136 */
 	};
 };
+
+&uart2 {
+	bluetooth {
+		compatible = "ti,wl1835-st";
+		enable-gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>; /* gpio 137 */
+		max-speed = <300000>;
+	};
+};
diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts
index 25170bd3c573..32f31035daa2 100644
--- a/arch/arm/boot/dts/omap3-igep0030.dts
+++ b/arch/arm/boot/dts/omap3-igep0030.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "IGEP COM MODULE Rev. E (TI OMAP AM/DM37x)";
-	compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3";
+	compatible = "isee,omap3-igep0030", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	vmmcsdio_fixed: fixedregulator-mmcsdio {
 		compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-ldp.dts b/arch/arm/boot/dts/omap3-ldp.dts
index 9a5fde2d9bce..ec9ba04ef43b 100644
--- a/arch/arm/boot/dts/omap3-ldp.dts
+++ b/arch/arm/boot/dts/omap3-ldp.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "TI OMAP3430 LDP (Zoom1 Labrador)";
-	compatible = "ti,omap3-ldp", "ti,omap3";
+	compatible = "ti,omap3-ldp", "ti,omap3430", "ti,omap3";
 
 	memory@80000000 {
 		device_type = "memory";
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index c22833d4e568..73d477898ec2 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -7,7 +7,7 @@
 
 / {
 	model = "INCOstartec LILLY-A83X module (DM3730)";
-	compatible = "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+	compatible = "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	chosen {
 			bootargs = "console=ttyO0,115200n8 vt.global_cursor_default=0 consoleblank=0";
diff --git a/arch/arm/boot/dts/omap3-lilly-dbb056.dts b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
index fec335400074..ecb4ef738e07 100644
--- a/arch/arm/boot/dts/omap3-lilly-dbb056.dts
+++ b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "INCOstartec LILLY-DBB056 (DM3730)";
-	compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+	compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &twl {
diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
index 74c0ff2350d3..2495a696cec6 100644
--- a/arch/arm/boot/dts/omap3-n9.dts
+++ b/arch/arm/boot/dts/omap3-n9.dts
@@ -12,7 +12,7 @@
 
 / {
 	model = "Nokia N9";
-	compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
+	compatible = "nokia,omap3-n9", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &i2c2 {
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 84a5ade1e865..a638e059135b 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -155,6 +155,12 @@
 		pwms = <&pwm9 0 26316 0>; /* 38000 Hz */
 	};
 
+	rom_rng: rng {
+		compatible = "nokia,n900-rom-rng";
+		clocks = <&rng_ick>;
+		clock-names = "ick";
+	};
+
 	/* controlled (enabled/disabled) directly by bcm2048 and wl1251 */
 	vctcxo: vctcxo {
 		compatible = "fixed-clock";
@@ -1013,6 +1019,11 @@
 	};
 };
 
+/* RNG not directly accessible on n900, see omap3-rom-rng instead */
+&rng_target {
+	status = "disabled";
+};
+
 &usb_otg_hs {
 	interface-type = <0>;
 	usb-phy = <&usb2_phy>;
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index 6681d4519e97..a075b63f3087 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -11,13 +11,6 @@
 	cpus {
 		cpu@0 {
 			cpu0-supply = <&vcc>;
-			operating-points = <
-				/* kHz    uV */
-				300000  1012500
-				600000  1200000
-				800000  1325000
-				1000000	1375000
-			>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/omap3-n950.dts b/arch/arm/boot/dts/omap3-n950.dts
index 9886bf8b90ab..31d47a1fad84 100644
--- a/arch/arm/boot/dts/omap3-n950.dts
+++ b/arch/arm/boot/dts/omap3-n950.dts
@@ -12,7 +12,7 @@
 
 / {
 	model = "Nokia N950";
-	compatible = "nokia,omap3-n950", "ti,omap36xx", "ti,omap3";
+	compatible = "nokia,omap3-n950", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	keys {
 		compatible = "gpio-keys";
diff --git a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
index 18338576c41d..7f04dfad8203 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
@@ -14,5 +14,5 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Alto35";
-	compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
index f204c8af8281..bc5a04e03336 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
@@ -14,7 +14,7 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Chestnut43";
-	compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
index c633f7cee68e..065c31cbf0e2 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
@@ -14,7 +14,7 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Gallop43";
-	compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
index fb88ebc9858c..e38c1c51392c 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
@@ -14,7 +14,7 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo35";
-	compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
index 76cca00d97b6..e6dc23159c4d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
@@ -14,7 +14,7 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo43";
-	compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-summit.dts b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
index cc081a9e4c1e..587c08ce282d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-summit.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
@@ -14,7 +14,7 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Summit";
-	compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
index 1de41c0826e0..f57de6010994 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
@@ -14,6 +14,6 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Tobi";
-	compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
index 9ed13118ed8e..281af6c113be 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
@@ -14,5 +14,5 @@
 
 / {
 	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on TobiDuo";
-	compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+	compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/omap3-pandora-1ghz.dts b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
index 81b957f33c9f..ea509956d7ac 100644
--- a/arch/arm/boot/dts/omap3-pandora-1ghz.dts
+++ b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
@@ -16,7 +16,7 @@
 / {
 	model = "Pandora Handheld Console 1GHz";
 
-	compatible = "openpandora,omap3-pandora-1ghz", "ti,omap36xx", "ti,omap3";
+	compatible = "openpandora,omap3-pandora-1ghz", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index ec5891718ae6..150d5be42d27 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -226,6 +226,17 @@
 		gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>;	/* GPIO_164 */
 	};
 
+	/* wl1251 wifi+bt module */
+	wlan_en: fixed-regulator-wg7210_en {
+		compatible = "regulator-fixed";
+		regulator-name = "vwlan";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		startup-delay-us = <50000>;
+		enable-active-high;
+		gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>;
+	};
+
 	/* wg7210 (wifi+bt module) 32k clock buffer */
 	wg7210_32k: fixed-regulator-wg7210_32k {
 		compatible = "regulator-fixed";
@@ -522,9 +533,30 @@
 	/*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/	/* GPIO_127 */
 };
 
-/* mmc3 is probed using pdata-quirks to pass wl1251 card data */
 &mmc3 {
-	status = "disabled";
+	vmmc-supply = <&wlan_en>;
+
+	bus-width = <4>;
+	non-removable;
+	ti,non-removable;
+	cap-power-off-card;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc3_pins>;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	wlan: wifi@1 {
+		compatible = "ti,wl1251";
+
+		reg = <1>;
+
+		interrupt-parent = <&gpio1>;
+		interrupts = <21 IRQ_TYPE_LEVEL_HIGH>;	/* GPIO_21 */
+
+		ti,wl1251-has-eeprom;
+	};
 };
 
 /* bluetooth*/
diff --git a/arch/arm/boot/dts/omap3-sbc-t3530.dts b/arch/arm/boot/dts/omap3-sbc-t3530.dts
index ae96002abb3b..24bf3fd86641 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3530.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3530.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "CompuLab SBC-T3530 with CM-T3530";
-	compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+	compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
 
 	aliases {
 		display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sbc-t3730.dts b/arch/arm/boot/dts/omap3-sbc-t3730.dts
index 7de6df16fc17..eb3893b9535e 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3730.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3730.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "CompuLab SBC-T3730 with CM-T3730";
-	compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+	compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	aliases {
 		display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sniper.dts b/arch/arm/boot/dts/omap3-sniper.dts
index 40a87330e8c3..b6879cdc5c13 100644
--- a/arch/arm/boot/dts/omap3-sniper.dts
+++ b/arch/arm/boot/dts/omap3-sniper.dts
@@ -9,7 +9,7 @@
 
 / {
 	model = "LG Optimus Black";
-	compatible = "lg,omap3-sniper", "ti,omap36xx", "ti,omap3";
+	compatible = "lg,omap3-sniper", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi
index a7a04d78deeb..f24e2326cfa7 100644
--- a/arch/arm/boot/dts/omap3-tao3530.dtsi
+++ b/arch/arm/boot/dts/omap3-tao3530.dtsi
@@ -222,7 +222,7 @@
 	pinctrl-0 = <&mmc1_pins>;
 	vmmc-supply = <&vmmc1>;
 	vqmmc-supply = <&vsim>;
-	cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>;
 	bus-width = <8>;
 };
 
diff --git a/arch/arm/boot/dts/omap3-thunder.dts b/arch/arm/boot/dts/omap3-thunder.dts
index 6276e7079b36..64221e3b3477 100644
--- a/arch/arm/boot/dts/omap3-thunder.dts
+++ b/arch/arm/boot/dts/omap3-thunder.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3 Thunder baseboard with TAO3530 SOM";
-	compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+	compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
 };
 
 &omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts
index db3a2fe84e99..d240e39f2151 100644
--- a/arch/arm/boot/dts/omap3-zoom3.dts
+++ b/arch/arm/boot/dts/omap3-zoom3.dts
@@ -9,7 +9,7 @@
 
 / {
 	model = "TI Zoom3";
-	compatible = "ti,omap3-zoom3", "ti,omap36xx", "ti,omap3";
+	compatible = "ti,omap3-zoom3", "ti,omap3630", "ti,omap36xx", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index 4043ecb38016..5698a3e241aa 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -8,6 +8,7 @@
  * kind, whether express or implied.
  */
 
+#include <dt-bindings/bus/ti-sysc.h>
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/pinctrl/omap.h>
@@ -502,6 +503,30 @@
 			status = "disabled";
 		};
 
+		/* Likely needs to be tagged disabled on HS devices */
+		rng_target: target-module@480a0000 {
+			compatible = "ti,sysc-omap2", "ti,sysc";
+			reg = <0x480a003c 0x4>,
+			      <0x480a0040 0x4>,
+			      <0x480a0044 0x4>;
+			reg-names = "rev", "sysc", "syss";
+			ti,sysc-mask = <(SYSC_OMAP2_AUTOIDLE)>;
+			ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+					<SYSC_IDLE_NO>;
+			ti,syss-mask = <1>;
+			clocks = <&rng_ick>;
+			clock-names = "ick";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 0x480a0000 0x2000>;
+
+			rng: rng@0 {
+				compatible = "ti,omap2-rng";
+				reg = <0x0 0x2000>;
+				interrupts = <52>;
+			};
+		};
+
 		mcbsp2: mcbsp@49022000 {
 			compatible = "ti,omap3-mcbsp";
 			reg = <0x49022000 0xff>,
diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts
index 0abd61108a53..7bfde8aac7ae 100644
--- a/arch/arm/boot/dts/omap3430-sdp.dts
+++ b/arch/arm/boot/dts/omap3430-sdp.dts
@@ -8,7 +8,7 @@
 
 / {
 	model = "TI OMAP3430 SDP";
-	compatible = "ti,omap3430-sdp", "ti,omap3";
+	compatible = "ti,omap3430-sdp", "ti,omap3430", "ti,omap3";
 
 	memory@80000000 {
 		device_type = "memory";
diff --git a/arch/arm/boot/dts/omap34xx-omap36xx-clocks.dtsi b/arch/arm/boot/dts/omap34xx-omap36xx-clocks.dtsi
index 5e9d1afcd422..21079cdf2663 100644
--- a/arch/arm/boot/dts/omap34xx-omap36xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap34xx-omap36xx-clocks.dtsi
@@ -260,6 +260,6 @@
 			 <&gpt10_ick>, <&mcbsp5_ick>, <&mcbsp1_ick>,
 			 <&omapctrl_ick>, <&aes2_ick>, <&sha12_ick>, <&icr_ick>,
 			 <&des2_ick>, <&mspro_ick>, <&mailboxes_ick>,
-			 <&mspro_fck>;
+			 <&rng_ick>, <&mspro_fck>;
 	};
 };
diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi
index 7b09cbee8bb8..c4dd9801840d 100644
--- a/arch/arm/boot/dts/omap34xx.dtsi
+++ b/arch/arm/boot/dts/omap34xx.dtsi
@@ -16,19 +16,67 @@
 / {
 	cpus {
 		cpu: cpu@0 {
-			/* OMAP343x/OMAP35xx variants OPP1-5 */
-			operating-points = <
-				/* kHz    uV */
-				125000   975000
-				250000  1075000
-				500000  1200000
-				550000  1270000
-				600000  1350000
-			>;
+			/* OMAP343x/OMAP35xx variants OPP1-6 */
+			operating-points-v2 = <&cpu0_opp_table>;
+
 			clock-latency = <300000>; /* From legacy driver */
 		};
 	};
 
+	/* see Documentation/devicetree/bindings/opp/opp.txt */
+	cpu0_opp_table: opp-table {
+		compatible = "operating-points-v2-ti-cpu";
+		syscon = <&scm_conf>;
+
+		opp1-125000000 {
+			opp-hz = /bits/ 64 <125000000>;
+			/*
+			 * we currently only select the max voltage from table
+			 * Table 3-3 of the omap3530 Data sheet (SPRS507F).
+			 * Format is: <target min max>
+			 */
+			opp-microvolt = <975000 975000 975000>;
+			/*
+			 * first value is silicon revision bit mask
+			 * second one 720MHz Device Identification bit mask
+			 */
+			opp-supported-hw = <0xffffffff 3>;
+		};
+
+		opp2-250000000 {
+			opp-hz = /bits/ 64 <250000000>;
+			opp-microvolt = <1075000 1075000 1075000>;
+			opp-supported-hw = <0xffffffff 3>;
+			opp-suspend;
+		};
+
+		opp3-500000000 {
+			opp-hz = /bits/ 64 <500000000>;
+			opp-microvolt = <1200000 1200000 1200000>;
+			opp-supported-hw = <0xffffffff 3>;
+		};
+
+		opp4-550000000 {
+			opp-hz = /bits/ 64 <550000000>;
+			opp-microvolt = <1275000 1275000 1275000>;
+			opp-supported-hw = <0xffffffff 3>;
+		};
+
+		opp5-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <1350000 1350000 1350000>;
+			opp-supported-hw = <0xffffffff 3>;
+		};
+
+		opp6-720000000 {
+			opp-hz = /bits/ 64 <720000000>;
+			opp-microvolt = <1350000 1350000 1350000>;
+			/* only high-speed grade omap3530 devices */
+			opp-supported-hw = <0xffffffff 2>;
+			turbo-mode;
+		};
+	};
+
 	ocp@68000000 {
 		omap3_pmx_core2: pinmux@480025d8 {
 			compatible = "ti,omap3-padconf", "pinctrl-single";
diff --git a/arch/arm/boot/dts/omap36xx-clocks.dtsi b/arch/arm/boot/dts/omap36xx-clocks.dtsi
index e66fc57ec35d..4e9cc9003594 100644
--- a/arch/arm/boot/dts/omap36xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap36xx-clocks.dtsi
@@ -105,3 +105,7 @@
 			 <&mcbsp4_ick>, <&uart4_fck>;
 	};
 };
+
+&dpll4_m4_ck {
+	ti,max-div = <31>;
+};
diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi
index 1e552f08f120..c618cb257d00 100644
--- a/arch/arm/boot/dts/omap36xx.dtsi
+++ b/arch/arm/boot/dts/omap36xx.dtsi
@@ -19,16 +19,65 @@
 	};
 
 	cpus {
-		/* OMAP3630/OMAP37xx 'standard device' variants OPP50 to OPP130 */
+		/* OMAP3630/OMAP37xx variants OPP50 to OPP130 and OPP1G */
 		cpu: cpu@0 {
-			operating-points = <
-				/* kHz    uV */
-				300000  1012500
-				600000  1200000
-				800000  1325000
-			>;
-			clock-latency = <300000>; /* From legacy driver */
+			operating-points-v2 = <&cpu0_opp_table>;
+
+			vbb-supply = <&abb_mpu_iva>;
+			clock-latency = <300000>; /* From omap-cpufreq driver */
+		};
+	};
+
+	/* see Documentation/devicetree/bindings/opp/opp.txt */
+	cpu0_opp_table: opp-table {
+		compatible = "operating-points-v2-ti-cpu";
+		syscon = <&scm_conf>;
+
+		opp50-300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+			/*
+			 * we currently only select the max voltage from table
+			 * Table 4-19 of the DM3730 Data sheet (SPRS685B)
+			 * Format is:	cpu0-supply:	<target min max>
+			 *		vbb-supply:	<target min max>
+			 */
+			opp-microvolt = <1012500 1012500 1012500>,
+					 <1012500 1012500 1012500>;
+			/*
+			 * first value is silicon revision bit mask
+			 * second one is "speed binned" bit mask
+			 */
+			opp-supported-hw = <0xffffffff 3>;
+			opp-suspend;
+		};
+
+		opp100-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <1200000 1200000 1200000>,
+					 <1200000 1200000 1200000>;
+			opp-supported-hw = <0xffffffff 3>;
+		};
+
+		opp130-800000000 {
+			opp-hz = /bits/ 64 <800000000>;
+			opp-microvolt = <1325000 1325000 1325000>,
+					 <1325000 1325000 1325000>;
+			opp-supported-hw = <0xffffffff 3>;
 		};
+
+		opp1g-1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <1375000 1375000 1375000>,
+					 <1375000 1375000 1375000>;
+			/* only on am/dm37x with speed-binned bit set */
+			opp-supported-hw = <0xffffffff 2>;
+			turbo-mode;
+		};
+	};
+
+	opp_supply_mpu_iva: opp_supply {
+		compatible = "ti,omap-opp-supply";
+		ti,absolute-max-voltage-uv = <1375000>;
 	};
 
 	ocp@68000000 {
diff --git a/arch/arm/boot/dts/omap3xxx-clocks.dtsi b/arch/arm/boot/dts/omap3xxx-clocks.dtsi
index 685c82a9d03e..0656c32439d2 100644
--- a/arch/arm/boot/dts/omap3xxx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap3xxx-clocks.dtsi
@@ -416,7 +416,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,divider-clock";
 		clocks = <&dpll4_ck>;
-		ti,max-div = <32>;
+		ti,max-div = <16>;
 		reg = <0x0e40>;
 		ti,index-starts-at-one;
 	};
diff --git a/arch/arm/boot/dts/omap4-droid-bionic-xt875.dts b/arch/arm/boot/dts/omap4-droid-bionic-xt875.dts
new file mode 100644
index 000000000000..ba5c35b7027d
--- /dev/null
+++ b/arch/arm/boot/dts/omap4-droid-bionic-xt875.dts
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/dts-v1/;
+
+#include "motorola-mapphone-common.dtsi"
+
+/ {
+	model = "Motorola Droid Bionic XT875";
+	compatible = "motorola,droid-bionic", "ti,omap4430", "ti,omap4";
+};
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index a40fe8d49da6..c0d2fd92aea3 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -1,784 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /dts-v1/;
 
-#include <dt-bindings/input/input.h>
-#include "omap443x.dtsi"
-#include "motorola-cpcap-mapphone.dtsi"
+#include "motorola-mapphone-common.dtsi"
 
 / {
 	model = "Motorola Droid 4 XT894";
 	compatible = "motorola,droid4", "ti,omap4430", "ti,omap4";
-
-	chosen {
-		stdout-path = &uart3;
-	};
-
-	aliases {
-		display0 = &lcd0;
-		display1 = &hdmi0;
-	};
-
-	/*
-	 * We seem to have only 1021 MB accessible, 1021 - 1022 is locked,
-	 * then 1023 - 1024 seems to contain mbm.
-	 */
-	memory {
-		device_type = "memory";
-		reg = <0x80000000 0x3fd00000>;	/* 1021 MB */
-	};
-
-	/* Poweroff GPIO probably connected to CPCAP */
-	gpio-poweroff {
-		compatible = "gpio-poweroff";
-		pinctrl-0 = <&poweroff_gpio>;
-		pinctrl-names = "default";
-		gpios = <&gpio2 18 GPIO_ACTIVE_LOW>;	/* gpio50 */
-	};
-
-	hdmi0: connector {
-		compatible = "hdmi-connector";
-		pinctrl-0 = <&hdmi_hpd_gpio>;
-		pinctrl-names = "default";
-		label = "hdmi";
-		type = "d";
-
-		hpd-gpios = <&gpio2 31 GPIO_ACTIVE_HIGH>;	/* gpio63 */
-
-		port {
-			hdmi_connector_in: endpoint {
-				remote-endpoint = <&hdmi_out>;
-			};
-		};
-	};
-
-	/*
-	 * HDMI 5V regulator probably sourced from battery. Let's keep
-	 * keep this as always enabled for HDMI to work until we've
-	 * figured what the encoder chip is.
-	 */
-	hdmi_regulator: regulator-hdmi {
-		compatible = "regulator-fixed";
-		regulator-name = "hdmi";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		gpio = <&gpio2 27 GPIO_ACTIVE_HIGH>;	/* gpio59 */
-		enable-active-high;
-		regulator-always-on;
-	};
-
-	/* FS USB Host PHY on port 1 for mdm6600 */
-	fsusb1_phy: usb-phy@1 {
-		compatible = "motorola,mapphone-mdm6600";
-		pinctrl-0 = <&usb_mdm6600_pins>;
-		pinctrl-names = "default";
-		enable-gpios = <&gpio3 31 GPIO_ACTIVE_LOW>;     /* gpio_95 */
-		power-gpios = <&gpio2 22 GPIO_ACTIVE_HIGH>;	/* gpio_54 */
-		reset-gpios = <&gpio2 17 GPIO_ACTIVE_HIGH>;	/* gpio_49 */
-		/* mode: gpio_148 gpio_149 */
-		motorola,mode-gpios = <&gpio5 20 GPIO_ACTIVE_HIGH>,
-				      <&gpio5 21 GPIO_ACTIVE_HIGH>;
-		/* cmd: gpio_103 gpio_104 gpio_142 */
-		motorola,cmd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>,
-				     <&gpio4 8 GPIO_ACTIVE_HIGH>,
-				     <&gpio5 14 GPIO_ACTIVE_HIGH>;
-		/* status: gpio_52 gpio_53 gpio_55 */
-		motorola,status-gpios = <&gpio2 20 GPIO_ACTIVE_HIGH>,
-					<&gpio2 21 GPIO_ACTIVE_HIGH>,
-					<&gpio2 23 GPIO_ACTIVE_HIGH>;
-		#phy-cells = <0>;
-	};
-
-	/* HS USB host TLL nop-phy on port 2 for w3glte */
-	hsusb2_phy: usb-phy@2 {
-		compatible = "usb-nop-xceiv";
-		#phy-cells = <0>;
-	};
-
-	/* LCD regulator from sw5 source */
-	lcd_regulator: regulator-lcd {
-		compatible = "regulator-fixed";
-		regulator-name = "lcd";
-		regulator-min-microvolt = <5050000>;
-		regulator-max-microvolt = <5050000>;
-		gpio = <&gpio4 0 GPIO_ACTIVE_HIGH>;	/* gpio96 */
-		enable-active-high;
-		vin-supply = <&sw5>;
-	};
-
-	/* This is probably coming straight from the battery.. */
-	wl12xx_vmmc: regulator-wl12xx {
-		compatible = "regulator-fixed";
-		regulator-name = "vwl1271";
-		regulator-min-microvolt = <1650000>;
-		regulator-max-microvolt = <1650000>;
-		gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>;	/* gpio94 */
-		startup-delay-us = <70000>;
-		enable-active-high;
-	};
-
-	gpio_keys {
-		compatible = "gpio-keys";
-
-		volume_down {
-			label = "Volume Down";
-			gpios = <&gpio5 26 GPIO_ACTIVE_LOW>; /* gpio154 */
-			linux,code = <KEY_VOLUMEDOWN>;
-			linux,can-disable;
-			/* Value above 7.95ms for no GPIO hardware debounce */
-			debounce-interval = <10>;
-		};
-
-		slider {
-			label = "Keypad Slide";
-			gpios = <&gpio4 26 GPIO_ACTIVE_HIGH>; /* gpio122 */
-			linux,input-type = <EV_SW>;
-			linux,code = <SW_KEYPAD_SLIDE>;
-			linux,can-disable;
-			/* Value above 7.95ms for no GPIO hardware debounce */
-			debounce-interval = <10>;
-		};
-	};
-
-	soundcard {
-		compatible = "audio-graph-card";
-		label = "Droid 4 Audio";
-
-		simple-graph-card,widgets =
-			"Speaker", "Earpiece",
-			"Speaker", "Loudspeaker",
-			"Headphone", "Headphone Jack",
-			"Microphone", "Internal Mic";
-
-		simple-graph-card,routing =
-			"Earpiece", "EP",
-			"Loudspeaker", "SPKR",
-			"Headphone Jack", "HSL",
-			"Headphone Jack", "HSR",
-			"MICR", "Internal Mic";
-
-		dais = <&mcbsp2_port>, <&mcbsp3_port>;
-	};
-
-	pwm8: dmtimer-pwm-8 {
-		pinctrl-names = "default";
-		pinctrl-0 = <&vibrator_direction_pin>;
-
-		compatible = "ti,omap-dmtimer-pwm";
-		#pwm-cells = <3>;
-		ti,timers = <&timer8>;
-		ti,clock-source = <0x01>;
-	};
-
-	pwm9: dmtimer-pwm-9 {
-		pinctrl-names = "default";
-		pinctrl-0 = <&vibrator_enable_pin>;
-
-		compatible = "ti,omap-dmtimer-pwm";
-		#pwm-cells = <3>;
-		ti,timers = <&timer9>;
-		ti,clock-source = <0x01>;
-	};
-
-	vibrator {
-		compatible = "pwm-vibrator";
-		pwms = <&pwm9 0 10000000 0>, <&pwm8 0 10000000 0>;
-		pwm-names = "enable", "direction";
-		direction-duty-cycle-ns = <10000000>;
-	};
-};
-
-&dss {
-	status = "okay";
-};
-
-&dsi1 {
-	status = "okay";
-	vdd-supply = <&vcsi>;
-
-	port {
-		dsi1_out_ep: endpoint {
-			remote-endpoint = <&lcd0_in>;
-			lanes = <0 1 2 3 4 5>;
-		};
-	};
-
-	lcd0: display {
-		compatible = "panel-dsi-cm";
-		label = "lcd0";
-		vddi-supply = <&lcd_regulator>;
-		reset-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>;	/* gpio101 */
-
-		width-mm = <50>;
-		height-mm = <89>;
-
-		panel-timing {
-			clock-frequency = <0>;		/* Calculated by dsi */
-
-			hback-porch = <2>;
-			hactive = <540>;
-			hfront-porch = <0>;
-			hsync-len = <2>;
-
-			vback-porch = <1>;
-			vactive = <960>;
-			vfront-porch = <0>;
-			vsync-len = <1>;
-
-			hsync-active = <0>;
-			vsync-active = <0>;
-			de-active = <1>;
-			pixelclk-active = <1>;
-		};
-
-		port {
-			lcd0_in: endpoint {
-				remote-endpoint = <&dsi1_out_ep>;
-			};
-		};
-	};
-};
-
-&hdmi {
-	status = "okay";
-	pinctrl-0 = <&dss_hdmi_pins>;
-	pinctrl-names = "default";
-	vdda-supply = <&vdac>;
-
-	port {
-		hdmi_out: endpoint {
-			remote-endpoint = <&hdmi_connector_in>;
-			lanes = <1 0 3 2 5 4 7 6>;
-		};
-	};
-};
-
-&i2c1 {
-	tmp105@48 {
-		compatible = "ti,tmp105";
-		reg = <0x48>;
-		pinctrl-0 = <&tmp105_irq>;
-		pinctrl-names = "default";
-		/* kpd_row0.gpio_178 */
-		interrupts-extended = <&gpio6 18 IRQ_TYPE_EDGE_FALLING
-				       &omap4_pmx_core 0x14e>;
-		interrupt-names = "irq", "wakeup";
-		wakeup-source;
-	};
-};
-
-&keypad {
-	keypad,num-rows = <8>;
-	keypad,num-columns = <8>;
-	linux,keymap = <
-
-	/* Row 1 */
-	MATRIX_KEY(0, 2, KEY_1)
-	MATRIX_KEY(0, 6, KEY_2)
-	MATRIX_KEY(2, 3, KEY_3)
-	MATRIX_KEY(0, 7, KEY_4)
-	MATRIX_KEY(0, 4, KEY_5)
-	MATRIX_KEY(5, 5, KEY_6)
-	MATRIX_KEY(0, 1, KEY_7)
-	MATRIX_KEY(0, 5, KEY_8)
-	MATRIX_KEY(0, 0, KEY_9)
-	MATRIX_KEY(1, 6, KEY_0)
-
-	/* Row 2 */
-	MATRIX_KEY(3, 4, KEY_APOSTROPHE)
-	MATRIX_KEY(7, 6, KEY_Q)
-	MATRIX_KEY(7, 7, KEY_W)
-	MATRIX_KEY(7, 2, KEY_E)
-	MATRIX_KEY(1, 0, KEY_R)
-	MATRIX_KEY(4, 4, KEY_T)
-	MATRIX_KEY(1, 2, KEY_Y)
-	MATRIX_KEY(6, 7, KEY_U)
-	MATRIX_KEY(2, 2, KEY_I)
-	MATRIX_KEY(5, 6, KEY_O)
-	MATRIX_KEY(3, 7, KEY_P)
-	MATRIX_KEY(6, 5, KEY_BACKSPACE)
-
-	/* Row 3 */
-	MATRIX_KEY(5, 4, KEY_TAB)
-	MATRIX_KEY(5, 7, KEY_A)
-	MATRIX_KEY(2, 7, KEY_S)
-	MATRIX_KEY(7, 0, KEY_D)
-	MATRIX_KEY(2, 6, KEY_F)
-	MATRIX_KEY(6, 2, KEY_G)
-	MATRIX_KEY(6, 6, KEY_H)
-	MATRIX_KEY(1, 4, KEY_J)
-	MATRIX_KEY(3, 1, KEY_K)
-	MATRIX_KEY(2, 1, KEY_L)
-	MATRIX_KEY(4, 6, KEY_ENTER)
-
-	/* Row 4 */
-	MATRIX_KEY(3, 6, KEY_LEFTSHIFT)		/* KEY_CAPSLOCK */
-	MATRIX_KEY(6, 1, KEY_Z)
-	MATRIX_KEY(7, 4, KEY_X)
-	MATRIX_KEY(5, 1, KEY_C)
-	MATRIX_KEY(1, 7, KEY_V)
-	MATRIX_KEY(2, 4, KEY_B)
-	MATRIX_KEY(4, 1, KEY_N)
-	MATRIX_KEY(1, 1, KEY_M)
-	MATRIX_KEY(3, 5, KEY_COMMA)
-	MATRIX_KEY(5, 2, KEY_DOT)
-	MATRIX_KEY(6, 3, KEY_UP)
-	MATRIX_KEY(7, 3, KEY_OK)
-
-	/* Row 5 */
-	MATRIX_KEY(2, 5, KEY_LEFTCTRL)		/* KEY_LEFTSHIFT */
-	MATRIX_KEY(4, 5, KEY_LEFTALT)		/* SYM */
-	MATRIX_KEY(6, 0, KEY_MINUS)
-	MATRIX_KEY(4, 7, KEY_EQUAL)
-	MATRIX_KEY(1, 5, KEY_SPACE)
-	MATRIX_KEY(3, 2, KEY_SLASH)
-	MATRIX_KEY(4, 3, KEY_LEFT)
-	MATRIX_KEY(5, 3, KEY_DOWN)
-	MATRIX_KEY(3, 3, KEY_RIGHT)
-
-	/* Side buttons, KEY_VOLUMEDOWN and KEY_PWER are on CPCAP? */
-	MATRIX_KEY(5, 0, KEY_VOLUMEUP)
-	>;
-};
-
-&mmc1 {
-	vmmc-supply = <&vwlan2>;
-	bus-width = <4>;
-	cd-gpios = <&gpio6 16 GPIO_ACTIVE_LOW>;	/* gpio176 */
-};
-
-&mmc2 {
-	vmmc-supply = <&vsdio>;
-	bus-width = <8>;
-	ti,non-removable;
-};
-
-&mmc3 {
-	vmmc-supply = <&wl12xx_vmmc>;
-	/* uart2_tx.sdmmc3_dat1 pad as wakeirq */
-	interrupts-extended = <&wakeupgen GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH
-			       &omap4_pmx_core 0xde>;
-	interrupt-names = "irq", "wakeup";
-	non-removable;
-	bus-width = <4>;
-	cap-power-off-card;
-	keep-power-in-suspend;
-
-	#address-cells = <1>;
-	#size-cells = <0>;
-	wlcore: wlcore@2 {
-		compatible = "ti,wl1285", "ti,wl1283";
-		reg = <2>;
-		/* gpio_100 with gpmc_wait2 pad as wakeirq */
-		interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>,
-				      <&omap4_pmx_core 0x4e>;
-		interrupt-names = "irq", "wakeup";
-		ref-clock-frequency = <26000000>;
-		tcxo-clock-frequency = <26000000>;
-	};
-};
-
-&i2c1 {
-	led-controller@38 {
-		compatible = "ti,lm3532";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0x38>;
-
-		enable-gpios = <&gpio6 12 GPIO_ACTIVE_HIGH>;
-
-		ramp-up-us = <1024>;
-		ramp-down-us = <8193>;
-
-		led@0 {
-			reg = <0>;
-			led-sources = <2>;
-			ti,led-mode = <0>;
-			label = ":backlight";
-			linux,default-trigger = "backlight";
-		};
-
-		led@1 {
-			reg = <1>;
-			led-sources = <1>;
-			ti,led-mode = <0>;
-			label = ":kbd_backlight";
-		};
-	};
-};
-
-&i2c2 {
-	touchscreen@4a {
-		compatible = "atmel,maxtouch";
-		reg = <0x4a>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&touchscreen_pins>;
-
-		reset-gpios = <&gpio6 13 GPIO_ACTIVE_HIGH>; /* gpio173 */
-
-		/* gpio_183 with sys_nirq2 pad as wakeup */
-		interrupts-extended = <&gpio6 23 IRQ_TYPE_EDGE_FALLING>,
-				      <&omap4_pmx_core 0x160>;
-		interrupt-names = "irq", "wakeup";
-		wakeup-source;
-	};
-
-	isl29030@44 {
-		compatible = "isil,isl29030";
-		reg = <0x44>;
-
-		pinctrl-names = "default";
-		pinctrl-0 = <&als_proximity_pins>;
-
-		interrupt-parent = <&gpio6>;
-		interrupts = <17 IRQ_TYPE_LEVEL_LOW>; /* gpio177 */
-	};
-};
-
-&omap4_pmx_core {
-
-	/* hdmi_hpd.gpio_63 */
-	hdmi_hpd_gpio: pinmux_hdmi_hpd_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x098, PIN_INPUT | MUX_MODE3)
-		>;
-	};
-
-	/* hdmi_cec.hdmi_cec, hdmi_scl.hdmi_scl, hdmi_sda.hdmi_sda */
-	dss_hdmi_pins: pinmux_dss_hdmi_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)
-		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)
-		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)
-		>;
-	};
-
-	/* gpmc_ncs0.gpio_50 */
-	poweroff_gpio: pinmux_poweroff_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x074, PIN_OUTPUT_PULLUP | MUX_MODE3)
-		>;
-	};
-
-	/* kpd_row0.gpio_178 */
-	tmp105_irq: pinmux_tmp105_irq {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x18e, PIN_INPUT_PULLUP | MUX_MODE3)
-		>;
-	};
-
-	usb_gpio_mux_sel1: pinmux_usb_gpio_mux_sel1_pins {
-		/* gpio_60 */
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x088, PIN_OUTPUT | MUX_MODE3)
-		>;
-	};
-
-	touchscreen_pins: pinmux_touchscreen_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x180, PIN_OUTPUT | MUX_MODE3)
-		OMAP4_IOPAD(0x1a0, PIN_INPUT_PULLUP | MUX_MODE3)
-		>;
-	};
-
-	als_proximity_pins: pinmux_als_proximity_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x18c, PIN_INPUT_PULLUP | MUX_MODE3)
-		>;
-	};
-
-	usb_mdm6600_pins: pinmux_usb_mdm6600_pins {
-		pinctrl-single,pins = <
-		/* enable 0x4a1000d8 usbb1_ulpitll_dat7.gpio_95 ag16 */
-		OMAP4_IOPAD(0x0d8, PIN_INPUT | MUX_MODE3)
-
-		/* power 0x4a10007c gpmc_nwp.gpio_54 c25 */
-		OMAP4_IOPAD(0x07c, PIN_OUTPUT | MUX_MODE3)
-
-		/* reset 0x4a100072 gpmc_a25.gpio_49 d20 */
-		OMAP4_IOPAD(0x072, PIN_OUTPUT | MUX_MODE3)
-
-		/* mode0/bpwake 0x4a10014e sdmmc5_dat1.gpio_148 af4 */
-		OMAP4_IOPAD(0x14e, PIN_OUTPUT | MUX_MODE3)
-
-		/* mode1/apwake 0x4a100150 sdmmc5_dat2.gpio_149 ag3 */
-		OMAP4_IOPAD(0x150, PIN_OFF_OUTPUT_LOW | PIN_INPUT | MUX_MODE3)
-
-		/* status0 0x4a10007e gpmc_clk.gpio_55 b22 */
-		OMAP4_IOPAD(0x07e, PIN_INPUT | MUX_MODE3)
-
-		/* status1 0x4a10007a gpmc_ncs3.gpio_53 c22 */
-		OMAP4_IOPAD(0x07a, PIN_INPUT | MUX_MODE3)
-
-		/* status2 0x4a100078 gpmc_ncs2.gpio_52 d21 */
-		OMAP4_IOPAD(0x078, PIN_INPUT | MUX_MODE3)
-
-		/* cmd0 0x4a100094 gpmc_ncs6.gpio_103 c24 */
-		OMAP4_IOPAD(0x094, PIN_OUTPUT | MUX_MODE3)
-
-		/* cmd1 0x4a100096 gpmc_ncs7.gpio_104 d24 */
-		OMAP4_IOPAD(0x096, PIN_OUTPUT | MUX_MODE3)
-
-		/* cmd2 0x4a100142 uart3_rts_sd.gpio_142 f28 */
-		OMAP4_IOPAD(0x142, PIN_OUTPUT | MUX_MODE3)
-		>;
-	};
-
-	usb_ulpi_pins: pinmux_usb_ulpi_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x196, MUX_MODE7)
-		OMAP4_IOPAD(0x198, MUX_MODE7)
-		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1ba, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1bc, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE0)
-		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE0)
-		>;
-	};
-
-	/* usb0_otg_dp and usb0_otg_dm */
-	usb_utmi_pins: pinmux_usb_utmi_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x196, PIN_INPUT | MUX_MODE0)
-		OMAP4_IOPAD(0x198, PIN_INPUT | MUX_MODE0)
-		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1ba, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1bc, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE7)
-		>;
-	};
-
-	/*
-	 * Note that the v3.0.8 stock userspace dynamically remuxes uart1
-	 * rts pin probably for PM purposes to PIN_INPUT_PULLUP | MUX_MODE7
-	 * when not used. If needed, we can add rts pin remux later based
-	 * on power measurements.
-	 */
-	uart1_pins: pinmux_uart1_pins {
-		pinctrl-single,pins = <
-		/* 0x4a10013c mcspi1_cs2.uart1_cts ag23 */
-		OMAP4_IOPAD(0x13c, PIN_INPUT_PULLUP | MUX_MODE1)
-
-		/* 0x4a10013e mcspi1_cs3.uart1_rts ah23 */
-		OMAP4_IOPAD(0x13e, MUX_MODE1)
-
-		/* 0x4a100140 uart3_cts_rctx.uart1_tx f27 */
-		OMAP4_IOPAD(0x140, PIN_OUTPUT | MUX_MODE1)
-
-		/* 0x4a1001ca dpm_emu14.uart1_rx aa3 */
-		OMAP4_IOPAD(0x1ca, PIN_INPUT_PULLUP | MUX_MODE2)
-		>;
-	};
-
-	/* uart3_tx_irtx and uart3_rx_irrx */
-	uart3_pins: pinmux_uart3_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x196, MUX_MODE7)
-		OMAP4_IOPAD(0x198, MUX_MODE7)
-		OMAP4_IOPAD(0x1b2, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b4, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b6, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1b8, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1ba, MUX_MODE2)
-		OMAP4_IOPAD(0x1bc, PIN_INPUT | MUX_MODE2)
-		OMAP4_IOPAD(0x1be, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c0, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c2, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c4, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c6, PIN_INPUT_PULLUP | MUX_MODE7)
-		OMAP4_IOPAD(0x1c8, PIN_INPUT_PULLUP | MUX_MODE7)
-		>;
-	};
-
-	uart4_pins: pinmux_uart4_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x15c, PIN_INPUT | MUX_MODE0)		/* uart4_rx */
-		OMAP4_IOPAD(0x15e, PIN_OUTPUT | MUX_MODE0)		/* uart4_tx */
-		OMAP4_IOPAD(0x110, PIN_INPUT_PULLUP | MUX_MODE5)	/* uart4_cts */
-		OMAP4_IOPAD(0x112, PIN_OUTPUT_PULLUP | MUX_MODE5)	/* uart4_rts */
-		>;
-	};
-
-	mcbsp2_pins: pinmux_mcbsp2_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x0f6, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_clkx */
-		OMAP4_IOPAD(0x0f8, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_dr */
-		OMAP4_IOPAD(0x0fa, PIN_OUTPUT | MUX_MODE0)	/* abe_mcbsp2_dx */
-		OMAP4_IOPAD(0x0fc, PIN_INPUT | MUX_MODE0)	/* abe_mcbsp2_fsx */
-		>;
-	};
-
-	mcbsp3_pins: pinmux_mcbsp3_pins {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x106, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_dr */
-		OMAP4_IOPAD(0x108, PIN_OUTPUT | MUX_MODE1)	/* abe_mcbsp3_dx */
-		OMAP4_IOPAD(0x10a, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_clkx */
-		OMAP4_IOPAD(0x10c, PIN_INPUT | MUX_MODE1)	/* abe_mcbsp3_fsx */
-		>;
-	};
-
-	vibrator_direction_pin: pinmux_vibrator_direction_pin {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x1ce, PIN_OUTPUT | MUX_MODE1)	/* dmtimer8_pwm_evt (gpio_27) */
-		>;
-	};
-
-	vibrator_enable_pin: pinmux_vibrator_enable_pin {
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0X1d0, PIN_OUTPUT | MUX_MODE1)	/* dmtimer9_pwm_evt (gpio_28) */
-		>;
-	};
-};
-
-&omap4_pmx_wkup {
-	usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins {
-		/* gpio_wk0 */
-		pinctrl-single,pins = <
-		OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3)
-		>;
-	};
-};
-
-/* Configure pwm clock source for timers 8 & 9 */
-&timer8 {
-	assigned-clocks = <&abe_clkctrl OMAP4_TIMER8_CLKCTRL 24>;
-	assigned-clock-parents = <&sys_clkin_ck>;
-};
-
-&timer9 {
-	assigned-clocks = <&l4_per_clkctrl OMAP4_TIMER9_CLKCTRL 24>;
-	assigned-clock-parents = <&sys_clkin_ck>;
-};
-
-/*
- * As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for
- * uart1 wakeirq.
- */
-&uart1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&uart1_pins>;
-	interrupts-extended = <&wakeupgen GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH
-			       &omap4_pmx_core 0xfc>;
-};
-
-&uart3 {
-	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
-			       &omap4_pmx_core 0x17c>;
-};
-
-&uart4 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&uart4_pins>;
-
-	bluetooth {
-		compatible = "ti,wl1285-st";
-		enable-gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>; /* gpio 174 */
-		max-speed = <3686400>;
-	};
-};
-
-&usbhsohci {
-	phys = <&fsusb1_phy>;
-	phy-names = "usb";
-};
-
-&usbhsehci {
-	phys = <&hsusb2_phy>;
-};
-
-&usbhshost {
-	port1-mode = "ohci-phy-4pin-dpdm";
-	port2-mode = "ehci-tll";
-};
-
-/* Internal UTMI+ PHY used for OTG, CPCAP ULPI PHY for detection and charger */
-&usb_otg_hs {
-	interface-type = <1>;
-	mode = <3>;
-	power = <50>;
-};
-
-&i2c4 {
-	ak8975: magnetometer@c {
-		compatible = "asahi-kasei,ak8975";
-		reg = <0x0c>;
-
-		vdd-supply = <&vhvio>;
-
-		interrupt-parent = <&gpio6>;
-		interrupts = <15 IRQ_TYPE_EDGE_RISING>; /* gpio175 */
-
-		rotation-matrix = "-1", "0", "0",
-				  "0", "1", "0",
-				  "0", "0", "-1";
-
-	};
-
-	lis3dh: accelerometer@18 {
-		compatible = "st,lis3dh-accel";
-		reg = <0x18>;
-
-		vdd-supply = <&vhvio>;
-
-		interrupt-parent = <&gpio2>;
-		interrupts = <2 IRQ_TYPE_EDGE_BOTH>; /* gpio34 */
-
-		rotation-matrix = "0", "-1", "0",
-				  "1", "0", "0",
-				  "0", "0", "1";
-	};
-};
-
-&mcbsp2 {
-	#sound-dai-cells = <0>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&mcbsp2_pins>;
-	status = "okay";
-
-	mcbsp2_port: port {
-		cpu_dai2: endpoint {
-			dai-format = "i2s";
-			remote-endpoint = <&cpcap_audio_codec0>;
-			frame-master = <&cpcap_audio_codec0>;
-			bitclock-master = <&cpcap_audio_codec0>;
-		};
-	};
-};
-
-&mcbsp3 {
-	#sound-dai-cells = <0>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&mcbsp3_pins>;
-	status = "okay";
-
-	mcbsp3_port: port {
-		cpu_dai3: endpoint {
-			dai-format = "dsp_a";
-			frame-master = <&cpcap_audio_codec1>;
-			bitclock-master = <&cpcap_audio_codec1>;
-			remote-endpoint = <&cpcap_audio_codec1>;
-		};
-	};
-};
-
-&cpcap_audio_codec0 {
-	remote-endpoint = <&cpu_dai2>;
-};
-
-&cpcap_audio_codec1 {
-	remote-endpoint = <&cpu_dai3>;
 };
diff --git a/arch/arm/boot/dts/omap4-l4-abe.dtsi b/arch/arm/boot/dts/omap4-l4-abe.dtsi
index 8e6662bb9e83..6c892fc9d726 100644
--- a/arch/arm/boot/dts/omap4-l4-abe.dtsi
+++ b/arch/arm/boot/dts/omap4-l4-abe.dtsi
@@ -86,7 +86,6 @@
 
 		target-module@22000 {			/* 0x40122000, ap 2 02.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp1";
 			reg = <0x2208c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -120,7 +119,6 @@
 
 		target-module@24000 {			/* 0x40124000, ap 4 04.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp2";
 			reg = <0x2408c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -154,7 +152,6 @@
 
 		target-module@26000 {			/* 0x40126000, ap 6 06.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp3";
 			reg = <0x2608c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -188,7 +185,6 @@
 
 		target-module@28000 {			/* 0x40128000, ap 8 08.0 */
 			compatible = "ti,sysc-mcasp", "ti,sysc";
-			ti,hwmods = "mcasp";
 			reg = <0x28000 0x4>,
 			      <0x28004 0x4>;
 			reg-names = "rev", "sysc";
diff --git a/arch/arm/boot/dts/omap4-l4.dtsi b/arch/arm/boot/dts/omap4-l4.dtsi
index d60d5e0ecc4c..83f803be8ee2 100644
--- a/arch/arm/boot/dts/omap4-l4.dtsi
+++ b/arch/arm/boot/dts/omap4-l4.dtsi
@@ -381,7 +381,6 @@
 
 		target-module@2b000 {			/* 0x4a0ab000, ap 84 12.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "usb_otg_hs";
 			reg = <0x2b400 0x4>,
 			      <0x2b404 0x4>,
 			      <0x2b408 0x4>;
@@ -580,7 +579,6 @@
 
 		target-module@74000 {			/* 0x4a0f4000, ap 27 24.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox";
 			reg = <0x74000 0x4>,
 			      <0x74010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1007,7 +1005,7 @@
 			ranges = <0x0 0x6000 0x2000>;
 
 			prm: prm@0 {
-				compatible = "ti,omap4-prm";
+				compatible = "ti,omap4-prm", "simple-bus";
 				reg = <0x0 0x2000>;
 				interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				#address-cells = <1>;
@@ -1085,7 +1083,6 @@
 
 		gpio1_target: target-module@0 {			/* 0x4a310000, ap 5 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio1";
 			reg = <0x0 0x4>,
 			      <0x10 0x4>,
 			      <0x114 0x4>;
@@ -1550,7 +1547,6 @@
 
 		target-module@55000 {			/* 0x48055000, ap 15 0c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio2";
 			reg = <0x55000 0x4>,
 			      <0x55010 0x4>,
 			      <0x55114 0x4>;
@@ -1584,7 +1580,6 @@
 
 		target-module@57000 {			/* 0x48057000, ap 17 16.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio3";
 			reg = <0x57000 0x4>,
 			      <0x57010 0x4>,
 			      <0x57114 0x4>;
@@ -1618,7 +1613,6 @@
 
 		target-module@59000 {			/* 0x48059000, ap 19 10.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio4";
 			reg = <0x59000 0x4>,
 			      <0x59010 0x4>,
 			      <0x59114 0x4>;
@@ -1652,7 +1646,6 @@
 
 		target-module@5b000 {			/* 0x4805b000, ap 21 12.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio5";
 			reg = <0x5b000 0x4>,
 			      <0x5b010 0x4>,
 			      <0x5b114 0x4>;
@@ -1686,7 +1679,6 @@
 
 		target-module@5d000 {			/* 0x4805d000, ap 23 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio6";
 			reg = <0x5d000 0x4>,
 			      <0x5d010 0x4>,
 			      <0x5d114 0x4>;
@@ -2020,7 +2012,6 @@
 
 		target-module@96000 {			/* 0x48096000, ap 37 26.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp4";
 			reg = <0x9608c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -2052,7 +2043,6 @@
 
 		target-module@98000 {			/* 0x48098000, ap 49 22.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi1";
 			reg = <0x98000 0x4>,
 			      <0x98010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2091,7 +2081,6 @@
 
 		target-module@9a000 {			/* 0x4809a000, ap 51 2c.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi2";
 			reg = <0x9a000 0x4>,
 			      <0x9a010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2232,7 +2221,6 @@
 
 		target-module@b2000 {			/* 0x480b2000, ap 65 3c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "hdq1w";
 			reg = <0xb2000 0x4>,
 			      <0xb2014 0x4>,
 			      <0xb2018 0x4>;
@@ -2289,7 +2277,6 @@
 
 		target-module@b8000 {			/* 0x480b8000, ap 69 58.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi3";
 			reg = <0xb8000 0x4>,
 			      <0xb8010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2320,7 +2307,6 @@
 
 		target-module@ba000 {			/* 0x480ba000, ap 71 32.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi4";
 			reg = <0xba000 0x4>,
 			      <0xba010 0x4>;
 			reg-names = "rev", "sysc";
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 7cc95bc1598b..2de8a6b53de9 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -148,7 +148,7 @@
 		l4_abe: interconnect@40100000 {
 		};
 
-		ocmcram: ocmcram@40304000 {
+		ocmcram: sram@40304000 {
 			compatible = "mmio-sram";
 			reg = <0x40304000 0xa000>; /* 40k */
 		};
@@ -330,8 +330,8 @@
 
 		target-module@56000000 {
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			reg = <0x5601fc00 0x4>,
-			      <0x5601fc10 0x4>;
+			reg = <0x5600fe00 0x4>,
+			      <0x5600fe10 0x4>;
 			reg-names = "rev", "sysc";
 			ti,sysc-midle = <SYSC_IDLE_FORCE>,
 					<SYSC_IDLE_NO>,
@@ -442,3 +442,29 @@
 #include "omap4-l4.dtsi"
 #include "omap4-l4-abe.dtsi"
 #include "omap44xx-clocks.dtsi"
+
+&prm {
+	prm_tesla: prm@400 {
+		compatible = "ti,omap4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x400 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_core: prm@700 {
+		compatible = "ti,omap4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x700 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_ivahd: prm@f00 {
+		compatible = "ti,omap4-prm-inst", "ti,omap-prm-inst";
+		reg = <0xf00 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_device: prm@1b00 {
+		compatible = "ti,omap4-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1b00 0x40>;
+		#reset-cells = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/omap5-l4-abe.dtsi b/arch/arm/boot/dts/omap5-l4-abe.dtsi
index dc9d0532f4cf..23aa90716f7f 100644
--- a/arch/arm/boot/dts/omap5-l4-abe.dtsi
+++ b/arch/arm/boot/dts/omap5-l4-abe.dtsi
@@ -86,7 +86,6 @@
 
 		target-module@22000 {			/* 0x40122000, ap 2 02.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp1";
 			reg = <0x2208c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -120,7 +119,6 @@
 
 		target-module@24000 {			/* 0x40124000, ap 4 04.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp2";
 			reg = <0x2408c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
@@ -154,7 +152,6 @@
 
 		target-module@26000 {			/* 0x40126000, ap 6 06.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "mcbsp3";
 			reg = <0x2608c 0x4>;
 			reg-names = "sysc";
 			ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY |
diff --git a/arch/arm/boot/dts/omap5-l4.dtsi b/arch/arm/boot/dts/omap5-l4.dtsi
index 0960348002ad..25aacf1ba708 100644
--- a/arch/arm/boot/dts/omap5-l4.dtsi
+++ b/arch/arm/boot/dts/omap5-l4.dtsi
@@ -593,7 +593,6 @@
 
 		target-module@74000 {			/* 0x4a0f4000, ap 25 04.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mailbox";
 			reg = <0x74000 0x4>,
 			      <0x74010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1033,7 +1032,6 @@
 
 		target-module@20000 {			/* 0x48020000, ap 3 04.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart3";
 			reg = <0x20050 0x4>,
 			      <0x20054 0x4>,
 			      <0x20058 0x4>;
@@ -1176,7 +1174,6 @@
 
 		target-module@51000 {			/* 0x48051000, ap 45 2e.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio7";
 			reg = <0x51000 0x4>,
 			      <0x51010 0x4>,
 			      <0x51114 0x4>;
@@ -1210,7 +1207,6 @@
 
 		target-module@53000 {			/* 0x48053000, ap 35 36.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio8";
 			reg = <0x53000 0x4>,
 			      <0x53010 0x4>,
 			      <0x53114 0x4>;
@@ -1244,7 +1240,6 @@
 
 		target-module@55000 {			/* 0x48055000, ap 13 0e.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio2";
 			reg = <0x55000 0x4>,
 			      <0x55010 0x4>,
 			      <0x55114 0x4>;
@@ -1278,7 +1273,6 @@
 
 		target-module@57000 {			/* 0x48057000, ap 15 06.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio3";
 			reg = <0x57000 0x4>,
 			      <0x57010 0x4>,
 			      <0x57114 0x4>;
@@ -1312,7 +1306,6 @@
 
 		target-module@59000 {			/* 0x48059000, ap 17 16.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio4";
 			reg = <0x59000 0x4>,
 			      <0x59010 0x4>,
 			      <0x59114 0x4>;
@@ -1346,7 +1339,6 @@
 
 		target-module@5b000 {			/* 0x4805b000, ap 19 1e.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio5";
 			reg = <0x5b000 0x4>,
 			      <0x5b010 0x4>,
 			      <0x5b114 0x4>;
@@ -1380,7 +1372,6 @@
 
 		target-module@5d000 {			/* 0x4805d000, ap 21 26.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio6";
 			reg = <0x5d000 0x4>,
 			      <0x5d010 0x4>,
 			      <0x5d114 0x4>;
@@ -1414,7 +1405,6 @@
 
 		target-module@60000 {			/* 0x48060000, ap 23 24.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c3";
 			reg = <0x60000 0x8>,
 			      <0x60010 0x8>,
 			      <0x60090 0x8>;
@@ -1446,7 +1436,6 @@
 
 		target-module@66000 {			/* 0x48066000, ap 63 4c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart5";
 			reg = <0x66050 0x4>,
 			      <0x66054 0x4>,
 			      <0x66058 0x4>;
@@ -1476,7 +1465,6 @@
 
 		target-module@68000 {			/* 0x48068000, ap 53 54.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart6";
 			reg = <0x68050 0x4>,
 			      <0x68054 0x4>,
 			      <0x68058 0x4>;
@@ -1506,7 +1494,6 @@
 
 		target-module@6a000 {			/* 0x4806a000, ap 24 0a.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart1";
 			reg = <0x6a050 0x4>,
 			      <0x6a054 0x4>,
 			      <0x6a058 0x4>;
@@ -1536,7 +1523,6 @@
 
 		target-module@6c000 {			/* 0x4806c000, ap 26 22.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart2";
 			reg = <0x6c050 0x4>,
 			      <0x6c054 0x4>,
 			      <0x6c058 0x4>;
@@ -1566,7 +1552,6 @@
 
 		target-module@6e000 {			/* 0x4806e000, ap 28 44.1 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "uart4";
 			reg = <0x6e050 0x4>,
 			      <0x6e054 0x4>,
 			      <0x6e058 0x4>;
@@ -1596,7 +1581,6 @@
 
 		target-module@70000 {			/* 0x48070000, ap 30 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c1";
 			reg = <0x70000 0x8>,
 			      <0x70010 0x8>,
 			      <0x70090 0x8>;
@@ -1628,7 +1612,6 @@
 
 		target-module@72000 {			/* 0x48072000, ap 32 1c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c2";
 			reg = <0x72000 0x8>,
 			      <0x72010 0x8>,
 			      <0x72090 0x8>;
@@ -1668,7 +1651,6 @@
 
 		target-module@7a000 {			/* 0x4807a000, ap 81 2c.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c4";
 			reg = <0x7a000 0x8>,
 			      <0x7a010 0x8>,
 			      <0x7a090 0x8>;
@@ -1700,7 +1682,6 @@
 
 		target-module@7c000 {			/* 0x4807c000, ap 83 34.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "i2c5";
 			reg = <0x7c000 0x8>,
 			      <0x7c010 0x8>,
 			      <0x7c090 0x8>;
@@ -1798,7 +1779,6 @@
 
 		target-module@98000 {			/* 0x48098000, ap 47 08.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi1";
 			reg = <0x98000 0x4>,
 			      <0x98010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1837,7 +1817,6 @@
 
 		target-module@9a000 {			/* 0x4809a000, ap 49 10.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi2";
 			reg = <0x9a000 0x4>,
 			      <0x9a010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1871,7 +1850,6 @@
 
 		target-module@9c000 {			/* 0x4809c000, ap 51 3a.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mmc1";
 			reg = <0x9c000 0x4>,
 			      <0x9c010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1931,7 +1909,6 @@
 
 		target-module@ad000 {			/* 0x480ad000, ap 61 20.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mmc3";
 			reg = <0xad000 0x4>,
 			      <0xad010 0x4>;
 			reg-names = "rev", "sysc";
@@ -1972,7 +1949,6 @@
 
 		target-module@b4000 {			/* 0x480b4000, ap 65 42.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mmc2";
 			reg = <0xb4000 0x4>,
 			      <0xb4010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2005,7 +1981,6 @@
 
 		target-module@b8000 {			/* 0x480b8000, ap 67 32.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi3";
 			reg = <0xb8000 0x4>,
 			      <0xb8010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2036,7 +2011,6 @@
 
 		target-module@ba000 {			/* 0x480ba000, ap 69 18.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mcspi4";
 			reg = <0xba000 0x4>,
 			      <0xba010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2067,7 +2041,6 @@
 
 		target-module@d1000 {			/* 0x480d1000, ap 71 28.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mmc4";
 			reg = <0xd1000 0x4>,
 			      <0xd1010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2100,7 +2073,6 @@
 
 		target-module@d5000 {			/* 0x480d5000, ap 73 30.0 */
 			compatible = "ti,sysc-omap4", "ti,sysc";
-			ti,hwmods = "mmc5";
 			reg = <0xd5000 0x4>,
 			      <0xd5010 0x4>;
 			reg-names = "rev", "sysc";
@@ -2296,7 +2268,6 @@
 
 		target-module@0 {			/* 0x4ae10000, ap 5 10.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "gpio1";
 			reg = <0x0 0x4>,
 			      <0x10 0x4>,
 			      <0x114 0x4>;
@@ -2331,7 +2302,6 @@
 
 		target-module@4000 {			/* 0x4ae14000, ap 7 14.0 */
 			compatible = "ti,sysc-omap2", "ti,sysc";
-			ti,hwmods = "wd_timer2";
 			reg = <0x4000 0x4>,
 			      <0x4010 0x4>,
 			      <0x4014 0x4>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 1fb7937638f0..1f6ad1debc90 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -162,7 +162,7 @@
 		l4_abe: interconnect@40100000 {
 		};
 
-		ocmcram: ocmcram@40300000 {
+		ocmcram: sram@40300000 {
 			compatible = "mmio-sram";
 			reg = <0x40300000 0x20000>; /* 128k */
 		};
@@ -435,3 +435,29 @@
 
 #include "omap5-l4-abe.dtsi"
 #include "omap54xx-clocks.dtsi"
+
+&prm {
+	prm_dsp: prm@400 {
+		compatible = "ti,omap5-prm-inst", "ti,omap-prm-inst";
+		reg = <0x400 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_core: prm@700 {
+		compatible = "ti,omap5-prm-inst", "ti,omap-prm-inst";
+		reg = <0x700 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_iva: prm@1200 {
+		compatible = "ti,omap5-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1200 0x100>;
+		#reset-cells = <1>;
+	};
+
+	prm_device: prm@1c00 {
+		compatible = "ti,omap5-prm-inst", "ti,omap-prm-inst";
+		reg = <0x1c00 0x100>;
+		#reset-cells = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/openbmc-flash-layout-128.dtsi b/arch/arm/boot/dts/openbmc-flash-layout-128.dtsi
new file mode 100644
index 000000000000..05101a38c5bd
--- /dev/null
+++ b/arch/arm/boot/dts/openbmc-flash-layout-128.dtsi
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+partitions {
+	compatible = "fixed-partitions";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	u-boot@0 {
+		reg = <0x0 0xe0000>; // 896KB
+		label = "u-boot";
+	};
+
+	u-boot-env@e0000 {
+		reg = <0xe0000 0x20000>; // 128KB
+		label = "u-boot-env";
+	};
+
+	kernel@100000 {
+		reg = <0x100000 0x900000>; // 9MB
+		label = "kernel";
+	};
+
+	rofs@a00000 {
+		reg = <0xa00000 0x5600000>; // 86MB
+		label = "rofs";
+	};
+
+	rwfs@6000000 {
+		reg = <0x6000000 0x2000000>; // 32MB
+		label = "rwfs";
+	};
+};
diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi
index 56f51599852d..8ef26da32ff4 100644
--- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
@@ -206,6 +206,18 @@
 			interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
+		sdhci: sdhci@7824900 {
+			compatible = "qcom,sdhci-msm-v4";
+			reg = <0x7824900 0x11c>, <0x7824000 0x800>;
+			interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "hc_irq", "pwr_irq";
+			bus-width = <8>;
+			clocks = <&gcc GCC_SDCC1_APPS_CLK>, <&gcc GCC_SDCC1_AHB_CLK>,
+				 <&gcc GCC_DCD_XO_CLK>;
+			clock-names = "core", "iface", "xo";
+			status = "disabled";
+		};
+
 		blsp_dma: dma@7884000 {
 			compatible = "qcom,bam-v1.7.0";
 			reg = <0x07884000 0x23000>;
diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
index 26160c324802..942e3a2cac35 100644
--- a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
+++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
@@ -143,7 +143,7 @@
 				compatible = "smsc,usb3503a";
 				reg = <0x8>;
 				connect-gpios = <&gpioext2 1 GPIO_ACTIVE_HIGH>;
-				intn-gpios = <&gpioext2 0 GPIO_ACTIVE_LOW>;
+				intn-gpios = <&gpioext2 0 GPIO_ACTIVE_HIGH>;
 				initial-mode = <1>;
 			};
 		};
diff --git a/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts b/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
index bf402ae39226..26160394d717 100644
--- a/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
+++ b/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
@@ -221,6 +221,8 @@
 						regulator-max-microvolt = <2950000>;
 
 						regulator-boot-on;
+						regulator-system-load = <200000>;
+						regulator-allow-set-load;
 					};
 
 					l21 {
@@ -272,14 +274,6 @@
 			};
 		};
 
-		sdhc2_cd_pin_a: sdhc2-cd-pin-active {
-			pins = "gpio62";
-			function = "gpio";
-
-			drive-strength = <2>;
-			bias-disable;
-		};
-
 		sdhc2_pin_a: sdhc2-pin-active {
 			clk {
 				pins = "sdc2_clk";
@@ -317,7 +311,7 @@
 		bus-width = <4>;
 
 		pinctrl-names = "default";
-		pinctrl-0 = <&sdhc2_pin_a>, <&sdhc2_cd_pin_a>;
+		pinctrl-0 = <&sdhc2_pin_a>;
 	};
 
 	usb@f9a55000 {
@@ -344,6 +338,16 @@
 			};
 		};
 	};
+
+	imem@fe805000 {
+		status = "okay";
+
+		reboot-mode {
+			mode-normal	= <0x77665501>;
+			mode-bootloader	= <0x77665500>;
+			mode-recovery	= <0x77665502>;
+		};
+	};
 };
 
 &spmi_bus {
diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi
index 369e58f64145..9a84eb0cbbe6 100644
--- a/arch/arm/boot/dts/qcom-msm8974.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
@@ -217,6 +217,96 @@
 				};
 			};
 		};
+
+		q6-dsp-thermal {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 1>;
+
+			trips {
+				q6_dsp_alert0: trip-point0 {
+					temperature = <90000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
+
+		modemtx-thermal {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 2>;
+
+			trips {
+				modemtx_alert0: trip-point0 {
+					temperature = <90000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
+
+		video-thermal {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 3>;
+
+			trips {
+				video_alert0: trip-point0 {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
+
+		wlan-thermal {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 4>;
+
+			trips {
+				wlan_alert0: trip-point0 {
+					temperature = <105000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
+
+		gpu-thermal-top {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 9>;
+
+			trips {
+				gpu1_alert0: trip-point0 {
+					temperature = <90000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
+
+		gpu-thermal-bottom {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&tsens 10>;
+
+			trips {
+				gpu2_alert0: trip-point0 {
+					temperature = <90000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+			};
+		};
 	};
 
 	cpu-pmu {
@@ -441,6 +531,8 @@
 			nvmem-cells = <&tsens_calib>, <&tsens_backup>;
 			nvmem-cell-names = "calib", "calib_backup";
 			#qcom,sensors = <11>;
+			interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -1217,6 +1309,17 @@
 				clock-names = "iface";
 			};
 		};
+
+		imem@fe805000 {
+			status = "disabled";
+			compatible = "syscon", "simple-mfd";
+			reg = <0xfe805000 0x1000>;
+
+			reboot-mode {
+				compatible = "syscon-reboot-mode";
+				offset = <0x65c>;
+			};
+		};
 	};
 
 	smd {
diff --git a/arch/arm/boot/dts/qcom-pm8941.dtsi b/arch/arm/boot/dts/qcom-pm8941.dtsi
index f198480c8ef4..c1f2012d1c8b 100644
--- a/arch/arm/boot/dts/qcom-pm8941.dtsi
+++ b/arch/arm/boot/dts/qcom-pm8941.dtsi
@@ -178,6 +178,16 @@
 				qcom,vs-soft-start-strength = <0>;
 				regulator-initial-mode = <1>;
 			};
+
+			pm8941_5vs2: 5vs2 {
+				regulator-enable-ramp-delay = <1000>;
+				regulator-pull-down;
+				regulator-over-current-protection;
+				qcom,ocp-max-retries = <10>;
+				qcom,ocp-retry-delay = <30>;
+				qcom,vs-soft-start-strength = <0>;
+				regulator-initial-mode = <1>;
+			};
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts
index 83cc619861b2..6ec2cf7eb354 100644
--- a/arch/arm/boot/dts/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/r8a7790-lager.dts
@@ -325,10 +325,10 @@
 		#size-cells = <0>;
 	};
 
-        /*
-         * IIC2 and I2C2 may be switched using pinmux.
-         * A fallback to GPIO is also provided.
-         */
+	/*
+	 * IIC2 and I2C2 may be switched using pinmux.
+	 * A fallback to GPIO is also provided.
+	 */
 	i2chdmi: i2c-12 {
 		compatible = "i2c-demux-pinctrl";
 		i2c-parent = <&iic2>, <&i2c2>, <&gpioi2c2>;
diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts
index 42f3313e6988..48fbeb6340fd 100644
--- a/arch/arm/boot/dts/r8a7793-gose.dts
+++ b/arch/arm/boot/dts/r8a7793-gose.dts
@@ -65,81 +65,81 @@
 		compatible = "gpio-keys";
 
 		key-1 {
-		        gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_1>;
-		        label = "SW2-1";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_1>;
+			label = "SW2-1";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-2 {
-		        gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_2>;
-		        label = "SW2-2";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_2>;
+			label = "SW2-2";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-3 {
-		        gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_3>;
-		        label = "SW2-3";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_3>;
+			label = "SW2-3";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-4 {
-		        gpios = <&gpio5 3 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_4>;
-		        label = "SW2-4";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio5 3 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_4>;
+			label = "SW2-4";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-a {
-		        gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_A>;
-		        label = "SW30";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_A>;
+			label = "SW30";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-b {
-		        gpios = <&gpio7 1 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_B>;
-		        label = "SW31";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 1 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_B>;
+			label = "SW31";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-c {
-		        gpios = <&gpio7 2 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_C>;
-		        label = "SW32";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 2 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_C>;
+			label = "SW32";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-d {
-		        gpios = <&gpio7 3 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_D>;
-		        label = "SW33";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 3 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_D>;
+			label = "SW33";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-e {
-		        gpios = <&gpio7 4 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_E>;
-		        label = "SW34";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 4 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_E>;
+			label = "SW34";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-f {
-		        gpios = <&gpio7 5 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_F>;
-		        label = "SW35";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 5 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_F>;
+			label = "SW35";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 		key-g {
-		        gpios = <&gpio7 6 GPIO_ACTIVE_LOW>;
-		        linux,code = <KEY_G>;
-		        label = "SW36";
-		        wakeup-source;
-		        debounce-interval = <20>;
+			gpios = <&gpio7 6 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_G>;
+			label = "SW36";
+			wakeup-source;
+			debounce-interval = <20>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/rda8810pl.dtsi b/arch/arm/boot/dts/rda8810pl.dtsi
index 19cde895bf65..f30d6ece49fb 100644
--- a/arch/arm/boot/dts/rda8810pl.dtsi
+++ b/arch/arm/boot/dts/rda8810pl.dtsi
@@ -33,6 +33,21 @@
 		ranges;
 	};
 
+	modem@10000000 {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x0 0x10000000 0xfffffff>;
+
+		gpioc@1a08000 {
+			compatible = "rda,8810pl-gpio";
+			reg = <0x1a08000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			ngpios = <32>;
+		};
+	};
+
 	apb@20800000 {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -60,6 +75,39 @@
 				     <17 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "hwtimer", "ostimer";
 		};
+
+		gpioa@30000 {
+			compatible = "rda,8810pl-gpio";
+			reg = <0x30000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			ngpios = <32>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		gpiob@31000 {
+			compatible = "rda,8810pl-gpio";
+			reg = <0x31000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			ngpios = <32>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		gpiod@32000 {
+			compatible = "rda,8810pl-gpio";
+			reg = <0x32000 0x1000>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			ngpios = <32>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+		};
 	};
 
 	apb@20a00000 {
diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi
index c776321b2cc4..c70182c5aeb1 100644
--- a/arch/arm/boot/dts/rk3036.dtsi
+++ b/arch/arm/boot/dts/rk3036.dtsi
@@ -696,8 +696,8 @@
 
 		hdmi {
 			hdmi_ctl: hdmi-ctl {
-				rockchip,pins = <1 RK_PB0  1 &pcfg_pull_none>,
-						<1 RK_PB1  1 &pcfg_pull_none>,
+				rockchip,pins = <1 RK_PB0 1 &pcfg_pull_none>,
+						<1 RK_PB1 1 &pcfg_pull_none>,
 						<1 RK_PB2 1 &pcfg_pull_none>,
 						<1 RK_PB3 1 &pcfg_pull_none>;
 			};
diff --git a/arch/arm/boot/dts/rk3288-rock2-som.dtsi b/arch/arm/boot/dts/rk3288-rock2-som.dtsi
index 9f9e2bfd1295..44bb5e6f83b1 100644
--- a/arch/arm/boot/dts/rk3288-rock2-som.dtsi
+++ b/arch/arm/boot/dts/rk3288-rock2-som.dtsi
@@ -230,14 +230,14 @@
 	};
 
 	emmc {
-			emmc_reset: emmc-reset {
-				rockchip,pins = <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_none>;
-			};
+		emmc_reset: emmc-reset {
+			rockchip,pins = <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
 	};
 
 	gmac {
 		phy_rst: phy-rst {
-			rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO  &pcfg_output_high>;
+			rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_output_high>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi
index 81e4e953d4a4..0aeef23ca3db 100644
--- a/arch/arm/boot/dts/rk3288-tinker.dtsi
+++ b/arch/arm/boot/dts/rk3288-tinker.dtsi
@@ -382,18 +382,15 @@
 
 	pmic {
 		pmic_int: pmic-int {
-			rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO \
-					&pcfg_pull_up>;
+			rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>;
 		};
 
 		dvs_1: dvs-1 {
-			rockchip,pins = <0 RK_PB3 RK_FUNC_GPIO \
-					&pcfg_pull_down>;
+			rockchip,pins = <0 RK_PB3 RK_FUNC_GPIO &pcfg_pull_down>;
 		};
 
 		dvs_2: dvs-2 {
-			rockchip,pins = <0 RK_PB4 RK_FUNC_GPIO \
-					&pcfg_pull_down>;
+			rockchip,pins = <0 RK_PB4 RK_FUNC_GPIO &pcfg_pull_down>;
 		};
 	};
 
@@ -406,8 +403,7 @@
 		};
 
 		sdmmc_clk: sdmmc-clk {
-			rockchip,pins = <6 RK_PC4 1 \
-					&pcfg_pull_none_drv_8ma>;
+			rockchip,pins = <6 RK_PC4 1 &pcfg_pull_none_drv_8ma>;
 		};
 
 		sdmmc_cmd: sdmmc-cmd {
@@ -432,7 +428,7 @@
 	sdio {
 		wifi_enable: wifi-enable {
 			rockchip,pins = <4 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>,
-				<4 RK_PD4 RK_FUNC_GPIO &pcfg_pull_none>;
+					<4 RK_PD4 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/rk3288-veyron-analog-audio.dtsi b/arch/arm/boot/dts/rk3288-veyron-analog-audio.dtsi
index 445270aa136e..51208d161d65 100644
--- a/arch/arm/boot/dts/rk3288-veyron-analog-audio.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron-analog-audio.dtsi
@@ -17,6 +17,7 @@
 		rockchip,hp-det-gpios = <&gpio6 RK_PA5 GPIO_ACTIVE_HIGH>;
 		rockchip,mic-det-gpios = <&gpio6 RK_PB3 GPIO_ACTIVE_LOW>;
 		rockchip,headset-codec = <&headsetcodec>;
+		rockchip,hdmi-codec = <&hdmi>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/rk3288-veyron-edp.dtsi b/arch/arm/boot/dts/rk3288-veyron-edp.dtsi
index b12e061c5f7f..300a7e32c978 100644
--- a/arch/arm/boot/dts/rk3288-veyron-edp.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron-edp.dtsi
@@ -41,39 +41,8 @@
 
 	backlight: backlight {
 		compatible = "pwm-backlight";
-		brightness-levels = <
-			  0   1   2   3   4   5   6   7
-			  8   9  10  11  12  13  14  15
-			 16  17  18  19  20  21  22  23
-			 24  25  26  27  28  29  30  31
-			 32  33  34  35  36  37  38  39
-			 40  41  42  43  44  45  46  47
-			 48  49  50  51  52  53  54  55
-			 56  57  58  59  60  61  62  63
-			 64  65  66  67  68  69  70  71
-			 72  73  74  75  76  77  78  79
-			 80  81  82  83  84  85  86  87
-			 88  89  90  91  92  93  94  95
-			 96  97  98  99 100 101 102 103
-			104 105 106 107 108 109 110 111
-			112 113 114 115 116 117 118 119
-			120 121 122 123 124 125 126 127
-			128 129 130 131 132 133 134 135
-			136 137 138 139 140 141 142 143
-			144 145 146 147 148 149 150 151
-			152 153 154 155 156 157 158 159
-			160 161 162 163 164 165 166 167
-			168 169 170 171 172 173 174 175
-			176 177 178 179 180 181 182 183
-			184 185 186 187 188 189 190 191
-			192 193 194 195 196 197 198 199
-			200 201 202 203 204 205 206 207
-			208 209 210 211 212 213 214 215
-			216 217 218 219 220 221 222 223
-			224 225 226 227 228 229 230 231
-			232 233 234 235 236 237 238 239
-			240 241 242 243 244 245 246 247
-			248 249 250 251 252 253 254 255>;
+		brightness-levels = <0 255>;
+		num-interpolated-steps = <255>;
 		default-brightness-level = <128>;
 		enable-gpios = <&gpio7 RK_PA2 GPIO_ACTIVE_HIGH>;
 		pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/rk3288-veyron-jaq.dts b/arch/arm/boot/dts/rk3288-veyron-jaq.dts
index 80386203e85b..a4966e505a2f 100644
--- a/arch/arm/boot/dts/rk3288-veyron-jaq.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-jaq.dts
@@ -20,39 +20,8 @@
 
 &backlight {
 	/* Jaq panel PWM must be >= 3%, so start non-zero brightness at 8 */
-	brightness-levels = <
-		  0
-		  8   9  10  11  12  13  14  15
-		 16  17  18  19  20  21  22  23
-		 24  25  26  27  28  29  30  31
-		 32  33  34  35  36  37  38  39
-		 40  41  42  43  44  45  46  47
-		 48  49  50  51  52  53  54  55
-		 56  57  58  59  60  61  62  63
-		 64  65  66  67  68  69  70  71
-		 72  73  74  75  76  77  78  79
-		 80  81  82  83  84  85  86  87
-		 88  89  90  91  92  93  94  95
-		 96  97  98  99 100 101 102 103
-		104 105 106 107 108 109 110 111
-		112 113 114 115 116 117 118 119
-		120 121 122 123 124 125 126 127
-		128 129 130 131 132 133 134 135
-		136 137 138 139 140 141 142 143
-		144 145 146 147 148 149 150 151
-		152 153 154 155 156 157 158 159
-		160 161 162 163 164 165 166 167
-		168 169 170 171 172 173 174 175
-		176 177 178 179 180 181 182 183
-		184 185 186 187 188 189 190 191
-		192 193 194 195 196 197 198 199
-		200 201 202 203 204 205 206 207
-		208 209 210 211 212 213 214 215
-		216 217 218 219 220 221 222 223
-		224 225 226 227 228 229 230 231
-		232 233 234 235 236 237 238 239
-		240 241 242 243 244 245 246 247
-		248 249 250 251 252 253 254 255>;
+	brightness-levels = <0 8 255>;
+	num-interpolated-steps = <247>;
 };
 
 &rk808 {
diff --git a/arch/arm/boot/dts/rk3288-veyron-mickey.dts b/arch/arm/boot/dts/rk3288-veyron-mickey.dts
index aa352d40c991..06a6a9554c48 100644
--- a/arch/arm/boot/dts/rk3288-veyron-mickey.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-mickey.dts
@@ -28,6 +28,13 @@
 		regulator-boot-on;
 		vin-supply = <&vcc33_sys>;
 	};
+
+	sound {
+		compatible = "rockchip,rockchip-audio-max98090";
+		rockchip,model = "VEYRON-HDMI";
+		rockchip,hdmi-codec = <&hdmi>;
+		rockchip,i2s-controller = <&i2s>;
+	};
 };
 
 &cpu_thermal {
diff --git a/arch/arm/boot/dts/rk3288-veyron-minnie.dts b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
index 55955b082501..c833716dbe48 100644
--- a/arch/arm/boot/dts/rk3288-veyron-minnie.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
@@ -38,39 +38,8 @@
 
 &backlight {
 	/* Minnie panel PWM must be >= 1%, so start non-zero brightness at 3 */
-	brightness-levels = <
-			  0   3   4   5   6   7
-			  8   9  10  11  12  13  14  15
-			 16  17  18  19  20  21  22  23
-			 24  25  26  27  28  29  30  31
-			 32  33  34  35  36  37  38  39
-			 40  41  42  43  44  45  46  47
-			 48  49  50  51  52  53  54  55
-			 56  57  58  59  60  61  62  63
-			 64  65  66  67  68  69  70  71
-			 72  73  74  75  76  77  78  79
-			 80  81  82  83  84  85  86  87
-			 88  89  90  91  92  93  94  95
-			 96  97  98  99 100 101 102 103
-			104 105 106 107 108 109 110 111
-			112 113 114 115 116 117 118 119
-			120 121 122 123 124 125 126 127
-			128 129 130 131 132 133 134 135
-			136 137 138 139 140 141 142 143
-			144 145 146 147 148 149 150 151
-			152 153 154 155 156 157 158 159
-			160 161 162 163 164 165 166 167
-			168 169 170 171 172 173 174 175
-			176 177 178 179 180 181 182 183
-			184 185 186 187 188 189 190 191
-			192 193 194 195 196 197 198 199
-			200 201 202 203 204 205 206 207
-			208 209 210 211 212 213 214 215
-			216 217 218 219 220 221 222 223
-			224 225 226 227 228 229 230 231
-			232 233 234 235 236 237 238 239
-			240 241 242 243 244 245 246 247
-			248 249 250 251 252 253 254 255>;
+	brightness-levels = <0 3 255>;
+	num-interpolated-steps = <252>;
 };
 
 &i2c_tunnel {
diff --git a/arch/arm/boot/dts/rk3288-veyron-tiger.dts b/arch/arm/boot/dts/rk3288-veyron-tiger.dts
index 27557203ae33..bebb230e592f 100644
--- a/arch/arm/boot/dts/rk3288-veyron-tiger.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-tiger.dts
@@ -23,39 +23,8 @@
 
 &backlight {
 	/* Tiger panel PWM must be >= 1%, so start non-zero brightness at 3 */
-	brightness-levels = <
-		  0   3   4   5   6   7
-		  8   9  10  11  12  13  14  15
-		 16  17  18  19  20  21  22  23
-		 24  25  26  27  28  29  30  31
-		 32  33  34  35  36  37  38  39
-		 40  41  42  43  44  45  46  47
-		 48  49  50  51  52  53  54  55
-		 56  57  58  59  60  61  62  63
-		 64  65  66  67  68  69  70  71
-		 72  73  74  75  76  77  78  79
-		 80  81  82  83  84  85  86  87
-		 88  89  90  91  92  93  94  95
-		 96  97  98  99 100 101 102 103
-		104 105 106 107 108 109 110 111
-		112 113 114 115 116 117 118 119
-		120 121 122 123 124 125 126 127
-		128 129 130 131 132 133 134 135
-		136 137 138 139 140 141 142 143
-		144 145 146 147 148 149 150 151
-		152 153 154 155 156 157 158 159
-		160 161 162 163 164 165 166 167
-		168 169 170 171 172 173 174 175
-		176 177 178 179 180 181 182 183
-		184 185 186 187 188 189 190 191
-		192 193 194 195 196 197 198 199
-		200 201 202 203 204 205 206 207
-		208 209 210 211 212 213 214 215
-		216 217 218 219 220 221 222 223
-		224 225 226 227 228 229 230 231
-		232 233 234 235 236 237 238 239
-		240 241 242 243 244 245 246 247
-		248 249 250 251 252 253 254 255>;
+	brightness-levels = <0 3 255>;
+	num-interpolated-steps = <252>;
 };
 
 &backlight_regulator {
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index cc893e154fe5..415c75f5783c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -1023,7 +1023,7 @@
 
 	vopb: vop@ff930000 {
 		compatible = "rockchip,rk3288-vop";
-		reg = <0x0 0xff930000 0x0 0x19c>;
+		reg = <0x0 0xff930000 0x0 0x19c>, <0x0 0xff931000 0x0 0x1000>;
 		interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru ACLK_VOP0>, <&cru DCLK_VOP0>, <&cru HCLK_VOP0>;
 		clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
@@ -1073,7 +1073,7 @@
 
 	vopl: vop@ff940000 {
 		compatible = "rockchip,rk3288-vop";
-		reg = <0x0 0xff940000 0x0 0x19c>;
+		reg = <0x0 0xff940000 0x0 0x19c>, <0x0 0xff941000 0x0 0x1000>;
 		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru ACLK_VOP1>, <&cru DCLK_VOP1>, <&cru HCLK_VOP1>;
 		clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
@@ -1391,6 +1391,9 @@
 		clocks = <&cru PCLK_EFUSE256>;
 		clock-names = "pclk_efuse";
 
+		cpu_id: cpu-id@7 {
+			reg = <0x07 0x10>;
+		};
 		cpu_leakage: cpu_leakage@17 {
 			reg = <0x17 0x1>;
 		};
diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts
index 0e159c884f97..1aeac33b0d34 100644
--- a/arch/arm/boot/dts/s3c6410-mini6410.dts
+++ b/arch/arm/boot/dts/s3c6410-mini6410.dts
@@ -165,6 +165,10 @@
 	};
 };
 
+&clocks {
+	clocks = <&fin_pll>;
+};
+
 &sdhci0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>;
diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts
index a9a5689dc462..3bf6c450a26e 100644
--- a/arch/arm/boot/dts/s3c6410-smdk6410.dts
+++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts
@@ -69,6 +69,10 @@
 	};
 };
 
+&clocks {
+	clocks = <&fin_pll>;
+};
+
 &sdhci0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>;
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 2e2c1a7b1d1d..565204816e34 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -689,7 +689,7 @@
 				#clock-cells = <0>;
 			};
 
-			rtc@f80480b0 {
+			rtc: rtc@f80480b0 {
 				compatible = "atmel,at91rm9200-rtc";
 				reg = <0xf80480b0 0x30>;
 				interrupts = <74 IRQ_TYPE_LEVEL_HIGH 7>;
diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
index b4c0a76a4d1a..2b645642b935 100644
--- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
+++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
@@ -19,7 +19,7 @@
 		m25p,fast-read;
 		cdns,page-size = <256>;
 		cdns,block-size = <16>;
-		cdns,read-delay = <4>;
+		cdns,read-delay = <3>;
 		cdns,tshsl-ns = <50>;
 		cdns,tsd2d-ns = <50>;
 		cdns,tchsh-ns = <4>;
diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts
index ba08624c6237..58288aa53fee 100644
--- a/arch/arm/boot/dts/stm32429i-eval.dts
+++ b/arch/arm/boot/dts/stm32429i-eval.dts
@@ -60,7 +60,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@00000000 {
 		device_type = "memory";
 		reg = <0x00000000 0x2000000>;
 	};
@@ -234,7 +234,6 @@
 	status = "okay";
 	pinctrl-0 = <&ltdc_pins>;
 	pinctrl-names = "default";
-	dma-ranges;
 
 	port {
 		ltdc_out_rgb: endpoint {
diff --git a/arch/arm/boot/dts/stm32746g-eval.dts b/arch/arm/boot/dts/stm32746g-eval.dts
index 2b1664884ae7..fcc804e3c158 100644
--- a/arch/arm/boot/dts/stm32746g-eval.dts
+++ b/arch/arm/boot/dts/stm32746g-eval.dts
@@ -55,7 +55,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@c0000000 {
 		device_type = "memory";
 		reg = <0xc0000000 0x2000000>;
 	};
@@ -95,7 +95,6 @@
 
 	joystick {
 		compatible = "gpio-keys";
-		#size-cells = <0>;
 		pinctrl-0 = <&joystick_pins>;
 		pinctrl-names = "default";
 		button-0 {
diff --git a/arch/arm/boot/dts/stm32f429-disco.dts b/arch/arm/boot/dts/stm32f429-disco.dts
index e19d0fe7dbda..30c0f6717871 100644
--- a/arch/arm/boot/dts/stm32f429-disco.dts
+++ b/arch/arm/boot/dts/stm32f429-disco.dts
@@ -59,7 +59,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@90000000 {
 		device_type = "memory";
 		reg = <0x90000000 0x800000>;
 	};
diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts
index a3ff04940aec..f3ce477b7bae 100644
--- a/arch/arm/boot/dts/stm32f469-disco.dts
+++ b/arch/arm/boot/dts/stm32f469-disco.dts
@@ -60,7 +60,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@00000000 {
 		device_type = "memory";
 		reg = <0x00000000 0x1000000>;
 	};
@@ -166,7 +166,6 @@
 };
 
 &ltdc {
-	dma-ranges;
 	status = "okay";
 
 	port {
diff --git a/arch/arm/boot/dts/stm32f469.dtsi b/arch/arm/boot/dts/stm32f469.dtsi
index 5ae5213f68cb..be002e8a78ac 100644
--- a/arch/arm/boot/dts/stm32f469.dtsi
+++ b/arch/arm/boot/dts/stm32f469.dtsi
@@ -8,7 +8,6 @@
 		dsi: dsi@40016c00 {
 			compatible = "st,stm32-dsi";
 			reg = <0x40016c00 0x800>;
-			interrupts = <92>;
 			resets = <&rcc STM32F4_APB2_RESET(DSI)>;
 			reset-names = "apb";
 			clocks = <&rcc 1 CLK_F469_DSI>, <&clk_hse>;
diff --git a/arch/arm/boot/dts/stm32f746-disco.dts b/arch/arm/boot/dts/stm32f746-disco.dts
index 0ba9c5b08ab9..569d23cc61e5 100644
--- a/arch/arm/boot/dts/stm32f746-disco.dts
+++ b/arch/arm/boot/dts/stm32f746-disco.dts
@@ -55,7 +55,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@c0000000 {
 		device_type = "memory";
 		reg = <0xC0000000 0x800000>;
 	};
diff --git a/arch/arm/boot/dts/stm32f769-disco.dts b/arch/arm/boot/dts/stm32f769-disco.dts
index 6f1d0ac8c31c..1626e00bb2cb 100644
--- a/arch/arm/boot/dts/stm32f769-disco.dts
+++ b/arch/arm/boot/dts/stm32f769-disco.dts
@@ -55,7 +55,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@c0000000 {
 		device_type = "memory";
 		reg = <0xC0000000 0x1000000>;
 	};
diff --git a/arch/arm/boot/dts/stm32h743i-disco.dts b/arch/arm/boot/dts/stm32h743i-disco.dts
index 3acd2e9c434e..e446d311c520 100644
--- a/arch/arm/boot/dts/stm32h743i-disco.dts
+++ b/arch/arm/boot/dts/stm32h743i-disco.dts
@@ -53,7 +53,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@d0000000 {
 		device_type = "memory";
 		reg = <0xd0000000 0x2000000>;
 	};
diff --git a/arch/arm/boot/dts/stm32h743i-eval.dts b/arch/arm/boot/dts/stm32h743i-eval.dts
index e4d3c58f3d97..8f398178f5e5 100644
--- a/arch/arm/boot/dts/stm32h743i-eval.dts
+++ b/arch/arm/boot/dts/stm32h743i-eval.dts
@@ -53,7 +53,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	memory {
+	memory@d0000000 {
 		device_type = "memory";
 		reg = <0xd0000000 0x2000000>;
 	};
diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
index 0a3a7d66737b..3d1ecb408b03 100644
--- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
@@ -137,6 +137,22 @@
 				status = "disabled";
 			};
 
+			adc12_ain_pins_a: adc12-ain-0 {
+				pins {
+					pinmux = <STM32_PINMUX('C', 3, ANALOG)>, /* ADC1 in13 */
+						 <STM32_PINMUX('F', 12, ANALOG)>, /* ADC1 in6 */
+						 <STM32_PINMUX('F', 13, ANALOG)>, /* ADC2 in2 */
+						 <STM32_PINMUX('F', 14, ANALOG)>; /* ADC2 in6 */
+				};
+			};
+
+			adc12_usb_cc_pins_a: adc12-usb-cc-pins-0 {
+				pins {
+					pinmux = <STM32_PINMUX('A', 4, ANALOG)>, /* ADC12 in18 */
+						 <STM32_PINMUX('A', 5, ANALOG)>; /* ADC12 in19 */
+				};
+			};
+
 			cec_pins_a: cec-0 {
 				pins {
 					pinmux = <STM32_PINMUX('A', 15, AF4)>;
@@ -167,6 +183,18 @@
 				};
 			};
 
+			dac_ch1_pins_a: dac-ch1 {
+				pins {
+					pinmux = <STM32_PINMUX('A', 4, ANALOG)>;
+				};
+			};
+
+			dac_ch2_pins_a: dac-ch2 {
+				pins {
+					pinmux = <STM32_PINMUX('A', 5, ANALOG)>;
+				};
+			};
+
 			dcmi_pins_a: dcmi-0 {
 				pins {
 					pinmux = <STM32_PINMUX('H', 8,  AF13)>,/* DCMI_HSYNC */
diff --git a/arch/arm/boot/dts/stm32mp157a-avenger96.dts b/arch/arm/boot/dts/stm32mp157a-avenger96.dts
index 2e4742c53d04..628c74a45a25 100644
--- a/arch/arm/boot/dts/stm32mp157a-avenger96.dts
+++ b/arch/arm/boot/dts/stm32mp157a-avenger96.dts
@@ -252,14 +252,13 @@
 				regulator-name = "vbus_otg";
 				interrupts = <IT_OCP_OTG 0>;
 				interrupt-parent = <&pmic>;
-				regulator-active-discharge;
 			};
 
 			vbus_sw: pwr_sw2 {
 				regulator-name = "vbus_sw";
 				interrupts = <IT_OCP_SWOUT 0>;
 				interrupt-parent = <&pmic>;
-				regulator-active-discharge;
+				regulator-active-discharge = <1>;
 			};
 		};
 
@@ -282,6 +281,11 @@
 	status = "okay";
 };
 
+&pwr_regulators {
+	vdd-supply = <&vdd>;
+	vdd_3v3_usbfs-supply = <&vdd_usb>;
+};
+
 &rng1 {
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/stm32mp157a-dk1.dts b/arch/arm/boot/dts/stm32mp157a-dk1.dts
index 0615d1c8a6fc..984a47cbd13d 100644
--- a/arch/arm/boot/dts/stm32mp157a-dk1.dts
+++ b/arch/arm/boot/dts/stm32mp157a-dk1.dts
@@ -25,6 +25,7 @@
 	};
 
 	memory@c0000000 {
+		device_type = "memory";
 		reg = <0xc0000000 0x20000000>;
 	};
 
@@ -92,7 +93,34 @@
 			"Playback" , "MCLK",
 			"Capture" , "MCLK",
 			"MICL" , "Mic Bias";
-		dais = <&sai2a_port &sai2b_port>;
+		dais = <&sai2a_port &sai2b_port &i2s2_port>;
+		status = "okay";
+	};
+};
+
+&adc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&adc12_ain_pins_a>, <&adc12_usb_cc_pins_a>;
+	vdd-supply = <&vdd>;
+	vdda-supply = <&vdd>;
+	vref-supply = <&vrefbuf>;
+	status = "disabled";
+	adc1: adc@0 {
+		/*
+		 * Type-C USB_PWR_CC1 & USB_PWR_CC2 on in18 & in19.
+		 * Use at least 5 * RC time, e.g. 5 * (Rp + Rd) * C:
+		 * 5 * (56 + 47kOhms) * 5pF => 2.5us.
+		 * Use arbitrary margin here (e.g. 5us).
+		 */
+		st,min-sample-time-nsecs = <5000>;
+		/* AIN connector, USB Type-C CC1 & CC2 */
+		st,adc-channels = <0 1 6 13 18 19>;
+		status = "okay";
+	};
+	adc2: adc@100 {
+		/* AIN connector, USB Type-C CC1 & CC2 */
+		st,adc-channels = <0 1 2 6 18 19>;
+		st,min-sample-time-nsecs = <5000>;
 		status = "okay";
 	};
 };
@@ -146,9 +174,7 @@
 		reset-gpios = <&gpioa 10 GPIO_ACTIVE_LOW>;
 		interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
 		interrupt-parent = <&gpiog>;
-		pinctrl-names = "default", "sleep";
-		pinctrl-0 = <&ltdc_pins_a>;
-		pinctrl-1 = <&ltdc_pins_sleep_a>;
+		#sound-dai-cells = <0>;
 		status = "okay";
 
 		ports {
@@ -161,6 +187,13 @@
 					remote-endpoint = <&ltdc_ep0_out>;
 				};
 			};
+
+			port@3 {
+				reg = <3>;
+				sii9022_tx_endpoint: endpoint {
+					remote-endpoint = <&i2s2_endpoint>;
+				};
+			};
 		};
 	};
 
@@ -226,7 +259,7 @@
 
 			vddcore: buck1 {
 				regulator-name = "vddcore";
-				regulator-min-microvolt = <800000>;
+				regulator-min-microvolt = <1200000>;
 				regulator-max-microvolt = <1350000>;
 				regulator-always-on;
 				regulator-initial-mode = <0>;
@@ -327,7 +360,7 @@
 			 vbus_sw: pwr_sw2 {
 				regulator-name = "vbus_sw";
 				interrupts = <IT_OCP_SWOUT 0>;
-				regulator-active-discharge;
+				regulator-active-discharge = <1>;
 			 };
 		};
 
@@ -346,6 +379,23 @@
 	};
 };
 
+&i2s2 {
+	clocks = <&rcc SPI2>, <&rcc SPI2_K>, <&rcc PLL3_Q>, <&rcc PLL3_R>;
+	clock-names = "pclk", "i2sclk", "x8k", "x11k";
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&i2s2_pins_a>;
+	pinctrl-1 = <&i2s2_pins_sleep_a>;
+	status = "okay";
+
+	i2s2_port: port {
+		i2s2_endpoint: endpoint {
+			remote-endpoint = <&sii9022_tx_endpoint>;
+			format = "i2s";
+			mclk-fs = <256>;
+		};
+	};
+};
+
 &ipcc {
 	status = "okay";
 };
@@ -356,6 +406,9 @@
 };
 
 &ltdc {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&ltdc_pins_a>;
+	pinctrl-1 = <&ltdc_pins_sleep_a>;
 	status = "okay";
 
 	port {
@@ -379,6 +432,11 @@
 	status = "okay";
 };
 
+&pwr_regulators {
+	vdd-supply = <&vdd>;
+	vdd_3v3_usbfs-supply = <&vdd_usb>;
+};
+
 &rng1 {
 	status = "okay";
 };
@@ -449,3 +507,10 @@
 	pinctrl-0 = <&uart4_pins_a>;
 	status = "okay";
 };
+
+&vrefbuf {
+	regulator-min-microvolt = <2500000>;
+	regulator-max-microvolt = <2500000>;
+	vdda-supply = <&vdd>;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/stm32mp157c-dk2.dts b/arch/arm/boot/dts/stm32mp157c-dk2.dts
index 20ea601a546d..d26adcbeba33 100644
--- a/arch/arm/boot/dts/stm32mp157c-dk2.dts
+++ b/arch/arm/boot/dts/stm32mp157c-dk2.dts
@@ -11,14 +11,6 @@
 / {
 	model = "STMicroelectronics STM32MP157C-DK2 Discovery Board";
 	compatible = "st,stm32mp157c-dk2", "st,stm32mp157";
-
-	reg18: reg18 {
-		compatible = "regulator-fixed";
-		regulator-name = "reg18";
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <1800000>;
-		regulator-always-on;
-	};
 };
 
 &dsi {
@@ -61,6 +53,19 @@
 	};
 };
 
+&i2c1 {
+	touchscreen@38 {
+		compatible = "focaltech,ft6236";
+		reg = <0x38>;
+		interrupts = <2 2>;
+		interrupt-parent = <&gpiof>;
+		interrupt-controller;
+		touchscreen-size-x = <480>;
+		touchscreen-size-y = <800>;
+		status = "okay";
+	};
+};
+
 &ltdc {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/stm32mp157c-ed1.dts b/arch/arm/boot/dts/stm32mp157c-ed1.dts
index 1d426ea8bdaf..b8cc0fb0ec48 100644
--- a/arch/arm/boot/dts/stm32mp157c-ed1.dts
+++ b/arch/arm/boot/dts/stm32mp157c-ed1.dts
@@ -74,22 +74,6 @@
 		serial0 = &uart4;
 	};
 
-	reg11: reg11 {
-		compatible = "regulator-fixed";
-		regulator-name = "reg11";
-		regulator-min-microvolt = <1100000>;
-		regulator-max-microvolt = <1100000>;
-		regulator-always-on;
-	};
-
-	reg18: reg18 {
-		compatible = "regulator-fixed";
-		regulator-name = "reg18";
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <1800000>;
-		regulator-always-on;
-	};
-
 	sd_switch: regulator-sd_switch {
 		compatible = "regulator-gpio";
 		regulator-name = "sd_switch";
@@ -100,7 +84,21 @@
 
 		gpios = <&gpiof 14 GPIO_ACTIVE_HIGH>;
 		gpios-states = <0>;
-		states = <1800000 0x1 2900000 0x0>;
+		states = <1800000 0x1>,
+			 <2900000 0x0>;
+	};
+};
+
+&dac {
+	pinctrl-names = "default";
+	pinctrl-0 = <&dac_ch1_pins_a &dac_ch2_pins_a>;
+	vref-supply = <&vdda>;
+	status = "disabled";
+	dac1: dac@1 {
+		status = "okay";
+	};
+	dac2: dac@2 {
+		status = "okay";
 	};
 };
 
@@ -143,7 +141,7 @@
 
 			vddcore: buck1 {
 				regulator-name = "vddcore";
-				regulator-min-microvolt = <800000>;
+				regulator-min-microvolt = <1200000>;
 				regulator-max-microvolt = <1350000>;
 				regulator-always-on;
 				regulator-initial-mode = <0>;
@@ -241,7 +239,7 @@
 			 vbus_sw: pwr_sw2 {
 				regulator-name = "vbus_sw";
 				interrupts = <IT_OCP_SWOUT 0>;
-				regulator-active-discharge;
+				regulator-active-discharge = <1>;
 			 };
 		};
 
@@ -279,6 +277,11 @@
 	status = "okay";
 };
 
+&pwr_regulators {
+	vdd-supply = <&vdd>;
+	vdd_3v3_usbfs-supply = <&vdd_usb>;
+};
+
 &rng1 {
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts
index 91fc0a315c49..3789312c8539 100644
--- a/arch/arm/boot/dts/stm32mp157c-ev1.dts
+++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts
@@ -32,7 +32,6 @@
 
 	joystick {
 		compatible = "gpio-keys";
-		#size-cells = <0>;
 		pinctrl-0 = <&joystick_pins>;
 		pinctrl-names = "default";
 		button-0 {
@@ -335,14 +334,12 @@
 
 &usbh_ehci {
 	phys = <&usbphyc_port0>;
-	phy-names = "usb";
 	status = "okay";
 };
 
 &usbotg_hs {
 	dr_mode = "peripheral";
 	phys = <&usbphyc_port1 0>;
-	phy-names = "usb2-phy";
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index f98e0370c0bc..ed8b258256d7 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -1079,6 +1079,29 @@
 			#reset-cells = <1>;
 		};
 
+		pwr_regulators: pwr@50001000 {
+			compatible = "st,stm32mp1,pwr-reg";
+			reg = <0x50001000 0x10>;
+
+			reg11: reg11 {
+				regulator-name = "reg11";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+			};
+
+			reg18: reg18 {
+				regulator-name = "reg18";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+			};
+
+			usb33: usb33 {
+				regulator-name = "usb33";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+			};
+		};
+
 		exti: interrupt-controller@5000d000 {
 			compatible = "st,stm32mp1-exti", "syscon";
 			interrupt-controller;
diff --git a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
index 7033a123c9a3..d6bb82c295f0 100644
--- a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
@@ -130,7 +130,7 @@
 &i2c1 {
 	status = "okay";
 
-	at24@50 {
+	eeprom@50 {
 		compatible = "atmel,24c16";
 		pagesize = <16>;
 		reg = <0x50>;
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index ac7638078420..2cf34ae1c17b 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -469,7 +469,6 @@
 				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
 			resets = <&ccu RST_AHB1_HDMI>;
-			reset-names = "ahb";
 			dma-names = "ddc-tx", "ddc-rx", "audio-tx";
 			dmas = <&dma 13>, <&dma 13>, <&dma 14>;
 			status = "disabled";
diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
index fb928503ad45..d9be511f054f 100644
--- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
@@ -101,7 +101,7 @@
 		initial-mode = <1>; /* initialize in HUB mode */
 		disabled-ports = <1>;
 		intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */
-		reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */
+		reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */
 		connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */
 		refclk-frequency = <19200000>;
 	};
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 3bec3e0a81b2..2fd31a0a0b34 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -164,6 +164,22 @@
 	status = "okay";
 };
 
+&i2c0 {
+	clock-frequency = <400000>;
+	status = "okay";
+
+	touchscreen@38 {
+		compatible = "edt,edt-ft5x06";
+		reg = <0x38>;
+		interrupt-parent = <&r_pio>;
+		interrupts = <0 7 IRQ_TYPE_EDGE_FALLING>; /* PL7 */
+		reset-gpios = <&pio 3 5 GPIO_ACTIVE_LOW>; /* PD5 */
+		vcc-supply = <&reg_ldo_io0>;
+		touchscreen-size-x = <1024>;
+		touchscreen-size-y = <600>;
+	};
+};
+
 &i2c1 {
 	clock-frequency = <400000>;
 	status = "okay";
diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi
index 74bb053cf23c..53c38deb8a08 100644
--- a/arch/arm/boot/dts/sun8i-a83t.dtsi
+++ b/arch/arm/boot/dts/sun8i-a83t.dtsi
@@ -583,6 +583,15 @@
 			reg = <0x1c14000 0x400>;
 		};
 
+		crypto: crypto@1c15000 {
+			compatible = "allwinner,sun8i-a83t-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
+			resets = <&ccu RST_BUS_SS>;
+			clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
+			clock-names = "bus", "mod";
+		};
+
 		usb_otg: usb@1c19000 {
 			compatible = "allwinner,sun8i-a83t-musb",
 				     "allwinner,sun8i-a33-musb";
diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts
new file mode 100644
index 000000000000..c73f59900975
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Karl Palsson <karlp@tweak.net.au>
+ */
+
+/dts-v1/;
+#include "sun8i-h3.dtsi"
+#include "sunxi-common-regulators.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+	model = "FriendlyARM NanoPi Duo2";
+	compatible = "friendlyarm,nanopi-duo2", "allwinner,sun8i-h3";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		pwr {
+			label = "nanopi:red:pwr";
+			gpios = <&r_pio 0 10 GPIO_ACTIVE_HIGH>; /* PL10 */
+			default-state = "on";
+		};
+
+		status {
+			label = "nanopi:green:status";
+			gpios = <&pio 0 10 GPIO_ACTIVE_HIGH>; /* PA10 */
+		};
+	};
+
+	r_gpio_keys {
+		compatible = "gpio-keys";
+
+		k1 {
+			label = "k1";
+			linux,code = <BTN_0>;
+			gpios = <&r_pio 0 3 GPIO_ACTIVE_LOW>; /* PL3 */
+		};
+	};
+
+	reg_vdd_cpux: vdd-cpux-regulator {
+		compatible = "regulator-gpio";
+		regulator-name = "vdd-cpux";
+		regulator-min-microvolt = <1100000>;
+		regulator-max-microvolt = <1300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-ramp-delay = <50>; /* 4ms */
+
+		enable-active-high;
+		enable-gpio = <&r_pio 0 8 GPIO_ACTIVE_HIGH>; /* PL8 */
+		gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */
+		gpios-states = <0x1>;
+		states = <1100000 0x0
+			  1300000 0x1>;
+	};
+
+	reg_vcc_dram: vcc-dram {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc-dram";
+		regulator-min-microvolt = <1500000>;
+		regulator-max-microvolt = <1500000>;
+		regulator-always-on;
+		regulator-boot-on;
+		enable-active-high;
+		gpio = <&r_pio 0 9 GPIO_ACTIVE_HIGH>; /* PL9 */
+		vin-supply = <&reg_vcc5v0>;
+        };
+
+	reg_vdd_sys: vdd-sys {
+		compatible = "regulator-fixed";
+		regulator-name = "vdd-sys";
+		regulator-min-microvolt = <1200000>;
+		regulator-max-microvolt = <1200000>;
+		regulator-always-on;
+		regulator-boot-on;
+		enable-active-high;
+		gpio = <&r_pio 0 8 GPIO_ACTIVE_HIGH>; /* PL8 */
+		vin-supply = <&reg_vcc5v0>;
+        };
+
+	wifi_pwrseq: wifi_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		reset-gpios = <&r_pio 0 7 GPIO_ACTIVE_LOW>; /* PL7 */
+		clocks = <&rtc 1>;
+		clock-names = "ext_clock";
+	};
+
+};
+
+&cpu0 {
+	cpu-supply = <&reg_vdd_cpux>;
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&mmc0 {
+	bus-width = <4>;
+	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 */
+	status = "okay";
+	vmmc-supply = <&reg_vcc3v3>;
+};
+
+&mmc1 {
+	vmmc-supply = <&reg_vcc3v3>;
+	vqmmc-supply = <&reg_vcc3v3>;
+	mmc-pwrseq = <&wifi_pwrseq>;
+	bus-width = <4>;
+	non-removable;
+	status = "okay";
+
+	sdio_wifi: sdio_wifi@1 {
+		reg = <1>;
+		compatible = "brcm,bcm4329-fmac";
+		interrupt-parent = <&pio>;
+		interrupts = <6 10 IRQ_TYPE_LEVEL_LOW>; /* PG10 / EINT10 */
+		interrupt-names = "host-wake";
+	};
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&reg_usb0_vbus {
+	gpio = <&r_pio 0 2 GPIO_ACTIVE_HIGH>; /* PL2 */
+	status = "okay";
+};
+
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart0_pa_pins>;
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart2_pins>, <&uart2_rts_cts_pins>;
+	uart-has-rtscts;
+	status = "okay";
+
+	bluetooth {
+		compatible = "brcm,bcm43438-bt";
+		clocks = <&rtc 1>;
+		clock-names = "lpo";
+		vbat-supply = <&reg_vcc3v3>;
+		vddio-supply = <&reg_vcc3v3>;
+		device-wakeup-gpios = <&pio 0 8 GPIO_ACTIVE_HIGH>; /* PA8 */
+		host-wakeup-gpios = <&pio 0 7 GPIO_ACTIVE_HIGH>; /* PA7 */
+		shutdown-gpios = <&pio 6 13 GPIO_ACTIVE_HIGH>; /* PG13 */
+	};
+};
+
+&usb_otg {
+	status = "okay";
+	dr_mode = "otg";
+};
+
+&usbphy {
+	usb0_id_det-gpios = <&pio 6 12 GPIO_ACTIVE_HIGH>; /* PG12 */
+	usb0_vbus-supply = <&reg_usb0_vbus>;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi
index e37c30e811d3..fe773c72a69b 100644
--- a/arch/arm/boot/dts/sun8i-h3.dtsi
+++ b/arch/arm/boot/dts/sun8i-h3.dtsi
@@ -120,6 +120,19 @@
 	};
 
 	soc {
+		deinterlace: deinterlace@1400000 {
+			compatible = "allwinner,sun8i-h3-deinterlace";
+			reg = <0x01400000 0x20000>;
+			clocks = <&ccu CLK_BUS_DEINTERLACE>,
+				 <&ccu CLK_DEINTERLACE>,
+				 <&ccu CLK_DRAM_DEINTERLACE>;
+			clock-names = "bus", "mod", "ram";
+			resets = <&ccu RST_BUS_DEINTERLACE>;
+			interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+			interconnects = <&mbus 9>;
+			interconnect-names = "dma-mem";
+		};
+
 		syscon: system-control@1c00000 {
 			compatible = "allwinner,sun8i-h3-system-control";
 			reg = <0x01c00000 0x1000>;
@@ -153,6 +166,15 @@
 			allwinner,sram = <&ve_sram 1>;
 		};
 
+		crypto: crypto@1c15000 {
+			compatible = "allwinner,sun8i-h3-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+			clock-names = "bus", "mod";
+			resets = <&ccu RST_BUS_CE>;
+		};
+
 		mali: gpu@1c40000 {
 			compatible = "allwinner,sun8i-h3-mali", "arm,mali-400";
 			reg = <0x01c40000 0x10000>;
diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi
index c9c2688db66d..421dfbbfd7ee 100644
--- a/arch/arm/boot/dts/sun8i-r40.dtsi
+++ b/arch/arm/boot/dts/sun8i-r40.dtsi
@@ -266,6 +266,15 @@
 			#phy-cells = <1>;
 		};
 
+		crypto: crypto@1c15000 {
+			compatible = "allwinner,sun8i-r40-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+			clock-names = "bus", "mod";
+			resets = <&ccu RST_BUS_CE>;
+		};
+
 		ehci1: usb@1c19000 {
 			compatible = "allwinner,sun8i-r40-ehci", "generic-ehci";
 			reg = <0x01c19000 0x100>;
diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index b9b6fb00be28..1d900f591d5f 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -457,6 +457,15 @@
 			reg = <0x01700000 0x100>;
 		};
 
+		crypto: crypto@1c02000 {
+			compatible = "allwinner,sun9i-a80-crypto";
+			reg = <0x01c02000 0x1000>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
+			resets = <&ccu RST_BUS_SS>;
+			clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
+			clock-names = "bus", "mod";
+		};
+
 		mmc0: mmc@1c0f000 {
 			compatible = "allwinner,sun9i-a80-mmc";
 			reg = <0x01c0f000 0x1000>;
@@ -947,6 +956,7 @@
 			compatible = "allwinner,sun6i-a31-wdt";
 			reg = <0x06000ca0 0x20>;
 			interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&osc24M>;
 		};
 
 		pio: pinctrl@6000800 {
@@ -1154,6 +1164,7 @@
 			compatible = "allwinner,sun6i-a31-wdt";
 			reg = <0x08001000 0x20>;
 			interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&osc24M>;
 		};
 
 		prcm@8001400 {
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index 107eeafad20a..0afea59486c2 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -109,6 +109,7 @@
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
+		dma-ranges;
 		ranges;
 
 		display_clocks: clock@1000000 {
@@ -478,6 +479,11 @@
 				function = "uart2";
 			};
 
+			uart2_rts_cts_pins: uart2-rts-cts-pins {
+				pins = "PA2", "PA3";
+				function = "uart2";
+			};
+
 			uart3_pins: uart3-pins {
 				pins = "PA13", "PA14";
 				function = "uart3";
@@ -544,6 +550,14 @@
 			};
 		};
 
+		mbus: dram-controller@1c62000 {
+			compatible = "allwinner,sun8i-h3-mbus";
+			reg = <0x01c62000 0x1000>;
+			clocks = <&ccu 113>;
+			dma-ranges = <0x00000000 0x40000000 0xc0000000>;
+			#interconnect-cells = <1>;
+		};
+
 		spi0: spi@1c68000 {
 			compatible = "allwinner,sun8i-h3-spi";
 			reg = <0x01c68000 0x1000>;
diff --git a/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi b/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi
index 9af21fe93a5c..fb6b3e1a0b1f 100644
--- a/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi
+++ b/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 / {
+	apbmisc@70000800 {
+		nvidia,long-ram-code;
+	};
+
 	clock@60006000 {
 		emc-timings-1 {
 			nvidia,ram-code = <1>;
@@ -52,7 +56,154 @@
 				clocks = <&tegra_car TEGRA124_CLK_PLL_M>;
 				clock-names = "emc-parent";
 			};
-			/* TODO: Add 528MHz frequency */
+			timing-528000000 {
+				clock-frequency = <528000000>;
+				nvidia,parent-clock-frequency = <528000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M_UD>;
+				clock-names = "emc-parent";
+			};
+			timing-600000000 {
+				clock-frequency = <600000000>;
+				nvidia,parent-clock-frequency = <600000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_C_UD>;
+				clock-names = "emc-parent";
+			};
+			timing-792000000 {
+				clock-frequency = <792000000>;
+				nvidia,parent-clock-frequency = <792000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M_UD>;
+				clock-names = "emc-parent";
+			};
+		};
+
+		emc-timings-4 {
+			nvidia,ram-code = <4>;
+
+			timing-12750000 {
+				clock-frequency = <12750000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-20400000 {
+				clock-frequency = <20400000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-40800000 {
+				clock-frequency = <40800000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-68000000 {
+				clock-frequency = <68000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-102000000 {
+				clock-frequency = <102000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-204000000 {
+				clock-frequency = <204000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-300000000 {
+				clock-frequency = <300000000>;
+				nvidia,parent-clock-frequency = <600000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_C>;
+				clock-names = "emc-parent";
+			};
+			timing-396000000 {
+				clock-frequency = <396000000>;
+				nvidia,parent-clock-frequency = <792000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M>;
+				clock-names = "emc-parent";
+			};
+			timing-528000000 {
+				clock-frequency = <528000000>;
+				nvidia,parent-clock-frequency = <528000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M_UD>;
+				clock-names = "emc-parent";
+			};
+			timing-600000000 {
+				clock-frequency = <600000000>;
+				nvidia,parent-clock-frequency = <600000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_C_UD>;
+				clock-names = "emc-parent";
+			};
+			timing-792000000 {
+				clock-frequency = <792000000>;
+				nvidia,parent-clock-frequency = <792000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M_UD>;
+				clock-names = "emc-parent";
+			};
+		};
+
+		emc-timings-6 {
+			nvidia,ram-code = <6>;
+
+			timing-12750000 {
+				clock-frequency = <12750000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-20400000 {
+				clock-frequency = <20400000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-40800000 {
+				clock-frequency = <40800000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-68000000 {
+				clock-frequency = <68000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-102000000 {
+				clock-frequency = <102000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-204000000 {
+				clock-frequency = <204000000>;
+				nvidia,parent-clock-frequency = <408000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
+				clock-names = "emc-parent";
+			};
+			timing-300000000 {
+				clock-frequency = <300000000>;
+				nvidia,parent-clock-frequency = <600000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_C>;
+				clock-names = "emc-parent";
+			};
+			timing-396000000 {
+				clock-frequency = <396000000>;
+				nvidia,parent-clock-frequency = <792000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M>;
+				clock-names = "emc-parent";
+			};
+			timing-528000000 {
+				clock-frequency = <528000000>;
+				nvidia,parent-clock-frequency = <528000000>;
+				clocks = <&tegra_car TEGRA124_CLK_PLL_M_UD>;
+				clock-names = "emc-parent";
+			};
 			timing-600000000 {
 				clock-frequency = <600000000>;
 				nvidia,parent-clock-frequency = <600000000>;
@@ -94,149 +245,149 @@
 				nvidia,emc-zcal-interval = <0x00000000>;
 
 				nvidia,emc-configuration = <
-					0x00000000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000000
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000c
-					0x0000000d
-					0x0000000f
-					0x00000060
-					0x00000000
-					0x00000018
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x00000007
-					0x0000000f
-					0x00000005
-					0x00000005
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000000
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000064
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000007
-					0x00000000
-					0x00000042
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000f2f3
-					0x800001c5
-					0x0000000a
+					0x00000000 /* EMC_RC */
+					0x00000003 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000060 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000018 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000005 /* EMC_TXSR */
+					0x00000005 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000064 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000007 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x800001c5 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -262,149 +413,149 @@
 				nvidia,emc-zcal-interval = <0x00000000>;
 
 				nvidia,emc-configuration = <
-					0x00000000
-					0x00000005
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000000
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000c
-					0x0000000d
-					0x0000000f
-					0x0000009a
-					0x00000000
-					0x00000026
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x00000007
-					0x0000000f
-					0x00000006
-					0x00000006
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000000
-					0x00000000
-					0x00000005
-					0x00000005
-					0x000000a0
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x0000000b
-					0x00000000
-					0x00000042
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000f2f3
-					0x8000023a
-					0x0000000a
+					0x00000000 /* EMC_RC */
+					0x00000005 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x0000009a /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000026 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000006 /* EMC_TXSR */
+					0x00000006 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x000000a0 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x0000000b /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000023a /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -430,149 +581,149 @@
 				nvidia,emc-zcal-interval = <0x00000000>;
 
 				nvidia,emc-configuration = <
-					0x00000001
-					0x0000000a
-					0x00000000
-					0x00000001
-					0x00000000
-					0x00000004
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000000
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000c
-					0x0000000d
-					0x0000000f
-					0x00000134
-					0x00000000
-					0x0000004d
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x00000008
-					0x0000000f
-					0x0000000c
-					0x0000000c
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000000
-					0x00000000
-					0x00000005
-					0x00000005
-					0x0000013f
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000015
-					0x00000000
-					0x00000042
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000f2f3
-					0x80000370
-					0x0000000a
+					0x00000001 /* EMC_RC */
+					0x0000000a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000001 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000134 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x0000004d /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000008 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000000c /* EMC_TXSR */
+					0x0000000c /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000013f /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000015 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000370 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -598,149 +749,149 @@
 				nvidia,emc-zcal-interval = <0x00000000>;
 
 				nvidia,emc-configuration = <
-					0x00000003
-					0x00000011
-					0x00000000
-					0x00000002
-					0x00000000
-					0x00000004
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000000
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000c
-					0x0000000d
-					0x0000000f
-					0x00000202
-					0x00000000
-					0x00000080
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x0000000f
-					0x0000000f
-					0x00000013
-					0x00000013
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000001
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000213
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000022
-					0x00000000
-					0x00000042
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000f2f3
-					0x8000050e
-					0x0000000a
+					0x00000003 /* EMC_RC */
+					0x00000011 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000002 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000202 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000080 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000000f /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000013 /* EMC_TXSR */
+					0x00000013 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000001 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000213 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000022 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000050e /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -766,149 +917,149 @@
 				nvidia,emc-zcal-interval = <0x00000000>;
 
 				nvidia,emc-configuration = <
-					0x00000004
-					0x0000001a
-					0x00000000
-					0x00000003
-					0x00000001
-					0x00000004
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000001
-					0x00000001
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x0000000c
-					0x0000000d
-					0x0000000f
-					0x00000304
-					0x00000000
-					0x000000c1
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x00000018
-					0x0000000f
-					0x0000001c
-					0x0000001c
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000003
-					0x00000000
-					0x00000005
-					0x00000005
-					0x0000031c
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x000fc000
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x0000fc00
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000033
-					0x00000000
-					0x00000042
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000f2f3
-					0x80000713
-					0x0000000a
+					0x00000004 /* EMC_RC */
+					0x0000001a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000003 /* EMC_RAS */
+					0x00000001 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000001 /* EMC_RD_RCD */
+					0x00000001 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000304 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000000c1 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000018 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000001c /* EMC_TXSR */
+					0x0000001c /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000003 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000031c /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000033 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000713 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -934,149 +1085,149 @@
 				nvidia,emc-zcal-interval = <0x00020000>;
 
 				nvidia,emc-configuration = <
-					0x00000009
-					0x00000035
-					0x00000000
-					0x00000007
-					0x00000002
-					0x00000005
-					0x0000000a
-					0x00000003
-					0x0000000b
-					0x00000002
-					0x00000002
-					0x00000003
-					0x00000003
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000006
-					0x00000002
-					0x00000000
-					0x00000004
-					0x00000006
-					0x00010000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000003
-					0x0000000d
-					0x0000000f
-					0x00000011
-					0x00000607
-					0x00000000
-					0x00000181
-					0x00000002
-					0x00000002
-					0x00000001
-					0x00000000
-					0x00000032
-					0x0000000f
-					0x00000038
-					0x00000038
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000007
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000638
-					0x00000000
-					0x00000000
-					0x00000000
-					0x106aa298
-					0x002c00a0
-					0x00008000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00064000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00004000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00090000
-					0x00090000
-					0x00094000
-					0x00094000
-					0x00009400
-					0x00009000
-					0x00009000
-					0x00009000
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000303
-					0x81f1f108
-					0x07070004
-					0x0000003f
-					0x016eeeee
-					0x51451400
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000066
-					0x00000000
-					0x00000100
-					0x000c000c
-					0x00000000
-					0x00000003
-					0x0000d2b3
-					0x80000d22
-					0x0000000a
+					0x00000009 /* EMC_RC */
+					0x00000035 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000007 /* EMC_RAS */
+					0x00000002 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000002 /* EMC_RD_RCD */
+					0x00000002 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000004 /* EMC_EINPUT */
+					0x00000006 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000003 /* EMC_QRST */
+					0x0000000d /* EMC_QSAFE */
+					0x0000000f /* EMC_RDV */
+					0x00000011 /* EMC_RDV_MASK */
+					0x00000607 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000181 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000032 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000038 /* EMC_TXSR */
+					0x00000038 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000007 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000638 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00090000 /* EMC_DLL_XFORM_DQ0 */
+					0x00090000 /* EMC_DLL_XFORM_DQ1 */
+					0x00094000 /* EMC_DLL_XFORM_DQ2 */
+					0x00094000 /* EMC_DLL_XFORM_DQ3 */
+					0x00009400 /* EMC_DLL_XFORM_DQ4 */
+					0x00009000 /* EMC_DLL_XFORM_DQ5 */
+					0x00009000 /* EMC_DLL_XFORM_DQ6 */
+					0x00009000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000066 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000d2b3 /* EMC_CFG_PIPE */
+					0x80000d22 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
 				>;
 			};
 
@@ -1102,149 +1253,149 @@
 				nvidia,emc-zcal-interval = <0x00020000>;
 
 				nvidia,emc-configuration = <
-					0x0000000d
-					0x0000004c
-					0x00000000
-					0x00000009
-					0x00000003
-					0x00000004
-					0x00000008
-					0x00000002
-					0x00000009
-					0x00000003
-					0x00000003
-					0x00000002
-					0x00000002
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000005
-					0x00000002
-					0x00000000
-					0x00000002
-					0x00000007
-					0x00020000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000001
-					0x0000000e
-					0x00000010
-					0x00000012
-					0x000008e4
-					0x00000000
-					0x00000239
-					0x00000001
-					0x00000008
-					0x00000001
-					0x00000000
-					0x0000004a
-					0x0000000e
-					0x00000051
-					0x00000200
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000009
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000924
-					0x00000000
-					0x00000000
-					0x00000000
-					0x104ab098
-					0x002c00a0
-					0x00008000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00098000
-					0x00098000
-					0x00000000
-					0x00098000
-					0x00098000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00060000
-					0x00060000
-					0x00060000
-					0x00060000
-					0x00006000
-					0x00006000
-					0x00006000
-					0x00006000
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000101
-					0x81f1f108
-					0x07070004
-					0x00000000
-					0x016eeeee
-					0x51451420
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x00000096
-					0x00000000
-					0x00000100
-					0x0174000c
-					0x00000000
-					0x00000003
-					0x000052a3
-					0x800012d7
-					0x00000009
+					0x0000000d /* EMC_RC */
+					0x0000004c /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000009 /* EMC_RAS */
+					0x00000003 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x00000009 /* EMC_W2P */
+					0x00000003 /* EMC_RD_RCD */
+					0x00000003 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000007 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x0000000e /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x000008e4 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000239 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000004a /* EMC_AR2PDEN */
+					0x0000000e /* EMC_RW2PDEN */
+					0x00000051 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000009 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000924 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00060000 /* EMC_DLL_XFORM_DQ0 */
+					0x00060000 /* EMC_DLL_XFORM_DQ1 */
+					0x00060000 /* EMC_DLL_XFORM_DQ2 */
+					0x00060000 /* EMC_DLL_XFORM_DQ3 */
+					0x00006000 /* EMC_DLL_XFORM_DQ4 */
+					0x00006000 /* EMC_DLL_XFORM_DQ5 */
+					0x00006000 /* EMC_DLL_XFORM_DQ6 */
+					0x00006000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000096 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x0174000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x800012d7 /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
 				>;
 			};
 
@@ -1270,149 +1421,317 @@
 				nvidia,emc-zcal-interval = <0x00020000>;
 
 				nvidia,emc-configuration = <
-					0x00000012
-					0x00000065
-					0x00000000
-					0x0000000c
-					0x00000004
-					0x00000005
-					0x00000008
-					0x00000002
-					0x0000000a
-					0x00000004
-					0x00000004
-					0x00000002
-					0x00000002
-					0x00000000
-					0x00000003
-					0x00000003
-					0x00000005
-					0x00000002
-					0x00000000
-					0x00000001
-					0x00000008
-					0x00020000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x0000000f
-					0x00000010
-					0x00000012
-					0x00000bd1
-					0x00000000
-					0x000002f4
-					0x00000001
-					0x00000008
-					0x00000001
-					0x00000000
-					0x00000063
-					0x0000000f
-					0x0000006b
-					0x00000200
-					0x00000004
-					0x00000005
-					0x00000004
-					0x0000000d
-					0x00000000
-					0x00000005
-					0x00000005
-					0x00000c11
-					0x00000000
-					0x00000000
-					0x00000000
-					0x104ab098
-					0x002c00a0
-					0x00008000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00030000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00070000
-					0x00070000
-					0x00000000
-					0x00070000
-					0x00070000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00048000
-					0x00048000
-					0x00048000
-					0x00048000
-					0x00004800
-					0x00004800
-					0x00004800
-					0x00004800
-					0x10000280
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc081
-					0x00000101
-					0x81f1f108
-					0x07070004
-					0x00000000
-					0x016eeeee
-					0x51451420
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0000003f
-					0x000000c6
-					0x00000000
-					0x00000100
-					0x015b000c
-					0x00000000
-					0x00000003
-					0x000052a3
-					0x8000188b
-					0x00000009
+					0x00000012 /* EMC_RC */
+					0x00000065 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x0000000c /* EMC_RAS */
+					0x00000004 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000a /* EMC_W2P */
+					0x00000004 /* EMC_RD_RCD */
+					0x00000004 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000001 /* EMC_EINPUT */
+					0x00000008 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000000 /* EMC_QRST */
+					0x0000000f /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x00000bd1 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000002f4 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000063 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000006b /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x0000000d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000c11 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00048000 /* EMC_DLL_XFORM_DQ0 */
+					0x00048000 /* EMC_DLL_XFORM_DQ1 */
+					0x00048000 /* EMC_DLL_XFORM_DQ2 */
+					0x00048000 /* EMC_DLL_XFORM_DQ3 */
+					0x00004800 /* EMC_DLL_XFORM_DQ4 */
+					0x00004800 /* EMC_DLL_XFORM_DQ5 */
+					0x00004800 /* EMC_DLL_XFORM_DQ6 */
+					0x00004800 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x000000c6 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x015b000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x8000188b /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0000089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000941>;
+				nvidia,emc-mrs-wait-cnt = <0x013a000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0123133d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000018 /* EMC_RC */
+					0x00000088 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000011 /* EMC_RAS */
+					0x00000006 /* EMC_RP */
+					0x00000006 /* EMC_R2W */
+					0x00000009 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000d /* EMC_W2P */
+					0x00000006 /* EMC_RD_RCD */
+					0x00000006 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000007 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000009 /* EMC_EINPUT_DURATION */
+					0x00040000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000010 /* EMC_QSAFE */
+					0x00000013 /* EMC_RDV */
+					0x00000015 /* EMC_RDV_MASK */
+					0x00000fd6 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000003f5 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000b /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000084 /* EMC_AR2PDEN */
+					0x00000012 /* EMC_RW2PDEN */
+					0x0000008f /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000013 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001017 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe01200b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x013a000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000042a0 /* EMC_CFG_PIPE */
+					0x80002062 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000b /* EMC_QPOP */
 				>;
 			};
 
@@ -1438,149 +1757,149 @@
 				nvidia,emc-zcal-interval = <0x00020000>;
 
 				nvidia,emc-configuration = <
-					0x0000001c
-					0x0000009a
-					0x00000000
-					0x00000013
-					0x00000007
-					0x00000007
-					0x0000000b
-					0x00000003
-					0x00000010
-					0x00000007
-					0x00000007
-					0x00000002
-					0x00000002
-					0x00000000
-					0x00000005
-					0x00000005
-					0x0000000a
-					0x00000002
-					0x00000000
-					0x00000003
-					0x0000000b
-					0x00070000
-					0x00000003
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000002
-					0x00000012
-					0x00000016
-					0x00000018
-					0x00001208
-					0x00000000
-					0x00000482
-					0x00000002
-					0x0000000d
-					0x00000001
-					0x00000000
-					0x00000096
-					0x00000015
-					0x000000a2
-					0x00000200
-					0x00000004
-					0x00000005
-					0x00000004
-					0x00000015
-					0x00000000
-					0x00000006
-					0x00000006
-					0x00001249
-					0x00000000
-					0x00000000
-					0x00000000
-					0x104ab098
-					0xe00e00b1
-					0x00008000
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00048000
-					0x00048000
-					0x00000000
-					0x00048000
-					0x00048000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000004
-					0x00000004
-					0x00000002
-					0x00000005
-					0x00000006
-					0x00000003
-					0x00000006
-					0x00000005
-					0x00000004
-					0x00000004
-					0x00000002
-					0x00000005
-					0x00000006
-					0x00000003
-					0x00000006
-					0x00000005
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x0000000e
-					0x100002a0
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc085
-					0x00000101
-					0x81f1f108
-					0x07070004
-					0x00000000
-					0x016eeeee
-					0x51451420
-					0x00514514
-					0x00514514
-					0x51451400
-					0x0606003f
-					0x00000000
-					0x00000000
-					0x00000100
-					0x0128000c
-					0x00000000
-					0x00000003
-					0x000040a0
-					0x800024aa
-					0x0000000e
+					0x0000001c /* EMC_RC */
+					0x0000009a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000013 /* EMC_RAS */
+					0x00000007 /* EMC_RP */
+					0x00000007 /* EMC_R2W */
+					0x0000000b /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x00000010 /* EMC_W2P */
+					0x00000007 /* EMC_RD_RCD */
+					0x00000007 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x0000000a /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000003 /* EMC_EINPUT */
+					0x0000000b /* EMC_EINPUT_DURATION */
+					0x00070000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000002 /* EMC_QRST */
+					0x00000012 /* EMC_QSAFE */
+					0x00000016 /* EMC_RDV */
+					0x00000018 /* EMC_RDV_MASK */
+					0x00001208 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000482 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000d /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000096 /* EMC_AR2PDEN */
+					0x00000015 /* EMC_RW2PDEN */
+					0x000000a2 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000015 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001249 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe00e00b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x0128000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000040a0 /* EMC_CFG_PIPE */
+					0x800024aa /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000e /* EMC_QPOP */
 				>;
 			};
 
@@ -1606,152 +1925,3855 @@
 				nvidia,emc-zcal-interval = <0x00020000>;
 
 				nvidia,emc-configuration = <
-					0x00000025
-					0x000000cc
-					0x00000000
-					0x0000001a
-					0x00000009
-					0x00000008
-					0x0000000d
-					0x00000004
-					0x00000013
-					0x00000009
-					0x00000009
-					0x00000003
-					0x00000002
-					0x00000000
-					0x00000006
-					0x00000006
-					0x0000000b
-					0x00000002
-					0x00000000
-					0x00000002
-					0x0000000d
-					0x00080000
-					0x00000004
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000001
-					0x00000014
-					0x00000018
-					0x0000001a
-					0x000017e2
-					0x00000000
-					0x000005f8
-					0x00000003
-					0x00000011
-					0x00000001
-					0x00000000
-					0x000000c6
-					0x00000018
-					0x000000d6
-					0x00000200
-					0x00000005
-					0x00000006
-					0x00000005
-					0x0000001d
-					0x00000000
-					0x00000008
-					0x00000008
-					0x00001822
-					0x00000000
-					0x80000005
-					0x00000000
-					0x104ab198
-					0xe00700b1
-					0x00008000
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000005
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00034000
-					0x00034000
-					0x00000000
-					0x00034000
-					0x00034000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000000
-					0x00000008
-					0x00000008
-					0x00000005
-					0x00000009
-					0x00000009
-					0x00000007
-					0x00000009
-					0x00000008
-					0x00000008
-					0x00000008
-					0x00000005
-					0x00000009
-					0x00000009
-					0x00000007
-					0x00000009
-					0x00000008
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x0000000a
-					0x100002a0
-					0x00000000
-					0x00111111
-					0x00000000
-					0x00000000
-					0x77ffc085
-					0x00000101
-					0x81f1f108
-					0x07070004
-					0x00000000
-					0x016eeeee
-					0x61861820
-					0x00514514
-					0x00514514
-					0x61861800
-					0x0606003f
-					0x00000000
-					0x00000000
-					0x00000100
-					0x00f8000c
-					0x00000007
-					0x00000004
-					0x00004080
-					0x80003012
-					0x0000000f
+					0x00000025 /* EMC_RC */
+					0x000000cc /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x0000001a /* EMC_RAS */
+					0x00000009 /* EMC_RP */
+					0x00000008 /* EMC_R2W */
+					0x0000000d /* EMC_W2R */
+					0x00000004 /* EMC_R2P */
+					0x00000013 /* EMC_W2P */
+					0x00000009 /* EMC_RD_RCD */
+					0x00000009 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x0000000b /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x0000000d /* EMC_EINPUT_DURATION */
+					0x00080000 /* EMC_PUTERM_EXTRA */
+					0x00000004 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000014 /* EMC_QSAFE */
+					0x00000018 /* EMC_RDV */
+					0x0000001a /* EMC_RDV_MASK */
+					0x000017e2 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000005f8 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000003 /* EMC_PDEX2WR */
+					0x00000011 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x000000c6 /* EMC_AR2PDEN */
+					0x00000018 /* EMC_RW2PDEN */
+					0x000000d6 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000005 /* EMC_TCKE */
+					0x00000006 /* EMC_TCKESR */
+					0x00000005 /* EMC_TPD */
+					0x0000001d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000008 /* EMC_TCLKSTABLE */
+					0x00000008 /* EMC_TCLKSTOP */
+					0x00001822 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x80000005 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab198 /* EMC_FBIO_CFG5 */
+					0xe00700b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00000005 /* EMC_DLL_XFORM_DQS0 */
+					0x00000005 /* EMC_DLL_XFORM_DQS1 */
+					0x00000005 /* EMC_DLL_XFORM_DQS2 */
+					0x00000005 /* EMC_DLL_XFORM_DQS3 */
+					0x00000005 /* EMC_DLL_XFORM_DQS4 */
+					0x00000005 /* EMC_DLL_XFORM_DQS5 */
+					0x00000005 /* EMC_DLL_XFORM_DQS6 */
+					0x00000005 /* EMC_DLL_XFORM_DQS7 */
+					0x00000005 /* EMC_DLL_XFORM_DQS8 */
+					0x00000005 /* EMC_DLL_XFORM_DQS9 */
+					0x00000005 /* EMC_DLL_XFORM_DQS10 */
+					0x00000005 /* EMC_DLL_XFORM_DQS11 */
+					0x00000005 /* EMC_DLL_XFORM_DQS12 */
+					0x00000005 /* EMC_DLL_XFORM_DQS13 */
+					0x00000005 /* EMC_DLL_XFORM_DQS14 */
+					0x00000005 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000a /* EMC_DLL_XFORM_DQ0 */
+					0x0000000a /* EMC_DLL_XFORM_DQ1 */
+					0x0000000a /* EMC_DLL_XFORM_DQ2 */
+					0x0000000a /* EMC_DLL_XFORM_DQ3 */
+					0x0000000a /* EMC_DLL_XFORM_DQ4 */
+					0x0000000a /* EMC_DLL_XFORM_DQ5 */
+					0x0000000a /* EMC_DLL_XFORM_DQ6 */
+					0x0000000a /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x61861820 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x61861800 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x00f8000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000007 /* EMC_CTT */
+					0x00000004 /* EMC_CTT_DURATION */
+					0x00004080 /* EMC_CFG_PIPE */
+					0x80003012 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000f /* EMC_QPOP */
+				>;
+			};
+		};
+
+		emc-timings-4 {
+			nvidia,ram-code = <4>;
+
+			timing-12750000 {
+				clock-frequency = <12750000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000000 /* EMC_RC */
+					0x00000004 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000060 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000018 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000005 /* EMC_TXSR */
+					0x00000005 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000064 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00080000 /* EMC_DLL_XFORM_DQ0 */
+					0x00080000 /* EMC_DLL_XFORM_DQ1 */
+					0x00080000 /* EMC_DLL_XFORM_DQ2 */
+					0x00080000 /* EMC_DLL_XFORM_DQ3 */
+					0x00008000 /* EMC_DLL_XFORM_DQ4 */
+					0x00008000 /* EMC_DLL_XFORM_DQ5 */
+					0x00008000 /* EMC_DLL_XFORM_DQ6 */
+					0x00008000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000007 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x800001c5 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-20400000 {
+				clock-frequency = <20400000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000000 /* EMC_RC */
+					0x00000007 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x0000009a /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000026 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000008 /* EMC_TXSR */
+					0x00000008 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x000000a0 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00080000 /* EMC_DLL_XFORM_DQ0 */
+					0x00080000 /* EMC_DLL_XFORM_DQ1 */
+					0x00080000 /* EMC_DLL_XFORM_DQ2 */
+					0x00080000 /* EMC_DLL_XFORM_DQ3 */
+					0x00008000 /* EMC_DLL_XFORM_DQ4 */
+					0x00008000 /* EMC_DLL_XFORM_DQ5 */
+					0x00008000 /* EMC_DLL_XFORM_DQ6 */
+					0x00008000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x0000000b /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000023a /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-40800000 {
+				clock-frequency = <40800000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000001 /* EMC_RC */
+					0x0000000e /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000001 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000134 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x0000004d /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000000c /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000000f /* EMC_TXSR */
+					0x0000000f /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000013f /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00080000 /* EMC_DLL_XFORM_DQ0 */
+					0x00080000 /* EMC_DLL_XFORM_DQ1 */
+					0x00080000 /* EMC_DLL_XFORM_DQ2 */
+					0x00080000 /* EMC_DLL_XFORM_DQ3 */
+					0x00008000 /* EMC_DLL_XFORM_DQ4 */
+					0x00008000 /* EMC_DLL_XFORM_DQ5 */
+					0x00008000 /* EMC_DLL_XFORM_DQ6 */
+					0x00008000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000015 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000370 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-68000000 {
+				clock-frequency = <68000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000003 /* EMC_RC */
+					0x00000017 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000002 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000202 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000080 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000015 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000019 /* EMC_TXSR */
+					0x00000019 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000001 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000213 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00080000 /* EMC_DLL_XFORM_DQ0 */
+					0x00080000 /* EMC_DLL_XFORM_DQ1 */
+					0x00080000 /* EMC_DLL_XFORM_DQ2 */
+					0x00080000 /* EMC_DLL_XFORM_DQ3 */
+					0x00008000 /* EMC_DLL_XFORM_DQ4 */
+					0x00008000 /* EMC_DLL_XFORM_DQ5 */
+					0x00008000 /* EMC_DLL_XFORM_DQ6 */
+					0x00008000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000022 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000050e /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-102000000 {
+				clock-frequency = <102000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000004 /* EMC_RC */
+					0x00000023 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000003 /* EMC_RAS */
+					0x00000001 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000001 /* EMC_RD_RCD */
+					0x00000001 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000304 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000000c1 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000021 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000025 /* EMC_TXSR */
+					0x00000025 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000003 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000031c /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00080000 /* EMC_DLL_XFORM_DQ0 */
+					0x00080000 /* EMC_DLL_XFORM_DQ1 */
+					0x00080000 /* EMC_DLL_XFORM_DQ2 */
+					0x00080000 /* EMC_DLL_XFORM_DQ3 */
+					0x00008000 /* EMC_DLL_XFORM_DQ4 */
+					0x00008000 /* EMC_DLL_XFORM_DQ5 */
+					0x00008000 /* EMC_DLL_XFORM_DQ6 */
+					0x00008000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000033 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000713 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-204000000 {
+				clock-frequency = <204000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x0000088d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100003>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000009 /* EMC_RC */
+					0x00000047 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000006 /* EMC_RAS */
+					0x00000002 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000005 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000002 /* EMC_RD_RCD */
+					0x00000002 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000004 /* EMC_EINPUT */
+					0x00000006 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000003 /* EMC_QRST */
+					0x0000000d /* EMC_QSAFE */
+					0x0000000f /* EMC_RDV */
+					0x00000011 /* EMC_RDV_MASK */
+					0x00000607 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000181 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000044 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000004a /* EMC_TXSR */
+					0x0000004a /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000007 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000638 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00090000 /* EMC_DLL_XFORM_DQ0 */
+					0x00090000 /* EMC_DLL_XFORM_DQ1 */
+					0x00094000 /* EMC_DLL_XFORM_DQ2 */
+					0x00094000 /* EMC_DLL_XFORM_DQ3 */
+					0x00009400 /* EMC_DLL_XFORM_DQ4 */
+					0x00009000 /* EMC_DLL_XFORM_DQ5 */
+					0x00009000 /* EMC_DLL_XFORM_DQ6 */
+					0x00009000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000066 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x000e000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000d2b3 /* EMC_CFG_PIPE */
+					0x80000d22 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-300000000 {
+				clock-frequency = <300000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73340000>;
+				nvidia,emc-cfg-2 = <0x000008d5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100002>;
+				nvidia,emc-mode-2 = <0x00200000>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00000321>;
+				nvidia,emc-mrs-wait-cnt = <0x0117000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x01231339>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x0000000d /* EMC_RC */
+					0x00000067 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000009 /* EMC_RAS */
+					0x00000003 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x00000009 /* EMC_W2P */
+					0x00000003 /* EMC_RD_RCD */
+					0x00000003 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000007 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x0000000e /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x000008e4 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000239 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000065 /* EMC_AR2PDEN */
+					0x0000000e /* EMC_RW2PDEN */
+					0x0000006c /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000009 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000924 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00060000 /* EMC_DLL_XFORM_DQ0 */
+					0x00060000 /* EMC_DLL_XFORM_DQ1 */
+					0x00060000 /* EMC_DLL_XFORM_DQ2 */
+					0x00060000 /* EMC_DLL_XFORM_DQ3 */
+					0x00006000 /* EMC_DLL_XFORM_DQ4 */
+					0x00006000 /* EMC_DLL_XFORM_DQ5 */
+					0x00006000 /* EMC_DLL_XFORM_DQ6 */
+					0x00006000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000096 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x0117000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x800012d7 /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
+				>;
+			};
+
+			timing-396000000 {
+				clock-frequency = <396000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73340000>;
+				nvidia,emc-cfg-2 = <0x00000895>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100002>;
+				nvidia,emc-mode-2 = <0x00200000>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00000521>;
+				nvidia,emc-mrs-wait-cnt = <0x00f5000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x01231339>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000011 /* EMC_RC */
+					0x00000089 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x0000000c /* EMC_RAS */
+					0x00000004 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000a /* EMC_W2P */
+					0x00000004 /* EMC_RD_RCD */
+					0x00000004 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000001 /* EMC_EINPUT */
+					0x00000008 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000000 /* EMC_QRST */
+					0x0000000f /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x00000bd1 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000002f4 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000087 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000008f /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x0000000d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000c11 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00048000 /* EMC_DLL_XFORM_DQ0 */
+					0x00048000 /* EMC_DLL_XFORM_DQ1 */
+					0x00048000 /* EMC_DLL_XFORM_DQ2 */
+					0x00048000 /* EMC_DLL_XFORM_DQ3 */
+					0x00004800 /* EMC_DLL_XFORM_DQ4 */
+					0x00004800 /* EMC_DLL_XFORM_DQ5 */
+					0x00004800 /* EMC_DLL_XFORM_DQ6 */
+					0x00004800 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x000000c6 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x00f5000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x8000188b /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0000089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100002>;
+				nvidia,emc-mode-2 = <0x00200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00000941>;
+				nvidia,emc-mrs-wait-cnt = <0x00c8000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0123133d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000018 /* EMC_RC */
+					0x000000b7 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000010 /* EMC_RAS */
+					0x00000006 /* EMC_RP */
+					0x00000006 /* EMC_R2W */
+					0x00000009 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000d /* EMC_W2P */
+					0x00000006 /* EMC_RD_RCD */
+					0x00000006 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000007 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000009 /* EMC_EINPUT_DURATION */
+					0x00040000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000010 /* EMC_QSAFE */
+					0x00000013 /* EMC_RDV */
+					0x00000015 /* EMC_RDV_MASK */
+					0x00000fd6 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000003f5 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000b /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x000000b4 /* EMC_AR2PDEN */
+					0x00000012 /* EMC_RW2PDEN */
+					0x000000bf /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000013 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001017 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe01200b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x00c8000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000042a0 /* EMC_CFG_PIPE */
+					0x80002062 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000b /* EMC_QPOP */
+				>;
+			};
+
+			timing-600000000 {
+				clock-frequency = <600000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0000089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100002>;
+				nvidia,emc-mode-2 = <0x00200010>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00000b61>;
+				nvidia,emc-mrs-wait-cnt = <0x00b0000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0121113d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x0000001b /* EMC_RC */
+					0x000000d0 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000013 /* EMC_RAS */
+					0x00000007 /* EMC_RP */
+					0x00000007 /* EMC_R2W */
+					0x0000000b /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x00000010 /* EMC_W2P */
+					0x00000007 /* EMC_RD_RCD */
+					0x00000007 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x0000000a /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000003 /* EMC_EINPUT */
+					0x0000000b /* EMC_EINPUT_DURATION */
+					0x00070000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000002 /* EMC_QRST */
+					0x00000012 /* EMC_QSAFE */
+					0x00000016 /* EMC_RDV */
+					0x00000018 /* EMC_RDV_MASK */
+					0x00001208 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000482 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000d /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x000000cc /* EMC_AR2PDEN */
+					0x00000015 /* EMC_RW2PDEN */
+					0x000000d8 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000015 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001249 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe00e00b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x00b0000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000040a0 /* EMC_CFG_PIPE */
+					0x800024aa /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000e /* EMC_QPOP */
+				>;
+			};
+
+			timing-792000000 {
+				clock-frequency = <792000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0080089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x00100002>;
+				nvidia,emc-mode-2 = <0x00200418>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x00000d71>;
+				nvidia,emc-mrs-wait-cnt = <0x006f000e>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040000>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0120113d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000024 /* EMC_RC */
+					0x00000114 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000019 /* EMC_RAS */
+					0x0000000a /* EMC_RP */
+					0x00000008 /* EMC_R2W */
+					0x0000000d /* EMC_W2R */
+					0x00000004 /* EMC_R2P */
+					0x00000013 /* EMC_W2P */
+					0x0000000a /* EMC_RD_RCD */
+					0x0000000a /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x0000000b /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x0000000d /* EMC_EINPUT_DURATION */
+					0x00080000 /* EMC_PUTERM_EXTRA */
+					0x00000004 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000014 /* EMC_QSAFE */
+					0x00000018 /* EMC_RDV */
+					0x0000001a /* EMC_RDV_MASK */
+					0x000017e2 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000005f8 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000003 /* EMC_PDEX2WR */
+					0x00000011 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000010d /* EMC_AR2PDEN */
+					0x00000018 /* EMC_RW2PDEN */
+					0x0000011e /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000005 /* EMC_TCKE */
+					0x00000006 /* EMC_TCKESR */
+					0x00000005 /* EMC_TPD */
+					0x0000001d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000008 /* EMC_TCLKSTABLE */
+					0x00000008 /* EMC_TCLKSTOP */
+					0x00001822 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x80000005 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab198 /* EMC_FBIO_CFG5 */
+					0xe00700b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x007fc007 /* EMC_DLL_XFORM_DQS0 */
+					0x007fc008 /* EMC_DLL_XFORM_DQS1 */
+					0x007f400c /* EMC_DLL_XFORM_DQS2 */
+					0x007fc007 /* EMC_DLL_XFORM_DQS3 */
+					0x007f4006 /* EMC_DLL_XFORM_DQS4 */
+					0x007f8004 /* EMC_DLL_XFORM_DQS5 */
+					0x007f8005 /* EMC_DLL_XFORM_DQS6 */
+					0x007f8004 /* EMC_DLL_XFORM_DQS7 */
+					0x007fc007 /* EMC_DLL_XFORM_DQS8 */
+					0x007fc008 /* EMC_DLL_XFORM_DQS9 */
+					0x007f400c /* EMC_DLL_XFORM_DQS10 */
+					0x007fc007 /* EMC_DLL_XFORM_DQS11 */
+					0x007f4006 /* EMC_DLL_XFORM_DQS12 */
+					0x007f8004 /* EMC_DLL_XFORM_DQS13 */
+					0x007f8005 /* EMC_DLL_XFORM_DQS14 */
+					0x007f8004 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00034000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x61861820 /* EMC_XM2DQSPADCTRL3 */
+					0x00492492 /* EMC_XM2DQSPADCTRL4 */
+					0x00492492 /* EMC_XM2DQSPADCTRL5 */
+					0x61861800 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x006f000e /* EMC_MRS_WAIT_CNT2 */
+					0x00000007 /* EMC_CTT */
+					0x00000004 /* EMC_CTT_DURATION */
+					0x00004080 /* EMC_CFG_PIPE */
+					0x80003012 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000f /* EMC_QPOP */
+				>;
+			};
+		};
+
+		emc-timings-6 {
+			nvidia,ram-code = <6>;
+
+			timing-12750000 {
+				clock-frequency = <12750000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000000 /* EMC_RC */
+					0x00000003 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000060 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000018 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000005 /* EMC_TXSR */
+					0x00000005 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000064 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000007 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x800001c5 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-20400000 {
+				clock-frequency = <20400000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000000 /* EMC_RC */
+					0x00000005 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000000 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x0000009a /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000026 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000007 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000006 /* EMC_TXSR */
+					0x00000006 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x000000a0 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x0000000b /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000023a /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-40800000 {
+				clock-frequency = <40800000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000001 /* EMC_RC */
+					0x0000000a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000001 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000134 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x0000004d /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000008 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000000c /* EMC_TXSR */
+					0x0000000c /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000000 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000013f /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000015 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000370 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-68000000 {
+				clock-frequency = <68000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000003 /* EMC_RC */
+					0x00000011 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000002 /* EMC_RAS */
+					0x00000000 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000000 /* EMC_RD_RCD */
+					0x00000000 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000202 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000080 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000000f /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000013 /* EMC_TXSR */
+					0x00000013 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000001 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000213 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000022 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x8000050e /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-102000000 {
+				clock-frequency = <102000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x000008c5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00000000>;
+
+				nvidia,emc-configuration = <
+					0x00000004 /* EMC_RC */
+					0x0000001a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000003 /* EMC_RAS */
+					0x00000001 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000001 /* EMC_RD_RCD */
+					0x00000001 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000005 /* EMC_EINPUT */
+					0x00000005 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000004 /* EMC_QRST */
+					0x0000000c /* EMC_QSAFE */
+					0x0000000d /* EMC_RDV */
+					0x0000000f /* EMC_RDV_MASK */
+					0x00000304 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000000c1 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000018 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000001c /* EMC_TXSR */
+					0x0000001c /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000003 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x0000031c /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ0 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ1 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ2 */
+					0x000fc000 /* EMC_DLL_XFORM_DQ3 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ4 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ5 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ6 */
+					0x0000fc00 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000033 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000042 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000f2f3 /* EMC_CFG_PIPE */
+					0x80000713 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-204000000 {
+				clock-frequency = <204000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000008>;
+				nvidia,emc-cfg = <0x73240000>;
+				nvidia,emc-cfg-2 = <0x0000088d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100003>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80001221>;
+				nvidia,emc-mrs-wait-cnt = <0x000c000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000009 /* EMC_RC */
+					0x00000035 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000007 /* EMC_RAS */
+					0x00000002 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x0000000a /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x0000000b /* EMC_W2P */
+					0x00000002 /* EMC_RD_RCD */
+					0x00000002 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000003 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x00000006 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000004 /* EMC_EINPUT */
+					0x00000006 /* EMC_EINPUT_DURATION */
+					0x00010000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000003 /* EMC_QRST */
+					0x0000000d /* EMC_QSAFE */
+					0x0000000f /* EMC_RDV */
+					0x00000011 /* EMC_RDV_MASK */
+					0x00000607 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000181 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x00000002 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000032 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x00000038 /* EMC_TXSR */
+					0x00000038 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000007 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000638 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x106aa298 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00064000 /* EMC_DLL_XFORM_DQS0 */
+					0x00064000 /* EMC_DLL_XFORM_DQS1 */
+					0x00064000 /* EMC_DLL_XFORM_DQS2 */
+					0x00064000 /* EMC_DLL_XFORM_DQS3 */
+					0x00064000 /* EMC_DLL_XFORM_DQS4 */
+					0x00064000 /* EMC_DLL_XFORM_DQS5 */
+					0x00064000 /* EMC_DLL_XFORM_DQS6 */
+					0x00064000 /* EMC_DLL_XFORM_DQS7 */
+					0x00064000 /* EMC_DLL_XFORM_DQS8 */
+					0x00064000 /* EMC_DLL_XFORM_DQS9 */
+					0x00064000 /* EMC_DLL_XFORM_DQS10 */
+					0x00064000 /* EMC_DLL_XFORM_DQS11 */
+					0x00064000 /* EMC_DLL_XFORM_DQS12 */
+					0x00064000 /* EMC_DLL_XFORM_DQS13 */
+					0x00064000 /* EMC_DLL_XFORM_DQS14 */
+					0x00064000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00004000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00090000 /* EMC_DLL_XFORM_DQ0 */
+					0x00090000 /* EMC_DLL_XFORM_DQ1 */
+					0x00094000 /* EMC_DLL_XFORM_DQ2 */
+					0x00094000 /* EMC_DLL_XFORM_DQ3 */
+					0x00009400 /* EMC_DLL_XFORM_DQ4 */
+					0x00009000 /* EMC_DLL_XFORM_DQ5 */
+					0x00009000 /* EMC_DLL_XFORM_DQ6 */
+					0x00009000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000303 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x0000003f /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451400 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000066 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x000c000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x0000d2b3 /* EMC_CFG_PIPE */
+					0x80000d22 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000a /* EMC_QPOP */
+				>;
+			};
+
+			timing-300000000 {
+				clock-frequency = <300000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73340000>;
+				nvidia,emc-cfg-2 = <0x000008d5>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200000>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000321>;
+				nvidia,emc-mrs-wait-cnt = <0x0174000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+				nvidia,emc-xm2dqspadctrl2 = <0x01231339>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x0000000d /* EMC_RC */
+					0x0000004c /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000009 /* EMC_RAS */
+					0x00000003 /* EMC_RP */
+					0x00000004 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x00000009 /* EMC_W2P */
+					0x00000003 /* EMC_RD_RCD */
+					0x00000003 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000007 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x0000000e /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x000008e4 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000239 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x0000004a /* EMC_AR2PDEN */
+					0x0000000e /* EMC_RW2PDEN */
+					0x00000051 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000009 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000924 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00098000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00060000 /* EMC_DLL_XFORM_DQ0 */
+					0x00060000 /* EMC_DLL_XFORM_DQ1 */
+					0x00060000 /* EMC_DLL_XFORM_DQ2 */
+					0x00060000 /* EMC_DLL_XFORM_DQ3 */
+					0x00006000 /* EMC_DLL_XFORM_DQ4 */
+					0x00006000 /* EMC_DLL_XFORM_DQ5 */
+					0x00006000 /* EMC_DLL_XFORM_DQ6 */
+					0x00006000 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000096 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x0174000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x800012d7 /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
+				>;
+			};
+
+			timing-396000000 {
+				clock-frequency = <396000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73340000>;
+				nvidia,emc-cfg-2 = <0x00000895>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200000>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000521>;
+				nvidia,emc-mrs-wait-cnt = <0x015b000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x01231339>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000012 /* EMC_RC */
+					0x00000065 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x0000000c /* EMC_RAS */
+					0x00000004 /* EMC_RP */
+					0x00000005 /* EMC_R2W */
+					0x00000008 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000a /* EMC_W2P */
+					0x00000004 /* EMC_RD_RCD */
+					0x00000004 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000005 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000001 /* EMC_EINPUT */
+					0x00000008 /* EMC_EINPUT_DURATION */
+					0x00020000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000000 /* EMC_QRST */
+					0x0000000f /* EMC_QSAFE */
+					0x00000010 /* EMC_RDV */
+					0x00000012 /* EMC_RDV_MASK */
+					0x00000bd1 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000002f4 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000001 /* EMC_PDEX2WR */
+					0x00000008 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000063 /* EMC_AR2PDEN */
+					0x0000000f /* EMC_RW2PDEN */
+					0x0000006b /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x0000000d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000005 /* EMC_TCLKSTABLE */
+					0x00000005 /* EMC_TCLKSTOP */
+					0x00000c11 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0x002c00a0 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00030000 /* EMC_DLL_XFORM_DQS0 */
+					0x00030000 /* EMC_DLL_XFORM_DQS1 */
+					0x00030000 /* EMC_DLL_XFORM_DQS2 */
+					0x00030000 /* EMC_DLL_XFORM_DQS3 */
+					0x00030000 /* EMC_DLL_XFORM_DQS4 */
+					0x00030000 /* EMC_DLL_XFORM_DQS5 */
+					0x00030000 /* EMC_DLL_XFORM_DQS6 */
+					0x00030000 /* EMC_DLL_XFORM_DQS7 */
+					0x00030000 /* EMC_DLL_XFORM_DQS8 */
+					0x00030000 /* EMC_DLL_XFORM_DQS9 */
+					0x00030000 /* EMC_DLL_XFORM_DQS10 */
+					0x00030000 /* EMC_DLL_XFORM_DQS11 */
+					0x00030000 /* EMC_DLL_XFORM_DQS12 */
+					0x00030000 /* EMC_DLL_XFORM_DQS13 */
+					0x00030000 /* EMC_DLL_XFORM_DQS14 */
+					0x00030000 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00070000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS15 */
+					0x00048000 /* EMC_DLL_XFORM_DQ0 */
+					0x00048000 /* EMC_DLL_XFORM_DQ1 */
+					0x00048000 /* EMC_DLL_XFORM_DQ2 */
+					0x00048000 /* EMC_DLL_XFORM_DQ3 */
+					0x00004800 /* EMC_DLL_XFORM_DQ4 */
+					0x00004800 /* EMC_DLL_XFORM_DQ5 */
+					0x00004800 /* EMC_DLL_XFORM_DQ6 */
+					0x00004800 /* EMC_DLL_XFORM_DQ7 */
+					0x10000280 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc081 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0000003f /* EMC_DSR_VTTGEN_DRV */
+					0x000000c6 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x015b000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000052a3 /* EMC_CFG_PIPE */
+					0x8000188b /* EMC_DYN_SELF_REF_CONTROL */
+					0x00000009 /* EMC_QPOP */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0000089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200008>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000941>;
+				nvidia,emc-mrs-wait-cnt = <0x013a000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0123133d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000018 /* EMC_RC */
+					0x00000088 /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000011 /* EMC_RAS */
+					0x00000006 /* EMC_RP */
+					0x00000006 /* EMC_R2W */
+					0x00000009 /* EMC_W2R */
+					0x00000002 /* EMC_R2P */
+					0x0000000d /* EMC_W2P */
+					0x00000006 /* EMC_RD_RCD */
+					0x00000006 /* EMC_WR_RCD */
+					0x00000002 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000003 /* EMC_WDV */
+					0x00000003 /* EMC_WDV_MASK */
+					0x00000007 /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x00000009 /* EMC_EINPUT_DURATION */
+					0x00040000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000010 /* EMC_QSAFE */
+					0x00000013 /* EMC_RDV */
+					0x00000015 /* EMC_RDV_MASK */
+					0x00000fd6 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000003f5 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000b /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000084 /* EMC_AR2PDEN */
+					0x00000012 /* EMC_RW2PDEN */
+					0x0000008f /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000013 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001017 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe01200b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00050000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000000 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000001 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x013a000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000042a0 /* EMC_CFG_PIPE */
+					0x80002062 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000b /* EMC_QPOP */
+				>;
+			};
+
+			timing-600000000 {
+				clock-frequency = <600000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0000089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200010>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000b61>;
+				nvidia,emc-mrs-wait-cnt = <0x0128000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040008>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0121113d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x0000001c /* EMC_RC */
+					0x0000009a /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x00000013 /* EMC_RAS */
+					0x00000007 /* EMC_RP */
+					0x00000007 /* EMC_R2W */
+					0x0000000b /* EMC_W2R */
+					0x00000003 /* EMC_R2P */
+					0x00000010 /* EMC_W2P */
+					0x00000007 /* EMC_RD_RCD */
+					0x00000007 /* EMC_WR_RCD */
+					0x00000003 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000005 /* EMC_WDV */
+					0x00000005 /* EMC_WDV_MASK */
+					0x0000000a /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000003 /* EMC_EINPUT */
+					0x0000000b /* EMC_EINPUT_DURATION */
+					0x00070000 /* EMC_PUTERM_EXTRA */
+					0x00000003 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000002 /* EMC_QRST */
+					0x00000012 /* EMC_QSAFE */
+					0x00000016 /* EMC_RDV */
+					0x00000018 /* EMC_RDV_MASK */
+					0x00001208 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x00000482 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000002 /* EMC_PDEX2WR */
+					0x0000000d /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x00000096 /* EMC_AR2PDEN */
+					0x00000015 /* EMC_RW2PDEN */
+					0x000000a2 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000004 /* EMC_TCKE */
+					0x00000005 /* EMC_TCKESR */
+					0x00000004 /* EMC_TPD */
+					0x00000015 /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000006 /* EMC_TCLKSTABLE */
+					0x00000006 /* EMC_TCLKSTOP */
+					0x00001249 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x00000000 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab098 /* EMC_FBIO_CFG5 */
+					0xe00e00b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x0000000a /* EMC_DLL_XFORM_DQS0 */
+					0x0000000a /* EMC_DLL_XFORM_DQS1 */
+					0x0000000a /* EMC_DLL_XFORM_DQS2 */
+					0x0000000a /* EMC_DLL_XFORM_DQS3 */
+					0x0000000a /* EMC_DLL_XFORM_DQS4 */
+					0x0000000a /* EMC_DLL_XFORM_DQS5 */
+					0x0000000a /* EMC_DLL_XFORM_DQS6 */
+					0x0000000a /* EMC_DLL_XFORM_DQS7 */
+					0x0000000a /* EMC_DLL_XFORM_DQS8 */
+					0x0000000a /* EMC_DLL_XFORM_DQS9 */
+					0x0000000a /* EMC_DLL_XFORM_DQS10 */
+					0x0000000a /* EMC_DLL_XFORM_DQS11 */
+					0x0000000a /* EMC_DLL_XFORM_DQS12 */
+					0x0000000a /* EMC_DLL_XFORM_DQS13 */
+					0x0000000a /* EMC_DLL_XFORM_DQS14 */
+					0x0000000a /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR0 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR3 */
+					0x00048000 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000004 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000002 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000003 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000006 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x51451420 /* EMC_XM2DQSPADCTRL3 */
+					0x00514514 /* EMC_XM2DQSPADCTRL4 */
+					0x00514514 /* EMC_XM2DQSPADCTRL5 */
+					0x51451400 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x0128000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000000 /* EMC_CTT */
+					0x00000003 /* EMC_CTT_DURATION */
+					0x000040a0 /* EMC_CFG_PIPE */
+					0x800024aa /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000e /* EMC_QPOP */
 				>;
 			};
 
+			timing-792000000 {
+				clock-frequency = <792000000>;
+
+				nvidia,emc-auto-cal-config = <0xa1430000>;
+				nvidia,emc-auto-cal-config2 = <0x00000000>;
+				nvidia,emc-auto-cal-config3 = <0x00000000>;
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-bgbias-ctl0 = <0x00000000>;
+				nvidia,emc-cfg = <0x73300000>;
+				nvidia,emc-cfg-2 = <0x0080089d>;
+				nvidia,emc-ctt-term-ctrl = <0x00000802>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200418>;
+				nvidia,emc-mode-4 = <0x00000000>;
+				nvidia,emc-mode-reset = <0x80000d71>;
+				nvidia,emc-mrs-wait-cnt = <0x00f8000c>;
+				nvidia,emc-sel-dpd-ctrl = <0x00040000>;
+				nvidia,emc-xm2dqspadctrl2 = <0x0120113d>;
+				nvidia,emc-zcal-cnt-long = <0x00000042>;
+				nvidia,emc-zcal-interval = <0x00020000>;
+
+				nvidia,emc-configuration = <
+					0x00000025 /* EMC_RC */
+					0x000000cc /* EMC_RFC */
+					0x00000000 /* EMC_RFC_SLR */
+					0x0000001a /* EMC_RAS */
+					0x00000009 /* EMC_RP */
+					0x00000008 /* EMC_R2W */
+					0x0000000d /* EMC_W2R */
+					0x00000004 /* EMC_R2P */
+					0x00000013 /* EMC_W2P */
+					0x00000009 /* EMC_RD_RCD */
+					0x00000009 /* EMC_WR_RCD */
+					0x00000004 /* EMC_RRD */
+					0x00000002 /* EMC_REXT */
+					0x00000000 /* EMC_WEXT */
+					0x00000006 /* EMC_WDV */
+					0x00000006 /* EMC_WDV_MASK */
+					0x0000000b /* EMC_QUSE */
+					0x00000002 /* EMC_QUSE_WIDTH */
+					0x00000000 /* EMC_IBDLY */
+					0x00000002 /* EMC_EINPUT */
+					0x0000000d /* EMC_EINPUT_DURATION */
+					0x00080000 /* EMC_PUTERM_EXTRA */
+					0x00000004 /* EMC_PUTERM_WIDTH */
+					0x00000000 /* EMC_PUTERM_ADJ */
+					0x00000000 /* EMC_CDB_CNTL_1 */
+					0x00000000 /* EMC_CDB_CNTL_2 */
+					0x00000000 /* EMC_CDB_CNTL_3 */
+					0x00000001 /* EMC_QRST */
+					0x00000014 /* EMC_QSAFE */
+					0x00000018 /* EMC_RDV */
+					0x0000001a /* EMC_RDV_MASK */
+					0x000017e2 /* EMC_REFRESH */
+					0x00000000 /* EMC_BURST_REFRESH_NUM */
+					0x000005f8 /* EMC_PRE_REFRESH_REQ_CNT */
+					0x00000003 /* EMC_PDEX2WR */
+					0x00000011 /* EMC_PDEX2RD */
+					0x00000001 /* EMC_PCHG2PDEN */
+					0x00000000 /* EMC_ACT2PDEN */
+					0x000000c6 /* EMC_AR2PDEN */
+					0x00000018 /* EMC_RW2PDEN */
+					0x000000d6 /* EMC_TXSR */
+					0x00000200 /* EMC_TXSRDLL */
+					0x00000005 /* EMC_TCKE */
+					0x00000006 /* EMC_TCKESR */
+					0x00000005 /* EMC_TPD */
+					0x0000001d /* EMC_TFAW */
+					0x00000000 /* EMC_TRPAB */
+					0x00000008 /* EMC_TCLKSTABLE */
+					0x00000008 /* EMC_TCLKSTOP */
+					0x00001822 /* EMC_TREFBW */
+					0x00000000 /* EMC_FBIO_CFG6 */
+					0x80000005 /* EMC_ODT_WRITE */
+					0x00000000 /* EMC_ODT_READ */
+					0x104ab198 /* EMC_FBIO_CFG5 */
+					0xe00700b1 /* EMC_CFG_DIG_DLL */
+					0x00008000 /* EMC_CFG_DIG_DLL_PERIOD */
+					0x00000009 /* EMC_DLL_XFORM_DQS0 */
+					0x00000009 /* EMC_DLL_XFORM_DQS1 */
+					0x00000009 /* EMC_DLL_XFORM_DQS2 */
+					0x00000007 /* EMC_DLL_XFORM_DQS3 */
+					0x00000006 /* EMC_DLL_XFORM_DQS4 */
+					0x00000006 /* EMC_DLL_XFORM_DQS5 */
+					0x007fc009 /* EMC_DLL_XFORM_DQS6 */
+					0x00000006 /* EMC_DLL_XFORM_DQS7 */
+					0x00000009 /* EMC_DLL_XFORM_DQS8 */
+					0x00000009 /* EMC_DLL_XFORM_DQS9 */
+					0x00000009 /* EMC_DLL_XFORM_DQS10 */
+					0x00000007 /* EMC_DLL_XFORM_DQS11 */
+					0x00000006 /* EMC_DLL_XFORM_DQS12 */
+					0x00000007 /* EMC_DLL_XFORM_DQS13 */
+					0x00000009 /* EMC_DLL_XFORM_DQS14 */
+					0x00000007 /* EMC_DLL_XFORM_DQS15 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE0 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE1 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE2 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE3 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE4 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE6 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE7 */
+					0x00034002 /* EMC_DLL_XFORM_ADDR0 */
+					0x00034002 /* EMC_DLL_XFORM_ADDR1 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR2 */
+					0x00034002 /* EMC_DLL_XFORM_ADDR3 */
+					0x00034002 /* EMC_DLL_XFORM_ADDR4 */
+					0x00000000 /* EMC_DLL_XFORM_ADDR5 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE8 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE9 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE10 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE11 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE12 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE13 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE14 */
+					0x00000000 /* EMC_DLL_XFORM_QUSE15 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS0 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS1 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS2 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS3 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS4 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS5 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS6 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS7 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS8 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS9 */
+					0x00000005 /* EMC_DLI_TRIM_TXDQS10 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS11 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS12 */
+					0x00000007 /* EMC_DLI_TRIM_TXDQS13 */
+					0x00000009 /* EMC_DLI_TRIM_TXDQS14 */
+					0x00000008 /* EMC_DLI_TRIM_TXDQS15 */
+					0x0000000e /* EMC_DLL_XFORM_DQ0 */
+					0x0000000e /* EMC_DLL_XFORM_DQ1 */
+					0x0000000e /* EMC_DLL_XFORM_DQ2 */
+					0x0000000e /* EMC_DLL_XFORM_DQ3 */
+					0x0000000e /* EMC_DLL_XFORM_DQ4 */
+					0x0000000e /* EMC_DLL_XFORM_DQ5 */
+					0x0000000e /* EMC_DLL_XFORM_DQ6 */
+					0x0000000e /* EMC_DLL_XFORM_DQ7 */
+					0x100002a0 /* EMC_XM2CMDPADCTRL */
+					0x00000000 /* EMC_XM2CMDPADCTRL4 */
+					0x00111111 /* EMC_XM2CMDPADCTRL5 */
+					0x00000000 /* EMC_XM2DQPADCTRL2 */
+					0x00000000 /* EMC_XM2DQPADCTRL3 */
+					0x77ffc085 /* EMC_XM2CLKPADCTRL */
+					0x00000101 /* EMC_XM2CLKPADCTRL2 */
+					0x81f1f108 /* EMC_XM2COMPPADCTRL */
+					0x07070004 /* EMC_XM2VTTGENPADCTRL */
+					0x00000000 /* EMC_XM2VTTGENPADCTRL2 */
+					0x016eeeee /* EMC_XM2VTTGENPADCTRL3 */
+					0x61861820 /* EMC_XM2DQSPADCTRL3 */
+					0x004d34d3 /* EMC_XM2DQSPADCTRL4 */
+					0x004d34d3 /* EMC_XM2DQSPADCTRL5 */
+					0x61861800 /* EMC_XM2DQSPADCTRL6 */
+					0x0606003f /* EMC_DSR_VTTGEN_DRV */
+					0x00000000 /* EMC_TXDSRVTTGEN */
+					0x00000000 /* EMC_FBIO_SPARE */
+					0x00000100 /* EMC_ZCAL_WAIT_CNT */
+					0x00f8000c /* EMC_MRS_WAIT_CNT2 */
+					0x00000007 /* EMC_CTT */
+					0x00000004 /* EMC_CTT_DURATION */
+					0x00004080 /* EMC_CFG_PIPE */
+					0x80003012 /* EMC_DYN_SELF_REF_CONTROL */
+					0x0000000f /* EMC_QPOP */
+				>;
+			};
 		};
 	};
 
@@ -1759,30 +5781,609 @@
 		emc-timings-1 {
 			nvidia,ram-code = <1>;
 
+			timing-12750000 {
+				clock-frequency = <12750000>;
+
+				nvidia,emem-configuration = <
+					0x40040001 /* MC_EMEM_ARB_CFG */
+					0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x77e30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-20400000 {
+				clock-frequency = <20400000>;
+
+				nvidia,emem-configuration = <
+					0x40020001 /* MC_EMEM_ARB_CFG */
+					0x80000012 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x76230303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-40800000 {
+				clock-frequency = <40800000>;
+
+				nvidia,emem-configuration = <
+					0xa0000001 /* MC_EMEM_ARB_CFG */
+					0x80000017 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x74a30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-68000000 {
+				clock-frequency = <68000000>;
+
+				nvidia,emem-configuration = <
+					0x00000001 /* MC_EMEM_ARB_CFG */
+					0x8000001e /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x74230403 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-102000000 {
+				clock-frequency = <102000000>;
+
+				nvidia,emem-configuration = <
+					0x08000001 /* MC_EMEM_ARB_CFG */
+					0x80000026 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0403 /* MC_EMEM_ARB_DA_COVERS */
+					0x73c30504 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-204000000 {
+				clock-frequency = <204000000>;
+
+				nvidia,emem-configuration = <
+					0x01000003 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000004 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0405 /* MC_EMEM_ARB_DA_COVERS */
+					0x73840a06 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-300000000 {
+				clock-frequency = <300000000>;
+
+				nvidia,emem-configuration = <
+					0x08000004 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000007 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000005 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000007 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000b0607 /* MC_EMEM_ARB_DA_COVERS */
+					0x77450e08 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-396000000 {
+				clock-frequency = <396000000>;
+
+				nvidia,emem-configuration = <
+					0x0f000005 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000007 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000d0709 /* MC_EMEM_ARB_DA_COVERS */
+					0x7586120a /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emem-configuration = <
+					0x0f000007 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000d /* MC_EMEM_ARB_TIMING_RC */
+					0x00000008 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000a /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000009 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x0010090d /* MC_EMEM_ARB_DA_COVERS */
+					0x7428180e /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-600000000 {
+				clock-frequency = <600000000>;
+
+				nvidia,emem-configuration = <
+					0x00000009 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000e /* MC_EMEM_ARB_TIMING_RC */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000b /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000b /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000007 /* MC_EMEM_ARB_TIMING_W2R */
+					0x07050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00130b0e /* MC_EMEM_ARB_DA_COVERS */
+					0x73a91b0f /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-792000000 {
+				clock-frequency = <792000000>;
+
+				nvidia,emem-configuration = <
+					0x0e00000b /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000013 /* MC_EMEM_ARB_TIMING_RC */
+					0x0000000c /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000f /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000c /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000008 /* MC_EMEM_ARB_TIMING_W2R */
+					0x08060202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00160d13 /* MC_EMEM_ARB_DA_COVERS */
+					0x734c2414 /* MC_EMEM_ARB_MISC0 */
+					0x70000f02 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+		};
+
+		emc-timings-4 {
+			nvidia,ram-code = <4>;
+
+			timing-12750000 {
+				clock-frequency = <12750000>;
+
+				nvidia,emem-configuration = <
+					0x40040001 /* MC_EMEM_ARB_CFG */
+					0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0502 /* MC_EMEM_ARB_DA_COVERS */
+					0x77e30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-20400000 {
+				clock-frequency = <20400000>;
+
+				nvidia,emem-configuration = <
+					0x40020001 /* MC_EMEM_ARB_CFG */
+					0x80000012 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0502 /* MC_EMEM_ARB_DA_COVERS */
+					0x77430303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-40800000 {
+				clock-frequency = <40800000>;
+
+				nvidia,emem-configuration = <
+					0xa0000001 /* MC_EMEM_ARB_CFG */
+					0x80000017 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0502 /* MC_EMEM_ARB_DA_COVERS */
+					0x75e30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-68000000 {
+				clock-frequency = <68000000>;
+
+				nvidia,emem-configuration = <
+					0x00000001 /* MC_EMEM_ARB_CFG */
+					0x8000001e /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0502 /* MC_EMEM_ARB_DA_COVERS */
+					0x75430403 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-102000000 {
+				clock-frequency = <102000000>;
+
+				nvidia,emem-configuration = <
+					0x08000001 /* MC_EMEM_ARB_CFG */
+					0x80000026 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0503 /* MC_EMEM_ARB_DA_COVERS */
+					0x74e30504 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-204000000 {
+				clock-frequency = <204000000>;
+
+				nvidia,emem-configuration = <
+					0x01000003 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000004 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0504 /* MC_EMEM_ARB_DA_COVERS */
+					0x74a40a05 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-300000000 {
+				clock-frequency = <300000000>;
+
+				nvidia,emem-configuration = <
+					0x08000004 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000007 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000005 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000007 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000b0607 /* MC_EMEM_ARB_DA_COVERS */
+					0x77450e08 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-396000000 {
+				clock-frequency = <396000000>;
+
+				nvidia,emem-configuration = <
+					0x0f000005 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000007 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000d0709 /* MC_EMEM_ARB_DA_COVERS */
+					0x7586120a /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emem-configuration = <
+					0x0f000007 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000c /* MC_EMEM_ARB_TIMING_RC */
+					0x00000007 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000a /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000009 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x0010090c /* MC_EMEM_ARB_DA_COVERS */
+					0x7488180d /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-600000000 {
+				clock-frequency = <600000000>;
+
+				nvidia,emem-configuration = <
+					0x00000009 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000e /* MC_EMEM_ARB_TIMING_RC */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000b /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000b /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000007 /* MC_EMEM_ARB_TIMING_W2R */
+					0x07050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00130b0e /* MC_EMEM_ARB_DA_COVERS */
+					0x74691b0f /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-792000000 {
+				clock-frequency = <792000000>;
+
+				nvidia,emem-configuration = <
+					0x0e00000b /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000013 /* MC_EMEM_ARB_TIMING_RC */
+					0x0000000c /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000f /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000c /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000008 /* MC_EMEM_ARB_TIMING_W2R */
+					0x08060202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00170e13 /* MC_EMEM_ARB_DA_COVERS */
+					0x746c2414 /* MC_EMEM_ARB_MISC0 */
+					0x70000f02 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+		};
+
+		emc-timings-6 {
+			nvidia,ram-code = <6>;
 
 			timing-12750000 {
 				clock-frequency = <12750000>;
 
 				nvidia,emem-configuration = <
-					0x40040001
-					0x8000000a
-					0x00000001
-					0x00000001
-					0x00000002
-					0x00000000
-					0x00000002
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000003
-					0x00000006
-					0x06030203
-					0x000a0402
-					0x77e30303
-					0x70000f03
-					0x001f0000
+					0x40040001 /* MC_EMEM_ARB_CFG */
+					0x8000000a /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x77e30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1790,25 +6391,25 @@
 				clock-frequency = <20400000>;
 
 				nvidia,emem-configuration = <
-					0x40020001
-					0x80000012
-					0x00000001
-					0x00000001
-					0x00000002
-					0x00000000
-					0x00000002
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000003
-					0x00000006
-					0x06030203
-					0x000a0402
-					0x76230303
-					0x70000f03
-					0x001f0000
+					0x40020001 /* MC_EMEM_ARB_CFG */
+					0x80000012 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x76230303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1816,25 +6417,25 @@
 				clock-frequency = <40800000>;
 
 				nvidia,emem-configuration = <
-					0xa0000001
-					0x80000017
-					0x00000001
-					0x00000001
-					0x00000002
-					0x00000000
-					0x00000002
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000003
-					0x00000006
-					0x06030203
-					0x000a0402
-					0x74a30303
-					0x70000f03
-					0x001f0000
+					0xa0000001 /* MC_EMEM_ARB_CFG */
+					0x80000017 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x74a30303 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1842,25 +6443,25 @@
 				clock-frequency = <68000000>;
 
 				nvidia,emem-configuration = <
-					0x00000001
-					0x8000001e
-					0x00000001
-					0x00000001
-					0x00000002
-					0x00000000
-					0x00000002
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000003
-					0x00000006
-					0x06030203
-					0x000a0402
-					0x74230403
-					0x70000f03
-					0x001f0000
+					0x00000001 /* MC_EMEM_ARB_CFG */
+					0x8000001e /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0402 /* MC_EMEM_ARB_DA_COVERS */
+					0x74230403 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1868,25 +6469,25 @@
 				clock-frequency = <102000000>;
 
 				nvidia,emem-configuration = <
-					0x08000001
-					0x80000026
-					0x00000001
-					0x00000001
-					0x00000003
-					0x00000000
-					0x00000002
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000003
-					0x00000006
-					0x06030203
-					0x000a0403
-					0x73c30504
-					0x70000f03
-					0x001f0000
+					0x08000001 /* MC_EMEM_ARB_CFG */
+					0x80000026 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000000 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000002 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06030203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0403 /* MC_EMEM_ARB_DA_COVERS */
+					0x73c30504 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1894,25 +6495,25 @@
 				clock-frequency = <204000000>;
 
 				nvidia,emem-configuration = <
-					0x01000003
-					0x80000040
-					0x00000001
-					0x00000001
-					0x00000005
-					0x00000002
-					0x00000004
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000003
-					0x00000002
-					0x00000004
-					0x00000006
-					0x06040203
-					0x000a0405
-					0x73840a06
-					0x70000f03
-					0x001f0000
+					0x01000003 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000004 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000003 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040203 /* MC_EMEM_ARB_DA_TURNS */
+					0x000a0405 /* MC_EMEM_ARB_DA_COVERS */
+					0x73840a06 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1920,25 +6521,25 @@
 				clock-frequency = <300000000>;
 
 				nvidia,emem-configuration = <
-					0x08000004
-					0x80000040
-					0x00000001
-					0x00000002
-					0x00000007
-					0x00000004
-					0x00000005
-					0x00000001
-					0x00000002
-					0x00000007
-					0x00000002
-					0x00000002
-					0x00000004
-					0x00000006
-					0x06040202
-					0x000b0607
-					0x77450e08
-					0x70000f03
-					0x001f0000
+					0x08000004 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000007 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000005 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000007 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000b0607 /* MC_EMEM_ARB_DA_COVERS */
+					0x77450e08 /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1946,25 +6547,51 @@
 				clock-frequency = <396000000>;
 
 				nvidia,emem-configuration = <
-					0x0f000005
-					0x80000040
-					0x00000001
-					0x00000002
-					0x00000009
-					0x00000005
-					0x00000007
-					0x00000001
-					0x00000002
-					0x00000008
-					0x00000002
-					0x00000002
-					0x00000004
-					0x00000006
-					0x06040202
-					0x000d0709
-					0x7586120a
-					0x70000f03
-					0x001f0000
+					0x0f000005 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RC */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RAS */
+					0x00000007 /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000008 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000004 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06040202 /* MC_EMEM_ARB_DA_TURNS */
+					0x000d0709 /* MC_EMEM_ARB_DA_COVERS */
+					0x7586120a /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
+				>;
+			};
+
+			timing-528000000 {
+				clock-frequency = <528000000>;
+
+				nvidia,emem-configuration = <
+					0x0f000007 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000d /* MC_EMEM_ARB_TIMING_RC */
+					0x00000008 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000a /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000001 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x00000009 /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_W2R */
+					0x06050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x0010090d /* MC_EMEM_ARB_DA_COVERS */
+					0x7428180e /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1972,25 +6599,25 @@
 				clock-frequency = <600000000>;
 
 				nvidia,emem-configuration = <
-					0x00000009
-					0x80000040
-					0x00000003
-					0x00000004
-					0x0000000e
-					0x00000009
-					0x0000000b
-					0x00000001
-					0x00000003
-					0x0000000b
-					0x00000002
-					0x00000002
-					0x00000005
-					0x00000007
-					0x07050202
-					0x00130b0e
-					0x73a91b0f
-					0x70000f03
-					0x001f0000
+					0x00000009 /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RP */
+					0x0000000e /* MC_EMEM_ARB_TIMING_RC */
+					0x00000009 /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000b /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000b /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000005 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000007 /* MC_EMEM_ARB_TIMING_W2R */
+					0x07050202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00130b0e /* MC_EMEM_ARB_DA_COVERS */
+					0x73a91b0f /* MC_EMEM_ARB_MISC0 */
+					0x70000f03 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 
@@ -1998,25 +6625,25 @@
 				clock-frequency = <792000000>;
 
 				nvidia,emem-configuration = <
-					0x0e00000b
-					0x80000040
-					0x00000004
-					0x00000005
-					0x00000013
-					0x0000000c
-					0x0000000f
-					0x00000002
-					0x00000003
-					0x0000000c
-					0x00000002
-					0x00000002
-					0x00000006
-					0x00000008
-					0x08060202
-					0x00160d13
-					0x734c2414
-					0x70000f02
-					0x001f0000
+					0x0e00000b /* MC_EMEM_ARB_CFG */
+					0x80000040 /* MC_EMEM_ARB_OUTSTANDING_REQ */
+					0x00000004 /* MC_EMEM_ARB_TIMING_RCD */
+					0x00000005 /* MC_EMEM_ARB_TIMING_RP */
+					0x00000013 /* MC_EMEM_ARB_TIMING_RC */
+					0x0000000c /* MC_EMEM_ARB_TIMING_RAS */
+					0x0000000f /* MC_EMEM_ARB_TIMING_FAW */
+					0x00000002 /* MC_EMEM_ARB_TIMING_RRD */
+					0x00000003 /* MC_EMEM_ARB_TIMING_RAP2PRE */
+					0x0000000c /* MC_EMEM_ARB_TIMING_WAP2PRE */
+					0x00000002 /* MC_EMEM_ARB_TIMING_R2R */
+					0x00000002 /* MC_EMEM_ARB_TIMING_W2W */
+					0x00000006 /* MC_EMEM_ARB_TIMING_R2W */
+					0x00000008 /* MC_EMEM_ARB_TIMING_W2R */
+					0x08060202 /* MC_EMEM_ARB_DA_TURNS */
+					0x00160d13 /* MC_EMEM_ARB_DA_COVERS */
+					0x734c2414 /* MC_EMEM_ARB_MISC0 */
+					0x70000f02 /* MC_EMEM_ARB_MISC1 */
+					0x001f0000 /* MC_EMEM_ARB_RING1_THROTTLE */
 				>;
 			};
 		};
diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts
index 5d5e6e18bc7b..7309393bfced 100644
--- a/arch/arm/boot/dts/tegra124-venice2.dts
+++ b/arch/arm/boot/dts/tegra124-venice2.dts
@@ -38,6 +38,9 @@
 		sor@54540000 {
 			status = "okay";
 
+			avdd-io-hdmi-dp-supply = <&vdd_1v05_run>;
+			vdd-hdmi-dp-pll-supply = <&vdd_3v3_run>;
+
 			nvidia,dpaux = <&dpaux>;
 			nvidia,panel = <&panel>;
 		};
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index b113e47b2b2a..413bfb981de8 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -157,10 +157,11 @@
 			reg = <0x0 0x54540000 0x0 0x00040000>;
 			interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&tegra_car TEGRA124_CLK_SOR0>,
+				 <&tegra_car TEGRA124_CLK_SOR0_OUT>,
 				 <&tegra_car TEGRA124_CLK_PLL_D_OUT0>,
 				 <&tegra_car TEGRA124_CLK_PLL_DP>,
 				 <&tegra_car TEGRA124_CLK_CLK_M>;
-			clock-names = "sor", "parent", "dp", "safe";
+			clock-names = "sor", "out", "parent", "dp", "safe";
 			resets = <&tegra_car 182>;
 			reset-names = "sor";
 			status = "disabled";
diff --git a/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi
new file mode 100644
index 000000000000..e85ffdbef876
--- /dev/null
+++ b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/ {
+	cpu0_opp_table: cpu_opp_table0 {
+		opp@216000000_750 {
+			opp-microvolt = <750000 750000 1125000>;
+		};
+
+		opp@216000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@312000000_750 {
+			opp-microvolt = <750000 750000 1125000>;
+		};
+
+		opp@312000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@456000000_750 {
+			opp-microvolt = <750000 750000 1125000>;
+		};
+
+		opp@456000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@456000000_800_2_2 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@456000000_800_3_2 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@456000000_825 {
+			opp-microvolt = <825000 825000 1125000>;
+		};
+
+		opp@608000000_750 {
+			opp-microvolt = <750000 750000 1125000>;
+		};
+
+		opp@608000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@608000000_800_3_2 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@608000000_825 {
+			opp-microvolt = <825000 825000 1125000>;
+		};
+
+		opp@608000000_850 {
+			opp-microvolt = <850000 850000 1125000>;
+		};
+
+		opp@608000000_900 {
+			opp-microvolt = <900000 900000 1125000>;
+		};
+
+		opp@760000000_775 {
+			opp-microvolt = <775000 775000 1125000>;
+		};
+
+		opp@760000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@760000000_850 {
+			opp-microvolt = <850000 850000 1125000>;
+		};
+
+		opp@760000000_875 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@760000000_875_1_1 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@760000000_875_0_2 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@760000000_875_1_2 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@760000000_900 {
+			opp-microvolt = <900000 900000 1125000>;
+		};
+
+		opp@760000000_975 {
+			opp-microvolt = <975000 975000 1125000>;
+		};
+
+		opp@816000000_800 {
+			opp-microvolt = <800000 800000 1125000>;
+		};
+
+		opp@816000000_850 {
+			opp-microvolt = <850000 850000 1125000>;
+		};
+
+		opp@816000000_875 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@816000000_950 {
+			opp-microvolt = <950000 950000 1125000>;
+		};
+
+		opp@816000000_1000 {
+			opp-microvolt = <1000000 1000000 1125000>;
+		};
+
+		opp@912000000_850 {
+			opp-microvolt = <850000 850000 1125000>;
+		};
+
+		opp@912000000_900 {
+			opp-microvolt = <900000 900000 1125000>;
+		};
+
+		opp@912000000_925 {
+			opp-microvolt = <925000 925000 1125000>;
+		};
+
+		opp@912000000_950 {
+			opp-microvolt = <950000 950000 1125000>;
+		};
+
+		opp@912000000_950_0_2 {
+			opp-microvolt = <950000 950000 1125000>;
+		};
+
+		opp@912000000_950_2_2 {
+			opp-microvolt = <950000 950000 1125000>;
+		};
+
+		opp@912000000_1000 {
+			opp-microvolt = <1000000 1000000 1125000>;
+		};
+
+		opp@912000000_1050 {
+			opp-microvolt = <1050000 1050000 1125000>;
+		};
+
+		opp@1000000000_875 {
+			opp-microvolt = <875000 875000 1125000>;
+		};
+
+		opp@1000000000_900 {
+			opp-microvolt = <900000 900000 1125000>;
+		};
+
+		opp@1000000000_950 {
+			opp-microvolt = <950000 950000 1125000>;
+		};
+
+		opp@1000000000_975 {
+			opp-microvolt = <975000 975000 1125000>;
+		};
+
+		opp@1000000000_1000 {
+			opp-microvolt = <1000000 1000000 1125000>;
+		};
+
+		opp@1000000000_1000_0_2 {
+			opp-microvolt = <1000000 1000000 1125000>;
+		};
+
+		opp@1000000000_1025 {
+			opp-microvolt = <1025000 1025000 1125000>;
+		};
+
+		opp@1000000000_1100 {
+			opp-microvolt = <1100000 1100000 1125000>;
+		};
+
+		opp@1200000000_1000 {
+			opp-microvolt = <1000000 1000000 1125000>;
+		};
+
+		opp@1200000000_1050 {
+			opp-microvolt = <1050000 1050000 1125000>;
+		};
+
+		opp@1200000000_1100 {
+			opp-microvolt = <1100000 1100000 1125000>;
+		};
+
+		opp@1200000000_1125 {
+			opp-microvolt = <1125000 1125000 1125000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/tegra20-cpu-opp.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi
new file mode 100644
index 000000000000..c878f4231791
--- /dev/null
+++ b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/ {
+	cpu0_opp_table: cpu_opp_table0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@216000000_750 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x0F 0x0003>;
+			opp-hz = /bits/ 64 <216000000>;
+		};
+
+		opp@216000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x0F 0x0004>;
+			opp-hz = /bits/ 64 <216000000>;
+		};
+
+		opp@312000000_750 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x0F 0x0003>;
+			opp-hz = /bits/ 64 <312000000>;
+		};
+
+		opp@312000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x0F 0x0004>;
+			opp-hz = /bits/ 64 <312000000>;
+		};
+
+		opp@456000000_750 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x0C 0x0003>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@456000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0006>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@456000000_800_2_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@456000000_800_3_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@456000000_825 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@608000000_750 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0003>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0006>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_800_3_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_825 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_850 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0006>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_900 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@760000000_775 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0003>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0006>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_875 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_875_1_1 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x02 0x0002>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_875_0_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_875_1_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x02 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0002>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_975 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@816000000_800 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0007>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@816000000_850 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@816000000_875 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0005>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@816000000_950 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0006>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@816000000_1000 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@912000000_850 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0007>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_900 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_925 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_950 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x02 0x0006>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_950_0_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0004>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_950_2_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_1000 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0002>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@912000000_1050 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <912000000>;
+		};
+
+		opp@1000000000_875 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0007>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_900 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_950 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1000 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x02 0x0006>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1000_0_2 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0004>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1025 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0002>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1100 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1200000000_1000 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1050 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1100 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x02 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1125 {
+			clock-latency-ns = <400000>;
+			opp-supported-hw = <0x01 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts
index 8861e0976e37..85fce5bc72d6 100644
--- a/arch/arm/boot/dts/tegra20-paz00.dts
+++ b/arch/arm/boot/dts/tegra20-paz00.dts
@@ -3,6 +3,8 @@
 
 #include <dt-bindings/input/input.h>
 #include "tegra20.dtsi"
+#include "tegra20-cpu-opp.dtsi"
+#include "tegra20-cpu-opp-microvolt.dtsi"
 
 / {
 	model = "Toshiba AC100 / Dynabook AZ";
@@ -337,18 +339,26 @@
 					regulator-always-on;
 				};
 
-				sm0 {
+				core_vdd_reg: sm0 {
 					regulator-name = "+1.2vs_sm0,vdd_core";
 					regulator-min-microvolt = <1200000>;
-					regulator-max-microvolt = <1200000>;
+					regulator-max-microvolt = <1225000>;
+					regulator-coupled-with = <&rtc_vdd_reg &cpu_vdd_reg>;
+					regulator-coupled-max-spread = <170000 450000>;
 					regulator-always-on;
+
+					nvidia,tegra-core-regulator;
 				};
 
-				sm1 {
+				cpu_vdd_reg: sm1 {
 					regulator-name = "+1.0vs_sm1,vdd_cpu";
-					regulator-min-microvolt = <1000000>;
-					regulator-max-microvolt = <1000000>;
+					regulator-min-microvolt = <750000>;
+					regulator-max-microvolt = <1100000>;
+					regulator-coupled-with = <&core_vdd_reg &rtc_vdd_reg>;
+					regulator-coupled-max-spread = <450000 450000>;
 					regulator-always-on;
+
+					nvidia,tegra-cpu-regulator;
 				};
 
 				sm2_reg: sm2 {
@@ -367,10 +377,15 @@
 					regulator-always-on;
 				};
 
-				ldo2 {
+				rtc_vdd_reg: ldo2 {
 					regulator-name = "+1.2vs_ldo2,vdd_rtc";
 					regulator-min-microvolt = <1200000>;
-					regulator-max-microvolt = <1200000>;
+					regulator-max-microvolt = <1225000>;
+					regulator-coupled-with = <&core_vdd_reg &cpu_vdd_reg>;
+					regulator-coupled-max-spread = <170000 450000>;
+					regulator-always-on;
+
+					nvidia,tegra-rtc-regulator;
 				};
 
 				ldo3 {
@@ -603,4 +618,16 @@
 			 <&tegra_car TEGRA20_CLK_CDEV1>;
 		clock-names = "pll_a", "pll_a_out0", "mclk";
 	};
+
+	cpus {
+		cpu0: cpu@0 {
+			cpu-supply = <&cpu_vdd_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
+		cpu@1 {
+			cpu-supply = <&cpu_vdd_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts
index 3e5ac096d85e..8debd3d3c20d 100644
--- a/arch/arm/boot/dts/tegra20-trimslice.dts
+++ b/arch/arm/boot/dts/tegra20-trimslice.dts
@@ -3,6 +3,7 @@
 
 #include <dt-bindings/input/input.h>
 #include "tegra20.dtsi"
+#include "tegra20-cpu-opp.dtsi"
 
 / {
 	model = "Compulab TrimSlice board";
@@ -471,4 +472,14 @@
 			 <&tegra_car TEGRA20_CLK_CDEV1>;
 		clock-names = "pll_a", "pll_a_out0", "mclk";
 	};
+
+	cpus {
+		cpu0: cpu@0 {
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
+		cpu@1 {
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 8c942e60703e..9c58e7fcf5c0 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -851,12 +851,14 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0>;
+			clocks = <&tegra_car TEGRA20_CLK_CCLK>;
 		};
 
 		cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <1>;
+			clocks = <&tegra_car TEGRA20_CLK_CCLK>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi
index 02f8126481a2..8b7a827d604d 100644
--- a/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi
+++ b/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi
@@ -994,11 +994,17 @@
 			id = <0>;
 			blocks = <0x5>;
 			irq-trigger = <0x1>;
+			/* 3.25 MHz ADC clock speed */
+			st,adc-freq = <1>;
+			/* 12-bit ADC */
+			st,mod-12b = <1>;
+			/* internal ADC reference */
+			st,ref-sel = <0>;
+			/* ADC converstion time: 80 clocks */
+			st,sample-time = <4>;
 
 			stmpe_touchscreen {
 				compatible = "st,stmpe-ts";
-				/* 3.25 MHz ADC clock speed */
-				st,adc-freq = <1>;
 				/* 8 sample average control */
 				st,ave-ctrl = <3>;
 				/* 7 length fractional part in z */
@@ -1008,17 +1014,17 @@
 				 * current limit value
 				 */
 				st,i-drive = <1>;
-				/* 12-bit ADC */
-				st,mod-12b = <1>;
-				/* internal ADC reference */
-				st,ref-sel = <0>;
-				/* ADC converstion time: 80 clocks */
-				st,sample-time = <4>;
 				/* 1 ms panel driver settling time */
 				st,settling = <3>;
 				/* 5 ms touch detect interrupt delay */
 				st,touch-det-delay = <5>;
 			};
+
+			stmpe_adc {
+				compatible = "st,stmpe-adc";
+				/* forbid to use ADC channels 3-0 (touch) */
+				st,norequest-mask = <0x0F>;
+			};
 		};
 
 		/*
diff --git a/arch/arm/boot/dts/tegra30-apalis.dtsi b/arch/arm/boot/dts/tegra30-apalis.dtsi
index 7f112f192fe9..c18f6f61d764 100644
--- a/arch/arm/boot/dts/tegra30-apalis.dtsi
+++ b/arch/arm/boot/dts/tegra30-apalis.dtsi
@@ -976,11 +976,17 @@
 			id = <0>;
 			blocks = <0x5>;
 			irq-trigger = <0x1>;
+			/* 3.25 MHz ADC clock speed */
+			st,adc-freq = <1>;
+			/* 12-bit ADC */
+			st,mod-12b = <1>;
+			/* internal ADC reference */
+			st,ref-sel = <0>;
+			/* ADC converstion time: 80 clocks */
+			st,sample-time = <4>;
 
 			stmpe_touchscreen {
 				compatible = "st,stmpe-ts";
-				/* 3.25 MHz ADC clock speed */
-				st,adc-freq = <1>;
 				/* 8 sample average control */
 				st,ave-ctrl = <3>;
 				/* 7 length fractional part in z */
@@ -990,17 +996,17 @@
 				 * current limit value
 				 */
 				st,i-drive = <1>;
-				/* 12-bit ADC */
-				st,mod-12b = <1>;
-				/* internal ADC reference */
-				st,ref-sel = <0>;
-				/* ADC converstion time: 80 clocks */
-				st,sample-time = <4>;
 				/* 1 ms panel driver settling time */
 				st,settling = <3>;
 				/* 5 ms touch detect interrupt delay */
 				st,touch-det-delay = <5>;
 			};
+
+			stmpe_adc {
+				compatible = "st,stmpe-adc";
+				/* forbid to use ADC channels 3-0 (touch) */
+				st,norequest-mask = <0x0F>;
+			};
 		};
 
 		/*
diff --git a/arch/arm/boot/dts/tegra30-cardhu-a04.dts b/arch/arm/boot/dts/tegra30-cardhu-a04.dts
index 4dbd4af679f0..9234988624ec 100644
--- a/arch/arm/boot/dts/tegra30-cardhu-a04.dts
+++ b/arch/arm/boot/dts/tegra30-cardhu-a04.dts
@@ -2,6 +2,8 @@
 /dts-v1/;
 
 #include "tegra30-cardhu.dtsi"
+#include "tegra30-cpu-opp.dtsi"
+#include "tegra30-cpu-opp-microvolt.dtsi"
 
 /* This dts file support the cardhu A04 and later versions of board */
 
@@ -103,4 +105,50 @@
 			gpio = <&gpio TEGRA_GPIO(DD, 0) GPIO_ACTIVE_HIGH>;
 		};
 	};
+
+	i2c@7000d000 {
+		pmic: tps65911@2d {
+			regulators {
+				vddctrl_reg: vddctrl {
+					regulator-min-microvolt = <800000>;
+					regulator-max-microvolt = <1125000>;
+					regulator-coupled-with = <&vddcore_reg>;
+					regulator-coupled-max-spread = <300000>;
+					regulator-max-step-microvolt = <100000>;
+
+					nvidia,tegra-cpu-regulator;
+				};
+			};
+		};
+
+		vddcore_reg: tps62361@60 {
+			regulator-coupled-with = <&vddctrl_reg>;
+			regulator-coupled-max-spread = <300000>;
+			regulator-max-step-microvolt = <100000>;
+
+			nvidia,tegra-core-regulator;
+		};
+	};
+
+	cpus {
+		cpu0: cpu@0 {
+			cpu-supply = <&vddctrl_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
+		cpu@1 {
+			cpu-supply = <&vddctrl_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
+		cpu@2 {
+			cpu-supply = <&vddctrl_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
+		cpu@3 {
+			cpu-supply = <&vddctrl_reg>;
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/tegra30-colibri.dtsi b/arch/arm/boot/dts/tegra30-colibri.dtsi
index 35af03ca9e90..1f9198bb24ff 100644
--- a/arch/arm/boot/dts/tegra30-colibri.dtsi
+++ b/arch/arm/boot/dts/tegra30-colibri.dtsi
@@ -845,11 +845,18 @@
 			id = <0>;
 			blocks = <0x5>;
 			irq-trigger = <0x1>;
+			/* 3.25 MHz ADC clock speed */
+			st,adc-freq = <1>;
+			/* 12-bit ADC */
+			st,mod-12b = <1>;
+			/* internal ADC reference */
+			st,ref-sel = <0>;
+			/* ADC converstion time: 80 clocks */
+			st,sample-time = <4>;
+			/* forbid to use ADC channels 3-0 (touch) */
 
 			stmpe_touchscreen {
 				compatible = "st,stmpe-ts";
-				/* 3.25 MHz ADC clock speed */
-				st,adc-freq = <1>;
 				/* 8 sample average control */
 				st,ave-ctrl = <3>;
 				/* 7 length fractional part in z */
@@ -859,17 +866,16 @@
 				 * current limit value
 				 */
 				st,i-drive = <1>;
-				/* 12-bit ADC */
-				st,mod-12b = <1>;
-				/* internal ADC reference */
-				st,ref-sel = <0>;
-				/* ADC converstion time: 80 clocks */
-				st,sample-time = <4>;
 				/* 1 ms panel driver settling time */
 				st,settling = <3>;
 				/* 5 ms touch detect interrupt delay */
 				st,touch-det-delay = <5>;
 			};
+
+			stmpe_adc {
+				compatible = "st,stmpe-adc";
+				st,norequest-mask = <0x0F>;
+			};
 		};
 
 		/*
diff --git a/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi
new file mode 100644
index 000000000000..5c40ef49894f
--- /dev/null
+++ b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/ {
+	cpu0_opp_table: cpu_opp_table0 {
+		opp@51000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@51000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@51000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@102000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@102000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@102000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@204000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@204000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@204000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@312000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@312000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@340000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@340000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@370000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@456000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@456000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@475000000_800 {
+			opp-microvolt = <800000 800000 1250000>;
+		};
+
+		opp@475000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@475000000_850_0_1 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@475000000_850_0_4 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@475000000_850_0_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@475000000_850_0_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@608000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@608000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@620000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_1_1 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_2_1 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_3_1 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_1_4 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_2_4 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_3_4 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_1_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_2_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_3_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_4_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_1_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_2_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_3_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_850_4_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@640000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_1 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_2 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_3 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_4 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_4_7 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_3_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_4_8 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_850_0_10 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@760000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_1 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_1 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_2 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_2 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_3 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_3 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_4 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_4 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_7 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_7 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_1_8 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_900_2_8 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@760000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@760000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@816000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@816000000_912 {
+			opp-microvolt = <912000 912000 1250000>;
+		};
+
+		opp@860000000_850 {
+			opp-microvolt = <850000 850000 1250000>;
+		};
+
+		opp@860000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_1 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_1 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_2 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_2 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_3 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_3 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_4 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_4 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_7 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_7 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_4_7 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_2_8 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_3_8 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_900_4_8 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@860000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_1 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_2 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_3 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_4 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_975_1_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@860000000_1000 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@910000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@1000000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@1000000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_1 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_1 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_2 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_2 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_3 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_3 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_4 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_4 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_4_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_2_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_3_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_975_4_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1000000000_1000 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1000000000_1025 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1100000000_900 {
+			opp-microvolt = <900000 900000 1250000>;
+		};
+
+		opp@1100000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_1 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_2 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_3 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_4 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_4_7 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_3_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_975_4_8 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1100000000_1000 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_1 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_2 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_3 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_4 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_7 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1000_2_8 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1100000000_1025 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1100000000_1075 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1150000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1200000000_975 {
+			opp-microvolt = <975000 975000 1250000>;
+		};
+
+		opp@1200000000_1000 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_1 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_2 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_3 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_4 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_7 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_4_7 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_3_8 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1000_4_8 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1200000000_1025 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_1 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_2 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_3 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_4 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_7 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1025_2_8 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1200000000_1050 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1200000000_1075 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1200000000_1100 {
+			opp-microvolt = <1100000 1100000 1250000>;
+		};
+
+		opp@1300000000_1000 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1300000000_1000_4_7 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1300000000_1000_4_8 {
+			opp-microvolt = <1000000 1000000 1250000>;
+		};
+
+		opp@1300000000_1025 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1300000000_1025_3_1 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1300000000_1025_3_7 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1300000000_1025_3_8 {
+			opp-microvolt = <1025000 1025000 1250000>;
+		};
+
+		opp@1300000000_1050 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_2_1 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_2 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_3 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_4 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_5 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_6 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_2_7 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_2_8 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_12 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1050_3_13 {
+			opp-microvolt = <1050000 1050000 1250000>;
+		};
+
+		opp@1300000000_1075 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1300000000_1075_2_2 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1300000000_1075_2_3 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1300000000_1075_2_4 {
+			opp-microvolt = <1075000 1075000 1250000>;
+		};
+
+		opp@1300000000_1100 {
+			opp-microvolt = <1100000 1100000 1250000>;
+		};
+
+		opp@1300000000_1125 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1300000000_1150 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1300000000_1175 {
+			opp-microvolt = <1175000 1175000 1250000>;
+		};
+
+		opp@1400000000_1100 {
+			opp-microvolt = <1100000 1100000 1250000>;
+		};
+
+		opp@1400000000_1125 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1400000000_1150 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1400000000_1150_2_4 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1400000000_1175 {
+			opp-microvolt = <1175000 1175000 1250000>;
+		};
+
+		opp@1400000000_1237 {
+			opp-microvolt = <1237000 1237000 1250000>;
+		};
+
+		opp@1500000000_1125 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1500000000_1125_4_5 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1500000000_1125_4_6 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1500000000_1125_4_12 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1500000000_1125_4_13 {
+			opp-microvolt = <1125000 1125000 1250000>;
+		};
+
+		opp@1500000000_1150 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1500000000_1150_3_5 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1500000000_1150_3_6 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1500000000_1150_3_12 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1500000000_1150_3_13 {
+			opp-microvolt = <1150000 1150000 1250000>;
+		};
+
+		opp@1500000000_1200 {
+			opp-microvolt = <1200000 1200000 1250000>;
+		};
+
+		opp@1500000000_1237 {
+			opp-microvolt = <1237000 1237000 1250000>;
+		};
+
+		opp@1600000000_1212 {
+			opp-microvolt = <1212000 1212000 1250000>;
+		};
+
+		opp@1600000000_1237 {
+			opp-microvolt = <1237000 1237000 1250000>;
+		};
+
+		opp@1700000000_1212 {
+			opp-microvolt = <1212000 1212000 1250000>;
+		};
+
+		opp@1700000000_1237 {
+			opp-microvolt = <1237000 1237000 1250000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/tegra30-cpu-opp.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi
new file mode 100644
index 000000000000..d64fc262585e
--- /dev/null
+++ b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi
@@ -0,0 +1,1202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/ {
+	cpu0_opp_table: cpu_opp_table0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@51000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x31FE>;
+			opp-hz = /bits/ 64 <51000000>;
+		};
+
+		opp@51000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0C01>;
+			opp-hz = /bits/ 64 <51000000>;
+		};
+
+		opp@51000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <51000000>;
+		};
+
+		opp@102000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x31FE>;
+			opp-hz = /bits/ 64 <102000000>;
+		};
+
+		opp@102000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0C01>;
+			opp-hz = /bits/ 64 <102000000>;
+		};
+
+		opp@102000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <102000000>;
+		};
+
+		opp@204000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x31FE>;
+			opp-hz = /bits/ 64 <204000000>;
+		};
+
+		opp@204000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0C01>;
+			opp-hz = /bits/ 64 <204000000>;
+		};
+
+		opp@204000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <204000000>;
+		};
+
+		opp@312000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0C00>;
+			opp-hz = /bits/ 64 <312000000>;
+		};
+
+		opp@312000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <312000000>;
+		};
+
+		opp@340000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0192>;
+			opp-hz = /bits/ 64 <340000000>;
+		};
+
+		opp@340000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x0F 0x0001>;
+			opp-hz = /bits/ 64 <340000000>;
+		};
+
+		opp@370000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1E 0x306C>;
+			opp-hz = /bits/ 64 <370000000>;
+		};
+
+		opp@456000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0C00>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@456000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <456000000>;
+		};
+
+		opp@475000000_800 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1E 0x31FE>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@475000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x0F 0x0001>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@475000000_850_0_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0002>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@475000000_850_0_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0010>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@475000000_850_0_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0080>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@475000000_850_0_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0100>;
+			opp-hz = /bits/ 64 <475000000>;
+		};
+
+		opp@608000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0400>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@608000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <608000000>;
+		};
+
+		opp@620000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1E 0x306C>;
+			opp-hz = /bits/ 64 <620000000>;
+		};
+
+		opp@640000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x0F 0x0001>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_1_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0002>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_1_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0010>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_1_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0080>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_1_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0100>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_850_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@640000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <640000000>;
+		};
+
+		opp@760000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1E 0x3461>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_850_0_10 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0400>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0001>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0002>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0008>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0010>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0080>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_1_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0100>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_900_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@760000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <760000000>;
+		};
+
+		opp@816000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0400>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@816000000_912 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x1F 0x0200>;
+			opp-hz = /bits/ 64 <816000000>;
+		};
+
+		opp@860000000_850 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x0C 0x0001>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0001>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_900_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0001>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0002>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0004>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0008>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0010>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0080>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_975_1_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0100>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@860000000_1000 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <860000000>;
+		};
+
+		opp@910000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x18 0x3060>;
+			opp-hz = /bits/ 64 <910000000>;
+		};
+
+		opp@1000000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x0C 0x0001>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x03 0x0001>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_975_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1000 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x019E>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1000000000_1025 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+
+		opp@1100000000_900 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0001>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x06 0x0001>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_975_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0001>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1000_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1025 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x019E>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1100000000_1075 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <1100000000>;
+		};
+
+		opp@1150000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x18 0x3060>;
+			opp-hz = /bits/ 64 <1150000000>;
+		};
+
+		opp@1200000000_975 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0001>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1000_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0001>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1025_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1050 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x019E>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1075 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0001>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1200000000_1100 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0192>;
+			opp-hz = /bits/ 64 <1200000000>;
+		};
+
+		opp@1300000000_1000 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0001>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1000_4_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0080>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1000_4_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0100>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1025 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0001>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1025_3_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0002>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1025_3_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0080>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1025_3_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0100>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x12 0x3061>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_2_1 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0002>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0004>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0008>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_5 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0020>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_6 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0040>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_2_7 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0080>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_2_8 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0100>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_12 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x1000>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1050_3_13 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x2000>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1075 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0182>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1075_2_2 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0004>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1075_2_3 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0008>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1075_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1100 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x001C>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1125 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0001>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1150 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0182>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1300000000_1175 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0010>;
+			opp-hz = /bits/ 64 <1300000000>;
+		};
+
+		opp@1400000000_1100 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x18 0x307C>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1400000000_1125 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x000C>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1400000000_1150 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x000C>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1400000000_1150_2_4 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1400000000_1175 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0010>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1400000000_1237 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0010>;
+			opp-hz = /bits/ 64 <1400000000>;
+		};
+
+		opp@1500000000_1125 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0010>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1125_4_5 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0020>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1125_4_6 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x0040>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1125_4_12 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x1000>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1125_4_13 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x2000>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1150 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x04 0x0010>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1150_3_5 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0020>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1150_3_6 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x0040>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1150_3_12 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x1000>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1150_3_13 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x2000>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1200 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x02 0x0010>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1500000000_1237 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x01 0x0010>;
+			opp-hz = /bits/ 64 <1500000000>;
+		};
+
+		opp@1600000000_1212 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x3060>;
+			opp-hz = /bits/ 64 <1600000000>;
+		};
+
+		opp@1600000000_1237 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x3060>;
+			opp-hz = /bits/ 64 <1600000000>;
+		};
+
+		opp@1700000000_1212 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x10 0x3060>;
+			opp-hz = /bits/ 64 <1700000000>;
+		};
+
+		opp@1700000000_1237 {
+			clock-latency-ns = <100000>;
+			opp-supported-hw = <0x08 0x3060>;
+			opp-hz = /bits/ 64 <1700000000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index e074258d4518..55ae050042ce 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -422,6 +422,7 @@
 		clocks = <&tegra_car TEGRA30_CLK_VDE>;
 		reset-names = "vde", "mc";
 		resets = <&tegra_car 61>, <&mc TEGRA30_MC_RESET_VDE>;
+		iommus = <&mc TEGRA_SWGROUP_VDE>;
 	};
 
 	apbmisc@70000800 {
@@ -732,6 +733,15 @@
 		#reset-cells = <1>;
 	};
 
+	memory-controller@7000f400 {
+		compatible = "nvidia,tegra30-emc";
+		reg = <0x7000f400 0x400>;
+		interrupts = <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&tegra_car TEGRA30_CLK_EMC>;
+
+		nvidia,memory-controller = <&mc>;
+	};
+
 	fuse@7000f800 {
 		compatible = "nvidia,tegra30-efuse";
 		reg = <0x7000f800 0x400>;
@@ -997,24 +1007,28 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0>;
+			clocks = <&tegra_car TEGRA30_CLK_CCLK_G>;
 		};
 
 		cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <1>;
+			clocks = <&tegra_car TEGRA30_CLK_CCLK_G>;
 		};
 
 		cpu@2 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <2>;
+			clocks = <&tegra_car TEGRA30_CLK_CCLK_G>;
 		};
 
 		cpu@3 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <3>;
+			clocks = <&tegra_car TEGRA30_CLK_CCLK_G>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/vf-colibri.dtsi b/arch/arm/boot/dts/vf-colibri.dtsi
index b6a1eeeb2bb4..fba37b8756f7 100644
--- a/arch/arm/boot/dts/vf-colibri.dtsi
+++ b/arch/arm/boot/dts/vf-colibri.dtsi
@@ -129,8 +129,11 @@
 
 &i2c0 {
 	clock-frequency = <400000>;
-	pinctrl-names = "default";
+	pinctrl-names = "default", "gpio";
 	pinctrl-0 = <&pinctrl_i2c0>;
+	pinctrl-1 = <&pinctrl_i2c0_gpio>;
+	scl-gpios = <&gpio1 4 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+	sda-gpios = <&gpio1 5 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
 };
 
 &nfc {
@@ -308,6 +311,13 @@
 			>;
 		};
 
+		pinctrl_i2c0_gpio: i2c0gpiogrp {
+			fsl,pins = <
+				VF610_PAD_PTB14__GPIO_36		0x37ff
+				VF610_PAD_PTB15__GPIO_37		0x37ff
+			>;
+		};
+
 		pinctrl_nfc: nfcgrp {
 			fsl,pins = <
 				VF610_PAD_PTD23__NF_IO7		0x28df
diff --git a/arch/arm/boot/dts/vf500-colibri.dtsi b/arch/arm/boot/dts/vf500-colibri.dtsi
index 237b0246fa84..92255f8893ce 100644
--- a/arch/arm/boot/dts/vf500-colibri.dtsi
+++ b/arch/arm/boot/dts/vf500-colibri.dtsi
@@ -44,7 +44,7 @@
 
 / {
 	model = "Toradex Colibri VF50 COM";
-	compatible = "toradex,vf610-colibri_vf50", "fsl,vf500";
+	compatible = "toradex,vf500-colibri_vf50", "fsl,vf500";
 
 	memory@80000000 {
 		device_type = "memory";
diff --git a/arch/arm/boot/dts/vf610-bk4.dts b/arch/arm/boot/dts/vf610-bk4.dts
index 0f3870d3b099..830c85476b3d 100644
--- a/arch/arm/boot/dts/vf610-bk4.dts
+++ b/arch/arm/boot/dts/vf610-bk4.dts
@@ -259,24 +259,28 @@
 &uart0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart0>;
+	/delete-property/dma-names;
 	status = "okay";
 };
 
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart1>;
+	/delete-property/dma-names;
 	status = "okay";
 };
 
 &uart2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart2>;
+	/delete-property/dma-names;
 	status = "okay";
 };
 
 &uart3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart3>;
+	/delete-property/dma-names;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
index c8ebb23c4e02..d7caf618f980 100644
--- a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
+++ b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
@@ -183,11 +183,6 @@
 					#address-cells = <1>;
 					#size-cells = <0>;
 
-					port@1 {
-						reg = <1>;
-						label = "internal_j9";
-					};
-
 					port@2 {
 						reg = <2>;
 						label = "eth_fc_1000_2";
@@ -271,11 +266,6 @@
 					#address-cells = <1>;
 					#size-cells = <0>;
 
-					port@1 {
-						reg = <1>;
-						label = "internal_j8";
-					};
-
 					port@2 {
 						reg = <2>;
 						label = "eth_fc_1000_8";
@@ -689,7 +679,6 @@
 	linux,rs485-enabled-at-boot-time;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart1>;
-	rs485-rts-delay = <0 200>;
 	status = "okay";
 };
 
@@ -697,7 +686,6 @@
 	linux,rs485-enabled-at-boot-time;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart2>;
-	rs485-rts-delay = <0 200>;
 	status = "okay";
 };
 
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index 1857df992484..303f75a3baec 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -132,10 +132,12 @@ CONFIG_ASPEED_BT_IPMI_BMC=y
 CONFIG_HW_RANDOM_TIMERIOMEM=y
 # CONFIG_I2C_COMPAT is not set
 CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
 CONFIG_I2C_MUX_PCA9541=y
 CONFIG_I2C_MUX_PCA954x=y
 CONFIG_I2C_ASPEED=y
 CONFIG_I2C_FSI=y
+CONFIG_SPI=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_ASPEED=y
@@ -185,6 +187,12 @@ CONFIG_USB_CONFIGFS_F_LB_SS=y
 CONFIG_USB_CONFIGFS_F_FS=y
 CONFIG_USB_CONFIGFS_F_HID=y
 CONFIG_USB_CONFIGFS_F_PRINTER=y
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ASPEED=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_CLASS_FLASH=y
@@ -216,7 +224,6 @@ CONFIG_FSI_MASTER_GPIO=y
 CONFIG_FSI_MASTER_HUB=y
 CONFIG_FSI_MASTER_AST_CF=y
 CONFIG_FSI_SCOM=y
-CONFIG_FSI_SBEFIFO=y
 CONFIG_FANOTIFY=y
 CONFIG_OVERLAY_FS=y
 CONFIG_TMPFS=y
@@ -231,7 +238,6 @@ CONFIG_SQUASHFS_ZSTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_HARDENED_USERCOPY=y
 CONFIG_FORTIFY_SOURCE=y
-# CONFIG_CRYPTO_ECHAINIV is not set
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_USER_API_HASH=y
@@ -247,14 +253,14 @@ CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_STRIP_ASM_SYMS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=-1
 CONFIG_SOFTLOCKUP_DETECTOR=y
 # CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_WQ_WATCHDOG=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PANIC_TIMEOUT=-1
 # CONFIG_SCHED_DEBUG is not set
-CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_FUNCTION_TRACER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_DEBUG_WX=y
 CONFIG_DEBUG_USER=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 597536cc9573..b0d056d49abe 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -139,6 +139,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=6
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_ASPEED_VUART=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_ASPEED_KCS_IPMI_BMC=y
 CONFIG_ASPEED_BT_IPMI_BMC=y
@@ -154,6 +155,7 @@ CONFIG_SPI=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_ASPEED=y
+CONFIG_GPIO_ASPEED_SGPIO=y
 CONFIG_W1=y
 CONFIG_W1_MASTER_GPIO=y
 CONFIG_W1_SLAVE_THERM=y
@@ -236,8 +238,10 @@ CONFIG_FSI=y
 CONFIG_FSI_MASTER_GPIO=y
 CONFIG_FSI_MASTER_HUB=y
 CONFIG_FSI_MASTER_AST_CF=y
+CONFIG_FSI_MASTER_ASPEED=y
 CONFIG_FSI_SCOM=y
 CONFIG_FSI_SBEFIFO=y
+CONFIG_FSI_OCC=y
 CONFIG_FANOTIFY=y
 CONFIG_OVERLAY_FS=y
 CONFIG_TMPFS=y
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index 309c55a8d107..3729a6e0ee24 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -18,6 +18,7 @@ CONFIG_ARCH_MULTI_V5=y
 CONFIG_ARCH_AT91=y
 CONFIG_SOC_AT91RM9200=y
 CONFIG_SOC_AT91SAM9=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
 CONFIG_AEABI=y
 CONFIG_UACCESS_WITH_MEMCPY=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig
index 31bfe1647d28..f53634af014b 100644
--- a/arch/arm/configs/axm55xx_defconfig
+++ b/arch/arm/configs/axm55xx_defconfig
@@ -20,7 +20,6 @@ CONFIG_NAMESPACES=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 08db1c83eb2d..fde84f123fbb 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -230,6 +230,7 @@ CONFIG_SND_SOC_SAMSUNG_SMDK_WM8994=y
 CONFIG_SND_SOC_SMDK_WM8994_PCM=y
 CONFIG_SND_SOC_SNOW=y
 CONFIG_SND_SOC_ODROID=y
+CONFIG_SND_SOC_ARNDALE=y
 CONFIG_SND_SIMPLE_CARD=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
@@ -294,6 +295,7 @@ CONFIG_DEVFREQ_GOV_PERFORMANCE=y
 CONFIG_DEVFREQ_GOV_POWERSAVE=y
 CONFIG_DEVFREQ_GOV_USERSPACE=y
 CONFIG_ARM_EXYNOS_BUS_DEVFREQ=y
+CONFIG_EXYNOS5422_DMC=y
 CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP=y
 CONFIG_EXTCON=y
 CONFIG_EXTCON_MAX14577=y
@@ -348,6 +350,7 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
 # CONFIG_DETECT_HUNG_TASK is not set
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 0f7381ee0c37..3608e55eaecd 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -179,6 +179,7 @@ CONFIG_MOUSE_PS2=m
 CONFIG_MOUSE_PS2_ELANTECH=y
 CONFIG_INPUT_TOUCHSCREEN=y
 CONFIG_TOUCHSCREEN_ADS7846=y
+CONFIG_TOUCHSCREEN_DA9052=y
 CONFIG_TOUCHSCREEN_EGALAX=y
 CONFIG_TOUCHSCREEN_GOODIX=y
 CONFIG_TOUCHSCREEN_MAX11801=y
@@ -236,6 +237,7 @@ CONFIG_DA9062_WATCHDOG=y
 CONFIG_DA9063_WATCHDOG=m
 CONFIG_RN5T618_WATCHDOG=y
 CONFIG_IMX2_WDT=y
+CONFIG_IMX7ULP_WDT=y
 CONFIG_MFD_DA9052_I2C=y
 CONFIG_MFD_DA9062=y
 CONFIG_MFD_DA9063=y
@@ -335,7 +337,7 @@ CONFIG_NOP_USB_XCEIV=y
 CONFIG_USB_MXS_PHY=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_FSL_USB2=y
-CONFIG_USB_CONFIGFS=m
+CONFIG_USB_CONFIGFS=y
 CONFIG_USB_CONFIGFS_SERIAL=y
 CONFIG_USB_CONFIGFS_ACM=y
 CONFIG_USB_CONFIGFS_OBEX=y
@@ -460,6 +462,7 @@ CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
 CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_PROVE_LOCKING=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 3d5f5b501330..11e2211f9007 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -11,7 +11,6 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_BLK_CGROUP=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_BASE_FULL is not set
@@ -135,6 +134,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
 CONFIG_TI_KEYSTONE_NETCP=y
 CONFIG_TI_KEYSTONE_NETCP_ETHSS=y
+CONFIG_TI_CPTS=y
 CONFIG_MARVELL_PHY=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index 09deb57db942..989bcc84e7fb 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -9,7 +9,6 @@ CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 # CONFIG_ARCH_MULTI_V7 is not set
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 9b98761e51c9..45d27190c9c9 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -4,7 +4,6 @@ CONFIG_SYSVIPC=y
 CONFIG_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_BASE_FULL is not set
 # CONFIG_SIGNALFD is not set
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index 0b42bddfbc82..e530107be412 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -4,22 +4,19 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
 CONFIG_SLOB=y
-CONFIG_JUMP_LABEL=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_MULTI_V4T=y
 # CONFIG_ARCH_MULTI_V7 is not set
 CONFIG_ARCH_AT91=y
 CONFIG_SOC_AT91RM9200=y
 CONFIG_ARCH_CLPS711X=y
+CONFIG_ARCH_MXC=y
+CONFIG_SOC_IMX1=y
 CONFIG_ARCH_INTEGRATOR=y
 CONFIG_ARCH_INTEGRATOR_AP=y
 CONFIG_INTEGRATOR_IMPD1=y
 CONFIG_INTEGRATOR_CM720T=y
 CONFIG_INTEGRATOR_CM920T=y
 CONFIG_INTEGRATOR_CM922T_XA10=y
-CONFIG_ARCH_MXC=y
-CONFIG_SOC_IMX1=y
 CONFIG_ARCH_NSPIRE=y
 CONFIG_AEABI=y
 # CONFIG_ATAGS is not set
@@ -28,6 +25,8 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_ARM_CLPS711X_CPUIDLE=y
+CONFIG_JUMP_LABEL=y
+CONFIG_PARTITION_ADVANCED=y
 # CONFIG_COREDUMP is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
@@ -81,7 +80,6 @@ CONFIG_FB=y
 CONFIG_FB_CLPS711X=y
 CONFIG_FB_IMX=y
 CONFIG_LCD_PLATFORM=y
-CONFIG_BACKLIGHT_PWM=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -92,12 +90,11 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_PWM=y
 CONFIG_PWM_ATMEL=y
 CONFIG_PWM_CLPS711X=y
-CONFIG_PWM_IMX=y
 CONFIG_EXT2_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_CRAMFS=y
 CONFIG_MINIX_FS=y
+CONFIG_CRC_CCITT=y
 # CONFIG_FTRACE is not set
 CONFIG_DEBUG_USER=y
-CONFIG_CRC_CCITT=y
diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
index bd018873e47a..2724fb3155cd 100644
--- a/arch/arm/configs/multi_v5_defconfig
+++ b/arch/arm/configs/multi_v5_defconfig
@@ -1,14 +1,11 @@
 CONFIG_SYSVIPC=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
 CONFIG_LOG_BUF_SHIFT=19
 CONFIG_CGROUPS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
 # CONFIG_ARCH_MULTI_V7 is not set
 CONFIG_ARCH_ASPEED=y
 CONFIG_MACH_ASPEED_G4=y
@@ -59,8 +56,6 @@ CONFIG_MACH_RD88F5181L_GE=y
 CONFIG_MACH_RD88F5181L_FXO=y
 CONFIG_MACH_RD88F6183AP_GE=y
 CONFIG_ARCH_U300=y
-CONFIG_PCI_MVEBU=y
-CONFIG_PREEMPT=y
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
@@ -72,6 +67,10 @@ CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_KIRKWOOD_CPUIDLE=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -84,6 +83,7 @@ CONFIG_NET_DSA=y
 CONFIG_NET_PKTGEN=m
 CONFIG_CFG80211=y
 CONFIG_MAC80211=y
+CONFIG_PCI_MVEBU=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_IMX_WEIM=y
@@ -165,6 +165,7 @@ CONFIG_SPI_ATMEL=y
 CONFIG_SPI_IMX=y
 CONFIG_SPI_ORION=y
 CONFIG_GPIO_ASPEED=m
+CONFIG_GPIO_ASPEED_SGPIO=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_QNAP=y
@@ -186,7 +187,6 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_MEDIA_SUPPORT=y
 CONFIG_MEDIA_CAMERA_SUPPORT=y
 CONFIG_V4L_PLATFORM_DRIVERS=y
-CONFIG_SOC_CAMERA=y
 CONFIG_VIDEO_ASPEED=m
 CONFIG_VIDEO_ATMEL_ISI=m
 CONFIG_DRM=y
@@ -241,6 +241,9 @@ CONFIG_USB_ASPEED_VHUB=m
 CONFIG_USB_CONFIGFS=m
 CONFIG_MMC=y
 CONFIG_SDIO_UART=y
+CONFIG_MMC_SDHCI=m
+CONFIG_MMC_SDHCI_PLTFM=m
+CONFIG_MMC_SDHCI_OF_ASPEED=m
 CONFIG_MMC_ATMELMCI=y
 CONFIG_MMC_MVSDIO=y
 CONFIG_NEW_LEDS=y
@@ -263,7 +266,6 @@ CONFIG_DMADEVICES=y
 CONFIG_AT_HDMAC=y
 CONFIG_MV_XOR=y
 CONFIG_STAGING=y
-CONFIG_FB_XGI=y
 CONFIG_ASPEED_LPC_CTRL=m
 CONFIG_ASPEED_LPC_SNOOP=m
 CONFIG_ASPEED_P2A_CTRL=m
@@ -292,6 +294,11 @@ CONFIG_NLS_CODEPAGE_850=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_2=y
 CONFIG_NLS_UTF8=y
+CONFIG_CRYPTO_CBC=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_DEV_MARVELL_CESA=y
+CONFIG_CRC_CCITT=y
+CONFIG_LIBCRC32C=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
@@ -300,8 +307,3 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
 CONFIG_DEBUG_USER=y
-CONFIG_CRYPTO_CBC=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_DEV_MARVELL_CESA=y
-CONFIG_CRC_CCITT=y
-CONFIG_LIBCRC32C=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index e4c8def9a0a5..f5d19cc1f4f2 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -53,6 +53,9 @@ CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
 CONFIG_ARCH_MILBEAUT=y
 CONFIG_ARCH_MILBEAUT_M10V=y
+CONFIG_ARCH_MMP=y
+CONFIG_MACH_MMP2_DT=y
+CONFIG_MACH_MMP3_DT=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_MACH_ARMADA_370=y
 CONFIG_MACH_ARMADA_375=y
@@ -128,8 +131,6 @@ CONFIG_CRYPTO_AES_ARM_CE=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
 CONFIG_CRYPTO_CRC32_ARM_CE=m
 CONFIG_CRYPTO_CHACHA20_NEON=m
-CONFIG_GCC_PLUGINS=y
-CONFIG_GCC_PLUGIN_STRUCTLEAK=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_PARTITION_ADVANCED=y
@@ -168,13 +169,20 @@ CONFIG_MAC80211=m
 CONFIG_RFKILL=y
 CONFIG_RFKILL_INPUT=y
 CONFIG_RFKILL_GPIO=y
+CONFIG_NFC=m
+CONFIG_NFC_DIGITAL=m
+CONFIG_NFC_NCI=m
+CONFIG_NFC_NCI_SPI=m
+CONFIG_NFC_NCI_UART=m
+CONFIG_NFC_HCI=m
+CONFIG_NFC_SHDLC=y
+CONFIG_NFC_S3FWRN5_I2C=m
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_MVEBU=y
 CONFIG_PCI_TEGRA=y
 CONFIG_PCI_RCAR_GEN2=y
 CONFIG_PCIE_RCAR=y
 CONFIG_PCI_DRA7XX_EP=y
-CONFIG_PCI_KEYSTONE=y
 CONFIG_PCI_ENDPOINT=y
 CONFIG_PCI_ENDPOINT_CONFIGFS=y
 CONFIG_PCI_EPF_TEST=m
@@ -189,15 +197,14 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
 CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DENALI_DT=y
 CONFIG_MTD_NAND_OMAP2=y
 CONFIG_MTD_NAND_OMAP_BCH=y
 CONFIG_MTD_NAND_ATMEL=y
 CONFIG_MTD_NAND_MARVELL=y
-CONFIG_MTD_NAND_GPMI_NAND=y
 CONFIG_MTD_NAND_BRCMNAND=y
+CONFIG_MTD_NAND_GPMI_NAND=y
 CONFIG_MTD_NAND_VF610_NFC=y
 CONFIG_MTD_NAND_DAVINCI=y
 CONFIG_MTD_NAND_STM32_FMC2=y
@@ -244,6 +251,7 @@ CONFIG_BGMAC_BCMA=y
 CONFIG_SYSTEMPORT=m
 CONFIG_MACB=y
 CONFIG_NET_CALXEDA_XGMAC=y
+CONFIG_FTGMAC100=m
 CONFIG_GIANFAR=y
 CONFIG_HIX5HD2_GMAC=y
 CONFIG_E1000E=y
@@ -260,11 +268,11 @@ CONFIG_STMMAC_ETH=y
 CONFIG_DWMAC_DWC_QOS_ETH=y
 CONFIG_TI_CPSW=y
 CONFIG_XILINX_EMACLITE=y
-CONFIG_AT803X_PHY=y
 CONFIG_BROADCOM_PHY=y
 CONFIG_ICPLUS_PHY=y
 CONFIG_MARVELL_PHY=y
 CONFIG_MICREL_PHY=y
+CONFIG_AT803X_PHY=y
 CONFIG_ROCKCHIP_PHY=y
 CONFIG_SMSC_PHY=y
 CONFIG_USB_PEGASUS=y
@@ -283,6 +291,7 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_QT1070=m
 CONFIG_KEYBOARD_GPIO=y
 CONFIG_KEYBOARD_TEGRA=y
+CONFIG_KEYBOARD_PXA27x=m
 CONFIG_KEYBOARD_SAMSUNG=m
 CONFIG_KEYBOARD_ST_KEYSCAN=y
 CONFIG_KEYBOARD_SPEAR=y
@@ -377,7 +386,6 @@ CONFIG_I2C_DAVINCI=y
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 CONFIG_I2C_DIGICOLOR=m
 CONFIG_I2C_EMEV2=m
-CONFIG_I2C_GPIO=m
 CONFIG_I2C_IMX=y
 CONFIG_I2C_MESON=y
 CONFIG_I2C_MV64XXX=y
@@ -437,6 +445,7 @@ CONFIG_PINCTRL_MSM8X74=y
 CONFIG_PINCTRL_MSM8916=y
 CONFIG_PINCTRL_QCOM_SPMI_PMIC=y
 CONFIG_PINCTRL_QCOM_SSBI_PMIC=y
+CONFIG_GPIO_ASPEED_SGPIO=y
 CONFIG_GPIO_DAVINCI=y
 CONFIG_GPIO_DWAPB=y
 CONFIG_GPIO_EM=y
@@ -467,8 +476,8 @@ CONFIG_BATTERY_BQ27XXX=m
 CONFIG_AXP20X_POWER=m
 CONFIG_BATTERY_MAX17040=m
 CONFIG_BATTERY_MAX17042=m
-CONFIG_CHARGER_GPIO=m
 CONFIG_CHARGER_CPCAP=m
+CONFIG_CHARGER_GPIO=m
 CONFIG_CHARGER_MAX14577=m
 CONFIG_CHARGER_MAX77693=m
 CONFIG_CHARGER_MAX8997=m
@@ -487,16 +496,17 @@ CONFIG_IMX_THERMAL=y
 CONFIG_ROCKCHIP_THERMAL=y
 CONFIG_RCAR_THERMAL=y
 CONFIG_ARMADA_THERMAL=y
+CONFIG_BCM2711_THERMAL=m
 CONFIG_BCM2835_THERMAL=m
 CONFIG_BRCMSTB_THERMAL=m
 CONFIG_ST_THERMAL_MEMMAP=y
 CONFIG_UNIPHIER_THERMAL=y
-CONFIG_WATCHDOG=y
 CONFIG_DA9063_WATCHDOG=m
 CONFIG_XILINX_WATCHDOG=y
 CONFIG_ARM_SP805_WATCHDOG=y
 CONFIG_AT91SAM9X_WATCHDOG=y
 CONFIG_SAMA5D4_WATCHDOG=y
+CONFIG_S3C2410_WATCHDOG=m
 CONFIG_DW_WATCHDOG=y
 CONFIG_DAVINCI_WATCHDOG=m
 CONFIG_ORION_WATCHDOG=y
@@ -525,10 +535,6 @@ CONFIG_MFD_BCM590XX=y
 CONFIG_MFD_AC100=y
 CONFIG_MFD_AXP20X_I2C=y
 CONFIG_MFD_AXP20X_RSB=y
-CONFIG_MFD_CROS_EC=m
-CONFIG_CROS_EC_I2C=m
-CONFIG_CROS_EC_SPI=m
-CONFIG_MFD_CROS_EC_CHARDEV=m
 CONFIG_MFD_DA9063=m
 CONFIG_MFD_MAX14577=y
 CONFIG_MFD_MAX77686=y
@@ -581,6 +587,7 @@ CONFIG_REGULATOR_QCOM_RPM=y
 CONFIG_REGULATOR_QCOM_SMD_RPM=m
 CONFIG_REGULATOR_RK808=y
 CONFIG_REGULATOR_RN5T618=y
+CONFIG_REGULATOR_S2MPA01=m
 CONFIG_REGULATOR_S2MPS11=y
 CONFIG_REGULATOR_S5M8767=y
 CONFIG_REGULATOR_STM32_BOOSTER=m
@@ -605,6 +612,7 @@ CONFIG_VIDEO_V4L2_SUBDEV_API=y
 CONFIG_MEDIA_USB_SUPPORT=y
 CONFIG_USB_VIDEO_CLASS=m
 CONFIG_V4L_PLATFORM_DRIVERS=y
+CONFIG_VIDEO_MMP_CAMERA=m
 CONFIG_VIDEO_ASPEED=m
 CONFIG_VIDEO_STM32_DCMI=m
 CONFIG_VIDEO_SAMSUNG_EXYNOS4_IS=m
@@ -628,7 +636,6 @@ CONFIG_V4L_TEST_DRIVERS=y
 CONFIG_VIDEO_VIVID=m
 CONFIG_CEC_PLATFORM_DRIVERS=y
 CONFIG_VIDEO_SAMSUNG_S5P_CEC=m
-# CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set
 CONFIG_VIDEO_ADV7180=m
 CONFIG_VIDEO_ML86V7667=m
 CONFIG_DRM=y
@@ -681,7 +688,6 @@ CONFIG_FB_EFI=y
 CONFIG_FB_WM8505=y
 CONFIG_FB_SH_MOBILE_LCDC=y
 CONFIG_FB_SIMPLE=y
-CONFIG_LCD_PLATFORM=m
 CONFIG_BACKLIGHT_PWM=y
 CONFIG_BACKLIGHT_AS3711=y
 CONFIG_BACKLIGHT_GPIO=y
@@ -702,6 +708,9 @@ CONFIG_SND_ATMEL_SOC_PDMIC=m
 CONFIG_SND_ATMEL_SOC_I2S=m
 CONFIG_SND_BCM2835_SOC_I2S=m
 CONFIG_SND_SOC_FSL_SAI=m
+CONFIG_SND_MMP_SOC=y
+CONFIG_SND_PXA_SOC_SSP=m
+CONFIG_SND_PXA910_SOC=m
 CONFIG_SND_SOC_ROCKCHIP=m
 CONFIG_SND_SOC_ROCKCHIP_SPDIF=m
 CONFIG_SND_SOC_ROCKCHIP_MAX98090=m
@@ -711,9 +720,12 @@ CONFIG_SND_SOC_SAMSUNG_SMDK_WM8994=m
 CONFIG_SND_SOC_SMDK_WM8994_PCM=m
 CONFIG_SND_SOC_SNOW=m
 CONFIG_SND_SOC_ODROID=m
+CONFIG_SND_SOC_ARNDALE=m
 CONFIG_SND_SOC_SH4_FSI=m
 CONFIG_SND_SOC_RCAR=m
 CONFIG_SND_SOC_STI=m
+CONFIG_SND_SOC_STM32_SAI=m
+CONFIG_SND_SOC_STM32_I2S=m
 CONFIG_SND_SUN4I_CODEC=m
 CONFIG_SND_SOC_TEGRA=m
 CONFIG_SND_SOC_TEGRA20_I2S=m
@@ -727,10 +739,12 @@ CONFIG_SND_SOC_TEGRA_ALC5632=m
 CONFIG_SND_SOC_TEGRA_MAX98090=m
 CONFIG_SND_SOC_AK4642=m
 CONFIG_SND_SOC_CPCAP=m
+CONFIG_SND_SOC_CS42L51_I2C=m
 CONFIG_SND_SOC_SGTL5000=m
 CONFIG_SND_SOC_SPDIF=m
 CONFIG_SND_SOC_STI_SAS=m
 CONFIG_SND_SOC_WM8978=m
+CONFIG_SND_AUDIO_GRAPH_CARD=m
 CONFIG_USB=y
 CONFIG_USB_OTG=y
 CONFIG_USB_XHCI_HCD=y
@@ -740,6 +754,7 @@ CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_STI=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_EXYNOS=y
+CONFIG_USB_EHCI_MV=m
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_STI=y
 CONFIG_USB_OHCI_EXYNOS=m
@@ -810,6 +825,7 @@ CONFIG_MMC_SDHCI_DOVE=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SDHCI_S3C=y
 CONFIG_MMC_SDHCI_PXAV3=y
+CONFIG_MMC_SDHCI_PXAV2=m
 CONFIG_MMC_SDHCI_SPEAR=y
 CONFIG_MMC_SDHCI_S3C_DMA=y
 CONFIG_MMC_SDHCI_BCM_KONA=y
@@ -875,6 +891,7 @@ CONFIG_RTC_DRV_DA9063=m
 CONFIG_RTC_DRV_EFI=m
 CONFIG_RTC_DRV_DIGICOLOR=m
 CONFIG_RTC_DRV_S3C=m
+CONFIG_RTC_DRV_SA1100=m
 CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_AT91RM9200=m
 CONFIG_RTC_DRV_AT91SAM9=m
@@ -919,6 +936,9 @@ CONFIG_SERIO_NVEC_PS2=y
 CONFIG_NVEC_POWER=y
 CONFIG_NVEC_PAZ00=y
 CONFIG_STAGING_BOARD=y
+CONFIG_MFD_CROS_EC=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_SPI=m
 CONFIG_COMMON_CLK_MAX77686=y
 CONFIG_COMMON_CLK_RK808=m
 CONFIG_COMMON_CLK_S2MPS11=m
@@ -933,6 +953,7 @@ CONFIG_BCM2835_MBOX=y
 CONFIG_ROCKCHIP_IOMMU=y
 CONFIG_TEGRA_IOMMU_GART=y
 CONFIG_TEGRA_IOMMU_SMMU=y
+CONFIG_EXYNOS_IOMMU=y
 CONFIG_REMOTEPROC=y
 CONFIG_ST_REMOTEPROC=m
 CONFIG_RPMSG_VIRTIO=m
@@ -967,8 +988,14 @@ CONFIG_ARCH_TEGRA_2x_SOC=y
 CONFIG_ARCH_TEGRA_3x_SOC=y
 CONFIG_ARCH_TEGRA_114_SOC=y
 CONFIG_ARCH_TEGRA_124_SOC=y
+CONFIG_ARM_EXYNOS_BUS_DEVFREQ=m
 CONFIG_ARM_TEGRA_DEVFREQ=m
+CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP=m
+CONFIG_EXTCON_MAX14577=m
+CONFIG_EXTCON_MAX77693=m
+CONFIG_EXTCON_MAX8997=m
 CONFIG_TI_AEMIF=y
+CONFIG_EXYNOS5422_DMC=m
 CONFIG_IIO=y
 CONFIG_IIO_SW_TRIGGER=y
 CONFIG_ASPEED_ADC=m
@@ -978,16 +1005,15 @@ CONFIG_BERLIN2_ADC=m
 CONFIG_CPCAP_ADC=m
 CONFIG_EXYNOS_ADC=m
 CONFIG_MESON_SARADC=m
+CONFIG_ROCKCHIP_SARADC=m
 CONFIG_STM32_ADC_CORE=m
 CONFIG_STM32_ADC=m
 CONFIG_STM32_DFSDM_ADC=m
 CONFIG_VF610_ADC=m
 CONFIG_XILINX_XADC=y
-CONFIG_STM32_LPTIMER_CNT=m
-CONFIG_STM32_DAC=m
-CONFIG_ROCKCHIP_SARADC=m
 CONFIG_IIO_CROS_EC_SENSORS_CORE=m
 CONFIG_IIO_CROS_EC_SENSORS=m
+CONFIG_STM32_DAC=m
 CONFIG_MPU3050_I2C=y
 CONFIG_CM36651=m
 CONFIG_IIO_CROS_EC_LIGHT_PROX=m
@@ -1020,12 +1046,14 @@ CONFIG_PHY_SUN9I_USB=y
 CONFIG_PHY_HIX5HD2_SATA=y
 CONFIG_PHY_BERLIN_SATA=y
 CONFIG_PHY_BERLIN_USB=y
+CONFIG_PHY_MMP3_USB=m
 CONFIG_PHY_CPCAP_USB=m
 CONFIG_PHY_QCOM_APQ8064_SATA=m
 CONFIG_PHY_RCAR_GEN2=m
 CONFIG_PHY_ROCKCHIP_DP=m
 CONFIG_PHY_ROCKCHIP_USB=y
 CONFIG_PHY_SAMSUNG_USB2=m
+CONFIG_PHY_EXYNOS5250_SATA=m
 CONFIG_PHY_UNIPHIER_USB2=y
 CONFIG_PHY_UNIPHIER_USB3=y
 CONFIG_PHY_MIPHY28LP=y
@@ -1036,11 +1064,18 @@ CONFIG_PHY_DM816X_USB=m
 CONFIG_OMAP_USB2=y
 CONFIG_TI_PIPE3=y
 CONFIG_TWL4030_USB=m
-CONFIG_MESON_MX_EFUSE=m
-CONFIG_ROCKCHIP_EFUSE=m
 CONFIG_NVMEM_IMX_OCOTP=y
+CONFIG_ROCKCHIP_EFUSE=m
 CONFIG_NVMEM_SUNXI_SID=y
 CONFIG_NVMEM_VF610_OCOTP=y
+CONFIG_MESON_MX_EFUSE=m
+CONFIG_FSI=m
+CONFIG_FSI_MASTER_GPIO=m
+CONFIG_FSI_MASTER_HUB=m
+CONFIG_FSI_MASTER_ASPEED=m
+CONFIG_FSI_SCOM=m
+CONFIG_FSI_SBEFIFO=m
+CONFIG_FSI_OCC=m
 CONFIG_EXT4_FS=y
 CONFIG_AUTOFS4_FS=y
 CONFIG_MSDOS_FS=y
@@ -1067,14 +1102,15 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_DEV_SUN4I_SS=m
 CONFIG_CRYPTO_DEV_MARVELL_CESA=m
 CONFIG_CRYPTO_DEV_EXYNOS_RNG=m
 CONFIG_CRYPTO_DEV_S5P=m
 CONFIG_CRYPTO_DEV_ATMEL_AES=m
 CONFIG_CRYPTO_DEV_ATMEL_TDES=m
 CONFIG_CRYPTO_DEV_ATMEL_SHA=m
-CONFIG_CRYPTO_DEV_SUN4I_SS=m
 CONFIG_CRYPTO_DEV_ROCKCHIP=m
 CONFIG_CMA_SIZE_MBYTES=64
 CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 40d7f1a4fc45..c32c338f7704 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -92,6 +92,7 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETFILTER=y
 CONFIG_PHONET=m
+CONFIG_NET_SWITCHDEV=y
 CONFIG_CAN=m
 CONFIG_CAN_C_CAN=m
 CONFIG_CAN_C_CAN_PLATFORM=m
@@ -128,7 +129,6 @@ CONFIG_PCI_ENDPOINT_CONFIGFS=y
 CONFIG_PCI_EPF_TEST=m
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_DMA_CMA=y
 CONFIG_OMAP_OCP2SCP=y
 CONFIG_CONNECTOR=m
 CONFIG_MTD=y
@@ -182,6 +182,7 @@ CONFIG_SMSC911X=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 CONFIG_TI_DAVINCI_EMAC=y
 CONFIG_TI_CPSW=y
+CONFIG_TI_CPSW_SWITCHDEV=y
 CONFIG_TI_CPTS=y
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
@@ -343,18 +344,16 @@ CONFIG_VIDEO_OMAP3=m
 CONFIG_CEC_PLATFORM_DRIVERS=y
 # CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set
 CONFIG_VIDEO_TVP5150=m
+CONFIG_VIDEO_MT9P031=m
 CONFIG_DRM=m
 CONFIG_DRM_OMAP=m
 CONFIG_OMAP5_DSS_HDMI=y
 CONFIG_OMAP2_DSS_SDI=y
 CONFIG_OMAP2_DSS_DSI=y
 CONFIG_DRM_OMAP_ENCODER_OPA362=m
-CONFIG_DRM_OMAP_ENCODER_TFP410=m
 CONFIG_DRM_OMAP_ENCODER_TPD12S015=m
-CONFIG_DRM_OMAP_CONNECTOR_DVI=m
 CONFIG_DRM_OMAP_CONNECTOR_HDMI=m
 CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m
-CONFIG_DRM_OMAP_PANEL_DPI=m
 CONFIG_DRM_OMAP_PANEL_DSI_CM=m
 CONFIG_DRM_TILCDC=m
 CONFIG_DRM_PANEL_SIMPLE=m
@@ -539,11 +538,16 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_MICHAEL_MIC=y
+CONFIG_CRYPTO_DEV_OMAP=m
+CONFIG_CRYPTO_DEV_OMAP_SHAM=m
+CONFIG_CRYPTO_DEV_OMAP_AES=m
+CONFIG_CRYPTO_DEV_OMAP_DES=m
 CONFIG_CRC_CCITT=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC_ITU_T=y
 CONFIG_CRC7=y
 CONFIG_LIBCRC32C=y
+CONFIG_DMA_CMA=y
 CONFIG_FONTS=y
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
@@ -552,5 +556,6 @@ CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_SPLIT=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 CONFIG_SCHEDSTATS=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index 02f1e7b7c8f6..4dd1d8ca4455 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_CGROUPS=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_SLUB_DEBUG is not set
@@ -226,6 +225,7 @@ CONFIG_QCOM_WCNSS_PIL=y
 CONFIG_RPMSG_CHAR=y
 CONFIG_RPMSG_QCOM_SMD=y
 CONFIG_QCOM_GSBI=y
+CONFIG_QCOM_OCMEM=y
 CONFIG_QCOM_PM=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD_RPM=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index ef785340e6f8..27f6135c4ee7 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
 CONFIG_SOC_SAMA5D2=y
 CONFIG_SOC_SAMA5D3=y
 CONFIG_SOC_SAMA5D4=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
 CONFIG_AEABI=y
 CONFIG_UACCESS_WITH_MEMCPY=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index c6c70355141c..de3830443613 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -9,7 +9,6 @@ CONFIG_PERF_EVENTS=y
 CONFIG_SLAB=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_PL310_ERRATA_588369=y
-CONFIG_ARM_ERRATA_754322=y
 CONFIG_SMP=y
 CONFIG_SCHED_MC=y
 CONFIG_NR_CPUS=8
@@ -50,7 +49,6 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_EEPROM_AT24=y
 CONFIG_BLK_DEV_SD=y
@@ -130,7 +128,6 @@ CONFIG_DRM_SII902X=y
 CONFIG_DRM_I2C_ADV7511=y
 CONFIG_DRM_I2C_ADV7511_AUDIO=y
 CONFIG_FB_SH_MOBILE_LCDC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
 # CONFIG_BACKLIGHT_GENERIC is not set
 CONFIG_BACKLIGHT_PWM=y
 CONFIG_BACKLIGHT_AS3711=y
@@ -215,4 +212,5 @@ CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=64
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index df433abfcb02..3f5d727efc41 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -56,6 +56,7 @@ CONFIG_SUN4I_EMAC=y
 CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_MICREL_PHY=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_SUN4I_LRADC=y
@@ -150,4 +151,6 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_FS=y
+CONFIG_CRYPTO_DEV_ALLWINNER=y
+CONFIG_CRYPTO_DEV_SUN8I_CE=y
 CONFIG_CRYPTO_DEV_SUN4I_SS=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 8f5c6a5b444c..a27592d3b1fa 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -250,6 +250,8 @@ CONFIG_KEYBOARD_NVEC=y
 CONFIG_SERIO_NVEC_PS2=y
 CONFIG_NVEC_POWER=y
 CONFIG_NVEC_PAZ00=y
+CONFIG_STAGING_MEDIA=y
+CONFIG_TEGRA_VDE=y
 CONFIG_TEGRA_IOMMU_GART=y
 CONFIG_TEGRA_IOMMU_SMMU=y
 CONFIG_ARCH_TEGRA_2x_SOC=y
diff --git a/arch/arm/configs/zx_defconfig b/arch/arm/configs/zx_defconfig
index c4070c19ea6c..4d2ef785ed34 100644
--- a/arch/arm/configs/zx_defconfig
+++ b/arch/arm/configs/zx_defconfig
@@ -11,7 +11,6 @@ CONFIG_RT_GROUP_SCHED=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 043b0b18bf7e..2674de6ada1f 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON
 
 config CRYPTO_SHA1_ARM_CE
 	tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
 	select CRYPTO_SHA1_ARM
 	select CRYPTO_HASH
 	help
@@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
 
 config CRYPTO_SHA2_ARM_CE
 	tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
 	select CRYPTO_SHA256_ARM
 	select CRYPTO_HASH
 	help
@@ -81,7 +81,7 @@ config CRYPTO_AES_ARM
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
 	help
@@ -96,8 +96,8 @@ config CRYPTO_AES_ARM_BS
 
 config CRYPTO_AES_ARM_CE
 	tristate "Accelerated AES using ARMv8 Crypto Extensions"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
 	help
@@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE
 
 config CRYPTO_GHASH_ARM_CE
 	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
-	depends on KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
 	select CRYPTO_GF128MUL
@@ -118,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE
 
 config CRYPTO_CRCT10DIF_ARM_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
-	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+	depends on CRC_T10DIF
 	select CRYPTO_HASH
 
 config CRYPTO_CRC32_ARM_CE
 	tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
-	depends on KERNEL_MODE_NEON && CRC32
+	depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+	depends on CRC32
 	select CRYPTO_HASH
 
 config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha stream cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
+	tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_ARM
+	tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
+	select CRYPTO_HASH
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
 
 config CRYPTO_NHPOLY1305_NEON
 	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_NHPOLY1305
 
+config CRYPTO_CURVE25519_NEON
+	tristate "NEON accelerated Curve25519 scalar multiplication library"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_LIB_CURVE25519_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4180f3a13512..b745c17d356f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
+obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
 
-ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
-crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
-
-ifneq ($(crc-obj-y)$(crc-obj-m),)
-ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y)
-ce-obj-y += $(crc-obj-y)
-ce-obj-m += $(crc-obj-m)
-else
-$(warning These CRC Extensions modules need binutils 2.23 or higher)
-$(warning $(crc-obj-y) $(crc-obj-m))
-endif
-endif
-
-ifneq ($(ce-obj-y)$(ce-obj-m),)
-ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
-obj-y += $(ce-obj-y)
-obj-m += $(ce-obj-m)
-else
-$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
-$(warning $(ce-obj-y) $(ce-obj-m))
-endif
-endif
+obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
 
 aes-arm-y	:= aes-cipher-core.o aes-cipher-glue.o
 aes-arm-bs-y	:= aes-neonbs-core.o aes-neonbs-glue.o
@@ -53,13 +35,19 @@ aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+poly1305-arm-y := poly1305-core.o poly1305-glue.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+curve25519-neon-y := curve25519-core.o curve25519-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+	$(call cmd,perl)
+
 $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 	$(call cmd,perl)
 
@@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
 endif
 
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
+
+# massage the perlasm code a bit so we only get the NEON routine if we need it
+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index cdb1a07e7ad0..b668c97663ec 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -138,14 +138,8 @@ static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int ret;
-
-	ret = ce_aes_expandkey(ctx, in_key, key_len);
-	if (!ret)
-		return 0;
 
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
+	return ce_aes_expandkey(ctx, in_key, key_len);
 }
 
 struct crypto_aes_xts_ctx {
@@ -167,11 +161,7 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 	if (!ret)
 		ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
 				       key_len / 2);
-	if (!ret)
-		return 0;
-
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
+	return ret;
 }
 
 static int ecb_encrypt(struct skcipher_request *req)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
new file mode 100644
index 000000000000..6fdb0ac62b3d
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+			     const u32 *state, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
+static inline bool neon_usable(void)
+{
+	return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+		hchacha_block_arm(state, stream, nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, stream, nrounds);
+		kernel_neon_end();
+	}
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+	    bytes <= CHACHA_BLOCK_SIZE) {
+		chacha_doarm(dst, src, bytes, state, nrounds);
+		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+		return;
+	}
+
+	kernel_neon_begin();
+	chacha_doneon(state, dst, src, bytes, nrounds);
+	kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_stream_xor(struct skcipher_request *req,
+			     const struct chacha_ctx *ctx, const u8 *iv,
+			     bool neon)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
+			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+				     nbytes, state, ctx->nrounds);
+			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+		} else {
+			kernel_neon_begin();
+			chacha_doneon(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			kernel_neon_end();
+		}
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+	return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+	return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
+		hchacha_block_arm(state, subctx.key, ctx->nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, subctx.key, ctx->nrounds);
+		kernel_neon_end();
+	}
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+	return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+	return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_arm,
+		.decrypt		= chacha_arm,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	},
+};
+
+static struct skcipher_alg neon_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	int err = 0;
+
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
+		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+		if (err)
+			return err;
+	}
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+		int i;
+
+		switch (read_cpuid_part()) {
+		case ARM_CPU_PART_CORTEX_A7:
+		case ARM_CPU_PART_CORTEX_A5:
+			/*
+			 * The Cortex-A7 and Cortex-A5 do not perform well with
+			 * the NEON implementation but do incredibly with the
+			 * scalar one and use less power.
+			 */
+			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+				neon_algs[i].base.cra_priority = 0;
+			break;
+		default:
+			static_branch_enable(&use_neon);
+		}
+
+		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
+			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+			if (err)
+				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+		}
+	}
+	return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
+		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+	}
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
deleted file mode 100644
index a8e9b534c8da..000000000000
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				      int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				       int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
-			  unsigned int bytes, int nrounds)
-{
-	u8 buf[CHACHA_BLOCK_SIZE];
-
-	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
-		chacha_4block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE * 4;
-		src += CHACHA_BLOCK_SIZE * 4;
-		dst += CHACHA_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha_block_xor_neon(state, buf, buf, nrounds);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
-{
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	crypto_chacha_init(state, ctx, iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		kernel_neon_begin();
-		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes, ctx->nrounds);
-		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
-	return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct chacha_ctx subctx;
-	u32 state[16];
-	u8 real_iv[16];
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_neon_begin();
-	hchacha_block_neon(state, subctx.key, ctx->nrounds);
-	kernel_neon_end();
-	subctx.nrounds = ctx->nrounds;
-
-	memcpy(&real_iv[0], req->iv + 24, 8);
-	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
-	{
-		.base.cra_name		= "chacha20",
-		.base.cra_driver_name	= "chacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= CHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= chacha_neon,
-		.decrypt		= chacha_neon,
-	}, {
-		.base.cra_name		= "xchacha20",
-		.base.cra_driver_name	= "xchacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}, {
-		.base.cra_name		= "xchacha12",
-		.base.cra_driver_name	= "xchacha12-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}
-};
-
-static int __init chacha_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
new file mode 100644
index 000000000000..2985b80a45b5
--- /dev/null
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Design notes:
+ *
+ * 16 registers would be needed to hold the state matrix, but only 14 are
+ * available because 'sp' and 'pc' cannot be used.  So we spill the elements
+ * (x8, x9) to the stack and swap them out with (x10, x11).  This adds one
+ * 'ldrd' and one 'strd' instruction per round.
+ *
+ * All rotates are performed using the implicit rotate operand accepted by the
+ * 'add' and 'eor' instructions.  This is faster than using explicit rotate
+ * instructions.  To make this work, we allow the values in the second and last
+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
+ * wrong rotation amount.  The rotation amount is then fixed up just in time
+ * when the values are used.  'brot' is the number of bits the values in row 'b'
+ * need to be rotated right to arrive at the correct values, and 'drot'
+ * similarly for row 'd'.  (brot, drot) start out as (0, 0) but we make it such
+ * that they end up as (25, 24) after every round.
+ */
+
+	// ChaCha state registers
+	X0	.req	r0
+	X1	.req	r1
+	X2	.req	r2
+	X3	.req	r3
+	X4	.req	r4
+	X5	.req	r5
+	X6	.req	r6
+	X7	.req	r7
+	X8_X10	.req	r8	// shared by x8 and x10
+	X9_X11	.req	r9	// shared by x9 and x11
+	X12	.req	r10
+	X13	.req	r11
+	X14	.req	r12
+	X15	.req	r14
+
+.macro __rev		out, in,  t0, t1, t2
+.if __LINUX_ARM_ARCH__ >= 6
+	rev		\out, \in
+.else
+	lsl		\t0, \in, #24
+	and		\t1, \in, #0xff00
+	and		\t2, \in, #0xff0000
+	orr		\out, \t0, \in, lsr #24
+	orr		\out, \out, \t1, lsl #8
+	orr		\out, \out, \t2, lsr #8
+.endif
+.endm
+
+.macro _le32_bswap	x,  t0, t1, t2
+#ifdef __ARMEB__
+	__rev		\x, \x,  \t0, \t1, \t2
+#endif
+.endm
+
+.macro _le32_bswap_4x	a, b, c, d,  t0, t1, t2
+	_le32_bswap	\a,  \t0, \t1, \t2
+	_le32_bswap	\b,  \t0, \t1, \t2
+	_le32_bswap	\c,  \t0, \t1, \t2
+	_le32_bswap	\d,  \t0, \t1, \t2
+.endm
+
+.macro __ldrd		a, b, src, offset
+#if __LINUX_ARM_ARCH__ >= 6
+	ldrd		\a, \b, [\src, #\offset]
+#else
+	ldr		\a, [\src, #\offset]
+	ldr		\b, [\src, #\offset + 4]
+#endif
+.endm
+
+.macro __strd		a, b, dst, offset
+#if __LINUX_ARM_ARCH__ >= 6
+	strd		\a, \b, [\dst, #\offset]
+#else
+	str		\a, [\dst, #\offset]
+	str		\b, [\dst, #\offset + 4]
+#endif
+.endm
+
+.macro _halfround	a1, b1, c1, d1,  a2, b2, c2, d2
+
+	// a += b; d ^= a; d = rol(d, 16);
+	add		\a1, \a1, \b1, ror #brot
+	add		\a2, \a2, \b2, ror #brot
+	eor		\d1, \a1, \d1, ror #drot
+	eor		\d2, \a2, \d2, ror #drot
+	// drot == 32 - 16 == 16
+
+	// c += d; b ^= c; b = rol(b, 12);
+	add		\c1, \c1, \d1, ror #16
+	add		\c2, \c2, \d2, ror #16
+	eor		\b1, \c1, \b1, ror #brot
+	eor		\b2, \c2, \b2, ror #brot
+	// brot == 32 - 12 == 20
+
+	// a += b; d ^= a; d = rol(d, 8);
+	add		\a1, \a1, \b1, ror #20
+	add		\a2, \a2, \b2, ror #20
+	eor		\d1, \a1, \d1, ror #16
+	eor		\d2, \a2, \d2, ror #16
+	// drot == 32 - 8 == 24
+
+	// c += d; b ^= c; b = rol(b, 7);
+	add		\c1, \c1, \d1, ror #24
+	add		\c2, \c2, \d2, ror #24
+	eor		\b1, \c1, \b1, ror #20
+	eor		\b2, \c2, \b2, ror #20
+	// brot == 32 - 7 == 25
+.endm
+
+.macro _doubleround
+
+	// column round
+
+	// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
+	_halfround	X0, X4, X8_X10, X12,  X1, X5, X9_X11, X13
+
+	// save (x8, x9); restore (x10, x11)
+	__strd		X8_X10, X9_X11, sp, 0
+	__ldrd		X8_X10, X9_X11, sp, 8
+
+	// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
+	_halfround	X2, X6, X8_X10, X14,  X3, X7, X9_X11, X15
+
+	.set brot, 25
+	.set drot, 24
+
+	// diagonal round
+
+	// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
+	_halfround	X0, X5, X8_X10, X15,  X1, X6, X9_X11, X12
+
+	// save (x10, x11); restore (x8, x9)
+	__strd		X8_X10, X9_X11, sp, 8
+	__ldrd		X8_X10, X9_X11, sp, 0
+
+	// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
+	_halfround	X2, X7, X8_X10, X13,  X3, X4, X9_X11, X14
+.endm
+
+.macro _chacha_permute	nrounds
+	.set brot, 0
+	.set drot, 0
+	.rept \nrounds / 2
+	 _doubleround
+	.endr
+.endm
+
+.macro _chacha		nrounds
+
+.Lnext_block\@:
+	// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
+	// Registers contain x0-x9,x12-x15.
+
+	// Do the core ChaCha permutation to update x0-x15.
+	_chacha_permute	\nrounds
+
+	add		sp, #8
+	// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
+	// Registers contain x0-x9,x12-x15.
+	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+	// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
+	push		{X8_X10, X9_X11, X12, X13, X14, X15}
+
+	// Load (OUT, IN, LEN).
+	ldr		r14, [sp, #96]
+	ldr		r12, [sp, #100]
+	ldr		r11, [sp, #104]
+
+	orr		r10, r14, r12
+
+	// Use slow path if fewer than 64 bytes remain.
+	cmp		r11, #64
+	blt		.Lxor_slowpath\@
+
+	// Use slow path if IN and/or OUT isn't 4-byte aligned.  Needed even on
+	// ARMv6+, since ldmia and stmia (used below) still require alignment.
+	tst		r10, #3
+	bne		.Lxor_slowpath\@
+
+	// Fast path: XOR 64 bytes of aligned data.
+
+	// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
+	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+	// x0-x3
+	__ldrd		r8, r9, sp, 32
+	__ldrd		r10, r11, sp, 40
+	add		X0, X0, r8
+	add		X1, X1, r9
+	add		X2, X2, r10
+	add		X3, X3, r11
+	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
+	ldmia		r12!, {r8-r11}
+	eor		X0, X0, r8
+	eor		X1, X1, r9
+	eor		X2, X2, r10
+	eor		X3, X3, r11
+	stmia		r14!, {X0-X3}
+
+	// x4-x7
+	__ldrd		r8, r9, sp, 48
+	__ldrd		r10, r11, sp, 56
+	add		X4, r8, X4, ror #brot
+	add		X5, r9, X5, ror #brot
+	ldmia		r12!, {X0-X3}
+	add		X6, r10, X6, ror #brot
+	add		X7, r11, X7, ror #brot
+	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
+	eor		X4, X4, X0
+	eor		X5, X5, X1
+	eor		X6, X6, X2
+	eor		X7, X7, X3
+	stmia		r14!, {X4-X7}
+
+	// x8-x15
+	pop		{r0-r7}			// (x8-x9,x12-x15,x10-x11)
+	__ldrd		r8, r9, sp, 32
+	__ldrd		r10, r11, sp, 40
+	add		r0, r0, r8		// x8
+	add		r1, r1, r9		// x9
+	add		r6, r6, r10		// x10
+	add		r7, r7, r11		// x11
+	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
+	ldmia		r12!, {r8-r11}
+	eor		r0, r0, r8		// x8
+	eor		r1, r1, r9		// x9
+	eor		r6, r6, r10		// x10
+	eor		r7, r7, r11		// x11
+	stmia		r14!, {r0,r1,r6,r7}
+	ldmia		r12!, {r0,r1,r6,r7}
+	__ldrd		r8, r9, sp, 48
+	__ldrd		r10, r11, sp, 56
+	add		r2, r8, r2, ror #drot	// x12
+	add		r3, r9, r3, ror #drot	// x13
+	add		r4, r10, r4, ror #drot	// x14
+	add		r5, r11, r5, ror #drot	// x15
+	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
+	  ldr		r9, [sp, #72]		// load LEN
+	eor		r2, r2, r0		// x12
+	eor		r3, r3, r1		// x13
+	eor		r4, r4, r6		// x14
+	eor		r5, r5, r7		// x15
+	  subs		r9, #64			// decrement and check LEN
+	stmia		r14!, {r2-r5}
+
+	beq		.Ldone\@
+
+.Lprepare_for_next_block\@:
+
+	// Stack: x0-x15 OUT IN LEN
+
+	// Increment block counter (x12)
+	add		r8, #1
+
+	// Store updated (OUT, IN, LEN)
+	str		r14, [sp, #64]
+	str		r12, [sp, #68]
+	str		r9, [sp, #72]
+
+	  mov		r14, sp
+
+	// Store updated block counter (x12)
+	str		r8, [sp, #48]
+
+	  sub		sp, #16
+
+	// Reload state and do next block
+	ldmia		r14!, {r0-r11}		// load x0-x11
+	__strd		r10, r11, sp, 8		// store x10-x11 before state
+	ldmia		r14, {r10-r12,r14}	// load x12-x15
+	b		.Lnext_block\@
+
+.Lxor_slowpath\@:
+	// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
+	// We handle it by storing the 64 bytes of keystream to the stack, then
+	// XOR-ing the needed portion with the data.
+
+	// Allocate keystream buffer
+	sub		sp, #64
+	mov		r14, sp
+
+	// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
+	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+	// Save keystream for x0-x3
+	__ldrd		r8, r9, sp, 96
+	__ldrd		r10, r11, sp, 104
+	add		X0, X0, r8
+	add		X1, X1, r9
+	add		X2, X2, r10
+	add		X3, X3, r11
+	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
+	stmia		r14!, {X0-X3}
+
+	// Save keystream for x4-x7
+	__ldrd		r8, r9, sp, 112
+	__ldrd		r10, r11, sp, 120
+	add		X4, r8, X4, ror #brot
+	add		X5, r9, X5, ror #brot
+	add		X6, r10, X6, ror #brot
+	add		X7, r11, X7, ror #brot
+	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
+	  add		r8, sp, #64
+	stmia		r14!, {X4-X7}
+
+	// Save keystream for x8-x15
+	ldm		r8, {r0-r7}		// (x8-x9,x12-x15,x10-x11)
+	__ldrd		r8, r9, sp, 128
+	__ldrd		r10, r11, sp, 136
+	add		r0, r0, r8		// x8
+	add		r1, r1, r9		// x9
+	add		r6, r6, r10		// x10
+	add		r7, r7, r11		// x11
+	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
+	stmia		r14!, {r0,r1,r6,r7}
+	__ldrd		r8, r9, sp, 144
+	__ldrd		r10, r11, sp, 152
+	add		r2, r8, r2, ror #drot	// x12
+	add		r3, r9, r3, ror #drot	// x13
+	add		r4, r10, r4, ror #drot	// x14
+	add		r5, r11, r5, ror #drot	// x15
+	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
+	stmia		r14, {r2-r5}
+
+	// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
+	// Registers: r8 is block counter, r12 is IN.
+
+	ldr		r9, [sp, #168]		// LEN
+	ldr		r14, [sp, #160]		// OUT
+	cmp		r9, #64
+	  mov		r0, sp
+	movle		r1, r9
+	movgt		r1, #64
+	// r1 is number of bytes to XOR, in range [1, 64]
+
+.if __LINUX_ARM_ARCH__ < 6
+	orr		r2, r12, r14
+	tst		r2, #3			// IN or OUT misaligned?
+	bne		.Lxor_next_byte\@
+.endif
+
+	// XOR a word at a time
+.rept 16
+	subs		r1, #4
+	blt		.Lxor_words_done\@
+	ldr		r2, [r12], #4
+	ldr		r3, [r0], #4
+	eor		r2, r2, r3
+	str		r2, [r14], #4
+.endr
+	b		.Lxor_slowpath_done\@
+.Lxor_words_done\@:
+	ands		r1, r1, #3
+	beq		.Lxor_slowpath_done\@
+
+	// XOR a byte at a time
+.Lxor_next_byte\@:
+	ldrb		r2, [r12], #1
+	ldrb		r3, [r0], #1
+	eor		r2, r2, r3
+	strb		r2, [r14], #1
+	subs		r1, #1
+	bne		.Lxor_next_byte\@
+
+.Lxor_slowpath_done\@:
+	subs		r9, #64
+	add		sp, #96
+	bgt		.Lprepare_for_next_block\@
+
+.Ldone\@:
+.endm	// _chacha
+
+/*
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ *		     const u32 *state, int nrounds);
+ */
+ENTRY(chacha_doarm)
+	cmp		r2, #0			// len == 0?
+	reteq		lr
+
+	ldr		ip, [sp]
+	cmp		ip, #12
+
+	push		{r0-r2,r4-r11,lr}
+
+	// Push state x0-x15 onto stack.
+	// Also store an extra copy of x10-x11 just before the state.
+
+	add		X12, r3, #48
+	ldm		X12, {X12,X13,X14,X15}
+	push		{X12,X13,X14,X15}
+	sub		sp, sp, #64
+
+	__ldrd		X8_X10, X9_X11, r3, 40
+	__strd		X8_X10, X9_X11, sp, 8
+	__strd		X8_X10, X9_X11, sp, 56
+	ldm		r3, {X0-X9_X11}
+	__strd		X0, X1, sp, 16
+	__strd		X2, X3, sp, 24
+	__strd		X4, X5, sp, 32
+	__strd		X6, X7, sp, 40
+	__strd		X8_X10, X9_X11, sp, 48
+
+	beq		1f
+	_chacha		20
+
+0:	add		sp, #76
+	pop		{r4-r11, pc}
+
+1:	_chacha		12
+	b		0b
+ENDPROC(chacha_doarm)
+
+/*
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
+ */
+ENTRY(hchacha_block_arm)
+	push		{r1,r4-r11,lr}
+
+	cmp		r2, #12			// ChaCha12 ?
+
+	mov		r14, r0
+	ldmia		r14!, {r0-r11}		// load x0-x11
+	push		{r10-r11}		// store x10-x11 to stack
+	ldm		r14, {r10-r12,r14}	// load x12-x15
+	sub		sp, #8
+
+	beq		1f
+	_chacha_permute	20
+
+	// Skip over (unused0-unused1, x10-x11)
+0:	add		sp, #16
+
+	// Fix up rotations of x12-x15
+	ror		X12, X12, #drot
+	ror		X13, X13, #drot
+	  pop		{r4}			// load 'out'
+	ror		X14, X14, #drot
+	ror		X15, X15, #drot
+
+	// Store (x0-x3,x12-x15) to 'out'
+	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}
+
+	pop		{r4-r11,pc}
+
+1:	_chacha_permute	12
+	b		0b
+ENDPROC(hchacha_block_arm)
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 95592499b9bd..2208445808d7 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -54,10 +54,8 @@ static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
index 86be258a803f..46c02c518a30 100644
--- a/arch/arm/crypto/crct10dif-ce-core.S
+++ b/arch/arm/crypto/crct10dif-ce-core.S
@@ -72,7 +72,7 @@
 #endif
 
 	.text
-	.arch		armv7-a
+	.arch		armv8-a
 	.fpu		crypto-neon-fp-armv8
 
 	init_crc	.req	r0
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
new file mode 100644
index 000000000000..be18af52e7dc
--- /dev/null
+++ b/arch/arm/crypto/curve25519-core.S
@@ -0,0 +1,2062 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <linux/linkage.h>
+
+.text
+.fpu neon
+.arch armv7-a
+.align 4
+
+ENTRY(curve25519_neon)
+	push		{r4-r11, lr}
+	mov		ip, sp
+	sub		r3, sp, #704
+	and		r3, r3, #0xfffffff0
+	mov		sp, r3
+	movw		r4, #0
+	movw		r5, #254
+	vmov.i32	q0, #1
+	vshr.u64	q1, q0, #7
+	vshr.u64	q0, q0, #8
+	vmov.i32	d4, #19
+	vmov.i32	d5, #38
+	add		r6, sp, #480
+	vst1.8		{d2-d3}, [r6, : 128]!
+	vst1.8		{d0-d1}, [r6, : 128]!
+	vst1.8		{d4-d5}, [r6, : 128]
+	add		r6, r3, #0
+	vmov.i32	q2, #0
+	vst1.8		{d4-d5}, [r6, : 128]!
+	vst1.8		{d4-d5}, [r6, : 128]!
+	vst1.8		d4, [r6, : 64]
+	add		r6, r3, #0
+	movw		r7, #960
+	sub		r7, r7, #2
+	neg		r7, r7
+	sub		r7, r7, r7, LSL #7
+	str		r7, [r6]
+	add		r6, sp, #672
+	vld1.8		{d4-d5}, [r1]!
+	vld1.8		{d6-d7}, [r1]
+	vst1.8		{d4-d5}, [r6, : 128]!
+	vst1.8		{d6-d7}, [r6, : 128]
+	sub		r1, r6, #16
+	ldrb		r6, [r1]
+	and		r6, r6, #248
+	strb		r6, [r1]
+	ldrb		r6, [r1, #31]
+	and		r6, r6, #127
+	orr		r6, r6, #64
+	strb		r6, [r1, #31]
+	vmov.i64	q2, #0xffffffff
+	vshr.u64	q3, q2, #7
+	vshr.u64	q2, q2, #6
+	vld1.8		{d8}, [r2]
+	vld1.8		{d10}, [r2]
+	add		r2, r2, #6
+	vld1.8		{d12}, [r2]
+	vld1.8		{d14}, [r2]
+	add		r2, r2, #6
+	vld1.8		{d16}, [r2]
+	add		r2, r2, #4
+	vld1.8		{d18}, [r2]
+	vld1.8		{d20}, [r2]
+	add		r2, r2, #6
+	vld1.8		{d22}, [r2]
+	add		r2, r2, #2
+	vld1.8		{d24}, [r2]
+	vld1.8		{d26}, [r2]
+	vshr.u64	q5, q5, #26
+	vshr.u64	q6, q6, #3
+	vshr.u64	q7, q7, #29
+	vshr.u64	q8, q8, #6
+	vshr.u64	q10, q10, #25
+	vshr.u64	q11, q11, #3
+	vshr.u64	q12, q12, #12
+	vshr.u64	q13, q13, #38
+	vand		q4, q4, q2
+	vand		q6, q6, q2
+	vand		q8, q8, q2
+	vand		q10, q10, q2
+	vand		q2, q12, q2
+	vand		q5, q5, q3
+	vand		q7, q7, q3
+	vand		q9, q9, q3
+	vand		q11, q11, q3
+	vand		q3, q13, q3
+	add		r2, r3, #48
+	vadd.i64	q12, q4, q1
+	vadd.i64	q13, q10, q1
+	vshr.s64	q12, q12, #26
+	vshr.s64	q13, q13, #26
+	vadd.i64	q5, q5, q12
+	vshl.i64	q12, q12, #26
+	vadd.i64	q14, q5, q0
+	vadd.i64	q11, q11, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q15, q11, q0
+	vsub.i64	q4, q4, q12
+	vshr.s64	q12, q14, #25
+	vsub.i64	q10, q10, q13
+	vshr.s64	q13, q15, #25
+	vadd.i64	q6, q6, q12
+	vshl.i64	q12, q12, #25
+	vadd.i64	q14, q6, q1
+	vadd.i64	q2, q2, q13
+	vsub.i64	q5, q5, q12
+	vshr.s64	q12, q14, #26
+	vshl.i64	q13, q13, #25
+	vadd.i64	q14, q2, q1
+	vadd.i64	q7, q7, q12
+	vshl.i64	q12, q12, #26
+	vadd.i64	q15, q7, q0
+	vsub.i64	q11, q11, q13
+	vshr.s64	q13, q14, #26
+	vsub.i64	q6, q6, q12
+	vshr.s64	q12, q15, #25
+	vadd.i64	q3, q3, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q14, q3, q0
+	vadd.i64	q8, q8, q12
+	vshl.i64	q12, q12, #25
+	vadd.i64	q15, q8, q1
+	add		r2, r2, #8
+	vsub.i64	q2, q2, q13
+	vshr.s64	q13, q14, #25
+	vsub.i64	q7, q7, q12
+	vshr.s64	q12, q15, #26
+	vadd.i64	q14, q13, q13
+	vadd.i64	q9, q9, q12
+	vtrn.32		d12, d14
+	vshl.i64	q12, q12, #26
+	vtrn.32		d13, d15
+	vadd.i64	q0, q9, q0
+	vadd.i64	q4, q4, q14
+	vst1.8		d12, [r2, : 64]!
+	vshl.i64	q6, q13, #4
+	vsub.i64	q7, q8, q12
+	vshr.s64	q0, q0, #25
+	vadd.i64	q4, q4, q6
+	vadd.i64	q6, q10, q0
+	vshl.i64	q0, q0, #25
+	vadd.i64	q8, q6, q1
+	vadd.i64	q4, q4, q13
+	vshl.i64	q10, q13, #25
+	vadd.i64	q1, q4, q1
+	vsub.i64	q0, q9, q0
+	vshr.s64	q8, q8, #26
+	vsub.i64	q3, q3, q10
+	vtrn.32		d14, d0
+	vshr.s64	q1, q1, #26
+	vtrn.32		d15, d1
+	vadd.i64	q0, q11, q8
+	vst1.8		d14, [r2, : 64]
+	vshl.i64	q7, q8, #26
+	vadd.i64	q5, q5, q1
+	vtrn.32		d4, d6
+	vshl.i64	q1, q1, #26
+	vtrn.32		d5, d7
+	vsub.i64	q3, q6, q7
+	add		r2, r2, #16
+	vsub.i64	q1, q4, q1
+	vst1.8		d4, [r2, : 64]
+	vtrn.32		d6, d0
+	vtrn.32		d7, d1
+	sub		r2, r2, #8
+	vtrn.32		d2, d10
+	vtrn.32		d3, d11
+	vst1.8		d6, [r2, : 64]
+	sub		r2, r2, #24
+	vst1.8		d2, [r2, : 64]
+	add		r2, r3, #96
+	vmov.i32	q0, #0
+	vmov.i64	d2, #0xff
+	vmov.i64	d3, #0
+	vshr.u32	q1, q1, #7
+	vst1.8		{d2-d3}, [r2, : 128]!
+	vst1.8		{d0-d1}, [r2, : 128]!
+	vst1.8		d0, [r2, : 64]
+	add		r2, r3, #144
+	vmov.i32	q0, #0
+	vst1.8		{d0-d1}, [r2, : 128]!
+	vst1.8		{d0-d1}, [r2, : 128]!
+	vst1.8		d0, [r2, : 64]
+	add		r2, r3, #240
+	vmov.i32	q0, #0
+	vmov.i64	d2, #0xff
+	vmov.i64	d3, #0
+	vshr.u32	q1, q1, #7
+	vst1.8		{d2-d3}, [r2, : 128]!
+	vst1.8		{d0-d1}, [r2, : 128]!
+	vst1.8		d0, [r2, : 64]
+	add		r2, r3, #48
+	add		r6, r3, #192
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d4}, [r2, : 64]
+	vst1.8		{d0-d1}, [r6, : 128]!
+	vst1.8		{d2-d3}, [r6, : 128]!
+	vst1.8		d4, [r6, : 64]
+.Lmainloop:
+	mov		r2, r5, LSR #3
+	and		r6, r5, #7
+	ldrb		r2, [r1, r2]
+	mov		r2, r2, LSR r6
+	and		r2, r2, #1
+	str		r5, [sp, #456]
+	eor		r4, r4, r2
+	str		r2, [sp, #460]
+	neg		r2, r4
+	add		r4, r3, #96
+	add		r5, r3, #192
+	add		r6, r3, #144
+	vld1.8		{d8-d9}, [r4, : 128]!
+	add		r7, r3, #240
+	vld1.8		{d10-d11}, [r5, : 128]!
+	veor		q6, q4, q5
+	vld1.8		{d14-d15}, [r6, : 128]!
+	vdup.i32	q8, r2
+	vld1.8		{d18-d19}, [r7, : 128]!
+	veor		q10, q7, q9
+	vld1.8		{d22-d23}, [r4, : 128]!
+	vand		q6, q6, q8
+	vld1.8		{d24-d25}, [r5, : 128]!
+	vand		q10, q10, q8
+	vld1.8		{d26-d27}, [r6, : 128]!
+	veor		q4, q4, q6
+	vld1.8		{d28-d29}, [r7, : 128]!
+	veor		q5, q5, q6
+	vld1.8		{d0}, [r4, : 64]
+	veor		q6, q7, q10
+	vld1.8		{d2}, [r5, : 64]
+	veor		q7, q9, q10
+	vld1.8		{d4}, [r6, : 64]
+	veor		q9, q11, q12
+	vld1.8		{d6}, [r7, : 64]
+	veor		q10, q0, q1
+	sub		r2, r4, #32
+	vand		q9, q9, q8
+	sub		r4, r5, #32
+	vand		q10, q10, q8
+	sub		r5, r6, #32
+	veor		q11, q11, q9
+	sub		r6, r7, #32
+	veor		q0, q0, q10
+	veor		q9, q12, q9
+	veor		q1, q1, q10
+	veor		q10, q13, q14
+	veor		q12, q2, q3
+	vand		q10, q10, q8
+	vand		q8, q12, q8
+	veor		q12, q13, q10
+	veor		q2, q2, q8
+	veor		q10, q14, q10
+	veor		q3, q3, q8
+	vadd.i32	q8, q4, q6
+	vsub.i32	q4, q4, q6
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vadd.i32	q6, q11, q12
+	vst1.8		{d8-d9}, [r5, : 128]!
+	vsub.i32	q4, q11, q12
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vadd.i32	q6, q0, q2
+	vst1.8		{d8-d9}, [r5, : 128]!
+	vsub.i32	q0, q0, q2
+	vst1.8		d12, [r2, : 64]
+	vadd.i32	q2, q5, q7
+	vst1.8		d0, [r5, : 64]
+	vsub.i32	q0, q5, q7
+	vst1.8		{d4-d5}, [r4, : 128]!
+	vadd.i32	q2, q9, q10
+	vst1.8		{d0-d1}, [r6, : 128]!
+	vsub.i32	q0, q9, q10
+	vst1.8		{d4-d5}, [r4, : 128]!
+	vadd.i32	q2, q1, q3
+	vst1.8		{d0-d1}, [r6, : 128]!
+	vsub.i32	q0, q1, q3
+	vst1.8		d4, [r4, : 64]
+	vst1.8		d0, [r6, : 64]
+	add		r2, sp, #512
+	add		r4, r3, #96
+	add		r5, r3, #144
+	vld1.8		{d0-d1}, [r2, : 128]
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vld1.8		{d4-d5}, [r5, : 128]!
+	vzip.i32	q1, q2
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vld1.8		{d8-d9}, [r5, : 128]!
+	vshl.i32	q5, q1, #1
+	vzip.i32	q3, q4
+	vshl.i32	q6, q2, #1
+	vld1.8		{d14}, [r4, : 64]
+	vshl.i32	q8, q3, #1
+	vld1.8		{d15}, [r5, : 64]
+	vshl.i32	q9, q4, #1
+	vmul.i32	d21, d7, d1
+	vtrn.32		d14, d15
+	vmul.i32	q11, q4, q0
+	vmul.i32	q0, q7, q0
+	vmull.s32	q12, d2, d2
+	vmlal.s32	q12, d11, d1
+	vmlal.s32	q12, d12, d0
+	vmlal.s32	q12, d13, d23
+	vmlal.s32	q12, d16, d22
+	vmlal.s32	q12, d7, d21
+	vmull.s32	q10, d2, d11
+	vmlal.s32	q10, d4, d1
+	vmlal.s32	q10, d13, d0
+	vmlal.s32	q10, d6, d23
+	vmlal.s32	q10, d17, d22
+	vmull.s32	q13, d10, d4
+	vmlal.s32	q13, d11, d3
+	vmlal.s32	q13, d13, d1
+	vmlal.s32	q13, d16, d0
+	vmlal.s32	q13, d17, d23
+	vmlal.s32	q13, d8, d22
+	vmull.s32	q1, d10, d5
+	vmlal.s32	q1, d11, d4
+	vmlal.s32	q1, d6, d1
+	vmlal.s32	q1, d17, d0
+	vmlal.s32	q1, d8, d23
+	vmull.s32	q14, d10, d6
+	vmlal.s32	q14, d11, d13
+	vmlal.s32	q14, d4, d4
+	vmlal.s32	q14, d17, d1
+	vmlal.s32	q14, d18, d0
+	vmlal.s32	q14, d9, d23
+	vmull.s32	q11, d10, d7
+	vmlal.s32	q11, d11, d6
+	vmlal.s32	q11, d12, d5
+	vmlal.s32	q11, d8, d1
+	vmlal.s32	q11, d19, d0
+	vmull.s32	q15, d10, d8
+	vmlal.s32	q15, d11, d17
+	vmlal.s32	q15, d12, d6
+	vmlal.s32	q15, d13, d5
+	vmlal.s32	q15, d19, d1
+	vmlal.s32	q15, d14, d0
+	vmull.s32	q2, d10, d9
+	vmlal.s32	q2, d11, d8
+	vmlal.s32	q2, d12, d7
+	vmlal.s32	q2, d13, d6
+	vmlal.s32	q2, d14, d1
+	vmull.s32	q0, d15, d1
+	vmlal.s32	q0, d10, d14
+	vmlal.s32	q0, d11, d19
+	vmlal.s32	q0, d12, d8
+	vmlal.s32	q0, d13, d17
+	vmlal.s32	q0, d6, d6
+	add		r2, sp, #480
+	vld1.8		{d18-d19}, [r2, : 128]!
+	vmull.s32	q3, d16, d7
+	vmlal.s32	q3, d10, d15
+	vmlal.s32	q3, d11, d14
+	vmlal.s32	q3, d12, d9
+	vmlal.s32	q3, d13, d8
+	vld1.8		{d8-d9}, [r2, : 128]
+	vadd.i64	q5, q12, q9
+	vadd.i64	q6, q15, q9
+	vshr.s64	q5, q5, #26
+	vshr.s64	q6, q6, #26
+	vadd.i64	q7, q10, q5
+	vshl.i64	q5, q5, #26
+	vadd.i64	q8, q7, q4
+	vadd.i64	q2, q2, q6
+	vshl.i64	q6, q6, #26
+	vadd.i64	q10, q2, q4
+	vsub.i64	q5, q12, q5
+	vshr.s64	q8, q8, #25
+	vsub.i64	q6, q15, q6
+	vshr.s64	q10, q10, #25
+	vadd.i64	q12, q13, q8
+	vshl.i64	q8, q8, #25
+	vadd.i64	q13, q12, q9
+	vadd.i64	q0, q0, q10
+	vsub.i64	q7, q7, q8
+	vshr.s64	q8, q13, #26
+	vshl.i64	q10, q10, #25
+	vadd.i64	q13, q0, q9
+	vadd.i64	q1, q1, q8
+	vshl.i64	q8, q8, #26
+	vadd.i64	q15, q1, q4
+	vsub.i64	q2, q2, q10
+	vshr.s64	q10, q13, #26
+	vsub.i64	q8, q12, q8
+	vshr.s64	q12, q15, #25
+	vadd.i64	q3, q3, q10
+	vshl.i64	q10, q10, #26
+	vadd.i64	q13, q3, q4
+	vadd.i64	q14, q14, q12
+	add		r2, r3, #288
+	vshl.i64	q12, q12, #25
+	add		r4, r3, #336
+	vadd.i64	q15, q14, q9
+	add		r2, r2, #8
+	vsub.i64	q0, q0, q10
+	add		r4, r4, #8
+	vshr.s64	q10, q13, #25
+	vsub.i64	q1, q1, q12
+	vshr.s64	q12, q15, #26
+	vadd.i64	q13, q10, q10
+	vadd.i64	q11, q11, q12
+	vtrn.32		d16, d2
+	vshl.i64	q12, q12, #26
+	vtrn.32		d17, d3
+	vadd.i64	q1, q11, q4
+	vadd.i64	q4, q5, q13
+	vst1.8		d16, [r2, : 64]!
+	vshl.i64	q5, q10, #4
+	vst1.8		d17, [r4, : 64]!
+	vsub.i64	q8, q14, q12
+	vshr.s64	q1, q1, #25
+	vadd.i64	q4, q4, q5
+	vadd.i64	q5, q6, q1
+	vshl.i64	q1, q1, #25
+	vadd.i64	q6, q5, q9
+	vadd.i64	q4, q4, q10
+	vshl.i64	q10, q10, #25
+	vadd.i64	q9, q4, q9
+	vsub.i64	q1, q11, q1
+	vshr.s64	q6, q6, #26
+	vsub.i64	q3, q3, q10
+	vtrn.32		d16, d2
+	vshr.s64	q9, q9, #26
+	vtrn.32		d17, d3
+	vadd.i64	q1, q2, q6
+	vst1.8		d16, [r2, : 64]
+	vshl.i64	q2, q6, #26
+	vst1.8		d17, [r4, : 64]
+	vadd.i64	q6, q7, q9
+	vtrn.32		d0, d6
+	vshl.i64	q7, q9, #26
+	vtrn.32		d1, d7
+	vsub.i64	q2, q5, q2
+	add		r2, r2, #16
+	vsub.i64	q3, q4, q7
+	vst1.8		d0, [r2, : 64]
+	add		r4, r4, #16
+	vst1.8		d1, [r4, : 64]
+	vtrn.32		d4, d2
+	vtrn.32		d5, d3
+	sub		r2, r2, #8
+	sub		r4, r4, #8
+	vtrn.32		d6, d12
+	vtrn.32		d7, d13
+	vst1.8		d4, [r2, : 64]
+	vst1.8		d5, [r4, : 64]
+	sub		r2, r2, #24
+	sub		r4, r4, #24
+	vst1.8		d6, [r2, : 64]
+	vst1.8		d7, [r4, : 64]
+	add		r2, r3, #240
+	add		r4, r3, #96
+	vld1.8		{d0-d1}, [r4, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vld1.8		{d4}, [r4, : 64]
+	add		r4, r3, #144
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vtrn.32		q0, q3
+	vld1.8		{d8-d9}, [r4, : 128]!
+	vshl.i32	q5, q0, #4
+	vtrn.32		q1, q4
+	vshl.i32	q6, q3, #4
+	vadd.i32	q5, q5, q0
+	vadd.i32	q6, q6, q3
+	vshl.i32	q7, q1, #4
+	vld1.8		{d5}, [r4, : 64]
+	vshl.i32	q8, q4, #4
+	vtrn.32		d4, d5
+	vadd.i32	q7, q7, q1
+	vadd.i32	q8, q8, q4
+	vld1.8		{d18-d19}, [r2, : 128]!
+	vshl.i32	q10, q2, #4
+	vld1.8		{d22-d23}, [r2, : 128]!
+	vadd.i32	q10, q10, q2
+	vld1.8		{d24}, [r2, : 64]
+	vadd.i32	q5, q5, q0
+	add		r2, r3, #192
+	vld1.8		{d26-d27}, [r2, : 128]!
+	vadd.i32	q6, q6, q3
+	vld1.8		{d28-d29}, [r2, : 128]!
+	vadd.i32	q8, q8, q4
+	vld1.8		{d25}, [r2, : 64]
+	vadd.i32	q10, q10, q2
+	vtrn.32		q9, q13
+	vadd.i32	q7, q7, q1
+	vadd.i32	q5, q5, q0
+	vtrn.32		q11, q14
+	vadd.i32	q6, q6, q3
+	add		r2, sp, #528
+	vadd.i32	q10, q10, q2
+	vtrn.32		d24, d25
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q6, q13, #1
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vshl.i32	q10, q14, #1
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q15, q12, #1
+	vadd.i32	q8, q8, q4
+	vext.32		d10, d31, d30, #0
+	vadd.i32	q7, q7, q1
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vmull.s32	q8, d18, d5
+	vmlal.s32	q8, d26, d4
+	vmlal.s32	q8, d19, d9
+	vmlal.s32	q8, d27, d3
+	vmlal.s32	q8, d22, d8
+	vmlal.s32	q8, d28, d2
+	vmlal.s32	q8, d23, d7
+	vmlal.s32	q8, d29, d1
+	vmlal.s32	q8, d24, d6
+	vmlal.s32	q8, d25, d0
+	vst1.8		{d14-d15}, [r2, : 128]!
+	vmull.s32	q2, d18, d4
+	vmlal.s32	q2, d12, d9
+	vmlal.s32	q2, d13, d8
+	vmlal.s32	q2, d19, d3
+	vmlal.s32	q2, d22, d2
+	vmlal.s32	q2, d23, d1
+	vmlal.s32	q2, d24, d0
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vmull.s32	q7, d18, d9
+	vmlal.s32	q7, d26, d3
+	vmlal.s32	q7, d19, d8
+	vmlal.s32	q7, d27, d2
+	vmlal.s32	q7, d22, d7
+	vmlal.s32	q7, d28, d1
+	vmlal.s32	q7, d23, d6
+	vmlal.s32	q7, d29, d0
+	vst1.8		{d10-d11}, [r2, : 128]!
+	vmull.s32	q5, d18, d3
+	vmlal.s32	q5, d19, d2
+	vmlal.s32	q5, d22, d1
+	vmlal.s32	q5, d23, d0
+	vmlal.s32	q5, d12, d8
+	vst1.8		{d16-d17}, [r2, : 128]
+	vmull.s32	q4, d18, d8
+	vmlal.s32	q4, d26, d2
+	vmlal.s32	q4, d19, d7
+	vmlal.s32	q4, d27, d1
+	vmlal.s32	q4, d22, d6
+	vmlal.s32	q4, d28, d0
+	vmull.s32	q8, d18, d7
+	vmlal.s32	q8, d26, d1
+	vmlal.s32	q8, d19, d6
+	vmlal.s32	q8, d27, d0
+	add		r2, sp, #544
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q7, d24, d21
+	vmlal.s32	q7, d25, d20
+	vmlal.s32	q4, d23, d21
+	vmlal.s32	q4, d29, d20
+	vmlal.s32	q8, d22, d21
+	vmlal.s32	q8, d28, d20
+	vmlal.s32	q5, d24, d20
+	vst1.8		{d14-d15}, [r2, : 128]
+	vmull.s32	q7, d18, d6
+	vmlal.s32	q7, d26, d0
+	add		r2, sp, #624
+	vld1.8		{d30-d31}, [r2, : 128]
+	vmlal.s32	q2, d30, d21
+	vmlal.s32	q7, d19, d21
+	vmlal.s32	q7, d27, d20
+	add		r2, sp, #592
+	vld1.8		{d26-d27}, [r2, : 128]
+	vmlal.s32	q4, d25, d27
+	vmlal.s32	q8, d29, d27
+	vmlal.s32	q8, d25, d26
+	vmlal.s32	q7, d28, d27
+	vmlal.s32	q7, d29, d26
+	add		r2, sp, #576
+	vld1.8		{d28-d29}, [r2, : 128]
+	vmlal.s32	q4, d24, d29
+	vmlal.s32	q8, d23, d29
+	vmlal.s32	q8, d24, d28
+	vmlal.s32	q7, d22, d29
+	vmlal.s32	q7, d23, d28
+	vst1.8		{d8-d9}, [r2, : 128]
+	add		r2, sp, #528
+	vld1.8		{d8-d9}, [r2, : 128]
+	vmlal.s32	q7, d24, d9
+	vmlal.s32	q7, d25, d31
+	vmull.s32	q1, d18, d2
+	vmlal.s32	q1, d19, d1
+	vmlal.s32	q1, d22, d0
+	vmlal.s32	q1, d24, d27
+	vmlal.s32	q1, d23, d20
+	vmlal.s32	q1, d12, d7
+	vmlal.s32	q1, d13, d6
+	vmull.s32	q6, d18, d1
+	vmlal.s32	q6, d19, d0
+	vmlal.s32	q6, d23, d27
+	vmlal.s32	q6, d22, d20
+	vmlal.s32	q6, d24, d26
+	vmull.s32	q0, d18, d0
+	vmlal.s32	q0, d22, d27
+	vmlal.s32	q0, d23, d26
+	vmlal.s32	q0, d24, d31
+	vmlal.s32	q0, d19, d20
+	add		r2, sp, #608
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q2, d18, d7
+	vmlal.s32	q5, d18, d6
+	vmlal.s32	q1, d18, d21
+	vmlal.s32	q0, d18, d28
+	vmlal.s32	q6, d18, d29
+	vmlal.s32	q2, d19, d6
+	vmlal.s32	q5, d19, d21
+	vmlal.s32	q1, d19, d29
+	vmlal.s32	q0, d19, d9
+	vmlal.s32	q6, d19, d28
+	add		r2, sp, #560
+	vld1.8		{d18-d19}, [r2, : 128]
+	add		r2, sp, #480
+	vld1.8		{d22-d23}, [r2, : 128]
+	vmlal.s32	q5, d19, d7
+	vmlal.s32	q0, d18, d21
+	vmlal.s32	q0, d19, d29
+	vmlal.s32	q6, d18, d6
+	add		r2, sp, #496
+	vld1.8		{d6-d7}, [r2, : 128]
+	vmlal.s32	q6, d19, d21
+	add		r2, sp, #544
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q0, d30, d8
+	add		r2, sp, #640
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q5, d30, d29
+	add		r2, sp, #576
+	vld1.8		{d24-d25}, [r2, : 128]
+	vmlal.s32	q1, d30, d28
+	vadd.i64	q13, q0, q11
+	vadd.i64	q14, q5, q11
+	vmlal.s32	q6, d30, d9
+	vshr.s64	q4, q13, #26
+	vshr.s64	q13, q14, #26
+	vadd.i64	q7, q7, q4
+	vshl.i64	q4, q4, #26
+	vadd.i64	q14, q7, q3
+	vadd.i64	q9, q9, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q15, q9, q3
+	vsub.i64	q0, q0, q4
+	vshr.s64	q4, q14, #25
+	vsub.i64	q5, q5, q13
+	vshr.s64	q13, q15, #25
+	vadd.i64	q6, q6, q4
+	vshl.i64	q4, q4, #25
+	vadd.i64	q14, q6, q11
+	vadd.i64	q2, q2, q13
+	vsub.i64	q4, q7, q4
+	vshr.s64	q7, q14, #26
+	vshl.i64	q13, q13, #25
+	vadd.i64	q14, q2, q11
+	vadd.i64	q8, q8, q7
+	vshl.i64	q7, q7, #26
+	vadd.i64	q15, q8, q3
+	vsub.i64	q9, q9, q13
+	vshr.s64	q13, q14, #26
+	vsub.i64	q6, q6, q7
+	vshr.s64	q7, q15, #25
+	vadd.i64	q10, q10, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q14, q10, q3
+	vadd.i64	q1, q1, q7
+	add		r2, r3, #144
+	vshl.i64	q7, q7, #25
+	add		r4, r3, #96
+	vadd.i64	q15, q1, q11
+	add		r2, r2, #8
+	vsub.i64	q2, q2, q13
+	add		r4, r4, #8
+	vshr.s64	q13, q14, #25
+	vsub.i64	q7, q8, q7
+	vshr.s64	q8, q15, #26
+	vadd.i64	q14, q13, q13
+	vadd.i64	q12, q12, q8
+	vtrn.32		d12, d14
+	vshl.i64	q8, q8, #26
+	vtrn.32		d13, d15
+	vadd.i64	q3, q12, q3
+	vadd.i64	q0, q0, q14
+	vst1.8		d12, [r2, : 64]!
+	vshl.i64	q7, q13, #4
+	vst1.8		d13, [r4, : 64]!
+	vsub.i64	q1, q1, q8
+	vshr.s64	q3, q3, #25
+	vadd.i64	q0, q0, q7
+	vadd.i64	q5, q5, q3
+	vshl.i64	q3, q3, #25
+	vadd.i64	q6, q5, q11
+	vadd.i64	q0, q0, q13
+	vshl.i64	q7, q13, #25
+	vadd.i64	q8, q0, q11
+	vsub.i64	q3, q12, q3
+	vshr.s64	q6, q6, #26
+	vsub.i64	q7, q10, q7
+	vtrn.32		d2, d6
+	vshr.s64	q8, q8, #26
+	vtrn.32		d3, d7
+	vadd.i64	q3, q9, q6
+	vst1.8		d2, [r2, : 64]
+	vshl.i64	q6, q6, #26
+	vst1.8		d3, [r4, : 64]
+	vadd.i64	q1, q4, q8
+	vtrn.32		d4, d14
+	vshl.i64	q4, q8, #26
+	vtrn.32		d5, d15
+	vsub.i64	q5, q5, q6
+	add		r2, r2, #16
+	vsub.i64	q0, q0, q4
+	vst1.8		d4, [r2, : 64]
+	add		r4, r4, #16
+	vst1.8		d5, [r4, : 64]
+	vtrn.32		d10, d6
+	vtrn.32		d11, d7
+	sub		r2, r2, #8
+	sub		r4, r4, #8
+	vtrn.32		d0, d2
+	vtrn.32		d1, d3
+	vst1.8		d10, [r2, : 64]
+	vst1.8		d11, [r4, : 64]
+	sub		r2, r2, #24
+	sub		r4, r4, #24
+	vst1.8		d0, [r2, : 64]
+	vst1.8		d1, [r4, : 64]
+	add		r2, r3, #288
+	add		r4, r3, #336
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vsub.i32	q0, q0, q1
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d4-d5}, [r4, : 128]!
+	vsub.i32	q1, q1, q2
+	add		r5, r3, #240
+	vld1.8		{d4}, [r2, : 64]
+	vld1.8		{d6}, [r4, : 64]
+	vsub.i32	q2, q2, q3
+	vst1.8		{d0-d1}, [r5, : 128]!
+	vst1.8		{d2-d3}, [r5, : 128]!
+	vst1.8		d4, [r5, : 64]
+	add		r2, r3, #144
+	add		r4, r3, #96
+	add		r5, r3, #144
+	add		r6, r3, #192
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vsub.i32	q2, q0, q1
+	vadd.i32	q0, q0, q1
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vsub.i32	q4, q1, q3
+	vadd.i32	q1, q1, q3
+	vld1.8		{d6}, [r2, : 64]
+	vld1.8		{d10}, [r4, : 64]
+	vsub.i32	q6, q3, q5
+	vadd.i32	q3, q3, q5
+	vst1.8		{d4-d5}, [r5, : 128]!
+	vst1.8		{d0-d1}, [r6, : 128]!
+	vst1.8		{d8-d9}, [r5, : 128]!
+	vst1.8		{d2-d3}, [r6, : 128]!
+	vst1.8		d12, [r5, : 64]
+	vst1.8		d6, [r6, : 64]
+	add		r2, r3, #0
+	add		r4, r3, #240
+	vld1.8		{d0-d1}, [r4, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vld1.8		{d4}, [r4, : 64]
+	add		r4, r3, #336
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vtrn.32		q0, q3
+	vld1.8		{d8-d9}, [r4, : 128]!
+	vshl.i32	q5, q0, #4
+	vtrn.32		q1, q4
+	vshl.i32	q6, q3, #4
+	vadd.i32	q5, q5, q0
+	vadd.i32	q6, q6, q3
+	vshl.i32	q7, q1, #4
+	vld1.8		{d5}, [r4, : 64]
+	vshl.i32	q8, q4, #4
+	vtrn.32		d4, d5
+	vadd.i32	q7, q7, q1
+	vadd.i32	q8, q8, q4
+	vld1.8		{d18-d19}, [r2, : 128]!
+	vshl.i32	q10, q2, #4
+	vld1.8		{d22-d23}, [r2, : 128]!
+	vadd.i32	q10, q10, q2
+	vld1.8		{d24}, [r2, : 64]
+	vadd.i32	q5, q5, q0
+	add		r2, r3, #288
+	vld1.8		{d26-d27}, [r2, : 128]!
+	vadd.i32	q6, q6, q3
+	vld1.8		{d28-d29}, [r2, : 128]!
+	vadd.i32	q8, q8, q4
+	vld1.8		{d25}, [r2, : 64]
+	vadd.i32	q10, q10, q2
+	vtrn.32		q9, q13
+	vadd.i32	q7, q7, q1
+	vadd.i32	q5, q5, q0
+	vtrn.32		q11, q14
+	vadd.i32	q6, q6, q3
+	add		r2, sp, #528
+	vadd.i32	q10, q10, q2
+	vtrn.32		d24, d25
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q6, q13, #1
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vshl.i32	q10, q14, #1
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q15, q12, #1
+	vadd.i32	q8, q8, q4
+	vext.32		d10, d31, d30, #0
+	vadd.i32	q7, q7, q1
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vmull.s32	q8, d18, d5
+	vmlal.s32	q8, d26, d4
+	vmlal.s32	q8, d19, d9
+	vmlal.s32	q8, d27, d3
+	vmlal.s32	q8, d22, d8
+	vmlal.s32	q8, d28, d2
+	vmlal.s32	q8, d23, d7
+	vmlal.s32	q8, d29, d1
+	vmlal.s32	q8, d24, d6
+	vmlal.s32	q8, d25, d0
+	vst1.8		{d14-d15}, [r2, : 128]!
+	vmull.s32	q2, d18, d4
+	vmlal.s32	q2, d12, d9
+	vmlal.s32	q2, d13, d8
+	vmlal.s32	q2, d19, d3
+	vmlal.s32	q2, d22, d2
+	vmlal.s32	q2, d23, d1
+	vmlal.s32	q2, d24, d0
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vmull.s32	q7, d18, d9
+	vmlal.s32	q7, d26, d3
+	vmlal.s32	q7, d19, d8
+	vmlal.s32	q7, d27, d2
+	vmlal.s32	q7, d22, d7
+	vmlal.s32	q7, d28, d1
+	vmlal.s32	q7, d23, d6
+	vmlal.s32	q7, d29, d0
+	vst1.8		{d10-d11}, [r2, : 128]!
+	vmull.s32	q5, d18, d3
+	vmlal.s32	q5, d19, d2
+	vmlal.s32	q5, d22, d1
+	vmlal.s32	q5, d23, d0
+	vmlal.s32	q5, d12, d8
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vmull.s32	q4, d18, d8
+	vmlal.s32	q4, d26, d2
+	vmlal.s32	q4, d19, d7
+	vmlal.s32	q4, d27, d1
+	vmlal.s32	q4, d22, d6
+	vmlal.s32	q4, d28, d0
+	vmull.s32	q8, d18, d7
+	vmlal.s32	q8, d26, d1
+	vmlal.s32	q8, d19, d6
+	vmlal.s32	q8, d27, d0
+	add		r2, sp, #544
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q7, d24, d21
+	vmlal.s32	q7, d25, d20
+	vmlal.s32	q4, d23, d21
+	vmlal.s32	q4, d29, d20
+	vmlal.s32	q8, d22, d21
+	vmlal.s32	q8, d28, d20
+	vmlal.s32	q5, d24, d20
+	vst1.8		{d14-d15}, [r2, : 128]
+	vmull.s32	q7, d18, d6
+	vmlal.s32	q7, d26, d0
+	add		r2, sp, #624
+	vld1.8		{d30-d31}, [r2, : 128]
+	vmlal.s32	q2, d30, d21
+	vmlal.s32	q7, d19, d21
+	vmlal.s32	q7, d27, d20
+	add		r2, sp, #592
+	vld1.8		{d26-d27}, [r2, : 128]
+	vmlal.s32	q4, d25, d27
+	vmlal.s32	q8, d29, d27
+	vmlal.s32	q8, d25, d26
+	vmlal.s32	q7, d28, d27
+	vmlal.s32	q7, d29, d26
+	add		r2, sp, #576
+	vld1.8		{d28-d29}, [r2, : 128]
+	vmlal.s32	q4, d24, d29
+	vmlal.s32	q8, d23, d29
+	vmlal.s32	q8, d24, d28
+	vmlal.s32	q7, d22, d29
+	vmlal.s32	q7, d23, d28
+	vst1.8		{d8-d9}, [r2, : 128]
+	add		r2, sp, #528
+	vld1.8		{d8-d9}, [r2, : 128]
+	vmlal.s32	q7, d24, d9
+	vmlal.s32	q7, d25, d31
+	vmull.s32	q1, d18, d2
+	vmlal.s32	q1, d19, d1
+	vmlal.s32	q1, d22, d0
+	vmlal.s32	q1, d24, d27
+	vmlal.s32	q1, d23, d20
+	vmlal.s32	q1, d12, d7
+	vmlal.s32	q1, d13, d6
+	vmull.s32	q6, d18, d1
+	vmlal.s32	q6, d19, d0
+	vmlal.s32	q6, d23, d27
+	vmlal.s32	q6, d22, d20
+	vmlal.s32	q6, d24, d26
+	vmull.s32	q0, d18, d0
+	vmlal.s32	q0, d22, d27
+	vmlal.s32	q0, d23, d26
+	vmlal.s32	q0, d24, d31
+	vmlal.s32	q0, d19, d20
+	add		r2, sp, #608
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q2, d18, d7
+	vmlal.s32	q5, d18, d6
+	vmlal.s32	q1, d18, d21
+	vmlal.s32	q0, d18, d28
+	vmlal.s32	q6, d18, d29
+	vmlal.s32	q2, d19, d6
+	vmlal.s32	q5, d19, d21
+	vmlal.s32	q1, d19, d29
+	vmlal.s32	q0, d19, d9
+	vmlal.s32	q6, d19, d28
+	add		r2, sp, #560
+	vld1.8		{d18-d19}, [r2, : 128]
+	add		r2, sp, #480
+	vld1.8		{d22-d23}, [r2, : 128]
+	vmlal.s32	q5, d19, d7
+	vmlal.s32	q0, d18, d21
+	vmlal.s32	q0, d19, d29
+	vmlal.s32	q6, d18, d6
+	add		r2, sp, #496
+	vld1.8		{d6-d7}, [r2, : 128]
+	vmlal.s32	q6, d19, d21
+	add		r2, sp, #544
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q0, d30, d8
+	add		r2, sp, #640
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q5, d30, d29
+	add		r2, sp, #576
+	vld1.8		{d24-d25}, [r2, : 128]
+	vmlal.s32	q1, d30, d28
+	vadd.i64	q13, q0, q11
+	vadd.i64	q14, q5, q11
+	vmlal.s32	q6, d30, d9
+	vshr.s64	q4, q13, #26
+	vshr.s64	q13, q14, #26
+	vadd.i64	q7, q7, q4
+	vshl.i64	q4, q4, #26
+	vadd.i64	q14, q7, q3
+	vadd.i64	q9, q9, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q15, q9, q3
+	vsub.i64	q0, q0, q4
+	vshr.s64	q4, q14, #25
+	vsub.i64	q5, q5, q13
+	vshr.s64	q13, q15, #25
+	vadd.i64	q6, q6, q4
+	vshl.i64	q4, q4, #25
+	vadd.i64	q14, q6, q11
+	vadd.i64	q2, q2, q13
+	vsub.i64	q4, q7, q4
+	vshr.s64	q7, q14, #26
+	vshl.i64	q13, q13, #25
+	vadd.i64	q14, q2, q11
+	vadd.i64	q8, q8, q7
+	vshl.i64	q7, q7, #26
+	vadd.i64	q15, q8, q3
+	vsub.i64	q9, q9, q13
+	vshr.s64	q13, q14, #26
+	vsub.i64	q6, q6, q7
+	vshr.s64	q7, q15, #25
+	vadd.i64	q10, q10, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q14, q10, q3
+	vadd.i64	q1, q1, q7
+	add		r2, r3, #288
+	vshl.i64	q7, q7, #25
+	add		r4, r3, #96
+	vadd.i64	q15, q1, q11
+	add		r2, r2, #8
+	vsub.i64	q2, q2, q13
+	add		r4, r4, #8
+	vshr.s64	q13, q14, #25
+	vsub.i64	q7, q8, q7
+	vshr.s64	q8, q15, #26
+	vadd.i64	q14, q13, q13
+	vadd.i64	q12, q12, q8
+	vtrn.32		d12, d14
+	vshl.i64	q8, q8, #26
+	vtrn.32		d13, d15
+	vadd.i64	q3, q12, q3
+	vadd.i64	q0, q0, q14
+	vst1.8		d12, [r2, : 64]!
+	vshl.i64	q7, q13, #4
+	vst1.8		d13, [r4, : 64]!
+	vsub.i64	q1, q1, q8
+	vshr.s64	q3, q3, #25
+	vadd.i64	q0, q0, q7
+	vadd.i64	q5, q5, q3
+	vshl.i64	q3, q3, #25
+	vadd.i64	q6, q5, q11
+	vadd.i64	q0, q0, q13
+	vshl.i64	q7, q13, #25
+	vadd.i64	q8, q0, q11
+	vsub.i64	q3, q12, q3
+	vshr.s64	q6, q6, #26
+	vsub.i64	q7, q10, q7
+	vtrn.32		d2, d6
+	vshr.s64	q8, q8, #26
+	vtrn.32		d3, d7
+	vadd.i64	q3, q9, q6
+	vst1.8		d2, [r2, : 64]
+	vshl.i64	q6, q6, #26
+	vst1.8		d3, [r4, : 64]
+	vadd.i64	q1, q4, q8
+	vtrn.32		d4, d14
+	vshl.i64	q4, q8, #26
+	vtrn.32		d5, d15
+	vsub.i64	q5, q5, q6
+	add		r2, r2, #16
+	vsub.i64	q0, q0, q4
+	vst1.8		d4, [r2, : 64]
+	add		r4, r4, #16
+	vst1.8		d5, [r4, : 64]
+	vtrn.32		d10, d6
+	vtrn.32		d11, d7
+	sub		r2, r2, #8
+	sub		r4, r4, #8
+	vtrn.32		d0, d2
+	vtrn.32		d1, d3
+	vst1.8		d10, [r2, : 64]
+	vst1.8		d11, [r4, : 64]
+	sub		r2, r2, #24
+	sub		r4, r4, #24
+	vst1.8		d0, [r2, : 64]
+	vst1.8		d1, [r4, : 64]
+	add		r2, sp, #512
+	add		r4, r3, #144
+	add		r5, r3, #192
+	vld1.8		{d0-d1}, [r2, : 128]
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vld1.8		{d4-d5}, [r5, : 128]!
+	vzip.i32	q1, q2
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vld1.8		{d8-d9}, [r5, : 128]!
+	vshl.i32	q5, q1, #1
+	vzip.i32	q3, q4
+	vshl.i32	q6, q2, #1
+	vld1.8		{d14}, [r4, : 64]
+	vshl.i32	q8, q3, #1
+	vld1.8		{d15}, [r5, : 64]
+	vshl.i32	q9, q4, #1
+	vmul.i32	d21, d7, d1
+	vtrn.32		d14, d15
+	vmul.i32	q11, q4, q0
+	vmul.i32	q0, q7, q0
+	vmull.s32	q12, d2, d2
+	vmlal.s32	q12, d11, d1
+	vmlal.s32	q12, d12, d0
+	vmlal.s32	q12, d13, d23
+	vmlal.s32	q12, d16, d22
+	vmlal.s32	q12, d7, d21
+	vmull.s32	q10, d2, d11
+	vmlal.s32	q10, d4, d1
+	vmlal.s32	q10, d13, d0
+	vmlal.s32	q10, d6, d23
+	vmlal.s32	q10, d17, d22
+	vmull.s32	q13, d10, d4
+	vmlal.s32	q13, d11, d3
+	vmlal.s32	q13, d13, d1
+	vmlal.s32	q13, d16, d0
+	vmlal.s32	q13, d17, d23
+	vmlal.s32	q13, d8, d22
+	vmull.s32	q1, d10, d5
+	vmlal.s32	q1, d11, d4
+	vmlal.s32	q1, d6, d1
+	vmlal.s32	q1, d17, d0
+	vmlal.s32	q1, d8, d23
+	vmull.s32	q14, d10, d6
+	vmlal.s32	q14, d11, d13
+	vmlal.s32	q14, d4, d4
+	vmlal.s32	q14, d17, d1
+	vmlal.s32	q14, d18, d0
+	vmlal.s32	q14, d9, d23
+	vmull.s32	q11, d10, d7
+	vmlal.s32	q11, d11, d6
+	vmlal.s32	q11, d12, d5
+	vmlal.s32	q11, d8, d1
+	vmlal.s32	q11, d19, d0
+	vmull.s32	q15, d10, d8
+	vmlal.s32	q15, d11, d17
+	vmlal.s32	q15, d12, d6
+	vmlal.s32	q15, d13, d5
+	vmlal.s32	q15, d19, d1
+	vmlal.s32	q15, d14, d0
+	vmull.s32	q2, d10, d9
+	vmlal.s32	q2, d11, d8
+	vmlal.s32	q2, d12, d7
+	vmlal.s32	q2, d13, d6
+	vmlal.s32	q2, d14, d1
+	vmull.s32	q0, d15, d1
+	vmlal.s32	q0, d10, d14
+	vmlal.s32	q0, d11, d19
+	vmlal.s32	q0, d12, d8
+	vmlal.s32	q0, d13, d17
+	vmlal.s32	q0, d6, d6
+	add		r2, sp, #480
+	vld1.8		{d18-d19}, [r2, : 128]!
+	vmull.s32	q3, d16, d7
+	vmlal.s32	q3, d10, d15
+	vmlal.s32	q3, d11, d14
+	vmlal.s32	q3, d12, d9
+	vmlal.s32	q3, d13, d8
+	vld1.8		{d8-d9}, [r2, : 128]
+	vadd.i64	q5, q12, q9
+	vadd.i64	q6, q15, q9
+	vshr.s64	q5, q5, #26
+	vshr.s64	q6, q6, #26
+	vadd.i64	q7, q10, q5
+	vshl.i64	q5, q5, #26
+	vadd.i64	q8, q7, q4
+	vadd.i64	q2, q2, q6
+	vshl.i64	q6, q6, #26
+	vadd.i64	q10, q2, q4
+	vsub.i64	q5, q12, q5
+	vshr.s64	q8, q8, #25
+	vsub.i64	q6, q15, q6
+	vshr.s64	q10, q10, #25
+	vadd.i64	q12, q13, q8
+	vshl.i64	q8, q8, #25
+	vadd.i64	q13, q12, q9
+	vadd.i64	q0, q0, q10
+	vsub.i64	q7, q7, q8
+	vshr.s64	q8, q13, #26
+	vshl.i64	q10, q10, #25
+	vadd.i64	q13, q0, q9
+	vadd.i64	q1, q1, q8
+	vshl.i64	q8, q8, #26
+	vadd.i64	q15, q1, q4
+	vsub.i64	q2, q2, q10
+	vshr.s64	q10, q13, #26
+	vsub.i64	q8, q12, q8
+	vshr.s64	q12, q15, #25
+	vadd.i64	q3, q3, q10
+	vshl.i64	q10, q10, #26
+	vadd.i64	q13, q3, q4
+	vadd.i64	q14, q14, q12
+	add		r2, r3, #144
+	vshl.i64	q12, q12, #25
+	add		r4, r3, #192
+	vadd.i64	q15, q14, q9
+	add		r2, r2, #8
+	vsub.i64	q0, q0, q10
+	add		r4, r4, #8
+	vshr.s64	q10, q13, #25
+	vsub.i64	q1, q1, q12
+	vshr.s64	q12, q15, #26
+	vadd.i64	q13, q10, q10
+	vadd.i64	q11, q11, q12
+	vtrn.32		d16, d2
+	vshl.i64	q12, q12, #26
+	vtrn.32		d17, d3
+	vadd.i64	q1, q11, q4
+	vadd.i64	q4, q5, q13
+	vst1.8		d16, [r2, : 64]!
+	vshl.i64	q5, q10, #4
+	vst1.8		d17, [r4, : 64]!
+	vsub.i64	q8, q14, q12
+	vshr.s64	q1, q1, #25
+	vadd.i64	q4, q4, q5
+	vadd.i64	q5, q6, q1
+	vshl.i64	q1, q1, #25
+	vadd.i64	q6, q5, q9
+	vadd.i64	q4, q4, q10
+	vshl.i64	q10, q10, #25
+	vadd.i64	q9, q4, q9
+	vsub.i64	q1, q11, q1
+	vshr.s64	q6, q6, #26
+	vsub.i64	q3, q3, q10
+	vtrn.32		d16, d2
+	vshr.s64	q9, q9, #26
+	vtrn.32		d17, d3
+	vadd.i64	q1, q2, q6
+	vst1.8		d16, [r2, : 64]
+	vshl.i64	q2, q6, #26
+	vst1.8		d17, [r4, : 64]
+	vadd.i64	q6, q7, q9
+	vtrn.32		d0, d6
+	vshl.i64	q7, q9, #26
+	vtrn.32		d1, d7
+	vsub.i64	q2, q5, q2
+	add		r2, r2, #16
+	vsub.i64	q3, q4, q7
+	vst1.8		d0, [r2, : 64]
+	add		r4, r4, #16
+	vst1.8		d1, [r4, : 64]
+	vtrn.32		d4, d2
+	vtrn.32		d5, d3
+	sub		r2, r2, #8
+	sub		r4, r4, #8
+	vtrn.32		d6, d12
+	vtrn.32		d7, d13
+	vst1.8		d4, [r2, : 64]
+	vst1.8		d5, [r4, : 64]
+	sub		r2, r2, #24
+	sub		r4, r4, #24
+	vst1.8		d6, [r2, : 64]
+	vst1.8		d7, [r4, : 64]
+	add		r2, r3, #336
+	add		r4, r3, #288
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vadd.i32	q0, q0, q1
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d4-d5}, [r4, : 128]!
+	vadd.i32	q1, q1, q2
+	add		r5, r3, #288
+	vld1.8		{d4}, [r2, : 64]
+	vld1.8		{d6}, [r4, : 64]
+	vadd.i32	q2, q2, q3
+	vst1.8		{d0-d1}, [r5, : 128]!
+	vst1.8		{d2-d3}, [r5, : 128]!
+	vst1.8		d4, [r5, : 64]
+	add		r2, r3, #48
+	add		r4, r3, #144
+	vld1.8		{d0-d1}, [r4, : 128]!
+	vld1.8		{d2-d3}, [r4, : 128]!
+	vld1.8		{d4}, [r4, : 64]
+	add		r4, r3, #288
+	vld1.8		{d6-d7}, [r4, : 128]!
+	vtrn.32		q0, q3
+	vld1.8		{d8-d9}, [r4, : 128]!
+	vshl.i32	q5, q0, #4
+	vtrn.32		q1, q4
+	vshl.i32	q6, q3, #4
+	vadd.i32	q5, q5, q0
+	vadd.i32	q6, q6, q3
+	vshl.i32	q7, q1, #4
+	vld1.8		{d5}, [r4, : 64]
+	vshl.i32	q8, q4, #4
+	vtrn.32		d4, d5
+	vadd.i32	q7, q7, q1
+	vadd.i32	q8, q8, q4
+	vld1.8		{d18-d19}, [r2, : 128]!
+	vshl.i32	q10, q2, #4
+	vld1.8		{d22-d23}, [r2, : 128]!
+	vadd.i32	q10, q10, q2
+	vld1.8		{d24}, [r2, : 64]
+	vadd.i32	q5, q5, q0
+	add		r2, r3, #240
+	vld1.8		{d26-d27}, [r2, : 128]!
+	vadd.i32	q6, q6, q3
+	vld1.8		{d28-d29}, [r2, : 128]!
+	vadd.i32	q8, q8, q4
+	vld1.8		{d25}, [r2, : 64]
+	vadd.i32	q10, q10, q2
+	vtrn.32		q9, q13
+	vadd.i32	q7, q7, q1
+	vadd.i32	q5, q5, q0
+	vtrn.32		q11, q14
+	vadd.i32	q6, q6, q3
+	add		r2, sp, #528
+	vadd.i32	q10, q10, q2
+	vtrn.32		d24, d25
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q6, q13, #1
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vshl.i32	q10, q14, #1
+	vst1.8		{d12-d13}, [r2, : 128]!
+	vshl.i32	q15, q12, #1
+	vadd.i32	q8, q8, q4
+	vext.32		d10, d31, d30, #0
+	vadd.i32	q7, q7, q1
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vmull.s32	q8, d18, d5
+	vmlal.s32	q8, d26, d4
+	vmlal.s32	q8, d19, d9
+	vmlal.s32	q8, d27, d3
+	vmlal.s32	q8, d22, d8
+	vmlal.s32	q8, d28, d2
+	vmlal.s32	q8, d23, d7
+	vmlal.s32	q8, d29, d1
+	vmlal.s32	q8, d24, d6
+	vmlal.s32	q8, d25, d0
+	vst1.8		{d14-d15}, [r2, : 128]!
+	vmull.s32	q2, d18, d4
+	vmlal.s32	q2, d12, d9
+	vmlal.s32	q2, d13, d8
+	vmlal.s32	q2, d19, d3
+	vmlal.s32	q2, d22, d2
+	vmlal.s32	q2, d23, d1
+	vmlal.s32	q2, d24, d0
+	vst1.8		{d20-d21}, [r2, : 128]!
+	vmull.s32	q7, d18, d9
+	vmlal.s32	q7, d26, d3
+	vmlal.s32	q7, d19, d8
+	vmlal.s32	q7, d27, d2
+	vmlal.s32	q7, d22, d7
+	vmlal.s32	q7, d28, d1
+	vmlal.s32	q7, d23, d6
+	vmlal.s32	q7, d29, d0
+	vst1.8		{d10-d11}, [r2, : 128]!
+	vmull.s32	q5, d18, d3
+	vmlal.s32	q5, d19, d2
+	vmlal.s32	q5, d22, d1
+	vmlal.s32	q5, d23, d0
+	vmlal.s32	q5, d12, d8
+	vst1.8		{d16-d17}, [r2, : 128]!
+	vmull.s32	q4, d18, d8
+	vmlal.s32	q4, d26, d2
+	vmlal.s32	q4, d19, d7
+	vmlal.s32	q4, d27, d1
+	vmlal.s32	q4, d22, d6
+	vmlal.s32	q4, d28, d0
+	vmull.s32	q8, d18, d7
+	vmlal.s32	q8, d26, d1
+	vmlal.s32	q8, d19, d6
+	vmlal.s32	q8, d27, d0
+	add		r2, sp, #544
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q7, d24, d21
+	vmlal.s32	q7, d25, d20
+	vmlal.s32	q4, d23, d21
+	vmlal.s32	q4, d29, d20
+	vmlal.s32	q8, d22, d21
+	vmlal.s32	q8, d28, d20
+	vmlal.s32	q5, d24, d20
+	vst1.8		{d14-d15}, [r2, : 128]
+	vmull.s32	q7, d18, d6
+	vmlal.s32	q7, d26, d0
+	add		r2, sp, #624
+	vld1.8		{d30-d31}, [r2, : 128]
+	vmlal.s32	q2, d30, d21
+	vmlal.s32	q7, d19, d21
+	vmlal.s32	q7, d27, d20
+	add		r2, sp, #592
+	vld1.8		{d26-d27}, [r2, : 128]
+	vmlal.s32	q4, d25, d27
+	vmlal.s32	q8, d29, d27
+	vmlal.s32	q8, d25, d26
+	vmlal.s32	q7, d28, d27
+	vmlal.s32	q7, d29, d26
+	add		r2, sp, #576
+	vld1.8		{d28-d29}, [r2, : 128]
+	vmlal.s32	q4, d24, d29
+	vmlal.s32	q8, d23, d29
+	vmlal.s32	q8, d24, d28
+	vmlal.s32	q7, d22, d29
+	vmlal.s32	q7, d23, d28
+	vst1.8		{d8-d9}, [r2, : 128]
+	add		r2, sp, #528
+	vld1.8		{d8-d9}, [r2, : 128]
+	vmlal.s32	q7, d24, d9
+	vmlal.s32	q7, d25, d31
+	vmull.s32	q1, d18, d2
+	vmlal.s32	q1, d19, d1
+	vmlal.s32	q1, d22, d0
+	vmlal.s32	q1, d24, d27
+	vmlal.s32	q1, d23, d20
+	vmlal.s32	q1, d12, d7
+	vmlal.s32	q1, d13, d6
+	vmull.s32	q6, d18, d1
+	vmlal.s32	q6, d19, d0
+	vmlal.s32	q6, d23, d27
+	vmlal.s32	q6, d22, d20
+	vmlal.s32	q6, d24, d26
+	vmull.s32	q0, d18, d0
+	vmlal.s32	q0, d22, d27
+	vmlal.s32	q0, d23, d26
+	vmlal.s32	q0, d24, d31
+	vmlal.s32	q0, d19, d20
+	add		r2, sp, #608
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q2, d18, d7
+	vmlal.s32	q5, d18, d6
+	vmlal.s32	q1, d18, d21
+	vmlal.s32	q0, d18, d28
+	vmlal.s32	q6, d18, d29
+	vmlal.s32	q2, d19, d6
+	vmlal.s32	q5, d19, d21
+	vmlal.s32	q1, d19, d29
+	vmlal.s32	q0, d19, d9
+	vmlal.s32	q6, d19, d28
+	add		r2, sp, #560
+	vld1.8		{d18-d19}, [r2, : 128]
+	add		r2, sp, #480
+	vld1.8		{d22-d23}, [r2, : 128]
+	vmlal.s32	q5, d19, d7
+	vmlal.s32	q0, d18, d21
+	vmlal.s32	q0, d19, d29
+	vmlal.s32	q6, d18, d6
+	add		r2, sp, #496
+	vld1.8		{d6-d7}, [r2, : 128]
+	vmlal.s32	q6, d19, d21
+	add		r2, sp, #544
+	vld1.8		{d18-d19}, [r2, : 128]
+	vmlal.s32	q0, d30, d8
+	add		r2, sp, #640
+	vld1.8		{d20-d21}, [r2, : 128]
+	vmlal.s32	q5, d30, d29
+	add		r2, sp, #576
+	vld1.8		{d24-d25}, [r2, : 128]
+	vmlal.s32	q1, d30, d28
+	vadd.i64	q13, q0, q11
+	vadd.i64	q14, q5, q11
+	vmlal.s32	q6, d30, d9
+	vshr.s64	q4, q13, #26
+	vshr.s64	q13, q14, #26
+	vadd.i64	q7, q7, q4
+	vshl.i64	q4, q4, #26
+	vadd.i64	q14, q7, q3
+	vadd.i64	q9, q9, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q15, q9, q3
+	vsub.i64	q0, q0, q4
+	vshr.s64	q4, q14, #25
+	vsub.i64	q5, q5, q13
+	vshr.s64	q13, q15, #25
+	vadd.i64	q6, q6, q4
+	vshl.i64	q4, q4, #25
+	vadd.i64	q14, q6, q11
+	vadd.i64	q2, q2, q13
+	vsub.i64	q4, q7, q4
+	vshr.s64	q7, q14, #26
+	vshl.i64	q13, q13, #25
+	vadd.i64	q14, q2, q11
+	vadd.i64	q8, q8, q7
+	vshl.i64	q7, q7, #26
+	vadd.i64	q15, q8, q3
+	vsub.i64	q9, q9, q13
+	vshr.s64	q13, q14, #26
+	vsub.i64	q6, q6, q7
+	vshr.s64	q7, q15, #25
+	vadd.i64	q10, q10, q13
+	vshl.i64	q13, q13, #26
+	vadd.i64	q14, q10, q3
+	vadd.i64	q1, q1, q7
+	add		r2, r3, #240
+	vshl.i64	q7, q7, #25
+	add		r4, r3, #144
+	vadd.i64	q15, q1, q11
+	add		r2, r2, #8
+	vsub.i64	q2, q2, q13
+	add		r4, r4, #8
+	vshr.s64	q13, q14, #25
+	vsub.i64	q7, q8, q7
+	vshr.s64	q8, q15, #26
+	vadd.i64	q14, q13, q13
+	vadd.i64	q12, q12, q8
+	vtrn.32		d12, d14
+	vshl.i64	q8, q8, #26
+	vtrn.32		d13, d15
+	vadd.i64	q3, q12, q3
+	vadd.i64	q0, q0, q14
+	vst1.8		d12, [r2, : 64]!
+	vshl.i64	q7, q13, #4
+	vst1.8		d13, [r4, : 64]!
+	vsub.i64	q1, q1, q8
+	vshr.s64	q3, q3, #25
+	vadd.i64	q0, q0, q7
+	vadd.i64	q5, q5, q3
+	vshl.i64	q3, q3, #25
+	vadd.i64	q6, q5, q11
+	vadd.i64	q0, q0, q13
+	vshl.i64	q7, q13, #25
+	vadd.i64	q8, q0, q11
+	vsub.i64	q3, q12, q3
+	vshr.s64	q6, q6, #26
+	vsub.i64	q7, q10, q7
+	vtrn.32		d2, d6
+	vshr.s64	q8, q8, #26
+	vtrn.32		d3, d7
+	vadd.i64	q3, q9, q6
+	vst1.8		d2, [r2, : 64]
+	vshl.i64	q6, q6, #26
+	vst1.8		d3, [r4, : 64]
+	vadd.i64	q1, q4, q8
+	vtrn.32		d4, d14
+	vshl.i64	q4, q8, #26
+	vtrn.32		d5, d15
+	vsub.i64	q5, q5, q6
+	add		r2, r2, #16
+	vsub.i64	q0, q0, q4
+	vst1.8		d4, [r2, : 64]
+	add		r4, r4, #16
+	vst1.8		d5, [r4, : 64]
+	vtrn.32		d10, d6
+	vtrn.32		d11, d7
+	sub		r2, r2, #8
+	sub		r4, r4, #8
+	vtrn.32		d0, d2
+	vtrn.32		d1, d3
+	vst1.8		d10, [r2, : 64]
+	vst1.8		d11, [r4, : 64]
+	sub		r2, r2, #24
+	sub		r4, r4, #24
+	vst1.8		d0, [r2, : 64]
+	vst1.8		d1, [r4, : 64]
+	ldr		r2, [sp, #456]
+	ldr		r4, [sp, #460]
+	subs		r5, r2, #1
+	bge		.Lmainloop
+	add		r1, r3, #144
+	add		r2, r3, #336
+	vld1.8		{d0-d1}, [r1, : 128]!
+	vld1.8		{d2-d3}, [r1, : 128]!
+	vld1.8		{d4}, [r1, : 64]
+	vst1.8		{d0-d1}, [r2, : 128]!
+	vst1.8		{d2-d3}, [r2, : 128]!
+	vst1.8		d4, [r2, : 64]
+	movw		r1, #0
+.Linvertloop:
+	add		r2, r3, #144
+	movw		r4, #0
+	movw		r5, #2
+	cmp		r1, #1
+	moveq		r5, #1
+	addeq		r2, r3, #336
+	addeq		r4, r3, #48
+	cmp		r1, #2
+	moveq		r5, #1
+	addeq		r2, r3, #48
+	cmp		r1, #3
+	moveq		r5, #5
+	addeq		r4, r3, #336
+	cmp		r1, #4
+	moveq		r5, #10
+	cmp		r1, #5
+	moveq		r5, #20
+	cmp		r1, #6
+	moveq		r5, #10
+	addeq		r2, r3, #336
+	addeq		r4, r3, #336
+	cmp		r1, #7
+	moveq		r5, #50
+	cmp		r1, #8
+	moveq		r5, #100
+	cmp		r1, #9
+	moveq		r5, #50
+	addeq		r2, r3, #336
+	cmp		r1, #10
+	moveq		r5, #5
+	addeq		r2, r3, #48
+	cmp		r1, #11
+	moveq		r5, #0
+	addeq		r2, r3, #96
+	add		r6, r3, #144
+	add		r7, r3, #288
+	vld1.8		{d0-d1}, [r6, : 128]!
+	vld1.8		{d2-d3}, [r6, : 128]!
+	vld1.8		{d4}, [r6, : 64]
+	vst1.8		{d0-d1}, [r7, : 128]!
+	vst1.8		{d2-d3}, [r7, : 128]!
+	vst1.8		d4, [r7, : 64]
+	cmp		r5, #0
+	beq		.Lskipsquaringloop
+.Lsquaringloop:
+	add		r6, r3, #288
+	add		r7, r3, #288
+	add		r8, r3, #288
+	vmov.i32	q0, #19
+	vmov.i32	q1, #0
+	vmov.i32	q2, #1
+	vzip.i32	q1, q2
+	vld1.8		{d4-d5}, [r7, : 128]!
+	vld1.8		{d6-d7}, [r7, : 128]!
+	vld1.8		{d9}, [r7, : 64]
+	vld1.8		{d10-d11}, [r6, : 128]!
+	add		r7, sp, #384
+	vld1.8		{d12-d13}, [r6, : 128]!
+	vmul.i32	q7, q2, q0
+	vld1.8		{d8}, [r6, : 64]
+	vext.32		d17, d11, d10, #1
+	vmul.i32	q9, q3, q0
+	vext.32		d16, d10, d8, #1
+	vshl.u32	q10, q5, q1
+	vext.32		d22, d14, d4, #1
+	vext.32		d24, d18, d6, #1
+	vshl.u32	q13, q6, q1
+	vshl.u32	d28, d8, d2
+	vrev64.i32	d22, d22
+	vmul.i32	d1, d9, d1
+	vrev64.i32	d24, d24
+	vext.32		d29, d8, d13, #1
+	vext.32		d0, d1, d9, #1
+	vrev64.i32	d0, d0
+	vext.32		d2, d9, d1, #1
+	vext.32		d23, d15, d5, #1
+	vmull.s32	q4, d20, d4
+	vrev64.i32	d23, d23
+	vmlal.s32	q4, d21, d1
+	vrev64.i32	d2, d2
+	vmlal.s32	q4, d26, d19
+	vext.32		d3, d5, d15, #1
+	vmlal.s32	q4, d27, d18
+	vrev64.i32	d3, d3
+	vmlal.s32	q4, d28, d15
+	vext.32		d14, d12, d11, #1
+	vmull.s32	q5, d16, d23
+	vext.32		d15, d13, d12, #1
+	vmlal.s32	q5, d17, d4
+	vst1.8		d8, [r7, : 64]!
+	vmlal.s32	q5, d14, d1
+	vext.32		d12, d9, d8, #0
+	vmlal.s32	q5, d15, d19
+	vmov.i64	d13, #0
+	vmlal.s32	q5, d29, d18
+	vext.32		d25, d19, d7, #1
+	vmlal.s32	q6, d20, d5
+	vrev64.i32	d25, d25
+	vmlal.s32	q6, d21, d4
+	vst1.8		d11, [r7, : 64]!
+	vmlal.s32	q6, d26, d1
+	vext.32		d9, d10, d10, #0
+	vmlal.s32	q6, d27, d19
+	vmov.i64	d8, #0
+	vmlal.s32	q6, d28, d18
+	vmlal.s32	q4, d16, d24
+	vmlal.s32	q4, d17, d5
+	vmlal.s32	q4, d14, d4
+	vst1.8		d12, [r7, : 64]!
+	vmlal.s32	q4, d15, d1
+	vext.32		d10, d13, d12, #0
+	vmlal.s32	q4, d29, d19
+	vmov.i64	d11, #0
+	vmlal.s32	q5, d20, d6
+	vmlal.s32	q5, d21, d5
+	vmlal.s32	q5, d26, d4
+	vext.32		d13, d8, d8, #0
+	vmlal.s32	q5, d27, d1
+	vmov.i64	d12, #0
+	vmlal.s32	q5, d28, d19
+	vst1.8		d9, [r7, : 64]!
+	vmlal.s32	q6, d16, d25
+	vmlal.s32	q6, d17, d6
+	vst1.8		d10, [r7, : 64]
+	vmlal.s32	q6, d14, d5
+	vext.32		d8, d11, d10, #0
+	vmlal.s32	q6, d15, d4
+	vmov.i64	d9, #0
+	vmlal.s32	q6, d29, d1
+	vmlal.s32	q4, d20, d7
+	vmlal.s32	q4, d21, d6
+	vmlal.s32	q4, d26, d5
+	vext.32		d11, d12, d12, #0
+	vmlal.s32	q4, d27, d4
+	vmov.i64	d10, #0
+	vmlal.s32	q4, d28, d1
+	vmlal.s32	q5, d16, d0
+	sub		r6, r7, #32
+	vmlal.s32	q5, d17, d7
+	vmlal.s32	q5, d14, d6
+	vext.32		d30, d9, d8, #0
+	vmlal.s32	q5, d15, d5
+	vld1.8		{d31}, [r6, : 64]!
+	vmlal.s32	q5, d29, d4
+	vmlal.s32	q15, d20, d0
+	vext.32		d0, d6, d18, #1
+	vmlal.s32	q15, d21, d25
+	vrev64.i32	d0, d0
+	vmlal.s32	q15, d26, d24
+	vext.32		d1, d7, d19, #1
+	vext.32		d7, d10, d10, #0
+	vmlal.s32	q15, d27, d23
+	vrev64.i32	d1, d1
+	vld1.8		{d6}, [r6, : 64]
+	vmlal.s32	q15, d28, d22
+	vmlal.s32	q3, d16, d4
+	add		r6, r6, #24
+	vmlal.s32	q3, d17, d2
+	vext.32		d4, d31, d30, #0
+	vmov		d17, d11
+	vmlal.s32	q3, d14, d1
+	vext.32		d11, d13, d13, #0
+	vext.32		d13, d30, d30, #0
+	vmlal.s32	q3, d15, d0
+	vext.32		d1, d8, d8, #0
+	vmlal.s32	q3, d29, d3
+	vld1.8		{d5}, [r6, : 64]
+	sub		r6, r6, #16
+	vext.32		d10, d6, d6, #0
+	vmov.i32	q1, #0xffffffff
+	vshl.i64	q4, q1, #25
+	add		r7, sp, #480
+	vld1.8		{d14-d15}, [r7, : 128]
+	vadd.i64	q9, q2, q7
+	vshl.i64	q1, q1, #26
+	vshr.s64	q10, q9, #26
+	vld1.8		{d0}, [r6, : 64]!
+	vadd.i64	q5, q5, q10
+	vand		q9, q9, q1
+	vld1.8		{d16}, [r6, : 64]!
+	add		r6, sp, #496
+	vld1.8		{d20-d21}, [r6, : 128]
+	vadd.i64	q11, q5, q10
+	vsub.i64	q2, q2, q9
+	vshr.s64	q9, q11, #25
+	vext.32		d12, d5, d4, #0
+	vand		q11, q11, q4
+	vadd.i64	q0, q0, q9
+	vmov		d19, d7
+	vadd.i64	q3, q0, q7
+	vsub.i64	q5, q5, q11
+	vshr.s64	q11, q3, #26
+	vext.32		d18, d11, d10, #0
+	vand		q3, q3, q1
+	vadd.i64	q8, q8, q11
+	vadd.i64	q11, q8, q10
+	vsub.i64	q0, q0, q3
+	vshr.s64	q3, q11, #25
+	vand		q11, q11, q4
+	vadd.i64	q3, q6, q3
+	vadd.i64	q6, q3, q7
+	vsub.i64	q8, q8, q11
+	vshr.s64	q11, q6, #26
+	vand		q6, q6, q1
+	vadd.i64	q9, q9, q11
+	vadd.i64	d25, d19, d21
+	vsub.i64	q3, q3, q6
+	vshr.s64	d23, d25, #25
+	vand		q4, q12, q4
+	vadd.i64	d21, d23, d23
+	vshl.i64	d25, d23, #4
+	vadd.i64	d21, d21, d23
+	vadd.i64	d25, d25, d21
+	vadd.i64	d4, d4, d25
+	vzip.i32	q0, q8
+	vadd.i64	d12, d4, d14
+	add		r6, r8, #8
+	vst1.8		d0, [r6, : 64]
+	vsub.i64	d19, d19, d9
+	add		r6, r6, #16
+	vst1.8		d16, [r6, : 64]
+	vshr.s64	d22, d12, #26
+	vand		q0, q6, q1
+	vadd.i64	d10, d10, d22
+	vzip.i32	q3, q9
+	vsub.i64	d4, d4, d0
+	sub		r6, r6, #8
+	vst1.8		d6, [r6, : 64]
+	add		r6, r6, #16
+	vst1.8		d18, [r6, : 64]
+	vzip.i32	q2, q5
+	sub		r6, r6, #32
+	vst1.8		d4, [r6, : 64]
+	subs		r5, r5, #1
+	bhi		.Lsquaringloop
+.Lskipsquaringloop:
+	mov		r2, r2
+	add		r5, r3, #288
+	add		r6, r3, #144
+	vmov.i32	q0, #19
+	vmov.i32	q1, #0
+	vmov.i32	q2, #1
+	vzip.i32	q1, q2
+	vld1.8		{d4-d5}, [r5, : 128]!
+	vld1.8		{d6-d7}, [r5, : 128]!
+	vld1.8		{d9}, [r5, : 64]
+	vld1.8		{d10-d11}, [r2, : 128]!
+	add		r5, sp, #384
+	vld1.8		{d12-d13}, [r2, : 128]!
+	vmul.i32	q7, q2, q0
+	vld1.8		{d8}, [r2, : 64]
+	vext.32		d17, d11, d10, #1
+	vmul.i32	q9, q3, q0
+	vext.32		d16, d10, d8, #1
+	vshl.u32	q10, q5, q1
+	vext.32		d22, d14, d4, #1
+	vext.32		d24, d18, d6, #1
+	vshl.u32	q13, q6, q1
+	vshl.u32	d28, d8, d2
+	vrev64.i32	d22, d22
+	vmul.i32	d1, d9, d1
+	vrev64.i32	d24, d24
+	vext.32		d29, d8, d13, #1
+	vext.32		d0, d1, d9, #1
+	vrev64.i32	d0, d0
+	vext.32		d2, d9, d1, #1
+	vext.32		d23, d15, d5, #1
+	vmull.s32	q4, d20, d4
+	vrev64.i32	d23, d23
+	vmlal.s32	q4, d21, d1
+	vrev64.i32	d2, d2
+	vmlal.s32	q4, d26, d19
+	vext.32		d3, d5, d15, #1
+	vmlal.s32	q4, d27, d18
+	vrev64.i32	d3, d3
+	vmlal.s32	q4, d28, d15
+	vext.32		d14, d12, d11, #1
+	vmull.s32	q5, d16, d23
+	vext.32		d15, d13, d12, #1
+	vmlal.s32	q5, d17, d4
+	vst1.8		d8, [r5, : 64]!
+	vmlal.s32	q5, d14, d1
+	vext.32		d12, d9, d8, #0
+	vmlal.s32	q5, d15, d19
+	vmov.i64	d13, #0
+	vmlal.s32	q5, d29, d18
+	vext.32		d25, d19, d7, #1
+	vmlal.s32	q6, d20, d5
+	vrev64.i32	d25, d25
+	vmlal.s32	q6, d21, d4
+	vst1.8		d11, [r5, : 64]!
+	vmlal.s32	q6, d26, d1
+	vext.32		d9, d10, d10, #0
+	vmlal.s32	q6, d27, d19
+	vmov.i64	d8, #0
+	vmlal.s32	q6, d28, d18
+	vmlal.s32	q4, d16, d24
+	vmlal.s32	q4, d17, d5
+	vmlal.s32	q4, d14, d4
+	vst1.8		d12, [r5, : 64]!
+	vmlal.s32	q4, d15, d1
+	vext.32		d10, d13, d12, #0
+	vmlal.s32	q4, d29, d19
+	vmov.i64	d11, #0
+	vmlal.s32	q5, d20, d6
+	vmlal.s32	q5, d21, d5
+	vmlal.s32	q5, d26, d4
+	vext.32		d13, d8, d8, #0
+	vmlal.s32	q5, d27, d1
+	vmov.i64	d12, #0
+	vmlal.s32	q5, d28, d19
+	vst1.8		d9, [r5, : 64]!
+	vmlal.s32	q6, d16, d25
+	vmlal.s32	q6, d17, d6
+	vst1.8		d10, [r5, : 64]
+	vmlal.s32	q6, d14, d5
+	vext.32		d8, d11, d10, #0
+	vmlal.s32	q6, d15, d4
+	vmov.i64	d9, #0
+	vmlal.s32	q6, d29, d1
+	vmlal.s32	q4, d20, d7
+	vmlal.s32	q4, d21, d6
+	vmlal.s32	q4, d26, d5
+	vext.32		d11, d12, d12, #0
+	vmlal.s32	q4, d27, d4
+	vmov.i64	d10, #0
+	vmlal.s32	q4, d28, d1
+	vmlal.s32	q5, d16, d0
+	sub		r2, r5, #32
+	vmlal.s32	q5, d17, d7
+	vmlal.s32	q5, d14, d6
+	vext.32		d30, d9, d8, #0
+	vmlal.s32	q5, d15, d5
+	vld1.8		{d31}, [r2, : 64]!
+	vmlal.s32	q5, d29, d4
+	vmlal.s32	q15, d20, d0
+	vext.32		d0, d6, d18, #1
+	vmlal.s32	q15, d21, d25
+	vrev64.i32	d0, d0
+	vmlal.s32	q15, d26, d24
+	vext.32		d1, d7, d19, #1
+	vext.32		d7, d10, d10, #0
+	vmlal.s32	q15, d27, d23
+	vrev64.i32	d1, d1
+	vld1.8		{d6}, [r2, : 64]
+	vmlal.s32	q15, d28, d22
+	vmlal.s32	q3, d16, d4
+	add		r2, r2, #24
+	vmlal.s32	q3, d17, d2
+	vext.32		d4, d31, d30, #0
+	vmov		d17, d11
+	vmlal.s32	q3, d14, d1
+	vext.32		d11, d13, d13, #0
+	vext.32		d13, d30, d30, #0
+	vmlal.s32	q3, d15, d0
+	vext.32		d1, d8, d8, #0
+	vmlal.s32	q3, d29, d3
+	vld1.8		{d5}, [r2, : 64]
+	sub		r2, r2, #16
+	vext.32		d10, d6, d6, #0
+	vmov.i32	q1, #0xffffffff
+	vshl.i64	q4, q1, #25
+	add		r5, sp, #480
+	vld1.8		{d14-d15}, [r5, : 128]
+	vadd.i64	q9, q2, q7
+	vshl.i64	q1, q1, #26
+	vshr.s64	q10, q9, #26
+	vld1.8		{d0}, [r2, : 64]!
+	vadd.i64	q5, q5, q10
+	vand		q9, q9, q1
+	vld1.8		{d16}, [r2, : 64]!
+	add		r2, sp, #496
+	vld1.8		{d20-d21}, [r2, : 128]
+	vadd.i64	q11, q5, q10
+	vsub.i64	q2, q2, q9
+	vshr.s64	q9, q11, #25
+	vext.32		d12, d5, d4, #0
+	vand		q11, q11, q4
+	vadd.i64	q0, q0, q9
+	vmov		d19, d7
+	vadd.i64	q3, q0, q7
+	vsub.i64	q5, q5, q11
+	vshr.s64	q11, q3, #26
+	vext.32		d18, d11, d10, #0
+	vand		q3, q3, q1
+	vadd.i64	q8, q8, q11
+	vadd.i64	q11, q8, q10
+	vsub.i64	q0, q0, q3
+	vshr.s64	q3, q11, #25
+	vand		q11, q11, q4
+	vadd.i64	q3, q6, q3
+	vadd.i64	q6, q3, q7
+	vsub.i64	q8, q8, q11
+	vshr.s64	q11, q6, #26
+	vand		q6, q6, q1
+	vadd.i64	q9, q9, q11
+	vadd.i64	d25, d19, d21
+	vsub.i64	q3, q3, q6
+	vshr.s64	d23, d25, #25
+	vand		q4, q12, q4
+	vadd.i64	d21, d23, d23
+	vshl.i64	d25, d23, #4
+	vadd.i64	d21, d21, d23
+	vadd.i64	d25, d25, d21
+	vadd.i64	d4, d4, d25
+	vzip.i32	q0, q8
+	vadd.i64	d12, d4, d14
+	add		r2, r6, #8
+	vst1.8		d0, [r2, : 64]
+	vsub.i64	d19, d19, d9
+	add		r2, r2, #16
+	vst1.8		d16, [r2, : 64]
+	vshr.s64	d22, d12, #26
+	vand		q0, q6, q1
+	vadd.i64	d10, d10, d22
+	vzip.i32	q3, q9
+	vsub.i64	d4, d4, d0
+	sub		r2, r2, #8
+	vst1.8		d6, [r2, : 64]
+	add		r2, r2, #16
+	vst1.8		d18, [r2, : 64]
+	vzip.i32	q2, q5
+	sub		r2, r2, #32
+	vst1.8		d4, [r2, : 64]
+	cmp		r4, #0
+	beq		.Lskippostcopy
+	add		r2, r3, #144
+	mov		r4, r4
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d4}, [r2, : 64]
+	vst1.8		{d0-d1}, [r4, : 128]!
+	vst1.8		{d2-d3}, [r4, : 128]!
+	vst1.8		d4, [r4, : 64]
+.Lskippostcopy:
+	cmp		r1, #1
+	bne		.Lskipfinalcopy
+	add		r2, r3, #288
+	add		r4, r3, #144
+	vld1.8		{d0-d1}, [r2, : 128]!
+	vld1.8		{d2-d3}, [r2, : 128]!
+	vld1.8		{d4}, [r2, : 64]
+	vst1.8		{d0-d1}, [r4, : 128]!
+	vst1.8		{d2-d3}, [r4, : 128]!
+	vst1.8		d4, [r4, : 64]
+.Lskipfinalcopy:
+	add		r1, r1, #1
+	cmp		r1, #12
+	blo		.Linvertloop
+	add		r1, r3, #144
+	ldr		r2, [r1], #4
+	ldr		r3, [r1], #4
+	ldr		r4, [r1], #4
+	ldr		r5, [r1], #4
+	ldr		r6, [r1], #4
+	ldr		r7, [r1], #4
+	ldr		r8, [r1], #4
+	ldr		r9, [r1], #4
+	ldr		r10, [r1], #4
+	ldr		r1, [r1]
+	add		r11, r1, r1, LSL #4
+	add		r11, r11, r1, LSL #1
+	add		r11, r11, #16777216
+	mov		r11, r11, ASR #25
+	add		r11, r11, r2
+	mov		r11, r11, ASR #26
+	add		r11, r11, r3
+	mov		r11, r11, ASR #25
+	add		r11, r11, r4
+	mov		r11, r11, ASR #26
+	add		r11, r11, r5
+	mov		r11, r11, ASR #25
+	add		r11, r11, r6
+	mov		r11, r11, ASR #26
+	add		r11, r11, r7
+	mov		r11, r11, ASR #25
+	add		r11, r11, r8
+	mov		r11, r11, ASR #26
+	add		r11, r11, r9
+	mov		r11, r11, ASR #25
+	add		r11, r11, r10
+	mov		r11, r11, ASR #26
+	add		r11, r11, r1
+	mov		r11, r11, ASR #25
+	add		r2, r2, r11
+	add		r2, r2, r11, LSL #1
+	add		r2, r2, r11, LSL #4
+	mov		r11, r2, ASR #26
+	add		r3, r3, r11
+	sub		r2, r2, r11, LSL #26
+	mov		r11, r3, ASR #25
+	add		r4, r4, r11
+	sub		r3, r3, r11, LSL #25
+	mov		r11, r4, ASR #26
+	add		r5, r5, r11
+	sub		r4, r4, r11, LSL #26
+	mov		r11, r5, ASR #25
+	add		r6, r6, r11
+	sub		r5, r5, r11, LSL #25
+	mov		r11, r6, ASR #26
+	add		r7, r7, r11
+	sub		r6, r6, r11, LSL #26
+	mov		r11, r7, ASR #25
+	add		r8, r8, r11
+	sub		r7, r7, r11, LSL #25
+	mov		r11, r8, ASR #26
+	add		r9, r9, r11
+	sub		r8, r8, r11, LSL #26
+	mov		r11, r9, ASR #25
+	add		r10, r10, r11
+	sub		r9, r9, r11, LSL #25
+	mov		r11, r10, ASR #26
+	add		r1, r1, r11
+	sub		r10, r10, r11, LSL #26
+	mov		r11, r1, ASR #25
+	sub		r1, r1, r11, LSL #25
+	add		r2, r2, r3, LSL #26
+	mov		r3, r3, LSR #6
+	add		r3, r3, r4, LSL #19
+	mov		r4, r4, LSR #13
+	add		r4, r4, r5, LSL #13
+	mov		r5, r5, LSR #19
+	add		r5, r5, r6, LSL #6
+	add		r6, r7, r8, LSL #25
+	mov		r7, r8, LSR #7
+	add		r7, r7, r9, LSL #19
+	mov		r8, r9, LSR #13
+	add		r8, r8, r10, LSL #12
+	mov		r9, r10, LSR #20
+	add		r1, r9, r1, LSL #6
+	str		r2, [r0]
+	str		r3, [r0, #4]
+	str		r4, [r0, #8]
+	str		r5, [r0, #12]
+	str		r6, [r0, #16]
+	str		r7, [r0, #20]
+	str		r8, [r0, #24]
+	str		r1, [r0, #28]
+	movw		r0, #0
+	mov		sp, ip
+	pop		{r4-r11, pc}
+ENDPROC(curve25519_neon)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
new file mode 100644
index 000000000000..776ae07e0469
--- /dev/null
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/simd.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <crypto/curve25519.h>
+
+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
+				const u8 secret[CURVE25519_KEY_SIZE],
+				const u8 basepoint[CURVE25519_KEY_SIZE]);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
+		     const u8 scalar[CURVE25519_KEY_SIZE],
+		     const u8 point[CURVE25519_KEY_SIZE])
+{
+	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+		kernel_neon_begin();
+		curve25519_neon(out, scalar, point);
+		kernel_neon_end();
+	} else {
+		curve25519_generic(out, scalar, point);
+	}
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+			  const u8 secret[CURVE25519_KEY_SIZE])
+{
+	return curve25519_arch(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	u8 *secret = kpp_tfm_ctx(tfm);
+
+	if (!len)
+		curve25519_generate_secret(secret);
+	else if (len == CURVE25519_KEY_SIZE &&
+		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+		memcpy(secret, buf, CURVE25519_KEY_SIZE);
+	else
+		return -EINVAL;
+	return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	const u8 *secret = kpp_tfm_ctx(tfm);
+	u8 public_key[CURVE25519_KEY_SIZE];
+	u8 buf[CURVE25519_KEY_SIZE];
+	int copied, nbytes;
+	u8 const *bp;
+
+	if (req->src) {
+		copied = sg_copy_to_buffer(req->src,
+					   sg_nents_for_len(req->src,
+							    CURVE25519_KEY_SIZE),
+					   public_key, CURVE25519_KEY_SIZE);
+		if (copied != CURVE25519_KEY_SIZE)
+			return -EINVAL;
+		bp = public_key;
+	} else {
+		bp = curve25519_base_point;
+	}
+
+	curve25519_arch(buf, secret, bp);
+
+	/* might want less than we've got */
+	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
+	if (copied != nbytes)
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+	return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+	.base.cra_name		= "curve25519",
+	.base.cra_driver_name	= "curve25519-neon",
+	.base.cra_priority	= 200,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,
+
+	.set_secret		= curve25519_set_secret,
+	.generate_public_key	= curve25519_compute_value,
+	.compute_shared_secret	= curve25519_compute_value,
+	.max_size		= curve25519_max_size,
+};
+
+static int __init mod_init(void)
+{
+	if (elf_hwcap & HWCAP_NEON) {
+		static_branch_enable(&have_neon);
+		return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
+			crypto_register_kpp(&curve25519_alg) : 0;
+	}
+	return 0;
+}
+
+static void __exit mod_exit(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
+		crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-neon");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index c47fe81abcb0..534c9647726d 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -88,6 +88,7 @@
 	T3_H		.req	d17
 
 	.text
+	.arch		armv8-a
 	.fpu		crypto-neon-fp-armv8
 
 	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index c691077679a6..a00fd329255f 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -163,10 +163,8 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	struct ghash_key *key = crypto_shash_ctx(tfm);
 	be128 h;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	/* needed for the fallback */
 	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
@@ -296,16 +294,11 @@ static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
 {
 	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct crypto_ahash *child = &ctx->cryptd_tfm->base;
-	int err;
 
 	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
 			       & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(child, key, keylen);
-	crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
-			       & CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_ahash_setkey(child, key, keylen);
 }
 
 static int ghash_async_init_tfm(struct crypto_tfm *tfm)
diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl
new file mode 100644
index 000000000000..6d79498d3115
--- /dev/null
+++ b/arch/arm/crypto/poly1305-armv4.pl
@@ -0,0 +1,1236 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+#			IALU(*)/gcc-4.4		NEON
+#
+# ARM11xx(ARMv6)	7.78/+100%		-
+# Cortex-A5		6.35/+130%		3.00
+# Cortex-A8		6.25/+115%		2.36
+# Cortex-A9		5.10/+95%		2.55
+# Cortex-A15		3.85/+85%		1.25(**)
+# Snapdragon S4		5.70/+100%		1.48(**)
+#
+# (*)	this is for -march=armv6, i.e. with bunch of ldrb loading data;
+# (**)	these are trade-off results, they can be improved by ~8% but at
+#	the cost of 15/12% regression on Cortex-A5/A7, it's even possible
+#	to improve Cortex-A9 result, but then A5/A7 loose more than 20%;
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}
+
+($ctx,$inp,$len,$padbit)=map("r$_",(0..3));
+
+$code.=<<___;
+#ifndef	__KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init   poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit   poly1305_emit_arm
+.globl	poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax	unified
+.thumb
+#else
+.code	32
+#endif
+
+.text
+
+.globl	poly1305_emit
+.globl	poly1305_blocks
+.globl	poly1305_init
+.type	poly1305_init,%function
+.align	5
+poly1305_init:
+.Lpoly1305_init:
+	stmdb	sp!,{r4-r11}
+
+	eor	r3,r3,r3
+	cmp	$inp,#0
+	str	r3,[$ctx,#0]		@ zero hash value
+	str	r3,[$ctx,#4]
+	str	r3,[$ctx,#8]
+	str	r3,[$ctx,#12]
+	str	r3,[$ctx,#16]
+	str	r3,[$ctx,#36]		@ clear is_base2_26
+	add	$ctx,$ctx,#20
+
+#ifdef	__thumb2__
+	it	eq
+#endif
+	moveq	r0,#0
+	beq	.Lno_key
+
+#if	__ARM_MAX_ARCH__>=7
+	mov	r3,#-1
+	str	r3,[$ctx,#28]		@ impossible key power value
+# ifndef __KERNEL__
+	adr	r11,.Lpoly1305_init
+	ldr	r12,.LOPENSSL_armcap
+# endif
+#endif
+	ldrb	r4,[$inp,#0]
+	mov	r10,#0x0fffffff
+	ldrb	r5,[$inp,#1]
+	and	r3,r10,#-4		@ 0x0ffffffc
+	ldrb	r6,[$inp,#2]
+	ldrb	r7,[$inp,#3]
+	orr	r4,r4,r5,lsl#8
+	ldrb	r5,[$inp,#4]
+	orr	r4,r4,r6,lsl#16
+	ldrb	r6,[$inp,#5]
+	orr	r4,r4,r7,lsl#24
+	ldrb	r7,[$inp,#6]
+	and	r4,r4,r10
+
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+	ldr	r12,[r12]
+# endif
+#endif
+	ldrb	r8,[$inp,#7]
+	orr	r5,r5,r6,lsl#8
+	ldrb	r6,[$inp,#8]
+	orr	r5,r5,r7,lsl#16
+	ldrb	r7,[$inp,#9]
+	orr	r5,r5,r8,lsl#24
+	ldrb	r8,[$inp,#10]
+	and	r5,r5,r3
+
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	tst	r12,#ARMV7_NEON		@ check for NEON
+# ifdef	__thumb2__
+	adr	r9,.Lpoly1305_blocks_neon
+	adr	r11,.Lpoly1305_blocks
+	it	ne
+	movne	r11,r9
+	adr	r12,.Lpoly1305_emit
+	orr	r11,r11,#1		@ thumb-ify addresses
+	orr	r12,r12,#1
+# else
+	add	r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+	ite	eq
+	addeq	r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+	addne	r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+	ldrb	r9,[$inp,#11]
+	orr	r6,r6,r7,lsl#8
+	ldrb	r7,[$inp,#12]
+	orr	r6,r6,r8,lsl#16
+	ldrb	r8,[$inp,#13]
+	orr	r6,r6,r9,lsl#24
+	ldrb	r9,[$inp,#14]
+	and	r6,r6,r3
+
+	ldrb	r10,[$inp,#15]
+	orr	r7,r7,r8,lsl#8
+	str	r4,[$ctx,#0]
+	orr	r7,r7,r9,lsl#16
+	str	r5,[$ctx,#4]
+	orr	r7,r7,r10,lsl#24
+	str	r6,[$ctx,#8]
+	and	r7,r7,r3
+	str	r7,[$ctx,#12]
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	stmia	r2,{r11,r12}		@ fill functions table
+	mov	r0,#1
+#else
+	mov	r0,#0
+#endif
+.Lno_key:
+	ldmia	sp!,{r4-r11}
+#if	__ARM_ARCH__>=5
+	ret				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
+my ($s1,$s2,$s3)=($r1,$r2,$r3);
+
+$code.=<<___;
+.type	poly1305_blocks,%function
+.align	5
+poly1305_blocks:
+.Lpoly1305_blocks:
+	stmdb	sp!,{r3-r11,lr}
+
+	ands	$len,$len,#-16
+	beq	.Lno_data
+
+	add	$len,$len,$inp		@ end pointer
+	sub	sp,sp,#32
+
+#if __ARM_ARCH__<7
+	ldmia	$ctx,{$h0-$r3}		@ load context
+	add	$ctx,$ctx,#20
+	str	$len,[sp,#16]		@ offload stuff
+	str	$ctx,[sp,#12]
+#else
+	ldr	lr,[$ctx,#36]		@ is_base2_26
+	ldmia	$ctx!,{$h0-$h4}		@ load hash value
+	str	$len,[sp,#16]		@ offload stuff
+	str	$ctx,[sp,#12]
+
+	adds	$r0,$h0,$h1,lsl#26	@ base 2^26 -> base 2^32
+	mov	$r1,$h1,lsr#6
+	adcs	$r1,$r1,$h2,lsl#20
+	mov	$r2,$h2,lsr#12
+	adcs	$r2,$r2,$h3,lsl#14
+	mov	$r3,$h3,lsr#18
+	adcs	$r3,$r3,$h4,lsl#8
+	mov	$len,#0
+	teq	lr,#0
+	str	$len,[$ctx,#16]		@ clear is_base2_26
+	adc	$len,$len,$h4,lsr#24
+
+	itttt	ne
+	movne	$h0,$r0			@ choose between radixes
+	movne	$h1,$r1
+	movne	$h2,$r2
+	movne	$h3,$r3
+	ldmia	$ctx,{$r0-$r3}		@ load key
+	it	ne
+	movne	$h4,$len
+#endif
+
+	mov	lr,$inp
+	cmp	$padbit,#0
+	str	$r1,[sp,#20]
+	str	$r2,[sp,#24]
+	str	$r3,[sp,#28]
+	b	.Loop
+
+.align	4
+.Loop:
+#if __ARM_ARCH__<7
+	ldrb	r0,[lr],#16		@ load input
+# ifdef	__thumb2__
+	it	hi
+# endif
+	addhi	$h4,$h4,#1		@ 1<<128
+	ldrb	r1,[lr,#-15]
+	ldrb	r2,[lr,#-14]
+	ldrb	r3,[lr,#-13]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-12]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-11]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-10]
+	adds	$h0,$h0,r3		@ accumulate input
+
+	ldrb	r3,[lr,#-9]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-8]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-7]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-6]
+	adcs	$h1,$h1,r3
+
+	ldrb	r3,[lr,#-5]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-4]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-3]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-2]
+	adcs	$h2,$h2,r3
+
+	ldrb	r3,[lr,#-1]
+	orr	r1,r0,r1,lsl#8
+	str	lr,[sp,#8]		@ offload input pointer
+	orr	r2,r1,r2,lsl#16
+	add	$s1,$r1,$r1,lsr#2
+	orr	r3,r2,r3,lsl#24
+#else
+	ldr	r0,[lr],#16		@ load input
+	it	hi
+	addhi	$h4,$h4,#1		@ padbit
+	ldr	r1,[lr,#-12]
+	ldr	r2,[lr,#-8]
+	ldr	r3,[lr,#-4]
+# ifdef	__ARMEB__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+# endif
+	adds	$h0,$h0,r0		@ accumulate input
+	str	lr,[sp,#8]		@ offload input pointer
+	adcs	$h1,$h1,r1
+	add	$s1,$r1,$r1,lsr#2
+	adcs	$h2,$h2,r2
+#endif
+	add	$s2,$r2,$r2,lsr#2
+	adcs	$h3,$h3,r3
+	add	$s3,$r3,$r3,lsr#2
+
+	umull	r2,r3,$h1,$r0
+	 adc	$h4,$h4,#0
+	umull	r0,r1,$h0,$r0
+	umlal	r2,r3,$h4,$s1
+	umlal	r0,r1,$h3,$s1
+	ldr	$r1,[sp,#20]		@ reload $r1
+	umlal	r2,r3,$h2,$s3
+	umlal	r0,r1,$h1,$s3
+	umlal	r2,r3,$h3,$s2
+	umlal	r0,r1,$h2,$s2
+	umlal	r2,r3,$h0,$r1
+	str	r0,[sp,#0]		@ future $h0
+	 mul	r0,$s2,$h4
+	ldr	$r2,[sp,#24]		@ reload $r2
+	adds	r2,r2,r1		@ d1+=d0>>32
+	 eor	r1,r1,r1
+	adc	lr,r3,#0		@ future $h2
+	str	r2,[sp,#4]		@ future $h1
+
+	mul	r2,$s3,$h4
+	eor	r3,r3,r3
+	umlal	r0,r1,$h3,$s3
+	ldr	$r3,[sp,#28]		@ reload $r3
+	umlal	r2,r3,$h3,$r0
+	umlal	r0,r1,$h2,$r0
+	umlal	r2,r3,$h2,$r1
+	umlal	r0,r1,$h1,$r1
+	umlal	r2,r3,$h1,$r2
+	umlal	r0,r1,$h0,$r2
+	umlal	r2,r3,$h0,$r3
+	ldr	$h0,[sp,#0]
+	mul	$h4,$r0,$h4
+	ldr	$h1,[sp,#4]
+
+	adds	$h2,lr,r0		@ d2+=d1>>32
+	ldr	lr,[sp,#8]		@ reload input pointer
+	adc	r1,r1,#0
+	adds	$h3,r2,r1		@ d3+=d2>>32
+	ldr	r0,[sp,#16]		@ reload end pointer
+	adc	r3,r3,#0
+	add	$h4,$h4,r3		@ h4+=d3>>32
+
+	and	r1,$h4,#-4
+	and	$h4,$h4,#3
+	add	r1,r1,r1,lsr#2		@ *=5
+	adds	$h0,$h0,r1
+	adcs	$h1,$h1,#0
+	adcs	$h2,$h2,#0
+	adcs	$h3,$h3,#0
+	adc	$h4,$h4,#0
+
+	cmp	r0,lr			@ done yet?
+	bhi	.Loop
+
+	ldr	$ctx,[sp,#12]
+	add	sp,sp,#32
+	stmdb	$ctx,{$h0-$h4}		@ store the result
+
+.Lno_data:
+#if	__ARM_ARCH__>=5
+	ldmia	sp!,{r3-r11,pc}
+#else
+	ldmia	sp!,{r3-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce)=map("r$_",(0..2));
+my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
+my $g4=$ctx;
+
+$code.=<<___;
+.type	poly1305_emit,%function
+.align	5
+poly1305_emit:
+.Lpoly1305_emit:
+	stmdb	sp!,{r4-r11}
+
+	ldmia	$ctx,{$h0-$h4}
+
+#if __ARM_ARCH__>=7
+	ldr	ip,[$ctx,#36]		@ is_base2_26
+
+	adds	$g0,$h0,$h1,lsl#26	@ base 2^26 -> base 2^32
+	mov	$g1,$h1,lsr#6
+	adcs	$g1,$g1,$h2,lsl#20
+	mov	$g2,$h2,lsr#12
+	adcs	$g2,$g2,$h3,lsl#14
+	mov	$g3,$h3,lsr#18
+	adcs	$g3,$g3,$h4,lsl#8
+	mov	$g4,#0
+	adc	$g4,$g4,$h4,lsr#24
+
+	tst	ip,ip
+	itttt	ne
+	movne	$h0,$g0
+	movne	$h1,$g1
+	movne	$h2,$g2
+	movne	$h3,$g3
+	it	ne
+	movne	$h4,$g4
+#endif
+
+	adds	$g0,$h0,#5		@ compare to modulus
+	adcs	$g1,$h1,#0
+	adcs	$g2,$h2,#0
+	adcs	$g3,$h3,#0
+	adc	$g4,$h4,#0
+	tst	$g4,#4			@ did it carry/borrow?
+
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	$h0,$g0
+	ldr	$g0,[$nonce,#0]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	$h1,$g1
+	ldr	$g1,[$nonce,#4]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	$h2,$g2
+	ldr	$g2,[$nonce,#8]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	$h3,$g3
+	ldr	$g3,[$nonce,#12]
+
+	adds	$h0,$h0,$g0
+	adcs	$h1,$h1,$g1
+	adcs	$h2,$h2,$g2
+	adc	$h3,$h3,$g3
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+	rev	$h0,$h0
+	rev	$h1,$h1
+	rev	$h2,$h2
+	rev	$h3,$h3
+# endif
+	str	$h0,[$mac,#0]
+	str	$h1,[$mac,#4]
+	str	$h2,[$mac,#8]
+	str	$h3,[$mac,#12]
+#else
+	strb	$h0,[$mac,#0]
+	mov	$h0,$h0,lsr#8
+	strb	$h1,[$mac,#4]
+	mov	$h1,$h1,lsr#8
+	strb	$h2,[$mac,#8]
+	mov	$h2,$h2,lsr#8
+	strb	$h3,[$mac,#12]
+	mov	$h3,$h3,lsr#8
+
+	strb	$h0,[$mac,#1]
+	mov	$h0,$h0,lsr#8
+	strb	$h1,[$mac,#5]
+	mov	$h1,$h1,lsr#8
+	strb	$h2,[$mac,#9]
+	mov	$h2,$h2,lsr#8
+	strb	$h3,[$mac,#13]
+	mov	$h3,$h3,lsr#8
+
+	strb	$h0,[$mac,#2]
+	mov	$h0,$h0,lsr#8
+	strb	$h1,[$mac,#6]
+	mov	$h1,$h1,lsr#8
+	strb	$h2,[$mac,#10]
+	mov	$h2,$h2,lsr#8
+	strb	$h3,[$mac,#14]
+	mov	$h3,$h3,lsr#8
+
+	strb	$h0,[$mac,#3]
+	strb	$h1,[$mac,#7]
+	strb	$h2,[$mac,#11]
+	strb	$h3,[$mac,#15]
+#endif
+	ldmia	sp!,{r4-r11}
+#if	__ARM_ARCH__>=5
+	ret				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_emit,.-poly1305_emit
+___
+{
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
+my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
+my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
+
+my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
+
+$code.=<<___;
+#if	__ARM_MAX_ARCH__>=7
+.fpu	neon
+
+.type	poly1305_init_neon,%function
+.align	5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+	ldr	r3,[$ctx,#48]		@ first table element
+	cmp	r3,#-1			@ is value impossible?
+	bne	.Lno_init_neon
+
+	ldr	r4,[$ctx,#20]		@ load key base 2^32
+	ldr	r5,[$ctx,#24]
+	ldr	r6,[$ctx,#28]
+	ldr	r7,[$ctx,#32]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	and	r3,r3,#0x03ffffff
+	and	r4,r4,#0x03ffffff
+	and	r5,r5,#0x03ffffff
+
+	vdup.32	$R0,r2			@ r^1 in both lanes
+	add	r2,r3,r3,lsl#2		@ *5
+	vdup.32	$R1,r3
+	add	r3,r4,r4,lsl#2
+	vdup.32	$S1,r2
+	vdup.32	$R2,r4
+	add	r4,r5,r5,lsl#2
+	vdup.32	$S2,r3
+	vdup.32	$R3,r5
+	add	r5,r6,r6,lsl#2
+	vdup.32	$S3,r4
+	vdup.32	$R4,r6
+	vdup.32	$S4,r5
+
+	mov	$zeros,#2		@ counter
+
+.Lsquare_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+
+	vmull.u32	$D0,$R0,${R0}[1]
+	vmull.u32	$D1,$R1,${R0}[1]
+	vmull.u32	$D2,$R2,${R0}[1]
+	vmull.u32	$D3,$R3,${R0}[1]
+	vmull.u32	$D4,$R4,${R0}[1]
+
+	vmlal.u32	$D0,$R4,${S1}[1]
+	vmlal.u32	$D1,$R0,${R1}[1]
+	vmlal.u32	$D2,$R1,${R1}[1]
+	vmlal.u32	$D3,$R2,${R1}[1]
+	vmlal.u32	$D4,$R3,${R1}[1]
+
+	vmlal.u32	$D0,$R3,${S2}[1]
+	vmlal.u32	$D1,$R4,${S2}[1]
+	vmlal.u32	$D3,$R1,${R2}[1]
+	vmlal.u32	$D2,$R0,${R2}[1]
+	vmlal.u32	$D4,$R2,${R2}[1]
+
+	vmlal.u32	$D0,$R2,${S3}[1]
+	vmlal.u32	$D3,$R0,${R3}[1]
+	vmlal.u32	$D1,$R3,${S3}[1]
+	vmlal.u32	$D2,$R4,${S3}[1]
+	vmlal.u32	$D4,$R1,${R3}[1]
+
+	vmlal.u32	$D3,$R4,${S4}[1]
+	vmlal.u32	$D0,$R1,${S4}[1]
+	vmlal.u32	$D1,$R2,${S4}[1]
+	vmlal.u32	$D2,$R3,${S4}[1]
+	vmlal.u32	$D4,$R0,${R4}[1]
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	@ and P. Schwabe
+	@
+	@ H0>>+H1>>+H2>>+H3>>+H4
+	@ H3>>+H4>>*5+H0>>+H1
+	@
+	@ Trivia.
+	@
+	@ Result of multiplication of n-bit number by m-bit number is
+	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+	@ m-bit number multiplied by 2^n is still n+m bits wide.
+	@
+	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+	@ one is n+1 bits wide.
+	@
+	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+	@ can be 27. However! In cases when their width exceeds 26 bits
+	@ they are limited by 2^26+2^6. This in turn means that *sum*
+	@ of the products with these values can still be viewed as sum
+	@ of 52-bit numbers as long as the amount of addends is not a
+	@ power of 2. For example,
+	@
+	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+	@
+	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
+	@ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+	@ which is less than 32 * (2^52) or 2^57. And when processing
+	@ data we are looking at triple as many addends...
+	@
+	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
+	@ This means that result of reduction have to be compressed upon
+	@ loop wrap-around. This can be done in the process of reduction
+	@ to minimize amount of instructions [as well as amount of
+	@ 128-bit instructions, which benefits low-end processors], but
+	@ one has to watch for H2 (which is narrower than H0) and 5*H4
+	@ not being wider than 58 bits, so that result of right shift
+	@ by 26 bits fits in 32 bits. This is also useful on x86,
+	@ because it allows to use paddd in place for paddq, which
+	@ benefits Atom, where paddq is ridiculously slow.
+
+	vshr.u64	$T0,$D3,#26
+	vmovn.i64	$D3#lo,$D3
+	 vshr.u64	$T1,$D0,#26
+	 vmovn.i64	$D0#lo,$D0
+	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
+	vbic.i32	$D3#lo,#0xfc000000	@ &=0x03ffffff
+	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
+	 vbic.i32	$D0#lo,#0xfc000000
+
+	vshrn.u64	$T0#lo,$D4,#26
+	vmovn.i64	$D4#lo,$D4
+	 vshr.u64	$T1,$D1,#26
+	 vmovn.i64	$D1#lo,$D1
+	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
+	vbic.i32	$D4#lo,#0xfc000000
+	 vbic.i32	$D1#lo,#0xfc000000
+
+	vadd.i32	$D0#lo,$D0#lo,$T0#lo
+	vshl.u32	$T0#lo,$T0#lo,#2
+	 vshrn.u64	$T1#lo,$D2,#26
+	 vmovn.i64	$D2#lo,$D2
+	vadd.i32	$D0#lo,$D0#lo,$T0#lo	@ h4 -> h0
+	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
+	 vbic.i32	$D2#lo,#0xfc000000
+
+	vshr.u32	$T0#lo,$D0#lo,#26
+	vbic.i32	$D0#lo,#0xfc000000
+	 vshr.u32	$T1#lo,$D3#lo,#26
+	 vbic.i32	$D3#lo,#0xfc000000
+	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
+	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4
+
+	subs		$zeros,$zeros,#1
+	beq		.Lsquare_break_neon
+
+	add		$tbl0,$ctx,#(48+0*9*4)
+	add		$tbl1,$ctx,#(48+1*9*4)
+
+	vtrn.32		$R0,$D0#lo		@ r^2:r^1
+	vtrn.32		$R2,$D2#lo
+	vtrn.32		$R3,$D3#lo
+	vtrn.32		$R1,$D1#lo
+	vtrn.32		$R4,$D4#lo
+
+	vshl.u32	$S2,$R2,#2		@ *5
+	vshl.u32	$S3,$R3,#2
+	vshl.u32	$S1,$R1,#2
+	vshl.u32	$S4,$R4,#2
+	vadd.i32	$S2,$S2,$R2
+	vadd.i32	$S1,$S1,$R1
+	vadd.i32	$S3,$S3,$R3
+	vadd.i32	$S4,$S4,$R4
+
+	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+	vst1.32		{${S4}[0]},[$tbl0,:32]
+	vst1.32		{${S4}[1]},[$tbl1,:32]
+
+	b		.Lsquare_neon
+
+.align	4
+.Lsquare_break_neon:
+	add		$tbl0,$ctx,#(48+2*4*9)
+	add		$tbl1,$ctx,#(48+3*4*9)
+
+	vmov		$R0,$D0#lo		@ r^4:r^3
+	vshl.u32	$S1,$D1#lo,#2		@ *5
+	vmov		$R1,$D1#lo
+	vshl.u32	$S2,$D2#lo,#2
+	vmov		$R2,$D2#lo
+	vshl.u32	$S3,$D3#lo,#2
+	vmov		$R3,$D3#lo
+	vshl.u32	$S4,$D4#lo,#2
+	vmov		$R4,$D4#lo
+	vadd.i32	$S1,$S1,$D1#lo
+	vadd.i32	$S2,$S2,$D2#lo
+	vadd.i32	$S3,$S3,$D3#lo
+	vadd.i32	$S4,$S4,$D4#lo
+
+	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+	vst1.32		{${S4}[0]},[$tbl0]
+	vst1.32		{${S4}[1]},[$tbl1]
+
+.Lno_init_neon:
+	ret				@ bx	lr
+.size	poly1305_init_neon,.-poly1305_init_neon
+
+.type	poly1305_blocks_neon,%function
+.align	5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+	ldr	ip,[$ctx,#36]		@ is_base2_26
+
+	cmp	$len,#64
+	blo	.Lpoly1305_blocks
+
+	stmdb	sp!,{r4-r7}
+	vstmdb	sp!,{d8-d15}		@ ABI specification says so
+
+	tst	ip,ip			@ is_base2_26?
+	bne	.Lbase2_26_neon
+
+	stmdb	sp!,{r1-r3,lr}
+	bl	.Lpoly1305_init_neon
+
+	ldr	r4,[$ctx,#0]		@ load hash value base 2^32
+	ldr	r5,[$ctx,#4]
+	ldr	r6,[$ctx,#8]
+	ldr	r7,[$ctx,#12]
+	ldr	ip,[$ctx,#16]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	 veor	$D0#lo,$D0#lo,$D0#lo
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	 veor	$D1#lo,$D1#lo,$D1#lo
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	 veor	$D2#lo,$D2#lo,$D2#lo
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	 veor	$D3#lo,$D3#lo,$D3#lo
+	and	r3,r3,#0x03ffffff
+	orr	r6,r6,ip,lsl#24
+	 veor	$D4#lo,$D4#lo,$D4#lo
+	and	r4,r4,#0x03ffffff
+	mov	r1,#1
+	and	r5,r5,#0x03ffffff
+	str	r1,[$ctx,#36]		@ set is_base2_26
+
+	vmov.32	$D0#lo[0],r2
+	vmov.32	$D1#lo[0],r3
+	vmov.32	$D2#lo[0],r4
+	vmov.32	$D3#lo[0],r5
+	vmov.32	$D4#lo[0],r6
+	adr	$zeros,.Lzeros
+
+	ldmia	sp!,{r1-r3,lr}
+	b	.Lhash_loaded
+
+.align	4
+.Lbase2_26_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ load hash value
+
+	veor		$D0#lo,$D0#lo,$D0#lo
+	veor		$D1#lo,$D1#lo,$D1#lo
+	veor		$D2#lo,$D2#lo,$D2#lo
+	veor		$D3#lo,$D3#lo,$D3#lo
+	veor		$D4#lo,$D4#lo,$D4#lo
+	vld4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+	adr		$zeros,.Lzeros
+	vld1.32		{$D4#lo[0]},[$ctx]
+	sub		$ctx,$ctx,#16		@ rewind
+
+.Lhash_loaded:
+	add		$in2,$inp,#32
+	mov		$padbit,$padbit,lsl#24
+	tst		$len,#31
+	beq		.Leven
+
+	vld4.32		{$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
+	vmov.32		$H4#lo[0],$padbit
+	sub		$len,$len,#16
+	add		$in2,$inp,#32
+
+# ifdef	__ARMEB__
+	vrev32.8	$H0,$H0
+	vrev32.8	$H3,$H3
+	vrev32.8	$H1,$H1
+	vrev32.8	$H2,$H2
+# endif
+	vsri.u32	$H4#lo,$H3#lo,#8	@ base 2^32 -> base 2^26
+	vshl.u32	$H3#lo,$H3#lo,#18
+
+	vsri.u32	$H3#lo,$H2#lo,#14
+	vshl.u32	$H2#lo,$H2#lo,#12
+	vadd.i32	$H4#hi,$H4#lo,$D4#lo	@ add hash value and move to #hi
+
+	vbic.i32	$H3#lo,#0xfc000000
+	vsri.u32	$H2#lo,$H1#lo,#20
+	vshl.u32	$H1#lo,$H1#lo,#6
+
+	vbic.i32	$H2#lo,#0xfc000000
+	vsri.u32	$H1#lo,$H0#lo,#26
+	vadd.i32	$H3#hi,$H3#lo,$D3#lo
+
+	vbic.i32	$H0#lo,#0xfc000000
+	vbic.i32	$H1#lo,#0xfc000000
+	vadd.i32	$H2#hi,$H2#lo,$D2#lo
+
+	vadd.i32	$H0#hi,$H0#lo,$D0#lo
+	vadd.i32	$H1#hi,$H1#lo,$D1#lo
+
+	mov		$tbl1,$zeros
+	add		$tbl0,$ctx,#48
+
+	cmp		$len,$len
+	b		.Long_tail
+
+.align	4
+.Leven:
+	subs		$len,$len,#64
+	it		lo
+	movlo		$in2,$zeros
+
+	vmov.i32	$H4,#1<<24		@ padbit, yes, always
+	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
+	add		$inp,$inp,#64
+	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
+	add		$in2,$in2,#64
+	itt		hi
+	addhi		$tbl1,$ctx,#(48+1*9*4)
+	addhi		$tbl0,$ctx,#(48+3*9*4)
+
+# ifdef	__ARMEB__
+	vrev32.8	$H0,$H0
+	vrev32.8	$H3,$H3
+	vrev32.8	$H1,$H1
+	vrev32.8	$H2,$H2
+# endif
+	vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
+	vshl.u32	$H3,$H3,#18
+
+	vsri.u32	$H3,$H2,#14
+	vshl.u32	$H2,$H2,#12
+
+	vbic.i32	$H3,#0xfc000000
+	vsri.u32	$H2,$H1,#20
+	vshl.u32	$H1,$H1,#6
+
+	vbic.i32	$H2,#0xfc000000
+	vsri.u32	$H1,$H0,#26
+
+	vbic.i32	$H0,#0xfc000000
+	vbic.i32	$H1,#0xfc000000
+
+	bls		.Lskip_loop
+
+	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^2
+	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4
+	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+	b		.Loop_neon
+
+.align	5
+.Loop_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	@   \___________________/
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	@   \___________________/ \____________________/
+	@
+	@ Note that we start with inp[2:3]*r^2. This is because it
+	@ doesn't depend on reduction in previous iteration.
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ inp[2:3]*r^2
+
+	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ accumulate inp[0:1]
+	vmull.u32	$D2,$H2#hi,${R0}[1]
+	vadd.i32	$H0#lo,$H0#lo,$D0#lo
+	vmull.u32	$D0,$H0#hi,${R0}[1]
+	vadd.i32	$H3#lo,$H3#lo,$D3#lo
+	vmull.u32	$D3,$H3#hi,${R0}[1]
+	vmlal.u32	$D2,$H1#hi,${R1}[1]
+	vadd.i32	$H1#lo,$H1#lo,$D1#lo
+	vmull.u32	$D1,$H1#hi,${R0}[1]
+
+	vadd.i32	$H4#lo,$H4#lo,$D4#lo
+	vmull.u32	$D4,$H4#hi,${R0}[1]
+	subs		$len,$len,#64
+	vmlal.u32	$D0,$H4#hi,${S1}[1]
+	it		lo
+	movlo		$in2,$zeros
+	vmlal.u32	$D3,$H2#hi,${R1}[1]
+	vld1.32		${S4}[1],[$tbl1,:32]
+	vmlal.u32	$D1,$H0#hi,${R1}[1]
+	vmlal.u32	$D4,$H3#hi,${R1}[1]
+
+	vmlal.u32	$D0,$H3#hi,${S2}[1]
+	vmlal.u32	$D3,$H1#hi,${R2}[1]
+	vmlal.u32	$D4,$H2#hi,${R2}[1]
+	vmlal.u32	$D1,$H4#hi,${S2}[1]
+	vmlal.u32	$D2,$H0#hi,${R2}[1]
+
+	vmlal.u32	$D3,$H0#hi,${R3}[1]
+	vmlal.u32	$D0,$H2#hi,${S3}[1]
+	vmlal.u32	$D4,$H1#hi,${R3}[1]
+	vmlal.u32	$D1,$H3#hi,${S3}[1]
+	vmlal.u32	$D2,$H4#hi,${S3}[1]
+
+	vmlal.u32	$D3,$H4#hi,${S4}[1]
+	vmlal.u32	$D0,$H1#hi,${S4}[1]
+	vmlal.u32	$D4,$H0#hi,${R4}[1]
+	vmlal.u32	$D1,$H2#hi,${S4}[1]
+	vmlal.u32	$D2,$H3#hi,${S4}[1]
+
+	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
+	add		$in2,$in2,#64
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4 and accumulate
+
+	vmlal.u32	$D3,$H3#lo,${R0}[0]
+	vmlal.u32	$D0,$H0#lo,${R0}[0]
+	vmlal.u32	$D4,$H4#lo,${R0}[0]
+	vmlal.u32	$D1,$H1#lo,${R0}[0]
+	vmlal.u32	$D2,$H2#lo,${R0}[0]
+	vld1.32		${S4}[0],[$tbl0,:32]
+
+	vmlal.u32	$D3,$H2#lo,${R1}[0]
+	vmlal.u32	$D0,$H4#lo,${S1}[0]
+	vmlal.u32	$D4,$H3#lo,${R1}[0]
+	vmlal.u32	$D1,$H0#lo,${R1}[0]
+	vmlal.u32	$D2,$H1#lo,${R1}[0]
+
+	vmlal.u32	$D3,$H1#lo,${R2}[0]
+	vmlal.u32	$D0,$H3#lo,${S2}[0]
+	vmlal.u32	$D4,$H2#lo,${R2}[0]
+	vmlal.u32	$D1,$H4#lo,${S2}[0]
+	vmlal.u32	$D2,$H0#lo,${R2}[0]
+
+	vmlal.u32	$D3,$H0#lo,${R3}[0]
+	vmlal.u32	$D0,$H2#lo,${S3}[0]
+	vmlal.u32	$D4,$H1#lo,${R3}[0]
+	vmlal.u32	$D1,$H3#lo,${S3}[0]
+	vmlal.u32	$D3,$H4#lo,${S4}[0]
+
+	vmlal.u32	$D2,$H4#lo,${S3}[0]
+	vmlal.u32	$D0,$H1#lo,${S4}[0]
+	vmlal.u32	$D4,$H0#lo,${R4}[0]
+	vmov.i32	$H4,#1<<24		@ padbit, yes, always
+	vmlal.u32	$D1,$H2#lo,${S4}[0]
+	vmlal.u32	$D2,$H3#lo,${S4}[0]
+
+	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
+	add		$inp,$inp,#64
+# ifdef	__ARMEB__
+	vrev32.8	$H0,$H0
+	vrev32.8	$H1,$H1
+	vrev32.8	$H2,$H2
+	vrev32.8	$H3,$H3
+# endif
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
+	@ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
+
+	vshr.u64	$T0,$D3,#26
+	vmovn.i64	$D3#lo,$D3
+	 vshr.u64	$T1,$D0,#26
+	 vmovn.i64	$D0#lo,$D0
+	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
+	vbic.i32	$D3#lo,#0xfc000000
+	  vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
+	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
+	  vshl.u32	$H3,$H3,#18
+	 vbic.i32	$D0#lo,#0xfc000000
+
+	vshrn.u64	$T0#lo,$D4,#26
+	vmovn.i64	$D4#lo,$D4
+	 vshr.u64	$T1,$D1,#26
+	 vmovn.i64	$D1#lo,$D1
+	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
+	  vsri.u32	$H3,$H2,#14
+	vbic.i32	$D4#lo,#0xfc000000
+	  vshl.u32	$H2,$H2,#12
+	 vbic.i32	$D1#lo,#0xfc000000
+
+	vadd.i32	$D0#lo,$D0#lo,$T0#lo
+	vshl.u32	$T0#lo,$T0#lo,#2
+	  vbic.i32	$H3,#0xfc000000
+	 vshrn.u64	$T1#lo,$D2,#26
+	 vmovn.i64	$D2#lo,$D2
+	vaddl.u32	$D0,$D0#lo,$T0#lo	@ h4 -> h0 [widen for a sec]
+	  vsri.u32	$H2,$H1,#20
+	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
+	  vshl.u32	$H1,$H1,#6
+	 vbic.i32	$D2#lo,#0xfc000000
+	  vbic.i32	$H2,#0xfc000000
+
+	vshrn.u64	$T0#lo,$D0,#26		@ re-narrow
+	vmovn.i64	$D0#lo,$D0
+	  vsri.u32	$H1,$H0,#26
+	  vbic.i32	$H0,#0xfc000000
+	 vshr.u32	$T1#lo,$D3#lo,#26
+	 vbic.i32	$D3#lo,#0xfc000000
+	vbic.i32	$D0#lo,#0xfc000000
+	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
+	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4
+	  vbic.i32	$H1,#0xfc000000
+
+	bhi		.Loop_neon
+
+.Lskip_loop:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	add		$tbl1,$ctx,#(48+0*9*4)
+	add		$tbl0,$ctx,#(48+1*9*4)
+	adds		$len,$len,#32
+	it		ne
+	movne		$len,#0
+	bne		.Long_tail
+
+	vadd.i32	$H2#hi,$H2#lo,$D2#lo	@ add hash value and move to #hi
+	vadd.i32	$H0#hi,$H0#lo,$D0#lo
+	vadd.i32	$H3#hi,$H3#lo,$D3#lo
+	vadd.i32	$H1#hi,$H1#lo,$D1#lo
+	vadd.i32	$H4#hi,$H4#lo,$D4#lo
+
+.Long_tail:
+	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^1
+	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^2
+
+	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ can be redundant
+	vmull.u32	$D2,$H2#hi,$R0
+	vadd.i32	$H0#lo,$H0#lo,$D0#lo
+	vmull.u32	$D0,$H0#hi,$R0
+	vadd.i32	$H3#lo,$H3#lo,$D3#lo
+	vmull.u32	$D3,$H3#hi,$R0
+	vadd.i32	$H1#lo,$H1#lo,$D1#lo
+	vmull.u32	$D1,$H1#hi,$R0
+	vadd.i32	$H4#lo,$H4#lo,$D4#lo
+	vmull.u32	$D4,$H4#hi,$R0
+
+	vmlal.u32	$D0,$H4#hi,$S1
+	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+	vmlal.u32	$D3,$H2#hi,$R1
+	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+	vmlal.u32	$D1,$H0#hi,$R1
+	vmlal.u32	$D4,$H3#hi,$R1
+	vmlal.u32	$D2,$H1#hi,$R1
+
+	vmlal.u32	$D3,$H1#hi,$R2
+	vld1.32		${S4}[1],[$tbl1,:32]
+	vmlal.u32	$D0,$H3#hi,$S2
+	vld1.32		${S4}[0],[$tbl0,:32]
+	vmlal.u32	$D4,$H2#hi,$R2
+	vmlal.u32	$D1,$H4#hi,$S2
+	vmlal.u32	$D2,$H0#hi,$R2
+
+	vmlal.u32	$D3,$H0#hi,$R3
+	 it		ne
+	 addne		$tbl1,$ctx,#(48+2*9*4)
+	vmlal.u32	$D0,$H2#hi,$S3
+	 it		ne
+	 addne		$tbl0,$ctx,#(48+3*9*4)
+	vmlal.u32	$D4,$H1#hi,$R3
+	vmlal.u32	$D1,$H3#hi,$S3
+	vmlal.u32	$D2,$H4#hi,$S3
+
+	vmlal.u32	$D3,$H4#hi,$S4
+	 vorn		$MASK,$MASK,$MASK	@ all-ones, can be redundant
+	vmlal.u32	$D0,$H1#hi,$S4
+	 vshr.u64	$MASK,$MASK,#38
+	vmlal.u32	$D4,$H0#hi,$R4
+	vmlal.u32	$D1,$H2#hi,$S4
+	vmlal.u32	$D2,$H3#hi,$S4
+
+	beq		.Lshort_tail
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^3
+	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4
+
+	vmlal.u32	$D2,$H2#lo,$R0
+	vmlal.u32	$D0,$H0#lo,$R0
+	vmlal.u32	$D3,$H3#lo,$R0
+	vmlal.u32	$D1,$H1#lo,$R0
+	vmlal.u32	$D4,$H4#lo,$R0
+
+	vmlal.u32	$D0,$H4#lo,$S1
+	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+	vmlal.u32	$D3,$H2#lo,$R1
+	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+	vmlal.u32	$D1,$H0#lo,$R1
+	vmlal.u32	$D4,$H3#lo,$R1
+	vmlal.u32	$D2,$H1#lo,$R1
+
+	vmlal.u32	$D3,$H1#lo,$R2
+	vld1.32		${S4}[1],[$tbl1,:32]
+	vmlal.u32	$D0,$H3#lo,$S2
+	vld1.32		${S4}[0],[$tbl0,:32]
+	vmlal.u32	$D4,$H2#lo,$R2
+	vmlal.u32	$D1,$H4#lo,$S2
+	vmlal.u32	$D2,$H0#lo,$R2
+
+	vmlal.u32	$D3,$H0#lo,$R3
+	vmlal.u32	$D0,$H2#lo,$S3
+	vmlal.u32	$D4,$H1#lo,$R3
+	vmlal.u32	$D1,$H3#lo,$S3
+	vmlal.u32	$D2,$H4#lo,$S3
+
+	vmlal.u32	$D3,$H4#lo,$S4
+	 vorn		$MASK,$MASK,$MASK	@ all-ones
+	vmlal.u32	$D0,$H1#lo,$S4
+	 vshr.u64	$MASK,$MASK,#38
+	vmlal.u32	$D4,$H0#lo,$R4
+	vmlal.u32	$D1,$H2#lo,$S4
+	vmlal.u32	$D2,$H3#lo,$S4
+
+.Lshort_tail:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ horizontal addition
+
+	vadd.i64	$D3#lo,$D3#lo,$D3#hi
+	vadd.i64	$D0#lo,$D0#lo,$D0#hi
+	vadd.i64	$D4#lo,$D4#lo,$D4#hi
+	vadd.i64	$D1#lo,$D1#lo,$D1#hi
+	vadd.i64	$D2#lo,$D2#lo,$D2#hi
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction, but without narrowing
+
+	vshr.u64	$T0,$D3,#26
+	vand.i64	$D3,$D3,$MASK
+	 vshr.u64	$T1,$D0,#26
+	 vand.i64	$D0,$D0,$MASK
+	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
+	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
+
+	vshr.u64	$T0,$D4,#26
+	vand.i64	$D4,$D4,$MASK
+	 vshr.u64	$T1,$D1,#26
+	 vand.i64	$D1,$D1,$MASK
+	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
+
+	vadd.i64	$D0,$D0,$T0
+	vshl.u64	$T0,$T0,#2
+	 vshr.u64	$T1,$D2,#26
+	 vand.i64	$D2,$D2,$MASK
+	vadd.i64	$D0,$D0,$T0		@ h4 -> h0
+	 vadd.i64	$D3,$D3,$T1		@ h2 -> h3
+
+	vshr.u64	$T0,$D0,#26
+	vand.i64	$D0,$D0,$MASK
+	 vshr.u64	$T1,$D3,#26
+	 vand.i64	$D3,$D3,$MASK
+	vadd.i64	$D1,$D1,$T0		@ h0 -> h1
+	 vadd.i64	$D4,$D4,$T1		@ h3 -> h4
+
+	cmp		$len,#0
+	bne		.Leven
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ store hash value
+
+	vst4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+	vst1.32		{$D4#lo[0]},[$ctx]
+
+	vldmia	sp!,{d8-d15}			@ epilogue
+	ldmia	sp!,{r4-r7}
+	ret					@ bx	lr
+.size	poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef	__KERNEL__
+.LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
+.word	OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm	OPENSSL_armcap_P,4,4
+.hidden	OPENSSL_armcap_P
+#endif
+#endif
+___
+}	}
+$code.=<<___;
+.asciz	"Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
+.align	2
+___
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/geo;
+
+	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
+	s/\bret\b/bx	lr/go						or
+	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4
+
+	print $_,"\n";
+}
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..37b71d990293
--- /dev/null
+++ b/arch/arm/crypto/poly1305-core.S_shipped
@@ -0,0 +1,1158 @@
+#ifndef	__KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init   poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit   poly1305_emit_arm
+.globl	poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax	unified
+.thumb
+#else
+.code	32
+#endif
+
+.text
+
+.globl	poly1305_emit
+.globl	poly1305_blocks
+.globl	poly1305_init
+.type	poly1305_init,%function
+.align	5
+poly1305_init:
+.Lpoly1305_init:
+	stmdb	sp!,{r4-r11}
+
+	eor	r3,r3,r3
+	cmp	r1,#0
+	str	r3,[r0,#0]		@ zero hash value
+	str	r3,[r0,#4]
+	str	r3,[r0,#8]
+	str	r3,[r0,#12]
+	str	r3,[r0,#16]
+	str	r3,[r0,#36]		@ clear is_base2_26
+	add	r0,r0,#20
+
+#ifdef	__thumb2__
+	it	eq
+#endif
+	moveq	r0,#0
+	beq	.Lno_key
+
+#if	__ARM_MAX_ARCH__>=7
+	mov	r3,#-1
+	str	r3,[r0,#28]		@ impossible key power value
+# ifndef __KERNEL__
+	adr	r11,.Lpoly1305_init
+	ldr	r12,.LOPENSSL_armcap
+# endif
+#endif
+	ldrb	r4,[r1,#0]
+	mov	r10,#0x0fffffff
+	ldrb	r5,[r1,#1]
+	and	r3,r10,#-4		@ 0x0ffffffc
+	ldrb	r6,[r1,#2]
+	ldrb	r7,[r1,#3]
+	orr	r4,r4,r5,lsl#8
+	ldrb	r5,[r1,#4]
+	orr	r4,r4,r6,lsl#16
+	ldrb	r6,[r1,#5]
+	orr	r4,r4,r7,lsl#24
+	ldrb	r7,[r1,#6]
+	and	r4,r4,r10
+
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+	ldr	r12,[r12]
+# endif
+#endif
+	ldrb	r8,[r1,#7]
+	orr	r5,r5,r6,lsl#8
+	ldrb	r6,[r1,#8]
+	orr	r5,r5,r7,lsl#16
+	ldrb	r7,[r1,#9]
+	orr	r5,r5,r8,lsl#24
+	ldrb	r8,[r1,#10]
+	and	r5,r5,r3
+
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	tst	r12,#ARMV7_NEON		@ check for NEON
+# ifdef	__thumb2__
+	adr	r9,.Lpoly1305_blocks_neon
+	adr	r11,.Lpoly1305_blocks
+	it	ne
+	movne	r11,r9
+	adr	r12,.Lpoly1305_emit
+	orr	r11,r11,#1		@ thumb-ify addresses
+	orr	r12,r12,#1
+# else
+	add	r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+	ite	eq
+	addeq	r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+	addne	r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+	ldrb	r9,[r1,#11]
+	orr	r6,r6,r7,lsl#8
+	ldrb	r7,[r1,#12]
+	orr	r6,r6,r8,lsl#16
+	ldrb	r8,[r1,#13]
+	orr	r6,r6,r9,lsl#24
+	ldrb	r9,[r1,#14]
+	and	r6,r6,r3
+
+	ldrb	r10,[r1,#15]
+	orr	r7,r7,r8,lsl#8
+	str	r4,[r0,#0]
+	orr	r7,r7,r9,lsl#16
+	str	r5,[r0,#4]
+	orr	r7,r7,r10,lsl#24
+	str	r6,[r0,#8]
+	and	r7,r7,r3
+	str	r7,[r0,#12]
+#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	stmia	r2,{r11,r12}		@ fill functions table
+	mov	r0,#1
+#else
+	mov	r0,#0
+#endif
+.Lno_key:
+	ldmia	sp!,{r4-r11}
+#if	__ARM_ARCH__>=5
+	bx	lr				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_init,.-poly1305_init
+.type	poly1305_blocks,%function
+.align	5
+poly1305_blocks:
+.Lpoly1305_blocks:
+	stmdb	sp!,{r3-r11,lr}
+
+	ands	r2,r2,#-16
+	beq	.Lno_data
+
+	add	r2,r2,r1		@ end pointer
+	sub	sp,sp,#32
+
+#if __ARM_ARCH__<7
+	ldmia	r0,{r4-r12}		@ load context
+	add	r0,r0,#20
+	str	r2,[sp,#16]		@ offload stuff
+	str	r0,[sp,#12]
+#else
+	ldr	lr,[r0,#36]		@ is_base2_26
+	ldmia	r0!,{r4-r8}		@ load hash value
+	str	r2,[sp,#16]		@ offload stuff
+	str	r0,[sp,#12]
+
+	adds	r9,r4,r5,lsl#26	@ base 2^26 -> base 2^32
+	mov	r10,r5,lsr#6
+	adcs	r10,r10,r6,lsl#20
+	mov	r11,r6,lsr#12
+	adcs	r11,r11,r7,lsl#14
+	mov	r12,r7,lsr#18
+	adcs	r12,r12,r8,lsl#8
+	mov	r2,#0
+	teq	lr,#0
+	str	r2,[r0,#16]		@ clear is_base2_26
+	adc	r2,r2,r8,lsr#24
+
+	itttt	ne
+	movne	r4,r9			@ choose between radixes
+	movne	r5,r10
+	movne	r6,r11
+	movne	r7,r12
+	ldmia	r0,{r9-r12}		@ load key
+	it	ne
+	movne	r8,r2
+#endif
+
+	mov	lr,r1
+	cmp	r3,#0
+	str	r10,[sp,#20]
+	str	r11,[sp,#24]
+	str	r12,[sp,#28]
+	b	.Loop
+
+.align	4
+.Loop:
+#if __ARM_ARCH__<7
+	ldrb	r0,[lr],#16		@ load input
+# ifdef	__thumb2__
+	it	hi
+# endif
+	addhi	r8,r8,#1		@ 1<<128
+	ldrb	r1,[lr,#-15]
+	ldrb	r2,[lr,#-14]
+	ldrb	r3,[lr,#-13]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-12]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-11]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-10]
+	adds	r4,r4,r3		@ accumulate input
+
+	ldrb	r3,[lr,#-9]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-8]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-7]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-6]
+	adcs	r5,r5,r3
+
+	ldrb	r3,[lr,#-5]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-4]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-3]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-2]
+	adcs	r6,r6,r3
+
+	ldrb	r3,[lr,#-1]
+	orr	r1,r0,r1,lsl#8
+	str	lr,[sp,#8]		@ offload input pointer
+	orr	r2,r1,r2,lsl#16
+	add	r10,r10,r10,lsr#2
+	orr	r3,r2,r3,lsl#24
+#else
+	ldr	r0,[lr],#16		@ load input
+	it	hi
+	addhi	r8,r8,#1		@ padbit
+	ldr	r1,[lr,#-12]
+	ldr	r2,[lr,#-8]
+	ldr	r3,[lr,#-4]
+# ifdef	__ARMEB__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+# endif
+	adds	r4,r4,r0		@ accumulate input
+	str	lr,[sp,#8]		@ offload input pointer
+	adcs	r5,r5,r1
+	add	r10,r10,r10,lsr#2
+	adcs	r6,r6,r2
+#endif
+	add	r11,r11,r11,lsr#2
+	adcs	r7,r7,r3
+	add	r12,r12,r12,lsr#2
+
+	umull	r2,r3,r5,r9
+	 adc	r8,r8,#0
+	umull	r0,r1,r4,r9
+	umlal	r2,r3,r8,r10
+	umlal	r0,r1,r7,r10
+	ldr	r10,[sp,#20]		@ reload r10
+	umlal	r2,r3,r6,r12
+	umlal	r0,r1,r5,r12
+	umlal	r2,r3,r7,r11
+	umlal	r0,r1,r6,r11
+	umlal	r2,r3,r4,r10
+	str	r0,[sp,#0]		@ future r4
+	 mul	r0,r11,r8
+	ldr	r11,[sp,#24]		@ reload r11
+	adds	r2,r2,r1		@ d1+=d0>>32
+	 eor	r1,r1,r1
+	adc	lr,r3,#0		@ future r6
+	str	r2,[sp,#4]		@ future r5
+
+	mul	r2,r12,r8
+	eor	r3,r3,r3
+	umlal	r0,r1,r7,r12
+	ldr	r12,[sp,#28]		@ reload r12
+	umlal	r2,r3,r7,r9
+	umlal	r0,r1,r6,r9
+	umlal	r2,r3,r6,r10
+	umlal	r0,r1,r5,r10
+	umlal	r2,r3,r5,r11
+	umlal	r0,r1,r4,r11
+	umlal	r2,r3,r4,r12
+	ldr	r4,[sp,#0]
+	mul	r8,r9,r8
+	ldr	r5,[sp,#4]
+
+	adds	r6,lr,r0		@ d2+=d1>>32
+	ldr	lr,[sp,#8]		@ reload input pointer
+	adc	r1,r1,#0
+	adds	r7,r2,r1		@ d3+=d2>>32
+	ldr	r0,[sp,#16]		@ reload end pointer
+	adc	r3,r3,#0
+	add	r8,r8,r3		@ h4+=d3>>32
+
+	and	r1,r8,#-4
+	and	r8,r8,#3
+	add	r1,r1,r1,lsr#2		@ *=5
+	adds	r4,r4,r1
+	adcs	r5,r5,#0
+	adcs	r6,r6,#0
+	adcs	r7,r7,#0
+	adc	r8,r8,#0
+
+	cmp	r0,lr			@ done yet?
+	bhi	.Loop
+
+	ldr	r0,[sp,#12]
+	add	sp,sp,#32
+	stmdb	r0,{r4-r8}		@ store the result
+
+.Lno_data:
+#if	__ARM_ARCH__>=5
+	ldmia	sp!,{r3-r11,pc}
+#else
+	ldmia	sp!,{r3-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_blocks,.-poly1305_blocks
+.type	poly1305_emit,%function
+.align	5
+poly1305_emit:
+.Lpoly1305_emit:
+	stmdb	sp!,{r4-r11}
+
+	ldmia	r0,{r3-r7}
+
+#if __ARM_ARCH__>=7
+	ldr	ip,[r0,#36]		@ is_base2_26
+
+	adds	r8,r3,r4,lsl#26	@ base 2^26 -> base 2^32
+	mov	r9,r4,lsr#6
+	adcs	r9,r9,r5,lsl#20
+	mov	r10,r5,lsr#12
+	adcs	r10,r10,r6,lsl#14
+	mov	r11,r6,lsr#18
+	adcs	r11,r11,r7,lsl#8
+	mov	r0,#0
+	adc	r0,r0,r7,lsr#24
+
+	tst	ip,ip
+	itttt	ne
+	movne	r3,r8
+	movne	r4,r9
+	movne	r5,r10
+	movne	r6,r11
+	it	ne
+	movne	r7,r0
+#endif
+
+	adds	r8,r3,#5		@ compare to modulus
+	adcs	r9,r4,#0
+	adcs	r10,r5,#0
+	adcs	r11,r6,#0
+	adc	r0,r7,#0
+	tst	r0,#4			@ did it carry/borrow?
+
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r3,r8
+	ldr	r8,[r2,#0]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r4,r9
+	ldr	r9,[r2,#4]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r5,r10
+	ldr	r10,[r2,#8]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r6,r11
+	ldr	r11,[r2,#12]
+
+	adds	r3,r3,r8
+	adcs	r4,r4,r9
+	adcs	r5,r5,r10
+	adc	r6,r6,r11
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+	rev	r3,r3
+	rev	r4,r4
+	rev	r5,r5
+	rev	r6,r6
+# endif
+	str	r3,[r1,#0]
+	str	r4,[r1,#4]
+	str	r5,[r1,#8]
+	str	r6,[r1,#12]
+#else
+	strb	r3,[r1,#0]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#4]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#8]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#12]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#1]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#5]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#9]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#13]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#2]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#6]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#10]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#14]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#3]
+	strb	r4,[r1,#7]
+	strb	r5,[r1,#11]
+	strb	r6,[r1,#15]
+#endif
+	ldmia	sp!,{r4-r11}
+#if	__ARM_ARCH__>=5
+	bx	lr				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	poly1305_emit,.-poly1305_emit
+#if	__ARM_MAX_ARCH__>=7
+.fpu	neon
+
+.type	poly1305_init_neon,%function
+.align	5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+	ldr	r3,[r0,#48]		@ first table element
+	cmp	r3,#-1			@ is value impossible?
+	bne	.Lno_init_neon
+
+	ldr	r4,[r0,#20]		@ load key base 2^32
+	ldr	r5,[r0,#24]
+	ldr	r6,[r0,#28]
+	ldr	r7,[r0,#32]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	and	r3,r3,#0x03ffffff
+	and	r4,r4,#0x03ffffff
+	and	r5,r5,#0x03ffffff
+
+	vdup.32	d0,r2			@ r^1 in both lanes
+	add	r2,r3,r3,lsl#2		@ *5
+	vdup.32	d1,r3
+	add	r3,r4,r4,lsl#2
+	vdup.32	d2,r2
+	vdup.32	d3,r4
+	add	r4,r5,r5,lsl#2
+	vdup.32	d4,r3
+	vdup.32	d5,r5
+	add	r5,r6,r6,lsl#2
+	vdup.32	d6,r4
+	vdup.32	d7,r6
+	vdup.32	d8,r5
+
+	mov	r5,#2		@ counter
+
+.Lsquare_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+
+	vmull.u32	q5,d0,d0[1]
+	vmull.u32	q6,d1,d0[1]
+	vmull.u32	q7,d3,d0[1]
+	vmull.u32	q8,d5,d0[1]
+	vmull.u32	q9,d7,d0[1]
+
+	vmlal.u32	q5,d7,d2[1]
+	vmlal.u32	q6,d0,d1[1]
+	vmlal.u32	q7,d1,d1[1]
+	vmlal.u32	q8,d3,d1[1]
+	vmlal.u32	q9,d5,d1[1]
+
+	vmlal.u32	q5,d5,d4[1]
+	vmlal.u32	q6,d7,d4[1]
+	vmlal.u32	q8,d1,d3[1]
+	vmlal.u32	q7,d0,d3[1]
+	vmlal.u32	q9,d3,d3[1]
+
+	vmlal.u32	q5,d3,d6[1]
+	vmlal.u32	q8,d0,d5[1]
+	vmlal.u32	q6,d5,d6[1]
+	vmlal.u32	q7,d7,d6[1]
+	vmlal.u32	q9,d1,d5[1]
+
+	vmlal.u32	q8,d7,d8[1]
+	vmlal.u32	q5,d1,d8[1]
+	vmlal.u32	q6,d3,d8[1]
+	vmlal.u32	q7,d5,d8[1]
+	vmlal.u32	q9,d0,d7[1]
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	@ and P. Schwabe
+	@
+	@ H0>>+H1>>+H2>>+H3>>+H4
+	@ H3>>+H4>>*5+H0>>+H1
+	@
+	@ Trivia.
+	@
+	@ Result of multiplication of n-bit number by m-bit number is
+	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+	@ m-bit number multiplied by 2^n is still n+m bits wide.
+	@
+	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+	@ one is n+1 bits wide.
+	@
+	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+	@ can be 27. However! In cases when their width exceeds 26 bits
+	@ they are limited by 2^26+2^6. This in turn means that *sum*
+	@ of the products with these values can still be viewed as sum
+	@ of 52-bit numbers as long as the amount of addends is not a
+	@ power of 2. For example,
+	@
+	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+	@
+	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
+	@ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+	@ which is less than 32 * (2^52) or 2^57. And when processing
+	@ data we are looking at triple as many addends...
+	@
+	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
+	@ This means that result of reduction have to be compressed upon
+	@ loop wrap-around. This can be done in the process of reduction
+	@ to minimize amount of instructions [as well as amount of
+	@ 128-bit instructions, which benefits low-end processors], but
+	@ one has to watch for H2 (which is narrower than H0) and 5*H4
+	@ not being wider than 58 bits, so that result of right shift
+	@ by 26 bits fits in 32 bits. This is also useful on x86,
+	@ because it allows to use paddd in place for paddq, which
+	@ benefits Atom, where paddq is ridiculously slow.
+
+	vshr.u64	q15,q8,#26
+	vmovn.i64	d16,q8
+	 vshr.u64	q4,q5,#26
+	 vmovn.i64	d10,q5
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	vbic.i32	d16,#0xfc000000	@ &=0x03ffffff
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+	 vbic.i32	d10,#0xfc000000
+
+	vshrn.u64	d30,q9,#26
+	vmovn.i64	d18,q9
+	 vshr.u64	q4,q6,#26
+	 vmovn.i64	d12,q6
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+	vbic.i32	d18,#0xfc000000
+	 vbic.i32	d12,#0xfc000000
+
+	vadd.i32	d10,d10,d30
+	vshl.u32	d30,d30,#2
+	 vshrn.u64	d8,q7,#26
+	 vmovn.i64	d14,q7
+	vadd.i32	d10,d10,d30	@ h4 -> h0
+	 vadd.i32	d16,d16,d8	@ h2 -> h3
+	 vbic.i32	d14,#0xfc000000
+
+	vshr.u32	d30,d10,#26
+	vbic.i32	d10,#0xfc000000
+	 vshr.u32	d8,d16,#26
+	 vbic.i32	d16,#0xfc000000
+	vadd.i32	d12,d12,d30	@ h0 -> h1
+	 vadd.i32	d18,d18,d8	@ h3 -> h4
+
+	subs		r5,r5,#1
+	beq		.Lsquare_break_neon
+
+	add		r6,r0,#(48+0*9*4)
+	add		r7,r0,#(48+1*9*4)
+
+	vtrn.32		d0,d10		@ r^2:r^1
+	vtrn.32		d3,d14
+	vtrn.32		d5,d16
+	vtrn.32		d1,d12
+	vtrn.32		d7,d18
+
+	vshl.u32	d4,d3,#2		@ *5
+	vshl.u32	d6,d5,#2
+	vshl.u32	d2,d1,#2
+	vshl.u32	d8,d7,#2
+	vadd.i32	d4,d4,d3
+	vadd.i32	d2,d2,d1
+	vadd.i32	d6,d6,d5
+	vadd.i32	d8,d8,d7
+
+	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
+	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
+	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vst1.32		{d8[0]},[r6,:32]
+	vst1.32		{d8[1]},[r7,:32]
+
+	b		.Lsquare_neon
+
+.align	4
+.Lsquare_break_neon:
+	add		r6,r0,#(48+2*4*9)
+	add		r7,r0,#(48+3*4*9)
+
+	vmov		d0,d10		@ r^4:r^3
+	vshl.u32	d2,d12,#2		@ *5
+	vmov		d1,d12
+	vshl.u32	d4,d14,#2
+	vmov		d3,d14
+	vshl.u32	d6,d16,#2
+	vmov		d5,d16
+	vshl.u32	d8,d18,#2
+	vmov		d7,d18
+	vadd.i32	d2,d2,d12
+	vadd.i32	d4,d4,d14
+	vadd.i32	d6,d6,d16
+	vadd.i32	d8,d8,d18
+
+	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
+	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
+	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vst1.32		{d8[0]},[r6]
+	vst1.32		{d8[1]},[r7]
+
+.Lno_init_neon:
+	bx	lr				@ bx	lr
+.size	poly1305_init_neon,.-poly1305_init_neon
+
+.type	poly1305_blocks_neon,%function
+.align	5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+	ldr	ip,[r0,#36]		@ is_base2_26
+
+	cmp	r2,#64
+	blo	.Lpoly1305_blocks
+
+	stmdb	sp!,{r4-r7}
+	vstmdb	sp!,{d8-d15}		@ ABI specification says so
+
+	tst	ip,ip			@ is_base2_26?
+	bne	.Lbase2_26_neon
+
+	stmdb	sp!,{r1-r3,lr}
+	bl	.Lpoly1305_init_neon
+
+	ldr	r4,[r0,#0]		@ load hash value base 2^32
+	ldr	r5,[r0,#4]
+	ldr	r6,[r0,#8]
+	ldr	r7,[r0,#12]
+	ldr	ip,[r0,#16]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	 veor	d10,d10,d10
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	 veor	d12,d12,d12
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	 veor	d14,d14,d14
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	 veor	d16,d16,d16
+	and	r3,r3,#0x03ffffff
+	orr	r6,r6,ip,lsl#24
+	 veor	d18,d18,d18
+	and	r4,r4,#0x03ffffff
+	mov	r1,#1
+	and	r5,r5,#0x03ffffff
+	str	r1,[r0,#36]		@ set is_base2_26
+
+	vmov.32	d10[0],r2
+	vmov.32	d12[0],r3
+	vmov.32	d14[0],r4
+	vmov.32	d16[0],r5
+	vmov.32	d18[0],r6
+	adr	r5,.Lzeros
+
+	ldmia	sp!,{r1-r3,lr}
+	b	.Lhash_loaded
+
+.align	4
+.Lbase2_26_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ load hash value
+
+	veor		d10,d10,d10
+	veor		d12,d12,d12
+	veor		d14,d14,d14
+	veor		d16,d16,d16
+	veor		d18,d18,d18
+	vld4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
+	adr		r5,.Lzeros
+	vld1.32		{d18[0]},[r0]
+	sub		r0,r0,#16		@ rewind
+
+.Lhash_loaded:
+	add		r4,r1,#32
+	mov		r3,r3,lsl#24
+	tst		r2,#31
+	beq		.Leven
+
+	vld4.32		{d20[0],d22[0],d24[0],d26[0]},[r1]!
+	vmov.32		d28[0],r3
+	sub		r2,r2,#16
+	add		r4,r1,#32
+
+# ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q13,q13
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+# endif
+	vsri.u32	d28,d26,#8	@ base 2^32 -> base 2^26
+	vshl.u32	d26,d26,#18
+
+	vsri.u32	d26,d24,#14
+	vshl.u32	d24,d24,#12
+	vadd.i32	d29,d28,d18	@ add hash value and move to #hi
+
+	vbic.i32	d26,#0xfc000000
+	vsri.u32	d24,d22,#20
+	vshl.u32	d22,d22,#6
+
+	vbic.i32	d24,#0xfc000000
+	vsri.u32	d22,d20,#26
+	vadd.i32	d27,d26,d16
+
+	vbic.i32	d20,#0xfc000000
+	vbic.i32	d22,#0xfc000000
+	vadd.i32	d25,d24,d14
+
+	vadd.i32	d21,d20,d10
+	vadd.i32	d23,d22,d12
+
+	mov		r7,r5
+	add		r6,r0,#48
+
+	cmp		r2,r2
+	b		.Long_tail
+
+.align	4
+.Leven:
+	subs		r2,r2,#64
+	it		lo
+	movlo		r4,r5
+
+	vmov.i32	q14,#1<<24		@ padbit, yes, always
+	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
+	add		r1,r1,#64
+	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
+	add		r4,r4,#64
+	itt		hi
+	addhi		r7,r0,#(48+1*9*4)
+	addhi		r6,r0,#(48+3*9*4)
+
+# ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q13,q13
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+# endif
+	vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
+	vshl.u32	q13,q13,#18
+
+	vsri.u32	q13,q12,#14
+	vshl.u32	q12,q12,#12
+
+	vbic.i32	q13,#0xfc000000
+	vsri.u32	q12,q11,#20
+	vshl.u32	q11,q11,#6
+
+	vbic.i32	q12,#0xfc000000
+	vsri.u32	q11,q10,#26
+
+	vbic.i32	q10,#0xfc000000
+	vbic.i32	q11,#0xfc000000
+
+	bls		.Lskip_loop
+
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^2
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	b		.Loop_neon
+
+.align	5
+.Loop_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	@   ___________________/
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	@   ___________________/ ____________________/
+	@
+	@ Note that we start with inp[2:3]*r^2. This is because it
+	@ doesn't depend on reduction in previous iteration.
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ inp[2:3]*r^2
+
+	vadd.i32	d24,d24,d14	@ accumulate inp[0:1]
+	vmull.u32	q7,d25,d0[1]
+	vadd.i32	d20,d20,d10
+	vmull.u32	q5,d21,d0[1]
+	vadd.i32	d26,d26,d16
+	vmull.u32	q8,d27,d0[1]
+	vmlal.u32	q7,d23,d1[1]
+	vadd.i32	d22,d22,d12
+	vmull.u32	q6,d23,d0[1]
+
+	vadd.i32	d28,d28,d18
+	vmull.u32	q9,d29,d0[1]
+	subs		r2,r2,#64
+	vmlal.u32	q5,d29,d2[1]
+	it		lo
+	movlo		r4,r5
+	vmlal.u32	q8,d25,d1[1]
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q6,d21,d1[1]
+	vmlal.u32	q9,d27,d1[1]
+
+	vmlal.u32	q5,d27,d4[1]
+	vmlal.u32	q8,d23,d3[1]
+	vmlal.u32	q9,d25,d3[1]
+	vmlal.u32	q6,d29,d4[1]
+	vmlal.u32	q7,d21,d3[1]
+
+	vmlal.u32	q8,d21,d5[1]
+	vmlal.u32	q5,d25,d6[1]
+	vmlal.u32	q9,d23,d5[1]
+	vmlal.u32	q6,d27,d6[1]
+	vmlal.u32	q7,d29,d6[1]
+
+	vmlal.u32	q8,d29,d8[1]
+	vmlal.u32	q5,d23,d8[1]
+	vmlal.u32	q9,d21,d7[1]
+	vmlal.u32	q6,d25,d8[1]
+	vmlal.u32	q7,d27,d8[1]
+
+	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
+	add		r4,r4,#64
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4 and accumulate
+
+	vmlal.u32	q8,d26,d0[0]
+	vmlal.u32	q5,d20,d0[0]
+	vmlal.u32	q9,d28,d0[0]
+	vmlal.u32	q6,d22,d0[0]
+	vmlal.u32	q7,d24,d0[0]
+	vld1.32		d8[0],[r6,:32]
+
+	vmlal.u32	q8,d24,d1[0]
+	vmlal.u32	q5,d28,d2[0]
+	vmlal.u32	q9,d26,d1[0]
+	vmlal.u32	q6,d20,d1[0]
+	vmlal.u32	q7,d22,d1[0]
+
+	vmlal.u32	q8,d22,d3[0]
+	vmlal.u32	q5,d26,d4[0]
+	vmlal.u32	q9,d24,d3[0]
+	vmlal.u32	q6,d28,d4[0]
+	vmlal.u32	q7,d20,d3[0]
+
+	vmlal.u32	q8,d20,d5[0]
+	vmlal.u32	q5,d24,d6[0]
+	vmlal.u32	q9,d22,d5[0]
+	vmlal.u32	q6,d26,d6[0]
+	vmlal.u32	q8,d28,d8[0]
+
+	vmlal.u32	q7,d28,d6[0]
+	vmlal.u32	q5,d22,d8[0]
+	vmlal.u32	q9,d20,d7[0]
+	vmov.i32	q14,#1<<24		@ padbit, yes, always
+	vmlal.u32	q6,d24,d8[0]
+	vmlal.u32	q7,d26,d8[0]
+
+	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
+	add		r1,r1,#64
+# ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+	vrev32.8	q13,q13
+# endif
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
+	@ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
+
+	vshr.u64	q15,q8,#26
+	vmovn.i64	d16,q8
+	 vshr.u64	q4,q5,#26
+	 vmovn.i64	d10,q5
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	vbic.i32	d16,#0xfc000000
+	  vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+	  vshl.u32	q13,q13,#18
+	 vbic.i32	d10,#0xfc000000
+
+	vshrn.u64	d30,q9,#26
+	vmovn.i64	d18,q9
+	 vshr.u64	q4,q6,#26
+	 vmovn.i64	d12,q6
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+	  vsri.u32	q13,q12,#14
+	vbic.i32	d18,#0xfc000000
+	  vshl.u32	q12,q12,#12
+	 vbic.i32	d12,#0xfc000000
+
+	vadd.i32	d10,d10,d30
+	vshl.u32	d30,d30,#2
+	  vbic.i32	q13,#0xfc000000
+	 vshrn.u64	d8,q7,#26
+	 vmovn.i64	d14,q7
+	vaddl.u32	q5,d10,d30	@ h4 -> h0 [widen for a sec]
+	  vsri.u32	q12,q11,#20
+	 vadd.i32	d16,d16,d8	@ h2 -> h3
+	  vshl.u32	q11,q11,#6
+	 vbic.i32	d14,#0xfc000000
+	  vbic.i32	q12,#0xfc000000
+
+	vshrn.u64	d30,q5,#26		@ re-narrow
+	vmovn.i64	d10,q5
+	  vsri.u32	q11,q10,#26
+	  vbic.i32	q10,#0xfc000000
+	 vshr.u32	d8,d16,#26
+	 vbic.i32	d16,#0xfc000000
+	vbic.i32	d10,#0xfc000000
+	vadd.i32	d12,d12,d30	@ h0 -> h1
+	 vadd.i32	d18,d18,d8	@ h3 -> h4
+	  vbic.i32	q11,#0xfc000000
+
+	bhi		.Loop_neon
+
+.Lskip_loop:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	add		r7,r0,#(48+0*9*4)
+	add		r6,r0,#(48+1*9*4)
+	adds		r2,r2,#32
+	it		ne
+	movne		r2,#0
+	bne		.Long_tail
+
+	vadd.i32	d25,d24,d14	@ add hash value and move to #hi
+	vadd.i32	d21,d20,d10
+	vadd.i32	d27,d26,d16
+	vadd.i32	d23,d22,d12
+	vadd.i32	d29,d28,d18
+
+.Long_tail:
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^1
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^2
+
+	vadd.i32	d24,d24,d14	@ can be redundant
+	vmull.u32	q7,d25,d0
+	vadd.i32	d20,d20,d10
+	vmull.u32	q5,d21,d0
+	vadd.i32	d26,d26,d16
+	vmull.u32	q8,d27,d0
+	vadd.i32	d22,d22,d12
+	vmull.u32	q6,d23,d0
+	vadd.i32	d28,d28,d18
+	vmull.u32	q9,d29,d0
+
+	vmlal.u32	q5,d29,d2
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vmlal.u32	q8,d25,d1
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vmlal.u32	q6,d21,d1
+	vmlal.u32	q9,d27,d1
+	vmlal.u32	q7,d23,d1
+
+	vmlal.u32	q8,d23,d3
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q5,d27,d4
+	vld1.32		d8[0],[r6,:32]
+	vmlal.u32	q9,d25,d3
+	vmlal.u32	q6,d29,d4
+	vmlal.u32	q7,d21,d3
+
+	vmlal.u32	q8,d21,d5
+	 it		ne
+	 addne		r7,r0,#(48+2*9*4)
+	vmlal.u32	q5,d25,d6
+	 it		ne
+	 addne		r6,r0,#(48+3*9*4)
+	vmlal.u32	q9,d23,d5
+	vmlal.u32	q6,d27,d6
+	vmlal.u32	q7,d29,d6
+
+	vmlal.u32	q8,d29,d8
+	 vorn		q0,q0,q0	@ all-ones, can be redundant
+	vmlal.u32	q5,d23,d8
+	 vshr.u64	q0,q0,#38
+	vmlal.u32	q9,d21,d7
+	vmlal.u32	q6,d25,d8
+	vmlal.u32	q7,d27,d8
+
+	beq		.Lshort_tail
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^3
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
+
+	vmlal.u32	q7,d24,d0
+	vmlal.u32	q5,d20,d0
+	vmlal.u32	q8,d26,d0
+	vmlal.u32	q6,d22,d0
+	vmlal.u32	q9,d28,d0
+
+	vmlal.u32	q5,d28,d2
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vmlal.u32	q8,d24,d1
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vmlal.u32	q6,d20,d1
+	vmlal.u32	q9,d26,d1
+	vmlal.u32	q7,d22,d1
+
+	vmlal.u32	q8,d22,d3
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q5,d26,d4
+	vld1.32		d8[0],[r6,:32]
+	vmlal.u32	q9,d24,d3
+	vmlal.u32	q6,d28,d4
+	vmlal.u32	q7,d20,d3
+
+	vmlal.u32	q8,d20,d5
+	vmlal.u32	q5,d24,d6
+	vmlal.u32	q9,d22,d5
+	vmlal.u32	q6,d26,d6
+	vmlal.u32	q7,d28,d6
+
+	vmlal.u32	q8,d28,d8
+	 vorn		q0,q0,q0	@ all-ones
+	vmlal.u32	q5,d22,d8
+	 vshr.u64	q0,q0,#38
+	vmlal.u32	q9,d20,d7
+	vmlal.u32	q6,d24,d8
+	vmlal.u32	q7,d26,d8
+
+.Lshort_tail:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ horizontal addition
+
+	vadd.i64	d16,d16,d17
+	vadd.i64	d10,d10,d11
+	vadd.i64	d18,d18,d19
+	vadd.i64	d12,d12,d13
+	vadd.i64	d14,d14,d15
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction, but without narrowing
+
+	vshr.u64	q15,q8,#26
+	vand.i64	q8,q8,q0
+	 vshr.u64	q4,q5,#26
+	 vand.i64	q5,q5,q0
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+
+	vshr.u64	q15,q9,#26
+	vand.i64	q9,q9,q0
+	 vshr.u64	q4,q6,#26
+	 vand.i64	q6,q6,q0
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+
+	vadd.i64	q5,q5,q15
+	vshl.u64	q15,q15,#2
+	 vshr.u64	q4,q7,#26
+	 vand.i64	q7,q7,q0
+	vadd.i64	q5,q5,q15		@ h4 -> h0
+	 vadd.i64	q8,q8,q4		@ h2 -> h3
+
+	vshr.u64	q15,q5,#26
+	vand.i64	q5,q5,q0
+	 vshr.u64	q4,q8,#26
+	 vand.i64	q8,q8,q0
+	vadd.i64	q6,q6,q15		@ h0 -> h1
+	 vadd.i64	q9,q9,q4		@ h3 -> h4
+
+	cmp		r2,#0
+	bne		.Leven
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ store hash value
+
+	vst4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
+	vst1.32		{d18[0]},[r0]
+
+	vldmia	sp!,{d8-d15}			@ epilogue
+	ldmia	sp!,{r4-r7}
+	bx	lr					@ bx	lr
+.size	poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef	__KERNEL__
+.LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
+.word	OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm	OPENSSL_armcap_P,4,4
+.hidden	OPENSSL_armcap_P
+#endif
+#endif
+.asciz	"Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
+.align	2
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..ceec04ec2f40
--- /dev/null
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+void poly1305_init_arm(void *state, const u8 *key);
+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
+void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
+
+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_init_arm(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(key + 16);
+	dctx->s[1] = get_unaligned_le32(key + 20);
+	dctx->s[2] = get_unaligned_le32(key + 24);
+	dctx->s[3] = get_unaligned_le32(key + 28);
+	dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int arm_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+				 u32 len, u32 hibit, bool do_neon)
+{
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset) {
+			poly1305_init_arm(&dctx->h, src);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(src +  0);
+			dctx->s[1] = get_unaligned_le32(src +  4);
+			dctx->s[2] = get_unaligned_le32(src +  8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+		if (len < POLY1305_BLOCK_SIZE)
+			return;
+	}
+
+	len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+	if (static_branch_likely(&have_neon) && likely(do_neon))
+		poly1305_blocks_neon(&dctx->h, src, len, hibit);
+	else
+		poly1305_blocks_arm(&dctx->h, src, len, hibit);
+}
+
+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+				    const u8 *src, u32 len, bool do_neon)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		len -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			arm_poly1305_blocks(dctx, dctx->buf,
+					    POLY1305_BLOCK_SIZE, 1, false);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(len >= POLY1305_BLOCK_SIZE)) {
+		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
+		src += round_down(len, POLY1305_BLOCK_SIZE);
+		len %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(len)) {
+		dctx->buflen = len;
+		memcpy(dctx->buf, src, len);
+	}
+}
+
+static int arm_poly1305_update(struct shash_desc *desc,
+			       const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	arm_poly1305_do_update(dctx, src, srclen, false);
+	return 0;
+}
+
+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
+						   const u8 *src,
+						   unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	bool do_neon = crypto_simd_usable() && srclen > 128;
+
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_begin();
+	arm_poly1305_do_update(dctx, src, srclen, do_neon);
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_end();
+	return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int nbytes)
+{
+	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+		       crypto_simd_usable();
+
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		nbytes -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_blocks_arm(&dctx->h, dctx->buf,
+					    POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+		if (static_branch_likely(&have_neon) && do_neon) {
+			kernel_neon_begin();
+			poly1305_blocks_neon(&dctx->h, src, len, 1);
+			kernel_neon_end();
+		} else {
+			poly1305_blocks_arm(&dctx->h, src, len, 1);
+		}
+		src += len;
+		nbytes %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(nbytes)) {
+		dctx->buflen = nbytes;
+		memcpy(dctx->buf, src, nbytes);
+	}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_emit_arm(&dctx->h, dst, dctx->s);
+	*dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	poly1305_final_arch(dctx, dst);
+	return 0;
+}
+
+static struct shash_alg arm_poly1305_algs[] = {{
+	.init			= arm_poly1305_init,
+	.update			= arm_poly1305_update,
+	.final			= arm_poly1305_final,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.descsize		= sizeof(struct poly1305_desc_ctx),
+
+	.base.cra_name		= "poly1305",
+	.base.cra_driver_name	= "poly1305-arm",
+	.base.cra_priority	= 150,
+	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+#ifdef CONFIG_KERNEL_MODE_NEON
+}, {
+	.init			= arm_poly1305_init,
+	.update			= arm_poly1305_update_neon,
+	.final			= arm_poly1305_final,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.descsize		= sizeof(struct poly1305_desc_ctx),
+
+	.base.cra_name		= "poly1305",
+	.base.cra_driver_name	= "poly1305-neon",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+#endif
+}};
+
+static int __init arm_poly1305_mod_init(void)
+{
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+	    (elf_hwcap & HWCAP_NEON))
+		static_branch_enable(&have_neon);
+	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+		/* register only the first entry */
+		return crypto_register_shash(&arm_poly1305_algs[0]);
+
+	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+		crypto_register_shashes(arm_poly1305_algs,
+					ARRAY_SIZE(arm_poly1305_algs)) : 0;
+}
+
+static void __exit arm_poly1305_mod_exit(void)
+{
+	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
+		return;
+	if (!static_branch_likely(&have_neon)) {
+		crypto_unregister_shash(&arm_poly1305_algs[0]);
+		return;
+	}
+	crypto_unregister_shashes(arm_poly1305_algs,
+				  ARRAY_SIZE(arm_poly1305_algs));
+}
+
+module_init(arm_poly1305_mod_init);
+module_exit(arm_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-arm");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
index 49a74a441aec..8a702e051738 100644
--- a/arch/arm/crypto/sha1-ce-core.S
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -10,6 +10,7 @@
 #include <asm/assembler.h>
 
 	.text
+	.arch		armv8-a
 	.fpu		crypto-neon-fp-armv8
 
 	k0		.req	q0
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
index 4ad517577e23..b6369d2440a1 100644
--- a/arch/arm/crypto/sha2-ce-core.S
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -10,6 +10,7 @@
 #include <asm/assembler.h>
 
 	.text
+	.arch		armv8-a
 	.fpu		crypto-neon-fp-armv8
 
 	k0		.req	q7
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 68ca86f85eb7..fa579b23b4df 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mmiowb.h
-generic-y += msi.h
 generic-y += parport.h
 generic-y += preempt.h
 generic-y += seccomp.h
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 0555f14cc8be..b5752f0e8936 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -10,6 +10,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>
 #include <asm/cp15.h>
@@ -327,13 +328,14 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
 /*
  * GITS_VPROPBASER - hi and lo bits may be accessed independently.
  */
+#define gits_read_vpropbaser(c)		__gic_readq_nonatomic(c)
 #define gits_write_vpropbaser(v, c)	__gic_writeq_nonatomic(v, c)
 
 /*
  * GITS_VPENDBASER - the Valid bit must be cleared before changing
  * anything else.
  */
-static inline void gits_write_vpendbaser(u64 val, void * __iomem addr)
+static inline void gits_write_vpendbaser(u64 val, void __iomem *addr)
 {
 	u32 tmp;
 
diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index b67e5fc1fe43..7c3001a6a775 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -14,23 +14,4 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 	return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
 }
 
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	u64 limit, mask;
-
-	if (!dev->dma_mask)
-		return 0;
-
-	mask = *dev->dma_mask;
-
-	limit = (mask + 1) & ~mask;
-	if (limit && size > limit)
-		return 0;
-
-	if ((addr | (addr + size - 1)) & ~mask)
-		return 0;
-
-	return 1;
-}
-
 #endif /* ASM_ARM_DMA_DIRECT_H */
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 7667826b93f1..5ac46e2860bc 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -50,19 +50,16 @@ void efi_virtmap_unload(void);
 
 /* arch specific definitions used by the stub code */
 
-#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
-#define __efi_call_early(f, ...)	f(__VA_ARGS__)
-#define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
-#define efi_is_64bit()			(false)
+#define efi_bs_call(func, ...)	efi_system_table()->boottime->func(__VA_ARGS__)
+#define efi_rt_call(func, ...)	efi_system_table()->runtime->func(__VA_ARGS__)
+#define efi_is_native()		(true)
 
-#define efi_table_attr(table, attr, instance)				\
-	((table##_t *)instance)->attr
+#define efi_table_attr(inst, attr)	(inst->attr)
 
-#define efi_call_proto(protocol, f, instance, ...)			\
-	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
+#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
 
-struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg);
-void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si);
+struct screen_info *alloc_screen_info(void);
+void free_screen_info(struct screen_info *si);
 
 static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
 {
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 18b0197f2384..48ec1d0337da 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -11,7 +11,6 @@
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
-extern void mcount(void);
 extern void __gnu_mcount_nc(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -23,9 +22,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 	/* With Thumb-2, the recorded addresses have the lsb set */
 	return addr & ~1;
 }
-
-extern void ftrace_caller_old(void);
-extern void ftrace_call_old(void);
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 32edfadb1593..a6d4ee86ba54 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -118,6 +118,8 @@
 #define L310_AUX_CTRL_STORE_LIMITATION		BIT(11)	/* R2P0+ */
 #define L310_AUX_CTRL_EXCLUSIVE_CACHE		BIT(12)
 #define L310_AUX_CTRL_ASSOCIATIVITY_16		BIT(16)
+#define L310_AUX_CTRL_FWA_SHIFT			23
+#define L310_AUX_CTRL_FWA_MASK			(3 << 23)
 #define L310_AUX_CTRL_CACHE_REPLACE_RR		BIT(25)	/* R2P0+ */
 #define L310_AUX_CTRL_NS_LOCKDOWN		BIT(26)
 #define L310_AUX_CTRL_NS_INT_CTRL		BIT(27)
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index ac54c06764e6..62358d3ca0a8 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -53,6 +53,9 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_DEBUG_ARCH_V7_MM	4
 #define ARM_DEBUG_ARCH_V7_1	5
 #define ARM_DEBUG_ARCH_V8	6
+#define ARM_DEBUG_ARCH_V8_1	7
+#define ARM_DEBUG_ARCH_V8_2	8
+#define ARM_DEBUG_ARCH_V8_4	9
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 7a0596fcb2e7..ab2b654084fa 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -356,7 +356,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
  *
  * Function		Memory type	Cacheability	Cache hint
  * ioremap()		Device		n/a		n/a
- * ioremap_nocache()	Device		n/a		n/a
  * ioremap_cache()	Normal		Writeback	Read allocate
  * ioremap_wc()		Normal		Non-cacheable	n/a
  * ioremap_wt()		Normal		Non-cacheable	n/a
@@ -368,13 +367,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
  * - unaligned accesses are "unpredictable"
  * - writes may be delayed before they hit the endpoint device
  *
- * ioremap_nocache() is the same as ioremap() as there are too many device
- * drivers using this for device registers, and documentation which tells
- * people to use it for such for this to be any different.  This is not a
- * safe fallback for memory-like mappings, or memory regions where the
- * compiler may generate unaligned accesses - eg, via inlining its own
- * memcpy.
- *
  * All normal memory mappings have the following properties:
  * - reads can be repeated with no side effects
  * - repeated reads return the last value written
@@ -392,7 +384,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
  */
 void __iomem *ioremap(resource_size_t res_cookie, size_t size);
 #define ioremap ioremap
-#define ioremap_nocache ioremap
 
 /*
  * Do not use ioremap_cache for mapping memory. Use memremap instead.
@@ -400,12 +391,6 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size);
 void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size);
 #define ioremap_cache ioremap_cache
 
-/*
- * Do not use ioremap_cached in new code. Provided for the benefit of
- * the pxa2xx-flash MTD driver only.
- */
-void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size);
-
 void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
 #define ioremap_wc ioremap_wc
 #define ioremap_wt ioremap_wc
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 0125aa059d5b..9c04bd810d07 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -162,6 +162,7 @@
 #define HSR_ISV		(_AC(1, UL) << HSR_ISV_SHIFT)
 #define HSR_SRT_SHIFT	(16)
 #define HSR_SRT_MASK	(0xf << HSR_SRT_SHIFT)
+#define HSR_CM		(1 << 8)
 #define HSR_FSC		(0x3f)
 #define HSR_FSC_TYPE	(0x3c)
 #define HSR_SSE		(1 << 21)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 40002416efec..9b118516d2db 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr;
 }
 
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr &= ~HCR_TWE;
 }
 
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr |= HCR_TWE;
 }
@@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
 }
 
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC);
+}
+
 static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a37c8e89777..556cd818eccf 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -7,6 +7,7 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/arm-smccc.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kvm_types.h>
@@ -38,6 +39,7 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -76,6 +78,14 @@ struct kvm_arch {
 
 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	/*
+	 * If we encounter a data abort without valid instruction syndrome
+	 * information, report this to user space.  User space can (and
+	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+	 * supported.
+	 */
+	bool return_nisv_io_abort_to_user;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+	return SMCCC_RET_NOT_SUPPORTED;
+}
+
+static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+	return GPA_INVALID;
+}
+
+static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return false;
+}
+
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 0abd389cf0ec..68e6f25784a4 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -27,5 +27,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 	return channel ? 15 : 14;
 }
 
+extern void pcibios_report_status(unsigned int status_mask, int warn);
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3ae120cd1715..eabcb48a7840 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -12,7 +12,7 @@
 
 #ifndef CONFIG_MMU
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 #include <asm/pgtable-nommu.h>
 
 #else
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index d3e937dcee4d..007d8fea7157 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -10,7 +10,7 @@
  * to ensure that the maintenance completes in case we migrate to another
  * CPU.
  */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
+#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
 #define __complete_pending_tlbi()	dsb(ish)
 #else
 #define __complete_pending_tlbi()
diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..fe6e1f65932d
--- /dev/null
+++ b/arch/arm/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+#include <asm/cp15.h>
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES		1
+
+extern struct vdso_data *__get_datapage(void);
+
+static __always_inline int gettimeofday_fallback(
+				struct __kernel_old_timeval *_tv,
+				struct timezone *_tz)
+{
+	register struct timezone *tz asm("r1") = _tz;
+	register struct __kernel_old_timeval *tv asm("r0") = _tv;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_gettimeofday;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (tv), "r" (tz), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static __always_inline long clock_gettime_fallback(
+					clockid_t _clkid,
+					struct __kernel_timespec *_ts)
+{
+	register struct __kernel_timespec *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_gettime64;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static __always_inline long clock_gettime32_fallback(
+					clockid_t _clkid,
+					struct old_timespec32 *_ts)
+{
+	register struct old_timespec32 *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_gettime;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static __always_inline int clock_getres_fallback(
+					clockid_t _clkid,
+					struct __kernel_timespec *_ts)
+{
+	register struct __kernel_timespec *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_getres_time64;
+
+	asm volatile(
+	"       swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static __always_inline int clock_getres32_fallback(
+					clockid_t _clkid,
+					struct old_timespec32 *_ts)
+{
+	register struct old_timespec32 *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_getres;
+
+	asm volatile(
+	"       swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(int clock_mode)
+{
+#ifdef CONFIG_ARM_ARCH_TIMER
+	u64 cycle_now;
+
+	if (!clock_mode)
+		return -EINVAL;
+
+	isb();
+	cycle_now = read_sysreg(CNTVCT);
+
+	return cycle_now;
+#else
+	return -EINVAL; /* use fallback */
+#endif
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+	return __get_datapage();
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..cff87d8d30da
--- /dev/null
+++ b/arch/arm/include/asm/vdso/vsyscall.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/cacheflush.h>
+
+extern struct vdso_data *vdso_data;
+extern bool cntvct_ok;
+
+static __always_inline
+bool tk_is_cntvct(const struct timekeeper *tk)
+{
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		return false;
+
+	if (!tk->tkr_mono.clock->archdata.vdso_direct)
+		return false;
+
+	return true;
+}
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm_get_k_vdso_data(void)
+{
+	return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm_get_k_vdso_data
+
+static __always_inline
+bool __arm_update_vdso_data(void)
+{
+	return cntvct_ok;
+}
+#define __arch_update_vdso_data __arm_update_vdso_data
+
+static __always_inline
+int __arm_get_clock_mode(struct timekeeper *tk)
+{
+	u32 __tk_is_cntvct = tk_is_cntvct(tk);
+
+	return __tk_is_cntvct;
+}
+#define __arch_get_clock_mode __arm_get_clock_mode
+
+static __always_inline
+int __arm_use_vsyscall(struct vdso_data *vdata)
+{
+	return vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm_use_vsyscall
+
+static __always_inline
+void __arm_sync_vdso_data(struct vdso_data *vdata)
+{
+	flush_dcache_page(virt_to_page(vdata));
+}
+#define __arch_sync_vdso_data __arm_sync_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
index 7910abf89b1c..bef68f59928d 100644
--- a/arch/arm/include/asm/vdso_datapage.h
+++ b/arch/arm/include/asm/vdso_datapage.h
@@ -11,35 +11,12 @@
 
 #ifndef __ASSEMBLY__
 
+#include <vdso/datapage.h>
 #include <asm/page.h>
 
-/* Try to be cache-friendly on systems that don't implement the
- * generic timer: fit the unconditionally updated fields in the first
- * 32 bytes.
- */
-struct vdso_data {
-	u32 seq_count;		/* sequence count - odd during updates */
-	u16 tk_is_cntvct;	/* fall back to syscall if false */
-	u16 cs_shift;		/* clocksource shift */
-	u32 xtime_coarse_sec;	/* coarse time */
-	u32 xtime_coarse_nsec;
-
-	u32 wtm_clock_sec;	/* wall to monotonic offset */
-	u32 wtm_clock_nsec;
-	u32 xtime_clock_sec;	/* CLOCK_REALTIME - seconds */
-	u32 cs_mult;		/* clocksource multiplier */
-
-	u64 cs_cycle_last;	/* last cycle value */
-	u64 cs_mask;		/* clocksource mask */
-
-	u64 xtime_clock_snsec;	/* CLOCK_REALTIME sub-ns base */
-	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
-	u32 tz_dsttime;
-};
-
 union vdso_data_store {
-	struct vdso_data data;
-	u8 page[PAGE_SIZE];
+	struct vdso_data	data[CS_BASES];
+	u8			page[PAGE_SIZE];
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h
new file mode 100644
index 000000000000..a9b3718b8600
--- /dev/null
+++ b/arch/arm/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARM_VMALLOC_H
+#define _ASM_ARM_VMALLOC_H
+
+#endif /* _ASM_ARM_VMALLOC_H */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 2769360f195c..03cd7c19a683 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -131,8 +131,9 @@ struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 8cad59465af3..89e5d864e923 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,10 +17,14 @@ CFLAGS_REMOVE_return_address.o = -pg
 # Object file lists.
 
 obj-y		:= elf.o entry-common.o irq.o opcodes.o \
-		   process.o ptrace.o reboot.o return_address.o \
+		   process.o ptrace.o reboot.o \
 		   setup.o signal.o sigreturn_codes.o \
 		   stacktrace.o sys_arm.o time.o traps.o
 
+ifneq ($(CONFIG_ARM_UNWIND),y)
+obj-$(CONFIG_FRAME_POINTER)	+= return_address.o
+endif
+
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
 obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o
@@ -49,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
-obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o patch.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 # Main staffs in KPROBES are in arch/arm/probes/ .
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index c125582de2e7..b5e217907686 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 
 #include <asm/delay.h>
+#include <asm/arch_timer.h>
 
 #include <clocksource/arm_arch_timer.h>
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 858d4e541532..77f54830554c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -211,7 +211,7 @@ __irq_svc:
 	svc_entry
 	irq_handler
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
@@ -226,7 +226,7 @@ ENDPROC(__irq_svc)
 
 	.ltorg
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 svc_preempt:
 	mov	r8, lr
 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index bda949fd84e8..2a5ff69c28e6 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -22,6 +22,7 @@
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 #include <asm/set_memory.h>
+#include <asm/patch.h>
 
 #ifdef CONFIG_THUMB2_KERNEL
 #define	NOP		0xf85deb04	/* pop.w {lr} */
@@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	set_kernel_text_rw();
 	ftrace_modify_all_code(*command);
-	set_kernel_text_ro();
 
 	return 0;
 }
@@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 
 int ftrace_arch_code_modify_prepare(void)
 {
-	set_all_modules_text_rw();
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	set_all_modules_text_ro();
 	/* Make sure any TLB misses during machine stop are cleared. */
 	flush_tlb_all();
 	return 0;
@@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
 			return -EINVAL;
 	}
 
-	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
-		return -EPERM;
-
-	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+	__patch_text((void *)pc, new);
 
 	return 0;
 }
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index b0c195e3a06d..02ca7adf5375 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -246,6 +246,9 @@ static int enable_monitor_mode(void)
 	case ARM_DEBUG_ARCH_V7_ECP14:
 	case ARM_DEBUG_ARCH_V7_1:
 	case ARM_DEBUG_ARCH_V8:
+	case ARM_DEBUG_ARCH_V8_1:
+	case ARM_DEBUG_ARCH_V8_2:
+	case ARM_DEBUG_ARCH_V8_4:
 		ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		isb();
 		break;
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index ae5020302de4..6607fa817bba 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -146,10 +146,9 @@ ARM_BE8(orr	r7, r7, #(1 << 25))     @ HSCTLR.EE
 #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
 	@ make CNTP_* and CNTPCT accessible from PL1
 	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
-	lsr	r7, #16
-	and	r7, #0xf
-	cmp	r7, #1
-	bne	1f
+	ubfx	r7, r7, #16, #4
+	teq	r7, #0
+	beq	1f
 	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index b647741c0ab0..6e626abaefc5 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/moduleloader.h>
 
 #include <asm/cache.h>
 #include <asm/opcodes.h>
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 9485acc520a4..46e478fb5ea2 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,6 +36,8 @@
 #include <asm/tls.h>
 #include <asm/vdso.h>
 
+#include "signal.h"
+
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
@@ -224,8 +226,8 @@ void release_thread(struct task_struct *dead_task)
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int
-copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p)
+copy_thread_tls(unsigned long clone_flags, unsigned long stack_start,
+	    unsigned long stk_sz, struct task_struct *p, unsigned long tls)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
@@ -259,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value[0] = childregs->ARM_r3;
+		thread->tp_value[0] = tls;
 	thread->tp_value[1] = get_tpuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index aba6b2ab7a58..d4392e177484 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -51,7 +51,7 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-int psci_cpu_disable(unsigned int cpu)
+static int psci_cpu_disable(unsigned int cpu)
 {
 	/* Fail early if we don't have CPU_OFF support */
 	if (!psci_ops.cpu_off)
@@ -64,7 +64,7 @@ int psci_cpu_disable(unsigned int cpu)
 	return 0;
 }
 
-void psci_cpu_die(unsigned int cpu)
+static void psci_cpu_die(unsigned int cpu)
 {
 	u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<
 		    PSCI_0_2_POWER_STATE_TYPE_SHIFT;
@@ -76,7 +76,7 @@ void psci_cpu_die(unsigned int cpu)
 	panic("psci: cpu %d failed to shutdown\n", cpu);
 }
 
-int psci_cpu_kill(unsigned int cpu)
+static int psci_cpu_kill(unsigned int cpu)
 {
 	int err, i;
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 324352787aea..b606cded90cd 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -923,7 +923,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 
 	/* Do seccomp after ptrace; syscall may have changed. */
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
-	if (secure_computing(NULL) == -1)
+	if (secure_computing() == -1)
 		return -1;
 #else
 	/* XXX: remove this once OABI gets fixed */
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index b0d2f1fe891d..7b42ac010fdf 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -7,8 +7,6 @@
  */
 #include <linux/export.h>
 #include <linux/ftrace.h>
-
-#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
 #include <linux/sched.h>
 
 #include <asm/stacktrace.h>
@@ -53,6 +51,4 @@ void *return_address(unsigned int level)
 		return NULL;
 }
 
-#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
-
 EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d0a464e317ea..d8e18cdd96d3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1164,8 +1164,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 #endif
 #endif
 
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
index b7b838b05229..cb076d30ab38 100644
--- a/arch/arm/kernel/signal.h
+++ b/arch/arm/kernel/signal.h
@@ -9,3 +9,5 @@ struct rt_sigframe {
 	struct siginfo info;
 	struct sigframe sig;
 };
+
+extern struct page *get_signal_page(void);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 4b0bab2607e4..46e1be9e57a8 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -240,6 +240,10 @@ int __cpu_disable(void)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
+	remove_cpu_topology(cpu);
+#endif
+
 	/*
 	 * Take this CPU offline.  Once we clear this, we can't return,
 	 * and we must not schedule until we're ready to give up the cpu.
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index 9d9b1db73932..d3a85f01b328 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -18,6 +18,7 @@
 #include <asm/memory.h>
 #include <asm/system_info.h>
 #include <asm/traps.h>
+#include <asm/tcm.h>
 
 #define TCMTR_FORMAT_MASK	0xe0000000U
 
@@ -30,8 +31,8 @@ extern char __itcm_start, __sitcm_text, __eitcm_text;
 extern char __dtcm_start, __sdtcm_data, __edtcm_data;
 
 /* These will be increased as we run */
-u32 dtcm_end = DTCM_OFFSET;
-u32 itcm_end = ITCM_OFFSET;
+static u32 dtcm_end = DTCM_OFFSET;
+static u32 itcm_end = ITCM_OFFSET;
 
 /*
  * TCM memory resources
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index b996b2cf0703..dddc7ebf4db4 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -9,6 +9,7 @@
  *  reading the RTC at bootup, etc...
  */
 #include <linux/clk-provider.h>
+#include <linux/clockchips.h>
 #include <linux/clocksource.h>
 #include <linux/errno.h>
 #include <linux/export.h>
@@ -107,5 +108,6 @@ void __init time_init(void)
 		of_clk_init(NULL);
 #endif
 		timer_probe();
+		tick_setup_hrtimer_broadcast();
 	}
 }
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 5b9faba03afb..b5adaf744630 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -95,7 +95,7 @@ static void __init parse_dt_topology(void)
 				 GFP_NOWAIT);
 
 	for_each_possible_cpu(cpu) {
-		const u32 *rate;
+		const __be32 *rate;
 		int len;
 
 		/* too early to use cpu->of_node */
@@ -196,9 +196,8 @@ void store_cpu_topology(unsigned int cpuid)
 	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
 	unsigned int mpidr;
 
-	/* If the cpu topology has been already set, just return */
-	if (cpuid_topo->core_id != -1)
-		return;
+	if (cpuid_topo->package_id != -1)
+		goto topology_populated;
 
 	mpidr = read_cpuid_mpidr();
 
@@ -231,14 +230,15 @@ void store_cpu_topology(unsigned int cpuid)
 		cpuid_topo->package_id = -1;
 	}
 
-	update_siblings_masks(cpuid);
-
 	update_cpu_capacity(cpuid);
 
 	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
 		cpu_topology[cpuid].core_id,
 		cpu_topology[cpuid].package_id, mpidr);
+
+topology_populated:
+	update_siblings_masks(cpuid);
 }
 
 static inline int cpu_corepower_flags(void)
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index c053abd1fb53..abb7dd7e656f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 
 #ifdef CONFIG_PREEMPT
 #define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
 #else
 #define S_PREEMPT ""
 #endif
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 9bf16c93ee6a..c89ac1b9d28b 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -23,6 +23,8 @@
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
 #include <clocksource/arm_arch_timer.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
 
 #define MAX_SYMNAME	64
 
@@ -37,7 +39,7 @@ unsigned int vdso_total_pages __ro_after_init;
  * The VDSO data page.
  */
 static union vdso_data_store vdso_data_store __page_aligned_data;
-static struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;
 
 static struct page *vdso_data_page __ro_after_init;
 static const struct vm_special_mapping vdso_data_mapping = {
@@ -77,7 +79,7 @@ struct elfinfo {
 /* Cached result of boot-time check for whether the arch timer exists,
  * and if so, whether the virtual counter is useable.
  */
-static bool cntvct_ok __ro_after_init;
+bool cntvct_ok __ro_after_init;
 
 static bool __init cntvct_functional(void)
 {
@@ -262,84 +264,3 @@ void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
 		mm->context.vdso = addr;
 }
 
-static void vdso_write_begin(struct vdso_data *vdata)
-{
-	++vdso_data->seq_count;
-	smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
-}
-
-static void vdso_write_end(struct vdso_data *vdata)
-{
-	smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
-	++vdso_data->seq_count;
-}
-
-static bool tk_is_cntvct(const struct timekeeper *tk)
-{
-	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
-		return false;
-
-	if (!tk->tkr_mono.clock->archdata.vdso_direct)
-		return false;
-
-	return true;
-}
-
-/**
- * update_vsyscall - update the vdso data page
- *
- * Increment the sequence counter, making it odd, indicating to
- * userspace that an update is in progress.  Update the fields used
- * for coarse clocks and, if the architected system timer is in use,
- * the fields used for high precision clocks.  Increment the sequence
- * counter again, making it even, indicating to userspace that the
- * update is finished.
- *
- * Userspace is expected to sample seq_count before reading any other
- * fields from the data page.  If seq_count is odd, userspace is
- * expected to wait until it becomes even.  After copying data from
- * the page, userspace must sample seq_count again; if it has changed
- * from its previous value, userspace must retry the whole sequence.
- *
- * Calls to update_vsyscall are serialized by the timekeeping core.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
-	struct timespec64 *wtm = &tk->wall_to_monotonic;
-
-	if (!cntvct_ok) {
-		/* The entry points have been zeroed, so there is no
-		 * point in updating the data page.
-		 */
-		return;
-	}
-
-	vdso_write_begin(vdso_data);
-
-	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
-	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
-	vdso_data->xtime_coarse_nsec		= (u32)(tk->tkr_mono.xtime_nsec >>
-							tk->tkr_mono.shift);
-	vdso_data->wtm_clock_sec		= wtm->tv_sec;
-	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
-
-	if (vdso_data->tk_is_cntvct) {
-		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
-		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_snsec	= tk->tkr_mono.xtime_nsec;
-		vdso_data->cs_mult		= tk->tkr_mono.mult;
-		vdso_data->cs_shift		= tk->tkr_mono.shift;
-		vdso_data->cs_mask		= tk->tkr_mono.mask;
-	}
-
-	vdso_write_end(vdso_data);
-
-	flush_dcache_page(virt_to_page(vdso_data));
-}
-
-void update_vsyscall_tz(void)
-{
-	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
-	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
-	flush_dcache_page(virt_to_page(vdso_data));
-}
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 8c74037ade22..21b8b271c80d 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -70,8 +70,6 @@ SECTIONS
 	ARM_UNWIND_SECTIONS
 #endif
 
-	NOTES
-
 	_etext = .;			/* End of text and rodata section */
 
 	ARM_VECTORS
@@ -114,7 +112,7 @@ SECTIONS
 
 	. = ALIGN(THREAD_SIZE);
 	_sdata = .;
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
 		*(.data..ro_after_init)
 	}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 23150c0f0f4d..319ccb10846a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -81,8 +81,6 @@ SECTIONS
 	ARM_UNWIND_SECTIONS
 #endif
 
-	NOTES
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 	. = ALIGN(1<<SECTION_SHIFT);
 #else
@@ -143,7 +141,7 @@ SECTIONS
 	__init_end = .;
 
 	_sdata = .;
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index b76b75bd9e00..e442d82821df 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += handle_exit.o guest.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o   vgic-v3-coproc.o
 obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
 obj-y += $(KVM)/arm/aarch32.o
 
 obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 684cf64b4033..0e6f23504c26 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -21,6 +21,10 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(halt_successful_poll),
+	VCPU_STAT(halt_attempted_poll),
+	VCPU_STAT(halt_poll_invalid),
+	VCPU_STAT(halt_wakeup),
 	VCPU_STAT(hvc_exit_stat),
 	VCPU_STAT(wfe_exit_stat),
 	VCPU_STAT(wfi_exit_stat),
@@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 {
 	events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
 
+	/*
+	 * We never return a pending ext_dabt here because we deliver it to
+	 * the virtual CPU directly when setting the event and it's no longer
+	 * 'pending' at this point.
+	 */
+
 	return 0;
 }
 
@@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 {
 	bool serror_pending = events->exception.serror_pending;
 	bool has_esr = events->exception.serror_has_esr;
+	bool ext_dabt_pending = events->exception.ext_dabt_pending;
 
 	if (serror_pending && has_esr)
 		return -EINVAL;
 	else if (serror_pending)
 		kvm_inject_vabt(vcpu);
 
+	if (ext_dabt_pending)
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
 	return 0;
 }
 
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 2a6a1394d26e..e58a89d2f13f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -9,7 +9,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 #include <trace/events/kvm.h>
 
 #include "trace.h"
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 5e5f1fabc3d4..e4e25f287ad7 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -161,6 +161,8 @@ config ARCH_BCM2835
 	select GPIOLIB
 	select ARM_AMBA
 	select ARM_ERRATA_411920 if ARCH_MULTI_V6
+	select ARM_GIC if ARCH_MULTI_V7
+	select ZONE_DMA if ARCH_MULTI_V7
 	select ARM_TIMER_SP804
 	select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7
 	select TIMER_OF
@@ -169,7 +171,7 @@ config ARCH_BCM2835
 	select PINCTRL_BCM2835
 	select MFD_CORE
 	help
-	  This enables support for the Broadcom BCM2835 and BCM2836 SoCs.
+	  This enables support for the Broadcom BCM2711 and BCM283x SoCs.
 	  This SoC is used in the Raspberry Pi and Roku 2 devices.
 
 config ARCH_BCM_53573
diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile
index b59c813b1af4..7baa8c9427d5 100644
--- a/arch/arm/mach-bcm/Makefile
+++ b/arch/arm/mach-bcm/Makefile
@@ -42,8 +42,9 @@ obj-$(CONFIG_ARCH_BCM_MOBILE_L2_CACHE) += kona_l2_cache.o
 obj-$(CONFIG_ARCH_BCM_MOBILE_SMC) += bcm_kona_smc.o
 
 # BCM2835
-obj-$(CONFIG_ARCH_BCM2835)	+= board_bcm2835.o
 ifeq ($(CONFIG_ARCH_BCM2835),y)
+obj-y				+= board_bcm2835.o
+obj-y				+= bcm2711.o
 ifeq ($(CONFIG_ARM),y)
 obj-$(CONFIG_SMP)		+= platsmp.o
 endif
diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c
new file mode 100644
index 000000000000..fa0300d8c79d
--- /dev/null
+++ b/arch/arm/mach-bcm/bcm2711.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Stefan Wahren
+ */
+
+#include <linux/of_address.h>
+
+#include <asm/mach/arch.h>
+
+#include "platsmp.h"
+
+static const char * const bcm2711_compat[] = {
+#ifdef CONFIG_ARCH_MULTI_V7
+	"brcm,bcm2711",
+#endif
+	NULL
+};
+
+DT_MACHINE_START(BCM2711, "BCM2711")
+#ifdef CONFIG_ZONE_DMA
+	.dma_zone_size	= SZ_1G,
+#endif
+	.dt_compat = bcm2711_compat,
+	.smp = smp_ops(bcm2836_smp_ops),
+MACHINE_END
diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
index 541e850a736c..43a16f922b53 100644
--- a/arch/arm/mach-bcm/bcm_kona_smc.c
+++ b/arch/arm/mach-bcm/bcm_kona_smc.c
@@ -140,7 +140,7 @@ static int bcm_kona_do_smc(u32 service_id, u32 buffer_phys)
 static void __bcm_kona_smc(void *info)
 {
 	struct bcm_kona_smc_data *data = info;
-	u32 *args = bcm_smc_buffer;
+	u32 __iomem *args = bcm_smc_buffer;
 
 	BUG_ON(smp_processor_id() != 0);
 	BUG_ON(!args);
diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c
index 47f8053d0240..c9db2a9006d9 100644
--- a/arch/arm/mach-bcm/platsmp.c
+++ b/arch/arm/mach-bcm/platsmp.c
@@ -22,6 +22,8 @@
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
 
+#include "platsmp.h"
+
 /* Size of mapped Cortex A9 SCU address space */
 #define CORTEX_A9_SCU_SIZE	0x58
 
@@ -103,7 +105,7 @@ static int nsp_write_lut(unsigned int cpu)
 	if (!secondary_boot_addr)
 		return -EINVAL;
 
-	sku_rom_lut = ioremap_nocache((phys_addr_t)secondary_boot_addr,
+	sku_rom_lut = ioremap((phys_addr_t)secondary_boot_addr,
 				      sizeof(phys_addr_t));
 	if (!sku_rom_lut) {
 		pr_warn("unable to ioremap SKU-ROM LUT register for cpu %u\n", cpu);
@@ -172,7 +174,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	if (!secondary_boot_addr)
 		return -EINVAL;
 
-	boot_reg = ioremap_nocache((phys_addr_t)secondary_boot_addr,
+	boot_reg = ioremap((phys_addr_t)secondary_boot_addr,
 				   sizeof(phys_addr_t));
 	if (!boot_reg) {
 		pr_err("unable to map boot register for cpu %u\n", cpu_id);
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index dd427bd2768c..02b180ad7245 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -9,6 +9,7 @@ menuconfig ARCH_DAVINCI
 	select PM_GENERIC_DOMAINS if PM
 	select PM_GENERIC_DOMAINS_OF if PM && OF
 	select REGMAP_MMIO
+	select RESET_CONTROLLER
 	select HAVE_IDE
 	select PINCTRL_SINGLE
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 9d87d4e440ea..040c949414fa 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -823,6 +823,17 @@ static int davinci_phy_fixup(struct phy_device *phydev)
 
 #define HAS_NAND	IS_ENABLED(CONFIG_MTD_NAND_DAVINCI)
 
+#define GPIO_nVBUS_DRV		160
+
+static struct gpiod_lookup_table dm644evm_usb_gpio_table = {
+	.dev_id = "musb-davinci",
+	.table = {
+		GPIO_LOOKUP("davinci_gpio", GPIO_nVBUS_DRV, NULL,
+			    GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
 static __init void davinci_evm_init(void)
 {
 	int ret;
@@ -875,6 +886,7 @@ static __init void davinci_evm_init(void)
 	dm644x_init_asp();
 
 	/* irlml6401 switches over 1A, in under 8 msec */
+	gpiod_add_lookup_table(&dm644evm_usb_gpio_table);
 	davinci_setup_usb(1000, 8);
 
 	if (IS_BUILTIN(CONFIG_PHYLIB)) {
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 3e447d468845..e650131ee88f 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -34,7 +34,7 @@ void __iomem  *davinci_sysmod_base;
 
 void davinci_map_sysmod(void)
 {
-	davinci_sysmod_base = ioremap_nocache(DAVINCI_SYSTEM_MODULE_BASE,
+	davinci_sysmod_base = ioremap(DAVINCI_SYSTEM_MODULE_BASE,
 					      0x800);
 	/*
 	 * Throw a bug since a lot of board initialization code depends
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 9dab1f50a02f..6e7f10c8098a 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -12,7 +12,9 @@ menuconfig ARCH_EXYNOS
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARM_AMBA
 	select ARM_GIC
+	select EXYNOS_IRQ_COMBINER
 	select COMMON_CLK_SAMSUNG
+	select EXYNOS_ASV
 	select EXYNOS_CHIPID
 	select EXYNOS_THERMAL
 	select EXYNOS_PMU
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 8b81a17f675d..416462e3f5d6 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -31,7 +31,6 @@
 				  PCI_STATUS_PARITY) << 16)
 
 extern int setup_arm_irq(int, struct irqaction *);
-extern void pcibios_report_status(u_int status_mask, int warn);
 
 static unsigned long
 dc21285_base_address(struct pci_bus *bus, unsigned int devfn)
diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig
index 98338a489921..3b010fe7c0e9 100644
--- a/arch/arm/mach-hisi/Kconfig
+++ b/arch/arm/mach-hisi/Kconfig
@@ -15,7 +15,6 @@ menu "Hisilicon platform type"
 
 config ARCH_HI3xxx
 	bool "Hisilicon Hi36xx family"
-	depends on ARCH_MULTI_V7
 	select CACHE_L2X0
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
@@ -25,17 +24,15 @@ config ARCH_HI3xxx
 	  Support for Hisilicon Hi36xx SoC family
 
 config ARCH_HIP01
-       bool "Hisilicon HIP01 family"
-       depends on ARCH_MULTI_V7
-       select HAVE_ARM_SCU if SMP
-       select HAVE_ARM_TWD if SMP
-       select ARM_GLOBAL_TIMER
-       help
-         Support for Hisilicon HIP01 SoC family
+	bool "Hisilicon HIP01 family"
+	select HAVE_ARM_SCU if SMP
+	select HAVE_ARM_TWD if SMP
+	select ARM_GLOBAL_TIMER
+	help
+	  Support for Hisilicon HIP01 SoC family
 
 config ARCH_HIP04
 	bool "Hisilicon HiP04 Cortex A15 family"
-	depends on ARCH_MULTI_V7
 	select ARM_ERRATA_798181 if SMP
 	select HAVE_ARM_ARCH_TIMER
 	select MCPM if SMP
@@ -46,7 +43,6 @@ config ARCH_HIP04
 
 config ARCH_HIX5HD2
 	bool "Hisilicon X5HD2 family"
-	depends on ARCH_MULTI_V7
 	select CACHE_L2X0
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c
index 777d8c255501..8fb68c0ec34c 100644
--- a/arch/arm/mach-imx/anatop.c
+++ b/arch/arm/mach-imx/anatop.c
@@ -19,8 +19,6 @@
 #define ANADIG_REG_2P5		0x130
 #define ANADIG_REG_CORE		0x140
 #define ANADIG_ANA_MISC0	0x150
-#define ANADIG_USB1_CHRG_DETECT	0x1b0
-#define ANADIG_USB2_CHRG_DETECT	0x210
 #define ANADIG_DIGPROG		0x260
 #define ANADIG_DIGPROG_IMX6SL	0x280
 #define ANADIG_DIGPROG_IMX7D	0x800
@@ -33,8 +31,6 @@
 #define BM_ANADIG_ANA_MISC0_STOP_MODE_CONFIG	0x1000
 /* Below MISC0_DISCON_HIGH_SNVS is only for i.MX6SL */
 #define BM_ANADIG_ANA_MISC0_DISCON_HIGH_SNVS	0x2000
-#define BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B	0x80000
-#define BM_ANADIG_USB_CHRG_DETECT_EN_B		0x100000
 
 static struct regmap *anatop;
 
@@ -96,16 +92,6 @@ void imx_anatop_post_resume(void)
 
 }
 
-static void imx_anatop_usb_chrg_detect_disable(void)
-{
-	regmap_write(anatop, ANADIG_USB1_CHRG_DETECT,
-		BM_ANADIG_USB_CHRG_DETECT_EN_B
-		| BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B);
-	regmap_write(anatop, ANADIG_USB2_CHRG_DETECT,
-		BM_ANADIG_USB_CHRG_DETECT_EN_B |
-		BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B);
-}
-
 void __init imx_init_revision_from_anatop(void)
 {
 	struct device_node *np;
@@ -171,10 +157,6 @@ void __init imx_init_revision_from_anatop(void)
 void __init imx_anatop_init(void)
 {
 	anatop = syscon_regmap_lookup_by_compatible("fsl,imx6q-anatop");
-	if (IS_ERR(anatop)) {
+	if (IS_ERR(anatop))
 		pr_err("%s: failed to find imx6q-anatop regmap!\n", __func__);
-		return;
-	}
-
-	imx_anatop_usb_chrg_detect_disable();
 }
diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c
index 0b137eeffb61..871f98342d50 100644
--- a/arch/arm/mach-imx/cpu.c
+++ b/arch/arm/mach-imx/cpu.c
@@ -1,15 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/err.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/sys_soc.h>
 
 #include "hardware.h"
 #include "common.h"
 
+#define OCOTP_UID_H	0x420
+#define OCOTP_UID_L	0x410
+
 unsigned int __mxc_cpu_type;
 static unsigned int imx_soc_revision;
 
@@ -76,9 +81,13 @@ void __init imx_aips_allow_unprivileged_access(
 struct device * __init imx_soc_device_init(void)
 {
 	struct soc_device_attribute *soc_dev_attr;
+	const char *ocotp_compat = NULL;
 	struct soc_device *soc_dev;
 	struct device_node *root;
+	struct regmap *ocotp = NULL;
 	const char *soc_id;
+	u64 soc_uid = 0;
+	u32 val;
 	int ret;
 
 	soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
@@ -119,30 +128,39 @@ struct device * __init imx_soc_device_init(void)
 		soc_id = "i.MX53";
 		break;
 	case MXC_CPU_IMX6SL:
+		ocotp_compat = "fsl,imx6sl-ocotp";
 		soc_id = "i.MX6SL";
 		break;
 	case MXC_CPU_IMX6DL:
+		ocotp_compat = "fsl,imx6q-ocotp";
 		soc_id = "i.MX6DL";
 		break;
 	case MXC_CPU_IMX6SX:
+		ocotp_compat = "fsl,imx6sx-ocotp";
 		soc_id = "i.MX6SX";
 		break;
 	case MXC_CPU_IMX6Q:
+		ocotp_compat = "fsl,imx6q-ocotp";
 		soc_id = "i.MX6Q";
 		break;
 	case MXC_CPU_IMX6UL:
+		ocotp_compat = "fsl,imx6ul-ocotp";
 		soc_id = "i.MX6UL";
 		break;
 	case MXC_CPU_IMX6ULL:
+		ocotp_compat = "fsl,imx6ull-ocotp";
 		soc_id = "i.MX6ULL";
 		break;
 	case MXC_CPU_IMX6ULZ:
+		ocotp_compat = "fsl,imx6ull-ocotp";
 		soc_id = "i.MX6ULZ";
 		break;
 	case MXC_CPU_IMX6SLL:
+		ocotp_compat = "fsl,imx6sll-ocotp";
 		soc_id = "i.MX6SLL";
 		break;
 	case MXC_CPU_IMX7D:
+		ocotp_compat = "fsl,imx7d-ocotp";
 		soc_id = "i.MX7D";
 		break;
 	case MXC_CPU_IMX7ULP:
@@ -153,18 +171,38 @@ struct device * __init imx_soc_device_init(void)
 	}
 	soc_dev_attr->soc_id = soc_id;
 
+	if (ocotp_compat) {
+		ocotp = syscon_regmap_lookup_by_compatible(ocotp_compat);
+		if (IS_ERR(ocotp))
+			pr_err("%s: failed to find %s regmap!\n", __func__, ocotp_compat);
+	}
+
+	if (!IS_ERR_OR_NULL(ocotp)) {
+		regmap_read(ocotp, OCOTP_UID_H, &val);
+		soc_uid = val;
+		regmap_read(ocotp, OCOTP_UID_L, &val);
+		soc_uid <<= 32;
+		soc_uid |= val;
+	}
+
 	soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%d.%d",
 					   (imx_soc_revision >> 4) & 0xf,
 					   imx_soc_revision & 0xf);
 	if (!soc_dev_attr->revision)
 		goto free_soc;
 
+	soc_dev_attr->serial_number = kasprintf(GFP_KERNEL, "%016llX", soc_uid);
+	if (!soc_dev_attr->serial_number)
+		goto free_rev;
+
 	soc_dev = soc_device_register(soc_dev_attr);
 	if (IS_ERR(soc_dev))
-		goto free_rev;
+		goto free_serial_number;
 
 	return soc_device_to_device(soc_dev);
 
+free_serial_number:
+	kfree(soc_dev_attr->serial_number);
 free_rev:
 	kfree(soc_dev_attr->revision);
 free_soc:
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 39a7d9393641..24dd5bbe60e4 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
  */
 void imx6q_cpuidle_fec_irqs_used(void)
 {
-	imx6q_cpuidle_driver.states[1].disabled = true;
+	cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true);
 }
 EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used);
 
 void imx6q_cpuidle_fec_irqs_unused(void)
 {
-	imx6q_cpuidle_driver.states[1].disabled = false;
+	cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false);
 }
 EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused);
 
diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c
index 089d11ffaa3e..82e22398d43d 100644
--- a/arch/arm/mach-imx/hotplug.c
+++ b/arch/arm/mach-imx/hotplug.c
@@ -6,32 +6,12 @@
 
 #include <linux/errno.h>
 #include <linux/jiffies.h>
+#include <asm/cacheflush.h>
 #include <asm/cp15.h>
 #include <asm/proc-fns.h>
 
 #include "common.h"
 
-static inline void cpu_enter_lowpower(void)
-{
-	unsigned int v;
-
-	asm volatile(
-		"mcr	p15, 0, %1, c7, c5, 0\n"
-	"	mcr	p15, 0, %1, c7, c10, 4\n"
-	/*
-	 * Turn off coherency
-	 */
-	"	mrc	p15, 0, %0, c1, c0, 1\n"
-	"	bic	%0, %0, %3\n"
-	"	mcr	p15, 0, %0, c1, c0, 1\n"
-	"	mrc	p15, 0, %0, c1, c0, 0\n"
-	"	bic	%0, %0, %2\n"
-	"	mcr	p15, 0, %0, c1, c0, 0\n"
-	  : "=&r" (v)
-	  : "r" (0), "Ir" (CR_C), "Ir" (0x40)
-	  : "cc");
-}
-
 /*
  * platform-specific code to shutdown a CPU
  *
@@ -39,7 +19,7 @@ static inline void cpu_enter_lowpower(void)
  */
 void imx_cpu_die(unsigned int cpu)
 {
-	cpu_enter_lowpower();
+	v7_exit_coherency_flush(louis);
 	/*
 	 * We use the cpu jumping argument register to sync with
 	 * imx_cpu_kill() which is running on cpu0 and waiting for
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 648932d8d7a8..507ee3878769 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -132,6 +132,22 @@ static struct platform_device fsg_leds = {
 };
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource fsg_eth_npeb_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct resource fsg_eth_npec_resources[] = {
+	{
+		.start		= IXP4XX_EthC_BASE_PHYS,
+		.end		= IXP4XX_EthC_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info fsg_plat_eth[] = {
 	{
 		.phy		= 5,
@@ -151,12 +167,16 @@ static struct platform_device fsg_eth[] = {
 		.dev = {
 			.platform_data	= fsg_plat_eth,
 		},
+		.num_resources	= ARRAY_SIZE(fsg_eth_npeb_resources),
+		.resource	= fsg_eth_npeb_resources,
 	}, {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEC,
 		.dev = {
 			.platform_data	= fsg_plat_eth + 1,
 		},
+		.num_resources	= ARRAY_SIZE(fsg_eth_npec_resources),
+		.resource	= fsg_eth_npec_resources,
 	}
 };
 
diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c
index a0e0b6b7dc5c..07b50dfcc489 100644
--- a/arch/arm/mach-ixp4xx/goramo_mlr.c
+++ b/arch/arm/mach-ixp4xx/goramo_mlr.c
@@ -11,6 +11,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/platform_data/wan_ixp4xx_hss.h>
 #include <linux/serial_8250.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -272,6 +273,22 @@ static struct platform_device device_uarts = {
 
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource eth_npeb_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct resource eth_npec_resources[] = {
+	{
+		.start		= IXP4XX_EthC_BASE_PHYS,
+		.end		= IXP4XX_EthC_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info eth_plat[] = {
 	{
 		.phy		= 0,
@@ -289,10 +306,14 @@ static struct platform_device device_eth_tab[] = {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEB,
 		.dev.platform_data	= eth_plat,
+		.num_resources		= ARRAY_SIZE(eth_npeb_resources),
+		.resource		= eth_npeb_resources,
 	}, {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEC,
 		.dev.platform_data	= eth_plat + 1,
+		.num_resources		= ARRAY_SIZE(eth_npec_resources),
+		.resource		= eth_npec_resources,
 	}
 };
 
@@ -405,6 +426,9 @@ static void __init gmlr_init(void)
 	if (hw_bits & CFG_HW_HAS_HSS1)
 		device_tab[devices++] = &device_hss_tab[1]; /* max index 5 */
 
+	hss_plat[0].timer_freq = ixp4xx_timer_freq;
+	hss_plat[1].timer_freq = ixp4xx_timer_freq;
+
 	gpio_request(GPIO_SCL, "SCL/clock");
 	gpio_request(GPIO_SDA, "SDA/data");
 	gpio_request(GPIO_STR, "strobe");
diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h
index 342acbe20f7c..6d403fe0bf52 100644
--- a/arch/arm/mach-ixp4xx/include/mach/platform.h
+++ b/arch/arm/mach-ixp4xx/include/mach/platform.h
@@ -15,6 +15,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/reboot.h>
+#include <linux/platform_data/eth_ixp4xx.h>
 
 #include <asm/types.h>
 
@@ -92,27 +93,6 @@ struct ixp4xx_pata_data {
 	void __iomem	*cs1;
 };
 
-#define IXP4XX_ETH_NPEA		0x00
-#define IXP4XX_ETH_NPEB		0x10
-#define IXP4XX_ETH_NPEC		0x20
-
-/* Information about built-in Ethernet MAC interfaces */
-struct eth_plat_info {
-	u8 phy;		/* MII PHY ID, 0 - 31 */
-	u8 rxq;		/* configurable, currently 0 - 31 only */
-	u8 txreadyq;
-	u8 hwaddr[6];
-};
-
-/* Information about built-in HSS (synchronous serial) interfaces */
-struct hss_plat_info {
-	int (*set_clock)(int port, unsigned int clock_type);
-	int (*open)(int port, void *pdev,
-		    void (*set_carrier_cb)(void *pdev, int carrier));
-	void (*close)(int port, void *pdev);
-	u8 txreadyq;
-};
-
 /*
  * Frequency of clock used for primary clocksource
  */
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 6f0f7ed18ea8..45d5b720ded6 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -187,6 +187,22 @@ static struct platform_device ixdp425_uart = {
 };
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource ixp425_npeb_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct resource ixp425_npec_resources[] = {
+	{
+		.start		= IXP4XX_EthC_BASE_PHYS,
+		.end		= IXP4XX_EthC_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info ixdp425_plat_eth[] = {
 	{
 		.phy		= 0,
@@ -204,10 +220,14 @@ static struct platform_device ixdp425_eth[] = {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEB,
 		.dev.platform_data	= ixdp425_plat_eth,
+		.num_resources		= ARRAY_SIZE(ixp425_npeb_resources),
+		.resource		= ixp425_npeb_resources,
 	}, {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEC,
 		.dev.platform_data	= ixdp425_plat_eth + 1,
+		.num_resources		= ARRAY_SIZE(ixp425_npec_resources),
+		.resource		= ixp425_npec_resources,
 	}
 };
 
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index c142cfa8c5d6..6959ad2e3aec 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -165,6 +165,14 @@ static struct platform_device nas100d_uart = {
 };
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource nas100d_eth_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info nas100d_plat_eth[] = {
 	{
 		.phy		= 0,
@@ -178,6 +186,8 @@ static struct platform_device nas100d_eth[] = {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEB,
 		.dev.platform_data	= nas100d_plat_eth,
+		.num_resources		= ARRAY_SIZE(nas100d_eth_resources),
+		.resource		= nas100d_eth_resources,
 	}
 };
 
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index ee1877fcfafe..a428bb918703 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -185,6 +185,14 @@ static struct platform_device nslu2_uart = {
 };
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource nslu2_eth_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info nslu2_plat_eth[] = {
 	{
 		.phy		= 1,
@@ -198,6 +206,8 @@ static struct platform_device nslu2_eth[] = {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEB,
 		.dev.platform_data	= nslu2_plat_eth,
+		.num_resources		= ARRAY_SIZE(nslu2_eth_resources),
+		.resource		= nslu2_eth_resources,
 	}
 };
 
diff --git a/arch/arm/mach-ixp4xx/omixp-setup.c b/arch/arm/mach-ixp4xx/omixp-setup.c
index 6ed5a9aed600..8f2b8c473d7a 100644
--- a/arch/arm/mach-ixp4xx/omixp-setup.c
+++ b/arch/arm/mach-ixp4xx/omixp-setup.c
@@ -170,6 +170,22 @@ static struct platform_device mic256_leds = {
 };
 
 /* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource ixp425_npeb_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct resource ixp425_npec_resources[] = {
+	{
+		.start		= IXP4XX_EthC_BASE_PHYS,
+		.end		= IXP4XX_EthC_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info ixdp425_plat_eth[] = {
 	{
 		.phy		= 0,
@@ -187,10 +203,14 @@ static struct platform_device ixdp425_eth[] = {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEB,
 		.dev.platform_data	= ixdp425_plat_eth,
+		.num_resources		= ARRAY_SIZE(ixp425_npeb_resources),
+		.resource		= ixp425_npeb_resources,
 	}, {
 		.name			= "ixp4xx_eth",
 		.id			= IXP4XX_ETH_NPEC,
 		.dev.platform_data	= ixdp425_plat_eth + 1,
+		.num_resources		= ARRAY_SIZE(ixp425_npec_resources),
+		.resource		= ixp425_npec_resources,
 	},
 };
 
diff --git a/arch/arm/mach-ixp4xx/vulcan-setup.c b/arch/arm/mach-ixp4xx/vulcan-setup.c
index d2ebb7c675a8..e506d2af98ad 100644
--- a/arch/arm/mach-ixp4xx/vulcan-setup.c
+++ b/arch/arm/mach-ixp4xx/vulcan-setup.c
@@ -124,6 +124,22 @@ static struct platform_device vulcan_uart = {
 	.num_resources		= ARRAY_SIZE(vulcan_uart_resources),
 };
 
+static struct resource vulcan_npeb_resources[] = {
+	{
+		.start		= IXP4XX_EthB_BASE_PHYS,
+		.end		= IXP4XX_EthB_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct resource vulcan_npec_resources[] = {
+	{
+		.start		= IXP4XX_EthC_BASE_PHYS,
+		.end		= IXP4XX_EthC_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct eth_plat_info vulcan_plat_eth[] = {
 	[0] = {
 		.phy		= 0,
@@ -144,6 +160,8 @@ static struct platform_device vulcan_eth[] = {
 		.dev = {
 			.platform_data	= &vulcan_plat_eth[0],
 		},
+		.num_resources		= ARRAY_SIZE(vulcan_npeb_resources),
+		.resource		= vulcan_npeb_resources,
 	},
 	[1] = {
 		.name			= "ixp4xx_eth",
@@ -151,6 +169,8 @@ static struct platform_device vulcan_eth[] = {
 		.dev = {
 			.platform_data	= &vulcan_plat_eth[1],
 		},
+		.num_resources		= ARRAY_SIZE(vulcan_npec_resources),
+		.resource		= vulcan_npec_resources,
 	},
 };
 
diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig
index 0440109e973b..b58a03b18bde 100644
--- a/arch/arm/mach-mmp/Kconfig
+++ b/arch/arm/mach-mmp/Kconfig
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menuconfig ARCH_MMP
-	bool "Marvell PXA168/910/MMP2"
+	bool "Marvell PXA168/910/MMP2/MMP3"
 	depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
 	select GPIO_PXA
 	select GPIOLIB
 	select PINCTRL
 	select PLAT_PXA
 	help
-	  Support for Marvell's PXA168/PXA910(MMP) and MMP2 processor line.
+	  Support for Marvell's PXA168/PXA910(MMP), MMP2, and MMP3 processor lines.
 
 if ARCH_MMP
 
@@ -129,6 +129,24 @@ config MACH_MMP2_DT
 	  Include support for Marvell MMP2 based platforms using
 	  the device tree.
 
+config MACH_MMP3_DT
+	bool "Support MMP3 (ARMv7) platforms"
+	depends on ARCH_MULTI_V7
+	select ARM_GIC
+	select HAVE_ARM_SCU if SMP
+	select HAVE_ARM_TWD if SMP
+	select CACHE_L2X0
+	select PINCTRL
+	select PINCTRL_SINGLE
+	select ARCH_HAS_RESET_CONTROLLER
+	select CPU_PJ4B
+	select PM_GENERIC_DOMAINS if PM
+	select PM_GENERIC_DOMAINS_OF if PM && OF
+	help
+	  Say 'Y' here if you want to include support for platforms
+	  with Marvell MMP3 processor, also known as PXA2128 or
+	  Armada 620.
+
 endmenu
 
 config CPU_PXA168
diff --git a/arch/arm/mach-mmp/Makefile b/arch/arm/mach-mmp/Makefile
index 8f267c7bc6e8..7b3a7f979eec 100644
--- a/arch/arm/mach-mmp/Makefile
+++ b/arch/arm/mach-mmp/Makefile
@@ -22,6 +22,9 @@ ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_CPU_PXA910)	+= pm-pxa910.o
 obj-$(CONFIG_CPU_MMP2)		+= pm-mmp2.o
 endif
+ifeq ($(CONFIG_SMP),y)
+obj-$(CONFIG_MACH_MMP3_DT)	+= platsmp.o
+endif
 
 # board support
 obj-$(CONFIG_MACH_ASPENITE)	+= aspenite.o
@@ -34,5 +37,6 @@ obj-$(CONFIG_MACH_FLINT)	+= flint.o
 obj-$(CONFIG_MACH_MARVELL_JASPER) += jasper.o
 obj-$(CONFIG_MACH_MMP_DT)	+= mmp-dt.o
 obj-$(CONFIG_MACH_MMP2_DT)	+= mmp2-dt.o
+obj-$(CONFIG_MACH_MMP3_DT)	+= mmp3.o
 obj-$(CONFIG_MACH_TETON_BGA)	+= teton_bga.o
 obj-$(CONFIG_MACH_GPLUGD)	+= gplugd.o
diff --git a/arch/arm/mach-mmp/addr-map.h b/arch/arm/mach-mmp/addr-map.h
index 25edf6a92276..3dc2f0b0ecba 100644
--- a/arch/arm/mach-mmp/addr-map.h
+++ b/arch/arm/mach-mmp/addr-map.h
@@ -20,6 +20,10 @@
 #define AXI_VIRT_BASE		IOMEM(0xfe200000)
 #define AXI_PHYS_SIZE		0x00200000
 
+#define PGU_PHYS_BASE		0xe0000000
+#define PGU_VIRT_BASE		IOMEM(0xfe400000)
+#define PGU_PHYS_SIZE		0x00100000
+
 /* Static Memory Controller - Chip Select 0 and 1 */
 #define SMC_CS0_PHYS_BASE	0x80000000
 #define SMC_CS0_PHYS_SIZE	0x10000000
@@ -38,4 +42,7 @@
 #define CIU_VIRT_BASE		(AXI_VIRT_BASE + 0x82c00)
 #define CIU_REG(x)		(CIU_VIRT_BASE + (x))
 
+#define SCU_VIRT_BASE		(PGU_VIRT_BASE)
+#define SCU_REG(x)		(SCU_VIRT_BASE + (x))
+
 #endif /* __ASM_MACH_ADDR_MAP_H */
diff --git a/arch/arm/mach-mmp/common.c b/arch/arm/mach-mmp/common.c
index 6684abc7708b..e94349d4726c 100644
--- a/arch/arm/mach-mmp/common.c
+++ b/arch/arm/mach-mmp/common.c
@@ -13,11 +13,11 @@
 #include <asm/mach/map.h>
 #include <asm/system_misc.h>
 #include "addr-map.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 
 #include "common.h"
 
-#define MMP_CHIPID	(AXI_VIRT_BASE + 0x82c00)
+#define MMP_CHIPID	CIU_REG(0x00)
 
 unsigned int mmp_chip_id;
 EXPORT_SYMBOL(mmp_chip_id);
@@ -36,6 +36,15 @@ static struct map_desc standard_io_desc[] __initdata = {
 	},
 };
 
+static struct map_desc mmp2_io_desc[] __initdata = {
+	{
+		.pfn		= __phys_to_pfn(PGU_PHYS_BASE),
+		.virtual	= (unsigned long)PGU_VIRT_BASE,
+		.length		= PGU_PHYS_SIZE,
+		.type		= MT_DEVICE,
+	},
+};
+
 void __init mmp_map_io(void)
 {
 	iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc));
@@ -44,6 +53,12 @@ void __init mmp_map_io(void)
 	mmp_chip_id = __raw_readl(MMP_CHIPID);
 }
 
+void __init mmp2_map_io(void)
+{
+	mmp_map_io();
+	iotable_init(mmp2_io_desc, ARRAY_SIZE(mmp2_io_desc));
+}
+
 void mmp_restart(enum reboot_mode mode, const char *cmd)
 {
 	soft_restart(0);
diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h
index 483b8b6d3005..ed56b3f15b45 100644
--- a/arch/arm/mach-mmp/common.h
+++ b/arch/arm/mach-mmp/common.h
@@ -5,4 +5,5 @@
 extern void mmp_timer_init(int irq, unsigned long rate);
 
 extern void __init mmp_map_io(void);
+extern void __init mmp2_map_io(void);
 extern void mmp_restart(enum reboot_mode, const char *);
diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h
deleted file mode 100644
index a96abcf521b4..000000000000
--- a/arch/arm/mach-mmp/cputype.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MACH_CPUTYPE_H
-#define __ASM_MACH_CPUTYPE_H
-
-#include <asm/cputype.h>
-
-/*
- *  CPU   Stepping   CPU_ID      CHIP_ID
- *
- * PXA168    S0    0x56158400   0x0000C910
- * PXA168    A0    0x56158400   0x00A0A168
- * PXA910    Y1    0x56158400   0x00F2C920
- * PXA910    A0    0x56158400   0x00F2C910
- * PXA910    A1    0x56158400   0x00A0C910
- * PXA920    Y0    0x56158400   0x00F2C920
- * PXA920    A0    0x56158400   0x00A0C920
- * PXA920    A1    0x56158400   0x00A1C920
- * MMP2	     Z0	   0x560f5811   0x00F00410
- * MMP2      Z1    0x560f5811   0x00E00410
- * MMP2      A0    0x560f5811   0x00A0A610
- */
-
-extern unsigned int mmp_chip_id;
-
-#ifdef CONFIG_CPU_PXA168
-static inline int cpu_is_pxa168(void)
-{
-	return (((read_cpuid_id() >> 8) & 0xff) == 0x84) &&
-		((mmp_chip_id & 0xfff) == 0x168);
-}
-#else
-#define cpu_is_pxa168()	(0)
-#endif
-
-/* cpu_is_pxa910() is shared on both pxa910 and pxa920 */
-#ifdef CONFIG_CPU_PXA910
-static inline int cpu_is_pxa910(void)
-{
-	return (((read_cpuid_id() >> 8) & 0xff) == 0x84) &&
-		(((mmp_chip_id & 0xfff) == 0x910) ||
-		 ((mmp_chip_id & 0xfff) == 0x920));
-}
-#else
-#define cpu_is_pxa910()	(0)
-#endif
-
-#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT)
-static inline int cpu_is_mmp2(void)
-{
-	return (((read_cpuid_id() >> 8) & 0xff) == 0x58) &&
-		(((mmp_chip_id & 0xfff) == 0x410) ||
-		 ((mmp_chip_id & 0xfff) == 0x610));
-}
-#else
-#define cpu_is_mmp2()	(0)
-#endif
-
-#endif /* __ASM_MACH_CPUTYPE_H */
diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c
index 130c1a603ba2..18bee66a671f 100644
--- a/arch/arm/mach-mmp/devices.c
+++ b/arch/arm/mach-mmp/devices.c
@@ -11,7 +11,7 @@
 #include <asm/irq.h>
 #include "irqs.h"
 #include "devices.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "regs-usb.h"
 
 int __init pxa_register_device(struct pxa_device_desc *desc,
diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c
index 35559792d5cc..91214996acec 100644
--- a/arch/arm/mach-mmp/mmp-dt.c
+++ b/arch/arm/mach-mmp/mmp-dt.c
@@ -9,14 +9,13 @@
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
 #include <linux/clk-provider.h>
+#include <linux/clocksource.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 #include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
-extern void __init mmp_dt_init_timer(void);
-
 static const char *const pxa168_dt_board_compat[] __initconst = {
 	"mrvl,pxa168-aspenite",
 	NULL,
@@ -32,8 +31,8 @@ static void __init mmp_init_time(void)
 #ifdef CONFIG_CACHE_TAUROS2
 	tauros2_init(0);
 #endif
-	mmp_dt_init_timer();
 	of_clk_init(NULL);
+	timer_probe();
 }
 
 DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)")
diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c
index 305a9daba6d6..510c762ddc48 100644
--- a/arch/arm/mach-mmp/mmp2-dt.c
+++ b/arch/arm/mach-mmp/mmp2-dt.c
@@ -10,21 +10,20 @@
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
 #include <linux/clk-provider.h>
+#include <linux/clocksource.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 #include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
-extern void __init mmp_dt_init_timer(void);
-
 static void __init mmp_init_time(void)
 {
 #ifdef CONFIG_CACHE_TAUROS2
 	tauros2_init(0);
 #endif
 	of_clk_init(NULL);
-	mmp_dt_init_timer();
+	timer_probe();
 }
 
 static const char *const mmp2_dt_board_compat[] __initconst = {
@@ -33,7 +32,7 @@ static const char *const mmp2_dt_board_compat[] __initconst = {
 };
 
 DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)")
-	.map_io		= mmp_map_io,
+	.map_io		= mmp2_map_io,
 	.init_time	= mmp_init_time,
 	.dt_compat	= mmp2_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
index 18ea3e1a26e6..bbc4c2274de3 100644
--- a/arch/arm/mach-mmp/mmp2.c
+++ b/arch/arm/mach-mmp/mmp2.c
@@ -20,7 +20,7 @@
 #include <asm/mach/time.h>
 #include "addr-map.h"
 #include "regs-apbc.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "irqs.h"
 #include "mfp.h"
 #include "devices.h"
diff --git a/arch/arm/mach-mmp/mmp3.c b/arch/arm/mach-mmp/mmp3.c
new file mode 100644
index 000000000000..b0e86964f302
--- /dev/null
+++ b/arch/arm/mach-mmp/mmp3.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Marvell MMP3 aka PXA2128 aka 88AP2128 support
+ *
+ *  Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk>
+ */
+
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/of_platform.h>
+#include <linux/clk-provider.h>
+#include <asm/mach/arch.h>
+#include <asm/hardware/cache-l2x0.h>
+
+#include "common.h"
+
+static const char *const mmp3_dt_board_compat[] __initconst = {
+	"marvell,mmp3",
+	NULL,
+};
+
+DT_MACHINE_START(MMP2_DT, "Marvell MMP3")
+	.map_io		= mmp2_map_io,
+	.dt_compat	= mmp3_dt_board_compat,
+	.l2c_aux_val	= 1 << L310_AUX_CTRL_FWA_SHIFT |
+			  L310_AUX_CTRL_DATA_PREFETCH |
+			  L310_AUX_CTRL_INSTR_PREFETCH,
+	.l2c_aux_mask	= 0xc20fffff,
+MACHINE_END
diff --git a/arch/arm/mach-mmp/platsmp.c b/arch/arm/mach-mmp/platsmp.c
new file mode 100644
index 000000000000..c99405469bb4
--- /dev/null
+++ b/arch/arm/mach-mmp/platsmp.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk>
+ */
+#include <linux/io.h>
+#include <asm/smp_scu.h>
+#include <asm/smp.h>
+#include "addr-map.h"
+
+#define SW_BRANCH_VIRT_ADDR	CIU_REG(0x24)
+
+static int mmp3_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	/*
+	 * Apparently, the boot ROM on the second core spins on this
+	 * register becoming non-zero and then jumps to the address written
+	 * there. No IPIs involved.
+	 */
+	__raw_writel(__pa_symbol(secondary_startup), SW_BRANCH_VIRT_ADDR);
+	return 0;
+}
+
+static void mmp3_smp_prepare_cpus(unsigned int max_cpus)
+{
+	scu_enable(SCU_VIRT_BASE);
+}
+
+static const struct smp_operations mmp3_smp_ops __initconst = {
+	.smp_prepare_cpus	= mmp3_smp_prepare_cpus,
+	.smp_boot_secondary	= mmp3_boot_secondary,
+};
+CPU_METHOD_OF_DECLARE(mmp3_smp, "marvell,mmp3-smp", &mmp3_smp_ops);
diff --git a/arch/arm/mach-mmp/pm-mmp2.c b/arch/arm/mach-mmp/pm-mmp2.c
index 2923dd5732a6..2d86381e152d 100644
--- a/arch/arm/mach-mmp/pm-mmp2.c
+++ b/arch/arm/mach-mmp/pm-mmp2.c
@@ -17,7 +17,7 @@
 #include <linux/interrupt.h>
 #include <asm/mach-types.h>
 
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "addr-map.h"
 #include "pm-mmp2.h"
 #include "regs-icu.h"
diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c
index 58535ce206dc..69ebe18ff209 100644
--- a/arch/arm/mach-mmp/pm-pxa910.c
+++ b/arch/arm/mach-mmp/pm-pxa910.c
@@ -18,7 +18,7 @@
 #include <asm/mach-types.h>
 #include <asm/outercache.h>
 
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "addr-map.h"
 #include "pm-pxa910.h"
 #include "regs-icu.h"
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index 6e0277488967..b642e900727a 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -21,7 +21,7 @@
 #include "addr-map.h"
 #include "clock.h"
 #include "common.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "devices.h"
 #include "irqs.h"
 #include "mfp.h"
diff --git a/arch/arm/mach-mmp/pxa168.h b/arch/arm/mach-mmp/pxa168.h
index 0331c58b07a2..dff651b9f252 100644
--- a/arch/arm/mach-mmp/pxa168.h
+++ b/arch/arm/mach-mmp/pxa168.h
@@ -17,9 +17,9 @@ extern void pxa168_clear_keypad_wakeup(void);
 #include <linux/platform_data/keypad-pxa27x.h>
 #include <linux/pxa168_eth.h>
 #include <linux/platform_data/mv_usb.h>
+#include <linux/soc/mmp/cputype.h>
 
 #include "devices.h"
-#include "cputype.h"
 
 extern struct pxa_device_desc pxa168_device_uart1;
 extern struct pxa_device_desc pxa168_device_uart2;
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index cba31c758dea..b19a069d9fab 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -18,7 +18,7 @@
 #include <asm/mach/time.h>
 #include "addr-map.h"
 #include "regs-apbc.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "irqs.h"
 #include "mfp.h"
 #include "devices.h"
diff --git a/arch/arm/mach-mmp/regs-usb.h b/arch/arm/mach-mmp/regs-usb.h
index d9f08c160154..ed0d1aa0ad6c 100644
--- a/arch/arm/mach-mmp/regs-usb.h
+++ b/arch/arm/mach-mmp/regs-usb.h
@@ -121,100 +121,6 @@
 
 #define UTMI_OTG_ADDON_OTG_ON			(1 << 0)
 
-/* For MMP3 USB Phy */
-#define USB2_PLL_REG0		0x4
-#define USB2_PLL_REG1		0x8
-#define USB2_TX_REG0		0x10
-#define USB2_TX_REG1		0x14
-#define USB2_TX_REG2		0x18
-#define USB2_RX_REG0		0x20
-#define USB2_RX_REG1		0x24
-#define USB2_RX_REG2		0x28
-#define USB2_ANA_REG0		0x30
-#define USB2_ANA_REG1		0x34
-#define USB2_ANA_REG2		0x38
-#define USB2_DIG_REG0		0x3C
-#define USB2_DIG_REG1		0x40
-#define USB2_DIG_REG2		0x44
-#define USB2_DIG_REG3		0x48
-#define USB2_TEST_REG0		0x4C
-#define USB2_TEST_REG1		0x50
-#define USB2_TEST_REG2		0x54
-#define USB2_CHARGER_REG0	0x58
-#define USB2_OTG_REG0		0x5C
-#define USB2_PHY_MON0		0x60
-#define USB2_RESETVE_REG0	0x64
-#define USB2_ICID_REG0		0x78
-#define USB2_ICID_REG1		0x7C
-
-/* USB2_PLL_REG0 */
-/* This is for Ax stepping */
-#define USB2_PLL_FBDIV_SHIFT_MMP3		0
-#define USB2_PLL_FBDIV_MASK_MMP3		(0xFF << 0)
-
-#define USB2_PLL_REFDIV_SHIFT_MMP3		8
-#define USB2_PLL_REFDIV_MASK_MMP3		(0xF << 8)
-
-#define USB2_PLL_VDD12_SHIFT_MMP3		12
-#define USB2_PLL_VDD18_SHIFT_MMP3		14
-
-/* This is for B0 stepping */
-#define USB2_PLL_FBDIV_SHIFT_MMP3_B0		0
-#define USB2_PLL_REFDIV_SHIFT_MMP3_B0		9
-#define USB2_PLL_VDD18_SHIFT_MMP3_B0		14
-#define USB2_PLL_FBDIV_MASK_MMP3_B0		0x01FF
-#define USB2_PLL_REFDIV_MASK_MMP3_B0		0x3E00
-
-#define USB2_PLL_CAL12_SHIFT_MMP3		0
-#define USB2_PLL_CALI12_MASK_MMP3		(0x3 << 0)
-
-#define USB2_PLL_VCOCAL_START_SHIFT_MMP3	2
-
-#define USB2_PLL_KVCO_SHIFT_MMP3		4
-#define USB2_PLL_KVCO_MASK_MMP3			(0x7<<4)
-
-#define USB2_PLL_ICP_SHIFT_MMP3			8
-#define USB2_PLL_ICP_MASK_MMP3			(0x7<<8)
-
-#define USB2_PLL_LOCK_BYPASS_SHIFT_MMP3		12
-
-#define USB2_PLL_PU_PLL_SHIFT_MMP3		13
-#define USB2_PLL_PU_PLL_MASK			(0x1 << 13)
-
-#define USB2_PLL_READY_MASK_MMP3		(0x1 << 15)
-
-/* USB2_TX_REG0 */
-#define USB2_TX_IMPCAL_VTH_SHIFT_MMP3		8
-#define USB2_TX_IMPCAL_VTH_MASK_MMP3		(0x7 << 8)
-
-#define USB2_TX_RCAL_START_SHIFT_MMP3		13
-
-/* USB2_TX_REG1 */
-#define USB2_TX_CK60_PHSEL_SHIFT_MMP3		0
-#define USB2_TX_CK60_PHSEL_MASK_MMP3		(0xf << 0)
-
-#define USB2_TX_AMP_SHIFT_MMP3			4
-#define USB2_TX_AMP_MASK_MMP3			(0x7 << 4)
-
-#define USB2_TX_VDD12_SHIFT_MMP3		8
-#define USB2_TX_VDD12_MASK_MMP3			(0x3 << 8)
-
-/* USB2_TX_REG2 */
-#define USB2_TX_DRV_SLEWRATE_SHIFT		10
-
-/* USB2_RX_REG0 */
-#define USB2_RX_SQ_THRESH_SHIFT_MMP3		4
-#define USB2_RX_SQ_THRESH_MASK_MMP3		(0xf << 4)
-
-#define USB2_RX_SQ_LENGTH_SHIFT_MMP3		10
-#define USB2_RX_SQ_LENGTH_MASK_MMP3		(0x3 << 10)
-
-/* USB2_ANA_REG1*/
-#define USB2_ANA_PU_ANA_SHIFT_MMP3		14
-
-/* USB2_OTG_REG0 */
-#define USB2_OTG_PU_OTG_SHIFT_MMP3		3
-
 /* fsic registers */
 #define FSIC_MISC			0x4
 #define FSIC_INT			0x28
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 483df32583be..c65cfc1ad99b 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -33,7 +33,7 @@
 #include "regs-timers.h"
 #include "regs-apbc.h"
 #include "irqs.h"
-#include "cputype.h"
+#include <linux/soc/mmp/cputype.h>
 #include "clock.h"
 
 #define TIMERS_VIRT_BASE	TIMERS1_VIRT_BASE
@@ -155,7 +155,8 @@ static void __init timer_config(void)
 
 	__raw_writel(0x0, mmp_timer_base + TMR_CER); /* disable */
 
-	ccr &= (cpu_is_mmp2()) ? (TMR_CCR_CS_0(0) | TMR_CCR_CS_1(0)) :
+	ccr &= (cpu_is_mmp2() || cpu_is_mmp3()) ?
+		(TMR_CCR_CS_0(0) | TMR_CCR_CS_1(0)) :
 		(TMR_CCR_CS_0(3) | TMR_CCR_CS_1(3));
 	__raw_writel(ccr, mmp_timer_base + TMR_CCR);
 
@@ -195,31 +196,18 @@ void __init mmp_timer_init(int irq, unsigned long rate)
 	clockevents_config_and_register(&ckevt, rate, MIN_DELTA, MAX_DELTA);
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id mmp_timer_dt_ids[] = {
-	{ .compatible = "mrvl,mmp-timer", },
-	{}
-};
-
-void __init mmp_dt_init_timer(void)
+static int __init mmp_dt_init_timer(struct device_node *np)
 {
-	struct device_node *np;
 	struct clk *clk;
 	int irq, ret;
 	unsigned long rate;
 
-	np = of_find_matching_node(NULL, mmp_timer_dt_ids);
-	if (!np) {
-		ret = -ENODEV;
-		goto out;
-	}
-
 	clk = of_clk_get(np, 0);
 	if (!IS_ERR(clk)) {
 		ret = clk_prepare_enable(clk);
 		if (ret)
-			goto out;
-		rate = clk_get_rate(clk) / 2;
+			return ret;
+		rate = clk_get_rate(clk);
 	} else if (cpu_is_pj4()) {
 		rate = 6500000;
 	} else {
@@ -227,18 +215,15 @@ void __init mmp_dt_init_timer(void)
 	}
 
 	irq = irq_of_parse_and_map(np, 0);
-	if (!irq) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!irq)
+		return -EINVAL;
+
 	mmp_timer_base = of_iomap(np, 0);
-	if (!mmp_timer_base) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!mmp_timer_base)
+		return -ENOMEM;
+
 	mmp_timer_init(irq, rate);
-	return;
-out:
-	pr_err("Failed to get timer from device tree with error:%d\n", ret);
+	return 0;
 }
-#endif
+
+TIMER_OF_DECLARE(mmp_timer, "mrvl,mmp-timer", mmp_dt_init_timer);
diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index 2a17dc1d122c..948da556162e 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -4,30 +4,25 @@ if ARCH_OMAP1
 menu "TI OMAP1 specific features"
 
 comment "OMAP Core Type"
-	depends on ARCH_OMAP1
 
 config ARCH_OMAP730
-	depends on ARCH_OMAP1
 	bool "OMAP730 Based System"
 	select ARCH_OMAP_OTG
 	select CPU_ARM926T
 	select OMAP_MPU_TIMER
 
 config ARCH_OMAP850
-	depends on ARCH_OMAP1
 	bool "OMAP850 Based System"
 	select ARCH_OMAP_OTG
 	select CPU_ARM926T
 
 config ARCH_OMAP15XX
-	depends on ARCH_OMAP1
 	default y
 	bool "OMAP15xx Based System"
 	select CPU_ARM925T
 	select OMAP_MPU_TIMER
 
 config ARCH_OMAP16XX
-	depends on ARCH_OMAP1
 	bool "OMAP16xx Based System"
 	select ARCH_OMAP_OTG
 	select CPU_ARM926T
@@ -35,7 +30,6 @@ config ARCH_OMAP16XX
 
 config OMAP_MUX
 	bool "OMAP multiplexing support"
-	depends on ARCH_OMAP
 	default y
 	help
 	  Pin multiplexing support for OMAP boards. If your bootloader
@@ -60,25 +54,24 @@ config OMAP_MUX_WARNINGS
 	  printed, it's safe to deselect OMAP_MUX for your product.
 
 comment "OMAP Board Type"
-	depends on ARCH_OMAP1
 
 config MACH_OMAP_INNOVATOR
 	bool "TI Innovator"
-	depends on ARCH_OMAP1 && (ARCH_OMAP15XX || ARCH_OMAP16XX)
+	depends on ARCH_OMAP15XX || ARCH_OMAP16XX
 	help
           TI OMAP 1510 or 1610 Innovator board support. Say Y here if you
           have such a board.
 
 config MACH_OMAP_H2
 	bool "TI H2 Support"
-	depends on ARCH_OMAP1 && ARCH_OMAP16XX
+	depends on ARCH_OMAP16XX
     	help
 	  TI OMAP 1610/1611B H2 board support. Say Y here if you have such
 	  a board.
 
 config MACH_OMAP_H3
 	bool "TI H3 Support"
-	depends on ARCH_OMAP1 && ARCH_OMAP16XX
+	depends on ARCH_OMAP16XX
     	help
 	  TI OMAP 1710 H3 board support. Say Y here if you have such
 	  a board.
@@ -91,7 +84,7 @@ config MACH_HERALD
 
 config MACH_OMAP_OSK
 	bool "TI OSK Support"
-	depends on ARCH_OMAP1 && ARCH_OMAP16XX
+	depends on ARCH_OMAP16XX
     	help
 	  TI OMAP 5912 OSK (OMAP Starter Kit) board support. Say Y here
           if you have such a board.
@@ -106,21 +99,21 @@ config OMAP_OSK_MISTRAL
 
 config MACH_OMAP_PERSEUS2
 	bool "TI Perseus2"
-	depends on ARCH_OMAP1 && ARCH_OMAP730
+	depends on ARCH_OMAP730
     	help
 	  Support for TI OMAP 730 Perseus2 board. Say Y here if you have such
 	  a board.
 
 config MACH_OMAP_FSAMPLE
 	bool "TI F-Sample"
-	depends on ARCH_OMAP1 && ARCH_OMAP730
+	depends on ARCH_OMAP730
     	help
 	  Support for TI OMAP 850 F-Sample board. Say Y here if you have such
 	  a board.
 
 config MACH_OMAP_PALMTE
 	bool "Palm Tungsten E"
-	depends on ARCH_OMAP1 && ARCH_OMAP15XX
+	depends on ARCH_OMAP15XX
 	help
 	  Support for the Palm Tungsten E PDA.  To boot the kernel, you'll
 	  need a PalmOS compatible bootloader; check out
@@ -129,7 +122,7 @@ config MACH_OMAP_PALMTE
 
 config MACH_OMAP_PALMZ71
 	bool "Palm Zire71"
-	depends on ARCH_OMAP1 && ARCH_OMAP15XX
+	depends on ARCH_OMAP15XX
 	help
 	 Support for the Palm Zire71 PDA. To boot the kernel,
 	 you'll need a PalmOS compatible bootloader; check out
@@ -138,7 +131,7 @@ config MACH_OMAP_PALMZ71
 
 config MACH_OMAP_PALMTT
 	bool "Palm Tungsten|T"
-	depends on ARCH_OMAP1 && ARCH_OMAP15XX
+	depends on ARCH_OMAP15XX
 	help
 	  Support for the Palm Tungsten|T PDA. To boot the kernel, you'll
 	  need a PalmOS compatible bootloader (Garux); check out
@@ -147,7 +140,7 @@ config MACH_OMAP_PALMTT
 
 config MACH_SX1
 	bool "Siemens SX1"
-	depends on ARCH_OMAP1 && ARCH_OMAP15XX
+	depends on ARCH_OMAP15XX
 	select I2C
 	help
 	  Support for the Siemens SX1 phone. To boot the kernel,
@@ -159,14 +152,14 @@ config MACH_SX1
 
 config MACH_NOKIA770
 	bool "Nokia 770"
-	depends on ARCH_OMAP1 && ARCH_OMAP16XX
+	depends on ARCH_OMAP16XX
 	help
 	  Support for the Nokia 770 Internet Tablet. Say Y here if you
 	  have such a device.
 
 config MACH_AMS_DELTA
 	bool "Amstrad E3 (Delta)"
-	depends on ARCH_OMAP1 && ARCH_OMAP15XX
+	depends on ARCH_OMAP15XX
 	select FIQ
 	select GPIO_GENERIC_PLATFORM
 	select LEDS_GPIO_REGISTER
@@ -178,7 +171,7 @@ config MACH_AMS_DELTA
 
 config MACH_OMAP_GENERIC
 	bool "Generic OMAP board"
-	depends on ARCH_OMAP1 && (ARCH_OMAP15XX || ARCH_OMAP16XX)
+	depends on ARCH_OMAP15XX || ARCH_OMAP16XX
 	help
           Support for generic OMAP-1510, 1610 or 1710 board with
           no FPGA. Can be used as template for porting Linux to
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 0254eb9cf8c6..4eea3e39e633 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -110,7 +110,7 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip,
 
 		/*
 		 * FIQ handler takes full control over serio data and clk GPIO
-		 * pins.  Initiaize them and keep requested so nobody can
+		 * pins.  Initialize them and keep requested so nobody can
 		 * interfere.  Fail if any of those two couldn't be requested.
 		 */
 		switch (i) {
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index fdb6743760a2..dca7d06c0b93 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -95,6 +95,7 @@ config ARCH_OMAP2PLUS
 	bool
 	select ARCH_HAS_BANDGAP
 	select ARCH_HAS_HOLES_MEMORYMODEL
+	select ARCH_HAS_RESET_CONTROLLER
 	select ARCH_OMAP
 	select CLKSRC_MMIO
 	select GENERIC_IRQ_CHIP
@@ -105,6 +106,7 @@ config ARCH_OMAP2PLUS
 	select OMAP_DM_TIMER
 	select OMAP_GPMC
 	select PINCTRL
+	select RESET_CONTROLLER
 	select SOC_BUS
 	select TI_SYSC
 	select OMAP_IRQCHIP
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 8f208197988f..f07cfda85156 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -29,6 +29,11 @@ obj-y += mcbsp.o
 endif
 
 obj-$(CONFIG_TWL4030_CORE) += omap_twl.o
+
+ifneq ($(CONFIG_MFD_CPCAP),)
+obj-y 					+= pmic-cpcap.o
+endif
+
 obj-$(CONFIG_SOC_HAS_OMAP2_SDRC)	+= sdrc.o
 
 # SMP support ONLY available for OMAP4
@@ -216,9 +221,6 @@ obj-$(CONFIG_MACH_NOKIA_N8X0)		+= board-n8x0.o
 
 # Platform specific device init code
 
-omap-hsmmc-$(CONFIG_MMC_OMAP_HS)	:= hsmmc.o
-obj-y					+= $(omap-hsmmc-m) $(omap-hsmmc-y)
-
 obj-y					+= omap_phy_internal.o
 
 obj-$(CONFIG_MACH_OMAP2_TUSB6010)	+= usb-tusb6010.o
diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c
index f98c8ecc9ca2..dedd47e30b98 100644
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -1147,7 +1147,21 @@ void clkdm_del_autodeps(struct clockdomain *clkdm)
 
 /* Clockdomain-to-clock/hwmod framework interface code */
 
-static int _clkdm_clk_hwmod_enable(struct clockdomain *clkdm)
+/**
+ * clkdm_clk_enable - add an enabled downstream clock to this clkdm
+ * @clkdm: struct clockdomain *
+ * @clk: struct clk * of the enabled downstream clock
+ *
+ * Increment the usecount of the clockdomain @clkdm and ensure that it
+ * is awake before @clk is enabled.  Intended to be called by
+ * clk_enable() code.  If the clockdomain is in software-supervised
+ * idle mode, force the clockdomain to wake.  If the clockdomain is in
+ * hardware-supervised idle mode, add clkdm-pwrdm autodependencies, to
+ * ensure that devices in the clockdomain can be read from/written to
+ * by on-chip processors.  Returns -EINVAL if passed null pointers;
+ * returns 0 upon success or if the clockdomain is in hwsup idle mode.
+ */
+int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *unused)
 {
 	if (!clkdm || !arch_clkdm || !arch_clkdm->clkdm_clk_enable)
 		return -EINVAL;
@@ -1175,33 +1189,6 @@ static int _clkdm_clk_hwmod_enable(struct clockdomain *clkdm)
 }
 
 /**
- * clkdm_clk_enable - add an enabled downstream clock to this clkdm
- * @clkdm: struct clockdomain *
- * @clk: struct clk * of the enabled downstream clock
- *
- * Increment the usecount of the clockdomain @clkdm and ensure that it
- * is awake before @clk is enabled.  Intended to be called by
- * clk_enable() code.  If the clockdomain is in software-supervised
- * idle mode, force the clockdomain to wake.  If the clockdomain is in
- * hardware-supervised idle mode, add clkdm-pwrdm autodependencies, to
- * ensure that devices in the clockdomain can be read from/written to
- * by on-chip processors.  Returns -EINVAL if passed null pointers;
- * returns 0 upon success or if the clockdomain is in hwsup idle mode.
- */
-int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk)
-{
-	/*
-	 * XXX Rewrite this code to maintain a list of enabled
-	 * downstream clocks for debugging purposes?
-	 */
-
-	if (!clk)
-		return -EINVAL;
-
-	return _clkdm_clk_hwmod_enable(clkdm);
-}
-
-/**
  * clkdm_clk_disable - remove an enabled downstream clock from this clkdm
  * @clkdm: struct clockdomain *
  * @clk: struct clk * of the disabled downstream clock
@@ -1216,13 +1203,13 @@ int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk)
  */
 int clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk)
 {
-	if (!clkdm || !clk || !arch_clkdm || !arch_clkdm->clkdm_clk_disable)
+	if (!clkdm || !arch_clkdm || !arch_clkdm->clkdm_clk_disable)
 		return -EINVAL;
 
 	pwrdm_lock(clkdm->pwrdm.ptr);
 
 	/* corner case: disabling unused clocks */
-	if ((__clk_get_enable_count(clk) == 0) && clkdm->usecount == 0)
+	if (clk && (__clk_get_enable_count(clk) == 0) && clkdm->usecount == 0)
 		goto ccd_exit;
 
 	if (clkdm->usecount == 0) {
@@ -1277,7 +1264,7 @@ int clkdm_hwmod_enable(struct clockdomain *clkdm, struct omap_hwmod *oh)
 	if (!oh)
 		return -EINVAL;
 
-	return _clkdm_clk_hwmod_enable(clkdm);
+	return clkdm_clk_enable(clkdm, NULL);
 }
 
 /**
@@ -1300,35 +1287,10 @@ int clkdm_hwmod_disable(struct clockdomain *clkdm, struct omap_hwmod *oh)
 	if (cpu_is_omap24xx() || cpu_is_omap34xx())
 		return 0;
 
-	/*
-	 * XXX Rewrite this code to maintain a list of enabled
-	 * downstream hwmods for debugging purposes?
-	 */
-
-	if (!clkdm || !oh || !arch_clkdm || !arch_clkdm->clkdm_clk_disable)
+	if (!oh)
 		return -EINVAL;
 
-	pwrdm_lock(clkdm->pwrdm.ptr);
-
-	if (clkdm->usecount == 0) {
-		pwrdm_unlock(clkdm->pwrdm.ptr);
-		WARN_ON(1); /* underflow */
-		return -ERANGE;
-	}
-
-	clkdm->usecount--;
-	if (clkdm->usecount > 0) {
-		pwrdm_unlock(clkdm->pwrdm.ptr);
-		return 0;
-	}
-
-	arch_clkdm->clkdm_clk_disable(clkdm);
-	pwrdm_state_switch_nolock(clkdm->pwrdm.ptr);
-	pwrdm_unlock(clkdm->pwrdm.ptr);
-
-	pr_debug("clockdomain: %s: disabled\n", clkdm->name);
-
-	return 0;
+	return clkdm_clk_disable(clkdm, NULL);
 }
 
 /**
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 6316da3623b3..223b37c48389 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -352,7 +352,6 @@ void omap_pcs_legacy_init(int irq, void (*rearm)(void));
 struct omap_sdrc_params;
 extern void omap_sdrc_init(struct omap_sdrc_params *sdrc_cs0,
 				      struct omap_sdrc_params *sdrc_cs1);
-struct omap2_hsmmc_info;
 extern void omap_reserve(void);
 
 struct omap_hwmod;
diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index c84b5e260617..73338cf80d76 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -684,7 +684,7 @@ static u32 am33xx_control_vals[ARRAY_SIZE(am43xx_control_reg_offsets)];
  *
  * Save the wkup domain registers
  */
-void am43xx_control_save_context(void)
+static void am43xx_control_save_context(void)
 {
 	int i;
 
@@ -698,7 +698,7 @@ void am43xx_control_save_context(void)
  *
  * Restore the wkup domain registers
  */
-void am43xx_control_restore_context(void)
+static void am43xx_control_restore_context(void)
 {
 	int i;
 
diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h
index 393b42110511..eceb4b09adb2 100644
--- a/arch/arm/mach-omap2/control.h
+++ b/arch/arm/mach-omap2/control.h
@@ -195,6 +195,7 @@
 #define OMAP44XX_CONTROL_FUSE_MPU_OPP100	0x243
 #define OMAP44XX_CONTROL_FUSE_MPU_OPPTURBO	0x246
 #define OMAP44XX_CONTROL_FUSE_MPU_OPPNITRO	0x249
+#define OMAP44XX_CONTROL_FUSE_MPU_OPPNITROSB	0x24C
 #define OMAP44XX_CONTROL_FUSE_CORE_OPP50	0x254
 #define OMAP44XX_CONTROL_FUSE_CORE_OPP100	0x257
 #define OMAP44XX_CONTROL_FUSE_CORE_OPP100OV	0x25A
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 439e143cad7b..46012ca812f4 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -265,6 +265,7 @@ static int __init omapdss_init_of(void)
 	r = of_platform_populate(node, NULL, NULL, &pdev->dev);
 	if (r) {
 		pr_err("Unable to populate DSS submodule devices\n");
+		put_device(&pdev->dev);
 		return r;
 	}
 
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
deleted file mode 100644
index 63423ea6a240..000000000000
--- a/arch/arm/mach-omap2/hsmmc.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/arm/mach-omap2/hsmmc.c
- *
- * Copyright (C) 2007-2008 Texas Instruments
- * Copyright (C) 2008 Nokia Corporation
- * Author: Texas Instruments
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/mmc/host.h>
-#include <linux/platform_data/hsmmc-omap.h>
-
-#include "soc.h"
-#include "omap_device.h"
-
-#include "hsmmc.h"
-#include "control.h"
-
-#if IS_ENABLED(CONFIG_MMC_OMAP_HS)
-
-static u16 control_pbias_offset;
-static u16 control_devconf1_offset;
-
-#define HSMMC_NAME_LEN	9
-
-static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c,
-					struct omap_hsmmc_platform_data *mmc)
-{
-	char *hc_name;
-
-	hc_name = kzalloc(HSMMC_NAME_LEN + 1, GFP_KERNEL);
-	if (!hc_name)
-		return -ENOMEM;
-
-	snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", c->mmc, 1);
-	mmc->name = hc_name;
-	mmc->caps = c->caps;
-	mmc->reg_offset = 0;
-
-	return 0;
-}
-
-static int omap_hsmmc_done;
-
-void omap_hsmmc_late_init(struct omap2_hsmmc_info *c)
-{
-	struct platform_device *pdev;
-	int res;
-
-	if (omap_hsmmc_done)
-		return;
-
-	omap_hsmmc_done = 1;
-
-	for (; c->mmc; c++) {
-		pdev = c->pdev;
-		if (!pdev)
-			continue;
-		res = omap_device_register(pdev);
-		if (res)
-			pr_err("Could not late init MMC\n");
-	}
-}
-
-#define MAX_OMAP_MMC_HWMOD_NAME_LEN		16
-
-static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo,
-					int ctrl_nr)
-{
-	struct omap_hwmod *oh;
-	struct omap_hwmod *ohs[1];
-	struct omap_device *od;
-	struct platform_device *pdev;
-	char oh_name[MAX_OMAP_MMC_HWMOD_NAME_LEN];
-	struct omap_hsmmc_platform_data *mmc_data;
-	struct omap_hsmmc_dev_attr *mmc_dev_attr;
-	char *name;
-	int res;
-
-	mmc_data = kzalloc(sizeof(*mmc_data), GFP_KERNEL);
-	if (!mmc_data)
-		return;
-
-	res = omap_hsmmc_pdata_init(hsmmcinfo, mmc_data);
-	if (res < 0)
-		goto free_mmc;
-
-	name = "omap_hsmmc";
-	res = snprintf(oh_name, MAX_OMAP_MMC_HWMOD_NAME_LEN,
-		     "mmc%d", ctrl_nr);
-	WARN(res >= MAX_OMAP_MMC_HWMOD_NAME_LEN,
-	     "String buffer overflow in MMC%d device setup\n", ctrl_nr);
-
-	oh = omap_hwmod_lookup(oh_name);
-	if (!oh) {
-		pr_err("Could not look up %s\n", oh_name);
-		goto free_name;
-	}
-	ohs[0] = oh;
-	if (oh->dev_attr != NULL) {
-		mmc_dev_attr = oh->dev_attr;
-		mmc_data->controller_flags = mmc_dev_attr->flags;
-	}
-
-	pdev = platform_device_alloc(name, ctrl_nr - 1);
-	if (!pdev) {
-		pr_err("Could not allocate pdev for %s\n", name);
-		goto free_name;
-	}
-	dev_set_name(&pdev->dev, "%s.%d", pdev->name, pdev->id);
-
-	od = omap_device_alloc(pdev, ohs, 1);
-	if (IS_ERR(od)) {
-		pr_err("Could not allocate od for %s\n", name);
-		goto put_pdev;
-	}
-
-	res = platform_device_add_data(pdev, mmc_data,
-			      sizeof(struct omap_hsmmc_platform_data));
-	if (res) {
-		pr_err("Could not add pdata for %s\n", name);
-		goto put_pdev;
-	}
-
-	hsmmcinfo->pdev = pdev;
-
-	res = omap_device_register(pdev);
-	if (res) {
-		pr_err("Could not register od for %s\n", name);
-		goto free_od;
-	}
-
-	goto free_mmc;
-
-free_od:
-	omap_device_delete(od);
-
-put_pdev:
-	platform_device_put(pdev);
-
-free_name:
-	kfree(mmc_data->name);
-
-free_mmc:
-	kfree(mmc_data);
-}
-
-void __init omap_hsmmc_init(struct omap2_hsmmc_info *controllers)
-{
-	if (omap_hsmmc_done)
-		return;
-
-	omap_hsmmc_done = 1;
-
-	if (cpu_is_omap2430()) {
-		control_pbias_offset = OMAP243X_CONTROL_PBIAS_LITE;
-		control_devconf1_offset = OMAP243X_CONTROL_DEVCONF1;
-	} else {
-		control_pbias_offset = OMAP343X_CONTROL_PBIAS_LITE;
-		control_devconf1_offset = OMAP343X_CONTROL_DEVCONF1;
-	}
-
-	for (; controllers->mmc; controllers++)
-		omap_hsmmc_init_one(controllers, controllers->mmc);
-
-}
-
-#endif
diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h
deleted file mode 100644
index 76c5ed2afa72..000000000000
--- a/arch/arm/mach-omap2/hsmmc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * MMC definitions for OMAP2
- */
-
-struct mmc_card;
-
-struct omap2_hsmmc_info {
-	u8	mmc;		/* controller 1/2/3 */
-	u32	caps;		/* 4/8 wires and any additional host
-				 * capabilities OR'd (ref. linux/mmc/host.h) */
-	struct platform_device *pdev;	/* mmc controller instance */
-	/* init some special card */
-	void (*init_card)(struct mmc_card *card);
-};
-
-#if IS_ENABLED(CONFIG_MMC_OMAP_HS)
-
-void omap_hsmmc_init(struct omap2_hsmmc_info *);
-void omap_hsmmc_late_init(struct omap2_hsmmc_info *);
-
-#else
-
-static inline void omap_hsmmc_init(struct omap2_hsmmc_info *info)
-{
-}
-
-static inline void omap_hsmmc_late_init(struct omap2_hsmmc_info *info)
-{
-}
-
-#endif
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 2d8f90546591..67fa28532a3a 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -227,7 +227,6 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state)
 {
 	struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu);
 	unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET;
-	unsigned int wakeup_cpu;
 
 	if (omap_rev() == OMAP4430_REV_ES1_0)
 		return -ENXIO;
@@ -292,7 +291,6 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state)
 	 * secure devices, CPUx does WFI which can result in
 	 * domain transition
 	 */
-	wakeup_cpu = smp_processor_id();
 	pwrdm_set_next_pwrst(pm_info->pwrdm, PWRDM_POWER_ON);
 
 	pwrdm_post_transition(NULL);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 3acb4192918d..1d55602b3f8f 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -119,11 +119,7 @@ static void _add_hwmod_clocks_clkdev(struct omap_device *od,
 
 /**
  * omap_device_build_from_dt - build an omap_device with multiple hwmods
- * @pdev_name: name of the platform_device driver to use
- * @pdev_id: this platform_device's connection ID
- * @oh: ptr to the single omap_hwmod that backs this omap_device
- * @pdata: platform_data ptr to associate with the platform_device
- * @pdata_len: amount of memory pointed to by @pdata
+ * @pdev: The platform device to update.
  *
  * Function for building an omap_device already registered from device-tree
  *
@@ -292,7 +288,7 @@ static int _omap_device_idle_hwmods(struct omap_device *od)
 
 /**
  * omap_device_get_context_loss_count - get lost context count
- * @od: struct omap_device *
+ * @pdev: The platform device to update.
  *
  * Using the primary hwmod, query the context loss count for this
  * device.
@@ -321,9 +317,8 @@ int omap_device_get_context_loss_count(struct platform_device *pdev)
 /**
  * omap_device_alloc - allocate an omap_device
  * @pdev: platform_device that will be included in this omap_device
- * @oh: ptr to the single omap_hwmod that backs this omap_device
- * @pdata: platform_data ptr to associate with the platform_device
- * @pdata_len: amount of memory pointed to by @pdata
+ * @ohs: ptr to the omap_hwmod for this omap_device
+ * @oh_cnt: the size of the ohs list
  *
  * Convenience function for allocating an omap_device structure and filling
  * hwmods, and resources.
@@ -649,7 +644,7 @@ struct dev_pm_domain omap_device_pm_domain = {
 
 /**
  * omap_device_register - register an omap_device with one omap_hwmod
- * @od: struct omap_device * to register
+ * @pdev: the platform device (omap_device) to register.
  *
  * Register the omap_device structure.  This currently just calls
  * platform_device_register() on the underlying platform_device.
@@ -668,7 +663,7 @@ int omap_device_register(struct platform_device *pdev)
 
 /**
  * omap_device_enable - fully activate an omap_device
- * @od: struct omap_device * to activate
+ * @pdev: the platform device to activate
  *
  * Do whatever is necessary for the hwmods underlying omap_device @od
  * to be accessible and ready to operate.  This generally involves
@@ -702,7 +697,7 @@ int omap_device_enable(struct platform_device *pdev)
 
 /**
  * omap_device_idle - idle an omap_device
- * @od: struct omap_device * to idle
+ * @pdev: The platform_device (omap_device) to idle
  *
  * Idle omap_device @od.  Device drivers call this function indirectly
  * via pm_runtime_put*().  Returns -EINVAL if the omap_device is not
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 203664c40d3d..a136788db839 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -623,39 +623,6 @@ static int _enable_wakeup(struct omap_hwmod *oh, u32 *v)
 	return 0;
 }
 
-/**
- * _disable_wakeup: clear OCP_SYSCONFIG.ENAWAKEUP bit in the hardware
- * @oh: struct omap_hwmod *
- *
- * Prevent the hardware module @oh to send wakeups.  Returns -EINVAL
- * upon error or 0 upon success.
- */
-static int _disable_wakeup(struct omap_hwmod *oh, u32 *v)
-{
-	if (!oh->class->sysc ||
-	    !((oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP) ||
-	      (oh->class->sysc->idlemodes & SIDLE_SMART_WKUP) ||
-	      (oh->class->sysc->idlemodes & MSTANDBY_SMART_WKUP)))
-		return -EINVAL;
-
-	if (!oh->class->sysc->sysc_fields) {
-		WARN(1, "omap_hwmod: %s: offset struct for sysconfig not provided in class\n", oh->name);
-		return -EINVAL;
-	}
-
-	if (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP)
-		*v &= ~(0x1 << oh->class->sysc->sysc_fields->enwkup_shift);
-
-	if (oh->class->sysc->idlemodes & SIDLE_SMART_WKUP)
-		_set_slave_idlemode(oh, HWMOD_IDLEMODE_SMART, v);
-	if (oh->class->sysc->idlemodes & MSTANDBY_SMART_WKUP)
-		_set_master_standbymode(oh, HWMOD_IDLEMODE_SMART, v);
-
-	/* XXX test pwrdm_get_wken for this hwmod's subsystem */
-
-	return 0;
-}
-
 static struct clockdomain *_get_clkdm(struct omap_hwmod *oh)
 {
 	struct clk_hw_omap *clk;
@@ -3868,70 +3835,6 @@ void __iomem *omap_hwmod_get_mpu_rt_va(struct omap_hwmod *oh)
  */
 
 /**
- * omap_hwmod_enable_wakeup - allow device to wake up the system
- * @oh: struct omap_hwmod *
- *
- * Sets the module OCP socket ENAWAKEUP bit to allow the module to
- * send wakeups to the PRCM, and enable I/O ring wakeup events for
- * this IP block if it has dynamic mux entries.  Eventually this
- * should set PRCM wakeup registers to cause the PRCM to receive
- * wakeup events from the module.  Does not set any wakeup routing
- * registers beyond this point - if the module is to wake up any other
- * module or subsystem, that must be set separately.  Called by
- * omap_device code.  Returns -EINVAL on error or 0 upon success.
- */
-int omap_hwmod_enable_wakeup(struct omap_hwmod *oh)
-{
-	unsigned long flags;
-	u32 v;
-
-	spin_lock_irqsave(&oh->_lock, flags);
-
-	if (oh->class->sysc &&
-	    (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP)) {
-		v = oh->_sysc_cache;
-		_enable_wakeup(oh, &v);
-		_write_sysconfig(v, oh);
-	}
-
-	spin_unlock_irqrestore(&oh->_lock, flags);
-
-	return 0;
-}
-
-/**
- * omap_hwmod_disable_wakeup - prevent device from waking the system
- * @oh: struct omap_hwmod *
- *
- * Clears the module OCP socket ENAWAKEUP bit to prevent the module
- * from sending wakeups to the PRCM, and disable I/O ring wakeup
- * events for this IP block if it has dynamic mux entries.  Eventually
- * this should clear PRCM wakeup registers to cause the PRCM to ignore
- * wakeup events from the module.  Does not set any wakeup routing
- * registers beyond this point - if the module is to wake up any other
- * module or subsystem, that must be set separately.  Called by
- * omap_device code.  Returns -EINVAL on error or 0 upon success.
- */
-int omap_hwmod_disable_wakeup(struct omap_hwmod *oh)
-{
-	unsigned long flags;
-	u32 v;
-
-	spin_lock_irqsave(&oh->_lock, flags);
-
-	if (oh->class->sysc &&
-	    (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP)) {
-		v = oh->_sysc_cache;
-		_disable_wakeup(oh, &v);
-		_write_sysconfig(v, oh);
-	}
-
-	spin_unlock_irqrestore(&oh->_lock, flags);
-
-	return 0;
-}
-
-/**
  * omap_hwmod_assert_hardreset - assert the HW reset line of submodules
  * contained in the hwmod module.
  * @oh: struct omap_hwmod *
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index ef1bb08b1a2d..2d0fd99d4713 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -646,9 +646,6 @@ int omap_hwmod_get_resource_byname(struct omap_hwmod *oh, unsigned int type,
 struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh);
 void __iomem *omap_hwmod_get_mpu_rt_va(struct omap_hwmod *oh);
 
-int omap_hwmod_enable_wakeup(struct omap_hwmod *oh);
-int omap_hwmod_disable_wakeup(struct omap_hwmod *oh);
-
 int omap_hwmod_for_each_by_class(const char *classname,
 				 int (*fn)(struct omap_hwmod *oh,
 					   void *user),
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_common_data.h b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_common_data.h
index 3de3d7a115b3..26e13d4fa19c 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_common_data.h
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_common_data.h
@@ -35,10 +35,7 @@ extern struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss0;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss1;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss2;
 extern struct omap_hwmod_ocp_if am33xx_l3_s__gpmc;
-extern struct omap_hwmod_ocp_if am33xx_l4_per__mailbox;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__spinlock;
-extern struct omap_hwmod_ocp_if am33xx_l4_ls__mcasp0;
-extern struct omap_hwmod_ocp_if am33xx_l4_ls__mcasp1;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__mcspi0;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__mcspi1;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__timer2;
@@ -54,7 +51,6 @@ extern struct omap_hwmod_ocp_if am33xx_l3_main__tptc2;
 extern struct omap_hwmod_ocp_if am33xx_l3_main__ocmc;
 extern struct omap_hwmod_ocp_if am33xx_l3_main__sha0;
 extern struct omap_hwmod_ocp_if am33xx_l3_main__aes0;
-extern struct omap_hwmod_ocp_if am33xx_l4_per__rng;
 
 extern struct omap_hwmod am33xx_l3_main_hwmod;
 extern struct omap_hwmod am33xx_l3_s_hwmod;
@@ -67,7 +63,6 @@ extern struct omap_hwmod am33xx_gfx_hwmod;
 extern struct omap_hwmod am33xx_prcm_hwmod;
 extern struct omap_hwmod am33xx_aes0_hwmod;
 extern struct omap_hwmod am33xx_sha0_hwmod;
-extern struct omap_hwmod am33xx_rng_hwmod;
 extern struct omap_hwmod am33xx_ocmcram_hwmod;
 extern struct omap_hwmod am33xx_smartreflex0_hwmod;
 extern struct omap_hwmod am33xx_smartreflex1_hwmod;
@@ -78,9 +73,6 @@ extern struct omap_hwmod am33xx_epwmss0_hwmod;
 extern struct omap_hwmod am33xx_epwmss1_hwmod;
 extern struct omap_hwmod am33xx_epwmss2_hwmod;
 extern struct omap_hwmod am33xx_gpmc_hwmod;
-extern struct omap_hwmod am33xx_mailbox_hwmod;
-extern struct omap_hwmod am33xx_mcasp0_hwmod;
-extern struct omap_hwmod am33xx_mcasp1_hwmod;
 extern struct omap_hwmod am33xx_rtc_hwmod;
 extern struct omap_hwmod am33xx_spi0_hwmod;
 extern struct omap_hwmod am33xx_spi1_hwmod;
@@ -96,7 +88,6 @@ extern struct omap_hwmod am33xx_tpcc_hwmod;
 extern struct omap_hwmod am33xx_tptc0_hwmod;
 extern struct omap_hwmod am33xx_tptc1_hwmod;
 extern struct omap_hwmod am33xx_tptc2_hwmod;
-extern struct omap_hwmod am33xx_wd_timer1_hwmod;
 
 extern struct omap_hwmod_class am33xx_emif_hwmod_class;
 extern struct omap_hwmod_class am33xx_l4_hwmod_class;
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_interconnect_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_interconnect_data.c
index 63698ffa6d27..7123c455aaa9 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_interconnect_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_interconnect_data.c
@@ -158,14 +158,6 @@ struct omap_hwmod_ocp_if am33xx_l3_s__gpmc = {
 	.user		= OCP_USER_MPU,
 };
 
-/* l4 ls -> mailbox */
-struct omap_hwmod_ocp_if am33xx_l4_per__mailbox = {
-	.master		= &am33xx_l4_ls_hwmod,
-	.slave		= &am33xx_mailbox_hwmod,
-	.clk		= "l4ls_gclk",
-	.user		= OCP_USER_MPU,
-};
-
 /* l4 ls -> spinlock */
 struct omap_hwmod_ocp_if am33xx_l4_ls__spinlock = {
 	.master		= &am33xx_l4_ls_hwmod,
@@ -174,22 +166,6 @@ struct omap_hwmod_ocp_if am33xx_l4_ls__spinlock = {
 	.user		= OCP_USER_MPU,
 };
 
-/* l4 ls -> mcasp0 */
-struct omap_hwmod_ocp_if am33xx_l4_ls__mcasp0 = {
-	.master		= &am33xx_l4_ls_hwmod,
-	.slave		= &am33xx_mcasp0_hwmod,
-	.clk		= "l4ls_gclk",
-	.user		= OCP_USER_MPU,
-};
-
-/* l4 ls -> mcasp1 */
-struct omap_hwmod_ocp_if am33xx_l4_ls__mcasp1 = {
-	.master		= &am33xx_l4_ls_hwmod,
-	.slave		= &am33xx_mcasp1_hwmod,
-	.clk		= "l4ls_gclk",
-	.user		= OCP_USER_MPU,
-};
-
 /* l4 ls -> mcspi0 */
 struct omap_hwmod_ocp_if am33xx_l4_ls__mcspi0 = {
 	.master		= &am33xx_l4_ls_hwmod,
@@ -308,11 +284,3 @@ struct omap_hwmod_ocp_if am33xx_l3_main__aes0 = {
 	.clk		= "aes0_fck",
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
-
-/* l4 per -> rng */
-struct omap_hwmod_ocp_if am33xx_l4_per__rng = {
-	.master		= &am33xx_l4_ls_hwmod,
-	.slave		= &am33xx_rng_hwmod,
-	.clk		= "rng_fck",
-	.user		= OCP_USER_MPU,
-};
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
index 29fd13684a68..2df8659612ef 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
@@ -17,7 +17,6 @@
 #include <linux/types.h>
 
 #include "omap_hwmod.h"
-#include "wd_timer.h"
 #include "cm33xx.h"
 #include "prm33xx.h"
 #include "omap_hwmod_33xx_43xx_common_data.h"
@@ -266,33 +265,6 @@ struct omap_hwmod am33xx_sha0_hwmod = {
 	},
 };
 
-/* rng */
-static struct omap_hwmod_class_sysconfig am33xx_rng_sysc = {
-	.rev_offs	= 0x1fe0,
-	.sysc_offs	= 0x1fe4,
-	.sysc_flags	= SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE,
-	.idlemodes	= SIDLE_FORCE | SIDLE_NO,
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class am33xx_rng_hwmod_class = {
-	.name		= "rng",
-	.sysc		= &am33xx_rng_sysc,
-};
-
-struct omap_hwmod am33xx_rng_hwmod = {
-	.name		= "rng",
-	.class		= &am33xx_rng_hwmod_class,
-	.clkdm_name	= "l4ls_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE,
-	.main_clk	= "rng_fck",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
 /* ocmcram */
 static struct omap_hwmod_class am33xx_ocmcram_hwmod_class = {
 	.name = "ocmcram",
@@ -466,86 +438,6 @@ struct omap_hwmod am33xx_epwmss2_hwmod = {
 	},
 };
 
-/*
- * 'gpio' class: for gpio 0,1,2,3
- */
-static struct omap_hwmod_class_sysconfig am33xx_gpio_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.syss_offs	= 0x0114,
-	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP |
-			  SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
-			  SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			  SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class am33xx_gpio_hwmod_class = {
-	.name		= "gpio",
-	.sysc		= &am33xx_gpio_sysc,
-};
-
-/* gpio1 */
-static struct omap_hwmod_opt_clk gpio1_opt_clks[] = {
-	{ .role = "dbclk", .clk = "gpio1_dbclk" },
-};
-
-static struct omap_hwmod am33xx_gpio1_hwmod = {
-	.name		= "gpio2",
-	.class		= &am33xx_gpio_hwmod_class,
-	.clkdm_name	= "l4ls_clkdm",
-	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
-	.main_clk	= "l4ls_gclk",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= gpio1_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(gpio1_opt_clks),
-};
-
-/* gpio2 */
-static struct omap_hwmod_opt_clk gpio2_opt_clks[] = {
-	{ .role = "dbclk", .clk = "gpio2_dbclk" },
-};
-
-static struct omap_hwmod am33xx_gpio2_hwmod = {
-	.name		= "gpio3",
-	.class		= &am33xx_gpio_hwmod_class,
-	.clkdm_name	= "l4ls_clkdm",
-	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
-	.main_clk	= "l4ls_gclk",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= gpio2_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(gpio2_opt_clks),
-};
-
-/* gpio3 */
-static struct omap_hwmod_opt_clk gpio3_opt_clks[] = {
-	{ .role = "dbclk", .clk = "gpio3_dbclk" },
-};
-
-static struct omap_hwmod am33xx_gpio3_hwmod = {
-	.name		= "gpio4",
-	.class		= &am33xx_gpio_hwmod_class,
-	.clkdm_name	= "l4ls_clkdm",
-	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
-	.main_clk	= "l4ls_gclk",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= gpio3_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(gpio3_opt_clks),
-};
-
 /* gpmc */
 static struct omap_hwmod_class_sysconfig gpmc_sysc = {
 	.rev_offs	= 0x0,
@@ -576,78 +468,6 @@ struct omap_hwmod am33xx_gpmc_hwmod = {
 	},
 };
 
-/*
- * 'mailbox' class
- * mailbox module allowing communication between the on-chip processors using a
- * queued mailbox-interrupt mechanism.
- */
-static struct omap_hwmod_class_sysconfig am33xx_mailbox_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
-			  SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class am33xx_mailbox_hwmod_class = {
-	.name	= "mailbox",
-	.sysc	= &am33xx_mailbox_sysc,
-};
-
-struct omap_hwmod am33xx_mailbox_hwmod = {
-	.name		= "mailbox",
-	.class		= &am33xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4ls_clkdm",
-	.main_clk	= "l4ls_gclk",
-	.prcm = {
-		.omap4 = {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/*
- * 'mcasp' class
- */
-static struct omap_hwmod_class_sysconfig am33xx_mcasp_sysc = {
-	.rev_offs	= 0x0,
-	.sysc_offs	= 0x4,
-	.sysc_flags	= SYSC_HAS_SIDLEMODE,
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type3,
-};
-
-static struct omap_hwmod_class am33xx_mcasp_hwmod_class = {
-	.name		= "mcasp",
-	.sysc		= &am33xx_mcasp_sysc,
-};
-
-/* mcasp0 */
-struct omap_hwmod am33xx_mcasp0_hwmod = {
-	.name		= "mcasp0",
-	.class		= &am33xx_mcasp_hwmod_class,
-	.clkdm_name	= "l3s_clkdm",
-	.main_clk	= "mcasp0_fck",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/* mcasp1 */
-struct omap_hwmod am33xx_mcasp1_hwmod = {
-	.name		= "mcasp1",
-	.class		= &am33xx_mcasp_hwmod_class,
-	.clkdm_name	= "l3s_clkdm",
-	.main_clk	= "mcasp1_fck",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 /*
  * 'rtc' class
@@ -950,41 +770,6 @@ struct omap_hwmod am33xx_tptc2_hwmod = {
 	},
 };
 
-/* 'wd_timer' class */
-static struct omap_hwmod_class_sysconfig wdt_sysc = {
-	.rev_offs	= 0x0,
-	.sysc_offs	= 0x10,
-	.syss_offs	= 0x14,
-	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_SIDLEMODE |
-			SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class am33xx_wd_timer_hwmod_class = {
-	.name		= "wd_timer",
-	.sysc		= &wdt_sysc,
-	.pre_shutdown	= &omap2_wd_timer_disable,
-};
-
-/*
- * XXX: device.c file uses hardcoded name for watchdog timer
- * driver "wd_timer2, so we are also using same name as of now...
- */
-struct omap_hwmod am33xx_wd_timer1_hwmod = {
-	.name		= "wd_timer2",
-	.class		= &am33xx_wd_timer_hwmod_class,
-	.clkdm_name	= "l4_wkup_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE,
-	.main_clk	= "wdt1_fck",
-	.prcm		= {
-		.omap4	= {
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
 static void omap_hwmod_am33xx_clkctrl(void)
 {
 	CLKCTRL(am33xx_dcan0_hwmod, AM33XX_CM_PER_DCAN0_CLKCTRL_OFFSET);
@@ -993,12 +778,6 @@ static void omap_hwmod_am33xx_clkctrl(void)
 	CLKCTRL(am33xx_epwmss0_hwmod, AM33XX_CM_PER_EPWMSS0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_epwmss1_hwmod, AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_epwmss2_hwmod, AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio1_hwmod, AM33XX_CM_PER_GPIO1_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio2_hwmod, AM33XX_CM_PER_GPIO2_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio3_hwmod, AM33XX_CM_PER_GPIO3_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mailbox_hwmod, AM33XX_CM_PER_MAILBOX0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mcasp0_hwmod, AM33XX_CM_PER_MCASP0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mcasp1_hwmod, AM33XX_CM_PER_MCASP1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spi0_hwmod, AM33XX_CM_PER_SPI0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spi1_hwmod, AM33XX_CM_PER_SPI1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spinlock_hwmod, AM33XX_CM_PER_SPINLOCK_CLKCTRL_OFFSET);
@@ -1013,7 +792,6 @@ static void omap_hwmod_am33xx_clkctrl(void)
 	CLKCTRL(am33xx_smartreflex1_hwmod,
 		AM33XX_CM_WKUP_SMARTREFLEX1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_timer1_hwmod, AM33XX_CM_WKUP_TIMER1_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_wd_timer1_hwmod, AM33XX_CM_WKUP_WDT1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_rtc_hwmod, AM33XX_CM_RTC_RTC_CLKCTRL_OFFSET);
 	PRCM_FLAGS(am33xx_rtc_hwmod, HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_gpmc_hwmod, AM33XX_CM_PER_GPMC_CLKCTRL_OFFSET);
@@ -1031,7 +809,6 @@ static void omap_hwmod_am33xx_clkctrl(void)
 	CLKCTRL(am33xx_ocmcram_hwmod , AM33XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_sha0_hwmod , AM33XX_CM_PER_SHA0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_aes0_hwmod , AM33XX_CM_PER_AES0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_rng_hwmod, AM33XX_CM_PER_RNG_CLKCTRL_OFFSET);
 }
 
 static void omap_hwmod_am33xx_rst(void)
@@ -1055,12 +832,6 @@ static void omap_hwmod_am43xx_clkctrl(void)
 	CLKCTRL(am33xx_epwmss0_hwmod, AM43XX_CM_PER_EPWMSS0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_epwmss1_hwmod, AM43XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_epwmss2_hwmod, AM43XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio1_hwmod, AM43XX_CM_PER_GPIO1_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio2_hwmod, AM43XX_CM_PER_GPIO2_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_gpio3_hwmod, AM43XX_CM_PER_GPIO3_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mailbox_hwmod, AM43XX_CM_PER_MAILBOX0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mcasp0_hwmod, AM43XX_CM_PER_MCASP0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_mcasp1_hwmod, AM43XX_CM_PER_MCASP1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spi0_hwmod, AM43XX_CM_PER_SPI0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spi1_hwmod, AM43XX_CM_PER_SPI1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_spinlock_hwmod, AM43XX_CM_PER_SPINLOCK_CLKCTRL_OFFSET);
@@ -1075,7 +846,6 @@ static void omap_hwmod_am43xx_clkctrl(void)
 	CLKCTRL(am33xx_smartreflex1_hwmod,
 		AM43XX_CM_WKUP_SMARTREFLEX1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_timer1_hwmod, AM43XX_CM_WKUP_TIMER1_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_wd_timer1_hwmod, AM43XX_CM_WKUP_WDT1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_rtc_hwmod, AM43XX_CM_RTC_RTC_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_gpmc_hwmod, AM43XX_CM_PER_GPMC_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_l4_ls_hwmod, AM43XX_CM_PER_L4LS_CLKCTRL_OFFSET);
@@ -1092,7 +862,6 @@ static void omap_hwmod_am43xx_clkctrl(void)
 	CLKCTRL(am33xx_ocmcram_hwmod , AM43XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_sha0_hwmod , AM43XX_CM_PER_SHA0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_aes0_hwmod , AM43XX_CM_PER_AES0_CLKCTRL_OFFSET);
-	CLKCTRL(am33xx_rng_hwmod, AM43XX_CM_PER_RNG_CLKCTRL_OFFSET);
 }
 
 static void omap_hwmod_am43xx_rst(void)
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 54524775f278..c63f66427e46 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -21,7 +21,6 @@
 #include "cm33xx.h"
 #include "prm33xx.h"
 #include "prm-regbits-33xx.h"
-#include "wd_timer.h"
 #include "omap_hwmod_33xx_43xx_common_data.h"
 
 /*
@@ -257,39 +256,6 @@ static struct omap_hwmod am33xx_lcdc_hwmod = {
 };
 
 /*
- * 'usb_otg' class
- * high-speed on-the-go universal serial bus (usb_otg) controller
- */
-static struct omap_hwmod_class_sysconfig am33xx_usbhsotg_sysc = {
-	.rev_offs	= 0x0,
-	.sysc_offs	= 0x10,
-	.sysc_flags	= (SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			  MSTANDBY_FORCE | MSTANDBY_NO | MSTANDBY_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class am33xx_usbotg_class = {
-	.name		= "usbotg",
-	.sysc		= &am33xx_usbhsotg_sysc,
-};
-
-static struct omap_hwmod am33xx_usbss_hwmod = {
-	.name		= "usb_otg_hs",
-	.class		= &am33xx_usbotg_class,
-	.clkdm_name	= "l3s_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY,
-	.main_clk	= "usbotg_fck",
-	.prcm		= {
-		.omap4	= {
-			.clkctrl_offs	= AM33XX_CM_PER_USB0_CLKCTRL_OFFSET,
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-
-/*
  * Interfaces
  */
 
@@ -388,24 +354,6 @@ static struct omap_hwmod_ocp_if am33xx_l4_wkup__timer1 = {
 	.user		= OCP_USER_MPU,
 };
 
-/* l4 wkup -> wd_timer1 */
-static struct omap_hwmod_ocp_if am33xx_l4_wkup__wd_timer1 = {
-	.master		= &am33xx_l4_wkup_hwmod,
-	.slave		= &am33xx_wd_timer1_hwmod,
-	.clk		= "dpll_core_m4_div2_ck",
-	.user		= OCP_USER_MPU,
-};
-
-/* usbss */
-/* l3 s -> USBSS interface */
-static struct omap_hwmod_ocp_if am33xx_l3_s__usbss = {
-	.master		= &am33xx_l3_s_hwmod,
-	.slave		= &am33xx_usbss_hwmod,
-	.clk		= "l3s_gclk",
-	.user		= OCP_USER_MPU,
-	.flags		= OCPIF_SWSUP_IDLE,
-};
-
 static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
 	&am33xx_l3_main__emif,
 	&am33xx_mpu__l3_main,
@@ -428,13 +376,9 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
 	&am33xx_l4_wkup__timer1,
 	&am33xx_l4_wkup__rtc,
 	&am33xx_l4_wkup__adc_tsc,
-	&am33xx_l4_wkup__wd_timer1,
 	&am33xx_l4_hs__pruss,
 	&am33xx_l4_per__dcan0,
 	&am33xx_l4_per__dcan1,
-	&am33xx_l4_per__mailbox,
-	&am33xx_l4_ls__mcasp0,
-	&am33xx_l4_ls__mcasp1,
 	&am33xx_l4_ls__timer2,
 	&am33xx_l4_ls__timer3,
 	&am33xx_l4_ls__timer4,
@@ -455,10 +399,8 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
 	&am33xx_l3_main__tptc1,
 	&am33xx_l3_main__tptc2,
 	&am33xx_l3_main__ocmc,
-	&am33xx_l3_s__usbss,
 	&am33xx_l3_main__sha0,
 	&am33xx_l3_main__aes0,
-	&am33xx_l4_per__rng,
 	NULL,
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
index 5c3db6b6438b..b81f83466c94 100644
--- a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
@@ -18,8 +18,6 @@
 #include "omap_hwmod_33xx_43xx_common_data.h"
 #include "prcm43xx.h"
 #include "omap_hwmod_common_data.h"
-#include "hdq1w.h"
-
 
 /* IP blocks */
 static struct omap_hwmod am43xx_emif_hwmod = {
@@ -468,32 +466,6 @@ static struct omap_hwmod am43xx_dss_rfbi_hwmod = {
 	.parent_hwmod	= &am43xx_dss_core_hwmod,
 };
 
-/* HDQ1W */
-static struct omap_hwmod_class_sysconfig am43xx_hdq1w_sysc = {
-	.rev_offs       = 0x0000,
-	.sysc_offs      = 0x0014,
-	.syss_offs      = 0x0018,
-	.sysc_flags     = (SYSC_HAS_SOFTRESET | SYSC_HAS_AUTOIDLE),
-	.sysc_fields    = &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class am43xx_hdq1w_hwmod_class = {
-	.name   = "hdq1w",
-	.sysc   = &am43xx_hdq1w_sysc,
-	.reset	= &omap_hdq1w_reset,
-};
-
-static struct omap_hwmod am43xx_hdq1w_hwmod = {
-	.name           = "hdq1w",
-	.class          = &am43xx_hdq1w_hwmod_class,
-	.clkdm_name     = "l4ls_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = {
 	.rev_offs       = 0x0,
@@ -604,13 +576,6 @@ static struct omap_hwmod_ocp_if am43xx_l4_wkup__timer1 = {
 	.user		= OCP_USER_MPU,
 };
 
-static struct omap_hwmod_ocp_if am43xx_l4_wkup__wd_timer1 = {
-	.master		= &am33xx_l4_wkup_hwmod,
-	.slave		= &am33xx_wd_timer1_hwmod,
-	.clk		= "sys_clkin_ck",
-	.user		= OCP_USER_MPU,
-};
-
 static struct omap_hwmod_ocp_if am33xx_l4_wkup__synctimer = {
 	.master		= &am33xx_l4_wkup_hwmod,
 	.slave		= &am43xx_synctimer_hwmod,
@@ -751,13 +716,6 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__dss_rfbi = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = {
-	.master         = &am33xx_l4_ls_hwmod,
-	.slave          = &am43xx_hdq1w_hwmod,
-	.clk            = "l4ls_gclk",
-	.user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = {
 	.master         = &am43xx_vpfe0_hwmod,
 	.slave          = &am33xx_l3_main_hwmod,
@@ -824,15 +782,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
 	&am43xx_l4_wkup__smartreflex0,
 	&am43xx_l4_wkup__smartreflex1,
 	&am43xx_l4_wkup__timer1,
-	&am43xx_l4_wkup__wd_timer1,
 	&am43xx_l4_wkup__adc_tsc,
 	&am43xx_l3_s__qspi,
 	&am33xx_l4_per__dcan0,
 	&am33xx_l4_per__dcan1,
-	&am33xx_l4_per__mailbox,
-	&am33xx_l4_per__rng,
-	&am33xx_l4_ls__mcasp0,
-	&am33xx_l4_ls__mcasp1,
 	&am33xx_l4_ls__timer2,
 	&am33xx_l4_ls__timer3,
 	&am33xx_l4_ls__timer4,
@@ -863,7 +816,6 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
 	&am43xx_l4_ls__dss,
 	&am43xx_l4_ls__dss_dispc,
 	&am43xx_l4_ls__dss_rfbi,
-	&am43xx_l4_ls__hdq1w,
 	&am43xx_l3__vpfe0,
 	&am43xx_l3__vpfe1,
 	&am43xx_l4_ls__vpfe0,
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 28ea2960a9b2..292f544bd62d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -790,7 +790,7 @@ static struct omap_hwmod_class omap44xx_sha0_hwmod_class = {
 	.sysc		= &omap44xx_sha0_sysc,
 };
 
-struct omap_hwmod omap44xx_sha0_hwmod = {
+static struct omap_hwmod omap44xx_sha0_hwmod = {
 	.name		= "sham",
 	.class		= &omap44xx_sha0_hwmod_class,
 	.clkdm_name	= "l4_secure_clkdm",
@@ -974,7 +974,7 @@ static struct omap_hwmod omap44xx_des_hwmod = {
 	},
 };
 
-struct omap_hwmod_ocp_if omap44xx_l3_main_2__des = {
+static struct omap_hwmod_ocp_if omap44xx_l3_main_2__des = {
 	.master		= &omap44xx_l3_main_2_hwmod,
 	.slave		= &omap44xx_des_hwmod,
 	.clk		= "l3_div_ck",
@@ -1061,40 +1061,6 @@ static struct omap_hwmod omap44xx_gpmc_hwmod = {
 	},
 };
 
-/*
- * 'hdq1w' class
- * hdq / 1-wire serial interface controller
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_hdq1w_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0014,
-	.syss_offs	= 0x0018,
-	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SOFTRESET |
-			   SYSS_HAS_RESET_STATUS),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap44xx_hdq1w_hwmod_class = {
-	.name	= "hdq1w",
-	.sysc	= &omap44xx_hdq1w_sysc,
-};
-
-/* hdq1w */
-static struct omap_hwmod omap44xx_hdq1w_hwmod = {
-	.name		= "hdq1w",
-	.class		= &omap44xx_hdq1w_hwmod_class,
-	.clkdm_name	= "l4_per_clkdm",
-	.flags		= HWMOD_INIT_NO_RESET, /* XXX temporary */
-	.main_clk	= "func_12m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM_L4PER_HDQ1W_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_L4PER_HDQ1W_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 /*
  * 'hsi' class
@@ -1288,180 +1254,6 @@ static struct omap_hwmod omap44xx_kbd_hwmod = {
 	},
 };
 
-/*
- * 'mailbox' class
- * mailbox module allowing communication between the on-chip processors using a
- * queued mailbox-interrupt mechanism.
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_mailbox_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class omap44xx_mailbox_hwmod_class = {
-	.name	= "mailbox",
-	.sysc	= &omap44xx_mailbox_sysc,
-};
-
-/* mailbox */
-static struct omap_hwmod omap44xx_mailbox_hwmod = {
-	.name		= "mailbox",
-	.class		= &omap44xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4_cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM_L4CFG_MAILBOX_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_L4CFG_MAILBOX_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/*
- * 'mcasp' class
- * multi-channel audio serial port controller
- */
-
-/* The IP is not compliant to type1 / type2 scheme */
-static struct omap_hwmod_class_sysconfig omap44xx_mcasp_sysc = {
-	.rev_offs	= 0,
-	.sysc_offs	= 0x0004,
-	.sysc_flags	= SYSC_HAS_SIDLEMODE,
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type_mcasp,
-};
-
-static struct omap_hwmod_class omap44xx_mcasp_hwmod_class = {
-	.name	= "mcasp",
-	.sysc	= &omap44xx_mcasp_sysc,
-};
-
-/* mcasp */
-static struct omap_hwmod omap44xx_mcasp_hwmod = {
-	.name		= "mcasp",
-	.class		= &omap44xx_mcasp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "func_mcasp_abe_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM1_ABE_MCASP_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_ABE_MCASP_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/*
- * 'mcbsp' class
- * multi channel buffered serial port controller
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_mcbsp_sysc = {
-	.rev_offs	= -ENODEV,
-	.sysc_offs	= 0x008c,
-	.sysc_flags	= (SYSC_HAS_CLOCKACTIVITY | SYSC_HAS_ENAWAKEUP |
-			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap44xx_mcbsp_hwmod_class = {
-	.name	= "mcbsp",
-	.sysc	= &omap44xx_mcbsp_sysc,
-};
-
-/* mcbsp1 */
-static struct omap_hwmod_opt_clk mcbsp1_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp1_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap44xx_mcbsp1_hwmod = {
-	.name		= "mcbsp1",
-	.class		= &omap44xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "func_mcbsp1_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM1_ABE_MCBSP1_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_ABE_MCBSP1_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp1_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp1_opt_clks),
-};
-
-/* mcbsp2 */
-static struct omap_hwmod_opt_clk mcbsp2_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp2_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap44xx_mcbsp2_hwmod = {
-	.name		= "mcbsp2",
-	.class		= &omap44xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "func_mcbsp2_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM1_ABE_MCBSP2_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_ABE_MCBSP2_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp2_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp2_opt_clks),
-};
-
-/* mcbsp3 */
-static struct omap_hwmod_opt_clk mcbsp3_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp3_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap44xx_mcbsp3_hwmod = {
-	.name		= "mcbsp3",
-	.class		= &omap44xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "func_mcbsp3_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM1_ABE_MCBSP3_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_ABE_MCBSP3_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp3_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp3_opt_clks),
-};
-
-/* mcbsp4 */
-static struct omap_hwmod_opt_clk mcbsp4_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp4_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap44xx_mcbsp4_hwmod = {
-	.name		= "mcbsp4",
-	.class		= &omap44xx_mcbsp_hwmod_class,
-	.clkdm_name	= "l4_per_clkdm",
-	.main_clk	= "per_mcbsp4_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM_L4PER_MCBSP4_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_L4PER_MCBSP4_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp4_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp4_opt_clks),
-};
 
 /*
  * 'mcpdm' class
@@ -2295,51 +2087,6 @@ static struct omap_hwmod omap44xx_usb_host_hs_hwmod = {
 };
 
 /*
- * 'usb_otg_hs' class
- * high-speed on-the-go universal serial bus (usb_otg_hs) controller
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_usb_otg_hs_sysc = {
-	.rev_offs	= 0x0400,
-	.sysc_offs	= 0x0404,
-	.syss_offs	= 0x0408,
-	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP |
-			   SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO |
-			   MSTANDBY_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap44xx_usb_otg_hs_hwmod_class = {
-	.name	= "usb_otg_hs",
-	.sysc	= &omap44xx_usb_otg_hs_sysc,
-};
-
-/* usb_otg_hs */
-static struct omap_hwmod_opt_clk usb_otg_hs_opt_clks[] = {
-	{ .role = "xclk", .clk = "usb_otg_hs_xclk" },
-};
-
-static struct omap_hwmod omap44xx_usb_otg_hs_hwmod = {
-	.name		= "usb_otg_hs",
-	.class		= &omap44xx_usb_otg_hs_hwmod_class,
-	.clkdm_name	= "l3_init_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY,
-	.main_clk	= "usb_otg_hs_ick",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP4_CM_L3INIT_USB_OTG_CLKCTRL_OFFSET,
-			.context_offs = OMAP4_RM_L3INIT_USB_OTG_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_HWCTRL,
-		},
-	},
-	.opt_clks	= usb_otg_hs_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(usb_otg_hs_opt_clks),
-};
-
-/*
  * 'usb_tll_hs' class
  * usb_tll_hs module is the adapter on the usb_host_hs ports
  */
@@ -2546,14 +2293,6 @@ static struct omap_hwmod_ocp_if omap44xx_usb_host_hs__l3_main_2 = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* usb_otg_hs -> l3_main_2 */
-static struct omap_hwmod_ocp_if omap44xx_usb_otg_hs__l3_main_2 = {
-	.master		= &omap44xx_usb_otg_hs_hwmod,
-	.slave		= &omap44xx_l3_main_2_hwmod,
-	.clk		= "l3_div_ck",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l3_main_1 -> l3_main_3 */
 static struct omap_hwmod_ocp_if omap44xx_l3_main_1__l3_main_3 = {
 	.master		= &omap44xx_l3_main_1_hwmod,
@@ -2898,14 +2637,6 @@ static struct omap_hwmod_ocp_if omap44xx_l3_main_2__gpmc = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_per -> hdq1w */
-static struct omap_hwmod_ocp_if omap44xx_l4_per__hdq1w = {
-	.master		= &omap44xx_l4_per_hwmod,
-	.slave		= &omap44xx_hdq1w_hwmod,
-	.clk		= "l4_div_ck",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_cfg -> hsi */
 static struct omap_hwmod_ocp_if omap44xx_l4_cfg__hsi = {
 	.master		= &omap44xx_l4_cfg_hwmod,
@@ -2954,62 +2685,6 @@ static struct omap_hwmod_ocp_if omap44xx_l4_wkup__kbd = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> mailbox */
-static struct omap_hwmod_ocp_if omap44xx_l4_cfg__mailbox = {
-	.master		= &omap44xx_l4_cfg_hwmod,
-	.slave		= &omap44xx_mailbox_hwmod,
-	.clk		= "l4_div_ck",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_abe -> mcasp */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcasp = {
-	.master		= &omap44xx_l4_abe_hwmod,
-	.slave		= &omap44xx_mcasp_hwmod,
-	.clk		= "ocp_abe_iclk",
-	.user		= OCP_USER_MPU,
-};
-
-/* l4_abe -> mcasp (dma) */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcasp_dma = {
-	.master		= &omap44xx_l4_abe_hwmod,
-	.slave		= &omap44xx_mcasp_hwmod,
-	.clk		= "ocp_abe_iclk",
-	.user		= OCP_USER_SDMA,
-};
-
-/* l4_abe -> mcbsp1 */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcbsp1 = {
-	.master		= &omap44xx_l4_abe_hwmod,
-	.slave		= &omap44xx_mcbsp1_hwmod,
-	.clk		= "ocp_abe_iclk",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_abe -> mcbsp2 */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcbsp2 = {
-	.master		= &omap44xx_l4_abe_hwmod,
-	.slave		= &omap44xx_mcbsp2_hwmod,
-	.clk		= "ocp_abe_iclk",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_abe -> mcbsp3 */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcbsp3 = {
-	.master		= &omap44xx_l4_abe_hwmod,
-	.slave		= &omap44xx_mcbsp3_hwmod,
-	.clk		= "ocp_abe_iclk",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per -> mcbsp4 */
-static struct omap_hwmod_ocp_if omap44xx_l4_per__mcbsp4 = {
-	.master		= &omap44xx_l4_per_hwmod,
-	.slave		= &omap44xx_mcbsp4_hwmod,
-	.clk		= "l4_div_ck",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_abe -> mcpdm */
 static struct omap_hwmod_ocp_if omap44xx_l4_abe__mcpdm = {
 	.master		= &omap44xx_l4_abe_hwmod,
@@ -3242,14 +2917,6 @@ static struct omap_hwmod_ocp_if omap44xx_l4_cfg__usb_host_hs = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> usb_otg_hs */
-static struct omap_hwmod_ocp_if omap44xx_l4_cfg__usb_otg_hs = {
-	.master		= &omap44xx_l4_cfg_hwmod,
-	.slave		= &omap44xx_usb_otg_hs_hwmod,
-	.clk		= "l4_div_ck",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_cfg -> usb_tll_hs */
 static struct omap_hwmod_ocp_if omap44xx_l4_cfg__usb_tll_hs = {
 	.master		= &omap44xx_l4_cfg_hwmod,
@@ -3296,7 +2963,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
 	&omap44xx_l4_cfg__l3_main_2,
 	/* &omap44xx_usb_host_fs__l3_main_2, */
 	&omap44xx_usb_host_hs__l3_main_2,
-	&omap44xx_usb_otg_hs__l3_main_2,
 	&omap44xx_l3_main_1__l3_main_3,
 	&omap44xx_l3_main_2__l3_main_3,
 	&omap44xx_l4_cfg__l3_main_3,
@@ -3339,20 +3005,12 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
 	&omap44xx_l4_per__elm,
 	&omap44xx_l4_cfg__fdif,
 	&omap44xx_l3_main_2__gpmc,
-	&omap44xx_l4_per__hdq1w,
 	&omap44xx_l4_cfg__hsi,
 	&omap44xx_l3_main_2__ipu,
 	&omap44xx_l3_main_2__iss,
 	/* &omap44xx_iva__sl2if, */
 	&omap44xx_l3_main_2__iva,
 	&omap44xx_l4_wkup__kbd,
-	&omap44xx_l4_cfg__mailbox,
-	&omap44xx_l4_abe__mcasp,
-	&omap44xx_l4_abe__mcasp_dma,
-	&omap44xx_l4_abe__mcbsp1,
-	&omap44xx_l4_abe__mcbsp2,
-	&omap44xx_l4_abe__mcbsp3,
-	&omap44xx_l4_per__mcbsp4,
 	&omap44xx_l4_abe__mcpdm,
 	&omap44xx_l3_main_2__mmu_ipu,
 	&omap44xx_l4_cfg__mmu_dsp,
@@ -3384,7 +3042,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
 	&omap44xx_l4_per__timer11,
 	/* &omap44xx_l4_cfg__usb_host_fs, */
 	&omap44xx_l4_cfg__usb_host_hs,
-	&omap44xx_l4_cfg__usb_otg_hs,
 	&omap44xx_l4_cfg__usb_tll_hs,
 	&omap44xx_mpu__emif1,
 	&omap44xx_mpu__emif2,
diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
index 8006b4383534..cc5ad6acab1d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@@ -24,7 +24,6 @@
 #include "cm1_54xx.h"
 #include "cm2_54xx.h"
 #include "prm54xx.h"
-#include "wd_timer.h"
 
 /* Base offset for all OMAP5 interrupts external to MPUSS */
 #define OMAP54XX_IRQ_GIC_START	32
@@ -629,124 +628,6 @@ static struct omap_hwmod omap54xx_kbd_hwmod = {
 };
 
 /*
- * 'mailbox' class
- * mailbox module allowing communication between the on-chip processors using a
- * queued mailbox-interrupt mechanism.
- */
-
-static struct omap_hwmod_class_sysconfig omap54xx_mailbox_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class omap54xx_mailbox_hwmod_class = {
-	.name	= "mailbox",
-	.sysc	= &omap54xx_mailbox_sysc,
-};
-
-/* mailbox */
-static struct omap_hwmod omap54xx_mailbox_hwmod = {
-	.name		= "mailbox",
-	.class		= &omap54xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_L4CFG_MAILBOX_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_L4CFG_MAILBOX_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/*
- * 'mcbsp' class
- * multi channel buffered serial port controller
- */
-
-static struct omap_hwmod_class_sysconfig omap54xx_mcbsp_sysc = {
-	.rev_offs	= -ENODEV,
-	.sysc_offs	= 0x008c,
-	.sysc_flags	= (SYSC_HAS_CLOCKACTIVITY | SYSC_HAS_ENAWAKEUP |
-			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap54xx_mcbsp_hwmod_class = {
-	.name	= "mcbsp",
-	.sysc	= &omap54xx_mcbsp_sysc,
-};
-
-/* mcbsp1 */
-static struct omap_hwmod_opt_clk mcbsp1_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp1_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap54xx_mcbsp1_hwmod = {
-	.name		= "mcbsp1",
-	.class		= &omap54xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "mcbsp1_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_ABE_MCBSP1_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_ABE_MCBSP1_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp1_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp1_opt_clks),
-};
-
-/* mcbsp2 */
-static struct omap_hwmod_opt_clk mcbsp2_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp2_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap54xx_mcbsp2_hwmod = {
-	.name		= "mcbsp2",
-	.class		= &omap54xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "mcbsp2_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_ABE_MCBSP2_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_ABE_MCBSP2_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp2_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp2_opt_clks),
-};
-
-/* mcbsp3 */
-static struct omap_hwmod_opt_clk mcbsp3_opt_clks[] = {
-	{ .role = "pad_fck", .clk = "pad_clks_ck" },
-	{ .role = "prcm_fck", .clk = "mcbsp3_sync_mux_ck" },
-};
-
-static struct omap_hwmod omap54xx_mcbsp3_hwmod = {
-	.name		= "mcbsp3",
-	.class		= &omap54xx_mcbsp_hwmod_class,
-	.clkdm_name	= "abe_clkdm",
-	.main_clk	= "mcbsp3_gfclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_ABE_MCBSP3_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_ABE_MCBSP3_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-	.opt_clks	= mcbsp3_opt_clks,
-	.opt_clks_cnt	= ARRAY_SIZE(mcbsp3_opt_clks),
-};
-
-/*
  * 'mcpdm' class
  * multi channel pdm controller (proprietary interface with phoenix power
  * ic)
@@ -795,86 +676,6 @@ static struct omap_hwmod omap54xx_mcpdm_hwmod = {
 	},
 };
 
-/*
- * 'mcspi' class
- * multichannel serial port interface (mcspi) / master/slave synchronous serial
- * bus
- */
-
-static struct omap_hwmod_class_sysconfig omap54xx_mcspi_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
-			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class omap54xx_mcspi_hwmod_class = {
-	.name	= "mcspi",
-	.sysc	= &omap54xx_mcspi_sysc,
-};
-
-/* mcspi1 */
-static struct omap_hwmod omap54xx_mcspi1_hwmod = {
-	.name		= "mcspi1",
-	.class		= &omap54xx_mcspi_hwmod_class,
-	.clkdm_name	= "l4per_clkdm",
-	.main_clk	= "func_48m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_L4PER_MCSPI1_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_L4PER_MCSPI1_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/* mcspi2 */
-static struct omap_hwmod omap54xx_mcspi2_hwmod = {
-	.name		= "mcspi2",
-	.class		= &omap54xx_mcspi_hwmod_class,
-	.clkdm_name	= "l4per_clkdm",
-	.main_clk	= "func_48m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_L4PER_MCSPI2_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_L4PER_MCSPI2_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/* mcspi3 */
-static struct omap_hwmod omap54xx_mcspi3_hwmod = {
-	.name		= "mcspi3",
-	.class		= &omap54xx_mcspi_hwmod_class,
-	.clkdm_name	= "l4per_clkdm",
-	.main_clk	= "func_48m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_L4PER_MCSPI3_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_L4PER_MCSPI3_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/* mcspi4 */
-static struct omap_hwmod omap54xx_mcspi4_hwmod = {
-	.name		= "mcspi4",
-	.class		= &omap54xx_mcspi_hwmod_class,
-	.clkdm_name	= "l4per_clkdm",
-	.main_clk	= "func_48m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_L4PER_MCSPI4_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_L4PER_MCSPI4_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 /*
  * 'mmu' class
@@ -1392,43 +1193,6 @@ static struct omap_hwmod omap54xx_usb_otg_ss_hwmod = {
 	.opt_clks_cnt	= ARRAY_SIZE(usb_otg_ss_opt_clks),
 };
 
-/*
- * 'wd_timer' class
- * 32-bit watchdog upward counter that generates a pulse on the reset pin on
- * overflow condition
- */
-
-static struct omap_hwmod_class_sysconfig omap54xx_wd_timer_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.syss_offs	= 0x0014,
-	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap54xx_wd_timer_hwmod_class = {
-	.name		= "wd_timer",
-	.sysc		= &omap54xx_wd_timer_sysc,
-	.pre_shutdown	= &omap2_wd_timer_disable,
-};
-
-/* wd_timer2 */
-static struct omap_hwmod omap54xx_wd_timer2_hwmod = {
-	.name		= "wd_timer2",
-	.class		= &omap54xx_wd_timer_hwmod_class,
-	.clkdm_name	= "wkupaon_clkdm",
-	.main_clk	= "sys_32k_ck",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = OMAP54XX_CM_WKUPAON_WD_TIMER2_CLKCTRL_OFFSET,
-			.context_offs = OMAP54XX_RM_WKUPAON_WD_TIMER2_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 /*
  * 'ocp2scp' class
@@ -1747,38 +1511,6 @@ static struct omap_hwmod_ocp_if omap54xx_l4_wkup__kbd = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> mailbox */
-static struct omap_hwmod_ocp_if omap54xx_l4_cfg__mailbox = {
-	.master		= &omap54xx_l4_cfg_hwmod,
-	.slave		= &omap54xx_mailbox_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_abe -> mcbsp1 */
-static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcbsp1 = {
-	.master		= &omap54xx_l4_abe_hwmod,
-	.slave		= &omap54xx_mcbsp1_hwmod,
-	.clk		= "abe_iclk",
-	.user		= OCP_USER_MPU,
-};
-
-/* l4_abe -> mcbsp2 */
-static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcbsp2 = {
-	.master		= &omap54xx_l4_abe_hwmod,
-	.slave		= &omap54xx_mcbsp2_hwmod,
-	.clk		= "abe_iclk",
-	.user		= OCP_USER_MPU,
-};
-
-/* l4_abe -> mcbsp3 */
-static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcbsp3 = {
-	.master		= &omap54xx_l4_abe_hwmod,
-	.slave		= &omap54xx_mcbsp3_hwmod,
-	.clk		= "abe_iclk",
-	.user		= OCP_USER_MPU,
-};
-
 /* l4_abe -> mcpdm */
 static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcpdm = {
 	.master		= &omap54xx_l4_abe_hwmod,
@@ -1787,38 +1519,6 @@ static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcpdm = {
 	.user		= OCP_USER_MPU,
 };
 
-/* l4_per -> mcspi1 */
-static struct omap_hwmod_ocp_if omap54xx_l4_per__mcspi1 = {
-	.master		= &omap54xx_l4_per_hwmod,
-	.slave		= &omap54xx_mcspi1_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per -> mcspi2 */
-static struct omap_hwmod_ocp_if omap54xx_l4_per__mcspi2 = {
-	.master		= &omap54xx_l4_per_hwmod,
-	.slave		= &omap54xx_mcspi2_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per -> mcspi3 */
-static struct omap_hwmod_ocp_if omap54xx_l4_per__mcspi3 = {
-	.master		= &omap54xx_l4_per_hwmod,
-	.slave		= &omap54xx_mcspi3_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per -> mcspi4 */
-static struct omap_hwmod_ocp_if omap54xx_l4_per__mcspi4 = {
-	.master		= &omap54xx_l4_per_hwmod,
-	.slave		= &omap54xx_mcspi4_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_cfg -> mpu */
 static struct omap_hwmod_ocp_if omap54xx_l4_cfg__mpu = {
 	.master		= &omap54xx_l4_cfg_hwmod,
@@ -1955,14 +1655,6 @@ static struct omap_hwmod_ocp_if omap54xx_l4_cfg__usb_otg_ss = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_wkup -> wd_timer2 */
-static struct omap_hwmod_ocp_if omap54xx_l4_wkup__wd_timer2 = {
-	.master		= &omap54xx_l4_wkup_hwmod,
-	.slave		= &omap54xx_wd_timer2_hwmod,
-	.clk		= "wkupaon_iclk_mux",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 static struct omap_hwmod_ocp_if *omap54xx_hwmod_ocp_ifs[] __initdata = {
 	&omap54xx_l3_main_1__dmm,
 	&omap54xx_l3_main_3__l3_instr,
@@ -1994,15 +1686,7 @@ static struct omap_hwmod_ocp_if *omap54xx_hwmod_ocp_ifs[] __initdata = {
 	&omap54xx_mpu__emif2,
 	&omap54xx_l3_main_2__mmu_ipu,
 	&omap54xx_l4_wkup__kbd,
-	&omap54xx_l4_cfg__mailbox,
-	&omap54xx_l4_abe__mcbsp1,
-	&omap54xx_l4_abe__mcbsp2,
-	&omap54xx_l4_abe__mcbsp3,
 	&omap54xx_l4_abe__mcpdm,
-	&omap54xx_l4_per__mcspi1,
-	&omap54xx_l4_per__mcspi2,
-	&omap54xx_l4_per__mcspi3,
-	&omap54xx_l4_per__mcspi4,
 	&omap54xx_l4_cfg__mpu,
 	&omap54xx_l4_cfg__spinlock,
 	&omap54xx_l4_cfg__ocp2scp1,
@@ -2020,7 +1704,6 @@ static struct omap_hwmod_ocp_if *omap54xx_hwmod_ocp_ifs[] __initdata = {
 	&omap54xx_l4_cfg__usb_host_hs,
 	&omap54xx_l4_cfg__usb_tll_hs,
 	&omap54xx_l4_cfg__usb_otg_ss,
-	&omap54xx_l4_wkup__wd_timer2,
 	&omap54xx_l4_cfg__ocp2scp3,
 	&omap54xx_l4_cfg__sata,
 	NULL,
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index e5bd549d2a5e..f8715bd96687 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -24,7 +24,6 @@
 #include "cm1_7xx.h"
 #include "cm2_7xx.h"
 #include "prm7xx.h"
-#include "wd_timer.h"
 #include "soc.h"
 
 /* Base offset for all DRA7XX interrupts external to MPUSS */
@@ -683,7 +682,7 @@ static struct omap_hwmod_class dra7xx_sha0_hwmod_class = {
 	.sysc		= &dra7xx_sha0_sysc,
 };
 
-struct omap_hwmod dra7xx_sha0_hwmod = {
+static struct omap_hwmod dra7xx_sha0_hwmod = {
 	.name		= "sham",
 	.class		= &dra7xx_sha0_hwmod_class,
 	.clkdm_name	= "l4sec_clkdm",
@@ -772,229 +771,7 @@ static struct omap_hwmod dra7xx_gpmc_hwmod = {
 	},
 };
 
-/*
- * 'hdq1w' class
- *
- */
-
-static struct omap_hwmod_class_sysconfig dra7xx_hdq1w_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0014,
-	.syss_offs	= 0x0018,
-	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SOFTRESET |
-			   SYSS_HAS_RESET_STATUS),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
 
-static struct omap_hwmod_class dra7xx_hdq1w_hwmod_class = {
-	.name	= "hdq1w",
-	.sysc	= &dra7xx_hdq1w_sysc,
-};
-
-/* hdq1w */
-
-static struct omap_hwmod dra7xx_hdq1w_hwmod = {
-	.name		= "hdq1w",
-	.class		= &dra7xx_hdq1w_hwmod_class,
-	.clkdm_name	= "l4per_clkdm",
-	.flags		= HWMOD_INIT_NO_RESET,
-	.main_clk	= "func_12m_fclk",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4PER_HDQ1W_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4PER_HDQ1W_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/*
- * 'mailbox' class
- *
- */
-
-static struct omap_hwmod_class_sysconfig dra7xx_mailbox_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-	.sysc_fields	= &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class dra7xx_mailbox_hwmod_class = {
-	.name	= "mailbox",
-	.sysc	= &dra7xx_mailbox_sysc,
-};
-
-/* mailbox1 */
-static struct omap_hwmod dra7xx_mailbox1_hwmod = {
-	.name		= "mailbox1",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX1_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX1_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox2 */
-static struct omap_hwmod dra7xx_mailbox2_hwmod = {
-	.name		= "mailbox2",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX2_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX2_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox3 */
-static struct omap_hwmod dra7xx_mailbox3_hwmod = {
-	.name		= "mailbox3",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX3_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX3_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox4 */
-static struct omap_hwmod dra7xx_mailbox4_hwmod = {
-	.name		= "mailbox4",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX4_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX4_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox5 */
-static struct omap_hwmod dra7xx_mailbox5_hwmod = {
-	.name		= "mailbox5",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX5_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX5_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox6 */
-static struct omap_hwmod dra7xx_mailbox6_hwmod = {
-	.name		= "mailbox6",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX6_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX6_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox7 */
-static struct omap_hwmod dra7xx_mailbox7_hwmod = {
-	.name		= "mailbox7",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX7_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX7_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox8 */
-static struct omap_hwmod dra7xx_mailbox8_hwmod = {
-	.name		= "mailbox8",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX8_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX8_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox9 */
-static struct omap_hwmod dra7xx_mailbox9_hwmod = {
-	.name		= "mailbox9",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX9_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX9_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox10 */
-static struct omap_hwmod dra7xx_mailbox10_hwmod = {
-	.name		= "mailbox10",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX10_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX10_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox11 */
-static struct omap_hwmod dra7xx_mailbox11_hwmod = {
-	.name		= "mailbox11",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX11_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX11_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox12 */
-static struct omap_hwmod dra7xx_mailbox12_hwmod = {
-	.name		= "mailbox12",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX12_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX12_CONTEXT_OFFSET,
-		},
-	},
-};
-
-/* mailbox13 */
-static struct omap_hwmod dra7xx_mailbox13_hwmod = {
-	.name		= "mailbox13",
-	.class		= &dra7xx_mailbox_hwmod_class,
-	.clkdm_name	= "l4cfg_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4CFG_MAILBOX13_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4CFG_MAILBOX13_CONTEXT_OFFSET,
-		},
-	},
-};
 
 /*
  * 'mpu' class
@@ -1655,34 +1432,6 @@ static struct omap_hwmod dra7xx_des_hwmod = {
 	},
 };
 
-/* rng */
-static struct omap_hwmod_class_sysconfig dra7xx_rng_sysc = {
-	.rev_offs       = 0x1fe0,
-	.sysc_offs      = 0x1fe4,
-	.sysc_flags     = SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE,
-	.idlemodes      = SIDLE_FORCE | SIDLE_NO,
-	.sysc_fields    = &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class dra7xx_rng_hwmod_class = {
-	.name           = "rng",
-	.sysc           = &dra7xx_rng_sysc,
-};
-
-static struct omap_hwmod dra7xx_rng_hwmod = {
-	.name           = "rng",
-	.class          = &dra7xx_rng_hwmod_class,
-	.flags		= HWMOD_SWSUP_SIDLE,
-	.clkdm_name     = "l4sec_clkdm",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_L4SEC_RNG_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_L4SEC_RNG_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_HWCTRL,
-		},
-	},
-};
-
 /*
  * 'usb_otg_ss' class
  *
@@ -1815,43 +1564,6 @@ static struct omap_hwmod dra7xx_vcp2_hwmod = {
 	},
 };
 
-/*
- * 'wd_timer' class
- *
- */
-
-static struct omap_hwmod_class_sysconfig dra7xx_wd_timer_sysc = {
-	.rev_offs	= 0x0000,
-	.sysc_offs	= 0x0010,
-	.syss_offs	= 0x0014,
-	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_SIDLEMODE |
-			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP),
-	.sysc_fields	= &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class dra7xx_wd_timer_hwmod_class = {
-	.name		= "wd_timer",
-	.sysc		= &dra7xx_wd_timer_sysc,
-	.pre_shutdown	= &omap2_wd_timer_disable,
-	.reset		= &omap2_wd_timer_reset,
-};
-
-/* wd_timer2 */
-static struct omap_hwmod dra7xx_wd_timer2_hwmod = {
-	.name		= "wd_timer2",
-	.class		= &dra7xx_wd_timer_hwmod_class,
-	.clkdm_name	= "wkupaon_clkdm",
-	.main_clk	= "sys_32k_ck",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_WKUPAON_WD_TIMER2_CLKCTRL_OFFSET,
-			.context_offs = DRA7XX_RM_WKUPAON_WD_TIMER2_CONTEXT_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
 
 
 /*
@@ -2090,118 +1802,6 @@ static struct omap_hwmod_ocp_if dra7xx_l3_main_1__gpmc = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_per1 -> hdq1w */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__hdq1w = {
-	.master		= &dra7xx_l4_per1_hwmod,
-	.slave		= &dra7xx_hdq1w_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> mailbox1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__mailbox1 = {
-	.master		= &dra7xx_l4_cfg_hwmod,
-	.slave		= &dra7xx_mailbox1_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox2 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox2_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox3 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox3 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox3_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox4 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox4 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox4_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox5 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox5 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox5_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox6 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox6 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox6_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox7 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox7 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox7_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox8 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox8 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox8_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox9 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox9 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox9_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox10 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox10 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox10_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox11 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox11 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox11_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox12 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox12 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox12_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per3 -> mailbox13 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox13 = {
-	.master		= &dra7xx_l4_per3_hwmod,
-	.slave		= &dra7xx_mailbox13_hwmod,
-	.clk		= "l3_iclk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_cfg -> mpu */
 static struct omap_hwmod_ocp_if dra7xx_l4_cfg__mpu = {
 	.master		= &dra7xx_l4_cfg_hwmod,
@@ -2442,13 +2042,6 @@ static struct omap_hwmod_ocp_if dra7xx_l4_per1__des = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_per1 -> rng */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__rng = {
-	.master         = &dra7xx_l4_per1_hwmod,
-	.slave          = &dra7xx_rng_hwmod,
-	.user           = OCP_USER_MPU,
-};
-
 /* l4_per3 -> usb_otg_ss1 */
 static struct omap_hwmod_ocp_if dra7xx_l4_per3__usb_otg_ss1 = {
 	.master		= &dra7xx_l4_per3_hwmod,
@@ -2513,14 +2106,6 @@ static struct omap_hwmod_ocp_if dra7xx_l4_per2__vcp2 = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_wkup -> wd_timer2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_wkup__wd_timer2 = {
-	.master		= &dra7xx_l4_wkup_hwmod,
-	.slave		= &dra7xx_wd_timer2_hwmod,
-	.clk		= "wkupaon_iclk_mux",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_per2 -> epwmss0 */
 static struct omap_hwmod_ocp_if dra7xx_l4_per2__epwmss0 = {
 	.master		= &dra7xx_l4_per2_hwmod,
@@ -2575,20 +2160,6 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
 	&dra7xx_l3_main_1__sha0,
 	&dra7xx_l4_per1__elm,
 	&dra7xx_l3_main_1__gpmc,
-	&dra7xx_l4_per1__hdq1w,
-	&dra7xx_l4_cfg__mailbox1,
-	&dra7xx_l4_per3__mailbox2,
-	&dra7xx_l4_per3__mailbox3,
-	&dra7xx_l4_per3__mailbox4,
-	&dra7xx_l4_per3__mailbox5,
-	&dra7xx_l4_per3__mailbox6,
-	&dra7xx_l4_per3__mailbox7,
-	&dra7xx_l4_per3__mailbox8,
-	&dra7xx_l4_per3__mailbox9,
-	&dra7xx_l4_per3__mailbox10,
-	&dra7xx_l4_per3__mailbox11,
-	&dra7xx_l4_per3__mailbox12,
-	&dra7xx_l4_per3__mailbox13,
 	&dra7xx_l4_cfg__mpu,
 	&dra7xx_l4_cfg__ocp2scp1,
 	&dra7xx_l4_cfg__ocp2scp3,
@@ -2624,7 +2195,6 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
 	&dra7xx_l4_per2__vcp1,
 	&dra7xx_l3_main_1__vcp2,
 	&dra7xx_l4_per2__vcp2,
-	&dra7xx_l4_wkup__wd_timer2,
 	&dra7xx_l4_per2__epwmss0,
 	&dra7xx_l4_per2__epwmss1,
 	&dra7xx_l4_per2__epwmss2,
@@ -2634,7 +2204,6 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
 /* GP-only hwmod links */
 static struct omap_hwmod_ocp_if *dra7xx_gp_hwmod_ocp_ifs[] __initdata = {
 	&dra7xx_l4_wkup__timer12,
-	&dra7xx_l4_per1__rng,
 	NULL,
 };
 
diff --git a/arch/arm/mach-omap2/omap_twl.c b/arch/arm/mach-omap2/omap_twl.c
index 6787f1e72c6b..a642d3b39e50 100644
--- a/arch/arm/mach-omap2/omap_twl.c
+++ b/arch/arm/mach-omap2/omap_twl.c
@@ -36,11 +36,6 @@
 #define OMAP4_VDD_CORE_SR_VOLT_REG	0x61
 #define OMAP4_VDD_CORE_SR_CMD_REG	0x62
 
-#define OMAP4_VP_CONFIG_ERROROFFSET	0x00
-#define OMAP4_VP_VSTEPMIN_VSTEPMIN	0x01
-#define OMAP4_VP_VSTEPMAX_VSTEPMAX	0x04
-#define OMAP4_VP_VLIMITTO_TIMEOUT_US	200
-
 static bool is_offset_valid;
 static u8 smps_offset;
 
@@ -219,7 +214,8 @@ int __init omap4_twl_init(void)
 {
 	struct voltagedomain *voltdm;
 
-	if (!cpu_is_omap44xx())
+	if (!cpu_is_omap44xx() ||
+	    of_find_compatible_node(NULL, NULL, "motorola,cpcap"))
 		return -ENODEV;
 
 	voltdm = voltdm_lookup("mpu");
diff --git a/arch/arm/mach-omap2/opp4xxx_data.c b/arch/arm/mach-omap2/opp4xxx_data.c
index adea43ea1c60..985aeab9bc2a 100644
--- a/arch/arm/mach-omap2/opp4xxx_data.c
+++ b/arch/arm/mach-omap2/opp4xxx_data.c
@@ -32,20 +32,22 @@
 
 #define OMAP4430_VDD_MPU_OPP50_UV		1025000
 #define OMAP4430_VDD_MPU_OPP100_UV		1200000
-#define OMAP4430_VDD_MPU_OPPTURBO_UV		1313000
-#define OMAP4430_VDD_MPU_OPPNITRO_UV		1375000
+#define OMAP4430_VDD_MPU_OPPTURBO_UV		1325000
+#define OMAP4430_VDD_MPU_OPPNITRO_UV		1388000
+#define OMAP4430_VDD_MPU_OPPNITROSB_UV		1398000
 
 struct omap_volt_data omap443x_vdd_mpu_volt_data[] = {
 	VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPP50_UV, OMAP44XX_CONTROL_FUSE_MPU_OPP50, 0xf4, 0x0c),
 	VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPP100_UV, OMAP44XX_CONTROL_FUSE_MPU_OPP100, 0xf9, 0x16),
 	VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPTURBO_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPTURBO, 0xfa, 0x23),
 	VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPNITRO_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPNITRO, 0xfa, 0x27),
+	VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPNITROSB_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPNITROSB, 0xfa, 0x27),
 	VOLT_DATA_DEFINE(0, 0, 0, 0),
 };
 
-#define OMAP4430_VDD_IVA_OPP50_UV		1013000
-#define OMAP4430_VDD_IVA_OPP100_UV		1188000
-#define OMAP4430_VDD_IVA_OPPTURBO_UV		1300000
+#define OMAP4430_VDD_IVA_OPP50_UV		 950000
+#define OMAP4430_VDD_IVA_OPP100_UV		1114000
+#define OMAP4430_VDD_IVA_OPPTURBO_UV		1291000
 
 struct omap_volt_data omap443x_vdd_iva_volt_data[] = {
 	VOLT_DATA_DEFINE(OMAP4430_VDD_IVA_OPP50_UV, OMAP44XX_CONTROL_FUSE_IVA_OPP50, 0xf4, 0x0c),
@@ -54,8 +56,8 @@ struct omap_volt_data omap443x_vdd_iva_volt_data[] = {
 	VOLT_DATA_DEFINE(0, 0, 0, 0),
 };
 
-#define OMAP4430_VDD_CORE_OPP50_UV		1025000
-#define OMAP4430_VDD_CORE_OPP100_UV		1200000
+#define OMAP4430_VDD_CORE_OPP50_UV		 962000
+#define OMAP4430_VDD_CORE_OPP100_UV		1127000
 
 struct omap_volt_data omap443x_vdd_core_volt_data[] = {
 	VOLT_DATA_DEFINE(OMAP4430_VDD_CORE_OPP50_UV, OMAP44XX_CONTROL_FUSE_CORE_OPP50, 0xf4, 0x0c),
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 2efd18e8824c..e95c224ffc4d 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -7,11 +7,9 @@
 #include <linux/clk.h>
 #include <linux/davinci_emac.h>
 #include <linux/gpio.h>
-#include <linux/gpio/machine.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of_platform.h>
-#include <linux/ti_wilink_st.h>
 #include <linux/wl12xx.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -33,7 +31,6 @@
 #include "omap_device.h"
 #include "omap-secure.h"
 #include "soc.h"
-#include "hsmmc.h"
 
 static struct omap_hsmmc_platform_data __maybe_unused mmc_pdata[2];
 
@@ -146,53 +143,6 @@ static void __init omap3_sbc_t3530_legacy_init(void)
 	omap3_sbc_t3x_usb_hub_init(167, "sb-t35 usb hub");
 }
 
-static struct ti_st_plat_data wilink_pdata = {
-	.nshutdown_gpio = 137,
-	.dev_name = "/dev/ttyO1",
-	.flow_cntrl = 1,
-	.baud_rate = 300000,
-};
-
-static struct platform_device wl18xx_device = {
-	.name	= "kim",
-	.id	= -1,
-	.dev	= {
-		.platform_data = &wilink_pdata,
-	}
-};
-
-static struct ti_st_plat_data wilink7_pdata = {
-	.nshutdown_gpio = 162,
-	.dev_name = "/dev/ttyO1",
-	.flow_cntrl = 1,
-	.baud_rate = 3000000,
-};
-
-static struct platform_device wl128x_device = {
-	.name	= "kim",
-	.id	= -1,
-	.dev	= {
-		.platform_data = &wilink7_pdata,
-	}
-};
-
-static struct platform_device btwilink_device = {
-	.name	= "btwilink",
-	.id	= -1,
-};
-
-static void __init omap3_igep0020_rev_f_legacy_init(void)
-{
-	platform_device_register(&wl18xx_device);
-	platform_device_register(&btwilink_device);
-}
-
-static void __init omap3_igep0030_rev_g_legacy_init(void)
-{
-	platform_device_register(&wl18xx_device);
-	platform_device_register(&btwilink_device);
-}
-
 static void __init omap3_evm_legacy_init(void)
 {
 	hsmmc2_internal_input_clk();
@@ -269,21 +219,13 @@ static void __init am3517_evm_legacy_init(void)
 	am35xx_emac_reset();
 }
 
-static struct platform_device omap3_rom_rng_device = {
-	.name		= "omap3-rom-rng",
-	.id		= -1,
-	.dev	= {
-		.platform_data	= rx51_secure_rng_call,
-	},
-};
-
 static void __init nokia_n900_legacy_init(void)
 {
 	hsmmc2_internal_input_clk();
 	mmc_pdata[0].name = "external";
 	mmc_pdata[1].name = "internal";
 
-	if (omap_type() == OMAP2_DEVICE_TYPE_SEC) {
+	if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
 		if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) {
 			pr_info("RX-51: Enabling ARM errata 430973 workaround\n");
 			/* set IBE to 1 */
@@ -292,9 +234,6 @@ static void __init nokia_n900_legacy_init(void)
 			pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n");
 			pr_warn("Thumb binaries may crash randomly without this workaround\n");
 		}
-
-		pr_info("RX-51: Registering OMAP3 HWRNG device\n");
-		platform_device_register(&omap3_rom_rng_device);
 	}
 }
 
@@ -306,123 +245,18 @@ static void __init omap3_tao3530_legacy_init(void)
 static void __init omap3_logicpd_torpedo_init(void)
 {
 	omap3_gpio126_127_129();
-	platform_device_register(&wl128x_device);
-	platform_device_register(&btwilink_device);
 }
 
 /* omap3pandora legacy devices */
-#define PANDORA_WIFI_IRQ_GPIO		21
-#define PANDORA_WIFI_NRESET_GPIO	23
 
 static struct platform_device pandora_backlight = {
 	.name	= "pandora-backlight",
 	.id	= -1,
 };
 
-static struct regulator_consumer_supply pandora_vmmc3_supply[] = {
-	REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"),
-};
-
-static struct regulator_init_data pandora_vmmc3 = {
-	.constraints = {
-		.valid_ops_mask		= REGULATOR_CHANGE_STATUS,
-	},
-	.num_consumer_supplies	= ARRAY_SIZE(pandora_vmmc3_supply),
-	.consumer_supplies	= pandora_vmmc3_supply,
-};
-
-static struct fixed_voltage_config pandora_vwlan = {
-	.supply_name		= "vwlan",
-	.microvolts		= 1800000, /* 1.8V */
-	.startup_delay		= 50000, /* 50ms */
-	.init_data		= &pandora_vmmc3,
-};
-
-static struct platform_device pandora_vwlan_device = {
-	.name		= "reg-fixed-voltage",
-	.id		= 1,
-	.dev = {
-		.platform_data = &pandora_vwlan,
-	},
-};
-
-static struct gpiod_lookup_table pandora_vwlan_gpiod_table = {
-	.dev_id = "reg-fixed-voltage.1",
-	.table = {
-		/*
-		 * As this is a low GPIO number it should be at the first
-		 * GPIO bank.
-		 */
-		GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO,
-			    NULL, GPIO_ACTIVE_HIGH),
-		{ },
-	},
-};
-
-static void pandora_wl1251_init_card(struct mmc_card *card)
-{
-	/*
-	 * We have TI wl1251 attached to MMC3. Pass this information to
-	 * SDIO core because it can't be probed by normal methods.
-	 */
-	if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) {
-		card->quirks |= MMC_QUIRK_NONSTD_SDIO;
-		card->cccr.wide_bus = 1;
-		card->cis.vendor = 0x104c;
-		card->cis.device = 0x9066;
-		card->cis.blksize = 512;
-		card->cis.max_dtr = 24000000;
-		card->ocr = 0x80;
-	}
-}
-
-static struct omap2_hsmmc_info pandora_mmc3[] = {
-	{
-		.mmc		= 3,
-		.caps		= MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD,
-		.init_card	= pandora_wl1251_init_card,
-	},
-	{}	/* Terminator */
-};
-
-static void __init pandora_wl1251_init(void)
-{
-	struct wl1251_platform_data pandora_wl1251_pdata;
-	int ret;
-
-	memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata));
-
-	pandora_wl1251_pdata.power_gpio = -1;
-
-	ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq");
-	if (ret < 0)
-		goto fail;
-
-	pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO);
-	if (pandora_wl1251_pdata.irq < 0)
-		goto fail_irq;
-
-	pandora_wl1251_pdata.use_eeprom = true;
-	ret = wl1251_set_platform_data(&pandora_wl1251_pdata);
-	if (ret < 0)
-		goto fail_irq;
-
-	return;
-
-fail_irq:
-	gpio_free(PANDORA_WIFI_IRQ_GPIO);
-fail:
-	pr_err("wl1251 board initialisation failed\n");
-}
-
 static void __init omap3_pandora_legacy_init(void)
 {
 	platform_device_register(&pandora_backlight);
-	gpiod_add_lookup_table(&pandora_vwlan_gpiod_table);
-	platform_device_register(&pandora_vwlan_device);
-	omap_hsmmc_init(pandora_mmc3);
-	omap_hsmmc_late_init(pandora_mmc3);
-	pandora_wl1251_init();
 }
 #endif /* CONFIG_ARCH_OMAP3 */
 
@@ -472,10 +306,14 @@ static void __init dra7x_evm_mmc_quirk(void)
 
 static struct clockdomain *ti_sysc_find_one_clockdomain(struct clk *clk)
 {
+	struct clk_hw *hw = __clk_get_hw(clk);
 	struct clockdomain *clkdm = NULL;
 	struct clk_hw_omap *hwclk;
 
-	hwclk = to_clk_hw_omap(__clk_get_hw(clk));
+	hwclk = to_clk_hw_omap(hw);
+	if (!omap2_clk_is_hw_omap(hw))
+		return NULL;
+
 	if (hwclk && hwclk->clkdm_name)
 		clkdm = clkdm_lookup(hwclk->clkdm_name);
 
@@ -638,6 +476,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = {
 	OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL),
 	OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0",
 		       &am35xx_emac_pdata),
+	OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call),
 	/* McBSP modules with sidetone core */
 #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP)
 	OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata),
@@ -690,8 +529,6 @@ static struct pdata_init pdata_quirks[] __initdata = {
 	{ "nokia,omap3-n900", nokia_n900_legacy_init, },
 	{ "nokia,omap3-n9", hsmmc2_internal_input_clk, },
 	{ "nokia,omap3-n950", hsmmc2_internal_input_clk, },
-	{ "isee,omap3-igep0020-rev-f", omap3_igep0020_rev_f_legacy_init, },
-	{ "isee,omap3-igep0030-rev-g", omap3_igep0030_rev_g_legacy_init, },
 	{ "logicpd,dm3730-torpedo-devkit", omap3_logicpd_torpedo_init, },
 	{ "ti,omap3-evm-37xx", omap3_evm_legacy_init, },
 	{ "ti,am3517-evm", am3517_evm_legacy_init, },
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 7ac9af56762d..01ec1ba4878b 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -148,6 +148,7 @@ int __init omap2_common_pm_late_init(void)
 	/* Init the voltage layer */
 	omap3_twl_init();
 	omap4_twl_init();
+	omap4_cpcap_init();
 	omap_voltage_late_init();
 
 	/* Smartreflex device init */
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index 8a55b69bca63..2a883a0c1fcd 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -107,6 +107,11 @@ extern u16 pm44xx_errata;
 #define IS_PM44XX_ERRATUM(id)		0
 #endif
 
+#define OMAP4_VP_CONFIG_ERROROFFSET	0x00
+#define OMAP4_VP_VSTEPMIN_VSTEPMIN	0x01
+#define OMAP4_VP_VSTEPMAX_VSTEPMAX	0x04
+#define OMAP4_VP_VLIMITTO_TIMEOUT_US	200
+
 #ifdef CONFIG_POWER_AVS_OMAP
 extern int omap_devinit_smartreflex(void);
 extern void omap_enable_smartreflex_on_init(void);
@@ -134,6 +139,15 @@ static inline int omap4_twl_init(void)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_MFD_CPCAP)
+extern int omap4_cpcap_init(void);
+#else
+static inline int omap4_cpcap_init(void)
+{
+	return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_PM
 extern void omap_pm_setup_oscillator(u32 tstart, u32 tshut);
 extern void omap_pm_get_oscillator(u32 *tstart, u32 *tshut);
diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c
index 485550af2506..5a7a949ae965 100644
--- a/arch/arm/mach-omap2/pm44xx.c
+++ b/arch/arm/mach-omap2/pm44xx.c
@@ -128,18 +128,9 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
 		return 0;
 	}
 
-	/*
-	 * Bootloader or kexec boot may have LOGICRETSTATE cleared
-	 * for some domains. This is the case when kexec booting from
-	 * Android kernels that support off mode for example.
-	 * Make sure it's set at least for core and per, otherwise
-	 * we currently will see lost GPIO interrupts for wlcore and
-	 * smsc911x at least if per hits retention during idle.
-	 */
 	if (!strncmp(pwrdm->name, "core", 4) ||
-	    !strncmp(pwrdm->name, "l4per", 5) ||
-	    !strncmp(pwrdm->name, "wkup", 4))
-		pwrdm_set_logic_retst(pwrdm, PWRDM_POWER_RET);
+	    !strncmp(pwrdm->name, "l4per", 5))
+		pwrdm_set_logic_retst(pwrdm, PWRDM_POWER_OFF);
 
 	pwrst = kmalloc(sizeof(struct power_state), GFP_ATOMIC);
 	if (!pwrst)
diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c
new file mode 100644
index 000000000000..eab281a5fc9f
--- /dev/null
+++ b/arch/arm/mach-omap2/pmic-cpcap.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * pmic-cpcap.c - CPCAP-specific functions for the OPP code
+ *
+ * Adapted from Motorola Mapphone Android Linux kernel
+ * Copyright (C) 2011 Motorola, Inc.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+
+#include "soc.h"
+#include "pm.h"
+#include "voltage.h"
+
+#include <linux/init.h>
+#include "vc.h"
+
+/**
+ * omap_cpcap_vsel_to_vdc - convert CPCAP VSEL value to microvolts DC
+ * @vsel: CPCAP VSEL value to convert
+ *
+ * Returns the microvolts DC that the CPCAP PMIC should generate when
+ * programmed with @vsel.
+ */
+static unsigned long omap_cpcap_vsel_to_uv(unsigned char vsel)
+{
+	if (vsel > 0x44)
+		vsel = 0x44;
+	return (((vsel * 125) + 6000)) * 100;
+}
+
+/**
+ * omap_cpcap_uv_to_vsel - convert microvolts DC to CPCAP VSEL value
+ * @uv: microvolts DC to convert
+ *
+ * Returns the VSEL value necessary for the CPCAP PMIC to
+ * generate an output voltage equal to or greater than @uv microvolts DC.
+ */
+static unsigned char omap_cpcap_uv_to_vsel(unsigned long uv)
+{
+	if (uv < 600000)
+		uv = 600000;
+	else if (uv > 1450000)
+		uv = 1450000;
+	return DIV_ROUND_UP(uv - 600000, 12500);
+}
+
+static struct omap_voltdm_pmic omap_cpcap_core = {
+	.slew_rate = 4000,
+	.step_size = 12500,
+	.vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET,
+	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
+	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
+	.vddmin = 900000,
+	.vddmax = 1350000,
+	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
+	.i2c_slave_addr = 0x02,
+	.volt_reg_addr = 0x00,
+	.cmd_reg_addr = 0x01,
+	.i2c_high_speed = false,
+	.vsel_to_uv = omap_cpcap_vsel_to_uv,
+	.uv_to_vsel = omap_cpcap_uv_to_vsel,
+};
+
+static struct omap_voltdm_pmic omap_cpcap_iva = {
+	.slew_rate = 4000,
+	.step_size = 12500,
+	.vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET,
+	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
+	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
+	.vddmin = 900000,
+	.vddmax = 1350000,
+	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
+	.i2c_slave_addr = 0x44,
+	.volt_reg_addr = 0x0,
+	.cmd_reg_addr = 0x01,
+	.i2c_high_speed = false,
+	.vsel_to_uv = omap_cpcap_vsel_to_uv,
+	.uv_to_vsel = omap_cpcap_uv_to_vsel,
+};
+
+/**
+ * omap_max8952_vsel_to_vdc - convert MAX8952 VSEL value to microvolts DC
+ * @vsel: MAX8952 VSEL value to convert
+ *
+ * Returns the microvolts DC that the MAX8952 Regulator should generate when
+ * programmed with @vsel.
+ */
+static unsigned long omap_max8952_vsel_to_uv(unsigned char vsel)
+{
+	if (vsel > 0x3F)
+		vsel = 0x3F;
+	return (((vsel * 100) + 7700)) * 100;
+}
+
+/**
+ * omap_max8952_uv_to_vsel - convert microvolts DC to MAX8952 VSEL value
+ * @uv: microvolts DC to convert
+ *
+ * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * generate an output voltage equal to or greater than @uv microvolts DC.
+ */
+static unsigned char omap_max8952_uv_to_vsel(unsigned long uv)
+{
+	if (uv < 770000)
+		uv = 770000;
+	else if (uv > 1400000)
+		uv = 1400000;
+	return DIV_ROUND_UP(uv - 770000, 10000);
+}
+
+static struct omap_voltdm_pmic omap443x_max8952_mpu = {
+	.slew_rate = 16000,
+	.step_size = 10000,
+	.vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET,
+	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
+	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
+	.vddmin = 900000,
+	.vddmax = 1400000,
+	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
+	.i2c_slave_addr = 0x60,
+	.volt_reg_addr = 0x03,
+	.cmd_reg_addr = 0x03,
+	.i2c_high_speed = false,
+	.vsel_to_uv = omap_max8952_vsel_to_uv,
+	.uv_to_vsel = omap_max8952_uv_to_vsel,
+};
+
+/**
+ * omap_fan5355_vsel_to_vdc - convert FAN535503 VSEL value to microvolts DC
+ * @vsel: FAN535503 VSEL value to convert
+ *
+ * Returns the microvolts DC that the FAN535503 Regulator should generate when
+ * programmed with @vsel.
+ */
+static unsigned long omap_fan535503_vsel_to_uv(unsigned char vsel)
+{
+	/* Extract bits[5:0] */
+	vsel &= 0x3F;
+
+	return (((vsel * 125) + 7500)) * 100;
+}
+
+/**
+ * omap_fan535508_vsel_to_vdc - convert FAN535508 VSEL value to microvolts DC
+ * @vsel: FAN535508 VSEL value to convert
+ *
+ * Returns the microvolts DC that the FAN535508 Regulator should generate when
+ * programmed with @vsel.
+ */
+static unsigned long omap_fan535508_vsel_to_uv(unsigned char vsel)
+{
+	/* Extract bits[5:0] */
+	vsel &= 0x3F;
+
+	if (vsel > 0x37)
+		vsel = 0x37;
+	return (((vsel * 125) + 7500)) * 100;
+}
+
+
+/**
+ * omap_fan535503_uv_to_vsel - convert microvolts DC to FAN535503 VSEL value
+ * @uv: microvolts DC to convert
+ *
+ * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * generate an output voltage equal to or greater than @uv microvolts DC.
+ */
+static unsigned char omap_fan535503_uv_to_vsel(unsigned long uv)
+{
+	unsigned char vsel;
+	if (uv < 750000)
+		uv = 750000;
+	else if (uv > 1537500)
+		uv = 1537500;
+
+	vsel = DIV_ROUND_UP(uv - 750000, 12500);
+	return vsel | 0xC0;
+}
+
+/**
+ * omap_fan535508_uv_to_vsel - convert microvolts DC to FAN535508 VSEL value
+ * @uv: microvolts DC to convert
+ *
+ * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * generate an output voltage equal to or greater than @uv microvolts DC.
+ */
+static unsigned char omap_fan535508_uv_to_vsel(unsigned long uv)
+{
+	unsigned char vsel;
+	if (uv < 750000)
+		uv = 750000;
+	else if (uv > 1437500)
+		uv = 1437500;
+
+	vsel = DIV_ROUND_UP(uv - 750000, 12500);
+	return vsel | 0xC0;
+}
+
+/* fan5335-core */
+static struct omap_voltdm_pmic omap4_fan_core = {
+	.slew_rate = 4000,
+	.step_size = 12500,
+	.vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET,
+	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
+	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
+	.vddmin = 850000,
+	.vddmax = 1375000,
+	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
+	.i2c_slave_addr = 0x4A,
+	.i2c_high_speed = false,
+	.volt_reg_addr = 0x01,
+	.cmd_reg_addr = 0x01,
+	.vsel_to_uv = omap_fan535508_vsel_to_uv,
+	.uv_to_vsel = omap_fan535508_uv_to_vsel,
+};
+
+/* fan5335 iva */
+static struct omap_voltdm_pmic omap4_fan_iva = {
+	.slew_rate = 4000,
+	.step_size = 12500,
+	.vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET,
+	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
+	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
+	.vddmin = 850000,
+	.vddmax = 1375000,
+	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
+	.i2c_slave_addr = 0x48,
+	.volt_reg_addr = 0x01,
+	.cmd_reg_addr = 0x01,
+	.i2c_high_speed = false,
+	.vsel_to_uv = omap_fan535503_vsel_to_uv,
+	.uv_to_vsel = omap_fan535503_uv_to_vsel,
+};
+
+int __init omap4_cpcap_init(void)
+{
+	struct voltagedomain *voltdm;
+
+	if (!of_find_compatible_node(NULL, NULL, "motorola,cpcap"))
+		return -ENODEV;
+
+	voltdm = voltdm_lookup("mpu");
+	omap_voltage_register_pmic(voltdm, &omap443x_max8952_mpu);
+
+	if (of_machine_is_compatible("motorola,droid-bionic")) {
+		voltdm = voltdm_lookup("mpu");
+		omap_voltage_register_pmic(voltdm, &omap_cpcap_core);
+
+		voltdm = voltdm_lookup("mpu");
+		omap_voltage_register_pmic(voltdm, &omap_cpcap_iva);
+	} else {
+		voltdm = voltdm_lookup("core");
+		omap_voltage_register_pmic(voltdm, &omap4_fan_core);
+
+		voltdm = voltdm_lookup("iva");
+		omap_voltage_register_pmic(voltdm, &omap4_fan_iva);
+	}
+
+	return 0;
+}
+
+static int __init cpcap_late_init(void)
+{
+	omap4_vc_set_pmic_signaling(PWRDM_POWER_RET);
+
+	return 0;
+}
+omap_late_initcall(cpcap_late_init);
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index 1d9346f2a4ae..25093c1e5b9a 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -745,7 +745,7 @@ struct pwrdm_ops omap4_pwrdm_operations = {
 
 static int omap44xx_prm_late_init(void);
 
-void prm_save_context(void)
+static void prm_save_context(void)
 {
 	omap_prm_context.irq_enable =
 			omap4_prm_read_inst_reg(AM43XX_PRM_OCP_SOCKET_INST,
@@ -756,7 +756,7 @@ void prm_save_context(void)
 						omap4_prcm_irq_setup.pm_ctrl);
 }
 
-void prm_restore_context(void)
+static void prm_restore_context(void)
 {
 	omap4_prm_write_inst_reg(omap_prm_context.irq_enable,
 				 OMAP4430_PRM_OCP_SOCKET_INST,
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 07bea84c5d6e..0d0a731cb476 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -545,7 +545,7 @@ static void __init __omap_sync32k_timer_init(int clkev_nr, const char *clkev_src
 	omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop);
 
 	/* Enable the use of clocksource="gp_timer" kernel parameter */
-	if (use_gptimer_clksrc || gptimer)
+	if (clksrc_nr && (use_gptimer_clksrc || gptimer))
 		omap2_gptimer_clocksource_init(clksrc_nr, clksrc_src,
 						clksrc_prop);
 	else
@@ -586,7 +586,7 @@ void __init omap3_gptimer_timer_init(void)
 static void __init omap4_sync32k_timer_init(void)
 {
 	__omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon",
-			2, "sys_clkin_ck", NULL, false);
+				  0, NULL, NULL, false);
 }
 
 void __init omap4_local_timer_init(void)
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index d76b1e5eb8ba..86f1ac4c2412 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -26,6 +26,31 @@
 #include "scrm44xx.h"
 #include "control.h"
 
+#define OMAP4430_VDD_IVA_I2C_DISABLE		BIT(14)
+#define OMAP4430_VDD_MPU_I2C_DISABLE		BIT(13)
+#define OMAP4430_VDD_CORE_I2C_DISABLE		BIT(12)
+#define OMAP4430_VDD_IVA_PRESENCE		BIT(9)
+#define OMAP4430_VDD_MPU_PRESENCE		BIT(8)
+#define OMAP4430_AUTO_CTRL_VDD_IVA(x)		((x) << 4)
+#define OMAP4430_AUTO_CTRL_VDD_MPU(x)		((x) << 2)
+#define OMAP4430_AUTO_CTRL_VDD_CORE(x)		((x) << 0)
+#define OMAP4430_AUTO_CTRL_VDD_RET		2
+
+#define OMAP4430_VDD_I2C_DISABLE_MASK	\
+	(OMAP4430_VDD_IVA_I2C_DISABLE | \
+	 OMAP4430_VDD_MPU_I2C_DISABLE | \
+	 OMAP4430_VDD_CORE_I2C_DISABLE)
+
+#define OMAP4_VDD_DEFAULT_VAL	\
+	(OMAP4430_VDD_I2C_DISABLE_MASK | \
+	 OMAP4430_VDD_IVA_PRESENCE | OMAP4430_VDD_MPU_PRESENCE | \
+	 OMAP4430_AUTO_CTRL_VDD_IVA(OMAP4430_AUTO_CTRL_VDD_RET) | \
+	 OMAP4430_AUTO_CTRL_VDD_MPU(OMAP4430_AUTO_CTRL_VDD_RET) | \
+	 OMAP4430_AUTO_CTRL_VDD_CORE(OMAP4430_AUTO_CTRL_VDD_RET))
+
+#define OMAP4_VDD_RET_VAL	\
+	(OMAP4_VDD_DEFAULT_VAL & ~OMAP4430_VDD_I2C_DISABLE_MASK)
+
 /**
  * struct omap_vc_channel_cfg - describe the cfg_channel bitfield
  * @sa: bit for slave address
@@ -280,6 +305,26 @@ void omap3_vc_set_pmic_signaling(int core_next_state)
 	}
 }
 
+void omap4_vc_set_pmic_signaling(int core_next_state)
+{
+	struct voltagedomain *vd = vc.vd;
+	u32 val;
+
+	if (!vd)
+		return;
+
+	switch (core_next_state) {
+	case PWRDM_POWER_RET:
+		val = OMAP4_VDD_RET_VAL;
+		break;
+	default:
+		val = OMAP4_VDD_DEFAULT_VAL;
+		break;
+	}
+
+	vd->write(val, OMAP4_PRM_VOLTCTRL_OFFSET);
+}
+
 /*
  * Configure signal polarity for sys_clkreq and sys_off_mode pins
  * as the default values are wrong and can cause the system to hang
@@ -542,9 +587,19 @@ static void omap4_set_timings(struct voltagedomain *voltdm, bool off_mode)
 	writel_relaxed(val, OMAP4_SCRM_CLKSETUPTIME);
 }
 
+static void __init omap4_vc_init_pmic_signaling(struct voltagedomain *voltdm)
+{
+	if (vc.vd)
+		return;
+
+	vc.vd = voltdm;
+	voltdm->write(OMAP4_VDD_DEFAULT_VAL, OMAP4_PRM_VOLTCTRL_OFFSET);
+}
+
 /* OMAP4 specific voltage init functions */
 static void __init omap4_vc_init_channel(struct voltagedomain *voltdm)
 {
+	omap4_vc_init_pmic_signaling(voltdm);
 	omap4_set_timings(voltdm, true);
 	omap4_set_timings(voltdm, false);
 }
@@ -615,7 +670,7 @@ static void __init omap4_vc_i2c_timing_init(struct voltagedomain *voltdm)
 	const struct i2c_init_data *i2c_data;
 
 	if (!voltdm->pmic->i2c_high_speed) {
-		pr_warn("%s: only high speed supported!\n", __func__);
+		pr_info("%s: using bootloader low-speed timings\n", __func__);
 		return;
 	}
 
diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h
index 5bf088633b62..9e861dbc2c4c 100644
--- a/arch/arm/mach-omap2/vc.h
+++ b/arch/arm/mach-omap2/vc.h
@@ -117,7 +117,7 @@ extern struct omap_vc_param omap4_iva_vc_data;
 extern struct omap_vc_param omap4_core_vc_data;
 
 void omap3_vc_set_pmic_signaling(int core_next_state);
-
+void omap4_vc_set_pmic_signaling(int core_next_state);
 
 void omap_vc_init_channel(struct voltagedomain *voltdm);
 int omap_vc_pre_scale(struct voltagedomain *voltdm,
diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c
index eba917d69c0a..35dd3adb7712 100644
--- a/arch/arm/mach-pxa/colibri-pxa320.c
+++ b/arch/arm/mach-pxa/colibri-pxa320.c
@@ -11,9 +11,9 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/interrupt.h>
-#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <linux/sizes.h>
@@ -144,17 +144,18 @@ static inline void __init colibri_pxa320_init_eth(void) {}
 #endif /* CONFIG_AX88796 */
 
 #if defined(CONFIG_USB_PXA27X)||defined(CONFIG_USB_PXA27X_MODULE)
-static struct gpio_vbus_mach_info colibri_pxa320_gpio_vbus_info = {
-	.gpio_vbus		= mfp_to_gpio(MFP_PIN_GPIO96),
-	.gpio_pullup		= -1,
+static struct gpiod_lookup_table gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", MFP_PIN_GPIO96,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device colibri_pxa320_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &colibri_pxa320_gpio_vbus_info,
-	},
 };
 
 static void colibri_pxa320_udc_command(int cmd)
@@ -173,6 +174,7 @@ static struct pxa2xx_udc_mach_info colibri_pxa320_udc_info __initdata = {
 static void __init colibri_pxa320_init_udc(void)
 {
 	pxa_set_udc_info(&colibri_pxa320_udc_info);
+	gpiod_add_lookup_table(&gpio_vbus_gpiod_table);
 	platform_device_register(&colibri_pxa320_gpio_vbus);
 }
 #else
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index 91f7c3e40065..f37c44b6139d 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk-provider.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
@@ -22,7 +23,6 @@
 #include <linux/mfd/t7l66xb.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/memblock.h>
 
 #include <video/w100fb.h>
@@ -51,18 +51,20 @@ void __init eseries_fixup(struct tag *tags, char **cmdline)
 		memblock_add(0xa0000000, SZ_64M);
 }
 
-struct gpio_vbus_mach_info e7xx_udc_info = {
-	.gpio_vbus   = GPIO_E7XX_USB_DISC,
-	.gpio_pullup = GPIO_E7XX_USB_PULLUP,
-	.gpio_pullup_inverted = 1
+static struct gpiod_lookup_table e7xx_gpio_vbus_gpiod_table __maybe_unused = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_E7XX_USB_DISC,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_E7XX_USB_PULLUP,
+			    "pullup", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct platform_device e7xx_gpio_vbus __maybe_unused = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &e7xx_udc_info,
-	},
 };
 
 struct pxaficp_platform_data e7xx_ficp_platform_data = {
@@ -165,6 +167,7 @@ static void __init e330_init(void)
 	pxa_set_stuart_info(NULL);
 	eseries_register_clks();
 	eseries_get_tmio_gpios();
+	gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e330_devices));
 }
 
@@ -216,6 +219,7 @@ static void __init e350_init(void)
 	pxa_set_stuart_info(NULL);
 	eseries_register_clks();
 	eseries_get_tmio_gpios();
+	gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e350_devices));
 }
 
@@ -340,6 +344,7 @@ static void __init e400_init(void)
 	eseries_register_clks();
 	eseries_get_tmio_gpios();
 	pxa_set_fb_info(NULL, &e400_pxafb_mach_info);
+	gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e400_devices));
 }
 
@@ -534,6 +539,7 @@ static void __init e740_init(void)
 	clk_add_alias("CLK_CK48M", e740_t7l66xb_device.name,
 			"UDCCLK", &pxa25x_device_udc.dev),
 	eseries_get_tmio_gpios();
+	gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e740_devices));
 	pxa_set_ac97_info(NULL);
 	pxa_set_ficp_info(&e7xx_ficp_platform_data);
@@ -733,6 +739,7 @@ static void __init e750_init(void)
 	clk_add_alias("CLK_CK3P6MI", e750_tc6393xb_device.name,
 			"GPIO11_CLK", NULL),
 	eseries_get_tmio_gpios();
+	gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e750_devices));
 	pxa_set_ac97_info(NULL);
 	pxa_set_ficp_info(&e7xx_ficp_platform_data);
@@ -888,18 +895,20 @@ static struct platform_device e800_fb_device = {
 
 /* --------------------------- UDC definitions --------------------------- */
 
-static struct gpio_vbus_mach_info e800_udc_info = {
-	.gpio_vbus   = GPIO_E800_USB_DISC,
-	.gpio_pullup = GPIO_E800_USB_PULLUP,
-	.gpio_pullup_inverted = 1
+static struct gpiod_lookup_table e800_gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_E800_USB_DISC,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_E800_USB_PULLUP,
+			    "pullup", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct platform_device e800_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &e800_udc_info,
-	},
 };
 
 
@@ -949,6 +958,7 @@ static void __init e800_init(void)
 	clk_add_alias("CLK_CK3P6MI", e800_tc6393xb_device.name,
 			"GPIO11_CLK", NULL),
 	eseries_get_tmio_gpios();
+	gpiod_add_lookup_table(&e800_gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(e800_devices));
 	pxa_set_ac97_info(NULL);
 }
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 4b4589cf431f..49dd618b10f7 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -20,10 +20,10 @@
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/err.h>
 #include <linux/clk.h>
-#include <linux/usb/gpio_vbus.h>
 
 #include <asm/setup.h>
 #include <asm/memory.h>
@@ -101,21 +101,25 @@ static void __init gumstix_mmc_init(void)
 #endif
 
 #ifdef CONFIG_USB_PXA25X
-static struct gpio_vbus_mach_info gumstix_udc_info = {
-	.gpio_vbus		= GPIO_GUMSTIX_USB_GPIOn,
-	.gpio_pullup		= GPIO_GUMSTIX_USB_GPIOx,
+static struct gpiod_lookup_table gumstix_gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_GUMSTIX_USB_GPIOn,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_GUMSTIX_USB_GPIOx,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device gumstix_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &gumstix_udc_info,
-	},
 };
 
 static void __init gumstix_udc_init(void)
 {
+	gpiod_add_lookup_table(&gumstix_gpio_vbus_gpiod_table);
 	platform_device_register(&gumstix_gpio_vbus);
 }
 #else
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 311268d186ab..238a751a8797 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -34,7 +34,6 @@
 #include <linux/spi/ads7846.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/platform_data/i2c-pxa.h>
 
 #include <mach/hardware.h>
@@ -578,18 +577,24 @@ static struct pwm_lookup hx4700_pwm_lookup[] = {
  * USB "Transceiver"
  */
 
-static struct gpio_vbus_mach_info gpio_vbus_info = {
-	.gpio_pullup        = GPIO76_HX4700_USBC_PUEN,
-	.gpio_vbus          = GPIOD14_nUSBC_DETECT,
-	.gpio_vbus_inverted = 1,
+static struct gpiod_lookup_table gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		/* This GPIO is on ASIC3 */
+		GPIO_LOOKUP("asic3",
+			    /* Convert to a local offset on the ASIC3 */
+			    GPIOD14_nUSBC_DETECT - HX4700_ASIC3_GPIO_BASE,
+			    "vbus", GPIO_ACTIVE_LOW),
+		/* This one is on the primary SOC GPIO */
+		GPIO_LOOKUP("gpio-pxa", GPIO76_HX4700_USBC_PUEN,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device gpio_vbus = {
 	.name          = "gpio-vbus",
 	.id            = -1,
-	.dev = {
-		.platform_data = &gpio_vbus_info,
-	},
 };
 
 static struct pxa2xx_udc_mach_info hx4700_udc_info;
@@ -883,6 +888,7 @@ static void __init hx4700_init(void)
 	pxa_set_stuart_info(NULL);
 
 	gpiod_add_lookup_table(&bq24022_gpiod_table);
+	gpiod_add_lookup_table(&gpio_vbus_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 	pwm_add_table(hx4700_pwm_lookup, ARRAY_SIZE(hx4700_pwm_lookup));
 
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 865b10344ea2..151e26ec0696 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -12,6 +12,7 @@
 
 #include <linux/irq.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/gpio.h>
 
 #include <asm/mach-types.h>
@@ -22,7 +23,6 @@
 
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
-#include <linux/can/platform/mcp251x.h>
 #include <linux/regulator/machine.h>
 
 #include "generic.h"
@@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
 	.gpio_cs        = ICONTROL_MCP251x_nCS4
 };
 
-static struct mcp251x_platform_data mcp251x_info = {
-	.oscillator_frequency = 16E6,
+static const struct property_entry mcp251x_properties[] = {
+	PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+	{}
 };
 
 static struct spi_board_info mcp251x_board_info[] = {
@@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.max_speed_hz    = 6500000,
 		.bus_num         = 3,
 		.chip_select     = 0,
-		.platform_data   = &mcp251x_info,
+		.properties      = mcp251x_properties,
 		.controller_data = &mcp251x_chip_info1,
 		.irq             = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1)
 	},
@@ -88,7 +89,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.max_speed_hz    = 6500000,
 		.bus_num         = 3,
 		.chip_select     = 1,
-		.platform_data   = &mcp251x_info,
+		.properties      = mcp251x_properties,
 		.controller_data = &mcp251x_chip_info2,
 		.irq             = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ2)
 	},
@@ -97,7 +98,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.max_speed_hz    = 6500000,
 		.bus_num         = 4,
 		.chip_select     = 0,
-		.platform_data   = &mcp251x_info,
+		.properties      = mcp251x_properties,
 		.controller_data = &mcp251x_chip_info3,
 		.irq             = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ3)
 	},
@@ -106,7 +107,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.max_speed_hz    = 6500000,
 		.bus_num         = 4,
 		.chip_select     = 1,
-		.platform_data   = &mcp251x_info,
+		.properties      = mcp251x_properties,
 		.controller_data = &mcp251x_chip_info4,
 		.irq             = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ4)
 	}
diff --git a/arch/arm/mach-pxa/include/mach/tosa.h b/arch/arm/mach-pxa/include/mach/tosa.h
index a499ed17931e..8bfaca3a8b64 100644
--- a/arch/arm/mach-pxa/include/mach/tosa.h
+++ b/arch/arm/mach-pxa/include/mach/tosa.h
@@ -73,18 +73,6 @@
 #define TOSA_GPIO_BAT1_TH_ON		(TOSA_TC6393XB_GPIO_BASE + 15)
 
 /*
- * Timing Generator
- */
-#define TG_PNLCTL 			0x00
-#define TG_TPOSCTL 			0x01
-#define TG_DUTYCTL 			0x02
-#define TG_GPOSR 			0x03
-#define TG_GPODR1 			0x04
-#define TG_GPODR2 			0x05
-#define TG_PINICTL 			0x06
-#define TG_HPOSCTL 			0x07
-
-/*
  * PXA GPIOs
  */
 #define TOSA_GPIO_POWERON		(0)
@@ -192,7 +180,4 @@
 #define TOSA_KEY_MAIL		KEY_MAIL
 #endif
 
-struct spi_device;
-extern int tosa_bl_enable(struct spi_device *spi, int enable);
-
 #endif /* _ASM_ARCH_TOSA_H_ */
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index e1a394ac3eea..5d0591f93f4d 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -27,7 +27,6 @@
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/gpio-regulator.h>
 #include <linux/regulator/machine.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/platform_data/i2c-pxa.h>
 
 #include <mach/hardware.h>
@@ -506,9 +505,20 @@ static struct resource gpio_vbus_resource = {
 	.end	= IRQ_MAGICIAN_VBUS,
 };
 
-static struct gpio_vbus_mach_info gpio_vbus_info = {
-	.gpio_pullup	= GPIO27_MAGICIAN_USBC_PUEN,
-	.gpio_vbus	= EGPIO_MAGICIAN_CABLE_VBUS,
+static struct gpiod_lookup_table gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		/*
+		 * EGPIO on register 4 index 1, the second EGPIO chip
+		 * starts at register 4 so this will be at index 1 on that
+		 * chip.
+		 */
+		GPIO_LOOKUP("htc-egpio-1", 1,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO27_MAGICIAN_USBC_PUEN,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device gpio_vbus = {
@@ -516,9 +526,6 @@ static struct platform_device gpio_vbus = {
 	.id		= -1,
 	.num_resources	= 1,
 	.resource	= &gpio_vbus_resource,
-	.dev = {
-		.platform_data = &gpio_vbus_info,
-	},
 };
 
 /*
@@ -1008,7 +1015,7 @@ static void __init magician_init(void)
 	pxa_set_udc_info(&magician_udc_info);
 
 	/* Check LCD type we have */
-	cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000);
+	cpld = ioremap(PXA_CS3_PHYS, 0x1000);
 	if (cpld) {
 		u8 board_id = __raw_readb(cpld + 0x14);
 
@@ -1032,6 +1039,7 @@ static void __init magician_init(void)
 		ARRAY_SIZE(pwm_backlight_supply), 5000000);
 
 	gpiod_add_lookup_table(&bq24022_gpiod_table);
+	gpiod_add_lookup_table(&gpio_vbus_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(devices));
 }
 
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index c360023a989c..0b8bae9610f1 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -24,7 +24,6 @@
 #include <linux/power_supply.h>
 #include <linux/wm97xx.h>
 #include <linux/mtd/physmap.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/reboot.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/max1586.h>
@@ -368,10 +367,13 @@ static struct pxa2xx_udc_mach_info mioa701_udc_info = {
 	.gpio_pullup	  = GPIO22_USB_ENABLE,
 };
 
-struct gpio_vbus_mach_info gpio_vbus_data = {
-	.gpio_vbus = GPIO13_nUSB_DETECT,
-	.gpio_vbus_inverted = 1,
-	.gpio_pullup = -1,
+static struct gpiod_lookup_table gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO13_nUSB_DETECT,
+			    "vbus", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 /*
@@ -677,7 +679,7 @@ MIO_SIMPLE_DEV(mioa701_led,	  "leds-gpio",	    &gpio_led_info)
 MIO_SIMPLE_DEV(pxa2xx_pcm,	  "pxa2xx-pcm",	    NULL)
 MIO_SIMPLE_DEV(mioa701_sound,	  "mioa701-wm9713", NULL)
 MIO_SIMPLE_DEV(mioa701_board,	  "mioa701-board",  NULL)
-MIO_SIMPLE_DEV(gpio_vbus,	  "gpio-vbus",      &gpio_vbus_data);
+MIO_SIMPLE_DEV(gpio_vbus,	  "gpio-vbus",      NULL);
 
 static struct platform_device *devices[] __initdata = {
 	&mioa701_gpio_keys,
@@ -750,6 +752,7 @@ static void __init mioa701_machine_init(void)
 	pxa_set_ac97_info(&mioa701_ac97_info);
 	pm_power_off = mioa701_poweroff;
 	pwm_add_table(mioa701_pwm_lookup, ARRAY_SIZE(mioa701_pwm_lookup));
+	gpiod_add_lookup_table(&gpio_vbus_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 	gsm_init();
 
diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c
index 3ad0b3915ae1..b600b63af3a6 100644
--- a/arch/arm/mach-pxa/palm27x.c
+++ b/arch/arm/mach-pxa/palm27x.c
@@ -13,10 +13,10 @@
 #include <linux/pda_power.h>
 #include <linux/pwm.h>
 #include <linux/pwm_backlight.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/regulator/max1586.h>
 #include <linux/platform_data/i2c-pxa.h>
 
@@ -159,32 +159,32 @@ void __init palm27x_lcd_init(int power, struct pxafb_mode_info *mode)
  ******************************************************************************/
 #if	defined(CONFIG_USB_PXA27X) || \
 	defined(CONFIG_USB_PXA27X_MODULE)
-static struct gpio_vbus_mach_info palm27x_udc_info = {
-	.gpio_vbus_inverted	= 1,
+
+/* The actual GPIO offsets get filled in in the palm27x_udc_init() call */
+static struct gpiod_lookup_table palm27x_udc_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", 0,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", 0,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device palm27x_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &palm27x_udc_info,
-	},
 };
 
 void __init palm27x_udc_init(int vbus, int pullup, int vbus_inverted)
 {
-	palm27x_udc_info.gpio_vbus	= vbus;
-	palm27x_udc_info.gpio_pullup	= pullup;
-
-	palm27x_udc_info.gpio_vbus_inverted = vbus_inverted;
-
-	if (!gpio_request(pullup, "USB Pullup")) {
-		gpio_direction_output(pullup,
-			palm27x_udc_info.gpio_vbus_inverted);
-		gpio_free(pullup);
-	} else
-		return;
+	palm27x_udc_gpiod_table.table[0].chip_hwnum = vbus;
+	palm27x_udc_gpiod_table.table[1].chip_hwnum = pullup;
+	if (vbus_inverted)
+		palm27x_udc_gpiod_table.table[0].flags = GPIO_ACTIVE_LOW;
 
+	gpiod_add_lookup_table(&palm27x_udc_gpiod_table);
 	platform_device_register(&palm27x_gpio_vbus);
 }
 #endif
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 902403367786..7c7cbb4e677e 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -23,7 +23,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
index f52bd155e825..fda9deaaae02 100644
--- a/arch/arm/mach-pxa/palmtc.c
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -23,7 +23,6 @@
 #include <linux/power_supply.h>
 #include <linux/gpio_keys.h>
 #include <linux/mtd/physmap.h>
-#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -319,22 +318,25 @@ static inline void palmtc_mkp_init(void) {}
  * UDC
  ******************************************************************************/
 #if defined(CONFIG_USB_PXA25X)||defined(CONFIG_USB_PXA25X_MODULE)
-static struct gpio_vbus_mach_info palmtc_udc_info = {
-	.gpio_vbus		= GPIO_NR_PALMTC_USB_DETECT_N,
-	.gpio_vbus_inverted	= 1,
-	.gpio_pullup		= GPIO_NR_PALMTC_USB_POWER,
+static struct gpiod_lookup_table palmtc_udc_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_USB_DETECT_N,
+			    "vbus", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_USB_POWER,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device palmtc_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &palmtc_udc_info,
-	},
 };
 
 static void __init palmtc_udc_init(void)
 {
+	gpiod_add_lookup_table(&palmtc_udc_gpiod_table);
 	platform_device_register(&palmtc_gpio_vbus);
 };
 #else
diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index a92b9665f425..7171014fd311 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -23,7 +23,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -201,18 +200,20 @@ static struct pxaficp_platform_data palmte2_ficp_platform_data = {
 /******************************************************************************
  * UDC
  ******************************************************************************/
-static struct gpio_vbus_mach_info palmte2_udc_info = {
-	.gpio_vbus		= GPIO_NR_PALMTE2_USB_DETECT_N,
-	.gpio_vbus_inverted	= 1,
-	.gpio_pullup		= GPIO_NR_PALMTE2_USB_PULLUP,
+static struct gpiod_lookup_table palmte2_udc_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_USB_DETECT_N,
+			    "vbus", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_USB_PULLUP,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device palmte2_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &palmte2_udc_info,
-	},
 };
 
 /******************************************************************************
@@ -368,6 +369,7 @@ static void __init palmte2_init(void)
 	pxa_set_ficp_info(&palmte2_ficp_platform_data);
 
 	pwm_add_table(palmte2_pwm_lookup, ARRAY_SIZE(palmte2_pwm_lookup));
+	gpiod_add_lookup_table(&palmte2_udc_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 926593ecf1c9..07332c92c9f7 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -23,7 +23,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/mtd/platnand.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 77fe2e367324..4df443943579 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -25,7 +25,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/platform_data/i2c-gpio.h>
 #include <linux/gpio/machine.h>
 
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index f537ff1c3ba7..3d2c108e911e 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -33,7 +33,6 @@
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/input/matrix_keypad.h>
 #include <linux/platform_data/i2c-pxa.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/reboot.h>
 #include <linux/memblock.h>
 
@@ -240,18 +239,20 @@ static struct scoop_pcmcia_config tosa_pcmcia_config = {
 /*
  * USB Device Controller
  */
-static struct gpio_vbus_mach_info tosa_udc_info = {
-	.gpio_pullup		= TOSA_GPIO_USB_PULLUP,
-	.gpio_vbus		= TOSA_GPIO_USB_IN,
-	.gpio_vbus_inverted	= 1,
+static struct gpiod_lookup_table tosa_udc_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_USB_IN,
+			    "vbus", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_USB_PULLUP,
+			    "pullup", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device tosa_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &tosa_udc_info,
-	},
 };
 
 /*
@@ -813,6 +814,26 @@ static struct pxa2xx_spi_controller pxa_ssp_master_info = {
 	.num_chipselect	= 1,
 };
 
+static struct gpiod_lookup_table tosa_lcd_gpio_table = {
+	.dev_id = "spi2.0",
+	.table = {
+		GPIO_LOOKUP("tc6393xb",
+			    TOSA_GPIO_TG_ON - TOSA_TC6393XB_GPIO_BASE,
+			    "tg #pwr", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table tosa_lcd_bl_gpio_table = {
+	.dev_id = "i2c-tosa-bl",
+	.table = {
+		GPIO_LOOKUP("tc6393xb",
+			    TOSA_GPIO_BL_C20MA - TOSA_TC6393XB_GPIO_BASE,
+			    "backlight", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static struct spi_board_info spi_board_info[] __initdata = {
 	{
 		.modalias	= "tosa-lcd",
@@ -923,10 +944,13 @@ static void __init tosa_init(void)
 	platform_scoop_config = &tosa_pcmcia_config;
 
 	pxa2xx_set_spi_info(2, &pxa_ssp_master_info);
+	gpiod_add_lookup_table(&tosa_lcd_gpio_table);
+	gpiod_add_lookup_table(&tosa_lcd_bl_gpio_table);
 	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
 
 	clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL);
 
+	gpiod_add_lookup_table(&tosa_udc_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index 26a5ebc00069..14505e83479e 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -14,7 +14,6 @@
 #include <linux/leds.h>
 #include <linux/gpio.h>
 #include <linux/gpio/machine.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
@@ -352,17 +351,18 @@ static inline void vpac270_uhc_init(void) {}
  * USB Gadget
  ******************************************************************************/
 #if defined(CONFIG_USB_PXA27X)||defined(CONFIG_USB_PXA27X_MODULE)
-static struct gpio_vbus_mach_info vpac270_gpio_vbus_info = {
-	.gpio_vbus		= GPIO41_VPAC270_UDC_DETECT,
-	.gpio_pullup		= -1,
+static struct gpiod_lookup_table vpac270_gpio_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO41_VPAC270_UDC_DETECT,
+			    "vbus", GPIO_ACTIVE_HIGH),
+		{ },
+	},
 };
 
 static struct platform_device vpac270_gpio_vbus = {
 	.name	= "gpio-vbus",
 	.id	= -1,
-	.dev	= {
-		.platform_data	= &vpac270_gpio_vbus_info,
-	},
 };
 
 static void vpac270_udc_command(int cmd)
@@ -381,6 +381,7 @@ static struct pxa2xx_udc_mach_info vpac270_udc_info __initdata = {
 static void __init vpac270_udc_init(void)
 {
 	pxa_set_udc_info(&vpac270_udc_info);
+	gpiod_add_lookup_table(&vpac270_gpio_vbus_gpiod_table);
 	platform_device_register(&vpac270_gpio_vbus);
 }
 #else
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index da113c8eefbf..b27fc7ac9cea 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -13,6 +13,7 @@
 #include <linux/leds.h>
 #include <linux/irq.h>
 #include <linux/pm.h>
+#include <linux/property.h>
 #include <linux/gpio.h>
 #include <linux/gpio/machine.h>
 #include <linux/serial_8250.h>
@@ -27,7 +28,6 @@
 #include <linux/platform_data/i2c-pxa.h>
 #include <linux/platform_data/pca953x.h>
 #include <linux/apm-emulation.h>
-#include <linux/can/platform/mcp251x.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
 
@@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = {
 	},
 };
 
-static struct mcp251x_platform_data zeus_mcp2515_pdata = {
-	.oscillator_frequency	= 16*1000*1000,
+static const struct property_entry mcp251x_properties[] = {
+	PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+	{}
 };
 
 static struct spi_board_info zeus_spi_board_info[] = {
 	[0] = {
 		.modalias	= "mcp2515",
-		.platform_data	= &zeus_mcp2515_pdata,
+		.properties	= mcp251x_properties,
 		.irq		= PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO),
 		.max_speed_hz	= 1*1000*1000,
 		.bus_num	= 3,
diff --git a/arch/arm/mach-s3c24xx/s3c2416.c b/arch/arm/mach-s3c24xx/s3c2416.c
index 1cdb7bd3e713..9514196cad8c 100644
--- a/arch/arm/mach-s3c24xx/s3c2416.c
+++ b/arch/arm/mach-s3c24xx/s3c2416.c
@@ -113,7 +113,7 @@ void __init s3c2416_map_io(void)
 	/* initialize device information early */
 	s3c2416_default_sdhci0();
 	s3c2416_default_sdhci1();
-	s3c64xx_spi_setname("s3c2443-spi");
+	s3c24xx_spi_setname("s3c2443-spi");
 
 	iotable_init(s3c2416_iodesc, ARRAY_SIZE(s3c2416_iodesc));
 }
diff --git a/arch/arm/mach-s3c24xx/s3c2443.c b/arch/arm/mach-s3c24xx/s3c2443.c
index 313e369c3ddd..4cbeb74cf3d6 100644
--- a/arch/arm/mach-s3c24xx/s3c2443.c
+++ b/arch/arm/mach-s3c24xx/s3c2443.c
@@ -91,7 +91,7 @@ void __init s3c2443_map_io(void)
 	s3c24xx_gpiocfg_default.get_pull = s3c2443_gpio_getpull;
 
 	/* initialize device information early */
-	s3c64xx_spi_setname("s3c2443-spi");
+	s3c24xx_spi_setname("s3c2443-spi");
 
 	iotable_init(s3c2443_iodesc, ARRAY_SIZE(s3c2443_iodesc));
 }
diff --git a/arch/arm/mach-s3c24xx/spi-core.h b/arch/arm/mach-s3c24xx/spi-core.h
index bb555ccbe057..1048fac629a2 100644
--- a/arch/arm/mach-s3c24xx/spi-core.h
+++ b/arch/arm/mach-s3c24xx/spi-core.h
@@ -11,7 +11,7 @@
  */
 
 /* re-define device name depending on support. */
-static inline void s3c64xx_spi_setname(char *name)
+static inline void s3c24xx_spi_setname(char *name)
 {
 #ifdef CONFIG_S3C64XX_DEV_SPI0
 	s3c64xx_device_spi0.name = name;
diff --git a/arch/arm/mach-s3c64xx/mach-smartq.c b/arch/arm/mach-s3c64xx/mach-smartq.c
index 951208f168e7..829d5dbd69ee 100644
--- a/arch/arm/mach-s3c64xx/mach-smartq.c
+++ b/arch/arm/mach-s3c64xx/mach-smartq.c
@@ -13,7 +13,6 @@
 #include <linux/serial_core.h>
 #include <linux/serial_s3c.h>
 #include <linux/spi/spi_gpio.h>
-#include <linux/usb/gpio_vbus.h>
 #include <linux/platform_data/s3c-hsotg.h>
 
 #include <asm/mach-types.h>
@@ -124,15 +123,16 @@ static struct s3c2410_hcd_info smartq_usb_host_info = {
 	.enable_oc	= smartq_usb_host_enableoc,
 };
 
-static struct gpio_vbus_mach_info smartq_usb_otg_vbus_pdata = {
-	.gpio_vbus		= S3C64XX_GPL(9),
-	.gpio_pullup		= -1,
-	.gpio_vbus_inverted	= true,
+static struct gpiod_lookup_table smartq_usb_otg_vbus_gpiod_table = {
+	.dev_id = "gpio-vbus",
+	.table = {
+		GPIO_LOOKUP("GPL", 9, "vbus", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct platform_device smartq_usb_otg_vbus_dev = {
 	.name			= "gpio-vbus",
-	.dev.platform_data	= &smartq_usb_otg_vbus_pdata,
 };
 
 static struct pwm_lookup smartq_pwm_lookup[] = {
@@ -418,6 +418,7 @@ void __init smartq_machine_init(void)
 
 	pwm_add_table(smartq_pwm_lookup, ARRAY_SIZE(smartq_pwm_lookup));
 	gpiod_add_lookup_table(&smartq_lcd_control_gpiod_table);
+	gpiod_add_lookup_table(&smartq_usb_otg_vbus_gpiod_table);
 	platform_add_devices(smartq_devices, ARRAY_SIZE(smartq_devices));
 
 	gpiod_add_lookup_table(&smartq_audio_gpios);
diff --git a/arch/arm/mach-s3c64xx/setup-usb-phy.c b/arch/arm/mach-s3c64xx/setup-usb-phy.c
index 6aaaa1d8e8b9..d6b0e3b268af 100644
--- a/arch/arm/mach-s3c64xx/setup-usb-phy.c
+++ b/arch/arm/mach-s3c64xx/setup-usb-phy.c
@@ -73,7 +73,7 @@ static int s3c_usb_otgphy_exit(struct platform_device *pdev)
 	return 0;
 }
 
-int s5p_usb_phy_init(struct platform_device *pdev, int type)
+int s3c_usb_phy_init(struct platform_device *pdev, int type)
 {
 	if (type == USB_PHY_TYPE_DEVICE)
 		return s3c_usb_otgphy_init(pdev);
@@ -81,7 +81,7 @@ int s5p_usb_phy_init(struct platform_device *pdev, int type)
 	return -EINVAL;
 }
 
-int s5p_usb_phy_exit(struct platform_device *pdev, int type)
+int s3c_usb_phy_exit(struct platform_device *pdev, int type)
 {
 	if (type == USB_PHY_TYPE_DEVICE)
 		return s3c_usb_otgphy_exit(pdev);
diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
index 96330ef25641..e771ce70e132 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c
@@ -189,7 +189,7 @@ static void apmu_init_cpu(struct resource *res, int cpu, int bit)
 	if ((cpu >= ARRAY_SIZE(apmu_cpus)) || apmu_cpus[cpu].iomem)
 		return;
 
-	apmu_cpus[cpu].iomem = ioremap_nocache(res->start, resource_size(res));
+	apmu_cpus[cpu].iomem = ioremap(res->start, resource_size(res));
 	apmu_cpus[cpu].bit = bit;
 
 	pr_debug("apmu ioremap %d %d %pr\n", cpu, bit, res);
diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c
index e84599dd96f1..672081405a7e 100644
--- a/arch/arm/mach-shmobile/pm-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c
@@ -103,7 +103,7 @@ map:
 	iounmap(p);
 
 	/* setup reset vectors */
-	p = ioremap_nocache(RST, 0x63);
+	p = ioremap(RST, 0x63);
 	bar = phys_to_sbar(res.start);
 	if (has_a15) {
 		writel_relaxed(bar, p + CA15BAR);
diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c
index 787d039b5a07..f760c27c9907 100644
--- a/arch/arm/mach-shmobile/setup-r8a7740.c
+++ b/arch/arm/mach-shmobile/setup-r8a7740.c
@@ -28,7 +28,7 @@ static void __init r8a7740_meram_workaround(void)
 {
 	void __iomem *reg;
 
-	reg = ioremap_nocache(MEBUFCNTR, 4);
+	reg = ioremap(MEBUFCNTR, 4);
 	if (reg) {
 		iowrite32(0x01600164, reg);
 		iounmap(reg);
@@ -37,9 +37,9 @@ static void __init r8a7740_meram_workaround(void)
 
 static void __init r8a7740_init_irq_of(void)
 {
-	void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10);
-	void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10);
-	void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4);
+	void __iomem *intc_prio_base = ioremap(0xe6900010, 0x10);
+	void __iomem *intc_msk_base = ioremap(0xe6900040, 0x10);
+	void __iomem *pfc_inta_ctrl = ioremap(0xe605807c, 0x4);
 
 	irqchip_init();
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index ce51794f64c7..2bc93f391bcf 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@@ -22,7 +22,7 @@
 
 static void __init r8a7778_init_irq_dt(void)
 {
-	void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000);
+	void __iomem *base = ioremap(0xfe700000, 0x00100000);
 
 	BUG_ON(!base);
 
diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index 9e4bc1865f84..2fd3aa6f3212 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c
@@ -24,7 +24,6 @@
 #include "rcar-gen2.h"
 
 static const struct of_device_id cpg_matches[] __initconst = {
-	{ .compatible = "renesas,rcar-gen2-cpg-clocks", },
 	{ .compatible = "renesas,r8a7743-cpg-mssr", .data = "extal" },
 	{ .compatible = "renesas,r8a7744-cpg-mssr", .data = "extal" },
 	{ .compatible = "renesas,r8a7790-cpg-mssr", .data = "extal" },
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 47ebcc8a5085..9e4cb2ffd580 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -73,10 +73,10 @@ static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
 
 	temp = readl(rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL);
 
-	if (mode == REBOOT_HARD)
-		temp |= RSTMGR_CTRL_SWCOLDRSTREQ;
-	else
+	if (mode == REBOOT_WARM)
 		temp |= RSTMGR_CTRL_SWWARMRSTREQ;
+	else
+		temp |= RSTMGR_CTRL_SWCOLDRSTREQ;
 	writel(temp, rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL);
 }
 
@@ -86,10 +86,10 @@ static void socfpga_arria10_restart(enum reboot_mode mode, const char *cmd)
 
 	temp = readl(rst_manager_base_addr + SOCFPGA_A10_RSTMGR_CTRL);
 
-	if (mode == REBOOT_HARD)
-		temp |= RSTMGR_CTRL_SWCOLDRSTREQ;
-	else
+	if (mode == REBOOT_WARM)
 		temp |= RSTMGR_CTRL_SWWARMRSTREQ;
+	else
+		temp |= RSTMGR_CTRL_SWCOLDRSTREQ;
 	writel(temp, rst_manager_base_addr + SOCFPGA_A10_RSTMGR_CTRL);
 }
 
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 2447427cb4a8..69f3fa270fbe 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void)
 {
 	pr_info_once(
 		"Disabling cpuidle LP2 state, since PCIe IRQs are in use\n");
-	tegra_idle_driver.states[1].disabled = true;
+	cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true);
 }
 
 int __init tegra20_cpuidle_init(void)
diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S
index 67b763fea005..e3f34815c9da 100644
--- a/arch/arm/mach-tegra/reset-handler.S
+++ b/arch/arm/mach-tegra/reset-handler.S
@@ -44,16 +44,16 @@ ENTRY(tegra_resume)
 	cmp	r6, #TEGRA20
 	beq	1f				@ Yes
 	/* Clear the flow controller flags for this CPU. */
-	cpu_to_csr_reg r1, r0
+	cpu_to_csr_reg r3, r0
 	mov32	r2, TEGRA_FLOW_CTRL_BASE
-	ldr	r1, [r2, r1]
+	ldr	r1, [r2, r3]
 	/* Clear event & intr flag */
 	orr	r1, r1, \
 		#FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG
 	movw	r0, #0x3FFD	@ enable, cluster_switch, immed, bitmaps
 				@ & ext flags for CPU power mgnt
 	bic	r1, r1, r0
-	str	r1, [r2]
+	str	r1, [r2, r3]
 1:
 
 	mov32	r9, 0xc09
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index b408fa56eb89..3341a12bbb9c 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -682,10 +682,12 @@ tegra30_enter_sleep:
 	dsb
 	ldr	r0, [r6, r2] /* memory barrier */
 
+	cmp	r10, #TEGRA30
 halted:
 	isb
 	dsb
-	wfi	/* CPU should be power gated here */
+	wfine	/* CPU should be power gated here */
+	wfeeq
 
 	/* !!!FIXME!!! Implement halt failure handler */
 	b	halted
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index a79fa3b0c8ed..a1694d977ec9 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -201,7 +201,7 @@ static unsigned long pin_highz_conf[] = {
 };
 
 /* Pin control settings */
-static struct pinctrl_map __initdata u300_pinmux_map[] = {
+static const struct pinctrl_map u300_pinmux_map[] = {
 	/* anonymous maps for chip power and EMIFs */
 	PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-u300", NULL, "power"),
 	PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-u300", NULL, "emif0"),
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 3875027ef8fc..e929aaa744c0 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -84,6 +84,7 @@ static void __init ux500_init_irq(void)
 	struct resource r;
 
 	irqchip_init();
+	prcmu_early_init();
 	np = of_find_compatible_node(NULL, NULL, "stericsson,db8500-prcmu");
 	of_address_to_resource(np, 0, &r);
 	of_node_put(np);
@@ -91,7 +92,6 @@ static void __init ux500_init_irq(void)
 		pr_err("could not find PRCMU base resource\n");
 		return;
 	}
-	prcmu_early_init(r.start, r.end-r.start);
 	ux500_pm_init(r.start, r.end-r.start);
 
 	/* Unlock before init */
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index 354e0e7025ae..1da11bdb1dfb 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev)
 
 static int __init ve_spc_clk_init(void)
 {
-	int cpu;
+	int cpu, cluster;
 	struct clk *clk;
+	bool init_opp_table[MAX_CLUSTERS] = { false };
 
 	if (!info)
 		return 0; /* Continue only if SPC is initialised */
@@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void)
 			continue;
 		}
 
+		cluster = topology_physical_package_id(cpu_dev->id);
+		if (init_opp_table[cluster])
+			continue;
+
 		if (ve_init_opp_table(cpu_dev))
 			pr_warn("failed to initialise cpu%d opp table\n", cpu);
+		else if (dev_pm_opp_set_sharing_cpus(cpu_dev,
+			 topology_core_cpumask(cpu_dev->id)))
+			pr_warn("failed to mark OPPs shared for cpu%d\n", cpu);
+		else
+			init_opp_table[cluster] = true;
 	}
 
 	platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0);
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 0ab3a86b1f52..65e4482e3849 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -896,7 +896,10 @@ config VDSO
 	bool "Enable VDSO for acceleration of some system calls"
 	depends on AEABI && MMU && CPU_V7
 	default y if ARM_ARCH_TIMER
+	select HAVE_GENERIC_VDSO
 	select GENERIC_TIME_VSYSCALL
+	select GENERIC_VDSO_32
+	select GENERIC_GETTIMEOFDAY
 	help
 	  Place in the process address space an ELF shared object
 	  providing fast implementations of gettimeofday and
@@ -1041,7 +1044,7 @@ endif
 
 config CACHE_TAUROS2
 	bool "Enable the Tauros2 L2 cache controller"
-	depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4)
+	depends on (CPU_MOHAWK || CPU_PJ4)
 	default y
 	select OUTER_CACHE
 	help
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 0ee8fc4b4672..dc8f152f3556 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -135,13 +135,13 @@ flush_levels:
 	and	r1, r1, #7			@ mask of the bits for current cache only
 	cmp	r1, #2				@ see what cache we have at this level
 	blt	skip				@ skip if no cache, or just i-cache
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
 #endif
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb					@ isb to sych the new cssr&csidr
 	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	restore_irqs_notrace r9
 #endif
 	and	r2, r1, #7			@ extract the length of the cache lines
diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
index a0035c426ce6..1bc3a0a50753 100644
--- a/arch/arm/mm/cache-v7m.S
+++ b/arch/arm/mm/cache-v7m.S
@@ -183,13 +183,13 @@ flush_levels:
 	and	r1, r1, #7			@ mask of the bits for current cache only
 	cmp	r1, #2				@ see what cache we have at this level
 	blt	skip				@ skip if no cache, or just i-cache
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
 #endif
 	write_csselr r10, r1			@ set current cache level
 	isb					@ isb to sych the new cssr&csidr
 	read_ccsidr r1				@ read the new csidr
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	restore_irqs_notrace r9
 #endif
 	and	r2, r1, #7			@ extract the length of the cache lines
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index db9247898300..287ef898a55e 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
 				 unsigned long attrs)
 
 {
-	void *ret = dma_alloc_from_global_coherent(size, dma_handle);
+	void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle);
 
 	/*
 	 * dma_alloc_from_global_coherent() may fail because:
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7d042d5c43e3..e822af0d9219 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
 
 static bool __in_atomic_pool(void *start, size_t size)
 {
-	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+	return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
 }
 
 static int __free_from_pool(void *start, size_t size)
@@ -1559,7 +1559,7 @@ static int arm_coherent_iommu_mmap_attrs(struct device *dev,
  * free a page as defined by the above mapping.
  * Must not be called with IRQs disabled.
  */
-void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+static void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	dma_addr_t handle, unsigned long attrs, int coherent_flag)
 {
 	struct page **pages;
@@ -1583,13 +1583,14 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	__iommu_free_buffer(dev, pages, size, attrs);
 }
 
-void arm_iommu_free_attrs(struct device *dev, size_t size,
-		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
+static void arm_iommu_free_attrs(struct device *dev, size_t size,
+				 void *cpu_addr, dma_addr_t handle,
+				 unsigned long attrs)
 {
 	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
 }
 
-void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
+static void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
 		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
 {
 	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT);
@@ -1713,7 +1714,7 @@ bad_mapping:
  * possible) and tagged with the appropriate dma address and length. They are
  * obtained via sg_dma_{address,length}.
  */
-int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+static int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
@@ -1731,7 +1732,7 @@ int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
  * tagged with the appropriate dma address and length. They are obtained via
  * sg_dma_{address,length}.
  */
-int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_map_sg(dev, sg, nents, dir, attrs, false);
@@ -1764,8 +1765,8 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
  * Unmap a set of streaming mode DMA translations.  Again, CPU access
  * rules concerning calls here are the same as for dma_unmap_single().
  */
-void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir,
+static void arm_coherent_iommu_unmap_sg(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir,
 		unsigned long attrs)
 {
 	__iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
@@ -1781,9 +1782,10 @@ void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
  * Unmap a set of streaming mode DMA translations.  Again, CPU access
  * rules concerning calls here are the same as for dma_unmap_single().
  */
-void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-			enum dma_data_direction dir,
-			unsigned long attrs)
+static void arm_iommu_unmap_sg(struct device *dev,
+			       struct scatterlist *sg, int nents,
+			       enum dma_data_direction dir,
+			       unsigned long attrs)
 {
 	__iommu_unmap_sg(dev, sg, nents, dir, attrs, false);
 }
@@ -1795,7 +1797,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
  * @nents: number of buffers to map (returned from dma_map_sg)
  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
  */
-void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+static void arm_iommu_sync_sg_for_cpu(struct device *dev,
+			struct scatterlist *sg,
 			int nents, enum dma_data_direction dir)
 {
 	struct scatterlist *s;
@@ -1813,7 +1816,8 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
  * @nents: number of buffers to map (returned from dma_map_sg)
  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
  */
-void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+static void arm_iommu_sync_sg_for_device(struct device *dev,
+			struct scatterlist *sg,
 			int nents, enum dma_data_direction dir)
 {
 	struct scatterlist *s;
@@ -2015,7 +2019,7 @@ static void arm_iommu_sync_single_for_device(struct device *dev,
 	__dma_page_cpu_to_dev(page, offset, size, dir);
 }
 
-const struct dma_map_ops iommu_ops = {
+static const struct dma_map_ops iommu_ops = {
 	.alloc		= arm_iommu_alloc_attrs,
 	.free		= arm_iommu_free_attrs,
 	.mmap		= arm_iommu_mmap_attrs,
@@ -2037,7 +2041,7 @@ const struct dma_map_ops iommu_ops = {
 	.dma_supported		= arm_dma_supported,
 };
 
-const struct dma_map_ops iommu_coherent_ops = {
+static const struct dma_map_ops iommu_coherent_ops = {
 	.alloc		= arm_coherent_iommu_alloc_attrs,
 	.free		= arm_coherent_iommu_free_attrs,
 	.mmap		= arm_coherent_iommu_mmap_attrs,
@@ -2332,26 +2336,20 @@ void arch_teardown_dma_ops(struct device *dev)
 }
 
 #ifdef CONFIG_SWIOTLB
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
 			      size, dir);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
 			      size, dir);
 }
 
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
-		dma_addr_t dma_addr)
-{
-	return dma_to_pfn(dev, dma_addr);
-}
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b4be3baa83d4..3ef204137e73 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -30,6 +30,7 @@
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/set_memory.h>
 #include <asm/system_info.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
@@ -180,7 +181,7 @@ int pfn_valid(unsigned long pfn)
 	if (__phys_to_pfn(addr) != pfn)
 		return 0;
 
-	return memblock_is_map_memory(__pfn_to_phys(pfn));
+	return memblock_is_map_memory(addr);
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif
@@ -593,8 +594,8 @@ static inline bool arch_has_strict_perms(void)
 	return !!(get_cr() & CR_XP);
 }
 
-void set_section_perms(struct section_perm *perms, int n, bool set,
-			struct mm_struct *mm)
+static void set_section_perms(struct section_perm *perms, int n, bool set,
+			      struct mm_struct *mm)
 {
 	size_t i;
 	unsigned long addr;
diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c
index 091ddc56827e..415d0a454237 100644
--- a/arch/arm/mm/iomap.c
+++ b/arch/arm/mm/iomap.c
@@ -10,6 +10,8 @@
 #include <linux/ioport.h>
 #include <linux/io.h>
 
+#include <asm/vga.h>
+
 unsigned long vga_base;
 EXPORT_SYMBOL(vga_base);
 
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index d42b93316183..72286f9a4d30 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -382,15 +382,11 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size)
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
-	__alias(ioremap_cached);
-
-void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
 {
 	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
 				   __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
-EXPORT_SYMBOL(ioremap_cached);
 
 void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
 {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 48c2888297dd..5d0d0f86e790 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -259,7 +259,7 @@ static struct mem_type mem_types[] __ro_after_init = {
 		.prot_sect	= PROT_SECT_DEVICE,
 		.domain		= DOMAIN_IO,
 	},
-	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
+	[MT_DEVICE_CACHED] = {	  /* ioremap_cache */
 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 24ecf8d30a1e..8b3d7191e2b8 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -206,15 +206,11 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size)
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
-	__alias(ioremap_cached);
-
-void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
 {
 	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
 				    __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
-EXPORT_SYMBOL(ioremap_cached);
 
 void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
 {
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 4fa5371bc662..2785da387c91 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -491,7 +491,7 @@ cpu_arm1020_name:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm1020_proc_info,#object
 __arm1020_proc_info:
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 5d8a8339e09a..e9ea237ed785 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -449,7 +449,7 @@ arm1020e_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm1020e_proc_info,#object
 __arm1020e_proc_info:
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index b3dd95c345e4..920c279e7879 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -443,7 +443,7 @@ arm1022_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm1022_proc_info,#object
 __arm1022_proc_info:
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index ac5afde12f35..0bdf25a95b10 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -138,7 +138,7 @@ ENTRY(arm1026_flush_kern_cache_all)
 	mov	ip, #0
 __flush_whole_cache:
 #ifndef CONFIG_CPU_DCACHE_DISABLE
-1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
+1:	mrc	p15, 0, APSR_nzcv, c7, c14, 3		@ test, clean, invalidate
 	bne	1b
 #endif
 	tst	r2, #VM_EXEC
@@ -363,7 +363,7 @@ ENTRY(cpu_arm1026_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r1, #0
 #ifndef CONFIG_CPU_DCACHE_DISABLE
-1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
+1:	mrc	p15, 0, APSR_nzcv, c7, c14, 3		@ test, clean, invalidate
 	bne	1b
 #endif
 #ifndef CONFIG_CPU_ICACHE_DISABLE
@@ -437,7 +437,7 @@ arm1026_crval:
 	string	cpu_arm1026_name, "ARM1026EJ-S"
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm1026_proc_info,#object
 __arm1026_proc_info:
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index c99d24363f32..39361e196d61 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -172,7 +172,7 @@ arm720_crval:
  * See <asm/procinfo.h> for a definition of this structure.
  */
 	
-		.section ".proc.info.init", #alloc
+		.section ".proc.info.init", "a"
 
 .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
 		.type	__\name\()_proc_info,#object
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 1b4a3838393f..1a94bbf6e53f 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -128,7 +128,7 @@ __arm740_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 	.type	__arm740_proc_info,#object
 __arm740_proc_info:
 	.long	0x41807400
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 17a4687065c7..52b66cf0259e 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -72,7 +72,7 @@ __arm7tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc
+		.section ".proc.info.init", "a"
 
 .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
 	extra_hwcaps=0
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 298c76b47749..31ac8acc34dc 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -434,7 +434,7 @@ arm920_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm920_proc_info,#object
 __arm920_proc_info:
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 824be3a0bc23..ca2c7ca8af21 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -412,7 +412,7 @@ arm922_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm922_proc_info,#object
 __arm922_proc_info:
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index d40cff8f102c..a381a0c9f109 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -477,7 +477,7 @@ arm925_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index f3cd08f353f0..1ba253c2bce1 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -131,7 +131,7 @@ __flush_whole_cache:
 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
 #else
-1:	mrc	p15, 0, r15, c7, c14, 3 	@ test,clean,invalidate
+1:	mrc	p15, 0, APSR_nzcv, c7, c14, 3 	@ test,clean,invalidate
 	bne	1b
 #endif
 	tst	r2, #VM_EXEC
@@ -358,7 +358,7 @@ ENTRY(cpu_arm926_switch_mm)
 	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
 #else
 @ && 'Clean & Invalidate whole DCache'
-1:	mrc	p15, 0, r15, c7, c14, 3 	@ test,clean,invalidate
+1:	mrc	p15, 0, APSR_nzcv, c7, c14, 3 	@ test,clean,invalidate
 	bne	1b
 #endif
 	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
@@ -460,7 +460,7 @@ arm926_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm926_proc_info,#object
 __arm926_proc_info:
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1c26d991386d..4b8a00220cc9 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -340,7 +340,7 @@ __arm940_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__arm940_proc_info,#object
 __arm940_proc_info:
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 2dc1c75a4fd4..555becf9c758 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -395,7 +395,7 @@ __arm946_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 	.type	__arm946_proc_info,#object
 __arm946_proc_info:
 	.long	0x41009460
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 913c06e590af..ef517530130b 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -66,7 +66,7 @@ __arm9tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc
+		.section ".proc.info.init", "a"
 
 .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 		.type	__\name\()_proc_info, #object
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 8120b6f4dbb8..dddf833fe000 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -185,7 +185,7 @@ fa526_cr1_set:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__fa526_proc_info,#object
 __fa526_proc_info:
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index bb6dc34d42a3..b12b76bc8d30 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -571,7 +571,7 @@ feroceon_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
 	.type	__\name\()_proc_info,#object
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index f08308578885..d47d6c5cee63 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -416,7 +416,7 @@ mohawk_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__88sv331x_proc_info,#object
 __88sv331x_proc_info:
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index d5bc5d702563..baba503ba816 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -196,7 +196,7 @@ sa110_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	.type	__sa110_proc_info,#object
 __sa110_proc_info:
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index be7b611c76c7..75ebacc8e4e5 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -239,7 +239,7 @@ sa1100_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 	.type	__\name\()_proc_info,#object
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index c1c85eb3484f..1dd0d5ca27da 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -261,7 +261,7 @@ v6_crval:
 	string	cpu_elf_name, "v6"
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	/*
 	 * Match any ARMv6 processor core.
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 9a07916af8dd..c0fbfca5da8b 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/arm-smccc.h>
 #include <linux/kernel.h>
-#include <linux/psci.h>
 #include <linux/smp.h>
 
 #include <asm/cp15.h>
@@ -65,6 +64,9 @@ static void cpu_v7_spectre_init(void)
 		break;
 
 #ifdef CONFIG_ARM_PSCI
+	case ARM_CPU_PART_BRAHMA_B53:
+		/* Requires no workaround */
+		break;
 	default:
 		/* Other ARM CPUs require no workaround */
 		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
@@ -75,26 +77,20 @@ static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A72: {
 		struct arm_smccc_res res;
 
-		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
-			break;
+		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+		if ((int)res.a0 != 0)
+			return;
 
-		switch (psci_ops.conduit) {
-		case PSCI_CONDUIT_HVC:
-			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
+		switch (arm_smccc_1_1_get_conduit()) {
+		case SMCCC_CONDUIT_HVC:
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
 			spectre_v2_method = "hypervisor";
 			break;
 
-		case PSCI_CONDUIT_SMC:
-			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
+		case SMCCC_CONDUIT_SMC:
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c4e8006a1a8c..48e0ef6f0dcc 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -644,7 +644,7 @@ __v7_setup_stack:
 	string	cpu_elf_name, "v7"
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 	/*
 	 * Standard v7 proc info content
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 1a49d503eafc..84459c1d31b8 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -93,7 +93,7 @@ ENTRY(cpu_cm7_proc_fin)
 	ret	lr
 ENDPROC(cpu_cm7_proc_fin)
 
-	.section ".init.text", #alloc, #execinstr
+	.section ".init.text", "ax"
 
 __v7m_cm7_setup:
 	mov	r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP)
@@ -177,7 +177,7 @@ ENDPROC(__v7m_setup)
 	string cpu_elf_name "v7m"
 	string cpu_v7m_name "ARMv7-M"
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0,  proc_fns = v7m_processor_functions
 	.long	0			/* proc_info_list.__cpu_mm_mmu_flags */
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 1ac0fbbe9f12..42eaecc43cfe 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -496,7 +496,7 @@ xsc3_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
 	.type	__\name\()_proc_info,#object
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index bdb2b7749b03..18ac5a1f8922 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -610,7 +610,7 @@ xscale_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc
+	.section ".proc.info.init", "a"
 
 .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 97dc386e3cb8..cc29869d12a3 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-	const s8 r0 = bpf2a32[BPF_REG_0][1];
-	const s8 r2 = bpf2a32[BPF_REG_1][1];
-	const s8 r3 = bpf2a32[BPF_REG_1][0];
-	const s8 r4 = bpf2a32[BPF_REG_6][1];
-	const s8 fplo = bpf2a32[BPF_REG_FP][1];
-	const s8 fphi = bpf2a32[BPF_REG_FP][0];
+	const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
+	const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
+	const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
 	const s8 *tcc = bpf2a32[TCALL_CNT];
 
 	/* Save callee saved registers. */
@@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
 	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
-	/* Save frame pointer for later */
-	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+	/* mov r3, #0 */
+	/* sub r2, sp, #SCRATCH_SIZE */
+	emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
+	emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
 
 	ctx->stack_size = imm8m(STACK_SIZE);
 
@@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
 	/* Set up BPF prog stack base register */
-	emit_a32_mov_r(fplo, ARM_IP, ctx);
-	emit_a32_mov_i(fphi, 0, ctx);
+	emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
 
-	/* mov r4, 0 */
-	emit(ARM_MOV_I(r4, 0), ctx);
+	/* Initialize Tail Count */
+	emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
+	emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
 
 	/* Move BPF_CTX to BPF_R1 */
-	emit(ARM_MOV_R(r3, r4), ctx);
-	emit(ARM_MOV_R(r2, r0), ctx);
-	/* Initialize Tail Count */
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
+	emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
+
 	/* end of prologue */
 }
 
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index 9a6e4923bd69..563440315acd 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp)
 		ssp->use_count--;
 		ssp->label = NULL;
 	} else
-		dev_err(&ssp->pdev->dev, "device already free\n");
+		dev_err(ssp->dev, "device already free\n");
 	mutex_unlock(&ssp_lock);
 }
 EXPORT_SYMBOL(pxa_ssp_free);
@@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev)
 	if (ssp == NULL)
 		return -ENOMEM;
 
-	ssp->pdev = pdev;
+	ssp->dev = dev;
 
 	ssp->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(ssp->clk))
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 1d1fa068d228..1602f6dc900b 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -1010,9 +1010,9 @@ void __init dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd)
 	npd = s3c_set_platdata(pd, sizeof(*npd), &s3c_device_usb_hsotg);
 
 	if (!npd->phy_init)
-		npd->phy_init = s5p_usb_phy_init;
+		npd->phy_init = s3c_usb_phy_init;
 	if (!npd->phy_exit)
-		npd->phy_exit = s5p_usb_phy_exit;
+		npd->phy_exit = s3c_usb_phy_exit;
 }
 #endif /* CONFIG_S3C_DEV_USB_HSOTG */
 
diff --git a/arch/arm/plat-samsung/include/plat/usb-phy.h b/arch/arm/plat-samsung/include/plat/usb-phy.h
index 94da89ecbd3b..759d66a0773a 100644
--- a/arch/arm/plat-samsung/include/plat/usb-phy.h
+++ b/arch/arm/plat-samsung/include/plat/usb-phy.h
@@ -7,7 +7,7 @@
 #ifndef __PLAT_SAMSUNG_USB_PHY_H
 #define __PLAT_SAMSUNG_USB_PHY_H __FILE__
 
-extern int s5p_usb_phy_init(struct platform_device *pdev, int type);
-extern int s5p_usb_phy_exit(struct platform_device *pdev, int type);
+extern int s3c_usb_phy_init(struct platform_device *pdev, int type);
+extern int s3c_usb_phy_exit(struct platform_device *pdev, int type);
 
 #endif /* __PLAT_SAMSUNG_USB_PHY_H */
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 6da7dc4d79cc..4d1cf74a2caa 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -449,3 +449,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 87b7769214e0..1babb392e70a 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -1,14 +1,20 @@
 # SPDX-License-Identifier: GPL-2.0
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
+include $(srctree)/lib/vdso/Makefile
+
 hostprogs-y := vdsomunge
 
-obj-vdso := vgettimeofday.o datapage.o
+obj-vdso := vgettimeofday.o datapage.o note.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
 ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32
 
 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
@@ -24,7 +30,11 @@ CFLAGS_REMOVE_vdso.o = -pg
 
 # Force -O2 to avoid libgcc dependencies
 CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+ifeq ($(c-gettimeofday-y),)
 CFLAGS_vgettimeofday.o = -O2
+else
+CFLAGS_vgettimeofday.o = -O2 -include $(c-gettimeofday-y)
+endif
 
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
@@ -37,7 +47,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
 
 # Link rule for the .so file
 $(obj)/vdso.so.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
-	$(call if_changed,ld)
+	$(call if_changed,vdsold_and_vdso_check)
 
 $(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE
 	$(call if_changed,vdsomunge)
@@ -47,6 +57,10 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
+# Actual build commands
+quiet_cmd_vdsold_and_vdso_check = LD      $@
+      cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
+
 quiet_cmd_vdsomunge = MUNGE   $@
       cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@
 
diff --git a/arch/arm/vdso/note.c b/arch/arm/vdso/note.c
new file mode 100644
index 000000000000..eff5bf9efb8b
--- /dev/null
+++ b/arch/arm/vdso/note.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012-2018 ARM Limited
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+#include <linux/build-salt.h>
+
+ELFNOTE32("Linux", 0, LINUX_VERSION_CODE);
+BUILD_SALT;
diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S
index 73cf205b003e..165d1d2eb76b 100644
--- a/arch/arm/vdso/vdso.lds.S
+++ b/arch/arm/vdso/vdso.lds.S
@@ -71,6 +71,8 @@ VERSION
 	global:
 		__vdso_clock_gettime;
 		__vdso_gettimeofday;
+		__vdso_clock_getres;
+		__vdso_clock_gettime64;
 	local: *;
 	};
 }
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index d1fdbb12760a..1976c6f325a4 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -1,259 +1,34 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
+ * ARM userspace implementations of gettimeofday() and similar.
+ *
  * Copyright 2015 Mentor Graphics Corporation.
  */
-
-#include <linux/compiler.h>
-#include <linux/hrtimer.h>
 #include <linux/time.h>
-#include <asm/barrier.h>
-#include <asm/bug.h>
-#include <asm/cp15.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <asm/vdso_datapage.h>
-
-#ifndef CONFIG_AEABI
-#error This code depends on AEABI system call conventions
-#endif
-
-extern struct vdso_data *__get_datapage(void);
-
-static notrace u32 __vdso_read_begin(const struct vdso_data *vdata)
-{
-	u32 seq;
-repeat:
-	seq = READ_ONCE(vdata->seq_count);
-	if (seq & 1) {
-		cpu_relax();
-		goto repeat;
-	}
-	return seq;
-}
-
-static notrace u32 vdso_read_begin(const struct vdso_data *vdata)
-{
-	u32 seq;
-
-	seq = __vdso_read_begin(vdata);
-
-	smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */
-	return seq;
-}
+#include <linux/types.h>
 
-static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start)
+int __vdso_clock_gettime(clockid_t clock,
+			 struct old_timespec32 *ts)
 {
-	smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */
-	return vdata->seq_count != start;
+	return __cvdso_clock_gettime32(clock, ts);
 }
 
-static notrace long clock_gettime_fallback(clockid_t _clkid,
-					   struct timespec *_ts)
+int __vdso_clock_gettime64(clockid_t clock,
+			   struct __kernel_timespec *ts)
 {
-	register struct timespec *ts asm("r1") = _ts;
-	register clockid_t clkid asm("r0") = _clkid;
-	register long ret asm ("r0");
-	register long nr asm("r7") = __NR_clock_gettime;
-
-	asm volatile(
-	"	swi #0\n"
-	: "=r" (ret)
-	: "r" (clkid), "r" (ts), "r" (nr)
-	: "memory");
-
-	return ret;
+	return __cvdso_clock_gettime(clock, ts);
 }
 
-static notrace int do_realtime_coarse(struct timespec *ts,
-				      struct vdso_data *vdata)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+			struct timezone *tz)
 {
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		ts->tv_sec = vdata->xtime_coarse_sec;
-		ts->tv_nsec = vdata->xtime_coarse_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-
-	return 0;
+	return __cvdso_gettimeofday(tv, tz);
 }
 
-static notrace int do_monotonic_coarse(struct timespec *ts,
-				       struct vdso_data *vdata)
+int __vdso_clock_getres(clockid_t clock_id,
+			struct old_timespec32 *res)
 {
-	struct timespec tomono;
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		ts->tv_sec = vdata->xtime_coarse_sec;
-		ts->tv_nsec = vdata->xtime_coarse_nsec;
-
-		tomono.tv_sec = vdata->wtm_clock_sec;
-		tomono.tv_nsec = vdata->wtm_clock_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-
-	ts->tv_sec += tomono.tv_sec;
-	timespec_add_ns(ts, tomono.tv_nsec);
-
-	return 0;
-}
-
-#ifdef CONFIG_ARM_ARCH_TIMER
-
-static notrace u64 get_ns(struct vdso_data *vdata)
-{
-	u64 cycle_delta;
-	u64 cycle_now;
-	u64 nsec;
-
-	isb();
-	cycle_now = read_sysreg(CNTVCT);
-
-	cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
-
-	nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec;
-	nsec >>= vdata->cs_shift;
-
-	return nsec;
-}
-
-static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
-{
-	u64 nsecs;
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		if (!vdata->tk_is_cntvct)
-			return -1;
-
-		ts->tv_sec = vdata->xtime_clock_sec;
-		nsecs = get_ns(vdata);
-
-	} while (vdso_read_retry(vdata, seq));
-
-	ts->tv_nsec = 0;
-	timespec_add_ns(ts, nsecs);
-
-	return 0;
-}
-
-static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
-{
-	struct timespec tomono;
-	u64 nsecs;
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		if (!vdata->tk_is_cntvct)
-			return -1;
-
-		ts->tv_sec = vdata->xtime_clock_sec;
-		nsecs = get_ns(vdata);
-
-		tomono.tv_sec = vdata->wtm_clock_sec;
-		tomono.tv_nsec = vdata->wtm_clock_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-
-	ts->tv_sec += tomono.tv_sec;
-	ts->tv_nsec = 0;
-	timespec_add_ns(ts, nsecs + tomono.tv_nsec);
-
-	return 0;
-}
-
-#else /* CONFIG_ARM_ARCH_TIMER */
-
-static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
-{
-	return -1;
-}
-
-static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
-{
-	return -1;
-}
-
-#endif /* CONFIG_ARM_ARCH_TIMER */
-
-notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
-{
-	struct vdso_data *vdata;
-	int ret = -1;
-
-	vdata = __get_datapage();
-
-	switch (clkid) {
-	case CLOCK_REALTIME_COARSE:
-		ret = do_realtime_coarse(ts, vdata);
-		break;
-	case CLOCK_MONOTONIC_COARSE:
-		ret = do_monotonic_coarse(ts, vdata);
-		break;
-	case CLOCK_REALTIME:
-		ret = do_realtime(ts, vdata);
-		break;
-	case CLOCK_MONOTONIC:
-		ret = do_monotonic(ts, vdata);
-		break;
-	default:
-		break;
-	}
-
-	if (ret)
-		ret = clock_gettime_fallback(clkid, ts);
-
-	return ret;
-}
-
-static notrace long gettimeofday_fallback(struct timeval *_tv,
-					  struct timezone *_tz)
-{
-	register struct timezone *tz asm("r1") = _tz;
-	register struct timeval *tv asm("r0") = _tv;
-	register long ret asm ("r0");
-	register long nr asm("r7") = __NR_gettimeofday;
-
-	asm volatile(
-	"	swi #0\n"
-	: "=r" (ret)
-	: "r" (tv), "r" (tz), "r" (nr)
-	: "memory");
-
-	return ret;
-}
-
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-	struct timespec ts;
-	struct vdso_data *vdata;
-	int ret;
-
-	vdata = __get_datapage();
-
-	ret = do_realtime(&ts, vdata);
-	if (ret)
-		return gettimeofday_fallback(tv, tz);
-
-	if (tv) {
-		tv->tv_sec = ts.tv_sec;
-		tv->tv_usec = ts.tv_nsec / 1000;
-	}
-	if (tz) {
-		tz->tz_minuteswest = vdata->tz_minuteswest;
-		tz->tz_dsttime = vdata->tz_dsttime;
-	}
-
-	return ret;
+	return __cvdso_clock_getres_time32(clock_id, res);
 }
 
 /* Avoid unresolved references emitted by GCC */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 38fa917c8585..d40e9e5fc52b 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -15,6 +15,7 @@
 #include <xen/interface/grant_table.h>
 #include <xen/interface/memory.h>
 #include <xen/page.h>
+#include <xen/xen-ops.h>
 #include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
@@ -70,20 +71,20 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
  * pfn_valid returns true the pages is local and we can use the native
  * dma-direct functions, otherwise we call the Xen specific version.
  */
-void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
-		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+void xen_dma_sync_for_cpu(dma_addr_t handle, phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	if (pfn_valid(PFN_DOWN(handle)))
-		arch_sync_dma_for_cpu(dev, paddr, size, dir);
+		arch_sync_dma_for_cpu(paddr, size, dir);
 	else if (dir != DMA_TO_DEVICE)
 		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
 }
 
-void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
-		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+void xen_dma_sync_for_device(dma_addr_t handle, phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	if (pfn_valid(PFN_DOWN(handle)))
-		arch_sync_dma_for_device(dev, paddr, size, dir);
+		arch_sync_dma_for_device(paddr, size, dir);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
 	else
@@ -133,7 +134,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	return;
 }
 
-int __init xen_mm_init(void)
+static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
 	if (!xen_initial_domain())
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..de238b59d9eb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -12,7 +12,6 @@ config ARM64
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_FAST_MULTIPLIER
@@ -35,41 +34,42 @@ config ARM64
 	select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
-	select ARCH_INLINE_READ_LOCK if !PREEMPT
-	select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
-	select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT
-	select ARCH_INLINE_READ_UNLOCK if !PREEMPT
-	select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT
-	select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT
-	select ARCH_INLINE_WRITE_LOCK if !PREEMPT
-	select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT
-	select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT
-	select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT
-	select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT
-	select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT
-	select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT
-	select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT
-	select ARCH_INLINE_SPIN_LOCK if !PREEMPT
-	select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT
-	select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT
-	select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT
-	select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT
-	select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT
-	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT
+	select ARCH_INLINE_READ_LOCK if !PREEMPTION
+	select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
+	select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION
+	select ARCH_INLINE_READ_UNLOCK if !PREEMPTION
+	select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION
+	select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION
+	select ARCH_INLINE_WRITE_LOCK if !PREEMPTION
+	select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION
+	select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION
+	select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION
+	select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION
+	select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION
+	select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION
+	select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION
+	select ARCH_INLINE_SPIN_LOCK if !PREEMPTION
+	select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION
+	select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION
+	select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION
+	select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION
+	select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
+	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
 	select ARCH_KEEP_MEMBLOCK
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select ARCH_SUPPORTS_ATOMIC_RMW
-	select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
+	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
@@ -82,7 +82,7 @@ config ARM64
 	select ARM_GIC_V3
 	select ARM_GIC_V3_ITS if PCI
 	select ARM_PSCI_FW
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select CPU_PM if (SUSPEND || CPU_IDLE)
@@ -139,10 +139,13 @@ config ARM64
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING
+	select HAVE_COPY_THREAD_TLS
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS \
+		if $(cc-option,-fpatchable-function-entry=2)
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -160,6 +163,7 @@ config ARM64
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_FUNCTION_ARG_ACCESS_API
+	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
@@ -180,7 +184,6 @@ config ARM64
 	select PCI_SYSCALL if PCI
 	select POWER_RESET
 	select POWER_SUPPLY
-	select REFCOUNT_FULL
 	select SPARSE_IRQ
 	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE
@@ -266,6 +269,10 @@ config GENERIC_CSUM
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
+config ZONE_DMA
+	bool "Support DMA zone" if EXPERT
+	default y
+
 config ZONE_DMA32
 	bool "Support DMA32 zone" if EXPERT
 	default y
@@ -297,6 +304,9 @@ config ARCH_SUPPORTS_UPROBES
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
 
+config BROKEN_GAS_INST
+	def_bool !$(as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n)
+
 config KASAN_SHADOW_OFFSET
 	hex
 	depends on KASAN
@@ -510,9 +520,13 @@ config ARM64_ERRATUM_1418040
 
 	  If unsure, say Y.
 
+config ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+	bool
+
 config ARM64_ERRATUM_1165522
 	bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
 	default y
+	select ARM64_WORKAROUND_SPECULATIVE_AT_VHE
 	help
 	  This option adds a workaround for ARM Cortex-A76 erratum 1165522.
 
@@ -522,6 +536,19 @@ config ARM64_ERRATUM_1165522
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1530923
+	bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+	default y
+	select ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+	help
+	  This option adds a workaround for ARM Cortex-A55 erratum 1530923.
+
+	  Affected Cortex-A55 cores (r0p0, r0p1, r1p0, r2p0) could end-up with
+	  corrupted TLBs by speculating an AT instruction during a guest
+	  context switch.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_1286807
 	bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
 	default y
@@ -538,6 +565,20 @@ config ARM64_ERRATUM_1286807
 	  invalidated has been observed by other observers. The
 	  workaround repeats the TLBI+DSB operation.
 
+config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE
+	bool
+
+config ARM64_ERRATUM_1319367
+	bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+	default y
+	select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE
+	help
+	  This option adds work arounds for ARM Cortex-A57 erratum 1319537
+	  and A72 erratum 1319367
+
+	  Cortex-A57 and A72 cores could end-up with corrupted TLBs by
+	  speculating an AT instruction during a guest context switch.
+
 	  If unsure, say Y.
 
 config ARM64_ERRATUM_1463225
@@ -558,6 +599,22 @@ config ARM64_ERRATUM_1463225
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1542419
+	bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
+	default y
+	help
+	  This option adds a workaround for ARM Neoverse-N1 erratum
+	  1542419.
+
+	  Affected Neoverse-N1 cores could execute a stale instruction when
+	  modified by another CPU. The workaround depends on a firmware
+	  counterpart.
+
+	  Workaround the issue by hiding the DIC feature from EL0. This
+	  forces user-space to perform cache maintenance.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
@@ -845,10 +902,26 @@ config ARM64_PA_BITS
 	default 48 if ARM64_PA_BITS_48
 	default 52 if ARM64_PA_BITS_52
 
+choice
+	prompt "Endianness"
+	default CPU_LITTLE_ENDIAN
+	help
+	  Select the endianness of data accesses performed by the CPU. Userspace
+	  applications will need to be compiled and linked for the endianness
+	  that is selected here.
+
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
        help
-         Say Y if you plan on running a kernel in big-endian mode.
+	  Say Y if you plan on running a kernel with a big-endian userspace.
+
+config CPU_LITTLE_ENDIAN
+	bool "Build little-endian kernel"
+	help
+	  Say Y if you plan on running a kernel with a little-endian userspace.
+	  This is usually the case for distributions targeting arm64.
+
+endchoice
 
 config SCHED_MC
 	bool "Multi-core scheduler support"
@@ -1317,6 +1390,11 @@ config ARM64_PAN
 	 instruction if the cpu does not implement the feature.
 
 config ARM64_LSE_ATOMICS
+	bool
+	default ARM64_USE_LSE_ATOMICS
+	depends on $(as-instr,.arch_extension lse)
+
+config ARM64_USE_LSE_ATOMICS
 	bool "Atomic instructions"
 	depends on JUMP_LABEL
 	default y
@@ -1438,6 +1516,30 @@ config ARM64_PTR_AUTH
 
 endmenu
 
+menu "ARMv8.5 architectural features"
+
+config ARM64_E0PD
+	bool "Enable support for E0PD"
+	default y
+	help
+	  E0PD (part of the ARMv8.5 extensions) allows us to ensure
+	  that EL0 accesses made via TTBR1 always fault in constant time,
+	  providing similar benefits to KASLR as those provided by KPTI, but
+	  with lower overhead and without disrupting legitimate access to
+	  kernel memory such as SPE.
+
+	  This option enables E0PD for TTBR1 where available.
+
+config ARCH_RANDOM
+	bool "Enable support for random number generation"
+	default y
+	help
+	  Random number generation (part of the ARMv8.5 Extensions)
+	  provides a high bandwidth, cryptographically secure
+	  hardware random number generator.
+
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
@@ -1498,7 +1600,7 @@ config ARM64_MODULE_PLTS
 
 config ARM64_PSEUDO_NMI
 	bool "Support for NMI-like interrupts"
-	select CONFIG_ARM_GIC_V3
+	select ARM_GIC_V3
 	help
 	  Adds support for mimicking Non-Maskable Interrupts through the use of
 	  GIC interrupt priority. This support requires version 3 or later of
@@ -1597,6 +1699,7 @@ config CMDLINE
 
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
+	depends on CMDLINE != ""
 	help
 	  Always use the default kernel command string, even if the boot
 	  loader passes other arguments to the kernel.
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 16d761475a86..b2b504ea6fd6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -37,11 +37,12 @@ config ARCH_BCM2835
 	select PINCTRL
 	select PINCTRL_BCM2835
 	select ARM_AMBA
+	select ARM_GIC
 	select ARM_TIMER_SP804
 	select HAVE_ARM_ARCH_TIMER
 	help
-	  This enables support for the Broadcom BCM2837 SoC.
-	  This SoC is used in the Raspberry Pi 3 device.
+	  This enables support for the Broadcom BCM2837 and BCM2711 SoC.
+	  These SoCs are used in the Raspberry Pi 3 and 4 devices.
 
 config ARCH_BCM_IPROC
 	bool "Broadcom iProc SoC Family"
@@ -188,6 +189,7 @@ config ARCH_QCOM
 
 config ARCH_REALTEK
 	bool "Realtek Platforms"
+	select RESET_CONTROLLER
 	help
 	  This enables support for the ARMv8 based Realtek chipsets,
 	  like the RTD1295.
@@ -212,6 +214,11 @@ config ARCH_ROCKCHIP
 	  This enables support for the ARMv8 based Rockchip chipsets,
 	  like the RK3368.
 
+config ARCH_S32
+	bool "NXP S32 SoC Family"
+	help
+	  This enables support for the NXP S32 family of processors.
+
 config ARCH_SEATTLE
 	bool "AMD Seattle SoC Family"
 	help
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..dca1a97751ab 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -30,11 +30,8 @@ LDFLAGS_vmlinux	+= --fix-cortex-a53-843419
   endif
 endif
 
-# Check for binutils support for specific extensions
-lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1)
-
-ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y)
-  ifeq ($(lseinstr),)
+ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y)
+  ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y)
 $(warning LSE atomics not supported by binutils)
   endif
 endif
@@ -45,19 +42,15 @@ cc_has_k_constraint := $(call try-run,echo				\
 		return 0;						\
 	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
 
-ifeq ($(CONFIG_ARM64), y)
-brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
-
-  ifneq ($(brokengasinst),)
+ifeq ($(CONFIG_BROKEN_GAS_INST),y)
 $(warning Detected assembler with broken .inst; disassembly will be unreliable)
-  endif
 endif
 
-KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
+KBUILD_CFLAGS	+= -mgeneral-regs-only	\
 		   $(compat_vdso) $(cc_has_k_constraint)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
-KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
+KBUILD_AFLAGS	+= $(compat_vdso)
 
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
@@ -95,6 +88,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDS_MODULE	+= $(srctree)/arch/arm64/kernel/module.lds
 endif
 
+ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
+  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o
 
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index 1f012c506434..cd3414898d10 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -16,7 +16,7 @@
 
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-targets := Image Image.gz
+targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo
 
 $(obj)/Image: vmlinux FORCE
 	$(call if_changed,objcopy)
diff --git a/arch/arm64/boot/dts/actions/s900-bubblegum-96.dts b/arch/arm64/boot/dts/actions/s900-bubblegum-96.dts
index 732daaa6e9d3..59291e0ea1ee 100644
--- a/arch/arm64/boot/dts/actions/s900-bubblegum-96.dts
+++ b/arch/arm64/boot/dts/actions/s900-bubblegum-96.dts
@@ -12,6 +12,9 @@
 	model = "Bubblegum-96";
 
 	aliases {
+		mmc0 = &mmc0;
+		mmc1 = &mmc1;
+		mmc2 = &mmc2;
 		serial5 = &uart5;
 	};
 
@@ -23,6 +26,24 @@
 		device_type = "memory";
 		reg = <0x0 0x0 0x0 0x80000000>;
 	};
+
+	/* Fixed regulator used in the absence of PMIC */
+	vcc_3v1: vcc-3v1 {
+		compatible = "regulator-fixed";
+		regulator-name = "fixed-3.1V";
+		regulator-min-microvolt = <3100000>;
+		regulator-max-microvolt = <3100000>;
+		regulator-always-on;
+	};
+
+	/* Fixed regulator used in the absence of PMIC */
+	sd_vcc: sd-vcc {
+		compatible = "regulator-fixed";
+		regulator-name = "fixed-3.1V";
+		regulator-min-microvolt = <3100000>;
+		regulator-max-microvolt = <3100000>;
+		regulator-always-on;
+	};
 };
 
 &i2c0 {
@@ -241,6 +262,47 @@
 			bias-pull-up;
 		};
 	};
+
+	mmc0_default: mmc0_default {
+		pinmux {
+			groups = "sd0_d0_mfp", "sd0_d1_mfp", "sd0_d2_d3_mfp",
+				 "sd0_cmd_mfp", "sd0_clk_mfp";
+			function = "sd0";
+		};
+	};
+
+	mmc2_default: mmc2_default {
+		pinmux {
+			groups = "nand0_d0_ceb3_mfp";
+			function = "sd2";
+		};
+	};
+};
+
+/* uSD */
+&mmc0 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc0_default>;
+	no-sdio;
+	no-mmc;
+	no-1-8-v;
+	cd-gpios = <&pinctrl 120 GPIO_ACTIVE_LOW>;
+	bus-width = <4>;
+	vmmc-supply = <&sd_vcc>;
+	vqmmc-supply = <&sd_vcc>;
+};
+
+/* eMMC */
+&mmc2 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc2_default>;
+	no-sdio;
+	no-sd;
+	non-removable;
+	bus-width = <8>;
+	vmmc-supply = <&vcc_3v1>;
 };
 
 &timer {
diff --git a/arch/arm64/boot/dts/actions/s900.dtsi b/arch/arm64/boot/dts/actions/s900.dtsi
index df3a68a3ac97..eb35cf78ab73 100644
--- a/arch/arm64/boot/dts/actions/s900.dtsi
+++ b/arch/arm64/boot/dts/actions/s900.dtsi
@@ -4,6 +4,7 @@
  */
 
 #include <dt-bindings/clock/actions,s900-cmu.h>
+#include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/reset/actions,s900-reset.h>
 
@@ -284,5 +285,49 @@
 			dma-requests = <46>;
 			clocks = <&cmu CLK_DMAC>;
 		};
+
+		mmc0: mmc@e0330000 {
+			compatible = "actions,owl-mmc";
+			reg = <0x0 0xe0330000 0x0 0x4000>;
+			interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cmu CLK_SD0>;
+			resets = <&cmu RESET_SD0>;
+			dmas = <&dma 2>;
+			dma-names = "mmc";
+			status = "disabled";
+		};
+
+		mmc1: mmc@e0334000 {
+			compatible = "actions,owl-mmc";
+			reg = <0x0 0xe0334000 0x0 0x4000>;
+			interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cmu CLK_SD1>;
+			resets = <&cmu RESET_SD1>;
+			dmas = <&dma 3>;
+			dma-names = "mmc";
+			status = "disabled";
+		};
+
+		mmc2: mmc@e0338000 {
+			compatible = "actions,owl-mmc";
+			reg = <0x0 0xe0338000 0x0 0x4000>;
+			interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cmu CLK_SD2>;
+			resets = <&cmu RESET_SD2>;
+			dmas = <&dma 4>;
+			dma-names = "mmc";
+			status = "disabled";
+		};
+
+		mmc3: mmc@e033c000 {
+			compatible = "actions,owl-mmc";
+			reg = <0x0 0xe033c000 0x0 0x4000>;
+			interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cmu CLK_SD3>;
+			resets = <&cmu RESET_SD3>;
+			dmas = <&dma 46>;
+			dma-names = "mmc";
+			status = "disabled";
+		};
 	};
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts
index 96ab0227e82d..121e6cc4849b 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts
@@ -15,7 +15,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
 	vmmc-supply = <&reg_dcdc1>;
-	vqmmc-supply = <&reg_dcdc1>;
+	vqmmc-supply = <&reg_eldo1>;
 	bus-width = <8>;
 	non-removable;
 	cap-mmc-hw-reset;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts
index 01a9a52edae4..393c1948a495 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts
@@ -140,7 +140,7 @@
 &mmc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins>;
-	vmmc-supply = <&reg_aldo2>;
+	vmmc-supply = <&reg_dcdc1>;
 	vqmmc-supply = <&reg_dldo4>;
 	mmc-pwrseq = <&wifi_pwrseq>;
 	bus-width = <4>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
index 04446e4716c4..f54a415f2e3b 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
@@ -114,6 +114,19 @@
 	};
 };
 
+&codec {
+	status = "okay";
+};
+
+&codec_analog {
+	cpvdd-supply = <&reg_eldo1>;
+	status = "okay";
+};
+
+&dai {
+	status = "okay";
+};
+
 &de {
 	status = "okay";
 };
@@ -333,6 +346,22 @@
 	vcc-hdmi-supply = <&reg_dldo1>;
 };
 
+&sound {
+	status = "okay";
+	simple-audio-card,widgets = "Headphone", "Headphone Jack",
+				    "Microphone", "Microphone Jack",
+				    "Microphone", "Onboard Microphone";
+	simple-audio-card,routing =
+			"Left DAC", "AIF1 Slot 0 Left",
+			"Right DAC", "AIF1 Slot 0 Right",
+			"AIF1 Slot 0 Left ADC", "Left ADC",
+			"AIF1 Slot 0 Right ADC", "Right ADC",
+			"Headphone Jack", "HP",
+			"MIC2", "Microphone Jack",
+			"Onboard Microphone", "MBIAS",
+			"MIC1", "Onboard Microphone";
+};
+
 &spi0 {
 	status = "okay";
 
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 25099202c52c..920103ec0046 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -55,6 +55,10 @@
 	aliases {
 		ethernet0 = &emac;
 		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+		serial4 = &uart4;
 	};
 
 	chosen {
@@ -210,6 +214,27 @@
 	status = "okay";
 };
 
+/* On Pi-2 connector */
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart2_pins>;
+	status = "disabled";
+};
+
+/* On Euler connector */
+&uart3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart3_pins>;
+	status = "disabled";
+};
+
+/* On Euler connector, RTS/CTS optional */
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart4_pins>;
+	status = "disabled";
+};
+
 &usb_otg {
 	dr_mode = "host";
 	status = "okay";
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-teres-i.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-teres-i.dts
index 1069e7012c9c..970415106dcf 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-teres-i.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-teres-i.dts
@@ -100,18 +100,41 @@
 	status = "okay";
 };
 
+&de {
+	status = "okay";
+};
+
 &ehci1 {
 	status = "okay";
 };
 
 
-/* The ANX6345 eDP-bridge is on i2c0. There is no linux (mainline)
- * driver for this chip at the moment, the bootloader initializes it.
- * However it can be accessed with the i2c-dev driver from user space.
- */
 &i2c0 {
 	clock-frequency = <100000>;
 	status = "okay";
+
+	anx6345: anx6345@38 {
+		compatible = "analogix,anx6345";
+		reg = <0x38>;
+		reset-gpios = <&pio 3 24 GPIO_ACTIVE_LOW>; /* PD24 */
+		dvdd25-supply = <&reg_dldo2>;
+		dvdd12-supply = <&reg_dldo3>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				anx6345_in: endpoint {
+					remote-endpoint = <&tcon0_out_anx6345>;
+				};
+			};
+		};
+	};
+};
+
+&mixer0 {
+	status = "okay";
 };
 
 &mmc0 {
@@ -319,6 +342,20 @@
 	status = "okay";
 };
 
+&tcon0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&lcd_rgb666_pins>;
+
+	status = "okay";
+};
+
+&tcon0_out {
+	tcon0_out_anx6345: endpoint@0 {
+		reg = <0>;
+		remote-endpoint = <&anx6345_in>;
+	};
+};
+
 &uart0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart0_pb_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 70f4cce6be43..27e48234f1c2 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -142,6 +142,15 @@
 		clock-output-names = "ext-osc32k";
 	};
 
+	pmu {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
+	};
+
 	psci {
 		compatible = "arm,psci-0.2";
 		method = "smc";
@@ -478,6 +487,15 @@
 			reg = <0x1c14000 0x400>;
 		};
 
+		crypto: crypto@1c15000 {
+			compatible = "allwinner,sun50i-a64-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+			clock-names = "bus", "mod";
+			resets = <&ccu RST_BUS_CE>;
+		};
+
 		usb_otg: usb@1c19000 {
 			compatible = "allwinner,sun8i-a33-musb";
 			reg = <0x01c19000 0x0400>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-emlid-neutis-n5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5-emlid-neutis-n5.dtsi
index 82f4b44d525f..5bec574fa108 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-emlid-neutis-n5.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-emlid-neutis-n5.dtsi
@@ -23,6 +23,8 @@
 		compatible = "mmc-pwrseq-simple";
 		reset-gpios = <&pio 2 7 GPIO_ACTIVE_LOW>; /* PC7 */
 		post-power-on-delay-ms = <200>;
+		clocks = <&rtc 1>;
+		clock-names = "ext_clock";
 	};
 };
 
@@ -56,5 +58,16 @@
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
+	uart-has-rtscts;
 	status = "okay";
+
+	bluetooth {
+		compatible = "brcm,bcm43438-bt";
+		clocks = <&rtc 1>;
+		clock-names = "lpo";
+		vbat-supply = <&reg_vcc3v3>;
+		vddio-supply = <&reg_vcc3v3>;
+		shutdown-gpios = <&pio 2 4 GPIO_ACTIVE_HIGH>; /* PC4 */
+		device-wakeup-gpios = <&r_pio 0 7 GPIO_ACTIVE_HIGH>; /* PL7 */
+	};
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
index f002a496d7cb..e92c4de5bf3b 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
@@ -127,6 +127,15 @@
 			allwinner,sram = <&ve_sram 1>;
 		};
 
+		crypto: crypto@1c15000 {
+			compatible = "allwinner,sun50i-h5-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+			clock-names = "bus", "mod";
+			resets = <&ccu RST_BUS_CE>;
+		};
+
 		mali: gpu@1e80000 {
 			compatible = "allwinner,sun50i-h5-mali", "arm,mali-450";
 			reg = <0x01e80000 0x30000>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 1d05d570142f..f335f7482a73 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -89,6 +89,11 @@
 	status = "okay";
 };
 
+&gpu {
+	mali-supply = <&reg_dcdcc>;
+	status = "okay";
+};
+
 &hdmi {
 	status = "okay";
 };
@@ -225,6 +230,7 @@
 			};
 
 			reg_dcdcc: dcdcc {
+				regulator-enable-ramp-delay = <32000>;
 				regulator-min-microvolt = <810000>;
 				regulator-max-microvolt = <1080000>;
 				regulator-name = "vdd-gpu";
@@ -252,6 +258,7 @@
 };
 
 &r_ir {
+	linux,rc-map-name = "rc-beelink-gs1";
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
index eb379cd402ac..4ed3fc2c7734 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
@@ -15,6 +15,7 @@
 
 	aliases {
 		serial0 = &uart0;
+		serial1 = &uart1;
 	};
 
 	chosen {
@@ -94,6 +95,10 @@
 	status = "okay";
 };
 
+&dwc3 {
+	status = "okay";
+};
+
 &ehci0 {
 	status = "okay";
 };
@@ -102,6 +107,11 @@
 	status = "okay";
 };
 
+&gpu {
+	mali-supply = <&reg_dcdcc>;
+	status = "okay";
+};
+
 &hdmi {
 	status = "okay";
 };
@@ -237,6 +247,7 @@
 			};
 
 			reg_dcdcc: dcdcc {
+				regulator-enable-ramp-delay = <32000>;
 				regulator-min-microvolt = <810000>;
 				regulator-max-microvolt = <1080000>;
 				regulator-name = "vdd-gpu";
@@ -269,6 +280,24 @@
 	status = "okay";
 };
 
+/* There's the BT part of the AP6256 connected to that UART */
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
+	uart-has-rtscts;
+	status = "okay";
+
+	bluetooth {
+		compatible = "brcm,bcm4345c5";
+		clocks = <&rtc 1>;
+		clock-names = "lpo";
+		device-wakeup-gpios = <&r_pio 1 2 GPIO_ACTIVE_HIGH>; /* PM2 */
+		host-wakeup-gpios = <&r_pio 1 1 GPIO_ACTIVE_HIGH>; /* PM1 */
+		shutdown-gpios = <&r_pio 1 4 GPIO_ACTIVE_HIGH>; /* PM4 */
+		max-speed = <1500000>;
+	};
+};
+
 &usb2otg {
 	/*
 	 * This board doesn't have a controllable VBUS even though it
@@ -285,3 +314,7 @@
 	usb3_vbus-supply = <&reg_vcc5v>;
 	status = "okay";
 };
+
+&usb3phy {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
index ec9b6a578e3f..df4cbd7ef96c 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
@@ -55,6 +55,11 @@
 	status = "okay";
 };
 
+&gpu {
+	mali-supply = <&reg_dcdcc>;
+	status = "okay";
+};
+
 &mmc0 {
 	vmmc-supply = <&reg_cldo1>;
 	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
@@ -163,6 +168,7 @@
 			};
 
 			reg_dcdcc: dcdcc {
+				regulator-enable-ramp-delay = <32000>;
 				regulator-min-microvolt = <810000>;
 				regulator-max-microvolt = <1080000>;
 				regulator-name = "vdd-gpu";
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
index 30102daf83cc..74899ede00fb 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
@@ -85,6 +85,11 @@
 	status = "okay";
 };
 
+&gpu {
+	mali-supply = <&reg_dcdcc>;
+	status = "okay";
+};
+
 &hdmi {
 	status = "okay";
 };
@@ -221,6 +226,7 @@
 			};
 
 			reg_dcdcc: dcdcc {
+				regulator-enable-ramp-delay = <32000>;
 				regulator-min-microvolt = <810000>;
 				regulator-max-microvolt = <1080000>;
 				regulator-name = "vdd-gpu";
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix-tx6.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix-tx6.dts
index 7e7cb10e3d96..bccfe1e65b6a 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix-tx6.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix-tx6.dts
@@ -53,6 +53,10 @@
 	status = "okay";
 };
 
+&gpu {
+	status = "okay";
+};
+
 &hdmi {
 	status = "okay";
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
index 0d5ea19336a1..29824081b43b 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
@@ -149,6 +149,29 @@
 			allwinner,sram = <&ve_sram 1>;
 		};
 
+		gpu: gpu@1800000 {
+			compatible = "allwinner,sun50i-h6-mali",
+				     "arm,mali-t720";
+			reg = <0x01800000 0x4000>;
+			interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "job", "mmu", "gpu";
+			clocks = <&ccu CLK_GPU>, <&ccu CLK_BUS_GPU>;
+			clock-names = "core", "bus";
+			resets = <&ccu RST_BUS_GPU>;
+			status = "disabled";
+		};
+
+		crypto: crypto@1904000 {
+			compatible = "allwinner,sun50i-h6-crypto";
+			reg = <0x01904000 0x1000>;
+			interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>, <&ccu CLK_MBUS_CE>;
+			clock-names = "bus", "mod", "ram";
+			resets = <&ccu RST_BUS_CE>;
+		};
+
 		syscon: syscon@3000000 {
 			compatible = "allwinner,sun50i-h6-system-control",
 				     "allwinner,sun50i-a64-system-control";
@@ -299,6 +322,16 @@
 				pins = "PH0", "PH1";
 				function = "uart0";
 			};
+
+			uart1_pins: uart1-pins {
+				pins = "PG6", "PG7";
+				function = "uart1";
+			};
+
+			uart1_rts_cts_pins: uart1-rts-cts-pins {
+				pins = "PG8", "PG9";
+				function = "uart1";
+			};
 		};
 
 		gic: interrupt-controller@3021000 {
@@ -537,6 +570,38 @@
 			status = "disabled";
 		};
 
+		dwc3: dwc3@5200000 {
+			compatible = "snps,dwc3";
+			reg = <0x05200000 0x10000>;
+			interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ccu CLK_BUS_XHCI>,
+				 <&ccu CLK_BUS_XHCI>,
+				 <&rtc 0>;
+			clock-names = "ref", "bus_early", "suspend";
+			resets = <&ccu RST_BUS_XHCI>;
+			/*
+			 * The datasheet of the chip doesn't declare the
+			 * peripheral function, and there's no boards known
+			 * to have a USB Type-B port routed to the port.
+			 * In addition, no one has tested the peripheral
+			 * function yet.
+			 * So set the dr_mode to "host" in the DTSI file.
+			 */
+			dr_mode = "host";
+			phys = <&usb3phy>;
+			phy-names = "usb3-phy";
+			status = "disabled";
+		};
+
+		usb3phy: phy@5210000 {
+			compatible = "allwinner,sun50i-h6-usb3-phy";
+			reg = <0x5210000 0x10000>;
+			clocks = <&ccu CLK_USB_PHY1>;
+			resets = <&ccu RST_USB_PHY1>;
+			#phy-cells = <0>;
+			status = "disabled";
+		};
+
 		ehci3: usb@5311000 {
 			compatible = "allwinner,sun50i-h6-ehci", "generic-ehci";
 			reg = <0x05311000 0x100>;
@@ -618,7 +683,6 @@
 				      "tcon-tv0";
 			clock-output-names = "tcon-top-tv0";
 			resets = <&ccu RST_BUS_TCON_TOP>;
-			reset-names = "rst";
 			#clock-cells = <1>;
 
 			ports {
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 144a2c19ac02..d1fc9c2055f4 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -61,10 +61,10 @@
 
 	pmu {
 		compatible = "arm,armv8-pmuv3";
-		interrupts = <0 120 8>,
-			     <0 121 8>,
-			     <0 122 8>,
-			     <0 123 8>;
+		interrupts = <0 170 4>,
+			     <0 171 4>,
+			     <0 172 4>,
+			     <0 173 4>;
 		interrupt-affinity = <&cpu0>,
 				     <&cpu1>,
 				     <&cpu2>,
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index 66e4ffb4e929..fb11ef05d556 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -178,12 +178,12 @@
 
 			qspi_boot: partition@0 {
 				label = "Boot and fpga data";
-				reg = <0x0 0x4000000>;
+				reg = <0x0 0x034B0000>;
 			};
 
 			qspi_rootfs: partition@4000000 {
 				label = "Root Filesystem - JFFS2";
-				reg = <0x4000000 0x4000000>;
+				reg = <0x034B0000 0x0EB50000>;
 			};
 		};
 	};
diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile
index 84afecba9ec0..63400538d39f 100644
--- a/arch/arm64/boot/dts/amlogic/Makefile
+++ b/arch/arm64/boot/dts/amlogic/Makefile
@@ -6,6 +6,7 @@ dtb-$(CONFIG_ARCH_MESON) += meson-g12a-x96-max.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-s922x-khadas-vim3.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-odroid-n2.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12b-ugoos-am6.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-nanopi-k2.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-nexbox-a95x.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-odroidc2.dtb
@@ -36,3 +37,4 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxm-rbox-pro.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-gxm-vega-s96.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-a1-ad401.dtb
diff --git a/arch/arm64/boot/dts/amlogic/meson-a1-ad401.dts b/arch/arm64/boot/dts/amlogic/meson-a1-ad401.dts
new file mode 100644
index 000000000000..69c25c68c358
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-a1-ad401.dts
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 Amlogic, Inc. All rights reserved.
+ */
+
+/dts-v1/;
+
+#include "meson-a1.dtsi"
+
+/ {
+	compatible = "amlogic,ad401", "amlogic,a1";
+	model = "Amlogic Meson A1 AD401 Development Board";
+
+	aliases {
+		serial0 = &uart_AO_B;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x8000000>;
+	};
+};
+
+&uart_AO_B {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
new file mode 100644
index 000000000000..7210ad049d1d
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 Amlogic, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	compatible = "amlogic,a1";
+
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			next-level-cache = <&l2>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			next-level-cache = <&l2>;
+		};
+
+		l2: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	psci {
+		compatible = "arm,psci-1.0";
+		method = "smc";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		linux,cma {
+			compatible = "shared-dma-pool";
+			reusable;
+			size = <0x0 0x800000>;
+			alignment = <0x0 0x400000>;
+			linux,cma-default;
+		};
+	};
+
+	sm: secure-monitor {
+		compatible = "amlogic,meson-gxbb-sm";
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		apb: bus@fe000000 {
+			compatible = "simple-bus";
+			reg = <0x0 0xfe000000 0x0 0x1000000>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges = <0x0 0x0 0x0 0xfe000000 0x0 0x1000000>;
+
+			uart_AO: serial@1c00 {
+				compatible = "amlogic,meson-gx-uart",
+					     "amlogic,meson-ao-uart";
+				reg = <0x0 0x1c00 0x0 0x18>;
+				interrupts = <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>;
+				clocks = <&xtal>, <&xtal>, <&xtal>;
+				clock-names = "xtal", "pclk", "baud";
+				status = "disabled";
+			};
+
+			uart_AO_B: serial@2000 {
+				compatible = "amlogic,meson-gx-uart",
+					     "amlogic,meson-ao-uart";
+				reg = <0x0 0x2000 0x0 0x18>;
+				interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
+				clocks = <&xtal>, <&xtal>, <&xtal>;
+				clock-names = "xtal", "pclk", "baud";
+				status = "disabled";
+			};
+		};
+
+		gic: interrupt-controller@ff901000 {
+			compatible = "arm,gic-400";
+			reg = <0x0 0xff901000 0x0 0x1000>,
+			      <0x0 0xff902000 0x0 0x2000>,
+			      <0x0 0xff904000 0x0 0x2000>,
+			      <0x0 0xff906000 0x0 0x2000>;
+			interrupt-controller;
+			interrupts = <GIC_PPI 9
+				(GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_HIGH)>;
+			#interrupt-cells = <3>;
+			#address-cells = <0>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	xtal: xtal-clk {
+		compatible = "fixed-clock";
+		clock-frequency = <24000000>;
+		clock-output-names = "xtal";
+		#clock-cells = <0>;
+	};
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
index 82919b106010..04803c3bccfa 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
@@ -117,6 +117,7 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		read-only;
+		secure-monitor = <&sm>;
 	};
 
 	psci {
@@ -1162,7 +1163,7 @@
 
 			toddr_a: audio-controller@100 {
 				compatible = "amlogic,axg-toddr";
-				reg = <0x0 0x100 0x0 0x1c>;
+				reg = <0x0 0x100 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "TODDR_A";
 				interrupts = <GIC_SPI 84 IRQ_TYPE_EDGE_RISING>;
@@ -1173,7 +1174,7 @@
 
 			toddr_b: audio-controller@140 {
 				compatible = "amlogic,axg-toddr";
-				reg = <0x0 0x140 0x0 0x1c>;
+				reg = <0x0 0x140 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "TODDR_B";
 				interrupts = <GIC_SPI 85 IRQ_TYPE_EDGE_RISING>;
@@ -1184,7 +1185,7 @@
 
 			toddr_c: audio-controller@180 {
 				compatible = "amlogic,axg-toddr";
-				reg = <0x0 0x180 0x0 0x1c>;
+				reg = <0x0 0x180 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "TODDR_C";
 				interrupts = <GIC_SPI 86 IRQ_TYPE_EDGE_RISING>;
@@ -1195,7 +1196,7 @@
 
 			frddr_a: audio-controller@1c0 {
 				compatible = "amlogic,axg-frddr";
-				reg = <0x0 0x1c0 0x0 0x1c>;
+				reg = <0x0 0x1c0 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "FRDDR_A";
 				interrupts = <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>;
@@ -1206,7 +1207,7 @@
 
 			frddr_b: audio-controller@200 {
 				compatible = "amlogic,axg-frddr";
-				reg = <0x0 0x200 0x0 0x1c>;
+				reg = <0x0 0x200 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "FRDDR_B";
 				interrupts = <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>;
@@ -1217,7 +1218,7 @@
 
 			frddr_c: audio-controller@240 {
 				compatible = "amlogic,axg-frddr";
-				reg = <0x0 0x240 0x0 0x1c>;
+				reg = <0x0 0x240 0x0 0x2c>;
 				#sound-dai-cells = <0>;
 				sound-name-prefix = "FRDDR_C";
 				interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 3f39e020f74e..7fabc8d9654a 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -5,51 +5,42 @@
 
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/gpio/gpio.h>
-#include <dt-bindings/clock/axg-audio-clkc.h>
 #include <dt-bindings/clock/g12a-clkc.h>
 #include <dt-bindings/clock/g12a-aoclkc.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/reset/amlogic,meson-axg-audio-arb.h>
-#include <dt-bindings/reset/amlogic,meson-g12a-audio-reset.h>
 #include <dt-bindings/reset/amlogic,meson-g12a-reset.h>
+#include <dt-bindings/thermal/thermal.h>
 
 / {
 	interrupt-parent = <&gic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
 
-	tdmif_a: audio-controller-0 {
-		compatible = "amlogic,axg-tdm-iface";
-		#sound-dai-cells = <0>;
-		sound-name-prefix = "TDM_A";
-		clocks = <&clkc_audio AUD_CLKID_MST_A_MCLK>,
-			 <&clkc_audio AUD_CLKID_MST_A_SCLK>,
-			 <&clkc_audio AUD_CLKID_MST_A_LRCLK>;
-		clock-names = "mclk", "sclk", "lrclk";
-		status = "disabled";
-	};
+	chosen {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
 
-	tdmif_b: audio-controller-1 {
-		compatible = "amlogic,axg-tdm-iface";
-		#sound-dai-cells = <0>;
-		sound-name-prefix = "TDM_B";
-		clocks = <&clkc_audio AUD_CLKID_MST_B_MCLK>,
-			 <&clkc_audio AUD_CLKID_MST_B_SCLK>,
-			 <&clkc_audio AUD_CLKID_MST_B_LRCLK>;
-		clock-names = "mclk", "sclk", "lrclk";
-		status = "disabled";
-	};
+		simplefb_cvbs: framebuffer-cvbs {
+			compatible = "amlogic,simple-framebuffer",
+				     "simple-framebuffer";
+			amlogic,pipeline = "vpu-cvbs";
+			clocks = <&clkc CLKID_HDMI>,
+				 <&clkc CLKID_HTX_PCLK>,
+				 <&clkc CLKID_VPU_INTR>;
+			status = "disabled";
+		};
 
-	tdmif_c: audio-controller-2 {
-		compatible = "amlogic,axg-tdm-iface";
-		#sound-dai-cells = <0>;
-		sound-name-prefix = "TDM_C";
-		clocks = <&clkc_audio AUD_CLKID_MST_C_MCLK>,
-			 <&clkc_audio AUD_CLKID_MST_C_SCLK>,
-			 <&clkc_audio AUD_CLKID_MST_C_LRCLK>;
-		clock-names = "mclk", "sclk", "lrclk";
-		status = "disabled";
+		simplefb_hdmi: framebuffer-hdmi {
+			compatible = "amlogic,simple-framebuffer",
+				    "simple-framebuffer";
+			amlogic,pipeline = "vpu-hdmi";
+			clocks = <&clkc CLKID_HDMI>,
+				 <&clkc CLKID_HTX_PCLK>,
+				 <&clkc CLKID_VPU_INTR>;
+			status = "disabled";
+		};
 	};
 
 	efuse: efuse {
@@ -58,6 +49,7 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		read-only;
+		secure-monitor = <&sm>;
 	};
 
 	psci {
@@ -95,6 +87,94 @@
 		#size-cells = <2>;
 		ranges;
 
+		pcie: pcie@fc000000 {
+			compatible = "amlogic,g12a-pcie", "snps,dw-pcie";
+			reg = <0x0 0xfc000000 0x0 0x400000
+			       0x0 0xff648000 0x0 0x2000
+			       0x0 0xfc400000 0x0 0x200000>;
+			reg-names = "elbi", "cfg", "config";
+			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &gic GIC_SPI 223 IRQ_TYPE_LEVEL_HIGH>;
+			bus-range = <0x0 0xff>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			ranges = <0x81000000 0 0 0x0 0xfc600000 0 0x00100000
+				  0x82000000 0 0xfc700000 0x0 0xfc700000 0 0x1900000>;
+
+			clocks = <&clkc CLKID_PCIE_PHY
+				  &clkc CLKID_PCIE_COMB
+				  &clkc CLKID_PCIE_PLL>;
+			clock-names = "general",
+				      "pclk",
+				      "port";
+			resets = <&reset RESET_PCIE_CTRL_A>,
+				 <&reset RESET_PCIE_APB>;
+			reset-names = "port",
+				      "apb";
+			num-lanes = <1>;
+			phys = <&usb3_pcie_phy PHY_TYPE_PCIE>;
+			phy-names = "pcie";
+			status = "disabled";
+		};
+
+		thermal-zones {
+			cpu_thermal: cpu-thermal {
+				polling-delay = <1000>;
+				polling-delay-passive = <100>;
+				thermal-sensors = <&cpu_temp>;
+
+				trips {
+					cpu_passive: cpu-passive {
+						temperature = <85000>; /* millicelsius */
+						hysteresis = <2000>; /* millicelsius */
+						type = "passive";
+					};
+
+					cpu_hot: cpu-hot {
+						temperature = <95000>; /* millicelsius */
+						hysteresis = <2000>; /* millicelsius */
+						type = "hot";
+					};
+
+					cpu_critical: cpu-critical {
+						temperature = <110000>; /* millicelsius */
+						hysteresis = <2000>; /* millicelsius */
+						type = "critical";
+					};
+				};
+			};
+
+			ddr_thermal: ddr-thermal {
+				polling-delay = <1000>;
+				polling-delay-passive = <100>;
+				thermal-sensors = <&ddr_temp>;
+
+				trips {
+					ddr_passive: ddr-passive {
+						temperature = <85000>; /* millicelsius */
+						hysteresis = <2000>; /* millicelsius */
+						type = "passive";
+					};
+
+					ddr_critical: ddr-critical {
+						temperature = <110000>; /* millicelsius */
+						hysteresis = <2000>; /* millicelsius */
+						type = "critical";
+					};
+				};
+
+				cooling-maps {
+					map {
+						trip = <&ddr_passive>;
+						cooling-device = <&mali THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+					};
+				};
+			};
+		};
+
 		ethmac: ethernet@ff3f0000 {
 			compatible = "amlogic,meson-axg-dwmac",
 				     "snps,dwmac-3.70a",
@@ -1356,6 +1436,26 @@
 				};
 			};
 
+			cpu_temp: temperature-sensor@34800 {
+				compatible = "amlogic,g12a-cpu-thermal",
+					     "amlogic,g12a-thermal";
+				reg = <0x0 0x34800 0x0 0x50>;
+				interrupts = <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>;
+				clocks = <&clkc CLKID_TS>;
+				#thermal-sensor-cells = <0>;
+				amlogic,ao-secure = <&sec_AO>;
+			};
+
+			ddr_temp: temperature-sensor@34c00 {
+				compatible = "amlogic,g12a-ddr-thermal",
+					     "amlogic,g12a-thermal";
+				reg = <0x0 0x34c00 0x0 0x50>;
+				interrupts = <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>;
+				clocks = <&clkc CLKID_TS>;
+				#thermal-sensor-cells = <0>;
+				amlogic,ao-secure = <&sec_AO>;
+			};
+
 			usb2_phy0: phy@36000 {
 				compatible = "amlogic,g12a-usb2-phy";
 				reg = <0x0 0x36000 0x0 0x2000>;
@@ -1457,290 +1557,6 @@
 				};
 			};
 
-			pdm: audio-controller@40000 {
-				compatible = "amlogic,g12a-pdm",
-					     "amlogic,axg-pdm";
-				reg = <0x0 0x40000 0x0 0x34>;
-				#sound-dai-cells = <0>;
-				sound-name-prefix = "PDM";
-				clocks = <&clkc_audio AUD_CLKID_PDM>,
-					 <&clkc_audio AUD_CLKID_PDM_DCLK>,
-					 <&clkc_audio AUD_CLKID_PDM_SYSCLK>;
-				clock-names = "pclk", "dclk", "sysclk";
-				status = "disabled";
-			};
-
-			audio: bus@42000 {
-				compatible = "simple-bus";
-				reg = <0x0 0x42000 0x0 0x2000>;
-				#address-cells = <2>;
-				#size-cells = <2>;
-				ranges = <0x0 0x0 0x0 0x42000 0x0 0x2000>;
-
-				clkc_audio: clock-controller@0 {
-					status = "disabled";
-					compatible = "amlogic,g12a-audio-clkc";
-					reg = <0x0 0x0 0x0 0xb4>;
-					#clock-cells = <1>;
-					#reset-cells = <1>;
-
-					clocks = <&clkc CLKID_AUDIO>,
-						 <&clkc CLKID_MPLL0>,
-						 <&clkc CLKID_MPLL1>,
-						 <&clkc CLKID_MPLL2>,
-						 <&clkc CLKID_MPLL3>,
-						 <&clkc CLKID_HIFI_PLL>,
-						 <&clkc CLKID_FCLK_DIV3>,
-						 <&clkc CLKID_FCLK_DIV4>,
-						 <&clkc CLKID_GP0_PLL>;
-					clock-names = "pclk",
-						      "mst_in0",
-						      "mst_in1",
-						      "mst_in2",
-						      "mst_in3",
-						      "mst_in4",
-						      "mst_in5",
-						      "mst_in6",
-						      "mst_in7";
-
-					resets = <&reset RESET_AUDIO>;
-				};
-
-				toddr_a: audio-controller@100 {
-					compatible = "amlogic,g12a-toddr",
-						     "amlogic,axg-toddr";
-					reg = <0x0 0x100 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "TODDR_A";
-					interrupts = <GIC_SPI 148 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_TODDR_A>;
-					resets = <&arb AXG_ARB_TODDR_A>;
-					status = "disabled";
-				};
-
-				toddr_b: audio-controller@140 {
-					compatible = "amlogic,g12a-toddr",
-						     "amlogic,axg-toddr";
-					reg = <0x0 0x140 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "TODDR_B";
-					interrupts = <GIC_SPI 149 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_TODDR_B>;
-					resets = <&arb AXG_ARB_TODDR_B>;
-					status = "disabled";
-				};
-
-				toddr_c: audio-controller@180 {
-					compatible = "amlogic,g12a-toddr",
-						     "amlogic,axg-toddr";
-					reg = <0x0 0x180 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "TODDR_C";
-					interrupts = <GIC_SPI 150 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_TODDR_C>;
-					resets = <&arb AXG_ARB_TODDR_C>;
-					status = "disabled";
-				};
-
-				frddr_a: audio-controller@1c0 {
-					compatible = "amlogic,g12a-frddr",
-						     "amlogic,axg-frddr";
-					reg = <0x0 0x1c0 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "FRDDR_A";
-					interrupts = <GIC_SPI 152 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_FRDDR_A>;
-					resets = <&arb AXG_ARB_FRDDR_A>;
-					status = "disabled";
-				};
-
-				frddr_b: audio-controller@200 {
-					compatible = "amlogic,g12a-frddr",
-						     "amlogic,axg-frddr";
-					reg = <0x0 0x200 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "FRDDR_B";
-					interrupts = <GIC_SPI 153 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_FRDDR_B>;
-					resets = <&arb AXG_ARB_FRDDR_B>;
-					status = "disabled";
-				};
-
-				frddr_c: audio-controller@240 {
-					compatible = "amlogic,g12a-frddr",
-						     "amlogic,axg-frddr";
-					reg = <0x0 0x240 0x0 0x1c>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "FRDDR_C";
-					interrupts = <GIC_SPI 154 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_FRDDR_C>;
-					resets = <&arb AXG_ARB_FRDDR_C>;
-					status = "disabled";
-				};
-
-				arb: reset-controller@280 {
-					status = "disabled";
-					compatible = "amlogic,meson-axg-audio-arb";
-					reg = <0x0 0x280 0x0 0x4>;
-					#reset-cells = <1>;
-					clocks = <&clkc_audio AUD_CLKID_DDR_ARB>;
-				};
-
-				tdmin_a: audio-controller@300 {
-					compatible = "amlogic,g12a-tdmin",
-						     "amlogic,axg-tdmin";
-					reg = <0x0 0x300 0x0 0x40>;
-					sound-name-prefix = "TDMIN_A";
-					resets = <&clkc_audio AUD_RESET_TDMIN_A>;
-					clocks = <&clkc_audio AUD_CLKID_TDMIN_A>,
-						 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				tdmin_b: audio-controller@340 {
-					compatible = "amlogic,g12a-tdmin",
-						     "amlogic,axg-tdmin";
-					reg = <0x0 0x340 0x0 0x40>;
-					sound-name-prefix = "TDMIN_B";
-					resets = <&clkc_audio AUD_RESET_TDMIN_B>;
-					clocks = <&clkc_audio AUD_CLKID_TDMIN_B>,
-						 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				tdmin_c: audio-controller@380 {
-					compatible = "amlogic,g12a-tdmin",
-						     "amlogic,axg-tdmin";
-					reg = <0x0 0x380 0x0 0x40>;
-					sound-name-prefix = "TDMIN_C";
-					resets = <&clkc_audio AUD_RESET_TDMIN_C>;
-					clocks = <&clkc_audio AUD_CLKID_TDMIN_C>,
-						 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				tdmin_lb: audio-controller@3c0 {
-					compatible = "amlogic,g12a-tdmin",
-						     "amlogic,axg-tdmin";
-					reg = <0x0 0x3c0 0x0 0x40>;
-					sound-name-prefix = "TDMIN_LB";
-					resets = <&clkc_audio AUD_RESET_TDMIN_LB>;
-					clocks = <&clkc_audio AUD_CLKID_TDMIN_LB>,
-						 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				spdifin: audio-controller@400 {
-					compatible = "amlogic,g12a-spdifin",
-						     "amlogic,axg-spdifin";
-					reg = <0x0 0x400 0x0 0x30>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "SPDIFIN";
-					interrupts = <GIC_SPI 151 IRQ_TYPE_EDGE_RISING>;
-					clocks = <&clkc_audio AUD_CLKID_SPDIFIN>,
-						 <&clkc_audio AUD_CLKID_SPDIFIN_CLK>;
-					clock-names = "pclk", "refclk";
-					status = "disabled";
-				};
-
-				spdifout: audio-controller@480 {
-					compatible = "amlogic,g12a-spdifout",
-						     "amlogic,axg-spdifout";
-					reg = <0x0 0x480 0x0 0x50>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "SPDIFOUT";
-					clocks = <&clkc_audio AUD_CLKID_SPDIFOUT>,
-						 <&clkc_audio AUD_CLKID_SPDIFOUT_CLK>;
-					clock-names = "pclk", "mclk";
-					status = "disabled";
-				};
-
-				tdmout_a: audio-controller@500 {
-					compatible = "amlogic,g12a-tdmout";
-					reg = <0x0 0x500 0x0 0x40>;
-					sound-name-prefix = "TDMOUT_A";
-					resets = <&clkc_audio AUD_RESET_TDMOUT_A>;
-					clocks = <&clkc_audio AUD_CLKID_TDMOUT_A>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				tdmout_b: audio-controller@540 {
-					compatible = "amlogic,g12a-tdmout";
-					reg = <0x0 0x540 0x0 0x40>;
-					sound-name-prefix = "TDMOUT_B";
-					resets = <&clkc_audio AUD_RESET_TDMOUT_B>;
-					clocks = <&clkc_audio AUD_CLKID_TDMOUT_B>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				tdmout_c: audio-controller@580 {
-					compatible = "amlogic,g12a-tdmout";
-					reg = <0x0 0x580 0x0 0x40>;
-					sound-name-prefix = "TDMOUT_C";
-					resets = <&clkc_audio AUD_RESET_TDMOUT_C>;
-					clocks = <&clkc_audio AUD_CLKID_TDMOUT_C>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK_SEL>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>,
-						 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>;
-					clock-names = "pclk", "sclk", "sclk_sel",
-						      "lrclk", "lrclk_sel";
-					status = "disabled";
-				};
-
-				spdifout_b: audio-controller@680 {
-					compatible = "amlogic,g12a-spdifout",
-						     "amlogic,axg-spdifout";
-					reg = <0x0 0x680 0x0 0x50>;
-					#sound-dai-cells = <0>;
-					sound-name-prefix = "SPDIFOUT_B";
-					clocks = <&clkc_audio AUD_CLKID_SPDIFOUT_B>,
-						 <&clkc_audio AUD_CLKID_SPDIFOUT_B_CLK>;
-					clock-names = "pclk", "mclk";
-					status = "disabled";
-				};
-
-				tohdmitx: audio-controller@744 {
-					compatible = "amlogic,g12a-tohdmitx";
-					reg = <0x0 0x744 0x0 0x4>;
-					#sound-dai-cells = <1>;
-					sound-name-prefix = "TOHDMITX";
-					status = "disabled";
-				};
-			};
-
 			usb3_pcie_phy: phy@46000 {
 				compatible = "amlogic,g12a-usb3-pcie-phy";
 				reg = <0x0 0x46000 0x0 0x2000>;
@@ -2388,10 +2204,10 @@
 			compatible = "amlogic,meson-g12a-mali", "arm,mali-bifrost";
 			reg = <0x0 0xffe40000 0x0 0x40000>;
 			interrupt-parent = <&gic>;
-			interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+			interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>,
 				     <GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>,
-				     <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>;
-			interrupt-names = "gpu", "mmu", "job";
+				     <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "job", "mmu", "gpu";
 			clocks = <&clkc CLKID_MALI>;
 			resets = <&reset RESET_DVALIN_CAPB3>, <&reset RESET_DVALIN>;
 
@@ -2409,6 +2225,7 @@
 			assigned-clock-rates = <0>, /* Do Nothing */
 					       <800000000>,
 					       <0>; /* Do Nothing */
+			#cooling-cells = <2>;
 		};
 	};
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12.dtsi
new file mode 100644
index 000000000000..b3ba2fda8af8
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12.dtsi
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ */
+
+#include "meson-g12-common.dtsi"
+#include <dt-bindings/clock/axg-audio-clkc.h>
+#include <dt-bindings/power/meson-g12a-power.h>
+#include <dt-bindings/reset/amlogic,meson-axg-audio-arb.h>
+#include <dt-bindings/reset/amlogic,meson-g12a-audio-reset.h>
+
+/ {
+	tdmif_a: audio-controller-0 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_A";
+		clocks = <&clkc_audio AUD_CLKID_MST_A_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_A_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_A_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+
+	tdmif_b: audio-controller-1 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_B";
+		clocks = <&clkc_audio AUD_CLKID_MST_B_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_B_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_B_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+
+	tdmif_c: audio-controller-2 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_C";
+		clocks = <&clkc_audio AUD_CLKID_MST_C_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_C_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_C_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+};
+
+&apb {
+	pdm: audio-controller@40000 {
+		compatible = "amlogic,g12a-pdm",
+			     "amlogic,axg-pdm";
+		reg = <0x0 0x40000 0x0 0x34>;
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "PDM";
+		clocks = <&clkc_audio AUD_CLKID_PDM>,
+			 <&clkc_audio AUD_CLKID_PDM_DCLK>,
+			 <&clkc_audio AUD_CLKID_PDM_SYSCLK>;
+		clock-names = "pclk", "dclk", "sysclk";
+		status = "disabled";
+	};
+
+	audio: bus@42000 {
+		compatible = "simple-bus";
+		reg = <0x0 0x42000 0x0 0x2000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges = <0x0 0x0 0x0 0x42000 0x0 0x2000>;
+
+		clkc_audio: clock-controller@0 {
+			status = "disabled";
+			compatible = "amlogic,g12a-audio-clkc";
+			reg = <0x0 0x0 0x0 0xb4>;
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+
+			clocks = <&clkc CLKID_AUDIO>,
+				 <&clkc CLKID_MPLL0>,
+				 <&clkc CLKID_MPLL1>,
+				 <&clkc CLKID_MPLL2>,
+				 <&clkc CLKID_MPLL3>,
+				 <&clkc CLKID_HIFI_PLL>,
+				 <&clkc CLKID_FCLK_DIV3>,
+				 <&clkc CLKID_FCLK_DIV4>,
+				 <&clkc CLKID_GP0_PLL>;
+			clock-names = "pclk",
+				      "mst_in0",
+				      "mst_in1",
+				      "mst_in2",
+				      "mst_in3",
+				      "mst_in4",
+				      "mst_in5",
+				      "mst_in6",
+				      "mst_in7";
+
+			resets = <&reset RESET_AUDIO>;
+		};
+
+		toddr_a: audio-controller@100 {
+			compatible = "amlogic,g12a-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x100 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_A";
+			interrupts = <GIC_SPI 148 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_A>;
+			resets = <&arb AXG_ARB_TODDR_A>,
+				 <&clkc_audio AUD_RESET_TODDR_A>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		toddr_b: audio-controller@140 {
+			compatible = "amlogic,g12a-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x140 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_B";
+			interrupts = <GIC_SPI 149 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_B>;
+			resets = <&arb AXG_ARB_TODDR_B>,
+				 <&clkc_audio AUD_RESET_TODDR_B>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		toddr_c: audio-controller@180 {
+			compatible = "amlogic,g12a-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x180 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_C";
+			interrupts = <GIC_SPI 150 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_C>;
+			resets = <&arb AXG_ARB_TODDR_C>,
+				 <&clkc_audio AUD_RESET_TODDR_C>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_a: audio-controller@1c0 {
+			compatible = "amlogic,g12a-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x1c0 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_A";
+			interrupts = <GIC_SPI 152 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_A>;
+			resets = <&arb AXG_ARB_FRDDR_A>,
+				 <&clkc_audio AUD_RESET_FRDDR_A>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_b: audio-controller@200 {
+			compatible = "amlogic,g12a-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x200 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_B";
+			interrupts = <GIC_SPI 153 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_B>;
+			resets = <&arb AXG_ARB_FRDDR_B>,
+				 <&clkc_audio AUD_RESET_FRDDR_B>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_c: audio-controller@240 {
+			compatible = "amlogic,g12a-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x240 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_C";
+			interrupts = <GIC_SPI 154 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_C>;
+			resets = <&arb AXG_ARB_FRDDR_C>,
+				 <&clkc_audio AUD_RESET_FRDDR_C>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		arb: reset-controller@280 {
+			status = "disabled";
+			compatible = "amlogic,meson-axg-audio-arb";
+			reg = <0x0 0x280 0x0 0x4>;
+			#reset-cells = <1>;
+			clocks = <&clkc_audio AUD_CLKID_DDR_ARB>;
+		};
+
+		tdmin_a: audio-controller@300 {
+			compatible = "amlogic,g12a-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x300 0x0 0x40>;
+			sound-name-prefix = "TDMIN_A";
+			resets = <&clkc_audio AUD_RESET_TDMIN_A>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_A>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_b: audio-controller@340 {
+			compatible = "amlogic,g12a-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x340 0x0 0x40>;
+			sound-name-prefix = "TDMIN_B";
+			resets = <&clkc_audio AUD_RESET_TDMIN_B>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_B>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_c: audio-controller@380 {
+			compatible = "amlogic,g12a-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x380 0x0 0x40>;
+			sound-name-prefix = "TDMIN_C";
+			resets = <&clkc_audio AUD_RESET_TDMIN_C>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_C>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_lb: audio-controller@3c0 {
+			compatible = "amlogic,g12a-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x3c0 0x0 0x40>;
+			sound-name-prefix = "TDMIN_LB";
+			resets = <&clkc_audio AUD_RESET_TDMIN_LB>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_LB>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		spdifin: audio-controller@400 {
+			compatible = "amlogic,g12a-spdifin",
+				     "amlogic,axg-spdifin";
+			reg = <0x0 0x400 0x0 0x30>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "SPDIFIN";
+			interrupts = <GIC_SPI 151 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_SPDIFIN>,
+				 <&clkc_audio AUD_CLKID_SPDIFIN_CLK>;
+			clock-names = "pclk", "refclk";
+			resets = <&clkc_audio AUD_RESET_SPDIFIN>;
+			status = "disabled";
+		};
+
+		spdifout: audio-controller@480 {
+			compatible = "amlogic,g12a-spdifout",
+				     "amlogic,axg-spdifout";
+			reg = <0x0 0x480 0x0 0x50>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "SPDIFOUT";
+			clocks = <&clkc_audio AUD_CLKID_SPDIFOUT>,
+				 <&clkc_audio AUD_CLKID_SPDIFOUT_CLK>;
+			clock-names = "pclk", "mclk";
+			resets = <&clkc_audio AUD_RESET_SPDIFOUT>;
+			status = "disabled";
+		};
+
+		tdmout_a: audio-controller@500 {
+			compatible = "amlogic,g12a-tdmout";
+			reg = <0x0 0x500 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_A";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_A>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_A>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmout_b: audio-controller@540 {
+			compatible = "amlogic,g12a-tdmout";
+			reg = <0x0 0x540 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_B";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_B>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_B>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmout_c: audio-controller@580 {
+			compatible = "amlogic,g12a-tdmout";
+			reg = <0x0 0x580 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_C";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_C>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_C>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		spdifout_b: audio-controller@680 {
+			compatible = "amlogic,g12a-spdifout",
+				     "amlogic,axg-spdifout";
+			reg = <0x0 0x680 0x0 0x50>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "SPDIFOUT_B";
+			clocks = <&clkc_audio AUD_CLKID_SPDIFOUT_B>,
+				 <&clkc_audio AUD_CLKID_SPDIFOUT_B_CLK>;
+			clock-names = "pclk", "mclk";
+			resets = <&clkc_audio AUD_RESET_SPDIFOUT_B>;
+			status = "disabled";
+		};
+
+		tohdmitx: audio-controller@744 {
+			compatible = "amlogic,g12a-tohdmitx";
+			reg = <0x0 0x744 0x0 0x4>;
+			#sound-dai-cells = <1>;
+			sound-name-prefix = "TOHDMITX";
+			resets = <&clkc_audio AUD_RESET_TOHDMITX>;
+			status = "disabled";
+		};
+	};
+};
+
+&cpu_thermal {
+	cooling-maps {
+		map0 {
+			trip = <&cpu_passive>;
+			cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu100 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu101 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu102 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu103 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+		};
+		map1 {
+			trip = <&cpu_hot>;
+			cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu100 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu101 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu102 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					 <&cpu103 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+		};
+	};
+};
+
+&ethmac {
+	power-domains = <&pwrc PWRC_G12A_ETH_ID>;
+};
+
+&vpu {
+	power-domains = <&pwrc PWRC_G12A_VPU_ID>;
+};
+
+&sd_emmc_a {
+	amlogic,dram-access-quirk;
+};
+
+&simplefb_cvbs {
+	power-domains = <&pwrc PWRC_G12A_VPU_ID>;
+};
+
+&simplefb_hdmi {
+	power-domains = <&pwrc PWRC_G12A_VPU_ID>;
+};
+
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
index c9fa23a56562..2ac9e3a43b96 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
@@ -438,6 +438,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
index 17155fb73fce..4f2596d82989 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
@@ -409,6 +409,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
index eb5d177d7a99..fb0ab27d1f64 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
@@ -3,8 +3,7 @@
  * Copyright (c) 2018 Amlogic, Inc. All rights reserved.
  */
 
-#include "meson-g12-common.dtsi"
-#include <dt-bindings/power/meson-g12a-power.h>
+#include "meson-g12.dtsi"
 
 / {
 	compatible = "amlogic,g12a";
@@ -19,6 +18,7 @@
 			reg = <0x0 0x0>;
 			enable-method = "psci";
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu1: cpu@1 {
@@ -27,6 +27,7 @@
 			reg = <0x0 0x1>;
 			enable-method = "psci";
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu2: cpu@2 {
@@ -35,6 +36,7 @@
 			reg = <0x0 0x2>;
 			enable-method = "psci";
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu3: cpu@3 {
@@ -43,6 +45,7 @@
 			reg = <0x0 0x3>;
 			enable-method = "psci";
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		l2: l2-cache0 {
@@ -111,14 +114,22 @@
 	};
 };
 
-&ethmac {
-	power-domains = <&pwrc PWRC_G12A_ETH_ID>;
-};
-
-&vpu {
-	power-domains = <&pwrc PWRC_G12A_VPU_ID>;
-};
+&cpu_thermal {
+	cooling-maps {
+		map0 {
+			trip = <&cpu_passive>;
+			cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+		};
 
-&sd_emmc_a {
-	amlogic,dram-access-quirk;
+		map1 {
+			trip = <&cpu_hot>;
+			cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+					<&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+		};
+	};
 };
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dts
index 3a6a1e0c1e32..124a80901084 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dts
@@ -14,3 +14,28 @@
 / {
 	compatible = "khadas,vim3", "amlogic,a311d", "amlogic,g12b";
 };
+
+/*
+ * The VIM3 on-board  MCU can mux the PCIe/USB3.0 shared differential
+ * lines using a FUSB340TMX USB 3.1 SuperSpeed Data Switch between
+ * an USB3.0 Type A connector and a M.2 Key M slot.
+ * The PHY driving these differential lines is shared between
+ * the USB3.0 controller and the PCIe Controller, thus only
+ * a single controller can use it.
+ * If the MCU is configured to mux the PCIe/USB3.0 differential lines
+ * to the M.2 Key M slot, uncomment the following block to disable
+ * USB3.0 from the USB Complex and enable the PCIe controller.
+ * The End User is not expected to uncomment the following except for
+ * testing purposes, but instead rely on the firmware/bootloader to
+ * update these nodes accordingly if PCIe mode is selected by the MCU.
+ */
+/*
+&pcie {
+	status = "okay";
+};
+
+&usb {
+	phys = <&usb2_phy0>, <&usb2_phy1>;
+	phy-names = "usb2-phy0", "usb2-phy1";
+};
+ */
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts
index 42f15405750c..0e54c1dc2842 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts
@@ -12,7 +12,7 @@
 #include <dt-bindings/sound/meson-g12a-tohdmitx.h>
 
 / {
-	compatible = "hardkernel,odroid-n2", "amlogic,g12b";
+	compatible = "hardkernel,odroid-n2", "amlogic,s922x", "amlogic,g12b";
 	model = "Hardkernel ODROID-N2";
 
 	aliases {
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x-khadas-vim3.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x-khadas-vim3.dts
index b73deb282120..bba98f982ad6 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x-khadas-vim3.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x-khadas-vim3.dts
@@ -14,3 +14,28 @@
 / {
 	compatible = "khadas,vim3", "amlogic,s922x", "amlogic,g12b";
 };
+
+/*
+ * The VIM3 on-board  MCU can mux the PCIe/USB3.0 shared differential
+ * lines using a FUSB340TMX USB 3.1 SuperSpeed Data Switch between
+ * an USB3.0 Type A connector and a M.2 Key M slot.
+ * The PHY driving these differential lines is shared between
+ * the USB3.0 controller and the PCIe Controller, thus only
+ * a single controller can use it.
+ * If the MCU is configured to mux the PCIe/USB3.0 differential lines
+ * to the M.2 Key M slot, uncomment the following block to disable
+ * USB3.0 from the USB Complex and enable the PCIe controller.
+ * The End User is not expected to uncomment the following except for
+ * testing purposes, but instead rely on the firmware/bootloader to
+ * update these nodes accordingly if PCIe mode is selected by the MCU.
+ */
+/*
+&pcie {
+	status = "okay";
+};
+
+&usb {
+	phys = <&usb2_phy0>, <&usb2_phy1>;
+	phy-names = "usb2-phy0", "usb2-phy1";
+};
+ */
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts
new file mode 100644
index 000000000000..ccd0bced01e8
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (c) 2019 Christian Hewitt <christianshewitt@gmail.com>
+ */
+
+/dts-v1/;
+
+#include "meson-g12b.dtsi"
+#include "meson-g12b-s922x.dtsi"
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/sound/meson-g12a-tohdmitx.h>
+
+/ {
+	compatible = "ugoos,am6", "amlogic,g12b";
+	model = "Ugoos AM6";
+
+	aliases {
+		serial0 = &uart_AO;
+		ethernet0 = &ethmac;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x40000000>;
+	};
+
+	emmc_pwrseq: emmc-pwrseq {
+		compatible = "mmc-pwrseq-emmc";
+		reset-gpios = <&gpio BOOT_12 GPIO_ACTIVE_LOW>;
+	};
+
+	sdio_pwrseq: sdio-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
+		clocks = <&wifi32k>;
+		clock-names = "ext_clock";
+	};
+
+	spdif_dit: audio-codec-1 {
+		#sound-dai-cells = <0>;
+		compatible = "linux,spdif-dit";
+		status = "okay";
+		sound-name-prefix = "DIT";
+	};
+
+	flash_1v8: regulator-flash_1v8 {
+		compatible = "regulator-fixed";
+		regulator-name = "FLASH_1V8";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		vin-supply = <&vcc_3v3>;
+		regulator-always-on;
+	};
+
+	main_12v: regulator-main_12v {
+		compatible = "regulator-fixed";
+		regulator-name = "12V";
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+		regulator-always-on;
+	};
+
+	vcc_5v: regulator-vcc_5v {
+		compatible = "regulator-fixed";
+		regulator-name = "VCC_5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&main_12v>;
+
+		gpio = <&gpio GPIOH_8 GPIO_OPEN_DRAIN>;
+		enable-active-high;
+	};
+
+	vcc_1v8: regulator-vcc_1v8 {
+		compatible = "regulator-fixed";
+		regulator-name = "VCC_1V8";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		vin-supply = <&vcc_3v3>;
+		regulator-always-on;
+	};
+
+	vcc_3v3: regulator-vcc_3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "VCC_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		vin-supply = <&vddao_3v3>;
+		regulator-always-on;
+		/* FIXME: actually controlled by VDDCPU_B_EN */
+	};
+
+	vddcpu_a: regulator-vddcpu-a {
+		/*
+		 * MP1653 Regulator.
+		 */
+		compatible = "pwm-regulator";
+
+		regulator-name = "VDDCPU_A";
+		regulator-min-microvolt = <721000>;
+		regulator-max-microvolt = <1022000>;
+
+		vin-supply = <&main_12v>;
+
+		pwms = <&pwm_ab 0 1250 0>;
+		pwm-dutycycle-range = <100 0>;
+
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	vddcpu_b: regulator-vddcpu-b {
+		/*
+		 * MP1652 Regulator.
+		 */
+		compatible = "pwm-regulator";
+
+		regulator-name = "VDDCPU_B";
+		regulator-min-microvolt = <721000>;
+		regulator-max-microvolt = <1022000>;
+
+		vin-supply = <&main_12v>;
+
+		pwms = <&pwm_AO_cd 1 1250 0>;
+		pwm-dutycycle-range = <100 0>;
+
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	usb1_pow: regulator-usb1-pow {
+		compatible = "regulator-fixed";
+		regulator-name = "USB1_POW";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&vcc_5v>;
+
+		/* connected to SY6280A Power Switch */
+		gpio = <&gpio GPIOA_8 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	usb_pwr_en: regulator-usb-pwr-en {
+		compatible = "regulator-fixed";
+		regulator-name = "USB_PWR_EN";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&vcc_5v>;
+
+		/* Connected to USB3 Type-A Port power enable */
+		gpio = <&gpio GPIOAO_7 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	vddao_1v8: regulator-vddao-1v8 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDDAO_1V8";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		vin-supply = <&vddao_3v3>;
+		regulator-always-on;
+	};
+
+	vddao_3v3: regulator-vddao-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDDAO_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		vin-supply = <&main_12v>;
+		regulator-always-on;
+	};
+
+	cvbs-connector {
+		compatible = "composite-video-connector";
+
+		port {
+			cvbs_connector_in: endpoint {
+				remote-endpoint = <&cvbs_vdac_out>;
+			};
+		};
+	};
+
+	hdmi-connector {
+		compatible = "hdmi-connector";
+		type = "a";
+
+		port {
+			hdmi_connector_in: endpoint {
+				remote-endpoint = <&hdmi_tx_tmds_out>;
+			};
+		};
+	};
+
+	sound {
+		compatible = "amlogic,axg-sound-card";
+		model = "G12B-UGOOS-AM6";
+		audio-aux-devs = <&tdmout_b>;
+		audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1",
+				"TDMOUT_B IN 1", "FRDDR_B OUT 1",
+				"TDMOUT_B IN 2", "FRDDR_C OUT 1",
+				"TDM_B Playback", "TDMOUT_B OUT",
+				"SPDIFOUT IN 0", "FRDDR_A OUT 3",
+				"SPDIFOUT IN 1", "FRDDR_B OUT 3",
+				"SPDIFOUT IN 2", "FRDDR_C OUT 3";
+
+		assigned-clocks = <&clkc CLKID_MPLL2>,
+				  <&clkc CLKID_MPLL0>,
+				  <&clkc CLKID_MPLL1>;
+		assigned-clock-parents = <0>, <0>, <0>;
+		assigned-clock-rates = <294912000>,
+				       <270950400>,
+				       <393216000>;
+		status = "okay";
+
+		dai-link-0 {
+			sound-dai = <&frddr_a>;
+		};
+
+		dai-link-1 {
+			sound-dai = <&frddr_b>;
+		};
+
+		dai-link-2 {
+			sound-dai = <&frddr_c>;
+		};
+
+		/* 8ch hdmi interface */
+		dai-link-3 {
+			sound-dai = <&tdmif_b>;
+			dai-format = "i2s";
+			dai-tdm-slot-tx-mask-0 = <1 1>;
+			dai-tdm-slot-tx-mask-1 = <1 1>;
+			dai-tdm-slot-tx-mask-2 = <1 1>;
+			dai-tdm-slot-tx-mask-3 = <1 1>;
+			mclk-fs = <256>;
+
+			codec {
+				sound-dai = <&tohdmitx TOHDMITX_I2S_IN_B>;
+			};
+		};
+
+		/* spdif hdmi or toslink interface */
+		dai-link-4 {
+			sound-dai = <&spdifout>;
+
+			codec-0 {
+				sound-dai = <&spdif_dit>;
+			};
+
+			codec-1 {
+				sound-dai = <&tohdmitx TOHDMITX_SPDIF_IN_A>;
+			};
+		};
+
+		/* spdif hdmi interface */
+		dai-link-5 {
+			sound-dai = <&spdifout_b>;
+
+			codec {
+				sound-dai = <&tohdmitx TOHDMITX_SPDIF_IN_B>;
+			};
+		};
+
+		/* hdmi glue */
+		dai-link-6 {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+			codec {
+				sound-dai = <&hdmi_tx>;
+			};
+		};
+	};
+
+	wifi32k: wifi32k {
+		compatible = "pwm-clock";
+		#clock-cells = <0>;
+		clock-frequency = <32768>;
+		pwms = <&pwm_ef 0 30518 0>; /* PWM_E at 32.768KHz */
+	};
+};
+
+&arb {
+	status = "okay";
+};
+
+&cec_AO {
+	pinctrl-0 = <&cec_ao_a_h_pins>;
+	pinctrl-names = "default";
+	status = "disabled";
+	hdmi-phandle = <&hdmi_tx>;
+};
+
+&cecb_AO {
+	pinctrl-0 = <&cec_ao_b_h_pins>;
+	pinctrl-names = "default";
+	status = "okay";
+	hdmi-phandle = <&hdmi_tx>;
+};
+
+&clkc_audio {
+	status = "okay";
+};
+
+&cpu0 {
+	cpu-supply = <&vddcpu_b>;
+	operating-points-v2 = <&cpu_opp_table_0>;
+	clocks = <&clkc CLKID_CPU_CLK>;
+	clock-latency = <50000>;
+};
+
+&cpu1 {
+	cpu-supply = <&vddcpu_b>;
+	operating-points-v2 = <&cpu_opp_table_0>;
+	clocks = <&clkc CLKID_CPU_CLK>;
+	clock-latency = <50000>;
+};
+
+&cpu100 {
+	cpu-supply = <&vddcpu_a>;
+	operating-points-v2 = <&cpub_opp_table_1>;
+	clocks = <&clkc CLKID_CPUB_CLK>;
+	clock-latency = <50000>;
+};
+
+&cpu101 {
+	cpu-supply = <&vddcpu_a>;
+	operating-points-v2 = <&cpub_opp_table_1>;
+	clocks = <&clkc CLKID_CPUB_CLK>;
+	clock-latency = <50000>;
+};
+
+&cpu102 {
+	cpu-supply = <&vddcpu_a>;
+	operating-points-v2 = <&cpub_opp_table_1>;
+	clocks = <&clkc CLKID_CPUB_CLK>;
+	clock-latency = <50000>;
+};
+
+&cpu103 {
+	cpu-supply = <&vddcpu_a>;
+	operating-points-v2 = <&cpub_opp_table_1>;
+	clocks = <&clkc CLKID_CPUB_CLK>;
+	clock-latency = <50000>;
+};
+
+&cvbs_vdac_port {
+	cvbs_vdac_out: endpoint {
+		remote-endpoint = <&cvbs_connector_in>;
+	};
+};
+
+&ext_mdio {
+	external_phy: ethernet-phy@0 {
+		/* Realtek RTL8211F (0x001cc916) */
+		reg = <0>;
+		max-speed = <1000>;
+
+		reset-assert-us = <10000>;
+		reset-deassert-us = <30000>;
+		reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
+
+		interrupt-parent = <&gpio_intc>;
+		/* MAC_INTR on GPIOZ_14 */
+		interrupts = <26 IRQ_TYPE_LEVEL_LOW>;
+	};
+};
+
+&ethmac {
+	pinctrl-0 = <&eth_pins>, <&eth_rgmii_pins>;
+	pinctrl-names = "default";
+	status = "okay";
+	phy-mode = "rgmii";
+	phy-handle = <&external_phy>;
+	amlogic,tx-delay-ns = <2>;
+};
+
+&frddr_a {
+	status = "okay";
+};
+
+&frddr_b {
+	status = "okay";
+};
+
+&frddr_c {
+	status = "okay";
+};
+
+&hdmi_tx {
+	status = "okay";
+	pinctrl-0 = <&hdmitx_hpd_pins>, <&hdmitx_ddc_pins>;
+	pinctrl-names = "default";
+	hdmi-supply = <&vcc_5v>;
+};
+
+&hdmi_tx_tmds_port {
+	hdmi_tx_tmds_out: endpoint {
+		remote-endpoint = <&hdmi_connector_in>;
+	};
+};
+
+&ir {
+	status = "okay";
+	pinctrl-0 = <&remote_input_ao_pins>;
+	pinctrl-names = "default";
+	linux,rc-map-name = "rc-khadas";
+};
+
+&pwm_ab {
+	pinctrl-0 = <&pwm_a_e_pins>;
+	pinctrl-names = "default";
+	clocks = <&xtal>;
+	clock-names = "clkin0";
+	status = "okay";
+};
+
+&pwm_AO_cd {
+	pinctrl-0 = <&pwm_ao_d_e_pins>;
+	pinctrl-names = "default";
+	clocks = <&xtal>;
+	clock-names = "clkin1";
+	status = "okay";
+};
+
+&pwm_ef {
+	pinctrl-0 = <&pwm_e_pins>;
+	pinctrl-names = "default";
+	clocks = <&xtal>;
+	clock-names = "clkin0";
+	status = "okay";
+};
+
+/* SDIO */
+&sd_emmc_a {
+	status = "okay";
+	pinctrl-0 = <&sdio_pins>;
+	pinctrl-1 = <&sdio_clk_gate_pins>;
+	pinctrl-names = "default", "clk-gate";
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	bus-width = <4>;
+	cap-sd-highspeed;
+	sd-uhs-sdr50;
+	max-frequency = <100000000>;
+
+	non-removable;
+	disable-wp;
+
+	mmc-pwrseq = <&sdio_pwrseq>;
+
+	vmmc-supply = <&vddao_3v3>;
+	vqmmc-supply = <&vddao_1v8>;
+
+	brcmf: wifi@1 {
+		reg = <1>;
+		compatible = "brcm,bcm4329-fmac";
+	};
+};
+
+/* SD card */
+&sd_emmc_b {
+	status = "okay";
+	pinctrl-0 = <&sdcard_c_pins>;
+	pinctrl-1 = <&sdcard_clk_gate_c_pins>;
+	pinctrl-names = "default", "clk-gate";
+
+	bus-width = <4>;
+	cap-sd-highspeed;
+	max-frequency = <50000000>;
+	disable-wp;
+
+	cd-gpios = <&gpio GPIOC_6 GPIO_ACTIVE_LOW>;
+	vmmc-supply = <&vddao_3v3>;
+	vqmmc-supply = <&vddao_3v3>;
+};
+
+/* eMMC */
+&sd_emmc_c {
+	status = "okay";
+	pinctrl-0 = <&emmc_pins>, <&emmc_ds_pins>;
+	pinctrl-1 = <&emmc_clk_gate_pins>;
+	pinctrl-names = "default", "clk-gate";
+
+	bus-width = <8>;
+	cap-mmc-highspeed;
+	max-frequency = <100000000>;
+	disable-wp;
+
+	mmc-pwrseq = <&emmc_pwrseq>;
+	vmmc-supply = <&vcc_3v3>;
+	vqmmc-supply = <&flash_1v8>;
+};
+
+&spdifout {
+	pinctrl-0 = <&spdif_out_h_pins>;
+	pinctrl-names = "default";
+	status = "okay";
+};
+
+&spdifout_b {
+	status = "okay";
+};
+
+&tdmif_b {
+	status = "okay";
+};
+
+&tdmout_b {
+	status = "okay";
+};
+
+&tohdmitx {
+	status = "okay";
+};
+
+&uart_A {
+	status = "okay";
+	pinctrl-0 = <&uart_a_pins>, <&uart_a_cts_rts_pins>;
+	pinctrl-names = "default";
+	uart-has-rtscts;
+
+	bluetooth {
+		compatible = "brcm,bcm43438-bt";
+		shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+		max-speed = <2000000>;
+		clocks = <&wifi32k>;
+	clock-names = "lpo";
+	};
+};
+
+&uart_AO {
+	status = "okay";
+	pinctrl-0 = <&uart_ao_a_pins>;
+	pinctrl-names = "default";
+};
+
+&usb {
+	status = "okay";
+	dr_mode = "host";
+	vbus-regulator = <&usb_pwr_en>;
+};
+
+&usb2_phy0 {
+	phy-supply = <&usb1_pow>;
+};
+
+&usb2_phy1 {
+	phy-supply = <&usb1_pow>;
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi
index 5628ccd54531..6dbc3968045b 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi
@@ -4,8 +4,7 @@
  * Author: Neil Armstrong <narmstrong@baylibre.com>
  */
 
-#include "meson-g12-common.dtsi"
-#include <dt-bindings/power/meson-g12a-power.h>
+#include "meson-g12.dtsi"
 
 / {
 	compatible = "amlogic,g12b";
@@ -49,7 +48,9 @@
 			compatible = "arm,cortex-a53";
 			reg = <0x0 0x0>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <592>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu1: cpu@1 {
@@ -57,7 +58,9 @@
 			compatible = "arm,cortex-a53";
 			reg = <0x0 0x1>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <592>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu100: cpu@100 {
@@ -65,7 +68,9 @@
 			compatible = "arm,cortex-a73";
 			reg = <0x0 0x100>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <1024>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu101: cpu@101 {
@@ -73,7 +78,9 @@
 			compatible = "arm,cortex-a73";
 			reg = <0x0 0x101>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <1024>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu102: cpu@102 {
@@ -81,7 +88,9 @@
 			compatible = "arm,cortex-a73";
 			reg = <0x0 0x102>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <1024>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		cpu103: cpu@103 {
@@ -89,7 +98,9 @@
 			compatible = "arm,cortex-a73";
 			reg = <0x0 0x103>;
 			enable-method = "psci";
+			capacity-dmips-mhz = <1024>;
 			next-level-cache = <&l2>;
+			#cooling-cells = <2>;
 		};
 
 		l2: l2-cache0 {
@@ -102,14 +113,3 @@
 	compatible = "amlogic,g12b-clkc";
 };
 
-&ethmac {
-	power-domains = <&pwrc PWRC_G12A_ETH_ID>;
-};
-
-&vpu {
-	power-domains = <&pwrc PWRC_G12A_VPU_ID>;
-};
-
-&sd_emmc_a {
-	amlogic,dram-access-quirk;
-};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
index a9b778571cf5..12d5e333e5f2 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
@@ -169,6 +169,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index 6733050d735f..40db06e28b66 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -161,6 +161,7 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		read-only;
+		secure-monitor = <&sm>;
 
 		sn: sn@14 {
 			reg = <0x14 0x10>;
@@ -240,7 +241,7 @@
 			};
 
 			i2c_A: i2c@8500 {
-				compatible = "amlogic,meson-gx-i2c", "amlogic,meson-gxbb-i2c";
+				compatible = "amlogic,meson-gxbb-i2c";
 				reg = <0x0 0x08500 0x0 0x20>;
 				interrupts = <GIC_SPI 21 IRQ_TYPE_EDGE_RISING>;
 				#address-cells = <1>;
@@ -290,7 +291,7 @@
 			};
 
 			i2c_B: i2c@87c0 {
-				compatible = "amlogic,meson-gx-i2c", "amlogic,meson-gxbb-i2c";
+				compatible = "amlogic,meson-gxbb-i2c";
 				reg = <0x0 0x087c0 0x0 0x20>;
 				interrupts = <GIC_SPI 214 IRQ_TYPE_EDGE_RISING>;
 				#address-cells = <1>;
@@ -299,7 +300,7 @@
 			};
 
 			i2c_C: i2c@87e0 {
-				compatible = "amlogic,meson-gx-i2c", "amlogic,meson-gxbb-i2c";
+				compatible = "amlogic,meson-gxbb-i2c";
 				reg = <0x0 0x087e0 0x0 0x20>;
 				interrupts = <GIC_SPI 215 IRQ_TYPE_EDGE_RISING>;
 				#address-cells = <1>;
@@ -391,6 +392,7 @@
 				compatible = "amlogic,meson-gx-ao-cec";
 				reg = <0x0 0x00100 0x0 0x14>;
 				interrupts = <GIC_SPI 199 IRQ_TYPE_EDGE_RISING>;
+				status = "disabled";
 			};
 
 			sec_AO: ao-secure@140 {
@@ -414,7 +416,7 @@
 			};
 
 			i2c_AO: i2c@500 {
-				compatible = "amlogic,meson-gx-i2c", "amlogic,meson-gxbb-i2c";
+				compatible = "amlogic,meson-gxbb-i2c";
 				reg = <0x0 0x500 0x0 0x20>;
 				interrupts = <GIC_SPI 195 IRQ_TYPE_EDGE_RISING>;
 				#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
index 233eb1cd7967..d6ca684e0e61 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
@@ -280,6 +280,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddio_ao3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
index afcf8a9f667b..65ec7dea828c 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
@@ -220,6 +220,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
index 6039adda12ee..6ded279c40c8 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
@@ -36,8 +36,15 @@
 		regulator-min-microvolt = <5000000>;
 		regulator-max-microvolt = <5000000>;
 
+		/*
+		 * signal name from schematics: PWREN
+		 */
 		gpio = <&gpio_ao GPIOAO_5 GPIO_ACTIVE_HIGH>;
 		enable-active-high;
+		/*
+		 * signal name from schematics: USB_POWER
+		 */
+		vin-supply = <&p5v0>;
 	};
 
 	leds {
@@ -50,18 +57,38 @@
 		};
 	};
 
+	p5v0: regulator-p5v0 {
+		compatible = "regulator-fixed";
+
+		regulator-name = "P5V0";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-always-on;
+	};
+
+	hdmi_p5v0: regulator-hdmi_p5v0 {
+		compatible = "regulator-fixed";
+		regulator-name = "HDMI_P5V0";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		/* AP2331SA-7 */
+		vin-supply = <&p5v0>;
+	};
+
 	tflash_vdd: regulator-tflash_vdd {
-		/*
-		 * signal name from schematics: TFLASH_VDD_EN
-		 */
 		compatible = "regulator-fixed";
 
 		regulator-name = "TFLASH_VDD";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
 
+		/*
+		 * signal name from schematics: TFLASH_VDD_EN
+		 */
 		gpio = <&gpio GPIOY_12 GPIO_ACTIVE_HIGH>;
 		enable-active-high;
+		/* U16 RT9179GB */
+		vin-supply = <&vddio_ao3v3>;
 	};
 
 	tf_io: gpio-regulator-tf_io {
@@ -79,6 +106,8 @@
 
 		states = <3300000 0>,
 			 <1800000 1>;
+		/* U12/U13 RT9179GB */
+		vin-supply = <&vddio_ao3v3>;
 	};
 
 	vcc1v8: regulator-vcc1v8 {
@@ -86,6 +115,9 @@
 		regulator-name = "VCC1V8";
 		regulator-min-microvolt = <1800000>;
 		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		/* U18 RT9179GB */
+		vin-supply = <&vddio_ao3v3>;
 	};
 
 	vcc3v3: regulator-vcc3v3 {
@@ -95,6 +127,36 @@
 		regulator-max-microvolt = <3300000>;
 	};
 
+	vddio_ao1v8: regulator-vddio-ao1v8 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDDIO_AO1V8";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		/* U17 RT9179GB */
+		vin-supply = <&p5v0>;
+	};
+
+	vddio_ao3v3: regulator-vddio-ao3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDDIO_AO3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		/* U11 MP2161GJ-C499 */
+		vin-supply = <&p5v0>;
+	};
+
+	ddr3_1v5: regulator-ddr3_1v5 {
+		compatible = "regulator-fixed";
+		regulator-name = "DDR3_1V5";
+		regulator-min-microvolt = <1500000>;
+		regulator-max-microvolt = <1500000>;
+		regulator-always-on;
+		/* U15 MP2161GJ-C499 */
+		vin-supply = <&p5v0>;
+	};
+
 	emmc_pwrseq: emmc-pwrseq {
 		compatible = "mmc-pwrseq-emmc";
 		reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>;
@@ -167,6 +229,7 @@
 	status = "okay";
 	pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>;
 	pinctrl-names = "default";
+	hdmi-supply = <&hdmi_p5v0>;
 };
 
 &hdmi_tx_tmds_port {
@@ -296,7 +359,7 @@
 };
 
 &usb0_phy {
-	status = "okay";
+	status = "disabled";
 	phy-supply = <&usb_otg_pwr>;
 };
 
@@ -306,7 +369,7 @@
 };
 
 &usb0 {
-	status = "okay";
+	status = "disabled";
 };
 
 &usb1 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
index 89f7b41b0e9e..e803a466fe4e 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
@@ -170,6 +170,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
index 43b11e3dfe11..5eab3dfdbd55 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
@@ -152,6 +152,7 @@
 	status = "okay";
 	pinctrl-0 = <&remote_input_ao_pins>;
 	pinctrl-names = "default";
+	linux,rc-map-name = "rc-vega-s9x";
 };
 
 &pwm_ef {
@@ -183,6 +184,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
index 4c539881fbb7..dee51cf95223 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
@@ -200,6 +200,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
index 82b1c4851147..4d5949496596 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
@@ -14,7 +14,7 @@
 / {
 	compatible = "libretech,aml-s805x-ac", "amlogic,s805x",
 		     "amlogic,meson-gxl";
-	model = "Libre Computer Board AML-S805X-AC";
+	model = "Libre Computer AML-S805X-AC";
 
 	aliases {
 		serial0 = &uart_AO;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
index 3a1484e5b8e1..a1119cfb0280 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
@@ -165,6 +165,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts
index 2a5cd303123d..440bc23c7342 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts
@@ -33,11 +33,9 @@
 
 	gpio-keys-polled {
 		compatible = "gpio-keys-polled";
-		#address-cells = <1>;
-		#size-cells = <0>;
 		poll-interval = <100>;
 
-		button@0 {
+		power-button {
 			label = "power";
 			linux,code = <KEY_POWER>;
 			gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>;
@@ -192,6 +190,9 @@
 	bluetooth {
 		compatible = "brcm,bcm43438-bt";
 		shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+		max-speed = <2000000>;
+		clocks = <&wifi32k>;
+		clock-names = "lpo";
 	};
 };
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
index 4b8ce738e213..e8348b2728db 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
@@ -12,8 +12,9 @@
 #include "meson-gxl-s905x.dtsi"
 
 / {
-	compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl";
-	model = "Libre Computer Board AML-S905X-CC";
+	compatible = "libretech,aml-s905x-cc", "amlogic,s905x",
+		     "amlogic,meson-gxl";
+	model = "Libre Computer AML-S905X-CC";
 
 	aliases {
 		serial0 = &uart_AO;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
index c433a031841f..62dd87821ce5 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
@@ -165,6 +165,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
index e3c16f50814b..43eb7d149e36 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
@@ -119,6 +119,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 49ff0a7d0210..ed33d8efaf62 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -36,6 +36,16 @@
 				phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>;
 			};
 		};
+
+		crypto: crypto@c883e000 {
+			compatible = "amlogic,gxl-crypto";
+			reg = <0x0 0xc883e000 0x0 0x36>;
+			interrupts = <GIC_SPI 188 IRQ_TYPE_EDGE_RISING>,
+				     <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc CLKID_BLKMV>;
+			clock-names = "blkmv";
+			status = "okay";
+		};
 	};
 };
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
index f25ddd18a607..f82f25c1a5f9 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
@@ -332,6 +332,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
@@ -409,6 +412,9 @@
 	bluetooth {
 		compatible = "brcm,bcm43438-bt";
 		shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+		max-speed = <2000000>;
+		clocks = <&wifi32k>;
+		clock-names = "lpo";
 	};
 };
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
index 5cd4d35006d0..420a88e9a195 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
@@ -148,6 +148,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-vega-s96.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-vega-s96.dts
index e2ea6753263b..0bdf51d041ae 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-vega-s96.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-vega-s96.dts
@@ -35,3 +35,7 @@
 		reg = <0>;
 	};
 };
+
+&ir {
+	linux,rc-map-name = "rc-vega-s9x";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
index a0e677d5a8f7..5ff64a0d2dcf 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
@@ -96,10 +96,10 @@
 		compatible = "amlogic,meson-gxm-mali", "arm,mali-t820";
 		reg = <0x0 0xc0000 0x0 0x40000>;
 		interrupt-parent = <&gic>;
-		interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+		interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "gpu", "mmu", "job";
+			     <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "job", "mmu", "gpu";
 		clocks = <&clkc CLKID_MALI>;
 		resets = <&reset RESET_MALI_CAPB3>, <&reset RESET_MALI>;
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
index 8647da7d6609..90815fa25ec6 100644
--- a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
@@ -246,6 +246,10 @@
 	linux,rc-map-name = "rc-khadas";
 };
 
+&pcie {
+	reset-gpios = <&gpio GPIOA_8 GPIO_ACTIVE_LOW>;
+};
+
 &pwm_ef {
         status = "okay";
         pinctrl-0 = <&pwm_e_pins>;
@@ -274,6 +278,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vsys_3v3>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
index 5233bd7cacfb..dbbf29a0dbf6 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
@@ -68,3 +68,28 @@
 	clock-names = "clkin1";
 	status = "okay";
 };
+
+/*
+ * The VIM3 on-board  MCU can mux the PCIe/USB3.0 shared differential
+ * lines using a FUSB340TMX USB 3.1 SuperSpeed Data Switch between
+ * an USB3.0 Type A connector and a M.2 Key M slot.
+ * The PHY driving these differential lines is shared between
+ * the USB3.0 controller and the PCIe Controller, thus only
+ * a single controller can use it.
+ * If the MCU is configured to mux the PCIe/USB3.0 differential lines
+ * to the M.2 Key M slot, uncomment the following block to disable
+ * USB3.0 from the USB Complex and enable the PCIe controller.
+ * The End User is not expected to uncomment the following except for
+ * testing purposes, but instead rely on the firmware/bootloader to
+ * update these nodes accordingly if PCIe mode is selected by the MCU.
+ */
+/*
+&pcie {
+	status = "okay";
+};
+
+&usb {
+	phys = <&usb2_phy0>, <&usb2_phy1>;
+	phy-names = "usb2-phy0", "usb2-phy1";
+};
+ */
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
index 3435aaa4e8db..a8bb3fa9fec9 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
@@ -9,6 +9,7 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
 #include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/sound/meson-g12a-tohdmitx.h>
 
 / {
 	compatible = "seirobotics,sei610", "amlogic,sm1";
@@ -19,6 +20,22 @@
 		ethernet0 = &ethmac;
 	};
 
+	mono_dac: audio-codec-0 {
+		compatible = "maxim,max98357a";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "U16";
+		sdmode-gpios = <&gpio GPIOX_8 GPIO_ACTIVE_HIGH>;
+	};
+
+	dmics: audio-codec-1 {
+		#sound-dai-cells = <0>;
+		compatible = "dmic-codec";
+		num-channels = <2>;
+		wakeup-delay-ms = <50>;
+		status = "okay";
+		sound-name-prefix = "MIC";
+	};
+
 	chosen {
 		stdout-path = "serial0:115200n8";
 	};
@@ -29,25 +46,47 @@
 	};
 
 	gpio-keys {
-		compatible = "gpio-keys-polled";
-		poll-interval = <100>;
+		compatible = "gpio-keys";
 
 		key1 {
 			label = "A";
 			linux,code = <BTN_0>;
 			gpios = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>;
+			interrupt-parent = <&gpio_intc>;
+			interrupts = <34 IRQ_TYPE_EDGE_BOTH>;
 		};
 
 		key2 {
 			label = "B";
 			linux,code = <BTN_1>;
 			gpios = <&gpio GPIOH_7 GPIO_ACTIVE_LOW>;
+			interrupt-parent = <&gpio_intc>;
+			interrupts = <35 IRQ_TYPE_EDGE_BOTH>;
 		};
 
 		key3 {
 			label = "C";
 			linux,code = <BTN_2>;
 			gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>;
+			interrupt-parent = <&gpio_intc>;
+			interrupts = <2 IRQ_TYPE_EDGE_BOTH>;
+		};
+
+		mic_mute {
+			label = "MicMute";
+			linux,code = <SW_MUTE_DEVICE>;
+			linux,input-type = <EV_SW>;
+			gpios = <&gpio_ao GPIOE_2 GPIO_ACTIVE_LOW>;
+			interrupt-parent = <&gpio_intc>;
+			interrupts = <99 IRQ_TYPE_EDGE_BOTH>;
+		};
+
+		power_key {
+			label = "PowerKey";
+			linux,code = <KEY_POWER>;
+			gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_LOW>;
+			interrupt-parent = <&gpio_intc>;
+			interrupts = <3 IRQ_TYPE_EDGE_BOTH>;
 		};
 	};
 
@@ -179,6 +218,120 @@
 		clock-names = "ext_clock";
 	};
 
+	sound {
+		compatible = "amlogic,axg-sound-card";
+		model = "SM1-SEI610";
+		audio-aux-devs = <&tdmout_a>, <&tdmout_b>,
+				 <&tdmin_a>, <&tdmin_b>;
+		audio-routing = "TDMOUT_A IN 0", "FRDDR_A OUT 0",
+				"TDMOUT_A IN 1", "FRDDR_B OUT 0",
+				"TDMOUT_A IN 2", "FRDDR_C OUT 0",
+				"TDM_A Playback", "TDMOUT_A OUT",
+				"TDMOUT_B IN 0", "FRDDR_A OUT 1",
+				"TDMOUT_B IN 1", "FRDDR_B OUT 1",
+				"TDMOUT_B IN 2", "FRDDR_C OUT 1",
+				"TDM_B Playback", "TDMOUT_B OUT",
+				"TODDR_A IN 4", "PDM Capture",
+				"TODDR_B IN 4", "PDM Capture",
+				"TODDR_C IN 4", "PDM Capture",
+				"TDMIN_A IN 0", "TDM_A Capture",
+				"TDMIN_A IN 3", "TDM_A Loopback",
+				"TDMIN_B IN 0", "TDM_A Capture",
+				"TDMIN_B IN 3", "TDM_A Loopback",
+				"TDMIN_A IN 1", "TDM_B Capture",
+				"TDMIN_A IN 4", "TDM_B Loopback",
+				"TDMIN_B IN 1", "TDM_B Capture",
+				"TDMIN_B IN 4", "TDM_B Loopback",
+				"TODDR_A IN 0", "TDMIN_A OUT",
+				"TODDR_B IN 0", "TDMIN_A OUT",
+				"TODDR_C IN 0", "TDMIN_A OUT",
+				"TODDR_A IN 1", "TDMIN_B OUT",
+				"TODDR_B IN 1", "TDMIN_B OUT",
+				"TODDR_C IN 1", "TDMIN_B OUT";
+
+		assigned-clocks = <&clkc CLKID_MPLL2>,
+				  <&clkc CLKID_MPLL0>,
+				  <&clkc CLKID_MPLL1>;
+		assigned-clock-parents = <0>, <0>, <0>;
+		assigned-clock-rates = <294912000>,
+				       <270950400>,
+				       <393216000>;
+		status = "okay";
+
+		dai-link-0 {
+			sound-dai = <&frddr_a>;
+		};
+
+		dai-link-1 {
+			sound-dai = <&frddr_b>;
+		};
+
+		dai-link-2 {
+			sound-dai = <&frddr_c>;
+		};
+
+		dai-link-3 {
+			sound-dai = <&toddr_a>;
+		};
+
+		dai-link-4 {
+			sound-dai = <&toddr_b>;
+		};
+
+		dai-link-5 {
+			sound-dai = <&toddr_c>;
+		};
+
+		/* internal speaker interface */
+		dai-link-6 {
+			sound-dai = <&tdmif_a>;
+			dai-format = "i2s";
+			dai-tdm-slot-tx-mask-0 = <1 1>;
+			mclk-fs = <256>;
+
+			codec-0 {
+				sound-dai = <&mono_dac>;
+			};
+
+			codec-1 {
+				sound-dai = <&tohdmitx TOHDMITX_I2S_IN_A>;
+			};
+		};
+
+		/* 8ch hdmi interface */
+		dai-link-7 {
+			sound-dai = <&tdmif_b>;
+			dai-format = "i2s";
+			dai-tdm-slot-tx-mask-0 = <1 1>;
+			dai-tdm-slot-tx-mask-1 = <1 1>;
+			dai-tdm-slot-tx-mask-2 = <1 1>;
+			dai-tdm-slot-tx-mask-3 = <1 1>;
+			mclk-fs = <256>;
+
+			codec {
+				sound-dai = <&tohdmitx TOHDMITX_I2S_IN_B>;
+			};
+		};
+
+		/* internal digital mics */
+		dai-link-8 {
+			sound-dai = <&pdm>;
+
+			codec {
+				sound-dai = <&dmics>;
+			};
+		};
+
+		/* hdmi glue */
+		dai-link-9 {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+			codec {
+				sound-dai = <&hdmi_tx>;
+			};
+		};
+	};
+
 	wifi32k: wifi32k {
 		compatible = "pwm-clock";
 		#clock-cells = <0>;
@@ -187,6 +340,10 @@
 	};
 };
 
+&arb {
+	status = "okay";
+};
+
 &cec_AO {
 	pinctrl-0 = <&cec_ao_a_h_pins>;
 	pinctrl-names = "default";
@@ -201,6 +358,10 @@
 	hdmi-phandle = <&hdmi_tx>;
 };
 
+&clkc_audio {
+	status = "okay";
+};
+
 &cpu0 {
 	cpu-supply = <&vddcpu>;
 	operating-points-v2 = <&cpu_opp_table>;
@@ -235,6 +396,18 @@
 	phy-mode = "rmii";
 };
 
+&frddr_a {
+	status = "okay";
+};
+
+&frddr_b {
+	status = "okay";
+};
+
+&frddr_c {
+	status = "okay";
+};
+
 &hdmi_tx {
 	status = "okay";
 	pinctrl-0 = <&hdmitx_hpd_pins>, <&hdmitx_ddc_pins>;
@@ -259,6 +432,12 @@
 	pinctrl-names = "default";
 };
 
+&pdm {
+	pinctrl-0 = <&pdm_din0_z_pins>, <&pdm_dclk_z_pins>;
+	pinctrl-names = "default";
+	status = "okay";
+};
+
 &pwm_AO_ab {
 	status = "okay";
 	pinctrl-0 = <&pwm_ao_a_pins>;
@@ -305,6 +484,9 @@
 	non-removable;
 	disable-wp;
 
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+
 	mmc-pwrseq = <&sdio_pwrseq>;
 
 	vmmc-supply = <&vddao_3v3>;
@@ -353,6 +535,54 @@
 	vqmmc-supply = <&emmc_1v8>;
 };
 
+&tdmif_a {
+	pinctrl-0 = <&tdm_a_dout0_pins>, <&tdm_a_fs_pins>, <&tdm_a_sclk_pins>;
+	pinctrl-names = "default";
+	status = "okay";
+
+	assigned-clocks = <&clkc_audio AUD_CLKID_TDM_SCLK_PAD0>,
+			  <&clkc_audio AUD_CLKID_TDM_LRCLK_PAD0>;
+	assigned-clock-parents = <&clkc_audio AUD_CLKID_MST_A_SCLK>,
+				 <&clkc_audio AUD_CLKID_MST_A_LRCLK>;
+	assigned-clock-rates = <0>, <0>;
+};
+
+&tdmif_b {
+	status = "okay";
+};
+
+&tdmin_a {
+	status = "okay";
+};
+
+&tdmin_b {
+	status = "okay";
+};
+
+&tdmout_a {
+	status = "okay";
+};
+
+&tdmout_b {
+	status = "okay";
+};
+
+&toddr_a {
+	status = "okay";
+};
+
+&toddr_b {
+	status = "okay";
+};
+
+&toddr_c {
+	status = "okay";
+};
+
+&tohdmitx {
+	status = "okay";
+};
+
 &uart_A {
 	status = "okay";
 	pinctrl-0 = <&uart_a_pins>, <&uart_a_cts_rts_pins>;
@@ -361,6 +591,8 @@
 
 	bluetooth {
 		compatible = "brcm,bcm43438-bt";
+		interrupt-parent = <&gpio_intc>;
+		interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
 		shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
 		max-speed = <2000000>;
 		clocks = <&wifi32k>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
index 521573f3a5ba..7894a5458dbc 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
@@ -5,11 +5,47 @@
  */
 
 #include "meson-g12-common.dtsi"
+#include <dt-bindings/clock/axg-audio-clkc.h>
 #include <dt-bindings/power/meson-sm1-power.h>
+#include <dt-bindings/reset/amlogic,meson-axg-audio-arb.h>
+#include <dt-bindings/reset/amlogic,meson-g12a-audio-reset.h>
 
 / {
 	compatible = "amlogic,sm1";
 
+	tdmif_a: audio-controller-0 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_A";
+		clocks = <&clkc_audio AUD_CLKID_MST_A_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_A_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_A_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+
+	tdmif_b: audio-controller-1 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_B";
+		clocks = <&clkc_audio AUD_CLKID_MST_B_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_B_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_B_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+
+	tdmif_c: audio-controller-2 {
+		compatible = "amlogic,axg-tdm-iface";
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "TDM_C";
+		clocks = <&clkc_audio AUD_CLKID_MST_C_MCLK>,
+			 <&clkc_audio AUD_CLKID_MST_C_SCLK>,
+			 <&clkc_audio AUD_CLKID_MST_C_LRCLK>;
+		clock-names = "mclk", "sclk", "lrclk";
+		status = "disabled";
+	};
+
 	cpus {
 		#address-cells = <0x2>;
 		#size-cells = <0x0>;
@@ -117,6 +153,297 @@
 	};
 };
 
+&apb {
+	audio: bus@60000 {
+		compatible = "simple-bus";
+		reg = <0x0 0x60000 0x0 0x1000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges = <0x0 0x0 0x0 0x60000 0x0 0x1000>;
+
+		clkc_audio: clock-controller@0 {
+			status = "disabled";
+			compatible = "amlogic,sm1-audio-clkc";
+			reg = <0x0 0x0 0x0 0xb4>;
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+
+			clocks = <&clkc CLKID_AUDIO>,
+				 <&clkc CLKID_MPLL0>,
+				 <&clkc CLKID_MPLL1>,
+				 <&clkc CLKID_MPLL2>,
+				 <&clkc CLKID_MPLL3>,
+				 <&clkc CLKID_HIFI_PLL>,
+				 <&clkc CLKID_FCLK_DIV3>,
+				 <&clkc CLKID_FCLK_DIV4>,
+				 <&clkc CLKID_FCLK_DIV5>;
+			clock-names = "pclk",
+				      "mst_in0",
+				      "mst_in1",
+				      "mst_in2",
+				      "mst_in3",
+				      "mst_in4",
+				      "mst_in5",
+				      "mst_in6",
+				      "mst_in7";
+
+			resets = <&reset RESET_AUDIO>;
+		};
+
+		toddr_a: audio-controller@100 {
+			compatible = "amlogic,sm1-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x100 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_A";
+			interrupts = <GIC_SPI 148 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_A>;
+			resets = <&arb AXG_ARB_TODDR_A>,
+				 <&clkc_audio AUD_RESET_TODDR_A>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		toddr_b: audio-controller@140 {
+			compatible = "amlogic,sm1-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x140 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_B";
+			interrupts = <GIC_SPI 149 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_B>;
+			resets = <&arb AXG_ARB_TODDR_B>,
+				 <&clkc_audio AUD_RESET_TODDR_B>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		toddr_c: audio-controller@180 {
+			compatible = "amlogic,sm1-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x180 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_C";
+			interrupts = <GIC_SPI 150 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_C>;
+			resets = <&arb AXG_ARB_TODDR_C>,
+				 <&clkc_audio AUD_RESET_TODDR_C>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_a: audio-controller@1c0 {
+			compatible = "amlogic,sm1-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x1c0 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_A";
+			interrupts = <GIC_SPI 152 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_A>;
+			resets = <&arb AXG_ARB_FRDDR_A>,
+				 <&clkc_audio AUD_RESET_FRDDR_A>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_b: audio-controller@200 {
+			compatible = "amlogic,sm1-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x200 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_B";
+			interrupts = <GIC_SPI 153 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_B>;
+			resets = <&arb AXG_ARB_FRDDR_B>,
+				 <&clkc_audio AUD_RESET_FRDDR_B>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_c: audio-controller@240 {
+			compatible = "amlogic,sm1-frddr",
+				     "amlogic,axg-frddr";
+			reg = <0x0 0x240 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_C";
+			interrupts = <GIC_SPI 154 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_C>;
+			resets = <&arb AXG_ARB_FRDDR_C>,
+				 <&clkc_audio AUD_RESET_FRDDR_C>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		arb: reset-controller@280 {
+			status = "disabled";
+			compatible = "amlogic,meson-sm1-audio-arb";
+			reg = <0x0 0x280 0x0 0x4>;
+			#reset-cells = <1>;
+			clocks = <&clkc_audio AUD_CLKID_DDR_ARB>;
+		};
+
+		tdmin_a: audio-controller@300 {
+			compatible = "amlogic,sm1-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x300 0x0 0x40>;
+			sound-name-prefix = "TDMIN_A";
+			resets = <&clkc_audio AUD_RESET_TDMIN_A>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_A>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_A_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_b: audio-controller@340 {
+			compatible = "amlogic,sm1-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x340 0x0 0x40>;
+			sound-name-prefix = "TDMIN_B";
+			resets = <&clkc_audio AUD_RESET_TDMIN_B>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_B>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_B_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_c: audio-controller@380 {
+			compatible = "amlogic,sm1-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x380 0x0 0x40>;
+			sound-name-prefix = "TDMIN_C";
+			resets = <&clkc_audio AUD_RESET_TDMIN_C>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_C>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_C_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmin_lb: audio-controller@3c0 {
+			compatible = "amlogic,sm1-tdmin",
+				     "amlogic,axg-tdmin";
+			reg = <0x0 0x3c0 0x0 0x40>;
+			sound-name-prefix = "TDMIN_LB";
+			resets = <&clkc_audio AUD_RESET_TDMIN_LB>;
+			clocks = <&clkc_audio AUD_CLKID_TDMIN_LB>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMIN_LB_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmout_a: audio-controller@500 {
+			compatible = "amlogic,sm1-tdmout";
+			reg = <0x0 0x500 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_A";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_A>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_A>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_A_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmout_b: audio-controller@540 {
+			compatible = "amlogic,sm1-tdmout";
+			reg = <0x0 0x540 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_B";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_B>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_B>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_B_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tdmout_c: audio-controller@580 {
+			compatible = "amlogic,sm1-tdmout";
+			reg = <0x0 0x580 0x0 0x40>;
+			sound-name-prefix = "TDMOUT_C";
+			resets = <&clkc_audio AUD_RESET_TDMOUT_C>;
+			clocks = <&clkc_audio AUD_CLKID_TDMOUT_C>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_SCLK_SEL>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>,
+				 <&clkc_audio AUD_CLKID_TDMOUT_C_LRCLK>;
+			clock-names = "pclk", "sclk", "sclk_sel",
+				      "lrclk", "lrclk_sel";
+			status = "disabled";
+		};
+
+		tohdmitx: audio-controller@744 {
+			compatible = "amlogic,sm1-tohdmitx",
+				     "amlogic,g12a-tohdmitx";
+			reg = <0x0 0x744 0x0 0x4>;
+			#sound-dai-cells = <1>;
+			sound-name-prefix = "TOHDMITX";
+			resets = <&clkc_audio AUD_RESET_TOHDMITX>;
+			status = "disabled";
+		};
+
+		toddr_d: audio-controller@840 {
+			compatible = "amlogic,sm1-toddr",
+				     "amlogic,axg-toddr";
+			reg = <0x0 0x840 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "TODDR_D";
+			interrupts = <GIC_SPI 49 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_TODDR_D>;
+			resets = <&arb AXG_ARB_TODDR_D>,
+				 <&clkc_audio AUD_RESET_TODDR_D>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+
+		frddr_d: audio-controller@880 {
+			 compatible = "amlogic,sm1-frddr",
+				      "amlogic,axg-frddr";
+			reg = <0x0 0x880 0x0 0x2c>;
+			#sound-dai-cells = <0>;
+			sound-name-prefix = "FRDDR_D";
+			interrupts = <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkc_audio AUD_CLKID_FRDDR_D>;
+			resets = <&arb AXG_ARB_FRDDR_D>,
+				 <&clkc_audio AUD_RESET_FRDDR_D>;
+			reset-names = "arb", "rst";
+			status = "disabled";
+		};
+	};
+
+	pdm: audio-controller@61000 {
+		compatible = "amlogic,sm1-pdm",
+			     "amlogic,axg-pdm";
+		reg = <0x0 0x61000 0x0 0x34>;
+		#sound-dai-cells = <0>;
+		sound-name-prefix = "PDM";
+		clocks = <&clkc_audio AUD_CLKID_PDM>,
+			 <&clkc_audio AUD_CLKID_PDM_DCLK>,
+			 <&clkc_audio AUD_CLKID_PDM_SYSCLK>;
+		clock-names = "pclk", "dclk", "sysclk";
+		status = "disabled";
+	};
+};
+
 &cecb_AO {
 	compatible = "amlogic,meson-sm1-ao-cec";
 };
@@ -134,10 +461,27 @@
 	power-domains = <&pwrc PWRC_SM1_ETH_ID>;
 };
 
+&gpio_intc {
+	compatible = "amlogic,meson-sm1-gpio-intc",
+		     "amlogic,meson-gpio-intc";
+};
+
+&pcie {
+	power-domains = <&pwrc PWRC_SM1_PCIE_ID>;
+};
+
 &pwrc {
 	compatible = "amlogic,meson-sm1-pwrc";
 };
 
+&simplefb_cvbs {
+	power-domains = <&pwrc PWRC_SM1_VPU_ID>;
+};
+
+&simplefb_hdmi {
+	power-domains = <&pwrc PWRC_SM1_VPU_ID>;
+};
+
 &vpu {
 	power-domains = <&pwrc PWRC_SM1_VPU_ID>;
 };
diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi
index 26a039a028b8..1f3c80aafbd7 100644
--- a/arch/arm64/boot/dts/arm/juno-base.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-base.dtsi
@@ -6,7 +6,6 @@
 	/*
 	 *  Devices shared by all Juno boards
 	 */
-	dma-ranges = <0 0 0 0 0x100 0>;
 
 	memtimer: timer@2a810000 {
 		compatible = "arm,armv7-timer-mem";
@@ -35,6 +34,18 @@
 		clock-names = "apb_pclk";
 	};
 
+	smmu_gpu: iommu@2b400000 {
+		compatible = "arm,mmu-400", "arm,smmu-v1";
+		reg = <0x0 0x2b400000 0x0 0x10000>;
+		interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+		#iommu-cells = <1>;
+		#global-interrupts = <1>;
+		power-domains = <&scpi_devpd 1>;
+		dma-coherent;
+		status = "disabled";
+	};
+
 	smmu_pcie: iommu@2b500000 {
 		compatible = "arm,mmu-401", "arm,smmu-v1";
 		reg = <0x0 0x2b500000 0x0 0x10000>;
@@ -487,6 +498,21 @@
 		};
 	};
 
+	gpu: gpu@2d000000 {
+		compatible = "arm,juno-mali", "arm,mali-t624";
+		reg = <0 0x2d000000 0 0x10000>;
+		interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "gpu", "job", "mmu";
+		clocks = <&scpi_dvfs 2>;
+		power-domains = <&scpi_devpd 1>;
+		dma-coherent;
+		/* The SMMU is only really of interest to bare-metal hypervisors */
+		/* iommus = <&smmu_gpu 0>; */
+		status = "disabled";
+	};
+
 	sram: sram@2e000000 {
 		compatible = "arm,juno-sram-ns", "mmio-sram";
 		reg = <0x0 0x2e000000 0x0 0x8000>;
diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
index e5e265dfa902..2870b5eeb198 100644
--- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
@@ -8,10 +8,10 @@
  */
 / {
 	/* SoC fixed clocks */
-	soc_uartclk: refclk7273800hz {
+	soc_uartclk: refclk7372800hz {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
-		clock-frequency = <7273800>;
+		clock-frequency = <7372800>;
 		clock-output-names = "juno:uartclk";
 	};
 
diff --git a/arch/arm64/boot/dts/broadcom/Makefile b/arch/arm64/boot/dts/broadcom/Makefile
index d1d31ccad758..cb7de8d99223 100644
--- a/arch/arm64/boot/dts/broadcom/Makefile
+++ b/arch/arm64/boot/dts/broadcom/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_ARCH_BCM2835) += bcm2837-rpi-3-a-plus.dtb \
+dtb-$(CONFIG_ARCH_BCM2835) += bcm2711-rpi-4-b.dtb \
+			      bcm2837-rpi-3-a-plus.dtb \
 			      bcm2837-rpi-3-b.dtb \
 			      bcm2837-rpi-3-b-plus.dtb \
 			      bcm2837-rpi-cm3-io3.dtb
diff --git a/arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dts b/arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dts
new file mode 100644
index 000000000000..d24c53682e44
--- /dev/null
+++ b/arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dts
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm/bcm2711-rpi-4-b.dts"
diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
index a76f620f7f35..6721966140f4 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
@@ -18,8 +18,8 @@
 
 / {
 	compatible = "samsung,exynos5433";
-	#address-cells = <1>;
-	#size-cells = <1>;
+	#address-cells = <2>;
+	#size-cells = <2>;
 
 	interrupt-parent = <&gic>;
 
@@ -249,57 +249,6 @@
 		};
 	};
 
-	gpu: gpu@14ac0000 {
-		compatible = "samsung,exynos5433-mali", "arm,mali-t760";
-		reg = <0x14ac0000 0x5000>;
-		interrupts = <GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 283 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "job", "mmu", "gpu";
-		clocks = <&cmu_g3d CLK_ACLK_G3D>;
-		clock-names = "core";
-		power-domains = <&pd_g3d>;
-		operating-points-v2 = <&gpu_opp_table>;
-		status = "disabled";
-
-		gpu_opp_table: opp_table {
-			compatible = "operating-points-v2";
-
-			opp-160000000 {
-				opp-hz = /bits/ 64 <160000000>;
-				opp-microvolt = <1000000>;
-			};
-			opp-267000000 {
-				opp-hz = /bits/ 64 <267000000>;
-				opp-microvolt = <1000000>;
-			};
-			opp-350000000 {
-				opp-hz = /bits/ 64 <350000000>;
-				opp-microvolt = <1025000>;
-			};
-			opp-420000000 {
-				opp-hz = /bits/ 64 <420000000>;
-				opp-microvolt = <1025000>;
-			};
-			opp-500000000 {
-				opp-hz = /bits/ 64 <500000000>;
-				opp-microvolt = <1075000>;
-			};
-			opp-550000000 {
-				opp-hz = /bits/ 64 <550000000>;
-				opp-microvolt = <1125000>;
-			};
-			opp-600000000 {
-				opp-hz = /bits/ 64 <600000000>;
-				opp-microvolt = <1150000>;
-			};
-			opp-700000000 {
-				opp-hz = /bits/ 64 <700000000>;
-				opp-microvolt = <1150000>;
-			};
-		};
-	};
-
 	psci {
 		compatible = "arm,psci";
 		method = "smc";
@@ -311,7 +260,7 @@
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
-		ranges;
+		ranges = <0x0 0x0 0x0 0x18000000>;
 
 		chipid@10000000 {
 			compatible = "samsung,exynos4210-chipid";
@@ -754,7 +703,7 @@
 			status = "disabled";
 		};
 
-		mct@101c0000 {
+		timer@101c0000 {
 			compatible = "samsung,exynos4210-mct";
 			reg = <0x101c0000 0x800>;
 			interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,
@@ -1125,6 +1074,57 @@
 			power-domains = <&pd_gscl>;
 		};
 
+		gpu: gpu@14ac0000 {
+			compatible = "samsung,exynos5433-mali", "arm,mali-t760";
+			reg = <0x14ac0000 0x5000>;
+			interrupts = <GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 283 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "job", "mmu", "gpu";
+			clocks = <&cmu_g3d CLK_ACLK_G3D>;
+			clock-names = "core";
+			power-domains = <&pd_g3d>;
+			operating-points-v2 = <&gpu_opp_table>;
+			status = "disabled";
+
+			gpu_opp_table: opp_table {
+				compatible = "operating-points-v2";
+
+				opp-160000000 {
+					opp-hz = /bits/ 64 <160000000>;
+					opp-microvolt = <1000000>;
+				};
+				opp-267000000 {
+					opp-hz = /bits/ 64 <267000000>;
+					opp-microvolt = <1000000>;
+				};
+				opp-350000000 {
+					opp-hz = /bits/ 64 <350000000>;
+					opp-microvolt = <1025000>;
+				};
+				opp-420000000 {
+					opp-hz = /bits/ 64 <420000000>;
+					opp-microvolt = <1025000>;
+				};
+				opp-500000000 {
+					opp-hz = /bits/ 64 <500000000>;
+					opp-microvolt = <1075000>;
+				};
+				opp-550000000 {
+					opp-hz = /bits/ 64 <550000000>;
+					opp-microvolt = <1125000>;
+				};
+				opp-600000000 {
+					opp-hz = /bits/ 64 <600000000>;
+					opp-microvolt = <1150000>;
+				};
+				opp-700000000 {
+					opp-hz = /bits/ 64 <700000000>;
+					opp-microvolt = <1150000>;
+				};
+			};
+		};
+
 		scaler_0: scaler@15000000 {
 			compatible = "samsung,exynos5433-scaler";
 			reg = <0x15000000 0x1294>;
@@ -1179,9 +1179,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x13a00000 0x1000>;
 			interrupts = <GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_disp CLK_PCLK_SMMU_DECON0X>,
-				<&cmu_disp CLK_ACLK_SMMU_DECON0X>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_disp CLK_ACLK_SMMU_DECON0X>,
+				<&cmu_disp CLK_PCLK_SMMU_DECON0X>;
 			power-domains = <&pd_disp>;
 			#iommu-cells = <0>;
 		};
@@ -1190,9 +1190,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x13a10000 0x1000>;
 			interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_disp CLK_PCLK_SMMU_DECON1X>,
-				<&cmu_disp CLK_ACLK_SMMU_DECON1X>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_disp CLK_ACLK_SMMU_DECON1X>,
+				<&cmu_disp CLK_PCLK_SMMU_DECON1X>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_disp>;
 		};
@@ -1201,9 +1201,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x13a20000 0x1000>;
 			interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_disp CLK_PCLK_SMMU_TV0X>,
-				<&cmu_disp CLK_ACLK_SMMU_TV0X>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_disp CLK_ACLK_SMMU_TV0X>,
+				<&cmu_disp CLK_PCLK_SMMU_TV0X>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_disp>;
 		};
@@ -1212,9 +1212,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x13a30000 0x1000>;
 			interrupts = <GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_disp CLK_PCLK_SMMU_TV1X>,
-				<&cmu_disp CLK_ACLK_SMMU_TV1X>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_disp CLK_ACLK_SMMU_TV1X>,
+				<&cmu_disp CLK_PCLK_SMMU_TV1X>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_disp>;
 		};
@@ -1256,9 +1256,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x15040000 0x1000>;
 			interrupts = <GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_mscl CLK_PCLK_SMMU_M2MSCALER0>,
-				 <&cmu_mscl CLK_ACLK_SMMU_M2MSCALER0>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_mscl CLK_ACLK_SMMU_M2MSCALER0>,
+				<&cmu_mscl CLK_PCLK_SMMU_M2MSCALER0>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_mscl>;
 		};
@@ -1267,9 +1267,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x15050000 0x1000>;
 			interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_mscl CLK_PCLK_SMMU_M2MSCALER1>,
-				 <&cmu_mscl CLK_ACLK_SMMU_M2MSCALER1>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_mscl CLK_ACLK_SMMU_M2MSCALER1>,
+				<&cmu_mscl CLK_PCLK_SMMU_M2MSCALER1>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_mscl>;
 		};
@@ -1278,9 +1278,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x15060000 0x1000>;
 			interrupts = <GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_mscl CLK_PCLK_SMMU_JPEG>,
-				 <&cmu_mscl CLK_ACLK_SMMU_JPEG>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_mscl CLK_ACLK_SMMU_JPEG>,
+				<&cmu_mscl CLK_PCLK_SMMU_JPEG>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_mscl>;
 		};
@@ -1289,9 +1289,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x15200000 0x1000>;
 			interrupts = <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_mfc CLK_PCLK_SMMU_MFC_0>,
-				 <&cmu_mfc CLK_ACLK_SMMU_MFC_0>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_mfc CLK_ACLK_SMMU_MFC_0>,
+				<&cmu_mfc CLK_PCLK_SMMU_MFC_0>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_mfc>;
 		};
@@ -1300,9 +1300,9 @@
 			compatible = "samsung,exynos-sysmmu";
 			reg = <0x15210000 0x1000>;
 			interrupts = <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "pclk", "aclk";
-			clocks = <&cmu_mfc CLK_PCLK_SMMU_MFC_1>,
-				 <&cmu_mfc CLK_ACLK_SMMU_MFC_1>;
+			clock-names = "aclk", "pclk";
+			clocks = <&cmu_mfc CLK_ACLK_SMMU_MFC_1>,
+				<&cmu_mfc CLK_PCLK_SMMU_MFC_1>;
 			#iommu-cells = <0>;
 			power-domains = <&pd_mfc>;
 		};
@@ -1452,7 +1452,7 @@
 		i2s1: i2s@14d60000 {
 			compatible = "samsung,exynos7-i2s";
 			reg = <0x14d60000 0x100>;
-			dmas = <&pdma0 31 &pdma0 30>;
+			dmas = <&pdma0 31>, <&pdma0 30>;
 			dma-names = "tx", "rx";
 			interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&cmu_peric CLK_PCLK_I2S1>,
@@ -1811,7 +1811,7 @@
 			i2s0: i2s@11440000 {
 				compatible = "samsung,exynos7-i2s";
 				reg = <0x11440000 0x100>;
-				dmas = <&adma 0 &adma 2>;
+				dmas = <&adma 0>, <&adma 2>;
 				dma-names = "tx", "rx";
 				interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
 				#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi
index bcb9d8cee267..3a00ef0a17ff 100644
--- a/arch/arm64/boot/dts/exynos/exynos7.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
@@ -12,8 +12,8 @@
 / {
 	compatible = "samsung,exynos7";
 	interrupt-parent = <&gic>;
-	#address-cells = <1>;
-	#size-cells = <1>;
+	#address-cells = <2>;
+	#size-cells = <2>;
 
 	aliases {
 		pinctrl0 = &pinctrl_alive;
@@ -78,17 +78,6 @@
 		};
 	};
 
-	gpu: gpu@14ac0000 {
-		compatible = "samsung,exynos5433-mali", "arm,mali-t760";
-		reg = <0x14ac0000 0x5000>;
-		interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 242 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "job", "mmu", "gpu";
-		status = "disabled";
-		/* TODO: operating points for DVFS, cooling device */
-	};
-
 	psci {
 		compatible = "arm,psci-0.2";
 		method = "smc";
@@ -98,7 +87,7 @@
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
-		ranges;
+		ranges = <0 0 0 0x18000000>;
 
 		chipid@10000000 {
 			compatible = "samsung,exynos4210-chipid";
@@ -523,6 +512,17 @@
 			status = "disabled";
 		};
 
+		gpu: gpu@14ac0000 {
+			compatible = "samsung,exynos5433-mali", "arm,mali-t760";
+			reg = <0x14ac0000 0x5000>;
+			interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 242 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "job", "mmu", "gpu";
+			status = "disabled";
+			/* TODO: operating points for DVFS, cooling device */
+		};
+
 		mmc_0: mmc@15740000 {
 			compatible = "samsung,exynos7-dw-mshc-smu";
 			interrupts = <GIC_SPI 201 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile
index 93fce8f0c66d..38e344a2f0ff 100644
--- a/arch/arm64/boot/dts/freescale/Makefile
+++ b/arch/arm64/boot/dts/freescale/Makefile
@@ -22,6 +22,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb
 dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb
 
 dtb-$(CONFIG_ARCH_MXC) += imx8mm-evk.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8mn-evk.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8mn-ddr4-evk.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8mq-evk.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8mq-hummingboard-pulse.dtb
@@ -31,4 +32,7 @@ dtb-$(CONFIG_ARCH_MXC) += imx8mq-pico-pi.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8mq-zii-ultra-rmb3.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8mq-zii-ultra-zest.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8qxp-ai_ml.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8qxp-colibri-eval-v3.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx8qxp-mek.dtb
+
+dtb-$(CONFIG_ARCH_S32) += s32v234-evb.dtb
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
index 078a5010228c..5b9d4b35dd35 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
@@ -194,8 +194,6 @@
 	};
 
 	fpga@66 {
-		#address-cells = <1>;
-		#size-cells = <0>;
 		compatible = "fsl,ls1028aqds-fpga", "fsl,fpga-qixis-i2c",
 			     "simple-mfd";
 		reg = <0x66>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
index 1a69221d9a1b..9720a190049f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
@@ -184,3 +184,7 @@
 &sata {
 	status = "okay";
 };
+
+&usb1 {
+	dr_mode = "otg";
+};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 72b9a75976a1..a6f9b7784e8f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -82,27 +82,13 @@
 	dpclk: clock-controller@f1f0000 {
 		compatible = "fsl,ls1028a-plldig";
 		reg = <0x0 0xf1f0000 0x0 0xffff>;
-		#clock-cells = <1>;
-		clocks = <&osc_27m>;
-	};
-
-	aclk: clock-axi {
-		compatible = "fixed-clock";
-		#clock-cells = <0>;
-		clock-frequency = <650000000>;
-		clock-output-names= "aclk";
-	};
-
-	pclk: clock-apb {
-		compatible = "fixed-clock";
 		#clock-cells = <0>;
-		clock-frequency = <650000000>;
-		clock-output-names= "pclk";
+		clocks = <&osc_27m>;
 	};
 
 	reboot {
 		compatible ="syscon-reboot";
-		regmap = <&dcfg>;
+		regmap = <&rst>;
 		offset = <0xb0>;
 		mask = <0x02>;
 	};
@@ -142,6 +128,37 @@
 		};
 	};
 
+	thermal-zones {
+		core-cluster {
+			polling-delay-passive = <1000>;
+			polling-delay = <5000>;
+			thermal-sensors = <&tmu 0>;
+
+			trips {
+				core_cluster_alert: core-cluster-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+
+				core_cluster_crit: core-cluster-crit {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&core_cluster_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
 	soc: soc {
 		compatible = "simple-bus";
 		#address-cells = <2>;
@@ -158,7 +175,13 @@
 		dcfg: syscon@1e00000 {
 			compatible = "fsl,ls1028a-dcfg", "syscon";
 			reg = <0x0 0x1e00000 0x0 0x10000>;
-			big-endian;
+			little-endian;
+		};
+
+		rst: syscon@1e60000 {
+			compatible = "syscon";
+			reg = <0x0 0x1e60000 0x0 0x10000>;
+			little-endian;
 		};
 
 		scfg: syscon@1fc0000 {
@@ -542,7 +565,7 @@
 			status = "disabled";
 		};
 
-		tmu: tmu@1f00000 {
+		tmu: tmu@1f80000 {
 			compatible = "fsl,qoriq-tmu";
 			reg = <0x0 0x1f80000 0x0 0x10000>;
 			interrupts = <0 23 0x4>;
@@ -567,7 +590,7 @@
 					       0x00010004 0x0000003d
 					       0x00010005 0x00000045
 					       0x00010006 0x0000004d
-					       0x00010007 0x00000045
+					       0x00010007 0x00000055
 					       0x00010008 0x0000005e
 					       0x00010009 0x00000066
 					       0x0001000a 0x0000006e
@@ -594,37 +617,6 @@
 			#thermal-sensor-cells = <1>;
 		};
 
-		thermal-zones {
-			core-cluster {
-				polling-delay-passive = <1000>;
-				polling-delay = <5000>;
-				thermal-sensors = <&tmu 0>;
-
-				trips {
-					core_cluster_alert: core-cluster-alert {
-						temperature = <85000>;
-						hysteresis = <2000>;
-						type = "passive";
-					};
-
-					core_cluster_crit: core-cluster-crit {
-						temperature = <95000>;
-						hysteresis = <2000>;
-						type = "critical";
-					};
-				};
-
-				cooling-maps {
-					map0 {
-						trip = <&core_cluster_alert>;
-						cooling-device =
-							<&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
-							<&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
-					};
-				};
-			};
-		};
-
 		pcie@1f0000000 { /* Integrated Endpoint Root Complex */
 			compatible = "pci-host-ecam-generic";
 			reg = <0x01 0xf0000000 0x0 0x100000>;
@@ -679,7 +671,8 @@
 		interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>,
 			     <0 223 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "DE", "SE";
-		clocks = <&dpclk 0>, <&aclk>, <&aclk>, <&pclk>;
+		clocks = <&dpclk>, <&clockgen 2 2>, <&clockgen 2 2>,
+			 <&clockgen 2 2>;
 		clock-names = "pxlclk", "mclk", "aclk", "pclk";
 		arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
 		arm,malidp-arqos-value = <0xd000d000>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
index 6a6514d0e5a9..0c742befb761 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
@@ -122,6 +122,10 @@
 	};
 };
 
+&usb1 {
+	dr_mode = "otg";
+};
+
 #include "fsl-ls1046-post.dtsi"
 
 &fman0 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
index 8e925df6c01c..90b198939251 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
@@ -95,5 +95,6 @@
 };
 
 &usb1 {
+	dr_mode = "otg";
 	status = "okay";
 };
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index b032f3890c8c..e883fe0fc1b7 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -6,6 +6,7 @@
 
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/thermal.h>
 
 /memreserve/ 0x80000000 0x00010000;
 
@@ -20,7 +21,7 @@
 		#size-cells = <0>;
 
 		// 8 clusters having 2 Cortex-A72 cores each
-		cpu@0 {
+		cpu0: cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -34,9 +35,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster0_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@1 {
+		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -50,9 +52,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster0_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@100 {
+		cpu100: cpu@100 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -66,9 +69,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster1_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@101 {
+		cpu101: cpu@101 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -82,9 +86,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster1_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@200 {
+		cpu200: cpu@200 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -98,9 +103,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster2_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@201 {
+		cpu201: cpu@201 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -114,9 +120,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster2_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@300 {
+		cpu300: cpu@300 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -130,9 +137,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster3_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@301 {
+		cpu301: cpu@301 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -146,9 +154,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster3_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@400 {
+		cpu400: cpu@400 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -162,9 +171,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster4_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@401 {
+		cpu401: cpu@401 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -178,9 +188,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster4_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@500 {
+		cpu500: cpu@500 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -194,9 +205,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster5_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@501 {
+		cpu501: cpu@501 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -210,9 +222,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster5_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@600 {
+		cpu600: cpu@600 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -226,9 +239,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster6_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@601 {
+		cpu601: cpu@601 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -242,9 +256,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster6_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@700 {
+		cpu700: cpu@700 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -258,9 +273,10 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster7_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
-		cpu@701 {
+		cpu701: cpu@701 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72";
 			enable-method = "psci";
@@ -274,6 +290,7 @@
 			i-cache-sets = <192>;
 			next-level-cache = <&cluster7_l2>;
 			cpu-idle-states = <&cpu_pw15>;
+			#cooling-cells = <2>;
 		};
 
 		cluster0_l2: l2-cache0 {
@@ -418,6 +435,51 @@
 		clock-output-names = "sysclk";
 	};
 
+	thermal-zones {
+		core_thermal1: core-thermal1 {
+			polling-delay-passive = <1000>;
+			polling-delay = <5000>;
+			thermal-sensors = <&tmu 0>;
+
+			trips {
+				core_cluster_alert: core-cluster-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+
+				core_cluster_crit: core-cluster-crit {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&core_cluster_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu100 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu101 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu200 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu201 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu300 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu301 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu400 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu401 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu500 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu501 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu600 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu601 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu700 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu701 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
 	soc {
 		compatible = "simple-bus";
 		#address-cells = <2>;
@@ -478,6 +540,20 @@
 			little-endian;
 		};
 
+		tmu: tmu@1f80000 {
+			compatible = "fsl,qoriq-tmu";
+			reg = <0x0 0x1f80000 0x0 0x10000>;
+			interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+			fsl,tmu-range = <0x800000e6 0x8001017d>;
+			fsl,tmu-calibration =
+				/* Calibration data group 1 */
+				<0x00000000 0x00000035
+				/* Calibration data group 2 */
+				0x00010001 0x00000154>;
+			little-endian;
+			#thermal-sensor-cells = <1>;
+		};
+
 		i2c0: i2c@2000000 {
 			compatible = "fsl,vf610-i2c";
 			#address-cells = <1>;
@@ -586,6 +662,7 @@
 			reg = <0x0 0x2140000 0x0 0x10000>;
 			interrupts = <0 28 0x4>; /* Level high type */
 			clocks = <&clockgen 4 1>;
+			dma-coherent;
 			voltage-ranges = <1800 1800 3300 3300>;
 			sdhci,auto-cmd12;
 			little-endian;
@@ -598,6 +675,7 @@
 			reg = <0x0 0x2150000 0x0 0x10000>;
 			interrupts = <0 63 0x4>; /* Level high type */
 			clocks = <&clockgen 4 1>;
+			dma-coherent;
 			voltage-ranges = <1800 1800 3300 3300>;
 			sdhci,auto-cmd12;
 			broken-cd;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
index f7a15f3904c2..28ab17a277bb 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
@@ -62,6 +62,8 @@
 
 		cpudai: simple-audio-card,cpu {
 			sound-dai = <&sai3>;
+			dai-tdm-slot-num = <2>;
+			dai-tdm-slot-width = <32>;
 		};
 
 		simple-audio-card,codec {
@@ -94,68 +96,6 @@
 	};
 };
 
-&sai3 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_sai3>;
-	assigned-clocks = <&clk IMX8MM_CLK_SAI3>;
-	assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>;
-	assigned-clock-rates = <24576000>;
-	status = "okay";
-};
-
-&snvs_pwrkey {
-	status = "okay";
-};
-
-&uart2 { /* console */
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart2>;
-	status = "okay";
-};
-
-&usbotg1 {
-	dr_mode = "otg";
-	hnp-disable;
-	srp-disable;
-	adp-disable;
-	usb-role-switch;
-	status = "okay";
-
-	port {
-		usb1_drd_sw: endpoint {
-			remote-endpoint = <&typec1_dr_sw>;
-		};
-	};
-};
-
-&usdhc2 {
-	pinctrl-names = "default", "state_100mhz", "state_200mhz";
-	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
-	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
-	pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
-	cd-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
-	bus-width = <4>;
-	vmmc-supply = <&reg_usdhc2_vmmc>;
-	status = "okay";
-};
-
-&usdhc3 {
-	pinctrl-names = "default", "state_100mhz", "state_200mhz";
-	pinctrl-0 = <&pinctrl_usdhc3>;
-	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
-	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
-	bus-width = <8>;
-	non-removable;
-	status = "okay";
-};
-
-&wdog1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_wdog>;
-	fsl,ext-reset-output;
-	status = "okay";
-};
-
 &i2c1 {
 	clock-frequency = <400000>;
 	pinctrl-names = "default";
@@ -306,6 +246,86 @@
 	};
 };
 
+&i2c3 {
+	clock-frequency = <400000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	status = "okay";
+
+	pca6416: gpio@20 {
+		compatible = "ti,tca6416";
+		reg = <0x20>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
+};
+
+&sai3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sai3>;
+	assigned-clocks = <&clk IMX8MM_CLK_SAI3>;
+	assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>;
+	assigned-clock-rates = <24576000>;
+	status = "okay";
+};
+
+&snvs_pwrkey {
+	status = "okay";
+};
+
+&uart2 { /* console */
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	status = "okay";
+};
+
+&usbotg1 {
+	dr_mode = "otg";
+	hnp-disable;
+	srp-disable;
+	adp-disable;
+	usb-role-switch;
+	status = "okay";
+
+	port {
+		usb1_drd_sw: endpoint {
+			remote-endpoint = <&typec1_dr_sw>;
+		};
+	};
+};
+
+&usdhc2 {
+	assigned-clocks = <&clk IMX8MM_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
+	cd-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
+	bus-width = <4>;
+	vmmc-supply = <&reg_usdhc2_vmmc>;
+	status = "okay";
+};
+
+&usdhc3 {
+	assigned-clocks = <&clk IMX8MM_CLK_USDHC3_ROOT>;
+	assigned-clock-rates = <400000000>;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc3>;
+	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+	bus-width = <8>;
+	non-removable;
+	status = "okay";
+};
+
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+	fsl,ext-reset-output;
+	status = "okay";
+};
+
 &iomuxc {
 	pinctrl-names = "default";
 
@@ -355,6 +375,13 @@
 		>;
 	};
 
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX8MM_IOMUXC_I2C3_SCL_I2C3_SCL			0x400001c3
+			MX8MM_IOMUXC_I2C3_SDA_I2C3_SDA			0x400001c3
+		>;
+	};
+
 	pinctrl_pmic: pmicirq {
 		fsl,pins = <
 			MX8MM_IOMUXC_GPIO1_IO03_GPIO1_IO3		0x41
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index 23c8fad7932b..3d95b66a2d71 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -12,7 +12,6 @@
 #include "imx8mm-pinfunc.h"
 
 / {
-	compatible = "fsl,imx8mm";
 	interrupt-parent = <&gic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
@@ -426,7 +425,7 @@
 			};
 
 			ocotp: ocotp-ctrl@30350000 {
-				compatible = "fsl,imx8mm-ocotp", "fsl,imx7d-ocotp", "syscon";
+				compatible = "fsl,imx8mm-ocotp", "syscon";
 				reg = <0x30350000 0x10000>;
 				clocks = <&clk IMX8MM_CLK_OCOTP_ROOT>;
 				/* For nvmem subnodes */
@@ -479,14 +478,18 @@
 						<&clk IMX8MM_CLK_AUDIO_AHB>,
 						<&clk IMX8MM_CLK_IPG_AUDIO_ROOT>,
 						<&clk IMX8MM_SYS_PLL3>,
-						<&clk IMX8MM_VIDEO_PLL1>;
+						<&clk IMX8MM_VIDEO_PLL1>,
+						<&clk IMX8MM_AUDIO_PLL1>,
+						<&clk IMX8MM_AUDIO_PLL2>;
 				assigned-clock-parents = <&clk IMX8MM_SYS_PLL3_OUT>,
 							 <&clk IMX8MM_SYS_PLL1_800M>;
 				assigned-clock-rates = <0>,
 							<400000000>,
 							<400000000>,
 							<750000000>,
-							<594000000>;
+							<594000000>,
+							<393216000>,
+							<361267200>;
 			};
 
 			src: reset-controller@30390000 {
@@ -698,8 +701,6 @@
 					 <&clk IMX8MM_CLK_NAND_USDHC_BUS>,
 					 <&clk IMX8MM_CLK_USDHC1_ROOT>;
 				clock-names = "ipg", "ahb", "per";
-				assigned-clocks = <&clk IMX8MM_CLK_USDHC1>;
-				assigned-clock-rates = <400000000>;
 				fsl,tuning-start-tap = <20>;
 				fsl,tuning-step= <2>;
 				bus-width = <4>;
@@ -728,8 +729,6 @@
 					 <&clk IMX8MM_CLK_NAND_USDHC_BUS>,
 					 <&clk IMX8MM_CLK_USDHC3_ROOT>;
 				clock-names = "ipg", "ahb", "per";
-				assigned-clocks = <&clk IMX8MM_CLK_USDHC3_ROOT>;
-				assigned-clock-rates = <400000000>;
 				fsl,tuning-start-tap = <20>;
 				fsl,tuning-step= <2>;
 				bus-width = <4>;
@@ -741,7 +740,7 @@
 				reg = <0x30bd0000 0x10000>;
 				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>,
-					 <&clk IMX8MM_CLK_SDMA1_ROOT>;
+					 <&clk IMX8MM_CLK_AHB>;
 				clock-names = "ipg", "ahb";
 				#dma-cells = <3>;
 				fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin";
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
index 11c705d225d0..071949412caf 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
@@ -6,205 +6,18 @@
 /dts-v1/;
 
 #include "imx8mn.dtsi"
+#include "imx8mn-evk.dtsi"
 
 / {
 	model = "NXP i.MX8MNano DDR4 EVK board";
 	compatible = "fsl,imx8mn-ddr4-evk", "fsl,imx8mn";
-
-	chosen {
-		stdout-path = &uart2;
-	};
-
-	reg_usdhc2_vmmc: regulator-usdhc2 {
-		compatible = "regulator-fixed";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_reg_usdhc2_vmmc>;
-		regulator-name = "VSD_3V3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-		gpio = <&gpio2 19 GPIO_ACTIVE_HIGH>;
-		enable-active-high;
-	};
 };
 
 &A53_0 {
 	cpu-supply = <&buck2_reg>;
 };
 
-&iomuxc {
-	pinctrl-names = "default";
-
-	pinctrl_fec1: fec1grp {
-		fsl,pins = <
-			MX8MN_IOMUXC_ENET_MDC_ENET1_MDC		0x3
-			MX8MN_IOMUXC_ENET_MDIO_ENET1_MDIO	0x3
-			MX8MN_IOMUXC_ENET_TD3_ENET1_RGMII_TD3	0x1f
-			MX8MN_IOMUXC_ENET_TD2_ENET1_RGMII_TD2	0x1f
-			MX8MN_IOMUXC_ENET_TD1_ENET1_RGMII_TD1	0x1f
-			MX8MN_IOMUXC_ENET_TD0_ENET1_RGMII_TD0	0x1f
-			MX8MN_IOMUXC_ENET_RD3_ENET1_RGMII_RD3	0x91
-			MX8MN_IOMUXC_ENET_RD2_ENET1_RGMII_RD2	0x91
-			MX8MN_IOMUXC_ENET_RD1_ENET1_RGMII_RD1	0x91
-			MX8MN_IOMUXC_ENET_RD0_ENET1_RGMII_RD0	0x91
-			MX8MN_IOMUXC_ENET_TXC_ENET1_RGMII_TXC	0x1f
-			MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC	0x91
-			MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL	0x91
-			MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL	0x1f
-			MX8MN_IOMUXC_SAI2_RXC_GPIO4_IO22	0x19
-		>;
-	};
-
-	pinctrl_i2c1: i2c1grp {
-		fsl,pins = <
-			MX8MN_IOMUXC_I2C1_SCL_I2C1_SCL		0x400001c3
-			MX8MN_IOMUXC_I2C1_SDA_I2C1_SDA		0x400001c3
-		>;
-	};
-
-	pinctrl_pmic: pmicirq {
-		fsl,pins = <
-			MX8MN_IOMUXC_GPIO1_IO03_GPIO1_IO3	0x41
-		>;
-	};
-
-	pinctrl_reg_usdhc2_vmmc: regusdhc2vmmc {
-		fsl,pins = <
-			MX8MN_IOMUXC_SD2_RESET_B_GPIO2_IO19	0x41
-		>;
-	};
-
-	pinctrl_uart2: uart2grp {
-		fsl,pins = <
-			MX8MN_IOMUXC_UART2_RXD_UART2_DCE_RX	0x140
-			MX8MN_IOMUXC_UART2_TXD_UART2_DCE_TX	0x140
-		>;
-	};
-
-	pinctrl_usdhc2_gpio: usdhc2grpgpio {
-		fsl,pins = <
-			MX8MN_IOMUXC_GPIO1_IO15_GPIO1_IO15	0x1c4
-		>;
-	};
-
-	pinctrl_usdhc2: usdhc2grp {
-		fsl,pins = <
-			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x190
-			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d0
-			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d0
-			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d0
-			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d0
-			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d0
-			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
-		>;
-	};
-
-	pinctrl_usdhc2_100mhz: usdhc2grp100mhz {
-		fsl,pins = <
-			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x194
-			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d4
-			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d4
-			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d4
-			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d4
-			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d4
-			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
-		>;
-	};
-
-	pinctrl_usdhc2_200mhz: usdhc2grp200mhz {
-		fsl,pins = <
-			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x196
-			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d6
-			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d6
-			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d6
-			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d6
-			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d6
-			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
-		>;
-	};
-
-	pinctrl_usdhc3: usdhc3grp {
-		fsl,pins = <
-			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000190
-			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d0
-			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d0
-			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d0
-			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d0
-			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d0
-			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d0
-			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d0
-			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d0
-			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d0
-			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x190
-		>;
-	};
-
-	pinctrl_usdhc3_100mhz: usdhc3grp100mhz {
-		fsl,pins = <
-			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000194
-			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d4
-			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d4
-			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d4
-			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d4
-			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d4
-			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d4
-			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d4
-			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d4
-			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d4
-			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x194
-		>;
-	};
-
-	pinctrl_usdhc3_200mhz: usdhc3grp200mhz {
-		fsl,pins = <
-			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000196
-			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d6
-			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d6
-			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d6
-			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d6
-			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d6
-			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d6
-			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d6
-			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d6
-			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d6
-			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x196
-		>;
-	};
-
-	pinctrl_wdog: wdoggrp {
-		fsl,pins = <
-			MX8MN_IOMUXC_GPIO1_IO02_WDOG1_WDOG_B		0xc6
-		>;
-	};
-};
-
-&fec1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_fec1>;
-	phy-mode = "rgmii-id";
-	phy-handle = <&ethphy0>;
-	fsl,magic-packet;
-	status = "okay";
-
-	mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		ethphy0: ethernet-phy@0 {
-			compatible = "ethernet-phy-ieee802.3-c22";
-			reg = <0>;
-			at803x,led-act-blind-workaround;
-			at803x,eee-disabled;
-			at803x,vddio-1p8v;
-		};
-	};
-};
-
 &i2c1 {
-	clock-frequency = <400000>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_i2c1>;
-	status = "okay";
-
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
@@ -309,40 +122,10 @@
 	};
 };
 
-&snvs_pwrkey {
-	status = "okay";
-};
-
-&uart2 { /* console */
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_uart2>;
-	status = "okay";
-};
-
-&usdhc2 {
-	pinctrl-names = "default", "state_100mhz", "state_200mhz";
-	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
-	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
-	pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
-	cd-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
-	bus-width = <4>;
-	vmmc-supply = <&reg_usdhc2_vmmc>;
-	status = "okay";
-};
-
-&usdhc3 {
-	pinctrl-names = "default", "state_100mhz", "state_200mhz";
-	pinctrl-0 = <&pinctrl_usdhc3>;
-	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
-	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
-	bus-width = <8>;
-	non-removable;
-	status = "okay";
-};
-
-&wdog1 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_wdog>;
-	fsl,ext-reset-output;
-	status = "okay";
+&iomuxc {
+	pinctrl_pmic: pmicirq {
+		fsl,pins = <
+			MX8MN_IOMUXC_GPIO1_IO03_GPIO1_IO3	0x41
+		>;
+	};
 };
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
new file mode 100644
index 000000000000..61f351958618
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2019 NXP
+ */
+
+/dts-v1/;
+
+#include "imx8mn.dtsi"
+#include "imx8mn-evk.dtsi"
+
+/ {
+	model = "NXP i.MX8MNano EVK board";
+	compatible = "fsl,imx8mn-evk", "fsl,imx8mn";
+};
+
+&A53_0 {
+	/delete-property/operating-points-v2;
+};
+
+&A53_1 {
+	/delete-property/operating-points-v2;
+};
+
+&A53_2 {
+	/delete-property/operating-points-v2;
+};
+
+&A53_3 {
+	/delete-property/operating-points-v2;
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi
new file mode 100644
index 000000000000..2a74330aee8c
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2019 NXP
+ */
+
+#include "imx8mn.dtsi"
+
+/ {
+	chosen {
+		stdout-path = &uart2;
+	};
+
+	gpio-leds {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpio_led>;
+
+		status {
+			label = "yellow:status";
+			gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>;
+			default-state = "on";
+		};
+	};
+
+	reg_usdhc2_vmmc: regulator-usdhc2 {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_reg_usdhc2_vmmc>;
+		regulator-name = "VSD_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		gpio = <&gpio2 19 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+};
+
+&fec1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_fec1>;
+	phy-mode = "rgmii-id";
+	phy-handle = <&ethphy0>;
+	fsl,magic-packet;
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy0: ethernet-phy@0 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <0>;
+		};
+	};
+};
+
+&i2c1 {
+	clock-frequency = <400000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	status = "okay";
+};
+
+&snvs_pwrkey {
+	status = "okay";
+};
+
+&uart2 { /* console */
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	status = "okay";
+};
+
+&usdhc2 {
+	assigned-clocks = <&clk IMX8MN_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
+	cd-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
+	bus-width = <4>;
+	vmmc-supply = <&reg_usdhc2_vmmc>;
+	status = "okay";
+};
+
+&usdhc3 {
+	assigned-clocks = <&clk IMX8MN_CLK_USDHC3_ROOT>;
+	assigned-clock-rates = <400000000>;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc3>;
+	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+	bus-width = <8>;
+	non-removable;
+	status = "okay";
+};
+
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+	fsl,ext-reset-output;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+
+	pinctrl_fec1: fec1grp {
+		fsl,pins = <
+			MX8MN_IOMUXC_ENET_MDC_ENET1_MDC		0x3
+			MX8MN_IOMUXC_ENET_MDIO_ENET1_MDIO	0x3
+			MX8MN_IOMUXC_ENET_TD3_ENET1_RGMII_TD3	0x1f
+			MX8MN_IOMUXC_ENET_TD2_ENET1_RGMII_TD2	0x1f
+			MX8MN_IOMUXC_ENET_TD1_ENET1_RGMII_TD1	0x1f
+			MX8MN_IOMUXC_ENET_TD0_ENET1_RGMII_TD0	0x1f
+			MX8MN_IOMUXC_ENET_RD3_ENET1_RGMII_RD3	0x91
+			MX8MN_IOMUXC_ENET_RD2_ENET1_RGMII_RD2	0x91
+			MX8MN_IOMUXC_ENET_RD1_ENET1_RGMII_RD1	0x91
+			MX8MN_IOMUXC_ENET_RD0_ENET1_RGMII_RD0	0x91
+			MX8MN_IOMUXC_ENET_TXC_ENET1_RGMII_TXC	0x1f
+			MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC	0x91
+			MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL	0x91
+			MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL	0x1f
+			MX8MN_IOMUXC_SAI2_RXC_GPIO4_IO22	0x19
+		>;
+	};
+
+	pinctrl_gpio_led: gpioledgrp {
+		fsl,pins = <
+			MX8MN_IOMUXC_NAND_READY_B_GPIO3_IO16	0x19
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX8MN_IOMUXC_I2C1_SCL_I2C1_SCL		0x400001c3
+			MX8MN_IOMUXC_I2C1_SDA_I2C1_SDA		0x400001c3
+		>;
+	};
+
+	pinctrl_reg_usdhc2_vmmc: regusdhc2vmmc {
+		fsl,pins = <
+			MX8MN_IOMUXC_SD2_RESET_B_GPIO2_IO19	0x41
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX8MN_IOMUXC_UART2_RXD_UART2_DCE_RX	0x140
+			MX8MN_IOMUXC_UART2_TXD_UART2_DCE_TX	0x140
+		>;
+	};
+
+	pinctrl_usdhc2_gpio: usdhc2grpgpio {
+		fsl,pins = <
+			MX8MN_IOMUXC_GPIO1_IO15_GPIO1_IO15	0x1c4
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x190
+			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d0
+			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d0
+			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d0
+			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d0
+			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d0
+			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
+		>;
+	};
+
+	pinctrl_usdhc2_100mhz: usdhc2grp100mhz {
+		fsl,pins = <
+			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x194
+			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d4
+			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d4
+			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d4
+			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d4
+			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d4
+			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
+		>;
+	};
+
+	pinctrl_usdhc2_200mhz: usdhc2grp200mhz {
+		fsl,pins = <
+			MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK		0x196
+			MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD		0x1d6
+			MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0	0x1d6
+			MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1	0x1d6
+			MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2	0x1d6
+			MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3	0x1d6
+			MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT	0x1d0
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000190
+			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d0
+			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d0
+			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d0
+			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d0
+			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d0
+			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d0
+			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d0
+			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d0
+			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d0
+			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x190
+		>;
+	};
+
+	pinctrl_usdhc3_100mhz: usdhc3grp100mhz {
+		fsl,pins = <
+			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000194
+			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d4
+			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d4
+			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d4
+			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d4
+			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d4
+			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d4
+			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d4
+			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d4
+			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d4
+			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x194
+		>;
+	};
+
+	pinctrl_usdhc3_200mhz: usdhc3grp200mhz {
+		fsl,pins = <
+			MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK		0x40000196
+			MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD		0x1d6
+			MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0		0x1d6
+			MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1		0x1d6
+			MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2		0x1d6
+			MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3		0x1d6
+			MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4		0x1d6
+			MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5		0x1d6
+			MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6		0x1d6
+			MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7		0x1d6
+			MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE		0x196
+		>;
+	};
+
+	pinctrl_wdog: wdoggrp {
+		fsl,pins = <
+			MX8MN_IOMUXC_GPIO1_IO02_WDOG1_WDOG_B		0xc6
+		>;
+	};
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 43c4db312146..e91625063f8e 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -11,7 +11,6 @@
 #include "imx8mn-pinfunc.h"
 
 / {
-	compatible = "fsl,imx8mn";
 	interrupt-parent = <&gic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
@@ -43,6 +42,19 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
+		idle-states {
+			entry-method = "psci";
+
+			cpu_pd_wait: cpu-pd-wait {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010033>;
+				local-timer-stop;
+				entry-latency-us = <1000>;
+				exit-latency-us = <700>;
+				min-residency-us = <2700>;
+			};
+		};
+
 		A53_0: cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a53";
@@ -54,6 +66,7 @@
 			operating-points-v2 = <&a53_opp_table>;
 			nvmem-cells = <&cpu_speed_grade>;
 			nvmem-cell-names = "speed_grade";
+			cpu-idle-states = <&cpu_pd_wait>;
 		};
 
 		A53_1: cpu@1 {
@@ -65,6 +78,7 @@
 			enable-method = "psci";
 			next-level-cache = <&A53_L2>;
 			operating-points-v2 = <&a53_opp_table>;
+			cpu-idle-states = <&cpu_pd_wait>;
 		};
 
 		A53_2: cpu@2 {
@@ -76,6 +90,7 @@
 			enable-method = "psci";
 			next-level-cache = <&A53_L2>;
 			operating-points-v2 = <&a53_opp_table>;
+			cpu-idle-states = <&cpu_pd_wait>;
 		};
 
 		A53_3: cpu@3 {
@@ -87,6 +102,7 @@
 			enable-method = "psci";
 			next-level-cache = <&A53_L2>;
 			operating-points-v2 = <&a53_opp_table>;
+			cpu-idle-states = <&cpu_pd_wait>;
 		};
 
 		A53_L2: l2-cache0 {
@@ -320,7 +336,7 @@
 			};
 
 			ocotp: ocotp-ctrl@30350000 {
-				compatible = "fsl,imx8mn-ocotp", "fsl,imx7d-ocotp", "syscon";
+				compatible = "fsl,imx8mn-ocotp", "fsl,imx8mm-ocotp", "syscon";
 				reg = <0x30350000 0x10000>;
 				clocks = <&clk IMX8MN_CLK_OCOTP_ROOT>;
 				#address-cells = <1>;
@@ -371,7 +387,7 @@
 			};
 
 			src: reset-controller@30390000 {
-				compatible = "fsl,imx8mn-src", "syscon";
+				compatible = "fsl,imx8mn-src", "fsl,imx8mq-src", "syscon";
 				reg = <0x30390000 0x10000>;
 				interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
 				#reset-cells = <1>;
@@ -428,6 +444,14 @@
 				#pwm-cells = <2>;
 				status = "disabled";
 			};
+
+			system_counter: timer@306a0000 {
+				compatible = "nxp,sysctr-timer";
+				reg = <0x306a0000 0x20000>;
+				interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&osc_24m>;
+				clock-names = "per";
+			};
 		};
 
 		aips3: bus@30800000 {
@@ -573,8 +597,6 @@
 					 <&clk IMX8MN_CLK_NAND_USDHC_BUS>,
 					 <&clk IMX8MN_CLK_USDHC1_ROOT>;
 				clock-names = "ipg", "ahb", "per";
-				assigned-clocks = <&clk IMX8MN_CLK_USDHC1>;
-				assigned-clock-rates = <400000000>;
 				fsl,tuning-start-tap = <20>;
 				fsl,tuning-step= <2>;
 				bus-width = <4>;
@@ -603,8 +625,6 @@
 					 <&clk IMX8MN_CLK_NAND_USDHC_BUS>,
 					 <&clk IMX8MN_CLK_USDHC3_ROOT>;
 				clock-names = "ipg", "ahb", "per";
-				assigned-clocks = <&clk IMX8MN_CLK_USDHC3_ROOT>;
-				assigned-clock-rates = <400000000>;
 				fsl,tuning-start-tap = <20>;
 				fsl,tuning-step= <2>;
 				bus-width = <4>;
@@ -738,6 +758,12 @@
 			interrupt-controller;
 			interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
 		};
+
+		ddr-pmu@3d800000 {
+			compatible = "fsl,imx8mn-ddr-pmu", "fsl,imx8m-ddr-pmu";
+			reg = <0x3d800000 0x400000>;
+			interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+		};
 	};
 
 	usbphynop1: usbphynop1 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
index 05958124f173..c36685916683 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
@@ -48,6 +48,15 @@
 		gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
 		states = <1000000 0x0
 			  900000 0x1>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	ir-receiver {
+		compatible = "gpio-ir-receiver";
+		gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_ir>;
 	};
 
 	wm8524: audio-codec {
@@ -115,15 +124,6 @@
 	};
 };
 
-&sai2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_sai2>;
-	assigned-clocks = <&clk IMX8MQ_AUDIO_PLL1_BYPASS>, <&clk IMX8MQ_CLK_SAI2>;
-	assigned-clock-parents = <&clk IMX8MQ_AUDIO_PLL1>, <&clk IMX8MQ_AUDIO_PLL1_OUT>;
-	assigned-clock-rates = <0>, <24576000>;
-	status = "okay";
-};
-
 &gpio5 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_wifi_reset>;
@@ -242,6 +242,29 @@
 	power-supply = <&sw1a_reg>;
 };
 
+&qspi0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_qspi>;
+	status = "okay";
+
+	n25q256a: flash@0 {
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "micron,n25q256a", "jedec,spi-nor";
+		spi-max-frequency = <29000000>;
+	};
+};
+
+&sai2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sai2>;
+	assigned-clocks = <&clk IMX8MQ_AUDIO_PLL1_BYPASS>, <&clk IMX8MQ_CLK_SAI2>;
+	assigned-clock-parents = <&clk IMX8MQ_AUDIO_PLL1>, <&clk IMX8MQ_AUDIO_PLL1_OUT>;
+	assigned-clock-rates = <0>, <24576000>;
+	status = "okay";
+};
+
 &snvs_pwrkey {
 	status = "okay";
 };
@@ -261,21 +284,9 @@
 	status = "okay";
 };
 
-&qspi0 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_qspi>;
-	status = "okay";
-
-	n25q256a: flash@0 {
-		reg = <0>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "micron,n25q256a", "jedec,spi-nor";
-		spi-max-frequency = <29000000>;
-	};
-};
-
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
@@ -289,6 +300,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
@@ -340,6 +353,12 @@
 		>;
 	};
 
+	pinctrl_ir: irgrp {
+		fsl,pins = <
+			MX8MQ_IOMUXC_GPIO1_IO12_GPIO1_IO12		0x4f
+		>;
+	};
+
 	pinctrl_pcie0: pcie0grp {
 		fsl,pins = <
 			MX8MQ_IOMUXC_I2C4_SCL_PCIE1_CLKREQ_B		0x76
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-hummingboard-pulse.dts b/arch/arm64/boot/dts/freescale/imx8mq-hummingboard-pulse.dts
index f52e872ac96f..b8cb20c01a79 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-hummingboard-pulse.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-hummingboard-pulse.dts
@@ -110,6 +110,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts
index 683a11035643..596bc65f475c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts
@@ -421,7 +421,7 @@
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_imu>;
 		interrupt-parent = <&gpio3>;
-		interrupts = <19 IRQ_TYPE_LEVEL_LOW>;
+		interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
 		vdd-supply = <&reg_3v3_p>;
 		vddio-supply = <&reg_3v3_p>;
 	};
@@ -780,6 +780,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
@@ -790,6 +792,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
index c832bf0fcc60..81d269296610 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
@@ -191,6 +191,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	bus-width = <8>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-pico-pi.dts b/arch/arm64/boot/dts/freescale/imx8mq-pico-pi.dts
index 8a4aee2348ee..59da96b7143f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-pico-pi.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-pico-pi.dts
@@ -207,6 +207,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
@@ -217,6 +219,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-sr-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-sr-som.dtsi
index d7f03c65832b..3dc44114da0e 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-sr-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-sr-som.dtsi
@@ -170,6 +170,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index 32ce14936b01..6a55165bd76a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -62,7 +62,16 @@
 	reg_3p3_main: regulator-3p3-main {
 		compatible = "regulator-fixed";
 		vin-supply = <&reg_12p0_main>;
-		regulator-name = "3V3V_MAIN";
+		regulator-name = "3V3_MAIN";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+	};
+
+	reg_gen_3p3: regulator-gen-3p3 {
+		compatible = "regulator-fixed";
+		vin-supply = <&reg_3p3_main>;
+		regulator-name = "GEN_3V3";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
 		regulator-always-on;
@@ -72,7 +81,7 @@
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_reg_usdhc2>;
 		compatible = "regulator-fixed";
-		vin-supply = <&reg_3p3_main>;
+		vin-supply = <&reg_gen_3p3>;
 		regulator-name = "3V3_SD";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
@@ -253,6 +262,18 @@
 	pinctrl-0 = <&pinctrl_i2c1>;
 	status = "okay";
 
+	accelerometer@1c {
+		compatible = "fsl,mma8451";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_accel>;
+		reg = <0x1c>;
+		interrupt-parent = <&gpio3>;
+		interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-names = "INT2";
+		vdd-supply = <&reg_gen_3p3>;
+		vddio-supply = <&reg_gen_3p3>;
+	};
+
 	ucs1002: charger@32 {
 		compatible = "microchip,ucs1002";
 		pinctrl-names = "default";
@@ -379,6 +400,11 @@
 		reg = <0x2c>;
 		reset-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>;
 	};
+
+	watchdog@38 {
+		compatible = "zii,rave-wdt";
+		reg = <0x38>;
+	};
 };
 
 &i2c4 {
@@ -486,6 +512,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
+	assigned-clock-rates = <400000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
@@ -499,6 +527,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX8MQ_CLK_USDHC2>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default", "state_100mhz", "state_200mhz";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
@@ -513,6 +543,12 @@
 };
 
 &iomuxc {
+	pinctrl_accel: accelgrp {
+		fsl,pins = <
+			MX8MQ_IOMUXC_SAI5_RXC_GPIO3_IO20		0x41
+		>;
+	};
+
 	pinctrl_fec1: fec1grp {
 		fsl,pins = <
 			MX8MQ_IOMUXC_ENET_MDC_ENET1_MDC			0x3
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 55a3d1c4bdf0..7f9319452b58 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -235,12 +235,26 @@
 			thermal-sensors = <&tmu 1>;
 
 			trips {
+				gpu_alert: gpu-alert {
+					temperature = <80000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+
 				gpu-crit {
 					temperature = <90000>;
 					hysteresis = <2000>;
 					type = "critical";
 				};
 			};
+
+			cooling-maps {
+				map0 {
+					trip = <&gpu_alert>;
+					cooling-device =
+						<&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
 		};
 
 		vpu-thermal {
@@ -854,8 +868,6 @@
 				         <&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
 				         <&clk IMX8MQ_CLK_USDHC1_ROOT>;
 				clock-names = "ipg", "ahb", "per";
-				assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>;
-				assigned-clock-rates = <400000000>;
 				fsl,tuning-start-tap = <20>;
 				fsl,tuning-step = <2>;
 				bus-width = <4>;
@@ -949,6 +961,7 @@
 			         <&clk IMX8MQ_CLK_GPU_AXI>,
 			         <&clk IMX8MQ_CLK_GPU_AHB>;
 			clock-names = "core", "shader", "bus", "reg";
+			#cooling-cells = <2>;
 			assigned-clocks = <&clk IMX8MQ_CLK_GPU_CORE_SRC>,
 			                  <&clk IMX8MQ_CLK_GPU_SHADER_SRC>,
 			                  <&clk IMX8MQ_CLK_GPU_AXI>,
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts b/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts
index 91eef9754101..a3f8cf195974 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts
@@ -133,6 +133,8 @@
 &usdhc1 {
 	#address-cells = <1>;
 	#size-cells = <0>;
+	assigned-clocks = <&clk IMX_CONN_SDHC0_CLK>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	bus-width = <4>;
@@ -149,6 +151,8 @@
 
 /* SD */
 &usdhc2 {
+	assigned-clocks = <&clk IMX_CONN_SDHC1_CLK>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <4>;
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dts b/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dts
new file mode 100644
index 000000000000..6b21a295c126
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dts
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Copyright 2019 Toradex
+ */
+
+/dts-v1/;
+
+#include "imx8qxp-colibri.dtsi"
+#include "imx8qxp-colibri-eval-v3.dtsi"
+
+/ {
+	model = "Toradex Colibri iMX8QXP/DX on Colibri Evaluation Board V3";
+	compatible = "toradex,colibri-imx8x-eval-v3",
+		     "toradex,colibri-imx8x", "fsl,imx8qxp";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dtsi
new file mode 100644
index 000000000000..c7336f387605
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dtsi
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Copyright 2019 Toradex
+ */
+
+#include "dt-bindings/input/linux-event-codes.h"
+
+/ {
+	aliases {
+		rtc0 = &rtc_i2c;
+		rtc1 = &rtc;
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpiokeys>;
+
+		wakeup {
+			label = "Wake-Up";
+			gpios = <&lsio_gpio3 10 GPIO_ACTIVE_HIGH>;
+			linux,code = <KEY_WAKEUP>;
+			debounce-interval = <10>;
+			wakeup-source;
+		};
+	};
+};
+
+&adma_i2c1 {
+	status = "okay";
+
+	/* M41T0M6 real time clock on carrier board */
+	rtc_i2c: rtc@68 {
+		compatible = "st,m41t0";
+		reg = <0x68>;
+	};
+};
+
+/* Colibri UART_B */
+&adma_lpuart0 {
+	status= "okay";
+};
+
+/* Colibri UART_C */
+&adma_lpuart2 {
+	status= "okay";
+};
+
+/* Colibri UART_A */
+&adma_lpuart3 {
+	status= "okay";
+};
+
+/* Colibri FastEthernet */
+&fec1 {
+	status = "okay";
+};
+
+/* Colibri SD/MMC Card */
+&usdhc2 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-colibri.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-colibri.dtsi
new file mode 100644
index 000000000000..75f17a29f81e
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-colibri.dtsi
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Copyright 2019 Toradex
+ */
+
+#include "imx8qxp.dtsi"
+
+/ {
+	model = "Toradex Colibri iMX8QXP/DX Module";
+	compatible = "toradex,colibri-imx8x", "fsl,imx8qxp";
+
+	chosen {
+		stdout-path = &adma_lpuart3;
+	};
+
+	reg_module_3v3: regulator-module-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "+V3.3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+};
+
+/* On-module I2C */
+&adma_i2c0 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c0>, <&pinctrl_sgtl5000_usb_clk>;
+	status = "okay";
+
+	/* Touch controller */
+	touchscreen@2c {
+		compatible = "adi,ad7879-1";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_ad7879_int>;
+		reg = <0x2c>;
+		interrupt-parent = <&lsio_gpio3>;
+		interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+		touchscreen-max-pressure = <4096>;
+		adi,resistance-plate-x = <120>;
+		adi,first-conversion-delay = /bits/ 8 <3>;
+		adi,acquisition-time = /bits/ 8 <1>;
+		adi,median-filter-size = /bits/ 8 <2>;
+		adi,averaging = /bits/ 8 <1>;
+		adi,conversion-interval = /bits/ 8 <255>;
+	};
+};
+
+/* Colibri I2C */
+&adma_i2c1 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+};
+
+/* Colibri UART_B */
+&adma_lpuart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lpuart0>;
+};
+
+/* Colibri UART_C */
+&adma_lpuart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lpuart2>;
+};
+
+/* Colibri UART_A */
+&adma_lpuart3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lpuart3>, <&pinctrl_lpuart3_ctrl>;
+};
+
+/* Colibri FastEthernet */
+&fec1 {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&pinctrl_fec1>;
+	pinctrl-1 = <&pinctrl_fec1_sleep>;
+	phy-mode = "rmii";
+	phy-handle = <&ethphy0>;
+	fsl,magic-packet;
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy0: ethernet-phy@2 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			max-speed = <100>;
+			reg = <2>;
+		};
+	};
+};
+
+/* On-module eMMC */
+&usdhc1 {
+	bus-width = <8>;
+	non-removable;
+	no-sd;
+	no-sdio;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
+	status = "okay";
+};
+
+/* Colibri SD/MMC Card */
+&usdhc2 {
+	bus-width = <4>;
+	cd-gpios = <&lsio_gpio3 9 GPIO_ACTIVE_LOW>;
+	vmmc-supply = <&reg_module_3v3>;
+	pinctrl-names = "default", "state_100mhz", "state_200mhz", "sleep";
+	pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
+	pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_gpio_sleep>;
+	disable-wp;
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ext_io0>, <&pinctrl_hog0>, <&pinctrl_hog1>;
+
+	/* On-module touch pen-down interrupt */
+	pinctrl_ad7879_int: ad7879intgrp {
+		fsl,pins = <
+			IMX8QXP_MIPI_CSI0_I2C0_SCL_LSIO_GPIO3_IO05	0x21
+		>;
+	};
+
+	/* Colibri Analogue Inputs */
+	pinctrl_adc0: adc0grp {
+		fsl,pins = <
+			IMX8QXP_ADC_IN0_ADMA_ADC_IN0			0x60		/* SODIMM   8 */
+			IMX8QXP_ADC_IN1_ADMA_ADC_IN1			0x60		/* SODIMM   6 */
+			IMX8QXP_ADC_IN4_ADMA_ADC_IN4			0x60		/* SODIMM   4 */
+			IMX8QXP_ADC_IN5_ADMA_ADC_IN5			0x60		/* SODIMM   2 */
+		>;
+	};
+
+	pinctrl_can_int: canintgrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_DQS_LSIO_GPIO3_IO13		0x40		/* SODIMM  73 */
+		>;
+	};
+
+	pinctrl_csi_ctl: csictlgrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_SS0_B_LSIO_GPIO3_IO14		0x20		/* SODIMM  77 */
+			IMX8QXP_QSPI0A_SS1_B_LSIO_GPIO3_IO15		0x20		/* SODIMM  89 */
+		>;
+	};
+
+	pinctrl_ext_io0: extio0grp {
+		fsl,pins = <
+			IMX8QXP_ENET0_RGMII_RXD3_LSIO_GPIO5_IO08	0x06000040	/* SODIMM 135 */
+		>;
+	};
+
+	/* Colibri Ethernet: On-module 100Mbps PHY Micrel KSZ8041 */
+	pinctrl_fec1: fec1grp {
+		fsl,pins = <
+			IMX8QXP_ENET0_MDC_CONN_ENET0_MDC			0x06000020
+			IMX8QXP_ENET0_MDIO_CONN_ENET0_MDIO			0x06000020
+			IMX8QXP_ENET0_RGMII_TX_CTL_CONN_ENET0_RGMII_TX_CTL	0x61
+			IMX8QXP_ENET0_RGMII_TXC_CONN_ENET0_RCLK50M_OUT		0x06000061
+			IMX8QXP_ENET0_RGMII_TXD0_CONN_ENET0_RGMII_TXD0		0x61
+			IMX8QXP_ENET0_RGMII_TXD1_CONN_ENET0_RGMII_TXD1		0x61
+			IMX8QXP_ENET0_RGMII_RX_CTL_CONN_ENET0_RGMII_RX_CTL	0x61
+			IMX8QXP_ENET0_RGMII_RXD0_CONN_ENET0_RGMII_RXD0		0x61
+			IMX8QXP_ENET0_RGMII_RXD1_CONN_ENET0_RGMII_RXD1		0x61
+			IMX8QXP_ENET0_RGMII_RXD2_CONN_ENET0_RMII_RX_ER		0x61
+		>;
+	};
+
+	pinctrl_fec1_sleep: fec1slpgrp {
+		fsl,pins = <
+			IMX8QXP_ENET0_MDC_LSIO_GPIO5_IO11		0x06000041
+			IMX8QXP_ENET0_MDIO_LSIO_GPIO5_IO10		0x06000041
+			IMX8QXP_ENET0_RGMII_TX_CTL_LSIO_GPIO4_IO30	0x41
+			IMX8QXP_ENET0_RGMII_TXC_LSIO_GPIO4_IO29		0x41
+			IMX8QXP_ENET0_RGMII_TXD0_LSIO_GPIO4_IO31	0x41
+			IMX8QXP_ENET0_RGMII_TXD1_LSIO_GPIO5_IO00	0x41
+			IMX8QXP_ENET0_RGMII_RX_CTL_LSIO_GPIO5_IO04	0x41
+			IMX8QXP_ENET0_RGMII_RXD0_LSIO_GPIO5_IO05	0x41
+			IMX8QXP_ENET0_RGMII_RXD1_LSIO_GPIO5_IO06	0x41
+			IMX8QXP_ENET0_RGMII_RXD2_LSIO_GPIO5_IO07	0x41
+		>;
+	};
+
+	/* Colibri optional CAN on UART_B RTS/CTS */
+	pinctrl_flexcan1: flexcan0grp {
+		fsl,pins = <
+			IMX8QXP_FLEXCAN0_TX_ADMA_FLEXCAN0_TX		0x21		/* SODIMM  32 */
+			IMX8QXP_FLEXCAN0_RX_ADMA_FLEXCAN0_RX		0x21		/* SODIMM  34 */
+		>;
+	};
+
+	/* Colibri optional CAN on PS2 */
+	pinctrl_flexcan2: flexcan1grp {
+		fsl,pins = <
+			IMX8QXP_FLEXCAN1_TX_ADMA_FLEXCAN1_TX		0x21		/* SODIMM  55 */
+			IMX8QXP_FLEXCAN1_RX_ADMA_FLEXCAN1_RX		0x21		/* SODIMM  63 */
+		>;
+	};
+
+	/* Colibri optional CAN on UART_A TXD/RXD */
+	pinctrl_flexcan3: flexcan2grp {
+		fsl,pins = <
+			IMX8QXP_FLEXCAN2_TX_ADMA_FLEXCAN2_TX		0x21		/* SODIMM  35 */
+			IMX8QXP_FLEXCAN2_RX_ADMA_FLEXCAN2_RX		0x21		/* SODIMM  33 */
+		>;
+	};
+
+	/* Colibri LCD Back-Light GPIO */
+	pinctrl_gpio_bl_on: gpioblongrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_DATA3_LSIO_GPIO3_IO12		0x60		/* SODIMM  71 */
+		>;
+	};
+
+	pinctrl_gpiokeys: gpiokeysgrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_DATA1_LSIO_GPIO3_IO10		0x06700041	/* SODIMM  45 */
+		>;
+	};
+
+	pinctrl_hog0: hog0grp {
+		fsl,pins = <
+			IMX8QXP_ENET0_RGMII_TXD3_LSIO_GPIO5_IO02	0x06000020	/* SODIMM  65 */
+			IMX8QXP_CSI_D07_CI_PI_D09			0x61		/* SODIMM  65 */
+			IMX8QXP_QSPI0A_DATA2_LSIO_GPIO3_IO11		0x20		/* SODIMM  69 */
+			IMX8QXP_SAI0_TXC_LSIO_GPIO0_IO26		0x20		/* SODIMM  79 */
+			IMX8QXP_CSI_D02_CI_PI_D04			0x61		/* SODIMM  79 */
+			IMX8QXP_ENET0_RGMII_RXC_LSIO_GPIO5_IO03		0x06000020	/* SODIMM  85 */
+			IMX8QXP_CSI_D06_CI_PI_D08			0x61		/* SODIMM  85 */
+			IMX8QXP_QSPI0B_SCLK_LSIO_GPIO3_IO17		0x20		/* SODIMM  95 */
+			IMX8QXP_SAI0_RXD_LSIO_GPIO0_IO27		0x20		/* SODIMM  97 */
+			IMX8QXP_CSI_D03_CI_PI_D05			0x61		/* SODIMM  97 */
+			IMX8QXP_QSPI0B_DATA0_LSIO_GPIO3_IO18		0x20		/* SODIMM  99 */
+			IMX8QXP_SAI0_TXFS_LSIO_GPIO0_IO28		0x20		/* SODIMM 101 */
+			IMX8QXP_CSI_D00_CI_PI_D02			0x61		/* SODIMM 101 */
+			IMX8QXP_SAI0_TXD_LSIO_GPIO0_IO25		0x20		/* SODIMM 103 */
+			IMX8QXP_CSI_D01_CI_PI_D03			0x61		/* SODIMM 103 */
+			IMX8QXP_QSPI0B_DATA1_LSIO_GPIO3_IO19		0x20		/* SODIMM 105 */
+			IMX8QXP_QSPI0B_DATA2_LSIO_GPIO3_IO20		0x20		/* SODIMM 107 */
+			IMX8QXP_USB_SS3_TC2_LSIO_GPIO4_IO05		0x20		/* SODIMM 127 */
+			IMX8QXP_USB_SS3_TC3_LSIO_GPIO4_IO06		0x20		/* SODIMM 131 */
+			IMX8QXP_USB_SS3_TC1_LSIO_GPIO4_IO04		0x20		/* SODIMM 133 */
+			IMX8QXP_CSI_PCLK_LSIO_GPIO3_IO00		0x20		/* SODIMM  96 */
+			IMX8QXP_QSPI0B_DATA3_LSIO_GPIO3_IO21		0x20		/* SODIMM  98 */
+			IMX8QXP_SAI1_RXFS_LSIO_GPIO0_IO31		0x20		/* SODIMM 100 */
+			IMX8QXP_QSPI0B_DQS_LSIO_GPIO3_IO22		0x20		/* SODIMM 102 */
+			IMX8QXP_QSPI0B_SS0_B_LSIO_GPIO3_IO23		0x20		/* SODIMM 104 */
+			IMX8QXP_QSPI0B_SS1_B_LSIO_GPIO3_IO24		0x20		/* SODIMM 106 */
+		>;
+	};
+
+	pinctrl_hog1: hog1grp {
+		fsl,pins = <
+			IMX8QXP_CSI_MCLK_LSIO_GPIO3_IO01		0x20		/* SODIMM  75 */
+			IMX8QXP_QSPI0A_SCLK_LSIO_GPIO3_IO16		0x20		/* SODIMM  93 */
+		>;
+	};
+
+	/*
+	 * This pin is used in the SCFW as a UART. Using it from
+	 * Linux would require rewritting the SCFW board file.
+	 */
+	pinctrl_hog_scfw: hogscfwgrp {
+		fsl,pins = <
+			IMX8QXP_SCU_GPIO0_00_LSIO_GPIO2_IO03		0x20		/* SODIMM 144 */
+		>;
+	};
+
+	/* On Module I2C */
+	pinctrl_i2c0: i2c0grp {
+		fsl,pins = <
+			IMX8QXP_MIPI_CSI0_GPIO0_00_ADMA_I2C0_SCL	0x06000021
+			IMX8QXP_MIPI_CSI0_GPIO0_01_ADMA_I2C0_SDA	0x06000021
+		>;
+	};
+
+	/* MIPI DSI I2C accessible on SODIMM (X1) and FFC (X2) */
+	pinctrl_i2c0_mipi_lvds0: i2c0mipilvds0grp {
+		fsl,pins = <
+			IMX8QXP_MIPI_DSI0_I2C0_SCL_MIPI_DSI0_I2C0_SCL	0xc6000020	/* SODIMM 140 */
+			IMX8QXP_MIPI_DSI0_I2C0_SDA_MIPI_DSI0_I2C0_SDA	0xc6000020	/* SODIMM 142 */
+		>;
+	};
+
+	/* MIPI CSI I2C accessible on SODIMM (X1) and FFC (X3) */
+	pinctrl_i2c0_mipi_lvds1: i2c0mipilvds1grp {
+		fsl,pins = <
+			IMX8QXP_MIPI_DSI1_I2C0_SCL_MIPI_DSI1_I2C0_SCL	0xc6000020	/* SODIMM 186 */
+			IMX8QXP_MIPI_DSI1_I2C0_SDA_MIPI_DSI1_I2C0_SDA	0xc6000020	/* SODIMM 188 */
+		>;
+	};
+
+	/* Colibri I2C */
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			IMX8QXP_MIPI_DSI0_GPIO0_00_ADMA_I2C1_SCL	0x06000021	/* SODIMM 196 */
+			IMX8QXP_MIPI_DSI0_GPIO0_01_ADMA_I2C1_SDA	0x06000021	/* SODIMM 194 */
+		>;
+	};
+
+	/* Colibri Parallel RGB LCD Interface */
+	pinctrl_lcdif: lcdifgrp {
+		fsl,pins = <
+			IMX8QXP_MCLK_OUT0_ADMA_LCDIF_CLK		0x60		/* SODIMM  56 */
+			IMX8QXP_SPI3_CS0_ADMA_LCDIF_HSYNC		0x60		/* SODIMM  68 */
+			IMX8QXP_MCLK_IN0_ADMA_LCDIF_VSYNC		0x60		/* SODIMM  82 */
+			IMX8QXP_MCLK_IN1_ADMA_LCDIF_EN			0x60		/* SODIMM  44 */
+			IMX8QXP_USDHC1_RESET_B_LSIO_GPIO4_IO19		0x60		/* SODIMM  44 */
+			IMX8QXP_ESAI0_FSR_ADMA_LCDIF_D00		0x60		/* SODIMM  76 */
+			IMX8QXP_USDHC1_WP_LSIO_GPIO4_IO21		0x60		/* SODIMM  76 */
+			IMX8QXP_ESAI0_FST_ADMA_LCDIF_D01		0x60		/* SODIMM  70 */
+			IMX8QXP_ESAI0_SCKR_ADMA_LCDIF_D02		0x60		/* SODIMM  60 */
+			IMX8QXP_ESAI0_SCKT_ADMA_LCDIF_D03		0x60		/* SODIMM  58 */
+			IMX8QXP_ESAI0_TX0_ADMA_LCDIF_D04		0x60		/* SODIMM  78 */
+			IMX8QXP_ESAI0_TX1_ADMA_LCDIF_D05		0x60		/* SODIMM  72 */
+			IMX8QXP_ESAI0_TX2_RX3_ADMA_LCDIF_D06		0x60		/* SODIMM  80 */
+			IMX8QXP_ESAI0_TX3_RX2_ADMA_LCDIF_D07		0x60		/* SODIMM  46 */
+			IMX8QXP_ESAI0_TX4_RX1_ADMA_LCDIF_D08		0x60		/* SODIMM  62 */
+			IMX8QXP_ESAI0_TX5_RX0_ADMA_LCDIF_D09		0x60		/* SODIMM  48 */
+			IMX8QXP_SPDIF0_RX_ADMA_LCDIF_D10		0x60		/* SODIMM  74 */
+			IMX8QXP_SPDIF0_TX_ADMA_LCDIF_D11		0x60		/* SODIMM  50 */
+			IMX8QXP_SPDIF0_EXT_CLK_ADMA_LCDIF_D12		0x60		/* SODIMM  52 */
+			IMX8QXP_SPI3_SCK_ADMA_LCDIF_D13			0x60		/* SODIMM  54 */
+			IMX8QXP_SPI3_SDO_ADMA_LCDIF_D14			0x60		/* SODIMM  66 */
+			IMX8QXP_SPI3_SDI_ADMA_LCDIF_D15			0x60		/* SODIMM  64 */
+			IMX8QXP_SPI3_CS1_ADMA_LCDIF_D16			0x60		/* SODIMM  57 */
+			IMX8QXP_ENET0_RGMII_TXD2_LSIO_GPIO5_IO01	0x60		/* SODIMM  57 */
+			IMX8QXP_UART1_CTS_B_ADMA_LCDIF_D17		0x60		/* SODIMM  61 */
+		>;
+	};
+
+	/* Colibri SPI */
+	pinctrl_lpspi2: lpspi2grp {
+		fsl,pins = <
+			IMX8QXP_SPI2_CS0_LSIO_GPIO1_IO00		0x21		/* SODIMM  86 */
+			IMX8QXP_SPI2_SDO_ADMA_SPI2_SDO			0x06000040	/* SODIMM  92 */
+			IMX8QXP_SPI2_SDI_ADMA_SPI2_SDI			0x06000040	/* SODIMM  90 */
+			IMX8QXP_SPI2_SCK_ADMA_SPI2_SCK			0x06000040	/* SODIMM  88 */
+		>;
+	};
+
+	/* Colibri UART_B */
+	pinctrl_lpuart0: lpuart0grp {
+		fsl,pins = <
+			IMX8QXP_UART0_RX_ADMA_UART0_RX			0x06000020	/* SODIMM  36 */
+			IMX8QXP_UART0_TX_ADMA_UART0_TX			0x06000020	/* SODIMM  38 */
+			IMX8QXP_FLEXCAN0_RX_ADMA_UART0_RTS_B		0x06000020	/* SODIMM  34 */
+			IMX8QXP_FLEXCAN0_TX_ADMA_UART0_CTS_B		0x06000020	/* SODIMM  32 */
+		>;
+	};
+
+	/* Colibri UART_C */
+	pinctrl_lpuart2: lpuart2grp {
+		fsl,pins = <
+			IMX8QXP_UART2_RX_ADMA_UART2_RX			0x06000020	/* SODIMM  19 */
+			IMX8QXP_UART2_TX_ADMA_UART2_TX			0x06000020	/* SODIMM  21 */
+		>;
+	};
+
+	/* Colibri UART_A */
+	pinctrl_lpuart3: lpuart3grp {
+		fsl,pins = <
+			IMX8QXP_FLEXCAN2_RX_ADMA_UART3_RX		0x06000020	/* SODIMM  33 */
+			IMX8QXP_FLEXCAN2_TX_ADMA_UART3_TX		0x06000020	/* SODIMM  35 */
+		>;
+	};
+
+	/* Colibri UART_A Control */
+	pinctrl_lpuart3_ctrl: lpuart3ctrlgrp {
+		fsl,pins = <
+			IMX8QXP_MIPI_DSI1_GPIO0_01_LSIO_GPIO2_IO00	0x20		/* SODIMM  23 */
+			IMX8QXP_SAI1_RXD_LSIO_GPIO0_IO29		0x20		/* SODIMM  25 */
+			IMX8QXP_SAI1_RXC_LSIO_GPIO0_IO30		0x20		/* SODIMM  27 */
+			IMX8QXP_CSI_RESET_LSIO_GPIO3_IO03		0x20		/* SODIMM  29 */
+			IMX8QXP_USDHC1_CD_B_LSIO_GPIO4_IO22		0x20		/* SODIMM  31 */
+			IMX8QXP_CSI_EN_LSIO_GPIO3_IO02			0x20		/* SODIMM  37 */
+		>;
+	};
+
+	/* On module wifi module */
+	pinctrl_pcieb: pciebgrp {
+		fsl,pins = <
+			IMX8QXP_PCIE_CTRL0_CLKREQ_B_LSIO_GPIO4_IO01	0x04000061	/* SODIMM 178 */
+			IMX8QXP_PCIE_CTRL0_WAKE_B_LSIO_GPIO4_IO02	0x04000061	/* SODIMM  94 */
+			IMX8QXP_PCIE_CTRL0_PERST_B_LSIO_GPIO4_IO00	0x60		/* SODIMM  81 */
+		>;
+	};
+
+	/* Colibri PWM_A */
+	pinctrl_pwm_a: pwmagrp {
+	/* both pins are connected together, reserve the unused CSI_D05 */
+		fsl,pins = <
+			IMX8QXP_CSI_D05_CI_PI_D07			0x61		/* SODIMM  59 */
+			IMX8QXP_SPI0_CS1_ADMA_LCD_PWM0_OUT		0x60		/* SODIMM  59 */
+		>;
+	};
+
+	/* Colibri PWM_B */
+	pinctrl_pwm_b: pwmbgrp {
+		fsl,pins = <
+			IMX8QXP_UART1_TX_LSIO_PWM0_OUT			0x60		/* SODIMM  28 */
+		>;
+	};
+
+	/* Colibri PWM_C */
+	pinctrl_pwm_c: pwmcgrp {
+		fsl,pins = <
+			IMX8QXP_UART1_RX_LSIO_PWM1_OUT			0x60		/* SODIMM  30 */
+		>;
+	};
+
+	/* Colibri PWM_D */
+	pinctrl_pwm_d: pwmdgrp {
+	/* both pins are connected together, reserve the unused CSI_D04 */
+		fsl,pins = <
+			IMX8QXP_CSI_D04_CI_PI_D06			0x61		/* SODIMM  67 */
+			IMX8QXP_UART1_RTS_B_LSIO_PWM2_OUT		0x60		/* SODIMM  67 */
+		>;
+	};
+
+	/* On-module I2S */
+	pinctrl_sai0: sai0grp {
+		fsl,pins = <
+			IMX8QXP_SPI0_SDI_ADMA_SAI0_TXD			0x06000040
+			IMX8QXP_SPI0_CS0_ADMA_SAI0_RXD			0x06000040
+			IMX8QXP_SPI0_SCK_ADMA_SAI0_TXC			0x06000040
+			IMX8QXP_SPI0_SDO_ADMA_SAI0_TXFS			0x06000040
+		>;
+	};
+
+	/* Colibri Audio Analogue Microphone GND */
+	pinctrl_sgtl5000: sgtl5000grp {
+		fsl,pins = <
+			/* MIC GND EN */
+			IMX8QXP_MIPI_CSI0_I2C0_SDA_LSIO_GPIO3_IO06	0x41
+		>;
+	};
+
+	/* On-module SGTL5000 clock */
+	pinctrl_sgtl5000_usb_clk: sgtl5000usbclkgrp {
+		fsl,pins = <
+			IMX8QXP_ADC_IN3_ADMA_ACM_MCLK_OUT0		0x21
+		>;
+	};
+
+	/* On-module USB interrupt */
+	pinctrl_usb3503a: usb3503agrp {
+		fsl,pins = <
+			IMX8QXP_MIPI_CSI0_MCLK_OUT_LSIO_GPIO3_IO04	0x61
+		>;
+	};
+
+	/* Colibri USB Client Cable Detect */
+	pinctrl_usbc_det: usbcdetgrp {
+		fsl,pins = <
+			IMX8QXP_ENET0_REFCLK_125M_25M_LSIO_GPIO5_IO09	0x06000040	/* SODIMM 137 */
+		>;
+	};
+
+	/* USB Host Power Enable */
+	pinctrl_usbh1_reg: usbh1reggrp {
+		fsl,pins = <
+			IMX8QXP_USB_SS3_TC0_LSIO_GPIO4_IO03		0x06000040	/* SODIMM 129 */
+		>;
+	};
+
+	/* On-module eMMC */
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK		0x06000041
+			IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD		0x21
+			IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0		0x21
+			IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1		0x21
+			IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2		0x21
+			IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3		0x21
+			IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4		0x21
+			IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5		0x21
+			IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6		0x21
+			IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7		0x21
+			IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE		0x41
+			IMX8QXP_EMMC0_RESET_B_CONN_EMMC0_RESET_B	0x21
+		>;
+	};
+
+	pinctrl_usdhc1_100mhz: usdhc1grp100mhz {
+		fsl,pins = <
+			IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK		0x06000041
+			IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD		0x21
+			IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0		0x21
+			IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1		0x21
+			IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2		0x21
+			IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3		0x21
+			IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4		0x21
+			IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5		0x21
+			IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6		0x21
+			IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7		0x21
+			IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE		0x41
+			IMX8QXP_EMMC0_RESET_B_CONN_EMMC0_RESET_B	0x21
+		>;
+	};
+
+	pinctrl_usdhc1_200mhz: usdhc1grp200mhz {
+		fsl,pins = <
+			IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK		0x06000041
+			IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD		0x21
+			IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0		0x21
+			IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1		0x21
+			IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2		0x21
+			IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3		0x21
+			IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4		0x21
+			IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5		0x21
+			IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6		0x21
+			IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7		0x21
+			IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE		0x41
+			IMX8QXP_EMMC0_RESET_B_CONN_EMMC0_RESET_B	0x21
+		>;
+	};
+
+	/* Colibri SD/MMC Card Detect */
+	pinctrl_usdhc2_gpio: usdhc2gpiogrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_DATA0_LSIO_GPIO3_IO09		0x06000021	/* SODIMM  43 */
+		>;
+	};
+
+	pinctrl_usdhc2_gpio_sleep: usdhc2gpioslpgrp {
+		fsl,pins = <
+			IMX8QXP_QSPI0A_DATA0_LSIO_GPIO3_IO09		0x60		/* SODIMM  43 */
+		>;
+	};
+
+	/* Colibri SD/MMC Card */
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK		0x06000041	/* SODIMM  47 */
+			IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD		0x21		/* SODIMM 190 */
+			IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0		0x21		/* SODIMM 192 */
+			IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1		0x21		/* SODIMM  49 */
+			IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2		0x21		/* SODIMM  51 */
+			IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3		0x21		/* SODIMM  53 */
+			IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT	0x21
+		>;
+	};
+
+	pinctrl_usdhc2_100mhz: usdhc2grp100mhz {
+		fsl,pins = <
+			IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK		0x06000041	/* SODIMM  47 */
+			IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD		0x21		/* SODIMM 190 */
+			IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0		0x21		/* SODIMM 192 */
+			IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1		0x21		/* SODIMM  49 */
+			IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2		0x21		/* SODIMM  51 */
+			IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3		0x21		/* SODIMM  53 */
+			IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT	0x21
+		>;
+	};
+
+	pinctrl_usdhc2_200mhz: usdhc2grp200mhz {
+		fsl,pins = <
+			IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK		0x06000041	/* SODIMM  47 */
+			IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD		0x21		/* SODIMM 190 */
+			IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0		0x21		/* SODIMM 192 */
+			IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1		0x21		/* SODIMM  49 */
+			IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2		0x21		/* SODIMM  51 */
+			IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3		0x21		/* SODIMM  53 */
+			IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT	0x21
+		>;
+	};
+
+	pinctrl_usdhc2_sleep: usdhc2slpgrp {
+		fsl,pins = <
+			IMX8QXP_USDHC1_CLK_LSIO_GPIO4_IO23		0x60		/* SODIMM  47 */
+			IMX8QXP_USDHC1_CMD_LSIO_GPIO4_IO24		0x60		/* SODIMM 190 */
+			IMX8QXP_USDHC1_DATA0_LSIO_GPIO4_IO25		0x60		/* SODIMM 192 */
+			IMX8QXP_USDHC1_DATA1_LSIO_GPIO4_IO26		0x60		/* SODIMM  49 */
+			IMX8QXP_USDHC1_DATA2_LSIO_GPIO4_IO27		0x60		/* SODIMM  51 */
+			IMX8QXP_USDHC1_DATA3_LSIO_GPIO4_IO28		0x60		/* SODIMM  53 */
+			IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT	0x21
+		>;
+	};
+
+	pinctrl_wifi: wifigrp {
+		fsl,pins = <
+			IMX8QXP_SCU_BOOT_MODE3_SCU_DSC_RTC_CLOCK_OUTPUT_32K	0x20
+		>;
+	};
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
index 19468058e6ae..d3d26cca7d52 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
@@ -137,6 +137,8 @@
 };
 
 &usdhc1 {
+	assigned-clocks = <&clk IMX_CONN_SDHC0_CLK>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	bus-width = <8>;
@@ -147,6 +149,8 @@
 };
 
 &usdhc2 {
+	assigned-clocks = <&clk IMX_CONN_SDHC1_CLK>;
+	assigned-clock-rates = <200000000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <4>;
@@ -234,3 +238,7 @@
 &adma_dsp {
 	status = "okay";
 };
+
+&scu_key {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
index 1133b412182a..9646a41e0532 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
@@ -8,6 +8,7 @@
 #include <dt-bindings/clock/imx8-clock.h>
 #include <dt-bindings/firmware/imx/rsrc.h>
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/pinctrl/pads-imx8qxp.h>
 
@@ -174,6 +175,12 @@
 			#power-domain-cells = <1>;
 		};
 
+		scu_key: scu-key {
+			compatible = "fsl,imx8qxp-sc-key", "fsl,imx-sc-key";
+			linux,keycodes = <KEY_POWER>;
+			status = "disabled";
+		};
+
 		rtc: rtc {
 			compatible = "fsl,imx8qxp-sc-rtc";
 		};
@@ -361,8 +368,6 @@
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC0_PER_CLK>,
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC0_HCLK>;
 			clock-names = "ipg", "per", "ahb";
-			assigned-clocks = <&clk IMX_CONN_SDHC0_CLK>;
-			assigned-clock-rates = <200000000>;
 			power-domains = <&pd IMX_SC_R_SDHC_0>;
 			status = "disabled";
 		};
@@ -376,8 +381,6 @@
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC1_PER_CLK>,
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC1_HCLK>;
 			clock-names = "ipg", "per", "ahb";
-			assigned-clocks = <&clk IMX_CONN_SDHC1_CLK>;
-			assigned-clock-rates = <200000000>;
 			power-domains = <&pd IMX_SC_R_SDHC_1>;
 			fsl,tuning-start-tap = <20>;
 			fsl,tuning-step= <2>;
@@ -393,8 +396,6 @@
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC2_PER_CLK>,
 				 <&conn_lpcg IMX_CONN_LPCG_SDHC2_HCLK>;
 			clock-names = "ipg", "per", "ahb";
-			assigned-clocks = <&clk IMX_CONN_SDHC2_CLK>;
-			assigned-clock-rates = <200000000>;
 			power-domains = <&pd IMX_SC_R_SDHC_2>;
 			status = "disabled";
 		};
diff --git a/arch/arm64/boot/dts/freescale/s32v234-evb.dts b/arch/arm64/boot/dts/freescale/s32v234-evb.dts
new file mode 100644
index 000000000000..4b802518cefc
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/s32v234-evb.dts
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016-2017 NXP
+ */
+
+/dts-v1/;
+#include "s32v234.dtsi"
+
+/ {
+	model = "NXP S32V234-EVB2 Board";
+	compatible = "fsl,s32v234-evb", "fsl,s32v234";
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/freescale/s32v234.dtsi b/arch/arm64/boot/dts/freescale/s32v234.dtsi
new file mode 100644
index 000000000000..e746b9c48f7a
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/s32v234.dtsi
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016-2018 NXP
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+	compatible = "fsl,s32v234";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x0>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x80000000>;
+			next-level-cache = <&cluster0_l2_cache>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x1>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x80000000>;
+			next-level-cache = <&cluster0_l2_cache>;
+		};
+
+		cpu2: cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x100>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x80000000>;
+			next-level-cache = <&cluster1_l2_cache>;
+		};
+
+		cpu3: cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x101>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x80000000>;
+			next-level-cache = <&cluster1_l2_cache>;
+		};
+
+		cluster0_l2_cache: l2-cache0 {
+			compatible = "cache";
+		};
+
+		cluster1_l2_cache: l2-cache1 {
+			compatible = "cache";
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) |
+					  IRQ_TYPE_LEVEL_LOW)>;
+		/* clock-frequency might be modified by u-boot, depending on the
+		 * chip version.
+		 */
+		clock-frequency = <10000000>;
+	};
+
+	gic: interrupt-controller@7d001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x7d001000 0 0x1000>,
+		      <0 0x7d002000 0 0x2000>,
+		      <0 0x7d004000 0 0x2000>,
+		      <0 0x7d006000 0 0x2000>;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) |
+					 IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	soc {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		ranges;
+
+		aips0: aips-bus@40000000 {
+			compatible = "simple-bus";
+			#address-cells = <2>;
+			#size-cells = <2>;
+			interrupt-parent = <&gic>;
+			reg = <0x0 0x40000000 0x0 0x7d000>;
+			ranges;
+
+			uart0: serial@40053000 {
+				compatible = "fsl,s32v234-linflexuart";
+				reg = <0x0 0x40053000 0x0 0x1000>;
+				interrupts = <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>;
+				status = "disabled";
+			};
+		};
+
+		aips1: aips-bus@40080000 {
+			compatible = "simple-bus";
+			#address-cells = <2>;
+			#size-cells = <2>;
+			interrupt-parent = <&gic>;
+			reg = <0x0 0x40080000 0x0 0x70000>;
+			ranges;
+
+			uart1: serial@400bc000 {
+				compatible = "fsl,s32v234-linflexuart";
+				reg = <0x0 0x400bc000 0x0 0x1000>;
+				interrupts = <GIC_SPI 60 IRQ_TYPE_EDGE_RISING>;
+				status = "disabled";
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 108e2a4227f6..2072b637b5af 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -260,6 +260,7 @@
 			compatible = "hisilicon,hi6220-aoctrl", "syscon";
 			reg = <0x0 0xf7800000 0x0 0x2000>;
 			#clock-cells = <1>;
+			#reset-cells = <1>;
 		};
 
 		sys_ctrl: sys_ctrl@f7030000 {
@@ -1021,6 +1022,43 @@
 			clock-names = "apb_pclk";
 			cpu = <&cpu7>;
 		};
+
+		mali: gpu@f4080000 {
+			compatible = "hisilicon,hi6220-mali", "arm,mali-450";
+			reg = <0x0 0xf4080000 0x0 0x00040000>;
+			interrupt-parent = <&gic>;
+			interrupts =	<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_PPI 126 IRQ_TYPE_LEVEL_HIGH>;
+
+			interrupt-names = "gp",
+					  "gpmmu",
+					  "pp",
+					  "pp0",
+					  "ppmmu0",
+					  "pp1",
+					  "ppmmu1",
+					  "pp2",
+					  "ppmmu2",
+					  "pp3",
+					  "ppmmu3";
+			clocks = <&media_ctrl HI6220_G3D_CLK>,
+				 <&media_ctrl HI6220_G3D_PCLK>;
+			clock-names = "core", "bus";
+			assigned-clocks = <&media_ctrl HI6220_G3D_CLK>,
+					  <&media_ctrl HI6220_G3D_PCLK>;
+			assigned-clock-rates = <500000000>, <144000000>;
+			reset-names = "ao_g3d", "media_g3d";
+			resets = <&ao_ctrl AO_G3D>, <&media_ctrl MEDIA_G3D>;
+		};
 	};
 };
 
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
index 36abc25320a8..d43e1299c8ef 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
@@ -12,6 +12,19 @@
 	#address-cells = <2>;
 	#size-cells = <2>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		service_reserved: svcbuffer@0 {
+			compatible = "shared-dma-pool";
+			reg = <0x0 0x0 0x0 0x1000000>;
+			alignment = <0x1000>;
+			no-map;
+		};
+	};
+
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
@@ -47,10 +60,10 @@
 
 	pmu {
 		compatible = "arm,armv8-pmuv3";
-		interrupts = <0 120 8>,
-			     <0 121 8>,
-			     <0 122 8>,
-			     <0 123 8>;
+		interrupts = <0 170 4>,
+			     <0 171 4>,
+			     <0 172 4>,
+			     <0 173 4>;
 		interrupt-affinity = <&cpu0>,
 				     <&cpu1>,
 				     <&cpu2>,
@@ -81,6 +94,13 @@
 		interrupt-parent = <&intc>;
 		ranges = <0 0 0 0xffffffff>;
 
+		base_fpga_region {
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			compatible = "fpga-region";
+			fpga-mgr = <&fpga_mgr>;
+		};
+
 		gmac0: ethernet@ff800000 {
 			compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac";
 			reg = <0xff800000 0x2000>;
@@ -442,5 +462,17 @@
 
 			status = "disabled";
 		};
+
+		firmware {
+			svc {
+				compatible = "intel,stratix10-svc";
+				method = "smc";
+				memory-region = <&service_reserved>;
+
+				fpga_mgr: fpga-mgr {
+					compatible = "intel,stratix10-soc-fpga-mgr";
+				};
+			};
+		};
 	};
 };
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
index 7814a9e8eb08..e794a12ba7c5 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
@@ -18,6 +18,24 @@
 		stdout-path = "serial0:115200n8";
 	};
 
+	leds {
+		compatible = "gpio-leds";
+		hps0 {
+			label = "hps_led0";
+			gpios = <&portb 20 GPIO_ACTIVE_HIGH>;
+		};
+
+		hps1 {
+			label = "hps_led1";
+			gpios = <&portb 19 GPIO_ACTIVE_HIGH>;
+		};
+
+		hps2 {
+			label = "hps_led2";
+			gpios = <&portb 21 GPIO_ACTIVE_HIGH>;
+		};
+	};
+
 	memory {
 		device_type = "memory";
 		/* We expect the bootloader to fill in the reg */
@@ -70,6 +88,46 @@
 	status = "okay";
 };
 
+&usb0 {
+	status = "okay";
+	disable-over-current;
+};
+
 &watchdog0 {
 	status = "okay";
 };
+
+&qspi {
+	flash@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "mt25qu02g";
+		reg = <0>;
+		spi-max-frequency = <100000000>;
+
+		m25p,fast-read;
+		cdns,page-size = <256>;
+		cdns,block-size = <16>;
+		cdns,read-delay = <1>;
+		cdns,tshsl-ns = <50>;
+		cdns,tsd2d-ns = <50>;
+		cdns,tchsh-ns = <4>;
+		cdns,tslch-ns = <4>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			qspi_boot: partition@0 {
+				label = "Boot and fpga data";
+				reg = <0x0 0x034B0000>;
+			};
+
+			qspi_rootfs: partition@34B0000 {
+				label = "Root Filesystem - JFFS2";
+				reg = <0x034B0000 0x0EB50000>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi
index c8dc9c20fba3..64f3b135068d 100644
--- a/arch/arm64/boot/dts/lg/lg1312.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1312.dtsi
@@ -124,7 +124,7 @@
 	amba {
 		#address-cells = <2>;
 		#size-cells = <1>;
-		#interrupts-cells = <3>;
+		#interrupt-cells = <3>;
 
 		compatible = "simple-bus";
 		interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi
index 82c6645b58b7..ac23592ab011 100644
--- a/arch/arm64/boot/dts/lg/lg1313.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1313.dtsi
@@ -124,7 +124,7 @@
 	amba {
 		#address-cells = <2>;
 		#size-cells = <1>;
-		#interrupts-cells = <3>;
+		#interrupt-cells = <3>;
 
 		compatible = "simple-bus";
 		interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/marvell/Makefile b/arch/arm64/boot/dts/marvell/Makefile
index 243338c914a4..f1b5127f0b89 100644
--- a/arch/arm64/boot/dts/marvell/Makefile
+++ b/arch/arm64/boot/dts/marvell/Makefile
@@ -10,3 +10,6 @@ dtb-$(CONFIG_ARCH_MVEBU) += armada-8040-db.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-8040-mcbin.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-8040-mcbin-singleshot.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-8080-db.dtb
+dtb-$(CONFIG_ARCH_MVEBU) += cn9130-db.dtb
+dtb-$(CONFIG_ARCH_MVEBU) += cn9131-db.dtb
+dtb-$(CONFIG_ARCH_MVEBU) += cn9132-db.dtb
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-emmc.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-emmc.dts
new file mode 100644
index 000000000000..bd9ed9dc9c3e
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-emmc.dts
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Globalscale Marvell ESPRESSOBin Board with eMMC
+ * Copyright (C) 2018 Marvell
+ *
+ * Romain Perier <romain.perier@free-electrons.com>
+ * Konstantin Porotchkin <kostap@marvell.com>
+ *
+ */
+/*
+ * Schematic available at http://espressobin.net/wp-content/uploads/2017/08/ESPRESSObin_V5_Schematics.pdf
+ */
+
+#include "armada-3720-espressobin.dtsi"
+
+/ {
+	model = "Globalscale Marvell ESPRESSOBin Board (eMMC)";
+	compatible = "globalscale,espressobin-emmc", "globalscale,espressobin",
+		     "marvell,armada3720", "marvell,armada3710";
+};
+
+/* U11 */
+&sdhci0 {
+	non-removable;
+	bus-width = <8>;
+	mmc-ddr-1_8v;
+	mmc-hs400-1_8v;
+	marvell,xenon-emmc;
+	marvell,xenon-tun-count = <9>;
+	marvell,pad-type = "fixed-1-8v";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc_pins>;
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	mmccard: mmccard@0 {
+		compatible = "mmc-card";
+		reg = <0>;
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts
new file mode 100644
index 000000000000..6e876a6d9532
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Globalscale Marvell ESPRESSOBin Board V7 with eMMC
+ * Copyright (C) 2018 Marvell
+ *
+ * Romain Perier <romain.perier@free-electrons.com>
+ * Konstantin Porotchkin <kostap@marvell.com>
+ *
+ */
+/*
+ * Schematic available at http://wiki.espressobin.net/tiki-download_file.php?fileId=200
+ */
+
+#include "armada-3720-espressobin.dtsi"
+
+/ {
+	model = "Globalscale Marvell ESPRESSOBin Board V7 (eMMC)";
+	compatible = "globalscale,espressobin-v7-emmc", "globalscale,espressobin-v7",
+		     "globalscale,espressobin", "marvell,armada3720",
+		     "marvell,armada3710";
+};
+
+&switch0 {
+	ports {
+		port@1 {
+			reg = <1>;
+			label = "lan1";
+			phy-handle = <&switch0phy0>;
+		};
+
+		port@3 {
+			reg = <3>;
+			label = "wan";
+			phy-handle = <&switch0phy2>;
+		};
+	};
+};
+
+/* U11 */
+&sdhci0 {
+	non-removable;
+	bus-width = <8>;
+	mmc-ddr-1_8v;
+	mmc-hs400-1_8v;
+	marvell,xenon-emmc;
+	marvell,xenon-tun-count = <9>;
+	marvell,pad-type = "fixed-1-8v";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc_pins>;
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	mmccard: mmccard@0 {
+		compatible = "mmc-card";
+		reg = <0>;
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts
new file mode 100644
index 000000000000..0f8405d085fd
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Globalscale Marvell ESPRESSOBin Board V7
+ * Copyright (C) 2018 Marvell
+ *
+ * Romain Perier <romain.perier@free-electrons.com>
+ * Konstantin Porotchkin <kostap@marvell.com>
+ *
+ */
+/*
+ * Schematic available at http://wiki.espressobin.net/tiki-download_file.php?fileId=200
+ */
+
+#include "armada-3720-espressobin.dtsi"
+
+/ {
+	model = "Globalscale Marvell ESPRESSOBin Board V7";
+	compatible = "globalscale,espressobin-v7", "globalscale,espressobin",
+		     "marvell,armada3720", "marvell,armada3710";
+};
+
+&switch0 {
+	ports {
+		port@1 {
+			reg = <1>;
+			label = "lan1";
+			phy-handle = <&switch0phy0>;
+		};
+
+		port@3 {
+			reg = <3>;
+			label = "wan";
+			phy-handle = <&switch0phy2>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
index fbcf03f86c96..1542d836c090 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
@@ -12,191 +12,9 @@
 
 /dts-v1/;
 
-#include <dt-bindings/gpio/gpio.h>
-#include "armada-372x.dtsi"
+#include "armada-3720-espressobin.dtsi"
 
 / {
 	model = "Globalscale Marvell ESPRESSOBin Board";
 	compatible = "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710";
-
-	chosen {
-		stdout-path = "serial0:115200n8";
-	};
-
-	memory@0 {
-		device_type = "memory";
-		reg = <0x00000000 0x00000000 0x00000000 0x20000000>;
-	};
-
-	vcc_sd_reg1: regulator {
-		compatible = "regulator-gpio";
-		regulator-name = "vcc_sd1";
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <3300000>;
-		regulator-boot-on;
-
-		gpios = <&gpionb 4 GPIO_ACTIVE_HIGH>;
-		gpios-states = <0>;
-		states = <1800000 0x1
-			  3300000 0x0>;
-		enable-active-high;
-	};
-};
-
-/* J9 */
-&pcie0 {
-	status = "okay";
-	phys = <&comphy1 0>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>;
-};
-
-/* J6 */
-&sata {
-	status = "okay";
-	phys = <&comphy2 0>;
-	phy-names = "sata-phy";
-};
-
-/* J1 */
-&sdhci1 {
-	wp-inverted;
-	bus-width = <4>;
-	cd-gpios = <&gpionb 3 GPIO_ACTIVE_LOW>;
-	marvell,pad-type = "sd";
-	vqmmc-supply = <&vcc_sd_reg1>;
-
-	pinctrl-names = "default";
-	pinctrl-0 = <&sdio_pins>;
-	status = "okay";
-};
-
-/* U11 */
-&sdhci0 {
-	non-removable;
-	bus-width = <8>;
-	mmc-ddr-1_8v;
-	mmc-hs400-1_8v;
-	marvell,xenon-emmc;
-	marvell,xenon-tun-count = <9>;
-	marvell,pad-type = "fixed-1-8v";
-
-	pinctrl-names = "default";
-	pinctrl-0 = <&mmc_pins>;
-/*
- * This eMMC is not populated on all boards, so disable it by
- * default and let the bootloader enable it, if it is present
- */
-	status = "disabled";
-};
-
-&spi0 {
-	status = "okay";
-
-	flash@0 {
-		reg = <0>;
-		compatible = "jedec,spi-nor";
-		spi-max-frequency = <104000000>;
-		m25p,fast-read;
-	};
-};
-
-/* Exported on the micro USB connector J5 through an FTDI */
-&uart0 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&uart1_pins>;
-	status = "okay";
-};
-
-/*
- * Connector J17 and J18 expose a number of different features. Some pins are
- * multiplexed. This is the case for instance for the following features:
- * - UART1 (pin 24 = RX, pin 26 = TX). See armada-3720-db.dts for an example of
- *   how to enable it. Beware that the signals are 1.8V TTL.
- * - I2C
- * - SPI
- * - MMC
- */
-
-/* J7 */
-&usb3 {
-	status = "okay";
-};
-
-/* J8 */
-&usb2 {
-	status = "okay";
-};
-
-&mdio {
-	switch0: switch0@1 {
-		compatible = "marvell,mv88e6085";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <1>;
-
-		dsa,member = <0 0>;
-
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			port@0 {
-				reg = <0>;
-				label = "cpu";
-				ethernet = <&eth0>;
-				phy-mode = "rgmii-id";
-				fixed-link {
-					speed = <1000>;
-					full-duplex;
-				};
-			};
-
-			port@1 {
-				reg = <1>;
-				label = "wan";
-				phy-handle = <&switch0phy0>;
-			};
-
-			port@2 {
-				reg = <2>;
-				label = "lan0";
-				phy-handle = <&switch0phy1>;
-			};
-
-			port@3 {
-				reg = <3>;
-				label = "lan1";
-				phy-handle = <&switch0phy2>;
-			};
-
-		};
-
-		mdio {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			switch0phy0: switch0phy0@11 {
-				reg = <0x11>;
-			};
-			switch0phy1: switch0phy1@12 {
-				reg = <0x12>;
-			};
-			switch0phy2: switch0phy2@13 {
-				reg = <0x13>;
-			};
-		};
-	};
-};
-
-&eth0 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&rgmii_pins>, <&smi_pins>;
-	phy-mode = "rgmii-id";
-	status = "okay";
-
-	fixed-link {
-		speed = <1000>;
-		full-duplex;
-	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi
new file mode 100644
index 000000000000..53b8ac55a7f3
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Globalscale Marvell ESPRESSOBin Board
+ * Copyright (C) 2016 Marvell
+ *
+ * Romain Perier <romain.perier@free-electrons.com>
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include "armada-372x.dtsi"
+
+/ {
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000 0x20000000>;
+	};
+
+	vcc_sd_reg1: regulator {
+		compatible = "regulator-gpio";
+		regulator-name = "vcc_sd1";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+
+		gpios = <&gpionb 4 GPIO_ACTIVE_HIGH>;
+		gpios-states = <0>;
+		states = <1800000 0x1
+			  3300000 0x0>;
+		enable-active-high;
+	};
+};
+
+/* J9 */
+&pcie0 {
+	status = "okay";
+	phys = <&comphy1 0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>;
+};
+
+/* J6 */
+&sata {
+	status = "okay";
+	phys = <&comphy2 0>;
+	phy-names = "sata-phy";
+};
+
+/* J1 */
+&sdhci1 {
+	wp-inverted;
+	bus-width = <4>;
+	cd-gpios = <&gpionb 3 GPIO_ACTIVE_LOW>;
+	marvell,pad-type = "sd";
+	vqmmc-supply = <&vcc_sd_reg1>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&sdio_pins>;
+	status = "okay";
+};
+
+&spi0 {
+	status = "okay";
+
+	flash@0 {
+		reg = <0>;
+		compatible = "jedec,spi-nor";
+		spi-max-frequency = <104000000>;
+		m25p,fast-read;
+	};
+};
+
+/* Exported on the micro USB connector J5 through an FTDI */
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>;
+	status = "okay";
+};
+
+/*
+ * Connector J17 and J18 expose a number of different features. Some pins are
+ * multiplexed. This is the case for instance for the following features:
+ * - UART1 (pin 24 = RX, pin 26 = TX). See armada-3720-db.dts for an example of
+ *   how to enable it. Beware that the signals are 1.8V TTL.
+ * - I2C
+ * - SPI
+ * - MMC
+ */
+
+/* J7 */
+&usb3 {
+	status = "okay";
+};
+
+/* J8 */
+&usb2 {
+	status = "okay";
+};
+
+&mdio {
+	switch0: switch0@1 {
+		compatible = "marvell,mv88e6085";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <1>;
+
+		dsa,member = <0 0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				label = "cpu";
+				ethernet = <&eth0>;
+				phy-mode = "rgmii-id";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+				label = "wan";
+				phy-handle = <&switch0phy0>;
+			};
+
+			port@2 {
+				reg = <2>;
+				label = "lan0";
+				phy-handle = <&switch0phy1>;
+			};
+
+			port@3 {
+				reg = <3>;
+				label = "lan1";
+				phy-handle = <&switch0phy2>;
+			};
+
+		};
+
+		mdio {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			switch0phy0: switch0phy0@11 {
+				reg = <0x11>;
+			};
+			switch0phy1: switch0phy1@12 {
+				reg = <0x12>;
+			};
+			switch0phy2: switch0phy2@13 {
+				reg = <0x13>;
+			};
+		};
+	};
+};
+
+&eth0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmii_pins>, <&smi_pins>;
+	phy-mode = "rgmii-id";
+	status = "okay";
+
+	fixed-link {
+		speed = <1000>;
+		full-duplex;
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index 5f350cc71a2f..bb42d1e6a4e9 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -106,6 +106,14 @@
 		/* enabled by U-Boot if SFP module is present */
 		status = "disabled";
 	};
+
+	firmware {
+		turris-mox-rwtm {
+			compatible = "cznic,turris-mox-rwtm";
+			mboxes = <&rwtm 0>;
+			status = "okay";
+		};
+	};
 };
 
 &i2c0 {
diff --git a/arch/arm64/boot/dts/marvell/armada-70x0.dtsi b/arch/arm64/boot/dts/marvell/armada-70x0.dtsi
index e5c6d7c25819..293403a1a333 100644
--- a/arch/arm64/boot/dts/marvell/armada-70x0.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-70x0.dtsi
@@ -17,23 +17,23 @@
 /*
  * Instantiate the CP110
  */
-#define CP110_NAME		cp0
-#define CP110_BASE		f2000000
-#define CP110_PCIE_IO_BASE	0xf9000000
-#define CP110_PCIE_MEM_BASE	0xf6000000
-#define CP110_PCIE0_BASE	f2600000
-#define CP110_PCIE1_BASE	f2620000
-#define CP110_PCIE2_BASE	f2640000
+#define CP11X_NAME		cp0
+#define CP11X_BASE		f2000000
+#define CP11X_PCIEx_MEM_BASE(iface) (0xf6000000 + (iface * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) 0xf00000
+#define CP11X_PCIE0_BASE	f2600000
+#define CP11X_PCIE1_BASE	f2620000
+#define CP11X_PCIE2_BASE	f2640000
 
 #include "armada-cp110.dtsi"
 
-#undef CP110_NAME
-#undef CP110_BASE
-#undef CP110_PCIE_IO_BASE
-#undef CP110_PCIE_MEM_BASE
-#undef CP110_PCIE0_BASE
-#undef CP110_PCIE1_BASE
-#undef CP110_PCIE2_BASE
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
 
 &cp0_gpio1 {
 	status = "okay";
diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi
index d250f4b2bfed..572e2610e0a3 100644
--- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi
@@ -179,8 +179,7 @@
 	num-lanes = <4>;
 	num-viewport = <8>;
 	reset-gpios = <&cp0_gpio2 20 GPIO_ACTIVE_LOW>;
-	ranges = <0x81000000 0x0 0xf9010000 0x0 0xf9010000 0x0 0x10000
-		  0x82000000 0x0 0xc0000000 0x0 0xc0000000 0x0 0x20000000>;
+	ranges = <0x82000000 0x0 0xc0000000 0x0 0xc0000000 0x0 0x20000000>;
 	phys = <&cp0_comphy0 0>, <&cp0_comphy1 0>,
 	       <&cp0_comphy2 0>, <&cp0_comphy3 0>;
 	phy-names = "cp0-pcie0-x4-lane0-phy", "cp0-pcie0-x4-lane1-phy",
diff --git a/arch/arm64/boot/dts/marvell/armada-80x0.dtsi b/arch/arm64/boot/dts/marvell/armada-80x0.dtsi
index 8129b40f12a4..ee67c70bf02e 100644
--- a/arch/arm64/boot/dts/marvell/armada-80x0.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-80x0.dtsi
@@ -19,44 +19,44 @@
 /*
  * Instantiate the master CP110
  */
-#define CP110_NAME		cp0
-#define CP110_BASE		f2000000
-#define CP110_PCIE_IO_BASE	0xf9000000
-#define CP110_PCIE_MEM_BASE	0xf6000000
-#define CP110_PCIE0_BASE	f2600000
-#define CP110_PCIE1_BASE	f2620000
-#define CP110_PCIE2_BASE	f2640000
+#define CP11X_NAME		cp0
+#define CP11X_BASE		f2000000
+#define CP11X_PCIEx_MEM_BASE(iface) (0xf6000000 + (iface * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) 0xf00000
+#define CP11X_PCIE0_BASE	f2600000
+#define CP11X_PCIE1_BASE	f2620000
+#define CP11X_PCIE2_BASE	f2640000
 
 #include "armada-cp110.dtsi"
 
-#undef CP110_NAME
-#undef CP110_BASE
-#undef CP110_PCIE_IO_BASE
-#undef CP110_PCIE_MEM_BASE
-#undef CP110_PCIE0_BASE
-#undef CP110_PCIE1_BASE
-#undef CP110_PCIE2_BASE
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
 
 /*
  * Instantiate the slave CP110
  */
-#define CP110_NAME		cp1
-#define CP110_BASE		f4000000
-#define CP110_PCIE_IO_BASE	0xfd000000
-#define CP110_PCIE_MEM_BASE	0xfa000000
-#define CP110_PCIE0_BASE	f4600000
-#define CP110_PCIE1_BASE	f4620000
-#define CP110_PCIE2_BASE	f4640000
+#define CP11X_NAME		cp1
+#define CP11X_BASE		f4000000
+#define CP11X_PCIEx_MEM_BASE(iface) (0xfa000000 + (iface * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) 0xf00000
+#define CP11X_PCIE0_BASE	f4600000
+#define CP11X_PCIE1_BASE	f4620000
+#define CP11X_PCIE2_BASE	f4640000
 
 #include "armada-cp110.dtsi"
 
-#undef CP110_NAME
-#undef CP110_BASE
-#undef CP110_PCIE_IO_BASE
-#undef CP110_PCIE_MEM_BASE
-#undef CP110_PCIE0_BASE
-#undef CP110_PCIE1_BASE
-#undef CP110_PCIE2_BASE
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
 
 /* The 80x0 has two CP blocks, but uses only one block from each. */
 &cp1_gpio1 {
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi
index 9024a2d9db07..09849558a776 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi
@@ -21,6 +21,14 @@
 			reg = <0x000>;
 			enable-method = "psci";
 			#cooling-cells = <2>;
+			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2>;
 		};
 		cpu1: cpu@1 {
 			device_type = "cpu";
@@ -28,6 +36,21 @@
 			reg = <0x001>;
 			enable-method = "psci";
 			#cooling-cells = <2>;
+			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2>;
+		};
+
+		l2: l2-cache {
+			compatible = "cache";
+			cache-size = <0x80000>;
+			cache-line-size = <64>;
+			cache-sets = <512>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
index c25bc65727b5..3db427122f9e 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
@@ -22,6 +22,13 @@
 			enable-method = "psci";
 			#cooling-cells = <2>;
 			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_0>;
 		};
 		cpu1: cpu@1 {
 			device_type = "cpu";
@@ -30,6 +37,13 @@
 			enable-method = "psci";
 			#cooling-cells = <2>;
 			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_0>;
 		};
 		cpu2: cpu@100 {
 			device_type = "cpu";
@@ -38,6 +52,13 @@
 			enable-method = "psci";
 			#cooling-cells = <2>;
 			clocks = <&cpu_clk 1>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_1>;
 		};
 		cpu3: cpu@101 {
 			device_type = "cpu";
@@ -46,6 +67,27 @@
 			enable-method = "psci";
 			#cooling-cells = <2>;
 			clocks = <&cpu_clk 1>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_1>;
+		};
+
+		l2_0: l2-cache0 {
+			compatible = "cache";
+			cache-size = <0x80000>;
+			cache-line-size = <64>;
+			cache-sets = <512>;
+		};
+
+		l2_1: l2-cache1 {
+			compatible = "cache";
+			cache-size = <0x80000>;
+			cache-line-size = <64>;
+			cache-sets = <512>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
index d06dd198f2c7..866628679ac7 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
@@ -5,454 +5,26 @@
  * Device Tree file for Marvell Armada AP806.
  */
 
-#include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/thermal/thermal.h>
-
-/dts-v1/;
+#define AP_NAME		ap806
+#include "armada-ap80x.dtsi"
 
 / {
 	model = "Marvell Armada AP806";
 	compatible = "marvell,armada-ap806";
-	#address-cells = <2>;
-	#size-cells = <2>;
-
-	aliases {
-		serial0 = &uart0;
-		serial1 = &uart1;
-		gpio0 = &ap_gpio;
-		spi0 = &spi0;
-	};
-
-	psci {
-		compatible = "arm,psci-0.2";
-		method = "smc";
-	};
-
-	reserved-memory {
-		#address-cells = <2>;
-		#size-cells = <2>;
-		ranges;
-
-		/*
-		 * This area matches the mapping done with a
-		 * mainline U-Boot, and should be updated by the
-		 * bootloader.
-		 */
-
-		psci-area@4000000 {
-			reg = <0x0 0x4000000 0x0 0x200000>;
-			no-map;
-		};
-	};
-
-	ap806 {
-		#address-cells = <2>;
-		#size-cells = <2>;
-		compatible = "simple-bus";
-		interrupt-parent = <&gic>;
-		ranges;
-
-		config-space@f0000000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "simple-bus";
-			ranges = <0x0 0x0 0xf0000000 0x1000000>;
-
-			gic: interrupt-controller@210000 {
-				compatible = "arm,gic-400";
-				#interrupt-cells = <3>;
-				#address-cells = <1>;
-				#size-cells = <1>;
-				ranges;
-				interrupt-controller;
-				interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
-				reg = <0x210000 0x10000>,
-				      <0x220000 0x20000>,
-				      <0x240000 0x20000>,
-				      <0x260000 0x20000>;
-
-				gic_v2m0: v2m@280000 {
-					compatible = "arm,gic-v2m-frame";
-					msi-controller;
-					reg = <0x280000 0x1000>;
-					arm,msi-base-spi = <160>;
-					arm,msi-num-spis = <32>;
-				};
-				gic_v2m1: v2m@290000 {
-					compatible = "arm,gic-v2m-frame";
-					msi-controller;
-					reg = <0x290000 0x1000>;
-					arm,msi-base-spi = <192>;
-					arm,msi-num-spis = <32>;
-				};
-				gic_v2m2: v2m@2a0000 {
-					compatible = "arm,gic-v2m-frame";
-					msi-controller;
-					reg = <0x2a0000 0x1000>;
-					arm,msi-base-spi = <224>;
-					arm,msi-num-spis = <32>;
-				};
-				gic_v2m3: v2m@2b0000 {
-					compatible = "arm,gic-v2m-frame";
-					msi-controller;
-					reg = <0x2b0000 0x1000>;
-					arm,msi-base-spi = <256>;
-					arm,msi-num-spis = <32>;
-				};
-			};
-
-			timer {
-				compatible = "arm,armv8-timer";
-				interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-					     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-					     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-					     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
-			};
-
-			pmu {
-				compatible = "arm,cortex-a72-pmu";
-				interrupt-parent = <&pic>;
-				interrupts = <17>;
-			};
-
-			odmi: odmi@300000 {
-				compatible = "marvell,odmi-controller";
-				interrupt-controller;
-				msi-controller;
-				marvell,odmi-frames = <4>;
-				reg = <0x300000 0x4000>,
-				      <0x304000 0x4000>,
-				      <0x308000 0x4000>,
-				      <0x30C000 0x4000>;
-				marvell,spi-base = <128>, <136>, <144>, <152>;
-			};
-
-			gicp: gicp@3f0040 {
-				compatible = "marvell,ap806-gicp";
-				reg = <0x3f0040 0x10>;
-				marvell,spi-ranges = <64 64>, <288 64>;
-				msi-controller;
-			};
-
-			pic: interrupt-controller@3f0100 {
-				compatible = "marvell,armada-8k-pic";
-				reg = <0x3f0100 0x10>;
-				#interrupt-cells = <1>;
-				interrupt-controller;
-				interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
-			};
-
-			sei: interrupt-controller@3f0200 {
-				compatible = "marvell,ap806-sei";
-				reg = <0x3f0200 0x40>;
-				interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
-				#interrupt-cells = <1>;
-				interrupt-controller;
-				msi-controller;
-			};
-
-			xor@400000 {
-				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-				reg = <0x400000 0x1000>,
-				      <0x410000 0x1000>;
-				msi-parent = <&gic_v2m0>;
-				clocks = <&ap_clk 3>;
-				dma-coherent;
-			};
-
-			xor@420000 {
-				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-				reg = <0x420000 0x1000>,
-				      <0x430000 0x1000>;
-				msi-parent = <&gic_v2m0>;
-				clocks = <&ap_clk 3>;
-				dma-coherent;
-			};
-
-			xor@440000 {
-				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-				reg = <0x440000 0x1000>,
-				      <0x450000 0x1000>;
-				msi-parent = <&gic_v2m0>;
-				clocks = <&ap_clk 3>;
-				dma-coherent;
-			};
-
-			xor@460000 {
-				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-				reg = <0x460000 0x1000>,
-				      <0x470000 0x1000>;
-				msi-parent = <&gic_v2m0>;
-				clocks = <&ap_clk 3>;
-				dma-coherent;
-			};
-
-			spi0: spi@510600 {
-				compatible = "marvell,armada-380-spi";
-				reg = <0x510600 0x50>;
-				#address-cells = <1>;
-				#size-cells = <0>;
-				interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&ap_clk 3>;
-				status = "disabled";
-			};
-
-			i2c0: i2c@511000 {
-				compatible = "marvell,mv78230-i2c";
-				reg = <0x511000 0x20>;
-				#address-cells = <1>;
-				#size-cells = <0>;
-				interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
-				timeout-ms = <1000>;
-				clocks = <&ap_clk 3>;
-				status = "disabled";
-			};
-
-			uart0: serial@512000 {
-				compatible = "snps,dw-apb-uart";
-				reg = <0x512000 0x100>;
-				reg-shift = <2>;
-				interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
-				reg-io-width = <1>;
-				clocks = <&ap_clk 3>;
-				status = "disabled";
-			};
-
-			uart1: serial@512100 {
-				compatible = "snps,dw-apb-uart";
-				reg = <0x512100 0x100>;
-				reg-shift = <2>;
-				interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;
-				reg-io-width = <1>;
-				clocks = <&ap_clk 3>;
-				status = "disabled";
-
-			};
-
-			watchdog: watchdog@610000 {
-				compatible = "arm,sbsa-gwdt";
-				reg = <0x610000 0x1000>, <0x600000 0x1000>;
-				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
-			};
-
-			ap_sdhci0: sdhci@6e0000 {
-				compatible = "marvell,armada-ap806-sdhci";
-				reg = <0x6e0000 0x300>;
-				interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
-				clock-names = "core";
-				clocks = <&ap_clk 4>;
-				dma-coherent;
-				marvell,xenon-phy-slow-mode;
-				status = "disabled";
-			};
-
-			ap_syscon: system-controller@6f4000 {
-				compatible = "syscon", "simple-mfd";
-				reg = <0x6f4000 0x2000>;
-
-				ap_clk: clock {
-					compatible = "marvell,ap806-clock";
-					#clock-cells = <1>;
-				};
-
-				ap_pinctrl: pinctrl {
-					compatible = "marvell,ap806-pinctrl";
-
-					uart0_pins: uart0-pins {
-						marvell,pins = "mpp11", "mpp19";
-						marvell,function = "uart0";
-					};
-				};
-
-				ap_gpio: gpio@1040 {
-					compatible = "marvell,armada-8k-gpio";
-					offset = <0x1040>;
-					ngpios = <20>;
-					gpio-controller;
-					#gpio-cells = <2>;
-					gpio-ranges = <&ap_pinctrl 0 0 20>;
-				};
-			};
-
-			ap_syscon1: system-controller@6f8000 {
-				compatible = "syscon", "simple-mfd";
-				reg = <0x6f8000 0x1000>;
-				#address-cells = <1>;
-				#size-cells = <1>;
-
-				cpu_clk: clock-cpu@278 {
-					compatible = "marvell,ap806-cpu-clock";
-					clocks = <&ap_clk 0>, <&ap_clk 1>;
-					#clock-cells = <1>;
-					reg = <0x278 0xa30>;
-				};
+};
 
-				ap_thermal: thermal-sensor@80 {
-					compatible = "marvell,armada-ap806-thermal";
-					reg = <0x80 0x10>;
-					interrupt-parent = <&sei>;
-					interrupts = <18>;
-					#thermal-sensor-cells = <1>;
-				};
-			};
-		};
+&ap_syscon0 {
+	ap_clk: clock {
+		compatible = "marvell,ap806-clock";
+		#clock-cells = <1>;
 	};
+};
 
-	/*
-	 * The thermal IP features one internal sensor plus, if applicable, one
-	 * remote channel wired to one sensor per CPU.
-	 *
-	 * Only one thermal zone per AP/CP may trigger interrupts at a time, the
-	 * first one that will have a critical trip point will be chosen.
-	 */
-	thermal-zones {
-		ap_thermal_ic: ap-thermal-ic {
-			polling-delay-passive = <0>; /* Interrupt driven */
-			polling-delay = <0>; /* Interrupt driven */
-
-			thermal-sensors = <&ap_thermal 0>;
-
-			trips {
-				ap_crit: ap-crit {
-					temperature = <100000>; /* mC degrees */
-					hysteresis = <2000>; /* mC degrees */
-					type = "critical";
-				};
-			};
-
-			cooling-maps { };
-		};
-
-		ap_thermal_cpu0: ap-thermal-cpu0 {
-			polling-delay-passive = <1000>;
-			polling-delay = <1000>;
-
-			thermal-sensors = <&ap_thermal 1>;
-
-			trips {
-				cpu0_hot: cpu0-hot {
-					temperature = <85000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-				cpu0_emerg: cpu0-emerg {
-					temperature = <95000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-			};
-
-			cooling-maps {
-				map0_hot: map0-hot {
-					trip = <&cpu0_hot>;
-					cooling-device = <&cpu0 1 2>,
-						<&cpu1 1 2>;
-				};
-				map0_emerg: map0-ermerg {
-					trip = <&cpu0_emerg>;
-					cooling-device = <&cpu0 3 3>,
-						<&cpu1 3 3>;
-				};
-			};
-		};
-
-		ap_thermal_cpu1: ap-thermal-cpu1 {
-			polling-delay-passive = <1000>;
-			polling-delay = <1000>;
-
-			thermal-sensors = <&ap_thermal 2>;
-
-			trips {
-				cpu1_hot: cpu1-hot {
-					temperature = <85000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-				cpu1_emerg: cpu1-emerg {
-					temperature = <95000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-			};
-
-			cooling-maps {
-				map1_hot: map1-hot {
-					trip = <&cpu1_hot>;
-					cooling-device = <&cpu0 1 2>,
-						<&cpu1 1 2>;
-				};
-				map1_emerg: map1-emerg {
-					trip = <&cpu1_emerg>;
-					cooling-device = <&cpu0 3 3>,
-						<&cpu1 3 3>;
-				};
-			};
-		};
-
-		ap_thermal_cpu2: ap-thermal-cpu2 {
-			polling-delay-passive = <1000>;
-			polling-delay = <1000>;
-
-			thermal-sensors = <&ap_thermal 3>;
-
-			trips {
-				cpu2_hot: cpu2-hot {
-					temperature = <85000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-				cpu2_emerg: cpu2-emerg {
-					temperature = <95000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-			};
-
-			cooling-maps {
-				map2_hot: map2-hot {
-					trip = <&cpu2_hot>;
-					cooling-device = <&cpu2 1 2>,
-						<&cpu3 1 2>;
-				};
-				map2_emerg: map2-emerg {
-					trip = <&cpu2_emerg>;
-					cooling-device = <&cpu2 3 3>,
-						<&cpu3 3 3>;
-				};
-			};
-		};
-
-		ap_thermal_cpu3: ap-thermal-cpu3 {
-			polling-delay-passive = <1000>;
-			polling-delay = <1000>;
-
-			thermal-sensors = <&ap_thermal 4>;
-
-			trips {
-				cpu3_hot: cpu3-hot {
-					temperature = <85000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-				cpu3_emerg: cpu3-emerg {
-					temperature = <95000>;
-					hysteresis = <2000>;
-					type = "passive";
-				};
-			};
-
-			cooling-maps {
-				map3_hot: map3-bhot {
-					trip = <&cpu3_hot>;
-					cooling-device = <&cpu2 1 2>,
-						<&cpu3 1 2>;
-				};
-				map3_emerg: map3-emerg {
-					trip = <&cpu3_emerg>;
-					cooling-device = <&cpu2 3 3>,
-						<&cpu3 3 3>;
-				};
-			};
-		};
+&ap_syscon1 {
+	cpu_clk: clock-cpu@278 {
+		compatible = "marvell,ap806-cpu-clock";
+		clocks = <&ap_clk 0>, <&ap_clk 1>;
+		#clock-cells = <1>;
+		reg = <0x278 0xa30>;
 	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-ap807-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap807-quad.dtsi
new file mode 100644
index 000000000000..840466e143b4
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-ap807-quad.dtsi
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Marvell Armada AP807 Quad
+ *
+ * Copyright (C) 2019 Marvell Technology Group Ltd.
+ */
+
+#include "armada-ap807.dtsi"
+
+/ {
+	model = "Marvell Armada AP807 Quad";
+	compatible = "marvell,armada-ap807-quad", "marvell,armada-ap807";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72", "arm,armv8";
+			reg = <0x000>;
+			enable-method = "psci";
+			#cooling-cells = <2>;
+			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_0>;
+		};
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72", "arm,armv8";
+			reg = <0x001>;
+			enable-method = "psci";
+			#cooling-cells = <2>;
+			clocks = <&cpu_clk 0>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_0>;
+		};
+		cpu2: cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72", "arm,armv8";
+			reg = <0x100>;
+			enable-method = "psci";
+			#cooling-cells = <2>;
+			clocks = <&cpu_clk 1>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_1>;
+		};
+		cpu3: cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a72", "arm,armv8";
+			reg = <0x101>;
+			enable-method = "psci";
+			#cooling-cells = <2>;
+			clocks = <&cpu_clk 1>;
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2_1>;
+		};
+
+		l2_0: l2-cache0 {
+			compatible = "cache";
+			cache-size = <0x80000>;
+			cache-line-size = <64>;
+			cache-sets = <512>;
+		};
+
+		l2_1: l2-cache1 {
+			compatible = "cache";
+			cache-size = <0x80000>;
+			cache-line-size = <64>;
+			cache-sets = <512>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap807.dtsi b/arch/arm64/boot/dts/marvell/armada-ap807.dtsi
new file mode 100644
index 000000000000..623010f3ca89
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-ap807.dtsi
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for Marvell Armada AP807
+ *
+ * Copyright (C) 2019 Marvell Technology Group Ltd.
+ */
+
+#define AP_NAME		ap807
+#include "armada-ap80x.dtsi"
+
+/ {
+	model = "Marvell Armada AP807";
+	compatible = "marvell,armada-ap807";
+};
+
+&ap_syscon0 {
+	ap_clk: clock {
+		compatible = "marvell,ap807-clock";
+		#clock-cells = <1>;
+	};
+};
+
+&ap_syscon1 {
+	cpu_clk: clock-cpu {
+		compatible = "marvell,ap807-cpu-clock";
+		clocks = <&ap_clk 0>, <&ap_clk 1>;
+		#clock-cells = <1>;
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
new file mode 100644
index 000000000000..e7438c21ccee
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell Technology Group Ltd.
+ *
+ * Device Tree file for Marvell Armada AP80x.
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/thermal.h>
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+		gpio0 = &ap_gpio;
+		spi0 = &spi0;
+	};
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		/*
+		 * This area matches the mapping done with a
+		 * mainline U-Boot, and should be updated by the
+		 * bootloader.
+		 */
+
+		psci-area@4000000 {
+			reg = <0x0 0x4000000 0x0 0x200000>;
+			no-map;
+		};
+	};
+
+	AP_NAME {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		ranges;
+
+		config-space@f0000000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			ranges = <0x0 0x0 0xf0000000 0x1000000>;
+
+			gic: interrupt-controller@210000 {
+				compatible = "arm,gic-400";
+				#interrupt-cells = <3>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges;
+				interrupt-controller;
+				interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+				reg = <0x210000 0x10000>,
+				      <0x220000 0x20000>,
+				      <0x240000 0x20000>,
+				      <0x260000 0x20000>;
+
+				gic_v2m0: v2m@280000 {
+					compatible = "arm,gic-v2m-frame";
+					msi-controller;
+					reg = <0x280000 0x1000>;
+					arm,msi-base-spi = <160>;
+					arm,msi-num-spis = <32>;
+				};
+				gic_v2m1: v2m@290000 {
+					compatible = "arm,gic-v2m-frame";
+					msi-controller;
+					reg = <0x290000 0x1000>;
+					arm,msi-base-spi = <192>;
+					arm,msi-num-spis = <32>;
+				};
+				gic_v2m2: v2m@2a0000 {
+					compatible = "arm,gic-v2m-frame";
+					msi-controller;
+					reg = <0x2a0000 0x1000>;
+					arm,msi-base-spi = <224>;
+					arm,msi-num-spis = <32>;
+				};
+				gic_v2m3: v2m@2b0000 {
+					compatible = "arm,gic-v2m-frame";
+					msi-controller;
+					reg = <0x2b0000 0x1000>;
+					arm,msi-base-spi = <256>;
+					arm,msi-num-spis = <32>;
+				};
+			};
+
+			timer {
+				compatible = "arm,armv8-timer";
+				interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+			};
+
+			pmu {
+				compatible = "arm,cortex-a72-pmu";
+				interrupt-parent = <&pic>;
+				interrupts = <17>;
+			};
+
+			odmi: odmi@300000 {
+				compatible = "marvell,odmi-controller";
+				interrupt-controller;
+				msi-controller;
+				marvell,odmi-frames = <4>;
+				reg = <0x300000 0x4000>,
+				      <0x304000 0x4000>,
+				      <0x308000 0x4000>,
+				      <0x30C000 0x4000>;
+				marvell,spi-base = <128>, <136>, <144>, <152>;
+			};
+
+			gicp: gicp@3f0040 {
+				compatible = "marvell,ap806-gicp";
+				reg = <0x3f0040 0x10>;
+				marvell,spi-ranges = <64 64>, <288 64>;
+				msi-controller;
+			};
+
+			pic: interrupt-controller@3f0100 {
+				compatible = "marvell,armada-8k-pic";
+				reg = <0x3f0100 0x10>;
+				#interrupt-cells = <1>;
+				interrupt-controller;
+				interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			sei: interrupt-controller@3f0200 {
+				compatible = "marvell,ap806-sei";
+				reg = <0x3f0200 0x40>;
+				interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+				#interrupt-cells = <1>;
+				interrupt-controller;
+				msi-controller;
+			};
+
+			xor@400000 {
+				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+				reg = <0x400000 0x1000>,
+				      <0x410000 0x1000>;
+				msi-parent = <&gic_v2m0>;
+				clocks = <&ap_clk 3>;
+				dma-coherent;
+			};
+
+			xor@420000 {
+				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+				reg = <0x420000 0x1000>,
+				      <0x430000 0x1000>;
+				msi-parent = <&gic_v2m0>;
+				clocks = <&ap_clk 3>;
+				dma-coherent;
+			};
+
+			xor@440000 {
+				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+				reg = <0x440000 0x1000>,
+				      <0x450000 0x1000>;
+				msi-parent = <&gic_v2m0>;
+				clocks = <&ap_clk 3>;
+				dma-coherent;
+			};
+
+			xor@460000 {
+				compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+				reg = <0x460000 0x1000>,
+				      <0x470000 0x1000>;
+				msi-parent = <&gic_v2m0>;
+				clocks = <&ap_clk 3>;
+				dma-coherent;
+			};
+
+			spi0: spi@510600 {
+				compatible = "marvell,armada-380-spi";
+				reg = <0x510600 0x50>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&ap_clk 3>;
+				status = "disabled";
+			};
+
+			i2c0: i2c@511000 {
+				compatible = "marvell,mv78230-i2c";
+				reg = <0x511000 0x20>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+				timeout-ms = <1000>;
+				clocks = <&ap_clk 3>;
+				status = "disabled";
+			};
+
+			uart0: serial@512000 {
+				compatible = "snps,dw-apb-uart";
+				reg = <0x512000 0x100>;
+				reg-shift = <2>;
+				interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+				reg-io-width = <1>;
+				clocks = <&ap_clk 3>;
+				status = "disabled";
+			};
+
+			uart1: serial@512100 {
+				compatible = "snps,dw-apb-uart";
+				reg = <0x512100 0x100>;
+				reg-shift = <2>;
+				interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;
+				reg-io-width = <1>;
+				clocks = <&ap_clk 3>;
+				status = "disabled";
+
+			};
+
+			watchdog: watchdog@610000 {
+				compatible = "arm,sbsa-gwdt";
+				reg = <0x610000 0x1000>, <0x600000 0x1000>;
+				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			ap_sdhci0: sdhci@6e0000 {
+				compatible = "marvell,armada-ap806-sdhci";
+				reg = <0x6e0000 0x300>;
+				interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+				clock-names = "core";
+				clocks = <&ap_clk 4>;
+				dma-coherent;
+				marvell,xenon-phy-slow-mode;
+				status = "disabled";
+			};
+
+			ap_syscon0: system-controller@6f4000 {
+				compatible = "syscon", "simple-mfd";
+				reg = <0x6f4000 0x2000>;
+
+				ap_pinctrl: pinctrl {
+					compatible = "marvell,ap806-pinctrl";
+
+					uart0_pins: uart0-pins {
+						marvell,pins = "mpp11", "mpp19";
+						marvell,function = "uart0";
+					};
+				};
+
+				ap_gpio: gpio@1040 {
+					compatible = "marvell,armada-8k-gpio";
+					offset = <0x1040>;
+					ngpios = <20>;
+					gpio-controller;
+					#gpio-cells = <2>;
+					gpio-ranges = <&ap_pinctrl 0 0 20>;
+				};
+			};
+
+			ap_syscon1: system-controller@6f8000 {
+				compatible = "syscon", "simple-mfd";
+				reg = <0x6f8000 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				ap_thermal: thermal-sensor@80 {
+					compatible = "marvell,armada-ap806-thermal";
+					reg = <0x80 0x10>;
+					interrupt-parent = <&sei>;
+					interrupts = <18>;
+					#thermal-sensor-cells = <1>;
+				};
+			};
+		};
+	};
+
+	/*
+	 * The thermal IP features one internal sensor plus, if applicable, one
+	 * remote channel wired to one sensor per CPU.
+	 *
+	 * Only one thermal zone per AP/CP may trigger interrupts at a time, the
+	 * first one that will have a critical trip point will be chosen.
+	 */
+	thermal-zones {
+		ap_thermal_ic: ap-thermal-ic {
+			polling-delay-passive = <0>; /* Interrupt driven */
+			polling-delay = <0>; /* Interrupt driven */
+
+			thermal-sensors = <&ap_thermal 0>;
+
+			trips {
+				ap_crit: ap-crit {
+					temperature = <100000>; /* mC degrees */
+					hysteresis = <2000>; /* mC degrees */
+					type = "critical";
+				};
+			};
+
+			cooling-maps { };
+		};
+
+		ap_thermal_cpu0: ap-thermal-cpu0 {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&ap_thermal 1>;
+
+			trips {
+				cpu0_hot: cpu0-hot {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu0_emerg: cpu0-emerg {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+			};
+
+			cooling-maps {
+				map0_hot: map0-hot {
+					trip = <&cpu0_hot>;
+					cooling-device = <&cpu0 1 2>,
+						<&cpu1 1 2>;
+				};
+				map0_emerg: map0-ermerg {
+					trip = <&cpu0_emerg>;
+					cooling-device = <&cpu0 3 3>,
+						<&cpu1 3 3>;
+				};
+			};
+		};
+
+		ap_thermal_cpu1: ap-thermal-cpu1 {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&ap_thermal 2>;
+
+			trips {
+				cpu1_hot: cpu1-hot {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu1_emerg: cpu1-emerg {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+			};
+
+			cooling-maps {
+				map1_hot: map1-hot {
+					trip = <&cpu1_hot>;
+					cooling-device = <&cpu0 1 2>,
+						<&cpu1 1 2>;
+				};
+				map1_emerg: map1-emerg {
+					trip = <&cpu1_emerg>;
+					cooling-device = <&cpu0 3 3>,
+						<&cpu1 3 3>;
+				};
+			};
+		};
+
+		ap_thermal_cpu2: ap-thermal-cpu2 {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&ap_thermal 3>;
+
+			trips {
+				cpu2_hot: cpu2-hot {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu2_emerg: cpu2-emerg {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+			};
+
+			cooling-maps {
+				map2_hot: map2-hot {
+					trip = <&cpu2_hot>;
+					cooling-device = <&cpu2 1 2>,
+						<&cpu3 1 2>;
+				};
+				map2_emerg: map2-emerg {
+					trip = <&cpu2_emerg>;
+					cooling-device = <&cpu2 3 3>,
+						<&cpu3 3 3>;
+				};
+			};
+		};
+
+		ap_thermal_cpu3: ap-thermal-cpu3 {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors = <&ap_thermal 4>;
+
+			trips {
+				cpu3_hot: cpu3-hot {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu3_emerg: cpu3-emerg {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+			};
+
+			cooling-maps {
+				map3_hot: map3-bhot {
+					trip = <&cpu3_hot>;
+					cooling-device = <&cpu2 1 2>,
+						<&cpu3 1 2>;
+				};
+				map3_emerg: map3-emerg {
+					trip = <&cpu3_emerg>;
+					cooling-device = <&cpu2 3 3>,
+						<&cpu3 3 3>;
+				};
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-common.dtsi b/arch/arm64/boot/dts/marvell/armada-common.dtsi
index b29c6405d214..c04c6c475022 100644
--- a/arch/arm64/boot/dts/marvell/armada-common.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-common.dtsi
@@ -6,6 +6,6 @@
 /* Common definitions used by Armada 7K/8K DTs */
 #define PASTER(x, y) x ## y
 #define EVALUATOR(x, y) PASTER(x, y)
-#define CP110_LABEL(name) EVALUATOR(CP110_NAME, EVALUATOR(_, name))
-#define CP110_NODE_NAME(name) EVALUATOR(CP110_NAME, EVALUATOR(-, name))
+#define CP11X_LABEL(name) EVALUATOR(CP11X_NAME, EVALUATOR(_, name))
+#define CP11X_NODE_NAME(name) EVALUATOR(CP11X_NAME, EVALUATOR(-, name))
 #define ADDRESSIFY(addr) EVALUATOR(0x, addr)
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
index d81944902650..4fd33b0fa56e 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
@@ -1,579 +1,12 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR MIT)
 /*
- * Copyright (C) 2016 Marvell Technology Group Ltd.
+ * Copyright (C) 2019 Marvell Technology Group Ltd.
  *
  * Device Tree file for Marvell Armada CP110.
  */
 
-#include <dt-bindings/interrupt-controller/mvebu-icu.h>
-#include <dt-bindings/thermal/thermal.h>
+#define CP11X_TYPE cp110
 
-#include "armada-common.dtsi"
+#include "armada-cp11x.dtsi"
 
-#define CP110_PCIEx_IO_BASE(iface)	(CP110_PCIE_IO_BASE + (iface *  0x10000))
-#define CP110_PCIEx_MEM_BASE(iface)	(CP110_PCIE_MEM_BASE + (iface *  0x1000000))
-#define CP110_PCIEx_CONF_BASE(iface)	(CP110_PCIEx_MEM_BASE(iface) + 0xf00000)
-
-/ {
-	/*
-	 * The contents of the node are defined below, in order to
-	 * save one indentation level
-	 */
-	CP110_NAME: CP110_NAME { };
-
-	/*
-	 * CPs only have one sensor in the thermal IC.
-	 *
-	 * The cooling maps are empty as there are no cooling devices.
-	 */
-	thermal-zones {
-		CP110_LABEL(thermal_ic): CP110_NODE_NAME(thermal-ic) {
-			polling-delay-passive = <0>; /* Interrupt driven */
-			polling-delay = <0>; /* Interrupt driven */
-
-			thermal-sensors = <&CP110_LABEL(thermal) 0>;
-
-			trips {
-				CP110_LABEL(crit): crit {
-					temperature = <100000>; /* mC degrees */
-					hysteresis = <2000>; /* mC degrees */
-					type = "critical";
-				};
-			};
-
-			cooling-maps { };
-		};
-	};
-};
-
-&CP110_NAME {
-	#address-cells = <2>;
-	#size-cells = <2>;
-	compatible = "simple-bus";
-	interrupt-parent = <&CP110_LABEL(icu_nsr)>;
-	ranges;
-
-	config-space@CP110_BASE {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "simple-bus";
-		ranges = <0x0 0x0 ADDRESSIFY(CP110_BASE) 0x2000000>;
-
-		CP110_LABEL(ethernet): ethernet@0 {
-			compatible = "marvell,armada-7k-pp22";
-			reg = <0x0 0x100000>, <0x129000 0xb000>;
-			clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>,
-				 <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
-				 <&CP110_LABEL(clk) 1 18>;
-			clock-names = "pp_clk", "gop_clk",
-				      "mg_clk", "mg_core_clk", "axi_clk";
-			marvell,system-controller = <&CP110_LABEL(syscon0)>;
-			status = "disabled";
-			dma-coherent;
-
-			CP110_LABEL(eth0): eth0 {
-				interrupts = <39 IRQ_TYPE_LEVEL_HIGH>,
-					<43 IRQ_TYPE_LEVEL_HIGH>,
-					<47 IRQ_TYPE_LEVEL_HIGH>,
-					<51 IRQ_TYPE_LEVEL_HIGH>,
-					<55 IRQ_TYPE_LEVEL_HIGH>,
-					<59 IRQ_TYPE_LEVEL_HIGH>,
-					<63 IRQ_TYPE_LEVEL_HIGH>,
-					<67 IRQ_TYPE_LEVEL_HIGH>,
-					<71 IRQ_TYPE_LEVEL_HIGH>,
-					<129 IRQ_TYPE_LEVEL_HIGH>;
-				interrupt-names = "hif0", "hif1", "hif2",
-					"hif3", "hif4", "hif5", "hif6", "hif7",
-					"hif8", "link";
-				port-id = <0>;
-				gop-port-id = <0>;
-				status = "disabled";
-			};
-
-			CP110_LABEL(eth1): eth1 {
-				interrupts = <40 IRQ_TYPE_LEVEL_HIGH>,
-					<44 IRQ_TYPE_LEVEL_HIGH>,
-					<48 IRQ_TYPE_LEVEL_HIGH>,
-					<52 IRQ_TYPE_LEVEL_HIGH>,
-					<56 IRQ_TYPE_LEVEL_HIGH>,
-					<60 IRQ_TYPE_LEVEL_HIGH>,
-					<64 IRQ_TYPE_LEVEL_HIGH>,
-					<68 IRQ_TYPE_LEVEL_HIGH>,
-					<72 IRQ_TYPE_LEVEL_HIGH>,
-					<128 IRQ_TYPE_LEVEL_HIGH>;
-				interrupt-names = "hif0", "hif1", "hif2",
-					"hif3", "hif4", "hif5", "hif6", "hif7",
-					"hif8", "link";
-				port-id = <1>;
-				gop-port-id = <2>;
-				status = "disabled";
-			};
-
-			CP110_LABEL(eth2): eth2 {
-				interrupts = <41 IRQ_TYPE_LEVEL_HIGH>,
-					<45 IRQ_TYPE_LEVEL_HIGH>,
-					<49 IRQ_TYPE_LEVEL_HIGH>,
-					<53 IRQ_TYPE_LEVEL_HIGH>,
-					<57 IRQ_TYPE_LEVEL_HIGH>,
-					<61 IRQ_TYPE_LEVEL_HIGH>,
-					<65 IRQ_TYPE_LEVEL_HIGH>,
-					<69 IRQ_TYPE_LEVEL_HIGH>,
-					<73 IRQ_TYPE_LEVEL_HIGH>,
-					<127 IRQ_TYPE_LEVEL_HIGH>;
-				interrupt-names = "hif0", "hif1", "hif2",
-					"hif3", "hif4", "hif5", "hif6", "hif7",
-					"hif8", "link";
-				port-id = <2>;
-				gop-port-id = <3>;
-				status = "disabled";
-			};
-		};
-
-		CP110_LABEL(comphy): phy@120000 {
-			compatible = "marvell,comphy-cp110";
-			reg = <0x120000 0x6000>;
-			marvell,system-controller = <&CP110_LABEL(syscon0)>;
-			clocks = <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
-				 <&CP110_LABEL(clk) 1 18>;
-			clock-names = "mg_clk", "mg_core_clk", "axi_clk";
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			CP110_LABEL(comphy0): phy@0 {
-				reg = <0>;
-				#phy-cells = <1>;
-			};
-
-			CP110_LABEL(comphy1): phy@1 {
-				reg = <1>;
-				#phy-cells = <1>;
-			};
-
-			CP110_LABEL(comphy2): phy@2 {
-				reg = <2>;
-				#phy-cells = <1>;
-			};
-
-			CP110_LABEL(comphy3): phy@3 {
-				reg = <3>;
-				#phy-cells = <1>;
-			};
-
-			CP110_LABEL(comphy4): phy@4 {
-				reg = <4>;
-				#phy-cells = <1>;
-			};
-
-			CP110_LABEL(comphy5): phy@5 {
-				reg = <5>;
-				#phy-cells = <1>;
-			};
-		};
-
-		CP110_LABEL(mdio): mdio@12a200 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "marvell,orion-mdio";
-			reg = <0x12a200 0x10>;
-			clocks = <&CP110_LABEL(clk) 1 9>, <&CP110_LABEL(clk) 1 5>,
-				 <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(xmdio): mdio@12a600 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "marvell,xmdio";
-			reg = <0x12a600 0x10>;
-			clocks = <&CP110_LABEL(clk) 1 5>,
-				 <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(icu): interrupt-controller@1e0000 {
-			compatible = "marvell,cp110-icu";
-			reg = <0x1e0000 0x440>;
-			#address-cells = <1>;
-			#size-cells = <1>;
-
-			CP110_LABEL(icu_nsr): interrupt-controller@10 {
-				compatible = "marvell,cp110-icu-nsr";
-				reg = <0x10 0x20>;
-				#interrupt-cells = <2>;
-				interrupt-controller;
-				msi-parent = <&gicp>;
-			};
-
-			CP110_LABEL(icu_sei): interrupt-controller@50 {
-				compatible = "marvell,cp110-icu-sei";
-				reg = <0x50 0x10>;
-				#interrupt-cells = <2>;
-				interrupt-controller;
-				msi-parent = <&sei>;
-			};
-		};
-
-		CP110_LABEL(rtc): rtc@284000 {
-			compatible = "marvell,armada-8k-rtc";
-			reg = <0x284000 0x20>, <0x284080 0x24>;
-			reg-names = "rtc", "rtc-soc";
-			interrupts = <77 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		CP110_LABEL(syscon0): system-controller@440000 {
-			compatible = "syscon", "simple-mfd";
-			reg = <0x440000 0x2000>;
-
-			CP110_LABEL(clk): clock {
-				compatible = "marvell,cp110-clock";
-				#clock-cells = <2>;
-			};
-
-			CP110_LABEL(gpio1): gpio@100 {
-				compatible = "marvell,armada-8k-gpio";
-				offset = <0x100>;
-				ngpios = <32>;
-				gpio-controller;
-				#gpio-cells = <2>;
-				gpio-ranges = <&CP110_LABEL(pinctrl) 0 0 32>;
-				interrupt-controller;
-				interrupts = <86 IRQ_TYPE_LEVEL_HIGH>,
-					<85 IRQ_TYPE_LEVEL_HIGH>,
-					<84 IRQ_TYPE_LEVEL_HIGH>,
-					<83 IRQ_TYPE_LEVEL_HIGH>;
-				#interrupt-cells = <2>;
-				status = "disabled";
-			};
-
-			CP110_LABEL(gpio2): gpio@140 {
-				compatible = "marvell,armada-8k-gpio";
-				offset = <0x140>;
-				ngpios = <31>;
-				gpio-controller;
-				#gpio-cells = <2>;
-				gpio-ranges = <&CP110_LABEL(pinctrl) 0 32 31>;
-				interrupt-controller;
-				interrupts = <82 IRQ_TYPE_LEVEL_HIGH>,
-					<81 IRQ_TYPE_LEVEL_HIGH>,
-					<80 IRQ_TYPE_LEVEL_HIGH>,
-					<79 IRQ_TYPE_LEVEL_HIGH>;
-				#interrupt-cells = <2>;
-				status = "disabled";
-			};
-		};
-
-		CP110_LABEL(syscon1): system-controller@400000 {
-			compatible = "syscon", "simple-mfd";
-			reg = <0x400000 0x1000>;
-			#address-cells = <1>;
-			#size-cells = <1>;
-
-			CP110_LABEL(thermal): thermal-sensor@70 {
-				compatible = "marvell,armada-cp110-thermal";
-				reg = <0x70 0x10>;
-				interrupts-extended =
-					<&CP110_LABEL(icu_sei) 116 IRQ_TYPE_LEVEL_HIGH>;
-				#thermal-sensor-cells = <1>;
-			};
-		};
-
-		CP110_LABEL(usb3_0): usb3@500000 {
-			compatible = "marvell,armada-8k-xhci",
-			"generic-xhci";
-			reg = <0x500000 0x4000>;
-			dma-coherent;
-			interrupts = <106 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 22>,
-				 <&CP110_LABEL(clk) 1 16>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(usb3_1): usb3@510000 {
-			compatible = "marvell,armada-8k-xhci",
-			"generic-xhci";
-			reg = <0x510000 0x4000>;
-			dma-coherent;
-			interrupts = <105 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 23>,
-				 <&CP110_LABEL(clk) 1 16>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(sata0): sata@540000 {
-			compatible = "marvell,armada-8k-ahci",
-			"generic-ahci";
-			reg = <0x540000 0x30000>;
-			dma-coherent;
-			interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
-			clocks = <&CP110_LABEL(clk) 1 15>,
-				 <&CP110_LABEL(clk) 1 16>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			status = "disabled";
-
-			sata-port@0 {
-				reg = <0>;
-			};
-
-			sata-port@1 {
-				reg = <1>;
-			};
-		};
-
-		CP110_LABEL(xor0): xor@6a0000 {
-			compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-			reg = <0x6a0000 0x1000>, <0x6b0000 0x1000>;
-			dma-coherent;
-			msi-parent = <&gic_v2m0>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 8>,
-				 <&CP110_LABEL(clk) 1 14>;
-		};
-
-		CP110_LABEL(xor1): xor@6c0000 {
-			compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
-			reg = <0x6c0000 0x1000>, <0x6d0000 0x1000>;
-			dma-coherent;
-			msi-parent = <&gic_v2m0>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 7>,
-				 <&CP110_LABEL(clk) 1 14>;
-		};
-
-		CP110_LABEL(spi0): spi@700600 {
-			compatible = "marvell,armada-380-spi";
-			reg = <0x700600 0x50>;
-			#address-cells = <0x1>;
-			#size-cells = <0x0>;
-			clock-names = "core", "axi";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(spi1): spi@700680 {
-			compatible = "marvell,armada-380-spi";
-			reg = <0x700680 0x50>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			clock-names = "core", "axi";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(i2c0): i2c@701000 {
-			compatible = "marvell,mv78230-i2c";
-			reg = <0x701000 0x20>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(i2c1): i2c@701100 {
-			compatible = "marvell,mv78230-i2c";
-			reg = <0x701100 0x20>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(uart0): serial@702000 {
-			compatible = "snps,dw-apb-uart";
-			reg = <0x702000 0x100>;
-			reg-shift = <2>;
-			interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
-			reg-io-width = <1>;
-			clock-names = "baudclk", "apb_pclk";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(uart1): serial@702100 {
-			compatible = "snps,dw-apb-uart";
-			reg = <0x702100 0x100>;
-			reg-shift = <2>;
-			interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
-			reg-io-width = <1>;
-			clock-names = "baudclk", "apb_pclk";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(uart2): serial@702200 {
-			compatible = "snps,dw-apb-uart";
-			reg = <0x702200 0x100>;
-			reg-shift = <2>;
-			interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
-			reg-io-width = <1>;
-			clock-names = "baudclk", "apb_pclk";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(uart3): serial@702300 {
-			compatible = "snps,dw-apb-uart";
-			reg = <0x702300 0x100>;
-			reg-shift = <2>;
-			interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
-			reg-io-width = <1>;
-			clock-names = "baudclk", "apb_pclk";
-			clocks = <&CP110_LABEL(clk) 1 21>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(nand_controller): nand@720000 {
-			/*
-			* Due to the limitation of the pins available
-			* this controller is only usable on the CPM
-			* for A7K and on the CPS for A8K.
-			*/
-			compatible = "marvell,armada-8k-nand-controller",
-				"marvell,armada370-nand-controller";
-			reg = <0x720000 0x54>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			interrupts = <115 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 2>,
-				 <&CP110_LABEL(clk) 1 17>;
-			marvell,system-controller = <&CP110_LABEL(syscon0)>;
-			status = "disabled";
-		};
-
-		CP110_LABEL(trng): trng@760000 {
-			compatible = "marvell,armada-8k-rng",
-			"inside-secure,safexcel-eip76";
-			reg = <0x760000 0x7d>;
-			interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 25>,
-				 <&CP110_LABEL(clk) 1 17>;
-			status = "okay";
-		};
-
-		CP110_LABEL(sdhci0): sdhci@780000 {
-			compatible = "marvell,armada-cp110-sdhci";
-			reg = <0x780000 0x300>;
-			interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
-			clock-names = "core", "axi";
-			clocks = <&CP110_LABEL(clk) 1 4>, <&CP110_LABEL(clk) 1 18>;
-			dma-coherent;
-			status = "disabled";
-		};
-
-		CP110_LABEL(crypto): crypto@800000 {
-			compatible = "inside-secure,safexcel-eip197b";
-			reg = <0x800000 0x200000>;
-			interrupts = <87 IRQ_TYPE_LEVEL_HIGH>,
-				<88 IRQ_TYPE_LEVEL_HIGH>,
-				<89 IRQ_TYPE_LEVEL_HIGH>,
-				<90 IRQ_TYPE_LEVEL_HIGH>,
-				<91 IRQ_TYPE_LEVEL_HIGH>,
-				<92 IRQ_TYPE_LEVEL_HIGH>;
-			interrupt-names = "mem", "ring0", "ring1",
-				"ring2", "ring3", "eip";
-			clock-names = "core", "reg";
-			clocks = <&CP110_LABEL(clk) 1 26>,
-				 <&CP110_LABEL(clk) 1 17>;
-			dma-coherent;
-		};
-	};
-
-	CP110_LABEL(pcie0): pcie@CP110_PCIE0_BASE {
-		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
-		reg = <0 ADDRESSIFY(CP110_PCIE0_BASE) 0 0x10000>,
-		      <0 CP110_PCIEx_CONF_BASE(0) 0 0x80000>;
-		reg-names = "ctrl", "config";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		#interrupt-cells = <1>;
-		device_type = "pci";
-		dma-coherent;
-		msi-parent = <&gic_v2m0>;
-
-		bus-range = <0 0xff>;
-		ranges =
-		/* downstream I/O */
-		<0x81000000 0 CP110_PCIEx_IO_BASE(0) 0  CP110_PCIEx_IO_BASE(0) 0 0x10000
-		/* non-prefetchable memory */
-		0x82000000 0 CP110_PCIEx_MEM_BASE(0) 0  CP110_PCIEx_MEM_BASE(0) 0 0xf00000>;
-		interrupt-map-mask = <0 0 0 0>;
-		interrupt-map = <0 0 0 0 &CP110_LABEL(icu_nsr) 22 IRQ_TYPE_LEVEL_HIGH>;
-		interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
-		num-lanes = <1>;
-		clock-names = "core", "reg";
-		clocks = <&CP110_LABEL(clk) 1 13>, <&CP110_LABEL(clk) 1 14>;
-		status = "disabled";
-	};
-
-	CP110_LABEL(pcie1): pcie@CP110_PCIE1_BASE {
-		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
-		reg = <0 ADDRESSIFY(CP110_PCIE1_BASE) 0 0x10000>,
-		      <0 CP110_PCIEx_CONF_BASE(1) 0 0x80000>;
-		reg-names = "ctrl", "config";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		#interrupt-cells = <1>;
-		device_type = "pci";
-		dma-coherent;
-		msi-parent = <&gic_v2m0>;
-
-		bus-range = <0 0xff>;
-		ranges =
-		/* downstream I/O */
-		<0x81000000 0 CP110_PCIEx_IO_BASE(1) 0  CP110_PCIEx_IO_BASE(1) 0 0x10000
-		/* non-prefetchable memory */
-		0x82000000 0 CP110_PCIEx_MEM_BASE(1) 0  CP110_PCIEx_MEM_BASE(1) 0 0xf00000>;
-		interrupt-map-mask = <0 0 0 0>;
-		interrupt-map = <0 0 0 0 &CP110_LABEL(icu_nsr) 24 IRQ_TYPE_LEVEL_HIGH>;
-		interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
-
-		num-lanes = <1>;
-		clock-names = "core", "reg";
-		clocks = <&CP110_LABEL(clk) 1 11>, <&CP110_LABEL(clk) 1 14>;
-		status = "disabled";
-	};
-
-	CP110_LABEL(pcie2): pcie@CP110_PCIE2_BASE {
-		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
-		reg = <0 ADDRESSIFY(CP110_PCIE2_BASE) 0 0x10000>,
-		      <0 CP110_PCIEx_CONF_BASE(2) 0 0x80000>;
-		reg-names = "ctrl", "config";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		#interrupt-cells = <1>;
-		device_type = "pci";
-		dma-coherent;
-		msi-parent = <&gic_v2m0>;
-
-		bus-range = <0 0xff>;
-		ranges =
-		/* downstream I/O */
-		<0x81000000 0 CP110_PCIEx_IO_BASE(2) 0  CP110_PCIEx_IO_BASE(2) 0 0x10000
-		/* non-prefetchable memory */
-		0x82000000 0 CP110_PCIEx_MEM_BASE(2) 0  CP110_PCIEx_MEM_BASE(2) 0 0xf00000>;
-		interrupt-map-mask = <0 0 0 0>;
-		interrupt-map = <0 0 0 0 &CP110_LABEL(icu_nsr) 23 IRQ_TYPE_LEVEL_HIGH>;
-		interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
-
-		num-lanes = <1>;
-		clock-names = "core", "reg";
-		clocks = <&CP110_LABEL(clk) 1 12>, <&CP110_LABEL(clk) 1 14>;
-		status = "disabled";
-	};
-};
+#undef CP11X_TYPE
diff --git a/arch/arm64/boot/dts/marvell/armada-cp115.dtsi b/arch/arm64/boot/dts/marvell/armada-cp115.dtsi
new file mode 100644
index 000000000000..1d0a9653e681
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-cp115.dtsi
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell Technology Group Ltd.
+ *
+ * Device Tree file for Marvell Armada CP115.
+ */
+
+#define CP11X_TYPE cp115
+
+#include "armada-cp11x.dtsi"
+
+#undef CP11X_TYPE
diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
new file mode 100644
index 000000000000..9dcf16beabf5
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2016 Marvell Technology Group Ltd.
+ *
+ * Device Tree file for Marvell Armada CP11x.
+ */
+
+#include <dt-bindings/interrupt-controller/mvebu-icu.h>
+#include <dt-bindings/thermal/thermal.h>
+
+#include "armada-common.dtsi"
+
+#define CP11X_PCIEx_CONF_BASE(iface)	(CP11X_PCIEx_MEM_BASE(iface) + CP11X_PCIEx_MEM_SIZE(iface))
+
+/ {
+	/*
+	 * The contents of the node are defined below, in order to
+	 * save one indentation level
+	 */
+	CP11X_NAME: CP11X_NAME { };
+
+	/*
+	 * CPs only have one sensor in the thermal IC.
+	 *
+	 * The cooling maps are empty as there are no cooling devices.
+	 */
+	thermal-zones {
+		CP11X_LABEL(thermal_ic): CP11X_NODE_NAME(thermal-ic) {
+			polling-delay-passive = <0>; /* Interrupt driven */
+			polling-delay = <0>; /* Interrupt driven */
+
+			thermal-sensors = <&CP11X_LABEL(thermal) 0>;
+
+			trips {
+				CP11X_LABEL(crit): crit {
+					temperature = <100000>; /* mC degrees */
+					hysteresis = <2000>; /* mC degrees */
+					type = "critical";
+				};
+			};
+
+			cooling-maps { };
+		};
+	};
+};
+
+&CP11X_NAME {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	compatible = "simple-bus";
+	interrupt-parent = <&CP11X_LABEL(icu_nsr)>;
+	ranges;
+
+	config-space@CP11X_BASE {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		ranges = <0x0 0x0 ADDRESSIFY(CP11X_BASE) 0x2000000>;
+
+		CP11X_LABEL(ethernet): ethernet@0 {
+			compatible = "marvell,armada-7k-pp22";
+			reg = <0x0 0x100000>, <0x129000 0xb000>;
+			clocks = <&CP11X_LABEL(clk) 1 3>, <&CP11X_LABEL(clk) 1 9>,
+				 <&CP11X_LABEL(clk) 1 5>, <&CP11X_LABEL(clk) 1 6>,
+				 <&CP11X_LABEL(clk) 1 18>;
+			clock-names = "pp_clk", "gop_clk",
+				      "mg_clk", "mg_core_clk", "axi_clk";
+			marvell,system-controller = <&CP11X_LABEL(syscon0)>;
+			status = "disabled";
+			dma-coherent;
+
+			CP11X_LABEL(eth0): eth0 {
+				interrupts = <39 IRQ_TYPE_LEVEL_HIGH>,
+					<43 IRQ_TYPE_LEVEL_HIGH>,
+					<47 IRQ_TYPE_LEVEL_HIGH>,
+					<51 IRQ_TYPE_LEVEL_HIGH>,
+					<55 IRQ_TYPE_LEVEL_HIGH>,
+					<59 IRQ_TYPE_LEVEL_HIGH>,
+					<63 IRQ_TYPE_LEVEL_HIGH>,
+					<67 IRQ_TYPE_LEVEL_HIGH>,
+					<71 IRQ_TYPE_LEVEL_HIGH>,
+					<129 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "hif0", "hif1", "hif2",
+					"hif3", "hif4", "hif5", "hif6", "hif7",
+					"hif8", "link";
+				port-id = <0>;
+				gop-port-id = <0>;
+				status = "disabled";
+			};
+
+			CP11X_LABEL(eth1): eth1 {
+				interrupts = <40 IRQ_TYPE_LEVEL_HIGH>,
+					<44 IRQ_TYPE_LEVEL_HIGH>,
+					<48 IRQ_TYPE_LEVEL_HIGH>,
+					<52 IRQ_TYPE_LEVEL_HIGH>,
+					<56 IRQ_TYPE_LEVEL_HIGH>,
+					<60 IRQ_TYPE_LEVEL_HIGH>,
+					<64 IRQ_TYPE_LEVEL_HIGH>,
+					<68 IRQ_TYPE_LEVEL_HIGH>,
+					<72 IRQ_TYPE_LEVEL_HIGH>,
+					<128 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "hif0", "hif1", "hif2",
+					"hif3", "hif4", "hif5", "hif6", "hif7",
+					"hif8", "link";
+				port-id = <1>;
+				gop-port-id = <2>;
+				status = "disabled";
+			};
+
+			CP11X_LABEL(eth2): eth2 {
+				interrupts = <41 IRQ_TYPE_LEVEL_HIGH>,
+					<45 IRQ_TYPE_LEVEL_HIGH>,
+					<49 IRQ_TYPE_LEVEL_HIGH>,
+					<53 IRQ_TYPE_LEVEL_HIGH>,
+					<57 IRQ_TYPE_LEVEL_HIGH>,
+					<61 IRQ_TYPE_LEVEL_HIGH>,
+					<65 IRQ_TYPE_LEVEL_HIGH>,
+					<69 IRQ_TYPE_LEVEL_HIGH>,
+					<73 IRQ_TYPE_LEVEL_HIGH>,
+					<127 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "hif0", "hif1", "hif2",
+					"hif3", "hif4", "hif5", "hif6", "hif7",
+					"hif8", "link";
+				port-id = <2>;
+				gop-port-id = <3>;
+				status = "disabled";
+			};
+		};
+
+		CP11X_LABEL(comphy): phy@120000 {
+			compatible = "marvell,comphy-cp110";
+			reg = <0x120000 0x6000>;
+			marvell,system-controller = <&CP11X_LABEL(syscon0)>;
+			clocks = <&CP11X_LABEL(clk) 1 5>, <&CP11X_LABEL(clk) 1 6>,
+				 <&CP11X_LABEL(clk) 1 18>;
+			clock-names = "mg_clk", "mg_core_clk", "axi_clk";
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			CP11X_LABEL(comphy0): phy@0 {
+				reg = <0>;
+				#phy-cells = <1>;
+			};
+
+			CP11X_LABEL(comphy1): phy@1 {
+				reg = <1>;
+				#phy-cells = <1>;
+			};
+
+			CP11X_LABEL(comphy2): phy@2 {
+				reg = <2>;
+				#phy-cells = <1>;
+			};
+
+			CP11X_LABEL(comphy3): phy@3 {
+				reg = <3>;
+				#phy-cells = <1>;
+			};
+
+			CP11X_LABEL(comphy4): phy@4 {
+				reg = <4>;
+				#phy-cells = <1>;
+			};
+
+			CP11X_LABEL(comphy5): phy@5 {
+				reg = <5>;
+				#phy-cells = <1>;
+			};
+		};
+
+		CP11X_LABEL(mdio): mdio@12a200 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "marvell,orion-mdio";
+			reg = <0x12a200 0x10>;
+			clocks = <&CP11X_LABEL(clk) 1 9>, <&CP11X_LABEL(clk) 1 5>,
+				 <&CP11X_LABEL(clk) 1 6>, <&CP11X_LABEL(clk) 1 18>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(xmdio): mdio@12a600 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "marvell,xmdio";
+			reg = <0x12a600 0x10>;
+			clocks = <&CP11X_LABEL(clk) 1 5>,
+				 <&CP11X_LABEL(clk) 1 6>, <&CP11X_LABEL(clk) 1 18>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(icu): interrupt-controller@1e0000 {
+			compatible = "marvell,cp110-icu";
+			reg = <0x1e0000 0x440>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			CP11X_LABEL(icu_nsr): interrupt-controller@10 {
+				compatible = "marvell,cp110-icu-nsr";
+				reg = <0x10 0x20>;
+				#interrupt-cells = <2>;
+				interrupt-controller;
+				msi-parent = <&gicp>;
+			};
+
+			CP11X_LABEL(icu_sei): interrupt-controller@50 {
+				compatible = "marvell,cp110-icu-sei";
+				reg = <0x50 0x10>;
+				#interrupt-cells = <2>;
+				interrupt-controller;
+				msi-parent = <&sei>;
+			};
+		};
+
+		CP11X_LABEL(rtc): rtc@284000 {
+			compatible = "marvell,armada-8k-rtc";
+			reg = <0x284000 0x20>, <0x284080 0x24>;
+			reg-names = "rtc", "rtc-soc";
+			interrupts = <77 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		CP11X_LABEL(syscon0): system-controller@440000 {
+			compatible = "syscon", "simple-mfd";
+			reg = <0x440000 0x2000>;
+
+			CP11X_LABEL(clk): clock {
+				compatible = "marvell,cp110-clock";
+				#clock-cells = <2>;
+			};
+
+			CP11X_LABEL(gpio1): gpio@100 {
+				compatible = "marvell,armada-8k-gpio";
+				offset = <0x100>;
+				ngpios = <32>;
+				gpio-controller;
+				#gpio-cells = <2>;
+				gpio-ranges = <&CP11X_LABEL(pinctrl) 0 0 32>;
+				interrupt-controller;
+				interrupts = <86 IRQ_TYPE_LEVEL_HIGH>,
+					<85 IRQ_TYPE_LEVEL_HIGH>,
+					<84 IRQ_TYPE_LEVEL_HIGH>,
+					<83 IRQ_TYPE_LEVEL_HIGH>;
+				#interrupt-cells = <2>;
+				status = "disabled";
+			};
+
+			CP11X_LABEL(gpio2): gpio@140 {
+				compatible = "marvell,armada-8k-gpio";
+				offset = <0x140>;
+				ngpios = <31>;
+				gpio-controller;
+				#gpio-cells = <2>;
+				gpio-ranges = <&CP11X_LABEL(pinctrl) 0 32 31>;
+				interrupt-controller;
+				interrupts = <82 IRQ_TYPE_LEVEL_HIGH>,
+					<81 IRQ_TYPE_LEVEL_HIGH>,
+					<80 IRQ_TYPE_LEVEL_HIGH>,
+					<79 IRQ_TYPE_LEVEL_HIGH>;
+				#interrupt-cells = <2>;
+				status = "disabled";
+			};
+		};
+
+		CP11X_LABEL(syscon1): system-controller@400000 {
+			compatible = "syscon", "simple-mfd";
+			reg = <0x400000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			CP11X_LABEL(thermal): thermal-sensor@70 {
+				compatible = "marvell,armada-cp110-thermal";
+				reg = <0x70 0x10>;
+				interrupts-extended =
+					<&CP11X_LABEL(icu_sei) 116 IRQ_TYPE_LEVEL_HIGH>;
+				#thermal-sensor-cells = <1>;
+			};
+		};
+
+		CP11X_LABEL(usb3_0): usb3@500000 {
+			compatible = "marvell,armada-8k-xhci",
+			"generic-xhci";
+			reg = <0x500000 0x4000>;
+			dma-coherent;
+			interrupts = <106 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 22>,
+				 <&CP11X_LABEL(clk) 1 16>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(usb3_1): usb3@510000 {
+			compatible = "marvell,armada-8k-xhci",
+			"generic-xhci";
+			reg = <0x510000 0x4000>;
+			dma-coherent;
+			interrupts = <105 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 23>,
+				 <&CP11X_LABEL(clk) 1 16>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(sata0): sata@540000 {
+			compatible = "marvell,armada-8k-ahci",
+			"generic-ahci";
+			reg = <0x540000 0x30000>;
+			dma-coherent;
+			interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&CP11X_LABEL(clk) 1 15>,
+				 <&CP11X_LABEL(clk) 1 16>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			sata-port@0 {
+				reg = <0>;
+			};
+
+			sata-port@1 {
+				reg = <1>;
+			};
+		};
+
+		CP11X_LABEL(xor0): xor@6a0000 {
+			compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+			reg = <0x6a0000 0x1000>, <0x6b0000 0x1000>;
+			dma-coherent;
+			msi-parent = <&gic_v2m0>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 8>,
+				 <&CP11X_LABEL(clk) 1 14>;
+		};
+
+		CP11X_LABEL(xor1): xor@6c0000 {
+			compatible = "marvell,armada-7k-xor", "marvell,xor-v2";
+			reg = <0x6c0000 0x1000>, <0x6d0000 0x1000>;
+			dma-coherent;
+			msi-parent = <&gic_v2m0>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 7>,
+				 <&CP11X_LABEL(clk) 1 14>;
+		};
+
+		CP11X_LABEL(spi0): spi@700600 {
+			compatible = "marvell,armada-380-spi";
+			reg = <0x700600 0x50>;
+			#address-cells = <0x1>;
+			#size-cells = <0x0>;
+			clock-names = "core", "axi";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(spi1): spi@700680 {
+			compatible = "marvell,armada-380-spi";
+			reg = <0x700680 0x50>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			clock-names = "core", "axi";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(i2c0): i2c@701000 {
+			compatible = "marvell,mv78230-i2c";
+			reg = <0x701000 0x20>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(i2c1): i2c@701100 {
+			compatible = "marvell,mv78230-i2c";
+			reg = <0x701100 0x20>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(uart0): serial@702000 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0x702000 0x100>;
+			reg-shift = <2>;
+			interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
+			reg-io-width = <1>;
+			clock-names = "baudclk", "apb_pclk";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(uart1): serial@702100 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0x702100 0x100>;
+			reg-shift = <2>;
+			interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
+			reg-io-width = <1>;
+			clock-names = "baudclk", "apb_pclk";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(uart2): serial@702200 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0x702200 0x100>;
+			reg-shift = <2>;
+			interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
+			reg-io-width = <1>;
+			clock-names = "baudclk", "apb_pclk";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(uart3): serial@702300 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0x702300 0x100>;
+			reg-shift = <2>;
+			interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
+			reg-io-width = <1>;
+			clock-names = "baudclk", "apb_pclk";
+			clocks = <&CP11X_LABEL(clk) 1 21>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(nand_controller): nand@720000 {
+			/*
+			 * Due to the limitation of the pins available
+			 * this controller is only usable on the CPM
+			 * for A7K and on the CPS for A8K.
+			 */
+			compatible = "marvell,armada-8k-nand-controller",
+				"marvell,armada370-nand-controller";
+			reg = <0x720000 0x54>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <115 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 2>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			marvell,system-controller = <&CP11X_LABEL(syscon0)>;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(trng): trng@760000 {
+			compatible = "marvell,armada-8k-rng",
+			"inside-secure,safexcel-eip76";
+			reg = <0x760000 0x7d>;
+			interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 25>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			status = "okay";
+		};
+
+		CP11X_LABEL(sdhci0): sdhci@780000 {
+			compatible = "marvell,armada-cp110-sdhci";
+			reg = <0x780000 0x300>;
+			interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+			clock-names = "core", "axi";
+			clocks = <&CP11X_LABEL(clk) 1 4>, <&CP11X_LABEL(clk) 1 18>;
+			dma-coherent;
+			status = "disabled";
+		};
+
+		CP11X_LABEL(crypto): crypto@800000 {
+			compatible = "inside-secure,safexcel-eip197b";
+			reg = <0x800000 0x200000>;
+			interrupts = <87 IRQ_TYPE_LEVEL_HIGH>,
+				<88 IRQ_TYPE_LEVEL_HIGH>,
+				<89 IRQ_TYPE_LEVEL_HIGH>,
+				<90 IRQ_TYPE_LEVEL_HIGH>,
+				<91 IRQ_TYPE_LEVEL_HIGH>,
+				<92 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "mem", "ring0", "ring1",
+				"ring2", "ring3", "eip";
+			clock-names = "core", "reg";
+			clocks = <&CP11X_LABEL(clk) 1 26>,
+				 <&CP11X_LABEL(clk) 1 17>;
+			dma-coherent;
+		};
+	};
+
+	CP11X_LABEL(pcie0): pcie@CP11X_PCIE0_BASE {
+		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
+		reg = <0 ADDRESSIFY(CP11X_PCIE0_BASE) 0 0x10000>,
+		      <0 CP11X_PCIEx_CONF_BASE(0) 0 0x80000>;
+		reg-names = "ctrl", "config";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		dma-coherent;
+		msi-parent = <&gic_v2m0>;
+
+		bus-range = <0 0xff>;
+		/* non-prefetchable memory */
+		ranges = <0x82000000 0 CP11X_PCIEx_MEM_BASE(0) 0  CP11X_PCIEx_MEM_BASE(0) 0 CP11X_PCIEx_MEM_SIZE(0)>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &CP11X_LABEL(icu_nsr) 22 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+		num-lanes = <1>;
+		clock-names = "core", "reg";
+		clocks = <&CP11X_LABEL(clk) 1 13>, <&CP11X_LABEL(clk) 1 14>;
+		status = "disabled";
+	};
+
+	CP11X_LABEL(pcie1): pcie@CP11X_PCIE1_BASE {
+		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
+		reg = <0 ADDRESSIFY(CP11X_PCIE1_BASE) 0 0x10000>,
+		      <0 CP11X_PCIEx_CONF_BASE(1) 0 0x80000>;
+		reg-names = "ctrl", "config";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		dma-coherent;
+		msi-parent = <&gic_v2m0>;
+
+		bus-range = <0 0xff>;
+		/* non-prefetchable memory */
+		ranges = <0x82000000 0 CP11X_PCIEx_MEM_BASE(1) 0  CP11X_PCIEx_MEM_BASE(1) 0 CP11X_PCIEx_MEM_SIZE(1)>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &CP11X_LABEL(icu_nsr) 24 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+
+		num-lanes = <1>;
+		clock-names = "core", "reg";
+		clocks = <&CP11X_LABEL(clk) 1 11>, <&CP11X_LABEL(clk) 1 14>;
+		status = "disabled";
+	};
+
+	CP11X_LABEL(pcie2): pcie@CP11X_PCIE2_BASE {
+		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
+		reg = <0 ADDRESSIFY(CP11X_PCIE2_BASE) 0 0x10000>,
+		      <0 CP11X_PCIEx_CONF_BASE(2) 0 0x80000>;
+		reg-names = "ctrl", "config";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		dma-coherent;
+		msi-parent = <&gic_v2m0>;
+
+		bus-range = <0 0xff>;
+		/* non-prefetchable memory */
+		ranges = <0x82000000 0 CP11X_PCIEx_MEM_BASE(2) 0  CP11X_PCIEx_MEM_BASE(2) 0 CP11X_PCIEx_MEM_SIZE(2)>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &CP11X_LABEL(icu_nsr) 23 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
+
+		num-lanes = <1>;
+		clock-names = "core", "reg";
+		clocks = <&CP11X_LABEL(clk) 1 12>, <&CP11X_LABEL(clk) 1 14>;
+		status = "disabled";
+	};
+};
diff --git a/arch/arm64/boot/dts/marvell/cn9130-db.dts b/arch/arm64/boot/dts/marvell/cn9130-db.dts
new file mode 100644
index 000000000000..ce49a70d88a0
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/cn9130-db.dts
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * Device tree for the CN9130-DB board.
+ */
+
+#include "cn9130.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	model = "Marvell Armada CN9130-DB";
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	aliases {
+		gpio1 = &cp0_gpio1;
+		gpio2 = &cp0_gpio2;
+		i2c0 = &cp0_i2c0;
+		ethernet0 = &cp0_eth0;
+		ethernet1 = &cp0_eth1;
+		ethernet2 = &cp0_eth2;
+		spi1 = &cp0_spi0;
+		spi2 = &cp0_spi1;
+	};
+
+	memory@00000000 {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x80000000>;
+	};
+
+	ap0_reg_sd_vccq: ap0_sd_vccq@0 {
+		compatible = "regulator-gpio";
+		regulator-name = "ap0_sd_vccq";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		gpios = <&expander0 8 GPIO_ACTIVE_HIGH>;
+		states = <1800000 0x1 3300000 0x0>;
+	};
+
+	cp0_reg_usb3_vbus0: cp0_usb3_vbus@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "cp0-xhci0-vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&expander0 0 GPIO_ACTIVE_HIGH>;
+	};
+
+	cp0_usb3_0_phy0: cp0_usb3_phy@0 {
+		compatible = "usb-nop-xceiv";
+		vcc-supply = <&cp0_reg_usb3_vbus0>;
+	};
+
+	cp0_reg_usb3_vbus1: cp0_usb3_vbus@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "cp0-xhci1-vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&expander0 1 GPIO_ACTIVE_HIGH>;
+	};
+
+	cp0_usb3_0_phy1: cp0_usb3_phy@1 {
+		compatible = "usb-nop-xceiv";
+		vcc-supply = <&cp0_reg_usb3_vbus1>;
+	};
+
+	cp0_reg_sd_vccq: cp0_sd_vccq@0 {
+		compatible = "regulator-gpio";
+		regulator-name = "cp0_sd_vccq";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		gpios = <&expander0 15 GPIO_ACTIVE_HIGH>;
+		states = <1800000 0x1
+			  3300000 0x0>;
+	};
+
+	cp0_reg_sd_vcc: cp0_sd_vcc@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "cp0_sd_vcc";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		gpio = <&expander0 14 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		regulator-always-on;
+	};
+
+	cp0_sfp_eth0: sfp-eth@0 {
+		compatible = "sff,sfp";
+		i2c-bus = <&cp0_sfpp0_i2c>;
+		los-gpio = <&cp0_module_expander1 11 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpio = <&cp0_module_expander1 10 GPIO_ACTIVE_LOW>;
+		tx-disable-gpio = <&cp0_module_expander1 9 GPIO_ACTIVE_HIGH>;
+		tx-fault-gpio = <&cp0_module_expander1 8 GPIO_ACTIVE_HIGH>;
+		/*
+		 * SFP cages are unconnected on early PCBs because of an the I2C
+		 * lanes not being connected. Prevent the port for being
+		 * unusable by disabling the SFP node.
+		 */
+		status = "disabled";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+/* on-board eMMC - U9 */
+&ap_sdhci0 {
+	pinctrl-names = "default";
+	bus-width = <8>;
+	vqmmc-supply = <&ap0_reg_sd_vccq>;
+	status = "okay";
+};
+
+&cp0_crypto {
+	status = "disabled";
+};
+
+&cp0_ethernet {
+	status = "okay";
+};
+
+/* SLM-1521-V2, CON9 */
+&cp0_eth0 {
+	status = "disabled";
+	phy-mode = "10gbase-kr";
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp0_comphy4 0>;
+	managed = "in-band-status";
+	sfp = <&cp0_sfp_eth0>;
+};
+
+/* CON56 */
+&cp0_eth1 {
+	status = "okay";
+	phy = <&phy0>;
+	phy-mode = "rgmii-id";
+};
+
+/* CON57 */
+&cp0_eth2 {
+	status = "okay";
+	phy = <&phy1>;
+	phy-mode = "rgmii-id";
+};
+
+&cp0_gpio1 {
+	status = "okay";
+};
+
+&cp0_gpio2 {
+	status = "okay";
+};
+
+&cp0_i2c0 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp0_i2c0_pins>;
+	clock-frequency = <100000>;
+
+	/* U36 */
+	expander0: pca953x@21 {
+		compatible = "nxp,pca9555";
+		pinctrl-names = "default";
+		gpio-controller;
+		#gpio-cells = <2>;
+		reg = <0x21>;
+		status = "okay";
+	};
+
+	/* U42 */
+	eeprom0: eeprom@50 {
+		compatible = "atmel,24c64";
+		reg = <0x50>;
+		pagesize = <0x20>;
+	};
+
+	/* U38 */
+	eeprom1: eeprom@57 {
+		compatible = "atmel,24c64";
+		reg = <0x57>;
+		pagesize = <0x20>;
+	};
+};
+
+&cp0_i2c1 {
+	status = "okay";
+	clock-frequency = <100000>;
+
+	/* SLM-1521-V2 - U3 */
+	i2c-mux@72 { /* verify address - depends on dpr */
+		compatible = "nxp,pca9544";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x72>;
+		cp0_sfpp0_i2c: i2c@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+		};
+
+		i2c@1 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <1>;
+			/* U12 */
+			cp0_module_expander1: pca9555@21 {
+				compatible = "nxp,pca9555";
+				pinctrl-names = "default";
+				gpio-controller;
+				#gpio-cells = <2>;
+				reg = <0x21>;
+			};
+
+		};
+	};
+};
+
+&cp0_mdio {
+	status = "okay";
+
+	phy0: ethernet-phy@0 {
+		reg = <0>;
+	};
+
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
+
+/* U54 */
+&cp0_nand_controller {
+	pinctrl-names = "default";
+	pinctrl-0 = <&nand_pins &nand_rb>;
+
+	nand@0 {
+		reg = <0>;
+		label = "main-storage";
+		nand-rb = <0>;
+		nand-ecc-mode = "hw";
+		nand-on-flash-bbt;
+		nand-ecc-strength = <8>;
+		nand-ecc-step-size = <512>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "U-Boot";
+				reg = <0 0x200000>;
+			};
+			partition@200000 {
+				label = "Linux";
+				reg = <0x200000 0xd00000>;
+			};
+			partition@1000000 {
+				label = "Filesystem";
+				reg = <0x1000000 0x3f000000>;
+			};
+		};
+	};
+};
+
+/* SLM-1521-V2, CON6 */
+&cp0_pcie0 {
+	status = "okay";
+	num-lanes = <4>;
+	num-viewport = <8>;
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp0_comphy0 0
+		&cp0_comphy1 0
+		&cp0_comphy2 0
+		&cp0_comphy3 0>;
+};
+
+&cp0_sata0 {
+	status = "okay";
+
+	/* SLM-1521-V2, CON2 */
+	sata-port@1 {
+		status = "okay";
+		/* Generic PHY, providing serdes lanes */
+		phys = <&cp0_comphy5 1>;
+	};
+};
+
+/* CON 28 */
+&cp0_sdhci0 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp0_sdhci_pins
+		     &cp0_sdhci_cd_pins>;
+	bus-width = <4>;
+	cd-gpios = <&cp0_gpio2 11 GPIO_ACTIVE_LOW>;
+	no-1-8-v;
+	vqmmc-supply = <&cp0_reg_sd_vccq>;
+	vmmc-supply = <&cp0_reg_sd_vcc>;
+};
+
+/* U55 */
+&cp0_spi1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp0_spi0_pins>;
+	reg = <0x700680 0x50>;
+
+	spi-flash@0 {
+		#address-cells = <0x1>;
+		#size-cells = <0x1>;
+		compatible = "jedec,spi-nor";
+		reg = <0x0>;
+		/* On-board MUX does not allow higher frequencies */
+		spi-max-frequency = <40000000>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "U-Boot-0";
+				reg = <0x0 0x200000>;
+			};
+
+			partition@400000 {
+				label = "Filesystem-0";
+				reg = <0x200000 0xe00000>;
+			};
+		};
+	};
+};
+
+&cp0_syscon0 {
+	cp0_pinctrl: pinctrl {
+		compatible = "marvell,cp115-standalone-pinctrl";
+
+		cp0_i2c0_pins: cp0-i2c-pins-0 {
+			marvell,pins = "mpp37", "mpp38";
+			marvell,function = "i2c0";
+		};
+		cp0_i2c1_pins: cp0-i2c-pins-1 {
+			marvell,pins = "mpp35", "mpp36";
+			marvell,function = "i2c1";
+		};
+		cp0_ge1_rgmii_pins: cp0-ge-rgmii-pins-0 {
+			marvell,pins = "mpp0", "mpp1", "mpp2",
+				       "mpp3", "mpp4", "mpp5",
+				       "mpp6", "mpp7", "mpp8",
+				       "mpp9", "mpp10", "mpp11";
+			marvell,function = "ge0";
+		};
+		cp0_ge2_rgmii_pins: cp0-ge-rgmii-pins-1 {
+			marvell,pins = "mpp44", "mpp45", "mpp46",
+				       "mpp47", "mpp48", "mpp49",
+				       "mpp50", "mpp51", "mpp52",
+				       "mpp53", "mpp54", "mpp55";
+			marvell,function = "ge1";
+		};
+		cp0_sdhci_cd_pins: cp0-sdhci-cd-pins-0 {
+			marvell,pins = "mpp43";
+			marvell,function = "gpio";
+		};
+		cp0_sdhci_pins: cp0-sdhi-pins-0 {
+			marvell,pins = "mpp56", "mpp57", "mpp58",
+				       "mpp59", "mpp60", "mpp61";
+			marvell,function = "sdio";
+		};
+		cp0_spi0_pins: cp0-spi-pins-0 {
+			marvell,pins = "mpp13", "mpp14", "mpp15", "mpp16";
+			marvell,function = "spi1";
+		};
+		nand_pins: nand-pins {
+			marvell,pins = "mpp15", "mpp16", "mpp17", "mpp18",
+				       "mpp19", "mpp20", "mpp21", "mpp22",
+				       "mpp23", "mpp24", "mpp25", "mpp26",
+				       "mpp27";
+			marvell,function = "dev";
+		};
+		nand_rb: nand-rb {
+			marvell,pins = "mpp13";
+			marvell,function = "nf";
+		};
+	};
+};
+
+&cp0_usb3_0 {
+	status = "okay";
+	usb-phy = <&cp0_usb3_0_phy0>;
+	phy-names = "usb";
+};
+
+&cp0_usb3_1 {
+	status = "okay";
+	usb-phy = <&cp0_usb3_0_phy1>;
+	phy-names = "usb";
+};
diff --git a/arch/arm64/boot/dts/marvell/cn9130.dtsi b/arch/arm64/boot/dts/marvell/cn9130.dtsi
new file mode 100644
index 000000000000..a2b7e5ec979d
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/cn9130.dtsi
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * Device tree for the CN9130 SoC.
+ */
+
+#include "armada-ap807-quad.dtsi"
+
+/ {
+	model = "Marvell Armada CN9130 SoC";
+	compatible = "marvell,cn9130", "marvell,armada-ap807-quad",
+		     "marvell,armada-ap807";
+};
+
+/*
+ * Instantiate the internal CP115
+ */
+
+#define CP11X_NAME		cp0
+#define CP11X_BASE		f2000000
+#define CP11X_PCIEx_MEM_BASE(iface) ((iface == 0) ? 0xc0000000 : \
+						    0xe0000000 + ((iface - 1) * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) ((iface == 0) ? 0x1ff00000 : 0xf00000)
+#define CP11X_PCIE0_BASE	f2600000
+#define CP11X_PCIE1_BASE	f2620000
+#define CP11X_PCIE2_BASE	f2640000
+
+#include "armada-cp115.dtsi"
+
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
diff --git a/arch/arm64/boot/dts/marvell/cn9131-db.dts b/arch/arm64/boot/dts/marvell/cn9131-db.dts
new file mode 100644
index 000000000000..3c975f98b2a3
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/cn9131-db.dts
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * Device tree for the CN9131-DB board.
+ */
+
+#include "cn9130-db.dts"
+
+/ {
+	model = "Marvell Armada CN9131-DB";
+	compatible = "marvell,cn9131", "marvell,cn9130",
+		     "marvell,armada-ap807-quad", "marvell,armada-ap807";
+
+	aliases {
+		gpio3 = &cp1_gpio1;
+		gpio4 = &cp1_gpio2;
+		ethernet3 = &cp1_eth0;
+		ethernet4 = &cp1_eth1;
+	};
+
+	cp1_reg_usb3_vbus0: cp1_usb3_vbus@0 {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&cp1_xhci0_vbus_pins>;
+		regulator-name = "cp1-xhci0-vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&cp1_gpio1 3 GPIO_ACTIVE_HIGH>;
+	};
+
+	cp1_usb3_0_phy0: cp1_usb3_phy0 {
+		compatible = "usb-nop-xceiv";
+		vcc-supply = <&cp1_reg_usb3_vbus0>;
+	};
+
+	cp1_sfp_eth1: sfp-eth1 {
+		compatible = "sff,sfp";
+		i2c-bus = <&cp1_i2c0>;
+		los-gpio = <&cp1_gpio1 11 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpio = <&cp1_gpio1 10 GPIO_ACTIVE_LOW>;
+		tx-disable-gpio = <&cp1_gpio1 9 GPIO_ACTIVE_HIGH>;
+		tx-fault-gpio = <&cp1_gpio1 8 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&cp1_sfp_pins>;
+		/*
+		 * SFP cages are unconnected on early PCBs because of an the I2C
+		 * lanes not being connected. Prevent the port for being
+		 * unusable by disabling the SFP node.
+		 */
+		status = "disabled";
+	};
+};
+
+/*
+ * Instantiate the first slave CP115
+ */
+
+#define CP11X_NAME		cp1
+#define CP11X_BASE		f4000000
+#define CP11X_PCIEx_MEM_BASE(iface) (0xe2000000 + (iface * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) 0xf00000
+#define CP11X_PCIE0_BASE	f4600000
+#define CP11X_PCIE1_BASE	f4620000
+#define CP11X_PCIE2_BASE	f4640000
+
+#include "armada-cp115.dtsi"
+
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
+
+&cp1_crypto {
+	status = "disabled";
+};
+
+&cp1_ethernet {
+	status = "okay";
+};
+
+/* CON50 */
+&cp1_eth0 {
+	status = "disabled";
+	phy-mode = "10gbase-kr";
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp1_comphy4 0>;
+	managed = "in-band-status";
+	sfp = <&cp1_sfp_eth1>;
+};
+
+&cp1_gpio1 {
+	status = "okay";
+};
+
+&cp1_gpio2 {
+	status = "okay";
+};
+
+&cp1_i2c0 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp1_i2c0_pins>;
+	clock-frequency = <100000>;
+};
+
+/* CON40 */
+&cp1_pcie0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp1_pcie_reset_pins>;
+	num-lanes = <2>;
+	num-viewport = <8>;
+	marvell,reset-gpio = <&cp1_gpio1 0 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp1_comphy0 0
+		&cp1_comphy1 0>;
+};
+
+&cp1_sata0 {
+	status = "okay";
+
+	/* CON32 */
+	sata-port@1 {
+		/* Generic PHY, providing serdes lanes */
+		phys = <&cp1_comphy5 1>;
+	};
+};
+
+/* U24 */
+&cp1_spi1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp1_spi0_pins>;
+	reg = <0x700680 0x50>;
+
+	spi-flash@0 {
+		#address-cells = <0x1>;
+		#size-cells = <0x1>;
+		compatible = "jedec,spi-nor";
+		reg = <0x0>;
+		/* On-board MUX does not allow higher frequencies */
+		spi-max-frequency = <40000000>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "U-Boot-1";
+				reg = <0x0 0x200000>;
+			};
+
+			partition@400000 {
+				label = "Filesystem-1";
+				reg = <0x200000 0xe00000>;
+			};
+		};
+	};
+
+};
+
+&cp1_syscon0 {
+	cp1_pinctrl: pinctrl {
+		compatible = "marvell,cp115-standalone-pinctrl";
+
+		cp1_i2c0_pins: cp1-i2c-pins-0 {
+			marvell,pins = "mpp37", "mpp38";
+			marvell,function = "i2c0";
+		};
+		cp1_spi0_pins: cp1-spi-pins-0 {
+			marvell,pins = "mpp13", "mpp14", "mpp15", "mpp16";
+			marvell,function = "spi1";
+		};
+		cp1_xhci0_vbus_pins: cp1-xhci0-vbus-pins {
+			marvell,pins = "mpp3";
+			marvell,function = "gpio";
+		};
+		cp1_sfp_pins: sfp-pins {
+			marvell,pins = "mpp8", "mpp9", "mpp10", "mpp11";
+			marvell,function = "gpio";
+		};
+		cp1_pcie_reset_pins: cp1-pcie-reset-pins {
+			marvell,pins = "mpp0";
+			marvell,function = "gpio";
+		};
+	};
+};
+
+/* CON58 */
+&cp1_usb3_1 {
+	status = "okay";
+	usb-phy = <&cp1_usb3_0_phy0>;
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp1_comphy3 1>;
+	phy-names = "usb";
+};
diff --git a/arch/arm64/boot/dts/marvell/cn9132-db.dts b/arch/arm64/boot/dts/marvell/cn9132-db.dts
new file mode 100644
index 000000000000..4ef0df3097ca
--- /dev/null
+++ b/arch/arm64/boot/dts/marvell/cn9132-db.dts
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * Device tree for the CN9132-DB board.
+ */
+
+#include "cn9131-db.dts"
+
+/ {
+	model = "Marvell Armada CN9132-DB";
+	compatible = "marvell,cn9132", "marvell,cn9131", "marvell,cn9130",
+		     "marvell,armada-ap807-quad", "marvell,armada-ap807";
+
+	aliases {
+		gpio5 = &cp2_gpio1;
+		gpio6 = &cp2_gpio2;
+		ethernet5 = &cp2_eth0;
+	};
+
+	cp2_reg_usb3_vbus0: cp2_usb3_vbus@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "cp2-xhci0-vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&cp2_gpio1 2 GPIO_ACTIVE_HIGH>;
+	};
+
+	cp2_usb3_0_phy0: cp2_usb3_phy0 {
+		compatible = "usb-nop-xceiv";
+		vcc-supply = <&cp2_reg_usb3_vbus0>;
+	};
+
+	cp2_reg_usb3_vbus1: cp2_usb3_vbus@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "cp2-xhci1-vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&cp2_gpio1 3 GPIO_ACTIVE_HIGH>;
+	};
+
+	cp2_usb3_0_phy1: cp2_usb3_phy1 {
+		compatible = "usb-nop-xceiv";
+		vcc-supply = <&cp2_reg_usb3_vbus1>;
+	};
+
+	cp2_reg_sd_vccq: cp2_sd_vccq@0 {
+		compatible = "regulator-gpio";
+		regulator-name = "cp2_sd_vcc";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		gpios = <&cp2_gpio2 17 GPIO_ACTIVE_HIGH>;
+		states = <1800000 0x1 3300000 0x0>;
+	};
+
+	cp2_sfp_eth0: sfp-eth0 {
+		compatible = "sff,sfp";
+		i2c-bus = <&cp2_sfpp0_i2c>;
+		los-gpio = <&cp2_module_expander1 11 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpio = <&cp2_module_expander1 10 GPIO_ACTIVE_LOW>;
+		tx-disable-gpio = <&cp2_module_expander1 9 GPIO_ACTIVE_HIGH>;
+		tx-fault-gpio = <&cp2_module_expander1 8 GPIO_ACTIVE_HIGH>;
+		/*
+		 * SFP cages are unconnected on early PCBs because of an the I2C
+		 * lanes not being connected. Prevent the port for being
+		 * unusable by disabling the SFP node.
+		 */
+		status = "disabled";
+	};
+};
+
+/*
+ * Instantiate the second slave CP115
+ */
+
+#define CP11X_NAME		cp2
+#define CP11X_BASE		f6000000
+#define CP11X_PCIEx_MEM_BASE(iface) (0xe5000000 + (iface * 0x1000000))
+#define CP11X_PCIEx_MEM_SIZE(iface) 0xf00000
+#define CP11X_PCIE0_BASE	f6600000
+#define CP11X_PCIE1_BASE	f6620000
+#define CP11X_PCIE2_BASE	f6640000
+
+#include "armada-cp115.dtsi"
+
+#undef CP11X_NAME
+#undef CP11X_BASE
+#undef CP11X_PCIEx_MEM_BASE
+#undef CP11X_PCIEx_MEM_SIZE
+#undef CP11X_PCIE0_BASE
+#undef CP11X_PCIE1_BASE
+#undef CP11X_PCIE2_BASE
+
+&cp2_crypto {
+	status = "disabled";
+};
+
+&cp2_ethernet {
+	status = "okay";
+};
+
+/* SLM-1521-V2, CON9 */
+&cp2_eth0 {
+	status = "disabled";
+	phy-mode = "10gbase-kr";
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp2_comphy4 0>;
+	managed = "in-band-status";
+	sfp = <&cp2_sfp_eth0>;
+};
+
+&cp2_gpio1 {
+	status = "okay";
+};
+
+&cp2_gpio2 {
+	status = "okay";
+};
+
+&cp2_i2c0 {
+	clock-frequency = <100000>;
+
+	/* SLM-1521-V2 - U3 */
+	i2c-mux@72 {
+		compatible = "nxp,pca9544";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x72>;
+		cp2_sfpp0_i2c: i2c@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+		};
+
+		i2c@1 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <1>;
+			/* U12 */
+			cp2_module_expander1: pca9555@21 {
+				compatible = "nxp,pca9555";
+				pinctrl-names = "default";
+				gpio-controller;
+				#gpio-cells = <2>;
+				reg = <0x21>;
+			};
+		};
+	};
+};
+
+/* SLM-1521-V2, CON6 */
+&cp2_pcie0 {
+	status = "okay";
+	num-lanes = <2>;
+	num-viewport = <8>;
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp2_comphy0 0
+		&cp2_comphy1 0>;
+};
+
+/* SLM-1521-V2, CON8 */
+&cp2_pcie2 {
+	status = "okay";
+	num-lanes = <1>;
+	num-viewport = <8>;
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp2_comphy5 2>;
+};
+
+&cp2_sata0 {
+	status = "okay";
+
+	/* SLM-1521-V2, CON4 */
+	sata-port@0 {
+		/* Generic PHY, providing serdes lanes */
+		phys = <&cp2_comphy2 0>;
+	};
+};
+
+/* CON 2 on SLM-1683 - microSD */
+&cp2_sdhci0 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cp2_sdhci_pins>;
+	bus-width = <4>;
+	cd-gpios = <&cp2_gpio2 23 GPIO_ACTIVE_LOW>;
+	vqmmc-supply = <&cp2_reg_sd_vccq>;
+};
+
+&cp2_syscon0 {
+	cp2_pinctrl: pinctrl {
+		compatible = "marvell,cp115-standalone-pinctrl";
+
+		cp2_i2c0_pins: cp2-i2c-pins-0 {
+			marvell,pins = "mpp37", "mpp38";
+			marvell,function = "i2c0";
+		};
+		cp2_sdhci_pins: cp2-sdhi-pins-0 {
+			marvell,pins = "mpp56", "mpp57", "mpp58",
+				       "mpp59", "mpp60", "mpp61";
+			marvell,function = "sdio";
+		};
+	};
+};
+
+&cp2_usb3_0 {
+	status = "okay";
+	usb-phy = <&cp2_usb3_0_phy0>;
+	phy-names = "usb";
+};
+
+/* SLM-1521-V2, CON11 */
+&cp2_usb3_1 {
+	status = "okay";
+	usb-phy = <&cp2_usb3_0_phy1>;
+	phy-names = "usb";
+	/* Generic PHY, providing serdes lanes */
+	phys = <&cp2_comphy3 1>;
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 97f84aa9fc6e..10b32471bc7b 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -269,6 +269,15 @@
 			clock-names = "spi", "wrap";
 		};
 
+		systimer: timer@10017000 {
+			compatible = "mediatek,mt8183-timer",
+				     "mediatek,mt6765-timer";
+			reg = <0 0x10017000 0 0x1000>;
+			interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&topckgen CLK_TOP_CLK13M>;
+			clock-names = "clk13m";
+		};
+
 		auxadc: auxadc@11001000 {
 			compatible = "mediatek,mt8183-auxadc",
 				     "mediatek,mt8173-auxadc";
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
index bdace01561ba..f1de4ff6230a 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
@@ -115,7 +115,7 @@
 	};
 
 	padctl@3520000 {
-		status = "disabled";
+		status = "okay";
 
 		avdd-pll-erefeut-supply = <&vdd_1v8_pll>;
 		avdd-usb-supply = <&vdd_3v3_sys>;
@@ -193,7 +193,7 @@
 	};
 
 	usb@3530000 {
-		status = "disabled";
+		status = "okay";
 
 		phys = <&{/padctl@3520000/pads/usb2/lanes/usb2-0}>,
 		       <&{/padctl@3520000/pads/usb2/lanes/usb2-1}>,
@@ -253,10 +253,14 @@
 			status = "disabled";
 		};
 
+		/* DP on E3320 */
 		sor@15540000 {
-			status = "disabled";
+			status = "okay";
+
+			avdd-io-hdmi-dp-supply = <&vdd_hdmi_1v05>;
+			vdd-hdmi-dp-pll = <&vdd_1v8_ap>;
 
-			nvidia,dpaux = <&dpaux1>;
+			nvidia,dpaux = <&dpaux>;
 		};
 
 		sor@15580000 {
diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 47cd831fcf44..7893d78a0fb6 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -525,6 +525,7 @@
 		      <0x0 0x03538000 0x0 0x1000>;
 		reg-names = "hcd", "fpci";
 
+		iommus = <&smmu TEGRA186_SID_XUSB_HOST>;
 		interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>;
@@ -1018,6 +1019,7 @@
 			reset-names = "vic";
 
 			power-domains = <&bpmp TEGRA186_POWER_DOMAIN_VIC>;
+			iommus = <&smmu TEGRA186_SID_VIC>;
 		};
 
 		dsib: dsi@15400000 {
@@ -1060,7 +1062,7 @@
 		};
 
 		sor1: sor@15580000 {
-			compatible = "nvidia,tegra186-sor1";
+			compatible = "nvidia,tegra186-sor";
 			reg = <0x15580000 0x10000>;
 			interrupts = <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&bpmp TEGRA186_CLK_SOR1>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
index 4c38426a6969..c7f2a20e6b02 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -8,17 +8,18 @@
 	compatible = "nvidia,p2888", "nvidia,tegra194";
 
 	aliases {
-		sdhci0 = "/cbb/sdhci@3460000";
-		sdhci1 = "/cbb/sdhci@3400000";
+		ethernet0 = "/cbb@0/ethernet@2490000";
+		sdhci0 = "/cbb@0/sdhci@3460000";
+		sdhci1 = "/cbb@0/sdhci@3400000";
 		serial0 = &tcu;
 		i2c0 = "/bpmp/i2c";
-		i2c1 = "/cbb/i2c@3160000";
-		i2c2 = "/cbb/i2c@c240000";
-		i2c3 = "/cbb/i2c@3180000";
-		i2c4 = "/cbb/i2c@3190000";
-		i2c5 = "/cbb/i2c@31c0000";
-		i2c6 = "/cbb/i2c@c250000";
-		i2c7 = "/cbb/i2c@31e0000";
+		i2c1 = "/cbb@0/i2c@3160000";
+		i2c2 = "/cbb@0/i2c@c240000";
+		i2c3 = "/cbb@0/i2c@3180000";
+		i2c4 = "/cbb@0/i2c@3190000";
+		i2c5 = "/cbb@0/i2c@31c0000";
+		i2c6 = "/cbb@0/i2c@c250000";
+		i2c7 = "/cbb@0/i2c@31e0000";
 	};
 
 	chosen {
@@ -26,7 +27,7 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	cbb {
+	cbb@0 {
 		ethernet@2490000 {
 			status = "okay";
 
@@ -168,7 +169,7 @@
 					in-ldo7-8-supply = <&vdd_1v8ls>;
 
 					vdd_1v0: sd0 {
-						regulator-name = "VDD_1V0";
+						regulator-name = "VDDIO_SYS_1V0";
 						regulator-min-microvolt = <1000000>;
 						regulator-max-microvolt = <1000000>;
 						regulator-always-on;
@@ -176,7 +177,7 @@
 					};
 
 					vdd_1v8hs: sd1 {
-						regulator-name = "VDD_1V8HS";
+						regulator-name = "VDDIO_SYS_1V8HS";
 						regulator-min-microvolt = <1800000>;
 						regulator-max-microvolt = <1800000>;
 						regulator-always-on;
@@ -184,7 +185,7 @@
 					};
 
 					vdd_1v8ls: sd2 {
-						regulator-name = "VDD_1V8LS";
+						regulator-name = "VDDIO_SYS_1V8LS";
 						regulator-min-microvolt = <1800000>;
 						regulator-max-microvolt = <1800000>;
 						regulator-always-on;
@@ -192,7 +193,7 @@
 					};
 
 					vdd_1v8ao: sd3 {
-						regulator-name = "VDD_1V8AO";
+						regulator-name = "VDDIO_AO_1V8";
 						regulator-min-microvolt = <1800000>;
 						regulator-max-microvolt = <1800000>;
 						regulator-always-on;
@@ -216,7 +217,7 @@
 					};
 
 					ldo2 {
-						regulator-name = "VDD_AO_3V3";
+						regulator-name = "VDDIO_AO_3V3";
 						regulator-min-microvolt = <3300000>;
 						regulator-max-microvolt = <3300000>;
 						regulator-always-on;
@@ -242,7 +243,7 @@
 					};
 
 					ldo7 {
-						regulator-name = "VDD_CSI_1V2";
+						regulator-name = "AVDD_CSI_1V2";
 						regulator-min-microvolt = <1200000>;
 						regulator-max-microvolt = <1200000>;
 					};
@@ -309,9 +310,8 @@
 			regulator-name = "VDD_12V";
 			regulator-min-microvolt = <1200000>;
 			regulator-max-microvolt = <1200000>;
-			gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_LOW>;
+			gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_HIGH>;
 			regulator-boot-on;
-			enable-active-low;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
index d47cd8c4dd24..353a6a22196d 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
@@ -10,8 +10,8 @@
 	model = "NVIDIA Jetson AGX Xavier Developer Kit";
 	compatible = "nvidia,p2972-0000", "nvidia,tegra194";
 
-	cbb {
-		aconnect {
+	cbb@0 {
+		aconnect@2900000 {
 			status = "okay";
 
 			dma-controller@2930000 {
@@ -46,10 +46,39 @@
 				status = "okay";
 			};
 
+			dpaux@155c0000 {
+				status = "okay";
+			};
+
+			dpaux@155d0000 {
+				status = "okay";
+			};
+
 			dpaux@155e0000 {
 				status = "okay";
 			};
 
+			/* DP0 */
+			sor@15b00000 {
+				status = "okay";
+
+				avdd-io-hdmi-dp-supply = <&vdd_1v0>;
+				vdd-hdmi-dp-pll-supply = <&vdd_1v8hs>;
+
+				nvidia,dpaux = <&dpaux0>;
+			};
+
+			/* DP1 */
+			sor@15b40000 {
+				status = "okay";
+
+				avdd-io-hdmi-dp-supply = <&vdd_1v0>;
+				vdd-hdmi-dp-pll-supply = <&vdd_1v8hs>;
+
+				nvidia,dpaux = <&dpaux1>;
+			};
+
+			/* HDMI */
 			sor@15b80000 {
 				status = "okay";
 
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 3c0cf54f0aab..11220d97adb8 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -15,7 +15,7 @@
 	#size-cells = <2>;
 
 	/* control backbone */
-	cbb {
+	cbb@0 {
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -39,7 +39,8 @@
 		};
 
 		ethernet@2490000 {
-			compatible = "nvidia,tegra186-eqos",
+			compatible = "nvidia,tegra194-eqos",
+				     "nvidia,tegra186-eqos",
 				     "snps,dwc-qos-ethernet-4.10";
 			reg = <0x02490000 0x10000>;
 			interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_HIGH>;
@@ -60,7 +61,7 @@
 			snps,rxpbl = <8>;
 		};
 
-		aconnect {
+		aconnect@2900000 {
 			compatible = "nvidia,tegra194-aconnect",
 				     "nvidia,tegra210-aconnect";
 			clocks = <&bpmp TEGRA194_CLK_APE>,
@@ -1078,7 +1079,7 @@
 
 			sor1: sor@15b40000 {
 				compatible = "nvidia,tegra194-sor";
-				reg = <0x155c0000 0x40000>;
+				reg = <0x15b40000 0x40000>;
 				interrupts = <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&bpmp TEGRA194_CLK_SOR1_REF>,
 					 <&bpmp TEGRA194_CLK_SOR1_OUT>,
@@ -1185,7 +1186,6 @@
 
 		nvidia,bpmp = <&bpmp 1>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1231,7 +1231,6 @@
 
 		nvidia,bpmp = <&bpmp 2>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1277,7 +1276,6 @@
 
 		nvidia,bpmp = <&bpmp 3>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1323,7 +1321,6 @@
 
 		nvidia,bpmp = <&bpmp 4>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1369,7 +1366,6 @@
 
 		nvidia,bpmp = <&bpmp 0>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1419,7 +1415,6 @@
 		interrupt-map-mask = <0 0 0 0>;
 		interrupt-map = <0 0 0 0 &gic GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
 
-		supports-clkreq;
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
@@ -1478,60 +1473,192 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@0 {
+		cpu0_0: cpu@0 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
-			reg = <0x10000>;
+			reg = <0x000>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_0>;
 		};
 
-		cpu@1 {
+		cpu0_1: cpu@1 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
-			reg = <0x10001>;
+			reg = <0x001>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_0>;
 		};
 
-		cpu@2 {
+		cpu1_0: cpu@100 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
 			reg = <0x100>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_1>;
 		};
 
-		cpu@3 {
+		cpu1_1: cpu@101 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
 			reg = <0x101>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_1>;
 		};
 
-		cpu@4 {
+		cpu2_0: cpu@200 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
 			reg = <0x200>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_2>;
 		};
 
-		cpu@5 {
+		cpu2_1: cpu@201 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
 			reg = <0x201>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_2>;
 		};
 
-		cpu@6 {
+		cpu3_0: cpu@300 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
-			reg = <0x10300>;
+			reg = <0x300>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_3>;
 		};
 
-		cpu@7 {
+		cpu3_1: cpu@301 {
 			compatible = "nvidia,tegra194-carmel";
 			device_type = "cpu";
-			reg = <0x10301>;
+			reg = <0x301>;
 			enable-method = "psci";
+			i-cache-size = <131072>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <512>;
+			d-cache-size = <65536>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&l2c_3>;
+		};
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&cpu0_0>;
+				};
+
+				core1 {
+					cpu = <&cpu0_1>;
+				};
+			};
+
+			cluster1 {
+				core0 {
+					cpu = <&cpu1_0>;
+				};
+
+				core1 {
+					cpu = <&cpu1_1>;
+				};
+			};
+
+			cluster2 {
+				core0 {
+					cpu = <&cpu2_0>;
+				};
+
+				core1 {
+					cpu = <&cpu2_1>;
+				};
+			};
+
+			cluster3 {
+				core0 {
+					cpu = <&cpu3_0>;
+				};
+
+				core1 {
+					cpu = <&cpu3_1>;
+				};
+			};
+		};
+
+		l2c_0: l2-cache0 {
+			cache-size = <2097152>;
+			cache-line-size = <64>;
+			cache-sets = <2048>;
+			next-level-cache = <&l3c>;
+		};
+
+		l2c_1: l2-cache1 {
+			cache-size = <2097152>;
+			cache-line-size = <64>;
+			cache-sets = <2048>;
+			next-level-cache = <&l3c>;
+		};
+
+		l2c_2: l2-cache2 {
+			cache-size = <2097152>;
+			cache-line-size = <64>;
+			cache-sets = <2048>;
+			next-level-cache = <&l3c>;
+		};
+
+		l2c_3: l2-cache3 {
+			cache-size = <2097152>;
+			cache-line-size = <64>;
+			cache-sets = <2048>;
+			next-level-cache = <&l3c>;
+		};
+
+		l3c: l3-cache {
+			cache-size = <4194304>;
+			cache-line-size = <64>;
+			cache-sets = <4096>;
 		};
 	};
 
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
index 27723829d033..cb58f79deb48 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
@@ -279,6 +279,13 @@
 
 	pmc@7000e400 {
 		nvidia,invert-interrupt;
+		nvidia,suspend-mode = <0>;
+		nvidia,cpu-pwr-good-time = <0>;
+		nvidia,cpu-pwr-off-time = <0>;
+		nvidia,core-pwr-good-time = <4587 3876>;
+		nvidia,core-pwr-off-time = <39065>;
+		nvidia,core-power-req-active-high;
+		nvidia,sys-clock-req-active-high;
 	};
 
 	/* eMMC */
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
index a7dc319214a4..b0095072bc28 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
@@ -1612,7 +1612,7 @@
 			regulator-name = "VDD_HDMI_5V0";
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
-			gpio = <&exp1 12 GPIO_ACTIVE_LOW>;
+			gpio = <&exp1 12 GPIO_ACTIVE_HIGH>;
 			enable-active-high;
 			vin-supply = <&vdd_5v0_sys>;
 		};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
index 9d17ec707bce..90381d52ac54 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
@@ -64,6 +64,16 @@
 			status = "okay";
 		};
 
+		sor@54540000 {
+			status = "okay";
+
+			avdd-io-hdmi-dp-supply = <&avdd_io_edp_1v05>;
+			vdd-hdmi-dp-pll-supply = <&vdd_1v8>;
+
+			nvidia,xbar-cfg = <2 1 0 3 4>;
+			nvidia,dpaux = <&dpaux>;
+		};
+
 		sor@54580000 {
 			status = "okay";
 
@@ -76,6 +86,10 @@
 					   GPIO_ACTIVE_LOW>;
 			nvidia,xbar-cfg = <0 1 2 3 4>;
 		};
+
+		dpaux@545c0000 {
+			status = "okay";
+		};
 	};
 
 	gpu@57000000 {
@@ -382,6 +396,13 @@
 
 	pmc@7000e400 {
 		nvidia,invert-interrupt;
+		nvidia,suspend-mode = <0>;
+		nvidia,cpu-pwr-good-time = <0>;
+		nvidia,cpu-pwr-off-time = <0>;
+		nvidia,core-pwr-good-time = <4587 3876>;
+		nvidia,core-pwr-off-time = <39065>;
+		nvidia,core-power-req-active-high;
+		nvidia,sys-clock-req-active-high;
 	};
 
 	hda@70030000 {
@@ -680,5 +701,19 @@
 			enable-gpios = <&pmic 6 GPIO_ACTIVE_HIGH>;
 			vin-supply = <&vdd_5v0_sys>;
 		};
+
+		avdd_io_edp_1v05: regulator@7 {
+			compatible = "regulator-fixed";
+			reg = <7>;
+
+			regulator-name = "AVDD_IO_EDP_1V05";
+			regulator-min-microvolt = <1050000>;
+			regulator-max-microvolt = <1050000>;
+
+			gpio = <&pmic 7 GPIO_ACTIVE_HIGH>;
+			enable-active-high;
+
+			vin-supply = <&avdd_1v05_pll>;
+		};
 	};
 };
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
index 659753118e96..48c63256ba7f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
@@ -254,10 +254,11 @@
 			reg = <0x0 0x54540000 0x0 0x00040000>;
 			interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&tegra_car TEGRA210_CLK_SOR0>,
+				 <&tegra_car TEGRA210_CLK_SOR0_OUT>,
 				 <&tegra_car TEGRA210_CLK_PLL_D_OUT0>,
 				 <&tegra_car TEGRA210_CLK_PLL_DP>,
 				 <&tegra_car TEGRA210_CLK_SOR_SAFE>;
-			clock-names = "sor", "parent", "dp", "safe";
+			clock-names = "sor", "out", "parent", "dp", "safe";
 			resets = <&tegra_car 182>;
 			reset-names = "sor";
 			pinctrl-0 = <&state_dpaux_aux>;
@@ -768,7 +769,8 @@
 	rtc@7000e000 {
 		compatible = "nvidia,tegra210-rtc", "nvidia,tegra20-rtc";
 		reg = <0x0 0x7000e000 0x0 0x100>;
-		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-parent = <&pmc>;
 		clocks = <&tegra_car TEGRA210_CLK_RTC>;
 		clock-names = "rtc";
 	};
@@ -778,6 +780,8 @@
 		reg = <0x0 0x7000e400 0x0 0x400>;
 		clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
 		clock-names = "pclk", "clk32k_in";
+		#interrupt-cells = <2>;
+		interrupt-controller;
 
 		powergates {
 			pd_audio: aud {
@@ -1438,6 +1442,16 @@
 		};
 	};
 
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&{/cpus/cpu@0} &{/cpus/cpu@1}
+				      &{/cpus/cpu@2} &{/cpus/cpu@3}>;
+	};
+
 	timer {
 		compatible = "arm,armv8-timer";
 		interrupts = <GIC_PPI 13
@@ -1457,7 +1471,9 @@
 		reg = <0x0 0x700e2000 0x0 0x600 /* SOC_THERM reg_base */
 			0x0 0x60006000 0x0 0x400>; /* CAR reg_base */
 		reg-names = "soctherm-reg", "car-reg";
-		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "thermal", "edp";
 		clocks = <&tegra_car TEGRA210_CLK_TSENSOR>,
 			<&tegra_car TEGRA210_CLK_SOC_THERM>;
 		clock-names = "tsensor", "soctherm";
@@ -1504,6 +1520,7 @@
 				};
 			};
 		};
+
 		mem {
 			polling-delay-passive = <0>;
 			polling-delay = <0>;
@@ -1526,6 +1543,7 @@
 				 */
 			};
 		};
+
 		gpu {
 			polling-delay-passive = <1000>;
 			polling-delay = <0>;
@@ -1554,6 +1572,7 @@
 				};
 			};
 		};
+
 		pllx {
 			polling-delay-passive = <0>;
 			polling-delay = <0>;
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 04ad2fb22b9a..dba3488492f1 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -623,6 +623,8 @@
 				l21 {
 					regulator-min-microvolt = <2950000>;
 					regulator-max-microvolt = <2950000>;
+					regulator-allow-set-load;
+					regulator-system-load = <200000>;
 				};
 				l22 {
 					regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts b/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts
index 2b28e383fd0b..d1ccb9472c8b 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts
+++ b/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts
@@ -5,6 +5,7 @@
 #include "msm8916.dtsi"
 #include "pm8916.dtsi"
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
 
 / {
 	model = "Longcheer L8150";
@@ -18,6 +19,16 @@
 		stdout-path = "serial0";
 	};
 
+	reserved-memory {
+		// wcnss.mdt is not relocatable, so it must be loaded at 0x8b600000
+		/delete-node/ wcnss@89300000;
+
+		wcnss_mem: wcnss@8b600000 {
+			reg = <0x0 0x8b600000 0x0 0x600000>;
+			no-map;
+		};
+	};
+
 	soc {
 		sdhci@7824000 {
 			status = "okay";
@@ -68,6 +79,10 @@
 			};
 		};
 
+		wcnss@a21b000 {
+			status = "okay";
+		};
+
 		/*
 		 * Attempting to enable these devices causes a "synchronous
 		 * external abort". Suspected cause is that the debug power
@@ -99,9 +114,36 @@
 		pinctrl-names = "default";
 		pinctrl-0 = <&usb_vbus_default>;
 	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&gpio_keys_default>;
+
+		label = "GPIO Buttons";
+
+		volume-up {
+			label = "Volume Up";
+			gpios = <&msmgpio 107 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_VOLUMEUP>;
+		};
+	};
 };
 
 &msmgpio {
+	gpio_keys_default: gpio_keys_default {
+		pinmux {
+			function = "gpio";
+			pins = "gpio107";
+		};
+		pinconf {
+			pins = "gpio107";
+			drive-strength = <2>;
+			bias-pull-up;
+		};
+	};
+
 	usb_vbus_default: usb-vbus-default {
 		pinmux {
 			function = "gpio";
@@ -114,6 +156,19 @@
 	};
 };
 
+&spmi_bus {
+	pm8916@0 {
+		pon@800 {
+			volume-down {
+				compatible = "qcom,pm8941-resin";
+				interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
+				bias-pull-up;
+				linux,code = <KEY_VOLUMEDOWN>;
+			};
+		};
+	};
+};
+
 &smd_rpm_regulators {
 	vdd_l1_l2_l3-supply = <&pm8916_s3>;
 	vdd_l4_l5_l6-supply = <&pm8916_s4>;
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi
index e675ff48fdd2..bd1eb3eeca53 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi
@@ -3,6 +3,7 @@
 #include "msm8916.dtsi"
 #include "pm8916.dtsi"
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 
 / {
@@ -63,6 +64,10 @@
 			};
 		};
 
+		wcnss@a21b000 {
+			status = "okay";
+		};
+
 		/*
 		 * Attempting to enable these devices causes a "synchronous
 		 * external abort". Suspected cause is that the debug power
@@ -87,6 +92,44 @@
 		etm@85f000 { status = "disabled"; };
 	};
 
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&gpio_keys_default>;
+
+		label = "GPIO Buttons";
+
+		volume-up {
+			label = "Volume Up";
+			gpios = <&msmgpio 107 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_VOLUMEUP>;
+		};
+
+		home {
+			label = "Home";
+			gpios = <&msmgpio 109 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_HOMEPAGE>;
+		};
+	};
+
+	gpio-hall-sensor {
+		compatible = "gpio-keys";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&gpio_hall_sensor_default>;
+
+		label = "GPIO Hall Effect Sensor";
+
+		hall-sensor {
+			label = "Hall Effect Sensor";
+			gpios = <&msmgpio 52 GPIO_ACTIVE_LOW>;
+			linux,input-type = <EV_SW>;
+			linux,code = <SW_LID>;
+			linux,can-disable;
+		};
+	};
+
 	i2c-muic {
 		compatible = "i2c-gpio";
 		sda-gpios = <&msmgpio 105 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
@@ -109,6 +152,30 @@
 };
 
 &msmgpio {
+	gpio_keys_default: gpio_keys_default {
+		pinmux {
+			function = "gpio";
+			pins = "gpio107", "gpio109";
+		};
+		pinconf {
+			pins = "gpio107", "gpio109";
+			drive-strength = <2>;
+			bias-pull-up;
+		};
+	};
+
+	gpio_hall_sensor_default: gpio_hall_sensor_default {
+		pinmux {
+			function = "gpio";
+			pins = "gpio52";
+		};
+		pinconf {
+			pins = "gpio52";
+			drive-strength = <2>;
+			bias-disable;
+		};
+	};
+
 	muic_int_default: muic_int_default {
 		pinmux {
 			function = "gpio";
@@ -234,3 +301,16 @@
 		regulator-max-microvolt = <2700000>;
 	};
 };
+
+&spmi_bus {
+	pm8916@0 {
+		pon@800 {
+			volume-down {
+				compatible = "qcom,pm8941-resin";
+				interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
+				bias-pull-up;
+				linux,code = <KEY_VOLUMEDOWN>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts
index 1aa59da98495..6629a621139c 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts
@@ -8,3 +8,9 @@
 	model = "Samsung Galaxy A5U (EUR)";
 	compatible = "samsung,a5u-eur", "qcom,msm8916";
 };
+
+&pronto {
+	iris {
+		compatible = "qcom,wcn3680";
+	};
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 5ea9fb8f2f87..8686e101905c 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -179,7 +179,7 @@
 			polling-delay-passive = <250>;
 			polling-delay = <1000>;
 
-			thermal-sensors = <&tsens 4>;
+			thermal-sensors = <&tsens 5>;
 
 			trips {
 				cpu0_1_alert0: trip-point@0 {
@@ -209,7 +209,7 @@
 			polling-delay-passive = <250>;
 			polling-delay = <1000>;
 
-			thermal-sensors = <&tsens 3>;
+			thermal-sensors = <&tsens 4>;
 
 			trips {
 				cpu2_3_alert0: trip-point@0 {
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 87f4d9c1b0d4..4ca2e7b44559 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -591,6 +591,8 @@
 			reg = <0x4a9000 0x1000>, /* TM */
 			      <0x4a8000 0x1000>; /* SROT */
 			#qcom,sensors = <13>;
+			interrupts = <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -599,6 +601,8 @@
 			reg = <0x4ad000 0x1000>, /* TM */
 			      <0x4ac000 0x1000>; /* SROT */
 			#qcom,sensors = <8>;
+			interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
diff --git a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
index 9682d4dd7496..6138b58db6d2 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
@@ -23,6 +23,57 @@
 	};
 };
 
+&blsp1_uart3 {
+	status = "okay";
+
+	bluetooth {
+		compatible = "qcom,wcn3990-bt";
+
+		vddio-supply = <&vreg_s4a_1p8>;
+		vddxo-supply = <&vreg_l7a_1p8>;
+		vddrf-supply = <&vreg_l17a_1p3>;
+		vddch0-supply = <&vreg_l25a_3p3>;
+		max-speed = <3200000>;
+	};
+};
+
+/*
+ * The laptop FW does not appear to support the retention state as it is
+ * not advertised as enabled in ACPI, and enabling it in DT can cause boot
+ * hangs.
+ */
+&CPU0 {
+	cpu-idle-states = <&LITTLE_CPU_SLEEP_1>;
+};
+
+&CPU1 {
+	cpu-idle-states = <&LITTLE_CPU_SLEEP_1>;
+};
+
+&CPU2 {
+	cpu-idle-states = <&LITTLE_CPU_SLEEP_1>;
+};
+
+&CPU3 {
+	cpu-idle-states = <&LITTLE_CPU_SLEEP_1>;
+};
+
+&CPU4 {
+	cpu-idle-states = <&BIG_CPU_SLEEP_1>;
+};
+
+&CPU5 {
+	cpu-idle-states = <&BIG_CPU_SLEEP_1>;
+};
+
+&CPU6 {
+	cpu-idle-states = <&BIG_CPU_SLEEP_1>;
+};
+
+&CPU7 {
+	cpu-idle-states = <&BIG_CPU_SLEEP_1>;
+};
+
 &qusb2phy {
 	status = "okay";
 
@@ -104,6 +155,7 @@
 		vreg_l7a_1p8: l7 {
 			regulator-min-microvolt = <1800000>;
 			regulator-max-microvolt = <1800000>;
+			regulator-allow-set-load;
 		};
 		vreg_l8a_1p2: l8 {
 			regulator-min-microvolt = <1200000>;
@@ -144,6 +196,7 @@
 		vreg_l17a_1p3: l17 {
 			regulator-min-microvolt = <1304000>;
 			regulator-max-microvolt = <1304000>;
+			regulator-allow-set-load;
 		};
 		vreg_l18a_2p7: l18 {
 			regulator-min-microvolt = <2704000>;
@@ -179,6 +232,7 @@
 		vreg_l25a_3p3: l25 {
 			regulator-min-microvolt = <3104000>;
 			regulator-max-microvolt = <3312000>;
+			regulator-allow-set-load;
 		};
 		vreg_l26a_1p2: l26 {
 			regulator-min-microvolt = <1200000>;
diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
index 108667ce4f31..5f101a20a20a 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
@@ -23,10 +23,84 @@
 	};
 };
 
+&blsp1_uart3 {
+	status = "okay";
+
+	bluetooth {
+		compatible = "qcom,wcn3990-bt";
+
+		vddio-supply = <&vreg_s4a_1p8>;
+		vddxo-supply = <&vreg_l7a_1p8>;
+		vddrf-supply = <&vreg_l17a_1p3>;
+		vddch0-supply = <&vreg_l25a_3p3>;
+		max-speed = <3200000>;
+	};
+};
+
 &blsp2_uart1 {
 	status = "okay";
 };
 
+&etf {
+	status = "okay";
+};
+
+&etm1 {
+	status = "okay";
+};
+
+&etm2 {
+	status = "okay";
+};
+
+&etm3 {
+	status = "okay";
+};
+
+&etm4 {
+	status = "okay";
+};
+
+&etm5 {
+	status = "okay";
+};
+
+&etm6 {
+	status = "okay";
+};
+
+&etm7 {
+	status = "okay";
+};
+
+&etm8 {
+	status = "okay";
+};
+
+&etr {
+	status = "okay";
+};
+
+&funnel1 {
+	status = "okay";
+};
+
+&funnel2 {
+	status = "okay";
+};
+
+&funnel3 {
+	status = "okay";
+};
+
+&funnel4 {
+	status = "okay";
+};
+
+&funnel5 {
+	status = "okay";
+};
+
 &pm8005_lsid1 {
 	pm8005-regulators {
 		compatible = "qcom,pm8005-regulators";
@@ -51,6 +125,10 @@
 	vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
 };
 
+&replicator1 {
+	status = "okay";
+};
+
 &rpm_requests {
 	pm8998-regulators {
 		compatible = "qcom,rpm-pm8998-regulators";
@@ -249,6 +327,10 @@
 	pinctrl-1 = <&sdc2_clk_off &sdc2_cmd_off &sdc2_data_off &sdc2_cd_off>;
 };
 
+&stm {
+	status = "okay";
+};
+
 &ufshc {
 	vcc-supply = <&vreg_l20a_2p95>;
 	vccq-supply = <&vreg_l26a_1p2>;
diff --git a/arch/arm64/boot/dts/qcom/msm8998-pins.dtsi b/arch/arm64/boot/dts/qcom/msm8998-pins.dtsi
index 6db70acd38ee..e32d3ab395ea 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-pins.dtsi
@@ -75,4 +75,17 @@
 			drive-strength = <2>;   /* 2 mA */
 		};
 	};
+
+	blsp1_uart3_on: blsp1_uart3_on {
+		mux {
+			pins = "gpio45", "gpio46", "gpio47", "gpio48";
+			function = "blsp_uart3_a";
+		};
+
+		config {
+			pins = "gpio45", "gpio46", "gpio47", "gpio48";
+			drive-strength = <2>;
+			bias-disable;
+		};
+	};
 };
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index c6f81431983e..fc7838ea9a01 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -816,8 +816,9 @@
 			compatible = "qcom,msm8998-tsens", "qcom,tsens-v2";
 			reg = <0x010ab000 0x1000>, /* TM */
 			      <0x010aa000 0x1000>; /* SROT */
-
 			#qcom,sensors = <14>;
+			interrupts = <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -825,8 +826,9 @@
 			compatible = "qcom,msm8998-tsens", "qcom,tsens-v2";
 			reg = <0x010ae000 0x1000>, /* TM */
 			      <0x010ad000 0x1000>; /* SROT */
-
 			#qcom,sensors = <8>;
+			interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -998,11 +1000,12 @@
 			#interrupt-cells = <0x2>;
 		};
 
-		stm@6002000 {
+		stm: stm@6002000 {
 			compatible = "arm,coresight-stm", "arm,primecell";
 			reg = <0x06002000 0x1000>,
 			      <0x16280000 0x180000>;
 			reg-names = "stm-base", "stm-data-base";
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1016,9 +1019,10 @@
 			};
 		};
 
-		funnel@6041000 {
+		funnel1: funnel@6041000 {
 			compatible = "arm,coresight-dynamic-funnel", "arm,primecell";
 			reg = <0x06041000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1045,9 +1049,10 @@
 			};
 		};
 
-		funnel@6042000 {
+		funnel2: funnel@6042000 {
 			compatible = "arm,coresight-dynamic-funnel", "arm,primecell";
 			reg = <0x06042000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1075,9 +1080,10 @@
 			};
 		};
 
-		funnel@6045000 {
+		funnel3: funnel@6045000 {
 			compatible = "arm,coresight-dynamic-funnel", "arm,primecell";
 			reg = <0x06045000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1113,9 +1119,10 @@
 			};
 		};
 
-		replicator@6046000 {
+		replicator1: replicator@6046000 {
 			compatible = "arm,coresight-dynamic-replicator", "arm,primecell";
 			reg = <0x06046000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1137,9 +1144,10 @@
 			};
 		};
 
-		etf@6047000 {
+		etf: etf@6047000 {
 			compatible = "arm,coresight-tmc", "arm,primecell";
 			reg = <0x06047000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1163,9 +1171,10 @@
 			};
 		};
 
-		etr@6048000 {
+		etr: etr@6048000 {
 			compatible = "arm,coresight-tmc", "arm,primecell";
 			reg = <0x06048000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1181,9 +1190,10 @@
 			};
 		};
 
-		etm@7840000 {
+		etm1: etm@7840000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07840000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1200,9 +1210,10 @@
 			};
 		};
 
-		etm@7940000 {
+		etm2: etm@7940000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07940000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1219,9 +1230,10 @@
 			};
 		};
 
-		etm@7a40000 {
+		etm3: etm@7a40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07a40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1238,9 +1250,10 @@
 			};
 		};
 
-		etm@7b40000 {
+		etm4: etm@7b40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07b40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1257,9 +1270,10 @@
 			};
 		};
 
-		funnel@7b60000 { /* APSS Funnel */
+		funnel4: funnel@7b60000 { /* APSS Funnel */
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07b60000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1343,9 +1357,10 @@
 			};
 		};
 
-		funnel@7b70000 {
+		funnel5: funnel@7b70000 {
 			compatible = "arm,coresight-dynamic-funnel", "arm,primecell";
 			reg = <0x07b70000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1369,9 +1384,10 @@
 			};
 		};
 
-		etm@7c40000 {
+		etm5: etm@7c40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07c40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1385,9 +1401,10 @@
 			};
 		};
 
-		etm@7d40000 {
+		etm6: etm@7d40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07d40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1401,9 +1418,10 @@
 			};
 		};
 
-		etm@7e40000 {
+		etm7: etm@7e40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07e40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1417,9 +1435,10 @@
 			};
 		};
 
-		etm@7f40000 {
+		etm8: etm@7f40000 {
 			compatible = "arm,coresight-etm4x", "arm,primecell";
 			reg = <0x07f40000 0x1000>;
+			status = "disabled";
 
 			clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
 			clock-names = "apb_pclk", "atclk";
@@ -1556,6 +1575,33 @@
 			status = "disabled";
 		};
 
+		blsp1_dma: dma@c144000 {
+			compatible = "qcom,bam-v1.7.0";
+			reg = <0x0c144000 0x25000>;
+			interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&gcc GCC_BLSP1_AHB_CLK>;
+			clock-names = "bam_clk";
+			#dma-cells = <1>;
+			qcom,ee = <0>;
+			qcom,controlled-remotely;
+			num-channels = <18>;
+			qcom,num-ees = <4>;
+		};
+
+		blsp1_uart3: serial@c171000 {
+			compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+			reg = <0x0c171000 0x1000>;
+			interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>,
+				 <&gcc GCC_BLSP1_AHB_CLK>;
+			clock-names = "core", "iface";
+			dmas = <&blsp1_dma 4>, <&blsp1_dma 5>;
+			dma-names = "tx", "rx";
+			pinctrl-names = "default";
+			pinctrl-0 = <&blsp1_uart3_on>;
+			status = "disabled";
+		};
+
 		blsp1_i2c1: i2c@c175000 {
 			compatible = "qcom,i2c-qup-v2.2.1";
 			reg = <0x0c175000 0x600>;
diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index a97eeb4569c0..f5f0c4c9cb16 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -22,6 +22,12 @@
 			#clock-cells = <0>;
 			clock-frequency = <19200000>;
 		};
+
+		sleep_clk: sleep-clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <32768>;
+		};
 	};
 
 	cpus {
@@ -283,6 +289,15 @@
 			clock-names = "core";
 		};
 
+		bimc: interconnect@400000 {
+			reg = <0x00400000 0x80000>;
+			compatible = "qcom,qcs404-bimc";
+			#interconnect-cells = <1>;
+			clock-names = "bus", "bus_a";
+			clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+				<&rpmcc RPM_SMD_BIMC_A_CLK>;
+		};
+
 		tsens: thermal-sensor@4a9000 {
 			compatible = "qcom,qcs404-tsens", "qcom,tsens-v1";
 			reg = <0x004a9000 0x1000>, /* TM */
@@ -290,9 +305,29 @@
 			nvmem-cells = <&tsens_caldata>;
 			nvmem-cell-names = "calib";
 			#qcom,sensors = <10>;
+			interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
+		pcnoc: interconnect@500000 {
+			reg = <0x00500000 0x15080>;
+			compatible = "qcom,qcs404-pcnoc";
+			#interconnect-cells = <1>;
+			clock-names = "bus", "bus_a";
+			clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
+				<&rpmcc RPM_SMD_PNOC_A_CLK>;
+		};
+
+		snoc: interconnect@580000 {
+			reg = <0x00580000 0x23080>;
+			compatible = "qcom,qcs404-snoc";
+			#interconnect-cells = <1>;
+			clock-names = "bus", "bus_a";
+			clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
+				<&rpmcc RPM_SMD_SNOC_A_CLK>;
+		};
+
 		remoteproc_cdsp: remoteproc@b00000 {
 			compatible = "qcom,qcs404-cdsp-pas";
 			reg = <0x00b00000 0x4040>;
@@ -869,6 +904,12 @@
 			#mbox-cells = <1>;
 		};
 
+		watchdog@b017000 {
+			compatible = "qcom,kpss-wdt";
+			reg = <0x0b017000 0x1000>;
+			clocks = <&sleep_clk>;
+		};
+
 		timer@b120000 {
 			#address-cells = <1>;
 			#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
index 34881c0113cb..9a4ff57fc877 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
@@ -165,6 +165,8 @@
 /delete-node/ &venus_mem;
 /delete-node/ &cdsp_mem;
 /delete-node/ &cdsp_pas;
+/delete-node/ &zap_shader;
+/delete-node/ &gpu_mem;
 
 /* Increase the size from 120 MB to 128 MB */
 &mpss_region {
@@ -701,9 +703,8 @@ ap_ts_i2c: &i2c14 {
 
 &ufs_mem_hc {
 	status = "okay";
-	pinctrl-names = "init", "default";
-	pinctrl-0 = <&ufs_dev_reset_assert>;
-	pinctrl-1 = <&ufs_dev_reset_deassert>;
+
+	reset-gpios = <&tlmm 150 GPIO_ACTIVE_LOW>;
 
 	vcc-supply = <&src_pp2950_l20a>;
 	vcc-max-microamp = <600000>;
@@ -1258,52 +1259,6 @@ ap_ts_i2c: &i2c14 {
 		};
 	};
 
-	ufs_dev_reset_assert: ufs_dev_reset_assert {
-		config {
-			pins = "ufs_reset";
-			bias-pull-down;		/* default: pull down */
-			/*
-			 * UFS_RESET driver strengths are having
-			 * different values/steps compared to typical
-			 * GPIO drive strengths.
-			 *
-			 * Following table clarifies:
-			 *
-			 * HDRV value | UFS_RESET | Typical GPIO
-			 *   (dec)    |   (mA)    |    (mA)
-			 *     0      |   0.8     |    2
-			 *     1      |   1.55    |    4
-			 *     2      |   2.35    |    6
-			 *     3      |   3.1     |    8
-			 *     4      |   3.9     |    10
-			 *     5      |   4.65    |    12
-			 *     6      |   5.4     |    14
-			 *     7      |   6.15    |    16
-			 *
-			 * POR value for UFS_RESET HDRV is 3 which means
-			 * 3.1mA and we want to use that. Hence just
-			 * specify 8mA to "drive-strength" binding and
-			 * that should result into writing 3 to HDRV
-			 * field.
-			 */
-			drive-strength = <8>;	/* default: 3.1 mA */
-			output-low; /* active low reset */
-		};
-	};
-
-	ufs_dev_reset_deassert: ufs_dev_reset_deassert {
-		config {
-			pins = "ufs_reset";
-			bias-pull-down;		/* default: pull down */
-			/*
-			 * default: 3.1 mA
-			 * check comments under ufs_dev_reset_assert
-			 */
-			drive-strength = <8>;
-			output-high; /* active low reset */
-		};
-	};
-
 	ap_suspend_l_assert: ap_suspend_l_assert {
 		config {
 			pins = "gpio126";
diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
index f5a85caff1a3..d100f46791a6 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
@@ -312,6 +312,18 @@
 			regulator-max-microvolt = <1200000>;
 			regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
 		};
+
+		vreg_lvs1a_1p8: lvs1 {
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-always-on;
+		};
+
+		vreg_lvs2a_1p8: lvs2 {
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-always-on;
+		};
 	};
 
 	pmi8998-rpmh-regulators {
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index f406a4340b05..ddb1f23c936f 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -2824,7 +2824,7 @@
 
 			qcom,gmu = <&gmu>;
 
-			zap-shader {
+			zap_shader: zap-shader {
 				memory-region = <&gpu_mem>;
 			};
 
@@ -2950,6 +2950,8 @@
 			reg = <0 0x0c263000 0 0x1ff>, /* TM */
 			      <0 0x0c222000 0 0x1ff>; /* SROT */
 			#qcom,sensors = <13>;
+			interrupts = <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -2958,6 +2960,8 @@
 			reg = <0 0x0c265000 0 0x1ff>, /* TM */
 			      <0 0x0c223000 0 0x1ff>; /* SROT */
 			#qcom,sensors = <8>;
+			interrupts = <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "uplow";
 			#thermal-sensor-cells = <1>;
 		};
 
@@ -3084,6 +3088,12 @@
 			status = "disabled";
 		};
 
+		watchdog@17980000 {
+			compatible = "qcom,apss-wdt-sdm845", "qcom,kpss-wdt";
+			reg = <0 0x17980000 0 0x1000>;
+			clocks = <&sleep_clk>;
+		};
+
 		apss_shared: mailbox@17990000 {
 			compatible = "qcom,sdm845-apss-shared";
 			reg = <0 0x17990000 0 0x1000>;
diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
index ded120d3aef5..13dc619687f3 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
@@ -20,6 +20,11 @@
 	};
 };
 
+&adsp_pas {
+	firmware-name = "qcom/LENOVO/81JL/qcadsp850.mbn";
+	status = "okay";
+};
+
 &apps_rsc {
 	pm8998-rpmh-regulators {
 		compatible = "qcom,pm8998-rpmh-regulators";
@@ -229,6 +234,11 @@
 	status = "disabled";
 };
 
+&cdsp_pas {
+	firmware-name = "qcom/LENOVO/81JL/qccdsp850.mbn";
+	status = "okay";
+};
+
 &gcc {
 	protected-clocks = <GCC_QSPI_CORE_CLK>,
 			   <GCC_QSPI_CORE_CLK_SRC>,
@@ -296,6 +306,10 @@
 	};
 };
 
+&mss_pil {
+	firmware-name = "qcom/LENOVO/81JL/qcdsp1v2850.mbn", "qcom/LENOVO/81JL/qcdsp2850.mbn";
+};
+
 &qup_i2c12_default {
 	drive-strength = <2>;
 	bias-disable;
diff --git a/arch/arm64/boot/dts/realtek/Makefile b/arch/arm64/boot/dts/realtek/Makefile
index 90c897ac3f7a..555638ada721 100644
--- a/arch/arm64/boot/dts/realtek/Makefile
+++ b/arch/arm64/boot/dts/realtek/Makefile
@@ -1,4 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+dtb-$(CONFIG_ARCH_REALTEK) += rtd1293-ds418j.dtb
+
 dtb-$(CONFIG_ARCH_REALTEK) += rtd1295-mele-v9.dtb
 dtb-$(CONFIG_ARCH_REALTEK) += rtd1295-probox2-ava.dtb
 dtb-$(CONFIG_ARCH_REALTEK) += rtd1295-zidoo-x9s.dtb
+
+dtb-$(CONFIG_ARCH_REALTEK) += rtd1296-ds418.dtb
diff --git a/arch/arm64/boot/dts/realtek/rtd1293-ds418j.dts b/arch/arm64/boot/dts/realtek/rtd1293-ds418j.dts
new file mode 100644
index 000000000000..b2dd583146b4
--- /dev/null
+++ b/arch/arm64/boot/dts/realtek/rtd1293-ds418j.dts
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Copyright (c) 2017 Andreas Färber
+ */
+
+/dts-v1/;
+
+#include "rtd1293.dtsi"
+
+/ {
+	compatible = "synology,ds418j", "realtek,rtd1293";
+	model = "Synology DiskStation DS418j";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x40000000>;
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/realtek/rtd1293.dtsi b/arch/arm64/boot/dts/realtek/rtd1293.dtsi
new file mode 100644
index 000000000000..bd4e22723f7b
--- /dev/null
+++ b/arch/arm64/boot/dts/realtek/rtd1293.dtsi
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Realtek RTD1293 SoC
+ *
+ * Copyright (c) 2017-2019 Andreas Färber
+ */
+
+#include "rtd129x.dtsi"
+
+/ {
+	compatible = "realtek,rtd1293";
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x0>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x1>;
+			next-level-cache = <&l2>;
+		};
+
+		l2: l2-cache {
+			compatible = "cache";
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+};
+
+&arm_pmu {
+	interrupt-affinity = <&cpu0>, <&cpu1>;
+};
diff --git a/arch/arm64/boot/dts/realtek/rtd1295-zidoo-x9s.dts b/arch/arm64/boot/dts/realtek/rtd1295-zidoo-x9s.dts
index da19faab29d5..e98e508b9514 100644
--- a/arch/arm64/boot/dts/realtek/rtd1295-zidoo-x9s.dts
+++ b/arch/arm64/boot/dts/realtek/rtd1295-zidoo-x9s.dts
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
 /*
  * Copyright (c) 2016-2017 Andreas Färber
- *
- * SPDX-License-Identifier: (GPL-2.0+ OR MIT)
  */
 
 /dts-v1/;
diff --git a/arch/arm64/boot/dts/realtek/rtd1295.dtsi b/arch/arm64/boot/dts/realtek/rtd1295.dtsi
index 41d7858da826..93f0e1d97721 100644
--- a/arch/arm64/boot/dts/realtek/rtd1295.dtsi
+++ b/arch/arm64/boot/dts/realtek/rtd1295.dtsi
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
 /*
  * Realtek RTD1295 SoC
  *
  * Copyright (c) 2016-2017 Andreas Färber
- *
- * SPDX-License-Identifier: (GPL-2.0+ OR MIT)
  */
 
 #include "rtd129x.dtsi"
diff --git a/arch/arm64/boot/dts/realtek/rtd1296-ds418.dts b/arch/arm64/boot/dts/realtek/rtd1296-ds418.dts
new file mode 100644
index 000000000000..5a051a52bf88
--- /dev/null
+++ b/arch/arm64/boot/dts/realtek/rtd1296-ds418.dts
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Copyright (c) 2017-2019 Andreas Färber
+ */
+
+/dts-v1/;
+
+#include "rtd1296.dtsi"
+
+/ {
+	compatible = "synology,ds418", "realtek,rtd1296";
+	model = "Synology DiskStation DS418";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x80000000>;
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/realtek/rtd1296.dtsi b/arch/arm64/boot/dts/realtek/rtd1296.dtsi
new file mode 100644
index 000000000000..0f9e59cac086
--- /dev/null
+++ b/arch/arm64/boot/dts/realtek/rtd1296.dtsi
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Realtek RTD1296 SoC
+ *
+ * Copyright (c) 2017-2019 Andreas Färber
+ */
+
+#include "rtd129x.dtsi"
+
+/ {
+	compatible = "realtek,rtd1296";
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x0>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x1>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x2>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x0 0x3>;
+			next-level-cache = <&l2>;
+		};
+
+		l2: l2-cache {
+			compatible = "cache";
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10
+			(GIC_CPU_MASK_RAW(0xf) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+};
+
+&arm_pmu {
+	interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
+};
diff --git a/arch/arm64/boot/dts/realtek/rtd129x.dtsi b/arch/arm64/boot/dts/realtek/rtd129x.dtsi
index b9cb92466fc7..4433114476f5 100644
--- a/arch/arm64/boot/dts/realtek/rtd129x.dtsi
+++ b/arch/arm64/boot/dts/realtek/rtd129x.dtsi
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
 /*
  * Realtek RTD1293/RTD1295/RTD1296 SoC
  *
  * Copyright (c) 2016-2017 Andreas Färber
- *
- * SPDX-License-Identifier: (GPL-2.0+ OR MIT)
  */
 
 /memreserve/	0x0000000000000000 0x0000000000030000;
@@ -13,6 +12,7 @@
 /memreserve/	0x0000000001ffe000 0x0000000000004000;
 
 #include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/reset/realtek,rtd1295.h>
 
 / {
 	interrupt-parent = <&gic>;
@@ -24,6 +24,13 @@
 		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
 	};
 
+	osc27M: osc {
+		compatible = "fixed-clock";
+		clock-frequency = <27000000>;
+		#clock-cells = <0>;
+		clock-output-names = "osc27M";
+	};
+
 	soc {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -31,12 +38,49 @@
 		/* Exclude up to 2 GiB of RAM */
 		ranges = <0x80000000 0x80000000 0x80000000>;
 
+		reset1: reset-controller@98000000 {
+			compatible = "snps,dw-low-reset";
+			reg = <0x98000000 0x4>;
+			#reset-cells = <1>;
+		};
+
+		reset2: reset-controller@98000004 {
+			compatible = "snps,dw-low-reset";
+			reg = <0x98000004 0x4>;
+			#reset-cells = <1>;
+		};
+
+		reset3: reset-controller@98000008 {
+			compatible = "snps,dw-low-reset";
+			reg = <0x98000008 0x4>;
+			#reset-cells = <1>;
+		};
+
+		reset4: reset-controller@98000050 {
+			compatible = "snps,dw-low-reset";
+			reg = <0x98000050 0x4>;
+			#reset-cells = <1>;
+		};
+
+		iso_reset: reset-controller@98007088 {
+			compatible = "snps,dw-low-reset";
+			reg = <0x98007088 0x4>;
+			#reset-cells = <1>;
+		};
+
+		wdt: watchdog@98007680 {
+			compatible = "realtek,rtd1295-watchdog";
+			reg = <0x98007680 0x100>;
+			clocks = <&osc27M>;
+		};
+
 		uart0: serial@98007800 {
 			compatible = "snps,dw-apb-uart";
 			reg = <0x98007800 0x400>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			clock-frequency = <27000000>;
+			resets = <&iso_reset RTD1295_ISO_RSTN_UR0>;
 			status = "disabled";
 		};
 
@@ -46,6 +90,7 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			clock-frequency = <432000000>;
+			resets = <&reset2 RTD1295_RSTN_UR1>;
 			status = "disabled";
 		};
 
@@ -55,6 +100,7 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			clock-frequency = <432000000>;
+			resets = <&reset2 RTD1295_RSTN_UR2>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm64/boot/dts/renesas/Makefile b/arch/arm64/boot/dts/renesas/Makefile
index 42b74c283289..8fdbd2267384 100644
--- a/arch/arm64/boot/dts/renesas/Makefile
+++ b/arch/arm64/boot/dts/renesas/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_ARCH_R8A774A1) += r8a774a1-hihope-rzg2m.dtb
 dtb-$(CONFIG_ARCH_R8A774A1) += r8a774a1-hihope-rzg2m-ex.dtb
+dtb-$(CONFIG_ARCH_R8A774B1) += r8a774b1-hihope-rzg2n.dtb
+dtb-$(CONFIG_ARCH_R8A774B1) += r8a774b1-hihope-rzg2n-ex.dtb
 dtb-$(CONFIG_ARCH_R8A774C0) += r8a774c0-cat874.dtb r8a774c0-ek874.dtb
 dtb-$(CONFIG_ARCH_R8A7795) += r8a7795-salvator-x.dtb r8a7795-h3ulcb.dtb
 dtb-$(CONFIG_ARCH_R8A7795) += r8a7795-h3ulcb-kf.dtb
@@ -10,6 +12,10 @@ dtb-$(CONFIG_ARCH_R8A7795) += r8a7795-es1-h3ulcb-kf.dtb
 dtb-$(CONFIG_ARCH_R8A7796) += r8a7796-salvator-x.dtb r8a7796-m3ulcb.dtb
 dtb-$(CONFIG_ARCH_R8A7796) += r8a7796-m3ulcb-kf.dtb
 dtb-$(CONFIG_ARCH_R8A7796) += r8a7796-salvator-xs.dtb
+dtb-$(CONFIG_ARCH_R8A77960) += r8a7796-salvator-x.dtb r8a7796-m3ulcb.dtb
+dtb-$(CONFIG_ARCH_R8A77960) += r8a7796-m3ulcb-kf.dtb
+dtb-$(CONFIG_ARCH_R8A77960) += r8a7796-salvator-xs.dtb
+dtb-$(CONFIG_ARCH_R8A77961) += r8a77961-salvator-xs.dtb
 dtb-$(CONFIG_ARCH_R8A77965) += r8a77965-salvator-x.dtb r8a77965-salvator-xs.dtb
 dtb-$(CONFIG_ARCH_R8A77965) += r8a77965-m3nulcb.dtb
 dtb-$(CONFIG_ARCH_R8A77965) += r8a77965-m3nulcb-kf.dtb
diff --git a/arch/arm64/boot/dts/renesas/hihope-common.dtsi b/arch/arm64/boot/dts/renesas/hihope-common.dtsi
index 3e376d29a730..2c942a7eaeeb 100644
--- a/arch/arm64/boot/dts/renesas/hihope-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-common.dtsi
@@ -86,7 +86,7 @@
 
 		label = "rcar-sound";
 
-		dais = <&rsnd_port0>;
+		dais = <&rsnd_port>;
 	};
 
 	vbus0_usb2: regulator-vbus0-usb2 {
@@ -142,14 +142,6 @@
 };
 
 &du {
-	clocks = <&cpg CPG_MOD 724>,
-		 <&cpg CPG_MOD 723>,
-		 <&cpg CPG_MOD 722>,
-		 <&versaclock5 1>,
-		 <&x302_clk>,
-		 <&versaclock5 2>;
-	clock-names = "du.0", "du.1", "du.2",
-		      "dclkin.0", "dclkin.1", "dclkin.2";
 	status = "okay";
 };
 
@@ -191,7 +183,7 @@
 		port@2 {
 			reg = <2>;
 			dw_hdmi0_snd_in: endpoint {
-				remote-endpoint = <&rsnd_endpoint0>;
+				remote-endpoint = <&rsnd_endpoint>;
 			};
 		};
 	};
@@ -327,17 +319,15 @@
 	/* Single DAI */
 	#sound-dai-cells = <0>;
 
-	ports {
-		rsnd_port0: port@0 {
-			rsnd_endpoint0: endpoint {
-				remote-endpoint = <&dw_hdmi0_snd_in>;
+	rsnd_port: port {
+		rsnd_endpoint: endpoint {
+			remote-endpoint = <&dw_hdmi0_snd_in>;
 
-				dai-format = "i2s";
-				bitclock-master = <&rsnd_endpoint0>;
-				frame-master = <&rsnd_endpoint0>;
+			dai-format = "i2s";
+			bitclock-master = <&rsnd_endpoint>;
+			frame-master = <&rsnd_endpoint>;
 
-				playback = <&ssi2>;
-			};
+			playback = <&ssi2>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi
index 4280b190dc68..28fe17e3bc4e 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi
@@ -13,6 +13,14 @@
 	chosen {
 		bootargs = "ignore_loglevel rw root=/dev/nfs ip=on";
 	};
+
+	backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm0 0 50000>;
+
+		brightness-levels = <0 2 8 16 32 64 128 255>;
+		default-brightness-level = <6>;
+	};
 };
 
 &avb {
@@ -43,11 +51,36 @@
 	status = "okay";
 };
 
-&pciec0 {
-	status = "okay";
+&gpio1 {
+	/*
+	 * When GP1_20 is LOW LVDS0 is connected to the LVDS connector
+	 * When GP1_20 is HIGH LVDS0 is connected to the LT8918L
+	 */
+	lvds-connector-en-gpio {
+		gpio-hog;
+		gpios = <20 GPIO_ACTIVE_HIGH>;
+		output-low;
+		line-name = "lvds-connector-en-gpio";
+	};
+};
+
+&lvds0 {
+	/*
+	 * Please include the LVDS panel .dtsi file and uncomment the below line
+	 * to enable LVDS panel connected to RZ/G2[MN] boards.
+	 */
+
+	/* status = "okay"; */
+
+	ports {
+		port@1 {
+			lvds_connector: endpoint {
+			};
+		};
+	};
 };
 
-&pciec1 {
+&pciec0 {
 	status = "okay";
 };
 
@@ -82,4 +115,16 @@
 		groups = "can1_data";
 		function = "can1";
 	};
+
+	pwm0_pins: pwm0 {
+		groups = "pwm0";
+		function = "pwm0";
+	};
+};
+
+&pwm0 {
+	pinctrl-0 = <&pwm0_pins>;
+	pinctrl-names = "default";
+
+	status = "okay";
 };
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m-ex.dts b/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m-ex.dts
index 6e33a3b27706..c754fca239d9 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m-ex.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m-ex.dts
@@ -13,3 +13,7 @@
 	compatible = "hoperun,hihope-rzg2-ex", "hoperun,hihope-rzg2m",
 		     "renesas,r8a774a1";
 };
+
+&pciec1 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m.dts b/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m.dts
index 93ca973c856c..96f2fb080a1a 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774a1-hihope-rzg2m.dts
@@ -24,3 +24,14 @@
 		reg = <0x6 0x00000000 0x0 0x80000000>;
 	};
 };
+
+&du {
+	clocks = <&cpg CPG_MOD 724>,
+		 <&cpg CPG_MOD 723>,
+		 <&cpg CPG_MOD 722>,
+		 <&versaclock5 1>,
+		 <&x302_clk>,
+		 <&versaclock5 2>;
+	clock-names = "du.0", "du.1", "du.2",
+		      "dclkin.0", "dclkin.1", "dclkin.2";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index 06c7c849c8ab..34a9f472fbb4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -1726,17 +1726,6 @@
 				      "ssi.1", "ssi.0";
 			status = "disabled";
 
-			ports {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				port@0 {
-					reg = <0>;
-				};
-				port@1 {
-					reg = <1>;
-				};
-			};
-
 			rcar_sound,ctu {
 				ctu00: ctu-0 { };
 				ctu01: ctu-1 { };
@@ -2651,7 +2640,7 @@
 			clock-names = "du.0", "du.1", "du.2";
 			status = "disabled";
 
-			vsps = <&vspd0 &vspd1 &vspd2>;
+			vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>;
 
 			ports {
 				#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n-ex.dts b/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n-ex.dts
new file mode 100644
index 000000000000..ab47c0bd9c19
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n-ex.dts
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the HiHope RZ/G2N sub board
+ *
+ * Copyright (C) 2019 Renesas Electronics Corp.
+ */
+
+#include "r8a774b1-hihope-rzg2n.dts"
+#include "hihope-rzg2-ex.dtsi"
+
+/ {
+	model = "HopeRun HiHope RZ/G2N with sub board";
+	compatible = "hoperun,hihope-rzg2-ex", "hoperun,hihope-rzg2n",
+		     "renesas,r8a774b1";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n.dts b/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n.dts
new file mode 100644
index 000000000000..9910c1aa0a61
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a774b1-hihope-rzg2n.dts
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the HiHope RZ/G2N main board
+ *
+ * Copyright (C) 2019 Renesas Electronics Corp.
+ */
+
+/dts-v1/;
+#include "r8a774b1.dtsi"
+#include "hihope-common.dtsi"
+
+/ {
+	model = "HopeRun HiHope RZ/G2N main board based on r8a774b1";
+	compatible = "hoperun,hihope-rzg2n", "renesas,r8a774b1";
+
+	memory@48000000 {
+		device_type = "memory";
+		/* first 128MB is reserved for secure area. */
+		reg = <0x0 0x48000000 0x0 0x78000000>;
+	};
+
+	memory@480000000 {
+		device_type = "memory";
+		reg = <0x4 0x80000000 0x0 0x80000000>;
+	};
+};
+
+&du {
+	clocks = <&cpg CPG_MOD 724>,
+		 <&cpg CPG_MOD 723>,
+		 <&cpg CPG_MOD 721>,
+		 <&versaclock5 1>,
+		 <&x302_clk>,
+		 <&versaclock5 2>;
+	clock-names = "du.0", "du.1", "du.3",
+		      "dclkin.0", "dclkin.1", "dclkin.3";
+};
+
+&sdhi3 {
+	mmc-hs400-1_8v;
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
new file mode 100644
index 000000000000..fe78387e4bb8
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -0,0 +1,2627 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the r8a774b1 SoC
+ *
+ * Copyright (C) 2019 Renesas Electronics Corp.
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/clock/r8a774b1-cpg-mssr.h>
+#include <dt-bindings/power/r8a774b1-sysc.h>
+
+/ {
+	compatible = "renesas,r8a774b1";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	/*
+	 * The external audio clocks are configured as 0 Hz fixed frequency
+	 * clocks by default.
+	 * Boards that provide audio clocks should override them.
+	 */
+	audio_clk_a: audio_clk_a {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	audio_clk_b: audio_clk_b {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	audio_clk_c: audio_clk_c {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	/* External CAN clock - to be overridden by boards that provide it */
+	can_clk: can {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	cluster0_opp: opp_table0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-500000000 {
+			opp-hz = /bits/ 64 <500000000>;
+			opp-microvolt = <830000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <830000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1500000000 {
+			opp-hz = /bits/ 64 <1500000000>;
+			opp-microvolt = <830000>;
+			clock-latency-ns = <300000>;
+			opp-suspend;
+		};
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		a57_0: cpu@0 {
+			compatible = "arm,cortex-a57";
+			reg = <0x0>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A774B1_PD_CA57_CPU0>;
+			next-level-cache = <&L2_CA57>;
+			enable-method = "psci";
+			#cooling-cells = <2>;
+			dynamic-power-coefficient = <854>;
+			clocks = <&cpg CPG_CORE R8A774B1_CLK_Z>;
+			operating-points-v2 = <&cluster0_opp>;
+		};
+
+		a57_1: cpu@1 {
+			compatible = "arm,cortex-a57";
+			reg = <0x1>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A774B1_PD_CA57_CPU1>;
+			next-level-cache = <&L2_CA57>;
+			enable-method = "psci";
+			clocks = <&cpg CPG_CORE R8A774B1_CLK_Z>;
+			operating-points-v2 = <&cluster0_opp>;
+		};
+
+		L2_CA57: cache-controller-0 {
+			compatible = "cache";
+			power-domains = <&sysc R8A774B1_PD_CA57_SCU>;
+			cache-unified;
+			cache-level = <2>;
+		};
+	};
+
+	extal_clk: extal {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		/* This value must be overridden by the board */
+		clock-frequency = <0>;
+	};
+
+	extalr_clk: extalr {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		/* This value must be overridden by the board */
+		clock-frequency = <0>;
+	};
+
+	/* External PCIe clock - can be overridden by the board */
+	pcie_bus_clk: pcie_bus {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
+		interrupts-extended = <&gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
+				      <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&a57_0>, <&a57_1>;
+	};
+
+	psci {
+		compatible = "arm,psci-1.0", "arm,psci-0.2";
+		method = "smc";
+	};
+
+	/* External SCIF clock - to be overridden by boards that provide it */
+	scif_clk: scif {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		rwdt: watchdog@e6020000 {
+			compatible = "renesas,r8a774b1-wdt",
+				     "renesas,rcar-gen3-wdt";
+			reg = <0 0xe6020000 0 0x0c>;
+			clocks = <&cpg CPG_MOD 402>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 402>;
+			status = "disabled";
+		};
+
+		gpio0: gpio@e6050000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6050000 0 0x50>;
+			interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 0 16>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 912>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 912>;
+		};
+
+		gpio1: gpio@e6051000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6051000 0 0x50>;
+			interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 32 29>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 911>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 911>;
+		};
+
+		gpio2: gpio@e6052000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6052000 0 0x50>;
+			interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 64 15>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 910>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 910>;
+		};
+
+		gpio3: gpio@e6053000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6053000 0 0x50>;
+			interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 96 16>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 909>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 909>;
+		};
+
+		gpio4: gpio@e6054000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6054000 0 0x50>;
+			interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 128 18>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 908>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 908>;
+		};
+
+		gpio5: gpio@e6055000 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6055000 0 0x50>;
+			interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 160 26>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 907>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 907>;
+		};
+
+		gpio6: gpio@e6055400 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6055400 0 0x50>;
+			interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 192 32>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 906>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 906>;
+		};
+
+		gpio7: gpio@e6055800 {
+			compatible = "renesas,gpio-r8a774b1",
+				     "renesas,rcar-gen3-gpio";
+			reg = <0 0xe6055800 0 0x50>;
+			interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pfc 0 224 4>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			clocks = <&cpg CPG_MOD 905>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 905>;
+		};
+
+		pfc: pin-controller@e6060000 {
+			compatible = "renesas,pfc-r8a774b1";
+			reg = <0 0xe6060000 0 0x50c>;
+		};
+
+		cmt0: timer@e60f0000 {
+			compatible = "renesas,r8a774b1-cmt0",
+				     "renesas,rcar-gen3-cmt0";
+			reg = <0 0xe60f0000 0 0x1004>;
+			interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 303>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 303>;
+			status = "disabled";
+		};
+
+		cmt1: timer@e6130000 {
+			compatible = "renesas,r8a774b1-cmt1",
+				     "renesas,rcar-gen3-cmt1";
+			reg = <0 0xe6130000 0 0x1004>;
+			interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 302>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 302>;
+			status = "disabled";
+		};
+
+		cmt2: timer@e6140000 {
+			compatible = "renesas,r8a774b1-cmt1",
+				     "renesas,rcar-gen3-cmt1";
+			reg = <0 0xe6140000 0 0x1004>;
+			interrupts = <GIC_SPI 398 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 399 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 401 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 402 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 403 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 301>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 301>;
+			status = "disabled";
+		};
+
+		cmt3: timer@e6148000 {
+			compatible = "renesas,r8a774b1-cmt1",
+				     "renesas,rcar-gen3-cmt1";
+			reg = <0 0xe6148000 0 0x1004>;
+			interrupts = <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 471 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 476 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 477 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 300>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 300>;
+			status = "disabled";
+		};
+
+		cpg: clock-controller@e6150000 {
+			compatible = "renesas,r8a774b1-cpg-mssr";
+			reg = <0 0xe6150000 0 0x1000>;
+			clocks = <&extal_clk>, <&extalr_clk>;
+			clock-names = "extal", "extalr";
+			#clock-cells = <2>;
+			#power-domain-cells = <0>;
+			#reset-cells = <1>;
+		};
+
+		rst: reset-controller@e6160000 {
+			compatible = "renesas,r8a774b1-rst";
+			reg = <0 0xe6160000 0 0x0200>;
+		};
+
+		sysc: system-controller@e6180000 {
+			compatible = "renesas,r8a774b1-sysc";
+			reg = <0 0xe6180000 0 0x0400>;
+			#power-domain-cells = <1>;
+		};
+
+		tsc: thermal@e6198000 {
+			compatible = "renesas,r8a774b1-thermal";
+			reg = <0 0xe6198000 0 0x100>,
+			      <0 0xe61a0000 0 0x100>,
+			      <0 0xe61a8000 0 0x100>;
+			interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 522>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 522>;
+			#thermal-sensor-cells = <1>;
+		};
+
+		intc_ex: interrupt-controller@e61c0000 {
+			compatible = "renesas,intc-ex-r8a774b1", "renesas,irqc";
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			reg = <0 0xe61c0000 0 0x200>;
+			interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 407>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 407>;
+		};
+
+		tmu0: timer@e61e0000 {
+			compatible = "renesas,tmu-r8a774b1", "renesas,tmu";
+			reg = <0 0xe61e0000 0 0x30>;
+			interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 125>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 125>;
+			status = "disabled";
+		};
+
+		tmu1: timer@e6fc0000 {
+			compatible = "renesas,tmu-r8a774b1", "renesas,tmu";
+			reg = <0 0xe6fc0000 0 0x30>;
+			interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 124>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 124>;
+			status = "disabled";
+		};
+
+		tmu2: timer@e6fd0000 {
+			compatible = "renesas,tmu-r8a774b1", "renesas,tmu";
+			reg = <0 0xe6fd0000 0 0x30>;
+			interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 123>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 123>;
+			status = "disabled";
+		};
+
+		tmu3: timer@e6fe0000 {
+			compatible = "renesas,tmu-r8a774b1", "renesas,tmu";
+			reg = <0 0xe6fe0000 0 0x30>;
+			interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 122>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 122>;
+			status = "disabled";
+		};
+
+		tmu4: timer@ffc00000 {
+			compatible = "renesas,tmu-r8a774b1", "renesas,tmu";
+			reg = <0 0xffc00000 0 0x30>;
+			interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 121>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 121>;
+			status = "disabled";
+		};
+
+		i2c0: i2c@e6500000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe6500000 0 0x40>;
+			interrupts = <GIC_SPI 287 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 931>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 931>;
+			dmas = <&dmac1 0x91>, <&dmac1 0x90>,
+			       <&dmac2 0x91>, <&dmac2 0x90>;
+			dma-names = "tx", "rx", "tx", "rx";
+			i2c-scl-internal-delay-ns = <110>;
+			status = "disabled";
+		};
+
+		i2c1: i2c@e6508000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe6508000 0 0x40>;
+			interrupts = <GIC_SPI 288 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 930>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 930>;
+			dmas = <&dmac1 0x93>, <&dmac1 0x92>,
+			       <&dmac2 0x93>, <&dmac2 0x92>;
+			dma-names = "tx", "rx", "tx", "rx";
+			i2c-scl-internal-delay-ns = <6>;
+			status = "disabled";
+		};
+
+		i2c2: i2c@e6510000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe6510000 0 0x40>;
+			interrupts = <GIC_SPI 286 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 929>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 929>;
+			dmas = <&dmac1 0x95>, <&dmac1 0x94>,
+			       <&dmac2 0x95>, <&dmac2 0x94>;
+			dma-names = "tx", "rx", "tx", "rx";
+			i2c-scl-internal-delay-ns = <6>;
+			status = "disabled";
+		};
+
+		i2c3: i2c@e66d0000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe66d0000 0 0x40>;
+			interrupts = <GIC_SPI 290 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 928>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 928>;
+			dmas = <&dmac0 0x97>, <&dmac0 0x96>;
+			dma-names = "tx", "rx";
+			i2c-scl-internal-delay-ns = <110>;
+			status = "disabled";
+		};
+
+		i2c4: i2c@e66d8000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe66d8000 0 0x40>;
+			interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 927>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 927>;
+			dmas = <&dmac0 0x99>, <&dmac0 0x98>;
+			dma-names = "tx", "rx";
+			i2c-scl-internal-delay-ns = <110>;
+			status = "disabled";
+		};
+
+		i2c5: i2c@e66e0000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe66e0000 0 0x40>;
+			interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 919>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 919>;
+			dmas = <&dmac0 0x9b>, <&dmac0 0x9a>;
+			dma-names = "tx", "rx";
+			i2c-scl-internal-delay-ns = <110>;
+			status = "disabled";
+		};
+
+		i2c6: i2c@e66e8000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,i2c-r8a774b1",
+				     "renesas,rcar-gen3-i2c";
+			reg = <0 0xe66e8000 0 0x40>;
+			interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 918>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 918>;
+			dmas = <&dmac0 0x9d>, <&dmac0 0x9c>;
+			dma-names = "tx", "rx";
+			i2c-scl-internal-delay-ns = <6>;
+			status = "disabled";
+		};
+
+		i2c_dvfs: i2c@e60b0000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "renesas,iic-r8a774b1",
+				     "renesas,rcar-gen3-iic",
+				     "renesas,rmobile-iic";
+			reg = <0 0xe60b0000 0 0x425>;
+			interrupts = <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 926>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 926>;
+			dmas = <&dmac0 0x11>, <&dmac0 0x10>;
+			dma-names = "tx", "rx";
+			status = "disabled";
+		};
+
+		hscif0: serial@e6540000 {
+			compatible = "renesas,hscif-r8a774b1",
+				     "renesas,rcar-gen3-hscif",
+				     "renesas,hscif";
+			reg = <0 0xe6540000 0 0x60>;
+			interrupts = <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 520>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x31>, <&dmac1 0x30>,
+			       <&dmac2 0x31>, <&dmac2 0x30>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 520>;
+			status = "disabled";
+		};
+
+		hscif1: serial@e6550000 {
+			compatible = "renesas,hscif-r8a774b1",
+				     "renesas,rcar-gen3-hscif",
+				     "renesas,hscif";
+			reg = <0 0xe6550000 0 0x60>;
+			interrupts = <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 519>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x33>, <&dmac1 0x32>,
+			       <&dmac2 0x33>, <&dmac2 0x32>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 519>;
+			status = "disabled";
+		};
+
+		hscif2: serial@e6560000 {
+			compatible = "renesas,hscif-r8a774b1",
+				     "renesas,rcar-gen3-hscif",
+				     "renesas,hscif";
+			reg = <0 0xe6560000 0 0x60>;
+			interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 518>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x35>, <&dmac1 0x34>,
+			       <&dmac2 0x35>, <&dmac2 0x34>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 518>;
+			status = "disabled";
+		};
+
+		hscif3: serial@e66a0000 {
+			compatible = "renesas,hscif-r8a774b1",
+				     "renesas,rcar-gen3-hscif",
+				     "renesas,hscif";
+			reg = <0 0xe66a0000 0 0x60>;
+			interrupts = <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 517>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac0 0x37>, <&dmac0 0x36>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 517>;
+			status = "disabled";
+		};
+
+		hscif4: serial@e66b0000 {
+			compatible = "renesas,hscif-r8a774b1",
+				     "renesas,rcar-gen3-hscif",
+				     "renesas,hscif";
+			reg = <0 0xe66b0000 0 0x60>;
+			interrupts = <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 516>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac0 0x39>, <&dmac0 0x38>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 516>;
+			status = "disabled";
+		};
+
+		hsusb: usb@e6590000 {
+			compatible = "renesas,usbhs-r8a774b1",
+				     "renesas,rcar-gen3-usbhs";
+			reg = <0 0xe6590000 0 0x200>;
+			interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 704>, <&cpg CPG_MOD 703>;
+			dmas = <&usb_dmac0 0>, <&usb_dmac0 1>,
+			       <&usb_dmac1 0>, <&usb_dmac1 1>;
+			dma-names = "ch0", "ch1", "ch2", "ch3";
+			renesas,buswait = <11>;
+			phys = <&usb2_phy0 3>;
+			phy-names = "usb";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 704>, <&cpg 703>;
+			status = "disabled";
+		};
+
+		usb_dmac0: dma-controller@e65a0000 {
+			compatible = "renesas,r8a774b1-usb-dmac",
+				     "renesas,usb-dmac";
+			reg = <0 0xe65a0000 0 0x100>;
+			interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "ch0", "ch1";
+			clocks = <&cpg CPG_MOD 330>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 330>;
+			#dma-cells = <1>;
+			dma-channels = <2>;
+		};
+
+		usb_dmac1: dma-controller@e65b0000 {
+			compatible = "renesas,r8a774b1-usb-dmac",
+				     "renesas,usb-dmac";
+			reg = <0 0xe65b0000 0 0x100>;
+			interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "ch0", "ch1";
+			clocks = <&cpg CPG_MOD 331>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 331>;
+			#dma-cells = <1>;
+			dma-channels = <2>;
+		};
+
+		usb3_phy0: usb-phy@e65ee000 {
+			compatible = "renesas,r8a774b1-usb3-phy",
+				     "renesas,rcar-gen3-usb3-phy";
+			reg = <0 0xe65ee000 0 0x90>;
+			clocks = <&cpg CPG_MOD 328>, <&usb3s0_clk>,
+				 <&usb_extal_clk>;
+			clock-names = "usb3-if", "usb3s_clk", "usb_extal";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 328>;
+			#phy-cells = <0>;
+			status = "disabled";
+		};
+
+		dmac0: dma-controller@e6700000 {
+			compatible = "renesas,dmac-r8a774b1",
+				     "renesas,rcar-dmac";
+			reg = <0 0xe6700000 0 0x10000>;
+			interrupts = <GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 201 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 202 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 203 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 204 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 210 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 214 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "error",
+					"ch0", "ch1", "ch2", "ch3",
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15";
+			clocks = <&cpg CPG_MOD 219>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 219>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+			iommus = <&ipmmu_ds0 0>, <&ipmmu_ds0 1>,
+			       <&ipmmu_ds0 2>, <&ipmmu_ds0 3>,
+			       <&ipmmu_ds0 4>, <&ipmmu_ds0 5>,
+			       <&ipmmu_ds0 6>, <&ipmmu_ds0 7>,
+			       <&ipmmu_ds0 8>, <&ipmmu_ds0 9>,
+			       <&ipmmu_ds0 10>, <&ipmmu_ds0 11>,
+			       <&ipmmu_ds0 12>, <&ipmmu_ds0 13>,
+			       <&ipmmu_ds0 14>, <&ipmmu_ds0 15>;
+		};
+
+		dmac1: dma-controller@e7300000 {
+			compatible = "renesas,dmac-r8a774b1",
+				     "renesas,rcar-dmac";
+			reg = <0 0xe7300000 0 0x10000>;
+			interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 310 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 315 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 316 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 319 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "error",
+					"ch0", "ch1", "ch2", "ch3",
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15";
+			clocks = <&cpg CPG_MOD 218>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 218>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+			iommus = <&ipmmu_ds1 0>, <&ipmmu_ds1 1>,
+			       <&ipmmu_ds1 2>, <&ipmmu_ds1 3>,
+			       <&ipmmu_ds1 4>, <&ipmmu_ds1 5>,
+			       <&ipmmu_ds1 6>, <&ipmmu_ds1 7>,
+			       <&ipmmu_ds1 8>, <&ipmmu_ds1 9>,
+			       <&ipmmu_ds1 10>, <&ipmmu_ds1 11>,
+			       <&ipmmu_ds1 12>, <&ipmmu_ds1 13>,
+			       <&ipmmu_ds1 14>, <&ipmmu_ds1 15>;
+		};
+
+		dmac2: dma-controller@e7310000 {
+			compatible = "renesas,dmac-r8a774b1",
+				     "renesas,rcar-dmac";
+			reg = <0 0xe7310000 0 0x10000>;
+			interrupts = <GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 417 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 418 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 419 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 420 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 427 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 428 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "error",
+					"ch0", "ch1", "ch2", "ch3",
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15";
+			clocks = <&cpg CPG_MOD 217>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 217>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+			iommus = <&ipmmu_ds1 16>, <&ipmmu_ds1 17>,
+			       <&ipmmu_ds1 18>, <&ipmmu_ds1 19>,
+			       <&ipmmu_ds1 20>, <&ipmmu_ds1 21>,
+			       <&ipmmu_ds1 22>, <&ipmmu_ds1 23>,
+			       <&ipmmu_ds1 24>, <&ipmmu_ds1 25>,
+			       <&ipmmu_ds1 26>, <&ipmmu_ds1 27>,
+			       <&ipmmu_ds1 28>, <&ipmmu_ds1 29>,
+			       <&ipmmu_ds1 30>, <&ipmmu_ds1 31>;
+		};
+
+		ipmmu_ds0: mmu@e6740000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xe6740000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 0>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_ds1: mmu@e7740000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xe7740000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 1>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_hc: mmu@e6570000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xe6570000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 2>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_mm: mmu@e67b0000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xe67b0000 0 0x1000>;
+			interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 197 IRQ_TYPE_LEVEL_HIGH>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_mp: mmu@ec670000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xec670000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 4>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_pv0: mmu@fd800000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xfd800000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 6>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_vc0: mmu@fe6b0000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xfe6b0000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 12>;
+			power-domains = <&sysc R8A774B1_PD_A3VC>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_vi0: mmu@febd0000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xfebd0000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 14>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			#iommu-cells = <1>;
+		};
+
+		ipmmu_vp0: mmu@fe990000 {
+			compatible = "renesas,ipmmu-r8a774b1";
+			reg = <0 0xfe990000 0 0x1000>;
+			renesas,ipmmu-main = <&ipmmu_mm 16>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			#iommu-cells = <1>;
+		};
+
+		avb: ethernet@e6800000 {
+			compatible = "renesas,etheravb-r8a774b1",
+				     "renesas,etheravb-rcar-gen3";
+			reg = <0 0xe6800000 0 0x800>;
+			interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "ch0", "ch1", "ch2", "ch3",
+					  "ch4", "ch5", "ch6", "ch7",
+					  "ch8", "ch9", "ch10", "ch11",
+					  "ch12", "ch13", "ch14", "ch15",
+					  "ch16", "ch17", "ch18", "ch19",
+					  "ch20", "ch21", "ch22", "ch23",
+					  "ch24";
+			clocks = <&cpg CPG_MOD 812>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 812>;
+			phy-mode = "rgmii";
+			iommus = <&ipmmu_ds0 16>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		can0: can@e6c30000 {
+			compatible = "renesas,can-r8a774b1",
+				     "renesas,rcar-gen3-can";
+			reg = <0 0xe6c30000 0 0x1000>;
+			interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 916>,
+				 <&cpg CPG_CORE R8A774B1_CLK_CANFD>,
+				 <&can_clk>;
+			clock-names = "clkp1", "clkp2", "can_clk";
+			assigned-clocks = <&cpg CPG_CORE R8A774B1_CLK_CANFD>;
+			assigned-clock-rates = <40000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 916>;
+			status = "disabled";
+		};
+
+		can1: can@e6c38000 {
+			compatible = "renesas,can-r8a774b1",
+				     "renesas,rcar-gen3-can";
+			reg = <0 0xe6c38000 0 0x1000>;
+			interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 915>,
+				 <&cpg CPG_CORE R8A774B1_CLK_CANFD>,
+				 <&can_clk>;
+			clock-names = "clkp1", "clkp2", "can_clk";
+			assigned-clocks = <&cpg CPG_CORE R8A774B1_CLK_CANFD>;
+			assigned-clock-rates = <40000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 915>;
+			status = "disabled";
+		};
+
+		canfd: can@e66c0000 {
+			compatible = "renesas,r8a774b1-canfd",
+				     "renesas,rcar-gen3-canfd";
+			reg = <0 0xe66c0000 0 0x8000>;
+			interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+				   <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 914>,
+				 <&cpg CPG_CORE R8A774B1_CLK_CANFD>,
+				 <&can_clk>;
+			clock-names = "fck", "canfd", "can_clk";
+			assigned-clocks = <&cpg CPG_CORE R8A774B1_CLK_CANFD>;
+			assigned-clock-rates = <40000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 914>;
+			status = "disabled";
+
+			channel0 {
+				status = "disabled";
+			};
+
+			channel1 {
+				status = "disabled";
+			};
+		};
+
+		pwm0: pwm@e6e30000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e30000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm1: pwm@e6e31000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e31000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm2: pwm@e6e32000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e32000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm3: pwm@e6e33000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e33000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm4: pwm@e6e34000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e34000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm5: pwm@e6e35000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e35000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		pwm6: pwm@e6e36000 {
+			compatible = "renesas,pwm-r8a774b1", "renesas,pwm-rcar";
+			reg = <0 0xe6e36000 0 0x8>;
+			#pwm-cells = <2>;
+			clocks = <&cpg CPG_MOD 523>;
+			resets = <&cpg 523>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			status = "disabled";
+		};
+
+		scif0: serial@e6e60000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6e60000 0 0x40>;
+			interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 207>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x51>, <&dmac1 0x50>,
+			       <&dmac2 0x51>, <&dmac2 0x50>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 207>;
+			status = "disabled";
+		};
+
+		scif1: serial@e6e68000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6e68000 0 0x40>;
+			interrupts = <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 206>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x53>, <&dmac1 0x52>,
+			       <&dmac2 0x53>, <&dmac2 0x52>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 206>;
+			status = "disabled";
+		};
+
+		scif2: serial@e6e88000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6e88000 0 0x40>;
+			interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 310>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+			       <&dmac2 0x13>, <&dmac2 0x12>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 310>;
+			status = "disabled";
+		};
+
+		scif3: serial@e6c50000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6c50000 0 0x40>;
+			interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 204>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac0 0x57>, <&dmac0 0x56>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 204>;
+			status = "disabled";
+		};
+
+		scif4: serial@e6c40000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6c40000 0 0x40>;
+			interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 203>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac0 0x59>, <&dmac0 0x58>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 203>;
+			status = "disabled";
+		};
+
+		scif5: serial@e6f30000 {
+			compatible = "renesas,scif-r8a774b1",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6f30000 0 0x40>;
+			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 202>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			dmas = <&dmac1 0x5b>, <&dmac1 0x5a>,
+			       <&dmac2 0x5b>, <&dmac2 0x5a>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 202>;
+			status = "disabled";
+		};
+
+		msiof0: spi@e6e90000 {
+			compatible = "renesas,msiof-r8a774b1",
+				     "renesas,rcar-gen3-msiof";
+			reg = <0 0xe6e90000 0 0x0064>;
+			interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 211>;
+			dmas = <&dmac1 0x41>, <&dmac1 0x40>,
+			       <&dmac2 0x41>, <&dmac2 0x40>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 211>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		msiof1: spi@e6ea0000 {
+			compatible = "renesas,msiof-r8a774b1",
+				     "renesas,rcar-gen3-msiof";
+			reg = <0 0xe6ea0000 0 0x0064>;
+			interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 210>;
+			dmas = <&dmac1 0x43>, <&dmac1 0x42>,
+			       <&dmac2 0x43>, <&dmac2 0x42>;
+			dma-names = "tx", "rx", "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 210>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		msiof2: spi@e6c00000 {
+			compatible = "renesas,msiof-r8a774b1",
+				     "renesas,rcar-gen3-msiof";
+			reg = <0 0xe6c00000 0 0x0064>;
+			interrupts = <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 209>;
+			dmas = <&dmac0 0x45>, <&dmac0 0x44>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 209>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		msiof3: spi@e6c10000 {
+			compatible = "renesas,msiof-r8a774b1",
+				     "renesas,rcar-gen3-msiof";
+			reg = <0 0xe6c10000 0 0x0064>;
+			interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 208>;
+			dmas = <&dmac0 0x47>, <&dmac0 0x46>;
+			dma-names = "tx", "rx";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 208>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		vin0: video@e6ef0000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef0000 0 0x1000>;
+			interrupts = <GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 811>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 811>;
+			renesas,id = <0>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin0csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin0>;
+					};
+					vin0csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin0>;
+					};
+				};
+			};
+		};
+
+		vin1: video@e6ef1000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef1000 0 0x1000>;
+			interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 810>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 810>;
+			renesas,id = <1>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin1csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin1>;
+					};
+					vin1csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin1>;
+					};
+				};
+			};
+		};
+
+		vin2: video@e6ef2000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef2000 0 0x1000>;
+			interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 809>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 809>;
+			renesas,id = <2>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin2csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin2>;
+					};
+					vin2csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin2>;
+					};
+				};
+			};
+		};
+
+		vin3: video@e6ef3000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef3000 0 0x1000>;
+			interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 808>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 808>;
+			renesas,id = <3>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin3csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin3>;
+					};
+					vin3csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin3>;
+					};
+				};
+			};
+		};
+
+		vin4: video@e6ef4000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef4000 0 0x1000>;
+			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 807>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 807>;
+			renesas,id = <4>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin4csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin4>;
+					};
+					vin4csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin4>;
+					};
+				};
+			};
+		};
+
+		vin5: video@e6ef5000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef5000 0 0x1000>;
+			interrupts = <GIC_SPI 175 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 806>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 806>;
+			renesas,id = <5>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin5csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin5>;
+					};
+					vin5csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin5>;
+					};
+				};
+			};
+		};
+
+		vin6: video@e6ef6000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef6000 0 0x1000>;
+			interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 805>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 805>;
+			renesas,id = <6>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin6csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin6>;
+					};
+					vin6csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin6>;
+					};
+				};
+			};
+		};
+
+		vin7: video@e6ef7000 {
+			compatible = "renesas,vin-r8a774b1";
+			reg = <0 0xe6ef7000 0 0x1000>;
+			interrupts = <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 804>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 804>;
+			renesas,id = <7>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					vin7csi20: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&csi20vin7>;
+					};
+					vin7csi40: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&csi40vin7>;
+					};
+				};
+			};
+		};
+
+		rcar_sound: sound@ec500000 {
+			/*
+			 * #sound-dai-cells is required
+			 *
+			 * Single DAI : #sound-dai-cells = <0>;	<&rcar_sound>;
+			 * Multi  DAI : #sound-dai-cells = <1>;	<&rcar_sound N>;
+			 */
+			/*
+			 * #clock-cells is required for audio_clkout0/1/2/3
+			 *
+			 * clkout	: #clock-cells = <0>;	<&rcar_sound>;
+			 * clkout0/1/2/3: #clock-cells = <1>;	<&rcar_sound N>;
+			 */
+			compatible =  "renesas,rcar_sound-r8a774b1", "renesas,rcar_sound-gen3";
+			reg = <0 0xec500000 0 0x1000>, /* SCU */
+			      <0 0xec5a0000 0 0x100>,  /* ADG */
+			      <0 0xec540000 0 0x1000>, /* SSIU */
+			      <0 0xec541000 0 0x280>,  /* SSI */
+			      <0 0xec760000 0 0x200>;  /* Audio DMAC peri peri*/
+			reg-names = "scu", "adg", "ssiu", "ssi", "audmapp";
+
+			clocks = <&cpg CPG_MOD 1005>,
+				 <&cpg CPG_MOD 1006>, <&cpg CPG_MOD 1007>,
+				 <&cpg CPG_MOD 1008>, <&cpg CPG_MOD 1009>,
+				 <&cpg CPG_MOD 1010>, <&cpg CPG_MOD 1011>,
+				 <&cpg CPG_MOD 1012>, <&cpg CPG_MOD 1013>,
+				 <&cpg CPG_MOD 1014>, <&cpg CPG_MOD 1015>,
+				 <&cpg CPG_MOD 1022>, <&cpg CPG_MOD 1023>,
+				 <&cpg CPG_MOD 1024>, <&cpg CPG_MOD 1025>,
+				 <&cpg CPG_MOD 1026>, <&cpg CPG_MOD 1027>,
+				 <&cpg CPG_MOD 1028>, <&cpg CPG_MOD 1029>,
+				 <&cpg CPG_MOD 1030>, <&cpg CPG_MOD 1031>,
+				 <&cpg CPG_MOD 1020>, <&cpg CPG_MOD 1021>,
+				 <&cpg CPG_MOD 1020>, <&cpg CPG_MOD 1021>,
+				 <&cpg CPG_MOD 1019>, <&cpg CPG_MOD 1018>,
+				 <&audio_clk_a>, <&audio_clk_b>,
+				 <&audio_clk_c>,
+				 <&cpg CPG_CORE R8A774B1_CLK_S0D4>;
+			clock-names = "ssi-all",
+				      "ssi.9", "ssi.8", "ssi.7", "ssi.6",
+				      "ssi.5", "ssi.4", "ssi.3", "ssi.2",
+				      "ssi.1", "ssi.0",
+				      "src.9", "src.8", "src.7", "src.6",
+				      "src.5", "src.4", "src.3", "src.2",
+				      "src.1", "src.0",
+				      "mix.1", "mix.0",
+				      "ctu.1", "ctu.0",
+				      "dvc.0", "dvc.1",
+				      "clk_a", "clk_b", "clk_c", "clk_i";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 1005>,
+				 <&cpg 1006>, <&cpg 1007>,
+				 <&cpg 1008>, <&cpg 1009>,
+				 <&cpg 1010>, <&cpg 1011>,
+				 <&cpg 1012>, <&cpg 1013>,
+				 <&cpg 1014>, <&cpg 1015>;
+			reset-names = "ssi-all",
+				      "ssi.9", "ssi.8", "ssi.7", "ssi.6",
+				      "ssi.5", "ssi.4", "ssi.3", "ssi.2",
+				      "ssi.1", "ssi.0";
+			status = "disabled";
+
+			rcar_sound,ctu {
+				ctu00: ctu-0 { };
+				ctu01: ctu-1 { };
+				ctu02: ctu-2 { };
+				ctu03: ctu-3 { };
+				ctu10: ctu-4 { };
+				ctu11: ctu-5 { };
+				ctu12: ctu-6 { };
+				ctu13: ctu-7 { };
+			};
+
+			rcar_sound,dvc {
+				dvc0: dvc-0 {
+					dmas = <&audma1 0xbc>;
+					dma-names = "tx";
+				};
+				dvc1: dvc-1 {
+					dmas = <&audma1 0xbe>;
+					dma-names = "tx";
+				};
+			};
+
+			rcar_sound,mix {
+				mix0: mix-0 { };
+				mix1: mix-1 { };
+			};
+
+			rcar_sound,src {
+				src0: src-0 {
+					interrupts = <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x85>, <&audma1 0x9a>;
+					dma-names = "rx", "tx";
+				};
+				src1: src-1 {
+					interrupts = <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x87>, <&audma1 0x9c>;
+					dma-names = "rx", "tx";
+				};
+				src2: src-2 {
+					interrupts = <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x89>, <&audma1 0x9e>;
+					dma-names = "rx", "tx";
+				};
+				src3: src-3 {
+					interrupts = <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x8b>, <&audma1 0xa0>;
+					dma-names = "rx", "tx";
+				};
+				src4: src-4 {
+					interrupts = <GIC_SPI 356 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x8d>, <&audma1 0xb0>;
+					dma-names = "rx", "tx";
+				};
+				src5: src-5 {
+					interrupts = <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x8f>, <&audma1 0xb2>;
+					dma-names = "rx", "tx";
+				};
+				src6: src-6 {
+					interrupts = <GIC_SPI 358 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x91>, <&audma1 0xb4>;
+					dma-names = "rx", "tx";
+				};
+				src7: src-7 {
+					interrupts = <GIC_SPI 359 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x93>, <&audma1 0xb6>;
+					dma-names = "rx", "tx";
+				};
+				src8: src-8 {
+					interrupts = <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x95>, <&audma1 0xb8>;
+					dma-names = "rx", "tx";
+				};
+				src9: src-9 {
+					interrupts = <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x97>, <&audma1 0xba>;
+					dma-names = "rx", "tx";
+				};
+			};
+
+			rcar_sound,ssi {
+				ssi0: ssi-0 {
+					interrupts = <GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x01>, <&audma1 0x02>;
+					dma-names = "rx", "tx";
+				};
+				ssi1: ssi-1 {
+					interrupts = <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x03>, <&audma1 0x04>;
+					dma-names = "rx", "tx";
+				};
+				ssi2: ssi-2 {
+					interrupts = <GIC_SPI 372 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x05>, <&audma1 0x06>;
+					dma-names = "rx", "tx";
+				};
+				ssi3: ssi-3 {
+					interrupts = <GIC_SPI 373 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x07>, <&audma1 0x08>;
+					dma-names = "rx", "tx";
+				};
+				ssi4: ssi-4 {
+					interrupts = <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x09>, <&audma1 0x0a>;
+					dma-names = "rx", "tx";
+				};
+				ssi5: ssi-5 {
+					interrupts = <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x0b>, <&audma1 0x0c>;
+					dma-names = "rx", "tx";
+				};
+				ssi6: ssi-6 {
+					interrupts = <GIC_SPI 376 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x0d>, <&audma1 0x0e>;
+					dma-names = "rx", "tx";
+				};
+				ssi7: ssi-7 {
+					interrupts = <GIC_SPI 377 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x0f>, <&audma1 0x10>;
+					dma-names = "rx", "tx";
+				};
+				ssi8: ssi-8 {
+					interrupts = <GIC_SPI 378 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x11>, <&audma1 0x12>;
+					dma-names = "rx", "tx";
+				};
+				ssi9: ssi-9 {
+					interrupts = <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH>;
+					dmas = <&audma0 0x13>, <&audma1 0x14>;
+					dma-names = "rx", "tx";
+				};
+			};
+
+			rcar_sound,ssiu {
+				ssiu00: ssiu-0 {
+					dmas = <&audma0 0x15>, <&audma1 0x16>;
+					dma-names = "rx", "tx";
+				};
+				ssiu01: ssiu-1 {
+					dmas = <&audma0 0x35>, <&audma1 0x36>;
+					dma-names = "rx", "tx";
+				};
+				ssiu02: ssiu-2 {
+					dmas = <&audma0 0x37>, <&audma1 0x38>;
+					dma-names = "rx", "tx";
+				};
+				ssiu03: ssiu-3 {
+					dmas = <&audma0 0x47>, <&audma1 0x48>;
+					dma-names = "rx", "tx";
+				};
+				ssiu04: ssiu-4 {
+					dmas = <&audma0 0x3F>, <&audma1 0x40>;
+					dma-names = "rx", "tx";
+				};
+				ssiu05: ssiu-5 {
+					dmas = <&audma0 0x43>, <&audma1 0x44>;
+					dma-names = "rx", "tx";
+				};
+				ssiu06: ssiu-6 {
+					dmas = <&audma0 0x4F>, <&audma1 0x50>;
+					dma-names = "rx", "tx";
+				};
+				ssiu07: ssiu-7 {
+					dmas = <&audma0 0x53>, <&audma1 0x54>;
+					dma-names = "rx", "tx";
+				};
+				ssiu10: ssiu-8 {
+					dmas = <&audma0 0x49>, <&audma1 0x4a>;
+					dma-names = "rx", "tx";
+				};
+				ssiu11: ssiu-9 {
+					dmas = <&audma0 0x4B>, <&audma1 0x4C>;
+					dma-names = "rx", "tx";
+				};
+				ssiu12: ssiu-10 {
+					dmas = <&audma0 0x57>, <&audma1 0x58>;
+					dma-names = "rx", "tx";
+				};
+				ssiu13: ssiu-11 {
+					dmas = <&audma0 0x59>, <&audma1 0x5A>;
+					dma-names = "rx", "tx";
+				};
+				ssiu14: ssiu-12 {
+					dmas = <&audma0 0x5F>, <&audma1 0x60>;
+					dma-names = "rx", "tx";
+				};
+				ssiu15: ssiu-13 {
+					dmas = <&audma0 0xC3>, <&audma1 0xC4>;
+					dma-names = "rx", "tx";
+				};
+				ssiu16: ssiu-14 {
+					dmas = <&audma0 0xC7>, <&audma1 0xC8>;
+					dma-names = "rx", "tx";
+				};
+				ssiu17: ssiu-15 {
+					dmas = <&audma0 0xCB>, <&audma1 0xCC>;
+					dma-names = "rx", "tx";
+				};
+				ssiu20: ssiu-16 {
+					dmas = <&audma0 0x63>, <&audma1 0x64>;
+					dma-names = "rx", "tx";
+				};
+				ssiu21: ssiu-17 {
+					dmas = <&audma0 0x67>, <&audma1 0x68>;
+					dma-names = "rx", "tx";
+				};
+				ssiu22: ssiu-18 {
+					dmas = <&audma0 0x6B>, <&audma1 0x6C>;
+					dma-names = "rx", "tx";
+				};
+				ssiu23: ssiu-19 {
+					dmas = <&audma0 0x6D>, <&audma1 0x6E>;
+					dma-names = "rx", "tx";
+				};
+				ssiu24: ssiu-20 {
+					dmas = <&audma0 0xCF>, <&audma1 0xCE>;
+					dma-names = "rx", "tx";
+				};
+				ssiu25: ssiu-21 {
+					dmas = <&audma0 0xEB>, <&audma1 0xEC>;
+					dma-names = "rx", "tx";
+				};
+				ssiu26: ssiu-22 {
+					dmas = <&audma0 0xED>, <&audma1 0xEE>;
+					dma-names = "rx", "tx";
+				};
+				ssiu27: ssiu-23 {
+					dmas = <&audma0 0xEF>, <&audma1 0xF0>;
+					dma-names = "rx", "tx";
+				};
+				ssiu30: ssiu-24 {
+					dmas = <&audma0 0x6f>, <&audma1 0x70>;
+					dma-names = "rx", "tx";
+				};
+				ssiu31: ssiu-25 {
+					dmas = <&audma0 0x21>, <&audma1 0x22>;
+					dma-names = "rx", "tx";
+				};
+				ssiu32: ssiu-26 {
+					dmas = <&audma0 0x23>, <&audma1 0x24>;
+					dma-names = "rx", "tx";
+				};
+				ssiu33: ssiu-27 {
+					dmas = <&audma0 0x25>, <&audma1 0x26>;
+					dma-names = "rx", "tx";
+				};
+				ssiu34: ssiu-28 {
+					dmas = <&audma0 0x27>, <&audma1 0x28>;
+					dma-names = "rx", "tx";
+				};
+				ssiu35: ssiu-29 {
+					dmas = <&audma0 0x29>, <&audma1 0x2A>;
+					dma-names = "rx", "tx";
+				};
+				ssiu36: ssiu-30 {
+					dmas = <&audma0 0x2B>, <&audma1 0x2C>;
+					dma-names = "rx", "tx";
+				};
+				ssiu37: ssiu-31 {
+					dmas = <&audma0 0x2D>, <&audma1 0x2E>;
+					dma-names = "rx", "tx";
+				};
+				ssiu40: ssiu-32 {
+					dmas =	<&audma0 0x71>, <&audma1 0x72>;
+					dma-names = "rx", "tx";
+				};
+				ssiu41: ssiu-33 {
+					dmas = <&audma0 0x17>, <&audma1 0x18>;
+					dma-names = "rx", "tx";
+				};
+				ssiu42: ssiu-34 {
+					dmas = <&audma0 0x19>, <&audma1 0x1A>;
+					dma-names = "rx", "tx";
+				};
+				ssiu43: ssiu-35 {
+					dmas = <&audma0 0x1B>, <&audma1 0x1C>;
+					dma-names = "rx", "tx";
+				};
+				ssiu44: ssiu-36 {
+					dmas = <&audma0 0x1D>, <&audma1 0x1E>;
+					dma-names = "rx", "tx";
+				};
+				ssiu45: ssiu-37 {
+					dmas = <&audma0 0x1F>, <&audma1 0x20>;
+					dma-names = "rx", "tx";
+				};
+				ssiu46: ssiu-38 {
+					dmas = <&audma0 0x31>, <&audma1 0x32>;
+					dma-names = "rx", "tx";
+				};
+				ssiu47: ssiu-39 {
+					dmas = <&audma0 0x33>, <&audma1 0x34>;
+					dma-names = "rx", "tx";
+				};
+				ssiu50: ssiu-40 {
+					dmas = <&audma0 0x73>, <&audma1 0x74>;
+					dma-names = "rx", "tx";
+				};
+				ssiu60: ssiu-41 {
+					dmas = <&audma0 0x75>, <&audma1 0x76>;
+					dma-names = "rx", "tx";
+				};
+				ssiu70: ssiu-42 {
+					dmas = <&audma0 0x79>, <&audma1 0x7a>;
+					dma-names = "rx", "tx";
+				};
+				ssiu80: ssiu-43 {
+					dmas = <&audma0 0x7b>, <&audma1 0x7c>;
+					dma-names = "rx", "tx";
+				};
+				ssiu90: ssiu-44 {
+					dmas = <&audma0 0x7d>, <&audma1 0x7e>;
+					dma-names = "rx", "tx";
+				};
+				ssiu91: ssiu-45 {
+					dmas = <&audma0 0x7F>, <&audma1 0x80>;
+					dma-names = "rx", "tx";
+				};
+				ssiu92: ssiu-46 {
+					dmas = <&audma0 0x81>, <&audma1 0x82>;
+					dma-names = "rx", "tx";
+				};
+				ssiu93: ssiu-47 {
+					dmas = <&audma0 0x83>, <&audma1 0x84>;
+					dma-names = "rx", "tx";
+				};
+				ssiu94: ssiu-48 {
+					dmas = <&audma0 0xA3>, <&audma1 0xA4>;
+					dma-names = "rx", "tx";
+				};
+				ssiu95: ssiu-49 {
+					dmas = <&audma0 0xA5>, <&audma1 0xA6>;
+					dma-names = "rx", "tx";
+				};
+				ssiu96: ssiu-50 {
+					dmas = <&audma0 0xA7>, <&audma1 0xA8>;
+					dma-names = "rx", "tx";
+				};
+				ssiu97: ssiu-51 {
+					dmas = <&audma0 0xA9>, <&audma1 0xAA>;
+					dma-names = "rx", "tx";
+				};
+			};
+		};
+
+		audma0: dma-controller@ec700000 {
+			compatible = "renesas,dmac-r8a774b1",
+				     "renesas,rcar-dmac";
+			reg = <0 0xec700000 0 0x10000>;
+			interrupts = <GIC_SPI 350 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 322 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 324 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 326 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 328 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 330 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "error",
+					"ch0", "ch1", "ch2", "ch3",
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15";
+			clocks = <&cpg CPG_MOD 502>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 502>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+		};
+
+		audma1: dma-controller@ec720000 {
+			compatible = "renesas,dmac-r8a774b1",
+				     "renesas,rcar-dmac";
+			reg = <0 0xec720000 0 0x10000>;
+			interrupts = <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 345 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 346 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 349 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 382 IRQ_TYPE_LEVEL_HIGH
+				      GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "error",
+					"ch0", "ch1", "ch2", "ch3",
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15";
+			clocks = <&cpg CPG_MOD 501>;
+			clock-names = "fck";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 501>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+		};
+
+		xhci0: usb@ee000000 {
+			compatible = "renesas,xhci-r8a774b1",
+				     "renesas,rcar-gen3-xhci";
+			reg = <0 0xee000000 0 0xc00>;
+			interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 328>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 328>;
+			status = "disabled";
+		};
+
+		usb3_peri0: usb@ee020000 {
+			compatible = "renesas,r8a774b1-usb3-peri",
+				     "renesas,rcar-gen3-usb3-peri";
+			reg = <0 0xee020000 0 0x400>;
+			interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 328>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 328>;
+			status = "disabled";
+		};
+
+		ohci0: usb@ee080000 {
+			compatible = "generic-ohci";
+			reg = <0 0xee080000 0 0x100>;
+			interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>;
+			phys = <&usb2_phy0 1>;
+			phy-names = "usb";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 703>, <&cpg 704>;
+			status = "disabled";
+		};
+
+		ohci1: usb@ee0a0000 {
+			compatible = "generic-ohci";
+			reg = <0 0xee0a0000 0 0x100>;
+			interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 702>;
+			phys = <&usb2_phy1 1>;
+			phy-names = "usb";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 702>;
+			status = "disabled";
+		};
+
+		ehci0: usb@ee080100 {
+			compatible = "generic-ehci";
+			reg = <0 0xee080100 0 0x100>;
+			interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>;
+			phys = <&usb2_phy0 2>;
+			phy-names = "usb";
+			companion = <&ohci0>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 703>, <&cpg 704>;
+			status = "disabled";
+		};
+
+		ehci1: usb@ee0a0100 {
+			compatible = "generic-ehci";
+			reg = <0 0xee0a0100 0 0x100>;
+			interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 702>;
+			phys = <&usb2_phy1 2>;
+			phy-names = "usb";
+			companion = <&ohci1>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 702>;
+			status = "disabled";
+		};
+
+		usb2_phy0: usb-phy@ee080200 {
+			compatible = "renesas,usb2-phy-r8a774b1",
+				     "renesas,rcar-gen3-usb2-phy";
+			reg = <0 0xee080200 0 0x700>;
+			interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 703>, <&cpg 704>;
+			#phy-cells = <1>;
+			status = "disabled";
+		};
+
+		usb2_phy1: usb-phy@ee0a0200 {
+			compatible = "renesas,usb2-phy-r8a774b1",
+				     "renesas,rcar-gen3-usb2-phy";
+			reg = <0 0xee0a0200 0 0x700>;
+			clocks = <&cpg CPG_MOD 702>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 702>;
+			#phy-cells = <1>;
+			status = "disabled";
+		};
+
+		sdhi0: sd@ee100000 {
+			compatible = "renesas,sdhi-r8a774b1",
+				     "renesas,rcar-gen3-sdhi";
+			reg = <0 0xee100000 0 0x2000>;
+			interrupts = <GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 314>;
+			max-frequency = <200000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 314>;
+			status = "disabled";
+		};
+
+		sdhi1: sd@ee120000 {
+			compatible = "renesas,sdhi-r8a774b1",
+				     "renesas,rcar-gen3-sdhi";
+			reg = <0 0xee120000 0 0x2000>;
+			interrupts = <GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 313>;
+			max-frequency = <200000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 313>;
+			status = "disabled";
+		};
+
+		sdhi2: sd@ee140000 {
+			compatible = "renesas,sdhi-r8a774b1",
+				     "renesas,rcar-gen3-sdhi";
+			reg = <0 0xee140000 0 0x2000>;
+			interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 312>;
+			max-frequency = <200000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 312>;
+			status = "disabled";
+		};
+
+		sdhi3: sd@ee160000 {
+			compatible = "renesas,sdhi-r8a774b1",
+				     "renesas,rcar-gen3-sdhi";
+			reg = <0 0xee160000 0 0x2000>;
+			interrupts = <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 311>;
+			max-frequency = <200000000>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 311>;
+			status = "disabled";
+		};
+
+		sata: sata@ee300000 {
+			compatible = "renesas,sata-r8a774b1",
+				     "renesas,rcar-gen3-sata";
+			reg = <0 0xee300000 0 0x200000>;
+			interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 815>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 815>;
+			status = "disabled";
+		};
+
+		gic: interrupt-controller@f1010000 {
+			compatible = "arm,gic-400";
+			#interrupt-cells = <3>;
+			#address-cells = <0>;
+			interrupt-controller;
+			reg = <0x0 0xf1010000 0 0x1000>,
+			      <0x0 0xf1020000 0 0x20000>,
+			      <0x0 0xf1040000 0 0x20000>,
+			      <0x0 0xf1060000 0 0x20000>;
+			interrupts = <GIC_PPI 9
+					(GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+			clocks = <&cpg CPG_MOD 408>;
+			clock-names = "clk";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 408>;
+		};
+
+		pciec0: pcie@fe000000 {
+			compatible = "renesas,pcie-r8a774b1",
+				     "renesas,pcie-rcar-gen3";
+			reg = <0 0xfe000000 0 0x80000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			bus-range = <0x00 0xff>;
+			device_type = "pci";
+			ranges = <0x01000000 0 0x00000000 0 0xfe100000 0 0x00100000
+				  0x02000000 0 0xfe200000 0 0xfe200000 0 0x00200000
+				  0x02000000 0 0x30000000 0 0x30000000 0 0x08000000
+				  0x42000000 0 0x38000000 0 0x38000000 0 0x08000000>;
+			/* Map all possible DDR as inbound ranges */
+			dma-ranges = <0x42000000 0 0x40000000 0 0x40000000 0 0x80000000>;
+			interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &gic GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 319>, <&pcie_bus_clk>;
+			clock-names = "pcie", "pcie_bus";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 319>;
+			status = "disabled";
+		};
+
+		pciec1: pcie@ee800000 {
+			compatible = "renesas,pcie-r8a774b1",
+				     "renesas,pcie-rcar-gen3";
+			reg = <0 0xee800000 0 0x80000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			bus-range = <0x00 0xff>;
+			device_type = "pci";
+			ranges = <0x01000000 0 0x00000000 0 0xee900000 0 0x00100000
+				  0x02000000 0 0xeea00000 0 0xeea00000 0 0x00200000
+				  0x02000000 0 0xc0000000 0 0xc0000000 0 0x08000000
+				  0x42000000 0 0xc8000000 0 0xc8000000 0 0x08000000>;
+			/* Map all possible DDR as inbound ranges */
+			dma-ranges = <0x42000000 0 0x40000000 0 0x40000000 0 0x80000000>;
+			interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 149 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 150 IRQ_TYPE_LEVEL_HIGH>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &gic GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 318>, <&pcie_bus_clk>;
+			clock-names = "pcie", "pcie_bus";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 318>;
+			status = "disabled";
+		};
+
+		fdp1@fe940000 {
+			compatible = "renesas,fdp1";
+			reg = <0 0xfe940000 0 0x2400>;
+			interrupts = <GIC_SPI 262 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 119>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 119>;
+			renesas,fcp = <&fcpf0>;
+		};
+
+		fcpf0: fcp@fe950000 {
+			compatible = "renesas,fcpf";
+			reg = <0 0xfe950000 0 0x200>;
+			clocks = <&cpg CPG_MOD 615>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 615>;
+		};
+
+		vspb: vsp@fe960000 {
+			compatible = "renesas,vsp2";
+			reg = <0 0xfe960000 0 0x8000>;
+			interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 626>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 626>;
+
+			renesas,fcp = <&fcpvb0>;
+		};
+
+		vspi0: vsp@fe9a0000 {
+			compatible = "renesas,vsp2";
+			reg = <0 0xfe9a0000 0 0x8000>;
+			interrupts = <GIC_SPI 444 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 631>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 631>;
+
+			renesas,fcp = <&fcpvi0>;
+		};
+
+		vspd0: vsp@fea20000 {
+			compatible = "renesas,vsp2";
+			reg = <0 0xfea20000 0 0x5000>;
+			interrupts = <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 623>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 623>;
+
+			renesas,fcp = <&fcpvd0>;
+		};
+
+		vspd1: vsp@fea28000 {
+			compatible = "renesas,vsp2";
+			reg = <0 0xfea28000 0 0x5000>;
+			interrupts = <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 622>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 622>;
+
+			renesas,fcp = <&fcpvd1>;
+		};
+
+		fcpvb0: fcp@fe96f000 {
+			compatible = "renesas,fcpv";
+			reg = <0 0xfe96f000 0 0x200>;
+			clocks = <&cpg CPG_MOD 607>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 607>;
+		};
+
+		fcpvd0: fcp@fea27000 {
+			compatible = "renesas,fcpv";
+			reg = <0 0xfea27000 0 0x200>;
+			clocks = <&cpg CPG_MOD 603>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 603>;
+		};
+
+		fcpvd1: fcp@fea2f000 {
+			compatible = "renesas,fcpv";
+			reg = <0 0xfea2f000 0 0x200>;
+			clocks = <&cpg CPG_MOD 602>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 602>;
+		};
+
+		fcpvi0: fcp@fe9af000 {
+			compatible = "renesas,fcpv";
+			reg = <0 0xfe9af000 0 0x200>;
+			clocks = <&cpg CPG_MOD 611>;
+			power-domains = <&sysc R8A774B1_PD_A3VP>;
+			resets = <&cpg 611>;
+		};
+
+		csi20: csi2@fea80000 {
+			compatible = "renesas,r8a774b1-csi2";
+			reg = <0 0xfea80000 0 0x10000>;
+			interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 714>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 714>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					csi20vin0: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&vin0csi20>;
+					};
+					csi20vin1: endpoint@1 {
+						reg = <1>;
+						remote-endpoint = <&vin1csi20>;
+					};
+					csi20vin2: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&vin2csi20>;
+					};
+					csi20vin3: endpoint@3 {
+						reg = <3>;
+						remote-endpoint = <&vin3csi20>;
+					};
+					csi20vin4: endpoint@4 {
+						reg = <4>;
+						remote-endpoint = <&vin4csi20>;
+					};
+					csi20vin5: endpoint@5 {
+						reg = <5>;
+						remote-endpoint = <&vin5csi20>;
+					};
+					csi20vin6: endpoint@6 {
+						reg = <6>;
+						remote-endpoint = <&vin6csi20>;
+					};
+					csi20vin7: endpoint@7 {
+						reg = <7>;
+						remote-endpoint = <&vin7csi20>;
+					};
+				};
+			};
+		};
+
+		csi40: csi2@feaa0000 {
+			compatible = "renesas,r8a774b1-csi2";
+			reg = <0 0xfeaa0000 0 0x10000>;
+			interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 716>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 716>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+
+					csi40vin0: endpoint@0 {
+						reg = <0>;
+						remote-endpoint = <&vin0csi40>;
+					};
+					csi40vin1: endpoint@1 {
+						reg = <1>;
+						remote-endpoint = <&vin1csi40>;
+					};
+					csi40vin2: endpoint@2 {
+						reg = <2>;
+						remote-endpoint = <&vin2csi40>;
+					};
+					csi40vin3: endpoint@3 {
+						reg = <3>;
+						remote-endpoint = <&vin3csi40>;
+					};
+					csi40vin4: endpoint@4 {
+						reg = <4>;
+						remote-endpoint = <&vin4csi40>;
+					};
+					csi40vin5: endpoint@5 {
+						reg = <5>;
+						remote-endpoint = <&vin5csi40>;
+					};
+					csi40vin6: endpoint@6 {
+						reg = <6>;
+						remote-endpoint = <&vin6csi40>;
+					};
+					csi40vin7: endpoint@7 {
+						reg = <7>;
+						remote-endpoint = <&vin7csi40>;
+					};
+				};
+			};
+		};
+
+		hdmi0: hdmi@fead0000 {
+			compatible = "renesas,r8a774b1-hdmi",
+				     "renesas,rcar-gen3-hdmi";
+			reg = <0 0xfead0000 0 0x10000>;
+			interrupts = <GIC_SPI 389 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 729>,
+				 <&cpg CPG_CORE R8A774B1_CLK_HDMI>;
+			clock-names = "iahb", "isfr";
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 729>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					dw_hdmi0_in: endpoint {
+						remote-endpoint = <&du_out_hdmi0>;
+					};
+				};
+				port@1 {
+					reg = <1>;
+				};
+				port@2 {
+					/* HDMI sound */
+					reg = <2>;
+				};
+			};
+		};
+
+		du: display@feb00000 {
+			compatible = "renesas,du-r8a774b1";
+			reg = <0 0xfeb00000 0 0x80000>;
+			interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 270 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 724>,
+				 <&cpg CPG_MOD 723>,
+				 <&cpg CPG_MOD 721>;
+			clock-names = "du.0", "du.1", "du.3";
+			status = "disabled";
+
+			vsps = <&vspd0 0>, <&vspd1 0>, <&vspd0 1>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					du_out_rgb: endpoint {
+					};
+				};
+				port@1 {
+					reg = <1>;
+					du_out_hdmi0: endpoint {
+						remote-endpoint = <&dw_hdmi0_in>;
+					};
+				};
+				port@2 {
+					reg = <2>;
+					du_out_lvds0: endpoint {
+						remote-endpoint = <&lvds0_in>;
+					};
+				};
+			};
+		};
+
+		lvds0: lvds@feb90000 {
+			compatible = "renesas,r8a774b1-lvds";
+			reg = <0 0xfeb90000 0 0x14>;
+			clocks = <&cpg CPG_MOD 727>;
+			power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
+			resets = <&cpg 727>;
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					lvds0_in: endpoint {
+						remote-endpoint = <&du_out_lvds0>;
+					};
+				};
+				port@1 {
+					reg = <1>;
+					lvds0_out: endpoint {
+					};
+				};
+			};
+		};
+
+		prr: chipid@fff00044 {
+			compatible = "renesas,prr";
+			reg = <0 0xfff00044 0 4>;
+		};
+	};
+
+	thermal-zones {
+		sensor_thermal1: sensor-thermal1 {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsc 0>;
+			sustainable-power = <2439>;
+
+			trips {
+				sensor1_crit: sensor1-crit {
+					temperature = <120000>;
+					hysteresis = <1000>;
+					type = "critical";
+				};
+			};
+		};
+
+		sensor_thermal2: sensor-thermal2 {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsc 1>;
+			sustainable-power = <2439>;
+
+			trips {
+				sensor2_crit: sensor2-crit {
+					temperature = <120000>;
+					hysteresis = <1000>;
+					type = "critical";
+				};
+			};
+		};
+
+		sensor_thermal3: sensor-thermal3 {
+			polling-delay-passive = <250>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsc 2>;
+			sustainable-power = <2439>;
+
+			cooling-maps {
+				map0 {
+					trip = <&target>;
+					cooling-device = <&a57_0 0 2>;
+					contribution = <1024>;
+				};
+			};
+			trips {
+				target: trip-point1 {
+					temperature = <100000>;
+					hysteresis = <1000>;
+					type = "passive";
+				};
+
+				sensor3_crit: sensor3-crit {
+					temperature = <120000>;
+					hysteresis = <1000>;
+					type = "critical";
+				};
+			};
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	/* External USB clocks - can be overridden by the board */
+	usb3s0_clk: usb3s0 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	usb_extal_clk: usb_extal {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index a1c2de90e470..c7bdc3606323 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -73,9 +73,11 @@
 			compatible = "arm,cortex-a53";
 			reg = <0>;
 			device_type = "cpu";
+			#cooling-cells = <2>;
 			power-domains = <&sysc R8A774C0_PD_CA53_CPU0>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			dynamic-power-coefficient = <277>;
 			clocks = <&cpg CPG_CORE R8A774C0_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 		};
@@ -1905,18 +1907,30 @@
 	thermal-zones {
 		cpu-thermal {
 			polling-delay-passive = <250>;
-			polling-delay = <1000>;
-			thermal-sensors = <&thermal>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 0>;
+			sustainable-power = <717>;
 
 			cooling-maps {
+				map0 {
+					trip = <&target>;
+					cooling-device = <&a53_0 0 2>;
+					contribution = <1024>;
+				};
 			};
 
 			trips {
-				cpu-crit {
+				sensor1_crit: sensor1-crit {
 					temperature = <120000>;
 					hysteresis = <2000>;
 					type = "critical";
 				};
+
+				target: trip-point1 {
+					temperature = <100000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
 			};
 		};
 	};
diff --git a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi
index e4650ae5b75a..14d8513d2a47 100644
--- a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi
@@ -30,7 +30,7 @@
 };
 
 &du {
-	vsps = <&vspd0 &vspd1 &vspd2 &vspd3>;
+	vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd3 0>;
 };
 
 &fcpvb1 {
diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi
index 95deff66eeb6..fde6ec122d3b 100644
--- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi
@@ -155,6 +155,7 @@
 			power-domains = <&sysc R8A7795_PD_CA57_CPU0>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			dynamic-power-coefficient = <854>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
@@ -169,6 +170,7 @@
 			power-domains = <&sysc R8A7795_PD_CA57_CPU1>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
 			capacity-dmips-mhz = <1024>;
@@ -182,6 +184,7 @@
 			power-domains = <&sysc R8A7795_PD_CA57_CPU2>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
 			capacity-dmips-mhz = <1024>;
@@ -195,6 +198,7 @@
 			power-domains = <&sysc R8A7795_PD_CA57_CPU3>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
 			capacity-dmips-mhz = <1024>;
@@ -208,6 +212,7 @@
 			power-domains = <&sysc R8A7795_PD_CA53_CPU0>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			#cooling-cells = <2>;
 			dynamic-power-coefficient = <277>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z2>;
@@ -222,6 +227,7 @@
 			power-domains = <&sysc R8A7795_PD_CA53_CPU1>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -234,6 +240,7 @@
 			power-domains = <&sysc R8A7795_PD_CA53_CPU2>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -246,6 +253,7 @@
 			power-domains = <&sysc R8A7795_PD_CA53_CPU3>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7795_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -264,6 +272,28 @@
 			cache-unified;
 			cache-level = <2>;
 		};
+
+		idle-states {
+			entry-method = "psci";
+
+			CPU_SLEEP_0: cpu-sleep-0 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <400>;
+				exit-latency-us = <500>;
+				min-residency-us = <4000>;
+			};
+
+			CPU_SLEEP_1: cpu-sleep-1 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <700>;
+				exit-latency-us = <700>;
+				min-residency-us = <5000>;
+			};
+		};
 	};
 
 	extal_clk: extal {
@@ -2569,6 +2599,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -2581,6 +2612,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
 			resets = <&cpg 313>;
+			iommus = <&ipmmu_ds1 33>;
 			status = "disabled";
 		};
 
@@ -2593,6 +2625,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
 			resets = <&cpg 312>;
+			iommus = <&ipmmu_ds1 34>;
 			status = "disabled";
 		};
 
@@ -2605,6 +2638,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
 			resets = <&cpg 311>;
+			iommus = <&ipmmu_ds1 35>;
 			status = "disabled";
 		};
 
@@ -2909,6 +2943,42 @@
 			iommus = <&ipmmu_vi1 10>;
 		};
 
+		cmm0: cmm@fea40000 {
+			compatible = "renesas,r8a7795-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea40000 0 0x1000>;
+			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 711>;
+			resets = <&cpg 711>;
+		};
+
+		cmm1: cmm@fea50000 {
+			compatible = "renesas,r8a7795-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea50000 0 0x1000>;
+			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 710>;
+			resets = <&cpg 710>;
+		};
+
+		cmm2: cmm@fea60000 {
+			compatible = "renesas,r8a7795-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea60000 0 0x1000>;
+			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 709>;
+			resets = <&cpg 709>;
+		};
+
+		cmm3: cmm@fea70000 {
+			compatible = "renesas,r8a7795-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea70000 0 0x1000>;
+			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 708>;
+			resets = <&cpg 708>;
+		};
+
 		csi20: csi2@fea80000 {
 			compatible = "renesas,r8a7795-csi2";
 			reg = <0 0xfea80000 0 0x10000>;
@@ -3112,7 +3182,10 @@
 				 <&cpg CPG_MOD 722>,
 				 <&cpg CPG_MOD 721>;
 			clock-names = "du.0", "du.1", "du.2", "du.3";
+
+			renesas,cmms = <&cmm0>, <&cmm1>, <&cmm2>, <&cmm3>;
 			vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
+
 			status = "disabled";
 
 			ports {
diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi
index 3dc9d73f589a..b9db882b0351 100644
--- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi
@@ -160,6 +160,7 @@
 			power-domains = <&sysc R8A7796_PD_CA57_CPU0>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			dynamic-power-coefficient = <854>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
@@ -174,6 +175,7 @@
 			power-domains = <&sysc R8A7796_PD_CA57_CPU1>;
 			next-level-cache = <&L2_CA57>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z>;
 			operating-points-v2 = <&cluster0_opp>;
 			capacity-dmips-mhz = <1024>;
@@ -187,6 +189,7 @@
 			power-domains = <&sysc R8A7796_PD_CA53_CPU0>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			#cooling-cells = <2>;
 			dynamic-power-coefficient = <277>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z2>;
@@ -201,6 +204,7 @@
 			power-domains = <&sysc R8A7796_PD_CA53_CPU1>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -213,6 +217,7 @@
 			power-domains = <&sysc R8A7796_PD_CA53_CPU2>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -225,6 +230,7 @@
 			power-domains = <&sysc R8A7796_PD_CA53_CPU3>;
 			next-level-cache = <&L2_CA53>;
 			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
 			clocks = <&cpg CPG_CORE R8A7796_CLK_Z2>;
 			operating-points-v2 = <&cluster1_opp>;
 			capacity-dmips-mhz = <535>;
@@ -243,6 +249,28 @@
 			cache-unified;
 			cache-level = <2>;
 		};
+
+		idle-states {
+			entry-method = "psci";
+
+			CPU_SLEEP_0: cpu-sleep-0 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <400>;
+				exit-latency-us = <500>;
+				min-residency-us = <4000>;
+			};
+
+			CPU_SLEEP_1: cpu-sleep-1 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <700>;
+				exit-latency-us = <700>;
+				min-residency-us = <5000>;
+			};
+		};
 	};
 
 	extal_clk: extal {
@@ -2366,6 +2394,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -2378,6 +2407,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
 			resets = <&cpg 313>;
+			iommus = <&ipmmu_ds1 33>;
 			status = "disabled";
 		};
 
@@ -2390,6 +2420,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
 			resets = <&cpg 312>;
+			iommus = <&ipmmu_ds1 34>;
 			status = "disabled";
 		};
 
@@ -2402,6 +2433,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
 			resets = <&cpg 311>;
+			iommus = <&ipmmu_ds1 35>;
 			status = "disabled";
 		};
 
@@ -2613,6 +2645,33 @@
 			renesas,fcp = <&fcpvi0>;
 		};
 
+		cmm0: cmm@fea40000 {
+			compatible = "renesas,r8a7796-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea40000 0 0x1000>;
+			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 711>;
+			resets = <&cpg 711>;
+		};
+
+		cmm1: cmm@fea50000 {
+			compatible = "renesas,r8a7796-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea50000 0 0x1000>;
+			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 710>;
+			resets = <&cpg 710>;
+		};
+
+		cmm2: cmm@fea60000 {
+			compatible = "renesas,r8a7796-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea60000 0 0x1000>;
+			power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 709>;
+			resets = <&cpg 709>;
+		};
+
 		csi20: csi2@fea80000 {
 			compatible = "renesas,r8a7796-csi2";
 			reg = <0 0xfea80000 0 0x10000>;
@@ -2763,9 +2822,11 @@
 				 <&cpg CPG_MOD 723>,
 				 <&cpg CPG_MOD 722>;
 			clock-names = "du.0", "du.1", "du.2";
-			status = "disabled";
 
-			vsps = <&vspd0 &vspd1 &vspd2>;
+			renesas,cmms = <&cmm0>, <&cmm1>, <&cmm2>;
+			vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>;
+
+			status = "disabled";
 
 			ports {
 				#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961-salvator-xs.dts b/arch/arm64/boot/dts/renesas/r8a77961-salvator-xs.dts
new file mode 100644
index 000000000000..4abd78ac1cd5
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a77961-salvator-xs.dts
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the Salvator-X 2nd version board with R-Car M3-W+
+ *
+ * Copyright (C) 2018 Renesas Electronics Corp.
+ */
+
+/dts-v1/;
+#include "r8a77961.dtsi"
+#include "salvator-xs.dtsi"
+
+/ {
+	model = "Renesas Salvator-X 2nd version board based on r8a77961";
+	compatible = "renesas,salvator-xs", "renesas,r8a77961";
+
+	memory@48000000 {
+		device_type = "memory";
+		/* first 128MB is reserved for secure area. */
+		reg = <0x0 0x48000000 0x0 0x78000000>;
+	};
+
+	memory@400000000 {
+		device_type = "memory";
+		reg = <0x4 0x80000000 0x0 0x80000000>;
+	};
+
+	memory@600000000 {
+		device_type = "memory";
+		reg = <0x6 0x00000000 0x1 0x00000000>;
+	};
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
new file mode 100644
index 000000000000..64466c86b698
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the R-Car M3-W+ (R8A77961) SoC
+ *
+ * Copyright (C) 2016-2017 Renesas Electronics Corp.
+ */
+
+#include <dt-bindings/clock/r8a77961-cpg-mssr.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/power/r8a77961-sysc.h>
+
+#define CPG_AUDIO_CLK_I		R8A77961_CLK_S0D4
+
+/ {
+	compatible = "renesas,r8a77961";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	/*
+	 * The external audio clocks are configured as 0 Hz fixed frequency
+	 * clocks by default.
+	 * Boards that provide audio clocks should override them.
+	 */
+	audio_clk_a: audio_clk_a {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	audio_clk_b: audio_clk_b {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	audio_clk_c: audio_clk_c {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	/* External CAN clock - to be overridden by boards that provide it */
+	can_clk: can {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	cluster0_opp: opp_table0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-500000000 {
+			opp-hz = /bits/ 64 <500000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1500000000 {
+			opp-hz = /bits/ 64 <1500000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1600000000 {
+			opp-hz = /bits/ 64 <1600000000>;
+			opp-microvolt = <900000>;
+			clock-latency-ns = <300000>;
+			turbo-mode;
+		};
+		opp-1700000000 {
+			opp-hz = /bits/ 64 <1700000000>;
+			opp-microvolt = <900000>;
+			clock-latency-ns = <300000>;
+			turbo-mode;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <960000>;
+			clock-latency-ns = <300000>;
+			turbo-mode;
+		};
+	};
+
+	cluster1_opp: opp_table1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-800000000 {
+			opp-hz = /bits/ 64 <800000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+		};
+		opp-1300000000 {
+			opp-hz = /bits/ 64 <1300000000>;
+			opp-microvolt = <820000>;
+			clock-latency-ns = <300000>;
+			turbo-mode;
+		};
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&a57_0>;
+				};
+				core1 {
+					cpu = <&a57_1>;
+				};
+			};
+
+			cluster1 {
+				core0 {
+					cpu = <&a53_0>;
+				};
+				core1 {
+					cpu = <&a53_1>;
+				};
+				core2 {
+					cpu = <&a53_2>;
+				};
+				core3 {
+					cpu = <&a53_3>;
+				};
+			};
+		};
+
+		a57_0: cpu@0 {
+			compatible = "arm,cortex-a57";
+			reg = <0x0>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA57_CPU0>;
+			next-level-cache = <&L2_CA57>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
+			dynamic-power-coefficient = <854>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z>;
+			operating-points-v2 = <&cluster0_opp>;
+			capacity-dmips-mhz = <1024>;
+			#cooling-cells = <2>;
+		};
+
+		a57_1: cpu@1 {
+			compatible = "arm,cortex-a57";
+			reg = <0x1>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA57_CPU1>;
+			next-level-cache = <&L2_CA57>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z>;
+			operating-points-v2 = <&cluster0_opp>;
+			capacity-dmips-mhz = <1024>;
+			#cooling-cells = <2>;
+		};
+
+		a53_0: cpu@100 {
+			compatible = "arm,cortex-a53";
+			reg = <0x100>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA53_CPU0>;
+			next-level-cache = <&L2_CA53>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
+			#cooling-cells = <2>;
+			dynamic-power-coefficient = <277>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z2>;
+			operating-points-v2 = <&cluster1_opp>;
+			capacity-dmips-mhz = <535>;
+		};
+
+		a53_1: cpu@101 {
+			compatible = "arm,cortex-a53";
+			reg = <0x101>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA53_CPU1>;
+			next-level-cache = <&L2_CA53>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z2>;
+			operating-points-v2 = <&cluster1_opp>;
+			capacity-dmips-mhz = <535>;
+		};
+
+		a53_2: cpu@102 {
+			compatible = "arm,cortex-a53";
+			reg = <0x102>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA53_CPU2>;
+			next-level-cache = <&L2_CA53>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z2>;
+			operating-points-v2 = <&cluster1_opp>;
+			capacity-dmips-mhz = <535>;
+		};
+
+		a53_3: cpu@103 {
+			compatible = "arm,cortex-a53";
+			reg = <0x103>;
+			device_type = "cpu";
+			power-domains = <&sysc R8A77961_PD_CA53_CPU3>;
+			next-level-cache = <&L2_CA53>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_1>;
+			clocks = <&cpg CPG_CORE R8A77961_CLK_Z2>;
+			operating-points-v2 = <&cluster1_opp>;
+			capacity-dmips-mhz = <535>;
+		};
+
+		L2_CA57: cache-controller-0 {
+			compatible = "cache";
+			power-domains = <&sysc R8A77961_PD_CA57_SCU>;
+			cache-unified;
+			cache-level = <2>;
+		};
+
+		L2_CA53: cache-controller-1 {
+			compatible = "cache";
+			power-domains = <&sysc R8A77961_PD_CA53_SCU>;
+			cache-unified;
+			cache-level = <2>;
+		};
+
+		idle-states {
+			entry-method = "psci";
+
+			CPU_SLEEP_0: cpu-sleep-0 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <400>;
+				exit-latency-us = <500>;
+				min-residency-us = <4000>;
+			};
+
+			CPU_SLEEP_1: cpu-sleep-1 {
+				compatible = "arm,idle-state";
+				arm,psci-suspend-param = <0x0010000>;
+				local-timer-stop;
+				entry-latency-us = <700>;
+				exit-latency-us = <700>;
+				min-residency-us = <5000>;
+			};
+		};
+	};
+
+	extal_clk: extal {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		/* This value must be overridden by the board */
+		clock-frequency = <0>;
+	};
+
+	extalr_clk: extalr {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		/* This value must be overridden by the board */
+		clock-frequency = <0>;
+	};
+
+	/* External PCIe clock - can be overridden by the board */
+	pcie_bus_clk: pcie_bus {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	pmu_a53 {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts-extended = <&gic GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>,
+				      <&gic GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+				      <&gic GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+				      <&gic GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&a53_0>, <&a53_1>, <&a53_2>, <&a53_3>;
+	};
+
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
+		interrupts-extended = <&gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
+				      <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&a57_0>, <&a57_1>;
+	};
+
+	psci {
+		compatible = "arm,psci-1.0", "arm,psci-0.2";
+		method = "smc";
+	};
+
+	/* External SCIF clock - to be overridden by boards that provide it */
+	scif_clk: scif {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		rwdt: watchdog@e6020000 {
+			reg = <0 0xe6020000 0 0x0c>;
+			/* placeholder */
+		};
+
+		gpio2: gpio@e6052000 {
+			reg = <0 0xe6052000 0 0x50>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			/* placeholder */
+		};
+
+		gpio3: gpio@e6053000 {
+			reg = <0 0xe6053000 0 0x50>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			/* placeholder */
+		};
+
+		gpio4: gpio@e6054000 {
+			reg = <0 0xe6054000 0 0x50>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			/* placeholder */
+		};
+
+		gpio5: gpio@e6055000 {
+			reg = <0 0xe6055000 0 0x50>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			/* placeholder */
+		};
+
+		gpio6: gpio@e6055400 {
+			reg = <0 0xe6055400 0 0x50>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			/* placeholder */
+		};
+
+		pfc: pin-controller@e6060000 {
+			compatible = "renesas,pfc-r8a77961";
+			reg = <0 0xe6060000 0 0x50c>;
+		};
+
+		cpg: clock-controller@e6150000 {
+			compatible = "renesas,r8a77961-cpg-mssr";
+			reg = <0 0xe6150000 0 0x1000>;
+			clocks = <&extal_clk>, <&extalr_clk>;
+			clock-names = "extal", "extalr";
+			#clock-cells = <2>;
+			#power-domain-cells = <0>;
+			#reset-cells = <1>;
+		};
+
+		rst: reset-controller@e6160000 {
+			compatible = "renesas,r8a77961-rst";
+			reg = <0 0xe6160000 0 0x0200>;
+		};
+
+		sysc: system-controller@e6180000 {
+			compatible = "renesas,r8a77961-sysc";
+			reg = <0 0xe6180000 0 0x0400>;
+			#power-domain-cells = <1>;
+		};
+
+		intc_ex: interrupt-controller@e61c0000 {
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			reg = <0 0xe61c0000 0 0x200>;
+			/* placeholder */
+		};
+
+		i2c2: i2c@e6510000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0 0xe6510000 0 0x40>;
+			/* placeholder */
+		};
+
+		i2c4: i2c@e66d8000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0 0xe66d8000 0 0x40>;
+			/* placeholder */
+		};
+
+		i2c_dvfs: i2c@e60b0000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0 0xe60b0000 0 0x425>;
+			/* placeholder */
+		};
+
+		hscif1: serial@e6550000 {
+			reg = <0 0xe6550000 0 0x60>;
+			/* placeholder */
+		};
+
+		hsusb: usb@e6590000 {
+			reg = <0 0xe6590000 0 0x200>;
+			/* placeholder */
+		};
+
+		usb3_phy0: usb-phy@e65ee000 {
+			reg = <0 0xe65ee000 0 0x90>;
+			#phy-cells = <0>;
+			/* placeholder */
+		};
+
+		avb: ethernet@e6800000 {
+			reg = <0 0xe6800000 0 0x800>, <0 0xe6a00000 0 0x10000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			/* placeholder */
+		};
+
+		pwm1: pwm@e6e31000 {
+			reg = <0 0xe6e31000 0 8>;
+			#pwm-cells = <2>;
+			/* placeholder */
+		};
+
+		scif1: serial@e6e68000 {
+			reg = <0 0xe6e68000 0 64>;
+			/* placeholder */
+		};
+
+		scif2: serial@e6e88000 {
+			compatible = "renesas,scif-r8a77961",
+				     "renesas,rcar-gen3-scif", "renesas,scif";
+			reg = <0 0xe6e88000 0 64>;
+			interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cpg CPG_MOD 310>,
+				 <&cpg CPG_CORE R8A77961_CLK_S3D1>,
+				 <&scif_clk>;
+			clock-names = "fck", "brg_int", "scif_clk";
+			power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
+			resets = <&cpg 310>;
+			status = "disabled";
+		};
+
+		vin0: video@e6ef0000 {
+			reg = <0 0xe6ef0000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin1: video@e6ef1000 {
+			reg = <0 0xe6ef1000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin2: video@e6ef2000 {
+			reg = <0 0xe6ef2000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin3: video@e6ef3000 {
+			reg = <0 0xe6ef3000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin4: video@e6ef4000 {
+			reg = <0 0xe6ef4000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin5: video@e6ef5000 {
+			reg = <0 0xe6ef5000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin6: video@e6ef6000 {
+			reg = <0 0xe6ef6000 0 0x1000>;
+			/* placeholder */
+		};
+
+		vin7: video@e6ef7000 {
+			reg = <0 0xe6ef7000 0 0x1000>;
+			/* placeholder */
+		};
+
+		rcar_sound: sound@ec500000 {
+			reg = <0 0xec500000 0 0x1000>, /* SCU */
+			      <0 0xec5a0000 0 0x100>,  /* ADG */
+			      <0 0xec540000 0 0x1000>, /* SSIU */
+			      <0 0xec541000 0 0x280>,  /* SSI */
+			      <0 0xec760000 0 0x200>;  /* Audio DMAC peri peri*/
+			/* placeholder */
+			rcar_sound,dvc {
+				dvc0: dvc-0 { };
+				dvc1: dvc-1 { };
+			};
+
+			rcar_sound,src {
+				src0: src-0 { };
+				src1: src-1 { };
+			};
+
+			rcar_sound,ssi {
+				ssi0: ssi-0 { };
+				ssi1: ssi-1 { };
+			};
+		};
+
+		xhci0: usb@ee000000 {
+			reg = <0 0xee000000 0 0xc00>;
+			/* placeholder */
+		};
+
+		usb3_peri0: usb@ee020000 {
+			reg = <0 0xee020000 0 0x400>;
+			/* placeholder */
+		};
+
+		ohci0: usb@ee080000 {
+			reg = <0 0xee080000 0 0x100>;
+			/* placeholder */
+		};
+
+		ohci1: usb@ee0a0000 {
+			reg = <0 0xee0a0000 0 0x100>;
+			/* placeholder */
+		};
+
+		ehci0: usb@ee080100 {
+			reg = <0 0xee080100 0 0x100>;
+			/* placeholder */
+		};
+
+		ehci1: usb@ee0a0100 {
+			reg = <0 0xee0a0100 0 0x100>;
+			/* placeholder */
+		};
+
+		usb2_phy0: usb-phy@ee080200 {
+			reg = <0 0xee080200 0 0x700>;
+			/* placeholder */
+		};
+
+		usb2_phy1: usb-phy@ee0a0200 {
+			reg = <0 0xee0a0200 0 0x700>;
+			/* placeholder */
+		};
+
+		sdhi0: sd@ee100000 {
+			reg = <0 0xee100000 0 0x2000>;
+			/* placeholder */
+		};
+
+		sdhi2: sd@ee140000 {
+			reg = <0 0xee140000 0 0x2000>;
+			/* placeholder */
+		};
+
+		sdhi3: sd@ee160000 {
+			reg = <0 0xee160000 0 0x2000>;
+			/* placeholder */
+		};
+
+		gic: interrupt-controller@f1010000 {
+			compatible = "arm,gic-400";
+			#interrupt-cells = <3>;
+			#address-cells = <0>;
+			interrupt-controller;
+			reg = <0x0 0xf1010000 0 0x1000>,
+			      <0x0 0xf1020000 0 0x20000>,
+			      <0x0 0xf1040000 0 0x20000>,
+			      <0x0 0xf1060000 0 0x20000>;
+			interrupts = <GIC_PPI 9
+					(GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_HIGH)>;
+			clocks = <&cpg CPG_MOD 408>;
+			clock-names = "clk";
+			power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
+			resets = <&cpg 408>;
+		};
+
+		pciec0: pcie@fe000000 {
+			reg = <0 0xfe000000 0 0x80000>;
+			/* placeholder */
+		};
+
+		pciec1: pcie@ee800000 {
+			reg = <0 0xee800000 0 0x80000>;
+			/* placeholder */
+		};
+
+		csi20: csi2@fea80000 {
+			reg = <0 0xfea80000 0 0x10000>;
+			/* placeholder */
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <1>;
+				};
+			};
+		};
+
+		csi40: csi2@feaa0000 {
+			reg = <0 0xfeaa0000 0 0x10000>;
+			/* placeholder */
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					reg = <1>;
+				};
+			};
+		};
+
+		hdmi0: hdmi@fead0000 {
+			reg = <0 0xfead0000 0 0x10000>;
+			/* placeholder */
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				port@0 {
+					reg = <0>;
+				};
+				port@1 {
+					reg = <1>;
+				};
+				port@2 {
+					/* HDMI sound */
+					reg = <2>;
+				};
+			};
+		};
+
+		du: display@feb00000 {
+			reg = <0 0xfeb00000 0 0x70000>;
+			/* placeholder */
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					du_out_rgb: endpoint {
+					};
+				};
+				port@1 {
+					reg = <1>;
+					du_out_hdmi0: endpoint {
+					};
+				};
+				port@2 {
+					reg = <2>;
+					du_out_lvds0: endpoint {
+					};
+				};
+			};
+		};
+
+		prr: chipid@fff00044 {
+			compatible = "renesas,prr";
+			reg = <0 0xfff00044 0 4>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+				      <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	/* External USB clocks - can be overridden by the board */
+	usb3s0_clk: usb3s0 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
+	usb_extal_clk: usb_extal {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 4ae163220f60..bdbe197774d2 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2105,6 +2105,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -2117,6 +2118,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
 			resets = <&cpg 313>;
+			iommus = <&ipmmu_ds1 33>;
 			status = "disabled";
 		};
 
@@ -2129,6 +2131,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
 			resets = <&cpg 312>;
+			iommus = <&ipmmu_ds1 34>;
 			status = "disabled";
 		};
 
@@ -2141,6 +2144,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
 			resets = <&cpg 311>;
+			iommus = <&ipmmu_ds1 35>;
 			status = "disabled";
 		};
 
@@ -2320,6 +2324,33 @@
 			resets = <&cpg 611>;
 		};
 
+		cmm0: cmm@fea40000 {
+			compatible = "renesas,r8a77965-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea40000 0 0x1000>;
+			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 711>;
+			resets = <&cpg 711>;
+		};
+
+		cmm1: cmm@fea50000 {
+			compatible = "renesas,r8a77965-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea50000 0 0x1000>;
+			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 710>;
+			resets = <&cpg 710>;
+		};
+
+		cmm3: cmm@fea70000 {
+			compatible = "renesas,r8a77965-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea70000 0 0x1000>;
+			power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 708>;
+			resets = <&cpg 708>;
+		};
+
 		csi20: csi2@fea80000 {
 			compatible = "renesas,r8a77965-csi2";
 			reg = <0 0xfea80000 0 0x10000>;
@@ -2467,10 +2498,12 @@
 				 <&cpg CPG_MOD 723>,
 				 <&cpg CPG_MOD 721>;
 			clock-names = "du.0", "du.1", "du.3";
-			status = "disabled";
 
+			renesas,cmms = <&cmm0>, <&cmm1>, <&cmm3>;
 			vsps = <&vspd0 0>, <&vspd1 0>, <&vspd0 1>;
 
+			status = "disabled";
+
 			ports {
 				#address-cells = <1>;
 				#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 0cd3b376635d..0d0558e53533 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -652,7 +652,7 @@
 		};
 
 		pwm3: pwm@e6e33000 {
-			compatible = "renesas,pwm-r8a7790", "renesas,pwm-rcar";
+			compatible = "renesas,pwm-r8a77970", "renesas,pwm-rcar";
 			reg = <0 0xe6e33000 0 8>;
 			#pwm-cells = <2>;
 			clocks = <&cpg CPG_MOD 523>;
@@ -1035,6 +1035,7 @@
 			power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
 			max-frequency = <200000000>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -1120,7 +1121,7 @@
 			clock-names = "du.0";
 			power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
 			resets = <&cpg 724>;
-			vsps = <&vspd0>;
+			vsps = <&vspd0 0>;
 			status = "disabled";
 
 			ports {
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 461a47ea656d..4d86669af819 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1338,6 +1338,7 @@
 			power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
 			max-frequency = <200000000>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -1495,7 +1496,7 @@
 			clock-names = "du.0";
 			power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
 			resets = <&cpg 724>;
-			vsps = <&vspd0>;
+			vsps = <&vspd0 0>;
 			status = "disabled";
 
 			ports {
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 455954c3d98e..67a6824a962c 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1580,6 +1580,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
 			resets = <&cpg 314>;
+			iommus = <&ipmmu_ds1 32>;
 			status = "disabled";
 		};
 
@@ -1592,6 +1593,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
 			resets = <&cpg 313>;
+			iommus = <&ipmmu_ds1 33>;
 			status = "disabled";
 		};
 
@@ -1604,6 +1606,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
 			resets = <&cpg 311>;
+			iommus = <&ipmmu_ds1 35>;
 			status = "disabled";
 		};
 
@@ -1727,6 +1730,24 @@
 			iommus = <&ipmmu_vi0 9>;
 		};
 
+		cmm0: cmm@fea40000 {
+			compatible = "renesas,r8a77990-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea40000 0 0x1000>;
+			power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 711>;
+			resets = <&cpg 711>;
+		};
+
+		cmm1: cmm@fea50000 {
+			compatible = "renesas,r8a77990-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea50000 0 0x1000>;
+			power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 710>;
+			resets = <&cpg 710>;
+		};
+
 		csi40: csi2@feaa0000 {
 			compatible = "renesas,r8a77990-csi2";
 			reg = <0 0xfeaa0000 0 0x10000>;
@@ -1768,7 +1789,10 @@
 			clock-names = "du.0", "du.1";
 			resets = <&cpg 724>;
 			reset-names = "du.0";
+
+			renesas,cmms = <&cmm0>, <&cmm1>;
 			vsps = <&vspd0 0>, <&vspd1 0>;
+
 			status = "disabled";
 
 			ports {
diff --git a/arch/arm64/boot/dts/renesas/r8a77995.dtsi b/arch/arm64/boot/dts/renesas/r8a77995.dtsi
index 183fef86cf7c..e6ee2b709ba6 100644
--- a/arch/arm64/boot/dts/renesas/r8a77995.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77995.dtsi
@@ -916,6 +916,7 @@
 			max-frequency = <200000000>;
 			power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
 			resets = <&cpg 312>;
+			iommus = <&ipmmu_ds1 34>;
 			status = "disabled";
 		};
 
@@ -993,6 +994,24 @@
 			iommus = <&ipmmu_vi0 9>;
 		};
 
+		cmm0: cmm@fea40000 {
+			compatible = "renesas,r8a77995-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea40000 0 0x1000>;
+			power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 711>;
+			resets = <&cpg 711>;
+		};
+
+		cmm1: cmm@fea50000 {
+			compatible = "renesas,r8a77995-cmm",
+				     "renesas,rcar-gen3-cmm";
+			reg = <0 0xfea50000 0 0x1000>;
+			power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
+			clocks = <&cpg CPG_MOD 710>;
+			resets = <&cpg 710>;
+		};
+
 		du: display@feb00000 {
 			compatible = "renesas,du-r8a77995";
 			reg = <0 0xfeb00000 0 0x40000>;
@@ -1003,7 +1022,10 @@
 			clock-names = "du.0", "du.1";
 			resets = <&cpg 724>;
 			reset-names = "du.0";
+
+			renesas,cmms = <&cmm0>, <&cmm1>;
 			vsps = <&vspd0 0>, <&vspd1 0>;
+
 			status = "disabled";
 
 			ports {
diff --git a/arch/arm64/boot/dts/renesas/rzg2-advantech-idk-1110wr-panel.dtsi b/arch/arm64/boot/dts/renesas/rzg2-advantech-idk-1110wr-panel.dtsi
new file mode 100644
index 000000000000..bcc21178ae04
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/rzg2-advantech-idk-1110wr-panel.dtsi
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree Source for the Advantech idk-1110wr LVDS panel connected
+ * to RZ/G2 boards
+ *
+ * Copyright (C) 2019 Renesas Electronics Corp.
+ */
+
+/ {
+	panel-lvds {
+		compatible = "advantech,idk-1110wr", "panel-lvds";
+
+		width-mm = <223>;
+		height-mm = <125>;
+
+		data-mapping = "jeida-24";
+
+		panel-timing {
+			/* 1024x600 @60Hz */
+			clock-frequency = <51200000>;
+			hactive = <1024>;
+			vactive = <600>;
+			hsync-len = <240>;
+			hfront-porch = <40>;
+			hback-porch = <40>;
+			vfront-porch = <15>;
+			vback-porch = <10>;
+			vsync-len = <10>;
+		};
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&lvds_connector>;
+			};
+		};
+	};
+};
+
+&lvds_connector {
+	remote-endpoint = <&panel_in>;
+};
diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile
index 1f18a9392d15..48fb631d5451 100644
--- a/arch/arm64/boot/dts/rockchip/Makefile
+++ b/arch/arm64/boot/dts/rockchip/Makefile
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_ARCH_ROCKCHIP) += px30-evb.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3308-evb.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3308-roc-cc.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-a1.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-evb.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-rock64.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-roc-cc.dtb
@@ -27,6 +30,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-nanopi-neo4.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-orangepi.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-puma-haikou.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-mezzanine.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock960.dtb
 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rockpro64.dtb
diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts
index 6eb7407a84aa..936ed7d71ffc 100644
--- a/arch/arm64/boot/dts/rockchip/px30-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts
@@ -14,7 +14,7 @@
 	compatible = "rockchip,px30-evb", "rockchip,px30";
 
 	chosen {
-		stdout-path = "serial2:1500000n8";
+		stdout-path = "serial5:115200n8";
 	};
 
 	adc-keys {
@@ -58,6 +58,14 @@
 	backlight: backlight {
 		compatible = "pwm-backlight";
 		pwms = <&pwm1 0 25000 0>;
+		power-supply = <&vcc3v3_lcd>;
+	};
+
+	emmc_pwrseq: emmc-pwrseq {
+		compatible = "mmc-pwrseq-emmc";
+		pinctrl-0 = <&emmc_reset>;
+		pinctrl-names = "default";
+		reset-gpios = <&gpio1 RK_PB3 GPIO_ACTIVE_HIGH>;
 	};
 
 	sdio_pwrseq: sdio-pwrseq {
@@ -74,13 +82,6 @@
 		reset-gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; /* GPIO3_A4 */
 	};
 
-	vcc_phy: vcc-phy-regulator {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc_phy";
-		regulator-always-on;
-		regulator-boot-on;
-	};
-
 	vcc5v0_sys: vccsys {
 		compatible = "regulator-fixed";
 		regulator-name = "vcc5v0_sys";
@@ -91,6 +92,22 @@
 	};
 };
 
+&cpu0 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu1 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu2 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu3 {
+	cpu-supply = <&vdd_arm>;
+};
+
 &display_subsystem {
 	status = "okay";
 };
@@ -100,12 +117,15 @@
 	cap-mmc-highspeed;
 	mmc-hs200-1_8v;
 	non-removable;
+	mmc-pwrseq = <&emmc_pwrseq>;
+	vmmc-supply = <&vcc_3v0>;
+	vqmmc-supply = <&vccio_flash>;
 	status = "okay";
 };
 
 &gmac {
 	clock_in_out = "output";
-	phy-supply = <&vcc_phy>;
+	phy-supply = <&vcc_rmii>;
 	snps,reset-gpio = <&gpio2 13 GPIO_ACTIVE_LOW>;
 	snps,reset-active-low;
 	snps,reset-delays-us = <0 50000 50000>;
@@ -114,6 +134,256 @@
 
 &i2c0 {
 	status = "okay";
+
+	rk809: pmic@20 {
+		compatible = "rockchip,rk809";
+		reg = <0x20>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pmic_int>;
+		rockchip,system-power-controller;
+		wakeup-source;
+		#clock-cells = <0>;
+		clock-output-names = "xin32k";
+
+		vcc1-supply = <&vcc5v0_sys>;
+		vcc2-supply = <&vcc5v0_sys>;
+		vcc3-supply = <&vcc5v0_sys>;
+		vcc4-supply = <&vcc5v0_sys>;
+		vcc5-supply = <&vcc3v3_sys>;
+		vcc6-supply = <&vcc3v3_sys>;
+		vcc7-supply = <&vcc3v3_sys>;
+		vcc8-supply = <&vcc3v3_sys>;
+		vcc9-supply = <&vcc5v0_sys>;
+
+		regulators {
+			vdd_log: DCDC_REG1 {
+				regulator-name = "vdd_log";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-ramp-delay = <6001>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <950000>;
+				};
+			};
+
+			vdd_arm: DCDC_REG2 {
+				regulator-name = "vdd_arm";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-ramp-delay = <6001>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-off-in-suspend;
+					regulator-suspend-microvolt = <950000>;
+				};
+			};
+
+			vcc_ddr: DCDC_REG3 {
+				regulator-name = "vcc_ddr";
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+				};
+			};
+
+			vcc_3v0: vcc_rmii: DCDC_REG4 {
+				regulator-name = "vcc_3v0";
+				regulator-min-microvolt = <3000000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3000000>;
+				};
+			};
+
+			vcc3v3_sys: DCDC_REG5 {
+				regulator-name = "vcc3v3_sys";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3300000>;
+				};
+			};
+
+			vcc_1v0: LDO_REG1 {
+				regulator-name = "vcc_1v0";
+				regulator-min-microvolt = <1000000>;
+				regulator-max-microvolt = <1000000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1000000>;
+				};
+			};
+
+			vcc_1v8: vccio_flash: vccio_sdio: LDO_REG2 {
+				regulator-name = "vcc_1v8";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vdd_1v0: LDO_REG3 {
+				regulator-name = "vdd_1v0";
+				regulator-min-microvolt = <1000000>;
+				regulator-max-microvolt = <1000000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1000000>;
+				};
+			};
+
+			vcc3v0_pmu: LDO_REG4 {
+				regulator-name = "vcc3v0_pmu";
+				regulator-min-microvolt = <3000000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3000000>;
+				};
+			};
+
+			vccio_sd: LDO_REG5 {
+				regulator-name = "vccio_sd";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3300000>;
+				};
+			};
+
+			vcc_sd: LDO_REG6 {
+				regulator-name = "vcc_sd";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3300000>;
+				};
+			};
+
+			vcc2v8_dvp: LDO_REG7 {
+				regulator-name = "vcc2v8_dvp";
+				regulator-min-microvolt = <2800000>;
+				regulator-max-microvolt = <2800000>;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-off-in-suspend;
+					regulator-suspend-microvolt = <2800000>;
+				};
+			};
+
+			vcc1v8_dvp: LDO_REG8 {
+				regulator-name = "vcc1v8_dvp";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vcc1v5_dvp: LDO_REG9 {
+				regulator-name = "vcc1v5_dvp";
+				regulator-min-microvolt = <1500000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-boot-on;
+
+				regulator-state-mem {
+					regulator-off-in-suspend;
+					regulator-suspend-microvolt = <1500000>;
+				};
+			};
+
+			vcc3v3_lcd: SWITCH_REG1 {
+				regulator-name = "vcc3v3_lcd";
+				regulator-boot-on;
+			};
+
+			vcc5v0_host: SWITCH_REG2 {
+				regulator-name = "vcc5v0_host";
+				regulator-always-on;
+				regulator-boot-on;
+			};
+		};
+	};
+};
+
+&i2c1 {
+	status = "okay";
+
+	sensor@d {
+		compatible = "asahi-kasei,ak8963";
+		reg = <0x0d>;
+		gpios = <&gpio0 RK_PB7 GPIO_ACTIVE_HIGH>;
+		vdd-supply = <&vcc3v0_pmu>;
+		mount-matrix = "1", /* x0 */
+			       "0", /* y0 */
+			       "0", /* z0 */
+			       "0", /* x1 */
+			       "1", /* y1 */
+			       "0", /* z1 */
+			       "0", /* x2 */
+			       "0", /* y2 */
+			       "1"; /* z2 */
+	};
+
+	touchscreen@14 {
+		compatible = "goodix,gt1151";
+		reg = <0x14>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <RK_PA5 IRQ_TYPE_LEVEL_LOW>;
+		irq-gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_LOW>;
+		reset-gpios = <&gpio0 RK_PB4 GPIO_ACTIVE_HIGH>;
+		VDDIO-supply = <&vcc3v3_lcd>;
+	};
+
+	sensor@4c {
+		compatible = "fsl,mma7660";
+		reg = <0x4c>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <RK_PB7 IRQ_TYPE_LEVEL_LOW>;
+	};
 };
 
 &i2s1_2ch {
@@ -122,6 +392,13 @@
 
 &io_domains {
 	status = "okay";
+
+	vccio1-supply = <&vccio_sdio>;
+	vccio2-supply = <&vccio_sd>;
+	vccio3-supply = <&vcc_3v0>;
+	vccio4-supply = <&vcc3v0_pmu>;
+	vccio5-supply = <&vcc_3v0>;
+	vccio6-supply = <&vccio_flash>;
 };
 
 &pinctrl {
@@ -132,6 +409,12 @@
 		};
 	};
 
+	emmc {
+		emmc_reset: emmc-reset {
+			rockchip,pins = <1 RK_PB3 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
 	pmic {
 		pmic_int: pmic_int {
 			rockchip,pins =
@@ -164,6 +447,9 @@
 
 &pmu_io_domains {
 	status = "okay";
+
+	pmuio1-supply = <&vcc3v0_pmu>;
+	pmuio2-supply = <&vcc3v0_pmu>;
 };
 
 &pwm1 {
@@ -171,6 +457,7 @@
 };
 
 &saradc {
+	vref-supply = <&vcc_1v8>;
 	status = "okay";
 };
 
@@ -183,6 +470,8 @@
 	sd-uhs-sdr25;
 	sd-uhs-sdr50;
 	sd-uhs-sdr104;
+	vmmc-supply = <&vcc_sd>;
+	vqmmc-supply = <&vccio_sd>;
 	status = "okay";
 };
 
@@ -196,13 +485,25 @@
 	status = "okay";
 };
 
+&u2phy {
+	status = "okay";
+
+	u2phy_host: host-port {
+		status = "okay";
+	};
+
+	u2phy_otg: otg-port {
+		status = "okay";
+	};
+};
+
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart1_xfer &uart1_cts>;
 	status = "okay";
 };
 
-&uart2 {
+&uart5 {
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
index eb992d60e6ba..8812b70f3911 100644
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -161,13 +161,6 @@
 		status = "disabled";
 	};
 
-	firmware {
-		optee {
-			compatible = "linaro,optee-tz";
-			method = "smc";
-		};
-	};
-
 	gmac_clkin: external-gmac-clock {
 		compatible = "fixed-clock";
 		clock-frequency = <50000000>;
@@ -195,13 +188,6 @@
 		clock-output-names = "xin24m";
 	};
 
-	xin32k: xin32k {
-		compatible = "fixed-clock";
-		#clock-cells = <0>;
-		clock-frequency = <32768>;
-		clock-output-names = "xin32k";
-	};
-
 	pmu: power-management@ff000000 {
 		compatible = "rockchip,px30-pmu", "syscon", "simple-mfd";
 		reg = <0x0 0xff000000 0x0 0x1000>;
@@ -671,36 +657,102 @@
 		status = "disabled";
 	};
 
+	otp: nvmem@ff290000 {
+		compatible = "rockchip,px30-otp";
+		reg = <0x0 0xff290000 0x0 0x4000>;
+		clocks = <&cru SCLK_OTP_USR>, <&cru PCLK_OTP_NS>,
+			 <&cru PCLK_OTP_PHY>;
+		clock-names = "otp", "apb_pclk", "phy";
+		resets = <&cru SRST_OTP_PHY>;
+		reset-names = "phy";
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* Data cells */
+		cpu_id: id@7 {
+			reg = <0x07 0x10>;
+		};
+		cpu_leakage: cpu-leakage@17 {
+			reg = <0x17 0x1>;
+		};
+		performance: performance@1e {
+			reg = <0x1e 0x1>;
+			bits = <4 3>;
+		};
+	};
+
 	cru: clock-controller@ff2b0000 {
 		compatible = "rockchip,px30-cru";
 		reg = <0x0 0xff2b0000 0x0 0x1000>;
+		clocks = <&xin24m>, <&pmucru PLL_GPLL>;
+		clock-names = "xin24m", "gpll";
 		rockchip,grf = <&grf>;
 		#clock-cells = <1>;
 		#reset-cells = <1>;
 
-		assigned-clocks = <&cru PLL_NPLL>;
-		assigned-clock-rates = <1188000000>;
+		assigned-clocks = <&cru PLL_NPLL>,
+			<&cru ACLK_BUS_PRE>, <&cru ACLK_PERI_PRE>,
+			<&cru HCLK_BUS_PRE>, <&cru HCLK_PERI_PRE>,
+			<&cru PCLK_BUS_PRE>, <&cru SCLK_GPU>;
+
+		assigned-clock-rates = <1188000000>,
+			<200000000>, <200000000>,
+			<150000000>, <150000000>,
+			<100000000>, <200000000>;
 	};
 
 	pmucru: clock-controller@ff2bc000 {
 		compatible = "rockchip,px30-pmucru";
 		reg = <0x0 0xff2bc000 0x0 0x1000>;
+		clocks = <&xin24m>;
+		clock-names = "xin24m";
 		rockchip,grf = <&grf>;
 		#clock-cells = <1>;
 		#reset-cells = <1>;
 
 		assigned-clocks =
 			<&pmucru PLL_GPLL>, <&pmucru PCLK_PMU_PRE>,
-			<&pmucru SCLK_WIFI_PMU>, <&cru ARMCLK>,
-			<&cru ACLK_BUS_PRE>, <&cru ACLK_PERI_PRE>,
-			<&cru HCLK_BUS_PRE>, <&cru HCLK_PERI_PRE>,
-			<&cru PCLK_BUS_PRE>, <&cru SCLK_GPU>;
+			<&pmucru SCLK_WIFI_PMU>;
 		assigned-clock-rates =
 			<1200000000>, <100000000>,
-			<26000000>, <600000000>,
-			<200000000>, <200000000>,
-			<150000000>, <150000000>,
-			<100000000>, <200000000>;
+			<26000000>;
+	};
+
+	usb2phy_grf: syscon@ff2c0000 {
+		compatible = "rockchip,px30-usb2phy-grf", "syscon",
+			     "simple-mfd";
+		reg = <0x0 0xff2c0000 0x0 0x10000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		u2phy: usb2-phy@100 {
+			compatible = "rockchip,px30-usb2phy";
+			reg = <0x100 0x20>;
+			clocks = <&pmucru SCLK_USBPHY_REF>;
+			clock-names = "phyclk";
+			#clock-cells = <0>;
+			assigned-clocks = <&cru USB480M>;
+			assigned-clock-parents = <&u2phy>;
+			clock-output-names = "usb480m_phy";
+			status = "disabled";
+
+			u2phy_host: host-port {
+				#phy-cells = <0>;
+				interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "linestate";
+				status = "disabled";
+			};
+
+			u2phy_otg: otg-port {
+				#phy-cells = <0>;
+				interrupts = <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "otg-bvalid", "otg-id",
+						  "linestate";
+				status = "disabled";
+			};
+		};
 	};
 
 	usb20_otg: usb@ff300000 {
@@ -715,6 +767,8 @@
 		g-rx-fifo-size = <280>;
 		g-tx-fifo-size = <256 128 128 64 32 16>;
 		g-use-dma;
+		phys = <&u2phy_otg>;
+		phy-names = "usb2-phy";
 		power-domains = <&power PX30_PD_USB>;
 		status = "disabled";
 	};
@@ -725,6 +779,8 @@
 		interrupts = <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru HCLK_HOST>;
 		clock-names = "usbhost";
+		phys = <&u2phy_host>;
+		phy-names = "usb";
 		power-domains = <&power PX30_PD_USB>;
 		status = "disabled";
 	};
@@ -735,6 +791,8 @@
 		interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru HCLK_HOST>;
 		clock-names = "usbhost";
+		phys = <&u2phy_host>;
+		phy-names = "usb";
 		power-domains = <&power PX30_PD_USB>;
 		status = "disabled";
 	};
@@ -801,6 +859,8 @@
 		clock-names = "biu", "ciu", "ciu-drv", "ciu-sample";
 		fifo-depth = <0x100>;
 		max-frequency = <150000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&emmc_clk &emmc_cmd &emmc_bus8>;
 		power-domains = <&power PX30_PD_MMC_NAND>;
 		status = "disabled";
 	};
@@ -831,7 +891,7 @@
 		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "vopb_mmu";
 		clocks = <&cru ACLK_VOPB>, <&cru HCLK_VOPB>;
-		clock-names = "aclk", "hclk";
+		clock-names = "aclk", "iface";
 		power-domains = <&power PX30_PD_VO>;
 		#iommu-cells = <0>;
 		status = "disabled";
@@ -863,7 +923,7 @@
 		interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "vopl_mmu";
 		clocks = <&cru ACLK_VOPL>, <&cru HCLK_VOPL>;
-		clock-names = "aclk", "hclk";
+		clock-names = "aclk", "iface";
 		power-domains = <&power PX30_PD_VO>;
 		#iommu-cells = <0>;
 		status = "disabled";
@@ -1164,11 +1224,6 @@
 				rockchip,pins =
 					<0 RK_PB5 1 &pcfg_pull_none>;
 			};
-
-			uart0_rts_gpio: uart0-rts-gpio {
-				rockchip,pins =
-					<0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
-			};
 		};
 
 		uart1 {
@@ -1187,11 +1242,6 @@
 				rockchip,pins =
 					<1 RK_PC3 1 &pcfg_pull_none>;
 			};
-
-			uart1_rts_gpio: uart1-rts-gpio {
-				rockchip,pins =
-					<1 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>;
-			};
 		};
 
 		uart2-m0 {
@@ -1226,11 +1276,6 @@
 				rockchip,pins =
 					<0 RK_PC3 2 &pcfg_pull_none>;
 			};
-
-			uart3m0_rts_gpio: uart3m0-rts-gpio {
-				rockchip,pins =
-					<0 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>;
-			};
 		};
 
 		uart3-m1 {
@@ -1249,11 +1294,6 @@
 				rockchip,pins =
 					<1 RK_PB5 2 &pcfg_pull_none>;
 			};
-
-			uart3m1_rts_gpio: uart3m1-rts-gpio {
-				rockchip,pins =
-					<1 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
-			};
 		};
 
 		uart4 {
@@ -1602,16 +1642,6 @@
 					<1 RK_PD4 1 &pcfg_pull_up_8ma>,
 					<1 RK_PD5 1 &pcfg_pull_up_8ma>;
 			};
-
-			sdmmc_gpio: sdmmc-gpio {
-				rockchip,pins =
-					<1 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up_4ma>,
-					<1 RK_PD3 RK_FUNC_GPIO &pcfg_pull_up_4ma>,
-					<1 RK_PD4 RK_FUNC_GPIO &pcfg_pull_up_4ma>,
-					<1 RK_PD5 RK_FUNC_GPIO &pcfg_pull_up_4ma>,
-					<1 RK_PD6 RK_FUNC_GPIO &pcfg_pull_up_4ma>,
-					<1 RK_PD7 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
-			};
 		};
 
 		sdio {
@@ -1632,16 +1662,6 @@
 					<1 RK_PD0 1 &pcfg_pull_up>,
 					<1 RK_PD1 1 &pcfg_pull_up>;
 			};
-
-			sdio_gpio: sdio-gpio {
-				rockchip,pins =
-					<1 RK_PC6 RK_FUNC_GPIO &pcfg_pull_up>,
-					<1 RK_PC7 RK_FUNC_GPIO &pcfg_pull_up>,
-					<1 RK_PD0 RK_FUNC_GPIO &pcfg_pull_up>,
-					<1 RK_PD1 RK_FUNC_GPIO &pcfg_pull_up>,
-					<1 RK_PC4 RK_FUNC_GPIO &pcfg_pull_up>,
-					<1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>;
-			};
 		};
 
 		emmc {
@@ -1655,11 +1675,6 @@
 					<1 RK_PB2 2 &pcfg_pull_up_8ma>;
 			};
 
-			emmc_pwren: emmc-pwren {
-				rockchip,pins =
-					<1 RK_PB0 2 &pcfg_pull_none>;
-			};
-
 			emmc_rstnout: emmc-rstnout {
 				rockchip,pins =
 					<1 RK_PB3 2 &pcfg_pull_none>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts
new file mode 100644
index 000000000000..9b4f855ea5d4
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd
+ *
+ */
+
+/dts-v1/;
+#include <dt-bindings/input/input.h>
+#include "rk3308.dtsi"
+
+/ {
+	model = "Rockchip RK3308 EVB";
+	compatible = "rockchip,rk3308-evb", "rockchip,rk3308";
+
+	chosen {
+		stdout-path = "serial4:1500000n8";
+	};
+
+	adc-keys0 {
+		compatible = "adc-keys";
+		io-channels = <&saradc 0>;
+		io-channel-names = "buttons";
+		poll-interval = <100>;
+		keyup-threshold-microvolt = <1800000>;
+
+		func-key {
+			linux,code = <KEY_FN>;
+			label = "function";
+			press-threshold-microvolt = <18000>;
+		};
+	};
+
+	adc-keys1 {
+		compatible = "adc-keys";
+		io-channels = <&saradc 1>;
+		io-channel-names = "buttons";
+		poll-interval = <100>;
+		keyup-threshold-microvolt = <1800000>;
+
+		esc-key {
+			linux,code = <KEY_MICMUTE>;
+			label = "micmute";
+			press-threshold-microvolt = <1130000>;
+		};
+
+		home-key {
+			linux,code = <KEY_MODE>;
+			label = "mode";
+			press-threshold-microvolt = <901000>;
+		};
+
+		menu-key {
+			linux,code = <KEY_PLAY>;
+			label = "play";
+			press-threshold-microvolt = <624000>;
+		};
+
+		vol-down-key {
+			linux,code = <KEY_VOLUMEDOWN>;
+			label = "volume down";
+			press-threshold-microvolt = <300000>;
+		};
+
+		vol-up-key {
+			linux,code = <KEY_VOLUMEUP>;
+			label = "volume up";
+			press-threshold-microvolt = <18000>;
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		autorepeat;
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwr_key>;
+
+		power {
+			gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_POWER>;
+			label = "GPIO Key Power";
+			debounce-interval = <100>;
+			wakeup-source;
+		};
+	};
+
+	vcc12v_dcin: vcc12v-dcin {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc12v_dcin";
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
+	vcc5v0_sys: vcc5v0-sys {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc5v0_sys";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc12v_dcin>;
+	};
+
+	vccio_sdio: vcc_1v8: vcc-1v8 {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_1v8";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc_io>;
+	};
+
+	vcc_ddr: vcc-ddr {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_ddr";
+		regulator-min-microvolt = <1500000>;
+		regulator-max-microvolt = <1500000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vcc_io: vcc-io {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_io";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vccio_flash: vccio-flash {
+		compatible = "regulator-fixed";
+		regulator-name = "vccio_flash";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc_io>;
+	};
+
+	vcc5v0_host: vcc5v0-host {
+		compatible = "regulator-fixed";
+		gpio = <&gpio0 RK_PC5 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		pinctrl-names = "default";
+		pinctrl-0 = <&usb_drv>;
+		regulator-name = "vbus_host";
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vdd_core: vdd-core {
+		compatible = "pwm-regulator";
+		pwms = <&pwm0 0 5000 1>;
+		regulator-name = "vdd_core";
+		regulator-min-microvolt = <827000>;
+		regulator-max-microvolt = <1340000>;
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-settling-time-up-us = <250>;
+		pwm-supply = <&vcc5v0_sys>;
+	};
+
+	vdd_log: vdd-log {
+		compatible = "regulator-fixed";
+		regulator-name = "vdd_log";
+		regulator-min-microvolt = <1050000>;
+		regulator-max-microvolt = <1050000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vdd_1v0: vdd-1v0 {
+		compatible = "regulator-fixed";
+		regulator-name = "vdd_1v0";
+		regulator-min-microvolt = <1000000>;
+		regulator-max-microvolt = <1000000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+};
+
+&cpu0 {
+	cpu-supply = <&vdd_core>;
+};
+
+&saradc {
+	status = "okay";
+	vref-supply = <&vcc_1v8>;
+};
+
+&pinctrl {
+	pinctrl-names = "default";
+	pinctrl-0 = <&rtc_32k>;
+
+	buttons {
+		pwr_key: pwr-key {
+			rockchip,pins = <0 RK_PA6 0 &pcfg_pull_up>;
+		};
+	};
+
+	usb {
+		usb_drv: usb-drv {
+			rockchip,pins = <0 RK_PC5 0 &pcfg_pull_none>;
+		};
+	};
+
+	sdio-pwrseq {
+		wifi_enable_h: wifi-enable-h {
+			rockchip,pins = <0 RK_PA2 0 &pcfg_pull_none>;
+		};
+	};
+};
+
+&pwm0 {
+	status = "okay";
+	pinctrl-0 = <&pwm0_pin_pull_down>;
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart4_xfer>;
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
new file mode 100644
index 000000000000..aa256350b18f
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd
+ */
+
+/dts-v1/;
+#include "rk3308.dtsi"
+
+/ {
+	model = "Firefly ROC-RK3308-CC board";
+	compatible = "firefly,roc-rk3308-cc", "rockchip,rk3308";
+	chosen {
+		stdout-path = "serial2:1500000n8";
+	};
+
+	ir_rx {
+		compatible = "gpio-ir-receiver";
+		gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&ir_recv_pin>;
+	};
+
+	ir_tx {
+		compatible = "pwm-ir-tx";
+		pwms = <&pwm5 0 25000 0>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		power {
+			label = "firefly:red:power";
+			linux,default-trigger = "ir-power-click";
+			default-state = "on";
+			gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
+		};
+
+		user {
+			label = "firefly:blue:user";
+			linux,default-trigger = "ir-user-click";
+			default-state = "off";
+			gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_HIGH>;
+		};
+	};
+
+	typec_vcc5v: typec-vcc5v {
+		compatible = "regulator-fixed";
+		regulator-name = "typec_vcc5v";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
+	vcc5v0_sys: vcc5v0-sys {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc5v0_sys";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&typec_vcc5v>;
+	};
+
+	vcc_io: vcc-io {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_io";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vcc_sdmmc: vcc-sdmmc {
+		compatible = "regulator-gpio";
+		regulator-name = "vcc_sdmmc";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <3300000>;
+		gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_HIGH>;
+		states = <1800000 0x0
+			  3300000 0x1>;
+		vin-supply = <&vcc5v0_sys>;
+	};
+
+	vcc_sd: vcc-sd {
+		compatible = "regulator-fixed";
+		gpio = <&gpio4 RK_PD6 GPIO_ACTIVE_LOW>;
+		regulator-name = "vcc_sd";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vim-supply = <&vcc_io>;
+	};
+
+	vdd_core: vdd-core {
+		compatible = "pwm-regulator";
+		pwms = <&pwm0 0 5000 1>;
+		regulator-name = "vdd_core";
+		regulator-min-microvolt = <827000>;
+		regulator-max-microvolt = <1340000>;
+		regulator-init-microvolt = <1015000>;
+		regulator-settling-time-up-us = <250>;
+		regulator-always-on;
+		regulator-boot-on;
+		pwm-supply = <&vcc5v0_sys>;
+	};
+
+	vdd_log: vdd-log {
+		compatible = "regulator-fixed";
+		regulator-name = "vdd_log";
+		regulator-min-microvolt = <1050000>;
+		regulator-max-microvolt = <1050000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc5v0_sys>;
+	};
+};
+
+&cpu0 {
+	cpu-supply = <&vdd_core>;
+};
+
+&emmc {
+	bus-width = <8>;
+	cap-mmc-highspeed;
+	disable-wp;
+	mmc-hs200-1_8v;
+	non-removable;
+	status = "okay";
+};
+
+&i2c1 {
+	clock-frequency = <400000>;
+	status = "okay";
+
+	rtc: rtc@51 {
+		compatible = "nxp,pcf8563";
+		reg = <0x51>;
+		#clock-cells = <0>;
+	};
+};
+
+&pwm5 {
+	status = "okay";
+	pinctrl-names = "active";
+	pinctrl-0 = <&pwm5_pin_pull_down>;
+};
+
+&pinctrl {
+	pinctrl-names = "default";
+	pinctrl-0 = <&rtc_32k>;
+
+	ir-receiver {
+		ir_recv_pin: ir-recv-pin  {
+			rockchip,pins = <0 RK_PC0 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	buttons {
+		pwr_key: pwr-key {
+			rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+};
+
+&pwm0 {
+	status = "okay";
+	pinctrl-0 = <&pwm0_pin_pull_down>;
+};
+
+&sdmmc {
+	bus-width = <4>;
+	cap-mmc-highspeed;
+	cap-sd-highspeed;
+	card-detect-delay = <300>;
+	sd-uhs-sdr25;
+	sd-uhs-sdr50;
+	sd-uhs-sdr104;
+	vmmc-supply = <&vcc_sd>;
+	vqmmc-supply = <&vcc_sdmmc>;
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
new file mode 100644
index 000000000000..8bdc66c62975
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -0,0 +1,1739 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd
+ *
+ */
+
+#include <dt-bindings/clock/rk3308-cru.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include <dt-bindings/soc/rockchip,boot-mode.h>
+#include <dt-bindings/thermal/thermal.h>
+
+/ {
+	compatible = "rockchip,rk3308";
+
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		i2c0 = &i2c0;
+		i2c1 = &i2c1;
+		i2c2 = &i2c2;
+		i2c3 = &i2c3;
+		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+		serial4 = &uart4;
+		spi0 = &spi0;
+		spi1 = &spi1;
+		spi2 = &spi2;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35", "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			clocks = <&cru ARMCLK>;
+			#cooling-cells = <2>;
+			dynamic-power-coefficient = <90>;
+			operating-points-v2 = <&cpu0_opp_table>;
+			cpu-idle-states = <&CPU_SLEEP>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35", "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			operating-points-v2 = <&cpu0_opp_table>;
+			cpu-idle-states = <&CPU_SLEEP>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35", "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "psci";
+			operating-points-v2 = <&cpu0_opp_table>;
+			cpu-idle-states = <&CPU_SLEEP>;
+			next-level-cache = <&l2>;
+		};
+
+		cpu3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a35", "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "psci";
+			operating-points-v2 = <&cpu0_opp_table>;
+			cpu-idle-states = <&CPU_SLEEP>;
+			next-level-cache = <&l2>;
+		};
+
+		idle-states {
+			entry-method = "psci";
+
+			CPU_SLEEP: cpu-sleep {
+				compatible = "arm,idle-state";
+				local-timer-stop;
+				arm,psci-suspend-param = <0x0010000>;
+				entry-latency-us = <120>;
+				exit-latency-us = <250>;
+				min-residency-us = <900>;
+			};
+		};
+
+		l2: l2-cache {
+			compatible = "cache";
+		};
+	};
+
+	cpu0_opp_table: cpu0-opp-table {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <950000 950000 1340000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <950000 950000 1340000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <1025000 1025000 1340000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <1125000 1125000 1340000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
+	arm-pmu {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
+	};
+
+	mac_clkin: external-mac-clock {
+		compatible = "fixed-clock";
+		clock-frequency = <50000000>;
+		clock-output-names = "mac_clkin";
+		#clock-cells = <0>;
+	};
+
+	psci {
+		compatible = "arm,psci-1.0";
+		method = "smc";
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	xin24m: xin24m {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <24000000>;
+		clock-output-names = "xin24m";
+	};
+
+	grf: grf@ff000000 {
+		compatible = "rockchip,rk3308-grf", "syscon", "simple-mfd";
+		reg = <0x0 0xff000000 0x0 0x10000>;
+
+		reboot-mode {
+			compatible = "syscon-reboot-mode";
+			offset = <0x500>;
+			mode-bootloader = <BOOT_BL_DOWNLOAD>;
+			mode-loader = <BOOT_BL_DOWNLOAD>;
+			mode-normal = <BOOT_NORMAL>;
+			mode-recovery = <BOOT_RECOVERY>;
+			mode-fastboot = <BOOT_FASTBOOT>;
+		};
+	};
+
+	detect_grf: syscon@ff00b000 {
+		compatible = "rockchip,rk3308-detect-grf", "syscon", "simple-mfd";
+		reg = <0x0 0xff00b000 0x0 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+	};
+
+	core_grf: syscon@ff00c000 {
+		compatible = "rockchip,rk3308-core-grf", "syscon", "simple-mfd";
+		reg = <0x0 0xff00c000 0x0 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+	};
+
+	i2c0: i2c@ff040000 {
+		compatible = "rockchip,rk3308-i2c", "rockchip,rk3399-i2c";
+		reg = <0x0 0xff040000 0x0 0x1000>;
+		clocks = <&cru SCLK_I2C0>, <&cru PCLK_I2C0>;
+		clock-names = "i2c", "pclk";
+		interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2c0_xfer>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	i2c1: i2c@ff050000 {
+		compatible = "rockchip,rk3308-i2c", "rockchip,rk3399-i2c";
+		reg = <0x0 0xff050000 0x0 0x1000>;
+		clocks = <&cru SCLK_I2C1>, <&cru PCLK_I2C1>;
+		clock-names = "i2c", "pclk";
+		interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2c1_xfer>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	i2c2: i2c@ff060000 {
+		compatible = "rockchip,rk3308-i2c", "rockchip,rk3399-i2c";
+		reg = <0x0 0xff060000 0x0 0x1000>;
+		clocks = <&cru SCLK_I2C2>, <&cru PCLK_I2C2>;
+		clock-names = "i2c", "pclk";
+		interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2c2_xfer>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	i2c3: i2c@ff070000 {
+		compatible = "rockchip,rk3308-i2c", "rockchip,rk3399-i2c";
+		reg = <0x0 0xff070000 0x0 0x1000>;
+		clocks = <&cru SCLK_I2C3>, <&cru PCLK_I2C3>;
+		clock-names = "i2c", "pclk";
+		interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2c3m0_xfer>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	wdt: watchdog@ff080000 {
+		compatible = "snps,dw-wdt";
+		reg = <0x0 0xff080000 0x0 0x100>;
+		clocks = <&cru PCLK_WDT>;
+		interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+		status = "disabled";
+	};
+
+	uart0: serial@ff0a0000 {
+		compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+		reg = <0x0 0xff0a0000 0x0 0x100>;
+		interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_UART0>, <&cru PCLK_UART0>;
+		clock-names = "baudclk", "apb_pclk";
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&uart0_xfer &uart0_cts &uart0_rts>;
+		status = "disabled";
+	};
+
+	uart1: serial@ff0b0000 {
+		compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+		reg = <0x0 0xff0b0000 0x0 0x100>;
+		interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_UART1>, <&cru PCLK_UART1>;
+		clock-names = "baudclk", "apb_pclk";
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&uart1_xfer &uart1_cts &uart1_rts>;
+		status = "disabled";
+	};
+
+	uart2: serial@ff0c0000 {
+		compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+		reg = <0x0 0xff0c0000 0x0 0x100>;
+		interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_UART2>, <&cru PCLK_UART2>;
+		clock-names = "baudclk", "apb_pclk";
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&uart2m0_xfer>;
+		status = "disabled";
+	};
+
+	uart3: serial@ff0d0000 {
+		compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+		reg = <0x0 0xff0d0000 0x0 0x100>;
+		interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_UART3>, <&cru PCLK_UART3>;
+		clock-names = "baudclk", "apb_pclk";
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&uart3_xfer>;
+		status = "disabled";
+	};
+
+	uart4: serial@ff0e0000 {
+		compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+		reg = <0x0 0xff0e0000 0x0 0x100>;
+		interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_UART4>, <&cru PCLK_UART4>;
+		clock-names = "baudclk", "apb_pclk";
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&uart4_xfer &uart4_cts &uart4_rts>;
+		status = "disabled";
+	};
+
+	spi0: spi@ff120000 {
+		compatible = "rockchip,rk3308-spi", "rockchip,rk3066-spi";
+		reg = <0x0 0xff120000 0x0 0x1000>;
+		interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&cru SCLK_SPI0>, <&cru PCLK_SPI0>;
+		clock-names = "spiclk", "apb_pclk";
+		dmas = <&dmac0 0>, <&dmac0 1>;
+		dma-names = "tx", "rx";
+		pinctrl-names = "default";
+		pinctrl-0 = <&spi0_clk &spi0_csn0 &spi0_miso &spi0_mosi>;
+		status = "disabled";
+	};
+
+	spi1: spi@ff130000 {
+		compatible = "rockchip,rk3308-spi", "rockchip,rk3066-spi";
+		reg = <0x0 0xff130000 0x0 0x1000>;
+		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&cru SCLK_SPI1>, <&cru PCLK_SPI1>;
+		clock-names = "spiclk", "apb_pclk";
+		dmas = <&dmac0 2>, <&dmac0 3>;
+		dma-names = "tx", "rx";
+		pinctrl-names = "default";
+		pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_miso &spi1_mosi>;
+		status = "disabled";
+	};
+
+	spi2: spi@ff140000 {
+		compatible = "rockchip,rk3308-spi", "rockchip,rk3066-spi";
+		reg = <0x0 0xff140000 0x0 0x1000>;
+		interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&cru SCLK_SPI2>, <&cru PCLK_SPI2>;
+		clock-names = "spiclk", "apb_pclk";
+		dmas = <&dmac1 16>, <&dmac1 17>;
+		dma-names = "tx", "rx";
+		pinctrl-names = "default";
+		pinctrl-0 = <&spi2_clk &spi2_csn0 &spi2_miso &spi2_mosi>;
+		status = "disabled";
+	};
+
+	pwm8: pwm@ff160000 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff160000 0x0 0x10>;
+		clocks = <&cru SCLK_PWM2>, <&cru PCLK_PWM2>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm8_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm9: pwm@ff160010 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff160010 0x0 0x10>;
+		clocks = <&cru SCLK_PWM2>, <&cru PCLK_PWM2>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm9_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm10: pwm@ff160020 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff160020 0x0 0x10>;
+		clocks = <&cru SCLK_PWM2>, <&cru PCLK_PWM2>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm10_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm11: pwm@ff160030 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff160030 0x0 0x10>;
+		clocks = <&cru SCLK_PWM2>, <&cru PCLK_PWM2>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm11_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm4: pwm@ff170000 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff170000 0x0 0x10>;
+		clocks = <&cru SCLK_PWM1>, <&cru PCLK_PWM1>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm4_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm5: pwm@ff170010 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff170010 0x0 0x10>;
+		clocks = <&cru SCLK_PWM1>, <&cru PCLK_PWM1>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm5_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm6: pwm@ff170020 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff170020 0x0 0x10>;
+		clocks = <&cru SCLK_PWM1>, <&cru PCLK_PWM1>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm6_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm7: pwm@ff170030 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff170030 0x0 0x10>;
+		clocks = <&cru SCLK_PWM1>, <&cru PCLK_PWM1>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm7_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm0: pwm@ff180000 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff180000 0x0 0x10>;
+		clocks = <&cru SCLK_PWM0>, <&cru PCLK_PWM0>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm0_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm1: pwm@ff180010 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff180010 0x0 0x10>;
+		clocks = <&cru SCLK_PWM0>, <&cru PCLK_PWM0>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm1_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm2: pwm@ff180020 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff180020 0x0 0x10>;
+		clocks = <&cru SCLK_PWM0>, <&cru PCLK_PWM0>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm2_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	pwm3: pwm@ff180030 {
+		compatible = "rockchip,rk3308-pwm", "rockchip,rk3328-pwm";
+		reg = <0x0 0xff180030 0x0 0x10>;
+		clocks = <&cru SCLK_PWM0>, <&cru PCLK_PWM0>;
+		clock-names = "pwm", "pclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm3_pin>;
+		#pwm-cells = <3>;
+		status = "disabled";
+	};
+
+	rktimer: rktimer@ff1a0000 {
+		compatible = "rockchip,rk3288-timer";
+		reg = <0x0 0xff1a0000 0x0 0x20>;
+		interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER0>;
+		clock-names = "pclk", "timer";
+	};
+
+	saradc: saradc@ff1e0000 {
+		compatible = "rockchip,rk3308-saradc", "rockchip,rk3399-saradc";
+		reg = <0x0 0xff1e0000 0x0 0x100>;
+		interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+		clock-names = "saradc", "apb_pclk";
+		#io-channel-cells = <1>;
+		resets = <&cru SRST_SARADC_P>;
+		reset-names = "saradc-apb";
+		status = "disabled";
+	};
+
+	amba {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		dmac0: dma-controller@ff2c0000 {
+			compatible = "arm,pl330", "arm,primecell";
+			reg = <0x0 0xff2c0000 0x0 0x4000>;
+			interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru ACLK_DMAC0>;
+			clock-names = "apb_pclk";
+			#dma-cells = <1>;
+		};
+
+		dmac1: dma-controller@ff2d0000 {
+			compatible = "arm,pl330", "arm,primecell";
+			reg = <0x0 0xff2d0000 0x0 0x4000>;
+			interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru ACLK_DMAC1>;
+			clock-names = "apb_pclk";
+			#dma-cells = <1>;
+		};
+	};
+
+	i2s_2ch_0: i2s@ff350000 {
+		compatible = "rockchip,rk3308-i2s", "rockchip,rk3066-i2s";
+		reg = <0x0 0xff350000 0x0 0x1000>;
+		interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_I2S0_2CH>, <&cru HCLK_I2S0_2CH>;
+		clock-names = "i2s_clk", "i2s_hclk";
+		dmas = <&dmac1 8>, <&dmac1 9>;
+		dma-names = "tx", "rx";
+		resets = <&cru SRST_I2S0_2CH_M>, <&cru SRST_I2S0_2CH_H>;
+		reset-names = "reset-m", "reset-h";
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2s_2ch_0_sclk
+			     &i2s_2ch_0_lrck
+			     &i2s_2ch_0_sdi
+			     &i2s_2ch_0_sdo>;
+		status = "disabled";
+	};
+
+	i2s_2ch_1: i2s@ff360000 {
+		compatible = "rockchip,rk3308-i2s", "rockchip,rk3066-i2s";
+		reg = <0x0 0xff360000 0x0 0x1000>;
+		interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_I2S1_2CH>, <&cru HCLK_I2S1_2CH>;
+		clock-names = "i2s_clk", "i2s_hclk";
+		dmas = <&dmac1 11>;
+		dma-names = "rx";
+		resets = <&cru SRST_I2S1_2CH_M>, <&cru SRST_I2S1_2CH_H>;
+		reset-names = "reset-m", "reset-h";
+		status = "disabled";
+	};
+
+	spdif_tx: spdif-tx@ff3a0000 {
+		compatible = "rockchip,rk3308-spdif", "rockchip,rk3328-spdif";
+		reg = <0x0 0xff3a0000 0x0 0x1000>;
+		interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_SPDIF_TX>, <&cru HCLK_SPDIFTX>;
+		clock-names = "mclk", "hclk";
+		dmas = <&dmac1 13>;
+		dma-names = "tx";
+		pinctrl-names = "default";
+		pinctrl-0 = <&spdif_out>;
+		status = "disabled";
+	};
+
+	sdmmc: dwmmc@ff480000 {
+		compatible = "rockchip,rk3308-dw-mshc", "rockchip,rk3288-dw-mshc";
+		reg = <0x0 0xff480000 0x0 0x4000>;
+		interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+		bus-width = <4>;
+		clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
+			 <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drv", "ciu-sample";
+		fifo-depth = <0x100>;
+		max-frequency = <150000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_det &sdmmc_bus4>;
+		status = "disabled";
+	};
+
+	emmc: dwmmc@ff490000 {
+		compatible = "rockchip,rk3308-dw-mshc", "rockchip,rk3288-dw-mshc";
+		reg = <0x0 0xff490000 0x0 0x4000>;
+		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+		bus-width = <8>;
+		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
+			 <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drv", "ciu-sample";
+		fifo-depth = <0x100>;
+		max-frequency = <150000000>;
+		status = "disabled";
+	};
+
+	sdio: dwmmc@ff4a0000 {
+		compatible = "rockchip,rk3308-dw-mshc", "rockchip,rk3288-dw-mshc";
+		reg = <0x0 0xff4a0000 0x0 0x4000>;
+		interrupts = <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>;
+		bus-width = <4>;
+		clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
+			 <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drv", "ciu-sample";
+		fifo-depth = <0x100>;
+		max-frequency = <150000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&sdio_bus4 &sdio_cmd &sdio_clk>;
+		status = "disabled";
+	};
+
+	cru: clock-controller@ff500000 {
+		compatible = "rockchip,rk3308-cru";
+		reg = <0x0 0xff500000 0x0 0x1000>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+		rockchip,grf = <&grf>;
+
+		assigned-clocks = <&cru SCLK_RTC32K>;
+		assigned-clock-rates = <32768>;
+	};
+
+	gic: interrupt-controller@ff580000 {
+		compatible = "arm,gic-400";
+		reg = <0x0 0xff581000 0x0 0x1000>,
+		      <0x0 0xff582000 0x0 0x2000>,
+		      <0x0 0xff584000 0x0 0x2000>,
+		      <0x0 0xff586000 0x0 0x2000>;
+		interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		#address-cells = <0>;
+	};
+
+	sram: sram@fff80000 {
+		compatible = "mmio-sram";
+		reg = <0x0 0xfff80000 0x0 0x40000>;
+		ranges = <0 0x0 0xfff80000 0x40000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* reserved for ddr dvfs and system suspend/resume */
+		ddr-sram@0 {
+			reg = <0x0 0x8000>;
+		};
+
+		/* reserved for vad audio buffer */
+		vad_sram: vad-sram@8000 {
+			reg = <0x8000 0x38000>;
+		};
+	};
+
+	pinctrl: pinctrl {
+		compatible = "rockchip,rk3308-pinctrl";
+		rockchip,grf = <&grf>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		gpio0: gpio0@ff220000 {
+			compatible = "rockchip,gpio-bank";
+			reg = <0x0 0xff220000 0x0 0x100>;
+			interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru PCLK_GPIO0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		gpio1: gpio1@ff230000 {
+			compatible = "rockchip,gpio-bank";
+			reg = <0x0 0xff230000 0x0 0x100>;
+			interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru PCLK_GPIO1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		gpio2: gpio2@ff240000 {
+			compatible = "rockchip,gpio-bank";
+			reg = <0x0 0xff240000 0x0 0x100>;
+			interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru PCLK_GPIO2>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		gpio3: gpio3@ff250000 {
+			compatible = "rockchip,gpio-bank";
+			reg = <0x0 0xff250000 0x0 0x100>;
+			interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru PCLK_GPIO3>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		gpio4: gpio4@ff260000 {
+			compatible = "rockchip,gpio-bank";
+			reg = <0x0 0xff260000 0x0 0x100>;
+			interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&cru PCLK_GPIO4>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		pcfg_pull_up: pcfg-pull-up {
+			bias-pull-up;
+		};
+
+		pcfg_pull_down: pcfg-pull-down {
+			bias-pull-down;
+		};
+
+		pcfg_pull_none: pcfg-pull-none {
+			bias-disable;
+		};
+
+		pcfg_pull_none_2ma: pcfg-pull-none-2ma {
+			bias-disable;
+			drive-strength = <2>;
+		};
+
+		pcfg_pull_up_2ma: pcfg-pull-up-2ma {
+			bias-pull-up;
+			drive-strength = <2>;
+		};
+
+		pcfg_pull_up_4ma: pcfg-pull-up-4ma {
+			bias-pull-up;
+			drive-strength = <4>;
+		};
+
+		pcfg_pull_none_4ma: pcfg-pull-none-4ma {
+			bias-disable;
+			drive-strength = <4>;
+		};
+
+		pcfg_pull_down_4ma: pcfg-pull-down-4ma {
+			bias-pull-down;
+			drive-strength = <4>;
+		};
+
+		pcfg_pull_none_8ma: pcfg-pull-none-8ma {
+			bias-disable;
+			drive-strength = <8>;
+		};
+
+		pcfg_pull_up_8ma: pcfg-pull-up-8ma {
+			bias-pull-up;
+			drive-strength = <8>;
+		};
+
+		pcfg_pull_none_12ma: pcfg-pull-none-12ma {
+			bias-disable;
+			drive-strength = <12>;
+		};
+
+		pcfg_pull_up_12ma: pcfg-pull-up-12ma {
+			bias-pull-up;
+			drive-strength = <12>;
+		};
+
+		pcfg_pull_none_smt: pcfg-pull-none-smt {
+			bias-disable;
+			input-schmitt-enable;
+		};
+
+		pcfg_output_high: pcfg-output-high {
+			output-high;
+		};
+
+		pcfg_output_low: pcfg-output-low {
+			output-low;
+		};
+
+		pcfg_input_high: pcfg-input-high {
+			bias-pull-up;
+			input-enable;
+		};
+
+		pcfg_input: pcfg-input {
+			input-enable;
+		};
+
+		emmc {
+			emmc_clk: emmc-clk {
+				rockchip,pins =
+					<3 RK_PB1 2 &pcfg_pull_none_8ma>;
+			};
+
+			emmc_cmd: emmc-cmd {
+				rockchip,pins =
+					<3 RK_PB0 2 &pcfg_pull_up_8ma>;
+			};
+
+			emmc_pwren: emmc-pwren {
+				rockchip,pins =
+					<3 RK_PB3 2 &pcfg_pull_none>;
+			};
+
+			emmc_rstn: emmc-rstn {
+				rockchip,pins =
+					<3 RK_PB2 2 &pcfg_pull_none>;
+			};
+
+			emmc_bus1: emmc-bus1 {
+				rockchip,pins =
+					<3 RK_PA0 2 &pcfg_pull_up_8ma>;
+			};
+
+			emmc_bus4: emmc-bus4 {
+				rockchip,pins =
+					<3 RK_PA0 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA1 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA2 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA3 2 &pcfg_pull_up_8ma>;
+			};
+
+			emmc_bus8: emmc-bus8 {
+				rockchip,pins =
+					<3 RK_PA0 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA1 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA2 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA3 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA4 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA5 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA6 2 &pcfg_pull_up_8ma>,
+					<3 RK_PA7 2 &pcfg_pull_up_8ma>;
+			};
+		};
+
+		flash {
+			flash_csn0: flash-csn0 {
+				rockchip,pins =
+					<3 RK_PB5 1 &pcfg_pull_none>;
+			};
+
+			flash_rdy: flash-rdy {
+				rockchip,pins =
+					<3 RK_PB4 1 &pcfg_pull_none>;
+			};
+
+			flash_ale: flash-ale {
+				rockchip,pins =
+					<3 RK_PB3 1 &pcfg_pull_none>;
+			};
+
+			flash_cle: flash-cle {
+				rockchip,pins =
+					<3 RK_PB1 1 &pcfg_pull_none>;
+			};
+
+			flash_wrn: flash-wrn {
+				rockchip,pins =
+					<3 RK_PB0 1 &pcfg_pull_none>;
+			};
+
+			flash_rdn: flash-rdn {
+				rockchip,pins =
+					<3 RK_PB2 1 &pcfg_pull_none>;
+			};
+
+			flash_bus8: flash-bus8 {
+				rockchip,pins =
+					<3 RK_PA0 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA1 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA2 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA3 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA4 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA5 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA6 1 &pcfg_pull_up_12ma>,
+					<3 RK_PA7 1 &pcfg_pull_up_12ma>;
+			};
+		};
+
+		gmac {
+			rmii_pins: rmii-pins {
+				rockchip,pins =
+					/* mac_txen */
+					<1 RK_PC1 3 &pcfg_pull_none_12ma>,
+					/* mac_txd1 */
+					<1 RK_PC3 3 &pcfg_pull_none_12ma>,
+					/* mac_txd0 */
+					<1 RK_PC2 3 &pcfg_pull_none_12ma>,
+					/* mac_rxd0 */
+					<1 RK_PC4 3 &pcfg_pull_none>,
+					/* mac_rxd1 */
+					<1 RK_PC5 3 &pcfg_pull_none>,
+					/* mac_rxer */
+					<1 RK_PB7 3 &pcfg_pull_none>,
+					/* mac_rxdv */
+					<1 RK_PC0 3 &pcfg_pull_none>,
+					/* mac_mdio */
+					<1 RK_PB6 3 &pcfg_pull_none>,
+					/* mac_mdc */
+					<1 RK_PB5 3 &pcfg_pull_none>;
+			};
+
+			mac_refclk_12ma: mac-refclk-12ma {
+				rockchip,pins =
+					<1 RK_PB4 3 &pcfg_pull_none_12ma>;
+			};
+
+			mac_refclk: mac-refclk {
+				rockchip,pins =
+					<1 RK_PB4 3 &pcfg_pull_none>;
+			};
+		};
+
+		gmac-m1 {
+			rmiim1_pins: rmiim1-pins {
+				rockchip,pins =
+					/* mac_txen */
+					<4 RK_PB7 2 &pcfg_pull_none_12ma>,
+					/* mac_txd1 */
+					<4 RK_PA5 2 &pcfg_pull_none_12ma>,
+					/* mac_txd0 */
+					<4 RK_PA4 2 &pcfg_pull_none_12ma>,
+					/* mac_rxd0 */
+					<4 RK_PA2 2 &pcfg_pull_none>,
+					/* mac_rxd1 */
+					<4 RK_PA3 2 &pcfg_pull_none>,
+					/* mac_rxer */
+					<4 RK_PA0 2 &pcfg_pull_none>,
+					/* mac_rxdv */
+					<4 RK_PA1 2 &pcfg_pull_none>,
+					/* mac_mdio */
+					<4 RK_PB6 2 &pcfg_pull_none>,
+					/* mac_mdc */
+					<4 RK_PB5 2 &pcfg_pull_none>;
+			};
+
+			macm1_refclk_12ma: macm1-refclk-12ma {
+				rockchip,pins =
+					<4 RK_PB4 2 &pcfg_pull_none_12ma>;
+			};
+
+			macm1_refclk: macm1-refclk {
+				rockchip,pins =
+					<4 RK_PB4 2 &pcfg_pull_none>;
+			};
+		};
+
+		i2c0 {
+			i2c0_xfer: i2c0-xfer {
+				rockchip,pins =
+					<1 RK_PD0 2 &pcfg_pull_none_smt>,
+					<1 RK_PD1 2 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2c1 {
+			i2c1_xfer: i2c1-xfer {
+				rockchip,pins =
+					<0 RK_PB3 1 &pcfg_pull_none_smt>,
+					<0 RK_PB4 1 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2c2 {
+			i2c2_xfer: i2c2-xfer {
+				rockchip,pins =
+					<2 RK_PA2 3 &pcfg_pull_none_smt>,
+					<2 RK_PA3 3 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2c3-m0 {
+			i2c3m0_xfer: i2c3m0-xfer {
+				rockchip,pins =
+					<0 RK_PB7 2 &pcfg_pull_none_smt>,
+					<0 RK_PC0 2 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2c3-m1 {
+			i2c3m1_xfer: i2c3m1-xfer {
+				rockchip,pins =
+					<3 RK_PB4 2 &pcfg_pull_none_smt>,
+					<3 RK_PB5 2 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2c3-m2 {
+			i2c3m2_xfer: i2c3m2-xfer {
+				rockchip,pins =
+					<2 RK_PA1 3 &pcfg_pull_none_smt>,
+					<2 RK_PA0 3 &pcfg_pull_none_smt>;
+			};
+		};
+
+		i2s_2ch_0 {
+			i2s_2ch_0_mclk: i2s-2ch-0-mclk {
+				rockchip,pins =
+					<4 RK_PB4 1 &pcfg_pull_none>;
+			};
+
+			i2s_2ch_0_sclk: i2s-2ch-0-sclk {
+				rockchip,pins =
+					<4 RK_PB5 1 &pcfg_pull_none>;
+			};
+
+			i2s_2ch_0_lrck: i2s-2ch-0-lrck {
+				rockchip,pins =
+					<4 RK_PB6 1 &pcfg_pull_none>;
+			};
+
+			i2s_2ch_0_sdo: i2s-2ch-0-sdo {
+				rockchip,pins =
+					<4 RK_PB7 1 &pcfg_pull_none>;
+			};
+
+			i2s_2ch_0_sdi: i2s-2ch-0-sdi {
+				rockchip,pins =
+					<4 RK_PC0 1 &pcfg_pull_none>;
+			};
+		};
+
+		i2s_8ch_0 {
+			i2s_8ch_0_mclk: i2s-8ch-0-mclk {
+				rockchip,pins =
+					<2 RK_PA4 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sclktx: i2s-8ch-0-sclktx {
+				rockchip,pins =
+					<2 RK_PA5 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sclkrx: i2s-8ch-0-sclkrx {
+				rockchip,pins =
+					<2 RK_PA6 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_lrcktx: i2s-8ch-0-lrcktx {
+				rockchip,pins =
+					<2 RK_PA7 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_lrckrx: i2s-8ch-0-lrckrx {
+				rockchip,pins =
+					<2 RK_PB0 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdo0: i2s-8ch-0-sdo0 {
+				rockchip,pins =
+					<2 RK_PB1 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdo1: i2s-8ch-0-sdo1 {
+				rockchip,pins =
+					<2 RK_PB2 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdo2: i2s-8ch-0-sdo2 {
+				rockchip,pins =
+					<2 RK_PB3 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdo3: i2s-8ch-0-sdo3 {
+				rockchip,pins =
+					<2 RK_PB4 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdi0: i2s-8ch-0-sdi0 {
+				rockchip,pins =
+					<2 RK_PB5 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdi1: i2s-8ch-0-sdi1 {
+				rockchip,pins =
+					<2 RK_PB6 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdi2: i2s-8ch-0-sdi2 {
+				rockchip,pins =
+					<2 RK_PB7 1 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_0_sdi3: i2s-8ch-0-sdi3 {
+				rockchip,pins =
+					<2 RK_PC0 1 &pcfg_pull_none>;
+			};
+		};
+
+		i2s_8ch_1_m0 {
+			i2s_8ch_1_m0_mclk: i2s-8ch-1-m0-mclk {
+				rockchip,pins =
+					<1 RK_PA2 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sclktx: i2s-8ch-1-m0-sclktx {
+				rockchip,pins =
+					<1 RK_PA3 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sclkrx: i2s-8ch-1-m0-sclkrx {
+				rockchip,pins =
+					<1 RK_PA4 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_lrcktx: i2s-8ch-1-m0-lrcktx {
+				rockchip,pins =
+					<1 RK_PA5 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_lrckrx: i2s-8ch-1-m0-lrckrx {
+				rockchip,pins =
+					<1 RK_PA6 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sdo0: i2s-8ch-1-m0-sdo0 {
+				rockchip,pins =
+					<1 RK_PA7 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sdo1_sdi3: i2s-8ch-1-m0-sdo1-sdi3 {
+				rockchip,pins =
+					<1 RK_PB0 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sdo2_sdi2: i2s-8ch-1-m0-sdo2-sdi2 {
+				rockchip,pins =
+					<1 RK_PB1 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sdo3_sdi1: i2s-8ch-1-m0-sdo3_sdi1 {
+				rockchip,pins =
+					<1 RK_PB2 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m0_sdi0: i2s-8ch-1-m0-sdi0 {
+				rockchip,pins =
+					<1 RK_PB3 2 &pcfg_pull_none>;
+			};
+		};
+
+		i2s_8ch_1_m1 {
+			i2s_8ch_1_m1_mclk: i2s-8ch-1-m1-mclk {
+				rockchip,pins =
+					<1 RK_PB4 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sclktx: i2s-8ch-1-m1-sclktx {
+				rockchip,pins =
+					<1 RK_PB5 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sclkrx: i2s-8ch-1-m1-sclkrx {
+				rockchip,pins =
+					<1 RK_PB6 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_lrcktx: i2s-8ch-1-m1-lrcktx {
+				rockchip,pins =
+					<1 RK_PB7 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_lrckrx: i2s-8ch-1-m1-lrckrx {
+				rockchip,pins =
+					<1 RK_PC0 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sdo0: i2s-8ch-1-m1-sdo0 {
+				rockchip,pins =
+					<1 RK_PC1 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sdo1_sdi3: i2s-8ch-1-m1-sdo1-sdi3 {
+				rockchip,pins =
+					<1 RK_PC2 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sdo2_sdi2: i2s-8ch-1-m1-sdo2-sdi2 {
+				rockchip,pins =
+					<1 RK_PC3 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sdo3_sdi1: i2s-8ch-1-m1-sdo3_sdi1 {
+				rockchip,pins =
+					<1 RK_PC4 2 &pcfg_pull_none>;
+			};
+
+			i2s_8ch_1_m1_sdi0: i2s-8ch-1-m1-sdi0 {
+				rockchip,pins =
+					<1 RK_PC5 2 &pcfg_pull_none>;
+			};
+		};
+
+		pdm_m0 {
+			pdm_m0_clk: pdm-m0-clk {
+				rockchip,pins =
+					<1 RK_PA4 3 &pcfg_pull_none>;
+			};
+
+			pdm_m0_sdi0: pdm-m0-sdi0 {
+				rockchip,pins =
+					<1 RK_PB3 3 &pcfg_pull_none>;
+			};
+
+			pdm_m0_sdi1: pdm-m0-sdi1 {
+				rockchip,pins =
+					<1 RK_PB2 3 &pcfg_pull_none>;
+			};
+
+			pdm_m0_sdi2: pdm-m0-sdi2 {
+				rockchip,pins =
+					<1 RK_PB1 3 &pcfg_pull_none>;
+			};
+
+			pdm_m0_sdi3: pdm-m0-sdi3 {
+				rockchip,pins =
+					<1 RK_PB0 3 &pcfg_pull_none>;
+			};
+		};
+
+		pdm_m1 {
+			pdm_m1_clk: pdm-m1-clk {
+				rockchip,pins =
+					<1 RK_PB6 4 &pcfg_pull_none>;
+			};
+
+			pdm_m1_sdi0: pdm-m1-sdi0 {
+				rockchip,pins =
+					<1 RK_PC5 4 &pcfg_pull_none>;
+			};
+
+			pdm_m1_sdi1: pdm-m1-sdi1 {
+				rockchip,pins =
+					<1 RK_PC4 4 &pcfg_pull_none>;
+			};
+
+			pdm_m1_sdi2: pdm-m1-sdi2 {
+				rockchip,pins =
+					<1 RK_PC3 4 &pcfg_pull_none>;
+			};
+
+			pdm_m1_sdi3: pdm-m1-sdi3 {
+				rockchip,pins =
+					<1 RK_PC2 4 &pcfg_pull_none>;
+			};
+		};
+
+		pdm_m2 {
+			pdm_m2_clkm: pdm-m2-clkm {
+				rockchip,pins =
+					<2 RK_PA4 3 &pcfg_pull_none>;
+			};
+
+			pdm_m2_clk: pdm-m2-clk {
+				rockchip,pins =
+					<2 RK_PA6 2 &pcfg_pull_none>;
+			};
+
+			pdm_m2_sdi0: pdm-m2-sdi0 {
+				rockchip,pins =
+					<2 RK_PB5 2 &pcfg_pull_none>;
+			};
+
+			pdm_m2_sdi1: pdm-m2-sdi1 {
+				rockchip,pins =
+					<2 RK_PB6 2 &pcfg_pull_none>;
+			};
+
+			pdm_m2_sdi2: pdm-m2-sdi2 {
+				rockchip,pins =
+					<2 RK_PB7 2 &pcfg_pull_none>;
+			};
+
+			pdm_m2_sdi3: pdm-m2-sdi3 {
+				rockchip,pins =
+					<2 RK_PC0 2 &pcfg_pull_none>;
+			};
+		};
+
+		pwm0 {
+			pwm0_pin: pwm0-pin {
+				rockchip,pins =
+					<0 RK_PB5 1 &pcfg_pull_none>;
+			};
+
+			pwm0_pin_pull_down: pwm0-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PB5 1 &pcfg_pull_down>;
+			};
+		};
+
+		pwm1 {
+			pwm1_pin: pwm1-pin {
+				rockchip,pins =
+					<0 RK_PB6 1 &pcfg_pull_none>;
+			};
+
+			pwm1_pin_pull_down: pwm1-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PB6 1 &pcfg_pull_down>;
+			};
+		};
+
+		pwm2 {
+			pwm2_pin: pwm2-pin {
+				rockchip,pins =
+					<0 RK_PB7 1 &pcfg_pull_none>;
+			};
+
+			pwm2_pin_pull_down: pwm2-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PB7 1 &pcfg_pull_down>;
+			};
+		};
+
+		pwm3 {
+			pwm3_pin: pwm3-pin {
+				rockchip,pins =
+					<0 RK_PC0 1 &pcfg_pull_none>;
+			};
+
+			pwm3_pin_pull_down: pwm3-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PC0 1 &pcfg_pull_down>;
+			};
+		};
+
+		pwm4 {
+			pwm4_pin: pwm4-pin {
+				rockchip,pins =
+					<0 RK_PA1 2 &pcfg_pull_none>;
+			};
+
+			pwm4_pin_pull_down: pwm4-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PA1 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm5 {
+			pwm5_pin: pwm5-pin {
+				rockchip,pins =
+					<0 RK_PC1 2 &pcfg_pull_none>;
+			};
+
+			pwm5_pin_pull_down: pwm5-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PC1 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm6 {
+			pwm6_pin: pwm6-pin {
+				rockchip,pins =
+					<0 RK_PC2 2 &pcfg_pull_none>;
+			};
+
+			pwm6_pin_pull_down: pwm6-pin-pull-down {
+				rockchip,pins =
+					<0 RK_PC2 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm7 {
+			pwm7_pin: pwm7-pin {
+				rockchip,pins =
+					<2 RK_PB0 2 &pcfg_pull_none>;
+			};
+
+			pwm7_pin_pull_down: pwm7-pin-pull-down {
+				rockchip,pins =
+					<2 RK_PB0 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm8 {
+			pwm8_pin: pwm8-pin {
+				rockchip,pins =
+					<2 RK_PB2 2 &pcfg_pull_none>;
+			};
+
+			pwm8_pin_pull_down: pwm8-pin-pull-down {
+				rockchip,pins =
+					<2 RK_PB2 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm9 {
+			pwm9_pin: pwm9-pin {
+				rockchip,pins =
+					<2 RK_PB3 2 &pcfg_pull_none>;
+			};
+
+			pwm9_pin_pull_down: pwm9-pin-pull-down {
+				rockchip,pins =
+					<2 RK_PB3 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm10 {
+			pwm10_pin: pwm10-pin {
+				rockchip,pins =
+					<2 RK_PB4 2 &pcfg_pull_none>;
+			};
+
+			pwm10_pin_pull_down: pwm10-pin-pull-down {
+				rockchip,pins =
+					<2 RK_PB4 2 &pcfg_pull_down>;
+			};
+		};
+
+		pwm11 {
+			pwm11_pin: pwm11-pin {
+				rockchip,pins =
+					<2 RK_PC0 4 &pcfg_pull_none>;
+			};
+
+			pwm11_pin_pull_down: pwm11-pin-pull-down {
+				rockchip,pins =
+					<2 RK_PC0 4 &pcfg_pull_down>;
+			};
+		};
+
+		rtc {
+			rtc_32k: rtc-32k {
+				rockchip,pins =
+					<0 RK_PC3 1 &pcfg_pull_none>;
+			};
+		};
+
+		sdmmc {
+			sdmmc_clk: sdmmc-clk {
+				rockchip,pins =
+					<4 RK_PD5 1 &pcfg_pull_none_4ma>;
+			};
+
+			sdmmc_cmd: sdmmc-cmd {
+				rockchip,pins =
+					<4 RK_PD4 1 &pcfg_pull_up_4ma>;
+			};
+
+			sdmmc_det: sdmmc-det {
+				rockchip,pins =
+					<0 RK_PA3 1 &pcfg_pull_up_4ma>;
+			};
+
+			sdmmc_pwren: sdmmc-pwren {
+				rockchip,pins =
+					<4 RK_PD6 1 &pcfg_pull_none_4ma>;
+			};
+
+			sdmmc_bus1: sdmmc-bus1 {
+				rockchip,pins =
+					<4 RK_PD0 1 &pcfg_pull_up_4ma>;
+			};
+
+			sdmmc_bus4: sdmmc-bus4 {
+				rockchip,pins =
+					<4 RK_PD0 1 &pcfg_pull_up_4ma>,
+					<4 RK_PD1 1 &pcfg_pull_up_4ma>,
+					<4 RK_PD2 1 &pcfg_pull_up_4ma>,
+					<4 RK_PD3 1 &pcfg_pull_up_4ma>;
+			};
+		};
+
+		sdio {
+			sdio_clk: sdio-clk {
+				rockchip,pins =
+					<4 RK_PA5 1 &pcfg_pull_none_8ma>;
+			};
+
+			sdio_cmd: sdio-cmd {
+				rockchip,pins =
+					<4 RK_PA4 1 &pcfg_pull_up_8ma>;
+			};
+
+			sdio_pwren: sdio-pwren {
+				rockchip,pins =
+					<0 RK_PA2 1 &pcfg_pull_none_8ma>;
+			};
+
+			sdio_wrpt: sdio-wrpt {
+				rockchip,pins =
+					<0 RK_PA1 1 &pcfg_pull_none_8ma>;
+			};
+
+			sdio_intn: sdio-intn {
+				rockchip,pins =
+					<0 RK_PA0 1 &pcfg_pull_none_8ma>;
+			};
+
+			sdio_bus1: sdio-bus1 {
+				rockchip,pins =
+					<4 RK_PA0 1 &pcfg_pull_up_8ma>;
+			};
+
+			sdio_bus4: sdio-bus4 {
+				rockchip,pins =
+					<4 RK_PA0 1 &pcfg_pull_up_8ma>,
+					<4 RK_PA1 1 &pcfg_pull_up_8ma>,
+					<4 RK_PA2 1 &pcfg_pull_up_8ma>,
+					<4 RK_PA3 1 &pcfg_pull_up_8ma>;
+			};
+		};
+
+		spdif_in {
+			spdif_in: spdif-in {
+				rockchip,pins =
+					<0 RK_PC2 1 &pcfg_pull_none>;
+			};
+		};
+
+		spdif_out {
+			spdif_out: spdif-out {
+				rockchip,pins =
+					<0 RK_PC1 1 &pcfg_pull_none>;
+			};
+		};
+
+		spi0 {
+			spi0_clk: spi0-clk {
+				rockchip,pins =
+					<2 RK_PA2 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi0_csn0: spi0-csn0 {
+				rockchip,pins =
+					<2 RK_PA3 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi0_miso: spi0-miso {
+				rockchip,pins =
+					<2 RK_PA0 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi0_mosi: spi0-mosi {
+				rockchip,pins =
+					<2 RK_PA1 2 &pcfg_pull_up_4ma>;
+			};
+		};
+
+		spi1 {
+			spi1_clk: spi1-clk {
+				rockchip,pins =
+					<3 RK_PB3 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi1_csn0: spi1-csn0 {
+				rockchip,pins =
+					<3 RK_PB5 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi1_miso: spi1-miso {
+				rockchip,pins =
+					<3 RK_PB2 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi1_mosi: spi1-mosi {
+				rockchip,pins =
+					<3 RK_PB4 3 &pcfg_pull_up_4ma>;
+			};
+		};
+
+		spi1-m1 {
+			spi1m1_miso: spi1m1-miso {
+				rockchip,pins =
+					<2 RK_PA4 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi1m1_mosi: spi1m1-mosi {
+				rockchip,pins =
+					<2 RK_PA5 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi1m1_clk: spi1m1-clk {
+				rockchip,pins =
+					<2 RK_PA7 2 &pcfg_pull_up_4ma>;
+			};
+
+			spi1m1_csn0: spi1m1-csn0 {
+				rockchip,pins =
+					<2 RK_PB1 2 &pcfg_pull_up_4ma>;
+			};
+		};
+
+		spi2 {
+			spi2_clk: spi2-clk {
+				rockchip,pins =
+					<1 RK_PD0 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi2_csn0: spi2-csn0 {
+				rockchip,pins =
+					<1 RK_PD1 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi2_miso: spi2-miso {
+				rockchip,pins =
+					<1 RK_PC6 3 &pcfg_pull_up_4ma>;
+			};
+
+			spi2_mosi: spi2-mosi {
+				rockchip,pins =
+					<1 RK_PC7 3 &pcfg_pull_up_4ma>;
+			};
+		};
+
+		tsadc {
+			tsadc_otp_gpio: tsadc-otp-gpio {
+				rockchip,pins =
+					<0 RK_PB2 0 &pcfg_pull_none>;
+			};
+
+			tsadc_otp_out: tsadc-otp-out {
+				rockchip,pins =
+					<0 RK_PB2 1 &pcfg_pull_none>;
+			};
+		};
+
+		uart0 {
+			uart0_xfer: uart0-xfer {
+				rockchip,pins =
+					<2 RK_PA1 1 &pcfg_pull_up>,
+					<2 RK_PA0 1 &pcfg_pull_up>;
+			};
+
+			uart0_cts: uart0-cts {
+				rockchip,pins =
+					<2 RK_PA2 1 &pcfg_pull_none>;
+			};
+
+			uart0_rts: uart0-rts {
+				rockchip,pins =
+					<2 RK_PA3 1 &pcfg_pull_none>;
+			};
+
+			uart0_rts_gpio: uart0-rts-gpio {
+				rockchip,pins =
+					<2 RK_PA3 0 &pcfg_pull_none>;
+			};
+		};
+
+		uart1 {
+			uart1_xfer: uart1-xfer {
+				rockchip,pins =
+					<1 RK_PD1 1 &pcfg_pull_up>,
+					<1 RK_PD0 1 &pcfg_pull_up>;
+			};
+
+			uart1_cts: uart1-cts {
+				rockchip,pins =
+					<1 RK_PC6 1 &pcfg_pull_none>;
+			};
+
+			uart1_rts: uart1-rts {
+				rockchip,pins =
+					<1 RK_PC7 1 &pcfg_pull_none>;
+			};
+		};
+
+		uart2-m0 {
+			uart2m0_xfer: uart2m0-xfer {
+				rockchip,pins =
+					<1 RK_PC7 2 &pcfg_pull_up>,
+					<1 RK_PC6 2 &pcfg_pull_up>;
+			};
+		};
+
+		uart2-m1 {
+			uart2m1_xfer: uart2m1-xfer {
+				rockchip,pins =
+					<4 RK_PD3 2 &pcfg_pull_up>,
+					<4 RK_PD2 2 &pcfg_pull_up>;
+			};
+		};
+
+		uart3 {
+			uart3_xfer: uart3-xfer {
+				rockchip,pins =
+					<3 RK_PB5 4 &pcfg_pull_up>,
+					<3 RK_PB4 4 &pcfg_pull_up>;
+			};
+		};
+
+		uart3-m1 {
+			uart3m1_xfer: uart3m1-xfer {
+				rockchip,pins =
+					<0 RK_PC2 3 &pcfg_pull_up>,
+					<0 RK_PC1 3 &pcfg_pull_up>;
+			};
+		};
+
+		uart4 {
+			uart4_xfer: uart4-xfer {
+				rockchip,pins =
+					<4 RK_PB1 1 &pcfg_pull_up>,
+					<4 RK_PB0 1 &pcfg_pull_up>;
+			};
+
+			uart4_cts: uart4-cts {
+				rockchip,pins =
+					<4 RK_PA6 1 &pcfg_pull_none>;
+			};
+
+			uart4_rts: uart4-rts {
+				rockchip,pins =
+					<4 RK_PA7 1 &pcfg_pull_none>;
+			};
+
+			uart4_rts_gpio: uart4-rts-gpio {
+				rockchip,pins =
+					<4 RK_PA7 0 &pcfg_pull_none>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-a1.dts b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts
new file mode 100644
index 000000000000..16f1656d5203
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR MIT)
+// Copyright (c) 2017-2019 Arm Ltd.
+
+/dts-v1/;
+#include "rk3328.dtsi"
+
+/ {
+	model = "Beelink A1";
+	compatible = "azw,beelink-a1", "rockchip,rk3328";
+
+	/*
+	 * UART pins, as viewed with bottom of case removed:
+	 *
+	 *           Front
+	 *        /-------
+	 *  L    / o <- Gnd
+	 *  e   / o <-- Rx
+	 *  f  / o <--- Tx
+	 *  t / o <---- +3.3v
+	 *    |
+	 */
+	chosen {
+		stdout-path = "serial2:1500000n8";
+	};
+
+	gmac_clkin: external-gmac-clock {
+		compatible = "fixed-clock";
+		clock-frequency = <125000000>;
+		clock-output-names = "gmac_clkin";
+		#clock-cells = <0>;
+	};
+
+	vcc_host_5v: usb3-current-switch {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		gpio = <&gpio0 RK_PA0 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&usb30_host_drv>;
+		regulator-name = "vcc_host_5v";
+		vin-supply = <&vcc_sys>;
+	};
+
+	vcc_sys: vcc-sys {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_sys";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	ir-receiver {
+		compatible = "gpio-ir-receiver";
+		gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_LOW>;
+		linux,rc-map-name = "rc-beelink-gs1";
+	};
+};
+
+&analog_sound {
+	simple-audio-card,name = "Analog A/V";
+	status = "okay";
+};
+
+&codec {
+	status = "okay";
+};
+
+&cpu0 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu1 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu2 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&cpu3 {
+	cpu-supply = <&vdd_arm>;
+};
+
+&emmc {
+	bus-width = <8>;
+	cap-mmc-highspeed;
+	mmc-ddr-1_8v;
+	mmc-hs200-1_8v;
+	no-sd;
+	no-sdio;
+	non-removable;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_clk &emmc_cmd &emmc_bus8>;
+	vmmc-supply = <&vcc_io>;
+	vqmmc-supply = <&vcc18_emmc>;
+	status = "okay";
+};
+
+&gmac2io {
+	assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
+	assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
+	clock_in_out = "input";
+	phy-handle = <&rtl8211f>;
+	phy-mode = "rgmii";
+	phy-supply = <&vcc_io>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmiim1_pins>;
+	snps,aal;
+	snps,pbl = <0x4>;
+	tx_delay = <0x26>;
+	rx_delay = <0x11>;
+	status = "okay";
+
+	mdio {
+		compatible = "snps,dwmac-mdio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		rtl8211f: phy@0 {
+			reg = <0>;
+			reset-assert-us = <10000>;
+			reset-deassert-us = <30000>;
+			reset-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_LOW>;
+		};
+	};
+};
+
+&gpu {
+	mali-supply = <&vdd_logic>;
+};
+
+&hdmi {
+	status = "okay";
+};
+
+&hdmiphy {
+	status = "okay";
+};
+
+&hdmi_sound {
+	status = "okay";
+};
+
+&i2c1 {
+	clock-frequency = <1000000>;
+	i2c-scl-falling-time-ns = <5>;
+	i2c-scl-rising-time-ns = <83>;
+	status = "okay";
+
+	pmic@18 {
+		compatible = "rockchip,rk805";
+		reg = <0x18>;
+		interrupt-parent = <&gpio2>;
+		interrupts = <RK_PA6 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pmic_int_l>;
+		rockchip,system-power-controller;
+		wakeup-source;
+
+		vcc1-supply = <&vcc_sys>;
+		vcc2-supply = <&vcc_sys>;
+		vcc3-supply = <&vcc_sys>;
+		vcc4-supply = <&vcc_sys>;
+		vcc5-supply = <&vcc_io>;
+		vcc6-supply = <&vcc_io>;
+
+		regulators {
+			vdd_logic: DCDC_REG1 {
+				regulator-name = "vdd_logic";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1000000>;
+				};
+			};
+
+			vdd_arm: DCDC_REG2 {
+				regulator-name = "vdd_arm";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <950000>;
+				};
+			};
+
+			vcc_ddr: DCDC_REG3 {
+				regulator-name = "vcc_ddr";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+				};
+			};
+
+			vcc_io: DCDC_REG4 {
+				regulator-name = "vcc_io";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3300000>;
+				};
+			};
+
+			vdd_18: LDO_REG1 {
+				regulator-name = "vdd_18";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vcc18_emmc: LDO_REG2 {
+				regulator-name = "vcc_18emmc";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vdd_11: LDO_REG3 {
+				regulator-name = "vdd_11";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1100000>;
+				};
+			};
+		};
+	};
+};
+
+&i2s0 {
+	status = "okay";
+};
+
+&i2s1 {
+	status = "okay";
+};
+
+&io_domains {
+	vccio1-supply = <&vcc_io>;
+	vccio2-supply = <&vcc18_emmc>;
+	vccio3-supply = <&vcc_io>;
+	vccio4-supply = <&vdd_18>;
+	vccio5-supply = <&vcc_io>;
+	vccio6-supply = <&vdd_18>;
+	pmuio-supply = <&vcc_io>;
+	status = "okay";
+};
+
+&pinctrl {
+	pmic {
+		pmic_int_l: pmic-int-l {
+			rockchip,pins = <2 RK_PA6 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+
+	usb3 {
+		usb30_host_drv: usb30-host-drv {
+			rockchip,pins = <0 RK_PA0 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	wifi {
+		bt_dis: bt-dis {
+			rockchip,pins = <2 RK_PC5 RK_FUNC_GPIO &pcfg_output_low>;
+		};
+
+		bt_wake_host: bt-wake-host {
+			rockchip,pins = <2 RK_PC0 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+
+		chip_en: chip-en {
+			rockchip,pins = <2 RK_PC3 RK_FUNC_GPIO &pcfg_output_low>;
+		};
+
+		host_wake_bt: host-wake-bt {
+			rockchip,pins = <2 RK_PB7 RK_FUNC_GPIO &pcfg_output_high>;
+		};
+
+		wl_dis: wl-dis {
+			rockchip,pins = <3 RK_PB0 RK_FUNC_GPIO &pcfg_output_low>;
+		};
+
+		wl_wake_host: wl-wake-host {
+			rockchip,pins = <3 RK_PA1 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+};
+
+&sdmmc {
+	bus-width = <4>;
+	cap-mmc-highspeed;
+	cap-sd-highspeed;
+	disable-wp;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sdmmc0_clk &sdmmc0_cmd &sdmmc0_dectn &sdmmc0_bus4>;
+	vmmc-supply = <&vcc_io>;
+	vqmmc-supply = <&vcc_io>;
+	status = "okay";
+};
+
+&tsadc {
+	rockchip,hw-tshut-mode = <0>;
+	rockchip,hw-tshut-polarity = <0>;
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&u2phy {
+	status = "okay";
+};
+
+&u2phy_host {
+	status = "okay";
+};
+
+&u2phy_otg {
+	status = "okay";
+};
+
+&usb20_otg {
+	dr_mode = "host";
+	status = "okay";
+};
+
+&usb_host0_ehci {
+	pinctrl-names = "default";
+	pinctrl-0 = <&bt_dis &bt_wake_host &chip_en &host_wake_bt &wl_dis &wl_wake_host>;
+	status = "okay";
+};
+
+&vop {
+	status = "okay";
+};
+
+&vop_mmu {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
index bb40c163b05d..8d553c92182a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
@@ -35,6 +35,7 @@
 		gpio = <&gpio0 RK_PD6 GPIO_ACTIVE_LOW>;
 		pinctrl-names = "default";
 		pinctrl-0 = <&sdmmc0m1_gpio>;
+		regulator-boot-on;
 		regulator-name = "vcc_sd";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 31cc1541f1f5..91306ebed4da 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -142,6 +142,22 @@
 		};
 	};
 
+	analog_sound: analog-sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,mclk-fs = <256>;
+		simple-audio-card,name = "Analog";
+		status = "disabled";
+
+		simple-audio-card,cpu {
+			sound-dai = <&i2s1>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&codec>;
+		};
+	};
+
 	arm-pmu {
 		compatible = "arm,cortex-a53-pmu";
 		interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>,
@@ -156,6 +172,22 @@
 		ports = <&vop_out>;
 	};
 
+	hdmi_sound: hdmi-sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,mclk-fs = <128>;
+		simple-audio-card,name = "HDMI";
+		status = "disabled";
+
+		simple-audio-card,cpu {
+			sound-dai = <&i2s0>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&hdmi>;
+		};
+	};
+
 	psci {
 		compatible = "arm,psci-1.0", "arm,psci-0.2";
 		method = "smc";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
index a9f4d6d7d2b7..9dd3b171e91d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
@@ -68,6 +68,16 @@
 
 &spi0 {
 	status = "okay";
+
+	cr50@0 {
+		compatible = "google,cr50";
+		reg = <0>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <5 IRQ_TYPE_EDGE_RISING>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&h1_int_od_l>;
+		spi-max-frequency = <800000>;
+	};
 };
 
 &pinctrl {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index 50dfab51f175..4373ed732af7 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -436,6 +436,16 @@ camera: &i2c7 {
 
 &spi2 {
 	status = "okay";
+
+	cr50@0 {
+		compatible = "google,cr50";
+		reg = <0>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <17 IRQ_TYPE_EDGE_RISING>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&h1_int_od_l>;
+		spi-max-frequency = <800000>;
+	};
 };
 
 &usb_host0_ohci {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
index dd16c80d923e..b788ae4f47f0 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
@@ -152,9 +152,6 @@
 	phy-handle = <&rtl8211e>;
 	phy-mode = "rgmii";
 	phy-supply = <&vcc3v3_s3>;
-	snps,reset-active-low;
-	snps,reset-delays-us = <0 10000 30000>;
-	snps,reset-gpio = <&gpio3 RK_PB7 GPIO_ACTIVE_LOW>;
 	tx_delay = <0x28>;
 	rx_delay = <0x11>;
 	status = "okay";
@@ -168,6 +165,9 @@
 			reg = <1>;
 			interrupt-parent = <&gpio3>;
 			interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>;
+			reset-assert-us = <10000>;
+			reset-deassert-us = <30000>;
+			reset-gpios = <&gpio3 RK_PB7 GPIO_ACTIVE_LOW>;
 		};
 	};
 };
@@ -184,6 +184,10 @@
 	status = "okay";
 };
 
+&hdmi_sound {
+	status = "okay";
+};
+
 &i2c0 {
 	clock-frequency = <400000>;
 	i2c-scl-rising-time-ns = <160>;
@@ -459,6 +463,10 @@
 	status = "okay";
 };
 
+&i2s2 {
+	status = "okay";
+};
+
 &io_domains {
 	bt656-supply = <&vcc_1v8>;
 	audio-supply = <&vcca1v8_codec>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index 62ea288a1a70..c1edca3872c7 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -165,6 +165,11 @@
 	status = "okay";
 };
 
+&gpu {
+	mali-supply = <&vdd_gpu>;
+	status = "okay";
+};
+
 &i2c0 {
 	status = "okay";
 	i2c-scl-rising-time-ns = <168>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-mezzanine.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-mezzanine.dts
new file mode 100644
index 000000000000..d6b3042cffa9
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-mezzanine.dts
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2017 T-Chip Intelligent Technology Co., Ltd
+ * Copyright (c) 2019 Markus Reichl <m.reichl@fivetechno.de>
+ */
+
+/dts-v1/;
+#include "rk3399-roc-pc.dtsi"
+
+/ {
+	model = "Firefly ROC-RK3399-PC Mezzanine Board";
+	compatible = "firefly,roc-rk3399-pc-mezzanine", "rockchip,rk3399";
+
+	vcc3v3_ngff: vcc3v3-ngff {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc3v3_ngff";
+		enable-active-high;
+		gpio = <&gpio4 RK_PD3 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vcc3v3_ngff_en>;
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		vin-supply = <&dc_12v>;
+	};
+
+	vcc3v3_pcie: vcc3v3-pcie {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc3v3_pcie";
+		enable-active-high;
+		gpio = <&gpio1 RK_PC1 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vcc3v3_pcie_en>;
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		vin-supply = <&dc_12v>;
+	};
+};
+
+&pcie_phy {
+	status = "okay";
+};
+
+&pcie0 {
+	ep-gpios = <&gpio4 RK_PD1 GPIO_ACTIVE_HIGH>;
+	num-lanes = <4>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pcie_perst>;
+	vpcie3v3-supply = <&vcc3v3_pcie>;
+	status = "okay";
+};
+
+&pinctrl {
+	ngff {
+		vcc3v3_ngff_en: vcc3v3-ngff-en {
+			rockchip,pins = <4 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	pcie {
+		vcc3v3_pcie_en: vcc3v3-pcie-en {
+			rockchip,pins = <1 RK_PC1 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		pcie_perst: pcie-perst {
+			rockchip,pins = <4 RK_PD1 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dts
index 19f7732d728c..cd4195425309 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dts
@@ -4,677 +4,9 @@
  */
 
 /dts-v1/;
-#include <dt-bindings/pwm/pwm.h>
-#include "rk3399.dtsi"
-#include "rk3399-opp.dtsi"
+#include "rk3399-roc-pc.dtsi"
 
 / {
 	model = "Firefly ROC-RK3399-PC Board";
 	compatible = "firefly,roc-rk3399-pc", "rockchip,rk3399";
-
-	chosen {
-		stdout-path = "serial2:1500000n8";
-	};
-
-	backlight: backlight {
-		compatible = "pwm-backlight";
-		pwms = <&pwm0 0 25000 0>;
-	};
-
-	clkin_gmac: external-gmac-clock {
-		compatible = "fixed-clock";
-		clock-frequency = <125000000>;
-		clock-output-names = "clkin_gmac";
-		#clock-cells = <0>;
-	};
-
-	sdio_pwrseq: sdio-pwrseq {
-		compatible = "mmc-pwrseq-simple";
-		clocks = <&rk808 1>;
-		clock-names = "ext_clock";
-		pinctrl-names = "default";
-		pinctrl-0 = <&wifi_enable_h>;
-
-		/*
-		 * On the module itself this is one of these (depending
-		 * on the actual card populated):
-		 * - SDIO_RESET_L_WL_REG_ON
-		 * - PDN (power down when low)
-		 */
-		reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
-	};
-
-	vcc_vbus_typec0: vcc-vbus-typec0 {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc_vbus_typec0";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-	};
-
-	/*
-	 * should be placed inside mp8859, but not until mp8859 has
-	 * its own dt-binding.
-	 */
-	vcc12v_sys: mp8859-dcdc1 {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc12v_sys";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <12000000>;
-		regulator-max-microvolt = <12000000>;
-		vin-supply = <&vcc_vbus_typec0>;
-	};
-
-	/* switched by pmic_sleep */
-	vcc1v8_s3: vcca1v8_s3: vcc1v8-s3 {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc1v8_s3";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <1800000>;
-		vin-supply = <&vcc_1v8>;
-	};
-
-	vcc3v3_sys: vcc3v3-sys {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc3v3_sys";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-		vin-supply = <&vcc12v_sys>;
-	};
-
-	/* Actually 3 regulators (host0, 1, 2) controlled by the same gpio */
-	vcc5v0_host: vcc5v0-host-regulator {
-		compatible = "regulator-fixed";
-		enable-active-high;
-		gpio = <&gpio1 RK_PA0 GPIO_ACTIVE_HIGH>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&vcc5v0_host_en &hub_rst>;
-		regulator-name = "vcc5v0_host";
-		regulator-always-on;
-		vin-supply = <&vcc_sys>;
-	};
-
-	vcc_vbus_typec1: vcc-vbus-typec1 {
-		compatible = "regulator-fixed";
-		enable-active-high;
-		gpio = <&gpio1 RK_PB5 GPIO_ACTIVE_HIGH>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&vcc_vbus_typec1_en>;
-		regulator-name = "vcc_vbus_typec1";
-		regulator-always-on;
-		vin-supply = <&vcc_sys>;
-	};
-
-	vcc_sys: vcc-sys {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc_sys";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		vin-supply = <&vcc12v_sys>;
-	};
-
-	vdd_log: vdd-log {
-		compatible = "pwm-regulator";
-		pwms = <&pwm2 0 25000 1>;
-		regulator-name = "vdd_log";
-		regulator-always-on;
-		regulator-boot-on;
-		regulator-min-microvolt = <800000>;
-		regulator-max-microvolt = <1400000>;
-		vin-supply = <&vcc3v3_sys>;
-	};
-};
-
-&cpu_l0 {
-	cpu-supply = <&vdd_cpu_l>;
-};
-
-&cpu_l1 {
-	cpu-supply = <&vdd_cpu_l>;
-};
-
-&cpu_l2 {
-	cpu-supply = <&vdd_cpu_l>;
-};
-
-&cpu_l3 {
-	cpu-supply = <&vdd_cpu_l>;
-};
-
-&cpu_b0 {
-	cpu-supply = <&vdd_cpu_b>;
-};
-
-&cpu_b1 {
-	cpu-supply = <&vdd_cpu_b>;
-};
-
-&emmc_phy {
-	status = "okay";
-};
-
-&gmac {
-	assigned-clocks = <&cru SCLK_RMII_SRC>;
-	assigned-clock-parents = <&clkin_gmac>;
-	clock_in_out = "input";
-	phy-supply = <&vcc_lan>;
-	phy-mode = "rgmii";
-	pinctrl-names = "default";
-	pinctrl-0 = <&rgmii_pins>;
-	snps,reset-gpio = <&gpio3 RK_PB7 GPIO_ACTIVE_LOW>;
-	snps,reset-active-low;
-	snps,reset-delays-us = <0 10000 50000>;
-	tx_delay = <0x28>;
-	rx_delay = <0x11>;
-	status = "okay";
-};
-
-&hdmi {
-	ddc-i2c-bus = <&i2c3>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&hdmi_cec>;
-	status = "okay";
-};
-
-&i2c0 {
-	clock-frequency = <400000>;
-	i2c-scl-rising-time-ns = <168>;
-	i2c-scl-falling-time-ns = <4>;
-	status = "okay";
-
-	rk808: pmic@1b {
-		compatible = "rockchip,rk808";
-		reg = <0x1b>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <21 IRQ_TYPE_LEVEL_LOW>;
-		#clock-cells = <1>;
-		clock-output-names = "xin32k", "rk808-clkout2";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pmic_int_l>;
-		rockchip,system-power-controller;
-		wakeup-source;
-
-		vcc1-supply = <&vcc3v3_sys>;
-		vcc2-supply = <&vcc3v3_sys>;
-		vcc3-supply = <&vcc3v3_sys>;
-		vcc4-supply = <&vcc3v3_sys>;
-		vcc6-supply = <&vcc3v3_sys>;
-		vcc7-supply = <&vcc3v3_sys>;
-		vcc8-supply = <&vcc3v3_sys>;
-		vcc9-supply = <&vcc3v3_sys>;
-		vcc10-supply = <&vcc3v3_sys>;
-		vcc11-supply = <&vcc3v3_sys>;
-		vcc12-supply = <&vcc3v3_sys>;
-		vddio-supply = <&vcc1v8_pmu>;
-
-		regulators {
-			vdd_center: DCDC_REG1 {
-				regulator-name = "vdd_center";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <750000>;
-				regulator-max-microvolt = <1350000>;
-				regulator-ramp-delay = <6001>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vdd_cpu_l: DCDC_REG2 {
-				regulator-name = "vdd_cpu_l";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <750000>;
-				regulator-max-microvolt = <1350000>;
-				regulator-ramp-delay = <6001>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc_ddr: DCDC_REG3 {
-				regulator-name = "vcc_ddr";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-				};
-			};
-
-			vcc_1v8: DCDC_REG4 {
-				regulator-name = "vcc_1v8";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1800000>;
-				regulator-max-microvolt = <1800000>;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-suspend-microvolt = <1800000>;
-				};
-			};
-
-			vcca1v8_codec: LDO_REG1 {
-				regulator-name = "vcca1v8_codec";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1800000>;
-				regulator-max-microvolt = <1800000>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc1v8_hdmi: LDO_REG2 {
-				regulator-name = "vcc1v8_hdmi";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1800000>;
-				regulator-max-microvolt = <1800000>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc1v8_pmu: LDO_REG3 {
-				regulator-name = "vcc1v8_pmu";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1800000>;
-				regulator-max-microvolt = <1800000>;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-suspend-microvolt = <1800000>;
-				};
-			};
-
-			vcc_sdio: LDO_REG4 {
-				regulator-name = "vcc_sdio";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1800000>;
-				regulator-max-microvolt = <3000000>;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-suspend-microvolt = <3000000>;
-				};
-			};
-
-			vcca3v0_codec: LDO_REG5 {
-				regulator-name = "vcca3v0_codec";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <3000000>;
-				regulator-max-microvolt = <3000000>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc_1v5: LDO_REG6 {
-				regulator-name = "vcc_1v5";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <1500000>;
-				regulator-max-microvolt = <1500000>;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-suspend-microvolt = <1500000>;
-				};
-			};
-
-			vcca0v9_hdmi: LDO_REG7 {
-				regulator-name = "vcca0v9_hdmi";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <900000>;
-				regulator-max-microvolt = <900000>;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc_3v0: LDO_REG8 {
-				regulator-name = "vcc_3v0";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-min-microvolt = <3000000>;
-				regulator-max-microvolt = <3000000>;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-suspend-microvolt = <3000000>;
-				};
-			};
-
-			vcc3v3_s3: vcc_lan: SWITCH_REG1 {
-				regulator-name = "vcc3v3_s3";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-
-			vcc3v3_s0: SWITCH_REG2 {
-				regulator-name = "vcc3v3_s0";
-				regulator-always-on;
-				regulator-boot-on;
-				regulator-state-mem {
-					regulator-off-in-suspend;
-				};
-			};
-		};
-	};
-
-	vdd_cpu_b: regulator@40 {
-		compatible = "silergy,syr827";
-		reg = <0x40>;
-		fcs,suspend-voltage-selector = <1>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&vsel1_gpio>;
-		regulator-name = "vdd_cpu_b";
-		regulator-min-microvolt = <712500>;
-		regulator-max-microvolt = <1500000>;
-		regulator-ramp-delay = <1000>;
-		regulator-always-on;
-		regulator-boot-on;
-		vin-supply = <&vcc3v3_sys>;
-
-		regulator-state-mem {
-			regulator-off-in-suspend;
-		};
-	};
-
-	vdd_gpu: regulator@41 {
-		compatible = "silergy,syr828";
-		reg = <0x41>;
-		fcs,suspend-voltage-selector = <1>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&vsel2_gpio>;
-		regulator-name = "vdd_gpu";
-		regulator-min-microvolt = <712500>;
-		regulator-max-microvolt = <1500000>;
-		regulator-ramp-delay = <1000>;
-		regulator-always-on;
-		regulator-boot-on;
-		vin-supply = <&vcc3v3_sys>;
-
-		regulator-state-mem {
-			regulator-off-in-suspend;
-		};
-	};
-};
-
-&i2c1 {
-	i2c-scl-rising-time-ns = <300>;
-	i2c-scl-falling-time-ns = <15>;
-	status = "okay";
-};
-
-&i2c3 {
-	i2c-scl-rising-time-ns = <450>;
-	i2c-scl-falling-time-ns = <15>;
-	status = "okay";
-};
-
-&i2c4 {
-	i2c-scl-rising-time-ns = <600>;
-	i2c-scl-falling-time-ns = <20>;
-	status = "okay";
-
-	fusb1: usb-typec@22 {
-		compatible = "fcs,fusb302";
-		reg = <0x22>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&fusb1_int>;
-		vbus-supply = <&vcc_vbus_typec1>;
-		status = "okay";
-	};
-};
-
-&i2c7 {
-	i2c-scl-rising-time-ns = <600>;
-	i2c-scl-falling-time-ns = <20>;
-	status = "okay";
-
-	fusb0: usb-typec@22 {
-		compatible = "fcs,fusb302";
-		reg = <0x22>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&fusb0_int>;
-		vbus-supply = <&vcc_vbus_typec0>;
-		status = "okay";
-	};
-};
-
-&i2s0 {
-	rockchip,playback-channels = <8>;
-	rockchip,capture-channels = <8>;
-	status = "okay";
-};
-
-&i2s1 {
-	rockchip,playback-channels = <2>;
-	rockchip,capture-channels = <2>;
-	status = "okay";
-};
-
-&i2s2 {
-	status = "okay";
-};
-
-&io_domains {
-	audio-supply = <&vcca1v8_codec>;
-	bt656-supply = <&vcc_3v0>;
-	gpio1830-supply = <&vcc_3v0>;
-	sdmmc-supply = <&vcc_sdio>;
-	status = "okay";
-};
-
-&pmu_io_domains {
-	pmu1830-supply = <&vcc_3v0>;
-	status = "okay";
-};
-
-&pinctrl {
-	lcd-panel {
-		lcd_panel_reset: lcd-panel-reset {
-			rockchip,pins = <4 RK_PD6 RK_FUNC_GPIO &pcfg_pull_up>;
-		};
-	};
-
-	pmic {
-		vsel1_gpio: vsel1-gpio {
-			rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_down>;
-		};
-
-		vsel2_gpio: vsel2-gpio {
-			rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_down>;
-		};
-	};
-
-	sdio-pwrseq {
-		wifi_enable_h: wifi-enable-h {
-			rockchip,pins = <0 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
-		};
-	};
-
-	pmic {
-		pmic_int_l: pmic-int-l {
-			rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>;
-		};
-	};
-
-	usb2 {
-		vcc5v0_host_en: vcc5v0-host-en {
-			rockchip,pins = <1 RK_PA0 RK_FUNC_GPIO &pcfg_pull_none>;
-		};
-
-		hub_rst: hub-rst {
-			rockchip,pins = <2 RK_PA4 RK_FUNC_GPIO &pcfg_output_high>;
-		};
-	};
-
-	usb-typec {
-		vcc_vbus_typec1_en: vcc-vbus-typec1-en {
-			rockchip,pins = <1 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
-		};
-	};
-
-	fusb30x {
-		fusb0_int: fusb0-int {
-			rockchip,pins = <1 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>;
-		};
-
-		fusb1_int: fusb1-int {
-			rockchip,pins = <1 RK_PA1 RK_FUNC_GPIO &pcfg_pull_up>;
-		};
-	};
-};
-
-&pwm0 {
-	status = "okay";
-};
-
-&pwm2 {
-	status = "okay";
-};
-
-&saradc {
-	vref-supply = <&vcca1v8_s3>;
-	status = "okay";
-};
-
-&sdmmc {
-	bus-width = <4>;
-	cap-mmc-highspeed;
-	cap-sd-highspeed;
-	cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>;
-	disable-wp;
-	max-frequency = <150000000>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
-	status = "okay";
-};
-
-&sdhci {
-	bus-width = <8>;
-	mmc-hs400-1_8v;
-	mmc-hs400-enhanced-strobe;
-	non-removable;
-	status = "okay";
-};
-
-&tcphy0 {
-	status = "okay";
-};
-
-&tcphy1 {
-	status = "okay";
-};
-
-&tsadc {
-	/* tshut mode 0:CRU 1:GPIO */
-	rockchip,hw-tshut-mode = <1>;
-	/* tshut polarity 0:LOW 1:HIGH */
-	rockchip,hw-tshut-polarity = <1>;
-	status = "okay";
-};
-
-&u2phy0 {
-	status = "okay";
-
-	u2phy0_otg: otg-port {
-		phy-supply = <&vcc_vbus_typec0>;
-		status = "okay";
-	};
-
-	u2phy0_host: host-port {
-		phy-supply = <&vcc5v0_host>;
-		status = "okay";
-	};
-};
-
-&u2phy1 {
-	status = "okay";
-
-	u2phy1_otg: otg-port {
-		phy-supply = <&vcc_vbus_typec1>;
-		status = "okay";
-	};
-
-	u2phy1_host: host-port {
-		phy-supply = <&vcc5v0_host>;
-		status = "okay";
-	};
-};
-
-&uart0 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&uart0_xfer &uart0_cts>;
-	status = "okay";
-};
-
-&uart2 {
-	status = "okay";
-};
-
-&usb_host0_ehci {
-	status = "okay";
-};
-
-&usb_host0_ohci {
-	status = "okay";
-};
-
-&usb_host1_ehci {
-	status = "okay";
-};
-
-&usb_host1_ohci {
-	status = "okay";
-};
-
-&usbdrd3_0 {
-	status = "okay";
-};
-
-&usbdrd_dwc3_0 {
-	status = "okay";
-};
-
-&usbdrd3_1 {
-	status = "okay";
-};
-
-&usbdrd_dwc3_1 {
-	status = "okay";
-	dr_mode = "host";
-};
-
-&vopb {
-	status = "okay";
-};
-
-&vopb_mmu {
-	status = "okay";
-};
-
-&vopl {
-	status = "okay";
-};
-
-&vopl_mmu {
-	status = "okay";
 };
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
new file mode 100644
index 000000000000..7e07dae33d0f
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2017 T-Chip Intelligent Technology Co., Ltd
+ */
+
+/dts-v1/;
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/pwm/pwm.h>
+#include "rk3399.dtsi"
+#include "rk3399-opp.dtsi"
+
+/ {
+	model = "Firefly ROC-RK3399-PC Board";
+	compatible = "firefly,roc-rk3399-pc", "rockchip,rk3399";
+
+	chosen {
+		stdout-path = "serial2:1500000n8";
+	};
+
+	backlight: backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm0 0 25000 0>;
+	};
+
+	clkin_gmac: external-gmac-clock {
+		compatible = "fixed-clock";
+		clock-frequency = <125000000>;
+		clock-output-names = "clkin_gmac";
+		#clock-cells = <0>;
+	};
+
+	adc-keys {
+		compatible = "adc-keys";
+		io-channels = <&saradc 1>;
+		io-channel-names = "buttons";
+		keyup-threshold-microvolt = <1500000>;
+		poll-interval = <100>;
+
+		recovery {
+			label = "Recovery";
+			linux,code = <KEY_VENDOR>;
+			press-threshold-microvolt = <18000>;
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		autorepeat;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwr_key_l>;
+
+		power {
+			debounce-interval = <100>;
+			gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_LOW>;
+			label = "GPIO Key Power";
+			linux,code = <KEY_POWER>;
+			wakeup-source;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&work_led_gpio>, <&diy_led_gpio>, <&yellow_led_gpio>;
+
+		work-led {
+			label = "green:work";
+			gpios = <&gpio2 RK_PD3 GPIO_ACTIVE_HIGH>;
+			default-state = "on";
+			linux,default-trigger = "heartbeat";
+		};
+
+		diy-led {
+			label = "red:diy";
+			gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+			linux,default-trigger = "mmc1";
+		};
+
+		yellow-led {
+			label = "yellow:yellow-led";
+			gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+			linux,default-trigger = "mmc0";
+		};
+	};
+
+	sdio_pwrseq: sdio-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&rk808 1>;
+		clock-names = "ext_clock";
+		pinctrl-names = "default";
+		pinctrl-0 = <&wifi_enable_h>;
+
+		/*
+		 * On the module itself this is one of these (depending
+		 * on the actual card populated):
+		 * - SDIO_RESET_L_WL_REG_ON
+		 * - PDN (power down when low)
+		 */
+		reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
+	};
+
+	vcc_vbus_typec0: vcc-vbus-typec0 {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_vbus_typec0";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	/*
+	 * should be placed inside mp8859, but not until mp8859 has
+	 * its own dt-binding.
+	 */
+	dc_12v: mp8859-dcdc1 {
+		compatible = "regulator-fixed";
+		regulator-name = "dc_12v";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+		vin-supply = <&vcc_vbus_typec0>;
+	};
+
+	/* switched by pmic_sleep */
+	vcc1v8_s3: vcca1v8_s3: vcc1v8-s3 {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc1v8_s3";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		vin-supply = <&vcc_1v8>;
+	};
+
+	vcc3v3_sys: vcc3v3-sys {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc3v3_sys";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		vin-supply = <&dc_12v>;
+	};
+
+	/* Actually 3 regulators (host0, 1, 2) controlled by the same gpio */
+	vcc5v0_host: vcc5v0-host-regulator {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		gpio = <&gpio1 RK_PA0 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vcc5v0_host_en &hub_rst>;
+		regulator-name = "vcc5v0_host";
+		regulator-always-on;
+		vin-supply = <&vcc_sys>;
+	};
+
+	vcc_vbus_typec1: vcc-vbus-typec1 {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		gpio = <&gpio1 RK_PB5 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vcc_vbus_typec1_en>;
+		regulator-name = "vcc_vbus_typec1";
+		regulator-always-on;
+		vin-supply = <&vcc_sys>;
+	};
+
+	vcc_sys: vcc-sys {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		gpio = <&gpio2 RK_PA6 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vcc_sys_en>;
+		regulator-name = "vcc_sys";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&dc_12v>;
+	};
+
+	vdd_log: vdd-log {
+		compatible = "pwm-regulator";
+		pwms = <&pwm2 0 25000 1>;
+		regulator-name = "vdd_log";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <800000>;
+		regulator-max-microvolt = <1400000>;
+		vin-supply = <&vcc3v3_sys>;
+	};
+};
+
+&cpu_l0 {
+	cpu-supply = <&vdd_cpu_l>;
+};
+
+&cpu_l1 {
+	cpu-supply = <&vdd_cpu_l>;
+};
+
+&cpu_l2 {
+	cpu-supply = <&vdd_cpu_l>;
+};
+
+&cpu_l3 {
+	cpu-supply = <&vdd_cpu_l>;
+};
+
+&cpu_b0 {
+	cpu-supply = <&vdd_cpu_b>;
+};
+
+&cpu_b1 {
+	cpu-supply = <&vdd_cpu_b>;
+};
+
+&emmc_phy {
+	status = "okay";
+};
+
+&gmac {
+	assigned-clocks = <&cru SCLK_RMII_SRC>;
+	assigned-clock-parents = <&clkin_gmac>;
+	clock_in_out = "input";
+	phy-supply = <&vcc_lan>;
+	phy-mode = "rgmii";
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmii_pins>;
+	snps,reset-gpio = <&gpio3 RK_PB7 GPIO_ACTIVE_LOW>;
+	snps,reset-active-low;
+	snps,reset-delays-us = <0 10000 50000>;
+	tx_delay = <0x28>;
+	rx_delay = <0x11>;
+	status = "okay";
+};
+
+&hdmi {
+	ddc-i2c-bus = <&i2c3>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&hdmi_cec>;
+	status = "okay";
+};
+
+&i2c0 {
+	clock-frequency = <400000>;
+	i2c-scl-rising-time-ns = <168>;
+	i2c-scl-falling-time-ns = <4>;
+	status = "okay";
+
+	rk808: pmic@1b {
+		compatible = "rockchip,rk808";
+		reg = <0x1b>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <21 IRQ_TYPE_LEVEL_LOW>;
+		#clock-cells = <1>;
+		clock-output-names = "xin32k", "rk808-clkout2";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pmic_int_l>;
+		rockchip,system-power-controller;
+		wakeup-source;
+
+		vcc1-supply = <&vcc3v3_sys>;
+		vcc2-supply = <&vcc3v3_sys>;
+		vcc3-supply = <&vcc3v3_sys>;
+		vcc4-supply = <&vcc3v3_sys>;
+		vcc6-supply = <&vcc3v3_sys>;
+		vcc7-supply = <&vcc3v3_sys>;
+		vcc8-supply = <&vcc3v3_sys>;
+		vcc9-supply = <&vcc3v3_sys>;
+		vcc10-supply = <&vcc3v3_sys>;
+		vcc11-supply = <&vcc3v3_sys>;
+		vcc12-supply = <&vcc3v3_sys>;
+		vcc13-supply = <&vcc3v3_sys>;
+		vcc14-supply = <&vcc3v3_sys>;
+		vddio-supply = <&vcc_3v0>;
+
+		regulators {
+			vdd_center: DCDC_REG1 {
+				regulator-name = "vdd_center";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <750000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-ramp-delay = <6001>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vdd_cpu_l: DCDC_REG2 {
+				regulator-name = "vdd_cpu_l";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <750000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-ramp-delay = <6001>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc_ddr: DCDC_REG3 {
+				regulator-name = "vcc_ddr";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+				};
+			};
+
+			vcc_1v8: DCDC_REG4 {
+				regulator-name = "vcc_1v8";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vcca1v8_codec: LDO_REG1 {
+				regulator-name = "vcca1v8_codec";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc1v8_hdmi: LDO_REG2 {
+				regulator-name = "vcc1v8_hdmi";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc1v8_pmu: LDO_REG3 {
+				regulator-name = "vcc1v8_pmu";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1800000>;
+				};
+			};
+
+			vcc_sdio: LDO_REG4 {
+				regulator-name = "vcc_sdio";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3000000>;
+				};
+			};
+
+			vcca3v0_codec: LDO_REG5 {
+				regulator-name = "vcca3v0_codec";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <3000000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc_1v5: LDO_REG6 {
+				regulator-name = "vcc_1v5";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <1500000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <1500000>;
+				};
+			};
+
+			vcca0v9_hdmi: LDO_REG7 {
+				regulator-name = "vcca0v9_hdmi";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <900000>;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc_3v0: LDO_REG8 {
+				regulator-name = "vcc_3v0";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-min-microvolt = <3000000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-state-mem {
+					regulator-on-in-suspend;
+					regulator-suspend-microvolt = <3000000>;
+				};
+			};
+
+			vcc3v3_s3: vcc_lan: SWITCH_REG1 {
+				regulator-name = "vcc3v3_s3";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+
+			vcc3v3_s0: SWITCH_REG2 {
+				regulator-name = "vcc3v3_s0";
+				regulator-always-on;
+				regulator-boot-on;
+				regulator-state-mem {
+					regulator-off-in-suspend;
+				};
+			};
+		};
+	};
+
+	vdd_cpu_b: regulator@40 {
+		compatible = "silergy,syr827";
+		reg = <0x40>;
+		fcs,suspend-voltage-selector = <1>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vsel1_gpio>;
+		regulator-name = "vdd_cpu_b";
+		regulator-min-microvolt = <712500>;
+		regulator-max-microvolt = <1500000>;
+		regulator-ramp-delay = <1000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc3v3_sys>;
+
+		regulator-state-mem {
+			regulator-off-in-suspend;
+		};
+	};
+
+	vdd_gpu: regulator@41 {
+		compatible = "silergy,syr828";
+		reg = <0x41>;
+		fcs,suspend-voltage-selector = <1>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&vsel2_gpio>;
+		regulator-name = "vdd_gpu";
+		regulator-min-microvolt = <712500>;
+		regulator-max-microvolt = <1500000>;
+		regulator-ramp-delay = <1000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc3v3_sys>;
+
+		regulator-state-mem {
+			regulator-off-in-suspend;
+		};
+	};
+};
+
+&i2c1 {
+	i2c-scl-rising-time-ns = <300>;
+	i2c-scl-falling-time-ns = <15>;
+	status = "okay";
+};
+
+&i2c3 {
+	i2c-scl-rising-time-ns = <450>;
+	i2c-scl-falling-time-ns = <15>;
+	status = "okay";
+};
+
+&i2c4 {
+	i2c-scl-rising-time-ns = <600>;
+	i2c-scl-falling-time-ns = <20>;
+	status = "okay";
+
+	fusb1: usb-typec@22 {
+		compatible = "fcs,fusb302";
+		reg = <0x22>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&fusb1_int>;
+		vbus-supply = <&vcc_vbus_typec1>;
+		status = "okay";
+	};
+};
+
+&i2c7 {
+	i2c-scl-rising-time-ns = <600>;
+	i2c-scl-falling-time-ns = <20>;
+	status = "okay";
+
+	fusb0: usb-typec@22 {
+		compatible = "fcs,fusb302";
+		reg = <0x22>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&fusb0_int>;
+		vbus-supply = <&vcc_vbus_typec0>;
+		status = "okay";
+	};
+};
+
+&i2s0 {
+	rockchip,playback-channels = <8>;
+	rockchip,capture-channels = <8>;
+	status = "okay";
+};
+
+&i2s1 {
+	rockchip,playback-channels = <2>;
+	rockchip,capture-channels = <2>;
+	status = "okay";
+};
+
+&i2s2 {
+	status = "okay";
+};
+
+&io_domains {
+	audio-supply = <&vcca1v8_codec>;
+	bt656-supply = <&vcc_3v0>;
+	gpio1830-supply = <&vcc_3v0>;
+	sdmmc-supply = <&vcc_sdio>;
+	status = "okay";
+};
+
+&pmu_io_domains {
+	pmu1830-supply = <&vcc_3v0>;
+	status = "okay";
+};
+
+&pinctrl {
+	buttons {
+		pwr_key_l: pwr-key-l {
+			rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+
+	lcd-panel {
+		lcd_panel_reset: lcd-panel-reset {
+			rockchip,pins = <4 RK_PD6 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+
+	leds {
+		diy_led_gpio: diy_led-gpio {
+			rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		work_led_gpio: work_led-gpio {
+			rockchip,pins = <2 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		yellow_led_gpio: yellow_led-gpio {
+			rockchip,pins = <0 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	pmic {
+		vsel1_gpio: vsel1-gpio {
+			rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_down>;
+		};
+
+		vsel2_gpio: vsel2-gpio {
+			rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_down>;
+		};
+	};
+
+	sdio-pwrseq {
+		wifi_enable_h: wifi-enable-h {
+			rockchip,pins = <0 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	pmic {
+		pmic_int_l: pmic-int-l {
+			rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+
+	usb2 {
+		vcc5v0_host_en: vcc5v0-host-en {
+			rockchip,pins = <1 RK_PA0 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		vcc_sys_en: vcc-sys-en {
+			rockchip,pins = <2 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		hub_rst: hub-rst {
+			rockchip,pins = <2 RK_PA4 RK_FUNC_GPIO &pcfg_output_high>;
+		};
+	};
+
+	usb-typec {
+		vcc_vbus_typec1_en: vcc-vbus-typec1-en {
+			rockchip,pins = <1 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
+	fusb30x {
+		fusb0_int: fusb0-int {
+			rockchip,pins = <1 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+
+		fusb1_int: fusb1-int {
+			rockchip,pins = <1 RK_PA1 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+};
+
+&pwm0 {
+	status = "okay";
+};
+
+&pwm2 {
+	status = "okay";
+};
+
+&saradc {
+	vref-supply = <&vcca1v8_s3>;
+	status = "okay";
+};
+
+&sdmmc {
+	bus-width = <4>;
+	cap-mmc-highspeed;
+	cap-sd-highspeed;
+	cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>;
+	disable-wp;
+	max-frequency = <150000000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
+	status = "okay";
+};
+
+&sdhci {
+	bus-width = <8>;
+	mmc-hs400-1_8v;
+	mmc-hs400-enhanced-strobe;
+	non-removable;
+	status = "okay";
+};
+
+&tcphy0 {
+	status = "okay";
+};
+
+&tcphy1 {
+	status = "okay";
+};
+
+&tsadc {
+	/* tshut mode 0:CRU 1:GPIO */
+	rockchip,hw-tshut-mode = <1>;
+	/* tshut polarity 0:LOW 1:HIGH */
+	rockchip,hw-tshut-polarity = <1>;
+	status = "okay";
+};
+
+&u2phy0 {
+	status = "okay";
+
+	u2phy0_otg: otg-port {
+		phy-supply = <&vcc_vbus_typec0>;
+		status = "okay";
+	};
+
+	u2phy0_host: host-port {
+		phy-supply = <&vcc5v0_host>;
+		status = "okay";
+	};
+};
+
+&u2phy1 {
+	status = "okay";
+
+	u2phy1_otg: otg-port {
+		phy-supply = <&vcc_vbus_typec1>;
+		status = "okay";
+	};
+
+	u2phy1_host: host-port {
+		phy-supply = <&vcc5v0_host>;
+		status = "okay";
+	};
+};
+
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart0_xfer &uart0_cts>;
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&usb_host0_ehci {
+	status = "okay";
+};
+
+&usb_host0_ohci {
+	status = "okay";
+};
+
+&usb_host1_ehci {
+	status = "okay";
+};
+
+&usb_host1_ohci {
+	status = "okay";
+};
+
+&usbdrd3_0 {
+	status = "okay";
+};
+
+&usbdrd_dwc3_0 {
+	status = "okay";
+};
+
+&usbdrd3_1 {
+	status = "okay";
+};
+
+&usbdrd_dwc3_1 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&vopb {
+	status = "okay";
+};
+
+&vopb_mmu {
+	status = "okay";
+};
+
+&vopl {
+	status = "okay";
+};
+
+&vopl_mmu {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
index 1ae1ebd4efdd..188d9dfc297b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
@@ -486,21 +486,18 @@
 
 	sdio0 {
 		sdio0_bus4: sdio0-bus4 {
-			rockchip,pins =
-				<2 20 RK_FUNC_1 &pcfg_pull_up_20ma>,
-				<2 21 RK_FUNC_1 &pcfg_pull_up_20ma>,
-				<2 22 RK_FUNC_1 &pcfg_pull_up_20ma>,
-				<2 23 RK_FUNC_1 &pcfg_pull_up_20ma>;
+			rockchip,pins = <2 RK_PC4 1 &pcfg_pull_up_20ma>,
+					<2 RK_PC5 1 &pcfg_pull_up_20ma>,
+					<2 RK_PC6 1 &pcfg_pull_up_20ma>,
+					<2 RK_PC7 1 &pcfg_pull_up_20ma>;
 		};
 
 		sdio0_cmd: sdio0-cmd {
-			rockchip,pins =
-				<2 24 RK_FUNC_1 &pcfg_pull_up_20ma>;
+			rockchip,pins = <2 RK_PD0 1 &pcfg_pull_up_20ma>;
 		};
 
 		sdio0_clk: sdio0-clk {
-			rockchip,pins =
-				<2 25 RK_FUNC_1 &pcfg_pull_none_20ma>;
+			rockchip,pins = <2 RK_PD1 1 &pcfg_pull_none_20ma>;
 		};
 	};
 
@@ -532,8 +529,7 @@
 
 	wifi {
 		wifi_enable_h: wifi-enable-h {
-			rockchip,pins =
-				<0 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
+			rockchip,pins = <0 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
 
 		wifi_host_wake_l: wifi-host-wake-l {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
index e544deb61d28..7f4b2eba31d4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
@@ -81,6 +81,12 @@
 		reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
 	};
 
+	sound {
+		compatible = "audio-graph-card";
+		label = "rockchip,rk3399";
+		dais = <&i2s1_p0>;
+	};
+
 	vcc12v_dcin: vcc12v-dcin {
 		compatible = "regulator-fixed";
 		regulator-name = "vcc12v_dcin";
@@ -470,6 +476,20 @@
 	i2c-scl-rising-time-ns = <300>;
 	i2c-scl-falling-time-ns = <15>;
 	status = "okay";
+
+	es8316: codec@11 {
+		compatible = "everest,es8316";
+		reg = <0x11>;
+		clocks = <&cru SCLK_I2S_8CH_OUT>;
+		clock-names = "mclk";
+		#sound-dai-cells = <0>;
+
+		port {
+			es8316_p0_0: endpoint {
+				remote-endpoint = <&i2s1_p0_0>;
+			};
+		};
+	};
 };
 
 &i2c3 {
@@ -505,6 +525,14 @@
 	rockchip,playback-channels = <2>;
 	rockchip,capture-channels = <2>;
 	status = "okay";
+
+	i2s1_p0: port {
+		i2s1_p0_0: endpoint {
+			dai-format = "i2s";
+			mclk-fs = <256>;
+			remote-endpoint = <&es8316_p0_0>;
+		};
+	};
 };
 
 &i2s2 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index cede1ad81be2..e62ea0e2b657 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -520,6 +520,7 @@
 		its: interrupt-controller@fee20000 {
 			compatible = "arm,gic-v3-its";
 			msi-controller;
+			#msi-cells = <1>;
 			reg = <0x0 0xfee20000 0x0 0x20000>;
 		};
 
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index 799c75fa7981..efb24579922c 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -419,6 +419,114 @@
 			reg = <0x00 0x30e00000 0x00 0x1000>;
 			#hwlock-cells = <1>;
 		};
+
+		mailbox0_cluster0: mailbox@31f80000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f80000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster1: mailbox@31f81000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f81000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster2: mailbox@31f82000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f82000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster3: mailbox@31f83000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f83000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster4: mailbox@31f84000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f84000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster5: mailbox@31f85000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f85000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster6: mailbox@31f86000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f86000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster7: mailbox@31f87000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f87000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster8: mailbox@31f88000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f88000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster9: mailbox@31f89000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f89000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster10: mailbox@31f8a000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f8a000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
+
+		mailbox0_cluster11: mailbox@31f8b000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f8b000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+		};
 	};
 
 	main_gpio0:  main_gpio0@600000 {
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 1102b84f853d..8a85b482ad31 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -221,6 +221,7 @@
 	bus-width = <8>;
 	non-removable;
 	ti,driver-strength-ohm = <50>;
+	disable-wp;
 };
 
 &dwc3_1 {
@@ -280,3 +281,61 @@
 &pcie1_ep {
 	status = "disabled";
 };
+
+&mailbox0_cluster0 {
+	interrupts = <164 0>;
+
+	mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 {
+		ti,mbox-tx = <1 0 0>;
+		ti,mbox-rx = <0 0 0>;
+	};
+};
+
+&mailbox0_cluster1 {
+	interrupts = <165 0>;
+
+	mbox_mcu_r5fss0_core1: mbox-mcu-r5fss0-core1 {
+		ti,mbox-tx = <1 0 0>;
+		ti,mbox-rx = <0 0 0>;
+	};
+};
+
+&mailbox0_cluster2 {
+	status = "disabled";
+};
+
+&mailbox0_cluster3 {
+	status = "disabled";
+};
+
+&mailbox0_cluster4 {
+	status = "disabled";
+};
+
+&mailbox0_cluster5 {
+	status = "disabled";
+};
+
+&mailbox0_cluster6 {
+	status = "disabled";
+};
+
+&mailbox0_cluster7 {
+	status = "disabled";
+};
+
+&mailbox0_cluster8 {
+	status = "disabled";
+};
+
+&mailbox0_cluster9 {
+	status = "disabled";
+};
+
+&mailbox0_cluster10 {
+	status = "disabled";
+};
+
+&mailbox0_cluster11 {
+	status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
index d2894d55fbbe..2a3cd6174504 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
@@ -41,6 +41,32 @@
 			J721E_IOPAD(0x0, PIN_INPUT, 7) /* (AC18) EXTINTn.GPIO0_0 */
 		>;
 	};
+
+	main_mmc1_pins_default: main_mmc1_pins_default {
+		pinctrl-single,pins = <
+			J721E_IOPAD(0x254, PIN_INPUT, 0) /* (R29) MMC1_CMD */
+			J721E_IOPAD(0x250, PIN_INPUT, 0) /* (P25) MMC1_CLK */
+			J721E_IOPAD(0x2ac, PIN_INPUT, 0) /* (P25) MMC1_CLKLB */
+			J721E_IOPAD(0x24c, PIN_INPUT, 0) /* (R24) MMC1_DAT0 */
+			J721E_IOPAD(0x248, PIN_INPUT, 0) /* (P24) MMC1_DAT1 */
+			J721E_IOPAD(0x244, PIN_INPUT, 0) /* (R25) MMC1_DAT2 */
+			J721E_IOPAD(0x240, PIN_INPUT, 0) /* (R26) MMC1_DAT3 */
+			J721E_IOPAD(0x258, PIN_INPUT, 0) /* (P23) MMC1_SDCD */
+			J721E_IOPAD(0x25c, PIN_INPUT, 0) /* (R28) MMC1_SDWP */
+		>;
+	};
+
+	main_usbss0_pins_default: main_usbss0_pins_default {
+		pinctrl-single,pins = <
+			J721E_IOPAD(0x290, PIN_OUTPUT, 0) /* (U6) USB0_DRVVBUS */
+		>;
+	};
+
+	main_usbss1_pins_default: main_usbss1_pins_default {
+		pinctrl-single,pins = <
+			J721E_IOPAD(0x214, PIN_OUTPUT, 4) /* (V4) MCAN1_TX.USB1_DRVVBUS */
+		>;
+	};
 };
 
 &wkup_pmx0 {
@@ -117,3 +143,139 @@
 &wkup_gpio1 {
 	status = "disabled";
 };
+
+&mailbox0_cluster0 {
+	interrupts = <214 0>;
+
+	mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 {
+		ti,mbox-rx = <0 0 0>;
+		ti,mbox-tx = <1 0 0>;
+	};
+
+	mbox_mcu_r5fss0_core1: mbox-mcu-r5fss0-core1 {
+		ti,mbox-rx = <2 0 0>;
+		ti,mbox-tx = <3 0 0>;
+	};
+};
+
+&mailbox0_cluster1 {
+	interrupts = <215 0>;
+
+	mbox_main_r5fss0_core0: mbox-main-r5fss0-core0 {
+		ti,mbox-rx = <0 0 0>;
+		ti,mbox-tx = <1 0 0>;
+	};
+
+	mbox_main_r5fss0_core1: mbox-main-r5fss0-core1 {
+		ti,mbox-rx = <2 0 0>;
+		ti,mbox-tx = <3 0 0>;
+	};
+};
+
+&mailbox0_cluster2 {
+	interrupts = <216 0>;
+
+	mbox_main_r5fss1_core0: mbox-main-r5fss1-core0 {
+		ti,mbox-rx = <0 0 0>;
+		ti,mbox-tx = <1 0 0>;
+	};
+
+	mbox_main_r5fss1_core1: mbox-main-r5fss1-core1 {
+		ti,mbox-rx = <2 0 0>;
+		ti,mbox-tx = <3 0 0>;
+	};
+};
+
+&mailbox0_cluster3 {
+	interrupts = <217 0>;
+
+	mbox_c66_0: mbox-c66-0 {
+		ti,mbox-rx = <0 0 0>;
+		ti,mbox-tx = <1 0 0>;
+	};
+
+	mbox_c66_1: mbox-c66-1 {
+		ti,mbox-rx = <2 0 0>;
+		ti,mbox-tx = <3 0 0>;
+	};
+};
+
+&mailbox0_cluster4 {
+	interrupts = <218 0>;
+
+	mbox_c71_0: mbox-c71-0 {
+		ti,mbox-rx = <0 0 0>;
+		ti,mbox-tx = <1 0 0>;
+	};
+};
+
+&mailbox0_cluster5 {
+	status = "disabled";
+};
+
+&mailbox0_cluster6 {
+	status = "disabled";
+};
+
+&mailbox0_cluster7 {
+	status = "disabled";
+};
+
+&mailbox0_cluster8 {
+	status = "disabled";
+};
+
+&mailbox0_cluster9 {
+	status = "disabled";
+};
+
+&mailbox0_cluster10 {
+	status = "disabled";
+};
+
+&mailbox0_cluster11 {
+	status = "disabled";
+};
+
+&main_sdhci0 {
+	/* eMMC */
+	non-removable;
+	ti,driver-strength-ohm = <50>;
+	disable-wp;
+};
+
+&main_sdhci1 {
+	/* SD/MMC */
+	pinctrl-names = "default";
+	pinctrl-0 = <&main_mmc1_pins_default>;
+	ti,driver-strength-ohm = <50>;
+	disable-wp;
+};
+
+&main_sdhci2 {
+	/* Unused */
+	status = "disabled";
+};
+
+&usbss0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&main_usbss0_pins_default>;
+	ti,usb2-only;
+	ti,vbus-divider;
+};
+
+&usb0 {
+	dr_mode = "otg";
+	maximum-speed = "high-speed";
+};
+
+&usbss1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&main_usbss1_pins_default>;
+	ti,usb2-only;
+};
+
+&usb1 {
+	dr_mode = "host";
+	maximum-speed = "high-speed";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index 698ef9a1d5b7..1e4c2b78d66d 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -95,6 +95,114 @@
 			reg = <0x00 0x30e00000 0x00 0x1000>;
 			#hwlock-cells = <1>;
 		};
+
+		mailbox0_cluster0: mailbox@31f80000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f80000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster1: mailbox@31f81000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f81000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster2: mailbox@31f82000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f82000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster3: mailbox@31f83000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f83000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster4: mailbox@31f84000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f84000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster5: mailbox@31f85000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f85000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster6: mailbox@31f86000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f86000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster7: mailbox@31f87000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f87000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster8: mailbox@31f88000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f88000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster9: mailbox@31f89000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f89000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster10: mailbox@31f8a000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f8a000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
+
+		mailbox0_cluster11: mailbox@31f8b000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f8b000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&main_navss_intr>;
+		};
 	};
 
 	secure_proxy_main: mailbox@32c00000 {
@@ -378,4 +486,114 @@
 		clocks = <&k3_clks 112 0>;
 		clock-names = "gpio";
 	};
+
+	main_sdhci0: sdhci@4f80000 {
+		compatible = "ti,j721e-sdhci-8bit";
+		reg = <0x0 0x4f80000 0x0 0x1000>, <0x0 0x4f88000 0x0 0x400>;
+		interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+		power-domains = <&k3_pds 91 TI_SCI_PD_EXCLUSIVE>;
+		clock-names = "clk_xin", "clk_ahb";
+		clocks = <&k3_clks 91 1>, <&k3_clks 91 0>;
+		assigned-clocks = <&k3_clks 91 1>;
+		assigned-clock-parents = <&k3_clks 91 2>;
+		bus-width = <8>;
+		mmc-hs400-1_8v;
+		mmc-ddr-1_8v;
+		ti,otap-del-sel = <0x2>;
+		ti,trm-icp = <0x8>;
+		ti,strobe-sel = <0x77>;
+		dma-coherent;
+	};
+
+	main_sdhci1: sdhci@4fb0000 {
+		compatible = "ti,j721e-sdhci-4bit";
+		reg = <0x0 0x04fb0000 0x0 0x1000>, <0x0 0x4fb8000 0x0 0x400>;
+		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+		power-domains = <&k3_pds 92 TI_SCI_PD_EXCLUSIVE>;
+		clock-names = "clk_xin", "clk_ahb";
+		clocks = <&k3_clks 92 0>, <&k3_clks 92 5>;
+		assigned-clocks = <&k3_clks 92 0>;
+		assigned-clock-parents = <&k3_clks 92 1>;
+		ti,otap-del-sel = <0x2>;
+		ti,trm-icp = <0x8>;
+		ti,clkbuf-sel = <0x7>;
+		dma-coherent;
+		no-1-8-v;
+	};
+
+	main_sdhci2: sdhci@4f98000 {
+		compatible = "ti,j721e-sdhci-4bit";
+		reg = <0x0 0x4f98000 0x0 0x1000>, <0x0 0x4f90000 0x0 0x400>;
+		interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+		power-domains = <&k3_pds 93 TI_SCI_PD_EXCLUSIVE>;
+		clock-names = "clk_xin", "clk_ahb";
+		clocks = <&k3_clks 93 0>, <&k3_clks 93 5>;
+		assigned-clocks = <&k3_clks 93 0>;
+		assigned-clock-parents = <&k3_clks 93 1>;
+		ti,otap-del-sel = <0x2>;
+		ti,trm-icp = <0x8>;
+		ti,clkbuf-sel = <0x7>;
+		dma-coherent;
+		no-1-8-v;
+	};
+
+	usbss0: cdns_usb@4104000 {
+		compatible = "ti,j721e-usb";
+		reg = <0x00 0x4104000 0x00 0x100>;
+		dma-coherent;
+		power-domains = <&k3_pds 288 TI_SCI_PD_EXCLUSIVE>;
+		clocks = <&k3_clks 288 15>, <&k3_clks 288 3>;
+		clock-names = "ref", "lpm";
+		assigned-clocks = <&k3_clks 288 15>;	/* USB2_REFCLK */
+		assigned-clock-parents = <&k3_clks 288 16>; /* HFOSC0 */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		usb0: usb@6000000 {
+			compatible = "cdns,usb3";
+			reg = <0x00 0x6000000 0x00 0x10000>,
+			      <0x00 0x6010000 0x00 0x10000>,
+			      <0x00 0x6020000 0x00 0x10000>;
+			reg-names = "otg", "xhci", "dev";
+			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,	/* irq.0 */
+				     <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,	/* irq.6 */
+				     <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;	/* otgirq.0 */
+			interrupt-names = "host",
+					  "peripheral",
+					  "otg";
+			maximum-speed = "super-speed";
+			dr_mode = "otg";
+		};
+	};
+
+	usbss1: cdns_usb@4114000 {
+		compatible = "ti,j721e-usb";
+		reg = <0x00 0x4114000 0x00 0x100>;
+		dma-coherent;
+		power-domains = <&k3_pds 289 TI_SCI_PD_EXCLUSIVE>;
+		clocks = <&k3_clks 289 15>, <&k3_clks 289 3>;
+		clock-names = "ref", "lpm";
+		assigned-clocks = <&k3_clks 289 15>;	/* USB2_REFCLK */
+		assigned-clock-parents = <&k3_clks 289 16>; /* HFOSC0 */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		usb1: usb@6400000 {
+			compatible = "cdns,usb3";
+			reg = <0x00 0x6400000 0x00 0x10000>,
+			      <0x00 0x6410000 0x00 0x10000>,
+			      <0x00 0x6420000 0x00 0x10000>;
+			reg-names = "otg", "xhci", "dev";
+			interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,	/* irq.0 */
+				     <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,	/* irq.6 */
+				     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;	/* otgirq.0 */
+			interrupt-names = "host",
+					  "peripheral",
+					  "otg";
+			maximum-speed = "super-speed";
+			dr_mode = "otg";
+		};
+	};
 };
diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
index 43ea1ba97922..ee5470edb435 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
@@ -127,6 +127,8 @@
 			 <0x00 0x00600000 0x00 0x00600000 0x00 0x00031100>, /* GPIO */
 			 <0x00 0x00900000 0x00 0x00900000 0x00 0x00012000>, /* serdes */
 			 <0x00 0x00A40000 0x00 0x00A40000 0x00 0x00000800>, /* timesync router */
+			 <0x00 0x06000000 0x00 0x06000000 0x00 0x00400000>, /* USBSS0 */
+			 <0x00 0x06400000 0x00 0x06400000 0x00 0x00400000>, /* USBSS1 */
 			 <0x00 0x01000000 0x00 0x01000000 0x00 0x0af02400>, /* Most peripherals */
 			 <0x00 0x30800000 0x00 0x30800000 0x00 0x0bc00000>, /* MAIN NAVSS */
 			 <0x00 0x0d000000 0x00 0x0d000000 0x00 0x01000000>, /* PCIe Core*/
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index 9aa67340a4d8..3c731e73903a 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -115,6 +115,27 @@
 		method = "smc";
 	};
 
+	firmware {
+		zynqmp_firmware: zynqmp-firmware {
+			compatible = "xlnx,zynqmp-firmware";
+			method = "smc";
+
+			nvmem_firmware {
+				compatible = "xlnx,zynqmp-nvmem-fw";
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				soc_revision: soc_revision@0 {
+					reg = <0x0 0x4>;
+				};
+			};
+
+			zynqmp_pcap: pcap {
+				compatible = "xlnx,zynqmp-pcap-fpga";
+			};
+		};
+	};
+
 	timer {
 		compatible = "arm,armv8-timer";
 		interrupt-parent = <&gic>;
@@ -124,6 +145,14 @@
 			     <1 10 0xf08>;
 	};
 
+	fpga_full: fpga-full {
+		compatible = "fpga-region";
+		fpga-mgr = <&zynqmp_pcap>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+	};
+
 	amba_apu: amba-apu@0 {
 		compatible = "simple-bus";
 		#address-cells = <2>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c9a867ac32d4..b2f667307f82 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -7,8 +7,6 @@ CONFIG_PREEMPT=y
 CONFIG_IRQ_TIME_ACCOUNTING=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_IKCONFIG=y
@@ -29,6 +27,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_ARCH_ACTIONS=y
 CONFIG_ARCH_AGILEX=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_ARCH_ALPINE=y
@@ -48,6 +47,7 @@ CONFIG_ARCH_MXC=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_ROCKCHIP=y
+CONFIG_ARCH_S32=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_SYNQUACER=y
@@ -71,6 +71,7 @@ CONFIG_COMPAT=y
 CONFIG_RANDOMIZE_BASE=y
 CONFIG_HIBERNATION=y
 CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
+CONFIG_ENERGY_MODEL=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_ARM_PSCI_CPUIDLE=y
 CONFIG_CPU_FREQ=y
@@ -90,7 +91,7 @@ CONFIG_ARM_TEGRA186_CPUFREQ=y
 CONFIG_ARM_SCPI_PROTOCOL=y
 CONFIG_RASPBERRYPI_FIRMWARE=y
 CONFIG_INTEL_STRATIX10_SERVICE=y
-CONFIG_TI_SCI_PROTOCOL=y
+CONFIG_INTEL_STRATIX10_RSU=m
 CONFIG_EFI_CAPSULE_LOADER=y
 CONFIG_IMX_SCU=y
 CONFIG_IMX_SCU_PD=y
@@ -121,7 +122,6 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_KSM=y
 CONFIG_MEMORY_FAILURE=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -205,12 +205,12 @@ CONFIG_HISILICON_LPC=y
 CONFIG_SIMPLE_PM_BUS=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_M25P80=y
 CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DENALI_DT=y
 CONFIG_MTD_NAND_MARVELL=y
 CONFIG_MTD_NAND_QCOM=y
 CONFIG_MTD_SPI_NOR=y
+CONFIG_SPI_CADENCE_QUADSPI=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_VIRTIO_BLK=y
@@ -265,18 +265,12 @@ CONFIG_HNS3_ENET=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
 CONFIG_IGBVF=y
-CONFIG_MLX4_EN=m
-CONFIG_MLX4_CORE=m
-CONFIG_MLX4_DEBUG=y
-CONFIG_MLX4_CORE_GEN2=y
-CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_EN_ARFS=y
-CONFIG_MLX5_EN_RXNFC=y
-CONFIG_MLX5_MPFS=y
 CONFIG_MVNETA=y
 CONFIG_MVPP2=y
 CONFIG_SKY2=y
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 CONFIG_QCOM_EMAC=m
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -285,11 +279,11 @@ CONFIG_SNI_AVE=y
 CONFIG_SNI_NETSEC=y
 CONFIG_STMMAC_ETH=m
 CONFIG_MDIO_BUS_MUX_MMIOREG=y
-CONFIG_AT803X_PHY=m
 CONFIG_MARVELL_PHY=m
 CONFIG_MARVELL_10G_PHY=m
 CONFIG_MESON_GXL_PHY=m
 CONFIG_MICREL_PHY=y
+CONFIG_AT803X_PHY=y
 CONFIG_REALTEK_PHY=m
 CONFIG_ROCKCHIP_PHY=y
 CONFIG_USB_PEGASUS=m
@@ -314,6 +308,7 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_ADC=m
 CONFIG_KEYBOARD_GPIO=y
 CONFIG_KEYBOARD_SNVS_PWRKEY=m
+CONFIG_KEYBOARD_IMX_SC_KEY=m
 CONFIG_KEYBOARD_CROS_EC=y
 CONFIG_INPUT_TOUCHSCREEN=y
 CONFIG_TOUCHSCREEN_ATMEL_MXT=m
@@ -352,6 +347,8 @@ CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_SERIAL_FSL_LPUART=y
 CONFIG_SERIAL_FSL_LPUART_CONSOLE=y
+CONFIG_SERIAL_FSL_LINFLEXUART=y
+CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE=y
 CONFIG_SERIAL_MVEBU_UART=y
 CONFIG_SERIAL_DEV_BUS=y
 CONFIG_VIRTIO_CONSOLE=y
@@ -392,8 +389,8 @@ CONFIG_SPI_PL022=y
 CONFIG_SPI_ROCKCHIP=y
 CONFIG_SPI_QUP=y
 CONFIG_SPI_S3C64XX=y
-CONFIG_SPI_SPIDEV=m
 CONFIG_SPI_SUN6I=y
+CONFIG_SPI_SPIDEV=m
 CONFIG_SPMI=y
 CONFIG_PINCTRL_SINGLE=y
 CONFIG_PINCTRL_MAX77620=y
@@ -411,6 +408,7 @@ CONFIG_PINCTRL_QDF2XXX=y
 CONFIG_PINCTRL_QCOM_SPMI_PMIC=y
 CONFIG_PINCTRL_SDM845=y
 CONFIG_PINCTRL_SM8150=y
+CONFIG_GPIO_ALTERA=m
 CONFIG_GPIO_DWAPB=y
 CONFIG_GPIO_MB86S7X=y
 CONFIG_GPIO_PL061=y
@@ -444,6 +442,7 @@ CONFIG_ROCKCHIP_THERMAL=m
 CONFIG_RCAR_THERMAL=y
 CONFIG_RCAR_GEN3_THERMAL=y
 CONFIG_ARMADA_THERMAL=y
+CONFIG_BCM2711_THERMAL=m
 CONFIG_BCM2835_THERMAL=m
 CONFIG_BRCMSTB_THERMAL=m
 CONFIG_EXYNOS_THERMAL=y
@@ -466,8 +465,6 @@ CONFIG_MFD_ALTERA_SYSMGR=y
 CONFIG_MFD_BD9571MWV=y
 CONFIG_MFD_AXP20X_I2C=y
 CONFIG_MFD_AXP20X_RSB=y
-CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_CHARDEV=m
 CONFIG_MFD_EXYNOS_LPASS=m
 CONFIG_MFD_HI6421_PMIC=y
 CONFIG_MFD_HI655X_PMIC=y
@@ -662,9 +659,9 @@ CONFIG_RTC_DRV_SNVS=m
 CONFIG_RTC_DRV_IMX_SC=m
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_FSL_EDMA=y
 CONFIG_DMA_BCM2835=m
 CONFIG_DMA_SUN6I=m
+CONFIG_FSL_EDMA=y
 CONFIG_IMX_SDMA=y
 CONFIG_K3_DMA=y
 CONFIG_MV_XOR=y
@@ -683,6 +680,7 @@ CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_XEN_GNTDEV=y
 CONFIG_XEN_GRANT_DEV_ALLOC=y
+CONFIG_MFD_CROS_EC=y
 CONFIG_CROS_EC_I2C=y
 CONFIG_CROS_EC_SPI=y
 CONFIG_COMMON_CLK_RK808=y
@@ -716,7 +714,6 @@ CONFIG_ARM_MHU=y
 CONFIG_IMX_MBOX=y
 CONFIG_PLATFORM_MHU=y
 CONFIG_BCM2835_MBOX=y
-CONFIG_TI_MESSAGE_MANAGER=y
 CONFIG_QCOM_APCS_IPC=y
 CONFIG_ROCKCHIP_IOMMU=y
 CONFIG_TEGRA_IOMMU_SMMU=y
@@ -732,7 +729,6 @@ CONFIG_RPMSG_QCOM_GLINK_SMEM=m
 CONFIG_RPMSG_QCOM_SMD=y
 CONFIG_RASPBERRYPI_POWER=y
 CONFIG_IMX_SCU_SOC=y
-CONFIG_QCOM_COMMAND_DB=y
 CONFIG_QCOM_GENI_SE=y
 CONFIG_QCOM_GLINK_SSR=m
 CONFIG_QCOM_RPMH=y
@@ -741,9 +737,11 @@ CONFIG_QCOM_SMD_RPM=y
 CONFIG_QCOM_SMP2P=y
 CONFIG_QCOM_SMSM=y
 CONFIG_ARCH_R8A774A1=y
+CONFIG_ARCH_R8A774B1=y
 CONFIG_ARCH_R8A774C0=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_R8A7796=y
+CONFIG_ARCH_R8A77961=y
 CONFIG_ARCH_R8A77965=y
 CONFIG_ARCH_R8A77970=y
 CONFIG_ARCH_R8A77980=y
@@ -756,9 +754,7 @@ CONFIG_ARCH_TEGRA_186_SOC=y
 CONFIG_ARCH_TEGRA_194_SOC=y
 CONFIG_ARCH_K3_AM6_SOC=y
 CONFIG_ARCH_K3_J721E_SOC=y
-CONFIG_SOC_TI=y
 CONFIG_TI_SCI_PM_DOMAINS=y
-CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_EXTCON_USBC_CROS_EC=y
 CONFIG_MEMORY=y
@@ -801,15 +797,16 @@ CONFIG_PHY_ROCKCHIP_TYPEC=y
 CONFIG_PHY_UNIPHIER_USB2=y
 CONFIG_PHY_UNIPHIER_USB3=y
 CONFIG_PHY_TEGRA_XUSB=y
+CONFIG_ARM_SMMU_V3_PMU=m
 CONFIG_FSL_IMX8_DDR_PMU=m
 CONFIG_HISI_PMU=y
 CONFIG_QCOM_L2_PMU=y
 CONFIG_QCOM_L3_PMU=y
-CONFIG_NVMEM_SUNXI_SID=y
 CONFIG_NVMEM_IMX_OCOTP=y
 CONFIG_NVMEM_IMX_OCOTP_SCU=y
 CONFIG_QCOM_QFPROM=y
 CONFIG_ROCKCHIP_EFUSE=y
+CONFIG_NVMEM_SUNXI_SID=y
 CONFIG_UNIPHIER_EFUSE=y
 CONFIG_MESON_EFUSE=m
 CONFIG_FPGA=y
@@ -848,7 +845,8 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
-CONFIG_DMA_CMA=y
+CONFIG_CRYPTO_DEV_SUN8I_CE=m
+CONFIG_CRYPTO_DEV_HISI_ZIP=m
 CONFIG_CMA_SIZE_MBYTES=32
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 4922c4451e7c..b8eb0453123d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64_CE
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
@@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64
 	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
@@ -102,8 +102,15 @@ config CRYPTO_AES_ARM64_NEON_BLK
 config CRYPTO_CHACHA20_NEON
 	tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_NEON
+	tristate "Poly1305 hash function using scalar or NEON instructions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
 
 config CRYPTO_NHPOLY1305_NEON
 	tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
@@ -113,7 +120,7 @@ config CRYPTO_NHPOLY1305_NEON
 config CRYPTO_AES_ARM64_BS
 	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
 	select CRYPTO_AES_ARM64
 	select CRYPTO_LIB_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0435f2a0610e..d0901e610df3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
 chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
 
+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
+poly1305-neon-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
+
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+	$(call cmd,perlasm)
+
 $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
+
 endif
 
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 9add9bbc48d8..99a028e298ed 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -15,7 +15,7 @@
 	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
 	 *			     u32 *macp, u8 const rk[], u32 rounds);
 	 */
-ENTRY(ce_aes_ccm_auth_data)
+SYM_FUNC_START(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
@@ -81,13 +81,13 @@ ENTRY(ce_aes_ccm_auth_data)
 	st1	{v0.16b}, [x0]
 10:	str	w8, [x3]
 	ret
-ENDPROC(ce_aes_ccm_auth_data)
+SYM_FUNC_END(ce_aes_ccm_auth_data)
 
 	/*
 	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 	 * 			 u32 rounds);
 	 */
-ENTRY(ce_aes_ccm_final)
+SYM_FUNC_START(ce_aes_ccm_final)
 	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
@@ -121,7 +121,7 @@ ENTRY(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
 	st1	{v0.16b}, [x0]			/* store result */
 	ret
-ENDPROC(ce_aes_ccm_final)
+SYM_FUNC_END(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
@@ -212,10 +212,10 @@ CPU_LE(	rev	x8, x8			)
 	 * 			   u8 const rk[], u32 rounds, u8 mac[],
 	 * 			   u8 ctr[]);
 	 */
-ENTRY(ce_aes_ccm_encrypt)
+SYM_FUNC_START(ce_aes_ccm_encrypt)
 	aes_ccm_do_crypt	1
-ENDPROC(ce_aes_ccm_encrypt)
+SYM_FUNC_END(ce_aes_ccm_encrypt)
 
-ENTRY(ce_aes_ccm_decrypt)
+SYM_FUNC_START(ce_aes_ccm_decrypt)
 	aes_ccm_do_crypt	0
-ENDPROC(ce_aes_ccm_decrypt)
+SYM_FUNC_END(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 541cf9165748..f6d19b0dc893 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -47,14 +47,8 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
-	int ret;
 
-	ret = ce_aes_expandkey(ctx, in_key, key_len);
-	if (!ret)
-		return 0;
-
-	tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-	return -EINVAL;
+	return ce_aes_expandkey(ctx, in_key, key_len);
 }
 
 static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
index 76a30fe4ba8b..e52e13eb8fdb 100644
--- a/arch/arm64/crypto/aes-ce-core.S
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -8,7 +8,7 @@
 
 	.arch		armv8-a+crypto
 
-ENTRY(__aes_ce_encrypt)
+SYM_FUNC_START(__aes_ce_encrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -34,9 +34,9 @@ ENTRY(__aes_ce_encrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-ENDPROC(__aes_ce_encrypt)
+SYM_FUNC_END(__aes_ce_encrypt)
 
-ENTRY(__aes_ce_decrypt)
+SYM_FUNC_START(__aes_ce_decrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -62,23 +62,23 @@ ENTRY(__aes_ce_decrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-ENDPROC(__aes_ce_decrypt)
+SYM_FUNC_END(__aes_ce_decrypt)
 
 /*
  * __aes_ce_sub() - use the aese instruction to perform the AES sbox
  *                  substitution on each byte in 'input'
  */
-ENTRY(__aes_ce_sub)
+SYM_FUNC_START(__aes_ce_sub)
 	dup		v1.4s, w0
 	movi		v0.16b, #0
 	aese		v0.16b, v1.16b
 	umov		w0, v0.s[0]
 	ret
-ENDPROC(__aes_ce_sub)
+SYM_FUNC_END(__aes_ce_sub)
 
-ENTRY(__aes_ce_invert)
+SYM_FUNC_START(__aes_ce_invert)
 	ld1		{v0.4s}, [x1]
 	aesimc		v1.16b, v0.16b
 	st1		{v1.4s}, [x0]
 	ret
-ENDPROC(__aes_ce_invert)
+SYM_FUNC_END(__aes_ce_invert)
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index 6d085dc56c51..56a5f6f0b0c1 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -143,14 +143,8 @@ int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 		  unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	int ret;
 
-	ret = ce_aes_expandkey(ctx, in_key, key_len);
-	if (!ret)
-		return 0;
-
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-	return -EINVAL;
+	return ce_aes_expandkey(ctx, in_key, key_len);
 }
 EXPORT_SYMBOL(ce_aes_setkey);
 
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index c132c49c89a8..45062553467f 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -9,8 +9,8 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#define AES_ENTRY(func)		ENTRY(ce_ ## func)
-#define AES_ENDPROC(func)	ENDPROC(ce_ ## func)
+#define AES_ENTRY(func)		SYM_FUNC_START(ce_ ## func)
+#define AES_ENDPROC(func)	SYM_FUNC_END(ce_ ## func)
 
 	.arch		armv8-a+crypto
 
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index 423d0aebc570..c9d6955f8404 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -122,11 +122,11 @@ CPU_BE(	rev		w7, w7		)
 	ret
 	.endm
 
-ENTRY(__aes_arm64_encrypt)
+SYM_FUNC_START(__aes_arm64_encrypt)
 	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm64_encrypt)
+SYM_FUNC_END(__aes_arm64_encrypt)
 
 	.align		5
-ENTRY(__aes_arm64_decrypt)
+SYM_FUNC_START(__aes_arm64_decrypt)
 	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
-ENDPROC(__aes_arm64_decrypt)
+SYM_FUNC_END(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index aa57dc639f77..ed5409c6abf4 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -132,13 +132,8 @@ static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			       unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int ret;
-
-	ret = aes_expandkey(ctx, in_key, key_len);
-	if (ret)
-		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 
-	return ret;
+	return aes_expandkey(ctx, in_key, key_len);
 }
 
 static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
@@ -155,11 +150,7 @@ static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
 	if (!ret)
 		ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2],
 				    key_len / 2);
-	if (!ret)
-		return 0;
-
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
+	return ret;
 }
 
 static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
@@ -173,19 +164,12 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
 
 	ret = aes_expandkey(&ctx->key1, in_key, key_len);
 	if (ret)
-		goto out;
+		return ret;
 
 	desc->tfm = ctx->hash;
 	crypto_shash_digest(desc, in_key, key_len, digest);
 
-	ret = aes_expandkey(&ctx->key2, digest, sizeof(digest));
-	if (ret)
-		goto out;
-
-	return 0;
-out:
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
+	return aes_expandkey(&ctx->key2, digest, sizeof(digest));
 }
 
 static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
@@ -791,13 +775,8 @@ static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
-	int err;
 
-	err = aes_expandkey(&ctx->key, in_key, key_len);
-	if (err)
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-
-	return err;
+	return aes_expandkey(&ctx->key, in_key, key_len);
 }
 
 static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 131618389f1f..8a2faa42b57e 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -22,26 +22,26 @@
 #define ST5(x...) x
 #endif
 
-aes_encrypt_block4x:
+SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
 	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
-ENDPROC(aes_encrypt_block4x)
+SYM_FUNC_END(aes_encrypt_block4x)
 
-aes_decrypt_block4x:
+SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
 	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
-ENDPROC(aes_decrypt_block4x)
+SYM_FUNC_END(aes_decrypt_block4x)
 
 #if MAX_STRIDE == 5
-aes_encrypt_block5x:
+SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
 	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
 	ret
-ENDPROC(aes_encrypt_block5x)
+SYM_FUNC_END(aes_encrypt_block5x)
 
-aes_decrypt_block5x:
+SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
 	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
 	ret
-ENDPROC(aes_decrypt_block5x)
+SYM_FUNC_END(aes_decrypt_block5x)
 #endif
 
 	/*
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 22d9b110cf78..247d34ddaab0 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -8,8 +8,8 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#define AES_ENTRY(func)		ENTRY(neon_ ## func)
-#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
+#define AES_ENTRY(func)		SYM_FUNC_START(neon_ ## func)
+#define AES_ENDPROC(func)	SYM_FUNC_END(neon_ ## func)
 
 	xtsmask		.req	v7
 	cbciv		.req	v7
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index 65982039fa36..b357164379f6 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -380,7 +380,7 @@ ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
 	/*
 	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 	 */
-ENTRY(aesbs_convert_key)
+SYM_FUNC_START(aesbs_convert_key)
 	ld1		{v7.4s}, [x1], #16		// load round 0 key
 	ld1		{v17.4s}, [x1], #16		// load round 1 key
 
@@ -425,10 +425,10 @@ ENTRY(aesbs_convert_key)
 	eor		v17.16b, v17.16b, v7.16b
 	str		q17, [x0]
 	ret
-ENDPROC(aesbs_convert_key)
+SYM_FUNC_END(aesbs_convert_key)
 
 	.align		4
-aesbs_encrypt8:
+SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	ldr		q9, [bskey], #16		// round 0 key
 	ldr		q8, M0SR
 	ldr		q24, SR
@@ -488,10 +488,10 @@ aesbs_encrypt8:
 	eor		v2.16b, v2.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_encrypt8)
+SYM_FUNC_END(aesbs_encrypt8)
 
 	.align		4
-aesbs_decrypt8:
+SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	lsl		x9, rounds, #7
 	add		bskey, bskey, x9
 
@@ -553,7 +553,7 @@ aesbs_decrypt8:
 	eor		v3.16b, v3.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_decrypt8)
+SYM_FUNC_END(aesbs_decrypt8)
 
 	/*
 	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -621,21 +621,21 @@ ENDPROC(aesbs_decrypt8)
 	.endm
 
 	.align		4
-ENTRY(aesbs_ecb_encrypt)
+SYM_FUNC_START(aesbs_ecb_encrypt)
 	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_ecb_encrypt)
+SYM_FUNC_END(aesbs_ecb_encrypt)
 
 	.align		4
-ENTRY(aesbs_ecb_decrypt)
+SYM_FUNC_START(aesbs_ecb_decrypt)
 	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_ecb_decrypt)
+SYM_FUNC_END(aesbs_ecb_decrypt)
 
 	/*
 	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
 	.align		4
-ENTRY(aesbs_cbc_decrypt)
+SYM_FUNC_START(aesbs_cbc_decrypt)
 	frame_push	6
 
 	mov		x19, x0
@@ -720,7 +720,7 @@ ENTRY(aesbs_cbc_decrypt)
 
 2:	frame_pop
 	ret
-ENDPROC(aesbs_cbc_decrypt)
+SYM_FUNC_END(aesbs_cbc_decrypt)
 
 	.macro		next_tweak, out, in, const, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
@@ -736,7 +736,7 @@ ENDPROC(aesbs_cbc_decrypt)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
-__xts_crypt8:
+SYM_FUNC_START_LOCAL(__xts_crypt8)
 	mov		x6, #1
 	lsl		x6, x6, x23
 	subs		w23, w23, #8
@@ -789,7 +789,7 @@ __xts_crypt8:
 0:	mov		bskey, x21
 	mov		rounds, x22
 	br		x7
-ENDPROC(__xts_crypt8)
+SYM_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	frame_push	6, 64
@@ -854,13 +854,13 @@ ENDPROC(__xts_crypt8)
 	ret
 	.endm
 
-ENTRY(aesbs_xts_encrypt)
+SYM_FUNC_START(aesbs_xts_encrypt)
 	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_xts_encrypt)
+SYM_FUNC_END(aesbs_xts_encrypt)
 
-ENTRY(aesbs_xts_decrypt)
+SYM_FUNC_START(aesbs_xts_decrypt)
 	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_xts_decrypt)
+SYM_FUNC_END(aesbs_xts_decrypt)
 
 	.macro		next_ctr, v
 	mov		\v\().d[1], x8
@@ -874,7 +874,7 @@ ENDPROC(aesbs_xts_decrypt)
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 	 *		     int rounds, int blocks, u8 iv[], u8 final[])
 	 */
-ENTRY(aesbs_ctr_encrypt)
+SYM_FUNC_START(aesbs_ctr_encrypt)
 	frame_push	8
 
 	mov		x19, x0
@@ -1002,4 +1002,4 @@ CPU_LE(	rev		x8, x8		)
 7:	cbz		x25, 8b
 	st1		{v5.16b}, [x25]
 	b		8b
-ENDPROC(aesbs_ctr_encrypt)
+SYM_FUNC_END(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index ea873b8904c4..e3e27349a9fe 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 			goto xts_tail;
 
 		kernel_neon_end();
-		skcipher_walk_done(&walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	if (err || likely(!tail))
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index 706c4e10e9e2..e90386a7db8e 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -36,7 +36,7 @@
  *
  * Clobbers: w3, x10, v4, v12
  */
-chacha_permute:
+SYM_FUNC_START_LOCAL(chacha_permute)
 
 	adr_l		x10, ROT8
 	ld1		{v12.4s}, [x10]
@@ -104,9 +104,9 @@ chacha_permute:
 	b.ne		.Ldoubleround
 
 	ret
-ENDPROC(chacha_permute)
+SYM_FUNC_END(chacha_permute)
 
-ENTRY(chacha_block_xor_neon)
+SYM_FUNC_START(chacha_block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 1 data block output, o
 	// x2: 1 data block input, i
@@ -143,9 +143,9 @@ ENTRY(chacha_block_xor_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(chacha_block_xor_neon)
+SYM_FUNC_END(chacha_block_xor_neon)
 
-ENTRY(hchacha_block_neon)
+SYM_FUNC_START(hchacha_block_neon)
 	// x0: Input state matrix, s
 	// x1: output (8 32-bit words)
 	// w2: nrounds
@@ -163,7 +163,7 @@ ENTRY(hchacha_block_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(hchacha_block_neon)
+SYM_FUNC_END(hchacha_block_neon)
 
 	a0		.req	w12
 	a1		.req	w13
@@ -183,7 +183,7 @@ ENDPROC(hchacha_block_neon)
 	a15		.req	w28
 
 	.align		6
-ENTRY(chacha_4block_xor_neon)
+SYM_FUNC_START(chacha_4block_xor_neon)
 	frame_push	10
 
 	// x0: Input state matrix, s
@@ -845,7 +845,7 @@ CPU_BE(	  rev		a15, a15	)
 	eor		v31.16b, v31.16b, v3.16b
 	st1		{v28.16b-v31.16b}, [x1]
 	b		.Lout
-ENDPROC(chacha_4block_xor_neon)
+SYM_FUNC_END(chacha_4block_xor_neon)
 
 	.section	".rodata", "a", %progbits
 	.align		L1_CACHE_SHIFT
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 1495d2b18518..c1f9660d104c 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
 /*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
  * including ChaCha20 (RFC7539)
  *
  * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
@@ -20,9 +20,10 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
 				       int nrounds, int bytes);
 asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
 
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
 static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 			  int bytes, int nrounds)
 {
@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 	}
 }
 
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
+		hchacha_block_generic(state, stream, nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, stream, nrounds);
+		kernel_neon_end();
+	}
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
+	    !crypto_simd_usable())
+		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+	kernel_neon_begin();
+	chacha_doneon(state, dst, src, bytes, nrounds);
+	kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
 static int chacha_neon_stream_xor(struct skcipher_request *req,
 				  const struct chacha_ctx *ctx, const u8 *iv)
 {
@@ -68,7 +102,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	crypto_chacha_init(state, ctx, iv);
+	chacha_init_generic(state, ctx->key, iv);
 
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
@@ -76,10 +110,17 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = rounddown(nbytes, walk.stride);
 
-		kernel_neon_begin();
-		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes, ctx->nrounds);
-		kernel_neon_end();
+		if (!static_branch_likely(&have_neon) ||
+		    !crypto_simd_usable()) {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
+			kernel_neon_begin();
+			chacha_doneon(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			kernel_neon_end();
+		}
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
@@ -91,9 +132,6 @@ static int chacha_neon(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
 	return chacha_neon_stream_xor(req, ctx, req->iv);
 }
 
@@ -105,14 +143,8 @@ static int xchacha_neon(struct skcipher_request *req)
 	u32 state[16];
 	u8 real_iv[16];
 
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_neon_begin();
-	hchacha_block_neon(state, subctx.key, ctx->nrounds);
-	kernel_neon_end();
+	chacha_init_generic(state, ctx->key, req->iv);
+	hchacha_block_arch(state, subctx.key, ctx->nrounds);
 	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
@@ -134,7 +166,7 @@ static struct skcipher_alg algs[] = {
 		.ivsize			= CHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= chacha_neon,
 		.decrypt		= chacha_neon,
 	}, {
@@ -150,7 +182,7 @@ static struct skcipher_alg algs[] = {
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= xchacha_neon,
 		.decrypt		= xchacha_neon,
 	}, {
@@ -166,7 +198,7 @@ static struct skcipher_alg algs[] = {
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
+		.setkey			= chacha12_setkey,
 		.encrypt		= xchacha_neon,
 		.decrypt		= xchacha_neon,
 	}
@@ -175,14 +207,18 @@ static struct skcipher_alg algs[] = {
 static int __init chacha_simd_mod_init(void)
 {
 	if (!cpu_have_named_feature(ASIMD))
-		return -ENODEV;
+		return 0;
+
+	static_branch_enable(&have_neon);
 
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
 }
 
 static void __exit chacha_simd_mod_fini(void)
 {
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
+		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
 module_init(chacha_simd_mod_init);
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index e545b42e6a46..5a95c2628fbf 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -131,7 +131,7 @@
 	tbl		bd4.16b, {\bd\().16b}, perm4.16b
 	.endm
 
-__pmull_p8_core:
+SYM_FUNC_START_LOCAL(__pmull_p8_core)
 .L__pmull_p8_core:
 	ext		t4.8b, ad.8b, ad.8b, #1			// A1
 	ext		t5.8b, ad.8b, ad.8b, #2			// A2
@@ -194,7 +194,7 @@ __pmull_p8_core:
 	eor		t4.16b, t4.16b, t5.16b
 	eor		t6.16b, t6.16b, t3.16b
 	ret
-ENDPROC(__pmull_p8_core)
+SYM_FUNC_END(__pmull_p8_core)
 
 	.macro		__pmull_p8, rq, ad, bd, i
 	.ifnc		\bd, fold_consts
@@ -488,9 +488,9 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 //
 // Assumes len >= 16.
 //
-ENTRY(crc_t10dif_pmull_p8)
+SYM_FUNC_START(crc_t10dif_pmull_p8)
 	crc_t10dif_pmull	p8
-ENDPROC(crc_t10dif_pmull_p8)
+SYM_FUNC_END(crc_t10dif_pmull_p8)
 
 	.align		5
 //
@@ -498,9 +498,9 @@ ENDPROC(crc_t10dif_pmull_p8)
 //
 // Assumes len >= 16.
 //
-ENTRY(crc_t10dif_pmull_p64)
+SYM_FUNC_START(crc_t10dif_pmull_p64)
 	crc_t10dif_pmull	p64
-ENDPROC(crc_t10dif_pmull_p64)
+SYM_FUNC_END(crc_t10dif_pmull_p64)
 
 	.section	".rodata", "a"
 	.align		4
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 410e8afcf5a7..084c6a30b03a 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -13,8 +13,8 @@
 	T1		.req	v2
 	T2		.req	v3
 	MASK		.req	v4
-	XL		.req	v5
-	XM		.req	v6
+	XM		.req	v5
+	XL		.req	v6
 	XH		.req	v7
 	IN1		.req	v7
 
@@ -350,28 +350,45 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 	 *			   struct ghash_key const *k, const char *head)
 	 */
-ENTRY(pmull_ghash_update_p64)
+SYM_FUNC_START(pmull_ghash_update_p64)
 	__pmull_ghash	p64
-ENDPROC(pmull_ghash_update_p64)
+SYM_FUNC_END(pmull_ghash_update_p64)
 
-ENTRY(pmull_ghash_update_p8)
+SYM_FUNC_START(pmull_ghash_update_p8)
 	__pmull_ghash	p8
-ENDPROC(pmull_ghash_update_p8)
-
-	KS0		.req	v12
-	KS1		.req	v13
-	INP0		.req	v14
-	INP1		.req	v15
-
-	.macro		load_round_keys, rounds, rk
-	cmp		\rounds, #12
-	blo		2222f		/* 128 bits */
-	beq		1111f		/* 192 bits */
-	ld1		{v17.4s-v18.4s}, [\rk], #32
-1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
-2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
-	ld1		{v25.4s-v28.4s}, [\rk], #64
-	ld1		{v29.4s-v31.4s}, [\rk]
+SYM_FUNC_END(pmull_ghash_update_p8)
+
+	KS0		.req	v8
+	KS1		.req	v9
+	KS2		.req	v10
+	KS3		.req	v11
+
+	INP0		.req	v21
+	INP1		.req	v22
+	INP2		.req	v23
+	INP3		.req	v24
+
+	K0		.req	v25
+	K1		.req	v26
+	K2		.req	v27
+	K3		.req	v28
+	K4		.req	v12
+	K5		.req	v13
+	K6		.req	v4
+	K7		.req	v5
+	K8		.req	v14
+	K9		.req	v15
+	KK		.req	v29
+	KL		.req	v30
+	KM		.req	v31
+
+	.macro		load_round_keys, rounds, rk, tmp
+	add		\tmp, \rk, #64
+	ld1		{K0.4s-K3.4s}, [\rk]
+	ld1		{K4.4s-K5.4s}, [\tmp]
+	add		\tmp, \rk, \rounds, lsl #4
+	sub		\tmp, \tmp, #32
+	ld1		{KK.4s-KM.4s}, [\tmp]
 	.endm
 
 	.macro		enc_round, state, key
@@ -379,197 +396,367 @@ ENDPROC(pmull_ghash_update_p8)
 	aesmc		\state\().16b, \state\().16b
 	.endm
 
-	.macro		enc_block, state, rounds
-	cmp		\rounds, #12
-	b.lo		2222f		/* 128 bits */
-	b.eq		1111f		/* 192 bits */
-	enc_round	\state, v17
-	enc_round	\state, v18
-1111:	enc_round	\state, v19
-	enc_round	\state, v20
-2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	.macro		enc_qround, s0, s1, s2, s3, key
+	enc_round	\s0, \key
+	enc_round	\s1, \key
+	enc_round	\s2, \key
+	enc_round	\s3, \key
+	.endm
+
+	.macro		enc_block, state, rounds, rk, tmp
+	add		\tmp, \rk, #96
+	ld1		{K6.4s-K7.4s}, [\tmp], #32
+	.irp		key, K0, K1, K2, K3, K4 K5
 	enc_round	\state, \key
 	.endr
-	aese		\state\().16b, v30.16b
-	eor		\state\().16b, \state\().16b, v31.16b
+
+	tbnz		\rounds, #2, .Lnot128_\@
+.Lout256_\@:
+	enc_round	\state, K6
+	enc_round	\state, K7
+
+.Lout192_\@:
+	enc_round	\state, KK
+	aese		\state\().16b, KL.16b
+	eor		\state\().16b, \state\().16b, KM.16b
+
+	.subsection	1
+.Lnot128_\@:
+	ld1		{K8.4s-K9.4s}, [\tmp], #32
+	enc_round	\state, K6
+	enc_round	\state, K7
+	ld1		{K6.4s-K7.4s}, [\tmp]
+	enc_round	\state, K8
+	enc_round	\state, K9
+	tbz		\rounds, #1, .Lout192_\@
+	b		.Lout256_\@
+	.previous
 	.endm
 
+	.align		6
 	.macro		pmull_gcm_do_crypt, enc
-	ld1		{SHASH.2d}, [x4], #16
-	ld1		{HH.2d}, [x4]
-	ld1		{XL.2d}, [x1]
-	ldr		x8, [x5, #8]			// load lower counter
+	stp		x29, x30, [sp, #-32]!
+	mov		x29, sp
+	str		x19, [sp, #24]
+
+	load_round_keys	x7, x6, x8
+
+	ld1		{SHASH.2d}, [x3], #16
+	ld1		{HH.2d-HH4.2d}, [x3]
 
-	movi		MASK.16b, #0xe1
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
-CPU_LE(	rev		x8, x8		)
-	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, T1.16b
 
-	.if		\enc == 1
-	ldr		x10, [sp]
-	ld1		{KS0.16b-KS1.16b}, [x10]
-	.endif
+	trn1		HH34.2d, HH3.2d, HH4.2d
+	trn2		T1.2d, HH3.2d, HH4.2d
+	eor		HH34.16b, HH34.16b, T1.16b
 
-	cbnz		x6, 4f
+	ld1		{XL.2d}, [x4]
 
-0:	ld1		{INP0.16b-INP1.16b}, [x3], #32
+	cbz		x0, 3f				// tag only?
 
-	rev		x9, x8
-	add		x11, x8, #1
-	add		x8, x8, #2
+	ldr		w8, [x5, #12]			// load lower counter
+CPU_LE(	rev		w8, w8		)
 
-	.if		\enc == 1
-	eor		INP0.16b, INP0.16b, KS0.16b	// encrypt input
-	eor		INP1.16b, INP1.16b, KS1.16b
+0:	mov		w9, #4				// max blocks per round
+	add		x10, x0, #0xf
+	lsr		x10, x10, #4			// remaining blocks
+
+	subs		x0, x0, #64
+	csel		w9, w10, w9, mi
+	add		w8, w8, w9
+
+	bmi		1f
+	ld1		{INP0.16b-INP3.16b}, [x2], #64
+	.subsection	1
+	/*
+	 * Populate the four input registers right to left with up to 63 bytes
+	 * of data, using overlapping loads to avoid branches.
+	 *
+	 *                INP0     INP1     INP2     INP3
+	 *  1 byte     |        |        |        |x       |
+	 * 16 bytes    |        |        |        |xxxxxxxx|
+	 * 17 bytes    |        |        |xxxxxxxx|x       |
+	 * 47 bytes    |        |xxxxxxxx|xxxxxxxx|xxxxxxx |
+	 * etc etc
+	 *
+	 * Note that this code may read up to 15 bytes before the start of
+	 * the input. It is up to the calling code to ensure this is safe if
+	 * this happens in the first iteration of the loop (i.e., when the
+	 * input size is < 16 bytes)
+	 */
+1:	mov		x15, #16
+	ands		x19, x0, #0xf
+	csel		x19, x19, x15, ne
+	adr_l		x17, .Lpermute_table + 16
+
+	sub		x11, x15, x19
+	add		x12, x17, x11
+	sub		x17, x17, x11
+	ld1		{T1.16b}, [x12]
+	sub		x10, x1, x11
+	sub		x11, x2, x11
+
+	cmp		x0, #-16
+	csel		x14, x15, xzr, gt
+	cmp		x0, #-32
+	csel		x15, x15, xzr, gt
+	cmp		x0, #-48
+	csel		x16, x19, xzr, gt
+	csel		x1, x1, x10, gt
+	csel		x2, x2, x11, gt
+
+	ld1		{INP0.16b}, [x2], x14
+	ld1		{INP1.16b}, [x2], x15
+	ld1		{INP2.16b}, [x2], x16
+	ld1		{INP3.16b}, [x2]
+	tbl		INP3.16b, {INP3.16b}, T1.16b
+	b		2f
+	.previous
+
+2:	.if		\enc == 0
+	bl		pmull_gcm_ghash_4x
 	.endif
 
-	ld1		{KS0.8b}, [x5]			// load upper counter
-	rev		x11, x11
-	sub		w0, w0, #2
-	mov		KS1.8b, KS0.8b
-	ins		KS0.d[1], x9			// set lower counter
-	ins		KS1.d[1], x11
+	bl		pmull_gcm_enc_4x
 
-	rev64		T1.16b, INP1.16b
+	tbnz		x0, #63, 6f
+	st1		{INP0.16b-INP3.16b}, [x1], #64
+	.if		\enc == 1
+	bl		pmull_gcm_ghash_4x
+	.endif
+	bne		0b
 
-	cmp		w7, #12
-	b.ge		2f				// AES-192/256?
+3:	ldp		x19, x10, [sp, #24]
+	cbz		x10, 5f				// output tag?
 
-1:	enc_round	KS0, v21
-	ext		IN1.16b, T1.16b, T1.16b, #8
+	ld1		{INP3.16b}, [x10]		// load lengths[]
+	mov		w9, #1
+	bl		pmull_gcm_ghash_4x
 
-	enc_round	KS1, v21
-	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
+	mov		w11, #(0x1 << 24)		// BE '1U'
+	ld1		{KS0.16b}, [x5]
+	mov		KS0.s[3], w11
 
-	enc_round	KS0, v22
-	eor		T1.16b, T1.16b, IN1.16b
+	enc_block	KS0, x7, x6, x12
 
-	enc_round	KS1, v22
-	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
+	ext		XL.16b, XL.16b, XL.16b, #8
+	rev64		XL.16b, XL.16b
+	eor		XL.16b, XL.16b, KS0.16b
+	st1		{XL.16b}, [x10]			// store tag
 
-	enc_round	KS0, v23
-	pmull		XM2.1q, SHASH2.1d, T1.1d	// (a1 + a0)(b1 + b0)
+4:	ldp		x29, x30, [sp], #32
+	ret
 
-	enc_round	KS1, v23
-	rev64		T1.16b, INP0.16b
-	ext		T2.16b, XL.16b, XL.16b, #8
+5:
+CPU_LE(	rev		w8, w8		)
+	str		w8, [x5, #12]			// store lower counter
+	st1		{XL.2d}, [x4]
+	b		4b
+
+6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
+	sub		x17, x17, x19, lsl #1
+
+	cmp		w9, #1
+	beq		7f
+	.subsection	1
+7:	ld1		{INP2.16b}, [x1]
+	tbx		INP2.16b, {INP3.16b}, T1.16b
+	mov		INP3.16b, INP2.16b
+	b		8f
+	.previous
+
+	st1		{INP0.16b}, [x1], x14
+	st1		{INP1.16b}, [x1], x15
+	st1		{INP2.16b}, [x1], x16
+	tbl		INP3.16b, {INP3.16b}, T1.16b
+	tbx		INP3.16b, {INP2.16b}, T2.16b
+8:	st1		{INP3.16b}, [x1]
 
-	enc_round	KS0, v24
-	ext		IN1.16b, T1.16b, T1.16b, #8
-	eor		T1.16b, T1.16b, T2.16b
+	.if		\enc == 1
+	ld1		{T1.16b}, [x17]
+	tbl		INP3.16b, {INP3.16b}, T1.16b	// clear non-data bits
+	bl		pmull_gcm_ghash_4x
+	.endif
+	b		3b
+	.endm
 
-	enc_round	KS1, v24
-	eor		XL.16b, XL.16b, IN1.16b
+	/*
+	 * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
+	 *			  int rounds, u8 tag)
+	 */
+ENTRY(pmull_gcm_encrypt)
+	pmull_gcm_do_crypt	1
+ENDPROC(pmull_gcm_encrypt)
 
-	enc_round	KS0, v25
-	eor		T1.16b, T1.16b, XL.16b
+	/*
+	 * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
+	 *			  int rounds, u8 tag)
+	 */
+ENTRY(pmull_gcm_decrypt)
+	pmull_gcm_do_crypt	0
+ENDPROC(pmull_gcm_decrypt)
 
-	enc_round	KS1, v25
-	pmull2		XH.1q, HH.2d, XL.2d		// a1 * b1
+pmull_gcm_ghash_4x:
+	movi		MASK.16b, #0xe1
+	shl		MASK.2d, MASK.2d, #57
 
-	enc_round	KS0, v26
-	pmull		XL.1q, HH.1d, XL.1d		// a0 * b0
+	rev64		T1.16b, INP0.16b
+	rev64		T2.16b, INP1.16b
+	rev64		TT3.16b, INP2.16b
+	rev64		TT4.16b, INP3.16b
 
-	enc_round	KS1, v26
-	pmull2		XM.1q, SHASH2.2d, T1.2d		// (a1 + a0)(b1 + b0)
+	ext		XL.16b, XL.16b, XL.16b, #8
 
-	enc_round	KS0, v27
-	eor		XL.16b, XL.16b, XL2.16b
-	eor		XH.16b, XH.16b, XH2.16b
+	tbz		w9, #2, 0f			// <4 blocks?
+	.subsection	1
+0:	movi		XH2.16b, #0
+	movi		XM2.16b, #0
+	movi		XL2.16b, #0
 
-	enc_round	KS1, v27
-	eor		XM.16b, XM.16b, XM2.16b
-	ext		T1.16b, XL.16b, XH.16b, #8
+	tbz		w9, #0, 1f			// 2 blocks?
+	tbz		w9, #1, 2f			// 1 block?
 
-	enc_round	KS0, v28
-	eor		T2.16b, XL.16b, XH.16b
-	eor		XM.16b, XM.16b, T1.16b
+	eor		T2.16b, T2.16b, XL.16b
+	ext		T1.16b, T2.16b, T2.16b, #8
+	b		.Lgh3
 
-	enc_round	KS1, v28
-	eor		XM.16b, XM.16b, T2.16b
+1:	eor		TT3.16b, TT3.16b, XL.16b
+	ext		T2.16b, TT3.16b, TT3.16b, #8
+	b		.Lgh2
 
-	enc_round	KS0, v29
-	pmull		T2.1q, XL.1d, MASK.1d
+2:	eor		TT4.16b, TT4.16b, XL.16b
+	ext		IN1.16b, TT4.16b, TT4.16b, #8
+	b		.Lgh1
+	.previous
 
-	enc_round	KS1, v29
-	mov		XH.d[0], XM.d[1]
-	mov		XM.d[1], XL.d[0]
+	eor		T1.16b, T1.16b, XL.16b
+	ext		IN1.16b, T1.16b, T1.16b, #8
 
-	aese		KS0.16b, v30.16b
-	eor		XL.16b, XM.16b, T2.16b
+	pmull2		XH2.1q, HH4.2d, IN1.2d		// a1 * b1
+	eor		T1.16b, T1.16b, IN1.16b
+	pmull		XL2.1q, HH4.1d, IN1.1d		// a0 * b0
+	pmull2		XM2.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)
 
-	aese		KS1.16b, v30.16b
-	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		T1.16b, T2.16b, T2.16b, #8
+.Lgh3:	eor		T2.16b, T2.16b, T1.16b
+	pmull2		XH.1q, HH3.2d, T1.2d		// a1 * b1
+	pmull		XL.1q, HH3.1d, T1.1d		// a0 * b0
+	pmull		XM.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)
 
-	eor		KS0.16b, KS0.16b, v31.16b
-	pmull		XL.1q, XL.1d, MASK.1d
-	eor		T2.16b, T2.16b, XH.16b
+	eor		XH2.16b, XH2.16b, XH.16b
+	eor		XL2.16b, XL2.16b, XL.16b
+	eor		XM2.16b, XM2.16b, XM.16b
 
-	eor		KS1.16b, KS1.16b, v31.16b
-	eor		XL.16b, XL.16b, T2.16b
+	ext		T2.16b, TT3.16b, TT3.16b, #8
+.Lgh2:	eor		TT3.16b, TT3.16b, T2.16b
+	pmull2		XH.1q, HH.2d, T2.2d		// a1 * b1
+	pmull		XL.1q, HH.1d, T2.1d		// a0 * b0
+	pmull2		XM.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)
 
-	.if		\enc == 0
-	eor		INP0.16b, INP0.16b, KS0.16b
-	eor		INP1.16b, INP1.16b, KS1.16b
-	.endif
+	eor		XH2.16b, XH2.16b, XH.16b
+	eor		XL2.16b, XL2.16b, XL.16b
+	eor		XM2.16b, XM2.16b, XM.16b
 
-	st1		{INP0.16b-INP1.16b}, [x2], #32
+	ext		IN1.16b, TT4.16b, TT4.16b, #8
+.Lgh1:	eor		TT4.16b, TT4.16b, IN1.16b
+	pmull		XL.1q, SHASH.1d, IN1.1d		// a0 * b0
+	pmull2		XH.1q, SHASH.2d, IN1.2d		// a1 * b1
+	pmull		XM.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)
 
-	cbnz		w0, 0b
+	eor		XH.16b, XH.16b, XH2.16b
+	eor		XL.16b, XL.16b, XL2.16b
+	eor		XM.16b, XM.16b, XM2.16b
 
-CPU_LE(	rev		x8, x8		)
-	st1		{XL.2d}, [x1]
-	str		x8, [x5, #8]			// store lower counter
+	eor		T2.16b, XL.16b, XH.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		XM.16b, XM.16b, T2.16b
 
-	.if		\enc == 1
-	st1		{KS0.16b-KS1.16b}, [x10]
-	.endif
+	__pmull_reduce_p64
+
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
 
 	ret
+ENDPROC(pmull_gcm_ghash_4x)
+
+pmull_gcm_enc_4x:
+	ld1		{KS0.16b}, [x5]			// load upper counter
+	sub		w10, w8, #4
+	sub		w11, w8, #3
+	sub		w12, w8, #2
+	sub		w13, w8, #1
+	rev		w10, w10
+	rev		w11, w11
+	rev		w12, w12
+	rev		w13, w13
+	mov		KS1.16b, KS0.16b
+	mov		KS2.16b, KS0.16b
+	mov		KS3.16b, KS0.16b
+	ins		KS0.s[3], w10			// set lower counter
+	ins		KS1.s[3], w11
+	ins		KS2.s[3], w12
+	ins		KS3.s[3], w13
+
+	add		x10, x6, #96			// round key pointer
+	ld1		{K6.4s-K7.4s}, [x10], #32
+	.irp		key, K0, K1, K2, K3, K4, K5
+	enc_qround	KS0, KS1, KS2, KS3, \key
+	.endr
 
-2:	b.eq		3f				// AES-192?
-	enc_round	KS0, v17
-	enc_round	KS1, v17
-	enc_round	KS0, v18
-	enc_round	KS1, v18
-3:	enc_round	KS0, v19
-	enc_round	KS1, v19
-	enc_round	KS0, v20
-	enc_round	KS1, v20
-	b		1b
+	tbnz		x7, #2, .Lnot128
+	.subsection	1
+.Lnot128:
+	ld1		{K8.4s-K9.4s}, [x10], #32
+	.irp		key, K6, K7
+	enc_qround	KS0, KS1, KS2, KS3, \key
+	.endr
+	ld1		{K6.4s-K7.4s}, [x10]
+	.irp		key, K8, K9
+	enc_qround	KS0, KS1, KS2, KS3, \key
+	.endr
+	tbz		x7, #1, .Lout192
+	b		.Lout256
+	.previous
 
-4:	load_round_keys	w7, x6
-	b		0b
-	.endm
+.Lout256:
+	.irp		key, K6, K7
+	enc_qround	KS0, KS1, KS2, KS3, \key
+	.endr
 
-	/*
-	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
-	 *			  struct ghash_key const *k, u8 ctr[],
-	 *			  int rounds, u8 ks[])
-	 */
-ENTRY(pmull_gcm_encrypt)
-	pmull_gcm_do_crypt	1
-ENDPROC(pmull_gcm_encrypt)
+.Lout192:
+	enc_qround	KS0, KS1, KS2, KS3, KK
 
-	/*
-	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
-	 *			  struct ghash_key const *k, u8 ctr[],
-	 *			  int rounds)
-	 */
-ENTRY(pmull_gcm_decrypt)
-	pmull_gcm_do_crypt	0
-ENDPROC(pmull_gcm_decrypt)
+	aese		KS0.16b, KL.16b
+	aese		KS1.16b, KL.16b
+	aese		KS2.16b, KL.16b
+	aese		KS3.16b, KL.16b
+
+	eor		KS0.16b, KS0.16b, KM.16b
+	eor		KS1.16b, KS1.16b, KM.16b
+	eor		KS2.16b, KS2.16b, KM.16b
+	eor		KS3.16b, KS3.16b, KM.16b
+
+	eor		INP0.16b, INP0.16b, KS0.16b
+	eor		INP1.16b, INP1.16b, KS1.16b
+	eor		INP2.16b, INP2.16b, KS2.16b
+	eor		INP3.16b, INP3.16b, KS3.16b
 
-	/*
-	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
-	 */
-ENTRY(pmull_gcm_encrypt_block)
-	cbz		x2, 0f
-	load_round_keys	w3, x2
-0:	ld1		{v0.16b}, [x1]
-	enc_block	v0, w3
-	st1		{v0.16b}, [x0]
 	ret
-ENDPROC(pmull_gcm_encrypt_block)
+ENDPROC(pmull_gcm_enc_4x)
+
+	.section	".rodata", "a"
+	.align		6
+.Lpermute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.previous
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 70b1469783f9..22831d3b7f62 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -58,17 +58,15 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 				      struct ghash_key const *k,
 				      const char *head);
 
-asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
-				  const u8 src[], struct ghash_key const *k,
+asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
+				  struct ghash_key const *k, u64 dg[],
 				  u8 ctr[], u32 const rk[], int rounds,
-				  u8 ks[]);
+				  u8 tag[]);
 
-asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
-				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], u32 const rk[], int rounds);
-
-asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
-					u32 const rk[], int rounds);
+asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+				  struct ghash_key const *k, u64 dg[],
+				  u8 ctr[], u32 const rk[], int rounds,
+				  u8 tag[]);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -85,7 +83,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 						struct ghash_key const *k,
 						const char *head))
 {
-	if (likely(crypto_simd_usable())) {
+	if (likely(crypto_simd_usable() && simd_update)) {
 		kernel_neon_begin();
 		simd_update(blocks, dg, src, key, head);
 		kernel_neon_end();
@@ -250,10 +248,8 @@ static int ghash_setkey(struct crypto_shash *tfm,
 {
 	struct ghash_key *key = crypto_shash_ctx(tfm);
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	return __ghash_setkey(key, inkey, keylen);
 }
@@ -261,7 +257,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
 static struct shash_alg ghash_alg[] = {{
 	.base.cra_name		= "ghash",
 	.base.cra_driver_name	= "ghash-neon",
-	.base.cra_priority	= 100,
+	.base.cra_priority	= 150,
 	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
 	.base.cra_ctxsize	= sizeof(struct ghash_key),
 	.base.cra_module	= THIS_MODULE,
@@ -308,10 +304,8 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 	int ret;
 
 	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
-	if (ret) {
-		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (ret)
 		return -EINVAL;
-	}
 
 	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
 
@@ -398,136 +392,112 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
 	}
 }
 
-static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
-		      u64 dg[], u8 tag[], int cryptlen)
-{
-	u8 mac[AES_BLOCK_SIZE];
-	u128 lengths;
-
-	lengths.a = cpu_to_be64(req->assoclen * 8);
-	lengths.b = cpu_to_be64(cryptlen * 8);
-
-	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL,
-			pmull_ghash_update_p64);
-
-	put_unaligned_be64(dg[1], mac);
-	put_unaligned_be64(dg[0], mac + 8);
-
-	crypto_xor(tag, mac, AES_BLOCK_SIZE);
-}
-
 static int gcm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	int nrounds = num_rounds(&ctx->aes_key);
 	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
 	u8 iv[AES_BLOCK_SIZE];
-	u8 ks[2 * AES_BLOCK_SIZE];
-	u8 tag[AES_BLOCK_SIZE];
 	u64 dg[2] = {};
-	int nrounds = num_rounds(&ctx->aes_key);
+	u128 lengths;
+	u8 *tag;
 	int err;
 
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(req->cryptlen * 8);
+
 	if (req->assoclen)
 		gcm_calculate_auth_mac(req, dg);
 
 	memcpy(iv, req->iv, GCM_IV_SIZE);
-	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+	put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, false);
 
-	if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
-		u32 const *rk = NULL;
-
-		kernel_neon_begin();
-		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
-		put_unaligned_be32(2, iv + GCM_IV_SIZE);
-		pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
-		put_unaligned_be32(3, iv + GCM_IV_SIZE);
-		pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
-		put_unaligned_be32(4, iv + GCM_IV_SIZE);
-
+	if (likely(crypto_simd_usable())) {
 		do {
-			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+			const u8 *src = walk.src.virt.addr;
+			u8 *dst = walk.dst.virt.addr;
+			int nbytes = walk.nbytes;
 
-			if (rk)
-				kernel_neon_begin();
+			tag = (u8 *)&lengths;
+
+			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+				src = dst = memcpy(buf + sizeof(buf) - nbytes,
+						   src, nbytes);
+			} else if (nbytes < walk.total) {
+				nbytes &= ~(AES_BLOCK_SIZE - 1);
+				tag = NULL;
+			}
 
-			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
-					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, rk, nrounds, ks);
+			kernel_neon_begin();
+			pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+					  iv, ctx->aes_key.key_enc, nrounds,
+					  tag);
 			kernel_neon_end();
 
-			err = skcipher_walk_done(&walk,
-					walk.nbytes % (2 * AES_BLOCK_SIZE));
+			if (unlikely(!nbytes))
+				break;
 
-			rk = ctx->aes_key.key_enc;
-		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
-	} else {
-		aes_encrypt(&ctx->aes_key, tag, iv);
-		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+				memcpy(walk.dst.virt.addr,
+				       buf + sizeof(buf) - nbytes, nbytes);
 
-		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
-			const int blocks =
-				walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		} while (walk.nbytes);
+	} else {
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			const u8 *src = walk.src.virt.addr;
 			u8 *dst = walk.dst.virt.addr;
-			u8 *src = walk.src.virt.addr;
 			int remaining = blocks;
 
 			do {
-				aes_encrypt(&ctx->aes_key, ks, iv);
-				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+				aes_encrypt(&ctx->aes_key, buf, iv);
+				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
 				crypto_inc(iv, AES_BLOCK_SIZE);
 
 				dst += AES_BLOCK_SIZE;
 				src += AES_BLOCK_SIZE;
 			} while (--remaining > 0);
 
-			ghash_do_update(blocks, dg,
-					walk.dst.virt.addr, &ctx->ghash_key,
-					NULL, pmull_ghash_update_p64);
+			ghash_do_update(blocks, dg, walk.dst.virt.addr,
+					&ctx->ghash_key, NULL, NULL);
 
 			err = skcipher_walk_done(&walk,
-						 walk.nbytes % (2 * AES_BLOCK_SIZE));
+						 walk.nbytes % AES_BLOCK_SIZE);
 		}
-		if (walk.nbytes) {
-			aes_encrypt(&ctx->aes_key, ks, iv);
-			if (walk.nbytes > AES_BLOCK_SIZE) {
-				crypto_inc(iv, AES_BLOCK_SIZE);
-				aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv);
-			}
-		}
-	}
 
-	/* handle the tail */
-	if (walk.nbytes) {
-		u8 buf[GHASH_BLOCK_SIZE];
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		u8 *head = NULL;
+		/* handle the tail */
+		if (walk.nbytes) {
+			aes_encrypt(&ctx->aes_key, buf, iv);
 
-		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
-			       walk.nbytes);
+			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+				       buf, walk.nbytes);
 
-		if (walk.nbytes > GHASH_BLOCK_SIZE) {
-			head = dst;
-			dst += GHASH_BLOCK_SIZE;
-			nbytes %= GHASH_BLOCK_SIZE;
+			memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
 		}
 
-		memcpy(buf, dst, nbytes);
-		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
-		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
-				pmull_ghash_update_p64);
+		tag = (u8 *)&lengths;
+		ghash_do_update(1, dg, tag, &ctx->ghash_key,
+				walk.nbytes ? buf : NULL, NULL);
+
+		if (walk.nbytes)
+			err = skcipher_walk_done(&walk, 0);
 
-		err = skcipher_walk_done(&walk, 0);
+		put_unaligned_be64(dg[1], tag);
+		put_unaligned_be64(dg[0], tag + 8);
+		put_unaligned_be32(1, iv + GCM_IV_SIZE);
+		aes_encrypt(&ctx->aes_key, iv, iv);
+		crypto_xor(tag, iv, AES_BLOCK_SIZE);
 	}
 
 	if (err)
 		return err;
 
-	gcm_final(req, ctx, dg, tag, req->cryptlen);
-
 	/* copy authtag to end of dst */
 	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
 				 crypto_aead_authsize(aead), 1);
@@ -540,75 +510,65 @@ static int gcm_decrypt(struct aead_request *req)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
 	unsigned int authsize = crypto_aead_authsize(aead);
+	int nrounds = num_rounds(&ctx->aes_key);
 	struct skcipher_walk walk;
-	u8 iv[2 * AES_BLOCK_SIZE];
-	u8 tag[AES_BLOCK_SIZE];
-	u8 buf[2 * GHASH_BLOCK_SIZE];
+	u8 buf[AES_BLOCK_SIZE];
+	u8 iv[AES_BLOCK_SIZE];
 	u64 dg[2] = {};
-	int nrounds = num_rounds(&ctx->aes_key);
+	u128 lengths;
+	u8 *tag;
 	int err;
 
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
+
 	if (req->assoclen)
 		gcm_calculate_auth_mac(req, dg);
 
 	memcpy(iv, req->iv, GCM_IV_SIZE);
-	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+	put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, false);
 
-	if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
-		u32 const *rk = NULL;
-
-		kernel_neon_begin();
-		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
-		put_unaligned_be32(2, iv + GCM_IV_SIZE);
-
+	if (likely(crypto_simd_usable())) {
 		do {
-			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
-			int rem = walk.total - blocks * AES_BLOCK_SIZE;
-
-			if (rk)
-				kernel_neon_begin();
-
-			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
-					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, rk, nrounds);
-
-			/* check if this is the final iteration of the loop */
-			if (rem < (2 * AES_BLOCK_SIZE)) {
-				u8 *iv2 = iv + AES_BLOCK_SIZE;
-
-				if (rem > AES_BLOCK_SIZE) {
-					memcpy(iv2, iv, AES_BLOCK_SIZE);
-					crypto_inc(iv2, AES_BLOCK_SIZE);
-				}
+			const u8 *src = walk.src.virt.addr;
+			u8 *dst = walk.dst.virt.addr;
+			int nbytes = walk.nbytes;
 
-				pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);
+			tag = (u8 *)&lengths;
 
-				if (rem > AES_BLOCK_SIZE)
-					pmull_gcm_encrypt_block(iv2, iv2, NULL,
-								nrounds);
+			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+				src = dst = memcpy(buf + sizeof(buf) - nbytes,
+						   src, nbytes);
+			} else if (nbytes < walk.total) {
+				nbytes &= ~(AES_BLOCK_SIZE - 1);
+				tag = NULL;
 			}
 
+			kernel_neon_begin();
+			pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+					  iv, ctx->aes_key.key_enc, nrounds,
+					  tag);
 			kernel_neon_end();
 
-			err = skcipher_walk_done(&walk,
-					walk.nbytes % (2 * AES_BLOCK_SIZE));
+			if (unlikely(!nbytes))
+				break;
 
-			rk = ctx->aes_key.key_enc;
-		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
-	} else {
-		aes_encrypt(&ctx->aes_key, tag, iv);
-		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+				memcpy(walk.dst.virt.addr,
+				       buf + sizeof(buf) - nbytes, nbytes);
 
-		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
-			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		} while (walk.nbytes);
+	} else {
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			const u8 *src = walk.src.virt.addr;
 			u8 *dst = walk.dst.virt.addr;
-			u8 *src = walk.src.virt.addr;
 
 			ghash_do_update(blocks, dg, walk.src.virt.addr,
-					&ctx->ghash_key, NULL,
-					pmull_ghash_update_p64);
+					&ctx->ghash_key, NULL, NULL);
 
 			do {
 				aes_encrypt(&ctx->aes_key, buf, iv);
@@ -620,49 +580,38 @@ static int gcm_decrypt(struct aead_request *req)
 			} while (--blocks > 0);
 
 			err = skcipher_walk_done(&walk,
-						 walk.nbytes % (2 * AES_BLOCK_SIZE));
+						 walk.nbytes % AES_BLOCK_SIZE);
 		}
-		if (walk.nbytes) {
-			if (walk.nbytes > AES_BLOCK_SIZE) {
-				u8 *iv2 = iv + AES_BLOCK_SIZE;
-
-				memcpy(iv2, iv, AES_BLOCK_SIZE);
-				crypto_inc(iv2, AES_BLOCK_SIZE);
 
-				aes_encrypt(&ctx->aes_key, iv2, iv2);
-			}
-			aes_encrypt(&ctx->aes_key, iv, iv);
+		/* handle the tail */
+		if (walk.nbytes) {
+			memcpy(buf, walk.src.virt.addr, walk.nbytes);
+			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
 		}
-	}
 
-	/* handle the tail */
-	if (walk.nbytes) {
-		const u8 *src = walk.src.virt.addr;
-		const u8 *head = NULL;
-		unsigned int nbytes = walk.nbytes;
+		tag = (u8 *)&lengths;
+		ghash_do_update(1, dg, tag, &ctx->ghash_key,
+				walk.nbytes ? buf : NULL, NULL);
 
-		if (walk.nbytes > GHASH_BLOCK_SIZE) {
-			head = src;
-			src += GHASH_BLOCK_SIZE;
-			nbytes %= GHASH_BLOCK_SIZE;
-		}
+		if (walk.nbytes) {
+			aes_encrypt(&ctx->aes_key, buf, iv);
 
-		memcpy(buf, src, nbytes);
-		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
-		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
-				pmull_ghash_update_p64);
+			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+				       buf, walk.nbytes);
 
-		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
-			       walk.nbytes);
+			err = skcipher_walk_done(&walk, 0);
+		}
 
-		err = skcipher_walk_done(&walk, 0);
+		put_unaligned_be64(dg[1], tag);
+		put_unaligned_be64(dg[0], tag + 8);
+		put_unaligned_be32(1, iv + GCM_IV_SIZE);
+		aes_encrypt(&ctx->aes_key, iv, iv);
+		crypto_xor(tag, iv, AES_BLOCK_SIZE);
 	}
 
 	if (err)
 		return err;
 
-	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
-
 	/* compare calculated auth tag with the stored one */
 	scatterwalk_map_and_copy(buf, req->src,
 				 req->assoclen + req->cryptlen - authsize,
@@ -675,7 +624,7 @@ static int gcm_decrypt(struct aead_request *req)
 
 static struct aead_alg gcm_aes_alg = {
 	.ivsize			= GCM_IV_SIZE,
-	.chunksize		= 2 * AES_BLOCK_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
 	.maxauthsize		= AES_BLOCK_SIZE,
 	.setkey			= gcm_setkey,
 	.setauthsize		= gcm_setauthsize,
diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S
index e05570c38de7..51c0a534ef87 100644
--- a/arch/arm64/crypto/nh-neon-core.S
+++ b/arch/arm64/crypto/nh-neon-core.S
@@ -62,7 +62,7 @@
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-ENTRY(nh_neon)
+SYM_FUNC_START(nh_neon)
 
 	ld1		{K0.4s,K1.4s}, [KEY], #32
 	  movi		PASS0_SUMS.2d, #0
@@ -100,4 +100,4 @@ ENTRY(nh_neon)
 	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
 	st1		{T0.16b,T1.16b}, [HASH]
 	ret
-ENDPROC(nh_neon)
+SYM_FUNC_END(nh_neon)
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
new file mode 100644
index 000000000000..6e5576d19af8
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -0,0 +1,913 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# This module implements Poly1305 hash for ARMv8.
+#
+# June 2015
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+#		IALU/gcc-4.9	NEON
+#
+# Apple A7	1.86/+5%	0.72
+# Cortex-A53	2.69/+58%	1.47
+# Cortex-A57	2.70/+7%	1.14
+# Denver	1.64/+50%	1.18(*)
+# X-Gene	2.13/+68%	2.27
+# Mongoose	1.77/+75%	1.12
+# Kryo		2.70/+55%	1.13
+# ThunderX2	1.17/+95%	1.36
+#
+# (*)	estimate based on resources availability is less than 1.0,
+#	i.e. measured result is worse than expected, presumably binary
+#	translator is not almighty;
+
+$flavour=shift;
+$output=shift;
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}
+
+my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
+my ($mac,$nonce)=($inp,$len);
+
+my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern	OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl	poly1305_blocks
+.globl	poly1305_emit
+
+.globl	poly1305_init
+.type	poly1305_init,%function
+.align	5
+poly1305_init:
+	cmp	$inp,xzr
+	stp	xzr,xzr,[$ctx]		// zero hash value
+	stp	xzr,xzr,[$ctx,#16]	// [along with is_base2_26]
+
+	csel	x0,xzr,x0,eq
+	b.eq	.Lno_key
+
+#ifndef	__KERNEL__
+	adrp	x17,OPENSSL_armcap_P
+	ldr	w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+	ldp	$r0,$r1,[$inp]		// load key
+	mov	$s1,#0xfffffffc0fffffff
+	movk	$s1,#0x0fff,lsl#48
+#ifdef	__AARCH64EB__
+	rev	$r0,$r0			// flip bytes
+	rev	$r1,$r1
+#endif
+	and	$r0,$r0,$s1		// &=0ffffffc0fffffff
+	and	$s1,$s1,#-4
+	and	$r1,$r1,$s1		// &=0ffffffc0ffffffc
+	mov	w#$s1,#-1
+	stp	$r0,$r1,[$ctx,#32]	// save key value
+	str	w#$s1,[$ctx,#48]	// impossible key power value
+
+#ifndef	__KERNEL__
+	tst	w17,#ARMV7_NEON
+
+	adr	$d0,.Lpoly1305_blocks
+	adr	$r0,.Lpoly1305_blocks_neon
+	adr	$d1,.Lpoly1305_emit
+
+	csel	$d0,$d0,$r0,eq
+
+# ifdef	__ILP32__
+	stp	w#$d0,w#$d1,[$len]
+# else
+	stp	$d0,$d1,[$len]
+# endif
+#endif
+	mov	x0,#1
+.Lno_key:
+	ret
+.size	poly1305_init,.-poly1305_init
+
+.type	poly1305_blocks,%function
+.align	5
+poly1305_blocks:
+.Lpoly1305_blocks:
+	ands	$len,$len,#-16
+	b.eq	.Lno_data
+
+	ldp	$h0,$h1,[$ctx]		// load hash value
+	ldp	$h2,x17,[$ctx,#16]	// [along with is_base2_26]
+	ldp	$r0,$r1,[$ctx,#32]	// load key value
+
+#ifdef	__AARCH64EB__
+	lsr	$d0,$h0,#32
+	mov	w#$d1,w#$h0
+	lsr	$d2,$h1,#32
+	mov	w15,w#$h1
+	lsr	x16,$h2,#32
+#else
+	mov	w#$d0,w#$h0
+	lsr	$d1,$h0,#32
+	mov	w#$d2,w#$h1
+	lsr	x15,$h1,#32
+	mov	w16,w#$h2
+#endif
+
+	add	$d0,$d0,$d1,lsl#26	// base 2^26 -> base 2^64
+	lsr	$d1,$d2,#12
+	adds	$d0,$d0,$d2,lsl#52
+	add	$d1,$d1,x15,lsl#14
+	adc	$d1,$d1,xzr
+	lsr	$d2,x16,#24
+	adds	$d1,$d1,x16,lsl#40
+	adc	$d2,$d2,xzr
+
+	cmp	x17,#0			// is_base2_26?
+	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
+	csel	$h0,$h0,$d0,eq		// choose between radixes
+	csel	$h1,$h1,$d1,eq
+	csel	$h2,$h2,$d2,eq
+
+.Loop:
+	ldp	$t0,$t1,[$inp],#16	// load input
+	sub	$len,$len,#16
+#ifdef	__AARCH64EB__
+	rev	$t0,$t0
+	rev	$t1,$t1
+#endif
+	adds	$h0,$h0,$t0		// accumulate input
+	adcs	$h1,$h1,$t1
+
+	mul	$d0,$h0,$r0		// h0*r0
+	adc	$h2,$h2,$padbit
+	umulh	$d1,$h0,$r0
+
+	mul	$t0,$h1,$s1		// h1*5*r1
+	umulh	$t1,$h1,$s1
+
+	adds	$d0,$d0,$t0
+	mul	$t0,$h0,$r1		// h0*r1
+	adc	$d1,$d1,$t1
+	umulh	$d2,$h0,$r1
+
+	adds	$d1,$d1,$t0
+	mul	$t0,$h1,$r0		// h1*r0
+	adc	$d2,$d2,xzr
+	umulh	$t1,$h1,$r0
+
+	adds	$d1,$d1,$t0
+	mul	$t0,$h2,$s1		// h2*5*r1
+	adc	$d2,$d2,$t1
+	mul	$t1,$h2,$r0		// h2*r0
+
+	adds	$d1,$d1,$t0
+	adc	$d2,$d2,$t1
+
+	and	$t0,$d2,#-4		// final reduction
+	and	$h2,$d2,#3
+	add	$t0,$t0,$d2,lsr#2
+	adds	$h0,$d0,$t0
+	adcs	$h1,$d1,xzr
+	adc	$h2,$h2,xzr
+
+	cbnz	$len,.Loop
+
+	stp	$h0,$h1,[$ctx]		// store hash value
+	stp	$h2,xzr,[$ctx,#16]	// [and clear is_base2_26]
+
+.Lno_data:
+	ret
+.size	poly1305_blocks,.-poly1305_blocks
+
+.type	poly1305_emit,%function
+.align	5
+poly1305_emit:
+.Lpoly1305_emit:
+	ldp	$h0,$h1,[$ctx]		// load hash base 2^64
+	ldp	$h2,$r0,[$ctx,#16]	// [along with is_base2_26]
+	ldp	$t0,$t1,[$nonce]	// load nonce
+
+#ifdef	__AARCH64EB__
+	lsr	$d0,$h0,#32
+	mov	w#$d1,w#$h0
+	lsr	$d2,$h1,#32
+	mov	w15,w#$h1
+	lsr	x16,$h2,#32
+#else
+	mov	w#$d0,w#$h0
+	lsr	$d1,$h0,#32
+	mov	w#$d2,w#$h1
+	lsr	x15,$h1,#32
+	mov	w16,w#$h2
+#endif
+
+	add	$d0,$d0,$d1,lsl#26	// base 2^26 -> base 2^64
+	lsr	$d1,$d2,#12
+	adds	$d0,$d0,$d2,lsl#52
+	add	$d1,$d1,x15,lsl#14
+	adc	$d1,$d1,xzr
+	lsr	$d2,x16,#24
+	adds	$d1,$d1,x16,lsl#40
+	adc	$d2,$d2,xzr
+
+	cmp	$r0,#0			// is_base2_26?
+	csel	$h0,$h0,$d0,eq		// choose between radixes
+	csel	$h1,$h1,$d1,eq
+	csel	$h2,$h2,$d2,eq
+
+	adds	$d0,$h0,#5		// compare to modulus
+	adcs	$d1,$h1,xzr
+	adc	$d2,$h2,xzr
+
+	tst	$d2,#-4			// see if it's carried/borrowed
+
+	csel	$h0,$h0,$d0,eq
+	csel	$h1,$h1,$d1,eq
+
+#ifdef	__AARCH64EB__
+	ror	$t0,$t0,#32		// flip nonce words
+	ror	$t1,$t1,#32
+#endif
+	adds	$h0,$h0,$t0		// accumulate nonce
+	adc	$h1,$h1,$t1
+#ifdef	__AARCH64EB__
+	rev	$h0,$h0			// flip output bytes
+	rev	$h1,$h1
+#endif
+	stp	$h0,$h1,[$mac]		// write result
+
+	ret
+.size	poly1305_emit,.-poly1305_emit
+___
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
+my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
+my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
+my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
+my ($T0,$T1,$MASK) = map("v$_",(29..31));
+
+my ($in2,$zeros)=("x16","x17");
+my $is_base2_26 = $zeros;		# borrow
+
+$code.=<<___;
+.type	poly1305_mult,%function
+.align	5
+poly1305_mult:
+	mul	$d0,$h0,$r0		// h0*r0
+	umulh	$d1,$h0,$r0
+
+	mul	$t0,$h1,$s1		// h1*5*r1
+	umulh	$t1,$h1,$s1
+
+	adds	$d0,$d0,$t0
+	mul	$t0,$h0,$r1		// h0*r1
+	adc	$d1,$d1,$t1
+	umulh	$d2,$h0,$r1
+
+	adds	$d1,$d1,$t0
+	mul	$t0,$h1,$r0		// h1*r0
+	adc	$d2,$d2,xzr
+	umulh	$t1,$h1,$r0
+
+	adds	$d1,$d1,$t0
+	mul	$t0,$h2,$s1		// h2*5*r1
+	adc	$d2,$d2,$t1
+	mul	$t1,$h2,$r0		// h2*r0
+
+	adds	$d1,$d1,$t0
+	adc	$d2,$d2,$t1
+
+	and	$t0,$d2,#-4		// final reduction
+	and	$h2,$d2,#3
+	add	$t0,$t0,$d2,lsr#2
+	adds	$h0,$d0,$t0
+	adcs	$h1,$d1,xzr
+	adc	$h2,$h2,xzr
+
+	ret
+.size	poly1305_mult,.-poly1305_mult
+
+.type	poly1305_splat,%function
+.align	4
+poly1305_splat:
+	and	x12,$h0,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x13,$h0,#26,#26
+	extr	x14,$h1,$h0,#52
+	and	x14,x14,#0x03ffffff
+	ubfx	x15,$h1,#14,#26
+	extr	x16,$h2,$h1,#40
+
+	str	w12,[$ctx,#16*0]	// r0
+	add	w12,w13,w13,lsl#2	// r1*5
+	str	w13,[$ctx,#16*1]	// r1
+	add	w13,w14,w14,lsl#2	// r2*5
+	str	w12,[$ctx,#16*2]	// s1
+	str	w14,[$ctx,#16*3]	// r2
+	add	w14,w15,w15,lsl#2	// r3*5
+	str	w13,[$ctx,#16*4]	// s2
+	str	w15,[$ctx,#16*5]	// r3
+	add	w15,w16,w16,lsl#2	// r4*5
+	str	w14,[$ctx,#16*6]	// s3
+	str	w16,[$ctx,#16*7]	// r4
+	str	w15,[$ctx,#16*8]	// s4
+
+	ret
+.size	poly1305_splat,.-poly1305_splat
+
+#ifdef	__KERNEL__
+.globl	poly1305_blocks_neon
+#endif
+.type	poly1305_blocks_neon,%function
+.align	5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+	ldr	$is_base2_26,[$ctx,#24]
+	cmp	$len,#128
+	b.lo	.Lpoly1305_blocks
+
+	.inst	0xd503233f		// paciasp
+	stp	x29,x30,[sp,#-80]!
+	add	x29,sp,#0
+
+	stp	d8,d9,[sp,#16]		// meet ABI requirements
+	stp	d10,d11,[sp,#32]
+	stp	d12,d13,[sp,#48]
+	stp	d14,d15,[sp,#64]
+
+	cbz	$is_base2_26,.Lbase2_64_neon
+
+	ldp	w10,w11,[$ctx]		// load hash value base 2^26
+	ldp	w12,w13,[$ctx,#8]
+	ldr	w14,[$ctx,#16]
+
+	tst	$len,#31
+	b.eq	.Leven_neon
+
+	ldp	$r0,$r1,[$ctx,#32]	// load key value
+
+	add	$h0,x10,x11,lsl#26	// base 2^26 -> base 2^64
+	lsr	$h1,x12,#12
+	adds	$h0,$h0,x12,lsl#52
+	add	$h1,$h1,x13,lsl#14
+	adc	$h1,$h1,xzr
+	lsr	$h2,x14,#24
+	adds	$h1,$h1,x14,lsl#40
+	adc	$d2,$h2,xzr		// can be partially reduced...
+
+	ldp	$d0,$d1,[$inp],#16	// load input
+	sub	$len,$len,#16
+	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
+
+#ifdef	__AARCH64EB__
+	rev	$d0,$d0
+	rev	$d1,$d1
+#endif
+	adds	$h0,$h0,$d0		// accumulate input
+	adcs	$h1,$h1,$d1
+	adc	$h2,$h2,$padbit
+
+	bl	poly1305_mult
+
+	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,$h0,#26,#26
+	extr	x12,$h1,$h0,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,$h1,#14,#26
+	extr	x14,$h2,$h1,#40
+
+	b	.Leven_neon
+
+.align	4
+.Lbase2_64_neon:
+	ldp	$r0,$r1,[$ctx,#32]	// load key value
+
+	ldp	$h0,$h1,[$ctx]		// load hash value base 2^64
+	ldr	$h2,[$ctx,#16]
+
+	tst	$len,#31
+	b.eq	.Linit_neon
+
+	ldp	$d0,$d1,[$inp],#16	// load input
+	sub	$len,$len,#16
+	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
+#ifdef	__AARCH64EB__
+	rev	$d0,$d0
+	rev	$d1,$d1
+#endif
+	adds	$h0,$h0,$d0		// accumulate input
+	adcs	$h1,$h1,$d1
+	adc	$h2,$h2,$padbit
+
+	bl	poly1305_mult
+
+.Linit_neon:
+	ldr	w17,[$ctx,#48]		// first table element
+	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,$h0,#26,#26
+	extr	x12,$h1,$h0,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,$h1,#14,#26
+	extr	x14,$h2,$h1,#40
+
+	cmp	w17,#-1			// is value impossible?
+	b.ne	.Leven_neon
+
+	fmov	${H0},x10
+	fmov	${H1},x11
+	fmov	${H2},x12
+	fmov	${H3},x13
+	fmov	${H4},x14
+
+	////////////////////////////////// initialize r^n table
+	mov	$h0,$r0			// r^1
+	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
+	mov	$h1,$r1
+	mov	$h2,xzr
+	add	$ctx,$ctx,#48+12
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^2
+	sub	$ctx,$ctx,#4
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^3
+	sub	$ctx,$ctx,#4
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^4
+	sub	$ctx,$ctx,#4
+	bl	poly1305_splat
+	sub	$ctx,$ctx,#48		// restore original $ctx
+	b	.Ldo_neon
+
+.align	4
+.Leven_neon:
+	fmov	${H0},x10
+	fmov	${H1},x11
+	fmov	${H2},x12
+	fmov	${H3},x13
+	fmov	${H4},x14
+
+.Ldo_neon:
+	ldp	x8,x12,[$inp,#32]	// inp[2:3]
+	subs	$len,$len,#64
+	ldp	x9,x13,[$inp,#48]
+	add	$in2,$inp,#96
+	adr	$zeros,.Lzeros
+
+	lsl	$padbit,$padbit,#24
+	add	x15,$ctx,#48
+
+#ifdef	__AARCH64EB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	$IN23_0,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,$padbit,x12,lsr#40
+	add	x13,$padbit,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	$IN23_1,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	fmov	$IN23_2,x8
+	fmov	$IN23_3,x10
+	fmov	$IN23_4,x12
+
+	ldp	x8,x12,[$inp],#16	// inp[0:1]
+	ldp	x9,x13,[$inp],#48
+
+	ld1	{$R0,$R1,$S1,$R2},[x15],#64
+	ld1	{$S2,$R3,$S3,$R4},[x15],#64
+	ld1	{$S4},[x15]
+
+#ifdef	__AARCH64EB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	$IN01_0,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,$padbit,x12,lsr#40
+	add	x13,$padbit,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	$IN01_1,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	movi	$MASK.2d,#-1
+	fmov	$IN01_2,x8
+	fmov	$IN01_3,x10
+	fmov	$IN01_4,x12
+	ushr	$MASK.2d,$MASK.2d,#38
+
+	b.ls	.Lskip_loop
+
+.align	4
+.Loop_neon:
+	////////////////////////////////////////////////////////////////
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	//   \___________________/
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	//   \___________________/ \____________________/
+	//
+	// Note that we start with inp[2:3]*r^2. This is because it
+	// doesn't depend on reduction in previous iteration.
+	////////////////////////////////////////////////////////////////
+	// d4 = h0*r4 + h1*r3   + h2*r2   + h3*r1   + h4*r0
+	// d3 = h0*r3 + h1*r2   + h2*r1   + h3*r0   + h4*5*r4
+	// d2 = h0*r2 + h1*r1   + h2*r0   + h3*5*r4 + h4*5*r3
+	// d1 = h0*r1 + h1*r0   + h2*5*r4 + h3*5*r3 + h4*5*r2
+	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+	subs	$len,$len,#64
+	umull	$ACC4,$IN23_0,${R4}[2]
+	csel	$in2,$zeros,$in2,lo
+	umull	$ACC3,$IN23_0,${R3}[2]
+	umull	$ACC2,$IN23_0,${R2}[2]
+	 ldp	x8,x12,[$in2],#16	// inp[2:3] (or zero)
+	umull	$ACC1,$IN23_0,${R1}[2]
+	 ldp	x9,x13,[$in2],#48
+	umull	$ACC0,$IN23_0,${R0}[2]
+#ifdef	__AARCH64EB__
+	 rev	x8,x8
+	 rev	x12,x12
+	 rev	x9,x9
+	 rev	x13,x13
+#endif
+
+	umlal	$ACC4,$IN23_1,${R3}[2]
+	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	$ACC3,$IN23_1,${R2}[2]
+	 and	x5,x9,#0x03ffffff
+	umlal	$ACC2,$IN23_1,${R1}[2]
+	 ubfx	x6,x8,#26,#26
+	umlal	$ACC1,$IN23_1,${R0}[2]
+	 ubfx	x7,x9,#26,#26
+	umlal	$ACC0,$IN23_1,${S4}[2]
+	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+
+	umlal	$ACC4,$IN23_2,${R2}[2]
+	 extr	x8,x12,x8,#52
+	umlal	$ACC3,$IN23_2,${R1}[2]
+	 extr	x9,x13,x9,#52
+	umlal	$ACC2,$IN23_2,${R0}[2]
+	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	$ACC1,$IN23_2,${S4}[2]
+	 fmov	$IN23_0,x4
+	umlal	$ACC0,$IN23_2,${S3}[2]
+	 and	x8,x8,#0x03ffffff
+
+	umlal	$ACC4,$IN23_3,${R1}[2]
+	 and	x9,x9,#0x03ffffff
+	umlal	$ACC3,$IN23_3,${R0}[2]
+	 ubfx	x10,x12,#14,#26
+	umlal	$ACC2,$IN23_3,${S4}[2]
+	 ubfx	x11,x13,#14,#26
+	umlal	$ACC1,$IN23_3,${S3}[2]
+	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	$ACC0,$IN23_3,${S2}[2]
+	 fmov	$IN23_1,x6
+
+	add	$IN01_2,$IN01_2,$H2
+	 add	x12,$padbit,x12,lsr#40
+	umlal	$ACC4,$IN23_4,${R0}[2]
+	 add	x13,$padbit,x13,lsr#40
+	umlal	$ACC3,$IN23_4,${S4}[2]
+	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	$ACC2,$IN23_4,${S3}[2]
+	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	$ACC1,$IN23_4,${S2}[2]
+	 fmov	$IN23_2,x8
+	umlal	$ACC0,$IN23_4,${S1}[2]
+	 fmov	$IN23_3,x10
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4 and accumulate
+
+	add	$IN01_0,$IN01_0,$H0
+	 fmov	$IN23_4,x12
+	umlal	$ACC3,$IN01_2,${R1}[0]
+	 ldp	x8,x12,[$inp],#16	// inp[0:1]
+	umlal	$ACC0,$IN01_2,${S3}[0]
+	 ldp	x9,x13,[$inp],#48
+	umlal	$ACC4,$IN01_2,${R2}[0]
+	umlal	$ACC1,$IN01_2,${S4}[0]
+	umlal	$ACC2,$IN01_2,${R0}[0]
+#ifdef	__AARCH64EB__
+	 rev	x8,x8
+	 rev	x12,x12
+	 rev	x9,x9
+	 rev	x13,x13
+#endif
+
+	add	$IN01_1,$IN01_1,$H1
+	umlal	$ACC3,$IN01_0,${R3}[0]
+	umlal	$ACC4,$IN01_0,${R4}[0]
+	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	$ACC2,$IN01_0,${R2}[0]
+	 and	x5,x9,#0x03ffffff
+	umlal	$ACC0,$IN01_0,${R0}[0]
+	 ubfx	x6,x8,#26,#26
+	umlal	$ACC1,$IN01_0,${R1}[0]
+	 ubfx	x7,x9,#26,#26
+
+	add	$IN01_3,$IN01_3,$H3
+	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	umlal	$ACC3,$IN01_1,${R2}[0]
+	 extr	x8,x12,x8,#52
+	umlal	$ACC4,$IN01_1,${R3}[0]
+	 extr	x9,x13,x9,#52
+	umlal	$ACC0,$IN01_1,${S4}[0]
+	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	$ACC2,$IN01_1,${R1}[0]
+	 fmov	$IN01_0,x4
+	umlal	$ACC1,$IN01_1,${R0}[0]
+	 and	x8,x8,#0x03ffffff
+
+	add	$IN01_4,$IN01_4,$H4
+	 and	x9,x9,#0x03ffffff
+	umlal	$ACC3,$IN01_3,${R0}[0]
+	 ubfx	x10,x12,#14,#26
+	umlal	$ACC0,$IN01_3,${S2}[0]
+	 ubfx	x11,x13,#14,#26
+	umlal	$ACC4,$IN01_3,${R1}[0]
+	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	$ACC1,$IN01_3,${S3}[0]
+	 fmov	$IN01_1,x6
+	umlal	$ACC2,$IN01_3,${S4}[0]
+	 add	x12,$padbit,x12,lsr#40
+
+	umlal	$ACC3,$IN01_4,${S4}[0]
+	 add	x13,$padbit,x13,lsr#40
+	umlal	$ACC0,$IN01_4,${S1}[0]
+	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	$ACC4,$IN01_4,${R0}[0]
+	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	$ACC1,$IN01_4,${S2}[0]
+	 fmov	$IN01_2,x8
+	umlal	$ACC2,$IN01_4,${S3}[0]
+	 fmov	$IN01_3,x10
+	 fmov	$IN01_4,x12
+
+	/////////////////////////////////////////////////////////////////
+	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	// and P. Schwabe
+	//
+	// [see discussion in poly1305-armv4 module]
+
+	ushr	$T0.2d,$ACC3,#26
+	xtn	$H3,$ACC3
+	 ushr	$T1.2d,$ACC0,#26
+	 and	$ACC0,$ACC0,$MASK.2d
+	add	$ACC4,$ACC4,$T0.2d	// h3 -> h4
+	bic	$H3,#0xfc,lsl#24	// &=0x03ffffff
+	 add	$ACC1,$ACC1,$T1.2d	// h0 -> h1
+
+	ushr	$T0.2d,$ACC4,#26
+	xtn	$H4,$ACC4
+	 ushr	$T1.2d,$ACC1,#26
+	 xtn	$H1,$ACC1
+	bic	$H4,#0xfc,lsl#24
+	 add	$ACC2,$ACC2,$T1.2d	// h1 -> h2
+
+	add	$ACC0,$ACC0,$T0.2d
+	shl	$T0.2d,$T0.2d,#2
+	 shrn	$T1.2s,$ACC2,#26
+	 xtn	$H2,$ACC2
+	add	$ACC0,$ACC0,$T0.2d	// h4 -> h0
+	 bic	$H1,#0xfc,lsl#24
+	 add	$H3,$H3,$T1.2s		// h2 -> h3
+	 bic	$H2,#0xfc,lsl#24
+
+	shrn	$T0.2s,$ACC0,#26
+	xtn	$H0,$ACC0
+	 ushr	$T1.2s,$H3,#26
+	 bic	$H3,#0xfc,lsl#24
+	 bic	$H0,#0xfc,lsl#24
+	add	$H1,$H1,$T0.2s		// h0 -> h1
+	 add	$H4,$H4,$T1.2s		// h3 -> h4
+
+	b.hi	.Loop_neon
+
+.Lskip_loop:
+	dup	$IN23_2,${IN23_2}[0]
+	add	$IN01_2,$IN01_2,$H2
+
+	////////////////////////////////////////////////////////////////
+	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	adds	$len,$len,#32
+	b.ne	.Long_tail
+
+	dup	$IN23_2,${IN01_2}[0]
+	add	$IN23_0,$IN01_0,$H0
+	add	$IN23_3,$IN01_3,$H3
+	add	$IN23_1,$IN01_1,$H1
+	add	$IN23_4,$IN01_4,$H4
+
+.Long_tail:
+	dup	$IN23_0,${IN23_0}[0]
+	umull2	$ACC0,$IN23_2,${S3}
+	umull2	$ACC3,$IN23_2,${R1}
+	umull2	$ACC4,$IN23_2,${R2}
+	umull2	$ACC2,$IN23_2,${R0}
+	umull2	$ACC1,$IN23_2,${S4}
+
+	dup	$IN23_1,${IN23_1}[0]
+	umlal2	$ACC0,$IN23_0,${R0}
+	umlal2	$ACC2,$IN23_0,${R2}
+	umlal2	$ACC3,$IN23_0,${R3}
+	umlal2	$ACC4,$IN23_0,${R4}
+	umlal2	$ACC1,$IN23_0,${R1}
+
+	dup	$IN23_3,${IN23_3}[0]
+	umlal2	$ACC0,$IN23_1,${S4}
+	umlal2	$ACC3,$IN23_1,${R2}
+	umlal2	$ACC2,$IN23_1,${R1}
+	umlal2	$ACC4,$IN23_1,${R3}
+	umlal2	$ACC1,$IN23_1,${R0}
+
+	dup	$IN23_4,${IN23_4}[0]
+	umlal2	$ACC3,$IN23_3,${R0}
+	umlal2	$ACC4,$IN23_3,${R1}
+	umlal2	$ACC0,$IN23_3,${S2}
+	umlal2	$ACC1,$IN23_3,${S3}
+	umlal2	$ACC2,$IN23_3,${S4}
+
+	umlal2	$ACC3,$IN23_4,${S4}
+	umlal2	$ACC0,$IN23_4,${S1}
+	umlal2	$ACC4,$IN23_4,${R0}
+	umlal2	$ACC1,$IN23_4,${S2}
+	umlal2	$ACC2,$IN23_4,${S3}
+
+	b.eq	.Lshort_tail
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	add	$IN01_0,$IN01_0,$H0
+	umlal	$ACC3,$IN01_2,${R1}
+	umlal	$ACC0,$IN01_2,${S3}
+	umlal	$ACC4,$IN01_2,${R2}
+	umlal	$ACC1,$IN01_2,${S4}
+	umlal	$ACC2,$IN01_2,${R0}
+
+	add	$IN01_1,$IN01_1,$H1
+	umlal	$ACC3,$IN01_0,${R3}
+	umlal	$ACC0,$IN01_0,${R0}
+	umlal	$ACC4,$IN01_0,${R4}
+	umlal	$ACC1,$IN01_0,${R1}
+	umlal	$ACC2,$IN01_0,${R2}
+
+	add	$IN01_3,$IN01_3,$H3
+	umlal	$ACC3,$IN01_1,${R2}
+	umlal	$ACC0,$IN01_1,${S4}
+	umlal	$ACC4,$IN01_1,${R3}
+	umlal	$ACC1,$IN01_1,${R0}
+	umlal	$ACC2,$IN01_1,${R1}
+
+	add	$IN01_4,$IN01_4,$H4
+	umlal	$ACC3,$IN01_3,${R0}
+	umlal	$ACC0,$IN01_3,${S2}
+	umlal	$ACC4,$IN01_3,${R1}
+	umlal	$ACC1,$IN01_3,${S3}
+	umlal	$ACC2,$IN01_3,${S4}
+
+	umlal	$ACC3,$IN01_4,${S4}
+	umlal	$ACC0,$IN01_4,${S1}
+	umlal	$ACC4,$IN01_4,${R0}
+	umlal	$ACC1,$IN01_4,${S2}
+	umlal	$ACC2,$IN01_4,${S3}
+
+.Lshort_tail:
+	////////////////////////////////////////////////////////////////
+	// horizontal add
+
+	addp	$ACC3,$ACC3,$ACC3
+	 ldp	d8,d9,[sp,#16]		// meet ABI requirements
+	addp	$ACC0,$ACC0,$ACC0
+	 ldp	d10,d11,[sp,#32]
+	addp	$ACC4,$ACC4,$ACC4
+	 ldp	d12,d13,[sp,#48]
+	addp	$ACC1,$ACC1,$ACC1
+	 ldp	d14,d15,[sp,#64]
+	addp	$ACC2,$ACC2,$ACC2
+	 ldr	x30,[sp,#8]
+	 .inst	0xd50323bf		// autiasp
+
+	////////////////////////////////////////////////////////////////
+	// lazy reduction, but without narrowing
+
+	ushr	$T0.2d,$ACC3,#26
+	and	$ACC3,$ACC3,$MASK.2d
+	 ushr	$T1.2d,$ACC0,#26
+	 and	$ACC0,$ACC0,$MASK.2d
+
+	add	$ACC4,$ACC4,$T0.2d	// h3 -> h4
+	 add	$ACC1,$ACC1,$T1.2d	// h0 -> h1
+
+	ushr	$T0.2d,$ACC4,#26
+	and	$ACC4,$ACC4,$MASK.2d
+	 ushr	$T1.2d,$ACC1,#26
+	 and	$ACC1,$ACC1,$MASK.2d
+	 add	$ACC2,$ACC2,$T1.2d	// h1 -> h2
+
+	add	$ACC0,$ACC0,$T0.2d
+	shl	$T0.2d,$T0.2d,#2
+	 ushr	$T1.2d,$ACC2,#26
+	 and	$ACC2,$ACC2,$MASK.2d
+	add	$ACC0,$ACC0,$T0.2d	// h4 -> h0
+	 add	$ACC3,$ACC3,$T1.2d	// h2 -> h3
+
+	ushr	$T0.2d,$ACC0,#26
+	and	$ACC0,$ACC0,$MASK.2d
+	 ushr	$T1.2d,$ACC3,#26
+	 and	$ACC3,$ACC3,$MASK.2d
+	add	$ACC1,$ACC1,$T0.2d	// h0 -> h1
+	 add	$ACC4,$ACC4,$T1.2d	// h3 -> h4
+
+	////////////////////////////////////////////////////////////////
+	// write the result, can be partially reduced
+
+	st4	{$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
+	mov	x4,#1
+	st1	{$ACC4}[0],[$ctx]
+	str	x4,[$ctx,#8]		// set is_base2_26
+
+	ldr	x29,[sp],#80
+	ret
+.size	poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0
+.asciz	"Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.align	2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm	OPENSSL_armcap_P,4,4
+.hidden	OPENSSL_armcap_P
+#endif
+___
+
+foreach (split("\n",$code)) {
+	s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/			or
+	s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/	or
+	(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1))			or
+	(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1))	or
+	(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1))		or
+	(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1))		or
+	(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
+
+	s/\.[124]([sd])\[/.$1\[/;
+	s/w#x([0-9]+)/w$1/g;
+
+	print $_,"\n";
+}
+close STDOUT;
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..8d1c4e420ccd
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -0,0 +1,835 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern	OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl	poly1305_blocks
+.globl	poly1305_emit
+
+.globl	poly1305_init
+.type	poly1305_init,%function
+.align	5
+poly1305_init:
+	cmp	x1,xzr
+	stp	xzr,xzr,[x0]		// zero hash value
+	stp	xzr,xzr,[x0,#16]	// [along with is_base2_26]
+
+	csel	x0,xzr,x0,eq
+	b.eq	.Lno_key
+
+#ifndef	__KERNEL__
+	adrp	x17,OPENSSL_armcap_P
+	ldr	w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+	ldp	x7,x8,[x1]		// load key
+	mov	x9,#0xfffffffc0fffffff
+	movk	x9,#0x0fff,lsl#48
+#ifdef	__AARCH64EB__
+	rev	x7,x7			// flip bytes
+	rev	x8,x8
+#endif
+	and	x7,x7,x9		// &=0ffffffc0fffffff
+	and	x9,x9,#-4
+	and	x8,x8,x9		// &=0ffffffc0ffffffc
+	mov	w9,#-1
+	stp	x7,x8,[x0,#32]	// save key value
+	str	w9,[x0,#48]	// impossible key power value
+
+#ifndef	__KERNEL__
+	tst	w17,#ARMV7_NEON
+
+	adr	x12,.Lpoly1305_blocks
+	adr	x7,.Lpoly1305_blocks_neon
+	adr	x13,.Lpoly1305_emit
+
+	csel	x12,x12,x7,eq
+
+# ifdef	__ILP32__
+	stp	w12,w13,[x2]
+# else
+	stp	x12,x13,[x2]
+# endif
+#endif
+	mov	x0,#1
+.Lno_key:
+	ret
+.size	poly1305_init,.-poly1305_init
+
+.type	poly1305_blocks,%function
+.align	5
+poly1305_blocks:
+.Lpoly1305_blocks:
+	ands	x2,x2,#-16
+	b.eq	.Lno_data
+
+	ldp	x4,x5,[x0]		// load hash value
+	ldp	x6,x17,[x0,#16]	// [along with is_base2_26]
+	ldp	x7,x8,[x0,#32]	// load key value
+
+#ifdef	__AARCH64EB__
+	lsr	x12,x4,#32
+	mov	w13,w4
+	lsr	x14,x5,#32
+	mov	w15,w5
+	lsr	x16,x6,#32
+#else
+	mov	w12,w4
+	lsr	x13,x4,#32
+	mov	w14,w5
+	lsr	x15,x5,#32
+	mov	w16,w6
+#endif
+
+	add	x12,x12,x13,lsl#26	// base 2^26 -> base 2^64
+	lsr	x13,x14,#12
+	adds	x12,x12,x14,lsl#52
+	add	x13,x13,x15,lsl#14
+	adc	x13,x13,xzr
+	lsr	x14,x16,#24
+	adds	x13,x13,x16,lsl#40
+	adc	x14,x14,xzr
+
+	cmp	x17,#0			// is_base2_26?
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+	csel	x4,x4,x12,eq		// choose between radixes
+	csel	x5,x5,x13,eq
+	csel	x6,x6,x14,eq
+
+.Loop:
+	ldp	x10,x11,[x1],#16	// load input
+	sub	x2,x2,#16
+#ifdef	__AARCH64EB__
+	rev	x10,x10
+	rev	x11,x11
+#endif
+	adds	x4,x4,x10		// accumulate input
+	adcs	x5,x5,x11
+
+	mul	x12,x4,x7		// h0*r0
+	adc	x6,x6,x3
+	umulh	x13,x4,x7
+
+	mul	x10,x5,x9		// h1*5*r1
+	umulh	x11,x5,x9
+
+	adds	x12,x12,x10
+	mul	x10,x4,x8		// h0*r1
+	adc	x13,x13,x11
+	umulh	x14,x4,x8
+
+	adds	x13,x13,x10
+	mul	x10,x5,x7		// h1*r0
+	adc	x14,x14,xzr
+	umulh	x11,x5,x7
+
+	adds	x13,x13,x10
+	mul	x10,x6,x9		// h2*5*r1
+	adc	x14,x14,x11
+	mul	x11,x6,x7		// h2*r0
+
+	adds	x13,x13,x10
+	adc	x14,x14,x11
+
+	and	x10,x14,#-4		// final reduction
+	and	x6,x14,#3
+	add	x10,x10,x14,lsr#2
+	adds	x4,x12,x10
+	adcs	x5,x13,xzr
+	adc	x6,x6,xzr
+
+	cbnz	x2,.Loop
+
+	stp	x4,x5,[x0]		// store hash value
+	stp	x6,xzr,[x0,#16]	// [and clear is_base2_26]
+
+.Lno_data:
+	ret
+.size	poly1305_blocks,.-poly1305_blocks
+
+.type	poly1305_emit,%function
+.align	5
+poly1305_emit:
+.Lpoly1305_emit:
+	ldp	x4,x5,[x0]		// load hash base 2^64
+	ldp	x6,x7,[x0,#16]	// [along with is_base2_26]
+	ldp	x10,x11,[x2]	// load nonce
+
+#ifdef	__AARCH64EB__
+	lsr	x12,x4,#32
+	mov	w13,w4
+	lsr	x14,x5,#32
+	mov	w15,w5
+	lsr	x16,x6,#32
+#else
+	mov	w12,w4
+	lsr	x13,x4,#32
+	mov	w14,w5
+	lsr	x15,x5,#32
+	mov	w16,w6
+#endif
+
+	add	x12,x12,x13,lsl#26	// base 2^26 -> base 2^64
+	lsr	x13,x14,#12
+	adds	x12,x12,x14,lsl#52
+	add	x13,x13,x15,lsl#14
+	adc	x13,x13,xzr
+	lsr	x14,x16,#24
+	adds	x13,x13,x16,lsl#40
+	adc	x14,x14,xzr
+
+	cmp	x7,#0			// is_base2_26?
+	csel	x4,x4,x12,eq		// choose between radixes
+	csel	x5,x5,x13,eq
+	csel	x6,x6,x14,eq
+
+	adds	x12,x4,#5		// compare to modulus
+	adcs	x13,x5,xzr
+	adc	x14,x6,xzr
+
+	tst	x14,#-4			// see if it's carried/borrowed
+
+	csel	x4,x4,x12,eq
+	csel	x5,x5,x13,eq
+
+#ifdef	__AARCH64EB__
+	ror	x10,x10,#32		// flip nonce words
+	ror	x11,x11,#32
+#endif
+	adds	x4,x4,x10		// accumulate nonce
+	adc	x5,x5,x11
+#ifdef	__AARCH64EB__
+	rev	x4,x4			// flip output bytes
+	rev	x5,x5
+#endif
+	stp	x4,x5,[x1]		// write result
+
+	ret
+.size	poly1305_emit,.-poly1305_emit
+.type	poly1305_mult,%function
+.align	5
+poly1305_mult:
+	mul	x12,x4,x7		// h0*r0
+	umulh	x13,x4,x7
+
+	mul	x10,x5,x9		// h1*5*r1
+	umulh	x11,x5,x9
+
+	adds	x12,x12,x10
+	mul	x10,x4,x8		// h0*r1
+	adc	x13,x13,x11
+	umulh	x14,x4,x8
+
+	adds	x13,x13,x10
+	mul	x10,x5,x7		// h1*r0
+	adc	x14,x14,xzr
+	umulh	x11,x5,x7
+
+	adds	x13,x13,x10
+	mul	x10,x6,x9		// h2*5*r1
+	adc	x14,x14,x11
+	mul	x11,x6,x7		// h2*r0
+
+	adds	x13,x13,x10
+	adc	x14,x14,x11
+
+	and	x10,x14,#-4		// final reduction
+	and	x6,x14,#3
+	add	x10,x10,x14,lsr#2
+	adds	x4,x12,x10
+	adcs	x5,x13,xzr
+	adc	x6,x6,xzr
+
+	ret
+.size	poly1305_mult,.-poly1305_mult
+
+.type	poly1305_splat,%function
+.align	4
+poly1305_splat:
+	and	x12,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x13,x4,#26,#26
+	extr	x14,x5,x4,#52
+	and	x14,x14,#0x03ffffff
+	ubfx	x15,x5,#14,#26
+	extr	x16,x6,x5,#40
+
+	str	w12,[x0,#16*0]	// r0
+	add	w12,w13,w13,lsl#2	// r1*5
+	str	w13,[x0,#16*1]	// r1
+	add	w13,w14,w14,lsl#2	// r2*5
+	str	w12,[x0,#16*2]	// s1
+	str	w14,[x0,#16*3]	// r2
+	add	w14,w15,w15,lsl#2	// r3*5
+	str	w13,[x0,#16*4]	// s2
+	str	w15,[x0,#16*5]	// r3
+	add	w15,w16,w16,lsl#2	// r4*5
+	str	w14,[x0,#16*6]	// s3
+	str	w16,[x0,#16*7]	// r4
+	str	w15,[x0,#16*8]	// s4
+
+	ret
+.size	poly1305_splat,.-poly1305_splat
+
+#ifdef	__KERNEL__
+.globl	poly1305_blocks_neon
+#endif
+.type	poly1305_blocks_neon,%function
+.align	5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+	ldr	x17,[x0,#24]
+	cmp	x2,#128
+	b.lo	.Lpoly1305_blocks
+
+	.inst	0xd503233f		// paciasp
+	stp	x29,x30,[sp,#-80]!
+	add	x29,sp,#0
+
+	stp	d8,d9,[sp,#16]		// meet ABI requirements
+	stp	d10,d11,[sp,#32]
+	stp	d12,d13,[sp,#48]
+	stp	d14,d15,[sp,#64]
+
+	cbz	x17,.Lbase2_64_neon
+
+	ldp	w10,w11,[x0]		// load hash value base 2^26
+	ldp	w12,w13,[x0,#8]
+	ldr	w14,[x0,#16]
+
+	tst	x2,#31
+	b.eq	.Leven_neon
+
+	ldp	x7,x8,[x0,#32]	// load key value
+
+	add	x4,x10,x11,lsl#26	// base 2^26 -> base 2^64
+	lsr	x5,x12,#12
+	adds	x4,x4,x12,lsl#52
+	add	x5,x5,x13,lsl#14
+	adc	x5,x5,xzr
+	lsr	x6,x14,#24
+	adds	x5,x5,x14,lsl#40
+	adc	x14,x6,xzr		// can be partially reduced...
+
+	ldp	x12,x13,[x1],#16	// load input
+	sub	x2,x2,#16
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+
+#ifdef	__AARCH64EB__
+	rev	x12,x12
+	rev	x13,x13
+#endif
+	adds	x4,x4,x12		// accumulate input
+	adcs	x5,x5,x13
+	adc	x6,x6,x3
+
+	bl	poly1305_mult
+
+	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,x4,#26,#26
+	extr	x12,x5,x4,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,x5,#14,#26
+	extr	x14,x6,x5,#40
+
+	b	.Leven_neon
+
+.align	4
+.Lbase2_64_neon:
+	ldp	x7,x8,[x0,#32]	// load key value
+
+	ldp	x4,x5,[x0]		// load hash value base 2^64
+	ldr	x6,[x0,#16]
+
+	tst	x2,#31
+	b.eq	.Linit_neon
+
+	ldp	x12,x13,[x1],#16	// load input
+	sub	x2,x2,#16
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+#ifdef	__AARCH64EB__
+	rev	x12,x12
+	rev	x13,x13
+#endif
+	adds	x4,x4,x12		// accumulate input
+	adcs	x5,x5,x13
+	adc	x6,x6,x3
+
+	bl	poly1305_mult
+
+.Linit_neon:
+	ldr	w17,[x0,#48]		// first table element
+	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,x4,#26,#26
+	extr	x12,x5,x4,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,x5,#14,#26
+	extr	x14,x6,x5,#40
+
+	cmp	w17,#-1			// is value impossible?
+	b.ne	.Leven_neon
+
+	fmov	d24,x10
+	fmov	d25,x11
+	fmov	d26,x12
+	fmov	d27,x13
+	fmov	d28,x14
+
+	////////////////////////////////// initialize r^n table
+	mov	x4,x7			// r^1
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+	mov	x5,x8
+	mov	x6,xzr
+	add	x0,x0,#48+12
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^2
+	sub	x0,x0,#4
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^3
+	sub	x0,x0,#4
+	bl	poly1305_splat
+
+	bl	poly1305_mult		// r^4
+	sub	x0,x0,#4
+	bl	poly1305_splat
+	sub	x0,x0,#48		// restore original x0
+	b	.Ldo_neon
+
+.align	4
+.Leven_neon:
+	fmov	d24,x10
+	fmov	d25,x11
+	fmov	d26,x12
+	fmov	d27,x13
+	fmov	d28,x14
+
+.Ldo_neon:
+	ldp	x8,x12,[x1,#32]	// inp[2:3]
+	subs	x2,x2,#64
+	ldp	x9,x13,[x1,#48]
+	add	x16,x1,#96
+	adr	x17,.Lzeros
+
+	lsl	x3,x3,#24
+	add	x15,x0,#48
+
+#ifdef	__AARCH64EB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	d14,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,x3,x12,lsr#40
+	add	x13,x3,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	d15,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	fmov	d16,x8
+	fmov	d17,x10
+	fmov	d18,x12
+
+	ldp	x8,x12,[x1],#16	// inp[0:1]
+	ldp	x9,x13,[x1],#48
+
+	ld1	{v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
+	ld1	{v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
+	ld1	{v8.4s},[x15]
+
+#ifdef	__AARCH64EB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	d9,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,x3,x12,lsr#40
+	add	x13,x3,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	d10,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	movi	v31.2d,#-1
+	fmov	d11,x8
+	fmov	d12,x10
+	fmov	d13,x12
+	ushr	v31.2d,v31.2d,#38
+
+	b.ls	.Lskip_loop
+
+.align	4
+.Loop_neon:
+	////////////////////////////////////////////////////////////////
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	//   ___________________/
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	//   ___________________/ ____________________/
+	//
+	// Note that we start with inp[2:3]*r^2. This is because it
+	// doesn't depend on reduction in previous iteration.
+	////////////////////////////////////////////////////////////////
+	// d4 = h0*r4 + h1*r3   + h2*r2   + h3*r1   + h4*r0
+	// d3 = h0*r3 + h1*r2   + h2*r1   + h3*r0   + h4*5*r4
+	// d2 = h0*r2 + h1*r1   + h2*r0   + h3*5*r4 + h4*5*r3
+	// d1 = h0*r1 + h1*r0   + h2*5*r4 + h3*5*r3 + h4*5*r2
+	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+	subs	x2,x2,#64
+	umull	v23.2d,v14.2s,v7.s[2]
+	csel	x16,x17,x16,lo
+	umull	v22.2d,v14.2s,v5.s[2]
+	umull	v21.2d,v14.2s,v3.s[2]
+	 ldp	x8,x12,[x16],#16	// inp[2:3] (or zero)
+	umull	v20.2d,v14.2s,v1.s[2]
+	 ldp	x9,x13,[x16],#48
+	umull	v19.2d,v14.2s,v0.s[2]
+#ifdef	__AARCH64EB__
+	 rev	x8,x8
+	 rev	x12,x12
+	 rev	x9,x9
+	 rev	x13,x13
+#endif
+
+	umlal	v23.2d,v15.2s,v5.s[2]
+	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	v22.2d,v15.2s,v3.s[2]
+	 and	x5,x9,#0x03ffffff
+	umlal	v21.2d,v15.2s,v1.s[2]
+	 ubfx	x6,x8,#26,#26
+	umlal	v20.2d,v15.2s,v0.s[2]
+	 ubfx	x7,x9,#26,#26
+	umlal	v19.2d,v15.2s,v8.s[2]
+	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+
+	umlal	v23.2d,v16.2s,v3.s[2]
+	 extr	x8,x12,x8,#52
+	umlal	v22.2d,v16.2s,v1.s[2]
+	 extr	x9,x13,x9,#52
+	umlal	v21.2d,v16.2s,v0.s[2]
+	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	v20.2d,v16.2s,v8.s[2]
+	 fmov	d14,x4
+	umlal	v19.2d,v16.2s,v6.s[2]
+	 and	x8,x8,#0x03ffffff
+
+	umlal	v23.2d,v17.2s,v1.s[2]
+	 and	x9,x9,#0x03ffffff
+	umlal	v22.2d,v17.2s,v0.s[2]
+	 ubfx	x10,x12,#14,#26
+	umlal	v21.2d,v17.2s,v8.s[2]
+	 ubfx	x11,x13,#14,#26
+	umlal	v20.2d,v17.2s,v6.s[2]
+	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	v19.2d,v17.2s,v4.s[2]
+	 fmov	d15,x6
+
+	add	v11.2s,v11.2s,v26.2s
+	 add	x12,x3,x12,lsr#40
+	umlal	v23.2d,v18.2s,v0.s[2]
+	 add	x13,x3,x13,lsr#40
+	umlal	v22.2d,v18.2s,v8.s[2]
+	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	v21.2d,v18.2s,v6.s[2]
+	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	v20.2d,v18.2s,v4.s[2]
+	 fmov	d16,x8
+	umlal	v19.2d,v18.2s,v2.s[2]
+	 fmov	d17,x10
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4 and accumulate
+
+	add	v9.2s,v9.2s,v24.2s
+	 fmov	d18,x12
+	umlal	v22.2d,v11.2s,v1.s[0]
+	 ldp	x8,x12,[x1],#16	// inp[0:1]
+	umlal	v19.2d,v11.2s,v6.s[0]
+	 ldp	x9,x13,[x1],#48
+	umlal	v23.2d,v11.2s,v3.s[0]
+	umlal	v20.2d,v11.2s,v8.s[0]
+	umlal	v21.2d,v11.2s,v0.s[0]
+#ifdef	__AARCH64EB__
+	 rev	x8,x8
+	 rev	x12,x12
+	 rev	x9,x9
+	 rev	x13,x13
+#endif
+
+	add	v10.2s,v10.2s,v25.2s
+	umlal	v22.2d,v9.2s,v5.s[0]
+	umlal	v23.2d,v9.2s,v7.s[0]
+	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	v21.2d,v9.2s,v3.s[0]
+	 and	x5,x9,#0x03ffffff
+	umlal	v19.2d,v9.2s,v0.s[0]
+	 ubfx	x6,x8,#26,#26
+	umlal	v20.2d,v9.2s,v1.s[0]
+	 ubfx	x7,x9,#26,#26
+
+	add	v12.2s,v12.2s,v27.2s
+	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	umlal	v22.2d,v10.2s,v3.s[0]
+	 extr	x8,x12,x8,#52
+	umlal	v23.2d,v10.2s,v5.s[0]
+	 extr	x9,x13,x9,#52
+	umlal	v19.2d,v10.2s,v8.s[0]
+	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	v21.2d,v10.2s,v1.s[0]
+	 fmov	d9,x4
+	umlal	v20.2d,v10.2s,v0.s[0]
+	 and	x8,x8,#0x03ffffff
+
+	add	v13.2s,v13.2s,v28.2s
+	 and	x9,x9,#0x03ffffff
+	umlal	v22.2d,v12.2s,v0.s[0]
+	 ubfx	x10,x12,#14,#26
+	umlal	v19.2d,v12.2s,v4.s[0]
+	 ubfx	x11,x13,#14,#26
+	umlal	v23.2d,v12.2s,v1.s[0]
+	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	v20.2d,v12.2s,v6.s[0]
+	 fmov	d10,x6
+	umlal	v21.2d,v12.2s,v8.s[0]
+	 add	x12,x3,x12,lsr#40
+
+	umlal	v22.2d,v13.2s,v8.s[0]
+	 add	x13,x3,x13,lsr#40
+	umlal	v19.2d,v13.2s,v2.s[0]
+	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	v23.2d,v13.2s,v0.s[0]
+	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	v20.2d,v13.2s,v4.s[0]
+	 fmov	d11,x8
+	umlal	v21.2d,v13.2s,v6.s[0]
+	 fmov	d12,x10
+	 fmov	d13,x12
+
+	/////////////////////////////////////////////////////////////////
+	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	// and P. Schwabe
+	//
+	// [see discussion in poly1305-armv4 module]
+
+	ushr	v29.2d,v22.2d,#26
+	xtn	v27.2s,v22.2d
+	 ushr	v30.2d,v19.2d,#26
+	 and	v19.16b,v19.16b,v31.16b
+	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
+	bic	v27.2s,#0xfc,lsl#24	// &=0x03ffffff
+	 add	v20.2d,v20.2d,v30.2d	// h0 -> h1
+
+	ushr	v29.2d,v23.2d,#26
+	xtn	v28.2s,v23.2d
+	 ushr	v30.2d,v20.2d,#26
+	 xtn	v25.2s,v20.2d
+	bic	v28.2s,#0xfc,lsl#24
+	 add	v21.2d,v21.2d,v30.2d	// h1 -> h2
+
+	add	v19.2d,v19.2d,v29.2d
+	shl	v29.2d,v29.2d,#2
+	 shrn	v30.2s,v21.2d,#26
+	 xtn	v26.2s,v21.2d
+	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
+	 bic	v25.2s,#0xfc,lsl#24
+	 add	v27.2s,v27.2s,v30.2s		// h2 -> h3
+	 bic	v26.2s,#0xfc,lsl#24
+
+	shrn	v29.2s,v19.2d,#26
+	xtn	v24.2s,v19.2d
+	 ushr	v30.2s,v27.2s,#26
+	 bic	v27.2s,#0xfc,lsl#24
+	 bic	v24.2s,#0xfc,lsl#24
+	add	v25.2s,v25.2s,v29.2s		// h0 -> h1
+	 add	v28.2s,v28.2s,v30.2s		// h3 -> h4
+
+	b.hi	.Loop_neon
+
+.Lskip_loop:
+	dup	v16.2d,v16.d[0]
+	add	v11.2s,v11.2s,v26.2s
+
+	////////////////////////////////////////////////////////////////
+	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	adds	x2,x2,#32
+	b.ne	.Long_tail
+
+	dup	v16.2d,v11.d[0]
+	add	v14.2s,v9.2s,v24.2s
+	add	v17.2s,v12.2s,v27.2s
+	add	v15.2s,v10.2s,v25.2s
+	add	v18.2s,v13.2s,v28.2s
+
+.Long_tail:
+	dup	v14.2d,v14.d[0]
+	umull2	v19.2d,v16.4s,v6.4s
+	umull2	v22.2d,v16.4s,v1.4s
+	umull2	v23.2d,v16.4s,v3.4s
+	umull2	v21.2d,v16.4s,v0.4s
+	umull2	v20.2d,v16.4s,v8.4s
+
+	dup	v15.2d,v15.d[0]
+	umlal2	v19.2d,v14.4s,v0.4s
+	umlal2	v21.2d,v14.4s,v3.4s
+	umlal2	v22.2d,v14.4s,v5.4s
+	umlal2	v23.2d,v14.4s,v7.4s
+	umlal2	v20.2d,v14.4s,v1.4s
+
+	dup	v17.2d,v17.d[0]
+	umlal2	v19.2d,v15.4s,v8.4s
+	umlal2	v22.2d,v15.4s,v3.4s
+	umlal2	v21.2d,v15.4s,v1.4s
+	umlal2	v23.2d,v15.4s,v5.4s
+	umlal2	v20.2d,v15.4s,v0.4s
+
+	dup	v18.2d,v18.d[0]
+	umlal2	v22.2d,v17.4s,v0.4s
+	umlal2	v23.2d,v17.4s,v1.4s
+	umlal2	v19.2d,v17.4s,v4.4s
+	umlal2	v20.2d,v17.4s,v6.4s
+	umlal2	v21.2d,v17.4s,v8.4s
+
+	umlal2	v22.2d,v18.4s,v8.4s
+	umlal2	v19.2d,v18.4s,v2.4s
+	umlal2	v23.2d,v18.4s,v0.4s
+	umlal2	v20.2d,v18.4s,v4.4s
+	umlal2	v21.2d,v18.4s,v6.4s
+
+	b.eq	.Lshort_tail
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	add	v9.2s,v9.2s,v24.2s
+	umlal	v22.2d,v11.2s,v1.2s
+	umlal	v19.2d,v11.2s,v6.2s
+	umlal	v23.2d,v11.2s,v3.2s
+	umlal	v20.2d,v11.2s,v8.2s
+	umlal	v21.2d,v11.2s,v0.2s
+
+	add	v10.2s,v10.2s,v25.2s
+	umlal	v22.2d,v9.2s,v5.2s
+	umlal	v19.2d,v9.2s,v0.2s
+	umlal	v23.2d,v9.2s,v7.2s
+	umlal	v20.2d,v9.2s,v1.2s
+	umlal	v21.2d,v9.2s,v3.2s
+
+	add	v12.2s,v12.2s,v27.2s
+	umlal	v22.2d,v10.2s,v3.2s
+	umlal	v19.2d,v10.2s,v8.2s
+	umlal	v23.2d,v10.2s,v5.2s
+	umlal	v20.2d,v10.2s,v0.2s
+	umlal	v21.2d,v10.2s,v1.2s
+
+	add	v13.2s,v13.2s,v28.2s
+	umlal	v22.2d,v12.2s,v0.2s
+	umlal	v19.2d,v12.2s,v4.2s
+	umlal	v23.2d,v12.2s,v1.2s
+	umlal	v20.2d,v12.2s,v6.2s
+	umlal	v21.2d,v12.2s,v8.2s
+
+	umlal	v22.2d,v13.2s,v8.2s
+	umlal	v19.2d,v13.2s,v2.2s
+	umlal	v23.2d,v13.2s,v0.2s
+	umlal	v20.2d,v13.2s,v4.2s
+	umlal	v21.2d,v13.2s,v6.2s
+
+.Lshort_tail:
+	////////////////////////////////////////////////////////////////
+	// horizontal add
+
+	addp	v22.2d,v22.2d,v22.2d
+	 ldp	d8,d9,[sp,#16]		// meet ABI requirements
+	addp	v19.2d,v19.2d,v19.2d
+	 ldp	d10,d11,[sp,#32]
+	addp	v23.2d,v23.2d,v23.2d
+	 ldp	d12,d13,[sp,#48]
+	addp	v20.2d,v20.2d,v20.2d
+	 ldp	d14,d15,[sp,#64]
+	addp	v21.2d,v21.2d,v21.2d
+	 ldr	x30,[sp,#8]
+	 .inst	0xd50323bf		// autiasp
+
+	////////////////////////////////////////////////////////////////
+	// lazy reduction, but without narrowing
+
+	ushr	v29.2d,v22.2d,#26
+	and	v22.16b,v22.16b,v31.16b
+	 ushr	v30.2d,v19.2d,#26
+	 and	v19.16b,v19.16b,v31.16b
+
+	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
+	 add	v20.2d,v20.2d,v30.2d	// h0 -> h1
+
+	ushr	v29.2d,v23.2d,#26
+	and	v23.16b,v23.16b,v31.16b
+	 ushr	v30.2d,v20.2d,#26
+	 and	v20.16b,v20.16b,v31.16b
+	 add	v21.2d,v21.2d,v30.2d	// h1 -> h2
+
+	add	v19.2d,v19.2d,v29.2d
+	shl	v29.2d,v29.2d,#2
+	 ushr	v30.2d,v21.2d,#26
+	 and	v21.16b,v21.16b,v31.16b
+	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
+	 add	v22.2d,v22.2d,v30.2d	// h2 -> h3
+
+	ushr	v29.2d,v19.2d,#26
+	and	v19.16b,v19.16b,v31.16b
+	 ushr	v30.2d,v22.2d,#26
+	 and	v22.16b,v22.16b,v31.16b
+	add	v20.2d,v20.2d,v29.2d	// h0 -> h1
+	 add	v23.2d,v23.2d,v30.2d	// h3 -> h4
+
+	////////////////////////////////////////////////////////////////
+	// write the result, can be partially reduced
+
+	st4	{v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
+	mov	x4,#1
+	st1	{v23.s}[0],[x0]
+	str	x4,[x0,#8]		// set is_base2_26
+
+	ldr	x29,[sp],#80
+	ret
+.size	poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0
+.asciz	"Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
+.align	2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm	OPENSSL_armcap_P,4,4
+.hidden	OPENSSL_armcap_P
+#endif
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..e97b092f56b8
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_init_arm64(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(key + 16);
+	dctx->s[1] = get_unaligned_le32(key + 20);
+	dctx->s[2] = get_unaligned_le32(key + 24);
+	dctx->s[3] = get_unaligned_le32(key + 28);
+	dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int neon_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+				 u32 len, u32 hibit, bool do_neon)
+{
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset) {
+			poly1305_init_arch(dctx, src);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(src +  0);
+			dctx->s[1] = get_unaligned_le32(src +  4);
+			dctx->s[2] = get_unaligned_le32(src +  8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+		if (len < POLY1305_BLOCK_SIZE)
+			return;
+	}
+
+	len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+	if (static_branch_likely(&have_neon) && likely(do_neon))
+		poly1305_blocks_neon(&dctx->h, src, len, hibit);
+	else
+		poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+				    const u8 *src, u32 len, bool do_neon)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		len -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			neon_poly1305_blocks(dctx, dctx->buf,
+					     POLY1305_BLOCK_SIZE, 1, false);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(len >= POLY1305_BLOCK_SIZE)) {
+		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
+		src += round_down(len, POLY1305_BLOCK_SIZE);
+		len %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(len)) {
+		dctx->buflen = len;
+		memcpy(dctx->buf, src, len);
+	}
+}
+
+static int neon_poly1305_update(struct shash_desc *desc,
+				const u8 *src, unsigned int srclen)
+{
+	bool do_neon = crypto_simd_usable() && srclen > 128;
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_begin();
+	neon_poly1305_do_update(dctx, src, srclen, do_neon);
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_end();
+	return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int nbytes)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		nbytes -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+			kernel_neon_begin();
+			poly1305_blocks_neon(&dctx->h, src, len, 1);
+			kernel_neon_end();
+		} else {
+			poly1305_blocks(&dctx->h, src, len, 1);
+		}
+		src += len;
+		nbytes %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(nbytes)) {
+		dctx->buflen = nbytes;
+		memcpy(dctx->buf, src, nbytes);
+	}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_emit(&dctx->h, dst, dctx->s);
+	*dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	poly1305_final_arch(dctx, dst);
+	return 0;
+}
+
+static struct shash_alg neon_poly1305_alg = {
+	.init			= neon_poly1305_init,
+	.update			= neon_poly1305_update,
+	.final			= neon_poly1305_final,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.descsize		= sizeof(struct poly1305_desc_ctx),
+
+	.base.cra_name		= "poly1305",
+	.base.cra_driver_name	= "poly1305-neon",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init neon_poly1305_mod_init(void)
+{
+	if (!cpu_have_named_feature(ASIMD))
+		return 0;
+
+	static_branch_enable(&have_neon);
+
+	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+		crypto_register_shash(&neon_poly1305_alg) : 0;
+}
+
+static void __exit neon_poly1305_mod_exit(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
+		crypto_unregister_shash(&neon_poly1305_alg);
+}
+
+module_init(neon_poly1305_mod_init);
+module_exit(neon_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index c2ce1f820706..92d0d2753e81 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -65,7 +65,7 @@
 	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 	 *			  int blocks)
 	 */
-ENTRY(sha1_ce_transform)
+SYM_FUNC_START(sha1_ce_transform)
 	frame_push	3
 
 	mov		x19, x0
@@ -160,4 +160,4 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	str		dgb, [x19, #16]
 	frame_pop
 	ret
-ENDPROC(sha1_ce_transform)
+SYM_FUNC_END(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index bdc1b6d7aff7..63c875d3314b 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -28,6 +28,13 @@ struct sha1_ce_state {
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
 
+static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+				int blocks)
+{
+	sha1_ce_transform(container_of(sst, struct sha1_ce_state, sst), src,
+			  blocks);
+}
+
 const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
 const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
 
@@ -41,8 +48,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 
 	sctx->finalize = 0;
 	kernel_neon_begin();
-	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_ce_transform);
+	sha1_base_do_update(desc, data, len, __sha1_ce_transform);
 	kernel_neon_end();
 
 	return 0;
@@ -64,10 +70,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	sctx->finalize = finalize;
 
 	kernel_neon_begin();
-	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_ce_transform);
+	sha1_base_do_update(desc, data, len, __sha1_ce_transform);
 	if (!finalize)
-		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+		sha1_base_do_finalize(desc, __sha1_ce_transform);
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);
 }
@@ -81,7 +86,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 
 	sctx->finalize = 0;
 	kernel_neon_begin();
-	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	sha1_base_do_finalize(desc, __sha1_ce_transform);
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);
 }
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 6f728a419009..3f9d0f326987 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -75,7 +75,7 @@
 	 *			  int blocks)
 	 */
 	.text
-ENTRY(sha2_ce_transform)
+SYM_FUNC_START(sha2_ce_transform)
 	frame_push	3
 
 	mov		x19, x0
@@ -166,4 +166,4 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 4:	st1		{dgav.4s, dgbv.4s}, [x19]
 	frame_pop
 	ret
-ENDPROC(sha2_ce_transform)
+SYM_FUNC_END(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 604a01a4ede6..a8e67bafba3d 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -28,6 +28,13 @@ struct sha256_ce_state {
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
 
+static void __sha2_ce_transform(struct sha256_state *sst, u8 const *src,
+				int blocks)
+{
+	sha2_ce_transform(container_of(sst, struct sha256_ce_state, sst), src,
+			  blocks);
+}
+
 const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 					      sst.count);
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
@@ -35,6 +42,12 @@ const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
 
 asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
 
+static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src,
+				      int blocks)
+{
+	sha256_block_data_order(sst->state, src, blocks);
+}
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
@@ -42,12 +55,11 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 
 	if (!crypto_simd_usable())
 		return sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
+				__sha256_block_data_order);
 
 	sctx->finalize = 0;
 	kernel_neon_begin();
-	sha256_base_do_update(desc, data, len,
-			      (sha256_block_fn *)sha2_ce_transform);
+	sha256_base_do_update(desc, data, len, __sha2_ce_transform);
 	kernel_neon_end();
 
 	return 0;
@@ -62,9 +74,8 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	if (!crypto_simd_usable()) {
 		if (len)
 			sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
-		sha256_base_do_finalize(desc,
-				(sha256_block_fn *)sha256_block_data_order);
+				__sha256_block_data_order);
+		sha256_base_do_finalize(desc, __sha256_block_data_order);
 		return sha256_base_finish(desc, out);
 	}
 
@@ -75,11 +86,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	sctx->finalize = finalize;
 
 	kernel_neon_begin();
-	sha256_base_do_update(desc, data, len,
-			      (sha256_block_fn *)sha2_ce_transform);
+	sha256_base_do_update(desc, data, len, __sha2_ce_transform);
 	if (!finalize)
-		sha256_base_do_finalize(desc,
-					(sha256_block_fn *)sha2_ce_transform);
+		sha256_base_do_finalize(desc, __sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);
 }
@@ -89,14 +98,13 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
 	if (!crypto_simd_usable()) {
-		sha256_base_do_finalize(desc,
-				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc, __sha256_block_data_order);
 		return sha256_base_finish(desc, out);
 	}
 
 	sctx->finalize = 0;
 	kernel_neon_begin();
-	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+	sha256_base_do_finalize(desc, __sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);
 }
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index e273faca924f..ddf4a0d85c1c 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -27,14 +27,26 @@ asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
 EXPORT_SYMBOL(sha256_block_data_order);
 
+static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src,
+				      int blocks)
+{
+	sha256_block_data_order(sst->state, src, blocks);
+}
+
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);
 
+static void __sha256_block_neon(struct sha256_state *sst, u8 const *src,
+				int blocks)
+{
+	sha256_block_neon(sst->state, src, blocks);
+}
+
 static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data,
 				      unsigned int len)
 {
 	return sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
+				     __sha256_block_data_order);
 }
 
 static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data,
@@ -42,9 +54,8 @@ static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data,
 {
 	if (len)
 		sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
-	sha256_base_do_finalize(desc,
-				(sha256_block_fn *)sha256_block_data_order);
+				      __sha256_block_data_order);
+	sha256_base_do_finalize(desc, __sha256_block_data_order);
 
 	return sha256_base_finish(desc, out);
 }
@@ -87,7 +98,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
 
 	if (!crypto_simd_usable())
 		return sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
+				__sha256_block_data_order);
 
 	while (len > 0) {
 		unsigned int chunk = len;
@@ -97,14 +108,13 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
 		 * input when running on a preemptible kernel, but process the
 		 * data block by block instead.
 		 */
-		if (IS_ENABLED(CONFIG_PREEMPT) &&
+		if (IS_ENABLED(CONFIG_PREEMPTION) &&
 		    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
 			chunk = SHA256_BLOCK_SIZE -
 				sctx->count % SHA256_BLOCK_SIZE;
 
 		kernel_neon_begin();
-		sha256_base_do_update(desc, data, chunk,
-				      (sha256_block_fn *)sha256_block_neon);
+		sha256_base_do_update(desc, data, chunk, __sha256_block_neon);
 		kernel_neon_end();
 		data += chunk;
 		len -= chunk;
@@ -118,15 +128,13 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
 	if (!crypto_simd_usable()) {
 		if (len)
 			sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_data_order);
-		sha256_base_do_finalize(desc,
-				(sha256_block_fn *)sha256_block_data_order);
+				__sha256_block_data_order);
+		sha256_base_do_finalize(desc, __sha256_block_data_order);
 	} else {
 		if (len)
 			sha256_update_neon(desc, data, len);
 		kernel_neon_begin();
-		sha256_base_do_finalize(desc,
-				(sha256_block_fn *)sha256_block_neon);
+		sha256_base_do_finalize(desc, __sha256_block_neon);
 		kernel_neon_end();
 	}
 	return sha256_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index a7d587fa54f6..1cfb768df350 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -40,7 +40,7 @@
 	 * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 	 */
 	.text
-ENTRY(sha3_ce_transform)
+SYM_FUNC_START(sha3_ce_transform)
 	frame_push	4
 
 	mov	x19, x0
@@ -218,7 +218,7 @@ ENTRY(sha3_ce_transform)
 	st1	{v24.1d}, [x19]
 	frame_pop
 	ret
-ENDPROC(sha3_ce_transform)
+SYM_FUNC_END(sha3_ce_transform)
 
 	.section	".rodata", "a"
 	.align		8
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index ce65e3abe4f2..cde606c0323e 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -106,7 +106,7 @@
 	 *			  int blocks)
 	 */
 	.text
-ENTRY(sha512_ce_transform)
+SYM_FUNC_START(sha512_ce_transform)
 	frame_push	3
 
 	mov		x19, x0
@@ -216,4 +216,4 @@ CPU_LE(	rev64		v19.16b, v19.16b	)
 3:	st1		{v8.2d-v11.2d}, [x19]
 	frame_pop
 	ret
-ENDPROC(sha512_ce_transform)
+SYM_FUNC_END(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index 2369540040aa..dc890a719f54 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -29,16 +29,21 @@ asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
 
 asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
 
+static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
+				      int blocks)
+{
+	sha512_block_data_order(sst->state, src, blocks);
+}
+
 static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	if (!crypto_simd_usable())
 		return sha512_base_do_update(desc, data, len,
-				(sha512_block_fn *)sha512_block_data_order);
+					     __sha512_block_data_order);
 
 	kernel_neon_begin();
-	sha512_base_do_update(desc, data, len,
-			      (sha512_block_fn *)sha512_ce_transform);
+	sha512_base_do_update(desc, data, len, sha512_ce_transform);
 	kernel_neon_end();
 
 	return 0;
@@ -50,16 +55,14 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
 	if (!crypto_simd_usable()) {
 		if (len)
 			sha512_base_do_update(desc, data, len,
-				(sha512_block_fn *)sha512_block_data_order);
-		sha512_base_do_finalize(desc,
-				(sha512_block_fn *)sha512_block_data_order);
+					      __sha512_block_data_order);
+		sha512_base_do_finalize(desc, __sha512_block_data_order);
 		return sha512_base_finish(desc, out);
 	}
 
 	kernel_neon_begin();
-	sha512_base_do_update(desc, data, len,
-			      (sha512_block_fn *)sha512_ce_transform);
-	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	sha512_base_do_update(desc, data, len, sha512_ce_transform);
+	sha512_base_do_finalize(desc, sha512_ce_transform);
 	kernel_neon_end();
 	return sha512_base_finish(desc, out);
 }
@@ -67,13 +70,12 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
 static int sha512_ce_final(struct shash_desc *desc, u8 *out)
 {
 	if (!crypto_simd_usable()) {
-		sha512_base_do_finalize(desc,
-				(sha512_block_fn *)sha512_block_data_order);
+		sha512_base_do_finalize(desc, __sha512_block_data_order);
 		return sha512_base_finish(desc, out);
 	}
 
 	kernel_neon_begin();
-	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	sha512_base_do_finalize(desc, sha512_ce_transform);
 	kernel_neon_end();
 	return sha512_base_finish(desc, out);
 }
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index d915c656e5fe..78d3083de6b7 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -20,15 +20,21 @@ MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("sha384");
 MODULE_ALIAS_CRYPTO("sha512");
 
-asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
+asmlinkage void sha512_block_data_order(u64 *digest, const void *data,
 					unsigned int num_blks);
 EXPORT_SYMBOL(sha512_block_data_order);
 
+static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
+				      int blocks)
+{
+	sha512_block_data_order(sst->state, src, blocks);
+}
+
 static int sha512_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
 	return sha512_base_do_update(desc, data, len,
-			(sha512_block_fn *)sha512_block_data_order);
+				     __sha512_block_data_order);
 }
 
 static int sha512_finup(struct shash_desc *desc, const u8 *data,
@@ -36,9 +42,8 @@ static int sha512_finup(struct shash_desc *desc, const u8 *data,
 {
 	if (len)
 		sha512_base_do_update(desc, data, len,
-			(sha512_block_fn *)sha512_block_data_order);
-	sha512_base_do_finalize(desc,
-			(sha512_block_fn *)sha512_block_data_order);
+				      __sha512_block_data_order);
+	sha512_base_do_finalize(desc, __sha512_block_data_order);
 
 	return sha512_base_finish(desc, out);
 }
diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S
index d50d187906cb..ef97d3187cb7 100644
--- a/arch/arm64/crypto/sm3-ce-core.S
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -73,7 +73,7 @@
 	 *                       int blocks)
 	 */
 	.text
-ENTRY(sm3_ce_transform)
+SYM_FUNC_START(sm3_ce_transform)
 	/* load state */
 	ld1		{v8.4s-v9.4s}, [x0]
 	rev64		v8.4s, v8.4s
@@ -131,7 +131,7 @@ CPU_LE(	rev32		v3.16b, v3.16b		)
 	ext		v9.16b, v9.16b, v9.16b, #8
 	st1		{v8.4s-v9.4s}, [x0]
 	ret
-ENDPROC(sm3_ce_transform)
+SYM_FUNC_END(sm3_ce_transform)
 
 	.section	".rodata", "a"
 	.align		3
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index af3bfbc3f4d4..4ac6cfbc5797 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -15,7 +15,7 @@
 	 * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
 	 */
 	.text
-ENTRY(sm4_ce_do_crypt)
+SYM_FUNC_START(sm4_ce_do_crypt)
 	ld1		{v8.4s}, [x2]
 	ld1		{v0.4s-v3.4s}, [x0], #64
 CPU_LE(	rev32		v8.16b, v8.16b		)
@@ -33,4 +33,4 @@ CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v8.16b, v8.16b		)
 	st1		{v8.4s}, [x1]
 	ret
-ENDPROC(sm4_ce_do_crypt)
+SYM_FUNC_END(sm4_ce_do_crypt)
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 98a5405c8558..bd23f87d6c55 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += mmiowb.h
-generic-y += msi.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += serial.h
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index b9f8d787eea9..324e7d5ab37e 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length);
 static inline void apply_alternatives_module(void *start, size_t length) { }
 #endif
 
-#define ALTINSTR_ENTRY(feature,cb)					      \
+#define ALTINSTR_ENTRY(feature)					              \
 	" .word 661b - .\n"				/* label           */ \
-	" .if " __stringify(cb) " == 0\n"				      \
 	" .word 663f - .\n"				/* new instruction */ \
-	" .else\n"							      \
+	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte 662b-661b\n"				/* source len      */ \
+	" .byte 664f-663f\n"				/* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb)					      \
+	" .word 661b - .\n"				/* label           */ \
 	" .word " __stringify(cb) "- .\n"		/* callback */	      \
-	" .endif\n"							      \
 	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
 	" .byte 662b-661b\n"				/* source len      */ \
 	" .byte 664f-663f\n"				/* replacement len */
@@ -62,15 +65,14 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
  *
  * Alternatives with callbacks do not generate replacement instructions.
  */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb)	\
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)	\
 	".if "__stringify(cfg_enabled)" == 1\n"				\
 	"661:\n\t"							\
 	oldinstr "\n"							\
 	"662:\n"							\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(feature,cb)					\
+	ALTINSTR_ENTRY(feature)						\
 	".popsection\n"							\
-	" .if " __stringify(cb) " == 0\n"				\
 	".pushsection .altinstr_replacement, \"a\"\n"			\
 	"663:\n\t"							\
 	newinstr "\n"							\
@@ -78,17 +80,25 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
 	".popsection\n\t"						\
 	".org	. - (664b-663b) + (662b-661b)\n\t"			\
 	".org	. - (662b-661b) + (664b-663b)\n"			\
-	".else\n\t"							\
+	".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)	\
+	".if "__stringify(cfg_enabled)" == 1\n"				\
+	"661:\n\t"							\
+	oldinstr "\n"							\
+	"662:\n"							\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY_CB(feature, cb)					\
+	".popsection\n"							\
 	"663:\n\t"							\
 	"664:\n\t"							\
-	".endif\n"							\
 	".endif\n"
 
 #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
-	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
+	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
 
 #define ALTERNATIVE_CB(oldinstr, cb) \
-	__ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
+	__ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
 #else
 
 #include <asm/assembler.h>
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 89e4c8b79349..4750fc8030c3 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -141,6 +141,7 @@ static inline u32 gic_read_rpr(void)
 #define gicr_read_pendbaser(c)		readq_relaxed(c)
 
 #define gits_write_vpropbaser(v, c)	writeq_relaxed(v, c)
+#define gits_read_vpropbaser(c)		readq_relaxed(c)
 
 #define gits_write_vpendbaser(v, c)	writeq_relaxed(v, c)
 #define gits_read_vpendbaser(c)		readq_relaxed(c)
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
new file mode 100644
index 000000000000..3fe02da70004
--- /dev/null
+++ b/arch/arm64/include/asm/archrandom.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARCHRANDOM_H
+#define _ASM_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <linux/random.h>
+#include <asm/cpufeature.h>
+
+static inline bool __arm64_rndr(unsigned long *v)
+{
+	bool ok;
+
+	/*
+	 * Reads of RNDR set PSTATE.NZCV to 0b0000 on success,
+	 * and set PSTATE.NZCV to 0b0100 otherwise.
+	 */
+	asm volatile(
+		__mrs_s("%0", SYS_RNDR_EL0) "\n"
+	"	cset %w1, ne\n"
+	: "=r" (*v), "=r" (ok)
+	:
+	: "cc");
+
+	return ok;
+}
+
+static inline bool __must_check arch_get_random_long(unsigned long *v)
+{
+	return false;
+}
+
+static inline bool __must_check arch_get_random_int(unsigned int *v)
+{
+	return false;
+}
+
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+{
+	/*
+	 * Only support the generic interface after we have detected
+	 * the system wide capability, avoiding complexity with the
+	 * cpufeature code and with potential scheduling between CPUs
+	 * with and without the feature.
+	 */
+	if (!cpus_have_const_cap(ARM64_HAS_RNG))
+		return false;
+
+	return __arm64_rndr(v);
+}
+
+
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+{
+	unsigned long val;
+	bool ok = arch_get_random_seed_long(&val);
+
+	*v = val;
+	return ok;
+}
+
+static inline bool __init __early_cpu_has_rndr(void)
+{
+	/* Open code as we run prior to the first call to cpufeature. */
+	unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
+	return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf;
+}
+
+#else
+
+static inline bool __arm64_rndr(unsigned long *v) { return false; }
+static inline bool __init __early_cpu_has_rndr(void) { return false; }
+
+#endif /* CONFIG_ARCH_RANDOM */
+#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index c764cc8fb3b6..f68a0e64482a 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -58,12 +58,4 @@ alternative_else_nop_endif
 	.endm
 #endif
 
-/*
- * Remove the address tag from a virtual address, if present.
- */
-	.macro	untagged_addr, dst, addr
-	sbfx	\dst, \addr, #0, #56
-	and	\dst, \dst, \addr
-	.endm
-
 #endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b8cf7c85ffa2..aca337d79d12 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -40,12 +40,6 @@
 	msr	daif, \flags
 	.endm
 
-	/* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
-	.macro	inherit_daif, pstate:req, tmp:req
-	and	\tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
-	msr	daif, \tmp
-	.endm
-
 	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
 	.macro enable_da_f
 	msr	daifclr, #(8 | 4 | 1)
@@ -86,13 +80,6 @@
 	.endm
 
 /*
- * SMP data memory barrier
- */
-	.macro	smp_dmb, opt
-	dmb	\opt
-	.endm
-
-/*
  * RAS Error Synchronization barrier
  */
 	.macro  esb
@@ -462,17 +449,6 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	.endm
 
 /*
- * Annotate a function as position independent, i.e., safe to be called before
- * the kernel virtual mapping is activated.
- */
-#define ENDPIPROC(x)			\
-	.globl	__pi_##x;		\
-	.type 	__pi_##x, %function;	\
-	.set	__pi_##x, x;		\
-	.size	__pi_##x, . - x;	\
-	ENDPROC(x)
-
-/*
  * Annotate a function as being unsuitable for kprobes.
  */
 #ifdef CONFIG_KPROBES
@@ -699,8 +675,8 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
  * where <label> is optional, and marks the point where execution will resume
  * after a yield has been performed. If omitted, execution resumes right after
  * the endif_yield_neon invocation. Note that the entire sequence, including
- * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
- * is not defined.
+ * the provided patchup code, will be omitted from the image if
+ * CONFIG_PREEMPTION is not defined.
  *
  * As a convenience, in the case where no patchup code is required, the above
  * sequence may be abbreviated to
@@ -728,7 +704,7 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	.endm
 
 	.macro		if_will_cond_yield_neon
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	get_current_task	x0
 	ldr		x0, [x0, #TSK_TI_PREEMPT]
 	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 7b012148bfd6..13869b76b58c 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -12,7 +12,7 @@
 
 #include <linux/stringify.h>
 
-#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
 #define __LL_SC_FALLBACK(asm_ops)					\
 "	b	3f\n"							\
 "	.subsection	1\n"						\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 574808b9df4c..da3280f639cd 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -14,6 +14,7 @@
 static inline void __lse_atomic_##op(int i, atomic_t *v)			\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 "	" #asm_op "	%w[i], %[v]\n"					\
 	: [i] "+r" (i), [v] "+Q" (v->counter)				\
 	: "r" (v));							\
@@ -30,6 +31,7 @@ ATOMIC_OP(add, stadd)
 static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v)	\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 "	" #asm_op #mb "	%w[i], %w[i], %[v]"				\
 	: [i] "+r" (i), [v] "+Q" (v->counter)				\
 	: "r" (v)							\
@@ -58,6 +60,7 @@ static inline int __lse_atomic_add_return##name(int i, atomic_t *v)	\
 	u32 tmp;							\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	ldadd" #mb "	%w[i], %w[tmp], %[v]\n"			\
 	"	add	%w[i], %w[i], %w[tmp]"				\
 	: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)	\
@@ -77,6 +80,7 @@ ATOMIC_OP_ADD_RETURN(        , al, "memory")
 static inline void __lse_atomic_and(int i, atomic_t *v)
 {
 	asm volatile(
+	__LSE_PREAMBLE
 	"	mvn	%w[i], %w[i]\n"
 	"	stclr	%w[i], %[v]"
 	: [i] "+&r" (i), [v] "+Q" (v->counter)
@@ -87,6 +91,7 @@ static inline void __lse_atomic_and(int i, atomic_t *v)
 static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v)	\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	mvn	%w[i], %w[i]\n"					\
 	"	ldclr" #mb "	%w[i], %w[i], %[v]"			\
 	: [i] "+&r" (i), [v] "+Q" (v->counter)				\
@@ -106,6 +111,7 @@ ATOMIC_FETCH_OP_AND(        , al, "memory")
 static inline void __lse_atomic_sub(int i, atomic_t *v)
 {
 	asm volatile(
+	__LSE_PREAMBLE
 	"	neg	%w[i], %w[i]\n"
 	"	stadd	%w[i], %[v]"
 	: [i] "+&r" (i), [v] "+Q" (v->counter)
@@ -118,6 +124,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v)	\
 	u32 tmp;							\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], %w[tmp], %[v]\n"			\
 	"	add	%w[i], %w[i], %w[tmp]"				\
@@ -139,6 +146,7 @@ ATOMIC_OP_SUB_RETURN(        , al, "memory")
 static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v)	\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], %w[i], %[v]"			\
 	: [i] "+&r" (i), [v] "+Q" (v->counter)				\
@@ -159,6 +167,7 @@ ATOMIC_FETCH_OP_SUB(        , al, "memory")
 static inline void __lse_atomic64_##op(s64 i, atomic64_t *v)		\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 "	" #asm_op "	%[i], %[v]\n"					\
 	: [i] "+r" (i), [v] "+Q" (v->counter)				\
 	: "r" (v));							\
@@ -175,6 +184,7 @@ ATOMIC64_OP(add, stadd)
 static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 "	" #asm_op #mb "	%[i], %[i], %[v]"				\
 	: [i] "+r" (i), [v] "+Q" (v->counter)				\
 	: "r" (v)							\
@@ -203,6 +213,7 @@ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	ldadd" #mb "	%[i], %x[tmp], %[v]\n"			\
 	"	add	%[i], %[i], %x[tmp]"				\
 	: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)	\
@@ -222,6 +233,7 @@ ATOMIC64_OP_ADD_RETURN(        , al, "memory")
 static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
 {
 	asm volatile(
+	__LSE_PREAMBLE
 	"	mvn	%[i], %[i]\n"
 	"	stclr	%[i], %[v]"
 	: [i] "+&r" (i), [v] "+Q" (v->counter)
@@ -232,6 +244,7 @@ static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
 static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v)	\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	mvn	%[i], %[i]\n"					\
 	"	ldclr" #mb "	%[i], %[i], %[v]"			\
 	: [i] "+&r" (i), [v] "+Q" (v->counter)				\
@@ -251,6 +264,7 @@ ATOMIC64_FETCH_OP_AND(        , al, "memory")
 static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
 {
 	asm volatile(
+	__LSE_PREAMBLE
 	"	neg	%[i], %[i]\n"
 	"	stadd	%[i], %[v]"
 	: [i] "+&r" (i), [v] "+Q" (v->counter)
@@ -263,6 +277,7 @@ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)	\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], %x[tmp], %[v]\n"			\
 	"	add	%[i], %[i], %x[tmp]"				\
@@ -284,6 +299,7 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v)	\
 {									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], %[i], %[v]"			\
 	: [i] "+&r" (i), [v] "+Q" (v->counter)				\
@@ -305,6 +321,7 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile(
+	__LSE_PREAMBLE
 	"1:	ldr	%x[tmp], %[v]\n"
 	"	subs	%[ret], %x[tmp], #1\n"
 	"	b.lt	2f\n"
@@ -332,6 +349,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr,			\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	mov	%" #w "[tmp], %" #w "[old]\n"			\
 	"	cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n"	\
 	"	mov	%" #w "[ret], %" #w "[tmp]"			\
@@ -379,6 +397,7 @@ __lse__cmpxchg_double##name(unsigned long old1,				\
 	register unsigned long x4 asm ("x4") = (unsigned long)ptr;	\
 									\
 	asm volatile(							\
+	__LSE_PREAMBLE							\
 	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
 	"	eor	%[old1], %[old1], %[oldval1]\n"			\
 	"	eor	%[old2], %[old2], %[oldval2]\n"			\
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index e0e2b1946f42..7d9cc5ec4971 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -29,6 +29,18 @@
 						 SB_BARRIER_INSN"nop\n",	\
 						 ARM64_HAS_SB))
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#define pmr_sync()						\
+	do {							\
+		extern struct static_key_false gic_pmr_sync;	\
+								\
+		if (static_branch_unlikely(&gic_pmr_sync))	\
+			dsb(sy);				\
+	} while(0)
+#else
+#define pmr_sync()	do {} while (0)
+#endif
+
 #define mb()		dsb(sy)
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 43da6dd29592..806e9dc2a852 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -11,6 +11,7 @@
 #define CTR_L1IP_MASK		3
 #define CTR_DMINLINE_SHIFT	16
 #define CTR_IMINLINE_SHIFT	0
+#define CTR_IMINLINE_MASK	0xf
 #define CTR_ERG_SHIFT		20
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
@@ -18,7 +19,7 @@
 #define CTR_DIC_SHIFT		29
 
 #define CTR_CACHE_MINLINE_MASK	\
-	(0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT)
+	(0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
 
 #define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index d064a50deb5f..8d2a7de39744 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -35,6 +35,9 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 }
 #define ip_fast_csum ip_fast_csum
 
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
 #include <asm-generic/checksum.h>
 
 #endif	/* __ASM_CHECKSUM_H */
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index b0d53a265f1d..935d2aa231bf 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -4,6 +4,9 @@
  */
 #ifndef __ASM_COMPAT_H
 #define __ASM_COMPAT_H
+
+#include <asm-generic/compat.h>
+
 #ifdef CONFIG_COMPAT
 
 /*
@@ -13,8 +16,6 @@
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm-generic/compat.h>
-
 #define COMPAT_USER_HZ		100
 #ifdef __AARCH64EB__
 #define COMPAT_UTS_MACHINE	"armv8b\0\0"
@@ -113,23 +114,6 @@ typedef u32		compat_sigset_word;
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
 #define COMPAT_MINSIGSTKSZ	2048
 
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index d72d995b7e25..b4a40535a3d8 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -39,6 +39,7 @@ struct cpuinfo_arm64 {
 	u32		reg_id_isar3;
 	u32		reg_id_isar4;
 	u32		reg_id_isar5;
+	u32		reg_id_isar6;
 	u32		reg_id_mmfr0;
 	u32		reg_id_mmfr1;
 	u32		reg_id_mmfr2;
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ac1dbca3d0cd..865e0253fc1e 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -44,7 +44,7 @@
 #define ARM64_SSBS				34
 #define ARM64_WORKAROUND_1418040		35
 #define ARM64_HAS_SB				36
-#define ARM64_WORKAROUND_1165522		37
+#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE	37
 #define ARM64_HAS_ADDRESS_AUTH_ARCH		38
 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
 #define ARM64_HAS_GENERIC_AUTH_ARCH		40
@@ -54,7 +54,11 @@
 #define ARM64_WORKAROUND_1463225		44
 #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM	45
 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
+#define ARM64_WORKAROUND_1542419		47
+#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE	48
+#define ARM64_HAS_E0PD				49
+#define ARM64_HAS_RNG				50
 
-#define ARM64_NCAPS				47
+#define ARM64_NCAPS				51
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9cde5d2e768f..92ef9539874a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -613,6 +613,11 @@ static inline bool system_has_prio_mask_debugging(void)
 	       system_uses_irq_prio_masking();
 }
 
+static inline bool system_capabilities_finalized(void)
+{
+	return static_branch_likely(&arm64_const_caps_ready);
+}
+
 #define ARM64_BP_HARDEN_UNKNOWN		-1
 #define ARM64_BP_HARDEN_WA_NEEDED	0
 #define ARM64_BP_HARDEN_NOT_REQUIRED	1
@@ -659,6 +664,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
 	default: return CONFIG_ARM64_PA_BITS;
 	}
 }
+
+/* Check whether hardware update of the Access flag is supported */
+static inline bool cpu_has_hw_af(void)
+{
+	u64 mmfr1;
+
+	if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
+		return false;
+
+	mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+	return cpuid_feature_extract_unsigned_field(mmfr1,
+						ID_AA64MMFR1_HADBS_SHIFT);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index aca07c2f6e6e..a87a93f67671 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -85,6 +85,8 @@
 #define QCOM_CPU_PART_FALKOR_V1		0x800
 #define QCOM_CPU_PART_FALKOR		0xC00
 #define QCOM_CPU_PART_KRYO		0x200
+#define QCOM_CPU_PART_KRYO_3XX_SILVER	0x803
+#define QCOM_CPU_PART_KRYO_4XX_SILVER	0x805
 
 #define NVIDIA_CPU_PART_DENVER		0x003
 #define NVIDIA_CPU_PART_CARMEL		0x004
@@ -111,6 +113,8 @@
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
 #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
+#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
 #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 063c964af705..ec213b4a1650 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -8,7 +8,9 @@
 #include <linux/irqflags.h>
 
 #include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
 #include <asm/cpufeature.h>
+#include <asm/ptrace.h>
 
 #define DAIF_PROCCTX		0
 #define DAIF_PROCCTX_NOIRQ	PSR_I_BIT
@@ -36,7 +38,7 @@ static inline void local_daif_mask(void)
 	trace_hardirqs_off();
 }
 
-static inline unsigned long local_daif_save(void)
+static inline unsigned long local_daif_save_flags(void)
 {
 	unsigned long flags;
 
@@ -48,6 +50,15 @@ static inline unsigned long local_daif_save(void)
 			flags |= PSR_I_BIT;
 	}
 
+	return flags;
+}
+
+static inline unsigned long local_daif_save(void)
+{
+	unsigned long flags;
+
+	flags = local_daif_save_flags();
+
 	local_daif_mask();
 
 	return flags;
@@ -65,7 +76,7 @@ static inline void local_daif_restore(unsigned long flags)
 
 		if (system_uses_irq_prio_masking()) {
 			gic_write_pmr(GIC_PRIO_IRQON);
-			dsb(sy);
+			pmr_sync();
 		}
 	} else if (system_uses_irq_prio_masking()) {
 		u64 pmr;
@@ -109,4 +120,19 @@ static inline void local_daif_restore(unsigned long flags)
 		trace_hardirqs_off();
 }
 
+/*
+ * Called by synchronous exception handlers to restore the DAIF bits that were
+ * modified by taking an exception.
+ */
+static inline void local_daif_inherit(struct pt_regs *regs)
+{
+	unsigned long flags = regs->pstate & DAIF_MASK;
+
+	/*
+	 * We can't use local_daif_restore(regs->pstate) here as
+	 * system_has_prio_mask_debugging() won't restore the I bit if it can
+	 * use the pmr instead.
+	 */
+	write_sysreg(flags, daif);
+}
 #endif
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index b54d3a86c444..44531a69d32b 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -93,21 +93,17 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
 	return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
 }
 
-#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
-#define __efi_call_early(f, ...)	f(__VA_ARGS__)
-#define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
-#define efi_is_64bit()			(true)
+#define efi_bs_call(func, ...)	efi_system_table()->boottime->func(__VA_ARGS__)
+#define efi_rt_call(func, ...)	efi_system_table()->runtime->func(__VA_ARGS__)
+#define efi_is_native()		(true)
 
-#define efi_table_attr(table, attr, instance)				\
-	((table##_t *)instance)->attr
+#define efi_table_attr(inst, attr)	(inst->attr)
 
-#define efi_call_proto(protocol, f, instance, ...)			\
-	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
+#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
 
 #define alloc_screen_info(x...)		&screen_info
 
-static inline void free_screen_info(efi_system_table_t *sys_table_arg,
-				    struct screen_info *si)
+static inline void free_screen_info(struct screen_info *si)
 {
 }
 
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index a17393ff6677..b87c6e276ab1 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,14 +8,15 @@
 #define __ASM_EXCEPTION_H
 
 #include <asm/esr.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
 
 #include <linux/interrupt.h>
 
-#define __exception	__attribute__((section(".exception.text")))
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #define __exception_irq_entry	__irq_entry
 #else
-#define __exception_irq_entry	__exception
+#define __exception_irq_entry	__kprobes
 #endif
 
 static inline u32 disr_to_esr(u64 disr)
@@ -31,5 +32,22 @@ static inline u32 disr_to_esr(u64 disr)
 }
 
 asmlinkage void enter_from_user_mode(void);
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_undefinstr(struct pt_regs *regs);
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+			struct pt_regs *regs);
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
+void do_sve_acc(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
+void do_sysinstr(unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
+void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void do_el0_svc(struct pt_regs *regs);
+void do_el0_svc_compat(struct pt_regs *regs);
+void do_el0_ia_bp_hardening(unsigned long addr,  unsigned int esr,
+			    struct pt_regs *regs);
 
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index d48667b04c41..91fa4baa1a93 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -11,9 +11,20 @@
 #include <asm/insn.h>
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#else
 #define MCOUNT_ADDR		((unsigned long)_mcount)
+#endif
+
+/* The BL at the callsite's adjusted rec->ip */
 #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
 
+#define FTRACE_PLT_IDX		0
+#define FTRACE_REGS_PLT_IDX	1
+#define NR_FTRACE_PLTS		2
+
 /*
  * Currently, gcc tends to save the link register after the local variables
  * on the stack. This causes the max stack tracer to report the function
@@ -44,12 +55,24 @@ extern void return_to_handler(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
+	 * Adjust addr to point at the BL in the callsite.
+	 * See ftrace_init_nop() for the callsite sequence.
+	 */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return addr + AARCH64_INSN_SIZE;
+	/*
 	 * addr is the address of the mcount call instruction.
 	 * recordmcount does the necessary offset calculation.
 	 */
 	return addr;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct dyn_ftrace;
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+#endif
+
 #define ftrace_return_address(n) return_address(n)
 
 /*
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 3d2f2472a36c..0f00265248b5 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -86,6 +86,14 @@
 #define KERNEL_HWCAP_SVESM4		__khwcap2_feature(SVESM4)
 #define KERNEL_HWCAP_FLAGM2		__khwcap2_feature(FLAGM2)
 #define KERNEL_HWCAP_FRINT		__khwcap2_feature(FRINT)
+#define KERNEL_HWCAP_SVEI8MM		__khwcap2_feature(SVEI8MM)
+#define KERNEL_HWCAP_SVEF32MM		__khwcap2_feature(SVEF32MM)
+#define KERNEL_HWCAP_SVEF64MM		__khwcap2_feature(SVEF64MM)
+#define KERNEL_HWCAP_SVEBF16		__khwcap2_feature(SVEBF16)
+#define KERNEL_HWCAP_I8MM		__khwcap2_feature(I8MM)
+#define KERNEL_HWCAP_BF16		__khwcap2_feature(BF16)
+#define KERNEL_HWCAP_DGH		__khwcap2_feature(DGH)
+#define KERNEL_HWCAP_RNG		__khwcap2_feature(RNG)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 39e7780bedd6..bb313dde58a4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 					 int shift,
 					 enum aarch64_insn_variant variant,
 					 enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+			      enum aarch64_insn_register src,
+			      enum aarch64_insn_variant variant);
 u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
 				       enum aarch64_insn_variant variant,
 				       enum aarch64_insn_register Rn,
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 323cb306bd28..4e531f57147d 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -167,9 +167,7 @@ extern void iounmap(volatile void __iomem *addr);
 extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 
 #define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
-#define ioremap_wt(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 
 /*
  * PCI configuration space mapping function.
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 1a59f0ed1ae3..aa4b6521ef14 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -6,6 +6,7 @@
 #define __ASM_IRQFLAGS_H
 
 #include <asm/alternative.h>
+#include <asm/barrier.h>
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
@@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void)
 	}
 
 	asm volatile(ALTERNATIVE(
-		"msr	daifclr, #2		// arch_local_irq_enable\n"
-		"nop",
-		__msr_s(SYS_ICC_PMR_EL1, "%0")
-		"dsb	sy",
+		"msr	daifclr, #2		// arch_local_irq_enable",
+		__msr_s(SYS_ICC_PMR_EL1, "%0"),
 		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
 		: "r" ((unsigned long) GIC_PRIO_IRQON)
 		: "memory");
+
+	pmr_sync();
 }
 
 static inline void arch_local_irq_disable(void)
@@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void)
 static inline void arch_local_irq_restore(unsigned long flags)
 {
 	asm volatile(ALTERNATIVE(
-			"msr	daif, %0\n"
-			"nop",
-			__msr_s(SYS_ICC_PMR_EL1, "%0")
-			"dsb	sy",
-			ARM64_HAS_IRQ_PRIO_MASKING)
+		"msr	daif, %0",
+		__msr_s(SYS_ICC_PMR_EL1, "%0"),
+		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
 		: "r" (flags)
 		: "memory");
+
+	pmr_sync();
 }
 
 #endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 12a561a54128..d24b527e8c00 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -96,6 +96,10 @@ static inline void crash_post_resume(void) {}
 struct kimage_arch {
 	void *dtb;
 	unsigned long dtb_mem;
+	/* Core ELF header buffer */
+	void *elf_headers;
+	unsigned long elf_headers_mem;
+	unsigned long elf_headers_sz;
 };
 
 extern const struct kexec_file_ops kexec_image_ops;
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ddf9d762ac62..6e5d839f42b5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -61,7 +61,6 @@
  * RW:		64bit by default, can be overridden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
- * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +73,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d69c1efc63e7..5efe5ca8fecf 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		/* trap error record accesses */
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 	}
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
 		vcpu->arch.hcr_el2 |= HCR_FWB;
+	} else {
+		/*
+		 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+		 * get set in SCTLR_EL1 such that we can detect when the guest
+		 * MMU gets turned on and do the necessary cache maintenance
+		 * then.
+		 */
+		vcpu->arch.hcr_el2 |= HCR_TVM;
+	}
 
 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -77,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 &= ~HCR_TWE;
+	if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+		vcpu->arch.hcr_el2 &= ~HCR_TWI;
+	else
+		vcpu->arch.hcr_el2 |= HCR_TWI;
 }
 
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 |= HCR_TWE;
+	vcpu->arch.hcr_el2 |= HCR_TWI;
 }
 
 static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
@@ -258,6 +273,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
 }
 
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+}
+
 static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f656169db8c3..f5acdde17f3b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -83,6 +84,14 @@ struct kvm_arch {
 
 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	/*
+	 * If we encounter a data abort without valid instruction syndrome
+	 * information, report this to user space.  User space can (and
+	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+	 * supported.
+	 */
+	bool return_nisv_io_abort_to_user;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -338,6 +347,13 @@ struct kvm_vcpu_arch {
 	/* True when deferrable sysregs are loaded on the physical CPU,
 	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
 	bool sysregs_loaded_on_cpu;
+
+	/* Guest PV state */
+	struct {
+		u64 steal;
+		u64 last_steal;
+		gpa_t base;
+	} steal;
 };
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -478,6 +494,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+	vcpu_arch->steal.base = GPA_INVALID;
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return (vcpu_arch->steal.base != GPA_INVALID);
+}
+
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
@@ -510,7 +547,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 * wrong, and hyp will crash and burn when it uses any
 	 * cpus_have_const_cap() wrapper.
 	 */
-	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
+	BUG_ON(!system_capabilities_finalized());
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
 
 	/*
@@ -534,7 +571,7 @@ static inline bool kvm_arch_requires_vhe(void)
 		return true;
 
 	/* Some implementations have defects that confine them to VHE */
-	if (cpus_have_cap(ARM64_WORKAROUND_1165522))
+	if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE))
 		return true;
 
 	return false;
@@ -600,8 +637,7 @@ static inline void kvm_arm_vhe_guest_enter(void)
 	 * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
 	 * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
 	 */
-	if (system_uses_irq_prio_masking())
-		dsb(sy);
+	pmr_sync();
 }
 
 static inline void kvm_arm_vhe_guest_exit(void)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 97f21cc66657..a3a6a2ba9a63 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -91,11 +91,11 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
 	write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
 
 	/*
-	 * ARM erratum 1165522 requires the actual execution of the above
-	 * before we can switch to the EL1/EL0 translation regime used by
+	 * ARM errata 1165522 and 1530923 require the actual execution of the
+	 * above before we can switch to the EL1/EL0 translation regime used by
 	 * the guest.
 	 */
-	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
+	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
 }
 
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index befe37d4bc0e..53d846f1bfe7 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -91,6 +91,7 @@ alternative_cb_end
 
 void kvm_update_va_mask(struct alt_instr *alt,
 			__le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_layout(void);
 
 static inline unsigned long __kern_hyp_va(unsigned long v)
 {
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 1b266292f0be..ebee3113a62f 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -4,4 +4,20 @@
 #define __ALIGN		.align 2
 #define __ALIGN_STR	".align 2"
 
+/*
+ * Annotate a function as position independent, i.e., safe to be called before
+ * the kernel virtual mapping is activated.
+ */
+#define SYM_FUNC_START_PI(x)			\
+		SYM_FUNC_START_ALIAS(__pi_##x);	\
+		SYM_FUNC_START(x)
+
+#define SYM_FUNC_START_WEAK_PI(x)		\
+		SYM_FUNC_START_ALIAS(__pi_##x);	\
+		SYM_FUNC_START_WEAK(x)
+
+#define SYM_FUNC_END_PI(x)			\
+		SYM_FUNC_END(x);		\
+		SYM_FUNC_END_ALIAS(__pi_##x)
+
 #endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 80b388278149..d429f7701c36 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -4,7 +4,9 @@
 
 #include <asm/atomic_ll_sc.h>
 
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+
+#define __LSE_PREAMBLE	".arch armv8-a+lse\n"
 
 #include <linux/compiler_types.h>
 #include <linux/export.h>
@@ -14,8 +16,6 @@
 #include <asm/atomic_lse.h>
 #include <asm/cpucaps.h>
 
-__asm__(".arch_extension	lse");
-
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
 
@@ -34,9 +34,9 @@ static inline bool system_uses_lse_atomics(void)
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
-	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
+	ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS)
 
-#else	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#else	/* CONFIG_ARM64_LSE_ATOMICS */
 
 static inline bool system_uses_lse_atomics(void) { return false; }
 
@@ -44,5 +44,5 @@ static inline bool system_uses_lse_atomics(void) { return false; }
 
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
 
-#endif	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif	/* CONFIG_ARM64_LSE_ATOMICS */
 #endif	/* __ASM_LSE_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c23c47360664..a4f9ca5479b0 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -69,12 +69,6 @@
 #define KERNEL_START		_text
 #define KERNEL_END		_end
 
-#ifdef CONFIG_ARM64_VA_BITS_52
-#define MAX_USER_VA_BITS	52
-#else
-#define MAX_USER_VA_BITS	VA_BITS
-#endif
-
 /*
  * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
  * address space for the shadow region respectively. They can bloat the stack
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index f217e3292919..e4d862420bb4 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,52 +29,11 @@ typedef struct {
  */
 #define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
-static inline bool arm64_kernel_unmapped_at_el0(void)
-{
-	return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
-	       cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
-}
+extern bool arm64_use_ng_mappings;
 
-static inline bool arm64_kernel_use_ng_mappings(void)
+static inline bool arm64_kernel_unmapped_at_el0(void)
 {
-	bool tx1_bug;
-
-	/* What's a kpti? Use global mappings if we don't know. */
-	if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0))
-		return false;
-
-	/*
-	 * Note: this function is called before the CPU capabilities have
-	 * been configured, so our early mappings will be global. If we
-	 * later determine that kpti is required, then
-	 * kpti_install_ng_mappings() will make them non-global.
-	 */
-	if (arm64_kernel_unmapped_at_el0())
-		return true;
-
-	if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-		return false;
-
-	/*
-	 * KASLR is enabled so we're going to be enabling kpti on non-broken
-	 * CPUs regardless of their susceptibility to Meltdown. Rather
-	 * than force everybody to go through the G -> nG dance later on,
-	 * just put down non-global mappings from the beginning.
-	 */
-	if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
-		tx1_bug = false;
-#ifndef MODULE
-	} else if (!static_branch_likely(&arm64_const_caps_ready)) {
-		extern const struct midr_range cavium_erratum_27456_cpus[];
-
-		tx1_bug = is_midr_in_range_list(read_cpuid_id(),
-						cavium_erratum_27456_cpus);
-#endif
-	} else {
-		tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456);
-	}
-
-	return !tx1_bug && kaslr_offset() > 0;
+	return arm64_use_ng_mappings;
 }
 
 typedef void (*bp_hardening_cb_t)(void);
@@ -128,6 +87,7 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       pgprot_t prot, bool page_mappings_only);
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
 extern void mark_linear_text_alias_ro(void);
+extern bool kaslr_requires_kpti(void);
 
 #define INIT_MM_CONTEXT(name)	\
 	.pgd = init_pg_dir,
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index f80e13cbf8ec..1e93de68c044 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -21,7 +21,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;
 
 	/* for CONFIG_DYNAMIC_FTRACE */
-	struct plt_entry 	*ftrace_trampoline;
+	struct plt_entry	*ftrace_trampolines;
 };
 #endif
 
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 799d9dd6f7cc..cf3a0fd7c1a7 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu)
 {
 	return pv_ops.time.steal_clock(cpu);
 }
-#endif
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif // CONFIG_PARAVIRT
 
 #endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 3df60f97da1f..6bf5e650da78 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -69,7 +69,7 @@
 #define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD		(1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
+#define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
 
 /*
  * Section address mask and size definitions.
@@ -110,6 +110,7 @@
 #define PUD_TABLE_BIT		(_AT(pudval_t, 1) << 1)
 #define PUD_TYPE_MASK		(_AT(pudval_t, 3) << 0)
 #define PUD_TYPE_SECT		(_AT(pudval_t, 1) << 0)
+#define PUD_SECT_RDONLY		(_AT(pudval_t, 1) << 7)		/* AP[2] */
 
 /*
  * Level 2 descriptor (PMD).
@@ -292,6 +293,8 @@
 #define TCR_HD			(UL(1) << 40)
 #define TCR_NFD0		(UL(1) << 53)
 #define TCR_NFD1		(UL(1) << 54)
+#define TCR_E0PD0		(UL(1) << 55)
+#define TCR_E0PD1		(UL(1) << 56)
 
 /*
  * TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 8dc6c5cdabe6..6f87839f0249 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -26,8 +26,8 @@
 #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PTE_MAYBE_NG		(arm64_kernel_use_ng_mappings() ? PTE_NG : 0)
-#define PMD_MAYBE_NG		(arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0)
+#define PTE_MAYBE_NG		(arm64_kernel_unmapped_at_el0() ? PTE_NG : 0)
+#define PMD_MAYBE_NG		(arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0)
 
 #define PROT_DEFAULT		(_PROT_DEFAULT | PTE_MAYBE_NG)
 #define PROT_SECT_DEFAULT	(_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
@@ -85,13 +85,12 @@
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_READONLY
 #define __P011  PAGE_READONLY
-#define __P100  PAGE_EXECONLY
+#define __P100  PAGE_READONLY_EXEC
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_READONLY_EXEC
 #define __P111  PAGE_READONLY_EXEC
@@ -100,7 +99,7 @@
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_EXECONLY
+#define __S100  PAGE_READONLY_EXEC
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 565aa45ef134..cd5de0e40bfa 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -17,7 +17,7 @@
  * VMALLOC range.
  *
  * VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
  *	and fixed mappings
  */
 #define VMALLOC_START		(MODULES_END)
@@ -96,12 +96,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-/*
- * Execute-only user mappings do not have the PTE_USER bit set. All valid
- * kernel mappings have the PTE_UXN bit set.
- */
 #define pte_valid_not_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
+	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
 #define pte_valid_user(pte) \
@@ -117,8 +113,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 /*
  * p??_access_permitted() is true for valid user mappings (subject to the
- * write permission check) other than user execute-only which do not have the
- * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set.
+ * write permission check). PROT_NONE mappings do not have the PTE_VALID bit
+ * set.
  */
 #define pte_access_permitted(pte, write) \
 	(pte_valid_user(pte) && (!(write) || pte_write(pte)))
@@ -865,6 +861,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define phys_to_ttbr(addr)	(addr)
 #endif
 
+/*
+ * On arm64 without hardware Access Flag, copying from user will fail because
+ * the pte is old and cannot be marked young. So we always end up with zeroed
+ * page after fork() + CoW for pfn mappings. We don't always have a
+ * hardware-managed access flag on arm64.
+ */
+static inline bool arch_faults_on_old_pte(void)
+{
+	WARN_ON(preemptible());
+
+	return !cpu_has_hw_af();
+}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index d49951647014..80e946b2abee 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -79,11 +79,11 @@ static inline bool should_resched(int preempt_offset)
 	return pc == preempt_offset;
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5623685c7d13..5ba63204d078 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -9,7 +9,7 @@
 #define __ASM_PROCESSOR_H
 
 #define KERNEL_DS		UL(-1)
-#define USER_DS			((UL(1) << MAX_USER_VA_BITS) - 1)
+#define USER_DS			((UL(1) << VA_BITS) - 1)
 
 /*
  * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@ -26,10 +26,12 @@
 #include <linux/init.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <linux/thread_info.h>
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/kasan.h>
 #include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pointer_auth.h>
@@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
 	regs->sp = sp;
 }
 
+static inline bool is_ttbr0_addr(unsigned long addr)
+{
+	/* entry assembly clears tags for TTBR0 addrs */
+	return addr < TASK_SIZE;
+}
+
+static inline bool is_ttbr1_addr(unsigned long addr)
+{
+	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+	return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
 #ifdef CONFIG_COMPAT
 static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
 				       unsigned long sp)
diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h
new file mode 100644
index 000000000000..c4f1c0a0789c
--- /dev/null
+++ b/arch/arm64/include/asm/pvclock-abi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __ASM_PVCLOCK_ABI_H
+#define __ASM_PVCLOCK_ABI_H
+
+/* The below structure is defined in ARM DEN0057A */
+
+struct pvclock_vcpu_stolen_time {
+	__le32 revision;
+	__le32 attributes;
+	__le64 stolen_time;
+	/* Structure must be 64 byte aligned, pad to that size */
+	u8 padding[48];
+} __packed;
+
+#endif
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 788ae971f11c..3994169985ef 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -8,13 +8,13 @@
 #include <asm-generic/sections.h>
 
 extern char __alt_instructions[], __alt_instructions_end[];
-extern char __exception_text_start[], __exception_text_end[];
 extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __inittext_begin[], __inittext_end[];
+extern char __exittext_begin[], __exittext_end[];
 extern char __irqentry_text_start[], __irqentry_text_end[];
 extern char __mmuoff_data_start[], __mmuoff_data_end[];
 extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 7434844036d3..89cba2622b79 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -26,6 +26,8 @@ DECLARE_PER_CPU(bool, fpsimd_context_busy);
 static __must_check inline bool may_use_simd(void)
 {
 	/*
+	 * We must make sure that the SVE has been initialized properly
+	 * before using the SIMD in kernel.
 	 * fpsimd_context_busy is only set while preemption is disabled,
 	 * and is clear whenever preemption is enabled. Since
 	 * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
@@ -33,8 +35,10 @@ static __must_check inline bool may_use_simd(void)
 	 * migrated, and if it's clear we cannot be migrated to a CPU
 	 * where it is set.
 	 */
-	return !in_irq() && !irqs_disabled() && !in_nmi() &&
-		!this_cpu_read(fpsimd_context_busy);
+	return !WARN_ON(!system_capabilities_finalized()) &&
+	       system_supports_fpsimd() &&
+	       !in_irq() && !irqs_disabled() && !in_nmi() &&
+	       !this_cpu_read(fpsimd_context_busy);
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index b093b287babf..102404dc1e13 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -11,4 +11,13 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
 
+/*
+ * Changing this will break osq_lock() thanks to the call inside
+ * smp_cond_load_relaxed().
+ *
+ * See:
+ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
+ */
+#define vcpu_is_preempted(cpu)	false
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index 06d880b3526c..b383b4802a7b 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -66,24 +66,18 @@ struct pt_regs;
 	}									\
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
-#ifndef SYSCALL_DEFINE0
 #define SYSCALL_DEFINE0(sname)							\
 	SYSCALL_METADATA(_##sname, 0);						\
 	asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused);	\
 	ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO);			\
 	asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
-#endif
 
-#ifndef COND_SYSCALL
 #define COND_SYSCALL(name)							\
 	asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs)	\
 	{									\
 		return sys_ni_syscall();					\
 	}
-#endif
 
-#ifndef SYS_NI
 #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
-#endif
 
 #endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fafb43d..b91570ff9db1 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -146,6 +146,7 @@
 #define SYS_ID_ISAR4_EL1		sys_reg(3, 0, 0, 2, 4)
 #define SYS_ID_ISAR5_EL1		sys_reg(3, 0, 0, 2, 5)
 #define SYS_ID_MMFR4_EL1		sys_reg(3, 0, 0, 2, 6)
+#define SYS_ID_ISAR6_EL1		sys_reg(3, 0, 0, 2, 7)
 
 #define SYS_MVFR0_EL1			sys_reg(3, 0, 0, 3, 0)
 #define SYS_MVFR1_EL1			sys_reg(3, 0, 0, 3, 1)
@@ -365,6 +366,9 @@
 #define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
 #define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
 
+#define SYS_RNDR_EL0			sys_reg(3, 3, 2, 4, 0)
+#define SYS_RNDRRS_EL0			sys_reg(3, 3, 2, 4, 1)
+
 #define SYS_PMCR_EL0			sys_reg(3, 3, 9, 12, 0)
 #define SYS_PMCNTENSET_EL0		sys_reg(3, 3, 9, 12, 1)
 #define SYS_PMCNTENCLR_EL0		sys_reg(3, 3, 9, 12, 2)
@@ -538,7 +542,20 @@
 			 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
 			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)
 
+/* MAIR_ELx memory attributes (used by Linux) */
+#define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
+#define MAIR_ATTR_DEVICE_nGnRE		UL(0x04)
+#define MAIR_ATTR_DEVICE_GRE		UL(0x0c)
+#define MAIR_ATTR_NORMAL_NC		UL(0x44)
+#define MAIR_ATTR_NORMAL_WT		UL(0xbb)
+#define MAIR_ATTR_NORMAL		UL(0xff)
+#define MAIR_ATTR_MASK			UL(0xff)
+
+/* Position the attr at the correct index */
+#define MAIR_ATTRIDX(attr, idx)		((attr) << ((idx) * 8))
+
 /* id_aa64isar0 */
+#define ID_AA64ISAR0_RNDR_SHIFT		60
 #define ID_AA64ISAR0_TS_SHIFT		52
 #define ID_AA64ISAR0_FHM_SHIFT		48
 #define ID_AA64ISAR0_DP_SHIFT		44
@@ -553,6 +570,10 @@
 #define ID_AA64ISAR0_AES_SHIFT		4
 
 /* id_aa64isar1 */
+#define ID_AA64ISAR1_I8MM_SHIFT		52
+#define ID_AA64ISAR1_DGH_SHIFT		48
+#define ID_AA64ISAR1_BF16_SHIFT		44
+#define ID_AA64ISAR1_SPECRES_SHIFT	40
 #define ID_AA64ISAR1_SB_SHIFT		36
 #define ID_AA64ISAR1_FRINTTS_SHIFT	32
 #define ID_AA64ISAR1_GPI_SHIFT		28
@@ -605,12 +626,20 @@
 #define ID_AA64PFR1_SSBS_PSTATE_INSNS	2
 
 /* id_aa64zfr0 */
+#define ID_AA64ZFR0_F64MM_SHIFT		56
+#define ID_AA64ZFR0_F32MM_SHIFT		52
+#define ID_AA64ZFR0_I8MM_SHIFT		44
 #define ID_AA64ZFR0_SM4_SHIFT		40
 #define ID_AA64ZFR0_SHA3_SHIFT		32
+#define ID_AA64ZFR0_BF16_SHIFT		20
 #define ID_AA64ZFR0_BITPERM_SHIFT	16
 #define ID_AA64ZFR0_AES_SHIFT		4
 #define ID_AA64ZFR0_SVEVER_SHIFT	0
 
+#define ID_AA64ZFR0_F64MM		0x1
+#define ID_AA64ZFR0_F32MM		0x1
+#define ID_AA64ZFR0_I8MM		0x1
+#define ID_AA64ZFR0_BF16		0x1
 #define ID_AA64ZFR0_SM4			0x1
 #define ID_AA64ZFR0_SHA3		0x1
 #define ID_AA64ZFR0_BITPERM		0x1
@@ -655,6 +684,7 @@
 #define ID_AA64MMFR1_VMIDBITS_16	2
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_E0PD_SHIFT		60
 #define ID_AA64MMFR2_FWB_SHIFT		40
 #define ID_AA64MMFR2_AT_SHIFT		32
 #define ID_AA64MMFR2_LVA_SHIFT		16
@@ -679,6 +709,14 @@
 #define ID_ISAR5_AES_SHIFT		4
 #define ID_ISAR5_SEVL_SHIFT		0
 
+#define ID_ISAR6_I8MM_SHIFT		24
+#define ID_ISAR6_BF16_SHIFT		20
+#define ID_ISAR6_SPECRES_SHIFT		16
+#define ID_ISAR6_SB_SHIFT		12
+#define ID_ISAR6_FHM_SHIFT		8
+#define ID_ISAR6_DP_SHIFT		4
+#define ID_ISAR6_JSCVT_SHIFT		0
+
 #define MVFR0_FPROUND_SHIFT		28
 #define MVFR0_FPSHVEC_SHIFT		24
 #define MVFR0_FPSQRT_SHIFT		20
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 59690613ac31..cee5928e1b7d 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
 
-static inline int in_exception_text(unsigned long ptr)
-{
-	int in;
-
-	in = ptr >= (unsigned long)&__exception_text_start &&
-	     ptr < (unsigned long)&__exception_text_end;
-
-	return in ? : __in_irqentry_text(ptr);
-}
-
 static inline int in_entry_text(unsigned long ptr)
 {
 	return ptr >= (unsigned long)&__entry_text_start &&
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 127712b0b970..32fc8061aa76 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -62,8 +62,13 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
 {
 	unsigned long ret, limit = current_thread_info()->addr_limit;
 
+	/*
+	 * Asynchronous I/O running in a kernel thread does not have the
+	 * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag
+	 * the user address before checking.
+	 */
 	if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
-	    test_thread_flag(TIF_TAGGED_ADDR))
+	    (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR)))
 		addr = untagged_addr(addr);
 
 	__chk_user_ptr(addr);
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2629a68b8724..1dd22da1c3a9 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,11 +38,10 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		436
+#define __NR_compat_syscalls		439
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
-#define __ARCH_WANT_SYS_CLONE3
 
 #ifndef __COMPAT_SYSCALL_NR
 #include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 94ab29cf4f00..c1c61635f89c 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -879,6 +879,10 @@ __SYSCALL(__NR_fspick, sys_fspick)
 __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 435
 __SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+#define __NR_pidfd_getfd 438
+__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index c50ee1b7d5cd..537b1e695365 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -16,7 +16,7 @@
 
 #define VDSO_HAS_CLOCK_GETRES		1
 
-#define VDSO_HAS_32BIT_FALLBACK		1
+#define BUILD_VDSO32			1
 
 static __always_inline
 int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..2ca708ab9b20
--- /dev/null
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARM64_VMALLOC_H
+#define _ASM_ARM64_VMALLOC_H
+
+#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a1e72886b30c..7752d93bb50f 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -65,5 +65,13 @@
 #define HWCAP2_SVESM4		(1 << 6)
 #define HWCAP2_FLAGM2		(1 << 7)
 #define HWCAP2_FRINT		(1 << 8)
+#define HWCAP2_SVEI8MM		(1 << 9)
+#define HWCAP2_SVEF32MM		(1 << 10)
+#define HWCAP2_SVEF64MM		(1 << 11)
+#define HWCAP2_SVEBF16		(1 << 12)
+#define HWCAP2_I8MM		(1 << 13)
+#define HWCAP2_BF16		(1 << 14)
+#define HWCAP2_DGH		(1 << 15)
+#define HWCAP2_RNG		(1 << 16)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 67c21f9bdbad..820e5751ada7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -164,8 +164,9 @@ struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
@@ -323,6 +324,8 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
+#define KVM_ARM_VCPU_PVTIME_CTRL	2
+#define   KVM_ARM_VCPU_PVTIME_IPA	0
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_VCPU2_SHIFT		28
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index 4703d218663a..f83a70e07df8 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -19,5 +19,6 @@
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYS_CLONE3
 
 #include <asm-generic/unistd.h>
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..fc6488660f64 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 
 # Object file lists.
 obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
-			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
-			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
-			   hyp-stub.o psci.o cpu_ops.o insn.o	\
+			   entry-common.o entry-fpsimd.o process.o ptrace.o	\
+			   setup.o signal.o sys.o stacktrace.o time.o traps.o	\
+			   io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o	\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 3a58e9db5cfe..a100483b47c4 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs)
 	if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
 		return err;
 
-	current_flags = arch_local_save_flags();
+	current_flags = local_daif_save_flags();
 
 	/*
 	 * SEA can interrupt SError, mask it and describe this as an NMI so
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index ca158be21f83..7832b3216370 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -618,7 +618,8 @@ static struct insn_emulation_ops setend_ops = {
 };
 
 /*
- * Invoked as late_initcall, since not needed before init spawned.
+ * Invoked as core_initcall, which guarantees that the instruction
+ * emulation is ready for userspace.
  */
 static int __init armv8_deprecated_init(void)
 {
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..a5bdce8af65b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
   DEFINE(S_X24,			offsetof(struct pt_regs, regs[24]));
   DEFINE(S_X26,			offsetof(struct pt_regs, regs[26]));
   DEFINE(S_X28,			offsetof(struct pt_regs, regs[28]));
+  DEFINE(S_FP,			offsetof(struct pt_regs, regs[29]));
   DEFINE(S_LR,			offsetof(struct pt_regs, regs[30]));
   DEFINE(S_SP,			offsetof(struct pt_regs, sp));
   DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
 	mov	x0, #HVC_SOFT_RESTART
 	hvc	#0				// no return
 
-1:	mov	x18, x1				// entry
+1:	mov	x8, x1				// entry
 	mov	x0, x2				// arg0
 	mov	x1, x3				// arg1
 	mov	x2, x4				// arg2
-	br	x18
+	br	x8
 ENDPROC(__cpu_soft_restart)
 
 .popsection
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 93f34b4eca25..703ad0a84f99 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/arm-smccc.h>
-#include <linux/psci.h>
 #include <linux/types.h>
 #include <linux/cpu.h>
 #include <asm/cpu.h>
@@ -88,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
 }
 
 static void
-cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
+cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 {
 	u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
+	bool enable_uct_trap = false;
 
 	/* Trap CTR_EL0 access on this CPU, only if it has a mismatch */
 	if ((read_cpuid_cachetype() & mask) !=
 	    (arm64_ftr_reg_ctrel0.sys_val & mask))
+		enable_uct_trap = true;
+
+	/* ... or if the system is affected by an erratum */
+	if (cap->capability == ARM64_WORKAROUND_1542419)
+		enable_uct_trap = true;
+
+	if (enable_uct_trap)
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
 }
 
@@ -167,9 +174,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
 }
 #endif	/* CONFIG_KVM_INDIRECT_VECTORS */
 
-#include <uapi/linux/psci.h>
 #include <linux/arm-smccc.h>
-#include <linux/psci.h>
 
 static void call_smc_arch_workaround_1(void)
 {
@@ -213,43 +218,31 @@ static int detect_harden_bp_fw(void)
 	struct arm_smccc_res res;
 	u32 midr = read_cpuid_id();
 
-	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+	switch ((int)res.a0) {
+	case 1:
+		/* Firmware says we're just fine */
+		return 0;
+	case 0:
+		break;
+	default:
 		return -1;
+	}
 
-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_hvc_arch_workaround_1;
-			/* This is a guest, no need to patch KVM vectors */
-			smccc_start = NULL;
-			smccc_end = NULL;
-			break;
-		default:
-			return -1;
-		}
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
+		cb = call_hvc_arch_workaround_1;
+		/* This is a guest, no need to patch KVM vectors */
+		smccc_start = NULL;
+		smccc_end = NULL;
 		break;
 
-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_smc_arch_workaround_1;
-			smccc_start = __smccc_workaround_1_smc_start;
-			smccc_end = __smccc_workaround_1_smc_end;
-			break;
-		default:
-			return -1;
-		}
+	case SMCCC_CONDUIT_SMC:
+		cb = call_smc_arch_workaround_1;
+		smccc_start = __smccc_workaround_1_smc_start;
+		smccc_end = __smccc_workaround_1_smc_end;
 		break;
 
 	default:
@@ -309,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
 
 	BUG_ON(nr_inst != 1);
 
-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
 		insn = aarch64_insn_get_hvc_value();
 		break;
-	case PSCI_CONDUIT_SMC:
+	case SMCCC_CONDUIT_SMC:
 		insn = aarch64_insn_get_smc_value();
 		break;
 	default:
@@ -339,6 +332,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
 
 void arm64_set_ssbd_mitigation(bool state)
 {
+	int conduit;
+
 	if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
 		pr_info_once("SSBD disabled by kernel configuration\n");
 		return;
@@ -352,19 +347,10 @@ void arm64_set_ssbd_mitigation(bool state)
 		return;
 	}
 
-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
-
-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+				       NULL);
 
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
 }
 
 static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -374,6 +360,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 	bool required = true;
 	s32 val;
 	bool this_cpu_safe = false;
+	int conduit;
 
 	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
 
@@ -391,25 +378,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 		goto out_printmsg;
 	}
 
-	if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
-		ssbd_state = ARM64_SSBD_UNKNOWN;
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		return false;
-	}
-
-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
-
-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				       ARM_SMCCC_ARCH_WORKAROUND_2, &res);
 
-	default:
+	if (conduit == SMCCC_CONDUIT_NONE) {
 		ssbd_state = ARM64_SSBD_UNKNOWN;
 		if (!this_cpu_safe)
 			__ssb_safe = false;
@@ -575,6 +547,9 @@ static const struct midr_range spectre_v2_safe_list[] = {
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
 	MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+	MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
 	{ /* sentinel */ }
 };
 
@@ -650,9 +625,21 @@ needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
 	return false;
 }
 
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+static bool __maybe_unused
+has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
+				int scope)
+{
+	u32 midr = read_cpuid_id();
+	bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+	const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
 
-static const struct midr_range arm64_harden_el2_vectors[] = {
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+	return is_midr_in_range(midr, &range) && has_dic;
+}
+
+#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367)
+
+static const struct midr_range ca57_a72[] = {
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
 	{},
@@ -772,6 +759,20 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+static const struct midr_range erratum_speculative_at_vhe_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_1165522
+	/* Cortex A76 r0p0 to r2p0 */
+	MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1530923
+	/* Cortex A55 r0p0 to r2p0 */
+	MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0),
+#endif
+	{},
+};
+#endif
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 	{
@@ -881,7 +882,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.desc = "EL2 vector hardening",
 		.capability = ARM64_HARDEN_EL2_VECTORS,
-		ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
+		ERRATA_MIDR_RANGE_LIST(ca57_a72),
 	},
 #endif
 	{
@@ -898,12 +899,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		ERRATA_MIDR_RANGE_LIST(erratum_1418040_list),
 	},
 #endif
-#ifdef CONFIG_ARM64_ERRATUM_1165522
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE
 	{
-		/* Cortex-A76 r0p0 to r2p0 */
-		.desc = "ARM erratum 1165522",
-		.capability = ARM64_WORKAROUND_1165522,
-		ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+		.desc = "ARM errata 1165522, 1530923",
+		.capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE,
+		ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -927,6 +927,23 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_1542419
+	{
+		/* we depend on the firmware portion for correctness */
+		.desc = "ARM erratum 1542419 (kernel portion)",
+		.capability = ARM64_WORKAROUND_1542419,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = has_neoverse_n1_erratum_1542419,
+		.cpu_enable = cpu_enable_trap_ctr_access,
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+	{
+		.desc = "ARM erratum 1319367",
+		.capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE,
+		ERRATA_MIDR_RANGE_LIST(ca57_a72),
+	},
+#endif
 	{
 	}
 };
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 80f459ad0190..0b6715625cf6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -32,9 +32,7 @@ static unsigned long elf_hwcap __read_mostly;
 #define COMPAT_ELF_HWCAP_DEFAULT	\
 				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
 				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
-				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
-				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\
 				 COMPAT_HWCAP_LPAE)
 unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
 unsigned int compat_elf_hwcap2 __read_mostly;
@@ -47,19 +45,23 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM6
 /* Need also bit for ARM64_CB_PATCH */
 DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
 
+bool arm64_use_ng_mappings = false;
+EXPORT_SYMBOL(arm64_use_ng_mappings);
+
 /*
  * Flag to indicate if we have computed the system wide
  * capabilities based on the boot time active CPUs. This
  * will be used to determine if a new booting CPU should
  * go through the verification process to make sure that it
  * supports the system capabilities, without using a hotplug
- * notifier.
+ * notifier. This is also used to decide if we could use
+ * the fast path for checking constant CPU caps.
  */
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+static inline void finalize_system_capabilities(void)
 {
-	sys_caps_initialised = true;
+	static_branch_enable(&arm64_const_caps_ready);
 }
 
 static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
@@ -119,6 +121,7 @@ static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
  * sync with the documentation of the CPU feature register ABI.
  */
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
@@ -135,6 +138,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -177,10 +184,18 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
@@ -225,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
@@ -313,6 +329,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
 	ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_id_isar6[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_id_pfr0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),		/* State3 */
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),		/* State2 */
@@ -396,6 +423,7 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
 	ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
 	ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+	ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6),
 
 	/* Op1 = 0, CRn = 0, CRm = 3 */
 	ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
@@ -600,6 +628,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
 		init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
 		init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+		init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
 		init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
 		init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
 		init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
@@ -753,6 +782,8 @@ void update_cpu_features(int cpu,
 					info->reg_id_isar4, boot->reg_id_isar4);
 		taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
 					info->reg_id_isar5, boot->reg_id_isar5);
+		taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu,
+					info->reg_id_isar6, boot->reg_id_isar6);
 
 		/*
 		 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
@@ -785,7 +816,7 @@ void update_cpu_features(int cpu,
 
 		/* Probe vector lengths, unless we already gave up on SVE */
 		if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
-		    !sys_caps_initialised)
+		    !system_capabilities_finalized())
 			sve_update_vq_map();
 	}
 
@@ -831,6 +862,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
 	read_sysreg_case(SYS_ID_ISAR3_EL1);
 	read_sysreg_case(SYS_ID_ISAR4_EL1);
 	read_sysreg_case(SYS_ID_ISAR5_EL1);
+	read_sysreg_case(SYS_ID_ISAR6_EL1);
 	read_sysreg_case(SYS_MVFR0_EL1);
 	read_sysreg_case(SYS_MVFR1_EL1);
 	read_sysreg_case(SYS_MVFR2_EL1);
@@ -965,6 +997,46 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
 	return has_cpuid_feature(entry, scope);
 }
 
+/*
+ * This check is triggered during the early boot before the cpufeature
+ * is initialised. Checking the status on the local CPU allows the boot
+ * CPU to detect the need for non-global mappings and thus avoiding a
+ * pagetable re-write after all the CPUs are booted. This check will be
+ * anyway run on individual CPUs, allowing us to get the consistent
+ * state once the SMP CPUs are up and thus make the switch to non-global
+ * mappings if required.
+ */
+bool kaslr_requires_kpti(void)
+{
+	if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+		return false;
+
+	/*
+	 * E0PD does a similar job to KPTI so can be used instead
+	 * where available.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+		u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+		if (cpuid_feature_extract_unsigned_field(mmfr2,
+						ID_AA64MMFR2_E0PD_SHIFT))
+			return false;
+	}
+
+	/*
+	 * Systems affected by Cavium erratum 24756 are incompatible
+	 * with KPTI.
+	 */
+	if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
+		extern const struct midr_range cavium_erratum_27456_cpus[];
+
+		if (is_midr_in_range_list(read_cpuid_id(),
+					  cavium_erratum_27456_cpus))
+			return false;
+	}
+
+	return kaslr_offset() > 0;
+}
+
 static bool __meltdown_safe = true;
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
@@ -975,6 +1047,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	static const struct midr_range kpti_safe_list[] = {
 		MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
 		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+		MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
@@ -982,6 +1055,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
 		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+		MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
 		{ /* sentinel */ }
 	};
 	char const *str = "kpti command line option";
@@ -1007,7 +1081,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	}
 
 	/* Useful for KASLR robustness */
-	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) {
+	if (kaslr_requires_kpti()) {
 		if (!__kpti_forced) {
 			str = "KASLR";
 			__kpti_forced = 1;
@@ -1042,7 +1116,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 	extern kpti_remap_fn idmap_kpti_install_ng_mappings;
 	kpti_remap_fn *remap_fn;
 
-	static bool kpti_applied = false;
 	int cpu = smp_processor_id();
 
 	/*
@@ -1050,7 +1123,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 	 * it already or we have KASLR enabled and therefore have not
 	 * created any global mappings at all.
 	 */
-	if (kpti_applied || kaslr_offset() > 0)
+	if (arm64_use_ng_mappings)
 		return;
 
 	remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
@@ -1060,7 +1133,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 	cpu_uninstall_idmap();
 
 	if (!cpu)
-		kpti_applied = true;
+		arm64_use_ng_mappings = true;
 
 	return;
 }
@@ -1250,6 +1323,14 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#ifdef CONFIG_ARM64_E0PD
+static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
+{
+	if (this_cpu_has_cap(ARM64_HAS_E0PD))
+		sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+}
+#endif /* CONFIG_ARM64_E0PD */
+
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 static bool enable_pseudo_nmi;
 
@@ -1290,7 +1371,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_pan,
 	},
 #endif /* CONFIG_ARM64_PAN */
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
 	{
 		.desc = "LSE atomic instructions",
 		.capability = ARM64_HAS_LSE_ATOMICS,
@@ -1301,7 +1382,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 2,
 	},
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif /* CONFIG_ARM64_LSE_ATOMICS */
 	{
 		.desc = "Software prefetching using PRFM",
 		.capability = ARM64_HAS_NO_HW_PREFETCH,
@@ -1367,7 +1448,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		/* FP/SIMD is not implemented */
 		.capability = ARM64_HAS_NO_FPSIMD,
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
 		.min_field_value = 0,
 		.matches = has_no_fpsimd,
 	},
@@ -1566,6 +1647,31 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 	},
 #endif
+#ifdef CONFIG_ARM64_E0PD
+	{
+		.desc = "E0PD",
+		.capability = ARM64_HAS_E0PD,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR2_E0PD_SHIFT,
+		.matches = has_cpuid_feature,
+		.min_field_value = 1,
+		.cpu_enable = cpu_enable_e0pd,
+	},
+#endif
+#ifdef CONFIG_ARCH_RANDOM
+	{
+		.desc = "Random Number Generator",
+		.capability = ARM64_HAS_RNG,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR0_EL1,
+		.field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 1,
+	},
+#endif
 	{},
 };
 
@@ -1595,6 +1701,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.match_list = list,						\
 	}
 
+#define HWCAP_CAP_MATCH(match, cap_type, cap)					\
+	{									\
+		__HWCAP_CAP(#cap, cap_type, cap)				\
+		.matches = match,						\
+	}
+
 #ifdef CONFIG_ARM64_PTR_AUTH
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 	{
@@ -1637,6 +1749,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -1650,6 +1763,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
 	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
@@ -1657,8 +1773,12 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
 #endif
 	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_PTR_AUTH
@@ -1668,8 +1788,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	{},
 };
 
+#ifdef CONFIG_COMPAT
+static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
+{
+	/*
+	 * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available,
+	 * in line with that of arm32 as in vfp_init(). We make sure that the
+	 * check is future proof, by making sure value is non-zero.
+	 */
+	u32 mvfr1;
+
+	WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
+	if (scope == SCOPE_SYSTEM)
+		mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1);
+	else
+		mvfr1 = read_sysreg_s(SYS_MVFR1_EL1);
+
+	return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) &&
+		cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) &&
+		cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT);
+}
+#endif
+
 static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
 #ifdef CONFIG_COMPAT
+	HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
+	HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+	/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
 	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
 	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
 	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
@@ -1973,7 +2120,7 @@ void check_local_cpu_capabilities(void)
 	 * Otherwise, this CPU should verify that it has all the system
 	 * advertised capabilities.
 	 */
-	if (!sys_caps_initialised)
+	if (!system_capabilities_finalized())
 		update_cpu_capabilities(SCOPE_LOCAL_CPU);
 	else
 		verify_local_cpu_capabilities();
@@ -1987,14 +2134,6 @@ static void __init setup_boot_cpu_capabilities(void)
 	enable_cpu_capabilities(SCOPE_BOOT_CPU);
 }
 
-DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
-EXPORT_SYMBOL(arm64_const_caps_ready);
-
-static void __init mark_const_caps_ready(void)
-{
-	static_branch_enable(&arm64_const_caps_ready);
-}
-
 bool this_cpu_has_cap(unsigned int n)
 {
 	if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
@@ -2053,7 +2192,6 @@ void __init setup_cpu_features(void)
 	u32 cwg;
 
 	setup_system_capabilities();
-	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())
@@ -2066,7 +2204,7 @@ void __init setup_cpu_features(void)
 	minsigstksz_setup();
 
 	/* Advertise that we have computed the system capabilities */
-	set_sys_caps_initialised();
+	finalize_system_capabilities();
 
 	/*
 	 * Check for sane CTR_EL0.CWG value.
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 05933c065732..86136075ae41 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -84,6 +84,14 @@ static const char *const hwcap_str[] = {
 	"svesm4",
 	"flagm2",
 	"frint",
+	"svei8mm",
+	"svef32mm",
+	"svef64mm",
+	"svebf16",
+	"i8mm",
+	"bf16",
+	"dgh",
+	"rng",
 	NULL
 };
 
@@ -329,7 +337,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_cntfrq = arch_timer_get_cntfrq();
 	/*
 	 * Use the effective value of the CTR_EL0 than the raw value
-	 * exposed by the CPU. CTR_E0.IDC field value must be interpreted
+	 * exposed by the CPU. CTR_EL0.IDC field value must be interpreted
 	 * with the CLIDR_EL1 fields to avoid triggering false warnings
 	 * when there is a mismatch across the CPUs. Keep track of the
 	 * effective value of the CTR_EL0 in our internal records for
@@ -360,6 +368,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 		info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
 		info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
 		info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+		info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
 		info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
 		info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
 		info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
new file mode 100644
index 000000000000..fde59981445c
--- /dev/null
+++ b/arch/arm64/kernel/entry-common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exception handling code
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ */
+
+#include <linux/context_tracking.h>
+#include <linux/ptrace.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/kprobes.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	local_daif_inherit(regs);
+	far = untagged_addr(far);
+	do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_abort);
+
+static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	local_daif_inherit(regs);
+	do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_pc);
+
+static void notrace el1_undef(struct pt_regs *regs)
+{
+	local_daif_inherit(regs);
+	do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el1_undef);
+
+static void notrace el1_inv(struct pt_regs *regs, unsigned long esr)
+{
+	local_daif_inherit(regs);
+	bad_mode(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el1_inv);
+
+static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	/*
+	 * The CPU masked interrupts, and we are leaving them masked during
+	 * do_debug_exception(). Update PMR as if we had called
+	 * local_mask_daif().
+	 */
+	if (system_uses_irq_prio_masking())
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	do_debug_exception(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_dbg);
+
+asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+{
+	unsigned long esr = read_sysreg(esr_el1);
+
+	switch (ESR_ELx_EC(esr)) {
+	case ESR_ELx_EC_DABT_CUR:
+	case ESR_ELx_EC_IABT_CUR:
+		el1_abort(regs, esr);
+		break;
+	/*
+	 * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a
+	 * recursive exception when trying to push the initial pt_regs.
+	 */
+	case ESR_ELx_EC_PC_ALIGN:
+		el1_pc(regs, esr);
+		break;
+	case ESR_ELx_EC_SYS64:
+	case ESR_ELx_EC_UNKNOWN:
+		el1_undef(regs);
+		break;
+	case ESR_ELx_EC_BREAKPT_CUR:
+	case ESR_ELx_EC_SOFTSTP_CUR:
+	case ESR_ELx_EC_WATCHPT_CUR:
+	case ESR_ELx_EC_BRK64:
+		el1_dbg(regs, esr);
+		break;
+	default:
+		el1_inv(regs, esr);
+	};
+}
+NOKPROBE_SYMBOL(el1_sync_handler);
+
+static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	far = untagged_addr(far);
+	do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_da);
+
+static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	/*
+	 * We've taken an instruction abort from userspace and not yet
+	 * re-enabled IRQs. If the address is a kernel address, apply
+	 * BP hardening prior to enabling IRQs and pre-emption.
+	 */
+	if (!is_ttbr0_addr(far))
+		arm64_apply_bp_hardening();
+
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_ia);
+
+static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_fpsimd_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_acc);
+
+static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_sve_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sve_acc);
+
+static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_fpsimd_exc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_exc);
+
+static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_sysinstr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sys);
+
+static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+{
+	unsigned long far = read_sysreg(far_el1);
+
+	if (!is_ttbr0_addr(instruction_pointer(regs)))
+		arm64_apply_bp_hardening();
+
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_pc);
+
+static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX_NOIRQ);
+	do_sp_pc_abort(regs->sp, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sp);
+
+static void notrace el0_undef(struct pt_regs *regs)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el0_undef);
+
+static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	bad_el0_sync(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el0_inv);
+
+static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+{
+	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+	unsigned long far = read_sysreg(far_el1);
+
+	if (system_uses_irq_prio_masking())
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	user_exit_irqoff();
+	do_debug_exception(far, esr, regs);
+	local_daif_restore(DAIF_PROCCTX_NOIRQ);
+}
+NOKPROBE_SYMBOL(el0_dbg);
+
+static void notrace el0_svc(struct pt_regs *regs)
+{
+	if (system_uses_irq_prio_masking())
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	do_el0_svc(regs);
+}
+NOKPROBE_SYMBOL(el0_svc);
+
+asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+{
+	unsigned long esr = read_sysreg(esr_el1);
+
+	switch (ESR_ELx_EC(esr)) {
+	case ESR_ELx_EC_SVC64:
+		el0_svc(regs);
+		break;
+	case ESR_ELx_EC_DABT_LOW:
+		el0_da(regs, esr);
+		break;
+	case ESR_ELx_EC_IABT_LOW:
+		el0_ia(regs, esr);
+		break;
+	case ESR_ELx_EC_FP_ASIMD:
+		el0_fpsimd_acc(regs, esr);
+		break;
+	case ESR_ELx_EC_SVE:
+		el0_sve_acc(regs, esr);
+		break;
+	case ESR_ELx_EC_FP_EXC64:
+		el0_fpsimd_exc(regs, esr);
+		break;
+	case ESR_ELx_EC_SYS64:
+	case ESR_ELx_EC_WFx:
+		el0_sys(regs, esr);
+		break;
+	case ESR_ELx_EC_SP_ALIGN:
+		el0_sp(regs, esr);
+		break;
+	case ESR_ELx_EC_PC_ALIGN:
+		el0_pc(regs, esr);
+		break;
+	case ESR_ELx_EC_UNKNOWN:
+		el0_undef(regs);
+		break;
+	case ESR_ELx_EC_BREAKPT_LOW:
+	case ESR_ELx_EC_SOFTSTP_LOW:
+	case ESR_ELx_EC_WATCHPT_LOW:
+	case ESR_ELx_EC_BRK64:
+		el0_dbg(regs, esr);
+		break;
+	default:
+		el0_inv(regs, esr);
+	}
+}
+NOKPROBE_SYMBOL(el0_sync_handler);
+
+#ifdef CONFIG_COMPAT
+static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_cp15instr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_cp15);
+
+static void notrace el0_svc_compat(struct pt_regs *regs)
+{
+	if (system_uses_irq_prio_masking())
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	do_el0_svc_compat(regs);
+}
+NOKPROBE_SYMBOL(el0_svc_compat);
+
+asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+{
+	unsigned long esr = read_sysreg(esr_el1);
+
+	switch (ESR_ELx_EC(esr)) {
+	case ESR_ELx_EC_SVC32:
+		el0_svc_compat(regs);
+		break;
+	case ESR_ELx_EC_DABT_LOW:
+		el0_da(regs, esr);
+		break;
+	case ESR_ELx_EC_IABT_LOW:
+		el0_ia(regs, esr);
+		break;
+	case ESR_ELx_EC_FP_ASIMD:
+		el0_fpsimd_acc(regs, esr);
+		break;
+	case ESR_ELx_EC_FP_EXC32:
+		el0_fpsimd_exc(regs, esr);
+		break;
+	case ESR_ELx_EC_PC_ALIGN:
+		el0_pc(regs, esr);
+		break;
+	case ESR_ELx_EC_UNKNOWN:
+	case ESR_ELx_EC_CP14_MR:
+	case ESR_ELx_EC_CP14_LS:
+	case ESR_ELx_EC_CP14_64:
+		el0_undef(regs);
+		break;
+	case ESR_ELx_EC_CP15_32:
+	case ESR_ELx_EC_CP15_64:
+		el0_cp15(regs, esr);
+		break;
+	case ESR_ELx_EC_BREAKPT_LOW:
+	case ESR_ELx_EC_SOFTSTP_LOW:
+	case ESR_ELx_EC_WATCHPT_LOW:
+	case ESR_ELx_EC_BKPT32:
+		el0_dbg(regs, esr);
+		break;
+	default:
+		el0_inv(regs, esr);
+	}
+}
+NOKPROBE_SYMBOL(el0_sync_compat_handler);
+#endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 33d003d80121..7d02f9966d34 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -7,10 +7,136 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/asm-offsets.h>
 #include <asm/assembler.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
+ * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
+ * ftrace_make_call() have patched those NOPs to:
+ *
+ * 	MOV	X9, LR
+ * 	BL	<entry>
+ *
+ * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ *
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
+ * live, and x9-x18 are safe to clobber.
+ *
+ * We save the callsite's context into a pt_regs before invoking any ftrace
+ * callbacks. So that we can get a sensible backtrace, we create a stack record
+ * for the callsite and the ftrace entry assembly. This is not sufficient for
+ * reliable stacktrace: until we create the callsite stack record, its caller
+ * is missing from the LR and existing chain of frame records.
+ */
+	.macro  ftrace_regs_entry, allregs=0
+	/* Make room for pt_regs, plus a callee frame */
+	sub	sp, sp, #(S_FRAME_SIZE + 16)
+
+	/* Save function arguments (and x9 for simplicity) */
+	stp	x0, x1, [sp, #S_X0]
+	stp	x2, x3, [sp, #S_X2]
+	stp	x4, x5, [sp, #S_X4]
+	stp	x6, x7, [sp, #S_X6]
+	stp	x8, x9, [sp, #S_X8]
+
+	/* Optionally save the callee-saved registers, always save the FP */
+	.if \allregs == 1
+	stp	x10, x11, [sp, #S_X10]
+	stp	x12, x13, [sp, #S_X12]
+	stp	x14, x15, [sp, #S_X14]
+	stp	x16, x17, [sp, #S_X16]
+	stp	x18, x19, [sp, #S_X18]
+	stp	x20, x21, [sp, #S_X20]
+	stp	x22, x23, [sp, #S_X22]
+	stp	x24, x25, [sp, #S_X24]
+	stp	x26, x27, [sp, #S_X26]
+	stp	x28, x29, [sp, #S_X28]
+	.else
+	str	x29, [sp, #S_FP]
+	.endif
+
+	/* Save the callsite's SP and LR */
+	add	x10, sp, #(S_FRAME_SIZE + 16)
+	stp	x9, x10, [sp, #S_LR]
+
+	/* Save the PC after the ftrace callsite */
+	str	x30, [sp, #S_PC]
+
+	/* Create a frame record for the callsite above pt_regs */
+	stp	x29, x9, [sp, #S_FRAME_SIZE]
+	add	x29, sp, #S_FRAME_SIZE
+
+	/* Create our frame record within pt_regs. */
+	stp	x29, x30, [sp, #S_STACKFRAME]
+	add	x29, sp, #S_STACKFRAME
+	.endm
+
+ENTRY(ftrace_regs_caller)
+	ftrace_regs_entry	1
+	b	ftrace_common
+ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)
+	ftrace_regs_entry	0
+	b	ftrace_common
+ENDPROC(ftrace_caller)
+
+ENTRY(ftrace_common)
+	sub	x0, x30, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
+	mov	x1, x9				// parent_ip (callsite's LR)
+	ldr_l	x2, function_trace_op		// op
+	mov	x3, sp				// regs
+
+GLOBAL(ftrace_call)
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call)		// ftrace_graph_caller();
+	nop				// If enabled, this will be replaced
+					// "b ftrace_graph_caller"
+#endif
+
+/*
+ * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
+ * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
+ * to restore x0-x8, x29, and x30.
+ */
+ftrace_common_return:
+	/* Restore function arguments */
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #S_X2]
+	ldp	x4, x5, [sp, #S_X4]
+	ldp	x6, x7, [sp, #S_X6]
+	ldr	x8, [sp, #S_X8]
+
+	/* Restore the callsite's FP, LR, PC */
+	ldr	x29, [sp, #S_FP]
+	ldr	x30, [sp, #S_LR]
+	ldr	x9, [sp, #S_PC]
+
+	/* Restore the callsite's SP */
+	add	sp, sp, #S_FRAME_SIZE + 16
+
+	ret	x9
+ENDPROC(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	ldr	x0, [sp, #S_PC]
+	sub	x0, x0, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
+	add	x1, sp, #S_LR			// parent_ip (callsite's LR)
+	ldr	x2, [sp, #S_FRAME_SIZE]	   	// parent fp (callsite's FP)
+	bl	prepare_ftrace_return
+	b	ftrace_common_return
+ENDPROC(ftrace_graph_caller)
+#endif
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
 /*
  * Gcc with -pg will put the following code in the beginning of each function:
  *      mov x0, x30
@@ -162,10 +288,6 @@ GLOBAL(ftrace_graph_call)		// ftrace_graph_caller();
 ENDPROC(ftrace_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-ENTRY(ftrace_stub)
-	ret
-ENDPROC(ftrace_stub)
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * void ftrace_graph_caller(void)
@@ -184,7 +306,14 @@ ENTRY(ftrace_graph_caller)
 
 	mcount_exit
 ENDPROC(ftrace_graph_caller)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ENTRY(ftrace_stub)
+	ret
+ENDPROC(ftrace_stub)
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * void return_to_handler(void)
  *
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf3bd2976e57..9461d812ae27 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -60,23 +60,24 @@
 	.macro kernel_ventry, el, label, regsize = 64
 	.align 7
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
 	.if	\el == 0
+alternative_if ARM64_UNMAP_KERNEL_AT_EL0
 	.if	\regsize == 64
 	mrs	x30, tpidrro_el0
 	msr	tpidrro_el0, xzr
 	.else
 	mov	x30, xzr
 	.endif
-	.endif
 alternative_else_nop_endif
+	.endif
 #endif
 
 	sub	sp, sp, #S_FRAME_SIZE
 #ifdef CONFIG_VMAP_STACK
 	/*
 	 * Test whether the SP has overflowed, without corrupting a GPR.
-	 * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT).
+	 * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
+	 * should always be zero.
 	 */
 	add	sp, sp, x0			// sp' = sp + x0
 	sub	x0, sp, x0			// x0' = sp' - x0 = (sp + x0) - x0 = sp
@@ -166,9 +167,13 @@ alternative_cb_end
 	.if	\el == 0
 	clear_gp_regs
 	mrs	x21, sp_el0
-	ldr_this_cpu	tsk, __entry_task, x20	// Ensure MDSCR_EL1.SS is clear,
-	ldr	x19, [tsk, #TSK_TI_FLAGS]	// since we can unmask debug
-	disable_step_tsk x19, x20		// exceptions when scheduling.
+	ldr_this_cpu	tsk, __entry_task, x20
+	msr	sp_el0, tsk
+
+	// Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions
+	// when scheduling.
+	ldr	x19, [tsk, #TSK_TI_FLAGS]
+	disable_step_tsk x19, x20
 
 	apply_ssbd 1, x22, x23
 
@@ -231,13 +236,6 @@ alternative_else_nop_endif
 	str	w21, [sp, #S_SYSCALLNO]
 	.endif
 
-	/*
-	 * Set sp_el0 to current thread_info.
-	 */
-	.if	\el == 0
-	msr	sp_el0, tsk
-	.endif
-
 	/* Save pmr */
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	mrs_s	x20, SYS_ICC_PMR_EL1
@@ -269,8 +267,10 @@ alternative_else_nop_endif
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	ldr	x20, [sp, #S_PMR_SAVE]
 	msr_s	SYS_ICC_PMR_EL1, x20
-	/* Ensure priority change is seen by redistributor */
-	dsb	sy
+	mrs_s	x21, SYS_ICC_CTLR_EL1
+	tbz	x21, #6, .L__skip_pmr_sync\@	// Check for ICC_CTLR_EL1.PMHE
+	dsb	sy				// Ensure priority change is seen by redistributor
+.L__skip_pmr_sync\@:
 alternative_else_nop_endif
 
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
@@ -578,76 +578,9 @@ ENDPROC(el1_error_invalid)
 	.align	6
 el1_sync:
 	kernel_entry 1
-	mrs	x1, esr_el1			// read the syndrome register
-	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
-	cmp	x24, #ESR_ELx_EC_DABT_CUR	// data abort in EL1
-	b.eq	el1_da
-	cmp	x24, #ESR_ELx_EC_IABT_CUR	// instruction abort in EL1
-	b.eq	el1_ia
-	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
-	b.eq	el1_undef
-	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el1_pc
-	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL1
-	b.eq	el1_undef
-	cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
-	b.ge	el1_dbg
-	b	el1_inv
-
-el1_ia:
-	/*
-	 * Fall through to the Data abort case
-	 */
-el1_da:
-	/*
-	 * Data abort handling
-	 */
-	mrs	x3, far_el1
-	inherit_daif	pstate=x23, tmp=x2
-	untagged_addr	x0, x3
-	mov	x2, sp				// struct pt_regs
-	bl	do_mem_abort
-
-	kernel_exit 1
-el1_pc:
-	/*
-	 * PC alignment exception handling. We don't handle SP alignment faults,
-	 * since we will have hit a recursive exception when trying to push the
-	 * initial pt_regs.
-	 */
-	mrs	x0, far_el1
-	inherit_daif	pstate=x23, tmp=x2
-	mov	x2, sp
-	bl	do_sp_pc_abort
-	ASM_BUG()
-el1_undef:
-	/*
-	 * Undefined instruction
-	 */
-	inherit_daif	pstate=x23, tmp=x2
 	mov	x0, sp
-	bl	do_undefinstr
-	kernel_exit 1
-el1_dbg:
-	/*
-	 * Debug exception handling
-	 */
-	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
-	cinc	x24, x24, eq			// set bit '0'
-	tbz	x24, #0, el1_inv		// EL1 only
-	gic_prio_kentry_setup tmp=x3
-	mrs	x0, far_el1
-	mov	x2, sp				// struct pt_regs
-	bl	do_debug_exception
+	bl	el1_sync_handler
 	kernel_exit 1
-el1_inv:
-	// TODO: add support for undefined instructions in kernel mode
-	inherit_daif	pstate=x23, tmp=x2
-	mov	x0, sp
-	mov	x2, x1
-	mov	x1, #BAD_SYNC
-	bl	bad_mode
-	ASM_BUG()
 ENDPROC(el1_sync)
 
 	.align	6
@@ -669,7 +602,7 @@ el1_irq:
 
 	irq_handler
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	/*
@@ -714,213 +647,31 @@ ENDPROC(el1_irq)
 	.align	6
 el0_sync:
 	kernel_entry 0
-	mrs	x25, esr_el1			// read the syndrome register
-	lsr	x24, x25, #ESR_ELx_EC_SHIFT	// exception class
-	cmp	x24, #ESR_ELx_EC_SVC64		// SVC in 64-bit state
-	b.eq	el0_svc
-	cmp	x24, #ESR_ELx_EC_DABT_LOW	// data abort in EL0
-	b.eq	el0_da
-	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
-	b.eq	el0_ia
-	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
-	b.eq	el0_fpsimd_acc
-	cmp	x24, #ESR_ELx_EC_SVE		// SVE access
-	b.eq	el0_sve_acc
-	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
-	b.eq	el0_fpsimd_exc
-	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
-	ccmp	x24, #ESR_ELx_EC_WFx, #4, ne
-	b.eq	el0_sys
-	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
-	b.eq	el0_sp
-	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el0_pc
-	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
-	b.eq	el0_undef
-	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
-	b.ge	el0_dbg
-	b	el0_inv
+	mov	x0, sp
+	bl	el0_sync_handler
+	b	ret_to_user
+ENDPROC(el0_sync)
 
 #ifdef CONFIG_COMPAT
 	.align	6
 el0_sync_compat:
 	kernel_entry 0, 32
-	mrs	x25, esr_el1			// read the syndrome register
-	lsr	x24, x25, #ESR_ELx_EC_SHIFT	// exception class
-	cmp	x24, #ESR_ELx_EC_SVC32		// SVC in 32-bit state
-	b.eq	el0_svc_compat
-	cmp	x24, #ESR_ELx_EC_DABT_LOW	// data abort in EL0
-	b.eq	el0_da
-	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
-	b.eq	el0_ia
-	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
-	b.eq	el0_fpsimd_acc
-	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
-	b.eq	el0_fpsimd_exc
-	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el0_pc
-	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
-	b.eq	el0_undef
-	cmp	x24, #ESR_ELx_EC_CP15_32	// CP15 MRC/MCR trap
-	b.eq	el0_cp15
-	cmp	x24, #ESR_ELx_EC_CP15_64	// CP15 MRRC/MCRR trap
-	b.eq	el0_cp15
-	cmp	x24, #ESR_ELx_EC_CP14_MR	// CP14 MRC/MCR trap
-	b.eq	el0_undef
-	cmp	x24, #ESR_ELx_EC_CP14_LS	// CP14 LDC/STC trap
-	b.eq	el0_undef
-	cmp	x24, #ESR_ELx_EC_CP14_64	// CP14 MRRC/MCRR trap
-	b.eq	el0_undef
-	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
-	b.ge	el0_dbg
-	b	el0_inv
-el0_svc_compat:
-	gic_prio_kentry_setup tmp=x1
 	mov	x0, sp
-	bl	el0_svc_compat_handler
+	bl	el0_sync_compat_handler
 	b	ret_to_user
+ENDPROC(el0_sync_compat)
 
 	.align	6
 el0_irq_compat:
 	kernel_entry 0, 32
 	b	el0_irq_naked
+ENDPROC(el0_irq_compat)
 
 el0_error_compat:
 	kernel_entry 0, 32
 	b	el0_error_naked
-
-el0_cp15:
-	/*
-	 * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, x25
-	mov	x1, sp
-	bl	do_cp15instr
-	b	ret_to_user
-#endif
-
-el0_da:
-	/*
-	 * Data abort handling
-	 */
-	mrs	x26, far_el1
-	ct_user_exit_irqoff
-	enable_daif
-	untagged_addr	x0, x26
-	mov	x1, x25
-	mov	x2, sp
-	bl	do_mem_abort
-	b	ret_to_user
-el0_ia:
-	/*
-	 * Instruction abort handling
-	 */
-	mrs	x26, far_el1
-	gic_prio_kentry_setup tmp=x0
-	ct_user_exit_irqoff
-	enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
+ENDPROC(el0_error_compat)
 #endif
-	mov	x0, x26
-	mov	x1, x25
-	mov	x2, sp
-	bl	do_el0_ia_bp_hardening
-	b	ret_to_user
-el0_fpsimd_acc:
-	/*
-	 * Floating Point or Advanced SIMD access
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, x25
-	mov	x1, sp
-	bl	do_fpsimd_acc
-	b	ret_to_user
-el0_sve_acc:
-	/*
-	 * Scalable Vector Extension access
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, x25
-	mov	x1, sp
-	bl	do_sve_acc
-	b	ret_to_user
-el0_fpsimd_exc:
-	/*
-	 * Floating Point, Advanced SIMD or SVE exception
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, x25
-	mov	x1, sp
-	bl	do_fpsimd_exc
-	b	ret_to_user
-el0_sp:
-	ldr	x26, [sp, #S_SP]
-	b	el0_sp_pc
-el0_pc:
-	mrs	x26, far_el1
-el0_sp_pc:
-	/*
-	 * Stack or PC alignment exception handling
-	 */
-	gic_prio_kentry_setup tmp=x0
-	ct_user_exit_irqoff
-	enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
-	mov	x0, x26
-	mov	x1, x25
-	mov	x2, sp
-	bl	do_sp_pc_abort
-	b	ret_to_user
-el0_undef:
-	/*
-	 * Undefined instruction
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, sp
-	bl	do_undefinstr
-	b	ret_to_user
-el0_sys:
-	/*
-	 * System instructions, for trapped cache maintenance instructions
-	 */
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, x25
-	mov	x1, sp
-	bl	do_sysinstr
-	b	ret_to_user
-el0_dbg:
-	/*
-	 * Debug exception handling
-	 */
-	tbnz	x24, #0, el0_inv		// EL0 only
-	mrs	x24, far_el1
-	gic_prio_kentry_setup tmp=x3
-	ct_user_exit_irqoff
-	mov	x0, x24
-	mov	x1, x25
-	mov	x2, sp
-	bl	do_debug_exception
-	enable_da_f
-	b	ret_to_user
-el0_inv:
-	ct_user_exit_irqoff
-	enable_daif
-	mov	x0, sp
-	mov	x1, #BAD_SYNC
-	mov	x2, x25
-	bl	bad_el0_sync
-	b	ret_to_user
-ENDPROC(el0_sync)
 
 	.align	6
 el0_irq:
@@ -999,17 +750,6 @@ finish_ret_to_user:
 	kernel_exit 0
 ENDPROC(ret_to_user)
 
-/*
- * SVC handler.
- */
-	.align	6
-el0_svc:
-	gic_prio_kentry_setup tmp=x1
-	mov	x0, sp
-	bl	el0_svc_handler
-	b	ret_to_user
-ENDPROC(el0_svc)
-
 	.popsection				// .entry.text
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 37d3912cfe06..94289d126993 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -269,6 +269,7 @@ static void sve_free(struct task_struct *task)
  */
 static void task_fpsimd_load(void)
 {
+	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!have_cpu_fpsimd_context());
 
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -289,6 +290,7 @@ static void fpsimd_save(void)
 		this_cpu_ptr(&fpsimd_last_state);
 	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
 
+	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!have_cpu_fpsimd_context());
 
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -920,7 +922,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * would have disabled the SVE access trap for userspace during
  * ret_to_user, making an SVE access trap impossible in that case.
  */
-asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 {
 	/* Even if we chose not to use SVE, the hardware could still trap: */
 	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
@@ -947,7 +949,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 /*
  * Trapped FP/ASIMD access.
  */
-asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 {
 	/* TODO: implement lazy context saving/restoring */
 	WARN_ON(1);
@@ -956,7 +958,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 /*
  * Raise a SIGFPE for the current process.
  */
-asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 {
 	unsigned int si_code = FPE_FLTUNK;
 
@@ -1092,6 +1094,7 @@ void fpsimd_bind_task_to_cpu(void)
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
 
+	WARN_ON(!system_supports_fpsimd());
 	last->st = &current->thread.uw.fpsimd_state;
 	last->sve_state = current->thread.sve_state;
 	last->sve_vl = current->thread.sve_vl;
@@ -1114,6 +1117,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
 
+	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
 	last->st = st;
@@ -1128,8 +1132,19 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
  */
 void fpsimd_restore_current_state(void)
 {
-	if (!system_supports_fpsimd())
+	/*
+	 * For the tasks that were created before we detected the absence of
+	 * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
+	 * e.g, init. This could be then inherited by the children processes.
+	 * If we later detect that the system doesn't support FP/SIMD,
+	 * we must clear the flag for  all the tasks to indicate that the
+	 * FPSTATE is clean (as we can't have one) to avoid looping for ever in
+	 * do_notify_resume().
+	 */
+	if (!system_supports_fpsimd()) {
+		clear_thread_flag(TIF_FOREIGN_FPSTATE);
 		return;
+	}
 
 	get_cpu_fpsimd_context();
 
@@ -1148,7 +1163,7 @@ void fpsimd_restore_current_state(void)
  */
 void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 {
-	if (!system_supports_fpsimd())
+	if (WARN_ON(!system_supports_fpsimd()))
 		return;
 
 	get_cpu_fpsimd_context();
@@ -1179,7 +1194,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 void fpsimd_flush_task_state(struct task_struct *t)
 {
 	t->thread.fpsimd_cpu = NR_CPUS;
-
+	/*
+	 * If we don't support fpsimd, bail out after we have
+	 * reset the fpsimd_cpu for this task and clear the
+	 * FPSTATE.
+	 */
+	if (!system_supports_fpsimd())
+		return;
 	barrier();
 	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
 
@@ -1193,6 +1214,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
  */
 static void fpsimd_flush_cpu_state(void)
 {
+	WARN_ON(!system_supports_fpsimd());
 	__this_cpu_write(fpsimd_last_state.st, NULL);
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
@@ -1203,6 +1225,8 @@ static void fpsimd_flush_cpu_state(void)
  */
 void fpsimd_save_and_flush_cpu_state(void)
 {
+	if (!system_supports_fpsimd())
+		return;
 	WARN_ON(preemptible());
 	__get_cpu_fpsimd_context();
 	fpsimd_save();
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 06e56b470315..8618faa82e6d 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ftrace_modify_code(pc, 0, new, false);
 }
 
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+#ifdef CONFIG_ARM64_MODULE_PLTS
+	struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+	if (addr == FTRACE_ADDR)
+		return &plt[FTRACE_PLT_IDX];
+	if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
+		return &plt[FTRACE_REGS_PLT_IDX];
+#endif
+	return NULL;
+}
+
 /*
  * Turn on the call to ftrace_caller() in instrumented function
  */
@@ -72,9 +85,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	long offset = (long)pc - (long)addr;
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
-		struct plt_entry trampoline, *dst;
 		struct module *mod;
+		struct plt_entry *plt;
+
+		if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+			return -EINVAL;
 
 		/*
 		 * On kernels that support module PLTs, the offset between the
@@ -93,49 +108,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		if (WARN_ON(!mod))
 			return -EINVAL;
 
-		/*
-		 * There is only one ftrace trampoline per module. For now,
-		 * this is not a problem since on arm64, all dynamic ftrace
-		 * invocations are routed via ftrace_caller(). This will need
-		 * to be revisited if support for multiple ftrace entry points
-		 * is added in the future, but for now, the pr_err() below
-		 * deals with a theoretical issue only.
-		 *
-		 * Note that PLTs are place relative, and plt_entries_equal()
-		 * checks whether they point to the same target. Here, we need
-		 * to check if the actual opcodes are in fact identical,
-		 * regardless of the offset in memory so use memcmp() instead.
-		 */
-		dst = mod->arch.ftrace_trampoline;
-		trampoline = get_plt_entry(addr, dst);
-		if (memcmp(dst, &trampoline, sizeof(trampoline))) {
-			if (plt_entry_is_initialized(dst)) {
-				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
-				return -EINVAL;
-			}
-
-			/* point the trampoline to our ftrace entry point */
-			module_disable_ro(mod);
-			*dst = trampoline;
-			module_enable_ro(mod, true);
-
-			/*
-			 * Ensure updated trampoline is visible to instruction
-			 * fetch before we patch in the branch. Although the
-			 * architecture doesn't require an IPI in this case,
-			 * Neoverse-N1 erratum #1542419 does require one
-			 * if the TLB maintenance in module_enable_ro() is
-			 * skipped due to rodata_enabled. It doesn't seem worth
-			 * it to make it conditional given that this is
-			 * certainly not a fast-path.
-			 */
-			flush_icache_range((unsigned long)&dst[0],
-					   (unsigned long)&dst[1]);
+		plt = get_ftrace_plt(mod, addr);
+		if (!plt) {
+			pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+			return -EINVAL;
 		}
-		addr = (unsigned long)dst;
-#else /* CONFIG_ARM64_MODULE_PLTS */
-		return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
+
+		addr = (unsigned long)plt;
 	}
 
 	old = aarch64_insn_gen_nop();
@@ -144,6 +123,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	return ftrace_modify_code(pc, old, new, true);
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	u32 old, new;
+
+	old = aarch64_insn_gen_branch_imm(pc, old_addr,
+					  AARCH64_INSN_BRANCH_LINK);
+	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * The compiler has inserted two NOPs before the regular function prologue.
+ * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
+ * and x9-x18 are free for our use.
+ *
+ * At runtime we want to be able to swing a single NOP <-> BL to enable or
+ * disable the ftrace call. The BL requires us to save the original LR value,
+ * so here we insert a <MOV X9, LR> over the first NOP so the instructions
+ * before the regular prologue are:
+ *
+ * | Compiled | Disabled   | Enabled    |
+ * +----------+------------+------------+
+ * | NOP      | MOV X9, LR | MOV X9, LR |
+ * | NOP      | NOP        | BL <entry> |
+ *
+ * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the MOV is not harmful given x9 is not live per the AAPCS.
+ *
+ * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
+ * the BL.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
+	u32 old, new;
+
+	old = aarch64_insn_gen_nop();
+	new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+					AARCH64_INSN_REG_LR,
+					AARCH64_INSN_VARIANT_64BIT);
+	return ftrace_modify_code(pc, old, new, true);
+}
+#endif
+
 /*
  * Turn off the call to ftrace_caller() in instrumented function
  */
@@ -156,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	long offset = (long)pc - (long)addr;
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
 		u32 replaced;
 
+		if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+			return -EINVAL;
+
 		/*
 		 * 'mod' is only set at module load time, but if we end up
 		 * dealing with an out-of-range condition, we can assume it
@@ -189,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 			return -EINVAL;
 
 		validate = false;
-#else /* CONFIG_ARM64_MODULE_PLTS */
-		return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
 	} else {
 		old = aarch64_insn_gen_branch_imm(pc, addr,
 						  AARCH64_INSN_BRANCH_LINK);
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a96b2921d22c..590963c9c609 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -182,78 +182,79 @@ int arch_hibernation_header_restore(void *addr)
 }
 EXPORT_SYMBOL(arch_hibernation_header_restore);
 
-/*
- * Copies length bytes, starting at src_start into an new page,
- * perform cache maintentance, then maps it at the specified address low
- * address as executable.
- *
- * This is used by hibernate to copy the code it needs to execute when
- * overwriting the kernel text. This function generates a new set of page
- * tables, which it loads into ttbr0.
- *
- * Length is provided as we probably only want 4K of data, even on a 64K
- * page system.
- */
-static int create_safe_exec_page(void *src_start, size_t length,
-				 unsigned long dst_addr,
-				 phys_addr_t *phys_dst_addr,
-				 void *(*allocator)(gfp_t mask),
-				 gfp_t mask)
+static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
+		       unsigned long dst_addr,
+		       pgprot_t pgprot)
 {
-	int rc = 0;
-	pgd_t *trans_pgd;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
-	unsigned long dst = (unsigned long)allocator(mask);
-
-	if (!dst) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	memcpy((void *)dst, src_start, length);
-	__flush_icache_range(dst, dst + length);
-
-	trans_pgd = allocator(mask);
-	if (!trans_pgd) {
-		rc = -ENOMEM;
-		goto out;
-	}
 
 	pgdp = pgd_offset_raw(trans_pgd, dst_addr);
 	if (pgd_none(READ_ONCE(*pgdp))) {
-		pudp = allocator(mask);
-		if (!pudp) {
-			rc = -ENOMEM;
-			goto out;
-		}
+		pudp = (void *)get_safe_page(GFP_ATOMIC);
+		if (!pudp)
+			return -ENOMEM;
 		pgd_populate(&init_mm, pgdp, pudp);
 	}
 
 	pudp = pud_offset(pgdp, dst_addr);
 	if (pud_none(READ_ONCE(*pudp))) {
-		pmdp = allocator(mask);
-		if (!pmdp) {
-			rc = -ENOMEM;
-			goto out;
-		}
+		pmdp = (void *)get_safe_page(GFP_ATOMIC);
+		if (!pmdp)
+			return -ENOMEM;
 		pud_populate(&init_mm, pudp, pmdp);
 	}
 
 	pmdp = pmd_offset(pudp, dst_addr);
 	if (pmd_none(READ_ONCE(*pmdp))) {
-		ptep = allocator(mask);
-		if (!ptep) {
-			rc = -ENOMEM;
-			goto out;
-		}
+		ptep = (void *)get_safe_page(GFP_ATOMIC);
+		if (!ptep)
+			return -ENOMEM;
 		pmd_populate_kernel(&init_mm, pmdp, ptep);
 	}
 
 	ptep = pte_offset_kernel(pmdp, dst_addr);
-	set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+	set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
+
+	return 0;
+}
+
+/*
+ * Copies length bytes, starting at src_start into an new page,
+ * perform cache maintenance, then maps it at the specified address low
+ * address as executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text. This function generates a new set of page
+ * tables, which it loads into ttbr0.
+ *
+ * Length is provided as we probably only want 4K of data, even on a 64K
+ * page system.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+				 unsigned long dst_addr,
+				 phys_addr_t *phys_dst_addr)
+{
+	void *page = (void *)get_safe_page(GFP_ATOMIC);
+	pgd_t *trans_pgd;
+	int rc;
+
+	if (!page)
+		return -ENOMEM;
+
+	memcpy(page, src_start, length);
+	__flush_icache_range((unsigned long)page, (unsigned long)page + length);
+
+	trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
+	if (!trans_pgd)
+		return -ENOMEM;
+
+	rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
+				PAGE_KERNEL_EXEC);
+	if (rc)
+		return rc;
 
 	/*
 	 * Load our new page tables. A strict BBM approach requires that we
@@ -269,13 +270,12 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	 */
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
+	write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
 	isb();
 
-	*phys_dst_addr = virt_to_phys((void *)dst);
+	*phys_dst_addr = virt_to_phys(page);
 
-out:
-	return rc;
+	return 0;
 }
 
 #define dcache_clean_range(start, end)	__flush_dcache_area(start, (end - start))
@@ -450,7 +450,7 @@ static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
 				return -ENOMEM;
 		} else {
 			set_pud(dst_pudp,
-				__pud(pud_val(pud) & ~PMD_SECT_RDONLY));
+				__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
 		}
 	} while (dst_pudp++, src_pudp++, addr = next, addr != end);
 
@@ -476,6 +476,24 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
 	return 0;
 }
 
+static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
+			  unsigned long end)
+{
+	int rc;
+	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+
+	if (!trans_pgd) {
+		pr_err("Failed to allocate memory for temporary page tables.\n");
+		return -ENOMEM;
+	}
+
+	rc = copy_page_tables(trans_pgd, start, end);
+	if (!rc)
+		*dst_pgdp = trans_pgd;
+
+	return rc;
+}
+
 /*
  * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
  *
@@ -484,7 +502,7 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
  */
 int swsusp_arch_resume(void)
 {
-	int rc = 0;
+	int rc;
 	void *zero_page;
 	size_t exit_size;
 	pgd_t *tmp_pg_dir;
@@ -497,15 +515,9 @@ int swsusp_arch_resume(void)
 	 * Create a second copy of just the linear map, and use this when
 	 * restoring.
 	 */
-	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
-	if (!tmp_pg_dir) {
-		pr_err("Failed to allocate memory for temporary page tables.\n");
-		rc = -ENOMEM;
-		goto out;
-	}
-	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+	rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
 	if (rc)
-		goto out;
+		return rc;
 
 	/*
 	 * We need a zero page that is zero before & after resume in order to
@@ -514,8 +526,7 @@ int swsusp_arch_resume(void)
 	zero_page = (void *)get_safe_page(GFP_ATOMIC);
 	if (!zero_page) {
 		pr_err("Failed to allocate zero page.\n");
-		rc = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	/*
@@ -530,11 +541,10 @@ int swsusp_arch_resume(void)
 	 */
 	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
 				   (unsigned long)hibernate_exit,
-				   &phys_hibernate_exit,
-				   (void *)get_safe_page, GFP_ATOMIC);
+				   &phys_hibernate_exit);
 	if (rc) {
 		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
-		goto out;
+		return rc;
 	}
 
 	/*
@@ -561,8 +571,7 @@ int swsusp_arch_resume(void)
 		       resume_hdr.reenter_kernel, restore_pblist,
 		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
 
-out:
-	return rc;
+	return 0;
 }
 
 int hibernate_resume_nonboot_cpu_disable(void)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 38ee1514cd9c..0b727edf4104 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -51,7 +51,7 @@ int hw_breakpoint_slots(int type)
 	case TYPE_DATA:
 		return get_num_wrps();
 	default:
-		pr_warning("unknown slot type: %d\n", type);
+		pr_warn("unknown slot type: %d\n", type);
 		return 0;
 	}
 }
@@ -112,7 +112,7 @@ static u64 read_wb_reg(int reg, int n)
 	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
 	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
 	default:
-		pr_warning("attempt to read from unknown breakpoint register %d\n", n);
+		pr_warn("attempt to read from unknown breakpoint register %d\n", n);
 	}
 
 	return val;
@@ -127,7 +127,7 @@ static void write_wb_reg(int reg, int n, u64 val)
 	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
 	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
 	default:
-		pr_warning("attempt to write to unknown breakpoint register %d\n", n);
+		pr_warn("attempt to write to unknown breakpoint register %d\n", n);
 	}
 	isb();
 }
@@ -145,7 +145,7 @@ static enum dbg_active_el debug_exception_level(int privilege)
 	case AARCH64_BREAKPOINT_EL1:
 		return DBG_ACTIVE_EL1;
 	default:
-		pr_warning("invalid breakpoint privilege level %d\n", privilege);
+		pr_warn("invalid breakpoint privilege level %d\n", privilege);
 		return -EINVAL;
 	}
 }
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index d801a7094076..4a9e773a177f 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -21,6 +21,7 @@
 #include <asm/fixmap.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
+#include <asm/sections.h>
 
 #define AARCH64_INSN_SF_BIT	BIT(31)
 #define AARCH64_INSN_N_BIT	BIT(22)
@@ -78,16 +79,29 @@ bool aarch64_insn_is_branch_imm(u32 insn)
 
 static DEFINE_RAW_SPINLOCK(patch_lock);
 
+static bool is_exit_text(unsigned long addr)
+{
+	/* discarded with init text/data */
+	return system_state < SYSTEM_RUNNING &&
+		addr >= (unsigned long)__exittext_begin &&
+		addr < (unsigned long)__exittext_end;
+}
+
+static bool is_image_text(unsigned long addr)
+{
+	return core_kernel_text(addr) || is_exit_text(addr);
+}
+
 static void __kprobes *patch_map(void *addr, int fixmap)
 {
 	unsigned long uintaddr = (uintptr_t) addr;
-	bool module = !core_kernel_text(uintaddr);
+	bool image = is_image_text(uintaddr);
 	struct page *page;
 
-	if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
-		page = vmalloc_to_page(addr);
-	else if (!module)
+	if (image)
 		page = phys_to_page(__pa_symbol(addr));
+	else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+		page = vmalloc_to_page(addr);
 	else
 		return addr;
 
@@ -1268,6 +1282,19 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
 }
 
+/*
+ * MOV (register) is architecturally an alias of ORR (shifted register) where
+ * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
+ */
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+			      enum aarch64_insn_register src,
+			      enum aarch64_insn_variant variant)
+{
+	return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
+						    src, 0, variant,
+						    AARCH64_INSN_LOGIC_ORR);
+}
+
 u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
 			 enum aarch64_insn_register reg,
 			 enum aarch64_insn_adr_type type)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 416f537bf614..53b8a4ee64ff 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -19,6 +19,14 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 
+enum kaslr_status {
+	KASLR_ENABLED,
+	KASLR_DISABLED_CMDLINE,
+	KASLR_DISABLED_NO_SEED,
+	KASLR_DISABLED_FDT_REMAP,
+};
+
+static enum kaslr_status __initdata kaslr_status;
 u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;
 
@@ -91,15 +99,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
 	 */
 	early_fixmap_init();
 	fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
-	if (!fdt)
+	if (!fdt) {
+		kaslr_status = KASLR_DISABLED_FDT_REMAP;
 		return 0;
+	}
 
 	/*
 	 * Retrieve (and wipe) the seed from the FDT
 	 */
 	seed = get_kaslr_seed(fdt);
-	if (!seed)
-		return 0;
 
 	/*
 	 * Check if 'nokaslr' appears on the command line, and
@@ -107,8 +115,26 @@ u64 __init kaslr_early_init(u64 dt_phys)
 	 */
 	cmdline = kaslr_get_cmdline(fdt);
 	str = strstr(cmdline, "nokaslr");
-	if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+	if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+		kaslr_status = KASLR_DISABLED_CMDLINE;
+		return 0;
+	}
+
+	/*
+	 * Mix in any entropy obtainable architecturally, open coded
+	 * since this runs extremely early.
+	 */
+	if (__early_cpu_has_rndr()) {
+		unsigned long raw;
+
+		if (__arm64_rndr(&raw))
+			seed ^= raw;
+	}
+
+	if (!seed) {
+		kaslr_status = KASLR_DISABLED_NO_SEED;
 		return 0;
+	}
 
 	/*
 	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
@@ -170,3 +196,24 @@ u64 __init kaslr_early_init(u64 dt_phys)
 
 	return offset;
 }
+
+static int __init kaslr_init(void)
+{
+	switch (kaslr_status) {
+	case KASLR_ENABLED:
+		pr_info("KASLR enabled\n");
+		break;
+	case KASLR_DISABLED_CMDLINE:
+		pr_info("KASLR disabled on command line\n");
+		break;
+	case KASLR_DISABLED_NO_SEED:
+		pr_warn("KASLR disabled due to lack of seed\n");
+		break;
+	case KASLR_DISABLED_FDT_REMAP:
+		pr_warn("KASLR disabled due to FDT remapping failure\n");
+		break;
+	}
+
+	return 0;
+}
+core_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 29a9428486a5..af9987c154ca 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -47,10 +47,6 @@ static void *image_load(struct kimage *image,
 	struct kexec_segment *kernel_segment;
 	int ret;
 
-	/* We don't support crash kernels yet. */
-	if (image->type == KEXEC_TYPE_CRASH)
-		return ERR_PTR(-EOPNOTSUPP);
-
 	/*
 	 * We require a kernel with an unambiguous Image header. Per
 	 * Documentation/arm64/booting.rst, this is the case when image_size
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 0df8493624e0..8e9c924423b4 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -160,18 +160,6 @@ void machine_kexec(struct kimage *kimage)
 
 	kexec_image_info(kimage);
 
-	pr_debug("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
-		kimage->control_code_page);
-	pr_debug("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
-		&reboot_code_buffer_phys);
-	pr_debug("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
-		reboot_code_buffer);
-	pr_debug("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
-		arm64_relocate_new_kernel);
-	pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
-		__func__, __LINE__, arm64_relocate_new_kernel_size,
-		arm64_relocate_new_kernel_size);
-
 	/*
 	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
 	 * after the kernel is shut down.
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 7b08bf9499b6..dd3ae8081b38 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -17,12 +17,15 @@
 #include <linux/memblock.h>
 #include <linux/of_fdt.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 #include <asm/byteorder.h>
 
 /* relevant device tree properties */
+#define FDT_PROP_KEXEC_ELFHDR	"linux,elfcorehdr"
+#define FDT_PROP_MEM_RANGE	"linux,usable-memory-range"
 #define FDT_PROP_INITRD_START	"linux,initrd-start"
 #define FDT_PROP_INITRD_END	"linux,initrd-end"
 #define FDT_PROP_BOOTARGS	"bootargs"
@@ -40,6 +43,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	vfree(image->arch.dtb);
 	image->arch.dtb = NULL;
 
+	vfree(image->arch.elf_headers);
+	image->arch.elf_headers = NULL;
+	image->arch.elf_headers_sz = 0;
+
 	return kexec_image_post_load_cleanup_default(image);
 }
 
@@ -55,6 +62,31 @@ static int setup_dtb(struct kimage *image,
 
 	off = ret;
 
+	ret = fdt_delprop(dtb, off, FDT_PROP_KEXEC_ELFHDR);
+	if (ret && ret != -FDT_ERR_NOTFOUND)
+		goto out;
+	ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE);
+	if (ret && ret != -FDT_ERR_NOTFOUND)
+		goto out;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		/* add linux,elfcorehdr */
+		ret = fdt_appendprop_addrrange(dtb, 0, off,
+				FDT_PROP_KEXEC_ELFHDR,
+				image->arch.elf_headers_mem,
+				image->arch.elf_headers_sz);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+
+		/* add linux,usable-memory-range */
+		ret = fdt_appendprop_addrrange(dtb, 0, off,
+				FDT_PROP_MEM_RANGE,
+				crashk_res.start,
+				crashk_res.end - crashk_res.start + 1);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+	}
+
 	/* add bootargs */
 	if (cmdline) {
 		ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline);
@@ -125,8 +157,8 @@ out:
 }
 
 /*
- * More space needed so that we can add initrd, bootargs, kaslr-seed, and
- * rng-seed.
+ * More space needed so that we can add initrd, bootargs, kaslr-seed,
+ * rng-seed, userable-memory-range and elfcorehdr.
  */
 #define DTB_EXTRA_SPACE 0x1000
 
@@ -174,6 +206,43 @@ static int create_dtb(struct kimage *image,
 	}
 }
 
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+	struct crash_mem *cmem;
+	unsigned int nr_ranges;
+	int ret;
+	u64 i;
+	phys_addr_t start, end;
+
+	nr_ranges = 1; /* for exclusion of crashkernel region */
+	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+					MEMBLOCK_NONE, &start, &end, NULL)
+		nr_ranges++;
+
+	cmem = kmalloc(sizeof(struct crash_mem) +
+			sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL);
+	if (!cmem)
+		return -ENOMEM;
+
+	cmem->max_nr_ranges = nr_ranges;
+	cmem->nr_ranges = 0;
+	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+					MEMBLOCK_NONE, &start, &end, NULL) {
+		cmem->ranges[cmem->nr_ranges].start = start;
+		cmem->ranges[cmem->nr_ranges].end = end - 1;
+		cmem->nr_ranges++;
+	}
+
+	/* Exclude crashkernel region */
+	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+
+	if (!ret)
+		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+	kfree(cmem);
+	return ret;
+}
+
 int load_other_segments(struct kimage *image,
 			unsigned long kernel_load_addr,
 			unsigned long kernel_size,
@@ -181,14 +250,43 @@ int load_other_segments(struct kimage *image,
 			char *cmdline)
 {
 	struct kexec_buf kbuf;
-	void *dtb = NULL;
-	unsigned long initrd_load_addr = 0, dtb_len;
+	void *headers, *dtb = NULL;
+	unsigned long headers_sz, initrd_load_addr = 0, dtb_len;
 	int ret = 0;
 
 	kbuf.image = image;
 	/* not allocate anything below the kernel */
 	kbuf.buf_min = kernel_load_addr + kernel_size;
 
+	/* load elf core header */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = prepare_elf_headers(&headers, &headers_sz);
+		if (ret) {
+			pr_err("Preparing elf core header failed\n");
+			goto out_err;
+		}
+
+		kbuf.buffer = headers;
+		kbuf.bufsz = headers_sz;
+		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+		kbuf.memsz = headers_sz;
+		kbuf.buf_align = SZ_64K; /* largest supported page size */
+		kbuf.buf_max = ULONG_MAX;
+		kbuf.top_down = true;
+
+		ret = kexec_add_buffer(&kbuf);
+		if (ret) {
+			vfree(headers);
+			goto out_err;
+		}
+		image->arch.elf_headers = headers;
+		image->arch.elf_headers_mem = kbuf.mem;
+		image->arch.elf_headers_sz = headers_sz;
+
+		pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+			 image->arch.elf_headers_mem, headers_sz, headers_sz);
+	}
+
 	/* load initrd */
 	if (initrd) {
 		kbuf.buffer = initrd;
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index b182442b87a3..65b08a74aec6 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/elf.h>
+#include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sort.h>
@@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		tramp->sh_type = SHT_NOBITS;
 		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
 		tramp->sh_addralign = __alignof__(struct plt_entry);
-		tramp->sh_size = sizeof(struct plt_entry);
+		tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
 	}
 
 	return 0;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03ff15bffbb6..1cd1a4d0ed30 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -9,6 +9,7 @@
 
 #include <linux/bitops.h>
 #include <linux/elf.h>
+#include <linux/ftrace.h>
 #include <linux/gfp.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
@@ -470,22 +471,58 @@ overflow:
 	return -ENOEXEC;
 }
 
-int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *me)
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+				    const Elf_Shdr *sechdrs,
+				    const char *name)
 {
 	const Elf_Shdr *s, *se;
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
-		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0)
-			apply_alternatives_module((void *)s->sh_addr, s->sh_size);
-#ifdef CONFIG_ARM64_MODULE_PLTS
-		if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
-		    !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
-			me->arch.ftrace_trampoline = (void *)s->sh_addr;
-#endif
+		if (strcmp(name, secstrs + s->sh_name) == 0)
+			return s;
 	}
 
+	return NULL;
+}
+
+static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
+{
+	*plt = get_plt_entry(addr, plt);
+}
+
+static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
+				  const Elf_Shdr *sechdrs,
+				  struct module *mod)
+{
+#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+	const Elf_Shdr *s;
+	struct plt_entry *plts;
+
+	s = find_section(hdr, sechdrs, ".text.ftrace_trampoline");
+	if (!s)
+		return -ENOEXEC;
+
+	plts = (void *)s->sh_addr;
+
+	__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		__init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR);
+
+	mod->arch.ftrace_trampolines = plts;
+#endif
 	return 0;
 }
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s;
+	s = find_section(hdr, sechdrs, ".altinstructions");
+	if (s)
+		apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+
+	return module_init_ftrace_plt(hdr, sechdrs, me);
+}
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 4cfed91fe256..1ef702b0be2d 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -6,13 +6,153 @@
  * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  */
 
+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
 #include <linux/export.h>
+#include <linux/io.h>
 #include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
 #include <linux/types.h>
+
 #include <asm/paravirt.h>
+#include <asm/pvclock-abi.h>
+#include <asm/smp_plat.h>
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
 struct paravirt_patch_template pv_ops;
 EXPORT_SYMBOL_GPL(pv_ops);
+
+struct pv_time_stolen_time_region {
+	struct pvclock_vcpu_stolen_time *kaddr;
+};
+
+static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+	steal_acc = false;
+	return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 pv_steal_clock(int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+
+	reg = per_cpu_ptr(&stolen_time_region, cpu);
+	if (!reg->kaddr) {
+		pr_warn_once("stolen time enabled but not configured for cpu %d\n",
+			     cpu);
+		return 0;
+	}
+
+	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int stolen_time_dying_cpu(unsigned int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+
+	reg = this_cpu_ptr(&stolen_time_region);
+	if (!reg->kaddr)
+		return 0;
+
+	memunmap(reg->kaddr);
+	memset(reg, 0, sizeof(*reg));
+
+	return 0;
+}
+
+static int init_stolen_time_cpu(unsigned int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+	struct arm_smccc_res res;
+
+	reg = this_cpu_ptr(&stolen_time_region);
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+		return -EINVAL;
+
+	reg->kaddr = memremap(res.a0,
+			      sizeof(struct pvclock_vcpu_stolen_time),
+			      MEMREMAP_WB);
+
+	if (!reg->kaddr) {
+		pr_warn("Failed to map stolen time data structure\n");
+		return -ENOMEM;
+	}
+
+	if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+	    le32_to_cpu(reg->kaddr->attributes) != 0) {
+		pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int pv_time_init_stolen_time(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
+				"hypervisor/arm/pvtime:starting",
+				init_stolen_time_cpu, stolen_time_dying_cpu);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static bool has_pv_steal_clock(void)
+{
+	struct arm_smccc_res res;
+
+	/* To detect the presence of PV time support we require SMCCC 1.1+ */
+	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
+
+	if (res.a0 != SMCCC_RET_SUCCESS)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
+			     ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+	return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_time_init(void)
+{
+	int ret;
+
+	if (!has_pv_steal_clock())
+		return 0;
+
+	ret = pv_time_init_stolen_time();
+	if (ret)
+		return ret;
+
+	pv_ops.time.steal_clock = pv_steal_clock;
+
+	static_key_slow_inc(&paravirt_steal_enabled);
+	if (steal_acc)
+		static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+	pr_info("using stolen time PV\n");
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a0b4f1bca491..e40b65645c86 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
 }
 
-#define ARMV8_EVENT_ATTR(name, config) \
-	PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
-		       config, armv8pmu_events_sysfs_show)
-
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
-ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
-ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-/* Don't expose the chain event in /sys, since it's useless in isolation */
-ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED);
-ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND);
-ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND);
-ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB);
-ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB);
-ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE);
-ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
-ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
-ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
-ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
-ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
-ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
-ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
-ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
-ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
-ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
-ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
-ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
-ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
-ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);
+#define ARMV8_EVENT_ATTR(name, config)						\
+	(&((struct perf_pmu_events_attr) {					\
+		.attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL),	\
+		.id = config,							\
+	}).attr.attr)
 
 static struct attribute *armv8_pmuv3_event_attrs[] = {
-	&armv8_event_attr_sw_incr.attr.attr,
-	&armv8_event_attr_l1i_cache_refill.attr.attr,
-	&armv8_event_attr_l1i_tlb_refill.attr.attr,
-	&armv8_event_attr_l1d_cache_refill.attr.attr,
-	&armv8_event_attr_l1d_cache.attr.attr,
-	&armv8_event_attr_l1d_tlb_refill.attr.attr,
-	&armv8_event_attr_ld_retired.attr.attr,
-	&armv8_event_attr_st_retired.attr.attr,
-	&armv8_event_attr_inst_retired.attr.attr,
-	&armv8_event_attr_exc_taken.attr.attr,
-	&armv8_event_attr_exc_return.attr.attr,
-	&armv8_event_attr_cid_write_retired.attr.attr,
-	&armv8_event_attr_pc_write_retired.attr.attr,
-	&armv8_event_attr_br_immed_retired.attr.attr,
-	&armv8_event_attr_br_return_retired.attr.attr,
-	&armv8_event_attr_unaligned_ldst_retired.attr.attr,
-	&armv8_event_attr_br_mis_pred.attr.attr,
-	&armv8_event_attr_cpu_cycles.attr.attr,
-	&armv8_event_attr_br_pred.attr.attr,
-	&armv8_event_attr_mem_access.attr.attr,
-	&armv8_event_attr_l1i_cache.attr.attr,
-	&armv8_event_attr_l1d_cache_wb.attr.attr,
-	&armv8_event_attr_l2d_cache.attr.attr,
-	&armv8_event_attr_l2d_cache_refill.attr.attr,
-	&armv8_event_attr_l2d_cache_wb.attr.attr,
-	&armv8_event_attr_bus_access.attr.attr,
-	&armv8_event_attr_memory_error.attr.attr,
-	&armv8_event_attr_inst_spec.attr.attr,
-	&armv8_event_attr_ttbr_write_retired.attr.attr,
-	&armv8_event_attr_bus_cycles.attr.attr,
-	&armv8_event_attr_l1d_cache_allocate.attr.attr,
-	&armv8_event_attr_l2d_cache_allocate.attr.attr,
-	&armv8_event_attr_br_retired.attr.attr,
-	&armv8_event_attr_br_mis_pred_retired.attr.attr,
-	&armv8_event_attr_stall_frontend.attr.attr,
-	&armv8_event_attr_stall_backend.attr.attr,
-	&armv8_event_attr_l1d_tlb.attr.attr,
-	&armv8_event_attr_l1i_tlb.attr.attr,
-	&armv8_event_attr_l2i_cache.attr.attr,
-	&armv8_event_attr_l2i_cache_refill.attr.attr,
-	&armv8_event_attr_l3d_cache_allocate.attr.attr,
-	&armv8_event_attr_l3d_cache_refill.attr.attr,
-	&armv8_event_attr_l3d_cache.attr.attr,
-	&armv8_event_attr_l3d_cache_wb.attr.attr,
-	&armv8_event_attr_l2d_tlb_refill.attr.attr,
-	&armv8_event_attr_l2i_tlb_refill.attr.attr,
-	&armv8_event_attr_l2d_tlb.attr.attr,
-	&armv8_event_attr_l2i_tlb.attr.attr,
-	&armv8_event_attr_remote_access.attr.attr,
-	&armv8_event_attr_ll_cache.attr.attr,
-	&armv8_event_attr_ll_cache_miss.attr.attr,
-	&armv8_event_attr_dtlb_walk.attr.attr,
-	&armv8_event_attr_itlb_walk.attr.attr,
-	&armv8_event_attr_ll_cache_rd.attr.attr,
-	&armv8_event_attr_ll_cache_miss_rd.attr.attr,
-	&armv8_event_attr_remote_access_rd.attr.attr,
-	&armv8_event_attr_sample_pop.attr.attr,
-	&armv8_event_attr_sample_feed.attr.attr,
-	&armv8_event_attr_sample_filtrate.attr.attr,
-	&armv8_event_attr_sample_collision.attr.attr,
+	ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+	ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+	ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+	ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+	ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+	ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+	ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+	ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+	ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+	ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+	ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+	ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+	ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+	ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+	ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+	ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+	ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+	ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+	ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+	ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+	ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+	ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+	ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+	/* Don't expose the chain event in /sys, since it's useless in isolation */
+	ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+	ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+	ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+	ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+	ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+	ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+	ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+	ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+	ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+	ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+	ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+	ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+	ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+	ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+	ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+	ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+	ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+	ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+	ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+	ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+	ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+	ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+	ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
 	NULL,
 };
 
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..d1c95dcf1d78 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void)
 					(unsigned long)__irqentry_text_end);
 	if (ret)
 		return ret;
-	ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start,
-					(unsigned long)__exception_text_end);
-	if (ret)
-		return ret;
 	ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
 					(unsigned long)__idmap_text_end);
 	if (ret)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..bbb0f0c145f6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -360,8 +360,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 asmlinkage void ret_from_fork(void) asm("ret_from_fork");
 
-int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-		unsigned long stk_sz, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long stack_start,
+		unsigned long stk_sz, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -394,11 +394,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		}
 
 		/*
-		 * If a TLS pointer was passed to clone (4th argument), use it
-		 * for the new thread.
+		 * If a TLS pointer was passed to clone, use it for the new
+		 * thread.
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			p->thread.uw.tp_value = childregs->regs[3];
+			p->thread.uw.tp_value = tls;
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
@@ -646,6 +646,6 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void)
 	 * Only allow a task to be preempted once cpufeatures have been
 	 * enabled.
 	 */
-	if (static_branch_likely(&arm64_const_caps_ready))
+	if (system_capabilities_finalized())
 		preempt_schedule_irq();
 }
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index c9f72b2665f1..43ae4e0c968f 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu)
 
 static int cpu_psci_cpu_kill(unsigned int cpu)
 {
-	int err, i;
+	int err;
+	unsigned long start, end;
 
 	if (!psci_ops.affinity_info)
 		return 0;
@@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
 	 * while it is dying. So, try again a few times.
 	 */
 
-	for (i = 0; i < 10; i++) {
+	start = jiffies;
+	end = start + msecs_to_jiffies(100);
+	do {
 		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
 		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
-			pr_info("CPU%d killed.\n", cpu);
+			pr_info("CPU%d killed (polled %d ms)\n", cpu,
+				jiffies_to_msecs(jiffies - start));
 			return 0;
 		}
 
-		msleep(10);
-		pr_info("Retrying again to check for CPU kill\n");
-	}
+		usleep_range(100, 1000);
+	} while (time_before(jiffies, end));
 
 	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
 			cpu, err);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 21176d02e21a..cd6e5fa48b9c 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 	return 0;
 }
 
+static int fpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!system_supports_fpsimd())
+		return -ENODEV;
+	return regset->n;
+}
+
 /*
  * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
  */
@@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
 {
+	if (!system_supports_fpsimd())
+		return -EINVAL;
+
 	if (target == current)
 		fpsimd_preserve_current_state();
 
@@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 {
 	int ret;
 
+	if (!system_supports_fpsimd())
+		return -EINVAL;
+
 	ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
 	if (ret)
 		return ret;
@@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = {
 		 */
 		.size = sizeof(u32),
 		.align = sizeof(u32),
+		.active = fpr_active,
 		.get = fpr_get,
 		.set = fpr_set
 	},
@@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;
 
+	if (!system_supports_fpsimd())
+		return -EINVAL;
+
 	uregs = &target->thread.uw.fpsimd_state;
 
 	if (target == current)
@@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;
 
+	if (!system_supports_fpsimd())
+		return -EINVAL;
+
 	uregs = &target->thread.uw.fpsimd_state;
 
 	vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
@@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = {
 		.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
 		.size = sizeof(compat_ulong_t),
 		.align = sizeof(compat_ulong_t),
+		.active = fpr_active,
 		.get = compat_vfp_get,
 		.set = compat_vfp_set
 	},
@@ -1816,7 +1837,7 @@ int syscall_trace_enter(struct pt_regs *regs)
 	}
 
 	/* Do the secure computing after ptrace; failures should be fast. */
-	if (secure_computing(NULL) == -1)
+	if (secure_computing() == -1)
 		return -1;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8f9dc6..d6259dac62b6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -2,6 +2,7 @@
 // Copyright (C) 2017 Arm Ltd.
 #define pr_fmt(fmt) "sdei: " fmt
 
+#include <linux/arm-smccc.h>
 #include <linux/arm_sdei.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
@@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
 			return 0;
 	}
 
-	sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+	sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	if (arm64_kernel_unmapped_at_el0()) {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 56f664561754..a34890bf309f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -285,6 +285,13 @@ void __init setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
+	/*
+	 * If know now we are going to need KPTI then use non-global
+	 * mappings from the start, avoiding the cost of rewriting
+	 * everything later.
+	 */
+	arm64_use_ng_mappings = kaslr_requires_kpti();
+
 	early_fixmap_init();
 	early_ioremap_init();
 
@@ -353,9 +360,6 @@ void __init setup_arch(char **cmdline_p)
 	init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
 #endif
 
-#ifdef CONFIG_VT
-	conswitchp = &dummy_con;
-#endif
 	if (boot_args[1] || boot_args[2] || boot_args[3]) {
 		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
 			"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index dd2cdc0d5be2..339882db5a91 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -371,6 +371,8 @@ static int parse_user_sigframe(struct user_ctxs *user,
 			goto done;
 
 		case FPSIMD_MAGIC:
+			if (!system_supports_fpsimd())
+				goto invalid;
 			if (user->fpsimd)
 				goto invalid;
 
@@ -506,7 +508,7 @@ static int restore_sigframe(struct pt_regs *regs,
 	if (err == 0)
 		err = parse_user_sigframe(&user, sf);
 
-	if (err == 0) {
+	if (err == 0 && system_supports_fpsimd()) {
 		if (!user.fpsimd)
 			return -EINVAL;
 
@@ -623,7 +625,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
 
 	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
 
-	if (err == 0) {
+	if (err == 0 && system_supports_fpsimd()) {
 		struct fpsimd_context __user *fpsimd_ctx =
 			apply_user_offset(user, user->fpsimd_offset);
 		err |= preserve_fpsimd_context(fpsimd_ctx);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 12a585386c2f..82feca6f7052 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -223,7 +223,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
 	err |= !valid_user_regs(&regs->user_regs, current);
 
 	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
-	if (err == 0)
+	if (err == 0 && system_supports_fpsimd())
 		err |= compat_restore_vfp_context(&aux->vfp);
 
 	return err;
@@ -419,7 +419,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf,
 
 	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
 
-	if (err == 0)
+	if (err == 0 && system_supports_fpsimd())
 		err |= compat_preserve_vfp_context(&aux->vfp);
 	__put_user_error(0, &aux->end_magic, err);
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..d4ed9a19d8fe 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -31,6 +31,7 @@
 #include <linux/of.h>
 #include <linux/irq_work.h>
 #include <linux/kexec.h>
+#include <linux/kvm_host.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -39,6 +40,7 @@
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
 #include <asm/daifflags.h>
+#include <asm/kvm_mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
 #include <asm/pgtable.h>
@@ -345,8 +347,7 @@ void __cpu_die(unsigned int cpu)
 	 */
 	err = op_cpu_kill(cpu);
 	if (err)
-		pr_warn("CPU%d may not have shut down cleanly: %d\n",
-			cpu, err);
+		pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
 }
 
 /*
@@ -408,6 +409,8 @@ static void __init hyp_mode_check(void)
 			   "CPU: CPUs started in inconsistent modes");
 	else
 		pr_info("CPU: All CPU(s) started at EL1\n");
+	if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+		kvm_compute_layout();
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -976,8 +979,8 @@ void smp_send_stop(void)
 		udelay(1);
 
 	if (num_online_cpus() > 1)
-		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
-			   cpumask_pr_args(cpu_online_mask));
+		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+			cpumask_pr_args(cpu_online_mask));
 
 	sdei_mask_local_cpu();
 }
@@ -1017,8 +1020,8 @@ void crash_smp_send_stop(void)
 		udelay(1);
 
 	if (atomic_read(&waiting_for_crash_ipi) > 0)
-		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
-			   cpumask_pr_args(&mask));
+		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+			cpumask_pr_args(&mask));
 
 	sdei_mask_local_cpu();
 }
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
index 52cfc6148355..b26955f56750 100644
--- a/arch/arm64/kernel/ssbd.c
+++ b/arch/arm64/kernel/ssbd.c
@@ -37,7 +37,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
 
 	/* Unsupported */
 	if (state == ARM64_SSBD_UNKNOWN)
-		return -EINVAL;
+		return -ENODEV;
 
 	/* Treat the unaffected/mitigated state separately */
 	if (state == ARM64_SSBD_MITIGATED) {
@@ -102,7 +102,7 @@ static int ssbd_prctl_get(struct task_struct *task)
 {
 	switch (arm64_get_ssbd_state()) {
 	case ARM64_SSBD_UNKNOWN:
-		return -EINVAL;
+		return -ENODEV;
 	case ARM64_SSBD_FORCE_ENABLE:
 		return PR_SPEC_DISABLE;
 	case ARM64_SSBD_KERNEL:
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f1cb64959427..3c18c2454089 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/compat.h>
+#include <linux/cpufeature.h>
 #include <linux/personality.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
@@ -17,6 +18,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/system_misc.h>
+#include <asm/tlbflush.h>
 #include <asm/unistd.h>
 
 static long
@@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
 		if (fatal_signal_pending(current))
 			return 0;
 
+		if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+			/*
+			 * The workaround requires an inner-shareable tlbi.
+			 * We pick the reserved-ASID to minimise the impact.
+			 */
+			__tlbi(aside1is, __TLBI_VADDR(0, 0));
+			dsb(ish);
+		}
+
 		ret = __flush_cache_user_range(start, start + chunk);
 		if (ret)
 			return ret;
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 871c739f060a..a12c0c88d345 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -154,14 +154,14 @@ static inline void sve_user_discard(void)
 	sve_user_disable();
 }
 
-asmlinkage void el0_svc_handler(struct pt_regs *regs)
+void do_el0_svc(struct pt_regs *regs)
 {
 	sve_user_discard();
 	el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
 }
 
 #ifdef CONFIG_COMPAT
-asmlinkage void el0_svc_compat_handler(struct pt_regs *regs)
+void do_el0_svc_compat(struct pt_regs *regs)
 {
 	el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
 		       compat_sys_call_table);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 0b2946414dc9..73f06d4b3aae 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -30,6 +30,7 @@
 
 #include <asm/thread_info.h>
 #include <asm/stacktrace.h>
+#include <asm/paravirt.h>
 
 unsigned long profile_pc(struct pt_regs *regs)
 {
@@ -65,4 +66,6 @@ void __init time_init(void)
 
 	/* Calibrate the delay loop directly */
 	lpj_fine = arch_timer_rate / HZ;
+
+	pv_time_init();
 }
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 34739e80211b..cf402be5c573 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -35,6 +35,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
+#include <asm/kprobes.h>
 #include <asm/traps.h>
 #include <asm/smp.h>
 #include <asm/stack_pointer.h>
@@ -143,9 +144,12 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 
 #ifdef CONFIG_PREEMPT
 #define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
 #else
 #define S_PREEMPT ""
 #endif
+
 #define S_SMP " SMP"
 
 static int __die(const char *str, int err, struct pt_regs *regs)
@@ -393,7 +397,7 @@ void arm64_notify_segfault(unsigned long addr)
 	force_signal_inject(SIGSEGV, code, addr);
 }
 
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+void do_undefinstr(struct pt_regs *regs)
 {
 	/* check for AArch32 breakpoint instructions */
 	if (!aarch32_break_handler(regs))
@@ -405,6 +409,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	BUG_ON(!user_mode(regs));
 	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 }
+NOKPROBE_SYMBOL(do_undefinstr);
 
 #define __user_cache_maint(insn, address, res)			\
 	if (address >= user_addr_max()) {			\
@@ -470,6 +475,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
 	int rt = ESR_ELx_SYS64_ISS_RT(esr);
 	unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
 
+	if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+		/* Hide DIC so that we can trap the unnecessary maintenance...*/
+		val &= ~BIT(CTR_DIC_SHIFT);
+
+		/* ... and fake IminLine to reduce the number of traps. */
+		val &= ~CTR_IMINLINE_MASK;
+		val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+	}
+
 	pt_regs_write_reg(regs, rt, val);
 
 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -667,7 +681,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
 	{},
 };
 
-asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned int esr, struct pt_regs *regs)
 {
 	const struct sys64_hook *hook, *hook_base;
 
@@ -705,9 +719,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
 	 */
 	do_undefinstr(regs);
 }
+NOKPROBE_SYMBOL(do_cp15instr);
 #endif
 
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned int esr, struct pt_regs *regs)
 {
 	const struct sys64_hook *hook;
 
@@ -724,6 +739,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
 	 */
 	do_undefinstr(regs);
 }
+NOKPROBE_SYMBOL(do_sysinstr);
 
 static const char *esr_class_str[] = {
 	[0 ... ESR_ELx_EC_MAX]		= "UNRECOGNIZED EC",
@@ -793,7 +809,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
  * exceptions taken from EL0. Unlike bad_mode, this returns.
  */
-asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 {
 	void __user *pc = (void __user *)instruction_pointer(regs);
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index aa76f7259668..497f9675071d 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,8 @@
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  */
 
+#define RO_EXCEPTION_TABLE_ALIGN	8
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/kernel-pgtable.h>
@@ -111,9 +113,6 @@ SECTIONS
 	}
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
-			__exception_text_start = .;
-			*(.exception.text)
-			__exception_text_end = .;
 			IRQENTRY_TEXT
 			SOFTIRQENTRY_TEXT
 			ENTRY_TEXT
@@ -135,11 +134,9 @@ SECTIONS
 	. = ALIGN(SEGMENT_ALIGN);
 	_etext = .;			/* End of text section */
 
-	RO_DATA(PAGE_SIZE)		/* everything from this point to     */
-	EXCEPTION_TABLE(8)		/* __init_begin will be marked RO NX */
-	NOTES
+	/* everything from this point to __init_begin will be marked RO NX */
+	RO_DATA(PAGE_SIZE)
 
-	. = ALIGN(PAGE_SIZE);
 	idmap_pg_dir = .;
 	. += IDMAP_DIR_SIZE;
 
@@ -161,9 +158,12 @@ SECTIONS
 	__inittext_begin = .;
 
 	INIT_TEXT_SECTION(8)
+
+	__exittext_begin = .;
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}
+	__exittext_end = .;
 
 	. = ALIGN(4);
 	.altinstructions : {
@@ -215,7 +215,7 @@ SECTIONS
 
 	_data = .;
 	_sdata = .;
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
 
 	/*
 	 * Data written with the MMU off but read with the MMU on requires
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..a475c68cbfec 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,8 @@ if VIRTUALIZATION
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on OF
+	# for TASKSTATS/TASK_DELAY_ACCT:
+	depends on NET && MULTIUSER
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -39,6 +41,8 @@ config KVM
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
+	select TASKSTATS
+	select TASK_DELAY_ACCT
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3ac1a64d2fb9..5ffbdc39e780 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 43487f035385..7a7e425616b5 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 {
 	bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
-	unsigned long mdscr;
+	unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
 
 	trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
 
@@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 	if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
 		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 
+	/* Write mdcr_el2 changes since vcpu_load on VHE systems */
+	if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
+		write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
 	trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
 	trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
 }
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfd626447482..2fff06114a8f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -34,6 +34,10 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(halt_successful_poll),
+	VCPU_STAT(halt_attempted_poll),
+	VCPU_STAT(halt_poll_invalid),
+	VCPU_STAT(halt_wakeup),
 	VCPU_STAT(hvc_exit_stat),
 	VCPU_STAT(wfe_exit_stat),
 	VCPU_STAT(wfi_exit_stat),
@@ -712,6 +716,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 	if (events->exception.serror_pending && events->exception.serror_has_esr)
 		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
 
+	/*
+	 * We never return a pending ext_dabt here because we deliver it to
+	 * the virtual CPU directly when setting the event and it's no longer
+	 * 'pending' at this point.
+	 */
+
 	return 0;
 }
 
@@ -720,6 +730,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 {
 	bool serror_pending = events->exception.serror_pending;
 	bool has_esr = events->exception.serror_has_esr;
+	bool ext_dabt_pending = events->exception.ext_dabt_pending;
 
 	if (serror_pending && has_esr) {
 		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
@@ -733,6 +744,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 		kvm_inject_vabt(vcpu);
 	}
 
+	if (ext_dabt_pending)
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
 	return 0;
 }
 
@@ -858,6 +872,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 	case KVM_ARM_VCPU_TIMER_CTRL:
 		ret = kvm_arm_timer_set_attr(vcpu, attr);
 		break;
+	case KVM_ARM_VCPU_PVTIME_CTRL:
+		ret = kvm_arm_pvtime_set_attr(vcpu, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -878,6 +895,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 	case KVM_ARM_VCPU_TIMER_CTRL:
 		ret = kvm_arm_timer_get_attr(vcpu, attr);
 		break;
+	case KVM_ARM_VCPU_PVTIME_CTRL:
+		ret = kvm_arm_pvtime_get_attr(vcpu, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -898,6 +918,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 	case KVM_ARM_VCPU_TIMER_CTRL:
 		ret = kvm_arm_timer_has_attr(vcpu, attr);
 		break;
+	case KVM_ARM_VCPU_PVTIME_CTRL:
+		ret = kvm_arm_pvtime_has_attr(vcpu, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 706cca23f0d2..aacfc55de44c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -11,8 +11,6 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
-#include <kvm/arm_psci.h>
-
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/kvm_asm.h>
@@ -22,6 +20,8 @@
 #include <asm/debug-monitors.h>
 #include <asm/traps.h>
 
+#include <kvm/arm_hypercalls.h>
+
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..0c6832ec52b1 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -22,7 +22,12 @@
 	.text
 	.pushsection	.hyp.text, "ax"
 
+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
 .macro save_callee_saved_regs ctxt
+	str	x18,      [\ctxt, #CPU_XREG_OFFSET(18)]
 	stp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
 	stp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
 	stp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +37,8 @@
 .endm
 
 .macro restore_callee_saved_regs ctxt
+	// We require \ctxt is not x18-x28
+	ldr	x18,      [\ctxt, #CPU_XREG_OFFSET(18)]
 	ldp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
 	ldp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
 	ldp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -48,7 +55,7 @@ ENTRY(__guest_enter)
 	// x0: vcpu
 	// x1: host context
 	// x2-x17: clobbered by macros
-	// x18: guest context
+	// x29: guest context
 
 	// Store the host regs
 	save_callee_saved_regs x1
@@ -67,31 +74,28 @@ alternative_else_nop_endif
 	ret
 
 1:
-	add	x18, x0, #VCPU_CONTEXT
+	add	x29, x0, #VCPU_CONTEXT
 
 	// Macro ptrauth_switch_to_guest format:
 	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
 	// The below macro to restore guest keys is not implemented in C code
 	// as it may cause Pointer Authentication key signing mismatch errors
 	// when this feature is enabled for kernel code.
-	ptrauth_switch_to_guest x18, x0, x1, x2
+	ptrauth_switch_to_guest x29, x0, x1, x2
 
 	// Restore guest regs x0-x17
-	ldp	x0, x1,   [x18, #CPU_XREG_OFFSET(0)]
-	ldp	x2, x3,   [x18, #CPU_XREG_OFFSET(2)]
-	ldp	x4, x5,   [x18, #CPU_XREG_OFFSET(4)]
-	ldp	x6, x7,   [x18, #CPU_XREG_OFFSET(6)]
-	ldp	x8, x9,   [x18, #CPU_XREG_OFFSET(8)]
-	ldp	x10, x11, [x18, #CPU_XREG_OFFSET(10)]
-	ldp	x12, x13, [x18, #CPU_XREG_OFFSET(12)]
-	ldp	x14, x15, [x18, #CPU_XREG_OFFSET(14)]
-	ldp	x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
-	// Restore guest regs x19-x29, lr
-	restore_callee_saved_regs x18
-
-	// Restore guest reg x18
-	ldr	x18,      [x18, #CPU_XREG_OFFSET(18)]
+	ldp	x0, x1,   [x29, #CPU_XREG_OFFSET(0)]
+	ldp	x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
+	ldp	x4, x5,   [x29, #CPU_XREG_OFFSET(4)]
+	ldp	x6, x7,   [x29, #CPU_XREG_OFFSET(6)]
+	ldp	x8, x9,   [x29, #CPU_XREG_OFFSET(8)]
+	ldp	x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+	ldp	x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+	ldp	x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+	ldp	x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+	// Restore guest regs x18-x29, lr
+	restore_callee_saved_regs x29
 
 	// Do not touch any register after this!
 	eret
@@ -114,7 +118,7 @@ ENTRY(__guest_exit)
 	// Retrieve the guest regs x0-x1 from the stack
 	ldp	x2, x3, [sp], #16	// x0, x1
 
-	// Store the guest regs x0-x1 and x4-x18
+	// Store the guest regs x0-x1 and x4-x17
 	stp	x2, x3,   [x1, #CPU_XREG_OFFSET(0)]
 	stp	x4, x5,   [x1, #CPU_XREG_OFFSET(4)]
 	stp	x6, x7,   [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +127,8 @@ ENTRY(__guest_exit)
 	stp	x12, x13, [x1, #CPU_XREG_OFFSET(12)]
 	stp	x14, x15, [x1, #CPU_XREG_OFFSET(14)]
 	stp	x16, x17, [x1, #CPU_XREG_OFFSET(16)]
-	str	x18,      [x1, #CPU_XREG_OFFSET(18)]
 
-	// Store the guest regs x19-x29, lr
+	// Store the guest regs x18-x29, lr
 	save_callee_saved_regs x1
 
 	get_host_ctxt	x2, x3
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 799e84a40335..dfe8dd172512 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -12,7 +12,7 @@
 
 #include <kvm/arm_psci.h>
 
-#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
 #include <asm/cpufeature.h>
 #include <asm/kprobes.h>
 #include <asm/kvm_asm.h>
@@ -28,7 +28,15 @@
 /* Check whether the FP regs were dirtied while in the host-side run loop: */
 static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+	/*
+	 * When the system doesn't support FP/SIMD, we cannot rely on
+	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+	 * abort on the very first access to FP and thus we should never
+	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+	 * trap the accesses.
+	 */
+	if (!system_supports_fpsimd() ||
+	    vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
 		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
 				      KVM_ARM64_FP_HOST);
 
@@ -118,6 +126,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 	}
 
 	write_sysreg(val, cptr_el2);
+
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+		struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+		isb();
+		/*
+		 * At this stage, and thanks to the above isb(), S2 is
+		 * configured and enabled. We can now restore the guest's S1
+		 * configuration: SCTLR, and only then TCR.
+		 */
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		isb();
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	}
 }
 
 static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
@@ -144,11 +166,11 @@ static void deactivate_traps_vhe(void)
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 
 	/*
-	 * ARM erratum 1165522 requires the actual execution of the above
-	 * before we can switch to the EL2/EL0 translation regime used by
+	 * ARM errata 1165522 and 1530923 require the actual execution of the
+	 * above before we can switch to the EL2/EL0 translation regime used by
 	 * the host.
 	 */
-	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
+	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
 
 	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
 	write_sysreg(vectors, vbar_el1);
@@ -159,6 +181,23 @@ static void __hyp_text __deactivate_traps_nvhe(void)
 {
 	u64 mdcr_el2 = read_sysreg(mdcr_el2);
 
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+		u64 val;
+
+		/*
+		 * Set the TCR and SCTLR registers in the exact opposite
+		 * sequence as __activate_traps_nvhe (first prevent walks,
+		 * then force the MMU on). A generous sprinkling of isb()
+		 * ensure that things happen in this exact order.
+		 */
+		val = read_sysreg_el1(SYS_TCR);
+		write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+		isb();
+		val = read_sysreg_el1(SYS_SCTLR);
+		write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+		isb();
+	}
+
 	__deactivate_traps_common();
 
 	mdcr_el2 &= MDCR_EL2_HPMN_MASK;
@@ -657,7 +696,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 	 */
 	if (system_uses_irq_prio_masking()) {
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-		dsb(sy);
+		pmr_sync();
 	}
 
 	vcpu = kern_hyp_va(vcpu);
@@ -670,18 +709,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
 	__sysreg_save_state_nvhe(host_ctxt);
 
-	__activate_vm(kern_hyp_va(vcpu->kvm));
-	__activate_traps(vcpu);
-
-	__hyp_vgic_restore_state(vcpu);
-	__timer_enable_traps(vcpu);
-
 	/*
 	 * We must restore the 32-bit state before the sysregs, thanks
 	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+	 *
+	 * Also, and in order to be able to deal with erratum #1319537 (A57)
+	 * and #1319367 (A72), we must ensure that all VM-related sysreg are
+	 * restored before we enable S2 translation.
 	 */
 	__sysreg32_restore_state(vcpu);
 	__sysreg_restore_state_nvhe(guest_ctxt);
+
+	__activate_vm(kern_hyp_va(vcpu->kvm));
+	__activate_traps(vcpu);
+
+	__hyp_vgic_restore_state(vcpu);
+	__timer_enable_traps(vcpu);
+
 	__debug_switch_to_guest(vcpu);
 
 	__set_guest_arch_workaround_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7ddbc849b580..7672a978926c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 {
 	write_sysreg(ctxt->sys_regs[MPIDR_EL1],		vmpidr_el2);
 	write_sysreg(ctxt->sys_regs[CSSELR_EL1],	csselr_el1);
-	write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+
+	if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	} else	if (!ctxt->__hyp_running_vcpu) {
+		/*
+		 * Must only be done for guest registers, hence the context
+		 * test. We're coming from the host, so SCTLR.M is already
+		 * set. Pairs with __activate_traps_nvhe().
+		 */
+		write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
+				  TCR_EPD1_MASK | TCR_EPD0_MASK),
+				 SYS_TCR);
+		isb();
+	}
+
 	write_sysreg(ctxt->sys_regs[ACTLR_EL1],		actlr_el1);
 	write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],	SYS_CPACR);
 	write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],	SYS_TTBR0);
 	write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],	SYS_TTBR1);
-	write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
 	write_sysreg_el1(ctxt->sys_regs[ESR_EL1],	SYS_ESR);
 	write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1],	SYS_AFSR0);
 	write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1],	SYS_AFSR1);
@@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt->sys_regs[PAR_EL1],		par_el1);
 	write_sysreg(ctxt->sys_regs[TPIDR_EL1],		tpidr_el1);
 
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
+	    ctxt->__hyp_running_vcpu) {
+		/*
+		 * Must only be done for host registers, hence the context
+		 * test. Pairs with __deactivate_traps_nvhe().
+		 */
+		isb();
+		/*
+		 * At this stage, and thanks to the above isb(), S2 is
+		 * deconfigured and disabled. We can now restore the host's
+		 * S1 configuration: SCTLR, and only then TCR.
+		 */
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		isb();
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	}
+
 	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);
 	write_sysreg_el1(ctxt->gp_regs.elr_el1,		SYS_ELR);
 	write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index eb0efc5557f3..92f560e3e1aa 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -23,10 +23,10 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
 
 	local_irq_save(cxt->flags);
 
-	if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
 		/*
-		 * For CPUs that are affected by ARM erratum 1165522, we
-		 * cannot trust stage-1 to be in a correct state at that
+		 * For CPUs that are affected by ARM errata 1165522 or 1530923,
+		 * we cannot trust stage-1 to be in a correct state at that
 		 * point. Since we do not want to force a full load of the
 		 * vcpu state, we prevent the EL1 page-table walker to
 		 * allocate new TLBs. This is done by setting the EPD bits
@@ -63,6 +63,22 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
 static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
 						  struct tlb_inv_context *cxt)
 {
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+		u64 val;
+
+		/*
+		 * For CPUs that are affected by ARM 1319367, we need to
+		 * avoid a host Stage-1 walk while we have the guest's
+		 * VMID set in the VTTBR in order to invalidate TLBs.
+		 * We're guaranteed that the S1 MMU is enabled, so we can
+		 * simply set the EPD bits to avoid any further TLB fill.
+		 */
+		val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+		val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+		write_sysreg_el1(val, SYS_TCR);
+		isb();
+	}
+
 	__load_guest_stage2(kvm);
 	isb();
 }
@@ -87,7 +103,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 	isb();
 
-	if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
 		/* Restore the registers to what they were */
 		write_sysreg_el1(cxt->tcr, SYS_TCR);
 		write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
@@ -100,6 +116,13 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
 						 struct tlb_inv_context *cxt)
 {
 	write_sysreg(0, vttbr_el2);
+
+	if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+		/* Ensure write of the host VMID */
+		isb();
+		/* Restore the host's TCR_EL1 */
+		write_sysreg_el1(cxt->tcr, SYS_TCR);
+	}
 }
 
 static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a9d25a305af5..ccdb6a051ab2 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -109,7 +109,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 
 /**
  * kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the data abort
  * @addr: The address to report in the DFAR
  *
  * It is assumed that this code is called from the VCPU thread and that the
@@ -125,7 +125,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 
 /**
  * kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the prefetch abort
  * @addr: The address to report in the DFAR
  *
  * It is assumed that this code is called from the VCPU thread and that the
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 46822afc57e0..3e909b117f0c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1424,7 +1424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	ID_SANITISED(ID_ISAR4_EL1),
 	ID_SANITISED(ID_ISAR5_EL1),
 	ID_SANITISED(ID_MMFR4_EL1),
-	ID_UNALLOCATED(2,7),
+	ID_SANITISED(ID_ISAR6_EL1),
 
 	/* CRm=3 */
 	ID_SANITISED(MVFR0_EL1),
@@ -2098,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 		WARN_ON(1);
 	}
 
-	kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
-		cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
-	print_sys_reg_instr(params);
+	print_sys_reg_msg(params,
+			  "Unsupported guest CP%d access at: %08lx [%08lx]\n",
+			  cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
 	kvm_inject_undefined(vcpu);
 }
 
@@ -2233,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 				NULL, 0);
 }
 
+static bool is_imp_def_sys_reg(struct sys_reg_params *params)
+{
+	// See ARM DDI 0487E.a, section D12.3.2
+	return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   struct sys_reg_params *params)
 {
@@ -2248,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 
 	if (likely(r)) {
 		perform_access(vcpu, params, r);
+	} else if (is_imp_def_sys_reg(params)) {
+		kvm_inject_undefined(vcpu);
 	} else {
-		kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
-			*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
-		print_sys_reg_instr(params);
+		print_sys_reg_msg(params,
+				  "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+				  *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
 		kvm_inject_undefined(vcpu);
 	}
 	return 1;
@@ -2360,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
 		return NULL;
 
+	if (!index_to_params(id, &params))
+		return NULL;
+
 	table = get_target_table(vcpu->arch.target, true, &num);
-	r = find_reg_by_id(id, &params, table, num);
+	r = find_reg(&params, table, num);
 	if (!r)
 		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 9bca0312d798..5a6fc30f5989 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -62,11 +62,24 @@ struct sys_reg_desc {
 #define REG_HIDDEN_USER		(1 << 0) /* hidden from userspace ioctls */
 #define REG_HIDDEN_GUEST	(1 << 1) /* hidden from guest */
 
-static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+static __printf(2, 3)
+inline void print_sys_reg_msg(const struct sys_reg_params *p,
+				       char *fmt, ...)
 {
+	va_list va;
+
+	va_start(va, fmt);
 	/* Look, we even formatted it for you to paste into the table! */
-	kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+	kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+		      &(struct va_format){ fmt, &va },
 		      p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+	va_end(va);
+}
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+	/* GCC warns on an empty format string */
+	print_sys_reg_msg(p, "%s", "");
 }
 
 static inline bool ignore_write(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 2cf7d4b606c3..dab1fea4752a 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -22,7 +22,7 @@ static u8 tag_lsb;
 static u64 tag_val;
 static u64 va_mask;
 
-static void compute_layout(void)
+__init void kvm_compute_layout(void)
 {
 	phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
 	u64 hyp_va_msb;
@@ -110,9 +110,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
 
 	BUG_ON(nr_inst != 5);
 
-	if (!has_vhe() && !va_mask)
-		compute_layout();
-
 	for (i = 0; i < nr_inst; i++) {
 		u32 rd, rn, insn, oinsn;
 
@@ -156,9 +153,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
 		return;
 	}
 
-	if (!va_mask)
-		compute_layout();
-
 	/*
 	 * Compute HYP VA by using the same computation as kern_hyp_va()
 	 */
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index c21b936dc01d..2fc253466dbf 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 lib-y		:= clear_user.o delay.o copy_from_user.o		\
 		   copy_to_user.o copy_in_user.o copy_page.o		\
-		   clear_page.o memchr.o memcpy.o memmove.o memset.o	\
-		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
-		   strchr.o strrchr.o tishift.o
+		   clear_page.o csum.o memchr.o memcpy.o memmove.o	\
+		   memset.o memcmp.o strcmp.o strncmp.o strlen.o	\
+		   strnlen.o strchr.o strrchr.o tishift.o
 
 ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
 obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 78a9ef66288a..073acbf02a7c 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
  * Parameters:
  *	x0 - dest
  */
-ENTRY(clear_page)
+SYM_FUNC_START(clear_page)
 	mrs	x1, dczid_el0
 	and	w1, w1, #0xf
 	mov	x2, #4
@@ -25,5 +25,5 @@ ENTRY(clear_page)
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	1b
 	ret
-ENDPROC(clear_page)
+SYM_FUNC_END(clear_page)
 EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index aeafc03e961a..48a3a26eff66 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -19,7 +19,7 @@
  *
  * Alignment fixed up by hardware.
  */
-ENTRY(__arch_clear_user)
+SYM_FUNC_START(__arch_clear_user)
 	mov	x2, x1			// save the size for fixup return
 	subs	x1, x1, #8
 	b.mi	2f
@@ -40,7 +40,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
 	ret
-ENDPROC(__arch_clear_user)
+SYM_FUNC_END(__arch_clear_user)
 EXPORT_SYMBOL(__arch_clear_user)
 
 	.section .fixup,"ax"
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index ebb3c06cbb5d..8e25e89ad01f 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -53,12 +53,12 @@
 	.endm
 
 end	.req	x5
-ENTRY(__arch_copy_from_user)
+SYM_FUNC_START(__arch_copy_from_user)
 	add	end, x0, x2
 #include "copy_template.S"
 	mov	x0, #0				// Nothing to copy
 	ret
-ENDPROC(__arch_copy_from_user)
+SYM_FUNC_END(__arch_copy_from_user)
 EXPORT_SYMBOL(__arch_copy_from_user)
 
 	.section .fixup,"ax"
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 3d8153a1ebce..667139013ed1 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -55,12 +55,12 @@
 
 end	.req	x5
 
-ENTRY(__arch_copy_in_user)
+SYM_FUNC_START(__arch_copy_in_user)
 	add	end, x0, x2
 #include "copy_template.S"
 	mov	x0, #0
 	ret
-ENDPROC(__arch_copy_in_user)
+SYM_FUNC_END(__arch_copy_in_user)
 EXPORT_SYMBOL(__arch_copy_in_user)
 
 	.section .fixup,"ax"
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..e7a793961408 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
  *	x0 - dest
  *	x1 - src
  */
-ENTRY(copy_page)
+SYM_FUNC_START(copy_page)
 alternative_if ARM64_HAS_NO_HW_PREFETCH
 	// Prefetch three cache lines ahead.
 	prfm	pldl1strm, [x1, #128]
@@ -34,46 +34,46 @@ alternative_else_nop_endif
 	ldp	x14, x15, [x1, #96]
 	ldp	x16, x17, [x1, #112]
 
-	mov	x18, #(PAGE_SIZE - 128)
+	add	x0, x0, #256
 	add	x1, x1, #128
 1:
-	subs	x18, x18, #128
+	tst	x0, #(PAGE_SIZE - 1)
 
 alternative_if ARM64_HAS_NO_HW_PREFETCH
 	prfm	pldl1strm, [x1, #384]
 alternative_else_nop_endif
 
-	stnp	x2, x3, [x0]
+	stnp	x2, x3, [x0, #-256]
 	ldp	x2, x3, [x1]
-	stnp	x4, x5, [x0, #16]
+	stnp	x4, x5, [x0, #16 - 256]
 	ldp	x4, x5, [x1, #16]
-	stnp	x6, x7, [x0, #32]
+	stnp	x6, x7, [x0, #32 - 256]
 	ldp	x6, x7, [x1, #32]
-	stnp	x8, x9, [x0, #48]
+	stnp	x8, x9, [x0, #48 - 256]
 	ldp	x8, x9, [x1, #48]
-	stnp	x10, x11, [x0, #64]
+	stnp	x10, x11, [x0, #64 - 256]
 	ldp	x10, x11, [x1, #64]
-	stnp	x12, x13, [x0, #80]
+	stnp	x12, x13, [x0, #80 - 256]
 	ldp	x12, x13, [x1, #80]
-	stnp	x14, x15, [x0, #96]
+	stnp	x14, x15, [x0, #96 - 256]
 	ldp	x14, x15, [x1, #96]
-	stnp	x16, x17, [x0, #112]
+	stnp	x16, x17, [x0, #112 - 256]
 	ldp	x16, x17, [x1, #112]
 
 	add	x0, x0, #128
 	add	x1, x1, #128
 
-	b.gt	1b
+	b.ne	1b
 
-	stnp	x2, x3, [x0]
-	stnp	x4, x5, [x0, #16]
-	stnp	x6, x7, [x0, #32]
-	stnp	x8, x9, [x0, #48]
-	stnp	x10, x11, [x0, #64]
-	stnp	x12, x13, [x0, #80]
-	stnp	x14, x15, [x0, #96]
-	stnp	x16, x17, [x0, #112]
+	stnp	x2, x3, [x0, #-256]
+	stnp	x4, x5, [x0, #16 - 256]
+	stnp	x6, x7, [x0, #32 - 256]
+	stnp	x8, x9, [x0, #48 - 256]
+	stnp	x10, x11, [x0, #64 - 256]
+	stnp	x12, x13, [x0, #80 - 256]
+	stnp	x14, x15, [x0, #96 - 256]
+	stnp	x16, x17, [x0, #112 - 256]
 
 	ret
-ENDPROC(copy_page)
+SYM_FUNC_END(copy_page)
 EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 357eae2c18eb..1a104d0089f3 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -52,12 +52,12 @@
 	.endm
 
 end	.req	x5
-ENTRY(__arch_copy_to_user)
+SYM_FUNC_START(__arch_copy_to_user)
 	add	end, x0, x2
 #include "copy_template.S"
 	mov	x0, #0
 	ret
-ENDPROC(__arch_copy_to_user)
+SYM_FUNC_END(__arch_copy_to_user)
 EXPORT_SYMBOL(__arch_copy_to_user)
 
 	.section .fixup,"ax"
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index e6135f16649b..243e107e9896 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -85,17 +85,17 @@ CPU_BE(	rev16		w3, w3		)
 	.endm
 
 	.align		5
-ENTRY(crc32_le)
+SYM_FUNC_START(crc32_le)
 alternative_if_not ARM64_HAS_CRC32
 	b		crc32_le_base
 alternative_else_nop_endif
 	__crc32
-ENDPROC(crc32_le)
+SYM_FUNC_END(crc32_le)
 
 	.align		5
-ENTRY(__crc32c_le)
+SYM_FUNC_START(__crc32c_le)
 alternative_if_not ARM64_HAS_CRC32
 	b		__crc32c_le_base
 alternative_else_nop_endif
 	__crc32		c
-ENDPROC(__crc32c_le)
+SYM_FUNC_END(__crc32c_le)
diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
new file mode 100644
index 000000000000..1f82c66b32ea
--- /dev/null
+++ b/arch/arm64/lib/csum.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019-2020 Arm Ltd.
+
+#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <net/checksum.h>
+
+/* Looks dumb, but generates nice-ish code */
+static u64 accumulate(u64 sum, u64 data)
+{
+	__uint128_t tmp = (__uint128_t)sum + data;
+	return tmp + (tmp >> 64);
+}
+
+unsigned int do_csum(const unsigned char *buff, int len)
+{
+	unsigned int offset, shift, sum;
+	const u64 *ptr;
+	u64 data, sum64 = 0;
+
+	if (unlikely(len == 0))
+		return 0;
+
+	offset = (unsigned long)buff & 7;
+	/*
+	 * This is to all intents and purposes safe, since rounding down cannot
+	 * result in a different page or cache line being accessed, and @buff
+	 * should absolutely not be pointing to anything read-sensitive. We do,
+	 * however, have to be careful not to piss off KASAN, which means using
+	 * unchecked reads to accommodate the head and tail, for which we'll
+	 * compensate with an explicit check up-front.
+	 */
+	kasan_check_read(buff, len);
+	ptr = (u64 *)(buff - offset);
+	len = len + offset - 8;
+
+	/*
+	 * Head: zero out any excess leading bytes. Shifting back by the same
+	 * amount should be at least as fast as any other way of handling the
+	 * odd/even alignment, and means we can ignore it until the very end.
+	 */
+	shift = offset * 8;
+	data = READ_ONCE_NOCHECK(*ptr++);
+#ifdef __LITTLE_ENDIAN
+	data = (data >> shift) << shift;
+#else
+	data = (data << shift) >> shift;
+#endif
+
+	/*
+	 * Body: straightforward aligned loads from here on (the paired loads
+	 * underlying the quadword type still only need dword alignment). The
+	 * main loop strictly excludes the tail, so the second loop will always
+	 * run at least once.
+	 */
+	while (unlikely(len > 64)) {
+		__uint128_t tmp1, tmp2, tmp3, tmp4;
+
+		tmp1 = READ_ONCE_NOCHECK(*(__uint128_t *)ptr);
+		tmp2 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 2));
+		tmp3 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 4));
+		tmp4 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 6));
+
+		len -= 64;
+		ptr += 8;
+
+		/* This is the "don't dump the carry flag into a GPR" idiom */
+		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
+		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
+		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
+		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
+		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
+		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+		tmp1 = ((tmp1 >> 64) << 64) | sum64;
+		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+		sum64 = tmp1 >> 64;
+	}
+	while (len > 8) {
+		__uint128_t tmp;
+
+		sum64 = accumulate(sum64, data);
+		tmp = READ_ONCE_NOCHECK(*(__uint128_t *)ptr);
+
+		len -= 16;
+		ptr += 2;
+
+#ifdef __LITTLE_ENDIAN
+		data = tmp >> 64;
+		sum64 = accumulate(sum64, tmp);
+#else
+		data = tmp;
+		sum64 = accumulate(sum64, tmp >> 64);
+#endif
+	}
+	if (len > 0) {
+		sum64 = accumulate(sum64, data);
+		data = READ_ONCE_NOCHECK(*ptr);
+		len -= 8;
+	}
+	/*
+	 * Tail: zero any over-read bytes similarly to the head, again
+	 * preserving odd/even alignment.
+	 */
+	shift = len * -8;
+#ifdef __LITTLE_ENDIAN
+	data = (data << shift) >> shift;
+#else
+	data = (data >> shift) << shift;
+#endif
+	sum64 = accumulate(sum64, data);
+
+	/* Finally, folding */
+	sum64 += (sum64 >> 32) | (sum64 << 32);
+	sum = sum64 >> 32;
+	sum += (sum >> 16) | (sum << 16);
+	if (offset & 1)
+		return (u16)swab32(sum);
+
+	return sum >> 16;
+}
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 48a3ab636e4f..edf6b970a277 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -19,7 +19,7 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
-WEAK(memchr)
+SYM_FUNC_START_WEAK_PI(memchr)
 	and	w1, w1, #0xff
 1:	subs	x2, x2, #1
 	b.mi	2f
@@ -30,5 +30,5 @@ WEAK(memchr)
 	ret
 2:	mov	x0, #0
 	ret
-ENDPIPROC(memchr)
+SYM_FUNC_END_PI(memchr)
 EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index b297bdaaf549..c0671e793ea9 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -46,7 +46,7 @@ pos		.req	x11
 limit_wd	.req	x12
 mask		.req	x13
 
-WEAK(memcmp)
+SYM_FUNC_START_WEAK_PI(memcmp)
 	cbz	limit, .Lret0
 	eor	tmp1, src1, src2
 	tst	tmp1, #7
@@ -243,5 +243,5 @@ CPU_LE( rev	data2, data2 )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPIPROC(memcmp)
+SYM_FUNC_END_PI(memcmp)
 EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index d79f48994dbb..9f382adfa88a 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,11 +57,11 @@
 	.endm
 
 	.weak memcpy
-ENTRY(__memcpy)
-ENTRY(memcpy)
+SYM_FUNC_START_ALIAS(__memcpy)
+SYM_FUNC_START_PI(memcpy)
 #include "copy_template.S"
 	ret
-ENDPIPROC(memcpy)
+SYM_FUNC_END_PI(memcpy)
 EXPORT_SYMBOL(memcpy)
-ENDPROC(__memcpy)
+SYM_FUNC_END_ALIAS(__memcpy)
 EXPORT_SYMBOL(__memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index 784775136480..02cda2e33bde 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -46,8 +46,8 @@ D_l	.req	x13
 D_h	.req	x14
 
 	.weak memmove
-ENTRY(__memmove)
-ENTRY(memmove)
+SYM_FUNC_START_ALIAS(__memmove)
+SYM_FUNC_START_PI(memmove)
 	cmp	dstin, src
 	b.lo	__memcpy
 	add	tmp1, src, count
@@ -184,7 +184,7 @@ ENTRY(memmove)
 	tst	count, #0x3f
 	b.ne	.Ltail63
 	ret
-ENDPIPROC(memmove)
+SYM_FUNC_END_PI(memmove)
 EXPORT_SYMBOL(memmove)
-ENDPROC(__memmove)
+SYM_FUNC_END_ALIAS(__memmove)
 EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 9fb97e6bc560..77c3c7ba0084 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -43,8 +43,8 @@ tmp3w		.req	w9
 tmp3		.req	x9
 
 	.weak memset
-ENTRY(__memset)
-ENTRY(memset)
+SYM_FUNC_START_ALIAS(__memset)
+SYM_FUNC_START_PI(memset)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
 	orr	A_lw, A_lw, A_lw, lsl #8
@@ -203,7 +203,7 @@ ENTRY(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-ENDPIPROC(memset)
+SYM_FUNC_END_PI(memset)
 EXPORT_SYMBOL(memset)
-ENDPROC(__memset)
+SYM_FUNC_END_ALIAS(__memset)
 EXPORT_SYMBOL(__memset)
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index ca3ec18171a4..1f47eae3b0d6 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
-WEAK(strchr)
+SYM_FUNC_START_WEAK(strchr)
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 	cmp	w2, w1
@@ -28,5 +28,5 @@ WEAK(strchr)
 	cmp	w2, w1
 	csel	x0, x0, xzr, eq
 	ret
-ENDPROC(strchr)
+SYM_FUNC_END(strchr)
 EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index e9aefbe0b740..4767540d1b94 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -48,7 +48,7 @@ tmp3		.req	x9
 zeroones	.req	x10
 pos		.req	x11
 
-WEAK(strcmp)
+SYM_FUNC_START_WEAK_PI(strcmp)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
@@ -219,5 +219,5 @@ CPU_BE(	orr	syndrome, diff, has_nul )
 	lsr	data1, data1, #56
 	sub	result, data1, data2, lsr #56
 	ret
-ENDPIPROC(strcmp)
+SYM_FUNC_END_PI(strcmp)
 EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 87b0cb066915..ee3ed882dd79 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -44,7 +44,7 @@ pos		.req	x12
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-WEAK(strlen)
+SYM_FUNC_START_WEAK_PI(strlen)
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
 	ands	tmp1, srcin, #15
@@ -111,5 +111,5 @@ CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
 	csinv	data1, data1, xzr, le
 	csel	data2, data2, data2a, le
 	b	.Lrealigned
-ENDPIPROC(strlen)
+SYM_FUNC_END_PI(strlen)
 EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index f571581888fa..2a7ee949ed47 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -52,7 +52,7 @@ limit_wd	.req	x13
 mask		.req	x14
 endloop		.req	x15
 
-WEAK(strncmp)
+SYM_FUNC_START_WEAK_PI(strncmp)
 	cbz	limit, .Lret0
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
@@ -295,5 +295,5 @@ CPU_BE( orr	syndrome, diff, has_nul )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPIPROC(strncmp)
+SYM_FUNC_END_PI(strncmp)
 EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index c0bac9493c68..b72913a99038 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -47,7 +47,7 @@ limit_wd	.req	x14
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-WEAK(strnlen)
+SYM_FUNC_START_WEAK_PI(strnlen)
 	cbz	limit, .Lhit_limit
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
@@ -156,5 +156,5 @@ CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
 .Lhit_limit:
 	mov	len, limit
 	ret
-ENDPIPROC(strnlen)
+SYM_FUNC_END_PI(strnlen)
 EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
index 794ac49ea433..13132d1ed6d1 100644
--- a/arch/arm64/lib/strrchr.S
+++ b/arch/arm64/lib/strrchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of last occurrence of 'c' or 0
  */
-WEAK(strrchr)
+SYM_FUNC_START_WEAK_PI(strrchr)
 	mov	x3, #0
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
@@ -29,5 +29,5 @@ WEAK(strrchr)
 	b	1b
 2:	mov	x0, x3
 	ret
-ENDPIPROC(strrchr)
+SYM_FUNC_END_PI(strrchr)
 EXPORT_SYMBOL_NOKASAN(strrchr)
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index 047622536535..a88613834fb0 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -7,7 +7,7 @@
 
 #include <asm/assembler.h>
 
-ENTRY(__ashlti3)
+SYM_FUNC_START(__ashlti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -26,10 +26,10 @@ ENTRY(__ashlti3)
 	lsl	x1, x0, x1
 	mov	x0, x2
 	ret
-ENDPROC(__ashlti3)
+SYM_FUNC_END(__ashlti3)
 EXPORT_SYMBOL(__ashlti3)
 
-ENTRY(__ashrti3)
+SYM_FUNC_START(__ashrti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -48,10 +48,10 @@ ENTRY(__ashrti3)
 	asr	x0, x1, x0
 	mov	x1, x2
 	ret
-ENDPROC(__ashrti3)
+SYM_FUNC_END(__ashrti3)
 EXPORT_SYMBOL(__ashrti3)
 
-ENTRY(__lshrti3)
+SYM_FUNC_START(__lshrti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -70,5 +70,5 @@ ENTRY(__lshrti3)
 	lsr	x0, x1, x0
 	mov	x1, x2
 	ret
-ENDPROC(__lshrti3)
+SYM_FUNC_END(__lshrti3)
 EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index db767b072601..2d881f34dd9d 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,7 +24,7 @@
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-ENTRY(__flush_icache_range)
+SYM_FUNC_START(__flush_icache_range)
 	/* FALLTHROUGH */
 
 /*
@@ -37,7 +37,7 @@ ENTRY(__flush_icache_range)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-ENTRY(__flush_cache_user_range)
+SYM_FUNC_START(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
@@ -66,8 +66,8 @@ alternative_else_nop_endif
 9:
 	mov	x0, #-EFAULT
 	b	1b
-ENDPROC(__flush_icache_range)
-ENDPROC(__flush_cache_user_range)
+SYM_FUNC_END(__flush_icache_range)
+SYM_FUNC_END(__flush_cache_user_range)
 
 /*
  *	invalidate_icache_range(start,end)
@@ -77,7 +77,7 @@ ENDPROC(__flush_cache_user_range)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-ENTRY(invalidate_icache_range)
+SYM_FUNC_START(invalidate_icache_range)
 alternative_if ARM64_HAS_CACHE_DIC
 	mov	x0, xzr
 	isb
@@ -94,7 +94,7 @@ alternative_else_nop_endif
 2:
 	mov	x0, #-EFAULT
 	b	1b
-ENDPROC(invalidate_icache_range)
+SYM_FUNC_END(invalidate_icache_range)
 
 /*
  *	__flush_dcache_area(kaddr, size)
@@ -105,10 +105,10 @@ ENDPROC(invalidate_icache_range)
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
-ENTRY(__flush_dcache_area)
+SYM_FUNC_START_PI(__flush_dcache_area)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__flush_dcache_area)
+SYM_FUNC_END_PI(__flush_dcache_area)
 
 /*
  *	__clean_dcache_area_pou(kaddr, size)
@@ -119,14 +119,14 @@ ENDPIPROC(__flush_dcache_area)
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
-ENTRY(__clean_dcache_area_pou)
+SYM_FUNC_START(__clean_dcache_area_pou)
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	ret
 alternative_else_nop_endif
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
-ENDPROC(__clean_dcache_area_pou)
+SYM_FUNC_END(__clean_dcache_area_pou)
 
 /*
  *	__inval_dcache_area(kaddr, size)
@@ -138,7 +138,8 @@ ENDPROC(__clean_dcache_area_pou)
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
-ENTRY(__inval_dcache_area)
+SYM_FUNC_START_LOCAL(__dma_inv_area)
+SYM_FUNC_START_PI(__inval_dcache_area)
 	/* FALLTHROUGH */
 
 /*
@@ -146,7 +147,6 @@ ENTRY(__inval_dcache_area)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
-__dma_inv_area:
 	add	x1, x1, x0
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
@@ -165,8 +165,8 @@ __dma_inv_area:
 	b.lo	2b
 	dsb	sy
 	ret
-ENDPIPROC(__inval_dcache_area)
-ENDPROC(__dma_inv_area)
+SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END(__dma_inv_area)
 
 /*
  *	__clean_dcache_area_poc(kaddr, size)
@@ -177,7 +177,8 @@ ENDPROC(__dma_inv_area)
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
-ENTRY(__clean_dcache_area_poc)
+SYM_FUNC_START_LOCAL(__dma_clean_area)
+SYM_FUNC_START_PI(__clean_dcache_area_poc)
 	/* FALLTHROUGH */
 
 /*
@@ -185,11 +186,10 @@ ENTRY(__clean_dcache_area_poc)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
-__dma_clean_area:
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__clean_dcache_area_poc)
-ENDPROC(__dma_clean_area)
+SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END(__dma_clean_area)
 
 /*
  *	__clean_dcache_area_pop(kaddr, size)
@@ -200,13 +200,13 @@ ENDPROC(__dma_clean_area)
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
-ENTRY(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(__clean_dcache_area_pop)
 	alternative_if_not ARM64_HAS_DCPOP
 	b	__clean_dcache_area_poc
 	alternative_else_nop_endif
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(__clean_dcache_area_pop)
 
 /*
  *	__dma_flush_area(start, size)
@@ -216,10 +216,10 @@ ENDPIPROC(__clean_dcache_area_pop)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
-ENTRY(__dma_flush_area)
+SYM_FUNC_START_PI(__dma_flush_area)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__dma_flush_area)
+SYM_FUNC_END_PI(__dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -227,11 +227,11 @@ ENDPIPROC(__dma_flush_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-ENTRY(__dma_map_area)
+SYM_FUNC_START_PI(__dma_map_area)
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__dma_inv_area
 	b	__dma_clean_area
-ENDPIPROC(__dma_map_area)
+SYM_FUNC_END_PI(__dma_map_area)
 
 /*
  *	__dma_unmap_area(start, size, dir)
@@ -239,8 +239,8 @@ ENDPIPROC(__dma_map_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-ENTRY(__dma_unmap_area)
+SYM_FUNC_START_PI(__dma_unmap_area)
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__dma_inv_area
 	ret
-ENDPIPROC(__dma_unmap_area)
+SYM_FUNC_END_PI(__dma_unmap_area)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b5e329fde2dd..8ef73e89d514 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -29,15 +29,9 @@ static cpumask_t tlb_flush_pending;
 #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
 #define ASID_FIRST_VERSION	(1UL << asid_bits)
 
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS		(ASID_FIRST_VERSION >> 1)
-#define asid2idx(asid)		(((asid) & ~ASID_MASK) >> 1)
-#define idx2asid(idx)		(((idx) << 1) & ~ASID_MASK)
-#else
-#define NUM_USER_ASIDS		(ASID_FIRST_VERSION)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
 #define asid2idx(asid)		((asid) & ~ASID_MASK)
 #define idx2asid(idx)		asid2idx(idx)
-#endif
 
 /* Get the ASIDBits supported by the current CPU */
 static u32 get_cpu_asid_bits(void)
@@ -77,13 +71,33 @@ void verify_cpu_asid_bits(void)
 	}
 }
 
+static void set_kpti_asid_bits(void)
+{
+	unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long);
+	/*
+	 * In case of KPTI kernel/user ASIDs are allocated in
+	 * pairs, the bottom bit distinguishes the two: if it
+	 * is set, then the ASID will map only userspace. Thus
+	 * mark even as reserved for kernel.
+	 */
+	memset(asid_map, 0xaa, len);
+}
+
+static void set_reserved_asid_bits(void)
+{
+	if (arm64_kernel_unmapped_at_el0())
+		set_kpti_asid_bits();
+	else
+		bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+}
+
 static void flush_context(void)
 {
 	int i;
 	u64 asid;
 
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
-	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+	set_reserved_asid_bits();
 
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
@@ -261,6 +275,14 @@ static int asids_init(void)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
+	/*
+	 * We cannot call set_reserved_asid_bits() here because CPU
+	 * caps are not finalized yet, so it is safer to assume KPTI
+	 * and reserve kernel ASID's from beginning.
+	 */
+	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0))
+		set_kpti_asid_bits();
+
 	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
 	return 0;
 }
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 9239416e93d4..6c45350e33aa 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -13,14 +13,14 @@
 
 #include <asm/cacheflush.h>
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_map_area(phys_to_virt(paddr), size, dir);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 93f9f77582ae..0a920b538a89 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -142,6 +142,7 @@ static const struct prot_bits pte_bits[] = {
 		.mask	= PTE_UXN,
 		.val	= PTE_UXN,
 		.set	= "UXN",
+		.clear	= "   ",
 	}, {
 		.mask	= PTE_ATTRINDX_MASK,
 		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRnE),
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9fc6db0bcbad..85566d32958f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,7 +32,8 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
-#include <asm/kasan.h>
+#include <asm/kprobes.h>
+#include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
 #include <asm/pgtable.h>
@@ -101,18 +102,6 @@ static void mem_abort_decode(unsigned int esr)
 		data_abort_decode(esr);
 }
 
-static inline bool is_ttbr0_addr(unsigned long addr)
-{
-	/* entry assembly clears tags for TTBR0 addrs */
-	return addr < TASK_SIZE;
-}
-
-static inline bool is_ttbr1_addr(unsigned long addr)
-{
-	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
-	return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
-}
-
 static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
 {
 	/* Either init_pg_dir or swapper_pg_dir */
@@ -318,6 +307,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	if (is_el1_permission_fault(addr, esr, regs)) {
 		if (esr & ESR_ELx_WNR)
 			msg = "write to read-only memory";
+		else if (is_el1_instruction_abort(esr))
+			msg = "execute from non-executable memory";
 		else
 			msg = "read from unreadable memory";
 	} else if (addr < PAGE_SIZE) {
@@ -454,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	const struct fault_info *inf;
 	struct mm_struct *mm = current->mm;
 	vm_fault_t fault, major = 0;
-	unsigned long vm_flags = VM_READ | VM_WRITE;
+	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (kprobe_page_fault(regs, esr))
@@ -736,8 +727,7 @@ static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
 };
 
-asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
-					 struct pt_regs *regs)
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_fault_info(esr);
 
@@ -753,43 +743,21 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	arm64_notify_die(inf->name, regs,
 			 inf->sig, inf->code, (void __user *)addr, esr);
 }
+NOKPROBE_SYMBOL(do_mem_abort);
 
-asmlinkage void __exception do_el0_irq_bp_hardening(void)
+void do_el0_irq_bp_hardening(void)
 {
 	/* PC has already been checked in entry.S */
 	arm64_apply_bp_hardening();
 }
+NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
 
-asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
-						   unsigned int esr,
-						   struct pt_regs *regs)
-{
-	/*
-	 * We've taken an instruction abort from userspace and not yet
-	 * re-enabled IRQs. If the address is a kernel address, apply
-	 * BP hardening prior to enabling IRQs and pre-emption.
-	 */
-	if (!is_ttbr0_addr(addr))
-		arm64_apply_bp_hardening();
-
-	local_daif_restore(DAIF_PROCCTX);
-	do_mem_abort(addr, esr, regs);
-}
-
-
-asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
-					   unsigned int esr,
-					   struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-	if (user_mode(regs)) {
-		if (!is_ttbr0_addr(instruction_pointer(regs)))
-			arm64_apply_bp_hardening();
-		local_daif_restore(DAIF_PROCCTX);
-	}
-
 	arm64_notify_die("SP/PC alignment exception", regs,
 			 SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
 }
+NOKPROBE_SYMBOL(do_sp_pc_abort);
 
 int __init early_brk64(unsigned long addr, unsigned int esr,
 		       struct pt_regs *regs);
@@ -872,8 +840,7 @@ NOKPROBE_SYMBOL(debug_exception_exit);
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return 0;
@@ -892,16 +859,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 	return 1;
 }
 #else
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	return 0;
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
+NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
 
-asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
-					       unsigned int esr,
-					       struct pt_regs *regs)
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+			struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_debug_fault_info(esr);
 	unsigned long pc = instruction_pointer(regs);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 45c00a54909c..b65dffdfb201 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
 #include <linux/sort.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
 #include <linux/efi.h>
@@ -41,6 +42,8 @@
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
+#define ARM64_ZONE_DMA_BITS	30
+
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -56,7 +59,14 @@ EXPORT_SYMBOL(physvirt_offset);
 struct page *vmemmap __ro_after_init;
 EXPORT_SYMBOL(vmemmap);
 
+/*
+ * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
+ * memory as some devices, namely the Raspberry Pi 4, have peripherals with
+ * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
+ * bit addressable memory area.
+ */
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
+static phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
 /*
@@ -81,7 +91,7 @@ static void __init reserve_crashkernel(void)
 
 	if (crash_base == 0) {
 		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+		crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
 				crash_size, SZ_2M);
 		if (crash_base == 0) {
 			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -169,15 +179,16 @@ static void __init reserve_elfcorehdr(void)
 {
 }
 #endif /* CONFIG_CRASH_DUMP */
+
 /*
- * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
- * currently assumes that for memory starting above 4G, 32-bit devices will
- * use a DMA offset.
+ * Return the maximum physical address for a zone with a given address size
+ * limit. It currently assumes that for memory starting above 4G, 32-bit
+ * devices will use a DMA offset.
  */
-static phys_addr_t __init max_zone_dma_phys(void)
+static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
 {
-	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
-	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
+	return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
 }
 
 #ifdef CONFIG_NUMA
@@ -186,8 +197,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
 
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
+#endif
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max;
 
@@ -200,16 +214,20 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
 	struct memblock_region *reg;
 	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-	unsigned long max_dma = min;
+	unsigned long __maybe_unused max_dma, max_dma32;
 
 	memset(zone_size, 0, sizeof(zone_size));
 
-	/* 4GB maximum for 32-bit only capable devices */
+	max_dma = max_dma32 = min;
+#ifdef CONFIG_ZONE_DMA
+	max_dma = max_dma32 = PFN_DOWN(arm64_dma_phys_limit);
+	zone_size[ZONE_DMA] = max_dma - min;
+#endif
 #ifdef CONFIG_ZONE_DMA32
-	max_dma = PFN_DOWN(arm64_dma_phys_limit);
-	zone_size[ZONE_DMA32] = max_dma - min;
+	max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
+	zone_size[ZONE_DMA32] = max_dma32 - max_dma;
 #endif
-	zone_size[ZONE_NORMAL] = max - max_dma;
+	zone_size[ZONE_NORMAL] = max - max_dma32;
 
 	memcpy(zhole_size, zone_size, sizeof(zhole_size));
 
@@ -217,19 +235,23 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 		unsigned long start = memblock_region_memory_base_pfn(reg);
 		unsigned long end = memblock_region_memory_end_pfn(reg);
 
-		if (start >= max)
-			continue;
-
-#ifdef CONFIG_ZONE_DMA32
-		if (start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+		if (start >= min && start < max_dma) {
 			unsigned long dma_end = min(end, max_dma);
-			zhole_size[ZONE_DMA32] -= dma_end - start;
+			zhole_size[ZONE_DMA] -= dma_end - start;
+			start = dma_end;
+		}
+#endif
+#ifdef CONFIG_ZONE_DMA32
+		if (start >= max_dma && start < max_dma32) {
+			unsigned long dma32_end = min(end, max_dma32);
+			zhole_size[ZONE_DMA32] -= dma32_end - start;
+			start = dma32_end;
 		}
 #endif
-		if (end > max_dma) {
+		if (start >= max_dma32 && start < max) {
 			unsigned long normal_end = min(end, max);
-			unsigned long normal_start = max(start, max_dma);
-			zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
+			zhole_size[ZONE_NORMAL] -= normal_end - start;
 		}
 	}
 
@@ -418,11 +440,15 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
-	/* 4GB maximum for 32-bit only capable devices */
+	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+		zone_dma_bits = ARM64_ZONE_DMA_BITS;
+		arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
+	}
+
 	if (IS_ENABLED(CONFIG_ZONE_DMA32))
-		arm64_dma_phys_limit = max_zone_dma_phys();
+		arm64_dma32_phys_limit = max_zone_phys(32);
 	else
-		arm64_dma_phys_limit = PHYS_MASK + 1;
+		arm64_dma32_phys_limit = PHYS_MASK + 1;
 
 	reserve_crashkernel();
 
@@ -430,7 +456,7 @@ void __init arm64_memblock_init(void)
 
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 
-	dma_contiguous_reserve(arm64_dma_phys_limit);
+	dma_contiguous_reserve(arm64_dma32_phys_limit);
 }
 
 void __init bootmem_init(void)
@@ -534,7 +560,7 @@ static void __init free_unused_memmap(void)
 void __init mem_init(void)
 {
 	if (swiotlb_force == SWIOTLB_FORCE ||
-	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
 		swiotlb_init(1);
 	else
 		swiotlb_force = SWIOTLB_NO_FORCE;
@@ -571,7 +597,7 @@ void free_initmem(void)
 {
 	free_reserved_area(lm_alias(__init_begin),
 			   lm_alias(__init_end),
-			   0, "unused kernel");
+			   POISON_FREE_INITMEM, "unused kernel");
 	/*
 	 * Unmap the __init region but leave the VM area in place. This
 	 * prevents the region from being reused for kernel modules, which
@@ -580,18 +606,6 @@ void free_initmem(void)
 	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
-{
-	unsigned long aligned_start, aligned_end;
-
-	aligned_start = __virt_to_phys(start) & PAGE_MASK;
-	aligned_end = PAGE_ALIGN(__virt_to_phys(end));
-	memblock_free(aligned_start, aligned_end - aligned_start);
-	free_reserved_area((void *)start, (void *)end, 0, "initrd");
-}
-#endif
-
 /*
  * Dump out memory limit information on panic.
  */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..40797cbfba2d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 				 phys_addr_t (*pgtable_alloc)(int),
 				 int flags)
 {
-	unsigned long addr, length, end, next;
+	unsigned long addr, end, next;
 	pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
 
 	/*
@@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 
 	phys &= PAGE_MASK;
 	addr = virt & PAGE_MASK;
-	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
+	end = PAGE_ALIGN(virt + size);
 
-	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
 		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
@@ -1061,6 +1060,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
 			     size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
 
+	memblock_clear_nomap(start, size);
+
 	return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
 			   restrictions);
 }
@@ -1069,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
 	/*
 	 * FIXME: Cleanup page tables (also in arch_add_memory() in case
@@ -1078,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 	 * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
 	 * unlocked yet.
 	 */
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 9ce7bd9d4d9c..250c49008d73 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -54,7 +54,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 				pgprot_t set_mask, pgprot_t clear_mask)
 {
 	unsigned long start = addr;
-	unsigned long size = PAGE_SIZE*numpages;
+	unsigned long size = PAGE_SIZE * numpages;
 	unsigned long end = start + size;
 	struct vm_struct *area;
 	int i;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..aafed6902411 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -42,7 +42,14 @@
 #define TCR_KASAN_FLAGS 0
 #endif
 
-#define MAIR(attr, mt)	((attr) << ((mt) * 8))
+/* Default MAIR_EL1 */
+#define MAIR_EL1_SET							\
+	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |	\
+	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |	\
+	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |		\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |		\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |			\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
 
 #ifdef CONFIG_CPU_PM
 /**
@@ -50,7 +57,7 @@
  *
  * x0: virtual address of context pointer
  */
-ENTRY(cpu_do_suspend)
+SYM_FUNC_START(cpu_do_suspend)
 	mrs	x2, tpidr_el0
 	mrs	x3, tpidrro_el0
 	mrs	x4, contextidr_el1
@@ -74,7 +81,7 @@ alternative_endif
 	stp	x10, x11, [x0, #64]
 	stp	x12, x13, [x0, #80]
 	ret
-ENDPROC(cpu_do_suspend)
+SYM_FUNC_END(cpu_do_suspend)
 
 /**
  * cpu_do_resume - restore CPU register context
@@ -82,7 +89,7 @@ ENDPROC(cpu_do_suspend)
  * x0: Address of context pointer
  */
 	.pushsection ".idmap.text", "awx"
-ENTRY(cpu_do_resume)
+SYM_FUNC_START(cpu_do_resume)
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
 	ldp	x6, x8, [x0, #32]
@@ -131,7 +138,7 @@ alternative_else_nop_endif
 
 	isb
 	ret
-ENDPROC(cpu_do_resume)
+SYM_FUNC_END(cpu_do_resume)
 	.popsection
 #endif
 
@@ -142,7 +149,7 @@ ENDPROC(cpu_do_resume)
  *
  *	- pgd_phys - physical address of new TTB
  */
-ENTRY(cpu_do_switch_mm)
+SYM_FUNC_START(cpu_do_switch_mm)
 	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
 	phys_to_ttbr x3, x0
@@ -161,7 +168,7 @@ alternative_else_nop_endif
 	msr	ttbr0_el1, x3			// now update TTBR0
 	isb
 	b	post_ttbr_update_workaround	// Back to C code...
-ENDPROC(cpu_do_switch_mm)
+SYM_FUNC_END(cpu_do_switch_mm)
 
 	.pushsection ".idmap.text", "awx"
 
@@ -182,7 +189,7 @@ ENDPROC(cpu_do_switch_mm)
  * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
  * called by anything else. It can only be executed from a TTBR0 mapping.
  */
-ENTRY(idmap_cpu_replace_ttbr1)
+SYM_FUNC_START(idmap_cpu_replace_ttbr1)
 	save_and_disable_daif flags=x2
 
 	__idmap_cpu_set_reserved_ttbr1 x1, x3
@@ -194,7 +201,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
 	restore_daif x2
 
 	ret
-ENDPROC(idmap_cpu_replace_ttbr1)
+SYM_FUNC_END(idmap_cpu_replace_ttbr1)
 	.popsection
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
@@ -222,7 +229,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
  */
 __idmap_kpti_flag:
 	.long	1
-ENTRY(idmap_kpti_install_ng_mappings)
+SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	cpu		.req	w0
 	num_cpus	.req	w1
 	swapper_pa	.req	x2
@@ -250,15 +257,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
 	/* We're the boot CPU. Wait for the others to catch up */
 	sevl
 1:	wfe
-	ldaxr	w18, [flag_ptr]
-	eor	w18, w18, num_cpus
-	cbnz	w18, 1b
+	ldaxr	w17, [flag_ptr]
+	eor	w17, w17, num_cpus
+	cbnz	w17, 1b
 
 	/* We need to walk swapper, so turn off the MMU. */
 	pre_disable_mmu_workaround
-	mrs	x18, sctlr_el1
-	bic	x18, x18, #SCTLR_ELx_M
-	msr	sctlr_el1, x18
+	mrs	x17, sctlr_el1
+	bic	x17, x17, #SCTLR_ELx_M
+	msr	sctlr_el1, x17
 	isb
 
 	/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +288,9 @@ skip_pgd:
 	isb
 
 	/* We're done: fire up the MMU again */
-	mrs	x18, sctlr_el1
-	orr	x18, x18, #SCTLR_ELx_M
-	msr	sctlr_el1, x18
+	mrs	x17, sctlr_el1
+	orr	x17, x17, #SCTLR_ELx_M
+	msr	sctlr_el1, x17
 	isb
 
 	/*
@@ -353,47 +360,48 @@ skip_pte:
 	b.ne	do_pte
 	b	next_pmd
 
+	.unreq	cpu
+	.unreq	num_cpus
+	.unreq	swapper_pa
+	.unreq	cur_pgdp
+	.unreq	end_pgdp
+	.unreq	pgd
+	.unreq	cur_pudp
+	.unreq	end_pudp
+	.unreq	pud
+	.unreq	cur_pmdp
+	.unreq	end_pmdp
+	.unreq	pmd
+	.unreq	cur_ptep
+	.unreq	end_ptep
+	.unreq	pte
+
 	/* Secondary CPUs end up here */
 __idmap_kpti_secondary:
 	/* Uninstall swapper before surgery begins */
-	__idmap_cpu_set_reserved_ttbr1 x18, x17
+	__idmap_cpu_set_reserved_ttbr1 x16, x17
 
 	/* Increment the flag to let the boot CPU we're ready */
-1:	ldxr	w18, [flag_ptr]
-	add	w18, w18, #1
-	stxr	w17, w18, [flag_ptr]
+1:	ldxr	w16, [flag_ptr]
+	add	w16, w16, #1
+	stxr	w17, w16, [flag_ptr]
 	cbnz	w17, 1b
 
 	/* Wait for the boot CPU to finish messing around with swapper */
 	sevl
 1:	wfe
-	ldxr	w18, [flag_ptr]
-	cbnz	w18, 1b
+	ldxr	w16, [flag_ptr]
+	cbnz	w16, 1b
 
 	/* All done, act like nothing happened */
-	offset_ttbr1 swapper_ttb, x18
+	offset_ttbr1 swapper_ttb, x16
 	msr	ttbr1_el1, swapper_ttb
 	isb
 	ret
 
-	.unreq	cpu
-	.unreq	num_cpus
-	.unreq	swapper_pa
 	.unreq	swapper_ttb
 	.unreq	flag_ptr
-	.unreq	cur_pgdp
-	.unreq	end_pgdp
-	.unreq	pgd
-	.unreq	cur_pudp
-	.unreq	end_pudp
-	.unreq	pud
-	.unreq	cur_pmdp
-	.unreq	end_pmdp
-	.unreq	pmd
-	.unreq	cur_ptep
-	.unreq	end_ptep
-	.unreq	pte
-ENDPROC(idmap_kpti_install_ng_mappings)
+SYM_FUNC_END(idmap_kpti_install_ng_mappings)
 	.popsection
 #endif
 
@@ -404,7 +412,7 @@ ENDPROC(idmap_kpti_install_ng_mappings)
  *	value of the SCTLR_EL1 register.
  */
 	.pushsection ".idmap.text", "awx"
-ENTRY(__cpu_setup)
+SYM_FUNC_START(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
 
@@ -416,23 +424,9 @@ ENTRY(__cpu_setup)
 	enable_dbg				// since this is per-cpu
 	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	/*
-	 * Memory region attributes for LPAE:
-	 *
-	 *   n = AttrIndx[2:0]
-	 *			n	MAIR
-	 *   DEVICE_nGnRnE	000	00000000
-	 *   DEVICE_nGnRE	001	00000100
-	 *   DEVICE_GRE		010	00001100
-	 *   NORMAL_NC		011	01000100
-	 *   NORMAL		100	11111111
-	 *   NORMAL_WT		101	10111011
+	 * Memory region attributes
 	 */
-	ldr	x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
-		     MAIR(0x04, MT_DEVICE_nGnRE) | \
-		     MAIR(0x0c, MT_DEVICE_GRE) | \
-		     MAIR(0x44, MT_NORMAL_NC) | \
-		     MAIR(0xff, MT_NORMAL) | \
-		     MAIR(0xbb, MT_NORMAL_WT)
+	mov_q	x5, MAIR_EL1_SET
 	msr	mair_el1, x5
 	/*
 	 * Prepare SCTLR
@@ -475,4 +469,4 @@ ENTRY(__cpu_setup)
 #endif	/* CONFIG_ARM64_HW_AFDBM */
 	msr	tcr_el1, x10
 	ret					// return to head.S
-ENDPROC(__cpu_setup)
+SYM_FUNC_END(__cpu_setup)
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index c5f05c4a4d00..5b09aca55108 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -56,11 +56,11 @@
 #define XEN_IMM 0xEA1
 
 #define HYPERCALL_SIMPLE(hypercall)		\
-ENTRY(HYPERVISOR_##hypercall)			\
+SYM_FUNC_START(HYPERVISOR_##hypercall)		\
 	mov x16, #__HYPERVISOR_##hypercall;	\
 	hvc XEN_IMM;				\
 	ret;					\
-ENDPROC(HYPERVISOR_##hypercall)
+SYM_FUNC_END(HYPERVISOR_##hypercall)
 
 #define HYPERCALL0 HYPERCALL_SIMPLE
 #define HYPERCALL1 HYPERCALL_SIMPLE
@@ -86,7 +86,7 @@ HYPERCALL2(multicall);
 HYPERCALL2(vm_assist);
 HYPERCALL3(dm_op);
 
-ENTRY(privcmd_call)
+SYM_FUNC_START(privcmd_call)
 	mov x16, x0
 	mov x0, x1
 	mov x1, x2
@@ -109,4 +109,4 @@ ENTRY(privcmd_call)
 	 */
 	uaccess_ttbr0_disable x6, x7
 	ret
-ENDPROC(privcmd_call);
+SYM_FUNC_END(privcmd_call);
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0b6919c00413..197c473b796a 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -8,7 +8,7 @@
 #ifndef _ASM_C6X_PGTABLE_H
 #define _ASM_C6X_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h
new file mode 100644
index 000000000000..26c6c6696bbd
--- /dev/null
+++ b/arch/c6x/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_C6X_VMALLOC_H
+#define _ASM_C6X_VMALLOC_H
+
+#endif /* _ASM_C6X_VMALLOC_H */
diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S
index 4332a10aec6c..fb154d19625b 100644
--- a/arch/c6x/kernel/entry.S
+++ b/arch/c6x/kernel/entry.S
@@ -18,7 +18,7 @@
 #define DP	B14
 #define SP	B15
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 #define resume_kernel restore_all
 #endif
 
@@ -287,7 +287,7 @@ work_notifysig:
 	;; is a little bit different
 	;;
 ENTRY(ret_from_exception)
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	MASK_INT B2
 #endif
 
@@ -557,7 +557,7 @@ ENDPROC(_nmi_handler)
 	;;
 	;; Jump to schedule() then return to ret_from_isr
 	;;
-#ifdef	CONFIG_PREEMPT
+#ifdef	CONFIG_PREEMPTION
 resume_kernel:
 	GET_THREAD_INFO A12
 	LDW	.D1T1	*+A12(THREAD_INFO_PREEMPT_COUNT),A1
@@ -582,7 +582,7 @@ preempt_schedule:
 	B	.S2	preempt_schedule_irq
 #endif
 	ADDKPC	.S2	preempt_schedule,B3,4
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 ENTRY(enable_exception)
 	DINT
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 584bab2bace6..ac99ba0864bf 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -5,6 +5,9 @@
  *  Copyright (C) 2010, 2011 Texas Instruments Incorporated
  *  Mark Salter <msalter@redhat.com>
  */
+
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
@@ -80,10 +83,7 @@ SECTIONS
 		*(.gnu.warning)
 	}
 
-	EXCEPTION_TABLE(16)
-	NOTES
-
-	RO_DATA_SECTION(PAGE_SIZE)
+	RO_DATA(PAGE_SIZE)
 	.const :
 	{
 		*(.const .const.* .gnu.linkonce.r.*)
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index b319808e8f6b..a5909091cb14 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -140,7 +140,7 @@ void __init coherent_mem_init(phys_addr_t start, u32 size)
 		      sizeof(long));
 }
 
-static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+static void c6x_dma_sync(phys_addr_t paddr, size_t size,
 		enum dma_data_direction dir)
 {
 	BUG_ON(!valid_dma_direction(dir));
@@ -160,14 +160,14 @@ static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
 	}
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	return c6x_dma_sync(dev, paddr, size, dir);
+	return c6x_dma_sync(paddr, size, dir);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	return c6x_dma_sync(dev, paddr, size, dir);
+	return c6x_dma_sync(paddr, size, dir);
 }
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 3973847b5f42..da09c884cc30 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -17,6 +17,7 @@ config CSKY
 	select IRQ_DOMAIN
 	select HANDLE_DOMAIN_IRQ
 	select DW_APB_TIMER_OF
+	select GENERIC_IOREMAP
 	select GENERIC_LIB_ASHLDI3
 	select GENERIC_LIB_ASHRDI3
 	select GENERIC_LIB_LSHRDI3
diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h
index 80d071e2567f..332f51bc68fb 100644
--- a/arch/csky/include/asm/io.h
+++ b/arch/csky/include/asm/io.h
@@ -36,14 +36,9 @@
 /*
  * I/O memory mapping functions.
  */
-extern void __iomem *ioremap_cache(phys_addr_t addr, size_t size);
-extern void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot);
-extern void iounmap(void *addr);
-
-#define ioremap(addr, size)		__ioremap((addr), (size), pgprot_noncached(PAGE_KERNEL))
-#define ioremap_wc(addr, size)		__ioremap((addr), (size), pgprot_writecombine(PAGE_KERNEL))
-#define ioremap_nocache(addr, size)	ioremap((addr), (size))
-#define ioremap_cache			ioremap_cache
+#define ioremap_wc(addr, size) \
+	ioremap_prot((addr), (size), \
+		(_PAGE_IOREMAP & ~_CACHE_MASK) | _CACHE_UNCACHED)
 
 #include <asm-generic/io.h>
 
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 7c21985c60dc..4b2a41e15f2e 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -86,6 +86,10 @@
 #define PAGE_USERIO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				_CACHE_CACHED)
 
+#define _PAGE_IOREMAP \
+	(_PAGE_PRESENT | __READABLE | __WRITEABLE | _PAGE_GLOBAL | \
+	 _CACHE_UNCACHED | _PAGE_SO)
+
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
 #define __P010	PAGE_COPY
diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h
new file mode 100644
index 000000000000..43dca6336b4c
--- /dev/null
+++ b/arch/csky/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_CSKY_VMALLOC_H
+#define _ASM_CSKY_VMALLOC_H
+
+#endif /* _ASM_CSKY_VMALLOC_H */
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index a7a5b67df898..007706328000 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -277,7 +277,7 @@ ENTRY(csky_irq)
 	zero_fp
 	psrset	ee
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	mov	r9, sp			/* Get current stack  pointer */
 	bmaski	r10, THREAD_SHIFT
 	andn	r9, r10			/* Get thread_info */
@@ -294,7 +294,7 @@ ENTRY(csky_irq)
 	mov	a0, sp
 	jbsr	csky_do_IRQ
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	subi	r12, 1
 	stw	r12, (r9, TINFO_PREEMPT)
 	cmpnei	r12, 0
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index 23ee604aafdb..52eaf31ba27f 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -136,10 +136,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_HIGHMEM
 	kmap_init();
 #endif
-
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
 }
 
 unsigned long va_pa_offset;
diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S
index ae7961b973f2..2ff37beaf2bf 100644
--- a/arch/csky/kernel/vmlinux.lds.S
+++ b/arch/csky/kernel/vmlinux.lds.S
@@ -49,11 +49,10 @@ SECTIONS
 
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RO_DATA(PAGE_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
-	NOTES
 	EXCEPTION_TABLE(L1_CACHE_BYTES)
 	BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES)
 	VBR_BASE
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 06e85b565454..8f6571ae27c8 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -58,8 +58,8 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-			      size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_TO_DEVICE:
@@ -74,8 +74,8 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-			   size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_TO_DEVICE:
diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c
index e13cd3497628..70c8268d3b2b 100644
--- a/arch/csky/mm/ioremap.c
+++ b/arch/csky/mm/ioremap.c
@@ -3,60 +3,8 @@
 
 #include <linux/export.h>
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/io.h>
 
-#include <asm/pgtable.h>
-
-static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
-				      pgprot_t prot, void *caller)
-{
-	phys_addr_t last_addr;
-	unsigned long offset, vaddr;
-	struct vm_struct *area;
-
-	last_addr = addr + size - 1;
-	if (!size || last_addr < addr)
-		return NULL;
-
-	offset = addr & (~PAGE_MASK);
-	addr &= PAGE_MASK;
-	size = PAGE_ALIGN(size + offset);
-
-	area = get_vm_area_caller(size, VM_IOREMAP, caller);
-	if (!area)
-		return NULL;
-
-	vaddr = (unsigned long)area->addr;
-
-	if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
-		free_vm_area(area);
-		return NULL;
-	}
-
-	return (void __iomem *)(vaddr + offset);
-}
-
-void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
-{
-	return __ioremap_caller(phys_addr, size, prot,
-				__builtin_return_address(0));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
-{
-	return __ioremap_caller(phys_addr, size, PAGE_KERNEL,
-				__builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_cache);
-
-void iounmap(void __iomem *addr)
-{
-	vunmap((void *)((unsigned long)addr & PAGE_MASK));
-}
-EXPORT_SYMBOL(iounmap);
-
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h
new file mode 100644
index 000000000000..08a55c1dfa23
--- /dev/null
+++ b/arch/h8300/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_H8300_VMALLOC_H
+#define _ASM_H8300_VMALLOC_H
+
+#endif /* _ASM_H8300_VMALLOC_H */
diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S
index 4ade5f8299ba..c6e289b5f1f2 100644
--- a/arch/h8300/kernel/entry.S
+++ b/arch/h8300/kernel/entry.S
@@ -284,12 +284,12 @@ badsys:
 	mov.l	er0,@(LER0:16,sp)
 	bra	resume_userspace
 
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
 #define resume_kernel restore_all
 #endif
 
 ret_from_exception:
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
 	orc	#0xc0,ccr
 #endif
 ret_from_interrupt:
@@ -319,7 +319,7 @@ work_resched:
 restore_all:
 	RESTORE_ALL			/* Does RTE */
 
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
 resume_kernel:
 	mov.l	@(TI_PRE_COUNT:16,er4),er0
 	bne	restore_all:8
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index 49f716c0a1df..6b1afc2f9b68 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
@@ -37,9 +40,7 @@ SECTIONS
 #endif
 	_etext = . ;
 	}
-	EXCEPTION_TABLE(16)
-	NOTES
-	RO_DATA_SECTION(4)
+	RO_DATA(4)
 	ROMEND = .;
 #if defined(CONFIG_ROMKERNEL)
 	. = RAMTOP;
@@ -48,7 +49,7 @@ SECTIONS
 #endif
 	_sdata = . ;
 	__data_start = . ;
-	RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(0, PAGE_SIZE, THREAD_SIZE)
 #if defined(CONFIG_ROMKERNEL)
 #undef ADDR
 #endif
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 12cd9231c4b8..0231d69c8bf2 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 		"1:	%0 = memw_locked(%1);\n"			\
 		"	%0 = "#op "(%0,%2);\n"				\
 		"	memw_locked(%1,P3)=%0;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output)					\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 		"1:	%0 = memw_locked(%1);\n"			\
 		"	%0 = "#op "(%0,%2);\n"				\
 		"	memw_locked(%1,P3)=%0;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output)					\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 		"1:	%0 = memw_locked(%2);\n"			\
 		"	%1 = "#op "(%0,%3);\n"				\
 		"	memw_locked(%2,P3)=%1;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output), "=&r" (val)				\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 		"	}"
 		"	memw_locked(%2, p3) = %1;"
 		"	{"
-		"		if !p3 jump 1b;"
+		"		if (!p3) jump 1b;"
 		"	}"
 		"2:"
 		: "=&r" (__oldval), "=&r" (tmp)
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 47384b094b94..71429f756af0 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -223,7 +223,7 @@ static inline int ffs(int x)
 	int r;
 
 	asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
-		"{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n"
+		"{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n"
 		: "=&r" (r)
 		: "r" (x)
 		: "p0");
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 6091322c3af9..92b8a02e588a 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
 	__asm__ __volatile__ (
 	"1:	%0 = memw_locked(%1);\n"    /*  load into retval */
 	"	memw_locked(%1,P0) = %2;\n" /*  store into memory */
-	"	if !P0 jump 1b;\n"
+	"	if (!P0) jump 1b;\n"
 	: "=&r" (retval)
 	: "r" (ptr), "r" (x)
 	: "memory", "p0"
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index cb635216a732..0191f7c7193e 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -16,7 +16,7 @@
 	    /* For example: %1 = %4 */ \
 	    insn \
 	"2: memw_locked(%3,p2) = %1;\n" \
-	"   if !p2 jump 1b;\n" \
+	"   if (!p2) jump 1b;\n" \
 	"   %1 = #0;\n" \
 	"3:\n" \
 	".section .fixup,\"ax\"\n" \
@@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
 	"1: %1 = memw_locked(%3)\n"
 	"   {\n"
 	"      p2 = cmp.eq(%1,%4)\n"
-	"      if !p2.new jump:NT 3f\n"
+	"      if (!p2.new) jump:NT 3f\n"
 	"   }\n"
 	"2: memw_locked(%3,p2) = %5\n"
-	"   if !p2 jump 1b\n"
+	"   if (!p2) jump 1b\n"
 	"3:\n"
 	".section .fixup,\"ax\"\n"
 	"4: %0 = #%6\n"
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index ba1a444d55b3..bda2a9c2df78 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -27,7 +27,7 @@
 extern int remap_area_pages(unsigned long start, unsigned long phys_addr,
 				unsigned long end, unsigned long flags);
 
-extern void __iounmap(const volatile void __iomem *addr);
+extern void iounmap(const volatile void __iomem *addr);
 
 /* Defined in lib/io.c, needed for smc91x driver. */
 extern void __raw_readsw(const void __iomem *addr, void *data, int wordlen);
@@ -171,21 +171,9 @@ static inline void writel(u32 data, volatile void __iomem *addr)
 #define writew_relaxed __raw_writew
 #define writel_relaxed __raw_writel
 
-/*
- * Need an mtype somewhere in here, for cache type deals?
- * This is probably too long for an inline.
- */
-void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size);
+void __iomem *ioremap(unsigned long phys_addr, unsigned long size);
+#define ioremap_uc(X, Y) ioremap((X), (Y))
 
-static inline void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
-{
-	return ioremap_nocache(phys_addr, size);
-}
-
-static inline void iounmap(volatile void __iomem *addr)
-{
-	__iounmap(addr);
-}
 
 #define __raw_writel writel
 
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index bfe07d842ff3..ef103b73bec8 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0);\n"
 		"	{ P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
 		"1:	R6 = memw_locked(%0);\n"
 		"	R6 = add(R6,#-1);\n"
 		"	memw_locked(%0,P3) = R6\n"
-		"	if !P3 jump 1b;\n"
+		"	if (!P3) jump 1b;\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1);\n"
 		"	{ %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
-		"	{ if !P3 jump 1f; }\n"
+		"	{ if (!P3) jump 1f; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	{ %0 = P3 }\n"
 		"1:\n"
@@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0)\n"
 		"	{ P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1)\n"
 		"	{ %0 = #0; P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
-		"	{ if !P3 jump 1f; }\n"
+		"	{ if (!P3) jump 1f; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	%0 = P3;\n"
 		"1:\n"
@@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0);\n"
 		"	P3 = cmp.eq(R6,#0);\n"
-		"	{ if !P3 jump 1b; R6 = #1; }\n"
+		"	{ if (!P3) jump 1b; R6 = #1; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1);\n"
 		"	P3 = cmp.eq(R6,#0);\n"
-		"	{ if !P3 jump 1f; R6 = #1; %0 = #0; }\n"
+		"	{ if (!P3) jump 1f; R6 = #1; %0 = #0; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	%0 = P3;\n"
 		"1:\n"
diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h
new file mode 100644
index 000000000000..7b04609e525c
--- /dev/null
+++ b/arch/hexagon/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_HEXAGON_VMALLOC_H
+#define _ASM_HEXAGON_VMALLOC_H
+
+#endif /* _ASM_HEXAGON_VMALLOC_H */
diff --git a/arch/hexagon/include/uapi/asm/bitsperlong.h b/arch/hexagon/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 5adca0d26913..000000000000
--- a/arch/hexagon/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __ASM_HEXAGON_BITSPERLONG_H
-#define __ASM_HEXAGON_BITSPERLONG_H
-
-#define __BITS_PER_LONG 32
-
-#include <asm-generic/bitsperlong.h>
-
-#endif
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index f561b127c4b4..25f388d9cfcc 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -55,8 +55,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	void *addr = phys_to_virt(paddr);
 
diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c
index cf8974beb500..6fb1aaab1c29 100644
--- a/arch/hexagon/kernel/hexagon_ksyms.c
+++ b/arch/hexagon/kernel/hexagon_ksyms.c
@@ -14,13 +14,13 @@
 EXPORT_SYMBOL(__clear_user_hexagon);
 EXPORT_SYMBOL(raw_copy_from_user);
 EXPORT_SYMBOL(raw_copy_to_user);
-EXPORT_SYMBOL(__iounmap);
+EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(__vmgetie);
 EXPORT_SYMBOL(__vmsetie);
 EXPORT_SYMBOL(__vmyield);
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(ioremap);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 
diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c
index 35f29423fda8..5ed02f699479 100644
--- a/arch/hexagon/kernel/stacktrace.c
+++ b/arch/hexagon/kernel/stacktrace.c
@@ -11,8 +11,6 @@
 #include <linux/thread_info.h>
 #include <linux/module.h>
 
-register unsigned long current_frame_pointer asm("r30");
-
 struct stackframe {
 	unsigned long fp;
 	unsigned long rets;
@@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace)
 
 	low = (unsigned long)task_stack_page(current);
 	high = low + THREAD_SIZE;
-	fp = current_frame_pointer;
+	fp = (unsigned long)__builtin_frame_address(0);
 
 	while (fp >= low && fp <= (high - sizeof(*frame))) {
 		frame = (struct stackframe *)fp;
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
index 12242c27e2df..554371d92bed 100644
--- a/arch/hexagon/kernel/vm_entry.S
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -265,12 +265,12 @@ event_dispatch:
 	 * should be in the designated register (usually R19)
 	 *
 	 * If we were in kernel mode, we don't need to check scheduler
-	 * or signals if CONFIG_PREEMPT is not set.  If set, then it has
+	 * or signals if CONFIG_PREEMPTION is not set.  If set, then it has
 	 * to jump to a need_resched kind of block.
-	 * BTW, CONFIG_PREEMPT is not supported yet.
+	 * BTW, CONFIG_PREEMPTION is not supported yet.
 	 */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	R0 = #VM_INT_DISABLE
 	trap1(#HVM_TRAP1_VMSETIE)
 #endif
@@ -369,7 +369,7 @@ ret_from_fork:
 		R26.L = #LO(do_work_pending);
 		R0 = #VM_INT_DISABLE;
 	}
-	if P0 jump check_work_pending
+	if (P0) jump check_work_pending
 	{
 		R0 = R25;
 		callr R24
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 78f2418e97c8..0ca2471ddb9f 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -49,12 +49,11 @@ SECTIONS
 	INIT_DATA_SECTION(PAGE_SIZE)
 
 	_sdata = .;
-		RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
-		RO_DATA_SECTION(PAGE_SIZE)
+		RW_DATA(32,PAGE_SIZE,_THREAD_SIZE)
+		RO_DATA(PAGE_SIZE)
 	_edata = .;
 
 	EXCEPTION_TABLE(16)
-	NOTES
 
 	BSS_SECTION(_PAGE_SIZE, _PAGE_SIZE, _PAGE_SIZE)
 
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
index 77d8e1e69e9b..255c5b1ee1a7 100644
--- a/arch/hexagon/mm/ioremap.c
+++ b/arch/hexagon/mm/ioremap.c
@@ -9,7 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 
-void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long last_addr, addr;
 	unsigned long offset = phys_addr & ~PAGE_MASK;
@@ -38,7 +38,7 @@ void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 	return (void __iomem *) (offset + addr);
 }
 
-void __iounmap(const volatile void __iomem *addr)
+void iounmap(const volatile void __iomem *addr)
 {
 	vunmap((void *) ((unsigned long) addr & PAGE_MASK));
 }
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 16714477eef4..bab7cd878464 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -33,7 +33,7 @@ config IA64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_VIRT_CPU_ACCOUNTING
-	select ARCH_HAS_DMA_COHERENT_TO_PFN
+	select DMA_NONCOHERENT_MMAP
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select VIRT_TO_BUS
 	select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index f886d4dc9d55..b66ba907019c 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -38,7 +38,10 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 /* Low-level suspend routine. */
 extern int acpi_suspend_lowlevel(void);
 
-extern unsigned long acpi_wakeup_address;
+static inline unsigned long acpi_get_wakeup_address(void)
+{
+	return 0;
+}
 
 /*
  * Record the cpei override flag and current logical cpu. This is
diff --git a/arch/ia64/include/asm/agp.h b/arch/ia64/include/asm/agp.h
index 2b451c4496da..0261507dc264 100644
--- a/arch/ia64/include/asm/agp.h
+++ b/arch/ia64/include/asm/agp.h
@@ -14,8 +14,8 @@
  * in coherent mode, which lets us map the AGP memory as normal (write-back) memory
  * (unlike x86, where it gets mapped "write-coalescing").
  */
-#define map_page_into_agp(page)		/* nothing */
-#define unmap_page_from_agp(page)	/* nothing */
+#define map_page_into_agp(page)		do { } while (0)
+#define unmap_page_from_agp(page)	do { } while (0)
 #define flush_agp_cache()		mb()
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 54e70c21352a..3d666a11a2de 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -256,16 +256,15 @@ static inline void outsl(unsigned long port, const void *src,
 # ifdef __KERNEL__
 
 extern void __iomem * ioremap(unsigned long offset, unsigned long size);
-extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_uc(unsigned long offset, unsigned long size);
 extern void iounmap (volatile void __iomem *addr);
 static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size)
 {
 	return ioremap(phys_addr, size);
 }
 #define ioremap ioremap
-#define ioremap_nocache ioremap_nocache
 #define ioremap_cache ioremap_cache
-#define ioremap_uc ioremap_nocache
+#define ioremap_uc ioremap_uc
 #define iounmap iounmap
 
 /*
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 7904f591a79b..eb0db20c9d4c 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_IA64_IOMMU_H
 #define _ASM_IA64_IOMMU_H 1
 
+#include <linux/acpi.h>
+
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
 
@@ -9,6 +11,9 @@ extern void no_iommu_init(void);
 #ifdef	CONFIG_INTEL_IOMMU
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+
+static inline int __init
+arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { return 0; }
 #else
 #define no_iommu		(1)
 #define iommu_detected		(0)
diff --git a/arch/ia64/include/asm/irqflags.h b/arch/ia64/include/asm/irqflags.h
index d97f8435be4f..1dc30f12e545 100644
--- a/arch/ia64/include/asm/irqflags.h
+++ b/arch/ia64/include/asm/irqflags.h
@@ -36,11 +36,7 @@ static inline void arch_maybe_save_ip(unsigned long flags)
 static inline unsigned long arch_local_save_flags(void)
 {
 	ia64_stop();
-#ifdef CONFIG_PARAVIRT
-	return ia64_get_psr_i();
-#else
 	return ia64_getreg(_IA64_REG_PSR);
-#endif
 }
 
 static inline unsigned long arch_local_irq_save(void)
diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h
index 30cb373f3de8..64ce0b971a0a 100644
--- a/arch/ia64/include/asm/vga.h
+++ b/arch/ia64/include/asm/vga.h
@@ -18,7 +18,7 @@
 extern unsigned long vga_console_iobase;
 extern unsigned long vga_console_membase;
 
-#define VGA_MAP_MEM(x,s)	((unsigned long) ioremap_nocache(vga_console_membase + (x), s))
+#define VGA_MAP_MEM(x,s)	((unsigned long) ioremap(vga_console_membase + (x), s))
 
 #define vga_readb(x)	(*(x))
 #define vga_writeb(x,y)	(*(y) = (x))
diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..a2b51141ad28
--- /dev/null
+++ b/arch/ia64/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_IA64_VMALLOC_H
+#define _ASM_IA64_VMALLOC_H
+
+#endif /* _ASM_IA64_VMALLOC_H */
diff --git a/arch/ia64/include/uapi/asm/errno.h b/arch/ia64/include/uapi/asm/errno.h
deleted file mode 100644
index 9addba592646..000000000000
--- a/arch/ia64/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/errno.h>
diff --git a/arch/ia64/include/uapi/asm/gcc_intrin.h b/arch/ia64/include/uapi/asm/gcc_intrin.h
index c60696fd1e37..ecfa3eadb217 100644
--- a/arch/ia64/include/uapi/asm/gcc_intrin.h
+++ b/arch/ia64/include/uapi/asm/gcc_intrin.h
@@ -31,7 +31,7 @@ extern void ia64_bad_param_for_setreg (void);
 extern void ia64_bad_param_for_getreg (void);
 
 
-#define ia64_native_setreg(regnum, val)						\
+#define ia64_setreg(regnum, val)						\
 ({										\
 	switch (regnum) {							\
 	    case _IA64_REG_PSR_L:						\
@@ -60,7 +60,7 @@ extern void ia64_bad_param_for_getreg (void);
 	}									\
 })
 
-#define ia64_native_getreg(regnum)						\
+#define ia64_getreg(regnum)							\
 ({										\
 	__u64 ia64_intri_res;							\
 										\
@@ -384,7 +384,7 @@ extern void ia64_bad_param_for_getreg (void);
 
 #define ia64_invala() asm volatile ("invala" ::: "memory")
 
-#define ia64_native_thash(addr)							\
+#define ia64_thash(addr)							\
 ({										\
 	unsigned long ia64_intri_res;						\
 	asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr));	\
@@ -437,10 +437,10 @@ extern void ia64_bad_param_for_getreg (void);
 #define ia64_set_pmd(index, val)						\
 	asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory")
 
-#define ia64_native_set_rr(index, val)							\
+#define ia64_set_rr(index, val)							\
 	asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory");
 
-#define ia64_native_get_cpuid(index)							\
+#define ia64_get_cpuid(index)								\
 ({											\
 	unsigned long ia64_intri_res;							\
 	asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index));	\
@@ -476,33 +476,33 @@ extern void ia64_bad_param_for_getreg (void);
 })
 
 
-#define ia64_native_get_pmd(index)						\
+#define ia64_get_pmd(index)							\
 ({										\
 	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
 
-#define ia64_native_get_rr(index)						\
+#define ia64_get_rr(index)							\
 ({										\
 	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index));	\
 	ia64_intri_res;								\
 })
 
-#define ia64_native_fc(addr)	asm volatile ("fc %0" :: "r"(addr) : "memory")
+#define ia64_fc(addr)	asm volatile ("fc %0" :: "r"(addr) : "memory")
 
 
 #define ia64_sync_i()	asm volatile (";; sync.i" ::: "memory")
 
-#define ia64_native_ssm(mask)	asm volatile ("ssm %0":: "i"((mask)) : "memory")
-#define ia64_native_rsm(mask)	asm volatile ("rsm %0":: "i"((mask)) : "memory")
+#define ia64_ssm(mask)	asm volatile ("ssm %0":: "i"((mask)) : "memory")
+#define ia64_rsm(mask)	asm volatile ("rsm %0":: "i"((mask)) : "memory")
 #define ia64_sum(mask)	asm volatile ("sum %0":: "i"((mask)) : "memory")
 #define ia64_rum(mask)	asm volatile ("rum %0":: "i"((mask)) : "memory")
 
 #define ia64_ptce(addr)	asm volatile ("ptc.e %0" :: "r"(addr))
 
-#define ia64_native_ptcga(addr, size)						\
+#define ia64_ptcga(addr, size)							\
 do {										\
 	asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory");	\
 	ia64_dv_serialize_data();						\
@@ -607,7 +607,7 @@ do {										\
         }								\
 })
 
-#define ia64_native_intrin_local_irq_restore(x)			\
+#define ia64_intrin_local_irq_restore(x)			\
 do {								\
 	asm volatile (";;   cmp.ne p6,p7=%0,r0;;"		\
 		      "(p6) ssm psr.i;"				\
diff --git a/arch/ia64/include/uapi/asm/intel_intrin.h b/arch/ia64/include/uapi/asm/intel_intrin.h
index ab649691545a..dc1884dc54b5 100644
--- a/arch/ia64/include/uapi/asm/intel_intrin.h
+++ b/arch/ia64/include/uapi/asm/intel_intrin.h
@@ -17,8 +17,8 @@
 		 	 * intrinsic
 		 	 */
 
-#define ia64_native_getreg	__getReg
-#define ia64_native_setreg	__setReg
+#define ia64_getreg		__getReg
+#define ia64_setreg		__setReg
 
 #define ia64_hint		__hint
 #define ia64_hint_pause		__hint_pause
@@ -40,10 +40,10 @@
 #define ia64_invala_fr		__invala_fr
 #define ia64_nop		__nop
 #define ia64_sum		__sum
-#define ia64_native_ssm		__ssm
+#define ia64_ssm		__ssm
 #define ia64_rum		__rum
-#define ia64_native_rsm		__rsm
-#define ia64_native_fc 		__fc
+#define ia64_rsm		__rsm
+#define ia64_fc			__fc
 
 #define ia64_ldfs		__ldfs
 #define ia64_ldfd		__ldfd
@@ -89,17 +89,17 @@
 		__setIndReg(_IA64_REG_INDR_PMC, index, val)
 #define ia64_set_pmd(index, val)	\
 		__setIndReg(_IA64_REG_INDR_PMD, index, val)
-#define ia64_native_set_rr(index, val)	\
+#define ia64_set_rr(index, val)		\
 		__setIndReg(_IA64_REG_INDR_RR, index, val)
 
-#define ia64_native_get_cpuid(index)	\
+#define ia64_get_cpuid(index)	\
 		__getIndReg(_IA64_REG_INDR_CPUID, index)
 #define __ia64_get_dbr(index)		__getIndReg(_IA64_REG_INDR_DBR, index)
 #define ia64_get_ibr(index)		__getIndReg(_IA64_REG_INDR_IBR, index)
 #define ia64_get_pkr(index)		__getIndReg(_IA64_REG_INDR_PKR, index)
 #define ia64_get_pmc(index)		__getIndReg(_IA64_REG_INDR_PMC, index)
-#define ia64_native_get_pmd(index)	__getIndReg(_IA64_REG_INDR_PMD, index)
-#define ia64_native_get_rr(index)	__getIndReg(_IA64_REG_INDR_RR, index)
+#define ia64_get_pmd(index)		__getIndReg(_IA64_REG_INDR_PMD, index)
+#define ia64_get_rr(index)		__getIndReg(_IA64_REG_INDR_RR, index)
 
 #define ia64_srlz_d		__dsrlz
 #define ia64_srlz_i		__isrlz
@@ -121,16 +121,16 @@
 #define ia64_ld8_acq		__ld8_acq
 
 #define ia64_sync_i		__synci
-#define ia64_native_thash	__thash
-#define ia64_native_ttag	__ttag
+#define ia64_thash		__thash
+#define ia64_ttag		__ttag
 #define ia64_itcd		__itcd
 #define ia64_itci		__itci
 #define ia64_itrd		__itrd
 #define ia64_itri		__itri
 #define ia64_ptce		__ptce
 #define ia64_ptcl		__ptcl
-#define ia64_native_ptcg	__ptcg
-#define ia64_native_ptcga	__ptcga
+#define ia64_ptcg		__ptcg
+#define ia64_ptcga		__ptcga
 #define ia64_ptri		__ptri
 #define ia64_ptrd		__ptrd
 #define ia64_dep_mi		_m64_dep_mi
@@ -147,13 +147,13 @@
 #define ia64_lfetch_fault	__lfetch_fault
 #define ia64_lfetch_fault_excl	__lfetch_fault_excl
 
-#define ia64_native_intrin_local_irq_restore(x)		\
+#define ia64_intrin_local_irq_restore(x)		\
 do {							\
 	if ((x) != 0) {					\
-		ia64_native_ssm(IA64_PSR_I);		\
+		ia64_ssm(IA64_PSR_I);			\
 		ia64_srlz_d();				\
 	} else {					\
-		ia64_native_rsm(IA64_PSR_I);		\
+		ia64_rsm(IA64_PSR_I);			\
 	}						\
 } while (0)
 
diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h
index aecc217eca63..a0e0a064f5b1 100644
--- a/arch/ia64/include/uapi/asm/intrinsics.h
+++ b/arch/ia64/include/uapi/asm/intrinsics.h
@@ -21,15 +21,13 @@
 #endif
 #include <asm/cmpxchg.h>
 
-#define ia64_native_get_psr_i()	(ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I)
-
-#define ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4)	\
+#define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4)		\
 do {									\
-	ia64_native_set_rr(0x0000000000000000UL, (val0));		\
-	ia64_native_set_rr(0x2000000000000000UL, (val1));		\
-	ia64_native_set_rr(0x4000000000000000UL, (val2));		\
-	ia64_native_set_rr(0x6000000000000000UL, (val3));		\
-	ia64_native_set_rr(0x8000000000000000UL, (val4));		\
+	ia64_set_rr(0x0000000000000000UL, (val0));			\
+	ia64_set_rr(0x2000000000000000UL, (val1));			\
+	ia64_set_rr(0x4000000000000000UL, (val2));			\
+	ia64_set_rr(0x6000000000000000UL, (val3));			\
+	ia64_set_rr(0x8000000000000000UL, (val4));			\
 } while (0)
 
 /*
@@ -85,41 +83,4 @@ extern unsigned long __bad_increment_for_ia64_fetch_and_add (void);
 
 #endif
 
-
-#ifndef __ASSEMBLY__
-
-#define IA64_INTRINSIC_API(name)	ia64_native_ ## name
-#define IA64_INTRINSIC_MACRO(name)	ia64_native_ ## name
-
-
-/************************************************/
-/* Instructions paravirtualized for correctness */
-/************************************************/
-/* fc, thash, get_cpuid, get_pmd, get_eflags, set_eflags */
-/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
- * is not currently used (though it may be in a long-format VHPT system!)
- */
-#define ia64_fc				IA64_INTRINSIC_API(fc)
-#define ia64_thash			IA64_INTRINSIC_API(thash)
-#define ia64_get_cpuid			IA64_INTRINSIC_API(get_cpuid)
-#define ia64_get_pmd			IA64_INTRINSIC_API(get_pmd)
-
-
-/************************************************/
-/* Instructions paravirtualized for performance */
-/************************************************/
-#define ia64_ssm			IA64_INTRINSIC_MACRO(ssm)
-#define ia64_rsm			IA64_INTRINSIC_MACRO(rsm)
-#define ia64_getreg			IA64_INTRINSIC_MACRO(getreg)
-#define ia64_setreg			IA64_INTRINSIC_API(setreg)
-#define ia64_set_rr			IA64_INTRINSIC_API(set_rr)
-#define ia64_get_rr			IA64_INTRINSIC_API(get_rr)
-#define ia64_ptcga			IA64_INTRINSIC_API(ptcga)
-#define ia64_get_psr_i			IA64_INTRINSIC_API(get_psr_i)
-#define ia64_intrin_local_irq_restore	\
-	IA64_INTRINSIC_API(intrin_local_irq_restore)
-#define ia64_set_rr0_to_rr4		IA64_INTRINSIC_API(set_rr0_to_rr4)
-
-#endif /* !__ASSEMBLY__ */
-
 #endif /* _UAPI_ASM_IA64_INTRINSICS_H */
diff --git a/arch/ia64/include/uapi/asm/ioctl.h b/arch/ia64/include/uapi/asm/ioctl.h
deleted file mode 100644
index b809c4566e5f..000000000000
--- a/arch/ia64/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/ioctl.h>
diff --git a/arch/ia64/include/uapi/asm/ioctls.h b/arch/ia64/include/uapi/asm/ioctls.h
deleted file mode 100644
index b86001940209..000000000000
--- a/arch/ia64/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_IOCTLS_H
-#define _ASM_IA64_IOCTLS_H
-
-#include <asm-generic/ioctls.h>
-
-#endif /* _ASM_IA64_IOCTLS_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 70d1587ddcd4..a5636524af76 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -42,8 +42,6 @@ int acpi_lapic;
 unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
-unsigned long acpi_wakeup_address = 0;
-
 #define ACPI_MAX_PLATFORM_INTERRUPTS	256
 
 /* Array to record platform interrupt vectors for generic interrupt routing. */
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 00e8e2a1eb19..fb0deb8a4221 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -211,7 +211,7 @@ void foo(void)
 	       offsetof (struct cpuinfo_ia64, ptce_stride));
 	BLANK();
 	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
-	       offsetof (struct timespec, tv_nsec));
+	       offsetof (struct __kernel_old_timespec, tv_nsec));
 	DEFINE(IA64_TIME_SN_SPEC_SNSEC_OFFSET,
 	       offsetof (struct time_sn_spec, snsec));
 
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index f80eb7fb544d..258d7b70c0f3 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -50,7 +50,7 @@ int __init init_cyclone_clock(void)
 
 	/* find base address */
 	offset = (CYCLONE_CBAR_ADDR);
-	reg = ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap(offset, sizeof(u64));
 	if(!reg){
 		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
 				" register.\n");
@@ -68,7 +68,7 @@ int __init init_cyclone_clock(void)
 
 	/* setup PMCC */
 	offset = (base + CYCLONE_PMCC_OFFSET);
-	reg = ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap(offset, sizeof(u64));
 	if(!reg){
 		printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
 				" register.\n");
@@ -80,7 +80,7 @@ int __init init_cyclone_clock(void)
 
 	/* setup MPCS */
 	offset = (base + CYCLONE_MPCS_OFFSET);
-	reg = ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap(offset, sizeof(u64));
 	if(!reg){
 		printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
 				" register.\n");
@@ -92,7 +92,7 @@ int __init init_cyclone_clock(void)
 
 	/* map in cyclone_timer */
 	offset = (base + CYCLONE_MPMC_OFFSET);
-	cyclone_timer = ioremap_nocache(offset, sizeof(u32));
+	cyclone_timer = ioremap(offset, sizeof(u32));
 	if(!cyclone_timer){
 		printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
 				" register.\n");
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index 4a3262795890..09ef9ce9988d 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -19,9 +19,3 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 {
 	dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 }
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
-		dma_addr_t dma_addr)
-{
-	return page_to_pfn(virt_to_page(cpu_addr));
-}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index a9992be5718b..2ac926331500 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -670,12 +670,12 @@ GLOBAL_ENTRY(ia64_leave_syscall)
 	 *
 	 * p6 controls whether current_thread_info()->flags needs to be check for
 	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
 	 * is 0.  After extra work processing has been completed, execution
 	 * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
 	 * needs to be redone.
 	 */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	RSM_PSR_I(p0, r2, r18)			// disable interrupts
 	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
 (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
@@ -685,7 +685,7 @@ GLOBAL_ENTRY(ia64_leave_syscall)
 (pUStk)	mov r21=0			// r21 <- 0
 	;;
 	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
 	RSM_PSR_I(pUStk, r2, r18)
 	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
@@ -814,12 +814,12 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	 *
 	 * p6 controls whether current_thread_info()->flags needs to be check for
 	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
 	 * is 0.  After extra work processing has been completed, execution
 	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
 	 * needs to be redone.
 	 */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	RSM_PSR_I(p0, r17, r31)			// disable interrupts
 	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
 (pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
@@ -1120,7 +1120,7 @@ skip_rbs_switch:
 
 	/*
 	 * On entry:
-	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPTION)
 	 *	r31 = current->thread_info->flags
 	 * On exit:
 	 *	p6 = TRUE if work-pending-check needs to be redone
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index b8356edbde65..a6d6a0556f08 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -841,7 +841,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		return 1;
 	}
 
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
 	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		ia64_psr(regs)->ri = p->ainsn.slot;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index bb320c6d0cc9..4009383453f7 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -289,7 +289,7 @@ static void __init setup_crashkernel(unsigned long total, int *n)
 		}
 
 		if (!check_crashkernel_memory(base, size)) {
-			pr_warning("crashkernel: There would be kdump memory "
+			pr_warn("crashkernel: There would be kdump memory "
 				"at %ld GB but this is unusable because it "
 				"must\nbe below 4 GB. Change the memory "
 				"configuration of the machine.\n",
@@ -608,9 +608,6 @@ setup_arch (char **cmdline_p)
 
 #ifdef CONFIG_VT
 	if (!conswitchp) {
-# if defined(CONFIG_DUMMY_CONSOLE)
-		conswitchp = &dummy_con;
-# endif
 # if defined(CONFIG_VGA_CONSOLE)
 		/*
 		 * Non-legacy systems may route legacy VGA MMIO range to system
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 36d5faf4c86c..042911e670b8 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -356,3 +356,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32c8877..91b4024c9351 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
 	else
 		ti->stime += stime;
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index d9d4e21107cd..1ec6b703c5b4 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -5,6 +5,9 @@
 #include <asm/pgtable.h>
 #include <asm/thread_info.h>
 
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 
 OUTPUT_FORMAT("elf64-ia64-little")
@@ -13,7 +16,7 @@ ENTRY(phys_start)
 jiffies = jiffies_64;
 
 PHDRS {
-	code   PT_LOAD;
+	text   PT_LOAD;
 	percpu PT_LOAD;
 	data   PT_LOAD;
 	note   PT_NOTE;
@@ -36,7 +39,7 @@ SECTIONS {
 	phys_start = _start - LOAD_OFFSET;
 
 	code : {
-	} :code
+	} :text
 	. = KERNEL_START;
 
 	_text = .;
@@ -68,11 +71,6 @@ SECTIONS {
 	/*
 	 * Read-only data
 	 */
-	NOTES :code :note       /* put .notes in text and mark in PT_NOTE  */
-	code_continues : {
-	} : code               /* switch back to regular program...  */
-
-	EXCEPTION_TABLE(16)
 
 	/* MCA table */
 	. = ALIGN(16);
@@ -102,11 +100,11 @@ SECTIONS {
 		__start_unwind = .;
 		*(.IA_64.unwind*)
 		__end_unwind = .;
-	} :code :unwind
+	} :text :unwind
 	code_continues2 : {
-	} : code
+	} :text
 
-	RODATA
+	RO_DATA(4096)
 
 	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
 		__start_opd = .;
@@ -214,7 +212,7 @@ SECTIONS {
 	_end = .;
 
 	code : {
-	} :code
+	} :text
 
 	STABS_DEBUG
 	DWARF_DEBUG
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index bf9df2625bc8..b01d68a2d5d9 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -73,8 +73,8 @@ __ia64_sync_icache_dcache (pte_t pte)
  * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	unsigned long pfn = PHYS_PFN(paddr);
 
@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 0c0de2c4ec69..a09cfa064536 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -99,14 +99,14 @@ ioremap (unsigned long phys_addr, unsigned long size)
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
-ioremap_nocache (unsigned long phys_addr, unsigned long size)
+ioremap_uc(unsigned long phys_addr, unsigned long size)
 {
 	if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
 		return NULL;
 
 	return __ioremap_uc(phys_addr);
 }
-EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(ioremap_uc);
 
 void
 early_iounmap (volatile void __iomem *addr, unsigned long size)
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 6663f1741798..6ad6cdac74b3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -14,6 +14,7 @@ config M68K
 	select HAVE_AOUT if MMU
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_DEBUG_BUGVERBOSE
+	select HAVE_COPY_THREAD_TLS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
 	select HAVE_UID16
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 73bf5ea9ee1b..7ec3161e8517 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -869,8 +869,28 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = {
 };
 #endif
 
+/*
+ * Falcon IDE interface
+ */
+
+#define FALCON_IDE_BASE	0xfff00000
+
+static const struct resource atari_falconide_rsrc[] __initconst = {
+	{
+		.flags = IORESOURCE_MEM,
+		.start = FALCON_IDE_BASE,
+		.end   = FALCON_IDE_BASE + 0x39,
+	},
+	{
+		.flags = IORESOURCE_IRQ,
+		.start = IRQ_MFP_FSCSI,
+		.end   = IRQ_MFP_FSCSI,
+	},
+};
+
 int __init atari_platform_init(void)
 {
+	struct platform_device *pdev;
 	int rv = 0;
 
 	if (!MACH_IS_ATARI)
@@ -912,6 +932,13 @@ int __init atari_platform_init(void)
 			atari_scsi_tt_rsrc, ARRAY_SIZE(atari_scsi_tt_rsrc));
 #endif
 
+	if (ATARIHW_PRESENT(IDE)) {
+		pdev = platform_device_register_simple("atari-falcon-ide", -1,
+			atari_falconide_rsrc, ARRAY_SIZE(atari_falconide_rsrc));
+		if (IS_ERR(pdev))
+			rv = PTR_ERR(pdev);
+	}
+
 	return rv;
 }
 
diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S
index 52d312d5b4d4..d43a02795a4a 100644
--- a/arch/m68k/coldfire/entry.S
+++ b/arch/m68k/coldfire/entry.S
@@ -108,7 +108,7 @@ ret_from_exception:
 	btst	#5,%sp@(PT_OFF_SR)	/* check if returning to kernel */
 	jeq	Luser_return		/* if so, skip resched, signals */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	movel	%sp,%d1			/* get thread_info pointer */
 	andl	#-THREAD_SIZE,%d1	/* at base of kernel stack */
 	movel	%d1,%a0
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 9a33c1c006a1..e1134c3e0b69 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -355,6 +355,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -420,11 +421,15 @@ CONFIG_INPUT_M68K_BEEP=m
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_PRINTER=m
 # CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_ICY=m
 CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PPS_CLIENT_PARPORT=m
 CONFIG_PTP_1588_CLOCK=m
-# CONFIG_HWMON is not set
+CONFIG_HWMON=m
+CONFIG_SENSORS_LTC2990=m
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
 CONFIG_FB_AMIGA=y
@@ -432,8 +437,6 @@ CONFIG_FB_AMIGA_OCS=y
 CONFIG_FB_AMIGA_ECS=y
 CONFIG_FB_AMIGA_AGA=y
 CONFIG_FB_FM2=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
@@ -490,6 +493,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -558,12 +562,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -574,7 +575,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -612,6 +613,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -620,6 +624,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -651,4 +656,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 7fdbc797a05d..484cb1643df1 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -334,6 +334,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -393,8 +394,6 @@ CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
@@ -450,6 +449,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -518,12 +518,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -534,7 +531,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -572,6 +569,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -580,6 +580,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -611,4 +612,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index f1763405a539..eb6a46b6d135 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -350,6 +350,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -417,8 +418,6 @@ CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_ATARI=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
@@ -472,6 +471,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -540,12 +540,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -556,7 +553,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -594,6 +591,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -602,6 +602,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -633,4 +634,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 91154d6acb31..bee9263a409c 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
@@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -511,12 +511,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -527,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -565,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -573,6 +573,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -604,4 +605,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index c398c4a94d95..c8847a8bcbd6 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -333,6 +333,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -395,8 +396,6 @@ CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
@@ -452,6 +451,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -520,12 +520,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -536,7 +533,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -574,6 +571,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -582,6 +582,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -613,4 +614,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig
index 434bd3750966..579fd98afed6 100644
--- a/arch/m68k/configs/m5475evb_defconfig
+++ b/arch/m68k/configs/m5475evb_defconfig
@@ -1,6 +1,5 @@
 # CONFIG_SWAP is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 350d004559be..303ffafd9cad 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -342,6 +342,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -419,8 +420,6 @@ CONFIG_PTP_1588_CLOCK=m
 CONFIG_FB=y
 CONFIG_FB_VALKYRIE=y
 CONFIG_FB_MAC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_HID=m
@@ -474,6 +473,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -542,12 +542,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -558,7 +555,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -596,6 +593,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -604,6 +604,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -635,4 +636,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index b838dd820348..89a704226cd9 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -386,6 +386,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -480,11 +481,15 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y
 CONFIG_SERIAL_PMACZILOG_CONSOLE=y
 CONFIG_PRINTER=m
 # CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_ICY=m
 CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PPS_CLIENT_PARPORT=m
 CONFIG_PTP_1588_CLOCK=m
-# CONFIG_HWMON is not set
+CONFIG_HWMON=m
+CONFIG_SENSORS_LTC2990=m
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
 CONFIG_FB_AMIGA=y
@@ -495,8 +500,6 @@ CONFIG_FB_FM2=y
 CONFIG_FB_ATARI=y
 CONFIG_FB_VALKYRIE=y
 CONFIG_FB_MAC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
@@ -556,6 +559,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -624,12 +628,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -640,7 +641,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -678,6 +679,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -686,6 +690,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -717,4 +722,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 3f8dd61559cf..f62c1f4d03a0 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -331,6 +331,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -389,8 +390,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
@@ -442,6 +441,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -510,12 +510,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -526,7 +523,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -564,6 +561,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -572,6 +572,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -603,4 +604,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index ae3b2d4f636c..58dcad26a751 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
@@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -511,12 +511,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -527,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -565,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -573,6 +573,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -604,4 +605,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index cd61ef14b582..5d3c28d1d545 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -339,6 +339,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -404,8 +405,6 @@ CONFIG_PPS_CLIENT_PARPORT=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
@@ -461,6 +460,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -529,12 +529,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -545,7 +542,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -583,6 +580,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -591,6 +591,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -622,4 +623,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 151f5371cd3d..5ef9e17dcd51 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -390,8 +391,6 @@ CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_HID=m
@@ -445,6 +444,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -513,12 +513,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -529,7 +526,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -567,6 +564,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 1dcb0ee1fe98..22e1accc60a3 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
 CONFIG_DM_ZERO=m
@@ -389,8 +390,6 @@ CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_HID=m
@@ -444,6 +443,7 @@ CONFIG_QNX4FS_FS=m
 CONFIG_QNX6FS_FS=m
 CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -512,12 +512,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_AEGIS128L=m
-CONFIG_CRYPTO_AEGIS256=m
-CONFIG_CRYPTO_MORUS640=m
-CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
@@ -528,7 +525,7 @@ CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_ADIANTUM=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -566,6 +563,9 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
@@ -574,6 +574,7 @@ CONFIG_STRING_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_EARLY_PRINTK=y
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_SORT=m
 CONFIG_REED_SOLOMON_TEST=m
@@ -605,4 +606,3 @@ CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
 CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c
index a4ebd2445eda..d2875e32abfc 100644
--- a/arch/m68k/emu/nfeth.c
+++ b/arch/m68k/emu/nfeth.c
@@ -167,7 +167,7 @@ static int nfeth_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static void nfeth_tx_timeout(struct net_device *dev)
+static void nfeth_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	dev->stats.tx_errors++;
 	netif_wake_queue(dev);
diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h
index 421b6c9c769d..dec05743d426 100644
--- a/arch/m68k/include/asm/kmap.h
+++ b/arch/m68k/include/asm/kmap.h
@@ -20,7 +20,6 @@ extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size,
 			       int cacheflag);
 #define iounmap iounmap
 extern void iounmap(void __iomem *addr);
-extern void __iounmap(void *addr, unsigned long size);
 
 #define ioremap ioremap
 static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
@@ -28,7 +27,6 @@ static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
 	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
 }
 
-#define ioremap_nocache ioremap
 #define ioremap_uc ioremap
 #define ioremap_wt ioremap_wt
 static inline void __iomem *ioremap_wt(unsigned long physaddr,
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index b34d44d666a4..82ec54c2eaa4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -28,9 +28,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 	return (pmd_t *) pgd;
 }
 
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -45,8 +42,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 	__free_page(page);
 }
 
-#define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
-
 static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
@@ -100,6 +95,4 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return new_pgd;
 }
 
-#define pgd_populate(mm, pmd, pte) BUG()
-
 #endif /* M68K_MCF_PGALLOC_H */
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 5d5502cb2b2d..b9f45aeded25 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -198,17 +198,9 @@ static inline int pmd_bad2(pmd_t *pmd) { return 0; }
 #define pmd_present(pmd) (!pmd_none2(&(pmd)))
 static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = 0; }
 
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t *pgdp) {}
-
 #define pte_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pte %08lx.\n",	\
 	__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n",	\
-	__FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n",	\
 	__FILE__, __LINE__, pgd_val(e))
@@ -340,14 +332,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
 #define pgd_offset_k(address)	pgd_offset(&init_mm, address)
 
 /*
- * Find an entry in the second-level pagetable.
- */
-static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
-{
-	return (pmd_t *) pgd;
-}
-
-/*
  * Find an entry in the third-level pagetable.
  */
 #define __pte_offset(address)	((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -360,12 +344,16 @@ static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
 static inline void nocache_page(void *vaddr)
 {
 	pgd_t *dir;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long addr = (unsigned long) vaddr;
 
 	dir = pgd_offset_k(addr);
-	pmdp = pmd_offset(dir, addr);
+	p4dp = p4d_offset(dir, addr);
+	pudp = pud_offset(p4dp, addr);
+	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset_kernel(pmdp, addr);
 	*ptep = pte_mknocache(*ptep);
 }
@@ -376,12 +364,16 @@ static inline void nocache_page(void *vaddr)
 static inline void cache_page(void *vaddr)
 {
 	pgd_t *dir;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long addr = (unsigned long) vaddr;
 
 	dir = pgd_offset_k(addr);
-	pmdp = pmd_offset(dir, addr);
+	p4dp = p4d_offset(dir, addr);
+	pudp = pud_offset(p4dp, addr);
+	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset_kernel(pmdp, addr);
 	*ptep = pte_mkcache(*ptep);
 }
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index f5b1852b4663..cac9f289d1f6 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -100,6 +100,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
 	struct mm_struct *mm;
 	int asid;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long mmuar;
@@ -127,7 +129,15 @@ static inline void load_ksp_mmu(struct task_struct *task)
 	if (pgd_none(*pgd))
 		goto bug;
 
-	pmd = pmd_offset(pgd, mmuar);
+	p4d = p4d_offset(pgd, mmuar);
+	if (p4d_none(*p4d))
+		goto bug;
+
+	pud = pud_offset(p4d, mmuar);
+	if (pud_none(*pud))
+		goto bug;
+
+	pmd = pmd_offset(pud, mmuar);
 	if (pmd_none(*pmd))
 		goto bug;
 
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index acab315c851f..ff9cc401ffd1 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -106,9 +106,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pgd_set(pgd, pmd);
+	pud_set(pud, pmd);
 }
 
 #endif /* _MOTOROLA_PGALLOC_H */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 7f66a7bad7a5..62bedc61f110 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -117,14 +117,14 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 	}
 }
 
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
+static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
 {
-	pgd_val(*pgdp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
+	pud_val(*pudp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
 }
 
 #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK))
 #define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK))
-#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK))
 
 
 #define pte_none(pte)		(!pte_val(pte))
@@ -147,11 +147,11 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
 #define pmd_page(pmd)		virt_to_page(__va(pmd_val(pmd)))
 
 
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
-#define pgd_present(pgd)	(pgd_val(pgd) & _PAGE_TABLE)
-#define pgd_clear(pgdp)		({ pgd_val(*pgdp) = 0; })
-#define pgd_page(pgd)		(mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
+#define pud_none(pud)		(!pud_val(pud))
+#define pud_bad(pud)		((pud_val(pud) & _DESCTYPE_MASK) != _PAGE_TABLE)
+#define pud_present(pud)	(pud_val(pud) & _PAGE_TABLE)
+#define pud_clear(pudp)		({ pud_val(*pudp) = 0; })
+#define pud_page(pud)		(mem_map + ((unsigned long)(__va(pud_val(pud)) - PAGE_OFFSET) >> PAGE_SHIFT))
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -209,9 +209,9 @@ static inline pgd_t *pgd_offset_k(unsigned long address)
 
 
 /* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
 {
-	return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
+	return (pmd_t *)pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
 }
 
 /* Find an entry in the third-level page table.. */
@@ -239,11 +239,15 @@ static inline void nocache_page(void *vaddr)
 
 	if (CPU_IS_040_OR_060) {
 		pgd_t *dir;
+		p4d_t *p4dp;
+		pud_t *pudp;
 		pmd_t *pmdp;
 		pte_t *ptep;
 
 		dir = pgd_offset_k(addr);
-		pmdp = pmd_offset(dir, addr);
+		p4dp = p4d_offset(dir, addr);
+		pudp = pud_offset(p4dp, addr);
+		pmdp = pmd_offset(pudp, addr);
 		ptep = pte_offset_kernel(pmdp, addr);
 		*ptep = pte_mknocache(*ptep);
 	}
@@ -255,11 +259,15 @@ static inline void cache_page(void *vaddr)
 
 	if (CPU_IS_040_OR_060) {
 		pgd_t *dir;
+		p4d_t *p4dp;
+		pud_t *pudp;
 		pmd_t *pmdp;
 		pte_t *ptep;
 
 		dir = pgd_offset_k(addr);
-		pmdp = pmd_offset(dir, addr);
+		p4dp = p4d_offset(dir, addr);
+		pudp = pud_offset(p4dp, addr);
+		pmdp = pmd_offset(pudp, addr);
 		ptep = pte_offset_kernel(pmdp, addr);
 		*ptep = pte_mkcache(*ptep);
 	}
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 700d8195880c..05e1e1e77a9a 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -21,19 +21,22 @@
 /*
  * These are used to make use of C type-checking..
  */
-typedef struct { unsigned long pte; } pte_t;
+#if !defined(CONFIG_MMU) || CONFIG_PGTABLE_LEVELS == 3
 typedef struct { unsigned long pmd[16]; } pmd_t;
+#define pmd_val(x)	((&x)->pmd[0])
+#define __pmd(x)	((pmd_t) { { (x) }, })
+#endif
+
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 typedef struct page *pgtable_t;
 
 #define pte_val(x)	((x).pte)
-#define pmd_val(x)	((&x)->pmd[0])
 #define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { { (x) }, })
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 646c174fff99..2bf5c3501e78 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -2,7 +2,12 @@
 #ifndef _M68K_PGTABLE_H
 #define _M68K_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+
+#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE)
+#include <asm-generic/pgtable-nopmd.h>
+#else
+#include <asm-generic/pgtable-nopud.h>
+#endif
 
 #include <asm/setup.h>
 
@@ -30,9 +35,7 @@
 
 
 /* PMD_SHIFT determines the size of the area a second-level page table can map */
-#ifdef CONFIG_SUN3
-#define PMD_SHIFT       17
-#else
+#if CONFIG_PGTABLE_LEVELS == 3
 #define PMD_SHIFT	22
 #endif
 #define PMD_SIZE	(1UL << PMD_SHIFT)
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index c18165b0d904..ccc4568299e5 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -2,7 +2,7 @@
 #ifndef _M68KNOMMU_PGTABLE_H
 #define _M68KNOMMU_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 /*
  * (C) Copyright 2000-2002, Greg Ungerer <gerg@snapgear.com>
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 856121122b91..11b95dadf7c0 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -17,8 +17,6 @@
 
 extern const char bad_pmd_string[];
 
-#define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); })
-
 #define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
 	pgtable_pte_page_dtor(pte);			\
@@ -41,7 +39,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
@@ -58,6 +55,4 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
      return new_pgd;
 }
 
-#define pgd_populate(mm, pmd, pte) BUG()
-
 #endif /* SUN3_PGALLOC_H */
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index c987d50866b4..bc4155264810 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -110,11 +110,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #define pmd_set(pmdp,ptep) do {} while (0)
 
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
-{
-	pgd_val(*pgdp) = virt_to_phys(pmdp);
-}
-
 #define __pte_page(pte) \
 ((unsigned long) __va ((pte_val (pte) & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT))
 #define __pmd_page(pmd) \
@@ -145,16 +140,9 @@ static inline int pmd_present2 (pmd_t *pmd) { return pmd_val (*pmd) & SUN3_PMD_V
 #define pmd_present(pmd) (!pmd_none2(&(pmd)))
 static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; }
 
-static inline int pgd_none (pgd_t pgd) { return 0; }
-static inline int pgd_bad (pgd_t pgd) { return 0; }
-static inline int pgd_present (pgd_t pgd) { return 1; }
-static inline void pgd_clear (pgd_t *pgdp) {}
-
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
@@ -194,12 +182,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
 /* Find an entry in a kernel pagetable directory. */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* Find an entry in the second-level pagetable. */
-static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
-{
-	return (pmd_t *) pgd;
-}
-
 /* Find an entry in the third-level pagetable. */
 #define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
 #define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 2e0047cf86f8..4ae52414cd9d 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -30,5 +30,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE3
 
 #endif /* _ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h
new file mode 100644
index 000000000000..bc1dca6cf134
--- /dev/null
+++ b/arch/m68k/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_M68K_VMALLOC_H
+#define _ASM_M68K_VMALLOC_H
+
+#endif /* _ASM_M68K_VMALLOC_H */
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 3fab684cc0db..871a0e11da34 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -61,8 +61,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 
 #endif /* CONFIG_MMU && !CONFIG_COLDFIRE */
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t handle,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t handle, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 97cd3ea5f10b..9dd76fbb7c6b 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -69,6 +69,13 @@ ENTRY(__sys_vfork)
 	lea     %sp@(24),%sp
 	rts
 
+ENTRY(__sys_clone3)
+	SAVE_SWITCH_STACK
+	pea	%sp@(SWITCH_STACK_SIZE)
+	jbsr	m68k_clone3
+	lea	%sp@(28),%sp
+	rts
+
 ENTRY(sys_sigreturn)
 	SAVE_SWITCH_STACK
 	movel	%sp,%sp@-		  | switch_stack pointer
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 4e77a06735c1..8f0d9140700f 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -30,8 +30,9 @@
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
 #include <linux/rcupdate.h>
-
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
+
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/setup.h>
@@ -107,20 +108,43 @@ void flush_thread(void)
  * on top of pt_regs, which means that sys_clone() arguments would be
  * buried.  We could, of course, copy them, but it's too costly for no
  * good reason - generic clone() would have to copy them *again* for
- * do_fork() anyway.  So in this case it's actually better to pass pt_regs *
- * and extract arguments for do_fork() from there.  Eventually we might
- * go for calling do_fork() directly from the wrapper, but only after we
- * are finished with do_fork() prototype conversion.
+ * _do_fork() anyway.  So in this case it's actually better to pass pt_regs *
+ * and extract arguments for _do_fork() from there.  Eventually we might
+ * go for calling _do_fork() directly from the wrapper, but only after we
+ * are finished with _do_fork() prototype conversion.
  */
 asmlinkage int m68k_clone(struct pt_regs *regs)
 {
 	/* regs will be equal to current_pt_regs() */
-	return do_fork(regs->d1, regs->d2, 0,
-		       (int __user *)regs->d3, (int __user *)regs->d4);
+	struct kernel_clone_args args = {
+		.flags		= regs->d1 & ~CSIGNAL,
+		.pidfd		= (int __user *)regs->d3,
+		.child_tid	= (int __user *)regs->d4,
+		.parent_tid	= (int __user *)regs->d3,
+		.exit_signal	= regs->d1 & CSIGNAL,
+		.stack		= regs->d2,
+		.tls		= regs->d5,
+	};
+
+	if (!legacy_clone_args_valid(&args))
+		return -EINVAL;
+
+	return _do_fork(&args);
+}
+
+/*
+ * Because extra registers are saved on the stack after the sys_clone3()
+ * arguments, this C wrapper extracts them from pt_regs * and then calls the
+ * generic sys_clone3() implementation.
+ */
+asmlinkage int m68k_clone3(struct pt_regs *regs)
+{
+	return sys_clone3((struct clone_args __user *)regs->d1, regs->d2);
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-		 unsigned long arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+		    unsigned long arg, struct task_struct *p,
+		    unsigned long tls)
 {
 	struct fork_frame {
 		struct switch_stack sw;
@@ -155,7 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	p->thread.usp = usp ?: rdusp();
 
 	if (clone_flags & CLONE_SETTLS)
-		task_thread_info(p)->tp_value = frame->regs.d5;
+		task_thread_info(p)->tp_value = tls;
 
 #ifdef CONFIG_FPU
 	if (!FPU_IS_EMU) {
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 528484feff80..ab8aa7be260f 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -274,10 +274,6 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	switch (m68k_machtype) {
 #ifdef CONFIG_AMIGA
 	case MACH_AMIGA:
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 3c5def10d486..a63483de7a42 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -146,10 +146,6 @@ void __init setup_arch(char **cmdline_p)
 	memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	boot_command_line[COMMAND_LINE_SIZE-1] = 0;
 
-#if defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
-
 	/*
 	 * Give all the memory to the bootmap allocator, tell it to put the
 	 * boot mem_map at the start of memory.
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 6363ec83a290..18a4de7d5934 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -465,6 +465,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
 	for (;;) {
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd;
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
 		pte_t *pte;
 		spinlock_t *ptl;
@@ -474,7 +476,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
 		pgd = pgd_offset(mm, (unsigned long)mem);
 		if (!pgd_present(*pgd))
 			goto bad_access;
-		pmd = pmd_offset(pgd, (unsigned long)mem);
+		p4d = p4d_offset(pgd, (unsigned long)mem);
+		if (!p4d_present(*p4d))
+			goto bad_access;
+		pud = pud_offset(p4d, (unsigned long)mem);
+		if (!pud_present(*pud))
+			goto bad_access;
+		pmd = pmd_offset(pud, (unsigned long)mem);
 		if (!pmd_present(*pmd))
 			goto bad_access;
 		pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index a88a285a0e5f..f4f49fcb76d0 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -434,4 +434,6 @@
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
-# 435 reserved for clone3
+435	common	clone3				__sys_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index cf6edda38971..7b975420c3d9 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -60,8 +60,8 @@ SECTIONS {
 #endif
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
-	RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
+	RO_DATA(PAGE_SIZE)
+	RW_DATA(16, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
 	EXCEPTION_TABLE(16)
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 625a5785804f..4d33da4e7106 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -31,9 +31,9 @@ SECTIONS
 
   _sdata = .;			/* Start of data section */
 
-  RODATA
+  RO_DATA(4096)
 
-  RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
+  RW_DATA(16, PAGE_SIZE, THREAD_SIZE)
 
   BSS_SECTION(0, 0, 0)
 
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 9868270b0984..87d9f4d08f65 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -24,13 +24,13 @@ SECTIONS
 	*(.fixup)
 	*(.gnu.warning)
 	} :text = 0x4e75
-	RODATA
+	RO_DATA(4096)
 
   _etext = .;			/* End of text section */
 
   EXCEPTION_TABLE(16) :data
   _sdata = .;			/* Start of rw data section */
-  RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
+  RW_DATA(16, PAGE_SIZE, THREAD_SIZE) :data
   /* End of data goes *here* so that freeing init code works properly. */
   _edata = .;
   NOTES
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 778cacb7d57b..27c453f4fffe 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -130,8 +130,10 @@ static inline void init_pointer_tables(void)
 	/* insert pointer tables allocated so far into the tablelist */
 	init_pointer_table((unsigned long)kernel_pg_dir);
 	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_present(kernel_pg_dir[i]))
-			init_pointer_table(__pgd_page(kernel_pg_dir[i]));
+		pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+		if (pud_present(*pud))
+			init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i]));
 	}
 
 	/* insert also pointer table that we used to unmap the zero page */
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 40a3b327da07..120030ad8dc4 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -54,6 +54,61 @@ static inline void free_io_area(void *addr)
 
 static struct vm_struct *iolist;
 
+/*
+ * __free_io_area unmaps nearly everything, so be careful
+ * Currently it doesn't free pointer/page tables anymore but this
+ * wasn't used anyway and might be added later.
+ */
+static void __free_io_area(void *addr, unsigned long size)
+{
+	unsigned long virtaddr = (unsigned long)addr;
+	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
+	pmd_t *pmd_dir;
+	pte_t *pte_dir;
+
+	while ((long)size > 0) {
+		pgd_dir = pgd_offset_k(virtaddr);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (pud_bad(*pud_dir)) {
+			printk("iounmap: bad pud(%08lx)\n", pud_val(*pud_dir));
+			pud_clear(pud_dir);
+			return;
+		}
+		pmd_dir = pmd_offset(pud_dir, virtaddr);
+
+#if CONFIG_PGTABLE_LEVELS == 3
+		if (CPU_IS_020_OR_030) {
+			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
+			int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
+
+			if (pmd_type == _PAGE_PRESENT) {
+				pmd_dir->pmd[pmd_off] = 0;
+				virtaddr += PTRTREESIZE;
+				size -= PTRTREESIZE;
+				continue;
+			} else if (pmd_type == 0)
+				continue;
+		}
+#endif
+
+		if (pmd_bad(*pmd_dir)) {
+			printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
+			pmd_clear(pmd_dir);
+			return;
+		}
+		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
+
+		pte_val(*pte_dir) = 0;
+		virtaddr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	flush_tlb_all();
+}
+
 static struct vm_struct *get_io_area(unsigned long size)
 {
 	unsigned long addr;
@@ -90,7 +145,7 @@ static inline void free_io_area(void *addr)
 		if (tmp->addr == addr) {
 			*p = tmp->next;
 			/* remove gap added in get_io_area() */
-			__iounmap(tmp->addr, tmp->size - IO_SIZE);
+			__free_io_area(tmp->addr, tmp->size - IO_SIZE);
 			kfree(tmp);
 			return;
 		}
@@ -110,6 +165,8 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
 	unsigned long virtaddr, retaddr;
 	long offset;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -196,18 +253,23 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
 			printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
 #endif
 		pgd_dir = pgd_offset_k(virtaddr);
-		pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		pmd_dir = pmd_alloc(&init_mm, pud_dir, virtaddr);
 		if (!pmd_dir) {
 			printk("ioremap: no mem for pmd_dir\n");
 			return NULL;
 		}
 
+#if CONFIG_PGTABLE_LEVELS == 3
 		if (CPU_IS_020_OR_030) {
 			pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
 			physaddr += PTRTREESIZE;
 			virtaddr += PTRTREESIZE;
 			size -= PTRTREESIZE;
-		} else {
+		} else
+#endif
+		{
 			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
 			if (!pte_dir) {
 				printk("ioremap: no mem for pte_dir\n");
@@ -250,55 +312,6 @@ void iounmap(void __iomem *addr)
 EXPORT_SYMBOL(iounmap);
 
 /*
- * __iounmap unmaps nearly everything, so be careful
- * Currently it doesn't free pointer/page tables anymore but this
- * wasn't used anyway and might be added later.
- */
-void __iounmap(void *addr, unsigned long size)
-{
-	unsigned long virtaddr = (unsigned long)addr;
-	pgd_t *pgd_dir;
-	pmd_t *pmd_dir;
-	pte_t *pte_dir;
-
-	while ((long)size > 0) {
-		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
-			return;
-		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
-
-		if (CPU_IS_020_OR_030) {
-			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
-			int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
-
-			if (pmd_type == _PAGE_PRESENT) {
-				pmd_dir->pmd[pmd_off] = 0;
-				virtaddr += PTRTREESIZE;
-				size -= PTRTREESIZE;
-				continue;
-			} else if (pmd_type == 0)
-				continue;
-		}
-
-		if (pmd_bad(*pmd_dir)) {
-			printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
-			pmd_clear(pmd_dir);
-			return;
-		}
-		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
-
-		pte_val(*pte_dir) = 0;
-		virtaddr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	flush_tlb_all();
-}
-
-/*
  * Set new cache mode for some kernel address space.
  * The caller must push data for that range itself, if such data may already
  * be in the cache.
@@ -307,6 +320,8 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 {
 	unsigned long virtaddr = (unsigned long)addr;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -341,13 +356,16 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 
 	while ((long)size > 0) {
 		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (pud_bad(*pud_dir)) {
+			printk("iocachemode: bad pud(%08lx)\n", pud_val(*pud_dir));
+			pud_clear(pud_dir);
 			return;
 		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
+		pmd_dir = pmd_offset(pud_dir, virtaddr);
 
+#if CONFIG_PGTABLE_LEVELS == 3
 		if (CPU_IS_020_OR_030) {
 			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
 
@@ -359,6 +377,7 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 				continue;
 			}
 		}
+#endif
 
 		if (pmd_bad(*pmd_dir)) {
 			printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 6cb1e41d58d0..0ea375607767 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -92,6 +92,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
 	unsigned long flags, mmuar, mmutr;
 	struct mm_struct *mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	int asid;
@@ -113,7 +115,19 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
 		return -1;
 	}
 
-	pmd = pmd_offset(pgd, mmuar);
+	p4d = p4d_offset(pgd, mmuar);
+	if (p4d_none(*p4d)) {
+		local_irq_restore(flags);
+		return -1;
+	}
+
+	pud = pud_offset(p4d, mmuar);
+	if (pud_none(*pud)) {
+		local_irq_restore(flags);
+		return -1;
+	}
+
+	pmd = pmd_offset(pud, mmuar);
 	if (pmd_none(*pmd)) {
 		local_irq_restore(flags);
 		return -1;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 356601bf96d9..4857985b8080 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -82,9 +82,11 @@ static pmd_t * __init kernel_ptr_table(void)
 		 */
 		last = (unsigned long)kernel_pg_dir;
 		for (i = 0; i < PTRS_PER_PGD; i++) {
-			if (!pgd_present(kernel_pg_dir[i]))
+			pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+			if (!pud_present(*pud))
 				continue;
-			pmd = __pgd_page(kernel_pg_dir[i]);
+			pmd = pgd_page_vaddr(kernel_pg_dir[i]);
 			if (pmd > last)
 				last = pmd;
 		}
@@ -118,6 +120,8 @@ static void __init map_node(int node)
 #define ROOTTREESIZE (32*1024*1024)
 	unsigned long physaddr, virtaddr, size;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -149,14 +153,16 @@ static void __init map_node(int node)
 				continue;
 			}
 		}
-		if (!pgd_present(*pgd_dir)) {
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (!pud_present(*pud_dir)) {
 			pmd_dir = kernel_ptr_table();
 #ifdef DEBUG
 			printk ("[new pointer %p]", pmd_dir);
 #endif
-			pgd_set(pgd_dir, pmd_dir);
+			pud_set(pud_dir, pmd_dir);
 		} else
-			pmd_dir = pmd_offset(pgd_dir, virtaddr);
+			pmd_dir = pmd_offset(pud_dir, virtaddr);
 
 		if (CPU_IS_020_OR_030) {
 			if (virtaddr) {
@@ -304,4 +310,3 @@ void __init paging_init(void)
 			node_set_state(i, N_NORMAL_MEMORY);
 	}
 }
-
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index e63eb5f06999..f31890078197 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -264,6 +264,7 @@ static int q40_get_rtc_pll(struct rtc_pll_info *pll)
 {
 	int tmp = Q40_RTC_CTRL;
 
+	pll->pll_ctrl = 0;
 	pll->pll_value = tmp & Q40_RTC_PLL_MASK;
 	if (tmp & Q40_RTC_PLL_SIGN)
 		pll->pll_value = -pll->pll_value;
diff --git a/arch/m68k/sun3x/config.c b/arch/m68k/sun3x/config.c
index 03ce7f9facfe..d806dee71a9c 100644
--- a/arch/m68k/sun3x/config.c
+++ b/arch/m68k/sun3x/config.c
@@ -70,7 +70,6 @@ void __init config_sun3x(void)
 		break;
 	default:
 		serial_console = 0;
-		conswitchp = &dummy_con;
 		break;
 	}
 #endif
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 89e630e66555..c4b8aa1d80f4 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -80,6 +80,8 @@ inline int dvma_map_cpu(unsigned long kaddr,
 			       unsigned long vaddr, int len)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	unsigned long end;
 	int ret = 0;
 
@@ -90,12 +92,14 @@ inline int dvma_map_cpu(unsigned long kaddr,
 
 	pr_debug("dvma: mapping kern %08lx to virt %08lx\n", kaddr, vaddr);
 	pgd = pgd_offset_k(vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 
 	do {
 		pmd_t *pmd;
 		unsigned long end2;
 
-		if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) {
+		if((pmd = pmd_alloc(&init_mm, pud, vaddr)) == NULL) {
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -196,4 +200,3 @@ void dvma_unmap_iommu(unsigned long baddr, int len)
 	}
 
 }
-
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index c9c4be822456..a105f113fd67 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -4,7 +4,6 @@ config MICROBLAZE
 	select ARCH_32BIT_OFF_T
 	select ARCH_NO_SWAP
 	select ARCH_HAS_BINFMT_FLAT if !MMU
-	select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
@@ -12,7 +11,7 @@ config MICROBLAZE
 	select ARCH_HAS_UNCACHED_SEGMENT if !MMU
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select TIMER_OF
 	select CLONE_BACKWARDS3
 	select COMMON_CLK
@@ -46,6 +45,7 @@ config MICROBLAZE
 	select VIRT_TO_BUS
 	select CPU_NO_EFFICIENT_FFS
 	select MMU_GATHER_NO_RANGE if MMU
+	select SPARSE_IRQ
 
 # Endianness selection
 choice
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 654edfdc7867..b3b433db89d8 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -33,6 +33,8 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 CONFIG_BRIDGE=m
 CONFIG_PCI=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -73,6 +75,7 @@ CONFIG_UIO_PDRV_GENIRQ=y
 CONFIG_UIO_DMEM_GENIRQ=y
 CONFIG_EXT2_FS=y
 # CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 86c95b2a1ce1..d33c61737b8b 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -39,9 +39,6 @@ extern resource_size_t isa_mem_base;
 extern void iounmap(volatile void __iomem *addr);
 
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-#define ioremap_nocache(addr, size)		ioremap((addr), (size))
-#define ioremap_wc(addr, size)			ioremap((addr), (size))
-#define ioremap_wt(addr, size)			ioremap((addr), (size))
 
 #endif /* CONFIG_MMU */
 
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index d785defeeed5..eac2fb4b3fb9 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -9,7 +9,6 @@
 #ifndef _ASM_MICROBLAZE_IRQ_H
 #define _ASM_MICROBLAZE_IRQ_H
 
-#define NR_IRQS		(32 + 1)
 #include <asm-generic/irq.h>
 
 struct pt_regs;
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index d506bb0893f9..f4b44b24b02e 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -90,7 +90,6 @@ typedef struct { unsigned long	pte; }		pte_t;
 typedef struct { unsigned long	pgprot; }	pgprot_t;
 /* FIXME this can depend on linux kernel version */
 #   ifdef CONFIG_MMU
-typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 #   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
@@ -103,7 +102,6 @@ typedef struct { p4d_t		pge[1]; }	pgd_t;
 # define pgprot_val(x)	((x).pgprot)
 
 #   ifdef CONFIG_MMU
-#   define pmd_val(x)      ((x).pmd)
 #   define pgd_val(x)      ((x).pgd)
 #   else  /* CONFIG_MMU */
 #   define pmd_val(x)	((x).ste[0])
@@ -112,7 +110,6 @@ typedef struct { p4d_t		pge[1]; }	pgd_t;
 #   endif  /* CONFIG_MMU */
 
 # define __pte(x)	((pte_t) { (x) })
-# define __pmd(x)	((pmd_t) { (x) })
 # define __pgd(x)	((pgd_t) { (x) })
 # define __pgprot(x)	((pgprot_t) { (x) })
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7ecb05baa601..fcf1e23f2e0a 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -41,13 +41,6 @@ static inline void free_pgd(pgd_t *pgd)
 
 #define pmd_pgtable(pmd)	pmd_page(pmd)
 
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
-
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, (pte))
@@ -58,15 +51,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 #define pmd_populate_kernel(mm, pmd, pte) \
 		(pmd_val(*(pmd)) = (unsigned long) (pte))
 
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr)	pmd_free((tlb)->mm, x)
-#define pgd_populate(mm, pmd, pte)	BUG()
-
 #endif /* CONFIG_MMU */
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 954b69af451f..2def331f9e2c 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -59,9 +59,7 @@ extern int mem_init_done;
 
 #else /* CONFIG_MMU */
 
-#include <asm-generic/4level-fixup.h>
-
-#define __PAGETABLE_PMD_FOLDED 1
+#include <asm-generic/pgtable-nopmd.h>
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
@@ -138,13 +136,8 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
  *
  */
 
-/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
 /* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT	PMD_SHIFT
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -165,9 +158,6 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
 #define pte_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
 		__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
-		__FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
 		__FILE__, __LINE__, pgd_val(e))
@@ -314,18 +304,6 @@ extern unsigned long empty_zero_page[1024];
 
 #ifndef __ASSEMBLY__
 /*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-#define pgd_clear(xp)				do { } while (0)
-#define pgd_page(pgd) \
-	((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
-
-/*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
@@ -479,12 +457,6 @@ static inline void ptep_mkdirty(struct mm_struct *mm,
 #define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
-/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
-{
-	return (pmd_t *) dir;
-}
-
 /* Find an entry in the third-level page table.. */
 #define pte_index(address)		\
 	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h
new file mode 100644
index 000000000000..04013a42b0fe
--- /dev/null
+++ b/arch/microblaze/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_MICROBLAZE_VMALLOC_H
+#define _ASM_MICROBLAZE_VMALLOC_H
+
+#endif /* _ASM_MICROBLAZE_VMALLOC_H */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index a89c2d4ed5ff..d7bebd04247b 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -15,7 +15,7 @@
 #include <linux/bug.h>
 #include <asm/cacheflush.h>
 
-static void __dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+static void __dma_sync(phys_addr_t paddr, size_t size,
 		enum dma_data_direction direction)
 {
 	switch (direction) {
@@ -31,14 +31,14 @@ static void __dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
 	}
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	__dma_sync(dev, paddr, size, dir);
+	__dma_sync(paddr, size, dir);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	__dma_sync(dev, paddr, size, dir);
+	__dma_sync(paddr, size, dir);
 }
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 4e1b567becd6..f6ded356394a 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -728,7 +728,7 @@ no_intr_resched:
 	bri	6f;
 /* MS: Return to kernel state. */
 2:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	lwi	r11, CURRENT_TASK, TS_THREAD_INFO;
 	/* MS: get preempt_count from thread info */
 	lwi	r5, r11, TI_PREEMPT_COUNT;
@@ -738,14 +738,9 @@ no_intr_resched:
 	andi	r5, r5, _TIF_NEED_RESCHED;
 	beqi	r5, restore /* if zero jump over */
 
-preempt:
 	/* interrupts are off that's why I am calling preempt_chedule_irq */
 	bralid	r15, preempt_schedule_irq
 	nop
-	lwi	r11, CURRENT_TASK, TS_THREAD_INFO;	/* get thread info */
-	lwi	r5, r11, TI_FLAGS;		/* get flags in thread info */
-	andi	r5, r5, _TIF_NEED_RESCHED;
-	bnei	r5, preempt /* if non zero jump to resched */
 restore:
 #endif
 	VM_OFF /* MS: turn off MMU */
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index f264fdcf152a..7d2894418691 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -99,7 +99,7 @@ big_endian:
 _prepare_copy_fdt:
 	or	r11, r0, r0 /* incremment */
 	ori	r4, r0, TOPHYS(_fdt_start)
-	ori	r3, r0, (0x8000 - 4)
+	ori	r3, r0, (0x10000 - 4)
 _copy_fdt:
 	lw	r12, r7, r11 /* r12 = r7 + r11 */
 	sw	r12, r4, r11 /* addr[r4 + r11] = r12 */
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 522a0c5d9c59..511c1ab7f57f 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -65,10 +65,6 @@ void __init setup_arch(char **cmdline_p)
 	microblaze_cache_init();
 
 	xilinx_pci_init();
-
-#if defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
 }
 
 #ifdef CONFIG_MTD_UCLINUX
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index cdd4feb279c5..c9125c328949 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -160,6 +160,9 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	int err = 0, sig = ksig->sig;
 	unsigned long address = 0;
 #ifdef CONFIG_MMU
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 #endif
@@ -195,9 +198,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 
 	address = ((unsigned long)frame->tramp);
 #ifdef CONFIG_MMU
-	pmdp = pmd_offset(pud_offset(
-			pgd_offset(current->mm, address),
-					address), address);
+	pgdp = pgd_offset(current->mm, address);
+	p4dp = p4d_offset(pgdp, address);
+	pudp = pud_offset(p4dp, address);
+	pmdp = pmd_offset(pudp, address);
 
 	preempt_disable();
 	ptep = pte_offset_map(pmdp, address);
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 09b0cd7dab0a..4c67b11f9c9e 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -441,3 +441,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index e1f3e8741292..2c09fa3a8a01 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -11,6 +11,8 @@
 OUTPUT_ARCH(microblaze)
 ENTRY(microblaze_start)
 
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
@@ -46,14 +48,12 @@ SECTIONS {
 	__fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
 		_fdt_start = . ;		/* place for fdt blob */
 		*(__fdt_blob) ;			/* Any link-placed DTB */
-	        . = _fdt_start + 0x8000;	/* Pad up to 32kbyte */
+	        . = _fdt_start + 0x10000;	/* Pad up to 64kbyte */
 		_fdt_end = . ;
 	}
 
 	. = ALIGN(16);
-	RODATA
-	EXCEPTION_TABLE(16)
-	NOTES
+	RO_DATA(4096)
 
 	/*
 	 * sdata2 section can go anywhere, but must be word aligned
@@ -70,7 +70,7 @@ SECTIONS {
 	}
 
 	_sdata = . ;
-	RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(32, PAGE_SIZE, THREAD_SIZE)
 	_edata = . ;
 
 	/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index a015a951c8b7..050fc621c920 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -53,8 +53,11 @@ EXPORT_SYMBOL(kmap_prot);
 
 static inline pte_t *virt_to_kpte(unsigned long vaddr)
 {
-	return pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr),
-			vaddr), vaddr);
+	pgd_t *pgd = pgd_offset_k(vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+
+	return pte_offset_kernel(pmd_offset(pud, vaddr), vaddr);
 }
 
 static void __init highmem_init(void)
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 010bb9cee2e4..68c26cacd930 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -134,11 +134,16 @@ EXPORT_SYMBOL(iounmap);
 
 int map_page(unsigned long va, phys_addr_t pa, int flags)
 {
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pd;
 	pte_t *pg;
 	int err = -ENOMEM;
+
 	/* Use upper 10 bits of VA to index the first level map */
-	pd = pmd_offset(pgd_offset_k(va), va);
+	p4d = p4d_offset(pgd_offset_k(va), va);
+	pud = pud_offset(p4d, va);
+	pd = pmd_offset(pud, va);
 	/* Use middle 10 bits of VA to index the second-level map */
 	pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
 	/* pg = pte_alloc_kernel(&init_mm, pd, va); */
@@ -188,13 +193,17 @@ void __init mapin_ram(void)
 static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
 {
 	pgd_t	*pgd;
+	p4d_t	*p4d;
+	pud_t	*pud;
 	pmd_t	*pmd;
 	pte_t	*pte;
 	int     retval = 0;
 
 	pgd = pgd_offset(mm, addr & PAGE_MASK);
 	if (pgd) {
-		pmd = pmd_offset(pgd, addr & PAGE_MASK);
+		p4d = p4d_offset(pgd, addr & PAGE_MASK);
+		pud = pud_offset(p4d, addr & PAGE_MASK);
+		pmd = pmd_offset(pud, addr & PAGE_MASK);
 		if (pmd_present(*pmd)) {
 			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
 			if (pte) {
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 0de839882106..a69b272a3ab0 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -17,6 +17,7 @@ platforms += jazz
 platforms += jz4740
 platforms += lantiq
 platforms += lasat
+platforms += loongson2ef
 platforms += loongson32
 platforms += loongson64
 platforms += mti-malta
@@ -30,6 +31,7 @@ platforms += ralink
 platforms += rb532
 platforms += sgi-ip22
 platforms += sgi-ip27
+platforms += sgi-ip30
 platforms += sgi-ip32
 platforms += sibyte
 platforms += sni
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a0bd9bdb5f83..a2739a34bb12 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -7,6 +7,7 @@ config MIPS
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_SUPPORTS_UPROBES
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
@@ -14,7 +15,7 @@ config MIPS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select CPU_NO_EFFICIENT_FFS if (TARGET_ISA_REV < 1)
 	select CPU_PM if CPU_IDLE
@@ -46,7 +47,7 @@ config MIPS
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_EBPF_JIT if (!CPU_MICROMIPS)
+	select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
@@ -73,6 +74,7 @@ config MIPS
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
+	select HAVE_SPARSE_SYSCALL_NR
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
@@ -86,6 +88,8 @@ config MIPS
 	select SYSCTL_EXCEPTION_TRACE
 	select VIRT_TO_BUS
 	select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI)
+	select ARCH_HAS_KCOV
+	select HAVE_GCC_PLUGINS
 
 menu "Machine selection"
 
@@ -359,6 +363,8 @@ config MACH_DECSTATION
 
 config MACH_JAZZ
 	bool "Jazz family of machines"
+	select ARC_MEMORY
+	select ARC_PROMLIB
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select FW_ARC
@@ -441,7 +447,7 @@ config LASAT
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 
 config MACH_LOONGSON32
-	bool "Loongson-1 family of machines"
+	bool "Loongson 32-bit family of machines"
 	select SYS_SUPPORTS_ZBOOT
 	help
 	  This enables support for the Loongson-1 family of machines.
@@ -450,18 +456,48 @@ config MACH_LOONGSON32
 	  the Institute of Computing Technology (ICT), Chinese Academy of
 	  Sciences (CAS).
 
+config MACH_LOONGSON2EF
+	bool "Loongson-2E/F family of machines"
+	select SYS_SUPPORTS_ZBOOT
+	help
+	  This enables the support of early Loongson-2E/F family of machines.
+
 config MACH_LOONGSON64
-	bool "Loongson-2/3 family of machines"
+	bool "Loongson 64-bit family of machines"
+	select ARCH_SPARSEMEM_ENABLE
+	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select GENERIC_ISA_DMA_SUPPORT_BROKEN
+	select BOOT_ELF32
+	select BOARD_SCACHE
+	select CSRC_R4K
+	select CEVT_R4K
+	select CPU_HAS_WB
+	select FORCE_PCI
+	select ISA
+	select I8259
+	select IRQ_MIPS_CPU
+	select NR_CPUS_DEFAULT_4
+	select USE_GENERIC_EARLY_PRINTK_8250
+	select SYS_HAS_CPU_LOONGSON64
+	select SYS_HAS_EARLY_PRINTK
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+	select SYS_SUPPORTS_NUMA
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_HIGHMEM
+	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_ZBOOT
+	select LOONGSON_MC146818
+	select ZONE_DMA32
+	select NUMA
 	help
 	  This enables the support of Loongson-2/3 family of machines.
 
-	  Loongson-2 is a family of single-core CPUs and Loongson-3 is a
-	  family of multi-core CPUs. They are both 64-bit general-purpose
-	  MIPS-compatible CPUs. Loongson-2/3 are developed by the Institute
-	  of Computing Technology (ICT), Chinese Academy of Sciences (CAS)
-	  in the People's Republic of China. The chief architect is Professor
-	  Weiwu Hu.
+	  Loongson-2 and Loongson-3 are 64-bit general-purpose processors with
+	  GS264/GS464/GS464E/GS464V microarchitecture (except old Loongson-2E
+	  and Loongson-2F which will be removed), developed by the Institute
+	  of Computing Technology (ICT), Chinese Academy of Sciences (CAS).
 
 config MACH_PISTACHIO
 	bool "IMG Pistachio SoC based boards"
@@ -631,6 +667,8 @@ config RALINK
 
 config SGI_IP22
 	bool "SGI IP22 (Indy/Indigo2)"
+	select ARC_MEMORY
+	select ARC_PROMLIB
 	select FW_ARC
 	select FW_ARC32
 	select ARCH_MIGHT_HAVE_PC_SERIO
@@ -654,14 +692,7 @@ config SGI_IP22
 	select SWAP_IO_SPACE
 	select SYS_HAS_CPU_R4X00
 	select SYS_HAS_CPU_R5000
-	#
-	# Disable EARLY_PRINTK for now since it leads to overwritten prom
-	# memory during early boot on some machines.
-	#
-	# See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
-	# for a more details discussion
-	#
-	# select SYS_HAS_EARLY_PRINTK
+	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
@@ -674,8 +705,10 @@ config SGI_IP22
 config SGI_IP27
 	bool "SGI IP27 (Origin200/2000)"
 	select ARCH_HAS_PHYS_TO_DMA
+	select ARCH_SPARSEMEM_ENABLE
 	select FW_ARC
 	select FW_ARC64
+	select ARC_CMDLINE_ONLY
 	select BOOT_ELF64
 	select DEFAULT_SGI_PARTITION
 	select SYS_HAS_EARLY_PRINTK
@@ -698,6 +731,8 @@ config SGI_IP27
 
 config SGI_IP28
 	bool "SGI IP28 (Indigo2 R10k)"
+	select ARC_MEMORY
+	select ARC_PROMLIB
 	select FW_ARC
 	select FW_ARC64
 	select ARCH_MIGHT_HAVE_PC_SERIO
@@ -719,14 +754,7 @@ config SGI_IP28
 	select SGI_HAS_ZILOG
 	select SWAP_IO_SPACE
 	select SYS_HAS_CPU_R10000
-	#
-	# Disable EARLY_PRINTK for now since it leads to overwritten prom
-	# memory during early boot on some machines.
-	#
-	# See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
-	# for a more details discussion
-	#
-	# select SYS_HAS_EARLY_PRINTK
+	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select MIPS_L1_CACHE_SHIFT_7
@@ -734,8 +762,37 @@ config SGI_IP28
 	  This is the SGI Indigo2 with R10000 processor.  To compile a Linux
 	  kernel that runs on these, say Y here.
 
+config SGI_IP30
+	bool "SGI IP30 (Octane/Octane2)"
+	select ARCH_HAS_PHYS_TO_DMA
+	select FW_ARC
+	select FW_ARC64
+	select BOOT_ELF64
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYNC_R4K if SMP
+	select ZONE_DMA32
+	select HAVE_PCI
+	select IRQ_MIPS_CPU
+	select IRQ_DOMAIN_HIERARCHY
+	select NR_CPUS_DEFAULT_2
+	select PCI_DRIVERS_GENERIC
+	select PCI_XTALK_BRIDGE
+	select SYS_HAS_EARLY_PRINTK
+	select SYS_HAS_CPU_R10000
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_SMP
+	select MIPS_L1_CACHE_SHIFT_7
+	select ARC_MEMORY
+	help
+	  These are the SGI Octane and Octane2 graphics workstations.  To
+	  compile a Linux kernel that runs on these, say Y here.
+
 config SGI_IP32
 	bool "SGI IP32 (O2)"
+	select ARC_MEMORY
+	select ARC_PROMLIB
 	select ARCH_HAS_PHYS_TO_DMA
 	select FW_ARC
 	select FW_ARC32
@@ -843,6 +900,8 @@ config SIBYTE_BIGSUR
 
 config SNI_RM
 	bool "SNI RM200/300/400"
+	select ARC_MEMORY
+	select ARC_PROMLIB
 	select FW_ARC if CPU_LITTLE_ENDIAN
 	select FW_ARC32 if CPU_LITTLE_ENDIAN
 	select FW_SNIPROM if CPU_BIG_ENDIAN
@@ -1038,6 +1097,7 @@ source "arch/mips/sibyte/Kconfig"
 source "arch/mips/txx9/Kconfig"
 source "arch/mips/vr41xx/Kconfig"
 source "arch/mips/cavium-octeon/Kconfig"
+source "arch/mips/loongson2ef/Kconfig"
 source "arch/mips/loongson32/Kconfig"
 source "arch/mips/loongson64/Kconfig"
 source "arch/mips/netlogic/Kconfig"
@@ -1134,9 +1194,9 @@ config DMA_NONCOHERENT
 	select ARCH_HAS_DMA_WRITE_COMBINE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_UNCACHED_SEGMENT
-	select NEED_DMA_MAP_STATE
-	select ARCH_HAS_DMA_COHERENT_TO_PFN
+	select DMA_NONCOHERENT_MMAP
 	select DMA_NONCOHERENT_CACHE_SYNC
+	select NEED_DMA_MAP_STATE
 
 config SYS_HAS_EARLY_PRINTK
 	bool
@@ -1353,19 +1413,18 @@ config MIPS_L1_CACHE_SHIFT
 config HAVE_STD_PC_SERIAL_PORT
 	bool
 
+config ARC_CMDLINE_ONLY
+	bool
+
 config ARC_CONSOLE
 	bool "ARC console support"
 	depends on SGI_IP22 || SGI_IP28 || (SNI_RM && CPU_LITTLE_ENDIAN)
 
 config ARC_MEMORY
 	bool
-	depends on MACH_JAZZ || SNI_RM || SGI_IP32
-	default y
 
 config ARC_PROMLIB
 	bool
-	depends on MACH_JAZZ || SNI_RM || SGI_IP22 || SGI_IP28 || SGI_IP32
-	default y
 
 config FW_ARC64
 	bool
@@ -1379,51 +1438,56 @@ choice
 	prompt "CPU type"
 	default CPU_R4X00
 
-config CPU_LOONGSON3
-	bool "Loongson 3 CPU"
-	depends on SYS_HAS_CPU_LOONGSON3
+config CPU_LOONGSON64
+	bool "Loongson 64-bit CPU"
+	depends on SYS_HAS_CPU_LOONGSON64
 	select ARCH_HAS_PHYS_TO_DMA
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_HUGEPAGES
+	select CPU_SUPPORTS_MSA
 	select CPU_HAS_LOAD_STORE_LR
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
+	select MIPS_ASID_BITS_VARIABLE
 	select MIPS_PGD_C0_CONTEXT
 	select MIPS_L1_CACHE_SHIFT_6
 	select GPIOLIB
 	select SWIOTLB
 	help
-		The Loongson 3 processor implements the MIPS64R2 instruction
-		set with many extensions.
+		The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor
+		cores implements the MIPS64R2 instruction set with many extensions,
+		including most 64-bit Loongson-2 (2H, 2K) and Loongson-3 (3A1000,
+		3B1000, 3B1500, 3A2000, 3A3000 and 3A4000) processors. However, old
+		Loongson-2E/2F is not covered here and will be removed in future.
 
 config LOONGSON3_ENHANCEMENT
-	bool "New Loongson 3 CPU Enhancements"
+	bool "New Loongson-3 CPU Enhancements"
 	default n
 	select CPU_MIPSR2
 	select CPU_HAS_PREFETCH
-	depends on CPU_LOONGSON3
+	depends on CPU_LOONGSON64
 	help
-	  New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A
+	  New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A
 	  R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as
-	  FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User
+	  FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User
 	  Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
 	  Fast TLB refill support, etc.
 
 	  This option enable those enhancements which are not probed at run
 	  time. If you want a generic kernel to run on all Loongson 3 machines,
 	  please say 'N' here. If you want a high-performance kernel to run on
-	  new Loongson 3 machines only, please say 'Y' here.
+	  new Loongson-3 machines only, please say 'Y' here.
 
 config CPU_LOONGSON3_WORKAROUNDS
-	bool "Old Loongson 3 LLSC Workarounds"
+	bool "Old Loongson-3 LLSC Workarounds"
 	default y if SMP
-	depends on CPU_LOONGSON3
+	depends on CPU_LOONGSON64
 	help
-	  Loongson 3 processors have the llsc issues which require workarounds.
+	  Loongson-3 processors have the llsc issues which require workarounds.
 	  Without workarounds the system may hang unexpectedly.
 
-	  Newer Loongson 3 will fix these issues and no workarounds are needed.
+	  Newer Loongson-3 will fix these issues and no workarounds are needed.
 	  The workarounds have no significant side effect on them but may
 	  decrease the performance of the system so this option should be
 	  disabled unless the kernel is intended to be run on old systems.
@@ -1433,7 +1497,7 @@ config CPU_LOONGSON3_WORKAROUNDS
 config CPU_LOONGSON2E
 	bool "Loongson 2E"
 	depends on SYS_HAS_CPU_LOONGSON2E
-	select CPU_LOONGSON2
+	select CPU_LOONGSON2EF
 	help
 	  The Loongson 2E processor implements the MIPS III instruction set
 	  with many extensions.
@@ -1444,7 +1508,7 @@ config CPU_LOONGSON2E
 config CPU_LOONGSON2F
 	bool "Loongson 2F"
 	depends on SYS_HAS_CPU_LOONGSON2F
-	select CPU_LOONGSON2
+	select CPU_LOONGSON2EF
 	select GPIOLIB
 	help
 	  The Loongson 2F processor implements the MIPS III instruction set
@@ -1457,7 +1521,7 @@ config CPU_LOONGSON2F
 config CPU_LOONGSON1B
 	bool "Loongson 1B"
 	depends on SYS_HAS_CPU_LOONGSON1B
-	select CPU_LOONGSON1
+	select CPU_LOONGSON32
 	select LEDS_GPIO_REGISTER
 	help
 	  The Loongson 1B is a 32-bit SoC, which implements the MIPS32
@@ -1467,7 +1531,7 @@ config CPU_LOONGSON1B
 config CPU_LOONGSON1C
 	bool "Loongson 1C"
 	depends on SYS_HAS_CPU_LOONGSON1C
-	select CPU_LOONGSON1
+	select CPU_LOONGSON32
 	select LEDS_GPIO_REGISTER
 	help
 	  The Loongson 1C is a 32-bit SoC, which implements the MIPS32
@@ -1857,7 +1921,7 @@ config SYS_SUPPORTS_ZBOOT_UART_PROM
 	bool
 	select SYS_SUPPORTS_ZBOOT
 
-config CPU_LOONGSON2
+config CPU_LOONGSON2EF
 	bool
 	select CPU_SUPPORTS_32BIT_KERNEL
 	select CPU_SUPPORTS_64BIT_KERNEL
@@ -1866,7 +1930,7 @@ config CPU_LOONGSON2
 	select ARCH_HAS_PHYS_TO_DMA
 	select CPU_HAS_LOAD_STORE_LR
 
-config CPU_LOONGSON1
+config CPU_LOONGSON32
 	bool
 	select CPU_MIPS32
 	select CPU_MIPSR2
@@ -1900,7 +1964,7 @@ config CPU_BMIPS5000
 	select SYS_SUPPORTS_HOTPLUG_CPU
 	select CPU_HAS_RIXI
 
-config SYS_HAS_CPU_LOONGSON3
+config SYS_HAS_CPU_LOONGSON64
 	bool
 	select CPU_SUPPORTS_CPUFREQ
 	select CPU_HAS_RIXI
@@ -1912,7 +1976,6 @@ config SYS_HAS_CPU_LOONGSON2F
 	bool
 	select CPU_SUPPORTS_CPUFREQ
 	select CPU_SUPPORTS_ADDRWINCFG if 64BIT
-	select CPU_SUPPORTS_UNCACHED_ACCELERATED
 
 config SYS_HAS_CPU_LOONGSON1B
 	bool
@@ -2089,8 +2152,6 @@ config CPU_SUPPORTS_ADDRWINCFG
 config CPU_SUPPORTS_HUGEPAGES
 	bool
 	depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA))
-config CPU_SUPPORTS_UNCACHED_ACCELERATED
-	bool
 config MIPS_PGD_C0_CONTEXT
 	bool
 	default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
@@ -2162,7 +2223,7 @@ choice
 
 config PAGE_SIZE_4KB
 	bool "4kB"
-	depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
+	depends on !CPU_LOONGSON2EF && !CPU_LOONGSON64
 	help
 	  This option select the standard 4kB Linux page size.  On some
 	  R3000-family processors this is the only available page size.  Using
@@ -2554,6 +2615,10 @@ config CPU_R4000_WORKAROUNDS
 config CPU_R4400_WORKAROUNDS
 	bool
 
+config CPU_R4X00_BUGS64
+	bool
+	default y if SYS_HAS_CPU_R4X00 && 64BIT && (TARGET_ISA_REV < 1)
+
 config MIPS_ASID_SHIFT
 	int
 	default 6 if CPU_R3000 || CPU_TX39XX
@@ -2612,20 +2677,11 @@ config CPU_SUPPORTS_MSA
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on !NUMA && !CPU_LOONGSON2
-
-config ARCH_DISCONTIGMEM_ENABLE
-	bool
-	default y if SGI_IP27
-	help
-	  Say Y to support efficient handling of discontiguous physical memory,
-	  for architectures which are either NUMA (Non-Uniform Memory Access)
-	  or have huge holes in the physical address space for other reasons.
-	  See <file:Documentation/vm/numa.rst> for more.
+	depends on !NUMA && !CPU_LOONGSON2EF
 
 config ARCH_SPARSEMEM_ENABLE
 	bool
-	select SPARSEMEM_STATIC
+	select SPARSEMEM_STATIC if !SGI_IP27
 
 config NUMA
 	bool "NUMA Support"
@@ -2702,7 +2758,7 @@ config NODES_SHIFT
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3)
+	depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON64)
 	default y
 	help
 	  Enable hardware performance counter support for perf events. If
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 0c86b2a2adfc..93a2974d2ab7 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -32,7 +32,6 @@ config USE_GENERIC_EARLY_PRINTK_8250
 
 config CMDLINE_BOOL
 	bool "Built-in kernel command line"
-	default n
 	help
 	  For most systems, it is firmware or second stage bootloader that
 	  by default specifies the kernel command line options.  However,
@@ -53,7 +52,6 @@ config CMDLINE_BOOL
 config CMDLINE
 	string "Default kernel command string"
 	depends on CMDLINE_BOOL
-	default ""
 	help
 	  On some platforms, there is currently no way for the boot loader to
 	  pass arguments to the kernel.  For these platforms, and for the cases
@@ -68,7 +66,6 @@ config CMDLINE
 
 config CMDLINE_OVERRIDE
 	bool "Built-in command line overrides firmware arguments"
-	default n
 	depends on CMDLINE_BOOL
 	help
 	  By setting this option to 'Y' you will have your kernel ignore
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index cdc09b71febe..e1c44aed8156 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -14,6 +14,9 @@
 
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/mips/tools elf-entry
+ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y)
+	$(Q)$(MAKE) $(build)=arch/mips/tools loongson3-llsc-check
+endif
 	$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
 
 KBUILD_DEFCONFIG := 32r2el_defconfig
@@ -323,7 +326,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
+drivers-y			+= arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
 
 # suspend and hibernation support
diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
index 4eea4188cb20..f03fdc95143e 100644
--- a/arch/mips/Makefile.postlink
+++ b/arch/mips/Makefile.postlink
@@ -3,7 +3,8 @@
 # Post-link MIPS pass
 # ===========================================================================
 #
-# 1. Insert relocations into vmlinux
+# 1. Check that Loongson3 LL/SC workarounds are applied correctly
+# 2. Insert relocations into vmlinux
 
 PHONY := __archpost
 __archpost:
@@ -11,6 +12,10 @@ __archpost:
 -include include/config/auto.conf
 include scripts/Kbuild.include
 
+CMD_LS3_LLSC = arch/mips/tools/loongson3-llsc-check
+quiet_cmd_ls3_llsc = LLSCCHK $@
+      cmd_ls3_llsc = $(CMD_LS3_LLSC) $@
+
 CMD_RELOCS = arch/mips/boot/tools/relocs
 quiet_cmd_relocs = RELOCS $@
       cmd_relocs = $(CMD_RELOCS) $@
@@ -19,6 +24,9 @@ quiet_cmd_relocs = RELOCS $@
 
 vmlinux: FORCE
 	@true
+ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y)
+	$(call if_changed,ls3_llsc)
+endif
 ifeq ($(CONFIG_RELOCATABLE),y)
 	$(call if_changed,relocs)
 endif
diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c
index 7de162432d7f..95def949c971 100644
--- a/arch/mips/ar7/clock.c
+++ b/arch/mips/ar7/clock.c
@@ -236,9 +236,9 @@ static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock,
 
 static void __init tnetd7300_init_clocks(void)
 {
-	u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4);
+	u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4);
 	struct tnetd7300_clocks *clocks =
-					ioremap_nocache(UR8_REGS_CLOCKS,
+					ioremap(UR8_REGS_CLOCKS,
 					sizeof(struct tnetd7300_clocks));
 
 	bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT,
@@ -320,9 +320,9 @@ static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr)
 
 static void __init tnetd7200_init_clocks(void)
 {
-	u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4);
+	u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4);
 	struct tnetd7200_clocks *clocks =
-					ioremap_nocache(AR7_REGS_CLOCKS,
+					ioremap(AR7_REGS_CLOCKS,
 					sizeof(struct tnetd7200_clocks));
 	int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv;
 	int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv;
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 2292e55c12e2..8b006addd6ba 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -308,7 +308,7 @@ int __init ar7_gpio_init(void)
 		size = 0x1f;
 	}
 
-	gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
+	gpch->regs = ioremap(AR7_REGS_GPIO, size);
 	if (!gpch->regs) {
 		printk(KERN_ERR "%s: failed to ioremap regs\n",
 					gpch->chip.label);
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 1f2028266493..215149a85d83 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -702,7 +702,7 @@ static int __init ar7_register_devices(void)
 		pr_warn("unable to register usb slave: %d\n", res);
 
 	/* Register watchdog only if enabled in hardware */
-	bootcr = ioremap_nocache(AR7_REGS_DCL, 4);
+	bootcr = ioremap(AR7_REGS_DCL, 4);
 	val = readl(bootcr);
 	iounmap(bootcr);
 	if (val & AR7_WDT_HW_ENA) {
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index 8da996142d6a..24f619199ee7 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -262,7 +262,7 @@ void __init ar2315_plat_mem_setup(void)
 	u32 config;
 
 	/* Detect memory size */
-	sdram_base = ioremap_nocache(AR2315_SDRAMCTL_BASE,
+	sdram_base = ioremap(AR2315_SDRAMCTL_BASE,
 				     AR2315_SDRAMCTL_SIZE);
 	memcfg = __raw_readl(sdram_base + AR2315_MEM_CFG);
 	memsize   = 1 + ATH25_REG_MS(memcfg, AR2315_MEM_CFG_DATA_WIDTH);
@@ -272,7 +272,7 @@ void __init ar2315_plat_mem_setup(void)
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 	iounmap(sdram_base);
 
-	ar2315_rst_base = ioremap_nocache(AR2315_RST_BASE, AR2315_RST_SIZE);
+	ar2315_rst_base = ioremap(AR2315_RST_BASE, AR2315_RST_SIZE);
 
 	/* Detect the hardware based on the device ID */
 	devid = ar2315_rst_reg_read(AR2315_SREV) & AR2315_REV_CHIP;
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index acd55a9cffe3..47f3e98974fc 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -185,7 +185,7 @@ static void __init ar5312_flash_init(void)
 	void __iomem *flashctl_base;
 	u32 ctl;
 
-	flashctl_base = ioremap_nocache(AR5312_FLASHCTL_BASE,
+	flashctl_base = ioremap(AR5312_FLASHCTL_BASE,
 					AR5312_FLASHCTL_SIZE);
 
 	ctl = __raw_readl(flashctl_base + AR5312_FLASHCTL0);
@@ -358,7 +358,7 @@ void __init ar5312_plat_mem_setup(void)
 	u32 devid;
 
 	/* Detect memory size */
-	sdram_base = ioremap_nocache(AR5312_SDRAMCTL_BASE,
+	sdram_base = ioremap(AR5312_SDRAMCTL_BASE,
 				     AR5312_SDRAMCTL_SIZE);
 	memcfg = __raw_readl(sdram_base + AR5312_MEM_CFG1);
 	bank0_ac = ATH25_REG_MS(memcfg, AR5312_MEM_CFG1_AC0);
@@ -369,7 +369,7 @@ void __init ar5312_plat_mem_setup(void)
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 	iounmap(sdram_base);
 
-	ar5312_rst_base = ioremap_nocache(AR5312_RST_BASE, AR5312_RST_SIZE);
+	ar5312_rst_base = ioremap(AR5312_RST_BASE, AR5312_RST_SIZE);
 
 	devid = ar5312_rst_reg_read(AR5312_REV);
 	devid >>= AR5312_REV_WMAC_MIN_S;
diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c
index 989e71015ee6..cb99f9739910 100644
--- a/arch/mips/ath25/board.c
+++ b/arch/mips/ath25/board.c
@@ -111,7 +111,7 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size)
 	u8 *mac_addr;
 	u32 offset;
 
-	flash_base = ioremap_nocache(base, size);
+	flash_base = ioremap(base, size);
 	flash_limit = flash_base + size;
 
 	ath25_board.config = NULL;
diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
index 63eacb8b0eb5..137abbc65c60 100644
--- a/arch/mips/ath79/common.c
+++ b/arch/mips/ath79/common.c
@@ -41,7 +41,7 @@ static void __iomem *ath79_ddr_pci_win_base;
 
 void ath79_ddr_ctrl_init(void)
 {
-	ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE,
+	ath79_ddr_base = ioremap(AR71XX_DDR_CTRL_BASE,
 					 AR71XX_DDR_CTRL_SIZE);
 	if (soc_is_ar913x() || soc_is_ar724x() || soc_is_ar933x()) {
 		ath79_ddr_wb_flush_base = ath79_ddr_base + 0x7c;
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index ea385a865781..484ee28922a9 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -226,9 +226,9 @@ void __init plat_mem_setup(void)
 	else if (fw_passed_dtb)
 		__dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb));
 
-	ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE,
+	ath79_reset_base = ioremap(AR71XX_RESET_BASE,
 					   AR71XX_RESET_SIZE);
-	ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
+	ath79_pll_base = ioremap(AR71XX_PLL_BASE,
 					 AR71XX_PLL_SIZE);
 	ath79_detect_sys_type();
 	ath79_ddr_ctrl_init();
diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
index 3d13c77c125f..df56bf4179e3 100644
--- a/arch/mips/bmips/dma.c
+++ b/arch/mips/bmips/dma.c
@@ -64,7 +64,7 @@ phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
 	return dma_addr;
 }
 
-void arch_sync_dma_for_cpu_all(struct device *dev)
+void arch_sync_dma_for_cpu_all(void)
 {
 	void __iomem *cbr = BMIPS_GET_CBR();
 	u32 cfg;
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 172801ed35b8..d859f079b771 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
 	-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
 	-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 2e9952311ecd..37b93166bf22 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -25,12 +25,47 @@
 		       0x30000000 0x30000000>;
 	};
 
+	leds {
+		compatible = "gpio-leds";
+
+		led0 {
+			label = "ci20:red:led0";
+			gpios = <&gpc 3 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "none";
+		};
+
+		led1 {
+			label = "ci20:red:led1";
+			gpios = <&gpc 2 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "nand-disk";
+		};
+
+		led2 {
+			label = "ci20:red:led2";
+			gpios = <&gpc 1 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "cpu1";
+		};
+
+		led3 {
+			label = "ci20:red:led3";
+			gpios = <&gpc 0 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "cpu0";
+		};
+	};
+
 	eth0_power: fixedregulator@0 {
 		compatible = "regulator-fixed";
 		regulator-name = "eth0_power";
 		gpio = <&gpb 25 GPIO_ACTIVE_LOW>;
 		enable-active-high;
 	};
+
+	wlan0_power: fixedregulator@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan0_power";
+		gpio = <&gpb 19 GPIO_ACTIVE_LOW>;
+		enable-active-high;
+	};
 };
 
 &ext {
@@ -54,9 +89,18 @@
 
 	bus-width = <4>;
 	max-frequency = <50000000>;
+	non-removable;
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&pins_mmc1>;
+
+	brcmf: wifi@1 {
+/*		reg = <4>;*/
+		compatible = "brcm,bcm4330-fmac";
+		vcc-supply = <&wlan0_power>;
+		device-wakeup-gpios = <&gpd 9 GPIO_ACTIVE_HIGH>;
+		shutdown-gpios = <&gpf 7 GPIO_ACTIVE_LOW>;
+	};
 };
 
 &uart0 {
@@ -73,6 +117,23 @@
 	pinctrl-0 = <&pins_uart1>;
 };
 
+&uart2 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_uart2>;
+	uart-has-rtscts;
+
+	bluetooth {
+		compatible = "brcm,bcm4330-bt";
+		reset-gpios = <&gpf 8 GPIO_ACTIVE_HIGH>;
+		vcc-supply = <&wlan0_power>;
+		device-wakeup-gpios = <&gpf 5 GPIO_ACTIVE_HIGH>;
+		host-wakeup-gpios = <&gpf 6 GPIO_ACTIVE_HIGH>;
+		shutdown-gpios = <&gpf 4 GPIO_ACTIVE_LOW>;
+	};
+};
+
 &uart3 {
 	status = "okay";
 
@@ -87,6 +148,123 @@
 	pinctrl-0 = <&pins_uart4>;
 };
 
+&i2c0 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_i2c0>;
+
+	clock-frequency = <400000>;
+
+	act8600: act8600@5a {
+		compatible = "active-semi,act8600";
+		reg = <0x5a>;
+		status = "okay";
+
+		regulators {
+			vddcore: SUDCDC1 {
+				regulator-name = "VDDCORE";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+			vddmem: SUDCDC2 {
+				regulator-name = "VDDMEM";
+				regulator-min-microvolt = <1500000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+			};
+			vcc_33: SUDCDC3 {
+				regulator-name = "VCC33";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+			vcc_50: SUDCDC4 {
+				regulator-name = "VCC50";
+				regulator-min-microvolt = <5000000>;
+				regulator-max-microvolt = <5000000>;
+				regulator-always-on;
+			};
+			vcc_25: LDO_REG5 {
+				regulator-name = "VCC25";
+				regulator-min-microvolt = <2500000>;
+				regulator-max-microvolt = <2500000>;
+				regulator-always-on;
+			};
+			wifi_io: LDO_REG6 {
+				regulator-name = "WIFIIO";
+				regulator-min-microvolt = <2500000>;
+				regulator-max-microvolt = <2500000>;
+				regulator-always-on;
+			};
+			vcc_28: LDO_REG7 {
+				regulator-name = "VCC28";
+				regulator-min-microvolt = <2800000>;
+				regulator-max-microvolt = <2800000>;
+				regulator-always-on;
+			};
+			vcc_15: LDO_REG8 {
+				regulator-name = "VCC15";
+				regulator-min-microvolt = <1500000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+			};
+			vcc_18: LDO_REG9 {
+				regulator-name = "VCC18";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+			vcc_11: LDO_REG10 {
+				regulator-name = "VCC11";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+		};
+	};
+};
+
+&i2c1 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_i2c1>;
+
+};
+
+&i2c2 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_i2c2>;
+
+};
+
+&i2c3 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_i2c3>;
+
+};
+
+&i2c4 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_i2c4>;
+
+	clock-frequency = <400000>;
+
+		rtc@51 {
+			compatible = "nxp,pcf8563";
+			reg = <0x51>;
+			interrupts = <110>;
+		};
+};
+
 &nemc {
 	status = "okay";
 
@@ -197,6 +375,12 @@
 		bias-disable;
 	};
 
+	pins_uart2: uart2 {
+		function = "uart2";
+		groups = "uart2-data", "uart2-hwflow";
+		bias-disable;
+	};
+
 	pins_uart3: uart3 {
 		function = "uart3";
 		groups = "uart3-data", "uart3-hwflow";
@@ -209,6 +393,36 @@
 		bias-disable;
 	};
 
+	pins_i2c0: i2c0 {
+		function = "i2c0";
+		groups = "i2c0-data";
+		bias-disable;
+	};
+
+	pins_i2c1: i2c1 {
+		function = "i2c1";
+		groups = "i2c1-data";
+		bias-disable;
+	};
+
+	pins_i2c2: i2c2 {
+		function = "i2c2";
+		groups = "i2c2-data";
+		bias-disable;
+	};
+
+	pins_i2c3: i2c3 {
+		function = "i2c3";
+		groups = "i2c3-data";
+		bias-disable;
+	};
+
+	pins_i2c4: i2c4 {
+		function = "i2c4";
+		groups = "i2c4-data-e";
+		bias-disable;
+	};
+
 	pins_nemc: nemc {
 		function = "nemc";
 		groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe";
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index c54bd7cfec55..f928329b034b 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -262,6 +262,92 @@
 		status = "disabled";
 	};
 
+	i2c0: i2c@10050000 {
+		compatible = "ingenic,jz4780-i2c";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		reg = <0x10050000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <60>;
+
+		clocks = <&cgu JZ4780_CLK_SMB0>;
+		clock-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pins_i2c0_data>;
+
+		status = "disabled";
+	};
+
+	i2c1: i2c@10051000 {
+		compatible = "ingenic,jz4780-i2c";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x10051000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <59>;
+
+		clocks = <&cgu JZ4780_CLK_SMB1>;
+		clock-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pins_i2c1_data>;
+
+		status = "disabled";
+	};
+
+	i2c2: i2c@10052000 {
+		compatible = "ingenic,jz4780-i2c";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x10052000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <58>;
+
+		clocks = <&cgu JZ4780_CLK_SMB2>;
+		clock-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pins_i2c2_data>;
+
+		status = "disabled";
+	};
+
+	i2c3: i2c@10053000 {
+		compatible = "ingenic,jz4780-i2c";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x10053000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <57>;
+
+		clocks = <&cgu JZ4780_CLK_SMB3>;
+		clock-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pins_i2c3_data>;
+
+		status = "disabled";
+	};
+
+	i2c4: i2c@10054000 {
+		compatible = "ingenic,jz4780-i2c";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x10054000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <56>;
+
+		clocks = <&cgu JZ4780_CLK_SMB4>;
+		clock-frequency = <100000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pins_i2c4_data>;
+
+		status = "disabled";
+	};
+
 	watchdog: watchdog@10002000 {
 		compatible = "ingenic,jz4780-watchdog";
 		reg = <0x10002000 0x10>;
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 5cfc9d347826..8f5aed760abb 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -126,6 +126,9 @@
 			clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
 			clock-names = "eth", "mdio";
 
+			phy-mode = "mii";
+			phy-handle = <&phy_port4>;
+
 			status = "disabled";
 		};
 
@@ -133,13 +136,127 @@
 			compatible = "qca,ar9330-eth";
 			reg = <0x1a000000 0x200>;
 			interrupts = <5>;
-
 			resets = <&rst 13>, <&rst 23>;
 			reset-names = "mac", "mdio";
 			clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
 			clock-names = "eth", "mdio";
 
+			phy-mode = "gmii";
+
 			status = "disabled";
+
+			fixed-link {
+				speed = <1000>;
+				full-duplex;
+			};
+
+			mdio {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				switch10: switch@10 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					compatible = "qca,ar9331-switch";
+					reg = <0x10>;
+					resets = <&rst 8>;
+					reset-names = "switch";
+
+					interrupt-parent = <&miscintc>;
+					interrupts = <12>;
+
+					interrupt-controller;
+					#interrupt-cells = <1>;
+
+					ports {
+						#address-cells = <1>;
+						#size-cells = <0>;
+
+						switch_port0: port@0 {
+							reg = <0x0>;
+							label = "cpu";
+							ethernet = <&eth1>;
+
+							phy-mode = "gmii";
+
+							fixed-link {
+								speed = <1000>;
+								full-duplex;
+							};
+						};
+
+						switch_port1: port@1 {
+							reg = <0x1>;
+							phy-handle = <&phy_port0>;
+							phy-mode = "internal";
+
+							status = "disabled";
+						};
+
+						switch_port2: port@2 {
+							reg = <0x2>;
+							phy-handle = <&phy_port1>;
+							phy-mode = "internal";
+
+							status = "disabled";
+						};
+
+						switch_port3: port@3 {
+							reg = <0x3>;
+							phy-handle = <&phy_port2>;
+							phy-mode = "internal";
+
+							status = "disabled";
+						};
+
+						switch_port4: port@4 {
+							reg = <0x4>;
+							phy-handle = <&phy_port3>;
+							phy-mode = "internal";
+
+							status = "disabled";
+						};
+					};
+
+					mdio {
+						#address-cells = <1>;
+						#size-cells = <0>;
+
+						interrupt-parent = <&switch10>;
+
+						phy_port0: phy@0 {
+							reg = <0x0>;
+							interrupts = <0>;
+							status = "disabled";
+						};
+
+						phy_port1: phy@1 {
+							reg = <0x1>;
+							interrupts = <0>;
+							status = "disabled";
+						};
+
+						phy_port2: phy@2 {
+							reg = <0x2>;
+							interrupts = <0>;
+							status = "disabled";
+						};
+
+						phy_port3: phy@3 {
+							reg = <0x3>;
+							interrupts = <0>;
+							status = "disabled";
+						};
+
+						phy_port4: phy@4 {
+							reg = <0x4>;
+							interrupts = <0>;
+							status = "disabled";
+						};
+					};
+				};
+			};
 		};
 
 		usb: usb@1b000100 {
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index 77bab823eb3b..0f2b20044834 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -84,3 +84,16 @@
 &eth1 {
 	status = "okay";
 };
+
+&switch_port1 {
+	label = "lan0";
+	status = "okay";
+};
+
+&phy_port0 {
+	status = "okay";
+};
+
+&phy_port4 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
new file mode 100644
index 000000000000..aa5caaa31104
--- /dev/null
+++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Stefan Roese <sr@denx.de>
+ */
+
+/dts-v1/;
+
+/include/ "mt7628a.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+	compatible = "gardena,smart-gateway-mt7688", "ralink,mt7688a-soc",
+		     "ralink,mt7628a-soc";
+	model = "GARDENA smart Gateway (MT7688)";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x8000000>;
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinmux_gpio_gpio>;	/* GPIO11 */
+
+		user_btn1 {
+			label = "USER_BTN1";
+			gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+			linux,code =<KEY_PROG1> ;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinmux_pwm0_gpio>,	/* GPIO18 */
+			    <&pinmux_pwm1_gpio>,	/* GPIO19 */
+			    <&pinmux_sdmode_gpio>,	/* GPIO22..29 */
+			    <&pinmux_p0led_an_gpio>;	/* GPIO43 */
+		/*
+		 * <&pinmux_i2s_gpio> (covers GPIO0..3) is needed here as
+		 * well for GPIO3. But this is already claimed for uart1
+		 * (see below). So we can't include it in this LED node.
+		 */
+
+		power_blue {
+			label = "smartgw:power:blue";
+			gpios = <&gpio 18 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		power_green {
+			label = "smartgw:power:green";
+			gpios = <&gpio 19 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		power_red {
+			label = "smartgw:power:red";
+			gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		radio_blue {
+			label = "smartgw:radio:blue";
+			gpios = <&gpio 23 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		radio_green {
+			label = "smartgw:radio:green";
+			gpios = <&gpio 24 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		radio_red {
+			label = "smartgw:radio:red";
+			gpios = <&gpio 25 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		internet_blue {
+			label = "smartgw:internet:blue";
+			gpios = <&gpio 26 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		internet_green {
+			label = "smartgw:internet:green";
+			gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		internet_red {
+			label = "smartgw:internet:red";
+			gpios = <&gpio 28 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		ethernet_link {
+			label = "smartgw:eth:link";
+			gpios = <&gpio 3 GPIO_ACTIVE_LOW>;
+			linux,default-trigger = "netdev";
+		};
+
+		ethernet_activity {
+			label = "smartgw:eth:act";
+			gpios = <&gpio 43 GPIO_ACTIVE_LOW>;
+			linux,default-trigger = "netdev";
+		};
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+};
+
+&i2c {
+	status = "okay";
+};
+
+&spi {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinmux_spi_spi>, <&pinmux_spi_cs1_cs>;
+
+	m25p80@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+		spi-max-frequency = <40000000>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "uboot";
+				reg = <0x0 0xa0000>;
+				read-only;
+			};
+
+			partition@a0000 {
+				label = "uboot_env0";
+				reg = <0xa0000 0x10000>;
+			};
+
+			partition@b0000 {
+				label = "uboot_env1";
+				reg = <0xb0000 0x10000>;
+			};
+
+			factory: partition@c0000 {
+				label = "factory";
+				reg = <0xc0000 0x10000>;
+				read-only;
+			};
+		};
+	};
+
+	nand_flash@1 {
+		compatible = "spi-nand";
+		linux,mtd-name = "gd5f";
+		reg = <1>;
+		spi-max-frequency = <40000000>;
+	};
+};
+
+&uart1 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinmux_i2s_gpio>;		/* GPIO0..3 */
+
+	rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>;
+	cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+};
+
+&uart2 {
+	status = "okay";
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinmux_p2led_an_gpio>,	/* GPIO41 */
+		    <&pinmux_p3led_an_gpio>;	/* GPIO40 */
+
+	rts-gpios = <&gpio 40 GPIO_ACTIVE_LOW>;
+	cts-gpios = <&gpio 41 GPIO_ACTIVE_LOW>;
+};
+
+&watchdog {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi
index 61f8621e88b3..742bcc1dc2e0 100644
--- a/arch/mips/boot/dts/ralink/mt7628a.dtsi
+++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi
@@ -199,6 +199,22 @@
 			status = "disabled";
 		};
 
+		i2c: i2c@900 {
+			compatible = "mediatek,mt7621-i2c";
+			reg = <0x900 0x100>;
+
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinmux_i2c_i2c>;
+
+			resets = <&resetc 16>;
+			reset-names = "i2c";
+
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			status = "disabled";
+		};
+
 		uart0: uartlite@c00 {
 			compatible = "ns16550a";
 			reg = <0xc00 0x100>;
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index ba8f82a29a81..e794b2d53adf 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -45,13 +45,6 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
 /* See header file for descriptions of functions */
 
 /**
- * This macro returns the size of a member of a structure.
- * Logically it is the same as "sizeof(s::field)" in C++, but
- * C lacks the "::" operator.
- */
-#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field)
-
-/**
  * This macro returns a member of the
  * cvmx_bootmem_named_block_desc_t structure. These members can't
  * be directly addressed as they might be in memory not directly
@@ -65,7 +58,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
 #define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field)			\
 	__cvmx_bootmem_desc_get(addr,					\
 		offsetof(struct cvmx_bootmem_named_block_desc, field),	\
-		SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
+		sizeof_field(struct cvmx_bootmem_named_block_desc, field))
 
 /**
  * This function is the implementation of the get macros defined
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 95034bf5ca83..4f34d92b52f9 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -357,7 +357,7 @@ static void octeon_write_lcd(const char *s)
 {
 	if (octeon_bootinfo->led_display_base_addr) {
 		void __iomem *lcd_address =
-			ioremap_nocache(octeon_bootinfo->led_display_base_addr,
+			ioremap(octeon_bootinfo->led_display_base_addr,
 					8);
 		int i;
 		for (i = 0; i < 8; i++, s++) {
@@ -844,7 +844,7 @@ void __init prom_init(void)
 	 * BIST should always be enabled when doing a soft reset. L2
 	 * Cache locking for instance is not cleared unless BIST is
 	 * enabled.  Unfortunately due to a chip errata G-200 for
-	 * Cn38XX and CN31XX, BIST msut be disabled on these parts.
+	 * Cn38XX and CN31XX, BIST must be disabled on these parts.
 	 */
 	if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) ||
 	    OCTEON_IS_MODEL(OCTEON_CN31XX))
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index cb4aa23a2bf4..be41df2a81fb 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 7a7af706e898..1788ae23bff9 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -15,7 +15,7 @@ CONFIG_EXPERT=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
-CONFIG_MACH_LOONGSON64=y
+CONFIG_MACH_LOONGSON2EF=y
 CONFIG_PCI=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index d44f1469cf64..f9f93427c9bd 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -12,7 +12,7 @@ CONFIG_LOG_BUF_SHIFT=15
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_PROFILING=y
-CONFIG_MACH_LOONGSON64=y
+CONFIG_MACH_LOONGSON2EF=y
 CONFIG_LEMOTE_MACH2F=y
 CONFIG_KEXEC=y
 # CONFIG_SECCOMP is not set
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 90ee0084d786..360c6b2d397a 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -12,7 +12,6 @@ CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
@@ -21,10 +20,8 @@ CONFIG_SCHED_AUTOGROUP=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_MACH_LOONGSON64=y
-CONFIG_LOONGSON_MACH3X=y
 CONFIG_SMP=y
 CONFIG_HZ_256=y
 CONFIG_KEXEC=y
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index e6c600dc1814..614af02d83e6 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -5,7 +5,6 @@ CONFIG_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MIPS_MALTA=y
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index 82b44b774553..9c051f8fd330 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -5,7 +5,6 @@ CONFIG_AUDIT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MIPS_MALTA=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 4190fc6189a0..2e90d97551d6 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -5,7 +5,6 @@ CONFIG_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MIPS_MALTA=y
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index a13c10e910ec..d1f7fdb27284 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -5,7 +5,6 @@ CONFIG_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MIPS_MALTA=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index b35f1fc690fb..48e5bd492452 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -6,7 +6,6 @@ CONFIG_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MIPS_MALTA=y
diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig
index a39426e57e91..fc39ddf610a9 100644
--- a/arch/mips/configs/omega2p_defconfig
+++ b/arch/mips/configs/omega2p_defconfig
@@ -16,7 +16,6 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index d3f4d5248d9f..97c9a69d1528 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -2,7 +2,6 @@
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig
index 523b944fd527..a14f8ea5c386 100644
--- a/arch/mips/configs/vocore2_defconfig
+++ b/arch/mips/configs/vocore2_defconfig
@@ -16,7 +16,6 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index e07aca572c2e..8e1deaf00e0c 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -4,3 +4,21 @@
 #
 
 obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
+chacha-mips-y := chacha-core.o chacha-glue.o
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
+
+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
+poly1305-mips-y := poly1305-core.o poly1305-glue.o
+
+perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
+perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+
+quiet_cmd_perlasm = PERLASM $@
+      cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
+
+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
+	$(call if_changed,perlasm)
+
+targets += poly1305-core.S
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
new file mode 100644
index 000000000000..5755f69cfe00
--- /dev/null
+++ b/arch/mips/crypto/chacha-core.S
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define MASK_U32		0x3c
+#define CHACHA20_BLOCK_SIZE	64
+#define STACK_SIZE		32
+
+#define X0	$t0
+#define X1	$t1
+#define X2	$t2
+#define X3	$t3
+#define X4	$t4
+#define X5	$t5
+#define X6	$t6
+#define X7	$t7
+#define X8	$t8
+#define X9	$t9
+#define X10	$v1
+#define X11	$s6
+#define X12	$s5
+#define X13	$s4
+#define X14	$s3
+#define X15	$s2
+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
+#define T0	$s1
+#define T1	$s0
+#define T(n)	T ## n
+#define X(n)	X ## n
+
+/* Input arguments */
+#define STATE		$a0
+#define OUT		$a1
+#define IN		$a2
+#define BYTES		$a3
+
+/* Output argument */
+/* NONCE[0] is kept in a register and not in memory.
+ * We don't want to touch original value in memory.
+ * Must be incremented every loop iteration.
+ */
+#define NONCE_0		$v0
+
+/* SAVED_X and SAVED_CA are set in the jump table.
+ * Use regs which are overwritten on exit else we don't leak clear data.
+ * They are used to handling the last bytes which are not multiple of 4.
+ */
+#define SAVED_X		X15
+#define SAVED_CA	$s7
+
+#define IS_UNALIGNED	$s7
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MSB 0
+#define LSB 3
+#define ROTx rotl
+#define ROTR(n) rotr n, 24
+#define	CPU_TO_LE32(n) \
+	wsbh	n; \
+	rotr	n, 16;
+#else
+#define MSB 3
+#define LSB 0
+#define ROTx rotr
+#define CPU_TO_LE32(n)
+#define ROTR(n)
+#endif
+
+#define FOR_EACH_WORD(x) \
+	x( 0); \
+	x( 1); \
+	x( 2); \
+	x( 3); \
+	x( 4); \
+	x( 5); \
+	x( 6); \
+	x( 7); \
+	x( 8); \
+	x( 9); \
+	x(10); \
+	x(11); \
+	x(12); \
+	x(13); \
+	x(14); \
+	x(15);
+
+#define FOR_EACH_WORD_REV(x) \
+	x(15); \
+	x(14); \
+	x(13); \
+	x(12); \
+	x(11); \
+	x(10); \
+	x( 9); \
+	x( 8); \
+	x( 7); \
+	x( 6); \
+	x( 5); \
+	x( 4); \
+	x( 3); \
+	x( 2); \
+	x( 1); \
+	x( 0);
+
+#define PLUS_ONE_0	 1
+#define PLUS_ONE_1	 2
+#define PLUS_ONE_2	 3
+#define PLUS_ONE_3	 4
+#define PLUS_ONE_4	 5
+#define PLUS_ONE_5	 6
+#define PLUS_ONE_6	 7
+#define PLUS_ONE_7	 8
+#define PLUS_ONE_8	 9
+#define PLUS_ONE_9	10
+#define PLUS_ONE_10	11
+#define PLUS_ONE_11	12
+#define PLUS_ONE_12	13
+#define PLUS_ONE_13	14
+#define PLUS_ONE_14	15
+#define PLUS_ONE_15	16
+#define PLUS_ONE(x)	PLUS_ONE_ ## x
+#define _CONCAT3(a,b,c)	a ## b ## c
+#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)
+
+#define STORE_UNALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+	.if (x != 12); \
+		lw	T0, (x*4)(STATE); \
+	.endif; \
+	lwl	T1, (x*4)+MSB ## (IN); \
+	lwr	T1, (x*4)+LSB ## (IN); \
+	.if (x == 12); \
+		addu	X ## x, NONCE_0; \
+	.else; \
+		addu	X ## x, T0; \
+	.endif; \
+	CPU_TO_LE32(X ## x); \
+	xor	X ## x, T1; \
+	swl	X ## x, (x*4)+MSB ## (OUT); \
+	swr	X ## x, (x*4)+LSB ## (OUT);
+
+#define STORE_ALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+	.if (x != 12); \
+		lw	T0, (x*4)(STATE); \
+	.endif; \
+	lw	T1, (x*4) ## (IN); \
+	.if (x == 12); \
+		addu	X ## x, NONCE_0; \
+	.else; \
+		addu	X ## x, T0; \
+	.endif; \
+	CPU_TO_LE32(X ## x); \
+	xor	X ## x, T1; \
+	sw	X ## x, (x*4) ## (OUT);
+
+/* Jump table macro.
+ * Used for setup and handling the last bytes, which are not multiple of 4.
+ * X15 is free to store Xn
+ * Every jumptable entry must be equal in size.
+ */
+#define JMPTBL_ALIGNED(x) \
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
+	.set	noreorder; \
+	b	.Lchacha_mips_xor_aligned_ ## x ## _b; \
+	.if (x == 12); \
+		addu	SAVED_X, X ## x, NONCE_0; \
+	.else; \
+		addu	SAVED_X, X ## x, SAVED_CA; \
+	.endif; \
+	.set	reorder
+
+#define JMPTBL_UNALIGNED(x) \
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
+	.set	noreorder; \
+	b	.Lchacha_mips_xor_unaligned_ ## x ## _b; \
+	.if (x == 12); \
+		addu	SAVED_X, X ## x, NONCE_0; \
+	.else; \
+		addu	SAVED_X, X ## x, SAVED_CA; \
+	.endif; \
+	.set	reorder
+
+#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
+	addu	X(A), X(K); \
+	addu	X(B), X(L); \
+	addu	X(C), X(M); \
+	addu	X(D), X(N); \
+	xor	X(V), X(A); \
+	xor	X(W), X(B); \
+	xor	X(Y), X(C); \
+	xor	X(Z), X(D); \
+	rotl	X(V), S;    \
+	rotl	X(W), S;    \
+	rotl	X(Y), S;    \
+	rotl	X(Z), S;
+
+.text
+.set	reorder
+.set	noat
+.globl	chacha_crypt_arch
+.ent	chacha_crypt_arch
+chacha_crypt_arch:
+	.frame	$sp, STACK_SIZE, $ra
+
+	/* Load number of rounds */
+	lw	$at, 16($sp)
+
+	addiu	$sp, -STACK_SIZE
+
+	/* Return bytes = 0. */
+	beqz	BYTES, .Lchacha_mips_end
+
+	lw	NONCE_0, 48(STATE)
+
+	/* Save s0-s7 */
+	sw	$s0,  0($sp)
+	sw	$s1,  4($sp)
+	sw	$s2,  8($sp)
+	sw	$s3, 12($sp)
+	sw	$s4, 16($sp)
+	sw	$s5, 20($sp)
+	sw	$s6, 24($sp)
+	sw	$s7, 28($sp)
+
+	/* Test IN or OUT is unaligned.
+	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
+	 */
+	or	IS_UNALIGNED, IN, OUT
+	andi	IS_UNALIGNED, 0x3
+
+	b	.Lchacha_rounds_start
+
+.align 4
+.Loop_chacha_rounds:
+	addiu	IN,  CHACHA20_BLOCK_SIZE
+	addiu	OUT, CHACHA20_BLOCK_SIZE
+	addiu	NONCE_0, 1
+
+.Lchacha_rounds_start:
+	lw	X0,  0(STATE)
+	lw	X1,  4(STATE)
+	lw	X2,  8(STATE)
+	lw	X3,  12(STATE)
+
+	lw	X4,  16(STATE)
+	lw	X5,  20(STATE)
+	lw	X6,  24(STATE)
+	lw	X7,  28(STATE)
+	lw	X8,  32(STATE)
+	lw	X9,  36(STATE)
+	lw	X10, 40(STATE)
+	lw	X11, 44(STATE)
+
+	move	X12, NONCE_0
+	lw	X13, 52(STATE)
+	lw	X14, 56(STATE)
+	lw	X15, 60(STATE)
+
+.Loop_chacha_xor_rounds:
+	addiu	$at, -2
+	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
+	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
+	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
+	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
+	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
+	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
+	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
+	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
+	bnez	$at, .Loop_chacha_xor_rounds
+
+	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)
+
+	/* Is data src/dst unaligned? Jump */
+	bnez	IS_UNALIGNED, .Loop_chacha_unaligned
+
+	/* Set number rounds here to fill delayslot. */
+	lw	$at, (STACK_SIZE+16)($sp)
+
+	/* BYTES < 0, it has no full block. */
+	bltz	BYTES, .Lchacha_mips_no_full_block_aligned
+
+	FOR_EACH_WORD_REV(STORE_ALIGNED)
+
+	/* BYTES > 0? Loop again. */
+	bgtz	BYTES, .Loop_chacha_rounds
+
+	/* Place this here to fill delay slot */
+	addiu	NONCE_0, 1
+
+	/* BYTES < 0? Handle last bytes */
+	bltz	BYTES, .Lchacha_mips_xor_bytes
+
+.Lchacha_mips_xor_done:
+	/* Restore used registers */
+	lw	$s0,  0($sp)
+	lw	$s1,  4($sp)
+	lw	$s2,  8($sp)
+	lw	$s3, 12($sp)
+	lw	$s4, 16($sp)
+	lw	$s5, 20($sp)
+	lw	$s6, 24($sp)
+	lw	$s7, 28($sp)
+
+	/* Write NONCE_0 back to right location in state */
+	sw	NONCE_0, 48(STATE)
+
+.Lchacha_mips_end:
+	addiu	$sp, STACK_SIZE
+	jr	$ra
+
+.Lchacha_mips_no_full_block_aligned:
+	/* Restore the offset on BYTES */
+	addiu	BYTES, CHACHA20_BLOCK_SIZE
+
+	/* Get number of full WORDS */
+	andi	$at, BYTES, MASK_U32
+
+	/* Load upper half of jump table addr */
+	lui	T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
+
+	/* Calculate lower half jump table offset */
+	ins	T0, $at, 1, 6
+
+	/* Add offset to STATE */
+	addu	T1, STATE, $at
+
+	/* Add lower half jump table addr */
+	addiu	T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
+
+	/* Read value from STATE */
+	lw	SAVED_CA, 0(T1)
+
+	/* Store remaining bytecounter as negative value */
+	subu	BYTES, $at, BYTES
+
+	jr	T0
+
+	/* Jump table */
+	FOR_EACH_WORD(JMPTBL_ALIGNED)
+
+
+.Loop_chacha_unaligned:
+	/* Set number rounds here to fill delayslot. */
+	lw	$at, (STACK_SIZE+16)($sp)
+
+	/* BYTES > 0, it has no full block. */
+	bltz	BYTES, .Lchacha_mips_no_full_block_unaligned
+
+	FOR_EACH_WORD_REV(STORE_UNALIGNED)
+
+	/* BYTES > 0? Loop again. */
+	bgtz	BYTES, .Loop_chacha_rounds
+
+	/* Write NONCE_0 back to right location in state */
+	sw	NONCE_0, 48(STATE)
+
+	.set noreorder
+	/* Fall through to byte handling */
+	bgez	BYTES, .Lchacha_mips_xor_done
+.Lchacha_mips_xor_unaligned_0_b:
+.Lchacha_mips_xor_aligned_0_b:
+	/* Place this here to fill delay slot */
+	addiu	NONCE_0, 1
+	.set reorder
+
+.Lchacha_mips_xor_bytes:
+	addu	IN, $at
+	addu	OUT, $at
+	/* First byte */
+	lbu	T1, 0(IN)
+	addiu	$at, BYTES, 1
+	CPU_TO_LE32(SAVED_X)
+	ROTR(SAVED_X)
+	xor	T1, SAVED_X
+	sb	T1, 0(OUT)
+	beqz	$at, .Lchacha_mips_xor_done
+	/* Second byte */
+	lbu	T1, 1(IN)
+	addiu	$at, BYTES, 2
+	ROTx	SAVED_X, 8
+	xor	T1, SAVED_X
+	sb	T1, 1(OUT)
+	beqz	$at, .Lchacha_mips_xor_done
+	/* Third byte */
+	lbu	T1, 2(IN)
+	ROTx	SAVED_X, 8
+	xor	T1, SAVED_X
+	sb	T1, 2(OUT)
+	b	.Lchacha_mips_xor_done
+
+.Lchacha_mips_no_full_block_unaligned:
+	/* Restore the offset on BYTES */
+	addiu	BYTES, CHACHA20_BLOCK_SIZE
+
+	/* Get number of full WORDS */
+	andi	$at, BYTES, MASK_U32
+
+	/* Load upper half of jump table addr */
+	lui	T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
+
+	/* Calculate lower half jump table offset */
+	ins	T0, $at, 1, 6
+
+	/* Add offset to STATE */
+	addu	T1, STATE, $at
+
+	/* Add lower half jump table addr */
+	addiu	T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
+
+	/* Read value from STATE */
+	lw	SAVED_CA, 0(T1)
+
+	/* Store remaining bytecounter as negative value */
+	subu	BYTES, $at, BYTES
+
+	jr	T0
+
+	/* Jump table */
+	FOR_EACH_WORD(JMPTBL_UNALIGNED)
+.end chacha_crypt_arch
+.set at
+
+/* Input arguments
+ * STATE	$a0
+ * OUT		$a1
+ * NROUND	$a2
+ */
+
+#undef X12
+#undef X13
+#undef X14
+#undef X15
+
+#define X12	$a3
+#define X13	$at
+#define X14	$v0
+#define X15	STATE
+
+.set noat
+.globl	hchacha_block_arch
+.ent	hchacha_block_arch
+hchacha_block_arch:
+	.frame	$sp, STACK_SIZE, $ra
+
+	addiu	$sp, -STACK_SIZE
+
+	/* Save X11(s6) */
+	sw	X11, 0($sp)
+
+	lw	X0,  0(STATE)
+	lw	X1,  4(STATE)
+	lw	X2,  8(STATE)
+	lw	X3,  12(STATE)
+	lw	X4,  16(STATE)
+	lw	X5,  20(STATE)
+	lw	X6,  24(STATE)
+	lw	X7,  28(STATE)
+	lw	X8,  32(STATE)
+	lw	X9,  36(STATE)
+	lw	X10, 40(STATE)
+	lw	X11, 44(STATE)
+	lw	X12, 48(STATE)
+	lw	X13, 52(STATE)
+	lw	X14, 56(STATE)
+	lw	X15, 60(STATE)
+
+.Loop_hchacha_xor_rounds:
+	addiu	$a2, -2
+	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
+	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
+	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
+	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
+	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
+	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
+	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
+	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
+	bnez	$a2, .Loop_hchacha_xor_rounds
+
+	/* Restore used register */
+	lw	X11, 0($sp)
+
+	sw	X0,  0(OUT)
+	sw	X1,  4(OUT)
+	sw	X2,  8(OUT)
+	sw	X3,  12(OUT)
+	sw	X12, 16(OUT)
+	sw	X13, 20(OUT)
+	sw	X14, 24(OUT)
+	sw	X15, 28(OUT)
+
+	addiu	$sp, STACK_SIZE
+	jr	$ra
+.end hchacha_block_arch
+.set at
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
new file mode 100644
index 000000000000..d1fd23e6ef84
--- /dev/null
+++ b/arch/mips/crypto/chacha-glue.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPS accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/byteorder.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+				  unsigned int bytes, int nrounds);
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+static int chacha_mips_stream_xor(struct skcipher_request *req,
+				  const struct chacha_ctx *ctx, const u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+			     nbytes, ctx->nrounds);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int chacha_mips(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_mips_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_mips(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	hchacha_block(state, subctx.key, ctx->nrounds);
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_mips_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-mips",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_mips,
+		.decrypt		= chacha_mips,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-mips",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_mips,
+		.decrypt		= xchacha_mips,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-mips",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_mips,
+		.decrypt		= xchacha_mips,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
+		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-mips");
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
index 7d1d2425746f..faa88a6a74c0 100644
--- a/arch/mips/crypto/crc32-mips.c
+++ b/arch/mips/crypto/crc32-mips.c
@@ -177,10 +177,8 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (keylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = get_unaligned_le32(key);
 	return 0;
 }
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..fc881b46d911
--- /dev/null
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_mips(void *state, const u8 *key);
+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_init_mips(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(key + 16);
+	dctx->s[1] = get_unaligned_le32(key + 20);
+	dctx->s[2] = get_unaligned_le32(key + 24);
+	dctx->s[3] = get_unaligned_le32(key + 28);
+	dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int mips_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+				 u32 len, u32 hibit)
+{
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset) {
+			poly1305_init_mips(&dctx->h, src);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(src +  0);
+			dctx->s[1] = get_unaligned_le32(src +  4);
+			dctx->s[2] = get_unaligned_le32(src +  8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+		if (len < POLY1305_BLOCK_SIZE)
+			return;
+	}
+
+	len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+	poly1305_blocks_mips(&dctx->h, src, len, hibit);
+}
+
+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
+				unsigned int len)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		len -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(len >= POLY1305_BLOCK_SIZE)) {
+		mips_poly1305_blocks(dctx, src, len, 1);
+		src += round_down(len, POLY1305_BLOCK_SIZE);
+		len %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(len)) {
+		dctx->buflen = len;
+		memcpy(dctx->buf, src, len);
+	}
+	return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int nbytes)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		nbytes -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_blocks_mips(&dctx->h, dctx->buf,
+					     POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+		poly1305_blocks_mips(&dctx->h, src, len, 1);
+		src += len;
+		nbytes %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(nbytes)) {
+		dctx->buflen = nbytes;
+		memcpy(dctx->buf, src, nbytes);
+	}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_emit_mips(&dctx->h, dst, dctx->s);
+	*dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	poly1305_final_arch(dctx, dst);
+	return 0;
+}
+
+static struct shash_alg mips_poly1305_alg = {
+	.init			= mips_poly1305_init,
+	.update			= mips_poly1305_update,
+	.final			= mips_poly1305_final,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.descsize		= sizeof(struct poly1305_desc_ctx),
+
+	.base.cra_name		= "poly1305",
+	.base.cra_driver_name	= "poly1305-mips",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init mips_poly1305_mod_init(void)
+{
+	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+		crypto_register_shash(&mips_poly1305_alg) : 0;
+}
+
+static void __exit mips_poly1305_mod_exit(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+		crypto_unregister_shash(&mips_poly1305_alg);
+}
+
+module_init(mips_poly1305_mod_init);
+module_exit(mips_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-mips");
diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl
new file mode 100644
index 000000000000..b05bab884ed2
--- /dev/null
+++ b/arch/mips/crypto/poly1305-mips.pl
@@ -0,0 +1,1273 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
+# project.
+# ====================================================================
+
+# Poly1305 hash for MIPS.
+#
+# May 2016
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+#		IALU/gcc
+# R1x000	~5.5/+130%	(big-endian)
+# Octeon II	2.50/+70%	(little-endian)
+#
+# March 2019
+#
+# Add 32-bit code path.
+#
+# October 2019
+#
+# Modulo-scheduling reduction allows to omit dependency chain at the
+# end of inner loop and improve performance. Also optimize MIPS32R2
+# code path for MIPS 1004K core. Per René von Dorst's suggestions.
+#
+#		IALU/gcc
+# R1x000	~9.8/?		(big-endian)
+# Octeon II	3.65/+140%	(little-endian)
+# MT7621/1004K	4.75/?		(little-endian)
+#
+######################################################################
+# There is a number of MIPS ABI in use, O32 and N32/64 are most
+# widely used. Then there is a new contender: NUBI. It appears that if
+# one picks the latter, it's possible to arrange code in ABI neutral
+# manner. Therefore let's stick to NUBI register layout:
+#
+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
+#
+# The return value is placed in $a0. Following coding rules facilitate
+# interoperability:
+#
+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
+#   excluded from the rule, because it's specified volatile];
+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
+#   old code];
+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
+#
+# For reference here is register layout for N32/64 MIPS ABIs:
+#
+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
+#
+# <appro@openssl.org>
+#
+######################################################################
+
+$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
+
+$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
+
+if ($flavour =~ /64|n32/i) {{{
+######################################################################
+# 64-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
+     defined(_MIPS_ARCH_MIPS64R6)) \\
+     && !defined(_MIPS_ARCH_MIPS64R2)
+# define _MIPS_ARCH_MIPS64R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define dmultu(rs,rt)
+# define mflo(rd,rs,rt)	dmulu	rd,rs,rt
+# define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
+#else
+# define dmultu(rs,rt)		dmultu	rs,rt
+# define mflo(rd,rs,rt)	mflo	rd
+# define mfhi(rd,rs,rt)	mfhi	rd
+#endif
+
+#ifdef	__KERNEL__
+# define poly1305_init   poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit   poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 7
+#else
+# define MSB 7
+# define LSB 0
+#endif
+
+.text
+.set	noat
+.set	noreorder
+
+.align	5
+.globl	poly1305_init
+.ent	poly1305_init
+poly1305_init:
+	.frame	$sp,0,$ra
+	.set	reorder
+
+	sd	$zero,0($ctx)
+	sd	$zero,8($ctx)
+	sd	$zero,16($ctx)
+
+	beqz	$inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+	andi	$tmp0,$inp,7		# $inp % 8
+	dsubu	$inp,$inp,$tmp0		# align $inp
+	sll	$tmp0,$tmp0,3		# byte to bit offset
+	ld	$in0,0($inp)
+	ld	$in1,8($inp)
+	beqz	$tmp0,.Laligned_key
+	ld	$tmp2,16($inp)
+
+	subu	$tmp1,$zero,$tmp0
+# ifdef	MIPSEB
+	dsllv	$in0,$in0,$tmp0
+	dsrlv	$tmp3,$in1,$tmp1
+	dsllv	$in1,$in1,$tmp0
+	dsrlv	$tmp2,$tmp2,$tmp1
+# else
+	dsrlv	$in0,$in0,$tmp0
+	dsllv	$tmp3,$in1,$tmp1
+	dsrlv	$in1,$in1,$tmp0
+	dsllv	$tmp2,$tmp2,$tmp1
+# endif
+	or	$in0,$in0,$tmp3
+	or	$in1,$in1,$tmp2
+.Laligned_key:
+#else
+	ldl	$in0,0+MSB($inp)
+	ldl	$in1,8+MSB($inp)
+	ldr	$in0,0+LSB($inp)
+	ldr	$in1,8+LSB($inp)
+#endif
+#ifdef	MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+	dsbh	$in0,$in0		# byte swap
+	 dsbh	$in1,$in1
+	dshd	$in0,$in0
+	 dshd	$in1,$in1
+# else
+	ori	$tmp0,$zero,0xFF
+	dsll	$tmp2,$tmp0,32
+	or	$tmp0,$tmp2		# 0x000000FF000000FF
+
+	and	$tmp1,$in0,$tmp0	# byte swap
+	 and	$tmp3,$in1,$tmp0
+	dsrl	$tmp2,$in0,24
+	 dsrl	$tmp4,$in1,24
+	dsll	$tmp1,24
+	 dsll	$tmp3,24
+	and	$tmp2,$tmp0
+	 and	$tmp4,$tmp0
+	dsll	$tmp0,8			# 0x0000FF000000FF00
+	or	$tmp1,$tmp2
+	 or	$tmp3,$tmp4
+	and	$tmp2,$in0,$tmp0
+	 and	$tmp4,$in1,$tmp0
+	dsrl	$in0,8
+	 dsrl	$in1,8
+	dsll	$tmp2,8
+	 dsll	$tmp4,8
+	and	$in0,$tmp0
+	 and	$in1,$tmp0
+	or	$tmp1,$tmp2
+	 or	$tmp3,$tmp4
+	or	$in0,$tmp1
+	 or	$in1,$tmp3
+	dsrl	$tmp1,$in0,32
+	 dsrl	$tmp3,$in1,32
+	dsll	$in0,32
+	 dsll	$in1,32
+	or	$in0,$tmp1
+	 or	$in1,$tmp3
+# endif
+#endif
+	li	$tmp0,1
+	dsll	$tmp0,32		# 0x0000000100000000
+	daddiu	$tmp0,-63		# 0x00000000ffffffc1
+	dsll	$tmp0,28		# 0x0ffffffc10000000
+	daddiu	$tmp0,-1		# 0x0ffffffc0fffffff
+
+	and	$in0,$tmp0
+	daddiu	$tmp0,-3		# 0x0ffffffc0ffffffc
+	and	$in1,$tmp0
+
+	sd	$in0,24($ctx)
+	dsrl	$tmp0,$in1,2
+	sd	$in1,32($ctx)
+	daddu	$tmp0,$in1		# s1 = r1 + (r1 >> 2)
+	sd	$tmp0,40($ctx)
+
+.Lno_key:
+	li	$v0,0			# return 0
+	jr	$ra
+.end	poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
+
+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
+   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
+my ($shr,$shl) = ($s6,$s7);		# used on R6
+
+$code.=<<___;
+.align	5
+.globl	poly1305_blocks
+.ent	poly1305_blocks
+poly1305_blocks:
+	.set	noreorder
+	dsrl	$len,4			# number of complete blocks
+	bnez	$len,poly1305_blocks_internal
+	nop
+	jr	$ra
+	nop
+.end	poly1305_blocks
+
+.align	5
+.ent	poly1305_blocks_internal
+poly1305_blocks_internal:
+	.set	noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+	.frame	$sp,8*8,$ra
+	.mask	$SAVED_REGS_MASK|0x000c0000,-8
+	dsubu	$sp,8*8
+	sd	$s7,56($sp)
+	sd	$s6,48($sp)
+#else
+	.frame	$sp,6*8,$ra
+	.mask	$SAVED_REGS_MASK,-8
+	dsubu	$sp,6*8
+#endif
+	sd	$s5,40($sp)
+	sd	$s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
+	sd	$s3,24($sp)
+	sd	$s2,16($sp)
+	sd	$s1,8($sp)
+	sd	$s0,0($sp)
+___
+$code.=<<___;
+	.set	reorder
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+	andi	$shr,$inp,7
+	dsubu	$inp,$inp,$shr		# align $inp
+	sll	$shr,$shr,3		# byte to bit offset
+	subu	$shl,$zero,$shr
+#endif
+
+	ld	$h0,0($ctx)		# load hash value
+	ld	$h1,8($ctx)
+	ld	$h2,16($ctx)
+
+	ld	$r0,24($ctx)		# load key
+	ld	$r1,32($ctx)
+	ld	$rs1,40($ctx)
+
+	dsll	$len,4
+	daddu	$len,$inp		# end of buffer
+	b	.Loop
+
+.align	4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS64R6)
+	ld	$in0,0($inp)		# load input
+	ld	$in1,8($inp)
+	beqz	$shr,.Laligned_inp
+
+	ld	$tmp2,16($inp)
+# ifdef	MIPSEB
+	dsllv	$in0,$in0,$shr
+	dsrlv	$tmp3,$in1,$shl
+	dsllv	$in1,$in1,$shr
+	dsrlv	$tmp2,$tmp2,$shl
+# else
+	dsrlv	$in0,$in0,$shr
+	dsllv	$tmp3,$in1,$shl
+	dsrlv	$in1,$in1,$shr
+	dsllv	$tmp2,$tmp2,$shl
+# endif
+	or	$in0,$in0,$tmp3
+	or	$in1,$in1,$tmp2
+.Laligned_inp:
+#else
+	ldl	$in0,0+MSB($inp)	# load input
+	ldl	$in1,8+MSB($inp)
+	ldr	$in0,0+LSB($inp)
+	ldr	$in1,8+LSB($inp)
+#endif
+	daddiu	$inp,16
+#ifdef	MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+	dsbh	$in0,$in0		# byte swap
+	 dsbh	$in1,$in1
+	dshd	$in0,$in0
+	 dshd	$in1,$in1
+# else
+	ori	$tmp0,$zero,0xFF
+	dsll	$tmp2,$tmp0,32
+	or	$tmp0,$tmp2		# 0x000000FF000000FF
+
+	and	$tmp1,$in0,$tmp0	# byte swap
+	 and	$tmp3,$in1,$tmp0
+	dsrl	$tmp2,$in0,24
+	 dsrl	$tmp4,$in1,24
+	dsll	$tmp1,24
+	 dsll	$tmp3,24
+	and	$tmp2,$tmp0
+	 and	$tmp4,$tmp0
+	dsll	$tmp0,8			# 0x0000FF000000FF00
+	or	$tmp1,$tmp2
+	 or	$tmp3,$tmp4
+	and	$tmp2,$in0,$tmp0
+	 and	$tmp4,$in1,$tmp0
+	dsrl	$in0,8
+	 dsrl	$in1,8
+	dsll	$tmp2,8
+	 dsll	$tmp4,8
+	and	$in0,$tmp0
+	 and	$in1,$tmp0
+	or	$tmp1,$tmp2
+	 or	$tmp3,$tmp4
+	or	$in0,$tmp1
+	 or	$in1,$tmp3
+	dsrl	$tmp1,$in0,32
+	 dsrl	$tmp3,$in1,32
+	dsll	$in0,32
+	 dsll	$in1,32
+	or	$in0,$tmp1
+	 or	$in1,$tmp3
+# endif
+#endif
+	dsrl	$tmp1,$h2,2		# modulo-scheduled reduction
+	andi	$h2,$h2,3
+	dsll	$tmp0,$tmp1,2
+
+	daddu	$d0,$h0,$in0		# accumulate input
+	 daddu	$tmp1,$tmp0
+	sltu	$tmp0,$d0,$h0
+	daddu	$d0,$d0,$tmp1		# ... and residue
+	sltu	$tmp1,$d0,$tmp1
+	daddu	$d1,$h1,$in1
+	daddu	$tmp0,$tmp1
+	sltu	$tmp1,$d1,$h1
+	daddu	$d1,$tmp0
+
+	dmultu	($r0,$d0)		# h0*r0
+	 daddu	$d2,$h2,$padbit
+	 sltu	$tmp0,$d1,$tmp0
+	mflo	($h0,$r0,$d0)
+	mfhi	($h1,$r0,$d0)
+
+	dmultu	($rs1,$d1)		# h1*5*r1
+	 daddu	$d2,$tmp1
+	 daddu	$d2,$tmp0
+	mflo	($tmp0,$rs1,$d1)
+	mfhi	($tmp1,$rs1,$d1)
+
+	dmultu	($r1,$d0)		# h0*r1
+	mflo	($tmp2,$r1,$d0)
+	mfhi	($h2,$r1,$d0)
+	 daddu	$h0,$tmp0
+	 daddu	$h1,$tmp1
+	 sltu	$tmp0,$h0,$tmp0
+
+	dmultu	($r0,$d1)		# h1*r0
+	 daddu	$h1,$tmp0
+	 daddu	$h1,$tmp2
+	mflo	($tmp0,$r0,$d1)
+	mfhi	($tmp1,$r0,$d1)
+
+	dmultu	($rs1,$d2)		# h2*5*r1
+	 sltu	$tmp2,$h1,$tmp2
+	 daddu	$h2,$tmp2
+	mflo	($tmp2,$rs1,$d2)
+
+	dmultu	($r0,$d2)		# h2*r0
+	 daddu	$h1,$tmp0
+	 daddu	$h2,$tmp1
+	mflo	($tmp3,$r0,$d2)
+	 sltu	$tmp0,$h1,$tmp0
+	 daddu	$h2,$tmp0
+
+	daddu	$h1,$tmp2
+	sltu	$tmp2,$h1,$tmp2
+	daddu	$h2,$tmp2
+	daddu	$h2,$tmp3
+
+	bne	$inp,$len,.Loop
+
+	sd	$h0,0($ctx)		# store hash value
+	sd	$h1,8($ctx)
+	sd	$h2,16($ctx)
+
+	.set	noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+	ld	$s7,56($sp)
+	ld	$s6,48($sp)
+#endif
+	ld	$s5,40($sp)		# epilogue
+	ld	$s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
+	ld	$s3,24($sp)
+	ld	$s2,16($sp)
+	ld	$s1,8($sp)
+	ld	$s0,0($sp)
+___
+$code.=<<___;
+	jr	$ra
+#if defined(_MIPS_ARCH_MIPS64R6)
+	daddu	$sp,8*8
+#else
+	daddu	$sp,6*8
+#endif
+.end	poly1305_blocks_internal
+___
+}
+{
+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
+
+$code.=<<___;
+.align	5
+.globl	poly1305_emit
+.ent	poly1305_emit
+poly1305_emit:
+	.frame	$sp,0,$ra
+	.set	reorder
+
+	ld	$tmp2,16($ctx)
+	ld	$tmp0,0($ctx)
+	ld	$tmp1,8($ctx)
+
+	li	$in0,-4			# final reduction
+	dsrl	$in1,$tmp2,2
+	and	$in0,$tmp2
+	andi	$tmp2,$tmp2,3
+	daddu	$in0,$in1
+
+	daddu	$tmp0,$tmp0,$in0
+	sltu	$in1,$tmp0,$in0
+	 daddiu	$in0,$tmp0,5		# compare to modulus
+	daddu	$tmp1,$tmp1,$in1
+	 sltiu	$tmp3,$in0,5
+	sltu	$tmp4,$tmp1,$in1
+	 daddu	$in1,$tmp1,$tmp3
+	daddu	$tmp2,$tmp2,$tmp4
+	 sltu	$tmp3,$in1,$tmp3
+	 daddu	$tmp2,$tmp2,$tmp3
+
+	dsrl	$tmp2,2			# see if it carried/borrowed
+	dsubu	$tmp2,$zero,$tmp2
+
+	xor	$in0,$tmp0
+	xor	$in1,$tmp1
+	and	$in0,$tmp2
+	and	$in1,$tmp2
+	xor	$in0,$tmp0
+	xor	$in1,$tmp1
+
+	lwu	$tmp0,0($nonce)		# load nonce
+	lwu	$tmp1,4($nonce)
+	lwu	$tmp2,8($nonce)
+	lwu	$tmp3,12($nonce)
+	dsll	$tmp1,32
+	dsll	$tmp3,32
+	or	$tmp0,$tmp1
+	or	$tmp2,$tmp3
+
+	daddu	$in0,$tmp0		# accumulate nonce
+	daddu	$in1,$tmp2
+	sltu	$tmp0,$in0,$tmp0
+	daddu	$in1,$tmp0
+
+	dsrl	$tmp0,$in0,8		# write mac value
+	dsrl	$tmp1,$in0,16
+	dsrl	$tmp2,$in0,24
+	sb	$in0,0($mac)
+	dsrl	$tmp3,$in0,32
+	sb	$tmp0,1($mac)
+	dsrl	$tmp0,$in0,40
+	sb	$tmp1,2($mac)
+	dsrl	$tmp1,$in0,48
+	sb	$tmp2,3($mac)
+	dsrl	$tmp2,$in0,56
+	sb	$tmp3,4($mac)
+	dsrl	$tmp3,$in1,8
+	sb	$tmp0,5($mac)
+	dsrl	$tmp0,$in1,16
+	sb	$tmp1,6($mac)
+	dsrl	$tmp1,$in1,24
+	sb	$tmp2,7($mac)
+
+	sb	$in1,8($mac)
+	dsrl	$tmp2,$in1,32
+	sb	$tmp3,9($mac)
+	dsrl	$tmp3,$in1,40
+	sb	$tmp0,10($mac)
+	dsrl	$tmp0,$in1,48
+	sb	$tmp1,11($mac)
+	dsrl	$tmp1,$in1,56
+	sb	$tmp2,12($mac)
+	sb	$tmp3,13($mac)
+	sb	$tmp0,14($mac)
+	sb	$tmp1,15($mac)
+
+	jr	$ra
+.end	poly1305_emit
+.rdata
+.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
+.align	2
+___
+}
+}}} else {{{
+######################################################################
+# 32-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
+   ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
+     defined(_MIPS_ARCH_MIPS32R6)) \\
+     && !defined(_MIPS_ARCH_MIPS32R2)
+# define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+# define multu(rs,rt)
+# define mflo(rd,rs,rt)	mulu	rd,rs,rt
+# define mfhi(rd,rs,rt)	muhu	rd,rs,rt
+#else
+# define multu(rs,rt)	multu	rs,rt
+# define mflo(rd,rs,rt)	mflo	rd
+# define mfhi(rd,rs,rt)	mfhi	rd
+#endif
+
+#ifdef	__KERNEL__
+# define poly1305_init   poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit   poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 3
+#else
+# define MSB 3
+# define LSB 0
+#endif
+
+.text
+.set	noat
+.set	noreorder
+
+.align	5
+.globl	poly1305_init
+.ent	poly1305_init
+poly1305_init:
+	.frame	$sp,0,$ra
+	.set	reorder
+
+	sw	$zero,0($ctx)
+	sw	$zero,4($ctx)
+	sw	$zero,8($ctx)
+	sw	$zero,12($ctx)
+	sw	$zero,16($ctx)
+
+	beqz	$inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+	andi	$tmp0,$inp,3		# $inp % 4
+	subu	$inp,$inp,$tmp0		# align $inp
+	sll	$tmp0,$tmp0,3		# byte to bit offset
+	lw	$in0,0($inp)
+	lw	$in1,4($inp)
+	lw	$in2,8($inp)
+	lw	$in3,12($inp)
+	beqz	$tmp0,.Laligned_key
+
+	lw	$tmp2,16($inp)
+	subu	$tmp1,$zero,$tmp0
+# ifdef	MIPSEB
+	sllv	$in0,$in0,$tmp0
+	srlv	$tmp3,$in1,$tmp1
+	sllv	$in1,$in1,$tmp0
+	or	$in0,$in0,$tmp3
+	srlv	$tmp3,$in2,$tmp1
+	sllv	$in2,$in2,$tmp0
+	or	$in1,$in1,$tmp3
+	srlv	$tmp3,$in3,$tmp1
+	sllv	$in3,$in3,$tmp0
+	or	$in2,$in2,$tmp3
+	srlv	$tmp2,$tmp2,$tmp1
+	or	$in3,$in3,$tmp2
+# else
+	srlv	$in0,$in0,$tmp0
+	sllv	$tmp3,$in1,$tmp1
+	srlv	$in1,$in1,$tmp0
+	or	$in0,$in0,$tmp3
+	sllv	$tmp3,$in2,$tmp1
+	srlv	$in2,$in2,$tmp0
+	or	$in1,$in1,$tmp3
+	sllv	$tmp3,$in3,$tmp1
+	srlv	$in3,$in3,$tmp0
+	or	$in2,$in2,$tmp3
+	sllv	$tmp2,$tmp2,$tmp1
+	or	$in3,$in3,$tmp2
+# endif
+.Laligned_key:
+#else
+	lwl	$in0,0+MSB($inp)
+	lwl	$in1,4+MSB($inp)
+	lwl	$in2,8+MSB($inp)
+	lwl	$in3,12+MSB($inp)
+	lwr	$in0,0+LSB($inp)
+	lwr	$in1,4+LSB($inp)
+	lwr	$in2,8+LSB($inp)
+	lwr	$in3,12+LSB($inp)
+#endif
+#ifdef	MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+	wsbh	$in0,$in0		# byte swap
+	wsbh	$in1,$in1
+	wsbh	$in2,$in2
+	wsbh	$in3,$in3
+	rotr	$in0,$in0,16
+	rotr	$in1,$in1,16
+	rotr	$in2,$in2,16
+	rotr	$in3,$in3,16
+# else
+	srl	$tmp0,$in0,24		# byte swap
+	srl	$tmp1,$in0,8
+	andi	$tmp2,$in0,0xFF00
+	sll	$in0,$in0,24
+	andi	$tmp1,0xFF00
+	sll	$tmp2,$tmp2,8
+	or	$in0,$tmp0
+	 srl	$tmp0,$in1,24
+	or	$tmp1,$tmp2
+	 srl	$tmp2,$in1,8
+	or	$in0,$tmp1
+	 andi	$tmp1,$in1,0xFF00
+	 sll	$in1,$in1,24
+	 andi	$tmp2,0xFF00
+	 sll	$tmp1,$tmp1,8
+	 or	$in1,$tmp0
+	srl	$tmp0,$in2,24
+	 or	$tmp2,$tmp1
+	srl	$tmp1,$in2,8
+	 or	$in1,$tmp2
+	andi	$tmp2,$in2,0xFF00
+	sll	$in2,$in2,24
+	andi	$tmp1,0xFF00
+	sll	$tmp2,$tmp2,8
+	or	$in2,$tmp0
+	 srl	$tmp0,$in3,24
+	or	$tmp1,$tmp2
+	 srl	$tmp2,$in3,8
+	or	$in2,$tmp1
+	 andi	$tmp1,$in3,0xFF00
+	 sll	$in3,$in3,24
+	 andi	$tmp2,0xFF00
+	 sll	$tmp1,$tmp1,8
+	 or	$in3,$tmp0
+	 or	$tmp2,$tmp1
+	 or	$in3,$tmp2
+# endif
+#endif
+	lui	$tmp0,0x0fff
+	ori	$tmp0,0xffff		# 0x0fffffff
+	and	$in0,$in0,$tmp0
+	subu	$tmp0,3			# 0x0ffffffc
+	and	$in1,$in1,$tmp0
+	and	$in2,$in2,$tmp0
+	and	$in3,$in3,$tmp0
+
+	sw	$in0,20($ctx)
+	sw	$in1,24($ctx)
+	sw	$in2,28($ctx)
+	sw	$in3,32($ctx)
+
+	srl	$tmp1,$in1,2
+	srl	$tmp2,$in2,2
+	srl	$tmp3,$in3,2
+	addu	$in1,$in1,$tmp1		# s1 = r1 + (r1 >> 2)
+	addu	$in2,$in2,$tmp2
+	addu	$in3,$in3,$tmp3
+	sw	$in1,36($ctx)
+	sw	$in2,40($ctx)
+	sw	$in3,44($ctx)
+.Lno_key:
+	li	$v0,0
+	jr	$ra
+.end	poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
+
+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
+   ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
+my ($d0,$d1,$d2,$d3) =
+   ($a4,$a5,$a6,$a7);
+my $shr = $t2;		# used on R6
+my $one = $t2;		# used on R2
+
+$code.=<<___;
+.globl	poly1305_blocks
+.align	5
+.ent	poly1305_blocks
+poly1305_blocks:
+	.frame	$sp,16*4,$ra
+	.mask	$SAVED_REGS_MASK,-4
+	.set	noreorder
+	subu	$sp, $sp,4*12
+	sw	$s11,4*11($sp)
+	sw	$s10,4*10($sp)
+	sw	$s9, 4*9($sp)
+	sw	$s8, 4*8($sp)
+	sw	$s7, 4*7($sp)
+	sw	$s6, 4*6($sp)
+	sw	$s5, 4*5($sp)
+	sw	$s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
+	sw	$s3, 4*3($sp)
+	sw	$s2, 4*2($sp)
+	sw	$s1, 4*1($sp)
+	sw	$s0, 4*0($sp)
+___
+$code.=<<___;
+	.set	reorder
+
+	srl	$len,4			# number of complete blocks
+	li	$one,1
+	beqz	$len,.Labort
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+	andi	$shr,$inp,3
+	subu	$inp,$inp,$shr		# align $inp
+	sll	$shr,$shr,3		# byte to bit offset
+#endif
+
+	lw	$h0,0($ctx)		# load hash value
+	lw	$h1,4($ctx)
+	lw	$h2,8($ctx)
+	lw	$h3,12($ctx)
+	lw	$h4,16($ctx)
+
+	lw	$r0,20($ctx)		# load key
+	lw	$r1,24($ctx)
+	lw	$r2,28($ctx)
+	lw	$r3,32($ctx)
+	lw	$rs1,36($ctx)
+	lw	$rs2,40($ctx)
+	lw	$rs3,44($ctx)
+
+	sll	$len,4
+	addu	$len,$len,$inp		# end of buffer
+	b	.Loop
+
+.align	4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS32R6)
+	lw	$d0,0($inp)		# load input
+	lw	$d1,4($inp)
+	lw	$d2,8($inp)
+	lw	$d3,12($inp)
+	beqz	$shr,.Laligned_inp
+
+	lw	$t0,16($inp)
+	subu	$t1,$zero,$shr
+# ifdef	MIPSEB
+	sllv	$d0,$d0,$shr
+	srlv	$at,$d1,$t1
+	sllv	$d1,$d1,$shr
+	or	$d0,$d0,$at
+	srlv	$at,$d2,$t1
+	sllv	$d2,$d2,$shr
+	or	$d1,$d1,$at
+	srlv	$at,$d3,$t1
+	sllv	$d3,$d3,$shr
+	or	$d2,$d2,$at
+	srlv	$t0,$t0,$t1
+	or	$d3,$d3,$t0
+# else
+	srlv	$d0,$d0,$shr
+	sllv	$at,$d1,$t1
+	srlv	$d1,$d1,$shr
+	or	$d0,$d0,$at
+	sllv	$at,$d2,$t1
+	srlv	$d2,$d2,$shr
+	or	$d1,$d1,$at
+	sllv	$at,$d3,$t1
+	srlv	$d3,$d3,$shr
+	or	$d2,$d2,$at
+	sllv	$t0,$t0,$t1
+	or	$d3,$d3,$t0
+# endif
+.Laligned_inp:
+#else
+	lwl	$d0,0+MSB($inp)		# load input
+	lwl	$d1,4+MSB($inp)
+	lwl	$d2,8+MSB($inp)
+	lwl	$d3,12+MSB($inp)
+	lwr	$d0,0+LSB($inp)
+	lwr	$d1,4+LSB($inp)
+	lwr	$d2,8+LSB($inp)
+	lwr	$d3,12+LSB($inp)
+#endif
+#ifdef	MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+	wsbh	$d0,$d0			# byte swap
+	wsbh	$d1,$d1
+	wsbh	$d2,$d2
+	wsbh	$d3,$d3
+	rotr	$d0,$d0,16
+	rotr	$d1,$d1,16
+	rotr	$d2,$d2,16
+	rotr	$d3,$d3,16
+# else
+	srl	$at,$d0,24		# byte swap
+	srl	$t0,$d0,8
+	andi	$t1,$d0,0xFF00
+	sll	$d0,$d0,24
+	andi	$t0,0xFF00
+	sll	$t1,$t1,8
+	or	$d0,$at
+	 srl	$at,$d1,24
+	or	$t0,$t1
+	 srl	$t1,$d1,8
+	or	$d0,$t0
+	 andi	$t0,$d1,0xFF00
+	 sll	$d1,$d1,24
+	 andi	$t1,0xFF00
+	 sll	$t0,$t0,8
+	 or	$d1,$at
+	srl	$at,$d2,24
+	 or	$t1,$t0
+	srl	$t0,$d2,8
+	 or	$d1,$t1
+	andi	$t1,$d2,0xFF00
+	sll	$d2,$d2,24
+	andi	$t0,0xFF00
+	sll	$t1,$t1,8
+	or	$d2,$at
+	 srl	$at,$d3,24
+	or	$t0,$t1
+	 srl	$t1,$d3,8
+	or	$d2,$t0
+	 andi	$t0,$d3,0xFF00
+	 sll	$d3,$d3,24
+	 andi	$t1,0xFF00
+	 sll	$t0,$t0,8
+	 or	$d3,$at
+	 or	$t1,$t0
+	 or	$d3,$t1
+# endif
+#endif
+	srl	$t0,$h4,2		# modulo-scheduled reduction
+	andi	$h4,$h4,3
+	sll	$at,$t0,2
+
+	addu	$d0,$d0,$h0		# accumulate input
+	 addu	$t0,$t0,$at
+	sltu	$h0,$d0,$h0
+	addu	$d0,$d0,$t0		# ... and residue
+	sltu	$at,$d0,$t0
+
+	addu	$d1,$d1,$h1
+	 addu	$h0,$h0,$at		# carry
+	sltu	$h1,$d1,$h1
+	addu	$d1,$d1,$h0
+	sltu	$h0,$d1,$h0
+
+	addu	$d2,$d2,$h2
+	 addu	$h1,$h1,$h0		# carry
+	sltu	$h2,$d2,$h2
+	addu	$d2,$d2,$h1
+	sltu	$h1,$d2,$h1
+
+	addu	$d3,$d3,$h3
+	 addu	$h2,$h2,$h1		# carry
+	sltu	$h3,$d3,$h3
+	addu	$d3,$d3,$h2
+
+#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
+	multu	$r0,$d0			# d0*r0
+	 sltu	$h2,$d3,$h2
+	maddu	$rs3,$d1		# d1*s3
+	 addu	$h3,$h3,$h2		# carry
+	maddu	$rs2,$d2		# d2*s2
+	 addu	$h4,$h4,$padbit
+	maddu	$rs1,$d3		# d3*s1
+	 addu	$h4,$h4,$h3
+	mfhi	$at
+	mflo	$h0
+
+	multu	$r1,$d0			# d0*r1
+	maddu	$r0,$d1			# d1*r0
+	maddu	$rs3,$d2		# d2*s3
+	maddu	$rs2,$d3		# d3*s2
+	maddu	$rs1,$h4		# h4*s1
+	maddu	$at,$one		# hi*1
+	mfhi	$at
+	mflo	$h1
+
+	multu	$r2,$d0			# d0*r2
+	maddu	$r1,$d1			# d1*r1
+	maddu	$r0,$d2			# d2*r0
+	maddu	$rs3,$d3		# d3*s3
+	maddu	$rs2,$h4		# h4*s2
+	maddu	$at,$one		# hi*1
+	mfhi	$at
+	mflo	$h2
+
+	mul	$t0,$r0,$h4		# h4*r0
+
+	multu	$r3,$d0			# d0*r3
+	maddu	$r2,$d1			# d1*r2
+	maddu	$r1,$d2			# d2*r1
+	maddu	$r0,$d3			# d3*r0
+	maddu	$rs3,$h4		# h4*s3
+	maddu	$at,$one		# hi*1
+	mfhi	$at
+	mflo	$h3
+
+	 addiu	$inp,$inp,16
+
+	addu	$h4,$t0,$at
+#else
+	multu	($r0,$d0)		# d0*r0
+	mflo	($h0,$r0,$d0)
+	mfhi	($h1,$r0,$d0)
+
+	 sltu	$h2,$d3,$h2
+	 addu	$h3,$h3,$h2		# carry
+
+	multu	($rs3,$d1)		# d1*s3
+	mflo	($at,$rs3,$d1)
+	mfhi	($t0,$rs3,$d1)
+
+	 addu	$h4,$h4,$padbit
+	 addiu	$inp,$inp,16
+	 addu	$h4,$h4,$h3
+
+	multu	($rs2,$d2)		# d2*s2
+	mflo	($a3,$rs2,$d2)
+	mfhi	($t1,$rs2,$d2)
+	 addu	$h0,$h0,$at
+	 addu	$h1,$h1,$t0
+	multu	($rs1,$d3)		# d3*s1
+	 sltu	$at,$h0,$at
+	 addu	$h1,$h1,$at
+
+	mflo	($at,$rs1,$d3)
+	mfhi	($t0,$rs1,$d3)
+	 addu	$h0,$h0,$a3
+	 addu	$h1,$h1,$t1
+	multu	($r1,$d0)		# d0*r1
+	 sltu	$a3,$h0,$a3
+	 addu	$h1,$h1,$a3
+
+
+	mflo	($a3,$r1,$d0)
+	mfhi	($h2,$r1,$d0)
+	 addu	$h0,$h0,$at
+	 addu	$h1,$h1,$t0
+	multu	($r0,$d1)		# d1*r0
+	 sltu	$at,$h0,$at
+	 addu	$h1,$h1,$at
+
+	mflo	($at,$r0,$d1)
+	mfhi	($t0,$r0,$d1)
+	 addu	$h1,$h1,$a3
+	 sltu	$a3,$h1,$a3
+	multu	($rs3,$d2)		# d2*s3
+	 addu	$h2,$h2,$a3
+
+	mflo	($a3,$rs3,$d2)
+	mfhi	($t1,$rs3,$d2)
+	 addu	$h1,$h1,$at
+	 addu	$h2,$h2,$t0
+	multu	($rs2,$d3)		# d3*s2
+	 sltu	$at,$h1,$at
+	 addu	$h2,$h2,$at
+
+	mflo	($at,$rs2,$d3)
+	mfhi	($t0,$rs2,$d3)
+	 addu	$h1,$h1,$a3
+	 addu	$h2,$h2,$t1
+	multu	($rs1,$h4)		# h4*s1
+	 sltu	$a3,$h1,$a3
+	 addu	$h2,$h2,$a3
+
+	mflo	($a3,$rs1,$h4)
+	 addu	$h1,$h1,$at
+	 addu	$h2,$h2,$t0
+	multu	($r2,$d0)		# d0*r2
+	 sltu	$at,$h1,$at
+	 addu	$h2,$h2,$at
+
+
+	mflo	($at,$r2,$d0)
+	mfhi	($h3,$r2,$d0)
+	 addu	$h1,$h1,$a3
+	 sltu	$a3,$h1,$a3
+	multu	($r1,$d1)		# d1*r1
+	 addu	$h2,$h2,$a3
+
+	mflo	($a3,$r1,$d1)
+	mfhi	($t1,$r1,$d1)
+	 addu	$h2,$h2,$at
+	 sltu	$at,$h2,$at
+	multu	($r0,$d2)		# d2*r0
+	 addu	$h3,$h3,$at
+
+	mflo	($at,$r0,$d2)
+	mfhi	($t0,$r0,$d2)
+	 addu	$h2,$h2,$a3
+	 addu	$h3,$h3,$t1
+	multu	($rs3,$d3)		# d3*s3
+	 sltu	$a3,$h2,$a3
+	 addu	$h3,$h3,$a3
+
+	mflo	($a3,$rs3,$d3)
+	mfhi	($t1,$rs3,$d3)
+	 addu	$h2,$h2,$at
+	 addu	$h3,$h3,$t0
+	multu	($rs2,$h4)		# h4*s2
+	 sltu	$at,$h2,$at
+	 addu	$h3,$h3,$at
+
+	mflo	($at,$rs2,$h4)
+	 addu	$h2,$h2,$a3
+	 addu	$h3,$h3,$t1
+	multu	($r3,$d0)		# d0*r3
+	 sltu	$a3,$h2,$a3
+	 addu	$h3,$h3,$a3
+
+
+	mflo	($a3,$r3,$d0)
+	mfhi	($t1,$r3,$d0)
+	 addu	$h2,$h2,$at
+	 sltu	$at,$h2,$at
+	multu	($r2,$d1)		# d1*r2
+	 addu	$h3,$h3,$at
+
+	mflo	($at,$r2,$d1)
+	mfhi	($t0,$r2,$d1)
+	 addu	$h3,$h3,$a3
+	 sltu	$a3,$h3,$a3
+	multu	($r0,$d3)		# d3*r0
+	 addu	$t1,$t1,$a3
+
+	mflo	($a3,$r0,$d3)
+	mfhi	($d3,$r0,$d3)
+	 addu	$h3,$h3,$at
+	 addu	$t1,$t1,$t0
+	multu	($r1,$d2)		# d2*r1
+	 sltu	$at,$h3,$at
+	 addu	$t1,$t1,$at
+
+	mflo	($at,$r1,$d2)
+	mfhi	($t0,$r1,$d2)
+	 addu	$h3,$h3,$a3
+	 addu	$t1,$t1,$d3
+	multu	($rs3,$h4)		# h4*s3
+	 sltu	$a3,$h3,$a3
+	 addu	$t1,$t1,$a3
+
+	mflo	($a3,$rs3,$h4)
+	 addu	$h3,$h3,$at
+	 addu	$t1,$t1,$t0
+	multu	($r0,$h4)		# h4*r0
+	 sltu	$at,$h3,$at
+	 addu	$t1,$t1,$at
+
+
+	mflo	($h4,$r0,$h4)
+	 addu	$h3,$h3,$a3
+	 sltu	$a3,$h3,$a3
+	 addu	$t1,$t1,$a3
+	addu	$h4,$h4,$t1
+
+	li	$padbit,1		# if we loop, padbit is 1
+#endif
+	bne	$inp,$len,.Loop
+
+	sw	$h0,0($ctx)		# store hash value
+	sw	$h1,4($ctx)
+	sw	$h2,8($ctx)
+	sw	$h3,12($ctx)
+	sw	$h4,16($ctx)
+
+	.set	noreorder
+.Labort:
+	lw	$s11,4*11($sp)
+	lw	$s10,4*10($sp)
+	lw	$s9, 4*9($sp)
+	lw	$s8, 4*8($sp)
+	lw	$s7, 4*7($sp)
+	lw	$s6, 4*6($sp)
+	lw	$s5, 4*5($sp)
+	lw	$s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
+	lw	$s3, 4*3($sp)
+	lw	$s2, 4*2($sp)
+	lw	$s1, 4*1($sp)
+	lw	$s0, 4*0($sp)
+___
+$code.=<<___;
+	jr	$ra
+	addu	$sp,$sp,4*12
+.end	poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
+
+$code.=<<___;
+.align	5
+.globl	poly1305_emit
+.ent	poly1305_emit
+poly1305_emit:
+	.frame	$sp,0,$ra
+	.set	reorder
+
+	lw	$tmp4,16($ctx)
+	lw	$tmp0,0($ctx)
+	lw	$tmp1,4($ctx)
+	lw	$tmp2,8($ctx)
+	lw	$tmp3,12($ctx)
+
+	li	$in0,-4			# final reduction
+	srl	$ctx,$tmp4,2
+	and	$in0,$in0,$tmp4
+	andi	$tmp4,$tmp4,3
+	addu	$ctx,$ctx,$in0
+
+	addu	$tmp0,$tmp0,$ctx
+	sltu	$ctx,$tmp0,$ctx
+	 addiu	$in0,$tmp0,5		# compare to modulus
+	addu	$tmp1,$tmp1,$ctx
+	 sltiu	$in1,$in0,5
+	sltu	$ctx,$tmp1,$ctx
+	 addu	$in1,$in1,$tmp1
+	addu	$tmp2,$tmp2,$ctx
+	 sltu	$in2,$in1,$tmp1
+	sltu	$ctx,$tmp2,$ctx
+	 addu	$in2,$in2,$tmp2
+	addu	$tmp3,$tmp3,$ctx
+	 sltu	$in3,$in2,$tmp2
+	sltu	$ctx,$tmp3,$ctx
+	 addu	$in3,$in3,$tmp3
+	addu	$tmp4,$tmp4,$ctx
+	 sltu	$ctx,$in3,$tmp3
+	 addu	$ctx,$tmp4
+
+	srl	$ctx,2			# see if it carried/borrowed
+	subu	$ctx,$zero,$ctx
+
+	xor	$in0,$tmp0
+	xor	$in1,$tmp1
+	xor	$in2,$tmp2
+	xor	$in3,$tmp3
+	and	$in0,$ctx
+	and	$in1,$ctx
+	and	$in2,$ctx
+	and	$in3,$ctx
+	xor	$in0,$tmp0
+	xor	$in1,$tmp1
+	xor	$in2,$tmp2
+	xor	$in3,$tmp3
+
+	lw	$tmp0,0($nonce)		# load nonce
+	lw	$tmp1,4($nonce)
+	lw	$tmp2,8($nonce)
+	lw	$tmp3,12($nonce)
+
+	addu	$in0,$tmp0		# accumulate nonce
+	sltu	$ctx,$in0,$tmp0
+
+	addu	$in1,$tmp1
+	sltu	$tmp1,$in1,$tmp1
+	addu	$in1,$ctx
+	sltu	$ctx,$in1,$ctx
+	addu	$ctx,$tmp1
+
+	addu	$in2,$tmp2
+	sltu	$tmp2,$in2,$tmp2
+	addu	$in2,$ctx
+	sltu	$ctx,$in2,$ctx
+	addu	$ctx,$tmp2
+
+	addu	$in3,$tmp3
+	addu	$in3,$ctx
+
+	srl	$tmp0,$in0,8		# write mac value
+	srl	$tmp1,$in0,16
+	srl	$tmp2,$in0,24
+	sb	$in0, 0($mac)
+	sb	$tmp0,1($mac)
+	srl	$tmp0,$in1,8
+	sb	$tmp1,2($mac)
+	srl	$tmp1,$in1,16
+	sb	$tmp2,3($mac)
+	srl	$tmp2,$in1,24
+	sb	$in1, 4($mac)
+	sb	$tmp0,5($mac)
+	srl	$tmp0,$in2,8
+	sb	$tmp1,6($mac)
+	srl	$tmp1,$in2,16
+	sb	$tmp2,7($mac)
+	srl	$tmp2,$in2,24
+	sb	$in2, 8($mac)
+	sb	$tmp0,9($mac)
+	srl	$tmp0,$in3,8
+	sb	$tmp1,10($mac)
+	srl	$tmp1,$in3,16
+	sb	$tmp2,11($mac)
+	srl	$tmp2,$in3,24
+	sb	$in3, 12($mac)
+	sb	$tmp0,13($mac)
+	sb	$tmp1,14($mac)
+	sb	$tmp2,15($mac)
+
+	jr	$ra
+.end	poly1305_emit
+.rdata
+.asciiz	"Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
+.align	2
+___
+}
+}}}
+
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
diff --git a/arch/mips/fw/arc/Makefile b/arch/mips/fw/arc/Makefile
index 31dd7305d643..64d685efcc77 100644
--- a/arch/mips/fw/arc/Makefile
+++ b/arch/mips/fw/arc/Makefile
@@ -3,8 +3,12 @@
 # Makefile for the ARC prom monitor library routines under Linux.
 #
 
+ifdef CONFIG_ARC_CMDLINE_ONLY
+lib-y				+= cmdline.o
+else
 lib-y				+= cmdline.o env.o file.o identify.o init.o \
-				   misc.o salone.o time.o tree.o
+				   misc.o
+endif
 
 lib-$(CONFIG_ARC_MEMORY)	+= memory.o
 lib-$(CONFIG_ARC_CONSOLE)	+= arc_con.o
diff --git a/arch/mips/fw/arc/cmdline.c b/arch/mips/fw/arc/cmdline.c
index c0122a1dc587..155c5e911723 100644
--- a/arch/mips/fw/arc/cmdline.c
+++ b/arch/mips/fw/arc/cmdline.c
@@ -17,6 +17,12 @@
 
 #undef DEBUG_CMDLINE
 
+/*
+ * A 32-bit ARC PROM pass arguments and environment as 32-bit pointer.
+ * These macro take care of sign extension.
+ */
+#define prom_argv(index) ((char *) (long)argv[(index)])
+
 static char *ignored[] = {
 	"ConsoleIn=",
 	"ConsoleOut=",
@@ -32,14 +38,14 @@ static char *used_arc[][2] = {
 	{ "OSLoadOptions=", "" }
 };
 
-static char * __init move_firmware_args(char* cp)
+static char __init *move_firmware_args(int argc, LONG *argv, char *cp)
 {
 	char *s;
 	int actr, i;
 
 	actr = 1; /* Always ignore argv[0] */
 
-	while (actr < prom_argc) {
+	while (actr < argc) {
 		for(i = 0; i < ARRAY_SIZE(used_arc); i++) {
 			int len = strlen(used_arc[i][0]);
 
@@ -64,7 +70,7 @@ static char * __init move_firmware_args(char* cp)
 	return cp;
 }
 
-void __init prom_init_cmdline(void)
+void __init prom_init_cmdline(int argc, LONG *argv)
 {
 	char *cp;
 	int actr, i;
@@ -76,9 +82,9 @@ void __init prom_init_cmdline(void)
 	 * Move ARC variables to the beginning to make sure they can be
 	 * overridden by later arguments.
 	 */
-	cp = move_firmware_args(cp);
+	cp = move_firmware_args(argc, argv, cp);
 
-	while (actr < prom_argc) {
+	while (actr < argc) {
 		for (i = 0; i < ARRAY_SIZE(ignored); i++) {
 			int len = strlen(ignored[i]);
 
diff --git a/arch/mips/fw/arc/env.c b/arch/mips/fw/arc/env.c
index 1118a26b32ee..02407a7bb38e 100644
--- a/arch/mips/fw/arc/env.c
+++ b/arch/mips/fw/arc/env.c
@@ -19,9 +19,3 @@ ArcGetEnvironmentVariable(CHAR *name)
 {
 	return (CHAR *) ARC_CALL1(get_evar, name);
 }
-
-LONG __init
-ArcSetEnvironmentVariable(PCHAR name, PCHAR value)
-{
-	return ARC_CALL2(set_evar, name, value);
-}
diff --git a/arch/mips/fw/arc/file.c b/arch/mips/fw/arc/file.c
index 49fd3ff13fe5..b0d8535c80cc 100644
--- a/arch/mips/fw/arc/file.c
+++ b/arch/mips/fw/arc/file.c
@@ -13,62 +13,13 @@
 #include <asm/sgialib.h>
 
 LONG
-ArcGetDirectoryEntry(ULONG FileID, struct linux_vdirent *Buffer,
-		     ULONG N, ULONG *Count)
-{
-	return ARC_CALL4(get_vdirent, FileID, Buffer, N, Count);
-}
-
-LONG
-ArcOpen(CHAR *Path, enum linux_omode OpenMode, ULONG *FileID)
-{
-	return ARC_CALL3(open, Path, OpenMode, FileID);
-}
-
-LONG
-ArcClose(ULONG FileID)
-{
-	return ARC_CALL1(close, FileID);
-}
-
-LONG
 ArcRead(ULONG FileID, VOID *Buffer, ULONG N, ULONG *Count)
 {
 	return ARC_CALL4(read, FileID, Buffer, N, Count);
 }
 
 LONG
-ArcGetReadStatus(ULONG FileID)
-{
-	return ARC_CALL1(get_rstatus, FileID);
-}
-
-LONG
 ArcWrite(ULONG FileID, PVOID Buffer, ULONG N, PULONG Count)
 {
 	return ARC_CALL4(write, FileID, Buffer, N, Count);
 }
-
-LONG
-ArcSeek(ULONG FileID, struct linux_bigint *Position, enum linux_seekmode SeekMode)
-{
-	return ARC_CALL3(seek, FileID, Position, SeekMode);
-}
-
-LONG
-ArcMount(char *name, enum linux_mountops op)
-{
-	return ARC_CALL2(mount, name, op);
-}
-
-LONG
-ArcGetFileInformation(ULONG FileID, struct linux_finfo *Information)
-{
-	return ARC_CALL2(get_finfo, FileID, Information);
-}
-
-LONG ArcSetFileInformation(ULONG FileID, ULONG AttributeFlags,
-			   ULONG AttributeMask)
-{
-	return ARC_CALL3(set_finfo, FileID, AttributeFlags, AttributeMask);
-}
diff --git a/arch/mips/fw/arc/identify.c b/arch/mips/fw/arc/identify.c
index f90266c02c9d..5527e0f54079 100644
--- a/arch/mips/fw/arc/identify.c
+++ b/arch/mips/fw/arc/identify.c
@@ -32,10 +32,6 @@ static struct smatch mach_table[] = {
 		.liname		= "SGI Indy",
 		.flags		= PROM_FLAG_ARCS,
 	}, {
-		.arcname	= "SGI-IP27",
-		.liname		= "SGI Origin",
-		.flags		= PROM_FLAG_ARCS,
-	}, {
 		.arcname	= "SGI-IP28",
 		.liname		= "SGI IP28",
 		.flags		= PROM_FLAG_ARCS,
@@ -87,6 +83,11 @@ const char *get_system_type(void)
 	return system_type;
 }
 
+static pcomponent * __init ArcGetChild(pcomponent *Current)
+{
+	return (pcomponent *) ARC_CALL1(child_component, Current);
+}
+
 void __init prom_identify_arch(void)
 {
 	pcomponent *p;
@@ -98,13 +99,7 @@ void __init prom_identify_arch(void)
 	 */
 	p = ArcGetChild(PROM_NULL_COMPONENT);
 	if (p == NULL) {
-#ifdef CONFIG_SGI_IP27
-		/* IP27 PROM misbehaves, seems to not implement ARC
-		   GetChild().	So we just assume it's an IP27.	 */
-		iname = "SGI-IP27";
-#else
 		iname = "Unknown";
-#endif
 	} else
 		iname = (char *) (long) p->iname;
 
diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c
index 008555969534..f9d1dea9b2ca 100644
--- a/arch/mips/fw/arc/init.c
+++ b/arch/mips/fw/arc/init.c
@@ -18,8 +18,11 @@
 
 /* Master romvec interface. */
 struct linux_romvec *romvec;
-int prom_argc;
-LONG *_prom_argv, *_prom_envp;
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
+/* stack for calling 32bit ARC prom */
+u64 o32_stk[4096];
+#endif
 
 void __init prom_init(void)
 {
@@ -27,10 +30,6 @@ void __init prom_init(void)
 
 	romvec = ROMVECTOR;
 
-	prom_argc = fw_arg0;
-	_prom_argv = (LONG *) fw_arg1;
-	_prom_envp = (LONG *) fw_arg2;
-
 	if (pb->magic != 0x53435241) {
 		printk(KERN_CRIT "Aieee, bad prom vector magic %08lx\n",
 		       (unsigned long) pb->magic);
@@ -38,7 +37,7 @@ void __init prom_init(void)
 			;
 	}
 
-	prom_init_cmdline();
+	prom_init_cmdline(fw_arg0, (LONG *)fw_arg1);
 	prom_identify_arch();
 	printk(KERN_INFO "PROMLIB: ARC firmware Version %d Revision %d\n",
 	       pb->ver, pb->rev);
@@ -49,11 +48,4 @@ void __init prom_init(void)
 	ArcRead(0, &c, 1, &cnt);
 	ArcEnterInteractiveMode();
 #endif
-#ifdef CONFIG_SGI_IP27
-	{
-		extern const struct plat_smp_ops ip27_smp_ops;
-
-		register_smp_ops(&ip27_smp_ops);
-	}
-#endif
 }
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index b4328b3b5288..dbbcddc82823 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -158,6 +158,10 @@ void __init prom_meminit(void)
 	}
 }
 
+void __weak __init prom_cleanup(void)
+{
+}
+
 void __init prom_free_prom_memory(void)
 {
 	int i;
@@ -169,4 +173,9 @@ void __init prom_free_prom_memory(void)
 		free_init_pages("prom memory",
 			prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]);
 	}
+	/*
+	 * at this point it isn't safe to call PROM functions
+	 * give platforms a way to do PROM cleanups
+	 */
+	prom_cleanup();
 }
diff --git a/arch/mips/fw/arc/misc.c b/arch/mips/fw/arc/misc.c
index 19f710117d97..d5b2d5901324 100644
--- a/arch/mips/fw/arc/misc.c
+++ b/arch/mips/fw/arc/misc.c
@@ -21,47 +21,6 @@
 #include <asm/bootinfo.h>
 
 VOID __noreturn
-ArcHalt(VOID)
-{
-	bc_disable();
-	local_irq_disable();
-	ARC_CALL0(halt);
-
-	unreachable();
-}
-
-VOID __noreturn
-ArcPowerDown(VOID)
-{
-	bc_disable();
-	local_irq_disable();
-	ARC_CALL0(pdown);
-
-	unreachable();
-}
-
-/* XXX is this a soft reset basically? XXX */
-VOID __noreturn
-ArcRestart(VOID)
-{
-	bc_disable();
-	local_irq_disable();
-	ARC_CALL0(restart);
-
-	unreachable();
-}
-
-VOID __noreturn
-ArcReboot(VOID)
-{
-	bc_disable();
-	local_irq_disable();
-	ARC_CALL0(reboot);
-
-	unreachable();
-}
-
-VOID __noreturn
 ArcEnterInteractiveMode(VOID)
 {
 	bc_disable();
@@ -71,24 +30,6 @@ ArcEnterInteractiveMode(VOID)
 	unreachable();
 }
 
-LONG
-ArcSaveConfiguration(VOID)
-{
-	return ARC_CALL0(cfg_save);
-}
-
-struct linux_sysid *
-ArcGetSystemId(VOID)
-{
-	return (struct linux_sysid *) ARC_CALL0(get_sysid);
-}
-
-VOID __init
-ArcFlushAllCaches(VOID)
-{
-	ARC_CALL0(cache_flush);
-}
-
 DISPLAY_STATUS * __init ArcGetDisplayStatus(ULONG FileID)
 {
 	return (DISPLAY_STATUS *) ARC_CALL1(GetDisplayStatus, FileID);
diff --git a/arch/mips/fw/arc/promlib.c b/arch/mips/fw/arc/promlib.c
index be381307fbb0..5e9e840a9314 100644
--- a/arch/mips/fw/arc/promlib.c
+++ b/arch/mips/fw/arc/promlib.c
@@ -11,6 +11,21 @@
 #include <asm/bcache.h>
 #include <asm/setup.h>
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
+/*
+ * For 64bit kernels working with a 32bit ARC PROM pointer arguments
+ * for ARC calls need to reside in CKEG0/1. But as soon as the kernel
+ * switches to it's first kernel thread stack is set to an address in
+ * XKPHYS, so anything on stack can't be used anymore. This is solved
+ * by using a * static declartion variables are put into BSS, which is
+ * linked to a CKSEG0 address. Since this is only used on UP platforms
+ * there is not spinlock needed
+ */
+#define O32_STATIC	static
+#else
+#define O32_STATIC
+#endif
+
 /*
  * IP22 boardcache is not compatible with board caches.	 Thus we disable it
  * during romvec action.  Since r4xx0.c is always compiled and linked with your
@@ -23,8 +38,10 @@
 
 void prom_putchar(char c)
 {
-	ULONG cnt;
-	CHAR it = c;
+	O32_STATIC ULONG cnt;
+	O32_STATIC CHAR it;
+
+	it = c;
 
 	bc_disable();
 	ArcWrite(1, &it, 1, &cnt);
@@ -33,8 +50,8 @@ void prom_putchar(char c)
 
 char prom_getchar(void)
 {
-	ULONG cnt;
-	CHAR c;
+	O32_STATIC ULONG cnt;
+	O32_STATIC CHAR c;
 
 	bc_disable();
 	ArcRead(0, &c, 1, &cnt);
diff --git a/arch/mips/fw/arc/salone.c b/arch/mips/fw/arc/salone.c
deleted file mode 100644
index 2d99f44d5576..000000000000
--- a/arch/mips/fw/arc/salone.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Routines to load into memory and execute stand-along program images using
- * ARCS PROM firmware.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- */
-#include <linux/init.h>
-#include <asm/sgialib.h>
-
-LONG __init ArcLoad(CHAR *Path, ULONG TopAddr, ULONG *ExecAddr, ULONG *LowAddr)
-{
-	return ARC_CALL4(load, Path, TopAddr, ExecAddr, LowAddr);
-}
-
-LONG __init ArcInvoke(ULONG ExecAddr, ULONG StackAddr, ULONG Argc, CHAR *Argv[],
-	CHAR *Envp[])
-{
-	return ARC_CALL5(invoke, ExecAddr, StackAddr, Argc, Argv, Envp);
-}
-
-LONG __init ArcExecute(CHAR *Path, LONG Argc, CHAR *Argv[], CHAR *Envp[])
-{
-	return ARC_CALL4(exec, Path, Argc, Argv, Envp);
-}
diff --git a/arch/mips/fw/arc/time.c b/arch/mips/fw/arc/time.c
deleted file mode 100644
index 190cdb50b895..000000000000
--- a/arch/mips/fw/arc/time.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Extracting time information from ARCS prom.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- */
-#include <linux/init.h>
-
-#include <asm/fw/arc/types.h>
-#include <asm/sgialib.h>
-
-struct linux_tinfo * __init
-ArcGetTime(VOID)
-{
-	return (struct linux_tinfo *) ARC_CALL0(get_tinfo);
-}
-
-ULONG __init
-ArcGetRelativeTime(VOID)
-{
-	return ARC_CALL0(get_rtime);
-}
diff --git a/arch/mips/fw/arc/tree.c b/arch/mips/fw/arc/tree.c
deleted file mode 100644
index 924a37dc2569..000000000000
--- a/arch/mips/fw/arc/tree.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * PROM component device tree code.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 1999 Silicon Graphics, Inc.
- */
-#include <linux/init.h>
-#include <asm/fw/arc/types.h>
-#include <asm/sgialib.h>
-
-#undef DEBUG_PROM_TREE
-
-pcomponent * __init
-ArcGetPeer(pcomponent *Current)
-{
-	if (Current == PROM_NULL_COMPONENT)
-		return PROM_NULL_COMPONENT;
-
-	return (pcomponent *) ARC_CALL1(next_component, Current);
-}
-
-pcomponent * __init
-ArcGetChild(pcomponent *Current)
-{
-	return (pcomponent *) ARC_CALL1(child_component, Current);
-}
-
-pcomponent * __init
-ArcGetParent(pcomponent *Current)
-{
-	if (Current == PROM_NULL_COMPONENT)
-		return PROM_NULL_COMPONENT;
-
-	return (pcomponent *) ARC_CALL1(parent_component, Current);
-}
-
-LONG __init
-ArcGetConfigurationData(VOID *Buffer, pcomponent *Current)
-{
-	return ARC_CALL2(component_data, Buffer, Current);
-}
-
-pcomponent * __init
-ArcAddChild(pcomponent *Current, pcomponent *Template, VOID *ConfigurationData)
-{
-	return (pcomponent *)
-	       ARC_CALL3(child_add, Current, Template, ConfigurationData);
-}
-
-LONG __init
-ArcDeleteComponent(pcomponent *ComponentToDelete)
-{
-	return ARC_CALL1(comp_del, ComponentToDelete);
-}
-
-pcomponent * __init
-ArcGetComponent(CHAR *Path)
-{
-	return (pcomponent *)ARC_CALL1(component_by_path, Path);
-}
-
-#ifdef DEBUG_PROM_TREE
-
-static char *classes[] = {
-	"system", "processor", "cache", "adapter", "controller", "peripheral",
-	"memory"
-};
-
-static char *types[] = {
-	"arc", "cpu", "fpu", "picache", "pdcache", "sicache", "sdcache",
-	"sccache", "memdev", "eisa adapter", "tc adapter", "scsi adapter",
-	"dti adapter", "multi-func adapter", "disk controller",
-	"tp controller", "cdrom controller", "worm controller",
-	"serial controller", "net controller", "display controller",
-	"parallel controller", "pointer controller", "keyboard controller",
-	"audio controller", "misc controller", "disk peripheral",
-	"floppy peripheral", "tp peripheral", "modem peripheral",
-	"monitor peripheral", "printer peripheral", "pointer peripheral",
-	"keyboard peripheral", "terminal peripheral", "line peripheral",
-	"net peripheral", "misc peripheral", "anonymous"
-};
-
-static char *iflags[] = {
-	"bogus", "read only", "removable", "console in", "console out",
-	"input", "output"
-};
-
-static void __init
-dump_component(pcomponent *p)
-{
-	printk("[%p]:class<%s>type<%s>flags<%s>ver<%d>rev<%d>",
-	       p, classes[p->class], types[p->type],
-	       iflags[p->iflags], p->vers, p->rev);
-	printk("key<%08lx>\n\tamask<%08lx>cdsize<%d>ilen<%d>iname<%s>\n",
-	       p->key, p->amask, (int)p->cdsize, (int)p->ilen, p->iname);
-}
-
-static void __init
-traverse(pcomponent *p, int op)
-{
-	dump_component(p);
-	if(ArcGetChild(p))
-		traverse(ArcGetChild(p), 1);
-	if(ArcGetPeer(p) && op)
-		traverse(ArcGetPeer(p), 1);
-}
-
-void __init
-prom_testtree(void)
-{
-	pcomponent *p;
-
-	p = ArcGetChild(PROM_NULL_COMPONENT);
-	dump_component(p);
-	p = ArcGetChild(p);
-	while(p) {
-		dump_component(p);
-		p = ArcGetPeer(p);
-	}
-}
-
-#endif /* DEBUG_PROM_TREE  */
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
index 06d92fb37769..c238e95190ac 100644
--- a/arch/mips/generic/board-ocelot.c
+++ b/arch/mips/generic/board-ocelot.c
@@ -51,7 +51,7 @@ static void __init ocelot_earlyprintk_init(void)
 {
 	void __iomem *uart_base;
 
-	uart_base = ioremap_nocache(UART_UART, 0x20);
+	uart_base = ioremap(UART_UART, 0x20);
 	setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000);
 }
 
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index d5b8c4717ded..1de215b283d6 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -20,9 +20,9 @@
 #include <asm/smp-ops.h>
 #include <asm/time.h>
 
-static __initdata const void *fdt;
-static __initdata const struct mips_machine *mach;
-static __initdata const void *mach_match_data;
+static __initconst const void *fdt;
+static __initconst const struct mips_machine *mach;
+static __initconst const void *mach_match_data;
 
 void __init prom_init(void)
 {
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index c8b595c60910..61b0fc2026e6 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -13,7 +13,6 @@ generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += msi.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index feb069cbf44e..655f40ddb6d1 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -63,7 +63,7 @@
 	.endm
 
 	.macro	local_irq_disable reg=t0
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	lw      \reg, TI_PRE_COUNT($28)
 	addi    \reg, \reg, 1
 	sw      \reg, TI_PRE_COUNT($28)
@@ -73,7 +73,7 @@
 	xori	\reg, \reg, 1
 	mtc0	\reg, CP0_STATUS
 	irq_disable_hazard
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	lw      \reg, TI_PRE_COUNT($28)
 	addi    \reg, \reg, -1
 	sw      \reg, TI_PRE_COUNT($28)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index bb8658cc7f12..e5ac88392d1f 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -20,157 +20,176 @@
 #include <asm/compiler.h>
 #include <asm/cpu-features.h>
 #include <asm/cmpxchg.h>
+#include <asm/llsc.h>
+#include <asm/sync.h>
 #include <asm/war.h>
 
-/*
- * Using a branch-likely instruction to check the result of an sc instruction
- * works around a bug present in R10000 CPUs prior to revision 3.0 that could
- * cause ll-sc sequences to execute non-atomically.
- */
-#if R10000_LLSC_WAR
-# define __scbeqz "beqzl"
-#else
-# define __scbeqz "beqz"
-#endif
-
-#define ATOMIC_INIT(i)	  { (i) }
+#define ATOMIC_OPS(pfx, type)						\
+static __always_inline type pfx##_read(const pfx##_t *v)		\
+{									\
+	return READ_ONCE(v->counter);					\
+}									\
+									\
+static __always_inline void pfx##_set(pfx##_t *v, type i)		\
+{									\
+	WRITE_ONCE(v->counter, i);					\
+}									\
+									\
+static __always_inline type pfx##_cmpxchg(pfx##_t *v, type o, type n)	\
+{									\
+	return cmpxchg(&v->counter, o, n);				\
+}									\
+									\
+static __always_inline type pfx##_xchg(pfx##_t *v, type n)		\
+{									\
+	return xchg(&v->counter, n);					\
+}
 
-/*
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-#define atomic_read(v)		READ_ONCE((v)->counter)
+#define ATOMIC_INIT(i)		{ (i) }
+ATOMIC_OPS(atomic, int)
 
-/*
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-#define atomic_set(v, i)	WRITE_ONCE((v)->counter, (i))
+#ifdef CONFIG_64BIT
+# define ATOMIC64_INIT(i)	{ (i) }
+ATOMIC_OPS(atomic64, s64)
+#endif
 
-#define ATOMIC_OP(op, c_op, asm_op)					      \
-static __inline__ void atomic_##op(int i, atomic_t * v)			      \
-{									      \
-	if (kernel_uses_llsc) {						      \
-		int temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	ll	%0, %1		# atomic_" #op "	\n"   \
-		"	" #asm_op " %0, %2				\n"   \
-		"	sc	%0, %1					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	.set	pop					\n"   \
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		v->counter c_op i;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
+#define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc)			\
+static __inline__ void pfx##_##op(type i, pfx##_t * v)			\
+{									\
+	type temp;							\
+									\
+	if (!kernel_uses_llsc) {					\
+		unsigned long flags;					\
+									\
+		raw_local_irq_save(flags);				\
+		v->counter c_op i;					\
+		raw_local_irq_restore(flags);				\
+		return;							\
+	}								\
+									\
+	__asm__ __volatile__(						\
+	"	.set	push					\n"	\
+	"	.set	" MIPS_ISA_LEVEL "			\n"	\
+	"	" __SYNC(full, loongson3_war) "			\n"	\
+	"1:	" #ll "	%0, %1		# " #pfx "_" #op "	\n"	\
+	"	" #asm_op " %0, %2				\n"	\
+	"	" #sc "	%0, %1					\n"	\
+	"\t" __SC_BEQZ "%0, 1b					\n"	\
+	"	.set	pop					\n"	\
+	: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)		\
+	: "Ir" (i) : __LLSC_CLOBBER);					\
 }
 
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				      \
-static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)	      \
-{									      \
-	int result;							      \
-									      \
-	if (kernel_uses_llsc) {						      \
-		int temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	ll	%1, %2		# atomic_" #op "_return	\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	sc	%0, %2					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	.set	pop					\n"   \
-		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		result c_op i;						      \
-		v->counter = result;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
-									      \
-	return result;							      \
+#define ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc)		\
+static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v)	\
+{									\
+	type temp, result;						\
+									\
+	if (!kernel_uses_llsc) {					\
+		unsigned long flags;					\
+									\
+		raw_local_irq_save(flags);				\
+		result = v->counter;					\
+		result c_op i;						\
+		v->counter = result;					\
+		raw_local_irq_restore(flags);				\
+		return result;						\
+	}								\
+									\
+	__asm__ __volatile__(						\
+	"	.set	push					\n"	\
+	"	.set	" MIPS_ISA_LEVEL "			\n"	\
+	"	" __SYNC(full, loongson3_war) "			\n"	\
+	"1:	" #ll "	%1, %2		# " #pfx "_" #op "_return\n"	\
+	"	" #asm_op " %0, %1, %3				\n"	\
+	"	" #sc "	%0, %2					\n"	\
+	"\t" __SC_BEQZ "%0, 1b					\n"	\
+	"	" #asm_op " %0, %1, %3				\n"	\
+	"	.set	pop					\n"	\
+	: "=&r" (result), "=&r" (temp),					\
+	  "+" GCC_OFF_SMALL_ASM() (v->counter)				\
+	: "Ir" (i) : __LLSC_CLOBBER);					\
+									\
+	return result;							\
 }
 
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)				      \
-static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v)	      \
-{									      \
-	int result;							      \
-									      \
-	if (kernel_uses_llsc) {						      \
-		int temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	ll	%1, %2		# atomic_fetch_" #op "	\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	sc	%0, %2					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	.set	pop					\n"   \
-		"	move	%0, %1					\n"   \
-		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		v->counter c_op i;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
-									      \
-	return result;							      \
+#define ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)		\
+static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v)	\
+{									\
+	int temp, result;						\
+									\
+	if (!kernel_uses_llsc) {					\
+		unsigned long flags;					\
+									\
+		raw_local_irq_save(flags);				\
+		result = v->counter;					\
+		v->counter c_op i;					\
+		raw_local_irq_restore(flags);				\
+		return result;						\
+	}								\
+									\
+	__asm__ __volatile__(						\
+	"	.set	push					\n"	\
+	"	.set	" MIPS_ISA_LEVEL "			\n"	\
+	"	" __SYNC(full, loongson3_war) "			\n"	\
+	"1:	" #ll "	%1, %2		# " #pfx "_fetch_" #op "\n"	\
+	"	" #asm_op " %0, %1, %3				\n"	\
+	"	" #sc "	%0, %2					\n"	\
+	"\t" __SC_BEQZ "%0, 1b					\n"	\
+	"	.set	pop					\n"	\
+	"	move	%0, %1					\n"	\
+	: "=&r" (result), "=&r" (temp),					\
+	  "+" GCC_OFF_SMALL_ASM() (v->counter)				\
+	: "Ir" (i) : __LLSC_CLOBBER);					\
+									\
+	return result;							\
 }
 
-#define ATOMIC_OPS(op, c_op, asm_op)					      \
-	ATOMIC_OP(op, c_op, asm_op)					      \
-	ATOMIC_OP_RETURN(op, c_op, asm_op)				      \
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc)			\
+	ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc)			\
+	ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc)		\
+	ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)
 
-ATOMIC_OPS(add, +=, addu)
-ATOMIC_OPS(sub, -=, subu)
+ATOMIC_OPS(atomic, add, int, +=, addu, ll, sc)
+ATOMIC_OPS(atomic, sub, int, -=, subu, ll, sc)
 
 #define atomic_add_return_relaxed	atomic_add_return_relaxed
 #define atomic_sub_return_relaxed	atomic_sub_return_relaxed
 #define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
 #define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
 
+#ifdef CONFIG_64BIT
+ATOMIC_OPS(atomic64, add, s64, +=, daddu, lld, scd)
+ATOMIC_OPS(atomic64, sub, s64, -=, dsubu, lld, scd)
+# define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+# define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+# define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
+# define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+#endif /* CONFIG_64BIT */
+
 #undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op)					      \
-	ATOMIC_OP(op, c_op, asm_op)					      \
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc)			\
+	ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc)			\
+	ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)
 
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
+ATOMIC_OPS(atomic, and, int, &=, and, ll, sc)
+ATOMIC_OPS(atomic, or, int, |=, or, ll, sc)
+ATOMIC_OPS(atomic, xor, int, ^=, xor, ll, sc)
 
 #define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
 #define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
 #define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
 
+#ifdef CONFIG_64BIT
+ATOMIC_OPS(atomic64, and, s64, &=, and, lld, scd)
+ATOMIC_OPS(atomic64, or, s64, |=, or, lld, scd)
+ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd)
+# define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
+# define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
+# define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
+#endif
+
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
@@ -184,258 +203,66 @@ ATOMIC_OPS(xor, ^=, xor)
  * Atomically test @v and subtract @i if @v is greater or equal than @i.
  * The function returns the old value of @v minus @i.
  */
-static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
-{
-	int result;
-
-	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc) {
-		int temp;
-
-		loongson_llsc_mb();
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	"MIPS_ISA_LEVEL"			\n"
-		"1:	ll	%1, %2		# atomic_sub_if_positive\n"
-		"	.set	pop					\n"
-		"	subu	%0, %1, %3				\n"
-		"	move	%1, %0					\n"
-		"	bltz	%0, 2f					\n"
-		"	.set	push					\n"
-		"	.set	"MIPS_ISA_LEVEL"			\n"
-		"	sc	%1, %2					\n"
-		"\t" __scbeqz "	%1, 1b					\n"
-		"2:							\n"
-		"	.set	pop					\n"
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
-		: "Ir" (i) : __LLSC_CLOBBER);
-	} else {
-		unsigned long flags;
-
-		raw_local_irq_save(flags);
-		result = v->counter;
-		result -= i;
-		if (result >= 0)
-			v->counter = result;
-		raw_local_irq_restore(flags);
-	}
-
-	smp_llsc_mb();
-
-	return result;
+#define ATOMIC_SIP_OP(pfx, type, op, ll, sc)				\
+static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v)	\
+{									\
+	type temp, result;						\
+									\
+	smp_mb__before_atomic();					\
+									\
+	if (!kernel_uses_llsc) {					\
+		unsigned long flags;					\
+									\
+		raw_local_irq_save(flags);				\
+		result = v->counter;					\
+		result -= i;						\
+		if (result >= 0)					\
+			v->counter = result;				\
+		raw_local_irq_restore(flags);				\
+		smp_mb__after_atomic();					\
+		return result;						\
+	}								\
+									\
+	__asm__ __volatile__(						\
+	"	.set	push					\n"	\
+	"	.set	" MIPS_ISA_LEVEL "			\n"	\
+	"	" __SYNC(full, loongson3_war) "			\n"	\
+	"1:	" #ll "	%1, %2		# atomic_sub_if_positive\n"	\
+	"	.set	pop					\n"	\
+	"	" #op "	%0, %1, %3				\n"	\
+	"	move	%1, %0					\n"	\
+	"	bltz	%0, 2f					\n"	\
+	"	.set	push					\n"	\
+	"	.set	" MIPS_ISA_LEVEL "			\n"	\
+	"	" #sc "	%1, %2					\n"	\
+	"	" __SC_BEQZ "%1, 1b				\n"	\
+	"2:	" __SYNC(full, loongson3_war) "			\n"	\
+	"	.set	pop					\n"	\
+	: "=&r" (result), "=&r" (temp),					\
+	  "+" GCC_OFF_SMALL_ASM() (v->counter)				\
+	: "Ir" (i)							\
+	: __LLSC_CLOBBER);						\
+									\
+	/*								\
+	 * In the Loongson3 workaround case we already have a		\
+	 * completion barrier at 2: above, which is needed due to the	\
+	 * bltz that can branch	to code outside of the LL/SC loop. As	\
+	 * such, we don't need to emit another barrier here.		\
+	 */								\
+	if (!__SYNC_loongson3_war)					\
+		smp_mb__after_atomic();					\
+									\
+	return result;							\
 }
 
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- */
+ATOMIC_SIP_OP(atomic, int, subu, ll, sc)
 #define atomic_dec_if_positive(v)	atomic_sub_if_positive(1, v)
 
 #ifdef CONFIG_64BIT
-
-#define ATOMIC64_INIT(i)    { (i) }
-
-/*
- * atomic64_read - read atomic variable
- * @v: pointer of type atomic64_t
- *
- */
-#define atomic64_read(v)	READ_ONCE((v)->counter)
-
-/*
- * atomic64_set - set atomic variable
- * @v: pointer of type atomic64_t
- * @i: required value
- */
-#define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
-
-#define ATOMIC64_OP(op, c_op, asm_op)					      \
-static __inline__ void atomic64_##op(s64 i, atomic64_t * v)		      \
-{									      \
-	if (kernel_uses_llsc) {						      \
-		s64 temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	lld	%0, %1		# atomic64_" #op "	\n"   \
-		"	" #asm_op " %0, %2				\n"   \
-		"	scd	%0, %1					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	.set	pop					\n"   \
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		v->counter c_op i;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
-}
-
-#define ATOMIC64_OP_RETURN(op, c_op, asm_op)				      \
-static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)   \
-{									      \
-	s64 result;							      \
-									      \
-	if (kernel_uses_llsc) {						      \
-		s64 temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	lld	%1, %2		# atomic64_" #op "_return\n"  \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	scd	%0, %2					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	.set	pop					\n"   \
-		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		result c_op i;						      \
-		v->counter = result;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
-									      \
-	return result;							      \
-}
-
-#define ATOMIC64_FETCH_OP(op, c_op, asm_op)				      \
-static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)    \
-{									      \
-	s64 result;							      \
-									      \
-	if (kernel_uses_llsc) {						      \
-		s64 temp;						      \
-									      \
-		loongson_llsc_mb();					      \
-		__asm__ __volatile__(					      \
-		"	.set	push					\n"   \
-		"	.set	"MIPS_ISA_LEVEL"			\n"   \
-		"1:	lld	%1, %2		# atomic64_fetch_" #op "\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
-		"	scd	%0, %2					\n"   \
-		"\t" __scbeqz "	%0, 1b					\n"   \
-		"	move	%0, %1					\n"   \
-		"	.set	pop					\n"   \
-		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
-		: "Ir" (i) : __LLSC_CLOBBER);				      \
-	} else {							      \
-		unsigned long flags;					      \
-									      \
-		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		v->counter c_op i;					      \
-		raw_local_irq_restore(flags);				      \
-	}								      \
-									      \
-	return result;							      \
-}
-
-#define ATOMIC64_OPS(op, c_op, asm_op)					      \
-	ATOMIC64_OP(op, c_op, asm_op)					      \
-	ATOMIC64_OP_RETURN(op, c_op, asm_op)				      \
-	ATOMIC64_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC64_OPS(add, +=, daddu)
-ATOMIC64_OPS(sub, -=, dsubu)
-
-#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
-
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op, c_op, asm_op)					      \
-	ATOMIC64_OP(op, c_op, asm_op)					      \
-	ATOMIC64_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC64_OPS(and, &=, and)
-ATOMIC64_OPS(or, |=, or)
-ATOMIC64_OPS(xor, ^=, xor)
-
-#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-/*
- * atomic64_sub_if_positive - conditionally subtract integer from atomic
- *                            variable
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically test @v and subtract @i if @v is greater or equal than @i.
- * The function returns the old value of @v minus @i.
- */
-static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
-{
-	s64 result;
-
-	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc) {
-		s64 temp;
-
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	"MIPS_ISA_LEVEL"			\n"
-		"1:	lld	%1, %2		# atomic64_sub_if_positive\n"
-		"	dsubu	%0, %1, %3				\n"
-		"	move	%1, %0					\n"
-		"	bltz	%0, 1f					\n"
-		"	scd	%1, %2					\n"
-		"\t" __scbeqz "	%1, 1b					\n"
-		"1:							\n"
-		"	.set	pop					\n"
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
-		: "Ir" (i));
-	} else {
-		unsigned long flags;
-
-		raw_local_irq_save(flags);
-		result = v->counter;
-		result -= i;
-		if (result >= 0)
-			v->counter = result;
-		raw_local_irq_restore(flags);
-	}
-
-	smp_llsc_mb();
-
-	return result;
-}
-
-#define atomic64_cmpxchg(v, o, n) \
-	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
-
-/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- */
+ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd)
 #define atomic64_dec_if_positive(v)	atomic64_sub_if_positive(1, v)
+#endif
 
-#endif /* CONFIG_64BIT */
+#undef ATOMIC_SIP_OP
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 9228f7386220..49ff172a72b9 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -9,131 +9,26 @@
 #define __ASM_BARRIER_H
 
 #include <asm/addrspace.h>
+#include <asm/sync.h>
 
-/*
- * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
- * These values are used with the sync instruction to perform memory barriers.
- * Types of ordering guarantees available through the SYNC instruction:
- * - Completion Barriers
- * - Ordering Barriers
- * As compared to the completion barrier, the ordering barrier is a
- * lighter-weight operation as it does not require the specified instructions
- * before the SYNC to be already completed. Instead it only requires that those
- * specified instructions which are subsequent to the SYNC in the instruction
- * stream are never re-ordered for processing ahead of the specified
- * instructions which are before the SYNC in the instruction stream.
- * This potentially reduces how many cycles the barrier instruction must stall
- * before it completes.
- * Implementations that do not use any of the non-zero values of stype to define
- * different barriers, such as ordering barriers, must make those stype values
- * act the same as stype zero.
- */
-
-/*
- * Completion barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- *   that occurs in the instruction stream before the SYNC instruction must be
- *   already globally performed before any synchronizable specified memory
- *   instructions that occur after the SYNC are allowed to be performed, with
- *   respect to any other processor or coherent I/O module.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- *   performed.
- *
- * - A stype value of zero will always be defined such that it performs the most
- *   complete set of synchronization operations that are defined.This means
- *   stype zero always does a completion barrier that affects both loads and
- *   stores preceding the SYNC instruction and both loads and stores that are
- *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
- *   by the architecture or specific implementations to perform synchronization
- *   behaviors that are less complete than that of stype zero. If an
- *   implementation does not use one of these non-zero values to define a
- *   different synchronization behavior, then that non-zero value of stype must
- *   act the same as stype zero completion barrier. This allows software written
- *   for an implementation with a lighter-weight barrier to work on another
- *   implementation which only implements the stype zero completion barrier.
- *
- * - A completion barrier is required, potentially in conjunction with SSNOP (in
- *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
- *   to guarantee that memory reference results are visible across operating
- *   mode changes. For example, a completion barrier is required on some
- *   implementations on entry to and exit from Debug Mode to guarantee that
- *   memory effects are handled correctly.
- */
-
-/*
- * stype 0 - A completion barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: Loads, Stores
- */
-#define STYPE_SYNC 0x0
-
-/*
- * Ordering barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- *   that occurs in the instruction stream before the SYNC instruction must
- *   reach a stage in the load/store datapath after which no instruction
- *   re-ordering is possible before any synchronizable specified memory
- *   instruction which occurs after the SYNC instruction in the instruction
- *   stream reaches the same stage in the load/store datapath.
- *
- * - If any memory instruction before the SYNC instruction in program order,
- *   generates a memory request to the external memory and any memory
- *   instruction after the SYNC instruction in program order also generates a
- *   memory request to external memory, the memory request belonging to the
- *   older instruction must be globally performed before the time the memory
- *   request belonging to the younger instruction is globally performed.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- *   performed.
- */
+static inline void __sync(void)
+{
+	asm volatile(__SYNC(full, always) ::: "memory");
+}
 
-/*
- * stype 0x10 - An ordering barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: N/A
- */
-#define STYPE_SYNC_MB 0x10
+static inline void rmb(void)
+{
+	asm volatile(__SYNC(rmb, always) ::: "memory");
+}
+#define rmb rmb
 
-/*
- * stype 0x14 - A completion barrier specific to global invalidations
- *
- * When a sync instruction of this type completes any preceding GINVI or GINVT
- * operation has been globalized & completed on all coherent CPUs. Anything
- * that the GINV* instruction should invalidate will have been invalidated on
- * all coherent CPUs when this instruction completes. It is implementation
- * specific whether the GINV* instructions themselves will ensure completion,
- * or this sync type will.
- *
- * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
- * this sync type also requires that previous SYNCI operations have completed.
- */
-#define STYPE_GINV	0x14
+static inline void wmb(void)
+{
+	asm volatile(__SYNC(wmb, always) ::: "memory");
+}
+#define wmb wmb
 
-#ifdef CONFIG_CPU_HAS_SYNC
-#define __sync()				\
-	__asm__ __volatile__(			\
-		".set	push\n\t"		\
-		".set	noreorder\n\t"		\
-		".set	mips2\n\t"		\
-		"sync\n\t"			\
-		".set	pop"			\
-		: /* no output */		\
-		: /* no input */		\
-		: "memory")
-#else
-#define __sync()	do { } while(0)
-#endif
+#define fast_mb()	__sync()
 
 #define __fast_iob()				\
 	__asm__ __volatile__(			\
@@ -146,17 +41,8 @@
 		: "m" (*(int *)CKSEG1)		\
 		: "memory")
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
-# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
-
-# define fast_wmb()	__syncw()
-# define fast_rmb()	barrier()
-# define fast_mb()	__sync()
 # define fast_iob()	do { } while (0)
 #else /* ! CONFIG_CPU_CAVIUM_OCTEON */
-# define fast_wmb()	__sync()
-# define fast_rmb()	__sync()
-# define fast_mb()	__sync()
 # ifdef CONFIG_SGI_IP28
 #  define fast_iob()				\
 	__asm__ __volatile__(			\
@@ -192,23 +78,14 @@
 
 #endif /* !CONFIG_CPU_HAS_WB */
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
-
 #if defined(CONFIG_WEAK_ORDERING)
-# ifdef CONFIG_CPU_CAVIUM_OCTEON
-#  define __smp_mb()	__sync()
-#  define __smp_rmb()	barrier()
-#  define __smp_wmb()	__syncw()
-# else
-#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
-#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
-#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
-# endif
+# define __smp_mb()	__sync()
+# define __smp_rmb()	rmb()
+# define __smp_wmb()	wmb()
 #else
-#define __smp_mb()	barrier()
-#define __smp_rmb()	barrier()
-#define __smp_wmb()	barrier()
+# define __smp_mb()	barrier()
+# define __smp_rmb()	barrier()
+# define __smp_wmb()	barrier()
 #endif
 
 /*
@@ -218,13 +95,14 @@
  * ordering will be done by smp_llsc_mb() and friends.
  */
 #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
-#define __WEAK_LLSC_MB		"	sync	\n"
-#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
-#define __LLSC_CLOBBER
+# define __WEAK_LLSC_MB		sync
+# define smp_llsc_mb() \
+	__asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory")
+# define __LLSC_CLOBBER
 #else
-#define __WEAK_LLSC_MB		"		\n"
-#define smp_llsc_mb()		do { } while (0)
-#define __LLSC_CLOBBER		"memory"
+# define __WEAK_LLSC_MB
+# define smp_llsc_mb()		do { } while (0)
+# define __LLSC_CLOBBER		"memory"
 #endif
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
@@ -241,52 +119,22 @@
 #define nudge_writes() mb()
 #endif
 
-#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
-#define __smp_mb__after_atomic()	smp_llsc_mb()
-
 /*
- * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or prefetch) in between an LL & SC can cause the SC instruction to
- * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
- * containing such sequences, this bug bites harder than we might otherwise
- * expect due to reordering & speculation:
- *
- * 1) A memory access appearing prior to the LL in program order may actually
- *    be executed after the LL - this is the reordering case.
- *
- *    In order to avoid this we need to place a memory barrier (ie. a SYNC
- *    instruction) prior to every LL instruction, in between it and any earlier
- *    memory access instructions.
- *
- *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
- *
- * 2) If a conditional branch exists between an LL & SC with a target outside
- *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
- *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the LL-SC loop.
- *
- *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
- *    at each affected branch target, for which we also use loongson_llsc_mb()
- *    defined below.
- *
- *    This case affects all current Loongson 3 CPUs.
- *
- * The above described cases cause an error in the cache coherence protocol;
- * such that the Invalidate of a competing LL-SC goes 'missing' and SC
- * erroneously observes its core still has Exclusive state and lets the SC
- * proceed.
- *
- * Therefore the error only occurs on SMP systems.
+ * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have
+ * a completion barrier immediately preceding the LL instruction. Therefore we
+ * can skip emitting a barrier from __smp_mb__before_atomic().
  */
-#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+# define __smp_mb__before_atomic()
 #else
-#define loongson_llsc_mb()	do { } while (0)
+# define __smp_mb__before_atomic()	__smp_mb__before_llsc()
 #endif
 
+#define __smp_mb__after_atomic()	smp_llsc_mb()
+
 static inline void sync_ginv(void)
 {
-	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
+	asm volatile(__SYNC(ginv, always));
 }
 
 #include <asm-generic/barrier.h>
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 985d6a02f9ea..a74769940fbd 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -13,16 +13,55 @@
 #error only <linux/bitops.h> can be included directly
 #endif
 
+#include <linux/bits.h>
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/compiler.h>
 #include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
 #include <asm/llsc.h>
 #include <asm/sgidefs.h>
 #include <asm/war.h>
 
+#define __bit_op(mem, insn, inputs...) do {			\
+	unsigned long temp;					\
+								\
+	asm volatile(						\
+	"	.set		push			\n"	\
+	"	.set		" MIPS_ISA_LEVEL "	\n"	\
+	"	" __SYNC(full, loongson3_war) "		\n"	\
+	"1:	" __LL		"%0, %1			\n"	\
+	"	" insn		"			\n"	\
+	"	" __SC		"%0, %1			\n"	\
+	"	" __SC_BEQZ	"%0, 1b			\n"	\
+	"	.set		pop			\n"	\
+	: "=&r"(temp), "+" GCC_OFF_SMALL_ASM()(mem)		\
+	: inputs						\
+	: __LLSC_CLOBBER);					\
+} while (0)
+
+#define __test_bit_op(mem, ll_dst, insn, inputs...) ({		\
+	unsigned long orig, temp;				\
+								\
+	asm volatile(						\
+	"	.set		push			\n"	\
+	"	.set		" MIPS_ISA_LEVEL "	\n"	\
+	"	" __SYNC(full, loongson3_war) "		\n"	\
+	"1:	" __LL		ll_dst ", %2		\n"	\
+	"	" insn		"			\n"	\
+	"	" __SC		"%1, %2			\n"	\
+	"	" __SC_BEQZ	"%1, 1b			\n"	\
+	"	.set		pop			\n"	\
+	: "=&r"(orig), "=&r"(temp),				\
+	  "+" GCC_OFF_SMALL_ASM()(mem)				\
+	: inputs						\
+	: __LLSC_CLOBBER);					\
+								\
+	orig;							\
+})
+
 /*
  * These are the "slower" versions of the functions and are in bitops.c.
  * These functions call raw_local_irq_{save,restore}().
@@ -30,8 +69,6 @@
 void __mips_set_bit(unsigned long nr, volatile unsigned long *addr);
 void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr);
 void __mips_change_bit(unsigned long nr, volatile unsigned long *addr);
-int __mips_test_and_set_bit(unsigned long nr,
-			    volatile unsigned long *addr);
 int __mips_test_and_set_bit_lock(unsigned long nr,
 				 volatile unsigned long *addr);
 int __mips_test_and_clear_bit(unsigned long nr,
@@ -52,51 +89,20 @@ int __mips_test_and_change_bit(unsigned long nr,
  */
 static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-	int bit = nr & SZLONG_MASK;
-	unsigned long temp;
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
 
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL "%0, %1			# set_bit	\n"
-		"	or	%0, %2					\n"
-		"	" __SC	"%0, %1					\n"
-		"	beqzl	%0, 1b					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m)
-		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)
-		: __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	" __LL "%0, %1		# set_bit	\n"
-			"	" __INS "%0, %3, %2, 1			\n"
-			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (bit), "r" (~0)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
-	} else if (kernel_uses_llsc) {
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1		# set_bit	\n"
-			"	or	%0, %2				\n"
-			"	" __SC	"%0, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-	} else
+	if (!kernel_uses_llsc) {
 		__mips_set_bit(nr, addr);
+		return;
+	}
+
+	if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) {
+		__bit_op(*m, __INS "%0, %3, %2, 1", "i"(bit), "r"(~0));
+		return;
+	}
+
+	__bit_op(*m, "or\t%0, %2", "ir"(BIT(bit)));
 }
 
 /*
@@ -111,51 +117,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-	int bit = nr & SZLONG_MASK;
-	unsigned long temp;
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
 
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL "%0, %1			# clear_bit	\n"
-		"	and	%0, %2					\n"
-		"	" __SC "%0, %1					\n"
-		"	beqzl	%0, 1b					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-		: "ir" (~(1UL << bit))
-		: __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	" __LL "%0, %1		# clear_bit	\n"
-			"	" __INS "%0, $0, %2, 1			\n"
-			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
-	} else if (kernel_uses_llsc) {
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1		# clear_bit	\n"
-			"	and	%0, %2				\n"
-			"	" __SC "%0, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (~(1UL << bit))
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-	} else
+	if (!kernel_uses_llsc) {
 		__mips_clear_bit(nr, addr);
+		return;
+	}
+
+	if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) {
+		__bit_op(*m, __INS "%0, $0, %2, 1", "i"(bit));
+		return;
+	}
+
+	__bit_op(*m, "and\t%0, %2", "ir"(~BIT(bit)));
 }
 
 /*
@@ -183,159 +158,61 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad
  */
 static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	int bit = nr & SZLONG_MASK;
-
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		__asm__ __volatile__(
-		"	.set	push				\n"
-		"	.set	arch=r4000			\n"
-		"1:	" __LL "%0, %1		# change_bit	\n"
-		"	xor	%0, %2				\n"
-		"	" __SC	"%0, %1				\n"
-		"	beqzl	%0, 1b				\n"
-		"	.set	pop				\n"
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-		: "ir" (1UL << bit)
-		: __LLSC_CLOBBER);
-	} else if (kernel_uses_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1		# change_bit	\n"
-			"	xor	%0, %2				\n"
-			"	" __SC	"%0, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-			: "ir" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-	} else
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
+
+	if (!kernel_uses_llsc) {
 		__mips_change_bit(nr, addr);
+		return;
+	}
+
+	__bit_op(*m, "xor\t%0, %2", "ir"(BIT(bit)));
 }
 
 /*
- * test_and_set_bit - Set a bit and return its old value
+ * test_and_set_bit_lock - Set a bit and return its old value
  * @nr: Bit to set
  * @addr: Address to count from
  *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
+ * This operation is atomic and implies acquire ordering semantics
+ * after the memory operation.
  */
-static inline int test_and_set_bit(unsigned long nr,
+static inline int test_and_set_bit_lock(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	int bit = nr & SZLONG_MASK;
-	unsigned long res;
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
+	unsigned long res, orig;
 
-	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
-		"	and	%2, %0, %3				\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-		: "r" (1UL << bit)
-		: __LLSC_CLOBBER);
-	} else if (kernel_uses_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1	# test_and_set_bit	\n"
-			"	or	%2, %0, %3			\n"
-			"	" __SC	"%2, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "r" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!res));
-
-		res = temp & (1UL << bit);
-	} else
-		res = __mips_test_and_set_bit(nr, addr);
+	if (!kernel_uses_llsc) {
+		res = __mips_test_and_set_bit_lock(nr, addr);
+	} else {
+		orig = __test_bit_op(*m, "%0",
+				     "or\t%1, %0, %3",
+				     "ir"(BIT(bit)));
+		res = (orig & BIT(bit)) != 0;
+	}
 
 	smp_llsc_mb();
 
-	return res != 0;
+	return res;
 }
 
 /*
- * test_and_set_bit_lock - Set a bit and return its old value
+ * test_and_set_bit - Set a bit and return its old value
  * @nr: Bit to set
  * @addr: Address to count from
  *
- * This operation is atomic and implies acquire ordering semantics
- * after the memory operation.
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
  */
-static inline int test_and_set_bit_lock(unsigned long nr,
+static inline int test_and_set_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	int bit = nr & SZLONG_MASK;
-	unsigned long res;
-
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
-		"	and	%2, %0, %3				\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "+m" (*m), "=&r" (res)
-		: "r" (1UL << bit)
-		: __LLSC_CLOBBER);
-	} else if (kernel_uses_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL "%0, %1	# test_and_set_bit	\n"
-			"	or	%2, %0, %3			\n"
-			"	" __SC	"%2, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "r" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!res));
-
-		res = temp & (1UL << bit);
-	} else
-		res = __mips_test_and_set_bit_lock(nr, addr);
-
-	smp_llsc_mb();
-
-	return res != 0;
+	smp_mb__before_atomic();
+	return test_and_set_bit_lock(nr, addr);
 }
+
 /*
  * test_and_clear_bit - Clear a bit and return its old value
  * @nr: Bit to clear
@@ -347,71 +224,30 @@ static inline int test_and_set_bit_lock(unsigned long nr,
 static inline int test_and_clear_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	int bit = nr & SZLONG_MASK;
-	unsigned long res;
-
-	smp_mb__before_llsc();
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
+	unsigned long res, orig;
 
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
+	smp_mb__before_atomic();
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	xor	%2, %3					\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
-		"	and	%2, %0, %3				\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-		: "r" (1UL << bit)
-		: __LLSC_CLOBBER);
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-	} else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
-			"	" __EXT "%2, %0, %3, 1			\n"
-			"	" __INS "%0, $0, %3, 1			\n"
-			"	" __SC	"%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "ir" (bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!temp));
-#endif
-	} else if (kernel_uses_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
-			"	or	%2, %0, %3			\n"
-			"	xor	%2, %3				\n"
-			"	" __SC	"%2, %1				\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "r" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!res));
-
-		res = temp & (1UL << bit);
-	} else
+	if (!kernel_uses_llsc) {
 		res = __mips_test_and_clear_bit(nr, addr);
+	} else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) {
+		res = __test_bit_op(*m, "%1",
+				    __EXT "%0, %1, %3, 1;"
+				    __INS "%1, $0, %3, 1",
+				    "i"(bit));
+	} else {
+		orig = __test_bit_op(*m, "%0",
+				     "or\t%1, %0, %3;"
+				     "xor\t%1, %1, %3",
+				     "ir"(BIT(bit)));
+		res = (orig & BIT(bit)) != 0;
+	}
 
 	smp_llsc_mb();
 
-	return res != 0;
+	return res;
 }
 
 /*
@@ -425,54 +261,29 @@ static inline int test_and_clear_bit(unsigned long nr,
 static inline int test_and_change_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	int bit = nr & SZLONG_MASK;
-	unsigned long res;
+	volatile unsigned long *m = &addr[BIT_WORD(nr)];
+	int bit = nr % BITS_PER_LONG;
+	unsigned long res, orig;
 
-	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
+	smp_mb__before_atomic();
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	arch=r4000				\n"
-		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
-		"	xor	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
-		"	and	%2, %0, %3				\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-		: "r" (1UL << bit)
-		: __LLSC_CLOBBER);
-	} else if (kernel_uses_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-		unsigned long temp;
-
-		loongson_llsc_mb();
-		do {
-			__asm__ __volatile__(
-			"	.set	push				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	" __LL	"%0, %1 # test_and_change_bit	\n"
-			"	xor	%2, %0, %3			\n"
-			"	" __SC	"\t%2, %1			\n"
-			"	.set	pop				\n"
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
-			: "r" (1UL << bit)
-			: __LLSC_CLOBBER);
-		} while (unlikely(!res));
-
-		res = temp & (1UL << bit);
-	} else
+	if (!kernel_uses_llsc) {
 		res = __mips_test_and_change_bit(nr, addr);
+	} else {
+		orig = __test_bit_op(*m, "%0",
+				     "xor\t%1, %0, %3",
+				     "ir"(BIT(bit)));
+		res = (orig & BIT(bit)) != 0;
+	}
 
 	smp_llsc_mb();
 
-	return res != 0;
+	return res;
 }
 
+#undef __bit_op
+#undef __test_bit_op
+
 #include <asm-generic/bitops/non-atomic.h>
 
 /*
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 34d62229dea5..d41a5057bc69 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -61,7 +61,7 @@
 /*
  * Valid machtype for Loongson family
  */
-enum loongson_machine_type {
+enum loongson2ef_machine_type {
 	MACH_LOONGSON_UNKNOWN,
 	MACH_LEMOTE_FL2E,
 	MACH_LEMOTE_FL2F,
@@ -70,7 +70,6 @@ enum loongson_machine_type {
 	MACH_DEXXON_GDIUM2F10,
 	MACH_LEMOTE_NAS,
 	MACH_LEMOTE_LL2F,
-	MACH_LOONGSON_GENERIC,
 	MACH_LOONGSON_END
 };
 
@@ -99,6 +98,7 @@ extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min,  phys_ad
 
 extern void prom_init(void);
 extern void prom_free_prom_memory(void);
+extern void prom_cleanup(void);
 
 extern void free_init_pages(const char *what,
 			    unsigned long begin, unsigned long end);
diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h
index d8ab8b7129b5..d72dc6e1cf3c 100644
--- a/arch/mips/include/asm/bugs.h
+++ b/arch/mips/include/asm/bugs.h
@@ -26,9 +26,8 @@ extern void check_bugs64(void);
 
 static inline void check_bugs_early(void)
 {
-#ifdef CONFIG_64BIT
-	check_bugs64_early();
-#endif
+	if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+		check_bugs64_early();
 }
 
 static inline void check_bugs(void)
@@ -37,19 +36,18 @@ static inline void check_bugs(void)
 
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 	check_bugs32();
-#ifdef CONFIG_64BIT
-	check_bugs64();
-#endif
+
+	if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+		check_bugs64();
 }
 
 static inline int r4k_daddiu_bug(void)
 {
-#ifdef CONFIG_64BIT
+	if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+		return 0;
+
 	WARN_ON(daddiu_bug < 0);
 	return daddiu_bug != 0;
-#else
-	return 0;
-#endif
 }
 
 #endif /* _ASM_BUGS_H */
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index f6136871561d..5b0b3a6777ea 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -11,20 +11,11 @@
 #include <linux/bug.h>
 #include <linux/irqflags.h>
 #include <asm/compiler.h>
+#include <asm/llsc.h>
+#include <asm/sync.h>
 #include <asm/war.h>
 
 /*
- * Using a branch-likely instruction to check the result of an sc instruction
- * works around a bug present in R10000 CPUs prior to revision 3.0 that could
- * cause ll-sc sequences to execute non-atomically.
- */
-#if R10000_LLSC_WAR
-# define __scbeqz "beqzl"
-#else
-# define __scbeqz "beqz"
-#endif
-
-/*
  * These functions doesn't exist, so if they are called you'll either:
  *
  * - Get an error at compile-time due to __compiletime_error, if supported by
@@ -46,18 +37,18 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
-		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
 		"	.set	push				\n"	\
 		"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"	\
+		"	" __SYNC(full, loongson3_war) "		\n"	\
 		"1:	" ld "	%0, %2		# __xchg_asm	\n"	\
 		"	.set	pop				\n"	\
 		"	move	$1, %z3				\n"	\
 		"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"	\
 		"	" st "	$1, %1				\n"	\
-		"\t" __scbeqz "	$1, 1b				\n"	\
+		"\t" __SC_BEQZ	"$1, 1b				\n"	\
 		"	.set	pop				\n"	\
 		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)			\
@@ -103,7 +94,13 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
 ({									\
 	__typeof__(*(ptr)) __res;					\
 									\
-	smp_mb__before_llsc();						\
+	/*								\
+	 * In the Loongson3 workaround case __xchg_asm() already	\
+	 * contains a completion barrier prior to the LL, so we don't	\
+	 * need to emit an extra one here.				\
+	 */								\
+	if (!__SYNC_loongson3_war)					\
+		smp_mb__before_llsc();					\
 									\
 	__res = (__typeof__(*(ptr)))					\
 		__xchg((ptr), (unsigned long)(x), sizeof(*(ptr)));	\
@@ -118,25 +115,24 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
-		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"	" __SYNC(full, loongson3_war) "		\n"	\
 		"1:	" ld "	%0, %2		# __cmpxchg_asm \n"	\
 		"	bne	%0, %z3, 2f			\n"	\
 		"	.set	pop				\n"	\
 		"	move	$1, %z4				\n"	\
 		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"	" st "	$1, %1				\n"	\
-		"\t" __scbeqz "	$1, 1b				\n"	\
+		"\t" __SC_BEQZ	"$1, 1b				\n"	\
 		"	.set	pop				\n"	\
-		"2:						\n"	\
+		"2:	" __SYNC(full, loongson3_war) "		\n"	\
 		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)	\
 		: __LLSC_CLOBBER);					\
-		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long __flags;					\
 									\
@@ -190,9 +186,23 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 ({									\
 	__typeof__(*(ptr)) __res;					\
 									\
-	smp_mb__before_llsc();						\
+	/*								\
+	 * In the Loongson3 workaround case __cmpxchg_asm() already	\
+	 * contains a completion barrier prior to the LL, so we don't	\
+	 * need to emit an extra one here.				\
+	 */								\
+	if (!__SYNC_loongson3_war)					\
+		smp_mb__before_llsc();					\
+									\
 	__res = cmpxchg_local((ptr), (old), (new));			\
-	smp_llsc_mb();							\
+									\
+	/*								\
+	 * In the Loongson3 workaround case __cmpxchg_asm() already	\
+	 * contains a completion barrier after the SC, so we don't	\
+	 * need to emit an extra one here.				\
+	 */								\
+	if (!__SYNC_loongson3_war)					\
+		smp_llsc_mb();						\
 									\
 	__res;								\
 })
@@ -233,11 +243,11 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	 */
 	local_irq_save(flags);
 
-	loongson_llsc_mb();
 	asm volatile(
 	"	.set	push				\n"
 	"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"
 	/* Load 64 bits from ptr */
+	"	" __SYNC(full, loongson3_war) "		\n"
 	"1:	lld	%L0, %3		# __cmpxchg64	\n"
 	/*
 	 * Split the 64 bit value we loaded into the 2 registers that hold the
@@ -269,9 +279,9 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	/* Attempt to store new at ptr */
 	"	scd	%L1, %2				\n"
 	/* If we failed, loop! */
-	"\t" __scbeqz "	%L1, 1b				\n"
+	"\t" __SC_BEQZ "%L1, 1b				\n"
 	"	.set	pop				\n"
-	"2:						\n"
+	"2:	" __SYNC(full, loongson3_war) "		\n"
 	: "=&r"(ret),
 	  "=&r"(tmp),
 	  "=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr)
@@ -279,7 +289,6 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	  "r" (old),
 	  "r" (new)
 	: "memory");
-	loongson_llsc_mb();
 
 	local_irq_restore(flags);
 	return ret;
@@ -312,6 +321,4 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 # endif /* !CONFIG_SMP */
 #endif /* !CONFIG_64BIT */
 
-#undef __scbeqz
-
 #endif /* __ASM_CMPXCHG_H */
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index c99166eadbde..255afcdd79c9 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -100,24 +100,6 @@ typedef u32		compat_sigset_word;
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	/* cast to a __user pointer via "unsigned long" makes sparse happy */
-	return (void __user *)(unsigned long)(long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = (struct pt_regs *)
diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
index 63b3468ede4c..6b7396a6a115 100644
--- a/arch/mips/include/asm/cop2.h
+++ b/arch/mips/include/asm/cop2.h
@@ -33,7 +33,7 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *);
 #define cop2_present		1
 #define cop2_lazy_restore	0
 
-#elif defined(CONFIG_CPU_LOONGSON3)
+#elif defined(CONFIG_CPU_LOONGSON64)
 
 #define cop2_present		1
 #define cop2_lazy_restore	1
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 7bbb66760a07..49f0061a6051 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -17,16 +17,16 @@ static inline int __pure __get_cpu_type(const int cpu_type)
 	switch (cpu_type) {
 #if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
     defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 #endif
 
-#ifdef CONFIG_SYS_HAS_CPU_LOONGSON3
-	case CPU_LOONGSON3:
+#ifdef CONFIG_SYS_HAS_CPU_LOONGSON64
+	case CPU_LOONGSON64:
 #endif
 
 #if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \
     defined(CONFIG_SYS_HAS_CPU_LOONGSON1C)
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 #endif
 
 #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 7fddcb8350c6..ea830783d663 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -91,7 +91,9 @@
 #define PRID_IMP_LOONGSON_32	0x4200  /* Loongson-1 */
 #define PRID_IMP_R5432		0x5400
 #define PRID_IMP_R5500		0x5500
-#define PRID_IMP_LOONGSON_64	0x6300  /* Loongson-2/3 */
+#define PRID_IMP_LOONGSON_64R	0x6100  /* Reduced Loongson-2 */
+#define PRID_IMP_LOONGSON_64C	0x6300  /* Classic Loongson-2 and Loongson-3 */
+#define PRID_IMP_LOONGSON_64G	0xc000  /* Generic Loongson-2 and Loongson-3 */
 
 #define PRID_IMP_UNKNOWN	0xff00
 
@@ -310,15 +312,15 @@ enum cpu_type_enum {
 	 */
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
-	CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON1, CPU_M14KC,
+	CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON32, CPU_M14KC,
 	CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K,
 	CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250,
 
 	/*
 	 * MIPS64 class processors
 	 */
-	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
-	CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
+	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2EF,
+	CPU_LOONGSON64, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
 	CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_I6500,
 
 	CPU_QEMU_GENERIC,
diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h
index b5c240806e1b..14e352651ce9 100644
--- a/arch/mips/include/asm/dma-direct.h
+++ b/arch/mips/include/asm/dma-direct.h
@@ -2,14 +2,6 @@
 #ifndef _MIPS_DMA_DIRECT_H
 #define _MIPS_DMA_DIRECT_H 1
 
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	if (!dev->dma_mask)
-		return false;
-
-	return addr + size - 1 <= *dev->dma_mask;
-}
-
 dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
 phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h
index 6842ffafd1e7..1784d4348c36 100644
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h
@@ -70,7 +70,7 @@ enum fixed_addresses {
 #include <asm-generic/fixmap.h>
 
 #define kmap_get_fixmap_pte(vaddr)					\
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
+	pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)), (vaddr))
 
 /*
  * Called from pgtable_init()
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b83b0397462d..110220705e97 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -16,6 +16,7 @@
 #include <asm/barrier.h>
 #include <asm/compiler.h>
 #include <asm/errno.h>
+#include <asm/sync.h>
 #include <asm/war.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)		\
@@ -32,7 +33,7 @@
 		"	.set	arch=r4000			\n"	\
 		"2:	sc	$1, %2				\n"	\
 		"	beqzl	$1, 1b				\n"	\
-		__WEAK_LLSC_MB						\
+		__stringify(__WEAK_LLSC_MB) "			\n"	\
 		"3:						\n"	\
 		"	.insn					\n"	\
 		"	.set	pop				\n"	\
@@ -50,19 +51,19 @@
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else if (cpu_has_llsc) {					\
-		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
+		"	" __SYNC(full, loongson3_war) "		\n"	\
 		"1:	"user_ll("%1", "%4")" # __futex_atomic_op\n"	\
 		"	.set	pop				\n"	\
 		"	" insn	"				\n"	\
 		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"2:	"user_sc("$1", "%2")"			\n"	\
 		"	beqz	$1, 1b				\n"	\
-		__WEAK_LLSC_MB						\
+		__stringify(__WEAK_LLSC_MB) "			\n"	\
 		"3:						\n"	\
 		"	.insn					\n"	\
 		"	.set	pop				\n"	\
@@ -147,7 +148,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		"	.set	arch=r4000				\n"
 		"2:	sc	$1, %2					\n"
 		"	beqzl	$1, 1b					\n"
-		__WEAK_LLSC_MB
+		__stringify(__WEAK_LLSC_MB) "				\n"
 		"3:							\n"
 		"	.insn						\n"
 		"	.set	pop					\n"
@@ -164,13 +165,13 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		  "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
-		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"# futex_atomic_cmpxchg_inatomic			\n"
 		"	.set	push					\n"
 		"	.set	noat					\n"
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
+		"	" __SYNC(full, loongson3_war) "			\n"
 		"1:	"user_ll("%1", "%3")"				\n"
 		"	bne	%1, %z4, 3f				\n"
 		"	.set	pop					\n"
@@ -178,8 +179,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"2:	"user_sc("$1", "%2")"				\n"
 		"	beqz	$1, 1b					\n"
-		__WEAK_LLSC_MB
-		"3:							\n"
+		"3:	" __SYNC_ELSE(full, loongson3_war, __WEAK_LLSC_MB) "\n"
 		"	.insn						\n"
 		"	.set	pop					\n"
 		"	.section .fixup,\"ax\"				\n"
@@ -194,7 +194,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
-		loongson_llsc_mb();
 	} else
 		return -ENOSYS;
 
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index 0fa27446869a..a4f48b0f5541 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -158,7 +158,7 @@ do {									\
 } while (0)
 
 #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
-	defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
+	defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
 	defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
 
 /*
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 2b7b56736372..cf1f2a4a2418 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -227,29 +227,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
  */
 #define ioremap(offset, size)						\
 	__ioremap_mode((offset), (size), _CACHE_UNCACHED)
-
-/*
- * ioremap_nocache     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- */
-#define ioremap_nocache(offset, size)					\
-	__ioremap_mode((offset), (size), _CACHE_UNCACHED)
-#define ioremap_uc ioremap_nocache
+#define ioremap_uc		ioremap
 
 /*
  * ioremap_cache -	map bus memory into CPU space
@@ -306,7 +284,7 @@ static inline void iounmap(const volatile void __iomem *addr)
 #undef __IS_KSEG1
 }
 
-#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64)
 #define war_io_reorder_wmb()		wmb()
 #else
 #define war_io_reorder_wmb()		barrier()
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index f0b862a83816..c4728bbdf15b 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -41,7 +41,7 @@ static inline unsigned long arch_local_irq_save(void)
 	"	.set	push						\n"
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
-#if defined(CONFIG_CPU_LOONGSON3) || defined (CONFIG_CPU_LOONGSON1)
+#if defined(CONFIG_CPU_LOONGSON64) || defined(CONFIG_CPU_LOONGSON32)
 	"	mfc0	%[flags], $12					\n"
 	"	di							\n"
 #else
diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h
index c6d17d171147..c49738bc3bda 100644
--- a/arch/mips/include/asm/llsc.h
+++ b/arch/mips/include/asm/llsc.h
@@ -9,20 +9,31 @@
 #ifndef __ASM_LLSC_H
 #define __ASM_LLSC_H
 
+#include <asm/isa-rev.h>
+
 #if _MIPS_SZLONG == 32
-#define SZLONG_LOG 5
-#define SZLONG_MASK 31UL
 #define __LL		"ll	"
 #define __SC		"sc	"
 #define __INS		"ins	"
 #define __EXT		"ext	"
 #elif _MIPS_SZLONG == 64
-#define SZLONG_LOG 6
-#define SZLONG_MASK 63UL
 #define __LL		"lld	"
 #define __SC		"scd	"
 #define __INS		"dins	"
 #define __EXT		"dext	"
 #endif
 
+/*
+ * Using a branch-likely instruction to check the result of an sc instruction
+ * works around a bug present in R10000 CPUs prior to revision 3.0 that could
+ * cause ll-sc sequences to execute non-atomically.
+ */
+#if R10000_LLSC_WAR
+# define __SC_BEQZ "beqzl	"
+#elif MIPS_ISA_REV >= 6
+# define __SC_BEQZ "beqzc	"
+#else
+# define __SC_BEQZ "beqz	"
+#endif
+
 #endif /* __ASM_LLSC_H  */
diff --git a/arch/mips/include/asm/mach-ip22/spaces.h b/arch/mips/include/asm/mach-ip22/spaces.h
index 7f9fa6f66059..24fe92cb5313 100644
--- a/arch/mips/include/asm/mach-ip22/spaces.h
+++ b/arch/mips/include/asm/mach-ip22/spaces.h
@@ -10,17 +10,7 @@
 #ifndef _ASM_MACH_IP22_SPACES_H
 #define _ASM_MACH_IP22_SPACES_H
 
-
-#ifdef CONFIG_64BIT
-
-#define PAGE_OFFSET		0xffffffff80000000UL
-
-#define CAC_BASE		0xffffffff80000000
-#define IO_BASE			0xffffffffa0000000
-#define UNCAC_BASE		0xffffffffa0000000
-#define MAP_BASE		0xc000000000000000
-
-#endif /* CONFIG_64BIT */
+#define PHYS_OFFSET     _AC(0x08000000, UL)
 
 #include <asm/mach-generic/spaces.h>
 
diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h
index 1cd6a23a84f2..f463826515df 100644
--- a/arch/mips/include/asm/mach-ip27/mmzone.h
+++ b/arch/mips/include/asm/mach-ip27/mmzone.h
@@ -6,7 +6,7 @@
 #include <asm/sn/arch.h>
 #include <asm/sn/hub.h>
 
-#define pa_to_nid(addr)		NASID_TO_COMPACT_NODEID(NASID_GET(addr))
+#define pa_to_nid(addr)		NASID_GET(addr)
 
 struct hub_data {
 	kern_vars_t	kern_vars;
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 965f0793a5f9..be61ddcdacab 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -7,14 +7,13 @@
 #include <asm/mmzone.h>
 
 struct cpuinfo_ip27 {
-	cnodeid_t	p_nodeid;	/* my node ID in compact-id-space */
 	nasid_t		p_nasid;	/* my node ID in numa-as-id-space */
 	unsigned char	p_slice;	/* Physical position on node board */
 };
 
 extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
 
-#define cpu_to_node(cpu)	(sn_cpu_info[(cpu)].p_nodeid)
+#define cpu_to_node(cpu)	(cputonasid(cpu))
 #define cpumask_of_node(node)	((node) == -1 ?				\
 				 cpu_all_mask :				\
 				 &hub_data(node)->h_cpus)
@@ -23,7 +22,7 @@ extern int pcibus_to_node(struct pci_bus *);
 
 #define cpumask_of_pcibus(bus)	(cpumask_of_node(pcibus_to_node(bus)))
 
-extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
+extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
 
 #define node_distance(from, to) (__node_distances[(from)][(to)])
 
diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
new file mode 100644
index 000000000000..cfa02f3d25df
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IP30/Octane cpu-features overrides.
+ *
+ * Copyright (C) 2003 Ralf Baechle <ralf@linux-mips.org>
+ *		 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *		 2009 Johannes Dickgreber <tanzy@gmx.de>
+ *		 2015 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+#ifndef __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H
+
+#include <asm/cpu.h>
+
+/*
+ * IP30 only supports R1[024]000 processors, all using the same config
+ */
+#define cpu_has_tlb			1
+#define cpu_has_tlbinv			0
+#define cpu_has_segments		0
+#define cpu_has_eva			0
+#define cpu_has_htw			0
+#define cpu_has_rixiex			0
+#define cpu_has_maar			0
+#define cpu_has_rw_llb			0
+#define cpu_has_3kex			0
+#define cpu_has_4kex			1
+#define cpu_has_3k_cache		0
+#define cpu_has_4k_cache		1
+#define cpu_has_6k_cache		0
+#define cpu_has_8k_cache		0
+#define cpu_has_tx39_cache		0
+#define cpu_has_fpu			1
+#define cpu_has_nofpuex			0
+#define cpu_has_32fpr			1
+#define cpu_has_counter			1
+#define cpu_has_watch			1
+#define cpu_has_64bits			1
+#define cpu_has_divec			0
+#define cpu_has_vce			0
+#define cpu_has_cache_cdex_p		0
+#define cpu_has_cache_cdex_s		0
+#define cpu_has_prefetch		1
+#define cpu_has_mcheck			0
+#define cpu_has_ejtag			0
+#define cpu_has_llsc			1
+#define cpu_has_mips16			0
+#define cpu_has_mdmx			0
+#define cpu_has_mips3d			0
+#define cpu_has_smartmips		0
+#define cpu_has_rixi			0
+#define cpu_has_xpa			0
+#define cpu_has_vtag_icache		0
+#define cpu_has_dc_aliases		0
+#define cpu_has_ic_fills_f_dc		0
+
+#define cpu_icache_snoops_remote_store	1
+
+#define cpu_has_mips32r1		0
+#define cpu_has_mips32r2		0
+#define cpu_has_mips64r1		0
+#define cpu_has_mips64r2		0
+#define cpu_has_mips32r6		0
+#define cpu_has_mips64r6		0
+
+#define cpu_has_dsp			0
+#define cpu_has_dsp2			0
+#define cpu_has_mipsmt			0
+#define cpu_has_userlocal		0
+#define cpu_has_inclusive_pcaches	1
+#define cpu_hwrena_impl_bits		0
+#define cpu_has_perf_cntr_intr_bit	0
+#define cpu_has_vz			0
+#define cpu_has_fre			0
+#define cpu_has_cdmm			0
+
+#define cpu_dcache_line_size()		32
+#define cpu_icache_line_size()		64
+#define cpu_scache_line_size()		128
+
+#endif /* __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H */
+
diff --git a/arch/mips/include/asm/mach-ip30/irq.h b/arch/mips/include/asm/mach-ip30/irq.h
new file mode 100644
index 000000000000..e5c3dd965266
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/irq.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HEART IRQ defines
+ *
+ * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ *		 2014-2016 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+
+#ifndef __ASM_MACH_IP30_IRQ_H
+#define __ASM_MACH_IP30_IRQ_H
+
+/*
+ * HEART has 64 hardware interrupts, but use 128 to leave room for a few
+ * software interrupts as well (such as the CPU timer interrupt.
+ */
+#define NR_IRQS				128
+
+extern void __init ip30_install_ipi(void);
+
+/*
+ * HEART has 64 interrupt vectors available to it, subdivided into five
+ * priority levels.  They are numbered 0 to 63.
+ */
+#define HEART_NUM_IRQS			64
+
+/*
+ * These are the five interrupt priority levels and their corresponding
+ * CPU IPx interrupt pins.
+ *
+ * Level 4 - Error Interrupts.
+ * Level 3 - HEART timer interrupt.
+ * Level 2 - CPU IPI, CPU debug, power putton, general device interrupts.
+ * Level 1 - General device interrupts.
+ * Level 0 - General device GFX flow control interrupts.
+ */
+#define HEART_L4_INT_MASK		0xfff8000000000000ULL	/* IP6 */
+#define HEART_L3_INT_MASK		0x0004000000000000ULL	/* IP5 */
+#define HEART_L2_INT_MASK		0x0003ffff00000000ULL	/* IP4 */
+#define HEART_L1_INT_MASK		0x00000000ffff0000ULL	/* IP3 */
+#define HEART_L0_INT_MASK		0x000000000000ffffULL	/* IP2 */
+
+/* HEART L0 Interrupts (Low Priority) */
+#define HEART_L0_INT_GENERIC		 0
+#define HEART_L0_INT_FLOW_CTRL_HWTR_0	 1
+#define HEART_L0_INT_FLOW_CTRL_HWTR_1	 2
+
+/* HEART L2 Interrupts (High Priority) */
+#define HEART_L2_INT_RESCHED_CPU_0	46
+#define HEART_L2_INT_RESCHED_CPU_1	47
+#define HEART_L2_INT_CALL_CPU_0		48
+#define HEART_L2_INT_CALL_CPU_1		49
+
+/* HEART L3 Interrupts (Compare/Counter Timer) */
+#define HEART_L3_INT_TIMER		50
+
+/* HEART L4 Interrupts (Errors) */
+#define HEART_L4_INT_XWID_ERR_9		51
+#define HEART_L4_INT_XWID_ERR_A		52
+#define HEART_L4_INT_XWID_ERR_B		53
+#define HEART_L4_INT_XWID_ERR_C		54
+#define HEART_L4_INT_XWID_ERR_D		55
+#define HEART_L4_INT_XWID_ERR_E		56
+#define HEART_L4_INT_XWID_ERR_F		57
+#define HEART_L4_INT_XWID_ERR_XBOW	58
+#define HEART_L4_INT_CPU_BUS_ERR_0	59
+#define HEART_L4_INT_CPU_BUS_ERR_1	60
+#define HEART_L4_INT_CPU_BUS_ERR_2	61
+#define HEART_L4_INT_CPU_BUS_ERR_3	62
+#define HEART_L4_INT_HEART_EXCP		63
+
+/*
+ * Power Switch is wired via BaseIO BRIDGE slot #6.
+ *
+ * ACFail is wired via BaseIO BRIDGE slot #7.
+ */
+#define IP30_POWER_IRQ		HEART_L2_INT_POWER_BTN
+
+#include_next <irq.h>
+
+#define IP30_HEART_L0_IRQ	(MIPS_CPU_IRQ_BASE + 2)
+#define IP30_HEART_L1_IRQ	(MIPS_CPU_IRQ_BASE + 3)
+#define IP30_HEART_L2_IRQ	(MIPS_CPU_IRQ_BASE + 4)
+#define IP30_HEART_TIMER_IRQ	(MIPS_CPU_IRQ_BASE + 5)
+#define IP30_HEART_ERR_IRQ	(MIPS_CPU_IRQ_BASE + 6)
+
+#endif /* __ASM_MACH_IP30_IRQ_H */
diff --git a/arch/mips/include/asm/mach-ip30/kernel-entry-init.h b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h
new file mode 100644
index 000000000000..be0472c977d8
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_MACH_IP30_KERNEL_ENTRY_H
+#define __ASM_MACH_IP30_KERNEL_ENTRY_H
+
+	.macro  kernel_entry_setup
+	.endm
+
+	.macro	smp_slave_setup
+	move	gp, a0
+	.endm
+
+#endif /* __ASM_MACH_IP30_KERNEL_ENTRY_H */
diff --git a/arch/mips/include/asm/mach-ip30/mangle-port.h b/arch/mips/include/asm/mach-ip30/mangle-port.h
new file mode 100644
index 000000000000..f3e1262a2d5e
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/mangle-port.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ */
+#ifndef __ASM_MACH_IP30_MANGLE_PORT_H
+#define __ASM_MACH_IP30_MANGLE_PORT_H
+
+#define __swizzle_addr_b(port)	((port)^3)
+#define __swizzle_addr_w(port)	((port)^2)
+#define __swizzle_addr_l(port)	(port)
+#define __swizzle_addr_q(port)	(port)
+
+#define ioswabb(a, x)		(x)
+#define __mem_ioswabb(a, x)	(x)
+#define ioswabw(a, x)		(x)
+#define __mem_ioswabw(a, x)	cpu_to_le16(x)
+#define ioswabl(a, x)		(x)
+#define __mem_ioswabl(a, x)	cpu_to_le32(x)
+#define ioswabq(a, x)		(x)
+#define __mem_ioswabq(a, x)	cpu_to_le64(x)
+
+#endif /* __ASM_MACH_IP30_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mach-ip30/spaces.h b/arch/mips/include/asm/mach-ip30/spaces.h
new file mode 100644
index 000000000000..c8a302dfbe05
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/spaces.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2016 Joshua Kinard <kumba@gentoo.org>
+ *
+ */
+#ifndef _ASM_MACH_IP30_SPACES_H
+#define _ASM_MACH_IP30_SPACES_H
+
+/*
+ * Memory in IP30/Octane is offset 512MB in the physical address space.
+ */
+#define PHYS_OFFSET	_AC(0x20000000, UL)
+
+#ifdef CONFIG_64BIT
+#define CAC_BASE	_AC(0xA800000000000000, UL)
+#endif
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* _ASM_MACH_IP30_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h
new file mode 100644
index 000000000000..a98ba204f183
--- /dev/null
+++ b/arch/mips/include/asm/mach-ip30/war.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_MACH_IP30_WAR_H
+#define __ASM_MIPS_MACH_IP30_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS34K_MISSED_ITLB_WAR		0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+
+#ifdef CONFIG_CPU_R10000
+#define R10000_LLSC_WAR			1
+#else
+#define R10000_LLSC_WAR			0
+#endif
+
+#endif /* __ASM_MIPS_MACH_IP30_WAR_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h
new file mode 100644
index 000000000000..b2ee859ca0b7
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Wu Zhangjin <wuzhangjin@gmail.com>
+ * Copyright (C) 2009 Philippe Vachon <philippe@cowpig.ca>
+ * Copyright (C) 2009 Zhang Le <r0bertz@gentoo.org>
+ *
+ * reference: /proc/cpuinfo,
+ *	arch/mips/kernel/cpu-probe.c(cpu_probe_legacy),
+ *	arch/mips/kernel/proc.c(show_cpuinfo),
+ *	loongson2f user manual.
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_32fpr		1
+#define cpu_has_3k_cache	0
+#define cpu_has_4k_cache	1
+#define cpu_has_4kex		1
+#define cpu_has_64bits		1
+#define cpu_has_cache_cdex_p	0
+#define cpu_has_cache_cdex_s	0
+#define cpu_has_counter		1
+#define cpu_has_dc_aliases	(PAGE_SIZE < 0x4000)
+#define cpu_has_divec		0
+#define cpu_has_ejtag		0
+#define cpu_has_inclusive_pcaches	1
+#define cpu_has_llsc		1
+#define cpu_has_mcheck		0
+#define cpu_has_mdmx		0
+#define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
+#define cpu_has_mips3d		0
+#define cpu_has_mipsmt		0
+#define cpu_has_smartmips	0
+#define cpu_has_tlb		1
+#define cpu_has_tx39_cache	0
+#define cpu_has_vce		0
+#define cpu_has_veic		0
+#define cpu_has_vint		0
+#define cpu_has_vtag_icache	0
+#define cpu_has_watch		1
+
+#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h
index 9795b3361532..9795b3361532 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h
index 52e8bb0fc04d..52e8bb0fc04d 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
index a0d4b752899e..a0d4b752899e 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
index 70d0153cccc3..70d0153cccc3 100644
--- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h
new file mode 100644
index 000000000000..5008af0a1a19
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_LOONGSON_H
+#define __ASM_MACH_LOONGSON2EF_LOONGSON_H
+
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+
+/* loongson internal northbridge initialization */
+extern void bonito_irq_init(void);
+
+/* machine-specific reboot/halt operation */
+extern void mach_prepare_reboot(void);
+extern void mach_prepare_shutdown(void);
+
+/* environment arguments from bootloader */
+extern u32 cpu_clock_freq;
+extern u32 memsize, highmemsize;
+
+/* loongson-specific command line, env and memory initialization */
+extern void __init prom_init_memory(void);
+extern void __init prom_init_machtype(void);
+extern void __init prom_init_env(void);
+#ifdef CONFIG_LOONGSON_UART_BASE
+extern unsigned long _loongson_uart_base, loongson_uart_base;
+extern void prom_init_loongson_uart_base(void);
+#endif
+
+static inline void prom_init_uart_base(void)
+{
+#ifdef CONFIG_LOONGSON_UART_BASE
+	prom_init_loongson_uart_base();
+#endif
+}
+
+/* irq operation functions */
+extern void bonito_irqdispatch(void);
+extern void __init bonito_irq_init(void);
+extern void __init mach_init_irq(void);
+extern void mach_irq_dispatch(unsigned int pending);
+extern int mach_i8259_irq(void);
+
+/* We need this in some places... */
+#define delay() ({		\
+	int x;				\
+	for (x = 0; x < 100000; x++)	\
+		__asm__ __volatile__(""); \
+})
+
+#define LOONGSON_REG(x) \
+	(*(volatile u32 *)((char *)CKSEG1ADDR(LOONGSON_REG_BASE) + (x)))
+
+#define LOONGSON_IRQ_BASE	32
+#define LOONGSON2_PERFCNT_IRQ	(MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */
+
+#include <linux/interrupt.h>
+static inline void do_perfcnt_IRQ(void)
+{
+#if IS_ENABLED(CONFIG_OPROFILE)
+	do_IRQ(LOONGSON2_PERFCNT_IRQ);
+#endif
+}
+
+#define LOONGSON_FLASH_BASE	0x1c000000
+#define LOONGSON_FLASH_SIZE	0x02000000	/* 32M */
+#define LOONGSON_FLASH_TOP	(LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1)
+
+#define LOONGSON_LIO0_BASE	0x1e000000
+#define LOONGSON_LIO0_SIZE	0x01C00000	/* 28M */
+#define LOONGSON_LIO0_TOP	(LOONGSON_LIO0_BASE+LOONGSON_LIO0_SIZE-1)
+
+#define LOONGSON_BOOT_BASE	0x1fc00000
+#define LOONGSON_BOOT_SIZE	0x00100000	/* 1M */
+#define LOONGSON_BOOT_TOP	(LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1)
+#define LOONGSON_REG_BASE	0x1fe00000
+#define LOONGSON_REG_SIZE	0x00100000	/* 256Bytes + 256Bytes + ??? */
+#define LOONGSON_REG_TOP	(LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1)
+
+#define LOONGSON_LIO1_BASE	0x1ff00000
+#define LOONGSON_LIO1_SIZE	0x00100000	/* 1M */
+#define LOONGSON_LIO1_TOP	(LOONGSON_LIO1_BASE+LOONGSON_LIO1_SIZE-1)
+
+#define LOONGSON_PCILO0_BASE	0x10000000
+#define LOONGSON_PCILO1_BASE	0x14000000
+#define LOONGSON_PCILO2_BASE	0x18000000
+#define LOONGSON_PCILO_BASE	LOONGSON_PCILO0_BASE
+#define LOONGSON_PCILO_SIZE	0x0c000000	/* 64M * 3 */
+#define LOONGSON_PCILO_TOP	(LOONGSON_PCILO0_BASE+LOONGSON_PCILO_SIZE-1)
+
+#define LOONGSON_PCICFG_BASE	0x1fe80000
+#define LOONGSON_PCICFG_SIZE	0x00000800	/* 2K */
+#define LOONGSON_PCICFG_TOP	(LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1)
+#define LOONGSON_PCIIO_BASE	0x1fd00000
+
+#define LOONGSON_PCIIO_SIZE	0x00100000	/* 1M */
+#define LOONGSON_PCIIO_TOP	(LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1)
+
+/* Loongson Register Bases */
+
+#define LOONGSON_PCICONFIGBASE	0x00
+#define LOONGSON_REGBASE	0x100
+
+/* PCI Configuration Registers */
+
+#define LOONGSON_PCI_REG(x)	LOONGSON_REG(LOONGSON_PCICONFIGBASE + (x))
+#define LOONGSON_PCIDID		LOONGSON_PCI_REG(0x00)
+#define LOONGSON_PCICMD		LOONGSON_PCI_REG(0x04)
+#define LOONGSON_PCICLASS	LOONGSON_PCI_REG(0x08)
+#define LOONGSON_PCILTIMER	LOONGSON_PCI_REG(0x0c)
+#define LOONGSON_PCIBASE0	LOONGSON_PCI_REG(0x10)
+#define LOONGSON_PCIBASE1	LOONGSON_PCI_REG(0x14)
+#define LOONGSON_PCIBASE2	LOONGSON_PCI_REG(0x18)
+#define LOONGSON_PCIBASE3	LOONGSON_PCI_REG(0x1c)
+#define LOONGSON_PCIBASE4	LOONGSON_PCI_REG(0x20)
+#define LOONGSON_PCIEXPRBASE	LOONGSON_PCI_REG(0x30)
+#define LOONGSON_PCIINT		LOONGSON_PCI_REG(0x3c)
+
+#define LOONGSON_PCI_ISR4C	LOONGSON_PCI_REG(0x4c)
+
+#define LOONGSON_PCICMD_PERR_CLR	0x80000000
+#define LOONGSON_PCICMD_SERR_CLR	0x40000000
+#define LOONGSON_PCICMD_MABORT_CLR	0x20000000
+#define LOONGSON_PCICMD_MTABORT_CLR	0x10000000
+#define LOONGSON_PCICMD_TABORT_CLR	0x08000000
+#define LOONGSON_PCICMD_MPERR_CLR	0x01000000
+#define LOONGSON_PCICMD_PERRRESPEN	0x00000040
+#define LOONGSON_PCICMD_ASTEPEN		0x00000080
+#define LOONGSON_PCICMD_SERREN		0x00000100
+#define LOONGSON_PCILTIMER_BUSLATENCY	0x0000ff00
+#define LOONGSON_PCILTIMER_BUSLATENCY_SHIFT	8
+
+/* Loongson h/w Configuration */
+
+#define LOONGSON_GENCFG_OFFSET		0x4
+#define LOONGSON_GENCFG LOONGSON_REG(LOONGSON_REGBASE + LOONGSON_GENCFG_OFFSET)
+
+#define LOONGSON_GENCFG_DEBUGMODE	0x00000001
+#define LOONGSON_GENCFG_SNOOPEN		0x00000002
+#define LOONGSON_GENCFG_CPUSELFRESET	0x00000004
+
+#define LOONGSON_GENCFG_FORCE_IRQA	0x00000008
+#define LOONGSON_GENCFG_IRQA_ISOUT	0x00000010
+#define LOONGSON_GENCFG_IRQA_FROM_INT1	0x00000020
+#define LOONGSON_GENCFG_BYTESWAP	0x00000040
+
+#define LOONGSON_GENCFG_UNCACHED	0x00000080
+#define LOONGSON_GENCFG_PREFETCHEN	0x00000100
+#define LOONGSON_GENCFG_WBEHINDEN	0x00000200
+#define LOONGSON_GENCFG_CACHEALG	0x00000c00
+#define LOONGSON_GENCFG_CACHEALG_SHIFT	10
+#define LOONGSON_GENCFG_PCIQUEUE	0x00001000
+#define LOONGSON_GENCFG_CACHESTOP	0x00002000
+#define LOONGSON_GENCFG_MSTRBYTESWAP	0x00004000
+#define LOONGSON_GENCFG_BUSERREN	0x00008000
+#define LOONGSON_GENCFG_NORETRYTIMEOUT	0x00010000
+#define LOONGSON_GENCFG_SHORTCOPYTIMEOUT	0x00020000
+
+/* PCI address map control */
+
+#define LOONGSON_PCIMAP			LOONGSON_REG(LOONGSON_REGBASE + 0x10)
+#define LOONGSON_PCIMEMBASECFG		LOONGSON_REG(LOONGSON_REGBASE + 0x14)
+#define LOONGSON_PCIMAP_CFG		LOONGSON_REG(LOONGSON_REGBASE + 0x18)
+
+/* GPIO Regs - r/w */
+
+#define LOONGSON_GPIODATA		LOONGSON_REG(LOONGSON_REGBASE + 0x1c)
+#define LOONGSON_GPIOIE			LOONGSON_REG(LOONGSON_REGBASE + 0x20)
+
+/* ICU Configuration Regs - r/w */
+
+#define LOONGSON_INTEDGE		LOONGSON_REG(LOONGSON_REGBASE + 0x24)
+#define LOONGSON_INTSTEER		LOONGSON_REG(LOONGSON_REGBASE + 0x28)
+#define LOONGSON_INTPOL			LOONGSON_REG(LOONGSON_REGBASE + 0x2c)
+
+/* ICU Enable Regs - IntEn & IntISR are r/o. */
+
+#define LOONGSON_INTENSET		LOONGSON_REG(LOONGSON_REGBASE + 0x30)
+#define LOONGSON_INTENCLR		LOONGSON_REG(LOONGSON_REGBASE + 0x34)
+#define LOONGSON_INTEN			LOONGSON_REG(LOONGSON_REGBASE + 0x38)
+#define LOONGSON_INTISR			LOONGSON_REG(LOONGSON_REGBASE + 0x3c)
+
+/* ICU */
+#define LOONGSON_ICU_MBOXES		0x0000000f
+#define LOONGSON_ICU_MBOXES_SHIFT	0
+#define LOONGSON_ICU_DMARDY		0x00000010
+#define LOONGSON_ICU_DMAEMPTY		0x00000020
+#define LOONGSON_ICU_COPYRDY		0x00000040
+#define LOONGSON_ICU_COPYEMPTY		0x00000080
+#define LOONGSON_ICU_COPYERR		0x00000100
+#define LOONGSON_ICU_PCIIRQ		0x00000200
+#define LOONGSON_ICU_MASTERERR		0x00000400
+#define LOONGSON_ICU_SYSTEMERR		0x00000800
+#define LOONGSON_ICU_DRAMPERR		0x00001000
+#define LOONGSON_ICU_RETRYERR		0x00002000
+#define LOONGSON_ICU_GPIOS		0x01ff0000
+#define LOONGSON_ICU_GPIOS_SHIFT		16
+#define LOONGSON_ICU_GPINS		0x7e000000
+#define LOONGSON_ICU_GPINS_SHIFT		25
+#define LOONGSON_ICU_MBOX(N)		(1<<(LOONGSON_ICU_MBOXES_SHIFT+(N)))
+#define LOONGSON_ICU_GPIO(N)		(1<<(LOONGSON_ICU_GPIOS_SHIFT+(N)))
+#define LOONGSON_ICU_GPIN(N)		(1<<(LOONGSON_ICU_GPINS_SHIFT+(N)))
+
+/* PCI prefetch window base & mask */
+
+#define LOONGSON_MEM_WIN_BASE_L		LOONGSON_REG(LOONGSON_REGBASE + 0x40)
+#define LOONGSON_MEM_WIN_BASE_H		LOONGSON_REG(LOONGSON_REGBASE + 0x44)
+#define LOONGSON_MEM_WIN_MASK_L		LOONGSON_REG(LOONGSON_REGBASE + 0x48)
+#define LOONGSON_MEM_WIN_MASK_H		LOONGSON_REG(LOONGSON_REGBASE + 0x4c)
+
+/* PCI_Hit*_Sel_* */
+
+#define LOONGSON_PCI_HIT0_SEL_L		LOONGSON_REG(LOONGSON_REGBASE + 0x50)
+#define LOONGSON_PCI_HIT0_SEL_H		LOONGSON_REG(LOONGSON_REGBASE + 0x54)
+#define LOONGSON_PCI_HIT1_SEL_L		LOONGSON_REG(LOONGSON_REGBASE + 0x58)
+#define LOONGSON_PCI_HIT1_SEL_H		LOONGSON_REG(LOONGSON_REGBASE + 0x5c)
+#define LOONGSON_PCI_HIT2_SEL_L		LOONGSON_REG(LOONGSON_REGBASE + 0x60)
+#define LOONGSON_PCI_HIT2_SEL_H		LOONGSON_REG(LOONGSON_REGBASE + 0x64)
+
+/* PXArb Config & Status */
+
+#define LOONGSON_PXARB_CFG		LOONGSON_REG(LOONGSON_REGBASE + 0x68)
+#define LOONGSON_PXARB_STATUS		LOONGSON_REG(LOONGSON_REGBASE + 0x6c)
+
+/* Chip Config registor of each physical cpu package, PRid >= Loongson-2F */
+#define LOONGSON_CHIPCFG	(void __iomem *)TO_UNCAC(0x1fc00180)
+
+/* pcimap */
+
+#define LOONGSON_PCIMAP_PCIMAP_LO0	0x0000003f
+#define LOONGSON_PCIMAP_PCIMAP_LO0_SHIFT	0
+#define LOONGSON_PCIMAP_PCIMAP_LO1	0x00000fc0
+#define LOONGSON_PCIMAP_PCIMAP_LO1_SHIFT	6
+#define LOONGSON_PCIMAP_PCIMAP_LO2	0x0003f000
+#define LOONGSON_PCIMAP_PCIMAP_LO2_SHIFT	12
+#define LOONGSON_PCIMAP_PCIMAP_2	0x00040000
+#define LOONGSON_PCIMAP_WIN(WIN, ADDR)	\
+	((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6))
+
+#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
+#include <linux/cpufreq.h>
+extern struct cpufreq_frequency_table loongson2_clockmod_table[];
+#endif
+
+/*
+ * address windows configuration module
+ *
+ * loongson2e do not have this module
+ */
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+
+/* address window config module base address */
+#define LOONGSON_ADDRWINCFG_BASE		0x3ff00000ul
+#define LOONGSON_ADDRWINCFG_SIZE		0x180
+
+extern unsigned long _loongson_addrwincfg_base;
+#define LOONGSON_ADDRWINCFG(offset) \
+	(*(volatile u64 *)(_loongson_addrwincfg_base + (offset)))
+
+#define CPU_WIN0_BASE	LOONGSON_ADDRWINCFG(0x00)
+#define CPU_WIN1_BASE	LOONGSON_ADDRWINCFG(0x08)
+#define CPU_WIN2_BASE	LOONGSON_ADDRWINCFG(0x10)
+#define CPU_WIN3_BASE	LOONGSON_ADDRWINCFG(0x18)
+
+#define CPU_WIN0_MASK	LOONGSON_ADDRWINCFG(0x20)
+#define CPU_WIN1_MASK	LOONGSON_ADDRWINCFG(0x28)
+#define CPU_WIN2_MASK	LOONGSON_ADDRWINCFG(0x30)
+#define CPU_WIN3_MASK	LOONGSON_ADDRWINCFG(0x38)
+
+#define CPU_WIN0_MMAP	LOONGSON_ADDRWINCFG(0x40)
+#define CPU_WIN1_MMAP	LOONGSON_ADDRWINCFG(0x48)
+#define CPU_WIN2_MMAP	LOONGSON_ADDRWINCFG(0x50)
+#define CPU_WIN3_MMAP	LOONGSON_ADDRWINCFG(0x58)
+
+#define PCIDMA_WIN0_BASE	LOONGSON_ADDRWINCFG(0x60)
+#define PCIDMA_WIN1_BASE	LOONGSON_ADDRWINCFG(0x68)
+#define PCIDMA_WIN2_BASE	LOONGSON_ADDRWINCFG(0x70)
+#define PCIDMA_WIN3_BASE	LOONGSON_ADDRWINCFG(0x78)
+
+#define PCIDMA_WIN0_MASK	LOONGSON_ADDRWINCFG(0x80)
+#define PCIDMA_WIN1_MASK	LOONGSON_ADDRWINCFG(0x88)
+#define PCIDMA_WIN2_MASK	LOONGSON_ADDRWINCFG(0x90)
+#define PCIDMA_WIN3_MASK	LOONGSON_ADDRWINCFG(0x98)
+
+#define PCIDMA_WIN0_MMAP	LOONGSON_ADDRWINCFG(0xa0)
+#define PCIDMA_WIN1_MMAP	LOONGSON_ADDRWINCFG(0xa8)
+#define PCIDMA_WIN2_MMAP	LOONGSON_ADDRWINCFG(0xb0)
+#define PCIDMA_WIN3_MMAP	LOONGSON_ADDRWINCFG(0xb8)
+
+#define ADDRWIN_WIN0	0
+#define ADDRWIN_WIN1	1
+#define ADDRWIN_WIN2	2
+#define ADDRWIN_WIN3	3
+
+#define ADDRWIN_MAP_DST_DDR	0
+#define ADDRWIN_MAP_DST_PCI	1
+#define ADDRWIN_MAP_DST_LIO	1
+
+/*
+ * s: CPU, PCIDMA
+ * d: DDR, PCI, LIO
+ * win: 0, 1, 2, 3
+ * src: map source
+ * dst: map destination
+ * size: ~mask + 1
+ */
+#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\
+	s##_WIN##w##_BASE = (src); \
+	s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \
+	s##_WIN##w##_MASK = ~(size-1); \
+} while (0)
+
+#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \
+	LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size)
+#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \
+	LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size)
+#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \
+	LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size)
+
+#endif	/* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#endif /* __ASM_MACH_LOONGSON2EF_LOONGSON_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/machine.h b/arch/mips/include/asm/mach-loongson2ef/machine.h
new file mode 100644
index 000000000000..4097267ef186
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/machine.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_MACHINE_H
+#define __ASM_MACH_LOONGSON2EF_MACHINE_H
+
+#ifdef CONFIG_LEMOTE_FULOONG2E
+
+#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2E
+
+#endif
+
+/* use fuloong2f as the default machine of LEMOTE_MACH2F */
+#ifdef CONFIG_LEMOTE_MACH2F
+
+#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2F
+
+#endif
+
+#endif /* __ASM_MACH_LOONGSON2EF_MACHINE_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
new file mode 100644
index 000000000000..00d602629a55
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org)
+ *
+ * RTC routines for PC style attached Dallas chip.
+ */
+#ifndef __ASM_MACH_LOONGSON2EF_MC146818RTC_H
+#define __ASM_MACH_LOONGSON2EF_MC146818RTC_H
+
+#include <linux/io.h>
+
+#define RTC_PORT(x)	(0x70 + (x))
+#define RTC_IRQ		8
+
+static inline unsigned char CMOS_READ(unsigned long addr)
+{
+	outb_p(addr, RTC_PORT(0));
+	return inb_p(RTC_PORT(1));
+}
+
+static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
+{
+	outb_p(addr, RTC_PORT(0));
+	outb_p(data, RTC_PORT(1));
+}
+
+#define RTC_ALWAYS_BCD	0
+
+#ifndef mc146818_decode_year
+#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970)
+#endif
+
+#endif /* __ASM_MACH_LOONGSON2EF_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-loongson64/mem.h b/arch/mips/include/asm/mach-loongson2ef/mem.h
index ce33c174c04d..d1d759b8974e 100644
--- a/arch/mips/include/asm/mach-loongson64/mem.h
+++ b/arch/mips/include/asm/mach-loongson2ef/mem.h
@@ -4,8 +4,8 @@
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  */
 
-#ifndef __ASM_MACH_LOONGSON64_MEM_H
-#define __ASM_MACH_LOONGSON64_MEM_H
+#ifndef __ASM_MACH_LOONGSON2EF_MEM_H
+#define __ASM_MACH_LOONGSON2EF_MEM_H
 
 /*
  * high memory space
@@ -34,4 +34,4 @@
 #define LOONGSON_MMIO_MEM_END	0x80000000
 #endif
 
-#endif /* __ASM_MACH_LOONGSON64_MEM_H */
+#endif /* __ASM_MACH_LOONGSON2EF_MEM_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/pci.h b/arch/mips/include/asm/mach-loongson2ef/pci.h
new file mode 100644
index 000000000000..5588c5bc5395
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/pci.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2008 Zhang Le <r0bertz@gentoo.org>
+ * Copyright (c) 2009 Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+
+#ifndef __ASM_MACH_LOONGSON2EF_PCI_H_
+#define __ASM_MACH_LOONGSON2EF_PCI_H_
+
+extern struct pci_ops loongson_pci_ops;
+
+/* this is an offset from mips_io_port_base */
+#define LOONGSON_PCI_IO_START	0x00004000UL
+
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+
+/*
+ * we use address window2 to map cpu address space to pci space
+ * window2: cpu [1G, 2G] -> pci [1G, 2G]
+ * why not use window 0 & 1? because they are used by cpu when booting.
+ * window0: cpu [0, 256M] -> ddr [0, 256M]
+ * window1: cpu [256M, 512M] -> pci [256M, 512M]
+ */
+
+/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */
+#define LOONGSON_CPU_MEM_SRC	0x40000000ul		/* 1G */
+#define LOONGSON_PCI_MEM_DST	LOONGSON_CPU_MEM_SRC
+
+#define LOONGSON_PCI_MEM_START	LOONGSON_PCI_MEM_DST
+#define LOONGSON_PCI_MEM_END	(0x80000000ul-1)	/* 2G */
+
+#define MMAP_CPUTOPCI_SIZE	(LOONGSON_PCI_MEM_END - \
+					LOONGSON_PCI_MEM_START + 1)
+
+#else	/* loongson2f/32bit & loongson2e */
+
+/* this pci memory space is mapped by pcimap in pci.c */
+#define LOONGSON_PCI_MEM_START	LOONGSON_PCILO1_BASE
+#define LOONGSON_PCI_MEM_END	(LOONGSON_PCILO1_BASE + 0x04000000 * 2)
+
+/* this is an offset from mips_io_port_base */
+#define LOONGSON_PCI_IO_START	0x00004000UL
+
+#endif	/* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#endif /* !__ASM_MACH_LOONGSON2EF_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-loongson2ef/spaces.h b/arch/mips/include/asm/mach-loongson2ef/spaces.h
new file mode 100644
index 000000000000..ba4e8e9b618e
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/spaces.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MACH_LOONGSON2EF_SPACES_H_
+#define __ASM_MACH_LOONGSON2EF_SPACES_H_
+
+#if defined(CONFIG_64BIT)
+#define CAC_BASE        _AC(0x9800000000000000, UL)
+#endif /* CONFIG_64BIT */
+
+#include <asm/mach-generic/spaces.h>
+#endif
diff --git a/arch/mips/include/asm/mach-loongson32/prom.h b/arch/mips/include/asm/mach-loongson32/prom.h
deleted file mode 100644
index cb789f18d790..000000000000
--- a/arch/mips/include/asm/mach-loongson32/prom.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- */
-
-#ifndef __ASM_MACH_LOONGSON32_PROM_H
-#define __ASM_MACH_LOONGSON32_PROM_H
-
-#include <linux/io.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-
-/* environment arguments from bootloader */
-extern unsigned long memsize, highmemsize;
-
-/* loongson-specific command line, env and memory initialization */
-extern char *prom_getenv(char *name);
-extern void __init prom_init_cmdline(void);
-
-#endif /* __ASM_MACH_LOONGSON32_PROM_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 4aca25f2ff06..7dc8d75445a9 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -43,11 +43,8 @@
 #define cpu_has_vint		0
 #define cpu_has_vtag_icache	0
 #define cpu_has_watch		1
-
-#ifdef CONFIG_CPU_LOONGSON3
 #define cpu_has_wsbh		1
 #define cpu_has_ic_fills_f_dc	1
 #define cpu_hwrena_impl_bits	0xc0000000
-#endif
 
 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h
index be9f727a9328..73a89913dc38 100644
--- a/arch/mips/include/asm/mach-loongson64/irq.h
+++ b/arch/mips/include/asm/mach-loongson64/irq.h
@@ -4,8 +4,6 @@
 
 #include <boot_param.h>
 
-#ifdef CONFIG_CPU_LOONGSON3
-
 /* cpu core interrupt numbers */
 #define MIPS_CPU_IRQ_BASE 56
 
@@ -35,8 +33,6 @@
 
 #define LOONGSON_INT_COREx_INTy(x, y)	(1<<(x) | 1<<(y+4))	/* route to int y of core x */
 
-#endif
-
 extern void fixup_irqs(void);
 extern void loongson3_ipi_interrupt(struct pt_regs *regs);
 
diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
index b5e288a12dfe..87a5bfbf8cfe 100644
--- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
@@ -17,7 +17,6 @@
  * Override macros used in arch/mips/kernel/head.S.
  */
 	.macro	kernel_entry_setup
-#ifdef CONFIG_CPU_LOONGSON3
 	.set	push
 	.set	mips64
 	/* Set LPA on LOONGSON3 config3 */
@@ -30,23 +29,29 @@
 	mtc0	t0, CP0_PAGEGRAIN
 	/* Enable STFill Buffer */
 	mfc0	t0, CP0_PRID
+	/* Loongson-3A R4+ */
+	andi	t1, t0, PRID_IMP_MASK
+	li	t2, PRID_IMP_LOONGSON_64G
+	beq     t1, t2, 1f
+	nop
+	/* Loongson-3A R2/R3 */
 	andi	t0, (PRID_IMP_MASK | PRID_REV_MASK)
-	slti	t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0)
-	bnez	t0, 1f
+	slti	t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+	bnez	t0, 2f
+	nop
+1:
 	mfc0	t0, CP0_CONFIG6
 	or	t0, 0x100
 	mtc0	t0, CP0_CONFIG6
-1:
+2:
 	_ehb
 	.set	pop
-#endif
 	.endm
 
 /*
  * Do SMP slave processor setup.
  */
 	.macro	smp_slave_setup
-#ifdef CONFIG_CPU_LOONGSON3
 	.set	push
 	.set	mips64
 	/* Set LPA on LOONGSON3 config3 */
@@ -59,16 +64,23 @@
 	mtc0	t0, CP0_PAGEGRAIN
 	/* Enable STFill Buffer */
 	mfc0	t0, CP0_PRID
+	/* Loongson-3A R4+ */
+	andi	t1, t0, PRID_IMP_MASK
+	li	t2, PRID_IMP_LOONGSON_64G
+	beq     t1, t2, 1f
+	nop
+	/* Loongson-3A R2/R3 */
 	andi	t0, (PRID_IMP_MASK | PRID_REV_MASK)
-	slti	t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0)
-	bnez	t0, 1f
+	slti	t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+	bnez	t0, 2f
+	nop
+1:
 	mfc0	t0, CP0_CONFIG6
 	or	t0, 0x100
 	mtc0	t0, CP0_CONFIG6
-1:
+2:
 	_ehb
 	.set	pop
-#endif
 	.endm
 
 #endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */
diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h
index 694a58574ec0..a8fce112a9b0 100644
--- a/arch/mips/include/asm/mach-loongson64/loongson.h
+++ b/arch/mips/include/asm/mach-loongson64/loongson.h
@@ -12,8 +12,6 @@
 #include <linux/irq.h>
 #include <boot_param.h>
 
-/* loongson internal northbridge initialization */
-extern void bonito_irq_init(void);
 
 /* machine-specific reboot/halt operation */
 extern void mach_prepare_reboot(void);
@@ -26,25 +24,9 @@ extern const struct plat_smp_ops loongson3_smp_ops;
 
 /* loongson-specific command line, env and memory initialization */
 extern void __init prom_init_memory(void);
-extern void __init prom_init_cmdline(void);
-extern void __init prom_init_machtype(void);
 extern void __init prom_init_env(void);
-#ifdef CONFIG_LOONGSON_UART_BASE
-extern unsigned long _loongson_uart_base[], loongson_uart_base[];
-extern void prom_init_loongson_uart_base(void);
-#endif
-
-static inline void prom_init_uart_base(void)
-{
-#ifdef CONFIG_LOONGSON_UART_BASE
-	prom_init_loongson_uart_base();
-#endif
-}
 
 /* irq operation functions */
-extern void bonito_irqdispatch(void);
-extern void __init bonito_irq_init(void);
-extern void __init mach_init_irq(void);
 extern void mach_irq_dispatch(unsigned int pending);
 extern int mach_i8259_irq(void);
 
@@ -64,17 +46,6 @@ extern int mach_i8259_irq(void);
 #define LOONGSON3_REG32(base, x) \
 	(*(volatile u32 *)((char *)TO_UNCAC(base) + (x)))
 
-#define LOONGSON_IRQ_BASE	32
-#define LOONGSON2_PERFCNT_IRQ	(MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */
-
-#include <linux/interrupt.h>
-static inline void do_perfcnt_IRQ(void)
-{
-#if IS_ENABLED(CONFIG_OPROFILE)
-	do_IRQ(LOONGSON2_PERFCNT_IRQ);
-#endif
-}
-
 #define LOONGSON_FLASH_BASE	0x1c000000
 #define LOONGSON_FLASH_SIZE	0x02000000	/* 32M */
 #define LOONGSON_FLASH_TOP	(LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1)
@@ -109,11 +80,7 @@ static inline void do_perfcnt_IRQ(void)
 #define LOONGSON_PCICFG_SIZE	0x00000800	/* 2K */
 #define LOONGSON_PCICFG_TOP	(LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1)
 
-#ifdef CONFIG_CPU_LOONGSON3
 #define LOONGSON_PCIIO_BASE	loongson_sysconf.pci_io_base
-#else
-#define LOONGSON_PCIIO_BASE	0x1fd00000
-#endif
 
 #define LOONGSON_PCIIO_SIZE	0x00100000	/* 1M */
 #define LOONGSON_PCIIO_TOP	(LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1)
@@ -270,86 +237,4 @@ extern u64 loongson_freqctrl[MAX_PACKAGES];
 #define LOONGSON_PCIMAP_WIN(WIN, ADDR)	\
 	((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6))
 
-#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
-#include <linux/cpufreq.h>
-extern struct cpufreq_frequency_table loongson2_clockmod_table[];
-#endif
-
-/*
- * address windows configuration module
- *
- * loongson2e do not have this module
- */
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-
-/* address window config module base address */
-#define LOONGSON_ADDRWINCFG_BASE		0x3ff00000ul
-#define LOONGSON_ADDRWINCFG_SIZE		0x180
-
-extern unsigned long _loongson_addrwincfg_base;
-#define LOONGSON_ADDRWINCFG(offset) \
-	(*(volatile u64 *)(_loongson_addrwincfg_base + (offset)))
-
-#define CPU_WIN0_BASE	LOONGSON_ADDRWINCFG(0x00)
-#define CPU_WIN1_BASE	LOONGSON_ADDRWINCFG(0x08)
-#define CPU_WIN2_BASE	LOONGSON_ADDRWINCFG(0x10)
-#define CPU_WIN3_BASE	LOONGSON_ADDRWINCFG(0x18)
-
-#define CPU_WIN0_MASK	LOONGSON_ADDRWINCFG(0x20)
-#define CPU_WIN1_MASK	LOONGSON_ADDRWINCFG(0x28)
-#define CPU_WIN2_MASK	LOONGSON_ADDRWINCFG(0x30)
-#define CPU_WIN3_MASK	LOONGSON_ADDRWINCFG(0x38)
-
-#define CPU_WIN0_MMAP	LOONGSON_ADDRWINCFG(0x40)
-#define CPU_WIN1_MMAP	LOONGSON_ADDRWINCFG(0x48)
-#define CPU_WIN2_MMAP	LOONGSON_ADDRWINCFG(0x50)
-#define CPU_WIN3_MMAP	LOONGSON_ADDRWINCFG(0x58)
-
-#define PCIDMA_WIN0_BASE	LOONGSON_ADDRWINCFG(0x60)
-#define PCIDMA_WIN1_BASE	LOONGSON_ADDRWINCFG(0x68)
-#define PCIDMA_WIN2_BASE	LOONGSON_ADDRWINCFG(0x70)
-#define PCIDMA_WIN3_BASE	LOONGSON_ADDRWINCFG(0x78)
-
-#define PCIDMA_WIN0_MASK	LOONGSON_ADDRWINCFG(0x80)
-#define PCIDMA_WIN1_MASK	LOONGSON_ADDRWINCFG(0x88)
-#define PCIDMA_WIN2_MASK	LOONGSON_ADDRWINCFG(0x90)
-#define PCIDMA_WIN3_MASK	LOONGSON_ADDRWINCFG(0x98)
-
-#define PCIDMA_WIN0_MMAP	LOONGSON_ADDRWINCFG(0xa0)
-#define PCIDMA_WIN1_MMAP	LOONGSON_ADDRWINCFG(0xa8)
-#define PCIDMA_WIN2_MMAP	LOONGSON_ADDRWINCFG(0xb0)
-#define PCIDMA_WIN3_MMAP	LOONGSON_ADDRWINCFG(0xb8)
-
-#define ADDRWIN_WIN0	0
-#define ADDRWIN_WIN1	1
-#define ADDRWIN_WIN2	2
-#define ADDRWIN_WIN3	3
-
-#define ADDRWIN_MAP_DST_DDR	0
-#define ADDRWIN_MAP_DST_PCI	1
-#define ADDRWIN_MAP_DST_LIO	1
-
-/*
- * s: CPU, PCIDMA
- * d: DDR, PCI, LIO
- * win: 0, 1, 2, 3
- * src: map source
- * dst: map destination
- * size: ~mask + 1
- */
-#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\
-	s##_WIN##w##_BASE = (src); \
-	s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \
-	s##_WIN##w##_MASK = ~(size-1); \
-} while (0)
-
-#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \
-	LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size)
-#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \
-	LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size)
-#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \
-	LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size)
-
-#endif	/* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */
-
 #endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */
diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
new file mode 100644
index 000000000000..363a47a5d26e
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
@@ -0,0 +1,227 @@
+/*
+ * Read/Write Loongson Extension Registers
+ */
+
+#ifndef _LOONGSON_REGS_H_
+#define _LOONGSON_REGS_H_
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#include <asm/mipsregs.h>
+#include <asm/cpu.h>
+
+static inline bool cpu_has_cfg(void)
+{
+	return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G);
+}
+
+static inline u32 read_cpucfg(u32 reg)
+{
+	u32 __res;
+
+	__asm__ __volatile__(
+		"parse_r __res,%0\n\t"
+		"parse_r reg,%1\n\t"
+		".insn \n\t"
+		".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t"
+		:"=r"(__res)
+		:"r"(reg)
+		:
+		);
+	return __res;
+}
+
+/* Bit Domains for CFG registers */
+#define LOONGSON_CFG0	0x0
+#define LOONGSON_CFG0_PRID GENMASK(31, 0)
+
+#define LOONGSON_CFG1 0x1
+#define LOONGSON_CFG1_FP	BIT(0)
+#define LOONGSON_CFG1_FPREV	GENMASK(3, 1)
+#define LOONGSON_CFG1_MMI	BIT(4)
+#define LOONGSON_CFG1_MSA1	BIT(5)
+#define LOONGSON_CFG1_MSA2	BIT(6)
+#define LOONGSON_CFG1_CGP	BIT(7)
+#define LOONGSON_CFG1_WRP	BIT(8)
+#define LOONGSON_CFG1_LSX1	BIT(9)
+#define LOONGSON_CFG1_LSX2	BIT(10)
+#define LOONGSON_CFG1_LASX	BIT(11)
+#define LOONGSON_CFG1_R6FXP	BIT(12)
+#define LOONGSON_CFG1_R6CRCP	BIT(13)
+#define LOONGSON_CFG1_R6FPP	BIT(14)
+#define LOONGSON_CFG1_CNT64	BIT(15)
+#define LOONGSON_CFG1_LSLDR0	BIT(16)
+#define LOONGSON_CFG1_LSPREF	BIT(17)
+#define LOONGSON_CFG1_LSPREFX	BIT(18)
+#define LOONGSON_CFG1_LSSYNCI	BIT(19)
+#define LOONGSON_CFG1_LSUCA	BIT(20)
+#define LOONGSON_CFG1_LLSYNC	BIT(21)
+#define LOONGSON_CFG1_TGTSYNC	BIT(22)
+#define LOONGSON_CFG1_LLEXC	BIT(23)
+#define LOONGSON_CFG1_SCRAND	BIT(24)
+#define LOONGSON_CFG1_MUALP	BIT(25)
+#define LOONGSON_CFG1_KMUALEN	BIT(26)
+#define LOONGSON_CFG1_ITLBT	BIT(27)
+#define LOONGSON_CFG1_LSUPERF	BIT(28)
+#define LOONGSON_CFG1_SFBP	BIT(29)
+#define LOONGSON_CFG1_CDMAP	BIT(30)
+
+#define LOONGSON_CFG2 0x2
+#define LOONGSON_CFG2_LEXT1	BIT(0)
+#define LOONGSON_CFG2_LEXT2	BIT(1)
+#define LOONGSON_CFG2_LEXT3	BIT(2)
+#define LOONGSON_CFG2_LSPW	BIT(3)
+#define LOONGSON_CFG2_LBT1	BIT(4)
+#define LOONGSON_CFG2_LBT2	BIT(5)
+#define LOONGSON_CFG2_LBT3	BIT(6)
+#define LOONGSON_CFG2_LBTMMU	BIT(7)
+#define LOONGSON_CFG2_LPMP	BIT(8)
+#define LOONGSON_CFG2_LPMPREV	GENMASK(11, 9)
+#define LOONGSON_CFG2_LAMO	BIT(12)
+#define LOONGSON_CFG2_LPIXU	BIT(13)
+#define LOONGSON_CFG2_LPIXUN	BIT(14)
+#define LOONGSON_CFG2_LZVP	BIT(15)
+#define LOONGSON_CFG2_LZVREV	GENMASK(18, 16)
+#define LOONGSON_CFG2_LGFTP	BIT(19)
+#define LOONGSON_CFG2_LGFTPREV	GENMASK(22, 20)
+#define LOONGSON_CFG2_LLFTP	BIT(23)
+#define LOONGSON_CFG2_LLFTPREV	GENMASK(26, 24)
+#define LOONGSON_CFG2_LCSRP	BIT(27)
+#define LOONGSON_CFG2_LDISBLIKELY	BIT(28)
+
+#define LOONGSON_CFG3 0x3
+#define LOONGSON_CFG3_LCAMP	BIT(0)
+#define LOONGSON_CFG3_LCAMREV	GENMASK(3, 1)
+#define LOONGSON_CFG3_LCAMNUM	GENMASK(11, 4)
+#define LOONGSON_CFG3_LCAMKW	GENMASK(19, 12)
+#define LOONGSON_CFG3_LCAMVW	GENMASK(27, 20)
+
+#define LOONGSON_CFG4 0x4
+#define LOONGSON_CFG4_CCFREQ	GENMASK(31, 0)
+
+#define LOONGSON_CFG5 0x5
+#define LOONGSON_CFG5_CFM	GENMASK(15, 0)
+#define LOONGSON_CFG5_CFD	GENMASK(31, 16)
+
+#define LOONGSON_CFG6 0x6
+
+#define LOONGSON_CFG7 0x7
+#define LOONGSON_CFG7_GCCAEQRP	BIT(0)
+#define LOONGSON_CFG7_UCAWINP	BIT(1)
+
+static inline bool cpu_has_csr(void)
+{
+	if (cpu_has_cfg())
+		return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP);
+
+	return false;
+}
+
+static inline u32 csr_readl(u32 reg)
+{
+	u32 __res;
+
+	/* RDCSR reg, val */
+	__asm__ __volatile__(
+		"parse_r __res,%0\n\t"
+		"parse_r reg,%1\n\t"
+		".insn \n\t"
+		".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t"
+		:"=r"(__res)
+		:"r"(reg)
+		:
+		);
+	return __res;
+}
+
+static inline u64 csr_readq(u32 reg)
+{
+	u64 __res;
+
+	/* DWRCSR reg, val */
+	__asm__ __volatile__(
+		"parse_r __res,%0\n\t"
+		"parse_r reg,%1\n\t"
+		".insn \n\t"
+		".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t"
+		:"=r"(__res)
+		:"r"(reg)
+		:
+		);
+	return __res;
+}
+
+static inline void csr_writel(u32 val, u32 reg)
+{
+	/* WRCSR reg, val */
+	__asm__ __volatile__(
+		"parse_r reg,%0\n\t"
+		"parse_r val,%1\n\t"
+		".insn \n\t"
+		".word (0xc8010118 | (reg << 21) | (val << 11))\n\t"
+		:
+		:"r"(reg),"r"(val)
+		:
+		);
+}
+
+static inline void csr_writeq(u64 val, u32 reg)
+{
+	/* DWRCSR reg, val */
+	__asm__ __volatile__(
+		"parse_r reg,%0\n\t"
+		"parse_r val,%1\n\t"
+		".insn \n\t"
+		".word (0xc8030118 | (reg << 21) | (val << 11))\n\t"
+		:
+		:"r"(reg),"r"(val)
+		:
+		);
+}
+
+/* Public CSR Register can also be accessed with regular addresses */
+#define CSR_PUBLIC_MMIO_BASE 0x1fe00000
+
+#define MMIO_CSR(x)		(void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x)
+
+#define LOONGSON_CSR_FEATURES	0x8
+#define LOONGSON_CSRF_TEMP	BIT(0)
+#define LOONGSON_CSRF_NODECNT	BIT(1)
+#define LOONGSON_CSRF_MSI	BIT(2)
+#define LOONGSON_CSRF_EXTIOI	BIT(3)
+#define LOONGSON_CSRF_IPI	BIT(4)
+#define LOONGSON_CSRF_FREQ	BIT(5)
+
+#define LOONGSON_CSR_VENDOR	0x10 /* Vendor name string, should be "Loongson" */
+#define LOONGSON_CSR_CPUNAME	0x20 /* Processor name string */
+#define LOONGSON_CSR_NODECNT	0x408
+#define LOONGSON_CSR_CPUTEMP	0x428
+
+/* PerCore CSR, only accessable by local cores */
+#define LOONGSON_CSR_IPI_STATUS	0x1000
+#define LOONGSON_CSR_IPI_EN	0x1004
+#define LOONGSON_CSR_IPI_SET	0x1008
+#define LOONGSON_CSR_IPI_CLEAR	0x100c
+#define LOONGSON_CSR_IPI_SEND	0x1040
+#define CSR_IPI_SEND_IP_SHIFT	0
+#define CSR_IPI_SEND_CPU_SHIFT	16
+#define CSR_IPI_SEND_BLOCK	BIT(31)
+
+static inline u64 drdtime(void)
+{
+	int rID = 0;
+	u64 val = 0;
+
+	__asm__ __volatile__(
+		"parse_r rID,%0\n\t"
+		"parse_r val,%1\n\t"
+		".insn \n\t"
+		".word (0xc8090118 | (rID << 21) | (val << 11))\n\t"
+		:"=r"(rID),"=r"(val)
+		:
+		);
+	return val;
+}
+
+#endif
diff --git a/arch/mips/include/asm/mach-loongson64/machine.h b/arch/mips/include/asm/mach-loongson64/machine.h
deleted file mode 100644
index 8ef7ea94a26d..000000000000
--- a/arch/mips/include/asm/mach-loongson64/machine.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2009 Lemote, Inc.
- * Author: Wu Zhangjin <wuzhangjin@gmail.com>
- */
-
-#ifndef __ASM_MACH_LOONGSON64_MACHINE_H
-#define __ASM_MACH_LOONGSON64_MACHINE_H
-
-#ifdef CONFIG_LEMOTE_FULOONG2E
-
-#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2E
-
-#endif
-
-/* use fuloong2f as the default machine of LEMOTE_MACH2F */
-#ifdef CONFIG_LEMOTE_MACH2F
-
-#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2F
-
-#endif
-
-#ifdef CONFIG_LOONGSON_MACH3X
-
-#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC
-
-#endif /* CONFIG_LOONGSON_MACH3X */
-
-#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index 62073d60739f..3a25dbd3b3e9 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -6,8 +6,8 @@
  *          Huacai Chen, chenhc@lemote.com
  *          Xiaofu Meng, Shuangshuang Zhang
  */
-#ifndef _ASM_MACH_MMZONE_H
-#define _ASM_MACH_MMZONE_H
+#ifndef _ASM_MACH_LOONGSON64_MMZONE_H
+#define _ASM_MACH_LOONGSON64_MMZONE_H
 
 #include <boot_param.h>
 #define NODE_ADDRSPACE_SHIFT 44
@@ -19,30 +19,9 @@
 #define pa_to_nid(addr)  (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
 #define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT)
 
-#define LEVELS_PER_SLICE 128
+extern struct pglist_data *__node_data[];
 
-struct slice_data {
-	unsigned long irq_enable_mask[2];
-	int level_to_irq[LEVELS_PER_SLICE];
-};
-
-struct hub_data {
-	cpumask_t	h_cpus;
-	unsigned long slice_map;
-	unsigned long irq_alloc_mask[2];
-	struct slice_data slice[2];
-};
-
-struct node_data {
-	struct pglist_data pglist;
-	struct hub_data hub;
-	cpumask_t cpumask;
-};
-
-extern struct node_data *__node_data[];
-
-#define NODE_DATA(n)		(&__node_data[(n)]->pglist)
-#define hub_data(n)		(&__node_data[(n)]->hub)
+#define NODE_DATA(n)		(__node_data[n])
 
 extern void setup_zero_pages(void);
 extern void __init prom_init_numa_memory(void);
diff --git a/arch/mips/include/asm/mach-loongson64/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h
index 97f807fb2117..8b59d64a23e8 100644
--- a/arch/mips/include/asm/mach-loongson64/pci.h
+++ b/arch/mips/include/asm/mach-loongson64/pci.h
@@ -12,39 +12,8 @@ extern struct pci_ops loongson_pci_ops;
 /* this is an offset from mips_io_port_base */
 #define LOONGSON_PCI_IO_START	0x00004000UL
 
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-
-/*
- * we use address window2 to map cpu address space to pci space
- * window2: cpu [1G, 2G] -> pci [1G, 2G]
- * why not use window 0 & 1? because they are used by cpu when booting.
- * window0: cpu [0, 256M] -> ddr [0, 256M]
- * window1: cpu [256M, 512M] -> pci [256M, 512M]
- */
-
-/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */
-#define LOONGSON_CPU_MEM_SRC	0x40000000ul		/* 1G */
-#define LOONGSON_PCI_MEM_DST	LOONGSON_CPU_MEM_SRC
-
-#define LOONGSON_PCI_MEM_START	LOONGSON_PCI_MEM_DST
-#define LOONGSON_PCI_MEM_END	(0x80000000ul-1)	/* 2G */
-
-#define MMAP_CPUTOPCI_SIZE	(LOONGSON_PCI_MEM_END - \
-					LOONGSON_PCI_MEM_START + 1)
-
-#else	/* loongson2f/32bit & loongson2e */
-
-/* this pci memory space is mapped by pcimap in pci.c */
-#ifdef CONFIG_CPU_LOONGSON3
 #define LOONGSON_PCI_MEM_START	0x40000000UL
 #define LOONGSON_PCI_MEM_END	0x7effffffUL
-#else
-#define LOONGSON_PCI_MEM_START	LOONGSON_PCILO1_BASE
-#define LOONGSON_PCI_MEM_END	(LOONGSON_PCILO1_BASE + 0x04000000 * 2)
-#endif
-/* this is an offset from mips_io_port_base */
-#define LOONGSON_PCI_IO_START	0x00004000UL
 
-#endif	/* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
 
 #endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-loongson64/topology.h b/arch/mips/include/asm/mach-loongson64/topology.h
index 7ff819ab308a..3414a1fd1783 100644
--- a/arch/mips/include/asm/mach-loongson64/topology.h
+++ b/arch/mips/include/asm/mach-loongson64/topology.h
@@ -5,7 +5,9 @@
 #ifdef CONFIG_NUMA
 
 #define cpu_to_node(cpu)	(cpu_logical_map(cpu) >> 2)
-#define cpumask_of_node(node)	(&__node_data[(node)]->cpumask)
+
+extern cpumask_t __node_cpumask[];
+#define cpumask_of_node(node)	(&__node_cpumask[node])
 
 struct pci_bus;
 extern int pcibus_to_node(struct pci_bus *);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index bdbdc19a2b8f..0d5a30988697 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -689,6 +689,9 @@
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
 
+/* Ingenic HPTLB off bits */
+#define XBURST_PAGECTRL_HPTLB_DIS 0xa9000000
+
 /* Ingenic Config7 bits */
 #define MIPS_CONF7_BTB_LOOP_EN	(_ULCAST_(1) << 4)
 
@@ -1971,6 +1974,9 @@ do {									\
 #define read_c0_brcm_sleepcount()	__read_32bit_c0_register($22, 7)
 #define write_c0_brcm_sleepcount(val)	__write_32bit_c0_register($22, 7, val)
 
+/* Ingenic page ctrl register */
+#define write_c0_page_ctrl(val)	__write_32bit_c0_register($5, 4, val)
+
 /*
  * Macros to access the guest system control coprocessor
  */
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index ed70994fbbec..9846047b3d3d 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -119,12 +119,12 @@ search_module_dbetables(unsigned long addr)
 #define MODULE_PROC_FAMILY "RM7000 "
 #elif defined CONFIG_CPU_SB1
 #define MODULE_PROC_FAMILY "SB1 "
-#elif defined CONFIG_CPU_LOONGSON1
-#define MODULE_PROC_FAMILY "LOONGSON1 "
-#elif defined CONFIG_CPU_LOONGSON2
-#define MODULE_PROC_FAMILY "LOONGSON2 "
-#elif defined CONFIG_CPU_LOONGSON3
-#define MODULE_PROC_FAMILY "LOONGSON3 "
+#elif defined CONFIG_CPU_LOONGSON32
+#define MODULE_PROC_FAMILY "LOONGSON32 "
+#elif defined CONFIG_CPU_LOONGSON2EF
+#define MODULE_PROC_FAMILY "LOONGSON2EF "
+#elif defined CONFIG_CPU_LOONGSON64
+#define MODULE_PROC_FAMILY "LOONGSON64 "
 #elif defined CONFIG_CPU_CAVIUM_OCTEON
 #define MODULE_PROC_FAMILY "OCTEON "
 #elif defined CONFIG_CPU_XLR
diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index a92cd30b48c9..3bc630ff9ad4 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h
@@ -807,6 +807,7 @@ struct bridge_controller {
 	unsigned long		intr_addr;
 	struct irq_domain	*domain;
 	unsigned int		pci_int[8];
+	u32			ioc3_sid[8];
 	nasid_t			nasid;
 };
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 166842337eb2..fa77cb71f303 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -96,9 +96,9 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 	free_pages((unsigned long)pud, PUD_ORDER);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
-	set_pgd(pgd, __pgd((unsigned long)pud));
+	set_p4d(p4d, __p4d((unsigned long)pud));
 }
 
 #define __pud_free_tlb(tlb, x, addr)	pud_free((tlb)->mm, x)
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index ba967148b016..1945c8970141 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -16,7 +16,6 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifdef CONFIG_HIGHMEM
@@ -196,14 +195,11 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 
-#define __pgd_offset(address)	pgd_index(address)
-#define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 #define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pmd_index(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
 /* to find an entry in a page-table-directory */
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 93a9dce31f25..f92716cfa4f4 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,11 +17,12 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
-#define __ARCH_USE_5LEVEL_HACK
-#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
+#if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
-#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48))
+#elif CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nop4d.h>
 #endif
 
 /*
@@ -186,44 +187,49 @@ extern pud_t invalid_pud_table[PTRS_PER_PUD];
 /*
  * Empty pgd entries point to the invalid_pud_table.
  */
-static inline int pgd_none(pgd_t pgd)
+static inline int p4d_none(p4d_t p4d)
 {
-	return pgd_val(pgd) == (unsigned long)invalid_pud_table;
+	return p4d_val(p4d) == (unsigned long)invalid_pud_table;
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int p4d_bad(p4d_t p4d)
 {
-	if (unlikely(pgd_val(pgd) & ~PAGE_MASK))
+	if (unlikely(p4d_val(p4d) & ~PAGE_MASK))
 		return 1;
 
 	return 0;
 }
 
-static inline int pgd_present(pgd_t pgd)
+static inline int p4d_present(p4d_t p4d)
 {
-	return pgd_val(pgd) != (unsigned long)invalid_pud_table;
+	return p4d_val(p4d) != (unsigned long)invalid_pud_table;
 }
 
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void p4d_clear(p4d_t *p4dp)
 {
-	pgd_val(*pgdp) = (unsigned long)invalid_pud_table;
+	p4d_val(*p4dp) = (unsigned long)invalid_pud_table;
 }
 
 #define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
 {
-	return pgd_val(pgd);
+	return p4d_val(p4d);
 }
 
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+#define p4d_phys(p4d)		virt_to_phys((void *)p4d_val(p4d))
+#define p4d_page(p4d)		(pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT))
+
+#define p4d_index(address)	(((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 {
-	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+	return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
 }
 
-static inline void set_pgd(pgd_t *pgd, pgd_t pgdval)
+static inline void set_p4d(p4d_t *p4d, p4d_t p4dval)
 {
-	*pgd = pgdval;
+	*p4d = p4dval;
 }
 
 #endif
@@ -314,10 +320,6 @@ static inline void pud_clear(pud_t *pudp)
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
 #endif
 
-#define __pgd_offset(address)	pgd_index(address)
-#define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address)	pmd_index(address)
-
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index f85bd5b15f51..91b89aab1787 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -644,17 +644,6 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #include <asm-generic/pgtable.h>
 
 /*
- * uncached accelerated TLB map for video memory access
- */
-#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-
-struct file;
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-		unsigned long size, pgprot_t vma_prot);
-#endif
-
-/*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
  */
diff --git a/arch/mips/include/asm/pmon.h b/arch/mips/include/asm/pmon.h
deleted file mode 100644
index 6ad519189ce2..000000000000
--- a/arch/mips/include/asm/pmon.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2004 by Ralf Baechle
- *
- * The cpustart method is a PMC-Sierra's function to start the secondary CPU.
- * Stock PMON 2000 has the smpfork, semlock and semunlock methods instead.
- */
-#ifndef _ASM_PMON_H
-#define _ASM_PMON_H
-
-struct callvectors {
-	int	(*open) (char*, int, int);
-	int	(*close) (int);
-	int	(*read) (int, void*, int);
-	int	(*write) (int, void*, int);
-	off_t	(*lseek) (int, off_t, int);
-	int	(*printf) (const char*, ...);
-	void	(*cacheflush) (void);
-	char*	(*gets) (char*);
-	union {
-		int	(*smpfork) (unsigned long cp, char *sp);
-		int	(*cpustart) (long, void (*)(void), void *, long);
-	} _s;
-	int	(*semlock) (int sem);
-	void	(*semunlock) (int sem);
-};
-
-extern struct callvectors *debug_vectors;
-
-#define pmon_open(name, flags, mode)	debug_vectors->open(name, flage, mode)
-#define pmon_close(fd)			debug_vectors->close(fd)
-#define pmon_read(fd, buf, count)	debug_vectors->read(fd, buf, count)
-#define pmon_write(fd, buf, count)	debug_vectors->write(fd, buf, count)
-#define pmon_lseek(fd, off, whence)	debug_vectors->lseek(fd, off, whence)
-#define pmon_printf(fmt...)		debug_vectors->printf(fmt)
-#define pmon_cacheflush()		debug_vectors->cacheflush()
-#define pmon_gets(s)			debug_vectors->gets(s)
-#define pmon_cpustart(n, f, sp, gp)	debug_vectors->_s.cpustart(n, f, sp, gp)
-#define pmon_smpfork(cp, sp)		debug_vectors->_s.smpfork(cp, sp)
-#define pmon_semlock(sem)		debug_vectors->semlock(sem)
-#define pmon_semunlock(sem)		debug_vectors->semunlock(sem)
-
-#endif /* _ASM_PMON_H */
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index fba18d4a9190..7619ad319400 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -385,7 +385,7 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29])
 #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status)
 
-#ifdef CONFIG_CPU_LOONGSON3
+#ifdef CONFIG_CPU_LOONGSON64
 /*
  * Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a
  * tight read loop is executed, because reads take priority over writes & the
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 7f4a32d3345a..15ab16f99f28 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -15,12 +15,14 @@
 #include <linux/stringify.h>
 
 #include <asm/asm.h>
+#include <asm/asm-eva.h>
 #include <asm/cacheops.h>
 #include <asm/compiler.h>
 #include <asm/cpu-features.h>
 #include <asm/cpu-type.h>
 #include <asm/mipsmtregs.h>
 #include <asm/mmzone.h>
+#include <asm/unroll.h>
 #include <linux/uaccess.h> /* for uaccess_kernel() */
 
 extern void (*r4k_blast_dcache)(void);
@@ -39,16 +41,19 @@ extern void (*r4k_blast_icache)(void);
  */
 #define INDEX_BASE	CKSEG0
 
-#define cache_op(op,addr)						\
+#define _cache_op(insn, op, addr)					\
 	__asm__ __volatile__(						\
 	"	.set	push					\n"	\
 	"	.set	noreorder				\n"	\
 	"	.set "MIPS_ISA_ARCH_LEVEL"			\n"	\
-	"	cache	%0, %1					\n"	\
+	"	" insn("%0", "%1") "				\n"	\
 	"	.set	pop					\n"	\
 	:								\
 	: "i" (op), "R" (*(unsigned char *)(addr)))
 
+#define cache_op(op, addr)						\
+	_cache_op(kernel_cache, op, addr)
+
 static inline void flush_icache_line_indexed(unsigned long addr)
 {
 	cache_op(Index_Invalidate_I, addr);
@@ -67,7 +72,7 @@ static inline void flush_scache_line_indexed(unsigned long addr)
 static inline void flush_icache_line(unsigned long addr)
 {
 	switch (boot_cpu_type()) {
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		cache_op(Hit_Invalidate_I_Loongson2, addr);
 		break;
 
@@ -149,7 +154,7 @@ static inline void flush_scache_line(unsigned long addr)
 static inline int protected_flush_icache_line(unsigned long addr)
 {
 	switch (boot_cpu_type()) {
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		return protected_cache_op(Hit_Invalidate_I_Loongson2, addr);
 
 	default:
@@ -193,338 +198,10 @@ static inline void invalidate_tcache_page(unsigned long addr)
 	cache_op(Page_Invalidate_T, addr);
 }
 
-#ifndef CONFIG_CPU_MIPSR6
-#define cache16_unroll32(base,op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips3					\n"	\
-	"	cache %1, 0x000(%0); cache %1, 0x010(%0)	\n"	\
-	"	cache %1, 0x020(%0); cache %1, 0x030(%0)	\n"	\
-	"	cache %1, 0x040(%0); cache %1, 0x050(%0)	\n"	\
-	"	cache %1, 0x060(%0); cache %1, 0x070(%0)	\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x090(%0)	\n"	\
-	"	cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)	\n"	\
-	"	cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)	\n"	\
-	"	cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)	\n"	\
-	"	cache %1, 0x100(%0); cache %1, 0x110(%0)	\n"	\
-	"	cache %1, 0x120(%0); cache %1, 0x130(%0)	\n"	\
-	"	cache %1, 0x140(%0); cache %1, 0x150(%0)	\n"	\
-	"	cache %1, 0x160(%0); cache %1, 0x170(%0)	\n"	\
-	"	cache %1, 0x180(%0); cache %1, 0x190(%0)	\n"	\
-	"	cache %1, 0x1a0(%0); cache %1, 0x1b0(%0)	\n"	\
-	"	cache %1, 0x1c0(%0); cache %1, 0x1d0(%0)	\n"	\
-	"	cache %1, 0x1e0(%0); cache %1, 0x1f0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#define cache32_unroll32(base,op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips3					\n"	\
-	"	cache %1, 0x000(%0); cache %1, 0x020(%0)	\n"	\
-	"	cache %1, 0x040(%0); cache %1, 0x060(%0)	\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x0a0(%0)	\n"	\
-	"	cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)	\n"	\
-	"	cache %1, 0x100(%0); cache %1, 0x120(%0)	\n"	\
-	"	cache %1, 0x140(%0); cache %1, 0x160(%0)	\n"	\
-	"	cache %1, 0x180(%0); cache %1, 0x1a0(%0)	\n"	\
-	"	cache %1, 0x1c0(%0); cache %1, 0x1e0(%0)	\n"	\
-	"	cache %1, 0x200(%0); cache %1, 0x220(%0)	\n"	\
-	"	cache %1, 0x240(%0); cache %1, 0x260(%0)	\n"	\
-	"	cache %1, 0x280(%0); cache %1, 0x2a0(%0)	\n"	\
-	"	cache %1, 0x2c0(%0); cache %1, 0x2e0(%0)	\n"	\
-	"	cache %1, 0x300(%0); cache %1, 0x320(%0)	\n"	\
-	"	cache %1, 0x340(%0); cache %1, 0x360(%0)	\n"	\
-	"	cache %1, 0x380(%0); cache %1, 0x3a0(%0)	\n"	\
-	"	cache %1, 0x3c0(%0); cache %1, 0x3e0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#define cache64_unroll32(base,op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips3					\n"	\
-	"	cache %1, 0x000(%0); cache %1, 0x040(%0)	\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x0c0(%0)	\n"	\
-	"	cache %1, 0x100(%0); cache %1, 0x140(%0)	\n"	\
-	"	cache %1, 0x180(%0); cache %1, 0x1c0(%0)	\n"	\
-	"	cache %1, 0x200(%0); cache %1, 0x240(%0)	\n"	\
-	"	cache %1, 0x280(%0); cache %1, 0x2c0(%0)	\n"	\
-	"	cache %1, 0x300(%0); cache %1, 0x340(%0)	\n"	\
-	"	cache %1, 0x380(%0); cache %1, 0x3c0(%0)	\n"	\
-	"	cache %1, 0x400(%0); cache %1, 0x440(%0)	\n"	\
-	"	cache %1, 0x480(%0); cache %1, 0x4c0(%0)	\n"	\
-	"	cache %1, 0x500(%0); cache %1, 0x540(%0)	\n"	\
-	"	cache %1, 0x580(%0); cache %1, 0x5c0(%0)	\n"	\
-	"	cache %1, 0x600(%0); cache %1, 0x640(%0)	\n"	\
-	"	cache %1, 0x680(%0); cache %1, 0x6c0(%0)	\n"	\
-	"	cache %1, 0x700(%0); cache %1, 0x740(%0)	\n"	\
-	"	cache %1, 0x780(%0); cache %1, 0x7c0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#define cache128_unroll32(base,op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips3					\n"	\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)	\n"	\
-	"	cache %1, 0x100(%0); cache %1, 0x180(%0)	\n"	\
-	"	cache %1, 0x200(%0); cache %1, 0x280(%0)	\n"	\
-	"	cache %1, 0x300(%0); cache %1, 0x380(%0)	\n"	\
-	"	cache %1, 0x400(%0); cache %1, 0x480(%0)	\n"	\
-	"	cache %1, 0x500(%0); cache %1, 0x580(%0)	\n"	\
-	"	cache %1, 0x600(%0); cache %1, 0x680(%0)	\n"	\
-	"	cache %1, 0x700(%0); cache %1, 0x780(%0)	\n"	\
-	"	cache %1, 0x800(%0); cache %1, 0x880(%0)	\n"	\
-	"	cache %1, 0x900(%0); cache %1, 0x980(%0)	\n"	\
-	"	cache %1, 0xa00(%0); cache %1, 0xa80(%0)	\n"	\
-	"	cache %1, 0xb00(%0); cache %1, 0xb80(%0)	\n"	\
-	"	cache %1, 0xc00(%0); cache %1, 0xc80(%0)	\n"	\
-	"	cache %1, 0xd00(%0); cache %1, 0xd80(%0)	\n"	\
-	"	cache %1, 0xe00(%0); cache %1, 0xe80(%0)	\n"	\
-	"	cache %1, 0xf00(%0); cache %1, 0xf80(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#else
-/*
- * MIPS R6 changed the cache opcode and moved to a 8-bit offset field.
- * This means we now need to increment the base register before we flush
- * more cache lines
- */
-#define cache16_unroll32(base,op)				\
-	__asm__ __volatile__(					\
-	"	.set push\n"					\
-	"	.set noreorder\n"				\
-	"	.set mips64r6\n"				\
-	"	.set noat\n"					\
-	"	cache %1, 0x000(%0); cache %1, 0x010(%0)\n"	\
-	"	cache %1, 0x020(%0); cache %1, 0x030(%0)\n"	\
-	"	cache %1, 0x040(%0); cache %1, 0x050(%0)\n"	\
-	"	cache %1, 0x060(%0); cache %1, 0x070(%0)\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x090(%0)\n"	\
-	"	cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n"	\
-	"	cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n"	\
-	"	cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100	\n"	\
-	"	cache %1, 0x000($1); cache %1, 0x010($1)\n"	\
-	"	cache %1, 0x020($1); cache %1, 0x030($1)\n"	\
-	"	cache %1, 0x040($1); cache %1, 0x050($1)\n"	\
-	"	cache %1, 0x060($1); cache %1, 0x070($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x090($1)\n"	\
-	"	cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n"	\
-	"	cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n"	\
-	"	cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n"	\
-	"	.set pop\n"					\
-		:						\
-		: "r" (base),					\
-		  "i" (op));
-
-#define cache32_unroll32(base,op)				\
-	__asm__ __volatile__(					\
-	"	.set push\n"					\
-	"	.set noreorder\n"				\
-	"	.set mips64r6\n"				\
-	"	.set noat\n"					\
-	"	cache %1, 0x000(%0); cache %1, 0x020(%0)\n"	\
-	"	cache %1, 0x040(%0); cache %1, 0x060(%0)\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n"	\
-	"	cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
-	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
-	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
-	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
-	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100\n"	\
-	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
-	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
-	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
-	"	.set pop\n"					\
-		:						\
-		: "r" (base),					\
-		  "i" (op));
-
-#define cache64_unroll32(base,op)				\
-	__asm__ __volatile__(					\
-	"	.set push\n"					\
-	"	.set noreorder\n"				\
-	"	.set mips64r6\n"				\
-	"	.set noat\n"					\
-	"	cache %1, 0x000(%0); cache %1, 0x040(%0)\n"	\
-	"	cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
-	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	.set pop\n"					\
-		:						\
-		: "r" (base),					\
-		  "i" (op));
-
-#define cache128_unroll32(base,op)				\
-	__asm__ __volatile__(					\
-	"	.set push\n"					\
-	"	.set noreorder\n"				\
-	"	.set mips64r6\n"				\
-	"	.set noat\n"					\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
-	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
-	"	.set pop\n"					\
-		:						\
-		: "r" (base),					\
-		  "i" (op));
-#endif /* CONFIG_CPU_MIPSR6 */
-
-/*
- * Perform the cache operation specified by op using a user mode virtual
- * address while in kernel mode.
- */
-#define cache16_unroll32_user(base,op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips0					\n"	\
-	"	.set eva					\n"	\
-	"	cachee %1, 0x000(%0); cachee %1, 0x010(%0)	\n"	\
-	"	cachee %1, 0x020(%0); cachee %1, 0x030(%0)	\n"	\
-	"	cachee %1, 0x040(%0); cachee %1, 0x050(%0)	\n"	\
-	"	cachee %1, 0x060(%0); cachee %1, 0x070(%0)	\n"	\
-	"	cachee %1, 0x080(%0); cachee %1, 0x090(%0)	\n"	\
-	"	cachee %1, 0x0a0(%0); cachee %1, 0x0b0(%0)	\n"	\
-	"	cachee %1, 0x0c0(%0); cachee %1, 0x0d0(%0)	\n"	\
-	"	cachee %1, 0x0e0(%0); cachee %1, 0x0f0(%0)	\n"	\
-	"	cachee %1, 0x100(%0); cachee %1, 0x110(%0)	\n"	\
-	"	cachee %1, 0x120(%0); cachee %1, 0x130(%0)	\n"	\
-	"	cachee %1, 0x140(%0); cachee %1, 0x150(%0)	\n"	\
-	"	cachee %1, 0x160(%0); cachee %1, 0x170(%0)	\n"	\
-	"	cachee %1, 0x180(%0); cachee %1, 0x190(%0)	\n"	\
-	"	cachee %1, 0x1a0(%0); cachee %1, 0x1b0(%0)	\n"	\
-	"	cachee %1, 0x1c0(%0); cachee %1, 0x1d0(%0)	\n"	\
-	"	cachee %1, 0x1e0(%0); cachee %1, 0x1f0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#define cache32_unroll32_user(base, op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips0					\n"	\
-	"	.set eva					\n"	\
-	"	cachee %1, 0x000(%0); cachee %1, 0x020(%0)	\n"	\
-	"	cachee %1, 0x040(%0); cachee %1, 0x060(%0)	\n"	\
-	"	cachee %1, 0x080(%0); cachee %1, 0x0a0(%0)	\n"	\
-	"	cachee %1, 0x0c0(%0); cachee %1, 0x0e0(%0)	\n"	\
-	"	cachee %1, 0x100(%0); cachee %1, 0x120(%0)	\n"	\
-	"	cachee %1, 0x140(%0); cachee %1, 0x160(%0)	\n"	\
-	"	cachee %1, 0x180(%0); cachee %1, 0x1a0(%0)	\n"	\
-	"	cachee %1, 0x1c0(%0); cachee %1, 0x1e0(%0)	\n"	\
-	"	cachee %1, 0x200(%0); cachee %1, 0x220(%0)	\n"	\
-	"	cachee %1, 0x240(%0); cachee %1, 0x260(%0)	\n"	\
-	"	cachee %1, 0x280(%0); cachee %1, 0x2a0(%0)	\n"	\
-	"	cachee %1, 0x2c0(%0); cachee %1, 0x2e0(%0)	\n"	\
-	"	cachee %1, 0x300(%0); cachee %1, 0x320(%0)	\n"	\
-	"	cachee %1, 0x340(%0); cachee %1, 0x360(%0)	\n"	\
-	"	cachee %1, 0x380(%0); cachee %1, 0x3a0(%0)	\n"	\
-	"	cachee %1, 0x3c0(%0); cachee %1, 0x3e0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
-
-#define cache64_unroll32_user(base, op)					\
-	__asm__ __volatile__(						\
-	"	.set push					\n"	\
-	"	.set noreorder					\n"	\
-	"	.set mips0					\n"	\
-	"	.set eva					\n"	\
-	"	cachee %1, 0x000(%0); cachee %1, 0x040(%0)	\n"	\
-	"	cachee %1, 0x080(%0); cachee %1, 0x0c0(%0)	\n"	\
-	"	cachee %1, 0x100(%0); cachee %1, 0x140(%0)	\n"	\
-	"	cachee %1, 0x180(%0); cachee %1, 0x1c0(%0)	\n"	\
-	"	cachee %1, 0x200(%0); cachee %1, 0x240(%0)	\n"	\
-	"	cachee %1, 0x280(%0); cachee %1, 0x2c0(%0)	\n"	\
-	"	cachee %1, 0x300(%0); cachee %1, 0x340(%0)	\n"	\
-	"	cachee %1, 0x380(%0); cachee %1, 0x3c0(%0)	\n"	\
-	"	cachee %1, 0x400(%0); cachee %1, 0x440(%0)	\n"	\
-	"	cachee %1, 0x480(%0); cachee %1, 0x4c0(%0)	\n"	\
-	"	cachee %1, 0x500(%0); cachee %1, 0x540(%0)	\n"	\
-	"	cachee %1, 0x580(%0); cachee %1, 0x5c0(%0)	\n"	\
-	"	cachee %1, 0x600(%0); cachee %1, 0x640(%0)	\n"	\
-	"	cachee %1, 0x680(%0); cachee %1, 0x6c0(%0)	\n"	\
-	"	cachee %1, 0x700(%0); cachee %1, 0x740(%0)	\n"	\
-	"	cachee %1, 0x780(%0); cachee %1, 0x7c0(%0)	\n"	\
-	"	.set pop					\n"	\
-		:							\
-		: "r" (base),						\
-		  "i" (op));
+#define cache_unroll(times, insn, op, addr, lsize) do {			\
+	int i = 0;							\
+	unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize)));	\
+} while (0)
 
 /* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */
 #define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra)	\
@@ -539,7 +216,8 @@ static inline void extra##blast_##pfx##cache##lsize(void)		\
 									\
 	for (ws = 0; ws < ws_end; ws += ws_inc)				\
 		for (addr = start; addr < end; addr += lsize * 32)	\
-			cache##lsize##_unroll32(addr|ws, indexop);	\
+			cache_unroll(32, kernel_cache, indexop,		\
+				     addr | ws, lsize);			\
 }									\
 									\
 static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
@@ -548,7 +226,7 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
 	unsigned long end = page + PAGE_SIZE;				\
 									\
 	do {								\
-		cache##lsize##_unroll32(start, hitop);			\
+		cache_unroll(32, kernel_cache, hitop, start, lsize);	\
 		start += lsize * 32;					\
 	} while (start < end);						\
 }									\
@@ -565,7 +243,8 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long
 									\
 	for (ws = 0; ws < ws_end; ws += ws_inc)				\
 		for (addr = start; addr < end; addr += lsize * 32)	\
-			cache##lsize##_unroll32(addr|ws, indexop);	\
+			cache_unroll(32, kernel_cache, indexop,		\
+				     addr | ws, lsize);			\
 }
 
 __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, )
@@ -596,7 +275,7 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \
 	unsigned long end = page + PAGE_SIZE;				\
 									\
 	do {								\
-		cache##lsize##_unroll32_user(start, hitop);             \
+		cache_unroll(32, user_cache, hitop, start, lsize);	\
 		start += lsize * 32;					\
 	} while (start < end);						\
 }
@@ -688,7 +367,8 @@ static inline void blast_##pfx##cache##lsize##_node(long node)		\
 									\
 	for (ws = 0; ws < ws_end; ws += ws_inc)				\
 		for (addr = start; addr < end; addr += lsize * 32)	\
-			cache##lsize##_unroll32(addr|ws, indexop);	\
+			cache_unroll(32, kernel_cache, indexop,		\
+				     addr | ws, lsize);			\
 }
 
 __BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
diff --git a/arch/mips/include/asm/sgi/heart.h b/arch/mips/include/asm/sgi/heart.h
new file mode 100644
index 000000000000..c423221b4792
--- /dev/null
+++ b/arch/mips/include/asm/sgi/heart.h
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HEART chip definitions
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *		 2009 Johannes Dickgreber <tanzy@gmx.de>
+ *		 2007-2015 Joshua Kinard <kumba@gentoo.org>
+ */
+#ifndef __ASM_SGI_HEART_H
+#define __ASM_SGI_HEART_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/*
+ * There are 8 DIMM slots on an IP30 system
+ * board, which are grouped into four banks
+ */
+#define HEART_MEMORY_BANKS	4
+
+/* HEART can support up to four CPUs */
+#define HEART_MAX_CPUS		4
+
+#define HEART_XKPHYS_BASE	((void *)(IO_BASE | 0x000000000ff00000ULL))
+
+/**
+ * struct ip30_heart_regs - struct that maps IP30 HEART registers.
+ * @mode: HEART_MODE - Purpose Unknown, machine reset called from here.
+ * @sdram_mode: HEART_SDRAM_MODE - purpose unknown.
+ * @mem_refresh: HEART_MEM_REF - purpose unknown.
+ * @mem_req_arb: HEART_MEM_REQ_ARB - purpose unknown.
+ * @mem_cfg.q: union for 64bit access to HEART_MEMCFG - 4x 64bit registers.
+ * @mem_cfg.l: union for 32bit access to HEART_MEMCFG - 8x 32bit registers.
+ * @fc_mode: HEART_FC_MODE - Purpose Unknown, possibly for GFX flow control.
+ * @fc_timer_limit: HEART_FC_TIMER_LIMIT - purpose unknown.
+ * @fc_addr: HEART_FC0_ADDR, HEART_FC1_ADDR - purpose unknown.
+ * @fc_credit_cnt: HEART_FC0_CR_CNT, HEART_FC1_CR_CNT - purpose unknown.
+ * @fc_timer: HEART_FC0_TIMER, HEART_FC1_TIMER - purpose unknown.
+ * @status: HEART_STATUS - HEART status information.
+ * @bus_err_addr: HEART_BERR_ADDR - likely contains addr of recent SIGBUS.
+ * @bus_err_misc: HEART_BERR_MISC - purpose unknown.
+ * @mem_err_addr: HEART_MEMERR_ADDR - likely contains addr of recent mem err.
+ * @mem_err_data: HEART_MEMERR_DATA - purpose unknown.
+ * @piur_acc_err: HEART_PIUR_ACC_ERR - likely for access err to HEART regs.
+ * @mlan_clock_div: HEART_MLAN_CLK_DIV - MicroLAN clock divider.
+ * @mlan_ctrl: HEART_MLAN_CTL - MicroLAN control.
+ * @__pad0: 0x0f40 bytes of padding -> next HEART register 0x01000.
+ * @undefined: Undefined/diag register, write to it triggers PIUR_ACC_ERR.
+ * @__pad1: 0xeff8 bytes of padding -> next HEART register 0x10000.
+ * @imr: HEART_IMR0 to HEART_IMR3 - per-cpu interrupt mask register.
+ * @set_isr: HEART_SET_ISR - set interrupt status register.
+ * @clear_isr: HEART_CLR_ISR - clear interrupt status register.
+ * @isr: HEART_ISR - interrupt status register (read-only).
+ * @imsr: HEART_IMSR - purpose unknown.
+ * @cause: HEART_CAUSE - HEART cause information.
+ * @__pad2: 0xffb8 bytes of padding -> next HEART register 0x20000.
+ * @count: HEART_COUNT - 52-bit counter.
+ * @__pad3: 0xfff8 bytes of padding -> next HEART register 0x30000.
+ * @compare: HEART_COMPARE - 24-bit compare.
+ * @__pad4: 0xfff8 bytes of padding -> next HEART register 0x40000.
+ * @trigger: HEART_TRIGGER - purpose unknown.
+ * @__pad5: 0xfff8 bytes of padding -> next HEART register 0x50000.
+ * @cpuid: HEART_PRID - contains CPU ID of CPU currently accessing HEART.
+ * @__pad6: 0xfff8 bytes of padding -> next HEART register 0x60000.
+ * @sync: HEART_SYNC - purpose unknown.
+ *
+ * HEART is the main system controller ASIC for IP30 system.  It incorporates
+ * a memory controller, interrupt status/cause/set/clear management, basic
+ * timer with count/compare, and other functionality.  For Linux, not all of
+ * HEART's functions are fully understood.
+ *
+ * Implementation note: All HEART registers are 64bits-wide, but the mem_cfg
+ * register only reports correct values if queried in 32bits.  Hence the need
+ * for a union.  Even though mem_cfg.l has 8 array slots, we only ever query
+ * up to 4 of those.  IP30 has 8 DIMM slots arranged into 4 banks, w/ 2 DIMMs
+ * per bank.  Each 32bit read accesses one of these banks.  Perhaps HEART was
+ * designed to address up to 8 banks (16 DIMMs)?  We may never know.
+ */
+struct ip30_heart_regs {		/* 0x0ff00000 */
+	u64 mode;			/* +  0x00000 */
+	/* Memory */
+	u64 sdram_mode;			/* +  0x00008 */
+	u64 mem_refresh;		/* +  0x00010 */
+	u64 mem_req_arb;		/* +  0x00018 */
+	union {
+		u64 q[HEART_MEMORY_BANKS];	/* readq() */
+		u32 l[HEART_MEMORY_BANKS * 2];	/* readl() */
+	} mem_cfg;			/* +  0x00020 */
+	/* Flow control (gfx?) */
+	u64 fc_mode;			/* +  0x00040 */
+	u64 fc_timer_limit;		/* +  0x00048 */
+	u64 fc_addr[2];			/* +  0x00050 */
+	u64 fc_credit_cnt[2];		/* +  0x00060 */
+	u64 fc_timer[2];		/* +  0x00070 */
+	/* Status */
+	u64 status;			/* +  0x00080 */
+	/* Bus error */
+	u64 bus_err_addr;		/* +  0x00088 */
+	u64 bus_err_misc;		/* +  0x00090 */
+	/* Memory error */
+	u64 mem_err_addr;		/* +  0x00098 */
+	u64 mem_err_data;		/* +  0x000a0 */
+	/* Misc */
+	u64 piur_acc_err;		/* +  0x000a8 */
+	u64 mlan_clock_div;		/* +  0x000b0 */
+	u64 mlan_ctrl;			/* +  0x000b8 */
+	u64 __pad0[0x01e8];		/* +  0x000c0 + 0x0f40 */
+	/* Undefined */
+	u64 undefined;			/* +  0x01000 */
+	u64 __pad1[0x1dff];		/* +  0x01008 + 0xeff8 */
+	/* Interrupts */
+	u64 imr[HEART_MAX_CPUS];	/* +  0x10000 */
+	u64 set_isr;			/* +  0x10020 */
+	u64 clear_isr;			/* +  0x10028 */
+	u64 isr;			/* +  0x10030 */
+	u64 imsr;			/* +  0x10038 */
+	u64 cause;			/* +  0x10040 */
+	u64 __pad2[0x1ff7];		/* +  0x10048 + 0xffb8 */
+	/* Timer */
+	u64 count;			/* +  0x20000 */
+	u64 __pad3[0x1fff];		/* +  0x20008 + 0xfff8 */
+	u64 compare;			/* +  0x30000 */
+	u64 __pad4[0x1fff];		/* +  0x30008 + 0xfff8 */
+	u64 trigger;			/* +  0x40000 */
+	u64 __pad5[0x1fff];		/* +  0x40008 + 0xfff8 */
+	/* Misc */
+	u64 cpuid;			/* +  0x50000 */
+	u64 __pad6[0x1fff];		/* +  0x50008 + 0xfff8 */
+	u64 sync;			/* +  0x60000 */
+};
+
+
+/* For timer-related bits. */
+#define HEART_NS_PER_CYCLE	80
+#define HEART_CYCLES_PER_SEC	(NSEC_PER_SEC / HEART_NS_PER_CYCLE)
+
+
+/*
+ * XXX: Everything below this comment will either go away or be cleaned
+ *      up to fit in better with Linux.  A lot of the bit definitions for
+ *      HEART were derived from IRIX's sys/RACER/heart.h header file.
+ */
+
+/* HEART Masks */
+#define HEART_ATK_MASK		0x0007ffffffffffff	/* HEART attack mask */
+#define HEART_ACK_ALL_MASK	0xffffffffffffffff	/* Ack everything */
+#define HEART_CLR_ALL_MASK	0x0000000000000000	/* Clear all */
+#define HEART_BR_ERR_MASK	0x7ff8000000000000	/* BRIDGE error mask */
+#define HEART_CPU0_ERR_MASK	0x8ff8000000000000	/* CPU0 error mask */
+#define HEART_CPU1_ERR_MASK	0x97f8000000000000	/* CPU1 error mask */
+#define HEART_CPU2_ERR_MASK	0xa7f8000000000000	/* CPU2 error mask */
+#define HEART_CPU3_ERR_MASK	0xc7f8000000000000	/* CPU3 error mask */
+#define HEART_ERR_MASK		0x1ff			/* HEART error mask */
+#define HEART_ERR_MASK_START	51			/* HEART error start */
+#define HEART_ERR_MASK_END	63			/* HEART error end */
+
+/* Bits in the HEART_MODE register. */
+#define HM_PROC_DISABLE_SHFT		60
+#define HM_PROC_DISABLE_MSK		(0xfUL << HM_PROC_DISABLE_SHFT)
+#define HM_PROC_DISABLE(x)		(0x1UL << (x) + HM_PROC_DISABLE_SHFT)
+#define HM_MAX_PSR			(0x7UL << 57)
+#define HM_MAX_IOSR			(0x7UL << 54)
+#define HM_MAX_PEND_IOSR		(0x7UL << 51)
+#define HM_TRIG_SRC_SEL_MSK		(0x7UL << 48)
+#define HM_TRIG_HEART_EXC		(0x0UL << 48)
+#define HM_TRIG_REG_BIT			(0x1UL << 48)
+#define HM_TRIG_SYSCLK			(0x2UL << 48)
+#define HM_TRIG_MEMCLK_2X		(0x3UL << 48)
+#define HM_TRIG_MEMCLK			(0x4UL << 48)
+#define HM_TRIG_IOCLK			(0x5UL << 48)
+#define HM_PIU_TEST_MODE		(0xfUL << 40)
+#define HM_GP_FLAG_MSK			(0xfUL << 36)
+#define HM_GP_FLAG(x)			BIT((x) + 36)
+#define HM_MAX_PROC_HYST		(0xfUL << 32)
+#define HM_LLP_WRST_AFTER_RST		BIT(28)
+#define HM_LLP_LINK_RST			BIT(27)
+#define HM_LLP_WARM_RST			BIT(26)
+#define HM_COR_ECC_LCK			BIT(25)
+#define HM_REDUCED_PWR			BIT(24)
+#define HM_COLD_RST			BIT(23)
+#define HM_SW_RST			BIT(22)
+#define HM_MEM_FORCE_WR			BIT(21)
+#define HM_DB_ERR_GEN			BIT(20)
+#define HM_SB_ERR_GEN			BIT(19)
+#define HM_CACHED_PIO_EN		BIT(18)
+#define HM_CACHED_PROM_EN		BIT(17)
+#define HM_PE_SYS_COR_ERE		BIT(16)
+#define HM_GLOBAL_ECC_EN		BIT(15)
+#define HM_IO_COH_EN			BIT(14)
+#define HM_INT_EN			BIT(13)
+#define HM_DATA_CHK_EN			BIT(12)
+#define HM_REF_EN			BIT(11)
+#define HM_BAD_SYSWR_ERE		BIT(10)
+#define HM_BAD_SYSRD_ERE		BIT(9)
+#define HM_SYSSTATE_ERE			BIT(8)
+#define HM_SYSCMD_ERE			BIT(7)
+#define HM_NCOR_SYS_ERE			BIT(6)
+#define HM_COR_SYS_ERE			BIT(5)
+#define HM_DATA_ELMNT_ERE		BIT(4)
+#define HM_MEM_ADDR_PROC_ERE		BIT(3)
+#define HM_MEM_ADDR_IO_ERE		BIT(2)
+#define HM_NCOR_MEM_ERE			BIT(1)
+#define HM_COR_MEM_ERE			BIT(0)
+
+/* Bits in the HEART_MEM_REF register. */
+#define HEART_MEMREF_REFS(x)		((0xfUL & (x)) << 16)
+#define HEART_MEMREF_PERIOD(x)		((0xffffUL & (x)))
+#define HEART_MEMREF_REFS_VAL		HEART_MEMREF_REFS(8)
+#define HEART_MEMREF_PERIOD_VAL		HEART_MEMREF_PERIOD(0x4000)
+#define HEART_MEMREF_VAL		(HEART_MEMREF_REFS_VAL | \
+					 HEART_MEMREF_PERIOD_VAL)
+
+/* Bits in the HEART_MEM_REQ_ARB register. */
+#define HEART_MEMARB_IODIS		(1  << 20)
+#define HEART_MEMARB_MAXPMWRQS		(15 << 16)
+#define HEART_MEMARB_MAXPMRRQS		(15 << 12)
+#define HEART_MEMARB_MAXPMRQS		(15 << 8)
+#define HEART_MEMARB_MAXRRRQS		(15 << 4)
+#define HEART_MEMARB_MAXGBRRQS		(15)
+
+/* Bits in the HEART_MEMCFG<x> registers. */
+#define HEART_MEMCFG_VALID		0x80000000	/* Bank is valid */
+#define HEART_MEMCFG_DENSITY		0x01c00000	/* Mem density */
+#define HEART_MEMCFG_SIZE_MASK		0x003f0000	/* Mem size mask */
+#define HEART_MEMCFG_ADDR_MASK		0x000001ff	/* Base addr mask */
+#define HEART_MEMCFG_SIZE_SHIFT		16		/* Mem size shift */
+#define HEART_MEMCFG_DENSITY_SHIFT	22		/* Density Shift */
+#define HEART_MEMCFG_UNIT_SHIFT		25		/* Unit Shift, 32MB */
+
+/* Bits in the HEART_STATUS register */
+#define HEART_STAT_HSTL_SDRV		BIT(14)
+#define HEART_STAT_FC_CR_OUT(x)		BIT((x) + 12)
+#define HEART_STAT_DIR_CNNCT		BIT(11)
+#define HEART_STAT_TRITON		BIT(10)
+#define HEART_STAT_R4K			BIT(9)
+#define HEART_STAT_BIG_ENDIAN		BIT(8)
+#define HEART_STAT_PROC_SHFT		4
+#define HEART_STAT_PROC_MSK		(0xfUL << HEART_STAT_PROC_SHFT)
+#define HEART_STAT_PROC_ACTIVE(x)	(0x1UL << ((x) + HEART_STAT_PROC_SHFT))
+#define HEART_STAT_WIDGET_ID		0xf
+
+/* Bits in the HEART_CAUSE register */
+#define HC_PE_SYS_COR_ERR_MSK		(0xfUL << 60)
+#define HC_PE_SYS_COR_ERR(x)		BIT((x) + 60)
+#define HC_PIOWDB_OFLOW			BIT(44)
+#define HC_PIORWRB_OFLOW		BIT(43)
+#define HC_PIUR_ACC_ERR			BIT(42)
+#define HC_BAD_SYSWR_ERR		BIT(41)
+#define HC_BAD_SYSRD_ERR		BIT(40)
+#define HC_SYSSTATE_ERR_MSK		(0xfUL << 36)
+#define HC_SYSSTATE_ERR(x)		BIT((x) + 36)
+#define HC_SYSCMD_ERR_MSK		(0xfUL << 32)
+#define HC_SYSCMD_ERR(x)		BIT((x) + 32)
+#define HC_NCOR_SYSAD_ERR_MSK		(0xfUL << 28)
+#define HC_NCOR_SYSAD_ERR(x)		BIT((x) + 28)
+#define HC_COR_SYSAD_ERR_MSK		(0xfUL << 24)
+#define HC_COR_SYSAD_ERR(x)		BIT((x) + 24)
+#define HC_DATA_ELMNT_ERR_MSK		(0xfUL << 20)
+#define HC_DATA_ELMNT_ERR(x)		BIT((x) + 20)
+#define HC_WIDGET_ERR			BIT(16)
+#define HC_MEM_ADDR_ERR_PROC_MSK	(0xfUL << 4)
+#define HC_MEM_ADDR_ERR_PROC(x)	BIT((x) + 4)
+#define HC_MEM_ADDR_ERR_IO		BIT(2)
+#define HC_NCOR_MEM_ERR			BIT(1)
+#define HC_COR_MEM_ERR			BIT(0)
+
+extern struct ip30_heart_regs __iomem *heart_regs;
+
+#define heart_read	____raw_readq
+#define heart_write	____raw_writeq
+
+#endif /* __ASM_SGI_HEART_H */
diff --git a/arch/mips/include/asm/sgi/sgi.h b/arch/mips/include/asm/sgi/sgi.h
deleted file mode 100644
index b61557151e3f..000000000000
--- a/arch/mips/include/asm/sgi/sgi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * sgi.h: Definitions specific to SGI machines.
- *
- * Copyright (C) 1996 David S. Miller (dm@sgi.com)
- */
-#ifndef _ASM_SGI_SGI_H
-#define _ASM_SGI_SGI_H
-
-/* UP=UniProcessor MP=MultiProcessor(capable) */
-enum sgi_mach {
-	ip4,	/* R2k UP */
-	ip5,	/* R2k MP */
-	ip6,	/* R3k UP */
-	ip7,	/* R3k MP */
-	ip9,	/* R3k UP */
-	ip12,	/* R3kA UP, Indigo */
-	ip15,	/* R3kA MP */
-	ip17,	/* R4K UP */
-	ip19,	/* R4K MP */
-	ip20,	/* R4K UP, Indigo */
-	ip21,	/* R8k/TFP MP */
-	ip22,	/* R4x00 UP, Indy, Indigo2 */
-	ip25,	/* R10k MP */
-	ip26,	/* R8k/TFP UP, Indigo2 */
-	ip27,	/* R10k MP, R12k MP, R14k MP, Origin 200/2k, Onyx2 */
-	ip28,	/* R10k UP, Indigo2 Impact R10k */
-	ip30,	/* R10k MP, R12k MP, R14k MP, Octane */
-	ip32,	/* R5k UP, RM5200 UP, RM7k UP, R10k UP, R12k UP, O2 */
-	ip35,   /* R14k MP, R16k MP, Origin 300/3k, Onyx3, Fuel, Tezro */
-};
-
-extern enum sgi_mach sgimach;
-extern void sgi_sysinit(void);
-
-/* Many I/O space registers are byte sized and are contained within
- * one byte per word, specifically the MSB, this macro helps out.
- */
-#ifdef __MIPSEL__
-#define SGI_MSB(regaddr)   (regaddr)
-#else
-#define SGI_MSB(regaddr)   ((regaddr) | 0x3)
-#endif
-
-#endif /* _ASM_SGI_SGI_H */
diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h
index 0d9fad5915fe..80f900417f7e 100644
--- a/arch/mips/include/asm/sgialib.h
+++ b/arch/mips/include/asm/sgialib.h
@@ -15,14 +15,6 @@
 #include <asm/sgiarcs.h>
 
 extern struct linux_romvec *romvec;
-extern int prom_argc;
-
-extern LONG *_prom_argv, *_prom_envp;
-
-/* A 32-bit ARC PROM pass arguments and environment as 32-bit pointer.
-   These macros take care of sign extension.  */
-#define prom_argv(index) ((char *) (long) _prom_argv[(index)])
-#define prom_argc(index) ((char *) (long) _prom_argc[(index)])
 
 extern int prom_flags;
 
@@ -47,12 +39,6 @@ extern void prom_meminit(void);
 /* PROM device tree library routines. */
 #define PROM_NULL_COMPONENT ((pcomponent *) 0)
 
-/* Get sibling component of THIS. */
-extern pcomponent *ArcGetPeer(pcomponent *this);
-
-/* Get child component of THIS. */
-extern pcomponent *ArcGetChild(pcomponent *this);
-
 /* This is called at prom_init time to identify the
  * ARC architecture we are running on
  */
@@ -60,22 +46,16 @@ extern void prom_identify_arch(void);
 
 /* Environment variable routines. */
 extern PCHAR ArcGetEnvironmentVariable(PCHAR name);
-extern LONG ArcSetEnvironmentVariable(PCHAR name, PCHAR value);
 
 /* ARCS command line parsing. */
-extern void prom_init_cmdline(void);
+extern void prom_init_cmdline(int argc, LONG *argv);
 
 /* File operations. */
 extern LONG ArcRead(ULONG fd, PVOID buf, ULONG num, PULONG cnt);
 extern LONG ArcWrite(ULONG fd, PVOID buf, ULONG num, PULONG cnt);
 
 /* Misc. routines. */
-extern VOID ArcHalt(VOID) __noreturn;
-extern VOID ArcPowerDown(VOID) __noreturn;
-extern VOID ArcRestart(VOID) __noreturn;
-extern VOID ArcReboot(VOID) __noreturn;
 extern VOID ArcEnterInteractiveMode(VOID) __noreturn;
-extern VOID ArcFlushAllCaches(VOID);
 extern DISPLAY_STATUS *ArcGetDisplayStatus(ULONG FileID);
 
 #endif /* _ASM_SGIALIB_H */
diff --git a/arch/mips/include/asm/sgiarcs.h b/arch/mips/include/asm/sgiarcs.h
index 105a9479ac5f..e1512cab180b 100644
--- a/arch/mips/include/asm/sgiarcs.h
+++ b/arch/mips/include/asm/sgiarcs.h
@@ -12,6 +12,8 @@
 #ifndef _ASM_SGIARCS_H
 #define _ASM_SGIARCS_H
 
+#include <linux/kernel.h>
+
 #include <asm/types.h>
 #include <asm/fw/arc/types.h>
 
@@ -368,110 +370,65 @@ struct linux_smonblock {
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32)
 
-#define __arc_clobbers							\
-	"$2", "$3" /* ... */, "$8", "$9", "$10", "$11",				\
-	"$12", "$13", "$14", "$15", "$16", "$24", "$25", "$31"
+extern long call_o32(long vec, void *stack, ...);
+
+extern u64 o32_stk[4096];
+#define O32_STK	(&o32_stk[ARRAY_SIZE(o32_stk)])
 
 #define ARC_CALL0(dest)							\
 ({	long __res;							\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec)							\
-	: __arc_clobbers, "$4", "$5", "$6", "$7");			\
-	(unsigned long) __res;						\
+	__res = call_o32(__vec, O32_STK);				\
+	__res;								\
 })
 
 #define ARC_CALL1(dest, a1)						\
 ({	long __res;							\
-	register signed int __a1 __asm__("$4") = (int) (long) (a1);	\
+	int  __a1 = (int) (long) (a1);					\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec), "r" (__a1)					\
-	: __arc_clobbers, "$5", "$6", "$7");				\
-	(unsigned long) __res;						\
+	__res = call_o32(__vec, O32_STK, __a1);				\
+	__res;								\
 })
 
 #define ARC_CALL2(dest, a1, a2)						\
 ({	long __res;							\
-	register signed int __a1 __asm__("$4") = (int) (long) (a1);	\
-	register signed int __a2 __asm__("$5") = (int) (long) (a2);	\
+	int  __a1 = (int) (long) (a1);					\
+	int  __a2 = (int) (long) (a2);					\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec), "r" (__a1), "r" (__a2)				\
-	: __arc_clobbers, "$6", "$7");					\
+	__res = call_o32(__vec, O32_STK, __a1, __a2);			\
 	__res;								\
 })
 
 #define ARC_CALL3(dest, a1, a2, a3)					\
 ({	long __res;							\
-	register signed int __a1 __asm__("$4") = (int) (long) (a1);	\
-	register signed int __a2 __asm__("$5") = (int) (long) (a2);	\
-	register signed int __a3 __asm__("$6") = (int) (long) (a3);	\
+	int  __a1 = (int) (long) (a1);					\
+	int  __a2 = (int) (long) (a2);					\
+	int  __a3 = (int) (long) (a3);					\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3)		\
-	: __arc_clobbers, "$7");					\
+	__res = call_o32(__vec, O32_STK, __a1, __a2, __a3);		\
 	__res;								\
 })
 
 #define ARC_CALL4(dest, a1, a2, a3, a4)					\
 ({	long __res;							\
-	register signed int __a1 __asm__("$4") = (int) (long) (a1);	\
-	register signed int __a2 __asm__("$5") = (int) (long) (a2);	\
-	register signed int __a3 __asm__("$6") = (int) (long) (a3);	\
-	register signed int __a4 __asm__("$7") = (int) (long) (a4);	\
+	int  __a1 = (int) (long) (a1);					\
+	int  __a2 = (int) (long) (a2);					\
+	int  __a3 = (int) (long) (a3);					\
+	int  __a4 = (int) (long) (a4);					\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3),		\
-	  "r" (__a4)							\
-	: __arc_clobbers);						\
+	__res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4);	\
 	__res;								\
 })
 
-#define ARC_CALL5(dest, a1, a2, a3, a4, a5)					\
+#define ARC_CALL5(dest, a1, a2, a3, a4, a5)				\
 ({	long __res;							\
-	register signed int __a1 __asm__("$4") = (int) (long) (a1);	\
-	register signed int __a2 __asm__("$5") = (int) (long) (a2);	\
-	register signed int __a3 __asm__("$6") = (int) (long) (a3);	\
-	register signed int __a4 __asm__("$7") = (int) (long) (a4);	\
-	register signed int __a5 = (int) (long) (a5);			\
+	int  __a1 = (int) (long) (a1);					\
+	int  __a2 = (int) (long) (a2);					\
+	int  __a3 = (int) (long) (a3);					\
+	int  __a4 = (int) (long) (a4);					\
+	int  __a5 = (int) (long) (a5);					\
 	long __vec = (long) romvec->dest;				\
-	__asm__ __volatile__(						\
-	"dsubu\t$29, 32\n\t"						\
-	"sw\t%7, 16($29)\n\t"						\
-	"jalr\t%1\n\t"							\
-	"daddu\t$29, 32\n\t"						\
-	"move\t%0, $2"							\
-	: "=r" (__res), "=r" (__vec)					\
-	: "1" (__vec),							\
-	  "r" (__a1), "r" (__a2), "r" (__a3), "r" (__a4),		\
-	  "r" (__a5)							\
-	: __arc_clobbers);						\
+	__res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4, __a5);	\
 	__res;								\
 })
 
diff --git a/arch/mips/include/asm/sn/agent.h b/arch/mips/include/asm/sn/agent.h
index e33d09293019..7e9b3271737a 100644
--- a/arch/mips/include/asm/sn/agent.h
+++ b/arch/mips/include/asm/sn/agent.h
@@ -26,7 +26,7 @@
 
 #if defined(CONFIG_SGI_IP27)
 #define HUB_NIC_ADDR(_cpuid)						   \
-	REMOTE_HUB_ADDR(COMPACT_TO_NASID_NODEID(cpu_to_node(_cpuid)),	    \
+	REMOTE_HUB_ADDR(cpu_to_node(_cpuid),				   \
 		MD_MLAN_CTL)
 #endif
 
diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h
index 3f1fb1454749..f7d3273d9599 100644
--- a/arch/mips/include/asm/sn/arch.h
+++ b/arch/mips/include/asm/sn/arch.h
@@ -19,44 +19,13 @@
 
 #define cputonasid(cpu)		(sn_cpu_info[(cpu)].p_nasid)
 #define cputoslice(cpu)		(sn_cpu_info[(cpu)].p_slice)
-#define makespnum(_nasid, _slice)					\
-		(((_nasid) << CPUS_PER_NODE_SHFT) | (_slice))
 
 #define INVALID_NASID		(nasid_t)-1
-#define INVALID_CNODEID		(cnodeid_t)-1
 #define INVALID_PNODEID		(pnodeid_t)-1
 #define INVALID_MODULE		(moduleid_t)-1
 #define INVALID_PARTID		(partid_t)-1
 
 extern nasid_t get_nasid(void);
-extern cnodeid_t get_cpu_cnode(cpuid_t);
 extern int get_cpu_slice(cpuid_t);
 
-/*
- * NO ONE should access these arrays directly.	The only reason we refer to
- * them here is to avoid the procedure call that would be required in the
- * macros below.  (Really want private data members here :-)
- */
-extern cnodeid_t nasid_to_compact_node[MAX_NASIDS];
-extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
-
-/*
- * These macros are used by various parts of the kernel to convert
- * between the three different kinds of node numbering.	  At least some
- * of them may change to procedure calls in the future, but the macros
- * will continue to work.  Don't use the arrays above directly.
- */
-
-#define NASID_TO_REGION(nnode)		\
-    ((nnode) >> \
-     (is_fine_dirmode() ? NASID_TO_FINEREG_SHFT : NASID_TO_COARSEREG_SHFT))
-
-extern cnodeid_t nasid_to_compact_node[MAX_NASIDS];
-extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
-extern cnodeid_t cpuid_to_compact_node[MAXCPUS];
-
-#define NASID_TO_COMPACT_NODEID(nnode)	(nasid_to_compact_node[nnode])
-#define COMPACT_TO_NASID_NODEID(cnode)	(compact_to_nasid_node[cnode])
-#define CPUID_TO_COMPACT_NODEID(cpu)	(cpuid_to_compact_node[(cpu)])
-
 #endif /* _ASM_SN_ARCH_H */
diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h
index 85fa1b5f639d..d52f81620661 100644
--- a/arch/mips/include/asm/sn/gda.h
+++ b/arch/mips/include/asm/sn/gda.h
@@ -60,9 +60,7 @@ typedef struct gda {
 				/* Pointer to a mask of nodes with copies
 				 * of the kernel. */
 	char	g_padding[56];	/* pad out to 128 bytes */
-	nasid_t g_nasidtable[MAX_COMPACT_NODES]; /* NASID of each node,
-						  * indexed by cnodeid.
-						  */
+	nasid_t g_nasidtable[MAX_NUMNODES]; /* NASID of each node */
 } gda_t;
 
 #define GDA ((gda_t*) GDA_ADDR(get_nasid()))
diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h
index 338f7eed74f1..45878fbefbae 100644
--- a/arch/mips/include/asm/sn/hub.h
+++ b/arch/mips/include/asm/sn/hub.h
@@ -10,8 +10,8 @@
 #include <asm/xtalk/xtalk.h>
 
 /* ip27-hubio.c */
-extern unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
+extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
 			  unsigned long xtalk_addr, size_t size);
-extern void hub_pio_init(cnodeid_t cnode);
+extern void hub_pio_init(nasid_t nasid);
 
 #endif /* __ASM_SN_HUB_H */
diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h
index a947eed48fee..78ef760ddde4 100644
--- a/arch/mips/include/asm/sn/ioc3.h
+++ b/arch/mips/include/asm/sn/ioc3.h
@@ -590,4 +590,13 @@ struct ioc3_etxd {
 
 #define MIDR_DATA_MASK		0x0000ffff
 
+/* subsystem IDs supplied by card detection in pci-xtalk-bridge */
+#define	IOC3_SUBSYS_IP27_BASEIO6G	0xc300
+#define	IOC3_SUBSYS_IP27_MIO		0xc301
+#define	IOC3_SUBSYS_IP27_BASEIO		0xc302
+#define	IOC3_SUBSYS_IP29_SYSBOARD	0xc303
+#define	IOC3_SUBSYS_IP30_SYSBOARD	0xc304
+#define	IOC3_SUBSYS_MENET		0xc305
+#define	IOC3_SUBSYS_MENET4		0xc306
+
 #endif /* MIPS_SN_IOC3_H */
diff --git a/arch/mips/include/asm/sn/mapped_kernel.h b/arch/mips/include/asm/sn/mapped_kernel.h
index 2f3efa91c16e..3f1049807018 100644
--- a/arch/mips/include/asm/sn/mapped_kernel.h
+++ b/arch/mips/include/asm/sn/mapped_kernel.h
@@ -37,10 +37,10 @@
 
 #define MAPPED_KERN_RO_TO_PHYS(x) \
 				((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \
-				MAPPED_KERN_RO_PHYSBASE(get_compact_nodeid()))
+				MAPPED_KERN_RO_PHYSBASE(get_nasid()))
 #define MAPPED_KERN_RW_TO_PHYS(x) \
 				((unsigned long)MAPPED_ADDR_RW_TO_PHYS(x) | \
-				MAPPED_KERN_RW_PHYSBASE(get_compact_nodeid()))
+				MAPPED_KERN_RW_PHYSBASE(get_nasid()))
 
 #else /* CONFIG_MAPPED_KERNEL */
 
diff --git a/arch/mips/include/asm/sn/sn0/arch.h b/arch/mips/include/asm/sn/sn0/arch.h
index 425a67e6a947..12f4c4649ff0 100644
--- a/arch/mips/include/asm/sn/sn0/arch.h
+++ b/arch/mips/include/asm/sn/sn0/arch.h
@@ -12,25 +12,11 @@
 #define _ASM_SN_SN0_ARCH_H
 
 
-#ifndef SN0XXL	/* 128 cpu SMP max */
-/*
- * This is the maximum number of nodes that can be part of a kernel.
- * Effectively, it's the maximum number of compact node ids (cnodeid_t).
- */
-#define MAX_COMPACT_NODES	64
-
 /*
  * MAXCPUS refers to the maximum number of CPUs in a single kernel.
  * This is not necessarily the same as MAXNODES * CPUS_PER_NODE
  */
-#define MAXCPUS			128
-
-#else /* SN0XXL system */
-
-#define MAX_COMPACT_NODES	128
-#define MAXCPUS			256
-
-#endif /* SN0XXL */
+#define MAXCPUS			(MAX_NUMNODES * CPUS_PER_NODE)
 
 /*
  * This is the maximum number of NASIDS that can be present in a system.
@@ -66,7 +52,5 @@
 #define SLOT_MIN_MEM_SIZE	(32*1024*1024)
 
 #define CPUS_PER_NODE		2	/* CPUs on a single hub */
-#define CPUS_PER_NODE_SHFT	1	/* Bits to shift in the node number */
-#define CPUS_PER_SUBNODE	2	/* CPUs on a single hub PI */
 
 #endif /* _ASM_SN_SN0_ARCH_H */
diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h
index f09ba846c644..63a2c30d81c6 100644
--- a/arch/mips/include/asm/sn/sn_private.h
+++ b/arch/mips/include/asm/sn/sn_private.h
@@ -7,14 +7,13 @@
 extern nasid_t master_nasid;
 
 extern void cpu_node_probe(void);
-extern cnodeid_t get_compact_nodeid(void);
-extern void hub_rtc_init(cnodeid_t);
+extern void hub_rtc_init(nasid_t nasid);
 extern void cpu_time_init(void);
 extern void per_cpu_init(void);
 extern void install_cpu_nmi_handler(int slice);
 extern void install_ipi(void);
 extern void setup_replication_mask(void);
 extern void replicate_kernel_text(void);
-extern unsigned long node_getfirstfree(cnodeid_t);
+extern unsigned long node_getfirstfree(nasid_t nasid);
 
 #endif /* __ASM_SN_SN_PRIVATE_H */
diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h
index 6d24d4e8b9ed..203c927e84d1 100644
--- a/arch/mips/include/asm/sn/types.h
+++ b/arch/mips/include/asm/sn/types.h
@@ -12,13 +12,9 @@
 #include <linux/types.h>
 
 typedef unsigned long	cpuid_t;
-typedef unsigned long	cnodemask_t;
 typedef signed short	nasid_t;	/* node id in numa-as-id space */
-typedef signed short	cnodeid_t;	/* node id in compact-id space */
 typedef signed char	partid_t;	/* partition ID type */
 typedef signed short	moduleid_t;	/* user-visible module number type */
-typedef signed short	cmoduleid_t;	/* kernel compact module id type */
-typedef unsigned char	clusterid_t;	/* Clusterid of the cell */
 
 typedef dev_t		vertex_hdl_t;	/* hardware graph vertex handle */
 
diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h
index 29030cb398ee..1de3bbce8e88 100644
--- a/arch/mips/include/asm/string.h
+++ b/arch/mips/include/asm/string.h
@@ -10,127 +10,6 @@
 #ifndef _ASM_STRING_H
 #define _ASM_STRING_H
 
-
-/*
- * Most of the inline functions are rather naive implementations so I just
- * didn't bother updating them for 64-bit ...
- */
-#ifdef CONFIG_32BIT
-
-#ifndef IN_STRING_C
-
-#define __HAVE_ARCH_STRCPY
-static __inline__ char *strcpy(char *__dest, __const__ char *__src)
-{
-  char *__xdest = __dest;
-
-  __asm__ __volatile__(
-	".set\tnoreorder\n\t"
-	".set\tnoat\n"
-	"1:\tlbu\t$1,(%1)\n\t"
-	"addiu\t%1,1\n\t"
-	"sb\t$1,(%0)\n\t"
-	"bnez\t$1,1b\n\t"
-	"addiu\t%0,1\n\t"
-	".set\tat\n\t"
-	".set\treorder"
-	: "=r" (__dest), "=r" (__src)
-	: "0" (__dest), "1" (__src)
-	: "memory");
-
-  return __xdest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static __inline__ char *strncpy(char *__dest, __const__ char *__src, size_t __n)
-{
-  char *__xdest = __dest;
-
-  if (__n == 0)
-    return __xdest;
-
-  __asm__ __volatile__(
-	".set\tnoreorder\n\t"
-	".set\tnoat\n"
-	"1:\tlbu\t$1,(%1)\n\t"
-	"subu\t%2,1\n\t"
-	"sb\t$1,(%0)\n\t"
-	"beqz\t$1,2f\n\t"
-	"addiu\t%0,1\n\t"
-	"bnez\t%2,1b\n\t"
-	"addiu\t%1,1\n"
-	"2:\n\t"
-	".set\tat\n\t"
-	".set\treorder"
-	: "=r" (__dest), "=r" (__src), "=r" (__n)
-	: "0" (__dest), "1" (__src), "2" (__n)
-	: "memory");
-
-  return __xdest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static __inline__ int strcmp(__const__ char *__cs, __const__ char *__ct)
-{
-  int __res;
-
-  __asm__ __volatile__(
-	".set\tnoreorder\n\t"
-	".set\tnoat\n\t"
-	"lbu\t%2,(%0)\n"
-	"1:\tlbu\t$1,(%1)\n\t"
-	"addiu\t%0,1\n\t"
-	"bne\t$1,%2,2f\n\t"
-	"addiu\t%1,1\n\t"
-	"bnez\t%2,1b\n\t"
-	"lbu\t%2,(%0)\n\t"
-#if defined(CONFIG_CPU_R3000)
-	"nop\n\t"
-#endif
-	"move\t%2,$1\n"
-	"2:\tsubu\t%2,$1\n"
-	"3:\t.set\tat\n\t"
-	".set\treorder"
-	: "=r" (__cs), "=r" (__ct), "=r" (__res)
-	: "0" (__cs), "1" (__ct));
-
-  return __res;
-}
-
-#endif /* !defined(IN_STRING_C) */
-
-#define __HAVE_ARCH_STRNCMP
-static __inline__ int
-strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count)
-{
-	int __res;
-
-	__asm__ __volatile__(
-	".set\tnoreorder\n\t"
-	".set\tnoat\n"
-	"1:\tlbu\t%3,(%0)\n\t"
-	"beqz\t%2,2f\n\t"
-	"lbu\t$1,(%1)\n\t"
-	"subu\t%2,1\n\t"
-	"bne\t$1,%3,3f\n\t"
-	"addiu\t%0,1\n\t"
-	"bnez\t%3,1b\n\t"
-	"addiu\t%1,1\n"
-	"2:\n\t"
-#if defined(CONFIG_CPU_R3000)
-	"nop\n\t"
-#endif
-	"move\t%3,$1\n"
-	"3:\tsubu\t%3,$1\n\t"
-	".set\tat\n\t"
-	".set\treorder"
-	: "=r" (__cs), "=r" (__ct), "=r" (__count), "=r" (__res)
-	: "0" (__cs), "1" (__ct), "2" (__count));
-
-	return __res;
-}
-#endif /* CONFIG_32BIT */
-
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
 
diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h
new file mode 100644
index 000000000000..7c6a1095f556
--- /dev/null
+++ b/arch/mips/include/asm/sync.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __MIPS_ASM_SYNC_H__
+#define __MIPS_ASM_SYNC_H__
+
+/*
+ * sync types are defined by the MIPS64 Instruction Set documentation in Volume
+ * II-A of the MIPS Architecture Reference Manual, which can be found here:
+ *
+ *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
+ *
+ * Two types of barrier are provided:
+ *
+ *   1) Completion barriers, which ensure that a memory operation has actually
+ *      completed & often involve stalling the CPU pipeline to do so.
+ *
+ *   2) Ordering barriers, which only ensure that affected memory operations
+ *      won't be reordered in the CPU pipeline in a manner that violates the
+ *      restrictions imposed by the barrier.
+ *
+ * Ordering barriers can be more efficient than completion barriers, since:
+ *
+ *   a) Ordering barriers only require memory access instructions which preceed
+ *      them in program order (older instructions) to reach a point in the
+ *      load/store datapath beyond which reordering is not possible before
+ *      allowing memory access instructions which follow them (younger
+ *      instructions) to be performed.  That is, older instructions don't
+ *      actually need to complete - they just need to get far enough that all
+ *      other coherent CPUs will observe their completion before they observe
+ *      the effects of younger instructions.
+ *
+ *   b) Multiple variants of ordering barrier are provided which allow the
+ *      effects to be restricted to different combinations of older or younger
+ *      loads or stores. By way of example, if we only care that stores older
+ *      than a barrier are observed prior to stores that are younger than a
+ *      barrier & don't care about the ordering of loads then the 'wmb'
+ *      ordering barrier can be used. Limiting the barrier's effects to stores
+ *      allows loads to continue unaffected & potentially allows the CPU to
+ *      make progress faster than if younger loads had to wait for older stores
+ *      to complete.
+ */
+
+/*
+ * No sync instruction at all; used to allow code to nullify the effect of the
+ * __SYNC() macro without needing lots of #ifdefery.
+ */
+#define __SYNC_none	-1
+
+/*
+ * A full completion barrier; all memory accesses appearing prior to this sync
+ * instruction in program order must complete before any memory accesses
+ * appearing after this sync instruction in program order.
+ */
+#define __SYNC_full	0x00
+
+/*
+ * For now we use a full completion barrier to implement all sync types, until
+ * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
+ * sufficient to uphold our desired memory model.
+ */
+#define __SYNC_aq	__SYNC_full
+#define __SYNC_rl	__SYNC_full
+#define __SYNC_mb	__SYNC_full
+
+/*
+ * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
+ * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
+ * speculative reads.
+ */
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# define __SYNC_rmb	__SYNC_none
+# define __SYNC_wmb	0x04
+#else
+# define __SYNC_rmb	__SYNC_full
+# define __SYNC_wmb	__SYNC_full
+#endif
+
+/*
+ * A GINV sync is a little different; it doesn't relate directly to loads or
+ * stores, but instead causes synchronization of an icache or TLB global
+ * invalidation operation triggered by the ginvi or ginvt instructions
+ * respectively. In cases where we need to know that a ginvi or ginvt operation
+ * has been performed by all coherent CPUs, we must issue a sync instruction of
+ * this type. Once this instruction graduates all coherent CPUs will have
+ * observed the invalidation.
+ */
+#define __SYNC_ginv	0x14
+
+/* Trivial; indicate that we always need this sync instruction. */
+#define __SYNC_always	(1 << 0)
+
+/*
+ * Indicate that we need this sync instruction only on systems with weakly
+ * ordered memory access. In general this is most MIPS systems, but there are
+ * exceptions which provide strongly ordered memory.
+ */
+#ifdef CONFIG_WEAK_ORDERING
+# define __SYNC_weak_ordering	(1 << 1)
+#else
+# define __SYNC_weak_ordering	0
+#endif
+
+/*
+ * Indicate that we need this sync instruction only on systems where LL/SC
+ * don't implicitly provide a memory barrier. In general this is most MIPS
+ * systems.
+ */
+#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
+# define __SYNC_weak_llsc	(1 << 2)
+#else
+# define __SYNC_weak_llsc	0
+#endif
+
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
+ *
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
+ *
+ *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
+ *    or similar, then misprediction of the branch may allow speculative
+ *    execution of memory accesses from outside of the LL-SC loop.
+ *
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
+ *    at each affected branch target.
+ *
+ *    This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+# define __SYNC_loongson3_war	(1 << 31)
+#else
+# define __SYNC_loongson3_war	0
+#endif
+
+/*
+ * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
+ * barrier to be ineffective, requiring the use of 2 in sequence to provide an
+ * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
+ * optimized memory barrier primitives."). Here we specify that the affected
+ * sync instructions should be emitted twice.
+ */
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# define __SYNC_rpt(type)	(1 + (type == __SYNC_wmb))
+#else
+# define __SYNC_rpt(type)	1
+#endif
+
+/*
+ * The main event. Here we actually emit a sync instruction of a given type, if
+ * reason is non-zero.
+ *
+ * In future we have the option of emitting entries in a fixups-style table
+ * here that would allow us to opportunistically remove some sync instructions
+ * when we detect at runtime that we're running on a CPU that doesn't need
+ * them.
+ */
+#ifdef CONFIG_CPU_HAS_SYNC
+# define ____SYNC(_type, _reason, _else)			\
+	.if	(( _type ) != -1) && ( _reason );		\
+	.set	push;						\
+	.set	MIPS_ISA_LEVEL_RAW;				\
+	.rept	__SYNC_rpt(_type);				\
+	sync	_type;						\
+	.endr;							\
+	.set	pop;						\
+	.else;							\
+	_else;							\
+	.endif
+#else
+# define ____SYNC(_type, _reason, _else)
+#endif
+
+/*
+ * Preprocessor magic to expand macros used as arguments before we insert them
+ * into assembly code.
+ */
+#ifdef __ASSEMBLY__
+# define ___SYNC(type, reason, else)				\
+	____SYNC(type, reason, else)
+#else
+# define ___SYNC(type, reason, else)				\
+	__stringify(____SYNC(type, reason, else))
+#endif
+
+#define __SYNC(type, reason)					\
+	___SYNC(__SYNC_##type, __SYNC_##reason, )
+#define __SYNC_ELSE(type, reason, else)				\
+	___SYNC(__SYNC_##type, __SYNC_##reason, else)
+
+#endif /* __MIPS_ASM_SYNC_H__ */
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 4993db40482c..ee26f9a4575d 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -49,8 +49,26 @@ struct thread_info {
 	.addr_limit	= KERNEL_DS,		\
 }
 
-/* How to get the thread information struct from C.  */
+/*
+ * A pointer to the struct thread_info for the currently executing thread is
+ * held in register $28/$gp.
+ *
+ * We declare __current_thread_info as a global register variable rather than a
+ * local register variable within current_thread_info() because clang doesn't
+ * support explicit local register variables.
+ *
+ * When building the VDSO we take care not to declare the global register
+ * variable because this causes GCC to not preserve the value of $28/$gp in
+ * functions that change its value (which is common in the PIC VDSO when
+ * accessing the GOT). Since the VDSO shouldn't be accessing
+ * __current_thread_info anyway we declare it extern in order to cause a link
+ * failure if it's referenced.
+ */
+#ifdef __VDSO__
+extern struct thread_info *__current_thread_info;
+#else
 register struct thread_info *__current_thread_info __asm__("$28");
+#endif
 
 static inline struct thread_info *current_thread_info(void)
 {
diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h
new file mode 100644
index 000000000000..c628747d4ecd
--- /dev/null
+++ b/arch/mips/include/asm/unroll.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_UNROLL_H__
+#define __ASM_UNROLL_H__
+
+/*
+ * Explicitly unroll a loop, for use in cases where doing so is performance
+ * critical.
+ *
+ * Ideally we'd rely upon the compiler to provide this but there's no commonly
+ * available means to do so. For example GCC's "#pragma GCC unroll"
+ * functionality would be ideal but is only available from GCC 8 onwards. Using
+ * -funroll-loops is an option but GCC tends to make poor choices when
+ * compiling our string functions. -funroll-all-loops leads to massive code
+ * bloat, even if only applied to the string functions.
+ */
+#define unroll(times, fn, ...) do {				\
+	extern void bad_unroll(void)				\
+		__compiletime_error("Unsupported unroll");	\
+								\
+	/*							\
+	 * We can't unroll if the number of iterations isn't	\
+	 * compile-time constant. Unfortunately GCC versions	\
+	 * up until 4.6 tend to miss obvious constants & cause	\
+	 * this check to fail, even though they go on to	\
+	 * generate reasonable code for the switch statement,	\
+	 * so we skip the sanity check for those compilers.	\
+	 */							\
+	BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 ||		\
+		      CONFIG_CLANG_VERSION >= 80000) &&		\
+		     !__builtin_constant_p(times));		\
+								\
+	switch (times) {					\
+	case 32: fn(__VA_ARGS__); /* fall through */		\
+	case 31: fn(__VA_ARGS__); /* fall through */		\
+	case 30: fn(__VA_ARGS__); /* fall through */		\
+	case 29: fn(__VA_ARGS__); /* fall through */		\
+	case 28: fn(__VA_ARGS__); /* fall through */		\
+	case 27: fn(__VA_ARGS__); /* fall through */		\
+	case 26: fn(__VA_ARGS__); /* fall through */		\
+	case 25: fn(__VA_ARGS__); /* fall through */		\
+	case 24: fn(__VA_ARGS__); /* fall through */		\
+	case 23: fn(__VA_ARGS__); /* fall through */		\
+	case 22: fn(__VA_ARGS__); /* fall through */		\
+	case 21: fn(__VA_ARGS__); /* fall through */		\
+	case 20: fn(__VA_ARGS__); /* fall through */		\
+	case 19: fn(__VA_ARGS__); /* fall through */		\
+	case 18: fn(__VA_ARGS__); /* fall through */		\
+	case 17: fn(__VA_ARGS__); /* fall through */		\
+	case 16: fn(__VA_ARGS__); /* fall through */		\
+	case 15: fn(__VA_ARGS__); /* fall through */		\
+	case 14: fn(__VA_ARGS__); /* fall through */		\
+	case 13: fn(__VA_ARGS__); /* fall through */		\
+	case 12: fn(__VA_ARGS__); /* fall through */		\
+	case 11: fn(__VA_ARGS__); /* fall through */		\
+	case 10: fn(__VA_ARGS__); /* fall through */		\
+	case 9: fn(__VA_ARGS__); /* fall through */		\
+	case 8: fn(__VA_ARGS__); /* fall through */		\
+	case 7: fn(__VA_ARGS__); /* fall through */		\
+	case 6: fn(__VA_ARGS__); /* fall through */		\
+	case 5: fn(__VA_ARGS__); /* fall through */		\
+	case 4: fn(__VA_ARGS__); /* fall through */		\
+	case 3: fn(__VA_ARGS__); /* fall through */		\
+	case 2: fn(__VA_ARGS__); /* fall through */		\
+	case 1: fn(__VA_ARGS__); /* fall through */		\
+	case 0: break;						\
+								\
+	default:						\
+		/*						\
+		 * Either the iteration count is unreasonable	\
+		 * or we need to add more cases above.		\
+		 */						\
+		bad_unroll();					\
+		break;						\
+	}							\
+} while (0)
+
+#endif /* __ASM_UNROLL_H__ */
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index b08825531e9f..a58687e26c5d 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -26,8 +26,6 @@
 
 #define __VDSO_USE_SYSCALL		ULLONG_MAX
 
-#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
-
 static __always_inline long gettimeofday_fallback(
 				struct __kernel_old_timeval *_tv,
 				struct timezone *_tz)
@@ -48,17 +46,6 @@ static __always_inline long gettimeofday_fallback(
 	return error ? -ret : ret;
 }
 
-#else
-
-static __always_inline long gettimeofday_fallback(
-				struct __kernel_old_timeval *_tv,
-				struct timezone *_tz)
-{
-	return -1;
-}
-
-#endif
-
 static __always_inline long clock_gettime_fallback(
 					clockid_t _clkid,
 					struct __kernel_timespec *_ts)
@@ -109,8 +96,6 @@ static __always_inline int clock_getres_fallback(
 
 #if _MIPS_SIM != _MIPS_SIM_ABI64
 
-#define VDSO_HAS_32BIT_FALLBACK	1
-
 static __always_inline long clock_gettime32_fallback(
 					clockid_t _clkid,
 					struct old_timespec32 *_ts)
diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h
new file mode 100644
index 000000000000..25dc09b25eaf
--- /dev/null
+++ b/arch/mips/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_MIPS_VMALLOC_H
+#define _ASM_MIPS_VMALLOC_H
+
+#endif /* _ASM_MIPS_VMALLOC_H */
diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h
index 46aa15b13e4e..128af72f2dfe 100644
--- a/arch/mips/include/uapi/asm/msgbuf.h
+++ b/arch/mips/include/uapi/asm/msgbuf.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_MSGBUF_H
 #define _ASM_MSGBUF_H
 
+#include <asm/ipcbuf.h>
 
 /*
  * The msqid64_ds structure for the MIPS architecture.
@@ -15,9 +16,9 @@
 #if defined(__mips64)
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
+	long msg_stime;			/* last msgsnd time */
+	long msg_rtime;			/* last msgrcv time */
+	long msg_ctime;			/* last change time */
 	unsigned long  msg_cbytes;	/* current number of bytes on queue */
 	unsigned long  msg_qnum;	/* number of messages in queue */
 	unsigned long  msg_qbytes;	/* max number of bytes on queue */
diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h
index 60c89e6cb25b..ba7fe0c89e7d 100644
--- a/arch/mips/include/uapi/asm/sembuf.h
+++ b/arch/mips/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_SEMBUF_H
 #define _ASM_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for the MIPS architecture.
  * Note extra padding because this structure is passed back and forth
@@ -14,8 +16,8 @@
 #ifdef __mips64
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t sem_otime;		/* last semop time */
-	__kernel_time_t sem_ctime;		/* last change time */
+	long		 sem_otime;		/* last semop time */
+	long		 sem_ctime;		/* last change time */
 	unsigned long	sem_nsems;		/* no. of semaphores in array */
 	unsigned long	__unused1;
 	unsigned long	__unused2;
diff --git a/arch/mips/include/uapi/asm/shmbuf.h b/arch/mips/include/uapi/asm/shmbuf.h
index 9b9bba3401f2..680bb95b2240 100644
--- a/arch/mips/include/uapi/asm/shmbuf.h
+++ b/arch/mips/include/uapi/asm/shmbuf.h
@@ -17,9 +17,9 @@
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
+	long			shm_atime;	/* last attach time */
+	long			shm_dtime;	/* last detach time */
+	long			shm_ctime;	/* last change time */
 	__kernel_pid_t		shm_cpid;	/* pid of creator */
 	__kernel_pid_t		shm_lpid;	/* pid of last operator */
 	unsigned long		shm_nattch;	/* no. of current attaches */
diff --git a/arch/mips/include/uapi/asm/stat.h b/arch/mips/include/uapi/asm/stat.h
index 95416f366d7f..3d2a3b71845c 100644
--- a/arch/mips/include/uapi/asm/stat.h
+++ b/arch/mips/include/uapi/asm/stat.h
@@ -26,17 +26,17 @@ struct stat {
 	gid_t		st_gid;
 	unsigned	st_rdev;
 	long		st_pad2[2];
-	off_t		st_size;
+	long		st_size;
 	long		st_pad3;
 	/*
 	 * Actually this should be timestruc_t st_atime, st_mtime and st_ctime
 	 * but we don't have it under Linux.
 	 */
-	time_t		st_atime;
+	long		st_atime;
 	long		st_atime_nsec;
-	time_t		st_mtime;
+	long		st_mtime;
 	long		st_mtime_nsec;
-	time_t		st_ctime;
+	long		st_ctime;
 	long		st_ctime_nsec;
 	long		st_blksize;
 	long		st_blocks;
@@ -70,13 +70,13 @@ struct stat64 {
 	 * Actually this should be timestruc_t st_atime, st_mtime and st_ctime
 	 * but we don't have it under Linux.
 	 */
-	time_t		st_atime;
+	long		st_atime;
 	unsigned long	st_atime_nsec;	/* Reserved for st_atime expansion  */
 
-	time_t		st_mtime;
+	long		st_mtime;
 	unsigned long	st_mtime_nsec;	/* Reserved for st_mtime expansion  */
 
-	time_t		st_ctime;
+	long		st_ctime;
 	unsigned long	st_ctime_nsec;	/* Reserved for st_ctime expansion  */
 
 	unsigned long	st_blksize;
@@ -105,7 +105,7 @@ struct stat {
 	unsigned int		st_rdev;
 	unsigned int		st_pad1[3]; /* Reserved for st_rdev expansion */
 
-	off_t			st_size;
+	long			st_size;
 
 	/*
 	 * Actually this should be timestruc_t st_atime, st_mtime and st_ctime
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index a01e14955187..c64a297e82b3 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -592,7 +592,7 @@ static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page,
 	phys_addr_t phys = page_to_phys(page) + offset;
 
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_device(dev, phys, size, dir);
+		arch_sync_dma_for_device(phys, size, dir);
 	return vdma_alloc(phys, size);
 }
 
@@ -600,7 +600,7 @@ static void jazz_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_cpu(dev, vdma_log2phys(dma_addr), size, dir);
+		arch_sync_dma_for_cpu(vdma_log2phys(dma_addr), size, dir);
 	vdma_free(dma_addr);
 }
 
@@ -612,7 +612,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nents, i) {
 		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
+			arch_sync_dma_for_device(sg_phys(sg), sg->length,
 				dir);
 		sg->dma_address = vdma_alloc(sg_phys(sg), sg->length);
 		if (sg->dma_address == DMA_MAPPING_ERROR)
@@ -631,8 +631,7 @@ static void jazz_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nents, i) {
 		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length,
-				dir);
+			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 		vdma_free(sg->dma_address);
 	}
 }
@@ -640,13 +639,13 @@ static void jazz_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static void jazz_dma_sync_single_for_device(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
-	arch_sync_dma_for_device(dev, vdma_log2phys(addr), size, dir);
+	arch_sync_dma_for_device(vdma_log2phys(addr), size, dir);
 }
 
 static void jazz_dma_sync_single_for_cpu(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
-	arch_sync_dma_for_cpu(dev, vdma_log2phys(addr), size, dir);
+	arch_sync_dma_for_cpu(vdma_log2phys(addr), size, dir);
 }
 
 static void jazz_dma_sync_sg_for_device(struct device *dev,
@@ -656,7 +655,7 @@ static void jazz_dma_sync_sg_for_device(struct device *dev,
 	int i;
 
 	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+		arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
 }
 
 static void jazz_dma_sync_sg_for_cpu(struct device *dev,
@@ -666,7 +665,7 @@ static void jazz_dma_sync_sg_for_cpu(struct device *dev,
 	int i;
 
 	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+		arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 }
 
 const struct dma_map_ops jazz_dma_ops = {
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 89b07ea8d249..d6e97df51cfb 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -80,7 +80,7 @@ obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_PROC_FS)		+= proc.o
 obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
 
-obj-$(CONFIG_64BIT)		+= cpu-bugs64.o
+obj-$(CONFIG_CPU_R4X00_BUGS64)	+= r4k-bugs64.o
 
 obj-$(CONFIG_I8253)		+= i8253.o
 
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 7a12763d553a..6ee3f7218c67 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -100,7 +100,7 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 #undef TASK_SIZE
 #define TASK_SIZE TASK_SIZE32
 
-#undef ns_to_timeval
-#define ns_to_timeval ns_to_old_timeval32
+#undef ns_to_kernel_old_timeval
+#define ns_to_kernel_old_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index e6db06a1d31a..6dd103d3cebb 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -103,7 +103,7 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 #undef TASK_SIZE
 #define TASK_SIZE TASK_SIZE32
 
-#undef ns_to_timeval
-#define ns_to_timeval ns_to_old_timeval32
+#undef ns_to_kernel_old_timeval
+#define ns_to_kernel_old_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c
index f777e44653d5..47312c529410 100644
--- a/arch/mips/kernel/cacheinfo.c
+++ b/arch/mips/kernel/cacheinfo.c
@@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu)
 	return 0;
 }
 
+static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map)
+{
+	int cpu1;
+
+	for_each_possible_cpu(cpu1)
+		if (cpus_are_siblings(cpu, cpu1))
+			cpumask_set_cpu(cpu1, cpu_map);
+}
+
+static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map)
+{
+	int cpu1;
+	int cluster = cpu_cluster(&cpu_data[cpu]);
+
+	for_each_possible_cpu(cpu1)
+		if (cpu_cluster(&cpu_data[cpu1]) == cluster)
+			cpumask_set_cpu(cpu1, cpu_map);
+}
+
 static int __populate_cache_leaves(unsigned int cpu)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
@@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu)
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 
 	if (c->icache.waysize) {
+		/* L1 caches are per core */
+		fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA);
+		fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST);
 	} else {
 		populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED);
 	}
 
-	if (c->scache.waysize)
+	if (c->scache.waysize) {
+		/* L2 cache is per cluster */
+		fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED);
+	}
 
 	if (c->tcache.waysize)
 		populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED);
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c
deleted file mode 100644
index 6a7afe7ef4d3..000000000000
--- a/arch/mips/kernel/cpu-bugs64.c
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2003, 2004, 2007  Maciej W. Rozycki
- */
-#include <linux/context_tracking.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/stddef.h>
-
-#include <asm/bugs.h>
-#include <asm/compiler.h>
-#include <asm/cpu.h>
-#include <asm/fpu.h>
-#include <asm/mipsregs.h>
-#include <asm/setup.h>
-
-static char bug64hit[] __initdata =
-	"reliable operation impossible!\n%s";
-static char nowar[] __initdata =
-	"Please report to <linux-mips@linux-mips.org>.";
-static char r4kwar[] __initdata =
-	"Enable CPU_R4000_WORKAROUNDS to rectify.";
-static char daddiwar[] __initdata =
-	"Enable CPU_DADDI_WORKAROUNDS to rectify.";
-
-static __always_inline __init
-void align_mod(const int align, const int mod)
-{
-	asm volatile(
-		".set	push\n\t"
-		".set	noreorder\n\t"
-		".balign %0\n\t"
-		".rept	%1\n\t"
-		"nop\n\t"
-		".endr\n\t"
-		".set	pop"
-		:
-		: "n"(align), "n"(mod));
-}
-
-static __always_inline __init
-void mult_sh_align_mod(long *v1, long *v2, long *w,
-		       const int align, const int mod)
-{
-	unsigned long flags;
-	int m1, m2;
-	long p, s, lv1, lv2, lw;
-
-	/*
-	 * We want the multiply and the shift to be isolated from the
-	 * rest of the code to disable gcc optimizations.  Hence the
-	 * asm statements that execute nothing, but make gcc not know
-	 * what the values of m1, m2 and s are and what lv2 and p are
-	 * used for.
-	 */
-
-	local_irq_save(flags);
-	/*
-	 * The following code leads to a wrong result of the first
-	 * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId
-	 * 00000422 or 00000430, respectively).
-	 *
-	 * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and
-	 * 3.0" by MIPS Technologies, Inc., errata #16 and #28 for
-	 * details.  I got no permission to duplicate them here,
-	 * sigh... --macro
-	 */
-	asm volatile(
-		""
-		: "=r" (m1), "=r" (m2), "=r" (s)
-		: "0" (5), "1" (8), "2" (5));
-	align_mod(align, mod);
-	/*
-	 * The trailing nop is needed to fulfill the two-instruction
-	 * requirement between reading hi/lo and staring a mult/div.
-	 * Leaving it out may cause gas insert a nop itself breaking
-	 * the desired alignment of the next chunk.
-	 */
-	asm volatile(
-		".set	push\n\t"
-		".set	noat\n\t"
-		".set	noreorder\n\t"
-		".set	nomacro\n\t"
-		"mult	%2, %3\n\t"
-		"dsll32 %0, %4, %5\n\t"
-		"mflo	$0\n\t"
-		"dsll32 %1, %4, %5\n\t"
-		"nop\n\t"
-		".set	pop"
-		: "=&r" (lv1), "=r" (lw)
-		: "r" (m1), "r" (m2), "r" (s), "I" (0)
-		: "hi", "lo", "$0");
-	/* We have to use single integers for m1 and m2 and a double
-	 * one for p to be sure the mulsidi3 gcc's RTL multiplication
-	 * instruction has the workaround applied.  Older versions of
-	 * gcc have correct umulsi3 and mulsi3, but other
-	 * multiplication variants lack the workaround.
-	 */
-	asm volatile(
-		""
-		: "=r" (m1), "=r" (m2), "=r" (s)
-		: "0" (m1), "1" (m2), "2" (s));
-	align_mod(align, mod);
-	p = m1 * m2;
-	lv2 = s << 32;
-	asm volatile(
-		""
-		: "=r" (lv2)
-		: "0" (lv2), "r" (p));
-	local_irq_restore(flags);
-
-	*v1 = lv1;
-	*v2 = lv2;
-	*w = lw;
-}
-
-static __always_inline __init void check_mult_sh(void)
-{
-	long v1[8], v2[8], w[8];
-	int bug, fix, i;
-
-	printk("Checking for the multiply/shift bug... ");
-
-	/*
-	 * Testing discovered false negatives for certain code offsets
-	 * into cache lines.  Hence we test all possible offsets for
-	 * the worst assumption of an R4000 I-cache line width of 32
-	 * bytes.
-	 *
-	 * We can't use a loop as alignment directives need to be
-	 * immediates.
-	 */
-	mult_sh_align_mod(&v1[0], &v2[0], &w[0], 32, 0);
-	mult_sh_align_mod(&v1[1], &v2[1], &w[1], 32, 1);
-	mult_sh_align_mod(&v1[2], &v2[2], &w[2], 32, 2);
-	mult_sh_align_mod(&v1[3], &v2[3], &w[3], 32, 3);
-	mult_sh_align_mod(&v1[4], &v2[4], &w[4], 32, 4);
-	mult_sh_align_mod(&v1[5], &v2[5], &w[5], 32, 5);
-	mult_sh_align_mod(&v1[6], &v2[6], &w[6], 32, 6);
-	mult_sh_align_mod(&v1[7], &v2[7], &w[7], 32, 7);
-
-	bug = 0;
-	for (i = 0; i < 8; i++)
-		if (v1[i] != w[i])
-			bug = 1;
-
-	if (bug == 0) {
-		pr_cont("no.\n");
-		return;
-	}
-
-	pr_cont("yes, workaround... ");
-
-	fix = 1;
-	for (i = 0; i < 8; i++)
-		if (v2[i] != w[i])
-			fix = 0;
-
-	if (fix == 1) {
-		pr_cont("yes.\n");
-		return;
-	}
-
-	pr_cont("no.\n");
-	panic(bug64hit, !R4000_WAR ? r4kwar : nowar);
-}
-
-static volatile int daddi_ov;
-
-asmlinkage void __init do_daddi_ov(struct pt_regs *regs)
-{
-	enum ctx_state prev_state;
-
-	prev_state = exception_enter();
-	daddi_ov = 1;
-	regs->cp0_epc += 4;
-	exception_exit(prev_state);
-}
-
-static __init void check_daddi(void)
-{
-	extern asmlinkage void handle_daddi_ov(void);
-	unsigned long flags;
-	void *handler;
-	long v, tmp;
-
-	printk("Checking for the daddi bug... ");
-
-	local_irq_save(flags);
-	handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
-	/*
-	 * The following code fails to trigger an overflow exception
-	 * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
-	 * 00000430, respectively).
-	 *
-	 * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and
-	 * 3.0" by MIPS Technologies, Inc., erratum #23 for details.
-	 * I got no permission to duplicate it here, sigh... --macro
-	 */
-	asm volatile(
-		".set	push\n\t"
-		".set	noat\n\t"
-		".set	noreorder\n\t"
-		".set	nomacro\n\t"
-		"addiu	%1, $0, %2\n\t"
-		"dsrl	%1, %1, 1\n\t"
-#ifdef HAVE_AS_SET_DADDI
-		".set	daddi\n\t"
-#endif
-		"daddi	%0, %1, %3\n\t"
-		".set	pop"
-		: "=r" (v), "=&r" (tmp)
-		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-	set_except_vector(EXCCODE_OV, handler);
-	local_irq_restore(flags);
-
-	if (daddi_ov) {
-		pr_cont("no.\n");
-		return;
-	}
-
-	pr_cont("yes, workaround... ");
-
-	local_irq_save(flags);
-	handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
-	asm volatile(
-		"addiu	%1, $0, %2\n\t"
-		"dsrl	%1, %1, 1\n\t"
-		"daddi	%0, %1, %3"
-		: "=r" (v), "=&r" (tmp)
-		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-	set_except_vector(EXCCODE_OV, handler);
-	local_irq_restore(flags);
-
-	if (daddi_ov) {
-		pr_cont("yes.\n");
-		return;
-	}
-
-	pr_cont("no.\n");
-	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
-}
-
-int daddiu_bug	= IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1;
-
-static __init void check_daddiu(void)
-{
-	long v, w, tmp;
-
-	printk("Checking for the daddiu bug... ");
-
-	/*
-	 * The following code leads to a wrong result of daddiu when
-	 * executed on R4400 rev. 1.0 (PRId 00000440).
-	 *
-	 * See "MIPS R4400PC/SC Errata, Processor Revision 1.0" by
-	 * MIPS Technologies, Inc., erratum #7 for details.
-	 *
-	 * According to "MIPS R4000PC/SC Errata, Processor Revision
-	 * 2.2 and 3.0" by MIPS Technologies, Inc., erratum #41 this
-	 * problem affects R4000 rev. 2.2 and 3.0 (PRId 00000422 and
-	 * 00000430, respectively), too.  Testing failed to trigger it
-	 * so far.
-	 *
-	 * I got no permission to duplicate the errata here, sigh...
-	 * --macro
-	 */
-	asm volatile(
-		".set	push\n\t"
-		".set	noat\n\t"
-		".set	noreorder\n\t"
-		".set	nomacro\n\t"
-		"addiu	%2, $0, %3\n\t"
-		"dsrl	%2, %2, 1\n\t"
-#ifdef HAVE_AS_SET_DADDI
-		".set	daddi\n\t"
-#endif
-		"daddiu %0, %2, %4\n\t"
-		"addiu	%1, $0, %4\n\t"
-		"daddu	%1, %2\n\t"
-		".set	pop"
-		: "=&r" (v), "=&r" (w), "=&r" (tmp)
-		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-
-	daddiu_bug = v != w;
-
-	if (!daddiu_bug) {
-		pr_cont("no.\n");
-		return;
-	}
-
-	pr_cont("yes, workaround... ");
-
-	asm volatile(
-		"addiu	%2, $0, %3\n\t"
-		"dsrl	%2, %2, 1\n\t"
-		"daddiu %0, %2, %4\n\t"
-		"addiu	%1, $0, %4\n\t"
-		"daddu	%1, %2"
-		: "=&r" (v), "=&r" (w), "=&r" (tmp)
-		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-
-	if (v == w) {
-		pr_cont("yes.\n");
-		return;
-	}
-
-	pr_cont("no.\n");
-	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
-}
-
-void __init check_bugs64_early(void)
-{
-	if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) {
-		check_mult_sh();
-		check_daddiu();
-	}
-}
-
-void __init check_bugs64(void)
-{
-	if (!IS_ENABLED(CONFIG_CPU_MIPSR6))
-		check_daddi();
-}
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f521cbf934e7..c54332697673 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -608,7 +608,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
 		if (!(flags & FTLB_EN))
 			return 1;
 		return 0;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		/* Flush ITLB, DTLB, VTLB and FTLB */
 		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB |
 			      LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB);
@@ -1526,24 +1526,24 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			     MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST;
 		c->tlbsize = 64;
 		break;
-	case PRID_IMP_LOONGSON_64:  /* Loongson-2/3 */
+	case PRID_IMP_LOONGSON_64C:  /* Loongson-2/3 */
 		switch (c->processor_id & PRID_REV_MASK) {
 		case PRID_REV_LOONGSON2E:
-			c->cputype = CPU_LOONGSON2;
+			c->cputype = CPU_LOONGSON2EF;
 			__cpu_name[cpu] = "ICT Loongson-2";
 			set_elf_platform(cpu, "loongson2e");
 			set_isa(c, MIPS_CPU_ISA_III);
 			c->fpu_msk31 |= FPU_CSR_CONDX;
 			break;
 		case PRID_REV_LOONGSON2F:
-			c->cputype = CPU_LOONGSON2;
+			c->cputype = CPU_LOONGSON2EF;
 			__cpu_name[cpu] = "ICT Loongson-2";
 			set_elf_platform(cpu, "loongson2f");
 			set_isa(c, MIPS_CPU_ISA_III);
 			c->fpu_msk31 |= FPU_CSR_CONDX;
 			break;
 		case PRID_REV_LOONGSON3A_R1:
-			c->cputype = CPU_LOONGSON3;
+			c->cputype = CPU_LOONGSON64;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R1);
@@ -1552,7 +1552,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			break;
 		case PRID_REV_LOONGSON3B_R1:
 		case PRID_REV_LOONGSON3B_R2:
-			c->cputype = CPU_LOONGSON3;
+			c->cputype = CPU_LOONGSON64;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3b");
 			set_isa(c, MIPS_CPU_ISA_M64R1);
@@ -1565,12 +1565,13 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			     MIPS_CPU_FPU | MIPS_CPU_LLSC |
 			     MIPS_CPU_32FPR;
 		c->tlbsize = 64;
+		set_cpu_asid_mask(c, MIPS_ENTRYHI_ASID);
 		c->writecombine = _CACHE_UNCACHED_ACCELERATED;
 		break;
 	case PRID_IMP_LOONGSON_32:  /* Loongson-1 */
 		decode_configs(c);
 
-		c->cputype = CPU_LOONGSON1;
+		c->cputype = CPU_LOONGSON32;
 
 		switch (c->processor_id & PRID_REV_MASK) {
 		case PRID_REV_LOONGSON1B:
@@ -1903,18 +1904,18 @@ platform:
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 {
 	switch (c->processor_id & PRID_IMP_MASK) {
-	case PRID_IMP_LOONGSON_64:  /* Loongson-2/3 */
+	case PRID_IMP_LOONGSON_64C:  /* Loongson-2/3 */
 		switch (c->processor_id & PRID_REV_MASK) {
 		case PRID_REV_LOONGSON3A_R2_0:
 		case PRID_REV_LOONGSON3A_R2_1:
-			c->cputype = CPU_LOONGSON3;
+			c->cputype = CPU_LOONGSON64;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
 			break;
 		case PRID_REV_LOONGSON3A_R3_0:
 		case PRID_REV_LOONGSON3A_R3_1:
-			c->cputype = CPU_LOONGSON3;
+			c->cputype = CPU_LOONGSON64;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
@@ -1927,6 +1928,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 		c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
 			MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
 		break;
+	case PRID_IMP_LOONGSON_64G:
+		c->cputype = CPU_LOONGSON64;
+		__cpu_name[cpu] = "ICT Loongson-3";
+		set_elf_platform(cpu, "loongson3a");
+		set_isa(c, MIPS_CPU_ISA_M64R2);
+		decode_configs(c);
+		c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+		c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+		c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
+			MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
+		break;
 	default:
 		panic("Unknown Loongson Processor ID!");
 		break;
@@ -1965,13 +1977,30 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 		break;
 	}
 
+	switch (c->processor_id & PRID_COMP_MASK) {
 	/*
-	 * The config0 register in the Xburst CPUs with a processor ID of
+	 * The config0 register in the XBurst CPUs with a processor ID of
+	 * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this
+	 * mode is not compatible with the MIPS standard, it will cause
+	 * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S)
+	 * when starting the init process. After chip reset, the default
+	 * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to
+	 * switch back to VTLB mode to prevent getting stuck.
+	 */
+	case PRID_COMP_INGENIC_D1:
+		write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS);
+		break;
+	/*
+	 * The config0 register in the XBurst CPUs with a processor ID of
 	 * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible,
 	 * but they don't actually support this ISA.
 	 */
-	if ((c->processor_id & PRID_COMP_MASK) == PRID_COMP_INGENIC_D0)
+	case PRID_COMP_INGENIC_D0:
 		c->isa_level &= ~MIPS_CPU_ISA_M32R2;
+		break;
+	default:
+		break;
+	}
 }
 
 static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 5469d43b6966..4849a48afc0f 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -19,7 +19,7 @@
 #include <asm/thread_info.h>
 #include <asm/war.h>
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 #define resume_kernel	restore_all
 #else
 #define __ret_from_irq	ret_from_exception
@@ -27,7 +27,7 @@
 
 	.text
 	.align	5
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 FEXPORT(ret_from_exception)
 	local_irq_disable			# preempt stop
 	b	__ret_from_irq
@@ -53,7 +53,7 @@ resume_userspace:
 	bnez	t0, work_pending
 	j	restore_all
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 resume_kernel:
 	local_irq_disable
 	lw	t0, TI_PRE_COUNT($28)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index efde27c99414..0a43c9125267 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -18,6 +18,7 @@
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
+#include <asm/sync.h>
 #include <asm/war.h>
 #include <asm/thread_info.h>
 
@@ -353,8 +354,9 @@ NESTED(ejtag_debug_handler, PT_SIZE, sp)
 
 #ifdef CONFIG_SMP
 1:	PTR_LA	k0, ejtag_debug_buffer_spinlock
-	ll	k0, 0(k0)
-	bnez	k0, 1b
+	__SYNC(full, loongson3_war)
+2:	ll	k0, 0(k0)
+	bnez	k0, 2b
 	PTR_LA	k0, ejtag_debug_buffer_spinlock
 	sc	k0, 0(k0)
 	beqz	k0, 1b
@@ -657,7 +659,7 @@ isrdhwr:
 	.set	pop
 	END(handle_ri_rdhwr)
 
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_CPU_R4X00_BUGS64
 /* A temporary overflow handler used by check_daddi(). */
 
 	__INIT
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index eb2afc0b8db1..37f8e78e2869 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -173,13 +173,14 @@ void __init check_wait(void)
 	case CPU_CAVIUM_OCTEON2:
 	case CPU_CAVIUM_OCTEON3:
 	case CPU_XBURST:
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 	case CPU_XLR:
 	case CPU_XLP:
 		cpu_wait = r4k_wait;
 		break;
-	case CPU_LOONGSON3:
-		if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0)
+	case CPU_LOONGSON64:
+		if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
+				(PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
 			cpu_wait = r4k_wait;
 		break;
 
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index e5ea3db23d6b..cdb93ed91cde 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -194,7 +194,7 @@ static void mips_cm_probe_l2sync(void)
 	write_gcr_l2_only_sync_base(addr | CM_GCR_L2_ONLY_SYNC_BASE_SYNCEN);
 
 	/* Map the region */
-	mips_cm_l2sync_base = ioremap_nocache(addr, MIPS_CM_L2SYNC_SIZE);
+	mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE);
 }
 
 int mips_cm_probe(void)
@@ -215,7 +215,7 @@ int mips_cm_probe(void)
 	if (!addr)
 		return -ENODEV;
 
-	mips_gcr_base = ioremap_nocache(addr, MIPS_CM_GCR_SIZE);
+	mips_gcr_base = ioremap(addr, MIPS_CM_GCR_SIZE);
 	if (!mips_gcr_base)
 		return -ENXIO;
 
diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index 69e3e0b556bf..8d2535123f11 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c
@@ -78,7 +78,7 @@ int mips_cpc_probe(void)
 	if (!addr)
 		return -ENODEV;
 
-	mips_cpc_base = ioremap_nocache(addr, 0x8000);
+	mips_cpc_base = ioremap(addr, 0x8000);
 	if (!mips_cpc_base)
 		return -ENXIO;
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index a3e2da8391ea..128fc9999c56 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1623,7 +1623,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 			raw_event.cntr_mask =
 				raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
 		break;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
 	break;
 	}
@@ -1764,12 +1764,12 @@ init_hw_perf_events(void)
 		mipspmu.general_event_map = &mipsxxcore_event_map;
 		mipspmu.cache_event_map = &mipsxxcore_cache_map;
 		break;
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 		mipspmu.name = "mips/loongson1";
 		mipspmu.general_event_map = &mipsxxcore_event_map;
 		mipspmu.cache_event_map = &mipsxxcore_cache_map;
 		break;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		mipspmu.name = "mips/loongson3";
 		mipspmu.general_event_map = &loongson3_event_map;
 		mipspmu.cache_event_map = &loongson3_cache_map;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index a26f40db15d0..9bf60d7d44d3 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -307,7 +307,7 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
 	}
 
 	/* Barrier ensuring previous cache invalidates are complete */
-	uasm_i_sync(pp, STYPE_SYNC);
+	uasm_i_sync(pp, __SYNC_full);
 	uasm_i_ehb(pp);
 
 	/* Check whether the pipeline stalled due to the FSB being full */
@@ -397,7 +397,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 
 	if (coupled_coherence) {
 		/* Increment ready_count */
-		uasm_i_sync(&p, STYPE_SYNC_MB);
+		uasm_i_sync(&p, __SYNC_mb);
 		uasm_build_label(&l, p, lbl_incready);
 		uasm_i_ll(&p, t1, 0, r_nc_count);
 		uasm_i_addiu(&p, t2, t1, 1);
@@ -406,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 		uasm_i_addiu(&p, t1, t1, 1);
 
 		/* Barrier ensuring all CPUs see the updated r_nc_count value */
-		uasm_i_sync(&p, STYPE_SYNC_MB);
+		uasm_i_sync(&p, __SYNC_mb);
 
 		/*
 		 * If this is the last VPE to become ready for non-coherence
@@ -473,7 +473,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 			      Index_Writeback_Inv_D, lbl_flushdcache);
 
 	/* Barrier ensuring previous cache invalidates are complete */
-	uasm_i_sync(&p, STYPE_SYNC);
+	uasm_i_sync(&p, __SYNC_full);
 	uasm_i_ehb(&p);
 
 	if (mips_cm_revision() < CM_REV_CM3) {
@@ -487,7 +487,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 		uasm_i_lw(&p, t0, 0, r_pcohctl);
 
 		/* Barrier to ensure write to coherence control is complete */
-		uasm_i_sync(&p, STYPE_SYNC);
+		uasm_i_sync(&p, __SYNC_full);
 		uasm_i_ehb(&p);
 	}
 
@@ -534,7 +534,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 		}
 
 		/* Barrier to ensure write to CPC command is complete */
-		uasm_i_sync(&p, STYPE_SYNC);
+		uasm_i_sync(&p, __SYNC_full);
 		uasm_i_ehb(&p);
 	}
 
@@ -572,13 +572,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 	uasm_i_lw(&p, t0, 0, r_pcohctl);
 
 	/* Barrier to ensure write to coherence control is complete */
-	uasm_i_sync(&p, STYPE_SYNC);
+	uasm_i_sync(&p, __SYNC_full);
 	uasm_i_ehb(&p);
 
 	if (coupled_coherence && (state == CPS_PM_NC_WAIT)) {
 		/* Decrement ready_count */
 		uasm_build_label(&l, p, lbl_decready);
-		uasm_i_sync(&p, STYPE_SYNC_MB);
+		uasm_i_sync(&p, __SYNC_mb);
 		uasm_i_ll(&p, t1, 0, r_nc_count);
 		uasm_i_addiu(&p, t2, t1, -1);
 		uasm_i_sc(&p, t2, 0, r_nc_count);
@@ -586,7 +586,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 		uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
 
 		/* Barrier ensuring all CPUs see the updated r_nc_count value */
-		uasm_i_sync(&p, STYPE_SYNC_MB);
+		uasm_i_sync(&p, __SYNC_mb);
 	}
 
 	if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) {
@@ -608,7 +608,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 		uasm_build_label(&l, p, lbl_secondary_cont);
 
 		/* Barrier ensuring all CPUs see the updated r_nc_count value */
-		uasm_i_sync(&p, STYPE_SYNC_MB);
+		uasm_i_sync(&p, __SYNC_mb);
 	}
 
 	/* The core is coherent, time to return to C code */
diff --git a/arch/mips/kernel/r4k-bugs64.c b/arch/mips/kernel/r4k-bugs64.c
new file mode 100644
index 000000000000..1ff19f1ea5ca
--- /dev/null
+++ b/arch/mips/kernel/r4k-bugs64.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2003, 2004, 2007  Maciej W. Rozycki
+ */
+#include <linux/context_tracking.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/stddef.h>
+
+#include <asm/bugs.h>
+#include <asm/compiler.h>
+#include <asm/cpu.h>
+#include <asm/fpu.h>
+#include <asm/mipsregs.h>
+#include <asm/setup.h>
+
+static char bug64hit[] __initdata =
+	"reliable operation impossible!\n%s";
+static char nowar[] __initdata =
+	"Please report to <linux-mips@linux-mips.org>.";
+static char r4kwar[] __initdata =
+	"Enable CPU_R4000_WORKAROUNDS to rectify.";
+static char daddiwar[] __initdata =
+	"Enable CPU_DADDI_WORKAROUNDS to rectify.";
+
+static __always_inline __init
+void align_mod(const int align, const int mod)
+{
+	asm volatile(
+		".set	push\n\t"
+		".set	noreorder\n\t"
+		".balign %0\n\t"
+		".rept	%1\n\t"
+		"nop\n\t"
+		".endr\n\t"
+		".set	pop"
+		:
+		: "n"(align), "n"(mod));
+}
+
+static __always_inline __init
+void mult_sh_align_mod(long *v1, long *v2, long *w,
+		       const int align, const int mod)
+{
+	unsigned long flags;
+	int m1, m2;
+	long p, s, lv1, lv2, lw;
+
+	/*
+	 * We want the multiply and the shift to be isolated from the
+	 * rest of the code to disable gcc optimizations.  Hence the
+	 * asm statements that execute nothing, but make gcc not know
+	 * what the values of m1, m2 and s are and what lv2 and p are
+	 * used for.
+	 */
+
+	local_irq_save(flags);
+	/*
+	 * The following code leads to a wrong result of the first
+	 * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId
+	 * 00000422 or 00000430, respectively).
+	 *
+	 * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and
+	 * 3.0" by MIPS Technologies, Inc., errata #16 and #28 for
+	 * details.  I got no permission to duplicate them here,
+	 * sigh... --macro
+	 */
+	asm volatile(
+		""
+		: "=r" (m1), "=r" (m2), "=r" (s)
+		: "0" (5), "1" (8), "2" (5));
+	align_mod(align, mod);
+	/*
+	 * The trailing nop is needed to fulfill the two-instruction
+	 * requirement between reading hi/lo and staring a mult/div.
+	 * Leaving it out may cause gas insert a nop itself breaking
+	 * the desired alignment of the next chunk.
+	 */
+	asm volatile(
+		".set	push\n\t"
+		".set	noat\n\t"
+		".set	noreorder\n\t"
+		".set	nomacro\n\t"
+		"mult	%2, %3\n\t"
+		"dsll32 %0, %4, %5\n\t"
+		"mflo	$0\n\t"
+		"dsll32 %1, %4, %5\n\t"
+		"nop\n\t"
+		".set	pop"
+		: "=&r" (lv1), "=r" (lw)
+		: "r" (m1), "r" (m2), "r" (s), "I" (0)
+		: "hi", "lo", "$0");
+	/* We have to use single integers for m1 and m2 and a double
+	 * one for p to be sure the mulsidi3 gcc's RTL multiplication
+	 * instruction has the workaround applied.  Older versions of
+	 * gcc have correct umulsi3 and mulsi3, but other
+	 * multiplication variants lack the workaround.
+	 */
+	asm volatile(
+		""
+		: "=r" (m1), "=r" (m2), "=r" (s)
+		: "0" (m1), "1" (m2), "2" (s));
+	align_mod(align, mod);
+	p = m1 * m2;
+	lv2 = s << 32;
+	asm volatile(
+		""
+		: "=r" (lv2)
+		: "0" (lv2), "r" (p));
+	local_irq_restore(flags);
+
+	*v1 = lv1;
+	*v2 = lv2;
+	*w = lw;
+}
+
+static __always_inline __init void check_mult_sh(void)
+{
+	long v1[8], v2[8], w[8];
+	int bug, fix, i;
+
+	printk("Checking for the multiply/shift bug... ");
+
+	/*
+	 * Testing discovered false negatives for certain code offsets
+	 * into cache lines.  Hence we test all possible offsets for
+	 * the worst assumption of an R4000 I-cache line width of 32
+	 * bytes.
+	 *
+	 * We can't use a loop as alignment directives need to be
+	 * immediates.
+	 */
+	mult_sh_align_mod(&v1[0], &v2[0], &w[0], 32, 0);
+	mult_sh_align_mod(&v1[1], &v2[1], &w[1], 32, 1);
+	mult_sh_align_mod(&v1[2], &v2[2], &w[2], 32, 2);
+	mult_sh_align_mod(&v1[3], &v2[3], &w[3], 32, 3);
+	mult_sh_align_mod(&v1[4], &v2[4], &w[4], 32, 4);
+	mult_sh_align_mod(&v1[5], &v2[5], &w[5], 32, 5);
+	mult_sh_align_mod(&v1[6], &v2[6], &w[6], 32, 6);
+	mult_sh_align_mod(&v1[7], &v2[7], &w[7], 32, 7);
+
+	bug = 0;
+	for (i = 0; i < 8; i++)
+		if (v1[i] != w[i])
+			bug = 1;
+
+	if (bug == 0) {
+		pr_cont("no.\n");
+		return;
+	}
+
+	pr_cont("yes, workaround... ");
+
+	fix = 1;
+	for (i = 0; i < 8; i++)
+		if (v2[i] != w[i])
+			fix = 0;
+
+	if (fix == 1) {
+		pr_cont("yes.\n");
+		return;
+	}
+
+	pr_cont("no.\n");
+	panic(bug64hit, !R4000_WAR ? r4kwar : nowar);
+}
+
+static volatile int daddi_ov;
+
+asmlinkage void __init do_daddi_ov(struct pt_regs *regs)
+{
+	enum ctx_state prev_state;
+
+	prev_state = exception_enter();
+	daddi_ov = 1;
+	regs->cp0_epc += 4;
+	exception_exit(prev_state);
+}
+
+static __init void check_daddi(void)
+{
+	extern asmlinkage void handle_daddi_ov(void);
+	unsigned long flags;
+	void *handler;
+	long v, tmp;
+
+	printk("Checking for the daddi bug... ");
+
+	local_irq_save(flags);
+	handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
+	/*
+	 * The following code fails to trigger an overflow exception
+	 * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
+	 * 00000430, respectively).
+	 *
+	 * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and
+	 * 3.0" by MIPS Technologies, Inc., erratum #23 for details.
+	 * I got no permission to duplicate it here, sigh... --macro
+	 */
+	asm volatile(
+		".set	push\n\t"
+		".set	noat\n\t"
+		".set	noreorder\n\t"
+		".set	nomacro\n\t"
+		"addiu	%1, $0, %2\n\t"
+		"dsrl	%1, %1, 1\n\t"
+#ifdef HAVE_AS_SET_DADDI
+		".set	daddi\n\t"
+#endif
+		"daddi	%0, %1, %3\n\t"
+		".set	pop"
+		: "=r" (v), "=&r" (tmp)
+		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
+	set_except_vector(EXCCODE_OV, handler);
+	local_irq_restore(flags);
+
+	if (daddi_ov) {
+		pr_cont("no.\n");
+		return;
+	}
+
+	pr_cont("yes, workaround... ");
+
+	local_irq_save(flags);
+	handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
+	asm volatile(
+		"addiu	%1, $0, %2\n\t"
+		"dsrl	%1, %1, 1\n\t"
+		"daddi	%0, %1, %3"
+		: "=r" (v), "=&r" (tmp)
+		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
+	set_except_vector(EXCCODE_OV, handler);
+	local_irq_restore(flags);
+
+	if (daddi_ov) {
+		pr_cont("yes.\n");
+		return;
+	}
+
+	pr_cont("no.\n");
+	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
+}
+
+int daddiu_bug	= -1;
+
+static __init void check_daddiu(void)
+{
+	long v, w, tmp;
+
+	printk("Checking for the daddiu bug... ");
+
+	/*
+	 * The following code leads to a wrong result of daddiu when
+	 * executed on R4400 rev. 1.0 (PRId 00000440).
+	 *
+	 * See "MIPS R4400PC/SC Errata, Processor Revision 1.0" by
+	 * MIPS Technologies, Inc., erratum #7 for details.
+	 *
+	 * According to "MIPS R4000PC/SC Errata, Processor Revision
+	 * 2.2 and 3.0" by MIPS Technologies, Inc., erratum #41 this
+	 * problem affects R4000 rev. 2.2 and 3.0 (PRId 00000422 and
+	 * 00000430, respectively), too.  Testing failed to trigger it
+	 * so far.
+	 *
+	 * I got no permission to duplicate the errata here, sigh...
+	 * --macro
+	 */
+	asm volatile(
+		".set	push\n\t"
+		".set	noat\n\t"
+		".set	noreorder\n\t"
+		".set	nomacro\n\t"
+		"addiu	%2, $0, %3\n\t"
+		"dsrl	%2, %2, 1\n\t"
+#ifdef HAVE_AS_SET_DADDI
+		".set	daddi\n\t"
+#endif
+		"daddiu %0, %2, %4\n\t"
+		"addiu	%1, $0, %4\n\t"
+		"daddu	%1, %2\n\t"
+		".set	pop"
+		: "=&r" (v), "=&r" (w), "=&r" (tmp)
+		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
+
+	daddiu_bug = v != w;
+
+	if (!daddiu_bug) {
+		pr_cont("no.\n");
+		return;
+	}
+
+	pr_cont("yes, workaround... ");
+
+	asm volatile(
+		"addiu	%2, $0, %3\n\t"
+		"dsrl	%2, %2, 1\n\t"
+		"daddiu %0, %2, %4\n\t"
+		"addiu	%1, $0, %4\n\t"
+		"daddu	%1, %2"
+		: "=&r" (v), "=&r" (w), "=&r" (tmp)
+		: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
+
+	if (v == w) {
+		pr_cont("yes.\n");
+		return;
+	}
+
+	pr_cont("no.\n");
+	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
+}
+
+void __init check_bugs64_early(void)
+{
+	check_mult_sh();
+	check_daddiu();
+}
+
+void __init check_bugs64(void)
+{
+	check_daddi();
+}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5eec13b8d222..a28057946ed1 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -67,7 +67,9 @@ static char __initdata command_line[COMMAND_LINE_SIZE];
 char __initdata arcs_cmdline[COMMAND_LINE_SIZE];
 
 #ifdef CONFIG_CMDLINE_BOOL
-static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+static const char builtin_cmdline[] __initconst = CONFIG_CMDLINE;
+#else
+static const char builtin_cmdline[] __initconst = "";
 #endif
 
 /*
@@ -285,7 +287,7 @@ static unsigned long __init init_initrd(void)
  * Initialize the bootmem allocator. It also setup initrd related data
  * if needed.
  */
-#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_NUMA))
+#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON64) && defined(CONFIG_NUMA))
 
 static void __init bootmem_init(void)
 {
@@ -538,11 +540,94 @@ static void __init check_kernel_sections_mem(void)
 	}
 }
 
-#define USE_PROM_CMDLINE	IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
-#define USE_DTB_CMDLINE		IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM	IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
-#define BUILTIN_EXTEND_WITH_PROM	\
-	IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)
+static void __init bootcmdline_append(const char *s, size_t max)
+{
+	if (!s[0] || !max)
+		return;
+
+	if (boot_command_line[0])
+		strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+
+	strlcat(boot_command_line, s, max);
+}
+
+#ifdef CONFIG_OF_EARLY_FLATTREE
+
+static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname,
+					  int depth, void *data)
+{
+	bool *dt_bootargs = data;
+	const char *p;
+	int l;
+
+	if (depth != 1 || !data ||
+	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+		return 0;
+
+	p = of_get_flat_dt_prop(node, "bootargs", &l);
+	if (p != NULL && l > 0) {
+		bootcmdline_append(p, min(l, COMMAND_LINE_SIZE));
+		*dt_bootargs = true;
+	}
+
+	return 1;
+}
+
+#endif /* CONFIG_OF_EARLY_FLATTREE */
+
+static void __init bootcmdline_init(char **cmdline_p)
+{
+	bool dt_bootargs = false;
+
+	/*
+	 * If CMDLINE_OVERRIDE is enabled then initializing the command line is
+	 * trivial - we simply use the built-in command line unconditionally &
+	 * unmodified.
+	 */
+	if (IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+		return;
+	}
+
+	/*
+	 * If the user specified a built-in command line &
+	 * MIPS_CMDLINE_BUILTIN_EXTEND, then the built-in command line is
+	 * prepended to arguments from the bootloader or DT so we'll copy them
+	 * to the start of boot_command_line here. Otherwise, empty
+	 * boot_command_line to undo anything early_init_dt_scan_chosen() did.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND))
+		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+	else
+		boot_command_line[0] = 0;
+
+#ifdef CONFIG_OF_EARLY_FLATTREE
+	/*
+	 * If we're configured to take boot arguments from DT, look for those
+	 * now.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB))
+		of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs);
+#endif
+
+	/*
+	 * If we didn't get any arguments from DT (regardless of whether that's
+	 * because we weren't configured to look for them, or because we looked
+	 * & found none) then we'll take arguments from the bootloader.
+	 * plat_mem_setup() should have filled arcs_cmdline with arguments from
+	 * the bootloader.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) || !dt_bootargs)
+		bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE);
+
+	/*
+	 * If the user specified a built-in command line & we didn't already
+	 * prepend it, we append it to boot_command_line here.
+	 */
+	if (IS_ENABLED(CONFIG_CMDLINE_BOOL) &&
+	    !IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND))
+		bootcmdline_append(builtin_cmdline, COMMAND_LINE_SIZE);
+}
 
 /*
  * arch_mem_init - initialize memory management subsystem
@@ -570,48 +655,12 @@ static void __init arch_mem_init(char **cmdline_p)
 {
 	extern void plat_mem_setup(void);
 
-	/*
-	 * Initialize boot_command_line to an innocuous but non-empty string in
-	 * order to prevent early_init_dt_scan_chosen() from copying
-	 * CONFIG_CMDLINE into it without our knowledge. We handle
-	 * CONFIG_CMDLINE ourselves below & don't want to duplicate its
-	 * content because repeating arguments can be problematic.
-	 */
-	strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE);
-
 	/* call board setup routine */
 	plat_mem_setup();
 	memblock_set_bottom_up(true);
 
-#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
-	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
-	if ((USE_PROM_CMDLINE && arcs_cmdline[0]) ||
-	    (USE_DTB_CMDLINE && !boot_command_line[0]))
-		strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
-
-	if (EXTEND_WITH_PROM && arcs_cmdline[0]) {
-		if (boot_command_line[0])
-			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
-		strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
-	}
-
-#if defined(CONFIG_CMDLINE_BOOL)
-	if (builtin_cmdline[0]) {
-		if (boot_command_line[0])
-			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
-		strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-	}
-
-	if (BUILTIN_EXTEND_WITH_PROM && arcs_cmdline[0]) {
-		if (boot_command_line[0])
-			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
-		strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
-	}
-#endif
-#endif
+	bootcmdline_init(cmdline_p);
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-
 	*cmdline_p = command_line;
 
 	parse_early_param();
@@ -747,8 +796,6 @@ void __init setup_arch(char **cmdline_p)
 #if defined(CONFIG_VT)
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 #endif
 #endif
 
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 712c15de6ab9..9058e9dcf080 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -31,7 +31,6 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/bootinfo.h>
-#include <asm/pmon.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/mipsregs.h>
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3f16f3823031..c333e5788664 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -37,6 +37,7 @@
 #include <asm/signal.h>
 #include <asm/sim.h>
 #include <asm/shmparam.h>
+#include <asm/sync.h>
 #include <asm/sysmips.h>
 #include <asm/switch_to.h>
 
@@ -133,12 +134,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 		  [efault] "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
-		loongson_llsc_mb();
 		__asm__ __volatile__ (
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"	li	%[err], 0				\n"
 		"1:							\n"
+		"	" __SYNC(full, loongson3_war) "			\n"
 		user_ll("%[old]", "(%[addr])")
 		"	move	%[tmp], %[new]				\n"
 		"2:							\n"
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index e7c5ab38e403..1f9e8ad636cc 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -374,3 +374,5 @@
 433	n32	fspick				sys_fspick
 434	n32	pidfd_open			sys_pidfd_open
 435	n32	clone3				__sys_clone3
+437	n32	openat2				sys_openat2
+438	n32	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 13cd66581f3b..c0b9d802dbf6 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -350,3 +350,5 @@
 433	n64	fspick				sys_fspick
 434	n64	pidfd_open			sys_pidfd_open
 435	n64	clone3				__sys_clone3
+437	n64	openat2				sys_openat2
+438	n64	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 353539ea4140..ac586774c980 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -423,3 +423,5 @@
 433	o32	fspick				sys_fspick
 434	o32	pidfd_open			sys_pidfd_open
 435	o32	clone3				__sys_clone3
+437	o32	openat2				sys_openat2
+438	o32	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 342e41de9d64..83f2a437d9e2 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1761,7 +1761,7 @@ static inline void parity_protection_init(void)
 
 	case CPU_5KC:
 	case CPU_5KE:
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 		write_c0_ecc(0x80000000);
 		back_to_back_c0_hazard();
 		/* Set the PE bit (bit 31) in the c0_errctl register. */
@@ -2394,7 +2394,7 @@ void __init trap_init(void)
 	else {
 		if (cpu_has_vtag_icache)
 			set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
-		else if (current_cpu_type() == CPU_LOONGSON3)
+		else if (current_cpu_type() == CPU_LOONGSON64)
 			set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
 		else
 			set_except_vector(EXCCODE_RI, handle_ri_rdhwr);
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 33ee0d18fb0a..a5f00ec73ea6 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -10,6 +10,11 @@
  */
 #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir)
 
+/* Cavium Octeon should not have a separate PT_NOTE Program Header. */
+#ifndef CONFIG_CAVIUM_OCTEON_SOC
+#define EMITS_PT_NOTE
+#endif
+
 #include <asm-generic/vmlinux.lds.h>
 
 #undef mips
@@ -76,16 +81,8 @@ SECTIONS
 		__stop___dbe_table = .;
 	}
 
-#ifdef CONFIG_CAVIUM_OCTEON_SOC
-#define NOTES_HEADER
-#else /* CONFIG_CAVIUM_OCTEON_SOC */
-#define NOTES_HEADER :note
-#endif /* CONFIG_CAVIUM_OCTEON_SOC */
-	NOTES :text NOTES_HEADER
-	.dummy : { *(.dummy) } :text
-
 	_sdata = .;			/* Start of data section */
-	RODATA
+	RO_DATA(4096)
 
 	/* writeable */
 	.data : {	/* Data */
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 97e538a8c1be..7dad7a293eae 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -136,6 +136,7 @@ pgd_t *kvm_pgd_alloc(void)
 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 				unsigned long addr)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -145,7 +146,8 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 		BUG();
 		return NULL;
 	}
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
 		pmd_t *new_pmd;
 
@@ -204,8 +206,8 @@ static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
 {
 	pte_t *pte;
 	unsigned long end = ~0ul;
-	int i_min = __pmd_offset(start_gpa);
-	int i_max = __pmd_offset(end_gpa);
+	int i_min = pmd_index(start_gpa);
+	int i_max = pmd_index(end_gpa);
 	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
 	int i;
 
@@ -232,8 +234,8 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
 {
 	pmd_t *pmd;
 	unsigned long end = ~0ul;
-	int i_min = __pud_offset(start_gpa);
-	int i_max = __pud_offset(end_gpa);
+	int i_min = pud_index(start_gpa);
+	int i_max = pud_index(end_gpa);
 	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
 	int i;
 
@@ -258,6 +260,7 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
 				   unsigned long end_gpa)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 	unsigned long end = ~0ul;
 	int i_min = pgd_index(start_gpa);
@@ -269,7 +272,8 @@ static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
 		if (!pgd_present(pgd[i]))
 			continue;
 
-		pud = pud_offset(pgd + i, 0);
+		p4d = p4d_offset(pgd, 0);
+		pud = pud_offset(p4d + i, 0);
 		if (i == i_max)
 			end = end_gpa;
 
@@ -334,8 +338,8 @@ static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
 	int ret = 0;							\
 	pte_t *pte;							\
 	unsigned long cur_end = ~0ul;					\
-	int i_min = __pmd_offset(start);				\
-	int i_max = __pmd_offset(end);					\
+	int i_min = pmd_index(start);				\
+	int i_max = pmd_index(end);					\
 	int i;								\
 									\
 	for (i = i_min; i <= i_max; ++i, start = 0) {			\
@@ -357,8 +361,8 @@ static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
 	int ret = 0;							\
 	pmd_t *pmd;							\
 	unsigned long cur_end = ~0ul;					\
-	int i_min = __pud_offset(start);				\
-	int i_max = __pud_offset(end);					\
+	int i_min = pud_index(start);				\
+	int i_max = pud_index(end);					\
 	int i;								\
 									\
 	for (i = i_min; i <= i_max; ++i, start = 0) {			\
@@ -378,6 +382,7 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
 				 unsigned long end)			\
 {									\
 	int ret = 0;							\
+	p4d_t *p4d;							\
 	pud_t *pud;							\
 	unsigned long cur_end = ~0ul;					\
 	int i_min = pgd_index(start);					\
@@ -388,7 +393,8 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
 		if (!pgd_present(pgd[i]))				\
 			continue;					\
 									\
-		pud = pud_offset(pgd + i, 0);				\
+		p4d = p4d_offset(pgd, 0);				\
+		pud = pud_offset(p4d + i, 0);				\
 		if (i == i_max)						\
 			cur_end = end;					\
 									\
@@ -862,8 +868,8 @@ static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
 {
 	pte_t *pte;
 	unsigned long end = ~0ul;
-	int i_min = __pmd_offset(start_gva);
-	int i_max = __pmd_offset(end_gva);
+	int i_min = pmd_index(start_gva);
+	int i_max = pmd_index(end_gva);
 	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
 	int i;
 
@@ -890,8 +896,8 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
 {
 	pmd_t *pmd;
 	unsigned long end = ~0ul;
-	int i_min = __pud_offset(start_gva);
-	int i_max = __pud_offset(end_gva);
+	int i_min = pud_index(start_gva);
+	int i_max = pud_index(end_gva);
 	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
 	int i;
 
@@ -916,6 +922,7 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
 static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
 				   unsigned long end_gva)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 	unsigned long end = ~0ul;
 	int i_min = pgd_index(start_gva);
@@ -927,7 +934,8 @@ static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
 		if (!pgd_present(pgd[i]))
 			continue;
 
-		pud = pud_offset(pgd + i, 0);
+		p4d = p4d_offset(pgd, 0);
+		pud = pud_offset(p4d + i, 0);
 		if (i == i_max)
 			end = end_gva;
 
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index 73daa6ad33af..5a11e83dffe6 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -564,6 +564,7 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
 	/* Don't free host kernel page tables copied from init_mm.pgd */
 	const unsigned long end = 0x80000000;
 	unsigned long pgd_va, pud_va, pmd_va;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -576,7 +577,8 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
 		pgd_va = (unsigned long)i << PGDIR_SHIFT;
 		if (pgd_va >= end)
 			break;
-		pud = pud_offset(pgd + i, 0);
+		p4d = p4d_offset(pgd, 0);
+		pud = pud_offset(p4d + i, 0);
 		for (j = 0; j < PTRS_PER_PUD; j++) {
 			if (pud_none(pud[j]))
 				continue;
diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c
index 037b08f3257e..42222f849bd2 100644
--- a/arch/mips/lantiq/falcon/sysctrl.c
+++ b/arch/mips/lantiq/falcon/sysctrl.c
@@ -221,16 +221,16 @@ void __init ltq_soc_init(void)
 				res_sys[2].name) < 0))
 		pr_err("Failed to request core resources");
 
-	status_membase = ioremap_nocache(res_status.start,
+	status_membase = ioremap(res_status.start,
 					resource_size(&res_status));
-	ltq_ebu_membase = ioremap_nocache(res_ebu.start,
+	ltq_ebu_membase = ioremap(res_ebu.start,
 					resource_size(&res_ebu));
 
 	if (!status_membase || !ltq_ebu_membase)
 		panic("Failed to remap core resources");
 
 	for (i = 0; i < 3; i++) {
-		sysctl_membase[i] = ioremap_nocache(res_sys[i].start,
+		sysctl_membase[i] = ioremap(res_sys[i].start,
 						resource_size(&res_sys[i]));
 		if (!sysctl_membase[i])
 			panic("Failed to remap sysctrl resources");
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 115b417dfb8e..df8eed3875f6 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -349,7 +349,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 					res.name))
 			pr_err("Failed to request icu%i memory\n", vpe);
 
-		ltq_icu_membase[vpe] = ioremap_nocache(res.start,
+		ltq_icu_membase[vpe] = ioremap(res.start,
 					resource_size(&res));
 
 		if (!ltq_icu_membase[vpe])
@@ -402,7 +402,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 							res.name))
 			pr_err("Failed to request eiu memory");
 
-		ltq_eiu_membase = ioremap_nocache(res.start,
+		ltq_eiu_membase = ioremap(res.start,
 							resource_size(&res));
 		if (!ltq_eiu_membase)
 			panic("Failed to remap eiu memory");
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 156a95ac5c72..aa37545ebe8f 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -431,10 +431,10 @@ void __init ltq_soc_init(void)
 				res_ebu.name))
 		pr_err("Failed to request core resources");
 
-	pmu_membase = ioremap_nocache(res_pmu.start, resource_size(&res_pmu));
-	ltq_cgu_membase = ioremap_nocache(res_cgu.start,
+	pmu_membase = ioremap(res_pmu.start, resource_size(&res_pmu));
+	ltq_cgu_membase = ioremap(res_cgu.start,
 						resource_size(&res_cgu));
-	ltq_ebu_membase = ioremap_nocache(res_ebu.start,
+	ltq_ebu_membase = ioremap(res_ebu.start,
 						resource_size(&res_ebu));
 	if (!pmu_membase || !ltq_cgu_membase || !ltq_ebu_membase)
 		panic("Failed to remap core resources");
diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c
index 3b2a1e78a543..116d0bd8b2ae 100644
--- a/arch/mips/lib/bitops.c
+++ b/arch/mips/lib/bitops.c
@@ -7,6 +7,7 @@
  * Copyright (c) 1999, 2000  Silicon Graphics, Inc.
  */
 #include <linux/bitops.h>
+#include <linux/bits.h>
 #include <linux/irqflags.h>
 #include <linux/export.h>
 
@@ -19,12 +20,11 @@
  */
 void __mips_set_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	*a |= mask;
@@ -41,12 +41,11 @@ EXPORT_SYMBOL(__mips_set_bit);
  */
 void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	*a &= ~mask;
@@ -63,12 +62,11 @@ EXPORT_SYMBOL(__mips_clear_bit);
  */
 void __mips_change_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	*a ^= mask;
@@ -78,32 +76,6 @@ EXPORT_SYMBOL(__mips_change_bit);
 
 
 /**
- * __mips_test_and_set_bit - Set a bit and return its old value.  This is
- * called by test_and_set_bit() if it cannot find a faster solution.
- * @nr: Bit to set
- * @addr: Address to count from
- */
-int __mips_test_and_set_bit(unsigned long nr,
-			    volatile unsigned long *addr)
-{
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
-	unsigned long mask;
-	unsigned long flags;
-	int res;
-
-	a += nr >> SZLONG_LOG;
-	mask = 1UL << bit;
-	raw_local_irq_save(flags);
-	res = (mask & *a) != 0;
-	*a |= mask;
-	raw_local_irq_restore(flags);
-	return res;
-}
-EXPORT_SYMBOL(__mips_test_and_set_bit);
-
-
-/**
  * __mips_test_and_set_bit_lock - Set a bit and return its old value.  This is
  * called by test_and_set_bit_lock() if it cannot find a faster solution.
  * @nr: Bit to set
@@ -112,13 +84,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit);
 int __mips_test_and_set_bit_lock(unsigned long nr,
 				 volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 	int res;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	res = (mask & *a) != 0;
@@ -137,13 +108,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit_lock);
  */
 int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 	int res;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	res = (mask & *a) != 0;
@@ -162,13 +132,12 @@ EXPORT_SYMBOL(__mips_test_and_clear_bit);
  */
 int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *a = (unsigned long *)addr;
-	unsigned bit = nr & SZLONG_MASK;
+	volatile unsigned long *a = &addr[BIT_WORD(nr)];
+	unsigned int bit = nr % BITS_PER_LONG;
 	unsigned long mask;
 	unsigned long flags;
 	int res;
 
-	a += nr >> SZLONG_LOG;
 	mask = 1UL << bit;
 	raw_local_irq_save(flags);
 	res = (mask & *a) != 0;
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 2ff84f4b1717..fda7b57b826e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -279,7 +279,7 @@ EXPORT_SYMBOL(csum_partial)
 #endif
 
 	/* odd buffer alignment? */
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
 	.set	push
 	.set	arch=mips32r2
 	wsbh	v1, sum
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(csum_partial)
 	addu	sum, v1
 #endif
 
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
 	.set	push
 	.set	arch=mips32r2
 	wsbh	v1, sum
diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig
new file mode 100644
index 000000000000..595dd48e1e4d
--- /dev/null
+++ b/arch/mips/loongson2ef/Kconfig
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+if MACH_LOONGSON2EF
+
+choice
+	prompt "Machine Type"
+
+config LEMOTE_FULOONG2E
+	bool "Lemote Fuloong(2e) mini-PC"
+	select ARCH_SPARSEMEM_ENABLE
+	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYS_HAS_CPU_LOONGSON2E
+	select DMA_NONCOHERENT
+	select BOOT_ELF32
+	select BOARD_SCACHE
+	select FORCE_PCI
+	select I8259
+	select ISA
+	select IRQ_MIPS_CPU
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
+	select SYS_HAS_EARLY_PRINTK
+	select USE_GENERIC_EARLY_PRINTK_8250
+	select GENERIC_ISA_DMA_SUPPORT_BROKEN
+	select CPU_HAS_WB
+	select LOONGSON_MC146818
+	help
+	  Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and
+	  an FPGA northbridge
+
+	  Lemote Fuloong(2e) mini PC have a VIA686B south bridge.
+
+config LEMOTE_MACH2F
+	bool "Lemote Loongson 2F family machines"
+	select ARCH_SPARSEMEM_ENABLE
+	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select BOARD_SCACHE
+	select BOOT_ELF32
+	select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
+	select CPU_HAS_WB
+	select CS5536
+	select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
+	select DMA_NONCOHERENT
+	select GENERIC_ISA_DMA_SUPPORT_BROKEN
+	select HAVE_CLK
+	select FORCE_PCI
+	select I8259
+	select IRQ_MIPS_CPU
+	select ISA
+	select SYS_HAS_CPU_LOONGSON2F
+	select SYS_HAS_EARLY_PRINTK
+	select USE_GENERIC_EARLY_PRINTK_8250
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_HIGHMEM
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select LOONGSON_MC146818
+	help
+	  Lemote Loongson 2F family machines utilize the 2F revision of
+	  Loongson processor and the AMD CS5536 south bridge.
+
+	  These family machines include fuloong2f mini PC, yeeloong2f notebook,
+	  LingLoong allinone PC and so forth.
+
+endchoice
+
+config CS5536
+	bool
+
+config CS5536_MFGPT
+	bool "CS5536 MFGPT Timer"
+	depends on CS5536 && !HIGH_RES_TIMERS
+	select MIPS_EXTERNAL_TIMER
+	help
+	  This option enables the mfgpt0 timer of AMD CS5536. With this timer
+	  switched on you can not use high resolution timers.
+
+	  If you want to enable the Loongson2 CPUFreq Driver, Please enable
+	  this option at first, otherwise, You will get wrong system time.
+
+	  If unsure, say Yes.
+
+config LOONGSON_UART_BASE
+	bool
+	default y
+	depends on EARLY_PRINTK || SERIAL_8250
+
+config LOONGSON_MC146818
+	bool
+	default n
+
+endif # MACH_LOONGSON2EF
diff --git a/arch/mips/loongson2ef/Makefile b/arch/mips/loongson2ef/Makefile
new file mode 100644
index 000000000000..d4af1605cc9b
--- /dev/null
+++ b/arch/mips/loongson2ef/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Common code for all Loongson based systems
+#
+
+obj-$(CONFIG_MACH_LOONGSON2EF) += common/
+
+#
+# Lemote Fuloong mini-PC (Loongson 2E-based)
+#
+
+obj-$(CONFIG_LEMOTE_FULOONG2E)	+= fuloong-2e/
+
+#
+# Lemote loongson2f family machines
+#
+
+obj-$(CONFIG_LEMOTE_MACH2F)  += lemote-2f/
diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform
new file mode 100644
index 000000000000..3aca42963f35
--- /dev/null
+++ b/arch/mips/loongson2ef/Platform
@@ -0,0 +1,32 @@
+#
+# Loongson Processors' Support
+#
+
+# Only gcc >= 4.4 have Loongson specific support
+cflags-$(CONFIG_CPU_LOONGSON2EF)	+= -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += \
+	$(call cc-option,-march=loongson2e,-march=r4600)
+cflags-$(CONFIG_CPU_LOONGSON2F) += \
+	$(call cc-option,-march=loongson2f,-march=r4600)
+# Enable the workarounds for Loongson2f
+ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
+  ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),)
+    $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop)
+  else
+    cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop
+  endif
+  ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),)
+    $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump)
+  else
+    cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump
+  endif
+endif
+
+#
+# Loongson Machines' Support
+#
+
+platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
+cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely
+load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
+load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile
new file mode 100644
index 000000000000..d5ab3e543ea3
--- /dev/null
+++ b/arch/mips/loongson2ef/common/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for loongson based machines.
+#
+
+obj-y += setup.o init.o env.o time.o reset.o irq.o \
+    bonito-irq.o mem.o machtype.o platform.o serial.o
+obj-$(CONFIG_PCI) += pci.o
+
+#
+# Serial port support
+#
+obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
+obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
+
+#
+# Enable CS5536 Virtual Support Module(VSM) to virtulize the PCI configure
+# space
+#
+obj-$(CONFIG_CS5536) += cs5536/
+
+#
+# Suspend Support
+#
+
+obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson2ef/common/bonito-irq.c
index 82352cc25e4c..82352cc25e4c 100644
--- a/arch/mips/loongson64/common/bonito-irq.c
+++ b/arch/mips/loongson2ef/common/bonito-irq.c
diff --git a/arch/mips/loongson64/common/cs5536/Makefile b/arch/mips/loongson2ef/common/cs5536/Makefile
index b32b29661245..b32b29661245 100644
--- a/arch/mips/loongson64/common/cs5536/Makefile
+++ b/arch/mips/loongson2ef/common/cs5536/Makefile
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_acc.c b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c
index ff50aae72916..ff50aae72916 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_acc.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c
index bd4c39fe6109..bd4c39fe6109 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ide.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c
index bb933294b092..bb933294b092 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ide.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_isa.c b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
index 5ad38f86ee62..5ad38f86ee62 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_isa.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
index 30af1b7c7529..30af1b7c7529 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c
index 71a52b120317..71a52b120317 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_pci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
index 202c89b568ba..202c89b568ba 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_pci.c
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c
new file mode 100644
index 000000000000..6f20bdf9b242
--- /dev/null
+++ b/arch/mips/loongson2ef/common/env.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on Ocelot Linux port, which is
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: jsun@mvista.com or jsun@junsun.net
+ *
+ * Copyright 2003 ICT CAS
+ * Author: Michael Guo <guoyi@ict.ac.cn>
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <linux/export.h>
+#include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
+#include <loongson.h>
+
+u32 cpu_clock_freq;
+EXPORT_SYMBOL(cpu_clock_freq);
+
+void __init prom_init_env(void)
+{
+	/* pmon passes arguments in 32bit pointers */
+	unsigned int processor_id;
+
+	cpu_clock_freq = fw_getenvl("cpuclock");
+	memsize = fw_getenvl("memsize");
+	highmemsize = fw_getenvl("highmemsize");
+
+	if (memsize == 0)
+		memsize = 256;
+
+	pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize);
+
+	if (cpu_clock_freq == 0) {
+		processor_id = (&current_cpu_data)->processor_id;
+		switch (processor_id & PRID_REV_MASK) {
+		case PRID_REV_LOONGSON2E:
+			cpu_clock_freq = 533080000;
+			break;
+		case PRID_REV_LOONGSON2F:
+			cpu_clock_freq = 797000000;
+			break;
+		default:
+			cpu_clock_freq = 100000000;
+			break;
+		}
+	}
+	pr_info("CpuClock = %u\n", cpu_clock_freq);
+}
diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c
new file mode 100644
index 000000000000..45512178be77
--- /dev/null
+++ b/arch/mips/loongson2ef/common/init.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include <linux/memblock.h>
+#include <asm/bootinfo.h>
+#include <asm/traps.h>
+#include <asm/smp-ops.h>
+#include <asm/cacheflush.h>
+#include <asm/fw/fw.h>
+
+#include <loongson.h>
+
+/* Loongson CPU address windows config space base address */
+unsigned long __maybe_unused _loongson_addrwincfg_base;
+
+static void __init mips_nmi_setup(void)
+{
+	void *base;
+	extern char except_vec_nmi;
+
+	base = (void *)(CAC_BASE + 0x380);
+	memcpy(base, &except_vec_nmi, 0x80);
+	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
+void __init prom_init(void)
+{
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+	_loongson_addrwincfg_base = (unsigned long)
+		ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE);
+#endif
+
+	fw_init_cmdline();
+	prom_init_machtype();
+	prom_init_env();
+
+	/* init base address of io space */
+	set_io_port_base((unsigned long)
+		ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
+	prom_init_memory();
+
+	/*init the uart base address */
+	prom_init_uart_base();
+	board_nmi_handler_setup = mips_nmi_setup;
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson2ef/common/irq.c
index 0ea93c1c0a97..0ea93c1c0a97 100644
--- a/arch/mips/loongson64/common/irq.c
+++ b/arch/mips/loongson2ef/common/irq.c
diff --git a/arch/mips/loongson64/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c
index 4e42d929f1c7..82f6de49f20f 100644
--- a/arch/mips/loongson64/common/machtype.c
+++ b/arch/mips/loongson2ef/common/machtype.c
@@ -23,7 +23,6 @@ static const char *system_types[] = {
 	[MACH_DEXXON_GDIUM2F10]	= "dexxon-gdium-2f",
 	[MACH_LEMOTE_NAS]	= "lemote-nas-2f",
 	[MACH_LEMOTE_LL2F]	= "lemote-lynloong-2f",
-	[MACH_LOONGSON_GENERIC]	= "generic-loongson-machine",
 	[MACH_LOONGSON_END]	= NULL,
 };
 
diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c
new file mode 100644
index 000000000000..ae21f1c62baa
--- /dev/null
+++ b/arch/mips/loongson2ef/common/mem.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+#include <mem.h>
+#include <pci.h>
+
+
+u32 memsize, highmemsize;
+
+void __init prom_init_memory(void)
+{
+	add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+
+	add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize <<
+				20), BOOT_MEM_RESERVED);
+
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+	{
+		int bit;
+
+		bit = fls(memsize + highmemsize);
+		if (bit != ffs(memsize + highmemsize))
+			bit += 20;
+		else
+			bit = bit + 20 - 1;
+
+		/* set cpu window3 to map CPU to DDR: 2G -> 2G */
+		LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul,
+					  0x80000000ul, (1 << bit));
+		mmiowb();
+	}
+#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
+
+#ifdef CONFIG_64BIT
+	if (highmemsize > 0)
+		add_memory_region(LOONGSON_HIGHMEM_START,
+				  highmemsize << 20, BOOT_MEM_RAM);
+
+	add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START -
+			  LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED);
+
+#endif /* !CONFIG_64BIT */
+}
+
+/* override of arch/mips/mm/cache.c: __uncached_access */
+int __uncached_access(struct file *file, unsigned long addr)
+{
+	if (file->f_flags & O_DSYNC)
+		return 1;
+
+	return addr >= __pa(high_memory) ||
+		((addr >= LOONGSON_MMIO_MEM_START) &&
+		 (addr < LOONGSON_MMIO_MEM_END));
+}
diff --git a/arch/mips/loongson2ef/common/pci.c b/arch/mips/loongson2ef/common/pci.c
new file mode 100644
index 000000000000..200916925e95
--- /dev/null
+++ b/arch/mips/loongson2ef/common/pci.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/pci.h>
+
+#include <pci.h>
+#include <loongson.h>
+
+static struct resource loongson_pci_mem_resource = {
+	.name	= "pci memory space",
+	.start	= LOONGSON_PCI_MEM_START,
+	.end	= LOONGSON_PCI_MEM_END,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct resource loongson_pci_io_resource = {
+	.name	= "pci io space",
+	.start	= LOONGSON_PCI_IO_START,
+	.end	= IO_SPACE_LIMIT,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct pci_controller  loongson_pci_controller = {
+	.pci_ops	= &loongson_pci_ops,
+	.io_resource	= &loongson_pci_io_resource,
+	.mem_resource	= &loongson_pci_mem_resource,
+	.mem_offset	= 0x00000000UL,
+	.io_offset	= 0x00000000UL,
+};
+
+static void __init setup_pcimap(void)
+{
+	/*
+	 * local to PCI mapping for CPU accessing PCI space
+	 * CPU address space [256M,448M] is window for accessing pci space
+	 * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M]
+	 *
+	 * pcimap: PCI_MAP2  PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0
+	 *	     [<2G]   [384M,448M] [320M,384M] [0M,64M]
+	 */
+	LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 |
+		LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) |
+		LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) |
+		LOONGSON_PCIMAP_WIN(0, 0);
+
+	/*
+	 * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M]
+	 */
+	LOONGSON_PCIBASE0 = 0x80000000ul;   /* base: 2G -> mmap: 0M */
+	/* size: 256M, burst transmission, pre-fetch enable, 64bit */
+	LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul;
+	LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful;
+	LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */
+	LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul;
+	LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */
+	LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul;
+
+	/* avoid deadlock of PCI reading/writing lock operation */
+	LOONGSON_PCI_ISR4C = 0xd2000001ul;
+
+	/* can not change gnt to break pci transfer when device's gnt not
+	deassert for some broken device */
+	LOONGSON_PXARB_CFG = 0x00fe0105ul;
+
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+	/*
+	 * set cpu addr window2 to map CPU address space to PCI address space
+	 */
+	LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC,
+		LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE);
+#endif
+}
+
+extern int sbx00_acpi_init(void);
+
+static int __init pcibios_init(void)
+{
+	setup_pcimap();
+
+	loongson_pci_controller.io_map_base = mips_io_port_base;
+	register_pci_controller(&loongson_pci_controller);
+
+
+	return 0;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/loongson64/common/platform.c b/arch/mips/loongson2ef/common/platform.c
index 0084820cffaa..0084820cffaa 100644
--- a/arch/mips/loongson64/common/platform.c
+++ b/arch/mips/loongson2ef/common/platform.c
diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c
new file mode 100644
index 000000000000..11f4cfd581fb
--- /dev/null
+++ b/arch/mips/loongson2ef/common/pm.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * loongson-specific suspend support
+ *
+ *  Copyright (C) 2009 Lemote Inc.
+ *  Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+#include <linux/suspend.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+
+#include <asm/i8259.h>
+#include <asm/mipsregs.h>
+
+#include <loongson.h>
+
+static unsigned int __maybe_unused cached_master_mask;	/* i8259A */
+static unsigned int __maybe_unused cached_slave_mask;
+static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */
+
+void arch_suspend_disable_irqs(void)
+{
+	/* disable all mips events */
+	local_irq_disable();
+
+#ifdef CONFIG_I8259
+	/* disable all events of i8259A */
+	cached_slave_mask = inb(PIC_SLAVE_IMR);
+	cached_master_mask = inb(PIC_MASTER_IMR);
+
+	outb(0xff, PIC_SLAVE_IMR);
+	inb(PIC_SLAVE_IMR);
+	outb(0xff, PIC_MASTER_IMR);
+	inb(PIC_MASTER_IMR);
+#endif
+	/* disable all events of bonito */
+	cached_bonito_irq_mask = LOONGSON_INTEN;
+	LOONGSON_INTENCLR = 0xffff;
+	(void)LOONGSON_INTENCLR;
+}
+
+void arch_suspend_enable_irqs(void)
+{
+	/* enable all mips events */
+	local_irq_enable();
+#ifdef CONFIG_I8259
+	/* only enable the cached events of i8259A */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);
+	outb(cached_master_mask, PIC_MASTER_IMR);
+#endif
+	/* enable all cached events of bonito */
+	LOONGSON_INTENSET = cached_bonito_irq_mask;
+	(void)LOONGSON_INTENSET;
+}
+
+/*
+ * Setup the board-specific events for waking up loongson from wait mode
+ */
+void __weak setup_wakeup_events(void)
+{
+}
+
+/*
+ * Check wakeup events
+ */
+int __weak wakeup_loongson(void)
+{
+	return 1;
+}
+
+/*
+ * If the events are really what we want to wakeup the CPU, wake it up
+ * otherwise put the CPU asleep again.
+ */
+static void wait_for_wakeup_events(void)
+{
+	while (!wakeup_loongson())
+		writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG);
+}
+
+/*
+ * Stop all perf counters
+ *
+ * $24 is the control register of Loongson perf counter
+ */
+static inline void stop_perf_counters(void)
+{
+	__write_64bit_c0_register($24, 0, 0);
+}
+
+
+static void loongson_suspend_enter(void)
+{
+	static unsigned int cached_cpu_freq;
+
+	/* setup wakeup events via enabling the IRQs */
+	setup_wakeup_events();
+
+	stop_perf_counters();
+
+	cached_cpu_freq = readl(LOONGSON_CHIPCFG);
+
+	/* Put CPU into wait mode */
+	writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG);
+
+	/* wait for the given events to wakeup cpu from wait mode */
+	wait_for_wakeup_events();
+
+	writel(cached_cpu_freq, LOONGSON_CHIPCFG);
+
+	mmiowb();
+}
+
+void __weak mach_suspend(void)
+{
+}
+
+void __weak mach_resume(void)
+{
+}
+
+static int loongson_pm_enter(suspend_state_t state)
+{
+	mach_suspend();
+
+	/* processor specific suspend */
+	loongson_suspend_enter();
+
+	mach_resume();
+
+	return 0;
+}
+
+static int loongson_pm_valid_state(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_ON:
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+
+static const struct platform_suspend_ops loongson_pm_ops = {
+	.valid	= loongson_pm_valid_state,
+	.enter	= loongson_pm_enter,
+};
+
+static int __init loongson_pm_init(void)
+{
+	suspend_set_ops(&loongson_pm_ops);
+
+	return 0;
+}
+arch_initcall(loongson_pm_init);
diff --git a/arch/mips/loongson2ef/common/reset.c b/arch/mips/loongson2ef/common/reset.c
new file mode 100644
index 000000000000..e49c40646995
--- /dev/null
+++ b/arch/mips/loongson2ef/common/reset.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Zhangjin Wu, wuzhangjin@gmail.com
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+
+#include <asm/idle.h>
+#include <asm/reboot.h>
+
+#include <loongson.h>
+
+static inline void loongson_reboot(void)
+{
+#ifndef CONFIG_CPU_JUMP_WORKAROUNDS
+	((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) ();
+#else
+	void (*func)(void);
+
+	func = (void *)ioremap(LOONGSON_BOOT_BASE, 4);
+
+	__asm__ __volatile__(
+	"	.set	noat						\n"
+	"	jr	%[func]						\n"
+	"	.set	at						\n"
+	: /* No outputs */
+	: [func] "r" (func));
+#endif
+}
+
+static void loongson_restart(char *command)
+{
+	/* do preparation for reboot */
+	mach_prepare_reboot();
+
+	/* reboot via jumping to boot base address */
+	loongson_reboot();
+}
+
+static void loongson_poweroff(void)
+{
+	mach_prepare_shutdown();
+
+	/*
+	 * It needs a wait loop here, but mips/kernel/reset.c already calls
+	 * a generic delay loop, machine_hang(), so simply return.
+	 */
+	return;
+}
+
+static void loongson_halt(void)
+{
+	pr_notice("\n\n** You can safely turn off the power now **\n\n");
+	while (1) {
+		if (cpu_wait)
+			cpu_wait();
+	}
+}
+
+static int __init mips_reboot_setup(void)
+{
+	_machine_restart = loongson_restart;
+	_machine_halt = loongson_halt;
+	pm_power_off = loongson_poweroff;
+
+	return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/loongson64/common/rtc.c b/arch/mips/loongson2ef/common/rtc.c
index 8d7628c0f513..8d7628c0f513 100644
--- a/arch/mips/loongson64/common/rtc.c
+++ b/arch/mips/loongson2ef/common/rtc.c
diff --git a/arch/mips/loongson2ef/common/serial.c b/arch/mips/loongson2ef/common/serial.c
new file mode 100644
index 000000000000..ac4f6e3ebc3e
--- /dev/null
+++ b/arch/mips/loongson2ef/common/serial.c
@@ -0,0 +1,86 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
+ *
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Yan hua (yanhua@lemote.com)
+ * Author: Wu Zhangjin (wuzhangjin@gmail.com)
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/serial_8250.h>
+
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+#include <machine.h>
+
+#define PORT(int, clk)			\
+{								\
+	.irq		= int,					\
+	.uartclk	= clk,					\
+	.iotype		= UPIO_PORT,				\
+	.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,	\
+	.regshift	= 0,					\
+}
+
+#define PORT_M(int, clk)				\
+{								\
+	.irq		= MIPS_CPU_IRQ_BASE + (int),		\
+	.uartclk	= clk,					\
+	.iotype		= UPIO_MEM,				\
+	.membase	= (void __iomem *)NULL,			\
+	.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,	\
+	.regshift	= 0,					\
+}
+
+static struct plat_serial8250_port uart8250_data[MACH_LOONGSON_END + 1] = {
+	[MACH_LOONGSON_UNKNOWN]	= {},
+	[MACH_LEMOTE_FL2E]	= PORT(4, 1843200),
+	[MACH_LEMOTE_FL2F]	= PORT(3, 1843200),
+	[MACH_LEMOTE_ML2F7]	= PORT_M(3, 3686400),
+	[MACH_LEMOTE_YL2F89]	= PORT_M(3, 3686400),
+	[MACH_DEXXON_GDIUM2F10]	= PORT_M(3, 3686400),
+	[MACH_LEMOTE_NAS]	= PORT_M(3, 3686400),
+	[MACH_LEMOTE_LL2F]	= PORT(3, 1843200),
+	[MACH_LOONGSON_END]	= {},
+};
+
+static struct platform_device uart8250_device = {
+	.name = "serial8250",
+	.id = PLAT8250_DEV_PLATFORM,
+};
+
+static int __init serial_init(void)
+{
+	unsigned char iotype;
+
+	iotype = uart8250_data[mips_machtype].iotype;
+
+	if (UPIO_MEM == iotype) {
+		uart8250_data[mips_machtype].mapbase =
+			loongson_uart_base;
+		uart8250_data[mips_machtype].membase =
+			(void __iomem *)_loongson_uart_base;
+	}
+	else if (UPIO_PORT == iotype)
+		uart8250_data[mips_machtype].iobase =
+			loongson_uart_base - LOONGSON_PCIIO_BASE;
+
+	memset(&uart8250_data[mips_machtype + 1], 0,
+			sizeof(struct plat_serial8250_port));
+	uart8250_device.dev.platform_data = &uart8250_data[mips_machtype];
+
+	return platform_device_register(&uart8250_device);
+}
+module_init(serial_init);
+
+static void __exit serial_exit(void)
+{
+	platform_device_unregister(&uart8250_device);
+}
+module_exit(serial_exit);
diff --git a/arch/mips/loongson2ef/common/setup.c b/arch/mips/loongson2ef/common/setup.c
new file mode 100644
index 000000000000..4fd27f4f90ed
--- /dev/null
+++ b/arch/mips/loongson2ef/common/setup.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/export.h>
+#include <linux/init.h>
+
+#include <asm/wbflush.h>
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+
+static void wbflush_loongson(void)
+{
+	asm(".set\tpush\n\t"
+	    ".set\tnoreorder\n\t"
+	    ".set mips3\n\t"
+	    "sync\n\t"
+	    "nop\n\t"
+	    ".set\tpop\n\t"
+	    ".set mips0\n\t");
+}
+
+void (*__wbflush)(void) = wbflush_loongson;
+EXPORT_SYMBOL(__wbflush);
+
+void __init plat_mem_setup(void)
+{
+}
diff --git a/arch/mips/loongson2ef/common/time.c b/arch/mips/loongson2ef/common/time.c
new file mode 100644
index 000000000000..585741af42a9
--- /dev/null
+++ b/arch/mips/loongson2ef/common/time.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <asm/mc146818-time.h>
+#include <asm/time.h>
+#include <asm/hpet.h>
+
+#include <loongson.h>
+#include <cs5536/cs5536_mfgpt.h>
+
+void __init plat_time_init(void)
+{
+	/* setup mips r4k timer */
+	mips_hpt_frequency = cpu_clock_freq / 2;
+
+	setup_mfgpt0_timer();
+}
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+	ts->tv_sec = mc146818_get_cmos_time();
+	ts->tv_nsec = 0;
+}
diff --git a/arch/mips/loongson2ef/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c
new file mode 100644
index 000000000000..522bea6ad7b0
--- /dev/null
+++ b/arch/mips/loongson2ef/common/uart_base.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include <linux/export.h>
+#include <asm/bootinfo.h>
+#include <asm/setup.h>
+
+#include <loongson.h>
+
+/* raw */
+unsigned long loongson_uart_base;
+/* ioremapped */
+unsigned long _loongson_uart_base;
+
+EXPORT_SYMBOL(loongson_uart_base);
+EXPORT_SYMBOL(_loongson_uart_base);
+
+void prom_init_loongson_uart_base(void)
+{
+	switch (mips_machtype) {
+	case MACH_LEMOTE_FL2E:
+		loongson_uart_base = LOONGSON_PCIIO_BASE + 0x3f8;
+		break;
+	case MACH_LEMOTE_FL2F:
+	case MACH_LEMOTE_LL2F:
+		loongson_uart_base = LOONGSON_PCIIO_BASE + 0x2f8;
+		break;
+	case MACH_LEMOTE_ML2F7:
+	case MACH_LEMOTE_YL2F89:
+	case MACH_DEXXON_GDIUM2F10:
+	case MACH_LEMOTE_NAS:
+	default:
+		/* The CPU provided serial port (LPC) */
+		loongson_uart_base = LOONGSON_LIO1_BASE + 0x3f8;
+		break;
+	}
+
+	_loongson_uart_base = TO_UNCAC(loongson_uart_base);
+	setup_8250_early_printk_port(_loongson_uart_base, 0, 1024);
+}
diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson2ef/fuloong-2e/Makefile
index bb58edb3bea7..bb58edb3bea7 100644
--- a/arch/mips/loongson64/fuloong-2e/Makefile
+++ b/arch/mips/loongson2ef/fuloong-2e/Makefile
diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c
index e122292bf666..e122292bf666 100644
--- a/arch/mips/loongson64/fuloong-2e/dma.c
+++ b/arch/mips/loongson2ef/fuloong-2e/dma.c
diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson2ef/fuloong-2e/irq.c
index 32278e7bf85c..32278e7bf85c 100644
--- a/arch/mips/loongson64/fuloong-2e/irq.c
+++ b/arch/mips/loongson2ef/fuloong-2e/irq.c
diff --git a/arch/mips/loongson64/fuloong-2e/reset.c b/arch/mips/loongson2ef/fuloong-2e/reset.c
index 8273de1cf4bb..8273de1cf4bb 100644
--- a/arch/mips/loongson64/fuloong-2e/reset.c
+++ b/arch/mips/loongson2ef/fuloong-2e/reset.c
diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson2ef/lemote-2f/Makefile
index 881a0ec06d1f..881a0ec06d1f 100644
--- a/arch/mips/loongson64/lemote-2f/Makefile
+++ b/arch/mips/loongson2ef/lemote-2f/Makefile
diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c
index 8281334df9c8..414f282c8ab5 100644
--- a/arch/mips/loongson64/lemote-2f/clock.c
+++ b/arch/mips/loongson2ef/lemote-2f/clock.c
@@ -15,7 +15,7 @@
 #include <linux/spinlock.h>
 
 #include <asm/clock.h>
-#include <asm/mach-loongson64/loongson.h>
+#include <asm/mach-loongson2ef/loongson.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
@@ -118,9 +118,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 
 	clk->rate = rate;
 
-	regval = LOONGSON_CHIPCFG(0);
+	regval = readl(LOONGSON_CHIPCFG);
 	regval = (regval & ~0x7) | (pos->driver_data - 1);
-	LOONGSON_CHIPCFG(0) = regval;
+	writel(regval, LOONGSON_CHIPCFG);
 
 	return ret;
 }
diff --git a/arch/mips/loongson64/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c
index abf0e39d7e46..abf0e39d7e46 100644
--- a/arch/mips/loongson64/lemote-2f/dma.c
+++ b/arch/mips/loongson2ef/lemote-2f/dma.c
diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
index d138220e96a2..d138220e96a2 100644
--- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
index aecdbc9c875a..aecdbc9c875a 100644
--- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson2ef/lemote-2f/irq.c
index c58a044c6c07..c58a044c6c07 100644
--- a/arch/mips/loongson64/lemote-2f/irq.c
+++ b/arch/mips/loongson2ef/lemote-2f/irq.c
diff --git a/arch/mips/loongson64/lemote-2f/machtype.c b/arch/mips/loongson2ef/lemote-2f/machtype.c
index 9462a3ab57be..9462a3ab57be 100644
--- a/arch/mips/loongson64/lemote-2f/machtype.c
+++ b/arch/mips/loongson2ef/lemote-2f/machtype.c
diff --git a/arch/mips/loongson64/lemote-2f/pm.c b/arch/mips/loongson2ef/lemote-2f/pm.c
index 3d0027229e3c..3d0027229e3c 100644
--- a/arch/mips/loongson64/lemote-2f/pm.c
+++ b/arch/mips/loongson2ef/lemote-2f/pm.c
diff --git a/arch/mips/loongson2ef/lemote-2f/reset.c b/arch/mips/loongson2ef/lemote-2f/reset.c
new file mode 100644
index 000000000000..197dae4ffd23
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/reset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Board-specific reboot/shutdown routines
+ *
+ * Copyright (c) 2009 Philippe Vachon <philippe@cowpig.ca>
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+
+#include <cs5536/cs5536.h>
+#include "ec_kb3310b.h"
+
+static void reset_cpu(void)
+{
+	/*
+	 * reset cpu to full speed, this is needed when enabling cpu frequency
+	 * scalling
+	 */
+	writel(readl(LOONGSON_CHIPCFG) | 0x7, LOONGSON_CHIPCFG);
+}
+
+/* reset support for fuloong2f */
+
+static void fl2f_reboot(void)
+{
+	reset_cpu();
+
+	/* send a reset signal to south bridge.
+	 *
+	 * NOTE: if enable "Power Management" in kernel, rtl8169 will not reset
+	 * normally with this reset operation and it will not work in PMON, but
+	 * you can type halt command and then reboot, seems the hardware reset
+	 * logic not work normally.
+	 */
+	{
+		u32 hi, lo;
+		_rdmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), &hi, &lo);
+		lo |= 0x00000001;
+		_wrmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), hi, lo);
+	}
+}
+
+static void fl2f_shutdown(void)
+{
+	u32 hi, lo, val;
+	int gpio_base;
+
+	/* get gpio base */
+	_rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_GPIO), &hi, &lo);
+	gpio_base = lo & 0xff00;
+
+	/* make cs5536 gpio13 output enable */
+	val = inl(gpio_base + GPIOL_OUT_EN);
+	val &= ~(1 << (16 + 13));
+	val |= (1 << 13);
+	outl(val, gpio_base + GPIOL_OUT_EN);
+	mmiowb();
+	/* make cs5536 gpio13 output low level voltage. */
+	val = inl(gpio_base + GPIOL_OUT_VAL) & ~(1 << (13));
+	val |= (1 << (16 + 13));
+	outl(val, gpio_base + GPIOL_OUT_VAL);
+	mmiowb();
+}
+
+/* reset support for yeeloong2f and mengloong2f notebook */
+
+static void ml2f_reboot(void)
+{
+	reset_cpu();
+
+	/* sending an reset signal to EC(embedded controller) */
+	ec_write(REG_RESET, BIT_RESET_ON);
+}
+
+#define yl2f89_reboot ml2f_reboot
+
+/* menglong(7inches) laptop has different shutdown logic from 8.9inches */
+#define EC_SHUTDOWN_IO_PORT_HIGH 0xff2d
+#define EC_SHUTDOWN_IO_PORT_LOW	 0xff2e
+#define EC_SHUTDOWN_IO_PORT_DATA 0xff2f
+#define REG_SHUTDOWN_HIGH	 0xFC
+#define REG_SHUTDOWN_LOW	 0x29
+#define BIT_SHUTDOWN_ON		 (1 << 1)
+
+static void ml2f_shutdown(void)
+{
+	u8 val;
+	u64 i;
+
+	outb(REG_SHUTDOWN_HIGH, EC_SHUTDOWN_IO_PORT_HIGH);
+	outb(REG_SHUTDOWN_LOW, EC_SHUTDOWN_IO_PORT_LOW);
+	mmiowb();
+	val = inb(EC_SHUTDOWN_IO_PORT_DATA);
+	outb(val & (~BIT_SHUTDOWN_ON), EC_SHUTDOWN_IO_PORT_DATA);
+	mmiowb();
+	/* need enough wait here... how many microseconds needs? */
+	for (i = 0; i < 0x10000; i++)
+		delay();
+	outb(val | BIT_SHUTDOWN_ON, EC_SHUTDOWN_IO_PORT_DATA);
+	mmiowb();
+}
+
+static void yl2f89_shutdown(void)
+{
+	/* cpu-gpio0 output low */
+	LOONGSON_GPIODATA &= ~0x00000001;
+	/* cpu-gpio0 as output */
+	LOONGSON_GPIOIE &= ~0x00000001;
+}
+
+void mach_prepare_reboot(void)
+{
+	switch (mips_machtype) {
+	case MACH_LEMOTE_FL2F:
+	case MACH_LEMOTE_NAS:
+	case MACH_LEMOTE_LL2F:
+		fl2f_reboot();
+		break;
+	case MACH_LEMOTE_ML2F7:
+		ml2f_reboot();
+		break;
+	case MACH_LEMOTE_YL2F89:
+		yl2f89_reboot();
+		break;
+	default:
+		break;
+	}
+}
+
+void mach_prepare_shutdown(void)
+{
+	switch (mips_machtype) {
+	case MACH_LEMOTE_FL2F:
+	case MACH_LEMOTE_NAS:
+	case MACH_LEMOTE_LL2F:
+		fl2f_shutdown();
+		break;
+	case MACH_LEMOTE_ML2F7:
+		ml2f_shutdown();
+		break;
+	case MACH_LEMOTE_YL2F89:
+		yl2f89_shutdown();
+		break;
+	default:
+		break;
+	}
+}
diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig
index 6dacc1438906..e27879b4813b 100644
--- a/arch/mips/loongson32/Kconfig
+++ b/arch/mips/loongson32/Kconfig
@@ -38,7 +38,7 @@ endchoice
 menuconfig CEVT_CSRC_LS1X
 	bool "Use PWM Timer for clockevent/clocksource"
 	select MIPS_EXTERNAL_TIMER
-	depends on CPU_LOONGSON1
+	depends on CPU_LOONGSON32
 	help
 	  This option changes the default clockevent/clocksource to PWM Timer,
 	  and is required by Loongson1 CPUFreq support.
diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform
index 333215593092..7f8e342f1ef5 100644
--- a/arch/mips/loongson32/Platform
+++ b/arch/mips/loongson32/Platform
@@ -1,4 +1,4 @@
-cflags-$(CONFIG_CPU_LOONGSON1)		+= -march=mips32r2 -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON32)		+= -march=mips32r2 -Wa,--trap
 platform-$(CONFIG_MACH_LOONGSON32)	+= loongson32/
 cflags-$(CONFIG_MACH_LOONGSON32)	+= -I$(srctree)/arch/mips/include/asm/mach-loongson32
-load-$(CONFIG_CPU_LOONGSON1)		+= 0xffffffff80200000
+load-$(CONFIG_CPU_LOONGSON32)		+= 0xffffffff80200000
diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c
index c4e043ee53ff..fd76114fa3b0 100644
--- a/arch/mips/loongson32/common/prom.c
+++ b/arch/mips/loongson32/common/prom.c
@@ -5,75 +5,42 @@
  * Modified from arch/mips/pnx833x/common/prom.c.
  */
 
+#include <linux/io.h>
+#include <linux/init.h>
 #include <linux/serial_reg.h>
 #include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
 
 #include <loongson1.h>
-#include <prom.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-unsigned long memsize, highmemsize;
-
-char *prom_getenv(char *envname)
-{
-	char **env = prom_envp;
-	int i;
-
-	i = strlen(envname);
-
-	while (*env) {
-		if (strncmp(envname, *env, i) == 0 && *(*env + i) == '=')
-			return *env + i + 1;
-		env++;
-	}
-
-	return 0;
-}
-
-static inline unsigned long env_or_default(char *env, unsigned long dfl)
-{
-	char *str = prom_getenv(env);
-	return str ? simple_strtol(str, 0, 0) : dfl;
-}
-
-void __init prom_init_cmdline(void)
-{
-	char *c = &(arcs_cmdline[0]);
-	int i;
-
-	for (i = 1; i < prom_argc; i++) {
-		strcpy(c, prom_argv[i]);
-		c += strlen(prom_argv[i]);
-		if (i < prom_argc - 1)
-			*c++ = ' ';
-	}
-	*c = 0;
-}
+unsigned long memsize;
 
 void __init prom_init(void)
 {
 	void __iomem *uart_base;
-	prom_argc = fw_arg0;
-	prom_argv = (char **)fw_arg1;
-	prom_envp = (char **)fw_arg2;
 
-	prom_init_cmdline();
+	fw_init_cmdline();
 
-	memsize = env_or_default("memsize", DEFAULT_MEMSIZE);
-	highmemsize = env_or_default("highmemsize", 0x0);
+	memsize = fw_getenvl("memsize");
+	if(!memsize)
+		memsize = DEFAULT_MEMSIZE;
 
 	if (strstr(arcs_cmdline, "console=ttyS3"))
-		uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f);
+		uart_base = ioremap(LS1X_UART3_BASE, 0x0f);
 	else if (strstr(arcs_cmdline, "console=ttyS2"))
-		uart_base = ioremap_nocache(LS1X_UART2_BASE, 0x0f);
+		uart_base = ioremap(LS1X_UART2_BASE, 0x0f);
 	else if (strstr(arcs_cmdline, "console=ttyS1"))
-		uart_base = ioremap_nocache(LS1X_UART1_BASE, 0x0f);
+		uart_base = ioremap(LS1X_UART1_BASE, 0x0f);
 	else
-		uart_base = ioremap_nocache(LS1X_UART0_BASE, 0x0f);
+		uart_base = ioremap(LS1X_UART0_BASE, 0x0f);
 	setup_8250_early_printk_port((unsigned long)uart_base, 0, 0);
 }
 
 void __init prom_free_prom_memory(void)
 {
 }
+
+void __init plat_mem_setup(void)
+{
+	add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+}
diff --git a/arch/mips/loongson32/common/reset.c b/arch/mips/loongson32/common/reset.c
index 6c36a414dde7..0c7399b303fb 100644
--- a/arch/mips/loongson32/common/reset.c
+++ b/arch/mips/loongson32/common/reset.c
@@ -37,7 +37,7 @@ static void ls1x_power_off(void)
 
 static int __init ls1x_reboot_setup(void)
 {
-	wdt_reg_base = ioremap_nocache(LS1X_WDT_BASE, (SZ_4 + SZ_8));
+	wdt_reg_base = ioremap(LS1X_WDT_BASE, (SZ_4 + SZ_8));
 	if (!wdt_reg_base)
 		panic("Failed to remap watchdog registers");
 
diff --git a/arch/mips/loongson32/common/setup.c b/arch/mips/loongson32/common/setup.c
index 8b03e18fc4d8..4733fe037176 100644
--- a/arch/mips/loongson32/common/setup.c
+++ b/arch/mips/loongson32/common/setup.c
@@ -3,15 +3,12 @@
  * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
  */
 
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/cpu-info.h>
 #include <asm/bootinfo.h>
 
-#include <prom.h>
-
-void __init plat_mem_setup(void)
-{
-	add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
-}
-
 const char *get_system_type(void)
 {
 	unsigned int processor_id = (&current_cpu_data)->processor_id;
diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c
index f97662045c73..4cc73f7ac0d4 100644
--- a/arch/mips/loongson32/common/time.c
+++ b/arch/mips/loongson32/common/time.c
@@ -49,7 +49,7 @@ static inline void ls1x_pwmtimer_restart(void)
 
 void __init ls1x_pwmtimer_init(void)
 {
-	timer_reg_base = ioremap_nocache(LS1X_TIMER_BASE, SZ_16);
+	timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16);
 	if (!timer_reg_base)
 		panic("Failed to remap timer registers");
 
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 4c14a11525f4..48b29c198acf 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -1,119 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 if MACH_LOONGSON64
 
-choice
-	prompt "Machine Type"
-
-config LEMOTE_FULOONG2E
-	bool "Lemote Fuloong(2e) mini-PC"
-	select ARCH_SPARSEMEM_ENABLE
-	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select ARCH_MIGHT_HAVE_PC_SERIO
-	select CEVT_R4K
-	select CSRC_R4K
-	select SYS_HAS_CPU_LOONGSON2E
-	select DMA_NONCOHERENT
-	select BOOT_ELF32
-	select BOARD_SCACHE
-	select HAVE_PCI
-	select I8259
-	select ISA
-	select IRQ_MIPS_CPU
-	select SYS_SUPPORTS_64BIT_KERNEL
-	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select SYS_SUPPORTS_HIGHMEM
-	select SYS_HAS_EARLY_PRINTK
-	select GENERIC_ISA_DMA_SUPPORT_BROKEN
-	select CPU_HAS_WB
-	select LOONGSON_MC146818
-	help
-	  Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and
-	  an FPGA northbridge
-
-	  Lemote Fuloong(2e) mini PC have a VIA686B south bridge.
-
-config LEMOTE_MACH2F
-	bool "Lemote Loongson 2F family machines"
-	select ARCH_SPARSEMEM_ENABLE
-	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select ARCH_MIGHT_HAVE_PC_SERIO
-	select BOARD_SCACHE
-	select BOOT_ELF32
-	select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
-	select CPU_HAS_WB
-	select CS5536
-	select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
-	select DMA_NONCOHERENT
-	select GENERIC_ISA_DMA_SUPPORT_BROKEN
-	select HAVE_CLK
-	select HAVE_PCI
-	select I8259
-	select IRQ_MIPS_CPU
-	select ISA
-	select SYS_HAS_CPU_LOONGSON2F
-	select SYS_HAS_EARLY_PRINTK
-	select SYS_SUPPORTS_64BIT_KERNEL
-	select SYS_SUPPORTS_HIGHMEM
-	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select LOONGSON_MC146818
-	help
-	  Lemote Loongson 2F family machines utilize the 2F revision of
-	  Loongson processor and the AMD CS5536 south bridge.
-
-	  These family machines include fuloong2f mini PC, yeeloong2f notebook,
-	  LingLoong allinone PC and so forth.
-
-config LOONGSON_MACH3X
-	bool "Generic Loongson 3 family machines"
-	select ARCH_SPARSEMEM_ENABLE
-	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select ARCH_MIGHT_HAVE_PC_SERIO
-	select GENERIC_ISA_DMA_SUPPORT_BROKEN
-	select BOOT_ELF32
-	select BOARD_SCACHE
-	select CSRC_R4K
-	select CEVT_R4K
-	select CPU_HAS_WB
-	select FORCE_PCI
-	select ISA
-	select I8259
-	select IRQ_MIPS_CPU
-	select NR_CPUS_DEFAULT_4
-	select SYS_HAS_CPU_LOONGSON3
-	select SYS_HAS_EARLY_PRINTK
-	select SYS_SUPPORTS_SMP
-	select SYS_SUPPORTS_HOTPLUG_CPU
-	select SYS_SUPPORTS_NUMA
-	select SYS_SUPPORTS_64BIT_KERNEL
-	select SYS_SUPPORTS_HIGHMEM
-	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select LOONGSON_MC146818
-	select ZONE_DMA32
-	select LEFI_FIRMWARE_INTERFACE
-	help
-		Generic Loongson 3 family machines utilize the 3A/3B revision
-		of Loongson processor and RS780/SBX00 chipset.
-endchoice
-
-config CS5536
-	bool
-
-config CS5536_MFGPT
-	bool "CS5536 MFGPT Timer"
-	depends on CS5536 && !HIGH_RES_TIMERS
-	select MIPS_EXTERNAL_TIMER
-	help
-	  This option enables the mfgpt0 timer of AMD CS5536. With this timer
-	  switched on you can not use high resolution timers.
-
-	  If you want to enable the Loongson2 CPUFreq Driver, Please enable
-	  this option at first, otherwise, You will get wrong system time.
-
-	  If unsure, say Yes.
-
 config RS780_HPET
 	bool "RS780/SBX00 HPET Timer"
-	depends on LOONGSON_MACH3X
+	depends on MACH_LOONGSON64
 	select MIPS_EXTERNAL_TIMER
 	help
 	  This option enables the hpet timer of AMD RS780/SBX00.
@@ -123,16 +13,9 @@ config RS780_HPET
 
 	  If unsure, say Yes.
 
-config LOONGSON_UART_BASE
-	bool
-	default y
-	depends on EARLY_PRINTK || SERIAL_8250
 
 config LOONGSON_MC146818
 	bool
 	default n
 
-config LEFI_FIRMWARE_INTERFACE
-	bool
-
 endif # MACH_LOONGSON64
diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
index 1a5df773707d..7821891bc5d0 100644
--- a/arch/mips/loongson64/Makefile
+++ b/arch/mips/loongson64/Makefile
@@ -1,24 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
-# Common code for all Loongson based systems
+# Makefile for Loongson-3 family machines
 #
+obj-$(CONFIG_MACH_LOONGSON64) += irq.o cop2-ex.o platform.o acpi_init.o dma.o \
+				setup.o init.o env.o time.o reset.o \
 
-obj-$(CONFIG_MACH_LOONGSON64) += common/
-
-#
-# Lemote Fuloong mini-PC (Loongson 2E-based)
-#
-
-obj-$(CONFIG_LEMOTE_FULOONG2E)	+= fuloong-2e/
-
-#
-# Lemote loongson2f family machines
-#
-
-obj-$(CONFIG_LEMOTE_MACH2F)  += lemote-2f/
-
-#
-# All Loongson-3 family machines
-#
-
-obj-$(CONFIG_CPU_LOONGSON3)  += loongson-3/
+obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_NUMA)	+= numa.o
+obj-$(CONFIG_RS780_HPET) += hpet.o
+obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
+obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 9f79908f5063..d5eb94c9edb4 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -2,32 +2,13 @@
 # Loongson Processors' Support
 #
 
-# Only gcc >= 4.4 have Loongson specific support
-cflags-$(CONFIG_CPU_LOONGSON2)	+= -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2E) += \
-	$(call cc-option,-march=loongson2e,-march=r4600)
-cflags-$(CONFIG_CPU_LOONGSON2F) += \
-	$(call cc-option,-march=loongson2f,-march=r4600)
-# Enable the workarounds for Loongson2f
-ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
-  ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),)
-    $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop)
-  else
-    cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop
-  endif
-  ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),)
-    $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump)
-  else
-    cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump
-  endif
-endif
 
-cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON64)	+= -Wa,--trap
 
 #
 # Some versions of binutils, not currently mainline as of 2019/02/04, support
 # an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
-# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h for a
 # description).
 #
 # We disable this in order to prevent the assembler meddling with the
@@ -44,7 +25,7 @@ cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
 # binutils does not merge support for the flag then we can revisit & remove
 # this later - for now it ensures vendor toolchains don't cause problems.
 #
-cflags-$(CONFIG_CPU_LOONGSON3)	+= $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+cflags-$(CONFIG_CPU_LOONGSON64)	+= $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
 
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
@@ -55,14 +36,14 @@ cflags-$(CONFIG_CPU_LOONGSON3)	+= $(call as-option,-Wa$(comma)-mno-fix-loongson3
 #
 ifeq ($(call cc-ifversion, -ge, 0409, y), y)
   ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
-    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+    cflags-$(CONFIG_CPU_LOONGSON64)  += \
       $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
   else
-    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+    cflags-$(CONFIG_CPU_LOONGSON64)  += \
       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
   endif
 else
-    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+    cflags-$(CONFIG_CPU_LOONGSON64)  += \
       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
 endif
 
@@ -76,6 +57,4 @@ cflags-y += $(call cc-option,-mno-loongson-mmi)
 
 platform-$(CONFIG_MACH_LOONGSON64) += loongson64/
 cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely
-load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
-load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
-load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000
+load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000
diff --git a/arch/mips/loongson64/loongson-3/acpi_init.c b/arch/mips/loongson64/acpi_init.c
index 8d7c119ddf91..8d7c119ddf91 100644
--- a/arch/mips/loongson64/loongson-3/acpi_init.c
+++ b/arch/mips/loongson64/acpi_init.c
diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile
deleted file mode 100644
index 684624f61f5a..000000000000
--- a/arch/mips/loongson64/common/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for loongson based machines.
-#
-
-obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
-    bonito-irq.o mem.o machtype.o platform.o serial.o
-obj-$(CONFIG_PCI) += pci.o
-
-#
-# Serial port support
-#
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
-obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
-
-#
-# Enable CS5536 Virtual Support Module(VSM) to virtulize the PCI configure
-# space
-#
-obj-$(CONFIG_CS5536) += cs5536/
-
-#
-# Suspend Support
-#
-
-obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c
deleted file mode 100644
index a735460682cf..000000000000
--- a/arch/mips/loongson64/common/cmdline.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Based on Ocelot Linux port, which is
- * Copyright 2001 MontaVista Software Inc.
- * Author: jsun@mvista.com or jsun@junsun.net
- *
- * Copyright 2003 ICT CAS
- * Author: Michael Guo <guoyi@ict.ac.cn>
- *
- * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- *
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-
-void __init prom_init_cmdline(void)
-{
-	int prom_argc;
-	/* pmon passes arguments in 32bit pointers */
-	int *_prom_argv;
-	int i;
-	long l;
-
-	/* firmware arguments are initialized in head.S */
-	prom_argc = fw_arg0;
-	_prom_argv = (int *)fw_arg1;
-
-	/* arg[0] is "g", the rest is boot parameters */
-	arcs_cmdline[0] = '\0';
-	for (i = 1; i < prom_argc; i++) {
-		l = (long)_prom_argv[i];
-		if (strlen(arcs_cmdline) + strlen(((char *)l) + 1)
-		    >= sizeof(arcs_cmdline))
-			break;
-		strcat(arcs_cmdline, ((char *)l));
-		strcat(arcs_cmdline, " ");
-	}
-
-	prom_init_machtype();
-}
diff --git a/arch/mips/loongson64/common/early_printk.c b/arch/mips/loongson64/common/early_printk.c
deleted file mode 100644
index 5e2a151aa30c..000000000000
--- a/arch/mips/loongson64/common/early_printk.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*  early printk support
- *
- *  Copyright (c) 2009 Philippe Vachon <philippe@cowpig.ca>
- *  Copyright (c) 2009 Lemote Inc.
- *  Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <linux/serial_reg.h>
-#include <asm/setup.h>
-
-#include <loongson.h>
-
-#define PORT(base, offset) (u8 *)(base + offset)
-
-static inline unsigned int serial_in(unsigned char *base, int offset)
-{
-	return readb(PORT(base, offset));
-}
-
-static inline void serial_out(unsigned char *base, int offset, int value)
-{
-	writeb(value, PORT(base, offset));
-}
-
-void prom_putchar(char c)
-{
-	int timeout;
-	unsigned char *uart_base;
-
-	uart_base = (unsigned char *)_loongson_uart_base[0];
-	timeout = 1024;
-
-	while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) &&
-			(timeout-- > 0))
-		;
-
-	serial_out(uart_base, UART_TX, c);
-}
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
deleted file mode 100644
index 09d5cf4676ca..000000000000
--- a/arch/mips/loongson64/common/env.c
+++ /dev/null
@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Based on Ocelot Linux port, which is
- * Copyright 2001 MontaVista Software Inc.
- * Author: jsun@mvista.com or jsun@junsun.net
- *
- * Copyright 2003 ICT CAS
- * Author: Michael Guo <guoyi@ict.ac.cn>
- *
- * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- *
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <linux/export.h>
-#include <asm/bootinfo.h>
-#include <loongson.h>
-#include <boot_param.h>
-#include <workarounds.h>
-
-u32 cpu_clock_freq;
-EXPORT_SYMBOL(cpu_clock_freq);
-struct efi_memory_map_loongson *loongson_memmap;
-struct loongson_system_configuration loongson_sysconf;
-
-u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180};
-u64 loongson_chiptemp[MAX_PACKAGES];
-u64 loongson_freqctrl[MAX_PACKAGES];
-
-unsigned long long smp_group[4];
-
-#define parse_even_earlier(res, option, p)				\
-do {									\
-	unsigned int tmp __maybe_unused;				\
-									\
-	if (strncmp(option, (char *)p, strlen(option)) == 0)		\
-		tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \
-} while (0)
-
-void __init prom_init_env(void)
-{
-	/* pmon passes arguments in 32bit pointers */
-	unsigned int processor_id;
-
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
-	int *_prom_envp;
-	long l;
-
-	/* firmware arguments are initialized in head.S */
-	_prom_envp = (int *)fw_arg2;
-
-	l = (long)*_prom_envp;
-	while (l != 0) {
-		parse_even_earlier(cpu_clock_freq, "cpuclock", l);
-		parse_even_earlier(memsize, "memsize", l);
-		parse_even_earlier(highmemsize, "highmemsize", l);
-		_prom_envp++;
-		l = (long)*_prom_envp;
-	}
-	if (memsize == 0)
-		memsize = 256;
-
-	loongson_sysconf.nr_uarts = 1;
-
-	pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize);
-#else
-	struct boot_params *boot_p;
-	struct loongson_params *loongson_p;
-	struct system_loongson *esys;
-	struct efi_cpuinfo_loongson *ecpu;
-	struct irq_source_routing_table *eirq_source;
-
-	/* firmware arguments are initialized in head.S */
-	boot_p = (struct boot_params *)fw_arg2;
-	loongson_p = &(boot_p->efi.smbios.lp);
-
-	esys = (struct system_loongson *)
-		((u64)loongson_p + loongson_p->system_offset);
-	ecpu = (struct efi_cpuinfo_loongson *)
-		((u64)loongson_p + loongson_p->cpu_offset);
-	eirq_source = (struct irq_source_routing_table *)
-		((u64)loongson_p + loongson_p->irq_offset);
-	loongson_memmap = (struct efi_memory_map_loongson *)
-		((u64)loongson_p + loongson_p->memory_offset);
-
-	cpu_clock_freq = ecpu->cpu_clock_freq;
-	loongson_sysconf.cputype = ecpu->cputype;
-	switch (ecpu->cputype) {
-	case Legacy_3A:
-	case Loongson_3A:
-		loongson_sysconf.cores_per_node = 4;
-		loongson_sysconf.cores_per_package = 4;
-		smp_group[0] = 0x900000003ff01000;
-		smp_group[1] = 0x900010003ff01000;
-		smp_group[2] = 0x900020003ff01000;
-		smp_group[3] = 0x900030003ff01000;
-		loongson_chipcfg[0] = 0x900000001fe00180;
-		loongson_chipcfg[1] = 0x900010001fe00180;
-		loongson_chipcfg[2] = 0x900020001fe00180;
-		loongson_chipcfg[3] = 0x900030001fe00180;
-		loongson_chiptemp[0] = 0x900000001fe0019c;
-		loongson_chiptemp[1] = 0x900010001fe0019c;
-		loongson_chiptemp[2] = 0x900020001fe0019c;
-		loongson_chiptemp[3] = 0x900030001fe0019c;
-		loongson_freqctrl[0] = 0x900000001fe001d0;
-		loongson_freqctrl[1] = 0x900010001fe001d0;
-		loongson_freqctrl[2] = 0x900020001fe001d0;
-		loongson_freqctrl[3] = 0x900030001fe001d0;
-		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
-		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
-		break;
-	case Legacy_3B:
-	case Loongson_3B:
-		loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */
-		loongson_sysconf.cores_per_package = 8;
-		smp_group[0] = 0x900000003ff01000;
-		smp_group[1] = 0x900010003ff05000;
-		smp_group[2] = 0x900020003ff09000;
-		smp_group[3] = 0x900030003ff0d000;
-		loongson_chipcfg[0] = 0x900000001fe00180;
-		loongson_chipcfg[1] = 0x900020001fe00180;
-		loongson_chipcfg[2] = 0x900040001fe00180;
-		loongson_chipcfg[3] = 0x900060001fe00180;
-		loongson_chiptemp[0] = 0x900000001fe0019c;
-		loongson_chiptemp[1] = 0x900020001fe0019c;
-		loongson_chiptemp[2] = 0x900040001fe0019c;
-		loongson_chiptemp[3] = 0x900060001fe0019c;
-		loongson_freqctrl[0] = 0x900000001fe001d0;
-		loongson_freqctrl[1] = 0x900020001fe001d0;
-		loongson_freqctrl[2] = 0x900040001fe001d0;
-		loongson_freqctrl[3] = 0x900060001fe001d0;
-		loongson_sysconf.ht_control_base = 0x90001EFDFB000000;
-		loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG;
-		break;
-	default:
-		loongson_sysconf.cores_per_node = 1;
-		loongson_sysconf.cores_per_package = 1;
-		loongson_chipcfg[0] = 0x900000001fe00180;
-	}
-
-	loongson_sysconf.nr_cpus = ecpu->nr_cpus;
-	loongson_sysconf.boot_cpu_id = ecpu->cpu_startup_core_id;
-	loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask;
-	if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0)
-		loongson_sysconf.nr_cpus = NR_CPUS;
-	loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus +
-		loongson_sysconf.cores_per_node - 1) /
-		loongson_sysconf.cores_per_node;
-
-	loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr;
-	loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr;
-	loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr;
-	loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits;
-	if (loongson_sysconf.dma_mask_bits < 32 ||
-		loongson_sysconf.dma_mask_bits > 64)
-		loongson_sysconf.dma_mask_bits = 32;
-
-	loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm;
-	loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown;
-	loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend;
-
-	loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios;
-	pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n",
-		loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr,
-		loongson_sysconf.vgabios_addr);
-
-	memset(loongson_sysconf.ecname, 0, 32);
-	if (esys->has_ec)
-		memcpy(loongson_sysconf.ecname, esys->ec_name, 32);
-	loongson_sysconf.workarounds |= esys->workarounds;
-
-	loongson_sysconf.nr_uarts = esys->nr_uarts;
-	if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS)
-		loongson_sysconf.nr_uarts = 1;
-	memcpy(loongson_sysconf.uarts, esys->uarts,
-		sizeof(struct uart_device) * loongson_sysconf.nr_uarts);
-
-	loongson_sysconf.nr_sensors = esys->nr_sensors;
-	if (loongson_sysconf.nr_sensors > MAX_SENSORS)
-		loongson_sysconf.nr_sensors = 0;
-	if (loongson_sysconf.nr_sensors)
-		memcpy(loongson_sysconf.sensors, esys->sensors,
-			sizeof(struct sensor_device) * loongson_sysconf.nr_sensors);
-#endif
-	if (cpu_clock_freq == 0) {
-		processor_id = (&current_cpu_data)->processor_id;
-		switch (processor_id & PRID_REV_MASK) {
-		case PRID_REV_LOONGSON2E:
-			cpu_clock_freq = 533080000;
-			break;
-		case PRID_REV_LOONGSON2F:
-			cpu_clock_freq = 797000000;
-			break;
-		case PRID_REV_LOONGSON3A_R1:
-		case PRID_REV_LOONGSON3A_R2_0:
-		case PRID_REV_LOONGSON3A_R2_1:
-		case PRID_REV_LOONGSON3A_R3_0:
-		case PRID_REV_LOONGSON3A_R3_1:
-			cpu_clock_freq = 900000000;
-			break;
-		case PRID_REV_LOONGSON3B_R1:
-		case PRID_REV_LOONGSON3B_R2:
-			cpu_clock_freq = 1000000000;
-			break;
-		default:
-			cpu_clock_freq = 100000000;
-			break;
-		}
-	}
-	pr_info("CpuClock = %u\n", cpu_clock_freq);
-}
diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c
deleted file mode 100644
index 912fe61c4fc7..000000000000
--- a/arch/mips/loongson64/common/init.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-
-#include <linux/memblock.h>
-#include <asm/bootinfo.h>
-#include <asm/traps.h>
-#include <asm/smp-ops.h>
-#include <asm/cacheflush.h>
-
-#include <loongson.h>
-
-/* Loongson CPU address windows config space base address */
-unsigned long __maybe_unused _loongson_addrwincfg_base;
-
-static void __init mips_nmi_setup(void)
-{
-	void *base;
-	extern char except_vec_nmi;
-
-	base = (void *)(CAC_BASE + 0x380);
-	memcpy(base, &except_vec_nmi, 0x80);
-	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
-}
-
-void __init prom_init(void)
-{
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-	_loongson_addrwincfg_base = (unsigned long)
-		ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE);
-#endif
-
-	prom_init_cmdline();
-	prom_init_env();
-
-	/* init base address of io space */
-	set_io_port_base((unsigned long)
-		ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
-
-#ifdef CONFIG_NUMA
-	prom_init_numa_memory();
-#else
-	prom_init_memory();
-#endif
-
-	/*init the uart base address */
-	prom_init_uart_base();
-	register_smp_ops(&loongson3_smp_ops);
-	board_nmi_handler_setup = mips_nmi_setup;
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c
deleted file mode 100644
index 4254ac4ec616..000000000000
--- a/arch/mips/loongson64/common/mem.c
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- */
-#include <linux/fs.h>
-#include <linux/fcntl.h>
-#include <linux/memblock.h>
-#include <linux/mm.h>
-
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-#include <boot_param.h>
-#include <mem.h>
-#include <pci.h>
-
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
-
-u32 memsize, highmemsize;
-
-void __init prom_init_memory(void)
-{
-	add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
-
-	add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize <<
-				20), BOOT_MEM_RESERVED);
-
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-	{
-		int bit;
-
-		bit = fls(memsize + highmemsize);
-		if (bit != ffs(memsize + highmemsize))
-			bit += 20;
-		else
-			bit = bit + 20 - 1;
-
-		/* set cpu window3 to map CPU to DDR: 2G -> 2G */
-		LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul,
-					  0x80000000ul, (1 << bit));
-		mmiowb();
-	}
-#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */
-
-#ifdef CONFIG_64BIT
-	if (highmemsize > 0)
-		add_memory_region(LOONGSON_HIGHMEM_START,
-				  highmemsize << 20, BOOT_MEM_RAM);
-
-	add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START -
-			  LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED);
-
-#endif /* !CONFIG_64BIT */
-}
-
-#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */
-
-void __init prom_init_memory(void)
-{
-	int i;
-	u32 node_id;
-	u32 mem_type;
-
-	/* parse memory information */
-	for (i = 0; i < loongson_memmap->nr_map; i++) {
-		node_id = loongson_memmap->map[i].node_id;
-		mem_type = loongson_memmap->map[i].mem_type;
-
-		if (node_id != 0)
-			continue;
-
-		switch (mem_type) {
-		case SYSTEM_RAM_LOW:
-			memblock_add(loongson_memmap->map[i].mem_start,
-				(u64)loongson_memmap->map[i].mem_size << 20);
-			break;
-		case SYSTEM_RAM_HIGH:
-			memblock_add(loongson_memmap->map[i].mem_start,
-				(u64)loongson_memmap->map[i].mem_size << 20);
-			break;
-		case SYSTEM_RAM_RESERVED:
-			memblock_reserve(loongson_memmap->map[i].mem_start,
-				(u64)loongson_memmap->map[i].mem_size << 20);
-			break;
-		}
-	}
-}
-
-#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */
-
-/* override of arch/mips/mm/cache.c: __uncached_access */
-int __uncached_access(struct file *file, unsigned long addr)
-{
-	if (file->f_flags & O_DSYNC)
-		return 1;
-
-	return addr >= __pa(high_memory) ||
-		((addr >= LOONGSON_MMIO_MEM_START) &&
-		 (addr < LOONGSON_MMIO_MEM_END));
-}
-
-#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED
-
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <asm/current.h>
-
-static unsigned long uca_start, uca_end;
-
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-			      unsigned long size, pgprot_t vma_prot)
-{
-	unsigned long offset = pfn << PAGE_SHIFT;
-	unsigned long end = offset + size;
-
-	if (__uncached_access(file, offset)) {
-		if (uca_start && (offset >= uca_start) &&
-		    (end <= uca_end))
-			return __pgprot((pgprot_val(vma_prot) &
-					 ~_CACHE_MASK) |
-					_CACHE_UNCACHED_ACCELERATED);
-		else
-			return pgprot_noncached(vma_prot);
-	}
-	return vma_prot;
-}
-
-static int __init find_vga_mem_init(void)
-{
-	struct pci_dev *dev = 0;
-	struct resource *r;
-	int idx;
-
-	if (uca_start)
-		return 0;
-
-	for_each_pci_dev(dev) {
-		if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
-				r = &dev->resource[idx];
-				if (!r->start && r->end)
-					continue;
-				if (r->flags & IORESOURCE_IO)
-					continue;
-				if (r->flags & IORESOURCE_MEM) {
-					uca_start = r->start;
-					uca_end = r->end;
-					return 0;
-				}
-			}
-		}
-	}
-
-	return 0;
-}
-
-late_initcall(find_vga_mem_init);
-#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */
diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson64/common/pci.c
deleted file mode 100644
index c47bb7bf3aa4..000000000000
--- a/arch/mips/loongson64/common/pci.c
+++ /dev/null
@@ -1,97 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- */
-#include <linux/pci.h>
-
-#include <pci.h>
-#include <loongson.h>
-#include <boot_param.h>
-
-static struct resource loongson_pci_mem_resource = {
-	.name	= "pci memory space",
-	.start	= LOONGSON_PCI_MEM_START,
-	.end	= LOONGSON_PCI_MEM_END,
-	.flags	= IORESOURCE_MEM,
-};
-
-static struct resource loongson_pci_io_resource = {
-	.name	= "pci io space",
-	.start	= LOONGSON_PCI_IO_START,
-	.end	= IO_SPACE_LIMIT,
-	.flags	= IORESOURCE_IO,
-};
-
-static struct pci_controller  loongson_pci_controller = {
-	.pci_ops	= &loongson_pci_ops,
-	.io_resource	= &loongson_pci_io_resource,
-	.mem_resource	= &loongson_pci_mem_resource,
-	.mem_offset	= 0x00000000UL,
-	.io_offset	= 0x00000000UL,
-};
-
-static void __init setup_pcimap(void)
-{
-	/*
-	 * local to PCI mapping for CPU accessing PCI space
-	 * CPU address space [256M,448M] is window for accessing pci space
-	 * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M]
-	 *
-	 * pcimap: PCI_MAP2  PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0
-	 *	     [<2G]   [384M,448M] [320M,384M] [0M,64M]
-	 */
-	LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 |
-		LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) |
-		LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) |
-		LOONGSON_PCIMAP_WIN(0, 0);
-
-	/*
-	 * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M]
-	 */
-	LOONGSON_PCIBASE0 = 0x80000000ul;   /* base: 2G -> mmap: 0M */
-	/* size: 256M, burst transmission, pre-fetch enable, 64bit */
-	LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul;
-	LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful;
-	LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */
-	LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul;
-	LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */
-	LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul;
-
-	/* avoid deadlock of PCI reading/writing lock operation */
-	LOONGSON_PCI_ISR4C = 0xd2000001ul;
-
-	/* can not change gnt to break pci transfer when device's gnt not
-	deassert for some broken device */
-	LOONGSON_PXARB_CFG = 0x00fe0105ul;
-
-#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
-	/*
-	 * set cpu addr window2 to map CPU address space to PCI address space
-	 */
-	LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC,
-		LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE);
-#endif
-}
-
-extern int sbx00_acpi_init(void);
-
-static int __init pcibios_init(void)
-{
-	setup_pcimap();
-
-	loongson_pci_controller.io_map_base = mips_io_port_base;
-#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE
-	loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr;
-	loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr;
-#endif
-	register_pci_controller(&loongson_pci_controller);
-
-#ifdef CONFIG_CPU_LOONGSON3
-	sbx00_acpi_init();
-#endif
-
-	return 0;
-}
-
-arch_initcall(pcibios_init);
diff --git a/arch/mips/loongson64/common/pm.c b/arch/mips/loongson64/common/pm.c
deleted file mode 100644
index b8aed878d912..000000000000
--- a/arch/mips/loongson64/common/pm.c
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * loongson-specific suspend support
- *
- *  Copyright (C) 2009 Lemote Inc.
- *  Author: Wu Zhangjin <wuzhangjin@gmail.com>
- */
-#include <linux/suspend.h>
-#include <linux/interrupt.h>
-#include <linux/pm.h>
-
-#include <asm/i8259.h>
-#include <asm/mipsregs.h>
-
-#include <loongson.h>
-
-static unsigned int __maybe_unused cached_master_mask;	/* i8259A */
-static unsigned int __maybe_unused cached_slave_mask;
-static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */
-
-void arch_suspend_disable_irqs(void)
-{
-	/* disable all mips events */
-	local_irq_disable();
-
-#ifdef CONFIG_I8259
-	/* disable all events of i8259A */
-	cached_slave_mask = inb(PIC_SLAVE_IMR);
-	cached_master_mask = inb(PIC_MASTER_IMR);
-
-	outb(0xff, PIC_SLAVE_IMR);
-	inb(PIC_SLAVE_IMR);
-	outb(0xff, PIC_MASTER_IMR);
-	inb(PIC_MASTER_IMR);
-#endif
-	/* disable all events of bonito */
-	cached_bonito_irq_mask = LOONGSON_INTEN;
-	LOONGSON_INTENCLR = 0xffff;
-	(void)LOONGSON_INTENCLR;
-}
-
-void arch_suspend_enable_irqs(void)
-{
-	/* enable all mips events */
-	local_irq_enable();
-#ifdef CONFIG_I8259
-	/* only enable the cached events of i8259A */
-	outb(cached_slave_mask, PIC_SLAVE_IMR);
-	outb(cached_master_mask, PIC_MASTER_IMR);
-#endif
-	/* enable all cached events of bonito */
-	LOONGSON_INTENSET = cached_bonito_irq_mask;
-	(void)LOONGSON_INTENSET;
-}
-
-/*
- * Setup the board-specific events for waking up loongson from wait mode
- */
-void __weak setup_wakeup_events(void)
-{
-}
-
-/*
- * Check wakeup events
- */
-int __weak wakeup_loongson(void)
-{
-	return 1;
-}
-
-/*
- * If the events are really what we want to wakeup the CPU, wake it up
- * otherwise put the CPU asleep again.
- */
-static void wait_for_wakeup_events(void)
-{
-	while (!wakeup_loongson())
-		LOONGSON_CHIPCFG(0) &= ~0x7;
-}
-
-/*
- * Stop all perf counters
- *
- * $24 is the control register of Loongson perf counter
- */
-static inline void stop_perf_counters(void)
-{
-	__write_64bit_c0_register($24, 0, 0);
-}
-
-
-static void loongson_suspend_enter(void)
-{
-	static unsigned int cached_cpu_freq;
-
-	/* setup wakeup events via enabling the IRQs */
-	setup_wakeup_events();
-
-	stop_perf_counters();
-
-	cached_cpu_freq = LOONGSON_CHIPCFG(0);
-
-	/* Put CPU into wait mode */
-	LOONGSON_CHIPCFG(0) &= ~0x7;
-
-	/* wait for the given events to wakeup cpu from wait mode */
-	wait_for_wakeup_events();
-
-	LOONGSON_CHIPCFG(0) = cached_cpu_freq;
-	mmiowb();
-}
-
-void __weak mach_suspend(void)
-{
-}
-
-void __weak mach_resume(void)
-{
-}
-
-static int loongson_pm_enter(suspend_state_t state)
-{
-	mach_suspend();
-
-	/* processor specific suspend */
-	loongson_suspend_enter();
-
-	mach_resume();
-
-	return 0;
-}
-
-static int loongson_pm_valid_state(suspend_state_t state)
-{
-	switch (state) {
-	case PM_SUSPEND_ON:
-	case PM_SUSPEND_STANDBY:
-	case PM_SUSPEND_MEM:
-		return 1;
-
-	default:
-		return 0;
-	}
-}
-
-static const struct platform_suspend_ops loongson_pm_ops = {
-	.valid	= loongson_pm_valid_state,
-	.enter	= loongson_pm_enter,
-};
-
-static int __init loongson_pm_init(void)
-{
-	suspend_set_ops(&loongson_pm_ops);
-
-	return 0;
-}
-arch_initcall(loongson_pm_init);
diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson64/common/reset.c
deleted file mode 100644
index ce39e918e4d5..000000000000
--- a/arch/mips/loongson64/common/reset.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- * Copyright (C) 2009 Lemote, Inc.
- * Author: Zhangjin Wu, wuzhangjin@gmail.com
- */
-#include <linux/init.h>
-#include <linux/pm.h>
-
-#include <asm/idle.h>
-#include <asm/reboot.h>
-
-#include <loongson.h>
-#include <boot_param.h>
-
-static inline void loongson_reboot(void)
-{
-#ifndef CONFIG_CPU_JUMP_WORKAROUNDS
-	((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) ();
-#else
-	void (*func)(void);
-
-	func = (void *)ioremap_nocache(LOONGSON_BOOT_BASE, 4);
-
-	__asm__ __volatile__(
-	"	.set	noat						\n"
-	"	jr	%[func]						\n"
-	"	.set	at						\n"
-	: /* No outputs */
-	: [func] "r" (func));
-#endif
-}
-
-static void loongson_restart(char *command)
-{
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
-	/* do preparation for reboot */
-	mach_prepare_reboot();
-
-	/* reboot via jumping to boot base address */
-	loongson_reboot();
-#else
-	void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr;
-
-	fw_restart();
-	while (1) {
-		if (cpu_wait)
-			cpu_wait();
-	}
-#endif
-}
-
-static void loongson_poweroff(void)
-{
-#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
-	mach_prepare_shutdown();
-
-	/*
-	 * It needs a wait loop here, but mips/kernel/reset.c already calls
-	 * a generic delay loop, machine_hang(), so simply return.
-	 */
-	return;
-#else
-	void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr;
-
-	fw_poweroff();
-	while (1) {
-		if (cpu_wait)
-			cpu_wait();
-	}
-#endif
-}
-
-static void loongson_halt(void)
-{
-	pr_notice("\n\n** You can safely turn off the power now **\n\n");
-	while (1) {
-		if (cpu_wait)
-			cpu_wait();
-	}
-}
-
-static int __init mips_reboot_setup(void)
-{
-	_machine_restart = loongson_restart;
-	_machine_halt = loongson_halt;
-	pm_power_off = loongson_poweroff;
-
-	return 0;
-}
-
-arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c
deleted file mode 100644
index 98c3a7feb10f..000000000000
--- a/arch/mips/loongson64/common/serial.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
- *
- * Copyright (C) 2009 Lemote, Inc.
- * Author: Yan hua (yanhua@lemote.com)
- * Author: Wu Zhangjin (wuzhangjin@gmail.com)
- */
-
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/serial_8250.h>
-
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-#include <machine.h>
-
-#define PORT(int, clk)			\
-{								\
-	.irq		= int,					\
-	.uartclk	= clk,					\
-	.iotype		= UPIO_PORT,				\
-	.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,	\
-	.regshift	= 0,					\
-}
-
-#define PORT_M(int, clk)				\
-{								\
-	.irq		= MIPS_CPU_IRQ_BASE + (int),		\
-	.uartclk	= clk,					\
-	.iotype		= UPIO_MEM,				\
-	.membase	= (void __iomem *)NULL,			\
-	.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,	\
-	.regshift	= 0,					\
-}
-
-static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = {
-	[MACH_LOONGSON_UNKNOWN]	= {},
-	[MACH_LEMOTE_FL2E]	= {PORT(4, 1843200), {} },
-	[MACH_LEMOTE_FL2F]	= {PORT(3, 1843200), {} },
-	[MACH_LEMOTE_ML2F7]	= {PORT_M(3, 3686400), {} },
-	[MACH_LEMOTE_YL2F89]	= {PORT_M(3, 3686400), {} },
-	[MACH_DEXXON_GDIUM2F10]	= {PORT_M(3, 3686400), {} },
-	[MACH_LEMOTE_NAS]	= {PORT_M(3, 3686400), {} },
-	[MACH_LEMOTE_LL2F]	= {PORT(3, 1843200), {} },
-	[MACH_LOONGSON_GENERIC]	= {PORT_M(2, 25000000), {} },
-	[MACH_LOONGSON_END]	= {},
-};
-
-static struct platform_device uart8250_device = {
-	.name = "serial8250",
-	.id = PLAT8250_DEV_PLATFORM,
-};
-
-static int __init serial_init(void)
-{
-	int i;
-	unsigned char iotype;
-
-	iotype = uart8250_data[mips_machtype][0].iotype;
-
-	if (UPIO_MEM == iotype) {
-		uart8250_data[mips_machtype][0].mapbase =
-			loongson_uart_base[0];
-		uart8250_data[mips_machtype][0].membase =
-			(void __iomem *)_loongson_uart_base[0];
-	}
-	else if (UPIO_PORT == iotype)
-		uart8250_data[mips_machtype][0].iobase =
-			loongson_uart_base[0] - LOONGSON_PCIIO_BASE;
-
-	if (loongson_sysconf.uarts[0].uartclk)
-		uart8250_data[mips_machtype][0].uartclk =
-			loongson_sysconf.uarts[0].uartclk;
-
-	for (i = 1; i < loongson_sysconf.nr_uarts; i++) {
-		iotype = loongson_sysconf.uarts[i].iotype;
-		uart8250_data[mips_machtype][i].iotype = iotype;
-		loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base;
-
-		if (UPIO_MEM == iotype) {
-			uart8250_data[mips_machtype][i].irq =
-				MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset;
-			uart8250_data[mips_machtype][i].mapbase =
-				loongson_uart_base[i];
-			uart8250_data[mips_machtype][i].membase =
-				ioremap_nocache(loongson_uart_base[i], 8);
-		} else if (UPIO_PORT == iotype) {
-			uart8250_data[mips_machtype][i].irq =
-				loongson_sysconf.uarts[i].int_offset;
-			uart8250_data[mips_machtype][i].iobase =
-				loongson_uart_base[i] - LOONGSON_PCIIO_BASE;
-		}
-
-		uart8250_data[mips_machtype][i].uartclk =
-			loongson_sysconf.uarts[i].uartclk;
-		uart8250_data[mips_machtype][i].flags =
-			UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
-	}
-
-	memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts],
-			0, sizeof(struct plat_serial8250_port));
-	uart8250_device.dev.platform_data = uart8250_data[mips_machtype];
-
-	return platform_device_register(&uart8250_device);
-}
-module_init(serial_init);
-
-static void __exit serial_exit(void)
-{
-	platform_device_unregister(&uart8250_device);
-}
-module_exit(serial_exit);
diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c
deleted file mode 100644
index bc2da4c140c4..000000000000
--- a/arch/mips/loongson64/common/setup.c
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- */
-#include <linux/export.h>
-#include <linux/init.h>
-
-#include <asm/wbflush.h>
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-
-#ifdef CONFIG_VT
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#endif
-
-static void wbflush_loongson(void)
-{
-	asm(".set\tpush\n\t"
-	    ".set\tnoreorder\n\t"
-	    ".set mips3\n\t"
-	    "sync\n\t"
-	    "nop\n\t"
-	    ".set\tpop\n\t"
-	    ".set mips0\n\t");
-}
-
-void (*__wbflush)(void) = wbflush_loongson;
-EXPORT_SYMBOL(__wbflush);
-
-void __init plat_mem_setup(void)
-{
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
-	conswitchp = &vga_con;
-
-	screen_info = (struct screen_info) {
-		.orig_x			= 0,
-		.orig_y			= 25,
-		.orig_video_cols	= 80,
-		.orig_video_lines	= 25,
-		.orig_video_isVGA	= VIDEO_TYPE_VGAC,
-		.orig_video_points	= 16,
-	};
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
-#endif
-}
diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson64/common/time.c
deleted file mode 100644
index e78760ce475b..000000000000
--- a/arch/mips/loongson64/common/time.c
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- *
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-#include <asm/mc146818-time.h>
-#include <asm/time.h>
-#include <asm/hpet.h>
-
-#include <loongson.h>
-#include <cs5536/cs5536_mfgpt.h>
-
-void __init plat_time_init(void)
-{
-	/* setup mips r4k timer */
-	mips_hpt_frequency = cpu_clock_freq / 2;
-
-#ifdef CONFIG_RS780_HPET
-	setup_hpet_timer();
-#else
-	setup_mfgpt0_timer();
-#endif
-}
-
-void read_persistent_clock64(struct timespec64 *ts)
-{
-	ts->tv_sec = mc146818_get_cmos_time();
-	ts->tv_nsec = 0;
-}
diff --git a/arch/mips/loongson64/common/uart_base.c b/arch/mips/loongson64/common/uart_base.c
deleted file mode 100644
index e88d937f10fe..000000000000
--- a/arch/mips/loongson64/common/uart_base.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-
-#include <linux/export.h>
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-
-/* raw */
-unsigned long loongson_uart_base[MAX_UARTS] = {};
-/* ioremapped */
-unsigned long _loongson_uart_base[MAX_UARTS] = {};
-
-EXPORT_SYMBOL(loongson_uart_base);
-EXPORT_SYMBOL(_loongson_uart_base);
-
-void prom_init_loongson_uart_base(void)
-{
-	switch (mips_machtype) {
-	case MACH_LOONGSON_GENERIC:
-		/* The CPU provided serial port (CPU) */
-		loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0;
-		break;
-	case MACH_LEMOTE_FL2E:
-		loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8;
-		break;
-	case MACH_LEMOTE_FL2F:
-	case MACH_LEMOTE_LL2F:
-		loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8;
-		break;
-	case MACH_LEMOTE_ML2F7:
-	case MACH_LEMOTE_YL2F89:
-	case MACH_DEXXON_GDIUM2F10:
-	case MACH_LEMOTE_NAS:
-	default:
-		/* The CPU provided serial port (LPC) */
-		loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8;
-		break;
-	}
-
-	_loongson_uart_base[0] =
-		(unsigned long)ioremap_nocache(loongson_uart_base[0], 8);
-}
diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c
index 9efdfe430ff0..9efdfe430ff0 100644
--- a/arch/mips/loongson64/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson64/cop2-ex.c
diff --git a/arch/mips/loongson64/loongson-3/dma.c b/arch/mips/loongson64/dma.c
index 5e86635f71db..5e86635f71db 100644
--- a/arch/mips/loongson64/loongson-3/dma.c
+++ b/arch/mips/loongson64/dma.c
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c
new file mode 100644
index 000000000000..0daeb7bcf023
--- /dev/null
+++ b/arch/mips/loongson64/env.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on Ocelot Linux port, which is
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: jsun@mvista.com or jsun@junsun.net
+ *
+ * Copyright 2003 ICT CAS
+ * Author: Michael Guo <guoyi@ict.ac.cn>
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <linux/export.h>
+#include <asm/bootinfo.h>
+#include <loongson.h>
+#include <boot_param.h>
+#include <workarounds.h>
+
+u32 cpu_clock_freq;
+EXPORT_SYMBOL(cpu_clock_freq);
+struct efi_memory_map_loongson *loongson_memmap;
+struct loongson_system_configuration loongson_sysconf;
+
+u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180};
+u64 loongson_chiptemp[MAX_PACKAGES];
+u64 loongson_freqctrl[MAX_PACKAGES];
+
+unsigned long long smp_group[4];
+
+const char *get_system_type(void)
+{
+	return "Generic Loongson64 System";
+}
+
+void __init prom_init_env(void)
+{
+	struct boot_params *boot_p;
+	struct loongson_params *loongson_p;
+	struct system_loongson *esys;
+	struct efi_cpuinfo_loongson *ecpu;
+	struct irq_source_routing_table *eirq_source;
+
+	/* firmware arguments are initialized in head.S */
+	boot_p = (struct boot_params *)fw_arg2;
+	loongson_p = &(boot_p->efi.smbios.lp);
+
+	esys = (struct system_loongson *)
+		((u64)loongson_p + loongson_p->system_offset);
+	ecpu = (struct efi_cpuinfo_loongson *)
+		((u64)loongson_p + loongson_p->cpu_offset);
+	eirq_source = (struct irq_source_routing_table *)
+		((u64)loongson_p + loongson_p->irq_offset);
+	loongson_memmap = (struct efi_memory_map_loongson *)
+		((u64)loongson_p + loongson_p->memory_offset);
+
+	cpu_clock_freq = ecpu->cpu_clock_freq;
+	loongson_sysconf.cputype = ecpu->cputype;
+	switch (ecpu->cputype) {
+	case Legacy_3A:
+	case Loongson_3A:
+		loongson_sysconf.cores_per_node = 4;
+		loongson_sysconf.cores_per_package = 4;
+		smp_group[0] = 0x900000003ff01000;
+		smp_group[1] = 0x900010003ff01000;
+		smp_group[2] = 0x900020003ff01000;
+		smp_group[3] = 0x900030003ff01000;
+		loongson_chipcfg[0] = 0x900000001fe00180;
+		loongson_chipcfg[1] = 0x900010001fe00180;
+		loongson_chipcfg[2] = 0x900020001fe00180;
+		loongson_chipcfg[3] = 0x900030001fe00180;
+		loongson_chiptemp[0] = 0x900000001fe0019c;
+		loongson_chiptemp[1] = 0x900010001fe0019c;
+		loongson_chiptemp[2] = 0x900020001fe0019c;
+		loongson_chiptemp[3] = 0x900030001fe0019c;
+		loongson_freqctrl[0] = 0x900000001fe001d0;
+		loongson_freqctrl[1] = 0x900010001fe001d0;
+		loongson_freqctrl[2] = 0x900020001fe001d0;
+		loongson_freqctrl[3] = 0x900030001fe001d0;
+		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
+		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
+		break;
+	case Legacy_3B:
+	case Loongson_3B:
+		loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */
+		loongson_sysconf.cores_per_package = 8;
+		smp_group[0] = 0x900000003ff01000;
+		smp_group[1] = 0x900010003ff05000;
+		smp_group[2] = 0x900020003ff09000;
+		smp_group[3] = 0x900030003ff0d000;
+		loongson_chipcfg[0] = 0x900000001fe00180;
+		loongson_chipcfg[1] = 0x900020001fe00180;
+		loongson_chipcfg[2] = 0x900040001fe00180;
+		loongson_chipcfg[3] = 0x900060001fe00180;
+		loongson_chiptemp[0] = 0x900000001fe0019c;
+		loongson_chiptemp[1] = 0x900020001fe0019c;
+		loongson_chiptemp[2] = 0x900040001fe0019c;
+		loongson_chiptemp[3] = 0x900060001fe0019c;
+		loongson_freqctrl[0] = 0x900000001fe001d0;
+		loongson_freqctrl[1] = 0x900020001fe001d0;
+		loongson_freqctrl[2] = 0x900040001fe001d0;
+		loongson_freqctrl[3] = 0x900060001fe001d0;
+		loongson_sysconf.ht_control_base = 0x90001EFDFB000000;
+		loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG;
+		break;
+	default:
+		loongson_sysconf.cores_per_node = 1;
+		loongson_sysconf.cores_per_package = 1;
+		loongson_chipcfg[0] = 0x900000001fe00180;
+	}
+
+	loongson_sysconf.nr_cpus = ecpu->nr_cpus;
+	loongson_sysconf.boot_cpu_id = ecpu->cpu_startup_core_id;
+	loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask;
+	if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0)
+		loongson_sysconf.nr_cpus = NR_CPUS;
+	loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus +
+		loongson_sysconf.cores_per_node - 1) /
+		loongson_sysconf.cores_per_node;
+
+	loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr;
+	loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr;
+	loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr;
+	loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits;
+	if (loongson_sysconf.dma_mask_bits < 32 ||
+		loongson_sysconf.dma_mask_bits > 64)
+		loongson_sysconf.dma_mask_bits = 32;
+
+	loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm;
+	loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown;
+	loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend;
+
+	loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios;
+	pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n",
+		loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr,
+		loongson_sysconf.vgabios_addr);
+
+	memset(loongson_sysconf.ecname, 0, 32);
+	if (esys->has_ec)
+		memcpy(loongson_sysconf.ecname, esys->ec_name, 32);
+	loongson_sysconf.workarounds |= esys->workarounds;
+
+	loongson_sysconf.nr_uarts = esys->nr_uarts;
+	if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS)
+		loongson_sysconf.nr_uarts = 1;
+	memcpy(loongson_sysconf.uarts, esys->uarts,
+		sizeof(struct uart_device) * loongson_sysconf.nr_uarts);
+
+	loongson_sysconf.nr_sensors = esys->nr_sensors;
+	if (loongson_sysconf.nr_sensors > MAX_SENSORS)
+		loongson_sysconf.nr_sensors = 0;
+	if (loongson_sysconf.nr_sensors)
+		memcpy(loongson_sysconf.sensors, esys->sensors,
+			sizeof(struct sensor_device) * loongson_sysconf.nr_sensors);
+	pr_info("CpuClock = %u\n", cpu_clock_freq);
+}
diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/hpet.c
index ed15430ad64f..ed15430ad64f 100644
--- a/arch/mips/loongson64/loongson-3/hpet.c
+++ b/arch/mips/loongson64/hpet.c
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
new file mode 100644
index 000000000000..5ac1a0f35ca4
--- /dev/null
+++ b/arch/mips/loongson64/init.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include <linux/memblock.h>
+#include <asm/bootinfo.h>
+#include <asm/traps.h>
+#include <asm/smp-ops.h>
+#include <asm/cacheflush.h>
+#include <asm/fw/fw.h>
+
+#include <loongson.h>
+
+static void __init mips_nmi_setup(void)
+{
+	void *base;
+	extern char except_vec_nmi;
+
+	base = (void *)(CAC_BASE + 0x380);
+	memcpy(base, &except_vec_nmi, 0x80);
+	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
+void __init prom_init(void)
+{
+	fw_init_cmdline();
+	prom_init_env();
+
+	/* init base address of io space */
+	set_io_port_base((unsigned long)
+		ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
+
+	prom_init_numa_memory();
+
+	/* Hardcode to CPU UART 0 */
+	setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024);
+
+	register_smp_ops(&loongson3_smp_ops);
+	board_nmi_handler_setup = mips_nmi_setup;
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/loongson64/irq.c b/arch/mips/loongson64/irq.c
new file mode 100644
index 000000000000..79ad797497e4
--- /dev/null
+++ b/arch/mips/loongson64/irq.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <loongson.h>
+#include <irq.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/irq_cpu.h>
+#include <asm/i8259.h>
+#include <asm/mipsregs.h>
+
+#include "smp.h"
+
+extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
+
+unsigned int irq_cpu[16] = {[0 ... 15] = -1};
+unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
+unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
+
+int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
+			  bool force)
+{
+	unsigned int cpu;
+	struct cpumask new_affinity;
+
+	/* I/O devices are connected on package-0 */
+	cpumask_copy(&new_affinity, affinity);
+	for_each_cpu(cpu, affinity)
+		if (cpu_data[cpu].package > 0)
+			cpumask_clear_cpu(cpu, &new_affinity);
+
+	if (cpumask_empty(&new_affinity))
+		return -EINVAL;
+
+	cpumask_copy(d->common->affinity, &new_affinity);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static void ht_irqdispatch(void)
+{
+	unsigned int i, irq;
+	struct irq_data *irqd;
+	struct cpumask affinity;
+
+	irq = LOONGSON_HT1_INT_VECTOR(0);
+	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
+
+	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
+		if (!(irq & (0x1 << ht_irq[i])))
+			continue;
+
+		/* handled by local core */
+		if (local_irq & (0x1 << ht_irq[i])) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
+
+		irqd = irq_get_irq_data(ht_irq[i]);
+		cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask);
+		if (cpumask_empty(&affinity)) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
+
+		irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity);
+		if (irq_cpu[ht_irq[i]] >= nr_cpu_ids)
+			irq_cpu[ht_irq[i]] = cpumask_first(&affinity);
+
+		if (irq_cpu[ht_irq[i]] == 0) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
+
+		/* balanced by other cores */
+		loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i]));
+	}
+}
+
+#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0)
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending;
+
+	pending = read_c0_cause() & read_c0_status() & ST0_IM;
+
+	if (pending & CAUSEF_IP7)
+		do_IRQ(LOONGSON_TIMER_IRQ);
+#if defined(CONFIG_SMP)
+	if (pending & CAUSEF_IP6)
+		loongson3_ipi_interrupt(NULL);
+#endif
+	if (pending & CAUSEF_IP3)
+		ht_irqdispatch();
+	if (pending & CAUSEF_IP2)
+		do_IRQ(LOONGSON_UART_IRQ);
+	if (pending & UNUSED_IPS) {
+		pr_err("%s : spurious interrupt\n", __func__);
+		spurious_interrupt();
+	}
+}
+
+static inline void mask_loongson_irq(struct irq_data *d) { }
+static inline void unmask_loongson_irq(struct irq_data *d) { }
+
+ /* For MIPS IRQs which shared by all cores */
+static struct irq_chip loongson_irq_chip = {
+	.name		= "Loongson",
+	.irq_ack	= mask_loongson_irq,
+	.irq_mask	= mask_loongson_irq,
+	.irq_mask_ack	= mask_loongson_irq,
+	.irq_unmask	= unmask_loongson_irq,
+	.irq_eoi	= unmask_loongson_irq,
+};
+
+void irq_router_init(void)
+{
+	int i;
+
+	/* route LPC int to cpu core0 int 0 */
+	LOONGSON_INT_ROUTER_LPC =
+		LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 0);
+	/* route HT1 int0 ~ int7 to cpu core0 INT1*/
+	for (i = 0; i < 8; i++)
+		LOONGSON_INT_ROUTER_HT1(i) =
+			LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 1);
+	/* enable HT1 interrupt */
+	LOONGSON_HT1_INTN_EN(0) = 0xffffffff;
+	/* enable router interrupt intenset */
+	LOONGSON_INT_ROUTER_INTENSET =
+		LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10;
+}
+
+void __init arch_init_irq(void)
+{
+	struct irq_chip *chip;
+
+	clear_c0_status(ST0_IM | ST0_BEV);
+
+	irq_router_init();
+	mips_cpu_irq_init();
+	init_i8259_irqs();
+	chip = irq_get_chip(I8259A_IRQ_BASE);
+	chip->irq_set_affinity = plat_set_irq_affinity;
+
+	irq_set_chip_and_handler(LOONGSON_UART_IRQ,
+			&loongson_irq_chip, handle_percpu_irq);
+	irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ,
+			&loongson_irq_chip, handle_percpu_irq);
+
+	set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(void)
+{
+	irq_cpu_offline();
+	clear_c0_status(ST0_IM);
+}
+
+#endif
diff --git a/arch/mips/loongson64/lemote-2f/reset.c b/arch/mips/loongson64/lemote-2f/reset.c
deleted file mode 100644
index 0db0934302ea..000000000000
--- a/arch/mips/loongson64/lemote-2f/reset.c
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* Board-specific reboot/shutdown routines
- *
- * Copyright (c) 2009 Philippe Vachon <philippe@cowpig.ca>
- *
- * Copyright (C) 2009 Lemote Inc.
- * Author: Wu Zhangjin, wuzhangjin@gmail.com
- */
-
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/types.h>
-
-#include <asm/bootinfo.h>
-
-#include <loongson.h>
-
-#include <cs5536/cs5536.h>
-#include "ec_kb3310b.h"
-
-static void reset_cpu(void)
-{
-	/*
-	 * reset cpu to full speed, this is needed when enabling cpu frequency
-	 * scalling
-	 */
-	LOONGSON_CHIPCFG(0) |= 0x7;
-}
-
-/* reset support for fuloong2f */
-
-static void fl2f_reboot(void)
-{
-	reset_cpu();
-
-	/* send a reset signal to south bridge.
-	 *
-	 * NOTE: if enable "Power Management" in kernel, rtl8169 will not reset
-	 * normally with this reset operation and it will not work in PMON, but
-	 * you can type halt command and then reboot, seems the hardware reset
-	 * logic not work normally.
-	 */
-	{
-		u32 hi, lo;
-		_rdmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), &hi, &lo);
-		lo |= 0x00000001;
-		_wrmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), hi, lo);
-	}
-}
-
-static void fl2f_shutdown(void)
-{
-	u32 hi, lo, val;
-	int gpio_base;
-
-	/* get gpio base */
-	_rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_GPIO), &hi, &lo);
-	gpio_base = lo & 0xff00;
-
-	/* make cs5536 gpio13 output enable */
-	val = inl(gpio_base + GPIOL_OUT_EN);
-	val &= ~(1 << (16 + 13));
-	val |= (1 << 13);
-	outl(val, gpio_base + GPIOL_OUT_EN);
-	mmiowb();
-	/* make cs5536 gpio13 output low level voltage. */
-	val = inl(gpio_base + GPIOL_OUT_VAL) & ~(1 << (13));
-	val |= (1 << (16 + 13));
-	outl(val, gpio_base + GPIOL_OUT_VAL);
-	mmiowb();
-}
-
-/* reset support for yeeloong2f and mengloong2f notebook */
-
-static void ml2f_reboot(void)
-{
-	reset_cpu();
-
-	/* sending an reset signal to EC(embedded controller) */
-	ec_write(REG_RESET, BIT_RESET_ON);
-}
-
-#define yl2f89_reboot ml2f_reboot
-
-/* menglong(7inches) laptop has different shutdown logic from 8.9inches */
-#define EC_SHUTDOWN_IO_PORT_HIGH 0xff2d
-#define EC_SHUTDOWN_IO_PORT_LOW	 0xff2e
-#define EC_SHUTDOWN_IO_PORT_DATA 0xff2f
-#define REG_SHUTDOWN_HIGH	 0xFC
-#define REG_SHUTDOWN_LOW	 0x29
-#define BIT_SHUTDOWN_ON		 (1 << 1)
-
-static void ml2f_shutdown(void)
-{
-	u8 val;
-	u64 i;
-
-	outb(REG_SHUTDOWN_HIGH, EC_SHUTDOWN_IO_PORT_HIGH);
-	outb(REG_SHUTDOWN_LOW, EC_SHUTDOWN_IO_PORT_LOW);
-	mmiowb();
-	val = inb(EC_SHUTDOWN_IO_PORT_DATA);
-	outb(val & (~BIT_SHUTDOWN_ON), EC_SHUTDOWN_IO_PORT_DATA);
-	mmiowb();
-	/* need enough wait here... how many microseconds needs? */
-	for (i = 0; i < 0x10000; i++)
-		delay();
-	outb(val | BIT_SHUTDOWN_ON, EC_SHUTDOWN_IO_PORT_DATA);
-	mmiowb();
-}
-
-static void yl2f89_shutdown(void)
-{
-	/* cpu-gpio0 output low */
-	LOONGSON_GPIODATA &= ~0x00000001;
-	/* cpu-gpio0 as output */
-	LOONGSON_GPIOIE &= ~0x00000001;
-}
-
-void mach_prepare_reboot(void)
-{
-	switch (mips_machtype) {
-	case MACH_LEMOTE_FL2F:
-	case MACH_LEMOTE_NAS:
-	case MACH_LEMOTE_LL2F:
-		fl2f_reboot();
-		break;
-	case MACH_LEMOTE_ML2F7:
-		ml2f_reboot();
-		break;
-	case MACH_LEMOTE_YL2F89:
-		yl2f89_reboot();
-		break;
-	default:
-		break;
-	}
-}
-
-void mach_prepare_shutdown(void)
-{
-	switch (mips_machtype) {
-	case MACH_LEMOTE_FL2F:
-	case MACH_LEMOTE_NAS:
-	case MACH_LEMOTE_LL2F:
-		fl2f_shutdown();
-		break;
-	case MACH_LEMOTE_ML2F7:
-		ml2f_shutdown();
-		break;
-	case MACH_LEMOTE_YL2F89:
-		yl2f89_shutdown();
-		break;
-	default:
-		break;
-	}
-}
diff --git a/arch/mips/loongson64/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile
deleted file mode 100644
index df39598742b2..000000000000
--- a/arch/mips/loongson64/loongson-3/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for Loongson-3 family machines
-#
-obj-y			+= irq.o cop2-ex.o platform.o acpi_init.o dma.o
-
-obj-$(CONFIG_SMP)	+= smp.o
-
-obj-$(CONFIG_NUMA)	+= numa.o
-
-obj-$(CONFIG_RS780_HPET) += hpet.o
diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
deleted file mode 100644
index 5605061f5f98..000000000000
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <loongson.h>
-#include <irq.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-#include <asm/irq_cpu.h>
-#include <asm/i8259.h>
-#include <asm/mipsregs.h>
-
-#include "smp.h"
-
-extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
-
-unsigned int irq_cpu[16] = {[0 ... 15] = -1};
-unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
-unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
-
-int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
-			  bool force)
-{
-	unsigned int cpu;
-	struct cpumask new_affinity;
-
-	/* I/O devices are connected on package-0 */
-	cpumask_copy(&new_affinity, affinity);
-	for_each_cpu(cpu, affinity)
-		if (cpu_data[cpu].package > 0)
-			cpumask_clear_cpu(cpu, &new_affinity);
-
-	if (cpumask_empty(&new_affinity))
-		return -EINVAL;
-
-	cpumask_copy(d->common->affinity, &new_affinity);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static void ht_irqdispatch(void)
-{
-	unsigned int i, irq;
-	struct irq_data *irqd;
-	struct cpumask affinity;
-
-	irq = LOONGSON_HT1_INT_VECTOR(0);
-	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
-
-	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-		if (!(irq & (0x1 << ht_irq[i])))
-			continue;
-
-		/* handled by local core */
-		if (local_irq & (0x1 << ht_irq[i])) {
-			do_IRQ(ht_irq[i]);
-			continue;
-		}
-
-		irqd = irq_get_irq_data(ht_irq[i]);
-		cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask);
-		if (cpumask_empty(&affinity)) {
-			do_IRQ(ht_irq[i]);
-			continue;
-		}
-
-		irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity);
-		if (irq_cpu[ht_irq[i]] >= nr_cpu_ids)
-			irq_cpu[ht_irq[i]] = cpumask_first(&affinity);
-
-		if (irq_cpu[ht_irq[i]] == 0) {
-			do_IRQ(ht_irq[i]);
-			continue;
-		}
-
-		/* balanced by other cores */
-		loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i]));
-	}
-}
-
-#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0)
-
-void mach_irq_dispatch(unsigned int pending)
-{
-	if (pending & CAUSEF_IP7)
-		do_IRQ(LOONGSON_TIMER_IRQ);
-#if defined(CONFIG_SMP)
-	if (pending & CAUSEF_IP6)
-		loongson3_ipi_interrupt(NULL);
-#endif
-	if (pending & CAUSEF_IP3)
-		ht_irqdispatch();
-	if (pending & CAUSEF_IP2)
-		do_IRQ(LOONGSON_UART_IRQ);
-	if (pending & UNUSED_IPS) {
-		pr_err("%s : spurious interrupt\n", __func__);
-		spurious_interrupt();
-	}
-}
-
-static inline void mask_loongson_irq(struct irq_data *d) { }
-static inline void unmask_loongson_irq(struct irq_data *d) { }
-
- /* For MIPS IRQs which shared by all cores */
-static struct irq_chip loongson_irq_chip = {
-	.name		= "Loongson",
-	.irq_ack	= mask_loongson_irq,
-	.irq_mask	= mask_loongson_irq,
-	.irq_mask_ack	= mask_loongson_irq,
-	.irq_unmask	= unmask_loongson_irq,
-	.irq_eoi	= unmask_loongson_irq,
-};
-
-void irq_router_init(void)
-{
-	int i;
-
-	/* route LPC int to cpu core0 int 0 */
-	LOONGSON_INT_ROUTER_LPC =
-		LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 0);
-	/* route HT1 int0 ~ int7 to cpu core0 INT1*/
-	for (i = 0; i < 8; i++)
-		LOONGSON_INT_ROUTER_HT1(i) =
-			LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 1);
-	/* enable HT1 interrupt */
-	LOONGSON_HT1_INTN_EN(0) = 0xffffffff;
-	/* enable router interrupt intenset */
-	LOONGSON_INT_ROUTER_INTENSET =
-		LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10;
-}
-
-void __init mach_init_irq(void)
-{
-	struct irq_chip *chip;
-
-	clear_c0_status(ST0_IM | ST0_BEV);
-
-	irq_router_init();
-	mips_cpu_irq_init();
-	init_i8259_irqs();
-	chip = irq_get_chip(I8259A_IRQ_BASE);
-	chip->irq_set_affinity = plat_set_irq_affinity;
-
-	irq_set_chip_and_handler(LOONGSON_UART_IRQ,
-			&loongson_irq_chip, handle_percpu_irq);
-	irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ,
-			&loongson_irq_chip, handle_percpu_irq);
-
-	set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-void fixup_irqs(void)
-{
-	irq_cpu_offline();
-	clear_c0_status(ST0_IM);
-}
-
-#endif
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/numa.c
index 8f20d2cb3767..ef94a2278561 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -26,12 +26,15 @@
 #include <asm/wbflush.h>
 #include <boot_param.h>
 
-static struct node_data prealloc__node_data[MAX_NUMNODES];
+static struct pglist_data prealloc__node_data[MAX_NUMNODES];
 unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
 EXPORT_SYMBOL(__node_distances);
-struct node_data *__node_data[MAX_NUMNODES];
+struct pglist_data *__node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(__node_data);
 
+cpumask_t __node_cpumask[MAX_NUMNODES];
+EXPORT_SYMBOL(__node_cpumask);
+
 static void enable_lpa(void)
 {
 	unsigned long value;
@@ -214,7 +217,7 @@ static __init void prom_meminit(void)
 		if (node_online(node)) {
 			szmem(node);
 			node_mem_init(node);
-			cpumask_clear(&__node_data[(node)]->cpumask);
+			cpumask_clear(&__node_cpumask[node]);
 		}
 	}
 	memblocks_present();
@@ -228,7 +231,7 @@ static __init void prom_meminit(void)
 		if (loongson_sysconf.reserved_cpus_mask & (1<<cpu))
 			continue;
 
-		cpumask_set_cpu(active_cpu, &__node_data[(node)]->cpumask);
+		cpumask_set_cpu(active_cpu, &__node_cpumask[node]);
 		pr_info("NUMA: set cpumask cpu %d on node %d\n", active_cpu, node);
 
 		active_cpu++;
diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c
new file mode 100644
index 000000000000..e84ae20c3290
--- /dev/null
+++ b/arch/mips/loongson64/pci.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/pci.h>
+
+#include <pci.h>
+#include <loongson.h>
+#include <boot_param.h>
+
+static struct resource loongson_pci_mem_resource = {
+	.name	= "pci memory space",
+	.start	= LOONGSON_PCI_MEM_START,
+	.end	= LOONGSON_PCI_MEM_END,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct resource loongson_pci_io_resource = {
+	.name	= "pci io space",
+	.start	= LOONGSON_PCI_IO_START,
+	.end	= IO_SPACE_LIMIT,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct pci_controller  loongson_pci_controller = {
+	.pci_ops	= &loongson_pci_ops,
+	.io_resource	= &loongson_pci_io_resource,
+	.mem_resource	= &loongson_pci_mem_resource,
+	.mem_offset	= 0x00000000UL,
+	.io_offset	= 0x00000000UL,
+};
+
+
+extern int sbx00_acpi_init(void);
+
+static int __init pcibios_init(void)
+{
+
+	loongson_pci_controller.io_map_base = mips_io_port_base;
+	loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr;
+	loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr;
+
+	register_pci_controller(&loongson_pci_controller);
+
+	sbx00_acpi_init();
+
+	return 0;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/platform.c
index 13f3404f0030..13f3404f0030 100644
--- a/arch/mips/loongson64/loongson-3/platform.c
+++ b/arch/mips/loongson64/platform.c
diff --git a/arch/mips/loongson64/pm.c b/arch/mips/loongson64/pm.c
new file mode 100644
index 000000000000..7c8556f09781
--- /dev/null
+++ b/arch/mips/loongson64/pm.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * loongson-specific suspend support
+ *
+ *  Copyright (C) 2009 Lemote Inc.
+ *  Author: Wu Zhangjin <wuzhangjin@gmail.com>
+ */
+#include <linux/suspend.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+
+#include <asm/i8259.h>
+#include <asm/mipsregs.h>
+
+#include <loongson.h>
+
+static unsigned int __maybe_unused cached_master_mask;	/* i8259A */
+static unsigned int __maybe_unused cached_slave_mask;
+static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */
+
+void arch_suspend_disable_irqs(void)
+{
+	/* disable all mips events */
+	local_irq_disable();
+
+#ifdef CONFIG_I8259
+	/* disable all events of i8259A */
+	cached_slave_mask = inb(PIC_SLAVE_IMR);
+	cached_master_mask = inb(PIC_MASTER_IMR);
+
+	outb(0xff, PIC_SLAVE_IMR);
+	inb(PIC_SLAVE_IMR);
+	outb(0xff, PIC_MASTER_IMR);
+	inb(PIC_MASTER_IMR);
+#endif
+	/* disable all events of bonito */
+	cached_bonito_irq_mask = LOONGSON_INTEN;
+	LOONGSON_INTENCLR = 0xffff;
+	(void)LOONGSON_INTENCLR;
+}
+
+void arch_suspend_enable_irqs(void)
+{
+	/* enable all mips events */
+	local_irq_enable();
+#ifdef CONFIG_I8259
+	/* only enable the cached events of i8259A */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);
+	outb(cached_master_mask, PIC_MASTER_IMR);
+#endif
+	/* enable all cached events of bonito */
+	LOONGSON_INTENSET = cached_bonito_irq_mask;
+	(void)LOONGSON_INTENSET;
+}
+
+/*
+ * Setup the board-specific events for waking up loongson from wait mode
+ */
+void __weak setup_wakeup_events(void)
+{
+}
+
+void __weak mach_suspend(void)
+{
+}
+
+void __weak mach_resume(void)
+{
+}
+
+static int loongson_pm_enter(suspend_state_t state)
+{
+	mach_suspend();
+
+	mach_resume();
+
+	return 0;
+}
+
+static int loongson_pm_valid_state(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_ON:
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+
+static const struct platform_suspend_ops loongson_pm_ops = {
+	.valid	= loongson_pm_valid_state,
+	.enter	= loongson_pm_enter,
+};
+
+static int __init loongson_pm_init(void)
+{
+	suspend_set_ops(&loongson_pm_ops);
+
+	return 0;
+}
+arch_initcall(loongson_pm_init);
diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c
new file mode 100644
index 000000000000..bc7671079f0c
--- /dev/null
+++ b/arch/mips/loongson64/reset.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Zhangjin Wu, wuzhangjin@gmail.com
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+
+#include <asm/idle.h>
+#include <asm/reboot.h>
+
+#include <loongson.h>
+#include <boot_param.h>
+
+static inline void loongson_reboot(void)
+{
+	((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) ();
+}
+
+static void loongson_restart(char *command)
+{
+
+	void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr;
+
+	fw_restart();
+	while (1) {
+		if (cpu_wait)
+			cpu_wait();
+	}
+}
+
+static void loongson_poweroff(void)
+{
+	void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr;
+
+	fw_poweroff();
+	while (1) {
+		if (cpu_wait)
+			cpu_wait();
+	}
+}
+
+static void loongson_halt(void)
+{
+	pr_notice("\n\n** You can safely turn off the power now **\n\n");
+	while (1) {
+		if (cpu_wait)
+			cpu_wait();
+	}
+}
+
+static int __init mips_reboot_setup(void)
+{
+	_machine_restart = loongson_restart;
+	_machine_halt = loongson_halt;
+	pm_power_off = loongson_poweroff;
+
+	return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c
new file mode 100644
index 000000000000..8d7628c0f513
--- /dev/null
+++ b/arch/mips/loongson64/rtc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Lemote Fuloong platform support
+ *
+ *  Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
+
+static struct resource loongson_rtc_resources[] = {
+	{
+		.start	= RTC_PORT(0),
+		.end	= RTC_PORT(1),
+		.flags	= IORESOURCE_IO,
+	}, {
+		.start	= RTC_IRQ,
+		.end	= RTC_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device loongson_rtc_device = {
+	.name		= "rtc_cmos",
+	.id		= -1,
+	.resource	= loongson_rtc_resources,
+	.num_resources	= ARRAY_SIZE(loongson_rtc_resources),
+};
+
+
+static int __init loongson_rtc_platform_init(void)
+{
+	platform_device_register(&loongson_rtc_device);
+	return 0;
+}
+
+device_initcall(loongson_rtc_platform_init);
diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
new file mode 100644
index 000000000000..4fd27f4f90ed
--- /dev/null
+++ b/arch/mips/loongson64/setup.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include <linux/export.h>
+#include <linux/init.h>
+
+#include <asm/wbflush.h>
+#include <asm/bootinfo.h>
+
+#include <loongson.h>
+
+static void wbflush_loongson(void)
+{
+	asm(".set\tpush\n\t"
+	    ".set\tnoreorder\n\t"
+	    ".set mips3\n\t"
+	    "sync\n\t"
+	    "nop\n\t"
+	    ".set\tpop\n\t"
+	    ".set mips0\n\t");
+}
+
+void (*__wbflush)(void) = wbflush_loongson;
+EXPORT_SYMBOL(__wbflush);
+
+void __init plat_mem_setup(void)
+{
+}
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/smp.c
index ce68cdaaf33c..de8e0741ce2d 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -18,6 +18,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <loongson.h>
+#include <loongson_regs.h>
 #include <workarounds.h>
 
 #include "smp.h"
@@ -48,6 +49,62 @@ static uint32_t core0_c0count[NR_CPUS];
 		__wbflush();			\
 	} while (0)
 
+u32 (*ipi_read_clear)(int cpu);
+void (*ipi_write_action)(int cpu, u32 action);
+
+static u32 csr_ipi_read_clear(int cpu)
+{
+	u32 action;
+
+	/* Load the ipi register to figure out what we're supposed to do */
+	action = csr_readl(LOONGSON_CSR_IPI_STATUS);
+	/* Clear the ipi register to clear the interrupt */
+	csr_writel(action, LOONGSON_CSR_IPI_CLEAR);
+
+	return action;
+}
+
+static void csr_ipi_write_action(int cpu, u32 action)
+{
+	unsigned int irq = 0;
+
+	while ((irq = ffs(action))) {
+		uint32_t val = CSR_IPI_SEND_BLOCK;
+		val |= (irq - 1);
+		val |= (cpu << CSR_IPI_SEND_CPU_SHIFT);
+		csr_writel(val, LOONGSON_CSR_IPI_SEND);
+		action &= ~BIT(irq - 1);
+	}
+}
+
+static u32 legacy_ipi_read_clear(int cpu)
+{
+	u32 action;
+
+	/* Load the ipi register to figure out what we're supposed to do */
+	action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+	/* Clear the ipi register to clear the interrupt */
+	loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]);
+
+	return action;
+}
+
+static void legacy_ipi_write_action(int cpu, u32 action)
+{
+	loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]);
+}
+
+static void csr_ipi_probe(void)
+{
+	if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) {
+		ipi_read_clear = csr_ipi_read_clear;
+		ipi_write_action = csr_ipi_write_action;
+	} else {
+		ipi_read_clear = legacy_ipi_read_clear;
+		ipi_write_action = legacy_ipi_write_action;
+	}
+}
+
 static void ipi_set0_regs_init(void)
 {
 	ipi_set0_regs[0] = (void *)
@@ -233,7 +290,7 @@ static void ipi_mailbox_buf_init(void)
  */
 static void loongson3_send_ipi_single(int cpu, unsigned int action)
 {
-	loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(cpu)]);
+	ipi_write_action(cpu_logical_map(cpu), (u32)action);
 }
 
 static void
@@ -242,14 +299,14 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 	unsigned int i;
 
 	for_each_cpu(i, mask)
-		loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]);
+		ipi_write_action(cpu_logical_map(i), (u32)action);
 }
 
 #define IPI_IRQ_OFFSET 6
 
 void loongson3_send_irq_by_ipi(int cpu, int irqs)
 {
-	loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]);
+	ipi_write_action(cpu_logical_map(cpu), irqs << IPI_IRQ_OFFSET);
 }
 
 void loongson3_ipi_interrupt(struct pt_regs *regs)
@@ -257,13 +314,9 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
 	int i, cpu = smp_processor_id();
 	unsigned int action, c0count, irqs;
 
-	/* Load the ipi register to figure out what we're supposed to do */
-	action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+	action = ipi_read_clear(cpu);
 	irqs = action >> IPI_IRQ_OFFSET;
 
-	/* Clear the ipi register to clear the interrupt */
-	loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]);
-
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 
@@ -372,6 +425,7 @@ static void __init loongson3_smp_setup(void)
 		num++;
 	}
 
+	csr_ipi_probe();
 	ipi_set0_regs_init();
 	ipi_clear0_regs_init();
 	ipi_status0_regs_init();
@@ -450,7 +504,7 @@ static void loongson3_cpu_die(unsigned int cpu)
  * flush all L1 entries at first. Then, another core (usually Core 0) can
  * safely disable the clock of the target core. loongson3_play_dead() is
  * called via CKSEG1 (uncached and unmmaped) */
-static void loongson3a_r1_play_dead(int *state_addr)
+static void loongson3_type1_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -512,7 +566,7 @@ static void loongson3a_r1_play_dead(int *state_addr)
 		: "a1");
 }
 
-static void loongson3a_r2r3_play_dead(int *state_addr)
+static void loongson3_type2_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -532,27 +586,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
 		"   cache 1, 3(%[addr])           \n"
 		"   addiu %[sets], %[sets], -1    \n"
 		"   bnez  %[sets], 1b             \n"
-		"   addiu %[addr], %[addr], 0x40  \n"
-		"   li %[addr], 0x80000000        \n" /* KSEG0 */
-		"2: cache 2, 0(%[addr])           \n" /* flush L1 VCache */
-		"   cache 2, 1(%[addr])           \n"
-		"   cache 2, 2(%[addr])           \n"
-		"   cache 2, 3(%[addr])           \n"
-		"   cache 2, 4(%[addr])           \n"
-		"   cache 2, 5(%[addr])           \n"
-		"   cache 2, 6(%[addr])           \n"
-		"   cache 2, 7(%[addr])           \n"
-		"   cache 2, 8(%[addr])           \n"
-		"   cache 2, 9(%[addr])           \n"
-		"   cache 2, 10(%[addr])          \n"
-		"   cache 2, 11(%[addr])          \n"
-		"   cache 2, 12(%[addr])          \n"
-		"   cache 2, 13(%[addr])          \n"
-		"   cache 2, 14(%[addr])          \n"
-		"   cache 2, 15(%[addr])          \n"
-		"   addiu %[vsets], %[vsets], -1  \n"
-		"   bnez  %[vsets], 2b            \n"
-		"   addiu %[addr], %[addr], 0x40  \n"
+		"   addiu %[addr], %[addr], 0x20  \n"
 		"   li    %[val], 0x7             \n" /* *state_addr = CPU_DEAD; */
 		"   sw    %[val], (%[state_addr]) \n"
 		"   sync                          \n"
@@ -560,8 +594,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
 		"   .set pop                      \n"
 		: [addr] "=&r" (addr), [val] "=&r" (val)
 		: [state_addr] "r" (state_addr),
-		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
-		  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
+		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));
 
 	__asm__ __volatile__(
 		"   .set push                         \n"
@@ -576,6 +609,8 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
 		"   andi  %[node], %[cpuid], 0xc      \n"
 		"   dsll  %[node], 42                 \n" /* get node id */
 		"   or    %[base], %[base], %[node]   \n"
+		"   dsrl  %[node], 30                 \n" /* 15:14 */
+		"   or    %[base], %[base], %[node]   \n"
 		"1: li    %[count], 0x100             \n" /* wait for init loop */
 		"2: bnez  %[count], 2b                \n" /* limit mailbox access */
 		"   addiu %[count], -1                \n"
@@ -595,7 +630,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr)
 		: "a1");
 }
 
-static void loongson3b_play_dead(int *state_addr)
+static void loongson3_type3_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -615,7 +650,27 @@ static void loongson3b_play_dead(int *state_addr)
 		"   cache 1, 3(%[addr])           \n"
 		"   addiu %[sets], %[sets], -1    \n"
 		"   bnez  %[sets], 1b             \n"
-		"   addiu %[addr], %[addr], 0x20  \n"
+		"   addiu %[addr], %[addr], 0x40  \n"
+		"   li %[addr], 0x80000000        \n" /* KSEG0 */
+		"2: cache 2, 0(%[addr])           \n" /* flush L1 VCache */
+		"   cache 2, 1(%[addr])           \n"
+		"   cache 2, 2(%[addr])           \n"
+		"   cache 2, 3(%[addr])           \n"
+		"   cache 2, 4(%[addr])           \n"
+		"   cache 2, 5(%[addr])           \n"
+		"   cache 2, 6(%[addr])           \n"
+		"   cache 2, 7(%[addr])           \n"
+		"   cache 2, 8(%[addr])           \n"
+		"   cache 2, 9(%[addr])           \n"
+		"   cache 2, 10(%[addr])          \n"
+		"   cache 2, 11(%[addr])          \n"
+		"   cache 2, 12(%[addr])          \n"
+		"   cache 2, 13(%[addr])          \n"
+		"   cache 2, 14(%[addr])          \n"
+		"   cache 2, 15(%[addr])          \n"
+		"   addiu %[vsets], %[vsets], -1  \n"
+		"   bnez  %[vsets], 2b            \n"
+		"   addiu %[addr], %[addr], 0x40  \n"
 		"   li    %[val], 0x7             \n" /* *state_addr = CPU_DEAD; */
 		"   sw    %[val], (%[state_addr]) \n"
 		"   sync                          \n"
@@ -623,7 +678,8 @@ static void loongson3b_play_dead(int *state_addr)
 		"   .set pop                      \n"
 		: [addr] "=&r" (addr), [val] "=&r" (val)
 		: [state_addr] "r" (state_addr),
-		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));
+		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
+		  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
 
 	__asm__ __volatile__(
 		"   .set push                         \n"
@@ -638,8 +694,6 @@ static void loongson3b_play_dead(int *state_addr)
 		"   andi  %[node], %[cpuid], 0xc      \n"
 		"   dsll  %[node], 42                 \n" /* get node id */
 		"   or    %[base], %[base], %[node]   \n"
-		"   dsrl  %[node], 30                 \n" /* 15:14 */
-		"   or    %[base], %[base], %[node]   \n"
 		"1: li    %[count], 0x100             \n" /* wait for init loop */
 		"2: bnez  %[count], 2b                \n" /* limit mailbox access */
 		"   addiu %[count], -1                \n"
@@ -661,30 +715,42 @@ static void loongson3b_play_dead(int *state_addr)
 
 void play_dead(void)
 {
-	int *state_addr;
+	int prid_imp, prid_rev, *state_addr;
 	unsigned int cpu = smp_processor_id();
 	void (*play_dead_at_ckseg1)(int *);
 
 	idle_task_exit();
-	switch (read_c0_prid() & PRID_REV_MASK) {
+
+	prid_imp = read_c0_prid() & PRID_IMP_MASK;
+	prid_rev = read_c0_prid() & PRID_REV_MASK;
+
+	if (prid_imp == PRID_IMP_LOONGSON_64G) {
+		play_dead_at_ckseg1 =
+			(void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
+		goto out;
+	}
+
+	switch (prid_rev) {
 	case PRID_REV_LOONGSON3A_R1:
 	default:
 		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead);
+			(void *)CKSEG1ADDR((unsigned long)loongson3_type1_play_dead);
+		break;
+	case PRID_REV_LOONGSON3B_R1:
+	case PRID_REV_LOONGSON3B_R2:
+		play_dead_at_ckseg1 =
+			(void *)CKSEG1ADDR((unsigned long)loongson3_type2_play_dead);
 		break;
 	case PRID_REV_LOONGSON3A_R2_0:
 	case PRID_REV_LOONGSON3A_R2_1:
 	case PRID_REV_LOONGSON3A_R3_0:
 	case PRID_REV_LOONGSON3A_R3_1:
 		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead);
-		break;
-	case PRID_REV_LOONGSON3B_R1:
-	case PRID_REV_LOONGSON3B_R2:
-		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3b_play_dead);
+			(void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
 		break;
 	}
+
+out:
 	state_addr = &per_cpu(cpu_state, cpu);
 	mb();
 	play_dead_at_ckseg1(state_addr);
diff --git a/arch/mips/loongson64/loongson-3/smp.h b/arch/mips/loongson64/smp.h
index 957bde81e0e4..957bde81e0e4 100644
--- a/arch/mips/loongson64/loongson-3/smp.h
+++ b/arch/mips/loongson64/smp.h
diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c
new file mode 100644
index 000000000000..1245f22cec84
--- /dev/null
+++ b/arch/mips/loongson64/time.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include <asm/mc146818-time.h>
+#include <asm/time.h>
+#include <asm/hpet.h>
+
+#include <loongson.h>
+
+void __init plat_time_init(void)
+{
+	/* setup mips r4k timer */
+	mips_hpt_frequency = cpu_clock_freq / 2;
+
+#ifdef CONFIG_RS780_HPET
+	setup_hpet_timer();
+#endif
+}
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+	ts->tv_sec = mc146818_get_cmos_time();
+	ts->tv_nsec = 0;
+}
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index 387724860fa6..d5ad76b2bb67 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -189,6 +189,7 @@ static int __init debugfs_fpuemu(void)
 {
 	struct dentry *fpuemu_debugfs_base_dir;
 	struct dentry *fpuemu_debugfs_inst_dir;
+	char name[32];
 
 	fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats",
 						     mips_debugfs_dir);
@@ -225,8 +226,6 @@ do {									\
 
 #define FPU_STAT_CREATE_EX(m)						\
 do {									\
-	char name[32];							\
-									\
 	adjust_instruction_counter_name(name, #m);			\
 									\
 	debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir,	\
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 0ca401ddf3b7..15bb8cf59828 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -241,6 +241,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
 	int exec = vma->vm_flags & VM_EXEC;
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
@@ -253,7 +254,8 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma,
 		return;
 
 	pgdp = pgd_offset(mm, addr);
-	pudp = pud_offset(pgdp, addr);
+	p4dp = p4d_offset(pgdp, addr);
+	pudp = pud_offset(p4dp, addr);
 	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset(pmdp, addr);
 
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 89b9c851d822..5f3d0103b95d 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -271,12 +271,14 @@ static inline void tx49_blast_icache32(void)
 	/* I'm in even chunk.  blast odd chunks */
 	for (ws = 0; ws < ws_end; ws += ws_inc)
 		for (addr = start + 0x400; addr < end; addr += 0x400 * 2)
-			cache32_unroll32(addr|ws, Index_Invalidate_I);
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
 	CACHE32_UNROLL32_ALIGN;
 	/* I'm in odd chunk.  blast even chunks */
 	for (ws = 0; ws < ws_end; ws += ws_inc)
 		for (addr = start; addr < end; addr += 0x400 * 2)
-			cache32_unroll32(addr|ws, Index_Invalidate_I);
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
 }
 
 static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page)
@@ -302,12 +304,14 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page)
 	/* I'm in even chunk.  blast odd chunks */
 	for (ws = 0; ws < ws_end; ws += ws_inc)
 		for (addr = start + 0x400; addr < end; addr += 0x400 * 2)
-			cache32_unroll32(addr|ws, Index_Invalidate_I);
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
 	CACHE32_UNROLL32_ALIGN;
 	/* I'm in odd chunk.  blast even chunks */
 	for (ws = 0; ws < ws_end; ws += ws_inc)
 		for (addr = start; addr < end; addr += 0x400 * 2)
-			cache32_unroll32(addr|ws, Index_Invalidate_I);
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
 }
 
 static void (* r4k_blast_icache_page)(unsigned long addr);
@@ -320,7 +324,7 @@ static void r4k_blast_icache_page_setup(void)
 		r4k_blast_icache_page = (void *)cache_noop;
 	else if (ic_lsize == 16)
 		r4k_blast_icache_page = blast_icache16_page;
-	else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2)
+	else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF)
 		r4k_blast_icache_page = loongson2_blast_icache32_page;
 	else if (ic_lsize == 32)
 		r4k_blast_icache_page = blast_icache32_page;
@@ -369,7 +373,7 @@ static void r4k_blast_icache_page_indexed_setup(void)
 		else if (TX49XX_ICACHE_INDEX_INV_WAR)
 			r4k_blast_icache_page_indexed =
 				tx49_blast_icache32_page_indexed;
-		else if (current_cpu_type() == CPU_LOONGSON2)
+		else if (current_cpu_type() == CPU_LOONGSON2EF)
 			r4k_blast_icache_page_indexed =
 				loongson2_blast_icache32_page_indexed;
 		else
@@ -395,7 +399,7 @@ static void r4k_blast_icache_setup(void)
 			r4k_blast_icache = blast_r4600_v1_icache32;
 		else if (TX49XX_ICACHE_INDEX_INV_WAR)
 			r4k_blast_icache = tx49_blast_icache32;
-		else if (current_cpu_type() == CPU_LOONGSON2)
+		else if (current_cpu_type() == CPU_LOONGSON2EF)
 			r4k_blast_icache = loongson2_blast_icache32;
 		else
 			r4k_blast_icache = blast_icache32;
@@ -465,7 +469,7 @@ static void r4k_blast_scache_node_setup(void)
 {
 	unsigned long sc_lsize = cpu_scache_line_size();
 
-	if (current_cpu_type() != CPU_LOONGSON3)
+	if (current_cpu_type() != CPU_LOONGSON64)
 		r4k_blast_scache_node = (void *)cache_noop;
 	else if (sc_lsize == 16)
 		r4k_blast_scache_node = blast_scache16_node;
@@ -480,7 +484,7 @@ static void r4k_blast_scache_node_setup(void)
 static inline void local_r4k___flush_cache_all(void * args)
 {
 	switch (current_cpu_type()) {
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 	case CPU_R4000SC:
 	case CPU_R4000MC:
 	case CPU_R4400SC:
@@ -497,7 +501,7 @@ static inline void local_r4k___flush_cache_all(void * args)
 		r4k_blast_scache();
 		break;
 
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		/* Use get_ebase_cpunum() for both NUMA=y/n */
 		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
 		break;
@@ -650,6 +654,7 @@ static inline void local_r4k_flush_cache_page(void *args)
 	struct mm_struct *mm = vma->vm_mm;
 	int map_coherent = 0;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
@@ -664,7 +669,8 @@ static inline void local_r4k_flush_cache_page(void *args)
 
 	addr &= PAGE_MASK;
 	pgdp = pgd_offset(mm, addr);
-	pudp = pud_offset(pgdp, addr);
+	p4dp = p4d_offset(pgdp, addr);
+	pudp = pud_offset(p4dp, addr);
 	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset(pmdp, addr);
 
@@ -770,7 +776,7 @@ static inline void __local_r4k_flush_icache_range(unsigned long start,
 		r4k_blast_icache();
 	else {
 		switch (boot_cpu_type()) {
-		case CPU_LOONGSON2:
+		case CPU_LOONGSON2EF:
 			protected_loongson2_blast_icache_range(start, end);
 			break;
 
@@ -863,7 +869,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size) {
-			if (current_cpu_type() != CPU_LOONGSON3)
+			if (current_cpu_type() != CPU_LOONGSON64)
 				r4k_blast_scache();
 			else
 				r4k_blast_scache_node(pa_to_nid(addr));
@@ -904,7 +910,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size) {
-			if (current_cpu_type() != CPU_LOONGSON3)
+			if (current_cpu_type() != CPU_LOONGSON64)
 				r4k_blast_scache();
 			else
 				r4k_blast_scache_node(pa_to_nid(addr));
@@ -1224,7 +1230,7 @@ static void probe_pcache(void)
 		c->options |= MIPS_CPU_PREFETCH;
 		break;
 
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
 		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
 		if (prid & 0x3)
@@ -1242,7 +1248,7 @@ static void probe_pcache(void)
 		c->dcache.waybit = 0;
 		break;
 
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		config1 = read_c0_config1();
 		lsize = (config1 >> 19) & 7;
 		if (lsize)
@@ -1267,7 +1273,8 @@ static void probe_pcache(void)
 					  c->dcache.ways *
 					  c->dcache.linesz;
 		c->dcache.waybit = 0;
-		if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0)
+		if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
+				(PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
 			c->options |= MIPS_CPU_PREFETCH;
 		break;
 
@@ -1452,7 +1459,7 @@ static void probe_pcache(void)
 		c->dcache.flags &= ~MIPS_CACHE_ALIASES;
 		break;
 
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		/*
 		 * LOONGSON2 has 4 way icache, but when using indexed cache op,
 		 * one op will act on all 4 ways
@@ -1478,7 +1485,7 @@ static void probe_vcache(void)
 	struct cpuinfo_mips *c = &current_cpu_data;
 	unsigned int config2, lsize;
 
-	if (current_cpu_type() != CPU_LOONGSON3)
+	if (current_cpu_type() != CPU_LOONGSON64)
 		return;
 
 	config2 = read_c0_config2();
@@ -1653,11 +1660,11 @@ static void setup_scache(void)
 #endif
 		return;
 
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		loongson2_sc_init();
 		return;
 
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		loongson3_sc_init();
 		return;
 
@@ -1926,7 +1933,7 @@ void r4k_cache_init(void)
 		/* Optimization: an L2 flush implicitly flushes the L1 */
 		current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES;
 		break;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		/* Loongson-3 maintains cache coherency by hardware */
 		__flush_cache_all	= cache_noop;
 		__flush_cache_vmap	= cache_noop;
diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c
index b7c8a9d79c35..686867270627 100644
--- a/arch/mips/mm/c-tx39.c
+++ b/arch/mips/mm/c-tx39.c
@@ -170,6 +170,7 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page
 	int exec = vma->vm_flags & VM_EXEC;
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
@@ -183,7 +184,8 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page
 
 	page &= PAGE_MASK;
 	pgdp = pgd_offset(mm, page);
-	pudp = pud_offset(pgdp, page);
+	p4dp = p4d_offset(pgdp, page);
+	pudp = pud_offset(p4dp, page);
 	pmdp = pmd_offset(pudp, page);
 	ptep = pte_offset(pmdp, page);
 
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index 1d4d57dd9acf..dc42ffc83825 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -27,7 +27,7 @@
  * R10000 and R12000 are used in such systems, the SGI IP28 Indigo² rsp.
  * SGI IP32 aka O2.
  */
-static inline bool cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(void)
 {
 	switch (boot_cpu_type()) {
 	case CPU_R10000:
@@ -59,12 +59,6 @@ void *cached_kernel_address(void *addr)
 	return __va(addr) - UNCAC_BASE;
 }
 
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
-		dma_addr_t dma_addr)
-{
-	return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
-}
-
 static inline void dma_sync_virt(void *addr, size_t size,
 		enum dma_data_direction dir)
 {
@@ -118,17 +112,17 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
 	} while (left);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	dma_sync_phys(paddr, size, dir);
 }
 
 #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	if (cpu_needs_post_dma_flush(dev))
+	if (cpu_needs_post_dma_flush())
 		dma_sync_phys(paddr, size, dir);
 }
 #endif
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index f589aa8f47d9..1e8d00793784 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -292,8 +292,9 @@ vmalloc_fault:
 		 * Do _not_ use "tsk" here. We might be inside
 		 * an interrupt in the middle of a task switch..
 		 */
-		int offset = __pgd_offset(address);
+		int offset = pgd_index(address);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
 		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 		pte_t *pte_k;
@@ -305,8 +306,13 @@ vmalloc_fault:
 			goto no_context;
 		set_pgd(pgd, *pgd_k);
 
-		pud = pud_offset(pgd, address);
-		pud_k = pud_offset(pgd_k, address);
+		p4d = p4d_offset(pgd, address);
+		p4d_k = p4d_offset(pgd_k, address);
+		if (!p4d_present(*p4d_k))
+			goto no_context;
+
+		pud = pud_offset(p4d, address);
+		pud_k = pud_offset(p4d_k, address);
 		if (!pud_present(*pud_k))
 			goto no_context;
 
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index cef152234312..77ffece9c270 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -25,11 +25,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
 		      unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (pud)
 		pte = (pte_t *)pmd_alloc(mm, pud, addr);
 
@@ -40,14 +42,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
 		       unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd = NULL;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud))
-			pmd = pmd_offset(pud, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (p4d_present(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (pud_present(*pud))
+				pmd = pmd_offset(pud, addr);
+		}
 	}
 	return (pte_t *) pmd;
 }
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 090fa653dfa9..50f9ed8c6c1b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -239,9 +239,9 @@ void __init fixrange_init(unsigned long start, unsigned long end,
 	unsigned long vaddr;
 
 	vaddr = start;
-	i = __pgd_offset(vaddr);
-	j = __pud_offset(vaddr);
-	k = __pmd_offset(vaddr);
+	i = pgd_index(vaddr);
+	j = pud_index(vaddr);
+	k = pmd_index(vaddr);
 	pgd = pgd_base + i;
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 1601d90b087b..8317f337a86e 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -78,11 +78,15 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 	flush_cache_all();
 	BUG_ON(address >= end);
 	do {
+		p4d_t *p4d;
 		pud_t *pud;
 		pmd_t *pmd;
 
 		error = -ENOMEM;
-		pud = pud_alloc(&init_mm, dir, address);
+		p4d = p4d_alloc(&init_mm, dir, address);
+		if (!p4d)
+			break;
+		pud = pud_alloc(&init_mm, p4d, address);
 		if (!pud)
 			break;
 		pmd = pmd_alloc(&init_mm, pud, address);
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 56e4f8bffd4c..c5578897a4fa 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -187,7 +187,7 @@ static void set_prefetch_parameters(void)
 			}
 			break;
 
-		case CPU_LOONGSON3:
+		case CPU_LOONGSON64:
 			/* Loongson-3 only support the Pref_Load/Pref_Store. */
 			pref_bias_clear_store = 128;
 			pref_bias_copy_load = 128;
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index 6416a531a4c3..37c7a01427d2 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -56,6 +56,7 @@ void __init pagetable_init(void)
 	pgd_t *pgd_base;
 #ifdef CONFIG_HIGHMEM
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -81,8 +82,9 @@ void __init pagetable_init(void)
 	vaddr = PKMAP_BASE;
 	fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
-	pgd = swapper_pg_dir + __pgd_offset(vaddr);
-	pud = pud_offset(pgd, vaddr);
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	pte = pte_offset_kernel(pmd, vaddr);
 	pkmap_page_table = pte;
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index c13e46ced425..d7a9d5f211f0 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -35,10 +35,10 @@ extern void build_tlb_refill_handler(void);
 static inline void flush_micro_tlb(void)
 {
 	switch (current_cpu_type()) {
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		write_c0_diag(LOONGSON_DIAG_ITLB);
 		break;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
 		break;
 	default:
@@ -295,6 +295,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
 {
 	unsigned long flags;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
@@ -320,7 +321,8 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
 	mtc0_tlbw_hazard();
 	tlb_probe();
 	tlb_probe_hazard();
-	pudp = pud_offset(pgdp, address);
+	p4dp = p4d_offset(pgdp, address);
+	pudp = pud_offset(p4dp, address);
 	pmdp = pmd_offset(pudp, address);
 	idx = read_c0_index();
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 41bb91f05688..344e6e9ea43b 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -571,8 +571,8 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_BMIPS4350:
 	case CPU_BMIPS4380:
 	case CPU_BMIPS5000:
-	case CPU_LOONGSON2:
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON2EF:
+	case CPU_LOONGSON64:
 	case CPU_R5500:
 		if (m4kc_tlbp_war())
 			uasm_i_nop(p);
@@ -1377,7 +1377,7 @@ static void build_r4000_tlb_refill_handler(void)
 	switch (boot_cpu_type()) {
 	default:
 		if (sizeof(long) == 4) {
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		/* Loongson2 ebase is different than r4k, we have more space */
 			if ((p - tlb_handler) > 64)
 				panic("TLB refill handler space exceeded");
diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c
index 98a063093b69..0ddf03df6268 100644
--- a/arch/mips/mti-malta/malta-dtshim.c
+++ b/arch/mips/mti-malta/malta-dtshim.c
@@ -240,7 +240,7 @@ static void __init remove_gic(void *fdt)
 		 * On systems using the RocIT system controller a GIC may be
 		 * present without a CM. Detect whether that is the case.
 		 */
-		biu_base = ioremap_nocache(MSC01_BIU_REG_BASE,
+		biu_base = ioremap(MSC01_BIU_REG_BASE,
 				MSC01_BIU_ADDRSPACE_SZ);
 		sc_cfg = __raw_readl(biu_base + MSC01_SC_CFG_OFS);
 		if (sc_cfg & MSC01_SC_CFG_GICPRES_MSK) {
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 46b76751f3a5..561154cbcc40 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
 static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
 {
 	int off, b_off;
+	int tcc_reg;
 
 	ctx->flags |= EBPF_SEEN_TC;
 	/*
@@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
 	b_off = b_imm(this_idx + 1, ctx);
 	emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off);
 	/*
-	 * if (--TCC < 0)
+	 * if (TCC-- < 0)
 	 *     goto out;
 	 */
 	/* Delay slot */
-	emit_instr(ctx, daddiu, MIPS_R_T5,
-		   (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1);
+	tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4;
+	emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1);
 	b_off = b_imm(this_idx + 1, ctx);
-	emit_instr(ctx, bltz, MIPS_R_T5, b_off);
+	emit_instr(ctx, bltz, tcc_reg, b_off);
 	/*
 	 * prog = array->ptrs[index];
 	 * if (prog == NULL)
@@ -1803,7 +1804,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	unsigned int image_size;
 	u8 *image_ptr;
 
-	if (!prog->jit_requested || MIPS_ISA_REV < 2)
+	if (!prog->jit_requested)
 		return prog;
 
 	tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile
index 011cf9f891e7..e10f216d0422 100644
--- a/arch/mips/oprofile/Makefile
+++ b/arch/mips/oprofile/Makefile
@@ -14,5 +14,5 @@ oprofile-$(CONFIG_CPU_MIPS64)		+= op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_R10000)		+= op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_SB1)		+= op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_XLR)		+= op_model_mipsxx.o
-oprofile-$(CONFIG_CPU_LOONGSON2)	+= op_model_loongson2.o
-oprofile-$(CONFIG_CPU_LOONGSON3)	+= op_model_loongson3.o
+oprofile-$(CONFIG_CPU_LOONGSON2EF)	+= op_model_loongson2.o
+oprofile-$(CONFIG_CPU_LOONGSON64)	+= op_model_loongson3.o
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 2f33992f6dff..03db268cba5c 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -93,7 +93,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	case CPU_P5600:
 	case CPU_I6400:
 	case CPU_M5150:
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 	case CPU_SB1:
 	case CPU_SB1A:
 	case CPU_R10000:
@@ -104,10 +104,10 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 		lmodel = &op_model_mipsxx_ops;
 		break;
 
-	case CPU_LOONGSON2:
+	case CPU_LOONGSON2EF:
 		lmodel = &op_model_loongson2_ops;
 		break;
-	case CPU_LOONGSON3:
+	case CPU_LOONGSON64:
 		lmodel = &op_model_loongson3_ops;
 		break;
 	};
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index 96c13a0ab078..a537bf98912c 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -420,7 +420,7 @@ static int __init mipsxx_init(void)
 		op_model_mipsxx_ops.cpu_type = "mips/sb1";
 		break;
 
-	case CPU_LOONGSON1:
+	case CPU_LOONGSON32:
 		op_model_mipsxx_ops.cpu_type = "mips/loongson1";
 		break;
 
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index d6de4cb2e31c..342ce10ef593 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -35,7 +35,7 @@ obj-$(CONFIG_LASAT)		+= pci-lasat.o
 obj-$(CONFIG_MIPS_COBALT)	+= fixup-cobalt.o
 obj-$(CONFIG_LEMOTE_FULOONG2E)	+= fixup-fuloong2e.o ops-loongson2.o
 obj-$(CONFIG_LEMOTE_MACH2F)	+= fixup-lemote2f.o ops-loongson2.o
-obj-$(CONFIG_LOONGSON_MACH3X)	+= fixup-loongson3.o ops-loongson3.o
+obj-$(CONFIG_MACH_LOONGSON64)	+= fixup-loongson3.o ops-loongson3.o
 obj-$(CONFIG_MIPS_MALTA)	+= fixup-malta.o pci-malta.o
 obj-$(CONFIG_PMC_MSP7120_GW)	+= fixup-pmcmsp.o ops-pmcmsp.o
 obj-$(CONFIG_PMC_MSP7120_EVAL)	+= fixup-pmcmsp.o ops-pmcmsp.o
diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index 8a41b359cf90..40efc990cdce 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
 
 /*
  * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit
- * bus, so we set the bus's DMA mask accordingly.  However the HT link
+ * bus, so we set the bus's DMA limit accordingly.  However the HT link
  * down the artificial PCI-HT bridge supports 40-bit addressing and the
  * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus
  * width, so we record the PCI-HT bridge's secondary and subordinate bus
- * numbers and do not set the mask for devices present in the inclusive
+ * numbers and do not set the limit for devices present in the inclusive
  * range of those.
  */
-struct sb1250_bus_dma_mask_exclude {
+struct sb1250_bus_dma_limit_exclude {
 	bool set;
 	unsigned char start;
 	unsigned char end;
 };
 
-static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
+static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data)
 {
-	struct sb1250_bus_dma_mask_exclude *exclude = data;
+	struct sb1250_bus_dma_limit_exclude *exclude = data;
 	bool exclude_this;
 	bool ht_bridge;
 
@@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
 			exclude->start, exclude->end);
 	} else {
 		dev_dbg(&dev->dev, "disabling DAC for device");
-		dev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+		dev->dev.bus_dma_limit = DMA_BIT_MASK(32);
 	}
 
 	return 0;
@@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
 
 static void quirk_sb1250_pci_dac(struct pci_dev *dev)
 {
-	struct sb1250_bus_dma_mask_exclude exclude = { .set = false };
+	struct sb1250_bus_dma_limit_exclude exclude = { .set = false };
 
-	pci_walk_bus(dev->bus, sb1250_bus_dma_mask, &exclude);
+	pci_walk_bus(dev->bus, sb1250_bus_dma_limit, &exclude);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
 			quirk_sb1250_pci_dac);
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index 4f2411f489af..01a2af8215c8 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -409,7 +409,7 @@ static int alchemy_pci_probe(struct platform_device *pdev)
 		goto out6;
 	}
 
-	ctx->regs = ioremap_nocache(r->start, resource_size(r));
+	ctx->regs = ioremap(r->start, resource_size(r));
 	if (!ctx->regs) {
 		dev_err(&pdev->dev, "cannot map pci regs\n");
 		ret = -ENODEV;
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index 0fed6fc17fe4..490953f51528 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -441,7 +441,7 @@ static int ar2315_pci_probe(struct platform_device *pdev)
 	apc->mem_res.flags = IORESOURCE_MEM;
 
 	/* Remap PCI config space */
-	apc->cfg_mem = devm_ioremap_nocache(dev, res->start,
+	apc->cfg_mem = devm_ioremap(dev, res->start,
 					    AR2315_PCI_CFG_SIZE);
 	if (!apc->cfg_mem) {
 		dev_err(dev, "failed to remap PCI config space\n");
diff --git a/arch/mips/pci/pci-bcm63xx.c b/arch/mips/pci/pci-bcm63xx.c
index 151d9b5870bb..5548365605c0 100644
--- a/arch/mips/pci/pci-bcm63xx.c
+++ b/arch/mips/pci/pci-bcm63xx.c
@@ -221,7 +221,7 @@ static int __init bcm63xx_register_pci(void)
 	 * a spinlock for each io access, so this is currently kind of
 	 * broken on SMP.
 	 */
-	pci_iospace_start = ioremap_nocache(BCM_PCI_IO_BASE_PA, 4);
+	pci_iospace_start = ioremap(BCM_PCI_IO_BASE_PA, 4);
 	if (!pci_iospace_start)
 		return -ENOMEM;
 
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 441eb9383b20..8e26b120f994 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -7,21 +7,13 @@
  * Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
+#include <asm/sn/addrs.h>
+#include <asm/sn/types.h>
+#include <asm/sn/klconfig.h>
+#include <asm/sn/hub.h>
+#include <asm/sn/ioc3.h>
 #include <asm/pci/bridge.h>
 
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus);
-
-	return bc->baddr + paddr;
-}
-
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
-{
-	return dma_addr & ~(0xffUL << 56);
-}
-
 #ifdef CONFIG_NUMA
 int pcibus_to_node(struct pci_bus *bus)
 {
@@ -31,3 +23,20 @@ int pcibus_to_node(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pcibus_to_node);
 #endif /* CONFIG_NUMA */
+
+static void ip29_fixup_phy(struct pci_dev *dev)
+{
+	int nasid = pcibus_to_node(dev->bus);
+	u32 sid;
+
+	if (nasid != 1)
+		return; /* only needed on second module */
+
+	/* enable ethernet PHY on IP29 systemboard */
+	pci_read_config_dword(dev, PCI_SUBSYSTEM_VENDOR_ID, &sid);
+	if (sid == (PCI_VENDOR_ID_SGI | (IOC3_SUBSYS_IP29_SYSBOARD) << 16))
+		REMOTE_HUB_S(nasid, MD_LED0, 0x09);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3,
+			ip29_fixup_phy);
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index c9f4d4ba058a..e1f12e398136 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -218,7 +218,7 @@ static int rt288x_pci_probe(struct platform_device *pdev)
 {
 	void __iomem *io_map_base;
 
-	rt2880_pci_base = ioremap_nocache(RT2880_PCI_BASE, PAGE_SIZE);
+	rt2880_pci_base = ioremap(RT2880_PCI_BASE, PAGE_SIZE);
 
 	io_map_base = ioremap(RT2880_PCI_IO_BASE, RT2880_PCI_IO_SIZE);
 	rt2880_pci_controller.io_map_base = (unsigned long) io_map_base;
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index 7b4d40354ee7..5c1a196be0c5 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -11,16 +11,38 @@
 #include <linux/dma-direct.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/xtalk-bridge.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/crc16.h>
 
 #include <asm/pci/bridge.h>
 #include <asm/paccess.h>
 #include <asm/sn/irq_alloc.h>
+#include <asm/sn/ioc3.h>
+
+#define CRC16_INIT	0
+#define CRC16_VALID	0xb001
+
+/*
+ * Common phys<->dma mapping for platforms using pci xtalk bridge
+ */
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus);
+
+	return bc->baddr + paddr;
+}
+
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr & ~(0xffUL << 56);
+}
 
 /*
  * Most of the IOC3 PCI config register aren't present
  * we emulate what is needed for a normal PCI enumeration
  */
-static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value)
+static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value, u32 sid)
 {
 	u32 cf, shift, mask;
 
@@ -30,6 +52,9 @@ static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value)
 		if (get_dbe(cf, (u32 *)addr))
 			return PCIBIOS_DEVICE_NOT_FOUND;
 		break;
+	case 0x2c:
+		cf = sid;
+		break;
 	case 0x3c:
 		/* emulate sane interrupt pin value */
 		cf = 0x00000100;
@@ -111,7 +136,8 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
 	 */
 	if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
 		addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-		return ioc3_cfg_rd(addr, where, size, value);
+		return ioc3_cfg_rd(addr, where, size, value,
+				   bc->ioc3_sid[slot]);
 	}
 
 	addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
@@ -149,7 +175,8 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
 	 */
 	if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
 		addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)];
-		return ioc3_cfg_rd(addr, where, size, value);
+		return ioc3_cfg_rd(addr, where, size, value,
+				   bc->ioc3_sid[slot]);
 	}
 
 	addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
@@ -279,16 +306,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask,
 	struct bridge_irq_chip_data *data = d->chip_data;
 	int bit = d->parent_data->hwirq;
 	int pin = d->hwirq;
-	nasid_t nasid;
 	int ret, cpu;
 
 	ret = irq_chip_set_affinity_parent(d, mask, force);
 	if (ret >= 0) {
 		cpu = cpumask_first_and(mask, cpu_online_mask);
-		nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+		data->nasid = cpu_to_node(cpu);
 		bridge_write(data->bc, b_int_addr[pin].addr,
 			     (((data->bc->intr_addr >> 30) & 0x30000) |
-			      bit | (nasid << 8)));
+			      bit | (data->nasid << 8)));
 		bridge_read(data->bc, b_wid_tflush);
 	}
 	return ret;
@@ -426,6 +452,117 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	return irq;
 }
 
+#define IOC3_SID(sid)	(PCI_VENDOR_ID_SGI | ((sid) << 16))
+
+static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc)
+{
+	bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G);
+	bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO);
+}
+
+static void bridge_setup_ip27_baseio(struct bridge_controller *bc)
+{
+	bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO);
+}
+
+static void bridge_setup_ip29_baseio(struct bridge_controller *bc)
+{
+	bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD);
+}
+
+static void bridge_setup_ip30_sysboard(struct bridge_controller *bc)
+{
+	bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD);
+}
+
+static void bridge_setup_menet(struct bridge_controller *bc)
+{
+	bc->ioc3_sid[0] = IOC3_SID(IOC3_SUBSYS_MENET);
+	bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_MENET);
+	bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_MENET);
+	bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4);
+}
+
+#define BRIDGE_BOARD_SETUP(_partno, _setup)	\
+	{ .match = _partno, .setup = _setup }
+
+static const struct {
+	char *match;
+	void (*setup)(struct bridge_controller *bc);
+} bridge_ioc3_devid[] = {
+	BRIDGE_BOARD_SETUP("030-0734-", bridge_setup_ip27_baseio6g),
+	BRIDGE_BOARD_SETUP("030-0880-", bridge_setup_ip27_baseio6g),
+	BRIDGE_BOARD_SETUP("030-1023-", bridge_setup_ip27_baseio),
+	BRIDGE_BOARD_SETUP("030-1124-", bridge_setup_ip27_baseio),
+	BRIDGE_BOARD_SETUP("030-1025-", bridge_setup_ip29_baseio),
+	BRIDGE_BOARD_SETUP("030-1244-", bridge_setup_ip29_baseio),
+	BRIDGE_BOARD_SETUP("030-1389-", bridge_setup_ip29_baseio),
+	BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard),
+	BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard),
+	BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet),
+};
+
+static void bridge_setup_board(struct bridge_controller *bc, char *partnum)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bridge_ioc3_devid); i++)
+		if (!strncmp(partnum, bridge_ioc3_devid[i].match,
+			     strlen(bridge_ioc3_devid[i].match))) {
+			bridge_ioc3_devid[i].setup(bc);
+		}
+}
+
+static int bridge_nvmem_match(struct device *dev, const void *data)
+{
+	const char *name = dev_name(dev);
+	const char *prefix = data;
+
+	if (strlen(name) < strlen(prefix))
+		return 0;
+
+	return memcmp(prefix, dev_name(dev), strlen(prefix)) == 0;
+}
+
+static int bridge_get_partnum(u64 baddr, char *partnum)
+{
+	struct nvmem_device *nvmem;
+	char prefix[24];
+	u8 prom[64];
+	int i, j;
+	int ret;
+
+	snprintf(prefix, sizeof(prefix), "bridge-%012llx-0b-", baddr);
+
+	nvmem = nvmem_device_find(prefix, bridge_nvmem_match);
+	if (IS_ERR(nvmem))
+		return PTR_ERR(nvmem);
+
+	ret = nvmem_device_read(nvmem, 0, 64, prom);
+	nvmem_device_put(nvmem);
+
+	if (ret != 64)
+		return ret;
+
+	if (crc16(CRC16_INIT, prom, 32) != CRC16_VALID ||
+	    crc16(CRC16_INIT, prom + 32, 32) != CRC16_VALID)
+		return -EINVAL;
+
+	/* Assemble part number */
+	j = 0;
+	for (i = 0; i < 19; i++)
+		if (prom[i + 11] != ' ')
+			partnum[j++] = prom[i + 11];
+
+	for (i = 0; i < 6; i++)
+		if (prom[i + 32] != ' ')
+			partnum[j++] = prom[i + 32];
+
+	partnum[j] = 0;
+
+	return 0;
+}
+
 static int bridge_probe(struct platform_device *pdev)
 {
 	struct xtalk_bridge_platform_data *bd = dev_get_platdata(&pdev->dev);
@@ -434,9 +571,14 @@ static int bridge_probe(struct platform_device *pdev)
 	struct pci_host_bridge *host;
 	struct irq_domain *domain, *parent;
 	struct fwnode_handle *fn;
+	char partnum[26];
 	int slot;
 	int err;
 
+	/* get part number from one wire prom */
+	if (bridge_get_partnum(virt_to_phys((void *)bd->bridge_addr), partnum))
+		return -EPROBE_DEFER; /* not available yet */
+
 	parent = irq_get_default_host();
 	if (!parent)
 		return -ENODEV;
@@ -517,6 +659,8 @@ static int bridge_probe(struct platform_device *pdev)
 	}
 	bridge_read(bc, b_wid_tflush);	  /* wait until Bridge PIO complete */
 
+	bridge_setup_board(bc, partnum);
+
 	host->dev.parent = dev;
 	host->sysdata = bc;
 	host->busnr = 0;
diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
index 8c236738b5ee..25372e62783b 100644
--- a/arch/mips/pic32/pic32mzda/early_console.c
+++ b/arch/mips/pic32/pic32mzda/early_console.c
@@ -135,7 +135,7 @@ void __init fw_init_early_console(char port)
 	char *arch_cmdline = pic32_getcmdline();
 	int baud = -1;
 
-	uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00);
+	uart_base = ioremap(PIC32_BASE_UART, 0xc00);
 
 	baud = get_baud_from_cmdline(arch_cmdline);
 	if (port == -1)
diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c
index 504e6ab399b5..f2822632b017 100644
--- a/arch/mips/pic32/pic32mzda/early_pin.c
+++ b/arch/mips/pic32/pic32mzda/early_pin.c
@@ -122,7 +122,7 @@ static const struct
 
 void pic32_pps_input(int function, int pin)
 {
-	void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4);
+	void __iomem *pps_base = ioremap(PPS_BASE, 0xF4);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) {
@@ -252,7 +252,7 @@ static const struct
 
 void pic32_pps_output(int function, int pin)
 {
-	void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170);
+	void __iomem *pps_base = ioremap(PPS_BASE, 0x170);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) {
diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c
index 8e6e8db8dd5f..940c684f6921 100644
--- a/arch/mips/pmcs-msp71xx/msp_serial.c
+++ b/arch/mips/pmcs-msp71xx/msp_serial.c
@@ -105,7 +105,7 @@ void __init msp_serial_setup(void)
 
 	/* Initialize first serial port */
 	up.mapbase	= MSP_UART0_BASE;
-	up.membase	= ioremap_nocache(up.mapbase, MSP_UART_REG_LEN);
+	up.membase	= ioremap(up.mapbase, MSP_UART_REG_LEN);
 	up.irq		= MSP_INT_UART0;
 	up.uartclk	= uartclk;
 	up.regshift	= 2;
@@ -143,7 +143,7 @@ void __init msp_serial_setup(void)
 	}
 
 	up.mapbase	= MSP_UART1_BASE;
-	up.membase	= ioremap_nocache(up.mapbase, MSP_UART_REG_LEN);
+	up.membase	= ioremap(up.mapbase, MSP_UART_REG_LEN);
 	up.irq		= MSP_INT_UART1;
 	up.line		= 1;
 	up.private_data		= (void*)UART1_STATUS_REG;
diff --git a/arch/mips/power/cpu.c b/arch/mips/power/cpu.c
index 3340a5530de3..a15e29dfc7b3 100644
--- a/arch/mips/power/cpu.c
+++ b/arch/mips/power/cpu.c
@@ -19,8 +19,8 @@ void save_processor_state(void)
 
 	if (is_fpu_owner())
 		save_fp(current);
-	if (cpu_has_dsp)
-		save_dsp(current);
+
+	save_dsp(current);
 }
 
 void restore_processor_state(void)
@@ -29,8 +29,8 @@ void restore_processor_state(void)
 
 	if (is_fpu_owner())
 		restore_fp(current);
-	if (cpu_has_dsp)
-		restore_dsp(current);
+
+	restore_dsp(current);
 }
 
 int pfn_is_nosave(unsigned long pfn)
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 1434fa60f3db..94e9ce994494 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -51,6 +51,7 @@ choice
 		select MIPS_GIC
 		select COMMON_CLK
 		select CLKSRC_MIPS_GIC
+		select HAVE_PCI if PCI_MT7621
 endchoice
 
 choice
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index c945d76cfce5..220ca0cd7945 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -165,7 +165,7 @@ static int __init intc_of_init(struct device_node *node,
 				res.name))
 		pr_err("Failed to request intc memory");
 
-	rt_intc_membase = ioremap_nocache(res.start,
+	rt_intc_membase = ioremap(res.start,
 					resource_size(&res));
 	if (!rt_intc_membase)
 		panic("Failed to remap intc memory");
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 59b23095bfbb..90c6d4a11c5d 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -43,7 +43,7 @@ __iomem void *plat_of_remap_node(const char *node)
 				res.name))
 		panic("Failed to request resources for %s", node);
 
-	return ioremap_nocache(res.start, resource_size(&res));
+	return ioremap(res.start, resource_size(&res));
 }
 
 void __init device_tree_init(void)
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index c9ecf17f8660..dd34f1b32b79 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -286,7 +286,7 @@ static int __init plat_setup_devices(void)
 	nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000;
 
 	/* Read and map device controller 3 */
-	dev3.base = ioremap_nocache(readl(IDT434_REG_BASE + DEV3BASE), 1);
+	dev3.base = ioremap(readl(IDT434_REG_BASE + DEV3BASE), 1);
 
 	if (!dev3.base) {
 		printk(KERN_ERR "rb532: cannot remap device controller 3\n");
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index fdc704abc8d4..94f02ada4082 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -192,7 +192,7 @@ int __init rb532_gpio_init(void)
 	struct resource *r;
 
 	r = rb532_gpio_reg0_res;
-	rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
+	rb532_gpio_chip->regbase = ioremap(r->start, resource_size(r));
 
 	if (!rb532_gpio_chip->regbase) {
 		printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c
index 26e957b21fbf..303cc3dc1749 100644
--- a/arch/mips/rb532/prom.c
+++ b/arch/mips/rb532/prom.c
@@ -110,7 +110,7 @@ void __init prom_init(void)
 	phys_addr_t memsize;
 	phys_addr_t ddrbase;
 
-	ddr = ioremap_nocache(ddr_reg[0].start,
+	ddr = ioremap(ddr_reg[0].start,
 			ddr_reg[0].end - ddr_reg[0].start);
 
 	if (!ddr) {
diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c
index 1aa4df1385cb..51af9d374d66 100644
--- a/arch/mips/rb532/setup.c
+++ b/arch/mips/rb532/setup.c
@@ -49,7 +49,7 @@ void __init plat_mem_setup(void)
 
 	set_io_port_base(KSEG1);
 
-	pci_reg = ioremap_nocache(pci0_res[0].start,
+	pci_reg = ioremap(pci0_res[0].start,
 				pci0_res[0].end - pci0_res[0].start);
 	if (!pci_reg) {
 		printk(KERN_ERR "Could not remap PCI registers\n");
diff --git a/arch/mips/sgi-ip22/ip22-mc.c b/arch/mips/sgi-ip22/ip22-mc.c
index 1944d41507ef..74e5b9e27d6c 100644
--- a/arch/mips/sgi-ip22/ip22-mc.c
+++ b/arch/mips/sgi-ip22/ip22-mc.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/spinlock.h>
 
 #include <asm/io.h>
@@ -40,70 +41,36 @@ static inline unsigned int get_bank_config(int bank)
 	return bank % 2 ? res & 0xffff : res >> 16;
 }
 
-struct mem {
-	unsigned long addr;
-	unsigned long size;
-};
-
+#if defined(CONFIG_SGI_IP28) || defined(CONFIG_32BIT)
+static void __init probe_memory(void)
+{
+	/* prom detects all usable memory */
+}
+#else
 /*
- * Detect installed memory, do some sanity checks and notify kernel about it
+ * Detect installed memory, which PROM misses
  */
 static void __init probe_memory(void)
 {
-	int i, j, found, cnt = 0;
-	struct mem bank[4];
-	struct mem space[2] = {{SGIMC_SEG0_BADDR, 0}, {SGIMC_SEG1_BADDR, 0}};
+	unsigned long addr, size;
+	int i;
 
 	printk(KERN_INFO "MC: Probing memory configuration:\n");
-	for (i = 0; i < ARRAY_SIZE(bank); i++) {
+	for (i = 0; i < 4; i++) {
 		unsigned int tmp = get_bank_config(i);
 		if (!(tmp & SGIMC_MCONFIG_BVALID))
 			continue;
 
-		bank[cnt].size = get_bank_size(tmp);
-		bank[cnt].addr = get_bank_addr(tmp);
+		size = get_bank_size(tmp);
+		addr = get_bank_addr(tmp);
 		printk(KERN_INFO " bank%d: %3ldM @ %08lx\n",
-			i, bank[cnt].size / 1024 / 1024, bank[cnt].addr);
-		cnt++;
-	}
+			i, size / 1024 / 1024, addr);
 
-	/* And you thought bubble sort is dead algorithm... */
-	do {
-		unsigned long addr, size;
-
-		found = 0;
-		for (i = 1; i < cnt; i++)
-			if (bank[i-1].addr > bank[i].addr) {
-				addr = bank[i].addr;
-				size = bank[i].size;
-				bank[i].addr = bank[i-1].addr;
-				bank[i].size = bank[i-1].size;
-				bank[i-1].addr = addr;
-				bank[i-1].size = size;
-				found = 1;
-			}
-	} while (found);
-
-	/* Figure out how are memory banks mapped into spaces */
-	for (i = 0; i < cnt; i++) {
-		found = 0;
-		for (j = 0; j < ARRAY_SIZE(space) && !found; j++)
-			if (space[j].addr + space[j].size == bank[i].addr) {
-				space[j].size += bank[i].size;
-				found = 1;
-			}
-		/* There is either hole or overlapping memory */
-		if (!found)
-			printk(KERN_CRIT "MC: Memory configuration mismatch "
-					 "(%08lx), expect Bus Error soon\n",
-					 bank[i].addr);
+		if (addr >= SGIMC_SEG1_BADDR)
+			memblock_add(addr, size);
 	}
-
-	for (i = 0; i < ARRAY_SIZE(space); i++)
-		if (space[i].size)
-			add_memory_region(space[i].addr, space[i].size,
-					  BOOT_MEM_RAM);
 }
+#endif
 
 void __init sgimc_init(void)
 {
@@ -205,10 +172,9 @@ void __init sgimc_init(void)
 	probe_memory();
 }
 
-void __init prom_meminit(void) {}
-void __init prom_free_prom_memory(void)
-{
 #ifdef CONFIG_SGI_IP28
+void __init prom_cleanup(void)
+{
 	u32 mconfig1;
 	unsigned long flags;
 	spinlock_t lock;
@@ -233,5 +199,5 @@ void __init prom_free_prom_memory(void)
 	sgimc->mconfig1 = mconfig1;
 	iob();
 	spin_unlock_irqrestore(&lock, flags);
-#endif
 }
+#endif
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h
new file mode 100644
index 000000000000..3ffbcf9bfd41
--- /dev/null
+++ b/arch/mips/sgi-ip27/ip27-common.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP27_COMMON_H
+#define __IP27_COMMON_H
+
+extern void ip27_reboot_setup(void);
+extern void hub_rt_clock_event_init(void);
+extern const struct plat_smp_ops ip27_smp_ops;
+
+#endif /* __IP27_COMMON_H */
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index 6ebb8845a77c..a538d0ceb61d 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -25,10 +25,9 @@ static int force_fire_and_forget = 1;
  * @size:	size of the PIO mapping
  *
  **/
-unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
+unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
 			  unsigned long xtalk_addr, size_t size)
 {
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
 	unsigned i;
 
 	/* use small-window mapping if possible */
@@ -44,7 +43,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 
 	xtalk_addr &= ~(BWIN_SIZE-1);
 	for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) {
-		if (test_and_set_bit(i, hub_data(cnode)->h_bigwin_used))
+		if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used))
 			continue;
 
 		/*
@@ -171,13 +170,12 @@ static void hub_set_piomode(nasid_t nasid)
  *
  * @hub:	hubinfo structure for our hub
  */
-void hub_pio_init(cnodeid_t cnode)
+void hub_pio_init(nasid_t nasid)
 {
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
 	unsigned i;
 
 	/* initialize big window piomaps for this hub */
-	bitmap_zero(hub_data(cnode)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
+	bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
 	for (i = 0; i < HUB_NUM_BIG_WINDOW; i++)
 		IIO_ITTE_DISABLE(nasid, i);
 
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 79a52c472782..f597e1ee2df7 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -13,9 +13,11 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/cpumask.h>
+#include <asm/bootinfo.h>
 #include <asm/cpu.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
+#include <asm/sgialib.h>
 #include <asm/time.h>
 #include <asm/sn/types.h>
 #include <asm/sn/sn0/addrs.h>
@@ -36,30 +38,23 @@
 #include <asm/sn/sn0/ip27.h>
 #include <asm/sn/mapped_kernel.h>
 
+#include "ip27-common.h"
+
 #define CPU_NONE		(cpuid_t)-1
 
-static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES);
+static DECLARE_BITMAP(hub_init_mask, MAX_NUMNODES);
 nasid_t master_nasid = INVALID_NASID;
 
-cnodeid_t	nasid_to_compact_node[MAX_NASIDS];
-nasid_t		compact_to_nasid_node[MAX_COMPACT_NODES];
-cnodeid_t	cpuid_to_compact_node[MAXCPUS];
-
-EXPORT_SYMBOL(nasid_to_compact_node);
-
 struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(sn_cpu_info);
 
-extern void pcibr_setup(cnodeid_t);
-
-static void per_hub_init(cnodeid_t cnode)
+static void per_hub_init(nasid_t nasid)
 {
-	struct hub_data *hub = hub_data(cnode);
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
+	struct hub_data *hub = hub_data(nasid);
 
 	cpumask_set_cpu(smp_processor_id(), &hub->h_cpus);
 
-	if (test_and_set_bit(cnode, hub_init_mask))
+	if (test_and_set_bit(nasid, hub_init_mask))
 		return;
 	/*
 	 * Set CRB timeout at 5ms, (< PI timeout of 10ms)
@@ -67,7 +62,7 @@ static void per_hub_init(cnodeid_t cnode)
 	REMOTE_HUB_S(nasid, IIO_ICTP, 0x800);
 	REMOTE_HUB_S(nasid, IIO_ICTO, 0xff);
 
-	hub_rtc_init(cnode);
+	hub_rtc_init(nasid);
 
 	if (nasid) {
 		/* copy exception handlers from first node to current node */
@@ -83,15 +78,15 @@ void per_cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	int slice = LOCAL_HUB_L(PI_CPU_NUM);
-	cnodeid_t cnode = get_compact_nodeid();
-	struct hub_data *hub = hub_data(cnode);
+	nasid_t nasid = get_nasid();
+	struct hub_data *hub = hub_data(nasid);
 
 	if (test_and_set_bit(slice, &hub->slice_map))
 		return;
 
 	clear_c0_status(ST0_IM);
 
-	per_hub_init(cnode);
+	per_hub_init(nasid);
 
 	cpu_time_init();
 	install_ipi();
@@ -113,21 +108,13 @@ get_nasid(void)
 			 >> NSRI_NODEID_SHFT);
 }
 
-/*
- * Map the physical node id to a virtual node id (virtual node ids are contiguous).
- */
-cnodeid_t get_compact_nodeid(void)
-{
-	return NASID_TO_COMPACT_NODEID(get_nasid());
-}
-
-extern void ip27_reboot_setup(void);
-
 void __init plat_mem_setup(void)
 {
 	u64 p, e, n_mode;
 	nasid_t nid;
 
+	register_smp_ops(&ip27_smp_ops);
+
 	ip27_reboot_setup();
 
 	/*
@@ -166,3 +153,15 @@ void __init plat_mem_setup(void)
 	ioport_resource.end = ~0UL;
 	set_io_port_base(IO_BASE);
 }
+
+const char *get_system_type(void)
+{
+	return "SGI Origin";
+}
+
+void __init prom_init(void)
+{
+	prom_init_cmdline(fw_arg0, (LONG *)fw_arg1);
+	prom_meminit();
+}
+
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 37be04975831..c72ae330ea93 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -73,7 +73,10 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask)
 	int cpu;
 
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-	nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_any(cpu_online_mask);
+
+	nasid = cpu_to_node(cpu);
 	hd->cpu = cpu;
 	if (!cputoslice(cpu)) {
 		hd->irq_mask[0] = REMOTE_HUB_PTR(nasid, PI_INT_MASK0_A);
@@ -137,8 +140,9 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq,
 			    handle_level_irq, NULL, NULL);
 
 	/* use CPU connected to nearest hub */
-	hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid));
+	hub = hub_data(info->nasid);
 	setup_hub_mask(hd, &hub->h_cpus);
+	info->nasid = cpu_to_node(hd->cpu);
 
 	/* Make sure it's not already pending when we connect it. */
 	REMOTE_HUB_CLR_INTR(info->nasid, swlevel);
diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c
index 41171ff0c75e..6cb2160e7689 100644
--- a/arch/mips/sgi-ip27/ip27-klconfig.c
+++ b/arch/mips/sgi-ip27/ip27-klconfig.c
@@ -73,11 +73,6 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type)
 	return (lboard_t *)NULL;
 }
 
-cnodeid_t get_cpu_cnode(cpuid_t cpu)
-{
-	return CPUID_TO_COMPACT_NODEID(cpu);
-}
-
 klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice)
 {
 	lboard_t *brd;
@@ -102,19 +97,14 @@ klcpu_t *sn_get_cpuinfo(cpuid_t cpu)
 	nasid_t nasid;
 	int slice;
 	klcpu_t *acpu;
-	gda_t *gdap = GDA;
-	cnodeid_t cnode;
 
 	if (!(cpu < MAXCPUS)) {
 		printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu);
 		return NULL;
 	}
 
-	cnode = get_cpu_cnode(cpu);
-	if (cnode == INVALID_CNODEID)
-		return NULL;
-
-	if ((nasid = gdap->g_nasidtable[cnode]) == INVALID_NASID)
+	nasid = cputonasid(cpu);
+	if (nasid  == INVALID_NASID)
 		return NULL;
 
 	for (slice = 0; slice < CPUS_PER_NODE; slice++) {
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index a4f01328de62..ee1c6ff4aa00 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -38,13 +38,13 @@ void __init setup_replication_mask(void)
 #error Kernel replication works with mapped kernel support. No calias support.
 #endif
 	{
-		cnodeid_t	cnode;
+		nasid_t nasid;
 
-		for_each_online_node(cnode) {
-			if (cnode == 0)
+		for_each_online_node(nasid) {
+			if (nasid == 0)
 				continue;
 			/* Advertise that we have a copy of the kernel */
-			cpumask_set_cpu(cnode, &ktext_repmask);
+			cpumask_set_cpu(nasid, &ktext_repmask);
 		}
 	}
 #endif
@@ -85,7 +85,6 @@ static __init void copy_kernel(nasid_t dest_nasid)
 
 void __init replicate_kernel_text(void)
 {
-	cnodeid_t cnode;
 	nasid_t client_nasid;
 	nasid_t server_nasid;
 
@@ -94,13 +93,12 @@ void __init replicate_kernel_text(void)
 	/* Record where the master node should get its kernel text */
 	set_ktext_source(master_nasid, master_nasid);
 
-	for_each_online_node(cnode) {
-		if (cnode == 0)
+	for_each_online_node(client_nasid) {
+		if (client_nasid == 0)
 			continue;
-		client_nasid = COMPACT_TO_NASID_NODEID(cnode);
 
 		/* Check if this node should get a copy of the kernel */
-		if (cpumask_test_cpu(cnode, &ktext_repmask)) {
+		if (cpumask_test_cpu(client_nasid, &ktext_repmask)) {
 			server_nasid = client_nasid;
 			copy_kernel(server_nasid);
 		}
@@ -115,17 +113,16 @@ void __init replicate_kernel_text(void)
  * data structures on the first couple of pages of the first slot of each
  * node. If this is the case, getfirstfree(node) > getslotstart(node, 0).
  */
-unsigned long node_getfirstfree(cnodeid_t cnode)
+unsigned long node_getfirstfree(nasid_t nasid)
 {
 	unsigned long loadbase = REP_BASE;
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
 	unsigned long offset;
 
 #ifdef CONFIG_MAPPED_KERNEL
 	loadbase += 16777216;
 #endif
 	offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase;
-	if ((cnode == 0) || (cpumask_test_cpu(cnode, &ktext_repmask)))
+	if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask)))
 		return TO_NODE(nasid, offset) >> PAGE_SHIFT;
 	else
 		return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 8624a885d95b..563aad5e6398 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -33,7 +33,7 @@
 #define SLOT_PFNSHIFT		(SLOT_SHIFT - PAGE_SHIFT)
 #define PFN_NASIDSHFT		(NASID_SHFT - PAGE_SHIFT)
 
-struct node_data *__node_data[MAX_COMPACT_NODES];
+struct node_data *__node_data[MAX_NUMNODES];
 
 EXPORT_SYMBOL(__node_data);
 
@@ -44,23 +44,23 @@ static int is_fine_dirmode(void)
 	return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE;
 }
 
-static u64 get_region(cnodeid_t cnode)
+static u64 get_region(nasid_t nasid)
 {
 	if (fine_mode)
-		return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_FINEREG_SHFT;
+		return nasid >> NASID_TO_FINEREG_SHFT;
 	else
-		return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_COARSEREG_SHFT;
+		return nasid >> NASID_TO_COARSEREG_SHFT;
 }
 
 static u64 region_mask;
 
 static void gen_region_mask(u64 *region_mask)
 {
-	cnodeid_t cnode;
+	nasid_t nasid;
 
 	(*region_mask) = 0;
-	for_each_online_node(cnode) {
-		(*region_mask) |= 1ULL << get_region(cnode);
+	for_each_online_node(nasid) {
+		(*region_mask) |= 1ULL << get_region(nasid);
 	}
 }
 
@@ -104,23 +104,18 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth)
 	router_a->rou_rflag = 0;
 }
 
-unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
+unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
 EXPORT_SYMBOL(__node_distances);
 
 static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b)
 {
 	klrou_t *router, *router_a = NULL, *router_b = NULL;
 	lboard_t *brd, *dest_brd;
-	cnodeid_t cnode;
 	nasid_t nasid;
 	int port;
 
 	/* Figure out which routers nodes in question are connected to */
-	for_each_online_node(cnode) {
-		nasid = COMPACT_TO_NASID_NODEID(cnode);
-
-		if (nasid == -1) continue;
-
+	for_each_online_node(nasid) {
 		brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid),
 					KLTYPE_ROUTER);
 
@@ -176,19 +171,16 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b)
 
 static void __init init_topology_matrix(void)
 {
-	nasid_t nasid, nasid2;
-	cnodeid_t row, col;
+	nasid_t row, col;
 
-	for (row = 0; row < MAX_COMPACT_NODES; row++)
-		for (col = 0; col < MAX_COMPACT_NODES; col++)
+	for (row = 0; row < MAX_NUMNODES; row++)
+		for (col = 0; col < MAX_NUMNODES; col++)
 			__node_distances[row][col] = -1;
 
 	for_each_online_node(row) {
-		nasid = COMPACT_TO_NASID_NODEID(row);
 		for_each_online_node(col) {
-			nasid2 = COMPACT_TO_NASID_NODEID(col);
 			__node_distances[row][col] =
-				compute_node_distance(nasid, nasid2);
+				compute_node_distance(row, col);
 		}
 	}
 }
@@ -196,12 +188,11 @@ static void __init init_topology_matrix(void)
 static void __init dump_topology(void)
 {
 	nasid_t nasid;
-	cnodeid_t cnode;
 	lboard_t *brd, *dest_brd;
 	int port;
 	int router_num = 0;
 	klrou_t *router;
-	cnodeid_t row, col;
+	nasid_t row, col;
 
 	pr_info("************** Topology ********************\n");
 
@@ -216,11 +207,7 @@ static void __init dump_topology(void)
 		pr_cont("\n");
 	}
 
-	for_each_online_node(cnode) {
-		nasid = COMPACT_TO_NASID_NODEID(cnode);
-
-		if (nasid == -1) continue;
-
+	for_each_online_node(nasid) {
 		brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid),
 					KLTYPE_ROUTER);
 
@@ -254,21 +241,17 @@ static void __init dump_topology(void)
 	}
 }
 
-static unsigned long __init slot_getbasepfn(cnodeid_t cnode, int slot)
+static unsigned long __init slot_getbasepfn(nasid_t nasid, int slot)
 {
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
-
 	return ((unsigned long)nasid << PFN_NASIDSHFT) | (slot << SLOT_PFNSHIFT);
 }
 
-static unsigned long __init slot_psize_compute(cnodeid_t node, int slot)
+static unsigned long __init slot_psize_compute(nasid_t nasid, int slot)
 {
-	nasid_t nasid;
 	lboard_t *brd;
 	klmembnk_t *banks;
 	unsigned long size;
 
-	nasid = COMPACT_TO_NASID_NODEID(node);
 	/* Find the node board */
 	brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27);
 	if (!brd)
@@ -298,7 +281,7 @@ static unsigned long __init slot_psize_compute(cnodeid_t node, int slot)
 
 static void __init mlreset(void)
 {
-	int i;
+	nasid_t nasid;
 
 	master_nasid = get_nasid();
 	fine_mode = is_fine_dirmode();
@@ -321,11 +304,7 @@ static void __init mlreset(void)
 	/*
 	 * Set all nodes' calias sizes to 8k
 	 */
-	for_each_online_node(i) {
-		nasid_t nasid;
-
-		nasid = COMPACT_TO_NASID_NODEID(i);
-
+	for_each_online_node(nasid) {
 		/*
 		 * Always have node 0 in the region mask, otherwise
 		 * CALIAS accesses get exceptions since the hub
@@ -350,7 +329,7 @@ static void __init szmem(void)
 {
 	unsigned long slot_psize, slot0sz = 0, nodebytes;	/* Hack to detect problem configs */
 	int slot;
-	cnodeid_t node;
+	nasid_t node;
 
 	for_each_online_node(node) {
 		nodebytes = 0;
@@ -380,7 +359,7 @@ static void __init szmem(void)
 	}
 }
 
-static void __init node_mem_init(cnodeid_t node)
+static void __init node_mem_init(nasid_t node)
 {
 	unsigned long slot_firstpfn = slot_getbasepfn(node, 0);
 	unsigned long slot_freepfn = node_getfirstfree(node);
@@ -402,12 +381,8 @@ static void __init node_mem_init(cnodeid_t node)
 	slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
 			       sizeof(struct hub_data));
 
-	free_bootmem_with_active_regions(node, end_pfn);
-
 	memblock_reserve(slot_firstpfn << PAGE_SHIFT,
 			 ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT));
-
-	sparse_memory_present_with_active_regions(node);
 }
 
 /*
@@ -427,19 +402,21 @@ static struct node_data null_node = {
  */
 void __init prom_meminit(void)
 {
-	cnodeid_t node;
+	nasid_t node;
 
 	mlreset();
 	szmem();
 	max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
 
-	for (node = 0; node < MAX_COMPACT_NODES; node++) {
+	for (node = 0; node < MAX_NUMNODES; node++) {
 		if (node_online(node)) {
 			node_mem_init(node);
 			continue;
 		}
 		__node_data[node] = &null_node;
 	}
+
+	memblocks_present();
 }
 
 void __init prom_free_prom_memory(void)
diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
index 3aae388561d9..daf3670d94e7 100644
--- a/arch/mips/sgi-ip27/ip27-nmi.c
+++ b/arch/mips/sgi-ip27/ip27-nmi.c
@@ -17,8 +17,6 @@
 #define NODE_NUM_CPUS(n)	CPUS_PER_NODE
 #endif
 
-#define CNODEID_NONE (cnodeid_t)-1
-
 typedef unsigned long machreg_t;
 
 static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -152,16 +150,10 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice)
  * Copy the cpu registers which have been saved in the IP27prom format
  * into the eframe format for the node under consideration.
  */
-void nmi_node_eframe_save(cnodeid_t  cnode)
+void nmi_node_eframe_save(nasid_t nasid)
 {
-	nasid_t nasid;
 	int slice;
 
-	/* Make sure that we have a valid node */
-	if (cnode == CNODEID_NONE)
-		return;
-
-	nasid = COMPACT_TO_NASID_NODEID(cnode);
 	if (nasid == INVALID_NASID)
 		return;
 
@@ -178,10 +170,10 @@ void nmi_node_eframe_save(cnodeid_t  cnode)
 void
 nmi_eframes_save(void)
 {
-	cnodeid_t	cnode;
+	nasid_t nasid;
 
-	for_each_online_node(cnode)
-		nmi_node_eframe_save(cnode);
+	for_each_online_node(nasid)
+		nmi_node_eframe_save(nasid);
 }
 
 void
diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c
index e44a15d4f573..74d078247e49 100644
--- a/arch/mips/sgi-ip27/ip27-reset.c
+++ b/arch/mips/sgi-ip27/ip27-reset.c
@@ -26,6 +26,8 @@
 #include <asm/sn/gda.h>
 #include <asm/sn/sn0/hub.h>
 
+#include "ip27-common.h"
+
 void machine_restart(char *command) __noreturn;
 void machine_halt(void) __noreturn;
 void machine_power_off(void) __noreturn;
@@ -45,8 +47,7 @@ static void ip27_machine_restart(char *command)
 #endif
 #if 0
 	for_each_online_node(i)
-		REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG,
-							PROMOP_REBOOT);
+		REMOTE_HUB_S(i, PROMOP_REG, PROMOP_REBOOT);
 #else
 	LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET);
 #endif
@@ -61,8 +62,7 @@ static void ip27_machine_halt(void)
 	smp_send_stop();
 #endif
 	for_each_online_node(i)
-		REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG,
-							PROMOP_RESTART);
+		REMOTE_HUB_S(i, PROMOP_REG, PROMOP_RESTART);
 	LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET);
 	noreturn;
 }
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index 20b81209c6b8..faa0244c8b0c 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -27,38 +27,19 @@
 #include <asm/sn/sn0/hubio.h>
 #include <asm/sn/sn0/ip27.h>
 
+#include "ip27-common.h"
+
 /*
  * Takes as first input the PROM assigned cpu id, and the kernel
  * assigned cpu id as the second.
  */
-static void alloc_cpupda(cpuid_t cpu, int cpunum)
+static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum)
 {
-	cnodeid_t node = get_cpu_cnode(cpu);
-	nasid_t nasid = COMPACT_TO_NASID_NODEID(node);
-
 	cputonasid(cpunum) = nasid;
-	sn_cpu_info[cpunum].p_nodeid = node;
 	cputoslice(cpunum) = get_cpu_slice(cpu);
 }
 
-static nasid_t get_actual_nasid(lboard_t *brd)
-{
-	klhub_t *hub;
-
-	if (!brd)
-		return INVALID_NASID;
-
-	/* find out if we are a completely disabled brd. */
-	hub  = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB);
-	if (!hub)
-		return INVALID_NASID;
-	if (!(hub->hub_info.flags & KLINFO_ENABLE))	/* disabled node brd */
-		return hub->hub_info.physid;
-	else
-		return brd->brd_nasid;
-}
-
-static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
+static int do_cpumask(nasid_t nasid, int highest)
 {
 	static int tot_cpus_found = 0;
 	lboard_t *brd;
@@ -72,16 +53,13 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
 		acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU);
 		while (acpu) {
 			cpuid = acpu->cpu_info.virtid;
-			/* cnode is not valid for completely disabled brds */
-			if (get_actual_nasid(brd) == brd->brd_nasid)
-				cpuid_to_compact_node[cpuid] = cnode;
-			if (cpuid > highest)
-				highest = cpuid;
 			/* Only let it join in if it's marked enabled */
 			if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
 			    (tot_cpus_found != NR_CPUS)) {
+				if (cpuid > highest)
+					highest = cpuid;
 				set_cpu_possible(cpuid, true);
-				alloc_cpupda(cpuid, tot_cpus_found);
+				alloc_cpupda(nasid, cpuid, tot_cpus_found);
 				cpus_found++;
 				tot_cpus_found++;
 			}
@@ -103,29 +81,13 @@ void cpu_node_probe(void)
 	int i, highest = 0;
 	gda_t *gdap = GDA;
 
-	/*
-	 * Initialize the arrays to invalid nodeid (-1)
-	 */
-	for (i = 0; i < MAX_COMPACT_NODES; i++)
-		compact_to_nasid_node[i] = INVALID_NASID;
-	for (i = 0; i < MAX_NASIDS; i++)
-		nasid_to_compact_node[i] = INVALID_CNODEID;
-	for (i = 0; i < MAXCPUS; i++)
-		cpuid_to_compact_node[i] = INVALID_CNODEID;
-
-	/*
-	 * MCD - this whole "compact node" stuff can probably be dropped,
-	 * as we can handle sparse numbering now
-	 */
 	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_COMPACT_NODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++) {
 		nasid_t nasid = gdap->g_nasidtable[i];
 		if (nasid == INVALID_NASID)
 			break;
-		compact_to_nasid_node[i] = nasid;
-		nasid_to_compact_node[nasid] = i;
-		node_set_online(num_online_nodes());
-		highest = do_cpumask(i, nasid, highest);
+		node_set_online(nasid);
+		highest = do_cpumask(nasid, highest);
 	}
 
 	printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes());
@@ -162,11 +124,10 @@ static void ip27_send_ipi_single(int destid, unsigned int action)
 	irq += cputoslice(destid);
 
 	/*
-	 * Convert the compact hub number to the NASID to get the correct
-	 * part of the address space.  Then set the interrupt bit associated
-	 * with the CPU we want to send the interrupt to.
+	 * Set the interrupt bit associated with the CPU we want to
+	 * send the interrupt to.
 	 */
-	REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
+	REMOTE_HUB_SEND_INTR(cpu_to_node(destid), irq);
 }
 
 static void ip27_send_ipi_mask(const struct cpumask *mask, unsigned int action)
@@ -184,8 +145,6 @@ static void ip27_init_cpu(void)
 
 static void ip27_smp_finish(void)
 {
-	extern void hub_rt_clock_event_init(void);
-
 	hub_rt_clock_event_init();
 	local_irq_enable();
 }
@@ -208,23 +167,20 @@ static int ip27_boot_secondary(int cpu, struct task_struct *idle)
 
 static void __init ip27_smp_setup(void)
 {
-	cnodeid_t	cnode;
+	nasid_t nasid;
 
-	for_each_online_node(cnode) {
-		if (cnode == 0)
+	for_each_online_node(nasid) {
+		if (nasid == 0)
 			continue;
-		intr_clear_all(COMPACT_TO_NASID_NODEID(cnode));
+		intr_clear_all(nasid);
 	}
 
 	replicate_kernel_text();
 
 	/*
-	 * Assumption to be fixed: we're always booted on logical / physical
-	 * processor 0.	 While we're always running on logical processor 0
-	 * this still means this is physical processor zero; it might for
-	 * example be disabled in the firmware.
+	 * PROM sets up system, that boot cpu is always first CPU on nasid 0
 	 */
-	alloc_cpupda(0, 0);
+	alloc_cpupda(0, 0, 0);
 }
 
 static void __init ip27_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 9b4b9ac621a3..17302bbfa7a6 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -38,6 +38,8 @@
 #include <asm/sn/sn0/hubio.h>
 #include <asm/pci/bridge.h>
 
+#include "ip27-common.h"
+
 static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
 {
 	unsigned int cpu = smp_processor_id();
@@ -170,7 +172,7 @@ void cpu_time_init(void)
 	printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed);
 }
 
-void hub_rtc_init(cnodeid_t cnode)
+void hub_rtc_init(nasid_t nasid)
 {
 
 	/*
@@ -178,7 +180,7 @@ void hub_rtc_init(cnodeid_t cnode)
 	 * If this is not the current node then it is a cpuless
 	 * node and timeouts will not happen there.
 	 */
-	if (get_compact_nodeid() == cnode) {
+	if (get_nasid() == nasid) {
 		LOCAL_HUB_S(PI_RT_EN_A, 1);
 		LOCAL_HUB_S(PI_RT_EN_B, 1);
 		LOCAL_HUB_S(PI_PROF_EN_A, 0);
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index 4a1f0b0c29e2..5218b900f855 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/sgi-w1.h>
 #include <linux/platform_data/xtalk-bridge.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/types.h>
@@ -26,9 +27,35 @@
 static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
 {
 	struct xtalk_bridge_platform_data *bd;
+	struct sgi_w1_platform_data *wd;
 	struct platform_device *pdev;
+	struct resource w1_res;
 	unsigned long offset;
 
+	offset = NODE_OFFSET(nasid);
+
+	wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+	if (!wd)
+		goto no_mem;
+
+	snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx",
+		 offset + (widget << SWIN_SIZE_BITS));
+
+	memset(&w1_res, 0, sizeof(w1_res));
+	w1_res.start = offset + (widget << SWIN_SIZE_BITS) +
+				offsetof(struct bridge_regs, b_nic);
+	w1_res.end = w1_res.start + 3;
+	w1_res.flags = IORESOURCE_MEM;
+
+	pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
+	if (!pdev) {
+		kfree(wd);
+		goto no_mem;
+	}
+	platform_device_add_resources(pdev, &w1_res, 1);
+	platform_device_add_data(pdev, wd, sizeof(*wd));
+	platform_device_add(pdev);
+
 	bd = kzalloc(sizeof(*bd), GFP_KERNEL);
 	if (!bd)
 		goto no_mem;
@@ -38,7 +65,6 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
 		goto no_mem;
 	}
 
-	offset = NODE_OFFSET(nasid);
 
 	bd->bridge_addr = RAW_NODE_SWIN_BASE(nasid, widget);
 	bd->intr_addr	= BIT_ULL(47) + 0x01800000 + PI_INT_PEND_MOD;
@@ -46,14 +72,14 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
 	bd->masterwid	= masterwid;
 
 	bd->mem.name	= "Bridge PCI MEM";
-	bd->mem.start	= offset + (widget << SWIN_SIZE_BITS);
-	bd->mem.end	= bd->mem.start + SWIN_SIZE - 1;
+	bd->mem.start	= offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0;
+	bd->mem.end	= offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1;
 	bd->mem.flags	= IORESOURCE_MEM;
 	bd->mem_offset	= offset;
 
 	bd->io.name	= "Bridge PCI IO";
-	bd->io.start	= offset + (widget << SWIN_SIZE_BITS);
-	bd->io.end	= bd->io.start + SWIN_SIZE - 1;
+	bd->io.start	= offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0;
+	bd->io.end	= offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1;
 	bd->io.flags	= IORESOURCE_IO;
 	bd->io_offset	= offset;
 
@@ -81,6 +107,8 @@ static int probe_one_port(nasid_t nasid, int widget, int masterwid)
 		bridge_platform_create(nasid, widget, masterwid);
 		break;
 	default:
+		pr_info("xtalk:n%d/%d unknown widget (0x%x)\n",
+			nasid, widget, partnum);
 		break;
 	}
 
@@ -138,14 +166,12 @@ static int xbow_probe(nasid_t nasid)
 	return 0;
 }
 
-static void xtalk_probe_node(cnodeid_t nid)
+static void xtalk_probe_node(nasid_t nasid)
 {
 	volatile u64		hubreg;
-	nasid_t			nasid;
 	xwidget_part_num_t	partnum;
 	widgetreg_t		widget_id;
 
-	nasid = COMPACT_TO_NASID_NODEID(nid);
 	hubreg = REMOTE_HUB_L(nasid, IIO_LLP_CSR);
 
 	/* check whether the link is up */
@@ -173,10 +199,10 @@ static void xtalk_probe_node(cnodeid_t nid)
 
 static int __init xtalk_init(void)
 {
-	cnodeid_t cnode;
+	nasid_t nasid;
 
-	for_each_online_node(cnode)
-		xtalk_probe_node(cnode);
+	for_each_online_node(nasid)
+		xtalk_probe_node(nasid);
 
 	return 0;
 }
diff --git a/arch/mips/sgi-ip30/Makefile b/arch/mips/sgi-ip30/Makefile
new file mode 100644
index 000000000000..18cf561b3d61
--- /dev/null
+++ b/arch/mips/sgi-ip30/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the IP30 specific kernel interface routines under Linux.
+#
+
+obj-y	:= ip30-irq.o ip30-power.o ip30-setup.o ip30-timer.o ip30-xtalk.o
+
+obj-$(CONFIG_EARLY_PRINTK)	+= ip30-console.o
+obj-$(CONFIG_SMP)		+= ip30-smp.o
diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform
new file mode 100644
index 000000000000..2b5695c2049a
--- /dev/null
+++ b/arch/mips/sgi-ip30/Platform
@@ -0,0 +1,8 @@
+#
+# SGI-IP30 (Octane/Octane2)
+#
+ifdef CONFIG_SGI_IP30
+platform-$(CONFIG_SGI_IP30)		+= sgi-ip30/
+cflags-$(CONFIG_SGI_IP30)		+= -I$(srctree)/arch/mips/include/asm/mach-ip30
+load-$(CONFIG_SGI_IP30)			+= 0xa800000020004000
+endif
diff --git a/arch/mips/sgi-ip30/ip30-common.h b/arch/mips/sgi-ip30/ip30-common.h
new file mode 100644
index 000000000000..d2bcaee712f3
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-common.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP30_COMMON_H
+#define __IP30_COMMON_H
+
+extern struct plat_smp_ops ip30_smp_ops;
+extern void __init ip30_per_cpu_init(void);
+
+#endif /* __IP30_COMMON_H */
diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c
new file mode 100644
index 000000000000..b91f8c4fdc78
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-console.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/io.h>
+
+#include <asm/sn/ioc3.h>
+
+static inline struct ioc3_uartregs *console_uart(void)
+{
+	struct ioc3 *ioc3;
+
+	ioc3 = (struct ioc3 *)((void *)(0x900000001f600000));
+	return &ioc3->sregs.uarta;
+}
+
+void prom_putchar(char c)
+{
+	struct ioc3_uartregs *uart = console_uart();
+
+	while ((readb(&uart->iu_lsr) & 0x20) == 0)
+		cpu_relax();
+
+	writeb(c, &uart->iu_thr);
+}
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
new file mode 100644
index 000000000000..d46655b914f1
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-irq.c: Highlevel interrupt handling for IP30 architecture.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/tick.h>
+#include <linux/types.h>
+
+#include <asm/irq_cpu.h>
+#include <asm/sgi/heart.h>
+
+struct heart_irq_data {
+	u64	*irq_mask;
+	int	cpu;
+};
+
+static DECLARE_BITMAP(heart_irq_map, HEART_NUM_IRQS);
+
+static DEFINE_PER_CPU(unsigned long, irq_enable_mask);
+
+static inline int heart_alloc_int(void)
+{
+	int bit;
+
+again:
+	bit = find_first_zero_bit(heart_irq_map, HEART_NUM_IRQS);
+	if (bit >= HEART_NUM_IRQS)
+		return -ENOSPC;
+
+	if (test_and_set_bit(bit, heart_irq_map))
+		goto again;
+
+	return bit;
+}
+
+static void ip30_error_irq(struct irq_desc *desc)
+{
+	u64 pending, mask, cause, error_irqs, err_reg;
+	int cpu = smp_processor_id();
+	int i;
+
+	pending = heart_read(&heart_regs->isr);
+	mask = heart_read(&heart_regs->imr[cpu]);
+	cause = heart_read(&heart_regs->cause);
+	error_irqs = (pending & HEART_L4_INT_MASK & mask);
+
+	/* Bail if there's nothing to process (how did we get here, then?) */
+	if (unlikely(!error_irqs))
+		return;
+
+	/* Prevent any of the error IRQs from firing again. */
+	heart_write(mask & ~(pending), &heart_regs->imr[cpu]);
+
+	/* Ack all error IRQs. */
+	heart_write(HEART_L4_INT_MASK, &heart_regs->clear_isr);
+
+	/*
+	 * If we also have a cause value, then something happened, so loop
+	 * through the error IRQs and report a "heart attack" for each one
+	 * and print the value of the HEART cause register.  This is really
+	 * primitive right now, but it should hopefully work until a more
+	 * robust error handling routine can be put together.
+	 *
+	 * Refer to heart.h for the HC_* macros to work out the cause
+	 * that got us here.
+	 */
+	if (cause) {
+		pr_alert("IP30: CPU%d: HEART ATTACK! ISR = 0x%.16llx, IMR = 0x%.16llx, CAUSE = 0x%.16llx\n",
+			 cpu, pending, mask, cause);
+
+		if (cause & HC_COR_MEM_ERR) {
+			err_reg = heart_read(&heart_regs->mem_err_addr);
+			pr_alert("  HEART_MEMERR_ADDR = 0x%.16llx\n", err_reg);
+		}
+
+		/* i = 63; i >= 51; i-- */
+		for (i = HEART_ERR_MASK_END; i >= HEART_ERR_MASK_START; i--)
+			if ((pending >> i) & 1)
+				pr_alert("  HEART Error IRQ #%d\n", i);
+
+		/* XXX: Seems possible to loop forever here, so panic(). */
+		panic("IP30: Fatal Error !\n");
+	}
+
+	/* Unmask the error IRQs. */
+	heart_write(mask, &heart_regs->imr[cpu]);
+}
+
+static void ip30_normal_irq(struct irq_desc *desc)
+{
+	int cpu = smp_processor_id();
+	struct irq_domain *domain;
+	u64 pend, mask;
+	int irq;
+
+	pend = heart_read(&heart_regs->isr);
+	mask = (heart_read(&heart_regs->imr[cpu]) &
+		(HEART_L0_INT_MASK | HEART_L1_INT_MASK | HEART_L2_INT_MASK));
+
+	pend &= mask;
+	if (unlikely(!pend))
+		return;
+
+#ifdef CONFIG_SMP
+	if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_0)) {
+		heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0),
+			    &heart_regs->clear_isr);
+		scheduler_ipi();
+	} else if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_1)) {
+		heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_1),
+			    &heart_regs->clear_isr);
+		scheduler_ipi();
+	} else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_0)) {
+		heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0),
+			    &heart_regs->clear_isr);
+		generic_smp_call_function_interrupt();
+	} else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_1)) {
+		heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_1),
+			    &heart_regs->clear_isr);
+		generic_smp_call_function_interrupt();
+	} else
+#endif
+	{
+		domain = irq_desc_get_handler_data(desc);
+		irq = irq_linear_revmap(domain, __ffs(pend));
+		if (irq)
+			generic_handle_irq(irq);
+		else
+			spurious_interrupt();
+	}
+}
+
+static void ip30_ack_heart_irq(struct irq_data *d)
+{
+	heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr);
+}
+
+static void ip30_mask_heart_irq(struct irq_data *d)
+{
+	struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+	unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+	clear_bit(d->hwirq, mask);
+	heart_write(*mask, &heart_regs->imr[hd->cpu]);
+}
+
+static void ip30_mask_and_ack_heart_irq(struct irq_data *d)
+{
+	struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+	unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+	clear_bit(d->hwirq, mask);
+	heart_write(*mask, &heart_regs->imr[hd->cpu]);
+	heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr);
+}
+
+static void ip30_unmask_heart_irq(struct irq_data *d)
+{
+	struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+	unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu);
+
+	set_bit(d->hwirq, mask);
+	heart_write(*mask, &heart_regs->imr[hd->cpu]);
+}
+
+static int ip30_set_heart_irq_affinity(struct irq_data *d,
+				       const struct cpumask *mask, bool force)
+{
+	struct heart_irq_data *hd = irq_data_get_irq_chip_data(d);
+
+	if (!hd)
+		return -EINVAL;
+
+	if (irqd_is_started(d))
+		ip30_mask_and_ack_heart_irq(d);
+
+	hd->cpu = cpumask_first_and(mask, cpu_online_mask);
+
+	if (irqd_is_started(d))
+		ip30_unmask_heart_irq(d);
+
+	irq_data_update_effective_affinity(d, cpumask_of(hd->cpu));
+
+	return 0;
+}
+
+static struct irq_chip heart_irq_chip = {
+	.name			= "HEART",
+	.irq_ack		= ip30_ack_heart_irq,
+	.irq_mask		= ip30_mask_heart_irq,
+	.irq_mask_ack		= ip30_mask_and_ack_heart_irq,
+	.irq_unmask		= ip30_unmask_heart_irq,
+	.irq_set_affinity	= ip30_set_heart_irq_affinity,
+};
+
+static int heart_domain_alloc(struct irq_domain *domain, unsigned int virq,
+			      unsigned int nr_irqs, void *arg)
+{
+	struct irq_alloc_info *info = arg;
+	struct heart_irq_data *hd;
+	int hwirq;
+
+	if (nr_irqs > 1 || !info)
+		return -EINVAL;
+
+	hd = kzalloc(sizeof(*hd), GFP_KERNEL);
+	if (!hd)
+		return -ENOMEM;
+
+	hwirq = heart_alloc_int();
+	if (hwirq < 0) {
+		kfree(hd);
+		return -EAGAIN;
+	}
+	irq_domain_set_info(domain, virq, hwirq, &heart_irq_chip, hd,
+			    handle_level_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void heart_domain_free(struct irq_domain *domain,
+			      unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *irqd;
+
+	if (nr_irqs > 1)
+		return;
+
+	irqd = irq_domain_get_irq_data(domain, virq);
+	clear_bit(irqd->hwirq, heart_irq_map);
+	if (irqd && irqd->chip_data)
+		kfree(irqd->chip_data);
+}
+
+static const struct irq_domain_ops heart_domain_ops = {
+	.alloc = heart_domain_alloc,
+	.free  = heart_domain_free,
+};
+
+void __init ip30_install_ipi(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long *mask = &per_cpu(irq_enable_mask, cpu);
+
+	set_bit(HEART_L2_INT_RESCHED_CPU_0 + cpu, mask);
+	heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0 + cpu),
+		    &heart_regs->clear_isr);
+	set_bit(HEART_L2_INT_CALL_CPU_0 + cpu, mask);
+	heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0 + cpu),
+		    &heart_regs->clear_isr);
+
+	heart_write(*mask, &heart_regs->imr[cpu]);
+}
+
+void __init arch_init_irq(void)
+{
+	struct irq_domain *domain;
+	struct fwnode_handle *fn;
+	unsigned long *mask;
+	int i;
+
+	mips_cpu_irq_init();
+
+	/* Mask all IRQs. */
+	heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[0]);
+	heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[1]);
+	heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[2]);
+	heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[3]);
+
+	/* Ack everything. */
+	heart_write(HEART_ACK_ALL_MASK, &heart_regs->clear_isr);
+
+	/* Enable specific HEART error IRQs for each CPU. */
+	mask = &per_cpu(irq_enable_mask, 0);
+	*mask |= HEART_CPU0_ERR_MASK;
+	heart_write(*mask, &heart_regs->imr[0]);
+	mask = &per_cpu(irq_enable_mask, 1);
+	*mask |= HEART_CPU1_ERR_MASK;
+	heart_write(*mask, &heart_regs->imr[1]);
+
+	/*
+	 * Some HEART bits are reserved by hardware or by software convention.
+	 * Mark these as reserved right away so they won't be accidentally
+	 * used later.
+	 */
+	set_bit(HEART_L0_INT_GENERIC, heart_irq_map);
+	set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_0, heart_irq_map);
+	set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_1, heart_irq_map);
+	set_bit(HEART_L2_INT_RESCHED_CPU_0, heart_irq_map);
+	set_bit(HEART_L2_INT_RESCHED_CPU_1, heart_irq_map);
+	set_bit(HEART_L2_INT_CALL_CPU_0, heart_irq_map);
+	set_bit(HEART_L2_INT_CALL_CPU_1, heart_irq_map);
+	set_bit(HEART_L3_INT_TIMER, heart_irq_map);
+
+	/* Reserve the error interrupts (#51 to #63). */
+	for (i = HEART_L4_INT_XWID_ERR_9; i <= HEART_L4_INT_HEART_EXCP; i++)
+		set_bit(i, heart_irq_map);
+
+	fn = irq_domain_alloc_named_fwnode("HEART");
+	WARN_ON(fn == NULL);
+	if (!fn)
+		return;
+	domain = irq_domain_create_linear(fn, HEART_NUM_IRQS,
+					  &heart_domain_ops, NULL);
+	WARN_ON(domain == NULL);
+	if (!domain)
+		return;
+
+	irq_set_default_host(domain);
+
+	irq_set_percpu_devid(IP30_HEART_L0_IRQ);
+	irq_set_chained_handler_and_data(IP30_HEART_L0_IRQ, ip30_normal_irq,
+					 domain);
+	irq_set_percpu_devid(IP30_HEART_L1_IRQ);
+	irq_set_chained_handler_and_data(IP30_HEART_L1_IRQ, ip30_normal_irq,
+					 domain);
+	irq_set_percpu_devid(IP30_HEART_L2_IRQ);
+	irq_set_chained_handler_and_data(IP30_HEART_L2_IRQ, ip30_normal_irq,
+					 domain);
+	irq_set_percpu_devid(IP30_HEART_ERR_IRQ);
+	irq_set_chained_handler_and_data(IP30_HEART_ERR_IRQ, ip30_error_irq,
+					 domain);
+}
diff --git a/arch/mips/sgi-ip30/ip30-power.c b/arch/mips/sgi-ip30/ip30-power.c
new file mode 100644
index 000000000000..120b3f3d5108
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-power.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-power.c: Software powerdown and reset handling for IP30 architecture.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *               2014 Joshua Kinard <kumba@gentoo.org>
+ *               2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/rtc/ds1685.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+
+#include <asm/reboot.h>
+#include <asm/sgi/heart.h>
+
+static void __noreturn ip30_machine_restart(char *cmd)
+{
+	/*
+	 * Execute HEART cold reset
+	 *   Yes, it's cold-HEARTed!
+	 */
+	heart_write((heart_read(&heart_regs->mode) | HM_COLD_RST),
+		    &heart_regs->mode);
+	unreachable();
+}
+
+static int __init ip30_reboot_setup(void)
+{
+	_machine_restart = ip30_machine_restart;
+
+	return 0;
+}
+
+subsys_initcall(ip30_reboot_setup);
diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c
new file mode 100644
index 000000000000..44b1607e964d
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-setup.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SGI IP30 miscellaneous setup bits.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *               2007 Joshua Kinard <kumba@gentoo.org>
+ *               2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+
+#include <asm/smp-ops.h>
+#include <asm/sgialib.h>
+#include <asm/time.h>
+#include <asm/sgi/heart.h>
+
+#include "ip30-common.h"
+
+/* Structure of accessible HEART registers located in XKPHYS space. */
+struct ip30_heart_regs __iomem *heart_regs = HEART_XKPHYS_BASE;
+
+/*
+ * ARCS will report up to the first 1GB of
+ * memory if queried.  Anything beyond that
+ * is marked as reserved.
+ */
+#define IP30_MAX_PROM_MEMORY	_AC(0x40000000, UL)
+
+/*
+ * Memory in the Octane starts at 512MB
+ */
+#define IP30_MEMORY_BASE	_AC(0x20000000, UL)
+
+/*
+ * If using ARCS to probe for memory, then
+ * remaining memory will start at this offset.
+ */
+#define IP30_REAL_MEMORY_START  (IP30_MEMORY_BASE + IP30_MAX_PROM_MEMORY)
+
+#define MEM_SHIFT(x) ((x) >> 20)
+
+static void __init ip30_mem_init(void)
+{
+	unsigned long total_mem;
+	phys_addr_t addr;
+	phys_addr_t size;
+	u32 memcfg;
+	int i;
+
+	total_mem = 0;
+	for (i = 0; i < HEART_MEMORY_BANKS; i++) {
+		memcfg = __raw_readl(&heart_regs->mem_cfg.l[i]);
+		if (!(memcfg & HEART_MEMCFG_VALID))
+			continue;
+
+		addr = memcfg & HEART_MEMCFG_ADDR_MASK;
+		addr <<= HEART_MEMCFG_UNIT_SHIFT;
+		addr += IP30_MEMORY_BASE;
+		size = memcfg & HEART_MEMCFG_SIZE_MASK;
+		size >>= HEART_MEMCFG_SIZE_SHIFT;
+		size += 1;
+		size <<= HEART_MEMCFG_UNIT_SHIFT;
+
+		total_mem += size;
+
+		if (addr >= IP30_REAL_MEMORY_START)
+			memblock_free(addr, size);
+		else if ((addr + size) > IP30_REAL_MEMORY_START)
+			memblock_free(IP30_REAL_MEMORY_START,
+				     size - IP30_MAX_PROM_MEMORY);
+	}
+	pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem));
+}
+
+/**
+ * ip30_cpu_time_init - platform time initialization.
+ */
+static void __init ip30_cpu_time_init(void)
+{
+	int cpu = smp_processor_id();
+	u64 heart_compare;
+	unsigned int start, end;
+	int time_diff;
+
+	heart_compare = (heart_read(&heart_regs->count) +
+			 (HEART_CYCLES_PER_SEC / 10));
+	start = read_c0_count();
+	while ((heart_read(&heart_regs->count) - heart_compare) & 0x800000)
+		cpu_relax();
+
+	end = read_c0_count();
+	time_diff = (int)end - (int)start;
+	mips_hpt_frequency = time_diff * 10;
+	pr_info("IP30: CPU%d: %d MHz CPU detected.\n", cpu,
+		(mips_hpt_frequency * 2) / 1000000);
+}
+
+void __init ip30_per_cpu_init(void)
+{
+	/* Disable all interrupts. */
+	clear_c0_status(ST0_IM);
+
+	ip30_cpu_time_init();
+#ifdef CONFIG_SMP
+	ip30_install_ipi();
+#endif
+
+	enable_percpu_irq(IP30_HEART_L0_IRQ, IRQ_TYPE_NONE);
+	enable_percpu_irq(IP30_HEART_L1_IRQ, IRQ_TYPE_NONE);
+	enable_percpu_irq(IP30_HEART_L2_IRQ, IRQ_TYPE_NONE);
+	enable_percpu_irq(IP30_HEART_ERR_IRQ, IRQ_TYPE_NONE);
+}
+
+/**
+ * plat_mem_setup - despite the name, misc setup happens here.
+ */
+void __init plat_mem_setup(void)
+{
+	ip30_mem_init();
+
+	/* XXX: Hard lock on /sbin/init if this flag isn't specified. */
+	prom_flags |= PROM_FLAG_DONT_FREE_TEMP;
+
+#ifdef CONFIG_SMP
+	register_smp_ops(&ip30_smp_ops);
+#else
+	ip30_per_cpu_init();
+#endif
+
+	ioport_resource.start = 0;
+	ioport_resource.end = ~0UL;
+	set_io_port_base(IO_BASE);
+}
diff --git a/arch/mips/sgi-ip30/ip30-smp.c b/arch/mips/sgi-ip30/ip30-smp.c
new file mode 100644
index 000000000000..4bfe654602b1
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-smp.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-smp.c: SMP on IP30 architecture.
+ * Based off of the original IP30 SMP code, with inspiration from ip27-smp.c
+ * and smp-bmips.c.
+ *
+ * Copyright (C) 2005-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *               2006-2007, 2014-2015 Joshua Kinard <kumba@gentoo.org>
+ *               2009 Johannes Dickgreber <tanzy@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/time.h>
+#include <asm/sgi/heart.h>
+
+#include "ip30-common.h"
+
+#define MPCONF_MAGIC	0xbaddeed2
+#define	MPCONF_ADDR	0xa800000000000600L
+#define MPCONF_SIZE	0x80
+#define MPCONF(x)	(MPCONF_ADDR + (x) * MPCONF_SIZE)
+
+/* HEART can theoretically do 4 CPUs, but only 2 are physically possible */
+#define MP_NCPU		2
+
+struct mpconf {
+	u32 magic;
+	u32 prid;
+	u32 physid;
+	u32 virtid;
+	u32 scachesz;
+	u16 fanloads;
+	u16 res;
+	void *launch;
+	void *rendezvous;
+	u64 res2[3];
+	void *stackaddr;
+	void *lnch_parm;
+	void *rndv_parm;
+	u32 idleflag;
+};
+
+static void ip30_smp_send_ipi_single(int cpu, u32 action)
+{
+	int irq;
+
+	switch (action) {
+	case SMP_RESCHEDULE_YOURSELF:
+		irq = HEART_L2_INT_RESCHED_CPU_0;
+		break;
+	case SMP_CALL_FUNCTION:
+		irq = HEART_L2_INT_CALL_CPU_0;
+		break;
+	default:
+		panic("IP30: Unknown action value in %s!\n", __func__);
+	}
+
+	irq += cpu;
+
+	/* Poke the other CPU -- it's got mail! */
+	heart_write(BIT_ULL(irq), &heart_regs->set_isr);
+}
+
+static void ip30_smp_send_ipi_mask(const struct cpumask *mask, u32 action)
+{
+	u32 i;
+
+	for_each_cpu(i, mask)
+		ip30_smp_send_ipi_single(i, action);
+}
+
+static void __init ip30_smp_setup(void)
+{
+	int i;
+	int ncpu = 0;
+	struct mpconf *mpc;
+
+	init_cpu_possible(cpumask_of(0));
+
+	/* Scan the MPCONF structure and enumerate available CPUs. */
+	for (i = 0; i < MP_NCPU; i++) {
+		mpc = (struct mpconf *)MPCONF(i);
+		if (mpc->magic == MPCONF_MAGIC) {
+			set_cpu_possible(i, true);
+			__cpu_number_map[i] = ++ncpu;
+			__cpu_logical_map[ncpu] = i;
+			pr_info("IP30: Slot: %d, PrID: %.8x, PhyID: %d, VirtID: %d\n",
+				i, mpc->prid, mpc->physid, mpc->virtid);
+		}
+	}
+	pr_info("IP30: Detected %d CPU(s) present.\n", ncpu);
+
+	/*
+	 * Set the coherency algorithm to '5' (cacheable coherent
+	 * exclusive on write).  This is needed on IP30 SMP, especially
+	 * for R14000 CPUs, otherwise, instruction bus errors will
+	 * occur upon reaching userland.
+	 */
+	change_c0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_COW);
+}
+
+static void __init ip30_smp_prepare_cpus(unsigned int max_cpus)
+{
+	/* nothing to do here */
+}
+
+static int __init ip30_smp_boot_secondary(int cpu, struct task_struct *idle)
+{
+	struct mpconf *mpc = (struct mpconf *)MPCONF(cpu);
+
+	/* Stack pointer (sp). */
+	mpc->stackaddr = (void *)__KSTK_TOS(idle);
+
+	/* Global pointer (gp). */
+	mpc->lnch_parm = task_thread_info(idle);
+
+	mb(); /* make sure stack and lparm are written */
+
+	/* Boot CPUx. */
+	mpc->launch = smp_bootstrap;
+
+	/* CPUx now executes smp_bootstrap, then ip30_smp_finish */
+	return 0;
+}
+
+static void __init ip30_smp_init_cpu(void)
+{
+	ip30_per_cpu_init();
+}
+
+static void __init ip30_smp_finish(void)
+{
+	enable_percpu_irq(get_c0_compare_int(), IRQ_TYPE_NONE);
+	local_irq_enable();
+}
+
+struct plat_smp_ops __read_mostly ip30_smp_ops = {
+	.send_ipi_single	= ip30_smp_send_ipi_single,
+	.send_ipi_mask		= ip30_smp_send_ipi_mask,
+	.smp_setup		= ip30_smp_setup,
+	.prepare_cpus		= ip30_smp_prepare_cpus,
+	.boot_secondary		= ip30_smp_boot_secondary,
+	.init_secondary		= ip30_smp_init_cpu,
+	.smp_finish		= ip30_smp_finish,
+	.prepare_boot_cpu	= ip30_smp_init_cpu,
+};
diff --git a/arch/mips/sgi-ip30/ip30-timer.c b/arch/mips/sgi-ip30/ip30-timer.c
new file mode 100644
index 000000000000..d13e105478ae
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-timer.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-timer.c: Clocksource/clockevent support for the
+ *               HEART chip in SGI Octane (IP30) systems.
+ *
+ * Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ * Copyright (C) 2011 Joshua Kinard <kumba@gentoo.org>
+ */
+
+#include <linux/clocksource.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/sched_clock.h>
+
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+#include <asm/sgi/heart.h>
+
+static u64 ip30_heart_counter_read(struct clocksource *cs)
+{
+	return heart_read(&heart_regs->count);
+}
+
+struct clocksource ip30_heart_clocksource = {
+	.name	= "HEART",
+	.rating	= 400,
+	.read	= ip30_heart_counter_read,
+	.mask	= CLOCKSOURCE_MASK(52),
+	.flags	= (CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES),
+};
+
+static u64 notrace ip30_heart_read_sched_clock(void)
+{
+	return heart_read(&heart_regs->count);
+}
+
+static void __init ip30_heart_clocksource_init(void)
+{
+	struct clocksource *cs = &ip30_heart_clocksource;
+
+	clocksource_register_hz(cs, HEART_CYCLES_PER_SEC);
+
+	sched_clock_register(ip30_heart_read_sched_clock, 52,
+			     HEART_CYCLES_PER_SEC);
+}
+
+void __init plat_time_init(void)
+{
+	int irq = get_c0_compare_int();
+
+	cp0_timer_irq_installed = 1;
+	c0_compare_irqaction.percpu_dev_id = &mips_clockevent_device;
+	c0_compare_irqaction.flags &= ~IRQF_SHARED;
+	irq_set_handler(irq, handle_percpu_devid_irq);
+	irq_set_percpu_devid(irq);
+	setup_percpu_irq(irq, &c0_compare_irqaction);
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+
+	ip30_heart_clocksource_init();
+}
diff --git a/arch/mips/sgi-ip30/ip30-xtalk.c b/arch/mips/sgi-ip30/ip30-xtalk.c
new file mode 100644
index 000000000000..8a2894645529
--- /dev/null
+++ b/arch/mips/sgi-ip30/ip30-xtalk.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ip30-xtalk.c - Very basic Crosstalk (XIO) detection support.
+ *   Copyright (C) 2004-2007 Stanislaw Skowronek <skylark@unaligned.org>
+ *   Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
+ *   Copyright (C) 2007, 2014-2016 Joshua Kinard <kumba@gentoo.org>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/sgi-w1.h>
+#include <linux/platform_data/xtalk-bridge.h>
+
+#include <asm/xtalk/xwidget.h>
+#include <asm/pci/bridge.h>
+
+#define IP30_SWIN_BASE(widget) \
+		(0x0000000010000000 | (((unsigned long)(widget)) << 24))
+
+#define IP30_RAW_SWIN_BASE(widget)	(IO_BASE + IP30_SWIN_BASE(widget))
+
+#define IP30_SWIN_SIZE		(1 << 24)
+
+#define IP30_WIDGET_XBOW        _AC(0x0, UL)    /* XBow is always 0 */
+#define IP30_WIDGET_HEART       _AC(0x8, UL)    /* HEART is always 8 */
+#define IP30_WIDGET_PCI_BASE    _AC(0xf, UL)    /* BaseIO PCI is always 15 */
+
+#define XTALK_NODEV             0xffffffff
+
+#define XBOW_REG_LINK_STAT_0    0x114
+#define XBOW_REG_LINK_BLK_SIZE  0x40
+#define XBOW_REG_LINK_ALIVE     0x80000000
+
+#define HEART_INTR_ADDR		0x00000080
+
+#define xtalk_read	__raw_readl
+
+static void bridge_platform_create(int widget, int masterwid)
+{
+	struct xtalk_bridge_platform_data *bd;
+	struct sgi_w1_platform_data *wd;
+	struct platform_device *pdev;
+	struct resource w1_res;
+
+	wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+	if (!wd)
+		goto no_mem;
+
+	snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx",
+		 IP30_SWIN_BASE(widget));
+
+	memset(&w1_res, 0, sizeof(w1_res));
+	w1_res.start = IP30_SWIN_BASE(widget) +
+				offsetof(struct bridge_regs, b_nic);
+	w1_res.end = w1_res.start + 3;
+	w1_res.flags = IORESOURCE_MEM;
+
+	pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
+	if (!pdev) {
+		kfree(wd);
+		goto no_mem;
+	}
+	platform_device_add_resources(pdev, &w1_res, 1);
+	platform_device_add_data(pdev, wd, sizeof(*wd));
+	platform_device_add(pdev);
+
+	bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+	if (!bd)
+		goto no_mem;
+	pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO);
+	if (!pdev) {
+		kfree(bd);
+		goto no_mem;
+	}
+
+	bd->bridge_addr	= IP30_RAW_SWIN_BASE(widget);
+	bd->intr_addr	= HEART_INTR_ADDR;
+	bd->nasid	= 0;
+	bd->masterwid	= masterwid;
+
+	bd->mem.name	= "Bridge PCI MEM";
+	bd->mem.start	= IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0;
+	bd->mem.end	= IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1;
+	bd->mem.flags	= IORESOURCE_MEM;
+	bd->mem_offset	= IP30_SWIN_BASE(widget);
+
+	bd->io.name	= "Bridge PCI IO";
+	bd->io.start	= IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0;
+	bd->io.end	= IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1;
+	bd->io.flags	= IORESOURCE_IO;
+	bd->io_offset	= IP30_SWIN_BASE(widget);
+
+	platform_device_add_data(pdev, bd, sizeof(*bd));
+	platform_device_add(pdev);
+	pr_info("xtalk:%x bridge widget\n", widget);
+	return;
+
+no_mem:
+	pr_warn("xtalk:%x bridge create out of memory\n", widget);
+}
+
+static unsigned int __init xbow_widget_active(s8 wid)
+{
+	unsigned int link_stat;
+
+	link_stat = xtalk_read((void *)(IP30_RAW_SWIN_BASE(IP30_WIDGET_XBOW) +
+					XBOW_REG_LINK_STAT_0 +
+					XBOW_REG_LINK_BLK_SIZE *
+					(wid - 8)));
+
+	return (link_stat & XBOW_REG_LINK_ALIVE) ? 1 : 0;
+}
+
+static void __init xtalk_init_widget(s8 wid, s8 masterwid)
+{
+	xwidget_part_num_t partnum;
+	widgetreg_t widget_id;
+
+	if (!xbow_widget_active(wid))
+		return;
+
+	widget_id = xtalk_read((void *)(IP30_RAW_SWIN_BASE(wid) + WIDGET_ID));
+
+	partnum = XWIDGET_PART_NUM(widget_id);
+
+	switch (partnum) {
+	case BRIDGE_WIDGET_PART_NUM:
+	case XBRIDGE_WIDGET_PART_NUM:
+		bridge_platform_create(wid, masterwid);
+		break;
+	default:
+		pr_info("xtalk:%x unknown widget (0x%x)\n", wid, partnum);
+		break;
+	}
+}
+
+static int __init ip30_xtalk_init(void)
+{
+	int i;
+
+	/*
+	 * Walk widget IDs backwards so that BaseIO is probed first.  This
+	 * ensures that the BaseIO IOC3 is always detected as eth0.
+	 */
+	for (i = IP30_WIDGET_PCI_BASE; i > IP30_WIDGET_HEART; i--)
+		xtalk_init_widget(i, IP30_WIDGET_HEART);
+
+	return 0;
+}
+
+arch_initcall(ip30_xtalk_init);
diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c
index 5a2a82148d8d..c3909bd8dd1a 100644
--- a/arch/mips/sgi-ip32/ip32-platform.c
+++ b/arch/mips/sgi-ip32/ip32-platform.c
@@ -115,7 +115,7 @@ ip32_rtc_platform_data[] = {
 		.bcd_mode = true,
 		.no_irq = false,
 		.uie_unsupported = false,
-		.alloc_io_resources = true,
+		.access_type = ds1685_reg_direct,
 		.plat_prepare_poweroff = ip32_prepare_poweroff,
 	},
 };
diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c
index 160b88000b4b..f6fa9afcbfd3 100644
--- a/arch/mips/sni/rm200.c
+++ b/arch/mips/sni/rm200.c
@@ -399,10 +399,10 @@ void __init sni_rm200_i8259_irqs(void)
 {
 	int i;
 
-	rm200_pic_master = ioremap_nocache(0x16000020, 4);
+	rm200_pic_master = ioremap(0x16000020, 4);
 	if (!rm200_pic_master)
 		return;
-	rm200_pic_slave = ioremap_nocache(0x160000a0, 4);
+	rm200_pic_slave = ioremap(0x160000a0, 4);
 	if (!rm200_pic_slave) {
 		iounmap(rm200_pic_master);
 		return;
diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore
index 56d34ccccce4..b0209450d9ff 100644
--- a/arch/mips/tools/.gitignore
+++ b/arch/mips/tools/.gitignore
@@ -1 +1,2 @@
 elf-entry
+loongson3-llsc-check
diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile
index 3baee4bc6775..aaef688749f5 100644
--- a/arch/mips/tools/Makefile
+++ b/arch/mips/tools/Makefile
@@ -3,3 +3,8 @@ hostprogs-y := elf-entry
 PHONY += elf-entry
 elf-entry: $(obj)/elf-entry
 	@:
+
+hostprogs-$(CONFIG_CPU_LOONGSON3_WORKAROUNDS) += loongson3-llsc-check
+PHONY += loongson3-llsc-check
+loongson3-llsc-check: $(obj)/loongson3-llsc-check
+	@:
diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c
new file mode 100644
index 000000000000..0ebddd0ae46f
--- /dev/null
+++ b/arch/mips/tools/loongson3-llsc-check.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <byteswap.h>
+#include <elf.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifdef be32toh
+/* If libc provides le{16,32,64}toh() then we'll use them */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+# define le16toh(x)	(x)
+# define le32toh(x)	(x)
+# define le64toh(x)	(x)
+#elif BYTE_ORDER == BIG_ENDIAN
+# define le16toh(x)	bswap_16(x)
+# define le32toh(x)	bswap_32(x)
+# define le64toh(x)	bswap_64(x)
+#endif
+
+/* MIPS opcodes, in bits 31:26 of an instruction */
+#define OP_SPECIAL	0x00
+#define OP_REGIMM	0x01
+#define OP_BEQ		0x04
+#define OP_BNE		0x05
+#define OP_BLEZ		0x06
+#define OP_BGTZ		0x07
+#define OP_BEQL		0x14
+#define OP_BNEL		0x15
+#define OP_BLEZL	0x16
+#define OP_BGTZL	0x17
+#define OP_LL		0x30
+#define OP_LLD		0x34
+#define OP_SC		0x38
+#define OP_SCD		0x3c
+
+/* Bits 20:16 of OP_REGIMM instructions */
+#define REGIMM_BLTZ	0x00
+#define REGIMM_BGEZ	0x01
+#define REGIMM_BLTZL	0x02
+#define REGIMM_BGEZL	0x03
+#define REGIMM_BLTZAL	0x10
+#define REGIMM_BGEZAL	0x11
+#define REGIMM_BLTZALL	0x12
+#define REGIMM_BGEZALL	0x13
+
+/* Bits 5:0 of OP_SPECIAL instructions */
+#define SPECIAL_SYNC	0x0f
+
+static void usage(FILE *f)
+{
+	fprintf(f, "Usage: loongson3-llsc-check /path/to/vmlinux\n");
+}
+
+static int se16(uint16_t x)
+{
+	return (int16_t)x;
+}
+
+static bool is_ll(uint32_t insn)
+{
+	switch (insn >> 26) {
+	case OP_LL:
+	case OP_LLD:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool is_sc(uint32_t insn)
+{
+	switch (insn >> 26) {
+	case OP_SC:
+	case OP_SCD:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool is_sync(uint32_t insn)
+{
+	/* Bits 31:11 should all be zeroes */
+	if (insn >> 11)
+		return false;
+
+	/* Bits 5:0 specify the SYNC special encoding */
+	if ((insn & 0x3f) != SPECIAL_SYNC)
+		return false;
+
+	return true;
+}
+
+static bool is_branch(uint32_t insn, int *off)
+{
+	switch (insn >> 26) {
+	case OP_BEQ:
+	case OP_BEQL:
+	case OP_BNE:
+	case OP_BNEL:
+	case OP_BGTZ:
+	case OP_BGTZL:
+	case OP_BLEZ:
+	case OP_BLEZL:
+		*off = se16(insn) + 1;
+		return true;
+
+	case OP_REGIMM:
+		switch ((insn >> 16) & 0x1f) {
+		case REGIMM_BGEZ:
+		case REGIMM_BGEZL:
+		case REGIMM_BGEZAL:
+		case REGIMM_BGEZALL:
+		case REGIMM_BLTZ:
+		case REGIMM_BLTZL:
+		case REGIMM_BLTZAL:
+		case REGIMM_BLTZALL:
+			*off = se16(insn) + 1;
+			return true;
+
+		default:
+			return false;
+		}
+
+	default:
+		return false;
+	}
+}
+
+static int check_ll(uint64_t pc, uint32_t *code, size_t sz)
+{
+	ssize_t i, max, sc_pos;
+	int off;
+
+	/*
+	 * Every LL must be preceded by a sync instruction in order to ensure
+	 * that instruction reordering doesn't allow a prior memory access to
+	 * execute after the LL & cause erroneous results.
+	 */
+	if (!is_sync(le32toh(code[-1]))) {
+		fprintf(stderr, "%" PRIx64 ": LL not preceded by sync\n", pc);
+		return -EINVAL;
+	}
+
+	/* Find the matching SC instruction */
+	max = sz / 4;
+	for (sc_pos = 0; sc_pos < max; sc_pos++) {
+		if (is_sc(le32toh(code[sc_pos])))
+			break;
+	}
+	if (sc_pos >= max) {
+		fprintf(stderr, "%" PRIx64 ": LL has no matching SC\n", pc);
+		return -EINVAL;
+	}
+
+	/*
+	 * Check branches within the LL/SC loop target sync instructions,
+	 * ensuring that speculative execution can't generate memory accesses
+	 * due to instructions outside of the loop.
+	 */
+	for (i = 0; i < sc_pos; i++) {
+		if (!is_branch(le32toh(code[i]), &off))
+			continue;
+
+		/*
+		 * If the branch target is within the LL/SC loop then we don't
+		 * need to worry about it.
+		 */
+		if ((off >= -i) && (off <= sc_pos))
+			continue;
+
+		/* If the branch targets a sync instruction we're all good... */
+		if (is_sync(le32toh(code[i + off])))
+			continue;
+
+		/* ...but if not, we have a problem */
+		fprintf(stderr, "%" PRIx64 ": Branch target not a sync\n",
+			pc + (i * 4));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_code(uint64_t pc, uint32_t *code, size_t sz)
+{
+	int err = 0;
+
+	if (sz % 4) {
+		fprintf(stderr, "%" PRIx64 ": Section size not a multiple of 4\n",
+			pc);
+		err = -EINVAL;
+		sz -= (sz % 4);
+	}
+
+	if (is_ll(le32toh(code[0]))) {
+		fprintf(stderr, "%" PRIx64 ": First instruction in section is an LL\n",
+			pc);
+		err = -EINVAL;
+	}
+
+#define advance() (	\
+	code++,		\
+	pc += 4,	\
+	sz -= 4		\
+)
+
+	/*
+	 * Skip the first instructionm allowing check_ll to look backwards
+	 * unconditionally.
+	 */
+	advance();
+
+	/* Now scan through the code looking for LL instructions */
+	for (; sz; advance()) {
+		if (is_ll(le32toh(code[0])))
+			err |= check_ll(pc, code, sz);
+	}
+
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int vmlinux_fd, status, err, i;
+	const char *vmlinux_path;
+	struct stat st;
+	Elf64_Ehdr *eh;
+	Elf64_Shdr *sh;
+	void *vmlinux;
+
+	status = EXIT_FAILURE;
+
+	if (argc < 2) {
+		usage(stderr);
+		goto out_ret;
+	}
+
+	vmlinux_path = argv[1];
+	vmlinux_fd = open(vmlinux_path, O_RDONLY);
+	if (vmlinux_fd == -1) {
+		perror("Unable to open vmlinux");
+		goto out_ret;
+	}
+
+	err = fstat(vmlinux_fd, &st);
+	if (err) {
+		perror("Unable to stat vmlinux");
+		goto out_close;
+	}
+
+	vmlinux = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, vmlinux_fd, 0);
+	if (vmlinux == MAP_FAILED) {
+		perror("Unable to mmap vmlinux");
+		goto out_close;
+	}
+
+	eh = vmlinux;
+	if (memcmp(eh->e_ident, ELFMAG, SELFMAG)) {
+		fprintf(stderr, "vmlinux is not an ELF?\n");
+		goto out_munmap;
+	}
+
+	if (eh->e_ident[EI_CLASS] != ELFCLASS64) {
+		fprintf(stderr, "vmlinux is not 64b?\n");
+		goto out_munmap;
+	}
+
+	if (eh->e_ident[EI_DATA] != ELFDATA2LSB) {
+		fprintf(stderr, "vmlinux is not little endian?\n");
+		goto out_munmap;
+	}
+
+	for (i = 0; i < le16toh(eh->e_shnum); i++) {
+		sh = vmlinux + le64toh(eh->e_shoff) + (i * le16toh(eh->e_shentsize));
+
+		if (sh->sh_type != SHT_PROGBITS)
+			continue;
+		if (!(sh->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		err = check_code(le64toh(sh->sh_addr),
+				 vmlinux + le64toh(sh->sh_offset),
+				 le64toh(sh->sh_size));
+		if (err)
+			goto out_munmap;
+	}
+
+	status = EXIT_SUCCESS;
+out_munmap:
+	munmap(vmlinux, st.st_size);
+out_close:
+	close(vmlinux_fd);
+out_ret:
+	return status;
+}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 996a934ece7d..b2a2e032dc99 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -18,6 +18,10 @@ ccflags-vdso := \
 	$(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
 	-D__VDSO__
 
+ifndef CONFIG_64BIT
+ccflags-vdso += -DBUILD_VDSO32
+endif
+
 ifdef CONFIG_CC_IS_CLANG
 ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
@@ -75,6 +79,7 @@ CFLAGS_REMOVE_vdso.o = -pg
 
 GCOV_PROFILE := n
 UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
 
 #
 # Shared build commands.
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 6ebdc37c89fc..6b83b6376a4b 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock,
 	return __cvdso_clock_gettime32(clock, ts);
 }
 
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
 			struct timezone *tz)
 {
 	return __cvdso_gettimeofday(tv, tz);
 }
 
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
 int __vdso_clock_getres(clockid_t clock_id,
 			struct old_timespec32 *res)
 {
@@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock,
 	return __cvdso_clock_gettime(clock, ts);
 }
 
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
 			struct timezone *tz)
 {
 	return __cvdso_gettimeofday(tv, tz);
 }
 
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
 int __vdso_clock_getres(clockid_t clock_id,
 			struct __kernel_timespec *res)
 {
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index fbd68329737f..e30298e99e1b 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -20,6 +20,7 @@ config NDS32
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IOREMAP
 	select GENERIC_LIB_ASHLDI3
 	select GENERIC_LIB_ASHRDI3
 	select GENERIC_LIB_CMPDI2
@@ -61,7 +62,7 @@ config GENERIC_HWEIGHT
 
 config GENERIC_LOCKBREAK
 	def_bool y
-	depends on PREEMPT
+	depends on PREEMPTION
 
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
index f80a4ab63da2..f88a12fdf0f3 100644
--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -13,7 +13,7 @@ config FPU
 	default n
 	help
 	  If FPU ISA is used in user space, this configuration shall be Y to
-          enable required support in kerenl such as fpu context switch and
+          enable required support in kernel such as fpu context switch and
           fpu exception handler.
 
 	  If no FPU ISA is used in user space, say N.
@@ -27,7 +27,7 @@ config LAZY_FPU
           enhance system performance by reducing the context switch
 	  frequency of the FPU register.
 
-	  For nomal case, say Y.
+	  For normal case, say Y.
 
 config SUPPORT_DENORMAL_ARITHMETIC
 	bool "Denormal arithmetic support"
@@ -36,7 +36,7 @@ config SUPPORT_DENORMAL_ARITHMETIC
 	help
 	  Say Y here to enable arithmetic of denormalized number. Enabling
 	  this feature can enhance the precision for tininess number.
-	  However, performance loss in float pointe calculations is
+	  However, performance loss in float point calculations is
 	  possibly significant due to additional FPU exception.
 
 	  If the calculated tolerance for tininess number is not critical,
@@ -73,7 +73,7 @@ choice
 	  the cache aliasing issue. The rest cpus(N13, N10 and D10) are
 	  implemented as VIPT data cache. It may cause the cache aliasing issue
 	  if its cache way size is larger than page size. You can specify the
-	  CPU type direcly or choose CPU_V3 if unsure.
+	  CPU type directly or choose CPU_V3 if unsure.
 
           A kernel built for N10 is able to run on N15, D15, N13, N10 or D10.
           A kernel built for N15 is able to run on N15 or D15.
diff --git a/arch/nds32/boot/dts/Makefile b/arch/nds32/boot/dts/Makefile
index fff8ade7a84f..f84bd529b6fd 100644
--- a/arch/nds32/boot/dts/Makefile
+++ b/arch/nds32/boot/dts/Makefile
@@ -5,5 +5,3 @@ else
 BUILTIN_DTB :=
 endif
 obj-$(CONFIG_OF) += $(BUILTIN_DTB)
-
-clean-files := *.dtb *.dtb.S
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index d9ac7e6408ef..caddded56e77 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -9,7 +9,11 @@
 #define PG_dcache_dirty PG_arch_1
 
 void flush_icache_range(unsigned long start, unsigned long end);
+#define flush_icache_range flush_icache_range
+
 void flush_icache_page(struct vm_area_struct *vma, struct page *page);
+#define flush_icache_page flush_icache_page
+
 #ifdef CONFIG_CPU_CACHE_ALIASING
 void flush_cache_mm(struct mm_struct *mm);
 void flush_cache_dup_mm(struct mm_struct *mm);
@@ -40,12 +44,11 @@ void invalidate_kernel_vmap_range(void *addr, int size);
 #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
 
 #else
-#include <asm-generic/cacheflush.h>
-#undef flush_icache_range
-#undef flush_icache_page
-#undef flush_icache_user_range
 void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 	                     unsigned long addr, int len);
+#define flush_icache_user_range flush_icache_user_range
+
+#include <asm-generic/cacheflush.h>
 #endif
 
 #endif /* __NDS32_CACHEFLUSH_H__ */
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
index 16f262322b8f..e57378d04006 100644
--- a/arch/nds32/include/asm/io.h
+++ b/arch/nds32/include/asm/io.h
@@ -6,7 +6,6 @@
 
 #include <linux/types.h>
 
-extern void iounmap(volatile void __iomem *addr);
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
 {
@@ -79,5 +78,7 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
 #define writeb(v,c)	({ __iowmb(); writeb_relaxed((v),(c)); })
 #define writew(v,c)	({ __iowmb(); writew_relaxed((v),(c)); })
 #define writel(v,c)	({ __iowmb(); writel_relaxed((v),(c)); })
+
 #include <asm-generic/io.h>
+
 #endif /* __ASM_NDS32_IO_H */
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 8feb1fa12f01..86b32014c5f9 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -41,17 +41,14 @@ void clear_page(void *page);
 void copy_page(void *to, void *from);
 
 typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)      (x)
-#define pmd_val(x)      (x)
 #define pgd_val(x)	(x)
 #define pgprot_val(x)   (x)
 
 #define __pte(x)        (x)
-#define __pmd(x)        (x)
 #define __pgd(x)        (x)
 #define __pgprot(x)     (x)
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 37125e6884d7..85c117347c86 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -15,9 +15,6 @@
 /*
  * Since we have only two-level page tables, these are trivial
  */
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, pmd)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 0588ec99725c..6abc58ac406d 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,41 +4,33 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED 1
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopmd.h>
 #include <linux/sizes.h>
 
 #include <asm/memory.h>
 #include <asm/nds32.h>
 #ifndef __ASSEMBLY__
 #include <asm/fixmap.h>
-#include <asm/io.h>
 #include <nds32_intrinsic.h>
 #endif
 
 #ifdef CONFIG_ANDES_PAGE_SIZE_4KB
 #define PGDIR_SHIFT      22
 #define PTRS_PER_PGD     1024
-#define PMD_SHIFT        22
-#define PTRS_PER_PMD     1
 #define PTRS_PER_PTE     1024
 #endif
 
 #ifdef CONFIG_ANDES_PAGE_SIZE_8KB
 #define PGDIR_SHIFT      24
 #define PTRS_PER_PGD     256
-#define PMD_SHIFT        24
-#define PTRS_PER_PMD     1
 #define PTRS_PER_PTE     2048
 #endif
 
 #ifndef __ASSEMBLY__
 extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
 #endif /* !__ASSEMBLY__ */
 
@@ -130,6 +122,9 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define _PAGE_CACHE		_PAGE_C_MEM_WB
 #endif
 
+#define _PAGE_IOREMAP \
+	(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV)
+
 /*
  * + Level 1 descriptor (PMD)
  */
@@ -200,7 +195,7 @@ extern void paging_init(void);
 #define pte_unmap(pte)		do { } while (0)
 #define pte_unmap_nested(pte)	do { } while (0)
 
-#define pmd_off_k(address)	pmd_offset(pgd_offset_k(address), address)
+#define pmd_off_k(address)	pmd_offset(pud_offset(p4d_offset(pgd_offset_k(address), (address)), (address)), (address))
 
 #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 /*
@@ -366,9 +361,6 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)      pgd_offset(&init_mm, addr)
 
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr)	((pmd_t *)(dir))
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const unsigned long mask = 0xfff;
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index a8aff1c8b4f4..672603804a3b 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -7,6 +7,5 @@
 #include <asm-generic/tlb.h>
 
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
-#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tln)->mm, pmd)
 
 #endif
diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h
new file mode 100644
index 000000000000..caeed3898419
--- /dev/null
+++ b/arch/nds32/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_NDS32_VMALLOC_H
+#define _ASM_NDS32_VMALLOC_H
+
+#endif /* _ASM_NDS32_VMALLOC_H */
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index 4206d4b6c8ce..69d762182d49 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -46,8 +46,8 @@ static inline void cache_op(phys_addr_t paddr, size_t size,
 	} while (left);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_FROM_DEVICE:
@@ -61,8 +61,8 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_TO_DEVICE:
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index 1df02a793364..6a2966c2d8c8 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -72,7 +72,7 @@
 	restore_user_regs_last
 	.endm
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	.macro	preempt_stop
 	.endm
 #else
@@ -158,7 +158,7 @@ no_work_pending:
 /*
  * preemptive kernel
  */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 resume_kernel:
 	gie_disable
 	lwi	$t0, [tsk+#TSK_TI_PREEMPT]
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index fd2a54b8cd57..22ab77ea27ad 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
-int ftrace_arch_code_modify_prepare(void)
-{
-	set_all_modules_text_rw();
-	return 0;
-}
-
-int ftrace_arch_code_modify_post_process(void)
-{
-	set_all_modules_text_ro();
-	return 0;
-}
-
 static unsigned long gen_sethi_insn(unsigned long addr)
 {
 	unsigned long opcode = 0x46000000;
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
index 334c2a6cec23..0ce6f9f307e6 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1119,7 +1119,7 @@ static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
 		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
 }
 
-const static struct of_device_id cpu_pmu_of_device_ids[] = {
+static const struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "andestech,nds32v3-pmu",
 	 .data = device_pmu_init},
 	{},
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
index ffa8040d8be7..e25700e125d8 100644
--- a/arch/nds32/kernel/pm.c
+++ b/arch/nds32/kernel/pm.c
@@ -14,6 +14,7 @@ unsigned int *phy_addr_sp_tmp;
 static void nds32_suspend2ram(void)
 {
 	pgd_t *pgdv;
+	p4d_t *p4dv;
 	pud_t *pudv;
 	pmd_t *pmdv;
 	pte_t *ptev;
@@ -21,7 +22,8 @@ static void nds32_suspend2ram(void)
 	pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
 		L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
 
-	pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+	p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume);
+	pudv = pud_offset(p4dv, (unsigned int)cpu_resume);
 	pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
 	ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
 
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index 31d29d92478e..a066efbe53c0 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -317,11 +317,6 @@ void __init setup_arch(char **cmdline_p)
 
 	unflatten_and_copy_device_tree();
 
-	if(IS_ENABLED(CONFIG_VT)) {
-		if(IS_ENABLED(CONFIG_DUMMY_CONSOLE))
-			conswitchp = &dummy_con;
-	}
-
 	*cmdline_p = boot_command_line;
 	early_trap_init();
 }
diff --git a/arch/nds32/kernel/vdso/gettimeofday.c b/arch/nds32/kernel/vdso/gettimeofday.c
index b02581891c33..9ec03cf0ec54 100644
--- a/arch/nds32/kernel/vdso/gettimeofday.c
+++ b/arch/nds32/kernel/vdso/gettimeofday.c
@@ -48,9 +48,9 @@ static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start)
 }
 
 static notrace long clock_gettime_fallback(clockid_t _clkid,
-					   struct timespec *_ts)
+					   struct __kernel_old_timespec *_ts)
 {
-	register struct timespec *ts asm("$r1") = _ts;
+	register struct __kernel_old_timespec *ts asm("$r1") = _ts;
 	register clockid_t clkid asm("$r0") = _clkid;
 	register long ret asm("$r0");
 
@@ -63,7 +63,7 @@ static notrace long clock_gettime_fallback(clockid_t _clkid,
 	return ret;
 }
 
-static notrace int do_realtime_coarse(struct timespec *ts,
+static notrace int do_realtime_coarse(struct __kernel_old_timespec *ts,
 				      struct vdso_data *vdata)
 {
 	u32 seq;
@@ -78,25 +78,23 @@ static notrace int do_realtime_coarse(struct timespec *ts,
 	return 0;
 }
 
-static notrace int do_monotonic_coarse(struct timespec *ts,
+static notrace int do_monotonic_coarse(struct __kernel_old_timespec *ts,
 				       struct vdso_data *vdata)
 {
-	struct timespec tomono;
 	u32 seq;
+	u64 ns;
 
 	do {
 		seq = vdso_read_begin(vdata);
 
-		ts->tv_sec = vdata->xtime_coarse_sec;
-		ts->tv_nsec = vdata->xtime_coarse_nsec;
-
-		tomono.tv_sec = vdata->wtm_clock_sec;
-		tomono.tv_nsec = vdata->wtm_clock_nsec;
+		ts->tv_sec = vdata->xtime_coarse_sec + vdata->wtm_clock_sec;
+		ns = vdata->xtime_coarse_nsec + vdata->wtm_clock_nsec;
 
 	} while (vdso_read_retry(vdata, seq));
 
-	ts->tv_sec += tomono.tv_sec;
-	timespec_add_ns(ts, tomono.tv_nsec);
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
 	return 0;
 }
 
@@ -115,7 +113,7 @@ static notrace inline u64 vgetsns(struct vdso_data *vdso)
 	return ((u64) cycle_delta & vdso->cs_mask) * vdso->cs_mult;
 }
 
-static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
+static notrace int do_realtime(struct __kernel_old_timespec *ts, struct vdso_data *vdata)
 {
 	unsigned count;
 	u64 ns;
@@ -133,32 +131,31 @@ static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
 	return 0;
 }
 
-static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
+static notrace int do_monotonic(struct __kernel_old_timespec *ts, struct vdso_data *vdata)
 {
-	struct timespec tomono;
-	u64 nsecs;
+	u64 ns;
 	u32 seq;
 
 	do {
 		seq = vdso_read_begin(vdata);
 
 		ts->tv_sec = vdata->xtime_clock_sec;
-		nsecs = vdata->xtime_clock_nsec;
-		nsecs += vgetsns(vdata);
-		nsecs >>= vdata->cs_shift;
+		ns = vdata->xtime_clock_nsec;
+		ns += vgetsns(vdata);
+		ns >>= vdata->cs_shift;
 
-		tomono.tv_sec = vdata->wtm_clock_sec;
-		tomono.tv_nsec = vdata->wtm_clock_nsec;
+		ts->tv_sec += vdata->wtm_clock_sec;
+		ns += vdata->wtm_clock_nsec;
 
 	} while (vdso_read_retry(vdata, seq));
 
-	ts->tv_sec += tomono.tv_sec;
-	ts->tv_nsec = 0;
-	timespec_add_ns(ts, nsecs + tomono.tv_nsec);
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
 	return 0;
 }
 
-notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clkid, struct __kernel_old_timespec *ts)
 {
 	struct vdso_data *vdata;
 	int ret = -1;
@@ -191,10 +188,10 @@ notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
 }
 
 static notrace int clock_getres_fallback(clockid_t _clk_id,
-					  struct timespec *_res)
+					  struct __kernel_old_timespec *_res)
 {
 	register clockid_t clk_id asm("$r0") = _clk_id;
-	register struct timespec *res asm("$r1") = _res;
+	register struct __kernel_old_timespec *res asm("$r1") = _res;
 	register int ret asm("$r0");
 
 	asm volatile ("movi	$r15, %3\n"
@@ -206,7 +203,7 @@ static notrace int clock_getres_fallback(clockid_t _clk_id,
 	return ret;
 }
 
-notrace int __vdso_clock_getres(clockid_t clk_id, struct timespec *res)
+notrace int __vdso_clock_getres(clockid_t clk_id, struct __kernel_old_timespec *res)
 {
 	struct vdso_data *vdata = __get_datapage();
 
@@ -230,10 +227,10 @@ notrace int __vdso_clock_getres(clockid_t clk_id, struct timespec *res)
 	return 0;
 }
 
-static notrace inline int gettimeofday_fallback(struct timeval *_tv,
+static notrace inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
 						struct timezone *_tz)
 {
-	register struct timeval *tv asm("$r0") = _tv;
+	register struct __kernel_old_timeval *tv asm("$r0") = _tv;
 	register struct timezone *tz asm("$r1") = _tz;
 	register int ret asm("$r0");
 
@@ -246,9 +243,9 @@ static notrace inline int gettimeofday_fallback(struct timeval *_tv,
 	return ret;
 }
 
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
-	struct timespec ts;
+	struct __kernel_old_timespec ts;
 	struct vdso_data *vdata;
 	int ret;
 
diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S
index 9e90f30a181d..f679d3397436 100644
--- a/arch/nds32/kernel/vmlinux.lds.S
+++ b/arch/nds32/kernel/vmlinux.lds.S
@@ -53,12 +53,11 @@ SECTIONS
 	_etext = .;			/* End of text and rodata section */
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RO_DATA(PAGE_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata  =  .;
 
 	EXCEPTION_TABLE(16)
-	NOTES
 	BSS_SECTION(4, 4, 4)
 	_end = .;
 
diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile
index bd360e4583b5..897ecaf5cf54 100644
--- a/arch/nds32/mm/Makefile
+++ b/arch/nds32/mm/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y				:= extable.o tlb.o \
-				   fault.o init.o ioremap.o mmap.o \
+obj-y				:= extable.o tlb.o fault.o init.o mmap.o \
                                    mm-nds32.o cacheflush.o proc.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 064ae5d2159d..906dfb25353c 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -31,6 +31,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
 
 	do {
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
 
 		if (pgd_none(*pgd))
@@ -41,7 +43,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 			break;
 		}
 
-		pmd = pmd_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
+		pmd = pmd_offset(pud, addr);
 #if PTRS_PER_PMD != 1
 		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
 #endif
@@ -359,6 +363,7 @@ vmalloc_fault:
 
 		unsigned int index = pgd_index(addr);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
 		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 		pte_t *pte_k;
@@ -369,8 +374,13 @@ vmalloc_fault:
 		if (!pgd_present(*pgd_k))
 			goto no_context;
 
-		pud = pud_offset(pgd, addr);
-		pud_k = pud_offset(pgd_k, addr);
+		p4d = p4d_offset(pgd, addr);
+		p4d_k = p4d_offset(pgd_k, addr);
+		if (!p4d_present(*p4d_k))
+			goto no_context;
+
+		pud = pud_offset(p4d, addr);
+		pud_k = pud_offset(p4d_k, addr);
 		if (!pud_present(*pud_k))
 			goto no_context;
 
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 55703b03d172..0be3833f6814 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -54,6 +54,7 @@ static void __init map_ram(void)
 {
 	unsigned long v, p, e;
 	pgd_t *pge;
+	p4d_t *p4e;
 	pud_t *pue;
 	pmd_t *pme;
 	pte_t *pte;
@@ -69,7 +70,8 @@ static void __init map_ram(void)
 
 	while (p < e) {
 		int j;
-		pue = pud_offset(pge, v);
+		p4e = p4d_offset(pge, v);
+		pue = pud_offset(p4e, v);
 		pme = pmd_offset(pue, v);
 
 		if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) {
@@ -100,6 +102,7 @@ static void __init fixedrange_init(void)
 {
 	unsigned long vaddr;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 #ifdef CONFIG_HIGHMEM
@@ -111,7 +114,8 @@ static void __init fixedrange_init(void)
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
 	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!fixmap_pmd_p)
@@ -126,7 +130,8 @@ static void __init fixedrange_init(void)
 	vaddr = PKMAP_BASE;
 
 	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!pte)
diff --git a/arch/nds32/mm/ioremap.c b/arch/nds32/mm/ioremap.c
deleted file mode 100644
index 690140bb23a2..000000000000
--- a/arch/nds32/mm/ioremap.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/vmalloc.h>
-#include <linux/io.h>
-#include <linux/mm.h>
-#include <asm/pgtable.h>
-
-void __iomem *ioremap(phys_addr_t phys_addr, size_t size);
-
-static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
-				      void *caller)
-{
-	struct vm_struct *area;
-	unsigned long addr, offset, last_addr;
-	pgprot_t prot;
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
-		return NULL;
-
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_vm_area_caller(size, VM_IOREMAP, caller);
-	if (!area)
-		return NULL;
-
-	area->phys_addr = phys_addr;
-	addr = (unsigned long)area->addr;
-	prot = __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D |
-			_PAGE_G | _PAGE_C_DEV);
-	if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
-		vunmap((void *)addr);
-		return NULL;
-	}
-	return (__force void __iomem *)(offset + (char *)addr);
-
-}
-
-void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
-{
-	return __ioremap_caller(phys_addr, size,
-				__builtin_return_address(0));
-}
-
-EXPORT_SYMBOL(ioremap);
-
-void iounmap(volatile void __iomem * addr)
-{
-	vunmap((void *)(PAGE_MASK & (unsigned long)addr));
-}
-
-EXPORT_SYMBOL(iounmap);
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 3b43798d754f..8503bee882d1 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -74,6 +74,8 @@ void setup_mm_for_reboot(char mode)
 {
 	unsigned long pmdval;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	int i;
 
@@ -84,7 +86,9 @@ void setup_mm_for_reboot(char mode)
 
 	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
 		pmdval = (i << PGDIR_SHIFT);
-		pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+		p4d = p4d_offset(pgd, i << PGDIR_SHIFT);
+		pud = pud_offset(p4d, i << PGDIR_SHIFT);
+		pmd = pmd_offset(pud + i, i << PGDIR_SHIFT);
 		set_pmd(pmd, __pmd(pmdval));
 	}
 }
diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c
index ba80992d13a2..837ae7728830 100644
--- a/arch/nds32/mm/proc.c
+++ b/arch/nds32/mm/proc.c
@@ -16,10 +16,14 @@ extern struct cache_info L1_cache_info[2];
 
 int va_kernel_present(unsigned long addr)
 {
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
-	pmd = pmd_offset(pgd_offset_k(addr), addr);
+	p4d = p4d_offset(pgd_offset_k(addr), addr);
+	pud = pud_offset(p4d, addr);
+	pmd = pmd_offset(pud, addr);
 	if (!pmd_none(*pmd)) {
 		ptep = pte_offset_map(pmd, addr);
 		pte = *ptep;
@@ -32,20 +36,24 @@ int va_kernel_present(unsigned long addr)
 pte_t va_present(struct mm_struct * mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				ptep = pte_offset_map(pmd, addr);
-				pte = *ptep;
-				if (pte_present(pte))
-					return pte;
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd)) {
+					ptep = pte_offset_map(pmd, addr);
+					pte = *ptep;
+					if (pte_present(pte))
+						return pte;
+				}
 			}
 		}
 	}
diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig
index 1137ef2ed3b0..a7967b4cfb6e 100644
--- a/arch/nios2/configs/10m50_defconfig
+++ b/arch/nios2/configs/10m50_defconfig
@@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig
index a0f160ba7598..423a0c40a162 100644
--- a/arch/nios2/configs/3c120_defconfig
+++ b/arch/nios2/configs/3c120_defconfig
@@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 9010243077ab..746853ac7d8d 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -25,29 +25,8 @@
 #define writew_relaxed(x, addr)	writew(x, addr)
 #define writel_relaxed(x, addr)	writel(x, addr)
 
-extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size,
-			unsigned long cacheflag);
-extern void __iounmap(void __iomem *addr);
-
-static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, 0);
-}
-
-static inline void __iomem *ioremap_nocache(unsigned long physaddr,
-						unsigned long size)
-{
-	return __ioremap(physaddr, size, 0);
-}
-
-static inline void iounmap(void __iomem *addr)
-{
-	__iounmap(addr);
-}
-
-#define ioremap_nocache ioremap_nocache
-#define ioremap_wc ioremap_nocache
-#define ioremap_wt ioremap_nocache
+void __iomem *ioremap(unsigned long physaddr, unsigned long size);
+void iounmap(void __iomem *addr);
 
 /* Pages to physical address... */
 #define page_to_phys(page)	virt_to_phys(page_to_virt(page))
diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h
new file mode 100644
index 000000000000..ec7a9260090b
--- /dev/null
+++ b/arch/nios2/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_NIOS2_VMALLOC_H
+#define _ASM_NIOS2_VMALLOC_H
+
+#endif /* _ASM_NIOS2_VMALLOC_H */
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
index 1e515ccd698e..3d8d1d0bcb64 100644
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -365,7 +365,7 @@ ENTRY(ret_from_interrupt)
 	ldw	r1, PT_ESTATUS(sp)	/* check if returning to kernel */
 	TSTBNZ	r1, r1, ESTATUS_EU, Luser_return
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	GET_THREAD_INFO	r1
 	ldw	r4, TI_PREEMPT_COUNT(r1)
 	bne	r4, r0, restore_all
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 4cf35b09c0ec..3c6e3c813a0b 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -196,8 +196,4 @@ void __init setup_arch(char **cmdline_p)
 	 * get kmalloc into gear
 	 */
 	paging_init();
-
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
 }
diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S
index 6ad64f14617d..c55a7cfa1075 100644
--- a/arch/nios2/kernel/vmlinux.lds.S
+++ b/arch/nios2/kernel/vmlinux.lds.S
@@ -49,8 +49,8 @@ SECTIONS
 	__init_end = .;
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RO_DATA(PAGE_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)
@@ -58,7 +58,6 @@ SECTIONS
 
 	STABS_DEBUG
 	DWARF_DEBUG
-	NOTES
 
 	DISCARDS
 }
diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c
index 9cb238664584..0ed711e37902 100644
--- a/arch/nios2/mm/dma-mapping.c
+++ b/arch/nios2/mm/dma-mapping.c
@@ -18,8 +18,8 @@
 #include <linux/cache.h>
 #include <asm/cacheflush.h>
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	void *vaddr = phys_to_virt(paddr);
 
@@ -42,8 +42,8 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	void *vaddr = phys_to_virt(paddr);
 
diff --git a/arch/nios2/mm/ioremap.c b/arch/nios2/mm/ioremap.c
index 3a28177a01eb..819bdfcc2e71 100644
--- a/arch/nios2/mm/ioremap.c
+++ b/arch/nios2/mm/ioremap.c
@@ -112,8 +112,7 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 /*
  * Map some physical address range into the kernel address space.
  */
-void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
-			unsigned long cacheflag)
+void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
 	struct vm_struct *area;
 	unsigned long offset;
@@ -144,8 +143,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	 * CONFIG_NIOS2_IO_REGION_BASE
 	 */
 	if (IS_MAPPABLE_UNCACHEABLE(phys_addr) &&
-	    IS_MAPPABLE_UNCACHEABLE(last_addr) &&
-	    !(cacheflag & _PAGE_CACHED))
+	    IS_MAPPABLE_UNCACHEABLE(last_addr))
 		return (void __iomem *)(CONFIG_NIOS2_IO_REGION_BASE + phys_addr);
 
 	/* Mappings have to be page-aligned */
@@ -158,21 +156,20 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (!area)
 		return NULL;
 	addr = area->addr;
-	if (remap_area_pages((unsigned long) addr, phys_addr, size,
-		cacheflag)) {
+	if (remap_area_pages((unsigned long) addr, phys_addr, size, 0)) {
 		vunmap(addr);
 		return NULL;
 	}
 	return (void __iomem *) (offset + (char *)addr);
 }
-EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap);
 
 /*
- * __iounmap unmaps nearly everything, so be careful
+ * iounmap unmaps nearly everything, so be careful
  * it doesn't free currently pointer/page tables anymore but it
  * wasn't used anyway and might be added later.
  */
-void __iounmap(void __iomem *addr)
+void iounmap(void __iomem *addr)
 {
 	struct vm_struct *p;
 
@@ -184,4 +181,4 @@ void __iounmap(void __iomem *addr)
 		pr_err("iounmap: bad address %p\n", addr);
 	kfree(p);
 }
-EXPORT_SYMBOL(__iounmap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index bf326f0edd2f..1928e061ff96 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -13,7 +13,7 @@ config OPENRISC
 	select IRQ_DOMAIN
 	select HANDLE_DOMAIN_IRQ
 	select GPIOLIB
-        select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_TRACEHOOK
 	select SPARSE_IRQ
 	select GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_PROBE
@@ -51,12 +51,12 @@ config NO_IOPORT_MAP
 	def_bool y
 
 config TRACE_IRQFLAGS_SUPPORT
-        def_bool y
+	def_bool y
 
 # For now, use generic checksum functions
 #These can be reimplemented in assembly later if so inclined
 config GENERIC_CSUM
-        def_bool y
+	def_bool y
 
 config STACKTRACE_SUPPORT
 	def_bool y
@@ -89,8 +89,8 @@ config DCACHE_WRITETHROUGH
 	  If unsure say N here
 
 config OPENRISC_BUILTIN_DTB
-        string "Builtin DTB"
-        default ""
+	string "Builtin DTB"
+	default ""
 
 menu "Class II Instructions"
 
@@ -161,13 +161,13 @@ config OPENRISC_HAVE_SHADOW_GPRS
 	  On a unicore system it's safe to say N here if you are unsure.
 
 config CMDLINE
-        string "Default kernel command string"
-        default ""
-        help
-          On some architectures there is currently no way for the boot loader
-          to pass arguments to the kernel. For these architectures, you should
-          supply some command-line options at build time by entering them
-          here.
+	string "Default kernel command string"
+	default ""
+	help
+	  On some architectures there is currently no way for the boot loader
+	  to pass arguments to the kernel. For these architectures, you should
+	  supply some command-line options at build time by entering them
+	  here.
 
 menu "Debugging options"
 
@@ -185,7 +185,7 @@ config OPENRISC_ESR_EXCEPTION_BUG_CHECK
 	default n
 	help
 	  This option enables some checks that might expose some problems
-          in kernel.
+	  in kernel.
 
 	  Say N if you are unsure.
 
diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h
index 5b81a96ab85e..e18f038b2a6d 100644
--- a/arch/openrisc/include/asm/io.h
+++ b/arch/openrisc/include/asm/io.h
@@ -25,7 +25,6 @@
 #define PIO_OFFSET		0
 #define PIO_MASK		0
 
-#define ioremap_nocache ioremap
 #include <asm-generic/io.h>
 #include <asm/pgtable.h>
 
diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..75435eceec32
--- /dev/null
+++ b/arch/openrisc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_OPENRISC_VMALLOC_H
+#define _ASM_OPENRISC_VMALLOC_H
+
+#endif /* _ASM_OPENRISC_VMALLOC_H */
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 4d5b8bd1d795..adec711ad39d 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -125,7 +125,7 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	free_pages_exact(vaddr, size);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t addr, size_t size,
+void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
 		enum dma_data_direction dir)
 {
 	unsigned long cl;
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index d668f5be3a99..c0a774b51e45 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -308,11 +308,6 @@ void __init setup_arch(char **cmdline_p)
 	/* paging_init() sets up the MMU and marks all pages as reserved */
 	paging_init();
 
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
-	if (!conswitchp)
-		conswitchp = &dummy_con;
-#endif
-
 	*cmdline_p = boot_command_line;
 
 	printk(KERN_INFO "OpenRISC Linux -- http://openrisc.io\n");
diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index 2e2c72c157f3..60449fd7f16f 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -67,19 +67,18 @@ SECTIONS
 
 	_sdata = .;
 
-	/* Page alignment required for RO_DATA_SECTION */
-	RO_DATA_SECTION(PAGE_SIZE)
+	/* Page alignment required for RO_DATA */
+	RO_DATA(PAGE_SIZE)
 	_e_kernel_ro = .;
 
 	/* Whatever comes after _e_kernel_ro had better be page-aligend, too */
 
 	/* 32 here is cacheline size... recheck this */
-	RW_DATA_SECTION(32, PAGE_SIZE, PAGE_SIZE)
+	RW_DATA(32, PAGE_SIZE, PAGE_SIZE)
 
         _edata  =  .;
 
 	EXCEPTION_TABLE(4)
-	NOTES
 
 	/* Init code and data */
 	. = ALIGN(PAGE_SIZE);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b16237c95ea3..71034b54d74e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -18,7 +18,7 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
 	select HAVE_KERNEL_BZIP2
@@ -62,6 +62,7 @@ config PARISC
 	select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_COPY_THREAD_TLS
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
@@ -81,7 +82,7 @@ config STACK_GROWSUP
 config GENERIC_LOCKBREAK
 	bool
 	default y
-	depends on SMP && PREEMPT
+	depends on SMP && PREEMPTION
 
 config ARCH_HAS_ILOG2_U32
 	bool
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 36b834f1c933..dca8f2de8cf5 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
 		 -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
 
 CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1)))
-KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds
 endif
 
 OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig
index 507f0644fcf8..db864b18962a 100644
--- a/arch/parisc/configs/c8000_defconfig
+++ b/arch/parisc/configs/c8000_defconfig
@@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 18b072a47a10..c7a5726728a4 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -8,7 +8,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_PERF_EVENTS=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index 3cbf1f1c1188..c1c22819a04d 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -42,31 +42,32 @@ extern __wsum csum_partial_copy_from_user(const void __user *src,
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
 	unsigned int sum;
+	unsigned long t0, t1, t2;
 
 	__asm__ __volatile__ (
 "	ldws,ma		4(%1), %0\n"
 "	addib,<=	-4, %2, 2f\n"
 "\n"
-"	ldws		4(%1), %%r20\n"
-"	ldws		8(%1), %%r21\n"
-"	add		%0, %%r20, %0\n"
-"	ldws,ma		12(%1), %%r19\n"
-"	addc		%0, %%r21, %0\n"
-"	addc		%0, %%r19, %0\n"
-"1:	ldws,ma		4(%1), %%r19\n"
+"	ldws		4(%1), %4\n"
+"	ldws		8(%1), %5\n"
+"	add		%0, %4, %0\n"
+"	ldws,ma		12(%1), %3\n"
+"	addc		%0, %5, %0\n"
+"	addc		%0, %3, %0\n"
+"1:	ldws,ma		4(%1), %3\n"
 "	addib,<		0, %2, 1b\n"
-"	addc		%0, %%r19, %0\n"
+"	addc		%0, %3, %0\n"
 "\n"
-"	extru		%0, 31, 16, %%r20\n"
-"	extru		%0, 15, 16, %%r21\n"
-"	addc		%%r20, %%r21, %0\n"
-"	extru		%0, 15, 16, %%r21\n"
-"	add		%0, %%r21, %0\n"
+"	extru		%0, 31, 16, %4\n"
+"	extru		%0, 15, 16, %5\n"
+"	addc		%4, %5, %0\n"
+"	extru		%0, 15, 16, %5\n"
+"	add		%0, %5, %0\n"
 "	subi		-1, %0, %0\n"
 "2:\n"
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
+	: "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (t0), "=r" (t1), "=r" (t2)
 	: "1" (iph), "2" (ihl)
-	: "r19", "r20", "r21", "memory");
+	: "memory");
 
 	return (__force __sum16)sum;
 }
@@ -126,6 +127,10 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 					  __u32 len, __u8 proto,
 					  __wsum sum)
 {
+	unsigned long t0, t1, t2, t3;
+
+	len += proto;	/* add 16-bit proto + len */
+
 	__asm__ __volatile__ (
 
 #if BITS_PER_LONG > 32
@@ -136,20 +141,19 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	** Try to keep 4 registers with "live" values ahead of the ALU.
 	*/
 
-"	ldd,ma		8(%1), %%r19\n"	/* get 1st saddr word */
-"	ldd,ma		8(%2), %%r20\n"	/* get 1st daddr word */
-"	add		%8, %3, %3\n"/* add 16-bit proto + len */
-"	add		%%r19, %0, %0\n"
-"	ldd,ma		8(%1), %%r21\n"	/* 2cd saddr */
-"	ldd,ma		8(%2), %%r22\n"	/* 2cd daddr */
-"	add,dc		%%r20, %0, %0\n"
-"	add,dc		%%r21, %0, %0\n"
-"	add,dc		%%r22, %0, %0\n"
+"	ldd,ma		8(%1), %4\n"	/* get 1st saddr word */
+"	ldd,ma		8(%2), %5\n"	/* get 1st daddr word */
+"	add		%4, %0, %0\n"
+"	ldd,ma		8(%1), %6\n"	/* 2nd saddr */
+"	ldd,ma		8(%2), %7\n"	/* 2nd daddr */
+"	add,dc		%5, %0, %0\n"
+"	add,dc		%6, %0, %0\n"
+"	add,dc		%7, %0, %0\n"
 "	add,dc		%3, %0, %0\n"  /* fold in proto+len | carry bit */
-"	extrd,u		%0, 31, 32, %%r19\n"	/* copy upper half down */
-"	depdi		0, 31, 32, %0\n"	/* clear upper half */
-"	add		%%r19, %0, %0\n"	/* fold into 32-bits */
-"	addc		0, %0, %0\n"		/* add carry */
+"	extrd,u		%0, 31, 32, %4\n"/* copy upper half down */
+"	depdi		0, 31, 32, %0\n"/* clear upper half */
+"	add		%4, %0, %0\n"	/* fold into 32-bits */
+"	addc		0, %0, %0\n"	/* add carry */
 
 #else
 
@@ -158,30 +162,29 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	** Insn stream is serialized on the carry bit here too.
 	** result from the previous operation (eg r0 + x)
 	*/
-
-"	ldw,ma		4(%1), %%r19\n"	/* get 1st saddr word */
-"	ldw,ma		4(%2), %%r20\n"	/* get 1st daddr word */
-"	add		%8, %3, %3\n"	/* add 16-bit proto + len */
-"	add		%%r19, %0, %0\n"
-"	ldw,ma		4(%1), %%r21\n"	/* 2cd saddr */
-"	addc		%%r20, %0, %0\n"
-"	ldw,ma		4(%2), %%r22\n"	/* 2cd daddr */
-"	addc		%%r21, %0, %0\n"
-"	ldw,ma		4(%1), %%r19\n"	/* 3rd saddr */
-"	addc		%%r22, %0, %0\n"
-"	ldw,ma		4(%2), %%r20\n"	/* 3rd daddr */
-"	addc		%%r19, %0, %0\n"
-"	ldw,ma		4(%1), %%r21\n"	/* 4th saddr */
-"	addc		%%r20, %0, %0\n"
-"	ldw,ma		4(%2), %%r22\n"	/* 4th daddr */
-"	addc		%%r21, %0, %0\n"
-"	addc		%%r22, %0, %0\n"
+"	ldw,ma		4(%1), %4\n"	/* get 1st saddr word */
+"	ldw,ma		4(%2), %5\n"	/* get 1st daddr word */
+"	add		%4, %0, %0\n"
+"	ldw,ma		4(%1), %6\n"	/* 2nd saddr */
+"	addc		%5, %0, %0\n"
+"	ldw,ma		4(%2), %7\n"	/* 2nd daddr */
+"	addc		%6, %0, %0\n"
+"	ldw,ma		4(%1), %4\n"	/* 3rd saddr */
+"	addc		%7, %0, %0\n"
+"	ldw,ma		4(%2), %5\n"	/* 3rd daddr */
+"	addc		%4, %0, %0\n"
+"	ldw,ma		4(%1), %6\n"	/* 4th saddr */
+"	addc		%5, %0, %0\n"
+"	ldw,ma		4(%2), %7\n"	/* 4th daddr */
+"	addc		%6, %0, %0\n"
+"	addc		%7, %0, %0\n"
 "	addc		%3, %0, %0\n"	/* fold in proto+len, catch carry */
 
 #endif
-	: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
-	: "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
-	: "r19", "r20", "r21", "r22", "memory");
+	: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len),
+	  "=r" (t0), "=r" (t1), "=r" (t2), "=r" (t3)
+	: "0" (sum), "1" (saddr), "2" (daddr), "3" (len)
+	: "memory");
 	return csum_fold(sum);
 }
 
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index f627c37dad9c..ab5c215cf46c 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
 **		if (((unsigned long)p & 0xf) == 0)
 **			return __ldcw(p);
 */
-#define xchg(ptr, x) \
-	((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, x)							\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	__ret = (__typeof__(*(ptr)))					\
+		__xchg((unsigned long)_x_, (ptr), sizeof(*(ptr)));	\
+	__ret;								\
+})
 
 /* bug catcher for when unsupported size is used - won't link */
 extern void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index e03e3c849f40..2f4f66a3bac0 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -173,23 +173,6 @@ struct compat_shmid64_ds {
 #define COMPAT_ELF_NGREG 80
 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static __inline__ void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = &current->thread.regs;
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 93d37010b375..cab8f64ca4a2 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -127,19 +127,9 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr)
 /*
  * The standard PCI ioremap interfaces
  */
-
-extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-/* Most machines react poorly to I/O-space being cacheable... Instead let's
- * define ioremap() in terms of ioremap_nocache().
- */
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, _PAGE_NO_CACHE);
-}
-#define ioremap_nocache(off, sz)	ioremap((off), (sz))
-#define ioremap_wc			ioremap_nocache
-#define ioremap_uc			ioremap_nocache
+void __iomem *ioremap(unsigned long offset, unsigned long size);
+#define ioremap_wc			ioremap
+#define ioremap_uc			ioremap
 
 extern void iounmap(const volatile void __iomem *addr);
 
diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h
index a99ea747d7ed..87e174006995 100644
--- a/arch/parisc/include/asm/kexec.h
+++ b/arch/parisc/include/asm/kexec.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_PARISC_KEXEC_H
 #define _ASM_PARISC_KEXEC_H
 
-#ifdef CONFIG_KEXEC
-
 /* Maximum physical address we can use pages from */
 #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
 /* Maximum address we can reach in physical address mode */
@@ -32,6 +30,4 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* CONFIG_KEXEC */
-
 #endif /* _ASM_PARISC_KEXEC_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 93caf17ac5e2..796ae29e9b9a 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
 
 /* NOTE: even on 64 bits, these entries are __u32 because we allocate
  * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
 typedef struct { __u32 pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 
-#define pte_val(x)	((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
+#if CONFIG_PGTABLE_LEVELS == 3
+typedef struct { __u32 pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { (x) } )
+/* pXd_val() do not work as lvalues, so make sure we don't use them as such. */
 #define pmd_val(x)	((x).pmd + 0)
+#endif
+
+#define pte_val(x)	((x).pte)
 #define pgd_val(x)	((x).pgd + 0)
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { (x) } )
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
 #else
 /*
  * .. while these make it easier on the compiler
  */
 typedef unsigned long pte_t;
+
+#if CONFIG_PGTABLE_LEVELS == 3
 typedef         __u32 pmd_t;
+#define pmd_val(x)      (x)
+#define __pmd(x)	(x)
+#endif
+
 typedef         __u32 pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)      (x)
-#define pmd_val(x)      (x)
 #define pgd_val(x)      (x)
 #define pgprot_val(x)   (x)
 
 #define __pte(x)        (x)
-#define __pmd(x)	(x)
 #define __pgd(x)        (x)
 #define __pgprot(x)     (x)
 
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
 #endif /* STRICT_MM_TYPECHECKS */
 
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#if CONFIG_PGTABLE_LEVELS == 3
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
 typedef struct page *pgtable_t;
 
 typedef struct __physmem_range {
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d98647c29b74..9ac74da256b8 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 		/* Populate first pmd with allocated memory.  We mark it
 		 * with PxD_FLAG_ATTACHED as a signal to the system that this
 		 * pmd entry may not be cleared. */
-		__pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | 
-				        PxD_FLAG_VALID | 
-					PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+		set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
+				        PxD_FLAG_VALID |
+					PxD_FLAG_ATTACHED)
+			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
 		/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
 		 * a signal that this pmd may not be freed */
-		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+		set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
 #endif
 	}
 	spin_lock_init(pgd_spinlock(actual_pgd));
@@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 /* Three Level Page Table Support for pmd's */
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	__pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
-		        (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+	set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+			(__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -88,19 +88,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
-#else
-
-/* Two Level Page Table Support for pmd's */
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- */
-
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
-
 #endif
 
 static inline void
@@ -110,14 +97,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 	/* preserve the gateway marker if this is the beginning of
 	 * the permanent pmd */
 	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
-				 PxD_FLAG_VALID |
-				 PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
+				PxD_FLAG_VALID |
+				PxD_FLAG_ATTACHED)
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 	else
 #endif
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }
 
 #define pmd_populate(mm, pmd, pte_page) \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 4ac374b3a99f..f0a365950536 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -3,7 +3,12 @@
 #define _PARISC_PGTABLE_H
 
 #include <asm/page.h>
-#include <asm-generic/4level-fixup.h>
+
+#if CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#endif
 
 #include <asm/fixmap.h>
 
@@ -101,8 +106,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#if CONFIG_PGTABLE_LEVELS == 3
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
+#endif
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
 
@@ -132,19 +139,18 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
 
 /* Definitions for 2nd level */
+#if CONFIG_PGTABLE_LEVELS == 3
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
 #else
-#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD	0
 #endif
-#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
 
 /* Definitions for 1st level */
-#define PGDIR_SHIFT	(PMD_SHIFT + BITS_PER_PMD)
+#define PGDIR_SHIFT	(PLD_SHIFT + BITS_PER_PTE + BITS_PER_PMD)
 #if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
 #define BITS_PER_PGD	(BITS_PER_LONG - PGDIR_SHIFT)
 #else
@@ -317,6 +323,8 @@ extern unsigned long *empty_zero_page;
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+#define pud_flag(x)	(pud_val(x) & PxD_FLAG_MASK)
+#define pud_address(x)	((unsigned long)(pud_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 #define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
 #define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 
@@ -334,42 +342,32 @@ static inline void pmd_clear(pmd_t *pmd) {
 	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
 		/* This is the entry pointing to the permanent pmd
 		 * attached to the pgd; cannot clear it */
-		__pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
+		set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
 	else
 #endif
-		__pmd_val_set(*pmd,  0);
+		set_pmd(pmd,  __pmd(0));
 }
 
 
 
 #if CONFIG_PGTABLE_LEVELS == 3
-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
-#define pgd_page(pgd)	virt_to_page((void *)pgd_page_vaddr(pgd))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud)))
+#define pud_page(pud)	virt_to_page((void *)pud_page_vaddr(pud))
 
 /* For 64 bit we have three level tables */
 
-#define pgd_none(x)     (!pgd_val(x))
-#define pgd_bad(x)      (!(pgd_flag(x) & PxD_FLAG_VALID))
-#define pgd_present(x)  (pgd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pgd_clear(pgd_t *pgd) {
+#define pud_none(x)     (!pud_val(x))
+#define pud_bad(x)      (!(pud_flag(x) & PxD_FLAG_VALID))
+#define pud_present(x)  (pud_flag(x) & PxD_FLAG_PRESENT)
+static inline void pud_clear(pud_t *pud) {
 #if CONFIG_PGTABLE_LEVELS == 3
-	if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pgd; cannot
+	if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
+		/* This is the permanent pmd attached to the pud; cannot
 		 * free it */
 		return;
 #endif
-	__pgd_val_set(*pgd, 0);
+	set_pud(pud, __pud(0));
 }
-#else
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-static inline void pgd_clear(pgd_t * pgdp)	{ }
 #endif
 
 /*
@@ -452,7 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define pmd_index(addr)         (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 #define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
+((pmd_t *) pud_page_vaddr(*(dir)) + pmd_index(address))
 #else
 #define pmd_offset(dir,addr) ((pmd_t *) dir)
 #endif
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 8c0446b04c9e..44235f367674 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -4,7 +4,9 @@
 
 #include <asm-generic/tlb.h>
 
+#if CONFIG_PGTABLE_LEVELS == 3
 #define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+#endif
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
 
 #endif
diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..1088ae4e7af9
--- /dev/null
+++ b/arch/parisc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_PARISC_VMALLOC_H
+#define _ASM_PARISC_VMALLOC_H
+
+#endif /* _ASM_PARISC_VMALLOC_H */
diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h
index 6a2e9ab2ef8d..3b4de5b668c3 100644
--- a/arch/parisc/include/uapi/asm/msgbuf.h
+++ b/arch/parisc/include/uapi/asm/msgbuf.h
@@ -3,6 +3,7 @@
 #define _PARISC_MSGBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
 
 /* 
  * The msqid64_ds structure for parisc architecture, copied from sparc.
@@ -16,9 +17,9 @@
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #if __BITS_PER_LONG == 64
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
+	long		 msg_stime;	/* last msgsnd time */
+	long		 msg_rtime;	/* last msgrcv time */
+	long		 msg_ctime;	/* last change time */
 #else
 	unsigned long	msg_stime_high;
 	unsigned long	msg_stime;	/* last msgsnd time */
diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h
index 3c31163b1241..e2ca4301c7fb 100644
--- a/arch/parisc/include/uapi/asm/sembuf.h
+++ b/arch/parisc/include/uapi/asm/sembuf.h
@@ -3,6 +3,7 @@
 #define _PARISC_SEMBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
 
 /* 
  * The semid64_ds structure for parisc architecture.
@@ -16,8 +17,8 @@
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
 #if __BITS_PER_LONG == 64
-	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
+	long		sem_otime;		/* last semop time */
+	long		sem_ctime;		/* last change time */
 #else
 	unsigned long	sem_otime_high;
 	unsigned long	sem_otime;		/* last semop time */
diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h
index c89b3dd8db21..5da3089be65e 100644
--- a/arch/parisc/include/uapi/asm/shmbuf.h
+++ b/arch/parisc/include/uapi/asm/shmbuf.h
@@ -16,9 +16,9 @@
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 #if __BITS_PER_LONG == 64
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
+	long			shm_atime;	/* last attach time */
+	long			shm_dtime;	/* last detach time */
+	long			shm_ctime;	/* last change time */
 #else
 	unsigned long		shm_atime_high;
 	unsigned long		shm_atime;	/* last attach time */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 2663c8f8be11..068d90950d93 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -37,5 +37,5 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_KPROBES)			+= kprobes.o
-obj-$(CONFIG_KEXEC)			+= kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE)		+= kexec.o relocate_kernel.o
 obj-$(CONFIG_KEXEC_FILE)		+= kexec_file.o
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index a82b3eaa5398..1eedfecc5137 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -365,7 +365,7 @@ void flush_dcache_page(struct page *page)
 		if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
 				      != (addr & (SHM_COLOUR - 1))) {
 			__flush_cache_page(mpnt, addr, page_to_phys(page));
-			if (old_addr)
+			if (parisc_requires_coherency() && old_addr)
 				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
 			old_addr = addr;
 		}
@@ -534,11 +534,14 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
 	pte_t *ptep = NULL;
 
 	if (!pgd_none(*pgd)) {
-		pud_t *pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd_t *pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd))
-				ptep = pte_offset_map(pmd, addr);
+		p4d_t *p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud_t *pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd_t *pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd))
+					ptep = pte_offset_map(pmd, addr);
+			}
 		}
 	}
 	return ptep;
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 3b330e58a4f0..a5f3e50fe976 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -810,7 +810,7 @@ EXPORT_SYMBOL(device_to_hwpath);
 static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high,
                             struct device *parent);
 
-static void walk_lower_bus(struct parisc_device *dev)
+static void __init walk_lower_bus(struct parisc_device *dev)
 {
 	unsigned long io_io_low, io_io_high;
 
@@ -889,8 +889,8 @@ static void print_parisc_device(struct parisc_device *dev)
 	static int count;
 
 	print_pa_hwpath(dev, hw_path);
-	pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
-		++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
+	pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
+		++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type,
 		dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
 
 	if (dev->num_addrs) {
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index b96d74496977..9a03e29c8733 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -940,14 +940,14 @@ intr_restore:
 	rfi
 	nop
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 # define intr_do_preempt	intr_restore
-#endif /* !CONFIG_PREEMPT */
+#endif /* !CONFIG_PREEMPTION */
 
 	.import schedule,code
 intr_do_resched:
 	/* Only call schedule on return to userspace. If we're returning
-	 * to kernel space, we may schedule if CONFIG_PREEMPT, otherwise
+	 * to kernel space, we may schedule if CONFIG_PREEMPTION, otherwise
 	 * we jump back to intr_restore.
 	 */
 	LDREG	PT_IASQ0(%r16), %r20
@@ -979,7 +979,7 @@ intr_do_resched:
 	 * and preempt_count is 0. otherwise, we continue on
 	 * our merry way back to the current running task.
 	 */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	.import preempt_schedule_irq,code
 intr_do_preempt:
 	rsm	PSW_SM_I, %r0		/* disable interrupts */
@@ -999,7 +999,7 @@ intr_do_preempt:
 	nop
 
 	b,n	intr_restore		/* ssm PSW_SM_I done by intr_restore */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 	/*
 	 * External interrupts.
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index ac5f34993b53..1c50093e2ebe 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -43,6 +43,7 @@
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+#include <linux/ftrace.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
@@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 	const char *strtab = NULL;
 	const Elf_Shdr *s;
 	char *secstrings;
-	int err, symindex = -1;
+	int symindex = -1;
 	Elf_Sym *newptr, *oldptr;
 	Elf_Shdr *symhdr = NULL;
 #ifdef DEBUG
@@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr,
 			/* patch .altinstructions */
 			apply_alternatives(aseg, aseg + s->sh_size, me->name);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 		/* For 32 bit kernels we're compiling modules with
 		 * -ffunction-sections so we must relocate the addresses in the
-		 *__mcount_loc section.
+		 *  ftrace callsite section.
 		 */
-		if (symindex != -1 && !strcmp(secname, "__mcount_loc")) {
+		if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) {
+			int err;
 			if (s->sh_type == SHT_REL)
 				err = apply_relocate((Elf_Shdr *)sechdrs,
 							strtab, symindex,
@@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 			if (err)
 				return err;
 		}
+#endif
 	}
 	return 0;
 }
diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds
deleted file mode 100644
index 1a9a92aca5c8..000000000000
--- a/arch/parisc/kernel/module.lds
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-SECTIONS {
-	__mcount_loc : {
-		*(__patchable_function_entries)
-	}
-}
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ca35d9a76e50..0f1b460ee715 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -133,9 +133,14 @@ static inline int map_uncached_pages(unsigned long vaddr, unsigned long size,
 
 	dir = pgd_offset_k(vaddr);
 	do {
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
-		
-		pmd = pmd_alloc(NULL, dir, vaddr);
+
+		p4d = p4d_offset(dir, vaddr);
+		pud = pud_offset(p4d, vaddr);
+		pmd = pmd_alloc(NULL, pud, vaddr);
+
 		if (!pmd)
 			return -ENOMEM;
 		if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr))
@@ -439,14 +444,14 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	free_pages((unsigned long)__va(dma_handle), order);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
 }
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index 36434d4da381..749c4579db0d 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -327,8 +327,7 @@ static int pdt_mainloop(void *unused)
 			    ((pde & PDT_ADDR_SINGLE_ERR) == 0))
 				memory_failure(pde >> PAGE_SHIFT, 0);
 			else
-				soft_offline_page(
-					pfn_to_page(pde >> PAGE_SHIFT), 0);
+				soft_offline_page(pde >> PAGE_SHIFT, 0);
 #else
 			pr_crit("PDT: memory error at 0x%lx ignored.\n"
 				"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 676683641d00..e1a8fee3ad49 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -792,7 +792,7 @@ static int perf_write_image(uint64_t *memaddr)
 		return -1;
 	}
 
-	runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+	runway = ioremap(cpu_device->hpa.start, 4096);
 	if (!runway) {
 		pr_err("perf_write_image: ioremap failed!\n");
 		return -ENOMEM;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index ecc5c2771208..230a6422b99f 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -208,8 +208,8 @@ arch_initcall(parisc_idle_init);
  * Copy architecture-specific thread state
  */
 int
-copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long kthread_arg, struct task_struct *p)
+copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+	    unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *cregs = &(p->thread.regs);
 	void *stack = task_stack_page(p);
@@ -254,9 +254,9 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 		cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE;
 		cregs->kpc = (unsigned long) &child_return;
 
-		/* Setup thread TLS area from the 4th parameter in clone */
+		/* Setup thread TLS area */
 		if (clone_flags & CLONE_SETTLS)
-			cregs->cr27 = cregs->gr[23];
+			cregs->cr27 = tls;
 	}
 
 	return 0;
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 9f6ff7bc06f9..f8c07dcbfb49 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -342,7 +342,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 	}
 
 	/* Do the secure computing check after ptrace. */
-	if (secure_computing(NULL) == -1)
+	if (secure_computing() == -1)
 		return -1;
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 53a21ce927de..e320bae501d3 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -151,10 +151,6 @@ void __init setup_arch(char **cmdline_p)
 	dma_ops_init();
 #endif
 
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;	/* we use do_take_over_console() later ! */
-#endif
-
 	clear_sched_clock_stable();
 }
 
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 285ff516150c..52a15f5cd130 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -433,3 +433,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3_wrapper
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 99cd24f2ea01..53e29d88f99c 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -19,6 +19,7 @@
 				*(.data..vm0.pte)
 
 #define CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define RO_EXCEPTION_TABLE_ALIGN	8
 
 #include <asm-generic/vmlinux.lds.h>
 
@@ -109,7 +110,7 @@ SECTIONS
 	_sdata = .;
 
 	/* Architecturally we need to keep __gp below 0x1000000 and thus
-	 * in front of RO_DATA_SECTION() which stores lots of tracepoint
+	 * in front of RO_DATA() which stores lots of tracepoint
 	 * and ftrace symbols. */
 #ifdef CONFIG_64BIT
 	. = ALIGN(16);
@@ -127,11 +128,7 @@ SECTIONS
 	}
 #endif
 
-	RO_DATA_SECTION(8)
-
-	/* RO because of BUILDTIME_EXTABLE_SORT */
-	EXCEPTION_TABLE(8)
-	NOTES
+	RO_DATA(8)
 
 	/* unwind info */
 	.PARISC.unwind : {
@@ -149,7 +146,7 @@ SECTIONS
 	data_start = .;
 
 	/* Data */
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE)
 
 	/* PA-RISC locks requires 16-byte alignment */
 	. = ALIGN(16);
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index 474cd241c150..e2d8b0a857ee 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -14,11 +14,13 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
 {
 	unsigned long vaddr = __fix_to_virt(idx);
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte;
 
 	if (pmd_none(*pmd))
-		pmd = pmd_alloc(NULL, pgd, vaddr);
+		pmd = pmd_alloc(NULL, pud, vaddr);
 
 	pte = pte_offset_kernel(pmd, vaddr);
 	if (pte_none(*pte))
@@ -32,7 +34,9 @@ void notrace clear_fixmap(enum fixed_addresses idx)
 {
 	unsigned long vaddr = __fix_to_virt(idx);
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte = pte_offset_kernel(pmd, vaddr);
 
 	if (WARN_ON(pte_none(*pte)))
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d578809e55cf..0e1e212f1c96 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -49,6 +49,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
@@ -61,7 +62,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	addr &= HPAGE_MASK;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		pmd = pmd_alloc(mm, pud, addr);
 		if (pmd)
@@ -74,6 +76,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
@@ -82,11 +85,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd))
-				pte = pte_offset_map(pmd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd))
+					pte = pte_offset_map(pmd, addr);
+			}
 		}
 	}
 	return pte;
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index ddca8287d43b..354cf060b67f 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -401,7 +401,7 @@ static void __init map_pages(unsigned long start_vaddr,
 			pmd = (pmd_t *) __pa(pmd);
 		}
 
-		pgd_populate(NULL, pg_dir, __va(pmd));
+		pud_populate(NULL, (pud_t *)pg_dir, __va(pmd));
 #endif
 		pg_dir++;
 
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index f29f682352f0..6e7c005aa09b 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -25,7 +25,7 @@
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
 	void __iomem *addr;
 	struct vm_struct *area;
@@ -36,10 +36,8 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	unsigned long end = phys_addr + size - 1;
 	/* Support EISA addresses */
 	if ((phys_addr >= 0x00080000 && end < 0x000fffff) ||
-	    (phys_addr >= 0x00500000 && end < 0x03bfffff)) {
+	    (phys_addr >= 0x00500000 && end < 0x03bfffff))
 		phys_addr |= F_EXTEND(0xfc000000);
-		flags |= _PAGE_NO_CACHE;
-	}
 #endif
 
 	/* Don't allow wraparound or zero size */
@@ -65,7 +63,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	}
 
 	pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY |
-			  _PAGE_ACCESSED | flags);
+			  _PAGE_ACCESSED | _PAGE_NO_CACHE);
 
 	/*
 	 * Mappings have to be page-aligned
@@ -90,7 +88,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 
 	return (void __iomem *) (offset + (char __iomem *)addr);
 }
-EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap);
 
 void iounmap(const volatile void __iomem *io_addr)
 {
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
index 51e6908323ad..5e2f9eaa3ee7 100644
--- a/arch/powerpc/Kbuild
+++ b/arch/powerpc/Kbuild
@@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/
 obj-$(CONFIG_KVM)  += kvm/
 
 obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE)  += kexec/
 obj-$(CONFIG_KEXEC_FILE)  += purgatory/
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3e56c9c2f16e..c150a9d49343 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -106,7 +106,7 @@ config LOCKDEP_SUPPORT
 config GENERIC_LOCKBREAK
 	bool
 	default y
-	depends on SMP && PREEMPT
+	depends on SMP && PREEMPTION
 
 config GENERIC_HWEIGHT
 	bool
@@ -149,7 +149,7 @@ config PPC
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
 	select DYNAMIC_FTRACE			if FUNCTION_TRACER
@@ -161,6 +161,7 @@ config PPC
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
+	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP		if PCI
@@ -237,6 +238,7 @@ config PPC
 	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
 	select NEED_SG_DMA_LENGTH
 	select OF
+	select OF_DMA_DEFAULT_COHERENT		if !NOT_COHERENT_CACHE
 	select OF_EARLY_FLATTREE
 	select OLD_SIGACTION			if PPC32
 	select OLD_SIGSUSPEND
@@ -451,6 +453,19 @@ config PPC_TRANSACTIONAL_MEM
 	help
 	  Support user-mode Transactional Memory on POWERPC.
 
+config PPC_UV
+	bool "Ultravisor support"
+	depends on KVM_BOOK3S_HV_POSSIBLE
+	depends on DEVICE_PRIVATE
+	default n
+	help
+	  This option paravirtualizes the kernel to run in POWER platforms that
+	  supports the Protected Execution Facility (PEF). On such platforms,
+	  the ultravisor firmware runs at a privilege level above the
+	  hypervisor.
+
+	  If unsure, say "N".
+
 config LD_HEAD_STUB_CATCH
 	bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
 	depends on PPC64
@@ -551,6 +566,17 @@ config RELOCATABLE
 	  setting can still be useful to bootwrappers that need to know the
 	  load address of the kernel (eg. u-boot/mkimage).
 
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	depends on (FSL_BOOKE && FLATMEM && PPC32)
+	depends on RELOCATABLE
+	help
+	  Randomizes the virtual address at which the kernel image is
+	  loaded, as a security feature that deters exploit attempts
+	  relying on knowledge of the location of kernel internals.
+
+	  If unsure, say Y.
+
 config RELOCATABLE_TEST
 	bool "Test relocatable kernel"
 	depends on (PPC64 && RELOCATABLE)
@@ -874,15 +900,33 @@ config CMDLINE
 	  some command-line options at build time by entering them here.  In
 	  most cases you will need to specify the root device here.
 
+choice
+	prompt "Kernel command line type" if CMDLINE != ""
+	default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader. If
+	  the boot loader doesn't provide any, the default kernel command
+	  string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the default kernel command string.
+
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
-	depends on CMDLINE_BOOL
 	help
 	  Always use the default kernel command string, even if the boot
 	  loader passes other arguments to the kernel.
 	  This is useful if you cannot or don't want to change the
 	  command-line options your boot loader passes to the kernel.
 
+endchoice
+
 config EXTRA_TARGETS
 	string "Additional default image types"
 	help
@@ -934,6 +978,28 @@ config PPC_MEM_KEYS
 
 	  If unsure, say y.
 
+config PPC_SECURE_BOOT
+	prompt "Enable secure boot support"
+	bool
+	depends on PPC_POWERNV
+	depends on IMA_ARCH_POLICY
+	help
+	  Systems with firmware secure boot enabled need to define security
+	  policies to extend secure boot to the OS. This config allows a user
+	  to enable OS secure boot on systems that have firmware support for
+	  it. If in doubt say N.
+
+config PPC_SECVAR_SYSFS
+	bool "Enable sysfs interface for POWER secure variables"
+	default y
+	depends on PPC_SECURE_BOOT
+	depends on SYSFS
+	help
+	  POWER secure variables are managed and controlled by firmware.
+	  These variables are exposed to userspace via sysfs to enable
+	  read/write operations on these variables. Say Y if you have
+	  secure boot enabled and want to expose variables to userspace.
+
 endmenu
 
 config ISA_DMA_API
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index c59920920ddc..4e1d39847462 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -122,8 +122,8 @@ config XMON_DEFAULT_RO_MODE
 	depends on XMON
 	default y
 	help
-          Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
-          'xmon=ro' override this default.
+	  Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+	  'xmon=ro' override this default.
 
 config DEBUGGER
 	bool
@@ -222,7 +222,7 @@ config PPC_EARLY_DEBUG_44x
 	help
 	  Select this to enable early debugging for IBM 44x chips via the
 	  inbuilt serial port.  If you enable this, ensure you set
-          PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
+	  PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
 
 config PPC_EARLY_DEBUG_40x
 	bool "Early serial debugging for IBM/AMCC 40x CPUs"
@@ -325,7 +325,7 @@ config PPC_EARLY_DEBUG_44x_PHYSLOW
 	default "0x40000200"
 	help
 	  You probably want 0x40000200 for ebony boards and
-          0x40000300 for taishan
+	  0x40000300 for taishan
 
 config PPC_EARLY_DEBUG_44x_PHYSHIGH
 	hex "EPRN of early debug UART physical address"
@@ -359,9 +359,9 @@ config FAIL_IOMMU
 	  If you are unsure, say N.
 
 config PPC_PTDUMP
-        bool "Export kernel pagetable layout to userspace via debugfs"
-        depends on DEBUG_KERNEL && DEBUG_FS
-        help
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL && DEBUG_FS
+	help
 	  This option exports the state of the kernel pagetables to a
 	  debugfs file. This is only useful for kernel developers who are
 	  working in architecture specific areas of the kernel - probably
@@ -390,8 +390,8 @@ config PPC_DEBUG_WX
 
 config PPC_FAST_ENDIAN_SWITCH
 	bool "Deprecated fast endian-switch syscall"
-        depends on DEBUG_KERNEL && PPC_BOOK3S_64
-        help
+	depends on DEBUG_KERNEL && PPC_BOOK3S_64
+	help
 	  If you're unsure what this is, say N.
 
 config KASAN_SHADOW_OFFSET
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 83522c9fc7b6..f35730548e42 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -91,11 +91,13 @@ MULTIPLEWORD	:= -mmultiple
 endif
 
 ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
 aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
 endif
+endif
 
 ifndef CONFIG_CC_IS_CLANG
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
@@ -141,6 +143,7 @@ endif
 endif
 
 CFLAGS-$(CONFIG_PPC64)	:= $(call cc-option,-mtraceback=no)
+ifndef CONFIG_CC_IS_CLANG
 ifdef CONFIG_CPU_LITTLE_ENDIAN
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
@@ -149,6 +152,7 @@ CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
 AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
+endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
 
@@ -330,32 +334,32 @@ powernv_be_defconfig:
 
 PHONY += mpc85xx_defconfig
 mpc85xx_defconfig:
-	$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+	$(call merge_into_defconfig,mpc85xx_base.config,\
 		85xx-32bit 85xx-hw fsl-emb-nonhw)
 
 PHONY += mpc85xx_smp_defconfig
 mpc85xx_smp_defconfig:
-	$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+	$(call merge_into_defconfig,mpc85xx_base.config,\
 		85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
 
 PHONY += corenet32_smp_defconfig
 corenet32_smp_defconfig:
-	$(call merge_into_defconfig,corenet_basic_defconfig,\
+	$(call merge_into_defconfig,corenet_base.config,\
 		85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += corenet64_smp_defconfig
 corenet64_smp_defconfig:
-	$(call merge_into_defconfig,corenet_basic_defconfig,\
+	$(call merge_into_defconfig,corenet_base.config,\
 		85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += mpc86xx_defconfig
 mpc86xx_defconfig:
-	$(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+	$(call merge_into_defconfig,mpc86xx_base.config,\
 		86xx-hw fsl-emb-nonhw)
 
 PHONY += mpc86xx_smp_defconfig
 mpc86xx_smp_defconfig:
-	$(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+	$(call merge_into_defconfig,mpc86xx_base.config,\
 		86xx-smp 86xx-hw fsl-emb-nonhw)
 
 PHONY += ppc32_allmodconfig
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
index 48b7f9797124..8e7f0828af29 100644
--- a/arch/powerpc/boot/dts/fsl/kmcent2.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -210,13 +210,19 @@
 
 		fman@400000 {
 			ethernet@e0000 {
-				fixed-link = <0 1 1000 0 0>;
-				phy-connection-type = "sgmii";
+				phy-mode = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
 			};
 
 			ethernet@e2000 {
-				fixed-link = <1 1 1000 0 0>;
-				phy-connection-type = "sgmii";
+				phy-mode = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
 			};
 
 			ethernet@e4000 {
@@ -229,7 +235,7 @@
 
 			ethernet@e8000 {
 				phy-handle = <&front_phy>;
-				phy-connection-type = "rgmii";
+				phy-mode = "rgmii-id";
 			};
 
 			mdio0: mdio@fc000 {
@@ -258,14 +264,50 @@
 
 	pci1: pcie@ffe250000 {
 		status = "disabled";
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+			  0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
 	};
 
 	pci2: pcie@ffe260000 {
 		status = "disabled";
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
 	};
 
 	pci3: pcie@ffe270000 {
 		status = "disabled";
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
 	};
 
 	qe: qe@ffe140000 {
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
index e1a961f05dcd..baa0c503e741 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
@@ -63,6 +63,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy0: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
index c288f3c6c637..93095600e808 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
@@ -60,6 +60,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xf1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy6: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
index 94f3e7175012..ff4bd38f0645 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
@@ -63,6 +63,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy1: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
index 94a76982d214..1fa38ed6f59e 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
@@ -60,6 +60,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xf3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy7: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
index b5ff5f71c6b8..a8cc9780c0c4 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy0: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
index ee44182c6348..8b8bd70c9382 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy1: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
index f05f0d775039..619c880b54d8 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe5000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy2: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
index a9114ec51075..d7ebb73a400d 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe7000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy3: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
index 44dd00ac7367..b151d696a069 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe9000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy4: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
index 5b1b84b58602..adc0ae0013a3 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xeb000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy5: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
index 0e1daaef9e74..435047e0e250 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
@@ -60,6 +60,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xf1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy14: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
index 68c5ef779266..c098657cca0a 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
@@ -60,6 +60,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xf3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy15: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
index 605363cc1117..9d06824815f3 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy8: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
index 1955dfa13634..70e947730c4b 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy9: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
index 2c1476454ee0..ad96e6529595 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe5000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy10: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
index b8b541ff5fb0..034bc4b71f7a 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe7000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy11: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
index 4b2cfddd1b15..93ca23d82b39 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xe9000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy12: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
index 0a52ddf7cc17..23b3117a2fd2 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xeb000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
 
 		pcsphy13: ethernet-phy@0 {
 			reg = <0x0>;
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index 2abc8e83b95e..9757d4f6331e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -6,6 +6,8 @@
 #include <string.h>
 
 #define INT_MAX			((int)(~0U>>1))
+#define UINT32_MAX		((u32)~0U)
+#define INT32_MAX		((s32)(UINT32_MAX >> 1))
 
 #include "of.h"
 
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index 5a75e4f14273..db93c117be36 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index e2691c5db766..a3854cf65f8d 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -17,9 +17,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index 949989ef2322..edc22464dfb5 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index 4347a87088dc..579fa846839c 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index 90b759bbf426..188789b9aa4c 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -17,9 +17,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
index 881c300c011d..5bf6af7ef093 100644
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index 0ed46704b9fa..9eaaf1a1d2c6 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -15,9 +15,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 2fa553ebfdc9..f0c8a07cc274 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 5a1b9ee18075..82c6f49b8dcb 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 22e1ef5272ab..679213214a75 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index 86f34ea4173a..ccc14eb7a2f1 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index ce3ec5a2cd15..be76e066df01 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -27,9 +27,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index f67447c92e6f..93d2a4e64af9 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -16,9 +16,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index 5dbd83a1c11b..1abaa63e067f 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -21,9 +21,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e49114f0e526..e67fc041ca3e 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -39,9 +39,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_VLAN_8021Q=m
 CONFIG_DEVTMPFS=y
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index fa5378af44f9..7d7ff84c8200 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index aae879c21239..fb5c73a29bf4 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index 56eddca998c6..c6dc1445fc04 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index 369bfd2e451d..c83ad03182df 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -19,9 +19,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 8be95f6fe3a7..640fe1d5af28 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -21,9 +21,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 974a4f038cda..ed02f12dbd54 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -23,9 +23,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 10e517b69fa4..2c0973db8837 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index cd08f3ddd609..a2d355ca62b2 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig
index 303600ff1fdb..fdb11daeb688 100644
--- a/arch/powerpc/configs/52xx/pcm030_defconfig
+++ b/arch/powerpc/configs/52xx/pcm030_defconfig
@@ -31,9 +31,6 @@ CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
index d21b5cb365f2..648c6b3dccf9 100644
--- a/arch/powerpc/configs/83xx/kmeter1_defconfig
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -25,9 +25,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_TIPC=y
 CONFIG_BRIDGE=m
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index dad53ef86b49..cbcae2a927e9 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -22,9 +22,6 @@ CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index 920f37316fdb..f29c166998af 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -60,7 +60,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_TUNNEL=m
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index f7a803ab2285..510f7fd1f6a3 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -22,9 +22,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index cf94d28d0e31..f6d140f2d922 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -26,9 +26,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 2dd1b58a18ae..42fbc70cec33 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -51,11 +51,9 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
-# CONFIG_INET6_XFRM_MODE_BEET is not set
 # CONFIG_IPV6_SIT is not set
 CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index 9ff493dd8439..502a75d49789 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/corenet_basic_defconfig b/arch/powerpc/configs/corenet_base.config
index b568d465e59e..b568d465e59e 100644
--- a/arch/powerpc/configs/corenet_basic_defconfig
+++ b/arch/powerpc/configs/corenet_base.config
diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config
new file mode 100644
index 000000000000..a14ae1f20d60
--- /dev/null
+++ b/arch/powerpc/configs/debug.config
@@ -0,0 +1 @@
+CONFIG_SCOM_DEBUGFS=y
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index b20bd0cf3543..9c1bf60f1e19 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -24,9 +24,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 85e73c3bd859..24c0e0ea5aeb 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index 6203c1093a3a..1f3a045ab081 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -25,9 +25,6 @@ CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_CAN=y
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 6f87a5c74960..83d801307178 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -15,7 +15,6 @@ CONFIG_PPC_MEDIA5200=y
 CONFIG_PPC_MPC5200_BUGFIX=y
 CONFIG_PPC_MPC5200_LPBFIFO=m
 # CONFIG_PPC_PMAC is not set
-CONFIG_SIMPLE_GPIO=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_base.config
index b1593fe6f70b..b1593fe6f70b 100644
--- a/arch/powerpc/configs/mpc85xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc85xx_base.config
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_base.config
index 67bd1fa036ee..67bd1fa036ee 100644
--- a/arch/powerpc/configs/mpc86xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc86xx_base.config
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 285d506c5a76..0327a329316f 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -23,9 +23,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 4e6e95f92646..f492e7d35925 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -38,8 +38,6 @@ CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=y
 CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 6658cceb928c..32841456a573 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -83,9 +83,6 @@ CONFIG_INET_IPCOMP=m
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
 CONFIG_IPV6_SIT=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 67952819593e..a41eedfe0a5f 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -32,9 +32,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_BRIDGE=m
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 9dca4cffa623..7e28919041cf 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -109,9 +109,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
 CONFIG_INET_DIAG=m
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_HSTCP=m
@@ -129,7 +126,6 @@ CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
 CONFIG_IPV6_TUNNEL=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 314c63939816..4db51719342a 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -47,9 +47,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_BT=m
 CONFIG_BT_RFCOMM=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 1253482a67c0..069f67f12731 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -46,6 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC=y
+CONFIG_PRESERVE_FA_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_NUMA=y
 # CONFIG_COMPACTION is not set
@@ -63,9 +64,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_NET_IPIP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_DNS_RESOLVER=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 6c39c52b8e4a..29b19ec7e5d7 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -22,9 +22,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 7493f36dd6e9..ffed2b4256d6 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 5a04448ad6b5..379c171f3ddd 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_BT=y
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 3a4ca7d32477..c2b23b69d7b1 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -17,7 +17,10 @@
 #include <asm/byteorder.h>
 #include <asm/switch_to.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
 
 /*
  * MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -91,13 +94,6 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 {
 	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (key_len != AES_KEYSIZE_128 &&
-	    key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
 	switch (key_len) {
 	case AES_KEYSIZE_128:
 		ctx->rounds = 4;
@@ -111,6 +107,8 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 		ctx->rounds = 6;
 		ppc_expand_key_256(ctx->key_enc, in_key);
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
@@ -118,25 +116,24 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *in_key, unsigned int key_len)
+{
+	return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 		   unsigned int key_len)
 {
-	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = xts_check_key(tfm, in_key, key_len);
+	err = xts_verify_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
 	key_len >>= 1;
 
-	if (key_len != AES_KEYSIZE_128 &&
-	    key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
 	switch (key_len) {
 	case AES_KEYSIZE_128:
 		ctx->rounds = 4;
@@ -153,6 +150,8 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 		ppc_expand_key_256(ctx->key_enc, in_key);
 		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
@@ -178,208 +177,229 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	spe_end();
 }
 
-static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
 {
-	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);
 
 		spe_begin();
-		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_enc, ctx->rounds, nbytes);
+		if (enc)
+			ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes);
+		else
+			ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes);
 		spe_end();
 
-		err = blkcipher_walk_done(desc, &walk, ubytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_encrypt(struct skcipher_request *req)
 {
-	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
-	int err;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
-
-		spe_begin();
-		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_dec, ctx->rounds, nbytes);
-		spe_end();
-
-		err = blkcipher_walk_done(desc, &walk, ubytes);
-	}
+	return ppc_ecb_crypt(req, true);
+}
 
-	return err;
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+	return ppc_ecb_crypt(req, false);
 }
 
-static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
 {
-	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);
 
 		spe_begin();
-		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+		if (enc)
+			ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes,
+					walk.iv);
+		else
+			ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes,
+					walk.iv);
 		spe_end();
 
-		err = blkcipher_walk_done(desc, &walk, ubytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_encrypt(struct skcipher_request *req)
 {
-	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
-	int err;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
-
-		spe_begin();
-		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
-		spe_end();
-
-		err = blkcipher_walk_done(desc, &walk, ubytes);
-	}
+	return ppc_cbc_crypt(req, true);
+}
 
-	return err;
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+	return ppc_cbc_crypt(req, false);
 }
 
-static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			 struct scatterlist *src, unsigned int nbytes)
+static int ppc_ctr_crypt(struct skcipher_request *req)
 {
-	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int pbytes, ubytes;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((pbytes = walk.nbytes)) {
-		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
-		pbytes = pbytes == nbytes ?
-			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
-		ubytes = walk.nbytes - pbytes;
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
 
 		spe_begin();
 		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
-			      ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+			      ctx->key_enc, ctx->rounds, nbytes, walk.iv);
 		spe_end();
 
-		nbytes -= pbytes;
-		err = blkcipher_walk_done(desc, &walk, ubytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
 {
-	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	u32 *twk;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 	twk = ctx->key_twk;
 
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);
 
 		spe_begin();
-		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+		if (enc)
+			ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes,
+					walk.iv, twk);
+		else
+			ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes,
+					walk.iv, twk);
 		spe_end();
 
 		twk = NULL;
-		err = blkcipher_walk_done(desc, &walk, ubytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			   struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_encrypt(struct skcipher_request *req)
 {
-	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	unsigned int ubytes;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+	struct skcipher_request subreq;
+	u8 b[2][AES_BLOCK_SIZE];
 	int err;
-	u32 *twk;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	twk = ctx->key_twk;
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
 
-	while ((nbytes = walk.nbytes)) {
-		ubytes = nbytes > MAX_BYTES ?
-			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-		nbytes -= ubytes;
+	if (tail) {
+		subreq = *req;
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
 
-		spe_begin();
-		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
-		spe_end();
+	err = ppc_xts_crypt(req, true);
+	if (err || !tail)
+		return err;
 
-		twk = NULL;
-		err = blkcipher_walk_done(desc, &walk, ubytes);
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+	memcpy(b[1], b[0], tail);
+	scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
+
+	spe_begin();
+	ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+			req->iv, NULL);
+	spe_end();
+
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+	return 0;
+}
+
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+	struct skcipher_request subreq;
+	u8 b[3][AES_BLOCK_SIZE];
+	le128 twk;
+	int err;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (tail) {
+		subreq = *req;
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   offset, req->iv);
+		req = &subreq;
 	}
 
-	return err;
+	err = ppc_xts_crypt(req, false);
+	if (err || !tail)
+		return err;
+
+	scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+	spe_begin();
+	if (!offset)
+		ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+				AES_BLOCK_SIZE);
+
+	gf128mul_x_ble(&twk, (le128 *)req->iv);
+
+	ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+			(u8 *)&twk, NULL);
+	memcpy(b[0], b[2], tail);
+	memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
+	ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+			req->iv, NULL);
+	spe_end();
+
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+	return 0;
 }
 
 /*
@@ -388,9 +408,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  * This improves IPsec thoughput by another few percent. Additionally we assume
  * that AES context is always aligned to at least 8 bytes because it is created
  * with kmalloc() in the crypto infrastructure
- *
  */
-static struct crypto_alg aes_algs[] = { {
+
+static struct crypto_alg aes_cipher_alg = {
 	.cra_name		=	"aes",
 	.cra_driver_name	=	"aes-ppc-spe",
 	.cra_priority		=	300,
@@ -408,96 +428,84 @@ static struct crypto_alg aes_algs[] = { {
 			.cia_decrypt		=	ppc_aes_decrypt
 		}
 	}
-}, {
-	.cra_name		=	"ecb(aes)",
-	.cra_driver_name	=	"ecb-ppc-spe",
-	.cra_priority		=	300,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
-	.cra_alignmask		=	0,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ppc_aes_setkey,
-			.encrypt		=	ppc_ecb_encrypt,
-			.decrypt		=	ppc_ecb_decrypt,
-		}
-	}
-}, {
-	.cra_name		=	"cbc(aes)",
-	.cra_driver_name	=	"cbc-ppc-spe",
-	.cra_priority		=	300,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
-	.cra_alignmask		=	0,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ppc_aes_setkey,
-			.encrypt		=	ppc_cbc_encrypt,
-			.decrypt		=	ppc_cbc_decrypt,
-		}
-	}
-}, {
-	.cra_name		=	"ctr(aes)",
-	.cra_driver_name	=	"ctr-ppc-spe",
-	.cra_priority		=	300,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
-	.cra_alignmask		=	0,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ppc_aes_setkey,
-			.encrypt		=	ppc_ctr_crypt,
-			.decrypt		=	ppc_ctr_crypt,
-		}
-	}
-}, {
-	.cra_name		=	"xts(aes)",
-	.cra_driver_name	=	"xts-ppc-spe",
-	.cra_priority		=	300,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
-	.cra_alignmask		=	0,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
-			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ppc_xts_setkey,
-			.encrypt		=	ppc_xts_encrypt,
-			.decrypt		=	ppc_xts_decrypt,
-		}
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = {
+	{
+		.base.cra_name		=	"ecb(aes)",
+		.base.cra_driver_name	=	"ecb-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_ecb_encrypt,
+		.decrypt		=	ppc_ecb_decrypt,
+	}, {
+		.base.cra_name		=	"cbc(aes)",
+		.base.cra_driver_name	=	"cbc-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_cbc_encrypt,
+		.decrypt		=	ppc_cbc_decrypt,
+	}, {
+		.base.cra_name		=	"ctr(aes)",
+		.base.cra_driver_name	=	"ctr-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	1,
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_ctr_crypt,
+		.decrypt		=	ppc_ctr_crypt,
+		.chunksize		=	AES_BLOCK_SIZE,
+	}, {
+		.base.cra_name		=	"xts(aes)",
+		.base.cra_driver_name	=	"xts-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_xts_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE * 2,
+		.max_keysize		=	AES_MAX_KEY_SIZE * 2,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_xts_setkey,
+		.encrypt		=	ppc_xts_encrypt,
+		.decrypt		=	ppc_xts_decrypt,
 	}
-} };
+};
 
 static int __init ppc_aes_mod_init(void)
 {
-	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	int err;
+
+	err = crypto_register_alg(&aes_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(aes_skcipher_algs,
+					ARRAY_SIZE(aes_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&aes_cipher_alg);
+	return err;
 }
 
 static void __exit ppc_aes_mod_fini(void)
 {
-	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	crypto_unregister_alg(&aes_cipher_alg);
+	crypto_unregister_skciphers(aes_skcipher_algs,
+				    ARRAY_SIZE(aes_skcipher_algs));
 }
 
 module_init(ppc_aes_mod_init);
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 47985219a68f..dce86e75f1a8 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -103,6 +103,7 @@ static int __init crc_test_init(void)
 				       crc32, verify32, len);
 				break;
 			}
+		cond_resched();
 		}
 		pr_info("crc-vpmsum_test done, completed %lu iterations\n", i);
 	} while (0);
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 2c232898b933..63760b7dbb76 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -73,10 +73,8 @@ static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 64870c7be4a3..d0a23d0db863 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,10 +4,11 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += export.h
 generic-y += irq_regs.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += vtime.h
-generic-y += msi.h
+generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9c63b596e6ce..a09595f00cab 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -28,7 +28,7 @@ static inline int arch_get_random_seed_int(unsigned int *v)
 	unsigned long val;
 	int rc;
 
-	rc = arch_get_random_long(&val);
+	rc = arch_get_random_seed_long(&val);
 	if (rc)
 		*v = val;
 
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 8561498e653c..983c0084fb3f 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -92,7 +92,8 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 long sys_debug_setcontext(struct ucontext __user *ctx,
 			  int ndbg, struct sig_dbg_op __user *dbg);
 int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
+	   struct __kernel_old_timeval __user *tvp);
 unsigned long __init early_init(unsigned long dt_ptr);
 void __init machine_init(u64 dt_ptr);
 #endif
@@ -152,9 +153,12 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 /* Patch sites */
 extern s32 patch__call_flush_count_cache;
 extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
 extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_count_cache;
+extern long kvm_flush_link_stack;
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index fbe8df433019..123adcefd40f 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -18,8 +18,6 @@
  * mb() prevents loads and stores being reordered across this point.
  * rmb() prevents loads being reordered across this point.
  * wmb() prevents stores being reordered across this point.
- * read_barrier_depends() prevents data-dependent loads being reordered
- *	across this point (nop on PPC).
  *
  * *mb() variants without smp_ prefix must order all types of memory
  * operations with one another. sync is the only instruction sufficient
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 603aed229af7..28dcf8222943 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -64,7 +64,7 @@
 
 /* Macro for generating the ***_bits() functions */
 #define DEFINE_BITOP(fn, op, prefix)		\
-static __inline__ void fn(unsigned long mask,	\
+static inline void fn(unsigned long mask,	\
 		volatile unsigned long *_p)	\
 {						\
 	unsigned long old;			\
@@ -86,22 +86,22 @@ DEFINE_BITOP(clear_bits, andc, "")
 DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER)
 DEFINE_BITOP(change_bits, xor, "")
 
-static __inline__ void set_bit(int nr, volatile unsigned long *addr)
+static inline void arch_set_bit(int nr, volatile unsigned long *addr)
 {
 	set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
-static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
 {
 	clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
-static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit_unlock(int nr, volatile unsigned long *addr)
 {
 	clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
-static __inline__ void change_bit(int nr, volatile unsigned long *addr)
+static inline void arch_change_bit(int nr, volatile unsigned long *addr)
 {
 	change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
@@ -109,7 +109,7 @@ static __inline__ void change_bit(int nr, volatile unsigned long *addr)
 /* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
  * operands. */
 #define DEFINE_TESTOP(fn, op, prefix, postfix, eh)	\
-static __inline__ unsigned long fn(			\
+static inline unsigned long fn(			\
 		unsigned long mask,			\
 		volatile unsigned long *_p)		\
 {							\
@@ -138,34 +138,34 @@ DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
 DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
 	      PPC_ATOMIC_EXIT_BARRIER, 0)
 
-static __inline__ int test_and_set_bit(unsigned long nr,
-				       volatile unsigned long *addr)
+static inline int arch_test_and_set_bit(unsigned long nr,
+					volatile unsigned long *addr)
 {
 	return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
-static __inline__ int test_and_set_bit_lock(unsigned long nr,
-				       volatile unsigned long *addr)
+static inline int arch_test_and_set_bit_lock(unsigned long nr,
+					     volatile unsigned long *addr)
 {
 	return test_and_set_bits_lock(BIT_MASK(nr),
 				addr + BIT_WORD(nr)) != 0;
 }
 
-static __inline__ int test_and_clear_bit(unsigned long nr,
-					 volatile unsigned long *addr)
+static inline int arch_test_and_clear_bit(unsigned long nr,
+					  volatile unsigned long *addr)
 {
 	return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
-static __inline__ int test_and_change_bit(unsigned long nr,
-					  volatile unsigned long *addr)
+static inline int arch_test_and_change_bit(unsigned long nr,
+					   volatile unsigned long *addr)
 {
 	return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
 #ifdef CONFIG_PPC64
-static __inline__ unsigned long clear_bit_unlock_return_word(int nr,
-						volatile unsigned long *addr)
+static inline unsigned long
+clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
 {
 	unsigned long old, t;
 	unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);
@@ -185,15 +185,18 @@ static __inline__ unsigned long clear_bit_unlock_return_word(int nr,
 	return old;
 }
 
-/* This is a special function for mm/filemap.c */
-#define clear_bit_unlock_is_negative_byte(nr, addr)			\
-	(clear_bit_unlock_return_word(nr, addr) & BIT_MASK(PG_waiters))
+/*
+ * This is a special function for mm/filemap.c
+ * Bit 7 corresponds to PG_waiters.
+ */
+#define arch_clear_bit_unlock_is_negative_byte(nr, addr)		\
+	(clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7))
 
 #endif /* CONFIG_PPC64 */
 
 #include <asm-generic/bitops/non-atomic.h>
 
-static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__(PPC_RELEASE_BARRIER "" ::: "memory");
 	__clear_bit(nr, addr);
@@ -215,14 +218,14 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
  * fls: find last (most-significant) bit set.
  * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
  */
-static __inline__ int fls(unsigned int x)
+static inline int fls(unsigned int x)
 {
 	return 32 - __builtin_clz(x);
 }
 
 #include <asm-generic/bitops/builtin-__fls.h>
 
-static __inline__ int fls64(__u64 x)
+static inline int fls64(__u64 x)
 {
 	return 64 - __builtin_clzll(x);
 }
@@ -239,6 +242,10 @@ unsigned long __arch_hweight64(__u64 w);
 
 #include <asm-generic/bitops/find.h>
 
+/* wrappers that deal with KASAN instrumentation */
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
 /* Little-endian versions */
 #include <asm-generic/bitops/le.h>
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 15b75005bc34..3fa1b962dc27 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -600,8 +600,11 @@ extern void slb_set_size(u16 size);
  *
  */
 #define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
+
+// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel
 #define MIN_USER_CONTEXT	(MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
-				 MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
+				 MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2)
+
 /*
  * For platforms that support on 65bit VA we limit the context bits
  */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index d5a44912902f..f6968c811026 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -122,11 +122,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				  unsigned long address)
 {
-	/*
-	 * By now all the pud entries should be none entries. So go
-	 * ahead and flush the page walk cache
-	 */
-	flush_tlb_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, pud, PUD_INDEX);
 }
 
@@ -143,11 +138,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				  unsigned long address)
 {
-	/*
-	 * By now all the pud entries should be none entries. So go
-	 * ahead and flush the page walk cache
-	 */
-	flush_tlb_pgtable(tlb, address);
 	return pgtable_free_tlb(tlb, pmd, PMD_INDEX);
 }
 
@@ -166,11 +156,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	/*
-	 * By now all the pud entries should be none entries. So go
-	 * ahead and flush the page walk cache
-	 */
-	flush_tlb_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, table, PTE_INDEX);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index a069dfcac9a9..4e697bc2f4cd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -70,9 +70,6 @@ static inline int get_hugepd_cache_index(int index)
 	/* should not reach */
 }
 
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index e3d4dd4ae2fa..34d1018896b3 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -59,9 +59,6 @@ static inline int get_hugepd_cache_index(int index)
 	BUG();
 }
 
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 7aa8195b6cff..dcb5c3839d2f 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -147,22 +147,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
 		flush_tlb_page(vma, address);
 }
 
-/*
- * flush the page walk cache for the address
- */
-static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
-	/*
-	 * Flush the page table walk cache on freeing a page table. We already
-	 * have marked the upper/higher level page table entry none by now.
-	 * So it is safe to flush PWC here.
-	 */
-	if (!radix_enabled())
-		return;
-
-	radix__flush_tlb_pwc(tlb, address);
-}
-
 extern bool tlbie_capable;
 extern bool tlbie_enabled;
 
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f47e6ff6554d..338f36cd9934 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -49,6 +49,15 @@
 	".previous\n"
 #endif
 
+#define BUG_ENTRY(insn, flags, ...)			\
+	__asm__ __volatile__(				\
+		"1:	" insn "\n"			\
+		_EMIT_BUG_ENTRY				\
+		: : "i" (__FILE__), "i" (__LINE__),	\
+		  "i" (flags),				\
+		  "i" (sizeof(struct bug_entry)),	\
+		  ##__VA_ARGS__)
+
 /*
  * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
  * optimisations. However depending on the complexity of the condition
@@ -56,11 +65,7 @@
  */
 
 #define BUG() do {						\
-	__asm__ __volatile__(					\
-		"1:	twi 31,0,0\n"				\
-		_EMIT_BUG_ENTRY					\
-		: : "i" (__FILE__), "i" (__LINE__),		\
-		    "i" (0), "i"  (sizeof(struct bug_entry)));	\
+	BUG_ENTRY("twi 31, 0, 0", 0);				\
 	unreachable();						\
 } while (0)
 
@@ -69,23 +74,11 @@
 		if (x)						\
 			BUG();					\
 	} else {						\
-		__asm__ __volatile__(				\
-		"1:	"PPC_TLNEI"	%4,0\n"			\
-		_EMIT_BUG_ENTRY					\
-		: : "i" (__FILE__), "i" (__LINE__), "i" (0),	\
-		  "i" (sizeof(struct bug_entry)),		\
-		  "r" ((__force long)(x)));			\
+		BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x)));	\
 	}							\
 } while (0)
 
-#define __WARN_FLAGS(flags) do {				\
-	__asm__ __volatile__(					\
-		"1:	twi 31,0,0\n"				\
-		_EMIT_BUG_ENTRY					\
-		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_WARNING|(flags)),		\
-		  "i" (sizeof(struct bug_entry)));		\
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
 
 #define WARN_ON(x) ({						\
 	int __ret_warn_on = !!(x);				\
@@ -93,13 +86,9 @@
 		if (__ret_warn_on)				\
 			__WARN();				\
 	} else {						\
-		__asm__ __volatile__(				\
-		"1:	"PPC_TLNEI"	%4,0\n"			\
-		_EMIT_BUG_ENTRY					\
-		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
-		  "i" (sizeof(struct bug_entry)),		\
-		  "r" (__ret_warn_on));				\
+		BUG_ENTRY(PPC_TLNEI " %4, 0",			\
+			  BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),	\
+			  "r" (__ret_warn_on));	\
 	}							\
 	unlikely(__ret_warn_on);				\
 })
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 45e3137ccd71..72b81015cebe 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -55,42 +55,48 @@ struct ppc64_caches {
 
 extern struct ppc64_caches ppc64_caches;
 
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
 	return ppc64_caches.l1d.log_block_size;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
 	return ppc64_caches.l1d.block_size;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+	return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+	return ppc64_caches.l1i.block_size;
+}
 #else
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
 	return L1_CACHE_SHIFT;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
 	return L1_CACHE_BYTES;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+	return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+	return L1_CACHE_BYTES;
+}
+
 #endif
-#endif /* ! __ASSEMBLY__ */
-
-#if defined(__ASSEMBLY__)
-/*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
-#define PURGE_PREFETCHED_INS	\
-	sync;			\
-	icbi	0,r3;		\
-	sync;			\
-	isync
 
-#else
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -124,6 +130,17 @@ static inline void dcbst(void *addr)
 {
 	__asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
 }
+
+static inline void icbi(void *addr)
+{
+	asm volatile ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void *addr)
+{
+	asm volatile ("iccci 0, %0" : : "r"(addr) : "memory");
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index eef388f2659f..4a1c9f0200e1 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -42,29 +42,25 @@ extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
-extern void flush_icache_range(unsigned long, unsigned long);
+void flush_icache_range(unsigned long start, unsigned long stop);
 extern void flush_icache_user_range(struct vm_area_struct *vma,
 				    struct page *page, unsigned long addr,
 				    int len);
-extern void __flush_dcache_icache(void *page_va);
 extern void flush_dcache_icache_page(struct page *page);
-#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
-extern void __flush_dcache_icache_phys(unsigned long physaddr);
-#else
-static inline void __flush_dcache_icache_phys(unsigned long physaddr)
-{
-	BUG();
-}
-#endif
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
+void __flush_dcache_icache(void *page);
+
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory and
+ * invalidate them. Does not invalidate the corresponding instruction cache
+ * blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long shift = l1_cache_shift();
-	unsigned long bytes = l1_cache_bytes();
+	unsigned long shift = l1_dcache_shift();
+	unsigned long bytes = l1_dcache_bytes();
 	void *addr = (void *)(start & ~(bytes - 1));
 	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
@@ -89,8 +85,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
  */
 static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long shift = l1_cache_shift();
-	unsigned long bytes = l1_cache_bytes();
+	unsigned long shift = l1_dcache_shift();
+	unsigned long bytes = l1_dcache_bytes();
 	void *addr = (void *)(start & ~(bytes - 1));
 	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
@@ -108,8 +104,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 static inline void invalidate_dcache_range(unsigned long start,
 					   unsigned long stop)
 {
-	unsigned long shift = l1_cache_shift();
-	unsigned long bytes = l1_cache_bytes();
+	unsigned long shift = l1_dcache_shift();
+	unsigned long bytes = l1_dcache_bytes();
 	void *addr = (void *)(start & ~(bytes - 1));
 	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 74d0db511099..3e3cdfaa76c6 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -96,23 +96,6 @@ typedef u32		compat_sigset_word;
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = current->thread.regs;
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a2912b47102c..abc154d784b0 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -2,26 +2,13 @@
 #ifndef ASM_POWERPC_DMA_DIRECT_H
 #define ASM_POWERPC_DMA_DIRECT_H 1
 
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	if (!dev->dma_mask)
-		return false;
-
-	return addr + size - 1 <=
-		min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
-}
-
 static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	if (!dev)
-		return paddr + PCI_DRAM_OFFSET;
 	return paddr + dev->archdata.dma_offset;
 }
 
 static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	if (!dev)
-		return daddr - PCI_DRAM_OFFSET;
 	return daddr - dev->archdata.dma_offset;
 }
 #endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
deleted file mode 100644
index 565d6f74b189..000000000000
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2004 IBM
- */
-#ifndef _ASM_DMA_MAPPING_H
-#define _ASM_DMA_MAPPING_H
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	/* We don't handle the NULL dev case for ISA for now. We could
-	 * do it via an out of line call but it is not needed for now. The
-	 * only ISA DMA device we support is the floppy and we have a hack
-	 * in the floppy driver directly to get a device for us.
-	 */
-	return NULL;
-}
-
-#endif	/* _ASM_DMA_MAPPING_H */
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 0cfc365d814b..2ef155a3c821 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -15,6 +15,7 @@
 #define _ASM_FIXMAP_H
 
 #ifndef __ASSEMBLY__
+#include <linux/sizes.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #ifdef CONFIG_HIGHMEM
@@ -63,7 +64,22 @@ enum fixed_addresses {
 	FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
 		       FIX_IMMR_SIZE,
 #endif
+#ifdef CONFIG_PPC_83xx
+	/* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE	(SZ_2M / PAGE_SIZE)
+	FIX_IMMR_START,
+	FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+		       FIX_IMMR_SIZE,
+#endif
 	/* FIX_PCIE_MCFG, */
+	__end_of_permanent_fixed_addresses,
+
+#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS	16
+#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
 	__end_of_fixed_addresses
 };
 
@@ -71,14 +87,22 @@ enum fixed_addresses {
 #define FIXADDR_START		(FIXADDR_TOP - __FIXADDR_SIZE)
 
 #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
+#define FIXMAP_PAGE_IO	PAGE_KERNEL_NCG
 
 #include <asm-generic/fixmap.h>
 
 static inline void __set_fixmap(enum fixed_addresses idx,
 				phys_addr_t phys, pgprot_t flags)
 {
-	map_kernel_page(fix_to_virt(idx), phys, flags);
+	if (__builtin_constant_p(idx))
+		BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+	else if (WARN_ON(idx >= __end_of_fixed_addresses))
+		return;
+
+	map_kernel_page(__fix_to_virt(idx), phys, flags);
 }
 
+#define __early_set_fixmap	__set_fixmap
+
 #endif /* !__ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 11112023e327..13bd870609c3 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -342,6 +342,15 @@
 #define H_TLB_INVALIDATE	0xF808
 #define H_COPY_TOFROM_GUEST	0xF80C
 
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED        0x1
+
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN		0xEF00
+#define H_SVM_PAGE_OUT		0xEF04
+#define H_SVM_INIT_START	0xEF08
+#define H_SVM_INIT_DONE		0xEF0C
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR		1
 #define H_SET_MODE_RESOURCE_SET_DAWR		2
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 67e2da195eae..27ac6f5d2891 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
 	unsigned long	address;
 	u16		type;
 	u16		len; /* length of the target data symbol */
+	u16		hw_len; /* length programmed in hw */
 };
 
 /* Note: Don't change the the first 6 bits below as they are in the same order
@@ -33,6 +34,11 @@ struct arch_hw_breakpoint {
 #define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
 				 HW_BRK_TYPE_HYP)
 
+#define HW_BREAKPOINT_ALIGN 0x7
+
+#define DABR_MAX_LEN	8
+#define DAWR_MAX_LEN	512
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <linux/kdebug.h>
 #include <asm/reg.h>
@@ -44,8 +50,6 @@ struct pmu;
 struct perf_sample_data;
 struct task_struct;
 
-#define HW_BREAKPOINT_ALIGN 0x7
-
 extern int hw_breakpoint_slots(int type);
 extern int arch_bp_generic_fields(int type, int *gen_bp_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
@@ -70,6 +74,7 @@ static inline void hw_breakpoint_disable(void)
 	brk.address = 0;
 	brk.type = 0;
 	brk.len = 0;
+	brk.hw_len = 0;
 	if (ppc_breakpoint_available())
 		__set_breakpoint(&brk);
 }
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 32a18f2f49bc..e3a905e3d573 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -226,8 +226,8 @@ static inline bool arch_irqs_disabled(void)
 #endif /* CONFIG_PPC_BOOK3S */
 
 #ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory")
-#define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory")
+#define __hard_irq_enable()	wrtee(MSR_EE)
+#define __hard_irq_disable()	wrtee(0)
 #else
 #define __hard_irq_enable()	__mtmsrd(MSR_EE|MSR_RI, 1)
 #define __hard_irq_disable()	__mtmsrd(MSR_RI, 1)
@@ -280,8 +280,6 @@ extern void force_external_irq_replay(void);
 
 #else /* CONFIG_PPC64 */
 
-#define SET_MSR_EE(x)	mtmsr(x)
-
 static inline unsigned long arch_local_save_flags(void)
 {
 	return mfmsr();
@@ -289,47 +287,44 @@ static inline unsigned long arch_local_save_flags(void)
 
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-#if defined(CONFIG_BOOKE)
-	asm volatile("wrtee %0" : : "r" (flags) : "memory");
-#else
-	mtmsr(flags);
-#endif
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(flags);
+	else
+		mtmsr(flags);
 }
 
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags = arch_local_save_flags();
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-	wrtspr(SPRN_EID);
-#else
-	SET_MSR_EE(flags & ~MSR_EE);
-#endif
+
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(0);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EID);
+	else
+		mtmsr(flags & ~MSR_EE);
+
 	return flags;
 }
 
 static inline void arch_local_irq_disable(void)
 {
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-	wrtspr(SPRN_EID);
-#else
-	arch_local_irq_save();
-#endif
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(0);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EID);
+	else
+		mtmsr(mfmsr() & ~MSR_EE);
 }
 
 static inline void arch_local_irq_enable(void)
 {
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 1" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-	wrtspr(SPRN_EIE);
-#else
-	unsigned long msr = mfmsr();
-	SET_MSR_EE(msr | MSR_EE);
-#endif
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(MSR_EE);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EIE);
+	else
+		mtmsr(mfmsr() | MSR_EE);
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index a63ec938636d..635969b5b58e 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -691,8 +691,6 @@ static inline void iosync(void)
  * * ioremap_prot allows to specify the page flags as an argument and can
  *   also be hooked by the platform via ppc_md.
  *
- * * ioremap_nocache is identical to ioremap
- *
  * * ioremap_wc enables write combining
  *
  * * ioremap_wt enables write through
@@ -715,7 +713,6 @@ extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
 extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
 void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
 void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
-#define ioremap_nocache(addr, size)	ioremap((addr), (size))
 #define ioremap_uc(addr, size)		ioremap((addr), (size))
 #define ioremap_cache(addr, size) \
 	ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
new file mode 100644
index 000000000000..50204e228f16
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
+#define __ASM_KVM_BOOK3S_UVMEM_H__
+
+#ifdef CONFIG_PPC_UV
+int kvmppc_uvmem_init(void);
+void kvmppc_uvmem_free(void);
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
+void kvmppc_uvmem_slot_free(struct kvm *kvm,
+			    const struct kvm_memory_slot *slot);
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+				   unsigned long gra,
+				   unsigned long flags,
+				   unsigned long page_shift);
+unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+				    unsigned long gra,
+				    unsigned long flags,
+				    unsigned long page_shift);
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+			     struct kvm *kvm);
+#else
+static inline int kvmppc_uvmem_init(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_uvmem_free(void) { }
+
+static inline int
+kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	return 0;
+}
+
+static inline void
+kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }
+
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+		     unsigned long flags, unsigned long page_shift)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+		      unsigned long flags, unsigned long page_shift)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+	return -EFAULT;
+}
+
+static inline void
+kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+			struct kvm *kvm) { }
+#endif /* CONFIG_PPC_UV */
+#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6fe6ad64cba5..0a398f2321c2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -275,6 +275,10 @@ struct kvm_hpt_info {
 
 struct kvm_resize_hpt;
 
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE  0x2 /* H_SVM_INIT_DONE completed */
+
 struct kvm_arch {
 	unsigned int lpid;
 	unsigned int smt_mode;		/* # vcpus per virtual core */
@@ -330,6 +334,8 @@ struct kvm_arch {
 #endif
 	struct kvmppc_ops *kvm_ops;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	struct mutex uvmem_lock;
+	struct list_head uvmem_pfns;
 	struct mutex mmu_setup_lock;	/* nests inside vcpu mutexes */
 	u64 l1_ptcr;
 	int max_nested_lpid;
@@ -401,7 +407,6 @@ struct kvmppc_mmu {
 	u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
 	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
 		      struct kvmppc_pte *pte, bool data, bool iswrite);
-	void (*reset_msr)(struct kvm_vcpu *vcpu);
 	void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
 	int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
 	u64  (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ee62776e5433..3d2f871241a8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -271,6 +271,7 @@ struct kvmppc_ops {
 			   union kvmppc_one_reg *val);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
+	void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
 	void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
 	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
@@ -321,6 +322,7 @@ struct kvmppc_ops {
 			       int size);
 	int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
 			      int size);
+	int (*svm_off)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index fdd00939270b..bc4bd19b7fc2 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -17,7 +17,7 @@ typedef struct
 
 #define LOCAL_INIT(i)	{ (i) }
 
-static __inline__ long local_read(local_t *l)
+static __inline__ long local_read(const local_t *l)
 {
 	return READ_ONCE(l->v);
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1c3133b5f86a..1006a427e99c 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_KUP_8XX_H_
 
 #include <asm/bug.h>
+#include <asm/mmu.h>
 
 #ifdef CONFIG_PPC_KUAP
 
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index 4c9777d256fb..b41004664312 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -75,7 +75,6 @@
 #define MAS2_E			0x00000001
 #define MAS2_WIMGE_MASK		0x0000001f
 #define MAS2_EPN_MASK(size)		(~0 << (size + 10))
-#define MAS2_VAL(addr, size, flags)	((addr) & MAS2_EPN_MASK(size) | (flags))
 
 #define MAS3_RPN		0xFFFFF000
 #define MAS3_U0			0x00000200
@@ -221,6 +220,16 @@
 #define TLBILX_T_CLASS2			6
 #define TLBILX_T_CLASS3			7
 
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED	MAS2_M
+#else
+#define MAS2_M_IF_NEEDED	0
+#endif
+
 #ifndef __ASSEMBLY__
 #include <asm/bug.h>
 
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 378e3997845a..c1f25a760eb1 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -211,7 +211,10 @@
 #define OPAL_MPIPL_UPDATE			173
 #define OPAL_MPIPL_REGISTER_TAG			174
 #define OPAL_MPIPL_QUERY_TAG			175
-#define OPAL_LAST				175
+#define OPAL_SECVAR_GET				176
+#define OPAL_SECVAR_GET_NEXT			177
+#define OPAL_SECVAR_ENQUEUE_UPDATE		178
+#define OPAL_LAST				178
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a0cf8fba4d12..9986ac34b8e2 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -298,6 +298,13 @@ int opal_sensor_group_clear(u32 group_hndl, int token);
 int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
 int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
 
+int opal_secvar_get(const char *key, uint64_t key_len, u8 *data,
+		    uint64_t *data_size);
+int opal_secvar_get_next(const char *key, uint64_t *key_len,
+			 uint64_t key_buf_size);
+int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
+			       uint64_t data_size);
+
 s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
 s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
 s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..7f1fd41e3065 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -325,17 +325,15 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
+extern unsigned long kernstart_virt_addr;
+
+static inline unsigned long kaslr_offset(void)
+{
+	return kernstart_virt_addr - KERNELBASE;
+}
+
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
 #include <asm/slice.h>
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 4053b2ab427c..0e4ec8cc37b7 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -157,13 +157,9 @@ static inline bool pgd_is_leaf(pgd_t pgd)
 #define is_ioremap_addr is_ioremap_addr
 static inline bool is_ioremap_addr(const void *x)
 {
-#ifdef CONFIG_MMU
 	unsigned long addr = (unsigned long)x;
 
 	return addr >= IOREMAP_BASE && addr < IOREMAP_END;
-#else
-	return false;
-#endif
 }
 #endif /* CONFIG_PPC64 */
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b3cbb1136bce..1aa46dff0957 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -25,9 +25,7 @@
 #include <asm/reg_fsl_emb.h>
 #endif
 
-#ifdef CONFIG_PPC_8xx
 #include <asm/reg_8xx.h>
-#endif /* CONFIG_PPC_8xx */
 
 #define MSR_SF_LG	63              /* Enable 64 bit mode */
 #define MSR_ISF_LG	61              /* Interrupt 64b mode valid on 630 */
@@ -748,6 +746,18 @@
 #define SPRN_USPRG7	0x107	/* SPRG7 userspace read */
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define   SRR1_MSR_BITS		(~0x783f0000UL)
+#endif
+
 #define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
 #define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
@@ -1370,6 +1380,14 @@ static inline void mtmsr_isync(unsigned long val)
 #define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",0" : \
 				     : : "memory")
 
+static inline void wrtee(unsigned long val)
+{
+	if (__builtin_constant_p(val))
+		asm volatile("wrteei %0" : : "i" ((val & MSR_EE) ? 1 : 0) : "memory");
+	else
+		asm volatile("wrtee %0" : : "r" (val) : "memory");
+}
+
 extern unsigned long msr_check_and_set(unsigned long bits);
 extern bool strict_msr_control;
 extern void __msr_check_and_clear(unsigned long bits);
@@ -1384,19 +1402,9 @@ static inline void msr_check_and_clear(unsigned long bits)
 #define mftb()		({unsigned long rval;				\
 			asm volatile(					\
 				"90:	mfspr %0, %2;\n"		\
-				"97:	cmpwi %0,0;\n"			\
-				"	beq- 90b;\n"			\
-				"99:\n"					\
-				".section __ftr_fixup,\"a\"\n"		\
-				".align 3\n"				\
-				"98:\n"					\
-				"	.8byte %1\n"			\
-				"	.8byte %1\n"			\
-				"	.8byte 97b-98b\n"		\
-				"	.8byte 99b-98b\n"		\
-				"	.8byte 0\n"			\
-				"	.8byte 0\n"			\
-				".previous"				\
+				ASM_FTR_IFSET(				\
+					"97:	cmpwi %0,0;\n"		\
+					"	beq- 90b;\n", "", %1)	\
 			: "=r" (rval) \
 			: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
 			rval;})
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 7192eece6c3e..07df35ee8cbc 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -5,8 +5,6 @@
 #ifndef _ASM_POWERPC_REG_8xx_H
 #define _ASM_POWERPC_REG_8xx_H
 
-#include <asm/mmu.h>
-
 /* Cache control on the MPC8xx is provided through some additional
  * special purpose registers.
  */
@@ -38,7 +36,9 @@
 #define SPRN_CMPF	153
 #define SPRN_LCTRL1	156
 #define SPRN_LCTRL2	157
+#ifdef CONFIG_PPC_8xx
 #define SPRN_ICTRL	158
+#endif
 #define SPRN_BAR	159
 
 /* Commands.  Only the first few are available to the instruction cache.
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 5a9b6eb651b6..d19871763ed4 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -5,8 +5,22 @@
 
 #include <linux/elf.h>
 #include <linux/uaccess.h>
+
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
 #include <asm-generic/sections.h>
 
+extern bool init_mem_is_free;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+	if (!init_mem_is_free)
+		return 0;
+
+	return addr >= (unsigned long)__init_begin &&
+		addr < (unsigned long)__init_end;
+}
+
 extern char __head_end[];
 
 #ifdef __powerpc64__
diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h
new file mode 100644
index 000000000000..a2ff556916c6
--- /dev/null
+++ b/arch/powerpc/include/asm/secure_boot.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Secure boot definitions
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#ifndef _ASM_POWER_SECURE_BOOT_H
+#define _ASM_POWER_SECURE_BOOT_H
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+bool is_ppc_secureboot_enabled(void);
+bool is_ppc_trustedboot_enabled(void);
+
+#else
+
+static inline bool is_ppc_secureboot_enabled(void)
+{
+	return false;
+}
+
+static inline bool is_ppc_trustedboot_enabled(void)
+{
+	return false;
+}
+
+#endif
+#endif
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 759597bf0fd8..7c05e95a5c44 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -9,7 +9,7 @@
 #define _ASM_POWERPC_SECURITY_FEATURES_H
 
 
-extern unsigned long powerpc_security_features;
+extern u64 powerpc_security_features;
 extern bool rfi_flush;
 
 /* These are bit flags */
@@ -24,17 +24,17 @@ void setup_stf_barrier(void);
 void do_stf_barrier_fixups(enum stf_barrier_type types);
 void setup_count_cache_flush(void);
 
-static inline void security_ftr_set(unsigned long feature)
+static inline void security_ftr_set(u64 feature)
 {
 	powerpc_security_features |= feature;
 }
 
-static inline void security_ftr_clear(unsigned long feature)
+static inline void security_ftr_clear(u64 feature)
 {
 	powerpc_security_features &= ~feature;
 }
 
-static inline bool security_ftr_enabled(unsigned long feature)
+static inline bool security_ftr_enabled(u64 feature)
 {
 	return !!(powerpc_security_features & feature);
 }
@@ -81,6 +81,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Software required to flush count cache on context switch
 #define SEC_FTR_FLUSH_COUNT_CACHE	0x0000000000000400ull
 
+// Software required to flush link stack on context switch
+#define SEC_FTR_FLUSH_LINK_STACK	0x0000000000001000ull
+
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h
new file mode 100644
index 000000000000..4cc35b58b986
--- /dev/null
+++ b/arch/powerpc/include/asm/secvar.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * PowerPC secure variable operations.
+ */
+#ifndef SECVAR_OPS_H
+#define SECVAR_OPS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+extern const struct secvar_operations *secvar_ops;
+
+struct secvar_operations {
+	int (*get)(const char *key, uint64_t key_len, u8 *data,
+		   uint64_t *data_size);
+	int (*get_next)(const char *key, uint64_t *key_len,
+			uint64_t keybufsize);
+	int (*set)(const char *key, uint64_t key_len, u8 *data,
+		   uint64_t data_size);
+};
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+extern void set_secvar_ops(const struct secvar_operations *ops);
+
+#else
+
+static inline void set_secvar_ops(const struct secvar_operations *ops) { }
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index e9a960e28f3c..860228e917dc 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -15,6 +15,7 @@
  *
  * (the type definitions are in asm/spinlock_types.h)
  */
+#include <linux/jump_label.h>
 #include <linux/irqflags.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
@@ -36,10 +37,12 @@
 #endif
 
 #ifdef CONFIG_PPC_PSERIES
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
 #define vcpu_is_preempted vcpu_is_preempted
 static inline bool vcpu_is_preempted(int cpu)
 {
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+	if (!static_branch_unlikely(&shared_processor))
 		return false;
 	return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
 }
@@ -110,13 +113,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
 
 static inline bool is_shared_processor(void)
 {
-/*
- * LPPACA is only available on Pseries so guard anything LPPACA related to
- * allow other platforms (which include this common header) to compile.
- */
-#ifdef CONFIG_PPC_PSERIES
-	return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
-		lppaca_shared_proc(local_paca->lppaca_ptr));
+#ifdef CONFIG_PPC_SPLPAR
+	return static_branch_unlikely(&shared_processor);
 #else
 	return false;
 #endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 15002b51ff18..c92fe7fe9692 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 
-extern unsigned long __clear_user(void __user *addr, unsigned long size);
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
@@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
 	might_fault();
 	if (likely(access_ok(addr, size))) {
 		allow_write_to_user(addr, size);
-		ret = __clear_user(addr, size);
+		ret = __arch_clear_user(addr, size);
 		prevent_write_to_user(addr, size);
 	}
 	return ret;
 }
 
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+	return clear_user(addr, size);
+}
+
 extern long strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
index 4fcda1d5793d..b66f6db7be6c 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -26,8 +26,14 @@
 #define UV_WRITE_PATE			0xF104
 #define UV_RETURN			0xF11C
 #define UV_ESM				0xF110
+#define UV_REGISTER_MEM_SLOT		0xF120
+#define UV_UNREGISTER_MEM_SLOT		0xF124
+#define UV_PAGE_IN			0xF128
+#define UV_PAGE_OUT			0xF12C
 #define UV_SHARE_PAGE			0xF130
 #define UV_UNSHARE_PAGE			0xF134
 #define UV_UNSHARE_ALL_PAGES		0xF140
+#define UV_PAGE_INVAL			0xF138
+#define UV_SVM_TERMINATE		0xF13C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index b1bc2e043ed4..790b0e63681f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -46,4 +46,40 @@ static inline int uv_unshare_all_pages(void)
 	return ucall_norets(UV_UNSHARE_ALL_PAGES);
 }
 
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+			     u64 page_shift)
+{
+	return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+			      u64 page_shift)
+{
+	return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+				       u64 flags, u64 slotid)
+{
+	return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+			    size, flags, slotid);
+}
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+	return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
+}
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+	return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
+}
+
+static inline int uv_svm_terminate(u64 lpid)
+{
+	return ucall_norets(UV_SVM_TERMINATE, lpid);
+}
+
 #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index c61d59ed3b45..40f13f3626d3 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -81,7 +81,9 @@ struct vdso_data {
 	__u32 stamp_sec_fraction;		/* fractional seconds of stamp_xtime */
 	__s32 wtom_clock_nsec;			/* Wall to monotonic clock nsec */
 	__s64 wtom_clock_sec;			/* Wall to monotonic clock sec */
-	struct timespec stamp_xtime;		/* xtime as at tb_orig_stamp */
+	__s64 stamp_xtime_sec;			/* xtime secs as at tb_orig_stamp */
+	__s64 stamp_xtime_nsec;			/* xtime nsecs as at tb_orig_stamp */
+	__u32 hrtimer_res;			/* hrtimer resolution */
    	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 };
@@ -101,8 +103,10 @@ struct vdso_data {
 	__u32 tz_dsttime;		/* Type of dst correction	0x5C */
 	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
 	__s32 wtom_clock_nsec;
-	struct timespec stamp_xtime;	/* xtime as at tb_orig_stamp */
+	__s32 stamp_xtime_sec;		/* xtime seconds as at tb_orig_stamp */
+	__s32 stamp_xtime_nsec;		/* xtime nsecs as at tb_orig_stamp */
 	__u32 stamp_sec_fraction;	/* fractional seconds of stamp_xtime */
+	__u32 hrtimer_res;		/* hrtimer resolution */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 	__u32 dcache_block_size;	/* L1 d-cache block size     */
 	__u32 icache_block_size;	/* L1 i-cache block size     */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..b992dfaaa161
--- /dev/null
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_POWERPC_VMALLOC_H
+#define _ASM_POWERPC_VMALLOC_H
+
+#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index f2dfcd50a2d3..33aee7490cbb 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -39,6 +39,7 @@
 
 #define XIVE_ESB_VAL_P		0x2
 #define XIVE_ESB_VAL_Q		0x1
+#define XIVE_ESB_INVALID	0xFF
 
 /*
  * Thread Management (aka "TM") registers
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b0f72dea8b11..264e266a85bf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {
 
 /* PPC64 eXternal Interrupt Controller Specification */
 #define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL		2
+#define   KVM_DEV_XICS_NR_SERVERS	1
 
 /* Layout of 64-bit source attribute values */
 #define  KVM_XICS_DESTINATION_SHIFT	0
@@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
 #define KVM_DEV_XIVE_GRP_CTRL		1
 #define   KVM_DEV_XIVE_RESET		1
 #define   KVM_DEV_XIVE_EQ_SYNC		2
+#define   KVM_DEV_XIVE_NR_SERVERS	3
 #define KVM_DEV_XIVE_GRP_SOURCE		2	/* 64-bit source identifier */
 #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG	3	/* 64-bit source identifier */
 #define KVM_DEV_XIVE_GRP_EQ_CONFIG	4	/* 64-bit EQ identifier */
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 2b1b37797a47..7919b2ba41b5 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_MSGBUF_H
 #define _ASM_POWERPC_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for the PowerPC architecture.
  * Note extra padding because this structure is passed back and forth
@@ -11,9 +13,9 @@
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #ifdef __powerpc64__
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
+	long		 msg_stime;	/* last msgsnd time */
+	long		 msg_rtime;	/* last msgrcv time */
+	long		 msg_ctime;	/* last change time */
 #else
 	unsigned long  msg_stime_high;
 	unsigned long  msg_stime;	/* last msgsnd time */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 3f60946f77e3..85e96ccb5f0f 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_SEMBUF_H
 #define _ASM_POWERPC_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -26,8 +28,8 @@ struct semid64_ds {
 	unsigned long	sem_ctime_high;
 	unsigned long	sem_ctime;	/* last change time */
 #else
-	__kernel_time_t	sem_otime;	/* last semop time */
-	__kernel_time_t	sem_ctime;	/* last change time */
+	long		sem_otime;	/* last semop time */
+	long		sem_ctime;	/* last change time */
 #endif
 	unsigned long	sem_nsems;	/* no. of semaphores in array */
 	unsigned long	__unused3;
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
index b591c4d7e4c5..00422b2f3c63 100644
--- a/arch/powerpc/include/uapi/asm/shmbuf.h
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -22,9 +22,9 @@
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 #ifdef __powerpc64__
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
+	long		shm_atime;	/* last attach time */
+	long		shm_dtime;	/* last detach time */
+	long		shm_ctime;	/* last change time */
 #else
 	unsigned long		shm_atime_high;
 	unsigned long		shm_atime;	/* last attach time */
diff --git a/arch/powerpc/include/uapi/asm/spu_info.h b/arch/powerpc/include/uapi/asm/spu_info.h
index cabfcbba9eac..45f97150587b 100644
--- a/arch/powerpc/include/uapi/asm/spu_info.h
+++ b/arch/powerpc/include/uapi/asm/spu_info.h
@@ -5,20 +5,6 @@
  * (C) Copyright 2006 IBM Corp.
  *
  * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _UAPI_SPU_INFO_H
diff --git a/arch/powerpc/include/uapi/asm/stat.h b/arch/powerpc/include/uapi/asm/stat.h
index afd25f2ff4e8..7871055e5e32 100644
--- a/arch/powerpc/include/uapi/asm/stat.h
+++ b/arch/powerpc/include/uapi/asm/stat.h
@@ -40,7 +40,7 @@ struct stat {
 	uid_t		st_uid;
 	gid_t		st_gid;
 	unsigned long	st_rdev;
-	off_t		st_size;
+	long		st_size;
 	unsigned long	st_blksize;
 	unsigned long	st_blocks;
 	unsigned long	st_atime;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a7ca8fe62368..157b0147921f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,9 +5,6 @@
 
 CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o		+= $(call cc-disable-warning, builtin-requires-header)
-
 ifdef CONFIG_PPC64
 CFLAGS_prom_init.o	+= $(NO_MINIMAL_TOC)
 endif
@@ -22,6 +19,8 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
 CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
@@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n
 ifdef CONFIG_KASAN
 CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
-CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
@@ -78,9 +76,8 @@ obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
 				  eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
-ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
-obj-y				+= fadump.o
-endif
+obj-$(CONFIG_FA_DUMP)		+= fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP)	+= fadump.o
 ifdef CONFIG_PPC32
 obj-$(CONFIG_E500)		+= idle_e500.o
 endif
@@ -126,14 +123,6 @@ pci64-$(CONFIG_PPC64)		+= pci_dn.o pci-hotplug.o isa-bridge.o
 obj-$(CONFIG_PCI)		+= pci_$(BITS).o $(pci64-y) \
 				   pci-common.o pci_of_scan.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
-obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec.o crash.o \
-				   machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_FILE)	+= machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y				+= ima_kexec.o
-endif
-endif
 
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
@@ -161,16 +150,13 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
 obj-y				+= ucall.o
 endif
 
+obj-$(CONFIG_PPC_SECURE_BOOT)	+= secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS)	+= secvar-sysfs.o
+
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-KCOV_INSTRUMENT_machine_kexec_64.o := n
-UBSAN_SANITIZE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
-KCOV_INSTRUMENT_machine_kexec_32.o := n
-UBSAN_SANITIZE_machine_kexec_32.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 484f54dab247..3d47aec7becf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -385,28 +385,25 @@ int main(void)
 	OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
 	OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
 	OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
-	OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
+	OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
+	OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
 	OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+	OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
 	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
 	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
 	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
 	OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
 #ifdef CONFIG_PPC64
 	OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
-	OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
-	OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
+	OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
+	OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
+#endif
+	OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
+	OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
 	OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
 	OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
-	OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
-	OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
 	OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
 	OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
-#else
-	OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
-	OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
-	OFFSET(TSPC32_TV_SEC, timespec, tv_sec);
-	OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec);
-#endif
 	/* timeval/timezone offsets for use by vdso */
 	OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
 	OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
@@ -417,7 +414,6 @@ int main(void)
 	DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
 	DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
 	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 
 #ifdef CONFIG_BUG
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 2b4f3ec0acf7..1d308780e0d3 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500)
 	blr
 #endif
 
-/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */
+/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */
 _GLOBAL(flush_dcache_L1)
 	mfmsr	r10
 	wrteei	0
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 5f66b95b6858..cc14aa6c4a1b 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk)
 	 * DAWR length is stored in field MDR bits 48:53.  Matches range in
 	 * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
 	 * 0b111111=64DW.
-	 * brk->len is in bytes.
+	 * brk->hw_len is in bytes.
 	 * This aligns up to double word size, shifts and does the bias.
 	 */
-	mrd = ((brk->len + 7) >> 3) - 1;
+	mrd = ((brk->hw_len + 7) >> 3) - 1;
 	dawrx |= (mrd & 0x3f) << (63 - 53);
 
 	if (ppc_md.set_dawr)
@@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
 				    const char __user *user_buf,
 				    size_t count, loff_t *ppos)
 {
-	struct arch_hw_breakpoint null_brk = {0, 0, 0};
+	struct arch_hw_breakpoint null_brk = {0};
 	size_t rc;
 
 	/* Send error to user if they hypervisor won't allow us to write DAWR */
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index 3482118ffe76..ef2ad4945904 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -19,10 +19,13 @@
  */
 notrace unsigned long __init early_init(unsigned long dt_ptr)
 {
-	unsigned long offset = reloc_offset();
+	unsigned long kva, offset = reloc_offset();
+
+	kva = *PTRRELOC(&kernstart_virt_addr);
 
 	/* First zero the BSS */
-	memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+	if (kva == KERNELBASE)
+		memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
 
 	/*
 	 * Identify the CPU type and fix up code sections
@@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
 
 	apply_feature_fixups();
 
-	return KERNELBASE + offset;
+	return kva + offset;
 }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index d9279d0ee9f5..3dd1a422fc29 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
  * Copyright IBM Corp. 2004 2005
  * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
  *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  */
 #include <linux/delay.h>
@@ -897,12 +881,12 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* Log the event */
 	if (pe->type & EEH_PE_PHB) {
-		pr_err("EEH: PHB#%x failure detected, location: %s\n",
+		pr_err("EEH: Recovering PHB#%x, location: %s\n",
 			pe->phb->global_number, eeh_pe_loc_get(pe));
 	} else {
 		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
 
-		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		pr_err("EEH: Recovering PHB#%x-PE#%x\n",
 		       pe->phb->global_number, pe->addr);
 		pr_err("EEH: PE location: %s, PHB location: %s\n",
 		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 3fa04dda1737..ab44d965a53c 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
  * Copyright IBM Corporation 2007
  * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
  *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  */
 #include <linux/pci.h>
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d60908ea37fb..e1a4c39b83b8 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -897,7 +897,7 @@ resume_kernel:
 	bne-	0b
 1:
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	/* check current_thread_info->preempt_count */
 	lwz	r0,TI_PREEMPT(r2)
 	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
@@ -921,7 +921,7 @@ resume_kernel:
 	 */
 	bl	trace_hardirqs_on
 #endif
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 restore_kuap:
 	kuap_restore r1, r2, r9, r10, r0
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..a9a1d3cdb523 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -537,6 +537,7 @@ flush_count_cache:
 	/* Save LR into r9 */
 	mflr	r9
 
+	// Flush the link stack
 	.rept 64
 	bl	.+4
 	.endr
@@ -546,6 +547,11 @@ flush_count_cache:
 	.balign 32
 	/* Restore LR */
 1:	mtlr	r9
+
+	// If we're just flushing the link stack, return here
+3:	nop
+	patch_site 3b patch__flush_link_stack_return
+
 	li	r9,0x7fff
 	mtctr	r9
 
@@ -840,7 +846,7 @@ resume_kernel:
 	bne-	0b
 1:
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	/* Check if we need to preempt */
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	beq+	restore
@@ -871,7 +877,7 @@ resume_kernel:
 	li	r10,MSR_RI
 	mtmsrd	r10,1		  /* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 	.globl	fast_exc_return_irq
 fast_exc_return_irq:
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 829950b96d29..e4076e3c072d 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1346,16 +1346,6 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	sync
 	isync
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED	MAS2_M
-#else
-#define M_IF_NEEDED	0
-#endif
-
 /* 6. Setup KERNELBASE mapping in TLB[0]
  *
  * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1368,7 +1358,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
 	mtspr	SPRN_MAS1,r6
 
-	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED)
+	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
 	mtspr	SPRN_MAS2,r6
 
 	rlwinm	r5,r5,0,0,25
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d0018dd17e0a..46508b148e16 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -514,7 +514,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
  * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
  * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
  */
-.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
+.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
 	.if \stack
 	andi.	r10,r12,MSR_PR		/* See if coming from user	*/
 	mr	r10,r1			/* Save r1			*/
@@ -533,7 +533,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
 
 	.if \stack
-	.if \kaup
+	.if \kuap
 	kuap_save_amr_and_lock r9, r10, cr1, cr0
 	.endif
 	beq	101f			/* if from kernel mode		*/
@@ -541,7 +541,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 	SAVE_PPR(\area, r9)
 101:
 	.else
-	.if \kaup
+	.if \kuap
 	kuap_save_amr_and_lock r9, r10, cr1
 	.endif
 	.endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ed59855430b9..ff0114aeba9b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1466,16 +1466,15 @@ static void fadump_init_files(void)
  */
 int __init setup_fadump(void)
 {
-	if (!fw_dump.fadump_enabled)
-		return 0;
-
-	if (!fw_dump.fadump_supported) {
-		printk(KERN_ERR "Firmware-assisted dump is not supported on"
-			" this hardware\n");
+	if (!fw_dump.fadump_supported)
 		return 0;
-	}
 
+	fadump_init_files();
 	fadump_show_config();
+
+	if (!fw_dump.fadump_enabled)
+		return 1;
+
 	/*
 	 * If dump data is available then see if it is valid and prepare for
 	 * saving it to the disk.
@@ -1492,8 +1491,6 @@ int __init setup_fadump(void)
 	else if (fw_dump.reserve_dump_area_size)
 		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
 
-	fadump_init_files();
-
 	return 1;
 }
 subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index ea065282b303..8bccce6544b5 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -153,35 +153,24 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	tlbivax 0,r9
 	TLBSYNC
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED	MAS2_M
-#else
-#define M_IF_NEEDED	0
-#endif
-
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
 	lis	r6,0x1000		/* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
 	mtspr	SPRN_MAS0,r6
 	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
 	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
 	mtspr	SPRN_MAS1,r6
-	lis	r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
-	ori	r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
+	lis	r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+	ori	r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+	and	r6,r6,r20
+	ori	r6,r6,MAS2_M_IF_NEEDED@l
 	mtspr	SPRN_MAS2,r6
 	mtspr	SPRN_MAS3,r8
 	tlbwe
 
-/* 7. Jump to KERNELBASE mapping */
-	lis	r6,(KERNELBASE & ~0xfff)@h
-	ori	r6,r6,(KERNELBASE & ~0xfff)@l
-	rlwinm	r7,r25,0,0x03ffffff
-	add	r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+	mr	r6,r20
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index adf0505dbe02..6f7a3a7162c5 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -155,6 +155,8 @@ _ENTRY(_start);
  */
 
 _ENTRY(__early_start)
+	LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+	lwz     r20,0(r20)
 
 #define ENTRY_MAPPING_BOOT_SETUP
 #include "fsl_booke_entry_mapping.S"
@@ -238,6 +240,9 @@ set_ivor:
 
 	bl	early_init
 
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
 #ifdef CONFIG_RELOCATABLE
 	mr	r3,r30
 	mr	r4,r31
@@ -264,9 +269,6 @@ set_ivor:
 /*
  * Decide what sort of machine this is and initialize the MMU.
  */
-#ifdef CONFIG_KASAN
-	bl	kasan_early_init
-#endif
 	mr	r3,r30
 	mr	r4,r31
 	bl	machine_init
@@ -277,8 +279,8 @@ set_ivor:
 	ori	r6, r6, swapper_pg_dir@l
 	lis	r5, abatron_pteptrs@h
 	ori	r5, r5, abatron_pteptrs@l
-	lis	r4, KERNELBASE@h
-	ori	r4, r4, KERNELBASE@l
+	lis     r3, kernstart_virt_addr@ha
+	lwz     r4, kernstart_virt_addr@l(r3)
 	stw	r5, 0(r4)	/* Save abatron_pteptrs at a fixed location */
 	stw	r6, 0(r5)
 
@@ -1067,7 +1069,12 @@ __secondary_start:
 	mr	r5,r25		/* phys kernel start */
 	rlwinm	r5,r5,0,~0x3ffffff	/* aligned 64M */
 	subf	r4,r5,r4	/* memstart_addr - phys kernel start */
-	li	r5,0		/* no device tree */
+	lis	r7,KERNELBASE@h
+	ori	r7,r7,KERNELBASE@l
+	cmpw	r20,r7		/* if kernstart_virt_addr != KERNELBASE, randomized */
+	beq	2f
+	li	r4,0
+2:	li	r5,0		/* no device tree */
 	li	r6,0		/* not boot cpu */
 	bl	restore_to_as0
 
@@ -1115,6 +1122,54 @@ __secondary_hold_acknowledge:
 #endif
 
 /*
+ * Create a 64M tlb by address and entry
+ * r3 - entry
+ * r4 - virtual address
+ * r5/r6 - physical address
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+	lis     r7,0x1000               /* Set MAS0(TLBSEL) = 1 */
+	rlwimi  r7,r3,16,4,15           /* Setup MAS0 = TLBSEL | ESEL(r6) */
+	mtspr   SPRN_MAS0,r7            /* Write MAS0 */
+
+	lis     r3,(MAS1_VALID|MAS1_IPROT)@h
+	ori     r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+	mtspr   SPRN_MAS1,r3            /* Write MAS1 */
+
+	lis     r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+	ori     r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+	and     r3,r3,r4
+	ori	r3,r3,MAS2_M_IF_NEEDED@l
+	mtspr   SPRN_MAS2,r3            /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+	ori     r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+	mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+	mtspr	SPRN_MAS7,r5
+#else
+	ori     r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+	mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+#endif
+
+	tlbwe                           /* Write TLB */
+	isync
+	sync
+	blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+	mfmsr	r7
+	rlwinm	r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r7
+	rfi
+
+/*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
  * Then switch to the address space 1. It will return with the r3 set to
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 1007ec36b4cb..58ce3d37c2a3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -127,15 +127,58 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
 }
 
 /*
+ * Watchpoint match range is always doubleword(8 bytes) aligned on
+ * powerpc. If the given range is crossing doubleword boundary, we
+ * need to increase the length such that next doubleword also get
+ * covered. Ex,
+ *
+ *          address   len = 6 bytes
+ *                |=========.
+ *   |------------v--|------v--------|
+ *   | | | | | | | | | | | | | | | | |
+ *   |---------------|---------------|
+ *    <---8 bytes--->
+ *
+ * In this case, we should configure hw as:
+ *   start_addr = address & ~HW_BREAKPOINT_ALIGN
+ *   len = 16 bytes
+ *
+ * @start_addr and @end_addr are inclusive.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+	u16 max_len = DABR_MAX_LEN;
+	u16 hw_len;
+	unsigned long start_addr, end_addr;
+
+	start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
+	end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
+	hw_len = end_addr - start_addr + 1;
+
+	if (dawr_enabled()) {
+		max_len = DAWR_MAX_LEN;
+		/* DAWR region can't cross 512 bytes boundary */
+		if ((start_addr >> 9) != (end_addr >> 9))
+			return -EINVAL;
+	}
+
+	if (hw_len > max_len)
+		return -EINVAL;
+
+	hw->hw_len = hw_len;
+	return 0;
+}
+
+/*
  * Validate the arch-specific HW Breakpoint register settings
  */
 int hw_breakpoint_arch_parse(struct perf_event *bp,
 			     const struct perf_event_attr *attr,
 			     struct arch_hw_breakpoint *hw)
 {
-	int ret = -EINVAL, length_max;
+	int ret = -EINVAL;
 
-	if (!bp)
+	if (!bp || !attr->bp_len)
 		return ret;
 
 	hw->type = HW_BRK_TYPE_TRANSLATE;
@@ -155,26 +198,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
 	hw->address = attr->bp_addr;
 	hw->len = attr->bp_len;
 
-	/*
-	 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
-	 * and breakpoint addresses are aligned to nearest double-word
-	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
-	 * 'symbolsize' should satisfy the check below.
-	 */
 	if (!ppc_breakpoint_available())
 		return -ENODEV;
-	length_max = 8; /* DABR */
-	if (dawr_enabled()) {
-		length_max = 512 ; /* 64 doublewords */
-		/* DAWR region can't cross 512 boundary */
-		if ((attr->bp_addr >> 9) !=
-		    ((attr->bp_addr + attr->bp_len - 1) >> 9))
-			return -EINVAL;
-	}
-	if (hw->len >
-	    (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
-		return -EINVAL;
-	return 0;
+
+	return hw_breakpoint_validate_len(hw);
 }
 
 /*
@@ -195,33 +222,49 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 	tsk->thread.last_hit_ubp = NULL;
 }
 
-static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
+static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
 {
-	int ret, type;
-	struct instruction_op op;
+	return ((info->address <= dar) && (dar - info->address < info->len));
+}
 
-	ret = analyse_instr(&op, regs, instr);
-	type = GETTYPE(op.type);
-	return (!ret && (type == LARX || type == STCX));
+static bool
+dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+{
+	return ((dar <= info->address + info->len - 1) &&
+		(dar + size - 1 >= info->address));
 }
 
 /*
  * Handle debug exception notifications.
  */
 static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
-			     unsigned long addr)
+			     struct arch_hw_breakpoint *info)
 {
 	unsigned int instr = 0;
+	int ret, type, size;
+	struct instruction_op op;
+	unsigned long addr = info->address;
 
 	if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
 		goto fail;
 
-	if (is_larx_stcx_instr(regs, instr)) {
+	ret = analyse_instr(&op, regs, instr);
+	type = GETTYPE(op.type);
+	size = GETSIZE(op.type);
+
+	if (!ret && (type == LARX || type == STCX)) {
 		printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
 				   " Breakpoint at 0x%lx will be disabled.\n", addr);
 		goto disable;
 	}
 
+	/*
+	 * If it's extraneous event, we still need to emulate/single-
+	 * step the instruction, but we don't generate an event.
+	 */
+	if (size && !dar_range_overlaps(regs->dar, size, info))
+		info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
 	/* Do not emulate user-space instructions, instead single-step them */
 	if (user_mode(regs)) {
 		current->thread.last_hit_ubp = bp;
@@ -253,7 +296,6 @@ int hw_breakpoint_handler(struct die_args *args)
 	struct perf_event *bp;
 	struct pt_regs *regs = args->regs;
 	struct arch_hw_breakpoint *info;
-	unsigned long dar = regs->dar;
 
 	/* Disable breakpoints during exception handling */
 	hw_breakpoint_disable();
@@ -285,19 +327,14 @@ int hw_breakpoint_handler(struct die_args *args)
 		goto out;
 	}
 
-	/*
-	 * Verify if dar lies within the address range occupied by the symbol
-	 * being watched to filter extraneous exceptions.  If it doesn't,
-	 * we still need to single-step the instruction, but we don't
-	 * generate an event.
-	 */
 	info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
-	if (!((bp->attr.bp_addr <= dar) &&
-	      (dar - bp->attr.bp_addr < bp->attr.bp_len)))
-		info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-
-	if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
-		goto out;
+	if (IS_ENABLED(CONFIG_PPC_8xx)) {
+		if (!dar_within_range(regs->dar, info))
+			info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+	} else {
+		if (!stepping_handler(regs, bp, info))
+			goto out;
+	}
 
 	/*
 	 * As a policy, the callback is invoked in a 'trigger-after-execute'
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644
index 000000000000..e34116255ced
--- /dev/null
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+	return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+	"appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+	"appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+	NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+	"measure func=KEXEC_KERNEL_CHECK",
+	"measure func=MODULE_CHECK",
+	NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+	"measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+	"measure func=MODULE_CHECK template=ima-modsig",
+	"appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+	"appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+	NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+	if (is_ppc_secureboot_enabled()) {
+		if (IS_ENABLED(CONFIG_MODULE_SIG))
+			set_module_sig_enforced();
+
+		if (is_ppc_trustedboot_enabled())
+			return secure_and_trusted_rules;
+		else
+			return secure_rules;
+	} else if (is_ppc_trustedboot_enabled()) {
+		return trusted_rules;
+	}
+
+	return NULL;
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..add67498c126 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs)
 
 	trace_irq_entry(regs);
 
-	check_stack_overflow();
-
 	/*
 	 * Query the platform PIC for the interrupt & ack it.
 	 *
@@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs)
 	irqsp = hardirq_ctx[raw_smp_processor_id()];
 	sirqsp = softirq_ctx[raw_smp_processor_id()];
 
+	check_stack_overflow();
+
 	/* Already there ? */
 	if (unlikely(cursp == irqsp || cursp == sirqsp)) {
 		__do_irq(regs);
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7cea5978f21f..f061e06e9f51 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -479,8 +479,10 @@ static void __init fixup_port_irq(int index,
 	port->irq = virq;
 
 #ifdef CONFIG_SERIAL_8250_FSL
-	if (of_device_is_compatible(np, "fsl,ns16550"))
+	if (of_device_is_compatible(np, "fsl,ns16550")) {
 		port->handle_irq = fsl8250_handle_irq;
+		port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+	}
 #endif
 }
 
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
deleted file mode 100644
index c4ed328a7b96..000000000000
--- a/arch/powerpc/kernel/machine_kexec.c
+++ /dev/null
@@ -1,279 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Code to handle transition of Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-#include <linux/threads.h>
-#include <linux/memblock.h>
-#include <linux/of.h>
-#include <linux/irq.h>
-#include <linux/ftrace.h>
-
-#include <asm/kdump.h>
-#include <asm/machdep.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/sections.h>
-
-void machine_kexec_mask_interrupts(void) {
-	unsigned int i;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(i, desc) {
-		struct irq_chip *chip;
-
-		chip = irq_desc_get_chip(desc);
-		if (!chip)
-			continue;
-
-		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
-			chip->irq_eoi(&desc->irq_data);
-
-		if (chip->irq_mask)
-			chip->irq_mask(&desc->irq_data);
-
-		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
-			chip->irq_disable(&desc->irq_data);
-	}
-}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-	default_machine_crash_shutdown(regs);
-}
-
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-	if (ppc_md.machine_kexec_prepare)
-		return ppc_md.machine_kexec_prepare(image);
-	else
-		return default_machine_kexec_prepare(image);
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	VMCOREINFO_SYMBOL(node_data);
-	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-	VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
-	VMCOREINFO_SYMBOL(vmemmap_list);
-	VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
-	VMCOREINFO_SYMBOL(mmu_psize_defs);
-	VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
-	VMCOREINFO_OFFSET(vmemmap_backing, list);
-	VMCOREINFO_OFFSET(vmemmap_backing, phys);
-	VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
-	VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
-	VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
-	int save_ftrace_enabled;
-
-	save_ftrace_enabled = __ftrace_enabled_save();
-	this_cpu_disable_ftrace();
-
-	if (ppc_md.machine_kexec)
-		ppc_md.machine_kexec(image);
-	else
-		default_machine_kexec(image);
-
-	this_cpu_enable_ftrace();
-	__ftrace_enabled_restore(save_ftrace_enabled);
-
-	/* Fall back to normal restart if we're still alive. */
-	machine_restart(NULL);
-	for(;;);
-}
-
-void __init reserve_crashkernel(void)
-{
-	unsigned long long crash_size, crash_base;
-	int ret;
-
-	/* use common parsing */
-	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-			&crash_size, &crash_base);
-	if (ret == 0 && crash_size > 0) {
-		crashk_res.start = crash_base;
-		crashk_res.end = crash_base + crash_size - 1;
-	}
-
-	if (crashk_res.end == crashk_res.start) {
-		crashk_res.start = crashk_res.end = 0;
-		return;
-	}
-
-	/* We might have got these values via the command line or the
-	 * device tree, either way sanitise them now. */
-
-	crash_size = resource_size(&crashk_res);
-
-#ifndef CONFIG_NONSTATIC_KERNEL
-	if (crashk_res.start != KDUMP_KERNELBASE)
-		printk("Crash kernel location must be 0x%x\n",
-				KDUMP_KERNELBASE);
-
-	crashk_res.start = KDUMP_KERNELBASE;
-#else
-	if (!crashk_res.start) {
-#ifdef CONFIG_PPC64
-		/*
-		 * On 64bit we split the RMO in half but cap it at half of
-		 * a small SLB (128MB) since the crash kernel needs to place
-		 * itself and some stacks to be in the first segment.
-		 */
-		crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
-#else
-		crashk_res.start = KDUMP_KERNELBASE;
-#endif
-	}
-
-	crash_base = PAGE_ALIGN(crashk_res.start);
-	if (crash_base != crashk_res.start) {
-		printk("Crash kernel base must be aligned to 0x%lx\n",
-				PAGE_SIZE);
-		crashk_res.start = crash_base;
-	}
-
-#endif
-	crash_size = PAGE_ALIGN(crash_size);
-	crashk_res.end = crashk_res.start + crash_size - 1;
-
-	/* The crash region must not overlap the current kernel */
-	if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
-		printk(KERN_WARNING
-			"Crash kernel can not overlap current kernel\n");
-		crashk_res.start = crashk_res.end = 0;
-		return;
-	}
-
-	/* Crash kernel trumps memory limit */
-	if (memory_limit && memory_limit <= crashk_res.end) {
-		memory_limit = crashk_res.end + 1;
-		printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
-		       memory_limit);
-	}
-
-	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-			"for crashkernel (System RAM: %ldMB)\n",
-			(unsigned long)(crash_size >> 20),
-			(unsigned long)(crashk_res.start >> 20),
-			(unsigned long)(memblock_phys_mem_size() >> 20));
-
-	if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
-	    memblock_reserve(crashk_res.start, crash_size)) {
-		pr_err("Failed to reserve memory for crashkernel!\n");
-		crashk_res.start = crashk_res.end = 0;
-		return;
-	}
-}
-
-int overlaps_crashkernel(unsigned long start, unsigned long size)
-{
-	return (start + size) > crashk_res.start && start <= crashk_res.end;
-}
-
-/* Values we need to export to the second kernel via the device tree. */
-static phys_addr_t kernel_end;
-static phys_addr_t crashk_base;
-static phys_addr_t crashk_size;
-static unsigned long long mem_limit;
-
-static struct property kernel_end_prop = {
-	.name = "linux,kernel-end",
-	.length = sizeof(phys_addr_t),
-	.value = &kernel_end,
-};
-
-static struct property crashk_base_prop = {
-	.name = "linux,crashkernel-base",
-	.length = sizeof(phys_addr_t),
-	.value = &crashk_base
-};
-
-static struct property crashk_size_prop = {
-	.name = "linux,crashkernel-size",
-	.length = sizeof(phys_addr_t),
-	.value = &crashk_size,
-};
-
-static struct property memory_limit_prop = {
-	.name = "linux,memory-limit",
-	.length = sizeof(unsigned long long),
-	.value = &mem_limit,
-};
-
-#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)
-
-static void __init export_crashk_values(struct device_node *node)
-{
-	/* There might be existing crash kernel properties, but we can't
-	 * be sure what's in them, so remove them. */
-	of_remove_property(node, of_find_property(node,
-				"linux,crashkernel-base", NULL));
-	of_remove_property(node, of_find_property(node,
-				"linux,crashkernel-size", NULL));
-
-	if (crashk_res.start != 0) {
-		crashk_base = cpu_to_be_ulong(crashk_res.start),
-		of_add_property(node, &crashk_base_prop);
-		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
-		of_add_property(node, &crashk_size_prop);
-	}
-
-	/*
-	 * memory_limit is required by the kexec-tools to limit the
-	 * crash regions to the actual memory used.
-	 */
-	mem_limit = cpu_to_be_ulong(memory_limit);
-	of_update_property(node, &memory_limit_prop);
-}
-
-static int __init kexec_setup(void)
-{
-	struct device_node *node;
-
-	node = of_find_node_by_path("/chosen");
-	if (!node)
-		return -ENOENT;
-
-	/* remove any stale properties so ours can be found */
-	of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
-
-	/* information needed by userspace when using default_machine_kexec */
-	kernel_end = cpu_to_be_ulong(__pa(_end));
-	of_add_property(node, &kernel_end_prop);
-
-	export_crashk_values(node);
-
-	of_node_put(node);
-	return 0;
-}
-late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 82df4b09e79f..d80212be8698 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -6,11 +6,6 @@
  * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
  * and Paul Mackerras.
  *
- * kexec bits:
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011,  IBM Corporation
- * 		Author: Suzuki Poulose <suzuki@in.ibm.com>
  */
 
 #include <linux/sys.h>
@@ -25,7 +20,6 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
-#include <asm/kexec.h>
 #include <asm/bug.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
@@ -317,126 +311,6 @@ EXPORT_SYMBOL(flush_instruction_cache)
 #endif /* CONFIG_PPC_8xx */
 
 /*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_icache_range)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-	PURGE_PREFETCHED_INS
-	blr				/* for 601 and e200, do nothing */
-#else
-	rlwinm	r3,r3,0,0,31 - L1_CACHE_SHIFT
-	subf	r4,r3,r4
-	addi	r4,r4,L1_CACHE_BYTES - 1
-	srwi.	r4,r4,L1_CACHE_SHIFT
-	beqlr
-	mtctr	r4
-	mr	r6,r3
-1:	dcbst	0,r3
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	1b
-	sync				/* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
-	mtctr	r4
-2:	icbi	0,r6
-	addi	r6,r6,L1_CACHE_BYTES
-	bdnz	2b
-#else
-	/* Flash invalidate on 44x because we are passed kmapped addresses and
-	   this doesn't work for userspace pages due to the virtually tagged
-	   icache.  Sigh. */
-	iccci	0, r0
-#endif
-	sync				/* additional sync needed on g4 */
-	isync
-	blr
-#endif
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 and e200 which have a unified cache.
- *
- *	void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-	PURGE_PREFETCHED_INS
-	blr
-#else
-	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
-	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
-	mtctr	r4
-	mr	r6,r3
-0:	dcbst	0,r3				/* Write line to ram */
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	0b
-	sync
-#ifdef CONFIG_44x
-	/* We don't flush the icache on 44x. Those have a virtual icache
-	 * and we don't have access to the virtual address here (it's
-	 * not the page vaddr but where it's mapped in user space). The
-	 * flushing of the icache on these is handled elsewhere, when
-	 * a change in the address space occurs, before returning to
-	 * user space
-	 */
-BEGIN_MMU_FTR_SECTION
-	blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
-	mtctr	r4
-1:	icbi	0,r6
-	addi	r6,r6,L1_CACHE_BYTES
-	bdnz	1b
-	sync
-	isync
-	blr
-#endif
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address.  We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- *	void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-	PURGE_PREFETCHED_INS
-	blr					/* for 601 and e200, do nothing */
-#else
-	mfmsr	r10
-	rlwinm	r0,r10,0,28,26			/* clear DR */
-	mtmsr	r0
-	isync
-	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
-	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
-	mtctr	r4
-	mr	r6,r3
-0:	dcbst	0,r3				/* Write line to ram */
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	0b
-	sync
-	mtctr	r4
-1:	icbi	0,r6
-	addi	r6,r6,L1_CACHE_BYTES
-	bdnz	1b
-	sync
-	mtmsr	r10				/* restore DR */
-	isync
-	blr
-#endif
-#endif /* CONFIG_BOOKE */
-
-/*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
  * the destination into cache).  This requires that the destination
@@ -614,488 +488,3 @@ _GLOBAL(start_secondary_resume)
  */
 _GLOBAL(__main)
 	blr
-
-#ifdef CONFIG_KEXEC_CORE
-	/*
-	 * Must be relocatable PIC code callable as a C function.
-	 */
-	.globl relocate_new_kernel
-relocate_new_kernel:
-	/* r3 = page_list   */
-	/* r4 = reboot_code_buffer */
-	/* r5 = start_address      */
-
-#ifdef CONFIG_FSL_BOOKE
-
-	mr	r29, r3
-	mr	r30, r4
-	mr	r31, r5
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
-	mr      r3, r29
-	mr      r4, r30
-	mr      r5, r31
-
-	li	r0, 0
-#elif defined(CONFIG_44x)
-
-	/* Save our parameters */
-	mr	r29, r3
-	mr	r30, r4
-	mr	r31, r5
-
-#ifdef CONFIG_PPC_47x
-	/* Check for 47x cores */
-	mfspr	r3,SPRN_PVR
-	srwi	r3,r3,16
-	cmplwi	cr0,r3,PVR_476FPE@h
-	beq	setup_map_47x
-	cmplwi	cr0,r3,PVR_476@h
-	beq	setup_map_47x
-	cmplwi	cr0,r3,PVR_476_ISS@h
-	beq	setup_map_47x
-#endif /* CONFIG_PPC_47x */
-	
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space(TS) and
- *    jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
-	/* 
-	 * Load the PID with kernel PID (0).
-	 * Also load our MSR_IS and TID to MMUCR for TLB search.
-	 */
-	li	r3, 0
-	mtspr	SPRN_PID, r3
-	mfmsr	r4
-	andi.	r4,r4,MSR_IS@l
-	beq	wmmucr
-	oris	r3,r3,PPC44x_MMUCR_STS@h
-wmmucr:
-	mtspr	SPRN_MMUCR,r3
-	sync
-
-	/*
-	 * Invalidate all the TLB entries except the current entry
-	 * where we are running from
-	 */
-	bl	0f				/* Find our address */
-0:	mflr	r5				/* Make it accessible */
-	tlbsx	r23,0,r5			/* Find entry we are in */
-	li	r4,0				/* Start at TLB entry 0 */
-	li	r3,0				/* Set PAGEID inval value */
-1:	cmpw	r23,r4				/* Is this our entry? */
-	beq	skip				/* If so, skip the inval */
-	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
-skip:
-	addi	r4,r4,1				/* Increment */
-	cmpwi	r4,64				/* Are we done?	*/
-	bne	1b				/* If not, repeat */
-	isync
-
-	/* Create a temp mapping and jump to it */
-	andi.	r6, r23, 1		/* Find the index to use */
-	addi	r24, r6, 1		/* r24 will contain 1 or 2 */
-
-	mfmsr	r9			/* get the MSR */
-	rlwinm	r5, r9, 27, 31, 31	/* Extract the MSR[IS] */
-	xori	r7, r5, 1		/* Use the other address space */
-
-	/* Read the current mapping entries */
-	tlbre	r3, r23, PPC44x_TLB_PAGEID
-	tlbre	r4, r23, PPC44x_TLB_XLAT
-	tlbre	r5, r23, PPC44x_TLB_ATTRIB
-
-	/* Save our current XLAT entry */
-	mr	r25, r4
-
-	/* Extract the TLB PageSize */
-	li	r10, 1 			/* r10 will hold PageSize */
-	rlwinm	r11, r3, 0, 24, 27	/* bits 24-27 */
-
-	/* XXX: As of now we use 256M, 4K pages */
-	cmpwi	r11, PPC44x_TLB_256M
-	bne	tlb_4k
-	rotlwi	r10, r10, 28		/* r10 = 256M */
-	b	write_out
-tlb_4k:
-	cmpwi	r11, PPC44x_TLB_4K
-	bne	default
-	rotlwi	r10, r10, 12		/* r10 = 4K */
-	b	write_out
-default:
-	rotlwi	r10, r10, 10		/* r10 = 1K */
-
-write_out:
-	/*
-	 * Write out the tmp 1:1 mapping for this code in other address space
-	 * Fixup  EPN = RPN , TS=other address space
-	 */
-	insrwi	r3, r7, 1, 23		/* Bit 23 is TS for PAGEID field */
-
-	/* Write out the tmp mapping entries */
-	tlbwe	r3, r24, PPC44x_TLB_PAGEID
-	tlbwe	r4, r24, PPC44x_TLB_XLAT
-	tlbwe	r5, r24, PPC44x_TLB_ATTRIB
-
-	subi	r11, r10, 1		/* PageOffset Mask = PageSize - 1 */
-	not	r10, r11		/* Mask for PageNum */
-
-	/* Switch to other address space in MSR */
-	insrwi	r9, r7, 1, 26		/* Set MSR[IS] = r7 */
-
-	bl	1f
-1:	mflr	r8
-	addi	r8, r8, (2f-1b)		/* Find the target offset */
-
-	/* Jump to the tmp mapping */
-	mtspr	SPRN_SRR0, r8
-	mtspr	SPRN_SRR1, r9
-	rfi
-
-2:
-	/* Invalidate the entry we were executing from */
-	li	r3, 0
-	tlbwe	r3, r23, PPC44x_TLB_PAGEID
-
-	/* attribute fields. rwx for SUPERVISOR mode */
-	li	r5, 0
-	ori	r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
-	/* Create 1:1 mapping in 256M pages */
-	xori	r7, r7, 1			/* Revert back to Original TS */
-
-	li	r8, 0				/* PageNumber */
-	li	r6, 3				/* TLB Index, start at 3  */
-
-next_tlb:
-	rotlwi	r3, r8, 28			/* Create EPN (bits 0-3) */
-	mr	r4, r3				/* RPN = EPN  */
-	ori	r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
-	insrwi	r3, r7, 1, 23			/* Set TS from r7 */
-
-	tlbwe	r3, r6, PPC44x_TLB_PAGEID	/* PageID field : EPN, V, SIZE */
-	tlbwe	r4, r6, PPC44x_TLB_XLAT		/* Address translation : RPN   */
-	tlbwe	r5, r6, PPC44x_TLB_ATTRIB	/* Attributes */
-
-	addi	r8, r8, 1			/* Increment PN */
-	addi	r6, r6, 1			/* Increment TLB Index */
-	cmpwi	r8, 8				/* Are we done ? */
-	bne	next_tlb
-	isync
-
-	/* Jump to the new mapping 1:1 */
-	li	r9,0
-	insrwi	r9, r7, 1, 26			/* Set MSR[IS] = r7 */
-
-	bl	1f
-1:	mflr	r8
-	and	r8, r8, r11			/* Get our offset within page */
-	addi	r8, r8, (2f-1b)
-
-	and	r5, r25, r10			/* Get our target PageNum */
-	or	r8, r8, r5			/* Target jump address */
-
-	mtspr	SPRN_SRR0, r8
-	mtspr	SPRN_SRR1, r9
-	rfi
-2:
-	/* Invalidate the tmp entry we used */
-	li	r3, 0
-	tlbwe	r3, r24, PPC44x_TLB_PAGEID
-	sync
-	b	ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
-	/* 1:1 mapping for 47x */
-
-setup_map_47x:
-
-	/*
-	 * Load the kernel pid (0) to PID and also to MMUCR[TID].
-	 * Also set the MSR IS->MMUCR STS
-	 */
-	li	r3, 0
-	mtspr	SPRN_PID, r3			/* Set PID */
-	mfmsr	r4				/* Get MSR */
-	andi.	r4, r4, MSR_IS@l		/* TS=1? */
-	beq	1f				/* If not, leave STS=0 */
-	oris	r3, r3, PPC47x_MMUCR_STS@h	/* Set STS=1 */
-1:	mtspr	SPRN_MMUCR, r3			/* Put MMUCR */
-	sync
-
-	/* Find the entry we are running from */
-	bl	2f
-2:	mflr	r23
-	tlbsx	r23, 0, r23
-	tlbre	r24, r23, 0			/* TLB Word 0 */
-	tlbre	r25, r23, 1			/* TLB Word 1 */
-	tlbre	r26, r23, 2			/* TLB Word 2 */
-
-
-	/*
-	 * Invalidates all the tlb entries by writing to 256 RPNs(r4)
-	 * of 4k page size in all  4 ways (0-3 in r3).
-	 * This would invalidate the entire UTLB including the one we are
-	 * running from. However the shadow TLB entries would help us 
-	 * to continue the execution, until we flush them (rfi/isync).
-	 */
-	addis	r3, 0, 0x8000			/* specify the way */
-	addi	r4, 0, 0			/* TLB Word0 = (EPN=0, VALID = 0) */
-	addi	r5, 0, 0
-	b	clear_utlb_entry
-
-	/* Align the loop to speed things up. from head_44x.S */
-	.align	6
-
-clear_utlb_entry:
-
-	tlbwe	r4, r3, 0
-	tlbwe	r5, r3, 1
-	tlbwe	r5, r3, 2
-	addis	r3, r3, 0x2000			/* Increment the way */
-	cmpwi	r3, 0
-	bne	clear_utlb_entry
-	addis	r3, 0, 0x8000
-	addis	r4, r4, 0x100			/* Increment the EPN */
-	cmpwi	r4, 0
-	bne	clear_utlb_entry
-
-	/* Create the entries in the other address space */
-	mfmsr	r5
-	rlwinm	r7, r5, 27, 31, 31		/* Get the TS (Bit 26) from MSR */
-	xori	r7, r7, 1			/* r7 = !TS */
-
-	insrwi	r24, r7, 1, 21			/* Change the TS in the saved TLB word 0 */
-
-	/* 
-	 * write out the TLB entries for the tmp mapping
-	 * Use way '0' so that we could easily invalidate it later.
-	 */
-	lis	r3, 0x8000			/* Way '0' */ 
-
-	tlbwe	r24, r3, 0
-	tlbwe	r25, r3, 1
-	tlbwe	r26, r3, 2
-
-	/* Update the msr to the new TS */
-	insrwi	r5, r7, 1, 26
-
-	bl	1f
-1:	mflr	r6
-	addi	r6, r6, (2f-1b)
-
-	mtspr	SPRN_SRR0, r6
-	mtspr	SPRN_SRR1, r5
-	rfi
-
-	/* 
-	 * Now we are in the tmp address space.
-	 * Create a 1:1 mapping for 0-2GiB in the original TS.
-	 */
-2:
-	li	r3, 0
-	li	r4, 0				/* TLB Word 0 */
-	li	r5, 0				/* TLB Word 1 */
-	li	r6, 0
-	ori	r6, r6, PPC47x_TLB2_S_RWX	/* TLB word 2 */
-
-	li	r8, 0				/* PageIndex */
-
-	xori	r7, r7, 1			/* revert back to original TS */
-
-write_utlb:
-	rotlwi	r5, r8, 28			/* RPN = PageIndex * 256M */
-						/* ERPN = 0 as we don't use memory above 2G */
-
-	mr	r4, r5				/* EPN = RPN */
-	ori	r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
-	insrwi	r4, r7, 1, 21			/* Insert the TS to Word 0 */
-
-	tlbwe	r4, r3, 0			/* Write out the entries */
-	tlbwe	r5, r3, 1
-	tlbwe	r6, r3, 2
-	addi	r8, r8, 1
-	cmpwi	r8, 8				/* Have we completed ? */
-	bne	write_utlb
-
-	/* make sure we complete the TLB write up */
-	isync
-
-	/* 
-	 * Prepare to jump to the 1:1 mapping.
-	 * 1) Extract page size of the tmp mapping
-	 *    DSIZ = TLB_Word0[22:27]
-	 * 2) Calculate the physical address of the address
-	 *    to jump to.
-	 */
-	rlwinm	r10, r24, 0, 22, 27
-
-	cmpwi	r10, PPC47x_TLB0_4K
-	bne	0f
-	li	r10, 0x1000			/* r10 = 4k */
-	bl	1f
-
-0:
-	/* Defaults to 256M */
-	lis	r10, 0x1000
-	
-	bl	1f
-1:	mflr	r4
-	addi	r4, r4, (2f-1b)			/* virtual address  of 2f */
-
-	subi	r11, r10, 1			/* offsetmask = Pagesize - 1 */
-	not	r10, r11			/* Pagemask = ~(offsetmask) */
-
-	and	r5, r25, r10			/* Physical page */
-	and	r6, r4, r11			/* offset within the current page */
-
-	or	r5, r5, r6			/* Physical address for 2f */
-
-	/* Switch the TS in MSR to the original one */
-	mfmsr	r8
-	insrwi	r8, r7, 1, 26
-
-	mtspr	SPRN_SRR1, r8
-	mtspr	SPRN_SRR0, r5
-	rfi
-
-2:
-	/* Invalidate the tmp mapping */
-	lis	r3, 0x8000			/* Way '0' */
-
-	clrrwi	r24, r24, 12			/* Clear the valid bit */
-	tlbwe	r24, r3, 0
-	tlbwe	r25, r3, 1
-	tlbwe	r26, r3, 2
-
-	/* Make sure we complete the TLB write and flush the shadow TLB */
-	isync
-
-#endif
-
-ppc44x_map_done:
-
-
-	/* Restore the parameters */
-	mr	r3, r29
-	mr	r4, r30
-	mr	r5, r31
-
-	li	r0, 0
-#else
-	li	r0, 0
-
-	/*
-	 * Set Machine Status Register to a known status,
-	 * switch the MMU off and jump to 1: in a single step.
-	 */
-
-	mr	r8, r0
-	ori     r8, r8, MSR_RI|MSR_ME
-	mtspr	SPRN_SRR1, r8
-	addi	r8, r4, 1f - relocate_new_kernel
-	mtspr	SPRN_SRR0, r8
-	sync
-	rfi
-
-1:
-#endif
-	/* from this point address translation is turned off */
-	/* and interrupts are disabled */
-
-	/* set a new stack at the bottom of our page... */
-	/* (not really needed now) */
-	addi	r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
-	stw	r0, 0(r1)
-
-	/* Do the copies */
-	li	r6, 0 /* checksum */
-	mr	r0, r3
-	b	1f
-
-0:	/* top, read another word for the indirection page */
-	lwzu	r0, 4(r3)
-
-1:
-	/* is it a destination page? (r8) */
-	rlwinm.	r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
-	beq	2f
-
-	rlwinm	r8, r0, 0, 0, 19 /* clear kexec flags, page align */
-	b	0b
-
-2:	/* is it an indirection page? (r3) */
-	rlwinm.	r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
-	beq	2f
-
-	rlwinm	r3, r0, 0, 0, 19 /* clear kexec flags, page align */
-	subi	r3, r3, 4
-	b	0b
-
-2:	/* are we done? */
-	rlwinm.	r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
-	beq	2f
-	b	3f
-
-2:	/* is it a source page? (r9) */
-	rlwinm.	r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
-	beq	0b
-
-	rlwinm	r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
-	li	r7, PAGE_SIZE / 4
-	mtctr   r7
-	subi    r9, r9, 4
-	subi    r8, r8, 4
-9:
-	lwzu    r0, 4(r9)  /* do the copy */
-	xor	r6, r6, r0
-	stwu    r0, 4(r8)
-	dcbst	0, r8
-	sync
-	icbi	0, r8
-	bdnz    9b
-
-	addi    r9, r9, 4
-	addi    r8, r8, 4
-	b	0b
-
-3:
-
-	/* To be certain of avoiding problems with self-modifying code
-	 * execute a serializing instruction here.
-	 */
-	isync
-	sync
-
-	mfspr	r3, SPRN_PIR /* current core we are running on */
-	mr	r4, r5 /* load physical address of chunk called */
-
-	/* jump to the entry point, usually the setup routine */
-	mtlr	r5
-	blrl
-
-1:	b	1b
-
-relocate_new_kernel_end:
-
-	.globl relocate_new_kernel_size
-relocate_new_kernel_size:
-	.long relocate_new_kernel_end - relocate_new_kernel
-#endif
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..1864605eca29 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -49,108 +49,6 @@ _GLOBAL(call_do_irq)
 	mtlr	r0
 	blr
 
-	.section	".toc","aw"
-PPC64_CACHES:
-	.tc		ppc64_caches[TC],ppc64_caches
-	.section	".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- *   flush all bytes from start through stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_icache_range)
-BEGIN_FTR_SECTION
-	PURGE_PREFETCHED_INS
-	blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of cache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-1:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	1b
-	sync
-
-/* Now invalidate the instruction cache */
-	
-	lwz	r7,ICACHEL1BLOCKSIZE(r10)	/* Get Icache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5
-	lwz	r9,ICACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of Icache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-2:	icbi	0,r6
-	add	r6,r6,r7
-	bdnz	2b
-	isync
-	blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- *	void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
-
-BEGIN_FTR_SECTION
-	PURGE_PREFETCHED_INS
-	blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-
-/* Flush the dcache */
- 	ld	r7,PPC64_CACHES@toc(r2)
-	clrrdi	r3,r3,PAGE_SHIFT           	    /* Page align */
-	lwz	r4,DCACHEL1BLOCKSPERPAGE(r7)	/* Get # dcache blocks per page */
-	lwz	r5,DCACHEL1BLOCKSIZE(r7)	/* Get dcache block size */
-	mr	r6,r3
-	mtctr	r4
-0:	dcbst	0,r6
-	add	r6,r6,r5
-	bdnz	0b
-	sync
-
-/* Now invalidate the icache */	
-
-	lwz	r4,ICACHEL1BLOCKSPERPAGE(r7)	/* Get # icache blocks per page */
-	lwz	r5,ICACHEL1BLOCKSIZE(r7)	/* Get icache block size */
-	mtctr	r4
-1:	icbi	0,r3
-	add	r3,r3,r5
-	bdnz	1b
-	isync
-	blr
-
 _GLOBAL(__bswapdi2)
 EXPORT_SYMBOL(__bswapdi2)
 	srdi	r8,r3,32
@@ -432,18 +330,13 @@ kexec_create_tlb:
 	rlwimi	r9,r10,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r9) */
 
 /* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED	MAS2_M
-#else
-#define M_IF_NEEDED	0
-#endif
 	mtspr	SPRN_MAS0,r9
 
 	lis	r9,(MAS1_VALID|MAS1_IPROT)@h
 	ori	r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
 	mtspr	SPRN_MAS1,r9
 
-	LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED)
+	LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
 	mtspr	SPRN_MAS2,r9
 
 	LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 639ceae7da9d..4df94b6e2f32 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -715,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
 {
 	thread->hw_brk.address = 0;
 	thread->hw_brk.type = 0;
+	thread->hw_brk.len = 0;
+	thread->hw_brk.hw_len = 0;
 	if (ppc_breakpoint_available())
 		set_breakpoint(&thread->hw_brk);
 }
@@ -816,6 +818,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
 		return false;
 	if (a->len != b->len)
 		return false;
+	/* no need to check hw_len. it's calculated from address and len */
 	return true;
 }
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 100f1b57ec2f..577345382b23 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -303,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2)
 	return NULL;
 }
 
-static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
-{
-	size_t ret = prom_strlen(src);
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
+{
+	size_t dsize = prom_strlen(dest);
+	size_t len = prom_strlen(src);
+	size_t res = dsize + len;
+
+	/* This would be a bug */
+	if (dsize >= count)
+		return count;
+
+	dest += dsize;
+	count -= dsize;
+	if (len >= count)
+		len = count-1;
+	memcpy(dest, src, len);
+	dest[len] = 0;
+	return res;
 
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		memcpy(dest, src, len);
-		dest[len] = '\0';
-	}
-	return ret;
 }
 
 #ifdef CONFIG_PPC_PSERIES
@@ -764,10 +772,14 @@ static void __init early_cmdline_parse(void)
 
 	prom_cmd_line[0] = 0;
 	p = prom_cmd_line;
-	if ((long)prom.chosen > 0)
+
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
 		l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-	if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
-		prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
+
+	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+		prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+			     sizeof(prom_cmd_line));
+
 	prom_printf("command line: %s\n", prom_cmd_line);
 
 #ifdef CONFIG_PPC64
@@ -1053,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
 		.reserved2 = 0,
 		.reserved3 = 0,
 		.subprocessors = 1,
-		.byte22 = OV5_FEAT(OV5_DRMEM_V2),
+		.byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
 		.intarch = 0,
 		.mmu = 0,
 		.hash_ext = 0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8c92febf5f44..25c0424e8868 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2425,7 +2425,8 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 		return -EIO;
 	hw_brk.address = data & (~HW_BRK_TYPE_DABR);
 	hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
-	hw_brk.len = 8;
+	hw_brk.len = DABR_MAX_LEN;
+	hw_brk.hw_len = DABR_MAX_LEN;
 	set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	bp = thread->ptrace_bps[0];
@@ -2439,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	if (bp) {
 		attr = bp->attr;
 		attr.bp_addr = hw_brk.address;
+		attr.bp_len = DABR_MAX_LEN;
 		arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
 
 		/* Enable breakpoint */
@@ -2456,7 +2458,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	/* Create a new breakpoint request if one doesn't exist already */
 	hw_breakpoint_init(&attr);
 	attr.bp_addr = hw_brk.address;
-	attr.bp_len = 8;
+	attr.bp_len = DABR_MAX_LEN;
 	arch_bp_generic_fields(hw_brk.type,
 			       &attr.bp_type);
 
@@ -2880,18 +2882,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
 	if ((unsigned long)bp_info->addr >= TASK_SIZE)
 		return -EIO;
 
-	brk.address = bp_info->addr & ~7UL;
+	brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
 	brk.type = HW_BRK_TYPE_TRANSLATE;
-	brk.len = 8;
+	brk.len = DABR_MAX_LEN;
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
 		brk.type |= HW_BRK_TYPE_READ;
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 		brk.type |= HW_BRK_TYPE_WRITE;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-	/*
-	 * Check if the request is for 'range' breakpoints. We can
-	 * support it if range < 8 bytes.
-	 */
 	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
 		len = bp_info->addr2 - bp_info->addr;
 	else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
@@ -2904,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
 
 	/* Create a new breakpoint request if one doesn't exist already */
 	hw_breakpoint_init(&attr);
-	attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+	attr.bp_addr = (unsigned long)bp_info->addr;
 	attr.bp_len = len;
 	arch_bp_generic_fields(brk.type, &attr.bp_type);
 
@@ -3361,6 +3359,12 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 	user_enter();
 }
 
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
 void __init pt_regs_check(void)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
@@ -3398,4 +3402,67 @@ void __init pt_regs_check(void)
 		     offsetof(struct user_pt_regs, result));
 
 	BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+	// Now check that the pt_regs offsets match the uapi #defines
+	#define CHECK_REG(_pt, _reg) \
+		BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+				     sizeof(unsigned long)));
+
+	CHECK_REG(PT_R0,  gpr[0]);
+	CHECK_REG(PT_R1,  gpr[1]);
+	CHECK_REG(PT_R2,  gpr[2]);
+	CHECK_REG(PT_R3,  gpr[3]);
+	CHECK_REG(PT_R4,  gpr[4]);
+	CHECK_REG(PT_R5,  gpr[5]);
+	CHECK_REG(PT_R6,  gpr[6]);
+	CHECK_REG(PT_R7,  gpr[7]);
+	CHECK_REG(PT_R8,  gpr[8]);
+	CHECK_REG(PT_R9,  gpr[9]);
+	CHECK_REG(PT_R10, gpr[10]);
+	CHECK_REG(PT_R11, gpr[11]);
+	CHECK_REG(PT_R12, gpr[12]);
+	CHECK_REG(PT_R13, gpr[13]);
+	CHECK_REG(PT_R14, gpr[14]);
+	CHECK_REG(PT_R15, gpr[15]);
+	CHECK_REG(PT_R16, gpr[16]);
+	CHECK_REG(PT_R17, gpr[17]);
+	CHECK_REG(PT_R18, gpr[18]);
+	CHECK_REG(PT_R19, gpr[19]);
+	CHECK_REG(PT_R20, gpr[20]);
+	CHECK_REG(PT_R21, gpr[21]);
+	CHECK_REG(PT_R22, gpr[22]);
+	CHECK_REG(PT_R23, gpr[23]);
+	CHECK_REG(PT_R24, gpr[24]);
+	CHECK_REG(PT_R25, gpr[25]);
+	CHECK_REG(PT_R26, gpr[26]);
+	CHECK_REG(PT_R27, gpr[27]);
+	CHECK_REG(PT_R28, gpr[28]);
+	CHECK_REG(PT_R29, gpr[29]);
+	CHECK_REG(PT_R30, gpr[30]);
+	CHECK_REG(PT_R31, gpr[31]);
+	CHECK_REG(PT_NIP, nip);
+	CHECK_REG(PT_MSR, msr);
+	CHECK_REG(PT_ORIG_R3, orig_gpr3);
+	CHECK_REG(PT_CTR, ctr);
+	CHECK_REG(PT_LNK, link);
+	CHECK_REG(PT_XER, xer);
+	CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+	CHECK_REG(PT_SOFTE, softe);
+#else
+	CHECK_REG(PT_MQ, mq);
+#endif
+	CHECK_REG(PT_TRAP, trap);
+	CHECK_REG(PT_DAR, dar);
+	CHECK_REG(PT_DSISR, dsisr);
+	CHECK_REG(PT_RESULT, result);
+	#undef CHECK_REG
+
+	BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+	/*
+	 * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+	 * real registers.
+	 */
+	BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
 }
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644
index 000000000000..4b982324d368
--- /dev/null
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+	static const struct of_device_id ids[] = {
+		{ .compatible = "ibm,secureboot", },
+		{ .compatible = "ibm,secureboot-v1", },
+		{ .compatible = "ibm,secureboot-v2", },
+		{},
+	};
+
+	return of_find_matching_node(NULL, ids);
+}
+
+bool is_ppc_secureboot_enabled(void)
+{
+	struct device_node *node;
+	bool enabled = false;
+
+	node = get_ppc_fw_sb_node();
+	enabled = of_property_read_bool(node, "os-secureboot-enforcing");
+
+	of_node_put(node);
+
+	pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
+
+	return enabled;
+}
+
+bool is_ppc_trustedboot_enabled(void)
+{
+	struct device_node *node;
+	bool enabled = false;
+
+	node = get_ppc_fw_sb_node();
+	enabled = of_property_read_bool(node, "trusted-enabled");
+
+	of_node_put(node);
+
+	pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
+
+	return enabled;
+}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 7cfcb294b11c..bd70f5be1c27 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -16,7 +16,7 @@
 #include <asm/setup.h>
 
 
-unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
 enum count_cache_flush_type {
 	COUNT_CACHE_FLUSH_NONE	= 0x1,
@@ -24,6 +24,7 @@ enum count_cache_flush_type {
 	COUNT_CACHE_FLUSH_HW	= 0x4,
 };
 static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+static bool link_stack_flush_enabled;
 
 bool barrier_nospec_enabled;
 static bool no_nospec;
@@ -94,13 +95,14 @@ static int barrier_nospec_get(void *data, u64 *val)
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
-			barrier_nospec_get, barrier_nospec_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+			 barrier_nospec_set, "%llu\n");
 
 static __init int barrier_nospec_debugfs_init(void)
 {
-	debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
-			    &fops_barrier_nospec);
+	debugfs_create_file_unsafe("barrier_nospec", 0600,
+				   powerpc_debugfs_root, NULL,
+				   &fops_barrier_nospec);
 	return 0;
 }
 device_initcall(barrier_nospec_debugfs_init);
@@ -108,7 +110,7 @@ device_initcall(barrier_nospec_debugfs_init);
 static __init int security_feature_debugfs_init(void)
 {
 	debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
-			   (u64 *)&powerpc_security_features);
+			   &powerpc_security_features);
 	return 0;
 }
 device_initcall(security_feature_debugfs_init);
@@ -141,32 +143,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 
 	thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
 
-	if (rfi_flush || thread_priv) {
+	if (rfi_flush) {
 		struct seq_buf s;
 		seq_buf_init(&s, buf, PAGE_SIZE - 1);
 
-		seq_buf_printf(&s, "Mitigation: ");
-
-		if (rfi_flush)
-			seq_buf_printf(&s, "RFI Flush");
-
-		if (rfi_flush && thread_priv)
-			seq_buf_printf(&s, ", ");
-
+		seq_buf_printf(&s, "Mitigation: RFI Flush");
 		if (thread_priv)
-			seq_buf_printf(&s, "L1D private per thread");
+			seq_buf_printf(&s, ", L1D private per thread");
 
 		seq_buf_printf(&s, "\n");
 
 		return s.len;
 	}
 
+	if (thread_priv)
+		return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
 	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
 	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
 		return sprintf(buf, "Not affected\n");
 
 	return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_meltdown(dev, attr, buf);
+}
 #endif
 
 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
@@ -212,11 +215,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 		if (ccd)
 			seq_buf_printf(&s, "Indirect branch cache disabled");
+
+		if (link_stack_flush_enabled)
+			seq_buf_printf(&s, ", Software link stack flush");
+
 	} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
 		seq_buf_printf(&s, "Mitigation: Software count cache flush");
 
 		if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
 			seq_buf_printf(&s, " (hardware accelerated)");
+
+		if (link_stack_flush_enabled)
+			seq_buf_printf(&s, ", Software link stack flush");
+
 	} else if (btb_flush_enabled) {
 		seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
 	} else {
@@ -367,28 +378,61 @@ static int stf_barrier_get(void *data, u64 *val)
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+			 "%llu\n");
 
 static __init int stf_barrier_debugfs_init(void)
 {
-	debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+	debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+				   NULL, &fops_stf_barrier);
 	return 0;
 }
 device_initcall(stf_barrier_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 
+static void no_count_cache_flush(void)
+{
+	count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+	pr_info("count-cache-flush: software flush disabled.\n");
+}
+
 static void toggle_count_cache_flush(bool enable)
 {
-	if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+	if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
+	    !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
+		enable = false;
+
+	if (!enable) {
 		patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
-		count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
-		pr_info("count-cache-flush: software flush disabled.\n");
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
+#endif
+		pr_info("link-stack-flush: software flush disabled.\n");
+		link_stack_flush_enabled = false;
+		no_count_cache_flush();
 		return;
 	}
 
+	// This enables the branch from _switch to flush_count_cache
 	patch_branch_site(&patch__call_flush_count_cache,
 			  (u64)&flush_count_cache, BRANCH_SET_LINK);
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	// This enables the branch from guest_exit_cont to kvm_flush_link_stack
+	patch_branch_site(&patch__call_kvm_flush_link_stack,
+			  (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+#endif
+
+	pr_info("link-stack-flush: software flush enabled.\n");
+	link_stack_flush_enabled = true;
+
+	// If we just need to flush the link stack, patch an early return
+	if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+		patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
+		no_count_cache_flush();
+		return;
+	}
+
 	if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
 		count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
 		pr_info("count-cache-flush: full software flush sequence enabled.\n");
@@ -407,11 +451,20 @@ void setup_count_cache_flush(void)
 	if (no_spectrev2 || cpu_mitigations_off()) {
 		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
 		    security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
-			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
+			pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
 
 		enable = false;
 	}
 
+	/*
+	 * There's no firmware feature flag/hypervisor bit to tell us we need to
+	 * flush the link stack on context switch. So we set it here if we see
+	 * either of the Spectre v2 mitigations that aim to protect userspace.
+	 */
+	if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+	    security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
 	toggle_count_cache_flush(enable);
 }
 
@@ -442,13 +495,14 @@ static int count_cache_flush_get(void *data, u64 *val)
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
-			count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+			 count_cache_flush_set, "%llu\n");
 
 static __init int count_cache_flush_debugfs_init(void)
 {
-	debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root,
-			    NULL, &fops_count_cache_flush);
+	debugfs_create_file_unsafe("count_cache_flush", 0600,
+				   powerpc_debugfs_root, NULL,
+				   &fops_count_cache_flush);
 	return 0;
 }
 device_initcall(count_cache_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644
index 000000000000..6a29777d6a2d
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init;
+
+void set_secvar_ops(const struct secvar_operations *ops)
+{
+	secvar_ops = ops;
+}
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644
index 000000000000..a0a78aba2083
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to user via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE	   1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	ssize_t rc = 0;
+	struct device_node *node;
+	const char *format;
+
+	node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (!of_device_is_available(node))
+		return -ENODEV;
+
+	rc = of_property_read_string(node, "format", &format);
+	if (rc)
+		return rc;
+
+	rc = sprintf(buf, "%s\n", format);
+
+	of_node_put(node);
+
+	return rc;
+}
+
+
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	uint64_t dsize;
+	int rc;
+
+	rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+	if (rc) {
+		pr_err("Error retrieving %s variable size %d\n", kobj->name,
+		       rc);
+		return rc;
+	}
+
+	return sprintf(buf, "%llu\n", dsize);
+}
+
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+			 struct bin_attribute *attr, char *buf, loff_t off,
+			 size_t count)
+{
+	uint64_t dsize;
+	char *data;
+	int rc;
+
+	rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+	if (rc) {
+		pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+		return rc;
+	}
+	pr_debug("dsize is %llu\n", dsize);
+
+	data = kzalloc(dsize, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize);
+	if (rc) {
+		pr_err("Error getting %s variable %d\n", kobj->name, rc);
+		goto data_fail;
+	}
+
+	rc = memory_read_from_buffer(buf, count, &off, data, dsize);
+
+data_fail:
+	kfree(data);
+	return rc;
+}
+
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+			    struct bin_attribute *attr, char *buf, loff_t off,
+			    size_t count)
+{
+	int rc;
+
+	pr_debug("count is %ld\n", count);
+	rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+	if (rc) {
+		pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+		return rc;
+	}
+
+	return count;
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);
+
+static struct bin_attribute *secvar_bin_attrs[] = {
+	&data_attr,
+	&update_attr,
+	NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+	&size_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+	.attrs = secvar_attrs,
+	.bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+static struct kobj_type secvar_ktype = {
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.default_groups = secvar_attr_groups,
+};
+
+static int update_kobj_size(void)
+{
+
+	struct device_node *node;
+	u64 varsize;
+	int rc = 0;
+
+	node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (!of_device_is_available(node)) {
+		rc = -ENODEV;
+		goto out;
+	}
+
+	rc = of_property_read_u64(node, "max-var-size", &varsize);
+	if (rc)
+		goto out;
+
+	data_attr.size = varsize;
+	update_attr.size = varsize;
+
+out:
+	of_node_put(node);
+
+	return rc;
+}
+
+static int secvar_sysfs_load(void)
+{
+	char *name;
+	uint64_t namesize = 0;
+	struct kobject *kobj;
+	int rc;
+
+	name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	do {
+		rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
+		if (rc) {
+			if (rc != -ENOENT)
+				pr_err("error getting secvar from firmware %d\n",
+				       rc);
+			break;
+		}
+
+		kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+		if (!kobj) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		kobject_init(kobj, &secvar_ktype);
+
+		rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+		if (rc) {
+			pr_warn("kobject_add error %d for attribute: %s\n", rc,
+				name);
+			kobject_put(kobj);
+			kobj = NULL;
+		}
+
+		if (kobj)
+			kobject_uevent(kobj, KOBJ_ADD);
+
+	} while (!rc);
+
+	kfree(name);
+	return rc;
+}
+
+static int secvar_sysfs_init(void)
+{
+	int rc;
+
+	if (!secvar_ops) {
+		pr_warn("secvar: failed to retrieve secvar operations.\n");
+		return -ENODEV;
+	}
+
+	secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+	if (!secvar_kobj) {
+		pr_err("secvar: Failed to create firmware kobj\n");
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+	if (rc) {
+		kobject_put(secvar_kobj);
+		return -ENOMEM;
+	}
+
+	secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+	if (!secvar_kset) {
+		pr_err("secvar: sysfs kobject registration failed.\n");
+		kobject_put(secvar_kobj);
+		return -ENOMEM;
+	}
+
+	rc = update_kobj_size();
+	if (rc) {
+		pr_err("Cannot read the size of the attribute\n");
+		return rc;
+	}
+
+	secvar_sysfs_load();
+
+	return 0;
+}
+
+late_initcall(secvar_sysfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 25aaa3903000..7f8c890360fe 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = {
 	.priority = INT_MIN /* may not return; must be done last */
 };
 
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+			      void *p)
+{
+	pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+		 kaslr_offset(), KERNELBASE);
+
+	return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset
+};
+
 void __init setup_panic(void)
 {
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+		atomic_notifier_chain_register(&panic_notifier_list,
+					       &kernel_offset_notifier);
+
 	/* PPC64 always does a hard irq disable in its panic handler */
 	if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
 		return;
@@ -929,9 +949,6 @@ void __init setup_arch(char **cmdline_p)
 
 	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
 
-	if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
-		conswitchp = &dummy_con;
-
 	if (ppc_md.setup_arch)
 		ppc_md.setup_arch();
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index a7541edf0cdb..dcffe927f5b9 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -44,6 +44,7 @@
 #include <asm/asm-prototypes.h>
 #include <asm/kdump.h>
 #include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
 
 #include "setup.h"
 
@@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr)
 	/* Configure static keys first, now that we're relocated. */
 	setup_feature_keys();
 
+	early_ioremap_setup();
+
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 44b4c432a273..6104917a282d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -65,15 +65,10 @@
 #include <asm/hw_irq.h>
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
+#include <asm/early_ioremap.h>
 
 #include "setup.h"
 
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
 int spinning_secondaries;
 u64 ppc64_pft_size;
 
@@ -305,7 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
- 	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+	udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
 	/*
 	 * Do early initialization using the flattened device
@@ -338,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr)
 	apply_feature_fixups();
 	setup_feature_keys();
 
+	early_ioremap_setup();
+
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
@@ -362,11 +359,11 @@ void __init early_setup(unsigned long dt_ptr)
 	 */
 	this_cpu_enable_ftrace();
 
-	DBG(" <- early_setup()\n");
+	udbg_printf(" <- %s()\n", __func__);
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
 	/*
-	 * This needs to be done *last* (after the above DBG() even)
+	 * This needs to be done *last* (after the above udbg_printf() even)
 	 *
 	 * Right after we return from this function, we turn on the MMU
 	 * which means the real-mode access trick that btext does will
@@ -436,8 +433,6 @@ void smp_release_cpus(void)
 	if (!use_spinloop())
 		return;
 
-	DBG(" -> smp_release_cpus()\n");
-
 	/* All secondary cpus are spinning on a common spinloop, release them
 	 * all now so they can start to spin on their individual paca
 	 * spinloops. For non SMP kernels, the secondary cpus never get out
@@ -456,9 +451,7 @@ void smp_release_cpus(void)
 			break;
 		udelay(1);
 	}
-	DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
-	DBG(" <- smp_release_cpus()\n");
+	pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
 }
 #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
 
@@ -551,8 +544,6 @@ void __init initialize_cache_info(void)
 	struct device_node *cpu = NULL, *l2, *l3 = NULL;
 	u32 pvr;
 
-	DBG(" -> initialize_cache_info()\n");
-
 	/*
 	 * All shipping POWER8 machines have a firmware bug that
 	 * puts incorrect information in the device-tree. This will
@@ -576,10 +567,10 @@ void __init initialize_cache_info(void)
 	 */
 	if (cpu) {
 		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
-			DBG("Argh, can't find dcache properties !\n");
+			pr_warn("Argh, can't find dcache properties !\n");
 
 		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
-			DBG("Argh, can't find icache properties !\n");
+			pr_warn("Argh, can't find icache properties !\n");
 
 		/*
 		 * Try to find the L2 and L3 if any. Assume they are
@@ -604,8 +595,6 @@ void __init initialize_cache_info(void)
 
 	cur_cpu_spec->dcache_bsize = dcache_bsize;
 	cur_cpu_spec->icache_bsize = icache_bsize;
-
-	DBG(" <- initialize_cache_info()\n");
 }
 
 /*
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 3bfb3888e897..078608ec2e92 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -79,7 +79,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
  * sys_select() with the appropriate args. -- Cort
  */
 int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
 {
 	if ( (unsigned long)n >= 4096 )
 	{
@@ -89,7 +89,7 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 		    || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
 		    || __get_user(outp, ((fd_set  __user * __user *)(buffer+2)))
 		    || __get_user(exp, ((fd_set  __user * __user *)(buffer+3)))
-		    || __get_user(tvp, ((struct timeval  __user * __user *)(buffer+4))))
+		    || __get_user(tvp, ((struct __kernel_old_timeval  __user * __user *)(buffer+4))))
 			return -EFAULT;
 	}
 	return sys_select(n, inp, outp, exp, tvp);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	nospu	clone3				ppc_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..1168e8b37e30 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
  * Accumulate stolen time by scanning the dispatch trace log.
  * Called on entry from user mode.
  */
-void accumulate_stolen_time(void)
+void notrace accumulate_stolen_time(void)
 {
 	u64 sst, ust;
 	unsigned long save_irq_soft_mask = irq_soft_mask_return();
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
 	return stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	unsigned long stime, stime_scaled, steal_time;
 	struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
 #endif
 	}
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
 /*
  * Account the whole cputime accumulated in the paca
  * Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
  * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */
@@ -885,7 +885,7 @@ static notrace u64 timebase_read(struct clocksource *cs)
 
 void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec xt;
+	struct timespec64 xt;
 	struct clocksource *clock = tk->tkr_mono.clock;
 	u32 mult = tk->tkr_mono.mult;
 	u32 shift = tk->tkr_mono.shift;
@@ -957,8 +957,10 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->tb_to_xs = new_tb_to_xs;
 	vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-	vdso_data->stamp_xtime = xt;
+	vdso_data->stamp_xtime_sec = xt.tv_sec;
+	vdso_data->stamp_xtime_nsec = xt.tv_nsec;
 	vdso_data->stamp_sec_fraction = frac_sec;
+	vdso_data->hrtimer_res = hrtimer_resolution;
 	smp_wmb();
 	++(vdso_data->tb_update_count);
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 82f43535e686..014ff0701f24 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 }
 NOKPROBE_SYMBOL(oops_end);
 
+static char *get_mmu_str(void)
+{
+	if (early_radix_enabled())
+		return " MMU=Radix";
+	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return " MMU=Hash";
+	return "";
+}
+
 static int __die(const char *str, struct pt_regs *regs, long err)
 {
 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 
-	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
 	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
-	       PAGE_SIZE / 1024,
-	       early_radix_enabled() ? " MMU=Radix" : "",
-	       early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+	       PAGE_SIZE / 1024, get_mmu_str(),
 	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
 	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
 	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a384e7c8b01c..01595e8cafe7 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -120,13 +120,15 @@ int udbg_write(const char *s, int n)
 #define UDBG_BUFSIZE 256
 void udbg_printf(const char *fmt, ...)
 {
-	char buf[UDBG_BUFSIZE];
-	va_list args;
+	if (udbg_putc) {
+		char buf[UDBG_BUFSIZE];
+		va_list args;
 
-	va_start(args, fmt);
-	vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
-	udbg_puts(buf);
-	va_end(args);
+		va_start(args, fmt);
+		vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+		udbg_puts(buf);
+		va_end(args);
+	}
 }
 
 void __init udbg_progress(char *s, unsigned short hex)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index becd9f8767ed..3306672f57a9 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -15,10 +15,8 @@
 /* Offset for the low 32-bit part of a field of long type */
 #ifdef CONFIG_PPC64
 #define LOPART	4
-#define TSPEC_TV_SEC	TSPC64_TV_SEC+LOPART
 #else
 #define LOPART	0
-#define TSPEC_TV_SEC	TSPC32_TV_SEC
 #endif
 
 	.text
@@ -156,12 +154,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
+	mflr	r12
+  .cfi_register lr,r12
+	bl	__get_datapage@local	/* get data page */
+	lwz	r5, CLOCK_HRTIMER_RES(r3)
+	mtlr	r12
 	li	r3,0
 	cmpli	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
-	lis	r5,CLOCK_REALTIME_RES@h
-	ori	r5,r5,CLOCK_REALTIME_RES@l
 	stw	r3,TSPC32_TV_SEC(r4)
 	stw	r5,TSPC32_TV_NSEC(r4)
 	blr
@@ -192,7 +193,7 @@ V_FUNCTION_BEGIN(__kernel_time)
 	bl	__get_datapage@local
 	mr	r9, r3			/* datapage ptr in r9 */
 
-	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
+	lwz	r3,STAMP_XTIME_SEC+LOPART(r9)
 
 	cmplwi	r11,0			/* check if t is NULL */
 	beq	2f
@@ -268,7 +269,7 @@ __do_get_tspec:
 	 * as a 32.32 fixed-point number in r3 and r4.
 	 * Load & add the xtime stamp.
 	 */
-	lwz	r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+	lwz	r5,STAMP_XTIME_SEC+LOPART(r9)
 	lwz	r6,STAMP_SEC_FRAC(r9)
 	addc	r4,r4,r6
 	adde	r3,r3,r5
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 3f92561a64c4..526f5ba2593e 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
 	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
-	srw.	r8,r8,r9		/* compute line count */
+	srd.	r8,r8,r9		/* compute line count */
 	crclr	cr0*4+so
 	beqlr				/* nothing to do? */
 	mtctr	r8
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5
 	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
-	srw.	r8,r8,r9		/* compute line count */
+	srd.	r8,r8,r9		/* compute line count */
 	crclr	cr0*4+so
 	beqlr				/* nothing to do? */
 	mtctr	r8
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..1c9a04703250 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -116,8 +116,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	 * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
 	 * too
 	 */
-	ld      r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
-	ld      r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+	ld      r4,STAMP_XTIME_SEC(r3)
+	ld      r5,STAMP_XTIME_NSEC(r3)
 	bne     cr6,75f
 
 	/* CLOCK_MONOTONIC_COARSE */
@@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
+	mflr	r12
+  .cfi_register lr,r12
+	bl	V_LOCAL_FUNC(__get_datapage)
+	lwz	r5, CLOCK_HRTIMER_RES(r3)
+	mtlr	r12
 	li	r3,0
 	cmpldi	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
-	lis	r5,CLOCK_REALTIME_RES@h
-	ori	r5,r5,CLOCK_REALTIME_RES@l
 	std	r3,TSPC64_TV_SEC(r4)
 	std	r5,TSPC64_TV_NSEC(r4)
 	blr
@@ -220,7 +223,7 @@ V_FUNCTION_BEGIN(__kernel_time)
 	mr	r11,r3			/* r11 holds t */
 	bl	V_LOCAL_FUNC(__get_datapage)
 
-	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+	ld	r4,STAMP_XTIME_SEC(r3)
 
 	cmpldi	r11,0			/* check if t is NULL */
 	beq	2f
@@ -265,7 +268,7 @@ V_FUNCTION_BEGIN(__do_get_tspec)
 	mulhdu	r6,r6,r5		/* in units of 2^-32 seconds */
 
 	/* Add stamp since epoch */
-	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+	ld	r4,STAMP_XTIME_SEC(r3)
 	lwz	r5,STAMP_SEC_FRAC(r3)
 	or	r0,r4,r5
 	or	r0,r0,r6
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 060a1acd7c6d..8834220036a5 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -6,6 +6,8 @@
 #endif
 
 #define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN	0
 
 #include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
@@ -18,22 +20,8 @@
 ENTRY(_stext)
 
 PHDRS {
-	kernel PT_LOAD FLAGS(7); /* RWX */
-	notes PT_NOTE FLAGS(0);
-	dummy PT_NOTE FLAGS(0);
-
-	/* binutils < 2.18 has a bug that makes it misbehave when taking an
-	   ELF file with all segments at load address 0 as input.  This
-	   happens when running "strip" on vmlinux, because of the AT() magic
-	   in this linker script.  People using GCC >= 4.2 won't run into
-	   this problem, because the "build-id" support will put some data
-	   into the "notes" segment (at a non-zero load address).
-
-	   To work around this, we force some data into both the "dummy"
-	   segment and the kernel segment, so the dummy segment will get a
-	   non-zero load address.  It's not enough to always create the
-	   "notes" segment, since if nothing gets assigned to it, its load
-	   address will be zero.  */
+	text PT_LOAD FLAGS(7); /* RWX */
+	note PT_NOTE FLAGS(0);
 }
 
 #ifdef CONFIG_PPC64
@@ -77,7 +65,7 @@ SECTIONS
 #else /* !CONFIG_PPC64 */
 		HEAD_TEXT
 #endif
-	} :kernel
+	} :text
 
 	__head_end = .;
 
@@ -126,7 +114,7 @@ SECTIONS
 		__got2_end = .;
 #endif /* CONFIG_PPC32 */
 
-	} :kernel
+	} :text
 
 	. = ALIGN(ETEXT_ALIGN_SIZE);
 	_etext = .;
@@ -175,17 +163,6 @@ SECTIONS
 		__stop__btb_flush_fixup = .;
 	}
 #endif
-	EXCEPTION_TABLE(0)
-
-	NOTES :kernel :notes
-
-	/* The dummy segment contents for the bug workaround mentioned above
-	   near PHDRS.  */
-	.dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
-		LONG(0)
-		LONG(0)
-		LONG(0)
-	} :kernel :dummy
 
 /*
  * Init sections discarded at runtime
@@ -200,7 +177,7 @@ SECTIONS
 #ifdef CONFIG_PPC64
 		*(.tramp.ftrace.init);
 #endif
-	} :kernel
+	} :text
 
 	/* .exit.text is discarded at runtime, not link time,
 	 * to deal with references from __bug_table
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644
index 000000000000..378f6108a414
--- /dev/null
+++ b/arch/powerpc/kexec/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+# Avoid clang warnings around longjmp/setjmp declarations
+CFLAGS_crash.o += -ffreestanding
+
+obj-y				+= core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32)		+= relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE)	+= file_load.o elf_$(BITS).o
+
+ifdef CONFIG_HAVE_IMA_KEXEC
+ifdef CONFIG_IMA
+obj-y				+= ima.o
+endif
+endif
+
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
new file mode 100644
index 000000000000..078fe3d76feb
--- /dev/null
+++ b/arch/powerpc/kexec/core.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/kdump.h>
+#include <asm/machdep.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+
+void machine_kexec_mask_interrupts(void) {
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	default_machine_crash_shutdown(regs);
+}
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	if (ppc_md.machine_kexec_prepare)
+		return ppc_md.machine_kexec_prepare(image);
+	else
+		return default_machine_kexec_prepare(image);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+	VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+	VMCOREINFO_SYMBOL(vmemmap_list);
+	VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+	VMCOREINFO_SYMBOL(mmu_psize_defs);
+	VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+	VMCOREINFO_OFFSET(vmemmap_backing, list);
+	VMCOREINFO_OFFSET(vmemmap_backing, phys);
+	VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+	VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+	VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+	int save_ftrace_enabled;
+
+	save_ftrace_enabled = __ftrace_enabled_save();
+	this_cpu_disable_ftrace();
+
+	if (ppc_md.machine_kexec)
+		ppc_md.machine_kexec(image);
+	else
+		default_machine_kexec(image);
+
+	this_cpu_enable_ftrace();
+	__ftrace_enabled_restore(save_ftrace_enabled);
+
+	/* Fall back to normal restart if we're still alive. */
+	machine_restart(NULL);
+	for(;;);
+}
+
+void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	/* use common parsing */
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		crashk_res.start = crash_base;
+		crashk_res.end = crash_base + crash_size - 1;
+	}
+
+	if (crashk_res.end == crashk_res.start) {
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+
+	/* We might have got these values via the command line or the
+	 * device tree, either way sanitise them now. */
+
+	crash_size = resource_size(&crashk_res);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+	if (crashk_res.start != KDUMP_KERNELBASE)
+		printk("Crash kernel location must be 0x%x\n",
+				KDUMP_KERNELBASE);
+
+	crashk_res.start = KDUMP_KERNELBASE;
+#else
+	if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+		/*
+		 * On 64bit we split the RMO in half but cap it at half of
+		 * a small SLB (128MB) since the crash kernel needs to place
+		 * itself and some stacks to be in the first segment.
+		 */
+		crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+#else
+		crashk_res.start = KDUMP_KERNELBASE;
+#endif
+	}
+
+	crash_base = PAGE_ALIGN(crashk_res.start);
+	if (crash_base != crashk_res.start) {
+		printk("Crash kernel base must be aligned to 0x%lx\n",
+				PAGE_SIZE);
+		crashk_res.start = crash_base;
+	}
+
+#endif
+	crash_size = PAGE_ALIGN(crash_size);
+	crashk_res.end = crashk_res.start + crash_size - 1;
+
+	/* The crash region must not overlap the current kernel */
+	if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+		printk(KERN_WARNING
+			"Crash kernel can not overlap current kernel\n");
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+
+	/* Crash kernel trumps memory limit */
+	if (memory_limit && memory_limit <= crashk_res.end) {
+		memory_limit = crashk_res.end + 1;
+		printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+		       memory_limit);
+	}
+
+	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+			"for crashkernel (System RAM: %ldMB)\n",
+			(unsigned long)(crash_size >> 20),
+			(unsigned long)(crashk_res.start >> 20),
+			(unsigned long)(memblock_phys_mem_size() >> 20));
+
+	if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
+	    memblock_reserve(crashk_res.start, crash_size)) {
+		pr_err("Failed to reserve memory for crashkernel!\n");
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+}
+
+int overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+	return (start + size) > crashk_res.start && start <= crashk_res.end;
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+static struct property kernel_end_prop = {
+	.name = "linux,kernel-end",
+	.length = sizeof(phys_addr_t),
+	.value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+	.name = "linux,crashkernel-base",
+	.length = sizeof(phys_addr_t),
+	.value = &crashk_base
+};
+
+static struct property crashk_size_prop = {
+	.name = "linux,crashkernel-size",
+	.length = sizeof(phys_addr_t),
+	.value = &crashk_size,
+};
+
+static struct property memory_limit_prop = {
+	.name = "linux,memory-limit",
+	.length = sizeof(unsigned long long),
+	.value = &mem_limit,
+};
+
+#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)
+
+static void __init export_crashk_values(struct device_node *node)
+{
+	/* There might be existing crash kernel properties, but we can't
+	 * be sure what's in them, so remove them. */
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-base", NULL));
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-size", NULL));
+
+	if (crashk_res.start != 0) {
+		crashk_base = cpu_to_be_ulong(crashk_res.start),
+		of_add_property(node, &crashk_base_prop);
+		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+		of_add_property(node, &crashk_size_prop);
+	}
+
+	/*
+	 * memory_limit is required by the kexec-tools to limit the
+	 * crash regions to the actual memory used.
+	 */
+	mem_limit = cpu_to_be_ulong(memory_limit);
+	of_update_property(node, &memory_limit_prop);
+}
+
+static int __init kexec_setup(void)
+{
+	struct device_node *node;
+
+	node = of_find_node_by_path("/chosen");
+	if (!node)
+		return -ENOENT;
+
+	/* remove any stale properties so ours can be found */
+	of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
+
+	/* information needed by userspace when using default_machine_kexec */
+	kernel_end = cpu_to_be_ulong(__pa(_end));
+	of_add_property(node, &kernel_end_prop);
+
+	export_crashk_values(node);
+
+	of_node_put(node);
+	return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kexec/core_32.c
index bf9f1f906d64..bf9f1f906d64 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kexec/core_32.c
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kexec/core_64.c
index 04a7cba58eff..04a7cba58eff 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kexec/core_64.c
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kexec/crash.c
index d488311efab1..d488311efab1 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kexec/crash.c
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6dbe94..3072fd6dbe94 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kexec/file_load.c
index 143c91724617..143c91724617 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kexec/file_load.c
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kexec/ima.c
index 720e50e490b6..720e50e490b6 100644
--- a/arch/powerpc/kernel/ima_kexec.c
+++ b/arch/powerpc/kexec/ima.c
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644
index 000000000000..61946c19e07c
--- /dev/null
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011,  IBM Corporation
+ * 		Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+	.text
+
+	/*
+	 * Must be relocatable PIC code callable as a C function.
+	 */
+	.globl relocate_new_kernel
+relocate_new_kernel:
+	/* r3 = page_list   */
+	/* r4 = reboot_code_buffer */
+	/* r5 = start_address      */
+
+#ifdef CONFIG_FSL_BOOKE
+
+	mr	r29, r3
+	mr	r30, r4
+	mr	r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/fsl_booke_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+	mr      r3, r29
+	mr      r4, r30
+	mr      r5, r31
+
+	li	r0, 0
+#elif defined(CONFIG_44x)
+
+	/* Save our parameters */
+	mr	r29, r3
+	mr	r30, r4
+	mr	r31, r5
+
+#ifdef CONFIG_PPC_47x
+	/* Check for 47x cores */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,PVR_476FPE@h
+	beq	setup_map_47x
+	cmplwi	cr0,r3,PVR_476@h
+	beq	setup_map_47x
+	cmplwi	cr0,r3,PVR_476_ISS@h
+	beq	setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ *    jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+	/*
+	 * Load the PID with kernel PID (0).
+	 * Also load our MSR_IS and TID to MMUCR for TLB search.
+	 */
+	li	r3, 0
+	mtspr	SPRN_PID, r3
+	mfmsr	r4
+	andi.	r4,r4,MSR_IS@l
+	beq	wmmucr
+	oris	r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+	mtspr	SPRN_MMUCR,r3
+	sync
+
+	/*
+	 * Invalidate all the TLB entries except the current entry
+	 * where we are running from
+	 */
+	bl	0f				/* Find our address */
+0:	mflr	r5				/* Make it accessible */
+	tlbsx	r23,0,r5			/* Find entry we are in */
+	li	r4,0				/* Start at TLB entry 0 */
+	li	r3,0				/* Set PAGEID inval value */
+1:	cmpw	r23,r4				/* Is this our entry? */
+	beq	skip				/* If so, skip the inval */
+	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
+skip:
+	addi	r4,r4,1				/* Increment */
+	cmpwi	r4,64				/* Are we done?	*/
+	bne	1b				/* If not, repeat */
+	isync
+
+	/* Create a temp mapping and jump to it */
+	andi.	r6, r23, 1		/* Find the index to use */
+	addi	r24, r6, 1		/* r24 will contain 1 or 2 */
+
+	mfmsr	r9			/* get the MSR */
+	rlwinm	r5, r9, 27, 31, 31	/* Extract the MSR[IS] */
+	xori	r7, r5, 1		/* Use the other address space */
+
+	/* Read the current mapping entries */
+	tlbre	r3, r23, PPC44x_TLB_PAGEID
+	tlbre	r4, r23, PPC44x_TLB_XLAT
+	tlbre	r5, r23, PPC44x_TLB_ATTRIB
+
+	/* Save our current XLAT entry */
+	mr	r25, r4
+
+	/* Extract the TLB PageSize */
+	li	r10, 1 			/* r10 will hold PageSize */
+	rlwinm	r11, r3, 0, 24, 27	/* bits 24-27 */
+
+	/* XXX: As of now we use 256M, 4K pages */
+	cmpwi	r11, PPC44x_TLB_256M
+	bne	tlb_4k
+	rotlwi	r10, r10, 28		/* r10 = 256M */
+	b	write_out
+tlb_4k:
+	cmpwi	r11, PPC44x_TLB_4K
+	bne	default
+	rotlwi	r10, r10, 12		/* r10 = 4K */
+	b	write_out
+default:
+	rotlwi	r10, r10, 10		/* r10 = 1K */
+
+write_out:
+	/*
+	 * Write out the tmp 1:1 mapping for this code in other address space
+	 * Fixup  EPN = RPN , TS=other address space
+	 */
+	insrwi	r3, r7, 1, 23		/* Bit 23 is TS for PAGEID field */
+
+	/* Write out the tmp mapping entries */
+	tlbwe	r3, r24, PPC44x_TLB_PAGEID
+	tlbwe	r4, r24, PPC44x_TLB_XLAT
+	tlbwe	r5, r24, PPC44x_TLB_ATTRIB
+
+	subi	r11, r10, 1		/* PageOffset Mask = PageSize - 1 */
+	not	r10, r11		/* Mask for PageNum */
+
+	/* Switch to other address space in MSR */
+	insrwi	r9, r7, 1, 26		/* Set MSR[IS] = r7 */
+
+	bl	1f
+1:	mflr	r8
+	addi	r8, r8, (2f-1b)		/* Find the target offset */
+
+	/* Jump to the tmp mapping */
+	mtspr	SPRN_SRR0, r8
+	mtspr	SPRN_SRR1, r9
+	rfi
+
+2:
+	/* Invalidate the entry we were executing from */
+	li	r3, 0
+	tlbwe	r3, r23, PPC44x_TLB_PAGEID
+
+	/* attribute fields. rwx for SUPERVISOR mode */
+	li	r5, 0
+	ori	r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+	/* Create 1:1 mapping in 256M pages */
+	xori	r7, r7, 1			/* Revert back to Original TS */
+
+	li	r8, 0				/* PageNumber */
+	li	r6, 3				/* TLB Index, start at 3  */
+
+next_tlb:
+	rotlwi	r3, r8, 28			/* Create EPN (bits 0-3) */
+	mr	r4, r3				/* RPN = EPN  */
+	ori	r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+	insrwi	r3, r7, 1, 23			/* Set TS from r7 */
+
+	tlbwe	r3, r6, PPC44x_TLB_PAGEID	/* PageID field : EPN, V, SIZE */
+	tlbwe	r4, r6, PPC44x_TLB_XLAT		/* Address translation : RPN   */
+	tlbwe	r5, r6, PPC44x_TLB_ATTRIB	/* Attributes */
+
+	addi	r8, r8, 1			/* Increment PN */
+	addi	r6, r6, 1			/* Increment TLB Index */
+	cmpwi	r8, 8				/* Are we done ? */
+	bne	next_tlb
+	isync
+
+	/* Jump to the new mapping 1:1 */
+	li	r9,0
+	insrwi	r9, r7, 1, 26			/* Set MSR[IS] = r7 */
+
+	bl	1f
+1:	mflr	r8
+	and	r8, r8, r11			/* Get our offset within page */
+	addi	r8, r8, (2f-1b)
+
+	and	r5, r25, r10			/* Get our target PageNum */
+	or	r8, r8, r5			/* Target jump address */
+
+	mtspr	SPRN_SRR0, r8
+	mtspr	SPRN_SRR1, r9
+	rfi
+2:
+	/* Invalidate the tmp entry we used */
+	li	r3, 0
+	tlbwe	r3, r24, PPC44x_TLB_PAGEID
+	sync
+	b	ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+	/* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+	/*
+	 * Load the kernel pid (0) to PID and also to MMUCR[TID].
+	 * Also set the MSR IS->MMUCR STS
+	 */
+	li	r3, 0
+	mtspr	SPRN_PID, r3			/* Set PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4, r4, MSR_IS@l		/* TS=1? */
+	beq	1f				/* If not, leave STS=0 */
+	oris	r3, r3, PPC47x_MMUCR_STS@h	/* Set STS=1 */
+1:	mtspr	SPRN_MMUCR, r3			/* Put MMUCR */
+	sync
+
+	/* Find the entry we are running from */
+	bl	2f
+2:	mflr	r23
+	tlbsx	r23, 0, r23
+	tlbre	r24, r23, 0			/* TLB Word 0 */
+	tlbre	r25, r23, 1			/* TLB Word 1 */
+	tlbre	r26, r23, 2			/* TLB Word 2 */
+
+
+	/*
+	 * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+	 * of 4k page size in all  4 ways (0-3 in r3).
+	 * This would invalidate the entire UTLB including the one we are
+	 * running from. However the shadow TLB entries would help us
+	 * to continue the execution, until we flush them (rfi/isync).
+	 */
+	addis	r3, 0, 0x8000			/* specify the way */
+	addi	r4, 0, 0			/* TLB Word0 = (EPN=0, VALID = 0) */
+	addi	r5, 0, 0
+	b	clear_utlb_entry
+
+	/* Align the loop to speed things up. from head_44x.S */
+	.align	6
+
+clear_utlb_entry:
+
+	tlbwe	r4, r3, 0
+	tlbwe	r5, r3, 1
+	tlbwe	r5, r3, 2
+	addis	r3, r3, 0x2000			/* Increment the way */
+	cmpwi	r3, 0
+	bne	clear_utlb_entry
+	addis	r3, 0, 0x8000
+	addis	r4, r4, 0x100			/* Increment the EPN */
+	cmpwi	r4, 0
+	bne	clear_utlb_entry
+
+	/* Create the entries in the other address space */
+	mfmsr	r5
+	rlwinm	r7, r5, 27, 31, 31		/* Get the TS (Bit 26) from MSR */
+	xori	r7, r7, 1			/* r7 = !TS */
+
+	insrwi	r24, r7, 1, 21			/* Change the TS in the saved TLB word 0 */
+
+	/*
+	 * write out the TLB entries for the tmp mapping
+	 * Use way '0' so that we could easily invalidate it later.
+	 */
+	lis	r3, 0x8000			/* Way '0' */
+
+	tlbwe	r24, r3, 0
+	tlbwe	r25, r3, 1
+	tlbwe	r26, r3, 2
+
+	/* Update the msr to the new TS */
+	insrwi	r5, r7, 1, 26
+
+	bl	1f
+1:	mflr	r6
+	addi	r6, r6, (2f-1b)
+
+	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r5
+	rfi
+
+	/*
+	 * Now we are in the tmp address space.
+	 * Create a 1:1 mapping for 0-2GiB in the original TS.
+	 */
+2:
+	li	r3, 0
+	li	r4, 0				/* TLB Word 0 */
+	li	r5, 0				/* TLB Word 1 */
+	li	r6, 0
+	ori	r6, r6, PPC47x_TLB2_S_RWX	/* TLB word 2 */
+
+	li	r8, 0				/* PageIndex */
+
+	xori	r7, r7, 1			/* revert back to original TS */
+
+write_utlb:
+	rotlwi	r5, r8, 28			/* RPN = PageIndex * 256M */
+						/* ERPN = 0 as we don't use memory above 2G */
+
+	mr	r4, r5				/* EPN = RPN */
+	ori	r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+	insrwi	r4, r7, 1, 21			/* Insert the TS to Word 0 */
+
+	tlbwe	r4, r3, 0			/* Write out the entries */
+	tlbwe	r5, r3, 1
+	tlbwe	r6, r3, 2
+	addi	r8, r8, 1
+	cmpwi	r8, 8				/* Have we completed ? */
+	bne	write_utlb
+
+	/* make sure we complete the TLB write up */
+	isync
+
+	/*
+	 * Prepare to jump to the 1:1 mapping.
+	 * 1) Extract page size of the tmp mapping
+	 *    DSIZ = TLB_Word0[22:27]
+	 * 2) Calculate the physical address of the address
+	 *    to jump to.
+	 */
+	rlwinm	r10, r24, 0, 22, 27
+
+	cmpwi	r10, PPC47x_TLB0_4K
+	bne	0f
+	li	r10, 0x1000			/* r10 = 4k */
+	bl	1f
+
+0:
+	/* Defaults to 256M */
+	lis	r10, 0x1000
+
+	bl	1f
+1:	mflr	r4
+	addi	r4, r4, (2f-1b)			/* virtual address  of 2f */
+
+	subi	r11, r10, 1			/* offsetmask = Pagesize - 1 */
+	not	r10, r11			/* Pagemask = ~(offsetmask) */
+
+	and	r5, r25, r10			/* Physical page */
+	and	r6, r4, r11			/* offset within the current page */
+
+	or	r5, r5, r6			/* Physical address for 2f */
+
+	/* Switch the TS in MSR to the original one */
+	mfmsr	r8
+	insrwi	r8, r7, 1, 26
+
+	mtspr	SPRN_SRR1, r8
+	mtspr	SPRN_SRR0, r5
+	rfi
+
+2:
+	/* Invalidate the tmp mapping */
+	lis	r3, 0x8000			/* Way '0' */
+
+	clrrwi	r24, r24, 12			/* Clear the valid bit */
+	tlbwe	r24, r3, 0
+	tlbwe	r25, r3, 1
+	tlbwe	r26, r3, 2
+
+	/* Make sure we complete the TLB write and flush the shadow TLB */
+	isync
+
+#endif
+
+ppc44x_map_done:
+
+
+	/* Restore the parameters */
+	mr	r3, r29
+	mr	r4, r30
+	mr	r5, r31
+
+	li	r0, 0
+#else
+	li	r0, 0
+
+	/*
+	 * Set Machine Status Register to a known status,
+	 * switch the MMU off and jump to 1: in a single step.
+	 */
+
+	mr	r8, r0
+	ori     r8, r8, MSR_RI|MSR_ME
+	mtspr	SPRN_SRR1, r8
+	addi	r8, r4, 1f - relocate_new_kernel
+	mtspr	SPRN_SRR0, r8
+	sync
+	rfi
+
+1:
+#endif
+	/* from this point address translation is turned off */
+	/* and interrupts are disabled */
+
+	/* set a new stack at the bottom of our page... */
+	/* (not really needed now) */
+	addi	r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+	stw	r0, 0(r1)
+
+	/* Do the copies */
+	li	r6, 0 /* checksum */
+	mr	r0, r3
+	b	1f
+
+0:	/* top, read another word for the indirection page */
+	lwzu	r0, 4(r3)
+
+1:
+	/* is it a destination page? (r8) */
+	rlwinm.	r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+	beq	2f
+
+	rlwinm	r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+	b	0b
+
+2:	/* is it an indirection page? (r3) */
+	rlwinm.	r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+	beq	2f
+
+	rlwinm	r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+	subi	r3, r3, 4
+	b	0b
+
+2:	/* are we done? */
+	rlwinm.	r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+	beq	2f
+	b	3f
+
+2:	/* is it a source page? (r9) */
+	rlwinm.	r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+	beq	0b
+
+	rlwinm	r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+	li	r7, PAGE_SIZE / 4
+	mtctr   r7
+	subi    r9, r9, 4
+	subi    r8, r8, 4
+9:
+	lwzu    r0, 4(r9)  /* do the copy */
+	xor	r6, r6, r0
+	stwu    r0, 4(r8)
+	dcbst	0, r8
+	sync
+	icbi	0, r8
+	bdnz    9b
+
+	addi    r9, r9, 4
+	addi    r8, r8, 4
+	b	0b
+
+3:
+
+	/* To be certain of avoiding problems with self-modifying code
+	 * execute a serializing instruction here.
+	 */
+	isync
+	sync
+
+	mfspr	r3, SPRN_PIR /* current core we are running on */
+	mr	r4, r5 /* load physical address of chunk called */
+
+	/* jump to the entry point, usually the setup routine */
+	mtlr	r5
+	blrl
+
+1:	b	1b
+
+relocate_new_kernel_end:
+
+	.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4c67cc79de7c..2bfeaa13befb 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -71,6 +71,9 @@ kvm-hv-y += \
 	book3s_64_mmu_radix.o \
 	book3s_hv_nested.o
 
+kvm-hv-$(CONFIG_PPC_UV) += \
+	book3s_hv_uvmem.o
+
 kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm.o
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ec2547cc5ecb..58a59ee998e2 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
-		ulong pc = kvmppc_get_pc(vcpu);
-		ulong lr = kvmppc_get_lr(vcpu);
-		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
-			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
-		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
-			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
-		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
-	}
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	if (!is_kvmppc_hv_enabled(vcpu->kvm))
-		return to_book3s(vcpu)->hior;
-	return 0;
-}
-
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
@@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
-	kvmppc_unfixup_split_real(vcpu);
-	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
-	kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
-	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
-	vcpu->arch.mmu.reset_msr(vcpu);
+	vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
 }
 
 static int kvmppc_book3s_vec2irqprio(unsigned int vec)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 2ef1311a2a13..3a4613985949 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
 static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
 #endif
 
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
 #endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 18f244aad7aa..f21e73492ce3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
 }
 
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
-	kvmppc_set_msr(vcpu, 0);
-}
-
 static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
 				      u32 sre, gva_t eaddr,
 				      bool primary)
@@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
 	mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
 	mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
 	mmu->xlate = kvmppc_mmu_book3s_32_xlate;
-	mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
 	mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
 	mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
 	mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 5f63a5f7f24f..599133256a95 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -24,20 +24,6 @@
 #define dprintk(X...) do { } while(0)
 #endif
 
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
-	unsigned long msr = vcpu->arch.intr_msr;
-	unsigned long cur_msr = kvmppc_get_msr(vcpu);
-
-	/* If transactional, change to suspend mode on IRQ delivery */
-	if (MSR_TM_TRANSACTIONAL(cur_msr))
-		msr |= MSR_TS_S;
-	else
-		msr |= cur_msr & MSR_TS_MASK;
-
-	kvmppc_set_msr(vcpu, msr);
-}
-
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
 				struct kvm_vcpu *vcpu,
 				gva_t eaddr)
@@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
 	mmu->slbie = kvmppc_mmu_book3s_64_slbie;
 	mmu->slbia = kvmppc_mmu_book3s_64_slbia;
 	mmu->xlate = kvmppc_mmu_book3s_64_xlate;
-	mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
 	mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
 	mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
 	mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 9a75f0e1933b..d381526c5c9b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void)
 	return 0;
 }
 
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
-	unsigned long msr = vcpu->arch.intr_msr;
-
-	/* If transactional, change to suspend mode on IRQ delivery */
-	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
-		msr |= MSR_TS_S;
-	else
-		msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
-	kvmppc_set_msr(vcpu, msr);
-}
-
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret)
@@ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct vm_area_struct *vma;
 	unsigned long rcbits;
 	long mmio_update;
+	struct mm_struct *mm;
 
 	if (kvm_is_radix(kvm))
 		return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@@ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	is_ci = false;
 	pfn = 0;
 	page = NULL;
+	mm = current->mm;
 	pte_size = PAGE_SIZE;
 	writing = (dsisr & DSISR_ISSTORE) != 0;
 	/* If writing != 0, then the HPTE must allow writing, if we get here */
@@ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
 	if (npages < 1) {
 		/* Check if it's an I/O mapping */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, hva);
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, hva);
 		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
 		    (vma->vm_flags & VM_PFNMAP)) {
 			pfn = vma->vm_pgoff +
@@ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
 			write_ok = vma->vm_flags & VM_WRITE;
 		}
-		up_read(&current->mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		if (!pfn)
 			goto out_put;
 	} else {
@@ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * hugepage split and collapse.
 			 */
 			local_irq_save(flags);
-			ptep = find_current_mm_pte(current->mm->pgd,
-						   hva, NULL, NULL);
+			ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
 			if (ptep) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (__pte_write(pte))
@@ -2000,7 +1989,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
 	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
 	if (ret < 0) {
 		kfree(ctx);
-		kvm_put_kvm(kvm);
+		kvm_put_kvm_no_destroy(kvm);
 		return ret;
 	}
 
@@ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
-	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
 
 	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 2d415c36a61d..da857c8ba6e4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -19,6 +19,8 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/pte-walk.h>
+#include <asm/ultravisor.h>
+#include <asm/kvm_book3s_uvmem.h>
 
 /*
  * Supported radix tree geometry.
@@ -915,6 +917,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (!(dsisr & DSISR_PRTABLE_FAULT))
 		gpa |= ea & 0xfff;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return kvmppc_send_page_to_uv(kvm, gfn);
+
 	/* Get the corresponding memslot */
 	memslot = gfn_to_memslot(kvm, gfn);
 
@@ -972,6 +977,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned long gpa = gfn << PAGE_SHIFT;
 	unsigned int shift;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
+		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
+		return 0;
+	}
+
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep))
 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -989,6 +999,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	int ref = 0;
 	unsigned long old, *rmapp;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return ref;
+
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1013,6 +1026,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	int ref = 0;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return ref;
+
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep))
 		ref = 1;
@@ -1030,6 +1046,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 	int ret = 0;
 	unsigned long old, *rmapp;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return ret;
+
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
 		ret = 1;
@@ -1082,6 +1101,12 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
 	unsigned long gpa;
 	unsigned int shift;
 
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
+		kvmppc_uvmem_drop_pages(memslot, kvm);
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return;
+
 	gpa = memslot->base_gfn << PAGE_SHIFT;
 	spin_lock(&kvm->mmu_lock);
 	for (n = memslot->npages; n; --n) {
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5834db0a54c6..883a66e76638 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -317,7 +317,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	if (ret >= 0)
 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
 	else
-		kvm_put_kvm(kvm);
+		kvm_put_kvm_no_destroy(kvm);
 
 	mutex_unlock(&kvm->lock);
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 709cf1fd4cf4..6ff3f896d908 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -72,6 +72,9 @@
 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/ultravisor.h>
 
 #include "book3s.h"
 
@@ -133,7 +136,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
 /* If set, the threads on each CPU core have to be in the same MMU mode */
 static bool no_mixing_hpt_and_radix;
 
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
 /*
@@ -338,18 +340,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 }
 
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
-{
-	/*
-	 * Check for illegal transactional state bit combination
-	 * and if we find it, force the TS field to a safe state.
-	 */
-	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
-		msr &= ~MSR_TS_MASK;
-	vcpu->arch.shregs.msr = msr;
-	kvmppc_end_cede(vcpu);
-}
-
 static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	vcpu->arch.pvr = pvr;
@@ -792,6 +782,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 		vcpu->arch.dawr  = value1;
 		vcpu->arch.dawrx = value2;
 		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+		/* KVM does not support mflags=2 (AIL=2) */
+		if (mflags != 0 && mflags != 3)
+			return H_UNSUPPORTED_FLAG_START;
+		return H_TOO_HARD;
 	default:
 		return H_TOO_HARD;
 	}
@@ -1078,6 +1073,25 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 					 kvmppc_get_gpr(vcpu, 5),
 					 kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_SVM_PAGE_IN:
+		ret = kvmppc_h_svm_page_in(vcpu->kvm,
+					   kvmppc_get_gpr(vcpu, 4),
+					   kvmppc_get_gpr(vcpu, 5),
+					   kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_PAGE_OUT:
+		ret = kvmppc_h_svm_page_out(vcpu->kvm,
+					    kvmppc_get_gpr(vcpu, 4),
+					    kvmppc_get_gpr(vcpu, 5),
+					    kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_INIT_START:
+		ret = kvmppc_h_svm_init_start(vcpu->kvm);
+		break;
+	case H_SVM_INIT_DONE:
+		ret = kvmppc_h_svm_init_done(vcpu->kvm);
+		break;
+
 	default:
 		return RESUME_HOST;
 	}
@@ -2454,15 +2468,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 	vcpu->arch.timer_running = 1;
 }
 
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.ceded = 0;
-	if (vcpu->arch.timer_running) {
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-		vcpu->arch.timer_running = 0;
-	}
-}
-
 extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
@@ -4511,6 +4516,29 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
 	if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
 	    ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
 		kvmppc_radix_flush_memslot(kvm, old);
+	/*
+	 * If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+	 */
+	if (!kvm->arch.secure_guest)
+		return;
+
+	switch (change) {
+	case KVM_MR_CREATE:
+		if (kvmppc_uvmem_slot_init(kvm, new))
+			return;
+		uv_register_mem_slot(kvm->arch.lpid,
+				     new->base_gfn << PAGE_SHIFT,
+				     new->npages * PAGE_SIZE,
+				     0, new->id);
+		break;
+	case KVM_MR_DELETE:
+		uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+		kvmppc_uvmem_slot_free(kvm, old);
+		break;
+	default:
+		/* TODO: Handle KVM_MR_MOVE */
+		break;
+	}
 }
 
 /*
@@ -4784,6 +4812,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	char buf[32];
 	int ret;
 
+	mutex_init(&kvm->arch.uvmem_lock);
+	INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
 	mutex_init(&kvm->arch.mmu_setup_lock);
 
 	/* Allocate the guest's logical partition ID */
@@ -4953,8 +4983,11 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 		if (nesting_enabled(kvm))
 			kvmhv_release_all_nested(kvm);
 		kvm->arch.process_table = 0;
+		if (kvm->arch.secure_guest)
+			uv_svm_terminate(kvm->arch.lpid);
 		kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
 	}
+
 	kvmppc_free_lpid(kvm->arch.lpid);
 
 	kvmppc_free_pimap(kvm);
@@ -5394,6 +5427,94 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
 	return rc;
 }
 
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	unpin_vpa(kvm, vpa);
+	vpa->gpa = 0;
+	vpa->pinned_addr = NULL;
+	vpa->dirty = false;
+	vpa->update_pending = 0;
+}
+
+/*
+ *  IOCTL handler to turn off secure mode of guest
+ *
+ * - Release all device pages
+ * - Issue ucall to terminate the guest on the UV side
+ * - Unpin the VPA pages.
+ * - Reinit the partition scoped page tables
+ */
+static int kvmhv_svm_off(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int mmu_was_ready;
+	int srcu_idx;
+	int ret = 0;
+	int i;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return ret;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	mmu_was_ready = kvm->arch.mmu_ready;
+	if (kvm->arch.mmu_ready) {
+		kvm->arch.mmu_ready = 0;
+		/* order mmu_ready vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.mmu_ready = 1;
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		struct kvm_memory_slot *memslot;
+		struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+
+		if (!slots)
+			continue;
+
+		kvm_for_each_memslot(memslot, slots) {
+			kvmppc_uvmem_drop_pages(memslot, kvm);
+			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+		}
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	ret = uv_svm_terminate(kvm->arch.lpid);
+	if (ret != U_SUCCESS) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * When secure guest is reset, all the guest pages are sent
+	 * to UV via UV_PAGE_IN before the non-boot vcpus get a
+	 * chance to run and unpin their VPA pages. Unpinning of all
+	 * VPA pages is done here explicitly so that VPA pages
+	 * can be migrated to the secure side.
+	 *
+	 * This is required to for the secure SMP guest to reboot
+	 * correctly.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		unpin_vpa_reset(kvm, &vcpu->arch.dtl);
+		unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
+		unpin_vpa_reset(kvm, &vcpu->arch.vpa);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
+
+	kvmppc_setup_partition_table(kvm);
+	kvm->arch.secure_guest = 0;
+	kvm->arch.mmu_ready = mmu_was_ready;
+out:
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return ret;
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5401,6 +5522,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.set_one_reg = kvmppc_set_one_reg_hv,
 	.vcpu_load   = kvmppc_core_vcpu_load_hv,
 	.vcpu_put    = kvmppc_core_vcpu_put_hv,
+	.inject_interrupt = kvmppc_inject_interrupt_hv,
 	.set_msr     = kvmppc_set_msr_hv,
 	.vcpu_run    = kvmppc_vcpu_run_hv,
 	.vcpu_create = kvmppc_core_vcpu_create_hv,
@@ -5436,6 +5558,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.enable_nested = kvmhv_enable_nested,
 	.load_from_eaddr = kvmhv_load_from_eaddr,
 	.store_to_eaddr = kvmhv_store_to_eaddr,
+	.svm_off = kvmhv_svm_off,
 };
 
 static int kvm_init_subcore_bitmap(void)
@@ -5544,11 +5667,16 @@ static int kvmppc_book3s_init_hv(void)
 			no_mixing_hpt_and_radix = true;
 	}
 
+	r = kvmppc_uvmem_init();
+	if (r < 0)
+		pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+
 	return r;
 }
 
 static void kvmppc_book3s_exit_hv(void)
 {
+	kvmppc_uvmem_free();
 	kvmppc_free_host_rm_ops();
 	if (kvmppc_radix_possible())
 		kvmppc_radix_exit();
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7c1909657b55..7cd3cf3d366b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
 	local_paca->kvm_hstate.kvm_split_mode = NULL;
 }
 
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ceded = 0;
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+	/*
+	 * Check for illegal transactional state bit combination
+	 * and if we find it, force the TS field to a safe state.
+	 */
+	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+		msr &= ~MSR_TS_MASK;
+	vcpu->arch.shregs.msr = msr;
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long msr, pc, new_msr, new_pc;
+
+	msr = kvmppc_get_msr(vcpu);
+	pc = kvmppc_get_pc(vcpu);
+	new_msr = vcpu->arch.intr_msr;
+	new_pc = vec;
+
+	/* If transactional, change to suspend mode on IRQ delivery */
+	if (MSR_TM_TRANSACTIONAL(msr))
+		new_msr |= MSR_TS_S;
+	else
+		new_msr |= msr & MSR_TS_MASK;
+
+	/*
+	 * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+	 * applicable. AIL=2 is not supported.
+	 *
+	 * AIL does not apply to SRESET, MCE, or HMI (which is never
+	 * delivered to the guest), and does not apply if IR=0 or DR=0.
+	 */
+	if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+	    vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+	    (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+	    (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+		new_msr |= MSR_IR | MSR_DR;
+		new_pc += 0xC000000000004000ULL;
+	}
+
+	kvmppc_set_srr0(vcpu, pc);
+	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, new_pc);
+	vcpu->arch.shregs.msr = new_msr;
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	inject_interrupt(vcpu, vec, srr1_flags);
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
 /*
  * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
  * Can we inject a Decrementer or a External interrupt?
@@ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
 void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
 {
 	int ext;
-	unsigned long vec = 0;
 	unsigned long lpcr;
 
 	/* Insert EXTERNAL bit into LPCR at the MER bit position */
@@ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.shregs.msr & MSR_EE) {
 		if (ext) {
-			vec = BOOK3S_INTERRUPT_EXTERNAL;
+			inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
 		} else {
 			long int dec = mfspr(SPRN_DEC);
 			if (!(lpcr & LPCR_LD))
 				dec = (int) dec;
 			if (dec < 0)
-				vec = BOOK3S_INTERRUPT_DECREMENTER;
+				inject_interrupt(vcpu,
+					BOOK3S_INTERRUPT_DECREMENTER, 0);
 		}
 	}
-	if (vec) {
-		unsigned long msr, old_msr = vcpu->arch.shregs.msr;
-
-		kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
-		kvmppc_set_srr1(vcpu, old_msr);
-		kvmppc_set_pc(vcpu, vec);
-		msr = vcpu->arch.intr_msr;
-		if (MSR_TM_ACTIVE(old_msr))
-			msr |= MSR_TS_S;
-		vcpu->arch.shregs.msr = msr;
-	}
 
 	if (vcpu->arch.doorbell_request) {
 		mtspr(SPRN_DPDES, 1);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index cdf30c6eaf54..dc97e5be76f6 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
 forward_to_l1:
 	vcpu->arch.fault_dsisr = flags;
 	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
-		vcpu->arch.shregs.msr &= ~0x783f0000ul;
+		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
 		vcpu->arch.shregs.msr |= flags;
 	}
 	return RESUME_HOST;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faebcbb8c4db..c6fbbd29bd87 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -11,6 +11,7 @@
  */
 
 #include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
 #include <asm/mmu.h>
@@ -1116,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r7, VCPU_GPR(R7)(r4)
 	bne	ret_to_ultra
 
-	lwz	r0, VCPU_CR(r4)
+	ld	r0, VCPU_CR(r4)
 	mtcr	r0
 
 	ld	r0, VCPU_GPR(R0)(r4)
@@ -1136,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  *   R3 = UV_RETURN
  */
 ret_to_ultra:
-	lwz	r0, VCPU_CR(r4)
+	ld	r0, VCPU_CR(r4)
 	mtcr	r0
 
 	ld	r0, VCPU_GPR(R3)(r4)
@@ -1487,6 +1488,13 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 1:
 #endif /* CONFIG_KVM_XICS */
 
+	/*
+	 * Possibly flush the link stack here, before we do a blr in
+	 * guest_exit_short_path.
+	 */
+1:	nop
+	patch_site 1b patch__call_kvm_flush_link_stack
+
 	/* If we came in through the P9 short path, go back out to C now */
 	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
 	cmpwi	r0, 0
@@ -1963,6 +1971,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtlr	r0
 	blr
 
+.balign 32
+.global kvm_flush_link_stack
+kvm_flush_link_stack:
+	/* Save LR into r0 */
+	mflr	r0
+
+	/* Flush the link stack. On Power8 it's up to 32 entries in size. */
+	.rept 32
+	bl	.+4
+	.endr
+
+	/* And on Power9 it's up to 64. */
+BEGIN_FTR_SECTION
+	.rept 32
+	bl	.+4
+	.endr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+	/* Restore LR */
+	mtlr	r0
+	blr
+
 kvmppc_guest_external:
 	/* External interrupt, first check for host_ipi. If this is
 	 * set, we know the host wants us out so let's do it now
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
new file mode 100644
index 000000000000..2de264fc3156
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Secure pages management: Migration of pages between normal and secure
+ * memory of KVM guests.
+ *
+ * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+/*
+ * A pseries guest can be run as secure guest on Ultravisor-enabled
+ * POWER platforms. On such platforms, this driver will be used to manage
+ * the movement of guest pages between the normal memory managed by
+ * hypervisor (HV) and secure memory managed by Ultravisor (UV).
+ *
+ * The page-in or page-out requests from UV will come to HV as hcalls and
+ * HV will call back into UV via ultracalls to satisfy these page requests.
+ *
+ * Private ZONE_DEVICE memory equal to the amount of secure memory
+ * available in the platform for running secure guests is hotplugged.
+ * Whenever a page belonging to the guest becomes secure, a page from this
+ * private device memory is used to represent and track that secure page
+ * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
+ * shared between UV and HV. However such pages aren't represented by
+ * device private memory and mappings to shared memory exist in both
+ * UV and HV page tables.
+ */
+
+/*
+ * Notes on locking
+ *
+ * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
+ * page-in and page-out requests for the same GPA. Concurrent accesses
+ * can either come via UV (guest vCPUs requesting for same page)
+ * or when HV and guest simultaneously access the same page.
+ * This mutex serializes the migration of page from HV(normal) to
+ * UV(secure) and vice versa. So the serialization points are around
+ * migrate_vma routines and page-in/out routines.
+ *
+ * Per-guest mutex comes with a cost though. Mainly it serializes the
+ * fault path as page-out can occur when HV faults on accessing secure
+ * guest pages. Currently UV issues page-in requests for all the guest
+ * PFNs one at a time during early boot (UV_ESM uvcall), so this is
+ * not a cause for concern. Also currently the number of page-outs caused
+ * by HV touching secure pages is very very low. If an when UV supports
+ * overcommitting, then we might see concurrent guest driven page-outs.
+ *
+ * Locking order
+ *
+ * 1. kvm->srcu - Protects KVM memslots
+ * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
+ *			     as sync-points for page-in/out
+ */
+
+/*
+ * Notes on page size
+ *
+ * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
+ * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
+ * secure GPAs at 64K page size and maintains one device PFN for each
+ * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
+ * for 64K page at a time.
+ *
+ * HV faulting on secure pages: When HV touches any secure page, it
+ * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
+ * UV splits and remaps the 2MB page if necessary and copies out the
+ * required 64K page contents.
+ *
+ * Shared pages: Whenever guest shares a secure page, UV will split and
+ * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
+ *
+ * HV invalidating a page: When a regular page belonging to secure
+ * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
+ * page size. Using 64K page size is correct here because any non-secure
+ * page will essentially be of 64K page size. Splitting by UV during sharing
+ * and page-out ensures this.
+ *
+ * Page fault handling: When HV handles page fault of a page belonging
+ * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
+ * Using 64K size is correct here too as UV would have split the 2MB page
+ * into 64k mappings and would have done page-outs earlier.
+ *
+ * In summary, the current secure pages handling code in HV assumes
+ * 64K page size and in fact fails any page-in/page-out requests of
+ * non-64K size upfront. If and when UV starts supporting multiple
+ * page-sizes, we need to break this assumption.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/kvm_host.h>
+#include <linux/ksm.h>
+#include <asm/ultravisor.h>
+#include <asm/mman.h>
+#include <asm/kvm_ppc.h>
+
+static struct dev_pagemap kvmppc_uvmem_pgmap;
+static unsigned long *kvmppc_uvmem_bitmap;
+static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
+
+#define KVMPPC_UVMEM_PFN	(1UL << 63)
+
+struct kvmppc_uvmem_slot {
+	struct list_head list;
+	unsigned long nr_pfns;
+	unsigned long base_pfn;
+	unsigned long *pfns;
+};
+
+struct kvmppc_uvmem_page_pvt {
+	struct kvm *kvm;
+	unsigned long gpa;
+	bool skip_page_out;
+};
+
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
+	if (!p->pfns) {
+		kfree(p);
+		return -ENOMEM;
+	}
+	p->nr_pfns = slot->npages;
+	p->base_pfn = slot->base_gfn;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	list_add(&p->list, &kvm->arch.uvmem_pfns);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+
+	return 0;
+}
+
+/*
+ * All device PFNs are already released by the time we come here.
+ */
+void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *p, *next;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
+		if (p->base_pfn == slot->base_gfn) {
+			vfree(p->pfns);
+			list_del(&p->list);
+			kfree(p);
+			break;
+		}
+	}
+	mutex_unlock(&kvm->arch.uvmem_lock);
+}
+
+static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
+				    struct kvm *kvm)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+			unsigned long index = gfn - p->base_pfn;
+
+			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+			return;
+		}
+	}
+}
+
+static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+			p->pfns[gfn - p->base_pfn] = 0;
+			return;
+		}
+	}
+}
+
+static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+				    unsigned long *uvmem_pfn)
+{
+	struct kvmppc_uvmem_slot *p;
+
+	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+			unsigned long index = gfn - p->base_pfn;
+
+			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+				if (uvmem_pfn)
+					*uvmem_pfn = p->pfns[index] &
+						     ~KVMPPC_UVMEM_PFN;
+				return true;
+			} else
+				return false;
+		}
+	}
+	return false;
+}
+
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int ret = H_SUCCESS;
+	int srcu_idx;
+
+	if (!kvmppc_uvmem_bitmap)
+		return H_UNSUPPORTED;
+
+	/* Only radix guests can be secure guests */
+	if (!kvm_is_radix(kvm))
+		return H_UNSUPPORTED;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
+			ret = H_PARAMETER;
+			goto out;
+		}
+		ret = uv_register_mem_slot(kvm->arch.lpid,
+					   memslot->base_gfn << PAGE_SHIFT,
+					   memslot->npages * PAGE_SIZE,
+					   0, memslot->id);
+		if (ret < 0) {
+			kvmppc_uvmem_slot_free(kvm, memslot);
+			ret = H_PARAMETER;
+			goto out;
+		}
+	}
+	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+	pr_info("LPID %d went secure\n", kvm->arch.lpid);
+	return H_SUCCESS;
+}
+
+/*
+ * Drop device pages that we maintain for the secure guest
+ *
+ * We first mark the pages to be skipped from UV_PAGE_OUT when there
+ * is HV side fault on these pages. Next we *get* these pages, forcing
+ * fault on them, do fault time migration to replace the device PTEs in
+ * QEMU page table with normal PTEs from newly allocated pages.
+ */
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+			     struct kvm *kvm)
+{
+	int i;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn, uvmem_pfn;
+	unsigned long gfn = free->base_gfn;
+
+	for (i = free->npages; i; --i, ++gfn) {
+		struct page *uvmem_page;
+
+		mutex_lock(&kvm->arch.uvmem_lock);
+		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+			mutex_unlock(&kvm->arch.uvmem_lock);
+			continue;
+		}
+
+		uvmem_page = pfn_to_page(uvmem_pfn);
+		pvt = uvmem_page->zone_device_data;
+		pvt->skip_page_out = true;
+		mutex_unlock(&kvm->arch.uvmem_lock);
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (is_error_noslot_pfn(pfn))
+			continue;
+		kvm_release_pfn_clean(pfn);
+	}
+}
+
+/*
+ * Get a free device PFN from the pool
+ *
+ * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
+ * PFN will be used to keep track of the secure page on HV side.
+ *
+ * Called with kvm->arch.uvmem_lock held
+ */
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
+{
+	struct page *dpage = NULL;
+	unsigned long bit, uvmem_pfn;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn_last, pfn_first;
+
+	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
+	pfn_last = pfn_first +
+		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
+
+	spin_lock(&kvmppc_uvmem_bitmap_lock);
+	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
+				  pfn_last - pfn_first);
+	if (bit >= (pfn_last - pfn_first))
+		goto out;
+	bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
+	if (!pvt)
+		goto out_clear;
+
+	uvmem_pfn = bit + pfn_first;
+	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+
+	pvt->gpa = gpa;
+	pvt->kvm = kvm;
+
+	dpage = pfn_to_page(uvmem_pfn);
+	dpage->zone_device_data = pvt;
+	get_page(dpage);
+	lock_page(dpage);
+	return dpage;
+out_clear:
+	spin_lock(&kvmppc_uvmem_bitmap_lock);
+	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+out:
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+	return NULL;
+}
+
+/*
+ * Alloc a PFN from private device memory pool and copy page from normal
+ * memory to secure memory using UV_PAGE_IN uvcall.
+ */
+static int
+kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
+		   unsigned long end, unsigned long gpa, struct kvm *kvm,
+		   unsigned long page_shift, bool *downgrade)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig;
+	struct page *spage;
+	unsigned long pfn;
+	struct page *dpage;
+	int ret = 0;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+
+	/*
+	 * We come here with mmap_sem write lock held just for
+	 * ksm_madvise(), otherwise we only need read mmap_sem.
+	 * Hence downgrade to read lock once ksm_madvise() is done.
+	 */
+	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  MADV_UNMERGEABLE, &vma->vm_flags);
+	downgrade_write(&kvm->mm->mmap_sem);
+	*downgrade = true;
+	if (ret)
+		return ret;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return ret;
+
+	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	dpage = kvmppc_uvmem_get_page(gpa, kvm);
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+	spage = migrate_pfn_to_page(*mig.src);
+	if (spage)
+		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+			   page_shift);
+
+	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	migrate_vma_pages(&mig);
+out_finalize:
+	migrate_vma_finalize(&mig);
+	return ret;
+}
+
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
+ * to unmap the device page from QEMU's page tables.
+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+{
+
+	int ret = H_PARAMETER;
+	struct page *uvmem_page;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn;
+	unsigned long gfn = gpa >> page_shift;
+	int srcu_idx;
+	unsigned long uvmem_pfn;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	mutex_lock(&kvm->arch.uvmem_lock);
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+		uvmem_page = pfn_to_page(uvmem_pfn);
+		pvt = uvmem_page->zone_device_data;
+		pvt->skip_page_out = true;
+	}
+
+retry:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	pfn = gfn_to_pfn(kvm, gfn);
+	if (is_error_noslot_pfn(pfn))
+		goto out;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+		uvmem_page = pfn_to_page(uvmem_pfn);
+		pvt = uvmem_page->zone_device_data;
+		pvt->skip_page_out = true;
+		kvm_release_pfn_clean(pfn);
+		goto retry;
+	}
+
+	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+		ret = H_SUCCESS;
+	kvm_release_pfn_clean(pfn);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/*
+ * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
+ *
+ * H_PAGE_IN_SHARED flag makes the page shared which means that the same
+ * memory in is visible from both UV and HV.
+ */
+unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+		     unsigned long flags, unsigned long page_shift)
+{
+	bool downgrade = false;
+	unsigned long start, end;
+	struct vm_area_struct *vma;
+	int srcu_idx;
+	unsigned long gfn = gpa >> page_shift;
+	int ret;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	if (page_shift != PAGE_SHIFT)
+		return H_P3;
+
+	if (flags & ~H_PAGE_IN_SHARED)
+		return H_P2;
+
+	if (flags & H_PAGE_IN_SHARED)
+		return kvmppc_share_page(kvm, gpa, page_shift);
+
+	ret = H_PARAMETER;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	down_write(&kvm->mm->mmap_sem);
+
+	start = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(start))
+		goto out;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	/* Fail the page-in request of an already paged-in page */
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+		goto out_unlock;
+
+	end = start + (1UL << page_shift);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end)
+		goto out_unlock;
+
+	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+				&downgrade))
+		ret = H_SUCCESS;
+out_unlock:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+	if (downgrade)
+		up_read(&kvm->mm->mmap_sem);
+	else
+		up_write(&kvm->mm->mmap_sem);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ */
+static int
+kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
+		    unsigned long end, unsigned long page_shift,
+		    struct kvm *kvm, unsigned long gpa)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig;
+	struct page *dpage, *spage;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn;
+	int ret = U_SUCCESS;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	/* The requested page is already paged-out, nothing to do */
+	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+		goto out;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return ret;
+
+	spage = migrate_pfn_to_page(*mig.src);
+	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+		goto out_finalize;
+
+	if (!is_zone_device_page(spage))
+		goto out_finalize;
+
+	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	lock_page(dpage);
+	pvt = spage->zone_device_data;
+	pfn = page_to_pfn(dpage);
+
+	/*
+	 * This function is used in two cases:
+	 * - When HV touches a secure page, for which we do UV_PAGE_OUT
+	 * - When a secure page is converted to shared page, we *get*
+	 *   the page to essentially unmap the device page. In this
+	 *   case we skip page-out.
+	 */
+	if (!pvt->skip_page_out)
+		ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+				  gpa, 0, page_shift);
+
+	if (ret == U_SUCCESS)
+		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+	else {
+		unlock_page(dpage);
+		__free_page(dpage);
+		goto out_finalize;
+	}
+
+	migrate_vma_pages(&mig);
+out_finalize:
+	migrate_vma_finalize(&mig);
+out:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	return ret;
+}
+
+/*
+ * Fault handler callback that gets called when HV touches any page that
+ * has been moved to secure memory, we ask UV to give back the page by
+ * issuing UV_PAGE_OUT uvcall.
+ *
+ * This eventually results in dropping of device PFN and the newly
+ * provisioned page/PFN gets populated in QEMU page tables.
+ */
+static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
+{
+	struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;
+
+	if (kvmppc_svm_page_out(vmf->vma, vmf->address,
+				vmf->address + PAGE_SIZE, PAGE_SHIFT,
+				pvt->kvm, pvt->gpa))
+		return VM_FAULT_SIGBUS;
+	else
+		return 0;
+}
+
+/*
+ * Release the device PFN back to the pool
+ *
+ * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
+ * Gets called with kvm->arch.uvmem_lock held.
+ */
+static void kvmppc_uvmem_page_free(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page) -
+			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
+	struct kvmppc_uvmem_page_pvt *pvt;
+
+	spin_lock(&kvmppc_uvmem_bitmap_lock);
+	bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+	pvt = page->zone_device_data;
+	page->zone_device_data = NULL;
+	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+	kfree(pvt);
+}
+
+static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
+	.page_free = kvmppc_uvmem_page_free,
+	.migrate_to_ram	= kvmppc_uvmem_migrate_to_ram,
+};
+
+/*
+ * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
+ */
+unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
+		      unsigned long flags, unsigned long page_shift)
+{
+	unsigned long gfn = gpa >> page_shift;
+	unsigned long start, end;
+	struct vm_area_struct *vma;
+	int srcu_idx;
+	int ret;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	if (page_shift != PAGE_SHIFT)
+		return H_P3;
+
+	if (flags)
+		return H_P2;
+
+	ret = H_PARAMETER;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	down_read(&kvm->mm->mmap_sem);
+	start = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(start))
+		goto out;
+
+	end = start + (1UL << page_shift);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end)
+		goto out;
+
+	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
+		ret = H_SUCCESS;
+out:
+	up_read(&kvm->mm->mmap_sem);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+	unsigned long pfn;
+	int ret = U_SUCCESS;
+
+	pfn = gfn_to_pfn(kvm, gfn);
+	if (is_error_noslot_pfn(pfn))
+		return -EFAULT;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+		goto out;
+
+	ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
+			 0, PAGE_SHIFT);
+out:
+	kvm_release_pfn_clean(pfn);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
+}
+
+static u64 kvmppc_get_secmem_size(void)
+{
+	struct device_node *np;
+	int i, len;
+	const __be32 *prop;
+	u64 size = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+	if (!np)
+		goto out;
+
+	prop = of_get_property(np, "secure-memory-ranges", &len);
+	if (!prop)
+		goto out_put;
+
+	for (i = 0; i < len / (sizeof(*prop) * 4); i++)
+		size += of_read_number(prop + (i * 4) + 2, 2);
+
+out_put:
+	of_node_put(np);
+out:
+	return size;
+}
+
+int kvmppc_uvmem_init(void)
+{
+	int ret = 0;
+	unsigned long size;
+	struct resource *res;
+	void *addr;
+	unsigned long pfn_last, pfn_first;
+
+	size = kvmppc_get_secmem_size();
+	if (!size) {
+		/*
+		 * Don't fail the initialization of kvm-hv module if
+		 * the platform doesn't export ibm,uv-firmware node.
+		 * Let normal guests run on such PEF-disabled platform.
+		 */
+		pr_info("KVMPPC-UVMEM: No support for secure guests\n");
+		goto out;
+	}
+
+	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
+	if (IS_ERR(res)) {
+		ret = PTR_ERR(res);
+		goto out;
+	}
+
+	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
+	kvmppc_uvmem_pgmap.res = *res;
+	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto out_free_region;
+	}
+
+	pfn_first = res->start >> PAGE_SHIFT;
+	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
+	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
+				      sizeof(unsigned long), GFP_KERNEL);
+	if (!kvmppc_uvmem_bitmap) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
+	return ret;
+out_unmap:
+	memunmap_pages(&kvmppc_uvmem_pgmap);
+out_free_region:
+	release_mem_region(res->start, size);
+out:
+	return ret;
+}
+
+void kvmppc_uvmem_free(void)
+{
+	memunmap_pages(&kvmppc_uvmem_pgmap);
+	release_mem_region(kvmppc_uvmem_pgmap.res.start,
+			   resource_size(&kvmppc_uvmem_pgmap.res));
+	kfree(kvmppc_uvmem_bitmap);
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc65af8fe6f7..ce4fcf76e53e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
 	kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
 }
 
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+		ulong pc = kvmppc_get_pc(vcpu);
+		ulong lr = kvmppc_get_lr(vcpu);
+		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+	}
+}
+
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long msr, pc, new_msr, new_pc;
+
+	kvmppc_unfixup_split_real(vcpu);
+
+	msr = kvmppc_get_msr(vcpu);
+	pc = kvmppc_get_pc(vcpu);
+	new_msr = vcpu->arch.intr_msr;
+	new_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* If transactional, change to suspend mode on IRQ delivery */
+	if (MSR_TM_TRANSACTIONAL(msr))
+		new_msr |= MSR_TS_S;
+	else
+		new_msr |= msr & MSR_TS_MASK;
+#endif
+
+	kvmppc_set_srr0(vcpu, pc);
+	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, new_pc);
+	kvmppc_set_msr(vcpu, new_msr);
+}
 
 static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 #else
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
+	vcpu->arch.intr_msr = 0;
 #endif
 	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
 	vcpu->arch.slb_nr = 64;
@@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.set_one_reg = kvmppc_set_one_reg_pr,
 	.vcpu_load   = kvmppc_core_vcpu_load_pr,
 	.vcpu_put    = kvmppc_core_vcpu_put_pr,
+	.inject_interrupt = kvmppc_inject_interrupt_pr,
 	.set_msr     = kvmppc_set_msr_pr,
 	.vcpu_run    = kvmppc_vcpu_run_pr,
 	.vcpu_create = kvmppc_core_vcpu_create_pr,
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index a3f9c665bb5b..66858b7d3c6b 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1211,6 +1211,45 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	vcpu->arch.xive_vcpu = NULL;
 }
 
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	/* We have a block of xive->nr_servers VPs. We just need to check
+	 * raw vCPU ids are below the expected limit for this guest's
+	 * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
+	 * index that can be safely used to compute a VP id that belongs
+	 * to the VP block.
+	 */
+	return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 vp_id;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	vp_id = kvmppc_xive_vp(xive, cpu);
+	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+
+	*vp = vp_id;
+
+	return 0;
+}
+
 int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 			     struct kvm_vcpu *vcpu, u32 cpu)
 {
@@ -1229,20 +1268,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 		return -EPERM;
 	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
 		return -EBUSY;
-	if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
-		pr_devel("Out of bounds !\n");
-		return -EINVAL;
-	}
 
 	/* We need to synchronize with queue provisioning */
 	mutex_lock(&xive->lock);
 
-	vp_id = kvmppc_xive_vp(xive, cpu);
-	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
-		pr_devel("Duplicate !\n");
-		r = -EEXIST;
+	r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+	if (r)
 		goto bail;
-	}
 
 	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
 	if (!xc) {
@@ -1834,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	return 0;
 }
 
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+	u32 __user *ubufp = (u32 __user *) addr;
+	u32 nr_servers;
+	int rc = 0;
+
+	if (get_user(nr_servers, ubufp))
+		return -EFAULT;
+
+	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+		return -EINVAL;
+
+	mutex_lock(&xive->lock);
+	if (xive->vp_base != XIVE_INVALID_VP)
+		/* The VP block is allocated once and freed when the device
+		 * is released. Better not allow to change its size since its
+		 * used by connect_vcpu to validate vCPU ids are valid (eg,
+		 * setting it back to a higher value could allow connect_vcpu
+		 * to come up with a VP id that goes beyond the VP block, which
+		 * is likely to cause a crash in OPAL).
+		 */
+		rc = -EBUSY;
+	else if (nr_servers > KVM_MAX_VCPUS)
+		/* We don't need more servers. Higher vCPU ids get packed
+		 * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+		 */
+		xive->nr_servers = KVM_MAX_VCPUS;
+	else
+		xive->nr_servers = nr_servers;
+
+	mutex_unlock(&xive->lock);
+
+	return rc;
+}
+
 static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	struct kvmppc_xive *xive = dev->private;
@@ -1842,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	switch (attr->group) {
 	case KVM_DEV_XICS_GRP_SOURCES:
 		return xive_set_source(xive, attr->attr, attr->addr);
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return kvmppc_xive_set_nr_servers(xive, attr->addr);
+		}
 	}
 	return -ENXIO;
 }
@@ -1867,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		    attr->attr < KVMPPC_XICS_NR_IRQS)
 			return 0;
 		break;
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return 0;
+		}
 	}
 	return -ENXIO;
 }
@@ -2001,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 {
 	struct kvmppc_xive *xive;
 	struct kvm *kvm = dev->kvm;
-	int ret = 0;
 
 	pr_devel("Creating xive for partition\n");
 
+	/* Already there ? */
+	if (kvm->arch.xive)
+		return -EEXIST;
+
 	xive = kvmppc_xive_get_device(kvm, type);
 	if (!xive)
 		return -ENOMEM;
@@ -2014,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	xive->kvm = kvm;
 	mutex_init(&xive->lock);
 
-	/* Already there ? */
-	if (kvm->arch.xive)
-		ret = -EEXIST;
-	else
-		kvm->arch.xive = xive;
-
 	/* We use the default queue size set by the host */
 	xive->q_order = xive_native_default_eq_shift();
 	if (xive->q_order < PAGE_SHIFT)
@@ -2027,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	else
 		xive->q_page_order = xive->q_order - PAGE_SHIFT;
 
-	/* Allocate a bunch of VPs */
-	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
-	pr_devel("VP_Base=%x\n", xive->vp_base);
-
-	if (xive->vp_base == XIVE_INVALID_VP)
-		ret = -ENOMEM;
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+	 * on a POWER9 system.
+	 */
+	xive->nr_servers = KVM_MAX_VCPUS;
 
 	xive->single_escalation = xive_native_has_single_escalation();
 
-	if (ret)
-		return ret;
-
+	kvm->arch.xive = xive;
 	return 0;
 }
 
@@ -2108,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private)
 		if (!xc)
 			continue;
 
-		seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+		seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
 			   " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
-			   xc->server_num, xc->cppr, xc->hw_cppr,
+			   xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
 			   xc->mfrr, xc->pending,
 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
 
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index fe3ed50e0818..382e3a56e789 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -135,6 +135,9 @@ struct kvmppc_xive {
 	/* Flags */
 	u8	single_escalation;
 
+	/* Number of entries in the VP block */
+	u32	nr_servers;
+
 	struct kvmppc_xive_ops *ops;
 	struct address_space   *mapping;
 	struct mutex mapping_lock;
@@ -296,6 +299,8 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
 void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
 				    struct kvmppc_xive_vcpu *xc, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
 
 #endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 78b906ffa0d2..d83adb1e1490 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
 	}
 }
 
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+					      u8 prio, __be32 *qpage,
+					      u32 order, bool can_escalate)
+{
+	int rc;
+	__be32 *qpage_prev = q->qpage;
+
+	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+					 can_escalate);
+	if (rc)
+		return rc;
+
+	if (qpage_prev)
+		put_page(virt_to_page(qpage_prev));
+
+	return rc;
+}
+
 void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -118,19 +136,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 		return -EPERM;
 	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
 		return -EBUSY;
-	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
-		pr_devel("Out of bounds !\n");
-		return -EINVAL;
-	}
 
 	mutex_lock(&xive->lock);
 
-	vp_id = kvmppc_xive_vp(xive, server_num);
-	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
-		pr_devel("Duplicate !\n");
-		rc = -EEXIST;
+	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+	if (rc)
 		goto bail;
-	}
 
 	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
 	if (!xc) {
@@ -582,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 		q->guest_qaddr  = 0;
 		q->guest_qshift = 0;
 
-		rc = xive_native_configure_queue(xc->vp_id, q, priority,
-						 NULL, 0, true);
+		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+							NULL, 0, true);
 		if (rc) {
 			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
 			       priority, xc->server_num, rc);
 			return rc;
 		}
 
-		if (q->qpage) {
-			put_page(virt_to_page(q->qpage));
-			q->qpage = NULL;
-		}
-
 		return 0;
 	}
 
@@ -624,12 +630,6 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	gfn = gpa_to_gfn(kvm_eq.qaddr);
-	page = gfn_to_page(kvm, gfn);
-	if (is_error_page(page)) {
-		srcu_read_unlock(&kvm->srcu, srcu_idx);
-		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
-		return -EINVAL;
-	}
 
 	page_size = kvm_host_page_size(kvm, gfn);
 	if (1ull << kvm_eq.qshift > page_size) {
@@ -638,6 +638,13 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 		return -EINVAL;
 	}
 
+	page = gfn_to_page(kvm, gfn);
+	if (is_error_page(page)) {
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+		return -EINVAL;
+	}
+
 	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
@@ -653,8 +660,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 	  * OPAL level because the use of END ESBs is not supported by
 	  * Linux.
 	  */
-	rc = xive_native_configure_queue(xc->vp_id, q, priority,
-					 (__be32 *) qaddr, kvm_eq.qshift, true);
+	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+					(__be32 *) qaddr, kvm_eq.qshift, true);
 	if (rc) {
 		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
 		       priority, xc->server_num, rc);
@@ -928,6 +935,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 			return kvmppc_xive_reset(xive);
 		case KVM_DEV_XIVE_EQ_SYNC:
 			return kvmppc_xive_native_eq_sync(xive);
+		case KVM_DEV_XIVE_NR_SERVERS:
+			return kvmppc_xive_set_nr_servers(xive, attr->addr);
 		}
 		break;
 	case KVM_DEV_XIVE_GRP_SOURCE:
@@ -967,6 +976,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_XIVE_RESET:
 		case KVM_DEV_XIVE_EQ_SYNC:
+		case KVM_DEV_XIVE_NR_SERVERS:
 			return 0;
 		}
 		break;
@@ -1067,7 +1077,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
 {
 	struct kvmppc_xive *xive;
 	struct kvm *kvm = dev->kvm;
-	int ret = 0;
 
 	pr_devel("Creating xive native device\n");
 
@@ -1081,27 +1090,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
 	dev->private = xive;
 	xive->dev = dev;
 	xive->kvm = kvm;
-	kvm->arch.xive = xive;
 	mutex_init(&xive->mapping_lock);
 	mutex_init(&xive->lock);
 
-	/*
-	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
-	 * a default. Getting the max number of CPUs the VM was
-	 * configured with would improve our usage of the XIVE VP space.
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+	 * on a POWER9 system.
 	 */
-	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
-	pr_devel("VP_Base=%x\n", xive->vp_base);
-
-	if (xive->vp_base == XIVE_INVALID_VP)
-		ret = -ENXIO;
+	xive->nr_servers = KVM_MAX_VCPUS;
 
 	xive->single_escalation = xive_native_has_single_escalation();
 	xive->ops = &kvmppc_xive_native_ops;
 
-	if (ret)
-		return ret;
-
+	kvm->arch.xive = xive;
 	return 0;
 }
 
@@ -1204,8 +1206,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
 		if (!xc)
 			continue;
 
-		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
-			   xc->server_num,
+		seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+			   xc->server_num, xc->vp_id,
 			   vcpu->arch.xive_saved_state.nsr,
 			   vcpu->arch.xive_saved_state.cppr,
 			   vcpu->arch.xive_saved_state.ipb,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 321db0fdb9db..425d13806645 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	if (tlbsel == 1) {
 		struct vm_area_struct *vma;
-		down_read(&current->mm->mmap_sem);
+		down_read(&kvm->mm->mmap_sem);
 
-		vma = find_vma(current->mm, hva);
+		vma = find_vma(kvm->mm, hva);
 		if (vma && hva >= vma->vm_start &&
 		    (vma->vm_flags & VM_PFNMAP)) {
 			/*
@@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
 		}
 
-		up_read(&current->mm->mmap_sem);
+		up_read(&kvm->mm->mmap_sem);
 	}
 
 	if (likely(!pfnmap)) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3a77bb643452..416fb3d2a1d0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -31,6 +31,8 @@
 #include <asm/hvcall.h>
 #include <asm/plpar_wrappers.h>
 #endif
+#include <asm/ultravisor.h>
+#include <asm/kvm_host.h>
 
 #include "timing.h"
 #include "irq.h"
@@ -522,6 +524,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_IMMEDIATE_EXIT:
 		r = 1;
 		break;
+	case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+		/* fall through */
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
@@ -2411,6 +2415,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_SVM_OFF: {
+		struct kvm *kvm = filp->private_data;
+
+		r = 0;
+		if (!kvm->arch.kvm_ops->svm_off)
+			goto out;
+
+		r = kvm->arch.kvm_ops->svm_off(kvm);
+		break;
+	}
 	default: {
 		struct kvm *kvm = filp->private_data;
 		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 377712e85605..0666a8d29596 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -17,14 +17,14 @@ void arch_wb_cache_pmem(void *addr, size_t size)
 	unsigned long start = (unsigned long) addr;
 	flush_dcache_range(start, start + size);
 }
-EXPORT_SYMBOL(arch_wb_cache_pmem);
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
 	flush_dcache_range(start, start + size);
 }
-EXPORT_SYMBOL(arch_invalidate_pmem);
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
 /*
  * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index f69a6aab7bfb..1ddb26394e8a 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
-_GLOBAL(__clear_user)
+_GLOBAL(__arch_clear_user)
 /*
  * Use dcbz on the complete cache lines in the destination
  * to set them to zero.  This requires that the destination
@@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
 	EX_TABLE(8b, 91b)
 	EX_TABLE(9b, 91b)
 
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 507b18b1660e..169872bc0892 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -17,7 +17,7 @@ PPC64_CACHES:
 	.section	".text"
 
 /**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
  * @to:   Destination address, in user space.
  * @n:    Number of bytes to zero.
  *
@@ -58,7 +58,7 @@ err3;	stb	r0,0(r3)
 	mr	r3,r4
 	blr
 
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
 	cmpdi	r4,32
 	neg	r6,r3
 	li	r0,0
@@ -181,4 +181,4 @@ err1;	dcbz	0,r3
 	cmpdi	r4,32
 	blt	.Lshort_clear
 	b	.Lmedium_clear
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 84d5fab94f8f..69b2419accef 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 {
 	unsigned int bl;
 	int wimgxpp;
-	struct ppc_bat *bat = BATS[index];
+	struct ppc_bat *bat;
 	unsigned long flags = pgprot_val(prot);
 
+	if (index == -1)
+		index = find_free_bat();
+	if (index == -1) {
+		pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+		       (unsigned long long)phys);
+		return;
+	}
+	bat = BATS[index];
+
 	if ((flags & _PAGE_NO_CACHE) ||
 	    (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
 		flags &= ~_PAGE_COHERENT;
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 523e42eb11da..d2d8237ea9d5 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -482,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	return ret;
 }
 
-static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
 {
 	struct hash_pte *hptep;
-	unsigned long hash;
+	unsigned long hpte_v;
 	unsigned long i;
-	long slot;
-	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_avpn(vpn, psize, ssize);
-
-	/* Bolted mappings are only ever in the primary group */
-	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 
 		hptep = htab_address + slot;
@@ -508,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 	return -1;
 }
 
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+	unsigned long hpte_group;
+	unsigned long want_v;
+	unsigned long hash;
+	long slot;
+
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+	/*
+	 * We try to keep bolted entries always in primary hash
+	 * But in some case we can find them in secondary too.
+	 */
+	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	slot = __native_hpte_find(want_v, hpte_group);
+	if (slot < 0) {
+		/* Try in secondary */
+		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = __native_hpte_find(want_v, hpte_group);
+		if (slot < 0)
+			return -1;
+	}
+
+	return slot;
+}
+
 /*
  * Update the page protection bits. Intended to be used to create
  * guard pages for kernel data structures on pages which are bolted
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 6c123760164e..b30435c7d804 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -263,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
 		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
+		bool secondary_hash = false;
 
 		/*
 		 * If we hit a bad address return error.
@@ -291,13 +292,31 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
 		BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
 		ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
 					       HPTE_V_BOLTED, psize, psize,
 					       ssize);
+		if (ret == -1) {
+			/*
+			 * Try to to keep bolted entries in primary.
+			 * Remove non bolted entries and try insert again
+			 */
+			ret = mmu_hash_ops.hpte_remove(hpteg);
+			if (ret != -1)
+				ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+							       HPTE_V_BOLTED, psize, psize,
+							       ssize);
+			if (ret == -1 && !secondary_hash) {
+				secondary_hash = true;
+				hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+				goto repeat;
+			}
+		}
 
 		if (ret < 0)
 			break;
 
+		cond_resched();
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		if (debug_pagealloc_enabled() &&
 			(paddr >> PAGE_SHIFT) < linear_map_hash_count)
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index ae7fca40e5b3..59e0ebbd8036 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread)
 	write_iamr(pkey_iamr_mask);
 }
 
-static inline bool pkey_allows_readwrite(int pkey)
-{
-	int pkey_shift = pkeyshift(pkey);
-
-	if (!is_pkey_enabled(pkey))
-		return true;
-
-	return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
-}
-
 int __execute_only_pkey(struct mm_struct *mm)
 {
 	return mm->context.execute_only_pkey;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 6ee17d09649c..974109bb85db 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -13,6 +13,7 @@
 #include <linux/memblock.h>
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 #include <linux/string_helpers.h>
 #include <linux/stop_machine.h>
 
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 67af871190c6..a95175c0972b 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -732,18 +732,13 @@ local:
 	}
 	preempt_enable();
 }
+
 void radix__flush_all_mm(struct mm_struct *mm)
 {
 	__flush_all_mm(mm, false);
 }
 EXPORT_SYMBOL(radix__flush_all_mm);
 
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
-{
-	tlb->need_flush_all = 1;
-}
-EXPORT_SYMBOL(radix__flush_tlb_pwc);
-
 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 				 int psize)
 {
@@ -832,8 +827,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
-					unsigned long start, unsigned long end,
-					bool flush_all_sizes)
+					    unsigned long start, unsigned long end)
 
 {
 	unsigned long pid;
@@ -879,26 +873,16 @@ is_local:
 			}
 		}
 	} else {
-		bool hflush = flush_all_sizes;
-		bool gflush = flush_all_sizes;
+		bool hflush = false;
 		unsigned long hstart, hend;
-		unsigned long gstart, gend;
 
-		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-			hflush = true;
-
-		if (hflush) {
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
 			hend = end & PMD_MASK;
 			if (hstart == hend)
 				hflush = false;
-		}
-
-		if (gflush) {
-			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
-			gend = end & PUD_MASK;
-			if (gstart == gend)
-				gflush = false;
+			else
+				hflush = true;
 		}
 
 		if (local) {
@@ -907,9 +891,6 @@ is_local:
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
 						PMD_SIZE, MMU_PAGE_2M);
-			if (gflush)
-				__tlbiel_va_range(gstart, gend, pid,
-						PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
 		} else if (cputlb_use_tlbie()) {
 			asm volatile("ptesync": : :"memory");
@@ -917,10 +898,6 @@ is_local:
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
 						PMD_SIZE, MMU_PAGE_2M);
-			if (gflush)
-				__tlbie_va_range(gstart, gend, pid,
-						PUD_SIZE, MMU_PAGE_1G);
-
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 		} else {
 			_tlbiel_va_range_multicast(mm,
@@ -928,9 +905,6 @@ is_local:
 			if (hflush)
 				_tlbiel_va_range_multicast(mm,
 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
-			if (gflush)
-				_tlbiel_va_range_multicast(mm,
-					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
 		}
 	}
 	preempt_enable();
@@ -945,7 +919,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return radix__flush_hugetlb_tlb_range(vma, start, end);
 #endif
 
-	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
+	__radix__flush_tlb_range(vma->vm_mm, start, end);
 }
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
@@ -1021,53 +995,19 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 * that flushes the process table entry cache upon process teardown.
 	 * See the comment for radix in arch_exit_mmap().
 	 */
-	if (tlb->fullmm) {
+	if (tlb->fullmm || tlb->need_flush_all) {
 		__flush_all_mm(mm, true);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
-	} else if (mm_tlb_flush_nested(mm)) {
-		/*
-		 * If there is a concurrent invalidation that is clearing ptes,
-		 * then it's possible this invalidation will miss one of those
-		 * cleared ptes and miss flushing the TLB. If this invalidate
-		 * returns before the other one flushes TLBs, that can result
-		 * in it returning while there are still valid TLBs inside the
-		 * range to be invalidated.
-		 *
-		 * See mm/memory.c:tlb_finish_mmu() for more details.
-		 *
-		 * The solution to this is ensure the entire range is always
-		 * flushed here. The problem for powerpc is that the flushes
-		 * are page size specific, so this "forced flush" would not
-		 * do the right thing if there are a mix of page sizes in
-		 * the range to be invalidated. So use __flush_tlb_range
-		 * which invalidates all possible page sizes in the range.
-		 *
-		 * PWC flush probably is not be required because the core code
-		 * shouldn't free page tables in this path, but accounting
-		 * for the possibility makes us a bit more robust.
-		 *
-		 * need_flush_all is an uncommon case because page table
-		 * teardown should be done with exclusive locks held (but
-		 * after locks are dropped another invalidate could come
-		 * in), it could be optimized further if necessary.
-		 */
-		if (!tlb->need_flush_all)
-			__radix__flush_tlb_range(mm, start, end, true);
-		else
-			radix__flush_all_mm(mm);
-#endif
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
-		if (!tlb->need_flush_all)
+		if (!tlb->freed_tables)
 			radix__flush_tlb_mm(mm);
 		else
 			radix__flush_all_mm(mm);
 	} else {
-		if (!tlb->need_flush_all)
+		if (!tlb->freed_tables)
 			radix__flush_tlb_range_psize(mm, start, end, psize);
 		else
 			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
 	}
-	tlb->need_flush_all = 0;
 }
 
 static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 2a82984356f8..5ab4f868e919 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -104,14 +104,14 @@ static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
 #endif
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_sync_page(paddr, size, dir);
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	__dma_sync_page(paddr, size, dir);
 }
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8432c281de92..b5047f9b5dec 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -645,6 +645,7 @@ NOKPROBE_SYMBOL(do_page_fault);
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
 	const struct exception_table_entry *entry;
+	int is_write = page_fault_is_write(regs->dsisr);
 
 	/* Are we prepared to handle this fault?  */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -658,9 +659,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 	case 0x300:
 	case 0x380:
 	case 0xe00:
-		pr_alert("BUG: %s at 0x%08lx\n",
+		pr_alert("BUG: %s on %s at 0x%08lx\n",
 			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
-			 "Unable to handle kernel data access", regs->dar);
+			 "Unable to handle kernel data access",
+			 is_write ? "write" : "read", regs->dar);
 		break;
 	case 0x400:
 	case 0x480:
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index a84da92920f7..42ef7a6e6098 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -21,6 +21,13 @@
 #include <asm/pgtable.h>
 #include <asm/kup.h>
 
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
 
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index b04896a88d79..872df48ae41b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -56,11 +56,6 @@
 phys_addr_t total_memory;
 phys_addr_t total_lowmem;
 
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
 #ifdef CONFIG_RELOCATABLE
 /* Used in __va()/__pa() */
 long long virt_phys_offset;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 4e08246acd79..4002ced3596f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -63,38 +63,48 @@
 
 #include <mm/mmu_decl.h>
 
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within.  Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within.  Note that we have to
  * do this by hand as the proffered address may not be correctly aligned.
  * Subtraction of non-aligned pointers produces undefined results.
  */
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
 {
-	unsigned long offset = page - ((unsigned long)(vmemmap));
+	unsigned long start_pfn;
+	unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
 
 	/* Return the pfn of the start of the section. */
-	return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+	start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+	return pfn_to_page(start_pfn);
 }
 
 /*
- * Check if this vmemmap page is already initialised.  If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (There is a page table entry already present). We can't do
+ * a page table lookup here because with the hash translation we don't keep
+ * vmemmap details in linux page table.
  */
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
 {
-	unsigned long end = start + page_size;
-	start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+	struct page *start;
+	unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+	start = vmemmap_subsection_start(vmemmap_addr);
 
-	for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
-		if (pfn_valid(page_to_pfn((struct page *)start)))
+	for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+		/*
+		 * pfn valid check here is intended to really check
+		 * whether we have any subsection already initialized
+		 * in this range.
+		 */
+		if (pfn_valid(page_to_pfn(start)))
 			return 1;
 
 	return 0;
@@ -201,6 +211,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		void *p = NULL;
 		int rc;
 
+		/*
+		 * This vmemmap range is backing different subsections. If any
+		 * of that subsection is marked valid, that means we already
+		 * have initialized a page table covering this range and hence
+		 * the vmemmap range is populated.
+		 */
 		if (vmemmap_populated(start, page_size))
 			continue;
 
@@ -290,9 +306,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
 		struct page *page;
 
 		/*
-		 * the section has already be marked as invalid, so
-		 * vmemmap_populated() true means some other sections still
-		 * in this page, so skip it.
+		 * We have already marked the subsection we are trying to remove
+		 * invalid. So if we want to remove the vmemmap range, we
+		 * need to make sure there is no subsection marked valid
+		 * in this range.
 		 */
 		if (vmemmap_populated(start, page_size))
 			continue;
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index f36121f25243..743e11384dea 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -68,6 +68,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
 	/*
 	 * Should check if it is a candidate for a BAT mapping
 	 */
+	pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
 
 	err = early_ioremap_range(ioremap_bot - size, p, size, prot);
 	if (err)
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index fd29e51700cd..50a99d9684f7 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -81,6 +81,8 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
 	if (slab_is_available())
 		return do_ioremap(paligned, offset, size, prot, caller);
 
+	pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
 	err = early_ioremap_range(ioremap_bot, paligned, size, prot);
 	if (err)
 		return NULL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index be941d382c8d..f5535eae637f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/memremap.h>
+#include <linux/dma-direct.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -104,6 +105,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Write any modified data cache blocks out to
+ * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+				       unsigned long chunk)
+{
+	unsigned long i;
+
+	for (i = start; i < stop; i += chunk) {
+		flush_dcache_range(i, min(stop, i + chunk));
+		cond_resched();
+	}
+}
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			struct mhp_restrictions *restrictions)
 {
@@ -120,7 +142,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_dcache_range(start, start + size);
 
 	return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -130,14 +151,14 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
 	int ret;
 
-	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_dcache_range(start, start + size);
+	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+
 	ret = remove_section_mapping(start, start + size);
 	WARN_ON_ONCE(ret);
 
@@ -201,10 +222,10 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code.  32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code.  32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
  */
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -216,15 +237,13 @@ void __init paging_init(void)
 	unsigned long long total_ram = memblock_phys_mem_size();
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
 
-#ifdef CONFIG_PPC32
-	unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
-	unsigned long end = __fix_to_virt(FIX_HOLE);
+#ifdef CONFIG_HIGHMEM
+	unsigned long v = __fix_to_virt(FIX_KMAP_END);
+	unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
 
 	for (; v < end; v += PAGE_SIZE)
 		map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#endif
 
-#ifdef CONFIG_HIGHMEM
 	map_kernel_page(PKMAP_BASE, 0, __pgprot(0));	/* XXX gross */
 	pkmap_page_table = virt_to_kpte(PKMAP_BASE);
 
@@ -237,9 +256,18 @@ void __init paging_init(void)
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (long int)((top_of_ram - total_ram) >> 20));
 
+	/*
+	 * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+	 * powerbooks.
+	 */
+	if (IS_ENABLED(CONFIG_PPC32))
+		zone_dma_bits = 30;
+	else
+		zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA]	= min(max_low_pfn,
-				      1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+				      1UL << (zone_dma_bits - PAGE_SHIFT));
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
@@ -260,6 +288,14 @@ void __init mem_init(void)
 	BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
 
 #ifdef CONFIG_SWIOTLB
+	/*
+	 * Some platforms (e.g. 85xx) limit DMA-able memory way below
+	 * 4G. We force memblock to bottom-up mode to ensure that the
+	 * memory allocated in swiotlb_init() is DMA-able.
+	 * As it's the last memblock allocation, no need to reset it
+	 * back to to-down.
+	 */
+	memblock_set_bottom_up(true);
 	swiotlb_init(0);
 #endif
 
@@ -318,6 +354,120 @@ void free_initmem(void)
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(unsigned long addr)
+{
+	/*
+	 * For a snooping icache, we still need a dummy icbi to purge all the
+	 * prefetched instructions from the ifetch buffers. We also need a sync
+	 * before the icbi to order the the actual stores to memory that might
+	 * have modified instructions with the icbi.
+	 */
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+		mb(); /* sync */
+		icbi((void *)addr);
+		mb(); /* sync */
+		isync();
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+	unsigned long shift = l1_icache_shift();
+	unsigned long bytes = l1_icache_bytes();
+	char *addr = (char *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+	unsigned long i;
+
+	for (i = 0; i < size >> shift; i++, addr += bytes)
+		icbi(addr);
+
+	mb(); /* sync */
+	isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+	if (flush_coherent_icache(start))
+		return;
+
+	clean_dcache_range(start, stop);
+
+	if (IS_ENABLED(CONFIG_44x)) {
+		/*
+		 * Flash invalidate on 44x because we are passed kmapped
+		 * addresses and this doesn't work for userspace pages due to
+		 * the virtually tagged icache.
+		 */
+		iccci((void *)start);
+		mb(); /* sync */
+		isync();
+	} else
+		invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+	unsigned long bytes = l1_dcache_bytes();
+	unsigned long nb = PAGE_SIZE / bytes;
+	unsigned long addr = physaddr & PAGE_MASK;
+	unsigned long msr, msr0;
+	unsigned long loop1 = addr, loop2 = addr;
+
+	msr0 = mfmsr();
+	msr = msr0 & ~MSR_DR;
+	/*
+	 * This must remain as ASM to prevent potential memory accesses
+	 * while the data MMU is disabled
+	 */
+	asm volatile(
+		"   mtctr %2;\n"
+		"   mtmsr %3;\n"
+		"   isync;\n"
+		"0: dcbst   0, %0;\n"
+		"   addi    %0, %0, %4;\n"
+		"   bdnz    0b;\n"
+		"   sync;\n"
+		"   mtctr %2;\n"
+		"1: icbi    0, %1;\n"
+		"   addi    %1, %1, %4;\n"
+		"   bdnz    1b;\n"
+		"   sync;\n"
+		"   mtmsr %5;\n"
+		"   isync;\n"
+		: "+&r" (loop1), "+&r" (loop2)
+		: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+		: "ctr", "memory");
+}
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+
 /*
  * This is called when a page has been modified by the kernel.
  * It just marks the page as not i-cache clean.  We do the i-cache
@@ -350,12 +500,46 @@ void flush_dcache_icache_page(struct page *page)
 		__flush_dcache_icache(start);
 		kunmap_atomic(start);
 	} else {
-		__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+		unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+		if (flush_coherent_icache(addr))
+			return;
+		flush_dcache_icache_phys(addr);
 	}
 #endif
 }
 EXPORT_SYMBOL(flush_dcache_icache_page);
 
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @page: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+	unsigned long addr = (unsigned long)p;
+
+	if (flush_coherent_icache(addr))
+		return;
+
+	clean_dcache_range(addr, addr + PAGE_SIZE);
+
+	/*
+	 * We don't flush the icache on 44x. Those have a virtual icache and we
+	 * don't have access to the virtual address here (it's not the page
+	 * vaddr but where it's mapped in user space). The flushing of the
+	 * icache on these is handled elsewhere, when a change in the address
+	 * space occurs, before returning to user space.
+	 */
+
+	if (cpu_has_feature(MMU_FTR_TYPE_44x))
+		return;
+
+	invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
 	clear_page(page);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c750ac9ec713..8e99649c24fc 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -139,10 +139,21 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+extern int is_second_reloc;
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
 
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
+
 struct tlbcam {
 	u32	MAS0;
 	u32	MAS1;
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 4a06cb342da2..96eb8e43f39b 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -103,6 +103,19 @@ static void mmu_patch_addis(s32 *site, long simm)
 	patch_instruction_site(site, instr);
 }
 
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+{
+	unsigned long s = offset;
+	unsigned long v = PAGE_OFFSET + s;
+	phys_addr_t p = memstart_addr + s;
+
+	for (; s < top; s += PAGE_SIZE) {
+		map_kernel_page(v, p, prot);
+		v += PAGE_SIZE;
+		p += PAGE_SIZE;
+	}
+}
+
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long mapped;
@@ -115,10 +128,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 		if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
 			mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
 	} else {
+		unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
 		mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
 		if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-			mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
-					    _ALIGN(__pa(_einittext), 8 << 20));
+			mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8);
+
+		/*
+		 * Populate page tables to:
+		 * - have them appear in /sys/kernel/debug/kernel_page_tables
+		 * - allow the BDI to find the pages when they are not PINNED
+		 */
+		mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
+		mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
+		mmu_mapin_immr();
 	}
 
 	mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -144,18 +167,41 @@ void mmu_mark_initmem_nx(void)
 	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
 		mmu_patch_addis(&patch__itlbmiss_linmem_top8,
 				-((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
-	if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+	if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
+		unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+		unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+		unsigned long etext = __pa(_etext);
+
 		mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+
+		/* Update page tables for PTDUMP and BDI */
+		mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
+		if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+			mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
+			mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
+		} else {
+			mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
+			mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
+		}
+	}
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mmu_mark_rodata_ro(void)
 {
+	unsigned long sinittext = __pa(_sinittext);
+	unsigned long etext = __pa(_etext);
+
 	if (CONFIG_DATA_SHIFT < 23)
 		mmu_patch_addis(&patch__dtlbmiss_romem_top8,
 				-__pa(((unsigned long)_sinittext) &
 				      ~(LARGE_PAGE_SIZE_8M - 1)));
 	mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+
+	/* Update page tables for PTDUMP and BDI */
+	mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
+	mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
+	mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
 }
 #endif
 
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index 33b6f6f29d3f..0424f6ce5bd8 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_40x)		+= 40x.o
 obj-$(CONFIG_44x)		+= 44x.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
 obj-$(CONFIG_PPC_FSL_BOOK3E)	+= fsl_booke.o
+obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr_booke.o
 ifdef CONFIG_HUGETLB_PAGE
 obj-$(CONFIG_PPC_FSL_BOOK3E)	+= book3e_hugetlbpage.o
 endif
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 556e3cd52a35..b4eb06ceb189 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -263,11 +263,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 int __initdata is_second_reloc;
 notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 {
-	unsigned long base = KERNELBASE;
+	unsigned long base = kernstart_virt_addr;
+	phys_addr_t size;
 
 	kernstart_addr = start;
 	if (is_second_reloc) {
 		virt_phys_offset = PAGE_OFFSET - memstart_addr;
+		kaslr_late_init();
 		return;
 	}
 
@@ -291,7 +293,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 	start &= ~0x3ffffff;
 	base &= ~0x3ffffff;
 	virt_phys_offset = base - start;
-	early_get_first_memblock_info(__va(dt_ptr), NULL);
+	early_get_first_memblock_info(__va(dt_ptr), &size);
 	/*
 	 * We now get the memstart_addr, then we should check if this
 	 * address is the same as what the PAGE_OFFSET map to now. If
@@ -316,6 +318,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 		/* We should never reach here */
 		panic("Relocation error");
 	}
+
+	kaslr_early_init(__va(dt_ptr), size);
 }
 #endif
 #endif
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644
index 000000000000..4a75f2d9bf0e
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_core.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+struct regions {
+	unsigned long pa_start;
+	unsigned long pa_end;
+	unsigned long kernel_size;
+	unsigned long dtb_start;
+	unsigned long dtb_end;
+	unsigned long initrd_start;
+	unsigned long initrd_end;
+	unsigned long crash_start;
+	unsigned long crash_end;
+	int reserved_mem;
+	int reserved_mem_addr_cells;
+	int reserved_mem_size_cells;
+};
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+	int node = fdt_path_offset(fdt, "/chosen");
+
+	early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+				       size_t size)
+{
+	size_t i;
+	const unsigned long *ptr = area;
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it defferent for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+	unsigned long hash = 0;
+
+	hash = rotate_xor(hash, build_str, sizeof(build_str));
+	hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+	return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+	return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+					    u32 end)
+{
+	int subnode, len, i;
+	u64 base, size;
+
+	/* check for overlap with /memreserve/ entries */
+	for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+		if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+			continue;
+		if (regions_overlap(start, end, base, base + size))
+			return true;
+	}
+
+	if (regions.reserved_mem < 0)
+		return false;
+
+	/* check for overlap with static reservations in /reserved-memory */
+	for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+	     subnode >= 0;
+	     subnode = fdt_next_subnode(fdt, subnode)) {
+		const fdt32_t *reg;
+		u64 rsv_end;
+
+		len = 0;
+		reg = fdt_getprop(fdt, subnode, "reg", &len);
+		while (len >= (regions.reserved_mem_addr_cells +
+			       regions.reserved_mem_size_cells)) {
+			base = fdt32_to_cpu(reg[0]);
+			if (regions.reserved_mem_addr_cells == 2)
+				base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+			reg += regions.reserved_mem_addr_cells;
+			len -= 4 * regions.reserved_mem_addr_cells;
+
+			size = fdt32_to_cpu(reg[0]);
+			if (regions.reserved_mem_size_cells == 2)
+				size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+			reg += regions.reserved_mem_size_cells;
+			len -= 4 * regions.reserved_mem_size_cells;
+
+			if (base >= regions.pa_end)
+				continue;
+
+			rsv_end = min(base + size, (u64)U32_MAX);
+
+			if (regions_overlap(start, end, base, rsv_end))
+				return true;
+		}
+	}
+	return false;
+}
+
+static __init bool overlaps_region(const void *fdt, u32 start,
+				   u32 end)
+{
+	if (regions_overlap(start, end, __pa(_stext), __pa(_end)))
+		return true;
+
+	if (regions_overlap(start, end, regions.dtb_start,
+			    regions.dtb_end))
+		return true;
+
+	if (regions_overlap(start, end, regions.initrd_start,
+			    regions.initrd_end))
+		return true;
+
+	if (regions_overlap(start, end, regions.crash_start,
+			    regions.crash_end))
+		return true;
+
+	return overlaps_reserved_region(fdt, start, end);
+}
+
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_CORE
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, size, &crash_size,
+				&crash_base);
+	if (ret != 0 || crash_size == 0)
+		return;
+	if (crash_base == 0)
+		crash_base = KDUMP_KERNELBASE;
+
+	regions.crash_start = (unsigned long)crash_base;
+	regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+	pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+static void __init get_initrd_range(void *fdt)
+{
+	u64 start, end;
+	int node, len;
+	const __be32 *prop;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return;
+
+	prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+	if (!prop)
+		return;
+	start = of_read_number(prop, len / 4);
+
+	prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+	if (!prop)
+		return;
+	end = of_read_number(prop, len / 4);
+
+	regions.initrd_start = (unsigned long)start;
+	regions.initrd_end = (unsigned long)end;
+
+	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n", start, end);
+}
+
+static __init unsigned long get_usable_address(const void *fdt,
+					       unsigned long start,
+					       unsigned long offset)
+{
+	unsigned long pa;
+	unsigned long pa_end;
+
+	for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) {
+		pa_end = pa + regions.kernel_size;
+		if (overlaps_region(fdt, pa, pa_end))
+			continue;
+
+		return pa;
+	}
+	return 0;
+}
+
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+				  int *size_cells)
+{
+	const int *prop;
+	int len;
+
+	/*
+	 * Retrieve the #address-cells and #size-cells properties
+	 * from the 'node', or use the default if not provided.
+	 */
+	*addr_cells = *size_cells = 1;
+
+	prop = fdt_getprop(fdt, node, "#address-cells", &len);
+	if (len == 4)
+		*addr_cells = fdt32_to_cpu(*prop);
+	prop = fdt_getprop(fdt, node, "#size-cells", &len);
+	if (len == 4)
+		*size_cells = fdt32_to_cpu(*prop);
+}
+
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+					       unsigned long offset)
+{
+	unsigned long koffset = 0;
+	unsigned long start;
+
+	while ((long)index >= 0) {
+		offset = memstart_addr + index * SZ_64M + offset;
+		start = memstart_addr + index * SZ_64M;
+		koffset = get_usable_address(dt_ptr, start, offset);
+		if (koffset)
+			break;
+		index--;
+	}
+
+	if (koffset != 0)
+		koffset -= memstart_addr;
+
+	return koffset;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+	return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+						  unsigned long kernel_sz)
+{
+	unsigned long offset, random;
+	unsigned long ram, linear_sz;
+	u64 seed;
+	unsigned long index;
+
+	kaslr_get_cmdline(dt_ptr);
+	if (kaslr_disabled())
+		return 0;
+
+	random = get_boot_seed(dt_ptr);
+
+	seed = get_tb() << 32;
+	seed ^= get_tb();
+	random = rotate_xor(random, &seed, sizeof(seed));
+
+	/*
+	 * Retrieve (and wipe) the seed from the FDT
+	 */
+	seed = get_kaslr_seed(dt_ptr);
+	if (seed)
+		random = rotate_xor(random, &seed, sizeof(seed));
+	else
+		pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+	ram = min_t(phys_addr_t, __max_low_memory, size);
+	ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true);
+	linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+	/* If the linear size is smaller than 64M, do not randmize */
+	if (linear_sz < SZ_64M)
+		return 0;
+
+	/* check for a reserved-memory node and record its cell sizes */
+	regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+	if (regions.reserved_mem >= 0)
+		get_cell_sizes(dt_ptr, regions.reserved_mem,
+			       &regions.reserved_mem_addr_cells,
+			       &regions.reserved_mem_size_cells);
+
+	regions.pa_start = memstart_addr;
+	regions.pa_end = memstart_addr + linear_sz;
+	regions.dtb_start = __pa(dt_ptr);
+	regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+	regions.kernel_size = kernel_sz;
+
+	get_initrd_range(dt_ptr);
+	get_crash_kernel(dt_ptr, ram);
+
+	/*
+	 * Decide which 64M we want to start
+	 * Only use the low 8 bits of the random seed
+	 */
+	index = random & 0xFF;
+	index %= linear_sz / SZ_64M;
+
+	/* Decide offset inside 64M */
+	offset = random % (SZ_64M - kernel_sz);
+	offset = round_down(offset, SZ_16K);
+
+	return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
+/*
+ * To see if we need to relocate the kernel to a random offset
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+	unsigned long tlb_virt;
+	phys_addr_t tlb_phys;
+	unsigned long offset;
+	unsigned long kernel_sz;
+
+	kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+	offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+	if (offset == 0)
+		return;
+
+	kernstart_virt_addr += offset;
+	kernstart_addr += offset;
+
+	is_second_reloc = 1;
+
+	if (offset >= SZ_64M) {
+		tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+		tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+		/* Create kernel map to relocate in */
+		create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+	}
+
+	/* Copy the kernel to it's new location and run */
+	memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+	flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+	reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+void __init kaslr_late_init(void)
+{
+	/* If randomized, clear the original kernel */
+	if (kernstart_virt_addr != KERNELBASE) {
+		unsigned long kernel_sz;
+
+		kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+		memzero_explicit((void *)KERNELBASE, kernel_sz);
+	}
+}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8ec5dfb65b2e..73b84166d06a 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -117,10 +117,7 @@ void __init mapin_ram(void)
 		if (base >= top)
 			continue;
 		base = mmu_mapin_ram(base, top);
-		if (IS_ENABLED(CONFIG_BDI_SWITCH))
-			__mapin_ram_chunk(reg->base, top);
-		else
-			__mapin_ram_chunk(base, top);
+		__mapin_ram_chunk(base, top);
 	}
 }
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 42bbcd47cc85..dffe1a45b6ed 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
 
 #endif
 
-static inline bool slice_addr_is_low(unsigned long addr)
+static inline notrace bool slice_addr_is_low(unsigned long addr)
 {
 	u64 tmp = (u64)addr;
 
@@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 				       mm_ctx_user_psize(&current->mm->context), 1);
 }
 
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned char *psizes;
 	int index, mask_index;
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 6e5a2a4faeab..4ec2a9f14f84 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -97,12 +97,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC64
 #define PPC_BPF_LOAD_CPU(r)		\
-	do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2);	\
+	do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2);	\
 		PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index));	\
 	} while (0)
 #else
 #define PPC_BPF_LOAD_CPU(r)     \
-	do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4);		\
+	do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4);		\
 		PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu));		\
 	} while(0)
 #endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d57b46e0dd60..0acc9d5fb19e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -321,7 +321,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
 			break;
 		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+			BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
 		case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
@@ -333,16 +333,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 
 			/*** Ancillary info loads ***/
 		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+			BUILD_BUG_ON(sizeof_field(struct sk_buff,
 						  protocol) != 2);
 			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							    protocol));
 			break;
 		case BPF_ANC | SKF_AD_IFINDEX:
 		case BPF_ANC | SKF_AD_HATYPE:
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+			BUILD_BUG_ON(sizeof_field(struct net_device,
 						ifindex) != 4);
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+			BUILD_BUG_ON(sizeof_field(struct net_device,
 						type) != 2);
 			PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
@@ -365,17 +365,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 
 			break;
 		case BPF_ANC | SKF_AD_MARK:
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+			BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  mark));
 			break;
 		case BPF_ANC | SKF_AD_RXHASH:
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+			BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  hash));
 			break;
 		case BPF_ANC | SKF_AD_VLAN_TAG:
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+			BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
 
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  vlan_tci));
@@ -388,7 +388,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 				PPC_ANDI(r_A, r_A, 1);
 			break;
 		case BPF_ANC | SKF_AD_QUEUE:
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+			BUILD_BUG_ON(sizeof_field(struct sk_buff,
 						  queue_mapping) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  queue_mapping));
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index 43245f4a9bcb..6ffcb80cf844 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
-#include <asm/compat.h>
+#include <linux/compat.h>
 #include <asm/oprofile_impl.h>
 
 #define STACK_SP(STACK)		*(STACK)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index c84bbd4298a0..35d542515faf 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -284,16 +284,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
 	}
 }
 
-static inline int current_is_64bit(void)
-{
-	/*
-	 * We can't use test_thread_flag() here because we may be on an
-	 * interrupt stack, and the thread flags don't get copied over
-	 * from the thread_info on the main stack to the interrupt stack.
-	 */
-	return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
 #else  /* CONFIG_PPC64 */
 /*
  * On 32-bit we just access the address and let hash_page create a
@@ -321,11 +311,6 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry
 {
 }
 
-static inline int current_is_64bit(void)
-{
-	return 0;
-}
-
 static inline int valid_user_sp(unsigned long sp, int is_64)
 {
 	if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
@@ -486,7 +471,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
-	if (current_is_64bit())
+	if (!is_32bit_task())
 		perf_callchain_user_64(entry, regs);
 	else
 		perf_callchain_user_32(entry, regs);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ca92e01d0bd1..48604625ab31 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
 {
 	return 0;
 }
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
 {
 	return 0;
@@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
 static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
@@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
  * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
  * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
  */
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
 {
 	unsigned long mmcra = regs->dsisr;
 	bool sdar_valid;
@@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 
-	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-		is_kernel_addr(mfspr(SPRN_SDAR)))
+	if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
 		*addrp = 0;
 }
 
@@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 }
 
 /* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
 {
 	u64 val;
 	u64 addr;
@@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 			 * exporting it to userspace (avoid exposure of regions
 			 * where we could have speculative execution)
 			 */
-			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-				is_kernel_addr(addr))
+			if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
 				continue;
 
 			/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 		if (event->attr.sample_type &
 		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
-			perf_get_data_addr(regs, &data.addr);
+			perf_get_data_addr(event, regs, &data.addr);
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
 			struct cpu_hw_events *cpuhw;
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
-			power_pmu_bhrb_read(cpuhw);
+			power_pmu_bhrb_read(event, cpuhw);
 			data.br_stack = &cpuhw->bhrb_stack;
 		}
 
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index ba12dc14a3d1..8c0d324f657e 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -650,6 +650,7 @@ static const struct file_operations mpc52xx_wdt_fops = {
 	.llseek		= no_llseek,
 	.write		= mpc52xx_wdt_write,
 	.unlocked_ioctl = mpc52xx_wdt_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.open		= mpc52xx_wdt_open,
 	.release	= mpc52xx_wdt_release,
 };
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
index f46d7bf3b140..6399865a625e 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -18,6 +18,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include <mm/mmu_decl.h>
+
 #include "mpc83xx.h"
 
 static __be32 __iomem *restart_reg_base;
@@ -145,6 +147,15 @@ void __init mpc83xx_setup_arch(void)
 	if (ppc_md.progress)
 		ppc_md.progress("mpc83xx_setup_arch()", 0);
 
+	if (!__map_without_bats) {
+		phys_addr_t immrbase = get_immrbase();
+		int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M;
+		unsigned long va = fix_to_virt(FIX_IMMR_BASE);
+
+		setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG);
+		update_bats();
+	}
+
 	mpc83xx_setup_pci();
 }
 
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 4a4efa906d35..240a26d88b07 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -39,7 +39,6 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
 #include <soc/fsl/qe/qe.h>
 #include <soc/fsl/qe/qe_ic.h>
 
@@ -181,12 +180,6 @@ static int __init mpc836x_usb_cfg(void)
 		qe_usb_clock_set(QE_CLK21, 48000000);
 	} else {
 		setbits8(&bcsr[13], BCSR13_USBMODE);
-		/*
-		 * The BCSR GPIOs are used to control power and
-		 * speed of the USB transceiver. This is needed for
-		 * the USB Host only.
-		 */
-		simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
 	}
 
 	of_node_put(np);
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index fe0606439b5a..a554b6d87cf7 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -86,29 +86,6 @@ void __init mpc85xx_cpm2_pic_init(void)
 #endif
 
 #ifdef CONFIG_QUICC_ENGINE
-void __init mpc85xx_qe_init(void)
-{
-	struct device_node *np;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!np) {
-		np = of_find_node_by_name(NULL, "qe");
-		if (!np) {
-			pr_err("%s: Could not find Quicc Engine node\n",
-					__func__);
-			return;
-		}
-	}
-
-	if (!of_device_is_available(np)) {
-		of_node_put(np);
-		return;
-	}
-
-	of_node_put(np);
-
-}
-
 void __init mpc85xx_qe_par_io_init(void)
 {
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 7ee2c6628f64..a328a741b457 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -66,8 +66,6 @@ void __init corenet_gen_setup_arch(void)
 	swiotlb_detect_4g();
 
 	pr_info("%s board\n", ppc_md.name);
-
-	mpc85xx_qe_init();
 }
 
 static const struct of_device_id of_device_ids[] = {
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
index fa23f9b0592c..cb84c5c56c36 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx.h
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -10,10 +10,8 @@ static inline void __init mpc85xx_cpm2_pic_init(void) {}
 #endif /* CONFIG_CPM2 */
 
 #ifdef CONFIG_QUICC_ENGINE
-extern void mpc85xx_qe_init(void);
 extern void mpc85xx_qe_par_io_init(void);
 #else
-static inline void __init mpc85xx_qe_init(void) {}
 static inline void __init mpc85xx_qe_par_io_init(void) {}
 #endif
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 5ca254256c47..381a6ac8cb4b 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -43,7 +43,6 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
 #include <soc/fsl/qe/qe.h>
 #include <soc/fsl/qe/qe_ic.h>
 #include <asm/mpic.h>
@@ -238,7 +237,6 @@ static void __init mpc85xx_mds_qe_init(void)
 {
 	struct device_node *np;
 
-	mpc85xx_qe_init();
 	mpc85xx_qe_par_io_init();
 	mpc85xx_mds_reset_ucc_phys();
 
@@ -351,11 +349,6 @@ machine_arch_initcall(mpc8569_mds, board_fixups);
 
 static int __init mpc85xx_publish_devices(void)
 {
-	if (machine_is(mpc8568_mds))
-		simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio");
-	if (machine_is(mpc8569_mds))
-		simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
-
 	return mpc85xx_common_publish_devices();
 }
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index d3c540ee558f..7f9a84f85766 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -89,7 +89,6 @@ static void __init mpc85xx_rdb_setup_arch(void)
 	fsl_pci_assign_primary();
 
 #ifdef CONFIG_QUICC_ENGINE
-	mpc85xx_qe_init();
 	mpc85xx_qe_par_io_init();
 #if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
 	if (machine_is(p1025_rdb)) {
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index 720b0c0f03ba..6c3c0cdaee9a 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -72,7 +72,6 @@ static void __init twr_p1025_setup_arch(void)
 	fsl_pci_assign_primary();
 
 #ifdef CONFIG_QUICC_ENGINE
-	mpc85xx_qe_init();
 	mpc85xx_qe_par_io_init();
 
 #if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 96b27f6fdd0f..7733d0607da2 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -34,7 +34,6 @@
 #include <linux/of_platform.h>
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
-#include <sysdev/simple_gpio.h>
 
 #include "mpc86xx.h"
 
@@ -93,9 +92,6 @@ static const struct of_device_id mpc8610_ids[] __initconst = {
 
 static int __init mpc8610_declare_of_platform_devices(void)
 {
-	/* Firstly, register PIXIS GPIOs. */
-	simple_gpiochip_init("fsl,fpga-pixis-gpio-bank");
-
 	/* Enable wakeup on PIXIS' event IRQ. */
 	mpc8610_suspend_init();
 
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 0f65c51271db..a43ee7d1ff85 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -51,7 +51,7 @@
 #define CPM_MAP_SIZE    (0x4000)
 
 cpm8xx_t __iomem *cpmp;  /* Pointer to comm processor space */
-immap_t __iomem *mpc8xx_immr;
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
 static cpic8xx_t __iomem *cpic_reg;
 
 static struct irq_domain *cpm_pic_host;
@@ -130,7 +130,7 @@ static const struct irq_domain_ops cpm_pic_host_ops = {
 	.map = cpm_pic_host_map,
 };
 
-unsigned int cpm_pic_init(void)
+unsigned int __init cpm_pic_init(void)
 {
 	struct device_node *np = NULL;
 	struct resource res;
@@ -201,12 +201,6 @@ void __init cpm_reset(void)
 {
 	sysconf8xx_t __iomem *siu_conf;
 
-	mpc8xx_immr = ioremap(get_immrbase(), 0x4000);
-	if (!mpc8xx_immr) {
-		printk(KERN_CRIT "Could not map IMMR\n");
-		return;
-	}
-
 	cpmp = &mpc8xx_immr->im_cpm;
 
 #ifndef CONFIG_PPC_EARLY_DEBUG_CPM
@@ -306,7 +300,7 @@ struct cpm_ioport32e {
 	__be32 dir, par, sor, odr, dat;
 };
 
-static void cpm1_set_pin32(int port, int pin, int flags)
+static void __init cpm1_set_pin32(int port, int pin, int flags)
 {
 	struct cpm_ioport32e __iomem *iop;
 	pin = 1 << (31 - pin);
@@ -348,7 +342,7 @@ static void cpm1_set_pin32(int port, int pin, int flags)
 	}
 }
 
-static void cpm1_set_pin16(int port, int pin, int flags)
+static void __init cpm1_set_pin16(int port, int pin, int flags)
 {
 	struct cpm_ioport16 __iomem *iop =
 		(struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
@@ -386,7 +380,7 @@ static void cpm1_set_pin16(int port, int pin, int flags)
 	}
 }
 
-void cpm1_set_pin(enum cpm_port port, int pin, int flags)
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
 {
 	if (port == CPM_PORTB || port == CPM_PORTE)
 		cpm1_set_pin32(port, pin, flags);
@@ -394,7 +388,7 @@ void cpm1_set_pin(enum cpm_port port, int pin, int flags)
 		cpm1_set_pin16(port, pin, flags);
 }
 
-int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
 {
 	int shift;
 	int i, bits = 0;
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index e9617d35fd1f..f2ba837249d6 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -125,7 +125,7 @@ static const struct irq_domain_ops mpc8xx_pic_host_ops = {
 	.xlate = mpc8xx_pic_host_xlate,
 };
 
-int mpc8xx_pic_init(void)
+int __init mpc8xx_pic_init(void)
 {
 	struct resource res;
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index d82e3664ffdf..e28df298df56 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -303,16 +303,6 @@ config GEN_RTC
 	  replacing their get_rtc_time/set_rtc_time callbacks with
 	  a proper RTC device driver.
 
-config SIMPLE_GPIO
-	bool "Support for simple, memory-mapped GPIO controllers"
-	depends on PPC
-	select GPIOLIB
-	help
-	  Say Y here to support simple, memory-mapped GPIO controllers.
-	  These are usually BCSRs used to control board's switches, LEDs,
-	  chip-selects, Ethernet/USB PHY's power and various other small
-	  on-board peripherals.
-
 config MCU_MPC8349EMITX
 	bool "MPC8349E-mITX MCU driver"
 	depends on I2C=y && PPC_83xx
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 12543e53fa96..8d7f9c3dc771 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -415,13 +415,13 @@ config PPC_MM_SLICES
 	bool
 
 config PPC_HAVE_PMU_SUPPORT
-       bool
+	bool
 
 config PPC_PERF_CTRS
-       def_bool y
-       depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
-       help
-         This enables the powerpc-specific perf_event back-end.
+	def_bool y
+	depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+	help
+	 This enables the powerpc-specific perf_event back-end.
 
 config FORCE_SMP
 	# Allow platforms to force SMP=y by selecting this
@@ -459,7 +459,6 @@ config NOT_COHERENT_CACHE
 	bool
 	depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
 		GAMECUBE_COMMON || AMIGAONE
-	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 9680d766f20e..855eedb8d7d7 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -240,9 +240,6 @@ static void __init cell_setup_arch(void)
 	init_pci_config_tokens();
 
 	cbe_pervasive_init();
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
 
 	mmio_nvram_init();
 }
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 2dd452a047cd..9b1586b85152 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -198,14 +198,12 @@ static int spufs_fill_dir(struct dentry *dir,
 
 static int spufs_dir_close(struct inode *inode, struct file *file)
 {
-	struct spu_context *ctx;
 	struct inode *parent;
 	struct dentry *dir;
 	int ret;
 
 	dir = file->f_path.dentry;
 	parent = d_inode(dir->d_parent);
-	ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
 	inode_lock_nested(parent, I_MUTEX_PARENT);
 	ret = spufs_rmdir(parent, dir);
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 9cd6f3e1000b..47f73103ef74 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -183,9 +183,6 @@ static void __init maple_setup_arch(void)
 	/* Lookup PCI hosts */
        	maple_pci_init();
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
 	maple_use_rtas_reboot_and_halt_if_present();
 
 	printk(KERN_DEBUG "Using native/NAP idle loop\n");
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 05a52f10c2f0..b612474f8f8e 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -147,10 +147,6 @@ static void __init pas_setup_arch(void)
 	/* Lookup PCI hosts */
 	pas_pci_init();
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	/* Remap SDC register for doing reset */
 	/* XXXOJN This should maybe come out of the device tree */
 	reset_reg = ioremap(0xfc101100, 4);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index a3ac9646119d..c0f8120045c3 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
 obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o
 obj-$(CONFIG_OCXL_BASE)	+= ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index a2aa5e433ac8..5cd0f52d258f 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -290,3 +290,6 @@ OPAL_CALL(opal_nx_coproc_init,			OPAL_NX_COPROC_INIT);
 OPAL_CALL(opal_mpipl_update,			OPAL_MPIPL_UPDATE);
 OPAL_CALL(opal_mpipl_register_tag,		OPAL_MPIPL_REGISTER_TAG);
 OPAL_CALL(opal_mpipl_query_tag,			OPAL_MPIPL_QUERY_TAG);
+OPAL_CALL(opal_secvar_get,			OPAL_SECVAR_GET);
+OPAL_CALL(opal_secvar_get_next,			OPAL_SECVAR_GET_NEXT);
+OPAL_CALL(opal_secvar_enqueue_update,		OPAL_SECVAR_ENQUEUE_UPDATE);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index e04b20625cb9..000b350d4060 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -59,10 +59,6 @@ static void export_imc_mode_and_cmd(struct device_node *node,
 
 	imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
 
-	/*
-	 * Return here, either because 'imc' directory already exists,
-	 * Or failed to create a new one.
-	 */
 	if (!imc_debugfs_parent)
 		return;
 
@@ -135,7 +131,6 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 	}
 
 	pmu_ptr->imc_counter_mmaped = true;
-	export_imc_mode_and_cmd(node, pmu_ptr);
 	kfree(base_addr_arr);
 	kfree(chipid_arr);
 	return 0;
@@ -151,7 +146,7 @@ error:
  *		    and domain as the inputs.
  * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
  */
-static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 {
 	int ret = 0;
 	struct imc_pmu *pmu_ptr;
@@ -159,27 +154,23 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 
 	/* Return for unknown domain */
 	if (domain < 0)
-		return -EINVAL;
+		return NULL;
 
 	/* memory for pmu */
 	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
 	if (!pmu_ptr)
-		return -ENOMEM;
+		return NULL;
 
 	/* Set the domain */
 	pmu_ptr->domain = domain;
 
 	ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
-	if (ret) {
-		ret = -EINVAL;
+	if (ret)
 		goto free_pmu;
-	}
 
 	if (!of_property_read_u32(parent, "offset", &offset)) {
-		if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
-			ret = -EINVAL;
+		if (imc_get_mem_addr_nest(parent, pmu_ptr, offset))
 			goto free_pmu;
-		}
 	}
 
 	/* Function to register IMC pmu */
@@ -190,14 +181,14 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 		if (pmu_ptr->domain == IMC_DOMAIN_NEST)
 			kfree(pmu_ptr->mem_info);
 		kfree(pmu_ptr);
-		return ret;
+		return NULL;
 	}
 
-	return 0;
+	return pmu_ptr;
 
 free_pmu:
 	kfree(pmu_ptr);
-	return ret;
+	return NULL;
 }
 
 static void disable_nest_pmu_counters(void)
@@ -254,6 +245,7 @@ int get_max_nest_dev(void)
 static int opal_imc_counters_probe(struct platform_device *pdev)
 {
 	struct device_node *imc_dev = pdev->dev.of_node;
+	struct imc_pmu *pmu;
 	int pmu_count = 0, domain;
 	bool core_imc_reg = false, thread_imc_reg = false;
 	u32 type;
@@ -269,6 +261,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 	}
 
 	for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+		pmu = NULL;
 		if (of_property_read_u32(imc_dev, "type", &type)) {
 			pr_warn("IMC Device without type property\n");
 			continue;
@@ -285,7 +278,14 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 			domain = IMC_DOMAIN_THREAD;
 			break;
 		case IMC_TYPE_TRACE:
-			domain = IMC_DOMAIN_TRACE;
+			/*
+			 * FIXME. Using trace_imc events to monitor application
+			 * or KVM thread performance can cause a checkstop
+			 * (system crash).
+			 * Disable it for now.
+			 */
+			pr_info_once("IMC: disabling trace_imc PMU\n");
+			domain = -1;
 			break;
 		default:
 			pr_warn("IMC Unknown Device type \n");
@@ -293,9 +293,13 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 			break;
 		}
 
-		if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
-			if (domain == IMC_DOMAIN_NEST)
+		pmu = imc_pmu_create(imc_dev, pmu_count, domain);
+		if (pmu != NULL) {
+			if (domain == IMC_DOMAIN_NEST) {
+				if (!imc_debugfs_parent)
+					export_imc_mode_and_cmd(imc_dev, pmu);
 				pmu_count++;
+			}
 			if (domain == IMC_DOMAIN_CORE)
 				core_imc_reg = true;
 			if (domain == IMC_DOMAIN_THREAD)
@@ -303,10 +307,6 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 		}
 	}
 
-	/* If none of the nest units are registered, remove debugfs interface */
-	if (pmu_count == 0)
-		debugfs_remove_recursive(imc_debugfs_parent);
-
 	/* If core imc is not registered, unregister thread-imc */
 	if (!core_imc_reg && thread_imc_reg)
 		unregister_thread_imc();
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index dc599e787f78..c16d44f6f1d1 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -13,7 +13,7 @@
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(powercap_mutex);
+static DEFINE_MUTEX(powercap_mutex);
 
 static struct kobject *powercap_kobj;
 
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index b6ccb3026c6c..69d7e75950d1 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -13,11 +13,11 @@
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(psr_mutex);
+static DEFINE_MUTEX(psr_mutex);
 
 static struct kobject *psr_kobj;
 
-struct psr_attr {
+static struct psr_attr {
 	u32 handle;
 	struct kobj_attribute attr;
 } *psr_attrs;
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
new file mode 100644
index 000000000000..14133e120bdd
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PowerNV code for secure variables
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Claudio Carvalho
+ *         Nayna Jain
+ *
+ * APIs to access secure variables managed by OPAL.
+ */
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <asm/opal.h>
+#include <asm/secvar.h>
+#include <asm/secure_boot.h>
+
+static int opal_status_to_err(int rc)
+{
+	int err;
+
+	switch (rc) {
+	case OPAL_SUCCESS:
+		err = 0;
+		break;
+	case OPAL_UNSUPPORTED:
+		err = -ENXIO;
+		break;
+	case OPAL_PARAMETER:
+		err = -EINVAL;
+		break;
+	case OPAL_RESOURCE:
+		err = -ENOSPC;
+		break;
+	case OPAL_HARDWARE:
+		err = -EIO;
+		break;
+	case OPAL_NO_MEM:
+		err = -ENOMEM;
+		break;
+	case OPAL_EMPTY:
+		err = -ENOENT;
+		break;
+	case OPAL_PARTIAL:
+		err = -EFBIG;
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int opal_get_variable(const char *key, uint64_t ksize,
+			     u8 *data, uint64_t *dsize)
+{
+	int rc;
+
+	if (!key || !dsize)
+		return -EINVAL;
+
+	*dsize = cpu_to_be64(*dsize);
+
+	rc = opal_secvar_get(key, ksize, data, dsize);
+
+	*dsize = be64_to_cpu(*dsize);
+
+	return opal_status_to_err(rc);
+}
+
+static int opal_get_next_variable(const char *key, uint64_t *keylen,
+				  uint64_t keybufsize)
+{
+	int rc;
+
+	if (!key || !keylen)
+		return -EINVAL;
+
+	*keylen = cpu_to_be64(*keylen);
+
+	rc = opal_secvar_get_next(key, keylen, keybufsize);
+
+	*keylen = be64_to_cpu(*keylen);
+
+	return opal_status_to_err(rc);
+}
+
+static int opal_set_variable(const char *key, uint64_t ksize, u8 *data,
+			     uint64_t dsize)
+{
+	int rc;
+
+	if (!key || !data)
+		return -EINVAL;
+
+	rc = opal_secvar_enqueue_update(key, ksize, data, dsize);
+
+	return opal_status_to_err(rc);
+}
+
+static const struct secvar_operations opal_secvar_ops = {
+	.get = opal_get_variable,
+	.get_next = opal_get_next_variable,
+	.set = opal_set_variable,
+};
+
+static int opal_secvar_probe(struct platform_device *pdev)
+{
+	if (!opal_check_token(OPAL_SECVAR_GET)
+			|| !opal_check_token(OPAL_SECVAR_GET_NEXT)
+			|| !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) {
+		pr_err("OPAL doesn't support secure variables\n");
+		return -ENODEV;
+	}
+
+	set_secvar_ops(&opal_secvar_ops);
+
+	return 0;
+}
+
+static const struct of_device_id opal_secvar_match[] = {
+	{ .compatible = "ibm,secvar-backend",},
+	{},
+};
+
+static struct platform_driver opal_secvar_driver = {
+	.driver = {
+		.name = "secvar",
+		.of_match_table = opal_secvar_match,
+	},
+};
+
+static int __init opal_secvar_init(void)
+{
+	return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe);
+}
+device_initcall(opal_secvar_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 31f13c13275f..f8ae1fb0c102 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -13,7 +13,7 @@
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(sg_mutex);
+static DEFINE_MUTEX(sg_mutex);
 
 static struct kobject *sg_kobj;
 
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 38e90270280b..a6ee08009f0f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -35,6 +35,16 @@
 
 #include "powernv.h"
 
+#define OPAL_MSG_QUEUE_MAX 16
+
+struct opal_msg_node {
+	struct list_head	list;
+	struct opal_msg		msg;
+};
+
+static DEFINE_SPINLOCK(msg_list_lock);
+static LIST_HEAD(msg_list);
+
 /* /sys/firmware/opal */
 struct kobject *opal_kobj;
 
@@ -50,6 +60,8 @@ struct mcheck_recoverable_range {
 	u64 recover_addr;
 };
 
+static int msg_list_size;
+
 static struct mcheck_recoverable_range *mc_recoverable_range;
 static int mc_recoverable_range_len;
 
@@ -237,6 +249,43 @@ static int __init opal_register_exception_handlers(void)
 }
 machine_early_initcall(powernv, opal_register_exception_handlers);
 
+static void queue_replay_msg(void *msg)
+{
+	struct opal_msg_node *msg_node;
+
+	if (msg_list_size < OPAL_MSG_QUEUE_MAX) {
+		msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+		if (msg_node) {
+			INIT_LIST_HEAD(&msg_node->list);
+			memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+			list_add_tail(&msg_node->list, &msg_list);
+			msg_list_size++;
+		} else
+			pr_warn_once("message queue no memory\n");
+
+		if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+			pr_warn_once("message queue full\n");
+	}
+}
+
+static void dequeue_replay_msg(enum opal_msg_type msg_type)
+{
+	struct opal_msg_node *msg_node, *tmp;
+
+	list_for_each_entry_safe(msg_node, tmp, &msg_list, list) {
+		if (be32_to_cpu(msg_node->msg.msg_type) != msg_type)
+			continue;
+
+		atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+					msg_type,
+					&msg_node->msg);
+
+		list_del(&msg_node->list);
+		kfree(msg_node);
+		msg_list_size--;
+	}
+}
+
 /*
  * Opal message notifier based on message type. Allow subscribers to get
  * notified for specific messgae type.
@@ -244,14 +293,30 @@ machine_early_initcall(powernv, opal_register_exception_handlers);
 int opal_message_notifier_register(enum opal_msg_type msg_type,
 					struct notifier_block *nb)
 {
+	int ret;
+	unsigned long flags;
+
 	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
 		pr_warn("%s: Invalid arguments, msg_type:%d\n",
 			__func__, msg_type);
 		return -EINVAL;
 	}
 
-	return atomic_notifier_chain_register(
-				&opal_msg_notifier_head[msg_type], nb);
+	spin_lock_irqsave(&msg_list_lock, flags);
+	ret = atomic_notifier_chain_register(
+		&opal_msg_notifier_head[msg_type], nb);
+
+	/*
+	 * If the registration succeeded, replay any queued messages that came
+	 * in prior to the notifier chain registration. msg_list_lock held here
+	 * to ensure they're delivered prior to any subsequent messages.
+	 */
+	if (ret == 0)
+		dequeue_replay_msg(msg_type);
+
+	spin_unlock_irqrestore(&msg_list_lock, flags);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 
@@ -265,6 +330,23 @@ EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
 
 static void opal_message_do_notify(uint32_t msg_type, void *msg)
 {
+	unsigned long flags;
+	bool queued = false;
+
+	spin_lock_irqsave(&msg_list_lock, flags);
+	if (opal_msg_notifier_head[msg_type].head == NULL) {
+		/*
+		 * Queue up the msg since no notifiers have registered
+		 * yet for this msg_type.
+		 */
+		queue_replay_msg(msg);
+		queued = true;
+	}
+	spin_unlock_irqrestore(&msg_list_lock, flags);
+
+	if (queued)
+		return;
+
 	/* notify subscribers */
 	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
 					msg_type, msg);
@@ -1002,6 +1084,9 @@ static int __init opal_init(void)
 	/* Initialise OPAL Power control interface */
 	opal_power_control_init();
 
+	/* Initialize OPAL secure variables */
+	opal_pdev_init("ibm,secvar-backend");
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index a0b9c0c23ed2..5dc6847d5f4c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -340,14 +340,6 @@ free_tces_exit:
 	return -ENOMEM;
 }
 
-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
-	struct iommu_table_group_link *tgl = container_of(head,
-			struct iommu_table_group_link, rcu);
-
-	kfree(tgl);
-}
-
 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 		struct iommu_table_group *table_group)
 {
@@ -363,7 +355,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
 		if (tgl->table_group == table_group) {
 			list_del_rcu(&tgl->next);
-			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+			kfree_rcu(tgl, rcu);
 			found = true;
 			break;
 		}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c28d0d9b7ee0..da1068a9c263 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3086,8 +3086,8 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL,
-			pnv_pci_diag_data_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+			 "%llu\n");
 
 #endif /* CONFIG_DEBUG_FS */
 
@@ -3112,8 +3112,8 @@ static void pnv_pci_ioda_create_dbgfs(void)
 			continue;
 		}
 
-		debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose,
-				    &pnv_pci_diag_data_fops);
+		debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+					   hose, &pnv_pci_diag_data_fops);
 	}
 #endif /* CONFIG_DEBUG_FS */
 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2825d004dece..c0bea75ac27b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -945,6 +945,23 @@ void __init pnv_pci_init(void)
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return;
 
+#ifdef CONFIG_PCIEPORTBUS
+	/*
+	 * On PowerNV PCIe devices are (currently) managed in cooperation
+	 * with firmware. This isn't *strictly* required, but there's enough
+	 * assumptions baked into both firmware and the platform code that
+	 * it's unwise to allow the portbus services to be used.
+	 *
+	 * We need to fix this eventually, but for now set this flag to disable
+	 * the portbus driver. The AER service isn't required since that AER
+	 * events are handled via EEH. The pciehp hotplug driver can't work
+	 * without kernel changes (and portbus binding breaks pnv_php). The
+	 * other services also require some thinking about how we're going
+	 * to integrate them.
+	 */
+	pcie_ports_disabled = true;
+#endif
+
 	/* Look for IODA IO-Hubs. */
 	for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
 		pnv_pci_init_ioda_hub(np);
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 8108b9b9b9ea..b29368931c56 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -200,10 +200,6 @@ static void __init ps3_setup_arch(void)
 	smp_init_ps3();
 #endif
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	prealloc_ps3fb_videomemory();
 	prealloc_ps3flash_bounce_buffer();
 
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 9e35cddddf73..595e9f8a6539 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -108,6 +108,7 @@ config PPC_SMLPAR
 config CMM
 	tristate "Collaborative memory management"
 	depends on PPC_SMLPAR
+	select MEMORY_BALLOON
 	default y
 	help
 	  Select this option, if you want to enable the kernel interface
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b33251d75927..9dba7e880885 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -19,6 +19,10 @@
 #include <linux/stringify.h>
 #include <linux/swap.h>
 #include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
@@ -38,12 +42,8 @@
 #define CMM_MIN_MEM_MB		256
 #define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
 #define CMM_MEM_HOTPLUG_PRI	1
-#define CMM_MEM_ISOLATE_PRI	15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
 static struct device cmm_dev;
 
 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
@@ -74,35 +76,31 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
 module_param_named(debug, cmm_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
 		 "[Default=" __stringify(CMM_DEBUG) "]");
-
-#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
 
 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
 
-struct cmm_page_array {
-	struct cmm_page_array *next;
-	unsigned long index;
-	unsigned long page[CMM_NR_PAGES];
-};
-
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
 static unsigned long loaned_pages_target;
 static unsigned long oom_freed_pages;
 
-static struct cmm_page_array *cmm_page_list;
-static DEFINE_SPINLOCK(cmm_lock);
-
 static DEFINE_MUTEX(hotplug_mutex);
 static int hotplug_occurred; /* protected by the hotplug mutex */
 
 static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
 
-static long plpar_page_set_loaned(unsigned long vpa)
+static long plpar_page_set_loaned(struct page *page)
 {
+	const unsigned long vpa = page_to_phys(page);
 	unsigned long cmo_page_sz = cmo_get_page_size();
 	long rc = 0;
 	int i;
 
+	if (unlikely(simulate))
+		return 0;
+
 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
 
@@ -113,12 +111,16 @@ static long plpar_page_set_loaned(unsigned long vpa)
 	return rc;
 }
 
-static long plpar_page_set_active(unsigned long vpa)
+static long plpar_page_set_active(struct page *page)
 {
+	const unsigned long vpa = page_to_phys(page);
 	unsigned long cmo_page_sz = cmo_get_page_size();
 	long rc = 0;
 	int i;
 
+	if (unlikely(simulate))
+		return 0;
+
 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
 
@@ -138,8 +140,7 @@ static long plpar_page_set_active(unsigned long vpa)
  **/
 static long cmm_alloc_pages(long nr)
 {
-	struct cmm_page_array *pa, *npa;
-	unsigned long addr;
+	struct page *page;
 	long rc;
 
 	cmm_dbg("Begin request for %ld pages\n", nr);
@@ -156,46 +157,19 @@ static long cmm_alloc_pages(long nr)
 			break;
 		}
 
-		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
-				       __GFP_NORETRY | __GFP_NOMEMALLOC);
-		if (!addr)
+		page = balloon_page_alloc();
+		if (!page)
 			break;
-		spin_lock(&cmm_lock);
-		pa = cmm_page_list;
-		if (!pa || pa->index >= CMM_NR_PAGES) {
-			/* Need a new page for the page list. */
-			spin_unlock(&cmm_lock);
-			npa = (struct cmm_page_array *)__get_free_page(
-					GFP_NOIO | __GFP_NOWARN |
-					__GFP_NORETRY | __GFP_NOMEMALLOC);
-			if (!npa) {
-				pr_info("%s: Can not allocate new page list\n", __func__);
-				free_page(addr);
-				break;
-			}
-			spin_lock(&cmm_lock);
-			pa = cmm_page_list;
-
-			if (!pa || pa->index >= CMM_NR_PAGES) {
-				npa->next = pa;
-				npa->index = 0;
-				pa = npa;
-				cmm_page_list = pa;
-			} else
-				free_page((unsigned long) npa);
-		}
-
-		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+		rc = plpar_page_set_loaned(page);
+		if (rc) {
 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
-			spin_unlock(&cmm_lock);
-			free_page(addr);
+			__free_page(page);
 			break;
 		}
 
-		pa->page[pa->index++] = addr;
-		loaned_pages++;
-		totalram_pages_dec();
-		spin_unlock(&cmm_lock);
+		balloon_page_enqueue(&b_dev_info, page);
+		atomic_long_inc(&loaned_pages);
+		adjust_managed_page_count(page, -1);
 		nr--;
 	}
 
@@ -212,30 +186,19 @@ static long cmm_alloc_pages(long nr)
  **/
 static long cmm_free_pages(long nr)
 {
-	struct cmm_page_array *pa;
-	unsigned long addr;
+	struct page *page;
 
 	cmm_dbg("Begin free of %ld pages.\n", nr);
-	spin_lock(&cmm_lock);
-	pa = cmm_page_list;
 	while (nr) {
-		if (!pa || pa->index <= 0)
+		page = balloon_page_dequeue(&b_dev_info);
+		if (!page)
 			break;
-		addr = pa->page[--pa->index];
-
-		if (pa->index == 0) {
-			pa = pa->next;
-			free_page((unsigned long) cmm_page_list);
-			cmm_page_list = pa;
-		}
-
-		plpar_page_set_active(__pa(addr));
-		free_page(addr);
-		loaned_pages--;
+		plpar_page_set_active(page);
+		adjust_managed_page_count(page, 1);
+		__free_page(page);
+		atomic_long_dec(&loaned_pages);
 		nr--;
-		totalram_pages_inc();
 	}
-	spin_unlock(&cmm_lock);
 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
 	return nr;
 }
@@ -257,7 +220,7 @@ static int cmm_oom_notify(struct notifier_block *self,
 
 	cmm_dbg("OOM processing started\n");
 	nr = cmm_free_pages(nr);
-	loaned_pages_target = loaned_pages;
+	loaned_pages_target = atomic_long_read(&loaned_pages);
 	*freed += KB2PAGES(oom_kb) - nr;
 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
 	cmm_dbg("OOM processing complete\n");
@@ -274,19 +237,24 @@ static int cmm_oom_notify(struct notifier_block *self,
  **/
 static void cmm_get_mpp(void)
 {
+	const long __loaned_pages = atomic_long_read(&loaned_pages);
+	const long total_pages = totalram_pages() + __loaned_pages;
 	int rc;
 	struct hvcall_mpp_data mpp_data;
 	signed long active_pages_target, page_loan_request, target;
-	signed long total_pages = totalram_pages() + loaned_pages;
 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
-	rc = h_get_mpp(&mpp_data);
-
-	if (rc != H_SUCCESS)
-		return;
-
-	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-	target = page_loan_request + (signed long)loaned_pages;
+	if (likely(!simulate)) {
+		rc = h_get_mpp(&mpp_data);
+		if (rc != H_SUCCESS)
+			return;
+		page_loan_request = div_s64((s64)mpp_data.loan_request,
+					    PAGE_SIZE);
+		target = page_loan_request + __loaned_pages;
+	} else {
+		target = KB2PAGES(simulate_loan_target_kb);
+		page_loan_request = target - __loaned_pages;
+	}
 
 	if (target < 0 || total_pages < min_mem_pages)
 		target = 0;
@@ -307,7 +275,7 @@ static void cmm_get_mpp(void)
 	loaned_pages_target = target;
 
 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
-		page_loan_request, loaned_pages, loaned_pages_target,
+		page_loan_request, __loaned_pages, loaned_pages_target,
 		oom_freed_pages, totalram_pages());
 }
 
@@ -325,6 +293,7 @@ static struct notifier_block cmm_oom_nb = {
 static int cmm_thread(void *dummy)
 {
 	unsigned long timeleft;
+	long __loaned_pages;
 
 	while (1) {
 		timeleft = msleep_interruptible(delay * 1000);
@@ -355,11 +324,12 @@ static int cmm_thread(void *dummy)
 
 		cmm_get_mpp();
 
-		if (loaned_pages_target > loaned_pages) {
-			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
-				loaned_pages_target = loaned_pages;
-		} else if (loaned_pages_target < loaned_pages)
-			cmm_free_pages(loaned_pages - loaned_pages_target);
+		__loaned_pages = atomic_long_read(&loaned_pages);
+		if (loaned_pages_target > __loaned_pages) {
+			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+				loaned_pages_target = __loaned_pages;
+		} else if (loaned_pages_target < __loaned_pages)
+			cmm_free_pages(__loaned_pages - loaned_pages_target);
 	}
 	return 0;
 }
@@ -373,7 +343,7 @@ static int cmm_thread(void *dummy)
 	}							\
 	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
 
 static ssize_t show_oom_pages(struct device *dev,
@@ -406,11 +376,18 @@ static struct device_attribute *cmm_attrs[] = {
 	&dev_attr_oom_freed_kb,
 };
 
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+			 simulate_loan_target_kb);
+
 static struct bus_type cmm_subsys = {
 	.name = "cmm",
 	.dev_name = "cmm",
 };
 
+static void cmm_release_device(struct device *dev)
+{
+}
+
 /**
  * cmm_sysfs_register - Register with sysfs
  *
@@ -426,6 +403,7 @@ static int cmm_sysfs_register(struct device *dev)
 
 	dev->id = 0;
 	dev->bus = &cmm_subsys;
+	dev->release = cmm_release_device;
 
 	if ((rc = device_register(dev)))
 		goto subsys_unregister;
@@ -435,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev)
 			goto fail;
 	}
 
+	if (!simulate)
+		return 0;
+	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+	if (rc)
+		goto fail;
 	return 0;
 
 fail:
@@ -471,7 +454,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
 		if (cmm_thread_ptr)
 			kthread_stop(cmm_thread_ptr);
 		cmm_thread_ptr = NULL;
-		cmm_free_pages(loaned_pages);
+		cmm_free_pages(atomic_long_read(&loaned_pages));
 	}
 	return NOTIFY_DONE;
 }
@@ -481,142 +464,6 @@ static struct notifier_block cmm_reboot_nb = {
 };
 
 /**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- *      0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
-	struct memory_isolate_notify *marg = arg;
-	struct cmm_page_array *pa;
-	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
-	unsigned long idx;
-
-	spin_lock(&cmm_lock);
-	pa = cmm_page_list;
-	while (pa) {
-		if ((unsigned long)pa >= start && (unsigned long)pa < end)
-			marg->pages_found++;
-		for (idx = 0; idx < pa->index; idx++)
-			if (pa->page[idx] >= start && pa->page[idx] < end)
-				marg->pages_found++;
-		pa = pa->next;
-	}
-	spin_unlock(&cmm_lock);
-	return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self:	notifier block struct
- * @action:	action to take
- * @arg:	struct memory_isolate_notify data for handler
- *
- * Return value:
- *	NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
-				 unsigned long action, void *arg)
-{
-	int ret = 0;
-
-	if (action == MEM_ISOLATE_COUNT)
-		ret = cmm_count_pages(arg);
-
-	return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
-	.notifier_call = cmm_memory_isolate_cb,
-	.priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- *	0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
-	struct memory_notify *marg = arg;
-	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
-	struct cmm_page_array *pa_curr, *pa_last, *npa;
-	unsigned long idx;
-	unsigned long freed = 0;
-
-	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
-			start_page, marg->nr_pages);
-	spin_lock(&cmm_lock);
-
-	/* Search the page list for pages in the range to be offlined */
-	pa_last = pa_curr = cmm_page_list;
-	while (pa_curr) {
-		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
-			if ((pa_curr->page[idx] < start_page) ||
-			    (pa_curr->page[idx] >= end_page))
-				continue;
-
-			plpar_page_set_active(__pa(pa_curr->page[idx]));
-			free_page(pa_curr->page[idx]);
-			freed++;
-			loaned_pages--;
-			totalram_pages_inc();
-			pa_curr->page[idx] = pa_last->page[--pa_last->index];
-			if (pa_last->index == 0) {
-				if (pa_curr == pa_last)
-					pa_curr = pa_last->next;
-				pa_last = pa_last->next;
-				free_page((unsigned long)cmm_page_list);
-				cmm_page_list = pa_last;
-			}
-		}
-		pa_curr = pa_curr->next;
-	}
-
-	/* Search for page list structures in the range to be offlined */
-	pa_last = NULL;
-	pa_curr = cmm_page_list;
-	while (pa_curr) {
-		if (((unsigned long)pa_curr >= start_page) &&
-				((unsigned long)pa_curr < end_page)) {
-			npa = (struct cmm_page_array *)__get_free_page(
-					GFP_NOIO | __GFP_NOWARN |
-					__GFP_NORETRY | __GFP_NOMEMALLOC);
-			if (!npa) {
-				spin_unlock(&cmm_lock);
-				cmm_dbg("Failed to allocate memory for list "
-						"management. Memory hotplug "
-						"failed.\n");
-				return -ENOMEM;
-			}
-			memcpy(npa, pa_curr, PAGE_SIZE);
-			if (pa_curr == cmm_page_list)
-				cmm_page_list = npa;
-			if (pa_last)
-				pa_last->next = npa;
-			free_page((unsigned long) pa_curr);
-			freed++;
-			pa_curr = npa;
-		}
-
-		pa_last = pa_curr;
-		pa_curr = pa_curr->next;
-	}
-
-	spin_unlock(&cmm_lock);
-	cmm_dbg("Released %ld pages in the search range.\n", freed);
-
-	return 0;
-}
-
-/**
  * cmm_memory_cb - Handle memory hotplug notifier calls
  * @self:	notifier block struct
  * @action:	action to take
@@ -635,7 +482,6 @@ static int cmm_memory_cb(struct notifier_block *self,
 	case MEM_GOING_OFFLINE:
 		mutex_lock(&hotplug_mutex);
 		hotplug_occurred = 1;
-		ret = cmm_mem_going_offline(arg);
 		break;
 	case MEM_OFFLINE:
 	case MEM_CANCEL_OFFLINE:
@@ -656,6 +502,116 @@ static struct notifier_block cmm_mem_nb = {
 	.priority = CMM_MEM_HOTPLUG_PRI
 };
 
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+	return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type balloon_fs = {
+	.name = "ppc-cmm",
+	.init_fs_context = cmm_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+			   struct page *newpage, struct page *page,
+			   enum migrate_mode mode)
+{
+	unsigned long flags;
+
+	/*
+	 * loan/"inflate" the newpage first.
+	 *
+	 * We might race against the cmm_thread who might discover after our
+	 * loan request that another page is to be unloaned. However, once
+	 * the cmm_thread runs again later, this error will automatically
+	 * be corrected.
+	 */
+	if (plpar_page_set_loaned(newpage)) {
+		/* Unlikely, but possible. Tell the caller not to retry now. */
+		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
+		return -EBUSY;
+	}
+
+	/* balloon page list reference */
+	get_page(newpage);
+
+	/*
+	 * When we migrate a page to a different zone, we have to fixup the
+	 * count of both involved zones as we adjusted the managed page count
+	 * when inflating.
+	 */
+	if (page_zone(page) != page_zone(newpage)) {
+		adjust_managed_page_count(page, 1);
+		adjust_managed_page_count(newpage, -1);
+	}
+
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	balloon_page_insert(b_dev_info, newpage);
+	balloon_page_delete(page);
+	b_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+	/*
+	 * activate/"deflate" the old page. We ignore any errors just like the
+	 * other callers.
+	 */
+	plpar_page_set_active(page);
+
+	/* balloon page list reference */
+	put_page(page);
+
+	return MIGRATEPAGE_SUCCESS;
+}
+
+static int cmm_balloon_compaction_init(void)
+{
+	int rc;
+
+	balloon_devinfo_init(&b_dev_info);
+	b_dev_info.migratepage = cmm_migratepage;
+
+	balloon_mnt = kern_mount(&balloon_fs);
+	if (IS_ERR(balloon_mnt)) {
+		rc = PTR_ERR(balloon_mnt);
+		balloon_mnt = NULL;
+		return rc;
+	}
+
+	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+	if (IS_ERR(b_dev_info.inode)) {
+		rc = PTR_ERR(b_dev_info.inode);
+		b_dev_info.inode = NULL;
+		kern_unmount(balloon_mnt);
+		balloon_mnt = NULL;
+		return rc;
+	}
+
+	b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+	return 0;
+}
+static void cmm_balloon_compaction_deinit(void)
+{
+	if (b_dev_info.inode)
+		iput(b_dev_info.inode);
+	b_dev_info.inode = NULL;
+	kern_unmount(balloon_mnt);
+	balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+static int cmm_balloon_compaction_init(void)
+{
+	return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
 /**
  * cmm_init - Module initialization
  *
@@ -664,26 +620,31 @@ static struct notifier_block cmm_mem_nb = {
  **/
 static int cmm_init(void)
 {
-	int rc = -ENOMEM;
+	int rc;
 
-	if (!firmware_has_feature(FW_FEATURE_CMO))
+	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
 		return -EOPNOTSUPP;
 
-	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+	rc = cmm_balloon_compaction_init();
+	if (rc)
 		return rc;
 
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_balloon_compaction;
+
 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
 		goto out_oom_notifier;
 
 	if ((rc = cmm_sysfs_register(&cmm_dev)))
 		goto out_reboot_notifier;
 
-	if (register_memory_notifier(&cmm_mem_nb) ||
-	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+	rc = register_memory_notifier(&cmm_mem_nb);
+	if (rc)
 		goto out_unregister_notifier;
 
 	if (cmm_disabled)
-		return rc;
+		return 0;
 
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (IS_ERR(cmm_thread_ptr)) {
@@ -691,16 +652,16 @@ static int cmm_init(void)
 		goto out_unregister_notifier;
 	}
 
-	return rc;
-
+	return 0;
 out_unregister_notifier:
 	unregister_memory_notifier(&cmm_mem_nb);
-	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_unregister_sysfs(&cmm_dev);
 out_reboot_notifier:
 	unregister_reboot_notifier(&cmm_reboot_nb);
 out_oom_notifier:
 	unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+	cmm_balloon_compaction_deinit();
 	return rc;
 }
 
@@ -717,9 +678,9 @@ static void cmm_exit(void)
 	unregister_oom_notifier(&cmm_oom_nb);
 	unregister_reboot_notifier(&cmm_reboot_nb);
 	unregister_memory_notifier(&cmm_mem_nb);
-	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
-	cmm_free_pages(loaned_pages);
+	cmm_free_pages(atomic_long_read(&loaned_pages));
 	cmm_unregister_sysfs(&cmm_dev);
+	cmm_balloon_compaction_deinit();
 }
 
 /**
@@ -739,7 +700,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
 		if (cmm_thread_ptr)
 			kthread_stop(cmm_thread_ptr);
 		cmm_thread_ptr = NULL;
-		cmm_free_pages(loaned_pages);
+		cmm_free_pages(atomic_long_read(&loaned_pages));
 	} else if (!disable && cmm_disabled) {
 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 		if (IS_ERR(cmm_thread_ptr))
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 2b87480f2837..eab8aa293743 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -19,7 +19,6 @@
 
 struct dtl {
 	struct dtl_entry	*buf;
-	struct dentry		*file;
 	int			cpu;
 	int			buf_entries;
 	u64			last_idx;
@@ -320,46 +319,28 @@ static const struct file_operations dtl_fops = {
 
 static struct dentry *dtl_dir;
 
-static int dtl_setup_file(struct dtl *dtl)
+static void dtl_setup_file(struct dtl *dtl)
 {
 	char name[10];
 
 	sprintf(name, "cpu-%d", dtl->cpu);
 
-	dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
-	if (!dtl->file)
-		return -ENOMEM;
-
-	return 0;
+	debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
 }
 
 static int dtl_init(void)
 {
-	struct dentry *event_mask_file, *buf_entries_file;
-	int rc, i;
+	int i;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return -ENODEV;
 
 	/* set up common debugfs structure */
 
-	rc = -ENOMEM;
 	dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
-	if (!dtl_dir) {
-		printk(KERN_WARNING "%s: can't create dtl root dir\n",
-				__func__);
-		goto err;
-	}
 
-	event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
-				dtl_dir, &dtl_event_mask);
-	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
-				dtl_dir, &dtl_buf_entries);
-
-	if (!event_mask_file || !buf_entries_file) {
-		printk(KERN_WARNING "%s: can't create dtl files\n", __func__);
-		goto err_remove_dir;
-	}
+	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
 
 	/* set up the per-cpu log structures */
 	for_each_possible_cpu(i) {
@@ -367,16 +348,9 @@ static int dtl_init(void)
 		spin_lock_init(&dtl->lock);
 		dtl->cpu = i;
 
-		rc = dtl_setup_file(dtl);
-		if (rc)
-			goto err_remove_dir;
+		dtl_setup_file(dtl);
 	}
 
 	return 0;
-
-err_remove_dir:
-	debugfs_remove_recursive(dtl_dir);
-err:
-	return rc;
 }
 machine_arch_initcall(pseries, dtl_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index bbda646b63b5..3e8cbfe7a80f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -338,6 +338,62 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+				break;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+				set_preferred_offline_state(cpu,
+							    CPU_STATE_OFFLINE);
+				cpu_maps_update_done();
+				timed_topology_update(1);
+				rc = device_offline(get_cpu_device(cpu));
+				if (rc)
+					goto out;
+				cpu_maps_update_begin();
+				break;
+			}
+
+			/*
+			 * The cpu is in CPU_STATE_INACTIVE.
+			 * Upgrade it's state to CPU_STATE_OFFLINE.
+			 */
+			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+			WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
+			__cpu_die(cpu);
+			break;
+		}
+		if (cpu == num_possible_cpus()) {
+			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+				thread);
+		}
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+}
+
 static int dlpar_online_cpu(struct device_node *dn)
 {
 	int rc = 0;
@@ -364,8 +420,10 @@ static int dlpar_online_cpu(struct device_node *dn)
 			timed_topology_update(1);
 			find_and_online_cpu_nid(cpu);
 			rc = device_online(get_cpu_device(cpu));
-			if (rc)
+			if (rc) {
+				dlpar_offline_cpu(dn);
 				goto out;
+			}
 			cpu_maps_update_begin();
 
 			break;
@@ -407,17 +465,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+	struct property *info;
+	struct of_drc_info drc;
+	const __be32 *value;
+	u32 index;
+	int count, i, j;
+
+	info = of_find_property(parent, "ibm,drc-info", NULL);
+	if (!info)
+		return false;
+
+	value = of_prop_next_u32(info, NULL, &count);
+
+	/* First value of ibm,drc-info is number of drc-info records */
+	if (value)
+		value++;
+	else
+		return false;
+
+	for (i = 0; i < count; i++) {
+		if (of_read_drc_info_cell(&info, &value, &drc))
+			return false;
+
+		if (strncmp(drc.drc_type, "CPU", 3))
+			break;
+
+		if (drc_index > drc.last_drc_index)
+			continue;
+
+		index = drc.drc_index_start;
+		for (j = 0; j < drc.num_sequential_elems; j++) {
+			if (drc_index == index)
+				return true;
+
+			index += drc.sequential_inc;
+		}
+	}
+
+	return false;
+}
+
 static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
 {
 	bool found = false;
 	int rc, index;
 
-	index = 0;
+	if (of_find_property(parent, "ibm,drc-info", NULL))
+		return drc_info_valid_index(parent, drc_index);
+
+	/* Note that the format of the ibm,drc-indexes array is
+	 * the number of entries in the array followed by the array
+	 * of drc values so we start looking at index = 1.
+	 */
+	index = 1;
 	while (!found) {
 		u32 drc;
 
 		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
 						index++, &drc);
+
 		if (rc)
 			break;
 
@@ -505,63 +613,6 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 	return rc;
 }
 
-static int dlpar_offline_cpu(struct device_node *dn)
-{
-	int rc = 0;
-	unsigned int cpu;
-	int len, nthreads, i;
-	const __be32 *intserv;
-	u32 thread;
-
-	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-	if (!intserv)
-		return -EINVAL;
-
-	nthreads = len / sizeof(u32);
-
-	cpu_maps_update_begin();
-	for (i = 0; i < nthreads; i++) {
-		thread = be32_to_cpu(intserv[i]);
-		for_each_present_cpu(cpu) {
-			if (get_hard_smp_processor_id(cpu) != thread)
-				continue;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
-				break;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-				set_preferred_offline_state(cpu,
-							    CPU_STATE_OFFLINE);
-				cpu_maps_update_done();
-				timed_topology_update(1);
-				rc = device_offline(get_cpu_device(cpu));
-				if (rc)
-					goto out;
-				cpu_maps_update_begin();
-				break;
-
-			}
-
-			/*
-			 * The cpu is in CPU_STATE_INACTIVE.
-			 * Upgrade it's state to CPU_STATE_OFFLINE.
-			 */
-			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-			BUG_ON(plpar_hcall_norets(H_PROD, thread)
-								!= H_SUCCESS);
-			__cpu_die(cpu);
-			break;
-		}
-		if (cpu == num_possible_cpus())
-			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
-	}
-	cpu_maps_update_done();
-
-out:
-	return rc;
-
-}
-
 static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 {
 	int rc;
@@ -717,19 +768,52 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 	return rc;
 }
 
-static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
+static int find_drc_info_cpus_to_add(struct device_node *cpus,
+				     struct property *info,
+				     u32 *cpu_drcs, u32 cpus_to_add)
 {
-	struct device_node *parent;
+	struct of_drc_info drc;
+	const __be32 *value;
+	u32 count, drc_index;
 	int cpus_found = 0;
-	int index, rc;
+	int i, j;
 
-	parent = of_find_node_by_path("/cpus");
-	if (!parent) {
-		pr_warn("Could not find CPU root node in device tree\n");
-		kfree(cpu_drcs);
+	if (!info)
 		return -1;
+
+	value = of_prop_next_u32(info, NULL, &count);
+	if (value)
+		value++;
+
+	for (i = 0; i < count; i++) {
+		of_read_drc_info_cell(&info, &value, &drc);
+		if (strncmp(drc.drc_type, "CPU", 3))
+			break;
+
+		drc_index = drc.drc_index_start;
+		for (j = 0; j < drc.num_sequential_elems; j++) {
+			if (dlpar_cpu_exists(cpus, drc_index))
+				continue;
+
+			cpu_drcs[cpus_found++] = drc_index;
+
+			if (cpus_found == cpus_to_add)
+				return cpus_found;
+
+			drc_index += drc.sequential_inc;
+		}
 	}
 
+	return cpus_found;
+}
+
+static int find_drc_index_cpus_to_add(struct device_node *cpus,
+				      u32 *cpu_drcs, u32 cpus_to_add)
+{
+	int cpus_found = 0;
+	int index, rc;
+	u32 drc_index;
+
 	/* Search the ibm,drc-indexes array for possible CPU drcs to
 	 * add. Note that the format of the ibm,drc-indexes array is
 	 * the number of entries in the array followed by the array
@@ -737,25 +821,25 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
 	 */
 	index = 1;
 	while (cpus_found < cpus_to_add) {
-		u32 drc;
+		rc = of_property_read_u32_index(cpus, "ibm,drc-indexes",
+						index++, &drc_index);
 
-		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
-						index++, &drc);
 		if (rc)
 			break;
 
-		if (dlpar_cpu_exists(parent, drc))
+		if (dlpar_cpu_exists(cpus, drc_index))
 			continue;
 
-		cpu_drcs[cpus_found++] = drc;
+		cpu_drcs[cpus_found++] = drc_index;
 	}
 
-	of_node_put(parent);
 	return cpus_found;
 }
 
 static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 {
+	struct device_node *parent;
+	struct property *info;
 	u32 *cpu_drcs;
 	int cpus_added = 0;
 	int cpus_found;
@@ -767,7 +851,21 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 	if (!cpu_drcs)
 		return -EINVAL;
 
-	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
+	parent = of_find_node_by_path("/cpus");
+	if (!parent) {
+		pr_warn("Could not find CPU root node in device tree\n");
+		kfree(cpu_drcs);
+		return -1;
+	}
+
+	info = of_find_property(parent, "ibm,drc-info", NULL);
+	if (info)
+		cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add);
+	else
+		cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add);
+
+	of_node_put(parent);
+
 	if (cpus_found < cpus_to_add) {
 		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
 			cpus_found, cpus_to_add);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8e700390f3d6..c126b94d1943 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -338,7 +338,7 @@ static int pseries_remove_mem_node(struct device_node *np)
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
 	int i, scns_per_block;
-	int rc = 1;
+	bool rc = true;
 	unsigned long pfn, block_sz;
 	u64 phys_addr;
 
@@ -363,11 +363,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
 		if (!pfn_present(pfn))
 			continue;
 
-		rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+		rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
 		phys_addr += MIN_MEMORY_BLOCK_SIZE;
 	}
 
-	return rc ? true : false;
+	return rc;
 }
 
 static int dlpar_add_lmb(struct drmem_lmb *);
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index bcc1b67417a8..c40c62ec432e 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -129,7 +129,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
-	struct dentry *hcall_file;
 	char cpu_name_buf[CPU_NAME_BUF_SIZE];
 	int cpu;
 
@@ -145,17 +144,12 @@ static int __init hcall_inst_init(void)
 	}
 
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
-	if (!hcall_root)
-		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
 		snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
-		hcall_file = debugfs_create_file(cpu_name_buf, 0444,
-						 hcall_root,
-						 per_cpu(hcall_stats, cpu),
-						 &hcall_inst_seq_fops);
-		if (!hcall_file)
-			return -ENOMEM;
+		debugfs_create_file(cpu_name_buf, 0444, hcall_root,
+				    per_cpu(hcall_stats, cpu),
+				    &hcall_inst_seq_fops);
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f87a5c64e24d..60cb29ae4739 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -774,7 +774,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
 
 		/* don't remove a bolted entry */
 		lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
-					   (0x1UL << 4), &dummy1, &dummy2);
+					   HPTE_V_BOLTED, &dummy1, &dummy2);
 		if (lpar_rc == H_SUCCESS)
 			return i;
 
@@ -938,11 +938,19 @@ static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
 	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
 	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
-	/* Bolted entries are always in the primary group */
+	/*
+	 * We try to keep bolted entries always in primary hash
+	 * But in some case we can find them in secondary too.
+	 */
 	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
-	if (slot < 0)
-		return -1;
+	if (slot < 0) {
+		/* Try in secondary */
+		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+		if (slot < 0)
+			return -1;
+	}
 	return hpte_group + slot;
 }
 
@@ -1992,30 +2000,17 @@ static int __init vpa_debugfs_init(void)
 {
 	char name[16];
 	long i;
-	static struct dentry *vpa_dir;
+	struct dentry *vpa_dir;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
 	vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
-	if (!vpa_dir) {
-		pr_warn("%s: can't create vpa root dir\n", __func__);
-		return -ENOMEM;
-	}
 
 	/* set up the per-cpu vpa file*/
 	for_each_possible_cpu(i) {
-		struct dentry *d;
-
 		sprintf(name, "cpu-%ld", i);
-
-		d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
-					&vpa_fops);
-		if (!d) {
-			pr_warn("%s: can't create per-cpu vpa file\n",
-					__func__);
-			return -ENOMEM;
-		}
+		debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 6df192f38f80..66dfd8256712 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -45,14 +45,14 @@ struct device_node *pseries_of_derive_parent(const char *path)
 int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 			struct of_drc_info *data)
 {
-	const char *p;
+	const char *p = (char *)(*curval);
 	const __be32 *p2;
 
 	if (!data)
 		return -EINVAL;
 
 	/* Get drc-type:encode-string */
-	p = data->drc_type = (char*) (*curval);
+	data->drc_type = (char *)p;
 	p = of_prop_next_string(*prop, p);
 	if (!p)
 		return -EINVAL;
@@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 
 	/* Get drc-index-start:encode-int */
 	p2 = (const __be32 *)p;
-	p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
-	if (!p2)
-		return -EINVAL;
+	data->drc_index_start = be32_to_cpu(*p2);
 
 	/* Get drc-name-suffix-start:encode-int */
 	p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 61883291defc..c2ef320ba1bf 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -152,7 +152,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p,
 	int len, read;
 	int64_t ret;
 
-	if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+	if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
 		return -EINVAL;
 
 	for (len = hdr->in_length; len; len -= read) {
@@ -206,7 +206,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
 	__be64 data_be;
 	int64_t ret;
 
-	if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+	if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
 		return -EINVAL;
 
 	for (len = hdr->in_length; len; len -= wrote) {
@@ -284,25 +284,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	return 0;
 }
 
-static const struct attribute_group *region_attr_groups[] = {
-	&nd_region_attribute_group,
-	&nd_device_attribute_group,
-	&nd_mapping_attribute_group,
-	&nd_numa_attribute_group,
-	NULL,
-};
-
-static const struct attribute_group *bus_attr_groups[] = {
-	&nvdimm_bus_attribute_group,
-	NULL,
-};
-
-static const struct attribute_group *papr_scm_dimm_groups[] = {
-	&nvdimm_attribute_group,
-	&nd_device_attribute_group,
-	NULL,
-};
-
 static inline int papr_scm_node(int node)
 {
 	int min_dist = INT_MAX, dist;
@@ -333,7 +314,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	p->bus_desc.ndctl = papr_scm_ndctl;
 	p->bus_desc.module = THIS_MODULE;
 	p->bus_desc.of_node = p->pdev->dev.of_node;
-	p->bus_desc.attr_groups = bus_attr_groups;
 	p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
 
 	if (!p->bus_desc.provider_name)
@@ -348,8 +328,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	dimm_flags = 0;
 	set_bit(NDD_ALIASING, &dimm_flags);
 
-	p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
-				dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+	p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
+				  PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
 	if (!p->nvdimm) {
 		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
 		goto err;
@@ -366,7 +346,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
 
 	memset(&ndr_desc, 0, sizeof(ndr_desc));
-	ndr_desc.attr_groups = region_attr_groups;
 	target_nid = dev_to_node(&p->pdev->dev);
 	online_nid = papr_scm_node(target_nid);
 	ndr_desc.numa_node = online_nid;
@@ -513,7 +492,6 @@ static struct platform_driver papr_scm_driver = {
 	.remove = papr_scm_remove,
 	.driver = {
 		.name = "papr_scm",
-		.owner = THIS_MODULE,
 		.of_match_table = papr_scm_match,
 	},
 };
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 561917fa54a8..361986e4354e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
  * for RPA-compliant PPC64 platform.
@@ -6,23 +7,6 @@
  *
  * Updates, 2005, John Rose <johnrose@austin.ibm.com>
  * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/pci.h>
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index a96874f9492f..09e98d301db0 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -36,6 +36,7 @@ static int sysfs_entries;
 static u32 cpu_to_drc_index(int cpu)
 {
 	struct device_node *dn = NULL;
+	struct property *info;
 	int thread_index;
 	int rc = 1;
 	u32 ret = 0;
@@ -47,20 +48,18 @@ static u32 cpu_to_drc_index(int cpu)
 	/* Convert logical cpu number to core number */
 	thread_index = cpu_core_index_of_thread(cpu);
 
-	if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-		struct property *info = NULL;
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
 		struct of_drc_info drc;
 		int j;
 		u32 num_set_entries;
 		const __be32 *value;
 
-		info = of_find_property(dn, "ibm,drc-info", NULL);
-		if (info == NULL)
-			goto err_of_node_put;
-
 		value = of_prop_next_u32(info, NULL, &num_set_entries);
 		if (!value)
 			goto err_of_node_put;
+		else
+			value++;
 
 		for (j = 0; j < num_set_entries; j++) {
 
@@ -110,6 +109,7 @@ err:
 static int drc_index_to_cpu(u32 drc_index)
 {
 	struct device_node *dn = NULL;
+	struct property *info;
 	const int *indexes;
 	int thread_index = 0, cpu = 0;
 	int rc = 1;
@@ -117,21 +117,18 @@ static int drc_index_to_cpu(u32 drc_index)
 	dn = of_find_node_by_path("/cpus");
 	if (dn == NULL)
 		goto err;
-
-	if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-		struct property *info = NULL;
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
 		struct of_drc_info drc;
 		int j;
 		u32 num_set_entries;
 		const __be32 *value;
 
-		info = of_find_property(dn, "ibm,drc-info", NULL);
-		if (info == NULL)
-			goto err_of_node_put;
-
 		value = of_prop_next_u32(info, NULL, &num_set_entries);
 		if (!value)
 			goto err_of_node_put;
+		else
+			value++;
 
 		for (j = 0; j < num_set_entries; j++) {
 
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3acdcc3bb908..1d7f973c647b 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -255,7 +255,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 		break;
 
 	case EPOW_SYSTEM_SHUTDOWN:
-		handle_system_shutdown(epow_log->event_modifier);
+		handle_system_shutdown(modifier);
 		break;
 
 	case EPOW_SYSTEM_HALT:
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0a40201f315f..0c8421dd01ab 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -74,6 +74,9 @@
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
 
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL_GPL(shared_processor);
+
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
 unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void)
 
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		vpa_init(boot_cpuid);
+
+		if (lppaca_shared_proc(get_lppaca()))
+			static_branch_enable(&shared_processor);
+
 		ppc_md.power_save = pseries_lpar_idle;
 		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
 #ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 603b3c656d19..cb5a5bd2cef5 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_FSL_CORENET_RCPM)	+= fsl_rcpm.o
 obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
 obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
 obj-$(CONFIG_FSL_85XX_CACHE_SRAM)	+= fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
-obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc_cmos_setup.o
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ff0e2b156cb5..617a443d673d 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
 {
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 
-	pdev->dev.bus_dma_mask =
-		hose->dma_window_base_cur + hose->dma_window_size;
+	pdev->dev.bus_dma_limit =
+		hose->dma_window_base_cur + hose->dma_window_size - 1;
 }
 
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
 	 * mapping that allows addressing any RAM address from across PCI.
 	 */
 	if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
-		dev->bus_dma_mask = 0;
+		dev->bus_dma_limit = 0;
 		dev->archdata.dma_offset = pci64_dma_offset;
 	}
 }
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
deleted file mode 100644
index dc1740cd9e42..000000000000
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Simple Memory-Mapped GPIOs
- *
- * Copyright (c) MontaVista Software, Inc. 2008.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio/driver.h>
-#include <linux/slab.h>
-#include <asm/prom.h>
-#include "simple_gpio.h"
-
-struct u8_gpio_chip {
-	struct of_mm_gpio_chip mm_gc;
-	spinlock_t lock;
-
-	/* shadowed data register to clear/set bits safely */
-	u8 data;
-};
-
-static u8 u8_pin2mask(unsigned int pin)
-{
-	return 1 << (8 - 1 - pin);
-}
-
-static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-
-	return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
-}
-
-static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-	struct u8_gpio_chip *u8_gc = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&u8_gc->lock, flags);
-
-	if (val)
-		u8_gc->data |= u8_pin2mask(gpio);
-	else
-		u8_gc->data &= ~u8_pin2mask(gpio);
-
-	out_8(mm_gc->regs, u8_gc->data);
-
-	spin_unlock_irqrestore(&u8_gc->lock, flags);
-}
-
-static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
-{
-	return 0;
-}
-
-static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	u8_gpio_set(gc, gpio, val);
-	return 0;
-}
-
-static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
-{
-	struct u8_gpio_chip *u8_gc =
-		container_of(mm_gc, struct u8_gpio_chip, mm_gc);
-
-	u8_gc->data = in_8(mm_gc->regs);
-}
-
-static int __init u8_simple_gpiochip_add(struct device_node *np)
-{
-	int ret;
-	struct u8_gpio_chip *u8_gc;
-	struct of_mm_gpio_chip *mm_gc;
-	struct gpio_chip *gc;
-
-	u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
-	if (!u8_gc)
-		return -ENOMEM;
-
-	spin_lock_init(&u8_gc->lock);
-
-	mm_gc = &u8_gc->mm_gc;
-	gc = &mm_gc->gc;
-
-	mm_gc->save_regs = u8_gpio_save_regs;
-	gc->ngpio = 8;
-	gc->direction_input = u8_gpio_dir_in;
-	gc->direction_output = u8_gpio_dir_out;
-	gc->get = u8_gpio_get;
-	gc->set = u8_gpio_set;
-
-	ret = of_mm_gpiochip_add_data(np, mm_gc, u8_gc);
-	if (ret)
-		goto err;
-	return 0;
-err:
-	kfree(u8_gc);
-	return ret;
-}
-
-void __init simple_gpiochip_init(const char *compatible)
-{
-	struct device_node *np;
-
-	for_each_compatible_node(np, NULL, compatible) {
-		int ret;
-		struct resource r;
-
-		ret = of_address_to_resource(np, 0, &r);
-		if (ret)
-			goto err;
-
-		switch (resource_size(&r)) {
-		case 1:
-			ret = u8_simple_gpiochip_add(np);
-			if (ret)
-				goto err;
-			break;
-		default:
-			/*
-			 * Whenever you need support for GPIO bank width > 1,
-			 * please just turn u8_ code into huge macros, and
-			 * construct needed uX_ code with it.
-			 */
-			ret = -ENOSYS;
-			goto err;
-		}
-		continue;
-err:
-		pr_err("%pOF: registration failed, status %d\n", np, ret);
-	}
-}
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
deleted file mode 100644
index f3f3a20d39e2..000000000000
--- a/arch/powerpc/sysdev/simple_gpio.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SYSDEV_SIMPLE_GPIO_H
-#define __SYSDEV_SIMPLE_GPIO_H
-
-#include <linux/errno.h>
-
-#ifdef CONFIG_SIMPLE_GPIO
-extern void simple_gpiochip_init(const char *compatible);
-#else
-static inline void simple_gpiochip_init(const char *compatible) {}
-#endif /* CONFIG_SIMPLE_GPIO */
-
-#endif /* __SYSDEV_SIMPLE_GPIO_H */
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index df832b09e3e9..9651ca061828 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -972,12 +972,21 @@ static int xive_get_irqchip_state(struct irq_data *data,
 				  enum irqchip_irq_state which, bool *state)
 {
 	struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+	u8 pq;
 
 	switch (which) {
 	case IRQCHIP_STATE_ACTIVE:
-		*state = !xd->stale_p &&
-			 (xd->saved_p ||
-			  !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
+		pq = xive_esb_read(xd, XIVE_ESB_GET);
+
+		/*
+		 * The esb value being all 1's means we couldn't get
+		 * the PQ state of the interrupt through mmio. It may
+		 * happen, for example when querying a PHB interrupt
+		 * while the PHB is in an error state. We consider the
+		 * interrupt to be inactive in that case.
+		 */
+		*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
+			(xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
 		return 0;
 	default:
 		return -EINVAL;
@@ -1035,6 +1044,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
 	xd->target = XIVE_INVALID_TARGET;
 	irq_set_handler_data(virq, xd);
 
+	/*
+	 * Turn OFF by default the interrupt being mapped. A side
+	 * effect of this check is the mapping the ESB page of the
+	 * interrupt in the Linux address space. This prevents page
+	 * fault issues in the crash handler which masks all
+	 * interrupts.
+	 */
+	xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+
 	return 0;
 }
 
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 33c10749edec..55dc61cb4867 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -392,20 +392,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 	data->esb_shift = esb_shift;
 	data->trig_page = trig_page;
 
+	data->hw_irq = hw_irq;
+
 	/*
 	 * No chip-id for the sPAPR backend. This has an impact how we
 	 * pick a target. See xive_pick_irq_target().
 	 */
 	data->src_chip = XIVE_INVALID_CHIP_ID;
 
+	/*
+	 * When the H_INT_ESB flag is set, the H_INT_ESB hcall should
+	 * be used for interrupt management. Skip the remapping of the
+	 * ESB pages which are not available.
+	 */
+	if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB)
+		return 0;
+
 	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
 	if (!data->eoi_mmio) {
 		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
 		return -ENOMEM;
 	}
 
-	data->hw_irq = hw_irq;
-
 	/* Full function page supports trigger */
 	if (flags & XIVE_SRC_TRIGGER) {
 		data->trig_mmio = data->eoi_mmio;
diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
index 2b4e959caa36..7b9fe0a567cf 100755
--- a/arch/powerpc/tools/relocs_check.sh
+++ b/arch/powerpc/tools/relocs_check.sh
@@ -20,7 +20,7 @@ objdump="$1"
 vmlinux="$2"
 
 bad_relocs=$(
-"$objdump" -R "$vmlinux" |
+$objdump -R "$vmlinux" |
 	# Only look at relocation lines.
 	grep -E '\<R_' |
 	# These relocations are okay
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 1e972df3107e..77114755dc6f 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -18,14 +18,14 @@ vmlinux="$2"
 #__end_interrupts should be located within the first 64K
 
 end_intr=0x$(
-"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000		\
+$objdump -R "$vmlinux" -d --start-address=0xc000000000000000           \
 		 --stop-address=0xc000000000010000 |
 grep '\<__end_interrupts>:' |
 awk '{print $1}'
 )
 
 BRANCHES=$(
-"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000		\
+$objdump -R "$vmlinux" -D --start-address=0xc000000000000000           \
 		--stop-address=${end_intr} |
 grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
 grep -v '\<__start_initialization_multiplatform>' |
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index f142570ad860..c3842dbeb1b7 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for xmon
 
-# Disable clang warning for using setjmp without setjmp.h header
-subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
+# Avoid clang warnings around longjmp/setjmp declarations
+subdir-ccflags-y := -ffreestanding
 
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d83364ebc5c5..a7056049709e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -25,6 +25,7 @@
 #include <linux/nmi.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
+#include <linux/security.h>
 
 #include <asm/debugfs.h>
 #include <asm/ptrace.h>
@@ -187,6 +188,8 @@ static void dump_tlb_44x(void);
 static void dump_tlb_book3e(void);
 #endif
 
+static void clear_all_bpt(void);
+
 #ifdef CONFIG_PPC64
 #define REG		"%.16lx"
 #else
@@ -283,10 +286,38 @@ Commands:\n\
 "  U	show uptime information\n"
 "  ?	help\n"
 "  # n	limit output to n lines per page (for dp, dpa, dl)\n"
-"  zr	reboot\n\
-  zh	halt\n"
+"  zr	reboot\n"
+"  zh	halt\n"
 ;
 
+#ifdef CONFIG_SECURITY
+static bool xmon_is_locked_down(void)
+{
+	static bool lockdown;
+
+	if (!lockdown) {
+		lockdown = !!security_locked_down(LOCKDOWN_XMON_RW);
+		if (lockdown) {
+			printf("xmon: Disabled due to kernel lockdown\n");
+			xmon_is_ro = true;
+		}
+	}
+
+	if (!xmon_is_ro) {
+		xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR);
+		if (xmon_is_ro)
+			printf("xmon: Read-only due to kernel lockdown\n");
+	}
+
+	return lockdown;
+}
+#else /* CONFIG_SECURITY */
+static inline bool xmon_is_locked_down(void)
+{
+	return false;
+}
+#endif
+
 static struct pt_regs *xmon_regs;
 
 static inline void sync(void)
@@ -438,7 +469,10 @@ static bool wait_for_other_cpus(int ncpus)
 
 	return false;
 }
-#endif /* CONFIG_SMP */
+#else /* CONFIG_SMP */
+static inline void get_output_lock(void) {}
+static inline void release_output_lock(void) {}
+#endif
 
 static inline int unrecoverable_excp(struct pt_regs *regs)
 {
@@ -455,6 +489,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	int cmd = 0;
 	struct bpt *bp;
 	long recurse_jmp[JMP_BUF_LEN];
+	bool locked_down;
 	unsigned long offset;
 	unsigned long flags;
 #ifdef CONFIG_SMP
@@ -465,6 +500,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	local_irq_save(flags);
 	hard_irq_disable();
 
+	locked_down = xmon_is_locked_down();
+
 	if (!fromipi) {
 		tracing_enabled = tracing_is_on();
 		tracing_off();
@@ -518,7 +555,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
 	if (!fromipi) {
 		get_output_lock();
-		excprint(regs);
+		if (!locked_down)
+			excprint(regs);
 		if (bp) {
 			printf("cpu 0x%x stopped at breakpoint 0x%tx (",
 			       cpu, BP_NUM(bp));
@@ -570,10 +608,14 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 		remove_bpts();
 		disable_surveillance();
-		/* for breakpoint or single step, print the current instr. */
-		if (bp || TRAP(regs) == 0xd00)
-			ppc_inst_dump(regs->nip, 1, 0);
-		printf("enter ? for help\n");
+
+		if (!locked_down) {
+			/* for breakpoint or single step, print curr insn */
+			if (bp || TRAP(regs) == 0xd00)
+				ppc_inst_dump(regs->nip, 1, 0);
+			printf("enter ? for help\n");
+		}
+
 		mb();
 		xmon_gate = 1;
 		barrier();
@@ -597,8 +639,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 			spin_cpu_relax();
 			touch_nmi_watchdog();
 		} else {
-			cmd = cmds(regs);
-			if (cmd != 0) {
+			if (!locked_down)
+				cmd = cmds(regs);
+			if (locked_down || cmd != 0) {
 				/* exiting xmon */
 				insert_bpts();
 				xmon_gate = 0;
@@ -635,13 +678,16 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 			       "can't continue\n");
 		remove_bpts();
 		disable_surveillance();
-		/* for breakpoint or single step, print the current instr. */
-		if (bp || TRAP(regs) == 0xd00)
-			ppc_inst_dump(regs->nip, 1, 0);
-		printf("enter ? for help\n");
+		if (!locked_down) {
+			/* for breakpoint or single step, print current insn */
+			if (bp || TRAP(regs) == 0xd00)
+				ppc_inst_dump(regs->nip, 1, 0);
+			printf("enter ? for help\n");
+		}
 	}
 
-	cmd = cmds(regs);
+	if (!locked_down)
+		cmd = cmds(regs);
 
 	insert_bpts();
 	in_xmon = 0;
@@ -670,7 +716,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
 #endif
-	insert_cpu_bpts();
+	if (locked_down)
+		clear_all_bpt();
+	else
+		insert_cpu_bpts();
 
 	touch_nmi_watchdog();
 	local_irq_restore(flags);
@@ -884,7 +933,7 @@ static void insert_cpu_bpts(void)
 	if (dabr.enabled) {
 		brk.address = dabr.address;
 		brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
-		brk.len = 8;
+		brk.len = DABR_MAX_LEN;
 		__set_breakpoint(&brk);
 	}
 
@@ -1047,10 +1096,6 @@ cmds(struct pt_regs *excp)
 			set_lpp_cmd();
 			break;
 		case 'b':
-			if (xmon_is_ro) {
-				printf(xmon_ro_msg);
-				break;
-			}
 			bpt_cmds();
 			break;
 		case 'C':
@@ -1319,11 +1364,16 @@ bpt_cmds(void)
 	struct bpt *bp;
 
 	cmd = inchar();
+
 	switch (cmd) {
 #ifndef CONFIG_PPC_8xx
 	static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
 	int mode;
 	case 'd':	/* bd - hardware data breakpoint */
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
 		if (!ppc_breakpoint_available()) {
 			printf("Hardware data breakpoint not supported on this cpu\n");
 			break;
@@ -1351,6 +1401,10 @@ bpt_cmds(void)
 		break;
 
 	case 'i':	/* bi - hardware instr breakpoint */
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
 		if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
 			printf("Hardware instruction breakpoint "
 			       "not supported on this cpu\n");
@@ -1409,7 +1463,8 @@ bpt_cmds(void)
 			break;
 		}
 		termch = cmd;
-		if (!scanhex(&a)) {
+
+		if (xmon_is_ro || !scanhex(&a)) {
 			/* print all breakpoints */
 			printf("   type            address\n");
 			if (dabr.enabled) {
@@ -3762,6 +3817,11 @@ static void xmon_init(int enable)
 #ifdef CONFIG_MAGIC_SYSRQ
 static void sysrq_handle_xmon(int key)
 {
+	if (xmon_is_locked_down()) {
+		clear_all_bpt();
+		xmon_init(0);
+		return;
+	}
 	/* ensure xmon is enabled */
 	xmon_init(1);
 	debugger(get_irq_regs());
@@ -3783,7 +3843,6 @@ static int __init setup_xmon_sysrq(void)
 device_initcall(setup_xmon_sysrq);
 #endif /* CONFIG_MAGIC_SYSRQ */
 
-#ifdef CONFIG_DEBUG_FS
 static void clear_all_bpt(void)
 {
 	int i;
@@ -3801,18 +3860,22 @@ static void clear_all_bpt(void)
 		iabr = NULL;
 		dabr.enabled = 0;
 	}
-
-	printf("xmon: All breakpoints cleared\n");
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int xmon_dbgfs_set(void *data, u64 val)
 {
 	xmon_on = !!val;
 	xmon_init(xmon_on);
 
 	/* make sure all breakpoints removed when disabling */
-	if (!xmon_on)
+	if (!xmon_on) {
 		clear_all_bpt();
+		get_output_lock();
+		printf("xmon: All breakpoints cleared\n");
+		release_output_lock();
+	}
+
 	return 0;
 }
 
@@ -3838,7 +3901,11 @@ static int xmon_early __initdata;
 
 static int __init early_parse_xmon(char *p)
 {
-	if (!p || strncmp(p, "early", 5) == 0) {
+	if (xmon_is_locked_down()) {
+		xmon_init(0);
+		xmon_early = 0;
+		xmon_on = 0;
+	} else if (!p || strncmp(p, "early", 5) == 0) {
 		/* just "xmon" is equivalent to "xmon=early" */
 		xmon_init(1);
 		xmon_early = 1;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8eebbc8860bb..fa7dc03459e7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -26,14 +26,16 @@ config RISCV
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
-	select GENERIC_STRNCPY_FROM_USER
-	select GENERIC_STRNLEN_USER
+	select GENERIC_STRNCPY_FROM_USER if MMU
+	select GENERIC_STRNLEN_USER if MMU
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_ATOMIC64 if !64BIT
+	select GENERIC_IOREMAP
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_MEMBLOCK_NODE_MAP
-	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DMA_CONTIGUOUS if MMU
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -50,6 +52,7 @@ config RISCV
 	select PCI_DOMAINS_GENERIC if PCI
 	select PCI_MSI if PCI
 	select RISCV_TIMER
+	select UACCESS_MEMCPY if !MMU
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_ARCH_TOPOLOGY if SMP
 	select ARCH_HAS_PTE_SPECIAL
@@ -60,7 +63,9 @@ config RISCV
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
 	select SPARSEMEM_STATIC if 32BIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
-	select HAVE_ARCH_MMAP_RND_BITS
+	select HAVE_ARCH_MMAP_RND_BITS if MMU
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select HAVE_COPY_THREAD_TLS
 
 config ARCH_MMAP_RND_BITS_MIN
 	default 18 if 64BIT
@@ -72,8 +77,23 @@ config ARCH_MMAP_RND_BITS_MAX
 	default 24 if 64BIT # SV39 based
 	default 17
 
+# set if we run in machine mode, cleared if we run in supervisor mode
+config RISCV_M_MODE
+	bool
+	default !MMU
+
+# set if we are running in S-mode and can use SBI calls
+config RISCV_SBI
+	bool
+	depends on !RISCV_M_MODE
+	default y
+
 config MMU
-	def_bool y
+	bool "MMU-based Paged Memory Management Support"
+	default y
+	help
+	  Select if you want MMU-based virtualised addressing space
+	  support by paged memory management. If unsure, say 'Y'.
 
 config ZONE_DMA32
 	bool
@@ -92,6 +112,7 @@ config PA_BITS
 config PAGE_OFFSET
 	hex
 	default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+	default 0x80000000 if 64BIT && !MMU
 	default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
 	default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
 
@@ -135,7 +156,7 @@ config GENERIC_HWEIGHT
 	def_bool y
 
 config FIX_EARLYCON_MEM
-	def_bool y
+	def_bool MMU
 
 config PGTABLE_LEVELS
 	int
@@ -160,17 +181,18 @@ config ARCH_RV32I
 	select GENERIC_LIB_ASHRDI3
 	select GENERIC_LIB_LSHRDI3
 	select GENERIC_LIB_UCMPDI2
+	select MMU
 
 config ARCH_RV64I
 	bool "RV64I"
 	select 64BIT
-	select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
+	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select SWIOTLB
+	select HAVE_DYNAMIC_FTRACE if MMU
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+	select SWIOTLB if MMU
 
 endchoice
 
@@ -272,6 +294,19 @@ menu "Kernel features"
 
 source "kernel/Kconfig.hz"
 
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 536c0ef4aee8..d325b67d00df 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -1,13 +1,13 @@
 menu "SoC selection"
 
 config SOC_SIFIVE
-       bool "SiFive SoCs"
-       select SERIAL_SIFIVE
-       select SERIAL_SIFIVE_CONSOLE
-       select CLK_SIFIVE
-       select CLK_SIFIVE_FU540_PRCI
-       select SIFIVE_PLIC
-       help
-         This enables support for SiFive SoC platform hardware.
+	bool "SiFive SoCs"
+	select SERIAL_SIFIVE if TTY
+	select SERIAL_SIFIVE_CONSOLE if TTY
+	select CLK_SIFIVE
+	select CLK_SIFIVE_FU540_PRCI
+	select SIFIVE_PLIC
+	help
+	  This enables support for SiFive SoC platform hardware.
 
 endmenu
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f5e914210245..b9009a2fbaf5 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -83,13 +83,18 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
-all: Image.gz
+ifeq ($(CONFIG_RISCV_M_MODE),y)
+KBUILD_IMAGE := $(boot)/loader
+else
+KBUILD_IMAGE := $(boot)/Image.gz
+endif
+BOOT_TARGETS := Image Image.gz loader
 
-Image: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+all:	$(notdir $(KBUILD_IMAGE))
 
-Image.%: Image
+$(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+	@$(kecho) '  Kernel: $(boot)/$@ is ready'
 
 zinstall install:
 	$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 0990a9fdbe5d..36db8145f9f4 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -16,7 +16,7 @@
 
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-targets := Image
+targets := Image loader
 
 $(obj)/Image: vmlinux FORCE
 	$(call if_changed,objcopy)
@@ -24,6 +24,23 @@ $(obj)/Image: vmlinux FORCE
 $(obj)/Image.gz: $(obj)/Image FORCE
 	$(call if_changed,gzip)
 
+$(obj)/loader.o: $(src)/loader.S $(obj)/Image
+
+$(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE
+	$(Q)$(LD) -T $(obj)/loader.lds -o $@ $(obj)/loader.o
+
+$(obj)/Image.bz2: $(obj)/Image FORCE
+	$(call if_changed,bzip2)
+
+$(obj)/Image.lz4: $(obj)/Image FORCE
+	$(call if_changed,lz4)
+
+$(obj)/Image.lzma: $(obj)/Image FORCE
+	$(call if_changed,lzma)
+
+$(obj)/Image.lzo: $(obj)/Image FORCE
+	$(call if_changed,lzo)
+
 install:
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index afa43c7ea369..a2e3d54e830c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -54,6 +54,7 @@
 			reg = <1>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu1_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -77,6 +78,7 @@
 			reg = <2>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu2_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -100,6 +102,7 @@
 			reg = <3>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu3_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -123,6 +126,7 @@
 			reg = <4>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu4_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -162,6 +166,13 @@
 			clocks = <&prci PRCI_CLK_TLCLK>;
 			status = "disabled";
 		};
+		dma: dma@3000000 {
+			compatible = "sifive,fu540-c000-pdma";
+			reg = <0x0 0x3000000 0x0 0x8000>;
+			interrupt-parent = <&plic0>;
+			interrupts = <23 24 25 26 27 28 29 30>;
+			#dma-cells = <1>;
+		};
 		uart1: serial@10011000 {
 			compatible = "sifive,fu540-c000-uart", "sifive,uart0";
 			reg = <0x0 0x10011000 0x0 0x1000>;
@@ -246,6 +257,17 @@
 			#pwm-cells = <3>;
 			status = "disabled";
 		};
+		l2cache: cache-controller@2010000 {
+			compatible = "sifive,fu540-c000-ccache", "cache";
+			cache-block-size = <64>;
+			cache-level = <2>;
+			cache-sets = <1024>;
+			cache-size = <2097152>;
+			cache-unified;
+			interrupt-parent = <&plic0>;
+			interrupts = <1 2 3>;
+			reg = <0x0 0x2010000 0x0 0x1000>;
+		};
 
 	};
 };
diff --git a/arch/riscv/boot/loader.S b/arch/riscv/boot/loader.S
new file mode 100644
index 000000000000..dcf88cf44dc1
--- /dev/null
+++ b/arch/riscv/boot/loader.S
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+	.align 4
+	.section .payload, "ax", %progbits
+	.globl _start
+_start:
+	.incbin "arch/riscv/boot/Image"
+
diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
new file mode 100644
index 000000000000..47a5003c2e28
--- /dev/null
+++ b/arch/riscv/boot/loader.lds.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/page.h>
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+SECTIONS
+{
+	. = PAGE_OFFSET;
+
+	.payload : {
+		*(.payload)
+		. = ALIGN(8);
+	}
+}
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 420a0dbef386..e2ff95cb3390 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -100,4 +100,28 @@ CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_RWSEMS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_STACKTRACE=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PLIST=y
+CONFIG_DEBUG_SG=y
 # CONFIG_RCU_TRACE is not set
+CONFIG_RCU_EQS_DEBUG=y
+CONFIG_DEBUG_BLOCK_EXT_DEVT=y
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_MEMTEST=y
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
new file mode 100644
index 000000000000..cf74e179bf90
--- /dev/null
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -0,0 +1,78 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_MEMBARRIER is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+# CONFIG_MMU is not set
+CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SMP=y
+CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_VIRTIO_BLK=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_SIFIVE_PLIC=y
+# CONFIG_VALIDATE_FS_PARSER is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 87ee6e62b64b..eb519407c841 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -97,4 +97,28 @@ CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_RWSEMS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_STACKTRACE=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PLIST=y
+CONFIG_DEBUG_SG=y
 # CONFIG_RCU_TRACE is not set
+CONFIG_RCU_EQS_DEBUG=y
+CONFIG_DEBUG_BLOCK_EXT_DEVT=y
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_MEMTEST=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 16970f246860..1efaeddf1e4b 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -22,7 +22,6 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mm-arch-hooks.h
-generic-y += msi.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += sections.h
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index c9fecd120d18..27e005fca584 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -1,7 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_RISCV_PROTOTYPES_H
+#define _ASM_RISCV_PROTOTYPES_H
 
 #include <linux/ftrace.h>
 #include <asm-generic/asm-prototypes.h>
 
+long long __lshrti3(long long a, int b);
+long long __ashrti3(long long a, int b);
+long long __ashlti3(long long a, int b);
+
 #endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
index bfd523e8f0b2..9b58b104559e 100644
--- a/arch/riscv/include/asm/cache.h
+++ b/arch/riscv/include/asm/cache.h
@@ -11,4 +11,12 @@
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+/*
+ * RISC-V requires the stack pointer to be 16-byte aligned, so ensure that
+ * the flat loader aligns it accordingly.
+ */
+#ifndef CONFIG_MMU
+#define ARCH_SLAB_MINALIGN	16
+#endif
+
 #endif /* _ASM_RISCV_CACHE_H */
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
new file mode 100644
index 000000000000..6eaa2eedd694
--- /dev/null
+++ b/arch/riscv/include/asm/clint.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_CLINT_H
+#define _ASM_RISCV_CLINT_H 1
+
+#include <linux/io.h>
+#include <linux/smp.h>
+
+#ifdef CONFIG_RISCV_M_MODE
+extern u32 __iomem *clint_ipi_base;
+
+void clint_init_boot_cpu(void);
+
+static inline void clint_send_ipi_single(unsigned long hartid)
+{
+	writel(1, clint_ipi_base + hartid);
+}
+
+static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask)
+{
+	int hartid;
+
+	for_each_cpu(hartid, hartid_mask)
+		clint_send_ipi_single(hartid);
+}
+
+static inline void clint_clear_ipi(unsigned long hartid)
+{
+	writel(0, clint_ipi_base + hartid);
+}
+#else /* CONFIG_RISCV_M_MODE */
+#define clint_init_boot_cpu()	do { } while (0)
+
+/* stubs to for code is only reachable under IS_ENABLED(CONFIG_RISCV_M_MODE): */
+void clint_send_ipi_single(unsigned long hartid);
+void clint_send_ipi_mask(const struct cpumask *hartid_mask);
+void clint_clear_ipi(unsigned long hartid);
+#endif /* CONFIG_RISCV_M_MODE */
+
+#endif /* _ASM_RISCV_CLINT_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index a18923fa23c8..435b65532e29 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -11,8 +11,11 @@
 
 /* Status register flags */
 #define SR_SIE		_AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_MIE		_AC(0x00000008, UL) /* Machine Interrupt Enable */
 #define SR_SPIE		_AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_MPIE		_AC(0x00000080, UL) /* Previous Machine IE */
 #define SR_SPP		_AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_MPP		_AC(0x00001800, UL) /* Previously Machine */
 #define SR_SUM		_AC(0x00040000, UL) /* Supervisor User Memory Access */
 
 #define SR_FS		_AC(0x00006000, UL) /* Floating-point Status */
@@ -44,9 +47,10 @@
 #define SATP_MODE	SATP_MODE_39
 #endif
 
-/* SCAUSE */
-#define SCAUSE_IRQ_FLAG		(_AC(1, UL) << (__riscv_xlen - 1))
+/* Exception cause high bit - is an interrupt if set */
+#define CAUSE_IRQ_FLAG		(_AC(1, UL) << (__riscv_xlen - 1))
 
+/* Interrupt causes (minus the high bit) */
 #define IRQ_U_SOFT		0
 #define IRQ_S_SOFT		1
 #define IRQ_M_SOFT		3
@@ -57,6 +61,7 @@
 #define IRQ_S_EXT		9
 #define IRQ_M_EXT		11
 
+/* Exception causes */
 #define EXC_INST_MISALIGNED	0
 #define EXC_INST_ACCESS		1
 #define EXC_BREAKPOINT		3
@@ -67,14 +72,14 @@
 #define EXC_LOAD_PAGE_FAULT	13
 #define EXC_STORE_PAGE_FAULT	15
 
-/* SIE (Interrupt Enable) and SIP (Interrupt Pending) flags */
-#define SIE_SSIE		(_AC(0x1, UL) << IRQ_S_SOFT)
-#define SIE_STIE		(_AC(0x1, UL) << IRQ_S_TIMER)
-#define SIE_SEIE		(_AC(0x1, UL) << IRQ_S_EXT)
-
+/* symbolic CSR names: */
 #define CSR_CYCLE		0xc00
 #define CSR_TIME		0xc01
 #define CSR_INSTRET		0xc02
+#define CSR_CYCLEH		0xc80
+#define CSR_TIMEH		0xc81
+#define CSR_INSTRETH		0xc82
+
 #define CSR_SSTATUS		0x100
 #define CSR_SIE			0x104
 #define CSR_STVEC		0x105
@@ -85,9 +90,58 @@
 #define CSR_STVAL		0x143
 #define CSR_SIP			0x144
 #define CSR_SATP		0x180
-#define CSR_CYCLEH		0xc80
-#define CSR_TIMEH		0xc81
-#define CSR_INSTRETH		0xc82
+
+#define CSR_MSTATUS		0x300
+#define CSR_MISA		0x301
+#define CSR_MIE			0x304
+#define CSR_MTVEC		0x305
+#define CSR_MSCRATCH		0x340
+#define CSR_MEPC		0x341
+#define CSR_MCAUSE		0x342
+#define CSR_MTVAL		0x343
+#define CSR_MIP			0x344
+#define CSR_MHARTID		0xf14
+
+#ifdef CONFIG_RISCV_M_MODE
+# define CSR_STATUS	CSR_MSTATUS
+# define CSR_IE		CSR_MIE
+# define CSR_TVEC	CSR_MTVEC
+# define CSR_SCRATCH	CSR_MSCRATCH
+# define CSR_EPC	CSR_MEPC
+# define CSR_CAUSE	CSR_MCAUSE
+# define CSR_TVAL	CSR_MTVAL
+# define CSR_IP		CSR_MIP
+
+# define SR_IE		SR_MIE
+# define SR_PIE		SR_MPIE
+# define SR_PP		SR_MPP
+
+# define RV_IRQ_SOFT		IRQ_M_SOFT
+# define RV_IRQ_TIMER	IRQ_M_TIMER
+# define RV_IRQ_EXT		IRQ_M_EXT
+#else /* CONFIG_RISCV_M_MODE */
+# define CSR_STATUS	CSR_SSTATUS
+# define CSR_IE		CSR_SIE
+# define CSR_TVEC	CSR_STVEC
+# define CSR_SCRATCH	CSR_SSCRATCH
+# define CSR_EPC	CSR_SEPC
+# define CSR_CAUSE	CSR_SCAUSE
+# define CSR_TVAL	CSR_STVAL
+# define CSR_IP		CSR_SIP
+
+# define SR_IE		SR_SIE
+# define SR_PIE		SR_SPIE
+# define SR_PP		SR_SPP
+
+# define RV_IRQ_SOFT		IRQ_S_SOFT
+# define RV_IRQ_TIMER	IRQ_S_TIMER
+# define RV_IRQ_EXT		IRQ_S_EXT
+#endif /* CONFIG_RISCV_M_MODE */
+
+/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
+#define IE_SIE		(_AC(0x1, UL) << RV_IRQ_SOFT)
+#define IE_TIE		(_AC(0x1, UL) << RV_IRQ_TIMER)
+#define IE_EIE		(_AC(0x1, UL) << RV_IRQ_EXT)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h
index 44dcf7fc15ee..dd973efe5d7c 100644
--- a/arch/riscv/include/asm/current.h
+++ b/arch/riscv/include/asm/current.h
@@ -7,8 +7,8 @@
  */
 
 
-#ifndef __ASM_CURRENT_H
-#define __ASM_CURRENT_H
+#ifndef _ASM_RISCV_CURRENT_H
+#define _ASM_RISCV_CURRENT_H
 
 #include <linux/bug.h>
 #include <linux/compiler.h>
@@ -34,4 +34,4 @@ static __always_inline struct task_struct *get_current(void)
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* __ASM_CURRENT_H */
+#endif /* _ASM_RISCV_CURRENT_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index ef04084bf0de..d83a4efd052b 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -56,16 +56,16 @@ extern unsigned long elf_hwcap;
  */
 #define ELF_PLATFORM	(NULL)
 
+#ifdef CONFIG_MMU
 #define ARCH_DLINFO						\
 do {								\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
 		(elf_addr_t)current->mm->context.vdso);		\
 } while (0)
-
-
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 	int uses_interp);
+#endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 161f28d04a07..42d2c42f3cc9 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -11,6 +11,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_MMU
 /*
  * Here we define all the compile-time 'special' virtual addresses.
  * The point is to have a constant address at compile time, but to
@@ -42,4 +43,5 @@ extern void __set_fixmap(enum fixed_addresses idx,
 
 #include <asm-generic/fixmap.h>
 
+#endif /* CONFIG_MMU */
 #endif /* _ASM_RISCV_FIXMAP_H */
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index c6dcc5291f97..ace8a6e2d11d 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (C) 2017 Andes Technology Corporation */
 
+#ifndef _ASM_RISCV_FTRACE_H
+#define _ASM_RISCV_FTRACE_H
+
 /*
  * The graph frame test is not possible if CONFIG_FRAME_POINTER is not enabled.
  * Check arch/riscv/kernel/mcount.S for detail.
@@ -64,3 +67,5 @@ do {									\
  */
 #define MCOUNT_INSN_SIZE 8
 #endif
+
+#endif /* _ASM_RISCV_FTRACE_H */
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 4ad6409c4647..fdfaf7f3df7c 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -4,14 +4,20 @@
  * Copyright (c) 2018  Jim Wilson (jimw@sifive.com)
  */
 
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
+#ifndef _ASM_RISCV_FUTEX_H
+#define _ASM_RISCV_FUTEX_H
 
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 #include <linux/errno.h>
 #include <asm/asm.h>
 
+/* We don't even really need the extable code, but for now keep it simple */
+#ifndef CONFIG_MMU
+#define __enable_user_access()		do { } while (0)
+#define __disable_user_access()		do { } while (0)
+#endif
+
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
 {								\
 	uintptr_t tmp;						\
@@ -112,4 +118,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	return ret;
 }
 
-#endif /* _ASM_FUTEX_H */
+#endif /* _ASM_RISCV_FUTEX_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 7ecb7c6a57b1..1bb0cd04aec3 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -5,8 +5,8 @@
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2017 SiFive
  */
-#ifndef __ASM_HWCAP_H
-#define __ASM_HWCAP_H
+#ifndef _ASM_RISCV_HWCAP_H
+#define _ASM_RISCV_HWCAP_H
 
 #include <uapi/asm/hwcap.h>
 
@@ -23,4 +23,5 @@ enum {
 
 extern unsigned long elf_hwcap;
 #endif
-#endif
+
+#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index 344db5244547..e0b319af3681 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
-#ifndef __ASM_IMAGE_H
-#define __ASM_IMAGE_H
+#ifndef _ASM_RISCV_IMAGE_H
+#define _ASM_RISCV_IMAGE_H
 
 #define RISCV_IMAGE_MAGIC	"RISCV\0\0\0"
 #define RISCV_IMAGE_MAGIC2	"RSC\x05"
@@ -42,7 +42,7 @@
  * @res2:		reserved
  * @magic:		Magic number (RISC-V specific; deprecated)
  * @magic2:		Magic number 2 (to match the ARM64 'magic' field pos)
- * @res4:		reserved (will be used for PE COFF offset)
+ * @res3:		reserved (will be used for PE COFF offset)
  *
  * The intention is for this header format to be shared between multiple
  * architectures to avoid a proliferation of image header formats.
@@ -59,7 +59,7 @@ struct riscv_image_header {
 	u64 res2;
 	u64 magic;
 	u32 magic2;
-	u32 res4;
+	u32 res3;
 };
 #endif /* __ASSEMBLY__ */
-#endif /* __ASM_IMAGE_H */
+#endif /* _ASM_RISCV_IMAGE_H */
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 3ba4d93721d3..0f477206a4ed 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -15,158 +15,19 @@
 #include <asm/mmiowb.h>
 #include <asm/pgtable.h>
 
-extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
-
-/*
- * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
- * change the properties of memory regions.  This should be fixed by the
- * upcoming platform spec.
- */
-#define ioremap_nocache(addr, size) ioremap((addr), (size))
-#define ioremap_wc(addr, size) ioremap((addr), (size))
-#define ioremap_wt(addr, size) ioremap((addr), (size))
-
-extern void iounmap(volatile void __iomem *addr);
-
-/* Generic IO read/write.  These perform native-endian accesses. */
-#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
-{
-	asm volatile("sb %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
-{
-	asm volatile("sh %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
-{
-	asm volatile("sw %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#ifdef CONFIG_64BIT
-#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
-{
-	asm volatile("sd %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-#endif
-
-#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
-{
-	u8 val;
-
-	asm volatile("lb %0, 0(%1)" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
-{
-	u16 val;
-
-	asm volatile("lh %0, 0(%1)" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
-{
-	u32 val;
-
-	asm volatile("lw %0, 0(%1)" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#ifdef CONFIG_64BIT
-#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
-{
-	u64 val;
-
-	asm volatile("ld %0, 0(%1)" : "=r" (val) : "r" (addr));
-	return val;
-}
-#endif
-
 /*
- * Unordered I/O memory access primitives.  These are even more relaxed than
- * the relaxed versions, as they don't even order accesses between successive
- * operations to the I/O regions.
+ * MMIO access functions are separated out to break dependency cycles
+ * when using {read,write}* fns in low-level headers
  */
-#define readb_cpu(c)		({ u8  __r = __raw_readb(c); __r; })
-#define readw_cpu(c)		({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
-#define readl_cpu(c)		({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-
-#define writeb_cpu(v,c)		((void)__raw_writeb((v),(c)))
-#define writew_cpu(v,c)		((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_cpu(v,c)		((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-
-#ifdef CONFIG_64BIT
-#define readq_cpu(c)		({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-#define writeq_cpu(v,c)		((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-#endif
-
-/*
- * Relaxed I/O memory access primitives. These follow the Device memory
- * ordering rules but do not guarantee any ordering relative to Normal memory
- * accesses.  These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
- */
-/* FIXME: These are now the same as asm-generic */
-#define __io_rbr()		do {} while (0)
-#define __io_rar()		do {} while (0)
-#define __io_rbw()		do {} while (0)
-#define __io_raw()		do {} while (0)
-
-#define readb_relaxed(c)	({ u8  __v; __io_rbr(); __v = readb_cpu(c); __io_rar(); __v; })
-#define readw_relaxed(c)	({ u16 __v; __io_rbr(); __v = readw_cpu(c); __io_rar(); __v; })
-#define readl_relaxed(c)	({ u32 __v; __io_rbr(); __v = readl_cpu(c); __io_rar(); __v; })
-
-#define writeb_relaxed(v,c)	({ __io_rbw(); writeb_cpu((v),(c)); __io_raw(); })
-#define writew_relaxed(v,c)	({ __io_rbw(); writew_cpu((v),(c)); __io_raw(); })
-#define writel_relaxed(v,c)	({ __io_rbw(); writel_cpu((v),(c)); __io_raw(); })
-
-#ifdef CONFIG_64BIT
-#define readq_relaxed(c)	({ u64 __v; __io_rbr(); __v = readq_cpu(c); __io_rar(); __v; })
-#define writeq_relaxed(v,c)	({ __io_rbw(); writeq_cpu((v),(c)); __io_raw(); })
-#endif
-
-/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.  The memory barriers here are necessary as RISC-V
- * doesn't define any ordering between the memory space and the I/O space.
- */
-#define __io_br()	do {} while (0)
-#define __io_ar(v)	__asm__ __volatile__ ("fence i,r" : : : "memory");
-#define __io_bw()	__asm__ __volatile__ ("fence w,o" : : : "memory");
-#define __io_aw()	mmiowb_set_pending()
-
-#define readb(c)	({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
-#define readw(c)	({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
-#define readl(c)	({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; })
-
-#define writeb(v,c)	({ __io_bw(); writeb_cpu((v),(c)); __io_aw(); })
-#define writew(v,c)	({ __io_bw(); writew_cpu((v),(c)); __io_aw(); })
-#define writel(v,c)	({ __io_bw(); writel_cpu((v),(c)); __io_aw(); })
-
-#ifdef CONFIG_64BIT
-#define readq(c)	({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; })
-#define writeq(v,c)	({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); })
-#endif
+#include <asm/mmio.h>
 
 /*
  *  I/O port access constants.
  */
+#ifdef CONFIG_MMU
 #define IO_SPACE_LIMIT		(PCI_IO_SIZE - 1)
 #define PCI_IOBASE		((void __iomem *)PCI_IO_START)
+#endif /* CONFIG_MMU */
 
 /*
  * Emulation routines for the port-mapped IO space used by some PCI drivers.
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index e70f647ce3b7..08d4d6a5b7e9 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -13,31 +13,31 @@
 /* read interrupt enabled status */
 static inline unsigned long arch_local_save_flags(void)
 {
-	return csr_read(CSR_SSTATUS);
+	return csr_read(CSR_STATUS);
 }
 
 /* unconditionally enable interrupts */
 static inline void arch_local_irq_enable(void)
 {
-	csr_set(CSR_SSTATUS, SR_SIE);
+	csr_set(CSR_STATUS, SR_IE);
 }
 
 /* unconditionally disable interrupts */
 static inline void arch_local_irq_disable(void)
 {
-	csr_clear(CSR_SSTATUS, SR_SIE);
+	csr_clear(CSR_STATUS, SR_IE);
 }
 
 /* get status and disable interrupts */
 static inline unsigned long arch_local_irq_save(void)
 {
-	return csr_read_clear(CSR_SSTATUS, SR_SIE);
+	return csr_read_clear(CSR_STATUS, SR_IE);
 }
 
 /* test flags */
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & SR_SIE);
+	return !(flags & SR_IE);
 }
 
 /* test hardware interrupt enable bit */
@@ -49,7 +49,7 @@ static inline int arch_irqs_disabled(void)
 /* set interrupt enabled status */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	csr_set(CSR_SSTATUS, flags & SR_SIE);
+	csr_set(CSR_STATUS, flags & SR_IE);
 }
 
 #endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 96e30ef637e8..56a98ea30731 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -6,9 +6,9 @@
  * Copyright (C) 2017 SiFive
  */
 
-#ifndef _RISCV_KPROBES_H
-#define _RISCV_KPROBES_H
+#ifndef _ASM_RISCV_KPROBES_H
+#define _ASM_RISCV_KPROBES_H
 
 #include <asm-generic/kprobes.h>
 
-#endif /* _RISCV_KPROBES_H */
+#endif /* _ASM_RISCV_KPROBES_H */
diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
new file mode 100644
index 000000000000..a2c809df2733
--- /dev/null
+++ b/arch/riscv/include/asm/mmio.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * {read,write}{b,w,l,q} based on arch/arm64/include/asm/io.h
+ *   which was based on arch/arm/include/io.h
+ *
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2014 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_MMIO_H
+#define _ASM_RISCV_MMIO_H
+
+#include <linux/types.h>
+#include <asm/mmiowb.h>
+
+#ifndef CONFIG_MMU
+#define pgprot_noncached(x)	(x)
+#endif /* CONFIG_MMU */
+
+/* Generic IO read/write.  These perform native-endian accesses. */
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+	asm volatile("sb %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+	asm volatile("sh %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+	asm volatile("sw %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#ifdef CONFIG_64BIT
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+	asm volatile("sd %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+#endif
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	u8 val;
+
+	asm volatile("lb %0, 0(%1)" : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	u16 val;
+
+	asm volatile("lh %0, 0(%1)" : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	u32 val;
+
+	asm volatile("lw %0, 0(%1)" : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#ifdef CONFIG_64BIT
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 val;
+
+	asm volatile("ld %0, 0(%1)" : "=r" (val) : "r" (addr));
+	return val;
+}
+#endif
+
+/*
+ * Unordered I/O memory access primitives.  These are even more relaxed than
+ * the relaxed versions, as they don't even order accesses between successive
+ * operations to the I/O regions.
+ */
+#define readb_cpu(c)		({ u8  __r = __raw_readb(c); __r; })
+#define readw_cpu(c)		({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
+#define readl_cpu(c)		({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+
+#define writeb_cpu(v, c)	((void)__raw_writeb((v), (c)))
+#define writew_cpu(v, c)	((void)__raw_writew((__force u16)cpu_to_le16(v), (c)))
+#define writel_cpu(v, c)	((void)__raw_writel((__force u32)cpu_to_le32(v), (c)))
+
+#ifdef CONFIG_64BIT
+#define readq_cpu(c)		({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+#define writeq_cpu(v, c)	((void)__raw_writeq((__force u64)cpu_to_le64(v), (c)))
+#endif
+
+/*
+ * Relaxed I/O memory access primitives. These follow the Device memory
+ * ordering rules but do not guarantee any ordering relative to Normal memory
+ * accesses.  These are defined to order the indicated access (either a read or
+ * write) with all other I/O memory accesses. Since the platform specification
+ * defines that all I/O regions are strongly ordered on channel 2, no explicit
+ * fences are required to enforce this ordering.
+ */
+/* FIXME: These are now the same as asm-generic */
+#define __io_rbr()		do {} while (0)
+#define __io_rar()		do {} while (0)
+#define __io_rbw()		do {} while (0)
+#define __io_raw()		do {} while (0)
+
+#define readb_relaxed(c)	({ u8  __v; __io_rbr(); __v = readb_cpu(c); __io_rar(); __v; })
+#define readw_relaxed(c)	({ u16 __v; __io_rbr(); __v = readw_cpu(c); __io_rar(); __v; })
+#define readl_relaxed(c)	({ u32 __v; __io_rbr(); __v = readl_cpu(c); __io_rar(); __v; })
+
+#define writeb_relaxed(v, c)	({ __io_rbw(); writeb_cpu((v), (c)); __io_raw(); })
+#define writew_relaxed(v, c)	({ __io_rbw(); writew_cpu((v), (c)); __io_raw(); })
+#define writel_relaxed(v, c)	({ __io_rbw(); writel_cpu((v), (c)); __io_raw(); })
+
+#ifdef CONFIG_64BIT
+#define readq_relaxed(c)	({ u64 __v; __io_rbr(); __v = readq_cpu(c); __io_rar(); __v; })
+#define writeq_relaxed(v, c)	({ __io_rbw(); writeq_cpu((v), (c)); __io_raw(); })
+#endif
+
+/*
+ * I/O memory access primitives. Reads are ordered relative to any
+ * following Normal memory access. Writes are ordered relative to any prior
+ * Normal memory access.  The memory barriers here are necessary as RISC-V
+ * doesn't define any ordering between the memory space and the I/O space.
+ */
+#define __io_br()	do {} while (0)
+#define __io_ar(v)	__asm__ __volatile__ ("fence i,r" : : : "memory")
+#define __io_bw()	__asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_aw()	mmiowb_set_pending()
+
+#define readb(c)	({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
+#define readw(c)	({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
+#define readl(c)	({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; })
+
+#define writeb(v, c)	({ __io_bw(); writeb_cpu((v), (c)); __io_aw(); })
+#define writew(v, c)	({ __io_bw(); writew_cpu((v), (c)); __io_aw(); })
+#define writel(v, c)	({ __io_bw(); writel_cpu((v), (c)); __io_aw(); })
+
+#ifdef CONFIG_64BIT
+#define readq(c)	({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; })
+#define writeq(v, c)	({ __io_bw(); writeq_cpu((v), (c)); __io_aw(); })
+#endif
+
+#endif /* _ASM_RISCV_MMIO_H */
diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h
index 5d7e3a2b4e3b..bb4091ff4a21 100644
--- a/arch/riscv/include/asm/mmiowb.h
+++ b/arch/riscv/include/asm/mmiowb.h
@@ -11,4 +11,4 @@
 
 #include <asm-generic/mmiowb.h>
 
-#endif	/* ASM_RISCV_MMIOWB_H */
+#endif	/* _ASM_RISCV_MMIOWB_H */
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 151476fb58cb..967eacb01ab5 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -10,6 +10,9 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
+#ifndef CONFIG_MMU
+	unsigned long	end_brk;
+#endif
 	void *vdso;
 #ifdef CONFIG_SMP
 	/* A local icache flush is needed before user execution can resume. */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 3db261c4810f..ac699246ae7e 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -88,8 +88,14 @@ typedef struct page *pgtable_t;
 #define PTE_FMT "%08lx"
 #endif
 
+#ifdef CONFIG_MMU
 extern unsigned long va_pa_offset;
 extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET		(pfn_base)
+#else
+#define va_pa_offset		0
+#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
+#endif /* CONFIG_MMU */
 
 extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
@@ -112,11 +118,9 @@ extern unsigned long min_low_pfn;
 
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
-	(((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
+	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
 #endif
 
-#define ARCH_PFN_OFFSET		(pfn_base)
-
 #endif /* __ASSEMBLY__ */
 
 #define virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 5ac8daa1cc36..1c473a1bd986 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -3,8 +3,8 @@
  * Copyright (C) 2016 SiFive
  */
 
-#ifndef __ASM_RISCV_PCI_H
-#define __ASM_RISCV_PCI_H
+#ifndef _ASM_RISCV_PCI_H
+#define _ASM_RISCV_PCI_H
 
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -34,4 +34,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 }
 #endif  /* CONFIG_PCI */
 
-#endif  /* __ASM_PCI_H */
+#endif  /* _ASM_RISCV_PCI_H */
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index aefbfaa6a781..0234048b12bc 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -82,4 +82,8 @@ struct riscv_pmu {
 	int		irq;
 };
 
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+#endif
+
 #endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index d59ea92285ec..3f601ee8233f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <asm/tlb.h>
 
+#ifdef CONFIG_MMU
 #include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -81,5 +82,6 @@ do {                                    \
 	pgtable_pte_page_dtor(pte);     \
 	tlb_remove_page((tlb), pte);    \
 } while (0)
+#endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d3221017194d..f66b87314fa2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -25,6 +25,7 @@
 #include <asm/pgtable-32.h>
 #endif /* CONFIG_64BIT */
 
+#ifdef CONFIG_MMU
 /* Number of entries in the page global directory */
 #define PTRS_PER_PGD    (PAGE_SIZE / sizeof(pgd_t))
 /* Number of entries in the page table */
@@ -32,7 +33,6 @@
 
 /* Number of PGD entries that a user-mode program can use */
 #define USER_PTRS_PER_PGD   (TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS  0
 
 /* Page protection bits */
 #define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
@@ -62,6 +62,12 @@
 
 #define PAGE_TABLE		__pgprot(_PAGE_TABLE)
 
+/*
+ * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
+ * change the properties of memory regions.
+ */
+#define _PAGE_IOREMAP _PAGE_KERNEL
+
 extern pgd_t swapper_pg_dir[];
 
 /* MAP_PRIVATE permissions: xwr (copy-on-write) */
@@ -87,7 +93,10 @@ extern pgd_t swapper_pg_dir[];
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
-#define PCI_IO_SIZE      SZ_16M
+
+#define BPF_JIT_REGION_SIZE	(SZ_128M)
+#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END	(VMALLOC_END)
 
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
@@ -100,25 +109,11 @@ extern pgd_t swapper_pg_dir[];
 #define VMEMMAP_END	(VMALLOC_START - 1)
 #define VMEMMAP_START	(VMALLOC_START - VMEMMAP_SIZE)
 
-#define vmemmap		((struct page *)VMEMMAP_START)
-
-#define PCI_IO_END       VMEMMAP_START
-#define PCI_IO_START     (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP      PCI_IO_START
-
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE     PMD_SIZE
-#else
-#define FIXADDR_SIZE     PGDIR_SIZE
-#endif
-#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
-
 /*
- * ZERO_PAGE is a global shared page that is always zero,
- * used for zero-mapped memory areas, etc.
+ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+ * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
  */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define vmemmap		((struct page *)VMEMMAP_START)
 
 static inline int pmd_present(pmd_t pmd)
 {
@@ -430,11 +425,17 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)   (1) /* FIXME */
+#define PCI_IO_SIZE      SZ_16M
+#define PCI_IO_END       VMEMMAP_START
+#define PCI_IO_START     (PCI_IO_END - PCI_IO_SIZE)
 
-extern void *dtb_early_va;
-extern void setup_bootmem(void);
-extern void paging_init(void);
+#define FIXADDR_TOP      PCI_IO_START
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE     PMD_SIZE
+#else
+#define FIXADDR_SIZE     PGDIR_SIZE
+#endif
+#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
 
 /*
  * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
@@ -446,6 +447,31 @@ extern void paging_init(void);
 #define TASK_SIZE FIXADDR_START
 #endif
 
+#else /* CONFIG_MMU */
+
+#define PAGE_KERNEL		__pgprot(0)
+#define swapper_pg_dir		NULL
+#define VMALLOC_START		0
+
+#define TASK_SIZE 0xffffffffUL
+
+#endif /* !CONFIG_MMU */
+
+#define kern_addr_valid(addr)   (1) /* FIXME */
+
+extern void *dtb_early_va;
+void setup_bootmem(void);
+void paging_init(void);
+
+#define FIRST_USER_ADDRESS  0
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero,
+ * used for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
 #include <asm-generic/pgtable.h>
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f539149d04c2..3ddb798264f1 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -42,7 +42,7 @@ struct thread_struct {
 	((struct pt_regs *)(task_stack_page(tsk) + THREAD_SIZE		\
 			    - ALIGN(sizeof(struct pt_regs), STACK_ALIGN)))
 
-#define KSTK_EIP(tsk)		(task_pt_regs(tsk)->sepc)
+#define KSTK_EIP(tsk)		(task_pt_regs(tsk)->epc)
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->sp)
 
 
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index d48d1e13973c..ee49f80c9533 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -12,7 +12,7 @@
 #ifndef __ASSEMBLY__
 
 struct pt_regs {
-	unsigned long sepc;
+	unsigned long epc;
 	unsigned long ra;
 	unsigned long sp;
 	unsigned long gp;
@@ -44,10 +44,10 @@ struct pt_regs {
 	unsigned long t4;
 	unsigned long t5;
 	unsigned long t6;
-	/* Supervisor CSRs */
-	unsigned long sstatus;
-	unsigned long sbadaddr;
-	unsigned long scause;
+	/* Supervisor/Machine CSRs */
+	unsigned long status;
+	unsigned long badaddr;
+	unsigned long cause;
 	/* a0 value before the syscall */
 	unsigned long orig_a0;
 };
@@ -58,18 +58,18 @@ struct pt_regs {
 #define REG_FMT "%08lx"
 #endif
 
-#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
+#define user_mode(regs) (((regs)->status & SR_PP) == 0)
 
 
 /* Helpers for working with the instruction pointer */
 static inline unsigned long instruction_pointer(struct pt_regs *regs)
 {
-	return regs->sepc;
+	return regs->epc;
 }
 static inline void instruction_pointer_set(struct pt_regs *regs,
 					   unsigned long val)
 {
-	regs->sepc = val;
+	regs->epc = val;
 }
 
 #define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 21134b3ef404..2570c1e683d3 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_RISCV_SBI
 #define SBI_SET_TIMER 0
 #define SBI_CONSOLE_PUTCHAR 1
 #define SBI_CONSOLE_GETCHAR 2
@@ -93,5 +94,11 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
 {
 	SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
 }
-
-#endif
+#else /* CONFIG_RISCV_SBI */
+/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
+void sbi_set_timer(uint64_t stime_value);
+void sbi_clear_ipi(void);
+void sbi_send_ipi(const unsigned long *hart_mask);
+void sbi_remote_fence_i(const unsigned long *hart_mask);
+#endif /* CONFIG_RISCV_SBI */
+#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
new file mode 100644
index 000000000000..bf7744ee3b3d
--- /dev/null
+++ b/arch/riscv/include/asm/seccomp.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/riscv/include/asm/sifive_l2_cache.h b/arch/riscv/include/asm/sifive_l2_cache.h
deleted file mode 100644
index 04f6748fc50b..000000000000
--- a/arch/riscv/include/asm/sifive_l2_cache.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * SiFive L2 Cache Controller header file
- *
- */
-
-#ifndef _ASM_RISCV_SIFIVE_L2_CACHE_H
-#define _ASM_RISCV_SIFIVE_L2_CACHE_H
-
-extern int register_sifive_l2_error_notifier(struct notifier_block *nb);
-extern int unregister_sifive_l2_error_notifier(struct notifier_block *nb);
-
-#define SIFIVE_L2_ERR_TYPE_CE 0
-#define SIFIVE_L2_ERR_TYPE_UE 1
-
-#endif /* _ASM_RISCV_SIFIVE_L2_CACHE_H */
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index b58ba2d9ed6e..45a7018a8118 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -1,11 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
-#ifndef __ASM_SPARSEMEM_H
-#define __ASM_SPARSEMEM_H
+#ifndef _ASM_RISCV_SPARSEMEM_H
+#define _ASM_RISCV_SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
 #define MAX_PHYSMEM_BITS	CONFIG_PA_BITS
 #define SECTION_SIZE_BITS	27
 #endif /* CONFIG_SPARSEMEM */
 
-#endif /* __ASM_SPARSEMEM_H */
+#endif /* _ASM_RISCV_SPARSEMEM_H */
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
index 888cbf8e7111..f398e7638dd6 100644
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -22,4 +22,4 @@ typedef struct {
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
-#endif
+#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index ee4f0ac62c9d..407bcc96a710 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -17,19 +17,19 @@ extern void __fstate_restore(struct task_struct *restore_from);
 
 static inline void __fstate_clean(struct pt_regs *regs)
 {
-	regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_CLEAN;
+	regs->status = (regs->status & ~SR_FS) | SR_FS_CLEAN;
 }
 
 static inline void fstate_off(struct task_struct *task,
 			      struct pt_regs *regs)
 {
-	regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_OFF;
+	regs->status = (regs->status & ~SR_FS) | SR_FS_OFF;
 }
 
 static inline void fstate_save(struct task_struct *task,
 			       struct pt_regs *regs)
 {
-	if ((regs->sstatus & SR_FS) == SR_FS_DIRTY) {
+	if ((regs->status & SR_FS) == SR_FS_DIRTY) {
 		__fstate_save(task);
 		__fstate_clean(regs);
 	}
@@ -38,7 +38,7 @@ static inline void fstate_save(struct task_struct *task,
 static inline void fstate_restore(struct task_struct *task,
 				  struct pt_regs *regs)
 {
-	if ((regs->sstatus & SR_FS) != SR_FS_OFF) {
+	if ((regs->status & SR_FS) != SR_FS_OFF) {
 		__fstate_restore(task);
 		__fstate_clean(regs);
 	}
@@ -50,7 +50,7 @@ static inline void __switch_to_aux(struct task_struct *prev,
 	struct pt_regs *regs;
 
 	regs = task_pt_regs(prev);
-	if (unlikely(regs->sstatus & SR_SD))
+	if (unlikely(regs->status & SR_SD))
 		fstate_save(prev, regs);
 	fstate_restore(next, task_pt_regs(next));
 }
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 905372d7eeb8..1dd12a0cbb2b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -75,6 +75,7 @@ struct thread_info {
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_SYSCALL_TRACEPOINT  6       /* syscall tracepoint instrumentation */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing */
+#define TIF_SECCOMP		8	/* syscall secure computing */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -82,11 +83,13 @@ struct thread_info {
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 
 #define _TIF_WORK_MASK \
 	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
 
 #define _TIF_SYSCALL_WORK \
-	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
+	 _TIF_SECCOMP)
 
 #endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index c7ef131b9e4c..bad2a7c2cda5 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -7,12 +7,25 @@
 #define _ASM_RISCV_TIMEX_H
 
 #include <asm/csr.h>
+#include <asm/mmio.h>
 
 typedef unsigned long cycles_t;
 
+extern u64 __iomem *riscv_time_val;
+extern u64 __iomem *riscv_time_cmp;
+
+#ifdef CONFIG_64BIT
+#define mmio_get_cycles()	readq_relaxed(riscv_time_val)
+#else
+#define mmio_get_cycles()	readl_relaxed(riscv_time_val)
+#define mmio_get_cycles_hi()	readl_relaxed(((u32 *)riscv_time_val) + 1)
+#endif
+
 static inline cycles_t get_cycles(void)
 {
-	return csr_read(CSR_TIME);
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		return csr_read(CSR_TIME);
+	return mmio_get_cycles();
 }
 #define get_cycles get_cycles
 
@@ -24,7 +37,9 @@ static inline u64 get_cycles64(void)
 #else /* CONFIG_64BIT */
 static inline u32 get_cycles_hi(void)
 {
-	return csr_read(CSR_TIMEH);
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		return csr_read(CSR_TIMEH);
+	return mmio_get_cycles_hi();
 }
 
 static inline u64 get_cycles64(void)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index f02188a5b0f4..394cfbccdcd9 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
 #include <linux/mm_types.h>
 #include <asm/smp.h>
 
+#ifdef CONFIG_MMU
 static inline void local_flush_tlb_all(void)
 {
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
@@ -20,14 +21,19 @@ static inline void local_flush_tlb_page(unsigned long addr)
 {
 	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
 }
+#else /* CONFIG_MMU */
+#define local_flush_tlb_all()			do { } while (0)
+#define local_flush_tlb_page(addr)		do { } while (0)
+#endif /* CONFIG_MMU */
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
 void flush_tlb_all(void);
 void flush_tlb_mm(struct mm_struct *mm);
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end);
-#else /* CONFIG_SMP */
+#else /* CONFIG_SMP && CONFIG_MMU */
+
 #define flush_tlb_all() local_flush_tlb_all()
 #define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
 
@@ -38,7 +44,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 }
 
 #define flush_tlb_mm(mm) flush_tlb_all()
-#endif /* CONFIG_SMP */
+#endif /* !CONFIG_SMP || !CONFIG_MMU */
 
 /* Flush a range of kernel pages */
 static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index e076437cfafe..f462a183a9c2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -11,6 +11,7 @@
 /*
  * User space memory access functions
  */
+#ifdef CONFIG_MMU
 #include <linux/errno.h>
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
@@ -475,4 +476,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 	__ret;							\
 })
 
+#else /* CONFIG_MMU */
+#include <asm-generic/uaccess.h>
+#endif /* CONFIG_MMU */
 #endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
new file mode 100644
index 000000000000..ff9abc00d139
--- /dev/null
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_RISCV_VMALLOC_H
+#define _ASM_RISCV_VMALLOC_H
+
+#endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/include/uapi/asm/bpf_perf_event.h b/arch/riscv/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..6cb1c2823288
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index 644a00ce6e2e..d696d6610231 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -9,8 +9,8 @@
  * (at your option) any later version.
  */
 
-#ifndef _UAPI_ASM_ELF_H
-#define _UAPI_ASM_ELF_H
+#ifndef _UAPI_ASM_RISCV_ELF_H
+#define _UAPI_ASM_RISCV_ELF_H
 
 #include <asm/ptrace.h>
 
@@ -95,4 +95,4 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_32_PCREL	57
 
 
-#endif /* _UAPI_ASM_ELF_H */
+#endif /* _UAPI_ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h
index 4e7646077056..dee98ee28318 100644
--- a/arch/riscv/include/uapi/asm/hwcap.h
+++ b/arch/riscv/include/uapi/asm/hwcap.h
@@ -5,8 +5,8 @@
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2017 SiFive
  */
-#ifndef __UAPI_ASM_HWCAP_H
-#define __UAPI_ASM_HWCAP_H
+#ifndef _UAPI_ASM_RISCV_HWCAP_H
+#define _UAPI_ASM_RISCV_HWCAP_H
 
 /*
  * Linux saves the floating-point registers according to the ISA Linux is
@@ -22,4 +22,4 @@
 #define COMPAT_HWCAP_ISA_D	(1 << ('D' - 'A'))
 #define COMPAT_HWCAP_ISA_C	(1 << ('C' - 'A'))
 
-#endif
+#endif /* _UAPI_ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h
index 411dd7b52ed6..44eb993950e5 100644
--- a/arch/riscv/include/uapi/asm/ucontext.h
+++ b/arch/riscv/include/uapi/asm/ucontext.h
@@ -5,8 +5,8 @@
  *
  * This file was copied from arch/arm64/include/uapi/asm/ucontext.h
  */
-#ifndef _UAPI__ASM_UCONTEXT_H
-#define _UAPI__ASM_UCONTEXT_H
+#ifndef _UAPI_ASM_RISCV_UCONTEXT_H
+#define _UAPI_ASM_RISCV_UCONTEXT_H
 
 #include <linux/types.h>
 
@@ -31,4 +31,4 @@ struct ucontext {
 	struct sigcontext uc_mcontext;
 };
 
-#endif /* _UAPI__ASM_UCONTEXT_H */
+#endif /* _UAPI_ASM_RISCV_UCONTEXT_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 696020ff72db..f40205cb9a22 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -25,10 +25,10 @@ obj-y	+= time.o
 obj-y	+= traps.o
 obj-y	+= riscv_ksyms.o
 obj-y	+= stacktrace.o
-obj-y	+= vdso.o
 obj-y	+= cacheinfo.o
-obj-y	+= vdso/
+obj-$(CONFIG_MMU) += vdso.o vdso/
 
+obj-$(CONFIG_RISCV_M_MODE)	+= clint.o
 obj-$(CONFIG_FPU)		+= fpu.o
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
@@ -41,5 +41,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
 obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
+obj-$(CONFIG_RISCV_SBI)		+= sbi.o
 
 clean:
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9f5628c38ac9..07cb9c10de4e 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -71,7 +71,7 @@ void asm_offsets(void)
 	OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
 
 	DEFINE(PT_SIZE, sizeof(struct pt_regs));
-	OFFSET(PT_SEPC, pt_regs, sepc);
+	OFFSET(PT_EPC, pt_regs, epc);
 	OFFSET(PT_RA, pt_regs, ra);
 	OFFSET(PT_FP, pt_regs, s0);
 	OFFSET(PT_S0, pt_regs, s0);
@@ -105,9 +105,9 @@ void asm_offsets(void)
 	OFFSET(PT_T6, pt_regs, t6);
 	OFFSET(PT_GP, pt_regs, gp);
 	OFFSET(PT_ORIG_A0, pt_regs, orig_a0);
-	OFFSET(PT_SSTATUS, pt_regs, sstatus);
-	OFFSET(PT_SBADADDR, pt_regs, sbadaddr);
-	OFFSET(PT_SCAUSE, pt_regs, scause);
+	OFFSET(PT_STATUS, pt_regs, status);
+	OFFSET(PT_BADADDR, pt_regs, badaddr);
+	OFFSET(PT_CAUSE, pt_regs, cause);
 
 	/*
 	 * THREAD_{F,X}* might be larger than a S-type offset can handle, but
diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c
new file mode 100644
index 000000000000..3647980d14c3
--- /dev/null
+++ b/arch/riscv/kernel/clint.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/types.h>
+#include <asm/clint.h>
+#include <asm/csr.h>
+#include <asm/timex.h>
+#include <asm/smp.h>
+
+/*
+ * This is the layout used by the SiFive clint, which is also shared by the qemu
+ * virt platform, and the Kendryte KD210 at least.
+ */
+#define CLINT_IPI_OFF		0
+#define CLINT_TIME_CMP_OFF	0x4000
+#define CLINT_TIME_VAL_OFF	0xbff8
+
+u32 __iomem *clint_ipi_base;
+
+void clint_init_boot_cpu(void)
+{
+	struct device_node *np;
+	void __iomem *base;
+
+	np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
+	if (!np) {
+		panic("clint not found");
+		return;
+	}
+
+	base = of_iomap(np, 0);
+	if (!base)
+		panic("could not map CLINT");
+
+	clint_ipi_base = base + CLINT_IPI_OFF;
+	riscv_time_cmp = base + CLINT_TIME_CMP_OFF;
+	riscv_time_val = base + CLINT_TIME_VAL_OFF;
+
+	clint_clear_ipi(boot_cpu_hartid);
+}
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 7da3c6a93abd..40a3c442ac5f 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -46,51 +46,12 @@ int riscv_of_processor_hartid(struct device_node *node)
 
 #ifdef CONFIG_PROC_FS
 
-static void print_isa(struct seq_file *f, const char *orig_isa)
+static void print_isa(struct seq_file *f, const char *isa)
 {
-	static const char *ext = "mafdcsu";
-	const char *isa = orig_isa;
-	const char *e;
-
-	/*
-	 * Linux doesn't support rv32e or rv128i, and we only support booting
-	 * kernels on harts with the same ISA that the kernel is compiled for.
-	 */
-#if defined(CONFIG_32BIT)
-	if (strncmp(isa, "rv32i", 5) != 0)
-		return;
-#elif defined(CONFIG_64BIT)
-	if (strncmp(isa, "rv64i", 5) != 0)
-		return;
-#endif
-
-	/* Print the base ISA, as we already know it's legal. */
+	/* Print the entire ISA as it is */
 	seq_puts(f, "isa\t\t: ");
-	seq_write(f, isa, 5);
-	isa += 5;
-
-	/*
-	 * Check the rest of the ISA string for valid extensions, printing those
-	 * we find.  RISC-V ISA strings define an order, so we only print the
-	 * extension bits when they're in order. Hide the supervisor (S)
-	 * extension from userspace as it's not accessible from there.
-	 */
-	for (e = ext; *e != '\0'; ++e) {
-		if (isa[0] == e[0]) {
-			if (isa[0] != 's')
-				seq_write(f, isa, 1);
-
-			isa++;
-		}
-	}
+	seq_write(f, isa, strlen(isa));
 	seq_puts(f, "\n");
-
-	/*
-	 * If we were given an unsupported ISA in the device tree then print
-	 * a bit of info describing what went wrong.
-	 */
-	if (isa[0] != '\0')
-		pr_info("unsupported ISA \"%s\" in device tree\n", orig_isa);
 }
 
 static void print_mmu(struct seq_file *f, const char *mmu_type)
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 8ca479831142..bad4d85b5e91 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -26,14 +26,14 @@
 
 	/*
 	 * If coming from userspace, preserve the user thread pointer and load
-	 * the kernel thread pointer.  If we came from the kernel, sscratch
-	 * will contain 0, and we should continue on the current TP.
+	 * the kernel thread pointer.  If we came from the kernel, the scratch
+	 * register will contain 0, and we should continue on the current TP.
 	 */
-	csrrw tp, CSR_SSCRATCH, tp
+	csrrw tp, CSR_SCRATCH, tp
 	bnez tp, _save_context
 
 _restore_kernel_tpsp:
-	csrr tp, CSR_SSCRATCH
+	csrr tp, CSR_SCRATCH
 	REG_S sp, TASK_TI_KERNEL_SP(tp)
 _save_context:
 	REG_S sp, TASK_TI_USER_SP(tp)
@@ -79,16 +79,16 @@ _save_context:
 	li t0, SR_SUM | SR_FS
 
 	REG_L s0, TASK_TI_USER_SP(tp)
-	csrrc s1, CSR_SSTATUS, t0
-	csrr s2, CSR_SEPC
-	csrr s3, CSR_STVAL
-	csrr s4, CSR_SCAUSE
-	csrr s5, CSR_SSCRATCH
+	csrrc s1, CSR_STATUS, t0
+	csrr s2, CSR_EPC
+	csrr s3, CSR_TVAL
+	csrr s4, CSR_CAUSE
+	csrr s5, CSR_SCRATCH
 	REG_S s0, PT_SP(sp)
-	REG_S s1, PT_SSTATUS(sp)
-	REG_S s2, PT_SEPC(sp)
-	REG_S s3, PT_SBADADDR(sp)
-	REG_S s4, PT_SCAUSE(sp)
+	REG_S s1, PT_STATUS(sp)
+	REG_S s2, PT_EPC(sp)
+	REG_S s3, PT_BADADDR(sp)
+	REG_S s4, PT_CAUSE(sp)
 	REG_S s5, PT_TP(sp)
 	.endm
 
@@ -97,7 +97,7 @@ _save_context:
  * registers from the stack.
  */
 	.macro RESTORE_ALL
-	REG_L a0, PT_SSTATUS(sp)
+	REG_L a0, PT_STATUS(sp)
 	/*
 	 * The current load reservation is effectively part of the processor's
 	 * state, in the sense that load reservations cannot be shared between
@@ -115,11 +115,11 @@ _save_context:
 	 * completes, implementations are allowed to expand reservations to be
 	 * arbitrarily large.
 	 */
-	REG_L  a2, PT_SEPC(sp)
-	REG_SC x0, a2, PT_SEPC(sp)
+	REG_L  a2, PT_EPC(sp)
+	REG_SC x0, a2, PT_EPC(sp)
 
-	csrw CSR_SSTATUS, a0
-	csrw CSR_SEPC, a2
+	csrw CSR_STATUS, a0
+	csrw CSR_EPC, a2
 
 	REG_L x1,  PT_RA(sp)
 	REG_L x3,  PT_GP(sp)
@@ -155,7 +155,7 @@ _save_context:
 	REG_L x2,  PT_SP(sp)
 	.endm
 
-#if !IS_ENABLED(CONFIG_PREEMPT)
+#if !IS_ENABLED(CONFIG_PREEMPTION)
 .set resume_kernel, restore_all
 #endif
 
@@ -163,10 +163,10 @@ ENTRY(handle_exception)
 	SAVE_ALL
 
 	/*
-	 * Set sscratch register to 0, so that if a recursive exception
+	 * Set the scratch register to 0, so that if a recursive exception
 	 * occurs, the exception vector knows it came from the kernel
 	 */
-	csrw CSR_SSCRATCH, x0
+	csrw CSR_SCRATCH, x0
 
 	/* Load the global pointer */
 .option push
@@ -185,11 +185,13 @@ ENTRY(handle_exception)
 	move a0, sp /* pt_regs */
 	tail do_IRQ
 1:
-	/* Exceptions run with interrupts enabled or disabled
-	   depending on the state of sstatus.SR_SPIE */
-	andi t0, s1, SR_SPIE
+	/*
+	 * Exceptions run with interrupts enabled or disabled depending on the
+	 * state of SR_PIE in m/sstatus.
+	 */
+	andi t0, s1, SR_PIE
 	beqz t0, 1f
-	csrs CSR_SSTATUS, SR_SIE
+	csrs CSR_STATUS, SR_IE
 
 1:
 	/* Handle syscalls */
@@ -217,7 +219,7 @@ handle_syscall:
 	 * scall instruction on sret
 	 */
 	addi s2, s2, 0x4
-	REG_S s2, PT_SEPC(sp)
+	REG_S s2, PT_EPC(sp)
 	/* Trace syscalls, but only if requested by the user. */
 	REG_L t0, TASK_TI_FLAGS(tp)
 	andi t0, t0, _TIF_SYSCALL_WORK
@@ -226,8 +228,26 @@ check_syscall_nr:
 	/* Check to make sure we don't jump to a bogus syscall number. */
 	li t0, __NR_syscalls
 	la s0, sys_ni_syscall
-	/* Syscall number held in a7 */
-	bgeu a7, t0, 1f
+	/*
+	 * The tracer can change syscall number to valid/invalid value.
+	 * We use syscall_set_nr helper in syscall_trace_enter thus we
+	 * cannot trust the current value in a7 and have to reload from
+	 * the current task pt_regs.
+	 */
+	REG_L a7, PT_A7(sp)
+	/*
+	 * Syscall number held in a7.
+	 * If syscall number is above allowed value, redirect to ni_syscall.
+	 */
+	bge a7, t0, 1f
+	/*
+	 * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+	 * If yes, we pretend it was executed.
+	 */
+	li t1, -1
+	beq a7, t1, ret_from_syscall_rejected
+	blt a7, t1, 1f
+	/* Call syscall */
 	la s0, sys_call_table
 	slli t0, a7, RISCV_LGPTR
 	add s0, s0, t0
@@ -238,15 +258,27 @@ check_syscall_nr:
 ret_from_syscall:
 	/* Set user a0 to kernel a0 */
 	REG_S a0, PT_A0(sp)
+	/*
+	 * We didn't execute the actual syscall.
+	 * Seccomp already set return value for the current task pt_regs.
+	 * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
+	 */
+ret_from_syscall_rejected:
 	/* Trace syscalls, but only if requested by the user. */
 	REG_L t0, TASK_TI_FLAGS(tp)
 	andi t0, t0, _TIF_SYSCALL_WORK
 	bnez t0, handle_syscall_trace_exit
 
 ret_from_exception:
-	REG_L s0, PT_SSTATUS(sp)
-	csrc CSR_SSTATUS, SR_SIE
+	REG_L s0, PT_STATUS(sp)
+	csrc CSR_STATUS, SR_IE
+#ifdef CONFIG_RISCV_M_MODE
+	/* the MPP value is too large to be used as an immediate arg for addi */
+	li t0, SR_MPP
+	and s0, s0, t0
+#else
 	andi s0, s0, SR_SPP
+#endif
 	bnez s0, resume_kernel
 
 resume_userspace:
@@ -260,16 +292,20 @@ resume_userspace:
 	REG_S s0, TASK_TI_KERNEL_SP(tp)
 
 	/*
-	 * Save TP into sscratch, so we can find the kernel data structures
-	 * again.
+	 * Save TP into the scratch register , so we can find the kernel data
+	 * structures again.
 	 */
-	csrw CSR_SSCRATCH, tp
+	csrw CSR_SCRATCH, tp
 
 restore_all:
 	RESTORE_ALL
+#ifdef CONFIG_RISCV_M_MODE
+	mret
+#else
 	sret
+#endif
 
-#if IS_ENABLED(CONFIG_PREEMPT)
+#if IS_ENABLED(CONFIG_PREEMPTION)
 resume_kernel:
 	REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
 	bnez s0, restore_all
@@ -287,7 +323,7 @@ work_pending:
 	bnez s1, work_resched
 work_notifysig:
 	/* Handle pending signals and notify-resume requests */
-	csrs CSR_SSTATUS, SR_SIE /* Enable interrupts for do_notify_resume() */
+	csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */
 	move a0, sp /* pt_regs */
 	move a1, s0 /* current_thread_info->flags */
 	tail do_notify_resume
@@ -386,6 +422,10 @@ ENTRY(__switch_to)
 	ret
 ENDPROC(__switch_to)
 
+#ifndef CONFIG_MMU
+#define do_page_fault do_trap_unknown
+#endif
+
 	.section ".rodata"
 	/* Exception vector table */
 ENTRY(excp_vect_table)
@@ -407,3 +447,10 @@ ENTRY(excp_vect_table)
 	RISCV_PTR do_page_fault   /* store page fault */
 excp_vect_table_end:
 END(excp_vect_table)
+
+#ifndef CONFIG_MMU
+ENTRY(__user_rt_sigreturn)
+	li a7, __NR_rt_sigreturn
+	scall
+END(__user_rt_sigreturn)
+#endif
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
index 631d31540660..dd2205473de7 100644
--- a/arch/riscv/kernel/fpu.S
+++ b/arch/riscv/kernel/fpu.S
@@ -23,7 +23,7 @@ ENTRY(__fstate_save)
 	li  a2,  TASK_THREAD_F0
 	add a0, a0, a2
 	li t1, SR_FS
-	csrs CSR_SSTATUS, t1
+	csrs CSR_STATUS, t1
 	frcsr t0
 	fsd f0,  TASK_THREAD_F0_F0(a0)
 	fsd f1,  TASK_THREAD_F1_F0(a0)
@@ -58,7 +58,7 @@ ENTRY(__fstate_save)
 	fsd f30, TASK_THREAD_F30_F0(a0)
 	fsd f31, TASK_THREAD_F31_F0(a0)
 	sw t0, TASK_THREAD_FCSR_F0(a0)
-	csrc CSR_SSTATUS, t1
+	csrc CSR_STATUS, t1
 	ret
 ENDPROC(__fstate_save)
 
@@ -67,7 +67,7 @@ ENTRY(__fstate_restore)
 	add a0, a0, a2
 	li t1, SR_FS
 	lw t0, TASK_THREAD_FCSR_F0(a0)
-	csrs CSR_SSTATUS, t1
+	csrs CSR_STATUS, t1
 	fld f0,  TASK_THREAD_F0_F0(a0)
 	fld f1,  TASK_THREAD_F1_F0(a0)
 	fld f2,  TASK_THREAD_F2_F0(a0)
@@ -101,6 +101,6 @@ ENTRY(__fstate_restore)
 	fld f30, TASK_THREAD_F30_F0(a0)
 	fld f31, TASK_THREAD_F31_F0(a0)
 	fscsr t0
-	csrc CSR_SSTATUS, t1
+	csrc CSR_STATUS, t1
 	ret
 ENDPROC(__fstate_restore)
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index b94d8db5ddcc..c40fdcdeb950 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 */
 	old = *parent;
 
-	if (function_graph_enter(old, self_addr, frame_pointer, parent))
+	if (!function_graph_enter(old, self_addr, frame_pointer, parent))
 		*parent = return_hooker;
 }
 
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 72f89b7590dd..a4242be66966 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/csr.h>
+#include <asm/hwcap.h>
 #include <asm/image.h>
 
 __INIT
@@ -47,8 +48,22 @@ ENTRY(_start)
 .global _start_kernel
 _start_kernel:
 	/* Mask all interrupts */
-	csrw CSR_SIE, zero
-	csrw CSR_SIP, zero
+	csrw CSR_IE, zero
+	csrw CSR_IP, zero
+
+#ifdef CONFIG_RISCV_M_MODE
+	/* flush the instruction cache */
+	fence.i
+
+	/* Reset all registers except ra, a0, a1 */
+	call reset_regs
+
+	/*
+	 * The hartid in a0 is expected later on, and we have no firmware
+	 * to hand it to us.
+	 */
+	csrr a0, CSR_MHARTID
+#endif /* CONFIG_RISCV_M_MODE */
 
 	/* Load the global pointer */
 .option push
@@ -61,11 +76,13 @@ _start_kernel:
 	 * floating point in kernel space
 	 */
 	li t0, SR_FS
-	csrc CSR_SSTATUS, t0
+	csrc CSR_STATUS, t0
 
 #ifdef CONFIG_SMP
 	li t0, CONFIG_NR_CPUS
-	bgeu a0, t0, .Lsecondary_park
+	blt a0, t0, .Lgood_cores
+	tail .Lsecondary_park
+.Lgood_cores:
 #endif
 
 	/* Pick one hart to run the main boot sequence */
@@ -94,8 +111,10 @@ clear_bss_done:
 	la sp, init_thread_union + THREAD_SIZE
 	mv a0, s1
 	call setup_vm
+#ifdef CONFIG_MMU
 	la a0, early_pg_dir
 	call relocate
+#endif /* CONFIG_MMU */
 
 	/* Restore C environment */
 	la tp, init_task
@@ -106,6 +125,7 @@ clear_bss_done:
 	call parse_dtb
 	tail start_kernel
 
+#ifdef CONFIG_MMU
 relocate:
 	/* Relocate return address */
 	li a1, PAGE_OFFSET
@@ -116,7 +136,7 @@ relocate:
 	/* Point stvec to virtual address of intruction after satp write */
 	la a2, 1f
 	add a2, a2, a1
-	csrw CSR_STVEC, a2
+	csrw CSR_TVEC, a2
 
 	/* Compute satp for kernel page tables, but don't load it yet */
 	srl a2, a0, PAGE_SHIFT
@@ -138,7 +158,7 @@ relocate:
 1:
 	/* Set trap vector to spin forever to help debug */
 	la a0, .Lsecondary_park
-	csrw CSR_STVEC, a0
+	csrw CSR_TVEC, a0
 
 	/* Reload the global pointer */
 .option push
@@ -156,12 +176,13 @@ relocate:
 	sfence.vma
 
 	ret
+#endif /* CONFIG_MMU */
 
 .Lsecondary_start:
 #ifdef CONFIG_SMP
 	/* Set trap vector to spin forever to help debug */
 	la a3, .Lsecondary_park
-	csrw CSR_STVEC, a3
+	csrw CSR_TVEC, a3
 
 	slli a3, a0, LGREG
 	la a1, __cpu_up_stack_pointer
@@ -181,19 +202,102 @@ relocate:
 	beqz tp, .Lwait_for_cpu_up
 	fence
 
+#ifdef CONFIG_MMU
 	/* Enable virtual memory and relocate to virtual address */
 	la a0, swapper_pg_dir
 	call relocate
+#endif
 
 	tail smp_callin
 #endif
 
+END(_start)
+
+#ifdef CONFIG_RISCV_M_MODE
+ENTRY(reset_regs)
+	li	sp, 0
+	li	gp, 0
+	li	tp, 0
+	li	t0, 0
+	li	t1, 0
+	li	t2, 0
+	li	s0, 0
+	li	s1, 0
+	li	a2, 0
+	li	a3, 0
+	li	a4, 0
+	li	a5, 0
+	li	a6, 0
+	li	a7, 0
+	li	s2, 0
+	li	s3, 0
+	li	s4, 0
+	li	s5, 0
+	li	s6, 0
+	li	s7, 0
+	li	s8, 0
+	li	s9, 0
+	li	s10, 0
+	li	s11, 0
+	li	t3, 0
+	li	t4, 0
+	li	t5, 0
+	li	t6, 0
+	csrw	CSR_SCRATCH, 0
+
+#ifdef CONFIG_FPU
+	csrr	t0, CSR_MISA
+	andi	t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
+	beqz	t0, .Lreset_regs_done
+
+	li	t1, SR_FS
+	csrs	CSR_STATUS, t1
+	fmv.s.x	f0, zero
+	fmv.s.x	f1, zero
+	fmv.s.x	f2, zero
+	fmv.s.x	f3, zero
+	fmv.s.x	f4, zero
+	fmv.s.x	f5, zero
+	fmv.s.x	f6, zero
+	fmv.s.x	f7, zero
+	fmv.s.x	f8, zero
+	fmv.s.x	f9, zero
+	fmv.s.x	f10, zero
+	fmv.s.x	f11, zero
+	fmv.s.x	f12, zero
+	fmv.s.x	f13, zero
+	fmv.s.x	f14, zero
+	fmv.s.x	f15, zero
+	fmv.s.x	f16, zero
+	fmv.s.x	f17, zero
+	fmv.s.x	f18, zero
+	fmv.s.x	f19, zero
+	fmv.s.x	f20, zero
+	fmv.s.x	f21, zero
+	fmv.s.x	f22, zero
+	fmv.s.x	f23, zero
+	fmv.s.x	f24, zero
+	fmv.s.x	f25, zero
+	fmv.s.x	f26, zero
+	fmv.s.x	f27, zero
+	fmv.s.x	f28, zero
+	fmv.s.x	f29, zero
+	fmv.s.x	f30, zero
+	fmv.s.x	f31, zero
+	csrw	fcsr, 0
+	/* note that the caller must clear SR_FS */
+#endif /* CONFIG_FPU */
+.Lreset_regs_done:
+	ret
+END(reset_regs)
+#endif /* CONFIG_RISCV_M_MODE */
+
+.section ".text", "ax",@progbits
 .align 2
 .Lsecondary_park:
 	/* We lack SMP support or have too many harts, so park this hart */
 	wfi
 	j .Lsecondary_park
-END(_start)
 
 __PAGE_ALIGNED_BSS
 	/* Empty zero page */
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index fffac6ddb0e0..345c4f2eba13 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -11,13 +11,6 @@
 #include <linux/seq_file.h>
 #include <asm/smp.h>
 
-/*
- * Possible interrupt causes:
- */
-#define INTERRUPT_CAUSE_SOFTWARE	IRQ_S_SOFT
-#define INTERRUPT_CAUSE_TIMER		IRQ_S_TIMER
-#define INTERRUPT_CAUSE_EXTERNAL	IRQ_S_EXT
-
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_stats(p, prec);
@@ -29,12 +22,12 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	irq_enter();
-	switch (regs->scause & ~SCAUSE_IRQ_FLAG) {
-	case INTERRUPT_CAUSE_TIMER:
+	switch (regs->cause & ~CAUSE_IRQ_FLAG) {
+	case RV_IRQ_TIMER:
 		riscv_timer_interrupt();
 		break;
 #ifdef CONFIG_SMP
-	case INTERRUPT_CAUSE_SOFTWARE:
+	case RV_IRQ_SOFT:
 		/*
 		 * We only use software interrupts to pass IPIs, so if a non-SMP
 		 * system gets one, then we don't know what to do.
@@ -42,11 +35,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
 		riscv_software_interrupt();
 		break;
 #endif
-	case INTERRUPT_CAUSE_EXTERNAL:
+	case RV_IRQ_EXT:
 		handle_arch_irq(regs);
 		break;
 	default:
-		pr_alert("unexpected interrupt cause 0x%lx", regs->scause);
+		pr_alert("unexpected interrupt cause 0x%lx", regs->cause);
 		BUG();
 	}
 	irq_exit();
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 70bb94ae61c5..b7401858d872 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -315,8 +315,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 			/* Ignore unresolved weak symbol */
 			if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
 				continue;
-			pr_warning("%s: Unknown symbol %s\n",
-				   me->name, strtab + sym->st_name);
+			pr_warn("%s: Unknown symbol %s\n",
+				me->name, strtab + sym->st_name);
 			return -ENOENT;
 		}
 
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 8d2804f05cf9..cf190197a22f 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -67,7 +67,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 		return;
 
 	fp = regs->s0;
-	perf_callchain_store(entry, regs->sepc);
+	perf_callchain_store(entry, regs->epc);
 
 	fp = user_backtrace(entry, fp, regs->ra);
 	while (fp && !(fp & 0x3) && entry->nr < entry->max_stack)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 85e3c39bb60b..817cf7b0974c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -35,8 +35,8 @@ void show_regs(struct pt_regs *regs)
 {
 	show_regs_print_info(KERN_DEFAULT);
 
-	pr_cont("sepc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
-		regs->sepc, regs->ra, regs->sp);
+	pr_cont("epc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
+		regs->epc, regs->ra, regs->sp);
 	pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
 		regs->gp, regs->tp, regs->t0);
 	pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
@@ -58,23 +58,23 @@ void show_regs(struct pt_regs *regs)
 	pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
 		regs->t5, regs->t6);
 
-	pr_cont("sstatus: " REG_FMT " sbadaddr: " REG_FMT " scause: " REG_FMT "\n",
-		regs->sstatus, regs->sbadaddr, regs->scause);
+	pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
+		regs->status, regs->badaddr, regs->cause);
 }
 
 void start_thread(struct pt_regs *regs, unsigned long pc,
 	unsigned long sp)
 {
-	regs->sstatus = SR_SPIE;
+	regs->status = SR_PIE;
 	if (has_fpu) {
-		regs->sstatus |= SR_FS_INITIAL;
+		regs->status |= SR_FS_INITIAL;
 		/*
 		 * Restore the initial value to the FP register
 		 * before starting the user program.
 		 */
 		fstate_restore(current, regs);
 	}
-	regs->sepc = pc;
+	regs->epc = pc;
 	regs->sp = sp;
 	set_fs(USER_DS);
 }
@@ -99,8 +99,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+	unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -110,7 +110,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		const register unsigned long gp __asm__ ("gp");
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gp = gp;
-		childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
+		/* Supervisor/Machine, irqs on: */
+		childregs->status = SR_PP | SR_PIE;
 
 		p->thread.ra = (unsigned long)ret_from_kernel_thread;
 		p->thread.s[0] = usp; /* fn */
@@ -120,7 +121,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		if (usp) /* User fork */
 			childregs->sp = usp;
 		if (clone_flags & CLONE_SETTLS)
-			childregs->tp = childregs->a5;
+			childregs->tp = tls;
 		childregs->a0 = 0; /* Return value of fork() */
 		p->thread.ra = (unsigned long)ret_from_fork;
 	}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 1252113ef8b2..407464201b91 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -154,6 +154,16 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
 		if (tracehook_report_syscall_entry(regs))
 			syscall_set_nr(current, regs, -1);
 
+	/*
+	 * Do the secure computing after ptrace; failures should be fast.
+	 * If this fails we might have return value in a0 from seccomp
+	 * (via SECCOMP_RET_ERRNO/TRACE).
+	 */
+	if (secure_computing() == -1) {
+		syscall_set_nr(current, regs, -1);
+		return;
+	}
+
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, syscall_get_nr(current, regs));
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index aa56bb135ec4..ee5878d968cc 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -5,12 +5,11 @@
 
 #include <linux/reboot.h>
 #include <linux/pm.h>
-#include <asm/sbi.h>
 
 static void default_power_off(void)
 {
-	sbi_shutdown();
-	while (1);
+	while (1)
+		wait_for_interrupt();
 }
 
 void (*pm_power_off)(void) = default_power_off;
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 4800cf703186..2a02b7eebee0 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -9,8 +9,5 @@
 /*
  * Assembly functions that may be used (directly or indirectly) by modules
  */
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__asm_copy_to_user);
-EXPORT_SYMBOL(__asm_copy_from_user);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
new file mode 100644
index 000000000000..f6c7c3e82d28
--- /dev/null
+++ b/arch/riscv/kernel/sbi.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/sbi.h>
+
+static void sbi_power_off(void)
+{
+	sbi_shutdown();
+}
+
+static int __init sbi_init(void)
+{
+	pm_power_off = sbi_power_off;
+	return 0;
+}
+early_initcall(sbi_init);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 845ae0e12115..9babfcf3bb70 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -17,6 +17,7 @@
 #include <linux/sched/task.h>
 #include <linux/swiotlb.h>
 
+#include <asm/clint.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
@@ -67,6 +68,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_bootmem();
 	paging_init();
 	unflatten_device_tree();
+	clint_init_boot_cpu();
 
 #ifdef CONFIG_SWIOTLB
 	swiotlb_init(1);
@@ -76,9 +78,5 @@ void __init setup_arch(char **cmdline_p)
 	setup_smp();
 #endif
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	riscv_fill_hwcap();
 }
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index d0f6f212f5df..17ba190e84a5 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -17,11 +17,16 @@
 #include <asm/switch_to.h>
 #include <asm/csr.h>
 
+extern u32 __user_rt_sigreturn[2];
+
 #define DEBUG_SIG 0
 
 struct rt_sigframe {
 	struct siginfo info;
 	struct ucontext uc;
+#ifndef CONFIG_MMU
+	u32 sigreturn_code[2];
+#endif
 };
 
 #ifdef CONFIG_FPU
@@ -124,7 +129,7 @@ badframe:
 		pr_info_ratelimited(
 			"%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n",
 			task->comm, task_pid_nr(task), __func__,
-			frame, (void *)regs->sepc, (void *)regs->sp);
+			frame, (void *)regs->epc, (void *)regs->sp);
 	}
 	force_sig(SIGSEGV);
 	return 0;
@@ -166,7 +171,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
 	return (void __user *)sp;
 }
 
-
 static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	struct pt_regs *regs)
 {
@@ -189,8 +193,19 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 		return -EFAULT;
 
 	/* Set up to return from userspace. */
+#ifdef CONFIG_MMU
 	regs->ra = (unsigned long)VDSO_SYMBOL(
 		current->mm->context.vdso, rt_sigreturn);
+#else
+	/*
+	 * For the nommu case we don't have a VDSO.  Instead we push two
+	 * instructions to call the rt_sigreturn syscall onto the user stack.
+	 */
+	if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
+			 sizeof(frame->sigreturn_code)))
+		return -EFAULT;
+	regs->ra = (unsigned long)&frame->sigreturn_code;
+#endif /* CONFIG_MMU */
 
 	/*
 	 * Set up registers for signal handler.
@@ -199,7 +214,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	 * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
 	 * since some things rely on this (e.g. glibc's debug/segfault.c).
 	 */
-	regs->sepc = (unsigned long)ksig->ka.sa.sa_handler;
+	regs->epc = (unsigned long)ksig->ka.sa.sa_handler;
 	regs->sp = (unsigned long)frame;
 	regs->a0 = ksig->sig;                     /* a0: signal number */
 	regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */
@@ -208,7 +223,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 #if DEBUG_SIG
 	pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n",
 		current->comm, task_pid_nr(current), ksig->sig,
-		(void *)regs->sepc, (void *)regs->ra, frame);
+		(void *)regs->epc, (void *)regs->ra, frame);
 #endif
 
 	return 0;
@@ -220,10 +235,9 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	int ret;
 
 	/* Are we from a system call? */
-	if (regs->scause == EXC_SYSCALL) {
+	if (regs->cause == EXC_SYSCALL) {
 		/* Avoid additional syscall restarting via ret_from_exception */
-		regs->scause = -1UL;
-
+		regs->cause = -1UL;
 		/* If so, check system call restarting.. */
 		switch (regs->a0) {
 		case -ERESTART_RESTARTBLOCK:
@@ -239,7 +253,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 			/* fallthrough */
 		case -ERESTARTNOINTR:
                         regs->a0 = regs->orig_a0;
-			regs->sepc -= 0x4;
+			regs->epc -= 0x4;
 			break;
 		}
 	}
@@ -261,9 +275,9 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if (regs->scause == EXC_SYSCALL) {
+	if (regs->cause == EXC_SYSCALL) {
 		/* Avoid additional syscall restarting via ret_from_exception */
-		regs->scause = -1UL;
+		regs->cause = -1UL;
 
 		/* Restart the system call - no handlers present */
 		switch (regs->a0) {
@@ -271,12 +285,12 @@ static void do_signal(struct pt_regs *regs)
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
                         regs->a0 = regs->orig_a0;
-			regs->sepc -= 0x4;
+			regs->epc -= 0x4;
 			break;
 		case -ERESTART_RESTARTBLOCK:
                         regs->a0 = regs->orig_a0;
 			regs->a7 = __NR_restart_syscall;
-			regs->sepc -= 0x4;
+			regs->epc -= 0x4;
 			break;
 		}
 	}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 5c9ec78422c2..eb878abcaaf8 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -16,6 +16,7 @@
 #include <linux/seq_file.h>
 #include <linux/delay.h>
 
+#include <asm/clint.h>
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
@@ -92,7 +93,10 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
 	smp_mb__after_atomic();
 
 	riscv_cpuid_to_hartid_mask(mask, &hartid_mask);
-	sbi_send_ipi(cpumask_bits(&hartid_mask));
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		sbi_send_ipi(cpumask_bits(&hartid_mask));
+	else
+		clint_send_ipi_mask(&hartid_mask);
 }
 
 static void send_ipi_single(int cpu, enum ipi_message_type op)
@@ -103,12 +107,18 @@ static void send_ipi_single(int cpu, enum ipi_message_type op)
 	set_bit(op, &ipi_data[cpu].bits);
 	smp_mb__after_atomic();
 
-	sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
+	else
+		clint_send_ipi_single(hartid);
 }
 
 static inline void clear_ipi(void)
 {
-	csr_clear(CSR_SIP, SIE_SSIE);
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		csr_clear(CSR_IP, IE_SIE);
+	else
+		clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
 }
 
 void riscv_software_interrupt(void)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 261f4087cc39..8bc01f0ca73b 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/mm.h>
+#include <asm/clint.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -137,6 +138,9 @@ asmlinkage __visible void __init smp_callin(void)
 {
 	struct mm_struct *mm = &init_mm;
 
+	if (!IS_ENABLED(CONFIG_RISCV_SBI))
+		clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+
 	/* All kernel threads share the same mm context.  */
 	mmgrab(mm);
 	current->active_mm = mm;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 473de3ae8bb7..f4cad5163bf2 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -41,7 +41,7 @@ void die(struct pt_regs *regs, const char *str)
 	print_modules();
 	show_regs(regs);
 
-	ret = notify_die(DIE_OOPS, str, regs, 0, regs->scause, SIGSEGV);
+	ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV);
 
 	bust_spinlocks(0);
 	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -86,7 +86,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
 #define DO_ERROR_INFO(name, signo, code, str)				\
 asmlinkage __visible void name(struct pt_regs *regs)			\
 {									\
-	do_trap_error(regs, signo, code, regs->sepc, "Oops - " str);	\
+	do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
 }
 
 DO_ERROR_INFO(do_trap_unknown,
@@ -124,9 +124,9 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
 asmlinkage __visible void do_trap_break(struct pt_regs *regs)
 {
 	if (user_mode(regs))
-		force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
-	else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
-		regs->sepc += get_break_insn_length(regs->sepc);
+		force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc);
+	else if (report_bug(regs->epc, regs) == BUG_TRAP_TYPE_WARN)
+		regs->epc += get_break_insn_length(regs->epc);
 	else
 		die(regs, "Kernel BUG");
 }
@@ -153,9 +153,9 @@ void __init trap_init(void)
 	 * Set sup0 scratch register to 0, indicating to exception vector
 	 * that we are presently executing in the kernel
 	 */
-	csr_write(CSR_SSCRATCH, 0);
+	csr_write(CSR_SCRATCH, 0);
 	/* Set the exception vector address */
-	csr_write(CSR_STVEC, &handle_exception);
+	csr_write(CSR_TVEC, &handle_exception);
 	/* Enable all interrupts */
-	csr_write(CSR_SIE, -1);
+	csr_write(CSR_IE, -1);
 }
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 49a5852fd07d..33b16f4212f7 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -58,7 +58,8 @@ quiet_cmd_vdsold = VDSOLD  $@
       cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \
                            -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \
                    $(CROSS_COMPILE)objcopy \
-                           $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@
+                           $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
+                   rm $@.tmp
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 23cd1a9e52a1..12f42f96d46e 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -52,12 +52,12 @@ SECTIONS
 
 	/* Start of data section */
 	_sdata = .;
-	RO_DATA_SECTION(L1_CACHE_BYTES)
+	RO_DATA(L1_CACHE_BYTES)
 	.srodata : {
 		*(.srodata*)
 	}
 
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	.sdata : {
 		__global_pointer$ = . + 0x800;
 		*(.sdata*)
@@ -69,7 +69,6 @@ SECTIONS
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
 	EXCEPTION_TABLE(0x10)
-	NOTES
 
 	.rel.dyn : {
 		*(.rel.dyn*)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 267feaa10f6a..47e7a8204460 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
-lib-y	+= delay.o
-lib-y	+= memcpy.o
-lib-y	+= memset.o
-lib-y	+= uaccess.o
-
-lib-$(CONFIG_64BIT) += tishift.o
+lib-y			+= delay.o
+lib-y			+= memcpy.o
+lib-y			+= memset.o
+lib-$(CONFIG_MMU)	+= uaccess.o
+lib-$(CONFIG_64BIT)	+= tishift.o
diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S
index 15f9d54c7db6..ef90075c4b0a 100644
--- a/arch/riscv/lib/tishift.S
+++ b/arch/riscv/lib/tishift.S
@@ -4,34 +4,73 @@
  */
 
 #include <linux/linkage.h>
+#include <asm-generic/export.h>
 
-ENTRY(__lshrti3)
+SYM_FUNC_START(__lshrti3)
 	beqz	a2, .L1
 	li	a5,64
 	sub	a5,a5,a2
-	addi	sp,sp,-16
 	sext.w	a4,a5
 	blez	a5, .L2
 	sext.w	a2,a2
-	sll	a4,a1,a4
 	srl	a0,a0,a2
-	srl	a1,a1,a2
+	sll	a4,a1,a4
+	srl	a2,a1,a2
 	or	a0,a0,a4
-	sd	a1,8(sp)
-	sd	a0,0(sp)
-	ld	a0,0(sp)
-	ld	a1,8(sp)
-	addi	sp,sp,16
-	ret
+	mv	a1,a2
 .L1:
 	ret
 .L2:
-	negw	a4,a4
-	srl	a1,a1,a4
-	sd	a1,0(sp)
-	sd	zero,8(sp)
-	ld	a0,0(sp)
-	ld	a1,8(sp)
-	addi	sp,sp,16
+	negw	a0,a4
+	li	a2,0
+	srl	a0,a1,a0
+	mv	a1,a2
+	ret
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
+
+SYM_FUNC_START(__ashrti3)
+	beqz	a2, .L3
+	li	a5,64
+	sub	a5,a5,a2
+	sext.w	a4,a5
+	blez	a5, .L4
+	sext.w	a2,a2
+	srl	a0,a0,a2
+	sll	a4,a1,a4
+	sra	a2,a1,a2
+	or	a0,a0,a4
+	mv	a1,a2
+.L3:
+	ret
+.L4:
+	negw	a0,a4
+	srai	a2,a1,0x3f
+	sra	a0,a1,a0
+	mv	a1,a2
+	ret
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__ashlti3)
+	beqz	a2, .L5
+	li	a5,64
+	sub	a5,a5,a2
+	sext.w	a4,a5
+	blez	a5, .L6
+	sext.w	a2,a2
+	sll	a1,a1,a2
+	srl	a4,a0,a4
+	sll	a2,a0,a2
+	or	a1,a1,a4
+	mv	a0,a2
+.L5:
+	ret
+.L6:
+	negw	a1,a4
+	li	a2,0
+	sll	a1,a0,a1
+	mv	a0,a2
 	ret
-ENDPROC(__lshrti3)
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index ed2696c0143d..f29d2ba2c0a6 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm-generic/export.h>
 #include <asm/asm.h>
 #include <asm/csr.h>
 
@@ -18,7 +19,7 @@ ENTRY(__asm_copy_from_user)
 
 	/* Enable access to user memory */
 	li t6, SR_SUM
-	csrs CSR_SSTATUS, t6
+	csrs CSR_STATUS, t6
 
 	add a3, a1, a2
 	/* Use word-oriented copy only if low-order bits match */
@@ -47,7 +48,7 @@ ENTRY(__asm_copy_from_user)
 
 3:
 	/* Disable access to user memory */
-	csrc CSR_SSTATUS, t6
+	csrc CSR_STATUS, t6
 	li a0, 0
 	ret
 4: /* Edge case: unalignment */
@@ -66,13 +67,15 @@ ENTRY(__asm_copy_from_user)
 	j 3b
 ENDPROC(__asm_copy_to_user)
 ENDPROC(__asm_copy_from_user)
+EXPORT_SYMBOL(__asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_from_user)
 
 
 ENTRY(__clear_user)
 
 	/* Enable access to user memory */
 	li t6, SR_SUM
-	csrs CSR_SSTATUS, t6
+	csrs CSR_STATUS, t6
 
 	add a3, a0, a1
 	addi t0, a0, SZREG-1
@@ -94,7 +97,7 @@ ENTRY(__clear_user)
 
 3:
 	/* Disable access to user memory */
-	csrc CSR_SSTATUS, t6
+	csrc CSR_STATUS, t6
 	li a0, 0
 	ret
 4: /* Edge case: unalignment */
@@ -108,17 +111,18 @@ ENTRY(__clear_user)
 	bltu a0, a3, 5b
 	j 3b
 ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
 
 	.section .fixup,"ax"
 	.balign 4
 	/* Fixup code for __copy_user(10) and __clear_user(11) */
 10:
 	/* Disable access to user memory */
-	csrs CSR_SSTATUS, t6
+	csrs CSR_STATUS, t6
 	mv a0, a2
 	ret
 11:
-	csrs CSR_SSTATUS, t6
+	csrs CSR_STATUS, t6
 	mv a0, a1
 	ret
 	.previous
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 9d9a17335686..a1bd95c8047a 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -6,12 +6,10 @@ CFLAGS_REMOVE_init.o = -pg
 endif
 
 obj-y += init.o
-obj-y += fault.o
 obj-y += extable.o
-obj-y += ioremap.o
+obj-$(CONFIG_MMU) += fault.o
 obj-y += cacheflush.o
 obj-y += context.o
-obj-y += sifive_l2_cache.o
 
 ifeq ($(CONFIG_MMU),y)
 obj-$(CONFIG_SMP) += tlbflush.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 3f15938dec89..8930ab7278e6 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -10,10 +10,19 @@
 
 #include <asm/sbi.h>
 
+static void ipi_remote_fence_i(void *info)
+{
+	return local_flush_icache_all();
+}
+
 void flush_icache_all(void)
 {
-	sbi_remote_fence_i(NULL);
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		sbi_remote_fence_i(NULL);
+	else
+		on_each_cpu(ipi_remote_fence_i, NULL, 1);
 }
+EXPORT_SYMBOL(flush_icache_all);
 
 /*
  * Performs an icache flush for the given MM context.  RISC-V has no direct
@@ -28,7 +37,7 @@ void flush_icache_all(void)
 void flush_icache_mm(struct mm_struct *mm, bool local)
 {
 	unsigned int cpu;
-	cpumask_t others, hmask, *mask;
+	cpumask_t others, *mask;
 
 	preempt_disable();
 
@@ -46,10 +55,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 	 */
 	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
 	local |= cpumask_empty(&others);
-	if (mm != current->active_mm || !local) {
-		riscv_cpuid_to_hartid_mask(&others, &hmask);
-		sbi_remote_fence_i(hmask.bits);
-	} else {
+	if (mm == current->active_mm && local) {
 		/*
 		 * It's assumed that at least one strongly ordered operation is
 		 * performed on this hart between setting a hart's cpumask bit
@@ -59,6 +65,13 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 		 * with flush_icache_deferred().
 		 */
 		smp_mb();
+	} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
+		cpumask_t hartid_mask;
+
+		riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
+		sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+	} else {
+		on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
 	}
 
 	preempt_enable();
@@ -66,6 +79,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_MMU
 void flush_icache_pte(pte_t pte)
 {
 	struct page *page = pte_page(pte);
@@ -73,3 +87,4 @@ void flush_icache_pte(pte_t pte)
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		flush_icache_all();
 }
+#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ca66d44156b6..613ec81a8979 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -58,8 +58,10 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	cpumask_clear_cpu(cpu, mm_cpumask(prev));
 	cpumask_set_cpu(cpu, mm_cpumask(next));
 
+#ifdef CONFIG_MMU
 	csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
 	local_flush_tlb_all();
+#endif
 
 	flush_icache_deferred(next);
 }
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 7aed9178d365..2fc729422151 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -15,9 +15,9 @@ int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fixup;
 
-	fixup = search_exception_tables(regs->sepc);
+	fixup = search_exception_tables(regs->epc);
 	if (fixup) {
-		regs->sepc = fixup->fixup;
+		regs->epc = fixup->fixup;
 		return 1;
 	}
 	return 0;
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 247b8c859c44..cf7248e07f43 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -34,8 +34,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	int code = SEGV_MAPERR;
 	vm_fault_t fault;
 
-	cause = regs->scause;
-	addr = regs->sbadaddr;
+	cause = regs->cause;
+	addr = regs->badaddr;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -53,7 +53,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 		goto vmalloc_fault;
 
 	/* Enable interrupts if they were enabled in the parent context. */
-	if (likely(regs->sstatus & SR_SPIE))
+	if (likely(regs->status & SR_PIE))
 		local_irq_enable();
 
 	/*
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 573463d1c799..965a8cf4829c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -26,6 +26,7 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 EXPORT_SYMBOL(empty_zero_page);
 
 extern char _start[];
+void *dtb_early_va;
 
 static void __init zone_sizes_init(void)
 {
@@ -40,11 +41,42 @@ static void __init zone_sizes_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
-void setup_zero_page(void)
+static void setup_zero_page(void)
 {
 	memset((void *)empty_zero_page, 0, PAGE_SIZE);
 }
 
+#ifdef CONFIG_DEBUG_VM
+static inline void print_mlk(char *name, unsigned long b, unsigned long t)
+{
+	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
+		  (((t) - (b)) >> 10));
+}
+
+static inline void print_mlm(char *name, unsigned long b, unsigned long t)
+{
+	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
+		  (((t) - (b)) >> 20));
+}
+
+static void print_vm_layout(void)
+{
+	pr_notice("Virtual kernel memory layout:\n");
+	print_mlk("fixmap", (unsigned long)FIXADDR_START,
+		  (unsigned long)FIXADDR_TOP);
+	print_mlm("pci io", (unsigned long)PCI_IO_START,
+		  (unsigned long)PCI_IO_END);
+	print_mlm("vmemmap", (unsigned long)VMEMMAP_START,
+		  (unsigned long)VMEMMAP_END);
+	print_mlm("vmalloc", (unsigned long)VMALLOC_START,
+		  (unsigned long)VMALLOC_END);
+	print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
+		  (unsigned long)high_memory);
+}
+#else
+static void print_vm_layout(void) { }
+#endif /* CONFIG_DEBUG_VM */
+
 void __init mem_init(void)
 {
 #ifdef CONFIG_FLATMEM
@@ -55,6 +87,7 @@ void __init mem_init(void)
 	memblock_free_all();
 
 	mem_init_print_info(NULL);
+	print_vm_layout();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -66,13 +99,13 @@ static void __init setup_initrd(void)
 		pr_info("initrd not found or empty");
 		goto disable;
 	}
-	if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
+	if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) {
 		pr_err("initrd extends beyond end of memory");
 		goto disable;
 	}
 
 	size = initrd_end - initrd_start;
-	memblock_reserve(__pa(initrd_start), size);
+	memblock_reserve(__pa_symbol(initrd_start), size);
 	initrd_below_start_ok = 1;
 
 	pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
@@ -91,8 +124,8 @@ void __init setup_bootmem(void)
 {
 	struct memblock_region *reg;
 	phys_addr_t mem_size = 0;
-	phys_addr_t vmlinux_end = __pa(&_end);
-	phys_addr_t vmlinux_start = __pa(&_start);
+	phys_addr_t vmlinux_end = __pa_symbol(&_end);
+	phys_addr_t vmlinux_start = __pa_symbol(&_start);
 
 	/* Find the memory region containing the kernel */
 	for_each_memblock(memory, reg) {
@@ -142,12 +175,12 @@ void __init setup_bootmem(void)
 	}
 }
 
+#ifdef CONFIG_MMU
 unsigned long va_pa_offset;
 EXPORT_SYMBOL(va_pa_offset);
 unsigned long pfn_base;
 EXPORT_SYMBOL(pfn_base);
 
-void *dtb_early_va;
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -273,7 +306,6 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 #define get_pgd_next_virt(__pa)	get_pmd_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE		PMD_SIZE
 #define fixmap_pgd_next		fixmap_pmd
 #else
 #define pgd_next_t		pte_t
@@ -281,7 +313,6 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 #define get_pgd_next_virt(__pa)	get_pte_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE		PGDIR_SIZE
 #define fixmap_pgd_next		fixmap_pte
 #endif
 
@@ -314,14 +345,11 @@ static void __init create_pgd_mapping(pgd_t *pgdp,
 
 static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 {
-	uintptr_t map_size = PAGE_SIZE;
-
-	/* Upgrade to PMD/PGDIR mappings whenever possible */
-	if (!(base & (PTE_PARENT_SIZE - 1)) &&
-	    !(size & (PTE_PARENT_SIZE - 1)))
-		map_size = PTE_PARENT_SIZE;
+	/* Upgrade to PMD_SIZE mappings whenever possible */
+	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
+		return PAGE_SIZE;
 
-	return map_size;
+	return PMD_SIZE;
 }
 
 /*
@@ -417,7 +445,7 @@ static void __init setup_vm_final(void)
 
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
-			   __pa(fixmap_pgd_next),
+			   __pa_symbol(fixmap_pgd_next),
 			   PGDIR_SIZE, PAGE_TABLE);
 
 	/* Map all memory banks */
@@ -446,9 +474,19 @@ static void __init setup_vm_final(void)
 	clear_fixmap(FIX_PMD);
 
 	/* Move to swapper page table */
-	csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
+	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
 	local_flush_tlb_all();
 }
+#else
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+	dtb_early_va = (void *)dtb_pa;
+}
+
+static inline void setup_vm_final(void)
+{
+}
+#endif /* CONFIG_MMU */
 
 void __init paging_init(void)
 {
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
deleted file mode 100644
index ac621ddb45c0..000000000000
--- a/arch/riscv/mm/ioremap.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2012 Regents of the University of California
- */
-
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/io.h>
-
-#include <asm/pgtable.h>
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
-	pgprot_t prot, void *caller)
-{
-	phys_addr_t last_addr;
-	unsigned long offset, vaddr;
-	struct vm_struct *area;
-
-	/* Disallow wrap-around or zero size */
-	last_addr = addr + size - 1;
-	if (!size || last_addr < addr)
-		return NULL;
-
-	/* Page-align mappings */
-	offset = addr & (~PAGE_MASK);
-	addr -= offset;
-	size = PAGE_ALIGN(size + offset);
-
-	area = get_vm_area_caller(size, VM_IOREMAP, caller);
-	if (!area)
-		return NULL;
-	vaddr = (unsigned long)area->addr;
-
-	if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
-		free_vm_area(area);
-		return NULL;
-	}
-
-	return (void __iomem *)(vaddr + offset);
-}
-
-/*
- * ioremap     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * Must be freed with iounmap.
- */
-void __iomem *ioremap(phys_addr_t offset, unsigned long size)
-{
-	return __ioremap_caller(offset, size, PAGE_KERNEL,
-		__builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
-
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
-	vunmap((void *)((unsigned long)addr & PAGE_MASK));
-}
-EXPORT_SYMBOL(iounmap);
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 24cd33d2c48f..720b443c4528 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -2,6 +2,7 @@
 
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/sched.h>
 #include <asm/sbi.h>
 
 void flush_tlb_all(void)
@@ -9,13 +10,33 @@ void flush_tlb_all(void)
 	sbi_remote_sfence_vma(NULL, 0, -1);
 }
 
+/*
+ * This function must not be called with cmask being null.
+ * Kernel may panic if cmask is NULL.
+ */
 static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
 				  unsigned long size)
 {
 	struct cpumask hmask;
+	unsigned int cpuid;
 
-	riscv_cpuid_to_hartid_mask(cmask, &hmask);
-	sbi_remote_sfence_vma(hmask.bits, start, size);
+	if (cpumask_empty(cmask))
+		return;
+
+	cpuid = get_cpu();
+
+	if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+		/* local cpu is the only cpu present in cpumask */
+		if (size <= PAGE_SIZE)
+			local_flush_tlb_page(start);
+		else
+			local_flush_tlb_all();
+	} else {
+		riscv_cpuid_to_hartid_mask(cmask, &hmask);
+		sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size);
+	}
+
+	put_cpu();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 5451ef3845f2..483f4ad7f4dc 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx)
 	return false;
 }
 
+static void mark_fp(struct rv_jit_context *ctx)
+{
+	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
+}
+
 static void mark_call(struct rv_jit_context *ctx)
 {
 	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
@@ -456,6 +461,11 @@ static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
 	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
 }
 
+static u32 rv_auipc(u8 rd, u32 imm31_12)
+{
+	return rv_u_insn(imm31_12, rd, 0x17);
+}
+
 static bool is_12b_int(s64 val)
 {
 	return -(1 << 11) <= val && val < (1 << 11);
@@ -479,27 +489,7 @@ static bool is_32b_int(s64 val)
 static int is_12b_check(int off, int insn)
 {
 	if (!is_12b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_13b_check(int off, int insn)
-{
-	if (!is_13b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_21b_check(int off, int insn)
-{
-	if (!is_21b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
+		pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
 		       insn, (int)off);
 		return -1;
 	}
@@ -545,10 +535,13 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
 		emit(rv_addi(rd, rd, lower), ctx);
 }
 
-static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
+static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
 {
-	int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
+	int from, to;
 
+	off++; /* BPF branch is from PC+1, RV is from PC */
+	from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+	to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
 	return (to - from) << 2;
 }
 
@@ -559,7 +552,7 @@ static int epilogue_offset(struct rv_jit_context *ctx)
 	return (to - from) << 2;
 }
 
-static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
+static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 {
 	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
 
@@ -596,8 +589,114 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
 
 	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
 	/* Set return value. */
-	emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
-	emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
+	if (!is_tail_call)
+		emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
+	emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+		     is_tail_call ? 4 : 0), /* skip TCC init */
+	     ctx);
+}
+
+/* return -1 or inverted cond */
+static int invert_bpf_cond(u8 cond)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		return BPF_JNE;
+	case BPF_JGT:
+		return BPF_JLE;
+	case BPF_JLT:
+		return BPF_JGE;
+	case BPF_JGE:
+		return BPF_JLT;
+	case BPF_JLE:
+		return BPF_JGT;
+	case BPF_JNE:
+		return BPF_JEQ;
+	case BPF_JSGT:
+		return BPF_JSLE;
+	case BPF_JSLT:
+		return BPF_JSGE;
+	case BPF_JSGE:
+		return BPF_JSLT;
+	case BPF_JSLE:
+		return BPF_JSGT;
+	}
+	return -1;
+}
+
+static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
+		     struct rv_jit_context *ctx)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGT:
+		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JLT:
+		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGE:
+		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JLE:
+		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JNE:
+		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGT:
+		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLT:
+		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGE:
+		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLE:
+		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
+	}
+}
+
+static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
+			struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (is_13b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, rvoff, ctx);
+		return;
+	}
+
+	/* Adjust for jal */
+	rvoff -= 4;
+
+	/* Transform, e.g.:
+	 *   bne rd,rs,foo
+	 * to
+	 *   beq rd,rs,<.L1>
+	 *   (auipc foo)
+	 *   jal(r) foo
+	 * .L1
+	 */
+	cond = invert_bpf_cond(cond);
+	if (is_21b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, 8, ctx);
+		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		return;
+	}
+
+	/* 32b No need for an additional rvoff adjustment, since we
+	 * get that from the auipc at PC', where PC = PC' + 4.
+	 */
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+
+	emit_bcc(cond, rd, rs, 12, ctx);
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
 }
 
 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
@@ -627,18 +726,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 		return -1;
 	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);
+	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
 
-	/* if (--TCC < 0)
+	/* if (TCC-- < 0)
 	 *     goto out;
 	 */
 	emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
 
 	/* prog = array->ptrs[index];
 	 * if (!prog)
@@ -651,18 +746,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 		return -1;
 	emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
 
 	/* goto *(prog->bpf_func + 4); */
 	off = offsetof(struct bpf_prog, bpf_func);
 	if (is_12b_check(off, insn))
 		return -1;
 	emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
-	emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
 	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
-	__build_epilogue(RV_REG_T3, ctx);
+	__build_epilogue(true, ctx);
 	return 0;
 }
 
@@ -687,13 +779,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
 		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
 }
 
-static int rv_offset_check(int *rvoff, s16 off, int insn,
-			   struct rv_jit_context *ctx)
-{
-	*rvoff = rv_offset(insn + off, insn, ctx);
-	return is_13b_check(*rvoff, insn);
-}
-
 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
 {
 	emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
@@ -726,13 +811,57 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
 	*rd = RV_REG_T2;
 }
 
+static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+			       struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+		emit(rv_jal(rd, rvoff >> 1), ctx);
+		return;
+	}
+
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
+}
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+	return cond == BPF_JSGT || cond == BPF_JSLT ||
+		cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+	s64 off = 0;
+	u64 ip;
+	u8 rd;
+
+	if (addr && ctx->insns) {
+		ip = (u64)(long)(ctx->insns + ctx->ninsns);
+		off = addr - ip;
+		if (!is_32b_int(off)) {
+			pr_err("bpf-jit: target call addr %pK is out of range\n",
+			       (void *)addr);
+			return -ERANGE;
+		}
+	}
+
+	emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
+	rd = bpf_to_rv_reg(BPF_REG_0, ctx);
+	emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+	return 0;
+}
+
 static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		     bool extra_pass)
 {
 	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
 		    BPF_CLASS(insn->code) == BPF_JMP;
+	int s, e, rvoff, i = insn - ctx->prog->insnsi;
 	struct bpf_prog_aux *aux = ctx->prog->aux;
-	int rvoff, i = insn - ctx->prog->insnsi;
 	u8 rd = -1, rs = -1, code = insn->code;
 	s16 off = insn->off;
 	s32 imm = insn->imm;
@@ -1000,214 +1129,110 @@ out_be:
 
 	/* JUMP off */
 	case BPF_JMP | BPF_JA:
-		rvoff = rv_offset(i + off, i, ctx);
-		if (!is_21b_int(rvoff)) {
-			pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-			       i, rvoff);
-			return -1;
-		}
-
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* IF (dst COND src) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_X:
 	case BPF_JMP32 | BPF_JEQ | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_X:
 	case BPF_JMP32 | BPF_JGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP32 | BPF_JLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_X:
 	case BPF_JMP32 | BPF_JGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP32 | BPF_JLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_X:
 	case BPF_JMP32 | BPF_JNE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_X:
 	case BPF_JMP32 | BPF_JSGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP32 | BPF_JSLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_X:
 	case BPF_JMP32 | BPF_JSGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_X:
 	case BPF_JMP32 | BPF_JSLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_X:
 	case BPF_JMP32 | BPF_JSET | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_and(RV_REG_T1, rd, rs), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		if (!is64) {
+			s = ctx->ninsns;
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd_rs(&rd, &rs, ctx);
+			else
+				emit_zext_32_rd_rs(&rd, &rs, ctx);
+			e = ctx->ninsns;
+
+			/* Adjust for extra insns */
+			rvoff -= (e - s) << 2;
+		}
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, rs), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		}
 		break;
 
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
 	case BPF_JMP32 | BPF_JEQ | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_K:
 	case BPF_JMP32 | BPF_JGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_K:
 	case BPF_JMP32 | BPF_JLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_K:
 	case BPF_JMP32 | BPF_JGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP32 | BPF_JLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP32 | BPF_JNE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_K:
 	case BPF_JMP32 | BPF_JSGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP32 | BPF_JSLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_K:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_K:
 	case BPF_JMP32 | BPF_JSET | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
 		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		if (!is64) {
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd(&rd, ctx);
+			else
+				emit_zext_32_rd_t1(&rd, ctx);
+		}
+		e = ctx->ninsns;
+
+		/* Adjust for extra insns */
+		rvoff -= (e - s) << 2;
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+		}
 		break;
 
 	/* function call */
 	case BPF_JMP | BPF_CALL:
 	{
 		bool fixed;
-		int i, ret;
+		int ret;
 		u64 addr;
 
 		mark_call(ctx);
@@ -1215,20 +1240,9 @@ out_be:
 					    &fixed);
 		if (ret < 0)
 			return ret;
-		if (fixed) {
-			emit_imm(RV_REG_T1, addr, ctx);
-		} else {
-			i = ctx->ninsns;
-			emit_imm(RV_REG_T1, addr, ctx);
-			for (i = ctx->ninsns - i; i < 8; i++) {
-				/* nop */
-				emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
-				     ctx);
-			}
-		}
-		emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
-		rd = bpf_to_rv_reg(BPF_REG_0, ctx);
-		emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+		ret = emit_call(fixed, addr, ctx);
+		if (ret)
+			return ret;
 		break;
 	}
 	/* tail call */
@@ -1243,9 +1257,7 @@ out_be:
 			break;
 
 		rvoff = epilogue_offset(ctx);
-		if (is_21b_check(rvoff, i))
-			return -1;
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* dst = imm64 */
@@ -1426,6 +1438,10 @@ static void build_prologue(struct rv_jit_context *ctx)
 {
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
 
+	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+	if (bpf_stack_adjust)
+		mark_fp(ctx);
+
 	if (seen_reg(RV_REG_RA, ctx))
 		stack_adjust += 8;
 	stack_adjust += 8; /* RV_REG_FP */
@@ -1443,7 +1459,6 @@ static void build_prologue(struct rv_jit_context *ctx)
 		stack_adjust += 8;
 
 	stack_adjust = round_up(stack_adjust, 16);
-	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
 	stack_adjust += bpf_stack_adjust;
 
 	store_offset = stack_adjust - 8;
@@ -1502,10 +1517,10 @@ static void build_prologue(struct rv_jit_context *ctx)
 
 static void build_epilogue(struct rv_jit_context *ctx)
 {
-	__build_epilogue(RV_REG_RA, ctx);
+	__build_epilogue(false, ctx);
 }
 
-static int build_body(struct rv_jit_context *ctx, bool extra_pass)
+static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	int i;
@@ -1517,12 +1532,12 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass)
 		ret = emit_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
-			if (ctx->insns == NULL)
-				ctx->offset[i] = ctx->ninsns;
+			if (offset)
+				offset[i] = ctx->ninsns;
 			continue;
 		}
-		if (ctx->insns == NULL)
-			ctx->offset[i] = ctx->ninsns;
+		if (offset)
+			offset[i] = ctx->ninsns;
 		if (ret)
 			return ret;
 	}
@@ -1548,9 +1563,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	bool tmp_blinded = false, extra_pass = false;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	int pass = 0, prev_ninsns = 0, i;
 	struct rv_jit_data *jit_data;
+	unsigned int image_size = 0;
 	struct rv_jit_context *ctx;
-	unsigned int image_size;
 
 	if (!prog->jit_requested)
 		return orig_prog;
@@ -1587,33 +1603,59 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog = orig_prog;
 		goto out_offset;
 	}
+	for (i = 0; i < prog->len; i++) {
+		prev_ninsns += 32;
+		ctx->offset[i] = prev_ninsns;
+	}
 
-	/* First pass generates the ctx->offset, but does not emit an image. */
-	if (build_body(ctx, extra_pass)) {
-		prog = orig_prog;
-		goto out_offset;
+	for (i = 0; i < 16; i++) {
+		pass++;
+		ctx->ninsns = 0;
+		if (build_body(ctx, extra_pass, ctx->offset)) {
+			prog = orig_prog;
+			goto out_offset;
+		}
+		build_prologue(ctx);
+		ctx->epilogue_offset = ctx->ninsns;
+		build_epilogue(ctx);
+
+		if (ctx->ninsns == prev_ninsns) {
+			if (jit_data->header)
+				break;
+
+			image_size = sizeof(u32) * ctx->ninsns;
+			jit_data->header =
+				bpf_jit_binary_alloc(image_size,
+						     &jit_data->image,
+						     sizeof(u32),
+						     bpf_fill_ill_insns);
+			if (!jit_data->header) {
+				prog = orig_prog;
+				goto out_offset;
+			}
+
+			ctx->insns = (u32 *)jit_data->image;
+			/* Now, when the image is allocated, the image
+			 * can potentially shrink more (auipc/jalr ->
+			 * jal).
+			 */
+		}
+		prev_ninsns = ctx->ninsns;
 	}
-	build_prologue(ctx);
-	ctx->epilogue_offset = ctx->ninsns;
-	build_epilogue(ctx);
 
-	/* Allocate image, now that we know the size. */
-	image_size = sizeof(u32) * ctx->ninsns;
-	jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
-						sizeof(u32),
-						bpf_fill_ill_insns);
-	if (!jit_data->header) {
+	if (i == 16) {
+		pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
+		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
 	}
 
-	/* Second, real pass, that acutally emits the image. */
-	ctx->insns = (u32 *)jit_data->image;
 skip_init_ctx:
+	pass++;
 	ctx->ninsns = 0;
 
 	build_prologue(ctx);
-	if (build_body(ctx, extra_pass)) {
+	if (build_body(ctx, extra_pass, NULL)) {
 		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
@@ -1621,7 +1663,7 @@ skip_init_ctx:
 	build_epilogue(ctx);
 
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
+		bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
 
 	prog->bpf_func = (void *)ctx->insns;
 	prog->jited = 1;
@@ -1641,3 +1683,16 @@ out:
 					   tmp : orig_prog);
 	return prog;
 }
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+				    BPF_JIT_REGION_END, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+	return vfree(addr);
+}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 43a81d0ad507..287714d51b47 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
 	def_bool y
 
 config GENERIC_LOCKBREAK
-	def_bool y if PREEMPT
+	def_bool y if PREEMPTTION
 
 config PGSTE
 	def_bool y if KVM
@@ -110,7 +110,7 @@ config S390
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
@@ -124,6 +124,7 @@ config S390
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
+	select HAVE_ARCH_KASAN_VMALLOC
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
@@ -170,6 +171,7 @@ config S390
 	select HAVE_PERF_EVENTS
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
@@ -246,8 +248,8 @@ choice
 
 config MARCH_Z900
 	bool "IBM zSeries model z800 and z900"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z900_FEATURES
+	depends on $(cc-option,-march=z900)
 	help
 	  Select this to enable optimizations for model z800/z900 (2064 and
 	  2066 series). This will enable some optimizations that are not
@@ -255,8 +257,8 @@ config MARCH_Z900
 
 config MARCH_Z990
 	bool "IBM zSeries model z890 and z990"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z990_FEATURES
+	depends on $(cc-option,-march=z990)
 	help
 	  Select this to enable optimizations for model z890/z990 (2084 and
 	  2086 series). The kernel will be slightly faster but will not work
@@ -264,8 +266,8 @@ config MARCH_Z990
 
 config MARCH_Z9_109
 	bool "IBM System z9"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z9_109_FEATURES
+	depends on $(cc-option,-march=z9-109)
 	help
 	  Select this to enable optimizations for IBM System z9 (2094 and
 	  2096 series). The kernel will be slightly faster but will not work
@@ -274,6 +276,7 @@ config MARCH_Z9_109
 config MARCH_Z10
 	bool "IBM System z10"
 	select HAVE_MARCH_Z10_FEATURES
+	depends on $(cc-option,-march=z10)
 	help
 	  Select this to enable optimizations for IBM System z10 (2097 and
 	  2098 series). The kernel will be slightly faster but will not work
@@ -282,6 +285,7 @@ config MARCH_Z10
 config MARCH_Z196
 	bool "IBM zEnterprise 114 and 196"
 	select HAVE_MARCH_Z196_FEATURES
+	depends on $(cc-option,-march=z196)
 	help
 	  Select this to enable optimizations for IBM zEnterprise 114 and 196
 	  (2818 and 2817 series). The kernel will be slightly faster but will
@@ -290,6 +294,7 @@ config MARCH_Z196
 config MARCH_ZEC12
 	bool "IBM zBC12 and zEC12"
 	select HAVE_MARCH_ZEC12_FEATURES
+	depends on $(cc-option,-march=zEC12)
 	help
 	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
 	  2827 series). The kernel will be slightly faster but will not work on
@@ -298,6 +303,7 @@ config MARCH_ZEC12
 config MARCH_Z13
 	bool "IBM z13s and z13"
 	select HAVE_MARCH_Z13_FEATURES
+	depends on $(cc-option,-march=z13)
 	help
 	  Select this to enable optimizations for IBM z13s and z13 (2965 and
 	  2964 series). The kernel will be slightly faster but will not work on
@@ -306,6 +312,7 @@ config MARCH_Z13
 config MARCH_Z14
 	bool "IBM z14 ZR1 and z14"
 	select HAVE_MARCH_Z14_FEATURES
+	depends on $(cc-option,-march=z14)
 	help
 	  Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
 	  and 3906 series). The kernel will be slightly faster but will not
@@ -314,6 +321,7 @@ config MARCH_Z14
 config MARCH_Z15
 	bool "IBM z15"
 	select HAVE_MARCH_Z15_FEATURES
+	depends on $(cc-option,-march=z15)
 	help
 	  Select this to enable optimizations for IBM z15 (8562
 	  and 8561 series). The kernel will be slightly faster but will not
@@ -367,33 +375,39 @@ config TUNE_DEFAULT
 
 config TUNE_Z900
 	bool "IBM zSeries model z800 and z900"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z900)
 
 config TUNE_Z990
 	bool "IBM zSeries model z890 and z990"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z990)
 
 config TUNE_Z9_109
 	bool "IBM System z9"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z9-109)
 
 config TUNE_Z10
 	bool "IBM System z10"
+	depends on $(cc-option,-mtune=z10)
 
 config TUNE_Z196
 	bool "IBM zEnterprise 114 and 196"
+	depends on $(cc-option,-mtune=z196)
 
 config TUNE_ZEC12
 	bool "IBM zBC12 and zEC12"
+	depends on $(cc-option,-mtune=zEC12)
 
 config TUNE_Z13
-	bool "IBM z13"
+	bool "IBM z13s and z13"
+	depends on $(cc-option,-mtune=z13)
 
 config TUNE_Z14
-	bool "IBM z14"
+	bool "IBM z14 ZR1 and z14"
+	depends on $(cc-option,-mtune=z14)
 
 config TUNE_Z15
 	bool "IBM z15"
+	depends on $(cc-option,-mtune=z15)
 
 endchoice
 
@@ -414,9 +428,6 @@ config COMPAT
 	  (and some other stuff like libraries and such) is needed for
 	  executing 31 bit applications.  It is safe to say "Y".
 
-config COMPAT_VDSO
-	def_bool COMPAT && !CC_IS_CLANG
-
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
@@ -1006,3 +1017,17 @@ config S390_GUEST
 	  the KVM hypervisor.
 
 endmenu
+
+menu "Selftests"
+
+config S390_UNWIND_SELFTEST
+	def_tristate n
+	prompt "Test unwind functions"
+	help
+	  This option enables s390 specific stack unwinder testing kernel
+	  module. This option is not useful for distributions or general
+	  kernels, but only for kernel developers working on architecture code.
+
+	  Say N if you are unsure.
+
+endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 478b645b20dd..e0e3a465bbfd 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -69,7 +69,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 #
 cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
 
-ifeq ($(call cc-option-yn,-mpacked-stack),y)
+ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y)
 cflags-$(CONFIG_PACK_STACK)  += -mpacked-stack -D__PACK_STACK
 aflags-$(CONFIG_PACK_STACK)  += -D__PACK_STACK
 endif
@@ -157,7 +157,6 @@ zfcpdump:
 
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 4b86a8d3c121..dae10961d072 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -329,7 +329,7 @@ ENTRY(startup_kdump)
 	.quad	.Lduct			# cr5: primary-aste origin
 	.quad	0			# cr6:	I/O interrupts
 	.quad	0			# cr7:	secondary space segment table
-	.quad	0			# cr8:	access registers translation
+	.quad	0x0000000000008000	# cr8:	access registers translation
 	.quad	0			# cr9:	tracing off
 	.quad	0			# cr10: tracing off
 	.quad	0			# cr11: tracing off
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 5367950510f6..3b3a11f95269 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -46,7 +46,7 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = {
 	.diag0c = _diag0c_dma,
 	.diag308_reset = _diag308_reset_dma
 };
-static struct diag210 _diag210_tmp_dma __section(".dma.data");
+static struct diag210 _diag210_tmp_dma __section(.dma.data);
 struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
 void _swsusp_reset_dma(void);
 unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
@@ -170,6 +170,11 @@ void startup_kernel(void)
 		handle_relocs(__kaslr_offset);
 
 	if (__kaslr_offset) {
+		/*
+		 * Save KASLR offset for early dumps, before vmcore_info is set.
+		 * Mark as uneven to distinguish from real vmcore_info pointer.
+		 */
+		S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
 		/* Clear non-relocated kernel */
 		if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
 			memset(img, 0, vmlinux.image_size);
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 38d64030aacf..2e60c80395ab 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -62,7 +62,6 @@ CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STATIC_KEYS_SELFTEST=y
-CONFIG_REFCOUNT_FULL=y
 CONFIG_LOCK_EVENT_COUNTS=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9803e96d2924..1c23d84a9097 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
 	int key_len;
 	unsigned long fc;
 	union {
-		struct crypto_sync_skcipher *blk;
+		struct crypto_skcipher *skcipher;
 		struct crypto_cipher *cip;
 	} fallback;
 };
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
 	u8 pcc_key[32];
 	int key_len;
 	unsigned long fc;
-	struct crypto_sync_skcipher *fallback;
+	struct crypto_skcipher *fallback;
 };
 
 struct gcm_sg_walk {
@@ -72,19 +72,12 @@ static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
 
 	sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
 	sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
 			CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
-	if (ret) {
-		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-		tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
-				CRYPTO_TFM_RES_MASK);
-	}
-	return ret;
+	return crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -178,66 +171,36 @@ static struct crypto_alg aes_alg = {
 	}
 };
 
-static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
-		unsigned int len)
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	unsigned int ret;
-
-	crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
-					 CRYPTO_TFM_REQ_MASK);
-	crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
-						      CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
-
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
-			  CRYPTO_TFM_RES_MASK;
-
-	return ret;
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+
+	crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+				    CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(sctx->fallback.skcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
 }
 
-static int fallback_blk_dec(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+				   struct skcipher_request *req,
+				   unsigned long modifier)
 {
-	unsigned int ret;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
-	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+	struct skcipher_request *subreq = skcipher_request_ctx(req);
 
-	ret = crypto_skcipher_decrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
+	*subreq = *req;
+	skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+	return (modifier & CPACF_DECRYPT) ?
+		crypto_skcipher_decrypt(subreq) :
+		crypto_skcipher_encrypt(subreq);
 }
 
-static int fallback_blk_enc(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	unsigned int ret;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
-	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_encrypt(req);
-	return ret;
-}
-
-static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -248,111 +211,92 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
 	return 0;
 }
 
-static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_km(sctx->fc | modifier, sctx->key,
-			 walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-
 	return ret;
 }
 
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, 0, &walk);
+	return ecb_aes_crypt(req, 0);
 }
 
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return ecb_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
 {
-	const char *name = tfm->__crt_alg->cra_name;
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 
-	sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
-						   CRYPTO_ALG_NEED_FALLBACK);
+	sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
 
-	if (IS_ERR(sctx->fallback.blk)) {
+	if (IS_ERR(sctx->fallback.skcipher)) {
 		pr_err("Allocating AES fallback algorithm %s failed\n",
 		       name);
-		return PTR_ERR(sctx->fallback.blk);
+		return PTR_ERR(sctx->fallback.skcipher);
 	}
 
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(sctx->fallback.skcipher));
 	return 0;
 }
 
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_sync_skcipher(sctx->fallback.blk);
+	crypto_free_skcipher(sctx->fallback.skcipher);
 }
 
-static struct crypto_alg ecb_aes_alg = {
-	.cra_name		=	"ecb(aes)",
-	.cra_driver_name	=	"ecb-aes-s390",
-	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.setkey			=	ecb_aes_set_key,
-			.encrypt		=	ecb_aes_encrypt,
-			.decrypt		=	ecb_aes_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_aes_alg = {
+	.base.cra_name		=	"ecb(aes)",
+	.base.cra_driver_name	=	"ecb-aes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.setkey			=	ecb_aes_set_key,
+	.encrypt		=	ecb_aes_encrypt,
+	.decrypt		=	ecb_aes_decrypt,
 };
 
-static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -363,17 +307,18 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
 	return 0;
 }
 
-static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 	struct {
@@ -381,134 +326,69 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 		u8 key[AES_MAX_KEY_SIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
 	memcpy(param.key, sctx->key, sctx->key_len);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_kmc(sctx->fc | modifier, &param,
-			  walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			  walk.dst.virt.addr, walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
 	return ret;
 }
 
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, 0, &walk);
+	return cbc_aes_crypt(req, 0);
 }
 
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return cbc_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_aes_alg = {
-	.cra_name		=	"cbc(aes)",
-	.cra_driver_name	=	"cbc-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	cbc_aes_set_key,
-			.encrypt		=	cbc_aes_encrypt,
-			.decrypt		=	cbc_aes_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_aes_alg = {
+	.base.cra_name		=	"cbc(aes)",
+	.base.cra_driver_name	=	"cbc-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_aes_set_key,
+	.encrypt		=	cbc_aes_encrypt,
+	.decrypt		=	cbc_aes_decrypt,
 };
 
-static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
-				   unsigned int len)
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int len)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
-	unsigned int ret;
-
-	crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
-					 CRYPTO_TFM_REQ_MASK);
-	crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
-						     CRYPTO_TFM_REQ_MASK);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
-
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
-			  CRYPTO_TFM_RES_MASK;
-
-	return ret;
+	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(xts_ctx->fallback,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(xts_ctx->fallback, key, len);
 }
 
-static int xts_fallback_decrypt(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
-	unsigned int ret;
-
-	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_decrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
-}
-
-static int xts_fallback_encrypt(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
-	unsigned int ret;
-
-	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_encrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
-}
-
-static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 	int err;
 
@@ -517,10 +397,8 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		return err;
 
 	/* In fips mode only 128 bit or 256 bit keys are valid */
-	if (fips_enabled && key_len != 32 && key_len != 64) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (fips_enabled && key_len != 32 && key_len != 64)
 		return -EINVAL;
-	}
 
 	/* Pick the correct function code based on the key length */
 	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
@@ -539,10 +417,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int offset, nbytes, n;
 	int ret;
 	struct {
@@ -557,113 +436,100 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 		u8 init[16];
 	} xts_param;
 
-	ret = blkcipher_walk_virt(desc, walk);
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+		*subreq = *req;
+		skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+		return (modifier & CPACF_DECRYPT) ?
+			crypto_skcipher_decrypt(subreq) :
+			crypto_skcipher_encrypt(subreq);
+	}
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
 	offset = xts_ctx->key_len & 0x10;
 	memset(pcc_param.block, 0, sizeof(pcc_param.block));
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
-	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
 	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
 	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
 
 	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
 
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
-			 walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	return ret;
 }
 
-static int xts_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int xts_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (!nbytes)
-		return -EINVAL;
-
-	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
-		return xts_fallback_encrypt(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, 0, &walk);
+	return xts_aes_crypt(req, 0);
 }
 
-static int xts_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int xts_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (!nbytes)
-		return -EINVAL;
-
-	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
-		return xts_fallback_decrypt(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return xts_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static int xts_fallback_init(struct crypto_tfm *tfm)
+static int xts_fallback_init(struct crypto_skcipher *tfm)
 {
-	const char *name = tfm->__crt_alg->cra_name;
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
-						  CRYPTO_ALG_NEED_FALLBACK);
+	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
 
 	if (IS_ERR(xts_ctx->fallback)) {
 		pr_err("Allocating XTS fallback algorithm %s failed\n",
 		       name);
 		return PTR_ERR(xts_ctx->fallback);
 	}
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(xts_ctx->fallback));
 	return 0;
 }
 
-static void xts_fallback_exit(struct crypto_tfm *tfm)
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_sync_skcipher(xts_ctx->fallback);
+	crypto_free_skcipher(xts_ctx->fallback);
 }
 
-static struct crypto_alg xts_aes_alg = {
-	.cra_name		=	"xts(aes)",
-	.cra_driver_name	=	"xts-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_xts_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	xts_fallback_init,
-	.cra_exit		=	xts_fallback_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	2 * AES_MIN_KEY_SIZE,
-			.max_keysize		=	2 * AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	xts_aes_set_key,
-			.encrypt		=	xts_aes_encrypt,
-			.decrypt		=	xts_aes_decrypt,
-		}
-	}
+static struct skcipher_alg xts_aes_alg = {
+	.base.cra_name		=	"xts(aes)",
+	.base.cra_driver_name	=	"xts-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_xts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	xts_fallback_init,
+	.exit			=	xts_fallback_exit,
+	.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+	.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_aes_set_key,
+	.encrypt		=	xts_aes_encrypt,
+	.decrypt		=	xts_aes_decrypt,
 };
 
-static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -674,7 +540,7 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
@@ -696,30 +562,34 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 	return n;
 }
 
-static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int n, nbytes;
 	int ret, locked;
 
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, 0);
+
 	locked = mutex_trylock(&ctrblk_lock);
 
-	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		n = AES_BLOCK_SIZE;
+
 		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		cpacf_kmctr(sctx->fc | modifier, sctx->key,
-			    walk->dst.virt.addr, walk->src.virt.addr,
-			    n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
 		if (ctrptr == ctrblk)
-			memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+			memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
 			       AES_BLOCK_SIZE);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	if (locked)
 		mutex_unlock(&ctrblk_lock);
@@ -727,67 +597,33 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		cpacf_kmctr(sctx->fc | modifier, sctx->key,
-			    buf, walk->src.virt.addr,
-			    AES_BLOCK_SIZE, walk->iv);
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+			    AES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
 	}
 
 	return ret;
 }
 
-static int ctr_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, 0, &walk);
-}
-
-static int ctr_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_aes_alg = {
-	.cra_name		=	"ctr(aes)",
-	.cra_driver_name	=	"ctr-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ctr_aes_set_key,
-			.encrypt		=	ctr_aes_encrypt,
-			.decrypt		=	ctr_aes_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_aes_alg = {
+	.base.cra_name		=	"ctr(aes)",
+	.base.cra_driver_name	=	"ctr-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_aes_set_key,
+	.encrypt		=	ctr_aes_crypt,
+	.decrypt		=	ctr_aes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
 };
 
 static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1116,24 +952,27 @@ static struct aead_alg gcm_aes_aead = {
 	},
 };
 
-static struct crypto_alg *aes_s390_algs_ptr[5];
-static int aes_s390_algs_num;
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
 static struct aead_alg *aes_s390_aead_alg;
 
-static int aes_s390_register_alg(struct crypto_alg *alg)
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
 {
 	int ret;
 
-	ret = crypto_register_alg(alg);
+	ret = crypto_register_skcipher(alg);
 	if (!ret)
-		aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+		aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
 	return ret;
 }
 
 static void aes_s390_fini(void)
 {
-	while (aes_s390_algs_num--)
-		crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+	if (aes_s390_alg)
+		crypto_unregister_alg(aes_s390_alg);
+	while (aes_s390_skciphers_num--)
+		crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
 
@@ -1154,10 +993,11 @@ static int __init aes_s390_init(void)
 	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
-		ret = aes_s390_register_alg(&aes_alg);
+		ret = crypto_register_alg(&aes_alg);
 		if (ret)
 			goto out_err;
-		ret = aes_s390_register_alg(&ecb_aes_alg);
+		aes_s390_alg = &aes_alg;
+		ret = aes_s390_register_skcipher(&ecb_aes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -1165,14 +1005,14 @@ static int __init aes_s390_init(void)
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
-		ret = aes_s390_register_alg(&cbc_aes_alg);
+		ret = aes_s390_register_skcipher(&cbc_aes_alg);
 		if (ret)
 			goto out_err;
 	}
 
 	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
-		ret = aes_s390_register_alg(&xts_aes_alg);
+		ret = aes_s390_register_skcipher(&xts_aes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -1185,7 +1025,7 @@ static int __init aes_s390_init(void)
 			ret = -ENOMEM;
 			goto out_err;
 		}
-		ret = aes_s390_register_alg(&ctr_aes_alg);
+		ret = aes_s390_register_skcipher(&ctr_aes_alg);
 		if (ret)
 			goto out_err;
 	}
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 423ee05887e6..fafecad20752 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -111,10 +111,8 @@ static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
 {
 	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (newkeylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (newkeylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = le32_to_cpu(*(__le32 *)newkey);
 	return 0;
 }
@@ -124,10 +122,8 @@ static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
 {
 	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (newkeylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (newkeylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = be32_to_cpu(*(__be32 *)newkey);
 	return 0;
 }
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 439b100c6f2e..bfbafd35bcbd 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 #include <asm/cpacf.h>
 
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
@@ -45,6 +46,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
 static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -79,28 +86,30 @@ static struct crypto_alg des_alg = {
 	}
 };
 
-static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		cpacf_km(fc, ctx->key, walk->dst.virt.addr,
-			 walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+			 walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	return ret;
 }
 
-static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 	struct {
@@ -108,99 +117,69 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 		u8 key[DES3_KEY_SIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
 	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		cpacf_kmc(fc, &param, walk->dst.virt.addr,
-			  walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		cpacf_kmc(fc, &param, walk.dst.virt.addr,
+			  walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
 	return ret;
 }
 
-static int ecb_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_des_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_DEA);
 }
 
-static int ecb_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_des_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_des_alg = {
-	.cra_name		=	"ecb(des)",
-	.cra_driver_name	=	"ecb-des-s390",
-	.cra_priority		=	400,	/* combo: des + ecb */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	ecb_des_encrypt,
-			.decrypt		=	ecb_des_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_des_alg = {
+	.base.cra_name		=	"ecb(des)",
+	.base.cra_driver_name	=	"ecb-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ecb_des_encrypt,
+	.decrypt		=	ecb_des_decrypt,
 };
 
-static int cbc_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_des_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_DEA);
 }
 
-static int cbc_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_des_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_des_alg = {
-	.cra_name		=	"cbc(des)",
-	.cra_driver_name	=	"cbc-des-s390",
-	.cra_priority		=	400,	/* combo: des + cbc */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	cbc_des_encrypt,
-			.decrypt		=	cbc_des_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_des_alg = {
+	.base.cra_name		=	"cbc(des)",
+	.base.cra_driver_name	=	"cbc-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	cbc_des_encrypt,
+	.decrypt		=	cbc_des_decrypt,
 };
 
 /*
@@ -232,6 +211,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
 static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -266,87 +251,53 @@ static struct crypto_alg des3_alg = {
 	}
 };
 
-static int ecb_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_des3_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
 }
 
-static int ecb_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_des3_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
-				&walk);
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_des3_alg = {
-	.cra_name		=	"ecb(des3_ede)",
-	.cra_driver_name	=	"ecb-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + ecb */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	ecb_des3_encrypt,
-			.decrypt		=	ecb_des3_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_des3_alg = {
+	.base.cra_name		=	"ecb(des3_ede)",
+	.base.cra_driver_name	=	"ecb-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ecb_des3_encrypt,
+	.decrypt		=	ecb_des3_decrypt,
 };
 
-static int cbc_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_des3_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
 }
 
-static int cbc_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_des3_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
-				&walk);
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_des3_alg = {
-	.cra_name		=	"cbc(des3_ede)",
-	.cra_driver_name	=	"cbc-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + cbc */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	cbc_des3_encrypt,
-			.decrypt		=	cbc_des3_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_des3_alg = {
+	.base.cra_name		=	"cbc(des3_ede)",
+	.base.cra_driver_name	=	"cbc-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	cbc_des3_encrypt,
+	.decrypt		=	cbc_des3_decrypt,
 };
 
 static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -364,128 +315,90 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 	return n;
 }
 
-static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 buf[DES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int n, nbytes;
 	int ret, locked;
 
 	locked = mutex_trylock(&ctrblk_lock);
 
-	ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
 		n = DES_BLOCK_SIZE;
 		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
-			    walk->src.virt.addr, n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
 		if (ctrptr == ctrblk)
-			memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+			memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
 				DES_BLOCK_SIZE);
-		crypto_inc(walk->iv, DES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	if (locked)
 		mutex_unlock(&ctrblk_lock);
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
-		cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
-			    DES_BLOCK_SIZE, walk->iv);
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, DES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+			    DES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
 	}
 	return ret;
 }
 
-static int ctr_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
-}
-
-static int ctr_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ctr_des_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
+	return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
 }
 
-static struct crypto_alg ctr_des_alg = {
-	.cra_name		=	"ctr(des)",
-	.cra_driver_name	=	"ctr-des-s390",
-	.cra_priority		=	400,	/* combo: des + ctr */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	ctr_des_encrypt,
-			.decrypt		=	ctr_des_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_des_alg = {
+	.base.cra_name		=	"ctr(des)",
+	.base.cra_driver_name	=	"ctr-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ctr */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ctr_des_crypt,
+	.decrypt		=	ctr_des_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
 };
 
-static int ctr_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
-}
-
-static int ctr_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ctr_des3_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
-				&walk);
+	return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
 }
 
-static struct crypto_alg ctr_des3_alg = {
-	.cra_name		=	"ctr(des3_ede)",
-	.cra_driver_name	=	"ctr-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + ede */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	ctr_des3_encrypt,
-			.decrypt		=	ctr_des3_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_des3_alg = {
+	.base.cra_name		=	"ctr(des3_ede)",
+	.base.cra_driver_name	=	"ctr-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ede */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ctr_des3_crypt,
+	.decrypt		=	ctr_des3_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
 };
 
-static struct crypto_alg *des_s390_algs_ptr[8];
+static struct crypto_alg *des_s390_algs_ptr[2];
 static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
 
 static int des_s390_register_alg(struct crypto_alg *alg)
 {
@@ -497,10 +410,22 @@ static int des_s390_register_alg(struct crypto_alg *alg)
 	return ret;
 }
 
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_skcipher(alg);
+	if (!ret)
+		des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+	return ret;
+}
+
 static void des_s390_exit(void)
 {
 	while (des_s390_algs_num--)
 		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	while (des_s390_skciphers_num--)
+		crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
 }
@@ -518,12 +443,12 @@ static int __init des_s390_init(void)
 		ret = des_s390_register_alg(&des_alg);
 		if (ret)
 			goto out_err;
-		ret = des_s390_register_alg(&ecb_des_alg);
+		ret = des_s390_register_skcipher(&ecb_des_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
-		ret = des_s390_register_alg(&cbc_des_alg);
+		ret = des_s390_register_skcipher(&cbc_des_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -531,12 +456,12 @@ static int __init des_s390_init(void)
 		ret = des_s390_register_alg(&des3_alg);
 		if (ret)
 			goto out_err;
-		ret = des_s390_register_alg(&ecb_des3_alg);
+		ret = des_s390_register_skcipher(&ecb_des3_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
-		ret = des_s390_register_alg(&cbc_des3_alg);
+		ret = des_s390_register_skcipher(&cbc_des3_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -551,12 +476,12 @@ static int __init des_s390_init(void)
 	}
 
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
-		ret = des_s390_register_alg(&ctr_des_alg);
+		ret = des_s390_register_skcipher(&ctr_des_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
-		ret = des_s390_register_alg(&ctr_des3_alg);
+		ret = des_s390_register_skcipher(&ctr_des3_alg);
 		if (ret)
 			goto out_err;
 	}
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index a3e7400e031c..6b07a2f1ce8a 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -43,10 +43,8 @@ static int ghash_setkey(struct crypto_shash *tfm,
 {
 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
 
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 6184dceed340..e2a85783f804 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -21,6 +21,7 @@
 #include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
 #include <asm/pkey.h>
@@ -123,119 +124,99 @@ static int __paes_set_key(struct s390_paes_ctx *ctx)
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int ecb_paes_init(struct crypto_tfm *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb.key = NULL;
 
 	return 0;
 }
 
-static void ecb_paes_exit(struct crypto_tfm *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 }
 
-static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __paes_set_key(ctx);
 }
 
-static int ecb_paes_crypt(struct blkcipher_desc *desc,
-			  unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
-			     walk->dst.virt.addr, walk->src.virt.addr, n);
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
 		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			ret = skcipher_walk_done(&walk, nbytes - k);
 		if (k < n) {
 			if (__paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+				return skcipher_walk_done(&walk, -EIO);
 		}
 	}
 	return ret;
 }
 
-static int ecb_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+	return ecb_paes_crypt(req, 0);
 }
 
-static int ecb_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return ecb_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_paes_alg = {
-	.cra_name		=	"ecb(paes)",
-	.cra_driver_name	=	"ecb-paes-s390",
-	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.cra_list),
-	.cra_init		=	ecb_paes_init,
-	.cra_exit		=	ecb_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.setkey			=	ecb_paes_set_key,
-			.encrypt		=	ecb_paes_encrypt,
-			.decrypt		=	ecb_paes_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_paes_alg = {
+	.base.cra_name		=	"ecb(paes)",
+	.base.cra_driver_name	=	"ecb-paes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+	.init			=	ecb_paes_init,
+	.exit			=	ecb_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.setkey			=	ecb_paes_set_key,
+	.encrypt		=	ecb_paes_encrypt,
+	.decrypt		=	ecb_paes_decrypt,
 };
 
-static int cbc_paes_init(struct crypto_tfm *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb.key = NULL;
 
 	return 0;
 }
 
-static void cbc_paes_exit(struct crypto_tfm *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 }
@@ -258,28 +239,25 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__cbc_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __cbc_paes_set_key(ctx);
 }
 
-static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret;
 	struct {
@@ -287,73 +265,60 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 		u8 key[MAXPROTKEYSIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
 	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		k = cpacf_kmc(ctx->fc | modifier, &param,
-			      walk->dst.virt.addr, walk->src.virt.addr, n);
-		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			      walk.dst.virt.addr, walk.src.virt.addr, n);
+		if (k) {
+			memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		}
 		if (k < n) {
 			if (__cbc_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+				return skcipher_walk_done(&walk, -EIO);
 			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
 		}
 	}
-	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
 	return ret;
 }
 
-static int cbc_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_paes_crypt(desc, 0, &walk);
+	return cbc_paes_crypt(req, 0);
 }
 
-static int cbc_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return cbc_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_paes_alg = {
-	.cra_name		=	"cbc(paes)",
-	.cra_driver_name	=	"cbc-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.cra_list),
-	.cra_init		=	cbc_paes_init,
-	.cra_exit		=	cbc_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	cbc_paes_set_key,
-			.encrypt		=	cbc_paes_encrypt,
-			.decrypt		=	cbc_paes_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_paes_alg = {
+	.base.cra_name		=	"cbc(paes)",
+	.base.cra_driver_name	=	"cbc-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+	.init			=	cbc_paes_init,
+	.exit			=	cbc_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_paes_set_key,
+	.encrypt		=	cbc_paes_encrypt,
+	.decrypt		=	cbc_paes_decrypt,
 };
 
-static int xts_paes_init(struct crypto_tfm *tfm)
+static int xts_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb[0].key = NULL;
 	ctx->kb[1].key = NULL;
@@ -361,9 +326,9 @@ static int xts_paes_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void xts_paes_exit(struct crypto_tfm *tfm)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb[0]);
 	_free_kb_keybuf(&ctx->kb[1]);
@@ -391,11 +356,11 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int xts_key_len)
 {
 	int rc;
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 ckey[2 * AES_MAX_KEY_SIZE];
 	unsigned int ckey_len, key_len;
 
@@ -413,10 +378,9 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	if (rc)
 		return rc;
 
-	if (__xts_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	rc = __xts_paes_set_key(ctx);
+	if (rc)
+		return rc;
 
 	/*
 	 * xts_check_key verifies the key length is not odd and makes
@@ -427,13 +391,14 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		AES_KEYSIZE_128 : AES_KEYSIZE_256;
 	memcpy(ckey, ctx->pk[0].protkey, ckey_len);
 	memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
-	return xts_check_key(tfm, ckey, 2*ckey_len);
+	return xts_verify_key(tfm, ckey, 2*ckey_len);
 }
 
-static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int keylen, offset, nbytes, n, k;
 	int ret;
 	struct {
@@ -448,90 +413,76 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 		u8 init[16];
 	} xts_param;
 
-	ret = blkcipher_walk_virt(desc, walk);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
 	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
 	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
 retry:
 	memset(&pcc_param, 0, sizeof(pcc_param));
-	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
 	memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
 	cpacf_pcc(ctx->fc, pcc_param.key + offset);
 
 	memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
 	memcpy(xts_param.init, pcc_param.xts, 16);
 
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
-			     walk->dst.virt.addr, walk->src.virt.addr, n);
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
 		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			ret = skcipher_walk_done(&walk, nbytes - k);
 		if (k < n) {
 			if (__xts_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+				return skcipher_walk_done(&walk, -EIO);
 			goto retry;
 		}
 	}
 	return ret;
 }
 
-static int xts_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int xts_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_paes_crypt(desc, 0, &walk);
+	return xts_paes_crypt(req, 0);
 }
 
-static int xts_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int xts_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return xts_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg xts_paes_alg = {
-	.cra_name		=	"xts(paes)",
-	.cra_driver_name	=	"xts-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_pxts_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.cra_list),
-	.cra_init		=	xts_paes_init,
-	.cra_exit		=	xts_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	2 * PAES_MIN_KEYSIZE,
-			.max_keysize		=	2 * PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	xts_paes_set_key,
-			.encrypt		=	xts_paes_encrypt,
-			.decrypt		=	xts_paes_decrypt,
-		}
-	}
+static struct skcipher_alg xts_paes_alg = {
+	.base.cra_name		=	"xts(paes)",
+	.base.cra_driver_name	=	"xts-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_pxts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+	.init			=	xts_paes_init,
+	.exit			=	xts_paes_exit,
+	.min_keysize		=	2 * PAES_MIN_KEYSIZE,
+	.max_keysize		=	2 * PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_paes_set_key,
+	.encrypt		=	xts_paes_encrypt,
+	.decrypt		=	xts_paes_decrypt,
 };
 
-static int ctr_paes_init(struct crypto_tfm *tfm)
+static int ctr_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb.key = NULL;
 
 	return 0;
 }
 
-static void ctr_paes_exit(struct crypto_tfm *tfm)
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 }
@@ -555,22 +506,18 @@ static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__ctr_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __ctr_paes_set_key(ctx);
 }
 
 static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -588,37 +535,37 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 	return n;
 }
 
-static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int ctr_paes_crypt(struct skcipher_request *req)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret, locked;
 
 	locked = spin_trylock(&ctrblk_lock);
 
-	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		n = AES_BLOCK_SIZE;
 		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
-				walk->dst.virt.addr, walk->src.virt.addr,
-				n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		k = cpacf_kmctr(ctx->fc, ctx->pk.protkey, walk.dst.virt.addr,
+				walk.src.virt.addr, n, ctrptr);
 		if (k) {
 			if (ctrptr == ctrblk)
-				memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+				memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
 				       AES_BLOCK_SIZE);
-			crypto_inc(walk->iv, AES_BLOCK_SIZE);
-			ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - n);
 		}
 		if (k < n) {
 			if (__ctr_paes_set_key(ctx) != 0) {
 				if (locked)
 					spin_unlock(&ctrblk_lock);
-				return blkcipher_walk_done(desc, walk, -EIO);
+				return skcipher_walk_done(&walk, -EIO);
 			}
 		}
 	}
@@ -629,80 +576,54 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 	 */
 	if (nbytes) {
 		while (1) {
-			if (cpacf_kmctr(ctx->fc | modifier,
-					ctx->pk.protkey, buf,
-					walk->src.virt.addr, AES_BLOCK_SIZE,
-					walk->iv) == AES_BLOCK_SIZE)
+			if (cpacf_kmctr(ctx->fc, ctx->pk.protkey, buf,
+					walk.src.virt.addr, AES_BLOCK_SIZE,
+					walk.iv) == AES_BLOCK_SIZE)
 				break;
 			if (__ctr_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+				return skcipher_walk_done(&walk, -EIO);
 		}
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
 	}
 
 	return ret;
 }
 
-static int ctr_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_paes_crypt(desc, 0, &walk);
-}
-
-static int ctr_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_paes_alg = {
-	.cra_name		=	"ctr(paes)",
-	.cra_driver_name	=	"ctr-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.cra_list),
-	.cra_init		=	ctr_paes_init,
-	.cra_exit		=	ctr_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ctr_paes_set_key,
-			.encrypt		=	ctr_paes_encrypt,
-			.decrypt		=	ctr_paes_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_paes_alg = {
+	.base.cra_name		=	"ctr(paes)",
+	.base.cra_driver_name	=	"ctr-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+	.init			=	ctr_paes_init,
+	.exit			=	ctr_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_paes_set_key,
+	.encrypt		=	ctr_paes_crypt,
+	.decrypt		=	ctr_paes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
 };
 
-static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
 {
-	if (!list_empty(&alg->cra_list))
-		crypto_unregister_alg(alg);
+	if (!list_empty(&alg->base.cra_list))
+		crypto_unregister_skcipher(alg);
 }
 
 static void paes_s390_fini(void)
 {
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
-	__crypto_unregister_alg(&ctr_paes_alg);
-	__crypto_unregister_alg(&xts_paes_alg);
-	__crypto_unregister_alg(&cbc_paes_alg);
-	__crypto_unregister_alg(&ecb_paes_alg);
+	__crypto_unregister_skcipher(&ctr_paes_alg);
+	__crypto_unregister_skcipher(&xts_paes_alg);
+	__crypto_unregister_skcipher(&cbc_paes_alg);
+	__crypto_unregister_skcipher(&ecb_paes_alg);
 }
 
 static int __init paes_s390_init(void)
@@ -717,7 +638,7 @@ static int __init paes_s390_init(void)
 	if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
-		ret = crypto_register_alg(&ecb_paes_alg);
+		ret = crypto_register_skcipher(&ecb_paes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -725,14 +646,14 @@ static int __init paes_s390_init(void)
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
-		ret = crypto_register_alg(&cbc_paes_alg);
+		ret = crypto_register_skcipher(&cbc_paes_alg);
 		if (ret)
 			goto out_err;
 	}
 
 	if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
-		ret = crypto_register_alg(&xts_paes_alg);
+		ret = crypto_register_skcipher(&xts_paes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -740,7 +661,7 @@ static int __init paes_s390_init(void)
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
-		ret = crypto_register_alg(&ctr_paes_alg);
+		ret = crypto_register_skcipher(&ctr_paes_alg);
 		if (ret)
 			goto out_err;
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index d39e0f079217..686fe7aa192f 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -74,14 +74,17 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	u64 bits;
-	unsigned int n, mbl_offset;
+	unsigned int n;
+	int mbl_offset;
 
 	n = ctx->count % bsize;
 	bits = ctx->count * 8;
-	mbl_offset = s390_crypto_shash_parmsize(ctx->func) / sizeof(u32);
+	mbl_offset = s390_crypto_shash_parmsize(ctx->func);
 	if (mbl_offset < 0)
 		return -EINVAL;
 
+	mbl_offset = mbl_offset / sizeof(u32);
+
 	/* set total msg bit length (mbl) in CPACF parmblock */
 	switch (ctx->func) {
 	case CPACF_KLMD_SHA_1:
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index c2cf7bcdef9b..1c8a38f762a3 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -139,10 +139,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, altinstr, facility)			\
-	asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
 
 #define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
-	asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,	    \
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,   \
 				   altinstr2, facility2) ::: "memory")
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index eb7eed43e780..431e208a5ea4 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -241,7 +241,9 @@ static inline void arch___clear_bit_unlock(unsigned long nr,
 	arch___clear_bit(nr, ptr);
 }
 
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
 
 /*
  * Functions which use MSB0 bit numbering.
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 713fc9735ffb..7725f8006fdf 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -9,16 +9,15 @@
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 
 #define __EMIT_BUG(x) do {					\
-	asm volatile(						\
-		"0:	j	0b+2\n"				\
-		"1:\n"						\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
 		".section .rodata.str,\"aMS\",@progbits,1\n"	\
-		"2:	.asciz	\""__FILE__"\"\n"		\
+		"1:	.asciz	\""__FILE__"\"\n"		\
 		".previous\n"					\
 		".section __bug_table,\"awM\",@progbits,%2\n"	\
-		"3:	.long	1b-3b,2b-3b\n"			\
+		"2:	.long	0b-2b,1b-2b\n"			\
 		"	.short	%0,%1\n"			\
-		"	.org	3b+%2\n"			\
+		"	.org	2b+%2\n"			\
 		".previous\n"					\
 		: : "i" (__LINE__),				\
 		    "i" (x),					\
@@ -28,13 +27,12 @@
 #else /* CONFIG_DEBUG_BUGVERBOSE */
 
 #define __EMIT_BUG(x) do {					\
-	asm volatile(						\
-		"0:	j	0b+2\n"				\
-		"1:\n"						\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
 		".section __bug_table,\"awM\",@progbits,%1\n"	\
-		"2:	.long	1b-2b\n"			\
+		"1:	.long	0b-1b\n"			\
 		"	.short	%0\n"				\
-		"	.org	2b+%1\n"			\
+		"	.org	1b+%1\n"			\
 		".previous\n"					\
 		: : "i" (x),					\
 		    "i" (sizeof(struct bug_entry)));		\
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 63b46e30b2c3..9547cd5d6cdc 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -177,11 +177,7 @@ static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
 	return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
 }
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
+#define compat_ptr(uptr) compat_ptr(uptr)
 
 #ifdef CONFIG_COMPAT
 
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 819803a97c2b..0d90cbeb89b4 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -313,7 +313,7 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v)
 	return (unsigned long *) ret;
 }
 
-/* Return if the entry in the sample data block table (sdbt)
+/* Return true if the entry in the sample data block table (sdbt)
  * is a link to the next sdbt */
 static inline int is_link_entry(unsigned long *s)
 {
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 60f907516335..ed5efbb531c4 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -11,6 +11,7 @@
 #include <linux/bits.h>
 
 #define CR0_CLOCK_COMPARATOR_SIGN	BIT(63 - 10)
+#define CR0_LOW_ADDRESS_PROTECTION	BIT(63 - 35)
 #define CR0_EMERGENCY_SIGNAL_SUBMASK	BIT(63 - 49)
 #define CR0_EXTERNAL_CALL_SUBMASK	BIT(63 - 50)
 #define CR0_CLOCK_COMPARATOR_SUBMASK	BIT(63 - 52)
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index ca421614722f..5a16f500515a 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -26,10 +26,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
 
 #define IO_SPACE_LIMIT 0
 
-#define ioremap_nocache(addr, size)	ioremap(addr, size)
-#define ioremap_wc			ioremap_nocache
-#define ioremap_wt			ioremap_nocache
-
 void __iomem *ioremap(unsigned long offset, unsigned long size);
 void iounmap(volatile void __iomem *addr);
 
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 39f747d63758..dcb1bba4f406 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,9 @@
 #define JUMP_LABEL_NOP_SIZE 6
 #define JUMP_LABEL_NOP_OFFSET 2
 
-#if __GNUC__ < 9
+#ifdef CONFIG_CC_IS_CLANG
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
+#elif __GNUC__ < 9
 #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X"
 #else
 #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd"
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index abe60268335d..02f4c21c57f6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -392,6 +392,7 @@ struct kvm_vcpu_stat {
 	u64 diagnose_10;
 	u64 diagnose_44;
 	u64 diagnose_9c;
+	u64 diagnose_9c_ignored;
 	u64 diagnose_258;
 	u64 diagnose_308;
 	u64 diagnose_500;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS	31
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a2399eff84ca..b05187ce5dbd 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -2,9 +2,6 @@
 #ifndef __ASM_S390_PCI_H
 #define __ASM_S390_PCI_H
 
-/* must be set before including pci_clp.h */
-#define PCI_BAR_COUNT	6
-
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/iommu.h>
@@ -138,7 +135,7 @@ struct zpci_dev {
 
 	char res_name[16];
 	bool mio_capable;
-	struct zpci_bar_struct bars[PCI_BAR_COUNT];
+	struct zpci_bar_struct bars[PCI_STD_NUM_BARS];
 
 	u64		start_dma;	/* Start of available DMA addresses */
 	u64		end_dma;	/* End of available DMA addresses */
@@ -183,7 +180,7 @@ void zpci_remove_reserved_devices(void);
 /* CLP */
 int clp_scan_pci_devices(void);
 int clp_rescan_pci_devices(void);
-int clp_rescan_pci_devices_simple(void);
+int clp_rescan_pci_devices_simple(u32 *fid);
 int clp_add_pci_device(u32, u32, int);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 50359172cc48..bd2cb4ea7d93 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -77,7 +77,7 @@ struct mio_info {
 	struct {
 		u64 wb;
 		u64 wt;
-	} addr[PCI_BAR_COUNT];
+	} addr[PCI_STD_NUM_BARS];
 	u32 reserved[6];
 } __packed;
 
@@ -98,9 +98,9 @@ struct clp_rsp_query_pci {
 	u16 util_str_avail	:  1;	/* utility string available? */
 	u16 pfgid		:  8;	/* pci function group id */
 	u32 fid;			/* pci function id */
-	u8 bar_size[PCI_BAR_COUNT];
+	u8 bar_size[PCI_STD_NUM_BARS];
 	u16 pchid;
-	__le32 bar[PCI_BAR_COUNT];
+	__le32 bar[PCI_STD_NUM_BARS];
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
 	u32			: 16;
 	u8 fmb_len;
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4652ffffe0b2..b9da71632827 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -12,6 +12,7 @@
 
 #include <linux/perf_event.h>
 #include <linux/device.h>
+#include <asm/stacktrace.h>
 
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
@@ -73,4 +74,10 @@ struct perf_sf_sde_regs {
 #define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 #define SAMPLE_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
+#define perf_arch_fetch_caller_regs(regs, __ip) do {			\
+	(regs)->psw.addr = (__ip);					\
+	(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) -	\
+		offsetof(struct stack_frame, back_chain);		\
+} while (0)
+
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bccb8f4a63e2..77606c4acd58 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
 		crst_table_init(table, _REGION2_ENTRY_EMPTY);
 	return (p4d_t *) table;
 }
-#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	if (!mm_p4d_folded(mm))
+		crst_table_free(mm, (unsigned long *) p4d);
+}
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
@@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 		crst_table_init(table, _REGION3_ENTRY_EMPTY);
 	return (pud_t *) table;
 }
-#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	if (!mm_pud_folded(mm))
+		crst_table_free(mm, (unsigned long *) pud);
+}
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
@@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	crst_table_free(mm, (unsigned long *) pmd);
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ff98d76a66c..7b03037a8475 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -266,11 +266,9 @@ static inline int is_module_addr(void *addr)
 #endif
 
 #define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE		0xfffffffffff0ff33UL
 #define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
 #define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
@@ -699,10 +697,8 @@ static inline int pmd_large(pmd_t pmd)
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
 		return 1;
-	if (pmd_large(pmd))
-		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
 	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
 
@@ -710,12 +706,10 @@ static inline int pud_bad(pud_t pud)
 {
 	unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
 
-	if (type > _REGION_ENTRY_TYPE_R3)
+	if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
 		return 1;
 	if (type < _REGION_ENTRY_TYPE_R3)
 		return 0;
-	if (pud_large(pud))
-		return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
 	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
 }
 
@@ -758,18 +752,12 @@ static inline int pmd_write(pmd_t pmd)
 
 static inline int pmd_dirty(pmd_t pmd)
 {
-	int dirty = 1;
-	if (pmd_large(pmd))
-		dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
-	return dirty;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
 }
 
 static inline int pmd_young(pmd_t pmd)
 {
-	int young = 1;
-	if (pmd_large(pmd))
-		young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
-	return young;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
 }
 
 static inline int pte_present(pte_t pte)
@@ -1173,8 +1161,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t entry)
 {
-	if (!MACHINE_HAS_NX)
-		pte_val(entry) &= ~_PAGE_NOEXEC;
 	if (pte_present(entry))
 		pte_val(entry) &= ~_PAGE_UNUSED;
 	if (mm_has_pgste(mm))
@@ -1191,6 +1177,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 {
 	pte_t __pte;
 	pte_val(__pte) = physpage + pgprot_val(pgprot);
+	if (!MACHINE_HAS_NX)
+		pte_val(__pte) &= ~_PAGE_NOEXEC;
 	return pte_mkyoung(__pte);
 }
 
@@ -1297,29 +1285,23 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 {
 	pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
-	if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
-		return pmd;
-	pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
-		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
-	}
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
-				_SEGMENT_ENTRY_SOFT_DIRTY;
-		if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
-			pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
-	}
+	pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
@@ -1333,29 +1315,23 @@ static inline pud_t pud_wrprotect(pud_t pud)
 static inline pud_t pud_mkwrite(pud_t pud)
 {
 	pud_val(pud) |= _REGION3_ENTRY_WRITE;
-	if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
-		return pud;
-	pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+	if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
+		pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
 	return pud;
 }
 
 static inline pud_t pud_mkclean(pud_t pud)
 {
-	if (pud_large(pud)) {
-		pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
-		pud_val(pud) |= _REGION_ENTRY_PROTECT;
-	}
+	pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+	pud_val(pud) |= _REGION_ENTRY_PROTECT;
 	return pud;
 }
 
 static inline pud_t pud_mkdirty(pud_t pud)
 {
-	if (pud_large(pud)) {
-		pud_val(pud) |= _REGION3_ENTRY_DIRTY |
-				_REGION3_ENTRY_SOFT_DIRTY;
-		if (pud_val(pud) & _REGION3_ENTRY_WRITE)
-			pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
-	}
+	pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY;
+	if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+		pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
 	return pud;
 }
 
@@ -1379,38 +1355,29 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
 
 static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
-		if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
-			pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
-	}
+	pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
-		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
-	}
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
-			_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
-			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
-		pmd_val(pmd) |= massage_pgprot_pmd(newprot);
-		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
-			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
-		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
-			pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
-		return pmd;
-	}
-	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
+	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+		_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+		_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
 	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index b5ea9e14c017..6ede29907fbf 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -130,11 +130,11 @@ static inline bool should_resched(int preempt_offset)
 
 #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 51a0e4a2dc96..361ef5eda468 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -206,7 +206,7 @@ unsigned long get_wchan(struct task_struct *p);
 /* Has task runtime instrumentation enabled ? */
 #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
 
-static inline unsigned long current_stack_pointer(void)
+static __always_inline unsigned long current_stack_pointer(void)
 {
 	unsigned long sp;
 
@@ -310,7 +310,7 @@ void enabled_wait(void);
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void __noreturn disabled_wait(void)
+static __always_inline void __noreturn disabled_wait(void)
 {
 	psw_t psw;
 
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index e3f238e8c611..71e3f0146cda 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -276,6 +276,7 @@ struct qdio_outbuf_state {
 #define CHSC_AC2_MULTI_BUFFER_AVAILABLE	0x0080
 #define CHSC_AC2_MULTI_BUFFER_ENABLED	0x0040
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_SNIFFER_AVAILABLE	0x0008
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
 #define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 6dc6c4fbc8e2..69289e99cabd 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -27,7 +27,6 @@
 #define MACHINE_FLAG_DIAG9C	BIT(3)
 #define MACHINE_FLAG_ESOP	BIT(4)
 #define MACHINE_FLAG_IDTE	BIT(5)
-#define MACHINE_FLAG_DIAG44	BIT(6)
 #define MACHINE_FLAG_EDAT1	BIT(7)
 #define MACHINE_FLAG_EDAT2	BIT(8)
 #define MACHINE_FLAG_TOPOLOGY	BIT(10)
@@ -94,7 +93,6 @@ extern unsigned long __swsusp_reset_dma;
 #define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
 #define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
 #define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
 #define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index c02bff33f6c7..3a37172d5398 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -85,7 +85,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	typecheck(int, lp->lock);
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0070", 49)	/* NIAI 7 */
 		"	sth	%1,%0\n"
 		: "=Q" (((unsigned short *) &lp->lock)[1])
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 0ae4bbf7779c..ee056f4a4fa3 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -33,8 +33,8 @@ static inline bool on_stack(struct stack_info *info,
 	return addr >= info->begin && addr + len <= info->end;
 }
 
-static inline unsigned long get_stack_pointer(struct task_struct *task,
-					      struct pt_regs *regs)
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+						       struct pt_regs *regs)
 {
 	if (regs)
 		return (unsigned long) kernel_stack_pointer(regs);
@@ -62,6 +62,17 @@ struct stack_frame {
 };
 #endif
 
+/*
+ * Unlike current_stack_pointer() which simply returns current value of %r15
+ * current_frame_address() returns function stack frame address, which matches
+ * %r15 upon function invocation. It may differ from %r15 later if function
+ * allocates stack for local variables or new stack frame to call other
+ * functions.
+ */
+#define current_frame_address()						\
+	((unsigned long)__builtin_frame_address(0) -			\
+	 offsetof(struct stack_frame, back_chain))
+
 #define CALL_ARGS_0()							\
 	register unsigned long r2 asm("2")
 #define CALL_ARGS_1(arg1)						\
@@ -95,20 +106,33 @@ struct stack_frame {
 
 #define CALL_ON_STACK(fn, stack, nr, args...)				\
 ({									\
+	unsigned long frame = current_frame_address();			\
 	CALL_ARGS_##nr(args);						\
 	unsigned long prev;						\
 									\
 	asm volatile(							\
 		"	la	%[_prev],0(15)\n"			\
-		"	la	15,0(%[_stack])\n"			\
-		"	stg	%[_prev],%[_bc](15)\n"			\
+		"	lg	15,%[_stack]\n"				\
+		"	stg	%[_frame],%[_bc](15)\n"			\
 		"	brasl	14,%[_fn]\n"				\
 		"	la	15,0(%[_prev])\n"			\
 		: [_prev] "=&a" (prev), CALL_FMT_##nr			\
-		  [_stack] "a" (stack),					\
+		  [_stack] "R" (stack),					\
 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_frame] "d" (frame),					\
 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);			\
 	r2;								\
 })
 
+#define CALL_ON_STACK_NORETURN(fn, stack)				\
+({									\
+	asm volatile(							\
+		"	la	15,0(%[_stack])\n"			\
+		"	xc	%[_bc](8,15),%[_bc](15)\n"		\
+		"	brasl	14,%[_fn]\n"				\
+		::[_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_stack] "a" (stack), [_fn] "X" (fn));		\
+	BUG();								\
+})
+
 #endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 64539c221672..670f14a228e5 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,8 +10,9 @@
 #ifndef _ASM_S390_TIMEX_H
 #define _ASM_S390_TIMEX_H
 
-#include <asm/lowcore.h>
+#include <linux/preempt.h>
 #include <linux/time64.h>
+#include <asm/lowcore.h>
 
 /* The value of the TOD clock for 1.1.1970. */
 #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -179,22 +180,24 @@ static inline cycles_t get_cycles(void)
 
 int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
-unsigned long long monotonic_clock(void);
 
 extern unsigned char tod_clock_base[16] __aligned(8);
 
 /**
  * get_clock_monotonic - returns current time in clock rate units
  *
- * The caller must ensure that preemption is disabled.
  * The clock and tod_clock_base get changed via stop_machine.
- * Therefore preemption must be disabled when calling this
- * function, otherwise the returned value is not guaranteed to
- * be monotonic.
+ * Therefore preemption must be disabled, otherwise the returned
+ * value is not guaranteed to be monotonic.
  */
 static inline unsigned long long get_tod_clock_monotonic(void)
 {
-	return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+	unsigned long long tod;
+
+	preempt_disable_notrace();
+	tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+	preempt_enable_notrace();
+	return tod;
 }
 
 /**
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index eaaefeceef6f..de9006b0cfeb 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,7 +35,6 @@ struct unwind_state {
 	struct task_struct *task;
 	struct pt_regs *regs;
 	unsigned long sp, ip;
-	bool reuse_sp;
 	int graph_idx;
 	bool reliable;
 	bool error;
@@ -59,10 +58,11 @@ static inline bool unwind_error(struct unwind_state *state)
 static inline void unwind_start(struct unwind_state *state,
 				struct task_struct *task,
 				struct pt_regs *regs,
-				unsigned long sp)
+				unsigned long first_frame)
 {
-	sp = sp ? : get_stack_pointer(task, regs);
-	__unwind_start(state, task, regs, sp);
+	task = task ?: current;
+	first_frame = first_frame ?: get_stack_pointer(task, regs);
+	__unwind_start(state, task, regs, first_frame);
 }
 
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index ef3c00b049ab..4093a2856929 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd)
 	};
 
 	if (!is_prot_virt_guest())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	/*
 	 * Sharing is page wise, if we encounter addresses that are
 	 * not page aligned, we assume something went wrong. If
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 169d7604eb80..3bcfdeb01395 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -41,8 +41,17 @@ struct vdso_data {
 struct vdso_per_cpu_data {
 	__u64 ectg_timer_base;
 	__u64 ectg_user_time;
-	__u32 cpu_nr;
-	__u32 node_id;
+	/*
+	 * Note: node_id and cpu_nr must be at adjacent memory locations.
+	 * VDSO userspace must read both values with a single instruction.
+	 */
+	union {
+		__u64 getcpu_val;
+		struct {
+			__u32 node_id;
+			__u32 cpu_nr;
+		};
+	};
 };
 
 extern struct vdso_data *vdso_data;
diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h
new file mode 100644
index 000000000000..3ba3a6bdca25
--- /dev/null
+++ b/arch/s390/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_S390_VMALLOC_H
+#define _ASM_S390_VMALLOC_H
+
+#endif /* _ASM_S390_VMALLOC_H */
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 5b1c4f47c656..1030cd186899 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __S390_IPCBUF_H__
 #define __S390_IPCBUF_H__
 
+#include <linux/posix_types.h>
+
 /*
  * The user_ipc_perm structure for S/390 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7edbbcd8228a..2b1203cf7be6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -81,4 +81,3 @@ obj-$(CONFIG_TRACEPOINTS)	+= trace.o
 
 # vdso
 obj-y				+= vdso64/
-obj-$(CONFIG_COMPAT_VDSO)	+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 41ac4ad21311..ce33406cfe83 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -78,8 +78,7 @@ int main(void)
 	OFFSET(__VDSO_TS_END, vdso_data, ts_end);
 	OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
 	OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
-	OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
-	OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
+	OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
 	BLANK();
 	/* constants used by the vdso */
 	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 7abe6ae261b4..f304802ecf7b 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 				ptr += sprintf(ptr, "%%c%i", value);
 			else if (operand->flags & OPERAND_VR)
 				ptr += sprintf(ptr, "%%v%i", value);
-			else if (operand->flags & OPERAND_PCREL)
-				ptr += sprintf(ptr, "%lx", (signed int) value
-								      + addr);
-			else if (operand->flags & OPERAND_SIGNED)
+			else if (operand->flags & OPERAND_PCREL) {
+				void *pcrel = (void *)((int)value + addr);
+
+				ptr += sprintf(ptr, "%px", pcrel);
+			} else if (operand->flags & OPERAND_SIGNED)
 				ptr += sprintf(ptr, "%i", value);
 			else
 				ptr += sprintf(ptr, "%u", value);
@@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs)
 		else
 			*ptr++ = ' ';
 		addr = regs->psw.addr + start - 32;
-		ptr += sprintf(ptr, "%016lx: ", addr);
+		ptr += sprintf(ptr, "%px: ", (void *)addr);
 		if (start + opsize >= end)
 			break;
 		for (i = 0; i < opsize; i++)
@@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
 		opsize = insn_length(*code);
 		if (opsize > len)
 			break;
-		ptr += sprintf(ptr, "%p: ", code);
+		ptr += sprintf(ptr, "%px: ", code);
 		for (i = 0; i < opsize; i++)
 			ptr += sprintf(ptr, "%02x", code[i]);
 		*ptr++ = '\t';
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 34bdc60c0b11..2c122d8bab93 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,6 +38,7 @@ const char *stack_type_name(enum stack_type type)
 		return "unknown";
 	}
 }
+EXPORT_SYMBOL_GPL(stack_type_name);
 
 static inline bool in_stack(unsigned long sp, struct stack_info *info,
 			    enum stack_type type, unsigned long low,
@@ -93,7 +94,9 @@ int get_stack_info(unsigned long sp, struct task_struct *task,
 	if (!sp)
 		goto unknown;
 
-	task = task ? : current;
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto unknown;
 
 	/* Check per-task stack */
 	if (in_task_stack(sp, task, info))
@@ -128,8 +131,6 @@ void show_stack(struct task_struct *task, unsigned long *stack)
 	struct unwind_state state;
 
 	printk("Call Trace:\n");
-	if (!task)
-		task = current;
 	unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
 		printk(state.reliable ? " [<%016lx>] %pSR \n" :
 					"([<%016lx>] %pSR)\n",
@@ -194,6 +195,8 @@ void die(struct pt_regs *regs, const char *str)
 	       regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+	pr_cont("PREEMPT_RT ");
 #endif
 	pr_cont("SMP ");
 	if (debug_pagealloc_enabled())
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index b432d63d0b37..cd241ee66eff 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
 #include <asm/sclp.h>
 #include <asm/facility.h>
 #include <asm/boot_data.h>
+#include <asm/switch_to.h>
 #include "entry.h"
 
 static void __init reset_tod_clock(void)
@@ -203,21 +204,6 @@ static __init void detect_diag9c(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
 }
 
-static __init void detect_diag44(void)
-{
-	int rc;
-
-	diag_stat_inc(DIAG_STAT_X044);
-	asm volatile(
-		"	diag	0,0,0x44\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
 static __init void detect_machine_facilities(void)
 {
 	if (test_facility(8)) {
@@ -238,7 +224,7 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
 		__ctl_set_bit(0, 17);
 	}
-	if (test_facility(130)) {
+	if (test_facility(130) && !noexec_disabled) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
 		__ctl_set_bit(0, 20);
 	}
@@ -260,6 +246,24 @@ static inline void save_vector_registers(void)
 #endif
 }
 
+static inline void setup_control_registers(void)
+{
+	unsigned long reg;
+
+	__ctl_store(reg, 0, 0);
+	reg |= CR0_LOW_ADDRESS_PROTECTION;
+	reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+	reg |= CR0_EXTERNAL_CALL_SUBMASK;
+	__ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+	unsigned int acrs[NUM_ACRS] = { 0 };
+
+	restore_access_regs(acrs);
+}
+
 static int __init disable_vector_extension(char *str)
 {
 	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -268,21 +272,6 @@ static int __init disable_vector_extension(char *str)
 }
 early_param("novx", disable_vector_extension);
 
-static int __init noexec_setup(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (!rc && !enabled) {
-		/* Disable no-execute support */
-		S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
-		__ctl_clear_bit(0, 20);
-	}
-	return rc;
-}
-early_param("noexec", noexec_setup);
-
 static int __init cad_setup(char *str)
 {
 	bool enabled;
@@ -327,10 +316,11 @@ void __init startup_init(void)
 	setup_arch_string();
 	setup_boot_command_line();
 	detect_diag9c();
-	detect_diag44();
 	detect_machine_facilities();
 	save_vector_registers();
 	setup_topology();
 	sclp_early_detect();
+	setup_control_registers();
+	setup_access_registers();
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 270d1d145761..9205add8481d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -790,7 +790,7 @@ ENTRY(io_int_handler)
 .Lio_work:
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jo	.Lio_work_user		# yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	# check for preemptive scheduling
 	icm	%r0,15,__LC_PREEMPT_COUNT
 	jnz	.Lio_restore		# preemption is disabled
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index b2956d49b6ad..1d3927e01a5f 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -45,6 +45,7 @@ void specification_exception(struct pt_regs *regs);
 void transaction_exception(struct pt_regs *regs);
 void translation_exception(struct pt_regs *regs);
 void vector_exception(struct pt_regs *regs);
+void monitor_event_exception(struct pt_regs *regs);
 
 void do_per_trap(struct pt_regs *regs);
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 0d9ee198f4eb..8b88dbbda7df 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,30 +26,17 @@ ENTRY(startup_continue)
 0:	larl	%r1,tod_clock_base
 	mvc	0(16,%r1),__LC_BOOT_CLOCK
 	larl	%r13,.LPG1		# get base
-	larl	%r0,boot_vdso_data
-	stg	%r0,__LC_VDSO_PER_CPU
 #
 # Setup stack
 #
 	larl	%r14,init_task
 	stg	%r14,__LC_CURRENT
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
+	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
 #ifdef CONFIG_KASAN
 	brasl	%r14,kasan_early_init
 #endif
-#
-# Early machine initialization and detection functions.
-#
-	brasl	%r14,startup_init
-
-# check control registers
-	stctg	%c0,%c15,0(%r15)
-	oi	6(%r15),0x60		# enable sigp emergency & external call
-	oi	4(%r15),0x10		# switch on low address proctection
-	lctlg	%c0,%c15,0(%r15)
-
-	lam	0,15,.Laregs-.LPG1(%r13)	# load acrs needed by uaccess
-	brasl	%r14,start_kernel		# go to C code
+	brasl	%r14,startup_init		# s390 specific early init
+	brasl	%r14,start_kernel		# common init code
 #
 # We returned from start_kernel ?!? PANIK
 #
@@ -59,4 +46,3 @@ ENTRY(startup_continue)
 	.align	16
 .LPG1:
 .Ldw:	.quad	0x0002000180000000,0x0000000000000000
-.Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 444a19125a81..cb8b1cc285c9 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image)
 #ifdef CONFIG_CRASH_DUMP
 	int rc;
 
+	preempt_disable();
 	rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
+	preempt_enable();
 	return rc == 0;
 #else
 	return false;
@@ -254,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 	vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
 	vmcoreinfo_append_str("EDMA=%lx\n", __edma);
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9e1660a6b9db..f942341429b1 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -26,6 +26,12 @@ ENDPROC(ftrace_stub)
 #define STACK_PTREGS	  (STACK_FRAME_OVERHEAD)
 #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
 #define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
+#ifdef __PACK_STACK
+/* allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE	24
+#else
+#define TRACED_FUNC_FRAME_SIZE	STACK_FRAME_OVERHEAD
+#endif
 
 ENTRY(_mcount)
 	BR_EX	%r14
@@ -35,13 +41,21 @@ EXPORT_SYMBOL(_mcount)
 ENTRY(ftrace_caller)
 	.globl	ftrace_regs_caller
 	.set	ftrace_regs_caller,ftrace_caller
+	stg	%r14,(__SF_GPRS+8*8)(%r15)	# save traced function caller
 	lgr	%r1,%r15
 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
 	aghi	%r0,MCOUNT_RETURN_FIXUP
 #endif
-	aghi	%r15,-STACK_FRAME_SIZE
+	# allocate stack frame for ftrace_caller to contain traced function
+	aghi	%r15,-TRACED_FUNC_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
+	stg	%r0,(__SF_GPRS+8*8)(%r15)
+	stg	%r15,(__SF_GPRS+9*8)(%r15)
+	# allocate pt_regs and stack frame for ftrace_trace_function
+	aghi	%r15,-STACK_FRAME_SIZE
 	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+	aghi	%r1,-TRACED_FUNC_FRAME_SIZE
+	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
 	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 48d48b6187c0..0eb1d1cc53a8 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = {
 	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
 };
 
-static int __hw_perf_event_init(struct perf_event *event)
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
@@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	int err = 0;
 	u64 ev;
 
-	switch (attr->type) {
+	switch (type) {
 	case PERF_TYPE_RAW:
 		/* Raw events are used to access counters directly,
 		 * hence do not permit excludes */
@@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 static int cpumf_pmu_event_init(struct perf_event *event)
 {
+	unsigned int type = event->attr.type;
 	int err;
 
-	switch (event->attr.type) {
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-	case PERF_TYPE_RAW:
-		err = __hw_perf_event_init(event);
-		break;
-	default:
+	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+		err = __hw_perf_event_init(event, type);
+	else if (event->pmu->type == type)
+		/* Registered as unknown PMU */
+		err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+	else
 		return -ENOENT;
-	}
 
 	if (unlikely(err) && event->destroy)
 		event->destroy(event);
@@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void)
 		return -ENODEV;
 
 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
-	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
 	if (rc)
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
 	return rc;
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index 2654e348801a..e949ab832ed7 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event)
 	int err = -ENOENT;
 
 	debug_sprintf_event(cf_diag_dbg, 5,
-			    "%s event %p cpu %d config %#llx "
+			    "%s event %p cpu %d config %#llx type:%u "
 			    "sample_type %#llx cf_diag_events %d\n", __func__,
-			    event, event->cpu, attr->config, attr->sample_type,
-			    atomic_read(&cf_diag_events));
+			    event, event->cpu, attr->config, event->pmu->type,
+			    attr->sample_type, atomic_read(&cf_diag_events));
 
 	if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
-	    event->attr.type != PERF_TYPE_RAW)
+	    event->attr.type != event->pmu->type)
 		goto out;
 
 	/* Raw events are used to access counters directly,
@@ -693,7 +693,7 @@ static int __init cf_diag_init(void)
 	}
 	debug_register_view(cf_diag_dbg, &debug_sprintf_view);
 
-	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW);
+	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
 	if (rc) {
 		debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
 		debug_unregister(cf_diag_dbg);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8b12a9a6ff..b095b1c78987 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 		}
 	}
 
-	debug_sprintf_event(sfdbg, 5,
-			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+			    (unsigned long)sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
@@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 				   unsigned long num_sdb, gfp_t gfp_flags)
 {
 	int i, rc;
-	unsigned long *new, *tail;
+	unsigned long *new, *tail, *tail_prev = NULL;
 
 	if (!sfb->sdbt || !sfb->tail)
 		return -EINVAL;
@@ -212,10 +212,11 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	 * the sampling buffer origin.
 	 */
 	if (sfb->sdbt != get_next_sdbt(tail)) {
-		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
-				    "sampling buffer is not linked: origin=%p"
-				    "tail=%p\n",
-				    (void *) sfb->sdbt, (void *) tail);
+		debug_sprintf_event(sfdbg, 3, "%s: "
+				    "sampling buffer is not linked: origin %#lx"
+				    " tail %#lx\n", __func__,
+				    (unsigned long)sfb->sdbt,
+				    (unsigned long)tail);
 		return -EINVAL;
 	}
 
@@ -232,6 +233,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 			sfb->num_sdbt++;
 			/* Link current page to tail of chain */
 			*tail = (unsigned long)(void *) new + 1;
+			tail_prev = tail;
 			tail = new;
 		}
 
@@ -241,18 +243,30 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 		 * issue, a new realloc call (if required) might succeed.
 		 */
 		rc = alloc_sample_data_block(tail, gfp_flags);
-		if (rc)
+		if (rc) {
+			/* Undo last SDBT. An SDBT with no SDB at its first
+			 * entry but with an SDBT entry instead can not be
+			 * handled by the interrupt handler code.
+			 * Avoid this situation.
+			 */
+			if (tail_prev) {
+				sfb->num_sdbt--;
+				free_page((unsigned long) new);
+				tail = tail_prev;
+			}
 			break;
+		}
 		sfb->num_sdb++;
 		tail++;
+		tail_prev = new = NULL;	/* Allocated at least one SBD */
 	}
 
 	/* Link sampling buffer to its origin */
 	*tail = (unsigned long) sfb->sdbt + 1;
 	sfb->tail = tail;
 
-	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
-			    " settings: sdbt=%lu sdb=%lu\n",
+	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+			    " settings: sdbt %lu sdb %lu\n", __func__,
 			    sfb->num_sdbt, sfb->num_sdb);
 	return rc;
 }
@@ -292,12 +306,13 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
 	if (rc) {
 		free_sampling_buffer(sfb);
-		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
-			"realloc_sampling_buffer failed with rc=%i\n", rc);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+			"realloc_sampling_buffer failed with rc %i\n",
+			__func__, rc);
 	} else
 		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear=%p dear=%p\n",
-			sfb->sdbt, (void *) *sfb->sdbt);
+			"%s: tear %#lx dear %#lx\n", __func__,
+			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
 	return rc;
 }
 
@@ -404,8 +419,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
-			    " sample_size=%lu cpuhw=%p\n",
+			    "%s: rate %lu f %lu sdb %lu/%lu"
+			    " sample_size %lu cpuhw %p\n", __func__,
 			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
 			    sample_size, cpuhw);
 
@@ -465,8 +480,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 	if (num)
 		sfb_account_allocs(num, hwc);
 
-	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
-			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+			    __func__, OVERFLOW_REG(hwc), ratio, num);
 	OVERFLOW_REG(hwc) = 0;
 }
 
@@ -504,13 +519,13 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
 	 */
 	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
 	if (rc)
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
-				    "failed with rc=%i\n", rc);
+		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+				    __func__, rc);
 
 	if (sfb_has_pending_allocs(sfb, hwc))
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
-				    "req=%lu alloc=%lu remaining=%lu\n",
-				    num, sfb->num_sdb - num_old,
+		debug_sprintf_event(sfdbg, 5, "%s: "
+				    "req %lu alloc %lu remaining %lu\n",
+				    __func__, num, sfb->num_sdb - num_old,
 				    sfb_pending_allocs(sfb, hwc));
 }
 
@@ -538,20 +553,22 @@ static void setup_pmc_cpu(void *flags)
 		err = sf_disable();
 		if (err)
 			pr_err("Switching off the sampling facility failed "
-			       "with rc=%i\n", err);
+			       "with rc %i\n", err);
 		debug_sprintf_event(sfdbg, 5,
-				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+				    "%s: initialized: cpuhw %p\n", __func__,
+				    cpusf);
 		break;
 	case PMC_RELEASE:
 		cpusf->flags &= ~PMU_F_RESERVED;
 		err = sf_disable();
 		if (err) {
 			pr_err("Switching off the sampling facility failed "
-			       "with rc=%i\n", err);
+			       "with rc %i\n", err);
 		} else
 			deallocate_buffers(cpusf);
 		debug_sprintf_event(sfdbg, 5,
-				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+				    "%s: released: cpuhw %p\n", __func__,
+				    cpusf);
 		break;
 	}
 	if (err)
@@ -598,13 +615,6 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 	local64_set(&hwc->period_left, hwc->sample_period);
 }
 
-static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long *sdbt_origin)
-{
-	/* (Re)set to first sample-data-block-table */
-	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-}
-
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
@@ -696,9 +706,9 @@ static unsigned long getrate(bool freq, unsigned long sample,
 		 */
 		if (sample_rate_to_freq(si, rate) >
 		    sysctl_perf_event_sample_rate) {
-			debug_sprintf_event(sfdbg, 1,
+			debug_sprintf_event(sfdbg, 1, "%s: "
 					    "Sampling rate exceeds maximum "
-					    "perf sample rate\n");
+					    "perf sample rate\n", __func__);
 			rate = 0;
 		}
 	}
@@ -743,10 +753,9 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
 	attr->sample_period = rate;
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
-	debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
-			    "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
-			    event->attr.sample_period, event->attr.freq,
-			    SAMPLE_FREQ_MODE(hwc));
+	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+			    __func__, event->cpu, event->attr.sample_period,
+			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
 	return 0;
 }
 
@@ -949,8 +958,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 			 * buffer extents
 			 */
 			sfb_account_overflows(cpuhw, hwc);
-			if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
-				extend_sampling_buffer(&cpuhw->sfb, hwc);
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
 		}
 		/* Rate may be adjusted with ioctl() */
 		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
@@ -963,7 +971,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	err = lsctl(&cpuhw->lsctl);
 	if (err) {
 		cpuhw->flags &= ~PMU_F_ENABLED;
-		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+		pr_err("Loading sampling controls failed: op %i err %i\n",
 			1, err);
 		return;
 	}
@@ -971,12 +979,11 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	/* Load current program parameter */
 	lpp(&S390_lowcore.lpp);
 
-	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
-			    "interval:%lx tear=%p dear=%p\n",
+	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+			    "interval %#lx tear %#lx dear %#lx\n", __func__,
 			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
 			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
-			    (void *) cpuhw->lsctl.tear,
-			    (void *) cpuhw->lsctl.dear);
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 }
 
 static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -999,13 +1006,14 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 
 	err = lsctl(&inactive);
 	if (err) {
-		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+		pr_err("Loading sampling controls failed: op %i err %i\n",
 			2, err);
 		return;
 	}
 
 	/* Save state of TEAR and DEAR register contents */
-	if (!qsi(&si)) {
+	err = qsi(&si);
+	if (!err) {
 		/* TEAR/DEAR values are valid only if the sampling facility is
 		 * enabled.  Note that cpumsf_pmu_disable() might be called even
 		 * for a disabled sampling facility because cpumsf_pmu_enable()
@@ -1016,8 +1024,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 			cpuhw->lsctl.dear = si.dear;
 		}
 	} else
-		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
-				    "qsi() failed with err=%i\n", err);
+		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+				    __func__, err);
 
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
@@ -1130,15 +1138,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
 	local64_add(count, &event->count);
 }
 
-static void debug_sample_entry(struct hws_basic_entry *sample,
-			       struct hws_trailer_entry *te)
-{
-	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
-			    "sampling data entry: te->f=%i basic.def=%04x "
-			    "(%p)\n",
-			    te->f, sample->def, sample);
-}
-
 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
  * @event:	The perf event
  * @sdbt:	Sample-data-block table
@@ -1192,7 +1191,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 				/* Count discarded samples */
 				*overflow += 1;
 		} else {
-			debug_sample_entry(sample, te);
+			debug_sprintf_event(sfdbg, 4,
+					    "%s: Found unknown"
+					    " sampling data entry: te->f %i"
+					    " basic.def %#4x (%p)\n", __func__,
+					    te->f, sample->def, sample);
 			/* Sample slot is not yet written or other record.
 			 *
 			 * This condition can occur if the buffer was reused
@@ -1267,9 +1270,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
-		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
-				    "overflow=%llu timestamp=%#llx\n",
-				    sdbt, te->overflow,
+		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+				    "overflow %llu timestamp %#llx\n",
+				    __func__, (unsigned long)sdbt, te->overflow,
 				    (te->f) ? trailer_timestamp(te) : 0ULL);
 
 		/* Collect all samples from a single sample-data-block and
@@ -1300,22 +1303,34 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		 */
 		if (flush_all && done)
 			break;
-
-		/* If an event overflow happened, discard samples by
-		 * processing any remaining sample-data-blocks.
-		 */
-		if (event_overflow)
-			flush_all = 1;
 	}
 
 	/* Account sample overflows in the event hardware structure */
 	if (sampl_overflow)
 		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
 						 sampl_overflow, 1 + num_sdb);
+
+	/* Perf_event_overflow() and perf_event_account_interrupt() limit
+	 * the interrupt rate to an upper limit. Roughly 1000 samples per
+	 * task tick.
+	 * Hitting this limit results in a large number
+	 * of throttled REF_REPORT_THROTTLE entries and the samples
+	 * are dropped.
+	 * Slightly increase the interval to avoid hitting this limit.
+	 */
+	if (event_overflow) {
+		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+				    __func__,
+				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+	}
+
 	if (sampl_overflow || event_overflow)
-		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
-				    "overflow stats: sample=%llu event=%llu\n",
-				    sampl_overflow, event_overflow);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+				    "overflows: sample %llu event %llu"
+				    " total %llu num_sdb %llu\n",
+				    __func__, sampl_overflow, event_overflow,
+				    OVERFLOW_REG(hwc), num_sdb);
 }
 
 #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1368,7 +1383,8 @@ static void aux_output_end(struct perf_output_handle *handle)
 	te = aux_sdb_trailer(aux, aux->alert_mark);
 	te->flags &= ~SDB_TE_ALERT_REQ_MASK;
 
-	debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+	debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+			    __func__, i, range_scan, aux->head);
 }
 
 /*
@@ -1401,13 +1417,17 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	 * SDBs between aux->head and aux->empty_mark are already ready
 	 * for new data. range_scan is num of SDBs not within them.
 	 */
+	debug_sprintf_event(sfdbg, 6,
+			    "%s: range %ld head %ld alert %ld empty %ld\n",
+			    __func__, range, aux->head, aux->alert_mark,
+			    aux->empty_mark);
 	if (range > AUX_SDB_NUM_EMPTY(aux)) {
 		range_scan = range - AUX_SDB_NUM_EMPTY(aux);
 		idx = aux->empty_mark + 1;
 		for (i = 0; i < range_scan; i++, idx++) {
 			te = aux_sdb_trailer(aux, idx);
-			te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
-			te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK;
+			te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
+				       SDB_TE_ALERT_REQ_MASK);
 			te->overflow = 0;
 		}
 		/* Save the position of empty SDBs */
@@ -1426,15 +1446,11 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
 	cpuhw->lsctl.dear = aux->sdb_index[head];
 
-	debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
-			    "head->alert_mark->empty_mark (num_alert, range)"
-			    "[%lx -> %lx -> %lx] (%lx, %lx) "
-			    "tear index %lx, tear %lx dear %lx\n",
+	debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
+			    "index %ld tear %#lx dear %#lx\n", __func__,
 			    aux->head, aux->alert_mark, aux->empty_mark,
-			    AUX_SDB_NUM_ALERT(aux), range,
 			    head / CPUM_SF_SDB_PER_TABLE,
-			    cpuhw->lsctl.tear,
-			    cpuhw->lsctl.dear);
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 
 	return 0;
 }
@@ -1454,8 +1470,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 	te = aux_sdb_trailer(aux, alert_index);
 	do {
 		orig_flags = te->flags;
-		orig_overflow = te->overflow;
-		*overflow = orig_overflow;
+		*overflow = orig_overflow = te->overflow;
 		if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
 			/*
 			 * SDB is already set by hardware.
@@ -1497,9 +1512,12 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 			     unsigned long long *overflow)
 {
 	unsigned long long orig_overflow, orig_flags, new_flags;
-	unsigned long i, range_scan, idx;
+	unsigned long i, range_scan, idx, idx_old;
 	struct hws_trailer_entry *te;
 
+	debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+			    "empty %ld\n", __func__, range, aux->head,
+			    aux->alert_mark, aux->empty_mark);
 	if (range <= AUX_SDB_NUM_EMPTY(aux))
 		/*
 		 * No need to scan. All SDBs in range are marked as empty.
@@ -1522,7 +1540,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 	 * indicator fall into this range, set it.
 	 */
 	range_scan = range - AUX_SDB_NUM_EMPTY(aux);
-	idx = aux->empty_mark + 1;
+	idx_old = idx = aux->empty_mark + 1;
 	for (i = 0; i < range_scan; i++, idx++) {
 		te = aux_sdb_trailer(aux, idx);
 		do {
@@ -1542,6 +1560,9 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 	/* Update empty_mark to new position */
 	aux->empty_mark = aux->head + range - 1;
 
+	debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
+			    "empty %ld\n", __func__, range_scan, idx_old,
+			    idx - 1, aux->empty_mark);
 	return true;
 }
 
@@ -1555,7 +1576,6 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 	unsigned long range = 0, size;
 	unsigned long long overflow = 0;
 	struct perf_output_handle *handle = &cpuhw->handle;
-	unsigned long num_sdb;
 
 	aux = perf_get_aux(handle);
 	if (WARN_ON_ONCE(!aux))
@@ -1563,8 +1583,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 
 	/* Inform user space new data arrived */
 	size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+	debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+			    size >> PAGE_SHIFT);
 	perf_aux_output_end(handle, size);
-	num_sdb = aux->sfb.num_sdb;
 
 	while (!done) {
 		/* Get an output handle */
@@ -1572,8 +1593,10 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 		if (handle->size == 0) {
 			pr_err("The AUX buffer with %lu pages for the "
 			       "diagnostic-sampling mode is full\n",
-				num_sdb);
-			debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+				aux->sfb.num_sdb);
+			debug_sprintf_event(sfdbg, 1,
+					    "%s: AUX buffer used up\n",
+					    __func__);
 			break;
 		}
 		if (WARN_ON_ONCE(!aux))
@@ -1595,24 +1618,24 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			size = range << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
 			pr_err("Sample data caused the AUX buffer with %lu "
-			       "pages to overflow\n", num_sdb);
-			debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
-					    "overflow %llx\n",
+			       "pages to overflow\n", aux->sfb.num_sdb);
+			debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
+					    "overflow %lld\n", __func__,
 					    aux->head, range, overflow);
 		} else {
 			size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
-			debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+			debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
 					    "already full, try another\n",
+					    __func__,
 					    aux->head, aux->alert_mark);
 		}
 	}
 
 	if (done)
-		debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
-				    "[%lx -> %lx -> %lx] (%lx, %lx)\n",
-				    aux->head, aux->alert_mark, aux->empty_mark,
-				    AUX_SDB_NUM_ALERT(aux), range);
+		debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+				    "empty %ld\n", __func__, aux->head,
+				    aux->alert_mark, aux->empty_mark);
 }
 
 /*
@@ -1635,8 +1658,7 @@ static void aux_buffer_free(void *data)
 	kfree(aux->sdb_index);
 	kfree(aux);
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
-			    "%lu SDBTs\n", num_sdbt);
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
 }
 
 static void aux_sdb_init(unsigned long sdb)
@@ -1688,13 +1710,13 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	}
 
 	/* Allocate aux_buffer struct for the event */
-	aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+	aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
 	if (!aux)
 		goto no_aux;
 	sfb = &aux->sfb;
 
 	/* Allocate sdbt_index for fast reference */
-	n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+	n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
 	aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
 	if (!aux->sdbt_index)
 		goto no_sdbt_index;
@@ -1744,8 +1766,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	 */
 	aux->empty_mark = sfb->num_sdb - 1;
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
-			    " and %lu SDBs\n",
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
 			    sfb->num_sdbt, sfb->num_sdb);
 
 	return aux;
@@ -1799,9 +1820,9 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 	event->attr.sample_period = rate;
 	SAMPL_RATE(&event->hw) = rate;
 	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
-	debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
-			    "cpu:%d value:%llx period:%llx freq:%d\n",
-			    event->cpu, value,
+	debug_sprintf_event(sfdbg, 4, "%s:"
+			    " cpu %d value %#llx period %#llx freq %d\n",
+			    __func__, event->cpu, value,
 			    event->attr.sample_period, do_freq);
 	return 0;
 }
@@ -1877,7 +1898,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	if (!SAMPL_DIAG_MODE(&event->hw)) {
 		cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 		cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
-		hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+		TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
 	}
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
@@ -2032,7 +2053,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 
 	/* Report measurement alerts only for non-PRA codes */
 	if (alert != CPU_MF_INT_SF_PRA)
-		debug_sprintf_event(sfdbg, 6, "measurement alert: %#x\n",
+		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
 				    alert);
 
 	/* Sampling authorization change request */
@@ -2111,7 +2132,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
 
 	sfb_set_limits(min, max);
 	pr_info("The sampling buffer limits have changed to: "
-		"min=%lu max=%lu (diag=x%lu)\n",
+		"min %lu max %lu (diag %lu)\n",
 		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
 	return 0;
 }
@@ -2129,7 +2150,7 @@ static const struct kernel_param_ops param_ops_sfb_size = {
 static void __init pr_cpumsf_err(unsigned int reason)
 {
 	pr_err("Sampling facility support for perf is not available: "
-	       "reason=%04x\n", reason);
+	       "reason %#x\n", reason);
 }
 
 static int __init init_cpum_sampling_pmu(void)
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index fcb6c2e92b07..1e75cc983546 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 			   struct pt_regs *regs)
 {
 	struct unwind_state state;
+	unsigned long addr;
 
-	unwind_for_each_frame(&state, current, regs, 0)
-		perf_callchain_store(entry, state.ip);
+	unwind_for_each_frame(&state, current, regs, 0) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || perf_callchain_store(entry, addr))
+			return;
+	}
 }
 
 /* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 59dee9d3bebf..eee3a482195a 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -81,7 +81,7 @@ PGM_CHECK_DEFAULT			/* 3c */
 PGM_CHECK_DEFAULT			/* 3d */
 PGM_CHECK_DEFAULT			/* 3e */
 PGM_CHECK_DEFAULT			/* 3f */
-PGM_CHECK_DEFAULT			/* 40 */
+PGM_CHECK(monitor_event_exception)	/* 40 */
 PGM_CHECK_DEFAULT			/* 41 */
 PGM_CHECK_DEFAULT			/* 42 */
 PGM_CHECK_DEFAULT			/* 43 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index b0afec673f77..6ccef5f29761 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -40,6 +40,7 @@
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/runtime_instr.h>
+#include <asm/unwind.h>
 #include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -178,9 +179,8 @@ EXPORT_SYMBOL(dump_fpu);
 
 unsigned long get_wchan(struct task_struct *p)
 {
-	struct stack_frame *sf, *low, *high;
-	unsigned long return_address;
-	int count;
+	struct unwind_state state;
+	unsigned long ip = 0;
 
 	if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
 		return 0;
@@ -188,26 +188,22 @@ unsigned long get_wchan(struct task_struct *p)
 	if (!try_get_task_stack(p))
 		return 0;
 
-	low = task_stack_page(p);
-	high = (struct stack_frame *) task_pt_regs(p);
-	sf = (struct stack_frame *) p->thread.ksp;
-	if (sf <= low || sf > high) {
-		return_address = 0;
-		goto out;
-	}
-	for (count = 0; count < 16; count++) {
-		sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain);
-		if (sf <= low || sf > high) {
-			return_address = 0;
-			goto out;
+	unwind_for_each_frame(&state, p, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK) {
+			ip = 0;
+			break;
 		}
-		return_address = READ_ONCE_NOCHECK(sf->gprs[8]);
-		if (!in_sched_functions(return_address))
-			goto out;
+
+		ip = unwind_get_return_address(&state);
+		if (!ip)
+			break;
+
+		if (!in_sched_functions(ip))
+			break;
 	}
-out:
+
 	put_task_stack(p);
-	return return_address;
+	return ip;
 }
 
 unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ad71132374f0..58faa12542a1 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -856,7 +856,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	}
 
 	/* Do the secure computing check after ptrace. */
-	if (secure_computing(NULL)) {
+	if (secure_computing()) {
 		/* seccomp failures shouldn't expose any additional code. */
 		return -1;
 	}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ff291bc63b7..87a467dff5eb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -241,8 +241,6 @@ static void __init conmode_default(void)
 		SET_CONSOLE_SCLP;
 #endif
 	}
-	if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
-		conswitchp = &dummy_con;
 }
 
 #ifdef CONFIG_CRASH_DUMP
@@ -355,7 +353,6 @@ early_initcall(async_stack_realloc);
 
 void __init arch_call_rest_init(void)
 {
-	struct stack_frame *frame;
 	unsigned long stack;
 
 	stack = stack_alloc();
@@ -368,13 +365,7 @@ void __init arch_call_rest_init(void)
 	set_task_stack_end_magic(current);
 	stack += STACK_INIT_OFFSET;
 	S390_lowcore.kernel_stack = stack;
-	frame = (struct stack_frame *) stack;
-	memset(frame, 0, sizeof(*frame));
-	/* Branch to rest_init on the new stack, never returns */
-	asm volatile(
-		"	la	15,0(%[_frame])\n"
-		"	jg	rest_init\n"
-		: : [_frame] "a" (frame));
+	CALL_ON_STACK_NORETURN(rest_init, stack);
 }
 
 static void __init setup_lowcore_dat_off(void)
@@ -1059,7 +1050,7 @@ static void __init log_component_list(void)
 
 	if (!early_ipl_comp_list_addr)
 		return;
-	if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
+	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
 		pr_info("Linux is running with Secure-IPL enabled\n");
 	else
 		pr_info("Linux is running with Secure-IPL disabled\n");
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 44974654cbd0..a08bd2522dd9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -262,10 +262,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 	lc->spinlock_index = 0;
 	lc->percpu_offset = __per_cpu_offset[cpu];
 	lc->kernel_asce = S390_lowcore.kernel_asce;
+	lc->user_asce = S390_lowcore.kernel_asce;
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->user_timer = lc->system_timer =
 		lc->steal_timer = lc->avg_steal_timer = 0;
 	__ctl_store(lc->cregs_save_area, 0, 15);
+	lc->cregs_save_area[1] = lc->kernel_asce;
+	lc->cregs_save_area[7] = lc->vdso_asce;
 	save_access_regs((unsigned int *) lc->access_regs_save_area);
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 	       sizeof(lc->stfle_fac_list));
@@ -410,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
 
 void smp_yield_cpu(int cpu)
 {
-	if (MACHINE_HAS_DIAG9C) {
-		diag_stat_inc_norecursion(DIAG_STAT_X09C);
-		asm volatile("diag %0,0,0x9c"
-			     : : "d" (pcpu_devices[cpu].address));
-	} else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
-		diag_stat_inc_norecursion(DIAG_STAT_X044);
-		asm volatile("diag 0,0,0x44");
-	}
+	if (!MACHINE_HAS_DIAG9C)
+		return;
+	diag_stat_inc_norecursion(DIAG_STAT_X09C);
+	asm volatile("diag %0,0,0x9c"
+		     : : "d" (pcpu_devices[cpu].address));
 }
 
 /*
@@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
 
 static int smp_add_present_cpu(int cpu);
 
-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+			bool configured, bool early)
 {
 	struct pcpu *pcpu;
-	cpumask_t avail;
-	int cpu, nr, i, j;
+	int cpu, nr, i;
 	u16 address;
 
 	nr = 0;
-	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
-	cpu = cpumask_first(&avail);
-	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
-		if (sclp.has_core_type && info->core[i].type != boot_core_type)
+	if (sclp.has_core_type && core->type != boot_core_type)
+		return nr;
+	cpu = cpumask_first(avail);
+	address = core->core_id << smp_cpu_mt_shift;
+	for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+		if (pcpu_find_address(cpu_present_mask, address + i))
 			continue;
-		address = info->core[i].core_id << smp_cpu_mt_shift;
-		for (j = 0; j <= smp_cpu_mtid; j++) {
-			if (pcpu_find_address(cpu_present_mask, address + j))
-				continue;
-			pcpu = pcpu_devices + cpu;
-			pcpu->address = address + j;
-			pcpu->state =
-				(cpu >= info->configured*(smp_cpu_mtid + 1)) ?
-				CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
-			smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
-			set_cpu_present(cpu, true);
-			if (sysfs_add && smp_add_present_cpu(cpu) != 0)
-				set_cpu_present(cpu, false);
-			else
-				nr++;
-			cpu = cpumask_next(cpu, &avail);
-			if (cpu >= nr_cpu_ids)
+		pcpu = pcpu_devices + cpu;
+		pcpu->address = address + i;
+		if (configured)
+			pcpu->state = CPU_STATE_CONFIGURED;
+		else
+			pcpu->state = CPU_STATE_STANDBY;
+		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+		set_cpu_present(cpu, true);
+		if (!early && smp_add_present_cpu(cpu) != 0)
+			set_cpu_present(cpu, false);
+		else
+			nr++;
+		cpumask_clear_cpu(cpu, avail);
+		cpu = cpumask_next(cpu, avail);
+	}
+	return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+	struct sclp_core_entry *core;
+	cpumask_t avail;
+	bool configured;
+	u16 core_id;
+	int nr, i;
+
+	nr = 0;
+	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+	/*
+	 * Add IPL core first (which got logical CPU number 0) to make sure
+	 * that all SMT threads get subsequent logical CPU numbers.
+	 */
+	if (early) {
+		core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+		for (i = 0; i < info->configured; i++) {
+			core = &info->core[i];
+			if (core->core_id == core_id) {
+				nr += smp_add_core(core, &avail, true, early);
 				break;
+			}
 		}
 	}
+	for (i = 0; i < info->combined; i++) {
+		configured = i < info->configured;
+		nr += smp_add_core(&info->core[i], &avail, configured, early);
+	}
 	return nr;
 }
 
@@ -805,7 +833,7 @@ void __init smp_detect_cpus(void)
 
 	/* Add CPUs present at boot */
 	get_online_cpus();
-	__smp_rescan_cpus(info, 0);
+	__smp_rescan_cpus(info, true);
 	put_online_cpus();
 	memblock_free_early((unsigned long)info, sizeof(*info));
 }
@@ -816,6 +844,8 @@ static void smp_init_secondary(void)
 
 	S390_lowcore.last_update_clock = get_tod_clock();
 	restore_access_regs(S390_lowcore.access_regs_save_area);
+	set_cpu_flag(CIF_ASCE_PRIMARY);
+	set_cpu_flag(CIF_ASCE_SECONDARY);
 	cpu_init();
 	preempt_disable();
 	init_cpu_timer();
@@ -843,7 +873,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
 	S390_lowcore.restart_source = -1UL;
 	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
 	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
-	CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0);
+	CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack);
 }
 
 /* Upping and downing of CPUs */
@@ -1148,7 +1178,7 @@ int __ref smp_rescan_cpus(void)
 	smp_get_core_info(info, 0);
 	get_online_cpus();
 	mutex_lock(&smp_cpu_state_mutex);
-	nr = __smp_rescan_cpus(info, 1);
+	nr = __smp_rescan_cpus(info, false);
 	mutex_unlock(&smp_cpu_state_mutex);
 	put_online_cpus();
 	kfree(info);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index f8fc4f8aef9b..fc5419ac64c8 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -9,6 +9,7 @@
 #include <linux/stacktrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
+#include <asm/kprobes.h>
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
@@ -22,3 +23,45 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 			break;
 	}
 }
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack.  Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, task, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK)
+			return -EINVAL;
+
+		if (state.regs)
+			return -EINVAL;
+
+		addr = unwind_get_return_address(&state);
+		if (!addr)
+			return -EINVAL;
+
+#ifdef CONFIG_KPROBES
+		/*
+		 * Mark stacktraces with kretprobed functions on them
+		 * as unreliable.
+		 */
+		if (state.ip == (unsigned long)kretprobe_trampoline)
+			return -EINVAL;
+#endif
+
+		if (!consume_entry(cookie, addr, false))
+			return -EINVAL;
+	}
+
+	/* Check for stack corruption */
+	if (unwind_error(&state))
+		return -EINVAL;
+	return 0;
+}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c035a3..bd7bd3581a0f 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
 433  common	fspick			sys_fspick			sys_fspick
 434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
 435  common	clone3			sys_clone3			sys_clone3
+437  common	openat2			sys_openat2			sys_openat2
+438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e8766beee5ad..f9d070d016e3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void)
 }
 NOKPROBE_SYMBOL(sched_clock);
 
-/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
- */
-unsigned long long monotonic_clock(void)
-{
-	return sched_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
 static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
 {
 	unsigned long long high, low, rem, sec, nsec;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 164c0282b41a..dc75588d7894 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -53,11 +53,6 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                 if (fixup)
 			regs->psw.addr = extable_fixup(fixup);
 		else {
-			enum bug_trap_type btt;
-
-			btt = report_bug(regs->psw.addr, regs);
-			if (btt == BUG_TRAP_TYPE_WARN)
-				return;
 			die(regs, str);
 		}
         }
@@ -245,6 +240,27 @@ void space_switch_exception(struct pt_regs *regs)
 	do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
 }
 
+void monitor_event_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	if (user_mode(regs))
+		return;
+
+	switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+	case BUG_TRAP_TYPE_NONE:
+		fixup = s390_search_extables(regs->psw.addr);
+		if (fixup)
+			regs->psw.addr = extable_fixup(fixup);
+		break;
+	case BUG_TRAP_TYPE_WARN:
+		break;
+	case BUG_TRAP_TYPE_BUG:
+		die(regs, "monitor event");
+		break;
+	}
+}
+
 void kernel_stack_overflow(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
@@ -255,8 +271,23 @@ void kernel_stack_overflow(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(kernel_stack_overflow);
 
+static void test_monitor_call(void)
+{
+	int val = 1;
+
+	asm volatile(
+		"	mc	0,0\n"
+		"0:	xgr	%0,%0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (val));
+	if (!val)
+		panic("Monitor call doesn't work!\n");
+}
+
 void __init trap_init(void)
 {
 	sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
 	local_mcck_enable();
+	test_monitor_call();
 }
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index a8204f952315..707fd99f6734 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,6 +36,19 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp)
 	return true;
 }
 
+static inline bool is_final_pt_regs(struct unwind_state *state,
+				    struct pt_regs *regs)
+{
+	/* user mode or kernel thread pt_regs at the bottom of task stack */
+	if (task_pt_regs(state->task) == regs)
+		return true;
+
+	/* user mode pt_regs at the bottom of irq stack */
+	return state->stack_info.type == STACK_TYPE_IRQ &&
+	       state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+	       READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct stack_info *info = &state->stack_info;
@@ -46,20 +59,16 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	regs = state->regs;
 	if (unlikely(regs)) {
-		if (state->reuse_sp) {
-			sp = state->sp;
-			state->reuse_sp = false;
-		} else {
-			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
-			if (unlikely(outside_of_stack(state, sp))) {
-				if (!update_stack_info(state, sp))
-					goto out_err;
-			}
-		}
+		sp = state->sp;
 		sf = (struct stack_frame *) sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
 		reliable = false;
 		regs = NULL;
+		if (!__kernel_text_address(ip)) {
+			/* skip bogus %r14 */
+			state->regs = NULL;
+			return unwind_next_frame(state);
+		}
 	} else {
 		sf = (struct stack_frame *) state->sp;
 		sp = READ_ONCE_NOCHECK(sf->back_chain);
@@ -76,21 +85,25 @@ bool unwind_next_frame(struct unwind_state *state)
 			/* No back-chain, look for a pt_regs structure */
 			sp = state->sp + STACK_FRAME_OVERHEAD;
 			if (!on_stack(info, sp, sizeof(struct pt_regs)))
-				goto out_stop;
+				goto out_err;
 			regs = (struct pt_regs *) sp;
-			if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
+			if (is_final_pt_regs(state, regs))
 				goto out_stop;
 			ip = READ_ONCE_NOCHECK(regs->psw.addr);
+			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+			if (unlikely(outside_of_stack(state, sp))) {
+				if (!update_stack_info(state, sp))
+					goto out_err;
+			}
 			reliable = true;
 		}
 	}
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Decode any ftrace redirection */
-	if (ip == (unsigned long) return_to_handler)
-		ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-					   ip, (void *) sp);
-#endif
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto out_err;
+
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
 
 	/* Update unwind state */
 	state->sp = sp;
@@ -108,13 +121,11 @@ out_stop:
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
-		    struct pt_regs *regs, unsigned long sp)
+		    struct pt_regs *regs, unsigned long first_frame)
 {
 	struct stack_info *info = &state->stack_info;
-	unsigned long *mask = &state->stack_mask;
-	bool reliable, reuse_sp;
 	struct stack_frame *sf;
-	unsigned long ip;
+	unsigned long ip, sp;
 
 	memset(state, 0, sizeof(*state));
 	state->task = task;
@@ -126,38 +137,46 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		return;
 	}
 
+	/* Get the instruction pointer from pt_regs or the stack frame */
+	if (regs) {
+		ip = regs->psw.addr;
+		sp = regs->gprs[15];
+	} else if (task == current) {
+		sp = current_frame_address();
+	} else {
+		sp = task->thread.ksp;
+	}
+
 	/* Get current stack pointer and initialize stack info */
-	if (get_stack_info(sp, task, info, mask) != 0 ||
-	    !on_stack(info, sp, sizeof(struct stack_frame))) {
+	if (!update_stack_info(state, sp)) {
 		/* Something is wrong with the stack pointer */
 		info->type = STACK_TYPE_UNKNOWN;
 		state->error = true;
 		return;
 	}
 
-	/* Get the instruction pointer from pt_regs or the stack frame */
-	if (regs) {
-		ip = READ_ONCE_NOCHECK(regs->psw.addr);
-		reliable = true;
-		reuse_sp = true;
-	} else {
-		sf = (struct stack_frame *) sp;
+	if (!regs) {
+		/* Stack frame is within valid stack */
+		sf = (struct stack_frame *)sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
-		reliable = false;
-		reuse_sp = false;
 	}
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Decode any ftrace redirection */
-	if (ip == (unsigned long) return_to_handler)
-		ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-					   ip, NULL);
-#endif
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
 
 	/* Update unwind state */
 	state->sp = sp;
 	state->ip = ip;
-	state->reliable = reliable;
-	state->reuse_sp = reuse_sp;
+	state->reliable = true;
+
+	if (!first_frame)
+		return;
+	/* Skip through the call chain to the specified starting frame */
+	while (!unwind_done(state)) {
+		if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+			if (state->sp >= first_frame)
+				break;
+		}
+		unwind_next_frame(state);
+	}
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ed1fc08ccea2..bcc9bdb39ba2 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -29,13 +29,6 @@
 #include <asm/vdso.h>
 #include <asm/facility.h>
 
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 static unsigned int vdso64_pages;
@@ -55,12 +48,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
 
 	vdso_pagelist = vdso64_pagelist;
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm) {
-		vdso_pagelist = vdso32_pagelist;
-		vdso_pages = vdso32_pages;
-	}
-#endif
 
 	if (vmf->pgoff >= vdso_pages)
 		return VM_FAULT_SIGBUS;
@@ -76,10 +63,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	unsigned long vdso_pages;
 
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 
 	if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
 		return -EINVAL;
@@ -209,12 +192,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!vdso_enabled)
 		return 0;
 
+	if (is_compat_task())
+		return 0;
+
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	mm->context.compat_mm = is_compat_task();
-	if (mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 	/*
 	 * vDSO has a problem and was disabled, just don't "enable" it for
 	 * the process
@@ -267,23 +248,6 @@ static int __init vdso_init(void)
 	int i;
 
 	vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT_VDSO
-	/* Calculate the size of the 32 bit vDSO */
-	vdso32_pages = ((&vdso32_end - &vdso32_start
-			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
-	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
-				  GFP_KERNEL);
-	BUG_ON(vdso32_pagelist == NULL);
-	for (i = 0; i < vdso32_pages - 1; i++) {
-		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		get_page(pg);
-		vdso32_pagelist[i] = pg;
-	}
-	vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
-	vdso32_pagelist[vdso32_pages] = NULL;
-#endif
 
 	/* Calculate the size of the 64 bit vDSO */
 	vdso64_pages = ((&vdso64_end - &vdso64_start
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9d0ced..000000000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index aee9ffbccb54..000000000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-KCOV_INSTRUMENT := n
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-		    -Wl,--hash-style=both
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
-	$(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
-	$(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
-quiet_cmd_vdso32as = VDSO32A $@
-      cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index eaf9cf1417f6..000000000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_getres
-	.type  __kernel_clock_getres,@function
-__kernel_clock_getres:
-	CFI_STARTPROC
-	basr	%r1,0
-	la	%r1,4f-.(%r1)
-	chi	%r2,__CLOCK_REALTIME
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC
-	je	0f
-	la	%r1,5f-4f(%r1)
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	jne	3f
-0:	ltr	%r3,%r3
-	jz	2f				/* res == NULL */
-1:	l	%r0,0(%r1)
-	xc	0(4,%r3),0(%r3)			/* set tp->tv_sec to zero */
-	st	%r0,4(%r3)			/* store tp->tv_usec */
-2:	lhi	%r2,0
-	br	%r14
-3:	lhi	%r1,__NR_clock_getres		/* fallback to svc */
-	svc	0
-	br	%r14
-	CFI_ENDPROC
-4:	.long	__CLOCK_REALTIME_RES
-5:	.long	__CLOCK_COARSE_RES
-	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index ada5c11a16e5..000000000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_gettime
-	.type  __kernel_clock_gettime,@function
-__kernel_clock_gettime:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,21f-0b(%r5)			/* get &_vdso_data */
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	10f
-	chi	%r2,__CLOCK_REALTIME
-	je	11f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	je	9f
-	chi	%r2,__CLOCK_MONOTONIC
-	jne	19f
-
-	/* CLOCK_MONOTONIC */
-1:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,1(%r15)
-	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,2f
-	ahi	%r0,-1
-2:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	3f
-	a	%r0,__VDSO_TK_MULT(%r5)
-3:	alr	%r0,%r2
-	al	%r0,__VDSO_WTOM_NSEC(%r5)
-	al	%r1,__VDSO_WTOM_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_WTOM_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,20f-6b(%r5)
-	jl	8f
-7:	ahi	%r2,1
-	sl	%r1,20f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* CLOCK_MONOTONIC_COARSE */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	9b
-	l	%r2,__VDSO_WTOM_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	9b
-	j	8b
-
-	/* CLOCK_REALTIME_COARSE */
-10:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	10b
-	l	%r2,__VDSO_XTIME_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	10b
-	j	17f
-
-	/* CLOCK_REALTIME */
-11:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	11b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)			/* no - ts_steering_end */
-	sl	%r1,5(%r15)
-	brc	3,22f
-	ahi	%r0,-1
-22:	ltr	%r0,%r0				/* past end of steering? */
-	jm	24f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	23f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	23f
-	ahi	%r0,-1
-23:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,25f
-	ahi	%r0,1
-	j	25f
-24:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-25:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,12f
-	ahi	%r0,-1
-12:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	13f
-	a	%r0,__VDSO_TK_MULT(%r5)
-13:	alr	%r0,%r2
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,14f
-	ahi	%r0,1
-14:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	11b
-	basr	%r5,0
-15:	ltr	%r0,%r0
-	jnz	16f
-	cl	%r1,20f-15b(%r5)
-	jl	17f
-16:	ahi	%r2,1
-	sl	%r1,20f-15b(%r5)
-	brc	3,15b
-	ahi	%r0,-1
-	j	15b
-17:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* Fallback to system call */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-19:	lhi	%r1,__NR_clock_gettime
-	svc	0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-
-20:	.long	1000000000
-21:	.long	_vdso_data - 0b
-	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
deleted file mode 100644
index 25515f3fbcea..000000000000
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of getcpu() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2016
- *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_getcpu
-	.type  __kernel_getcpu,@function
-__kernel_getcpu:
-	CFI_STARTPROC
-	la	%r4,0
-	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
-	sacf	0
-	ltr	%r2,%r2
-	jz	2f
-	st	%r5,0(%r2)
-2:	ltr	%r3,%r3
-	jz	3f
-	st	%r4,0(%r3)
-3:	lhi	%r2,0
-	br	%r14
-	CFI_ENDPROC
-	.size	__kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index b23063fbc892..000000000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_gettimeofday
-	.type  __kernel_gettimeofday,@function
-__kernel_gettimeofday:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_ADJUST_CFA_OFFSET 16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,13f-0b(%r5)			/* get &_vdso_data */
-1:	ltr	%r3,%r3				/* check if tz is NULL */
-	je	2f
-	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)
-2:	ltr	%r2,%r2				/* check if tv is NULL */
-	je	10f
-	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)
-	sl	%r1,5(%r15)
-	brc	3,14f
-	ahi	%r0,-1
-14:	ltr	%r0,%r0				/* past end of steering? */
-	jm	16f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	15f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	15f
-	ahi	%r0,-1
-15:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,17f
-	ahi	%r0,1
-	j	17f
-16:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-17:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,3f
-	ahi	%r0,-1
-3:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	st	%r0,0(%r15)
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	4f
-	a	%r0,__VDSO_TK_MULT(%r5)
-4:	al	%r0,0(%r15)
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + xtime */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	mvc	0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	l	%r4,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r4)			/*  >> tk->shift */
-	l	%r4,0(%r15)			/* get tv_sec from stack */
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,11f-6b(%r5)
-	jl	8f
-7:	ahi	%r4,1
-	sl	%r1,11f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r4,0(%r2)			/* store tv->tv_sec */
-	ltr	%r1,%r1
-	m	%r0,12f-6b(%r5)
-	jnm	9f
-	al	%r0,12f-6b(%r5)
-9:	srl	%r0,6
-	st	%r0,4(%r2)			/* store tv->tv_usec */
-10:	slr	%r2,%r2
-	ahi	%r15,16
-	CFI_ADJUST_CFA_OFFSET -16
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-11:	.long	1000000000
-12:	.long	274877907
-13:	.long	_vdso_data - 0b
-	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index db19d0680a0a..000000000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-	.long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 721c4954cb6e..000000000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
-	. = VDSO32_LBASE + SIZEOF_HEADERS;
-
-	.hash		: { *(.hash) }			:text
-	.gnu.hash	: { *(.gnu.hash) }
-	.dynsym		: { *(.dynsym) }
-	.dynstr		: { *(.dynstr) }
-	.gnu.version	: { *(.gnu.version) }
-	.gnu.version_d	: { *(.gnu.version_d) }
-	.gnu.version_r	: { *(.gnu.version_r) }
-
-	.note		: { *(.note.*) }		:text	:note
-
-	. = ALIGN(16);
-	.text		: {
-		*(.text .stub .text.* .gnu.linkonce.t.*)
-	} :text
-	PROVIDE(__etext = .);
-	PROVIDE(_etext = .);
-	PROVIDE(etext = .);
-
-	/*
-	 * Other stuff is appended to the text segment:
-	 */
-	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-	.rodata1	: { *(.rodata1) }
-
-	.dynamic	: { *(.dynamic) }		:text	:dynamic
-
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
-	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
-	.got ALIGN(8)	: { *(.got .toc) }
-
-	_end = .;
-	PROVIDE(end = .);
-
-	/*
-	 * Stabs debugging sections are here too.
-	 */
-	.stab	       0 : { *(.stab) }
-	.stabstr       0 : { *(.stabstr) }
-	.stab.excl     0 : { *(.stab.excl) }
-	.stab.exclstr  0 : { *(.stab.exclstr) }
-	.stab.index    0 : { *(.stab.index) }
-	.stab.indexstr 0 : { *(.stab.indexstr) }
-	.comment       0 : { *(.comment) }
-
-	/*
-	 * DWARF debug sections.
-	 * Symbols in the DWARF debugging sections are relative to the
-	 * beginning of the section so we begin them at 0.
-	 */
-	/* DWARF 1 */
-	.debug		0 : { *(.debug) }
-	.line		0 : { *(.line) }
-	/* GNU DWARF 1 extensions */
-	.debug_srcinfo	0 : { *(.debug_srcinfo) }
-	.debug_sfnames	0 : { *(.debug_sfnames) }
-	/* DWARF 1.1 and DWARF 2 */
-	.debug_aranges	0 : { *(.debug_aranges) }
-	.debug_pubnames 0 : { *(.debug_pubnames) }
-	/* DWARF 2 */
-	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
-	.debug_abbrev	0 : { *(.debug_abbrev) }
-	.debug_line	0 : { *(.debug_line) }
-	.debug_frame	0 : { *(.debug_frame) }
-	.debug_str	0 : { *(.debug_str) }
-	.debug_loc	0 : { *(.debug_loc) }
-	.debug_macinfo	0 : { *(.debug_macinfo) }
-	/* SGI/MIPS DWARF 2 extensions */
-	.debug_weaknames 0 : { *(.debug_weaknames) }
-	.debug_funcnames 0 : { *(.debug_funcnames) }
-	.debug_typenames 0 : { *(.debug_typenames) }
-	.debug_varnames  0 : { *(.debug_varnames) }
-	/* DWARF 3 */
-	.debug_pubtypes 0 : { *(.debug_pubtypes) }
-	.debug_ranges	0 : { *(.debug_ranges) }
-	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
-	. = ALIGN(PAGE_SIZE);
-	PROVIDE(_vdso_data = .);
-
-	/DISCARD/	: {
-		*(.note.GNU-stack)
-		*(.branch_lt)
-		*(.data .data.* .gnu.linkonce.d.* .sdata*)
-		*(.bss .sbss .dynbss .dynsbss)
-	}
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME	0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
-	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
-	note		PT_NOTE FLAGS(4);		/* PF_R */
-	eh_frame_hdr	PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-	VDSO_VERSION_STRING {
-	global:
-		/*
-		 * Has to be there for the kernel to find
-		 */
-		__kernel_gettimeofday;
-		__kernel_clock_gettime;
-		__kernel_clock_getres;
-		__kernel_getcpu;
-
-	local: *;
-	};
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index de2fb930471a..000000000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-	__PAGE_ALIGNED_DATA
-
-	.globl vdso32_start, vdso32_end
-	.balign PAGE_SIZE
-vdso32_start:
-	.incbin "arch/s390/kernel/vdso32/vdso32.so"
-	.balign PAGE_SIZE
-vdso32_end:
-
-	.previous
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
index 2446e9dac8ab..3c04f7328500 100644
--- a/arch/s390/kernel/vdso64/getcpu.S
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -16,10 +16,8 @@
 	.type  __kernel_getcpu,@function
 __kernel_getcpu:
 	CFI_STARTPROC
-	la	%r4,0
 	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
+	lm	%r4,%r5,__VDSO_GETCPU_VAL(%r0)
 	sacf	0
 	ltgr	%r2,%r2
 	jz	2f
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7e0eb4020917..37695499717d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
 /* Handle ro_after_init data on our own. */
 #define RO_AFTER_INIT_DATA
 
+#define EMITS_PT_NOTE
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/vmlinux.lds.h>
 
@@ -50,11 +52,7 @@ SECTIONS
 		_etext = .;		/* End of text section */
 	} :text = 0x0700
 
-	NOTES :text :note
-
-	.dummy : { *(.dummy) } :data
-
-	RO_DATA_SECTION(PAGE_SIZE)
+	RO_DATA(PAGE_SIZE)
 
 	. = ALIGN(PAGE_SIZE);
 	_sdata = .;		/* Start of data section */
@@ -64,12 +62,12 @@ SECTIONS
 	.data..ro_after_init : {
 		 *(.data..ro_after_init)
 		JUMP_TABLE_DATA
-	}
+	} :data
 	EXCEPTION_TABLE(16)
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
 
-	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
 	BOOT_DATA_PRESERVED
 
 	_edata = .;		/* End of data section */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca49cfc6..8df10d3c8f6c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 __attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 /*
  * Sorted add to a list. List is linear searched until first bigger
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 45634b3d2e0a..3fb54ec2cf3e 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -158,14 +158,28 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	vcpu->stat.diagnose_9c++;
-	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
 
+	/* yield to self */
 	if (tid == vcpu->vcpu_id)
-		return 0;
+		goto no_yield;
 
+	/* yield to invalid */
 	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
-	if (tcpu)
-		kvm_vcpu_yield_to(tcpu);
+	if (!tcpu)
+		goto no_yield;
+
+	/* target already running */
+	if (READ_ONCE(tcpu->cpu) >= 0)
+		goto no_yield;
+
+	if (kvm_vcpu_yield_to(tcpu) <= 0)
+		goto no_yield;
+
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+	return 0;
+no_yield:
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+	vcpu->stat.diagnose_9c_ignored++;
 	return 0;
 }
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d1ccc168c071..165dea4c7f19 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1477,8 +1477,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	return 0;
 }
 
-static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
-				 struct kvm_s390_irq *irq)
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
@@ -2007,7 +2006,7 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 		rc = __inject_sigp_stop(vcpu, irq);
 		break;
 	case KVM_S390_RESTART:
-		rc = __inject_sigp_restart(vcpu, irq);
+		rc = __inject_sigp_restart(vcpu);
 		break;
 	case KVM_S390_INT_CLOCK_COMP:
 		rc = __inject_ckc(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d047e846e1b9..d9e6bf3d54f0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -155,6 +155,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+	{ "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
@@ -453,16 +454,14 @@ static void kvm_s390_cpu_feat_init(void)
 
 int kvm_arch_init(void *opaque)
 {
-	int rc;
+	int rc = -ENOMEM;
 
 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 	if (!kvm_s390_dbf)
 		return -ENOMEM;
 
-	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
-		rc = -ENOMEM;
-		goto out_debug_unreg;
-	}
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+		goto out;
 
 	kvm_s390_cpu_feat_init();
 
@@ -470,19 +469,17 @@ int kvm_arch_init(void *opaque)
 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 	if (rc) {
 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
-		goto out_debug_unreg;
+		goto out;
 	}
 
 	rc = kvm_s390_gib_init(GAL_ISC);
 	if (rc)
-		goto out_gib_destroy;
+		goto out;
 
 	return 0;
 
-out_gib_destroy:
-	kvm_s390_gib_destroy();
-out_debug_unreg:
-	debug_unregister(kvm_s390_dbf);
+out:
+	kvm_arch_exit();
 	return rc;
 }
 
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index d7c218e8b559..28fd66d558ff 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -11,3 +11,6 @@ lib-$(CONFIG_UPROBES) += probes.o
 # Instrumenting memory accesses to __user data (in different address space)
 # produce false positives
 KASAN_SANITIZE_uaccess.o := n
+
+obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
+CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 30a7c8c29964..9b2dab5a69f9 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -74,7 +74,7 @@ static inline int arch_load_niai4(int *lock)
 {
 	int owner;
 
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0040", 49)	/* NIAI 4 */
 		"	l	%0,%1\n"
 		: "=d" (owner) : "Q" (*lock) : "memory");
@@ -85,7 +85,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
 {
 	int expected = old;
 
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0080", 49)	/* NIAI 8 */
 		"	cs	%0,%3,%1\n"
 		: "=d" (old), "=Q" (*lock)
@@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
-	/* Use classic spinlocks + niai if the steal time is >= 10% */
 	if (test_cpu_flag(CIF_DEDICATED_CPU))
 		arch_spin_lock_queued(lp);
 	else
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
new file mode 100644
index 000000000000..32b7a30b2485
--- /dev/null
+++ b/arch/s390/lib/test_unwind.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test module for unwind_for_each_frame
+ */
+
+#define pr_fmt(fmt) "test_unwind: " fmt
+#include <asm/unwind.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <linux/wait.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
+
+#define BT_BUF_SIZE (PAGE_SIZE * 4)
+
+/*
+ * To avoid printk line limit split backtrace by lines
+ */
+static void print_backtrace(char *bt)
+{
+	char *p;
+
+	while (true) {
+		p = strsep(&bt, "\n");
+		if (!p)
+			break;
+		pr_err("%s\n", p);
+	}
+}
+
+/*
+ * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result
+ * contains unwindme_func2 followed by unwindme_func1.
+ */
+static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
+				unsigned long sp)
+{
+	int frame_count, prev_is_func2, seen_func2_func1;
+	const int max_frames = 128;
+	struct unwind_state state;
+	size_t bt_pos = 0;
+	int ret = 0;
+	char *bt;
+
+	bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC);
+	if (!bt) {
+		pr_err("failed to allocate backtrace buffer\n");
+		return -ENOMEM;
+	}
+	/* Unwind. */
+	frame_count = 0;
+	prev_is_func2 = 0;
+	seen_func2_func1 = 0;
+	unwind_for_each_frame(&state, task, regs, sp) {
+		unsigned long addr = unwind_get_return_address(&state);
+		char sym[KSYM_SYMBOL_LEN];
+
+		if (frame_count++ == max_frames)
+			break;
+		if (state.reliable && !addr) {
+			pr_err("unwind state reliable but addr is 0\n");
+			return -EINVAL;
+		}
+		sprint_symbol(sym, addr);
+		if (bt_pos < BT_BUF_SIZE) {
+			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos,
+					   state.reliable ? " [%-7s%px] %pSR\n" :
+							    "([%-7s%px] %pSR)\n",
+					   stack_type_name(state.stack_info.type),
+					   (void *)state.sp, (void *)state.ip);
+			if (bt_pos >= BT_BUF_SIZE)
+				pr_err("backtrace buffer is too small\n");
+		}
+		frame_count += 1;
+		if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
+			seen_func2_func1 = 1;
+		prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+	}
+
+	/* Check the results. */
+	if (unwind_error(&state)) {
+		pr_err("unwind error\n");
+		ret = -EINVAL;
+	}
+	if (!seen_func2_func1) {
+		pr_err("unwindme_func2 and unwindme_func1 not found\n");
+		ret = -EINVAL;
+	}
+	if (frame_count == max_frames) {
+		pr_err("Maximum number of frames exceeded\n");
+		ret = -EINVAL;
+	}
+	if (ret)
+		print_backtrace(bt);
+	kfree(bt);
+	return ret;
+}
+
+/* State of the task being unwound. */
+struct unwindme {
+	int flags;
+	int ret;
+	struct task_struct *task;
+	struct completion task_ready;
+	wait_queue_head_t task_wq;
+	unsigned long sp;
+};
+
+static struct unwindme *unwindme;
+
+/* Values of unwindme.flags. */
+#define UWM_DEFAULT		0x0
+#define UWM_THREAD		0x1	/* Unwind a separate task. */
+#define UWM_REGS		0x2	/* Pass regs to test_unwind(). */
+#define UWM_SP			0x4	/* Pass sp to test_unwind(). */
+#define UWM_CALLER		0x8	/* Unwind starting from caller. */
+#define UWM_SWITCH_STACK	0x10	/* Use CALL_ON_STACK. */
+#define UWM_IRQ			0x20	/* Unwind from irq context. */
+#define UWM_PGM			0x40	/* Unwind from program check handler. */
+
+static __always_inline unsigned long get_psw_addr(void)
+{
+	unsigned long psw_addr;
+
+	asm volatile(
+		"basr	%[psw_addr],0\n"
+		: [psw_addr] "=d" (psw_addr));
+	return psw_addr;
+}
+
+#ifdef CONFIG_KPROBES
+static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct unwindme *u = unwindme;
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+	return 0;
+}
+#endif
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func4(struct unwindme *u)
+{
+	if (!(u->flags & UWM_CALLER))
+		u->sp = current_frame_address();
+	if (u->flags & UWM_THREAD) {
+		complete(&u->task_ready);
+		wait_event(u->task_wq, kthread_should_park());
+		kthread_parkme();
+		return 0;
+#ifdef CONFIG_KPROBES
+	} else if (u->flags & UWM_PGM) {
+		struct kprobe kp;
+		int ret;
+
+		unwindme = u;
+		memset(&kp, 0, sizeof(kp));
+		kp.symbol_name = "do_report_trap";
+		kp.pre_handler = pgm_pre_handler;
+		ret = register_kprobe(&kp);
+		if (ret < 0) {
+			pr_err("register_kprobe failed %d\n", ret);
+			return -EINVAL;
+		}
+
+		/*
+		 * trigger specification exception
+		 */
+		asm volatile(
+			"	mvcl	%%r1,%%r1\n"
+			"0:	nopr	%%r7\n"
+			EX_TABLE(0b, 0b)
+			:);
+
+		unregister_kprobe(&kp);
+		unwindme = NULL;
+		return u->ret;
+#endif
+	} else {
+		struct pt_regs regs;
+
+		memset(&regs, 0, sizeof(regs));
+		regs.psw.addr = get_psw_addr();
+		regs.gprs[15] = current_stack_pointer();
+		return test_unwind(NULL,
+				   (u->flags & UWM_REGS) ? &regs : NULL,
+				   (u->flags & UWM_SP) ? u->sp : 0);
+	}
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func3(struct unwindme *u)
+{
+	u->sp = current_frame_address();
+	return unwindme_func4(u);
+}
+
+/* This function must appear in the backtrace. */
+static noinline int unwindme_func2(struct unwindme *u)
+{
+	int rc;
+
+	if (u->flags & UWM_SWITCH_STACK) {
+		preempt_disable();
+		rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
+		preempt_enable();
+		return rc;
+	} else {
+		return unwindme_func3(u);
+	}
+}
+
+/* This function must follow unwindme_func2 in the backtrace. */
+static noinline int unwindme_func1(void *u)
+{
+	return unwindme_func2((struct unwindme *)u);
+}
+
+static void unwindme_irq_handler(struct ext_code ext_code,
+				       unsigned int param32,
+				       unsigned long param64)
+{
+	struct unwindme *u = READ_ONCE(unwindme);
+
+	if (u && u->task == current) {
+		unwindme = NULL;
+		u->task = NULL;
+		u->ret = unwindme_func1(u);
+	}
+}
+
+static int test_unwind_irq(struct unwindme *u)
+{
+	preempt_disable();
+	if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
+		pr_info("Couldn't register external interrupt handler");
+		return -1;
+	}
+	u->task = current;
+	unwindme = u;
+	udelay(1);
+	unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler);
+	preempt_enable();
+	return u->ret;
+}
+
+/* Spawns a task and passes it to test_unwind(). */
+static int test_unwind_task(struct unwindme *u)
+{
+	struct task_struct *task;
+	int ret;
+
+	/* Initialize thread-related fields. */
+	init_completion(&u->task_ready);
+	init_waitqueue_head(&u->task_wq);
+
+	/*
+	 * Start the task and wait until it reaches unwindme_func4() and sleeps
+	 * in (task_ready, unwind_done] range.
+	 */
+	task = kthread_run(unwindme_func1, u, "%s", __func__);
+	if (IS_ERR(task)) {
+		pr_err("kthread_run() failed\n");
+		return PTR_ERR(task);
+	}
+	/*
+	 * Make sure task reaches unwindme_func4 before parking it,
+	 * we might park it before kthread function has been executed otherwise
+	 */
+	wait_for_completion(&u->task_ready);
+	kthread_park(task);
+	/* Unwind. */
+	ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0);
+	kthread_stop(task);
+	return ret;
+}
+
+static int test_unwind_flags(int flags)
+{
+	struct unwindme u;
+
+	u.flags = flags;
+	if (u.flags & UWM_THREAD)
+		return test_unwind_task(&u);
+	else if (u.flags & UWM_IRQ)
+		return test_unwind_irq(&u);
+	else
+		return unwindme_func1(&u);
+}
+
+static int test_unwind_init(void)
+{
+	int ret = 0;
+
+#define TEST(flags)							\
+do {									\
+	pr_info("[ RUN      ] " #flags "\n");				\
+	if (!test_unwind_flags((flags))) {				\
+		pr_info("[       OK ] " #flags "\n");			\
+	} else {							\
+		pr_err("[  FAILED  ] " #flags "\n");			\
+		ret = -EINVAL;						\
+	}								\
+} while (0)
+
+	TEST(UWM_DEFAULT);
+	TEST(UWM_SP);
+	TEST(UWM_REGS);
+	TEST(UWM_SWITCH_STACK);
+	TEST(UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+	TEST(UWM_THREAD);
+	TEST(UWM_THREAD | UWM_SP);
+	TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ);
+	TEST(UWM_IRQ | UWM_SWITCH_STACK);
+	TEST(UWM_IRQ | UWM_SP);
+	TEST(UWM_IRQ | UWM_REGS);
+	TEST(UWM_IRQ | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+#ifdef CONFIG_KPROBES
+	TEST(UWM_PGM);
+	TEST(UWM_PGM | UWM_SP);
+	TEST(UWM_PGM | UWM_REGS);
+	TEST(UWM_PGM | UWM_SP | UWM_REGS);
+#endif
+#undef TEST
+
+	return ret;
+}
+
+static void test_unwind_exit(void)
+{
+}
+
+module_init(test_unwind_init);
+module_exit(test_unwind_exit);
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19f7b3c..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	zone_dma_bits = 31;
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
@@ -291,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 	vmem_remove_mapping(start, size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 460f25572940..06345616a646 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void)
 enum populate_mode {
 	POPULATE_ONE2ONE,
 	POPULATE_MAP,
-	POPULATE_ZERO_SHADOW
+	POPULATE_ZERO_SHADOW,
+	POPULATE_SHALLOW
 };
 static void __init kasan_early_vmemmap_populate(unsigned long address,
 						unsigned long end,
@@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 			pgd_populate(&init_mm, pg_dir, p4_dir);
 		}
 
+		if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + P4D_SIZE) & P4D_MASK;
+			continue;
+		}
+
 		p4_dir = p4d_offset(pg_dir, address);
 		if (p4d_none(*p4_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 			p4d_populate(&init_mm, p4_dir, pu_dir);
 		}
 
+		if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + PUD_SIZE) & PUD_MASK;
+			continue;
+		}
+
 		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 				page = kasan_early_shadow_page;
 				pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
 				break;
+			case POPULATE_SHALLOW:
+				/* should never happen */
+				break;
 			}
 		}
 		address += PAGE_SIZE;
@@ -313,22 +329,50 @@ void __init kasan_early_init(void)
 	init_mm.pgd = early_pg_dir;
 	/*
 	 * Current memory layout:
-	 * +- 0 -------------+	 +- shadow start -+
-	 * | 1:1 ram mapping |	/| 1/8 ram	  |
-	 * +- end of ram ----+ / +----------------+
-	 * | ... gap ...     |/  |	kasan	  |
-	 * +- shadow start --+	 |	zero	  |
-	 * | 1/8 addr space  |	 |	page	  |
-	 * +- shadow end    -+	 |	mapping	  |
-	 * | ... gap ...     |\  |    (untracked) |
-	 * +- modules vaddr -+ \ +----------------+
-	 * | 2Gb	     |	\|	unmapped  | allocated per module
-	 * +-----------------+	 +- shadow end ---+
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |		    |
+	 * |		     |/    |	kasan	    |
+	 * +- shadow start --+	   |	zero	    |
+	 * | 1/8 addr space  |	   |	page	    |
+	 * +- shadow end    -+	   |	mapping	    |
+	 * | ... gap ...     |\    |  (untracked)   |
+	 * +- vmalloc area  -+ \   |		    |
+	 * | vmalloc_size    |	\  |		    |
+	 * +- modules vaddr -+	 \ +----------------+
+	 * | 2Gb	     |	  \|	  unmapped  | allocated per module
+	 * +-----------------+	   +- shadow end ---+
+	 *
+	 * Current memory layout (KASAN_VMALLOC):
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |	kasan	    |
+	 * |		     |/    |	zero	    |
+	 * +- shadow start --+	   |	page	    |
+	 * | 1/8 addr space  |	   |	mapping     |
+	 * +- shadow end    -+	   |  (untracked)   |
+	 * | ... gap ...     |\    |		    |
+	 * +- vmalloc area  -+ \   +- vmalloc area -+
+	 * | vmalloc_size    |	\  |shallow populate|
+	 * +- modules vaddr -+	 \ +- modules area -+
+	 * | 2Gb	     |	  \|shallow populate|
+	 * +-----------------+	   +- shadow end ---+
 	 */
 	/* populate kasan shadow (for identity mapping and zero page mapping) */
 	kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
 	if (IS_ENABLED(CONFIG_MODULES))
 		untracked_mem_end = vmax - MODULES_LEN;
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		untracked_mem_end = vmax - vmalloc_size - MODULES_LEN;
+		/* shallowly populate kasan shadow for vmalloc and modules */
+		kasan_early_vmemmap_populate(__sha(untracked_mem_end),
+					     __sha(vmax), POPULATE_SHALLOW);
+	}
+	/* populate kasan shadow for untracked memory */
 	kasan_early_vmemmap_populate(__sha(max_physmem_end),
 				     __sha(untracked_mem_end),
 				     POPULATE_ZERO_SHADOW);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 1864a8bb9622..de7ca4b6718f 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -70,7 +70,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size)
 	spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
 }
 
-static int __memcpy_real(void *dest, void *src, size_t count)
+static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
 {
 	register unsigned long _dest asm("2") = (unsigned long) dest;
 	register unsigned long _len1 asm("3") = (unsigned long) count;
@@ -91,19 +91,23 @@ static int __memcpy_real(void *dest, void *src, size_t count)
 	return rc;
 }
 
-static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
-				  unsigned long count)
+static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
+							unsigned long src,
+							unsigned long count)
 {
 	int irqs_disabled, rc;
 	unsigned long flags;
 
 	if (!count)
 		return 0;
-	flags = __arch_local_irq_stnsm(0xf8UL);
+	flags = arch_local_irq_save();
 	irqs_disabled = arch_irqs_disabled_flags(flags);
 	if (!irqs_disabled)
 		trace_hardirqs_off();
+	__arch_local_irq_stnsm(0xf8); // disable DAT
 	rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
+	if (flags & PSW_MASK_DAT)
+		__arch_local_irq_stosm(0x04); // enable DAT
 	if (!irqs_disabled)
 		trace_hardirqs_on();
 	__arch_local_irq_ssm(flags);
@@ -115,9 +119,15 @@ static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
  */
 int memcpy_real(void *dest, void *src, size_t count)
 {
-	if (S390_lowcore.nodat_stack != 0)
-		return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
-				     3, dest, src, count);
+	int rc;
+
+	if (S390_lowcore.nodat_stack != 0) {
+		preempt_disable();
+		rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3,
+				   dest, src, count);
+		preempt_enable();
+		return rc;
+	}
 	/*
 	 * This is a really early memcpy_real call, the stacks are
 	 * not set up yet. Just call _memcpy_real on the early boot
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index ce88211b9c6c..8d2134136290 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -23,6 +23,8 @@
 #include <linux/filter.h>
 #include <linux/init.h>
 #include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
 #include <asm/cacheflush.h>
 #include <asm/dis.h>
 #include <asm/facility.h>
@@ -38,10 +40,11 @@ struct bpf_jit {
 	int size;		/* Size of program and literal pool */
 	int size_prg;		/* Size of program */
 	int prg;		/* Current position in program */
-	int lit_start;		/* Start of literal pool */
-	int lit;		/* Current position in literal pool */
+	int lit32_start;	/* Start of 32-bit literal pool */
+	int lit32;		/* Current position in 32-bit literal pool */
+	int lit64_start;	/* Start of 64-bit literal pool */
+	int lit64;		/* Current position in 64-bit literal pool */
 	int base_ip;		/* Base address for literal pool */
-	int ret0_ip;		/* Address of return 0 */
 	int exit_ip;		/* Address of exit */
 	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
 	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
@@ -49,14 +52,10 @@ struct bpf_jit {
 	int labels[1];		/* Labels for local jumps */
 };
 
-#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */
-
-#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
-#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
-#define SEEN_FUNC	(1 << 3)	/* calls C functions */
-#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
-#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+#define SEEN_LITERAL	BIT(1)		/* code uses literals */
+#define SEEN_FUNC	BIT(2)		/* calls C functions */
+#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
 
 /*
@@ -131,13 +130,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define _EMIT2(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 2;						\
 })
 
 #define EMIT2(op, b1, b2)					\
 ({								\
-	_EMIT2(op | reg(b1, b2));				\
+	_EMIT2((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
@@ -145,20 +144,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define _EMIT4(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 4;						\
 })
 
 #define EMIT4(op, b1, b2)					\
 ({								\
-	_EMIT4(op | reg(b1, b2));				\
+	_EMIT4((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT4_RRF(op, b1, b2, b3)				\
 ({								\
-	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
+	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 	REG_SET_SEEN(b3);					\
@@ -167,13 +166,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define _EMIT4_DISP(op, disp)					\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT4(op | __disp);					\
+	_EMIT4((op) | __disp);					\
 })
 
 #define EMIT4_DISP(op, b1, b2, disp)				\
 ({								\
-	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
-		    reg_high(b2) << 8, disp);			\
+	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, (disp));			\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
@@ -181,21 +180,27 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define EMIT4_IMM(op, b1, imm)					\
 ({								\
 	unsigned int __imm = (imm) & 0xffff;			\
-	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
+	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
 	REG_SET_SEEN(b1);					\
 })
 
 #define EMIT4_PCREL(op, pcrel)					\
 ({								\
 	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
-	_EMIT4(op | __pcrel);					\
+	_EMIT4((op) | __pcrel);					\
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target)			\
+({								\
+	int __rel = ((target) - jit->prg) / 2;			\
+	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
 })
 
 #define _EMIT6(op1, op2)					\
 ({								\
 	if (jit->prg_buf) {					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
-		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
 	}							\
 	jit->prg += 6;						\
 })
@@ -203,20 +208,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define _EMIT6_DISP(op1, op2, disp)				\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT6(op1 | __disp, op2);				\
+	_EMIT6((op1) | __disp, op2);				\
 })
 
 #define _EMIT6_DISP_LH(op1, op2, disp)				\
 ({								\
-	u32 _disp = (u32) disp;					\
+	u32 _disp = (u32) (disp);				\
 	unsigned int __disp_h = _disp & 0xff000;		\
 	unsigned int __disp_l = _disp & 0x00fff;		\
-	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
+	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
 })
 
 #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
 ({								\
-	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
+	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
 		       reg_high(b3) << 8, op2, disp);		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
@@ -226,8 +231,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask)	\
 ({								\
 	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff),	\
-	       op2 | mask << 12);				\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
+	       (op2) | (mask) << 12);				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
@@ -235,68 +240,83 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 #define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask)	\
 ({								\
 	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | (reg_high(b1) | mask) << 16 |		\
-		(rel & 0xffff), op2 | (imm & 0xff) << 8);	\
+	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
+		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
 	REG_SET_SEEN(b1);					\
-	BUILD_BUG_ON(((unsigned long) imm) > 0xff);		\
+	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
 })
 
 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
 ({								\
 	/* Branch instruction needs 6 bytes */			\
-	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
+	int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT6_PCREL_RILB(op, b, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff);	\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
 	REG_SET_SEEN(b);					\
 })
 
 #define EMIT6_PCREL_RIL(op, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | rel >> 16, rel & 0xffff);			\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target)			\
+({								\
+	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
 })
 
 #define _EMIT6_IMM(op, imm)					\
 ({								\
 	unsigned int __imm = (imm);				\
-	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
+	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
 })
 
 #define EMIT6_IMM(op, b1, imm)					\
 ({								\
-	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
+	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
 	REG_SET_SEEN(b1);					\
 })
 
-#define EMIT_CONST_U32(val)					\
+#define _EMIT_CONST_U32(val)					\
 ({								\
 	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
-	jit->seen |= SEEN_LITERAL;				\
+	ret = jit->lit32;					\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
-	jit->lit += 4;						\
+		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+	jit->lit32 += 4;					\
 	ret;							\
 })
 
-#define EMIT_CONST_U64(val)					\
+#define EMIT_CONST_U32(val)					\
 ({								\
-	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
 	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U32(val) - jit->base_ip;			\
+})
+
+#define _EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit64;					\
 	if (jit->prg_buf)					\
-		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
-	jit->lit += 8;						\
+		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+	jit->lit64 += 8;					\
 	ret;							\
 })
 
+#define EMIT_CONST_U64(val)					\
+({								\
+	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U64(val) - jit->base_ip;			\
+})
+
 #define EMIT_ZERO(b1)						\
 ({								\
 	if (!fp->aux->verifier_zext) {				\
@@ -307,6 +327,67 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 })
 
 /*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+	return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+	return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+	return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+	return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+	return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
  * Fill whole space with illegal instructions
  */
 static void jit_fill_hole(void *area, unsigned int size)
@@ -383,9 +464,18 @@ static int get_end(struct bpf_jit *jit, int start)
  */
 static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 {
-
+	const int last = 15, save_restore_size = 6;
 	int re = 6, rs;
 
+	if (is_first_pass(jit)) {
+		/*
+		 * We don't know yet which registers are used. Reserve space
+		 * conservatively.
+		 */
+		jit->prg += (last - re + 1) * save_restore_size;
+		return;
+	}
+
 	do {
 		rs = get_start(jit, re);
 		if (!rs)
@@ -396,7 +486,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 		else
 			restore_regs(jit, rs, re, stack_depth);
 		re++;
-	} while (re <= 15);
+	} while (re <= last);
 }
 
 /*
@@ -420,21 +510,28 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 	/* Save registers */
 	save_restore_regs(jit, REGS_SAVE, stack_depth);
 	/* Setup literal pool */
-	if (jit->seen & SEEN_LITERAL) {
-		/* basr %r13,0 */
-		EMIT2(0x0d00, REG_L, REG_0);
-		jit->base_ip = jit->prg;
+	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+		if (!is_first_pass(jit) &&
+		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
+			/* basr %l,0 */
+			EMIT2(0x0d00, REG_L, REG_0);
+			jit->base_ip = jit->prg;
+		} else {
+			/* larl %l,lit32_start */
+			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+			jit->base_ip = jit->lit32_start;
+		}
 	}
 	/* Setup stack and backchain */
-	if (jit->seen & SEEN_STACK) {
-		if (jit->seen & SEEN_FUNC)
+	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* lgr %w1,%r15 (backchain) */
 			EMIT4(0xb9040000, REG_W1, REG_15);
 		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
-		if (jit->seen & SEEN_FUNC)
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* stg %w1,152(%r15) (backchain) */
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
@@ -446,12 +543,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
  */
 static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 {
-	/* Return 0 */
-	if (jit->seen & SEEN_RET0) {
-		jit->ret0_ip = jit->prg;
-		/* lghi %b0,0 */
-		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
-	}
 	jit->exit_ip = jit->prg;
 	/* Load exit code: lgr %r2,%b0 */
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -476,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 	_EMIT2(0x07fe);
 
 	if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
-	    (jit->seen & SEEN_FUNC)) {
+	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
 		jit->r1_thunk_ip = jit->prg;
 		/* Generate __s390_indirect_jump_r1 thunk */
 		if (test_facility(35)) {
@@ -506,16 +597,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 				 int i, bool extra_pass)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
-	int jmp_off, last, insn_count = 1;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
+	int last, insn_count = 1;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
 	unsigned int mask;
 
-	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-		jit->seen |= SEEN_REG_AX;
 	switch (insn->code) {
 	/*
 	 * BPF_MOV
@@ -549,9 +638,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		u64 imm64;
 
 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
-		/* lg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm64));
+		/* lgrl %dst,imm */
+		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
 		insn_count = 2;
 		break;
 	}
@@ -680,9 +768,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT4_IMM(0xa7080000, REG_W0, 0);
 		/* lr %w1,%dst */
 		EMIT2(0x1800, REG_W1, dst_reg);
-		/* dl %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U32(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+			/* dl %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U32(imm));
+		} else {
+			/* lgfrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+					 _EMIT_CONST_U32(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlr %w0,%dst */
+			EMIT4(0xb9970000, REG_W0, dst_reg);
+		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		if (insn_is_zext(&insn[1]))
@@ -704,9 +801,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT4_IMM(0xa7090000, REG_W0, 0);
 		/* lgr %w1,%dst */
 		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* dlg %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* dlg %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlgr %w0,%dst */
+			EMIT4(0xb9870000, REG_W0, dst_reg);
+		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
@@ -729,9 +835,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
-		/* ng %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* ng %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0080,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ngr %dst,%w0 */
+			EMIT4(0xb9800000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_OR
@@ -751,9 +867,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
-		/* og %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* og %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0081,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ogr %dst,%w0 */
+			EMIT4(0xb9810000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_XOR
@@ -775,9 +901,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
-		/* xg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* xg %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0082,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* xgr %dst,%w0 */
+			EMIT4(0xb9820000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_LSH
@@ -1023,9 +1159,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 
 		REG_SET_SEEN(BPF_REG_5);
 		jit->seen |= SEEN_FUNC;
-		/* lg %w1,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
-			      EMIT_CONST_U64(func));
+		/* lgrl %w1,func */
+		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
 		if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
 			/* brasl %r14,__s390_indirect_jump_r1 */
 			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1054,9 +1189,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		/* llgf %w1,map.max_entries(%b2) */
 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
 			      offsetof(struct bpf_array, map.max_entries));
-		/* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
-		EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
-				  REG_W1, 0, 0xa);
+		/* if ((u32)%b3 >= (u32)%w1) goto out; */
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* clrj %b3,%w1,0xa,label0 */
+			EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+					  REG_W1, 0, 0xa);
+		} else {
+			/* clr %b3,%w1 */
+			EMIT2(0x1500, BPF_REG_3, REG_W1);
+			/* brcl 0xa,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
+		}
 
 		/*
 		 * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1071,9 +1214,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT4_IMM(0xa7080000, REG_W0, 1);
 		/* laal %w1,%w0,off(%r15) */
 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
-		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
-				      MAX_TAIL_CALL_CNT, 0, 0x2);
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+			EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+					      MAX_TAIL_CALL_CNT, 0, 0x2);
+		} else {
+			/* clfi %w1,MAX_TAIL_CALL_CNT */
+			EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
+			/* brcl 0x2,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
+		}
 
 		/*
 		 * prog = array->ptrs[index];
@@ -1085,11 +1235,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		EMIT4(0xb9160000, REG_1, BPF_REG_3);
 		/* sllg %r1,%r1,3: %r1 *= 8 */
 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
-		/* lg %r1,prog(%b2,%r1) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+		/* ltg %r1,prog(%b2,%r1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
 			      REG_1, offsetof(struct bpf_array, ptrs));
-		/* clgij %r1,0,0x8,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* brc 0x8,label0 */
+			EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
+		} else {
+			/* brcl 0x8,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
+		}
 
 		/*
 		 * Restore registers before calling function
@@ -1110,7 +1265,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		break;
 	case BPF_JMP | BPF_EXIT: /* return b0 */
 		last = (i == fp->len - 1) ? 1 : 0;
-		if (last && !(jit->seen & SEEN_RET0))
+		if (last)
 			break;
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
@@ -1246,36 +1401,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		goto branch_oc;
 branch_ks:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* crj or cgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, REG_W1, i, off, mask);
+		/* cfi or cgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_ku:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* clrj or clgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, REG_W1, i, off, mask);
+		/* clfi or clgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_xs:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* crj or cgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* crj or cgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* cr or cgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1900, dst_reg, src_reg);
+			else
+				EMIT4(0xb9200000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_xu:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* clrj or clgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* clrj or clgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* clr or clgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1500, dst_reg, src_reg);
+			else
+				EMIT4(0xb9210000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_oc:
-		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
-		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
-		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 	}
 	default: /* too complex, give up */
@@ -1286,28 +1488,67 @@ branch_oc:
 }
 
 /*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+	/* On the first pass anything goes */
+	if (is_first_pass(jit))
+		return true;
+
+	/* The codegen pass must not change anything */
+	if (is_codegen_pass(jit))
+		return jit->addrs[i] == jit->prg;
+
+	/* Passes in between must not increase code size */
+	return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+	if (!bpf_is_new_addr_sane(jit, i))
+		return -1;
+	jit->addrs[i] = jit->prg;
+	return 0;
+}
+
+/*
  * Compile eBPF program into s390x code
  */
 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
 			bool extra_pass)
 {
-	int i, insn_count;
+	int i, insn_count, lit32_size, lit64_size;
 
-	jit->lit = jit->lit_start;
+	jit->lit32 = jit->lit32_start;
+	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
 
 	bpf_jit_prologue(jit, fp->aux->stack_depth);
+	if (bpf_set_addr(jit, 0) < 0)
+		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
 		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
 		if (insn_count < 0)
 			return -1;
 		/* Next instruction address */
-		jit->addrs[i + insn_count] = jit->prg;
+		if (bpf_set_addr(jit, i + insn_count) < 0)
+			return -1;
 	}
 	bpf_jit_epilogue(jit, fp->aux->stack_depth);
 
-	jit->lit_start = jit->prg;
-	jit->size = jit->lit;
+	lit32_size = jit->lit32 - jit->lit32_start;
+	lit64_size = jit->lit64 - jit->lit64_start;
+	jit->lit32_start = jit->prg;
+	if (lit32_size)
+		jit->lit32_start = ALIGN(jit->lit32_start, 4);
+	jit->lit64_start = jit->lit32_start + lit32_size;
+	if (lit64_size)
+		jit->lit64_start = ALIGN(jit->lit64_start, 8);
+	jit->size = jit->lit64_start + lit64_size;
 	jit->size_prg = jit->prg;
 	return 0;
 }
@@ -1369,7 +1610,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	memset(&jit, 0, sizeof(jit));
-	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
 	if (jit.addrs == NULL) {
 		fp = orig_fp;
 		goto out;
@@ -1388,12 +1629,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	/*
 	 * Final pass: Allocate and generate program
 	 */
-	if (jit.size >= BPF_SIZE_MAX) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
-
-	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
 	if (!header) {
 		fp = orig_fp;
 		goto free_addrs;
@@ -1422,7 +1658,7 @@ skip_init_ctx:
 	if (!fp->is_func || extra_pass) {
 		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
 free_addrs:
-		kfree(jit.addrs);
+		kvfree(jit.addrs);
 		kfree(jit_data);
 		fp->aux->jit_data = NULL;
 	}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index c7fea9bea8cb..bc61ea18e88d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/jump_label.h>
 #include <linux/pci.h>
+#include <linux/printk.h>
 
 #include <asm/isc.h>
 #include <asm/airq.h>
@@ -43,7 +44,7 @@ static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
 static DEFINE_SPINLOCK(zpci_domain_lock);
 
 #define ZPCI_IOMAP_ENTRIES						\
-	min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2),	\
+	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
 	    ZPCI_IOMAP_MAX_ENTRIES)
 
 static DEFINE_SPINLOCK(zpci_iomap_lock);
@@ -294,7 +295,7 @@ static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
 void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
 			      unsigned long offset, unsigned long max)
 {
-	if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
 		return NULL;
 
 	if (static_branch_likely(&have_mio))
@@ -324,7 +325,7 @@ static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
 void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
 				 unsigned long offset, unsigned long max)
 {
-	if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
 		return NULL;
 
 	if (static_branch_likely(&have_mio))
@@ -416,7 +417,7 @@ static void zpci_map_resources(struct pci_dev *pdev)
 	resource_size_t len;
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
@@ -451,7 +452,7 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
 	if (zpci_use_mio(zdev))
 		return;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
@@ -514,7 +515,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
 	snprintf(zdev->res_name, sizeof(zdev->res_name),
 		 "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (!zdev->bars[i].size)
 			continue;
 		entry = zpci_alloc_iomap(zdev);
@@ -551,7 +552,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 {
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
@@ -573,7 +574,7 @@ int pcibios_add_device(struct pci_dev *pdev)
 	pdev->dev.dma_ops = &s390_pci_dma_ops;
 	zpci_map_resources(pdev);
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		res = &pdev->resource[i];
 		if (res->parent || !res->flags)
 			continue;
@@ -659,6 +660,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
 		spin_lock(&zpci_domain_lock);
 		if (test_bit(zdev->domain, zpci_domain)) {
 			spin_unlock(&zpci_domain_lock);
+			pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
+				zdev->fid, zdev->domain);
 			return -EEXIST;
 		}
 		set_bit(zdev->domain, zpci_domain);
@@ -670,6 +673,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
 	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
 	if (zdev->domain == ZPCI_NR_DEVICES) {
 		spin_unlock(&zpci_domain_lock);
+		pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+			zdev->fid, ZPCI_NR_DEVICES);
 		return -ENOSPC;
 	}
 	set_bit(zdev->domain, zpci_domain);
@@ -934,5 +939,5 @@ subsys_initcall_sync(pci_base_init);
 void zpci_rescan(void)
 {
 	if (zpci_is_enabled())
-		clp_rescan_pci_devices_simple();
+		clp_rescan_pci_devices_simple(NULL);
 }
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index e585a62d6530..0d3d8f170ea4 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -145,7 +145,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
 {
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		zdev->bars[i].val = le32_to_cpu(response->bar[i]);
 		zdev->bars[i].size = response->bar_size[i];
 	}
@@ -164,8 +164,8 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
 		       sizeof(zdev->util_str));
 	}
 	zdev->mio_capable = response->mio_addr_avail;
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!(response->mio.valid & (1 << (PCI_BAR_COUNT - i - 1))))
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (!(response->mio.valid & (1 << (PCI_STD_NUM_BARS - i - 1))))
 			continue;
 
 		zdev->bars[i].mio_wb = (void __iomem *) response->mio.addr[i].wb;
@@ -240,12 +240,14 @@ error:
 }
 
 /*
- * Enable/Disable a given PCI function defined by its function handle.
+ * Enable/Disable a given PCI function and update its function handle if
+ * necessary
  */
-static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
 {
 	struct clp_req_rsp_set_pci *rrb;
 	int rc, retries = 100;
+	u32 fid = zdev->fid;
 
 	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
@@ -256,7 +258,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 		rrb->request.hdr.len = sizeof(rrb->request);
 		rrb->request.hdr.cmd = CLP_SET_PCI_FN;
 		rrb->response.hdr.len = sizeof(rrb->response);
-		rrb->request.fh = *fh;
+		rrb->request.fh = zdev->fh;
 		rrb->request.oc = command;
 		rrb->request.ndas = nr_dma_as;
 
@@ -269,12 +271,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 		}
 	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
 
-	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-		*fh = rrb->response.fh;
-	else {
+	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
 		zpci_err("Set PCI FN:\n");
 		zpci_err_clp(rrb->response.hdr.rsp, rc);
-		rc = -EIO;
+	}
+
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+		zdev->fh = rrb->response.fh;
+	} else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
+			rrb->response.fh == 0) {
+		/* Function is already in desired state - update handle */
+		rc = clp_rescan_pci_devices_simple(&fid);
 	}
 	clp_free_block(rrb);
 	return rc;
@@ -282,18 +289,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 
 int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
 {
-	u32 fh = zdev->fh;
 	int rc;
 
-	rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
-	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+	rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
 	if (rc)
 		goto out;
 
-	zdev->fh = fh;
 	if (zpci_use_mio(zdev)) {
-		rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
-		zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+		rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO);
+		zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
+				zdev->fid, zdev->fh, rc);
 		if (rc)
 			clp_disable_fh(zdev);
 	}
@@ -309,11 +315,8 @@ int clp_disable_fh(struct zpci_dev *zdev)
 	if (!zdev_enabled(zdev))
 		return 0;
 
-	rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
+	rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN);
 	zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
-	if (!rc)
-		zdev->fh = fh;
-
 	return rc;
 }
 
@@ -370,10 +373,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
 static void __clp_update(struct clp_fh_list_entry *entry, void *data)
 {
 	struct zpci_dev *zdev;
+	u32 *fid = data;
 
 	if (!entry->vendor_id)
 		return;
 
+	if (fid && *fid != entry->fid)
+		return;
+
 	zdev = get_zdev_by_fid(entry->fid);
 	if (!zdev)
 		return;
@@ -413,7 +420,10 @@ int clp_rescan_pci_devices(void)
 	return rc;
 }
 
-int clp_rescan_pci_devices_simple(void)
+/* Rescan PCI functions and refresh function handles. If fid is non-NULL only
+ * refresh the handle of the function matching @fid
+ */
+int clp_rescan_pci_devices_simple(u32 *fid)
 {
 	struct clp_req_rsp_list_pci *rrb;
 	int rc;
@@ -422,7 +432,7 @@ int clp_rescan_pci_devices_simple(void)
 	if (!rrb)
 		return -ENOMEM;
 
-	rc = clp_list_pci(rrb, NULL, __clp_update);
+	rc = clp_list_pci(rrb, fid, __clp_update);
 
 	clp_free_block(rrb);
 	return rc;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a433ba01a317..215f17437a4f 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -13,6 +13,8 @@
 #include <linux/stat.h>
 #include <linux/pci.h>
 
+#include "../../../drivers/pci/pci.h"
+
 #include <asm/sclp.h>
 
 #define zpci_attr(name, fmt, member)					\
@@ -49,31 +51,50 @@ static DEVICE_ATTR_RO(mio_enabled);
 static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
+	struct kernfs_node *kn;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct zpci_dev *zdev = to_zpci(pdev);
-	int ret;
-
-	if (!device_remove_file_self(dev, attr))
-		return count;
-
+	int ret = 0;
+
+	/* Can't use device_remove_self() here as that would lead us to lock
+	 * the pci_rescan_remove_lock while holding the device' kernfs lock.
+	 * This would create a possible deadlock with disable_slot() which is
+	 * not directly protected by the device' kernfs lock but takes it
+	 * during the device removal which happens under
+	 * pci_rescan_remove_lock.
+	 *
+	 * This is analogous to sdev_store_delete() in
+	 * drivers/scsi/scsi_sysfs.c
+	 */
+	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+	WARN_ON_ONCE(!kn);
+	/* device_remove_file() serializes concurrent calls ignoring all but
+	 * the first
+	 */
+	device_remove_file(dev, attr);
+
+	/* A concurrent call to recover_store() may slip between
+	 * sysfs_break_active_protection() and the sysfs file removal.
+	 * Once it unblocks from pci_lock_rescan_remove() the original pdev
+	 * will already be removed.
+	 */
 	pci_lock_rescan_remove();
-	pci_stop_and_remove_bus_device(pdev);
-	ret = zpci_disable_device(zdev);
-	if (ret)
-		goto error;
-
-	ret = zpci_enable_device(zdev);
-	if (ret)
-		goto error;
-
-	pci_rescan_bus(zdev->bus);
+	if (pci_dev_is_added(pdev)) {
+		pci_stop_and_remove_bus_device(pdev);
+		ret = zpci_disable_device(zdev);
+		if (ret)
+			goto out;
+
+		ret = zpci_enable_device(zdev);
+		if (ret)
+			goto out;
+		pci_rescan_bus(zdev->bus);
+	}
+out:
 	pci_unlock_rescan_remove();
-
-	return count;
-
-error:
-	pci_unlock_rescan_remove();
-	return ret;
+	if (kn)
+		sysfs_unbreak_active_protection(kn);
+	return ret ? ret : count;
 }
 static DEVICE_ATTR_WO(recover);
 
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index 04a03433c720..c82157f46b18 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,3 +1,4 @@
 purgatory
+purgatory.chk
 purgatory.lds
 purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index bc0d7a0d0394..c57f8c40e992 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y
 
 purgatory-y := head.o purgatory.o string.o sha256.o mem.o
 
-targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
@@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
 $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
 	$(call if_changed_rule,as_o_S)
 
-$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
-	$(call if_changed_rule,cc_o_c)
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
 
 KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
 KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
@@ -26,15 +28,22 @@ KBUILD_CFLAGS += $(CLANG_FLAGS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
 
-LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+# Since we link purgatory with -r unresolved symbols are not checked, so we
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib
+LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T
+LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS)
 $(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
 
+$(obj)/purgatory.chk: $(obj)/purgatory FORCE
+		$(call if_changed,ld)
+
 OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
 OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
 OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
 OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
-$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
 		$(call if_changed,objcopy)
 
 $(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c
new file mode 100644
index 000000000000..c98c22a72db7
--- /dev/null
+++ b/arch/s390/purgatory/string.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __HAVE_ARCH_MEMCMP	/* arch function */
+#include "../lib/string.c"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f356ee674d89..9ece111b0254 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -108,7 +108,7 @@ config GENERIC_CALIBRATE_DELAY
 
 config GENERIC_LOCKBREAK
 	def_bool y
-	depends on SMP && PREEMPT
+	depends on SMP && PREEMPTION
 
 config ARCH_SUSPEND_POSSIBLE
 	def_bool n
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index d964c4d6b139..77dad1e511b4 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -341,7 +341,7 @@ static void __init sh7785lcr_setup(char **cmdline_p)
 	pm_power_off = sh7785lcr_power_off;
 
 	/* sm501 DRAM configuration */
-	sm501_reg = ioremap_nocache(SM107_REG_ADDR, SM501_DRAM_CONTROL);
+	sm501_reg = ioremap(SM107_REG_ADDR, SM501_DRAM_CONTROL);
 	if (!sm501_reg) {
 		printk(KERN_ERR "%s: ioremap error.\n", __func__);
 		return;
diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c
index 9108789fafef..3b6ea2d99013 100644
--- a/arch/sh/boards/mach-cayman/irq.c
+++ b/arch/sh/boards/mach-cayman/irq.c
@@ -137,7 +137,7 @@ void init_cayman_irq(void)
 {
 	int i;
 
-	epld_virt = (unsigned long)ioremap_nocache(EPLD_BASE, 1024);
+	epld_virt = (unsigned long)ioremap(EPLD_BASE, 1024);
 	if (!epld_virt) {
 		printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n");
 		return;
diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
index 4cec14700adc..8ef76e288da0 100644
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -99,7 +99,7 @@ static int __init smsc_superio_setup(void)
 {
 	unsigned char devid, devrev;
 
-	smsc_superio_virt = (unsigned long)ioremap_nocache(SMSC_SUPERIO_BASE, 1024);
+	smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024);
 	if (!smsc_superio_virt) {
 		panic("Unable to remap SMSC SuperIO\n");
 	}
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index acaa97459531..dd427bac5cde 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -371,20 +371,32 @@ static struct platform_device lcdc_device = {
 	},
 };
 
+static struct gpiod_lookup_table gpio_backlight_lookup = {
+	.dev_id		= "gpio-backlight.0",
+	.table = {
+		GPIO_LOOKUP("sh7724_pfc", GPIO_PTR1, NULL, GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
+static struct property_entry gpio_backlight_props[] = {
+	PROPERTY_ENTRY_BOOL("default-on"),
+	{ }
+};
+
 static struct gpio_backlight_platform_data gpio_backlight_data = {
 	.fbdev = &lcdc_device.dev,
-	.gpio = GPIO_PTR1,
-	.def_value = 1,
-	.name = "backlight",
 };
 
-static struct platform_device gpio_backlight_device = {
+static const struct platform_device_info gpio_backlight_device_info = {
 	.name = "gpio-backlight",
-	.dev = {
-		.platform_data = &gpio_backlight_data,
-	},
+	.data = &gpio_backlight_data,
+	.size_data = sizeof(gpio_backlight_data),
+	.properties = gpio_backlight_props,
 };
 
+static struct platform_device *gpio_backlight_device;
+
 /* CEU0 */
 static struct ceu_platform_data ceu0_pdata = {
 	.num_subdevs			= 2,
@@ -1006,7 +1018,6 @@ static struct platform_device *ecovec_devices[] __initdata = {
 	&usb1_common_device,
 	&usbhs_device,
 	&lcdc_device,
-	&gpio_backlight_device,
 	&keysc_device,
 	&cn12_power,
 #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
@@ -1462,6 +1473,12 @@ static int __init arch_setup(void)
 #endif
 #endif
 
+	gpiod_add_lookup_table(&gpio_backlight_lookup);
+	gpio_backlight_device = platform_device_register_full(
+					&gpio_backlight_device_info);
+	if (IS_ERR(gpio_backlight_device))
+		return PTR_ERR(gpio_backlight_device);
+
 	return platform_add_devices(ecovec_devices,
 				    ARRAY_SIZE(ecovec_devices));
 }
diff --git a/arch/sh/boards/mach-sdk7786/fpga.c b/arch/sh/boards/mach-sdk7786/fpga.c
index 895576ff8376..a37e1e88c6b1 100644
--- a/arch/sh/boards/mach-sdk7786/fpga.c
+++ b/arch/sh/boards/mach-sdk7786/fpga.c
@@ -32,7 +32,7 @@ static void __iomem *sdk7786_fpga_probe(void)
 	 * is reserved.
 	 */
 	for (area = PA_AREA0; area < PA_AREA7; area += SZ_64M) {
-		base = ioremap_nocache(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE);
+		base = ioremap(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE);
 		if (!base) {
 			/* Failed to remap this area, move along. */
 			continue;
diff --git a/arch/sh/boards/mach-sdk7786/nmi.c b/arch/sh/boards/mach-sdk7786/nmi.c
index c2e09d798537..afba49679a12 100644
--- a/arch/sh/boards/mach-sdk7786/nmi.c
+++ b/arch/sh/boards/mach-sdk7786/nmi.c
@@ -37,7 +37,7 @@ static int __init nmi_mode_setup(char *str)
 		nmi_mode = NMI_MODE_ANY;
 	else {
 		nmi_mode = NMI_MODE_UNKNOWN;
-		pr_warning("Unknown NMI mode %s\n", str);
+		pr_warn("Unknown NMI mode %s\n", str);
 	}
 
 	printk("Set NMI mode to %d\n", nmi_mode);
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index c15cac9251b9..e69ec12cbbe6 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -111,6 +111,11 @@ void __stack_chk_fail(void)
 	error("stack-protector: Kernel stack is corrupted\n");
 }
 
+/* Needed because vmlinux.lds.h references this */
+void ftrace_stub(void)
+{
+}
+
 #ifdef CONFIG_SUPERH64
 #define stackalign	8
 #else
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index 2b0572b497c1..78643191c99e 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -8,7 +8,6 @@ CONFIG_NAMESPACES=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_COUNTERS=y
diff --git a/arch/sh/drivers/Makefile b/arch/sh/drivers/Makefile
index 3e93b434e604..56b0acace6e7 100644
--- a/arch/sh/drivers/Makefile
+++ b/arch/sh/drivers/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the Linux SuperH-specific device drivers.
 #
 
-obj-y		+= dma/
+obj-y		+= dma/ platform_early.o
 
 obj-$(CONFIG_PCI)		+= pci/
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c
index cf2fcccca812..24391b444b28 100644
--- a/arch/sh/drivers/heartbeat.c
+++ b/arch/sh/drivers/heartbeat.c
@@ -96,7 +96,7 @@ static int heartbeat_drv_probe(struct platform_device *pdev)
 			return -ENOMEM;
 	}
 
-	hd->base = ioremap_nocache(res->start, resource_size(res));
+	hd->base = ioremap(res->start, resource_size(res));
 	if (unlikely(!hd->base)) {
 		dev_err(&pdev->dev, "ioremap failed\n");
 
diff --git a/arch/sh/drivers/pci/fixups-sdk7786.c b/arch/sh/drivers/pci/fixups-sdk7786.c
index 8cbfa5310a4b..6972af7b4e93 100644
--- a/arch/sh/drivers/pci/fixups-sdk7786.c
+++ b/arch/sh/drivers/pci/fixups-sdk7786.c
@@ -53,7 +53,7 @@ static int __init sdk7786_pci_init(void)
 
 		/* Warn about forced rerouting if slot#3 is occupied */
 		if ((data & PCIECR_PRST3) == 0) {
-			pr_warning("Unreachable card detected in slot#3\n");
+			pr_warn("Unreachable card detected in slot#3\n");
 			return -EBUSY;
 		}
 	} else
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 49303fab187b..03225d27770b 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -115,12 +115,12 @@ static int __init sh5pci_init(void)
                 return -EINVAL;
         }
 
-	pcicr_virt = (unsigned long)ioremap_nocache(SH5PCI_ICR_BASE, 1024);
+	pcicr_virt = (unsigned long)ioremap(SH5PCI_ICR_BASE, 1024);
 	if (!pcicr_virt) {
 		panic("Unable to remap PCICR\n");
 	}
 
-	PCI_IO_AREA = (unsigned long)ioremap_nocache(SH5PCI_IO_BASE, 0x10000);
+	PCI_IO_AREA = (unsigned long)ioremap(SH5PCI_IO_BASE, 0x10000);
 	if (!PCI_IO_AREA) {
 		panic("Unable to remap PCIIO\n");
 	}
diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c
new file mode 100644
index 000000000000..f3dc3f25b3ff
--- /dev/null
+++ b/arch/sh/drivers/platform_early.c
@@ -0,0 +1,340 @@
+// SPDX--License-Identifier: GPL-2.0
+
+#include <asm/platform_early.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pm.h>
+
+static __initdata LIST_HEAD(sh_early_platform_driver_list);
+static __initdata LIST_HEAD(sh_early_platform_device_list);
+
+static const struct platform_device_id *
+platform_match_id(const struct platform_device_id *id,
+		  struct platform_device *pdev)
+{
+	while (id->name[0]) {
+		if (strcmp(pdev->name, id->name) == 0) {
+			pdev->id_entry = id;
+			return id;
+		}
+		id++;
+	}
+	return NULL;
+}
+
+static int platform_match(struct device *dev, struct device_driver *drv)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_driver *pdrv = to_platform_driver(drv);
+
+	/* When driver_override is set, only bind to the matching driver */
+	if (pdev->driver_override)
+		return !strcmp(pdev->driver_override, drv->name);
+
+	/* Then try to match against the id table */
+	if (pdrv->id_table)
+		return platform_match_id(pdrv->id_table, pdev) != NULL;
+
+	/* fall-back to driver name match */
+	return (strcmp(pdev->name, drv->name) == 0);
+}
+
+#ifdef CONFIG_PM
+static void device_pm_init_common(struct device *dev)
+{
+	if (!dev->power.early_init) {
+		spin_lock_init(&dev->power.lock);
+		dev->power.qos = NULL;
+		dev->power.early_init = true;
+	}
+}
+
+static void pm_runtime_early_init(struct device *dev)
+{
+	dev->power.disable_depth = 1;
+	device_pm_init_common(dev);
+}
+#else
+static void pm_runtime_early_init(struct device *dev) {}
+#endif
+
+/**
+ * sh_early_platform_driver_register - register early platform driver
+ * @epdrv: sh_early_platform driver structure
+ * @buf: string passed from early_param()
+ *
+ * Helper function for sh_early_platform_init() / sh_early_platform_init_buffer()
+ */
+int __init sh_early_platform_driver_register(struct sh_early_platform_driver *epdrv,
+					  char *buf)
+{
+	char *tmp;
+	int n;
+
+	/* Simply add the driver to the end of the global list.
+	 * Drivers will by default be put on the list in compiled-in order.
+	 */
+	if (!epdrv->list.next) {
+		INIT_LIST_HEAD(&epdrv->list);
+		list_add_tail(&epdrv->list, &sh_early_platform_driver_list);
+	}
+
+	/* If the user has specified device then make sure the driver
+	 * gets prioritized. The driver of the last device specified on
+	 * command line will be put first on the list.
+	 */
+	n = strlen(epdrv->pdrv->driver.name);
+	if (buf && !strncmp(buf, epdrv->pdrv->driver.name, n)) {
+		list_move(&epdrv->list, &sh_early_platform_driver_list);
+
+		/* Allow passing parameters after device name */
+		if (buf[n] == '\0' || buf[n] == ',')
+			epdrv->requested_id = -1;
+		else {
+			epdrv->requested_id = simple_strtoul(&buf[n + 1],
+							     &tmp, 10);
+
+			if (buf[n] != '.' || (tmp == &buf[n + 1])) {
+				epdrv->requested_id = EARLY_PLATFORM_ID_ERROR;
+				n = 0;
+			} else
+				n += strcspn(&buf[n + 1], ",") + 1;
+		}
+
+		if (buf[n] == ',')
+			n++;
+
+		if (epdrv->bufsize) {
+			memcpy(epdrv->buffer, &buf[n],
+			       min_t(int, epdrv->bufsize, strlen(&buf[n]) + 1));
+			epdrv->buffer[epdrv->bufsize - 1] = '\0';
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * sh_early_platform_add_devices - adds a number of early platform devices
+ * @devs: array of early platform devices to add
+ * @num: number of early platform devices in array
+ *
+ * Used by early architecture code to register early platform devices and
+ * their platform data.
+ */
+void __init sh_early_platform_add_devices(struct platform_device **devs, int num)
+{
+	struct device *dev;
+	int i;
+
+	/* simply add the devices to list */
+	for (i = 0; i < num; i++) {
+		dev = &devs[i]->dev;
+
+		if (!dev->devres_head.next) {
+			pm_runtime_early_init(dev);
+			INIT_LIST_HEAD(&dev->devres_head);
+			list_add_tail(&dev->devres_head,
+				      &sh_early_platform_device_list);
+		}
+	}
+}
+
+/**
+ * sh_early_platform_driver_register_all - register early platform drivers
+ * @class_str: string to identify early platform driver class
+ *
+ * Used by architecture code to register all early platform drivers
+ * for a certain class. If omitted then only early platform drivers
+ * with matching kernel command line class parameters will be registered.
+ */
+void __init sh_early_platform_driver_register_all(char *class_str)
+{
+	/* The "class_str" parameter may or may not be present on the kernel
+	 * command line. If it is present then there may be more than one
+	 * matching parameter.
+	 *
+	 * Since we register our early platform drivers using early_param()
+	 * we need to make sure that they also get registered in the case
+	 * when the parameter is missing from the kernel command line.
+	 *
+	 * We use parse_early_options() to make sure the early_param() gets
+	 * called at least once. The early_param() may be called more than
+	 * once since the name of the preferred device may be specified on
+	 * the kernel command line. sh_early_platform_driver_register() handles
+	 * this case for us.
+	 */
+	parse_early_options(class_str);
+}
+
+/**
+ * sh_early_platform_match - find early platform device matching driver
+ * @epdrv: early platform driver structure
+ * @id: id to match against
+ */
+static struct platform_device * __init
+sh_early_platform_match(struct sh_early_platform_driver *epdrv, int id)
+{
+	struct platform_device *pd;
+
+	list_for_each_entry(pd, &sh_early_platform_device_list, dev.devres_head)
+		if (platform_match(&pd->dev, &epdrv->pdrv->driver))
+			if (pd->id == id)
+				return pd;
+
+	return NULL;
+}
+
+/**
+ * sh_early_platform_left - check if early platform driver has matching devices
+ * @epdrv: early platform driver structure
+ * @id: return true if id or above exists
+ */
+static int __init sh_early_platform_left(struct sh_early_platform_driver *epdrv,
+				       int id)
+{
+	struct platform_device *pd;
+
+	list_for_each_entry(pd, &sh_early_platform_device_list, dev.devres_head)
+		if (platform_match(&pd->dev, &epdrv->pdrv->driver))
+			if (pd->id >= id)
+				return 1;
+
+	return 0;
+}
+
+/**
+ * sh_early_platform_driver_probe_id - probe drivers matching class_str and id
+ * @class_str: string to identify early platform driver class
+ * @id: id to match against
+ * @nr_probe: number of platform devices to successfully probe before exiting
+ */
+static int __init sh_early_platform_driver_probe_id(char *class_str,
+						 int id,
+						 int nr_probe)
+{
+	struct sh_early_platform_driver *epdrv;
+	struct platform_device *match;
+	int match_id;
+	int n = 0;
+	int left = 0;
+
+	list_for_each_entry(epdrv, &sh_early_platform_driver_list, list) {
+		/* only use drivers matching our class_str */
+		if (strcmp(class_str, epdrv->class_str))
+			continue;
+
+		if (id == -2) {
+			match_id = epdrv->requested_id;
+			left = 1;
+
+		} else {
+			match_id = id;
+			left += sh_early_platform_left(epdrv, id);
+
+			/* skip requested id */
+			switch (epdrv->requested_id) {
+			case EARLY_PLATFORM_ID_ERROR:
+			case EARLY_PLATFORM_ID_UNSET:
+				break;
+			default:
+				if (epdrv->requested_id == id)
+					match_id = EARLY_PLATFORM_ID_UNSET;
+			}
+		}
+
+		switch (match_id) {
+		case EARLY_PLATFORM_ID_ERROR:
+			pr_warn("%s: unable to parse %s parameter\n",
+				class_str, epdrv->pdrv->driver.name);
+			/* fall-through */
+		case EARLY_PLATFORM_ID_UNSET:
+			match = NULL;
+			break;
+		default:
+			match = sh_early_platform_match(epdrv, match_id);
+		}
+
+		if (match) {
+			/*
+			 * Set up a sensible init_name to enable
+			 * dev_name() and others to be used before the
+			 * rest of the driver core is initialized.
+			 */
+			if (!match->dev.init_name && slab_is_available()) {
+				if (match->id != -1)
+					match->dev.init_name =
+						kasprintf(GFP_KERNEL, "%s.%d",
+							  match->name,
+							  match->id);
+				else
+					match->dev.init_name =
+						kasprintf(GFP_KERNEL, "%s",
+							  match->name);
+
+				if (!match->dev.init_name)
+					return -ENOMEM;
+			}
+
+			if (epdrv->pdrv->probe(match))
+				pr_warn("%s: unable to probe %s early.\n",
+					class_str, match->name);
+			else
+				n++;
+		}
+
+		if (n >= nr_probe)
+			break;
+	}
+
+	if (left)
+		return n;
+	else
+		return -ENODEV;
+}
+
+/**
+ * sh_early_platform_driver_probe - probe a class of registered drivers
+ * @class_str: string to identify early platform driver class
+ * @nr_probe: number of platform devices to successfully probe before exiting
+ * @user_only: only probe user specified early platform devices
+ *
+ * Used by architecture code to probe registered early platform drivers
+ * within a certain class. For probe to happen a registered early platform
+ * device matching a registered early platform driver is needed.
+ */
+int __init sh_early_platform_driver_probe(char *class_str,
+				       int nr_probe,
+				       int user_only)
+{
+	int k, n, i;
+
+	n = 0;
+	for (i = -2; n < nr_probe; i++) {
+		k = sh_early_platform_driver_probe_id(class_str, i, nr_probe - n);
+
+		if (k < 0)
+			break;
+
+		n += k;
+
+		if (user_only)
+			break;
+	}
+
+	return n;
+}
+
+/**
+ * early_platform_cleanup - clean up early platform code
+ */
+void __init early_platform_cleanup(void)
+{
+	struct platform_device *pd, *pd2;
+
+	/* clean up the devres list used to chain devices */
+	list_for_each_entry_safe(pd, pd2, &sh_early_platform_device_list,
+				 dev.devres_head) {
+		list_del(&pd->dev.devres_head);
+		memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head));
+	}
+}
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index ac0561960c52..39c9ead489e5 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -267,7 +267,7 @@ unsigned long long poke_real_address_q(unsigned long long addr,
 #ifdef CONFIG_MMU
 void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size,
 			       pgprot_t prot, void *caller);
-void __iounmap(void __iomem *addr);
+void iounmap(void __iomem *addr);
 
 static inline void __iomem *
 __ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot)
@@ -328,7 +328,7 @@ __ioremap_mode(phys_addr_t offset, unsigned long size, pgprot_t prot)
 #else
 #define __ioremap(offset, size, prot)		((void __iomem *)(offset))
 #define __ioremap_mode(offset, size, prot)	((void __iomem *)(offset))
-#define __iounmap(addr)				do { } while (0)
+#define iounmap(addr)				do { } while (0)
 #endif /* CONFIG_MMU */
 
 static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
@@ -367,14 +367,8 @@ static inline void ioremap_fixed_init(void) { }
 static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
 #endif
 
-#define ioremap_nocache	ioremap
 #define ioremap_uc	ioremap
 
-static inline void iounmap(void __iomem *addr)
-{
-	__iounmap(addr);
-}
-
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/sh/include/asm/platform_early.h b/arch/sh/include/asm/platform_early.h
new file mode 100644
index 000000000000..fc802137c37d
--- /dev/null
+++ b/arch/sh/include/asm/platform_early.h
@@ -0,0 +1,61 @@
+/* SPDX--License-Identifier: GPL-2.0 */
+
+#ifndef __PLATFORM_EARLY__
+#define __PLATFORM_EARLY__
+
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+struct sh_early_platform_driver {
+	const char *class_str;
+	struct platform_driver *pdrv;
+	struct list_head list;
+	int requested_id;
+	char *buffer;
+	int bufsize;
+};
+
+#define EARLY_PLATFORM_ID_UNSET -2
+#define EARLY_PLATFORM_ID_ERROR -3
+
+extern int sh_early_platform_driver_register(struct sh_early_platform_driver *epdrv,
+					  char *buf);
+extern void sh_early_platform_add_devices(struct platform_device **devs, int num);
+
+static inline int is_sh_early_platform_device(struct platform_device *pdev)
+{
+	return !pdev->dev.driver;
+}
+
+extern void sh_early_platform_driver_register_all(char *class_str);
+extern int sh_early_platform_driver_probe(char *class_str,
+				       int nr_probe, int user_only);
+
+#define sh_early_platform_init(class_string, platdrv)		\
+	sh_early_platform_init_buffer(class_string, platdrv, NULL, 0)
+
+#ifndef MODULE
+#define sh_early_platform_init_buffer(class_string, platdrv, buf, bufsiz)	\
+static __initdata struct sh_early_platform_driver early_driver = {		\
+	.class_str = class_string,					\
+	.buffer = buf,							\
+	.bufsize = bufsiz,						\
+	.pdrv = platdrv,						\
+	.requested_id = EARLY_PLATFORM_ID_UNSET,			\
+};									\
+static int __init sh_early_platform_driver_setup_func(char *buffer)	\
+{									\
+	return sh_early_platform_driver_register(&early_driver, buffer);	\
+}									\
+early_param(class_string, sh_early_platform_driver_setup_func)
+#else /* MODULE */
+#define sh_early_platform_init_buffer(class_string, platdrv, buf, bufsiz)	\
+static inline char *sh_early_platform_driver_setup_func(void)		\
+{									\
+	return bufsiz ? buf : NULL;					\
+}
+#endif /* MODULE */
+
+#endif /* __PLATFORM_EARLY__ */
diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h
new file mode 100644
index 000000000000..716b77472646
--- /dev/null
+++ b/arch/sh/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SH_VMALLOC_H
+#define _ASM_SH_VMALLOC_H
+
+#endif /* _ASM_SH_VMALLOC_H */
diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7264.h b/arch/sh/include/cpu-sh2a/cpu/sh7264.h
index d12c19186845..8a1338aaf10a 100644
--- a/arch/sh/include/cpu-sh2a/cpu/sh7264.h
+++ b/arch/sh/include/cpu-sh2a/cpu/sh7264.h
@@ -112,12 +112,6 @@ enum {
 	GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A,
 	GPIO_FN_TCLKD, GPIO_FN_TCLKC, GPIO_FN_TCLKB, GPIO_FN_TCLKA,
 
-	/* SSU */
-	GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD,
-	GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF,
-	GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD,
-	GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF,
-
 	/* SCIF */
 	GPIO_FN_SCK0, GPIO_FN_SCK1, GPIO_FN_SCK2, GPIO_FN_SCK3,
 	GPIO_FN_RXD0, GPIO_FN_RXD1, GPIO_FN_RXD2, GPIO_FN_RXD3,
diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
index d516e5d48818..fece521c74b3 100644
--- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h
+++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
@@ -78,8 +78,15 @@ enum {
 	GPIO_FN_WDTOVF,
 
 	/* CAN */
-	GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
-	GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2,
+	GPIO_FN_CTX2, GPIO_FN_CRX2,
+	GPIO_FN_CTX1, GPIO_FN_CRX1,
+	GPIO_FN_CTX0, GPIO_FN_CRX0,
+	GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1,
+	GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2,
+	GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20,
+	GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22,
+	GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22,
+	GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20,
 
 	/* DMAC */
 	GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0,
@@ -119,12 +126,6 @@ enum {
 	GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A,
 	GPIO_FN_TCLKD, GPIO_FN_TCLKC, GPIO_FN_TCLKB, GPIO_FN_TCLKA,
 
-	/* SSU */
-	GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD,
-	GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF,
-	GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD,
-	GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF,
-
 	/* SCIF */
 	GPIO_FN_SCK0, GPIO_FN_RXD0, GPIO_FN_TXD0,
 	GPIO_FN_SCK1, GPIO_FN_RXD1, GPIO_FN_TXD1, GPIO_FN_RTS1, GPIO_FN_CTS1,
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h
index 96f0246ad2f2..82b63208135a 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7734.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h
@@ -134,7 +134,7 @@ enum {
 	GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C,
 		GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A,
 	GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B,
-	GPIO_FN_RD_WR, GPIO_FN_TCLK0,
+	GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4,
 	GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B,
 		GPIO_FN_ET0_ETXD3_A,
 	GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B,
diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h
index ef18a668456d..3da561453260 100644
--- a/arch/sh/include/uapi/asm/sockios.h
+++ b/arch/sh/include/uapi/asm/sockios.h
@@ -10,7 +10,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP_OLD	_IOR('s', 100, struct timeval) /* Get stamp (timeval) */
-#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD	_IOR('s', 100, struct __kernel_old_timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct __kernel_old_timespec) /* Get stamp (timespec) */
 
 #endif /* __ASM_SH_SOCKIOS_H */
diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index 744f903b4df3..1b3050facda8 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -124,7 +124,7 @@ void __init plat_irq_setup(void)
 	unsigned long reg;
 	int i;
 
-	intc_virt = (unsigned long)ioremap_nocache(INTC_BASE, 1024);
+	intc_virt = (unsigned long)ioremap(INTC_BASE, 1024);
 	if (!intc_virt) {
 		panic("Unable to remap INTC\n");
 	}
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index f5b6841ef7e1..b1c877b6a420 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,6 +12,7 @@
 #include <linux/sh_eth.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -199,6 +200,6 @@ void __init plat_early_device_setup(void)
 	/* enable CMT clock */
 	__raw_writeb(__raw_readb(STBCR3) & ~0x10, STBCR3);
 
-	early_platform_add_devices(sh7619_early_devices,
+	sh_early_platform_add_devices(sh7619_early_devices,
 				   ARRAY_SIZE(sh7619_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c
index ae44dc24c455..d0d5d81455ae 100644
--- a/arch/sh/kernel/cpu/sh2/smp-j2.c
+++ b/arch/sh/kernel/cpu/sh2/smp-j2.c
@@ -88,8 +88,8 @@ static void j2_start_cpu(unsigned int cpu, unsigned long entry_point)
 	if (!np) return;
 
 	if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return;
-	release = ioremap_nocache(regs[0], sizeof(u32));
-	initpc = ioremap_nocache(regs[1], sizeof(u32));
+	release = ioremap(regs[0], sizeof(u32));
+	initpc = ioremap(regs[1], sizeof(u32));
 
 	__raw_writel(entry_point, initpc);
 	__raw_writel(1, release);
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 52350ad0b0a2..cefa07924c16 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -9,6 +9,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -169,6 +170,6 @@ static struct platform_device *mxg_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(mxg_early_devices,
+	sh_early_platform_add_devices(mxg_early_devices,
 				   ARRAY_SIZE(mxg_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index b51ed761ae08..28f1bebf3405 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -11,6 +11,7 @@
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -412,6 +413,6 @@ void __init plat_early_device_setup(void)
 	/* enable MTU2 clock */
 	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
 
-	early_platform_add_devices(sh7201_early_devices,
+	sh_early_platform_add_devices(sh7201_early_devices,
 				   ARRAY_SIZE(sh7201_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 89b3e49fc250..4839f3aaeb4c 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -10,6 +10,7 @@
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -349,6 +350,6 @@ void __init plat_early_device_setup(void)
 	/* enable MTU2 clock */
 	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
 
-	early_platform_add_devices(sh7203_early_devices,
+	sh_early_platform_add_devices(sh7203_early_devices,
 				   ARRAY_SIZE(sh7203_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 36ff3a3139da..68add5af4cc5 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -11,6 +11,7 @@
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -285,6 +286,6 @@ void __init plat_early_device_setup(void)
 	/* enable MTU2 clock */
 	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
 
-	early_platform_add_devices(sh7206_early_devices,
+	sh_early_platform_add_devices(sh7206_early_devices,
 				   ARRAY_SIZE(sh7206_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7264.c b/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
index d199618d877c..8a1cb613dd2e 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
@@ -11,6 +11,7 @@
 #include <linux/usb/r8a66597.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -546,6 +547,6 @@ static struct platform_device *sh7264_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7264_early_devices,
+	sh_early_platform_add_devices(sh7264_early_devices,
 				   ARRAY_SIZE(sh7264_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7269.c b/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
index 9095c960b455..8b1ef3028320 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
@@ -12,6 +12,7 @@
 #include <linux/usb/r8a66597.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -562,6 +563,6 @@ static struct platform_device *sh7269_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7269_early_devices,
+	sh_early_platform_add_devices(sh7269_early_devices,
 				   ARRAY_SIZE(sh7269_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh3.c b/arch/sh/kernel/cpu/sh3/setup-sh3.c
index 8058c01cf09d..cf2a3f09fee4 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh3.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh3.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 /* All SH3 devices are equipped with IRQ0->5 (except sh7708) */
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index e19d1ce7b6ad..0544134b3f20 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -14,6 +14,7 @@
 #include <linux/sh_intc.h>
 #include <asm/rtc.h>
 #include <cpu/serial.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -178,7 +179,7 @@ static struct platform_device *sh7705_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7705_early_devices,
+	sh_early_platform_add_devices(sh7705_early_devices,
 				   ARRAY_SIZE(sh7705_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index 5c5144bee6bc..4947f57748bc 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -18,6 +18,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <cpu/serial.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -230,7 +231,7 @@ static struct platform_device *sh770x_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh770x_early_devices,
+	sh_early_platform_add_devices(sh770x_early_devices,
 				   ARRAY_SIZE(sh770x_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 4776e2495738..381910761579 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -13,6 +13,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <asm/rtc.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -177,7 +178,7 @@ static struct platform_device *sh7710_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7710_early_devices,
+	sh_early_platform_add_devices(sh7710_early_devices,
 				   ARRAY_SIZE(sh7710_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 1d4c34e7b7db..425d067dae9b 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -19,6 +19,7 @@
 #include <linux/sh_intc.h>
 #include <linux/usb/ohci_pdriver.h>
 #include <asm/rtc.h>
+#include <asm/platform_early.h>
 #include <cpu/serial.h>
 
 static struct resource rtc_resources[] = {
@@ -211,7 +212,7 @@ static struct platform_device *sh7720_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7720_early_devices,
+	sh_early_platform_add_devices(sh7720_early_devices,
 				   ARRAY_SIZE(sh7720_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index a40ef35d101a..e6737f3d0df2 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -12,6 +12,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE,
@@ -76,7 +77,7 @@ static struct platform_device *sh4202_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh4202_early_devices,
+	sh_early_platform_add_devices(sh4202_early_devices,
 				   ARRAY_SIZE(sh4202_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index b37bda66a532..19c8f1d69071 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -13,6 +13,7 @@
 #include <linux/sh_intc.h>
 #include <linux/serial_sci.h>
 #include <generated/machtypes.h>
+#include <asm/platform_early.h>
 
 static struct resource rtc_resources[] = {
 	[0] = {
@@ -161,15 +162,15 @@ void __init plat_early_device_setup(void)
 	if (mach_is_rts7751r2d()) {
 		scif_platform_data.scscr |= SCSCR_CKE1;
 		dev[0] = &scif_device;
-		early_platform_add_devices(dev, 1);
+		sh_early_platform_add_devices(dev, 1);
 	} else {
 		dev[0] = &sci_device;
-		early_platform_add_devices(dev, 1);
+		sh_early_platform_add_devices(dev, 1);
 		dev[0] = &scif_device;
-		early_platform_add_devices(dev, 1);
+		sh_early_platform_add_devices(dev, 1);
 	}
 
-	early_platform_add_devices(sh7750_early_devices,
+	sh_early_platform_add_devices(sh7750_early_devices,
 				   ARRAY_SIZE(sh7750_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 86845da85997..14212f5d803c 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -11,6 +11,7 @@
 #include <linux/sh_intc.h>
 #include <linux/serial_sci.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 enum {
 	UNUSED = 0,
@@ -271,7 +272,7 @@ static struct platform_device *sh7760_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7760_early_devices,
+	sh_early_platform_add_devices(sh7760_early_devices,
 				   ARRAY_SIZE(sh7760_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index a15e25690b5f..b6015188fab1 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -12,6 +12,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <asm/clock.h>
+#include <asm/platform_early.h>
 
 /* Serial */
 static struct plat_sci_port scif0_platform_data = {
@@ -296,7 +297,7 @@ static struct platform_device *sh7343_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7343_early_devices,
+	sh_early_platform_add_devices(sh7343_early_devices,
 				   ARRAY_SIZE(sh7343_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 7bd2776441ba..6676beef053e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -15,6 +15,7 @@
 #include <linux/sh_intc.h>
 #include <linux/usb/r8a66597.h>
 #include <asm/clock.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE,
@@ -240,7 +241,7 @@ static struct platform_device *sh7366_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7366_early_devices,
+	sh_early_platform_add_devices(sh7366_early_devices,
 				   ARRAY_SIZE(sh7366_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 1ce65f88f060..0c6757ef63f4 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -18,6 +18,7 @@
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 #include <asm/siu.h>
+#include <asm/platform_early.h>
 
 #include <cpu/dma-register.h>
 #include <cpu/sh7722.h>
@@ -512,7 +513,7 @@ static struct platform_device *sh7722_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7722_early_devices,
+	sh_early_platform_add_devices(sh7722_early_devices,
 				   ARRAY_SIZE(sh7722_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index edb649950662..83ae1ad4a86e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
+#include <asm/platform_early.h>
 #include <cpu/sh7723.h>
 
 /* Serial */
@@ -410,7 +411,7 @@ static struct platform_device *sh7723_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7723_early_devices,
+	sh_early_platform_add_devices(sh7723_early_devices,
 				   ARRAY_SIZE(sh7723_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 3e9825031d3d..0d990ab1ba2a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -24,6 +24,7 @@
 #include <asm/suspend.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
+#include <asm/platform_early.h>
 
 #include <cpu/dma-register.h>
 #include <cpu/sh7724.h>
@@ -830,7 +831,7 @@ static struct platform_device *sh7724_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7724_early_devices,
+	sh_early_platform_add_devices(sh7724_early_devices,
 				   ARRAY_SIZE(sh7724_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
index 06a91569697a..9911da794358 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/irq.h>
+#include <asm/platform_early.h>
 #include <cpu/sh7734.h>
 
 /* SCIF */
@@ -280,7 +281,7 @@ static struct platform_device *sh7734_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7734_early_devices,
+	sh_early_platform_add_devices(sh7734_early_devices,
 		ARRAY_SIZE(sh7734_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
index 2501ce656511..67e330b7ea46 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
@@ -19,6 +19,7 @@
 #include <linux/usb/ohci_pdriver.h>
 #include <cpu/dma-register.h>
 #include <cpu/sh7757.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif2_platform_data = {
 	.scscr		= SCSCR_REIE,
@@ -767,7 +768,7 @@ static struct platform_device *sh7757_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7757_early_devices,
+	sh_early_platform_add_devices(sh7757_early_devices,
 				   ARRAY_SIZE(sh7757_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index 419c5efe4a17..b0608664785f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -14,6 +14,7 @@
 #include <linux/io.h>
 #include <linux/serial_sci.h>
 #include <linux/usb/ohci_pdriver.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE,
@@ -221,7 +222,7 @@ static struct platform_device *sh7763_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7763_early_devices,
+	sh_early_platform_add_devices(sh7763_early_devices,
 				   ARRAY_SIZE(sh7763_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 5fb4cf9b58c6..5efec6ceb04d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -11,6 +11,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <linux/io.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE | SCSCR_TOIE,
@@ -316,7 +317,7 @@ static struct platform_device *sh7770_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7770_early_devices,
+	sh_early_platform_add_devices(sh7770_early_devices,
 				   ARRAY_SIZE(sh7770_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index ab7d6b715865..c818b788ecb0 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -13,6 +13,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <cpu/dma-register.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE | SCSCR_CKE1,
@@ -285,7 +286,7 @@ void __init plat_early_device_setup(void)
 		scif1_platform_data.scscr &= ~SCSCR_CKE1;
 	}
 
-	early_platform_add_devices(sh7780_early_devices,
+	sh_early_platform_add_devices(sh7780_early_devices,
 				   ARRAY_SIZE(sh7780_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index a438da47285d..3b4a414d60a9 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -14,6 +14,7 @@
 #include <linux/sh_timer.h>
 #include <linux/sh_intc.h>
 #include <asm/mmzone.h>
+#include <asm/platform_early.h>
 #include <cpu/dma-register.h>
 
 static struct plat_sci_port scif0_platform_data = {
@@ -353,7 +354,7 @@ static struct platform_device *sh7785_early_devices[] __initdata = {
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7785_early_devices,
+	sh_early_platform_add_devices(sh7785_early_devices,
 				   ARRAY_SIZE(sh7785_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index d894165a0ef6..4b0db8259e3d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -23,6 +23,7 @@
 #include <linux/usb/ohci_pdriver.h>
 #include <cpu/dma-register.h>
 #include <asm/mmzone.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.scscr		= SCSCR_REIE | SCSCR_CKE1,
@@ -834,6 +835,6 @@ arch_initcall(sh7786_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh7786_early_devices,
+	sh_early_platform_add_devices(sh7786_early_devices,
 				   ARRAY_SIZE(sh7786_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 14aa4552bc45..7014d6d199b3 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -14,6 +14,7 @@
 #include <linux/sh_intc.h>
 #include <cpu/shx3.h>
 #include <asm/mmzone.h>
+#include <asm/platform_early.h>
 
 /*
  * This intentionally only registers SCIF ports 0, 1, and 3. SCIF 2
@@ -152,7 +153,7 @@ arch_initcall(shx3_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(shx3_early_devices,
+	sh_early_platform_add_devices(shx3_early_devices,
 				   ARRAY_SIZE(shx3_early_devices));
 }
 
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
index 43763c26a752..dee6be2c2344 100644
--- a/arch/sh/kernel/cpu/sh5/clock-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -68,7 +68,7 @@ static struct sh_clk_ops *sh5_clk_ops[] = {
 
 void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
 {
-	cprc_base = (unsigned long)ioremap_nocache(CPRC_BASE, 1024);
+	cprc_base = (unsigned long)ioremap(CPRC_BASE, 1024);
 	BUG_ON(!cprc_base);
 
 	if (idx < ARRAY_SIZE(sh5_clk_ops))
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index de68ffdfffbf..81c8b64b977f 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -86,7 +86,7 @@
 	andi	r6, ~0xf0, r6;		\
 	putcon	r6, SR;
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #  define preempt_stop()	CLI()
 #else
 #  define preempt_stop()
@@ -884,7 +884,7 @@ ret_from_exception:
 
 	/* Check softirqs */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	pta   ret_from_syscall, tr0
 	blink   tr0, ZERO
 
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index 41c1673afc0b..dc8476d67244 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/sh_timer.h>
 #include <asm/addrspace.h>
+#include <asm/platform_early.h>
 
 static struct plat_sci_port scif0_platform_data = {
 	.flags		= UPF_IOREMAP,
@@ -115,6 +116,6 @@ arch_initcall(sh5_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
-	early_platform_add_devices(sh5_early_devices,
+	sh_early_platform_add_devices(sh5_early_devices,
 				   ARRAY_SIZE(sh5_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index dbd2cdec2ddb..b0f9c8f8fd14 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -67,7 +67,7 @@ static struct cpuidle_driver cpuidle_driver = {
 			.enter = cpuidle_sleep_enter,
 			.name = "C2",
 			.desc = "SuperH Sleep Mode [SF]",
-			.disabled = true,
+			.flags = CPUIDLE_FLAG_UNUSABLE,
 		},
 		{
 			.exit_latency = 2300,
@@ -76,7 +76,7 @@ static struct cpuidle_driver cpuidle_driver = {
 			.enter = cpuidle_sleep_enter,
 			.name = "C3",
 			.desc = "SuperH Mobile Standby Mode [SF]",
-			.disabled = true,
+			.flags = CPUIDLE_FLAG_UNUSABLE,
 		},
 	},
 	.safe_state_index = 0,
@@ -86,10 +86,10 @@ static struct cpuidle_driver cpuidle_driver = {
 int __init sh_mobile_setup_cpuidle(void)
 {
 	if (sh_mobile_sleep_supported & SUSP_SH_SF)
-		cpuidle_driver.states[1].disabled = false;
+		cpuidle_driver.states[1].flags = CPUIDLE_FLAG_NONE;
 
 	if (sh_mobile_sleep_supported & SUSP_SH_STANDBY)
-		cpuidle_driver.states[2].disabled = false;
+		cpuidle_driver.states[2].flags = CPUIDLE_FLAG_NONE;
 
 	return cpuidle_register(&cpuidle_driver, NULL);
 }
diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
index b17514619b7e..d4811691b93c 100644
--- a/arch/sh/kernel/dma-coherent.c
+++ b/arch/sh/kernel/dma-coherent.c
@@ -25,10 +25,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	 * Pages from the page allocator may have data present in
 	 * cache. So flush the cache before using uncached memory.
 	 */
-	arch_sync_dma_for_device(dev, virt_to_phys(ret), size,
+	arch_sync_dma_for_device(virt_to_phys(ret), size,
 			DMA_BIDIRECTIONAL);
 
-	ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
+	ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size);
 	if (!ret_nocache) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
@@ -59,8 +59,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	iounmap(vaddr);
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
 
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index d31f66e82ce5..956a7a03b0c8 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -41,7 +41,7 @@
  */
 #include <asm/dwarf.h>
 
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
 #  define preempt_stop()	cli ; TRACE_IRQS_OFF
 #else
 #  define preempt_stop()
@@ -84,7 +84,7 @@ ENTRY(ret_from_irq)
 	get_current_thread_info r8, r0
 	bt	resume_kernel	! Yes, it's from kernel, go back soon
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	bra	resume_userspace
 	 nop
 ENTRY(resume_kernel)
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index bacad6da4fe4..60c828a2b8a2 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -99,7 +99,7 @@ int register_trapped_io(struct trapped_io *tiop)
 
 	return 0;
  bad:
-	pr_warning("unable to install trapped io filter\n");
+	pr_warn("unable to install trapped io filter\n");
 	return -1;
 }
 EXPORT_SYMBOL_GPL(register_trapped_io);
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 6d61f8cf4c13..0d5f3c9d52f3 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -266,6 +266,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
 		ptr = &remcomInBuffer[1];
 		if (kgdb_hex2long(&ptr, &addr))
 			linux_regs->pc = addr;
+		/* fallthrough */
 	case 'D':
 	case 'k':
 		atomic_set(&kgdb_cpu_doing_single_step, -1);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 2c0e0f37a318..67f5a3b44c2e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/mmu_context.h>
 #include <asm/mmzone.h>
 #include <asm/sparsemem.h>
+#include <asm/platform_early.h>
 
 /*
  * Initialize loops_per_jiffy as 10000000 (1000MIPS).
@@ -328,7 +329,7 @@ void __init setup_arch(char **cmdline_p)
 	sh_mv_setup();
 
 	/* Let earlyprintk output early console messages */
-	early_platform_driver_probe("earlyprintk", 1, 1);
+	sh_early_platform_driver_probe("earlyprintk", 1, 1);
 
 #ifdef CONFIG_OF_FLATTREE
 #ifdef CONFIG_USE_BUILTIN_DTB
@@ -340,10 +341,6 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	/* Perform the machine specific initialisation */
 	if (likely(sh_mv.mv_setup))
 		sh_mv.mv_setup(cmdline_p);
@@ -354,7 +351,7 @@ void __init setup_arch(char **cmdline_p)
 /* processor boot mode configuration */
 int generic_mode_pins(void)
 {
-	pr_warning("generic_mode_pins(): missing mode pin configuration\n");
+	pr_warn("generic_mode_pins(): missing mode pin configuration\n");
 	return 0;
 }
 
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index b5ed26c4c005..c7a30fcd135f 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index e16b2cd269a3..821a09cbd605 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -18,6 +18,7 @@
 #include <linux/rtc.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
+#include <asm/platform_early.h>
 
 static void __init sh_late_time_init(void)
 {
@@ -30,8 +31,8 @@ static void __init sh_late_time_init(void)
 	 * clocksource and the jiffies clocksource is used transparently
 	 * instead. No error handling is necessary here.
 	 */
-	early_platform_driver_register_all("earlytimer");
-	early_platform_driver_probe("earlytimer", 2, 0);
+	sh_early_platform_driver_register_all("earlytimer");
+	sh_early_platform_driver_probe("earlytimer", 2, 0);
 }
 
 void __init time_init(void)
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 77a59d8c6b4d..c60b19958c35 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -48,11 +48,10 @@ SECTIONS
 	} = 0x0009
 
 	EXCEPTION_TABLE(16)
-	NOTES
 
 	_sdata = .;
 	RO_DATA(PAGE_SIZE)
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
 	DWARF_EH_FRAME
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 792f36129062..3169a343a5ab 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -43,8 +43,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
 
 	r = pdev->resource + pdev->num_resources - 1;
 	if (r->flags) {
-		pr_warning("%s: unable to find empty space for resource\n",
-			name);
+		pr_warn("%s: unable to find empty space for resource\n", name);
 		return -EINVAL;
 	}
 
@@ -54,7 +53,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
 
 	buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL);
 	if (!buf) {
-		pr_warning("%s: unable to allocate memory\n", name);
+		pr_warn("%s: unable to allocate memory\n", name);
 		return -ENOMEM;
 	}
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index dfdbaa50946e..d1b1ff2be17a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index d09ddfe58fd8..f6d02246d665 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -103,7 +103,7 @@ static inline int iomapping_nontranslatable(unsigned long offset)
 	return 0;
 }
 
-void __iounmap(void __iomem *addr)
+void iounmap(void __iomem *addr)
 {
 	unsigned long vaddr = (unsigned long __force)addr;
 	struct vm_struct *p;
@@ -134,4 +134,4 @@ void __iounmap(void __iomem *addr)
 
 	kfree(p);
 }
-EXPORT_SYMBOL(__iounmap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index eb24cb1afc11..e8c3ea01c12f 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -277,7 +277,7 @@ config US3_MC
 config GENERIC_LOCKBREAK
 	bool
 	default y
-	depends on SPARC64 && SMP && PREEMPT
+	depends on SPARC64 && SMP && PREEMPTION
 
 config NUMA
 	bool "NUMA support"
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 7b946b3dee9d..e3d2138ff9e2 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
 
 #include <asm/fpumacro.h>
 #include <asm/pstate.h>
@@ -168,7 +169,6 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 
 	switch (key_len) {
 	case AES_KEYSIZE_128:
@@ -187,7 +187,6 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		break;
 
 	default:
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 
@@ -197,6 +196,12 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
+static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
+				unsigned int key_len)
+{
+	return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
 static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -211,131 +216,108 @@ static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
 }
 
-#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
-
-static int ecb_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			ctx->ops->ecb_encrypt(&ctx->key[0],
-					      (const u64 *)walk.src.virt.addr,
-					      (u64 *) walk.dst.virt.addr,
-					      block_len);
-		}
-		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr,
+				      walk.dst.virt.addr,
+				      round_down(nbytes, AES_BLOCK_SIZE));
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	u64 *key_end;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	const u64 *key_end;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	ctx->ops->load_decrypt_keys(&ctx->key[0]);
 	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			ctx->ops->ecb_decrypt(key_end,
-					      (const u64 *) walk.src.virt.addr,
-					      (u64 *) walk.dst.virt.addr, block_len);
-		}
-		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		ctx->ops->ecb_decrypt(key_end, walk.src.virt.addr,
+				      walk.dst.virt.addr,
+				      round_down(nbytes, AES_BLOCK_SIZE));
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			ctx->ops->cbc_encrypt(&ctx->key[0],
-					      (const u64 *)walk.src.virt.addr,
-					      (u64 *) walk.dst.virt.addr,
-					      block_len, (u64 *) walk.iv);
-		}
-		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		ctx->ops->cbc_encrypt(&ctx->key[0], walk.src.virt.addr,
+				      walk.dst.virt.addr,
+				      round_down(nbytes, AES_BLOCK_SIZE),
+				      walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	u64 *key_end;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	const u64 *key_end;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	ctx->ops->load_decrypt_keys(&ctx->key[0]);
 	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			ctx->ops->cbc_decrypt(key_end,
-					      (const u64 *) walk.src.virt.addr,
-					      (u64 *) walk.dst.virt.addr,
-					      block_len, (u64 *) walk.iv);
-		}
-		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		ctx->ops->cbc_decrypt(key_end, walk.src.virt.addr,
+				      walk.dst.virt.addr,
+				      round_down(nbytes, AES_BLOCK_SIZE),
+				      walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 
 	return err;
 }
 
-static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
-			    struct blkcipher_walk *walk)
+static void ctr_crypt_final(const struct crypto_sparc64_aes_ctx *ctx,
+			    struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u64 keystream[AES_BLOCK_SIZE / sizeof(u64)];
@@ -349,40 +331,35 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc,
-		     struct scatterlist *dst, struct scatterlist *src,
-		     unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
-		unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			ctx->ops->ctr_crypt(&ctx->key[0],
-					    (const u64 *)walk.src.virt.addr,
-					    (u64 *) walk.dst.virt.addr,
-					    block_len, (u64 *) walk.iv);
-		}
-		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		ctx->ops->ctr_crypt(&ctx->key[0], walk.src.virt.addr,
+				    walk.dst.virt.addr,
+				    round_down(nbytes, AES_BLOCK_SIZE),
+				    walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 	if (walk.nbytes) {
 		ctr_crypt_final(ctx, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
 	.cra_name		= "aes",
 	.cra_driver_name	= "aes-sparc64",
 	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
@@ -400,66 +377,53 @@ static struct crypto_alg algs[] = { {
 			.cia_decrypt		= crypto_aes_decrypt
 		}
 	}
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct crypto_sparc64_aes_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= aes_set_key_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct crypto_sparc64_aes_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= aes_set_key_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(aes)",
+		.base.cra_driver_name	= "ctr-aes-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct crypto_sparc64_aes_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= aes_set_key_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+		.chunksize		= AES_BLOCK_SIZE,
+	}
+};
 
 static bool __init sparc64_has_aes_opcode(void)
 {
@@ -477,17 +441,27 @@ static bool __init sparc64_has_aes_opcode(void)
 
 static int __init aes_sparc64_mod_init(void)
 {
-	if (sparc64_has_aes_opcode()) {
-		pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
-		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	int err;
+
+	if (!sparc64_has_aes_opcode()) {
+		pr_info("sparc64 aes opcodes not available.\n");
+		return -ENODEV;
 	}
-	pr_info("sparc64 aes opcodes not available.\n");
-	return -ENODEV;
+	pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
+	err = crypto_register_alg(&cipher_alg);
+	if (err)
+		return err;
+	err = crypto_register_skciphers(skcipher_algs,
+					ARRAY_SIZE(skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&cipher_alg);
+	return err;
 }
 
 static void __exit aes_sparc64_mod_fini(void)
 {
-	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+	crypto_unregister_alg(&cipher_alg);
+	crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 }
 
 module_init(aes_sparc64_mod_init);
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 3823f9491a72..aaa9714378e6 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 
 #include <asm/fpumacro.h>
 #include <asm/pstate.h>
@@ -38,12 +39,9 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
 {
 	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u32 *in_key = (const u32 *) _in_key;
-	u32 *flags = &tfm->crt_flags;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (key_len != 16 && key_len != 24 && key_len != 32)
 		return -EINVAL;
-	}
 
 	ctx->key_len = key_len;
 
@@ -52,6 +50,12 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
 	return 0;
 }
 
+static int camellia_set_key_skcipher(struct crypto_skcipher *tfm,
+				     const u8 *in_key, unsigned int key_len)
+{
+	return camellia_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
 extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
 				   u32 *output, unsigned int key_len);
 
@@ -81,61 +85,46 @@ typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
 extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
 extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
 
-#define CAMELLIA_BLOCK_MASK	(~(CAMELLIA_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	ecb_crypt_op *op;
 	const u64 *key;
+	unsigned int nbytes;
 	int err;
 
 	op = camellia_sparc64_ecb_crypt_3_grand_rounds;
 	if (ctx->key_len != 16)
 		op = camellia_sparc64_ecb_crypt_4_grand_rounds;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	if (encrypt)
 		key = &ctx->encrypt_key[0];
 	else
 		key = &ctx->decrypt_key[0];
 	camellia_sparc64_load_keys(key, ctx->key_len);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64;
-			u64 *dst64;
-
-			src64 = (const u64 *)walk.src.virt.addr;
-			dst64 = (u64 *) walk.dst.virt.addr;
-			op(src64, dst64, block_len, key);
-		}
-		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		op(walk.src.virt.addr, walk.dst.virt.addr,
+		   round_down(nbytes, CAMELLIA_BLOCK_SIZE), key);
+		err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return __ecb_crypt(desc, dst, src, nbytes, true);
+	return __ecb_crypt(req, true);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return __ecb_crypt(desc, dst, src, nbytes, false);
+	return __ecb_crypt(req, false);
 }
 
 typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
@@ -146,85 +135,65 @@ extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
 extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
 extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	cbc_crypt_op *op;
 	const u64 *key;
+	unsigned int nbytes;
 	int err;
 
 	op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
 	if (ctx->key_len != 16)
 		op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	key = &ctx->encrypt_key[0];
 	camellia_sparc64_load_keys(key, ctx->key_len);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64;
-			u64 *dst64;
-
-			src64 = (const u64 *)walk.src.virt.addr;
-			dst64 = (u64 *) walk.dst.virt.addr;
-			op(src64, dst64, block_len, key,
-			   (u64 *) walk.iv);
-		}
-		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		op(walk.src.virt.addr, walk.dst.virt.addr,
+		   round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	cbc_crypt_op *op;
 	const u64 *key;
+	unsigned int nbytes;
 	int err;
 
 	op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
 	if (ctx->key_len != 16)
 		op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	key = &ctx->decrypt_key[0];
 	camellia_sparc64_load_keys(key, ctx->key_len);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64;
-			u64 *dst64;
-
-			src64 = (const u64 *)walk.src.virt.addr;
-			dst64 = (u64 *) walk.dst.virt.addr;
-			op(src64, dst64, block_len, key,
-			   (u64 *) walk.iv);
-		}
-		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		op(walk.src.virt.addr, walk.dst.virt.addr,
+		   round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
 	.cra_name		= "camellia",
 	.cra_driver_name	= "camellia-sparc64",
 	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
@@ -242,46 +211,37 @@ static struct crypto_alg algs[] = { {
 			.cia_decrypt		= camellia_decrypt
 		}
 	}
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_set_key,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_set_key,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(camellia)",
+		.base.cra_driver_name	= "ecb-camellia-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_set_key_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(camellia)",
+		.base.cra_driver_name	= "cbc-camellia-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_set_key_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}
 };
 
 static bool __init sparc64_has_camellia_opcode(void)
@@ -300,17 +260,27 @@ static bool __init sparc64_has_camellia_opcode(void)
 
 static int __init camellia_sparc64_mod_init(void)
 {
-	if (sparc64_has_camellia_opcode()) {
-		pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
-		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	int err;
+
+	if (!sparc64_has_camellia_opcode()) {
+		pr_info("sparc64 camellia opcodes not available.\n");
+		return -ENODEV;
 	}
-	pr_info("sparc64 camellia opcodes not available.\n");
-	return -ENODEV;
+	pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+	err = crypto_register_alg(&cipher_alg);
+	if (err)
+		return err;
+	err = crypto_register_skciphers(skcipher_algs,
+					ARRAY_SIZE(skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&cipher_alg);
+	return err;
 }
 
 static void __exit camellia_sparc64_mod_fini(void)
 {
-	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+	crypto_unregister_alg(&cipher_alg);
+	crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 }
 
 module_init(camellia_sparc64_mod_init);
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index 1299073285a3..4e9323229e71 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -33,10 +33,8 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index db6010b4e52e..a499102bf706 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 
 #include <asm/fpumacro.h>
 #include <asm/pstate.h>
@@ -61,6 +62,12 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
+static int des_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int keylen)
+{
+	return des_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
 extern void des_sparc64_crypt(const u64 *key, const u64 *input,
 			      u64 *output);
 
@@ -85,113 +92,90 @@ extern void des_sparc64_load_keys(const u64 *key);
 extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
 				  unsigned int len);
 
-#define DES_BLOCK_MASK	(~(DES_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	if (encrypt)
 		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
 	else
 		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
-					      (u64 *) walk.dst.virt.addr,
-					      block_len);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		des_sparc64_ecb_crypt(walk.src.virt.addr, walk.dst.virt.addr,
+				      round_down(nbytes, DES_BLOCK_SIZE));
+		err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return __ecb_crypt(desc, dst, src, nbytes, true);
+	return __ecb_crypt(req, true);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return __ecb_crypt(desc, dst, src, nbytes, false);
+	return __ecb_crypt(req, false);
 }
 
 extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
 				    unsigned int len, u64 *iv);
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
+				    unsigned int len, u64 *iv);
+
+static int __cbc_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
-		if (likely(block_len)) {
-			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
-						(u64 *) walk.dst.virt.addr,
-						block_len, (u64 *) walk.iv);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	if (encrypt)
+		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+	else
+		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+	while ((nbytes = walk.nbytes) != 0) {
+		if (encrypt)
+			des_sparc64_cbc_encrypt(walk.src.virt.addr,
+						walk.dst.virt.addr,
+						round_down(nbytes,
+							   DES_BLOCK_SIZE),
+						walk.iv);
+		else
+			des_sparc64_cbc_decrypt(walk.src.virt.addr,
+						walk.dst.virt.addr,
+						round_down(nbytes,
+							   DES_BLOCK_SIZE),
+						walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
-				    unsigned int len, u64 *iv);
-
-static int cbc_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+	return __cbc_crypt(req, true);
+}
 
-		if (likely(block_len)) {
-			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
-						(u64 *) walk.dst.virt.addr,
-						block_len, (u64 *) walk.iv);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-	fprs_write(0);
-	return err;
+static int cbc_decrypt(struct skcipher_request *req)
+{
+	return __cbc_crypt(req, false);
 }
 
 static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
@@ -227,6 +211,12 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
+static int des3_ede_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	return des3_ede_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
 extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
 				   u64 *output);
 
@@ -251,241 +241,196 @@ extern void des3_ede_sparc64_load_keys(const u64 *key);
 extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
 				       u64 *output, unsigned int len);
 
-static int __ecb3_crypt(struct blkcipher_desc *desc,
-			struct scatterlist *dst, struct scatterlist *src,
-			unsigned int nbytes, bool encrypt)
+static int __ecb3_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	const u64 *K;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	if (encrypt)
 		K = &ctx->encrypt_expkey[0];
 	else
 		K = &ctx->decrypt_expkey[0];
 	des3_ede_sparc64_load_keys(K);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64 = (const u64 *)walk.src.virt.addr;
-			des3_ede_sparc64_ecb_crypt(K, src64,
-						   (u64 *) walk.dst.virt.addr,
-						   block_len);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		des3_ede_sparc64_ecb_crypt(K, walk.src.virt.addr,
+					   walk.dst.virt.addr,
+					   round_down(nbytes, DES_BLOCK_SIZE));
+		err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static int ecb3_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb3_encrypt(struct skcipher_request *req)
 {
-	return __ecb3_crypt(desc, dst, src, nbytes, true);
+	return __ecb3_crypt(req, true);
 }
 
-static int ecb3_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb3_decrypt(struct skcipher_request *req)
 {
-	return __ecb3_crypt(desc, dst, src, nbytes, false);
+	return __ecb3_crypt(req, false);
 }
 
 extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
 					 u64 *output, unsigned int len,
 					 u64 *iv);
 
-static int cbc3_encrypt(struct blkcipher_desc *desc,
-			struct scatterlist *dst, struct scatterlist *src,
-			unsigned int nbytes)
-{
-	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	const u64 *K;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	K = &ctx->encrypt_expkey[0];
-	des3_ede_sparc64_load_keys(K);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64 = (const u64 *)walk.src.virt.addr;
-			des3_ede_sparc64_cbc_encrypt(K, src64,
-						     (u64 *) walk.dst.virt.addr,
-						     block_len,
-						     (u64 *) walk.iv);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-	fprs_write(0);
-	return err;
-}
-
 extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
 					 u64 *output, unsigned int len,
 					 u64 *iv);
 
-static int cbc3_decrypt(struct blkcipher_desc *desc,
-			struct scatterlist *dst, struct scatterlist *src,
-			unsigned int nbytes)
+static int __cbc3_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	const u64 *K;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
-	K = &ctx->decrypt_expkey[0];
+	if (encrypt)
+		K = &ctx->encrypt_expkey[0];
+	else
+		K = &ctx->decrypt_expkey[0];
 	des3_ede_sparc64_load_keys(K);
-	while ((nbytes = walk.nbytes)) {
-		unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
-		if (likely(block_len)) {
-			const u64 *src64 = (const u64 *)walk.src.virt.addr;
-			des3_ede_sparc64_cbc_decrypt(K, src64,
-						     (u64 *) walk.dst.virt.addr,
-						     block_len,
-						     (u64 *) walk.iv);
-		}
-		nbytes &= DES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while ((nbytes = walk.nbytes) != 0) {
+		if (encrypt)
+			des3_ede_sparc64_cbc_encrypt(K, walk.src.virt.addr,
+						     walk.dst.virt.addr,
+						     round_down(nbytes,
+								DES_BLOCK_SIZE),
+						     walk.iv);
+		else
+			des3_ede_sparc64_cbc_decrypt(K, walk.src.virt.addr,
+						     walk.dst.virt.addr,
+						     round_down(nbytes,
+								DES_BLOCK_SIZE),
+						     walk.iv);
+		err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
 	}
 	fprs_write(0);
 	return err;
 }
 
-static struct crypto_alg algs[] = { {
-	.cra_name		= "des",
-	.cra_driver_name	= "des-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_module		= THIS_MODULE,
-	.cra_u	= {
-		.cipher	= {
-			.cia_min_keysize	= DES_KEY_SIZE,
-			.cia_max_keysize	= DES_KEY_SIZE,
-			.cia_setkey		= des_set_key,
-			.cia_encrypt		= sparc_des_encrypt,
-			.cia_decrypt		= sparc_des_decrypt
+static int cbc3_encrypt(struct skcipher_request *req)
+{
+	return __cbc3_crypt(req, true);
+}
+
+static int cbc3_decrypt(struct skcipher_request *req)
+{
+	return __cbc3_crypt(req, false);
+}
+
+static struct crypto_alg cipher_algs[] = {
+	{
+		.cra_name		= "des",
+		.cra_driver_name	= "des-sparc64",
+		.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= DES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
+		.cra_u	= {
+			.cipher	= {
+				.cia_min_keysize	= DES_KEY_SIZE,
+				.cia_max_keysize	= DES_KEY_SIZE,
+				.cia_setkey		= des_set_key,
+				.cia_encrypt		= sparc_des_encrypt,
+				.cia_decrypt		= sparc_des_decrypt
+			}
 		}
-	}
-}, {
-	.cra_name		= "ecb(des)",
-	.cra_driver_name	= "ecb-des-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			.setkey		= des_set_key,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(des)",
-	.cra_driver_name	= "cbc-des-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			.ivsize		= DES_BLOCK_SIZE,
-			.setkey		= des_set_key,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "des3_ede",
-	.cra_driver_name	= "des3_ede-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_module		= THIS_MODULE,
-	.cra_u	= {
-		.cipher	= {
-			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
-			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
-			.cia_setkey		= des3_ede_set_key,
-			.cia_encrypt		= sparc_des3_ede_encrypt,
-			.cia_decrypt		= sparc_des3_ede_decrypt
+	}, {
+		.cra_name		= "des3_ede",
+		.cra_driver_name	= "des3_ede-sparc64",
+		.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
+		.cra_u	= {
+			.cipher	= {
+				.cia_min_keysize	= DES3_EDE_KEY_SIZE,
+				.cia_max_keysize	= DES3_EDE_KEY_SIZE,
+				.cia_setkey		= des3_ede_set_key,
+				.cia_encrypt		= sparc_des3_ede_encrypt,
+				.cia_decrypt		= sparc_des3_ede_decrypt
+			}
 		}
 	}
-}, {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "ecb-des3_ede-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.setkey		= des3_ede_set_key,
-			.encrypt	= ecb3_encrypt,
-			.decrypt	= ecb3_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "cbc-des3_ede-sparc64",
-	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_set_key,
-			.encrypt	= cbc3_encrypt,
-			.decrypt	= cbc3_decrypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(des)",
+		.base.cra_driver_name	= "ecb-des-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.setkey			= des_set_key_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(des)",
+		.base.cra_driver_name	= "cbc-des-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= des_set_key_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3_ede-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= des3_ede_set_key_skcipher,
+		.encrypt		= ecb3_encrypt,
+		.decrypt		= ecb3_decrypt,
+	}, {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3_ede-sparc64",
+		.base.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_sparc64_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_set_key_skcipher,
+		.encrypt		= cbc3_encrypt,
+		.decrypt		= cbc3_decrypt,
+	}
+};
 
 static bool __init sparc64_has_des_opcode(void)
 {
@@ -503,17 +448,27 @@ static bool __init sparc64_has_des_opcode(void)
 
 static int __init des_sparc64_mod_init(void)
 {
-	if (sparc64_has_des_opcode()) {
-		pr_info("Using sparc64 des opcodes optimized DES implementation\n");
-		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	int err;
+
+	if (!sparc64_has_des_opcode()) {
+		pr_info("sparc64 des opcodes not available.\n");
+		return -ENODEV;
 	}
-	pr_info("sparc64 des opcodes not available.\n");
-	return -ENODEV;
+	pr_info("Using sparc64 des opcodes optimized DES implementation\n");
+	err = crypto_register_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+	if (err)
+		return err;
+	err = crypto_register_skciphers(skcipher_algs,
+					ARRAY_SIZE(skcipher_algs));
+	if (err)
+		crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+	return err;
 }
 
 static void __exit des_sparc64_mod_fini(void)
 {
-	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+	crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+	crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 }
 
 module_init(des_sparc64_mod_init);
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index b6212164847b..62de2eb2773d 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -18,7 +18,6 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += mmiowb.h
 generic-y += module.h
-generic-y += msi.h
 generic-y += preempt.h
 generic-y += serial.h
 generic-y += trace_clock.h
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 30b1763580b1..40a267b3bd52 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -125,23 +125,6 @@ typedef u32		compat_sigset_word;
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 #ifdef CONFIG_COMPAT
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index df2dc1784673..9a52d9506f80 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -127,6 +127,7 @@ static inline void sbus_memcpy_toio(volatile void __iomem *dst,
  * Bus number may be embedded in the higher bits of the physical address.
  * This is why we have no bus number argument to ioremap().
  */
+void __iomem *ioremap(phys_addr_t offset, size_t size);
 void iounmap(volatile void __iomem *addr);
 /* Create a virtual mapping cookie for an IO port range */
 void __iomem *ioport_map(unsigned long port, unsigned int nr);
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 688911051b44..9bb27e5c22f1 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -406,7 +406,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 	return (void __iomem *)offset;
 }
 
-#define ioremap_nocache(X,Y)		ioremap((X),(Y))
+#define ioremap_uc(X,Y)			ioremap((X),(Y))
 #define ioremap_wc(X,Y)			ioremap((X),(Y))
 #define ioremap_wt(X,Y)			ioremap((X),(Y))
 
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 10538a4d1a1e..eae0c92ec422 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd)
 #define pgd_free(mm, pgd)	free_pgd_fast(pgd)
 #define pgd_alloc(mm)	get_pgd_fast()
 
-static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+static inline void pud_set(pud_t * pudp, pmd_t * pmdp)
 {
 	unsigned long pa = __nocache_pa(pmdp);
 
-	set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+	set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4))));
 }
 
-#define pgd_populate(MM, PGD, PMD)      pgd_set(PGD, PMD)
+#define pud_populate(MM, PGD, PMD)      pud_set(PGD, PMD)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
 				   unsigned long address)
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 9d3e5cc95bbb..264e76ceccf6 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -16,12 +16,12 @@
 
 extern struct kmem_cache *pgtable_cache;
 
-static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+static inline void __p4d_populate(p4d_t *p4d, pud_t *pud)
 {
-	pgd_set(pgd, pud);
+	p4d_set(p4d, pud);
 }
 
-#define pgd_populate(MM, PGD, PUD)	__pgd_populate(PGD, PUD)
+#define p4d_populate(MM, P4D, PUD)	__p4d_populate(P4D, PUD)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 31da44826645..6d6f44c0cad9 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -12,7 +12,7 @@
 #include <linux/const.h>
 
 #ifndef __ASSEMBLY__
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
@@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd)
 	return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
 }
 
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long pud_page_vaddr(pud_t pud)
 {
-	if (srmmu_device_memory(pgd_val(pgd))) {
+	if (srmmu_device_memory(pud_val(pud))) {
 		return ~0;
 	} else {
-		unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+		unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
 		return (unsigned long)__nocache_va(v << 4);
 	}
 }
@@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp)
 		set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
 }
 
-static inline int pgd_none(pgd_t pgd)          
+static inline int pud_none(pud_t pud)
 {
-	return !(pgd_val(pgd) & 0xFFFFFFF);
+	return !(pud_val(pud) & 0xFFFFFFF);
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
 {
-	return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+	return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
 }
 
-static inline int pgd_present(pgd_t pgd)
+static inline int pud_present(pud_t pud)
 {
-	return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+	return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
 }
 
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void pud_clear(pud_t *pudp)
 {
-	set_pte((pte_t *)pgdp, __pte(0));
+	set_pte((pte_t *)pudp, __pte(0));
 }
 
 /*
@@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 /* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address)
 {
-	return (pmd_t *) pgd_page_vaddr(*dir) +
+	return (pmd_t *) pud_page_vaddr(*dir) +
 		((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
 }
 
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 6ae8016ef4ec..34ff3b43afbb 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -13,7 +13,7 @@
  * the SpitFire page tables.
  */
 
-#include <asm-generic/5level-fixup.h>
+#include <asm-generic/pgtable-nop4d.h>
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <asm/types.h>
@@ -810,9 +810,9 @@ static inline int pmd_present(pmd_t pmd)
 
 #define pud_bad(pud)			(pud_val(pud) & ~PAGE_MASK)
 
-#define pgd_none(pgd)			(!pgd_val(pgd))
+#define p4d_none(p4d)			(!p4d_val(p4d))
 
-#define pgd_bad(pgd)			(pgd_val(pgd) & ~PAGE_MASK)
+#define p4d_bad(p4d)			(p4d_val(p4d) & ~PAGE_MASK)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -859,13 +859,13 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
 #define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0UL)
 #define pud_present(pud)		(pud_val(pud) != 0U)
 #define pud_clear(pudp)			(pud_val(*(pudp)) = 0UL)
-#define pgd_page_vaddr(pgd)		\
-	((unsigned long) __va(pgd_val(pgd)))
-#define pgd_present(pgd)		(pgd_val(pgd) != 0U)
-#define pgd_clear(pgdp)			(pgd_val(*(pgdp)) = 0UL)
+#define p4d_page_vaddr(p4d)		\
+	((unsigned long) __va(p4d_val(p4d)))
+#define p4d_present(p4d)		(p4d_val(p4d) != 0U)
+#define p4d_clear(p4dp)			(p4d_val(*(p4dp)) = 0UL)
 
 /* only used by the stubbed out hugetlb gup code, should never be called */
-#define pgd_page(pgd)			NULL
+#define p4d_page(p4d)			NULL
 
 static inline unsigned long pud_large(pud_t pud)
 {
@@ -884,8 +884,8 @@ static inline unsigned long pud_pfn(pud_t pud)
 /* Same in both SUN4V and SUN4U.  */
 #define pte_none(pte) 			(!pte_val(pte))
 
-#define pgd_set(pgdp, pudp)	\
-	(pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+#define p4d_set(p4dp, pudp)	\
+	(p4d_val(*(p4dp)) = (__pa((unsigned long) (pudp))))
 
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
@@ -896,8 +896,8 @@ static inline unsigned long pud_pfn(pud_t pud)
 
 /* Find an entry in the third-level page table.. */
 #define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset(pgdp, address)	\
-	((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+#define pud_offset(p4dp, address)	\
+	((pud_t *) p4d_page_vaddr(*(p4dp)) + pud_index(address))
 
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pudp, address)	\
diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..04b8ab9518b8
--- /dev/null
+++ b/arch/sparc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SPARC_VMALLOC_H
+#define _ASM_SPARC_VMALLOC_H
+
+#endif /* _ASM_SPARC_VMALLOC_H */
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
index 9d0d125500e2..0ea1240d2ea1 100644
--- a/arch/sparc/include/uapi/asm/ipcbuf.h
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __SPARC_IPCBUF_H
 #define __SPARC_IPCBUF_H
 
+#include <linux/posix_types.h>
+
 /*
  * The ipc64_perm structure for sparc/sparc64 architecture.
  * Note extra padding because this structure is passed back and forth
@@ -15,19 +17,19 @@
 
 struct ipc64_perm
 {
-	__kernel_key_t	key;
-	__kernel_uid_t	uid;
-	__kernel_gid_t	gid;
-	__kernel_uid_t	cuid;
-	__kernel_gid_t	cgid;
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
 #ifndef __arch64__
-	unsigned short	__pad0;
+	unsigned short		__pad0;
 #endif
-	__kernel_mode_t	mode;
-	unsigned short	__pad1;
-	unsigned short	seq;
-	unsigned long long __unused1;
-	unsigned long long __unused2;
+	__kernel_mode_t		mode;
+	unsigned short		__pad1;
+	unsigned short		seq;
+	unsigned long long	__unused1;
+	unsigned long long	__unused2;
 };
 
 #endif /* __SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h
index ffc46c211d6d..0954552da188 100644
--- a/arch/sparc/include/uapi/asm/msgbuf.h
+++ b/arch/sparc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_MSGBUF_H
 #define _SPARC_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for sparc64 architecture.
  * Note extra padding because this structure is passed back and forth
@@ -13,9 +15,9 @@
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #if defined(__sparc__) && defined(__arch64__)
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
+	long msg_stime;			/* last msgsnd time */
+	long msg_rtime;			/* last msgrcv time */
+	long msg_ctime;			/* last change time */
 #else
 	unsigned long msg_stime_high;
 	unsigned long msg_stime;	/* last msgsnd time */
diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h
index f3d309c2e1cd..10621d066d79 100644
--- a/arch/sparc/include/uapi/asm/sembuf.h
+++ b/arch/sparc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_SEMBUF_H
 #define _SPARC_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for sparc architecture.
  * Note extra padding because this structure is passed back and forth
@@ -14,8 +16,8 @@
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
 #if defined(__sparc__) && defined(__arch64__)
-	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
+	long		sem_otime;		/* last semop time */
+	long		sem_ctime;		/* last change time */
 #else
 	unsigned long	sem_otime_high;
 	unsigned long	sem_otime;		/* last semop time */
diff --git a/arch/sparc/include/uapi/asm/shmbuf.h b/arch/sparc/include/uapi/asm/shmbuf.h
index 06618b84822d..a5d7d8d681c4 100644
--- a/arch/sparc/include/uapi/asm/shmbuf.h
+++ b/arch/sparc/include/uapi/asm/shmbuf.h
@@ -14,9 +14,9 @@
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 #if defined(__sparc__) && defined(__arch64__)
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
+	long			shm_atime;	/* last attach time */
+	long			shm_dtime;	/* last detach time */
+	long			shm_ctime;	/* last change time */
 #else
 	unsigned long		shm_atime_high;
 	unsigned long		shm_atime;	/* last attach time */
diff --git a/arch/sparc/include/uapi/asm/stat.h b/arch/sparc/include/uapi/asm/stat.h
index b6ec4eb217f7..732c41720e24 100644
--- a/arch/sparc/include/uapi/asm/stat.h
+++ b/arch/sparc/include/uapi/asm/stat.h
@@ -14,12 +14,12 @@ struct stat {
 	uid_t   st_uid;
 	gid_t   st_gid;
 	unsigned int st_rdev;
-	off_t   st_size;
-	time_t  st_atime;
-	time_t  st_mtime;
-	time_t  st_ctime;
-	off_t   st_blksize;
-	off_t   st_blocks;
+	long    st_size;
+	long    st_atime;
+	long    st_mtime;
+	long    st_ctime;
+	long    st_blksize;
+	long    st_blocks;
 	unsigned long  __unused4[2];
 };
 
@@ -57,15 +57,15 @@ struct stat {
 	unsigned short	st_uid;
 	unsigned short	st_gid;
 	unsigned short	st_rdev;
-	off_t		st_size;
-	time_t		st_atime;
+	long		st_size;
+	long		st_atime;
 	unsigned long	st_atime_nsec;
-	time_t		st_mtime;
+	long		st_mtime;
 	unsigned long	st_mtime_nsec;
-	time_t		st_ctime;
+	long		st_ctime;
 	unsigned long	st_ctime_nsec;
-	off_t		st_blksize;
-	off_t		st_blocks;
+	long		st_blksize;
+	long		st_blocks;
 	unsigned long	__unused4[2];
 };
 
diff --git a/arch/sparc/include/uapi/asm/statfs.h b/arch/sparc/include/uapi/asm/statfs.h
deleted file mode 100644
index 20c8f5bd340e..000000000000
--- a/arch/sparc/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef ___ASM_SPARC_STATFS_H
-#define ___ASM_SPARC_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index f89603855f1e..e59461d03b9a 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -366,8 +366,8 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 
 /* IIep is write-through, not flushing on cpu to device transfer. */
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	if (dir != PCI_DMA_TODEVICE)
 		dma_make_coherent(paddr, PAGE_ALIGN(size));
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index ec244d1022ce..da8902295c8c 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -132,12 +132,13 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
 		regs->which_io, regs->phys_addr);
 }
 
-/* "name:vendor:device@irq,addrlo" */
+/* "name@irq,addrlo" */
 static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
 {
 	const char *name = of_get_property(dp, "name", NULL);
 	struct amba_prom_registers *regs;
-	unsigned int *intr, *device, *vendor, reg0;
+	unsigned int *intr;
+	unsigned int reg0;
 	struct property *prop;
 	int interrupt = 0;
 
@@ -159,18 +160,7 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
 	else
 		intr = prop->value;
 
-	prop = of_find_property(dp, "vendor", NULL);
-	if (!prop)
-		return;
-	vendor = prop->value;
-	prop = of_find_property(dp, "device", NULL);
-	if (!prop)
-		return;
-	device = prop->value;
-
-	sprintf(tmp_buf, "%s:%d:%d@%x,%x",
-		name, *vendor, *device,
-		*intr, reg0);
+	sprintf(tmp_buf, "%s@%x,%x", name, *intr, reg0);
 }
 
 static void __init __build_path_component(struct device_node *dp, char *tmp_buf)
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 29aa34f11720..c5fd4b450d9b 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -310,7 +310,7 @@ kern_rtt_restore:
 		retry
 
 to_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 		ldsw			[%g6 + TI_PRE_COUNT], %l5
 		brnz			%l5, kern_fpucheck
 		 ldx			[%g6 + TI_FLAGS], %l5
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index afe1592a6d08..5d1bcfce05d8 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -332,10 +332,6 @@ void __init setup_arch(char **cmdline_p)
 		break;
 	}
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	idprom_init();
 	load_mmu();
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index fd2182a5c32d..75e3992203b6 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -653,10 +653,6 @@ void __init setup_arch(char **cmdline_p)
 	else
 		pr_info("ARCH: SUN4U\n");
 
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
 	idprom_init();
 
 	if (!root_flags)
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index a237810aa9f4..2a734ecd0a40 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -299,6 +299,7 @@ static void flush_signal_insns(unsigned long address)
 	unsigned long pstate, paddr;
 	pte_t *ptep, pte;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 
@@ -318,7 +319,10 @@ static void flush_signal_insns(unsigned long address)
 	pgdp = pgd_offset(current->mm, address);
 	if (pgd_none(*pgdp))
 		goto out_irqs_on;
-	pudp = pud_offset(pgdp, address);
+	p4dp = p4d_offset(pgdp, address);
+	if (p4d_none(*p4dp))
+		goto out_irqs_on;
+	pudp = pud_offset(p4dp, address);
 	if (pud_none(*pudp))
 		goto out_irqs_on;
 	pmdp = pmd_offset(pudp, address);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index a8275fea4b70..80f20b3808ee 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1621,6 +1621,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 static void __init pcpu_populate_pte(unsigned long addr)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -1633,7 +1634,17 @@ static void __init pcpu_populate_pte(unsigned long addr)
 		pgd_populate(&init_mm, pgd, new);
 	}
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
 		pmd_t *new;
 
@@ -1673,9 +1684,9 @@ void __init setup_per_cpu_areas(void)
 					    pcpu_alloc_bootmem,
 					    pcpu_free_bootmem);
 		if (rc)
-			pr_warning("PERCPU: %s allocator failed (%d), "
-				   "falling back to page size\n",
-				   pcpu_fc_names[pcpu_chosen_fc], rc);
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9f41a6f5a032..34917617f258 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -548,34 +548,35 @@ out_unlock:
 	return err;
 }
 
-SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
+SYSCALL_DEFINE1(sparc_adjtimex, struct __kernel_timex __user *, txc_p)
 {
-	struct timex txc;		/* Local copy of parameter */
-	struct __kernel_timex *kt = (void *)&txc;
+	struct __kernel_timex txc;
+	struct __kernel_old_timeval *tv = (void *)&txc_p->time;
 	int ret;
 
 	/* Copy the user data space into the kernel copy
 	 * structure. But bear in mind that the structures
 	 * may change
 	 */
-	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+	if (copy_from_user(&txc, txc_p, sizeof(txc)))
 		return -EFAULT;
 
 	/*
 	 * override for sparc64 specific timeval type: tv_usec
 	 * is 32 bit wide instead of 64-bit in __kernel_timex
 	 */
-	kt->time.tv_usec = txc.time.tv_usec;
-	ret = do_adjtimex(kt);
-	txc.time.tv_usec = kt->time.tv_usec;
+	txc.time.tv_usec = tv->tv_usec;
+	ret = do_adjtimex(&txc);
+	tv->tv_usec = txc.time.tv_usec;
 
-	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+	return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret;
 }
 
-SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p)
+SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,
+		struct __kernel_timex __user *, txc_p)
 {
-	struct timex txc;		/* Local copy of parameter */
-	struct __kernel_timex *kt = (void *)&txc;
+	struct __kernel_timex txc;
+	struct __kernel_old_timeval *tv = (void *)&txc_p->time;
 	int ret;
 
 	if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
@@ -590,18 +591,18 @@ SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex _
 	 * structure. But bear in mind that the structures
 	 * may change
 	 */
-	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+	if (copy_from_user(&txc, txc_p, sizeof(txc)))
 		return -EFAULT;
 
 	/*
 	 * override for sparc64 specific timeval type: tv_usec
 	 * is 32 bit wide instead of 64-bit in __kernel_timex
 	 */
-	kt->time.tv_usec = txc.time.tv_usec;
-	ret = do_clock_adjtime(which_clock, kt);
-	txc.time.tv_usec = kt->time.tv_usec;
+	txc.time.tv_usec = tv->tv_usec;
+	ret = do_clock_adjtime(which_clock, &txc);
+	tv->tv_usec = txc.time.tv_usec;
 
-	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+	return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret;
 }
 
 SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 8c8cc7537fb2..f13615ecdecc 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -481,3 +481,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+437	common	openat2			sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 61afd787bd0c..f99e99e58075 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -67,7 +67,7 @@ SECTIONS
 	.data1 : {
 		*(.data1)
 	}
-	RW_DATA_SECTION(SMP_CACHE_BYTES, 0, THREAD_SIZE)
+	RW_DATA(SMP_CACHE_BYTES, 0, THREAD_SIZE)
 
 	/* End of data section */
 	_edata = .;
@@ -78,7 +78,6 @@ SECTIONS
 		__stop___fixup = .;
 	}
 	EXCEPTION_TABLE(16)
-	NOTES
 
 	. = ALIGN(PAGE_SIZE);
 	__init_begin = ALIGN(PAGE_SIZE);
@@ -172,12 +171,14 @@ SECTIONS
 	}
 	PERCPU_SECTION(SMP_CACHE_BYTES)
 
-#ifdef CONFIG_JUMP_LABEL
 	. = ALIGN(PAGE_SIZE);
 	.exit.text : {
 		EXIT_TEXT
 	}
-#endif
+
+	.exit.data : {
+		EXIT_DATA
+	}
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 8d69de111470..89976c9b936c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -351,6 +351,8 @@ vmalloc_fault:
 		 */
 		int offset = pgd_index(address);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
+		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 
 		pgd = tsk->active_mm->pgd + offset;
@@ -363,8 +365,13 @@ vmalloc_fault:
 			return;
 		}
 
-		pmd = pmd_offset(pgd, address);
-		pmd_k = pmd_offset(pgd_k, address);
+		p4d = p4d_offset(pgd, address);
+		pud = pud_offset(p4d, address);
+		pmd = pmd_offset(pud, address);
+
+		p4d_k = p4d_offset(pgd_k, address);
+		pud_k = pud_offset(p4d_k, address);
+		pmd_k = pmd_offset(pud_k, address);
 
 		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
 			goto bad_area_nosemaphore;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 2371fb6b97e4..8b7ddbd14b65 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -80,6 +80,7 @@ static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
 static unsigned int get_user_insn(unsigned long tpc)
 {
 	pgd_t *pgdp = pgd_offset(current->mm, tpc);
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep, pte;
@@ -88,7 +89,10 @@ static unsigned int get_user_insn(unsigned long tpc)
 
 	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
 		goto out;
-	pudp = pud_offset(pgdp, tpc);
+	p4dp = p4d_offset(pgdp, tpc);
+	if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp)))
+		goto out;
+	pudp = pud_offset(p4dp, tpc);
 	if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
 		goto out;
 
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 86bc2a58d26c..d4a80adea7e5 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -39,10 +39,14 @@ static pte_t *kmap_pte;
 void __init kmap_init(void)
 {
 	unsigned long address;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *dir;
 
 	address = __fix_to_virt(FIX_KMAP_BEGIN);
-	dir = pmd_offset(pgd_offset_k(address), address);
+	p4d = p4d_offset(pgd_offset_k(address), address);
+	pud = pud_offset(p4d, address);
+	dir = pmd_offset(pud, address);
 
         /* cache the first kmap pte */
         kmap_pte = pte_offset_kernel(dir, address);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index f78793a06bbd..7b9fa861b67c 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -277,11 +277,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return NULL;
 	if (sz >= PUD_SIZE)
@@ -298,13 +300,17 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd_none(*pgd))
 		return NULL;
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return NULL;
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud))
 		return NULL;
 	if (is_hugetlb_pud(*pud))
@@ -449,7 +455,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
 {
@@ -458,7 +464,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	unsigned long start;
 
 	start = addr;
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -481,8 +487,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	if (end - 1 > ceiling - 1)
 		return;
 
-	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
+	pud = pud_offset(p4d, start);
+	p4d_clear(p4d);
 	pud_free_tlb(tlb, pud, start);
 	mm_dec_nr_puds(tlb->mm);
 }
@@ -492,6 +498,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			    unsigned long floor, unsigned long ceiling)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	unsigned long next;
 
 	addr &= PMD_MASK;
@@ -511,10 +518,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 		return;
 
 	pgd = pgd_offset(tlb->mm, addr);
+	p4d = p4d_offset(pgd, addr);
 	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
 			continue;
-		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
-	} while (pgd++, addr = next, addr != end);
+		hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+	} while (p4d++, addr = next, addr != end);
 }
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index e6d91819da92..1cf0d666dea3 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -530,7 +530,8 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
 				paddr = kaddr & mask;
 			else {
 				pgd_t *pgdp = pgd_offset_k(kaddr);
-				pud_t *pudp = pud_offset(pgdp, kaddr);
+				p4d_t *p4dp = p4d_offset(pgdp, kaddr);
+				pud_t *pudp = pud_offset(p4dp, kaddr);
 				pmd_t *pmdp = pmd_offset(pudp, kaddr);
 				pte_t *ptep = pte_offset_kernel(pmdp, kaddr);
 
@@ -1653,6 +1654,7 @@ static unsigned long max_phys_bits = 40;
 bool kern_addr_valid(unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -1674,7 +1676,11 @@ bool kern_addr_valid(unsigned long addr)
 	if (pgd_none(*pgd))
 		return 0;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return 0;
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud))
 		return 0;
 
@@ -1800,6 +1806,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 	while (vstart < vend) {
 		unsigned long this_end, paddr = __pa(vstart);
 		pgd_t *pgd = pgd_offset_k(vstart);
+		p4d_t *p4d;
 		pud_t *pud;
 		pmd_t *pmd;
 		pte_t *pte;
@@ -1814,7 +1821,20 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 			alloc_bytes += PAGE_SIZE;
 			pgd_populate(&init_mm, pgd, new);
 		}
-		pud = pud_offset(pgd, vstart);
+
+		p4d = p4d_offset(pgd, vstart);
+		if (p4d_none(*p4d)) {
+			pud_t *new;
+
+			new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
+						  PAGE_SIZE);
+			if (!new)
+				goto err_alloc;
+			alloc_bytes += PAGE_SIZE;
+			p4d_populate(&init_mm, p4d, new);
+		}
+
+		pud = pud_offset(p4d, vstart);
 		if (pud_none(*pud)) {
 			pmd_t *new;
 
@@ -2612,13 +2632,18 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
 	for (; vstart < vend; vstart += PMD_SIZE) {
 		pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
 		unsigned long pte;
+		p4d_t *p4d;
 		pud_t *pud;
 		pmd_t *pmd;
 
 		if (!pgd)
 			return -ENOMEM;
 
-		pud = vmemmap_pud_populate(pgd, vstart, node);
+		p4d = vmemmap_p4d_populate(pgd, vstart, node);
+		if (!p4d)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(p4d, vstart, node);
 		if (!pud)
 			return -ENOMEM;
 
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index f770ee7229d8..33a0facd9eb5 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -239,12 +239,16 @@ static void *iounit_alloc(struct device *dev, size_t len,
 		page = va;
 		{
 			pgd_t *pgdp;
+			p4d_t *p4dp;
+			pud_t *pudp;
 			pmd_t *pmdp;
 			pte_t *ptep;
 			long i;
 
 			pgdp = pgd_offset(&init_mm, addr);
-			pmdp = pmd_offset(pgdp, addr);
+			p4dp = p4d_offset(pgdp, addr);
+			pudp = pud_offset(p4dp, addr);
+			pmdp = pmd_offset(pudp, addr);
 			ptep = pte_offset_map(pmdp, addr);
 
 			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 71ac353032b6..4d3c6991f0ae 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -343,6 +343,8 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
 		page = va;
 		{
 			pgd_t *pgdp;
+			p4d_t *p4dp;
+			pud_t *pudp;
 			pmd_t *pmdp;
 			pte_t *ptep;
 
@@ -354,7 +356,9 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
 				__flush_page_to_ram(page);
 
 			pgdp = pgd_offset(&init_mm, addr);
-			pmdp = pmd_offset(pgdp, addr);
+			p4dp = p4d_offset(pgdp, addr);
+			pudp = pud_offset(p4dp, addr);
+			pmdp = pmd_offset(pudp, addr);
 			ptep = pte_offset_map(pmdp, addr);
 
 			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index cc3ad64479ac..f56c3c9a9793 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -296,6 +296,8 @@ static void __init srmmu_nocache_init(void)
 	void *srmmu_nocache_bitmap;
 	unsigned int bitmap_bits;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long paddr, vaddr;
@@ -329,6 +331,8 @@ static void __init srmmu_nocache_init(void)
 
 	while (vaddr < srmmu_nocache_end) {
 		pgd = pgd_offset_k(vaddr);
+		p4d = p4d_offset(__nocache_fix(pgd), vaddr);
+		pud = pud_offset(__nocache_fix(p4d), vaddr);
 		pmd = pmd_offset(__nocache_fix(pgd), vaddr);
 		pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
 
@@ -516,13 +520,17 @@ static inline void srmmu_mapioaddr(unsigned long physaddr,
 				   unsigned long virt_addr, int bus_type)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long tmp;
 
 	physaddr &= PAGE_MASK;
 	pgdp = pgd_offset_k(virt_addr);
-	pmdp = pmd_offset(pgdp, virt_addr);
+	p4dp = p4d_offset(pgdp, virt_addr);
+	pudp = pud_offset(p4dp, virt_addr);
+	pmdp = pmd_offset(pudp, virt_addr);
 	ptep = pte_offset_kernel(pmdp, virt_addr);
 	tmp = (physaddr >> 4) | SRMMU_ET_PTE;
 
@@ -551,11 +559,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
 static inline void srmmu_unmapioaddr(unsigned long virt_addr)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
+
 	pgdp = pgd_offset_k(virt_addr);
-	pmdp = pmd_offset(pgdp, virt_addr);
+	p4dp = p4d_offset(pgdp, virt_addr);
+	pudp = pud_offset(p4dp, virt_addr);
+	pmdp = pmd_offset(pudp, virt_addr);
 	ptep = pte_offset_kernel(pmdp, virt_addr);
 
 	/* No need to flush uncacheable page. */
@@ -693,20 +706,24 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
 							unsigned long end)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
 	while (start < end) {
 		pgdp = pgd_offset_k(start);
-		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
+		if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
 			pmdp = __srmmu_get_nocache(
 			    SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(__nocache_fix(pgdp), pmdp);
+			pud_set(__nocache_fix(pudp), pmdp);
 		}
-		pmdp = pmd_offset(__nocache_fix(pgdp), start);
+		pmdp = pmd_offset(__nocache_fix(pudp), start);
 		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
 			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 			if (ptep == NULL)
@@ -724,19 +741,23 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
 						  unsigned long end)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
 	while (start < end) {
 		pgdp = pgd_offset_k(start);
-		if (pgd_none(*pgdp)) {
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
+		if (pud_none(*pudp)) {
 			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(pgdp, pmdp);
+			pud_set((pud_t *)pgdp, pmdp);
 		}
-		pmdp = pmd_offset(pgdp, start);
+		pmdp = pmd_offset(pudp, start);
 		if (srmmu_pmd_none(*pmdp)) {
 			ptep = __srmmu_get_nocache(PTE_SIZE,
 							     PTE_SIZE);
@@ -779,6 +800,8 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
 	unsigned long probed;
 	unsigned long addr;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
@@ -810,18 +833,20 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
 		}
 
 		pgdp = pgd_offset_k(start);
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
 		if (what == 2) {
 			*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
 			start += SRMMU_PGDIR_SIZE;
 			continue;
 		}
-		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+		if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
 			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
 						   SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(__nocache_fix(pgdp), pmdp);
+			pud_set(__nocache_fix(pudp), pmdp);
 		}
 		pmdp = pmd_offset(__nocache_fix(pgdp), start);
 		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
@@ -906,6 +931,8 @@ void __init srmmu_paging_init(void)
 	phandle cpunode;
 	char node_str[128];
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long pages_avail;
@@ -967,7 +994,9 @@ void __init srmmu_paging_init(void)
 	srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
 
 	pgd = pgd_offset_k(PKMAP_BASE);
-	pmd = pmd_offset(pgd, PKMAP_BASE);
+	p4d = p4d_offset(pgd, PKMAP_BASE);
+	pud = pud_offset(p4d, PKMAP_BASE);
+	pmd = pmd_offset(pud, PKMAP_BASE);
 	pte = pte_offset_kernel(pmd, PKMAP_BASE);
 	pkmap_page_table = pte;
 
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index 84cc8f7f83e9..c8eabb973b86 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -180,19 +180,19 @@ do {									\
 
 #define emit_loadptr(BASE, STRUCT, FIELD, DEST)				\
 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
-	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *));	\
+	BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(void *));	\
 	*prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST);		\
 } while (0)
 
 #define emit_load32(BASE, STRUCT, FIELD, DEST)				\
 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
-	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32));	\
+	BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u32));	\
 	*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST);		\
 } while (0)
 
 #define emit_load16(BASE, STRUCT, FIELD, DEST)				\
 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
-	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16));	\
+	BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u16));	\
 	*prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST);		\
 } while (0)
 
@@ -202,7 +202,7 @@ do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
 } while (0)
 
 #define emit_load8(BASE, STRUCT, FIELD, DEST)				\
-do {	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8));	\
+do {	BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u8));	\
 	__emit_load8(BASE, STRUCT, FIELD, DEST);			\
 } while (0)
 
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index fc5bdd14de76..e794edde6755 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -63,7 +63,7 @@ notrace static __always_inline struct vvar_data *get_vvar_data(void)
 	return (struct vvar_data *) ret;
 }
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct __kernel_old_timespec *ts)
 {
 	register long num __asm__("g1") = __NR_clock_gettime;
 	register long o0 __asm__("o0") = clock;
@@ -74,7 +74,7 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return o0;
 }
 
-notrace static long vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	register long num __asm__("g1") = __NR_gettimeofday;
 	register long o0 __asm__("o0") = (long) tv;
@@ -144,7 +144,7 @@ notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar)
 }
 
 notrace static __always_inline int do_realtime(struct vvar_data *vvar,
-					       struct timespec *ts)
+					       struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 	u64 ns;
@@ -164,7 +164,7 @@ notrace static __always_inline int do_realtime(struct vvar_data *vvar,
 }
 
 notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
-						     struct timespec *ts)
+						     struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 	u64 ns;
@@ -184,7 +184,7 @@ notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
 }
 
 notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
-						struct timespec *ts)
+						struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 	u64 ns;
@@ -204,7 +204,7 @@ notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
 }
 
 notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
-						      struct timespec *ts)
+						      struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 	u64 ns;
@@ -224,7 +224,7 @@ notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
 }
 
 notrace static int do_realtime_coarse(struct vvar_data *vvar,
-				      struct timespec *ts)
+				      struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 
@@ -237,7 +237,7 @@ notrace static int do_realtime_coarse(struct vvar_data *vvar,
 }
 
 notrace static int do_monotonic_coarse(struct vvar_data *vvar,
-				       struct timespec *ts)
+				       struct __kernel_old_timespec *ts)
 {
 	unsigned long seq;
 
@@ -251,7 +251,7 @@ notrace static int do_monotonic_coarse(struct vvar_data *vvar,
 }
 
 notrace int
-__vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+__vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
 {
 	struct vvar_data *vvd = get_vvar_data();
 
@@ -275,11 +275,11 @@ __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 	return vdso_fallback_gettime(clock, ts);
 }
 int
-clock_gettime(clockid_t, struct timespec *)
+clock_gettime(clockid_t, struct __kernel_old_timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
 notrace int
-__vdso_clock_gettime_stick(clockid_t clock, struct timespec *ts)
+__vdso_clock_gettime_stick(clockid_t clock, struct __kernel_old_timespec *ts)
 {
 	struct vvar_data *vvd = get_vvar_data();
 
@@ -304,15 +304,15 @@ __vdso_clock_gettime_stick(clockid_t clock, struct timespec *ts)
 }
 
 notrace int
-__vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+__vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	struct vvar_data *vvd = get_vvar_data();
 
 	if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
 		if (likely(tv != NULL)) {
 			union tstv_t {
-				struct timespec ts;
-				struct timeval tv;
+				struct __kernel_old_timespec ts;
+				struct __kernel_old_timeval tv;
 			} *tstv = (union tstv_t *) tv;
 			do_realtime(vvd, &tstv->ts);
 			/*
@@ -336,19 +336,19 @@ __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 	return vdso_fallback_gettimeofday(tv, tz);
 }
 int
-gettimeofday(struct timeval *, struct timezone *)
+gettimeofday(struct __kernel_old_timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
 
 notrace int
-__vdso_gettimeofday_stick(struct timeval *tv, struct timezone *tz)
+__vdso_gettimeofday_stick(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	struct vvar_data *vvd = get_vvar_data();
 
 	if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
 		if (likely(tv != NULL)) {
 			union tstv_t {
-				struct timespec ts;
-				struct timeval tv;
+				struct __kernel_old_timespec ts;
+				struct __kernel_old_timeval tv;
 			} *tstv = (union tstv_t *) tv;
 			do_realtime_stick(vvd, &tstv->ts);
 			/*
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index fec6b4ca2b6e..0917f8443c28 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -14,6 +14,7 @@ config UML
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_BUGVERBOSE
+	select HAVE_COPY_THREAD_TLS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
@@ -153,7 +154,7 @@ config KERNEL_STACK_ORDER
 	  It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on
 	  older (pre-2017) CPUs. It is not recommended on newer CPUs due to the
 	  increase in the size of the state which needs to be saved when handling
-          signals.
+	  signals.
 
 config MMAPPER
 	tristate "iomem emulation driver"
@@ -188,6 +189,8 @@ config SECCOMP
 config UML_TIME_TRAVEL_SUPPORT
 	bool
 	prompt "Support time-travel mode (e.g. for test execution)"
+	# inf-cpu mode is incompatible with the benchmarking
+	depends on !RAID6_PQ_BENCHMARK
 	help
 	  Enable this option to support time travel inside the UML instance.
 
diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig
new file mode 100644
index 000000000000..9235b7d42d38
--- /dev/null
+++ b/arch/um/configs/kunit_defconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_TEST=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index fea5a0d522dc..72d417055782 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -147,7 +147,7 @@ config UML_NET
 	  make use of UML networking.
 
 config UML_NET_ETHERTAP
-	bool "Ethertap transport"
+	bool "Ethertap transport (obsolete)"
 	depends on UML_NET
 	help
 	  The Ethertap User-Mode Linux network transport allows a single
@@ -167,14 +167,13 @@ config UML_NET_ETHERTAP
 	  has examples of the UML command line to use to enable Ethertap
 	  networking.
 
-	  If you'd like to set up an IP network with the host and/or the
-	  outside world, say Y to this, the Daemon Transport and/or the
-	  Slip Transport.  You'll need at least one of them, but may choose
-	  more than one without conflict.  If you don't need UML networking,
-	  say N.
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
 
 config UML_NET_TUNTAP
-	bool "TUN/TAP transport"
+	bool "TUN/TAP transport (obsolete)"
 	depends on UML_NET
 	help
 	  The UML TUN/TAP network transport allows a UML instance to exchange
@@ -185,8 +184,13 @@ config UML_NET_TUNTAP
 	  To use this transport, your host kernel must have support for TUN/TAP
 	  devices, either built-in or as a module.
 
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
+
 config UML_NET_SLIP
-	bool "SLIP transport"
+	bool "SLIP transport (obsolete)"
 	depends on UML_NET
 	help
 	  The slip User-Mode Linux network transport allows a running UML to
@@ -201,16 +205,13 @@ config UML_NET_SLIP
 	  has examples of the UML command line to use to enable slip
 	  networking, and details of a few quirks with it.
 
-	  The Ethertap Transport is preferred over slip because of its
-	  limitations.  If you prefer slip, however, say Y here.  Otherwise
-	  choose the Multicast transport (to network multiple UMLs on
-	  multiple hosts), Ethertap (to network with the host and the
-	  outside world), and/or the Daemon transport (to network multiple
-	  UMLs on a single host).  You may choose more than one without
-	  conflict.  If you don't need UML networking, say N.
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
 
 config UML_NET_DAEMON
-	bool "Daemon transport"
+	bool "Daemon transport (obsolete)"
 	depends on UML_NET
 	help
 	  This User-Mode Linux network transport allows one or more running
@@ -225,13 +226,10 @@ config UML_NET_DAEMON
 	  has examples of the UML command line to use to enable Daemon
 	  networking.
 
-	  If you'd like to set up a network with other UMLs on a single host,
-	  say Y.  If you need a network between UMLs on multiple physical
-	  hosts, choose the Multicast Transport.  To set up a network with
-	  the host and/or other IP machines, say Y to the Ethertap or Slip
-	  transports.  You'll need at least one of them, but may choose
-	  more than one without conflict.  If you don't need UML networking,
-	  say N.
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
 
 config UML_NET_VECTOR
 	bool "Vector I/O high performance network devices"
@@ -245,7 +243,7 @@ config UML_NET_VECTOR
 	drivers.
 
 config UML_NET_VDE
-	bool "VDE transport"
+	bool "VDE transport (obsolete)"
 	depends on UML_NET
 	help
 	This User-Mode Linux network transport allows one or more running
@@ -263,11 +261,13 @@ config UML_NET_VDE
 	That site has a good overview of what VDE is and also examples
 	of the UML command line to use to enable VDE networking.
 
-	If you need UML networking with VDE,
-	say Y.
+	NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	migrate to UML_NET_VECTOR.
+
+	If unsure, say N.
 
 config UML_NET_MCAST
-	bool "Multicast transport"
+	bool "Multicast transport (obsolete)"
 	depends on UML_NET
 	help
 	  This Multicast User-Mode Linux network transport allows multiple
@@ -284,15 +284,13 @@ config UML_NET_MCAST
 	  has examples of the UML command line to use to enable Multicast
 	  networking, and notes about the security of this approach.
 
-	  If you need UMLs on multiple physical hosts to communicate as if
-	  they shared an Ethernet network, say Y.  If you need to communicate
-	  with other IP machines, make sure you select one of the other
-	  transports (possibly in addition to Multicast; they're not
-	  exclusive).  If you don't need to network UMLs say N to each of
-	  the transports.
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
 
 config UML_NET_PCAP
-	bool "pcap transport"
+	bool "pcap transport (obsolete)"
 	depends on UML_NET
 	help
 	The pcap transport makes a pcap packet stream on the host look
@@ -304,11 +302,13 @@ config UML_NET_PCAP
 	  <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
 	  has examples of the UML command line to use to enable this option.
 
-	If you intend to use UML as a network monitor for the host, say
-	Y here.  Otherwise, say N.
+	NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	migrate to UML_NET_VECTOR.
+
+	If unsure, say N.
 
 config UML_NET_SLIRP
-	bool "SLiRP transport"
+	bool "SLiRP transport (obsolete)"
 	depends on UML_NET
 	help
 	  The SLiRP User-Mode Linux network transport allows a running UML
@@ -328,16 +328,17 @@ config UML_NET_SLIRP
 	  that of a host behind a firewall that masquerades all network
 	  connections passing through it (but is less secure).
 
-	  To use this you should first have slirp compiled somewhere
-	  accessible on the host, and have read its documentation.  If you
-	  don't need UML networking, say N.
+	  NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+	  migrate to UML_NET_VECTOR.
+
+	  If unsure, say N.
 
 	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
 
 endmenu
 
 config VIRTIO_UML
-	tristate "UML driver for virtio devices"
+	bool "UML driver for virtio devices"
 	select VIRTIO
 	help
 	  This driver provides support for virtio based paravirtual device
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 72222bb036f5..4e51b85e2a23 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -11,7 +11,7 @@
 struct chan_opts {
 	void (*const announce)(char *dev_name, int dev);
 	char *xterm_title;
-	const int raw;
+	int raw;
 };
 
 struct chan_ops {
diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
index 760c507dd5b6..103adac691ed 100644
--- a/arch/um/drivers/cow.h
+++ b/arch/um/drivers/cow.h
@@ -11,7 +11,7 @@ extern int init_cow_file(int fd, char *cow_file, char *backing_file,
 extern int file_reader(__u64 offset, char *buf, int len, void *arg);
 extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
 			   void *arg, __u32 *version_out,
-			   char **backing_file_out, time_t *mtime_out,
+			   char **backing_file_out, long long *mtime_out,
 			   unsigned long long *size_out, int *sectorsize_out,
 			   __u32 *align_out, int *bitmap_offset_out);
 
diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 74b0c2686c95..29b46581ddd1 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -17,6 +17,7 @@
 
 #define PATH_LEN_V1 256
 
+/* unsigned time_t works until year 2106 */
 typedef __u32 time32_t;
 
 struct cow_header_v1 {
@@ -197,7 +198,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 		     int sectorsize, int alignment, unsigned long long *size)
 {
 	struct cow_header_v3 *header;
-	unsigned long modtime;
+	long long modtime;
 	int err;
 
 	err = cow_seek_file(fd, 0);
@@ -276,7 +277,7 @@ int file_reader(__u64 offset, char *buf, int len, void *arg)
 
 int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 		    __u32 *version_out, char **backing_file_out,
-		    time_t *mtime_out, unsigned long long *size_out,
+		    long long *mtime_out, unsigned long long *size_out,
 		    int *sectorsize_out, __u32 *align_out,
 		    int *bitmap_offset_out)
 {
@@ -363,7 +364,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 
 		/*
 		 * this was used until Dec2005 - 64bits are needed to represent
-		 * 2038+. I.e. we can safely do this truncating cast.
+		 * 2106+. I.e. we can safely do this truncating cast.
 		 *
 		 * Additionally, we must use be32toh() instead of be64toh(), since
 		 * the program used to use the former (tested - I got mtime
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 000cb69ba0bc..e6d4f43deba8 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -165,6 +165,7 @@ static const struct file_operations harddog_fops = {
 	.owner		= THIS_MODULE,
 	.write		= harddog_write,
 	.unlocked_ioctl	= harddog_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.open		= harddog_open,
 	.release	= harddog_release,
 	.llseek		= no_llseek,
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index bf75b1ceac47..d35d3f305a31 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -298,6 +298,7 @@ static const struct file_operations hostaudio_fops = {
 	.write          = hostaudio_write,
 	.poll           = hostaudio_poll,
 	.unlocked_ioctl	= hostaudio_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.mmap           = NULL,
 	.open           = hostaudio_open,
 	.release        = hostaudio_release,
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 327b728f7244..35ebeebfc1a8 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -247,7 +247,7 @@ static void uml_net_set_multicast_list(struct net_device *dev)
 	return;
 }
 
-static void uml_net_tx_timeout(struct net_device *dev)
+static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	netif_trans_update(dev);
 	netif_wake_queue(dev);
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index b213201b8a3b..26c5716fac0f 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -196,3 +196,11 @@ static int ssl_chan_setup(char *str)
 
 __setup("ssl", ssl_chan_setup);
 __channel_help(ssl_chan_setup, "ssl");
+
+static int ssl_non_raw_setup(char *str)
+{
+	opts.raw = 0;
+	return 1;
+}
+__setup("ssl-non-raw", ssl_non_raw_setup);
+__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 6627d7c30f37..247f95da057b 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -113,6 +113,7 @@ static const struct block_device_operations ubd_blops = {
         .open		= ubd_open,
         .release	= ubd_release,
         .ioctl		= ubd_ioctl,
+        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 	.getgeo		= ubd_getgeo,
 };
 
@@ -561,7 +562,7 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 	__u32 version;
 	__u32 align;
 	char *backing_file;
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	int sector_size;
 	int bitmap_offset;
@@ -600,9 +601,9 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 	return 0;
 }
 
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 {
-	unsigned long modtime;
+	time64_t modtime;
 	unsigned long long actual;
 	int err;
 
@@ -628,7 +629,7 @@ static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 		return -EINVAL;
 	}
 	if (modtime != mtime) {
-		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 		       "backing file\n", mtime, modtime);
 		return -EINVAL;
 	}
@@ -671,7 +672,7 @@ static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 		  unsigned long *bitmap_len_out, int *data_offset_out,
 		  int *create_cow_out)
 {
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	__u32 version, align;
 	char *backing_file;
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 769ffbd9e9a6..0ff86391f77d 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2017 - 2019 Cambridge Greys Limited
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
@@ -21,6 +21,9 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include <uapi/linux/filter.h>
 #include <init.h>
 #include <irq_kern.h>
 #include <irq_user.h>
@@ -128,6 +131,23 @@ static int get_mtu(struct arglist *def)
 	return ETH_MAX_PACKET;
 }
 
+static char *get_bpf_file(struct arglist *def)
+{
+	return uml_vector_fetch_arg(def, "bpffile");
+}
+
+static bool get_bpf_flash(struct arglist *def)
+{
+	char *allow = uml_vector_fetch_arg(def, "bpfflash");
+	long result;
+
+	if (allow != NULL) {
+		if (kstrtoul(allow, 10, &result) == 0)
+			return (allow > 0);
+	}
+	return false;
+}
+
 static int get_depth(struct arglist *def)
 {
 	char *mtu = uml_vector_fetch_arg(def, "depth");
@@ -176,6 +196,7 @@ static int get_transport_options(struct arglist *def)
 	int vec_rx = VECTOR_RX;
 	int vec_tx = VECTOR_TX;
 	long parsed;
+	int result = 0;
 
 	if (vector != NULL) {
 		if (kstrtoul(vector, 10, &parsed) == 0) {
@@ -186,14 +207,16 @@ static int get_transport_options(struct arglist *def)
 		}
 	}
 
+	if (get_bpf_flash(def))
+		result = VECTOR_BPF_FLASH;
 
 	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
-		return 0;
+		return result;
 	if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
-		return (vec_rx | VECTOR_BPF);
+		return (result | vec_rx | VECTOR_BPF);
 	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
-		return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
-	return (vec_rx | vec_tx);
+		return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
+	return (result | vec_rx | vec_tx);
 }
 
 
@@ -1139,6 +1162,8 @@ static int vector_net_close(struct net_device *dev)
 	}
 	tasklet_kill(&vp->tx_poll);
 	if (vp->fds->rx_fd > 0) {
+		if (vp->bpf)
+			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
 		os_close_file(vp->fds->rx_fd);
 		vp->fds->rx_fd = -1;
 	}
@@ -1146,7 +1171,10 @@ static int vector_net_close(struct net_device *dev)
 		os_close_file(vp->fds->tx_fd);
 		vp->fds->tx_fd = -1;
 	}
+	if (vp->bpf != NULL)
+		kfree(vp->bpf->filter);
 	kfree(vp->bpf);
+	vp->bpf = NULL;
 	kfree(vp->fds->remote_addr);
 	kfree(vp->transport_data);
 	kfree(vp->header_rxbuffer);
@@ -1181,6 +1209,7 @@ static void vector_reset_tx(struct work_struct *work)
 	netif_start_queue(vp->dev);
 	netif_wake_queue(vp->dev);
 }
+
 static int vector_net_open(struct net_device *dev)
 {
 	struct vector_private *vp = netdev_priv(dev);
@@ -1196,6 +1225,8 @@ static int vector_net_open(struct net_device *dev)
 	vp->opened = true;
 	spin_unlock_irqrestore(&vp->lock, flags);
 
+	vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
+
 	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
 
 	if (vp->fds == NULL)
@@ -1267,8 +1298,11 @@ static int vector_net_open(struct net_device *dev)
 		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
 			vp->options |= VECTOR_BPF;
 	}
-	if ((vp->options & VECTOR_BPF) != 0)
-		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+	if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
+		vp->bpf = uml_vector_default_bpf(dev->dev_addr);
+
+	if (vp->bpf != NULL)
+		uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
 
 	netif_start_queue(dev);
 
@@ -1298,7 +1332,7 @@ static void vector_net_set_multicast_list(struct net_device *dev)
 	return;
 }
 
-static void vector_net_tx_timeout(struct net_device *dev)
+static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	struct vector_private *vp = netdev_priv(dev);
 
@@ -1347,6 +1381,65 @@ static void vector_net_get_drvinfo(struct net_device *dev,
 	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
 }
 
+static int vector_net_load_bpf_flash(struct net_device *dev,
+				struct ethtool_flash *efl)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	struct vector_device *vdevice;
+	const struct firmware *fw;
+	int result = 0;
+
+	if (!(vp->options & VECTOR_BPF_FLASH)) {
+		netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
+		return -1;
+	}
+
+	spin_lock(&vp->lock);
+
+	if (vp->bpf != NULL) {
+		if (vp->opened)
+			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
+		kfree(vp->bpf->filter);
+		vp->bpf->filter = NULL;
+	} else {
+		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
+		if (vp->bpf == NULL) {
+			netdev_err(dev, "failed to allocate memory for firmware\n");
+			goto flash_fail;
+		}
+	}
+
+	vdevice = find_device(vp->unit);
+
+	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
+		goto flash_fail;
+
+	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	if (!vp->bpf->filter)
+		goto free_buffer;
+
+	vp->bpf->len = fw->size / sizeof(struct sock_filter);
+	release_firmware(fw);
+
+	if (vp->opened)
+		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
+
+	spin_unlock(&vp->lock);
+
+	return result;
+
+free_buffer:
+	release_firmware(fw);
+
+flash_fail:
+	spin_unlock(&vp->lock);
+	if (vp->bpf != NULL)
+		kfree(vp->bpf->filter);
+	kfree(vp->bpf);
+	vp->bpf = NULL;
+	return -1;
+}
+
 static void vector_get_ringparam(struct net_device *netdev,
 				struct ethtool_ringparam *ring)
 {
@@ -1424,6 +1517,7 @@ static const struct ethtool_ops vector_net_ethtool_ops = {
 	.get_ethtool_stats = vector_get_ethtool_stats,
 	.get_coalesce	= vector_get_coalesce,
 	.set_coalesce	= vector_set_coalesce,
+	.flash_device	= vector_net_load_bpf_flash,
 };
 
 
@@ -1528,8 +1622,9 @@ static void vector_eth_configure(
 		.in_write_poll		= false,
 		.coalesce		= 2,
 		.req_size		= get_req_size(def),
-		.in_error		= false
-		});
+		.in_error		= false,
+		.bpf			= NULL
+	});
 
 	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
 	tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index 4d292e6c07af..d0159082faf0 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -29,10 +29,13 @@
 #define VECTOR_TX (1 << 1)
 #define VECTOR_BPF (1 << 2)
 #define VECTOR_QDISC_BYPASS (1 << 3)
+#define VECTOR_BPF_FLASH (1 << 4)
 
 #define ETH_MAX_PACKET 1500
 #define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
 
+#define MAX_FILTER_PROG (2 << 16)
+
 struct vector_queue {
 	struct mmsghdr *mmsg_vector;
 	void **skbuff_vector;
@@ -118,10 +121,13 @@ struct vector_private {
 	bool in_write_poll;
 	bool in_error;
 
+	/* guest allowed to use ethtool flash to load bpf */
+	bool bpf_via_flash;
+
 	/* ethtool stats */
 
 	struct vector_estats estats;
-	void *bpf;
+	struct sock_fprog *bpf;
 
 	char user[0];
 };
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index e2c969b9f7ee..ddcd917be0af 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -46,7 +46,8 @@
 #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
 #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
 #define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i"
-#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d prog %px to %d, err %d\n"
+#define BPF_DETACH_FAIL "Failed to detach filter size %d prog %px to %d, err %d\n"
 
 #define MAX_UN_LEN 107
 
@@ -660,31 +661,44 @@ int uml_vector_recvmmsg(
 	else
 		return -errno;
 }
-int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+int uml_vector_attach_bpf(int fd, void *bpf)
 {
-	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+	struct sock_fprog *prog = bpf;
+
+	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, sizeof(struct sock_fprog));
 
 	if (err < 0)
-		printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+		printk(KERN_ERR BPF_ATTACH_FAIL, prog->len, prog->filter, fd, -errno);
 	return err;
 }
 
-#define DEFAULT_BPF_LEN 6
+int uml_vector_detach_bpf(int fd, void *bpf)
+{
+	struct sock_fprog *prog = bpf;
 
-void *uml_vector_default_bpf(int fd, void *mac)
+	int err = setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, bpf, sizeof(struct sock_fprog));
+	if (err < 0)
+		printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno);
+	return err;
+}
+void *uml_vector_default_bpf(void *mac)
 {
 	struct sock_filter *bpf;
 	uint32_t *mac1 = (uint32_t *)(mac + 2);
 	uint16_t *mac2 = (uint16_t *) mac;
-	struct sock_fprog bpf_prog = {
-		.len = 6,
-		.filter = NULL,
-	};
+	struct sock_fprog *bpf_prog;
 
+	bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+	if (bpf_prog) {
+		bpf_prog->len = DEFAULT_BPF_LEN;
+		bpf_prog->filter = NULL;
+	} else {
+		return NULL;
+	}
 	bpf = uml_kmalloc(
 		sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
-	if (bpf != NULL) {
-		bpf_prog.filter = bpf;
+	if (bpf) {
+		bpf_prog->filter = bpf;
 		/* ld	[8] */
 		bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
 		/* jeq	#0xMAC[2-6] jt 2 jf 5*/
@@ -697,12 +711,56 @@ void *uml_vector_default_bpf(int fd, void *mac)
 		bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
 		/* ret	#0x40000 */
 		bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
-		if (uml_vector_attach_bpf(
-			fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
-			kfree(bpf);
-			bpf = NULL;
-		}
+	} else {
+		kfree(bpf_prog);
+		bpf_prog = NULL;
 	}
-	return bpf;
+	return bpf_prog;
 }
 
+/* Note - this function requires a valid mac being passed as an arg */
+
+void *uml_vector_user_bpf(char *filename)
+{
+	struct sock_filter *bpf;
+	struct sock_fprog *bpf_prog;
+	struct stat statbuf;
+	int res, ffd = -1;
+
+	if (filename == NULL)
+		return NULL;
+
+	if (stat(filename, &statbuf) < 0) {
+		printk(KERN_ERR "Error %d reading bpf file", -errno);
+		return false;
+	}
+	bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+	if (bpf_prog != NULL) {
+		bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
+		bpf_prog->filter = NULL;
+	}
+	ffd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+	if (ffd < 0) {
+		printk(KERN_ERR "Error %d opening bpf file", -errno);
+		goto bpf_failed;
+	}
+	bpf = uml_kmalloc(statbuf.st_size, UM_GFP_KERNEL);
+	if (bpf == NULL) {
+		printk(KERN_ERR "Failed to allocate bpf buffer");
+		goto bpf_failed;
+	}
+	bpf_prog->filter = bpf;
+	res = os_read_file(ffd, bpf, statbuf.st_size);
+	if (res < statbuf.st_size) {
+		printk(KERN_ERR "Failed to read bpf program %s, error %d", filename, res);
+		kfree(bpf);
+		goto bpf_failed;
+	}
+	os_close_file(ffd);
+	return bpf_prog;
+bpf_failed:
+	if (ffd > 0)
+		os_close_file(ffd);
+	kfree(bpf_prog);
+	return NULL;
+}
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
index 649ec250268b..91f35b266aba 100644
--- a/arch/um/drivers/vector_user.h
+++ b/arch/um/drivers/vector_user.h
@@ -28,6 +28,8 @@
 #define TRANS_BESS "bess"
 #define TRANS_BESS_LEN strlen(TRANS_BESS)
 
+#define DEFAULT_BPF_LEN 6
+
 #ifndef IPPROTO_GRE
 #define IPPROTO_GRE 0x2F
 #endif
@@ -95,8 +97,10 @@ extern int uml_vector_recvmmsg(
 	unsigned int vlen,
 	unsigned int flags
 );
-extern void *uml_vector_default_bpf(int fd, void *mac);
-extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern void *uml_vector_default_bpf(void *mac);
+extern void *uml_vector_user_bpf(char *filename);
+extern int uml_vector_attach_bpf(int fd, void *bpf);
+extern int uml_vector_detach_bpf(int fd, void *bpf);
 extern bool uml_raw_enable_qdisc_bypass(int fd);
 extern bool uml_raw_enable_vnet_headers(int fd);
 extern bool uml_tap_enable_vnet_headers(int fd);
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index fc8c52cff5aa..023ced2250ea 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -4,12 +4,12 @@
  *
  * Copyright(c) 2019 Intel Corporation
  *
- * This module allows virtio devices to be used over a vhost-user socket.
+ * This driver allows virtio devices to be used over a vhost-user socket.
  *
  * Guest devices can be instantiated by kernel module or command line
  * parameters. One device will be created for each parameter. Syntax:
  *
- *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
+ *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
  * where:
  *		<socket>	:= vhost-user socket path to connect
  *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
@@ -42,6 +42,13 @@
 #define to_virtio_uml_device(_vdev) \
 	container_of(_vdev, struct virtio_uml_device, vdev)
 
+struct virtio_uml_platform_data {
+	u32 virtio_device_id;
+	const char *socket_path;
+	struct work_struct conn_broken_wk;
+	struct platform_device *pdev;
+};
+
 struct virtio_uml_device {
 	struct virtio_device vdev;
 	struct platform_device *pdev;
@@ -50,6 +57,7 @@ struct virtio_uml_device {
 	u64 features;
 	u64 protocol_features;
 	u8 status;
+	u8 registered:1;
 };
 
 struct virtio_uml_vq_info {
@@ -83,7 +91,7 @@ static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
 	return 0;
 }
 
-static int full_read(int fd, void *buf, int len)
+static int full_read(int fd, void *buf, int len, bool abortable)
 {
 	int rc;
 
@@ -93,7 +101,7 @@ static int full_read(int fd, void *buf, int len)
 			buf += rc;
 			len -= rc;
 		}
-	} while (len && (rc > 0 || rc == -EINTR));
+	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
 
 	if (rc < 0)
 		return rc;
@@ -104,28 +112,37 @@ static int full_read(int fd, void *buf, int len)
 
 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
 {
-	return full_read(fd, msg, sizeof(msg->header));
+	return full_read(fd, msg, sizeof(msg->header), true);
 }
 
-static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
+static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+			   int fd, struct vhost_user_msg *msg,
 			   size_t max_payload_size)
 {
 	size_t size;
 	int rc = vhost_user_recv_header(fd, msg);
 
+	if (rc == -ECONNRESET && vu_dev->registered) {
+		struct virtio_uml_platform_data *pdata;
+
+		pdata = vu_dev->pdev->dev.platform_data;
+
+		virtio_break_device(&vu_dev->vdev);
+		schedule_work(&pdata->conn_broken_wk);
+	}
 	if (rc)
 		return rc;
 	size = msg->header.size;
 	if (size > max_payload_size)
 		return -EPROTO;
-	return full_read(fd, &msg->payload, size);
+	return full_read(fd, &msg->payload, size, false);
 }
 
 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
 				struct vhost_user_msg *msg,
 				size_t max_payload_size)
 {
-	int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
+	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size);
 
 	if (rc)
 		return rc;
@@ -155,7 +172,7 @@ static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
 			       struct vhost_user_msg *msg,
 			       size_t max_payload_size)
 {
-	int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
+	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, max_payload_size);
 
 	if (rc)
 		return rc;
@@ -963,11 +980,6 @@ static void virtio_uml_release_dev(struct device *d)
 
 /* Platform device */
 
-struct virtio_uml_platform_data {
-	u32 virtio_device_id;
-	const char *socket_path;
-};
-
 static int virtio_uml_probe(struct platform_device *pdev)
 {
 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
@@ -1005,6 +1017,7 @@ static int virtio_uml_probe(struct platform_device *pdev)
 	rc = register_virtio_device(&vu_dev->vdev);
 	if (rc)
 		put_device(&vu_dev->vdev.dev);
+	vu_dev->registered = 1;
 	return rc;
 
 error_init:
@@ -1034,13 +1047,31 @@ static struct device vu_cmdline_parent = {
 static bool vu_cmdline_parent_registered;
 static int vu_cmdline_id;
 
+static int vu_unregister_cmdline_device(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+
+	kfree(pdata->socket_path);
+	platform_device_unregister(pdev);
+	return 0;
+}
+
+static void vu_conn_broken(struct work_struct *wk)
+{
+	struct virtio_uml_platform_data *pdata;
+
+	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
+}
+
 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
 {
 	const char *ids = strchr(device, ':');
 	unsigned int virtio_device_id;
 	int processed, consumed, err;
 	char *socket_path;
-	struct virtio_uml_platform_data pdata;
+	struct virtio_uml_platform_data pdata, *ppdata;
 	struct platform_device *pdev;
 
 	if (!ids || ids == device)
@@ -1079,6 +1110,11 @@ static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
 	err = PTR_ERR_OR_ZERO(pdev);
 	if (err)
 		goto free;
+
+	ppdata = pdev->dev.platform_data;
+	ppdata->pdev = pdev;
+	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
+
 	return 0;
 
 free:
@@ -1121,16 +1157,6 @@ __uml_help(vu_cmdline_param_ops,
 );
 
 
-static int vu_unregister_cmdline_device(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
-
-	kfree(pdata->socket_path);
-	platform_device_unregister(pdev);
-	return 0;
-}
-
 static void vu_unregister_cmdline_devices(void)
 {
 	if (vu_cmdline_parent_registered) {
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 398006d27e40..db7d9d4e30d8 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -25,5 +25,4 @@ generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
-generic-y += xor.h
 generic-y += kprobes.h
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index d7086b985f27..eca6c452a41b 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -9,14 +9,13 @@
   _sdata = .;
   PROVIDE (sdata = .);
 
-  RODATA
+  RO_DATA(4096)
 
   .unprotected : { *(.unprotected) }
   . = ALIGN(4096);
   PROVIDE (_unprotected_end = .);
 
   . = ALIGN(4096);
-  NOTES
   EXCEPTION_TABLE(0)
 
   BUG_TABLE
@@ -83,8 +82,8 @@
 	__preinit_array_end = .;
   }
   .init_array : {
-        /* dummy - we call this ourselves */
 	__init_array_start = .;
+	*(.init_array)
 	__init_array_end = .;
   }
   .fini_array : {
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 32b3d26a7109..32106d31e4ab 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,7 +8,6 @@
 #ifndef __UM_PGTABLE_2LEVEL_H
 #define __UM_PGTABLE_2LEVEL_H
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9812269fefc9..8a3b689e0f86 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,7 +7,6 @@
 #ifndef __UM_PGTABLE_3LEVEL_H
 #define __UM_PGTABLE_3LEVEL_H
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 36a44d58f373..2daa58df2190 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -106,6 +106,9 @@ extern unsigned long end_iomem;
 #define pud_newpage(x)  (pud_val(x) & _PAGE_NEWPAGE)
 #define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
 
+#define p4d_newpage(x)  (p4d_val(x) & _PAGE_NEWPAGE)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+
 #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
 
 #define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 81c647ef9c6c..adf91ef553ae 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -36,7 +36,7 @@ extern long subarch_ptrace(struct task_struct *child, long request,
 extern unsigned long getreg(struct task_struct *child, int regno);
 extern int putreg(struct task_struct *child, int regno, unsigned long value);
 
-extern int arch_copy_tls(struct task_struct *new);
+extern int arch_set_tls(struct task_struct *new, unsigned long tls);
 extern void clear_flushed_tls(struct task_struct *task);
 extern int syscall_trace_enter(struct pt_regs *regs);
 extern void syscall_trace_leave(struct pt_regs *regs);
diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h
new file mode 100644
index 000000000000..9a7b9ed93733
--- /dev/null
+++ b/arch/um/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UM_VMALLOC_H
+#define _ASM_UM_VMALLOC_H
+
+#endif /* _ASM_UM_VMALLOC_H */
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
new file mode 100644
index 000000000000..7a3208c47cfc
--- /dev/null
+++ b/arch/um/include/asm/xor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/xor.h>
+#include <shared/timer-internal.h>
+
+/* pick an arbitrary one - measuring isn't possible with inf-cpu */
+#define XOR_SELECT_TEMPLATE(x)	\
+	(time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 506bcd1bca68..0f30204b6afa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -150,7 +150,7 @@ extern int os_sync_file(int fd);
 extern int os_file_size(const char *file, unsigned long long *size_out);
 extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
 extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
-extern int os_file_modtime(const char *file, unsigned long *modtime);
+extern int os_file_modtime(const char *file, long long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
 extern int os_set_fd_async(int fd);
 extern int os_clear_fd_async(int fd);
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index c69d69ee96be..f5001481010c 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -103,6 +103,7 @@ SECTIONS
      be empty, which isn't pretty.  */
   . = ALIGN(32 / 8);
   .preinit_array     : { *(.preinit_array) }
+  .init_array     : { *(.init_array) }
   .fini_array     : { *(.fini_array) }
   .data           : {
     INIT_TASK_DATA(KERNEL_STACK_SIZE)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff647fb37..30885d0b94ac 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 				 pgd_t *pgd_base)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	int i, j;
@@ -107,7 +108,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 	pgd = pgd_base + i;
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
-		pud = pud_offset(pgd, vaddr);
+		p4d = p4d_offset(pgd, vaddr);
+		pud = pud_offset(p4d, vaddr);
 		if (pud_none(*pud))
 			one_md_table_init(pud);
 		pmd = pmd_offset(pud, vaddr);
@@ -124,6 +126,7 @@ static void __init fixaddr_user_init( void)
 #ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
 	long size = FIXADDR_USER_END - FIXADDR_USER_START;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -144,7 +147,8 @@ static void __init fixaddr_user_init( void)
 	for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
 		      p += PAGE_SIZE) {
 		pgd = swapper_pg_dir + pgd_index(vaddr);
-		pud = pud_offset(pgd, vaddr);
+		p4d = p4d_offset(pgd, vaddr);
+		pud = pud_offset(p4d, vaddr);
 		pmd = pmd_offset(pud, vaddr);
 		pte = pte_offset_kernel(pmd, vaddr);
 		pte_set_val(*pte, p, PAGE_READONLY);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 263a8f069133..17045e7211bf 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -153,8 +153,8 @@ void fork_handler(void)
 	userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct * p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+		unsigned long arg, struct task_struct * p, unsigned long tls)
 {
 	void (*handler)(void);
 	int kthread = current->flags & PF_KTHREAD;
@@ -188,7 +188,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		 * Set a new TLS for the child thread?
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			ret = arch_copy_tls(p);
+			ret = arch_set_tls(p, tls);
 	}
 
 	return ret;
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91fc9c2..3f0d9a573fd6 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -19,15 +19,21 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 			 unsigned long kernel)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
 	pgd = pgd_offset(mm, proc);
-	pud = pud_alloc(mm, pgd, proc);
-	if (!pud)
+
+	p4d = p4d_alloc(mm, pgd, proc);
+	if (!p4d)
 		goto out;
 
+	pud = pud_alloc(mm, p4d, proc);
+	if (!pud)
+		goto out_pud;
+
 	pmd = pmd_alloc(mm, pud, proc);
 	if (!pmd)
 		goto out_pmd;
@@ -44,6 +50,8 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	pmd_free(mm, pmd);
  out_pmd:
 	pud_free(mm, pud);
+ out_pud:
+	p4d_free(mm, p4d);
  out:
 	return -ENOMEM;
 }
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index f574b1856bc6..40d90dddf3f1 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -35,7 +35,7 @@ void handle_syscall(struct uml_pt_regs *r)
 		goto out;
 
 	/* Do the seccomp check after ptrace; failures should be fast. */
-	if (secure_computing(NULL) == -1)
+	if (secure_computing() == -1)
 		goto out;
 
 	syscall = UPT_SYSCALL_NR(r);
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052f20e6..d617f8dc9c19 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -17,6 +17,7 @@
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_present(*pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
 	if (!pud_present(*pud))
 		return NULL;
 
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index b7eaf655635c..80a358c6d652 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -277,7 +277,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 	return ret;
 }
 
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
 				   unsigned long end,
 				   struct host_vm_change *hvc)
 {
@@ -285,7 +285,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
 	unsigned long next;
 	int ret = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (!pud_present(*pud)) {
@@ -299,6 +299,28 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
 	return ret;
 }
 
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+				   unsigned long end,
+				   struct host_vm_change *hvc)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int ret = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (!p4d_present(*p4d)) {
+			if (hvc->force || p4d_newpage(*p4d)) {
+				ret = add_munmap(addr, next - addr, hvc);
+				p4d_mkuptodate(*p4d);
+			}
+		} else
+			ret = update_pud_range(p4d, addr, next, hvc);
+	} while (p4d++, addr = next, ((addr < end) && !ret));
+	return ret;
+}
+
 void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 		      unsigned long end_addr, int force)
 {
@@ -316,8 +338,8 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 				ret = add_munmap(addr, next - addr, &hvc);
 				pgd_mkuptodate(*pgd);
 			}
-		}
-		else ret = update_pud_range(pgd, addr, next, &hvc);
+		} else
+			ret = update_p4d_range(pgd, addr, next, &hvc);
 	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
 
 	if (!ret)
@@ -338,6 +360,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -364,7 +387,23 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 			continue;
 		}
 
-		pud = pud_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_present(*p4d)) {
+			last = ADD_ROUND(addr, P4D_SIZE);
+			if (last > end)
+				last = end;
+			if (p4d_newpage(*p4d)) {
+				updated = 1;
+				err = add_munmap(addr, last - addr, &hvc);
+				if (err < 0)
+					panic("munmap failed, errno = %d\n",
+					      -err);
+			}
+			addr = last;
+			continue;
+		}
+
+		pud = pud_offset(p4d, addr);
 		if (!pud_present(*pud)) {
 			last = ADD_ROUND(addr, PUD_SIZE);
 			if (last > end)
@@ -424,6 +463,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -437,7 +477,11 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 	if (!pgd_present(*pgd))
 		goto kill;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto kill;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto kill;
 
@@ -490,35 +534,6 @@ kill:
 	force_sig(SIGKILL);
 }
 
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
-	return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
-	return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
-	return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
-	return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset(task->mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
-	pmd_t *pmd = pmd_offset(pud, addr);
-
-	return pte_offset_map(pmd, addr);
-}
-
 void flush_tlb_all(void)
 {
 	/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c66c95..818553064f04 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -28,6 +28,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -104,7 +105,8 @@ good_area:
 		}
 
 		pgd = pgd_offset(mm, address);
-		pud = pud_offset(pgd, address);
+		p4d = p4d_offset(pgd, address);
+		pud = pud_offset(p4d, address);
 		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
 	} while (!pte_present(*pte));
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 5133e3afb96f..fbda10535dab 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -341,7 +341,7 @@ int os_file_size(const char *file, unsigned long long *size_out)
 	return 0;
 }
 
-int os_file_modtime(const char *file, unsigned long *modtime)
+int os_file_modtime(const char *file, long long *modtime)
 {
 	struct uml_stat buf;
 	int err;
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 8014dfac644d..c8a42ecbd7a2 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -170,7 +170,7 @@ int __init main(int argc, char **argv, char **envp)
 	 * that they won't be delivered after the exec, when
 	 * they are definitely not expected.
 	 */
-	unblock_signals_trace();
+	unblock_signals();
 
 	os_info("\n");
 	/* Reboot */
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
index c71aa4b95996..3ca74e1cde7d 100644
--- a/arch/unicore32/include/asm/io.h
+++ b/arch/unicore32/include/asm/io.h
@@ -18,10 +18,9 @@
 #include <asm-generic/io.h>
 
 /*
- * __uc32_ioremap and __uc32_ioremap_cached takes CPU physical address.
+ * __uc32_ioremap takes CPU physical address.
  */
 extern void __iomem *__uc32_ioremap(unsigned long, size_t);
-extern void __iomem *__uc32_ioremap_cached(unsigned long, size_t);
 extern void __uc32_iounmap(volatile void __iomem *addr);
 
 /*
@@ -32,8 +31,6 @@ extern void __uc32_iounmap(volatile void __iomem *addr);
  *
  */
 #define ioremap(cookie, size)		__uc32_ioremap(cookie, size)
-#define ioremap_cached(cookie, size)	__uc32_ioremap_cached(cookie, size)
-#define ioremap_nocache(cookie, size)	__uc32_ioremap(cookie, size)
 #define iounmap(cookie)			__uc32_iounmap(cookie)
 
 #define readb_relaxed readb
diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h
new file mode 100644
index 000000000000..054435818a14
--- /dev/null
+++ b/arch/unicore32/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UNICORE32_VMALLOC_H
+#define _ASM_UNICORE32_VMALLOC_H
+
+#endif /* _ASM_UNICORE32_VMALLOC_H */
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 95ae3b54df68..0c4242a5ee1d 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -270,8 +270,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 #endif
 #endif
 	early_trap_init();
diff --git a/arch/unicore32/kernel/vmlinux.lds.S b/arch/unicore32/kernel/vmlinux.lds.S
index 7abf90537cd5..6fb320b337ef 100644
--- a/arch/unicore32/kernel/vmlinux.lds.S
+++ b/arch/unicore32/kernel/vmlinux.lds.S
@@ -43,12 +43,11 @@ SECTIONS
 	_etext = .;
 
 	_sdata = .;
-	RO_DATA_SECTION(PAGE_SIZE)
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RO_DATA(PAGE_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
 	EXCEPTION_TABLE(L1_CACHE_BYTES)
-	NOTES
 
 	BSS_SECTION(0, 0, 0)
 	_end = .;
diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c
index cf6d656f240c..46a64bd6156a 100644
--- a/arch/unicore32/mm/ioremap.c
+++ b/arch/unicore32/mm/ioremap.c
@@ -220,14 +220,6 @@ __uc32_ioremap(unsigned long phys_addr, size_t size)
 }
 EXPORT_SYMBOL(__uc32_ioremap);
 
-void __iomem *
-__uc32_ioremap_cached(unsigned long phys_addr, size_t size)
-{
-	return __uc32_ioremap_caller(phys_addr, size, MT_DEVICE_CACHED,
-			__builtin_return_address(0));
-}
-EXPORT_SYMBOL(__uc32_ioremap_cached);
-
 void __uc32_iounmap(volatile void __iomem *io_addr)
 {
 	void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1c25ca11236f..90288ab1b99e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86_64
 	depends on 64BIT
 	# Options that are inherently 64-bit kernel only:
 	select ARCH_HAS_GIGANTIC_PAGE
-	select ARCH_SUPPORTS_INT128
+	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_ARCH_SOFT_DIRTY
 	select MODULES_USE_ELF_RELA
@@ -73,7 +73,6 @@ config X86
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
 	select ARCH_HAS_PTE_SPECIAL
-	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_UACCESS_MCSAFE		if X86_64 && X86_MCE
 	select ARCH_HAS_SET_MEMORY
@@ -94,10 +93,11 @@ config X86
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANTS_THP_SWAP		if X86_64
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLKEVT_I8253
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select CLOCKSOURCE_WATCHDOG
@@ -125,6 +125,7 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_GETTIMEOFDAY
+	select GENERIC_VDSO_TIME_NS
 	select GUP_GET_PTE_LOW_HIGH		if X86_PAE
 	select HARDLOCKUP_CHECK_TIMESTAMP	if X86_64
 	select HAVE_ACPI_APEI			if ACPI
@@ -135,6 +136,7 @@ config X86
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN			if X86_64
+	select HAVE_ARCH_KASAN_VMALLOC		if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
@@ -158,6 +160,7 @@ config X86
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EISA
@@ -438,8 +441,8 @@ config X86_MPPARSE
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
 config GOLDFISH
-       def_bool y
-       depends on X86_GOLDFISH
+	def_bool y
+	depends on X86_GOLDFISH
 
 config RETPOLINE
 	bool "Avoid speculative indirect branches in kernel"
@@ -455,6 +458,7 @@ config X86_CPU_RESCTRL
 	bool "x86 CPU resource control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
 	select KERNFS
+	select PROC_CPU_RESCTRL		if PROC_FS
 	help
 	  Enable x86 CPU resource control support.
 
@@ -476,7 +480,7 @@ config X86_BIGSMP
 	bool "Support for big SMP systems with more than 8 CPUs"
 	depends on SMP
 	---help---
-	  This option is needed for the systems that have more than 8 CPUs
+	  This option is needed for the systems that have more than 8 CPUs.
 
 config X86_EXTENDED_PLATFORM
 	bool "Support for extended (non-PC) x86 platforms"
@@ -560,9 +564,9 @@ config X86_UV
 # Please maintain the alphabetic order if and when there are additions
 
 config X86_GOLDFISH
-       bool "Goldfish (Virtual Platform)"
-       depends on X86_EXTENDED_PLATFORM
-       ---help---
+	bool "Goldfish (Virtual Platform)"
+	depends on X86_EXTENDED_PLATFORM
+	---help---
 	 Enable support for the Goldfish virtual platform used primarily
 	 for Android development. Unless you are building for the Android
 	 Goldfish emulator say N here.
@@ -708,7 +712,6 @@ config X86_SUPPORTS_MEMORY_FAILURE
 config STA2X11
 	bool "STA2X11 Companion Chip Support"
 	depends on X86_32_NON_STANDARD && PCI
-	select ARCH_HAS_PHYS_TO_DMA
 	select SWIOTLB
 	select MFD_STA2X11
 	select GPIOLIB
@@ -806,9 +809,9 @@ config KVM_GUEST
 	  timing infrastructure such as time of day, and system time
 
 config ARCH_CPUIDLE_HALTPOLL
-        def_bool n
-        prompt "Disable host haltpoll when loading haltpoll driver"
-        help
+	def_bool n
+	prompt "Disable host haltpoll when loading haltpoll driver"
+	help
 	  If virtualized under KVM, disable host haltpoll.
 
 config PVH
@@ -887,16 +890,16 @@ config HPET_EMULATE_RTC
 	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
 config APB_TIMER
-       def_bool y if X86_INTEL_MID
-       prompt "Intel MID APB Timer Support" if X86_INTEL_MID
-       select DW_APB_TIMER
-       depends on X86_INTEL_MID && SFI
-       help
-         APB timer is the replacement for 8254, HPET on X86 MID platforms.
-         The APBT provides a stable time base on SMP
-         systems, unlike the TSC, but it is more expensive to access,
-         as it is off-chip. APB timers are always running regardless of CPU
-         C states, they are used as per CPU clockevent device when possible.
+	def_bool y if X86_INTEL_MID
+	prompt "Intel MID APB Timer Support" if X86_INTEL_MID
+	select DW_APB_TIMER
+	depends on X86_INTEL_MID && SFI
+	help
+	 APB timer is the replacement for 8254, HPET on X86 MID platforms.
+	 The APBT provides a stable time base on SMP
+	 systems, unlike the TSC, but it is more expensive to access,
+	 as it is off-chip. APB timers are always running regardless of CPU
+	 C states, they are used as per CPU clockevent device when possible.
 
 # Mark as expert because too many people got it wrong.
 # The code disables itself when not needed.
@@ -932,36 +935,6 @@ config GART_IOMMU
 
 	  If unsure, say Y.
 
-config CALGARY_IOMMU
-	bool "IBM Calgary IOMMU support"
-	select IOMMU_HELPER
-	select SWIOTLB
-	depends on X86_64 && PCI
-	---help---
-	  Support for hardware IOMMUs in IBM's xSeries x366 and x460
-	  systems. Needed to run systems with more than 3GB of memory
-	  properly with 32-bit PCI devices that do not support DAC
-	  (Double Address Cycle). Calgary also supports bus level
-	  isolation, where all DMAs pass through the IOMMU.  This
-	  prevents them from going anywhere except their intended
-	  destination. This catches hard-to-find kernel bugs and
-	  mis-behaving drivers and devices that do not use the DMA-API
-	  properly to set up their DMA buffers.  The IOMMU can be
-	  turned off at boot time with the iommu=off parameter.
-	  Normally the kernel will make the right choice by itself.
-	  If unsure, say Y.
-
-config CALGARY_IOMMU_ENABLED_BY_DEFAULT
-	def_bool y
-	prompt "Should Calgary be enabled by default?"
-	depends on CALGARY_IOMMU
-	---help---
-	  Should Calgary be enabled by default? if you choose 'y', Calgary
-	  will be used (if it exists). If you choose 'n', Calgary will not be
-	  used even if it exists. If you choose 'n' and would like to use
-	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
-	  If unsure, say Y.
-
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL
@@ -1000,8 +973,8 @@ config NR_CPUS_RANGE_END
 config NR_CPUS_RANGE_END
 	int
 	depends on X86_64
-	default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
-	default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+	default 8192 if  SMP && CPUMASK_OFFSTACK
+	default  512 if  SMP && !CPUMASK_OFFSTACK
 	default    1 if !SMP
 
 config NR_CPUS_DEFAULT
@@ -1065,8 +1038,8 @@ config SCHED_MC_PRIO
 	  If unsure say Y here.
 
 config UP_LATE_INIT
-       def_bool y
-       depends on !SMP && X86_LOCAL_APIC
+	def_bool y
+	depends on !SMP && X86_LOCAL_APIC
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors" if !PCI_MSI
@@ -1215,8 +1188,8 @@ config X86_LEGACY_VM86
 	  If unsure, say N here.
 
 config VM86
-       bool
-       default X86_LEGACY_VM86
+	bool
+	default X86_LEGACY_VM86
 
 config X86_16BIT
 	bool "Enable support for 16-bit segments" if EXPERT
@@ -1237,10 +1210,10 @@ config X86_ESPFIX64
 	depends on X86_16BIT && X86_64
 
 config X86_VSYSCALL_EMULATION
-       bool "Enable vsyscall emulation" if EXPERT
-       default y
-       depends on X86_64
-       ---help---
+	bool "Enable vsyscall emulation" if EXPERT
+	default y
+	depends on X86_64
+	---help---
 	 This enables emulation of the legacy vsyscall page.  Disabling
 	 it is roughly equivalent to booting with vsyscall=none, except
 	 that it will also disable the helpful warning if a program
@@ -1254,6 +1227,24 @@ config X86_VSYSCALL_EMULATION
 	 Disabling this option saves about 7K of kernel size and
 	 possibly 4K of additional runtime pagetable memory.
 
+config X86_IOPL_IOPERM
+	bool "IOPERM and IOPL Emulation"
+	default y
+	---help---
+	  This enables the ioperm() and iopl() syscalls which are necessary
+	  for legacy applications.
+
+	  Legacy IOPL support is an overbroad mechanism which allows user
+	  space aside of accessing all 65536 I/O ports also to disable
+	  interrupts. To gain this access the caller needs CAP_SYS_RAWIO
+	  capabilities and permission from potentially active security
+	  modules.
+
+	  The emulation restricts the functionality of the syscall to
+	  only allowing the full range I/O port access, but prevents the
+	  ability to disable interrupts from user space which would be
+	  granted if the hardware IOPL mechanism would be used.
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
@@ -1492,6 +1483,7 @@ config X86_PAE
 
 config X86_5LEVEL
 	bool "Enable 5-level page tables support"
+	default y
 	select DYNAMIC_MEMORY_LAYOUT
 	select SPARSEMEM_VMEMMAP
 	depends on X86_64
@@ -1523,7 +1515,7 @@ config X86_CPA_STATISTICS
 	bool "Enable statistic for Change Page Attribute"
 	depends on DEBUG_FS
 	---help---
-	  Expose statistics about the Change Page Attribute mechanims, which
+	  Expose statistics about the Change Page Attribute mechanism, which
 	  helps to determine the effectiveness of preserving large and huge
 	  page mappings when mapping protections are changed.
 
@@ -1554,12 +1546,12 @@ config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
 
 # Common NUMA Features
 config NUMA
-	bool "Numa Memory Allocation and Scheduler Support"
+	bool "NUMA Memory Allocation and Scheduler Support"
 	depends on SMP
 	depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
 	default y if X86_BIGSMP
 	---help---
-	  Enable NUMA (Non Uniform Memory Access) support.
+	  Enable NUMA (Non-Uniform Memory Access) support.
 
 	  The kernel will try to allocate memory used by a CPU on the
 	  local memory controller of the CPU and add some more
@@ -1659,9 +1651,9 @@ config ARCH_PROC_KCORE_TEXT
 	depends on X86_64 && PROC_KCORE
 
 config ILLEGAL_POINTER_VALUE
-       hex
-       default 0 if X86_32
-       default 0xdead000000000000 if X86_64
+	hex
+	default 0 if X86_32
+	default 0xdead000000000000 if X86_64
 
 config X86_PMEM_LEGACY_DEVICE
 	bool
@@ -1751,7 +1743,7 @@ config X86_RESERVE_LOW
 config MATH_EMULATION
 	bool
 	depends on MODIFY_LDT_SYSCALL
-	prompt "Math emulation" if X86_32
+	prompt "Math emulation" if X86_32 && (M486SX || MELAN)
 	---help---
 	  Linux can emulate a math coprocessor (used for floating point
 	  operations) if you don't have one. 486DX and Pentium processors have
@@ -1880,16 +1872,16 @@ config X86_SMAP
 
 	  If unsure, say Y.
 
-config X86_INTEL_UMIP
+config X86_UMIP
 	def_bool y
-	depends on CPU_SUP_INTEL
-	prompt "Intel User Mode Instruction Prevention" if EXPERT
+	depends on CPU_SUP_INTEL || CPU_SUP_AMD
+	prompt "User Mode Instruction Prevention" if EXPERT
 	---help---
-	  The User Mode Instruction Prevention (UMIP) is a security
-	  feature in newer Intel processors. If enabled, a general
-	  protection fault is issued if the SGDT, SLDT, SIDT, SMSW
-	  or STR instructions are executed in user mode. These instructions
-	  unnecessarily expose information about the hardware state.
+	  User Mode Instruction Prevention (UMIP) is a security feature in
+	  some x86 processors. If enabled, a general protection fault is
+	  issued if the SGDT, SLDT, SIDT, SMSW or STR instructions are
+	  executed in user mode. These instructions unnecessarily expose
+	  information about the hardware state.
 
 	  The vast majority of applications do not use these instructions.
 	  For the very few that do, software emulation is provided in
@@ -1974,11 +1966,12 @@ config EFI
 	  platforms.
 
 config EFI_STUB
-       bool "EFI stub support"
-       depends on EFI && !X86_USE_3DNOW
-       select RELOCATABLE
-       ---help---
-          This kernel feature allows a bzImage to be loaded directly
+	bool "EFI stub support"
+	depends on EFI && !X86_USE_3DNOW
+	depends on $(cc-option,-mabi=ms) || X86_32
+	select RELOCATABLE
+	---help---
+	  This kernel feature allows a bzImage to be loaded directly
 	  by EFI firmware without the use of a bootloader.
 
 	  See Documentation/admin-guide/efi-stub.rst for more information.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8e29c991ba3e..bc3a497c029c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -50,12 +50,19 @@ choice
 	  See each option's help text for additional details. If you don't know
 	  what to do, choose "486".
 
+config M486SX
+	bool "486SX"
+	depends on X86_32
+	---help---
+	  Select this for an 486-class CPU without an FPU such as
+	  AMD/Cyrix/IBM/Intel SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5S.
+
 config M486
-	bool "486"
+	bool "486DX"
 	depends on X86_32
 	---help---
 	  Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel
-	  486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+	  486DX/DX2/DX4 and UMC U5D.
 
 config M586
 	bool "586/K5/5x86/6x86/6x86MX"
@@ -312,20 +319,20 @@ config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if MELAN || M486 || MGEODEGX1
+	default "4" if MELAN || M486SX || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
 config X86_F00F_BUG
 	def_bool y
-	depends on M586MMX || M586TSC || M586 || M486
+	depends on M586MMX || M586TSC || M586 || M486SX || M486
 
 config X86_INVD_BUG
 	def_bool y
-	depends on M486
+	depends on M486SX || M486
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
@@ -378,7 +385,15 @@ config X86_MINIMUM_CPU_FAMILY
 
 config X86_DEBUGCTLMSR
 	def_bool y
-	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
+	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML
+
+config IA32_FEAT_CTL
+	def_bool y
+	depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN
+
+config X86_VMX_FEATURE_NAMES
+	def_bool y
+	depends on IA32_FEAT_CTL && X86_FEATURE_NAMES
 
 menuconfig PROCESSOR_SELECT
 	bool "Supported processor vendors" if EXPERT
@@ -402,7 +417,7 @@ config CPU_SUP_INTEL
 config CPU_SUP_CYRIX_32
 	default y
 	bool "Support Cyrix processors" if PROCESSOR_SELECT
-	depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
+	depends on M486SX || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for Cyrix processors
 
@@ -470,7 +485,7 @@ config CPU_SUP_TRANSMETA_32
 config CPU_SUP_UMC_32
 	default y
 	bool "Support UMC processors" if PROCESSOR_SELECT
-	depends on M486 || (EXPERT && !64BIT)
+	depends on M486SX || M486 || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for UMC processors
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf9cd83de777..c4eab8ed33a3 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,7 +117,7 @@ config DEBUG_WX
 
 config DOUBLEFAULT
 	default y
-	bool "Enable doublefault exception handler" if EXPERT
+	bool "Enable doublefault exception handler" if EXPERT && X86_32
 	---help---
 	  This option allows trapping of rare doublefault exceptions that
 	  would otherwise cause a system to silently reboot. Disabling this
@@ -316,10 +316,6 @@ config UNWINDER_FRAME_POINTER
 	  unwinder, but the kernel text size will grow by ~3% and the kernel's
 	  overall performance will degrade by roughly 5-10%.
 
-	  This option is recommended if you want to use the livepatch
-	  consistency model, as this is currently the only way to get a
-	  reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
 config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1f5faf8606b4..cd3056759880 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -10,6 +10,7 @@ else
 tune		= $(call cc-option,-mcpu=$(1),$(2))
 endif
 
+cflags-$(CONFIG_M486SX)		+= -march=i486
 cflags-$(CONFIG_M486)		+= -march=i486
 cflags-$(CONFIG_M586)		+= -march=i586
 cflags-$(CONFIG_M586TSC)	+= -march=i586
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index e2839b5c246c..748b6d28a91d 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -67,6 +67,7 @@ clean-files += cpustr.h
 
 KBUILD_CFLAGS	:= $(REALMODE_CFLAGS) -D_SETUP
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
+KBUILD_CFLAGS	+= $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 GCOV_PROFILE := n
 UBSAN_SANITIZE := n
 
@@ -87,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
 
 quiet_cmd_zoffset = ZOFFSET $@
       cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6b84afdd7538..56aa5fa0a66b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -38,6 +38,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
 KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
 KBUILD_CFLAGS += -Wno-pointer-sign
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
@@ -72,8 +73,8 @@ $(obj)/../voffset.h: vmlinux FORCE
 
 $(obj)/misc.o: $(obj)/../voffset.h
 
-vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
-	$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
+	$(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
 	$(obj)/piggy.o $(obj)/cpuflags.o
 
 vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
@@ -88,7 +89,7 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
 vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
@@ -102,7 +103,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 quiet_cmd_check_data_rel = DATAREL $@
 define cmd_check_data_rel
 	for obj in $(filter %.o,$^); do \
-		${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
+		$(READELF) -S $$obj | grep -qF .rel.local && { \
 			echo "error: $$obj has data relocations!" >&2; \
 			exit 1; \
 		} || true; \
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 82bc60c8acb2..287393d725f0 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -6,6 +6,8 @@
  *
  * ----------------------------------------------------------------------- */
 
+#pragma GCC visibility push(hidden)
+
 #include <linux/efi.h>
 #include <linux/pci.h>
 
@@ -19,32 +21,18 @@
 #include "eboot.h"
 
 static efi_system_table_t *sys_table;
+extern const bool efi_is64;
 
-static struct efi_config *efi_early;
-
-__pure const struct efi_config *__efi_early(void)
+__pure efi_system_table_t *efi_system_table(void)
 {
-	return efi_early;
-}
-
-#define BOOT_SERVICES(bits)						\
-static void setup_boot_services##bits(struct efi_config *c)		\
-{									\
-	efi_system_table_##bits##_t *table;				\
-									\
-	table = (typeof(table))sys_table;				\
-									\
-	c->runtime_services	= table->runtime;			\
-	c->boot_services	= table->boottime;			\
-	c->text_output		= table->con_out;			\
+	return sys_table;
 }
-BOOT_SERVICES(32);
-BOOT_SERVICES(64);
 
-void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
+__attribute_const__ bool efi_is_64bit(void)
 {
-	efi_call_proto(efi_simple_text_output_protocol, output_string,
-		       efi_early->text_output, str);
+	if (IS_ENABLED(CONFIG_EFI_MIXED))
+		return efi_is64;
+	return IS_ENABLED(CONFIG_X86_64);
 }
 
 static efi_status_t
@@ -63,17 +51,17 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	 * large romsize. The UEFI spec limits the size of option ROMs to 16
 	 * MiB so we reject any ROMs over 16 MiB in size to catch this.
 	 */
-	romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
-							 romimage, pci);
-	romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+	romimage = efi_table_attr(pci, romimage);
+	romsize = efi_table_attr(pci, romsize);
 	if (!romimage || !romsize || romsize > SZ_16M)
 		return EFI_INVALID_PARAMETER;
 
 	size = romsize + sizeof(*rom);
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+			     (void **)&rom);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
+		efi_printk("Failed to allocate memory for 'rom'\n");
 		return status;
 	}
 
@@ -85,27 +73,24 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	rom->pcilen	= pci->romsize;
 	*__rom = rom;
 
-	status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
-				EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
-				&rom->vendor);
+	status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+				PCI_VENDOR_ID, 1, &rom->vendor);
 
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to read rom->vendor\n");
+		efi_printk("Failed to read rom->vendor\n");
 		goto free_struct;
 	}
 
-	status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
-				EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
-				&rom->devid);
+	status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+				PCI_DEVICE_ID, 1, &rom->devid);
 
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to read rom->devid\n");
+		efi_printk("Failed to read rom->devid\n");
 		goto free_struct;
 	}
 
-	status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
-				&rom->segment, &rom->bus, &rom->device,
-				&rom->function);
+	status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus,
+				&rom->device, &rom->function);
 
 	if (status != EFI_SUCCESS)
 		goto free_struct;
@@ -114,7 +99,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	return status;
 
 free_struct:
-	efi_call_early(free_pool, rom);
+	efi_bs_call(free_pool, rom);
 	return status;
 }
 
@@ -133,27 +118,24 @@ static void setup_efi_pci(struct boot_params *params)
 	void **pci_handle = NULL;
 	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
 	unsigned long size = 0;
-	unsigned long nr_pci;
 	struct setup_data *data;
+	efi_handle_t h;
 	int i;
 
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				&pci_proto, NULL, &size, pci_handle);
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+			     &pci_proto, NULL, &size, pci_handle);
 
 	if (status == EFI_BUFFER_TOO_SMALL) {
-		status = efi_call_early(allocate_pool,
-					EFI_LOADER_DATA,
-					size, (void **)&pci_handle);
+		status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+				     (void **)&pci_handle);
 
 		if (status != EFI_SUCCESS) {
-			efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
+			efi_printk("Failed to allocate memory for 'pci_handle'\n");
 			return;
 		}
 
-		status = efi_call_early(locate_handle,
-					EFI_LOCATE_BY_PROTOCOL, &pci_proto,
-					NULL, &size, pci_handle);
+		status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+				     &pci_proto, NULL, &size, pci_handle);
 	}
 
 	if (status != EFI_SUCCESS)
@@ -164,15 +146,12 @@ static void setup_efi_pci(struct boot_params *params)
 	while (data && data->next)
 		data = (struct setup_data *)(unsigned long)data->next;
 
-	nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
-	for (i = 0; i < nr_pci; i++) {
+	for_each_efi_handle(h, pci_handle, size, i) {
 		efi_pci_io_protocol_t *pci = NULL;
 		struct pci_setup_rom *rom;
 
-		status = efi_call_early(handle_protocol,
-					efi_is_64bit() ? ((u64 *)pci_handle)[i]
-						       : ((u32 *)pci_handle)[i],
-					&pci_proto, (void **)&pci);
+		status = efi_bs_call(handle_protocol, h, &pci_proto,
+				     (void **)&pci);
 		if (status != EFI_SUCCESS || !pci)
 			continue;
 
@@ -189,7 +168,7 @@ static void setup_efi_pci(struct boot_params *params)
 	}
 
 free_handle:
-	efi_call_early(free_pool, pci_handle);
+	efi_bs_call(free_pool, pci_handle);
 }
 
 static void retrieve_apple_device_properties(struct boot_params *boot_params)
@@ -198,34 +177,34 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
 	struct setup_data *data, *new;
 	efi_status_t status;
 	u32 size = 0;
-	void *p;
+	apple_properties_protocol_t *p;
 
-	status = efi_call_early(locate_protocol, &guid, NULL, &p);
+	status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p);
 	if (status != EFI_SUCCESS)
 		return;
 
-	if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
-		efi_printk(sys_table, "Unsupported properties proto version\n");
+	if (efi_table_attr(p, version) != 0x10000) {
+		efi_printk("Unsupported properties proto version\n");
 		return;
 	}
 
-	efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
+	efi_call_proto(p, get_all, NULL, &size);
 	if (!size)
 		return;
 
 	do {
-		status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-					size + sizeof(struct setup_data), &new);
+		status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+				     size + sizeof(struct setup_data),
+				     (void **)&new);
 		if (status != EFI_SUCCESS) {
-			efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
+			efi_printk("Failed to allocate memory for 'properties'\n");
 			return;
 		}
 
-		status = efi_call_proto(apple_properties_protocol, get_all, p,
-					new->data, &size);
+		status = efi_call_proto(p, get_all, new->data, &size);
 
 		if (status == EFI_BUFFER_TOO_SMALL)
-			efi_call_early(free_pool, new);
+			efi_bs_call(free_pool, new);
 	} while (status == EFI_BUFFER_TOO_SMALL);
 
 	new->type = SETUP_APPLE_PROPERTIES;
@@ -247,7 +226,7 @@ static const efi_char16_t apple[] = L"Apple";
 static void setup_quirks(struct boot_params *boot_params)
 {
 	efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
-		efi_table_attr(efi_system_table, fw_vendor, sys_table);
+		efi_table_attr(efi_system_table(), fw_vendor);
 
 	if (!memcmp(fw_vendor, apple, sizeof(apple))) {
 		if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
@@ -265,17 +244,16 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
 	u32 width, height;
 	void **uga_handle = NULL;
 	efi_uga_draw_protocol_t *uga = NULL, *first_uga;
-	unsigned long nr_ugas;
+	efi_handle_t handle;
 	int i;
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				size, (void **)&uga_handle);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+			     (void **)&uga_handle);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				uga_proto, NULL, &size, uga_handle);
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+			     uga_proto, NULL, &size, uga_handle);
 	if (status != EFI_SUCCESS)
 		goto free_handle;
 
@@ -283,24 +261,20 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
 	width = 0;
 
 	first_uga = NULL;
-	nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
-	for (i = 0; i < nr_ugas; i++) {
+	for_each_efi_handle(handle, uga_handle, size, i) {
 		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
 		u32 w, h, depth, refresh;
 		void *pciio;
-		unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
-						      : ((u32 *)uga_handle)[i];
 
-		status = efi_call_early(handle_protocol, handle,
-					uga_proto, (void **)&uga);
+		status = efi_bs_call(handle_protocol, handle, uga_proto,
+				     (void **)&uga);
 		if (status != EFI_SUCCESS)
 			continue;
 
 		pciio = NULL;
-		efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
+		efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio);
 
-		status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
-					&w, &h, &depth, &refresh);
+		status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh);
 		if (status == EFI_SUCCESS && (!first_uga || pciio)) {
 			width = w;
 			height = h;
@@ -336,7 +310,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
 	si->rsvd_pos		= 24;
 
 free_handle:
-	efi_call_early(free_pool, uga_handle);
+	efi_bs_call(free_pool, uga_handle);
 
 	return status;
 }
@@ -355,37 +329,38 @@ void setup_graphics(struct boot_params *boot_params)
 	memset(si, 0, sizeof(*si));
 
 	size = 0;
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				&graphics_proto, NULL, &size, gop_handle);
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+			     &graphics_proto, NULL, &size, gop_handle);
 	if (status == EFI_BUFFER_TOO_SMALL)
-		status = efi_setup_gop(NULL, si, &graphics_proto, size);
+		status = efi_setup_gop(si, &graphics_proto, size);
 
 	if (status != EFI_SUCCESS) {
 		size = 0;
-		status = efi_call_early(locate_handle,
-					EFI_LOCATE_BY_PROTOCOL,
-					&uga_proto, NULL, &size, uga_handle);
+		status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+				     &uga_proto, NULL, &size, uga_handle);
 		if (status == EFI_BUFFER_TOO_SMALL)
 			setup_uga(si, &uga_proto, size);
 	}
 }
 
+void startup_32(struct boot_params *boot_params);
+
+void __noreturn efi_stub_entry(efi_handle_t handle,
+			       efi_system_table_t *sys_table_arg,
+			       struct boot_params *boot_params);
+
 /*
  * Because the x86 boot code expects to be passed a boot_params we
  * need to create one ourselves (usually the bootloader would create
  * one for us).
- *
- * The caller is responsible for filling out ->code32_start in the
- * returned boot_params.
  */
-struct boot_params *make_boot_params(struct efi_config *c)
+efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+				   efi_system_table_t *sys_table_arg)
 {
 	struct boot_params *boot_params;
 	struct apm_bios_info *bi;
 	struct setup_header *hdr;
 	efi_loaded_image_t *image;
-	void *handle;
 	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
@@ -393,31 +368,22 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	unsigned long ramdisk_addr;
 	unsigned long ramdisk_size;
 
-	efi_early = c;
-	sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
-	handle = (void *)(unsigned long)efi_early->image_handle;
+	sys_table = sys_table_arg;
 
 	/* Check if we were booted by the EFI firmware */
 	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		return NULL;
-
-	if (efi_is_64bit())
-		setup_boot_services64(efi_early);
-	else
-		setup_boot_services32(efi_early);
+		return EFI_INVALID_PARAMETER;
 
-	status = efi_call_early(handle_protocol, handle,
-				&proto, (void *)&image);
+	status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
-		return NULL;
+		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+		return status;
 	}
 
-	status = efi_low_alloc(sys_table, 0x4000, 1,
-			       (unsigned long *)&boot_params);
+	status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
-		return NULL;
+		efi_printk("Failed to allocate lowmem for boot params\n");
+		return status;
 	}
 
 	memset(boot_params, 0x0, 0x4000);
@@ -439,7 +405,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
-	cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
+	cmdline_ptr = efi_convert_cmdline(image, &options_size);
 	if (!cmdline_ptr)
 		goto fail;
 
@@ -457,15 +423,15 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	if (status != EFI_SUCCESS)
 		goto fail2;
 
-	status = handle_cmdline_files(sys_table, image,
+	status = handle_cmdline_files(image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", hdr->initrd_addr_max,
 				      &ramdisk_addr, &ramdisk_size);
 
 	if (status != EFI_SUCCESS &&
 	    hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
-		efi_printk(sys_table, "Trying to load files to higher address\n");
-		status = handle_cmdline_files(sys_table, image,
+		efi_printk("Trying to load files to higher address\n");
+		status = handle_cmdline_files(image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", -1UL,
 				      &ramdisk_addr, &ramdisk_size);
@@ -478,14 +444,17 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
 	boot_params->ext_ramdisk_size  = (u64)ramdisk_size >> 32;
 
-	return boot_params;
+	hdr->code32_start = (u32)(unsigned long)startup_32;
+
+	efi_stub_entry(handle, sys_table, boot_params);
+	/* not reached */
 
 fail2:
-	efi_free(sys_table, options_size, hdr->cmd_line_ptr);
+	efi_free(options_size, hdr->cmd_line_ptr);
 fail:
-	efi_free(sys_table, 0x4000, (unsigned long)boot_params);
+	efi_free(0x4000, (unsigned long)boot_params);
 
-	return NULL;
+	return status;
 }
 
 static void add_e820ext(struct boot_params *params,
@@ -554,7 +523,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s
 		case EFI_BOOT_SERVICES_CODE:
 		case EFI_BOOT_SERVICES_DATA:
 		case EFI_CONVENTIONAL_MEMORY:
-			e820_type = E820_TYPE_RAM;
+			if (efi_soft_reserve_enabled() &&
+			    (d->attribute & EFI_MEMORY_SP))
+				e820_type = E820_TYPE_SOFT_RESERVED;
+			else
+				e820_type = E820_TYPE_RAM;
 			break;
 
 		case EFI_ACPI_MEMORY_NVS:
@@ -616,13 +589,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
 		sizeof(struct e820_entry) * nr_desc;
 
 	if (*e820ext) {
-		efi_call_early(free_pool, *e820ext);
+		efi_bs_call(free_pool, *e820ext);
 		*e820ext = NULL;
 		*e820ext_size = 0;
 	}
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				size, (void **)e820ext);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+			     (void **)e820ext);
 	if (status == EFI_SUCCESS)
 		*e820ext_size = size;
 
@@ -646,7 +619,7 @@ static efi_status_t allocate_e820(struct boot_params *params,
 	boot_map.key_ptr	= NULL;
 	boot_map.buff_size	= &buff_size;
 
-	status = efi_get_memory_map(sys_table, &boot_map);
+	status = efi_get_memory_map(&boot_map);
 	if (status != EFI_SUCCESS)
 		return status;
 
@@ -668,8 +641,7 @@ struct exit_boot_struct {
 	struct efi_info		*efi;
 };
 
-static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
-				   struct efi_boot_memmap *map,
+static efi_status_t exit_boot_func(struct efi_boot_memmap *map,
 				   void *priv)
 {
 	const char *signature;
@@ -679,14 +651,14 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
 				   : EFI32_LOADER_SIGNATURE;
 	memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
 
-	p->efi->efi_systab		= (unsigned long)sys_table_arg;
+	p->efi->efi_systab		= (unsigned long)efi_system_table();
 	p->efi->efi_memdesc_size	= *map->desc_size;
 	p->efi->efi_memdesc_version	= *map->desc_ver;
 	p->efi->efi_memmap		= (unsigned long)*map->map;
 	p->efi->efi_memmap_size		= *map->map_size;
 
 #ifdef CONFIG_X86_64
-	p->efi->efi_systab_hi		= (unsigned long)sys_table_arg >> 32;
+	p->efi->efi_systab_hi		= (unsigned long)efi_system_table() >> 32;
 	p->efi->efi_memmap_hi		= (unsigned long)*map->map >> 32;
 #endif
 
@@ -718,8 +690,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 		return status;
 
 	/* Might as well exit boot services now */
-	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
-					exit_boot_func);
+	status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func);
 	if (status != EFI_SUCCESS)
 		return status;
 
@@ -737,33 +708,22 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
  * On success we return a pointer to a boot_params structure, and NULL
  * on failure.
  */
-struct boot_params *
-efi_main(struct efi_config *c, struct boot_params *boot_params)
+struct boot_params *efi_main(efi_handle_t handle,
+			     efi_system_table_t *sys_table_arg,
+			     struct boot_params *boot_params)
 {
 	struct desc_ptr *gdt = NULL;
 	struct setup_header *hdr = &boot_params->hdr;
 	efi_status_t status;
 	struct desc_struct *desc;
-	void *handle;
-	efi_system_table_t *_table;
 	unsigned long cmdline_paddr;
 
-	efi_early = c;
-
-	_table = (efi_system_table_t *)(unsigned long)efi_early->table;
-	handle = (void *)(unsigned long)efi_early->image_handle;
-
-	sys_table = _table;
+	sys_table = sys_table_arg;
 
 	/* Check if we were booted by the EFI firmware */
 	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 		goto fail;
 
-	if (efi_is_64bit())
-		setup_boot_services64(efi_early);
-	else
-		setup_boot_services32(efi_early);
-
 	/*
 	 * make_boot_params() may have been called before efi_main(), in which
 	 * case this is the second time we parse the cmdline. This is ok,
@@ -778,11 +738,14 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
 	 * otherwise we ask the BIOS.
 	 */
 	if (boot_params->secure_boot == efi_secureboot_mode_unset)
-		boot_params->secure_boot = efi_get_secureboot(sys_table);
+		boot_params->secure_boot = efi_get_secureboot();
 
 	/* Ask the firmware to clear memory on unclean shutdown */
-	efi_enable_reset_attack_mitigation(sys_table);
-	efi_retrieve_tpm2_eventlog(sys_table);
+	efi_enable_reset_attack_mitigation();
+
+	efi_random_get_seed();
+
+	efi_retrieve_tpm2_eventlog();
 
 	setup_graphics(boot_params);
 
@@ -790,18 +753,17 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
 
 	setup_quirks(boot_params);
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				sizeof(*gdt), (void **)&gdt);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt),
+			     (void **)&gdt);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
+		efi_printk("Failed to allocate memory for 'gdt' structure\n");
 		goto fail;
 	}
 
 	gdt->size = 0x800;
-	status = efi_low_alloc(sys_table, gdt->size, 8,
-			   (unsigned long *)&gdt->address);
+	status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
+		efi_printk("Failed to allocate memory for 'gdt'\n");
 		goto fail;
 	}
 
@@ -811,13 +773,13 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
 	 */
 	if (hdr->pref_address != hdr->code32_start) {
 		unsigned long bzimage_addr = hdr->code32_start;
-		status = efi_relocate_kernel(sys_table, &bzimage_addr,
+		status = efi_relocate_kernel(&bzimage_addr,
 					     hdr->init_size, hdr->init_size,
 					     hdr->pref_address,
 					     hdr->kernel_alignment,
 					     LOAD_PHYSICAL_ADDR);
 		if (status != EFI_SUCCESS) {
-			efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
+			efi_printk("efi_relocate_kernel() failed!\n");
 			goto fail;
 		}
 
@@ -827,7 +789,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
 
 	status = exit_boot(boot_params, handle);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "exit_boot() failed!\n");
+		efi_printk("exit_boot() failed!\n");
 		goto fail;
 	}
 
@@ -920,7 +882,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
 
 	return boot_params;
 fail:
-	efi_printk(sys_table, "efi_main() failed!\n");
+	efi_printk("efi_main() failed!\n");
 
-	return NULL;
+	for (;;)
+		asm("hlt");
 }
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 8297387c4676..99f35343d443 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -12,22 +12,20 @@
 
 #define DESC_TYPE_CODE_DATA	(1 << 0)
 
-typedef struct {
-	u32 get_mode;
-	u32 set_mode;
-	u32 blt;
-} efi_uga_draw_protocol_32_t;
+typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
 
-typedef struct {
-	u64 get_mode;
-	u64 set_mode;
-	u64 blt;
-} efi_uga_draw_protocol_64_t;
-
-typedef struct {
-	void *get_mode;
-	void *set_mode;
-	void *blt;
-} efi_uga_draw_protocol_t;
+union efi_uga_draw_protocol {
+	struct {
+		efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *,
+						  u32*, u32*, u32*, u32*);
+		void *set_mode;
+		void *blt;
+	};
+	struct {
+		u32 get_mode;
+		u32 set_mode;
+		u32 blt;
+	} mixed_mode;
+};
 
 #endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
deleted file mode 100644
index 257e341fd2c8..000000000000
--- a/arch/x86/boot/compressed/efi_stub_32.S
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * EFI call stub for IA32.
- *
- * This stub allows us to make EFI calls in physical mode with interrupts
- * turned off. Note that this implementation is different from the one in
- * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
- * mode at this point.
- */
-
-#include <linux/linkage.h>
-#include <asm/page_types.h>
-
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
-
-.text
-ENTRY(efi_call_phys)
-	/*
-	 * 0. The function can only be called in Linux kernel. So CS has been
-	 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
-	 * the values of these registers are the same. And, the corresponding
-	 * GDT entries are identical. So I will do nothing about segment reg
-	 * and GDT, but change GDT base register in prelog and epilog.
-	 */
-
-	/*
-	 * 1. Because we haven't been relocated by this point we need to
-	 * use relative addressing.
-	 */
-	call	1f
-1:	popl	%edx
-	subl	$1b, %edx
-
-	/*
-	 * 2. Now on the top of stack is the return
-	 * address in the caller of efi_call_phys(), then parameter 1,
-	 * parameter 2, ..., param n. To make things easy, we save the return
-	 * address of efi_call_phys in a global variable.
-	 */
-	popl	%ecx
-	movl	%ecx, saved_return_addr(%edx)
-	/* get the function pointer into ECX*/
-	popl	%ecx
-	movl	%ecx, efi_rt_function_ptr(%edx)
-
-	/*
-	 * 3. Call the physical function.
-	 */
-	call	*%ecx
-
-	/*
-	 * 4. Balance the stack. And because EAX contain the return value,
-	 * we'd better not clobber it. We need to calculate our address
-	 * again because %ecx and %edx are not preserved across EFI function
-	 * calls.
-	 */
-	call	1f
-1:	popl	%edx
-	subl	$1b, %edx
-
-	movl	efi_rt_function_ptr(%edx), %ecx
-	pushl	%ecx
-
-	/*
-	 * 10. Push the saved return address onto the stack and return.
-	 */
-	movl	saved_return_addr(%edx), %ecx
-	pushl	%ecx
-	ret
-ENDPROC(efi_call_phys)
-.previous
-
-.data
-saved_return_addr:
-	.long 0
-efi_rt_function_ptr:
-	.long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
deleted file mode 100644
index 99494dff2113..000000000000
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <asm/segment.h>
-#include <asm/msr.h>
-#include <asm/processor-flags.h>
-
-#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index bff9ab7c6317..8fb7f6799c52 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -10,7 +10,7 @@
  * needs to be able to service interrupts.
  *
  * On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identify
+ * addresses into 32-bits because we're executing with an identity
  * mapped pagetable and haven't transitioned to 64-bit virtual addresses
  * yet.
  */
@@ -23,16 +23,13 @@
 
 	.code64
 	.text
-ENTRY(efi64_thunk)
+SYM_FUNC_START(__efi64_thunk)
 	push	%rbp
 	push	%rbx
 
-	subq	$8, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 4(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, (%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+	leaq	1f(%rip), %rbp
+	leaq	efi_gdt64(%rip), %rbx
+	movl	%ebx, 2(%rbx)		/* Fixup the gdt base address */
 
 	movl	%ds, %eax
 	push	%rax
@@ -48,15 +45,10 @@ ENTRY(efi64_thunk)
 	movl	%esi, 0x0(%rsp)
 	movl	%edx, 0x4(%rsp)
 	movl	%ecx, 0x8(%rsp)
-	movq	%r8, %rsi
-	movl	%esi, 0xc(%rsp)
-	movq	%r9, %rsi
-	movl	%esi,  0x10(%rsp)
+	movl	%r8d, 0xc(%rsp)
+	movl	%r9d, 0x10(%rsp)
 
-	sgdt	save_gdt(%rip)
-
-	leaq	1f(%rip), %rbx
-	movq	%rbx, func_rt_ptr(%rip)
+	sgdt	0x14(%rsp)
 
 	/*
 	 * Switch to gdt with 32-bit segments. This is the firmware GDT
@@ -71,9 +63,9 @@ ENTRY(efi64_thunk)
 	pushq	%rax
 	lretq
 
-1:	addq	$32, %rsp
-
-	lgdt	save_gdt(%rip)
+1:	lgdt	0x14(%rsp)
+	addq	$32, %rsp
+	movq	%rdi, %rax
 
 	pop	%rbx
 	movl	%ebx, %ss
@@ -85,26 +77,13 @@ ENTRY(efi64_thunk)
 	/*
 	 * Convert 32-bit status code into 64-bit.
 	 */
-	test	%rax, %rax
-	jz	1f
-	movl	%eax, %ecx
-	andl	$0x0fffffff, %ecx
-	andl	$0xf0000000, %eax
-	shl	$32, %rax
-	or	%rcx, %rax
-1:
-	addq	$8, %rsp
+	roll	$1, %eax
+	rorq	$1, %rax
+
 	pop	%rbx
 	pop	%rbp
 	ret
-ENDPROC(efi64_thunk)
-
-ENTRY(efi_exit32)
-	movq	func_rt_ptr(%rip), %rax
-	push	%rax
-	mov	%rdi, %rax
-	ret
-ENDPROC(efi_exit32)
+SYM_FUNC_END(__efi64_thunk)
 
 	.code32
 /*
@@ -112,7 +91,7 @@ ENDPROC(efi_exit32)
  *
  * The stack should represent the 32-bit calling convention.
  */
-ENTRY(efi_enter32)
+SYM_FUNC_START_LOCAL(efi_enter32)
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
 	movl	%eax, %es
@@ -144,9 +123,7 @@ ENTRY(efi_enter32)
 	 */
 	cli
 
-	movl	56(%esp), %eax
-	movl	%eax, 2(%eax)
-	lgdtl	(%eax)
+	lgdtl	(%ebx)
 
 	movl	%cr4, %eax
 	btsl	$(X86_CR4_PAE_BIT), %eax
@@ -163,29 +140,24 @@ ENTRY(efi_enter32)
 	xorl	%eax, %eax
 	lldt	%ax
 
-	movl	60(%esp), %eax
 	pushl	$__KERNEL_CS
-	pushl	%eax
+	pushl	%ebp
 
 	/* Enable paging */
 	movl	%cr0, %eax
 	btsl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 	lret
-ENDPROC(efi_enter32)
+SYM_FUNC_END(efi_enter32)
 
 	.data
 	.balign	8
-	.global	efi32_boot_gdt
-efi32_boot_gdt:	.word	0
-		.quad	0
-
-save_gdt:	.word	0
-		.quad	0
-func_rt_ptr:	.quad	0
+SYM_DATA_START(efi32_boot_gdt)
+	.word	0
+	.quad	0
+SYM_DATA_END(efi32_boot_gdt)
 
-	.global efi_gdt64
-efi_gdt64:
+SYM_DATA_START(efi_gdt64)
 	.word	efi_gdt64_end - efi_gdt64
 	.long	0			/* Filled out by user */
 	.word	0
@@ -194,4 +166,4 @@ efi_gdt64:
 	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
-efi_gdt64_end:
+SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5e30eaaf8576..73f17d0544dd 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -61,7 +61,7 @@
 	.hidden _egot
 
 	__HEAD
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -142,74 +142,23 @@ ENTRY(startup_32)
  */
 	leal	.Lrelocated(%ebx), %eax
 	jmp	*%eax
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
 
 #ifdef CONFIG_EFI_STUB
-/*
- * We don't need the return address, so set up the stack so efi_main() can find
- * its arguments.
- */
-ENTRY(efi_pe_entry)
-	add	$0x4, %esp
-
-	call	1f
-1:	popl	%esi
-	subl	$1b, %esi
-
-	popl	%ecx
-	movl	%ecx, efi32_config(%esi)	/* Handle */
-	popl	%ecx
-	movl	%ecx, efi32_config+8(%esi)	/* EFI System table pointer */
-
-	/* Relocate efi_config->call() */
-	leal	efi32_config(%esi), %eax
-	add	%esi, 40(%eax)
-	pushl	%eax
-
-	call	make_boot_params
-	cmpl	$0, %eax
-	je	fail
-	movl	%esi, BP_code32_start(%eax)
-	popl	%ecx
-	pushl	%eax
-	pushl	%ecx
-	jmp	2f		/* Skip efi_config initialization */
-ENDPROC(efi_pe_entry)
-
-ENTRY(efi32_stub_entry)
+SYM_FUNC_START(efi32_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
 	add	$0x4, %esp
-	popl	%ecx
-	popl	%edx
-
-	call	1f
-1:	popl	%esi
-	subl	$1b, %esi
-
-	movl	%ecx, efi32_config(%esi)	/* Handle */
-	movl	%edx, efi32_config+8(%esi)	/* EFI System table pointer */
-
-	/* Relocate efi_config->call() */
-	leal	efi32_config(%esi), %eax
-	add	%esi, 40(%eax)
-	pushl	%eax
-2:
 	call	efi_main
-	cmpl	$0, %eax
 	movl	%eax, %esi
-	jne	2f
-fail:
-	/* EFI init failed, so hang. */
-	hlt
-	jmp	fail
-2:
 	movl	BP_code32_start(%esi), %eax
 	leal	startup_32(%eax), %eax
 	jmp	*%eax
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
 #endif
 
 	.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 
 /*
  * Clear BSS (stack is currently empty)
@@ -240,11 +189,9 @@ ENDPROC(efi32_stub_entry)
 				/* push arguments for extract_kernel: */
 	pushl	$z_output_len	/* decompressed length, end of relocs */
 
-	movl    BP_init_size(%esi), %eax
-	subl    $_end, %eax
-	movl    %ebx, %ebp
-	subl    %eax, %ebp
-	pushl	%ebp		/* output address */
+	leal	_end(%ebx), %eax
+	subl    BP_init_size(%esi), %eax
+	pushl	%eax		/* output address */
 
 	pushl	$z_input_len	/* input_len */
 	leal	input_data(%ebx), %eax
@@ -260,15 +207,7 @@ ENDPROC(efi32_stub_entry)
  */
 	xorl	%ebx, %ebx
 	jmp	*%eax
-
-#ifdef CONFIG_EFI_STUB
-	.data
-efi32_config:
-	.fill 5,8,0
-	.long efi_call_phys
-	.long 0
-	.byte 0
-#endif
+SYM_FUNC_END(.Lrelocated)
 
 /*
  * Stack and heap for uncompression
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d98cd483377e..1f1f6c8139b3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -45,7 +45,7 @@
 
 	__HEAD
 	.code32
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
 	/*
 	 * 32bit entry is 0 and it is ABI so immutable!
 	 * If we come here directly from a bootloader,
@@ -208,10 +208,12 @@ ENTRY(startup_32)
 	pushl	$__KERNEL_CS
 	leal	startup_64(%ebp), %eax
 #ifdef CONFIG_EFI_MIXED
-	movl	efi32_config(%ebp), %ebx
-	cmp	$0, %ebx
+	movl	efi32_boot_args(%ebp), %edi
+	cmp	$0, %edi
 	jz	1f
-	leal	handover_entry(%ebp), %eax
+	leal	efi64_stub_entry(%ebp), %eax
+	movl	%esi, %edx
+	movl	efi32_boot_args+4(%ebp), %esi
 1:
 #endif
 	pushl	%eax
@@ -222,35 +224,37 @@ ENTRY(startup_32)
 
 	/* Jump from 32bit compatibility mode into 64bit mode. */
 	lret
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
 
 #ifdef CONFIG_EFI_MIXED
 	.org 0x190
-ENTRY(efi32_stub_entry)
+SYM_FUNC_START(efi32_stub_entry)
 	add	$0x4, %esp		/* Discard return address */
 	popl	%ecx
 	popl	%edx
 	popl	%esi
 
-	leal	(BP_scratch+4)(%esi), %esp
 	call	1f
 1:	pop	%ebp
 	subl	$1b, %ebp
 
-	movl	%ecx, efi32_config(%ebp)
-	movl	%edx, efi32_config+8(%ebp)
+	movl	%ecx, efi32_boot_args(%ebp)
+	movl	%edx, efi32_boot_args+4(%ebp)
 	sgdtl	efi32_boot_gdt(%ebp)
+	movb	$0, efi_is64(%ebp)
 
-	leal	efi32_config(%ebp), %eax
-	movl	%eax, efi_config(%ebp)
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
 
 	jmp	startup_32
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
 #endif
 
 	.code64
 	.org 0x200
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
 	/*
 	 * 64bit entry is 0x200 and it is ABI so immutable!
 	 * We come here either from startup_32 or directly from a
@@ -442,76 +446,24 @@ trampoline_return:
  */
 	leaq	.Lrelocated(%rbx), %rax
 	jmp	*%rax
+SYM_CODE_END(startup_64)
 
 #ifdef CONFIG_EFI_STUB
-
-/* The entry point for the PE/COFF executable is efi_pe_entry. */
-ENTRY(efi_pe_entry)
-	movq	%rcx, efi64_config(%rip)	/* Handle */
-	movq	%rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
-	leaq	efi64_config(%rip), %rax
-	movq	%rax, efi_config(%rip)
-
-	call	1f
-1:	popq	%rbp
-	subq	$1b, %rbp
-
-	/*
-	 * Relocate efi_config->call().
-	 */
-	addq	%rbp, efi64_config+40(%rip)
-
-	movq	%rax, %rdi
-	call	make_boot_params
-	cmpq	$0,%rax
-	je	fail
-	mov	%rax, %rsi
-	leaq	startup_32(%rip), %rax
-	movl	%eax, BP_code32_start(%rsi)
-	jmp	2f		/* Skip the relocation */
-
-handover_entry:
-	call	1f
-1:	popq	%rbp
-	subq	$1b, %rbp
-
-	/*
-	 * Relocate efi_config->call().
-	 */
-	movq	efi_config(%rip), %rax
-	addq	%rbp, 40(%rax)
-2:
-	movq	efi_config(%rip), %rdi
+	.org 0x390
+SYM_FUNC_START(efi64_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
+	and	$~0xf, %rsp			/* realign the stack */
 	call	efi_main
 	movq	%rax,%rsi
-	cmpq	$0,%rax
-	jne	2f
-fail:
-	/* EFI init failed, so hang. */
-	hlt
-	jmp	fail
-2:
 	movl	BP_code32_start(%esi), %eax
 	leaq	startup_64(%rax), %rax
 	jmp	*%rax
-ENDPROC(efi_pe_entry)
-
-	.org 0x390
-ENTRY(efi64_stub_entry)
-	movq	%rdi, efi64_config(%rip)	/* Handle */
-	movq	%rsi, efi64_config+8(%rip) /* EFI System table pointer */
-
-	leaq	efi64_config(%rip), %rax
-	movq	%rax, efi_config(%rip)
-
-	movq	%rdx, %rsi
-	jmp	handover_entry
-ENDPROC(efi64_stub_entry)
+SYM_FUNC_END(efi64_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
 #endif
 
 	.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 
 /*
  * Clear BSS (stack is currently empty)
@@ -540,6 +492,7 @@ ENDPROC(efi64_stub_entry)
  * Jump to the decompressed kernel.
  */
 	jmp	*%rax
+SYM_FUNC_END(.Lrelocated)
 
 /*
  * Adjust the global offset table
@@ -570,7 +523,7 @@ ENDPROC(efi64_stub_entry)
  * ECX contains the base address of the trampoline memory.
  * Non zero RDX means trampoline needs to enable 5-level paging.
  */
-ENTRY(trampoline_32bit_src)
+SYM_CODE_START(trampoline_32bit_src)
 	/* Set up data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
@@ -633,11 +586,13 @@ ENTRY(trampoline_32bit_src)
 	movl	%eax, %cr0
 
 	lret
+SYM_CODE_END(trampoline_32bit_src)
 
 	.code64
-.Lpaging_enabled:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
 	/* Return from the trampoline */
 	jmp	*%rdi
+SYM_FUNC_END(.Lpaging_enabled)
 
 	/*
          * The trampoline code has a size limit.
@@ -647,20 +602,22 @@ ENTRY(trampoline_32bit_src)
 	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
 	.code32
-.Lno_longmode:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
 	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
 	hlt
 	jmp     1b
+SYM_FUNC_END(.Lno_longmode)
 
 #include "../../kernel/verify_cpu.S"
 
 	.data
-gdt64:
+SYM_DATA_START_LOCAL(gdt64)
 	.word	gdt_end - gdt
 	.quad   0
+SYM_DATA_END(gdt64)
 	.balign	8
-gdt:
+SYM_DATA_START_LOCAL(gdt)
 	.word	gdt_end - gdt
 	.long	gdt
 	.word	0
@@ -669,49 +626,33 @@ gdt:
 	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
-gdt_end:
-
-#ifdef CONFIG_EFI_STUB
-efi_config:
-	.quad	0
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
 
 #ifdef CONFIG_EFI_MIXED
-	.global efi32_config
-efi32_config:
-	.fill	5,8,0
-	.quad	efi64_thunk
-	.byte	0
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0)
+SYM_DATA(efi_is64, .byte 1)
 #endif
 
-	.global efi64_config
-efi64_config:
-	.fill	5,8,0
-	.quad	efi_call
-	.byte	1
-#endif /* CONFIG_EFI_STUB */
-
 /*
  * Stack and heap for uncompression
  */
 	.bss
 	.balign 4
-boot_heap:
-	.fill BOOT_HEAP_SIZE, 1, 0
-boot_stack:
+SYM_DATA_LOCAL(boot_heap,	.fill BOOT_HEAP_SIZE, 1, 0)
+
+SYM_DATA_START_LOCAL(boot_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
-boot_stack_end:
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
 
 /*
  * Space for page tables (not in .bss so not zeroed)
  */
 	.section ".pgtable","a",@nobits
 	.balign 4096
-pgtable:
-	.fill BOOT_PGT_SIZE, 1, 0
+SYM_DATA_LOCAL(pgtable,		.fill BOOT_PGT_SIZE, 1, 0)
 
 /*
  * The page table is going to be used instead of page table in the trampoline
  * memory.
  */
-top_pgtable:
-	.fill PAGE_SIZE, 1, 0
+SYM_DATA_LOCAL(top_pgtable,	.fill PAGE_SIZE, 1, 0)
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 2e53c056ba20..d7408af55738 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -132,8 +132,14 @@ char *skip_spaces(const char *str)
 #include "../../../../lib/ctype.c"
 #include "../../../../lib/cmdline.c"
 
+enum parse_mode {
+	PARSE_MEMMAP,
+	PARSE_EFI,
+};
+
 static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
+		enum parse_mode mode)
 {
 	char *oldp;
 
@@ -156,8 +162,29 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
 		*start = memparse(p + 1, &p);
 		return 0;
 	case '@':
-		/* memmap=nn@ss specifies usable region, should be skipped */
-		*size = 0;
+		if (mode == PARSE_MEMMAP) {
+			/*
+			 * memmap=nn@ss specifies usable region, should
+			 * be skipped
+			 */
+			*size = 0;
+		} else {
+			unsigned long long flags;
+
+			/*
+			 * efi_fake_mem=nn@ss:attr the attr specifies
+			 * flags that might imply a soft-reservation.
+			 */
+			*start = memparse(p + 1, &p);
+			if (p && *p == ':') {
+				p++;
+				if (kstrtoull(p, 0, &flags) < 0)
+					*size = 0;
+				else if (flags & EFI_MEMORY_SP)
+					return 0;
+			}
+			*size = 0;
+		}
 		/* Fall through */
 	default:
 		/*
@@ -172,7 +199,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
 	return -EINVAL;
 }
 
-static void mem_avoid_memmap(char *str)
+static void mem_avoid_memmap(enum parse_mode mode, char *str)
 {
 	static int i;
 
@@ -187,7 +214,7 @@ static void mem_avoid_memmap(char *str)
 		if (k)
 			*k++ = 0;
 
-		rc = parse_memmap(str, &start, &size);
+		rc = parse_memmap(str, &start, &size, mode);
 		if (rc < 0)
 			break;
 		str = k;
@@ -238,7 +265,6 @@ static void parse_gb_huge_pages(char *param, char *val)
 	}
 }
 
-
 static void handle_mem_options(void)
 {
 	char *args = (char *)get_cmd_line_ptr();
@@ -271,7 +297,7 @@ static void handle_mem_options(void)
 		}
 
 		if (!strcmp(param, "memmap")) {
-			mem_avoid_memmap(val);
+			mem_avoid_memmap(PARSE_MEMMAP, val);
 		} else if (strstr(param, "hugepages")) {
 			parse_gb_huge_pages(param, val);
 		} else if (!strcmp(param, "mem")) {
@@ -284,6 +310,8 @@ static void handle_mem_options(void)
 				goto out;
 
 			mem_limit = mem_size;
+		} else if (!strcmp(param, "efi_fake_mem")) {
+			mem_avoid_memmap(PARSE_EFI, val);
 		}
 	}
 
@@ -459,6 +487,18 @@ static bool mem_avoid_overlap(struct mem_vector *img,
 			is_overlapping = true;
 		}
 
+		if (ptr->type == SETUP_INDIRECT &&
+		    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
+			avoid.start = ((struct setup_indirect *)ptr->data)->addr;
+			avoid.size = ((struct setup_indirect *)ptr->data)->len;
+
+			if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+				*overlap = avoid;
+				earliest = overlap->start;
+				is_overlapping = true;
+			}
+		}
+
 		ptr = (struct setup_data *)(unsigned long)ptr->next;
 	}
 
@@ -760,6 +800,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
 		if (md->type != EFI_CONVENTIONAL_MEMORY)
 			continue;
 
+		if (efi_soft_reserve_enabled() &&
+		    (md->attribute & EFI_MEMORY_SP))
+			continue;
+
 		if (efi_mirror_found &&
 		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
 			continue;
diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S
new file mode 100644
index 000000000000..f818ee8fba38
--- /dev/null
+++ b/arch/x86/boot/compressed/kernel_info.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/bootparam.h>
+
+	.section ".rodata.kernel_info", "a"
+
+	.global kernel_info
+
+kernel_info:
+	/* Header, Linux top (structure). */
+	.ascii	"LToP"
+	/* Size. */
+	.long	kernel_info_var_len_data - kernel_info
+	/* Size total. */
+	.long	kernel_info_end - kernel_info
+
+	/* Maximal allowed type for setup_data and setup_indirect structs. */
+	.long	SETUP_TYPE_MAX
+
+kernel_info_var_len_data:
+	/* Empty for time being... */
+kernel_info_end:
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index 6afb7130a387..dd07e7b41b11 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -15,7 +15,7 @@
 
 	.text
 	.code32
-ENTRY(get_sev_encryption_bit)
+SYM_FUNC_START(get_sev_encryption_bit)
 	xor	%eax, %eax
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -65,10 +65,10 @@ ENTRY(get_sev_encryption_bit)
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
 	ret
-ENDPROC(get_sev_encryption_bit)
+SYM_FUNC_END(get_sev_encryption_bit)
 
 	.code64
-ENTRY(set_sev_encryption_mask)
+SYM_FUNC_START(set_sev_encryption_mask)
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 	push	%rbp
 	push	%rdx
@@ -90,12 +90,11 @@ ENTRY(set_sev_encryption_mask)
 
 	xor	%rax, %rax
 	ret
-ENDPROC(set_sev_encryption_mask)
+SYM_FUNC_END(set_sev_encryption_mask)
 
 	.data
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 	.balign	8
-GLOBAL(sme_me_mask)
-	.quad	0
+SYM_DATA(sme_me_mask, .quad 0)
 #endif
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 4c5f4f4ad035..6afd05e819d2 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -15,7 +15,7 @@
 	.code16
 	.text
 
-GLOBAL(memcpy)
+SYM_FUNC_START_NOALIGN(memcpy)
 	pushw	%si
 	pushw	%di
 	movw	%ax, %di
@@ -29,9 +29,9 @@ GLOBAL(memcpy)
 	popw	%di
 	popw	%si
 	retl
-ENDPROC(memcpy)
+SYM_FUNC_END(memcpy)
 
-GLOBAL(memset)
+SYM_FUNC_START_NOALIGN(memset)
 	pushw	%di
 	movw	%ax, %di
 	movzbl	%dl, %eax
@@ -44,22 +44,22 @@ GLOBAL(memset)
 	rep; stosb
 	popw	%di
 	retl
-ENDPROC(memset)
+SYM_FUNC_END(memset)
 
-GLOBAL(copy_from_fs)
+SYM_FUNC_START_NOALIGN(copy_from_fs)
 	pushw	%ds
 	pushw	%fs
 	popw	%ds
 	calll	memcpy
 	popw	%ds
 	retl
-ENDPROC(copy_from_fs)
+SYM_FUNC_END(copy_from_fs)
 
-GLOBAL(copy_to_fs)
+SYM_FUNC_START_NOALIGN(copy_to_fs)
 	pushw	%es
 	pushw	%fs
 	popw	%es
 	calll	memcpy
 	popw	%es
 	retl
-ENDPROC(copy_to_fs)
+SYM_FUNC_END(copy_to_fs)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2c11c0f45d49..97d9b6d6c1af 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020d		# header version number (>= 0x0105)
+		.word	0x020f		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -567,6 +567,7 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 
 init_size:		.long INIT_SIZE		# kernel initialization size
 handover_offset:	.long 0			# Filled in by build.c
+kernel_info_offset:	.long 0			# Filled in by build.c
 
 # End of setup header #####################################################
 
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 9caa10e82217..da0ccc5de538 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,6 +15,7 @@
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
 #include "../include/asm/cpufeatures.h"
+#include "../include/asm/vmxfeatures.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index c22f9a7d1aeb..cbec8bd0841f 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -21,7 +21,7 @@
 /*
  * void protected_mode_jump(u32 entrypoint, u32 bootparams);
  */
-GLOBAL(protected_mode_jump)
+SYM_FUNC_START_NOALIGN(protected_mode_jump)
 	movl	%edx, %esi		# Pointer to boot_params table
 
 	xorl	%ebx, %ebx
@@ -40,13 +40,13 @@ GLOBAL(protected_mode_jump)
 
 	# Transition to 32-bit mode
 	.byte	0x66, 0xea		# ljmpl opcode
-2:	.long	in_pm32			# offset
+2:	.long	.Lin_pm32		# offset
 	.word	__BOOT_CS		# segment
-ENDPROC(protected_mode_jump)
+SYM_FUNC_END(protected_mode_jump)
 
 	.code32
 	.section ".text32","ax"
-GLOBAL(in_pm32)
+SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32)
 	# Set up data segments for flat 32-bit mode
 	movl	%ecx, %ds
 	movl	%ecx, %es
@@ -72,4 +72,4 @@ GLOBAL(in_pm32)
 	lldt	%cx
 
 	jmpl	*%eax			# Jump to the 32-bit entrypoint
-ENDPROC(in_pm32)
+SYM_FUNC_END(.Lin_pm32)
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0149e41d42c2..3da1c37c6dd5 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -51,7 +51,10 @@ SECTIONS
 	. = ALIGN(16);
 	_end = .;
 
-	/DISCARD/ : { *(.note*) }
+	/DISCARD/	: {
+		*(.eh_frame)
+		*(.note*)
+	}
 
 	/*
 	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a93d44e58f9c..55e669d29e54 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -56,6 +56,7 @@ u8 buf[SETUP_SECT_MAX*512];
 unsigned long efi32_stub_entry;
 unsigned long efi64_stub_entry;
 unsigned long efi_pe_entry;
+unsigned long kernel_info;
 unsigned long startup_64;
 
 /*----------------------------------------------------------------------*/
@@ -321,6 +322,7 @@ static void parse_zoffset(char *fname)
 		PARSE_ZOFS(p, efi32_stub_entry);
 		PARSE_ZOFS(p, efi64_stub_entry);
 		PARSE_ZOFS(p, efi_pe_entry);
+		PARSE_ZOFS(p, kernel_info);
 		PARSE_ZOFS(p, startup_64);
 
 		p = strchr(p, '\n');
@@ -410,6 +412,9 @@ int main(int argc, char ** argv)
 
 	efi_stub_entry_update();
 
+	/* Update kernel_info offset. */
+	put_unaligned_le32(kernel_info, &buf[0x268]);
+
 	crc = partial_crc32(buf, i, crc);
 	if (fwrite(buf, 1, i, dest) != i)
 		die("Writing setup failed");
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index d0a5ffeae8df..0b9654c7a05c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -25,7 +25,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_SMP=y
-CONFIG_CALGARY_IOMMU=y
 CONFIG_NR_CPUS=64
 CONFIG_SCHED_SMT=y
 CONFIG_PREEMPT_VOLUNTARY=y
diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore
new file mode 100644
index 000000000000..30be0400a439
--- /dev/null
+++ b/arch/x86/crypto/.gitignore
@@ -0,0 +1 @@
+poly1305-x86_64-cryptogams.S
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 759b1a927826..b69e00bf20b8 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
 
 obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -48,6 +49,7 @@ ifeq ($(avx_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 	obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
 endif
 
 # These modules require assembler to support AVX2.
@@ -70,6 +72,11 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
 
 nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
+ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),)
+targets += poly1305-x86_64-cryptogams.S
+endif
 
 ifeq ($(avx_supported),yes)
 	camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
@@ -98,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
-poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
 ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
-poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
 ifeq ($(sha1_ni_supported),yes)
 sha1-ssse3-y += sha1_ni_asm.o
@@ -115,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o
 endif
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
+
+quiet_cmd_perlasm = PERLASM $@
+      cmd_perlasm = $(PERL) $< > $@
+$(obj)/%.S: $(src)/%.pl FORCE
+	$(call if_changed,perlasm)
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 4434607e366d..51d46d93efbc 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -71,7 +71,7 @@
  *   %r8
  *   %r9
  */
-__load_partial:
+SYM_FUNC_START_LOCAL(__load_partial)
 	xor %r9d, %r9d
 	pxor MSG, MSG
 
@@ -123,7 +123,7 @@ __load_partial:
 
 .Lld_partial_8:
 	ret
-ENDPROC(__load_partial)
+SYM_FUNC_END(__load_partial)
 
 /*
  * __store_partial: internal ABI
@@ -137,7 +137,7 @@ ENDPROC(__load_partial)
  *   %r9
  *   %r10
  */
-__store_partial:
+SYM_FUNC_START_LOCAL(__store_partial)
 	mov LEN, %r8
 	mov DST, %r9
 
@@ -181,12 +181,12 @@ __store_partial:
 
 .Lst_partial_1:
 	ret
-ENDPROC(__store_partial)
+SYM_FUNC_END(__store_partial)
 
 /*
  * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
  */
-ENTRY(crypto_aegis128_aesni_init)
+SYM_FUNC_START(crypto_aegis128_aesni_init)
 	FRAME_BEGIN
 
 	/* load IV: */
@@ -226,13 +226,13 @@ ENTRY(crypto_aegis128_aesni_init)
 
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_init)
+SYM_FUNC_END(crypto_aegis128_aesni_init)
 
 /*
  * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
  *                               const void *data);
  */
-ENTRY(crypto_aegis128_aesni_ad)
+SYM_FUNC_START(crypto_aegis128_aesni_ad)
 	FRAME_BEGIN
 
 	cmp $0x10, LEN
@@ -378,7 +378,7 @@ ENTRY(crypto_aegis128_aesni_ad)
 .Lad_out:
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_ad)
+SYM_FUNC_END(crypto_aegis128_aesni_ad)
 
 .macro encrypt_block a s0 s1 s2 s3 s4 i
 	movdq\a (\i * 0x10)(SRC), MSG
@@ -402,7 +402,7 @@ ENDPROC(crypto_aegis128_aesni_ad)
  * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
  *                                const void *src, void *dst);
  */
-ENTRY(crypto_aegis128_aesni_enc)
+SYM_FUNC_START(crypto_aegis128_aesni_enc)
 	FRAME_BEGIN
 
 	cmp $0x10, LEN
@@ -493,13 +493,13 @@ ENTRY(crypto_aegis128_aesni_enc)
 .Lenc_out:
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_enc)
+SYM_FUNC_END(crypto_aegis128_aesni_enc)
 
 /*
  * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
  *                                     const void *src, void *dst);
  */
-ENTRY(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -533,7 +533,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail)
 
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
 
 .macro decrypt_block a s0 s1 s2 s3 s4 i
 	movdq\a (\i * 0x10)(SRC), MSG
@@ -556,7 +556,7 @@ ENDPROC(crypto_aegis128_aesni_enc_tail)
  * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
  *                                const void *src, void *dst);
  */
-ENTRY(crypto_aegis128_aesni_dec)
+SYM_FUNC_START(crypto_aegis128_aesni_dec)
 	FRAME_BEGIN
 
 	cmp $0x10, LEN
@@ -647,13 +647,13 @@ ENTRY(crypto_aegis128_aesni_dec)
 .Ldec_out:
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_dec)
+SYM_FUNC_END(crypto_aegis128_aesni_dec)
 
 /*
  * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
  *                                     const void *src, void *dst);
  */
-ENTRY(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -697,13 +697,13 @@ ENTRY(crypto_aegis128_aesni_dec_tail)
 
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
 
 /*
  * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
  *                                  u64 assoclen, u64 cryptlen);
  */
-ENTRY(crypto_aegis128_aesni_final)
+SYM_FUNC_START(crypto_aegis128_aesni_final)
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -744,4 +744,4 @@ ENTRY(crypto_aegis128_aesni_final)
 
 	FRAME_END
 	ret
-ENDPROC(crypto_aegis128_aesni_final)
+SYM_FUNC_END(crypto_aegis128_aesni_final)
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 46d227122643..4623189000d8 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -144,10 +144,8 @@ static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
 {
 	struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead);
 
-	if (keylen != AEGIS128_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != AEGIS128_KEY_SIZE)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
 
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index 5f6a5af9c489..ec437db1fa54 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -544,11 +544,11 @@ ddq_add_8:
  * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
  *			unsigned int num_bytes)
  */
-ENTRY(aes_ctr_enc_128_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
 	/* call the aes main loop */
 	do_aes_ctrmain KEY_128
 
-ENDPROC(aes_ctr_enc_128_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
 
 /*
  * routine to do AES192 CTR enc/decrypt "by8"
@@ -557,11 +557,11 @@ ENDPROC(aes_ctr_enc_128_avx_by8)
  * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
  *			unsigned int num_bytes)
  */
-ENTRY(aes_ctr_enc_192_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
 	/* call the aes main loop */
 	do_aes_ctrmain KEY_192
 
-ENDPROC(aes_ctr_enc_192_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
 
 /*
  * routine to do AES256 CTR enc/decrypt "by8"
@@ -570,8 +570,8 @@ ENDPROC(aes_ctr_enc_192_avx_by8)
  * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
  *			unsigned int num_bytes)
  */
-ENTRY(aes_ctr_enc_256_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
 	/* call the aes main loop */
 	do_aes_ctrmain KEY_256
 
-ENDPROC(aes_ctr_enc_256_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e40bdf024ba7..cad6e1bfa7d5 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1592,7 +1592,7 @@ _esb_loop_\@:
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
-ENTRY(aesni_gcm_dec)
+SYM_FUNC_START(aesni_gcm_dec)
 	FUNC_SAVE
 
 	GCM_INIT %arg6, arg7, arg8, arg9
@@ -1600,7 +1600,7 @@ ENTRY(aesni_gcm_dec)
 	GCM_COMPLETE arg10, arg11
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_dec)
+SYM_FUNC_END(aesni_gcm_dec)
 
 
 /*****************************************************************************
@@ -1680,7 +1680,7 @@ ENDPROC(aesni_gcm_dec)
 *
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
-ENTRY(aesni_gcm_enc)
+SYM_FUNC_START(aesni_gcm_enc)
 	FUNC_SAVE
 
 	GCM_INIT %arg6, arg7, arg8, arg9
@@ -1689,7 +1689,7 @@ ENTRY(aesni_gcm_enc)
 	GCM_COMPLETE arg10, arg11
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc)
+SYM_FUNC_END(aesni_gcm_enc)
 
 /*****************************************************************************
 * void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
@@ -1702,12 +1702,12 @@ ENDPROC(aesni_gcm_enc)
 *                     const u8 *aad,      // Additional Authentication Data (AAD)
 *                     u64 aad_len)        // Length of AAD in bytes.
 */
-ENTRY(aesni_gcm_init)
+SYM_FUNC_START(aesni_gcm_init)
 	FUNC_SAVE
 	GCM_INIT %arg3, %arg4,%arg5, %arg6
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_init)
+SYM_FUNC_END(aesni_gcm_init)
 
 /*****************************************************************************
 * void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
@@ -1717,12 +1717,12 @@ ENDPROC(aesni_gcm_init)
 *                    const u8 *in,       // Plaintext input
 *                    u64 plaintext_len,  // Length of data in bytes for encryption.
 */
-ENTRY(aesni_gcm_enc_update)
+SYM_FUNC_START(aesni_gcm_enc_update)
 	FUNC_SAVE
 	GCM_ENC_DEC enc
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc_update)
+SYM_FUNC_END(aesni_gcm_enc_update)
 
 /*****************************************************************************
 * void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
@@ -1732,12 +1732,12 @@ ENDPROC(aesni_gcm_enc_update)
 *                    const u8 *in,       // Plaintext input
 *                    u64 plaintext_len,  // Length of data in bytes for encryption.
 */
-ENTRY(aesni_gcm_dec_update)
+SYM_FUNC_START(aesni_gcm_dec_update)
 	FUNC_SAVE
 	GCM_ENC_DEC dec
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_dec_update)
+SYM_FUNC_END(aesni_gcm_dec_update)
 
 /*****************************************************************************
 * void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
@@ -1747,19 +1747,18 @@ ENDPROC(aesni_gcm_dec_update)
 *                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
 *                                        // 12 or 8.
 */
-ENTRY(aesni_gcm_finalize)
+SYM_FUNC_START(aesni_gcm_finalize)
 	FUNC_SAVE
 	GCM_COMPLETE %arg3 %arg4
 	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_finalize)
+SYM_FUNC_END(aesni_gcm_finalize)
 
 #endif
 
 
-.align 4
-_key_expansion_128:
-_key_expansion_256a:
+SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
+SYM_FUNC_START_LOCAL(_key_expansion_256a)
 	pshufd $0b11111111, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
 	pxor %xmm4, %xmm0
@@ -1769,11 +1768,10 @@ _key_expansion_256a:
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
 	ret
-ENDPROC(_key_expansion_128)
-ENDPROC(_key_expansion_256a)
+SYM_FUNC_END(_key_expansion_256a)
+SYM_FUNC_END_ALIAS(_key_expansion_128)
 
-.align 4
-_key_expansion_192a:
+SYM_FUNC_START_LOCAL(_key_expansion_192a)
 	pshufd $0b01010101, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
 	pxor %xmm4, %xmm0
@@ -1795,10 +1793,9 @@ _key_expansion_192a:
 	movaps %xmm1, 0x10(TKEYP)
 	add $0x20, TKEYP
 	ret
-ENDPROC(_key_expansion_192a)
+SYM_FUNC_END(_key_expansion_192a)
 
-.align 4
-_key_expansion_192b:
+SYM_FUNC_START_LOCAL(_key_expansion_192b)
 	pshufd $0b01010101, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
 	pxor %xmm4, %xmm0
@@ -1815,10 +1812,9 @@ _key_expansion_192b:
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
 	ret
-ENDPROC(_key_expansion_192b)
+SYM_FUNC_END(_key_expansion_192b)
 
-.align 4
-_key_expansion_256b:
+SYM_FUNC_START_LOCAL(_key_expansion_256b)
 	pshufd $0b10101010, %xmm1, %xmm1
 	shufps $0b00010000, %xmm2, %xmm4
 	pxor %xmm4, %xmm2
@@ -1828,13 +1824,13 @@ _key_expansion_256b:
 	movaps %xmm2, (TKEYP)
 	add $0x10, TKEYP
 	ret
-ENDPROC(_key_expansion_256b)
+SYM_FUNC_END(_key_expansion_256b)
 
 /*
  * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
  *                   unsigned int key_len)
  */
-ENTRY(aesni_set_key)
+SYM_FUNC_START(aesni_set_key)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl KEYP
@@ -1943,12 +1939,12 @@ ENTRY(aesni_set_key)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_set_key)
+SYM_FUNC_END(aesni_set_key)
 
 /*
- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
  */
-ENTRY(aesni_enc)
+SYM_FUNC_START(aesni_enc)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl KEYP
@@ -1967,7 +1963,7 @@ ENTRY(aesni_enc)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_enc)
+SYM_FUNC_END(aesni_enc)
 
 /*
  * _aesni_enc1:		internal ABI
@@ -1981,8 +1977,7 @@ ENDPROC(aesni_enc)
  *	KEY
  *	TKEYP (T1)
  */
-.align 4
-_aesni_enc1:
+SYM_FUNC_START_LOCAL(_aesni_enc1)
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
 	pxor KEY, STATE		# round 0
@@ -2025,7 +2020,7 @@ _aesni_enc1:
 	movaps 0x70(TKEYP), KEY
 	AESENCLAST KEY STATE
 	ret
-ENDPROC(_aesni_enc1)
+SYM_FUNC_END(_aesni_enc1)
 
 /*
  * _aesni_enc4:	internal ABI
@@ -2045,8 +2040,7 @@ ENDPROC(_aesni_enc1)
  *	KEY
  *	TKEYP (T1)
  */
-.align 4
-_aesni_enc4:
+SYM_FUNC_START_LOCAL(_aesni_enc4)
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
 	pxor KEY, STATE1		# round 0
@@ -2134,12 +2128,12 @@ _aesni_enc4:
 	AESENCLAST KEY STATE3
 	AESENCLAST KEY STATE4
 	ret
-ENDPROC(_aesni_enc4)
+SYM_FUNC_END(_aesni_enc4)
 
 /*
- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
  */
-ENTRY(aesni_dec)
+SYM_FUNC_START(aesni_dec)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl KEYP
@@ -2159,7 +2153,7 @@ ENTRY(aesni_dec)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_dec)
+SYM_FUNC_END(aesni_dec)
 
 /*
  * _aesni_dec1:		internal ABI
@@ -2173,8 +2167,7 @@ ENDPROC(aesni_dec)
  *	KEY
  *	TKEYP (T1)
  */
-.align 4
-_aesni_dec1:
+SYM_FUNC_START_LOCAL(_aesni_dec1)
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
 	pxor KEY, STATE		# round 0
@@ -2217,7 +2210,7 @@ _aesni_dec1:
 	movaps 0x70(TKEYP), KEY
 	AESDECLAST KEY STATE
 	ret
-ENDPROC(_aesni_dec1)
+SYM_FUNC_END(_aesni_dec1)
 
 /*
  * _aesni_dec4:	internal ABI
@@ -2237,8 +2230,7 @@ ENDPROC(_aesni_dec1)
  *	KEY
  *	TKEYP (T1)
  */
-.align 4
-_aesni_dec4:
+SYM_FUNC_START_LOCAL(_aesni_dec4)
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
 	pxor KEY, STATE1		# round 0
@@ -2326,13 +2318,13 @@ _aesni_dec4:
 	AESDECLAST KEY STATE3
 	AESDECLAST KEY STATE4
 	ret
-ENDPROC(_aesni_dec4)
+SYM_FUNC_END(_aesni_dec4)
 
 /*
  * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  *		      size_t len)
  */
-ENTRY(aesni_ecb_enc)
+SYM_FUNC_START(aesni_ecb_enc)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl LEN
@@ -2386,13 +2378,13 @@ ENTRY(aesni_ecb_enc)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_ecb_enc)
+SYM_FUNC_END(aesni_ecb_enc)
 
 /*
  * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  *		      size_t len);
  */
-ENTRY(aesni_ecb_dec)
+SYM_FUNC_START(aesni_ecb_dec)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl LEN
@@ -2447,13 +2439,13 @@ ENTRY(aesni_ecb_dec)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_ecb_dec)
+SYM_FUNC_END(aesni_ecb_dec)
 
 /*
  * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  *		      size_t len, u8 *iv)
  */
-ENTRY(aesni_cbc_enc)
+SYM_FUNC_START(aesni_cbc_enc)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl IVP
@@ -2491,13 +2483,13 @@ ENTRY(aesni_cbc_enc)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_cbc_enc)
+SYM_FUNC_END(aesni_cbc_enc)
 
 /*
  * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  *		      size_t len, u8 *iv)
  */
-ENTRY(aesni_cbc_dec)
+SYM_FUNC_START(aesni_cbc_dec)
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl IVP
@@ -2584,7 +2576,7 @@ ENTRY(aesni_cbc_dec)
 #endif
 	FRAME_END
 	ret
-ENDPROC(aesni_cbc_dec)
+SYM_FUNC_END(aesni_cbc_dec)
 
 #ifdef __x86_64__
 .pushsection .rodata
@@ -2604,8 +2596,7 @@ ENDPROC(aesni_cbc_dec)
  *	INC:	== 1, in little endian
  *	BSWAP_MASK == endian swapping mask
  */
-.align 4
-_aesni_inc_init:
+SYM_FUNC_START_LOCAL(_aesni_inc_init)
 	movaps .Lbswap_mask, BSWAP_MASK
 	movaps IV, CTR
 	PSHUFB_XMM BSWAP_MASK CTR
@@ -2613,7 +2604,7 @@ _aesni_inc_init:
 	MOVQ_R64_XMM TCTR_LOW INC
 	MOVQ_R64_XMM CTR TCTR_LOW
 	ret
-ENDPROC(_aesni_inc_init)
+SYM_FUNC_END(_aesni_inc_init)
 
 /*
  * _aesni_inc:		internal ABI
@@ -2630,8 +2621,7 @@ ENDPROC(_aesni_inc_init)
  *	CTR:	== output IV, in little endian
  *	TCTR_LOW: == lower qword of CTR
  */
-.align 4
-_aesni_inc:
+SYM_FUNC_START_LOCAL(_aesni_inc)
 	paddq INC, CTR
 	add $1, TCTR_LOW
 	jnc .Linc_low
@@ -2642,13 +2632,13 @@ _aesni_inc:
 	movaps CTR, IV
 	PSHUFB_XMM BSWAP_MASK IV
 	ret
-ENDPROC(_aesni_inc)
+SYM_FUNC_END(_aesni_inc)
 
 /*
  * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  *		      size_t len, u8 *iv)
  */
-ENTRY(aesni_ctr_enc)
+SYM_FUNC_START(aesni_ctr_enc)
 	FRAME_BEGIN
 	cmp $16, LEN
 	jb .Lctr_enc_just_ret
@@ -2705,7 +2695,7 @@ ENTRY(aesni_ctr_enc)
 .Lctr_enc_just_ret:
 	FRAME_END
 	ret
-ENDPROC(aesni_ctr_enc)
+SYM_FUNC_END(aesni_ctr_enc)
 
 /*
  * _aesni_gf128mul_x_ble:		internal ABI
@@ -2726,10 +2716,10 @@ ENDPROC(aesni_ctr_enc)
 	pxor CTR, IV;
 
 /*
- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
- *			 bool enc, u8 *iv)
+ * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
+ *			 const u8 *src, bool enc, le128 *iv)
  */
-ENTRY(aesni_xts_crypt8)
+SYM_FUNC_START(aesni_xts_crypt8)
 	FRAME_BEGIN
 	cmpb $0, %cl
 	movl $0, %ecx
@@ -2833,6 +2823,6 @@ ENTRY(aesni_xts_crypt8)
 
 	FRAME_END
 	ret
-ENDPROC(aesni_xts_crypt8)
+SYM_FUNC_END(aesni_xts_crypt8)
 
 #endif
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 91c039ab5699..bfa1c0b3e5b4 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -1775,12 +1775,12 @@ _initial_blocks_done\@:
 #        const   u8 *aad, /* Additional Authentication Data (AAD)*/
 #        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
 #############################################################
-ENTRY(aesni_gcm_init_avx_gen2)
+SYM_FUNC_START(aesni_gcm_init_avx_gen2)
         FUNC_SAVE
         INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_init_avx_gen2)
+SYM_FUNC_END(aesni_gcm_init_avx_gen2)
 
 ###############################################################################
 #void   aesni_gcm_enc_update_avx_gen2(
@@ -1790,7 +1790,7 @@ ENDPROC(aesni_gcm_init_avx_gen2)
 #        const   u8 *in, /* Plaintext input */
 #        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_enc_update_avx_gen2)
+SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
         FUNC_SAVE
         mov     keysize, %eax
         cmp     $32, %eax
@@ -1809,7 +1809,7 @@ key_256_enc_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_enc_update_avx_gen2)
+SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
 
 ###############################################################################
 #void   aesni_gcm_dec_update_avx_gen2(
@@ -1819,7 +1819,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen2)
 #        const   u8 *in, /* Ciphertext input */
 #        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_dec_update_avx_gen2)
+SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
@@ -1838,7 +1838,7 @@ key_256_dec_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_dec_update_avx_gen2)
+SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
 
 ###############################################################################
 #void   aesni_gcm_finalize_avx_gen2(
@@ -1848,7 +1848,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen2)
 #        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
 #				Valid values are 16 (most likely), 12 or 8. */
 ###############################################################################
-ENTRY(aesni_gcm_finalize_avx_gen2)
+SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
         FUNC_SAVE
         mov	keysize,%eax
         cmp     $32, %eax
@@ -1867,7 +1867,7 @@ key_256_finalize:
         GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_finalize_avx_gen2)
+SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
 
 #endif /* CONFIG_AS_AVX */
 
@@ -2746,12 +2746,12 @@ _initial_blocks_done\@:
 #        const   u8 *aad, /* Additional Authentication Data (AAD)*/
 #        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
 #############################################################
-ENTRY(aesni_gcm_init_avx_gen4)
+SYM_FUNC_START(aesni_gcm_init_avx_gen4)
         FUNC_SAVE
         INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_init_avx_gen4)
+SYM_FUNC_END(aesni_gcm_init_avx_gen4)
 
 ###############################################################################
 #void   aesni_gcm_enc_avx_gen4(
@@ -2761,7 +2761,7 @@ ENDPROC(aesni_gcm_init_avx_gen4)
 #        const   u8 *in, /* Plaintext input */
 #        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_enc_update_avx_gen4)
+SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
@@ -2780,7 +2780,7 @@ key_256_enc_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
         FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc_update_avx_gen4)
+SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
 
 ###############################################################################
 #void   aesni_gcm_dec_update_avx_gen4(
@@ -2790,7 +2790,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen4)
 #        const   u8 *in, /* Ciphertext input */
 #        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_dec_update_avx_gen4)
+SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
@@ -2809,7 +2809,7 @@ key_256_dec_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_dec_update_avx_gen4)
+SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
 
 ###############################################################################
 #void   aesni_gcm_finalize_avx_gen4(
@@ -2819,7 +2819,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen4)
 #        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
 #                              Valid values are 16 (most likely), 12 or 8. */
 ###############################################################################
-ENTRY(aesni_gcm_finalize_avx_gen4)
+SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
         FUNC_SAVE
         mov	keysize,%eax
         cmp     $32, %eax
@@ -2838,6 +2838,6 @@ key_256_finalize4:
         GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_finalize_avx_gen4)
+SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
 
 #endif /* CONFIG_AS_AVX2 */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 3e707e81afdb..bbbebbd35b5d 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -83,10 +83,8 @@ struct gcm_context_data {
 
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
-asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
-			  const u8 *in);
-asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
-			  const u8 *in);
+asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
+asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
 asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len);
 asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
@@ -106,8 +104,8 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 
-asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
-				 const u8 *in, bool enc, u8 *iv);
+asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
+				 const u8 *in, bool enc, le128 *iv);
 
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
@@ -318,14 +316,11 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
 			      const u8 *in_key, unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
-	u32 *flags = &tfm->crt_flags;
 	int err;
 
 	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
-	}
 
 	if (!crypto_simd_usable())
 		err = aes_expandkey(ctx, in_key, key_len);
@@ -550,29 +545,24 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 }
 
 
-static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
-{
-	aesni_enc(ctx, out, in);
-}
-
-static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
 }
 
-static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
 }
 
-static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
+	aesni_xts_crypt8(ctx, dst, src, true, iv);
 }
 
-static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
+	aesni_xts_crypt8(ctx, dst, src, false, iv);
 }
 
 static const struct common_glue_ctx aesni_enc_xts = {
@@ -581,10 +571,10 @@ static const struct common_glue_ctx aesni_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = 8,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
+		.fn_u = { .xts = aesni_xts_enc8 }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
+		.fn_u = { .xts = aesni_xts_enc }
 	} }
 };
 
@@ -594,10 +584,10 @@ static const struct common_glue_ctx aesni_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = 8,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
+		.fn_u = { .xts = aesni_xts_dec8 }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
+		.fn_u = { .xts = aesni_xts_dec }
 	} }
 };
 
@@ -606,8 +596,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&aesni_enc_xts, req,
-				   XTS_TWEAK_CAST(aesni_xts_tweak),
+	return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
 				   aes_ctx(ctx->raw_tweak_ctx),
 				   aes_ctx(ctx->raw_crypt_ctx),
 				   false);
@@ -618,8 +607,7 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&aesni_dec_xts, req,
-				   XTS_TWEAK_CAST(aesni_xts_tweak),
+	return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
 				   aes_ctx(ctx->raw_tweak_ctx),
 				   aes_ctx(ctx->raw_crypt_ctx),
 				   true);
@@ -650,10 +638,9 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
 {
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
 
-	if (key_len < 4) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (key_len < 4)
 		return -EINVAL;
-	}
+
 	/*Account for 4 byte nonce at the end.*/
 	key_len -= 4;
 
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
new file mode 100644
index 000000000000..24910b766bdd
--- /dev/null
+++ b/arch/x86/crypto/blake2s-core.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
+.align 32
+IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
+	.octa 0x5BE0CD191F83D9AB9B05688C510E527F
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
+ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
+.align 16
+ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
+.align 64
+SIGMA:
+.byte  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
+.byte 14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
+.byte 11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
+.byte  7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
+.byte  9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
+.byte  2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
+.byte 12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
+.byte 13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
+.byte  6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
+.byte 10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12
+#ifdef CONFIG_AS_AVX512
+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
+.align 64
+SIGMA2:
+.long  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
+.long  8,  2, 13, 15, 10,  9, 12,  3,  6,  4,  0, 14,  5, 11,  1,  7
+.long 11, 13,  8,  6,  5, 10, 14,  3,  2,  4, 12, 15,  1,  0,  7,  9
+.long 11, 10,  7,  0,  8, 15,  1, 13,  3,  6,  2, 12,  4, 14,  9,  5
+.long  4, 10,  9, 14, 15,  0, 11,  8,  1,  7,  3, 13,  2,  5,  6, 12
+.long  2, 11,  4, 15, 14,  3, 10,  8, 13,  6,  5,  7,  0, 12,  1,  9
+.long  4,  8, 15,  9, 14, 11, 13,  5,  3,  2,  1, 12,  6, 10,  7,  0
+.long  6, 13,  0, 14, 12,  2,  1, 11, 15,  4,  5,  8,  7,  9,  3, 10
+.long 15,  5,  4, 13, 10,  7,  3, 11, 12,  2,  0,  6,  9,  8,  1, 14
+.long  8,  7, 14, 11, 13, 15,  0, 12, 10,  4,  5,  6,  3,  2,  1,  9
+#endif /* CONFIG_AS_AVX512 */
+
+.text
+#ifdef CONFIG_AS_SSSE3
+SYM_FUNC_START(blake2s_compress_ssse3)
+	testq		%rdx,%rdx
+	je		.Lendofloop
+	movdqu		(%rdi),%xmm0
+	movdqu		0x10(%rdi),%xmm1
+	movdqa		ROT16(%rip),%xmm12
+	movdqa		ROR328(%rip),%xmm13
+	movdqu		0x20(%rdi),%xmm14
+	movq		%rcx,%xmm15
+	leaq		SIGMA+0xa0(%rip),%r8
+	jmp		.Lbeginofloop
+	.align		32
+.Lbeginofloop:
+	movdqa		%xmm0,%xmm10
+	movdqa		%xmm1,%xmm11
+	paddq		%xmm15,%xmm14
+	movdqa		IV(%rip),%xmm2
+	movdqa		%xmm14,%xmm3
+	pxor		IV+0x10(%rip),%xmm3
+	leaq		SIGMA(%rip),%rcx
+.Lroundloop:
+	movzbl		(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm4
+	movzbl		0x1(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm5
+	movzbl		0x2(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm6
+	movzbl		0x3(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm7
+	punpckldq	%xmm5,%xmm4
+	punpckldq	%xmm7,%xmm6
+	punpcklqdq	%xmm6,%xmm4
+	paddd		%xmm4,%xmm0
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm12,%xmm3
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm8
+	psrld		$0xc,%xmm1
+	pslld		$0x14,%xmm8
+	por		%xmm8,%xmm1
+	movzbl		0x4(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm5
+	movzbl		0x5(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm6
+	movzbl		0x6(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm7
+	movzbl		0x7(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm4
+	punpckldq	%xmm6,%xmm5
+	punpckldq	%xmm4,%xmm7
+	punpcklqdq	%xmm7,%xmm5
+	paddd		%xmm5,%xmm0
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm13,%xmm3
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm8
+	psrld		$0x7,%xmm1
+	pslld		$0x19,%xmm8
+	por		%xmm8,%xmm1
+	pshufd		$0x93,%xmm0,%xmm0
+	pshufd		$0x4e,%xmm3,%xmm3
+	pshufd		$0x39,%xmm2,%xmm2
+	movzbl		0x8(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm6
+	movzbl		0x9(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm7
+	movzbl		0xa(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm4
+	movzbl		0xb(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm5
+	punpckldq	%xmm7,%xmm6
+	punpckldq	%xmm5,%xmm4
+	punpcklqdq	%xmm4,%xmm6
+	paddd		%xmm6,%xmm0
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm12,%xmm3
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm8
+	psrld		$0xc,%xmm1
+	pslld		$0x14,%xmm8
+	por		%xmm8,%xmm1
+	movzbl		0xc(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm7
+	movzbl		0xd(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm4
+	movzbl		0xe(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm5
+	movzbl		0xf(%rcx),%eax
+	movd		(%rsi,%rax,4),%xmm6
+	punpckldq	%xmm4,%xmm7
+	punpckldq	%xmm6,%xmm5
+	punpcklqdq	%xmm5,%xmm7
+	paddd		%xmm7,%xmm0
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm13,%xmm3
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm8
+	psrld		$0x7,%xmm1
+	pslld		$0x19,%xmm8
+	por		%xmm8,%xmm1
+	pshufd		$0x39,%xmm0,%xmm0
+	pshufd		$0x4e,%xmm3,%xmm3
+	pshufd		$0x93,%xmm2,%xmm2
+	addq		$0x10,%rcx
+	cmpq		%r8,%rcx
+	jnz		.Lroundloop
+	pxor		%xmm2,%xmm0
+	pxor		%xmm3,%xmm1
+	pxor		%xmm10,%xmm0
+	pxor		%xmm11,%xmm1
+	addq		$0x40,%rsi
+	decq		%rdx
+	jnz		.Lbeginofloop
+	movdqu		%xmm0,(%rdi)
+	movdqu		%xmm1,0x10(%rdi)
+	movdqu		%xmm14,0x20(%rdi)
+.Lendofloop:
+	ret
+SYM_FUNC_END(blake2s_compress_ssse3)
+#endif /* CONFIG_AS_SSSE3 */
+
+#ifdef CONFIG_AS_AVX512
+SYM_FUNC_START(blake2s_compress_avx512)
+	vmovdqu		(%rdi),%xmm0
+	vmovdqu		0x10(%rdi),%xmm1
+	vmovdqu		0x20(%rdi),%xmm4
+	vmovq		%rcx,%xmm5
+	vmovdqa		IV(%rip),%xmm14
+	vmovdqa		IV+16(%rip),%xmm15
+	jmp		.Lblake2s_compress_avx512_mainloop
+.align 32
+.Lblake2s_compress_avx512_mainloop:
+	vmovdqa		%xmm0,%xmm10
+	vmovdqa		%xmm1,%xmm11
+	vpaddq		%xmm5,%xmm4,%xmm4
+	vmovdqa		%xmm14,%xmm2
+	vpxor		%xmm15,%xmm4,%xmm3
+	vmovdqu		(%rsi),%ymm6
+	vmovdqu		0x20(%rsi),%ymm7
+	addq		$0x40,%rsi
+	leaq		SIGMA2(%rip),%rax
+	movb		$0xa,%cl
+.Lblake2s_compress_avx512_roundloop:
+	addq		$0x40,%rax
+	vmovdqa		-0x40(%rax),%ymm8
+	vmovdqa		-0x20(%rax),%ymm9
+	vpermi2d	%ymm7,%ymm6,%ymm8
+	vpermi2d	%ymm7,%ymm6,%ymm9
+	vmovdqa		%ymm8,%ymm6
+	vmovdqa		%ymm9,%ymm7
+	vpaddd		%xmm8,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x10,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0xc,%xmm1,%xmm1
+	vextracti128	$0x1,%ymm8,%xmm8
+	vpaddd		%xmm8,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x8,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0x7,%xmm1,%xmm1
+	vpshufd		$0x93,%xmm0,%xmm0
+	vpshufd		$0x4e,%xmm3,%xmm3
+	vpshufd		$0x39,%xmm2,%xmm2
+	vpaddd		%xmm9,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x10,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0xc,%xmm1,%xmm1
+	vextracti128	$0x1,%ymm9,%xmm9
+	vpaddd		%xmm9,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x8,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0x7,%xmm1,%xmm1
+	vpshufd		$0x39,%xmm0,%xmm0
+	vpshufd		$0x4e,%xmm3,%xmm3
+	vpshufd		$0x93,%xmm2,%xmm2
+	decb		%cl
+	jne		.Lblake2s_compress_avx512_roundloop
+	vpxor		%xmm10,%xmm0,%xmm0
+	vpxor		%xmm11,%xmm1,%xmm1
+	vpxor		%xmm2,%xmm0,%xmm0
+	vpxor		%xmm3,%xmm1,%xmm1
+	decq		%rdx
+	jne		.Lblake2s_compress_avx512_mainloop
+	vmovdqu		%xmm0,(%rdi)
+	vmovdqu		%xmm1,0x10(%rdi)
+	vmovdqu		%xmm4,0x20(%rdi)
+	vzeroupper
+	retq
+SYM_FUNC_END(blake2s_compress_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
new file mode 100644
index 000000000000..06ef2d4a4701
--- /dev/null
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
+#include <asm/processor.h>
+#include <asm/simd.h>
+
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+				       const u8 *block, const size_t nblocks,
+				       const u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+					const u8 *block, const size_t nblocks,
+					const u32 inc);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+void blake2s_compress_arch(struct blake2s_state *state,
+			   const u8 *block, size_t nblocks,
+			   const u32 inc)
+{
+	/* SIMD disables preemption, so relax after processing each page. */
+	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+
+	if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
+		blake2s_compress_generic(state, block, nblocks, inc);
+		return;
+	}
+
+	for (;;) {
+		const size_t blocks = min_t(size_t, nblocks,
+					    PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+
+		kernel_fpu_begin();
+		if (IS_ENABLED(CONFIG_AS_AVX512) &&
+		    static_branch_likely(&blake2s_use_avx512))
+			blake2s_compress_avx512(state, block, blocks, inc);
+		else
+			blake2s_compress_ssse3(state, block, blocks, inc);
+		kernel_fpu_end();
+
+		nblocks -= blocks;
+		if (!nblocks)
+			break;
+		block += blocks * BLAKE2S_BLOCK_SIZE;
+	}
+}
+EXPORT_SYMBOL(blake2s_compress_arch);
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+	if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
+		return -EINVAL;
+
+	memcpy(tctx->key, key, keylen);
+	tctx->keylen = keylen;
+
+	return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct blake2s_state *state = shash_desc_ctx(desc);
+	const int outlen = crypto_shash_digestsize(desc->tfm);
+
+	if (tctx->keylen)
+		blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+	else
+		blake2s_init(state, outlen);
+
+	return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+				 unsigned int inlen)
+{
+	struct blake2s_state *state = shash_desc_ctx(desc);
+	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+	if (unlikely(!inlen))
+		return 0;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCK_SIZE) {
+		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+		/* Hash one less (full) block than strictly possible */
+		blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
+
+	return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+	struct blake2s_state *state = shash_desc_ctx(desc);
+
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+	blake2s_compress_arch(state, state->buf, 1, state->buflen);
+	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+	memcpy(out, state->h, state->outlen);
+	memzero_explicit(state, sizeof(*state));
+
+	return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+	.base.cra_name		= "blake2s-128",
+	.base.cra_driver_name	= "blake2s-128-x86",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_128_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-160",
+	.base.cra_driver_name	= "blake2s-160-x86",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_160_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-224",
+	.base.cra_driver_name	= "blake2s-224-x86",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_224_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-256",
+	.base.cra_driver_name	= "blake2s-256-x86",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_256_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_SSSE3))
+		return 0;
+
+	static_branch_enable(&blake2s_use_ssse3);
+
+	if (IS_ENABLED(CONFIG_AS_AVX512) &&
+	    boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_AVX2) &&
+	    boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512VL) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+			      XFEATURE_MASK_AVX512, NULL))
+		static_branch_enable(&blake2s_use_avx512);
+
+	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+		crypto_register_shashes(blake2s_algs,
+					ARRAY_SIZE(blake2s_algs)) : 0;
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 330db7a48af8..4222ac6d6584 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -103,7 +103,7 @@
 	bswapq 			RX0; \
 	xorq RX0, 		(RIO);
 
-ENTRY(__blowfish_enc_blk)
+SYM_FUNC_START(__blowfish_enc_blk)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -139,9 +139,9 @@ ENTRY(__blowfish_enc_blk)
 .L__enc_xor:
 	xor_block();
 	ret;
-ENDPROC(__blowfish_enc_blk)
+SYM_FUNC_END(__blowfish_enc_blk)
 
-ENTRY(blowfish_dec_blk)
+SYM_FUNC_START(blowfish_dec_blk)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -171,7 +171,7 @@ ENTRY(blowfish_dec_blk)
 	movq %r11, %r12;
 
 	ret;
-ENDPROC(blowfish_dec_blk)
+SYM_FUNC_END(blowfish_dec_blk)
 
 /**********************************************************************
   4-way blowfish, four blocks parallel
@@ -283,7 +283,7 @@ ENDPROC(blowfish_dec_blk)
 	bswapq 			RX3; \
 	xorq RX3,		24(RIO);
 
-ENTRY(__blowfish_enc_blk_4way)
+SYM_FUNC_START(__blowfish_enc_blk_4way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -330,9 +330,9 @@ ENTRY(__blowfish_enc_blk_4way)
 	popq %rbx;
 	popq %r12;
 	ret;
-ENDPROC(__blowfish_enc_blk_4way)
+SYM_FUNC_END(__blowfish_enc_blk_4way)
 
-ENTRY(blowfish_dec_blk_4way)
+SYM_FUNC_START(blowfish_dec_blk_4way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -365,4 +365,4 @@ ENTRY(blowfish_dec_blk_4way)
 	popq %r12;
 
 	ret;
-ENDPROC(blowfish_dec_blk_4way)
+SYM_FUNC_END(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index a14af6eb09cb..d01ddd73de65 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -189,20 +189,20 @@
  * larger and would only be 0.5% faster (on sandy-bridge).
  */
 .align 8
-roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 	roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
 		  %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
 		  %rcx, (%r9));
 	ret;
-ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
 .align 8
-roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
 		  %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
 		  %rax, (%r9));
 	ret;
-ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 /*
  * IN/OUT:
@@ -722,7 +722,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 .text
 
 .align 8
-__camellia_enc_blk16:
+SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rax: temporary storage, 256 bytes
@@ -806,10 +806,10 @@ __camellia_enc_blk16:
 		     %xmm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
-ENDPROC(__camellia_enc_blk16)
+SYM_FUNC_END(__camellia_enc_blk16)
 
 .align 8
-__camellia_dec_blk16:
+SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rax: temporary storage, 256 bytes
@@ -891,9 +891,9 @@ __camellia_dec_blk16:
 	      ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
-ENDPROC(__camellia_dec_blk16)
+SYM_FUNC_END(__camellia_dec_blk16)
 
-ENTRY(camellia_ecb_enc_16way)
+SYM_FUNC_START(camellia_ecb_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -916,9 +916,9 @@ ENTRY(camellia_ecb_enc_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ecb_enc_16way)
+SYM_FUNC_END(camellia_ecb_enc_16way)
 
-ENTRY(camellia_ecb_dec_16way)
+SYM_FUNC_START(camellia_ecb_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -946,9 +946,9 @@ ENTRY(camellia_ecb_dec_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ecb_dec_16way)
+SYM_FUNC_END(camellia_ecb_dec_16way)
 
-ENTRY(camellia_cbc_dec_16way)
+SYM_FUNC_START(camellia_cbc_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -997,7 +997,7 @@ ENTRY(camellia_cbc_dec_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_cbc_dec_16way)
+SYM_FUNC_END(camellia_cbc_dec_16way)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
@@ -1005,7 +1005,7 @@ ENDPROC(camellia_cbc_dec_16way)
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-ENTRY(camellia_ctr_16way)
+SYM_FUNC_START(camellia_ctr_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -1110,7 +1110,7 @@ ENTRY(camellia_ctr_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ctr_16way)
+SYM_FUNC_END(camellia_ctr_16way)
 
 #define gf128mul_x_ble(iv, mask, tmp) \
 	vpsrad $31, iv, tmp; \
@@ -1120,7 +1120,7 @@ ENDPROC(camellia_ctr_16way)
 	vpxor tmp, iv, iv;
 
 .align 8
-camellia_xts_crypt_16way:
+SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -1254,9 +1254,9 @@ camellia_xts_crypt_16way:
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_xts_crypt_16way)
+SYM_FUNC_END(camellia_xts_crypt_16way)
 
-ENTRY(camellia_xts_enc_16way)
+SYM_FUNC_START(camellia_xts_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -1268,9 +1268,9 @@ ENTRY(camellia_xts_enc_16way)
 	leaq __camellia_enc_blk16, %r9;
 
 	jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_enc_16way)
+SYM_FUNC_END(camellia_xts_enc_16way)
 
-ENTRY(camellia_xts_dec_16way)
+SYM_FUNC_START(camellia_xts_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -1286,4 +1286,4 @@ ENTRY(camellia_xts_dec_16way)
 	leaq __camellia_dec_blk16, %r9;
 
 	jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_dec_16way)
+SYM_FUNC_END(camellia_xts_dec_16way)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 4be4c7c3ba27..563ef6e83cdd 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -223,20 +223,20 @@
  * larger and would only marginally faster.
  */
 .align 8
-roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 	roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
 		  %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
 		  %rcx, (%r9));
 	ret;
-ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
 .align 8
-roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
 		  %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
 		  %rax, (%r9));
 	ret;
-ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 /*
  * IN/OUT:
@@ -760,7 +760,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 .text
 
 .align 8
-__camellia_enc_blk32:
+SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rax: temporary storage, 512 bytes
@@ -844,10 +844,10 @@ __camellia_enc_blk32:
 		     %ymm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
-ENDPROC(__camellia_enc_blk32)
+SYM_FUNC_END(__camellia_enc_blk32)
 
 .align 8
-__camellia_dec_blk32:
+SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rax: temporary storage, 512 bytes
@@ -929,9 +929,9 @@ __camellia_dec_blk32:
 	      ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
-ENDPROC(__camellia_dec_blk32)
+SYM_FUNC_END(__camellia_dec_blk32)
 
-ENTRY(camellia_ecb_enc_32way)
+SYM_FUNC_START(camellia_ecb_enc_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -958,9 +958,9 @@ ENTRY(camellia_ecb_enc_32way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ecb_enc_32way)
+SYM_FUNC_END(camellia_ecb_enc_32way)
 
-ENTRY(camellia_ecb_dec_32way)
+SYM_FUNC_START(camellia_ecb_dec_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -992,9 +992,9 @@ ENTRY(camellia_ecb_dec_32way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ecb_dec_32way)
+SYM_FUNC_END(camellia_ecb_dec_32way)
 
-ENTRY(camellia_cbc_dec_32way)
+SYM_FUNC_START(camellia_cbc_dec_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -1060,7 +1060,7 @@ ENTRY(camellia_cbc_dec_32way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_cbc_dec_32way)
+SYM_FUNC_END(camellia_cbc_dec_32way)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
@@ -1076,7 +1076,7 @@ ENDPROC(camellia_cbc_dec_32way)
 	vpslldq $8, tmp1, tmp1; \
 	vpsubq tmp1, x, x;
 
-ENTRY(camellia_ctr_32way)
+SYM_FUNC_START(camellia_ctr_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -1200,7 +1200,7 @@ ENTRY(camellia_ctr_32way)
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_ctr_32way)
+SYM_FUNC_END(camellia_ctr_32way)
 
 #define gf128mul_x_ble(iv, mask, tmp) \
 	vpsrad $31, iv, tmp; \
@@ -1222,7 +1222,7 @@ ENDPROC(camellia_ctr_32way)
 	vpxor tmp1, iv, iv;
 
 .align 8
-camellia_xts_crypt_32way:
+SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -1367,9 +1367,9 @@ camellia_xts_crypt_32way:
 
 	FRAME_END
 	ret;
-ENDPROC(camellia_xts_crypt_32way)
+SYM_FUNC_END(camellia_xts_crypt_32way)
 
-ENTRY(camellia_xts_enc_32way)
+SYM_FUNC_START(camellia_xts_enc_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -1382,9 +1382,9 @@ ENTRY(camellia_xts_enc_32way)
 	leaq __camellia_enc_blk32, %r9;
 
 	jmp camellia_xts_crypt_32way;
-ENDPROC(camellia_xts_enc_32way)
+SYM_FUNC_END(camellia_xts_enc_32way)
 
-ENTRY(camellia_xts_dec_32way)
+SYM_FUNC_START(camellia_xts_dec_32way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (32 blocks)
@@ -1400,4 +1400,4 @@ ENTRY(camellia_xts_dec_32way)
 	leaq __camellia_dec_blk32, %r9;
 
 	jmp camellia_xts_crypt_32way;
-ENDPROC(camellia_xts_dec_32way)
+SYM_FUNC_END(camellia_xts_dec_32way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 23528bc18fc6..1372e6408850 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -175,7 +175,7 @@
 	bswapq				RAB0; \
 	movq RAB0,			4*2(RIO);
 
-ENTRY(__camellia_enc_blk)
+SYM_FUNC_START(__camellia_enc_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -220,9 +220,9 @@ ENTRY(__camellia_enc_blk)
 
 	movq RR12, %r12;
 	ret;
-ENDPROC(__camellia_enc_blk)
+SYM_FUNC_END(__camellia_enc_blk)
 
-ENTRY(camellia_dec_blk)
+SYM_FUNC_START(camellia_dec_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -258,7 +258,7 @@ ENTRY(camellia_dec_blk)
 
 	movq RR12, %r12;
 	ret;
-ENDPROC(camellia_dec_blk)
+SYM_FUNC_END(camellia_dec_blk)
 
 /**********************************************************************
   2-way camellia
@@ -409,7 +409,7 @@ ENDPROC(camellia_dec_blk)
 		bswapq				RAB1; \
 		movq RAB1,			12*2(RIO);
 
-ENTRY(__camellia_enc_blk_2way)
+SYM_FUNC_START(__camellia_enc_blk_2way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -456,9 +456,9 @@ ENTRY(__camellia_enc_blk_2way)
 	movq RR12, %r12;
 	popq %rbx;
 	ret;
-ENDPROC(__camellia_enc_blk_2way)
+SYM_FUNC_END(__camellia_enc_blk_2way)
 
-ENTRY(camellia_dec_blk_2way)
+SYM_FUNC_START(camellia_dec_blk_2way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -496,4 +496,4 @@ ENTRY(camellia_dec_blk_2way)
 	movq RR12, %r12;
 	movq RXOR, %rbx;
 	ret;
-ENDPROC(camellia_dec_blk_2way)
+SYM_FUNC_END(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index a4f00128ea55..ccda647422d6 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -19,20 +19,17 @@
 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
 
 /* 32-way AVX2/AES-NI parallel cipher functions */
-asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
+asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
+asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
 
-asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
+asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
 
 static const struct common_glue_ctx camellia_enc = {
 	.num_funcs = 4,
@@ -40,16 +37,16 @@ static const struct common_glue_ctx camellia_enc = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
+		.fn_u = { .ecb = camellia_ecb_enc_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
+		.fn_u = { .ecb = camellia_ecb_enc_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+		.fn_u = { .ecb = camellia_enc_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+		.fn_u = { .ecb = camellia_enc_blk }
 	} }
 };
 
@@ -59,16 +56,16 @@ static const struct common_glue_ctx camellia_ctr = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
+		.fn_u = { .ctr = camellia_ctr_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
+		.fn_u = { .ctr = camellia_ctr_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+		.fn_u = { .ctr = camellia_crypt_ctr_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+		.fn_u = { .ctr = camellia_crypt_ctr }
 	} }
 };
 
@@ -78,13 +75,13 @@ static const struct common_glue_ctx camellia_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
+		.fn_u = { .xts = camellia_xts_enc_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
+		.fn_u = { .xts = camellia_xts_enc_16way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
+		.fn_u = { .xts = camellia_xts_enc }
 	} }
 };
 
@@ -94,16 +91,16 @@ static const struct common_glue_ctx camellia_dec = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
+		.fn_u = { .ecb = camellia_ecb_dec_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
+		.fn_u = { .ecb = camellia_ecb_dec_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+		.fn_u = { .ecb = camellia_dec_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .ecb = camellia_dec_blk }
 	} }
 };
 
@@ -113,16 +110,16 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
+		.fn_u = { .cbc = camellia_cbc_dec_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
+		.fn_u = { .cbc = camellia_cbc_dec_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .cbc = camellia_dec_blk }
 	} }
 };
 
@@ -132,21 +129,20 @@ static const struct common_glue_ctx camellia_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
+		.fn_u = { .xts = camellia_xts_dec_32way }
 	}, {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
+		.fn_u = { .xts = camellia_xts_dec_16way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
+		.fn_u = { .xts = camellia_xts_dec }
 	} }
 };
 
 static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
-				 &tfm->base.crt_flags);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen);
 }
 
 static int ecb_encrypt(struct skcipher_request *req)
@@ -161,8 +157,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-					   req);
+	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -180,8 +175,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&camellia_enc_xts, req,
-				   XTS_TWEAK_CAST(camellia_enc_blk),
+	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -190,8 +184,7 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&camellia_dec_xts, req,
-				   XTS_TWEAK_CAST(camellia_enc_blk),
+	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index f28d282779b8..4e5de6ef206e 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -18,41 +18,36 @@
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 
 /* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
 
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
 
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
 
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_ctr_16way);
 
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
 
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
 
-void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(camellia_enc_blk));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
 }
 EXPORT_SYMBOL_GPL(camellia_xts_enc);
 
-void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(camellia_dec_blk));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
 }
 EXPORT_SYMBOL_GPL(camellia_xts_dec);
 
@@ -62,13 +57,13 @@ static const struct common_glue_ctx camellia_enc = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
+		.fn_u = { .ecb = camellia_ecb_enc_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+		.fn_u = { .ecb = camellia_enc_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+		.fn_u = { .ecb = camellia_enc_blk }
 	} }
 };
 
@@ -78,13 +73,13 @@ static const struct common_glue_ctx camellia_ctr = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
+		.fn_u = { .ctr = camellia_ctr_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+		.fn_u = { .ctr = camellia_crypt_ctr_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+		.fn_u = { .ctr = camellia_crypt_ctr }
 	} }
 };
 
@@ -94,10 +89,10 @@ static const struct common_glue_ctx camellia_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
+		.fn_u = { .xts = camellia_xts_enc_16way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
+		.fn_u = { .xts = camellia_xts_enc }
 	} }
 };
 
@@ -107,13 +102,13 @@ static const struct common_glue_ctx camellia_dec = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
+		.fn_u = { .ecb = camellia_ecb_dec_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+		.fn_u = { .ecb = camellia_dec_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .ecb = camellia_dec_blk }
 	} }
 };
 
@@ -123,13 +118,13 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
+		.fn_u = { .cbc = camellia_cbc_dec_16way }
 	}, {
 		.num_blocks = 2,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .cbc = camellia_dec_blk }
 	} }
 };
 
@@ -139,18 +134,17 @@ static const struct common_glue_ctx camellia_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
+		.fn_u = { .xts = camellia_xts_dec_16way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
+		.fn_u = { .xts = camellia_xts_dec }
 	} }
 };
 
 static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
-				 &tfm->base.crt_flags);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen);
 }
 
 static int ecb_encrypt(struct skcipher_request *req)
@@ -165,8 +159,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-					   req);
+	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -183,7 +176,6 @@ int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen)
 {
 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
 	err = xts_verify_key(tfm, key, keylen);
@@ -191,13 +183,12 @@ int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return err;
 
 	/* first half of xts-key is for crypt */
-	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
+	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
 }
 EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
@@ -206,8 +197,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&camellia_enc_xts, req,
-				   XTS_TWEAK_CAST(camellia_enc_blk),
+	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -216,8 +206,7 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&camellia_dec_xts, req,
-				   XTS_TWEAK_CAST(camellia_enc_blk),
+	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 7c62db56ffe1..242c056e5fa8 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -18,19 +18,17 @@
 #include <asm/crypto/glue_helper.h>
 
 /* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, bool xor);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+				   bool xor);
 EXPORT_SYMBOL_GPL(__camellia_enc_blk);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
-				 const u8 *src);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_dec_blk);
 
 /* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-					const u8 *src, bool xor);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+					bool xor);
 EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-				      const u8 *src);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
 
 static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -1229,12 +1227,10 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey)
 }
 
 int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
-		      unsigned int key_len, u32 *flags)
+		      unsigned int key_len)
 {
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (key_len != 16 && key_len != 24 && key_len != 32)
 		return -EINVAL;
-	}
 
 	cctx->key_length = key_len;
 
@@ -1257,8 +1253,7 @@ EXPORT_SYMBOL_GPL(__camellia_setkey);
 static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int key_len)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len,
-				 &tfm->crt_flags);
+	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len);
 }
 
 static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
@@ -1267,8 +1262,10 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
 	return camellia_setkey(&tfm->base, key, key_len);
 }
 
-void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
+void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s)
 {
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 	u128 iv = *src;
 
 	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
@@ -1277,9 +1274,11 @@ void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 }
 EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
 
-void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblk;
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	if (dst != src)
 		*dst = *src;
@@ -1291,9 +1290,11 @@ void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
 
-void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblks[2];
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	if (dst != src) {
 		dst[0] = src[0];
@@ -1315,10 +1316,10 @@ static const struct common_glue_ctx camellia_enc = {
 
 	.funcs = { {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+		.fn_u = { .ecb = camellia_enc_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+		.fn_u = { .ecb = camellia_enc_blk }
 	} }
 };
 
@@ -1328,10 +1329,10 @@ static const struct common_glue_ctx camellia_ctr = {
 
 	.funcs = { {
 		.num_blocks = 2,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+		.fn_u = { .ctr = camellia_crypt_ctr_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+		.fn_u = { .ctr = camellia_crypt_ctr }
 	} }
 };
 
@@ -1341,10 +1342,10 @@ static const struct common_glue_ctx camellia_dec = {
 
 	.funcs = { {
 		.num_blocks = 2,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+		.fn_u = { .ecb = camellia_dec_blk_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .ecb = camellia_dec_blk }
 	} }
 };
 
@@ -1354,10 +1355,10 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = 2,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+		.fn_u = { .cbc = camellia_dec_blk }
 	} }
 };
 
@@ -1373,8 +1374,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-					   req);
+	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index dc55c3332fcc..8a6181b08b59 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -209,7 +209,7 @@
 .text
 
 .align 16
-__cast5_enc_blk16:
+SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	/* input:
 	 *	%rdi: ctx
 	 *	RL1: blocks 1 and 2
@@ -280,10 +280,10 @@ __cast5_enc_blk16:
 	outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
 
 	ret;
-ENDPROC(__cast5_enc_blk16)
+SYM_FUNC_END(__cast5_enc_blk16)
 
 .align 16
-__cast5_dec_blk16:
+SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
 	/* input:
 	 *	%rdi: ctx
 	 *	RL1: encrypted blocks 1 and 2
@@ -357,9 +357,9 @@ __cast5_dec_blk16:
 .L__skip_dec:
 	vpsrldq $4, RKR, RKR;
 	jmp .L__dec_tail;
-ENDPROC(__cast5_dec_blk16)
+SYM_FUNC_END(__cast5_dec_blk16)
 
-ENTRY(cast5_ecb_enc_16way)
+SYM_FUNC_START(cast5_ecb_enc_16way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -394,9 +394,9 @@ ENTRY(cast5_ecb_enc_16way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast5_ecb_enc_16way)
+SYM_FUNC_END(cast5_ecb_enc_16way)
 
-ENTRY(cast5_ecb_dec_16way)
+SYM_FUNC_START(cast5_ecb_dec_16way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -432,9 +432,9 @@ ENTRY(cast5_ecb_dec_16way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast5_ecb_dec_16way)
+SYM_FUNC_END(cast5_ecb_dec_16way)
 
-ENTRY(cast5_cbc_dec_16way)
+SYM_FUNC_START(cast5_cbc_dec_16way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -484,9 +484,9 @@ ENTRY(cast5_cbc_dec_16way)
 	popq %r12;
 	FRAME_END
 	ret;
-ENDPROC(cast5_cbc_dec_16way)
+SYM_FUNC_END(cast5_cbc_dec_16way)
 
-ENTRY(cast5_ctr_16way)
+SYM_FUNC_START(cast5_ctr_16way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -560,4 +560,4 @@ ENTRY(cast5_ctr_16way)
 	popq %r12;
 	FRAME_END
 	ret;
-ENDPROC(cast5_ctr_16way)
+SYM_FUNC_END(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 4f0a7cdb94d9..932a3ce32a88 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -247,7 +247,7 @@
 .text
 
 .align 8
-__cast6_enc_blk8:
+SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
 	/* input:
 	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -292,10 +292,10 @@ __cast6_enc_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
-ENDPROC(__cast6_enc_blk8)
+SYM_FUNC_END(__cast6_enc_blk8)
 
 .align 8
-__cast6_dec_blk8:
+SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
 	/* input:
 	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
@@ -339,9 +339,9 @@ __cast6_dec_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
-ENDPROC(__cast6_dec_blk8)
+SYM_FUNC_END(__cast6_dec_blk8)
 
-ENTRY(cast6_ecb_enc_8way)
+SYM_FUNC_START(cast6_ecb_enc_8way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -362,9 +362,9 @@ ENTRY(cast6_ecb_enc_8way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast6_ecb_enc_8way)
+SYM_FUNC_END(cast6_ecb_enc_8way)
 
-ENTRY(cast6_ecb_dec_8way)
+SYM_FUNC_START(cast6_ecb_dec_8way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -385,9 +385,9 @@ ENTRY(cast6_ecb_dec_8way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast6_ecb_dec_8way)
+SYM_FUNC_END(cast6_ecb_dec_8way)
 
-ENTRY(cast6_cbc_dec_8way)
+SYM_FUNC_START(cast6_cbc_dec_8way)
 	/* input:
 	 *	%rdi: ctx
 	 *	%rsi: dst
@@ -411,9 +411,9 @@ ENTRY(cast6_cbc_dec_8way)
 	popq %r12;
 	FRAME_END
 	ret;
-ENDPROC(cast6_cbc_dec_8way)
+SYM_FUNC_END(cast6_cbc_dec_8way)
 
-ENTRY(cast6_ctr_8way)
+SYM_FUNC_START(cast6_ctr_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -439,9 +439,9 @@ ENTRY(cast6_ctr_8way)
 	popq %r12;
 	FRAME_END
 	ret;
-ENDPROC(cast6_ctr_8way)
+SYM_FUNC_END(cast6_ctr_8way)
 
-ENTRY(cast6_xts_enc_8way)
+SYM_FUNC_START(cast6_xts_enc_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -466,9 +466,9 @@ ENTRY(cast6_xts_enc_8way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast6_xts_enc_8way)
+SYM_FUNC_END(cast6_xts_enc_8way)
 
-ENTRY(cast6_xts_dec_8way)
+SYM_FUNC_START(cast6_xts_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -493,4 +493,4 @@ ENTRY(cast6_xts_dec_8way)
 	popq %r15;
 	FRAME_END
 	ret;
-ENDPROC(cast6_xts_dec_8way)
+SYM_FUNC_END(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index a8a38fffb4a9..48e0f37796fa 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -20,20 +20,17 @@
 
 #define CAST6_PARALLEL_BLOCKS 8
 
-asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
-				   const u8 *src);
-asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-				   const u8 *src);
-
-asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-				   const u8 *src);
-asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
+asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
 			       le128 *iv);
 
-asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
-asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
+asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
+asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
 
 static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
 				 const u8 *key, unsigned int keylen)
@@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
 	return cast6_setkey(&tfm->base, key, keylen);
 }
 
-static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(__cast6_encrypt));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt);
 }
 
-static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(__cast6_decrypt));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt);
 }
 
-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblk;
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	le128_to_be128(&ctrblk, iv);
 	le128_inc(iv);
@@ -70,10 +67,10 @@ static const struct common_glue_ctx cast6_enc = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
+		.fn_u = { .ecb = cast6_ecb_enc_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+		.fn_u = { .ecb = __cast6_encrypt }
 	} }
 };
 
@@ -83,10 +80,10 @@ static const struct common_glue_ctx cast6_ctr = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
+		.fn_u = { .ctr = cast6_ctr_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+		.fn_u = { .ctr = cast6_crypt_ctr }
 	} }
 };
 
@@ -96,10 +93,10 @@ static const struct common_glue_ctx cast6_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
+		.fn_u = { .xts = cast6_xts_enc_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
+		.fn_u = { .xts = cast6_xts_enc }
 	} }
 };
 
@@ -109,10 +106,10 @@ static const struct common_glue_ctx cast6_dec = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
+		.fn_u = { .ecb = cast6_ecb_dec_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+		.fn_u = { .ecb = __cast6_decrypt }
 	} }
 };
 
@@ -122,10 +119,10 @@ static const struct common_glue_ctx cast6_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
+		.fn_u = { .cbc = cast6_cbc_dec_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+		.fn_u = { .cbc = __cast6_decrypt }
 	} }
 };
 
@@ -135,10 +132,10 @@ static const struct common_glue_ctx cast6_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
+		.fn_u = { .xts = cast6_xts_dec_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
+		.fn_u = { .xts = cast6_xts_dec }
 	} }
 };
 
@@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
-					   req);
+	return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -177,7 +173,6 @@ static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
 	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
 	err = xts_verify_key(tfm, key, keylen);
@@ -185,13 +180,12 @@ static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return err;
 
 	/* first half of xts-key is for crypt */
-	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-			      flags);
+	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
 }
 
 static int xts_encrypt(struct skcipher_request *req)
@@ -199,8 +193,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&cast6_enc_xts, req,
-				   XTS_TWEAK_CAST(__cast6_encrypt),
+	return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -209,8 +202,7 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&cast6_dec_xts, req,
-				   XTS_TWEAK_CAST(__cast6_encrypt),
+	return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
index 831e4434fc20..ee9a40ab4109 100644
--- a/arch/x86/crypto/chacha-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -34,7 +34,7 @@ CTR4BL:	.octa 0x00000000000000000000000000000002
 
 .text
 
-ENTRY(chacha_2block_xor_avx2)
+SYM_FUNC_START(chacha_2block_xor_avx2)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 2 data blocks output, o
 	# %rdx: up to 2 data blocks input, i
@@ -224,9 +224,9 @@ ENTRY(chacha_2block_xor_avx2)
 	lea		-8(%r10),%rsp
 	jmp		.Ldone2
 
-ENDPROC(chacha_2block_xor_avx2)
+SYM_FUNC_END(chacha_2block_xor_avx2)
 
-ENTRY(chacha_4block_xor_avx2)
+SYM_FUNC_START(chacha_4block_xor_avx2)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 4 data blocks output, o
 	# %rdx: up to 4 data blocks input, i
@@ -529,9 +529,9 @@ ENTRY(chacha_4block_xor_avx2)
 	lea		-8(%r10),%rsp
 	jmp		.Ldone4
 
-ENDPROC(chacha_4block_xor_avx2)
+SYM_FUNC_END(chacha_4block_xor_avx2)
 
-ENTRY(chacha_8block_xor_avx2)
+SYM_FUNC_START(chacha_8block_xor_avx2)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 8 data blocks output, o
 	# %rdx: up to 8 data blocks input, i
@@ -1018,4 +1018,4 @@ ENTRY(chacha_8block_xor_avx2)
 
 	jmp		.Ldone8
 
-ENDPROC(chacha_8block_xor_avx2)
+SYM_FUNC_END(chacha_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S
index 848f9c75fd4f..bb193fde123a 100644
--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
+++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
@@ -24,7 +24,7 @@ CTR8BL:	.octa 0x00000003000000020000000100000000
 
 .text
 
-ENTRY(chacha_2block_xor_avx512vl)
+SYM_FUNC_START(chacha_2block_xor_avx512vl)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 2 data blocks output, o
 	# %rdx: up to 2 data blocks input, i
@@ -187,9 +187,9 @@ ENTRY(chacha_2block_xor_avx512vl)
 
 	jmp		.Ldone2
 
-ENDPROC(chacha_2block_xor_avx512vl)
+SYM_FUNC_END(chacha_2block_xor_avx512vl)
 
-ENTRY(chacha_4block_xor_avx512vl)
+SYM_FUNC_START(chacha_4block_xor_avx512vl)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 4 data blocks output, o
 	# %rdx: up to 4 data blocks input, i
@@ -453,9 +453,9 @@ ENTRY(chacha_4block_xor_avx512vl)
 
 	jmp		.Ldone4
 
-ENDPROC(chacha_4block_xor_avx512vl)
+SYM_FUNC_END(chacha_4block_xor_avx512vl)
 
-ENTRY(chacha_8block_xor_avx512vl)
+SYM_FUNC_START(chacha_8block_xor_avx512vl)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 8 data blocks output, o
 	# %rdx: up to 8 data blocks input, i
@@ -833,4 +833,4 @@ ENTRY(chacha_8block_xor_avx512vl)
 
 	jmp		.Ldone8
 
-ENDPROC(chacha_8block_xor_avx512vl)
+SYM_FUNC_END(chacha_8block_xor_avx512vl)
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index 2d86c7d6dc88..a38ab2512a6f 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -33,7 +33,7 @@ CTRINC:	.octa 0x00000003000000020000000100000000
  *
  * Clobbers: %r8d, %xmm4-%xmm7
  */
-chacha_permute:
+SYM_FUNC_START_LOCAL(chacha_permute)
 
 	movdqa		ROT8(%rip),%xmm4
 	movdqa		ROT16(%rip),%xmm5
@@ -109,9 +109,9 @@ chacha_permute:
 	jnz		.Ldoubleround
 
 	ret
-ENDPROC(chacha_permute)
+SYM_FUNC_END(chacha_permute)
 
-ENTRY(chacha_block_xor_ssse3)
+SYM_FUNC_START(chacha_block_xor_ssse3)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 1 data block output, o
 	# %rdx: up to 1 data block input, i
@@ -197,9 +197,9 @@ ENTRY(chacha_block_xor_ssse3)
 	lea		-8(%r10),%rsp
 	jmp		.Ldone
 
-ENDPROC(chacha_block_xor_ssse3)
+SYM_FUNC_END(chacha_block_xor_ssse3)
 
-ENTRY(hchacha_block_ssse3)
+SYM_FUNC_START(hchacha_block_ssse3)
 	# %rdi: Input state matrix, s
 	# %rsi: output (8 32-bit words)
 	# %edx: nrounds
@@ -218,9 +218,9 @@ ENTRY(hchacha_block_ssse3)
 
 	FRAME_END
 	ret
-ENDPROC(hchacha_block_ssse3)
+SYM_FUNC_END(hchacha_block_ssse3)
 
-ENTRY(chacha_4block_xor_ssse3)
+SYM_FUNC_START(chacha_4block_xor_ssse3)
 	# %rdi: Input state matrix, s
 	# %rsi: up to 4 data blocks output, o
 	# %rdx: up to 4 data blocks input, i
@@ -788,4 +788,4 @@ ENTRY(chacha_4block_xor_ssse3)
 
 	jmp		.Ldone4
 
-ENDPROC(chacha_4block_xor_ssse3)
+SYM_FUNC_END(chacha_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 388f95a4ec24..68a74953efaf 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -7,7 +7,7 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 					unsigned int len, int nrounds);
 asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-#ifdef CONFIG_AS_AVX2
+
 asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
 asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
 asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
-static bool chacha_use_avx2;
-#ifdef CONFIG_AS_AVX512
+
 asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
-static bool chacha_use_avx512vl;
-#endif
-#endif
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
 
 static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
 {
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
 static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			  unsigned int bytes, int nrounds)
 {
-#ifdef CONFIG_AS_AVX2
-#ifdef CONFIG_AS_AVX512
-	if (chacha_use_avx512vl) {
+	if (IS_ENABLED(CONFIG_AS_AVX512) &&
+	    static_branch_likely(&chacha_use_avx512vl)) {
 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha_8block_xor_avx512vl(state, dst, src, bytes,
 						   nrounds);
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			return;
 		}
 	}
-#endif
-	if (chacha_use_avx2) {
+
+	if (IS_ENABLED(CONFIG_AS_AVX2) &&
+	    static_branch_likely(&chacha_use_avx2)) {
 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
 			bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			return;
 		}
 	}
-#endif
+
 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
 		bytes -= CHACHA_BLOCK_SIZE * 4;
@@ -123,37 +123,76 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 	}
 }
 
-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
+		hchacha_block_generic(state, stream, nrounds);
+	} else {
+		kernel_fpu_begin();
+		hchacha_block_ssse3(state, stream, nrounds);
+		kernel_fpu_end();
+	}
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
+	    bytes <= CHACHA_BLOCK_SIZE)
+		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+	kernel_fpu_begin();
+	chacha_dosimd(state, dst, src, bytes, nrounds);
+	kernel_fpu_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_simd_stream_xor(struct skcipher_request *req,
 				  const struct chacha_ctx *ctx, const u8 *iv)
 {
 	u32 *state, state_buf[16 + 2] __aligned(8);
-	int next_yield = 4096; /* bytes until next FPU yield */
-	int err = 0;
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
 
 	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
 	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
 
-	crypto_chacha_init(state, ctx, iv);
-
-	while (walk->nbytes > 0) {
-		unsigned int nbytes = walk->nbytes;
+	chacha_init_generic(state, ctx->key, iv);
 
-		if (nbytes < walk->total) {
-			nbytes = round_down(nbytes, walk->stride);
-			next_yield -= nbytes;
-		}
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
 
-		chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
-			      nbytes, ctx->nrounds);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
 
-		if (next_yield <= 0) {
-			/* temporarily allow preemption */
-			kernel_fpu_end();
+		if (!static_branch_likely(&chacha_use_simd) ||
+		    !crypto_simd_usable()) {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
 			kernel_fpu_begin();
-			next_yield = 4096;
+			chacha_dosimd(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes,
+				      ctx->nrounds);
+			kernel_fpu_end();
 		}
-
-		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
@@ -163,55 +202,34 @@ static int chacha_simd(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	int err;
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
 
-	err = skcipher_walk_virt(&walk, req, true);
-	if (err)
-		return err;
-
-	kernel_fpu_begin();
-	err = chacha_simd_stream_xor(&walk, ctx, req->iv);
-	kernel_fpu_end();
-	return err;
+	return chacha_simd_stream_xor(req, ctx, req->iv);
 }
 
 static int xchacha_simd(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	struct chacha_ctx subctx;
 	u32 *state, state_buf[16 + 2] __aligned(8);
+	struct chacha_ctx subctx;
 	u8 real_iv[16];
-	int err;
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, true);
-	if (err)
-		return err;
 
 	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
 	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_fpu_begin();
-
-	hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
+		kernel_fpu_begin();
+		hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+		kernel_fpu_end();
+	} else {
+		hchacha_block_generic(state, subctx.key, ctx->nrounds);
+	}
 	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
 	memcpy(&real_iv[8], req->iv + 16, 8);
-	err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
-
-	kernel_fpu_end();
-
-	return err;
+	return chacha_simd_stream_xor(req, &subctx, real_iv);
 }
 
 static struct skcipher_alg algs[] = {
@@ -227,7 +245,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= CHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= chacha_simd,
 		.decrypt		= chacha_simd,
 	}, {
@@ -242,7 +260,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= xchacha_simd,
 		.decrypt		= xchacha_simd,
 	}, {
@@ -257,7 +275,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
+		.setkey			= chacha12_setkey,
 		.encrypt		= xchacha_simd,
 		.decrypt		= xchacha_simd,
 	},
@@ -266,24 +284,29 @@ static struct skcipher_alg algs[] = {
 static int __init chacha_simd_mod_init(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_SSSE3))
-		return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
-	chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
-			  boot_cpu_has(X86_FEATURE_AVX2) &&
-			  cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifdef CONFIG_AS_AVX512
-	chacha_use_avx512vl = chacha_use_avx2 &&
-			      boot_cpu_has(X86_FEATURE_AVX512VL) &&
-			      boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
-#endif
-#endif
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+		return 0;
+
+	static_branch_enable(&chacha_use_simd);
+
+	if (IS_ENABLED(CONFIG_AS_AVX2) &&
+	    boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_AVX2) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+		static_branch_enable(&chacha_use_avx2);
+
+		if (IS_ENABLED(CONFIG_AS_AVX512) &&
+		    boot_cpu_has(X86_FEATURE_AVX512VL) &&
+		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+			static_branch_enable(&chacha_use_avx512vl);
+	}
+	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
 }
 
 static void __exit chacha_simd_mod_fini(void)
 {
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
 module_init(chacha_simd_mod_init);
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 1c099dc08cc3..9fd28ff65bc2 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -103,7 +103,7 @@
  *	                     size_t len, uint crc32)
  */
 
-ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
+SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
 	movdqa  (BUF), %xmm1
 	movdqa  0x10(BUF), %xmm2
 	movdqa  0x20(BUF), %xmm3
@@ -238,4 +238,4 @@ fold_64:
 	PEXTRD  0x01, %xmm1, %eax
 
 	ret
-ENDPROC(crc32_pclmul_le_16)
+SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index cb4ab6645106..418bd88acac8 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -94,10 +94,8 @@ static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index eefa0862f309..c20d1b8a82c3 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -91,10 +91,8 @@ static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index d9b734d0c8cc..0e6690e3618c 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -74,7 +74,7 @@
 # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
 
 .text
-ENTRY(crc_pcl)
+SYM_FUNC_START(crc_pcl)
 #define    bufp		%rdi
 #define    bufp_dw	%edi
 #define    bufp_w	%di
@@ -311,7 +311,7 @@ do_return:
 	popq    %rdi
 	popq    %rbx
         ret
-ENDPROC(crc_pcl)
+SYM_FUNC_END(crc_pcl)
 
 .section	.rodata, "a", @progbits
         ################################################################
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index 3d873e67749d..b2533d63030e 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -95,7 +95,7 @@
 # Assumes len >= 16.
 #
 .align 16
-ENTRY(crc_t10dif_pcl)
+SYM_FUNC_START(crc_t10dif_pcl)
 
 	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
 
@@ -280,7 +280,7 @@ ENTRY(crc_t10dif_pcl)
 	jge	.Lfold_16_bytes_loop		# 32 <= len <= 255
 	add	$16, len
 	jmp	.Lhandle_partial_segment	# 17 <= len <= 31
-ENDPROC(crc_t10dif_pcl)
+SYM_FUNC_END(crc_t10dif_pcl)
 
 .section	.rodata, "a", @progbits
 .align 16
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
new file mode 100644
index 000000000000..eec7d2d24239
--- /dev/null
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -0,0 +1,2476 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
+
+enum { NUM_WORDS_ELTFP25519 = 4 };
+typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
+typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
+
+#define mul_eltfp25519_1w_adx(c, a, b) do { \
+	mul_256x256_integer_adx(m.buffer, a, b); \
+	red_eltfp25519_1w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
+	mul_256x256_integer_bmi2(m.buffer, a, b); \
+	red_eltfp25519_1w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_adx(a) do { \
+	sqr_256x256_integer_adx(m.buffer, a); \
+	red_eltfp25519_1w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_bmi2(a) do { \
+	sqr_256x256_integer_bmi2(m.buffer, a); \
+	red_eltfp25519_1w_bmi2(a, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_adx(c, a, b) do { \
+	mul2_256x256_integer_adx(m.buffer, a, b); \
+	red_eltfp25519_2w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
+	mul2_256x256_integer_bmi2(m.buffer, a, b); \
+	red_eltfp25519_2w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_adx(a) do { \
+	sqr2_256x256_integer_adx(m.buffer, a); \
+	red_eltfp25519_2w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_bmi2(a) do { \
+	sqr2_256x256_integer_bmi2(m.buffer, a); \
+	red_eltfp25519_2w_bmi2(a, m.buffer); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_adx(a, times) do { \
+	int ____counter = (times); \
+	while (____counter-- > 0) \
+		sqr_eltfp25519_1w_adx(a); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
+	int ____counter = (times); \
+	while (____counter-- > 0) \
+		sqr_eltfp25519_1w_bmi2(a); \
+} while (0)
+
+#define copy_eltfp25519_1w(C, A) do { \
+	(C)[0] = (A)[0]; \
+	(C)[1] = (A)[1]; \
+	(C)[2] = (A)[2]; \
+	(C)[3] = (A)[3]; \
+} while (0)
+
+#define setzero_eltfp25519_1w(C) do { \
+	(C)[0] = 0; \
+	(C)[1] = 0; \
+	(C)[2] = 0; \
+	(C)[3] = 0; \
+} while (0)
+
+__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
+	/*   1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
+		  0xffffffffffffffffUL, 0x5fffffffffffffffUL,
+	/*   2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
+		  0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
+	/*   3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
+		  0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
+	/*   4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
+		  0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
+	/*   5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
+		  0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
+	/*   6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
+		  0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
+	/*   7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
+		  0xc1c20d06231f7614UL, 0x2938218da274f972UL,
+	/*   8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
+		  0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
+	/*   9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
+		  0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
+	/*  10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
+		  0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
+	/*  11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
+		  0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
+	/*  12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
+		  0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
+	/*  13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
+		  0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
+	/*  14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
+		  0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
+	/*  15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
+		  0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
+	/*  16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
+		  0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
+	/*  17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
+		  0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
+	/*  18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
+		  0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
+	/*  19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
+		  0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
+	/*  20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
+		  0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
+	/*  21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
+		  0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
+	/*  22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
+		  0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
+	/*  23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
+		  0x23758739f630a257UL, 0x295a407a01a78580UL,
+	/*  24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
+		  0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
+	/*  25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
+		  0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
+	/*  26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
+		  0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
+	/*  27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
+		  0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
+	/*  28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
+		  0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
+	/*  29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
+		  0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
+	/*  30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
+		  0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
+	/*  31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
+		  0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
+	/*  32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
+		  0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
+	/*  33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
+		  0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
+	/*  34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
+		  0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
+	/*  35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
+		  0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
+	/*  36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
+		  0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
+	/*  37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
+		  0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
+	/*  38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
+		  0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
+	/*  39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
+		  0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
+	/*  40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
+		  0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
+	/*  41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
+		  0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
+	/*  42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
+		  0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
+	/*  43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
+		  0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
+	/*  44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
+		  0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
+	/*  45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
+		  0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
+	/*  46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
+		  0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
+	/*  47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
+		  0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
+	/*  48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
+		  0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
+	/*  49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
+		  0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
+	/*  50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
+		  0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
+	/*  51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
+		  0xc189218075e91436UL, 0x6d9284169b3b8484UL,
+	/*  52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
+		  0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
+	/*  53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
+		  0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
+	/*  54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
+		  0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
+	/*  55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
+		  0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
+	/*  56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
+		  0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
+	/*  57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
+		  0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
+	/*  58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
+		  0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
+	/*  59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
+		  0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
+	/*  60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
+		  0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
+	/*  61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
+		  0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
+	/*  62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
+		  0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
+	/*  63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
+		  0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
+	/*  64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
+		  0x25232973322dbef4UL, 0x445dc4758c17f770UL,
+	/*  65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
+		  0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
+	/*  66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
+		  0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
+	/*  67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
+		  0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
+	/*  68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
+		  0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
+	/*  69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
+		  0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
+	/*  70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
+		  0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
+	/*  71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
+		  0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
+	/*  72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
+		  0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
+	/*  73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
+		  0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
+	/*  74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
+		  0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
+	/*  75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
+		  0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
+	/*  76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
+		  0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
+	/*  77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
+		  0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
+	/*  78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
+		  0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
+	/*  79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
+		  0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
+	/*  80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
+		  0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
+	/*  81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
+		  0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
+	/*  82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
+		  0x894d1d855ae52359UL, 0x68e122157b743d69UL,
+	/*  83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
+		  0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
+	/*  84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
+		  0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
+	/*  85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
+		  0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
+	/*  86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
+		  0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
+	/*  87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
+		  0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
+	/*  88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
+		  0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
+	/*  89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
+		  0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
+	/*  90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
+		  0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
+	/*  91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
+		  0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
+	/*  92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
+		  0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
+	/*  93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
+		  0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
+	/*  94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
+		  0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
+	/*  95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
+		  0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
+	/*  96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
+		  0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
+	/*  97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
+		  0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
+	/*  98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
+		  0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
+	/*  99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
+		  0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
+	/* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
+		  0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
+	/* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
+		  0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
+	/* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
+		  0x4a497962066e6043UL, 0x705b3aab41355b44UL,
+	/* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
+		  0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
+	/* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
+		  0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
+	/* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
+		  0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
+	/* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
+		  0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
+	/* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
+		  0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
+	/* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
+		  0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
+	/* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
+		  0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
+	/* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
+		  0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
+	/* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
+		  0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
+	/* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
+		  0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
+	/* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
+		  0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
+	/* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
+		  0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
+	/* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
+		  0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
+	/* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
+		  0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
+	/* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
+		  0x508e862f121692fcUL, 0x3a81907fa093c291UL,
+	/* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
+		  0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
+	/* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
+		  0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
+	/* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
+		  0xe488de11d761e352UL, 0x0e878a01a085545cUL,
+	/* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
+		  0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
+	/* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
+		  0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
+	/* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
+		  0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
+	/* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
+		  0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
+	/* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
+		  0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
+	/* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
+		  0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
+	/* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
+		  0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
+	/* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
+		  0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
+	/* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
+		  0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
+	/* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
+		  0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
+	/* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
+		  0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
+	/* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
+		  0x266fd5809208f294UL, 0x5c847085619a26b9UL,
+	/* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
+		  0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
+	/* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
+		  0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
+	/* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
+		  0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
+	/* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
+		  0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
+	/* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
+		  0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
+	/* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
+		  0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
+	/* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
+		  0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
+	/* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
+		  0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
+	/* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
+		  0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
+	/* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
+		  0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
+	/* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
+		  0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
+	/* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
+		  0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
+	/* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
+		  0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
+	/* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
+		  0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
+	/* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
+		  0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
+	/* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
+		  0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
+	/* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
+		  0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
+	/* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
+		  0x52d17436309d4253UL, 0x356f97e13efae576UL,
+	/* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
+		  0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
+	/* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
+		  0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
+	/* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
+		  0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
+	/* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
+		  0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
+	/* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
+		  0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
+	/* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
+		  0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
+	/* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
+		  0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
+	/* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
+		  0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
+	/* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
+		  0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
+	/* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
+		  0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
+	/* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
+		  0x497d723f802e88e1UL, 0x30684dea602f408dUL,
+	/* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
+		  0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
+	/* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
+		  0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
+	/* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
+		  0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
+	/* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
+		  0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
+	/* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
+		  0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
+	/* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
+		  0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
+	/* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
+		  0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
+	/* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
+		  0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
+	/* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
+		  0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
+	/* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
+		  0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
+	/* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
+		  0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
+	/* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
+		  0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
+	/* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
+		  0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
+	/* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
+		  0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
+	/* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
+		  0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
+	/* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
+		  0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
+	/* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
+		  0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
+	/* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
+		  0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
+	/* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
+		  0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
+	/* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
+		  0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
+	/* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
+		  0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
+	/* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
+		  0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
+	/* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
+		  0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
+	/* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
+		  0x81004b71e33cc191UL, 0x44e6be345122803cUL,
+	/* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
+		  0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
+	/* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
+		  0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
+	/* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
+		  0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
+	/* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
+		  0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
+	/* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
+		  0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
+	/* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
+		  0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
+	/* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
+		  0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
+	/* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
+		  0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
+	/* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
+		  0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
+	/* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
+		  0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
+	/* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
+		  0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
+	/* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
+		  0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
+	/* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
+		  0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
+	/* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
+		  0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
+	/* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
+		  0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
+	/* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
+		  0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
+	/* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
+		  0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
+	/* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
+		  0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
+	/* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
+		  0x33979624f0e917beUL, 0x2c018dc527356b30UL,
+	/* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
+		  0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
+	/* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
+		  0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
+	/* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
+		  0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
+	/* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
+		  0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
+	/* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
+		  0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
+	/* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
+		  0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
+	/* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
+		  0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
+	/* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
+		  0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
+	/* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
+		  0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
+	/* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
+		  0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
+	/* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
+		  0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
+	/* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
+		  0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
+	/* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
+		  0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
+	/* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
+		  0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
+	/* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
+		  0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
+	/* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
+		  0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
+	/* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
+		  0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
+	/* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
+		  0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
+	/* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
+		  0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
+	/* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
+		  0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
+	/* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
+		  0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
+	/* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
+		  0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
+	/* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
+		  0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
+	/* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
+		  0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
+	/* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
+		  0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
+	/* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
+		  0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
+	/* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
+		  0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
+	/* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
+		  0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
+	/* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
+		  0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
+	/* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
+		  0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
+	/* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
+		  0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
+	/* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
+		  0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
+	/* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
+		  0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
+	/* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
+		  0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
+	/* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
+		  0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
+	/* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
+		  0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
+	/* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
+		  0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
+	/* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
+		  0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
+	/* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
+		  0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
+	/* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
+		  0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
+	/* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
+		  0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
+	/* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
+		  0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
+	/* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
+		  0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
+	/* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
+		  0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
+	/* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
+		  0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
+	/* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
+		  0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
+	/* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
+		  0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
+	/* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
+		  0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
+};
+
+/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
+ * a is two 256-bit integers: a0[0:3] and a1[4:7]
+ * b is two 256-bit integers: b0[0:3] and b1[4:7]
+ */
+static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
+				     const u64 *const b)
+{
+	asm volatile(
+		"xorl %%r14d, %%r14d ;"
+		"movq   (%1), %%rdx; "	/* A[0] */
+		"mulx   (%2),  %%r8, %%r15; " /* A[0]*B[0] */
+		"xorl %%r10d, %%r10d ;"
+		"movq %%r8, (%0) ;"
+		"mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+		"adox %%r10, %%r15 ;"
+		"mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */
+		"adox  %%r8, %%rax ;"
+		"mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+		"adox %%r10, %%rbx ;"
+		/******************************************/
+		"adox %%r14, %%rcx ;"
+
+		"movq  8(%1), %%rdx; "	/* A[1] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */
+		"adox %%r15,  %%r8 ;"
+		"movq  %%r8, 8(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rax ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%rbx ;"
+		"mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%rcx ;"
+		/******************************************/
+		"adox %%r14, %%r15 ;"
+		"adcx %%r14, %%r15 ;"
+
+		"movq 16(%1), %%rdx; " /* A[2] */
+		"xorl %%r10d, %%r10d ;"
+		"mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */
+		"adox %%rax,  %%r8 ;"
+		"movq %%r8, 16(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rbx ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%rcx ;"
+		"mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%r15 ;"
+		/******************************************/
+		"adox %%r14, %%rax ;"
+		"adcx %%r14, %%rax ;"
+
+		"movq 24(%1), %%rdx; " /* A[3] */
+		"xorl %%r10d, %%r10d ;"
+		"mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */
+		"adox %%rbx,  %%r8 ;"
+		"movq %%r8, 24(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rcx ;"
+		"movq %%rcx, 32(%0) ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%rax ;"
+		"movq %%rax, 48(%0) ;"
+		/******************************************/
+		"adox %%r14, %%rbx ;"
+		"adcx %%r14, %%rbx ;"
+		"movq %%rbx, 56(%0) ;"
+
+		"movq 32(%1), %%rdx; "	/* C[0] */
+		"mulx 32(%2),  %%r8, %%r15; " /* C[0]*D[0] */
+		"xorl %%r10d, %%r10d ;"
+		"movq %%r8, 64(%0);"
+		"mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+		"adox %%r10, %%r15 ;"
+		"mulx 48(%2),  %%r8, %%rbx; " /* C[0]*D[2] */
+		"adox  %%r8, %%rax ;"
+		"mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+		"adox %%r10, %%rbx ;"
+		/******************************************/
+		"adox %%r14, %%rcx ;"
+
+		"movq 40(%1), %%rdx; " /* C[1] */
+		"xorl %%r10d, %%r10d ;"
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[1]*D[0] */
+		"adox %%r15,  %%r8 ;"
+		"movq  %%r8, 72(%0);"
+		"mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rax ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[1]*D[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%rbx ;"
+		"mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%rcx ;"
+		/******************************************/
+		"adox %%r14, %%r15 ;"
+		"adcx %%r14, %%r15 ;"
+
+		"movq 48(%1), %%rdx; " /* C[2] */
+		"xorl %%r10d, %%r10d ;"
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[2]*D[0] */
+		"adox %%rax,  %%r8 ;"
+		"movq  %%r8, 80(%0);"
+		"mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rbx ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[2]*D[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%rcx ;"
+		"mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%r15 ;"
+		/******************************************/
+		"adox %%r14, %%rax ;"
+		"adcx %%r14, %%rax ;"
+
+		"movq 56(%1), %%rdx; " /* C[3] */
+		"xorl %%r10d, %%r10d ;"
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[3]*D[0] */
+		"adox %%rbx,  %%r8 ;"
+		"movq  %%r8, 88(%0);"
+		"mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+		"adox %%r10,  %%r9 ;"
+		"adcx  %%r9, %%rcx ;"
+		"movq %%rcx,  96(%0) ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[3]*D[2] */
+		"adox  %%r8, %%r11 ;"
+		"adcx %%r11, %%r15 ;"
+		"movq %%r15, 104(%0) ;"
+		"mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+		"adox %%r10, %%r13 ;"
+		"adcx %%r13, %%rax ;"
+		"movq %%rax, 112(%0) ;"
+		/******************************************/
+		"adox %%r14, %%rbx ;"
+		"adcx %%r14, %%rbx ;"
+		"movq %%rbx, 120(%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+				      const u64 *const b)
+{
+	asm volatile(
+		"movq   (%1), %%rdx; "	/* A[0] */
+		"mulx   (%2),  %%r8, %%r15; " /* A[0]*B[0] */
+		"movq %%r8,  (%0) ;"
+		"mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+		"addq %%r10, %%r15 ;"
+		"mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */
+		"adcq  %%r8, %%rax ;"
+		"mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+		"adcq %%r10, %%rbx ;"
+		/******************************************/
+		"adcq    $0, %%rcx ;"
+
+		"movq  8(%1), %%rdx; "	/* A[1] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */
+		"addq %%r15,  %%r8 ;"
+		"movq %%r8, 8(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%r15 ;"
+
+		"addq  %%r9, %%rax ;"
+		"adcq %%r11, %%rbx ;"
+		"adcq %%r13, %%rcx ;"
+		"adcq    $0, %%r15 ;"
+
+		"movq 16(%1), %%rdx; "	/* A[2] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */
+		"addq %%rax,  %%r8 ;"
+		"movq %%r8, 16(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rax ;"
+
+		"addq  %%r9, %%rbx ;"
+		"adcq %%r11, %%rcx ;"
+		"adcq %%r13, %%r15 ;"
+		"adcq    $0, %%rax ;"
+
+		"movq 24(%1), %%rdx; "	/* A[3] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */
+		"addq %%rbx,  %%r8 ;"
+		"movq %%r8, 24(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rbx ;"
+
+		"addq  %%r9, %%rcx ;"
+		"movq %%rcx, 32(%0) ;"
+		"adcq %%r11, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"adcq %%r13, %%rax ;"
+		"movq %%rax, 48(%0) ;"
+		"adcq    $0, %%rbx ;"
+		"movq %%rbx, 56(%0) ;"
+
+		"movq 32(%1), %%rdx; "	/* C[0] */
+		"mulx 32(%2),  %%r8, %%r15; " /* C[0]*D[0] */
+		"movq %%r8, 64(%0) ;"
+		"mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+		"addq %%r10, %%r15 ;"
+		"mulx 48(%2),  %%r8, %%rbx; " /* C[0]*D[2] */
+		"adcq  %%r8, %%rax ;"
+		"mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+		"adcq %%r10, %%rbx ;"
+		/******************************************/
+		"adcq    $0, %%rcx ;"
+
+		"movq 40(%1), %%rdx; "	/* C[1] */
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[1]*D[0] */
+		"addq %%r15,  %%r8 ;"
+		"movq %%r8, 72(%0) ;"
+		"mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[1]*D[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%r15 ;"
+
+		"addq  %%r9, %%rax ;"
+		"adcq %%r11, %%rbx ;"
+		"adcq %%r13, %%rcx ;"
+		"adcq    $0, %%r15 ;"
+
+		"movq 48(%1), %%rdx; "	/* C[2] */
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[2]*D[0] */
+		"addq %%rax,  %%r8 ;"
+		"movq %%r8, 80(%0) ;"
+		"mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[2]*D[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rax ;"
+
+		"addq  %%r9, %%rbx ;"
+		"adcq %%r11, %%rcx ;"
+		"adcq %%r13, %%r15 ;"
+		"adcq    $0, %%rax ;"
+
+		"movq 56(%1), %%rdx; "	/* C[3] */
+		"mulx 32(%2),  %%r8,  %%r9; " /* C[3]*D[0] */
+		"addq %%rbx,  %%r8 ;"
+		"movq %%r8, 88(%0) ;"
+		"mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 48(%2),  %%r8, %%r13; " /* C[3]*D[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rbx ;"
+
+		"addq  %%r9, %%rcx ;"
+		"movq %%rcx,  96(%0) ;"
+		"adcq %%r11, %%r15 ;"
+		"movq %%r15, 104(%0) ;"
+		"adcq %%r13, %%rax ;"
+		"movq %%rax, 112(%0) ;"
+		"adcq    $0, %%rbx ;"
+		"movq %%rbx, 120(%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movq   (%1), %%rdx        ;" /* A[0]      */
+		"mulx  8(%1),  %%r8, %%r14 ;" /* A[1]*A[0] */
+		"xorl %%r15d, %%r15d;"
+		"mulx 16(%1),  %%r9, %%r10 ;" /* A[2]*A[0] */
+		"adcx %%r14,  %%r9 ;"
+		"mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+		"adcx %%rax, %%r10 ;"
+		"movq 24(%1), %%rdx        ;" /* A[3]      */
+		"mulx  8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+		"adcx %%rcx, %%r11 ;"
+		"mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+		"adcx %%rax, %%rbx ;"
+		"movq  8(%1), %%rdx        ;" /* A[1]      */
+		"adcx %%r15, %%r13 ;"
+		"mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+		"movq    $0, %%r14 ;"
+		/******************************************/
+		"adcx %%r15, %%r14 ;"
+
+		"xorl %%r15d, %%r15d;"
+		"adox %%rax, %%r10 ;"
+		"adcx  %%r8,  %%r8 ;"
+		"adox %%rcx, %%r11 ;"
+		"adcx  %%r9,  %%r9 ;"
+		"adox %%r15, %%rbx ;"
+		"adcx %%r10, %%r10 ;"
+		"adox %%r15, %%r13 ;"
+		"adcx %%r11, %%r11 ;"
+		"adox %%r15, %%r14 ;"
+		"adcx %%rbx, %%rbx ;"
+		"adcx %%r13, %%r13 ;"
+		"adcx %%r14, %%r14 ;"
+
+		"movq   (%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+		/*******************/
+		"movq %%rax,  0(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  8(%0) ;"
+		"movq  8(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9, 16(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10, 24(%0) ;"
+		"movq 16(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11, 32(%0) ;"
+		"adcq %%rcx, %%rbx ;"
+		"movq %%rbx, 40(%0) ;"
+		"movq 24(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 48(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 56(%0) ;"
+
+
+		"movq 32(%1), %%rdx        ;" /* B[0]      */
+		"mulx 40(%1),  %%r8, %%r14 ;" /* B[1]*B[0] */
+		"xorl %%r15d, %%r15d;"
+		"mulx 48(%1),  %%r9, %%r10 ;" /* B[2]*B[0] */
+		"adcx %%r14,  %%r9 ;"
+		"mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
+		"adcx %%rax, %%r10 ;"
+		"movq 56(%1), %%rdx        ;" /* B[3]      */
+		"mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
+		"adcx %%rcx, %%r11 ;"
+		"mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
+		"adcx %%rax, %%rbx ;"
+		"movq 40(%1), %%rdx        ;" /* B[1]      */
+		"adcx %%r15, %%r13 ;"
+		"mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
+		"movq    $0, %%r14 ;"
+		/******************************************/
+		"adcx %%r15, %%r14 ;"
+
+		"xorl %%r15d, %%r15d;"
+		"adox %%rax, %%r10 ;"
+		"adcx  %%r8,  %%r8 ;"
+		"adox %%rcx, %%r11 ;"
+		"adcx  %%r9,  %%r9 ;"
+		"adox %%r15, %%rbx ;"
+		"adcx %%r10, %%r10 ;"
+		"adox %%r15, %%r13 ;"
+		"adcx %%r11, %%r11 ;"
+		"adox %%r15, %%r14 ;"
+		"adcx %%rbx, %%rbx ;"
+		"adcx %%r13, %%r13 ;"
+		"adcx %%r14, %%r14 ;"
+
+		"movq 32(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
+		/*******************/
+		"movq %%rax,  64(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  72(%0) ;"
+		"movq 40(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9,  80(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10,  88(%0) ;"
+		"movq 48(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11,  96(%0) ;"
+		"adcq %%rcx, %%rbx ;"
+		"movq %%rbx, 104(%0) ;"
+		"movq 56(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 112(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 120(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movq  8(%1), %%rdx        ;" /* A[1]      */
+		"mulx   (%1),  %%r8,  %%r9 ;" /* A[0]*A[1] */
+		"mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+		"mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+		"movq 16(%1), %%rdx        ;" /* A[2]      */
+		"mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+		"mulx   (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+		"addq %%rax,  %%r9 ;"
+		"adcq %%rdx, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq %%r14, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"movq    $0, %%r14 ;"
+		"adcq    $0, %%r14 ;"
+
+		"movq   (%1), %%rdx        ;" /* A[0]      */
+		"mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+		"addq %%rax, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq    $0, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"adcq    $0, %%r14 ;"
+
+		"shldq $1, %%r13, %%r14 ;"
+		"shldq $1, %%r15, %%r13 ;"
+		"shldq $1, %%r11, %%r15 ;"
+		"shldq $1, %%r10, %%r11 ;"
+		"shldq $1,  %%r9, %%r10 ;"
+		"shldq $1,  %%r8,  %%r9 ;"
+		"shlq  $1,  %%r8        ;"
+
+		/*******************/
+		"mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
+		/*******************/
+		"movq %%rax,  0(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  8(%0) ;"
+		"movq  8(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9, 16(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10, 24(%0) ;"
+		"movq 16(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11, 32(%0) ;"
+		"adcq %%rcx, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"movq 24(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 48(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 56(%0) ;"
+
+		"movq 40(%1), %%rdx        ;" /* B[1]      */
+		"mulx 32(%1),  %%r8,  %%r9 ;" /* B[0]*B[1] */
+		"mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
+		"mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
+
+		"movq 48(%1), %%rdx        ;" /* B[2]      */
+		"mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
+		"mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
+
+		"addq %%rax,  %%r9 ;"
+		"adcq %%rdx, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq %%r14, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"movq    $0, %%r14 ;"
+		"adcq    $0, %%r14 ;"
+
+		"movq 32(%1), %%rdx        ;" /* B[0]      */
+		"mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
+
+		"addq %%rax, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq    $0, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"adcq    $0, %%r14 ;"
+
+		"shldq $1, %%r13, %%r14 ;"
+		"shldq $1, %%r15, %%r13 ;"
+		"shldq $1, %%r11, %%r15 ;"
+		"shldq $1, %%r10, %%r11 ;"
+		"shldq $1,  %%r9, %%r10 ;"
+		"shldq $1,  %%r8,  %%r9 ;"
+		"shlq  $1,  %%r8        ;"
+
+		/*******************/
+		"mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
+		/*******************/
+		"movq %%rax,  64(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  72(%0) ;"
+		"movq 40(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9,  80(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10,  88(%0) ;"
+		"movq 48(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11,  96(%0) ;"
+		"adcq %%rcx, %%r15 ;"
+		"movq %%r15, 104(%0) ;"
+		"movq 56(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 112(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 120(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+		  "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movl    $38, %%edx; "	/* 2*c = 38 = 2^256 */
+		"mulx 32(%1),  %%r8, %%r10; " /* c*C[4] */
+		"xorl %%ebx, %%ebx ;"
+		"adox   (%1),  %%r8 ;"
+		"mulx 40(%1),  %%r9, %%r11; " /* c*C[5] */
+		"adcx %%r10,  %%r9 ;"
+		"adox  8(%1),  %%r9 ;"
+		"mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
+		"adcx %%r11, %%r10 ;"
+		"adox 16(%1), %%r10 ;"
+		"mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
+		"adcx %%rax, %%r11 ;"
+		"adox 24(%1), %%r11 ;"
+		/***************************************/
+		"adcx %%rbx, %%rcx ;"
+		"adox  %%rbx, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+		"adcx %%rcx,  %%r8 ;"
+		"adcx %%rbx,  %%r9 ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcx %%rbx, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"adcx %%rbx, %%r11 ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,   (%0) ;"
+
+		"mulx  96(%1),  %%r8, %%r10; " /* c*C[4] */
+		"xorl %%ebx, %%ebx ;"
+		"adox 64(%1),  %%r8 ;"
+		"mulx 104(%1),  %%r9, %%r11; " /* c*C[5] */
+		"adcx %%r10,  %%r9 ;"
+		"adox 72(%1),  %%r9 ;"
+		"mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
+		"adcx %%r11, %%r10 ;"
+		"adox 80(%1), %%r10 ;"
+		"mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
+		"adcx %%rax, %%r11 ;"
+		"adox 88(%1), %%r11 ;"
+		/****************************************/
+		"adcx %%rbx, %%rcx ;"
+		"adox  %%rbx, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+		"adcx %%rcx,  %%r8 ;"
+		"adcx %%rbx,  %%r9 ;"
+		"movq  %%r9, 40(%0) ;"
+		"adcx %%rbx, %%r10 ;"
+		"movq %%r10, 48(%0) ;"
+		"adcx %%rbx, %%r11 ;"
+		"movq %%r11, 56(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8, 32(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11");
+}
+
+static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movl    $38, %%edx ; "       /* 2*c = 38 = 2^256 */
+		"mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */
+		"mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */
+		"addq %%r10,  %%r9 ;"
+		"mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+		"adcq %%r11, %%r10 ;"
+		"mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+		"adcq %%rax, %%r11 ;"
+		/***************************************/
+		"adcq    $0, %%rcx ;"
+		"addq   (%1),  %%r8 ;"
+		"adcq  8(%1),  %%r9 ;"
+		"adcq 16(%1), %%r10 ;"
+		"adcq 24(%1), %%r11 ;"
+		"adcq     $0, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+		"addq %%rcx,  %%r8 ;"
+		"adcq    $0,  %%r9 ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcq    $0, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"adcq    $0, %%r11 ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,   (%0) ;"
+
+		"mulx  96(%1),  %%r8, %%r10 ;" /* c*C[4] */
+		"mulx 104(%1),  %%r9, %%r11 ;" /* c*C[5] */
+		"addq %%r10,  %%r9 ;"
+		"mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
+		"adcq %%r11, %%r10 ;"
+		"mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
+		"adcq %%rax, %%r11 ;"
+		/****************************************/
+		"adcq    $0, %%rcx ;"
+		"addq 64(%1),  %%r8 ;"
+		"adcq 72(%1),  %%r9 ;"
+		"adcq 80(%1), %%r10 ;"
+		"adcq 88(%1), %%r11 ;"
+		"adcq     $0, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+		"addq %%rcx,  %%r8 ;"
+		"adcq    $0,  %%r9 ;"
+		"movq  %%r9, 40(%0) ;"
+		"adcq    $0, %%r10 ;"
+		"movq %%r10, 48(%0) ;"
+		"adcq    $0, %%r11 ;"
+		"movq %%r11, 56(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8, 32(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+		  "%r11");
+}
+
+static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
+				    const u64 *const b)
+{
+	asm volatile(
+		"movq   (%1), %%rdx; "	/* A[0] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[0]*B[0] */
+		"xorl %%r10d, %%r10d ;"
+		"movq  %%r8,  (%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[0]*B[1] */
+		"adox  %%r9, %%r10 ;"
+		"movq %%r10, 8(%0) ;"
+		"mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
+		"adox %%r11, %%r15 ;"
+		"mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
+		"adox %%r13, %%r14 ;"
+		"movq $0, %%rax ;"
+		/******************************************/
+		"adox %%rdx, %%rax ;"
+
+		"movq  8(%1), %%rdx; "	/* A[1] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */
+		"xorl %%r10d, %%r10d ;"
+		"adcx 8(%0),  %%r8 ;"
+		"movq  %%r8,  8(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+		"adox  %%r9, %%r10 ;"
+		"adcx %%r15, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
+		"adox %%r11, %%r15 ;"
+		"adcx %%r14, %%r15 ;"
+		"movq $0, %%r8  ;"
+		"mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
+		"adox %%r13, %%r14 ;"
+		"adcx %%rax, %%r14 ;"
+		"movq $0, %%rax ;"
+		/******************************************/
+		"adox %%rdx, %%rax ;"
+		"adcx  %%r8, %%rax ;"
+
+		"movq 16(%1), %%rdx; "	/* A[2] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */
+		"xorl %%r10d, %%r10d ;"
+		"adcx 16(%0), %%r8 ;"
+		"movq  %%r8, 16(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+		"adox  %%r9, %%r10 ;"
+		"adcx %%r15, %%r10 ;"
+		"movq %%r10, 24(%0) ;"
+		"mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
+		"adox %%r11, %%r15 ;"
+		"adcx %%r14, %%r15 ;"
+		"movq $0, %%r8  ;"
+		"mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
+		"adox %%r13, %%r14 ;"
+		"adcx %%rax, %%r14 ;"
+		"movq $0, %%rax ;"
+		/******************************************/
+		"adox %%rdx, %%rax ;"
+		"adcx  %%r8, %%rax ;"
+
+		"movq 24(%1), %%rdx; "	/* A[3] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */
+		"xorl %%r10d, %%r10d ;"
+		"adcx 24(%0), %%r8 ;"
+		"movq  %%r8, 24(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+		"adox  %%r9, %%r10 ;"
+		"adcx %%r15, %%r10 ;"
+		"movq %%r10, 32(%0) ;"
+		"mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
+		"adox %%r11, %%r15 ;"
+		"adcx %%r14, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"movq $0, %%r8  ;"
+		"mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
+		"adox %%r13, %%r14 ;"
+		"adcx %%rax, %%r14 ;"
+		"movq %%r14, 48(%0) ;"
+		"movq $0, %%rax ;"
+		/******************************************/
+		"adox %%rdx, %%rax ;"
+		"adcx  %%r8, %%rax ;"
+		"movq %%rax, 56(%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
+		  "%r13", "%r14", "%r15");
+}
+
+static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+				     const u64 *const b)
+{
+	asm volatile(
+		"movq   (%1), %%rdx; "	/* A[0] */
+		"mulx   (%2),  %%r8, %%r15; " /* A[0]*B[0] */
+		"movq %%r8,  (%0) ;"
+		"mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+		"addq %%r10, %%r15 ;"
+		"mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */
+		"adcq  %%r8, %%rax ;"
+		"mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+		"adcq %%r10, %%rbx ;"
+		/******************************************/
+		"adcq    $0, %%rcx ;"
+
+		"movq  8(%1), %%rdx; "	/* A[1] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */
+		"addq %%r15,  %%r8 ;"
+		"movq %%r8, 8(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%r15 ;"
+
+		"addq  %%r9, %%rax ;"
+		"adcq %%r11, %%rbx ;"
+		"adcq %%r13, %%rcx ;"
+		"adcq    $0, %%r15 ;"
+
+		"movq 16(%1), %%rdx; "	/* A[2] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */
+		"addq %%rax,  %%r8 ;"
+		"movq %%r8, 16(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rax ;"
+
+		"addq  %%r9, %%rbx ;"
+		"adcq %%r11, %%rcx ;"
+		"adcq %%r13, %%r15 ;"
+		"adcq    $0, %%rax ;"
+
+		"movq 24(%1), %%rdx; "	/* A[3] */
+		"mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */
+		"addq %%rbx,  %%r8 ;"
+		"movq %%r8, 24(%0) ;"
+		"mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+		"adcq %%r10,  %%r9 ;"
+		"mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */
+		"adcq  %%r8, %%r11 ;"
+		"mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+		"adcq %%r10, %%r13 ;"
+		/******************************************/
+		"adcq    $0, %%rbx ;"
+
+		"addq  %%r9, %%rcx ;"
+		"movq %%rcx, 32(%0) ;"
+		"adcq %%r11, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"adcq %%r13, %%rax ;"
+		"movq %%rax, 48(%0) ;"
+		"adcq    $0, %%rbx ;"
+		"movq %%rbx, 56(%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movq   (%1), %%rdx        ;" /* A[0]      */
+		"mulx  8(%1),  %%r8, %%r14 ;" /* A[1]*A[0] */
+		"xorl %%r15d, %%r15d;"
+		"mulx 16(%1),  %%r9, %%r10 ;" /* A[2]*A[0] */
+		"adcx %%r14,  %%r9 ;"
+		"mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+		"adcx %%rax, %%r10 ;"
+		"movq 24(%1), %%rdx        ;" /* A[3]      */
+		"mulx  8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+		"adcx %%rcx, %%r11 ;"
+		"mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+		"adcx %%rax, %%rbx ;"
+		"movq  8(%1), %%rdx        ;" /* A[1]      */
+		"adcx %%r15, %%r13 ;"
+		"mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+		"movq    $0, %%r14 ;"
+		/******************************************/
+		"adcx %%r15, %%r14 ;"
+
+		"xorl %%r15d, %%r15d;"
+		"adox %%rax, %%r10 ;"
+		"adcx  %%r8,  %%r8 ;"
+		"adox %%rcx, %%r11 ;"
+		"adcx  %%r9,  %%r9 ;"
+		"adox %%r15, %%rbx ;"
+		"adcx %%r10, %%r10 ;"
+		"adox %%r15, %%r13 ;"
+		"adcx %%r11, %%r11 ;"
+		"adox %%r15, %%r14 ;"
+		"adcx %%rbx, %%rbx ;"
+		"adcx %%r13, %%r13 ;"
+		"adcx %%r14, %%r14 ;"
+
+		"movq   (%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+		/*******************/
+		"movq %%rax,  0(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  8(%0) ;"
+		"movq  8(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9, 16(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10, 24(%0) ;"
+		"movq 16(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11, 32(%0) ;"
+		"adcq %%rcx, %%rbx ;"
+		"movq %%rbx, 40(%0) ;"
+		"movq 24(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 48(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 56(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movq  8(%1), %%rdx        ;" /* A[1]      */
+		"mulx   (%1),  %%r8,  %%r9 ;" /* A[0]*A[1] */
+		"mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+		"mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+		"movq 16(%1), %%rdx        ;" /* A[2]      */
+		"mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+		"mulx   (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+		"addq %%rax,  %%r9 ;"
+		"adcq %%rdx, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq %%r14, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"movq    $0, %%r14 ;"
+		"adcq    $0, %%r14 ;"
+
+		"movq   (%1), %%rdx        ;" /* A[0]      */
+		"mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+		"addq %%rax, %%r10 ;"
+		"adcq %%rcx, %%r11 ;"
+		"adcq    $0, %%r15 ;"
+		"adcq    $0, %%r13 ;"
+		"adcq    $0, %%r14 ;"
+
+		"shldq $1, %%r13, %%r14 ;"
+		"shldq $1, %%r15, %%r13 ;"
+		"shldq $1, %%r11, %%r15 ;"
+		"shldq $1, %%r10, %%r11 ;"
+		"shldq $1,  %%r9, %%r10 ;"
+		"shldq $1,  %%r8,  %%r9 ;"
+		"shlq  $1,  %%r8        ;"
+
+		/*******************/
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+		/*******************/
+		"movq %%rax,  0(%0) ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,  8(%0) ;"
+		"movq  8(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+		"adcq %%rax,  %%r9 ;"
+		"movq  %%r9, 16(%0) ;"
+		"adcq %%rcx, %%r10 ;"
+		"movq %%r10, 24(%0) ;"
+		"movq 16(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+		"adcq %%rax, %%r11 ;"
+		"movq %%r11, 32(%0) ;"
+		"adcq %%rcx, %%r15 ;"
+		"movq %%r15, 40(%0) ;"
+		"movq 24(%1), %%rdx ;"
+		"mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+		"adcq %%rax, %%r13 ;"
+		"movq %%r13, 48(%0) ;"
+		"adcq %%rcx, %%r14 ;"
+		"movq %%r14, 56(%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+		  "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movl    $38, %%edx ;"	/* 2*c = 38 = 2^256 */
+		"mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */
+		"xorl %%ebx, %%ebx ;"
+		"adox   (%1),  %%r8 ;"
+		"mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */
+		"adcx %%r10,  %%r9 ;"
+		"adox  8(%1),  %%r9 ;"
+		"mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+		"adcx %%r11, %%r10 ;"
+		"adox 16(%1), %%r10 ;"
+		"mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+		"adcx %%rax, %%r11 ;"
+		"adox 24(%1), %%r11 ;"
+		/***************************************/
+		"adcx %%rbx, %%rcx ;"
+		"adox  %%rbx, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+		"adcx %%rcx,  %%r8 ;"
+		"adcx %%rbx,  %%r9 ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcx %%rbx, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"adcx %%rbx, %%r11 ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+		  "%r10", "%r11");
+}
+
+static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+	asm volatile(
+		"movl    $38, %%edx ;"	/* 2*c = 38 = 2^256 */
+		"mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */
+		"mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */
+		"addq %%r10,  %%r9 ;"
+		"mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+		"adcq %%r11, %%r10 ;"
+		"mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+		"adcq %%rax, %%r11 ;"
+		/***************************************/
+		"adcq    $0, %%rcx ;"
+		"addq   (%1),  %%r8 ;"
+		"adcq  8(%1),  %%r9 ;"
+		"adcq 16(%1), %%r10 ;"
+		"adcq 24(%1), %%r11 ;"
+		"adcq     $0, %%rcx ;"
+		"imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+		"addq %%rcx,  %%r8 ;"
+		"adcq    $0,  %%r9 ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcq    $0, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"adcq    $0, %%r11 ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a)
+		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+		  "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+{
+	asm volatile(
+		"mov     $38, %%eax ;"
+		"xorl  %%ecx, %%ecx ;"
+		"movq   (%2),  %%r8 ;"
+		"adcx   (%1),  %%r8 ;"
+		"movq  8(%2),  %%r9 ;"
+		"adcx  8(%1),  %%r9 ;"
+		"movq 16(%2), %%r10 ;"
+		"adcx 16(%1), %%r10 ;"
+		"movq 24(%2), %%r11 ;"
+		"adcx 24(%1), %%r11 ;"
+		"cmovc %%eax, %%ecx ;"
+		"xorl %%eax, %%eax  ;"
+		"adcx %%rcx,  %%r8  ;"
+		"adcx %%rax,  %%r9  ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcx %%rax, %%r10  ;"
+		"movq %%r10, 16(%0) ;"
+		"adcx %%rax, %%r11  ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $38, %%ecx ;"
+		"cmovc %%ecx, %%eax ;"
+		"addq %%rax,  %%r8  ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+{
+	asm volatile(
+		"mov     $38, %%eax ;"
+		"movq   (%2),  %%r8 ;"
+		"addq   (%1),  %%r8 ;"
+		"movq  8(%2),  %%r9 ;"
+		"adcq  8(%1),  %%r9 ;"
+		"movq 16(%2), %%r10 ;"
+		"adcq 16(%1), %%r10 ;"
+		"movq 24(%2), %%r11 ;"
+		"adcq 24(%1), %%r11 ;"
+		"mov      $0, %%ecx ;"
+		"cmovc %%eax, %%ecx ;"
+		"addq %%rcx,  %%r8  ;"
+		"adcq    $0,  %%r9  ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcq    $0, %%r10  ;"
+		"movq %%r10, 16(%0) ;"
+		"adcq    $0, %%r11  ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx  ;"
+		"cmovc %%eax, %%ecx ;"
+		"addq %%rcx,  %%r8  ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+{
+	asm volatile(
+		"mov     $38, %%eax ;"
+		"movq   (%1),  %%r8 ;"
+		"subq   (%2),  %%r8 ;"
+		"movq  8(%1),  %%r9 ;"
+		"sbbq  8(%2),  %%r9 ;"
+		"movq 16(%1), %%r10 ;"
+		"sbbq 16(%2), %%r10 ;"
+		"movq 24(%1), %%r11 ;"
+		"sbbq 24(%2), %%r11 ;"
+		"mov      $0, %%ecx ;"
+		"cmovc %%eax, %%ecx ;"
+		"subq %%rcx,  %%r8  ;"
+		"sbbq    $0,  %%r9  ;"
+		"movq  %%r9,  8(%0) ;"
+		"sbbq    $0, %%r10  ;"
+		"movq %%r10, 16(%0) ;"
+		"sbbq    $0, %%r11  ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx  ;"
+		"cmovc %%eax, %%ecx ;"
+		"subq %%rcx,  %%r8  ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(b)
+		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
+static __always_inline void
+mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+{
+	const u64 a24 = 121666;
+	asm volatile(
+		"movq     %2, %%rdx ;"
+		"mulx   (%1),  %%r8, %%r10 ;"
+		"mulx  8(%1),  %%r9, %%r11 ;"
+		"addq %%r10,  %%r9 ;"
+		"mulx 16(%1), %%r10, %%rax ;"
+		"adcq %%r11, %%r10 ;"
+		"mulx 24(%1), %%r11, %%rcx ;"
+		"adcq %%rax, %%r11 ;"
+		/**************************/
+		"adcq    $0, %%rcx ;"
+		"movl   $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
+		"imul %%rdx, %%rcx ;"
+		"addq %%rcx,  %%r8 ;"
+		"adcq    $0,  %%r9 ;"
+		"movq  %%r9,  8(%0) ;"
+		"adcq    $0, %%r10 ;"
+		"movq %%r10, 16(%0) ;"
+		"adcq    $0, %%r11 ;"
+		"movq %%r11, 24(%0) ;"
+		"mov     $0, %%ecx ;"
+		"cmovc %%edx, %%ecx ;"
+		"addq %%rcx,  %%r8 ;"
+		"movq  %%r8,   (%0) ;"
+		:
+		: "r"(c), "r"(a), "r"(a24)
+		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+		  "%r11");
+}
+
+static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+	struct {
+		eltfp25519_1w_buffer buffer;
+		eltfp25519_1w x0, x1, x2;
+	} __aligned(32) m;
+	u64 *T[4];
+
+	T[0] = m.x0;
+	T[1] = c; /* x^(-1) */
+	T[2] = m.x1;
+	T[3] = m.x2;
+
+	copy_eltfp25519_1w(T[1], a);
+	sqrn_eltfp25519_1w_adx(T[1], 1);
+	copy_eltfp25519_1w(T[2], T[1]);
+	sqrn_eltfp25519_1w_adx(T[2], 2);
+	mul_eltfp25519_1w_adx(T[0], a, T[2]);
+	mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
+	copy_eltfp25519_1w(T[2], T[1]);
+	sqrn_eltfp25519_1w_adx(T[2], 1);
+	mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_adx(T[2], 5);
+	mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_adx(T[2], 10);
+	mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+	copy_eltfp25519_1w(T[3], T[2]);
+	sqrn_eltfp25519_1w_adx(T[3], 20);
+	mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
+	sqrn_eltfp25519_1w_adx(T[3], 10);
+	mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
+	copy_eltfp25519_1w(T[0], T[3]);
+	sqrn_eltfp25519_1w_adx(T[0], 50);
+	mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_adx(T[2], 100);
+	mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+	sqrn_eltfp25519_1w_adx(T[2], 50);
+	mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
+	sqrn_eltfp25519_1w_adx(T[2], 5);
+	mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+	struct {
+		eltfp25519_1w_buffer buffer;
+		eltfp25519_1w x0, x1, x2;
+	} __aligned(32) m;
+	u64 *T[5];
+
+	T[0] = m.x0;
+	T[1] = c; /* x^(-1) */
+	T[2] = m.x1;
+	T[3] = m.x2;
+
+	copy_eltfp25519_1w(T[1], a);
+	sqrn_eltfp25519_1w_bmi2(T[1], 1);
+	copy_eltfp25519_1w(T[2], T[1]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 2);
+	mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
+	mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
+	copy_eltfp25519_1w(T[2], T[1]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 1);
+	mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 5);
+	mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 10);
+	mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+	copy_eltfp25519_1w(T[3], T[2]);
+	sqrn_eltfp25519_1w_bmi2(T[3], 20);
+	mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
+	sqrn_eltfp25519_1w_bmi2(T[3], 10);
+	mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
+	copy_eltfp25519_1w(T[0], T[3]);
+	sqrn_eltfp25519_1w_bmi2(T[0], 50);
+	mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
+	copy_eltfp25519_1w(T[2], T[0]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 100);
+	mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 50);
+	mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
+	sqrn_eltfp25519_1w_bmi2(T[2], 5);
+	mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
+ * with a number such that 0 <= C < 2**255-19.
+ */
+static __always_inline void fred_eltfp25519_1w(u64 *const c)
+{
+	u64 tmp0 = 38, tmp1 = 19;
+	asm volatile(
+		"btrq   $63,    %3 ;" /* Put bit 255 in carry flag and clear */
+		"cmovncl %k5,   %k4 ;" /* c[255] ? 38 : 19 */
+
+		/* Add either 19 or 38 to c */
+		"addq    %4,   %0 ;"
+		"adcq    $0,   %1 ;"
+		"adcq    $0,   %2 ;"
+		"adcq    $0,   %3 ;"
+
+		/* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
+		"movl    $0,  %k4 ;"
+		"cmovnsl %k5,  %k4 ;" /* c[255] ? 0 : 19 */
+		"btrq   $63,   %3 ;" /* Clear bit 255 */
+
+		/* Subtract 19 if necessary */
+		"subq    %4,   %0 ;"
+		"sbbq    $0,   %1 ;"
+		"sbbq    $0,   %2 ;"
+		"sbbq    $0,   %3 ;"
+
+		: "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
+		  "+r"(tmp1)
+		:
+		: "memory", "cc");
+}
+
+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+{
+	u64 temp;
+	asm volatile(
+		"test %9, %9 ;"
+		"movq %0, %8 ;"
+		"cmovnzq %4, %0 ;"
+		"cmovnzq %8, %4 ;"
+		"movq %1, %8 ;"
+		"cmovnzq %5, %1 ;"
+		"cmovnzq %8, %5 ;"
+		"movq %2, %8 ;"
+		"cmovnzq %6, %2 ;"
+		"cmovnzq %8, %6 ;"
+		"movq %3, %8 ;"
+		"cmovnzq %7, %3 ;"
+		"cmovnzq %8, %7 ;"
+		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
+		  "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
+		  "=r"(temp)
+		: "r"(bit)
+		: "cc"
+	);
+}
+
+static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+{
+	asm volatile(
+		"test %4, %4 ;"
+		"cmovnzq %5, %0 ;"
+		"cmovnzq %6, %1 ;"
+		"cmovnzq %7, %2 ;"
+		"cmovnzq %8, %3 ;"
+		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
+		: "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
+		: "cc"
+	);
+}
+
+static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
+			   const u8 private_key[CURVE25519_KEY_SIZE],
+			   const u8 session_key[CURVE25519_KEY_SIZE])
+{
+	struct {
+		u64 buffer[4 * NUM_WORDS_ELTFP25519];
+		u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+		u64 workspace[6 * NUM_WORDS_ELTFP25519];
+		u8 session[CURVE25519_KEY_SIZE];
+		u8 private[CURVE25519_KEY_SIZE];
+	} __aligned(32) m;
+
+	int i = 0, j = 0;
+	u64 prev = 0;
+	u64 *const X1 = (u64 *)m.session;
+	u64 *const key = (u64 *)m.private;
+	u64 *const Px = m.coordinates + 0;
+	u64 *const Pz = m.coordinates + 4;
+	u64 *const Qx = m.coordinates + 8;
+	u64 *const Qz = m.coordinates + 12;
+	u64 *const X2 = Qx;
+	u64 *const Z2 = Qz;
+	u64 *const X3 = Px;
+	u64 *const Z3 = Pz;
+	u64 *const X2Z2 = Qx;
+	u64 *const X3Z3 = Px;
+
+	u64 *const A = m.workspace + 0;
+	u64 *const B = m.workspace + 4;
+	u64 *const D = m.workspace + 8;
+	u64 *const C = m.workspace + 12;
+	u64 *const DA = m.workspace + 16;
+	u64 *const CB = m.workspace + 20;
+	u64 *const AB = A;
+	u64 *const DC = D;
+	u64 *const DACB = DA;
+
+	memcpy(m.private, private_key, sizeof(m.private));
+	memcpy(m.session, session_key, sizeof(m.session));
+
+	curve25519_clamp_secret(m.private);
+
+	/* As in the draft:
+	 * When receiving such an array, implementations of curve25519
+	 * MUST mask the most-significant bit in the final byte. This
+	 * is done to preserve compatibility with point formats which
+	 * reserve the sign bit for use in other protocols and to
+	 * increase resistance to implementation fingerprinting
+	 */
+	m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+	copy_eltfp25519_1w(Px, X1);
+	setzero_eltfp25519_1w(Pz);
+	setzero_eltfp25519_1w(Qx);
+	setzero_eltfp25519_1w(Qz);
+
+	Pz[0] = 1;
+	Qx[0] = 1;
+
+	/* main-loop */
+	prev = 0;
+	j = 62;
+	for (i = 3; i >= 0; --i) {
+		while (j >= 0) {
+			u64 bit = (key[i] >> j) & 0x1;
+			u64 swap = bit ^ prev;
+			prev = bit;
+
+			add_eltfp25519_1w_adx(A, X2, Z2);	/* A = (X2+Z2) */
+			sub_eltfp25519_1w(B, X2, Z2);		/* B = (X2-Z2) */
+			add_eltfp25519_1w_adx(C, X3, Z3);	/* C = (X3+Z3) */
+			sub_eltfp25519_1w(D, X3, Z3);		/* D = (X3-Z3) */
+			mul_eltfp25519_2w_adx(DACB, AB, DC);	/* [DA|CB] = [A|B]*[D|C] */
+
+			cselect(swap, A, C);
+			cselect(swap, B, D);
+
+			sqr_eltfp25519_2w_adx(AB);		/* [AA|BB] = [A^2|B^2] */
+			add_eltfp25519_1w_adx(X3, DA, CB);	/* X3 = (DA+CB) */
+			sub_eltfp25519_1w(Z3, DA, CB);		/* Z3 = (DA-CB) */
+			sqr_eltfp25519_2w_adx(X3Z3);		/* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+			copy_eltfp25519_1w(X2, B);		/* X2 = B^2 */
+			sub_eltfp25519_1w(Z2, A, B);		/* Z2 = E = AA-BB */
+
+			mul_a24_eltfp25519_1w(B, Z2);		/* B = a24*E */
+			add_eltfp25519_1w_adx(B, B, X2);	/* B = a24*E+B */
+			mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB);	/* [X2|Z2] = [B|E]*[A|a24*E+B] */
+			mul_eltfp25519_1w_adx(Z3, Z3, X1);	/* Z3 = Z3*X1 */
+			--j;
+		}
+		j = 63;
+	}
+
+	inv_eltfp25519_1w_adx(A, Qz);
+	mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
+	fred_eltfp25519_1w((u64 *)shared);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
+				const u8 private_key[CURVE25519_KEY_SIZE])
+{
+	struct {
+		u64 buffer[4 * NUM_WORDS_ELTFP25519];
+		u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+		u64 workspace[4 * NUM_WORDS_ELTFP25519];
+		u8 private[CURVE25519_KEY_SIZE];
+	} __aligned(32) m;
+
+	const int ite[4] = { 64, 64, 64, 63 };
+	const int q = 3;
+	u64 swap = 1;
+
+	int i = 0, j = 0, k = 0;
+	u64 *const key = (u64 *)m.private;
+	u64 *const Ur1 = m.coordinates + 0;
+	u64 *const Zr1 = m.coordinates + 4;
+	u64 *const Ur2 = m.coordinates + 8;
+	u64 *const Zr2 = m.coordinates + 12;
+
+	u64 *const UZr1 = m.coordinates + 0;
+	u64 *const ZUr2 = m.coordinates + 8;
+
+	u64 *const A = m.workspace + 0;
+	u64 *const B = m.workspace + 4;
+	u64 *const C = m.workspace + 8;
+	u64 *const D = m.workspace + 12;
+
+	u64 *const AB = m.workspace + 0;
+	u64 *const CD = m.workspace + 8;
+
+	const u64 *const P = table_ladder_8k;
+
+	memcpy(m.private, private_key, sizeof(m.private));
+
+	curve25519_clamp_secret(m.private);
+
+	setzero_eltfp25519_1w(Ur1);
+	setzero_eltfp25519_1w(Zr1);
+	setzero_eltfp25519_1w(Zr2);
+	Ur1[0] = 1;
+	Zr1[0] = 1;
+	Zr2[0] = 1;
+
+	/* G-S */
+	Ur2[3] = 0x1eaecdeee27cab34UL;
+	Ur2[2] = 0xadc7a0b9235d48e2UL;
+	Ur2[1] = 0xbbf095ae14b2edf8UL;
+	Ur2[0] = 0x7e94e1fec82faabdUL;
+
+	/* main-loop */
+	j = q;
+	for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+		while (j < ite[i]) {
+			u64 bit = (key[i] >> j) & 0x1;
+			k = (64 * i + j - q);
+			swap = swap ^ bit;
+			cswap(swap, Ur1, Ur2);
+			cswap(swap, Zr1, Zr2);
+			swap = bit;
+			/* Addition */
+			sub_eltfp25519_1w(B, Ur1, Zr1);		/* B = Ur1-Zr1 */
+			add_eltfp25519_1w_adx(A, Ur1, Zr1);	/* A = Ur1+Zr1 */
+			mul_eltfp25519_1w_adx(C, &P[4 * k], B);	/* C = M0-B */
+			sub_eltfp25519_1w(B, A, C);		/* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+			add_eltfp25519_1w_adx(A, A, C);		/* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+			sqr_eltfp25519_2w_adx(AB);		/* A = A^2      |  B = B^2 */
+			mul_eltfp25519_2w_adx(UZr1, ZUr2, AB);	/* Ur1 = Zr2*A  |  Zr1 = Ur2*B */
+			++j;
+		}
+		j = 0;
+	}
+
+	/* Doubling */
+	for (i = 0; i < q; ++i) {
+		add_eltfp25519_1w_adx(A, Ur1, Zr1);	/*  A = Ur1+Zr1 */
+		sub_eltfp25519_1w(B, Ur1, Zr1);		/*  B = Ur1-Zr1 */
+		sqr_eltfp25519_2w_adx(AB);		/*  A = A**2     B = B**2 */
+		copy_eltfp25519_1w(C, B);		/*  C = B */
+		sub_eltfp25519_1w(B, A, B);		/*  B = A-B */
+		mul_a24_eltfp25519_1w(D, B);		/*  D = my_a24*B */
+		add_eltfp25519_1w_adx(D, D, C);		/*  D = D+C */
+		mul_eltfp25519_2w_adx(UZr1, AB, CD);	/*  Ur1 = A*B   Zr1 = Zr1*A */
+	}
+
+	/* Convert to affine coordinates */
+	inv_eltfp25519_1w_adx(A, Zr1);
+	mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
+	fred_eltfp25519_1w((u64 *)session_key);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
+			    const u8 private_key[CURVE25519_KEY_SIZE],
+			    const u8 session_key[CURVE25519_KEY_SIZE])
+{
+	struct {
+		u64 buffer[4 * NUM_WORDS_ELTFP25519];
+		u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+		u64 workspace[6 * NUM_WORDS_ELTFP25519];
+		u8 session[CURVE25519_KEY_SIZE];
+		u8 private[CURVE25519_KEY_SIZE];
+	} __aligned(32) m;
+
+	int i = 0, j = 0;
+	u64 prev = 0;
+	u64 *const X1 = (u64 *)m.session;
+	u64 *const key = (u64 *)m.private;
+	u64 *const Px = m.coordinates + 0;
+	u64 *const Pz = m.coordinates + 4;
+	u64 *const Qx = m.coordinates + 8;
+	u64 *const Qz = m.coordinates + 12;
+	u64 *const X2 = Qx;
+	u64 *const Z2 = Qz;
+	u64 *const X3 = Px;
+	u64 *const Z3 = Pz;
+	u64 *const X2Z2 = Qx;
+	u64 *const X3Z3 = Px;
+
+	u64 *const A = m.workspace + 0;
+	u64 *const B = m.workspace + 4;
+	u64 *const D = m.workspace + 8;
+	u64 *const C = m.workspace + 12;
+	u64 *const DA = m.workspace + 16;
+	u64 *const CB = m.workspace + 20;
+	u64 *const AB = A;
+	u64 *const DC = D;
+	u64 *const DACB = DA;
+
+	memcpy(m.private, private_key, sizeof(m.private));
+	memcpy(m.session, session_key, sizeof(m.session));
+
+	curve25519_clamp_secret(m.private);
+
+	/* As in the draft:
+	 * When receiving such an array, implementations of curve25519
+	 * MUST mask the most-significant bit in the final byte. This
+	 * is done to preserve compatibility with point formats which
+	 * reserve the sign bit for use in other protocols and to
+	 * increase resistance to implementation fingerprinting
+	 */
+	m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+	copy_eltfp25519_1w(Px, X1);
+	setzero_eltfp25519_1w(Pz);
+	setzero_eltfp25519_1w(Qx);
+	setzero_eltfp25519_1w(Qz);
+
+	Pz[0] = 1;
+	Qx[0] = 1;
+
+	/* main-loop */
+	prev = 0;
+	j = 62;
+	for (i = 3; i >= 0; --i) {
+		while (j >= 0) {
+			u64 bit = (key[i] >> j) & 0x1;
+			u64 swap = bit ^ prev;
+			prev = bit;
+
+			add_eltfp25519_1w_bmi2(A, X2, Z2);	/* A = (X2+Z2) */
+			sub_eltfp25519_1w(B, X2, Z2);		/* B = (X2-Z2) */
+			add_eltfp25519_1w_bmi2(C, X3, Z3);	/* C = (X3+Z3) */
+			sub_eltfp25519_1w(D, X3, Z3);		/* D = (X3-Z3) */
+			mul_eltfp25519_2w_bmi2(DACB, AB, DC);	/* [DA|CB] = [A|B]*[D|C] */
+
+			cselect(swap, A, C);
+			cselect(swap, B, D);
+
+			sqr_eltfp25519_2w_bmi2(AB);		/* [AA|BB] = [A^2|B^2] */
+			add_eltfp25519_1w_bmi2(X3, DA, CB);	/* X3 = (DA+CB) */
+			sub_eltfp25519_1w(Z3, DA, CB);		/* Z3 = (DA-CB) */
+			sqr_eltfp25519_2w_bmi2(X3Z3);		/* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+			copy_eltfp25519_1w(X2, B);		/* X2 = B^2 */
+			sub_eltfp25519_1w(Z2, A, B);		/* Z2 = E = AA-BB */
+
+			mul_a24_eltfp25519_1w(B, Z2);		/* B = a24*E */
+			add_eltfp25519_1w_bmi2(B, B, X2);	/* B = a24*E+B */
+			mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB);	/* [X2|Z2] = [B|E]*[A|a24*E+B] */
+			mul_eltfp25519_1w_bmi2(Z3, Z3, X1);	/* Z3 = Z3*X1 */
+			--j;
+		}
+		j = 63;
+	}
+
+	inv_eltfp25519_1w_bmi2(A, Qz);
+	mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
+	fred_eltfp25519_1w((u64 *)shared);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
+				 const u8 private_key[CURVE25519_KEY_SIZE])
+{
+	struct {
+		u64 buffer[4 * NUM_WORDS_ELTFP25519];
+		u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+		u64 workspace[4 * NUM_WORDS_ELTFP25519];
+		u8 private[CURVE25519_KEY_SIZE];
+	} __aligned(32) m;
+
+	const int ite[4] = { 64, 64, 64, 63 };
+	const int q = 3;
+	u64 swap = 1;
+
+	int i = 0, j = 0, k = 0;
+	u64 *const key = (u64 *)m.private;
+	u64 *const Ur1 = m.coordinates + 0;
+	u64 *const Zr1 = m.coordinates + 4;
+	u64 *const Ur2 = m.coordinates + 8;
+	u64 *const Zr2 = m.coordinates + 12;
+
+	u64 *const UZr1 = m.coordinates + 0;
+	u64 *const ZUr2 = m.coordinates + 8;
+
+	u64 *const A = m.workspace + 0;
+	u64 *const B = m.workspace + 4;
+	u64 *const C = m.workspace + 8;
+	u64 *const D = m.workspace + 12;
+
+	u64 *const AB = m.workspace + 0;
+	u64 *const CD = m.workspace + 8;
+
+	const u64 *const P = table_ladder_8k;
+
+	memcpy(m.private, private_key, sizeof(m.private));
+
+	curve25519_clamp_secret(m.private);
+
+	setzero_eltfp25519_1w(Ur1);
+	setzero_eltfp25519_1w(Zr1);
+	setzero_eltfp25519_1w(Zr2);
+	Ur1[0] = 1;
+	Zr1[0] = 1;
+	Zr2[0] = 1;
+
+	/* G-S */
+	Ur2[3] = 0x1eaecdeee27cab34UL;
+	Ur2[2] = 0xadc7a0b9235d48e2UL;
+	Ur2[1] = 0xbbf095ae14b2edf8UL;
+	Ur2[0] = 0x7e94e1fec82faabdUL;
+
+	/* main-loop */
+	j = q;
+	for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+		while (j < ite[i]) {
+			u64 bit = (key[i] >> j) & 0x1;
+			k = (64 * i + j - q);
+			swap = swap ^ bit;
+			cswap(swap, Ur1, Ur2);
+			cswap(swap, Zr1, Zr2);
+			swap = bit;
+			/* Addition */
+			sub_eltfp25519_1w(B, Ur1, Zr1);		/* B = Ur1-Zr1 */
+			add_eltfp25519_1w_bmi2(A, Ur1, Zr1);	/* A = Ur1+Zr1 */
+			mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
+			sub_eltfp25519_1w(B, A, C);		/* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+			add_eltfp25519_1w_bmi2(A, A, C);	/* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+			sqr_eltfp25519_2w_bmi2(AB);		/* A = A^2      |  B = B^2 */
+			mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB);	/* Ur1 = Zr2*A  |  Zr1 = Ur2*B */
+			++j;
+		}
+		j = 0;
+	}
+
+	/* Doubling */
+	for (i = 0; i < q; ++i) {
+		add_eltfp25519_1w_bmi2(A, Ur1, Zr1);	/*  A = Ur1+Zr1 */
+		sub_eltfp25519_1w(B, Ur1, Zr1);		/*  B = Ur1-Zr1 */
+		sqr_eltfp25519_2w_bmi2(AB);		/*  A = A**2     B = B**2 */
+		copy_eltfp25519_1w(C, B);		/*  C = B */
+		sub_eltfp25519_1w(B, A, B);		/*  B = A-B */
+		mul_a24_eltfp25519_1w(D, B);		/*  D = my_a24*B */
+		add_eltfp25519_1w_bmi2(D, D, C);	/*  D = D+C */
+		mul_eltfp25519_2w_bmi2(UZr1, AB, CD);	/*  Ur1 = A*B   Zr1 = Zr1*A */
+	}
+
+	/* Convert to affine coordinates */
+	inv_eltfp25519_1w_bmi2(A, Zr1);
+	mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
+	fred_eltfp25519_1w((u64 *)session_key);
+
+	memzero_explicit(&m, sizeof(m));
+}
+
+void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+		     const u8 secret[CURVE25519_KEY_SIZE],
+		     const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+	if (static_branch_likely(&curve25519_use_adx))
+		curve25519_adx(mypublic, secret, basepoint);
+	else if (static_branch_likely(&curve25519_use_bmi2))
+		curve25519_bmi2(mypublic, secret, basepoint);
+	else
+		curve25519_generic(mypublic, secret, basepoint);
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+			  const u8 secret[CURVE25519_KEY_SIZE])
+{
+	if (static_branch_likely(&curve25519_use_adx))
+		curve25519_adx_base(pub, secret);
+	else if (static_branch_likely(&curve25519_use_bmi2))
+		curve25519_bmi2_base(pub, secret);
+	else
+		curve25519_generic(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	u8 *secret = kpp_tfm_ctx(tfm);
+
+	if (!len)
+		curve25519_generate_secret(secret);
+	else if (len == CURVE25519_KEY_SIZE &&
+		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+		memcpy(secret, buf, CURVE25519_KEY_SIZE);
+	else
+		return -EINVAL;
+	return 0;
+}
+
+static int curve25519_generate_public_key(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	const u8 *secret = kpp_tfm_ctx(tfm);
+	u8 buf[CURVE25519_KEY_SIZE];
+	int copied, nbytes;
+
+	if (req->src)
+		return -EINVAL;
+
+	curve25519_base_arch(buf, secret);
+
+	/* might want less than we've got */
+	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
+	if (copied != nbytes)
+		return -EINVAL;
+	return 0;
+}
+
+static int curve25519_compute_shared_secret(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	const u8 *secret = kpp_tfm_ctx(tfm);
+	u8 public_key[CURVE25519_KEY_SIZE];
+	u8 buf[CURVE25519_KEY_SIZE];
+	int copied, nbytes;
+
+	if (!req->src)
+		return -EINVAL;
+
+	copied = sg_copy_to_buffer(req->src,
+				   sg_nents_for_len(req->src,
+						    CURVE25519_KEY_SIZE),
+				   public_key, CURVE25519_KEY_SIZE);
+	if (copied != CURVE25519_KEY_SIZE)
+		return -EINVAL;
+
+	curve25519_arch(buf, secret, public_key);
+
+	/* might want less than we've got */
+	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
+	if (copied != nbytes)
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+	return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+	.base.cra_name		= "curve25519",
+	.base.cra_driver_name	= "curve25519-x86",
+	.base.cra_priority	= 200,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,
+
+	.set_secret		= curve25519_set_secret,
+	.generate_public_key	= curve25519_generate_public_key,
+	.compute_shared_secret	= curve25519_compute_shared_secret,
+	.max_size		= curve25519_max_size,
+};
+
+static int __init curve25519_mod_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_BMI2))
+		static_branch_enable(&curve25519_use_bmi2);
+	else if (boot_cpu_has(X86_FEATURE_ADX))
+		static_branch_enable(&curve25519_use_adx);
+	else
+		return 0;
+	return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
+		crypto_register_kpp(&curve25519_alg) : 0;
+}
+
+static void __exit curve25519_mod_exit(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
+	    (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
+		crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(curve25519_mod_init);
+module_exit(curve25519_mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 7fca43099a5f..fac0fdc3f25d 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -162,7 +162,7 @@
 	movl   left##d,   (io); \
 	movl   right##d, 4(io);
 
-ENTRY(des3_ede_x86_64_crypt_blk)
+SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
 	/* input:
 	 *	%rdi: round keys, CTX
 	 *	%rsi: dst
@@ -244,7 +244,7 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	popq %rbx;
 
 	ret;
-ENDPROC(des3_ede_x86_64_crypt_blk)
+SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
 
 /***********************************************************************
  * 3-way 3DES
@@ -418,7 +418,7 @@ ENDPROC(des3_ede_x86_64_crypt_blk)
 #define __movq(src, dst) \
 	movq src, dst;
 
-ENTRY(des3_ede_x86_64_crypt_blk_3way)
+SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
 	/* input:
 	 *	%rdi: ctx, round keys
 	 *	%rsi: dst (3 blocks)
@@ -529,7 +529,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	popq %rbx;
 
 	ret;
-ENDPROC(des3_ede_x86_64_crypt_blk_3way)
+SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
 
 .section	.rodata, "a", @progbits
 .align 16
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 5d53effe8abe..bb9735fbb865 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -44,7 +44,7 @@
  *	T2
  *	T3
  */
-__clmul_gf128mul_ble:
+SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
 	movaps DATA, T1
 	pshufd $0b01001110, DATA, T2
 	pshufd $0b01001110, SHASH, T3
@@ -87,10 +87,10 @@ __clmul_gf128mul_ble:
 	pxor T2, T1
 	pxor T1, DATA
 	ret
-ENDPROC(__clmul_gf128mul_ble)
+SYM_FUNC_END(__clmul_gf128mul_ble)
 
 /* void clmul_ghash_mul(char *dst, const u128 *shash) */
-ENTRY(clmul_ghash_mul)
+SYM_FUNC_START(clmul_ghash_mul)
 	FRAME_BEGIN
 	movups (%rdi), DATA
 	movups (%rsi), SHASH
@@ -101,13 +101,13 @@ ENTRY(clmul_ghash_mul)
 	movups DATA, (%rdi)
 	FRAME_END
 	ret
-ENDPROC(clmul_ghash_mul)
+SYM_FUNC_END(clmul_ghash_mul)
 
 /*
  * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
  *			   const u128 *shash);
  */
-ENTRY(clmul_ghash_update)
+SYM_FUNC_START(clmul_ghash_update)
 	FRAME_BEGIN
 	cmp $16, %rdx
 	jb .Lupdate_just_ret	# check length
@@ -130,4 +130,4 @@ ENTRY(clmul_ghash_update)
 .Lupdate_just_ret:
 	FRAME_END
 	ret
-ENDPROC(clmul_ghash_update)
+SYM_FUNC_END(clmul_ghash_update)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 04d72a5a8ce9..a4b728518e28 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -57,10 +57,8 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	be128 *x = (be128 *)key;
 	u64 a, b;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	/* perform multiplication by 'x' in GF(2^128) */
 	a = be64_to_cpu(x->a);
@@ -257,16 +255,11 @@ static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
 {
 	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct crypto_ahash *child = &ctx->cryptd_tfm->base;
-	int err;
 
 	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
 			       & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(child, key, keylen);
-	crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
-			       & CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_ahash_setkey(child, key, keylen);
 }
 
 static int ghash_async_init_tfm(struct crypto_tfm *tfm)
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index d15b99397480..d3d91a0abf88 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
 				src -= num_blocks - 1;
 				dst -= num_blocks - 1;
 
-				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+				gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst,
+							(const u8 *)src);
 
 				nbytes -= func_bytes;
 				if (nbytes < bsize)
@@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
 
 			/* Process multi-block batch */
 			do {
-				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
+				gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst,
+							(const u8 *)src,
+							&ctrblk);
 				src += num_blocks;
 				dst += num_blocks;
 				nbytes -= func_bytes;
@@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
 
 		be128_to_le128(&ctrblk, (be128 *)walk.iv);
 		memcpy(&tmp, walk.src.virt.addr, nbytes);
-		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp,
+							  (const u8 *)&tmp,
 							  &ctrblk);
 		memcpy(walk.dst.virt.addr, &tmp, nbytes);
 		le128_to_be128((be128 *)walk.iv, &ctrblk);
@@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 
 		if (nbytes >= func_bytes) {
 			do {
-				gctx->funcs[i].fn_u.xts(ctx, dst, src,
+				gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst,
+							(const u8 *)src,
 							walk->iv);
 
 				src += num_blocks;
@@ -354,8 +359,8 @@ out:
 }
 EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
 
-void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
-			       common_glue_func_t fn)
+void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src,
+			       le128 *iv, common_glue_func_t fn)
 {
 	le128 ivblk = *iv;
 
@@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
 	gf128mul_x_ble(iv, &ivblk);
 
 	/* CC <- T xor C */
-	u128_xor(dst, src, (u128 *)&ivblk);
+	u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk);
 
 	/* PP <- D(Key2,CC) */
-	fn(ctx, (u8 *)dst, (u8 *)dst);
+	fn(ctx, dst, dst);
 
 	/* P <- T xor PP */
-	u128_xor(dst, dst, (u128 *)&ivblk);
+	u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk);
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
 
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
index f7946ea1b704..b22c7b936272 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -69,7 +69,7 @@
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-ENTRY(nh_avx2)
+SYM_FUNC_START(nh_avx2)
 
 	vmovdqu		0x00(KEY), K0
 	vmovdqu		0x10(KEY), K1
@@ -154,4 +154,4 @@ ENTRY(nh_avx2)
 	vpaddq		T4, T0, T0
 	vmovdqu		T0, (HASH)
 	ret
-ENDPROC(nh_avx2)
+SYM_FUNC_END(nh_avx2)
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
index 51f52d4ab4bb..d7ae22dd6683 100644
--- a/arch/x86/crypto/nh-sse2-x86_64.S
+++ b/arch/x86/crypto/nh-sse2-x86_64.S
@@ -71,7 +71,7 @@
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-ENTRY(nh_sse2)
+SYM_FUNC_START(nh_sse2)
 
 	movdqu		0x00(KEY), K0
 	movdqu		0x10(KEY), K1
@@ -120,4 +120,4 @@ ENTRY(nh_sse2)
 	movdqu		T0, 0x00(HASH)
 	movdqu		T1, 0x10(HASH)
 	ret
-ENDPROC(nh_sse2)
+SYM_FUNC_END(nh_sse2)
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
deleted file mode 100644
index 8b341bc29d41..000000000000
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ /dev/null
@@ -1,390 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst32.ANMASK, "aM", @progbits, 32
-.align 32
-ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
-	.octa 0x0000000003ffffff0000000003ffffff
-
-.section	.rodata.cst32.ORMASK, "aM", @progbits, 32
-.align 32
-ORMASK:	.octa 0x00000000010000000000000001000000
-	.octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define w0 0x14(%r8)
-#define w1 0x18(%r8)
-#define w2 0x1c(%r8)
-#define w3 0x20(%r8)
-#define w4 0x24(%r8)
-#define y0 0x28(%r8)
-#define y1 0x2c(%r8)
-#define y2 0x30(%r8)
-#define y3 0x34(%r8)
-#define y4 0x38(%r8)
-#define m %rsi
-#define hc0 %ymm0
-#define hc1 %ymm1
-#define hc2 %ymm2
-#define hc3 %ymm3
-#define hc4 %ymm4
-#define hc0x %xmm0
-#define hc1x %xmm1
-#define hc2x %xmm2
-#define hc3x %xmm3
-#define hc4x %xmm4
-#define t1 %ymm5
-#define t2 %ymm6
-#define t1x %xmm5
-#define t2x %xmm6
-#define ruwy0 %ymm7
-#define ruwy1 %ymm8
-#define ruwy2 %ymm9
-#define ruwy3 %ymm10
-#define ruwy4 %ymm11
-#define ruwy0x %xmm7
-#define ruwy1x %xmm8
-#define ruwy2x %xmm9
-#define ruwy3x %xmm10
-#define ruwy4x %xmm11
-#define svxz1 %ymm12
-#define svxz2 %ymm13
-#define svxz3 %ymm14
-#define svxz4 %ymm15
-#define d0 %r9
-#define d1 %r10
-#define d2 %r11
-#define d3 %r12
-#define d4 %r13
-
-ENTRY(poly1305_4block_avx2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 64 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Quadblock count
-	# %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
-
-	# This four-block variant uses loop unrolled block processing. It
-	# requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
-	# h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
-
-	vzeroupper
-	push		%rbx
-	push		%r12
-	push		%r13
-
-	# combine r0,u0,w0,y0
-	vmovd		y0,ruwy0x
-	vmovd		w0,t1x
-	vpunpcklqdq	t1,ruwy0,ruwy0
-	vmovd		u0,t1x
-	vmovd		r0,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy0,ruwy0
-
-	# combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
-	vmovd		y1,ruwy1x
-	vmovd		w1,t1x
-	vpunpcklqdq	t1,ruwy1,ruwy1
-	vmovd		u1,t1x
-	vmovd		r1,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy1,ruwy1
-	vpslld		$2,ruwy1,svxz1
-	vpaddd		ruwy1,svxz1,svxz1
-
-	# combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
-	vmovd		y2,ruwy2x
-	vmovd		w2,t1x
-	vpunpcklqdq	t1,ruwy2,ruwy2
-	vmovd		u2,t1x
-	vmovd		r2,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy2,ruwy2
-	vpslld		$2,ruwy2,svxz2
-	vpaddd		ruwy2,svxz2,svxz2
-
-	# combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
-	vmovd		y3,ruwy3x
-	vmovd		w3,t1x
-	vpunpcklqdq	t1,ruwy3,ruwy3
-	vmovd		u3,t1x
-	vmovd		r3,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy3,ruwy3
-	vpslld		$2,ruwy3,svxz3
-	vpaddd		ruwy3,svxz3,svxz3
-
-	# combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
-	vmovd		y4,ruwy4x
-	vmovd		w4,t1x
-	vpunpcklqdq	t1,ruwy4,ruwy4
-	vmovd		u4,t1x
-	vmovd		r4,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy4,ruwy4
-	vpslld		$2,ruwy4,svxz4
-	vpaddd		ruwy4,svxz4,svxz4
-
-.Ldoblock4:
-	# hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
-	#	 m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
-	vmovd		0x00(m),hc0x
-	vmovd		0x10(m),t1x
-	vpunpcklqdq	t1,hc0,hc0
-	vmovd		0x20(m),t1x
-	vmovd		0x30(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc0,hc0
-	vpand		ANMASK(%rip),hc0,hc0
-	vmovd		h0,t1x
-	vpaddd		t1,hc0,hc0
-	# hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
-	#	 (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
-	vmovd		0x03(m),hc1x
-	vmovd		0x13(m),t1x
-	vpunpcklqdq	t1,hc1,hc1
-	vmovd		0x23(m),t1x
-	vmovd		0x33(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc1,hc1
-	vpsrld		$2,hc1,hc1
-	vpand		ANMASK(%rip),hc1,hc1
-	vmovd		h1,t1x
-	vpaddd		t1,hc1,hc1
-	# hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
-	#	 (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
-	vmovd		0x06(m),hc2x
-	vmovd		0x16(m),t1x
-	vpunpcklqdq	t1,hc2,hc2
-	vmovd		0x26(m),t1x
-	vmovd		0x36(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc2,hc2
-	vpsrld		$4,hc2,hc2
-	vpand		ANMASK(%rip),hc2,hc2
-	vmovd		h2,t1x
-	vpaddd		t1,hc2,hc2
-	# hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
-	#	 (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
-	vmovd		0x09(m),hc3x
-	vmovd		0x19(m),t1x
-	vpunpcklqdq	t1,hc3,hc3
-	vmovd		0x29(m),t1x
-	vmovd		0x39(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc3,hc3
-	vpsrld		$6,hc3,hc3
-	vpand		ANMASK(%rip),hc3,hc3
-	vmovd		h3,t1x
-	vpaddd		t1,hc3,hc3
-	# hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
-	#	 (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
-	vmovd		0x0c(m),hc4x
-	vmovd		0x1c(m),t1x
-	vpunpcklqdq	t1,hc4,hc4
-	vmovd		0x2c(m),t1x
-	vmovd		0x3c(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc4,hc4
-	vpsrld		$8,hc4,hc4
-	vpor		ORMASK(%rip),hc4,hc4
-	vmovd		h4,t1x
-	vpaddd		t1,hc4,hc4
-
-	# t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
-	vpmuludq	hc0,ruwy0,t1
-	# t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
-	vpmuludq	hc1,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
-	vpmuludq	hc2,svxz3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
-	vpmuludq	hc3,svxz2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
-	vpmuludq	hc4,svxz1,t2
-	vpaddq		t2,t1,t1
-	# d0 = t1[0] + t1[1] + t[2] + t[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d0
-
-	# t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
-	vpmuludq	hc0,ruwy1,t1
-	# t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
-	vpmuludq	hc1,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
-	vpmuludq	hc2,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
-	vpmuludq	hc3,svxz3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
-	vpmuludq	hc4,svxz2,t2
-	vpaddq		t2,t1,t1
-	# d1 = t1[0] + t1[1] + t1[3] + t1[4]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d1
-
-	# t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
-	vpmuludq	hc0,ruwy2,t1
-	# t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
-	vpmuludq	hc1,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
-	vpmuludq	hc2,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
-	vpmuludq	hc3,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
-	vpmuludq	hc4,svxz3,t2
-	vpaddq		t2,t1,t1
-	# d2 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d2
-
-	# t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
-	vpmuludq	hc0,ruwy3,t1
-	# t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
-	vpmuludq	hc1,ruwy2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
-	vpmuludq	hc2,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
-	vpmuludq	hc3,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
-	vpmuludq	hc4,svxz4,t2
-	vpaddq		t2,t1,t1
-	# d3 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d3
-
-	# t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
-	vpmuludq	hc0,ruwy4,t1
-	# t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
-	vpmuludq	hc1,ruwy3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
-	vpmuludq	hc2,ruwy2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
-	vpmuludq	hc3,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
-	vpmuludq	hc4,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# d4 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d4
-
-	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
-	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
-	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
-	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
-	# integers.  It's true in a single-block implementation, but not here.
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%rax,%rax,4),%rax
-	add		%rax,%rbx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%rbx,%rax
-	shr		$26,%rax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x40,m
-	dec		%rcx
-	jnz		.Ldoblock4
-
-	vzeroupper
-	pop		%r13
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
deleted file mode 100644
index 5578f846e622..000000000000
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ /dev/null
@@ -1,590 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
- *
- * Copyright (C) 2015 Martin Willi
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst16.ANMASK, "aM", @progbits, 16
-.align 16
-ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
-
-.section	.rodata.cst16.ORMASK, "aM", @progbits, 16
-.align 16
-ORMASK:	.octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define s1 0x00(%rsp)
-#define s2 0x04(%rsp)
-#define s3 0x08(%rsp)
-#define s4 0x0c(%rsp)
-#define m %rsi
-#define h01 %xmm0
-#define h23 %xmm1
-#define h44 %xmm2
-#define t1 %xmm3
-#define t2 %xmm4
-#define t3 %xmm5
-#define t4 %xmm6
-#define mask %xmm7
-#define d0 %r8
-#define d1 %r9
-#define d2 %r10
-#define d3 %r11
-#define d4 %r12
-
-ENTRY(poly1305_block_sse2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 16 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Block count
-
-	# This single block variant tries to improve performance by doing two
-	# multiplications in parallel using SSE instructions. There is quite
-	# some quardword packing involved, hence the speedup is marginal.
-
-	push		%rbx
-	push		%r12
-	sub		$0x10,%rsp
-
-	# s1..s4 = r1..r4 * 5
-	mov		r1,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s1
-	mov		r2,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s2
-	mov		r3,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s3
-	mov		r4,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s4
-
-	movdqa		ANMASK(%rip),mask
-
-.Ldoblock:
-	# h01 = [0, h1, 0, h0]
-	# h23 = [0, h3, 0, h2]
-	# h44 = [0, h4, 0, h4]
-	movd		h0,h01
-	movd		h1,t1
-	movd		h2,h23
-	movd		h3,t2
-	movd		h4,h44
-	punpcklqdq	t1,h01
-	punpcklqdq	t2,h23
-	punpcklqdq	h44,h44
-
-	# h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
-	movd		0x00(m),t1
-	movd		0x03(m),t2
-	psrld		$2,t2
-	punpcklqdq	t2,t1
-	pand		mask,t1
-	paddd		t1,h01
-	# h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
-	movd		0x06(m),t1
-	movd		0x09(m),t2
-	psrld		$4,t1
-	psrld		$6,t2
-	punpcklqdq	t2,t1
-	pand		mask,t1
-	paddd		t1,h23
-	# h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
-	mov		0x0c(m),%eax
-	shr		$8,%eax
-	or		$0x01000000,%eax
-	movd		%eax,t1
-	pshufd		$0xc4,t1,t1
-	paddd		t1,h44
-
-	# t1[0] = h0 * r0 + h2 * s3
-	# t1[1] = h1 * s4 + h3 * s2
-	movd		r0,t1
-	movd		s4,t2
-	punpcklqdq	t2,t1
-	pmuludq		h01,t1
-	movd		s3,t2
-	movd		s2,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t2[0] = h0 * r1 + h2 * s4
-	# t2[1] = h1 * r0 + h3 * s3
-	movd		r1,t2
-	movd		r0,t3
-	punpcklqdq	t3,t2
-	pmuludq		h01,t2
-	movd		s4,t3
-	movd		s3,t4
-	punpcklqdq	t4,t3
-	pmuludq		h23,t3
-	paddq		t3,t2
-	# t3[0] = h4 * s1
-	# t3[1] = h4 * s2
-	movd		s1,t3
-	movd		s2,t4
-	punpcklqdq	t4,t3
-	pmuludq		h44,t3
-	# d0 = t1[0] + t1[1] + t3[0]
-	# d1 = t2[0] + t2[1] + t3[1]
-	movdqa		t1,t4
-	punpcklqdq	t2,t4
-	punpckhqdq	t2,t1
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d0
-	psrldq		$8,t1
-	movq		t1,d1
-
-	# t1[0] = h0 * r2 + h2 * r0
-	# t1[1] = h1 * r1 + h3 * s4
-	movd		r2,t1
-	movd		r1,t2
-	punpcklqdq 	t2,t1
-	pmuludq		h01,t1
-	movd		r0,t2
-	movd		s4,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t2[0] = h0 * r3 + h2 * r1
-	# t2[1] = h1 * r2 + h3 * r0
-	movd		r3,t2
-	movd		r2,t3
-	punpcklqdq	t3,t2
-	pmuludq		h01,t2
-	movd		r1,t3
-	movd		r0,t4
-	punpcklqdq	t4,t3
-	pmuludq		h23,t3
-	paddq		t3,t2
-	# t3[0] = h4 * s3
-	# t3[1] = h4 * s4
-	movd		s3,t3
-	movd		s4,t4
-	punpcklqdq	t4,t3
-	pmuludq		h44,t3
-	# d2 = t1[0] + t1[1] + t3[0]
-	# d3 = t2[0] + t2[1] + t3[1]
-	movdqa		t1,t4
-	punpcklqdq	t2,t4
-	punpckhqdq	t2,t1
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d2
-	psrldq		$8,t1
-	movq		t1,d3
-
-	# t1[0] = h0 * r4 + h2 * r2
-	# t1[1] = h1 * r3 + h3 * r1
-	movd		r4,t1
-	movd		r3,t2
-	punpcklqdq	t2,t1
-	pmuludq		h01,t1
-	movd		r2,t2
-	movd		r1,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t3[0] = h4 * r0
-	movd		r0,t3
-	pmuludq		h44,t3
-	# d4 = t1[0] + t1[1] + t3[0]
-	movdqa		t1,t4
-	psrldq		$8,t4
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d4
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%rax,%rax,4),%rax
-	add		%rax,%rbx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%rbx,%rax
-	shr		$26,%rax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x10,m
-	dec		%rcx
-	jnz		.Ldoblock
-
-	# Zeroing of key material
-	mov		%rcx,0x00(%rsp)
-	mov		%rcx,0x08(%rsp)
-
-	add		$0x10,%rsp
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_block_sse2)
-
-
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define hc0 %xmm0
-#define hc1 %xmm1
-#define hc2 %xmm2
-#define hc3 %xmm5
-#define hc4 %xmm6
-#define ru0 %xmm7
-#define ru1 %xmm8
-#define ru2 %xmm9
-#define ru3 %xmm10
-#define ru4 %xmm11
-#define sv1 %xmm12
-#define sv2 %xmm13
-#define sv3 %xmm14
-#define sv4 %xmm15
-#undef d0
-#define d0 %r13
-
-ENTRY(poly1305_2block_sse2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 16 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Doubleblock count
-	# %r8:  Poly1305 derived key r^2 u[5]
-
-	# This two-block variant further improves performance by using loop
-	# unrolled block processing. This is more straight forward and does
-	# less byte shuffling, but requires a second Poly1305 key r^2:
-	# h = (h + m) * r    =>    h = (h + m1) * r^2 + m2 * r
-
-	push		%rbx
-	push		%r12
-	push		%r13
-
-	# combine r0,u0
-	movd		u0,ru0
-	movd		r0,t1
-	punpcklqdq	t1,ru0
-
-	# combine r1,u1 and s1=r1*5,v1=u1*5
-	movd		u1,ru1
-	movd		r1,t1
-	punpcklqdq	t1,ru1
-	movdqa		ru1,sv1
-	pslld		$2,sv1
-	paddd		ru1,sv1
-
-	# combine r2,u2 and s2=r2*5,v2=u2*5
-	movd		u2,ru2
-	movd		r2,t1
-	punpcklqdq	t1,ru2
-	movdqa		ru2,sv2
-	pslld		$2,sv2
-	paddd		ru2,sv2
-
-	# combine r3,u3 and s3=r3*5,v3=u3*5
-	movd		u3,ru3
-	movd		r3,t1
-	punpcklqdq	t1,ru3
-	movdqa		ru3,sv3
-	pslld		$2,sv3
-	paddd		ru3,sv3
-
-	# combine r4,u4 and s4=r4*5,v4=u4*5
-	movd		u4,ru4
-	movd		r4,t1
-	punpcklqdq	t1,ru4
-	movdqa		ru4,sv4
-	pslld		$2,sv4
-	paddd		ru4,sv4
-
-.Ldoblock2:
-	# hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
-	movd		0x00(m),hc0
-	movd		0x10(m),t1
-	punpcklqdq	t1,hc0
-	pand		ANMASK(%rip),hc0
-	movd		h0,t1
-	paddd		t1,hc0
-	# hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
-	movd		0x03(m),hc1
-	movd		0x13(m),t1
-	punpcklqdq	t1,hc1
-	psrld		$2,hc1
-	pand		ANMASK(%rip),hc1
-	movd		h1,t1
-	paddd		t1,hc1
-	# hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
-	movd		0x06(m),hc2
-	movd		0x16(m),t1
-	punpcklqdq	t1,hc2
-	psrld		$4,hc2
-	pand		ANMASK(%rip),hc2
-	movd		h2,t1
-	paddd		t1,hc2
-	# hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
-	movd		0x09(m),hc3
-	movd		0x19(m),t1
-	punpcklqdq	t1,hc3
-	psrld		$6,hc3
-	pand		ANMASK(%rip),hc3
-	movd		h3,t1
-	paddd		t1,hc3
-	# hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
-	movd		0x0c(m),hc4
-	movd		0x1c(m),t1
-	punpcklqdq	t1,hc4
-	psrld		$8,hc4
-	por		ORMASK(%rip),hc4
-	movd		h4,t1
-	paddd		t1,hc4
-
-	# t1 = [ hc0[1] * r0, hc0[0] * u0 ]
-	movdqa		ru0,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * s4, hc1[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * s3, hc2[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s2, hc3[0] * v2 ]
-	movdqa		sv2,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s1, hc4[0] * v1 ]
-	movdqa		sv1,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d0 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d0
-
-	# t1 = [ hc0[1] * r1, hc0[0] * u1 ]
-	movdqa		ru1,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r0, hc1[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * s4, hc2[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s3, hc3[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s2, hc4[0] * v2 ]
-	movdqa		sv2,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d1 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d1
-
-	# t1 = [ hc0[1] * r2, hc0[0] * u2 ]
-	movdqa		ru2,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r1, hc1[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r0, hc2[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s4, hc3[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s3, hc4[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d2 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d2
-
-	# t1 = [ hc0[1] * r3, hc0[0] * u3 ]
-	movdqa		ru3,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r2, hc1[0] * u2 ]
-	movdqa		ru2,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r1, hc2[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * r0, hc3[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s4, hc4[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d3 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d3
-
-	# t1 = [ hc0[1] * r4, hc0[0] * u4 ]
-	movdqa		ru4,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r3, hc1[0] * u3 ]
-	movdqa		ru3,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r2, hc2[0] * u2 ]
-	movdqa		ru2,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * r1, hc3[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * r0, hc4[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d4 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d4
-
-	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
-	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
-	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
-	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
-	# integers.  It's true in a single-block implementation, but not here.
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%rax,%rax,4),%rax
-	add		%rax,%rbx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%rbx,%rax
-	shr		$26,%rax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x20,m
-	dec		%rcx
-	jnz		.Ldoblock2
-
-	pop		%r13
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
new file mode 100644
index 000000000000..7a6b5380a46f
--- /dev/null
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -0,0 +1,4265 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#
+# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+#
+# This code is taken from the OpenSSL project but the author, Andy Polyakov,
+# has relicensed it under the licenses specified in the SPDX header above.
+# The original headers, including the original license headers, are
+# included below for completeness.
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements Poly1305 hash for x86_64.
+#
+# March 2015
+#
+# Initial release.
+#
+# December 2016
+#
+# Add AVX512F+VL+BW code path.
+#
+# November 2017
+#
+# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be
+# executed even on Knights Landing. Trigger for modification was
+# observation that AVX512 code paths can negatively affect overall
+# Skylake-X system performance. Since we are likely to suppress
+# AVX512F capability flag [at least on Skylake-X], conversion serves
+# as kind of "investment protection". Note that next *lake processor,
+# Cannonlake, has AVX512IFMA code path to execute...
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone,
+# measured with rdtsc at fixed clock frequency.
+#
+#		IALU/gcc-4.8(*)	AVX(**)		AVX2	AVX-512
+# P4		4.46/+120%	-
+# Core 2	2.41/+90%	-
+# Westmere	1.88/+120%	-
+# Sandy Bridge	1.39/+140%	1.10
+# Haswell	1.14/+175%	1.11		0.65
+# Skylake[-X]	1.13/+120%	0.96		0.51	[0.35]
+# Silvermont	2.83/+95%	-
+# Knights L	3.60/?		1.65		1.10	0.41(***)
+# Goldmont	1.70/+180%	-
+# VIA Nano	1.82/+150%	-
+# Sledgehammer	1.38/+160%	-
+# Bulldozer	2.30/+130%	0.97
+# Ryzen		1.15/+200%	1.08		1.18
+#
+# (*)	improvement coefficients relative to clang are more modest and
+#	are ~50% on most processors, in both cases we are comparing to
+#	__int128 code;
+# (**)	SSE2 implementation was attempted, but among non-AVX processors
+#	it was faster than integer-only code only on older Intel P4 and
+#	Core processors, 50-30%, less newer processor is, but slower on
+#	contemporary ones, for example almost 2x slower on Atom, and as
+#	former are naturally disappearing, SSE2 is deemed unnecessary;
+# (***)	strangely enough performance seems to vary from core to core,
+#	listed result is best case;
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+$kernel=0; $kernel=1 if (!$flavour && !$output);
+
+if (!$kernel) {
+	$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+	( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+	( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+	die "can't locate x86_64-xlate.pl";
+
+	open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+	*STDOUT=*OUT;
+
+	if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+	    =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+		$avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25);
+	}
+
+	if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+	    `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
+		$avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12);
+		$avx += 1 if ($1==2.11 && $2>=8);
+	}
+
+	if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+		$avx = ($1>=10) + ($1>=11);
+	}
+
+	if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
+		$avx = ($2>=3.0) + ($2>3.0);
+	}
+} else {
+	$avx = 4; # The kernel uses ifdefs for this.
+}
+
+sub declare_function() {
+	my ($name, $align, $nargs) = @_;
+	if($kernel) {
+		$code .= ".align $align\n";
+		$code .= "SYM_FUNC_START($name)\n";
+		$code .= ".L$name:\n";
+	} else {
+		$code .= ".globl	$name\n";
+		$code .= ".type	$name,\@function,$nargs\n";
+		$code .= ".align	$align\n";
+		$code .= "$name:\n";
+	}
+}
+
+sub end_function() {
+	my ($name) = @_;
+	if($kernel) {
+		$code .= "SYM_FUNC_END($name)\n";
+	} else {
+		$code .= ".size   $name,.-$name\n";
+	}
+}
+
+$code.=<<___ if $kernel;
+#include <linux/linkage.h>
+___
+
+if ($avx) {
+$code.=<<___ if $kernel;
+.section .rodata
+___
+$code.=<<___;
+.align	64
+.Lconst:
+.Lmask24:
+.long	0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
+.L129:
+.long	`1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
+.Lmask26:
+.long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
+.Lpermd_avx2:
+.long	2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
+
+.L2_44_inp_permd:
+.long	0,1,1,2,2,3,7,7
+.L2_44_inp_shift:
+.quad	0,12,24,64
+.L2_44_mask:
+.quad	0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
+.L2_44_shift_rgt:
+.quad	44,44,42,64
+.L2_44_shift_lft:
+.quad	8,8,10,64
+
+.align	64
+.Lx_mask44:
+.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.Lx_mask42:
+.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+___
+}
+$code.=<<___ if (!$kernel);
+.asciz	"Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align	16
+___
+
+my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
+my ($mac,$nonce)=($inp,$len);	# *_emit arguments
+my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13");
+my ($h0,$h1,$h2)=("%r14","%rbx","%r10");
+
+sub poly1305_iteration {
+# input:	copy of $r1 in %rax, $h0-$h2, $r0-$r1
+# output:	$h0-$h2 *= $r0-$r1
+$code.=<<___;
+	mulq	$h0			# h0*r1
+	mov	%rax,$d2
+	 mov	$r0,%rax
+	mov	%rdx,$d3
+
+	mulq	$h0			# h0*r0
+	mov	%rax,$h0		# future $h0
+	 mov	$r0,%rax
+	mov	%rdx,$d1
+
+	mulq	$h1			# h1*r0
+	add	%rax,$d2
+	 mov	$s1,%rax
+	adc	%rdx,$d3
+
+	mulq	$h1			# h1*s1
+	 mov	$h2,$h1			# borrow $h1
+	add	%rax,$h0
+	adc	%rdx,$d1
+
+	imulq	$s1,$h1			# h2*s1
+	add	$h1,$d2
+	 mov	$d1,$h1
+	adc	\$0,$d3
+
+	imulq	$r0,$h2			# h2*r0
+	add	$d2,$h1
+	mov	\$-4,%rax		# mask value
+	adc	$h2,$d3
+
+	and	$d3,%rax		# last reduction step
+	mov	$d3,$h2
+	shr	\$2,$d3
+	and	\$3,$h2
+	add	$d3,%rax
+	add	%rax,$h0
+	adc	\$0,$h1
+	adc	\$0,$h2
+___
+}
+
+########################################################################
+# Layout of opaque area is following.
+#
+#	unsigned __int64 h[3];		# current hash value base 2^64
+#	unsigned __int64 r[2];		# key value base 2^64
+
+$code.=<<___;
+.text
+___
+$code.=<<___ if (!$kernel);
+.extern	OPENSSL_ia32cap_P
+
+.globl	poly1305_init_x86_64
+.hidden	poly1305_init_x86_64
+.globl	poly1305_blocks_x86_64
+.hidden	poly1305_blocks_x86_64
+.globl	poly1305_emit_x86_64
+.hidden	poly1305_emit_x86_64
+___
+&declare_function("poly1305_init_x86_64", 32, 3);
+$code.=<<___;
+	xor	%rax,%rax
+	mov	%rax,0($ctx)		# initialize hash value
+	mov	%rax,8($ctx)
+	mov	%rax,16($ctx)
+
+	cmp	\$0,$inp
+	je	.Lno_key
+___
+$code.=<<___ if (!$kernel);
+	lea	poly1305_blocks_x86_64(%rip),%r10
+	lea	poly1305_emit_x86_64(%rip),%r11
+___
+$code.=<<___	if (!$kernel && $avx);
+	mov	OPENSSL_ia32cap_P+4(%rip),%r9
+	lea	poly1305_blocks_avx(%rip),%rax
+	lea	poly1305_emit_avx(%rip),%rcx
+	bt	\$`60-32`,%r9		# AVX?
+	cmovc	%rax,%r10
+	cmovc	%rcx,%r11
+___
+$code.=<<___	if (!$kernel && $avx>1);
+	lea	poly1305_blocks_avx2(%rip),%rax
+	bt	\$`5+32`,%r9		# AVX2?
+	cmovc	%rax,%r10
+___
+$code.=<<___	if (!$kernel && $avx>3);
+	mov	\$`(1<<31|1<<21|1<<16)`,%rax
+	shr	\$32,%r9
+	and	%rax,%r9
+	cmp	%rax,%r9
+	je	.Linit_base2_44
+___
+$code.=<<___;
+	mov	\$0x0ffffffc0fffffff,%rax
+	mov	\$0x0ffffffc0ffffffc,%rcx
+	and	0($inp),%rax
+	and	8($inp),%rcx
+	mov	%rax,24($ctx)
+	mov	%rcx,32($ctx)
+___
+$code.=<<___	if (!$kernel && $flavour !~ /elf32/);
+	mov	%r10,0(%rdx)
+	mov	%r11,8(%rdx)
+___
+$code.=<<___	if (!$kernel && $flavour =~ /elf32/);
+	mov	%r10d,0(%rdx)
+	mov	%r11d,4(%rdx)
+___
+$code.=<<___;
+	mov	\$1,%eax
+.Lno_key:
+	ret
+___
+&end_function("poly1305_init_x86_64");
+
+&declare_function("poly1305_blocks_x86_64", 32, 4);
+$code.=<<___;
+.cfi_startproc
+.Lblocks:
+	shr	\$4,$len
+	jz	.Lno_data		# too short
+
+	push	%rbx
+.cfi_push	%rbx
+	push	%r12
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+	push	$ctx
+.cfi_push	$ctx
+.Lblocks_body:
+
+	mov	$len,%r15		# reassign $len
+
+	mov	24($ctx),$r0		# load r
+	mov	32($ctx),$s1
+
+	mov	0($ctx),$h0		# load hash value
+	mov	8($ctx),$h1
+	mov	16($ctx),$h2
+
+	mov	$s1,$r1
+	shr	\$2,$s1
+	mov	$r1,%rax
+	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
+	jmp	.Loop
+
+.align	32
+.Loop:
+	add	0($inp),$h0		# accumulate input
+	adc	8($inp),$h1
+	lea	16($inp),$inp
+	adc	$padbit,$h2
+___
+
+	&poly1305_iteration();
+
+$code.=<<___;
+	mov	$r1,%rax
+	dec	%r15			# len-=16
+	jnz	.Loop
+
+	mov	0(%rsp),$ctx
+.cfi_restore	$ctx
+
+	mov	$h0,0($ctx)		# store hash value
+	mov	$h1,8($ctx)
+	mov	$h2,16($ctx)
+
+	mov	8(%rsp),%r15
+.cfi_restore	%r15
+	mov	16(%rsp),%r14
+.cfi_restore	%r14
+	mov	24(%rsp),%r13
+.cfi_restore	%r13
+	mov	32(%rsp),%r12
+.cfi_restore	%r12
+	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
+	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
+.Lno_data:
+.Lblocks_epilogue:
+	ret
+.cfi_endproc
+___
+&end_function("poly1305_blocks_x86_64");
+
+&declare_function("poly1305_emit_x86_64", 32, 3);
+$code.=<<___;
+.Lemit:
+	mov	0($ctx),%r8	# load hash value
+	mov	8($ctx),%r9
+	mov	16($ctx),%r10
+
+	mov	%r8,%rax
+	add	\$5,%r8		# compare to modulus
+	mov	%r9,%rcx
+	adc	\$0,%r9
+	adc	\$0,%r10
+	shr	\$2,%r10	# did 130-bit value overflow?
+	cmovnz	%r8,%rax
+	cmovnz	%r9,%rcx
+
+	add	0($nonce),%rax	# accumulate nonce
+	adc	8($nonce),%rcx
+	mov	%rax,0($mac)	# write result
+	mov	%rcx,8($mac)
+
+	ret
+___
+&end_function("poly1305_emit_x86_64");
+if ($avx) {
+
+if($kernel) {
+	$code .= "#ifdef CONFIG_AS_AVX\n";
+}
+
+########################################################################
+# Layout of opaque area is following.
+#
+#	unsigned __int32 h[5];		# current hash value base 2^26
+#	unsigned __int32 is_base2_26;
+#	unsigned __int64 r[2];		# key value base 2^64
+#	unsigned __int64 pad;
+#	struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9];
+#
+# where r^n are base 2^26 digits of degrees of multiplier key. There are
+# 5 digits, but last four are interleaved with multiples of 5, totalling
+# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4.
+
+my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
+    map("%xmm$_",(0..15));
+
+$code.=<<___;
+.type	__poly1305_block,\@abi-omnipotent
+.align	32
+__poly1305_block:
+	push $ctx
+___
+	&poly1305_iteration();
+$code.=<<___;
+	pop $ctx
+	ret
+.size	__poly1305_block,.-__poly1305_block
+
+.type	__poly1305_init_avx,\@abi-omnipotent
+.align	32
+__poly1305_init_avx:
+	push %rbp
+	mov %rsp,%rbp
+	mov	$r0,$h0
+	mov	$r1,$h1
+	xor	$h2,$h2
+
+	lea	48+64($ctx),$ctx	# size optimization
+
+	mov	$r1,%rax
+	call	__poly1305_block	# r^2
+
+	mov	\$0x3ffffff,%eax	# save interleaved r^2 and r base 2^26
+	mov	\$0x3ffffff,%edx
+	mov	$h0,$d1
+	and	$h0#d,%eax
+	mov	$r0,$d2
+	and	$r0#d,%edx
+	mov	%eax,`16*0+0-64`($ctx)
+	shr	\$26,$d1
+	mov	%edx,`16*0+4-64`($ctx)
+	shr	\$26,$d2
+
+	mov	\$0x3ffffff,%eax
+	mov	\$0x3ffffff,%edx
+	and	$d1#d,%eax
+	and	$d2#d,%edx
+	mov	%eax,`16*1+0-64`($ctx)
+	lea	(%rax,%rax,4),%eax	# *5
+	mov	%edx,`16*1+4-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	mov	%eax,`16*2+0-64`($ctx)
+	shr	\$26,$d1
+	mov	%edx,`16*2+4-64`($ctx)
+	shr	\$26,$d2
+
+	mov	$h1,%rax
+	mov	$r1,%rdx
+	shl	\$12,%rax
+	shl	\$12,%rdx
+	or	$d1,%rax
+	or	$d2,%rdx
+	and	\$0x3ffffff,%eax
+	and	\$0x3ffffff,%edx
+	mov	%eax,`16*3+0-64`($ctx)
+	lea	(%rax,%rax,4),%eax	# *5
+	mov	%edx,`16*3+4-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	mov	%eax,`16*4+0-64`($ctx)
+	mov	$h1,$d1
+	mov	%edx,`16*4+4-64`($ctx)
+	mov	$r1,$d2
+
+	mov	\$0x3ffffff,%eax
+	mov	\$0x3ffffff,%edx
+	shr	\$14,$d1
+	shr	\$14,$d2
+	and	$d1#d,%eax
+	and	$d2#d,%edx
+	mov	%eax,`16*5+0-64`($ctx)
+	lea	(%rax,%rax,4),%eax	# *5
+	mov	%edx,`16*5+4-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	mov	%eax,`16*6+0-64`($ctx)
+	shr	\$26,$d1
+	mov	%edx,`16*6+4-64`($ctx)
+	shr	\$26,$d2
+
+	mov	$h2,%rax
+	shl	\$24,%rax
+	or	%rax,$d1
+	mov	$d1#d,`16*7+0-64`($ctx)
+	lea	($d1,$d1,4),$d1		# *5
+	mov	$d2#d,`16*7+4-64`($ctx)
+	lea	($d2,$d2,4),$d2		# *5
+	mov	$d1#d,`16*8+0-64`($ctx)
+	mov	$d2#d,`16*8+4-64`($ctx)
+
+	mov	$r1,%rax
+	call	__poly1305_block	# r^3
+
+	mov	\$0x3ffffff,%eax	# save r^3 base 2^26
+	mov	$h0,$d1
+	and	$h0#d,%eax
+	shr	\$26,$d1
+	mov	%eax,`16*0+12-64`($ctx)
+
+	mov	\$0x3ffffff,%edx
+	and	$d1#d,%edx
+	mov	%edx,`16*1+12-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	shr	\$26,$d1
+	mov	%edx,`16*2+12-64`($ctx)
+
+	mov	$h1,%rax
+	shl	\$12,%rax
+	or	$d1,%rax
+	and	\$0x3ffffff,%eax
+	mov	%eax,`16*3+12-64`($ctx)
+	lea	(%rax,%rax,4),%eax	# *5
+	mov	$h1,$d1
+	mov	%eax,`16*4+12-64`($ctx)
+
+	mov	\$0x3ffffff,%edx
+	shr	\$14,$d1
+	and	$d1#d,%edx
+	mov	%edx,`16*5+12-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	shr	\$26,$d1
+	mov	%edx,`16*6+12-64`($ctx)
+
+	mov	$h2,%rax
+	shl	\$24,%rax
+	or	%rax,$d1
+	mov	$d1#d,`16*7+12-64`($ctx)
+	lea	($d1,$d1,4),$d1		# *5
+	mov	$d1#d,`16*8+12-64`($ctx)
+
+	mov	$r1,%rax
+	call	__poly1305_block	# r^4
+
+	mov	\$0x3ffffff,%eax	# save r^4 base 2^26
+	mov	$h0,$d1
+	and	$h0#d,%eax
+	shr	\$26,$d1
+	mov	%eax,`16*0+8-64`($ctx)
+
+	mov	\$0x3ffffff,%edx
+	and	$d1#d,%edx
+	mov	%edx,`16*1+8-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	shr	\$26,$d1
+	mov	%edx,`16*2+8-64`($ctx)
+
+	mov	$h1,%rax
+	shl	\$12,%rax
+	or	$d1,%rax
+	and	\$0x3ffffff,%eax
+	mov	%eax,`16*3+8-64`($ctx)
+	lea	(%rax,%rax,4),%eax	# *5
+	mov	$h1,$d1
+	mov	%eax,`16*4+8-64`($ctx)
+
+	mov	\$0x3ffffff,%edx
+	shr	\$14,$d1
+	and	$d1#d,%edx
+	mov	%edx,`16*5+8-64`($ctx)
+	lea	(%rdx,%rdx,4),%edx	# *5
+	shr	\$26,$d1
+	mov	%edx,`16*6+8-64`($ctx)
+
+	mov	$h2,%rax
+	shl	\$24,%rax
+	or	%rax,$d1
+	mov	$d1#d,`16*7+8-64`($ctx)
+	lea	($d1,$d1,4),$d1		# *5
+	mov	$d1#d,`16*8+8-64`($ctx)
+
+	lea	-48-64($ctx),$ctx	# size [de-]optimization
+	pop %rbp
+	ret
+.size	__poly1305_init_avx,.-__poly1305_init_avx
+___
+
+&declare_function("poly1305_blocks_avx", 32, 4);
+$code.=<<___;
+.cfi_startproc
+	mov	20($ctx),%r8d		# is_base2_26
+	cmp	\$128,$len
+	jae	.Lblocks_avx
+	test	%r8d,%r8d
+	jz	.Lblocks
+
+.Lblocks_avx:
+	and	\$-16,$len
+	jz	.Lno_data_avx
+
+	vzeroupper
+
+	test	%r8d,%r8d
+	jz	.Lbase2_64_avx
+
+	test	\$31,$len
+	jz	.Leven_avx
+
+	push	%rbp
+.cfi_push	%rbp
+	mov 	%rsp,%rbp
+	push	%rbx
+.cfi_push	%rbx
+	push	%r12
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+.Lblocks_avx_body:
+
+	mov	$len,%r15		# reassign $len
+
+	mov	0($ctx),$d1		# load hash value
+	mov	8($ctx),$d2
+	mov	16($ctx),$h2#d
+
+	mov	24($ctx),$r0		# load r
+	mov	32($ctx),$s1
+
+	################################# base 2^26 -> base 2^64
+	mov	$d1#d,$h0#d
+	and	\$`-1*(1<<31)`,$d1
+	mov	$d2,$r1			# borrow $r1
+	mov	$d2#d,$h1#d
+	and	\$`-1*(1<<31)`,$d2
+
+	shr	\$6,$d1
+	shl	\$52,$r1
+	add	$d1,$h0
+	shr	\$12,$h1
+	shr	\$18,$d2
+	add	$r1,$h0
+	adc	$d2,$h1
+
+	mov	$h2,$d1
+	shl	\$40,$d1
+	shr	\$24,$h2
+	add	$d1,$h1
+	adc	\$0,$h2			# can be partially reduced...
+
+	mov	\$-4,$d2		# ... so reduce
+	mov	$h2,$d1
+	and	$h2,$d2
+	shr	\$2,$d1
+	and	\$3,$h2
+	add	$d2,$d1			# =*5
+	add	$d1,$h0
+	adc	\$0,$h1
+	adc	\$0,$h2
+
+	mov	$s1,$r1
+	mov	$s1,%rax
+	shr	\$2,$s1
+	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
+
+	add	0($inp),$h0		# accumulate input
+	adc	8($inp),$h1
+	lea	16($inp),$inp
+	adc	$padbit,$h2
+
+	call	__poly1305_block
+
+	test	$padbit,$padbit		# if $padbit is zero,
+	jz	.Lstore_base2_64_avx	# store hash in base 2^64 format
+
+	################################# base 2^64 -> base 2^26
+	mov	$h0,%rax
+	mov	$h0,%rdx
+	shr	\$52,$h0
+	mov	$h1,$r0
+	mov	$h1,$r1
+	shr	\$26,%rdx
+	and	\$0x3ffffff,%rax	# h[0]
+	shl	\$12,$r0
+	and	\$0x3ffffff,%rdx	# h[1]
+	shr	\$14,$h1
+	or	$r0,$h0
+	shl	\$24,$h2
+	and	\$0x3ffffff,$h0		# h[2]
+	shr	\$40,$r1
+	and	\$0x3ffffff,$h1		# h[3]
+	or	$r1,$h2			# h[4]
+
+	sub	\$16,%r15
+	jz	.Lstore_base2_26_avx
+
+	vmovd	%rax#d,$H0
+	vmovd	%rdx#d,$H1
+	vmovd	$h0#d,$H2
+	vmovd	$h1#d,$H3
+	vmovd	$h2#d,$H4
+	jmp	.Lproceed_avx
+
+.align	32
+.Lstore_base2_64_avx:
+	mov	$h0,0($ctx)
+	mov	$h1,8($ctx)
+	mov	$h2,16($ctx)		# note that is_base2_26 is zeroed
+	jmp	.Ldone_avx
+
+.align	16
+.Lstore_base2_26_avx:
+	mov	%rax#d,0($ctx)		# store hash value base 2^26
+	mov	%rdx#d,4($ctx)
+	mov	$h0#d,8($ctx)
+	mov	$h1#d,12($ctx)
+	mov	$h2#d,16($ctx)
+.align	16
+.Ldone_avx:
+	pop 		%r15
+.cfi_restore	%r15
+	pop 		%r14
+.cfi_restore	%r14
+	pop 		%r13
+.cfi_restore	%r13
+	pop 		%r12
+.cfi_restore	%r12
+	pop 		%rbx
+.cfi_restore	%rbx
+	pop 		%rbp
+.cfi_restore	%rbp
+.Lno_data_avx:
+.Lblocks_avx_epilogue:
+	ret
+.cfi_endproc
+
+.align	32
+.Lbase2_64_avx:
+.cfi_startproc
+	push	%rbp
+.cfi_push	%rbp
+	mov 	%rsp,%rbp
+	push	%rbx
+.cfi_push	%rbx
+	push	%r12
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+.Lbase2_64_avx_body:
+
+	mov	$len,%r15		# reassign $len
+
+	mov	24($ctx),$r0		# load r
+	mov	32($ctx),$s1
+
+	mov	0($ctx),$h0		# load hash value
+	mov	8($ctx),$h1
+	mov	16($ctx),$h2#d
+
+	mov	$s1,$r1
+	mov	$s1,%rax
+	shr	\$2,$s1
+	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
+
+	test	\$31,$len
+	jz	.Linit_avx
+
+	add	0($inp),$h0		# accumulate input
+	adc	8($inp),$h1
+	lea	16($inp),$inp
+	adc	$padbit,$h2
+	sub	\$16,%r15
+
+	call	__poly1305_block
+
+.Linit_avx:
+	################################# base 2^64 -> base 2^26
+	mov	$h0,%rax
+	mov	$h0,%rdx
+	shr	\$52,$h0
+	mov	$h1,$d1
+	mov	$h1,$d2
+	shr	\$26,%rdx
+	and	\$0x3ffffff,%rax	# h[0]
+	shl	\$12,$d1
+	and	\$0x3ffffff,%rdx	# h[1]
+	shr	\$14,$h1
+	or	$d1,$h0
+	shl	\$24,$h2
+	and	\$0x3ffffff,$h0		# h[2]
+	shr	\$40,$d2
+	and	\$0x3ffffff,$h1		# h[3]
+	or	$d2,$h2			# h[4]
+
+	vmovd	%rax#d,$H0
+	vmovd	%rdx#d,$H1
+	vmovd	$h0#d,$H2
+	vmovd	$h1#d,$H3
+	vmovd	$h2#d,$H4
+	movl	\$1,20($ctx)		# set is_base2_26
+
+	call	__poly1305_init_avx
+
+.Lproceed_avx:
+	mov	%r15,$len
+	pop 		%r15
+.cfi_restore	%r15
+	pop 		%r14
+.cfi_restore	%r14
+	pop 		%r13
+.cfi_restore	%r13
+	pop 		%r12
+.cfi_restore	%r12
+	pop 		%rbx
+.cfi_restore	%rbx
+	pop 		%rbp
+.cfi_restore	%rbp
+.Lbase2_64_avx_epilogue:
+	jmp	.Ldo_avx
+.cfi_endproc
+
+.align	32
+.Leven_avx:
+.cfi_startproc
+	vmovd		4*0($ctx),$H0		# load hash value
+	vmovd		4*1($ctx),$H1
+	vmovd		4*2($ctx),$H2
+	vmovd		4*3($ctx),$H3
+	vmovd		4*4($ctx),$H4
+
+.Ldo_avx:
+___
+$code.=<<___	if (!$win64);
+	lea		8(%rsp),%r10
+.cfi_def_cfa_register	%r10
+	and		\$-32,%rsp
+	sub		\$-8,%rsp
+	lea		-0x58(%rsp),%r11
+	sub		\$0x178,%rsp
+___
+$code.=<<___	if ($win64);
+	lea		-0xf8(%rsp),%r11
+	sub		\$0x218,%rsp
+	vmovdqa		%xmm6,0x50(%r11)
+	vmovdqa		%xmm7,0x60(%r11)
+	vmovdqa		%xmm8,0x70(%r11)
+	vmovdqa		%xmm9,0x80(%r11)
+	vmovdqa		%xmm10,0x90(%r11)
+	vmovdqa		%xmm11,0xa0(%r11)
+	vmovdqa		%xmm12,0xb0(%r11)
+	vmovdqa		%xmm13,0xc0(%r11)
+	vmovdqa		%xmm14,0xd0(%r11)
+	vmovdqa		%xmm15,0xe0(%r11)
+.Ldo_avx_body:
+___
+$code.=<<___;
+	sub		\$64,$len
+	lea		-32($inp),%rax
+	cmovc		%rax,$inp
+
+	vmovdqu		`16*3`($ctx),$D4	# preload r0^2
+	lea		`16*3+64`($ctx),$ctx	# size optimization
+	lea		.Lconst(%rip),%rcx
+
+	################################################################
+	# load input
+	vmovdqu		16*2($inp),$T0
+	vmovdqu		16*3($inp),$T1
+	vmovdqa		64(%rcx),$MASK		# .Lmask26
+
+	vpsrldq		\$6,$T0,$T2		# splat input
+	vpsrldq		\$6,$T1,$T3
+	vpunpckhqdq	$T1,$T0,$T4		# 4
+	vpunpcklqdq	$T1,$T0,$T0		# 0:1
+	vpunpcklqdq	$T3,$T2,$T3		# 2:3
+
+	vpsrlq		\$40,$T4,$T4		# 4
+	vpsrlq		\$26,$T0,$T1
+	vpand		$MASK,$T0,$T0		# 0
+	vpsrlq		\$4,$T3,$T2
+	vpand		$MASK,$T1,$T1		# 1
+	vpsrlq		\$30,$T3,$T3
+	vpand		$MASK,$T2,$T2		# 2
+	vpand		$MASK,$T3,$T3		# 3
+	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
+
+	jbe		.Lskip_loop_avx
+
+	# expand and copy pre-calculated table to stack
+	vmovdqu		`16*1-64`($ctx),$D1
+	vmovdqu		`16*2-64`($ctx),$D2
+	vpshufd		\$0xEE,$D4,$D3		# 34xx -> 3434
+	vpshufd		\$0x44,$D4,$D0		# xx12 -> 1212
+	vmovdqa		$D3,-0x90(%r11)
+	vmovdqa		$D0,0x00(%rsp)
+	vpshufd		\$0xEE,$D1,$D4
+	vmovdqu		`16*3-64`($ctx),$D0
+	vpshufd		\$0x44,$D1,$D1
+	vmovdqa		$D4,-0x80(%r11)
+	vmovdqa		$D1,0x10(%rsp)
+	vpshufd		\$0xEE,$D2,$D3
+	vmovdqu		`16*4-64`($ctx),$D1
+	vpshufd		\$0x44,$D2,$D2
+	vmovdqa		$D3,-0x70(%r11)
+	vmovdqa		$D2,0x20(%rsp)
+	vpshufd		\$0xEE,$D0,$D4
+	vmovdqu		`16*5-64`($ctx),$D2
+	vpshufd		\$0x44,$D0,$D0
+	vmovdqa		$D4,-0x60(%r11)
+	vmovdqa		$D0,0x30(%rsp)
+	vpshufd		\$0xEE,$D1,$D3
+	vmovdqu		`16*6-64`($ctx),$D0
+	vpshufd		\$0x44,$D1,$D1
+	vmovdqa		$D3,-0x50(%r11)
+	vmovdqa		$D1,0x40(%rsp)
+	vpshufd		\$0xEE,$D2,$D4
+	vmovdqu		`16*7-64`($ctx),$D1
+	vpshufd		\$0x44,$D2,$D2
+	vmovdqa		$D4,-0x40(%r11)
+	vmovdqa		$D2,0x50(%rsp)
+	vpshufd		\$0xEE,$D0,$D3
+	vmovdqu		`16*8-64`($ctx),$D2
+	vpshufd		\$0x44,$D0,$D0
+	vmovdqa		$D3,-0x30(%r11)
+	vmovdqa		$D0,0x60(%rsp)
+	vpshufd		\$0xEE,$D1,$D4
+	vpshufd		\$0x44,$D1,$D1
+	vmovdqa		$D4,-0x20(%r11)
+	vmovdqa		$D1,0x70(%rsp)
+	vpshufd		\$0xEE,$D2,$D3
+	 vmovdqa	0x00(%rsp),$D4		# preload r0^2
+	vpshufd		\$0x44,$D2,$D2
+	vmovdqa		$D3,-0x10(%r11)
+	vmovdqa		$D2,0x80(%rsp)
+
+	jmp		.Loop_avx
+
+.align	32
+.Loop_avx:
+	################################################################
+	# ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	# ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	#   \___________________/
+	# ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	# ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	#   \___________________/ \____________________/
+	#
+	# Note that we start with inp[2:3]*r^2. This is because it
+	# doesn't depend on reduction in previous iteration.
+	################################################################
+	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	#
+	# though note that $Tx and $Hx are "reversed" in this section,
+	# and $D4 is preloaded with r0^2...
+
+	vpmuludq	$T0,$D4,$D0		# d0 = h0*r0
+	vpmuludq	$T1,$D4,$D1		# d1 = h1*r0
+	  vmovdqa	$H2,0x20(%r11)				# offload hash
+	vpmuludq	$T2,$D4,$D2		# d3 = h2*r0
+	 vmovdqa	0x10(%rsp),$H2		# r1^2
+	vpmuludq	$T3,$D4,$D3		# d3 = h3*r0
+	vpmuludq	$T4,$D4,$D4		# d4 = h4*r0
+
+	  vmovdqa	$H0,0x00(%r11)				#
+	vpmuludq	0x20(%rsp),$T4,$H0	# h4*s1
+	  vmovdqa	$H1,0x10(%r11)				#
+	vpmuludq	$T3,$H2,$H1		# h3*r1
+	vpaddq		$H0,$D0,$D0		# d0 += h4*s1
+	vpaddq		$H1,$D4,$D4		# d4 += h3*r1
+	  vmovdqa	$H3,0x30(%r11)				#
+	vpmuludq	$T2,$H2,$H0		# h2*r1
+	vpmuludq	$T1,$H2,$H1		# h1*r1
+	vpaddq		$H0,$D3,$D3		# d3 += h2*r1
+	 vmovdqa	0x30(%rsp),$H3		# r2^2
+	vpaddq		$H1,$D2,$D2		# d2 += h1*r1
+	  vmovdqa	$H4,0x40(%r11)				#
+	vpmuludq	$T0,$H2,$H2		# h0*r1
+	 vpmuludq	$T2,$H3,$H0		# h2*r2
+	vpaddq		$H2,$D1,$D1		# d1 += h0*r1
+
+	 vmovdqa	0x40(%rsp),$H4		# s2^2
+	vpaddq		$H0,$D4,$D4		# d4 += h2*r2
+	vpmuludq	$T1,$H3,$H1		# h1*r2
+	vpmuludq	$T0,$H3,$H3		# h0*r2
+	vpaddq		$H1,$D3,$D3		# d3 += h1*r2
+	 vmovdqa	0x50(%rsp),$H2		# r3^2
+	vpaddq		$H3,$D2,$D2		# d2 += h0*r2
+	vpmuludq	$T4,$H4,$H0		# h4*s2
+	vpmuludq	$T3,$H4,$H4		# h3*s2
+	vpaddq		$H0,$D1,$D1		# d1 += h4*s2
+	 vmovdqa	0x60(%rsp),$H3		# s3^2
+	vpaddq		$H4,$D0,$D0		# d0 += h3*s2
+
+	 vmovdqa	0x80(%rsp),$H4		# s4^2
+	vpmuludq	$T1,$H2,$H1		# h1*r3
+	vpmuludq	$T0,$H2,$H2		# h0*r3
+	vpaddq		$H1,$D4,$D4		# d4 += h1*r3
+	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
+	vpmuludq	$T4,$H3,$H0		# h4*s3
+	vpmuludq	$T3,$H3,$H1		# h3*s3
+	vpaddq		$H0,$D2,$D2		# d2 += h4*s3
+	 vmovdqu	16*0($inp),$H0				# load input
+	vpaddq		$H1,$D1,$D1		# d1 += h3*s3
+	vpmuludq	$T2,$H3,$H3		# h2*s3
+	 vpmuludq	$T2,$H4,$T2		# h2*s4
+	vpaddq		$H3,$D0,$D0		# d0 += h2*s3
+
+	 vmovdqu	16*1($inp),$H1				#
+	vpaddq		$T2,$D1,$D1		# d1 += h2*s4
+	vpmuludq	$T3,$H4,$T3		# h3*s4
+	vpmuludq	$T4,$H4,$T4		# h4*s4
+	 vpsrldq	\$6,$H0,$H2				# splat input
+	vpaddq		$T3,$D2,$D2		# d2 += h3*s4
+	vpaddq		$T4,$D3,$D3		# d3 += h4*s4
+	 vpsrldq	\$6,$H1,$H3				#
+	vpmuludq	0x70(%rsp),$T0,$T4	# h0*r4
+	vpmuludq	$T1,$H4,$T0		# h1*s4
+	 vpunpckhqdq	$H1,$H0,$H4		# 4
+	vpaddq		$T4,$D4,$D4		# d4 += h0*r4
+	 vmovdqa	-0x90(%r11),$T4		# r0^4
+	vpaddq		$T0,$D0,$D0		# d0 += h1*s4
+
+	vpunpcklqdq	$H1,$H0,$H0		# 0:1
+	vpunpcklqdq	$H3,$H2,$H3		# 2:3
+
+	#vpsrlq		\$40,$H4,$H4		# 4
+	vpsrldq		\$`40/8`,$H4,$H4	# 4
+	vpsrlq		\$26,$H0,$H1
+	vpand		$MASK,$H0,$H0		# 0
+	vpsrlq		\$4,$H3,$H2
+	vpand		$MASK,$H1,$H1		# 1
+	vpand		0(%rcx),$H4,$H4		# .Lmask24
+	vpsrlq		\$30,$H3,$H3
+	vpand		$MASK,$H2,$H2		# 2
+	vpand		$MASK,$H3,$H3		# 3
+	vpor		32(%rcx),$H4,$H4	# padbit, yes, always
+
+	vpaddq		0x00(%r11),$H0,$H0	# add hash value
+	vpaddq		0x10(%r11),$H1,$H1
+	vpaddq		0x20(%r11),$H2,$H2
+	vpaddq		0x30(%r11),$H3,$H3
+	vpaddq		0x40(%r11),$H4,$H4
+
+	lea		16*2($inp),%rax
+	lea		16*4($inp),$inp
+	sub		\$64,$len
+	cmovc		%rax,$inp
+
+	################################################################
+	# Now we accumulate (inp[0:1]+hash)*r^4
+	################################################################
+	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+	vpmuludq	$H0,$T4,$T0		# h0*r0
+	vpmuludq	$H1,$T4,$T1		# h1*r0
+	vpaddq		$T0,$D0,$D0
+	vpaddq		$T1,$D1,$D1
+	 vmovdqa	-0x80(%r11),$T2		# r1^4
+	vpmuludq	$H2,$T4,$T0		# h2*r0
+	vpmuludq	$H3,$T4,$T1		# h3*r0
+	vpaddq		$T0,$D2,$D2
+	vpaddq		$T1,$D3,$D3
+	vpmuludq	$H4,$T4,$T4		# h4*r0
+	 vpmuludq	-0x70(%r11),$H4,$T0	# h4*s1
+	vpaddq		$T4,$D4,$D4
+
+	vpaddq		$T0,$D0,$D0		# d0 += h4*s1
+	vpmuludq	$H2,$T2,$T1		# h2*r1
+	vpmuludq	$H3,$T2,$T0		# h3*r1
+	vpaddq		$T1,$D3,$D3		# d3 += h2*r1
+	 vmovdqa	-0x60(%r11),$T3		# r2^4
+	vpaddq		$T0,$D4,$D4		# d4 += h3*r1
+	vpmuludq	$H1,$T2,$T1		# h1*r1
+	vpmuludq	$H0,$T2,$T2		# h0*r1
+	vpaddq		$T1,$D2,$D2		# d2 += h1*r1
+	vpaddq		$T2,$D1,$D1		# d1 += h0*r1
+
+	 vmovdqa	-0x50(%r11),$T4		# s2^4
+	vpmuludq	$H2,$T3,$T0		# h2*r2
+	vpmuludq	$H1,$T3,$T1		# h1*r2
+	vpaddq		$T0,$D4,$D4		# d4 += h2*r2
+	vpaddq		$T1,$D3,$D3		# d3 += h1*r2
+	 vmovdqa	-0x40(%r11),$T2		# r3^4
+	vpmuludq	$H0,$T3,$T3		# h0*r2
+	vpmuludq	$H4,$T4,$T0		# h4*s2
+	vpaddq		$T3,$D2,$D2		# d2 += h0*r2
+	vpaddq		$T0,$D1,$D1		# d1 += h4*s2
+	 vmovdqa	-0x30(%r11),$T3		# s3^4
+	vpmuludq	$H3,$T4,$T4		# h3*s2
+	 vpmuludq	$H1,$T2,$T1		# h1*r3
+	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
+
+	 vmovdqa	-0x10(%r11),$T4		# s4^4
+	vpaddq		$T1,$D4,$D4		# d4 += h1*r3
+	vpmuludq	$H0,$T2,$T2		# h0*r3
+	vpmuludq	$H4,$T3,$T0		# h4*s3
+	vpaddq		$T2,$D3,$D3		# d3 += h0*r3
+	vpaddq		$T0,$D2,$D2		# d2 += h4*s3
+	 vmovdqu	16*2($inp),$T0				# load input
+	vpmuludq	$H3,$T3,$T2		# h3*s3
+	vpmuludq	$H2,$T3,$T3		# h2*s3
+	vpaddq		$T2,$D1,$D1		# d1 += h3*s3
+	 vmovdqu	16*3($inp),$T1				#
+	vpaddq		$T3,$D0,$D0		# d0 += h2*s3
+
+	vpmuludq	$H2,$T4,$H2		# h2*s4
+	vpmuludq	$H3,$T4,$H3		# h3*s4
+	 vpsrldq	\$6,$T0,$T2				# splat input
+	vpaddq		$H2,$D1,$D1		# d1 += h2*s4
+	vpmuludq	$H4,$T4,$H4		# h4*s4
+	 vpsrldq	\$6,$T1,$T3				#
+	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*s4
+	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*s4
+	vpmuludq	-0x20(%r11),$H0,$H4	# h0*r4
+	vpmuludq	$H1,$T4,$H0
+	 vpunpckhqdq	$T1,$T0,$T4		# 4
+	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
+	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
+
+	vpunpcklqdq	$T1,$T0,$T0		# 0:1
+	vpunpcklqdq	$T3,$T2,$T3		# 2:3
+
+	#vpsrlq		\$40,$T4,$T4		# 4
+	vpsrldq		\$`40/8`,$T4,$T4	# 4
+	vpsrlq		\$26,$T0,$T1
+	 vmovdqa	0x00(%rsp),$D4		# preload r0^2
+	vpand		$MASK,$T0,$T0		# 0
+	vpsrlq		\$4,$T3,$T2
+	vpand		$MASK,$T1,$T1		# 1
+	vpand		0(%rcx),$T4,$T4		# .Lmask24
+	vpsrlq		\$30,$T3,$T3
+	vpand		$MASK,$T2,$T2		# 2
+	vpand		$MASK,$T3,$T3		# 3
+	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
+
+	################################################################
+	# lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	# and P. Schwabe
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$D1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H4,$D0
+	vpand		$MASK,$H4,$H4
+
+	vpsrlq		\$26,$H1,$D1
+	vpand		$MASK,$H1,$H1
+	vpaddq		$D1,$H2,$H2		# h1 -> h2
+
+	vpaddq		$D0,$H0,$H0
+	vpsllq		\$2,$D0,$D0
+	vpaddq		$D0,$H0,$H0		# h4 -> h0
+
+	vpsrlq		\$26,$H2,$D2
+	vpand		$MASK,$H2,$H2
+	vpaddq		$D2,$H3,$H3		# h2 -> h3
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	ja		.Loop_avx
+
+.Lskip_loop_avx:
+	################################################################
+	# multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	vpshufd		\$0x10,$D4,$D4		# r0^n, xx12 -> x1x2
+	add		\$32,$len
+	jnz		.Long_tail_avx
+
+	vpaddq		$H2,$T2,$T2
+	vpaddq		$H0,$T0,$T0
+	vpaddq		$H1,$T1,$T1
+	vpaddq		$H3,$T3,$T3
+	vpaddq		$H4,$T4,$T4
+
+.Long_tail_avx:
+	vmovdqa		$H2,0x20(%r11)
+	vmovdqa		$H0,0x00(%r11)
+	vmovdqa		$H1,0x10(%r11)
+	vmovdqa		$H3,0x30(%r11)
+	vmovdqa		$H4,0x40(%r11)
+
+	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+	vpmuludq	$T2,$D4,$D2		# d2 = h2*r0
+	vpmuludq	$T0,$D4,$D0		# d0 = h0*r0
+	 vpshufd	\$0x10,`16*1-64`($ctx),$H2		# r1^n
+	vpmuludq	$T1,$D4,$D1		# d1 = h1*r0
+	vpmuludq	$T3,$D4,$D3		# d3 = h3*r0
+	vpmuludq	$T4,$D4,$D4		# d4 = h4*r0
+
+	vpmuludq	$T3,$H2,$H0		# h3*r1
+	vpaddq		$H0,$D4,$D4		# d4 += h3*r1
+	 vpshufd	\$0x10,`16*2-64`($ctx),$H3		# s1^n
+	vpmuludq	$T2,$H2,$H1		# h2*r1
+	vpaddq		$H1,$D3,$D3		# d3 += h2*r1
+	 vpshufd	\$0x10,`16*3-64`($ctx),$H4		# r2^n
+	vpmuludq	$T1,$H2,$H0		# h1*r1
+	vpaddq		$H0,$D2,$D2		# d2 += h1*r1
+	vpmuludq	$T0,$H2,$H2		# h0*r1
+	vpaddq		$H2,$D1,$D1		# d1 += h0*r1
+	vpmuludq	$T4,$H3,$H3		# h4*s1
+	vpaddq		$H3,$D0,$D0		# d0 += h4*s1
+
+	 vpshufd	\$0x10,`16*4-64`($ctx),$H2		# s2^n
+	vpmuludq	$T2,$H4,$H1		# h2*r2
+	vpaddq		$H1,$D4,$D4		# d4 += h2*r2
+	vpmuludq	$T1,$H4,$H0		# h1*r2
+	vpaddq		$H0,$D3,$D3		# d3 += h1*r2
+	 vpshufd	\$0x10,`16*5-64`($ctx),$H3		# r3^n
+	vpmuludq	$T0,$H4,$H4		# h0*r2
+	vpaddq		$H4,$D2,$D2		# d2 += h0*r2
+	vpmuludq	$T4,$H2,$H1		# h4*s2
+	vpaddq		$H1,$D1,$D1		# d1 += h4*s2
+	 vpshufd	\$0x10,`16*6-64`($ctx),$H4		# s3^n
+	vpmuludq	$T3,$H2,$H2		# h3*s2
+	vpaddq		$H2,$D0,$D0		# d0 += h3*s2
+
+	vpmuludq	$T1,$H3,$H0		# h1*r3
+	vpaddq		$H0,$D4,$D4		# d4 += h1*r3
+	vpmuludq	$T0,$H3,$H3		# h0*r3
+	vpaddq		$H3,$D3,$D3		# d3 += h0*r3
+	 vpshufd	\$0x10,`16*7-64`($ctx),$H2		# r4^n
+	vpmuludq	$T4,$H4,$H1		# h4*s3
+	vpaddq		$H1,$D2,$D2		# d2 += h4*s3
+	 vpshufd	\$0x10,`16*8-64`($ctx),$H3		# s4^n
+	vpmuludq	$T3,$H4,$H0		# h3*s3
+	vpaddq		$H0,$D1,$D1		# d1 += h3*s3
+	vpmuludq	$T2,$H4,$H4		# h2*s3
+	vpaddq		$H4,$D0,$D0		# d0 += h2*s3
+
+	vpmuludq	$T0,$H2,$H2		# h0*r4
+	vpaddq		$H2,$D4,$D4		# h4 = d4 + h0*r4
+	vpmuludq	$T4,$H3,$H1		# h4*s4
+	vpaddq		$H1,$D3,$D3		# h3 = d3 + h4*s4
+	vpmuludq	$T3,$H3,$H0		# h3*s4
+	vpaddq		$H0,$D2,$D2		# h2 = d2 + h3*s4
+	vpmuludq	$T2,$H3,$H1		# h2*s4
+	vpaddq		$H1,$D1,$D1		# h1 = d1 + h2*s4
+	vpmuludq	$T1,$H3,$H3		# h1*s4
+	vpaddq		$H3,$D0,$D0		# h0 = d0 + h1*s4
+
+	jz		.Lshort_tail_avx
+
+	vmovdqu		16*0($inp),$H0		# load input
+	vmovdqu		16*1($inp),$H1
+
+	vpsrldq		\$6,$H0,$H2		# splat input
+	vpsrldq		\$6,$H1,$H3
+	vpunpckhqdq	$H1,$H0,$H4		# 4
+	vpunpcklqdq	$H1,$H0,$H0		# 0:1
+	vpunpcklqdq	$H3,$H2,$H3		# 2:3
+
+	vpsrlq		\$40,$H4,$H4		# 4
+	vpsrlq		\$26,$H0,$H1
+	vpand		$MASK,$H0,$H0		# 0
+	vpsrlq		\$4,$H3,$H2
+	vpand		$MASK,$H1,$H1		# 1
+	vpsrlq		\$30,$H3,$H3
+	vpand		$MASK,$H2,$H2		# 2
+	vpand		$MASK,$H3,$H3		# 3
+	vpor		32(%rcx),$H4,$H4	# padbit, yes, always
+
+	vpshufd		\$0x32,`16*0-64`($ctx),$T4	# r0^n, 34xx -> x3x4
+	vpaddq		0x00(%r11),$H0,$H0
+	vpaddq		0x10(%r11),$H1,$H1
+	vpaddq		0x20(%r11),$H2,$H2
+	vpaddq		0x30(%r11),$H3,$H3
+	vpaddq		0x40(%r11),$H4,$H4
+
+	################################################################
+	# multiply (inp[0:1]+hash) by r^4:r^3 and accumulate
+
+	vpmuludq	$H0,$T4,$T0		# h0*r0
+	vpaddq		$T0,$D0,$D0		# d0 += h0*r0
+	vpmuludq	$H1,$T4,$T1		# h1*r0
+	vpaddq		$T1,$D1,$D1		# d1 += h1*r0
+	vpmuludq	$H2,$T4,$T0		# h2*r0
+	vpaddq		$T0,$D2,$D2		# d2 += h2*r0
+	 vpshufd	\$0x32,`16*1-64`($ctx),$T2		# r1^n
+	vpmuludq	$H3,$T4,$T1		# h3*r0
+	vpaddq		$T1,$D3,$D3		# d3 += h3*r0
+	vpmuludq	$H4,$T4,$T4		# h4*r0
+	vpaddq		$T4,$D4,$D4		# d4 += h4*r0
+
+	vpmuludq	$H3,$T2,$T0		# h3*r1
+	vpaddq		$T0,$D4,$D4		# d4 += h3*r1
+	 vpshufd	\$0x32,`16*2-64`($ctx),$T3		# s1
+	vpmuludq	$H2,$T2,$T1		# h2*r1
+	vpaddq		$T1,$D3,$D3		# d3 += h2*r1
+	 vpshufd	\$0x32,`16*3-64`($ctx),$T4		# r2
+	vpmuludq	$H1,$T2,$T0		# h1*r1
+	vpaddq		$T0,$D2,$D2		# d2 += h1*r1
+	vpmuludq	$H0,$T2,$T2		# h0*r1
+	vpaddq		$T2,$D1,$D1		# d1 += h0*r1
+	vpmuludq	$H4,$T3,$T3		# h4*s1
+	vpaddq		$T3,$D0,$D0		# d0 += h4*s1
+
+	 vpshufd	\$0x32,`16*4-64`($ctx),$T2		# s2
+	vpmuludq	$H2,$T4,$T1		# h2*r2
+	vpaddq		$T1,$D4,$D4		# d4 += h2*r2
+	vpmuludq	$H1,$T4,$T0		# h1*r2
+	vpaddq		$T0,$D3,$D3		# d3 += h1*r2
+	 vpshufd	\$0x32,`16*5-64`($ctx),$T3		# r3
+	vpmuludq	$H0,$T4,$T4		# h0*r2
+	vpaddq		$T4,$D2,$D2		# d2 += h0*r2
+	vpmuludq	$H4,$T2,$T1		# h4*s2
+	vpaddq		$T1,$D1,$D1		# d1 += h4*s2
+	 vpshufd	\$0x32,`16*6-64`($ctx),$T4		# s3
+	vpmuludq	$H3,$T2,$T2		# h3*s2
+	vpaddq		$T2,$D0,$D0		# d0 += h3*s2
+
+	vpmuludq	$H1,$T3,$T0		# h1*r3
+	vpaddq		$T0,$D4,$D4		# d4 += h1*r3
+	vpmuludq	$H0,$T3,$T3		# h0*r3
+	vpaddq		$T3,$D3,$D3		# d3 += h0*r3
+	 vpshufd	\$0x32,`16*7-64`($ctx),$T2		# r4
+	vpmuludq	$H4,$T4,$T1		# h4*s3
+	vpaddq		$T1,$D2,$D2		# d2 += h4*s3
+	 vpshufd	\$0x32,`16*8-64`($ctx),$T3		# s4
+	vpmuludq	$H3,$T4,$T0		# h3*s3
+	vpaddq		$T0,$D1,$D1		# d1 += h3*s3
+	vpmuludq	$H2,$T4,$T4		# h2*s3
+	vpaddq		$T4,$D0,$D0		# d0 += h2*s3
+
+	vpmuludq	$H0,$T2,$T2		# h0*r4
+	vpaddq		$T2,$D4,$D4		# d4 += h0*r4
+	vpmuludq	$H4,$T3,$T1		# h4*s4
+	vpaddq		$T1,$D3,$D3		# d3 += h4*s4
+	vpmuludq	$H3,$T3,$T0		# h3*s4
+	vpaddq		$T0,$D2,$D2		# d2 += h3*s4
+	vpmuludq	$H2,$T3,$T1		# h2*s4
+	vpaddq		$T1,$D1,$D1		# d1 += h2*s4
+	vpmuludq	$H1,$T3,$T3		# h1*s4
+	vpaddq		$T3,$D0,$D0		# d0 += h1*s4
+
+.Lshort_tail_avx:
+	################################################################
+	# horizontal addition
+
+	vpsrldq		\$8,$D4,$T4
+	vpsrldq		\$8,$D3,$T3
+	vpsrldq		\$8,$D1,$T1
+	vpsrldq		\$8,$D0,$T0
+	vpsrldq		\$8,$D2,$T2
+	vpaddq		$T3,$D3,$D3
+	vpaddq		$T4,$D4,$D4
+	vpaddq		$T0,$D0,$D0
+	vpaddq		$T1,$D1,$D1
+	vpaddq		$T2,$D2,$D2
+
+	################################################################
+	# lazy reduction
+
+	vpsrlq		\$26,$D3,$H3
+	vpand		$MASK,$D3,$D3
+	vpaddq		$H3,$D4,$D4		# h3 -> h4
+
+	vpsrlq		\$26,$D0,$H0
+	vpand		$MASK,$D0,$D0
+	vpaddq		$H0,$D1,$D1		# h0 -> h1
+
+	vpsrlq		\$26,$D4,$H4
+	vpand		$MASK,$D4,$D4
+
+	vpsrlq		\$26,$D1,$H1
+	vpand		$MASK,$D1,$D1
+	vpaddq		$H1,$D2,$D2		# h1 -> h2
+
+	vpaddq		$H4,$D0,$D0
+	vpsllq		\$2,$H4,$H4
+	vpaddq		$H4,$D0,$D0		# h4 -> h0
+
+	vpsrlq		\$26,$D2,$H2
+	vpand		$MASK,$D2,$D2
+	vpaddq		$H2,$D3,$D3		# h2 -> h3
+
+	vpsrlq		\$26,$D0,$H0
+	vpand		$MASK,$D0,$D0
+	vpaddq		$H0,$D1,$D1		# h0 -> h1
+
+	vpsrlq		\$26,$D3,$H3
+	vpand		$MASK,$D3,$D3
+	vpaddq		$H3,$D4,$D4		# h3 -> h4
+
+	vmovd		$D0,`4*0-48-64`($ctx)	# save partially reduced
+	vmovd		$D1,`4*1-48-64`($ctx)
+	vmovd		$D2,`4*2-48-64`($ctx)
+	vmovd		$D3,`4*3-48-64`($ctx)
+	vmovd		$D4,`4*4-48-64`($ctx)
+___
+$code.=<<___	if ($win64);
+	vmovdqa		0x50(%r11),%xmm6
+	vmovdqa		0x60(%r11),%xmm7
+	vmovdqa		0x70(%r11),%xmm8
+	vmovdqa		0x80(%r11),%xmm9
+	vmovdqa		0x90(%r11),%xmm10
+	vmovdqa		0xa0(%r11),%xmm11
+	vmovdqa		0xb0(%r11),%xmm12
+	vmovdqa		0xc0(%r11),%xmm13
+	vmovdqa		0xd0(%r11),%xmm14
+	vmovdqa		0xe0(%r11),%xmm15
+	lea		0xf8(%r11),%rsp
+.Ldo_avx_epilogue:
+___
+$code.=<<___	if (!$win64);
+	lea		-8(%r10),%rsp
+.cfi_def_cfa_register	%rsp
+___
+$code.=<<___;
+	vzeroupper
+	ret
+.cfi_endproc
+___
+&end_function("poly1305_blocks_avx");
+
+&declare_function("poly1305_emit_avx", 32, 3);
+$code.=<<___;
+	cmpl	\$0,20($ctx)	# is_base2_26?
+	je	.Lemit
+
+	mov	0($ctx),%eax	# load hash value base 2^26
+	mov	4($ctx),%ecx
+	mov	8($ctx),%r8d
+	mov	12($ctx),%r11d
+	mov	16($ctx),%r10d
+
+	shl	\$26,%rcx	# base 2^26 -> base 2^64
+	mov	%r8,%r9
+	shl	\$52,%r8
+	add	%rcx,%rax
+	shr	\$12,%r9
+	add	%rax,%r8	# h0
+	adc	\$0,%r9
+
+	shl	\$14,%r11
+	mov	%r10,%rax
+	shr	\$24,%r10
+	add	%r11,%r9
+	shl	\$40,%rax
+	add	%rax,%r9	# h1
+	adc	\$0,%r10	# h2
+
+	mov	%r10,%rax	# could be partially reduced, so reduce
+	mov	%r10,%rcx
+	and	\$3,%r10
+	shr	\$2,%rax
+	and	\$-4,%rcx
+	add	%rcx,%rax
+	add	%rax,%r8
+	adc	\$0,%r9
+	adc	\$0,%r10
+
+	mov	%r8,%rax
+	add	\$5,%r8		# compare to modulus
+	mov	%r9,%rcx
+	adc	\$0,%r9
+	adc	\$0,%r10
+	shr	\$2,%r10	# did 130-bit value overflow?
+	cmovnz	%r8,%rax
+	cmovnz	%r9,%rcx
+
+	add	0($nonce),%rax	# accumulate nonce
+	adc	8($nonce),%rcx
+	mov	%rax,0($mac)	# write result
+	mov	%rcx,8($mac)
+
+	ret
+___
+&end_function("poly1305_emit_avx");
+
+if ($kernel) {
+	$code .= "#endif\n";
+}
+
+if ($avx>1) {
+
+if ($kernel) {
+	$code .= "#ifdef CONFIG_AS_AVX2\n";
+}
+
+my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
+    map("%ymm$_",(0..15));
+my $S4=$MASK;
+
+sub poly1305_blocks_avxN {
+	my ($avx512) = @_;
+	my $suffix = $avx512 ? "_avx512" : "";
+$code.=<<___;
+.cfi_startproc
+	mov	20($ctx),%r8d		# is_base2_26
+	cmp	\$128,$len
+	jae	.Lblocks_avx2$suffix
+	test	%r8d,%r8d
+	jz	.Lblocks
+
+.Lblocks_avx2$suffix:
+	and	\$-16,$len
+	jz	.Lno_data_avx2$suffix
+
+	vzeroupper
+
+	test	%r8d,%r8d
+	jz	.Lbase2_64_avx2$suffix
+
+	test	\$63,$len
+	jz	.Leven_avx2$suffix
+
+	push	%rbp
+.cfi_push	%rbp
+	mov 	%rsp,%rbp
+	push	%rbx
+.cfi_push	%rbx
+	push	%r12
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+.Lblocks_avx2_body$suffix:
+
+	mov	$len,%r15		# reassign $len
+
+	mov	0($ctx),$d1		# load hash value
+	mov	8($ctx),$d2
+	mov	16($ctx),$h2#d
+
+	mov	24($ctx),$r0		# load r
+	mov	32($ctx),$s1
+
+	################################# base 2^26 -> base 2^64
+	mov	$d1#d,$h0#d
+	and	\$`-1*(1<<31)`,$d1
+	mov	$d2,$r1			# borrow $r1
+	mov	$d2#d,$h1#d
+	and	\$`-1*(1<<31)`,$d2
+
+	shr	\$6,$d1
+	shl	\$52,$r1
+	add	$d1,$h0
+	shr	\$12,$h1
+	shr	\$18,$d2
+	add	$r1,$h0
+	adc	$d2,$h1
+
+	mov	$h2,$d1
+	shl	\$40,$d1
+	shr	\$24,$h2
+	add	$d1,$h1
+	adc	\$0,$h2			# can be partially reduced...
+
+	mov	\$-4,$d2		# ... so reduce
+	mov	$h2,$d1
+	and	$h2,$d2
+	shr	\$2,$d1
+	and	\$3,$h2
+	add	$d2,$d1			# =*5
+	add	$d1,$h0
+	adc	\$0,$h1
+	adc	\$0,$h2
+
+	mov	$s1,$r1
+	mov	$s1,%rax
+	shr	\$2,$s1
+	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
+
+.Lbase2_26_pre_avx2$suffix:
+	add	0($inp),$h0		# accumulate input
+	adc	8($inp),$h1
+	lea	16($inp),$inp
+	adc	$padbit,$h2
+	sub	\$16,%r15
+
+	call	__poly1305_block
+	mov	$r1,%rax
+
+	test	\$63,%r15
+	jnz	.Lbase2_26_pre_avx2$suffix
+
+	test	$padbit,$padbit		# if $padbit is zero,
+	jz	.Lstore_base2_64_avx2$suffix	# store hash in base 2^64 format
+
+	################################# base 2^64 -> base 2^26
+	mov	$h0,%rax
+	mov	$h0,%rdx
+	shr	\$52,$h0
+	mov	$h1,$r0
+	mov	$h1,$r1
+	shr	\$26,%rdx
+	and	\$0x3ffffff,%rax	# h[0]
+	shl	\$12,$r0
+	and	\$0x3ffffff,%rdx	# h[1]
+	shr	\$14,$h1
+	or	$r0,$h0
+	shl	\$24,$h2
+	and	\$0x3ffffff,$h0		# h[2]
+	shr	\$40,$r1
+	and	\$0x3ffffff,$h1		# h[3]
+	or	$r1,$h2			# h[4]
+
+	test	%r15,%r15
+	jz	.Lstore_base2_26_avx2$suffix
+
+	vmovd	%rax#d,%x#$H0
+	vmovd	%rdx#d,%x#$H1
+	vmovd	$h0#d,%x#$H2
+	vmovd	$h1#d,%x#$H3
+	vmovd	$h2#d,%x#$H4
+	jmp	.Lproceed_avx2$suffix
+
+.align	32
+.Lstore_base2_64_avx2$suffix:
+	mov	$h0,0($ctx)
+	mov	$h1,8($ctx)
+	mov	$h2,16($ctx)		# note that is_base2_26 is zeroed
+	jmp	.Ldone_avx2$suffix
+
+.align	16
+.Lstore_base2_26_avx2$suffix:
+	mov	%rax#d,0($ctx)		# store hash value base 2^26
+	mov	%rdx#d,4($ctx)
+	mov	$h0#d,8($ctx)
+	mov	$h1#d,12($ctx)
+	mov	$h2#d,16($ctx)
+.align	16
+.Ldone_avx2$suffix:
+	pop 		%r15
+.cfi_restore	%r15
+	pop 		%r14
+.cfi_restore	%r14
+	pop 		%r13
+.cfi_restore	%r13
+	pop 		%r12
+.cfi_restore	%r12
+	pop 		%rbx
+.cfi_restore	%rbx
+	pop 		%rbp
+.cfi_restore 	%rbp
+.Lno_data_avx2$suffix:
+.Lblocks_avx2_epilogue$suffix:
+	ret
+.cfi_endproc
+
+.align	32
+.Lbase2_64_avx2$suffix:
+.cfi_startproc
+	push	%rbp
+.cfi_push	%rbp
+	mov 	%rsp,%rbp
+	push	%rbx
+.cfi_push	%rbx
+	push	%r12
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+.Lbase2_64_avx2_body$suffix:
+
+	mov	$len,%r15		# reassign $len
+
+	mov	24($ctx),$r0		# load r
+	mov	32($ctx),$s1
+
+	mov	0($ctx),$h0		# load hash value
+	mov	8($ctx),$h1
+	mov	16($ctx),$h2#d
+
+	mov	$s1,$r1
+	mov	$s1,%rax
+	shr	\$2,$s1
+	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
+
+	test	\$63,$len
+	jz	.Linit_avx2$suffix
+
+.Lbase2_64_pre_avx2$suffix:
+	add	0($inp),$h0		# accumulate input
+	adc	8($inp),$h1
+	lea	16($inp),$inp
+	adc	$padbit,$h2
+	sub	\$16,%r15
+
+	call	__poly1305_block
+	mov	$r1,%rax
+
+	test	\$63,%r15
+	jnz	.Lbase2_64_pre_avx2$suffix
+
+.Linit_avx2$suffix:
+	################################# base 2^64 -> base 2^26
+	mov	$h0,%rax
+	mov	$h0,%rdx
+	shr	\$52,$h0
+	mov	$h1,$d1
+	mov	$h1,$d2
+	shr	\$26,%rdx
+	and	\$0x3ffffff,%rax	# h[0]
+	shl	\$12,$d1
+	and	\$0x3ffffff,%rdx	# h[1]
+	shr	\$14,$h1
+	or	$d1,$h0
+	shl	\$24,$h2
+	and	\$0x3ffffff,$h0		# h[2]
+	shr	\$40,$d2
+	and	\$0x3ffffff,$h1		# h[3]
+	or	$d2,$h2			# h[4]
+
+	vmovd	%rax#d,%x#$H0
+	vmovd	%rdx#d,%x#$H1
+	vmovd	$h0#d,%x#$H2
+	vmovd	$h1#d,%x#$H3
+	vmovd	$h2#d,%x#$H4
+	movl	\$1,20($ctx)		# set is_base2_26
+
+	call	__poly1305_init_avx
+
+.Lproceed_avx2$suffix:
+	mov	%r15,$len			# restore $len
+___
+$code.=<<___ if (!$kernel);
+	mov	OPENSSL_ia32cap_P+8(%rip),%r9d
+	mov	\$`(1<<31|1<<30|1<<16)`,%r11d
+___
+$code.=<<___;
+	pop 		%r15
+.cfi_restore	%r15
+	pop 		%r14
+.cfi_restore	%r14
+	pop 		%r13
+.cfi_restore	%r13
+	pop 		%r12
+.cfi_restore	%r12
+	pop 		%rbx
+.cfi_restore	%rbx
+	pop 		%rbp
+.cfi_restore 	%rbp
+.Lbase2_64_avx2_epilogue$suffix:
+	jmp	.Ldo_avx2$suffix
+.cfi_endproc
+
+.align	32
+.Leven_avx2$suffix:
+.cfi_startproc
+___
+$code.=<<___ if (!$kernel);
+	mov		OPENSSL_ia32cap_P+8(%rip),%r9d
+___
+$code.=<<___;
+	vmovd		4*0($ctx),%x#$H0	# load hash value base 2^26
+	vmovd		4*1($ctx),%x#$H1
+	vmovd		4*2($ctx),%x#$H2
+	vmovd		4*3($ctx),%x#$H3
+	vmovd		4*4($ctx),%x#$H4
+
+.Ldo_avx2$suffix:
+___
+$code.=<<___		if (!$kernel && $avx>2);
+	cmp		\$512,$len
+	jb		.Lskip_avx512
+	and		%r11d,%r9d
+	test		\$`1<<16`,%r9d		# check for AVX512F
+	jnz		.Lblocks_avx512
+.Lskip_avx512$suffix:
+___
+$code.=<<___ if ($avx > 2 && $avx512 && $kernel);
+	cmp		\$512,$len
+	jae		.Lblocks_avx512
+___
+$code.=<<___	if (!$win64);
+	lea		8(%rsp),%r10
+.cfi_def_cfa_register	%r10
+	sub		\$0x128,%rsp
+___
+$code.=<<___	if ($win64);
+	lea		8(%rsp),%r10
+	sub		\$0x1c8,%rsp
+	vmovdqa		%xmm6,-0xb0(%r10)
+	vmovdqa		%xmm7,-0xa0(%r10)
+	vmovdqa		%xmm8,-0x90(%r10)
+	vmovdqa		%xmm9,-0x80(%r10)
+	vmovdqa		%xmm10,-0x70(%r10)
+	vmovdqa		%xmm11,-0x60(%r10)
+	vmovdqa		%xmm12,-0x50(%r10)
+	vmovdqa		%xmm13,-0x40(%r10)
+	vmovdqa		%xmm14,-0x30(%r10)
+	vmovdqa		%xmm15,-0x20(%r10)
+.Ldo_avx2_body$suffix:
+___
+$code.=<<___;
+	lea		.Lconst(%rip),%rcx
+	lea		48+64($ctx),$ctx	# size optimization
+	vmovdqa		96(%rcx),$T0		# .Lpermd_avx2
+
+	# expand and copy pre-calculated table to stack
+	vmovdqu		`16*0-64`($ctx),%x#$T2
+	and		\$-512,%rsp
+	vmovdqu		`16*1-64`($ctx),%x#$T3
+	vmovdqu		`16*2-64`($ctx),%x#$T4
+	vmovdqu		`16*3-64`($ctx),%x#$D0
+	vmovdqu		`16*4-64`($ctx),%x#$D1
+	vmovdqu		`16*5-64`($ctx),%x#$D2
+	lea		0x90(%rsp),%rax		# size optimization
+	vmovdqu		`16*6-64`($ctx),%x#$D3
+	vpermd		$T2,$T0,$T2		# 00003412 -> 14243444
+	vmovdqu		`16*7-64`($ctx),%x#$D4
+	vpermd		$T3,$T0,$T3
+	vmovdqu		`16*8-64`($ctx),%x#$MASK
+	vpermd		$T4,$T0,$T4
+	vmovdqa		$T2,0x00(%rsp)
+	vpermd		$D0,$T0,$D0
+	vmovdqa		$T3,0x20-0x90(%rax)
+	vpermd		$D1,$T0,$D1
+	vmovdqa		$T4,0x40-0x90(%rax)
+	vpermd		$D2,$T0,$D2
+	vmovdqa		$D0,0x60-0x90(%rax)
+	vpermd		$D3,$T0,$D3
+	vmovdqa		$D1,0x80-0x90(%rax)
+	vpermd		$D4,$T0,$D4
+	vmovdqa		$D2,0xa0-0x90(%rax)
+	vpermd		$MASK,$T0,$MASK
+	vmovdqa		$D3,0xc0-0x90(%rax)
+	vmovdqa		$D4,0xe0-0x90(%rax)
+	vmovdqa		$MASK,0x100-0x90(%rax)
+	vmovdqa		64(%rcx),$MASK		# .Lmask26
+
+	################################################################
+	# load input
+	vmovdqu		16*0($inp),%x#$T0
+	vmovdqu		16*1($inp),%x#$T1
+	vinserti128	\$1,16*2($inp),$T0,$T0
+	vinserti128	\$1,16*3($inp),$T1,$T1
+	lea		16*4($inp),$inp
+
+	vpsrldq		\$6,$T0,$T2		# splat input
+	vpsrldq		\$6,$T1,$T3
+	vpunpckhqdq	$T1,$T0,$T4		# 4
+	vpunpcklqdq	$T3,$T2,$T2		# 2:3
+	vpunpcklqdq	$T1,$T0,$T0		# 0:1
+
+	vpsrlq		\$30,$T2,$T3
+	vpsrlq		\$4,$T2,$T2
+	vpsrlq		\$26,$T0,$T1
+	vpsrlq		\$40,$T4,$T4		# 4
+	vpand		$MASK,$T2,$T2		# 2
+	vpand		$MASK,$T0,$T0		# 0
+	vpand		$MASK,$T1,$T1		# 1
+	vpand		$MASK,$T3,$T3		# 3
+	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
+
+	vpaddq		$H2,$T2,$H2		# accumulate input
+	sub		\$64,$len
+	jz		.Ltail_avx2$suffix
+	jmp		.Loop_avx2$suffix
+
+.align	32
+.Loop_avx2$suffix:
+	################################################################
+	# ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
+	# ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
+	# ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2
+	# ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1
+	#   \________/\__________/
+	################################################################
+	#vpaddq		$H2,$T2,$H2		# accumulate input
+	vpaddq		$H0,$T0,$H0
+	vmovdqa		`32*0`(%rsp),$T0	# r0^4
+	vpaddq		$H1,$T1,$H1
+	vmovdqa		`32*1`(%rsp),$T1	# r1^4
+	vpaddq		$H3,$T3,$H3
+	vmovdqa		`32*3`(%rsp),$T2	# r2^4
+	vpaddq		$H4,$T4,$H4
+	vmovdqa		`32*6-0x90`(%rax),$T3	# s3^4
+	vmovdqa		`32*8-0x90`(%rax),$S4	# s4^4
+
+	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	#
+	# however, as h2 is "chronologically" first one available pull
+	# corresponding operations up, so it's
+	#
+	# d4 = h2*r2   + h4*r0 + h3*r1             + h1*r3   + h0*r4
+	# d3 = h2*r1   + h3*r0           + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0           + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h2*5*r4 + h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3
+	# d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2           + h1*5*r4
+
+	vpmuludq	$H2,$T0,$D2		# d2 = h2*r0
+	vpmuludq	$H2,$T1,$D3		# d3 = h2*r1
+	vpmuludq	$H2,$T2,$D4		# d4 = h2*r2
+	vpmuludq	$H2,$T3,$D0		# d0 = h2*s3
+	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
+
+	vpmuludq	$H0,$T1,$T4		# h0*r1
+	vpmuludq	$H1,$T1,$H2		# h1*r1, borrow $H2 as temp
+	vpaddq		$T4,$D1,$D1		# d1 += h0*r1
+	vpaddq		$H2,$D2,$D2		# d2 += h1*r1
+	vpmuludq	$H3,$T1,$T4		# h3*r1
+	vpmuludq	`32*2`(%rsp),$H4,$H2	# h4*s1
+	vpaddq		$T4,$D4,$D4		# d4 += h3*r1
+	vpaddq		$H2,$D0,$D0		# d0 += h4*s1
+	 vmovdqa	`32*4-0x90`(%rax),$T1	# s2
+
+	vpmuludq	$H0,$T0,$T4		# h0*r0
+	vpmuludq	$H1,$T0,$H2		# h1*r0
+	vpaddq		$T4,$D0,$D0		# d0 += h0*r0
+	vpaddq		$H2,$D1,$D1		# d1 += h1*r0
+	vpmuludq	$H3,$T0,$T4		# h3*r0
+	vpmuludq	$H4,$T0,$H2		# h4*r0
+	 vmovdqu	16*0($inp),%x#$T0	# load input
+	vpaddq		$T4,$D3,$D3		# d3 += h3*r0
+	vpaddq		$H2,$D4,$D4		# d4 += h4*r0
+	 vinserti128	\$1,16*2($inp),$T0,$T0
+
+	vpmuludq	$H3,$T1,$T4		# h3*s2
+	vpmuludq	$H4,$T1,$H2		# h4*s2
+	 vmovdqu	16*1($inp),%x#$T1
+	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
+	vpaddq		$H2,$D1,$D1		# d1 += h4*s2
+	 vmovdqa	`32*5-0x90`(%rax),$H2	# r3
+	vpmuludq	$H1,$T2,$T4		# h1*r2
+	vpmuludq	$H0,$T2,$T2		# h0*r2
+	vpaddq		$T4,$D3,$D3		# d3 += h1*r2
+	vpaddq		$T2,$D2,$D2		# d2 += h0*r2
+	 vinserti128	\$1,16*3($inp),$T1,$T1
+	 lea		16*4($inp),$inp
+
+	vpmuludq	$H1,$H2,$T4		# h1*r3
+	vpmuludq	$H0,$H2,$H2		# h0*r3
+	 vpsrldq	\$6,$T0,$T2		# splat input
+	vpaddq		$T4,$D4,$D4		# d4 += h1*r3
+	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
+	vpmuludq	$H3,$T3,$T4		# h3*s3
+	vpmuludq	$H4,$T3,$H2		# h4*s3
+	 vpsrldq	\$6,$T1,$T3
+	vpaddq		$T4,$D1,$D1		# d1 += h3*s3
+	vpaddq		$H2,$D2,$D2		# d2 += h4*s3
+	 vpunpckhqdq	$T1,$T0,$T4		# 4
+
+	vpmuludq	$H3,$S4,$H3		# h3*s4
+	vpmuludq	$H4,$S4,$H4		# h4*s4
+	 vpunpcklqdq	$T1,$T0,$T0		# 0:1
+	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*r4
+	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*r4
+	 vpunpcklqdq	$T3,$T2,$T3		# 2:3
+	vpmuludq	`32*7-0x90`(%rax),$H0,$H4	# h0*r4
+	vpmuludq	$H1,$S4,$H0		# h1*s4
+	vmovdqa		64(%rcx),$MASK		# .Lmask26
+	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
+	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
+
+	################################################################
+	# lazy reduction (interleaved with tail of input splat)
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$D1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H4,$D4
+	vpand		$MASK,$H4,$H4
+
+	 vpsrlq		\$4,$T3,$T2
+
+	vpsrlq		\$26,$H1,$D1
+	vpand		$MASK,$H1,$H1
+	vpaddq		$D1,$H2,$H2		# h1 -> h2
+
+	vpaddq		$D4,$H0,$H0
+	vpsllq		\$2,$D4,$D4
+	vpaddq		$D4,$H0,$H0		# h4 -> h0
+
+	 vpand		$MASK,$T2,$T2		# 2
+	 vpsrlq		\$26,$T0,$T1
+
+	vpsrlq		\$26,$H2,$D2
+	vpand		$MASK,$H2,$H2
+	vpaddq		$D2,$H3,$H3		# h2 -> h3
+
+	 vpaddq		$T2,$H2,$H2		# modulo-scheduled
+	 vpsrlq		\$30,$T3,$T3
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	 vpsrlq		\$40,$T4,$T4		# 4
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	 vpand		$MASK,$T0,$T0		# 0
+	 vpand		$MASK,$T1,$T1		# 1
+	 vpand		$MASK,$T3,$T3		# 3
+	 vpor		32(%rcx),$T4,$T4	# padbit, yes, always
+
+	sub		\$64,$len
+	jnz		.Loop_avx2$suffix
+
+	.byte		0x66,0x90
+.Ltail_avx2$suffix:
+	################################################################
+	# while above multiplications were by r^4 in all lanes, in last
+	# iteration we multiply least significant lane by r^4 and most
+	# significant one by r, so copy of above except that references
+	# to the precomputed table are displaced by 4...
+
+	#vpaddq		$H2,$T2,$H2		# accumulate input
+	vpaddq		$H0,$T0,$H0
+	vmovdqu		`32*0+4`(%rsp),$T0	# r0^4
+	vpaddq		$H1,$T1,$H1
+	vmovdqu		`32*1+4`(%rsp),$T1	# r1^4
+	vpaddq		$H3,$T3,$H3
+	vmovdqu		`32*3+4`(%rsp),$T2	# r2^4
+	vpaddq		$H4,$T4,$H4
+	vmovdqu		`32*6+4-0x90`(%rax),$T3	# s3^4
+	vmovdqu		`32*8+4-0x90`(%rax),$S4	# s4^4
+
+	vpmuludq	$H2,$T0,$D2		# d2 = h2*r0
+	vpmuludq	$H2,$T1,$D3		# d3 = h2*r1
+	vpmuludq	$H2,$T2,$D4		# d4 = h2*r2
+	vpmuludq	$H2,$T3,$D0		# d0 = h2*s3
+	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
+
+	vpmuludq	$H0,$T1,$T4		# h0*r1
+	vpmuludq	$H1,$T1,$H2		# h1*r1
+	vpaddq		$T4,$D1,$D1		# d1 += h0*r1
+	vpaddq		$H2,$D2,$D2		# d2 += h1*r1
+	vpmuludq	$H3,$T1,$T4		# h3*r1
+	vpmuludq	`32*2+4`(%rsp),$H4,$H2	# h4*s1
+	vpaddq		$T4,$D4,$D4		# d4 += h3*r1
+	vpaddq		$H2,$D0,$D0		# d0 += h4*s1
+
+	vpmuludq	$H0,$T0,$T4		# h0*r0
+	vpmuludq	$H1,$T0,$H2		# h1*r0
+	vpaddq		$T4,$D0,$D0		# d0 += h0*r0
+	 vmovdqu	`32*4+4-0x90`(%rax),$T1	# s2
+	vpaddq		$H2,$D1,$D1		# d1 += h1*r0
+	vpmuludq	$H3,$T0,$T4		# h3*r0
+	vpmuludq	$H4,$T0,$H2		# h4*r0
+	vpaddq		$T4,$D3,$D3		# d3 += h3*r0
+	vpaddq		$H2,$D4,$D4		# d4 += h4*r0
+
+	vpmuludq	$H3,$T1,$T4		# h3*s2
+	vpmuludq	$H4,$T1,$H2		# h4*s2
+	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
+	vpaddq		$H2,$D1,$D1		# d1 += h4*s2
+	 vmovdqu	`32*5+4-0x90`(%rax),$H2	# r3
+	vpmuludq	$H1,$T2,$T4		# h1*r2
+	vpmuludq	$H0,$T2,$T2		# h0*r2
+	vpaddq		$T4,$D3,$D3		# d3 += h1*r2
+	vpaddq		$T2,$D2,$D2		# d2 += h0*r2
+
+	vpmuludq	$H1,$H2,$T4		# h1*r3
+	vpmuludq	$H0,$H2,$H2		# h0*r3
+	vpaddq		$T4,$D4,$D4		# d4 += h1*r3
+	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
+	vpmuludq	$H3,$T3,$T4		# h3*s3
+	vpmuludq	$H4,$T3,$H2		# h4*s3
+	vpaddq		$T4,$D1,$D1		# d1 += h3*s3
+	vpaddq		$H2,$D2,$D2		# d2 += h4*s3
+
+	vpmuludq	$H3,$S4,$H3		# h3*s4
+	vpmuludq	$H4,$S4,$H4		# h4*s4
+	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*r4
+	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*r4
+	vpmuludq	`32*7+4-0x90`(%rax),$H0,$H4		# h0*r4
+	vpmuludq	$H1,$S4,$H0		# h1*s4
+	vmovdqa		64(%rcx),$MASK		# .Lmask26
+	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
+	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
+
+	################################################################
+	# horizontal addition
+
+	vpsrldq		\$8,$D1,$T1
+	vpsrldq		\$8,$H2,$T2
+	vpsrldq		\$8,$H3,$T3
+	vpsrldq		\$8,$H4,$T4
+	vpsrldq		\$8,$H0,$T0
+	vpaddq		$T1,$D1,$D1
+	vpaddq		$T2,$H2,$H2
+	vpaddq		$T3,$H3,$H3
+	vpaddq		$T4,$H4,$H4
+	vpaddq		$T0,$H0,$H0
+
+	vpermq		\$0x2,$H3,$T3
+	vpermq		\$0x2,$H4,$T4
+	vpermq		\$0x2,$H0,$T0
+	vpermq		\$0x2,$D1,$T1
+	vpermq		\$0x2,$H2,$T2
+	vpaddq		$T3,$H3,$H3
+	vpaddq		$T4,$H4,$H4
+	vpaddq		$T0,$H0,$H0
+	vpaddq		$T1,$D1,$D1
+	vpaddq		$T2,$H2,$H2
+
+	################################################################
+	# lazy reduction
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$D1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H4,$D4
+	vpand		$MASK,$H4,$H4
+
+	vpsrlq		\$26,$H1,$D1
+	vpand		$MASK,$H1,$H1
+	vpaddq		$D1,$H2,$H2		# h1 -> h2
+
+	vpaddq		$D4,$H0,$H0
+	vpsllq		\$2,$D4,$D4
+	vpaddq		$D4,$H0,$H0		# h4 -> h0
+
+	vpsrlq		\$26,$H2,$D2
+	vpand		$MASK,$H2,$H2
+	vpaddq		$D2,$H3,$H3		# h2 -> h3
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	vmovd		%x#$H0,`4*0-48-64`($ctx)# save partially reduced
+	vmovd		%x#$H1,`4*1-48-64`($ctx)
+	vmovd		%x#$H2,`4*2-48-64`($ctx)
+	vmovd		%x#$H3,`4*3-48-64`($ctx)
+	vmovd		%x#$H4,`4*4-48-64`($ctx)
+___
+$code.=<<___	if ($win64);
+	vmovdqa		-0xb0(%r10),%xmm6
+	vmovdqa		-0xa0(%r10),%xmm7
+	vmovdqa		-0x90(%r10),%xmm8
+	vmovdqa		-0x80(%r10),%xmm9
+	vmovdqa		-0x70(%r10),%xmm10
+	vmovdqa		-0x60(%r10),%xmm11
+	vmovdqa		-0x50(%r10),%xmm12
+	vmovdqa		-0x40(%r10),%xmm13
+	vmovdqa		-0x30(%r10),%xmm14
+	vmovdqa		-0x20(%r10),%xmm15
+	lea		-8(%r10),%rsp
+.Ldo_avx2_epilogue$suffix:
+___
+$code.=<<___	if (!$win64);
+	lea		-8(%r10),%rsp
+.cfi_def_cfa_register	%rsp
+___
+$code.=<<___;
+	vzeroupper
+	ret
+.cfi_endproc
+___
+if($avx > 2 && $avx512) {
+my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
+my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
+my $PADBIT="%zmm30";
+
+map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3));		# switch to %zmm domain
+map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4));
+map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
+map(s/%y/%z/,($MASK));
+
+$code.=<<___;
+.cfi_startproc
+.Lblocks_avx512:
+	mov		\$15,%eax
+	kmovw		%eax,%k2
+___
+$code.=<<___	if (!$win64);
+	lea		8(%rsp),%r10
+.cfi_def_cfa_register	%r10
+	sub		\$0x128,%rsp
+___
+$code.=<<___	if ($win64);
+	lea		8(%rsp),%r10
+	sub		\$0x1c8,%rsp
+	vmovdqa		%xmm6,-0xb0(%r10)
+	vmovdqa		%xmm7,-0xa0(%r10)
+	vmovdqa		%xmm8,-0x90(%r10)
+	vmovdqa		%xmm9,-0x80(%r10)
+	vmovdqa		%xmm10,-0x70(%r10)
+	vmovdqa		%xmm11,-0x60(%r10)
+	vmovdqa		%xmm12,-0x50(%r10)
+	vmovdqa		%xmm13,-0x40(%r10)
+	vmovdqa		%xmm14,-0x30(%r10)
+	vmovdqa		%xmm15,-0x20(%r10)
+.Ldo_avx512_body:
+___
+$code.=<<___;
+	lea		.Lconst(%rip),%rcx
+	lea		48+64($ctx),$ctx	# size optimization
+	vmovdqa		96(%rcx),%y#$T2		# .Lpermd_avx2
+
+	# expand pre-calculated table
+	vmovdqu		`16*0-64`($ctx),%x#$D0	# will become expanded ${R0}
+	and		\$-512,%rsp
+	vmovdqu		`16*1-64`($ctx),%x#$D1	# will become ... ${R1}
+	mov		\$0x20,%rax
+	vmovdqu		`16*2-64`($ctx),%x#$T0	# ... ${S1}
+	vmovdqu		`16*3-64`($ctx),%x#$D2	# ... ${R2}
+	vmovdqu		`16*4-64`($ctx),%x#$T1	# ... ${S2}
+	vmovdqu		`16*5-64`($ctx),%x#$D3	# ... ${R3}
+	vmovdqu		`16*6-64`($ctx),%x#$T3	# ... ${S3}
+	vmovdqu		`16*7-64`($ctx),%x#$D4	# ... ${R4}
+	vmovdqu		`16*8-64`($ctx),%x#$T4	# ... ${S4}
+	vpermd		$D0,$T2,$R0		# 00003412 -> 14243444
+	vpbroadcastq	64(%rcx),$MASK		# .Lmask26
+	vpermd		$D1,$T2,$R1
+	vpermd		$T0,$T2,$S1
+	vpermd		$D2,$T2,$R2
+	vmovdqa64	$R0,0x00(%rsp){%k2}	# save in case $len%128 != 0
+	 vpsrlq		\$32,$R0,$T0		# 14243444 -> 01020304
+	vpermd		$T1,$T2,$S2
+	vmovdqu64	$R1,0x00(%rsp,%rax){%k2}
+	 vpsrlq		\$32,$R1,$T1
+	vpermd		$D3,$T2,$R3
+	vmovdqa64	$S1,0x40(%rsp){%k2}
+	vpermd		$T3,$T2,$S3
+	vpermd		$D4,$T2,$R4
+	vmovdqu64	$R2,0x40(%rsp,%rax){%k2}
+	vpermd		$T4,$T2,$S4
+	vmovdqa64	$S2,0x80(%rsp){%k2}
+	vmovdqu64	$R3,0x80(%rsp,%rax){%k2}
+	vmovdqa64	$S3,0xc0(%rsp){%k2}
+	vmovdqu64	$R4,0xc0(%rsp,%rax){%k2}
+	vmovdqa64	$S4,0x100(%rsp){%k2}
+
+	################################################################
+	# calculate 5th through 8th powers of the key
+	#
+	# d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1
+	# d1 = r0'*r1 + r1'*r0   + r2'*5*r4 + r3'*5*r3 + r4'*5*r2
+	# d2 = r0'*r2 + r1'*r1   + r2'*r0   + r3'*5*r4 + r4'*5*r3
+	# d3 = r0'*r3 + r1'*r2   + r2'*r1   + r3'*r0   + r4'*5*r4
+	# d4 = r0'*r4 + r1'*r3   + r2'*r2   + r3'*r1   + r4'*r0
+
+	vpmuludq	$T0,$R0,$D0		# d0 = r0'*r0
+	vpmuludq	$T0,$R1,$D1		# d1 = r0'*r1
+	vpmuludq	$T0,$R2,$D2		# d2 = r0'*r2
+	vpmuludq	$T0,$R3,$D3		# d3 = r0'*r3
+	vpmuludq	$T0,$R4,$D4		# d4 = r0'*r4
+	 vpsrlq		\$32,$R2,$T2
+
+	vpmuludq	$T1,$S4,$M0
+	vpmuludq	$T1,$R0,$M1
+	vpmuludq	$T1,$R1,$M2
+	vpmuludq	$T1,$R2,$M3
+	vpmuludq	$T1,$R3,$M4
+	 vpsrlq		\$32,$R3,$T3
+	vpaddq		$M0,$D0,$D0		# d0 += r1'*5*r4
+	vpaddq		$M1,$D1,$D1		# d1 += r1'*r0
+	vpaddq		$M2,$D2,$D2		# d2 += r1'*r1
+	vpaddq		$M3,$D3,$D3		# d3 += r1'*r2
+	vpaddq		$M4,$D4,$D4		# d4 += r1'*r3
+
+	vpmuludq	$T2,$S3,$M0
+	vpmuludq	$T2,$S4,$M1
+	vpmuludq	$T2,$R1,$M3
+	vpmuludq	$T2,$R2,$M4
+	vpmuludq	$T2,$R0,$M2
+	 vpsrlq		\$32,$R4,$T4
+	vpaddq		$M0,$D0,$D0		# d0 += r2'*5*r3
+	vpaddq		$M1,$D1,$D1		# d1 += r2'*5*r4
+	vpaddq		$M3,$D3,$D3		# d3 += r2'*r1
+	vpaddq		$M4,$D4,$D4		# d4 += r2'*r2
+	vpaddq		$M2,$D2,$D2		# d2 += r2'*r0
+
+	vpmuludq	$T3,$S2,$M0
+	vpmuludq	$T3,$R0,$M3
+	vpmuludq	$T3,$R1,$M4
+	vpmuludq	$T3,$S3,$M1
+	vpmuludq	$T3,$S4,$M2
+	vpaddq		$M0,$D0,$D0		# d0 += r3'*5*r2
+	vpaddq		$M3,$D3,$D3		# d3 += r3'*r0
+	vpaddq		$M4,$D4,$D4		# d4 += r3'*r1
+	vpaddq		$M1,$D1,$D1		# d1 += r3'*5*r3
+	vpaddq		$M2,$D2,$D2		# d2 += r3'*5*r4
+
+	vpmuludq	$T4,$S4,$M3
+	vpmuludq	$T4,$R0,$M4
+	vpmuludq	$T4,$S1,$M0
+	vpmuludq	$T4,$S2,$M1
+	vpmuludq	$T4,$S3,$M2
+	vpaddq		$M3,$D3,$D3		# d3 += r2'*5*r4
+	vpaddq		$M4,$D4,$D4		# d4 += r2'*r0
+	vpaddq		$M0,$D0,$D0		# d0 += r2'*5*r1
+	vpaddq		$M1,$D1,$D1		# d1 += r2'*5*r2
+	vpaddq		$M2,$D2,$D2		# d2 += r2'*5*r3
+
+	################################################################
+	# load input
+	vmovdqu64	16*0($inp),%z#$T3
+	vmovdqu64	16*4($inp),%z#$T4
+	lea		16*8($inp),$inp
+
+	################################################################
+	# lazy reduction
+
+	vpsrlq		\$26,$D3,$M3
+	vpandq		$MASK,$D3,$D3
+	vpaddq		$M3,$D4,$D4		# d3 -> d4
+
+	vpsrlq		\$26,$D0,$M0
+	vpandq		$MASK,$D0,$D0
+	vpaddq		$M0,$D1,$D1		# d0 -> d1
+
+	vpsrlq		\$26,$D4,$M4
+	vpandq		$MASK,$D4,$D4
+
+	vpsrlq		\$26,$D1,$M1
+	vpandq		$MASK,$D1,$D1
+	vpaddq		$M1,$D2,$D2		# d1 -> d2
+
+	vpaddq		$M4,$D0,$D0
+	vpsllq		\$2,$M4,$M4
+	vpaddq		$M4,$D0,$D0		# d4 -> d0
+
+	vpsrlq		\$26,$D2,$M2
+	vpandq		$MASK,$D2,$D2
+	vpaddq		$M2,$D3,$D3		# d2 -> d3
+
+	vpsrlq		\$26,$D0,$M0
+	vpandq		$MASK,$D0,$D0
+	vpaddq		$M0,$D1,$D1		# d0 -> d1
+
+	vpsrlq		\$26,$D3,$M3
+	vpandq		$MASK,$D3,$D3
+	vpaddq		$M3,$D4,$D4		# d3 -> d4
+
+	################################################################
+	# at this point we have 14243444 in $R0-$S4 and 05060708 in
+	# $D0-$D4, ...
+
+	vpunpcklqdq	$T4,$T3,$T0	# transpose input
+	vpunpckhqdq	$T4,$T3,$T4
+
+	# ... since input 64-bit lanes are ordered as 73625140, we could
+	# "vperm" it to 76543210 (here and in each loop iteration), *or*
+	# we could just flow along, hence the goal for $R0-$S4 is
+	# 1858286838784888 ...
+
+	vmovdqa32	128(%rcx),$M0		# .Lpermd_avx512:
+	mov		\$0x7777,%eax
+	kmovw		%eax,%k1
+
+	vpermd		$R0,$M0,$R0		# 14243444 -> 1---2---3---4---
+	vpermd		$R1,$M0,$R1
+	vpermd		$R2,$M0,$R2
+	vpermd		$R3,$M0,$R3
+	vpermd		$R4,$M0,$R4
+
+	vpermd		$D0,$M0,${R0}{%k1}	# 05060708 -> 1858286838784888
+	vpermd		$D1,$M0,${R1}{%k1}
+	vpermd		$D2,$M0,${R2}{%k1}
+	vpermd		$D3,$M0,${R3}{%k1}
+	vpermd		$D4,$M0,${R4}{%k1}
+
+	vpslld		\$2,$R1,$S1		# *5
+	vpslld		\$2,$R2,$S2
+	vpslld		\$2,$R3,$S3
+	vpslld		\$2,$R4,$S4
+	vpaddd		$R1,$S1,$S1
+	vpaddd		$R2,$S2,$S2
+	vpaddd		$R3,$S3,$S3
+	vpaddd		$R4,$S4,$S4
+
+	vpbroadcastq	32(%rcx),$PADBIT	# .L129
+
+	vpsrlq		\$52,$T0,$T2		# splat input
+	vpsllq		\$12,$T4,$T3
+	vporq		$T3,$T2,$T2
+	vpsrlq		\$26,$T0,$T1
+	vpsrlq		\$14,$T4,$T3
+	vpsrlq		\$40,$T4,$T4		# 4
+	vpandq		$MASK,$T2,$T2		# 2
+	vpandq		$MASK,$T0,$T0		# 0
+	#vpandq		$MASK,$T1,$T1		# 1
+	#vpandq		$MASK,$T3,$T3		# 3
+	#vporq		$PADBIT,$T4,$T4		# padbit, yes, always
+
+	vpaddq		$H2,$T2,$H2		# accumulate input
+	sub		\$192,$len
+	jbe		.Ltail_avx512
+	jmp		.Loop_avx512
+
+.align	32
+.Loop_avx512:
+	################################################################
+	# ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8
+	# ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7
+	# ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6
+	# ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5
+	# ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4
+	# ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3
+	# ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2
+	# ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1
+	#   \________/\___________/
+	################################################################
+	#vpaddq		$H2,$T2,$H2		# accumulate input
+
+	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	#
+	# however, as h2 is "chronologically" first one available pull
+	# corresponding operations up, so it's
+	#
+	# d3 = h2*r1   + h0*r3 + h1*r2   + h3*r0 + h4*5*r4
+	# d4 = h2*r2   + h0*r4 + h1*r3   + h3*r1 + h4*r0
+	# d0 = h2*5*r3 + h0*r0 + h1*5*r4         + h3*5*r2 + h4*5*r1
+	# d1 = h2*5*r4 + h0*r1           + h1*r0 + h3*5*r3 + h4*5*r2
+	# d2 = h2*r0           + h0*r2   + h1*r1 + h3*5*r4 + h4*5*r3
+
+	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
+	 vpaddq		$H0,$T0,$H0
+	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
+	 vpandq		$MASK,$T1,$T1		# 1
+	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
+	 vpandq		$MASK,$T3,$T3		# 3
+	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
+	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
+	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
+	 vpaddq		$H1,$T1,$H1		# accumulate input
+	 vpaddq		$H3,$T3,$H3
+	 vpaddq		$H4,$T4,$H4
+
+	  vmovdqu64	16*0($inp),$T3		# load input
+	  vmovdqu64	16*4($inp),$T4
+	  lea		16*8($inp),$inp
+	vpmuludq	$H0,$R3,$M3
+	vpmuludq	$H0,$R4,$M4
+	vpmuludq	$H0,$R0,$M0
+	vpmuludq	$H0,$R1,$M1
+	vpaddq		$M3,$D3,$D3		# d3 += h0*r3
+	vpaddq		$M4,$D4,$D4		# d4 += h0*r4
+	vpaddq		$M0,$D0,$D0		# d0 += h0*r0
+	vpaddq		$M1,$D1,$D1		# d1 += h0*r1
+
+	vpmuludq	$H1,$R2,$M3
+	vpmuludq	$H1,$R3,$M4
+	vpmuludq	$H1,$S4,$M0
+	vpmuludq	$H0,$R2,$M2
+	vpaddq		$M3,$D3,$D3		# d3 += h1*r2
+	vpaddq		$M4,$D4,$D4		# d4 += h1*r3
+	vpaddq		$M0,$D0,$D0		# d0 += h1*s4
+	vpaddq		$M2,$D2,$D2		# d2 += h0*r2
+
+	  vpunpcklqdq	$T4,$T3,$T0		# transpose input
+	  vpunpckhqdq	$T4,$T3,$T4
+
+	vpmuludq	$H3,$R0,$M3
+	vpmuludq	$H3,$R1,$M4
+	vpmuludq	$H1,$R0,$M1
+	vpmuludq	$H1,$R1,$M2
+	vpaddq		$M3,$D3,$D3		# d3 += h3*r0
+	vpaddq		$M4,$D4,$D4		# d4 += h3*r1
+	vpaddq		$M1,$D1,$D1		# d1 += h1*r0
+	vpaddq		$M2,$D2,$D2		# d2 += h1*r1
+
+	vpmuludq	$H4,$S4,$M3
+	vpmuludq	$H4,$R0,$M4
+	vpmuludq	$H3,$S2,$M0
+	vpmuludq	$H3,$S3,$M1
+	vpaddq		$M3,$D3,$D3		# d3 += h4*s4
+	vpmuludq	$H3,$S4,$M2
+	vpaddq		$M4,$D4,$D4		# d4 += h4*r0
+	vpaddq		$M0,$D0,$D0		# d0 += h3*s2
+	vpaddq		$M1,$D1,$D1		# d1 += h3*s3
+	vpaddq		$M2,$D2,$D2		# d2 += h3*s4
+
+	vpmuludq	$H4,$S1,$M0
+	vpmuludq	$H4,$S2,$M1
+	vpmuludq	$H4,$S3,$M2
+	vpaddq		$M0,$D0,$H0		# h0 = d0 + h4*s1
+	vpaddq		$M1,$D1,$H1		# h1 = d2 + h4*s2
+	vpaddq		$M2,$D2,$H2		# h2 = d3 + h4*s3
+
+	################################################################
+	# lazy reduction (interleaved with input splat)
+
+	 vpsrlq		\$52,$T0,$T2		# splat input
+	 vpsllq		\$12,$T4,$T3
+
+	vpsrlq		\$26,$D3,$H3
+	vpandq		$MASK,$D3,$D3
+	vpaddq		$H3,$D4,$H4		# h3 -> h4
+
+	 vporq		$T3,$T2,$T2
+
+	vpsrlq		\$26,$H0,$D0
+	vpandq		$MASK,$H0,$H0
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	 vpandq		$MASK,$T2,$T2		# 2
+
+	vpsrlq		\$26,$H4,$D4
+	vpandq		$MASK,$H4,$H4
+
+	vpsrlq		\$26,$H1,$D1
+	vpandq		$MASK,$H1,$H1
+	vpaddq		$D1,$H2,$H2		# h1 -> h2
+
+	vpaddq		$D4,$H0,$H0
+	vpsllq		\$2,$D4,$D4
+	vpaddq		$D4,$H0,$H0		# h4 -> h0
+
+	 vpaddq		$T2,$H2,$H2		# modulo-scheduled
+	 vpsrlq		\$26,$T0,$T1
+
+	vpsrlq		\$26,$H2,$D2
+	vpandq		$MASK,$H2,$H2
+	vpaddq		$D2,$D3,$H3		# h2 -> h3
+
+	 vpsrlq		\$14,$T4,$T3
+
+	vpsrlq		\$26,$H0,$D0
+	vpandq		$MASK,$H0,$H0
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	 vpsrlq		\$40,$T4,$T4		# 4
+
+	vpsrlq		\$26,$H3,$D3
+	vpandq		$MASK,$H3,$H3
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	 vpandq		$MASK,$T0,$T0		# 0
+	 #vpandq	$MASK,$T1,$T1		# 1
+	 #vpandq	$MASK,$T3,$T3		# 3
+	 #vporq		$PADBIT,$T4,$T4		# padbit, yes, always
+
+	sub		\$128,$len
+	ja		.Loop_avx512
+
+.Ltail_avx512:
+	################################################################
+	# while above multiplications were by r^8 in all lanes, in last
+	# iteration we multiply least significant lane by r^8 and most
+	# significant one by r, that's why table gets shifted...
+
+	vpsrlq		\$32,$R0,$R0		# 0105020603070408
+	vpsrlq		\$32,$R1,$R1
+	vpsrlq		\$32,$R2,$R2
+	vpsrlq		\$32,$S3,$S3
+	vpsrlq		\$32,$S4,$S4
+	vpsrlq		\$32,$R3,$R3
+	vpsrlq		\$32,$R4,$R4
+	vpsrlq		\$32,$S1,$S1
+	vpsrlq		\$32,$S2,$S2
+
+	################################################################
+	# load either next or last 64 byte of input
+	lea		($inp,$len),$inp
+
+	#vpaddq		$H2,$T2,$H2		# accumulate input
+	vpaddq		$H0,$T0,$H0
+
+	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
+	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
+	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
+	 vpandq		$MASK,$T1,$T1		# 1
+	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
+	 vpandq		$MASK,$T3,$T3		# 3
+	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
+	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
+	 vpaddq		$H1,$T1,$H1		# accumulate input
+	 vpaddq		$H3,$T3,$H3
+	 vpaddq		$H4,$T4,$H4
+
+	  vmovdqu	16*0($inp),%x#$T0
+	vpmuludq	$H0,$R3,$M3
+	vpmuludq	$H0,$R4,$M4
+	vpmuludq	$H0,$R0,$M0
+	vpmuludq	$H0,$R1,$M1
+	vpaddq		$M3,$D3,$D3		# d3 += h0*r3
+	vpaddq		$M4,$D4,$D4		# d4 += h0*r4
+	vpaddq		$M0,$D0,$D0		# d0 += h0*r0
+	vpaddq		$M1,$D1,$D1		# d1 += h0*r1
+
+	  vmovdqu	16*1($inp),%x#$T1
+	vpmuludq	$H1,$R2,$M3
+	vpmuludq	$H1,$R3,$M4
+	vpmuludq	$H1,$S4,$M0
+	vpmuludq	$H0,$R2,$M2
+	vpaddq		$M3,$D3,$D3		# d3 += h1*r2
+	vpaddq		$M4,$D4,$D4		# d4 += h1*r3
+	vpaddq		$M0,$D0,$D0		# d0 += h1*s4
+	vpaddq		$M2,$D2,$D2		# d2 += h0*r2
+
+	  vinserti128	\$1,16*2($inp),%y#$T0,%y#$T0
+	vpmuludq	$H3,$R0,$M3
+	vpmuludq	$H3,$R1,$M4
+	vpmuludq	$H1,$R0,$M1
+	vpmuludq	$H1,$R1,$M2
+	vpaddq		$M3,$D3,$D3		# d3 += h3*r0
+	vpaddq		$M4,$D4,$D4		# d4 += h3*r1
+	vpaddq		$M1,$D1,$D1		# d1 += h1*r0
+	vpaddq		$M2,$D2,$D2		# d2 += h1*r1
+
+	  vinserti128	\$1,16*3($inp),%y#$T1,%y#$T1
+	vpmuludq	$H4,$S4,$M3
+	vpmuludq	$H4,$R0,$M4
+	vpmuludq	$H3,$S2,$M0
+	vpmuludq	$H3,$S3,$M1
+	vpmuludq	$H3,$S4,$M2
+	vpaddq		$M3,$D3,$H3		# h3 = d3 + h4*s4
+	vpaddq		$M4,$D4,$D4		# d4 += h4*r0
+	vpaddq		$M0,$D0,$D0		# d0 += h3*s2
+	vpaddq		$M1,$D1,$D1		# d1 += h3*s3
+	vpaddq		$M2,$D2,$D2		# d2 += h3*s4
+
+	vpmuludq	$H4,$S1,$M0
+	vpmuludq	$H4,$S2,$M1
+	vpmuludq	$H4,$S3,$M2
+	vpaddq		$M0,$D0,$H0		# h0 = d0 + h4*s1
+	vpaddq		$M1,$D1,$H1		# h1 = d2 + h4*s2
+	vpaddq		$M2,$D2,$H2		# h2 = d3 + h4*s3
+
+	################################################################
+	# horizontal addition
+
+	mov		\$1,%eax
+	vpermq		\$0xb1,$H3,$D3
+	vpermq		\$0xb1,$D4,$H4
+	vpermq		\$0xb1,$H0,$D0
+	vpermq		\$0xb1,$H1,$D1
+	vpermq		\$0xb1,$H2,$D2
+	vpaddq		$D3,$H3,$H3
+	vpaddq		$D4,$H4,$H4
+	vpaddq		$D0,$H0,$H0
+	vpaddq		$D1,$H1,$H1
+	vpaddq		$D2,$H2,$H2
+
+	kmovw		%eax,%k3
+	vpermq		\$0x2,$H3,$D3
+	vpermq		\$0x2,$H4,$D4
+	vpermq		\$0x2,$H0,$D0
+	vpermq		\$0x2,$H1,$D1
+	vpermq		\$0x2,$H2,$D2
+	vpaddq		$D3,$H3,$H3
+	vpaddq		$D4,$H4,$H4
+	vpaddq		$D0,$H0,$H0
+	vpaddq		$D1,$H1,$H1
+	vpaddq		$D2,$H2,$H2
+
+	vextracti64x4	\$0x1,$H3,%y#$D3
+	vextracti64x4	\$0x1,$H4,%y#$D4
+	vextracti64x4	\$0x1,$H0,%y#$D0
+	vextracti64x4	\$0x1,$H1,%y#$D1
+	vextracti64x4	\$0x1,$H2,%y#$D2
+	vpaddq		$D3,$H3,${H3}{%k3}{z}	# keep single qword in case
+	vpaddq		$D4,$H4,${H4}{%k3}{z}	# it's passed to .Ltail_avx2
+	vpaddq		$D0,$H0,${H0}{%k3}{z}
+	vpaddq		$D1,$H1,${H1}{%k3}{z}
+	vpaddq		$D2,$H2,${H2}{%k3}{z}
+___
+map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
+map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
+$code.=<<___;
+	################################################################
+	# lazy reduction (interleaved with input splat)
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	 vpsrldq	\$6,$T0,$T2		# splat input
+	 vpsrldq	\$6,$T1,$T3
+	 vpunpckhqdq	$T1,$T0,$T4		# 4
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	 vpunpcklqdq	$T3,$T2,$T2		# 2:3
+	 vpunpcklqdq	$T1,$T0,$T0		# 0:1
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H4,$D4
+	vpand		$MASK,$H4,$H4
+
+	vpsrlq		\$26,$H1,$D1
+	vpand		$MASK,$H1,$H1
+	 vpsrlq		\$30,$T2,$T3
+	 vpsrlq		\$4,$T2,$T2
+	vpaddq		$D1,$H2,$H2		# h1 -> h2
+
+	vpaddq		$D4,$H0,$H0
+	vpsllq		\$2,$D4,$D4
+	 vpsrlq		\$26,$T0,$T1
+	 vpsrlq		\$40,$T4,$T4		# 4
+	vpaddq		$D4,$H0,$H0		# h4 -> h0
+
+	vpsrlq		\$26,$H2,$D2
+	vpand		$MASK,$H2,$H2
+	 vpand		$MASK,$T2,$T2		# 2
+	 vpand		$MASK,$T0,$T0		# 0
+	vpaddq		$D2,$H3,$H3		# h2 -> h3
+
+	vpsrlq		\$26,$H0,$D0
+	vpand		$MASK,$H0,$H0
+	 vpaddq		$H2,$T2,$H2		# accumulate input for .Ltail_avx2
+	 vpand		$MASK,$T1,$T1		# 1
+	vpaddq		$D0,$H1,$H1		# h0 -> h1
+
+	vpsrlq		\$26,$H3,$D3
+	vpand		$MASK,$H3,$H3
+	 vpand		$MASK,$T3,$T3		# 3
+	 vpor		32(%rcx),$T4,$T4	# padbit, yes, always
+	vpaddq		$D3,$H4,$H4		# h3 -> h4
+
+	lea		0x90(%rsp),%rax		# size optimization for .Ltail_avx2
+	add		\$64,$len
+	jnz		.Ltail_avx2$suffix
+
+	vpsubq		$T2,$H2,$H2		# undo input accumulation
+	vmovd		%x#$H0,`4*0-48-64`($ctx)# save partially reduced
+	vmovd		%x#$H1,`4*1-48-64`($ctx)
+	vmovd		%x#$H2,`4*2-48-64`($ctx)
+	vmovd		%x#$H3,`4*3-48-64`($ctx)
+	vmovd		%x#$H4,`4*4-48-64`($ctx)
+	vzeroall
+___
+$code.=<<___	if ($win64);
+	movdqa		-0xb0(%r10),%xmm6
+	movdqa		-0xa0(%r10),%xmm7
+	movdqa		-0x90(%r10),%xmm8
+	movdqa		-0x80(%r10),%xmm9
+	movdqa		-0x70(%r10),%xmm10
+	movdqa		-0x60(%r10),%xmm11
+	movdqa		-0x50(%r10),%xmm12
+	movdqa		-0x40(%r10),%xmm13
+	movdqa		-0x30(%r10),%xmm14
+	movdqa		-0x20(%r10),%xmm15
+	lea		-8(%r10),%rsp
+.Ldo_avx512_epilogue:
+___
+$code.=<<___	if (!$win64);
+	lea		-8(%r10),%rsp
+.cfi_def_cfa_register	%rsp
+___
+$code.=<<___;
+	ret
+.cfi_endproc
+___
+
+}
+
+}
+
+&declare_function("poly1305_blocks_avx2", 32, 4);
+poly1305_blocks_avxN(0);
+&end_function("poly1305_blocks_avx2");
+
+if($kernel) {
+	$code .= "#endif\n";
+}
+
+#######################################################################
+if ($avx>2) {
+# On entry we have input length divisible by 64. But since inner loop
+# processes 128 bytes per iteration, cases when length is not divisible
+# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
+# reason stack layout is kept identical to poly1305_blocks_avx2. If not
+# for this tail, we wouldn't have to even allocate stack frame...
+
+if($kernel) {
+	$code .= "#ifdef CONFIG_AS_AVX512\n";
+}
+
+&declare_function("poly1305_blocks_avx512", 32, 4);
+poly1305_blocks_avxN(1);
+&end_function("poly1305_blocks_avx512");
+
+if ($kernel) {
+	$code .= "#endif\n";
+}
+
+if (!$kernel && $avx>3) {
+########################################################################
+# VPMADD52 version using 2^44 radix.
+#
+# One can argue that base 2^52 would be more natural. Well, even though
+# some operations would be more natural, one has to recognize couple of
+# things. Base 2^52 doesn't provide advantage over base 2^44 if you look
+# at amount of multiply-n-accumulate operations. Secondly, it makes it
+# impossible to pre-compute multiples of 5 [referred to as s[]/sN in
+# reference implementations], which means that more such operations
+# would have to be performed in inner loop, which in turn makes critical
+# path longer. In other words, even though base 2^44 reduction might
+# look less elegant, overall critical path is actually shorter...
+
+########################################################################
+# Layout of opaque area is following.
+#
+#	unsigned __int64 h[3];		# current hash value base 2^44
+#	unsigned __int64 s[2];		# key value*20 base 2^44
+#	unsigned __int64 r[3];		# key value base 2^44
+#	struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4];
+#					# r^n positions reflect
+#					# placement in register, not
+#					# memory, R[3] is R[1]*20
+
+$code.=<<___;
+.type	poly1305_init_base2_44,\@function,3
+.align	32
+poly1305_init_base2_44:
+	xor	%rax,%rax
+	mov	%rax,0($ctx)		# initialize hash value
+	mov	%rax,8($ctx)
+	mov	%rax,16($ctx)
+
+.Linit_base2_44:
+	lea	poly1305_blocks_vpmadd52(%rip),%r10
+	lea	poly1305_emit_base2_44(%rip),%r11
+
+	mov	\$0x0ffffffc0fffffff,%rax
+	mov	\$0x0ffffffc0ffffffc,%rcx
+	and	0($inp),%rax
+	mov	\$0x00000fffffffffff,%r8
+	and	8($inp),%rcx
+	mov	\$0x00000fffffffffff,%r9
+	and	%rax,%r8
+	shrd	\$44,%rcx,%rax
+	mov	%r8,40($ctx)		# r0
+	and	%r9,%rax
+	shr	\$24,%rcx
+	mov	%rax,48($ctx)		# r1
+	lea	(%rax,%rax,4),%rax	# *5
+	mov	%rcx,56($ctx)		# r2
+	shl	\$2,%rax		# magic <<2
+	lea	(%rcx,%rcx,4),%rcx	# *5
+	shl	\$2,%rcx		# magic <<2
+	mov	%rax,24($ctx)		# s1
+	mov	%rcx,32($ctx)		# s2
+	movq	\$-1,64($ctx)		# write impossible value
+___
+$code.=<<___	if ($flavour !~ /elf32/);
+	mov	%r10,0(%rdx)
+	mov	%r11,8(%rdx)
+___
+$code.=<<___	if ($flavour =~ /elf32/);
+	mov	%r10d,0(%rdx)
+	mov	%r11d,4(%rdx)
+___
+$code.=<<___;
+	mov	\$1,%eax
+	ret
+.size	poly1305_init_base2_44,.-poly1305_init_base2_44
+___
+{
+my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
+my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
+my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25));
+
+$code.=<<___;
+.type	poly1305_blocks_vpmadd52,\@function,4
+.align	32
+poly1305_blocks_vpmadd52:
+	shr	\$4,$len
+	jz	.Lno_data_vpmadd52		# too short
+
+	shl	\$40,$padbit
+	mov	64($ctx),%r8			# peek on power of the key
+
+	# if powers of the key are not calculated yet, process up to 3
+	# blocks with this single-block subroutine, otherwise ensure that
+	# length is divisible by 2 blocks and pass the rest down to next
+	# subroutine...
+
+	mov	\$3,%rax
+	mov	\$1,%r10
+	cmp	\$4,$len			# is input long
+	cmovae	%r10,%rax
+	test	%r8,%r8				# is power value impossible?
+	cmovns	%r10,%rax
+
+	and	$len,%rax			# is input of favourable length?
+	jz	.Lblocks_vpmadd52_4x
+
+	sub		%rax,$len
+	mov		\$7,%r10d
+	mov		\$1,%r11d
+	kmovw		%r10d,%k7
+	lea		.L2_44_inp_permd(%rip),%r10
+	kmovw		%r11d,%k1
+
+	vmovq		$padbit,%x#$PAD
+	vmovdqa64	0(%r10),$inp_permd	# .L2_44_inp_permd
+	vmovdqa64	32(%r10),$inp_shift	# .L2_44_inp_shift
+	vpermq		\$0xcf,$PAD,$PAD
+	vmovdqa64	64(%r10),$reduc_mask	# .L2_44_mask
+
+	vmovdqu64	0($ctx),${Dlo}{%k7}{z}		# load hash value
+	vmovdqu64	40($ctx),${r2r1r0}{%k7}{z}	# load keys
+	vmovdqu64	32($ctx),${r1r0s2}{%k7}{z}
+	vmovdqu64	24($ctx),${r0s2s1}{%k7}{z}
+
+	vmovdqa64	96(%r10),$reduc_rght	# .L2_44_shift_rgt
+	vmovdqa64	128(%r10),$reduc_left	# .L2_44_shift_lft
+
+	jmp		.Loop_vpmadd52
+
+.align	32
+.Loop_vpmadd52:
+	vmovdqu32	0($inp),%x#$T0		# load input as ----3210
+	lea		16($inp),$inp
+
+	vpermd		$T0,$inp_permd,$T0	# ----3210 -> --322110
+	vpsrlvq		$inp_shift,$T0,$T0
+	vpandq		$reduc_mask,$T0,$T0
+	vporq		$PAD,$T0,$T0
+
+	vpaddq		$T0,$Dlo,$Dlo		# accumulate input
+
+	vpermq		\$0,$Dlo,${H0}{%k7}{z}	# smash hash value
+	vpermq		\$0b01010101,$Dlo,${H1}{%k7}{z}
+	vpermq		\$0b10101010,$Dlo,${H2}{%k7}{z}
+
+	vpxord		$Dlo,$Dlo,$Dlo
+	vpxord		$Dhi,$Dhi,$Dhi
+
+	vpmadd52luq	$r2r1r0,$H0,$Dlo
+	vpmadd52huq	$r2r1r0,$H0,$Dhi
+
+	vpmadd52luq	$r1r0s2,$H1,$Dlo
+	vpmadd52huq	$r1r0s2,$H1,$Dhi
+
+	vpmadd52luq	$r0s2s1,$H2,$Dlo
+	vpmadd52huq	$r0s2s1,$H2,$Dhi
+
+	vpsrlvq		$reduc_rght,$Dlo,$T0	# 0 in topmost qword
+	vpsllvq		$reduc_left,$Dhi,$Dhi	# 0 in topmost qword
+	vpandq		$reduc_mask,$Dlo,$Dlo
+
+	vpaddq		$T0,$Dhi,$Dhi
+
+	vpermq		\$0b10010011,$Dhi,$Dhi	# 0 in lowest qword
+
+	vpaddq		$Dhi,$Dlo,$Dlo		# note topmost qword :-)
+
+	vpsrlvq		$reduc_rght,$Dlo,$T0	# 0 in topmost word
+	vpandq		$reduc_mask,$Dlo,$Dlo
+
+	vpermq		\$0b10010011,$T0,$T0
+
+	vpaddq		$T0,$Dlo,$Dlo
+
+	vpermq		\$0b10010011,$Dlo,${T0}{%k1}{z}
+
+	vpaddq		$T0,$Dlo,$Dlo
+	vpsllq		\$2,$T0,$T0
+
+	vpaddq		$T0,$Dlo,$Dlo
+
+	dec		%rax			# len-=16
+	jnz		.Loop_vpmadd52
+
+	vmovdqu64	$Dlo,0($ctx){%k7}	# store hash value
+
+	test		$len,$len
+	jnz		.Lblocks_vpmadd52_4x
+
+.Lno_data_vpmadd52:
+	ret
+.size	poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+___
+}
+{
+########################################################################
+# As implied by its name 4x subroutine processes 4 blocks in parallel
+# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power
+# and is handled in 256-bit %ymm registers.
+
+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
+
+$code.=<<___;
+.type	poly1305_blocks_vpmadd52_4x,\@function,4
+.align	32
+poly1305_blocks_vpmadd52_4x:
+	shr	\$4,$len
+	jz	.Lno_data_vpmadd52_4x		# too short
+
+	shl	\$40,$padbit
+	mov	64($ctx),%r8			# peek on power of the key
+
+.Lblocks_vpmadd52_4x:
+	vpbroadcastq	$padbit,$PAD
+
+	vmovdqa64	.Lx_mask44(%rip),$mask44
+	mov		\$5,%eax
+	vmovdqa64	.Lx_mask42(%rip),$mask42
+	kmovw		%eax,%k1		# used in 2x path
+
+	test		%r8,%r8			# is power value impossible?
+	js		.Linit_vpmadd52		# if it is, then init R[4]
+
+	vmovq		0($ctx),%x#$H0		# load current hash value
+	vmovq		8($ctx),%x#$H1
+	vmovq		16($ctx),%x#$H2
+
+	test		\$3,$len		# is length 4*n+2?
+	jnz		.Lblocks_vpmadd52_2x_do
+
+.Lblocks_vpmadd52_4x_do:
+	vpbroadcastq	64($ctx),$R0		# load 4th power of the key
+	vpbroadcastq	96($ctx),$R1
+	vpbroadcastq	128($ctx),$R2
+	vpbroadcastq	160($ctx),$S1
+
+.Lblocks_vpmadd52_4x_key_loaded:
+	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
+	vpaddq		$R2,$S2,$S2
+	vpsllq		\$2,$S2,$S2
+
+	test		\$7,$len		# is len 8*n?
+	jz		.Lblocks_vpmadd52_8x
+
+	vmovdqu64	16*0($inp),$T2		# load data
+	vmovdqu64	16*2($inp),$T3
+	lea		16*4($inp),$inp
+
+	vpunpcklqdq	$T3,$T2,$T1		# transpose data
+	vpunpckhqdq	$T3,$T2,$T3
+
+	# at this point 64-bit lanes are ordered as 3-1-2-0
+
+	vpsrlq		\$24,$T3,$T2		# splat the data
+	vporq		$PAD,$T2,$T2
+	 vpaddq		$T2,$H2,$H2		# accumulate input
+	vpandq		$mask44,$T1,$T0
+	vpsrlq		\$44,$T1,$T1
+	vpsllq		\$20,$T3,$T3
+	vporq		$T3,$T1,$T1
+	vpandq		$mask44,$T1,$T1
+
+	sub		\$4,$len
+	jz		.Ltail_vpmadd52_4x
+	jmp		.Loop_vpmadd52_4x
+	ud2
+
+.align	32
+.Linit_vpmadd52:
+	vmovq		24($ctx),%x#$S1		# load key
+	vmovq		56($ctx),%x#$H2
+	vmovq		32($ctx),%x#$S2
+	vmovq		40($ctx),%x#$R0
+	vmovq		48($ctx),%x#$R1
+
+	vmovdqa		$R0,$H0
+	vmovdqa		$R1,$H1
+	vmovdqa		$H2,$R2
+
+	mov		\$2,%eax
+
+.Lmul_init_vpmadd52:
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$H2,$S1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$H2,$S1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$H2,$S2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$H2,$S2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$H2,$R0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$H2,$R0,$D2hi
+
+	vpmadd52luq	$H0,$R0,$D0lo
+	vpmadd52huq	$H0,$R0,$D0hi
+	vpmadd52luq	$H0,$R1,$D1lo
+	vpmadd52huq	$H0,$R1,$D1hi
+	vpmadd52luq	$H0,$R2,$D2lo
+	vpmadd52huq	$H0,$R2,$D2hi
+
+	vpmadd52luq	$H1,$S2,$D0lo
+	vpmadd52huq	$H1,$S2,$D0hi
+	vpmadd52luq	$H1,$R0,$D1lo
+	vpmadd52huq	$H1,$R0,$D1hi
+	vpmadd52luq	$H1,$R1,$D2lo
+	vpmadd52huq	$H1,$R1,$D2hi
+
+	################################################################
+	# partial reduction
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$H0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$H1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$H2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+
+	vpsrlq		\$44,$H0,$tmp		# additional step
+	vpandq		$mask44,$H0,$H0
+
+	vpaddq		$tmp,$H1,$H1
+
+	dec		%eax
+	jz		.Ldone_init_vpmadd52
+
+	vpunpcklqdq	$R1,$H1,$R1		# 1,2
+	vpbroadcastq	%x#$H1,%x#$H1		# 2,2
+	vpunpcklqdq	$R2,$H2,$R2
+	vpbroadcastq	%x#$H2,%x#$H2
+	vpunpcklqdq	$R0,$H0,$R0
+	vpbroadcastq	%x#$H0,%x#$H0
+
+	vpsllq		\$2,$R1,$S1		# S1 = R1*5*4
+	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
+	vpaddq		$R1,$S1,$S1
+	vpaddq		$R2,$S2,$S2
+	vpsllq		\$2,$S1,$S1
+	vpsllq		\$2,$S2,$S2
+
+	jmp		.Lmul_init_vpmadd52
+	ud2
+
+.align	32
+.Ldone_init_vpmadd52:
+	vinserti128	\$1,%x#$R1,$H1,$R1	# 1,2,3,4
+	vinserti128	\$1,%x#$R2,$H2,$R2
+	vinserti128	\$1,%x#$R0,$H0,$R0
+
+	vpermq		\$0b11011000,$R1,$R1	# 1,3,2,4
+	vpermq		\$0b11011000,$R2,$R2
+	vpermq		\$0b11011000,$R0,$R0
+
+	vpsllq		\$2,$R1,$S1		# S1 = R1*5*4
+	vpaddq		$R1,$S1,$S1
+	vpsllq		\$2,$S1,$S1
+
+	vmovq		0($ctx),%x#$H0		# load current hash value
+	vmovq		8($ctx),%x#$H1
+	vmovq		16($ctx),%x#$H2
+
+	test		\$3,$len		# is length 4*n+2?
+	jnz		.Ldone_init_vpmadd52_2x
+
+	vmovdqu64	$R0,64($ctx)		# save key powers
+	vpbroadcastq	%x#$R0,$R0		# broadcast 4th power
+	vmovdqu64	$R1,96($ctx)
+	vpbroadcastq	%x#$R1,$R1
+	vmovdqu64	$R2,128($ctx)
+	vpbroadcastq	%x#$R2,$R2
+	vmovdqu64	$S1,160($ctx)
+	vpbroadcastq	%x#$S1,$S1
+
+	jmp		.Lblocks_vpmadd52_4x_key_loaded
+	ud2
+
+.align	32
+.Ldone_init_vpmadd52_2x:
+	vmovdqu64	$R0,64($ctx)		# save key powers
+	vpsrldq		\$8,$R0,$R0		# 0-1-0-2
+	vmovdqu64	$R1,96($ctx)
+	vpsrldq		\$8,$R1,$R1
+	vmovdqu64	$R2,128($ctx)
+	vpsrldq		\$8,$R2,$R2
+	vmovdqu64	$S1,160($ctx)
+	vpsrldq		\$8,$S1,$S1
+	jmp		.Lblocks_vpmadd52_2x_key_loaded
+	ud2
+
+.align	32
+.Lblocks_vpmadd52_2x_do:
+	vmovdqu64	128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers
+	vmovdqu64	160+8($ctx),${S1}{%k1}{z}
+	vmovdqu64	64+8($ctx),${R0}{%k1}{z}
+	vmovdqu64	96+8($ctx),${R1}{%k1}{z}
+
+.Lblocks_vpmadd52_2x_key_loaded:
+	vmovdqu64	16*0($inp),$T2		# load data
+	vpxorq		$T3,$T3,$T3
+	lea		16*2($inp),$inp
+
+	vpunpcklqdq	$T3,$T2,$T1		# transpose data
+	vpunpckhqdq	$T3,$T2,$T3
+
+	# at this point 64-bit lanes are ordered as x-1-x-0
+
+	vpsrlq		\$24,$T3,$T2		# splat the data
+	vporq		$PAD,$T2,$T2
+	 vpaddq		$T2,$H2,$H2		# accumulate input
+	vpandq		$mask44,$T1,$T0
+	vpsrlq		\$44,$T1,$T1
+	vpsllq		\$20,$T3,$T3
+	vporq		$T3,$T1,$T1
+	vpandq		$mask44,$T1,$T1
+
+	jmp		.Ltail_vpmadd52_2x
+	ud2
+
+.align	32
+.Loop_vpmadd52_4x:
+	#vpaddq		$T2,$H2,$H2		# accumulate input
+	vpaddq		$T0,$H0,$H0
+	vpaddq		$T1,$H1,$H1
+
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$H2,$S1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$H2,$S1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$H2,$S2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$H2,$S2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$H2,$R0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$H2,$R0,$D2hi
+
+	 vmovdqu64	16*0($inp),$T2		# load data
+	 vmovdqu64	16*2($inp),$T3
+	 lea		16*4($inp),$inp
+	vpmadd52luq	$H0,$R0,$D0lo
+	vpmadd52huq	$H0,$R0,$D0hi
+	vpmadd52luq	$H0,$R1,$D1lo
+	vpmadd52huq	$H0,$R1,$D1hi
+	vpmadd52luq	$H0,$R2,$D2lo
+	vpmadd52huq	$H0,$R2,$D2hi
+
+	 vpunpcklqdq	$T3,$T2,$T1		# transpose data
+	 vpunpckhqdq	$T3,$T2,$T3
+	vpmadd52luq	$H1,$S2,$D0lo
+	vpmadd52huq	$H1,$S2,$D0hi
+	vpmadd52luq	$H1,$R0,$D1lo
+	vpmadd52huq	$H1,$R0,$D1hi
+	vpmadd52luq	$H1,$R1,$D2lo
+	vpmadd52huq	$H1,$R1,$D2hi
+
+	################################################################
+	# partial reduction (interleaved with data splat)
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$H0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	 vpsrlq		\$24,$T3,$T2
+	 vporq		$PAD,$T2,$T2
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$H1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	 vpandq		$mask44,$T1,$T0
+	 vpsrlq		\$44,$T1,$T1
+	 vpsllq		\$20,$T3,$T3
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$H2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	  vpaddq	$T2,$H2,$H2		# accumulate input
+	vpaddq		$D2hi,$H0,$H0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+	 vporq		$T3,$T1,$T1
+	 vpandq		$mask44,$T1,$T1
+
+	vpsrlq		\$44,$H0,$tmp		# additional step
+	vpandq		$mask44,$H0,$H0
+
+	vpaddq		$tmp,$H1,$H1
+
+	sub		\$4,$len		# len-=64
+	jnz		.Loop_vpmadd52_4x
+
+.Ltail_vpmadd52_4x:
+	vmovdqu64	128($ctx),$R2		# load all key powers
+	vmovdqu64	160($ctx),$S1
+	vmovdqu64	64($ctx),$R0
+	vmovdqu64	96($ctx),$R1
+
+.Ltail_vpmadd52_2x:
+	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
+	vpaddq		$R2,$S2,$S2
+	vpsllq		\$2,$S2,$S2
+
+	#vpaddq		$T2,$H2,$H2		# accumulate input
+	vpaddq		$T0,$H0,$H0
+	vpaddq		$T1,$H1,$H1
+
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$H2,$S1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$H2,$S1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$H2,$S2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$H2,$S2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$H2,$R0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$H2,$R0,$D2hi
+
+	vpmadd52luq	$H0,$R0,$D0lo
+	vpmadd52huq	$H0,$R0,$D0hi
+	vpmadd52luq	$H0,$R1,$D1lo
+	vpmadd52huq	$H0,$R1,$D1hi
+	vpmadd52luq	$H0,$R2,$D2lo
+	vpmadd52huq	$H0,$R2,$D2hi
+
+	vpmadd52luq	$H1,$S2,$D0lo
+	vpmadd52huq	$H1,$S2,$D0hi
+	vpmadd52luq	$H1,$R0,$D1lo
+	vpmadd52huq	$H1,$R0,$D1hi
+	vpmadd52luq	$H1,$R1,$D2lo
+	vpmadd52huq	$H1,$R1,$D2hi
+
+	################################################################
+	# horizontal addition
+
+	mov		\$1,%eax
+	kmovw		%eax,%k1
+	vpsrldq		\$8,$D0lo,$T0
+	vpsrldq		\$8,$D0hi,$H0
+	vpsrldq		\$8,$D1lo,$T1
+	vpsrldq		\$8,$D1hi,$H1
+	vpaddq		$T0,$D0lo,$D0lo
+	vpaddq		$H0,$D0hi,$D0hi
+	vpsrldq		\$8,$D2lo,$T2
+	vpsrldq		\$8,$D2hi,$H2
+	vpaddq		$T1,$D1lo,$D1lo
+	vpaddq		$H1,$D1hi,$D1hi
+	 vpermq		\$0x2,$D0lo,$T0
+	 vpermq		\$0x2,$D0hi,$H0
+	vpaddq		$T2,$D2lo,$D2lo
+	vpaddq		$H2,$D2hi,$D2hi
+
+	vpermq		\$0x2,$D1lo,$T1
+	vpermq		\$0x2,$D1hi,$H1
+	vpaddq		$T0,$D0lo,${D0lo}{%k1}{z}
+	vpaddq		$H0,$D0hi,${D0hi}{%k1}{z}
+	vpermq		\$0x2,$D2lo,$T2
+	vpermq		\$0x2,$D2hi,$H2
+	vpaddq		$T1,$D1lo,${D1lo}{%k1}{z}
+	vpaddq		$H1,$D1hi,${D1hi}{%k1}{z}
+	vpaddq		$T2,$D2lo,${D2lo}{%k1}{z}
+	vpaddq		$H2,$D2hi,${D2hi}{%k1}{z}
+
+	################################################################
+	# partial reduction
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$H0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$H1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$H2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+
+	vpsrlq		\$44,$H0,$tmp		# additional step
+	vpandq		$mask44,$H0,$H0
+
+	vpaddq		$tmp,$H1,$H1
+						# at this point $len is
+						# either 4*n+2 or 0...
+	sub		\$2,$len		# len-=32
+	ja		.Lblocks_vpmadd52_4x_do
+
+	vmovq		%x#$H0,0($ctx)
+	vmovq		%x#$H1,8($ctx)
+	vmovq		%x#$H2,16($ctx)
+	vzeroall
+
+.Lno_data_vpmadd52_4x:
+	ret
+.size	poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+___
+}
+{
+########################################################################
+# As implied by its name 8x subroutine processes 8 blocks in parallel...
+# This is intermediate version, as it's used only in cases when input
+# length is either 8*n, 8*n+1 or 8*n+2...
+
+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
+my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10));
+
+$code.=<<___;
+.type	poly1305_blocks_vpmadd52_8x,\@function,4
+.align	32
+poly1305_blocks_vpmadd52_8x:
+	shr	\$4,$len
+	jz	.Lno_data_vpmadd52_8x		# too short
+
+	shl	\$40,$padbit
+	mov	64($ctx),%r8			# peek on power of the key
+
+	vmovdqa64	.Lx_mask44(%rip),$mask44
+	vmovdqa64	.Lx_mask42(%rip),$mask42
+
+	test	%r8,%r8				# is power value impossible?
+	js	.Linit_vpmadd52			# if it is, then init R[4]
+
+	vmovq	0($ctx),%x#$H0			# load current hash value
+	vmovq	8($ctx),%x#$H1
+	vmovq	16($ctx),%x#$H2
+
+.Lblocks_vpmadd52_8x:
+	################################################################
+	# fist we calculate more key powers
+
+	vmovdqu64	128($ctx),$R2		# load 1-3-2-4 powers
+	vmovdqu64	160($ctx),$S1
+	vmovdqu64	64($ctx),$R0
+	vmovdqu64	96($ctx),$R1
+
+	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
+	vpaddq		$R2,$S2,$S2
+	vpsllq		\$2,$S2,$S2
+
+	vpbroadcastq	%x#$R2,$RR2		# broadcast 4th power
+	vpbroadcastq	%x#$R0,$RR0
+	vpbroadcastq	%x#$R1,$RR1
+
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$RR2,$S1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$RR2,$S1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$RR2,$S2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$RR2,$S2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$RR2,$R0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$RR2,$R0,$D2hi
+
+	vpmadd52luq	$RR0,$R0,$D0lo
+	vpmadd52huq	$RR0,$R0,$D0hi
+	vpmadd52luq	$RR0,$R1,$D1lo
+	vpmadd52huq	$RR0,$R1,$D1hi
+	vpmadd52luq	$RR0,$R2,$D2lo
+	vpmadd52huq	$RR0,$R2,$D2hi
+
+	vpmadd52luq	$RR1,$S2,$D0lo
+	vpmadd52huq	$RR1,$S2,$D0hi
+	vpmadd52luq	$RR1,$R0,$D1lo
+	vpmadd52huq	$RR1,$R0,$D1hi
+	vpmadd52luq	$RR1,$R1,$D2lo
+	vpmadd52huq	$RR1,$R1,$D2hi
+
+	################################################################
+	# partial reduction
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$RR0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$RR1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$RR2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$RR0,$RR0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$RR0,$RR0
+
+	vpsrlq		\$44,$RR0,$tmp		# additional step
+	vpandq		$mask44,$RR0,$RR0
+
+	vpaddq		$tmp,$RR1,$RR1
+
+	################################################################
+	# At this point Rx holds 1324 powers, RRx - 5768, and the goal
+	# is 15263748, which reflects how data is loaded...
+
+	vpunpcklqdq	$R2,$RR2,$T2		# 3748
+	vpunpckhqdq	$R2,$RR2,$R2		# 1526
+	vpunpcklqdq	$R0,$RR0,$T0
+	vpunpckhqdq	$R0,$RR0,$R0
+	vpunpcklqdq	$R1,$RR1,$T1
+	vpunpckhqdq	$R1,$RR1,$R1
+___
+######## switch to %zmm
+map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
+map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
+map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
+map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2);
+
+$code.=<<___;
+	vshufi64x2	\$0x44,$R2,$T2,$RR2	# 15263748
+	vshufi64x2	\$0x44,$R0,$T0,$RR0
+	vshufi64x2	\$0x44,$R1,$T1,$RR1
+
+	vmovdqu64	16*0($inp),$T2		# load data
+	vmovdqu64	16*4($inp),$T3
+	lea		16*8($inp),$inp
+
+	vpsllq		\$2,$RR2,$SS2		# S2 = R2*5*4
+	vpsllq		\$2,$RR1,$SS1		# S1 = R1*5*4
+	vpaddq		$RR2,$SS2,$SS2
+	vpaddq		$RR1,$SS1,$SS1
+	vpsllq		\$2,$SS2,$SS2
+	vpsllq		\$2,$SS1,$SS1
+
+	vpbroadcastq	$padbit,$PAD
+	vpbroadcastq	%x#$mask44,$mask44
+	vpbroadcastq	%x#$mask42,$mask42
+
+	vpbroadcastq	%x#$SS1,$S1		# broadcast 8th power
+	vpbroadcastq	%x#$SS2,$S2
+	vpbroadcastq	%x#$RR0,$R0
+	vpbroadcastq	%x#$RR1,$R1
+	vpbroadcastq	%x#$RR2,$R2
+
+	vpunpcklqdq	$T3,$T2,$T1		# transpose data
+	vpunpckhqdq	$T3,$T2,$T3
+
+	# at this point 64-bit lanes are ordered as 73625140
+
+	vpsrlq		\$24,$T3,$T2		# splat the data
+	vporq		$PAD,$T2,$T2
+	 vpaddq		$T2,$H2,$H2		# accumulate input
+	vpandq		$mask44,$T1,$T0
+	vpsrlq		\$44,$T1,$T1
+	vpsllq		\$20,$T3,$T3
+	vporq		$T3,$T1,$T1
+	vpandq		$mask44,$T1,$T1
+
+	sub		\$8,$len
+	jz		.Ltail_vpmadd52_8x
+	jmp		.Loop_vpmadd52_8x
+
+.align	32
+.Loop_vpmadd52_8x:
+	#vpaddq		$T2,$H2,$H2		# accumulate input
+	vpaddq		$T0,$H0,$H0
+	vpaddq		$T1,$H1,$H1
+
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$H2,$S1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$H2,$S1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$H2,$S2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$H2,$S2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$H2,$R0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$H2,$R0,$D2hi
+
+	 vmovdqu64	16*0($inp),$T2		# load data
+	 vmovdqu64	16*4($inp),$T3
+	 lea		16*8($inp),$inp
+	vpmadd52luq	$H0,$R0,$D0lo
+	vpmadd52huq	$H0,$R0,$D0hi
+	vpmadd52luq	$H0,$R1,$D1lo
+	vpmadd52huq	$H0,$R1,$D1hi
+	vpmadd52luq	$H0,$R2,$D2lo
+	vpmadd52huq	$H0,$R2,$D2hi
+
+	 vpunpcklqdq	$T3,$T2,$T1		# transpose data
+	 vpunpckhqdq	$T3,$T2,$T3
+	vpmadd52luq	$H1,$S2,$D0lo
+	vpmadd52huq	$H1,$S2,$D0hi
+	vpmadd52luq	$H1,$R0,$D1lo
+	vpmadd52huq	$H1,$R0,$D1hi
+	vpmadd52luq	$H1,$R1,$D2lo
+	vpmadd52huq	$H1,$R1,$D2hi
+
+	################################################################
+	# partial reduction (interleaved with data splat)
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$H0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	 vpsrlq		\$24,$T3,$T2
+	 vporq		$PAD,$T2,$T2
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$H1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	 vpandq		$mask44,$T1,$T0
+	 vpsrlq		\$44,$T1,$T1
+	 vpsllq		\$20,$T3,$T3
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$H2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	  vpaddq	$T2,$H2,$H2		# accumulate input
+	vpaddq		$D2hi,$H0,$H0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+	 vporq		$T3,$T1,$T1
+	 vpandq		$mask44,$T1,$T1
+
+	vpsrlq		\$44,$H0,$tmp		# additional step
+	vpandq		$mask44,$H0,$H0
+
+	vpaddq		$tmp,$H1,$H1
+
+	sub		\$8,$len		# len-=128
+	jnz		.Loop_vpmadd52_8x
+
+.Ltail_vpmadd52_8x:
+	#vpaddq		$T2,$H2,$H2		# accumulate input
+	vpaddq		$T0,$H0,$H0
+	vpaddq		$T1,$H1,$H1
+
+	vpxorq		$D0lo,$D0lo,$D0lo
+	vpmadd52luq	$H2,$SS1,$D0lo
+	vpxorq		$D0hi,$D0hi,$D0hi
+	vpmadd52huq	$H2,$SS1,$D0hi
+	vpxorq		$D1lo,$D1lo,$D1lo
+	vpmadd52luq	$H2,$SS2,$D1lo
+	vpxorq		$D1hi,$D1hi,$D1hi
+	vpmadd52huq	$H2,$SS2,$D1hi
+	vpxorq		$D2lo,$D2lo,$D2lo
+	vpmadd52luq	$H2,$RR0,$D2lo
+	vpxorq		$D2hi,$D2hi,$D2hi
+	vpmadd52huq	$H2,$RR0,$D2hi
+
+	vpmadd52luq	$H0,$RR0,$D0lo
+	vpmadd52huq	$H0,$RR0,$D0hi
+	vpmadd52luq	$H0,$RR1,$D1lo
+	vpmadd52huq	$H0,$RR1,$D1hi
+	vpmadd52luq	$H0,$RR2,$D2lo
+	vpmadd52huq	$H0,$RR2,$D2hi
+
+	vpmadd52luq	$H1,$SS2,$D0lo
+	vpmadd52huq	$H1,$SS2,$D0hi
+	vpmadd52luq	$H1,$RR0,$D1lo
+	vpmadd52huq	$H1,$RR0,$D1hi
+	vpmadd52luq	$H1,$RR1,$D2lo
+	vpmadd52huq	$H1,$RR1,$D2hi
+
+	################################################################
+	# horizontal addition
+
+	mov		\$1,%eax
+	kmovw		%eax,%k1
+	vpsrldq		\$8,$D0lo,$T0
+	vpsrldq		\$8,$D0hi,$H0
+	vpsrldq		\$8,$D1lo,$T1
+	vpsrldq		\$8,$D1hi,$H1
+	vpaddq		$T0,$D0lo,$D0lo
+	vpaddq		$H0,$D0hi,$D0hi
+	vpsrldq		\$8,$D2lo,$T2
+	vpsrldq		\$8,$D2hi,$H2
+	vpaddq		$T1,$D1lo,$D1lo
+	vpaddq		$H1,$D1hi,$D1hi
+	 vpermq		\$0x2,$D0lo,$T0
+	 vpermq		\$0x2,$D0hi,$H0
+	vpaddq		$T2,$D2lo,$D2lo
+	vpaddq		$H2,$D2hi,$D2hi
+
+	vpermq		\$0x2,$D1lo,$T1
+	vpermq		\$0x2,$D1hi,$H1
+	vpaddq		$T0,$D0lo,$D0lo
+	vpaddq		$H0,$D0hi,$D0hi
+	vpermq		\$0x2,$D2lo,$T2
+	vpermq		\$0x2,$D2hi,$H2
+	vpaddq		$T1,$D1lo,$D1lo
+	vpaddq		$H1,$D1hi,$D1hi
+	 vextracti64x4	\$1,$D0lo,%y#$T0
+	 vextracti64x4	\$1,$D0hi,%y#$H0
+	vpaddq		$T2,$D2lo,$D2lo
+	vpaddq		$H2,$D2hi,$D2hi
+
+	vextracti64x4	\$1,$D1lo,%y#$T1
+	vextracti64x4	\$1,$D1hi,%y#$H1
+	vextracti64x4	\$1,$D2lo,%y#$T2
+	vextracti64x4	\$1,$D2hi,%y#$H2
+___
+######## switch back to %ymm
+map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
+map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
+map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
+
+$code.=<<___;
+	vpaddq		$T0,$D0lo,${D0lo}{%k1}{z}
+	vpaddq		$H0,$D0hi,${D0hi}{%k1}{z}
+	vpaddq		$T1,$D1lo,${D1lo}{%k1}{z}
+	vpaddq		$H1,$D1hi,${D1hi}{%k1}{z}
+	vpaddq		$T2,$D2lo,${D2lo}{%k1}{z}
+	vpaddq		$H2,$D2hi,${D2hi}{%k1}{z}
+
+	################################################################
+	# partial reduction
+	vpsrlq		\$44,$D0lo,$tmp
+	vpsllq		\$8,$D0hi,$D0hi
+	vpandq		$mask44,$D0lo,$H0
+	vpaddq		$tmp,$D0hi,$D0hi
+
+	vpaddq		$D0hi,$D1lo,$D1lo
+
+	vpsrlq		\$44,$D1lo,$tmp
+	vpsllq		\$8,$D1hi,$D1hi
+	vpandq		$mask44,$D1lo,$H1
+	vpaddq		$tmp,$D1hi,$D1hi
+
+	vpaddq		$D1hi,$D2lo,$D2lo
+
+	vpsrlq		\$42,$D2lo,$tmp
+	vpsllq		\$10,$D2hi,$D2hi
+	vpandq		$mask42,$D2lo,$H2
+	vpaddq		$tmp,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+	vpsllq		\$2,$D2hi,$D2hi
+
+	vpaddq		$D2hi,$H0,$H0
+
+	vpsrlq		\$44,$H0,$tmp		# additional step
+	vpandq		$mask44,$H0,$H0
+
+	vpaddq		$tmp,$H1,$H1
+
+	################################################################
+
+	vmovq		%x#$H0,0($ctx)
+	vmovq		%x#$H1,8($ctx)
+	vmovq		%x#$H2,16($ctx)
+	vzeroall
+
+.Lno_data_vpmadd52_8x:
+	ret
+.size	poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+___
+}
+$code.=<<___;
+.type	poly1305_emit_base2_44,\@function,3
+.align	32
+poly1305_emit_base2_44:
+	mov	0($ctx),%r8	# load hash value
+	mov	8($ctx),%r9
+	mov	16($ctx),%r10
+
+	mov	%r9,%rax
+	shr	\$20,%r9
+	shl	\$44,%rax
+	mov	%r10,%rcx
+	shr	\$40,%r10
+	shl	\$24,%rcx
+
+	add	%rax,%r8
+	adc	%rcx,%r9
+	adc	\$0,%r10
+
+	mov	%r8,%rax
+	add	\$5,%r8		# compare to modulus
+	mov	%r9,%rcx
+	adc	\$0,%r9
+	adc	\$0,%r10
+	shr	\$2,%r10	# did 130-bit value overflow?
+	cmovnz	%r8,%rax
+	cmovnz	%r9,%rcx
+
+	add	0($nonce),%rax	# accumulate nonce
+	adc	8($nonce),%rcx
+	mov	%rax,0($mac)	# write result
+	mov	%rcx,8($mac)
+
+	ret
+.size	poly1305_emit_base2_44,.-poly1305_emit_base2_44
+___
+}	}	}
+}
+
+if (!$kernel)
+{	# chacha20-poly1305 helpers
+my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") :  # Win64 order
+                                  ("%rdi","%rsi","%rdx","%rcx");  # Unix order
+$code.=<<___;
+.globl	xor128_encrypt_n_pad
+.type	xor128_encrypt_n_pad,\@abi-omnipotent
+.align	16
+xor128_encrypt_n_pad:
+	sub	$otp,$inp
+	sub	$otp,$out
+	mov	$len,%r10		# put len aside
+	shr	\$4,$len		# len / 16
+	jz	.Ltail_enc
+	nop
+.Loop_enc_xmm:
+	movdqu	($inp,$otp),%xmm0
+	pxor	($otp),%xmm0
+	movdqu	%xmm0,($out,$otp)
+	movdqa	%xmm0,($otp)
+	lea	16($otp),$otp
+	dec	$len
+	jnz	.Loop_enc_xmm
+
+	and	\$15,%r10		# len % 16
+	jz	.Ldone_enc
+
+.Ltail_enc:
+	mov	\$16,$len
+	sub	%r10,$len
+	xor	%eax,%eax
+.Loop_enc_byte:
+	mov	($inp,$otp),%al
+	xor	($otp),%al
+	mov	%al,($out,$otp)
+	mov	%al,($otp)
+	lea	1($otp),$otp
+	dec	%r10
+	jnz	.Loop_enc_byte
+
+	xor	%eax,%eax
+.Loop_enc_pad:
+	mov	%al,($otp)
+	lea	1($otp),$otp
+	dec	$len
+	jnz	.Loop_enc_pad
+
+.Ldone_enc:
+	mov	$otp,%rax
+	ret
+.size	xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+.globl	xor128_decrypt_n_pad
+.type	xor128_decrypt_n_pad,\@abi-omnipotent
+.align	16
+xor128_decrypt_n_pad:
+	sub	$otp,$inp
+	sub	$otp,$out
+	mov	$len,%r10		# put len aside
+	shr	\$4,$len		# len / 16
+	jz	.Ltail_dec
+	nop
+.Loop_dec_xmm:
+	movdqu	($inp,$otp),%xmm0
+	movdqa	($otp),%xmm1
+	pxor	%xmm0,%xmm1
+	movdqu	%xmm1,($out,$otp)
+	movdqa	%xmm0,($otp)
+	lea	16($otp),$otp
+	dec	$len
+	jnz	.Loop_dec_xmm
+
+	pxor	%xmm1,%xmm1
+	and	\$15,%r10		# len % 16
+	jz	.Ldone_dec
+
+.Ltail_dec:
+	mov	\$16,$len
+	sub	%r10,$len
+	xor	%eax,%eax
+	xor	%r11,%r11
+.Loop_dec_byte:
+	mov	($inp,$otp),%r11b
+	mov	($otp),%al
+	xor	%r11b,%al
+	mov	%al,($out,$otp)
+	mov	%r11b,($otp)
+	lea	1($otp),$otp
+	dec	%r10
+	jnz	.Loop_dec_byte
+
+	xor	%eax,%eax
+.Loop_dec_pad:
+	mov	%al,($otp)
+	lea	1($otp),$otp
+	dec	$len
+	jnz	.Loop_dec_pad
+
+.Ldone_dec:
+	mov	$otp,%rax
+	ret
+.size	xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+___
+}
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern	__imp_RtlVirtualUnwind
+.type	se_handler,\@abi-omnipotent
+.align	16
+se_handler:
+	push	%rsi
+	push	%rdi
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	pushfq
+	sub	\$64,%rsp
+
+	mov	120($context),%rax	# pull context->Rax
+	mov	248($context),%rbx	# pull context->Rip
+
+	mov	8($disp),%rsi		# disp->ImageBase
+	mov	56($disp),%r11		# disp->HandlerData
+
+	mov	0(%r11),%r10d		# HandlerData[0]
+	lea	(%rsi,%r10),%r10	# prologue label
+	cmp	%r10,%rbx		# context->Rip<.Lprologue
+	jb	.Lcommon_seh_tail
+
+	mov	152($context),%rax	# pull context->Rsp
+
+	mov	4(%r11),%r10d		# HandlerData[1]
+	lea	(%rsi,%r10),%r10	# epilogue label
+	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
+	jae	.Lcommon_seh_tail
+
+	lea	48(%rax),%rax
+
+	mov	-8(%rax),%rbx
+	mov	-16(%rax),%rbp
+	mov	-24(%rax),%r12
+	mov	-32(%rax),%r13
+	mov	-40(%rax),%r14
+	mov	-48(%rax),%r15
+	mov	%rbx,144($context)	# restore context->Rbx
+	mov	%rbp,160($context)	# restore context->Rbp
+	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13
+	mov	%r14,232($context)	# restore context->R14
+	mov	%r15,240($context)	# restore context->R14
+
+	jmp	.Lcommon_seh_tail
+.size	se_handler,.-se_handler
+
+.type	avx_handler,\@abi-omnipotent
+.align	16
+avx_handler:
+	push	%rsi
+	push	%rdi
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	pushfq
+	sub	\$64,%rsp
+
+	mov	120($context),%rax	# pull context->Rax
+	mov	248($context),%rbx	# pull context->Rip
+
+	mov	8($disp),%rsi		# disp->ImageBase
+	mov	56($disp),%r11		# disp->HandlerData
+
+	mov	0(%r11),%r10d		# HandlerData[0]
+	lea	(%rsi,%r10),%r10	# prologue label
+	cmp	%r10,%rbx		# context->Rip<prologue label
+	jb	.Lcommon_seh_tail
+
+	mov	152($context),%rax	# pull context->Rsp
+
+	mov	4(%r11),%r10d		# HandlerData[1]
+	lea	(%rsi,%r10),%r10	# epilogue label
+	cmp	%r10,%rbx		# context->Rip>=epilogue label
+	jae	.Lcommon_seh_tail
+
+	mov	208($context),%rax	# pull context->R11
+
+	lea	0x50(%rax),%rsi
+	lea	0xf8(%rax),%rax
+	lea	512($context),%rdi	# &context.Xmm6
+	mov	\$20,%ecx
+	.long	0xa548f3fc		# cld; rep movsq
+
+.Lcommon_seh_tail:
+	mov	8(%rax),%rdi
+	mov	16(%rax),%rsi
+	mov	%rax,152($context)	# restore context->Rsp
+	mov	%rsi,168($context)	# restore context->Rsi
+	mov	%rdi,176($context)	# restore context->Rdi
+
+	mov	40($disp),%rdi		# disp->ContextRecord
+	mov	$context,%rsi		# context
+	mov	\$154,%ecx		# sizeof(CONTEXT)
+	.long	0xa548f3fc		# cld; rep movsq
+
+	mov	$disp,%rsi
+	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
+	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
+	mov	0(%rsi),%r8		# arg3, disp->ControlPc
+	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
+	mov	40(%rsi),%r10		# disp->ContextRecord
+	lea	56(%rsi),%r11		# &disp->HandlerData
+	lea	24(%rsi),%r12		# &disp->EstablisherFrame
+	mov	%r10,32(%rsp)		# arg5
+	mov	%r11,40(%rsp)		# arg6
+	mov	%r12,48(%rsp)		# arg7
+	mov	%rcx,56(%rsp)		# arg8, (NULL)
+	call	*__imp_RtlVirtualUnwind(%rip)
+
+	mov	\$1,%eax		# ExceptionContinueSearch
+	add	\$64,%rsp
+	popfq
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	pop	%rdi
+	pop	%rsi
+	ret
+.size	avx_handler,.-avx_handler
+
+.section	.pdata
+.align	4
+	.rva	.LSEH_begin_poly1305_init_x86_64
+	.rva	.LSEH_end_poly1305_init_x86_64
+	.rva	.LSEH_info_poly1305_init_x86_64
+
+	.rva	.LSEH_begin_poly1305_blocks_x86_64
+	.rva	.LSEH_end_poly1305_blocks_x86_64
+	.rva	.LSEH_info_poly1305_blocks_x86_64
+
+	.rva	.LSEH_begin_poly1305_emit_x86_64
+	.rva	.LSEH_end_poly1305_emit_x86_64
+	.rva	.LSEH_info_poly1305_emit_x86_64
+___
+$code.=<<___ if ($avx);
+	.rva	.LSEH_begin_poly1305_blocks_avx
+	.rva	.Lbase2_64_avx
+	.rva	.LSEH_info_poly1305_blocks_avx_1
+
+	.rva	.Lbase2_64_avx
+	.rva	.Leven_avx
+	.rva	.LSEH_info_poly1305_blocks_avx_2
+
+	.rva	.Leven_avx
+	.rva	.LSEH_end_poly1305_blocks_avx
+	.rva	.LSEH_info_poly1305_blocks_avx_3
+
+	.rva	.LSEH_begin_poly1305_emit_avx
+	.rva	.LSEH_end_poly1305_emit_avx
+	.rva	.LSEH_info_poly1305_emit_avx
+___
+$code.=<<___ if ($avx>1);
+	.rva	.LSEH_begin_poly1305_blocks_avx2
+	.rva	.Lbase2_64_avx2
+	.rva	.LSEH_info_poly1305_blocks_avx2_1
+
+	.rva	.Lbase2_64_avx2
+	.rva	.Leven_avx2
+	.rva	.LSEH_info_poly1305_blocks_avx2_2
+
+	.rva	.Leven_avx2
+	.rva	.LSEH_end_poly1305_blocks_avx2
+	.rva	.LSEH_info_poly1305_blocks_avx2_3
+___
+$code.=<<___ if ($avx>2);
+	.rva	.LSEH_begin_poly1305_blocks_avx512
+	.rva	.LSEH_end_poly1305_blocks_avx512
+	.rva	.LSEH_info_poly1305_blocks_avx512
+___
+$code.=<<___;
+.section	.xdata
+.align	8
+.LSEH_info_poly1305_init_x86_64:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64
+
+.LSEH_info_poly1305_blocks_x86_64:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.Lblocks_body,.Lblocks_epilogue
+
+.LSEH_info_poly1305_emit_x86_64:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64
+___
+$code.=<<___ if ($avx);
+.LSEH_info_poly1305_blocks_avx_1:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.Lblocks_avx_body,.Lblocks_avx_epilogue		# HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx_2:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.Lbase2_64_avx_body,.Lbase2_64_avx_epilogue	# HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx_3:
+	.byte	9,0,0,0
+	.rva	avx_handler
+	.rva	.Ldo_avx_body,.Ldo_avx_epilogue			# HandlerData[]
+
+.LSEH_info_poly1305_emit_avx:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_poly1305_blocks_avx2_1:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.Lblocks_avx2_body,.Lblocks_avx2_epilogue	# HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx2_2:
+	.byte	9,0,0,0
+	.rva	se_handler
+	.rva	.Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue	# HandlerData[]
+
+.LSEH_info_poly1305_blocks_avx2_3:
+	.byte	9,0,0,0
+	.rva	avx_handler
+	.rva	.Ldo_avx2_body,.Ldo_avx2_epilogue		# HandlerData[]
+___
+$code.=<<___ if ($avx>2);
+.LSEH_info_poly1305_blocks_avx512:
+	.byte	9,0,0,0
+	.rva	avx_handler
+	.rva	.Ldo_avx512_body,.Ldo_avx512_epilogue		# HandlerData[]
+___
+}
+
+open SELF,$0;
+while(<SELF>) {
+	next if (/^#!/);
+	last if (!s/^#/\/\// and !/^$/);
+	print;
+}
+close SELF;
+
+foreach (split('\n',$code)) {
+	s/\`([^\`]*)\`/eval($1)/ge;
+	s/%r([a-z]+)#d/%e$1/g;
+	s/%r([0-9]+)#d/%r$1d/g;
+	s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;
+
+	if ($kernel) {
+		s/(^\.type.*),[0-9]+$/\1/;
+		s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/;
+		next if /^\.cfi.*/;
+	}
+
+	print $_,"\n";
+}
+close STDOUT;
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4a1c05dce950..79bb58737d52 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -1,131 +1,176 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0 OR MIT
 /*
- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  */
 
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
 #include <crypto/internal/simd.h>
-#include <crypto/poly1305.h>
 #include <linux/crypto.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <asm/intel-family.h>
 #include <asm/simd.h>
 
-struct poly1305_simd_desc_ctx {
-	struct poly1305_desc_ctx base;
-	/* derived key u set? */
-	bool uset;
-#ifdef CONFIG_AS_AVX2
-	/* derived keys r^3, r^4 set? */
-	bool wset;
-#endif
-	/* derived Poly1305 key r^2 */
-	u32 u[5];
-	/* ... silently appended r^3 and r^4 when using AVX2 */
+asmlinkage void poly1305_init_x86_64(void *ctx,
+				     const u8 key[POLY1305_KEY_SIZE]);
+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
+				       const size_t len, const u32 padbit);
+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+				     const u32 nonce[4]);
+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+				  const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
+				    const u32 padbit);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
+				     const u32 padbit);
+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
+				       const size_t len, const u32 padbit);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
+
+struct poly1305_arch_internal {
+	union {
+		struct {
+			u32 h[5];
+			u32 is_base2_26;
+		};
+		u64 hs[3];
+	};
+	u64 r[2];
+	u64 pad;
+	struct { u32 r2, r1, r4, r3; } rn[9];
 };
 
-asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
-				    const u32 *r, unsigned int blocks);
-asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
-				     unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
-				     unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-
-static int poly1305_simd_init(struct shash_desc *desc)
+/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
 {
-	struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
+	struct poly1305_arch_internal *state = ctx;
+	u32 cy;
 
-	sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
-	sctx->wset = false;
-#endif
+	if (!state->is_base2_26)
+		return;
 
-	return crypto_poly1305_init(desc);
+	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+	state->hs[2] = state->h[4] >> 24;
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+	state->hs[2] &= 3;
+	state->hs[0] += cy;
+	state->hs[1] += (cy = ULT(state->hs[0], cy));
+	state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+	state->is_base2_26 = 0;
 }
 
-static void poly1305_simd_mult(u32 *a, const u32 *b)
+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
 {
-	u8 m[POLY1305_BLOCK_SIZE];
-
-	memset(m, 0, sizeof(m));
-	/* The poly1305 block function adds a hi-bit to the accumulator which
-	 * we don't need for key multiplication; compensate for it. */
-	a[4] -= 1 << 24;
-	poly1305_block_sse2(a, m, b, 1);
+	poly1305_init_x86_64(ctx, key);
 }
 
-static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
-					 const u8 *src, unsigned int srclen)
+static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
+				 const u32 padbit)
 {
-	struct poly1305_simd_desc_ctx *sctx;
-	unsigned int blocks, datalen;
+	struct poly1305_arch_internal *state = ctx;
 
-	BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
-	sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
+	/* SIMD disables preemption, so relax after processing each page. */
+	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
+		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
 
-	if (unlikely(!dctx->sset)) {
-		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
-		src += srclen - datalen;
-		srclen = datalen;
+	if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
+	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
+	    !crypto_simd_usable()) {
+		convert_to_base2_64(ctx);
+		poly1305_blocks_x86_64(ctx, inp, len, padbit);
+		return;
 	}
 
-#ifdef CONFIG_AS_AVX2
-	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
-		if (unlikely(!sctx->wset)) {
-			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r.r);
-				sctx->uset = true;
-			}
-			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
-			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
-			sctx->wset = true;
-		}
-		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
-				     sctx->u);
-		src += POLY1305_BLOCK_SIZE * 4 * blocks;
-		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
-	}
-#endif
-	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
-		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r.r);
-			sctx->uset = true;
-		}
-		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
-				     sctx->u);
-		src += POLY1305_BLOCK_SIZE * 2 * blocks;
-		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
-	}
-	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
-		srclen -= POLY1305_BLOCK_SIZE;
+	for (;;) {
+		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+		kernel_fpu_begin();
+		if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
+			poly1305_blocks_avx512(ctx, inp, bytes, padbit);
+		else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2))
+			poly1305_blocks_avx2(ctx, inp, bytes, padbit);
+		else
+			poly1305_blocks_avx(ctx, inp, bytes, padbit);
+		kernel_fpu_end();
+		len -= bytes;
+		if (!len)
+			break;
+		inp += bytes;
 	}
-	return srclen;
 }
 
-static int poly1305_simd_update(struct shash_desc *desc,
-				const u8 *src, unsigned int srclen)
+static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
+			       const u32 nonce[4])
 {
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	unsigned int bytes;
+	if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx))
+		poly1305_emit_x86_64(ctx, mac, nonce);
+	else
+		poly1305_emit_avx(ctx, mac, nonce);
+}
 
-	/* kernel_fpu_begin/end is costly, use fallback for small updates */
-	if (srclen <= 288 || !crypto_simd_usable())
-		return crypto_poly1305_update(desc, src, srclen);
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_simd_init(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(&key[16]);
+	dctx->s[1] = get_unaligned_le32(&key[20]);
+	dctx->s[2] = get_unaligned_le32(&key[24]);
+	dctx->s[3] = get_unaligned_le32(&key[28]);
+	dctx->buflen = 0;
+	dctx->sset = true;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+					       const u8 *inp, unsigned int len)
+{
+	unsigned int acc = 0;
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+			poly1305_simd_init(&dctx->h, inp);
+			inp += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			acc += POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(&inp[0]);
+			dctx->s[1] = get_unaligned_le32(&inp[4]);
+			dctx->s[2] = get_unaligned_le32(&inp[8]);
+			dctx->s[3] = get_unaligned_le32(&inp[12]);
+			inp += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			acc += POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+	}
+	return acc;
+}
 
-	kernel_fpu_begin();
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int srclen)
+{
+	unsigned int bytes, used;
 
 	if (unlikely(dctx->buflen)) {
 		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
@@ -135,34 +180,76 @@ static int poly1305_simd_update(struct shash_desc *desc,
 		dctx->buflen += bytes;
 
 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
-			poly1305_simd_blocks(dctx, dctx->buf,
-					     POLY1305_BLOCK_SIZE);
+			if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
+				poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
 			dctx->buflen = 0;
 		}
 	}
 
 	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		bytes = poly1305_simd_blocks(dctx, src, srclen);
-		src += srclen - bytes;
-		srclen = bytes;
+		bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+		srclen -= bytes;
+		used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+		if (likely(bytes - used))
+			poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
+		src += bytes;
 	}
 
-	kernel_fpu_end();
-
 	if (unlikely(srclen)) {
 		dctx->buflen = srclen;
 		memcpy(dctx->buf, src, srclen);
 	}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_simd_emit(&dctx->h, dst, dctx->s);
+	*dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	*dctx = (struct poly1305_desc_ctx){};
+	return 0;
+}
+
+static int crypto_poly1305_update(struct shash_desc *desc,
+				  const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	poly1305_update_arch(dctx, src, srclen);
+	return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
 
+	poly1305_final_arch(dctx, dst);
 	return 0;
 }
 
 static struct shash_alg alg = {
 	.digestsize	= POLY1305_DIGEST_SIZE,
-	.init		= poly1305_simd_init,
-	.update		= poly1305_simd_update,
+	.init		= crypto_poly1305_init,
+	.update		= crypto_poly1305_update,
 	.final		= crypto_poly1305_final,
-	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
+	.descsize	= sizeof(struct poly1305_desc_ctx),
 	.base		= {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-simd",
@@ -174,30 +261,33 @@ static struct shash_alg alg = {
 
 static int __init poly1305_simd_mod_init(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_XMM2))
-		return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
-	poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
-			    boot_cpu_has(X86_FEATURE_AVX2) &&
-			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
-	if (poly1305_use_avx2)
-		alg.descsize += 10 * sizeof(u32);
-#endif
-	return crypto_register_shash(&alg);
+	if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+		static_branch_enable(&poly1305_use_avx);
+	if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_AVX2) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+		static_branch_enable(&poly1305_use_avx2);
+	if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
+	    /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
+	    boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X)
+		static_branch_enable(&poly1305_use_avx512);
+	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
 }
 
 static void __exit poly1305_simd_mod_exit(void)
 {
-	crypto_unregister_shash(&alg);
+	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+		crypto_unregister_shash(&alg);
 }
 
 module_init(poly1305_simd_mod_init);
 module_exit(poly1305_simd_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
 MODULE_DESCRIPTION("Poly1305 authenticator");
 MODULE_ALIAS_CRYPTO("poly1305");
 MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index ddc51dbba3af..ba9e4c1e7f5c 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -555,7 +555,7 @@
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
 .align 8
-__serpent_enc_blk8_avx:
+SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -606,10 +606,10 @@ __serpent_enc_blk8_avx:
 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(__serpent_enc_blk8_avx)
+SYM_FUNC_END(__serpent_enc_blk8_avx)
 
 .align 8
-__serpent_dec_blk8_avx:
+SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
@@ -660,9 +660,9 @@ __serpent_dec_blk8_avx:
 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(__serpent_dec_blk8_avx)
+SYM_FUNC_END(__serpent_dec_blk8_avx)
 
-ENTRY(serpent_ecb_enc_8way_avx)
+SYM_FUNC_START(serpent_ecb_enc_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -678,9 +678,9 @@ ENTRY(serpent_ecb_enc_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ecb_enc_8way_avx)
+SYM_FUNC_END(serpent_ecb_enc_8way_avx)
 
-ENTRY(serpent_ecb_dec_8way_avx)
+SYM_FUNC_START(serpent_ecb_dec_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -696,9 +696,9 @@ ENTRY(serpent_ecb_dec_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ecb_dec_8way_avx)
+SYM_FUNC_END(serpent_ecb_dec_8way_avx)
 
-ENTRY(serpent_cbc_dec_8way_avx)
+SYM_FUNC_START(serpent_cbc_dec_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -714,9 +714,9 @@ ENTRY(serpent_cbc_dec_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_cbc_dec_8way_avx)
+SYM_FUNC_END(serpent_cbc_dec_8way_avx)
 
-ENTRY(serpent_ctr_8way_avx)
+SYM_FUNC_START(serpent_ctr_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -734,9 +734,9 @@ ENTRY(serpent_ctr_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ctr_8way_avx)
+SYM_FUNC_END(serpent_ctr_8way_avx)
 
-ENTRY(serpent_xts_enc_8way_avx)
+SYM_FUNC_START(serpent_xts_enc_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -756,9 +756,9 @@ ENTRY(serpent_xts_enc_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_xts_enc_8way_avx)
+SYM_FUNC_END(serpent_xts_enc_8way_avx)
 
-ENTRY(serpent_xts_dec_8way_avx)
+SYM_FUNC_START(serpent_xts_dec_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -778,4 +778,4 @@ ENTRY(serpent_xts_dec_8way_avx)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_xts_dec_8way_avx)
+SYM_FUNC_END(serpent_xts_dec_8way_avx)
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 37bc1d48106c..c9648aeae705 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -561,7 +561,7 @@
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
 .align 8
-__serpent_enc_blk16:
+SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
@@ -612,10 +612,10 @@ __serpent_enc_blk16:
 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(__serpent_enc_blk16)
+SYM_FUNC_END(__serpent_enc_blk16)
 
 .align 8
-__serpent_dec_blk16:
+SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
@@ -666,9 +666,9 @@ __serpent_dec_blk16:
 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(__serpent_dec_blk16)
+SYM_FUNC_END(__serpent_dec_blk16)
 
-ENTRY(serpent_ecb_enc_16way)
+SYM_FUNC_START(serpent_ecb_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -688,9 +688,9 @@ ENTRY(serpent_ecb_enc_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ecb_enc_16way)
+SYM_FUNC_END(serpent_ecb_enc_16way)
 
-ENTRY(serpent_ecb_dec_16way)
+SYM_FUNC_START(serpent_ecb_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -710,9 +710,9 @@ ENTRY(serpent_ecb_dec_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ecb_dec_16way)
+SYM_FUNC_END(serpent_ecb_dec_16way)
 
-ENTRY(serpent_cbc_dec_16way)
+SYM_FUNC_START(serpent_cbc_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -733,9 +733,9 @@ ENTRY(serpent_cbc_dec_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_cbc_dec_16way)
+SYM_FUNC_END(serpent_cbc_dec_16way)
 
-ENTRY(serpent_ctr_16way)
+SYM_FUNC_START(serpent_ctr_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -758,9 +758,9 @@ ENTRY(serpent_ctr_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_ctr_16way)
+SYM_FUNC_END(serpent_ctr_16way)
 
-ENTRY(serpent_xts_enc_16way)
+SYM_FUNC_START(serpent_xts_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -784,9 +784,9 @@ ENTRY(serpent_xts_enc_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_xts_enc_16way)
+SYM_FUNC_END(serpent_xts_enc_16way)
 
-ENTRY(serpent_xts_dec_16way)
+SYM_FUNC_START(serpent_xts_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -810,4 +810,4 @@ ENTRY(serpent_xts_dec_16way)
 
 	FRAME_END
 	ret;
-ENDPROC(serpent_xts_dec_16way)
+SYM_FUNC_END(serpent_xts_dec_16way)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index e5c4a4690ca9..6379b99cb722 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -497,7 +497,7 @@
 	pxor t0,		x3; \
 	movdqu x3,		(3*4*4)(out);
 
-ENTRY(__serpent_enc_blk_4way)
+SYM_FUNC_START(__serpent_enc_blk_4way)
 	/* input:
 	 *	arg_ctx(%esp): ctx, CTX
 	 *	arg_dst(%esp): dst
@@ -559,9 +559,9 @@ ENTRY(__serpent_enc_blk_4way)
 	xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
 
 	ret;
-ENDPROC(__serpent_enc_blk_4way)
+SYM_FUNC_END(__serpent_enc_blk_4way)
 
-ENTRY(serpent_dec_blk_4way)
+SYM_FUNC_START(serpent_dec_blk_4way)
 	/* input:
 	 *	arg_ctx(%esp): ctx, CTX
 	 *	arg_dst(%esp): dst
@@ -613,4 +613,4 @@ ENTRY(serpent_dec_blk_4way)
 	write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
 
 	ret;
-ENDPROC(serpent_dec_blk_4way)
+SYM_FUNC_END(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 5e0b3a3e97af..efb6dc17dc90 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -619,7 +619,7 @@
 	pxor t0,		x3; \
 	movdqu x3,		(3*4*4)(out);
 
-ENTRY(__serpent_enc_blk_8way)
+SYM_FUNC_START(__serpent_enc_blk_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -682,9 +682,9 @@ ENTRY(__serpent_enc_blk_8way)
 	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(__serpent_enc_blk_8way)
+SYM_FUNC_END(__serpent_enc_blk_8way)
 
-ENTRY(serpent_dec_blk_8way)
+SYM_FUNC_START(serpent_dec_blk_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -736,4 +736,4 @@ ENTRY(serpent_dec_blk_8way)
 	write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
 	ret;
-ENDPROC(serpent_dec_blk_8way)
+SYM_FUNC_END(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 13fd8d3d2da0..f973ace44ad3 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -19,18 +19,16 @@
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 
 /* 16-way AVX2 parallel cipher functions */
-asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
-				      const u8 *src);
-asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
-				      const u8 *src);
-asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
+asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
+asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
 				  le128 *iv);
-asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
-				      const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
-				      const u8 *src, le128 *iv);
+asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+				      le128 *iv);
+asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+				      le128 *iv);
 
 static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
 				   const u8 *key, unsigned int keylen)
@@ -44,13 +42,13 @@ static const struct common_glue_ctx serpent_enc = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
+		.fn_u = { .ecb = serpent_ecb_enc_16way }
 	}, {
 		.num_blocks = 8,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
+		.fn_u = { .ecb = serpent_ecb_enc_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+		.fn_u = { .ecb = __serpent_encrypt }
 	} }
 };
 
@@ -60,13 +58,13 @@ static const struct common_glue_ctx serpent_ctr = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
+		.fn_u = { .ctr = serpent_ctr_16way }
 	},  {
 		.num_blocks = 8,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
+		.fn_u = { .ctr = serpent_ctr_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
+		.fn_u = { .ctr = __serpent_crypt_ctr }
 	} }
 };
 
@@ -76,13 +74,13 @@ static const struct common_glue_ctx serpent_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
+		.fn_u = { .xts = serpent_xts_enc_16way }
 	}, {
 		.num_blocks = 8,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
+		.fn_u = { .xts = serpent_xts_enc_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
+		.fn_u = { .xts = serpent_xts_enc }
 	} }
 };
 
@@ -92,13 +90,13 @@ static const struct common_glue_ctx serpent_dec = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
+		.fn_u = { .ecb = serpent_ecb_dec_16way }
 	}, {
 		.num_blocks = 8,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
+		.fn_u = { .ecb = serpent_ecb_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .ecb = __serpent_decrypt }
 	} }
 };
 
@@ -108,13 +106,13 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
+		.fn_u = { .cbc = serpent_cbc_dec_16way }
 	}, {
 		.num_blocks = 8,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
+		.fn_u = { .cbc = serpent_cbc_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .cbc = __serpent_decrypt }
 	} }
 };
 
@@ -124,13 +122,13 @@ static const struct common_glue_ctx serpent_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = 16,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
+		.fn_u = { .xts = serpent_xts_dec_16way }
 	}, {
 		.num_blocks = 8,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
+		.fn_u = { .xts = serpent_xts_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
+		.fn_u = { .xts = serpent_xts_dec }
 	} }
 };
 
@@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
-					   req);
+	return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	return glue_xts_req_128bit(&serpent_enc_xts, req,
-				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+				   __serpent_encrypt, &ctx->tweak_ctx,
+				   &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	return glue_xts_req_128bit(&serpent_dec_xts, req,
-				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+				   __serpent_encrypt, &ctx->tweak_ctx,
+				   &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7d3dca38a5a2..7806d1cbe854 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -20,33 +20,35 @@
 #include <asm/crypto/serpent-avx.h>
 
 /* 8-way parallel cipher functions */
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
 
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
 
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
 
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
-				     const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+				     le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
 
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src, le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
 
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src, le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
 
-void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblk;
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	le128_to_be128(&ctrblk, iv);
 	le128_inc(iv);
@@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
 
-void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(__serpent_encrypt));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt);
 }
 EXPORT_SYMBOL_GPL(serpent_xts_enc);
 
-void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(__serpent_decrypt));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt);
 }
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 
@@ -102,10 +102,10 @@ static const struct common_glue_ctx serpent_enc = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
+		.fn_u = { .ecb = serpent_ecb_enc_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+		.fn_u = { .ecb = __serpent_encrypt }
 	} }
 };
 
@@ -115,10 +115,10 @@ static const struct common_glue_ctx serpent_ctr = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
+		.fn_u = { .ctr = serpent_ctr_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
+		.fn_u = { .ctr = __serpent_crypt_ctr }
 	} }
 };
 
@@ -128,10 +128,10 @@ static const struct common_glue_ctx serpent_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
+		.fn_u = { .xts = serpent_xts_enc_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
+		.fn_u = { .xts = serpent_xts_enc }
 	} }
 };
 
@@ -141,10 +141,10 @@ static const struct common_glue_ctx serpent_dec = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
+		.fn_u = { .ecb = serpent_ecb_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .ecb = __serpent_decrypt }
 	} }
 };
 
@@ -154,10 +154,10 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
+		.fn_u = { .cbc = serpent_cbc_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .cbc = __serpent_decrypt }
 	} }
 };
 
@@ -167,10 +167,10 @@ static const struct common_glue_ctx serpent_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
+		.fn_u = { .xts = serpent_xts_dec_8way_avx }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
+		.fn_u = { .xts = serpent_xts_dec }
 	} }
 };
 
@@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
-					   req);
+	return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	return glue_xts_req_128bit(&serpent_enc_xts, req,
-				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+				   __serpent_encrypt, &ctx->tweak_ctx,
+				   &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	return glue_xts_req_128bit(&serpent_dec_xts, req,
-				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+				   __serpent_encrypt, &ctx->tweak_ctx,
+				   &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 5fdf1931d069..4fed8d26b91a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
 	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
 }
 
-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s)
 {
 	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 	unsigned int j;
 
 	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
@@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblk;
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	le128_to_be128(&ctrblk, iv);
 	le128_inc(iv);
@@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 	u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s,
 				   le128 *iv)
 {
 	be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 	unsigned int i;
 
 	for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
@@ -79,10 +85,10 @@ static const struct common_glue_ctx serpent_enc = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+		.fn_u = { .ecb = serpent_enc_blk_xway }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+		.fn_u = { .ecb = __serpent_encrypt }
 	} }
 };
 
@@ -92,10 +98,10 @@ static const struct common_glue_ctx serpent_ctr = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+		.fn_u = { .ctr = serpent_crypt_ctr_xway }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+		.fn_u = { .ctr = serpent_crypt_ctr }
 	} }
 };
 
@@ -105,10 +111,10 @@ static const struct common_glue_ctx serpent_dec = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+		.fn_u = { .ecb = serpent_dec_blk_xway }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .ecb = __serpent_decrypt }
 	} }
 };
 
@@ -118,10 +124,10 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+		.fn_u = { .cbc = serpent_decrypt_cbc_xway }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+		.fn_u = { .cbc = __serpent_decrypt }
 	} }
 };
 
@@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+	return glue_cbc_encrypt_req_128bit(__serpent_encrypt,
 					   req);
 }
 
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 9f712a7dfd79..1e594d60afa5 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -62,11 +62,11 @@
  *Visit http://software.intel.com/en-us/articles/
  *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
  *
- *Updates 20-byte SHA-1 record in 'hash' for even number of
- *'num_blocks' consecutive 64-byte blocks
+ *Updates 20-byte SHA-1 record at start of 'state', from 'input', for
+ *even number of 'blocks' consecutive 64-byte blocks.
  *
  *extern "C" void sha1_transform_avx2(
- *	int *hash, const char* input, size_t num_blocks );
+ *	struct sha1_state *state, const u8* input, int blocks );
  */
 
 #include <linux/linkage.h>
@@ -634,7 +634,7 @@ _loop3:
  * param: function's name
  */
 .macro SHA1_VECTOR_ASM  name
-	ENTRY(\name)
+	SYM_FUNC_START(\name)
 
 	push	%rbx
 	push	%r12
@@ -676,7 +676,7 @@ _loop3:
 
 	ret
 
-	ENDPROC(\name)
+	SYM_FUNC_END(\name)
 .endm
 
 .section .rodata
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index ebbdba72ae07..11efe3a45a1f 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -95,7 +95,7 @@
  */
 .text
 .align 32
-ENTRY(sha1_ni_transform)
+SYM_FUNC_START(sha1_ni_transform)
 	mov		%rsp, RSPSAVE
 	sub		$FRAME_SIZE, %rsp
 	and		$~0xF, %rsp
@@ -291,7 +291,7 @@ ENTRY(sha1_ni_transform)
 	mov		RSPSAVE, %rsp
 
 	ret
-ENDPROC(sha1_ni_transform)
+SYM_FUNC_END(sha1_ni_transform)
 
 .section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
 .align 16
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 99c5b8c4dc38..12e2d19d7402 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -67,7 +67,7 @@
  * param: function's name
  */
 .macro SHA1_VECTOR_ASM  name
-	ENTRY(\name)
+	SYM_FUNC_START(\name)
 
 	push	%rbx
 	push	%r12
@@ -101,7 +101,7 @@
 	pop	%rbx
 	ret
 
-	ENDPROC(\name)
+	SYM_FUNC_END(\name)
 .endm
 
 /*
@@ -457,9 +457,13 @@ W_PRECALC_SSSE3
 	movdqu	\a,\b
 .endm
 
-/* SSSE3 optimized implementation:
- *  extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws,
- *                                       unsigned int rounds);
+/*
+ * SSSE3 optimized implementation:
+ *
+ * extern "C" void sha1_transform_ssse3(struct sha1_state *state,
+ *					const u8 *data, int blocks);
+ *
+ * Note that struct sha1_state is assumed to begin with u32 state[5].
  */
 SHA1_VECTOR_ASM     sha1_transform_ssse3
 
@@ -545,8 +549,8 @@ W_PRECALC_AVX
 
 
 /* AVX optimized implementation:
- *  extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws,
- *                                     unsigned int rounds);
+ *  extern "C" void sha1_transform_avx(struct sha1_state *state,
+ *				       const u8 *data, int blocks);
  */
 SHA1_VECTOR_ASM     sha1_transform_avx
 
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 639d4c2fd6a8..d70b40ad594c 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -27,11 +27,8 @@
 #include <crypto/sha1_base.h>
 #include <asm/simd.h>
 
-typedef void (sha1_transform_fn)(u32 *digest, const char *data,
-				unsigned int rounds);
-
 static int sha1_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len, sha1_transform_fn *sha1_xform)
+			     unsigned int len, sha1_block_fn *sha1_xform)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
@@ -39,48 +36,47 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
 	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
 		return crypto_sha1_update(desc, data, len);
 
-	/* make sure casting to sha1_block_fn() is safe */
+	/*
+	 * Make sure struct sha1_state begins directly with the SHA1
+	 * 160-bit internal state, as this is what the asm functions expect.
+	 */
 	BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
 
 	kernel_fpu_begin();
-	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_xform);
+	sha1_base_do_update(desc, data, len, sha1_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
 static int sha1_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
+		      unsigned int len, u8 *out, sha1_block_fn *sha1_xform)
 {
 	if (!crypto_simd_usable())
 		return crypto_sha1_finup(desc, data, len, out);
 
 	kernel_fpu_begin();
 	if (len)
-		sha1_base_do_update(desc, data, len,
-				    (sha1_block_fn *)sha1_xform);
-	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform);
+		sha1_base_do_update(desc, data, len, sha1_xform);
+	sha1_base_do_finalize(desc, sha1_xform);
 	kernel_fpu_end();
 
 	return sha1_base_finish(desc, out);
 }
 
-asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
-				     unsigned int rounds);
+asmlinkage void sha1_transform_ssse3(struct sha1_state *state,
+				     const u8 *data, int blocks);
 
 static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
-	return sha1_update(desc, data, len,
-			(sha1_transform_fn *) sha1_transform_ssse3);
+	return sha1_update(desc, data, len, sha1_transform_ssse3);
 }
 
 static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
 			      unsigned int len, u8 *out)
 {
-	return sha1_finup(desc, data, len, out,
-			(sha1_transform_fn *) sha1_transform_ssse3);
+	return sha1_finup(desc, data, len, out, sha1_transform_ssse3);
 }
 
 /* Add padding and return the message digest. */
@@ -119,21 +115,19 @@ static void unregister_sha1_ssse3(void)
 }
 
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
-				   unsigned int rounds);
+asmlinkage void sha1_transform_avx(struct sha1_state *state,
+				   const u8 *data, int blocks);
 
 static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
-	return sha1_update(desc, data, len,
-			(sha1_transform_fn *) sha1_transform_avx);
+	return sha1_update(desc, data, len, sha1_transform_avx);
 }
 
 static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
 			      unsigned int len, u8 *out)
 {
-	return sha1_finup(desc, data, len, out,
-			(sha1_transform_fn *) sha1_transform_avx);
+	return sha1_finup(desc, data, len, out, sha1_transform_avx);
 }
 
 static int sha1_avx_final(struct shash_desc *desc, u8 *out)
@@ -190,8 +184,8 @@ static inline void unregister_sha1_avx(void) { }
 #if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX)
 #define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
 
-asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
-				    unsigned int rounds);
+asmlinkage void sha1_transform_avx2(struct sha1_state *state,
+				    const u8 *data, int blocks);
 
 static bool avx2_usable(void)
 {
@@ -203,28 +197,26 @@ static bool avx2_usable(void)
 	return false;
 }
 
-static void sha1_apply_transform_avx2(u32 *digest, const char *data,
-				unsigned int rounds)
+static void sha1_apply_transform_avx2(struct sha1_state *state,
+				      const u8 *data, int blocks)
 {
 	/* Select the optimal transform based on data block size */
-	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
-		sha1_transform_avx2(digest, data, rounds);
+	if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(state, data, blocks);
 	else
-		sha1_transform_avx(digest, data, rounds);
+		sha1_transform_avx(state, data, blocks);
 }
 
 static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
-	return sha1_update(desc, data, len,
-		(sha1_transform_fn *) sha1_apply_transform_avx2);
+	return sha1_update(desc, data, len, sha1_apply_transform_avx2);
 }
 
 static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
 			      unsigned int len, u8 *out)
 {
-	return sha1_finup(desc, data, len, out,
-		(sha1_transform_fn *) sha1_apply_transform_avx2);
+	return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2);
 }
 
 static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
@@ -267,21 +259,19 @@ static inline void unregister_sha1_avx2(void) { }
 #endif
 
 #ifdef CONFIG_AS_SHA1_NI
-asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
-				   unsigned int rounds);
+asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data,
+				  int rounds);
 
 static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
-	return sha1_update(desc, data, len,
-		(sha1_transform_fn *) sha1_ni_transform);
+	return sha1_update(desc, data, len, sha1_ni_transform);
 }
 
 static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
 			      unsigned int len, u8 *out)
 {
-	return sha1_finup(desc, data, len, out,
-		(sha1_transform_fn *) sha1_ni_transform);
+	return sha1_finup(desc, data, len, out, sha1_ni_transform);
 }
 
 static int sha1_ni_final(struct shash_desc *desc, u8 *out)
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 001bbcf93c79..fcbc30f58c38 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -341,13 +341,13 @@ a = TMP_
 .endm
 
 ########################################################################
-## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to digest
+## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks)
+## arg 1 : pointer to state
 ## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
-ENTRY(sha256_transform_avx)
+SYM_FUNC_START(sha256_transform_avx)
 .align 32
 	pushq   %rbx
 	pushq   %r12
@@ -460,7 +460,7 @@ done_hash:
 	popq	%r12
 	popq    %rbx
 	ret
-ENDPROC(sha256_transform_avx)
+SYM_FUNC_END(sha256_transform_avx)
 
 .section	.rodata.cst256.K256, "aM", @progbits, 256
 .align 64
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 1420db15dcdd..499d9ec129de 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -520,13 +520,13 @@ STACK_SIZE	= _RSP      + _RSP_SIZE
 .endm
 
 ########################################################################
-## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to digest
+## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks)
+## arg 1 : pointer to state
 ## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
-ENTRY(sha256_transform_rorx)
+SYM_FUNC_START(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
 	pushq	%r12
@@ -713,7 +713,7 @@ done_hash:
 	popq	%r12
 	popq	%rbx
 	ret
-ENDPROC(sha256_transform_rorx)
+SYM_FUNC_END(sha256_transform_rorx)
 
 .section	.rodata.cst512.K256, "aM", @progbits, 512
 .align 64
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index c6c05ed2c16a..ddfa863b4ee3 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -347,13 +347,15 @@ a = TMP_
 .endm
 
 ########################################################################
-## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to digest
+## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data,
+##			       int blocks);
+## arg 1 : pointer to state
+##	   (struct sha256_state is assumed to begin with u32 state[8])
 ## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
-ENTRY(sha256_transform_ssse3)
+SYM_FUNC_START(sha256_transform_ssse3)
 .align 32
 	pushq   %rbx
 	pushq   %r12
@@ -471,7 +473,7 @@ done_hash:
 	popq    %rbx
 
 	ret
-ENDPROC(sha256_transform_ssse3)
+SYM_FUNC_END(sha256_transform_ssse3)
 
 .section	.rodata.cst256.K256, "aM", @progbits, 256
 .align 64
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index fb58f58ecfbc..7abade04a3a3 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -97,7 +97,7 @@
 
 .text
 .align 32
-ENTRY(sha256_ni_transform)
+SYM_FUNC_START(sha256_ni_transform)
 
 	shl		$6, NUM_BLKS		/*  convert to bytes */
 	jz		.Ldone_hash
@@ -327,7 +327,7 @@ ENTRY(sha256_ni_transform)
 .Ldone_hash:
 
 	ret
-ENDPROC(sha256_ni_transform)
+SYM_FUNC_END(sha256_ni_transform)
 
 .section	.rodata.cst256.K256, "aM", @progbits, 256
 .align 64
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index f9aff31fe59e..03ad657c04bd 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -41,12 +41,11 @@
 #include <linux/string.h>
 #include <asm/simd.h>
 
-asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
-				       u64 rounds);
-typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
+asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
+				       const u8 *data, int blocks);
 
 static int _sha256_update(struct shash_desc *desc, const u8 *data,
-			  unsigned int len, sha256_transform_fn *sha256_xform)
+			  unsigned int len, sha256_block_fn *sha256_xform)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
@@ -54,28 +53,29 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data,
 	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
 		return crypto_sha256_update(desc, data, len);
 
-	/* make sure casting to sha256_block_fn() is safe */
+	/*
+	 * Make sure struct sha256_state begins directly with the SHA256
+	 * 256-bit internal state, as this is what the asm functions expect.
+	 */
 	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
 
 	kernel_fpu_begin();
-	sha256_base_do_update(desc, data, len,
-			      (sha256_block_fn *)sha256_xform);
+	sha256_base_do_update(desc, data, len, sha256_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
 static int sha256_finup(struct shash_desc *desc, const u8 *data,
-	      unsigned int len, u8 *out, sha256_transform_fn *sha256_xform)
+	      unsigned int len, u8 *out, sha256_block_fn *sha256_xform)
 {
 	if (!crypto_simd_usable())
 		return crypto_sha256_finup(desc, data, len, out);
 
 	kernel_fpu_begin();
 	if (len)
-		sha256_base_do_update(desc, data, len,
-				      (sha256_block_fn *)sha256_xform);
-	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform);
+		sha256_base_do_update(desc, data, len, sha256_xform);
+	sha256_base_do_finalize(desc, sha256_xform);
 	kernel_fpu_end();
 
 	return sha256_base_finish(desc, out);
@@ -145,8 +145,8 @@ static void unregister_sha256_ssse3(void)
 }
 
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
-				     u64 rounds);
+asmlinkage void sha256_transform_avx(struct sha256_state *state,
+				     const u8 *data, int blocks);
 
 static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
@@ -227,8 +227,8 @@ static inline void unregister_sha256_avx(void) { }
 #endif
 
 #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
-asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
-				      u64 rounds);
+asmlinkage void sha256_transform_rorx(struct sha256_state *state,
+				      const u8 *data, int blocks);
 
 static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
@@ -307,8 +307,8 @@ static inline void unregister_sha256_avx2(void) { }
 #endif
 
 #ifdef CONFIG_AS_SHA256_NI
-asmlinkage void sha256_ni_transform(u32 *digest, const char *data,
-				   u64 rounds); /*unsigned int rounds);*/
+asmlinkage void sha256_ni_transform(struct sha256_state *digest,
+				    const u8 *data, int rounds);
 
 static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 39235fefe6f7..90ea945ba5e6 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -271,13 +271,14 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_avx(void* D, const void* M, u64 L)
-# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
-# The size of the message pointed to by M must be an integer multiple of SHA512
-# message blocks.
-# L is the message length in SHA512 blocks
+# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks)
+# Purpose: Updates the SHA512 digest stored at "state" with the message
+# stored in "data".
+# The size of the message pointed to by "data" must be an integer multiple
+# of SHA512 message blocks.
+# "blocks" is the message length in SHA512 blocks
 ########################################################################
-ENTRY(sha512_transform_avx)
+SYM_FUNC_START(sha512_transform_avx)
 	cmp $0, msglen
 	je nowork
 
@@ -365,7 +366,7 @@ updateblock:
 
 nowork:
 	ret
-ENDPROC(sha512_transform_avx)
+SYM_FUNC_END(sha512_transform_avx)
 
 ########################################################################
 ### Binary Data
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index b16d56005162..3dd886b14e7d 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -563,13 +563,14 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
-# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
-# The size of the message pointed to by M must be an integer multiple of SHA512
-#   message blocks.
-# L is the message length in SHA512 blocks
+# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks)
+# Purpose: Updates the SHA512 digest stored at "state" with the message
+# stored in "data".
+# The size of the message pointed to by "data" must be an integer multiple
+# of SHA512 message blocks.
+# "blocks" is the message length in SHA512 blocks
 ########################################################################
-ENTRY(sha512_transform_rorx)
+SYM_FUNC_START(sha512_transform_rorx)
 	# Allocate Stack Space
 	mov	%rsp, %rax
 	sub	$frame_size, %rsp
@@ -682,7 +683,7 @@ done_hash:
 	# Restore Stack Pointer
 	mov	frame_RSPSAVE(%rsp), %rsp
 	ret
-ENDPROC(sha512_transform_rorx)
+SYM_FUNC_END(sha512_transform_rorx)
 
 ########################################################################
 ### Binary Data
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 66bbd9058a90..7946a1bee85b 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -269,13 +269,16 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
-# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
-# The size of the message pointed to by M must be an integer multiple of SHA512
-#   message blocks.
-# L is the message length in SHA512 blocks.
+## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data,
+##			       int blocks);
+# (struct sha512_state is assumed to begin with u64 state[8])
+# Purpose: Updates the SHA512 digest stored at "state" with the message
+# stored in "data".
+# The size of the message pointed to by "data" must be an integer multiple
+# of SHA512 message blocks.
+# "blocks" is the message length in SHA512 blocks.
 ########################################################################
-ENTRY(sha512_transform_ssse3)
+SYM_FUNC_START(sha512_transform_ssse3)
 
 	cmp $0, msglen
 	je nowork
@@ -364,7 +367,7 @@ updateblock:
 
 nowork:
 	ret
-ENDPROC(sha512_transform_ssse3)
+SYM_FUNC_END(sha512_transform_ssse3)
 
 ########################################################################
 ### Binary Data
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 458356a3f124..1c444f41037c 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -39,13 +39,11 @@
 #include <crypto/sha512_base.h>
 #include <asm/simd.h>
 
-asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
-				       u64 rounds);
-
-typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds);
+asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
+				       const u8 *data, int blocks);
 
 static int sha512_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len, sha512_transform_fn *sha512_xform)
+		       unsigned int len, sha512_block_fn *sha512_xform)
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 
@@ -53,28 +51,29 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
 	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
 		return crypto_sha512_update(desc, data, len);
 
-	/* make sure casting to sha512_block_fn() is safe */
+	/*
+	 * Make sure struct sha512_state begins directly with the SHA512
+	 * 512-bit internal state, as this is what the asm functions expect.
+	 */
 	BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
 
 	kernel_fpu_begin();
-	sha512_base_do_update(desc, data, len,
-			      (sha512_block_fn *)sha512_xform);
+	sha512_base_do_update(desc, data, len, sha512_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
 static int sha512_finup(struct shash_desc *desc, const u8 *data,
-	      unsigned int len, u8 *out, sha512_transform_fn *sha512_xform)
+	      unsigned int len, u8 *out, sha512_block_fn *sha512_xform)
 {
 	if (!crypto_simd_usable())
 		return crypto_sha512_finup(desc, data, len, out);
 
 	kernel_fpu_begin();
 	if (len)
-		sha512_base_do_update(desc, data, len,
-				      (sha512_block_fn *)sha512_xform);
-	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform);
+		sha512_base_do_update(desc, data, len, sha512_xform);
+	sha512_base_do_finalize(desc, sha512_xform);
 	kernel_fpu_end();
 
 	return sha512_base_finish(desc, out);
@@ -144,8 +143,8 @@ static void unregister_sha512_ssse3(void)
 }
 
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
-				     u64 rounds);
+asmlinkage void sha512_transform_avx(struct sha512_state *state,
+				     const u8 *data, int blocks);
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
@@ -225,8 +224,8 @@ static inline void unregister_sha512_avx(void) { }
 #endif
 
 #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
-asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
-				      u64 rounds);
+asmlinkage void sha512_transform_rorx(struct sha512_state *state,
+				      const u8 *data, int blocks);
 
 static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
 		       unsigned int len)
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 698b8f2a56e2..a5151393bb2f 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -234,7 +234,7 @@
 	vpxor		x3, wkey, x3;
 
 .align 8
-__twofish_enc_blk8:
+SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -273,10 +273,10 @@ __twofish_enc_blk8:
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
-ENDPROC(__twofish_enc_blk8)
+SYM_FUNC_END(__twofish_enc_blk8)
 
 .align 8
-__twofish_dec_blk8:
+SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
@@ -313,9 +313,9 @@ __twofish_dec_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
 	ret;
-ENDPROC(__twofish_dec_blk8)
+SYM_FUNC_END(__twofish_dec_blk8)
 
-ENTRY(twofish_ecb_enc_8way)
+SYM_FUNC_START(twofish_ecb_enc_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -333,9 +333,9 @@ ENTRY(twofish_ecb_enc_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_ecb_enc_8way)
+SYM_FUNC_END(twofish_ecb_enc_8way)
 
-ENTRY(twofish_ecb_dec_8way)
+SYM_FUNC_START(twofish_ecb_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -353,9 +353,9 @@ ENTRY(twofish_ecb_dec_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_ecb_dec_8way)
+SYM_FUNC_END(twofish_ecb_dec_8way)
 
-ENTRY(twofish_cbc_dec_8way)
+SYM_FUNC_START(twofish_cbc_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -378,9 +378,9 @@ ENTRY(twofish_cbc_dec_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_cbc_dec_8way)
+SYM_FUNC_END(twofish_cbc_dec_8way)
 
-ENTRY(twofish_ctr_8way)
+SYM_FUNC_START(twofish_ctr_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -405,9 +405,9 @@ ENTRY(twofish_ctr_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_ctr_8way)
+SYM_FUNC_END(twofish_ctr_8way)
 
-ENTRY(twofish_xts_enc_8way)
+SYM_FUNC_START(twofish_xts_enc_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -429,9 +429,9 @@ ENTRY(twofish_xts_enc_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_xts_enc_8way)
+SYM_FUNC_END(twofish_xts_enc_8way)
 
-ENTRY(twofish_xts_dec_8way)
+SYM_FUNC_START(twofish_xts_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -453,4 +453,4 @@ ENTRY(twofish_xts_dec_8way)
 
 	FRAME_END
 	ret;
-ENDPROC(twofish_xts_dec_8way)
+SYM_FUNC_END(twofish_xts_dec_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 290cc4e9a6fe..a6f09e4f2e46 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -207,7 +207,7 @@
 	xor	%esi,		d ## D;\
 	ror	$1,		d ## D;
 
-ENTRY(twofish_enc_blk)
+SYM_FUNC_START(twofish_enc_blk)
 	push	%ebp			/* save registers according to calling convention*/
 	push    %ebx
 	push    %esi
@@ -261,9 +261,9 @@ ENTRY(twofish_enc_blk)
 	pop	%ebp
 	mov	$1,	%eax
 	ret
-ENDPROC(twofish_enc_blk)
+SYM_FUNC_END(twofish_enc_blk)
 
-ENTRY(twofish_dec_blk)
+SYM_FUNC_START(twofish_dec_blk)
 	push	%ebp			/* save registers according to calling convention*/
 	push    %ebx
 	push    %esi
@@ -318,4 +318,4 @@ ENTRY(twofish_dec_blk)
 	pop	%ebp
 	mov	$1,	%eax
 	ret
-ENDPROC(twofish_dec_blk)
+SYM_FUNC_END(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index e495e07c7f1b..fc23552afe37 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -220,7 +220,7 @@
 	rorq $32,			RAB2; \
 	outunpack3(mov, RIO, 2, RAB, 2);
 
-ENTRY(__twofish_enc_blk_3way)
+SYM_FUNC_START(__twofish_enc_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -267,9 +267,9 @@ ENTRY(__twofish_enc_blk_3way)
 	popq %r12;
 	popq %r13;
 	ret;
-ENDPROC(__twofish_enc_blk_3way)
+SYM_FUNC_END(__twofish_enc_blk_3way)
 
-ENTRY(twofish_dec_blk_3way)
+SYM_FUNC_START(twofish_dec_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -302,4 +302,4 @@ ENTRY(twofish_dec_blk_3way)
 	popq %r12;
 	popq %r13;
 	ret;
-ENDPROC(twofish_dec_blk_3way)
+SYM_FUNC_END(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index ecef2cb9f43f..d2e56232494a 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -202,7 +202,7 @@
 	xor	%r8d,		d ## D;\
 	ror	$1,		d ## D;
 
-ENTRY(twofish_enc_blk)
+SYM_FUNC_START(twofish_enc_blk)
 	pushq    R1
 
 	/* %rdi contains the ctx address */
@@ -253,9 +253,9 @@ ENTRY(twofish_enc_blk)
 	popq	R1
 	movl	$1,%eax
 	ret
-ENDPROC(twofish_enc_blk)
+SYM_FUNC_END(twofish_enc_blk)
 
-ENTRY(twofish_dec_blk)
+SYM_FUNC_START(twofish_dec_blk)
 	pushq    R1
 
 	/* %rdi contains the ctx address */
@@ -305,4 +305,4 @@ ENTRY(twofish_dec_blk)
 	popq	R1
 	movl	$1,%eax
 	ret
-ENDPROC(twofish_dec_blk)
+SYM_FUNC_END(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index d561c821788b..2dbc8ce3730e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -22,20 +22,17 @@
 #define TWOFISH_PARALLEL_BLOCKS 8
 
 /* 8-way parallel cipher functions */
-asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
-asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
+asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
-asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
-				 const u8 *src, le128 *iv);
+asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
+				 le128 *iv);
 
-asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src, le128 *iv);
-asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src, le128 *iv);
+asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
+				     le128 *iv);
+asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
+				     le128 *iv);
 
 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
 				   const u8 *key, unsigned int keylen)
@@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
 	return twofish_setkey(&tfm->base, key, keylen);
 }
 
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
 {
 	__twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
-static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(twofish_enc_blk));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk);
 }
 
-static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-				  GLUE_FUNC_CAST(twofish_dec_blk));
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk);
 }
 
 struct twofish_xts_ctx {
@@ -70,7 +64,6 @@ static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int keylen)
 {
 	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
 	err = xts_verify_key(tfm, key, keylen);
@@ -78,13 +71,12 @@ static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return err;
 
 	/* first half of xts-key is for crypt */
-	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
+	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
 }
 
 static const struct common_glue_ctx twofish_enc = {
@@ -93,13 +85,13 @@ static const struct common_glue_ctx twofish_enc = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
+		.fn_u = { .ecb = twofish_ecb_enc_8way }
 	}, {
 		.num_blocks = 3,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+		.fn_u = { .ecb = twofish_enc_blk_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+		.fn_u = { .ecb = twofish_enc_blk }
 	} }
 };
 
@@ -109,13 +101,13 @@ static const struct common_glue_ctx twofish_ctr = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
+		.fn_u = { .ctr = twofish_ctr_8way }
 	}, {
 		.num_blocks = 3,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
+		.fn_u = { .ctr = twofish_enc_blk_ctr }
 	} }
 };
 
@@ -125,10 +117,10 @@ static const struct common_glue_ctx twofish_enc_xts = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
+		.fn_u = { .xts = twofish_xts_enc_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
+		.fn_u = { .xts = twofish_xts_enc }
 	} }
 };
 
@@ -138,13 +130,13 @@ static const struct common_glue_ctx twofish_dec = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
+		.fn_u = { .ecb = twofish_ecb_dec_8way }
 	}, {
 		.num_blocks = 3,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+		.fn_u = { .ecb = twofish_dec_blk_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+		.fn_u = { .ecb = twofish_dec_blk }
 	} }
 };
 
@@ -154,13 +146,13 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
+		.fn_u = { .cbc = twofish_cbc_dec_8way }
 	}, {
 		.num_blocks = 3,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+		.fn_u = { .cbc = twofish_dec_blk }
 	} }
 };
 
@@ -170,10 +162,10 @@ static const struct common_glue_ctx twofish_dec_xts = {
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
+		.fn_u = { .xts = twofish_xts_dec_8way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
+		.fn_u = { .xts = twofish_xts_dec }
 	} }
 };
 
@@ -189,8 +181,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
-					   req);
+	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -208,8 +199,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&twofish_enc_xts, req,
-				   XTS_TWEAK_CAST(twofish_enc_blk),
+	return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -218,8 +208,7 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_req_128bit(&twofish_dec_xts, req,
-				   XTS_TWEAK_CAST(twofish_enc_blk),
+	return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk,
 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 1dc9e29f221e..768af6075479 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
 	return twofish_setkey(&tfm->base, key, keylen);
 }
 
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
 {
 	__twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
-static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
+static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
 					    const u8 *src)
 {
 	__twofish_enc_blk_3way(ctx, dst, src, true);
 }
 
-void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
+void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
 {
 	u128 ivs[2];
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	ivs[0] = src[0];
 	ivs[1] = src[1];
@@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
 }
 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
 
-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblk;
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	if (dst != src)
 		*dst = *src;
@@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
 
-void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
-			      le128 *iv)
+void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
 	be128 ctrblks[3];
+	u128 *dst = (u128 *)d;
+	const u128 *src = (const u128 *)s;
 
 	if (dst != src) {
 		dst[0] = src[0];
@@ -94,10 +98,10 @@ static const struct common_glue_ctx twofish_enc = {
 
 	.funcs = { {
 		.num_blocks = 3,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+		.fn_u = { .ecb = twofish_enc_blk_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+		.fn_u = { .ecb = twofish_enc_blk }
 	} }
 };
 
@@ -107,10 +111,10 @@ static const struct common_glue_ctx twofish_ctr = {
 
 	.funcs = { {
 		.num_blocks = 3,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
+		.fn_u = { .ctr = twofish_enc_blk_ctr }
 	} }
 };
 
@@ -120,10 +124,10 @@ static const struct common_glue_ctx twofish_dec = {
 
 	.funcs = { {
 		.num_blocks = 3,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+		.fn_u = { .ecb = twofish_dec_blk_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+		.fn_u = { .ecb = twofish_dec_blk }
 	} }
 };
 
@@ -133,10 +137,10 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = 3,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
 	}, {
 		.num_blocks = 1,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+		.fn_u = { .cbc = twofish_dec_blk }
 	} }
 };
 
@@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
-					   req);
+	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 515c0ceeb4a3..0789e13ece90 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro CALL_enter_from_user_mode
 #ifdef CONFIG_CONTEXT_TRACKING
 #ifdef CONFIG_JUMP_LABEL
-	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0
 #endif
 	call enter_from_user_mode
 .Lafter_call_\@:
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3f8e22615812..9747876980b5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -33,6 +33,7 @@
 #include <asm/cpufeature.h>
 #include <asm/fpu/api.h>
 #include <asm/nospec-branch.h>
+#include <asm/io_bitmap.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	/* Reload ti->flags; we may have rescheduled above. */
 	cached_flags = READ_ONCE(ti->flags);
 
+	if (unlikely(cached_flags & _TIF_IO_BITMAP))
+		tss_update_io_bitmap();
+
 	fpregs_assert_state_consistent();
 	if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
 		switch_fpu_return();
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f83ca5aa8b77..7e0560442538 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -172,7 +172,7 @@
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 	.if \no_user_check == 0
 	/* coming from usermode? */
-	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, PT_CS(%esp)
 	jz	.Lend_\@
 	.endif
 	/* On user-cr3? */
@@ -205,64 +205,76 @@
 #define CS_FROM_ENTRY_STACK	(1 << 31)
 #define CS_FROM_USER_CR3	(1 << 30)
 #define CS_FROM_KERNEL		(1 << 29)
+#define CS_FROM_ESPFIX		(1 << 28)
 
 .macro FIXUP_FRAME
 	/*
 	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
 	 * Clear them in case hardware didn't do this for us.
 	 */
-	andl	$0x0000ffff, 3*4(%esp)
+	andl	$0x0000ffff, 4*4(%esp)
 
 #ifdef CONFIG_VM86
-	testl	$X86_EFLAGS_VM, 4*4(%esp)
+	testl	$X86_EFLAGS_VM, 5*4(%esp)
 	jnz	.Lfrom_usermode_no_fixup_\@
 #endif
-	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, 4*4(%esp)
 	jnz	.Lfrom_usermode_no_fixup_\@
 
-	orl	$CS_FROM_KERNEL, 3*4(%esp)
+	orl	$CS_FROM_KERNEL, 4*4(%esp)
 
 	/*
 	 * When we're here from kernel mode; the (exception) stack looks like:
 	 *
-	 *  5*4(%esp) - <previous context>
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 *  6*4(%esp) - <previous context>
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
 	 *
 	 * Lets build a 5 entry IRET frame after that, such that struct pt_regs
 	 * is complete and in particular regs->sp is correct. This gives us
-	 * the original 5 enties as gap:
+	 * the original 6 enties as gap:
 	 *
-	 * 12*4(%esp) - <previous context>
-	 * 11*4(%esp) - gap / flags
-	 * 10*4(%esp) - gap / cs
-	 *  9*4(%esp) - gap / ip
-	 *  8*4(%esp) - gap / orig_eax
-	 *  7*4(%esp) - gap / gs / function
-	 *  6*4(%esp) - ss
-	 *  5*4(%esp) - sp
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 * 14*4(%esp) - <previous context>
+	 * 13*4(%esp) - gap / flags
+	 * 12*4(%esp) - gap / cs
+	 * 11*4(%esp) - gap / ip
+	 * 10*4(%esp) - gap / orig_eax
+	 *  9*4(%esp) - gap / gs / function
+	 *  8*4(%esp) - gap / fs
+	 *  7*4(%esp) - ss
+	 *  6*4(%esp) - sp
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
 	 */
 
 	pushl	%ss		# ss
 	pushl	%esp		# sp (points at ss)
-	addl	$6*4, (%esp)	# point sp back at the previous context
-	pushl	6*4(%esp)	# flags
-	pushl	6*4(%esp)	# cs
-	pushl	6*4(%esp)	# ip
-	pushl	6*4(%esp)	# orig_eax
-	pushl	6*4(%esp)	# gs / function
+	addl	$7*4, (%esp)	# point sp back at the previous context
+	pushl	7*4(%esp)	# flags
+	pushl	7*4(%esp)	# cs
+	pushl	7*4(%esp)	# ip
+	pushl	7*4(%esp)	# orig_eax
+	pushl	7*4(%esp)	# gs / function
+	pushl	7*4(%esp)	# fs
 .Lfrom_usermode_no_fixup_\@:
 .endm
 
 .macro IRET_FRAME
+	/*
+	 * We're called with %ds, %es, %fs, and %gs from the interrupted
+	 * frame, so we shouldn't use them.  Also, we may be in ESPFIX
+	 * mode and therefore have a nonzero SS base and an offset ESP,
+	 * so any attempt to access the stack needs to use SS.  (except for
+	 * accesses through %esp, which automatically use SS.)
+	 */
 	testl $CS_FROM_KERNEL, 1*4(%esp)
 	jz .Lfinished_frame_\@
 
@@ -276,31 +288,40 @@
 	movl	5*4(%esp), %eax		# (modified) regs->sp
 
 	movl	4*4(%esp), %ecx		# flags
-	movl	%ecx, -4(%eax)
+	movl	%ecx, %ss:-1*4(%eax)
 
 	movl	3*4(%esp), %ecx		# cs
 	andl	$0x0000ffff, %ecx
-	movl	%ecx, -8(%eax)
+	movl	%ecx, %ss:-2*4(%eax)
 
 	movl	2*4(%esp), %ecx		# ip
-	movl	%ecx, -12(%eax)
+	movl	%ecx, %ss:-3*4(%eax)
 
 	movl	1*4(%esp), %ecx		# eax
-	movl	%ecx, -16(%eax)
+	movl	%ecx, %ss:-4*4(%eax)
 
 	popl	%ecx
-	lea	-16(%eax), %esp
+	lea	-4*4(%eax), %esp
 	popl	%eax
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
 	cld
 .if \skip_gs == 0
 	PUSH_GS
 .endif
-	FIXUP_FRAME
 	pushl	%fs
+
+	pushl	%eax
+	movl	$(__KERNEL_PERCPU), %eax
+	movl	%eax, %fs
+.if \unwind_espfix > 0
+	UNWIND_ESPFIX_STACK
+.endif
+	popl	%eax
+
+	FIXUP_FRAME
 	pushl	%es
 	pushl	%ds
 	pushl	\pt_regs_ax
@@ -313,8 +334,6 @@
 	movl	$(__USER_DS), %edx
 	movl	%edx, %ds
 	movl	%edx, %es
-	movl	$(__KERNEL_PERCPU), %edx
-	movl	%edx, %fs
 .if \skip_gs == 0
 	SET_KERNEL_GS %edx
 .endif
@@ -324,8 +343,8 @@
 .endif
 .endm
 
-.macro SAVE_ALL_NMI cr3_reg:req
-	SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+	SAVE_ALL unwind_espfix=\unwind_espfix
 
 	BUG_IF_WRONG_CR3
 
@@ -357,6 +376,7 @@
 2:	popl	%es
 3:	popl	%fs
 	POP_GS \pop
+	IRET_FRAME
 .pushsection .fixup, "ax"
 4:	movl	$0, (%esp)
 	jmp	1b
@@ -395,7 +415,8 @@
 
 .macro CHECK_AND_APPLY_ESPFIX
 #ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
 
 	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX
 
@@ -709,7 +730,7 @@
  * %eax: prev task
  * %edx: next task
  */
-ENTRY(__switch_to_asm)
+SYM_CODE_START(__switch_to_asm)
 	/*
 	 * Save callee-saved registers
 	 * This must match the order in struct inactive_task_frame
@@ -718,6 +739,11 @@ ENTRY(__switch_to_asm)
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
+	/*
+	 * Flags are saved to prevent AC leakage. This could go
+	 * away if objtool would have 32bit support to verify
+	 * the STAC/CLAC correctness.
+	 */
 	pushfl
 
 	/* switch stack */
@@ -740,15 +766,16 @@ ENTRY(__switch_to_asm)
 	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
-	/* restore callee-saved registers */
+	/* Restore flags or the incoming task to restore AC state. */
 	popfl
+	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 
 	jmp	__switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
 
 /*
  * The unwinder expects the last frame on the stack to always be at the same
@@ -757,7 +784,7 @@ END(__switch_to_asm)
  * asmlinkage function so its argument has to be pushed on the stack.  This
  * wrapper creates a proper "end of stack" frame header before the call.
  */
-ENTRY(schedule_tail_wrapper)
+SYM_FUNC_START(schedule_tail_wrapper)
 	FRAME_BEGIN
 
 	pushl	%eax
@@ -766,7 +793,7 @@ ENTRY(schedule_tail_wrapper)
 
 	FRAME_END
 	ret
-ENDPROC(schedule_tail_wrapper)
+SYM_FUNC_END(schedule_tail_wrapper)
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -774,7 +801,7 @@ ENDPROC(schedule_tail_wrapper)
  * ebx: kernel thread func (NULL for user thread)
  * edi: kernel thread arg
  */
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
 	call	schedule_tail_wrapper
 
 	testl	%ebx, %ebx
@@ -797,7 +824,7 @@ ENTRY(ret_from_fork)
 	 */
 	movl	$0, PT_EAX(%esp)
 	jmp	2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
@@ -807,8 +834,7 @@ END(ret_from_fork)
  */
 
 	# userspace resumption stub bypassing syscall exit tracing
-	ALIGN
-ret_from_exception:
+SYM_CODE_START_LOCAL(ret_from_exception)
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
 #ifdef CONFIG_VM86
@@ -825,15 +851,14 @@ ret_from_intr:
 	cmpl	$USER_RPL, %eax
 	jb	restore_all_kernel		# not returning to v8086 or userspace
 
-ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	prepare_exit_to_usermode
 	jmp	restore_all
-END(ret_from_exception)
+SYM_CODE_END(ret_from_exception)
 
-GLOBAL(__begin_SYSENTER_singlestep_region)
+SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
 /*
  * All code from here through __end_SYSENTER_singlestep_region is subject
  * to being single-stepped if a user program sets TF and executes SYSENTER.
@@ -848,9 +873,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  * Xen doesn't set %esp to be precisely what the normal SYSENTER
  * entry point expects, so fix it up before using the normal path.
  */
-ENTRY(xen_sysenter_target)
+SYM_CODE_START(xen_sysenter_target)
 	addl	$5*4, %esp			/* remove xen-provided frame */
 	jmp	.Lsysenter_past_esp
+SYM_CODE_END(xen_sysenter_target)
 #endif
 
 /*
@@ -885,7 +911,7 @@ ENTRY(xen_sysenter_target)
  * ebp  user stack
  * 0(%ebp) arg6
  */
-ENTRY(entry_SYSENTER_32)
+SYM_FUNC_START(entry_SYSENTER_32)
 	/*
 	 * On entry-stack with all userspace-regs live - save and
 	 * restore eflags and %eax to use it as scratch-reg for the cr3
@@ -1012,8 +1038,8 @@ ENTRY(entry_SYSENTER_32)
 	pushl	$X86_EFLAGS_FIXED
 	popfl
 	jmp	.Lsysenter_flags_fixed
-GLOBAL(__end_SYSENTER_singlestep_region)
-ENDPROC(entry_SYSENTER_32)
+SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_FUNC_END(entry_SYSENTER_32)
 
 /*
  * 32-bit legacy system call entry.
@@ -1043,7 +1069,7 @@ ENDPROC(entry_SYSENTER_32)
  * edi  arg5
  * ebp  arg6
  */
-ENTRY(entry_INT80_32)
+SYM_FUNC_START(entry_INT80_32)
 	ASM_CLAC
 	pushl	%eax			/* pt_regs->orig_ax */
 
@@ -1064,7 +1090,6 @@ ENTRY(entry_INT80_32)
 restore_all:
 	TRACE_IRQS_IRET
 	SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
 	CHECK_AND_APPLY_ESPFIX
 .Lrestore_nocheck:
 	/* Switch back to user CR3 */
@@ -1075,7 +1100,6 @@ restore_all:
 	/* Restore user state */
 	RESTORE_REGS pop=4			# skip orig_eax/error_code
 .Lirq_return:
-	IRET_FRAME
 	/*
 	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
 	 * when returning from IPI handler and when returning from
@@ -1100,7 +1124,7 @@ restore_all_kernel:
 	jmp	.Lirq_return
 
 .section .fixup, "ax"
-ENTRY(iret_exc	)
+SYM_CODE_START(iret_exc)
 	pushl	$0				# no error code
 	pushl	$do_iret_error
 
@@ -1117,9 +1141,10 @@ ENTRY(iret_exc	)
 #endif
 
 	jmp	common_exception
+SYM_CODE_END(iret_exc)
 .previous
 	_ASM_EXTABLE(.Lirq_return, iret_exc)
-ENDPROC(entry_INT80_32)
+SYM_FUNC_END(entry_INT80_32)
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -1128,30 +1153,43 @@ ENDPROC(entry_INT80_32)
  * We can't call C functions using the ESPFIX stack. This code reads
  * the high word of the segment base from the GDT and swiches to the
  * normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment.  Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
  */
 #ifdef CONFIG_X86_ESPFIX32
 	/* fixup the stack */
-	mov	GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
-	mov	GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
+	pushl	%ecx
+	subl	$2*4, %esp
+	sgdt	(%esp)
+	movl	2(%esp), %ecx				/* GDT address */
+	/*
+	 * Careful: ECX is a linear pointer, so we need to force base
+	 * zero.  %cs is the only known-linear segment we have right now.
+	 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al	/* bits 16..23 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah	/* bits 24..31 */
 	shl	$16, %eax
+	addl	$2*4, %esp
+	popl	%ecx
 	addl	%esp, %eax			/* the adjusted stack pointer */
 	pushl	$__KERNEL_DS
 	pushl	%eax
 	lss	(%esp), %esp			/* switch to the normal stack segment */
 #endif
 .endm
+
 .macro UNWIND_ESPFIX_STACK
+	/* It's safe to clobber %eax, all other regs need to be preserved */
 #ifdef CONFIG_X86_ESPFIX32
 	movl	%ss, %eax
 	/* see if on espfix stack */
 	cmpw	$__ESPFIX_SS, %ax
-	jne	27f
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
+	jne	.Lno_fixup_\@
 	/* switch to normal stack */
 	FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
 #endif
 .endm
 
@@ -1160,7 +1198,7 @@ ENDPROC(entry_INT80_32)
  * We pack 1 stub into every 8-byte block.
  */
 	.align 8
-ENTRY(irq_entries_start)
+SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
 	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
@@ -1168,11 +1206,11 @@ ENTRY(irq_entries_start)
 	jmp	common_interrupt
 	.align	8
     .endr
-END(irq_entries_start)
+SYM_CODE_END(irq_entries_start)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	.align 8
-ENTRY(spurious_entries_start)
+SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
     .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
 	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
@@ -1180,9 +1218,9 @@ ENTRY(spurious_entries_start)
 	jmp	common_spurious
 	.align	8
     .endr
-END(spurious_entries_start)
+SYM_CODE_END(spurious_entries_start)
 
-common_spurious:
+SYM_CODE_START_LOCAL(common_spurious)
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 	SAVE_ALL switch_stacks=1
@@ -1191,7 +1229,7 @@ common_spurious:
 	movl	%esp, %eax
 	call	smp_spurious_interrupt
 	jmp	ret_from_intr
-ENDPROC(common_spurious)
+SYM_CODE_END(common_spurious)
 #endif
 
 /*
@@ -1199,7 +1237,7 @@ ENDPROC(common_spurious)
  * so IRQ-flags tracing has to follow that:
  */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
+SYM_CODE_START_LOCAL(common_interrupt)
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 
@@ -1209,10 +1247,10 @@ common_interrupt:
 	movl	%esp, %eax
 	call	do_IRQ
 	jmp	ret_from_intr
-ENDPROC(common_interrupt)
+SYM_CODE_END(common_interrupt)
 
 #define BUILD_INTERRUPT3(name, nr, fn)			\
-ENTRY(name)						\
+SYM_FUNC_START(name)					\
 	ASM_CLAC;					\
 	pushl	$~(nr);					\
 	SAVE_ALL switch_stacks=1;			\
@@ -1221,7 +1259,7 @@ ENTRY(name)						\
 	movl	%esp, %eax;				\
 	call	fn;					\
 	jmp	ret_from_intr;				\
-ENDPROC(name)
+SYM_FUNC_END(name)
 
 #define BUILD_INTERRUPT(name, nr)		\
 	BUILD_INTERRUPT3(name, nr, smp_##name);	\
@@ -1229,14 +1267,14 @@ ENDPROC(name)
 /* The include is where all of the SMP etc. interrupts come from */
 #include <asm/entry_arch.h>
 
-ENTRY(coprocessor_error)
+SYM_CODE_START(coprocessor_error)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_error
 	jmp	common_exception
-END(coprocessor_error)
+SYM_CODE_END(coprocessor_error)
 
-ENTRY(simd_coprocessor_error)
+SYM_CODE_START(simd_coprocessor_error)
 	ASM_CLAC
 	pushl	$0
 #ifdef CONFIG_X86_INVD_BUG
@@ -1248,104 +1286,99 @@ ENTRY(simd_coprocessor_error)
 	pushl	$do_simd_coprocessor_error
 #endif
 	jmp	common_exception
-END(simd_coprocessor_error)
+SYM_CODE_END(simd_coprocessor_error)
 
-ENTRY(device_not_available)
+SYM_CODE_START(device_not_available)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	pushl	$do_device_not_available
 	jmp	common_exception
-END(device_not_available)
+SYM_CODE_END(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+SYM_CODE_START(native_iret)
 	iret
 	_ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
+SYM_CODE_END(native_iret)
 #endif
 
-ENTRY(overflow)
+SYM_CODE_START(overflow)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_overflow
 	jmp	common_exception
-END(overflow)
+SYM_CODE_END(overflow)
 
-ENTRY(bounds)
+SYM_CODE_START(bounds)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_bounds
 	jmp	common_exception
-END(bounds)
+SYM_CODE_END(bounds)
 
-ENTRY(invalid_op)
+SYM_CODE_START(invalid_op)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_invalid_op
 	jmp	common_exception
-END(invalid_op)
+SYM_CODE_END(invalid_op)
 
-ENTRY(coprocessor_segment_overrun)
+SYM_CODE_START(coprocessor_segment_overrun)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_segment_overrun
 	jmp	common_exception
-END(coprocessor_segment_overrun)
+SYM_CODE_END(coprocessor_segment_overrun)
 
-ENTRY(invalid_TSS)
+SYM_CODE_START(invalid_TSS)
 	ASM_CLAC
 	pushl	$do_invalid_TSS
 	jmp	common_exception
-END(invalid_TSS)
+SYM_CODE_END(invalid_TSS)
 
-ENTRY(segment_not_present)
+SYM_CODE_START(segment_not_present)
 	ASM_CLAC
 	pushl	$do_segment_not_present
 	jmp	common_exception
-END(segment_not_present)
+SYM_CODE_END(segment_not_present)
 
-ENTRY(stack_segment)
+SYM_CODE_START(stack_segment)
 	ASM_CLAC
 	pushl	$do_stack_segment
 	jmp	common_exception
-END(stack_segment)
+SYM_CODE_END(stack_segment)
 
-ENTRY(alignment_check)
+SYM_CODE_START(alignment_check)
 	ASM_CLAC
 	pushl	$do_alignment_check
 	jmp	common_exception
-END(alignment_check)
+SYM_CODE_END(alignment_check)
 
-ENTRY(divide_error)
+SYM_CODE_START(divide_error)
 	ASM_CLAC
 	pushl	$0				# no error code
 	pushl	$do_divide_error
 	jmp	common_exception
-END(divide_error)
+SYM_CODE_END(divide_error)
 
 #ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
+SYM_CODE_START(machine_check)
 	ASM_CLAC
 	pushl	$0
 	pushl	machine_check_vector
 	jmp	common_exception
-END(machine_check)
+SYM_CODE_END(machine_check)
 #endif
 
-ENTRY(spurious_interrupt_bug)
+SYM_CODE_START(spurious_interrupt_bug)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_spurious_interrupt_bug
 	jmp	common_exception
-END(spurious_interrupt_bug)
+SYM_CODE_END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN_PV
-ENTRY(xen_hypervisor_callback)
-	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	TRACE_IRQS_OFF
-
+SYM_FUNC_START(xen_hypervisor_callback)
 	/*
 	 * Check to see if we got the event in the critical
 	 * region in xen_iret_direct, after we've reenabled
@@ -1353,22 +1386,23 @@ ENTRY(xen_hypervisor_callback)
 	 * iret instruction's behaviour where it delivers a
 	 * pending interrupt when enabling interrupts:
 	 */
-	movl	PT_EIP(%esp), %eax
-	cmpl	$xen_iret_start_crit, %eax
+	cmpl	$xen_iret_start_crit, (%esp)
 	jb	1f
-	cmpl	$xen_iret_end_crit, %eax
+	cmpl	$xen_iret_end_crit, (%esp)
 	jae	1f
-
-	jmp	xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1:	mov	%esp, %eax
+	call	xen_iret_crit_fixup
+1:
+	pushl	$-1				/* orig_ax = -1 => not a system call */
+	SAVE_ALL
+	ENCODE_FRAME_POINTER
+	TRACE_IRQS_OFF
+	mov	%esp, %eax
 	call	xen_evtchn_do_upcall
 #ifndef CONFIG_PREEMPTION
 	call	xen_maybe_preempt_hcall
 #endif
 	jmp	ret_from_intr
-ENDPROC(xen_hypervisor_callback)
+SYM_FUNC_END(xen_hypervisor_callback)
 
 /*
  * Hypervisor uses this for application faults while it executes.
@@ -1382,7 +1416,7 @@ ENDPROC(xen_hypervisor_callback)
  * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  * We distinguish between categories by maintaining a status value in EAX.
  */
-ENTRY(xen_failsafe_callback)
+SYM_FUNC_START(xen_failsafe_callback)
 	pushl	%eax
 	movl	$1, %eax
 1:	mov	4(%esp), %ds
@@ -1419,7 +1453,7 @@ ENTRY(xen_failsafe_callback)
 	_ASM_EXTABLE(2b, 7b)
 	_ASM_EXTABLE(3b, 8b)
 	_ASM_EXTABLE(4b, 9b)
-ENDPROC(xen_failsafe_callback)
+SYM_FUNC_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 #ifdef CONFIG_XEN_PVHVM
@@ -1441,18 +1475,17 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 
 #endif /* CONFIG_HYPERV */
 
-ENTRY(page_fault)
+SYM_CODE_START(page_fault)
 	ASM_CLAC
 	pushl	$do_page_fault
 	jmp	common_exception_read_cr2
-END(page_fault)
+SYM_CODE_END(page_fault)
 
-common_exception_read_cr2:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
 	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
 
 	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK
 
 	/* fixup %gs */
 	GS_TO_REG %ecx
@@ -1470,13 +1503,12 @@ common_exception_read_cr2:
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
 	jmp	ret_from_exception
-END(common_exception_read_cr2)
+SYM_CODE_END(common_exception_read_cr2)
 
-common_exception:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception)
 	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
 	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK
 
 	/* fixup %gs */
 	GS_TO_REG %ecx
@@ -1492,9 +1524,9 @@ common_exception:
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
 	jmp	ret_from_exception
-END(common_exception)
+SYM_CODE_END(common_exception)
 
-ENTRY(debug)
+SYM_CODE_START(debug)
 	/*
 	 * Entry from sysenter is now handled in common_exception
 	 */
@@ -1502,7 +1534,49 @@ ENTRY(debug)
 	pushl	$-1				# mark this as an int
 	pushl	$do_debug
 	jmp	common_exception
-END(debug)
+SYM_CODE_END(debug)
+
+#ifdef CONFIG_DOUBLEFAULT
+SYM_CODE_START(double_fault)
+1:
+	/*
+	 * This is a task gate handler, not an interrupt gate handler.
+	 * The error code is on the stack, but the stack is otherwise
+	 * empty.  Interrupts are off.  Our state is sane with the following
+	 * exceptions:
+	 *
+	 *  - CR0.TS is set.  "TS" literally means "task switched".
+	 *  - EFLAGS.NT is set because we're a "nested task".
+	 *  - The doublefault TSS has back_link set and has been marked busy.
+	 *  - TR points to the doublefault TSS and the normal TSS is busy.
+	 *  - CR3 is the normal kernel PGD.  This would be delightful, except
+	 *    that the CPU didn't bother to save the old CR3 anywhere.  This
+	 *    would make it very awkward to return back to the context we came
+	 *    from.
+	 *
+	 * The rest of EFLAGS is sanitized for us, so we don't need to
+	 * worry about AC or DF.
+	 *
+	 * Don't even bother popping the error code.  It's always zero,
+	 * and ignoring it makes us a bit more robust against buggy
+	 * hypervisor task gate implementations.
+	 *
+	 * We will manually undo the task switch instead of doing a
+	 * task-switching IRET.
+	 */
+
+	clts				/* clear CR0.TS */
+	pushl	$X86_EFLAGS_FIXED
+	popfl				/* clear EFLAGS.NT */
+
+	call	doublefault_shim
+
+	/* We don't support returning, so we have no IRET here. */
+1:
+	hlt
+	jmp 1b
+SYM_CODE_END(double_fault)
+#endif
 
 /*
  * NMI is doubly nasty.  It can happen on the first instruction of
@@ -1511,10 +1585,14 @@ END(debug)
  * switched stacks.  We handle both conditions by simply checking whether we
  * interrupted kernel code running on the SYSENTER stack.
  */
-ENTRY(nmi)
+SYM_CODE_START(nmi)
 	ASM_CLAC
 
 #ifdef CONFIG_X86_ESPFIX32
+	/*
+	 * ESPFIX_SS is only ever set on the return to user path
+	 * after we've switched to the entry stack.
+	 */
 	pushl	%eax
 	movl	%ss, %eax
 	cmpw	$__ESPFIX_SS, %ax
@@ -1550,6 +1628,11 @@ ENTRY(nmi)
 	movl	%ebx, %esp
 
 .Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+	testl	$CS_FROM_ESPFIX, PT_CS(%esp)
+	jnz	.Lnmi_from_espfix
+#endif
+
 	CHECK_AND_APPLY_ESPFIX
 	RESTORE_ALL_NMI cr3_reg=%edi pop=4
 	jmp	.Lirq_return
@@ -1557,28 +1640,47 @@ ENTRY(nmi)
 #ifdef CONFIG_X86_ESPFIX32
 .Lnmi_espfix_stack:
 	/*
-	 * create the pointer to lss back
+	 * Create the pointer to LSS back
 	 */
 	pushl	%ss
 	pushl	%esp
 	addl	$4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl	16(%esp)
-	.endr
-	pushl	%eax
-	SAVE_ALL_NMI cr3_reg=%edi
+
+	/* Copy the (short) IRET frame */
+	pushl	4*4(%esp)	# flags
+	pushl	4*4(%esp)	# cs
+	pushl	4*4(%esp)	# ip
+
+	pushl	%eax		# orig_ax
+
+	SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
 	ENCODE_FRAME_POINTER
-	FIXUP_ESPFIX_STACK			# %eax == %esp
+
+	/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+	xorl	$(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
 	xorl	%edx, %edx			# zero error code
-	call	do_nmi
+	movl	%esp, %eax			# pt_regs pointer
+	jmp	.Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
 	RESTORE_ALL_NMI cr3_reg=%edi
-	lss	12+4(%esp), %esp		# back to espfix stack
+	/*
+	 * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+	 * fix up the gap and long frame:
+	 *
+	 *  3 - original frame	(exception)
+	 *  2 - ESPFIX block	(above)
+	 *  6 - gap		(FIXUP_FRAME)
+	 *  5 - long frame	(FIXUP_FRAME)
+	 *  1 - orig_ax
+	 */
+	lss	(1+5+6)*4(%esp), %esp			# back to espfix stack
 	jmp	.Lirq_return
 #endif
-END(nmi)
+SYM_CODE_END(nmi)
 
-ENTRY(int3)
+SYM_CODE_START(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 
@@ -1589,22 +1691,22 @@ ENTRY(int3)
 	movl	%esp, %eax			# pt_regs pointer
 	call	do_int3
 	jmp	ret_from_exception
-END(int3)
+SYM_CODE_END(int3)
 
-ENTRY(general_protection)
+SYM_CODE_START(general_protection)
 	pushl	$do_general_protection
 	jmp	common_exception
-END(general_protection)
+SYM_CODE_END(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
+SYM_CODE_START(async_page_fault)
 	ASM_CLAC
 	pushl	$do_async_page_fault
 	jmp	common_exception_read_cr2
-END(async_page_fault)
+SYM_CODE_END(async_page_fault)
 #endif
 
-ENTRY(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_do_exit)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
@@ -1613,4 +1715,4 @@ ENTRY(rewind_stack_do_exit)
 
 	call	do_exit
 1:	jmp 1b
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b7c3ea4cb19d..f2bb91e87877 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -15,7 +15,7 @@
  *			at the top of the kernel process stack.
  *
  * Some macro usage:
- * - ENTRY/END:		Define functions in the symbol table.
+ * - SYM_FUNC_START/END:Define functions in the symbol table.
  * - TRACE_IRQ_*:	Trace hardirq state for lock debugging.
  * - idtentry:		Define exception entry points.
  */
@@ -46,11 +46,11 @@
 .section .entry.text, "ax"
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret64)
+SYM_CODE_START(native_usergs_sysret64)
 	UNWIND_HINT_EMPTY
 	swapgs
 	sysretq
-END(native_usergs_sysret64)
+SYM_CODE_END(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 .macro TRACE_IRQS_FLAGS flags:req
@@ -142,7 +142,7 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
-ENTRY(entry_SYSCALL_64)
+SYM_CODE_START(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
 	 * Interrupts are off on entry.
@@ -162,7 +162,7 @@ ENTRY(entry_SYSCALL_64)
 	pushq	%r11					/* pt_regs->flags */
 	pushq	$__USER_CS				/* pt_regs->cs */
 	pushq	%rcx					/* pt_regs->ip */
-GLOBAL(entry_SYSCALL_64_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	pushq	%rax					/* pt_regs->orig_ax */
 
 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
@@ -273,13 +273,13 @@ syscall_return_via_sysret:
 	popq	%rdi
 	popq	%rsp
 	USERGS_SYSRET64
-END(entry_SYSCALL_64)
+SYM_CODE_END(entry_SYSCALL_64)
 
 /*
  * %rdi: prev task
  * %rsi: next task
  */
-ENTRY(__switch_to_asm)
+SYM_CODE_START(__switch_to_asm)
 	UNWIND_HINT_FUNC
 	/*
 	 * Save callee-saved registers
@@ -321,7 +321,7 @@ ENTRY(__switch_to_asm)
 	popq	%rbp
 
 	jmp	__switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
 
 /*
  * A newly forked process directly context switches into this address.
@@ -330,7 +330,7 @@ END(__switch_to_asm)
  * rbx: kernel thread func (NULL for user thread)
  * r12: kernel thread arg
  */
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
 	UNWIND_HINT_EMPTY
 	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
@@ -357,14 +357,14 @@ ENTRY(ret_from_fork)
 	 */
 	movq	$0, RAX(%rsp)
 	jmp	2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
 
 /*
  * Build the entry stubs with some assembler magic.
  * We pack 1 stub into every 8-byte block.
  */
 	.align 8
-ENTRY(irq_entries_start)
+SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
 	UNWIND_HINT_IRET_REGS
@@ -373,10 +373,10 @@ ENTRY(irq_entries_start)
 	.align	8
 	vector=vector+1
     .endr
-END(irq_entries_start)
+SYM_CODE_END(irq_entries_start)
 
 	.align 8
-ENTRY(spurious_entries_start)
+SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
     .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
 	UNWIND_HINT_IRET_REGS
@@ -385,7 +385,7 @@ ENTRY(spurious_entries_start)
 	.align	8
 	vector=vector+1
     .endr
-END(spurious_entries_start)
+SYM_CODE_END(spurious_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
@@ -511,7 +511,7 @@ END(spurious_entries_start)
  * | return address					|
  * +----------------------------------------------------+
  */
-ENTRY(interrupt_entry)
+SYM_CODE_START(interrupt_entry)
 	UNWIND_HINT_FUNC
 	ASM_CLAC
 	cld
@@ -579,7 +579,7 @@ ENTRY(interrupt_entry)
 	TRACE_IRQS_OFF
 
 	ret
-END(interrupt_entry)
+SYM_CODE_END(interrupt_entry)
 _ASM_NOKPROBE(interrupt_entry)
 
 
@@ -589,18 +589,18 @@ _ASM_NOKPROBE(interrupt_entry)
  * The interrupt stubs push (~vector+0x80) onto the stack and
  * then jump to common_spurious/interrupt.
  */
-common_spurious:
+SYM_CODE_START_LOCAL(common_spurious)
 	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */
 	call	interrupt_entry
 	UNWIND_HINT_REGS indirect=1
 	call	smp_spurious_interrupt		/* rdi points to pt_regs */
 	jmp	ret_from_intr
-END(common_spurious)
+SYM_CODE_END(common_spurious)
 _ASM_NOKPROBE(common_spurious)
 
 /* common_interrupt is a hotpath. Align it */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
+SYM_CODE_START_LOCAL(common_interrupt)
 	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */
 	call	interrupt_entry
 	UNWIND_HINT_REGS indirect=1
@@ -616,12 +616,12 @@ ret_from_intr:
 	jz	retint_kernel
 
 	/* Interrupt came from user space */
-GLOBAL(retint_user)
+.Lretint_user:
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
 	TRACE_IRQS_IRETQ
 
-GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 #ifdef CONFIG_DEBUG_ENTRY
 	/* Assert that pt_regs indicates user mode. */
 	testb	$3, CS(%rsp)
@@ -679,7 +679,7 @@ retint_kernel:
 	 */
 	TRACE_IRQS_IRETQ
 
-GLOBAL(restore_regs_and_return_to_kernel)
+SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
 #ifdef CONFIG_DEBUG_ENTRY
 	/* Assert that pt_regs indicates kernel mode. */
 	testb	$3, CS(%rsp)
@@ -695,7 +695,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
 	 */
 	INTERRUPT_RETURN
 
-ENTRY(native_iret)
+SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
 	UNWIND_HINT_IRET_REGS
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
@@ -706,8 +706,7 @@ ENTRY(native_iret)
 	jnz	native_irq_return_ldt
 #endif
 
-.global native_irq_return_iret
-native_irq_return_iret:
+SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
 	/*
 	 * This may fault.  Non-paranoid faults on return to userspace are
 	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
@@ -789,14 +788,14 @@ native_irq_return_ldt:
 	 */
 	jmp	native_irq_return_iret
 #endif
-END(common_interrupt)
+SYM_CODE_END(common_interrupt)
 _ASM_NOKPROBE(common_interrupt)
 
 /*
  * APIC interrupts.
  */
 .macro apicinterrupt3 num sym do_sym
-ENTRY(\sym)
+SYM_CODE_START(\sym)
 	UNWIND_HINT_IRET_REGS
 	pushq	$~(\num)
 .Lcommon_\sym:
@@ -804,7 +803,7 @@ ENTRY(\sym)
 	UNWIND_HINT_REGS indirect=1
 	call	\do_sym	/* rdi points to pt_regs */
 	jmp	ret_from_intr
-END(\sym)
+SYM_CODE_END(\sym)
 _ASM_NOKPROBE(\sym)
 .endm
 
@@ -969,7 +968,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
-ENTRY(\sym)
+SYM_CODE_START(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
@@ -1019,7 +1018,7 @@ ENTRY(\sym)
 	.endif
 
 _ASM_NOKPROBE(\sym)
-END(\sym)
+SYM_CODE_END(\sym)
 .endm
 
 idtentry divide_error			do_divide_error			has_error_code=0
@@ -1041,7 +1040,7 @@ idtentry simd_coprocessor_error		do_simd_coprocessor_error	has_error_code=0
 	 * Reload gs selector with exception handling
 	 * edi:  new selector
 	 */
-ENTRY(native_load_gs_index)
+SYM_FUNC_START(native_load_gs_index)
 	FRAME_BEGIN
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
@@ -1055,13 +1054,13 @@ ENTRY(native_load_gs_index)
 	popfq
 	FRAME_END
 	ret
-ENDPROC(native_load_gs_index)
+SYM_FUNC_END(native_load_gs_index)
 EXPORT_SYMBOL(native_load_gs_index)
 
 	_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
 	.section .fixup, "ax"
 	/* running with kernelgs */
-.Lbad_gs:
+SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
 	SWAPGS					/* switch back to user gs */
 .macro ZAP_GS
 	/* This can't be a string because the preprocessor needs to see it. */
@@ -1072,10 +1071,11 @@ EXPORT_SYMBOL(native_load_gs_index)
 	xorl	%eax, %eax
 	movl	%eax, %gs
 	jmp	2b
+SYM_CODE_END(.Lbad_gs)
 	.previous
 
 /* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(do_softirq_own_stack)
+SYM_FUNC_START(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
 	ENTER_IRQ_STACK regs=0 old_rsp=%r11
@@ -1083,7 +1083,7 @@ ENTRY(do_softirq_own_stack)
 	LEAVE_IRQ_STACK regs=0
 	leaveq
 	ret
-ENDPROC(do_softirq_own_stack)
+SYM_FUNC_END(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN_PV
 idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -1101,7 +1101,8 @@ idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  * existing activation in its critical region -- if so, we pop the current
  * activation and restart the handler using the previous one.
  */
-ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
+/* do_hypervisor_callback(struct *pt_regs) */
+SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
 
 /*
  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -1119,7 +1120,7 @@ ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
 	call	xen_maybe_preempt_hcall
 #endif
 	jmp	error_exit
-END(xen_do_hypervisor_callback)
+SYM_CODE_END(xen_do_hypervisor_callback)
 
 /*
  * Hypervisor uses this for application faults while it executes.
@@ -1134,7 +1135,7 @@ END(xen_do_hypervisor_callback)
  * We distinguish between categories by comparing each saved segment register
  * with its current contents: any discrepancy means we in category 1.
  */
-ENTRY(xen_failsafe_callback)
+SYM_CODE_START(xen_failsafe_callback)
 	UNWIND_HINT_EMPTY
 	movl	%ds, %ecx
 	cmpw	%cx, 0x10(%rsp)
@@ -1164,7 +1165,7 @@ ENTRY(xen_failsafe_callback)
 	PUSH_AND_CLEAR_REGS
 	ENCODE_FRAME_POINTER
 	jmp	error_exit
-END(xen_failsafe_callback)
+SYM_CODE_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 #ifdef CONFIG_XEN_PVHVM
@@ -1214,7 +1215,7 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
-ENTRY(paranoid_entry)
+SYM_CODE_START_LOCAL(paranoid_entry)
 	UNWIND_HINT_FUNC
 	cld
 	PUSH_AND_CLEAR_REGS save_ret=1
@@ -1248,7 +1249,7 @@ ENTRY(paranoid_entry)
 	FENCE_SWAPGS_KERNEL_ENTRY
 
 	ret
-END(paranoid_entry)
+SYM_CODE_END(paranoid_entry)
 
 /*
  * "Paranoid" exit path from exception stack.  This is invoked
@@ -1262,7 +1263,7 @@ END(paranoid_entry)
  *
  * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
  */
-ENTRY(paranoid_exit)
+SYM_CODE_START_LOCAL(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF_DEBUG
@@ -1272,19 +1273,18 @@ ENTRY(paranoid_exit)
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
-	jmp	.Lparanoid_exit_restore
+	jmp	restore_regs_and_return_to_kernel
 .Lparanoid_exit_no_swapgs:
 	TRACE_IRQS_IRETQ_DEBUG
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
-.Lparanoid_exit_restore:
 	jmp restore_regs_and_return_to_kernel
-END(paranoid_exit)
+SYM_CODE_END(paranoid_exit)
 
 /*
  * Save all registers in pt_regs, and switch GS if needed.
  */
-ENTRY(error_entry)
+SYM_CODE_START_LOCAL(error_entry)
 	UNWIND_HINT_FUNC
 	cld
 	PUSH_AND_CLEAR_REGS save_ret=1
@@ -1364,16 +1364,16 @@ ENTRY(error_entry)
 	call	fixup_bad_iret
 	mov	%rax, %rsp
 	jmp	.Lerror_entry_from_usermode_after_swapgs
-END(error_entry)
+SYM_CODE_END(error_entry)
 
-ENTRY(error_exit)
+SYM_CODE_START_LOCAL(error_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
-	jmp	retint_user
-END(error_exit)
+	jmp	.Lretint_user
+SYM_CODE_END(error_exit)
 
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
@@ -1383,7 +1383,7 @@ END(error_exit)
  *	%r14: Used to save/restore the CR3 of the interrupted context
  *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
-ENTRY(nmi)
+SYM_CODE_START(nmi)
 	UNWIND_HINT_IRET_REGS
 
 	/*
@@ -1718,21 +1718,21 @@ nmi_restore:
 	 * about espfix64 on the way back to kernel mode.
 	 */
 	iretq
-END(nmi)
+SYM_CODE_END(nmi)
 
 #ifndef CONFIG_IA32_EMULATION
 /*
  * This handles SYSCALL from 32-bit code.  There is no way to program
  * MSRs to fully disable 32-bit SYSCALL.
  */
-ENTRY(ignore_sysret)
+SYM_CODE_START(ignore_sysret)
 	UNWIND_HINT_EMPTY
 	mov	$-ENOSYS, %eax
-	sysret
-END(ignore_sysret)
+	sysretl
+SYM_CODE_END(ignore_sysret)
 #endif
 
-ENTRY(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_do_exit)
 	UNWIND_HINT_FUNC
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
@@ -1742,4 +1742,4 @@ ENTRY(rewind_stack_do_exit)
 	UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
 
 	call	do_exit
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 39913770a44d..f1d3ccae5dd5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -46,7 +46,7 @@
  * ebp  user stack
  * 0(%ebp) arg6
  */
-ENTRY(entry_SYSENTER_compat)
+SYM_FUNC_START(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
 	SWAPGS
 
@@ -146,8 +146,8 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	$X86_EFLAGS_FIXED
 	popfq
 	jmp	.Lsysenter_flags_fixed
-GLOBAL(__end_entry_SYSENTER_compat)
-ENDPROC(entry_SYSENTER_compat)
+SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
+SYM_FUNC_END(entry_SYSENTER_compat)
 
 /*
  * 32-bit SYSCALL entry.
@@ -196,7 +196,7 @@ ENDPROC(entry_SYSENTER_compat)
  * esp  user stack
  * 0(%esp) arg6
  */
-ENTRY(entry_SYSCALL_compat)
+SYM_CODE_START(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
 	swapgs
 
@@ -215,7 +215,7 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
-GLOBAL(entry_SYSCALL_compat_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
 	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
@@ -311,7 +311,7 @@ sysret32_from_system_call:
 	xorl	%r10d, %r10d
 	swapgs
 	sysretl
-END(entry_SYSCALL_compat)
+SYM_CODE_END(entry_SYSCALL_compat)
 
 /*
  * 32-bit legacy system call entry.
@@ -339,7 +339,7 @@ END(entry_SYSCALL_compat)
  * edi  arg5
  * ebp  arg6
  */
-ENTRY(entry_INT80_compat)
+SYM_CODE_START(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
@@ -416,4 +416,4 @@ ENTRY(entry_INT80_compat)
 	/* Go back to user mode. */
 	TRACE_IRQS_ON
 	jmp	swapgs_restore_regs_and_return_to_usermode
-END(entry_INT80_compat)
+SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index aa3336a7cb15..7d17b3addbbb 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -10,13 +10,11 @@
 #ifdef CONFIG_IA32_EMULATION
 /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
 #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
-
+#define __sys_ni_syscall __ia32_sys_ni_syscall
 #else /* CONFIG_IA32_EMULATION */
 #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __sys_ni_syscall sys_ni_syscall
 #endif /* CONFIG_IA32_EMULATION */
 
 #include <asm/syscalls_32.h>
@@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] =
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
+	[0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b1bf31713374..adf619a856e8 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -4,11 +4,17 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/syscalls.h>
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+extern asmlinkage long sys_ni_syscall(void);
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+	return sys_ni_syscall();
+}
+
 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
 #define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
 #include <asm/syscalls_64.h>
@@ -23,7 +29,7 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
+	[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
 
@@ -40,7 +46,7 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+	[0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3fe02546aed3..c17cb77eb150 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -124,13 +124,13 @@
 110	i386	iopl			sys_iopl			__ia32_sys_iopl
 111	i386	vhangup			sys_vhangup			__ia32_sys_vhangup
 112	i386	idle
-113	i386	vm86old			sys_vm86old			sys_ni_syscall
+113	i386	vm86old			sys_vm86old			__ia32_sys_ni_syscall
 114	i386	wait4			sys_wait4			__ia32_compat_sys_wait4
 115	i386	swapoff			sys_swapoff			__ia32_sys_swapoff
 116	i386	sysinfo			sys_sysinfo			__ia32_compat_sys_sysinfo
 117	i386	ipc			sys_ipc				__ia32_compat_sys_ipc
 118	i386	fsync			sys_fsync			__ia32_sys_fsync
-119	i386	sigreturn		sys_sigreturn			sys32_sigreturn
+119	i386	sigreturn		sys_sigreturn			__ia32_compat_sys_sigreturn
 120	i386	clone			sys_clone			__ia32_compat_sys_x86_clone
 121	i386	setdomainname		sys_setdomainname		__ia32_sys_setdomainname
 122	i386	uname			sys_newuname			__ia32_sys_newuname
@@ -177,14 +177,14 @@
 163	i386	mremap			sys_mremap			__ia32_sys_mremap
 164	i386	setresuid		sys_setresuid16			__ia32_sys_setresuid16
 165	i386	getresuid		sys_getresuid16			__ia32_sys_getresuid16
-166	i386	vm86			sys_vm86			sys_ni_syscall
+166	i386	vm86			sys_vm86			__ia32_sys_ni_syscall
 167	i386	query_module
 168	i386	poll			sys_poll			__ia32_sys_poll
 169	i386	nfsservctl
 170	i386	setresgid		sys_setresgid16			__ia32_sys_setresgid16
 171	i386	getresgid		sys_getresgid16			__ia32_sys_getresgid16
 172	i386	prctl			sys_prctl			__ia32_sys_prctl
-173	i386	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
+173	i386	rt_sigreturn		sys_rt_sigreturn		__ia32_compat_sys_rt_sigreturn
 174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
 175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_compat_sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
@@ -440,3 +440,5 @@
 433	i386	fspick			sys_fspick			__ia32_sys_fspick
 434	i386	pidfd_open		sys_pidfd_open			__ia32_sys_pidfd_open
 435	i386	clone3			sys_clone3			__ia32_sys_clone3
+437	i386	openat2			sys_openat2			__ia32_sys_openat2
+438	i386	pidfd_getfd		sys_pidfd_getfd			__ia32_sys_pidfd_getfd
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c29976eca4a8..44d510bc9b78 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,8 @@
 433	common	fspick			__x64_sys_fspick
 434	common	pidfd_open		__x64_sys_pidfd_open
 435	common	clone3			__x64_sys_clone3/ptregs
+437	common	openat2			__x64_sys_openat2
+438	common	pidfd_getfd		__x64_sys_pidfd_getfd
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 2713490611a3..e010d4ae11f1 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -10,8 +10,7 @@
 
 	/* put return address in eax (arg1) */
 	.macro THUNK name, func, put_ret_addr_in_eax=0
-	.globl \name
-\name:
+SYM_CODE_START_NOALIGN(\name)
 	pushl %eax
 	pushl %ecx
 	pushl %edx
@@ -27,6 +26,7 @@
 	popl %eax
 	ret
 	_ASM_NOKPROBE(\name)
+SYM_CODE_END(\name)
 	.endm
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index ea5c4167086c..c5c3b6e86e62 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -12,7 +12,7 @@
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
-	ENTRY(\name)
+SYM_FUNC_START_NOALIGN(\name)
 	pushq %rbp
 	movq %rsp, %rbp
 
@@ -33,7 +33,7 @@
 
 	call \func
 	jmp  .L_restore
-	ENDPROC(\name)
+SYM_FUNC_END(\name)
 	_ASM_NOKPROBE(\name)
 	.endm
 
@@ -56,7 +56,7 @@
 #if defined(CONFIG_TRACE_IRQFLAGS) \
  || defined(CONFIG_DEBUG_LOCK_ALLOC) \
  || defined(CONFIG_PREEMPTION)
-.L_restore:
+SYM_CODE_START_LOCAL_NOALIGN(.L_restore)
 	popq %r11
 	popq %r10
 	popq %r9
@@ -69,4 +69,5 @@
 	popq %rbp
 	ret
 	_ASM_NOKPROBE(.L_restore)
+SYM_CODE_END(.L_restore)
 #endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0f2154106d01..2b75e80f6b41 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -87,11 +87,9 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
 #
-CFLAGS_REMOVE_vdso-note.o = -pg
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
-CFLAGS_REMOVE_vvar.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index d9ff616bb0f6..7d70935b6758 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -15,7 +15,7 @@
 #include "../../../../lib/vdso/gettimeofday.c"
 
 extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
-extern time_t __vdso_time(time_t *t);
+extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t);
 
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
@@ -25,12 +25,12 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
 
-time_t __vdso_time(time_t *t)
+__kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
 {
 	return __cvdso_time(t);
 }
 
-time_t time(time_t *t)	__attribute__((weak, alias("__vdso_time")));
+__kernel_old_time_t time(__kernel_old_time_t *t)	__attribute__((weak, alias("__vdso_time")));
 
 
 #if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 93c6dc7812d0..ea7e0155c604 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -16,18 +16,23 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
-	vvar_page = vvar_start;
+	vvar_start = . - 4 * PAGE_SIZE;
+	vvar_page  = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
 #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#define __VVAR_KERNEL_LDS
 #include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
 	pvclock_page = vvar_start + PAGE_SIZE;
 	hvclock_page = vvar_start + 2 * PAGE_SIZE;
+	timens_page  = vvar_start + 3 * PAGE_SIZE;
+
+#undef _ASM_X86_VVAR_H
+	/* Place all vvars in timens too at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
+#include <asm/vvar.h>
+#undef EMIT_VVAR
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3a4d8d4d39f8..3842873b3ae3 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@ enum {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 const int special_pages[] = {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 struct vdso_sym {
@@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = {
 	[sym_vvar_page] = {"vvar_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
+	[sym_timens_page] = {"timens_page", true},
 	{"VDSO32_NOTE_MASK", true},
 	{"__kernel_vsyscall", true},
 	{"__kernel_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 240626e7f55a..43842fade8fa 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,6 +11,7 @@
 #include <linux/smp.h>
 #include <linux/kernel.h>
 #include <linux/mm_types.h>
+#include <linux/elf.h>
 
 #include <asm/processor.h>
 #include <asm/vdso.h>
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 263d7433dea8..de1fff7188aa 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -62,7 +62,7 @@ __kernel_vsyscall:
 
 	/* Enter using int $0x80 */
 	int	$0x80
-GLOBAL(int80_landing_pad)
+SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL)
 
 	/*
 	 * Restore EDX and ECX in case they were clobbered.  EBP is not
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index f5937742b290..c1b8496b5606 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,16 +14,30 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
+
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
 #include <asm/vvar.h>
+#include <asm/tlb.h>
 #include <asm/page.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 #include <clocksource/hyperv_timer.h>
 
+#undef _ASM_X86_VVAR_H
+#define EMIT_VVAR(name, offset)	\
+	const size_t name ## _offset = offset;
+#include <asm/vvar.h>
+
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page + _vdso_data_offset);
+}
+#undef EMIT_VVAR
+
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
 #endif
@@ -37,6 +51,7 @@ void __init init_vdso_image(const struct vdso_image *image)
 						image->alt_len));
 }
 
+static const struct vm_special_mapping vvar_mapping;
 struct linux_binprm;
 
 static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
@@ -84,10 +99,74 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	return 0;
 }
 
+static int vvar_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	const struct vdso_image *image = new_vma->vm_mm->context.vdso_image;
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+	if (new_size != -image->sym_vvar_start)
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops().
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	if (down_write_killable(&mm->mmap_sem))
+		return -EINTR;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, &vvar_mapping))
+			zap_page_range(vma, vma->vm_start, size);
+	}
+
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+	unsigned long pfn;
 	long sym_offset;
 
 	if (!image)
@@ -107,8 +186,36 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		return VM_FAULT_SIGBUS;
 
 	if (sym_offset == image->sym_vvar_page) {
-		return vmf_insert_pfn(vma, vmf->address,
-				__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+		struct page *timens_page = find_timens_vvar_page(vma);
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the sym_vvar_page offset and
+		 * the real VVAR page is mapped with the sym_timens_page
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (timens_page) {
+			unsigned long addr;
+			vm_fault_t err;
+
+			/*
+			 * Optimization: inside time namespace pre-fault
+			 * VVAR page too. As on timens page there are only
+			 * offsets for clocks on VVAR, it'll be faulted
+			 * shortly by VDSO code.
+			 */
+			addr = vmf->address + (image->sym_timens_page - sym_offset);
+			err = vmf_insert_pfn(vma, addr, pfn);
+			if (unlikely(err & VM_FAULT_ERROR))
+				return err;
+
+			pfn = page_to_pfn(timens_page);
+		}
+
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_get_pvti_cpu0_va();
@@ -123,6 +230,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			return vmf_insert_pfn(vma, vmf->address,
 					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+	} else if (sym_offset == image->sym_timens_page) {
+		struct page *timens_page = find_timens_vvar_page(vma);
+
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	}
 
 	return VM_FAULT_SIGBUS;
@@ -136,6 +251,7 @@ static const struct vm_special_mapping vdso_mapping = {
 static const struct vm_special_mapping vvar_mapping = {
 	.name = "[vvar]",
 	.fault = vvar_fault,
+	.mremap = vvar_mremap,
 };
 
 /*
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e7c596dea947..44c33103a955 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -184,7 +184,7 @@ bool emulate_vsyscall(unsigned long error_code,
 	 */
 	switch (vsyscall_nr) {
 	case 0:
-		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+		if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) ||
 		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
 			ret = -EFAULT;
 			goto check_fault;
@@ -194,7 +194,7 @@ bool emulate_vsyscall(unsigned long error_code,
 		break;
 
 	case 1:
-		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+		if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) {
 			ret = -EFAULT;
 			goto check_fault;
 		}
@@ -222,7 +222,7 @@ bool emulate_vsyscall(unsigned long error_code,
 	 */
 	regs->orig_ax = syscall_nr;
 	regs->ax = -ENOSYS;
-	tmp = secure_computing(NULL);
+	tmp = secure_computing();
 	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
 		warn_bad_vsyscall(KERN_DEBUG, regs,
 				  "seccomp tried to change syscall nr or ip");
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 64c3e70b0556..1f22b6bbda68 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -14,6 +14,10 @@
 static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
 static unsigned long perf_nmi_window;
 
+/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
+#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
+#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -301,6 +305,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
 	return offset;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
+static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
+{
+	if (!(x86_pmu.flags & PMU_FL_PAIR))
+		return false;
+
+	switch (amd_get_event_code(hwc)) {
+	case 0x003:	return true;	/* Retired SSE/AVX FLOPs */
+	default:	return false;
+	}
+}
+
 static int amd_core_hw_config(struct perf_event *event)
 {
 	if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -316,15 +339,10 @@ static int amd_core_hw_config(struct perf_event *event)
 	else if (event->attr.exclude_guest)
 		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
 
-	return 0;
-}
+	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
+		event->hw.flags |= PERF_X86_EVENT_PAIR;
 
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
-{
-	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+	return 0;
 }
 
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -652,15 +670,7 @@ static void amd_pmu_disable_event(struct perf_event *event)
  */
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int active, handled;
-
-	/*
-	 * Obtain the active count before calling x86_pmu_handle_irq() since
-	 * it is possible that x86_pmu_handle_irq() may make a counter
-	 * inactive (through x86_pmu_stop).
-	 */
-	active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+	int handled;
 
 	/* Process any counter overflows */
 	handled = x86_pmu_handle_irq(regs);
@@ -670,8 +680,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	 * NMIs will be claimed if arriving within that window.
 	 */
 	if (handled) {
-		this_cpu_write(perf_nmi_tstamp,
-			       jiffies + perf_nmi_window);
+		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
 
 		return handled;
 	}
@@ -864,6 +873,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 	}
 }
 
+static struct event_constraint pair_constraint;
+
+static struct event_constraint *
+amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
+			       struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (amd_is_pair_event_code(hwc))
+		return &pair_constraint;
+
+	return &unconstrained;
+}
+
+static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
+					   struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (is_counter_pair(hwc))
+		--cpuc->n_pair;
+}
+
 static ssize_t amd_event_sysfs_show(char *page, u64 config)
 {
 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -907,33 +939,15 @@ static __initconst const struct x86_pmu amd_pmu = {
 
 static int __init amd_core_pmu_init(void)
 {
+	u64 even_ctr_mask = 0ULL;
+	int i;
+
 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
 		return 0;
 
-	/* Avoid calulating the value each time in the NMI handler */
+	/* Avoid calculating the value each time in the NMI handler */
 	perf_nmi_window = msecs_to_jiffies(100);
 
-	switch (boot_cpu_data.x86) {
-	case 0x15:
-		pr_cont("Fam15h ");
-		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-		break;
-	case 0x17:
-		pr_cont("Fam17h ");
-		/*
-		 * In family 17h, there are no event constraints in the PMC hardware.
-		 * We fallback to using default amd_get_event_constraints.
-		 */
-		break;
-	case 0x18:
-		pr_cont("Fam18h ");
-		/* Using default amd_get_event_constraints. */
-		break;
-	default:
-		pr_err("core perfctr but no constraints; unknown hardware!\n");
-		return -ENODEV;
-	}
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
@@ -948,6 +962,32 @@ static int __init amd_core_pmu_init(void)
 	 */
 	x86_pmu.amd_nb_constraints = 0;
 
+	if (boot_cpu_data.x86 == 0x15) {
+		pr_cont("Fam15h ");
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+	}
+	if (boot_cpu_data.x86 >= 0x17) {
+		pr_cont("Fam17h+ ");
+		/*
+		 * Family 17h and compatibles have constraints for Large
+		 * Increment per Cycle events: they may only be assigned an
+		 * even numbered counter that has a consecutive adjacent odd
+		 * numbered counter following it.
+		 */
+		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+			even_ctr_mask |= 1 << i;
+
+		pair_constraint = (struct event_constraint)
+				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
+				    x86_pmu.num_counters / 2, 0,
+				    PERF_X86_EVENT_PAIR);
+
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
+		x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
+		x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
+		x86_pmu.flags |= PMU_FL_PAIR;
+	}
+
 	pr_cont("core perfctr, ");
 	return 0;
 }
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7b21455d7504..3bb738f5a472 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -49,6 +49,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
+DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
 u64 __read_mostly hw_cache_event_ids
@@ -375,7 +376,7 @@ int x86_add_exclusive(unsigned int what)
 	 * LBR and BTS are still mutually exclusive.
 	 */
 	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
-		return 0;
+		goto out;
 
 	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
 		mutex_lock(&pmc_reserve_mutex);
@@ -387,6 +388,7 @@ int x86_add_exclusive(unsigned int what)
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 
+out:
 	atomic_inc(&active_events);
 	return 0;
 
@@ -397,11 +399,15 @@ fail_unlock:
 
 void x86_del_exclusive(unsigned int what)
 {
+	atomic_dec(&active_events);
+
+	/*
+	 * See the comment in x86_add_exclusive().
+	 */
 	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
 		return;
 
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
-	atomic_dec(&active_events);
 }
 
 int x86_setup_perfctr(struct perf_event *event)
@@ -612,6 +618,7 @@ void x86_pmu_disable_all(void)
 	int idx;
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -621,6 +628,8 @@ void x86_pmu_disable_all(void)
 			continue;
 		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu_config_addr(idx), val);
+		if (is_counter_pair(hwc))
+			wrmsrl(x86_pmu_config_addr(idx + 1), 0);
 	}
 }
 
@@ -693,7 +702,7 @@ struct sched_state {
 	int	counter;	/* counter index */
 	int	unassigned;	/* number of events to be assigned left */
 	int	nr_gp;		/* number of GP counters used */
-	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	u64	used;
 };
 
 /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
@@ -750,8 +759,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
 	sched->saved_states--;
 	sched->state = sched->saved[sched->saved_states];
 
-	/* continue with next counter: */
-	clear_bit(sched->state.counter++, sched->state.used);
+	/* this assignment didn't work out */
+	/* XXX broken vs EVENT_PAIR */
+	sched->state.used &= ~BIT_ULL(sched->state.counter);
+
+	/* try the next one */
+	sched->state.counter++;
 
 	return true;
 }
@@ -776,20 +789,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
-			if (!__test_and_set_bit(idx, sched->state.used))
-				goto done;
+			u64 mask = BIT_ULL(idx);
+
+			if (sched->state.used & mask)
+				continue;
+
+			sched->state.used |= mask;
+			goto done;
 		}
 	}
 
 	/* Grab the first unused counter starting with idx */
 	idx = sched->state.counter;
 	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used)) {
-			if (sched->state.nr_gp++ >= sched->max_gp)
-				return false;
+		u64 mask = BIT_ULL(idx);
 
-			goto done;
-		}
+		if (c->flags & PERF_X86_EVENT_PAIR)
+			mask |= mask << 1;
+
+		if (sched->state.used & mask)
+			continue;
+
+		if (sched->state.nr_gp++ >= sched->max_gp)
+			return false;
+
+		sched->state.used |= mask;
+		goto done;
 	}
 
 	return false;
@@ -866,12 +891,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c;
-	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct perf_event *e;
 	int n0, i, wmin, wmax, unsched = 0;
 	struct hw_perf_event *hwc;
-
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+	u64 used_mask = 0;
 
 	/*
 	 * Compute the number of events already present; see x86_pmu_add(),
@@ -914,6 +937,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * fastpath, try to reuse previous register
 	 */
 	for (i = 0; i < n; i++) {
+		u64 mask;
+
 		hwc = &cpuc->event_list[i]->hw;
 		c = cpuc->event_constraint[i];
 
@@ -925,11 +950,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (!test_bit(hwc->idx, c->idxmsk))
 			break;
 
+		mask = BIT_ULL(hwc->idx);
+		if (is_counter_pair(hwc))
+			mask |= mask << 1;
+
 		/* not already used */
-		if (test_bit(hwc->idx, used_mask))
+		if (used_mask & mask)
 			break;
 
-		__set_bit(hwc->idx, used_mask);
+		used_mask |= mask;
+
 		if (assign)
 			assign[i] = hwc->idx;
 	}
@@ -952,6 +982,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
 			gpmax /= 2;
 
+		/*
+		 * Reduce the amount of available counters to allow fitting
+		 * the extra Merge events needed by large increment events.
+		 */
+		if (x86_pmu.flags & PMU_FL_PAIR) {
+			gpmax = x86_pmu.num_counters - cpuc->n_pair;
+			WARN_ON(gpmax <= 0);
+		}
+
 		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
 					     wmax, gpmax, assign);
 	}
@@ -1032,6 +1071,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 			return -EINVAL;
 		cpuc->event_list[n] = leader;
 		n++;
+		if (is_counter_pair(&leader->hw))
+			cpuc->n_pair++;
 	}
 	if (!dogrp)
 		return n;
@@ -1046,6 +1087,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
 		cpuc->event_list[n] = event;
 		n++;
+		if (is_counter_pair(&event->hw))
+			cpuc->n_pair++;
 	}
 	return n;
 }
@@ -1232,6 +1275,13 @@ int x86_perf_event_set_period(struct perf_event *event)
 	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
 	/*
+	 * Clear the Merge event counter's upper 16 bits since
+	 * we currently declare a 48-bit counter width
+	 */
+	if (is_counter_pair(hwc))
+		wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+
+	/*
 	 * Due to erratum on certan cpu we need
 	 * a second write to be sure the register
 	 * is updated properly
@@ -1641,9 +1691,12 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = {
 
 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
 {
-	struct perf_pmu_events_attr *pmu_attr = \
+	struct perf_pmu_events_attr *pmu_attr =
 		container_of(attr, struct perf_pmu_events_attr, attr);
-	u64 config = x86_pmu.event_map(pmu_attr->id);
+	u64 config = 0;
+
+	if (pmu_attr->id < x86_pmu.max_events)
+		config = x86_pmu.event_map(pmu_attr->id);
 
 	/* string trumps id */
 	if (pmu_attr->event_str)
@@ -1712,6 +1765,9 @@ is_visible(struct kobject *kobj, struct attribute *attr, int idx)
 {
 	struct perf_pmu_events_attr *pmu_attr;
 
+	if (idx >= x86_pmu.max_events)
+		return 0;
+
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
 	/* str trumps id */
 	return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
@@ -2181,21 +2237,26 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (x86_pmu.attr_rdpmc_broken)
 		return -ENOTSUPP;
 
-	if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+	if (val != x86_pmu.attr_rdpmc) {
 		/*
-		 * Changing into or out of always available, aka
-		 * perf-event-bypassing mode.  This path is extremely slow,
+		 * Changing into or out of never available or always available,
+		 * aka perf-event-bypassing mode. This path is extremely slow,
 		 * but only root can trigger it, so it's okay.
 		 */
+		if (val == 0)
+			static_branch_inc(&rdpmc_never_available_key);
+		else if (x86_pmu.attr_rdpmc == 0)
+			static_branch_dec(&rdpmc_never_available_key);
+
 		if (val == 2)
 			static_branch_inc(&rdpmc_always_available_key);
-		else
+		else if (x86_pmu.attr_rdpmc == 2)
 			static_branch_dec(&rdpmc_always_available_key);
+
 		on_each_cpu(refresh_pce, NULL, 1);
+		x86_pmu.attr_rdpmc = val;
 	}
 
-	x86_pmu.attr_rdpmc = val;
-
 	return count;
 }
 
@@ -2243,6 +2304,13 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 		x86_pmu.sched_task(ctx, sched_in);
 }
 
+static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
+				  struct perf_event_context *next)
+{
+	if (x86_pmu.swap_task_ctx)
+		x86_pmu.swap_task_ctx(prev, next);
+}
+
 void perf_check_microcode(void)
 {
 	if (x86_pmu.check_microcode)
@@ -2297,6 +2365,7 @@ static struct pmu pmu = {
 	.event_idx		= x86_pmu_event_idx,
 	.sched_task		= x86_pmu_sched_task,
 	.task_ctx_size          = sizeof(struct x86_perf_task_context),
+	.swap_task_ctx		= x86_pmu_swap_task_ctx,
 	.check_period		= x86_pmu_check_period,
 
 	.aux_output_match	= x86_pmu_aux_output_match,
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 5ee3fed881d3..6a3b599ee0fe 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -63,9 +63,17 @@ struct bts_buffer {
 
 static struct pmu bts_pmu;
 
+static int buf_nr_pages(struct page *page)
+{
+	if (!PagePrivate(page))
+		return 1;
+
+	return 1 << page_private(page);
+}
+
 static size_t buf_size(struct page *page)
 {
-	return 1 << (PAGE_SHIFT + page_private(page));
+	return buf_nr_pages(page) * PAGE_SIZE;
 }
 
 static void *
@@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
 	/* count all the high order buffers */
 	for (pg = 0, nbuf = 0; pg < nr_pages;) {
 		page = virt_to_page(pages[pg]);
-		if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
-			return NULL;
-		pg += 1 << page_private(page);
+		pg += buf_nr_pages(page);
 		nbuf++;
 	}
 
@@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
 		unsigned int __nr_pages;
 
 		page = virt_to_page(pages[pg]);
-		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+		__nr_pages = buf_nr_pages(page);
 		buf->buf[nbuf].page = page;
 		buf->buf[nbuf].offset = offset;
 		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
@@ -549,9 +555,11 @@ static int bts_event_init(struct perf_event *event)
 	 * Note that the default paranoia setting permits unprivileged
 	 * users to profile the kernel.
 	 */
-	if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if (event->attr.exclude_kernel) {
+		ret = perf_allow_kernel(&event->attr);
+		if (ret)
+			return ret;
+	}
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fcef678c3423..3be51aa06e67 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3315,16 +3315,28 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (x86_pmu.version < 3)
 		return -EINVAL;
 
-	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	ret = perf_allow_cpu(&event->attr);
+	if (ret)
+		return ret;
 
 	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
 
 	return 0;
 }
 
+#ifdef CONFIG_RETPOLINE
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+#endif
+
 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
+#ifdef CONFIG_RETPOLINE
+	if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+		return intel_guest_get_msrs(nr);
+	else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+		return core_guest_get_msrs(nr);
+#endif
 	if (x86_pmu.guest_get_msrs)
 		return x86_pmu.guest_get_msrs(nr);
 	*nr = 0;
@@ -3819,6 +3831,12 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
 	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
+static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
+				    struct perf_event_context *next)
+{
+	intel_pmu_lbr_swap_task_ctx(prev, next);
+}
+
 static int intel_pmu_check_period(struct perf_event *event, u64 value)
 {
 	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@@ -3954,6 +3972,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 
 	.guest_get_msrs		= intel_guest_get_msrs,
 	.sched_task		= intel_pmu_sched_task,
+	.swap_task_ctx		= intel_pmu_swap_task_ctx,
 
 	.check_period		= intel_pmu_check_period,
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce83950036c5..4b94ae4ae369 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -7,6 +7,7 @@
 #include <asm/perf_event.h>
 #include <asm/tlbflush.h>
 #include <asm/insn.h>
+#include <asm/io.h>
 
 #include "../perf_event.h"
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ea54634eabf3..534c76606049 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -417,6 +417,29 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 	cpuc->last_log_id = ++task_ctx->log_id;
 }
 
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+				 struct perf_event_context *next)
+{
+	struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+
+	swap(prev->task_ctx_data, next->task_ctx_data);
+
+	/*
+	 * Architecture specific synchronization makes sense in
+	 * case both prev->task_ctx_data and next->task_ctx_data
+	 * pointers are allocated.
+	 */
+
+	prev_ctx_data = next->task_ctx_data;
+	next_ctx_data = prev->task_ctx_data;
+
+	if (!prev_ctx_data || !next_ctx_data)
+		return;
+
+	swap(prev_ctx_data->lbr_callstack_users,
+	     next_ctx_data->lbr_callstack_users);
+}
+
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index dee579efb2b2..a4cc66005ce8 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
 	 * the user needs special permissions to be able to use it
 	 */
 	if (p4_ht_active() && p4_event_bind_map[v].shared) {
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		v = perf_allow_cpu(&event->attr);
+		if (v)
+			return v;
 	}
 
 	/* ESCR EventMask bits may be invalid */
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 05e43d0f430b..1db7a51d9792 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -397,6 +397,20 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+static void pt_config_start(struct perf_event *event)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 ctl = event->hw.config;
+
+	ctl |= RTIT_CTL_TRACEEN;
+	if (READ_ONCE(pt->vmx_on))
+		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+	else
+		wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	WRITE_ONCE(event->hw.config, ctl);
+}
+
 /* Address ranges and their corresponding msr configuration registers */
 static const struct pt_address_range {
 	unsigned long	msr_a;
@@ -469,6 +483,7 @@ static u64 pt_config_filters(struct perf_event *event)
 static void pt_config(struct perf_event *event)
 {
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 reg;
 
 	/* First round: clear STATUS, in particular the PSB byte counter. */
@@ -478,7 +493,9 @@ static void pt_config(struct perf_event *event)
 	}
 
 	reg = pt_config_filters(event);
-	reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+	reg |= RTIT_CTL_TRACEEN;
+	if (!buf->single)
+		reg |= RTIT_CTL_TOPA;
 
 	/*
 	 * Previously, we had BRANCH_EN on by default, but now that PT has
@@ -501,10 +518,7 @@ static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
-	if (READ_ONCE(pt->vmx_on))
-		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
-	else
-		wrmsrl(MSR_IA32_RTIT_CTL, reg);
+	pt_config_start(event);
 }
 
 static void pt_config_stop(struct perf_event *event)
@@ -533,18 +547,6 @@ static void pt_config_stop(struct perf_event *event)
 	wmb();
 }
 
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-			     unsigned int output_off)
-{
-	u64 reg;
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
-	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
 /**
  * struct topa - ToPA metadata
  * @list:	linkage to struct pt_buffer's list of tables
@@ -602,6 +604,33 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
 #define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
 #define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
 
+static void pt_config_buffer(struct pt_buffer *buf)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 reg, mask;
+	void *base;
+
+	if (buf->single) {
+		base = buf->data_pages[0];
+		mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
+	} else {
+		base = topa_to_page(buf->cur)->table;
+		mask = (u64)buf->cur_idx;
+	}
+
+	reg = virt_to_phys(base);
+	if (pt->output_base != reg) {
+		pt->output_base = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+	}
+
+	reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
+	if (pt->output_mask != reg) {
+		pt->output_mask = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+	}
+}
+
 /**
  * topa_alloc() - allocate page-sized ToPA table
  * @cpu:	CPU on which to allocate.
@@ -802,6 +831,11 @@ static void pt_update_head(struct pt *pt)
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 topa_idx, base, old;
 
+	if (buf->single) {
+		local_set(&buf->data_size, buf->output_off);
+		return;
+	}
+
 	/* offset of the first region in this table from the beginning of buf */
 	base = buf->cur->offset + buf->output_off;
 
@@ -903,18 +937,21 @@ static void pt_handle_status(struct pt *pt)
  */
 static void pt_read_offset(struct pt_buffer *buf)
 {
-	u64 offset, base_topa;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct topa_page *tp;
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-	tp = phys_to_virt(base_topa);
-	buf->cur = &tp->topa;
+	if (!buf->single) {
+		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+		tp = phys_to_virt(pt->output_base);
+		buf->cur = &tp->topa;
+	}
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
 	/* offset within current output region */
-	buf->output_off = offset >> 32;
+	buf->output_off = pt->output_mask >> 32;
 	/* index of current output region within this table */
-	buf->cur_idx = (offset & 0xffffff80) >> 7;
+	if (!buf->single)
+		buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
 }
 
 static struct topa_entry *
@@ -1030,6 +1067,9 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 	unsigned long head = local64_read(&buf->head);
 	unsigned long idx, npages, wakeup;
 
+	if (buf->single)
+		return 0;
+
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
@@ -1111,13 +1151,17 @@ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 	if (buf->snapshot)
 		head &= (buf->nr_pages << PAGE_SHIFT) - 1;
 
-	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-	te = pt_topa_entry_for_page(buf, pg);
+	if (!buf->single) {
+		pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+		te = pt_topa_entry_for_page(buf, pg);
 
-	cur_tp = topa_entry_to_page(te);
-	buf->cur = &cur_tp->topa;
-	buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
-	buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+		cur_tp = topa_entry_to_page(te);
+		buf->cur = &cur_tp->topa;
+		buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+		buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+	} else {
+		buf->output_off = head;
+	}
 
 	local64_set(&buf->head, head);
 	local_set(&buf->data_size, 0);
@@ -1131,6 +1175,9 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
 {
 	struct topa *topa, *iter;
 
+	if (buf->single)
+		return;
+
 	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
 		/*
 		 * right now, this is in free_aux() path only, so
@@ -1176,6 +1223,36 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
 	return 0;
 }
 
+static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+{
+	struct page *p = virt_to_page(buf->data_pages[0]);
+	int ret = -ENOTSUPP, order = 0;
+
+	/*
+	 * We can use single range output mode
+	 * + in snapshot mode, where we don't need interrupts;
+	 * + if the hardware supports it;
+	 * + if the entire buffer is one contiguous allocation.
+	 */
+	if (!buf->snapshot)
+		goto out;
+
+	if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
+		goto out;
+
+	if (PagePrivate(p))
+		order = page_private(p);
+
+	if (1 << order != nr_pages)
+		goto out;
+
+	buf->single = true;
+	buf->nr_pages = nr_pages;
+	ret = 0;
+out:
+	return ret;
+}
+
 /**
  * pt_buffer_setup_aux() - set up topa tables for a PT buffer
  * @cpu:	Cpu on which to allocate, -1 means current.
@@ -1198,6 +1275,13 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
 	if (!nr_pages)
 		return NULL;
 
+	/*
+	 * Only support AUX sampling in snapshot mode, where we don't
+	 * generate NMIs.
+	 */
+	if (event->attr.aux_sample_size && !snapshot)
+		return NULL;
+
 	if (cpu == -1)
 		cpu = raw_smp_processor_id();
 	node = cpu_to_node(cpu);
@@ -1213,6 +1297,10 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
 
 	INIT_LIST_HEAD(&buf->tables);
 
+	ret = pt_buffer_try_single(buf, nr_pages);
+	if (!ret)
+		return buf;
+
 	ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
 	if (ret) {
 		kfree(buf);
@@ -1379,9 +1467,8 @@ void intel_pt_interrupt(void)
 			return;
 		}
 
-		pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-				 buf->output_off);
-		pt_config(event);
+		pt_config_buffer(buf);
+		pt_config_start(event);
 	}
 }
 
@@ -1444,8 +1531,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	WRITE_ONCE(pt->handle_nmi, 1);
 	hwc->state = 0;
 
-	pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-			 buf->output_off);
+	pt_config_buffer(buf);
 	pt_config(event);
 
 	return;
@@ -1496,6 +1582,52 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	}
 }
 
+static long pt_event_snapshot_aux(struct perf_event *event,
+				  struct perf_output_handle *handle,
+				  unsigned long size)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	unsigned long from = 0, to;
+	long ret;
+
+	if (WARN_ON_ONCE(!buf))
+		return 0;
+
+	/*
+	 * Sampling is only allowed on snapshot events;
+	 * see pt_buffer_setup_aux().
+	 */
+	if (WARN_ON_ONCE(!buf->snapshot))
+		return 0;
+
+	/*
+	 * Here, handle_nmi tells us if the tracing is on
+	 */
+	if (READ_ONCE(pt->handle_nmi))
+		pt_config_stop(event);
+
+	pt_read_offset(buf);
+	pt_update_head(pt);
+
+	to = local_read(&buf->data_size);
+	if (to < size)
+		from = buf->nr_pages << PAGE_SHIFT;
+	from += to - size;
+
+	ret = perf_output_copy_aux(&pt->handle, handle, from, to);
+
+	/*
+	 * If the tracing was on when we turned up, restart it.
+	 * Compiler barrier not needed as we couldn't have been
+	 * preempted by anything that touches pt->handle_nmi.
+	 */
+	if (pt->handle_nmi)
+		pt_config_start(event);
+
+	return ret;
+}
+
 static void pt_event_del(struct perf_event *event, int mode)
 {
 	pt_event_stop(event, PERF_EF_UPDATE);
@@ -1615,6 +1747,7 @@ static __init int pt_init(void)
 	pt_pmu.pmu.del			 = pt_event_del;
 	pt_pmu.pmu.start		 = pt_event_start;
 	pt_pmu.pmu.stop			 = pt_event_stop;
+	pt_pmu.pmu.snapshot_aux		 = pt_event_snapshot_aux;
 	pt_pmu.pmu.read			 = pt_event_read;
 	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
 	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 1d2bb7572374..96906a62aacd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -64,6 +64,7 @@ struct pt_pmu {
  * @lost:	if data was lost/truncated
  * @head:	logical write offset inside the buffer
  * @snapshot:	if this is for a snapshot/overwrite counter
+ * @single:	use Single Range Output instead of ToPA
  * @stop_pos:	STOP topa entry index
  * @intr_pos:	INT topa entry index
  * @stop_te:	STOP topa entry pointer
@@ -80,6 +81,7 @@ struct pt_buffer {
 	local_t			data_size;
 	local64_t		head;
 	bool			snapshot;
+	bool			single;
 	long			stop_pos, intr_pos;
 	struct topa_entry	*stop_te, *intr_te;
 	void			**data_pages;
@@ -111,16 +113,20 @@ struct pt_filters {
 
 /**
  * struct pt - per-cpu pt context
- * @handle:	perf output handle
+ * @handle:		perf output handle
  * @filters:		last configured filters
- * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
- * @vmx_on:	1 if VMX is ON on this cpu
+ * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:		1 if VMX is ON on this cpu
+ * @output_base:	cached RTIT_OUTPUT_BASE MSR value
+ * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
  */
 struct pt {
 	struct perf_output_handle handle;
 	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
+	u64			output_base;
+	u64			output_mask;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 5053a403e4ae..09913121e726 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -741,6 +741,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS,	model_hsw),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L,		model_skl),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE,		model_skl),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L,		model_skl),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE,		model_skl),
 	{},
 };
 
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index dbaa1b088a30..c37cb12d0ef6 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -15,6 +15,7 @@
 #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC		0x1910
 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC		0x190f
 #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC		0x191f
+#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC		0x1918
 #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC		0x590c
 #define PCI_DEVICE_ID_INTEL_KBL_U_IMC		0x5904
 #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC		0x5914
@@ -658,6 +659,10 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
 	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
@@ -826,6 +831,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
 	IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
 	IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
+	IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver),  /* Xeon E3 V5 Gen Core processor */
 	IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core Y */
 	IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U */
 	IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U Quad Core */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10a5ec79e48..ad20220af303 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -369,11 +369,6 @@
 #define SNR_M2M_PCI_PMON_BOX_CTL		0x438
 #define SNR_M2M_PCI_PMON_UMASK_EXT		0xff
 
-/* SNR PCIE3 */
-#define SNR_PCIE3_PCI_PMON_CTL0			0x508
-#define SNR_PCIE3_PCI_PMON_CTR0			0x4e8
-#define SNR_PCIE3_PCI_PMON_BOX_CTL		0x4e4
-
 /* SNR IMC */
 #define SNR_IMC_MMIO_PMON_FIXED_CTL		0x54
 #define SNR_IMC_MMIO_PMON_FIXED_CTR		0x38
@@ -4328,27 +4323,12 @@ static struct intel_uncore_type snr_uncore_m2m = {
 	.format_group	= &snr_m2m_uncore_format_group,
 };
 
-static struct intel_uncore_type snr_uncore_pcie3 = {
-	.name		= "pcie3",
-	.num_counters	= 4,
-	.num_boxes	= 1,
-	.perf_ctr_bits	= 48,
-	.perf_ctr	= SNR_PCIE3_PCI_PMON_CTR0,
-	.event_ctl	= SNR_PCIE3_PCI_PMON_CTL0,
-	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
-	.box_ctl	= SNR_PCIE3_PCI_PMON_BOX_CTL,
-	.ops		= &ivbep_uncore_pci_ops,
-	.format_group	= &ivbep_uncore_format_group,
-};
-
 enum {
 	SNR_PCI_UNCORE_M2M,
-	SNR_PCI_UNCORE_PCIE3,
 };
 
 static struct intel_uncore_type *snr_pci_uncores[] = {
 	[SNR_PCI_UNCORE_M2M]		= &snr_uncore_m2m,
-	[SNR_PCI_UNCORE_PCIE3]		= &snr_uncore_pcie3,
 	NULL,
 };
 
@@ -4357,10 +4337,6 @@ static const struct pci_device_id snr_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
 		.driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0),
 	},
-	{ /* PCIe3 */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
-		.driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
-	},
 	{ /* end: all zeroes */ }
 };
 
@@ -4536,6 +4512,7 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
 	INTEL_UNCORE_EVENT_DESC(write,		"event=0xff,umask=0x21"),
 	INTEL_UNCORE_EVENT_DESC(write.scale,	"3.814697266e-6"),
 	INTEL_UNCORE_EVENT_DESC(write.unit,	"MiB"),
+	{ /* end: all zeroes */ },
 };
 
 static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = {
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ecacfbf4ebc1..f1cd1ca1a77b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0200 /* use PEBS auto-reload */
 #define PERF_X86_EVENT_LARGE_PEBS	0x0400 /* use large PEBS */
 #define PERF_X86_EVENT_PEBS_VIA_PT	0x0800 /* use PT buffer for PEBS */
+#define PERF_X86_EVENT_PAIR		0x1000 /* Large Increment per Cycle */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -272,6 +273,7 @@ struct cpu_hw_events {
 	struct amd_nb			*amd_nb;
 	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
 	u64				perf_ctr_virt_mask;
+	int				n_pair; /* Large increment events */
 
 	void				*kfree_on_online[X86_PERF_KFREE_MAX];
 };
@@ -683,9 +685,18 @@ struct x86_pmu {
 	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];
 
 	/*
+	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
+	 * switch helper to bridge calls from perf/core to perf/x86.
+	 * See struct pmu::swap_task_ctx() usage for examples;
+	 */
+	void		(*swap_task_ctx)(struct perf_event_context *prev,
+					 struct perf_event_context *next);
+
+	/*
 	 * AMD bits
 	 */
 	unsigned int	amd_nb_constraints : 1;
+	u64		perf_ctr_pair_en;
 
 	/*
 	 * Extra registers for events
@@ -735,6 +746,7 @@ do {									\
 #define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 #define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
 #define PMU_FL_TFA		0x20 /* deal with TSX force abort */
+#define PMU_FL_PAIR		0x40 /* merge counters for large incr. events */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -830,6 +842,11 @@ int x86_pmu_hw_config(struct perf_event *event);
 
 void x86_pmu_disable_all(void);
 
+static inline bool is_counter_pair(struct hw_perf_event *hwc)
+{
+	return hwc->flags & PERF_X86_EVENT_PAIR;
+}
+
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
@@ -837,6 +854,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 	if (hwc->extra_reg.reg)
 		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+
+	/*
+	 * Add enabled Merge event on next counter
+	 * if large increment event being enabled on this counter
+	 */
+	if (is_counter_pair(hwc))
+		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+
 	wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
 }
 
@@ -853,6 +878,9 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 
 	wrmsrl(hwc->config_base, hwc->config);
+
+	if (is_counter_pair(hwc))
+		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
 }
 
 void x86_pmu_enable_event(struct perf_event *event);
@@ -1016,6 +1044,9 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+				 struct perf_event_context *next);
+
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
 
 u64 lbr_from_signext_quirk_wr(u64 val);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index e01078e93dd3..40e0e322161d 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -194,10 +194,20 @@ do_ex_hypercall:
 
 static bool __send_ipi_one(int cpu, int vector)
 {
-	struct cpumask mask = CPU_MASK_NONE;
+	int vp = hv_cpu_number_to_vp_number(cpu);
 
-	cpumask_set_cpu(cpu, &mask);
-	return __send_ipi_mask(&mask, vector);
+	trace_hyperv_send_ipi_one(cpu, vector);
+
+	if (!hv_hypercall_pg || (vp == VP_INVAL))
+		return false;
+
+	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+		return false;
+
+	if (vp >= 64)
+		return __send_ipi_mask_ex(cpumask_of(cpu), vector);
+
+	return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
 }
 
 static void hv_send_ipi(int cpu, int vector)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2db3972c0e0f..caaf4dce99bf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,7 @@
  * Author : K. Y. Srinivasan <kys@microsoft.com>
  */
 
+#include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/types.h>
 #include <asm/apic.h>
@@ -45,6 +46,14 @@ void *hv_alloc_hyperv_page(void)
 }
 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
 
+void *hv_alloc_hyperv_zeroed_page(void)
+{
+        BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
+
+        return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
+
 void hv_free_hyperv_page(unsigned long addr)
 {
 	free_page(addr);
@@ -311,6 +320,12 @@ void __init hyperv_init(void)
 	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
+	/*
+	 * Ignore any errors in setting up stimer clockevents
+	 * as we can run with the LAPIC timer as a fallback.
+	 */
+	(void)hv_stimer_alloc();
+
 	hv_apic_init();
 
 	x86_init.pci.arch_init = hv_pci_init;
@@ -431,3 +446,9 @@ bool hv_is_hyperv_initialized(void)
 	return hypercall_msr.enable;
 }
 EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
+
+bool hv_is_hibernation_supported(void)
+{
+	return acpi_sleep_state_supported(ACPI_STATE_S4);
+}
+EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1cee10091b9f..a3aefe9b9401 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -21,6 +21,7 @@
 #include <linux/personality.h>
 #include <linux/compat.h>
 #include <linux/binfmts.h>
+#include <linux/syscalls.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
 #include <asm/fpu/internal.h>
@@ -113,12 +114,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 	err |= fpu__restore_sig(buf, 1);
 
-	force_iret();
-
 	return err;
 }
 
-asmlinkage long sys32_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
@@ -144,7 +143,7 @@ badframe:
 	return 0;
 }
 
-asmlinkage long sys32_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_ia32 __user *frame;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index bc9693c9107e..ca0976456a6b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -13,7 +13,6 @@
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mpspec.h>
-#include <asm/realmode.h>
 #include <asm/x86_init.h>
 
 #ifdef CONFIG_ACPI_APEI
@@ -62,7 +61,7 @@ static inline void acpi_disable_pci(void)
 extern int (*acpi_suspend_lowlevel)(void);
 
 /* Physical address to resume after wakeup */
-#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
+unsigned long acpi_get_wakeup_address(void);
 
 /*
  * Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..cd339b88d5d4 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -7,9 +7,11 @@
 # define __ASM_FORM_RAW(x)     x
 # define __ASM_FORM_COMMA(x) x,
 #else
-# define __ASM_FORM(x)	" " #x " "
-# define __ASM_FORM_RAW(x)     #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+
+# define __ASM_FORM(x)	" " __stringify(x) " "
+# define __ASM_FORM_RAW(x)     __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
 #endif
 
 #ifndef __x86_64__
@@ -139,9 +141,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
 	_ASM_ALIGN ;						\
@@ -170,9 +169,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 7d1f6a49bfae..062cdecb2f24 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -388,7 +388,9 @@ static __always_inline int fls64(__u64 x)
 
 #include <asm-generic/bitops/const_hweight.h>
 
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
 
 #include <asm-generic/bitops/le.h>
 
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
deleted file mode 100644
index facd374a1bf7..000000000000
--- a/arch/x86/include/asm/calgary.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Derived from include/asm-powerpc/iommu.h
- *
- * Copyright IBM Corporation, 2006-2007
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#ifndef _ASM_X86_CALGARY_H
-#define _ASM_X86_CALGARY_H
-
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/timer.h>
-#include <asm/types.h>
-
-struct iommu_table {
-	const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
-	unsigned long  it_base;      /* mapped address of tce table */
-	unsigned long  it_hint;      /* Hint for next alloc */
-	unsigned long *it_map;       /* A simple allocation bitmap for now */
-	void __iomem  *bbar;         /* Bridge BAR */
-	u64	       tar_val;      /* Table Address Register */
-	struct timer_list watchdog_timer;
-	spinlock_t     it_lock;      /* Protects it_map */
-	unsigned int   it_size;      /* Size of iommu table in entries */
-	unsigned char  it_busno;     /* Bus number this table belongs to */
-};
-
-struct cal_chipset_ops {
-	void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev);
-	void (*tce_cache_blast)(struct iommu_table *tbl);
-	void (*dump_error_regs)(struct iommu_table *tbl);
-};
-
-#define TCE_TABLE_SIZE_UNSPECIFIED	~0
-#define TCE_TABLE_SIZE_64K		0
-#define TCE_TABLE_SIZE_128K		1
-#define TCE_TABLE_SIZE_256K		2
-#define TCE_TABLE_SIZE_512K		3
-#define TCE_TABLE_SIZE_1M		4
-#define TCE_TABLE_SIZE_2M		5
-#define TCE_TABLE_SIZE_4M		6
-#define TCE_TABLE_SIZE_8M		7
-
-extern int use_calgary;
-
-#ifdef CONFIG_CALGARY_IOMMU
-extern int detect_calgary(void);
-#else
-static inline int detect_calgary(void) { return -ENODEV; }
-#endif
-
-#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..52e9f3480f69 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t;
 	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
 #endif
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	compat_uptr_t sp;
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 8348f7d69fd5..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -6,6 +6,7 @@
 #include <linux/percpu-defs.h>
 #include <asm/processor.h>
 #include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
 
 #ifdef CONFIG_X86_64
 
@@ -65,6 +66,13 @@ enum exception_stack_ordering {
 
 #endif
 
+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+	unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+	struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
+
 /*
  * cpu_entry_area is a percpu region that contains things needed by the CPU
  * and early entry/exit code.  Real types aren't used for all fields here
@@ -78,10 +86,19 @@ struct cpu_entry_area {
 
 	/*
 	 * The GDT is just below entry_stack and thus serves (on x86_64) as
-	 * a a read-only guard page.
+	 * a read-only guard page. On 32-bit the GDT must be writeable, so
+	 * it needs an extra guard page.
 	 */
+#ifdef CONFIG_X86_32
+	char guard_entry_stack[PAGE_SIZE];
+#endif
 	struct entry_stack_page entry_stack_page;
 
+#ifdef CONFIG_X86_32
+	char guard_doublefault_stack[PAGE_SIZE];
+	struct doublefault_stack doublefault_stack;
+#endif
+
 	/*
 	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
 	 * we need task switches to work, and task switches write to the TSS.
@@ -94,7 +111,6 @@ struct cpu_entry_area {
 	 */
 	struct cea_exception_stacks estacks;
 #endif
-#ifdef CONFIG_CPU_SUP_INTEL
 	/*
 	 * Per CPU debug store for Intel performance monitoring. Wastes a
 	 * full page at the moment.
@@ -105,11 +121,13 @@ struct cpu_entry_area {
 	 * Reserve enough fixmap PTEs.
 	 */
 	struct debug_store_buffers cpu_debug_buffers;
-#endif
 };
 
-#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+#define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+/* Total size includes the readonly IDT mapping page as well: */
+#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
@@ -117,14 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
 extern void setup_cpu_entry_areas(void);
 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
 
-#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE			\
-	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
-
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
 
 static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c4fbe379cc0b..f3327cb56edf 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -220,6 +220,7 @@
 #define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
 #define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
 #define X86_FEATURE_IBRS_ENHANCED	( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL	( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -292,6 +293,7 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU		(13*32+ 4) /* Read processor register at user level */
 #define X86_FEATURE_WBNOINVD		(13*32+ 9) /* WBNOINVD instruction */
 #define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
@@ -356,6 +358,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM		(18*32+ 4) /* Fast Short Rep Mov */
 #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index 0acf5ee45a21..f58de66091e5 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -2,10 +2,17 @@
 #ifndef _ASM_X86_CRASH_H
 #define _ASM_X86_CRASH_H
 
+struct kimage;
+
 int crash_load_segments(struct kimage *image);
-int crash_copy_backup_region(struct kimage *image);
 int crash_setup_memmap_entries(struct kimage *image,
 		struct boot_params *params);
 void crash_smp_send_stop(void);
 
+#ifdef CONFIG_KEXEC_CORE
+void __init crash_reserve_low_1M(void);
+#else
+static inline void __init crash_reserve_low_1M(void) { }
+#endif
+
 #endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index a5d86fc0593f..f6d91861cb14 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -26,71 +26,66 @@ struct camellia_xts_ctx {
 
 extern int __camellia_setkey(struct camellia_ctx *cctx,
 			     const unsigned char *key,
-			     unsigned int key_len, u32 *flags);
+			     unsigned int key_len);
 
 extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			       unsigned int keylen);
 
 /* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
-				 const u8 *src);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+				   bool xor);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-					const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-				      const u8 *src);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+					bool xor);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
 
 /* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
-
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-				    const u8 *src)
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
+
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
+
+static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
 {
 	__camellia_enc_blk(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
 {
 	__camellia_enc_blk(ctx, dst, src, true);
 }
 
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
 					 const u8 *src)
 {
 	__camellia_enc_blk_2way(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
 					     const u8 *src)
 {
 	__camellia_enc_blk_2way(ctx, dst, src, true);
 }
 
 /* glue helpers */
-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
+extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
 			       le128 *iv);
-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
 				    le128 *iv);
 
-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
+			     le128 *iv);
+extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
+			     le128 *iv);
 
 #endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 8d4a8e1226ee..777c0f63418c 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -11,18 +11,13 @@
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
 				       le128 *iv);
-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
 				       le128 *iv);
 
-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
-
 struct common_glue_func_entry {
 	unsigned int num_blocks; /* number of blocks that @fn will process */
 	union {
@@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			       common_glue_func_t tweak_fn, void *tweak_ctx,
 			       void *crypt_ctx, bool decrypt);
 
-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
-				      le128 *iv, common_glue_func_t fn);
+extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
+				      const u8 *src, le128 *iv,
+				      common_glue_func_t fn);
 
 #endif /* _CRYPTO_GLUE_HELPER_H */
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index db7c9cc32234..251c2c89d7cf 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
 	struct serpent_ctx crypt_ctx;
 };
 
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
 
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src);
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
-				     const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+				     le128 *iv);
 
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
 					 const u8 *src, le128 *iv);
 
-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
 				le128 *iv);
 
-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
 
 extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int keylen);
diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index 1a345e8a7496..860ca248914b 100644
--- a/arch/x86/include/asm/crypto/serpent-sse2.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -9,25 +9,23 @@
 
 #define SERPENT_PARALLEL_BLOCKS 4
 
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
 				       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
 				     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
 	__serpent_enc_blk_4way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-					    const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+					    u8 *dst, const u8 *src)
 {
 	__serpent_enc_blk_4way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
 	serpent_dec_blk_4way(ctx, dst, src);
 }
@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
 
 #define SERPENT_PARALLEL_BLOCKS 8
 
-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
 				       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
 				     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-				   const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
 	__serpent_enc_blk_8way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-				       const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+					    u8 *dst, const u8 *src)
 {
 	__serpent_enc_blk_8way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-				   const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
 	serpent_dec_blk_8way(ctx, dst, src);
 }
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index f618bf272b90..2c377a8042e1 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -7,22 +7,19 @@
 #include <crypto/b128ops.h>
 
 /* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
+asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				       const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
+asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
+				       bool xor);
+asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
 
 /* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
 				le128 *iv);
-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
 				     le128 *iv);
 
 #endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index a8f6c809d9b1..5e12c63b47aa 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,9 +6,6 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
-#ifdef CONFIG_STA2X11
-	bool is_sta2x11;
-#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 41d069fa8f2f..4ea8584682f9 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -16,7 +16,7 @@
 # define DISABLE_SMAP	(1<<(X86_FEATURE_SMAP & 31))
 #endif
 
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP	0
 #else
 # define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31))
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
deleted file mode 100644
index 1a19251eaac9..000000000000
--- a/arch/x86/include/asm/dma-direct.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_DMA_DIRECT_H
-#define ASM_X86_DMA_DIRECT_H 1
-
-bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-
-#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
new file mode 100644
index 000000000000..af9a14ac8962
--- /dev/null
+++ b/arch/x86/include/asm/doublefault.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..314f75d886d0 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,6 +29,14 @@ enum e820_type {
 	E820_TYPE_PRAM		= 12,
 
 	/*
+	 * Special-purpose memory is indicated to the system via the
+	 * EFI_MEMORY_SP attribute. Define an e820 translation of this
+	 * memory type for the purpose of reserving this range and
+	 * marking it with the IORES_DESC_SOFT_RESERVED designation.
+	 */
+	E820_TYPE_SOFT_RESERVED	= 0xefffffff,
+
+	/*
 	 * Reserved RAM used by the kernel itself if
 	 * CONFIG_INTEL_TXT=y is enabled, memory of this type
 	 * will be included in the S3 integrity calculation
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 43a82e59c59d..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
 #include <asm/tlb.h>
 #include <asm/nospec-branch.h>
 #include <asm/mmu_context.h>
+#include <linux/build_bug.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
  * This is the main reason why we're doing stable VA mappings for RT
  * services.
  *
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
  */
-#define EFI_OLD_MEMMAP		EFI_ARCH_1
+#define EFI_UV1_MEMMAP         EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+	return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
 
 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
@@ -34,10 +38,46 @@
 
 #define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF
 
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
 
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__,	\
+	__efi_arg_sentinel(7), __efi_arg_sentinel(6),		\
+	__efi_arg_sentinel(5), __efi_arg_sentinel(4),		\
+	__efi_arg_sentinel(3), __efi_arg_sentinel(2),		\
+	__efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...)	\
+	__take_second_arg(n,					\
+		({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
 
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
+ */
+
+#define __efi_nargs_check(f, n, ...)					\
+	__efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({					\
+	BUILD_BUG_ON_MSG(						\
+		(p) > (n),						\
+		#f " called with too many arguments (" #p ">" #n ")");	\
+})
+
+#ifdef CONFIG_X86_32
 #define arch_efi_call_virt_setup()					\
 ({									\
 	kernel_fpu_begin();						\
@@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
 })
 
 
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-#define arch_efi_call_virt(p, f, args...)				\
-({									\
-	((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args);	\
-})
+#define arch_efi_call_virt(p, f, args...)	p->f(args)
 
 #define efi_ioremap(addr, size, type, attr)	ioremap_cache(addr, size)
 
@@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
 #define EFI_LOADER_SIGNATURE	"EL64"
 
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
 
-#define efi_call_phys(f, args...)		efi_call((f), args)
+#define efi_call(...) ({						\
+	__efi_nargs_check(efi_call, 7, __VA_ARGS__);			\
+	__efi_call(__VA_ARGS__);					\
+})
 
 /*
  * struct efi_scratch - Scratch space used while switching to/from efi_mm
@@ -85,7 +122,7 @@ struct efi_scratch {
 	kernel_fpu_begin();						\
 	firmware_restrict_branch_speculation_start();			\
 									\
-	if (!efi_enabled(EFI_OLD_MEMMAP))				\
+	if (!efi_have_uv1_memmap())					\
 		efi_switch_mm(&efi_mm);					\
 })
 
@@ -94,7 +131,7 @@ struct efi_scratch {
 
 #define arch_efi_call_virt_teardown()					\
 ({									\
-	if (!efi_enabled(EFI_OLD_MEMMAP))				\
+	if (!efi_have_uv1_memmap())					\
 		efi_switch_mm(efi_scratch.prev_mm);			\
 									\
 	firmware_restrict_branch_speculation_end();			\
@@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_print_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -140,7 +175,8 @@ extern void efi_delete_dummy_variable(void);
 extern void efi_switch_mm(struct mm_struct *mm);
 extern void efi_recover_from_page_fault(unsigned long phys_addr);
 extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
+extern pgd_t * __init efi_uv1_memmap_phys_prolog(void);
+extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
 
 struct efi_setup_data {
 	u64 fw_vendor;
@@ -153,97 +189,150 @@ struct efi_setup_data {
 extern u64 efi_setup;
 
 #ifdef CONFIG_EFI
+extern efi_status_t __efi64_thunk(u32, ...);
 
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({						\
+	__efi_nargs_check(efi64_thunk, 6, __VA_ARGS__);			\
+	__efi64_thunk(__VA_ARGS__);					\
+})
+
+static inline bool efi_is_mixed(void)
 {
-	return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+	if (!IS_ENABLED(CONFIG_EFI_MIXED))
+		return false;
+	return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
 }
 
 static inline bool efi_runtime_supported(void)
 {
-	if (efi_is_native())
+	if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
 		return true;
 
-	if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
-		return true;
-
-	return false;
+	return IS_ENABLED(CONFIG_EFI_MIXED);
 }
 
 extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 
 extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
 
-#ifdef CONFIG_EFI_MIXED
 extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
-	void *phys_set_virtual_address_map,
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
-	void *phys_set_virtual_address_map,
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	return EFI_SUCCESS;
-}
-#endif /* CONFIG_EFI_MIXED */
-
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+					 unsigned long descriptor_size,
+					 u32 descriptor_version,
+					 efi_memory_desc_t *virtual_map);
 
 /* arch specific definitions used by the stub code */
 
-struct efi_config {
-	u64 image_handle;
-	u64 table;
-	u64 runtime_services;
-	u64 boot_services;
-	u64 text_output;
-	efi_status_t (*call)(unsigned long, ...);
-	bool is64;
-} __packed;
-
-__pure const struct efi_config *__efi_early(void);
+__attribute_const__ bool efi_is_64bit(void);
 
-static inline bool efi_is_64bit(void)
+static inline bool efi_is_native(void)
 {
 	if (!IS_ENABLED(CONFIG_X86_64))
-		return false;
-
+		return true;
 	if (!IS_ENABLED(CONFIG_EFI_MIXED))
 		return true;
+	return efi_is_64bit();
+}
 
-	return __efi_early()->is64;
+#define efi_mixed_mode_cast(attr)					\
+	__builtin_choose_expr(						\
+		__builtin_types_compatible_p(u32, __typeof__(attr)),	\
+			(unsigned long)(attr), (attr))
+
+#define efi_table_attr(inst, attr)					\
+	(efi_is_native()						\
+		? inst->attr						\
+		: (__typeof__(inst->attr))				\
+			efi_mixed_mode_cast(inst->mixed_mode.attr))
+
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ * 	free_pages(addr, size)
+ * must be translated to
+ * 	efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
+
+static inline void *efi64_zero_upper(void *p)
+{
+	((u32 *)p)[1] = 0;
+	return p;
 }
 
-#define efi_table_attr(table, attr, instance)				\
-	(efi_is_64bit() ?						\
-		((table##_64_t *)(unsigned long)instance)->attr :	\
-		((table##_32_t *)(unsigned long)instance)->attr)
+#define __efi64_argmap_free_pages(addr, size)				\
+	((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver)	\
+	((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer)		\
+	((type), (size), efi64_zero_upper(buffer))
+
+#define __efi64_argmap_handle_protocol(handle, protocol, interface)	\
+	((handle), (protocol), efi64_zero_upper(interface))
+
+#define __efi64_argmap_locate_protocol(protocol, reg, interface)	\
+	((protocol), (reg), efi64_zero_upper(interface))
 
-#define efi_call_proto(protocol, f, instance, ...)			\
-	__efi_early()->call(efi_table_attr(protocol, f, instance),	\
-		instance, ##__VA_ARGS__)
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func)	\
+	((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus),	\
+	 efi64_zero_upper(dev), efi64_zero_upper(func))
 
-#define efi_call_early(f, ...)						\
-	__efi_early()->call(efi_table_attr(efi_boot_services, f,	\
-		__efi_early()->boot_services), __VA_ARGS__)
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
+
+#define __efi64_thunk_map(inst, func, ...)				\
+	efi64_thunk(inst->mixed_mode.func,				\
+		__efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__),	\
+			       (__VA_ARGS__)))
+
+#define __efi64_argmap(mapped, args)					\
+	__PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
+
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
 
-#define __efi_call_early(f, ...)					\
-	__efi_early()->call((unsigned long)f, __VA_ARGS__);
+/* The three macros below handle dispatching via the thunk if needed */
 
-#define efi_call_runtime(f, ...)					\
-	__efi_early()->call(efi_table_attr(efi_runtime_services, f,	\
-		__efi_early()->runtime_services), __VA_ARGS__)
+#define efi_call_proto(inst, func, ...)					\
+	(efi_is_native()						\
+		? inst->func(inst, ##__VA_ARGS__)			\
+		: __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__))
+
+#define efi_bs_call(func, ...)						\
+	(efi_is_native()						\
+		? efi_system_table()->boottime->func(__VA_ARGS__)	\
+		: __efi64_thunk_map(efi_table_attr(efi_system_table(),	\
+				boottime), func, __VA_ARGS__))
+
+#define efi_rt_call(func, ...)						\
+	(efi_is_native()						\
+		? efi_system_table()->runtime->func(__VA_ARGS__)	\
+		: __efi64_thunk_map(efi_table_attr(efi_system_table(),	\
+				runtime), func, __VA_ARGS__))
 
 extern bool efi_reboot_required(void);
 extern bool efi_is_table_address(unsigned long phys_addr);
 
+extern void efi_find_mirror(void);
+extern void efi_reserve_boot_services(void);
 #else
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
@@ -254,6 +343,20 @@ static inline  bool efi_is_table_address(unsigned long phys_addr)
 {
 	return false;
 }
+static inline void efi_find_mirror(void)
+{
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
 #endif /* CONFIG_EFI */
 
+#ifdef CONFIG_EFI_FAKE_MEMMAP
+extern void __init efi_fake_memmap_early(void);
+#else
+static inline void efi_fake_memmap_early(void)
+{
+}
+#endif
+
 #endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h
new file mode 100644
index 000000000000..70f5b98a5286
--- /dev/null
+++ b/arch/x86/include/asm/emulate_prefix.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+
+#define __XEN_EMULATE_PREFIX  0x0f,0x0b,0x78,0x65,0x6e  /* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX  0x0f,0x0b,0x6b,0x76,0x6d	/* ud2 ; .ascii "kvm" */
+
+#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0c47aa82e2e2..28183ee3cc42 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -156,7 +156,7 @@ extern pte_t *kmap_pte;
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
-void native_set_fixmap(enum fixed_addresses idx,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
 		       phys_addr_t phys, pgprot_t flags);
 
 #ifndef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 4c95c365058a..44c48e34d799 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
 
 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+	return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
 }
 
 /*
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c38a66661576..85be2f506272 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,14 +28,25 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 	return addr;
 }
 
+/*
+ * When a ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this, we
+ * place the direct caller in the ORIG_AX part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+	/* Emulate a call */
+	regs->orig_ax = addr;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 struct dyn_arch_ftrace {
 	/* No extra data needed for x86 */
 };
 
-int ftrace_int3_handler(struct pt_regs *regs);
-
 #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
 
 #endif /*  CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 7741e211f7f5..5f10f7f2098d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -86,6 +86,8 @@
 #define HV_X64_ACCESS_FREQUENCY_MSRS		BIT(11)
 /* AccessReenlightenmentControls privilege */
 #define HV_X64_ACCESS_REENLIGHTENMENT		BIT(13)
+/* AccessTscInvariantControls privilege */
+#define HV_X64_ACCESS_TSC_INVARIANT		BIT(15)
 
 /*
  * Feature identification: indicates which flags were specified at partition
@@ -278,6 +280,9 @@
 #define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
 #define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108
 
+/* TSC invariant control */
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL	0x40000118
+
 /*
  * Declare the MSR used to setup pages used to communicate with the hypervisor.
  */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 154f27be8bfc..5c1ae3eff9d4 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -45,6 +45,7 @@ struct insn {
 		struct insn_field immediate2;	/* for 64bit imm or seg16 */
 	};
 
+	int	emulate_prefix_size;
 	insn_attr_t attr;
 	unsigned char opnd_bytes;
 	unsigned char addr_bytes;
@@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
 	return (insn->vex_prefix.nbytes == 4);
 }
 
+static inline int insn_has_emulate_prefix(struct insn *insn)
+{
+	return !!insn->emulate_prefix_size;
+}
+
 /* Ensure this instruction is decoded completely */
 static inline int insn_complete(struct insn *insn)
 {
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index c606c0b70738..4981c293f926 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -111,6 +111,7 @@
 
 #define INTEL_FAM6_ATOM_TREMONT_D	0x86 /* Jacobsville */
 #define INTEL_FAM6_ATOM_TREMONT		0x96 /* Elkhart Lake */
+#define INTEL_FAM6_ATOM_TREMONT_L	0x9C /* Jasper Lake */
 
 /* Xeon Phi */
 
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 9e7adcdbe031..e6da1ce26256 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -31,30 +31,13 @@
 
 #if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
 
-int intel_pmc_ipc_simple_command(int cmd, int sub);
-int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
-		u32 *out, u32 outlen, u32 dptr, u32 sptr);
 int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen);
 int intel_pmc_s0ix_counter_read(u64 *data);
-int intel_pmc_gcr_read(u32 offset, u32 *data);
 int intel_pmc_gcr_read64(u32 offset, u64 *data);
-int intel_pmc_gcr_write(u32 offset, u32 data);
-int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val);
 
 #else
 
-static inline int intel_pmc_ipc_simple_command(int cmd, int sub)
-{
-	return -EINVAL;
-}
-
-static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
-		u32 *out, u32 outlen, u32 dptr, u32 sptr)
-{
-	return -EINVAL;
-}
-
 static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen)
 {
@@ -66,26 +49,11 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data)
 	return -EINVAL;
 }
 
-static inline int intel_pmc_gcr_read(u32 offset, u32 *data)
-{
-	return -EINVAL;
-}
-
 static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
 {
 	return -EINVAL;
 }
 
-static inline int intel_pmc_gcr_write(u32 offset, u32 data)
-{
-	return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val)
-{
-	return -EINVAL;
-}
-
 #endif /*CONFIG_INTEL_PMC_IPC*/
 
 #endif
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 4a8c6e817398..2a1442ba6e78 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -22,24 +22,12 @@
 /* Read single register */
 int intel_scu_ipc_ioread8(u16 addr, u8 *data);
 
-/* Read two sequential registers */
-int intel_scu_ipc_ioread16(u16 addr, u16 *data);
-
-/* Read four sequential registers */
-int intel_scu_ipc_ioread32(u16 addr, u32 *data);
-
 /* Read a vector */
 int intel_scu_ipc_readv(u16 *addr, u8 *data, int len);
 
 /* Write single register */
 int intel_scu_ipc_iowrite8(u16 addr, u8 data);
 
-/* Write two sequential registers */
-int intel_scu_ipc_iowrite16(u16 addr, u16 data);
-
-/* Write four sequential registers */
-int intel_scu_ipc_iowrite32(u16 addr, u32 data);
-
 /* Write a vector */
 int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);
 
@@ -50,14 +38,6 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
 int intel_scu_ipc_simple_command(int cmd, int sub);
 int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
 			  u32 *out, int outlen);
-int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen,
-			      u32 *out, int outlen, u32 dptr, u32 sptr);
-
-/* I2C control api */
-int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data);
-
-/* Update FW version */
-int intel_scu_ipc_fw_update(u8 *buffer, u32 length);
 
 extern struct blocking_notifier_head intel_scu_notifier;
 
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 214394860632..2f77e31a1283 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -40,13 +40,10 @@ struct telemetry_evtmap {
 struct telemetry_unit_config {
 	struct telemetry_evtmap *telem_evts;
 	void __iomem *regmap;
-	u32 ssram_base_addr;
 	u8 ssram_evts_used;
 	u8 curr_period;
 	u8 max_period;
 	u8 min_period;
-	u32 ssram_size;
-
 };
 
 struct telemetry_plt_config {
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 6bed97ff6db2..e1aa17a468a8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -180,8 +180,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
  * The default ioremap() behavior is non-cached; if you need something
  * else, you probably want one of the following.
  */
-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
-#define ioremap_nocache ioremap_nocache
 extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
 #define ioremap_uc ioremap_uc
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
@@ -205,10 +203,7 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long
  * If the area you are trying to map is a PCI BAR you should have a
  * look at pci_iomap().
  */
-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
-{
-	return ioremap_nocache(offset, size);
-}
+void __iomem *ioremap(resource_size_t offset, unsigned long size);
 #define ioremap ioremap
 
 extern void iounmap(volatile void __iomem *addr);
@@ -404,4 +399,40 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset,
 extern bool phys_mem_access_encrypted(unsigned long phys_addr,
 				      unsigned long size);
 
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @__dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512 bits quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time.  Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *__dst, const void *src,
+				    size_t count)
+{
+	/*
+	 * Note that this isn't an "on-stack copy", just definition of "dst"
+	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
+	 * In the MOVDIR64B case that may be needed as you can use the
+	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
+	 * lets the compiler know how much gets clobbered.
+	 */
+	volatile struct { char _[64]; } *dst = __dst;
+	const u8 *from = src;
+	const u8 *end = from + count * 64;
+
+	while (from < end) {
+		/* MOVDIR64B [rdx], rax */
+		asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+			     : "=m" (dst)
+			     : "d" (from), "a" (dst));
+		from += 64;
+	}
+}
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
new file mode 100644
index 000000000000..02c6ef8f7667
--- /dev/null
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IOBITMAP_H
+#define _ASM_X86_IOBITMAP_H
+
+#include <linux/refcount.h>
+#include <asm/processor.h>
+
+struct io_bitmap {
+	u64		sequence;
+	refcount_t	refcnt;
+	/* The maximum number of bytes to copy so all zero bits are covered */
+	unsigned int	max;
+	unsigned long	bitmap[IO_BITMAP_LONGS];
+};
+
+struct task_struct;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+void io_bitmap_share(struct task_struct *tsk);
+void io_bitmap_exit(void);
+
+void tss_update_io_bitmap(void);
+#else
+static inline void io_bitmap_share(struct task_struct *tsk) { }
+static inline void io_bitmap_exit(void) { }
+static inline void tss_update_io_bitmap(void) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index b91623d521d9..bf1ed2ddc74b 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,10 +2,28 @@
 #ifndef _ASM_X86_IOMMU_H
 #define _ASM_X86_IOMMU_H
 
+#include <linux/acpi.h>
+
+#include <asm/e820/api.h>
+
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
+static inline int __init
+arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+	u64 start = rmrr->base_address;
+	u64 end = rmrr->end_address + 1;
+
+	if (e820__mapped_all(start, end, E820_TYPE_RESERVED))
+		return 0;
+
+	pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
+	       start, end - 1);
+	return -EINVAL;
+}
+
 #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..247ab14c6309 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -33,6 +33,7 @@ enum show_regs_mode {
 };
 
 extern void die(const char *, struct pt_regs *,long);
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5e7d6b46de97..6802c59e8252 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -66,10 +66,6 @@ struct kimage;
 # define KEXEC_ARCH KEXEC_ARCH_X86_64
 #endif
 
-/* Memory to backup during crash kdump */
-#define KEXEC_BACKUP_SRC_START	(0UL)
-#define KEXEC_BACKUP_SRC_END	(640 * 1024UL - 1)	/* 640K */
-
 /*
  * This function is responsible for capturing register states if coming
  * via panic otherwise just fix up the ss and sp if coming via kernel
@@ -154,12 +150,6 @@ struct kimage_arch {
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-	/* Details of backup region */
-	unsigned long backup_src_start;
-	unsigned long backup_src_sz;
-
-	/* Physical address of backup segment */
-	unsigned long backup_load_addr;
 
 	/* Core ELF header buffer */
 	void *elf_headers;
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5dc909d9ad81..95b1f053bd96 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -11,12 +11,11 @@
 
 #include <asm-generic/kprobes.h>
 
-#define BREAKPOINT_INSTRUCTION	0xcc
-
 #ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <asm/text-patching.h>
 #include <asm/insn.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
@@ -25,10 +24,7 @@ struct pt_regs;
 struct kprobe;
 
 typedef u8 kprobe_opcode_t;
-#define RELATIVEJUMP_OPCODE 0xe9
-#define RELATIVEJUMP_SIZE 5
-#define RELATIVECALL_OPCODE 0xe8
-#define RELATIVE_ADDR_SIZE 4
+
 #define MAX_STACK_SIZE 64
 #define CUR_STACK_SIZE(ADDR) \
 	(current_top_of_stack() - (unsigned long)(ADDR))
@@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[];
 extern __visible kprobe_opcode_t optprobe_template_val[];
 extern __visible kprobe_opcode_t optprobe_template_call[];
 extern __visible kprobe_opcode_t optprobe_template_end[];
-#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
 #define MAX_OPTINSN_SIZE 				\
 	(((unsigned long)optprobe_template_end -	\
 	  (unsigned long)optprobe_template_entry) +	\
-	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+	 MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
 
 extern const int kretprobe_blacklist_size;
 
@@ -73,7 +69,7 @@ struct arch_specific_insn {
 
 struct arch_optimized_insn {
 	/* copy of the original instructions */
-	kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+	kprobe_opcode_t copied_insn[DISP32_SIZE];
 	/* detour code buffer */
 	kprobe_opcode_t *insn;
 	/* the size of instructions copied to detour code buffer */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4fc61483919a..b79cd6aa4075 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -156,10 +156,8 @@ enum kvm_reg {
 	VCPU_REGS_R15 = __VCPU_REGS_R15,
 #endif
 	VCPU_REGS_RIP,
-	NR_VCPU_REGS
-};
+	NR_VCPU_REGS,
 
-enum kvm_reg_ex {
 	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 	VCPU_EXREG_CR3,
 	VCPU_EXREG_RFLAGS,
@@ -454,6 +452,11 @@ struct kvm_pmc {
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
+	/*
+	 * eventsel value for general purpose counters,
+	 * ctrl value for fixed counters.
+	 */
+	u64 current_config;
 };
 
 struct kvm_pmu {
@@ -472,7 +475,21 @@ struct kvm_pmu {
 	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 	struct irq_work irq_work;
-	u64 reprogram_pmi;
+	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+	/*
+	 * The gate to release perf_events not marked in
+	 * pmc_in_use only once in a vcpu time slice.
+	 */
+	bool need_cleanup;
+
+	/*
+	 * The total number of programmed perf_events and it helps to avoid
+	 * redundant check before cleanup if guest don't use vPMU at all.
+	 */
+	u8 event_count;
 };
 
 struct kvm_pmu_ops;
@@ -565,6 +582,7 @@ struct kvm_vcpu_arch {
 	u64 smbase;
 	u64 smi_count;
 	bool tpr_access_reporting;
+	bool xsaves_enabled;
 	u64 ia32_xss;
 	u64 microcode_version;
 	u64 arch_capabilities;
@@ -1041,7 +1059,6 @@ struct kvm_x86_ops {
 			    struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
 	void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
-	void (*decache_cr3)(struct kvm_vcpu *vcpu);
 	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
 	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -1090,7 +1107,7 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+	bool (*get_enable_apicv)(struct kvm *kvm);
 	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
@@ -1357,6 +1374,7 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
@@ -1577,6 +1595,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+				       unsigned long *vcpu_bitmap);
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work);
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 14caa9d9fb7f..365111789cc6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -13,10 +13,6 @@
 
 #ifdef __ASSEMBLY__
 
-#define GLOBAL(name)	\
-	.globl name;	\
-	name:
-
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
 #define __ALIGN		.p2align 4, 0x90
 #define __ALIGN_STR	__stringify(__ALIGN)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dc2d4b206ab7..4359b955e0b7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -144,7 +144,7 @@ struct mce_log_buffer {
 
 enum mce_notifier_prios {
 	MCE_PRIO_FIRST		= INT_MAX,
-	MCE_PRIO_SRAO		= INT_MAX - 1,
+	MCE_PRIO_UC		= INT_MAX - 1,
 	MCE_PRIO_EXTLOG		= INT_MAX - 2,
 	MCE_PRIO_NFIT		= INT_MAX - 3,
 	MCE_PRIO_EDAC		= INT_MAX - 4,
@@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected,
 /* These may be used by multiple smca_hwid_mcatypes */
 enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
+	SMCA_LS_V2,	/* Load Store */
 	SMCA_IF,	/* Instruction Fetch */
 	SMCA_L2_CACHE,	/* L2 Cache */
 	SMCA_DE,	/* Decoder Unit */
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..9c2447b3555d
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_disable(const char *reason);
+extern void pat_init(void);
+extern void init_cache_modes(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+		enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+		enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+			enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 209492849566..6685e1218959 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -53,6 +53,6 @@ static inline void __init load_ucode_amd_bsp(unsigned int family) {}
 static inline void load_ucode_amd_ap(unsigned int family) {}
 static inline int __init
 save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-void reload_ucode_amd(void) {}
+static inline void reload_ucode_amd(void) {}
 #endif
 #endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a3e1c72a3ab9..b538d9ddee9c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -25,12 +25,14 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 
 #ifdef CONFIG_PERF_EVENTS
 
+DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
 static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
 {
 	if (static_branch_unlikely(&rdpmc_always_available_key) ||
-	    atomic_read(&mm->context.perf_rdpmc_allowed))
+	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
+	     atomic_read(&mm->context.perf_rdpmc_allowed)))
 		cr4_set_bits_irqsoff(X86_CR4_PCE);
 	else
 		cr4_clear_bits_irqsoff(X86_CR4_PCE);
@@ -66,14 +68,6 @@ struct ldt_struct {
 	int			slot;
 };
 
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
-	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
 /*
  * Used for LDT copy/destruction.
  */
@@ -96,87 +90,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
 static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
-
-	/* READ_ONCE synchronizes with smp_store_release */
-	ldt = READ_ONCE(mm->context.ldt);
-
-	/*
-	 * Any change to mm->context.ldt is followed by an IPI to all
-	 * CPUs with the mm active.  The LDT will not be freed until
-	 * after the IPI is handled by all such CPUs.  This means that,
-	 * if the ldt_struct changes before we return, the values we see
-	 * will be safe, and the new values will be loaded before we run
-	 * any user code.
-	 *
-	 * NB: don't try to convert this to use RCU without extreme care.
-	 * We would still need IRQs off, because we don't want to change
-	 * the local LDT after an IPI loaded a newer value than the one
-	 * that we can see.
-	 */
-
-	if (unlikely(ldt)) {
-		if (static_cpu_has(X86_FEATURE_PTI)) {
-			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
-				/*
-				 * Whoops -- either the new LDT isn't mapped
-				 * (if slot == -1) or is mapped into a bogus
-				 * slot (if slot > 1).
-				 */
-				clear_LDT();
-				return;
-			}
-
-			/*
-			 * If page table isolation is enabled, ldt->entries
-			 * will not be mapped in the userspace pagetables.
-			 * Tell the CPU to access the LDT through the alias
-			 * at ldt_slot_va(ldt->slot).
-			 */
-			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
-		} else {
-			set_ldt(ldt->entries, ldt->nr_entries);
-		}
-	} else {
-		clear_LDT();
-	}
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
 #else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
 	clear_LDT();
-#endif
 }
-
 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-	/*
-	 * Load the LDT if either the old or new mm had an LDT.
-	 *
-	 * An mm will never go from having an LDT to not having an LDT.  Two
-	 * mms never share an LDT, so we don't gain anything by checking to
-	 * see whether the LDT changed.  There's also no guarantee that
-	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
-	 * then prev->context.ldt will also be non-NULL.
-	 *
-	 * If we really cared, we could optimize the case where prev == next
-	 * and we're exiting lazy mode.  Most of the time, if this happens,
-	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
-	 * used by legacy code and emulators where we don't need this level of
-	 * performance.
-	 *
-	 * This uses | instead of || because it generates better code.
-	 */
-	if (unlikely((unsigned long)prev->context.ldt |
-		     (unsigned long)next->context.ldt))
-		load_mm_ldt(next);
-#endif
-
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
+#endif
 
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
 /*
  * Init a new mm.  Used on mm copies, like at fork()
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..c215d2762488 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -15,6 +15,8 @@ struct mod_arch_specific {
 
 #ifdef CONFIG_X86_64
 /* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M486SX
+#define MODULE_PROC_FAMILY "486SX "
 #elif defined CONFIG_M486
 #define MODULE_PROC_FAMILY "486 "
 #elif defined CONFIG_M586
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f4138aeb4280..6b79515abb82 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -219,6 +219,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
 void __init hyperv_init(void);
 void hyperv_setup_mmu_ops(void);
 void *hv_alloc_hyperv_page(void);
+void *hv_alloc_hyperv_zeroed_page(void);
 void hv_free_hyperv_page(unsigned long addr);
 void hyperv_reenlightenment_intr(struct pt_regs *regs);
 void set_hv_tscchange_cb(void (*cb)(void));
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6a3124664289..ebe1685e92dd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -409,6 +409,8 @@
 #define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
 #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
 #define MSR_AMD64_OSVW_STATUS		0xc0010141
+#define MSR_AMD_PPIN_CTL		0xc00102f0
+#define MSR_AMD_PPIN			0xc00102f1
 #define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_BU_CFG2		0xc001102a
@@ -556,7 +558,14 @@
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_EBC_FREQUENCY_ID		0x0000002c
 #define MSR_SMI_COUNT			0x00000034
-#define MSR_IA32_FEATURE_CONTROL        0x0000003a
+
+/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */
+#define MSR_IA32_FEAT_CTL		0x0000003a
+#define FEAT_CTL_LOCKED				BIT(0)
+#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX		BIT(1)
+#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX	BIT(2)
+#define FEAT_CTL_LMCE_ENABLED			BIT(20)
+
 #define MSR_IA32_TSC_ADJUST             0x0000003b
 #define MSR_IA32_BNDCFGS		0x00000d90
 
@@ -564,11 +573,6 @@
 
 #define MSR_IA32_XSS			0x00000da0
 
-#define FEATURE_CONTROL_LOCKED				(1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
-#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
-#define FEATURE_CONTROL_LMCE				(1<<20)
-
 #define MSR_IA32_APICBASE		0x0000001b
 #define MSR_IA32_APICBASE_BSP		(1<<8)
 #define MSR_IA32_APICBASE_ENABLE	(1<<11)
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..829df26fd7a3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,7 +24,7 @@
 #define _ASM_X86_MTRR_H
 
 #include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 
 
 /*
@@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 }
 static inline void mtrr_bp_init(void)
 {
-	pat_disable("MTRRs disabled, skipping PAT initialization too.");
+	pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
 }
 
 #define mtrr_ap_init() do {} while (0)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..9d5d949e662e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -41,7 +41,6 @@ struct nmiaction {
 	struct list_head	list;
 	nmi_handler_t		handler;
 	u64			max_duration;
-	struct irq_work		irq_work;
 	unsigned long		flags;
 	const char		*name;
 };
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 5c24a7b35166..07e95dcb40ad 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -37,7 +37,6 @@
  */
 
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
 
 /*
  * Google experimented with loop-unrolling and this turned out to be
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 69089d46f128..86e7317eb31f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
 }
-static inline void set_iopl_mask(unsigned mask)
-{
-	PVOP_VCALL1(cpu.set_iopl_mask, mask);
-}
 
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 70b654f3ffe5..84812964d3dd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,8 +140,6 @@ struct pv_cpu_ops {
 
 	void (*load_sp0)(unsigned long sp0);
 
-	void (*set_iopl_mask)(unsigned mask);
-
 	void (*wbinvd)(void);
 
 	/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
-		enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
-		enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
-			enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index e662f987dfa2..c1fdd43fe187 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -9,11 +9,9 @@
 #include <linux/scatterlist.h>
 #include <linux/numa.h>
 #include <asm/io.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/x86_init.h>
 
-#ifdef __KERNEL__
-
 struct pci_sysdata {
 	int		domain;		/* PCI domain */
 	int		node;		/* NUMA node */
@@ -118,11 +116,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif
-#endif  /* __KERNEL__ */
-
-#ifdef CONFIG_X86_64
-#include <asm/pci_64.h>
-#endif
 
 /* generic pci stuff */
 #include <asm-generic/pci.h>
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
deleted file mode 100644
index f5411de0ae11..000000000000
--- a/arch/x86/include/asm/pci_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PCI_64_H
-#define _ASM_X86_PCI_64_H
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_CALGARY_IOMMU
-static inline void *pci_iommu(struct pci_bus *bus)
-{
-	struct pci_sysdata *sd = bus->sysdata;
-	return sd->iommu;
-}
-
-static inline void set_pci_iommu(struct pci_bus *bus, void *val)
-{
-	struct pci_sysdata *sd = bus->sysdata;
-	sd->iommu = val;
-}
-#endif /* CONFIG_CALGARY_IOMMU */
-
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
-			      int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
-			       int reg, int len, u32 value);
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index ee26e9215f18..29964b0e1075 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -322,17 +322,10 @@ struct perf_guest_switch_msr {
 	u64 host, guest;
 };
 
-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
 extern int x86_perf_rdpmc_index(struct perf_event *event);
 #else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
-	*nr = 0;
-	return NULL;
-}
-
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 {
 	memset(cap, 0, sizeof(*cap));
@@ -342,8 +335,23 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+#else
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+	*nr = 0;
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_CPU_SUP_INTEL
  extern void intel_pt_handle_vmx(int on);
+#else
+static inline void intel_pt_handle_vmx(int on)
+{
+
+}
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e3633795fb22..5afb5e0fe903 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 
 #define pmd_read_atomic pmd_read_atomic
 /*
- * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by gcc. Problem is, in certain places
- * where pte_offset_map_lock is called, concurrent page faults are
+ * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
+ * a "*pmdp" dereference done by GCC. Problem is, in certain places
+ * where pte_offset_map_lock() is called, concurrent page faults are
  * allowed, if the mmap_sem is hold for reading. An example is mincore
  * vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate rightfully does a set_64bit, but if we're reading the
+ * pmd_populate() rightfully does a set_64bit(), but if we're reading the
  * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because gcc will not read the 64bit of the pmd atomically. To fix
- * this all places running pmd_offset_map_lock() while holding the
+ * because GCC will not read the 64-bit value of the pmd atomically.
+ *
+ * To fix this all places running pte_offset_map_lock() while holding the
  * mmap_sem in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null nor not, and in turn to know if
- * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
+ * function to know if the pmd is null or not, and in turn to know if
+ * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
  * operations.
  *
- * Without THP if the mmap_sem is hold for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic runs. So
+ * Without THP if the mmap_sem is held for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic() runs. So
  * we can always return atomic pmd values with this function.
  *
- * With THP if the mmap_sem is hold for reading, the pmd can become
+ * With THP if the mmap_sem is held for reading, the pmd can become
  * trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic. We could read it really
- * atomically here with a atomic64_read for the THP enabled case (and
+ * at any time under pmd_read_atomic(). We could read it truly
+ * atomically here with an atomic64_read() for the THP enabled case (and
  * it would be a whole lot simpler), but to avoid using cmpxchg8b we
  * only return an atomic pmdval if the low part of the pmdval is later
- * found stable (i.e. pointing to a pte). And we're returning a none
- * pmdval if the low part of the pmd is none. In some cases the high
- * and low part of the pmdval returned may not be consistent if THP is
- * enabled (the low part may point to previously mapped hugepage,
- * while the high part may point to a more recently mapped hugepage),
- * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
- * of the pmd to be read atomically to decide if the pmd is unstable
- * or not, with the only exception of when the low part of the pmd is
- * zero in which case we return a none pmd.
+ * found to be stable (i.e. pointing to a pte). We are also returning a
+ * 'none' (zero) pmdval if the low part of the pmd is zero.
+ *
+ * In some cases the high and low part of the pmdval returned may not be
+ * consistent if THP is enabled (the low part may point to previously
+ * mapped hugepage, while the high part may point to a more recently
+ * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
+ * needs the low part of the pmd to be read atomically to decide if the
+ * pmd is unstable or not, with the only exception when the low part
+ * of the pmd is zero, in which case we return a 'none' pmd.
  */
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0bc530c4eb13..ad97dc155195 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void)
 	return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+static inline bool arch_faults_on_old_pte(void)
+{
+	return false;
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..b6355416a15a
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET	(8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES		(NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE	\
+	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE		\
+	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
+
+#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index b0bc0fff5f1f..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,54 +20,4 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts.  That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET	(8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
- * to avoid include recursion hell
- */
-#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
-
-#define CPU_ENTRY_AREA_BASE						\
-	((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1))   \
-	 & PMD_MASK)
-
-#define LDT_BASE_ADDR		\
-	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE		\
-	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR	VMALLOC_START
-#define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
-
-#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..d34cce1b995c
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b5e49e6bac63..ea7400726d7a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,11 +110,6 @@
 
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
-#define _PAGE_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
-				 _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW |		\
-				 _PAGE_ACCESSED | _PAGE_DIRTY)
-
 /*
  * Set of bits not changed in pte_modify.  The pte's
  * protection key is treated like _PAGE_RW, for
@@ -136,80 +131,93 @@
  */
 #ifndef __ASSEMBLY__
 enum page_cache_mode {
-	_PAGE_CACHE_MODE_WB = 0,
-	_PAGE_CACHE_MODE_WC = 1,
+	_PAGE_CACHE_MODE_WB       = 0,
+	_PAGE_CACHE_MODE_WC       = 1,
 	_PAGE_CACHE_MODE_UC_MINUS = 2,
-	_PAGE_CACHE_MODE_UC = 3,
-	_PAGE_CACHE_MODE_WT = 4,
-	_PAGE_CACHE_MODE_WP = 5,
-	_PAGE_CACHE_MODE_NUM = 8
+	_PAGE_CACHE_MODE_UC       = 3,
+	_PAGE_CACHE_MODE_WT       = 4,
+	_PAGE_CACHE_MODE_WP       = 5,
+
+	_PAGE_CACHE_MODE_NUM      = 8
 };
 #endif
 
-#define _PAGE_CACHE_MASK	(_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-#define _PAGE_NOCACHE		(cachemode2protval(_PAGE_CACHE_MODE_UC))
-#define _PAGE_CACHE_WP		(cachemode2protval(_PAGE_CACHE_MODE_WP))
+#define _PAGE_ENC		(_AT(pteval_t, sme_me_mask))
 
-#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
-				 _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_RW |	\
-					 _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
-					 _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC		__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
-					 _PAGE_ACCESSED)
-#define PAGE_COPY		PAGE_COPY_NOEXEC
-#define PAGE_READONLY		__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
-					 _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
-					 _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC						\
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL		(__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP		(__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO		(__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE)
+#define _PAGE_CACHE_MASK	(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
 
-#ifndef __ASSEMBLY__
+#define _PAGE_NOCACHE		(cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP		(cachemode2protval(_PAGE_CACHE_MODE_WP))
 
-#define _PAGE_ENC	(_AT(pteval_t, sme_me_mask))
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x)		((x).pgprot)
+#define __pgprot(x)		((pgprot_t) { (x) } )
+#define __pg(x)			__pgprot(x)
+
+#define _PAGE_PAT_LARGE		(_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+
+#define PAGE_NONE	     __pg(   0|   0|   0|___A|   0|   0|   0|___G)
+#define PAGE_SHARED	     __pg(__PP|__RW|_USR|___A|__NX|   0|   0|   0)
+#define PAGE_SHARED_EXEC     __pg(__PP|__RW|_USR|___A|   0|   0|   0|   0)
+#define PAGE_COPY_NOEXEC     __pg(__PP|   0|_USR|___A|__NX|   0|   0|   0)
+#define PAGE_COPY_EXEC	     __pg(__PP|   0|_USR|___A|   0|   0|   0|   0)
+#define PAGE_COPY	     __pg(__PP|   0|_USR|___A|__NX|   0|   0|   0)
+#define PAGE_READONLY	     __pg(__PP|   0|_USR|___A|__NX|   0|   0|   0)
+#define PAGE_READONLY_EXEC   __pg(__PP|   0|_USR|___A|   0|   0|   0|   0)
+
+#define __PAGE_KERNEL		 (__PP|__RW|   0|___A|__NX|___D|   0|___G)
+#define __PAGE_KERNEL_EXEC	 (__PP|__RW|   0|___A|   0|___D|   0|___G)
+#define _KERNPG_TABLE_NOENC	 (__PP|__RW|   0|___A|   0|___D|   0|   0)
+#define _KERNPG_TABLE		 (__PP|__RW|   0|___A|   0|___D|   0|   0| _ENC)
+#define _PAGE_TABLE_NOENC	 (__PP|__RW|_USR|___A|   0|___D|   0|   0)
+#define _PAGE_TABLE		 (__PP|__RW|_USR|___A|   0|___D|   0|   0| _ENC)
+#define __PAGE_KERNEL_RO	 (__PP|   0|   0|___A|__NX|___D|   0|___G)
+#define __PAGE_KERNEL_RX	 (__PP|   0|   0|___A|   0|___D|   0|___G)
+#define __PAGE_KERNEL_NOCACHE	 (__PP|__RW|   0|___A|__NX|___D|   0|___G| __NC)
+#define __PAGE_KERNEL_VVAR	 (__PP|   0|_USR|___A|__NX|___D|   0|___G)
+#define __PAGE_KERNEL_LARGE	 (__PP|__RW|   0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW|   0|___A|   0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP	 (__PP|__RW|   0|___A|__NX|___D|   0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO		__PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE	__PAGE_KERNEL_NOCACHE
 
-#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
-			 _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE	(_KERNPG_TABLE | _PAGE_USER)
 
-#define __PAGE_KERNEL_ENC	(__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP	(__PAGE_KERNEL_WP | _PAGE_ENC)
+#ifndef __ASSEMBLY__
 
-#define __PAGE_KERNEL_NOENC	(__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP	(__PAGE_KERNEL_WP)
+#define __PAGE_KERNEL_ENC	(__PAGE_KERNEL    | _ENC)
+#define __PAGE_KERNEL_ENC_WP	(__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC	(__PAGE_KERNEL    |    0)
+#define __PAGE_KERNEL_NOENC_WP	(__PAGE_KERNEL_WP |    0)
 
-#define default_pgprot(x)	__pgprot((x) & __default_kernel_pte_mask)
+#define __pgprot_mask(x)	__pgprot((x) & __default_kernel_pte_mask)
 
-#define PAGE_KERNEL		default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC	default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO		default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC	default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC	default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX		default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE	default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE	default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC	default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR	default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+#define PAGE_KERNEL		__pgprot_mask(__PAGE_KERNEL            | _ENC)
+#define PAGE_KERNEL_NOENC	__pgprot_mask(__PAGE_KERNEL            |    0)
+#define PAGE_KERNEL_RO		__pgprot_mask(__PAGE_KERNEL_RO         | _ENC)
+#define PAGE_KERNEL_EXEC	__pgprot_mask(__PAGE_KERNEL_EXEC       | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot_mask(__PAGE_KERNEL_EXEC       |    0)
+#define PAGE_KERNEL_RX		__pgprot_mask(__PAGE_KERNEL_RX         | _ENC)
+#define PAGE_KERNEL_NOCACHE	__pgprot_mask(__PAGE_KERNEL_NOCACHE    | _ENC)
+#define PAGE_KERNEL_LARGE	__pgprot_mask(__PAGE_KERNEL_LARGE      | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC	__pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR	__pgprot_mask(__PAGE_KERNEL_VVAR       | _ENC)
 
-#define PAGE_KERNEL_IO		default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE	default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO		__pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE	__pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
 
 #endif	/* __ASSEMBLY__ */
 
@@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
 	return native_pte_val(pte) & PTE_FLAGS_MASK;
 }
 
-#define pgprot_val(x)	((x).pgprot)
-#define __pgprot(x)	((pgprot_t) { (x) } )
-
 extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
 extern uint8_t __pte2cachemode_tbl[8];
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4482f14dc48d..09705ccc393c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
+struct io_bitmap;
 struct vm86;
 
 #include <asm/math_emu.h>
@@ -24,6 +25,7 @@ struct vm86;
 #include <asm/special_insns.h>
 #include <asm/fpu/types.h>
 #include <asm/unwind_hints.h>
+#include <asm/vmxfeatures.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -84,6 +86,9 @@ struct cpuinfo_x86 {
 	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
 	int			x86_tlbsize;
 #endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	__u32			vmx_capability[NVMXINTS];
+#endif
 	__u8			x86_virt_bits;
 	__u8			x86_phys_bits;
 	/* CPUID returned core id bits: */
@@ -93,7 +98,15 @@ struct cpuinfo_x86 {
 	__u32			extended_cpuid_level;
 	/* Maximum supported CPUID level, -1=no CPUID: */
 	int			cpuid_level;
-	__u32			x86_capability[NCAPINTS + NBUGINTS];
+	/*
+	 * Align to size of unsigned long because the x86_capability array
+	 * is passed to bitops which require the alignment. Use unnamed
+	 * union to enforce the array is aligned to size of unsigned long.
+	 */
+	union {
+		__u32		x86_capability[NCAPINTS + NBUGINTS];
+		unsigned long	x86_capability_alignment;
+	};
 	char			x86_vendor_id[16];
 	char			x86_model_id[64];
 	/* in KB - valid for CPUS which support this call: */
@@ -157,7 +170,6 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
-extern struct x86_hw_tss	doublefault_tss;
 extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
 extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
 
@@ -328,10 +340,32 @@ struct x86_hw_tss {
  * IO-bitmap sizes:
  */
 #define IO_BITMAP_BITS			65536
-#define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET	0x8000
+#define IO_BITMAP_BYTES			(IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS			(IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP				\
+	(offsetof(struct tss_struct, io_bitmap.bitmap) -	\
+	 offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL				\
+	(offsetof(struct tss_struct, io_bitmap.mapall) -	\
+	 offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT	\
+	(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+	 sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT	\
+	(offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)
 
 struct entry_stack {
 	unsigned long		words[64];
@@ -341,13 +375,21 @@ struct entry_stack_page {
 	struct entry_stack stack;
 } __aligned(PAGE_SIZE);
 
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+	/* The sequence number of the last active bitmap. */
+	u64			prev_sequence;
+
 	/*
-	 * The fixed hardware portion.  This must not cross a page boundary
-	 * at risk of violating the SDM's advice and potentially triggering
-	 * errata.
+	 * Store the dirty size of the last io bitmap offender. The next
+	 * one will have to do the cleanup as the switch out to a non io
+	 * bitmap user will just set x86_tss.io_bitmap_base to a value
+	 * outside of the TSS limit. So for sane tasks there is no need to
+	 * actually touch the io_bitmap at all.
 	 */
-	struct x86_hw_tss	x86_tss;
+	unsigned int		prev_max;
 
 	/*
 	 * The extra 1 is there because the CPU will access an
@@ -355,21 +397,28 @@ struct tss_struct {
 	 * bitmap. The extra byte must be all 1 bits, and must
 	 * be within the limit.
 	 */
-	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
+	unsigned long		bitmap[IO_BITMAP_LONGS + 1];
+
+	/*
+	 * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+	 * except the additional byte at the end.
+	 */
+	unsigned long		mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+	/*
+	 * The fixed hardware portion.  This must not cross a page boundary
+	 * at risk of violating the SDM's advice and potentially triggering
+	 * errata.
+	 */
+	struct x86_hw_tss	x86_tss;
+
+	struct x86_io_bitmap	io_bitmap;
 } __aligned(PAGE_SIZE);
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT	\
-	(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
-
 /* Per CPU interrupt stacks */
 struct irq_stack {
 	char		stack[IRQ_STACK_SIZE];
@@ -480,10 +529,14 @@ struct thread_struct {
 	struct vm86		*vm86;
 #endif
 	/* IO permissions: */
-	unsigned long		*io_bitmap_ptr;
-	unsigned long		iopl;
-	/* Max allowed port in the bitmap, in bytes: */
-	unsigned		io_bitmap_max;
+	struct io_bitmap	*io_bitmap;
+
+	/*
+	 * IOPL. Priviledge level dependent I/O permission which is
+	 * emulated via the I/O bitmap to prevent user space from disabling
+	 * interrupts.
+	 */
+	unsigned long		iopl_emul;
 
 	mm_segment_t		addr_limit;
 
@@ -515,25 +568,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-#ifdef CONFIG_X86_32
-	unsigned int reg;
-
-	asm volatile ("pushfl;"
-		      "popl %0;"
-		      "andl %1, %0;"
-		      "orl %2, %0;"
-		      "pushl %0;"
-		      "popfl"
-		      : "=&r" (reg)
-		      : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
-}
-
 static inline void
 native_load_sp0(unsigned long sp0)
 {
@@ -573,7 +607,6 @@ static inline void load_sp0(unsigned long sp0)
 	native_load_sp0(sp0);
 }
 
-#define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT_XXL */
 
 /* Free all resources held by a thread. */
@@ -841,7 +874,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define INIT_THREAD  {							  \
 	.sp0			= TOP_OF_INIT_STACK,			  \
 	.sysenter_cs		= __KERNEL_CS,				  \
-	.io_bitmap_ptr		= NULL,					  \
 	.addr_limit		= KERNEL_DS,				  \
 }
 
@@ -940,7 +972,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 
 extern unsigned long arch_align_stack(unsigned long sp);
 void free_init_pages(const char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
 
 void default_idle(void);
 #ifdef	CONFIG_XEN
@@ -950,7 +982,6 @@ bool xen_set_default_idle(void);
 #endif
 
 void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
 void microcode_check(void);
 
 enum l1tf_mitigations {
@@ -970,11 +1001,4 @@ enum mds_mitigations {
 	MDS_MITIGATION_VMWERV,
 };
 
-enum taa_mitigations {
-	TAA_MITIGATION_OFF,
-	TAA_MITIGATION_UCODE_NEEDED,
-	TAA_MITIGATION_VERW,
-	TAA_MITIGATION_TSX_DISABLED,
-};
-
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 332eb3525867..6d6475fdd327 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+	return !user_mode(regs) || user_64bit_mode(regs);
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()	current_pt_regs()->sp
 #define compat_user_stack_pointer()	current_pt_regs()->sp
@@ -339,27 +352,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
 
 #define ARCH_HAS_USER_SINGLE_STEP_REPORT
 
-/*
- * When hitting ptrace_stop(), we cannot return using SYSRET because
- * that does not restore the full CPU state, only a minimal set.  The
- * ptracer can change arbitrary register values, which is usually okay
- * because the usual ptrace stops run off the signal delivery path which
- * forces IRET; however, ptrace_event() stops happen in arbitrary places
- * in the kernel and don't force IRET path.
- *
- * So force IRET path after a ptrace stop.
- */
-#define arch_ptrace_stop_needed(code, info)				\
-({									\
-	force_iret();							\
-	false;								\
-})
-
 struct user_desc;
 extern int do_get_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info);
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
+#ifdef CONFIG_X86_64
+# define do_set_thread_area_64(p, s, t)	do_arch_prctl_64(p, s, t)
+#else
+# define do_set_thread_area_64(p, s, t)	(0)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 92c34e517da1..5528e9325049 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -6,16 +6,6 @@
 #include <linux/purgatory.h>
 
 extern void purgatory(void);
-/*
- * These forward declarations serve two purposes:
- *
- * 1) Make sparse happy when checking arch/purgatory
- * 2) Document that these are required to be global so the symbol
- *    lookup in kexec works
- */
-extern unsigned long purgatory_backup_dest;
-extern unsigned long purgatory_backup_src;
-extern unsigned long purgatory_backup_sz;
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 09ecc32f6524..b35030eeec36 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -14,7 +14,7 @@
 #include <linux/types.h>
 #include <asm/io.h>
 
-/* This must match data at realmode.S */
+/* This must match data at realmode/rm/header.S */
 struct real_mode_header {
 	u32	text_start;
 	u32	ro_end;
@@ -36,7 +36,7 @@ struct real_mode_header {
 #endif
 };
 
-/* This must match data at trampoline_32/64.S */
+/* This must match data at realmode/rm/trampoline_{32,64}.S */
 struct trampoline_header {
 #ifdef CONFIG_X86_32
 	u32 start;
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
deleted file mode 100644
index 232f856e0db0..000000000000
--- a/arch/x86/include/asm/refcount.h
+++ /dev/null
@@ -1,126 +0,0 @@
-#ifndef __ASM_X86_REFCOUNT_H
-#define __ASM_X86_REFCOUNT_H
-/*
- * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
- * PaX/grsecurity.
- */
-#include <linux/refcount.h>
-#include <asm/bug.h>
-
-/*
- * This is the first portion of the refcount error handling, which lives in
- * .text.unlikely, and is jumped to from the CPU flag check (in the
- * following macros). This saves the refcount value location into CX for
- * the exception handler to use (in mm/extable.c), and then triggers the
- * central refcount exception. The fixup address for the exception points
- * back to the regular execution flow in .text.
- */
-#define _REFCOUNT_EXCEPTION				\
-	".pushsection .text..refcount\n"		\
-	"111:\tlea %[var], %%" _ASM_CX "\n"		\
-	"112:\t" ASM_UD2 "\n"				\
-	ASM_UNREACHABLE					\
-	".popsection\n"					\
-	"113:\n"					\
-	_ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO				\
-	"js 111f\n\t"					\
-	_REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO				\
-	"jz 111f\n\t"					\
-	REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR					\
-	"jmp 111f\n\t"					\
-	_REFCOUNT_EXCEPTION
-
-static __always_inline void refcount_add(unsigned int i, refcount_t *r)
-{
-	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
-		REFCOUNT_CHECK_LT_ZERO
-		: [var] "+m" (r->refs.counter)
-		: "ir" (i)
-		: "cc", "cx");
-}
-
-static __always_inline void refcount_inc(refcount_t *r)
-{
-	asm volatile(LOCK_PREFIX "incl %0\n\t"
-		REFCOUNT_CHECK_LT_ZERO
-		: [var] "+m" (r->refs.counter)
-		: : "cc", "cx");
-}
-
-static __always_inline void refcount_dec(refcount_t *r)
-{
-	asm volatile(LOCK_PREFIX "decl %0\n\t"
-		REFCOUNT_CHECK_LE_ZERO
-		: [var] "+m" (r->refs.counter)
-		: : "cc", "cx");
-}
-
-static __always_inline __must_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
-	bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
-					 REFCOUNT_CHECK_LT_ZERO,
-					 r->refs.counter, e, "er", i, "cx");
-
-	if (ret) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	return false;
-}
-
-static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
-{
-	bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
-					 REFCOUNT_CHECK_LT_ZERO,
-					 r->refs.counter, e, "cx");
-
-	if (ret) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	return false;
-}
-
-static __always_inline __must_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
-	int c, result;
-
-	c = atomic_read(&(r->refs));
-	do {
-		if (unlikely(c == 0))
-			return false;
-
-		result = c + i;
-
-		/* Did we try to increment from/to an undesirable state? */
-		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
-			asm volatile(REFCOUNT_ERROR
-				     : : [var] "m" (r->refs.counter)
-				     : "cc", "cx");
-			break;
-		}
-
-	} while (!atomic_try_cmpxchg(&(r->refs), &c, result));
-
-	return c != 0;
-}
-
-static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
-{
-	return refcount_add_not_zero(1, r);
-}
-
-#endif
diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h
deleted file mode 100644
index 0a21986d2238..000000000000
--- a/arch/x86/include/asm/rio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Derived from include/asm-x86/mach-summit/mach_mpparse.h
- *          and include/asm-x86/mach-default/bios_ebda.h
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- */
-
-#ifndef _ASM_X86_RIO_H
-#define _ASM_X86_RIO_H
-
-#define RIO_TABLE_VERSION	3
-
-struct rio_table_hdr {
-	u8 version;		/* Version number of this data structure  */
-	u8 num_scal_dev;	/* # of Scalability devices               */
-	u8 num_rio_dev;		/* # of RIO I/O devices                   */
-} __attribute__((packed));
-
-struct scal_detail {
-	u8 node_id;		/* Scalability Node ID                    */
-	u32 CBAR;		/* Address of 1MB register space          */
-	u8 port0node;		/* Node ID port connected to: 0xFF=None   */
-	u8 port0port;		/* Port num port connected to: 0,1,2, or  */
-				/* 0xFF=None                              */
-	u8 port1node;		/* Node ID port connected to: 0xFF = None */
-	u8 port1port;		/* Port num port connected to: 0,1,2, or  */
-				/* 0xFF=None                              */
-	u8 port2node;		/* Node ID port connected to: 0xFF = None */
-	u8 port2port;		/* Port num port connected to: 0,1,2, or  */
-				/* 0xFF=None                              */
-	u8 chassis_num;		/* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
-	u8 node_id;		/* RIO Node ID                            */
-	u32 BBAR;		/* Address of 1MB register space          */
-	u8 type;		/* Type of device                         */
-	u8 owner_id;		/* Node ID of Hurricane that owns this    */
-				/* node                                   */
-	u8 port0node;		/* Node ID port connected to: 0xFF=None   */
-	u8 port0port;		/* Port num port connected to: 0,1,2, or  */
-				/* 0xFF=None                              */
-	u8 port1node;		/* Node ID port connected to: 0xFF=None   */
-	u8 port1port;		/* Port num port connected to: 0,1,2, or  */
-				/* 0xFF=None                              */
-	u8 first_slot;		/* Lowest slot number below this Calgary  */
-	u8 status;		/* Bit 0 = 1 : the XAPIC is used          */
-				/*       = 0 : the XAPIC is not used, ie: */
-				/*            ints fwded to another XAPIC */
-				/*           Bits1:7 Reserved             */
-	u8 WP_index;		/* instance index - lower ones have       */
-				/*     lower slot numbers/PCI bus numbers */
-	u8 chassis_num;		/* 1 based Chassis number                 */
-} __attribute__((packed));
-
-enum {
-	HURR_SCALABILTY	= 0,	/* Hurricane Scalability info */
-	HURR_RIOIB	= 2,	/* Hurricane RIOIB info       */
-	COMPAT_CALGARY	= 4,	/* Compatibility Calgary      */
-	ALT_CALGARY	= 5,	/* Second Planar Calgary      */
-};
-
-#endif /* _ASM_X86_RIO_H */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 71b32f2570ab..036c360910c5 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -6,7 +6,6 @@
 #include <asm/extable.h>
 
 extern char __brk_base[], __brk_limit[];
-extern struct exception_table_entry __stop___ex_table[];
 extern char __end_rodata_aligned[];
 
 #if defined(CONFIG_X86_64)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..6669164abadc 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -31,6 +31,18 @@
  */
 #define SEGMENT_RPL_MASK	0x3
 
+/*
+ * When running on Xen PV, the actual privilege level of the kernel is 1,
+ * not 0. Testing the Requested Privilege Level in a segment selector to
+ * determine whether the context is user mode or kernel mode with
+ * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
+ * matches the 0x3 mask.
+ *
+ * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
+ * kernels because privilege level 2 is never used.
+ */
+#define USER_SEGMENT_RPL_MASK	0x2
+
 /* User mode is privilege level 3: */
 #define USER_RPL		0x3
 
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 2ee8e469dcf5..64c3dce374e5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
 extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
 
 #ifdef CONFIG_X86_64
 static inline int set_mce_nospec(unsigned long pfn)
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 18a4b6890fa8..0e059b73437b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task)
 	if (static_cpu_has(X86_FEATURE_XENPV))
 		load_sp0(task_top_of_stack(task));
 #endif
+}
 
+static inline void kthread_frame_init(struct inactive_task_frame *frame,
+				      unsigned long fun, unsigned long arg)
+{
+	frame->bx = fun;
+#ifdef CONFIG_X86_32
+	frame->di = arg;
+#else
+	frame->r12 = arg;
+#endif
 }
 
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e046a405743d..e2389ce9bf58 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,6 +6,8 @@
 #ifndef _ASM_X86_SYSCALL_WRAPPER_H
 #define _ASM_X86_SYSCALL_WRAPPER_H
 
+struct pt_regs;
+
 /* Mapping of registers to parameters for syscalls on x86-64 and x32 */
 #define SC_X86_64_REGS_TO_ARGS(x, ...)					\
 	__MAP(x,__SC_ARGS						\
@@ -28,13 +30,21 @@
  * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
  * case as well.
  */
+#define __IA32_COMPAT_SYS_STUB0(x, name)				\
+	asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\
+	ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO);		\
+	asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\
+	{								\
+		return __se_compat_sys_##name();			\
+	}
+
 #define __IA32_COMPAT_SYS_STUBx(x, name, ...)				\
 	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
 	ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO);		\
 	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
 	{								\
 		return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
-	}								\
+	}
 
 #define __IA32_SYS_STUBx(x, name, ...)					\
 	asmlinkage long __ia32_sys##name(const struct pt_regs *regs);	\
@@ -48,16 +58,23 @@
  * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
  * named __ia32_sys_*()
  */
-#define SYSCALL_DEFINE0(sname)					\
-	SYSCALL_METADATA(_##sname, 0);				\
-	asmlinkage long __x64_sys_##sname(void);		\
-	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
-	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);	\
-	asmlinkage long __x64_sys_##sname(void)
 
-#define COND_SYSCALL(name)						\
-	cond_syscall(__x64_sys_##name);					\
-	cond_syscall(__ia32_sys_##name)
+#define SYSCALL_DEFINE0(sname)						\
+	SYSCALL_METADATA(_##sname, 0);					\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);		\
+	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);		\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name)							\
+	asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused)	\
+	{									\
+		return sys_ni_syscall();					\
+	}									\
+	asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\
+	{									\
+		return sys_ni_syscall();					\
+	}
 
 #define SYS_NI(name)							\
 	SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);		\
@@ -75,15 +92,24 @@
  * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
  * with x86_64 obviously do not need such care.
  */
+#define __X32_COMPAT_SYS_STUB0(x, name, ...)				\
+	asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\
+	ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO);		\
+	asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\
+	{								\
+		return __se_compat_sys_##name();\
+	}
+
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)				\
 	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
 	ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO);		\
 	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
 	{								\
 		return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
-	}								\
+	}
 
 #else /* CONFIG_X86_X32 */
+#define __X32_COMPAT_SYS_STUB0(x, name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)
 #endif /* CONFIG_X86_X32 */
 
@@ -94,6 +120,17 @@
  * mapping of registers to parameters, we need to generate stubs for each
  * of them.
  */
+#define COMPAT_SYSCALL_DEFINE0(name)					\
+	static long __se_compat_sys_##name(void);			\
+	static inline long __do_compat_sys_##name(void);		\
+	__IA32_COMPAT_SYS_STUB0(x, name)				\
+	__X32_COMPAT_SYS_STUB0(x, name)					\
+	static long __se_compat_sys_##name(void)			\
+	{								\
+		return __do_compat_sys_##name();			\
+	}								\
+	static inline long __do_compat_sys_##name(void)
+
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)					\
 	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
@@ -181,15 +218,19 @@
  * macros to work correctly.
  */
 #ifndef SYSCALL_DEFINE0
-#define SYSCALL_DEFINE0(sname)					\
-	SYSCALL_METADATA(_##sname, 0);				\
-	asmlinkage long __x64_sys_##sname(void);		\
-	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
-	asmlinkage long __x64_sys_##sname(void)
+#define SYSCALL_DEFINE0(sname)						\
+	SYSCALL_METADATA(_##sname, 0);					\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);		\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
 #endif
 
 #ifndef COND_SYSCALL
-#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
+#define COND_SYSCALL(name) 							\
+	asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused)	\
+	{									\
+		return sys_ni_syscall();					\
+	}
 #endif
 
 #ifndef SYS_NI
@@ -201,7 +242,6 @@
  * For VSYSCALLS, we need to declare these three syscalls with the new
  * pt_regs-based calling convention for in-kernel use.
  */
-struct pt_regs;
 asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
 asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
 asmlinkage long __x64_sys_time(const struct pt_regs *regs);
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
deleted file mode 100644
index 6ed2deacf1d0..000000000000
--- a/arch/x86/include/asm/tce.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file is derived from asm-powerpc/tce.h.
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- * Author: Jon Mason <jdmason@us.ibm.com>
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE   8   /* in bytes */
-
-#define TCE_READ_SHIFT   0
-#define TCE_WRITE_SHIFT  1
-#define TCE_HUBID_SHIFT  2   /* unused */
-#define TCE_RSVD_SHIFT   8   /* unused */
-#define TCE_RPN_SHIFT    12
-#define TCE_UNUSED_SHIFT 48  /* unused */
-
-#define TCE_RPN_MASK     0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
-		      unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5e8319bb207a..67315fa3956a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -25,13 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
  */
 #define POKE_MAX_OPCODE_SIZE	5
 
-struct text_poke_loc {
-	void *detour;
-	void *addr;
-	size_t len;
-	const char opcode[POKE_MAX_OPCODE_SIZE];
-};
-
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
 
 /*
@@ -49,10 +42,77 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
  * an inconsistent instruction while you patch.
  */
 extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void text_poke_sync(void);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
-extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
+
+extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void text_poke_finish(void);
+
+#define INT3_INSN_SIZE		1
+#define INT3_INSN_OPCODE	0xCC
+
+#define CALL_INSN_SIZE		5
+#define CALL_INSN_OPCODE	0xE8
+
+#define JMP32_INSN_SIZE		5
+#define JMP32_INSN_OPCODE	0xE9
+
+#define JMP8_INSN_SIZE		2
+#define JMP8_INSN_OPCODE	0xEB
+
+#define DISP32_SIZE		4
+
+static inline int text_opcode_size(u8 opcode)
+{
+	int size = 0;
+
+#define __CASE(insn)	\
+	case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
+
+	switch(opcode) {
+	__CASE(INT3);
+	__CASE(CALL);
+	__CASE(JMP32);
+	__CASE(JMP8);
+	}
+
+#undef __CASE
+
+	return size;
+}
+
+union text_poke_insn {
+	u8 text[POKE_MAX_OPCODE_SIZE];
+	struct {
+		u8 opcode;
+		s32 disp;
+	} __attribute__((packed));
+};
+
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+	static union text_poke_insn insn; /* per instance */
+	int size = text_opcode_size(opcode);
+
+	insn.opcode = opcode;
+
+	if (size > 1) {
+		insn.disp = (long)dest - (long)(addr + size);
+		if (size == 2) {
+			/*
+			 * Ensure that for JMP9 the displacement
+			 * actually fits the signed byte.
+			 */
+			BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
+		}
+	}
+
+	return &insn.text;
+}
+
 extern int after_bootmem;
 extern __ro_after_init struct mm_struct *poking_mm;
 extern __ro_after_init unsigned long poking_addr;
@@ -63,9 +123,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
 	regs->ip = ip;
 }
 
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
-
 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 {
 	/*
@@ -73,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 	 * stack where the break point happened, and the saving of
 	 * pt_regs. We can extend the original stack because of
 	 * this gap. See the idtentry macro's create_gap option.
+	 *
+	 * Similarly entry_32.S will have a gap on the stack for (any) hardware
+	 * exception and pt_regs; see FIXUP_FRAME.
 	 */
 	regs->sp -= sizeof(unsigned long);
 	*(unsigned long *)regs->sp = val;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f9453536f9bb..cf4327986e98 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -143,8 +143,8 @@ struct thread_info {
 	 _TIF_NOHZ)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_BASE						\
-	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\
+#define _TIF_WORK_CTXSW_BASE					\
+	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
 	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
 
 /*
@@ -156,8 +156,14 @@ struct thread_info {
 # define _TIF_WORK_CTXSW	(_TIF_WORK_CTXSW_BASE)
 #endif
 
-#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#ifdef CONFIG_X86_IOPL_IOPERM
+# define _TIF_WORK_CTXSW_PREV	(_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \
+				 _TIF_IO_BITMAP)
+#else
+# define _TIF_WORK_CTXSW_PREV	(_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY)
+#endif
+
+#define _TIF_WORK_CTXSW_NEXT	(_TIF_WORK_CTXSW)
 
 #define STACK_WARN		(THREAD_SIZE/8)
 
@@ -233,15 +239,6 @@ static inline int arch_within_stack_frames(const void * const stack,
 			   current_thread_info()->status & TS_COMPAT)
 #endif
 
-/*
- * Force syscall return via IRET by making it look as if there was
- * some work pending. IRET is our most capable (but slowest) syscall
- * return path, which is able to restore modified SS, CS and certain
- * EFLAGS values that other (fast) syscall return instructions
- * are not able to restore properly.
- */
-#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
-
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index ace464f09681..4d705cb4d63b 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
 		      __entry->ncpus, __entry->vector)
 	);
 
+TRACE_EVENT(hyperv_send_ipi_one,
+	    TP_PROTO(int cpu,
+		     int vector),
+	    TP_ARGS(cpu, vector),
+	    TP_STRUCT__entry(
+		    __field(int, cpu)
+		    __field(int, vector)
+		    ),
+	    TP_fast_assign(__entry->cpu = cpu;
+			   __entry->vector = vector;
+		    ),
+	    TP_printk("cpu %d vector %x",
+		      __entry->cpu, __entry->vector)
+	);
+
 #endif /* CONFIG_HYPERV */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b25e633033c3..ffa0dc8a535e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
 dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
 dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
+#endif
 dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
 dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
index db43f2a0d92c..aeed98c3c9e1 100644
--- a/arch/x86/include/asm/umip.h
+++ b/arch/x86/include/asm/umip.h
@@ -4,9 +4,9 @@
 #include <linux/types.h>
 #include <asm/ptrace.h>
 
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
 bool fixup_umip_exception(struct pt_regs *regs);
 #else
 static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; }
-#endif  /* CONFIG_X86_INTEL_UMIP */
+#endif  /* CONFIG_X86_UMIP */
 #endif  /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 0bcdb1279361..f5e2eb12cb71 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -86,6 +86,14 @@
 	UNWIND_HINT sp_offset=\sp_offset
 .endm
 
+.macro UNWIND_HINT_SAVE
+	UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+	UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
 #else /* !__ASSEMBLY__ */
 
 #define UNWIND_HINT(sp_reg, sp_offset, type, end)		\
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6e7caf65fa40..389174eaec79 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
 extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
 extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
 
-extern void uv_bios_init(void);
+extern int uv_bios_init(void);
 
 extern unsigned long sn_rtc_cycles_per_second;
 extern int uv_type;
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 6bc6d89d8e2a..45ea95ce79b4 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,6 +12,16 @@ struct mm_struct;
 #ifdef CONFIG_X86_UV
 #include <linux/efi.h>
 
+#define	UV_PROC_NODE	"sgi_uv"
+
+static inline int uv(int uvtype)
+{
+	/* uv(0) is "any" */
+	if (uvtype >= 0 && uvtype <= 30)
+		return 1 << uvtype;
+	return 1;
+}
+
 extern unsigned long uv_systab_phys;
 
 extern enum uv_system_type get_uv_system_type(void);
@@ -20,7 +30,8 @@ static inline bool is_early_uv_system(void)
 	return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
 }
 extern int is_uv_system(void);
-extern int is_uv_hubless(void);
+extern int is_uv_hubbed(int uvtype);
+extern int is_uv_hubless(int uvtype);
 extern void uv_cpu_init(void);
 extern void uv_nmi_init(void);
 extern void uv_system_init(void);
@@ -32,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
 static inline bool is_early_uv_system(void)	{ return 0; }
 static inline int is_uv_system(void)	{ return 0; }
-static inline int is_uv_hubless(void)	{ return 0; }
+static inline int is_uv_hubbed(int uv)	{ return 0; }
+static inline int is_uv_hubless(int uv) { return 0; }
 static inline void uv_cpu_init(void)	{ }
 static inline void uv_system_init(void)	{ }
 static inline const struct cpumask *
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 44cf6d6deb7a..950cd1395d5d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,7 @@
 #include <linux/topology.h>
 #include <asm/types.h>
 #include <asm/percpu.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/bios.h>
 #include <asm/irq_vectors.h>
@@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version)
 #define UV4_HUB_REVISION_BASE		7
 #define UV4A_HUB_REVISION_BASE		8	/* UV4 (fixed) rev 2 */
 
-#ifdef	UV1_HUB_IS_SUPPORTED
+/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */
 static inline int is_uv1_hub(void)
 {
-	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
+#ifdef	UV1_HUB_IS_SUPPORTED
+	return is_uv_hubbed(uv(1));
 #else
-static inline int is_uv1_hub(void)
-{
 	return 0;
-}
 #endif
+}
 
-#ifdef	UV2_HUB_IS_SUPPORTED
 static inline int is_uv2_hub(void)
 {
-	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
+#ifdef	UV2_HUB_IS_SUPPORTED
+	return is_uv_hubbed(uv(2));
 #else
-static inline int is_uv2_hub(void)
-{
 	return 0;
-}
 #endif
+}
 
-#ifdef	UV3_HUB_IS_SUPPORTED
 static inline int is_uv3_hub(void)
 {
-	return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
+#ifdef	UV3_HUB_IS_SUPPORTED
+	return is_uv_hubbed(uv(3));
 #else
-static inline int is_uv3_hub(void)
-{
 	return 0;
-}
 #endif
+}
 
 /* First test "is UV4A", then "is UV4" */
-#ifdef	UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
-	return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
 static inline int is_uv4a_hub(void)
 {
+#ifdef	UV4A_HUB_IS_SUPPORTED
+	if (is_uv_hubbed(uv(4)))
+		return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
+#endif
 	return 0;
 }
-#endif
 
-#ifdef	UV4_HUB_IS_SUPPORTED
 static inline int is_uv4_hub(void)
 {
-	return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
+#ifdef	UV4_HUB_IS_SUPPORTED
+	return is_uv_hubbed(uv(4));
 #else
-static inline int is_uv4_hub(void)
-{
 	return 0;
-}
 #endif
+}
 
 static inline int is_uvx_hub(void)
 {
-	if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
-		return uv_hub_info->hub_revision;
-
-	return 0;
+	return (is_uv_hubbed(-2) >= uv(2));
 }
 
 static inline int is_uv_hub(void)
 {
-#ifdef	UV1_HUB_IS_SUPPORTED
-	return uv_hub_info->hub_revision;
-#endif
-	return is_uvx_hub();
+	return is_uv1_hub() || is_uvx_hub();
 }
 
 union uvh_apicid {
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 230474e2ddb5..bbcdc7b8f963 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
 	long sym_vvar_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
+	long sym_timens_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index e9ee139cf29e..6ee1f7dba34b 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -21,6 +21,7 @@
 #include <clocksource/hyperv_timer.h>
 
 #define __vdso_data (VVAR(_vdso_data))
+#define __timens_vdso_data (TIMENS(_vdso_data))
 
 #define VDSO_HAS_TIME 1
 
@@ -56,6 +57,13 @@ extern struct ms_hyperv_tsc_page hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	return __timens_vdso_data;
+}
+#endif
+
 #ifndef BUILD_VDSO32
 
 static __always_inline
@@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 
 #else
 
-#define VDSO_HAS_32BIT_FALLBACK	1
-
 static __always_inline
 long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 {
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..29837740b520
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/pgtable_areas.h>
+
+#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 1835767aa335..9fbba31be825 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -15,67 +15,70 @@
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <uapi/asm/vmx.h>
+#include <asm/vmxfeatures.h>
+
+#define VMCS_CONTROL_BIT(x)	BIT(VMX_FEATURE_##x & 0x1f)
 
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
  */
-#define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING             0x00000008
-#define CPU_BASED_HLT_EXITING                   0x00000080
-#define CPU_BASED_INVLPG_EXITING                0x00000200
-#define CPU_BASED_MWAIT_EXITING                 0x00000400
-#define CPU_BASED_RDPMC_EXITING                 0x00000800
-#define CPU_BASED_RDTSC_EXITING                 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
-#define CPU_BASED_CR3_STORE_EXITING		0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING              0x00080000
-#define CPU_BASED_CR8_STORE_EXITING             0x00100000
-#define CPU_BASED_TPR_SHADOW                    0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
-#define CPU_BASED_MOV_DR_EXITING                0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING             0x01000000
-#define CPU_BASED_USE_IO_BITMAPS                0x02000000
-#define CPU_BASED_MONITOR_TRAP_FLAG             0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
-#define CPU_BASED_MONITOR_EXITING               0x20000000
-#define CPU_BASED_PAUSE_EXITING                 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
+#define CPU_BASED_VIRTUAL_INTR_PENDING          VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING)
+#define CPU_BASED_USE_TSC_OFFSETING             VMCS_CONTROL_BIT(TSC_OFFSETTING)
+#define CPU_BASED_HLT_EXITING                   VMCS_CONTROL_BIT(HLT_EXITING)
+#define CPU_BASED_INVLPG_EXITING                VMCS_CONTROL_BIT(INVLPG_EXITING)
+#define CPU_BASED_MWAIT_EXITING                 VMCS_CONTROL_BIT(MWAIT_EXITING)
+#define CPU_BASED_RDPMC_EXITING                 VMCS_CONTROL_BIT(RDPMC_EXITING)
+#define CPU_BASED_RDTSC_EXITING                 VMCS_CONTROL_BIT(RDTSC_EXITING)
+#define CPU_BASED_CR3_LOAD_EXITING		VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
+#define CPU_BASED_CR3_STORE_EXITING		VMCS_CONTROL_BIT(CR3_STORE_EXITING)
+#define CPU_BASED_CR8_LOAD_EXITING              VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
+#define CPU_BASED_CR8_STORE_EXITING             VMCS_CONTROL_BIT(CR8_STORE_EXITING)
+#define CPU_BASED_TPR_SHADOW                    VMCS_CONTROL_BIT(VIRTUAL_TPR)
+#define CPU_BASED_VIRTUAL_NMI_PENDING		VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING)
+#define CPU_BASED_MOV_DR_EXITING                VMCS_CONTROL_BIT(MOV_DR_EXITING)
+#define CPU_BASED_UNCOND_IO_EXITING             VMCS_CONTROL_BIT(UNCOND_IO_EXITING)
+#define CPU_BASED_USE_IO_BITMAPS                VMCS_CONTROL_BIT(USE_IO_BITMAPS)
+#define CPU_BASED_MONITOR_TRAP_FLAG             VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG)
+#define CPU_BASED_USE_MSR_BITMAPS               VMCS_CONTROL_BIT(USE_MSR_BITMAPS)
+#define CPU_BASED_MONITOR_EXITING               VMCS_CONTROL_BIT(MONITOR_EXITING)
+#define CPU_BASED_PAUSE_EXITING                 VMCS_CONTROL_BIT(PAUSE_EXITING)
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   VMCS_CONTROL_BIT(SEC_CONTROLS)
 
 #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x0401e172
 
 /*
  * Definitions of Secondary Processor-Based VM-Execution Controls.
  */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
-#define SECONDARY_EXEC_DESC			0x00000004
-#define SECONDARY_EXEC_RDTSCP			0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING		0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC            0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
-#define SECONDARY_EXEC_ENCLS_EXITING		0x00008000
-#define SECONDARY_EXEC_RDSEED_EXITING		0x00010000
-#define SECONDARY_EXEC_ENABLE_PML               0x00020000
-#define SECONDARY_EXEC_PT_CONCEAL_VMX		0x00080000
-#define SECONDARY_EXEC_XSAVES			0x00100000
-#define SECONDARY_EXEC_PT_USE_GPA		0x01000000
-#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	0x00400000
-#define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES)
+#define SECONDARY_EXEC_ENABLE_EPT               VMCS_CONTROL_BIT(EPT)
+#define SECONDARY_EXEC_DESC			VMCS_CONTROL_BIT(DESC_EXITING)
+#define SECONDARY_EXEC_RDTSCP			VMCS_CONTROL_BIT(RDTSCP)
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   VMCS_CONTROL_BIT(VIRTUAL_X2APIC)
+#define SECONDARY_EXEC_ENABLE_VPID              VMCS_CONTROL_BIT(VPID)
+#define SECONDARY_EXEC_WBINVD_EXITING		VMCS_CONTROL_BIT(WBINVD_EXITING)
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	VMCS_CONTROL_BIT(UNRESTRICTED_GUEST)
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT       VMCS_CONTROL_BIT(APIC_REGISTER_VIRT)
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY)
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING)
+#define SECONDARY_EXEC_RDRAND_EXITING		VMCS_CONTROL_BIT(RDRAND_EXITING)
+#define SECONDARY_EXEC_ENABLE_INVPCID		VMCS_CONTROL_BIT(INVPCID)
+#define SECONDARY_EXEC_ENABLE_VMFUNC            VMCS_CONTROL_BIT(VMFUNC)
+#define SECONDARY_EXEC_SHADOW_VMCS              VMCS_CONTROL_BIT(SHADOW_VMCS)
+#define SECONDARY_EXEC_ENCLS_EXITING		VMCS_CONTROL_BIT(ENCLS_EXITING)
+#define SECONDARY_EXEC_RDSEED_EXITING		VMCS_CONTROL_BIT(RDSEED_EXITING)
+#define SECONDARY_EXEC_ENABLE_PML               VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_PT_CONCEAL_VMX		VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
+#define SECONDARY_EXEC_XSAVES			VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
+#define SECONDARY_EXEC_PT_USE_GPA		VMCS_CONTROL_BIT(PT_USE_GPA)
+#define SECONDARY_EXEC_TSC_SCALING              VMCS_CONTROL_BIT(TSC_SCALING)
 #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE	0x04000000
 
-#define PIN_BASED_EXT_INTR_MASK                 0x00000001
-#define PIN_BASED_NMI_EXITING                   0x00000008
-#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER          0x00000040
-#define PIN_BASED_POSTED_INTR                   0x00000080
+#define PIN_BASED_EXT_INTR_MASK                 VMCS_CONTROL_BIT(INTR_EXITING)
+#define PIN_BASED_NMI_EXITING                   VMCS_CONTROL_BIT(NMI_EXITING)
+#define PIN_BASED_VIRTUAL_NMIS                  VMCS_CONTROL_BIT(VIRTUAL_NMIS)
+#define PIN_BASED_VMX_PREEMPTION_TIMER          VMCS_CONTROL_BIT(PREEMPTION_TIMER)
+#define PIN_BASED_POSTED_INTR                   VMCS_CONTROL_BIT(POSTED_INTR)
 
 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x00000016
 
@@ -114,7 +117,9 @@
 #define VMX_MISC_MSR_LIST_MULTIPLIER		512
 
 /* VMFUNC functions */
-#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
+#define VMFUNC_CONTROL_BIT(x)	BIT((VMX_FEATURE_##x & 0x1f) - 28)
+
+#define VMX_VMFUNC_EPTP_SWITCHING               VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
 #define VMFUNC_EPTP_ENTRIES  512
 
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
new file mode 100644
index 000000000000..0d04d8bf15a5
--- /dev/null
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VMXFEATURES_H
+#define _ASM_X86_VMXFEATURES_H
+
+/*
+ * Defines VMX CPU feature bits
+ */
+#define NVMXINTS			3 /* N 32-bit words worth of info */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name.  If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
+#define VMX_FEATURE_INTR_EXITING	( 0*32+  0) /* "" VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING		( 0*32+  3) /* "" VM-Exit on NMIs */
+#define VMX_FEATURE_VIRTUAL_NMIS	( 0*32+  5) /* "vnmi" NMI virtualization */
+#define VMX_FEATURE_PREEMPTION_TIMER	( 0*32+  6) /* VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR		( 0*32+  7) /* Posted Interrupts */
+
+/* EPT/VPID features, scattered to bits 16-23 */
+#define VMX_FEATURE_INVVPID		( 0*32+ 16) /* INVVPID is supported */
+#define VMX_FEATURE_EPT_EXECUTE_ONLY	( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
+#define VMX_FEATURE_EPT_AD		( 0*32+ 18) /* EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB		( 0*32+ 19) /* 1GB EPT pages */
+
+/* Aggregated APIC features 24-27 */
+#define VMX_FEATURE_FLEXPRIORITY	( 0*32+ 24) /* TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV	        ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+
+/* VM-Functions, shifted to bits 28-31 */
+#define VMX_FEATURE_EPTP_SWITCHING	( 0*32+ 28) /* EPTP switching (in guest) */
+
+/* Primary Processor-Based VM-Execution Controls, word 1 */
+#define VMX_FEATURE_VIRTUAL_INTR_PENDING ( 1*32+  2) /* "" VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_TSC_OFFSETTING	( 1*32+  3) /* "tsc_offset" Offset hardware TSC when read in guest */
+#define VMX_FEATURE_HLT_EXITING		( 1*32+  7) /* "" VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING	( 1*32+  9) /* "" VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING	( 1*32+ 10) /* "" VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING	( 1*32+ 11) /* "" VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING	( 1*32+ 12) /* "" VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING	( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING	( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
+#define VMX_FEATURE_CR8_LOAD_EXITING	( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING	( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
+#define VMX_FEATURE_VIRTUAL_TPR		( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
+#define VMX_FEATURE_VIRTUAL_NMI_PENDING	( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING	( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING	( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
+#define VMX_FEATURE_USE_IO_BITMAPS	( 1*32+ 25) /* "" VM-Exit based on I/O port */
+#define VMX_FEATURE_MONITOR_TRAP_FLAG	( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
+#define VMX_FEATURE_USE_MSR_BITMAPS	( 1*32+ 28) /* "" VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING	( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING	( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS	( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */
+
+/* Secondary Processor-Based VM-Execution Controls, word 2 */
+#define VMX_FEATURE_VIRT_APIC_ACCESSES	( 2*32+  0) /* "vapic" Virtualize memory mapped APIC accesses */
+#define VMX_FEATURE_EPT			( 2*32+  1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING	( 2*32+  2) /* "" VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP		( 2*32+  3) /* "" Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC	( 2*32+  4) /* "" Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID		( 2*32+  5) /* Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING	( 2*32+  6) /* "" VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST	( 2*32+  7) /* Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_APIC_REGISTER_VIRT	( 2*32+  8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
+#define VMX_FEATURE_VIRT_INTR_DELIVERY	( 2*32+  9) /* "vid" Evaluation and delivery of pending virtual interrupts */
+#define VMX_FEATURE_PAUSE_LOOP_EXITING	( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
+#define VMX_FEATURE_RDRAND_EXITING	( 2*32+ 11) /* "" VM-Exit on RDRAND*/
+#define VMX_FEATURE_INVPCID		( 2*32+ 12) /* "" Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC		( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS		( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING	( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING	( 2*32+ 16) /* "" VM-Exit on RDSEED */
+#define VMX_FEATURE_PAGE_MOD_LOGGING	( 2*32+ 17) /* "pml" Log dirty pages into buffer */
+#define VMX_FEATURE_EPT_VIOLATION_VE	( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX	( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES		( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_MODE_BASED_EPT_EXEC	( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
+#define VMX_FEATURE_PT_USE_GPA		( 2*32+ 24) /* "" Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING		( 2*32+ 25) /* Scale hardware TSC when read in guest */
+#define VMX_FEATURE_ENCLV_EXITING	( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
+
+#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 32f5d9a0b90e..183e98e49ab9 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -19,10 +19,10 @@
 #ifndef _ASM_X86_VVAR_H
 #define _ASM_X86_VVAR_H
 
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
+#ifdef EMIT_VVAR
+/*
+ * EMIT_VVAR() is used by the kernel linker script to put vvars in the
+ * right place. Also, it's used by kernel code to import offsets values.
  */
 #define DECLARE_VVAR(offset, type, name) \
 	EMIT_VVAR(name, offset)
@@ -33,9 +33,12 @@ extern char __vvar_page;
 
 #define DECLARE_VVAR(offset, type, name)				\
 	extern type vvar_ ## name[CS_BASES]				\
-	__attribute__((visibility("hidden")));
+	__attribute__((visibility("hidden")));				\
+	extern type timens_ ## name[CS_BASES]				\
+	__attribute__((visibility("hidden")));				\
 
 #define VVAR(name) (vvar_ ## name)
+#define TIMENS(name) (timens_ ## name)
 
 #define DEFINE_VVAR(type, name)						\
 	type name[CS_BASES]						\
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 42e1245af0d8..ff4b52e37e60 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
 void xen_arch_unregister_cpu(int num);
 #endif
 
-extern void xen_set_iopl_mask(unsigned mask);
-
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 62ca03ef5c65..9139b3e86316 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -379,12 +379,9 @@ struct xen_pmu_arch {
  * Prefix forces emulation of some non-trapping instructions.
  * Currently only CPUID.
  */
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID          XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
 
 #endif /* _ASM_X86_XEN_INTERFACE_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c895df5482c5..8669c6bdbb84 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_BOOTPARAM_H
 #define _ASM_X86_BOOTPARAM_H
 
-/* setup_data types */
+/* setup_data/setup_indirect types */
 #define SETUP_NONE			0
 #define SETUP_E820_EXT			1
 #define SETUP_DTB			2
@@ -11,6 +11,11 @@
 #define SETUP_APPLE_PROPERTIES		5
 #define SETUP_JAILHOUSE			6
 
+#define SETUP_INDIRECT			(1<<31)
+
+/* SETUP_INDIRECT | max(SETUP_*) */
+#define SETUP_TYPE_MAX			(SETUP_INDIRECT | SETUP_JAILHOUSE)
+
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
 #define RAMDISK_PROMPT_FLAG		0x8000
@@ -49,6 +54,14 @@ struct setup_data {
 	__u8 data[0];
 };
 
+/* extensible setup indirect data node */
+struct setup_indirect {
+	__u32 type;
+	__u32 reserved;  /* Reserved, must be set to zero. */
+	__u64 len;
+	__u64 addr;
+};
+
 struct setup_header {
 	__u8	setup_sects;
 	__u16	root_flags;
@@ -88,6 +101,7 @@ struct setup_header {
 	__u64	pref_address;
 	__u32	init_size;
 	__u32	handover_offset;
+	__u32	kernel_info_offset;
 } __attribute__((packed));
 
 struct sys_desc_table {
@@ -139,15 +153,22 @@ struct boot_e820_entry {
  * setup data structure.
  */
 struct jailhouse_setup_data {
-	__u16	version;
-	__u16	compatible_version;
-	__u16	pm_timer_address;
-	__u16	num_cpus;
-	__u64	pci_mmconfig_base;
-	__u32	tsc_khz;
-	__u32	apic_khz;
-	__u8	standard_ioapic;
-	__u8	cpu_ids[255];
+	struct {
+		__u16	version;
+		__u16	compatible_version;
+	} __attribute__((packed)) hdr;
+	struct {
+		__u16	pm_timer_address;
+		__u16	num_cpus;
+		__u64	pci_mmconfig_base;
+		__u32	tsc_khz;
+		__u32	apic_khz;
+		__u8	standard_ioapic;
+		__u8	cpu_ids[255];
+	} __attribute__((packed)) v1;
+	struct {
+		__u32	flags;
+	} __attribute__((packed)) v2;
 } __attribute__((packed));
 
 /* The so-called "zeropage" */
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 90ab9a795b49..b3d0664fadc9 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -5,6 +5,9 @@
 #if !defined(__x86_64__) || !defined(__ILP32__)
 #include <asm-generic/msgbuf.h>
 #else
+
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for x86 architecture with x32 ABI.
  *
@@ -15,9 +18,9 @@
 
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
+	__kernel_long_t msg_stime;	/* last msgsnd time */
+	__kernel_long_t msg_rtime;	/* last msgrcv time */
+	__kernel_long_t msg_ctime;	/* last change time */
 	__kernel_ulong_t msg_cbytes;	/* current number of bytes on queue */
 	__kernel_ulong_t msg_qnum;	/* number of messages in queue */
 	__kernel_ulong_t msg_qbytes;	/* max number of bytes on queue */
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index 89de6cd9f0a7..71205b02a191 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_SEMBUF_H
 #define _ASM_X86_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for x86 architecture.
  * Note extra padding because this structure is passed back and forth
@@ -21,9 +23,9 @@ struct semid64_ds {
 	unsigned long	sem_ctime;	/* last change time */
 	unsigned long	sem_ctime_high;
 #else
-	__kernel_time_t	sem_otime;	/* last semop time */
+	__kernel_long_t sem_otime;	/* last semop time */
 	__kernel_ulong_t __unused1;
-	__kernel_time_t	sem_ctime;	/* last change time */
+	__kernel_long_t sem_ctime;	/* last change time */
 	__kernel_ulong_t __unused2;
 #endif
 	__kernel_ulong_t sem_nsems;	/* no. of semaphores in array */
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 644421f3823b..f0305dc660c9 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -16,9 +16,9 @@
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
+	__kernel_long_t		shm_atime;	/* last attach time */
+	__kernel_long_t		shm_dtime;	/* last detach time */
+	__kernel_long_t		shm_ctime;	/* last change time */
 	__kernel_pid_t		shm_cpid;	/* pid of creator */
 	__kernel_pid_t		shm_lpid;	/* pid of last operator */
 	__kernel_ulong_t	shm_nattch;	/* no. of current attaches */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3578ad248bc9..6175e370ee4a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,7 +100,9 @@ obj-$(CONFIG_KEXEC_FILE)	+= kexec-bzimage64.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-y				+= kprobes/
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_DOUBLEFAULT)	+= doublefault.o
+ifeq ($(CONFIG_X86_32),y)
+obj-$(CONFIG_DOUBLEFAULT)	+= doublefault_32.o
+endif
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_VM86)		+= vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
@@ -134,7 +136,7 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
-obj-$(CONFIG_X86_INTEL_UMIP)		+= umip.o
+obj-$(CONFIG_X86_UMIP)			+= umip.o
 
 obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
 obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
@@ -146,7 +148,6 @@ ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
 	obj-$(CONFIG_GART_IOMMU)	+= amd_gart_64.o aperture_64.o
-	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 
 	obj-$(CONFIG_MMCONF_FAM10H)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ca13851f0570..26b7256f590f 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -27,6 +27,17 @@ static char temp_stack[4096];
 #endif
 
 /**
+ * acpi_get_wakeup_address - provide physical address for S3 wakeup
+ *
+ * Returns the physical address where the kernel should be resumed after the
+ * system awakes from S3, e.g. for programming into the firmware waking vector.
+ */
+unsigned long acpi_get_wakeup_address(void)
+{
+	return ((unsigned long)(real_mode_header->wakeup_start));
+}
+
+/**
  * x86_acpi_enter_sleep_state - enter sleep state
  * @state: Sleep state to enter.
  *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index fbb60ca4255c..d06c2079b6c1 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -3,7 +3,7 @@
  *	Variables and functions used by the code in sleep.c
  */
 
-#include <asm/realmode.h>
+#include <linux/linkage.h>
 
 extern unsigned long saved_video_mode;
 extern long saved_magic;
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index e95e95960156..daf88f8143c5 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -9,8 +9,7 @@
 	.code32
 	ALIGN
 
-ENTRY(wakeup_pmode_return)
-wakeup_pmode_return:
+SYM_CODE_START(wakeup_pmode_return)
 	movw	$__KERNEL_DS, %ax
 	movw	%ax, %ss
 	movw	%ax, %fs
@@ -39,6 +38,7 @@ wakeup_pmode_return:
 	# jump to place where we left off
 	movl	saved_eip, %eax
 	jmp	*%eax
+SYM_CODE_END(wakeup_pmode_return)
 
 bogus_magic:
 	jmp	bogus_magic
@@ -72,7 +72,7 @@ restore_registers:
 	popfl
 	ret
 
-ENTRY(do_suspend_lowlevel)
+SYM_CODE_START(do_suspend_lowlevel)
 	call	save_processor_state
 	call	save_registers
 	pushl	$3
@@ -87,10 +87,11 @@ ret_point:
 	call	restore_registers
 	call	restore_processor_state
 	ret
+SYM_CODE_END(do_suspend_lowlevel)
 
 .data
 ALIGN
-ENTRY(saved_magic)	.long	0
+SYM_DATA(saved_magic,	.long 0)
 saved_eip:		.long 0
 
 # saved registers
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 7f9ade13bbcf..c8daa92f38dc 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -14,7 +14,7 @@
 	/*
 	 * Hooray, we are in Long 64-bit mode (but still running in low memory)
 	 */
-ENTRY(wakeup_long64)
+SYM_FUNC_START(wakeup_long64)
 	movq	saved_magic, %rax
 	movq	$0x123456789abcdef0, %rdx
 	cmpq	%rdx, %rax
@@ -40,9 +40,9 @@ ENTRY(wakeup_long64)
 
 	movq	saved_rip, %rax
 	jmp	*%rax
-ENDPROC(wakeup_long64)
+SYM_FUNC_END(wakeup_long64)
 
-ENTRY(do_suspend_lowlevel)
+SYM_FUNC_START(do_suspend_lowlevel)
 	FRAME_BEGIN
 	subq	$8, %rsp
 	xorl	%eax, %eax
@@ -125,7 +125,7 @@ ENTRY(do_suspend_lowlevel)
 	addq	$8, %rsp
 	FRAME_END
 	jmp	restore_processor_state
-ENDPROC(do_suspend_lowlevel)
+SYM_FUNC_END(do_suspend_lowlevel)
 
 .data
 saved_rbp:		.quad	0
@@ -136,4 +136,4 @@ saved_rbx:		.quad	0
 saved_rip:		.quad	0
 saved_rsp:		.quad	0
 
-ENTRY(saved_magic)	.quad	0
+SYM_DATA(saved_magic,	.quad	0)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9ae8e3cdf53f..15ac0d5f4b40 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -937,74 +937,139 @@ static void do_sync_core(void *info)
 	sync_core();
 }
 
-static struct bp_patching_desc {
+void text_poke_sync(void)
+{
+	on_each_cpu(do_sync_core, NULL, 1);
+}
+
+struct text_poke_loc {
+	s32 rel_addr; /* addr := _stext + rel_addr */
+	s32 rel32;
+	u8 opcode;
+	const u8 text[POKE_MAX_OPCODE_SIZE];
+};
+
+struct bp_patching_desc {
 	struct text_poke_loc *vec;
 	int nr_entries;
-} bp_patching;
+	atomic_t refs;
+};
+
+static struct bp_patching_desc *bp_desc;
 
-static int patch_cmp(const void *key, const void *elt)
+static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+{
+	struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
+
+	if (!desc || !atomic_inc_not_zero(&desc->refs))
+		return NULL;
+
+	return desc;
+}
+
+static inline void put_desc(struct bp_patching_desc *desc)
+{
+	smp_mb__before_atomic();
+	atomic_dec(&desc->refs);
+}
+
+static inline void *text_poke_addr(struct text_poke_loc *tp)
+{
+	return _stext + tp->rel_addr;
+}
+
+static int notrace patch_cmp(const void *key, const void *elt)
 {
 	struct text_poke_loc *tp = (struct text_poke_loc *) elt;
 
-	if (key < tp->addr)
+	if (key < text_poke_addr(tp))
 		return -1;
-	if (key > tp->addr)
+	if (key > text_poke_addr(tp))
 		return 1;
 	return 0;
 }
 NOKPROBE_SYMBOL(patch_cmp);
 
-int poke_int3_handler(struct pt_regs *regs)
+int notrace poke_int3_handler(struct pt_regs *regs)
 {
+	struct bp_patching_desc *desc;
 	struct text_poke_loc *tp;
-	unsigned char int3 = 0xcc;
+	int len, ret = 0;
 	void *ip;
 
+	if (user_mode(regs))
+		return 0;
+
 	/*
 	 * Having observed our INT3 instruction, we now must observe
-	 * bp_patching.nr_entries.
-	 *
-	 * 	nr_entries != 0			INT3
-	 * 	WMB				RMB
-	 * 	write INT3			if (nr_entries)
+	 * bp_desc:
 	 *
-	 * Idem for other elements in bp_patching.
+	 *	bp_desc = desc			INT3
+	 *	WMB				RMB
+	 *	write INT3			if (desc)
 	 */
 	smp_rmb();
 
-	if (likely(!bp_patching.nr_entries))
-		return 0;
-
-	if (user_mode(regs))
+	desc = try_get_desc(&bp_desc);
+	if (!desc)
 		return 0;
 
 	/*
-	 * Discount the sizeof(int3). See text_poke_bp_batch().
+	 * Discount the INT3. See text_poke_bp_batch().
 	 */
-	ip = (void *) regs->ip - sizeof(int3);
+	ip = (void *) regs->ip - INT3_INSN_SIZE;
 
 	/*
 	 * Skip the binary search if there is a single member in the vector.
 	 */
-	if (unlikely(bp_patching.nr_entries > 1)) {
-		tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+	if (unlikely(desc->nr_entries > 1)) {
+		tp = bsearch(ip, desc->vec, desc->nr_entries,
 			     sizeof(struct text_poke_loc),
 			     patch_cmp);
 		if (!tp)
-			return 0;
+			goto out_put;
 	} else {
-		tp = bp_patching.vec;
-		if (tp->addr != ip)
-			return 0;
+		tp = desc->vec;
+		if (text_poke_addr(tp) != ip)
+			goto out_put;
 	}
 
-	/* set up the specified breakpoint detour */
-	regs->ip = (unsigned long) tp->detour;
+	len = text_opcode_size(tp->opcode);
+	ip += len;
 
-	return 1;
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		/*
+		 * Someone poked an explicit INT3, they'll want to handle it,
+		 * do not consume.
+		 */
+		goto out_put;
+
+	case CALL_INSN_OPCODE:
+		int3_emulate_call(regs, (long)ip + tp->rel32);
+		break;
+
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		int3_emulate_jmp(regs, (long)ip + tp->rel32);
+		break;
+
+	default:
+		BUG();
+	}
+
+	ret = 1;
+
+out_put:
+	put_desc(desc);
+	return ret;
 }
 NOKPROBE_SYMBOL(poke_int3_handler);
 
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
 /**
  * text_poke_bp_batch() -- update instructions on live kernel on SMP
  * @tp:			vector of instructions to patch
@@ -1015,7 +1080,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
  * synchronization using int3 breakpoint.
  *
  * The way it is done:
- * 	- For each entry in the vector:
+ *	- For each entry in the vector:
  *		- add a int3 trap to the address that will be patched
  *	- sync cores
  *	- For each entry in the vector:
@@ -1026,16 +1091,20 @@ NOKPROBE_SYMBOL(poke_int3_handler);
  *		  replacing opcode
  *	- sync cores
  */
-void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
-	int patched_all_but_first = 0;
-	unsigned char int3 = 0xcc;
+	struct bp_patching_desc desc = {
+		.vec = tp,
+		.nr_entries = nr_entries,
+		.refs = ATOMIC_INIT(1),
+	};
+	unsigned char int3 = INT3_INSN_OPCODE;
 	unsigned int i;
+	int do_sync;
 
 	lockdep_assert_held(&text_mutex);
 
-	bp_patching.vec = tp;
-	bp_patching.nr_entries = nr_entries;
+	smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
 
 	/*
 	 * Corresponding read barrier in int3 notifier for making sure the
@@ -1047,45 +1116,153 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	 * First step: add a int3 trap to the address that will be patched.
 	 */
 	for (i = 0; i < nr_entries; i++)
-		text_poke(tp[i].addr, &int3, sizeof(int3));
+		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
 
-	on_each_cpu(do_sync_core, NULL, 1);
+	text_poke_sync();
 
 	/*
 	 * Second step: update all but the first byte of the patched range.
 	 */
-	for (i = 0; i < nr_entries; i++) {
-		if (tp[i].len - sizeof(int3) > 0) {
-			text_poke((char *)tp[i].addr + sizeof(int3),
-				  (const char *)tp[i].opcode + sizeof(int3),
-				  tp[i].len - sizeof(int3));
-			patched_all_but_first++;
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
+		int len = text_opcode_size(tp[i].opcode);
+
+		if (len - INT3_INSN_SIZE > 0) {
+			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+				  (const char *)tp[i].text + INT3_INSN_SIZE,
+				  len - INT3_INSN_SIZE);
+			do_sync++;
 		}
 	}
 
-	if (patched_all_but_first) {
+	if (do_sync) {
 		/*
 		 * According to Intel, this core syncing is very likely
 		 * not necessary and we'd be safe even without it. But
 		 * better safe than sorry (plus there's not only Intel).
 		 */
-		on_each_cpu(do_sync_core, NULL, 1);
+		text_poke_sync();
 	}
 
 	/*
 	 * Third step: replace the first byte (int3) by the first byte of
 	 * replacing opcode.
 	 */
-	for (i = 0; i < nr_entries; i++)
-		text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
+		if (tp[i].text[0] == INT3_INSN_OPCODE)
+			continue;
+
+		text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
+		do_sync++;
+	}
+
+	if (do_sync)
+		text_poke_sync();
 
-	on_each_cpu(do_sync_core, NULL, 1);
 	/*
-	 * sync_core() implies an smp_mb() and orders this store against
-	 * the writing of the new instruction.
+	 * Remove and synchronize_rcu(), except we have a very primitive
+	 * refcount based completion.
 	 */
-	bp_patching.vec = NULL;
-	bp_patching.nr_entries = 0;
+	WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
+	if (!atomic_dec_and_test(&desc.refs))
+		atomic_cond_read_acquire(&desc.refs, !VAL);
+}
+
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+			const void *opcode, size_t len, const void *emulate)
+{
+	struct insn insn;
+
+	memcpy((void *)tp->text, opcode, len);
+	if (!emulate)
+		emulate = opcode;
+
+	kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+	insn_get_length(&insn);
+
+	BUG_ON(!insn_complete(&insn));
+	BUG_ON(len != insn.length);
+
+	tp->rel_addr = addr - (void *)_stext;
+	tp->opcode = insn.opcode.bytes[0];
+
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		break;
+
+	case CALL_INSN_OPCODE:
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		tp->rel32 = insn.immediate.value;
+		break;
+
+	default: /* assume NOP */
+		switch (len) {
+		case 2: /* NOP2 -- emulate as JMP8+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[len], len));
+			tp->opcode = JMP8_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		case 5: /* NOP5 -- emulate as JMP32+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+			tp->opcode = JMP32_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		default: /* unknown instruction */
+			BUG();
+		}
+		break;
+	}
+}
+
+/*
+ * We hard rely on the tp_vec being ordered; ensure this is so by flushing
+ * early if needed.
+ */
+static bool tp_order_fail(void *addr)
+{
+	struct text_poke_loc *tp;
+
+	if (!tp_vec_nr)
+		return false;
+
+	if (!addr) /* force */
+		return true;
+
+	tp = &tp_vec[tp_vec_nr - 1];
+	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
+		return true;
+
+	return false;
+}
+
+static void text_poke_flush(void *addr)
+{
+	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
+		text_poke_bp_batch(tp_vec, tp_vec_nr);
+		tp_vec_nr = 0;
+	}
+}
+
+void text_poke_finish(void)
+{
+	text_poke_flush(NULL);
+}
+
+void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
+{
+	struct text_poke_loc *tp;
+
+	if (unlikely(system_state == SYSTEM_BOOTING)) {
+		text_poke_early(addr, opcode, len);
+		return;
+	}
+
+	text_poke_flush(addr);
+
+	tp = &tp_vec[tp_vec_nr++];
+	text_poke_loc_init(tp, addr, opcode, len, emulate);
 }
 
 /**
@@ -1099,20 +1276,15 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
  * dynamically allocated memory. This function should be used when it is
  * not possible to allocate memory.
  */
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
 {
-	struct text_poke_loc tp = {
-		.detour = handler,
-		.addr = addr,
-		.len = len,
-	};
+	struct text_poke_loc tp;
 
-	if (len > POKE_MAX_OPCODE_SIZE) {
-		WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
+	if (unlikely(system_state == SYSTEM_BOOTING)) {
+		text_poke_early(addr, opcode, len);
 		return;
 	}
 
-	memcpy((void *)tp.opcode, opcode, len);
-
+	text_poke_loc_init(&tp, addr, opcode, len, emulate);
 	text_poke_bp_batch(&tp, 1);
 }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index a6ac3712db8b..4e5f50236048 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -185,13 +185,13 @@ static void iommu_full(struct device *dev, size_t size, int dir)
 static inline int
 need_iommu(struct device *dev, unsigned long addr, size_t size)
 {
-	return force_iommu || !dma_capable(dev, addr, size);
+	return force_iommu || !dma_capable(dev, addr, size, true);
 }
 
 static inline int
 nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
 {
-	return !dma_capable(dev, addr, size);
+	return !dma_capable(dev, addr, size, true);
 }
 
 /* Map a single continuous physical area into the IOMMU.
@@ -510,10 +510,9 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
 
 	if (iommu_size < 64*1024*1024) {
-		pr_warning(
-			"PCI-DMA: Warning: Small IOMMU %luMB."
+		pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
 			" Consider increasing the AGP aperture in BIOS\n",
-				iommu_size >> 20);
+			iommu_size >> 20);
 	}
 
 	return iommu_size;
@@ -665,8 +664,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
 
  nommu:
 	/* Should not happen anymore */
-	pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
-	       "falling back to iommu=soft.\n");
+	pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to iommu=soft.\n");
 	return -1;
 }
 
@@ -733,8 +731,8 @@ int __init gart_iommu_init(void)
 	    !gart_iommu_aperture ||
 	    (no_agp && init_amd_gatt(&info) < 0)) {
 		if (max_pfn > MAX_DMA32_PFN) {
-			pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
-			pr_warning("falling back to iommu=soft.\n");
+			pr_warn("More than 4GB of memory but GART IOMMU not available.\n");
+			pr_warn("falling back to iommu=soft.\n");
 		}
 		return 0;
 	}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 251c795b4eb3..69aed0ebbdfc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -22,6 +22,7 @@
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_19H_DF_F4	0x1654
 
 /* Protect the PCI config register pairs used for SMN and DF indirect access. */
 static DEFINE_MUTEX(smn_mutex);
@@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 5da106f84e84..fe698f96617c 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -95,7 +95,7 @@ static inline void apbt_set_mapping(void)
 		printk(KERN_WARNING "No timer base from SFI, use default\n");
 		apbt_address = APBT_DEFAULT_BASE;
 	}
-	apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
+	apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE);
 	if (!apbt_virt_address) {
 		pr_debug("Failed mapping APBT phy address at %lu\n",\
 			 (unsigned long)apbt_address);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2b0faf86da1b..28446fa6bf18 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -780,8 +780,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 
 	res = (((u64)deltapm) *  mult) >> 22;
 	do_div(res, 1000000);
-	pr_warning("APIC calibration not consistent "
-		   "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+	pr_warn("APIC calibration not consistent "
+		"with PM-Timer: %ldms instead of 100ms\n", (long)res);
 
 	/* Correct the lapic counter value */
 	res = (((u64)(*delta)) * pm_100ms);
@@ -977,7 +977,7 @@ static int __init calibrate_APIC_clock(void)
 	 */
 	if (lapic_timer_period < (1000000 / HZ)) {
 		local_irq_enable();
-		pr_warning("APIC frequency too slow, disabling apic timer\n");
+		pr_warn("APIC frequency too slow, disabling apic timer\n");
 		return -1;
 	}
 
@@ -1021,7 +1021,7 @@ static int __init calibrate_APIC_clock(void)
 	local_irq_enable();
 
 	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		pr_warning("APIC timer disabled due to verification failure\n");
+		pr_warn("APIC timer disabled due to verification failure\n");
 		return -1;
 	}
 
@@ -1095,8 +1095,8 @@ static void local_apic_timer_interrupt(void)
 	 * spurious.
 	 */
 	if (!evt->event_handler) {
-		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
-			   smp_processor_id());
+		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
+			smp_processor_id());
 		/* Switch it off */
 		lapic_timer_shutdown(evt);
 		return;
@@ -1811,11 +1811,11 @@ static int __init setup_nox2apic(char *str)
 		int apicid = native_apic_msr_read(APIC_ID);
 
 		if (apicid >= 255) {
-			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
-				   apicid);
+			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
+				apicid);
 			return 0;
 		}
-		pr_warning("x2apic already enabled.\n");
+		pr_warn("x2apic already enabled.\n");
 		__x2apic_disable();
 	}
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
@@ -1983,7 +1983,7 @@ static int __init apic_verify(void)
 	 */
 	features = cpuid_edx(1);
 	if (!(features & (1 << X86_FEATURE_APIC))) {
-		pr_warning("Could not enable APIC!\n");
+		pr_warn("Could not enable APIC!\n");
 		return -1;
 	}
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -2337,7 +2337,7 @@ static int cpuid_to_apicid[] = {
 #ifdef CONFIG_SMP
 /**
  * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @id:	APIC ID to check
+ * @apicid: APIC ID to check
  */
 bool apic_id_is_primary_thread(unsigned int apicid)
 {
@@ -2410,9 +2410,8 @@ int generic_processor_info(int apicid, int version)
 	    disabled_cpu_apicid == apicid) {
 		int thiscpu = num_processors + disabled_cpus;
 
-		pr_warning("APIC: Disabling requested cpu."
-			   " Processor %d/0x%x ignored.\n",
-			   thiscpu, apicid);
+		pr_warn("APIC: Disabling requested cpu."
+			" Processor %d/0x%x ignored.\n", thiscpu, apicid);
 
 		disabled_cpus++;
 		return -ENODEV;
@@ -2426,8 +2425,7 @@ int generic_processor_info(int apicid, int version)
 	    apicid != boot_cpu_physical_apicid) {
 		int thiscpu = max + disabled_cpus - 1;
 
-		pr_warning(
-			"APIC: NR_CPUS/possible_cpus limit of %i almost"
+		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
 			" reached. Keeping one slot for boot cpu."
 			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
 
@@ -2438,9 +2436,8 @@ int generic_processor_info(int apicid, int version)
 	if (num_processors >= nr_cpu_ids) {
 		int thiscpu = max + disabled_cpus;
 
-		pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
-			   "reached. Processor %d/0x%x ignored.\n",
-			   max, thiscpu, apicid);
+		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
+			"Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
 
 		disabled_cpus++;
 		return -EINVAL;
@@ -2470,13 +2467,13 @@ int generic_processor_info(int apicid, int version)
 	 * Validate version
 	 */
 	if (version == 0x0) {
-		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
-			   cpu, apicid);
+		pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+			cpu, apicid);
 		version = 0x10;
 	}
 
 	if (version != boot_cpu_apic_version) {
-		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+		pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
 			boot_cpu_apic_version, cpu, version);
 	}
 
@@ -2845,7 +2842,7 @@ static int __init apic_set_verbosity(char *arg)
 		apic_verbosity = APIC_VERBOSE;
 #ifdef CONFIG_X86_64
 	else {
-		pr_warning("APIC Verbosity level %s not recognised"
+		pr_warn("APIC Verbosity level %s not recognised"
 			" use apic=verbose or apic=debug\n", arg);
 		return -EINVAL;
 	}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d6af97fd170a..913c88617848 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1725,19 +1725,20 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
 	return false;
 }
 
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
 {
-	/* If we are moving the irq we need to mask it */
+	/* If we are moving the IRQ we need to mask it */
 	if (unlikely(irqd_is_setaffinity_pending(data))) {
-		mask_ioapic_irq(data);
+		if (!irqd_irq_masked(data))
+			mask_ioapic_irq(data);
 		return true;
 	}
 	return false;
 }
 
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
 {
-	if (unlikely(masked)) {
+	if (unlikely(moveit)) {
 		/* Only migrate the irq if the ack has been received.
 		 *
 		 * On rare occasions the broadcast level triggered ack gets
@@ -1766,15 +1767,17 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
 		 */
 		if (!io_apic_level_ack_pending(data->chip_data))
 			irq_move_masked_irq(data);
-		unmask_ioapic_irq(data);
+		/* If the IRQ is masked in the core, leave it: */
+		if (!irqd_irq_masked(data))
+			unmask_ioapic_irq(data);
 	}
 }
 #else
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
 {
 	return false;
 }
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
 {
 }
 #endif
@@ -1783,11 +1786,11 @@ static void ioapic_ack_level(struct irq_data *irq_data)
 {
 	struct irq_cfg *cfg = irqd_cfg(irq_data);
 	unsigned long v;
-	bool masked;
+	bool moveit;
 	int i;
 
 	irq_complete_move(cfg);
-	masked = ioapic_irqd_mask(irq_data);
+	moveit = ioapic_prepare_move(irq_data);
 
 	/*
 	 * It appears there is an erratum which affects at least version 0x11
@@ -1842,7 +1845,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)
 		eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
 	}
 
-	ioapic_irqd_unmask(irq_data, masked);
+	ioapic_finish_move(irq_data, moveit);
 }
 
 static void ioapic_ir_ack_level(struct irq_data *irq_data)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index e6230af19864..ad53b2abc859 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -14,6 +14,8 @@
 #include <linux/memory.h>
 #include <linux/export.h>
 #include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
 
 #include <asm/e820/api.h>
 #include <asm/uv/uv_mmrs.h>
@@ -25,12 +27,17 @@
 static DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 static enum uv_system_type	uv_system_type;
-static bool			uv_hubless_system;
+static int			uv_hubbed_system;
+static int			uv_hubless_system;
 static u64			gru_start_paddr, gru_end_paddr;
 static u64			gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
 static u64			gru_dist_lmask, gru_dist_umask;
 static union uvh_apicid		uvh_apicid;
 
+/* Unpack OEM/TABLE ID's to be NULL terminated strings */
+static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
+static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
+
 /* Information derived from CPUID: */
 static struct {
 	unsigned int apicid_shift;
@@ -248,17 +255,35 @@ static void __init uv_set_apicid_hibit(void)
 	}
 }
 
-static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static void __init uv_stringify(int len, char *to, char *from)
+{
+	/* Relies on 'to' being NULL chars so result will be NULL terminated */
+	strncpy(to, from, len-1);
+}
+
+static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
 {
 	int pnodeid;
 	int uv_apic;
 
+	uv_stringify(sizeof(oem_id), oem_id, _oem_id);
+	uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
+
 	if (strncmp(oem_id, "SGI", 3) != 0) {
-		if (strncmp(oem_id, "NSGI", 4) == 0) {
-			uv_hubless_system = true;
-			pr_info("UV: OEM IDs %s/%s, HUBLESS\n",
-				oem_id, oem_table_id);
-		}
+		if (strncmp(oem_id, "NSGI", 4) != 0)
+			return 0;
+
+		/* UV4 Hubless, CH, (0x11:UV4+Any) */
+		if (strncmp(oem_id, "NSGI4", 5) == 0)
+			uv_hubless_system = 0x11;
+
+		/* UV3 Hubless, UV300/MC990X w/o hub (0x9:UV3+Any) */
+		else
+			uv_hubless_system = 0x9;
+
+		pr_info("UV: OEM IDs %s/%s, HUBLESS(0x%x)\n",
+			oem_id, oem_table_id, uv_hubless_system);
+
 		return 0;
 	}
 
@@ -286,6 +311,24 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	if (uv_hub_info->hub_revision == 0)
 		goto badbios;
 
+	switch (uv_hub_info->hub_revision) {
+	case UV4_HUB_REVISION_BASE:
+		uv_hubbed_system = 0x11;
+		break;
+
+	case UV3_HUB_REVISION_BASE:
+		uv_hubbed_system = 0x9;
+		break;
+
+	case UV2_HUB_REVISION_BASE:
+		uv_hubbed_system = 0x5;
+		break;
+
+	case UV1_HUB_REVISION_BASE:
+		uv_hubbed_system = 0x3;
+		break;
+	}
+
 	pnodeid = early_get_pnodeid();
 	early_get_apic_socketid_shift();
 
@@ -336,9 +379,15 @@ int is_uv_system(void)
 }
 EXPORT_SYMBOL_GPL(is_uv_system);
 
-int is_uv_hubless(void)
+int is_uv_hubbed(int uvtype)
+{
+	return (uv_hubbed_system & uvtype);
+}
+EXPORT_SYMBOL_GPL(is_uv_hubbed);
+
+int is_uv_hubless(int uvtype)
 {
-	return uv_hubless_system;
+	return (uv_hubless_system & uvtype);
 }
 EXPORT_SYMBOL_GPL(is_uv_hubless);
 
@@ -1255,7 +1304,8 @@ static int __init decode_uv_systab(void)
 	struct uv_systab *st;
 	int i;
 
-	if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)
+	/* If system is uv3 or lower, there is no extended UVsystab */
+	if (is_uv_hubbed(0xfffffe) < uv(4) && is_uv_hubless(0xfffffe) < uv(4))
 		return 0;	/* No extended UVsystab required */
 
 	st = uv_systab;
@@ -1434,6 +1484,72 @@ static void __init build_socket_tables(void)
 	}
 }
 
+/* Check which reboot to use */
+static void check_efi_reboot(void)
+{
+	/* If EFI reboot not available, use ACPI reboot */
+	if (!efi_enabled(EFI_BOOT))
+		reboot_type = BOOT_ACPI;
+}
+
+/* Setup user proc fs files */
+static int __maybe_unused proc_hubbed_show(struct seq_file *file, void *data)
+{
+	seq_printf(file, "0x%x\n", uv_hubbed_system);
+	return 0;
+}
+
+static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data)
+{
+	seq_printf(file, "0x%x\n", uv_hubless_system);
+	return 0;
+}
+
+static int __maybe_unused proc_oemid_show(struct seq_file *file, void *data)
+{
+	seq_printf(file, "%s/%s\n", oem_id, oem_table_id);
+	return 0;
+}
+
+static __init void uv_setup_proc_files(int hubless)
+{
+	struct proc_dir_entry *pde;
+
+	pde = proc_mkdir(UV_PROC_NODE, NULL);
+	proc_create_single("oemid", 0, pde, proc_oemid_show);
+	if (hubless)
+		proc_create_single("hubless", 0, pde, proc_hubless_show);
+	else
+		proc_create_single("hubbed", 0, pde, proc_hubbed_show);
+}
+
+/* Initialize UV hubless systems */
+static __init int uv_system_init_hubless(void)
+{
+	int rc;
+
+	/* Setup PCH NMI handler */
+	uv_nmi_setup_hubless();
+
+	/* Init kernel/BIOS interface */
+	rc = uv_bios_init();
+	if (rc < 0)
+		return rc;
+
+	/* Process UVsystab */
+	rc = decode_uv_systab();
+	if (rc < 0)
+		return rc;
+
+	/* Create user access node */
+	if (rc >= 0)
+		uv_setup_proc_files(1);
+
+	check_efi_reboot();
+
+	return rc;
+}
+
 static void __init uv_system_init_hub(void)
 {
 	struct uv_hub_info_s hub_info = {0};
@@ -1559,32 +1675,27 @@ static void __init uv_system_init_hub(void)
 	uv_nmi_setup();
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
-	proc_mkdir("sgi_uv", NULL);
+	uv_setup_proc_files(0);
 
 	/* Register Legacy VGA I/O redirection handler: */
 	pci_register_set_vga_state(uv_set_vga_state);
 
-	/*
-	 * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
-	 * EFI is not enabled in the kdump kernel:
-	 */
-	if (is_kdump_kernel())
-		reboot_type = BOOT_ACPI;
+	check_efi_reboot();
 }
 
 /*
- * There is a small amount of UV specific code needed to initialize a
- * UV system that does not have a "UV HUB" (referred to as "hubless").
+ * There is a different code path needed to initialize a UV system that does
+ * not have a "UV HUB" (referred to as "hubless").
  */
 void __init uv_system_init(void)
 {
-	if (likely(!is_uv_system() && !is_uv_hubless()))
+	if (likely(!is_uv_system() && !is_uv_hubless(1)))
 		return;
 
 	if (is_uv_system())
 		uv_system_init_hub();
 	else
-		uv_nmi_setup_hubless();
+		uv_system_init_hubless();
 }
 
 apic_driver(apic_x2apic_uv_x);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 890f60083eca..7dc4ad68eb41 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -29,6 +29,7 @@ obj-y			+= umwait.o
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
 
+obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o
 ifdef CONFIG_CPU_SUP_INTEL
 obj-y			+= intel.o intel_pconfig.o tsx.o
 obj-$(CONFIG_PM)	+= intel_epb.o
@@ -53,11 +54,12 @@ obj-$(CONFIG_ACRN_GUEST)		+= acrn.o
 
 ifdef CONFIG_X86_FEATURE_NAMES
 quiet_cmd_mkcapflags = MKCAP   $@
-      cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
+      cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^
 
 cpufeature = $(src)/../../include/asm/cpufeatures.h
+vmxfeature = $(src)/../../include/asm/vmxfeatures.h
 
-$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
+$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE
 	$(call if_changed,mkcapflags)
 endif
 targets += capflags.c
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 90f75e515876..ac83a0fef628 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -319,13 +319,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
 	c->cpu_core_id %= cus_per_node;
 }
 
-
-static void amd_get_topology_early(struct cpuinfo_x86 *c)
-{
-	if (cpu_has(c, X86_FEATURE_TOPOEXT))
-		smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
-}
-
 /*
  * Fixup core topology information for
  * (1) AMD multi-node processors
@@ -615,9 +608,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 		return;
 
 clear_all:
-		clear_cpu_cap(c, X86_FEATURE_SME);
+		setup_clear_cpu_cap(X86_FEATURE_SME);
 clear_sev:
-		clear_cpu_cap(c, X86_FEATURE_SEV);
+		setup_clear_cpu_cap(X86_FEATURE_SEV);
 	}
 }
 
@@ -717,7 +710,8 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 		}
 	}
 
-	amd_get_topology_early(c);
+	if (cpu_has(c, X86_FEATURE_TOPOEXT))
+		smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
 }
 
 static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c7b0fa15a19..ed54b3b21c39 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 static void __init mds_select_mitigation(void);
+static void __init mds_print_mitigation(void);
 static void __init taa_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
@@ -108,6 +109,12 @@ void __init check_bugs(void)
 	mds_select_mitigation();
 	taa_select_mitigation();
 
+	/*
+	 * As MDS and TAA mitigations are inter-related, print MDS
+	 * mitigation until after TAA mitigation selection is done.
+	 */
+	mds_print_mitigation();
+
 	arch_smt_update();
 
 #ifdef CONFIG_X86_32
@@ -245,6 +252,12 @@ static void __init mds_select_mitigation(void)
 		    (mds_nosmt || cpu_mitigations_auto_nosmt()))
 			cpu_smt_disable(false);
 	}
+}
+
+static void __init mds_print_mitigation(void)
+{
+	if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
+		return;
 
 	pr_info("%s\n", mds_strings[mds_mitigation]);
 }
@@ -273,6 +286,13 @@ early_param("mds", mds_cmdline);
 #undef pr_fmt
 #define pr_fmt(fmt)	"TAA: " fmt
 
+enum taa_mitigations {
+	TAA_MITIGATION_OFF,
+	TAA_MITIGATION_UCODE_NEEDED,
+	TAA_MITIGATION_VERW,
+	TAA_MITIGATION_TSX_DISABLED,
+};
+
 /* Default mitigation for TAA-affected CPUs */
 static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
 static bool taa_nosmt __ro_after_init;
@@ -304,8 +324,12 @@ static void __init taa_select_mitigation(void)
 		return;
 	}
 
-	/* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
-	if (taa_mitigation == TAA_MITIGATION_OFF)
+	/*
+	 * TAA mitigation via VERW is turned off if both
+	 * tsx_async_abort=off and mds=off are specified.
+	 */
+	if (taa_mitigation == TAA_MITIGATION_OFF &&
+	    mds_mitigation == MDS_MITIGATION_OFF)
 		goto out;
 
 	if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
@@ -339,6 +363,15 @@ static void __init taa_select_mitigation(void)
 	if (taa_nosmt || cpu_mitigations_auto_nosmt())
 		cpu_smt_disable(false);
 
+	/*
+	 * Update MDS mitigation, if necessary, as the mds_user_clear is
+	 * now enabled for TAA mitigation.
+	 */
+	if (mds_mitigation == MDS_MITIGATION_OFF &&
+	    boot_cpu_has_bug(X86_BUG_MDS)) {
+		mds_mitigation = MDS_MITIGATION_FULL;
+		mds_select_mitigation();
+	}
 out:
 	pr_info("%s\n", taa_strings[taa_mitigation]);
 }
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 14433ff5b828..426792565d86 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,13 +18,6 @@
 #define RNG_ENABLED	(1 << 3)
 #define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
 
-#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW	0x00200000
-#define X86_VMX_FEATURE_PROC_CTLS_VNMI		0x00400000
-#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS	0x80000000
-#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC	0x00000001
-#define X86_VMX_FEATURE_PROC_CTLS2_EPT		0x00000002
-#define X86_VMX_FEATURE_PROC_CTLS2_VPID		0x00000020
-
 static void init_c3(struct cpuinfo_x86 *c)
 {
 	u32  lo, hi;
@@ -71,8 +64,6 @@ static void init_c3(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
-
-	cpu_detect_cache_sizes(c);
 }
 
 enum {
@@ -119,31 +110,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 	}
 }
 
-static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
-{
-	u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
-
-	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
-	msr_ctl = vmx_msr_high | vmx_msr_low;
-
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
-		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
-		set_cpu_cap(c, X86_FEATURE_VNMI);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
-		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		      vmx_msr_low, vmx_msr_high);
-		msr_ctl2 = vmx_msr_high | vmx_msr_low;
-		if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
-		    (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
-			set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
-			set_cpu_cap(c, X86_FEATURE_EPT);
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
-			set_cpu_cap(c, X86_FEATURE_VPID);
-	}
-}
-
 static void init_centaur(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_32
@@ -250,8 +216,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif
 
-	if (cpu_has(c, X86_FEATURE_VMX))
-		centaur_detect_vmx_virtcap(c);
+	init_ia32_feat_ctl(c);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index be2c85bf9c56..52c9bfbbdb2a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -14,6 +14,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/init.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
@@ -24,6 +25,7 @@
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/doublefault.h>
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -48,15 +50,12 @@
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
-#endif
 
 #include "cpu.h"
 
@@ -547,8 +546,9 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
-__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
 
 void load_percpu_segment(int cpu)
 {
@@ -1006,6 +1006,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define MSBDS_ONLY		BIT(5)
 #define NO_SWAPGS		BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
+#define NO_SPECTRE_V2		BIT(8)
 
 #define VULNWL(_vendor, _family, _model, _whitelist)	\
 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1067,6 +1068,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
 	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+
+	/* Zhaoxin Family 7 */
+	VULNWL(CENTAUR,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS),
+	VULNWL(ZHAOXIN,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS),
 	{}
 };
 
@@ -1099,7 +1104,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		return;
 
 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+	if (!cpu_matches(NO_SPECTRE_V2))
+		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
 	if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
 	   !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
@@ -1432,6 +1439,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
+#endif
 
 	generic_identify(c);
 
@@ -1762,7 +1772,7 @@ static void wait_for_master_cpu(int cpu)
 }
 
 #ifdef CONFIG_X86_64
-static void setup_getcpu(int cpu)
+static inline void setup_getcpu(int cpu)
 {
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
@@ -1782,7 +1792,50 @@ static void setup_getcpu(int cpu)
 
 	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
 }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	if (cpu)
+		load_ucode_ap();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss)
+{
+	/* Set up the per-CPU TSS IST stacks */
+	tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+	tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+	tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+	tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+}
+
+#else /* CONFIG_X86_64 */
+
+static inline void setup_getcpu(int cpu) { }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	show_ucode_info_early();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss) { }
+
+#endif /* !CONFIG_X86_64 */
+
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
+{
+	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+	tss->io_bitmap.prev_max = 0;
+	tss->io_bitmap.prev_sequence = 0;
+	memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
+	/*
+	 * Invalidate the extra array entry past the end of the all
+	 * permission bitmap as required by the hardware.
+	 */
+	tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
 #endif
+}
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -1790,21 +1843,15 @@ static void setup_getcpu(int cpu)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-#ifdef CONFIG_X86_64
-
 void cpu_init(void)
 {
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
-	struct task_struct *me;
-	struct tss_struct *t;
-	int i;
 
 	wait_for_master_cpu(cpu);
 
-	if (cpu)
-		load_ucode_ap();
-
-	t = &per_cpu(cpu_tss_rw, cpu);
+	ucode_cpu_init(cpu);
 
 #ifdef CONFIG_NUMA
 	if (this_cpu_read(numa_node) == 0 &&
@@ -1813,63 +1860,47 @@ void cpu_init(void)
 #endif
 	setup_getcpu(cpu);
 
-	me = current;
-
 	pr_debug("Initializing CPU#%d\n", cpu);
 
-	cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
+	    boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
+		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
 	 */
-
 	switch_to_new_gdt(cpu);
-	loadsegment(fs, 0);
-
 	load_current_idt();
 
-	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
-	syscall_init();
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		loadsegment(fs, 0);
+		memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+		syscall_init();
 
-	wrmsrl(MSR_FS_BASE, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, 0);
-	barrier();
+		wrmsrl(MSR_FS_BASE, 0);
+		wrmsrl(MSR_KERNEL_GS_BASE, 0);
+		barrier();
 
-	x86_configure_nx();
-	x2apic_setup();
-
-	/*
-	 * set up and load the per-CPU TSS
-	 */
-	if (!t->x86_tss.ist[0]) {
-		t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
-		t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
-		t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
-		t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+		x2apic_setup();
 	}
 
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-	/*
-	 * <= is required because the CPU will access up to
-	 * 8 bits beyond the end of the IO permission bitmap.
-	 */
-	for (i = 0; i <= IO_BITMAP_LONGS; i++)
-		t->io_bitmap[i] = ~0UL;
-
 	mmgrab(&init_mm);
-	me->active_mm = &init_mm;
-	BUG_ON(me->mm);
+	cur->active_mm = &init_mm;
+	BUG_ON(cur->mm);
 	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, me);
+	enter_lazy_tlb(&init_mm, cur);
 
-	/*
-	 * Initialize the TSS.  sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
-	 */
+	/* Initialize the TSS. */
+	tss_setup_ist(tss);
+	tss_setup_io_bitmap(tss);
 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+
 	load_TR_desc();
+	/*
+	 * sp0 points to the entry trampoline stack regardless of what task
+	 * is running.
+	 */
 	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
@@ -1877,6 +1908,8 @@ void cpu_init(void)
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
 
+	doublefault_init_cpu_tss();
+
 	fpu__init_cpu();
 
 	if (is_uv_system())
@@ -1885,63 +1918,6 @@ void cpu_init(void)
 	load_fixmap_gdt(cpu);
 }
 
-#else
-
-void cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
-
-	wait_for_master_cpu(cpu);
-
-	show_ucode_info_early();
-
-	pr_info("Initializing CPU#%d\n", cpu);
-
-	if (cpu_feature_enabled(X86_FEATURE_VME) ||
-	    boot_cpu_has(X86_FEATURE_TSC) ||
-	    boot_cpu_has(X86_FEATURE_DE))
-		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
-	load_current_idt();
-	switch_to_new_gdt(cpu);
-
-	/*
-	 * Set up and load the per-CPU TSS and LDT
-	 */
-	mmgrab(&init_mm);
-	curr->active_mm = &init_mm;
-	BUG_ON(curr->mm);
-	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, curr);
-
-	/*
-	 * Initialize the TSS.  sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
-	 */
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-	load_TR_desc();
-	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
-
-	load_mm_ldt(&init_mm);
-
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-#ifdef CONFIG_DOUBLEFAULT
-	/* Set up doublefault TSS pointer in the GDT */
-	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-
-	clear_all_debug_regs();
-	dbg_restore_debug_regs();
-
-	fpu__init_cpu();
-
-	load_fixmap_gdt(cpu);
-}
-#endif
-
 /*
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 38ab6e115eac..37fdefd14f28 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -80,4 +80,8 @@ extern void x86_spec_ctrl_setup_ap(void);
 
 extern u64 x86_read_arch_cap_msr(void);
 
+#ifdef CONFIG_IA32_FEAT_CTL
+void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
+#endif
+
 #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
new file mode 100644
index 000000000000..0268185bef94
--- /dev/null
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/tboot.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr-index.h>
+#include <asm/processor.h>
+#include <asm/vmx.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"x86/cpu: " fmt
+
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+enum vmx_feature_leafs {
+	MISC_FEATURES = 0,
+	PRIMARY_CTLS,
+	SECONDARY_CTLS,
+	NR_VMX_FEATURE_WORDS,
+};
+
+#define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f)
+
+static void init_vmx_capabilities(struct cpuinfo_x86 *c)
+{
+	u32 supported, funcs, ept, vpid, ign;
+
+	BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS);
+
+	/*
+	 * The high bits contain the allowed-1 settings, i.e. features that can
+	 * be turned on.  The low bits contain the allowed-0 settings, i.e.
+	 * features that can be turned off.  Ignore the allowed-0 settings,
+	 * if a feature can be turned on then it's supported.
+	 *
+	 * Use raw rdmsr() for primary processor controls and pin controls MSRs
+	 * as they exist on any CPU that supports VMX, i.e. we want the WARN if
+	 * the RDMSR faults.
+	 */
+	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported);
+	c->vmx_capability[PRIMARY_CTLS] = supported;
+
+	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
+	c->vmx_capability[SECONDARY_CTLS] = supported;
+
+	rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
+	rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
+
+	/*
+	 * Except for EPT+VPID, which enumerates support for both in a single
+	 * MSR, low for EPT, high for VPID.
+	 */
+	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid);
+
+	/* Pin, EPT, VPID and VM-Func are merged into a single word. */
+	WARN_ON_ONCE(supported >> 16);
+	WARN_ON_ONCE(funcs >> 4);
+	c->vmx_capability[MISC_FEATURES] = (supported & 0xffff) |
+					   ((vpid & 0x1) << 16) |
+					   ((funcs & 0xf) << 28);
+
+	/* EPT bits are full on scattered and must be manually handled. */
+	if (ept & VMX_EPT_EXECUTE_ONLY_BIT)
+		c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_EXECUTE_ONLY);
+	if (ept & VMX_EPT_AD_BIT)
+		c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_AD);
+	if (ept & VMX_EPT_1GB_PAGE_BIT)
+		c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_1GB);
+
+	/* Synthetic APIC features that are aggregates of multiple features. */
+	if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) &&
+	    (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_APIC_ACCESSES)))
+		c->vmx_capability[MISC_FEATURES] |= VMX_F(FLEXPRIORITY);
+
+	if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) &&
+	    (c->vmx_capability[SECONDARY_CTLS] & VMX_F(APIC_REGISTER_VIRT)) &&
+	    (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_INTR_DELIVERY)) &&
+	    (c->vmx_capability[MISC_FEATURES] & VMX_F(POSTED_INTR)))
+		c->vmx_capability[MISC_FEATURES] |= VMX_F(APICV);
+
+	/* Set the synthetic cpufeatures to preserve /proc/cpuinfo's ABI. */
+	if (c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR))
+		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+	if (c->vmx_capability[MISC_FEATURES] & VMX_F(FLEXPRIORITY))
+		set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+	if (c->vmx_capability[MISC_FEATURES] & VMX_F(VIRTUAL_NMIS))
+		set_cpu_cap(c, X86_FEATURE_VNMI);
+	if (c->vmx_capability[SECONDARY_CTLS] & VMX_F(EPT))
+		set_cpu_cap(c, X86_FEATURE_EPT);
+	if (c->vmx_capability[MISC_FEATURES] & VMX_F(EPT_AD))
+		set_cpu_cap(c, X86_FEATURE_EPT_AD);
+	if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID))
+		set_cpu_cap(c, X86_FEATURE_VPID);
+}
+#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
+
+void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
+{
+	bool tboot = tboot_enabled();
+	u64 msr;
+
+	if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
+		clear_cpu_cap(c, X86_FEATURE_VMX);
+		return;
+	}
+
+	if (msr & FEAT_CTL_LOCKED)
+		goto update_caps;
+
+	/*
+	 * Ignore whatever value BIOS left in the MSR to avoid enabling random
+	 * features or faulting on the WRMSR.
+	 */
+	msr = FEAT_CTL_LOCKED;
+
+	/*
+	 * Enable VMX if and only if the kernel may do VMXON at some point,
+	 * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector
+	 * for the kernel, e.g. using VMX to hide malicious code.
+	 */
+	if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) {
+		msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+
+		if (tboot)
+			msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
+	}
+
+	wrmsrl(MSR_IA32_FEAT_CTL, msr);
+
+update_caps:
+	set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL);
+
+	if (!cpu_has(c, X86_FEATURE_VMX))
+		return;
+
+	if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) ||
+	    (!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) {
+		if (IS_ENABLED(CONFIG_KVM_INTEL))
+			pr_err_once("VMX (%s TXT) disabled by BIOS\n",
+				    tboot ? "inside" : "outside");
+		clear_cpu_cap(c, X86_FEATURE_VMX);
+	} else {
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+		init_vmx_capabilities(c);
+#endif
+	}
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ea5899f79f36..be82cd5841c3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -458,52 +458,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
 #endif
 }
 
-static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
-{
-	/* Intel VMX MSR indicated features */
-#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW	0x00200000
-#define X86_VMX_FEATURE_PROC_CTLS_VNMI		0x00400000
-#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS	0x80000000
-#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC	0x00000001
-#define X86_VMX_FEATURE_PROC_CTLS2_EPT		0x00000002
-#define X86_VMX_FEATURE_PROC_CTLS2_VPID		0x00000020
-#define x86_VMX_FEATURE_EPT_CAP_AD		0x00200000
-
-	u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
-	u32 msr_vpid_cap, msr_ept_cap;
-
-	clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
-	clear_cpu_cap(c, X86_FEATURE_VNMI);
-	clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
-	clear_cpu_cap(c, X86_FEATURE_EPT);
-	clear_cpu_cap(c, X86_FEATURE_VPID);
-	clear_cpu_cap(c, X86_FEATURE_EPT_AD);
-
-	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
-	msr_ctl = vmx_msr_high | vmx_msr_low;
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
-		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
-		set_cpu_cap(c, X86_FEATURE_VNMI);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
-		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		      vmx_msr_low, vmx_msr_high);
-		msr_ctl2 = vmx_msr_high | vmx_msr_low;
-		if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
-		    (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
-			set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) {
-			set_cpu_cap(c, X86_FEATURE_EPT);
-			rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
-			      msr_ept_cap, msr_vpid_cap);
-			if (msr_ept_cap & x86_VMX_FEATURE_EPT_CAP_AD)
-				set_cpu_cap(c, X86_FEATURE_EPT_AD);
-		}
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
-			set_cpu_cap(c, X86_FEATURE_VPID);
-	}
-}
-
 #define MSR_IA32_TME_ACTIVATE		0x982
 
 /* Helpers to access TME_ACTIVATE MSR */
@@ -719,8 +673,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 	/* Work around errata */
 	srat_detect_node(c);
 
-	if (cpu_has(c, X86_FEATURE_VMX))
-		detect_vmx_virtcap(c);
+	init_ia32_feat_ctl(c);
 
 	if (cpu_has(c, X86_FEATURE_TME))
 		detect_tme(c);
@@ -783,7 +736,7 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0x04, TLB_DATA_4M,		8,	" TLB_DATA 4 MByte pages, 4-way set associative" },
 	{ 0x05, TLB_DATA_4M,		32,	" TLB_DATA 4 MByte pages, 4-way set associative" },
 	{ 0x0b, TLB_INST_4M,		4,	" TLB_INST 4 MByte pages, 4-way set associative" },
-	{ 0x4f, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages */" },
+	{ 0x4f, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages" },
 	{ 0x50, TLB_INST_ALL,		64,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 	{ 0x51, TLB_INST_ALL,		128,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 	{ 0x52, TLB_INST_ALL,		256,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
@@ -811,7 +764,7 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
 	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
 	{ 0xc1, STLB_4K_2M,		1024,	" STLB 4 KByte and 2 MByte pages, 8-way associative" },
-	{ 0xc2, TLB_DATA_2M_4M,		16,	" DTLB 2 MByte/4MByte pages, 4-way associative" },
+	{ 0xc2, TLB_DATA_2M_4M,		16,	" TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
 	{ 0xca, STLB_4K,		512,	" STLB 4 KByte pages, 4-way associative" },
 	{ 0x00, 0, 0 }
 };
@@ -823,8 +776,8 @@ static void intel_tlb_lookup(const unsigned char desc)
 		return;
 
 	/* look up this descriptor in the table */
-	for (k = 0; intel_tlb_table[k].descriptor != desc && \
-			intel_tlb_table[k].descriptor != 0; k++)
+	for (k = 0; intel_tlb_table[k].descriptor != desc &&
+	     intel_tlb_table[k].descriptor != 0; k++)
 		;
 
 	if (intel_tlb_table[k].tlb_type == 0)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 6ea7fdc82f3c..b3a50d962851 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -78,6 +78,7 @@ struct smca_bank_name {
 
 static struct smca_bank_name smca_names[] = {
 	[SMCA_LS]	= { "load_store",	"Load Store Unit" },
+	[SMCA_LS_V2]	= { "load_store",	"Load Store Unit" },
 	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
 	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
 	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
@@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* ZN Core (HWID=0xB0) MCA types */
 	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
+	{ SMCA_LS_V2,	 HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
 	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
 	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
 	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
@@ -266,10 +268,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 	smca_set_misc_banks_map(bank, cpu);
 
 	/* Return early if this bank was already initialized. */
-	if (smca_banks[bank].hwid)
+	if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
 		return;
 
-	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
+	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
 		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
 		return;
 	}
@@ -583,7 +585,7 @@ bool amd_filter_mce(struct mce *m)
  * - Prevent possible spurious interrupts from the IF bank on Family 0x17
  *   Models 0x10-0x2F due to Erratum #1114.
  */
-void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
+static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 {
 	int i, num_msrs;
 	u64 hwcr;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 743370ee4983..2c4f949611e4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -53,8 +53,6 @@
 
 #include "internal.h"
 
-static DEFINE_MUTEX(mce_log_mutex);
-
 /* sysfs synchronization */
 static DEFINE_MUTEX(mce_sysfs_mutex);
 
@@ -156,19 +154,10 @@ void mce_log(struct mce *m)
 	if (!mce_gen_pool_add(m))
 		irq_work_queue(&mce_irq_work);
 }
-
-void mce_inject_log(struct mce *m)
-{
-	mutex_lock(&mce_log_mutex);
-	mce_log(m);
-	mutex_unlock(&mce_log_mutex);
-}
-EXPORT_SYMBOL_GPL(mce_inject_log);
-
-static struct notifier_block mce_srao_nb;
+EXPORT_SYMBOL_GPL(mce_log);
 
 /*
- * We run the default notifier if we have only the SRAO, the first and the
+ * We run the default notifier if we have only the UC, the first and the
  * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
  * notifiers registered on the chain.
  */
@@ -488,8 +477,9 @@ int mce_usable_address(struct mce *m)
 	if (!(m->status & MCI_STATUS_ADDRV))
 		return 0;
 
-	/* Checks after this one are Intel-specific: */
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+	/* Checks after this one are Intel/Zhaoxin-specific: */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
 		return 1;
 
 	if (!(m->status & MCI_STATUS_MISCV))
@@ -507,10 +497,13 @@ EXPORT_SYMBOL_GPL(mce_usable_address);
 
 bool mce_is_memory_error(struct mce *m)
 {
-	if (m->cpuvendor == X86_VENDOR_AMD ||
-	    m->cpuvendor == X86_VENDOR_HYGON) {
+	switch (m->cpuvendor) {
+	case X86_VENDOR_AMD:
+	case X86_VENDOR_HYGON:
 		return amd_mce_is_memory_error(m);
-	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
+
+	case X86_VENDOR_INTEL:
+	case X86_VENDOR_ZHAOXIN:
 		/*
 		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
 		 *
@@ -527,9 +520,10 @@ bool mce_is_memory_error(struct mce *m)
 		return (m->status & 0xef80) == BIT(7) ||
 		       (m->status & 0xef00) == BIT(8) ||
 		       (m->status & 0xeffc) == 0xc;
-	}
 
-	return false;
+	default:
+		return false;
+	}
 }
 EXPORT_SYMBOL_GPL(mce_is_memory_error);
 
@@ -589,26 +583,29 @@ static struct notifier_block first_nb = {
 	.priority	= MCE_PRIO_FIRST,
 };
 
-static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
-				void *data)
+static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
+			      void *data)
 {
 	struct mce *mce = (struct mce *)data;
 	unsigned long pfn;
 
-	if (!mce)
+	if (!mce || !mce_usable_address(mce))
 		return NOTIFY_DONE;
 
-	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
-		pfn = mce->addr >> PAGE_SHIFT;
-		if (!memory_failure(pfn, 0))
-			set_mce_nospec(pfn);
-	}
+	if (mce->severity != MCE_AO_SEVERITY &&
+	    mce->severity != MCE_DEFERRED_SEVERITY)
+		return NOTIFY_DONE;
+
+	pfn = mce->addr >> PAGE_SHIFT;
+	if (!memory_failure(pfn, 0))
+		set_mce_nospec(pfn);
 
 	return NOTIFY_OK;
 }
-static struct notifier_block mce_srao_nb = {
-	.notifier_call	= srao_decode_notifier,
-	.priority	= MCE_PRIO_SRAO,
+
+static struct notifier_block mce_uc_nb = {
+	.notifier_call	= uc_decode_notifier,
+	.priority	= MCE_PRIO_UC,
 };
 
 static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -758,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 log_it:
 		error_seen = true;
 
-		mce_read_aux(&m, i);
+		if (flags & MCP_DONTLOG)
+			goto clear_it;
 
+		mce_read_aux(&m, i);
 		m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
-
 		/*
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
-			mce_log(&m);
-		else if (mce_usable_address(&m)) {
-			/*
-			 * Although we skipped logging this, we still want
-			 * to take action. Add to the pool so the registered
-			 * notifiers will see it.
-			 */
-			if (!mce_gen_pool_add(&m))
-				mce_schedule_work();
-		}
 
+		if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+			goto clear_it;
+
+		mce_log(&m);
+
+clear_it:
 		/*
 		 * Clear state for this bank.
 		 */
@@ -802,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			  struct pt_regs *regs)
 {
-	char *tmp;
+	char *tmp = *msg;
 	int i;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -814,8 +807,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 		if (quirk_no_way_out)
 			quirk_no_way_out(i, m, regs);
 
+		m->bank = i;
 		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
-			m->bank = i;
 			mce_read_aux(m, i);
 			*msg = tmp;
 			return 1;
@@ -1127,6 +1120,12 @@ static bool __mc_check_crashing_cpu(int cpu)
 		u64 mcgstatus;
 
 		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
+			if (mcgstatus & MCG_STATUS_LMCES)
+				return false;
+		}
+
 		if (mcgstatus & MCG_STATUS_RIPV) {
 			mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 			return true;
@@ -1221,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	struct mca_config *cfg = &mca_cfg;
 	int cpu = smp_processor_id();
-	char *msg = "Unknown";
 	struct mce m, *final;
+	char *msg = NULL;
 	int worst = 0;
 
 	/*
@@ -1277,9 +1276,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	/*
 	 * Check if this MCE is signaled to only this logical processor,
-	 * on Intel only.
+	 * on Intel, Zhaoxin only.
 	 */
-	if (m.cpuvendor == X86_VENDOR_INTEL)
+	if (m.cpuvendor == X86_VENDOR_INTEL ||
+	    m.cpuvendor == X86_VENDOR_ZHAOXIN)
 		lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
 	/*
@@ -1353,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		ist_end_non_atomic();
 	} else {
 		if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
-			mce_panic("Failed kernel mode recovery", &m, NULL);
+			mce_panic("Failed kernel mode recovery", &m, msg);
 	}
 
 out_ist:
@@ -1697,6 +1697,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		if (c->x86 == 6 && c->x86_model == 45)
 			quirk_no_way_out = quirk_sandybridge_ifu;
 	}
+
+	if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+		/*
+		 * All newer Zhaoxin CPUs support MCE broadcasting. Enable
+		 * synchronization with a one second timeout.
+		 */
+		if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
+			if (cfg->monarch_timeout < 0)
+				cfg->monarch_timeout = USEC_PER_SEC;
+		}
+	}
+
 	if (cfg->monarch_timeout < 0)
 		cfg->monarch_timeout = 0;
 	if (cfg->bootlog != 0)
@@ -1760,6 +1772,35 @@ static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
 	}
 }
 
+static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
+{
+	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+	/*
+	 * These CPUs have MCA bank 8 which reports only one error type called
+	 * SVAD (System View Address Decoder). The reporting of that error is
+	 * controlled by IA32_MC8.CTL.0.
+	 *
+	 * If enabled, prefetching on these CPUs will cause SVAD MCE when
+	 * virtual machines start and result in a system  panic. Always disable
+	 * bank 8 SVAD error by default.
+	 */
+	if ((c->x86 == 7 && c->x86_model == 0x1b) ||
+	    (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
+		if (this_cpu_read(mce_num_banks) > 8)
+			mce_banks[8].ctl = 0;
+	}
+
+	intel_init_cmci();
+	intel_init_lmce();
+	mce_adjust_timer = cmci_intel_adjust_timer;
+}
+
+static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
+
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
@@ -1781,6 +1822,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 		mce_centaur_feature_init(c);
 		break;
 
+	case X86_VENDOR_ZHAOXIN:
+		mce_zhaoxin_feature_init(c);
+		break;
+
 	default:
 		break;
 	}
@@ -1792,6 +1837,11 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_clear(c);
 		break;
+
+	case X86_VENDOR_ZHAOXIN:
+		mce_zhaoxin_feature_clear(c);
+		break;
+
 	default:
 		break;
 	}
@@ -1979,7 +2029,7 @@ int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
 	mce_register_decode_chain(&first_nb);
-	mce_register_decode_chain(&mce_srao_nb);
+	mce_register_decode_chain(&mce_uc_nb);
 	mce_register_decode_chain(&mce_default_nb);
 	mcheck_vendor_init_severity();
 
@@ -2014,15 +2064,16 @@ static void mce_disable_error_reporting(void)
 static void vendor_disable_error_reporting(void)
 {
 	/*
-	 * Don't clear on Intel or AMD or Hygon CPUs. Some of these MSRs
-	 * are socket-wide.
-	 * Disabling them for just a single offlined CPU is bad, since it will
-	 * inhibit reporting for all shared resources on the socket like the
-	 * last level cache (LLC), the integrated memory controller (iMC), etc.
+	 * Don't clear on Intel or AMD or Hygon or Zhaoxin CPUs. Some of these
+	 * MSRs are socket-wide. Disabling them for just a single offlined CPU
+	 * is bad, since it will inhibit reporting for all shared resources on
+	 * the socket like the last level cache (LLC), the integrated memory
+	 * controller (iMC), etc.
 	 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
 	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
-	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)
 		return;
 
 	mce_disable_error_reporting();
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 1f30117b24ba..3413b41b8d55 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -494,7 +494,7 @@ static void do_inject(void)
 		i_mce.status |= MCI_STATUS_SYNDV;
 
 	if (inj_type == SW_INJ) {
-		mce_inject_log(&i_mce);
+		mce_log(&i_mce);
 		return;
 	}
 
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 88cd9598fa57..5627b1091b85 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -85,8 +85,10 @@ static int cmci_supported(int *banks)
 	 * initialization is vendor keyed and this
 	 * makes sure none of the backdoors are entered otherwise.
 	 */
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
 		return 0;
+
 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
 		return 0;
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -113,15 +115,16 @@ static bool lmce_supported(void)
 
 	/*
 	 * BIOS should indicate support for LMCE by setting bit 20 in
-	 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
-	 * generate a #GP fault.
+	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
+	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
+	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
+	 * locks the MSR in the event that it wasn't already locked by BIOS.
 	 */
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
-	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
-		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
-		return true;
+	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
+	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
+		return false;
 
-	return false;
+	return tmp & FEAT_CTL_LMCE_ENABLED;
 }
 
 bool mce_intel_cmci_poll(void)
@@ -423,7 +426,7 @@ void cmci_disable_bank(int bank)
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }
 
-static void intel_init_cmci(void)
+void intel_init_cmci(void)
 {
 	int banks;
 
@@ -442,7 +445,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
-static void intel_init_lmce(void)
+void intel_init_lmce(void)
 {
 	u64 val;
 
@@ -455,7 +458,7 @@ static void intel_init_lmce(void)
 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }
 
-static void intel_clear_lmce(void)
+void intel_clear_lmce(void)
 {
 	u64 val;
 
@@ -482,6 +485,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
 	case INTEL_FAM6_BROADWELL_D:
 	case INTEL_FAM6_BROADWELL_X:
 	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_FAM6_ICELAKE_X:
 	case INTEL_FAM6_XEON_PHI_KNL:
 	case INTEL_FAM6_XEON_PHI_KNM:
 
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 43031db429d2..b785c0d0b590 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -45,11 +45,17 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval);
 bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
+void intel_init_cmci(void);
+void intel_init_lmce(void);
+void intel_clear_lmce(void);
 #else
 # define cmci_intel_adjust_timer mce_adjust_timer_default
 static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
+static inline void intel_init_cmci(void) { }
+static inline void intel_init_lmce(void) { }
+static inline void intel_clear_lmce(void) { }
 #endif
 
 void mce_timer_kick(unsigned long interval);
@@ -78,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id)
 }
 #endif
 
-void mce_inject_log(struct mce *m);
-
 /*
  * We consider records to be equivalent if bank+status+addr+misc all match.
  * This is only used when the system is going down because of a fatal error
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 6e2becf547c5..58b4ee3cda77 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -40,15 +40,58 @@
 #define THERMAL_THROTTLING_EVENT	0
 #define POWER_LIMIT_EVENT		1
 
-/*
- * Current thermal event state:
+/**
+ * struct _thermal_state - Represent the current thermal event state
+ * @next_check:			Stores the next timestamp, when it is allowed
+ *				to log the next warning message.
+ * @last_interrupt_time:	Stores the timestamp for the last threshold
+ *				high event.
+ * @therm_work:			Delayed workqueue structure
+ * @count:			Stores the current running count for thermal
+ *				or power threshold interrupts.
+ * @last_count:			Stores the previous running count for thermal
+ *				or power threshold interrupts.
+ * @max_time_ms:		This shows the maximum amount of time CPU was
+ *				in throttled state for a single thermal
+ *				threshold high to low state.
+ * @total_time_ms:		This is a cumulative time during which CPU was
+ *				in the throttled state.
+ * @rate_control_active:	Set when a throttling message is logged.
+ *				This is used for the purpose of rate-control.
+ * @new_event:			Stores the last high/low status of the
+ *				THERM_STATUS_PROCHOT or
+ *				THERM_STATUS_POWER_LIMIT.
+ * @level:			Stores whether this _thermal_state instance is
+ *				for a CORE level or for PACKAGE level.
+ * @sample_index:		Index for storing the next sample in the buffer
+ *				temp_samples[].
+ * @sample_count:		Total number of samples collected in the buffer
+ *				temp_samples[].
+ * @average:			The last moving average of temperature samples
+ * @baseline_temp:		Temperature at which thermal threshold high
+ *				interrupt was generated.
+ * @temp_samples:		Storage for temperature samples to calculate
+ *				moving average.
+ *
+ * This structure is used to represent data related to thermal state for a CPU.
+ * There is a separate storage for core and package level for each CPU.
  */
 struct _thermal_state {
-	bool			new_event;
-	int			event;
 	u64			next_check;
+	u64			last_interrupt_time;
+	struct delayed_work	therm_work;
 	unsigned long		count;
 	unsigned long		last_count;
+	unsigned long		max_time_ms;
+	unsigned long		total_time_ms;
+	bool			rate_control_active;
+	bool			new_event;
+	u8			level;
+	u8			sample_index;
+	u8			sample_count;
+	u8			average;
+	u8			baseline_temp;
+	u8			temp_samples[3];
 };
 
 struct thermal_state {
@@ -121,8 +164,22 @@ define_therm_throt_device_one_ro(package_throttle_count);
 define_therm_throt_device_show_func(package_power_limit, count);
 define_therm_throt_device_one_ro(package_power_limit_count);
 
+define_therm_throt_device_show_func(core_throttle, max_time_ms);
+define_therm_throt_device_one_ro(core_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, max_time_ms);
+define_therm_throt_device_one_ro(package_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(core_throttle, total_time_ms);
+define_therm_throt_device_one_ro(core_throttle_total_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, total_time_ms);
+define_therm_throt_device_one_ro(package_throttle_total_time_ms);
+
 static struct attribute *thermal_throttle_attrs[] = {
 	&dev_attr_core_throttle_count.attr,
+	&dev_attr_core_throttle_max_time_ms.attr,
+	&dev_attr_core_throttle_total_time_ms.attr,
 	NULL
 };
 
@@ -135,6 +192,112 @@ static const struct attribute_group thermal_attr_group = {
 #define CORE_LEVEL	0
 #define PACKAGE_LEVEL	1
 
+#define THERM_THROT_POLL_INTERVAL	HZ
+#define THERM_STATUS_PROCHOT_LOG	BIT(1)
+
+#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
+#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
+
+static void clear_therm_status_log(int level)
+{
+	int msr;
+	u64 mask, msr_val;
+
+	if (level == CORE_LEVEL) {
+		msr  = MSR_IA32_THERM_STATUS;
+		mask = THERM_STATUS_CLEAR_CORE_MASK;
+	} else {
+		msr  = MSR_IA32_PACKAGE_THERM_STATUS;
+		mask = THERM_STATUS_CLEAR_PKG_MASK;
+	}
+
+	rdmsrl(msr, msr_val);
+	msr_val &= mask;
+	wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
+}
+
+static void get_therm_status(int level, bool *proc_hot, u8 *temp)
+{
+	int msr;
+	u64 msr_val;
+
+	if (level == CORE_LEVEL)
+		msr = MSR_IA32_THERM_STATUS;
+	else
+		msr = MSR_IA32_PACKAGE_THERM_STATUS;
+
+	rdmsrl(msr, msr_val);
+	if (msr_val & THERM_STATUS_PROCHOT_LOG)
+		*proc_hot = true;
+	else
+		*proc_hot = false;
+
+	*temp = (msr_val >> 16) & 0x7F;
+}
+
+static void __maybe_unused throttle_active_work(struct work_struct *work)
+{
+	struct _thermal_state *state = container_of(to_delayed_work(work),
+						struct _thermal_state, therm_work);
+	unsigned int i, avg, this_cpu = smp_processor_id();
+	u64 now = get_jiffies_64();
+	bool hot;
+	u8 temp;
+
+	get_therm_status(state->level, &hot, &temp);
+	/* temperature value is offset from the max so lesser means hotter */
+	if (!hot && temp > state->baseline_temp) {
+		if (state->rate_control_active)
+			pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
+				this_cpu,
+				state->level == CORE_LEVEL ? "Core" : "Package",
+				state->count);
+
+		state->rate_control_active = false;
+		return;
+	}
+
+	if (time_before64(now, state->next_check) &&
+			  state->rate_control_active)
+		goto re_arm;
+
+	state->next_check = now + CHECK_INTERVAL;
+
+	if (state->count != state->last_count) {
+		/* There was one new thermal interrupt */
+		state->last_count = state->count;
+		state->average = 0;
+		state->sample_count = 0;
+		state->sample_index = 0;
+	}
+
+	state->temp_samples[state->sample_index] = temp;
+	state->sample_count++;
+	state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
+	if (state->sample_count < ARRAY_SIZE(state->temp_samples))
+		goto re_arm;
+
+	avg = 0;
+	for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
+		avg += state->temp_samples[i];
+
+	avg /= ARRAY_SIZE(state->temp_samples);
+
+	if (state->average > avg) {
+		pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
+			this_cpu,
+			state->level == CORE_LEVEL ? "Core" : "Package",
+			state->count);
+		state->rate_control_active = true;
+	}
+
+	state->average = avg;
+
+re_arm:
+	clear_therm_status_log(state->level);
+	schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+}
+
 /***
  * therm_throt_process - Process thermal throttling event from interrupt
  * @curr: Whether the condition is current or not (boolean), since the
@@ -178,27 +341,33 @@ static void therm_throt_process(bool new_event, int event, int level)
 	if (new_event)
 		state->count++;
 
-	if (time_before64(now, state->next_check) &&
-			state->count != state->last_count)
+	if (event != THERMAL_THROTTLING_EVENT)
 		return;
 
-	state->next_check = now + CHECK_INTERVAL;
-	state->last_count = state->count;
+	if (new_event && !state->last_interrupt_time) {
+		bool hot;
+		u8 temp;
+
+		get_therm_status(state->level, &hot, &temp);
+		/*
+		 * Ignore short temperature spike as the system is not close
+		 * to PROCHOT. 10C offset is large enough to ignore. It is
+		 * already dropped from the high threshold temperature.
+		 */
+		if (temp > 10)
+			return;
 
-	/* if we just entered the thermal event */
-	if (new_event) {
-		if (event == THERMAL_THROTTLING_EVENT)
-			pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package",
-				state->count);
-		return;
-	}
-	if (old_event) {
-		if (event == THERMAL_THROTTLING_EVENT)
-			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package");
-		return;
+		state->baseline_temp = temp;
+		state->last_interrupt_time = now;
+		schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+	} else if (old_event && state->last_interrupt_time) {
+		unsigned long throttle_time;
+
+		throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
+		if (throttle_time > state->max_time_ms)
+			state->max_time_ms = throttle_time;
+		state->total_time_ms += throttle_time;
+		state->last_interrupt_time = 0;
 	}
 }
 
@@ -244,20 +413,47 @@ static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
 	if (err)
 		return err;
 
-	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_core_power_limit_count.attr,
 					      thermal_attr_group.name);
+		if (err)
+			goto del_group;
+	}
+
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_package_throttle_count.attr,
 					      thermal_attr_group.name);
-		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+		if (err)
+			goto del_group;
+
+		err = sysfs_add_file_to_group(&dev->kobj,
+					      &dev_attr_package_throttle_max_time_ms.attr,
+					      thermal_attr_group.name);
+		if (err)
+			goto del_group;
+
+		err = sysfs_add_file_to_group(&dev->kobj,
+					      &dev_attr_package_throttle_total_time_ms.attr,
+					      thermal_attr_group.name);
+		if (err)
+			goto del_group;
+
+		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
 			err = sysfs_add_file_to_group(&dev->kobj,
 					&dev_attr_package_power_limit_count.attr,
 					thermal_attr_group.name);
+			if (err)
+				goto del_group;
+		}
 	}
 
+	return 0;
+
+del_group:
+	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
+
 	return err;
 }
 
@@ -269,15 +465,34 @@ static void thermal_throttle_remove_dev(struct device *dev)
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
 static int thermal_throttle_online(unsigned int cpu)
 {
+	struct thermal_state *state = &per_cpu(thermal_state, cpu);
 	struct device *dev = get_cpu_device(cpu);
+	u32 l;
+
+	state->package_throttle.level = PACKAGE_LEVEL;
+	state->core_throttle.level = CORE_LEVEL;
+
+	INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
+	INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+
+	/* Unmask the thermal vector after the above workqueues are initialized. */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
 	return thermal_throttle_add_dev(dev, cpu);
 }
 
 static int thermal_throttle_offline(unsigned int cpu)
 {
+	struct thermal_state *state = &per_cpu(thermal_state, cpu);
 	struct device *dev = get_cpu_device(cpu);
 
+	cancel_delayed_work(&state->package_throttle.therm_work);
+	cancel_delayed_work(&state->core_throttle.therm_work);
+
+	state->package_throttle.rate_control_active = false;
+	state->core_throttle.rate_control_active = false;
+
 	thermal_throttle_remove_dev(dev);
 	return 0;
 }
@@ -512,10 +727,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
 	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
 		      tm2 ? "TM2" : "TM1");
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index a0e52bd00ecc..3f6b137ef4e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -567,7 +567,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 void reload_ucode_amd(void)
 {
 	struct microcode_amd *mc;
-	u32 rev, dummy;
+	u32 rev, dummy __always_unused;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
 
@@ -673,7 +673,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
 	struct ucode_cpu_info *uci;
 	struct ucode_patch *p;
 	enum ucode_state ret;
-	u32 rev, dummy;
+	u32 rev, dummy __always_unused;
 
 	BUG_ON(raw_smp_processor_id() != cpu);
 
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index cb0fdcaf1415..7019d4b2df0c 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
-/*
- * Serialize late loading so that CPUs get updated one-by-one.
- */
-static DEFINE_RAW_SPINLOCK(update_lock);
-
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -566,11 +561,18 @@ static int __reload_late(void *info)
 	if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
 		return -1;
 
-	raw_spin_lock(&update_lock);
-	apply_microcode_local(&err);
-	raw_spin_unlock(&update_lock);
+	/*
+	 * On an SMT system, it suffices to load the microcode on one sibling of
+	 * the core because the microcode engine is shared between the threads.
+	 * Synchronization still needs to take place so that no concurrent
+	 * loading attempts happen on multiple threads of an SMT core. See
+	 * below.
+	 */
+	if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+		apply_microcode_local(&err);
+	else
+		goto wait_for_siblings;
 
-	/* siblings return UCODE_OK because their engine got updated already */
 	if (err > UCODE_NFOUND) {
 		pr_warn("Error reloading microcode on CPU %d\n", cpu);
 		ret = -1;
@@ -578,14 +580,18 @@ static int __reload_late(void *info)
 		ret = 1;
 	}
 
+wait_for_siblings:
+	if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
+		panic("Timeout during microcode update!\n");
+
 	/*
-	 * Increase the wait timeout to a safe value here since we're
-	 * serializing the microcode update and that could take a while on a
-	 * large number of CPUs. And that is fine as the *actual* timeout will
-	 * be determined by the last CPU finished updating and thus cut short.
+	 * At least one thread has completed update on each core.
+	 * For others, simply call the update to make sure the
+	 * per-cpu cpuinfo can be updated with right microcode
+	 * revision.
 	 */
-	if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
-		panic("Timeout during microcode update!\n");
+	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
+		apply_microcode_local(&err);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cfe9434..6a99535d7f37 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
 	struct microcode_intel *mc;
 	enum ucode_state ret;
 	static int prev_rev;
@@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
 		return UCODE_ERROR;
 	}
 
-	if (rev != prev_rev) {
+	if (bsp && rev != prev_rev) {
 		pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
 			rev,
 			mc->hdr.date & 0xffff,
@@ -852,7 +853,7 @@ out:
 	c->microcode	 = rev;
 
 	/* Update boot_cpu_data's revision too, if we're on the BSP: */
-	if (c->cpu_index == boot_cpu_data.cpu_index)
+	if (bsp)
 		boot_cpu_data.microcode = rev;
 
 	return ret;
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index aed45b8895d5..1db560ed2ca3 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -6,8 +6,7 @@
 
 set -e
 
-IN=$1
-OUT=$2
+OUT=$1
 
 dump_array()
 {
@@ -15,6 +14,7 @@ dump_array()
 	SIZE=$2
 	PFX=$3
 	POSTFIX=$4
+	IN=$5
 
 	PFX_SZ=$(echo $PFX | wc -c)
 	TABS="$(printf '\t\t\t\t\t')"
@@ -57,11 +57,18 @@ trap 'rm "$OUT"' EXIT
 	echo "#endif"
 	echo ""
 
-	dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" ""
+	dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" $2
 	echo ""
 
-	dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32"
+	dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" $2
+	echo ""
 
+	echo "#ifdef CONFIG_X86_VMX_FEATURE_NAMES"
+	echo "#ifndef _ASM_X86_VMXFEATURES_H"
+	echo "#include <asm/vmxfeatures.h>"
+	echo "#endif"
+	dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3
+	echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */"
 ) > $OUT
 
 trap - EXIT
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index c656d92cd708..caa032ce3fe3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
 	machine_ops.shutdown = hv_machine_shutdown;
 	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
 #endif
-	mark_tsc_unstable("running on Hyper-V");
+	if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
+		wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
+		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+	} else {
+		mark_tsc_unstable("running on Hyper-V");
+	}
 
 	/*
 	 * Generation 2 instances don't support reading the NMI status from
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index aa5c064a6a22..51b9190c628b 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -15,7 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 
 #include "mtrr.h"
 
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4d36dcc1cf87..da532f656a7b 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -101,9 +101,6 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 	int length;
 	size_t linelen;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	memset(line, 0, LINE_SIZE);
 
 	len = min_t(size_t, len, LINE_SIZE - 1);
@@ -226,8 +223,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_ADD_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err =
 		    mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
 				  file, 0);
@@ -236,24 +231,18 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_SET_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err = mtrr_add(sentry.base, sentry.size, sentry.type, false);
 		break;
 	case MTRRIOC_DEL_ENTRY:
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_DEL_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err = mtrr_file_del(sentry.base, sentry.size, file, 0);
 		break;
 	case MTRRIOC_KILL_ENTRY:
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_KILL_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err = mtrr_del(-1, sentry.base, sentry.size);
 		break;
 	case MTRRIOC_GET_ENTRY:
@@ -279,8 +268,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_ADD_PAGE_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err =
 		    mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
 				  file, 1);
@@ -289,8 +276,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_SET_PAGE_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err =
 		    mtrr_add_page(sentry.base, sentry.size, sentry.type, false);
 		break;
@@ -298,16 +283,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_DEL_PAGE_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err = mtrr_file_del(sentry.base, sentry.size, file, 1);
 		break;
 	case MTRRIOC_KILL_PAGE_ENTRY:
 #ifdef CONFIG_COMPAT
 	case MTRRIOC32_KILL_PAGE_ENTRY:
 #endif
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
 		err = mtrr_del_page(-1, sentry.base, sentry.size);
 		break;
 	case MTRRIOC_GET_PAGE_ENTRY:
@@ -373,28 +354,6 @@ static int mtrr_close(struct inode *ino, struct file *file)
 	return single_release(ino, file);
 }
 
-static int mtrr_seq_show(struct seq_file *seq, void *offset);
-
-static int mtrr_open(struct inode *inode, struct file *file)
-{
-	if (!mtrr_if)
-		return -EIO;
-	if (!mtrr_if->get)
-		return -ENXIO;
-	return single_open(file, mtrr_seq_show, NULL);
-}
-
-static const struct file_operations mtrr_fops = {
-	.owner			= THIS_MODULE,
-	.open			= mtrr_open,
-	.read			= seq_read,
-	.llseek			= seq_lseek,
-	.write			= mtrr_write,
-	.unlocked_ioctl		= mtrr_ioctl,
-	.compat_ioctl		= mtrr_ioctl,
-	.release		= mtrr_close,
-};
-
 static int mtrr_seq_show(struct seq_file *seq, void *offset)
 {
 	char factor;
@@ -426,6 +385,28 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 	return 0;
 }
 
+static int mtrr_open(struct inode *inode, struct file *file)
+{
+	if (!mtrr_if)
+		return -EIO;
+	if (!mtrr_if->get)
+		return -ENXIO;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	return single_open(file, mtrr_seq_show, NULL);
+}
+
+static const struct file_operations mtrr_fops = {
+	.owner			= THIS_MODULE,
+	.open			= mtrr_open,
+	.read			= seq_read,
+	.llseek			= seq_lseek,
+	.write			= mtrr_write,
+	.unlocked_ioctl		= mtrr_ioctl,
+	.compat_ioctl		= mtrr_ioctl,
+	.release		= mtrr_close,
+};
+
 static int __init mtrr_if_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 507039c20128..6a80f36b5d59 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -52,7 +52,7 @@
 #include <asm/e820/api.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 
 #include "mtrr.h"
 
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index cb2e49810d68..4eec8889b0ff 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,6 +7,10 @@
 
 #include "cpu.h"
 
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+extern const char * const x86_vmx_flags[NVMXINTS*32];
+#endif
+
 /*
  *	Get CPU information for use by the procfs.
  */
@@ -102,6 +106,17 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
 			seq_printf(m, " %s", x86_cap_flags[i]);
 
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) {
+		seq_puts(m, "\nvmx flags\t:");
+		for (i = 0; i < 32*NVMXINTS; i++) {
+			if (test_bit(i, (unsigned long *)c->vmx_capability) &&
+			    x86_vmx_flags[i] != NULL)
+				seq_printf(m, " %s", x86_vmx_flags[i]);
+		}
+	}
+#endif
+
 	seq_puts(m, "\nbugs\t\t:");
 	for (i = 0; i < 32*NBUGINTS; i++) {
 		unsigned int bug_bit = 32*NCAPINTS + i;
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 5c900f9527ff..c4be62058dd9 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -29,7 +29,8 @@ __setup("nordrand", x86_rdrand_setup);
 #ifdef CONFIG_ARCH_RANDOM
 void x86_init_rdrand(struct cpuinfo_x86 *c)
 {
-	unsigned long tmp;
+	unsigned int changed = 0;
+	unsigned long tmp, prev;
 	int i;
 
 	if (!cpu_has(c, X86_FEATURE_RDRAND))
@@ -42,5 +43,24 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
 			return;
 		}
 	}
+
+	/*
+	 * Stupid sanity-check whether RDRAND does *actually* generate
+	 * some at least random-looking data.
+	 */
+	prev = tmp;
+	for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
+		if (rdrand_long(&tmp)) {
+			if (prev != tmp)
+				changed++;
+
+			prev = tmp;
+		}
+	}
+
+	if (WARN_ON_ONCE(!changed))
+		pr_emerg(
+"RDRAND gives funky smelling output, might consider not using it by booting with \"nordrand\"");
+
 }
 #endif
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 03eb90d00af0..89049b343c7a 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -618,7 +618,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
 		list_del(&d->list);
-		if (is_mbm_enabled())
+		if (r->mon_capable && is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
 		if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
 			/*
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index e49b77283924..181c992f448c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
 }
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
 /**
  * struct mon_evt - Entry in the event list of a resource
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 397206f23d14..773124b0e18a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_struct *work)
 
 	mutex_lock(&rdtgroup_mutex);
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		goto out_unlock;
 
 	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
@@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		return;
 	cpu = cpumask_any(&dom->cpu_mask);
 	dom->mbm_work_cpu = cpu;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2e3b06d6bbc6..1504bcabc63c 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -532,11 +532,15 @@ static void move_myself(struct callback_head *head)
 		kfree(rdtgrp);
 	}
 
+	if (unlikely(current->flags & PF_EXITING))
+		goto out;
+
 	preempt_disable();
 	/* update PQR_ASSOC MSR to make resource group go into effect */
 	resctrl_sched_in();
 	preempt_enable();
 
+out:
 	kfree(callback);
 }
 
@@ -725,6 +729,92 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 	return ret;
 }
 
+#ifdef CONFIG_PROC_CPU_RESCTRL
+
+/*
+ * A task can only be part of one resctrl control group and of one monitor
+ * group which is associated to that control group.
+ *
+ * 1)   res:
+ *      mon:
+ *
+ *    resctrl is not available.
+ *
+ * 2)   res:/
+ *      mon:
+ *
+ *    Task is part of the root resctrl control group, and it is not associated
+ *    to any monitor group.
+ *
+ * 3)  res:/
+ *     mon:mon0
+ *
+ *    Task is part of the root resctrl control group and monitor group mon0.
+ *
+ * 4)  res:group0
+ *     mon:
+ *
+ *    Task is part of resctrl control group group0, and it is not associated
+ *    to any monitor group.
+ *
+ * 5) res:group0
+ *    mon:mon1
+ *
+ *    Task is part of resctrl control group group0 and monitor group mon1.
+ */
+int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
+		      struct pid *pid, struct task_struct *tsk)
+{
+	struct rdtgroup *rdtg;
+	int ret = 0;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	/* Return empty if resctrl has not been mounted. */
+	if (!static_branch_unlikely(&rdt_enable_key)) {
+		seq_puts(s, "res:\nmon:\n");
+		goto unlock;
+	}
+
+	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
+		struct rdtgroup *crg;
+
+		/*
+		 * Task information is only relevant for shareable
+		 * and exclusive groups.
+		 */
+		if (rdtg->mode != RDT_MODE_SHAREABLE &&
+		    rdtg->mode != RDT_MODE_EXCLUSIVE)
+			continue;
+
+		if (rdtg->closid != tsk->closid)
+			continue;
+
+		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
+			   rdtg->kn->name);
+		seq_puts(s, "mon:");
+		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
+				    mon.crdtgrp_list) {
+			if (tsk->rmid != crg->mon.rmid)
+				continue;
+			seq_printf(s, "%s", crg->kn->name);
+			break;
+		}
+		seq_putc(s, '\n');
+		goto unlock;
+	}
+	/*
+	 * The above search should succeed. Otherwise return
+	 * with an error.
+	 */
+	ret = -ENOENT;
+unlock:
+	mutex_unlock(&rdtgroup_mutex);
+
+	return ret;
+}
+#endif
+
 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 				    struct seq_file *seq, void *v)
 {
@@ -1741,9 +1831,6 @@ static int set_cache_qos_cfg(int level, bool enable)
 	struct rdt_domain *d;
 	int cpu;
 
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	if (level == RDT_RESOURCE_L3)
 		update = l3_qos_cfg_update;
 	else if (level == RDT_RESOURCE_L2)
@@ -1751,6 +1838,9 @@ static int set_cache_qos_cfg(int level, bool enable)
 	else
 		return -EINVAL;
 
+	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
 	r_l = &rdt_resources_all[level];
 	list_for_each_entry(d, &r_l->domains, list) {
 		/* Pick one CPU from each domain instance to update MSR */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index adf9b71386ef..62b137c3c97a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -4,7 +4,7 @@
  */
 #include <linux/cpu.h>
 
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
 
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index ee48c3fc8a65..d3a0791bc052 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -7,7 +7,7 @@
 
 #include <linux/cpu.h>
 #include <asm/apic.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/processor.h>
 
 #include "cpu.h"
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 3e20d322bc98..e2ad30e474f8 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -14,6 +14,9 @@
 
 #include "cpu.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt) "tsx: " fmt
+
 enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
 
 void tsx_disable(void)
@@ -99,7 +102,7 @@ void __init tsx_init(void)
 			tsx_ctrl_state = x86_get_tsx_auto_mode();
 		} else {
 			tsx_ctrl_state = TSX_CTRL_DISABLE;
-			pr_err("tsx: invalid option, defaulting to off\n");
+			pr_err("invalid option, defaulting to off\n");
 		}
 	} else {
 		/* tsx= not provided */
@@ -115,11 +118,12 @@ void __init tsx_init(void)
 		tsx_disable();
 
 		/*
-		 * tsx_disable() will change the state of the
-		 * RTM CPUID bit.  Clear it here since it is now
-		 * expected to be not set.
+		 * tsx_disable() will change the state of the RTM and HLE CPUID
+		 * bits. Clear them here since they are now expected to be not
+		 * set.
 		 */
 		setup_clear_cpu_cap(X86_FEATURE_RTM);
+		setup_clear_cpu_cap(X86_FEATURE_HLE);
 	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
 
 		/*
@@ -131,10 +135,10 @@ void __init tsx_init(void)
 		tsx_enable();
 
 		/*
-		 * tsx_enable() will change the state of the
-		 * RTM CPUID bit.  Force it here since it is now
-		 * expected to be set.
+		 * tsx_enable() will change the state of the RTM and HLE CPUID
+		 * bits. Force them here since they are now expected to be set.
 		 */
 		setup_force_cpu_cap(X86_FEATURE_RTM);
+		setup_force_cpu_cap(X86_FEATURE_HLE);
 	}
 }
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
index 8e6f2f4b4afe..df1358ba622b 100644
--- a/arch/x86/kernel/cpu/zhaoxin.c
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -16,13 +16,6 @@
 #define RNG_ENABLED	(1 << 3)
 #define RNG_ENABLE	(1 << 8)	/* MSR_ZHAOXIN_RNG */
 
-#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW	0x00200000
-#define X86_VMX_FEATURE_PROC_CTLS_VNMI		0x00400000
-#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS	0x80000000
-#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC	0x00000001
-#define X86_VMX_FEATURE_PROC_CTLS2_EPT		0x00000002
-#define X86_VMX_FEATURE_PROC_CTLS2_VPID		0x00000020
-
 static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
 {
 	u32  lo, hi;
@@ -58,8 +51,6 @@ static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
 
 	if (c->x86 >= 0x6)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-
-	cpu_detect_cache_sizes(c);
 }
 
 static void early_init_zhaoxin(struct cpuinfo_x86 *c)
@@ -89,31 +80,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c)
 
 }
 
-static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
-{
-	u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
-
-	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
-	msr_ctl = vmx_msr_high | vmx_msr_low;
-
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
-		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
-		set_cpu_cap(c, X86_FEATURE_VNMI);
-	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
-		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		      vmx_msr_low, vmx_msr_high);
-		msr_ctl2 = vmx_msr_high | vmx_msr_low;
-		if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
-		    (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
-			set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
-			set_cpu_cap(c, X86_FEATURE_EPT);
-		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
-			set_cpu_cap(c, X86_FEATURE_VPID);
-	}
-}
-
 static void init_zhaoxin(struct cpuinfo_x86 *c)
 {
 	early_init_zhaoxin(c);
@@ -141,8 +107,7 @@ static void init_zhaoxin(struct cpuinfo_x86 *c)
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif
 
-	if (cpu_has(c, X86_FEATURE_VMX))
-		zhaoxin_detect_vmx_virtcap(c);
+	init_ia32_feat_ctl(c);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index eb651fbde92a..fd87b59452a3 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
@@ -39,6 +40,7 @@
 #include <asm/virtext.h>
 #include <asm/intel_pt.h>
 #include <asm/crash.h>
+#include <asm/cmdline.h>
 
 /* Used while preparing memory map entries for second kernel */
 struct crash_memmap_data {
@@ -68,6 +70,19 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
 	rcu_read_unlock();
 }
 
+/*
+ * When the crashkernel option is specified, only use the low
+ * 1M for the real mode trampoline.
+ */
+void __init crash_reserve_low_1M(void)
+{
+	if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0)
+		return;
+
+	memblock_reserve(0, 1<<20);
+	pr_info("Reserving the low 1M of memory for crashkernel\n");
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -173,8 +188,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
 #ifdef CONFIG_KEXEC_FILE
 
-static unsigned long crash_zero_bytes;
-
 static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 {
 	unsigned int *nr_ranges = arg;
@@ -189,8 +202,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 	unsigned int nr_ranges = 0;
 	struct crash_mem *cmem;
 
-	walk_system_ram_res(0, -1, &nr_ranges,
-				get_nr_ram_ranges_callback);
+	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
 	if (!nr_ranges)
 		return NULL;
 
@@ -217,15 +229,19 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
 {
 	int ret = 0;
 
+	/* Exclude the low 1M because it is always reserved */
+	ret = crash_exclude_mem_range(cmem, 0, 1<<20);
+	if (ret)
+		return ret;
+
 	/* Exclude crashkernel region */
 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
 	if (ret)
 		return ret;
 
-	if (crashk_low_res.end) {
+	if (crashk_low_res.end)
 		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
-							crashk_low_res.end);
-	}
+					      crashk_low_res.end);
 
 	return ret;
 }
@@ -246,16 +262,13 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
 					unsigned long *sz)
 {
 	struct crash_mem *cmem;
-	Elf64_Ehdr *ehdr;
-	Elf64_Phdr *phdr;
-	int ret, i;
+	int ret;
 
 	cmem = fill_up_crash_elf_data();
 	if (!cmem)
 		return -ENOMEM;
 
-	ret = walk_system_ram_res(0, -1, cmem,
-				prepare_elf64_ram_headers_callback);
+	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
 	if (ret)
 		goto out;
 
@@ -265,24 +278,8 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
 		goto out;
 
 	/* By default prepare 64bit headers */
-	ret =  crash_prepare_elf64_headers(cmem,
-				IS_ENABLED(CONFIG_X86_64), addr, sz);
-	if (ret)
-		goto out;
+	ret =  crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
 
-	/*
-	 * If a range matches backup region, adjust offset to backup
-	 * segment.
-	 */
-	ehdr = (Elf64_Ehdr *)*addr;
-	phdr = (Elf64_Phdr *)(ehdr + 1);
-	for (i = 0; i < ehdr->e_phnum; phdr++, i++)
-		if (phdr->p_type == PT_LOAD &&
-				phdr->p_paddr == image->arch.backup_src_start &&
-				phdr->p_memsz == image->arch.backup_src_sz) {
-			phdr->p_offset = image->arch.backup_load_addr;
-			break;
-		}
 out:
 	vfree(cmem);
 	return ret;
@@ -296,8 +293,7 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
 	if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
 		return 1;
 
-	memcpy(&params->e820_table[nr_e820_entries], entry,
-			sizeof(struct e820_entry));
+	memcpy(&params->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry));
 	params->e820_entries++;
 	return 0;
 }
@@ -321,19 +317,11 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 				 unsigned long long mend)
 {
 	unsigned long start, end;
-	int ret = 0;
 
 	cmem->ranges[0].start = mstart;
 	cmem->ranges[0].end = mend;
 	cmem->nr_ranges = 1;
 
-	/* Exclude Backup region */
-	start = image->arch.backup_load_addr;
-	end = start + image->arch.backup_src_sz - 1;
-	ret = crash_exclude_mem_range(cmem, start, end);
-	if (ret)
-		return ret;
-
 	/* Exclude elf header region */
 	start = image->arch.elf_load_addr;
 	end = start + image->arch.elf_headers_sz - 1;
@@ -356,40 +344,39 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 	memset(&cmd, 0, sizeof(struct crash_memmap_data));
 	cmd.params = params;
 
-	/* Add first 640K segment */
-	ei.addr = image->arch.backup_src_start;
-	ei.size = image->arch.backup_src_sz;
-	ei.type = E820_TYPE_RAM;
-	add_e820_entry(params, &ei);
+	/* Add the low 1M */
+	cmd.type = E820_TYPE_RAM;
+	flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+	walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd,
+			    memmap_entry_callback);
 
 	/* Add ACPI tables */
 	cmd.type = E820_TYPE_ACPI;
 	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
-		       memmap_entry_callback);
+			    memmap_entry_callback);
 
 	/* Add ACPI Non-volatile Storage */
 	cmd.type = E820_TYPE_NVS;
 	walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
-			memmap_entry_callback);
+			    memmap_entry_callback);
 
 	/* Add e820 reserved ranges */
 	cmd.type = E820_TYPE_RESERVED;
 	flags = IORESOURCE_MEM;
 	walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
-			   memmap_entry_callback);
+			    memmap_entry_callback);
 
 	/* Add crashk_low_res region */
 	if (crashk_low_res.end) {
 		ei.addr = crashk_low_res.start;
-		ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+		ei.size = resource_size(&crashk_low_res);
 		ei.type = E820_TYPE_RAM;
 		add_e820_entry(params, &ei);
 	}
 
 	/* Exclude some ranges from crashk_res and add rest to memmap */
-	ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
-						crashk_res.end);
+	ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end);
 	if (ret)
 		goto out;
 
@@ -409,55 +396,12 @@ out:
 	return ret;
 }
 
-static int determine_backup_region(struct resource *res, void *arg)
-{
-	struct kimage *image = arg;
-
-	image->arch.backup_src_start = res->start;
-	image->arch.backup_src_sz = resource_size(res);
-
-	/* Expecting only one range for backup region */
-	return 1;
-}
-
 int crash_load_segments(struct kimage *image)
 {
 	int ret;
 	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 				  .buf_max = ULONG_MAX, .top_down = false };
 
-	/*
-	 * Determine and load a segment for backup area. First 640K RAM
-	 * region is backup source
-	 */
-
-	ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
-				image, determine_backup_region);
-
-	/* Zero or postive return values are ok */
-	if (ret < 0)
-		return ret;
-
-	/* Add backup segment. */
-	if (image->arch.backup_src_sz) {
-		kbuf.buffer = &crash_zero_bytes;
-		kbuf.bufsz = sizeof(crash_zero_bytes);
-		kbuf.memsz = image->arch.backup_src_sz;
-		kbuf.buf_align = PAGE_SIZE;
-		/*
-		 * Ideally there is no source for backup segment. This is
-		 * copied in purgatory after crash. Just add a zero filled
-		 * segment for now to make sure checksum logic works fine.
-		 */
-		ret = kexec_add_buffer(&kbuf);
-		if (ret)
-			return ret;
-		image->arch.backup_load_addr = kbuf.mem;
-		pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
-			 image->arch.backup_load_addr,
-			 image->arch.backup_src_start, kbuf.memsz);
-	}
-
 	/* Prepare elf headers and add a segment */
 	ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
 	if (ret)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
deleted file mode 100644
index 0b8cedb20d6d..000000000000
--- a/arch/x86/kernel/doublefault.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-
-#ifdef CONFIG_X86_32
-
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
-
-static void doublefault_fn(void)
-{
-	struct desc_ptr gdt_desc = {0, 0};
-	unsigned long gdt, tss;
-
-	native_store_gdt(&gdt_desc);
-	gdt = gdt_desc.address;
-
-	printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
-
-	if (ptr_ok(gdt)) {
-		gdt += GDT_ENTRY_TSS << 3;
-		tss = get_desc_base((struct desc_struct *)gdt);
-		printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
-
-		if (ptr_ok(tss)) {
-			struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
-
-			printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
-			       t->ip, t->sp);
-
-			printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
-				t->ax, t->bx, t->cx, t->dx);
-			printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
-				t->si, t->di);
-		}
-	}
-
-	for (;;)
-		cpu_relax();
-}
-
-struct x86_hw_tss doublefault_tss __cacheline_aligned = {
-	.sp0		= STACK_START,
-	.ss0		= __KERNEL_DS,
-	.ldt		= 0,
-	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
-
-	.ip		= (unsigned long) doublefault_fn,
-	/* 0x2 bit is always set */
-	.flags		= X86_EFLAGS_SF | 0x2,
-	.sp		= STACK_START,
-	.es		= __USER_DS,
-	.cs		= __KERNEL_CS,
-	.ss		= __KERNEL_DS,
-	.ds		= __USER_DS,
-	.fs		= __KERNEL_PERCPU,
-
-	.__cr3		= __pa_nodebug(swapper_pg_dir),
-};
-
-/* dummy for do_double_fault() call */
-void df_debug(struct pt_regs *regs, long error_code) {}
-
-#else /* !CONFIG_X86_32 */
-
-void df_debug(struct pt_regs *regs, long error_code)
-{
-	pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
-	show_regs(regs);
-	panic("Machine halted.");
-}
-#endif
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
new file mode 100644
index 000000000000..3793646f0fb5
--- /dev/null
+++ b/arch/x86/kernel/doublefault_32.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/traps.h>
+
+extern void double_fault(void);
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
+
+#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
+
+static void set_df_gdt_entry(unsigned int cpu);
+
+/*
+ * Called by double_fault with CR0.TS and EFLAGS.NT cleared.  The CPU thinks
+ * we're running the doublefault task.  Cannot return.
+ */
+asmlinkage notrace void __noreturn doublefault_shim(void)
+{
+	unsigned long cr2;
+	struct pt_regs regs;
+
+	BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
+
+	cr2 = native_read_cr2();
+
+	/* Reset back to the normal kernel task. */
+	force_reload_TR();
+	set_df_gdt_entry(smp_processor_id());
+
+	trace_hardirqs_off();
+
+	/*
+	 * Fill in pt_regs.  A downside of doing this in C is that the unwinder
+	 * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
+	 * won't successfully unwind to the source of the double fault.
+	 * The main dump from do_double_fault() is fine, though, since it
+	 * uses these regs directly.
+	 *
+	 * If anyone ever cares, this could be moved to asm.
+	 */
+	regs.ss		= TSS(ss);
+	regs.__ssh	= 0;
+	regs.sp		= TSS(sp);
+	regs.flags	= TSS(flags);
+	regs.cs		= TSS(cs);
+	/* We won't go through the entry asm, so we can leave __csh as 0. */
+	regs.__csh	= 0;
+	regs.ip		= TSS(ip);
+	regs.orig_ax	= 0;
+	regs.gs		= TSS(gs);
+	regs.__gsh	= 0;
+	regs.fs		= TSS(fs);
+	regs.__fsh	= 0;
+	regs.es		= TSS(es);
+	regs.__esh	= 0;
+	regs.ds		= TSS(ds);
+	regs.__dsh	= 0;
+	regs.ax		= TSS(ax);
+	regs.bp		= TSS(bp);
+	regs.di		= TSS(di);
+	regs.si		= TSS(si);
+	regs.dx		= TSS(dx);
+	regs.cx		= TSS(cx);
+	regs.bx		= TSS(bx);
+
+	do_double_fault(&regs, 0, cr2);
+
+	/*
+	 * x86_32 does not save the original CR3 anywhere on a task switch.
+	 * This means that, even if we wanted to return, we would need to find
+	 * some way to reconstruct CR3.  We could make a credible guess based
+	 * on cpu_tlbstate, but that would be racy and would not account for
+	 * PTI.
+	 *
+	 * Instead, don't bother.  We can return through
+	 * rewind_stack_do_exit() instead.
+	 */
+	panic("cannot return from double fault\n");
+}
+NOKPROBE_SYMBOL(doublefault_shim);
+
+DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
+	.tss = {
+                /*
+                 * No sp0 or ss0 -- we never run CPL != 0 with this TSS
+                 * active.  sp is filled in later.
+                 */
+		.ldt		= 0,
+	.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
+
+		.ip		= (unsigned long) double_fault,
+		.flags		= X86_EFLAGS_FIXED,
+		.es		= __USER_DS,
+		.cs		= __KERNEL_CS,
+		.ss		= __KERNEL_DS,
+		.ds		= __USER_DS,
+		.fs		= __KERNEL_PERCPU,
+#ifndef CONFIG_X86_32_LAZY_GS
+		.gs		= __KERNEL_STACK_CANARY,
+#endif
+
+		.__cr3		= __pa_nodebug(swapper_pg_dir),
+	},
+};
+
+static void set_df_gdt_entry(unsigned int cpu)
+{
+	/* Set up doublefault TSS pointer in the GDT */
+	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
+		       &get_cpu_entry_area(cpu)->doublefault_stack.tss);
+
+}
+
+void doublefault_init_cpu_tss(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+	/*
+	 * The linker isn't smart enough to initialize percpu variables that
+	 * point to other places in percpu space.
+	 */
+        this_cpu_write(doublefault_stack.tss.sp,
+                       (unsigned long)&cea->doublefault_stack.stack +
+                       sizeof(doublefault_stack.stack));
+
+	set_df_gdt_entry(cpu);
+}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e07424e19274..ae64ec7f752f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 }
 NOKPROBE_SYMBOL(oops_end);
 
-int __die(const char *str, struct pt_regs *regs, long err)
+static void __die_header(const char *str, struct pt_regs *regs, long err)
 {
 	const char *pr = "";
 
@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
 	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
 	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+}
+NOKPROBE_SYMBOL(__die_header);
 
+static int __die_body(const char *str, struct pt_regs *regs, long err)
+{
 	show_regs(regs);
 	print_modules();
 
@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
 
 	return 0;
 }
+NOKPROBE_SYMBOL(__die_body);
+
+int __die(const char *str, struct pt_regs *regs, long err)
+{
+	__die_header(str, regs, err);
+	return __die_body(str, regs, err);
+}
 NOKPROBE_SYMBOL(__die);
 
 /*
@@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
+{
+	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
+
+	__die_header(str, regs, err);
+	if (gp_addr)
+		kasan_non_canonical_hook(gp_addr);
+	if (__die_body(str, regs, err))
+		sig = 0;
+	oops_end(flags, regs, sig);
+}
+
 void show_regs(struct pt_regs *regs)
 {
 	show_regs_print_info(KERN_DEFAULT);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 64a59d726639..8e3a8fedfa4d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -29,6 +29,9 @@ const char *stack_type_name(enum stack_type type)
 	if (type == STACK_TYPE_ENTRY)
 		return "ENTRY_TRAMPOLINE";
 
+	if (type == STACK_TYPE_EXCEPTION)
+		return "#DF";
+
 	return NULL;
 }
 
@@ -82,6 +85,30 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 	return true;
 }
 
+static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
+{
+#ifdef CONFIG_DOUBLEFAULT
+	struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
+	struct doublefault_stack *ss = &cea->doublefault_stack;
+
+	void *begin = ss->stack;
+	void *end = begin + sizeof(ss->stack);
+
+	if ((void *)stack < begin || (void *)stack >= end)
+		return false;
+
+	info->type	= STACK_TYPE_EXCEPTION;
+	info->begin	= begin;
+	info->end	= end;
+	info->next_sp	= (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
+
+	return true;
+#else
+	return false;
+#endif
+}
+
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask)
 {
@@ -105,6 +132,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 	if (in_softirq_stack(stack, info))
 		goto recursion_check;
 
+	if (in_doublefault_stack(stack, info))
+		goto recursion_check;
+
 	goto unknown;
 
 recursion_check:
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7da2bcd2b8eb..c5399e80c59c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type)
 	case E820_TYPE_RAM:		/* Fall through: */
 	case E820_TYPE_RESERVED_KERN:	pr_cont("usable");			break;
 	case E820_TYPE_RESERVED:	pr_cont("reserved");			break;
+	case E820_TYPE_SOFT_RESERVED:	pr_cont("soft reserved");		break;
 	case E820_TYPE_ACPI:		pr_cont("ACPI data");			break;
 	case E820_TYPE_NVS:		pr_cont("ACPI NVS");			break;
 	case E820_TYPE_UNUSABLE:	pr_cont("unusable");			break;
@@ -999,6 +1000,17 @@ void __init e820__reserve_setup_data(void)
 		data = early_memremap(pa_data, sizeof(*data));
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 		e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+		if (data->type == SETUP_INDIRECT &&
+		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+			e820__range_update(((struct setup_indirect *)data->data)->addr,
+					   ((struct setup_indirect *)data->data)->len,
+					   E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+			e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
+						 ((struct setup_indirect *)data->data)->len,
+						 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+		}
+
 		pa_data = data->next;
 		early_memunmap(data, sizeof(*data));
 	}
@@ -1037,6 +1049,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry)
 	case E820_TYPE_PRAM:		return "Persistent Memory (legacy)";
 	case E820_TYPE_PMEM:		return "Persistent Memory";
 	case E820_TYPE_RESERVED:	return "Reserved";
+	case E820_TYPE_SOFT_RESERVED:	return "Soft Reserved";
 	default:			return "Unknown E820 type";
 	}
 }
@@ -1052,6 +1065,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
 	case E820_TYPE_PRAM:		/* Fall-through: */
 	case E820_TYPE_PMEM:		/* Fall-through: */
 	case E820_TYPE_RESERVED:	/* Fall-through: */
+	case E820_TYPE_SOFT_RESERVED:	/* Fall-through: */
 	default:			return IORESOURCE_MEM;
 	}
 }
@@ -1064,6 +1078,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
 	case E820_TYPE_PMEM:		return IORES_DESC_PERSISTENT_MEMORY;
 	case E820_TYPE_PRAM:		return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
 	case E820_TYPE_RESERVED:	return IORES_DESC_RESERVED;
+	case E820_TYPE_SOFT_RESERVED:	return IORES_DESC_SOFT_RESERVED;
 	case E820_TYPE_RESERVED_KERN:	/* Fall-through: */
 	case E820_TYPE_RAM:		/* Fall-through: */
 	case E820_TYPE_UNUSABLE:	/* Fall-through: */
@@ -1078,11 +1093,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res)
 		return true;
 
 	/*
-	 * Treat persistent memory like device memory, i.e. reserve it
-	 * for exclusive use of a driver
+	 * Treat persistent memory and other special memory ranges like
+	 * device memory, i.e. reserve it for exclusive use of a driver
 	 */
 	switch (type) {
 	case E820_TYPE_RESERVED:
+	case E820_TYPE_SOFT_RESERVED:
 	case E820_TYPE_PRAM:
 	case E820_TYPE_PMEM:
 		return false;
@@ -1285,6 +1301,9 @@ void __init e820__memblock_setup(void)
 		if (end != (resource_size_t)end)
 			continue;
 
+		if (entry->type == E820_TYPE_SOFT_RESERVED)
+			memblock_reserve(entry->addr, entry->size);
+
 		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
 			continue;
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 4cba91ec8049..2f9ec14be3b1 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,8 +710,12 @@ static struct chipset early_qrk[] __initdata = {
 	 */
 	{ PCI_VENDOR_ID_INTEL, 0x0f00,
 		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+	{ PCI_VENDOR_ID_INTEL, 0x3e20,
+		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
 	{ PCI_VENDOR_ID_INTEL, 0x3ec4,
 		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+	{ PCI_VENDOR_ID_INTEL, 0x8a12,
+		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
 	{ PCI_VENDOR_ID_BROADCOM, 0x4331,
 	  PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
 	{}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0071b794ed19..400a05e1c1c5 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			fpregs_unlock();
 			return 0;
 		}
+		fpregs_deactivate(fpu);
 		fpregs_unlock();
 	}
 
@@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	}
 	if (!ret)
 		fpregs_mark_activate();
+	else
+		fpregs_deactivate(fpu);
 	fpregs_unlock();
 
 err_out:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e5cb67d67c03..a1806598aaa4 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -60,7 +60,7 @@ u64 xfeatures_mask __read_mostly;
 
 static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
 
 /*
  * The XSAVE area of kernel can be in standard or compacted format;
@@ -107,23 +107,20 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
 }
 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 
-static int xfeature_is_supervisor(int xfeature_nr)
+static bool xfeature_is_supervisor(int xfeature_nr)
 {
 	/*
-	 * We currently do not support supervisor states, but if
-	 * we did, we could find out like this.
-	 *
-	 * SDM says: If state component 'i' is a user state component,
-	 * ECX[0] return 0; if state component i is a supervisor
-	 * state component, ECX[0] returns 1.
+	 * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
+	 * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
+	 * for a user state.
 	 */
 	u32 eax, ebx, ecx, edx;
 
 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
-	return !!(ecx & 1);
+	return ecx & 1;
 }
 
-static int xfeature_is_user(int xfeature_nr)
+static bool xfeature_is_user(int xfeature_nr)
 {
 	return !xfeature_is_supervisor(xfeature_nr);
 }
@@ -254,10 +251,13 @@ static void __init setup_xstate_features(void)
 	 * in the fixed offsets in the xsave area in either compacted form
 	 * or standard form.
 	 */
-	xstate_offsets[0] = 0;
-	xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
-	xstate_offsets[1] = xstate_sizes[0];
-	xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
+	xstate_offsets[XFEATURE_FP]	= 0;
+	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
+						   xmm_space);
+
+	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
+	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
+						       xmm_space);
 
 	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 		if (!xfeature_enabled(i))
@@ -342,7 +342,7 @@ static int xfeature_is_aligned(int xfeature_nr)
  */
 static void __init setup_xstate_comp(void)
 {
-	unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+	unsigned int xstate_comp_sizes[XFEATURE_MAX];
 	int i;
 
 	/*
@@ -350,8 +350,9 @@ static void __init setup_xstate_comp(void)
 	 * in the fixed offsets in the xsave area in either compacted form
 	 * or standard form.
 	 */
-	xstate_comp_offsets[0] = 0;
-	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+	xstate_comp_offsets[XFEATURE_FP] = 0;
+	xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
+						     xmm_space);
 
 	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
@@ -415,7 +416,8 @@ static void __init setup_init_fpu_buf(void)
 	print_xstate_features();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
+		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+						     xfeatures_mask;
 
 	/*
 	 * Init all the features state with header.xfeatures being 0x0
@@ -840,7 +842,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 
 	/*
 	 * We should not ever be requesting features that we
-	 * have not enabled.  Remember that pcntxt_mask is
+	 * have not enabled.  Remember that xfeatures_mask is
 	 * what we write to the XCR0 register.
 	 */
 	WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 024c3053dbba..37a0aeaf89e7 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -23,6 +23,7 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/memory.h>
+#include <linux/vmalloc.h>
 
 #include <trace/syscall.h>
 
@@ -34,6 +35,8 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+static int ftrace_poke_late = 0;
+
 int ftrace_arch_code_modify_prepare(void)
     __acquires(&text_mutex)
 {
@@ -43,84 +46,37 @@ int ftrace_arch_code_modify_prepare(void)
 	 * ftrace has it set to "read/write".
 	 */
 	mutex_lock(&text_mutex);
-	set_kernel_text_rw();
-	set_all_modules_text_rw();
+	ftrace_poke_late = 1;
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
     __releases(&text_mutex)
 {
-	set_all_modules_text_ro();
-	set_kernel_text_ro();
+	/*
+	 * ftrace_make_{call,nop}() may be called during
+	 * module load, and we need to finish the text_poke_queue()
+	 * that they do, here.
+	 */
+	text_poke_finish();
+	ftrace_poke_late = 0;
 	mutex_unlock(&text_mutex);
 	return 0;
 }
 
-union ftrace_code_union {
-	char code[MCOUNT_INSN_SIZE];
-	struct {
-		unsigned char op;
-		int offset;
-	} __attribute__((packed));
-};
-
-static int ftrace_calc_offset(long ip, long addr)
-{
-	return (int)(addr - ip);
-}
-
-static unsigned char *
-ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
+static const char *ftrace_nop_replace(void)
 {
-	static union ftrace_code_union calc;
-
-	calc.op		= op;
-	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
-
-	return calc.code;
-}
-
-static unsigned char *
-ftrace_call_replace(unsigned long ip, unsigned long addr)
-{
-	return ftrace_text_replace(0xe8, ip, addr);
-}
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
-{
-	return addr >= start && addr < end;
+	return ideal_nops[NOP_ATOMIC5];
 }
 
-static unsigned long text_ip_addr(unsigned long ip)
+static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
-	/*
-	 * On x86_64, kernel text mappings are mapped read-only, so we use
-	 * the kernel identity mapping instead of the kernel text mapping
-	 * to modify the kernel text.
-	 *
-	 * For 32bit kernels, these mappings are same and we can use
-	 * kernel identity mapping to modify code.
-	 */
-	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa_symbol(ip));
-
-	return ip;
+	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
 }
 
-static const unsigned char *ftrace_nop_replace(void)
+static int ftrace_verify_code(unsigned long ip, const char *old_code)
 {
-	return ideal_nops[NOP_ATOMIC5];
-}
-
-static int
-ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code)
-{
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-
-	ftrace_expected = old_code;
+	char cur_code[MCOUNT_INSN_SIZE];
 
 	/*
 	 * Note:
@@ -129,31 +85,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 	 * Carefully read and modify the code with probe_kernel_*(), and make
 	 * sure what we read is what we expected it to be before modifying it.
 	 */
-
 	/* read the text we want to modify */
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
+		WARN_ON(1);
 		return -EFAULT;
+	}
 
 	/* Make sure it is what we expect it to be */
-	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+	if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
+		WARN_ON(1);
 		return -EINVAL;
+	}
 
-	ip = text_ip_addr(ip);
-
-	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
-		return -EPERM;
+	return 0;
+}
 
-	sync_core();
+/*
+ * Marked __ref because it calls text_poke_early() which is .init.text. That is
+ * ok because that call will happen early, during boot, when .init sections are
+ * still present.
+ */
+static int __ref
+ftrace_modify_code_direct(unsigned long ip, const char *old_code,
+			  const char *new_code)
+{
+	int ret = ftrace_verify_code(ip, old_code);
+	if (ret)
+		return ret;
 
+	/* replace the text with the new text */
+	if (ftrace_poke_late)
+		text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+	else
+		text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
 	return 0;
 }
 
-int ftrace_make_nop(struct module *mod,
-		    struct dyn_ftrace *rec, unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
+	const char *new, *old;
 
 	old = ftrace_call_replace(ip, addr);
 	new = ftrace_nop_replace();
@@ -167,19 +138,20 @@ int ftrace_make_nop(struct module *mod,
 	 * just modify the code directly.
 	 */
 	if (addr == MCOUNT_ADDR)
-		return ftrace_modify_code_direct(rec->ip, old, new);
+		return ftrace_modify_code_direct(ip, old, new);
 
-	ftrace_expected = NULL;
-
-	/* Normal cases use add_brk_on_nop */
+	/*
+	 * x86 overrides ftrace_replace_code -- this function will never be used
+	 * in this case.
+	 */
 	WARN_ONCE(1, "invalid use of ftrace_make_nop");
 	return -EINVAL;
 }
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
+	const char *new, *old;
 
 	old = ftrace_nop_replace();
 	new = ftrace_call_replace(ip, addr);
@@ -189,43 +161,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 }
 
 /*
- * The modifying_ftrace_code is used to tell the breakpoint
- * handler to call ftrace_int3_handler(). If it fails to
- * call this handler for a breakpoint added by ftrace, then
- * the kernel may crash.
- *
- * As atomic_writes on x86 do not need a barrier, we do not
- * need to add smp_mb()s for this to work. It is also considered
- * that we can not read the modifying_ftrace_code before
- * executing the breakpoint. That would be quite remarkable if
- * it could do that. Here's the flow that is required:
- *
- *   CPU-0                          CPU-1
- *
- * atomic_inc(mfc);
- * write int3s
- *				<trap-int3> // implicit (r)mb
- *				if (atomic_read(mfc))
- *					call ftrace_int3_handler()
- *
- * Then when we are finished:
- *
- * atomic_dec(mfc);
- *
- * If we hit a breakpoint that was not set by ftrace, it does not
- * matter if ftrace_int3_handler() is called or not. It will
- * simply be ignored. But it is crucial that a ftrace nop/caller
- * breakpoint is handled. No other user should ever place a
- * breakpoint on an ftrace nop/caller location. It must only
- * be done by this code.
- */
-atomic_t modifying_ftrace_code __read_mostly;
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code);
-
-/*
  * Should never be called:
  *  As it is only called by __ftrace_replace_code() which is called by
  *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
@@ -237,452 +172,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 				 unsigned long addr)
 {
 	WARN_ON(1);
-	ftrace_expected = NULL;
 	return -EINVAL;
 }
 
-static unsigned long ftrace_update_func;
-static unsigned long ftrace_update_func_call;
-
-static int update_ftrace_func(unsigned long ip, void *new)
-{
-	unsigned char old[MCOUNT_INSN_SIZE];
-	int ret;
-
-	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
-
-	ftrace_update_func = ip;
-	/* Make sure the breakpoints see the ftrace_update_func update */
-	smp_wmb();
-
-	/* See comment above by declaration of modifying_ftrace_code */
-	atomic_inc(&modifying_ftrace_code);
-
-	ret = ftrace_modify_code(ip, old, new);
-
-	atomic_dec(&modifying_ftrace_code);
-
-	return ret;
-}
-
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
-	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char *new;
-	int ret;
-
-	ftrace_update_func_call = (unsigned long)func;
-
-	new = ftrace_call_replace(ip, (unsigned long)func);
-	ret = update_ftrace_func(ip, new);
-
-	/* Also update the regs callback function */
-	if (!ret) {
-		ip = (unsigned long)(&ftrace_regs_call);
-		new = ftrace_call_replace(ip, (unsigned long)func);
-		ret = update_ftrace_func(ip, new);
-	}
-
-	return ret;
-}
-
-static nokprobe_inline int is_ftrace_caller(unsigned long ip)
-{
-	if (ip == ftrace_update_func)
-		return 1;
-
-	return 0;
-}
-
-/*
- * A breakpoint was added to the code address we are about to
- * modify, and this is the handle that will just skip over it.
- * We are either changing a nop into a trace call, or a trace
- * call to a nop. While the change is taking place, we treat
- * it just like it was a nop.
- */
-int ftrace_int3_handler(struct pt_regs *regs)
-{
 	unsigned long ip;
+	const char *new;
 
-	if (WARN_ON_ONCE(!regs))
-		return 0;
-
-	ip = regs->ip - INT3_INSN_SIZE;
-
-	if (ftrace_location(ip)) {
-		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
-		return 1;
-	} else if (is_ftrace_caller(ip)) {
-		if (!ftrace_update_func_call) {
-			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
-			return 1;
-		}
-		int3_emulate_call(regs, ftrace_update_func_call);
-		return 1;
-	}
-
-	return 0;
-}
-NOKPROBE_SYMBOL(ftrace_int3_handler);
-
-static int ftrace_write(unsigned long ip, const char *val, int size)
-{
-	ip = text_ip_addr(ip);
-
-	if (probe_kernel_write((void *)ip, val, size))
-		return -EPERM;
-
-	return 0;
-}
-
-static int add_break(unsigned long ip, const char *old)
-{
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-	unsigned char brk = BREAKPOINT_INSTRUCTION;
-
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
-		return -EFAULT;
-
-	ftrace_expected = old;
-
-	/* Make sure it is what we expect it to be */
-	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
-		return -EINVAL;
-
-	return ftrace_write(ip, &brk, 1);
-}
-
-static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned const char *old;
-	unsigned long ip = rec->ip;
-
-	old = ftrace_call_replace(ip, addr);
-
-	return add_break(rec->ip, old);
-}
-
-
-static int add_brk_on_nop(struct dyn_ftrace *rec)
-{
-	unsigned const char *old;
-
-	old = ftrace_nop_replace();
-
-	return add_break(rec->ip, old);
-}
-
-static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ftrace_addr = ftrace_get_addr_curr(rec);
-
-	ret = ftrace_test_record(rec, enable);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return add_brk_on_nop(rec);
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return add_brk_on_call(rec, ftrace_addr);
-	}
-	return 0;
-}
-
-/*
- * On error, we need to remove breakpoints. This needs to
- * be done caefully. If the address does not currently have a
- * breakpoint, we know we are done. Otherwise, we look at the
- * remaining 4 bytes of the instruction. If it matches a nop
- * we replace the breakpoint with the nop. Otherwise we replace
- * it with the call instruction.
- */
-static int remove_breakpoint(struct dyn_ftrace *rec)
-{
-	unsigned char ins[MCOUNT_INSN_SIZE];
-	unsigned char brk = BREAKPOINT_INSTRUCTION;
-	const unsigned char *nop;
-	unsigned long ftrace_addr;
-	unsigned long ip = rec->ip;
-
-	/* If we fail the read, just give up */
-	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
-		return -EFAULT;
-
-	/* If this does not have a breakpoint, we are done */
-	if (ins[0] != brk)
-		return 0;
-
-	nop = ftrace_nop_replace();
-
-	/*
-	 * If the last 4 bytes of the instruction do not match
-	 * a nop, then we assume that this is a call to ftrace_addr.
-	 */
-	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
-		/*
-		 * For extra paranoidism, we check if the breakpoint is on
-		 * a call that would actually jump to the ftrace_addr.
-		 * If not, don't touch the breakpoint, we make just create
-		 * a disaster.
-		 */
-		ftrace_addr = ftrace_get_addr_new(rec);
-		nop = ftrace_call_replace(ip, ftrace_addr);
-
-		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
-			goto update;
-
-		/* Check both ftrace_addr and ftrace_old_addr */
-		ftrace_addr = ftrace_get_addr_curr(rec);
-		nop = ftrace_call_replace(ip, ftrace_addr);
-
-		ftrace_expected = nop;
-
-		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
-			return -EINVAL;
-	}
-
- update:
-	return ftrace_write(ip, nop, 1);
-}
-
-static int add_update_code(unsigned long ip, unsigned const char *new)
-{
-	/* skip breakpoint */
-	ip++;
-	new++;
-	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
-}
-
-static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_call_replace(ip, addr);
-	return add_update_code(ip, new);
-}
-
-static int add_update_nop(struct dyn_ftrace *rec)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_nop_replace();
-	return add_update_code(ip, new);
-}
-
-static int add_update(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ret = ftrace_test_record(rec, enable);
-
-	ftrace_addr  = ftrace_get_addr_new(rec);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return add_update_call(rec, ftrace_addr);
-
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return add_update_nop(rec);
-	}
-
-	return 0;
-}
-
-static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_call_replace(ip, addr);
-
-	return ftrace_write(ip, new, 1);
-}
-
-static int finish_update_nop(struct dyn_ftrace *rec)
-{
-	unsigned long ip = rec->ip;
-	unsigned const char *new;
-
-	new = ftrace_nop_replace();
-
-	return ftrace_write(ip, new, 1);
-}
-
-static int finish_update(struct dyn_ftrace *rec, bool enable)
-{
-	unsigned long ftrace_addr;
-	int ret;
-
-	ret = ftrace_update_record(rec, enable);
-
-	ftrace_addr = ftrace_get_addr_new(rec);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-
-	case FTRACE_UPDATE_MODIFY_CALL:
-	case FTRACE_UPDATE_MAKE_CALL:
-		/* converting nop to call */
-		return finish_update_call(rec, ftrace_addr);
+	ip = (unsigned long)(&ftrace_call);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 
-	case FTRACE_UPDATE_MAKE_NOP:
-		/* converting a call to a nop */
-		return finish_update_nop(rec);
-	}
+	ip = (unsigned long)(&ftrace_regs_call);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 
 	return 0;
 }
 
-static void do_sync_core(void *data)
-{
-	sync_core();
-}
-
-static void run_sync(void)
-{
-	int enable_irqs;
-
-	/* No need to sync if there's only one CPU */
-	if (num_online_cpus() == 1)
-		return;
-
-	enable_irqs = irqs_disabled();
-
-	/* We may be called with interrupts disabled (on bootup). */
-	if (enable_irqs)
-		local_irq_enable();
-	on_each_cpu(do_sync_core, NULL, 1);
-	if (enable_irqs)
-		local_irq_disable();
-}
-
 void ftrace_replace_code(int enable)
 {
 	struct ftrace_rec_iter *iter;
 	struct dyn_ftrace *rec;
-	const char *report = "adding breakpoints";
-	int count = 0;
+	const char *new, *old;
 	int ret;
 
 	for_ftrace_rec_iter(iter) {
 		rec = ftrace_rec_iter_record(iter);
 
-		ret = add_breakpoints(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
-	}
-
-	run_sync();
+		switch (ftrace_test_record(rec, enable)) {
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
 
-	report = "updating code";
-	count = 0;
+		case FTRACE_UPDATE_MAKE_CALL:
+			old = ftrace_nop_replace();
+			break;
 
-	for_ftrace_rec_iter(iter) {
-		rec = ftrace_rec_iter_record(iter);
+		case FTRACE_UPDATE_MODIFY_CALL:
+		case FTRACE_UPDATE_MAKE_NOP:
+			old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
+			break;
+		}
 
-		ret = add_update(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
+		ret = ftrace_verify_code(rec->ip, old);
+		if (ret) {
+			ftrace_bug(ret, rec);
+			return;
+		}
 	}
 
-	run_sync();
-
-	report = "removing breakpoints";
-	count = 0;
-
 	for_ftrace_rec_iter(iter) {
 		rec = ftrace_rec_iter_record(iter);
 
-		ret = finish_update(rec, enable);
-		if (ret)
-			goto remove_breakpoints;
-		count++;
-	}
+		switch (ftrace_test_record(rec, enable)) {
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
 
-	run_sync();
+		case FTRACE_UPDATE_MAKE_CALL:
+		case FTRACE_UPDATE_MODIFY_CALL:
+			new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
+			break;
 
-	return;
+		case FTRACE_UPDATE_MAKE_NOP:
+			new = ftrace_nop_replace();
+			break;
+		}
 
- remove_breakpoints:
-	pr_warn("Failed on %s (%d):\n", report, count);
-	ftrace_bug(ret, rec);
-	for_ftrace_rec_iter(iter) {
-		rec = ftrace_rec_iter_record(iter);
-		/*
-		 * Breakpoints are handled only when this function is in
-		 * progress. The system could not work with them.
-		 */
-		if (remove_breakpoint(rec))
-			BUG();
+		text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
+		ftrace_update_record(rec, enable);
 	}
-	run_sync();
-}
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
-		   unsigned const char *new_code)
-{
-	int ret;
-
-	ret = add_break(ip, old_code);
-	if (ret)
-		goto out;
-
-	run_sync();
-
-	ret = add_update_code(ip, new_code);
-	if (ret)
-		goto fail_update;
-
-	run_sync();
-
-	ret = ftrace_write(ip, new_code, 1);
-	/*
-	 * The breakpoint is handled only when this function is in progress.
-	 * The system could not work if we could not remove it.
-	 */
-	BUG_ON(ret);
- out:
-	run_sync();
-	return ret;
-
- fail_update:
-	/* Also here the system could not work with the breakpoint */
-	if (ftrace_write(ip, old_code, 1))
-		BUG();
-	goto out;
+	text_poke_finish();
 }
 
 void arch_ftrace_update_code(int command)
 {
-	/* See comment above by declaration of modifying_ftrace_code */
-	atomic_inc(&modifying_ftrace_code);
-
 	ftrace_modify_all_code(command);
-
-	atomic_dec(&modifying_ftrace_code);
 }
 
 int __init ftrace_dyn_arch_init(void)
@@ -747,6 +314,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	unsigned long start_offset;
 	unsigned long end_offset;
 	unsigned long op_offset;
+	unsigned long call_offset;
 	unsigned long offset;
 	unsigned long npages;
 	unsigned long size;
@@ -763,10 +331,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 		start_offset = (unsigned long)ftrace_regs_caller;
 		end_offset = (unsigned long)ftrace_regs_caller_end;
 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
+		call_offset = (unsigned long)ftrace_regs_call;
 	} else {
 		start_offset = (unsigned long)ftrace_caller;
 		end_offset = (unsigned long)ftrace_epilogue;
 		op_offset = (unsigned long)ftrace_caller_op_ptr;
+		call_offset = (unsigned long)ftrace_call;
 	}
 
 	size = end_offset - start_offset;
@@ -823,16 +393,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* put in the new offset to the ftrace_ops */
 	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
 
+	/* put in the call to the function */
+	mutex_lock(&text_mutex);
+	call_offset -= start_offset;
+	memcpy(trampoline + call_offset,
+	       text_gen_insn(CALL_INSN_OPCODE,
+			     trampoline + call_offset,
+			     ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
+	mutex_unlock(&text_mutex);
+
 	/* ALLOC_TRAMP flags lets us know we created it */
 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
 
 	set_vm_flush_reset_perms(trampoline);
 
-	/*
-	 * Module allocation needs to be completed by making the page
-	 * executable. The page is still writable, which is a security hazard,
-	 * but anyhow ftrace breaks W^X completely.
-	 */
+	set_memory_ro((unsigned long)trampoline, npages);
 	set_memory_x((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
 fail:
@@ -859,62 +434,54 @@ static unsigned long calc_trampoline_call_offset(bool save_regs)
 void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 {
 	ftrace_func_t func;
-	unsigned char *new;
 	unsigned long offset;
 	unsigned long ip;
 	unsigned int size;
-	int ret, npages;
+	const char *new;
 
-	if (ops->trampoline) {
-		/*
-		 * The ftrace_ops caller may set up its own trampoline.
-		 * In such a case, this code must not modify it.
-		 */
-		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
-			return;
-		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
-		set_memory_rw(ops->trampoline, npages);
-	} else {
+	if (!ops->trampoline) {
 		ops->trampoline = create_trampoline(ops, &size);
 		if (!ops->trampoline)
 			return;
 		ops->trampoline_size = size;
-		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+		return;
 	}
 
+	/*
+	 * The ftrace_ops caller may set up its own trampoline.
+	 * In such a case, this code must not modify it.
+	 */
+	if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+		return;
+
 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
 	ip = ops->trampoline + offset;
-
 	func = ftrace_ops_get_func(ops);
 
-	ftrace_update_func_call = (unsigned long)func;
-
+	mutex_lock(&text_mutex);
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
-	ret = update_ftrace_func(ip, new);
-	set_memory_ro(ops->trampoline, npages);
-
-	/* The update should never fail */
-	WARN_ON(ret);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+	mutex_unlock(&text_mutex);
 }
 
 /* Return the address of the function the trampoline calls */
 static void *addr_from_call(void *ptr)
 {
-	union ftrace_code_union calc;
+	union text_poke_insn call;
 	int ret;
 
-	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
+	ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE);
 	if (WARN_ON_ONCE(ret < 0))
 		return NULL;
 
 	/* Make sure this is a call */
-	if (WARN_ON_ONCE(calc.op != 0xe8)) {
-		pr_warn("Expected e8, got %x\n", calc.op);
+	if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {
+		pr_warn("Expected E8, got %x\n", call.opcode);
 		return NULL;
 	}
 
-	return ptr + MCOUNT_INSN_SIZE + calc.offset;
+	return ptr + CALL_INSN_SIZE + call.disp;
 }
 
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
@@ -981,19 +548,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_graph_call(void);
 
-static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
+static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
 {
-	return ftrace_text_replace(0xe9, ip, addr);
+	return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
 }
 
 static int ftrace_mod_jmp(unsigned long ip, void *func)
 {
-	unsigned char *new;
+	const char *new;
 
-	ftrace_update_func_call = 0UL;
 	new = ftrace_jmp_replace(ip, (unsigned long)func);
-
-	return update_ftrace_func(ip, new);
+	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+	return 0;
 }
 
 int ftrace_enable_ftrace_graph_caller(void)
@@ -1019,10 +585,9 @@ int ftrace_disable_ftrace_graph_caller(void)
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 			   unsigned long frame_pointer)
 {
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	unsigned long old;
 	int faulted;
-	unsigned long return_hooker = (unsigned long)
-				&return_to_handler;
 
 	/*
 	 * When resuming from suspend-to-ram, this function can be indirectly
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 073aab525d80..e8a9f8370112 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -12,20 +12,18 @@
 #include <asm/frame.h>
 #include <asm/asm-offsets.h>
 
-# define function_hook	__fentry__
-EXPORT_SYMBOL(__fentry__)
-
 #ifdef CONFIG_FRAME_POINTER
 # define MCOUNT_FRAME			1	/* using frame = true  */
 #else
 # define MCOUNT_FRAME			0	/* using frame = false */
 #endif
 
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
 	ret
-END(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
 
-ENTRY(ftrace_caller)
+SYM_CODE_START(ftrace_caller)
 
 #ifdef CONFIG_FRAME_POINTER
 	/*
@@ -85,11 +83,11 @@ ftrace_graph_call:
 #endif
 
 /* This is weak to keep gas from relaxing the jumps */
-WEAK(ftrace_stub)
+SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
 	ret
-END(ftrace_caller)
+SYM_CODE_END(ftrace_caller)
 
-ENTRY(ftrace_regs_caller)
+SYM_CODE_START(ftrace_regs_caller)
 	/*
 	 * We're here from an mcount/fentry CALL, and the stack frame looks like:
 	 *
@@ -138,7 +136,7 @@ ENTRY(ftrace_regs_caller)
 	movl	function_trace_op, %ecx	# 3rd argument: ftrace_pos
 	pushl	%esp			# 4th argument: pt_regs
 
-GLOBAL(ftrace_regs_call)
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
 	call	ftrace_stub
 
 	addl	$4, %esp		# skip 4th argument
@@ -163,9 +161,10 @@ GLOBAL(ftrace_regs_call)
 	popl	%eax
 
 	jmp	.Lftrace_ret
+SYM_CODE_END(ftrace_regs_caller)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
+SYM_CODE_START(ftrace_graph_caller)
 	pushl	%eax
 	pushl	%ecx
 	pushl	%edx
@@ -179,7 +178,7 @@ ENTRY(ftrace_graph_caller)
 	popl	%ecx
 	popl	%eax
 	ret
-END(ftrace_graph_caller)
+SYM_CODE_END(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 809d54397dba..369e61faacfe 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -14,9 +14,6 @@
 	.code64
 	.section .entry.text, "ax"
 
-# define function_hook	__fentry__
-EXPORT_SYMBOL(__fentry__)
-
 #ifdef CONFIG_FRAME_POINTER
 /* Save parent and function stack frames (rip and rbp) */
 #  define MCOUNT_FRAME_SIZE	(8+16*2)
@@ -88,6 +85,7 @@ EXPORT_SYMBOL(__fentry__)
 	movq %rdi, RDI(%rsp)
 	movq %r8, R8(%rsp)
 	movq %r9, R9(%rsp)
+	movq $0, ORIG_RAX(%rsp)
 	/*
 	 * Save the original RBP. Even though the mcount ABI does not
 	 * require this, it helps out callers.
@@ -114,7 +112,11 @@ EXPORT_SYMBOL(__fentry__)
 	subq $MCOUNT_INSN_SIZE, %rdi
 	.endm
 
-.macro restore_mcount_regs
+.macro restore_mcount_regs save=0
+
+	/* ftrace_regs_caller or frame pointers require this */
+	movq RBP(%rsp), %rbp
+
 	movq R9(%rsp), %r9
 	movq R8(%rsp), %r8
 	movq RDI(%rsp), %rdi
@@ -123,31 +125,29 @@ EXPORT_SYMBOL(__fentry__)
 	movq RCX(%rsp), %rcx
 	movq RAX(%rsp), %rax
 
-	/* ftrace_regs_caller can modify %rbp */
-	movq RBP(%rsp), %rbp
-
-	addq $MCOUNT_REG_SIZE, %rsp
+	addq $MCOUNT_REG_SIZE-\save, %rsp
 
 	.endm
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
 	retq
-ENDPROC(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
 
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
 	/* save_mcount_regs fills in first two parameters */
 	save_mcount_regs
 
-GLOBAL(ftrace_caller_op_ptr)
+SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
 	/* Load the ftrace_ops into the 3rd parameter */
 	movq function_trace_op(%rip), %rdx
 
 	/* regs go into 4th parameter (but make it NULL) */
 	movq $0, %rcx
 
-GLOBAL(ftrace_call)
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
 	call ftrace_stub
 
 	restore_mcount_regs
@@ -157,10 +157,10 @@ GLOBAL(ftrace_call)
 	 * think twice before adding any new code or changing the
 	 * layout here.
 	 */
-GLOBAL(ftrace_epilogue)
+SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call)
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
 	jmp ftrace_stub
 #endif
 
@@ -168,19 +168,21 @@ GLOBAL(ftrace_graph_call)
  * This is weak to keep gas from relaxing the jumps.
  * It is also used to copy the retq for trampolines.
  */
-WEAK(ftrace_stub)
+SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
 	retq
-ENDPROC(ftrace_caller)
+SYM_FUNC_END(ftrace_caller)
 
-ENTRY(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_regs_caller)
 	/* Save the current flags before any operations that can change them */
 	pushfq
 
+	UNWIND_HINT_SAVE
+
 	/* added 8 bytes to save flags */
 	save_mcount_regs 8
 	/* save_mcount_regs fills in first two parameters */
 
-GLOBAL(ftrace_regs_caller_op_ptr)
+SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
 	/* Load the ftrace_ops into the 3rd parameter */
 	movq function_trace_op(%rip), %rdx
 
@@ -209,7 +211,7 @@ GLOBAL(ftrace_regs_caller_op_ptr)
 	/* regs go into 4th parameter */
 	leaq (%rsp), %rcx
 
-GLOBAL(ftrace_regs_call)
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
 	call ftrace_stub
 
 	/* Copy flags back to SS, to restore them */
@@ -228,7 +230,33 @@ GLOBAL(ftrace_regs_call)
 	movq R10(%rsp), %r10
 	movq RBX(%rsp), %rbx
 
-	restore_mcount_regs
+	movq ORIG_RAX(%rsp), %rax
+	movq %rax, MCOUNT_REG_SIZE-8(%rsp)
+
+	/* If ORIG_RAX is anything but zero, make this a call to that */
+	movq ORIG_RAX(%rsp), %rax
+	cmpq	$0, %rax
+	je	1f
+
+	/* Swap the flags with orig_rax */
+	movq MCOUNT_REG_SIZE(%rsp), %rdi
+	movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
+	movq %rax, MCOUNT_REG_SIZE(%rsp)
+
+	restore_mcount_regs 8
+
+	jmp	2f
+
+1:	restore_mcount_regs
+
+
+2:
+	/*
+	 * The stack layout is nondetermistic here, depending on which path was
+	 * taken.  This confuses objtool and ORC, rightfully so.  For now,
+	 * pretend the stack always looks like the non-direct case.
+	 */
+	UNWIND_HINT_RESTORE
 
 	/* Restore flags */
 	popfq
@@ -239,16 +267,16 @@ GLOBAL(ftrace_regs_call)
 	 * The trampoline will add the code to jump
 	 * to the return.
 	 */
-GLOBAL(ftrace_regs_caller_end)
+SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 
 	jmp ftrace_epilogue
 
-ENDPROC(ftrace_regs_caller)
+SYM_FUNC_END(ftrace_regs_caller)
 
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
 	cmpq $ftrace_stub, ftrace_trace_function
 	jnz trace
 
@@ -261,7 +289,7 @@ fgraph_trace:
 	jnz ftrace_graph_caller
 #endif
 
-GLOBAL(ftrace_stub)
+SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
 	retq
 
 trace:
@@ -279,11 +307,12 @@ trace:
 	restore_mcount_regs
 
 	jmp fgraph_trace
-ENDPROC(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
+SYM_FUNC_START(ftrace_graph_caller)
 	/* Saves rbp into %rdx and fills first parameter  */
 	save_mcount_regs
 
@@ -294,9 +323,9 @@ ENTRY(ftrace_graph_caller)
 	restore_mcount_regs
 
 	retq
-ENDPROC(ftrace_graph_caller)
+SYM_FUNC_END(ftrace_graph_caller)
 
-ENTRY(return_to_handler)
+SYM_CODE_START(return_to_handler)
 	UNWIND_HINT_EMPTY
 	subq  $24, %rsp
 
@@ -312,5 +341,5 @@ ENTRY(return_to_handler)
 	movq (%rsp), %rax
 	addq $24, %rsp
 	JMP_NOSPEC %rdi
-END(return_to_handler)
+SYM_CODE_END(return_to_handler)
 #endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 30f9cb2c0b55..3923ab4630d7 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -64,7 +64,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  * can.
  */
 __HEAD
-ENTRY(startup_32)
+SYM_CODE_START(startup_32)
 	movl pa(initial_stack),%ecx
 	
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -156,7 +156,7 @@ ENTRY(startup_32)
 	jmp *%eax
 
 .Lbad_subarch:
-WEAK(xen_entry)
+SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
 	/* Unknown implementation; there's really
 	   nothing we can do at this point. */
 	ud2a
@@ -172,6 +172,7 @@ num_subarch_entries = (. - subarch_entries) / 4
 #else
 	jmp .Ldefault_entry
 #endif /* CONFIG_PARAVIRT */
+SYM_CODE_END(startup_32)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
@@ -179,12 +180,12 @@ num_subarch_entries = (. - subarch_entries) / 4
  * up already except stack. We just set up stack here. Then call
  * start_secondary().
  */
-ENTRY(start_cpu0)
+SYM_FUNC_START(start_cpu0)
 	movl initial_stack, %ecx
 	movl %ecx, %esp
 	call *(initial_code)
 1:	jmp 1b
-ENDPROC(start_cpu0)
+SYM_FUNC_END(start_cpu0)
 #endif
 
 /*
@@ -195,7 +196,7 @@ ENDPROC(start_cpu0)
  * If cpu hotplug is not supported then this code can go in init section
  * which will be freed later
  */
-ENTRY(startup_32_smp)
+SYM_FUNC_START(startup_32_smp)
 	cld
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
@@ -362,7 +363,7 @@ ENTRY(startup_32_smp)
 
 	call *(initial_code)
 1:	jmp 1b
-ENDPROC(startup_32_smp)
+SYM_FUNC_END(startup_32_smp)
 
 #include "verify_cpu.S"
 
@@ -392,7 +393,7 @@ setup_once:
 	andl $0,setup_once_ref	/* Once is enough, thanks */
 	ret
 
-ENTRY(early_idt_handler_array)
+SYM_FUNC_START(early_idt_handler_array)
 	# 36(%esp) %eflags
 	# 32(%esp) %cs
 	# 28(%esp) %eip
@@ -407,9 +408,9 @@ ENTRY(early_idt_handler_array)
 	i = i + 1
 	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-ENDPROC(early_idt_handler_array)
+SYM_FUNC_END(early_idt_handler_array)
 	
-early_idt_handler_common:
+SYM_CODE_START_LOCAL(early_idt_handler_common)
 	/*
 	 * The stack is the hardware frame, an error code or zero, and the
 	 * vector number.
@@ -460,10 +461,10 @@ early_idt_handler_common:
 	decl	%ss:early_recursion_flag
 	addl	$4, %esp	/* pop pt_regs->orig_ax */
 	iret
-ENDPROC(early_idt_handler_common)
+SYM_CODE_END(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
-ENTRY(early_ignore_irq)
+SYM_FUNC_START(early_ignore_irq)
 	cld
 #ifdef CONFIG_PRINTK
 	pushl %eax
@@ -498,19 +499,16 @@ ENTRY(early_ignore_irq)
 hlt_loop:
 	hlt
 	jmp hlt_loop
-ENDPROC(early_ignore_irq)
+SYM_FUNC_END(early_ignore_irq)
 
 __INITDATA
 	.align 4
-GLOBAL(early_recursion_flag)
-	.long 0
+SYM_DATA(early_recursion_flag, .long 0)
 
 __REFDATA
 	.align 4
-ENTRY(initial_code)
-	.long i386_start_kernel
-ENTRY(setup_once_ref)
-	.long setup_once
+SYM_DATA(initial_code,		.long i386_start_kernel)
+SYM_DATA(setup_once_ref,	.long setup_once)
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 #define	PGD_ALIGN	(2 * PAGE_SIZE)
@@ -553,7 +551,7 @@ EXPORT_SYMBOL(empty_zero_page)
 __PAGE_ALIGNED_DATA
 	/* Page-aligned for the benefit of paravirt? */
 	.align PGD_ALIGN
-ENTRY(initial_page_table)
+SYM_DATA_START(initial_page_table)
 	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
 # if KPMDS == 3
 	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
@@ -571,17 +569,28 @@ ENTRY(initial_page_table)
 #  error "Kernel PMDs should be 1, 2 or 3"
 # endif
 	.align PAGE_SIZE		/* needs to be page-sized too */
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * PTI needs another page so sync_initial_pagetable() works correctly
+	 * and does not scribble over the data which is placed behind the
+	 * actual initial_page_table. See clone_pgd_range().
+	 */
+	.fill 1024, 4, 0
+#endif
+
+SYM_DATA_END(initial_page_table)
 #endif
 
 .data
 .balign 4
-ENTRY(initial_stack)
-	/*
-	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
-	 * unwinder reliably detect the end of the stack.
-	 */
-	.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
-	      TOP_OF_KERNEL_STACK_PADDING;
+/*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
+ * reliably detect the end of the stack.
+ */
+SYM_DATA(initial_stack,
+		.long init_thread_union + THREAD_SIZE -
+		SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING)
 
 __INITRODATA
 int_msg:
@@ -597,27 +606,28 @@ int_msg:
  */
 
 	.data
-.globl boot_gdt_descr
-
 	ALIGN
 # early boot GDT descriptor (must use 1:1 address mapping)
 	.word 0				# 32 bit align gdt_desc.address
-boot_gdt_descr:
+SYM_DATA_START_LOCAL(boot_gdt_descr)
 	.word __BOOT_DS+7
 	.long boot_gdt - __PAGE_OFFSET
+SYM_DATA_END(boot_gdt_descr)
 
 # boot GDT descriptor (later on used by CPU#0):
 	.word 0				# 32 bit align gdt_desc.address
-ENTRY(early_gdt_descr)
+SYM_DATA_START(early_gdt_descr)
 	.word GDT_ENTRIES*8-1
 	.long gdt_page			/* Overwritten for secondary CPUs */
+SYM_DATA_END(early_gdt_descr)
 
 /*
  * The boot_gdt must mirror the equivalent in setup.S and is
  * used only for booting.
  */
 	.align L1_CACHE_BYTES
-ENTRY(boot_gdt)
+SYM_DATA_START(boot_gdt)
 	.fill GDT_ENTRY_BOOT_CS,8,0
 	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
 	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
+SYM_DATA_END(boot_gdt)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index f3d3e9646a99..4bbc770af632 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -49,8 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
 	.text
 	__HEAD
 	.code64
-	.globl startup_64
-startup_64:
+SYM_CODE_START_NOALIGN(startup_64)
 	UNWIND_HINT_EMPTY
 	/*
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
@@ -90,7 +89,9 @@ startup_64:
 	/* Form the CR3 value being sure to include the CR3 modifier */
 	addq	$(early_top_pgt - __START_KERNEL_map), %rax
 	jmp 1f
-ENTRY(secondary_startup_64)
+SYM_CODE_END(startup_64)
+
+SYM_CODE_START(secondary_startup_64)
 	UNWIND_HINT_EMPTY
 	/*
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
@@ -240,7 +241,7 @@ ENTRY(secondary_startup_64)
 	pushq	%rax		# target address in negative space
 	lretq
 .Lafter_lret:
-END(secondary_startup_64)
+SYM_CODE_END(secondary_startup_64)
 
 #include "verify_cpu.S"
 
@@ -250,30 +251,28 @@ END(secondary_startup_64)
  * up already except stack. We just set up stack here. Then call
  * start_secondary() via .Ljump_to_C_code.
  */
-ENTRY(start_cpu0)
+SYM_CODE_START(start_cpu0)
 	UNWIND_HINT_EMPTY
 	movq	initial_stack(%rip), %rsp
 	jmp	.Ljump_to_C_code
-END(start_cpu0)
+SYM_CODE_END(start_cpu0)
 #endif
 
 	/* Both SMP bootup and ACPI suspend change these variables */
 	__REFDATA
 	.balign	8
-	GLOBAL(initial_code)
-	.quad	x86_64_start_kernel
-	GLOBAL(initial_gs)
-	.quad	INIT_PER_CPU_VAR(fixed_percpu_data)
-	GLOBAL(initial_stack)
-	/*
-	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
-	 * unwinder reliably detect the end of the stack.
-	 */
-	.quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
+SYM_DATA(initial_code,	.quad x86_64_start_kernel)
+SYM_DATA(initial_gs,	.quad INIT_PER_CPU_VAR(fixed_percpu_data))
+
+/*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
+ * reliably detect the end of the stack.
+ */
+SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS)
 	__FINITDATA
 
 	__INIT
-ENTRY(early_idt_handler_array)
+SYM_CODE_START(early_idt_handler_array)
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
 	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
@@ -289,9 +288,9 @@ ENTRY(early_idt_handler_array)
 	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
 	UNWIND_HINT_IRET_REGS offset=16
-END(early_idt_handler_array)
+SYM_CODE_END(early_idt_handler_array)
 
-early_idt_handler_common:
+SYM_CODE_START_LOCAL(early_idt_handler_common)
 	/*
 	 * The stack is the hardware frame, an error code or zero, and the
 	 * vector number.
@@ -333,17 +332,11 @@ early_idt_handler_common:
 20:
 	decl early_recursion_flag(%rip)
 	jmp restore_regs_and_return_to_kernel
-END(early_idt_handler_common)
+SYM_CODE_END(early_idt_handler_common)
 
-	__INITDATA
 
-	.balign 4
-GLOBAL(early_recursion_flag)
-	.long 0
-
-#define NEXT_PAGE(name) \
-	.balign	PAGE_SIZE; \
-GLOBAL(name)
+#define SYM_DATA_START_PAGE_ALIGNED(name)			\
+	SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 /*
@@ -358,11 +351,11 @@ GLOBAL(name)
  */
 #define PTI_USER_PGD_FILL	512
 /* This ensures they are 8k-aligned: */
-#define NEXT_PGD_PAGE(name) \
-	.balign 2 * PAGE_SIZE; \
-GLOBAL(name)
+#define SYM_DATA_START_PTI_ALIGNED(name) \
+	SYM_START(name, SYM_L_GLOBAL, .balign 2 * PAGE_SIZE)
 #else
-#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define SYM_DATA_START_PTI_ALIGNED(name) \
+	SYM_DATA_START_PAGE_ALIGNED(name)
 #define PTI_USER_PGD_FILL	0
 #endif
 
@@ -375,17 +368,23 @@ GLOBAL(name)
 	.endr
 
 	__INITDATA
-NEXT_PGD_PAGE(early_top_pgt)
+	.balign 4
+
+SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
 	.fill	512,8,0
 	.fill	PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(early_top_pgt)
 
-NEXT_PAGE(early_dynamic_pgts)
+SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+SYM_DATA_END(early_dynamic_pgts)
+
+SYM_DATA(early_recursion_flag, .long 0)
 
 	.data
 
 #if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
-NEXT_PGD_PAGE(init_top_pgt)
+SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + L4_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -393,11 +392,13 @@ NEXT_PGD_PAGE(init_top_pgt)
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 	.fill	PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(init_top_pgt)
 
-NEXT_PAGE(level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.fill	511, 8, 0
-NEXT_PAGE(level2_ident_pgt)
+SYM_DATA_END(level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
 	/*
 	 * Since I easily can, map the first 1G.
 	 * Don't set NX because code runs from these pages.
@@ -407,25 +408,29 @@ NEXT_PAGE(level2_ident_pgt)
 	 * the CPU should ignore the bit.
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(level2_ident_pgt)
 #else
-NEXT_PGD_PAGE(init_top_pgt)
+SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
 	.fill	512,8,0
 	.fill	PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(init_top_pgt)
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
-NEXT_PAGE(level4_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
 	.fill	511,8,0
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(level4_kernel_pgt)
 #endif
 
-NEXT_PAGE(level3_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
 	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(level3_kernel_pgt)
 
-NEXT_PAGE(level2_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
 	/*
 	 * 512 MB kernel mapping. We spend a full page on this pagetable
 	 * anyway.
@@ -442,8 +447,9 @@ NEXT_PAGE(level2_kernel_pgt)
 	 */
 	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
 		KERNEL_IMAGE_SIZE/PMD_SIZE)
+SYM_DATA_END(level2_kernel_pgt)
 
-NEXT_PAGE(level2_fixmap_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
 	.fill	(512 - 4 - FIXMAP_PMD_NUM),8,0
 	pgtno = 0
 	.rept (FIXMAP_PMD_NUM)
@@ -453,31 +459,32 @@ NEXT_PAGE(level2_fixmap_pgt)
 	.endr
 	/* 6 MB reserved space + a 2MB hole */
 	.fill	4,8,0
+SYM_DATA_END(level2_fixmap_pgt)
 
-NEXT_PAGE(level1_fixmap_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
 	.rept (FIXMAP_PMD_NUM)
 	.fill	512,8,0
 	.endr
+SYM_DATA_END(level1_fixmap_pgt)
 
 #undef PMDS
 
 	.data
 	.align 16
-	.globl early_gdt_descr
-early_gdt_descr:
-	.word	GDT_ENTRIES*8-1
-early_gdt_descr_base:
-	.quad	INIT_PER_CPU_VAR(gdt_page)
-
-ENTRY(phys_base)
-	/* This must match the first entry in level2_kernel_pgt */
-	.quad   0x0000000000000000
+
+SYM_DATA(early_gdt_descr,		.word GDT_ENTRIES*8-1)
+SYM_DATA_LOCAL(early_gdt_descr_base,	.quad INIT_PER_CPU_VAR(gdt_page))
+
+	.align 16
+/* This must match the first entry in level2_kernel_pgt */
+SYM_DATA(phys_base, .quad 0x0)
 EXPORT_SYMBOL(phys_base)
 
 #include "../../x86/xen/xen-head.S"
 
 	__PAGE_ALIGNED_BSS
-NEXT_PAGE(empty_zero_page)
+SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
 	.skip PAGE_SIZE
+SYM_DATA_END(empty_zero_page)
 EXPORT_SYMBOL(empty_zero_page)
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c6f791bc481e..7a50f0b62a70 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -84,7 +84,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
 
 static inline void hpet_set_mapping(void)
 {
-	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+	hpet_virt_address = ioremap(hpet_address, HPET_MMAP_SIZE);
 }
 
 static inline void hpet_clear_mapping(void)
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 61a89d3c0382..8abeee0dd7bf 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -3,32 +3,69 @@
  * This contains the io-permission bitmap code - written by obz, with changes
  * by Linus. 32/64 bits code unification by Miguel Botón.
  */
-
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kernel.h>
 #include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
 #include <linux/security.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
 #include <linux/syscalls.h>
 #include <linux/bitmap.h>
-#include <asm/syscalls.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/io_bitmap.h>
 #include <asm/desc.h>
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+
+static atomic64_t io_bitmap_sequence;
+
+void io_bitmap_share(struct task_struct *tsk)
+{
+	/* Can be NULL when current->thread.iopl_emul == 3 */
+	if (current->thread.io_bitmap) {
+		/*
+		 * Take a refcount on current's bitmap. It can be used by
+		 * both tasks as long as none of them changes the bitmap.
+		 */
+		refcount_inc(&current->thread.io_bitmap->refcnt);
+		tsk->thread.io_bitmap = current->thread.io_bitmap;
+	}
+	set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
+}
+
+static void task_update_io_bitmap(void)
+{
+	struct thread_struct *t = &current->thread;
+
+	if (t->iopl_emul == 3 || t->io_bitmap) {
+		/* TSS update is handled on exit to user space */
+		set_thread_flag(TIF_IO_BITMAP);
+	} else {
+		clear_thread_flag(TIF_IO_BITMAP);
+		/* Invalidate TSS */
+		preempt_disable();
+		tss_update_io_bitmap();
+		preempt_enable();
+	}
+}
+
+void io_bitmap_exit(void)
+{
+	struct io_bitmap *iobm = current->thread.io_bitmap;
+
+	current->thread.io_bitmap = NULL;
+	task_update_io_bitmap();
+	if (iobm && refcount_dec_and_test(&iobm->refcnt))
+		kfree(iobm);
+}
+
 /*
- * this changes the io permissions bitmap in the current task.
+ * This changes the io permissions bitmap in the current task.
  */
 long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct *t = &current->thread;
-	struct tss_struct *tss;
-	unsigned int i, max_long, bytes, bytes_updated;
+	unsigned int i, max_long;
+	struct io_bitmap *iobm;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -41,59 +78,72 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 * IO bitmap up. ioperm() is much less timing critical than clone(),
 	 * this is why we delay this operation until now:
 	 */
-	if (!t->io_bitmap_ptr) {
-		unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-
-		if (!bitmap)
+	iobm = t->io_bitmap;
+	if (!iobm) {
+		/* No point to allocate a bitmap just to clear permissions */
+		if (!turn_on)
+			return 0;
+		iobm = kmalloc(sizeof(*iobm), GFP_KERNEL);
+		if (!iobm)
 			return -ENOMEM;
 
-		memset(bitmap, 0xff, IO_BITMAP_BYTES);
-		t->io_bitmap_ptr = bitmap;
-		set_thread_flag(TIF_IO_BITMAP);
+		memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap));
+		refcount_set(&iobm->refcnt, 1);
+	}
 
-		/*
-		 * Now that we have an IO bitmap, we need our TSS limit to be
-		 * correct.  It's fine if we are preempted after doing this:
-		 * with TIF_IO_BITMAP set, context switches will keep our TSS
-		 * limit correct.
-		 */
-		preempt_disable();
-		refresh_tss_limit();
-		preempt_enable();
+	/*
+	 * If the bitmap is not shared, then nothing can take a refcount as
+	 * current can obviously not fork at the same time. If it's shared
+	 * duplicate it and drop the refcount on the original one.
+	 */
+	if (refcount_read(&iobm->refcnt) > 1) {
+		iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL);
+		if (!iobm)
+			return -ENOMEM;
+		refcount_set(&iobm->refcnt, 1);
+		io_bitmap_exit();
 	}
 
 	/*
-	 * do it in the per-thread copy and in the TSS ...
-	 *
-	 * Disable preemption via get_cpu() - we must not switch away
-	 * because the ->io_bitmap_max value must match the bitmap
-	 * contents:
+	 * Store the bitmap pointer (might be the same if the task already
+	 * head one). Must be done here so freeing the bitmap when all
+	 * permissions are dropped has the pointer set up.
 	 */
-	tss = &per_cpu(cpu_tss_rw, get_cpu());
+	t->io_bitmap = iobm;
+	/* Mark it active for context switching and exit to user mode */
+	set_thread_flag(TIF_IO_BITMAP);
 
+	/*
+	 * Update the tasks bitmap. The update of the TSS bitmap happens on
+	 * exit to user mode. So this needs no protection.
+	 */
 	if (turn_on)
-		bitmap_clear(t->io_bitmap_ptr, from, num);
+		bitmap_clear(iobm->bitmap, from, num);
 	else
-		bitmap_set(t->io_bitmap_ptr, from, num);
+		bitmap_set(iobm->bitmap, from, num);
 
 	/*
 	 * Search for a (possibly new) maximum. This is simple and stupid,
 	 * to keep it obviously correct:
 	 */
-	max_long = 0;
-	for (i = 0; i < IO_BITMAP_LONGS; i++)
-		if (t->io_bitmap_ptr[i] != ~0UL)
+	max_long = UINT_MAX;
+	for (i = 0; i < IO_BITMAP_LONGS; i++) {
+		if (iobm->bitmap[i] != ~0UL)
 			max_long = i;
+	}
+	/* All permissions dropped? */
+	if (max_long == UINT_MAX) {
+		io_bitmap_exit();
+		return 0;
+	}
 
-	bytes = (max_long + 1) * sizeof(unsigned long);
-	bytes_updated = max(bytes, t->io_bitmap_max);
-
-	t->io_bitmap_max = bytes;
-
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	iobm->max = (max_long + 1) * sizeof(unsigned long);
 
-	put_cpu();
+	/*
+	 * Update the sequence number to force a TSS update on return to
+	 * user mode.
+	 */
+	iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence);
 
 	return 0;
 }
@@ -104,38 +154,61 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
 }
 
 /*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ * The sys_iopl functionality depends on the level argument, which if
+ * granted for the task is used to enable access to all 65536 I/O ports.
+ *
+ * This does not use the IOPL mechanism provided by the CPU as that would
+ * also allow the user space task to use the CLI/STI instructions.
  *
- * Here we just change the flags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
+ * Disabling interrupts in a user space task is dangerous as it might lock
+ * up the machine and the semantics vs. syscalls and exceptions is
+ * undefined.
+ *
+ * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3
+ * 3 enables them.
+ *
+ * IOPL is strictly per thread and inherited on fork.
  */
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
-	struct pt_regs *regs = current_pt_regs();
 	struct thread_struct *t = &current->thread;
-
-	/*
-	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
-	 * and changing them has no effect.
-	 */
-	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+	unsigned int old;
 
 	if (level > 3)
 		return -EINVAL;
+
+	old = t->iopl_emul;
+
+	/* No point in going further if nothing changes */
+	if (level == old)
+		return 0;
+
 	/* Trying to gain more privileges? */
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO) ||
 		    security_locked_down(LOCKDOWN_IOPORT))
 			return -EPERM;
 	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
-		(level << X86_EFLAGS_IOPL_BIT);
-	t->iopl = level << X86_EFLAGS_IOPL_BIT;
-	set_iopl_mask(t->iopl);
+
+	t->iopl_emul = level;
+	task_update_io_bitmap();
 
 	return 0;
 }
+
+#else /* CONFIG_X86_IOPL_IOPERM */
+
+long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+	return -ENOSYS;
+}
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
+{
+	return -ENOSYS;
+}
+
+SYSCALL_DEFINE1(iopl, unsigned int, level)
+{
+	return -ENOSYS;
+}
+#endif
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index ddeeaac8adda..0db0375235b4 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -7,20 +7,20 @@
 /*
  * unsigned long native_save_fl(void)
  */
-ENTRY(native_save_fl)
+SYM_FUNC_START(native_save_fl)
 	pushf
 	pop %_ASM_AX
 	ret
-ENDPROC(native_save_fl)
+SYM_FUNC_END(native_save_fl)
 EXPORT_SYMBOL(native_save_fl)
 
 /*
  * void native_restore_fl(unsigned long flags)
  * %eax/%rdi: flags
  */
-ENTRY(native_restore_fl)
+SYM_FUNC_START(native_restore_fl)
 	push %_ASM_ARG1
 	popf
 	ret
-ENDPROC(native_restore_fl)
+SYM_FUNC_END(native_restore_fl)
 EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 3ad34f01de2a..6eb8b50ea07e 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -11,6 +11,7 @@
 #include <linux/acpi_pmtmr.h>
 #include <linux/kernel.h>
 #include <linux/reboot.h>
+#include <linux/serial_8250.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
 #include <asm/hypervisor.h>
@@ -21,9 +22,24 @@
 #include <asm/setup.h>
 #include <asm/jailhouse_para.h>
 
-static __initdata struct jailhouse_setup_data setup_data;
+static struct jailhouse_setup_data setup_data;
+#define SETUP_DATA_V1_LEN	(sizeof(setup_data.hdr) + sizeof(setup_data.v1))
+#define SETUP_DATA_V2_LEN	(SETUP_DATA_V1_LEN + sizeof(setup_data.v2))
+
 static unsigned int precalibrated_tsc_khz;
 
+static void jailhouse_setup_irq(unsigned int irq)
+{
+	struct mpc_intsrc mp_irq = {
+		.type		= MP_INTSRC,
+		.irqtype	= mp_INT,
+		.irqflag	= MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
+		.srcbusirq	= irq,
+		.dstirq		= irq,
+	};
+	mp_save_irq(&mp_irq);
+}
+
 static uint32_t jailhouse_cpuid_base(void)
 {
 	if (boot_cpu_data.cpuid_level < 0 ||
@@ -45,7 +61,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
 
 static void __init jailhouse_timer_init(void)
 {
-	lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
+	lapic_timer_period = setup_data.v1.apic_khz * (1000 / HZ);
 }
 
 static unsigned long jailhouse_get_tsc(void)
@@ -77,33 +93,28 @@ static void __init jailhouse_get_smp_config(unsigned int early)
 		.type = IOAPIC_DOMAIN_STRICT,
 		.ops = &mp_ioapic_irqdomain_ops,
 	};
-	struct mpc_intsrc mp_irq = {
-		.type = MP_INTSRC,
-		.irqtype = mp_INT,
-		.irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
-	};
 	unsigned int cpu;
 
 	jailhouse_x2apic_init();
 
 	register_lapic_address(0xfee00000);
 
-	for (cpu = 0; cpu < setup_data.num_cpus; cpu++) {
-		generic_processor_info(setup_data.cpu_ids[cpu],
+	for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++) {
+		generic_processor_info(setup_data.v1.cpu_ids[cpu],
 				       boot_cpu_apic_version);
 	}
 
 	smp_found_config = 1;
 
-	if (setup_data.standard_ioapic) {
+	if (setup_data.v1.standard_ioapic) {
 		mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg);
 
-		/* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
-		mp_irq.srcbusirq = mp_irq.dstirq = 3;
-		mp_save_irq(&mp_irq);
-
-		mp_irq.srcbusirq = mp_irq.dstirq = 4;
-		mp_save_irq(&mp_irq);
+		if (IS_ENABLED(CONFIG_SERIAL_8250) &&
+		    setup_data.hdr.version < 2) {
+			/* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
+			jailhouse_setup_irq(3);
+			jailhouse_setup_irq(4);
+		}
 	}
 }
 
@@ -126,9 +137,9 @@ static int __init jailhouse_pci_arch_init(void)
 		pcibios_last_bus = 0xff;
 
 #ifdef CONFIG_PCI_MMCONFIG
-	if (setup_data.pci_mmconfig_base) {
+	if (setup_data.v1.pci_mmconfig_base) {
 		pci_mmconfig_add(0, 0, pcibios_last_bus,
-				 setup_data.pci_mmconfig_base);
+				 setup_data.v1.pci_mmconfig_base);
 		pci_mmcfg_arch_init();
 	}
 #endif
@@ -136,9 +147,57 @@ static int __init jailhouse_pci_arch_init(void)
 	return 0;
 }
 
+#ifdef CONFIG_SERIAL_8250
+static inline bool jailhouse_uart_enabled(unsigned int uart_nr)
+{
+	return setup_data.v2.flags & BIT(uart_nr);
+}
+
+static void jailhouse_serial_fixup(int port, struct uart_port *up,
+				   u32 *capabilities)
+{
+	static const u16 pcuart_base[] = {0x3f8, 0x2f8, 0x3e8, 0x2e8};
+	unsigned int n;
+
+	for (n = 0; n < ARRAY_SIZE(pcuart_base); n++) {
+		if (pcuart_base[n] != up->iobase)
+			continue;
+
+		if (jailhouse_uart_enabled(n)) {
+			pr_info("Enabling UART%u (port 0x%lx)\n", n,
+				up->iobase);
+			jailhouse_setup_irq(up->irq);
+		} else {
+			/* Deactivate UART if access isn't allowed */
+			up->iobase = 0;
+		}
+		break;
+	}
+}
+
+static void __init jailhouse_serial_workaround(void)
+{
+	/*
+	 * There are flags inside setup_data that indicate availability of
+	 * platform UARTs since setup data version 2.
+	 *
+	 * In case of version 1, we don't know which UARTs belong Linux. In
+	 * this case, unconditionally register 1:1 mapping for legacy UART IRQs
+	 * 3 and 4.
+	 */
+	if (setup_data.hdr.version > 1)
+		serial8250_set_isa_configurator(jailhouse_serial_fixup);
+}
+#else /* !CONFIG_SERIAL_8250 */
+static inline void jailhouse_serial_workaround(void)
+{
+}
+#endif /* CONFIG_SERIAL_8250 */
+
 static void __init jailhouse_init_platform(void)
 {
 	u64 pa_data = boot_params.hdr.setup_data;
+	unsigned long setup_data_len;
 	struct setup_data header;
 	void *mapping;
 
@@ -163,16 +222,8 @@ static void __init jailhouse_init_platform(void)
 		memcpy(&header, mapping, sizeof(header));
 		early_memunmap(mapping, sizeof(header));
 
-		if (header.type == SETUP_JAILHOUSE &&
-		    header.len >= sizeof(setup_data)) {
-			pa_data += offsetof(struct setup_data, data);
-
-			mapping = early_memremap(pa_data, sizeof(setup_data));
-			memcpy(&setup_data, mapping, sizeof(setup_data));
-			early_memunmap(mapping, sizeof(setup_data));
-
+		if (header.type == SETUP_JAILHOUSE)
 			break;
-		}
 
 		pa_data = header.next;
 	}
@@ -180,13 +231,28 @@ static void __init jailhouse_init_platform(void)
 	if (!pa_data)
 		panic("Jailhouse: No valid setup data found");
 
-	if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION)
-		panic("Jailhouse: Unsupported setup data structure");
-
-	pmtmr_ioport = setup_data.pm_timer_address;
+	/* setup data must at least contain the header */
+	if (header.len < sizeof(setup_data.hdr))
+		goto unsupported;
+
+	pa_data += offsetof(struct setup_data, data);
+	setup_data_len = min_t(unsigned long, sizeof(setup_data),
+			       (unsigned long)header.len);
+	mapping = early_memremap(pa_data, setup_data_len);
+	memcpy(&setup_data, mapping, setup_data_len);
+	early_memunmap(mapping, setup_data_len);
+
+	if (setup_data.hdr.version == 0 ||
+	    setup_data.hdr.compatible_version !=
+		JAILHOUSE_SETUP_REQUIRED_VERSION ||
+	    (setup_data.hdr.version == 1 && header.len < SETUP_DATA_V1_LEN) ||
+	    (setup_data.hdr.version >= 2 && header.len < SETUP_DATA_V2_LEN))
+		goto unsupported;
+
+	pmtmr_ioport = setup_data.v1.pm_timer_address;
 	pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
 
-	precalibrated_tsc_khz = setup_data.tsc_khz;
+	precalibrated_tsc_khz = setup_data.v1.tsc_khz;
 	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
 
 	pci_probe = 0;
@@ -196,6 +262,12 @@ static void __init jailhouse_init_platform(void)
 	 * are none in a non-root cell.
 	 */
 	disable_acpi();
+
+	jailhouse_serial_workaround();
+	return;
+
+unsupported:
+	panic("Jailhouse: Unsupported setup data structure");
 }
 
 bool jailhouse_paravirt(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..9c4498ea0b3c 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,15 +16,7 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
-union jump_code_union {
-	char code[JUMP_LABEL_NOP_SIZE];
-	struct {
-		char jump;
-		int offset;
-	} __attribute__((packed));
-};
-
-static void bug_at(unsigned char *ip, int line)
+static void bug_at(const void *ip, int line)
 {
 	/*
 	 * The location is not an op that we were expecting.
@@ -35,42 +27,42 @@ static void bug_at(unsigned char *ip, int line)
 	BUG();
 }
 
-static void __jump_label_set_jump_code(struct jump_entry *entry,
-				       enum jump_label_type type,
-				       union jump_code_union *code,
-				       int init)
+static const void *
+__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
 {
 	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
 	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-	const void *expect;
+	const void *expect, *code;
+	const void *addr, *dest;
 	int line;
 
-	code->jump = 0xe9;
-	code->offset = jump_entry_target(entry) -
-		       (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+	addr = (void *)jump_entry_code(entry);
+	dest = (void *)jump_entry_target(entry);
+
+	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
 
 	if (init) {
 		expect = default_nop; line = __LINE__;
 	} else if (type == JUMP_LABEL_JMP) {
 		expect = ideal_nop; line = __LINE__;
 	} else {
-		expect = code->code; line = __LINE__;
+		expect = code; line = __LINE__;
 	}
 
-	if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
-		bug_at((void *)jump_entry_code(entry), line);
+	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
+		bug_at(addr, line);
 
 	if (type == JUMP_LABEL_NOP)
-		memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+		code = ideal_nop;
+
+	return code;
 }
 
-static void __ref __jump_label_transform(struct jump_entry *entry,
-					 enum jump_label_type type,
-					 int init)
+static void inline __jump_label_transform(struct jump_entry *entry,
+					  enum jump_label_type type,
+					  int init)
 {
-	union jump_code_union code;
-
-	__jump_label_set_jump_code(entry, type, &code, init);
+	const void *opcode = __jump_label_set_jump_code(entry, type, init);
 
 	/*
 	 * As long as only a single processor is running and the code is still
@@ -84,32 +76,33 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
 	 * always nop being the 'currently valid' instruction
 	 */
 	if (init || system_state == SYSTEM_BOOTING) {
-		text_poke_early((void *)jump_entry_code(entry), &code,
+		text_poke_early((void *)jump_entry_code(entry), opcode,
 				JUMP_LABEL_NOP_SIZE);
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
-		     (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+	text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
 }
 
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
+static void __ref jump_label_transform(struct jump_entry *entry,
+				       enum jump_label_type type,
+				       int init)
 {
 	mutex_lock(&text_mutex);
-	__jump_label_transform(entry, type, 0);
+	__jump_label_transform(entry, type, init);
 	mutex_unlock(&text_mutex);
 }
 
-#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
-static struct text_poke_loc tp_vec[TP_VEC_MAX];
-static int tp_vec_nr;
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	jump_label_transform(entry, type, 0);
+}
 
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
 				     enum jump_label_type type)
 {
-	struct text_poke_loc *tp;
-	void *entry_code;
+	const void *opcode;
 
 	if (system_state == SYSTEM_BOOTING) {
 		/*
@@ -119,55 +112,19 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 		return true;
 	}
 
-	/*
-	 * No more space in the vector, tell upper layer to apply
-	 * the queue before continuing.
-	 */
-	if (tp_vec_nr == TP_VEC_MAX)
-		return false;
-
-	tp = &tp_vec[tp_vec_nr];
-
-	entry_code = (void *)jump_entry_code(entry);
-
-	/*
-	 * The INT3 handler will do a bsearch in the queue, so we need entries
-	 * to be sorted. We can survive an unsorted list by rejecting the entry,
-	 * forcing the generic jump_label code to apply the queue. Warning once,
-	 * to raise the attention to the case of an unsorted entry that is
-	 * better not happen, because, in the worst case we will perform in the
-	 * same way as we do without batching - with some more overhead.
-	 */
-	if (tp_vec_nr > 0) {
-		int prev = tp_vec_nr - 1;
-		struct text_poke_loc *prev_tp = &tp_vec[prev];
-
-		if (WARN_ON_ONCE(prev_tp->addr > entry_code))
-			return false;
-	}
-
-	__jump_label_set_jump_code(entry, type,
-				   (union jump_code_union *) &tp->opcode, 0);
-
-	tp->addr = entry_code;
-	tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
-	tp->len = JUMP_LABEL_NOP_SIZE;
-
-	tp_vec_nr++;
-
+	mutex_lock(&text_mutex);
+	opcode = __jump_label_set_jump_code(entry, type, 0);
+	text_poke_queue((void *)jump_entry_code(entry),
+			opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	mutex_unlock(&text_mutex);
 	return true;
 }
 
 void arch_jump_label_transform_apply(void)
 {
-	if (!tp_vec_nr)
-		return;
-
 	mutex_lock(&text_mutex);
-	text_poke_bp_batch(tp_vec, tp_vec_nr);
+	text_poke_finish();
 	mutex_unlock(&text_mutex);
-
-	tp_vec_nr = 0;
 }
 
 static enum {
@@ -196,5 +153,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
 			jlstate = JL_STATE_NO_UPDATE;
 	}
 	if (jlstate == JL_STATE_UPDATE)
-		__jump_label_transform(entry, type, 1);
+		jump_label_transform(entry, type, 1);
 }
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index edaa30b20841..64b6da95af98 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -44,7 +44,12 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
 	if (count > node->len - pos)
 		count = node->len - pos;
 
-	pa = node->paddr + sizeof(struct setup_data) + pos;
+	pa = node->paddr + pos;
+
+	/* Is it direct data or invalid indirect one? */
+	if (!(node->type & SETUP_INDIRECT) || node->type == SETUP_INDIRECT)
+		pa += sizeof(struct setup_data);
+
 	p = memremap(pa, count, MEMREMAP_WB);
 	if (!p)
 		return -ENOMEM;
@@ -108,9 +113,17 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 			goto err_dir;
 		}
 
-		node->paddr = pa_data;
-		node->type = data->type;
-		node->len = data->len;
+		if (data->type == SETUP_INDIRECT &&
+		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+			node->paddr = ((struct setup_indirect *)data->data)->addr;
+			node->type  = ((struct setup_indirect *)data->data)->type;
+			node->len   = ((struct setup_indirect *)data->data)->len;
+		} else {
+			node->paddr = pa_data;
+			node->type  = data->type;
+			node->len   = data->len;
+		}
+
 		create_setup_data_node(d, no, node);
 		pa_data = data->next;
 
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index d2f4e706a428..f293d872602a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -177,7 +177,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
 	 * acpi_rsdp=<addr> on kernel command line to make second kernel boot
 	 * without efi.
 	 */
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return 0;
 
 	params->secure_boot = boot_params.secure_boot;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 43fc13c831af..4d7022a740ab 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -40,6 +40,7 @@
 #include <linux/frame.h>
 #include <linux/kasan.h>
 #include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -119,14 +120,14 @@ __synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
 void synthesize_reljump(void *dest, void *from, void *to)
 {
-	__synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
+	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
 }
 NOKPROBE_SYMBOL(synthesize_reljump);
 
 /* Insert a call instruction at address 'from', which calls address 'to'.*/
 void synthesize_relcall(void *dest, void *from, void *to)
 {
-	__synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
+	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
 }
 NOKPROBE_SYMBOL(synthesize_relcall);
 
@@ -301,7 +302,7 @@ static int can_probe(unsigned long paddr)
 		 * Another debugging subsystem might insert this breakpoint.
 		 * In that case, we can't recover it.
 		 */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
 		addr += insn.length;
 	}
@@ -351,8 +352,12 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
 	insn_get_length(insn);
 
+	/* We can not probe force emulate prefixed instruction */
+	if (insn_has_emulate_prefix(insn))
+		return 0;
+
 	/* Another subsystem puts a breakpoint, failed to recover */
-	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
 		return 0;
 
 	/* We should not singlestep on the exception masking instructions */
@@ -396,14 +401,14 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
 	int len = insn->length;
 
 	if (can_boost(insn, p->addr) &&
-	    MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
+	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
 		/*
 		 * These instructions can be executed directly if it
 		 * jumps back to correct address.
 		 */
 		synthesize_reljump(buf + len, p->ainsn.insn + len,
 				   p->addr + insn->length);
-		len += RELATIVEJUMP_SIZE;
+		len += JMP32_INSN_SIZE;
 		p->ainsn.boostable = true;
 	} else {
 		p->ainsn.boostable = false;
@@ -497,12 +502,14 @@ int arch_prepare_kprobe(struct kprobe *p)
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
+	text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
+	text_poke_sync();
 }
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, &p->opcode, 1);
+	text_poke_sync();
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -605,7 +612,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 	regs->flags |= X86_EFLAGS_TF;
 	regs->flags &= ~X86_EFLAGS_IF;
 	/* single step inline if the instruction is an int3 */
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
+	if (p->opcode == INT3_INSN_OPCODE)
 		regs->ip = (unsigned long)p->addr;
 	else
 		regs->ip = (unsigned long)p->ainsn.insn;
@@ -691,7 +698,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
 				reset_current_kprobe();
 			return 1;
 		}
-	} else if (*addr != BREAKPOINT_INSTRUCTION) {
+	} else if (*addr != INT3_INSN_OPCODE) {
 		/*
 		 * The breakpoint instruction was removed right
 		 * after we hit it.  Another cpu has removed
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index b348dd506d58..3f45b5c43a71 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -38,7 +38,7 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
 	long offs;
 	int i;
 
-	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+	for (i = 0; i < JMP32_INSN_SIZE; i++) {
 		kp = get_kprobe((void *)addr - i);
 		/* This function only handles jump-optimized kprobe */
 		if (kp && kprobe_optimized(kp)) {
@@ -62,10 +62,10 @@ found:
 
 	if (addr == (unsigned long)kp->addr) {
 		buf[0] = kp->opcode;
-		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
 	} else {
 		offs = addr - (unsigned long)kp->addr - 1;
-		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
 	}
 
 	return (unsigned long)buf;
@@ -141,8 +141,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
 #define TMPL_END_IDX \
 	((long)optprobe_template_end - (long)optprobe_template_entry)
 
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
 /* Optimized kprobe call back function: called from optinsn */
 static void
 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
@@ -162,7 +160,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 		regs->cs |= get_kernel_rpl();
 		regs->gs = 0;
 #endif
-		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
 		regs->orig_ax = ~0UL;
 
 		__this_cpu_write(current_kprobe, &op->kp);
@@ -179,7 +177,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 	struct insn insn;
 	int len = 0, ret;
 
-	while (len < RELATIVEJUMP_SIZE) {
+	while (len < JMP32_INSN_SIZE) {
 		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
 		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
@@ -271,7 +269,7 @@ static int can_optimize(unsigned long paddr)
 		return 0;
 
 	/* Check there is enough space for a relative jump. */
-	if (size - offset < RELATIVEJUMP_SIZE)
+	if (size - offset < JMP32_INSN_SIZE)
 		return 0;
 
 	/* Decode instructions */
@@ -290,15 +288,15 @@ static int can_optimize(unsigned long paddr)
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
 		/* Another subsystem puts a breakpoint */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
 		/* Check any instructions don't jump into target */
 		if (insn_is_indirect_jump(&insn) ||
-		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
-					 RELATIVE_ADDR_SIZE))
+		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+					 DISP32_SIZE))
 			return 0;
 		addr += insn.length;
 	}
@@ -374,7 +372,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	 * Verify if the address gap is in 2GB range, because this uses
 	 * a relative jump.
 	 */
-	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
 	if (abs(rel) > 0x7fffffff) {
 		ret = -ERANGE;
 		goto err;
@@ -401,7 +399,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	/* Set returning jmp instruction at the tail of out-of-line buffer */
 	synthesize_reljump(buf + len, slot + len,
 			   (u8 *)op->kp.addr + op->optinsn.size);
-	len += RELATIVEJUMP_SIZE;
+	len += JMP32_INSN_SIZE;
 
 	/* We have to use text_poke() for instruction buffer because it is RO */
 	text_poke(slot, buf, len);
@@ -416,44 +414,50 @@ err:
 }
 
 /*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
  * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
  */
 void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
-	u8 insn_buff[RELATIVEJUMP_SIZE];
+	u8 insn_buff[JMP32_INSN_SIZE];
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
 		s32 rel = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+			((long)op->kp.addr + JMP32_INSN_SIZE));
 
 		WARN_ON(kprobe_disabled(&op->kp));
 
 		/* Backup instructions which will be replaced by jump address */
-		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-		       RELATIVE_ADDR_SIZE);
+		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+		       DISP32_SIZE);
 
-		insn_buff[0] = RELATIVEJUMP_OPCODE;
+		insn_buff[0] = JMP32_INSN_OPCODE;
 		*(s32 *)(&insn_buff[1]) = rel;
 
-		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-			     op->optinsn.insn);
+		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
 
 		list_del_init(&op->list);
 	}
 }
 
-/* Replace a relative jump with a breakpoint (int3).  */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	u8 insn_buff[RELATIVEJUMP_SIZE];
-
-	/* Set int3 to first byte for kprobes */
-	insn_buff[0] = BREAKPOINT_INSTRUCTION;
-	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     op->optinsn.insn);
+	arch_arm_kprobe(&op->kp);
+	text_poke(op->kp.addr + INT3_INSN_SIZE,
+		  op->optinsn.copied_insn, DISP32_SIZE);
+	text_poke_sync();
 }
 
 /*
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 7969da939213..d0a19121c6a4 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -100,7 +100,12 @@ static int __init get_setup_data_size(int nr, size_t *size)
 		if (!data)
 			return -ENOMEM;
 		if (nr == i) {
-			*size = data->len;
+			if (data->type == SETUP_INDIRECT &&
+			    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+				*size = ((struct setup_indirect *)data->data)->len;
+			else
+				*size = data->len;
+
 			memunmap(data);
 			return 0;
 		}
@@ -130,7 +135,10 @@ static ssize_t type_show(struct kobject *kobj,
 	if (!data)
 		return -ENOMEM;
 
-	ret = sprintf(buf, "0x%x\n", data->type);
+	if (data->type == SETUP_INDIRECT)
+		ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
+	else
+		ret = sprintf(buf, "0x%x\n", data->type);
 	memunmap(data);
 	return ret;
 }
@@ -142,7 +150,7 @@ static ssize_t setup_data_data_read(struct file *fp,
 				    loff_t off, size_t count)
 {
 	int nr, ret = 0;
-	u64 paddr;
+	u64 paddr, len;
 	struct setup_data *data;
 	void *p;
 
@@ -157,19 +165,28 @@ static ssize_t setup_data_data_read(struct file *fp,
 	if (!data)
 		return -ENOMEM;
 
-	if (off > data->len) {
+	if (data->type == SETUP_INDIRECT &&
+	    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+		paddr = ((struct setup_indirect *)data->data)->addr;
+		len = ((struct setup_indirect *)data->data)->len;
+	} else {
+		paddr += sizeof(*data);
+		len = data->len;
+	}
+
+	if (off > len) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (count > data->len - off)
-		count = data->len - off;
+	if (count > len - off)
+		count = len - off;
 
 	if (!count)
 		goto out;
 
 	ret = count;
-	p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB);
+	p = memremap(paddr, len, MEMREMAP_WB);
 	if (!p) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e820568ed4d5..81045aabb6f4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -33,6 +33,7 @@
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
 #include <asm/tlb.h>
+#include <asm/cpuidle_haltpoll.h>
 
 static int kvmapf = 1;
 
@@ -244,17 +245,13 @@ NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 dotraplinkage void
 do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	enum ctx_state prev_state;
-
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
 		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
-		prev_state = exception_enter();
 		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
-		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b2463fcb20a8..c57e1ca70fd1 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -28,6 +28,89 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active.  The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs.  This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT.  Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed.  There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode.  Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
 
 static void refresh_ldt_segments(void)
 {
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 5dcd438ad8f2..16e125a50b33 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -298,48 +298,6 @@ static void load_segments(void)
 		);
 }
 
-#ifdef CONFIG_KEXEC_FILE
-/* Update purgatory as needed after various image segments have been prepared */
-static int arch_update_purgatory(struct kimage *image)
-{
-	int ret = 0;
-
-	if (!image->file_mode)
-		return 0;
-
-	/* Setup copying of backup region */
-	if (image->type == KEXEC_TYPE_CRASH) {
-		ret = kexec_purgatory_get_set_symbol(image,
-				"purgatory_backup_dest",
-				&image->arch.backup_load_addr,
-				sizeof(image->arch.backup_load_addr), 0);
-		if (ret)
-			return ret;
-
-		ret = kexec_purgatory_get_set_symbol(image,
-				"purgatory_backup_src",
-				&image->arch.backup_src_start,
-				sizeof(image->arch.backup_src_start), 0);
-		if (ret)
-			return ret;
-
-		ret = kexec_purgatory_get_set_symbol(image,
-				"purgatory_backup_sz",
-				&image->arch.backup_src_sz,
-				sizeof(image->arch.backup_src_sz), 0);
-		if (ret)
-			return ret;
-	}
-
-	return ret;
-}
-#else /* !CONFIG_KEXEC_FILE */
-static inline int arch_update_purgatory(struct kimage *image)
-{
-	return 0;
-}
-#endif /* CONFIG_KEXEC_FILE */
-
 int machine_kexec_prepare(struct kimage *image)
 {
 	unsigned long start_pgtable;
@@ -353,11 +311,6 @@ int machine_kexec_prepare(struct kimage *image)
 	if (result)
 		return result;
 
-	/* update purgatory as needed */
-	result = arch_update_purgatory(image);
-	if (result)
-		return result;
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e676a9916c49..54c21d6abd5a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
 }
 fs_initcall(nmi_warning_debugfs);
 
-static void nmi_max_handler(struct irq_work *w)
+static void nmi_check_duration(struct nmiaction *action, u64 duration)
 {
-	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
+	u64 whole_msecs = READ_ONCE(action->max_duration);
 	int remainder_ns, decimal_msecs;
-	u64 whole_msecs = READ_ONCE(a->max_duration);
+
+	if (duration < nmi_longest_ns || duration < action->max_duration)
+		return;
+
+	action->max_duration = duration;
 
 	remainder_ns = do_div(whole_msecs, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		a->handler, whole_msecs, decimal_msecs);
+		action->handler, whole_msecs, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
 		delta = sched_clock() - delta;
 		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
-		if (delta < nmi_longest_ns || delta < a->max_duration)
-			continue;
-
-		a->max_duration = delta;
-		irq_work_queue(&a->irq_work);
+		nmi_check_duration(a, delta);
 	}
 
 	rcu_read_unlock();
@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	if (!action->handler)
 		return -EINVAL;
 
-	init_irq_work(&action->irq_work, nmi_max_handler);
-
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 59d3d2763a9e..789f5e4f89de 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = {
 	.cpu.iret		= native_iret,
 	.cpu.swapgs		= native_swapgs,
 
-	.cpu.set_iopl_mask	= native_set_iopl_mask,
-
 	.cpu.start_context_switch	= paravirt_nop,
 	.cpu.end_context_switch		= paravirt_nop,
 
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
deleted file mode 100644
index 23fdec030c37..000000000000
--- a/arch/x86/kernel/pci-calgary_64.c
+++ /dev/null
@@ -1,1586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Derived from arch/powerpc/kernel/iommu.c
- *
- * Copyright IBM Corporation, 2006-2007
- * Copyright (C) 2006  Jon Mason <jdmason@kudzu.us>
- *
- * Author: Jon Mason <jdmason@kudzu.us>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
-
- */
-
-#define pr_fmt(fmt) "Calgary: " fmt
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/crash_dump.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-direct.h>
-#include <linux/bitmap.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/scatterlist.h>
-#include <linux/iommu-helper.h>
-
-#include <asm/iommu.h>
-#include <asm/calgary.h>
-#include <asm/tce.h>
-#include <asm/pci-direct.h>
-#include <asm/dma.h>
-#include <asm/rio.h>
-#include <asm/bios_ebda.h>
-#include <asm/x86_init.h>
-#include <asm/iommu_table.h>
-
-#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
-int use_calgary __read_mostly = 1;
-#else
-int use_calgary __read_mostly = 0;
-#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
-
-#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
-#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308
-
-/* register offsets inside the host bridge space */
-#define CALGARY_CONFIG_REG	0x0108
-#define PHB_CSR_OFFSET		0x0110 /* Channel Status */
-#define PHB_PLSSR_OFFSET	0x0120
-#define PHB_CONFIG_RW_OFFSET	0x0160
-#define PHB_IOBASE_BAR_LOW	0x0170
-#define PHB_IOBASE_BAR_HIGH	0x0180
-#define PHB_MEM_1_LOW		0x0190
-#define PHB_MEM_1_HIGH		0x01A0
-#define PHB_IO_ADDR_SIZE	0x01B0
-#define PHB_MEM_1_SIZE		0x01C0
-#define PHB_MEM_ST_OFFSET	0x01D0
-#define PHB_AER_OFFSET		0x0200
-#define PHB_CONFIG_0_HIGH	0x0220
-#define PHB_CONFIG_0_LOW	0x0230
-#define PHB_CONFIG_0_END	0x0240
-#define PHB_MEM_2_LOW		0x02B0
-#define PHB_MEM_2_HIGH		0x02C0
-#define PHB_MEM_2_SIZE_HIGH	0x02D0
-#define PHB_MEM_2_SIZE_LOW	0x02E0
-#define PHB_DOSHOLE_OFFSET	0x08E0
-
-/* CalIOC2 specific */
-#define PHB_SAVIOR_L2		0x0DB0
-#define PHB_PAGE_MIG_CTRL	0x0DA8
-#define PHB_PAGE_MIG_DEBUG	0x0DA0
-#define PHB_ROOT_COMPLEX_STATUS 0x0CB0
-
-/* PHB_CONFIG_RW */
-#define PHB_TCE_ENABLE		0x20000000
-#define PHB_SLOT_DISABLE	0x1C000000
-#define PHB_DAC_DISABLE		0x01000000
-#define PHB_MEM2_ENABLE		0x00400000
-#define PHB_MCSR_ENABLE		0x00100000
-/* TAR (Table Address Register) */
-#define TAR_SW_BITS		0x0000ffffffff800fUL
-#define TAR_VALID		0x0000000000000008UL
-/* CSR (Channel/DMA Status Register) */
-#define CSR_AGENT_MASK		0xffe0ffff
-/* CCR (Calgary Configuration Register) */
-#define CCR_2SEC_TIMEOUT	0x000000000000000EUL
-/* PMCR/PMDR (Page Migration Control/Debug Registers */
-#define PMR_SOFTSTOP		0x80000000
-#define PMR_SOFTSTOPFAULT	0x40000000
-#define PMR_HARDSTOP		0x20000000
-
-/*
- * The maximum PHB bus number.
- * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384
- * x3950M2: 4 chassis, 48 PHBs per chassis        = 192
- * x3950 (PCIE): 8 chassis, 32 PHBs per chassis   = 256
- * x3950 (PCIX): 8 chassis, 16 PHBs per chassis   = 128
- */
-#define MAX_PHB_BUS_NUM		256
-
-#define PHBS_PER_CALGARY	  4
-
-/* register offsets in Calgary's internal register space */
-static const unsigned long tar_offsets[] = {
-	0x0580 /* TAR0 */,
-	0x0588 /* TAR1 */,
-	0x0590 /* TAR2 */,
-	0x0598 /* TAR3 */
-};
-
-static const unsigned long split_queue_offsets[] = {
-	0x4870 /* SPLIT QUEUE 0 */,
-	0x5870 /* SPLIT QUEUE 1 */,
-	0x6870 /* SPLIT QUEUE 2 */,
-	0x7870 /* SPLIT QUEUE 3 */
-};
-
-static const unsigned long phb_offsets[] = {
-	0x8000 /* PHB0 */,
-	0x9000 /* PHB1 */,
-	0xA000 /* PHB2 */,
-	0xB000 /* PHB3 */
-};
-
-/* PHB debug registers */
-
-static const unsigned long phb_debug_offsets[] = {
-	0x4000	/* PHB 0 DEBUG */,
-	0x5000	/* PHB 1 DEBUG */,
-	0x6000	/* PHB 2 DEBUG */,
-	0x7000	/* PHB 3 DEBUG */
-};
-
-/*
- * STUFF register for each debug PHB,
- * byte 1 = start bus number, byte 2 = end bus number
- */
-
-#define PHB_DEBUG_STUFF_OFFSET	0x0020
-
-unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
-static int translate_empty_slots __read_mostly = 0;
-static int calgary_detected __read_mostly = 0;
-
-static struct rio_table_hdr	*rio_table_hdr __initdata;
-static struct scal_detail	*scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail	*rio_devs[MAX_NUMNODES * 4] __initdata;
-
-struct calgary_bus_info {
-	void *tce_space;
-	unsigned char translation_disabled;
-	signed char phbid;
-	void __iomem *bbar;
-};
-
-static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calgary_tce_cache_blast(struct iommu_table *tbl);
-static void calgary_dump_error_regs(struct iommu_table *tbl);
-static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calioc2_tce_cache_blast(struct iommu_table *tbl);
-static void calioc2_dump_error_regs(struct iommu_table *tbl);
-static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
-static void get_tce_space_from_tar(void);
-
-static const struct cal_chipset_ops calgary_chip_ops = {
-	.handle_quirks = calgary_handle_quirks,
-	.tce_cache_blast = calgary_tce_cache_blast,
-	.dump_error_regs = calgary_dump_error_regs
-};
-
-static const struct cal_chipset_ops calioc2_chip_ops = {
-	.handle_quirks = calioc2_handle_quirks,
-	.tce_cache_blast = calioc2_tce_cache_blast,
-	.dump_error_regs = calioc2_dump_error_regs
-};
-
-static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
-
-static inline int translation_enabled(struct iommu_table *tbl)
-{
-	/* only PHBs with translation enabled have an IOMMU table */
-	return (tbl != NULL);
-}
-
-static void iommu_range_reserve(struct iommu_table *tbl,
-	unsigned long start_addr, unsigned int npages)
-{
-	unsigned long index;
-	unsigned long end;
-	unsigned long flags;
-
-	index = start_addr >> PAGE_SHIFT;
-
-	/* bail out if we're asked to reserve a region we don't cover */
-	if (index >= tbl->it_size)
-		return;
-
-	end = index + npages;
-	if (end > tbl->it_size) /* don't go off the table */
-		end = tbl->it_size;
-
-	spin_lock_irqsave(&tbl->it_lock, flags);
-
-	bitmap_set(tbl->it_map, index, npages);
-
-	spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static unsigned long iommu_range_alloc(struct device *dev,
-				       struct iommu_table *tbl,
-				       unsigned int npages)
-{
-	unsigned long flags;
-	unsigned long offset;
-	unsigned long boundary_size;
-
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-			      PAGE_SIZE) >> PAGE_SHIFT;
-
-	BUG_ON(npages == 0);
-
-	spin_lock_irqsave(&tbl->it_lock, flags);
-
-	offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint,
-				  npages, 0, boundary_size, 0);
-	if (offset == ~0UL) {
-		tbl->chip_ops->tce_cache_blast(tbl);
-
-		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
-					  npages, 0, boundary_size, 0);
-		if (offset == ~0UL) {
-			pr_warn("IOMMU full\n");
-			spin_unlock_irqrestore(&tbl->it_lock, flags);
-			if (panic_on_overflow)
-				panic("Calgary: fix the allocator.\n");
-			else
-				return DMA_MAPPING_ERROR;
-		}
-	}
-
-	tbl->it_hint = offset + npages;
-	BUG_ON(tbl->it_hint > tbl->it_size);
-
-	spin_unlock_irqrestore(&tbl->it_lock, flags);
-
-	return offset;
-}
-
-static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
-			      void *vaddr, unsigned int npages, int direction)
-{
-	unsigned long entry;
-	dma_addr_t ret;
-
-	entry = iommu_range_alloc(dev, tbl, npages);
-	if (unlikely(entry == DMA_MAPPING_ERROR)) {
-		pr_warn("failed to allocate %u pages in iommu %p\n",
-			npages, tbl);
-		return DMA_MAPPING_ERROR;
-	}
-
-	/* set the return dma address */
-	ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
-
-	/* put the TCEs in the HW table */
-	tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
-		  direction);
-	return ret;
-}
-
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
-	unsigned int npages)
-{
-	unsigned long entry;
-	unsigned long flags;
-
-	/* were we called with bad_dma_address? */
-	if (unlikely(dma_addr == DMA_MAPPING_ERROR)) {
-		WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
-		       "address 0x%Lx\n", dma_addr);
-		return;
-	}
-
-	entry = dma_addr >> PAGE_SHIFT;
-
-	BUG_ON(entry + npages > tbl->it_size);
-
-	tce_free(tbl, entry, npages);
-
-	spin_lock_irqsave(&tbl->it_lock, flags);
-
-	bitmap_clear(tbl->it_map, entry, npages);
-
-	spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static inline struct iommu_table *find_iommu_table(struct device *dev)
-{
-	struct pci_dev *pdev;
-	struct pci_bus *pbus;
-	struct iommu_table *tbl;
-
-	pdev = to_pci_dev(dev);
-
-	/* search up the device tree for an iommu */
-	pbus = pdev->bus;
-	do {
-		tbl = pci_iommu(pbus);
-		if (tbl && tbl->it_busno == pbus->number)
-			break;
-		tbl = NULL;
-		pbus = pbus->parent;
-	} while (pbus);
-
-	BUG_ON(tbl && (tbl->it_busno != pbus->number));
-
-	return tbl;
-}
-
-static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
-			     int nelems,enum dma_data_direction dir,
-			     unsigned long attrs)
-{
-	struct iommu_table *tbl = find_iommu_table(dev);
-	struct scatterlist *s;
-	int i;
-
-	if (!translation_enabled(tbl))
-		return;
-
-	for_each_sg(sglist, s, nelems, i) {
-		unsigned int npages;
-		dma_addr_t dma = s->dma_address;
-		unsigned int dmalen = s->dma_length;
-
-		if (dmalen == 0)
-			break;
-
-		npages = iommu_num_pages(dma, dmalen, PAGE_SIZE);
-		iommu_free(tbl, dma, npages);
-	}
-}
-
-static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
-			  int nelems, enum dma_data_direction dir,
-			  unsigned long attrs)
-{
-	struct iommu_table *tbl = find_iommu_table(dev);
-	struct scatterlist *s;
-	unsigned long vaddr;
-	unsigned int npages;
-	unsigned long entry;
-	int i;
-
-	for_each_sg(sg, s, nelems, i) {
-		BUG_ON(!sg_page(s));
-
-		vaddr = (unsigned long) sg_virt(s);
-		npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
-
-		entry = iommu_range_alloc(dev, tbl, npages);
-		if (entry == DMA_MAPPING_ERROR) {
-			/* makes sure unmap knows to stop */
-			s->dma_length = 0;
-			goto error;
-		}
-
-		s->dma_address = (entry << PAGE_SHIFT) | s->offset;
-
-		/* insert into HW table */
-		tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir);
-
-		s->dma_length = s->length;
-	}
-
-	return nelems;
-error:
-	calgary_unmap_sg(dev, sg, nelems, dir, 0);
-	for_each_sg(sg, s, nelems, i) {
-		sg->dma_address = DMA_MAPPING_ERROR;
-		sg->dma_length = 0;
-	}
-	return 0;
-}
-
-static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
-				   unsigned long offset, size_t size,
-				   enum dma_data_direction dir,
-				   unsigned long attrs)
-{
-	void *vaddr = page_address(page) + offset;
-	unsigned long uaddr;
-	unsigned int npages;
-	struct iommu_table *tbl = find_iommu_table(dev);
-
-	uaddr = (unsigned long)vaddr;
-	npages = iommu_num_pages(uaddr, size, PAGE_SIZE);
-
-	return iommu_alloc(dev, tbl, vaddr, npages, dir);
-}
-
-static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
-			       size_t size, enum dma_data_direction dir,
-			       unsigned long attrs)
-{
-	struct iommu_table *tbl = find_iommu_table(dev);
-	unsigned int npages;
-
-	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
-	iommu_free(tbl, dma_addr, npages);
-}
-
-static void* calgary_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
-{
-	void *ret = NULL;
-	dma_addr_t mapping;
-	unsigned int npages, order;
-	struct iommu_table *tbl = find_iommu_table(dev);
-
-	size = PAGE_ALIGN(size); /* size rounded up to full pages */
-	npages = size >> PAGE_SHIFT;
-	order = get_order(size);
-
-	/* alloc enough pages (and possibly more) */
-	ret = (void *)__get_free_pages(flag, order);
-	if (!ret)
-		goto error;
-	memset(ret, 0, size);
-
-	/* set up tces to cover the allocated range */
-	mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
-	if (mapping == DMA_MAPPING_ERROR)
-		goto free;
-	*dma_handle = mapping;
-	return ret;
-free:
-	free_pages((unsigned long)ret, get_order(size));
-	ret = NULL;
-error:
-	return ret;
-}
-
-static void calgary_free_coherent(struct device *dev, size_t size,
-				  void *vaddr, dma_addr_t dma_handle,
-				  unsigned long attrs)
-{
-	unsigned int npages;
-	struct iommu_table *tbl = find_iommu_table(dev);
-
-	size = PAGE_ALIGN(size);
-	npages = size >> PAGE_SHIFT;
-
-	iommu_free(tbl, dma_handle, npages);
-	free_pages((unsigned long)vaddr, get_order(size));
-}
-
-static const struct dma_map_ops calgary_dma_ops = {
-	.alloc = calgary_alloc_coherent,
-	.free = calgary_free_coherent,
-	.map_sg = calgary_map_sg,
-	.unmap_sg = calgary_unmap_sg,
-	.map_page = calgary_map_page,
-	.unmap_page = calgary_unmap_page,
-	.dma_supported = dma_direct_supported,
-	.mmap = dma_common_mmap,
-	.get_sgtable = dma_common_get_sgtable,
-};
-
-static inline void __iomem * busno_to_bbar(unsigned char num)
-{
-	return bus_info[num].bbar;
-}
-
-static inline int busno_to_phbid(unsigned char num)
-{
-	return bus_info[num].phbid;
-}
-
-static inline unsigned long split_queue_offset(unsigned char num)
-{
-	size_t idx = busno_to_phbid(num);
-
-	return split_queue_offsets[idx];
-}
-
-static inline unsigned long tar_offset(unsigned char num)
-{
-	size_t idx = busno_to_phbid(num);
-
-	return tar_offsets[idx];
-}
-
-static inline unsigned long phb_offset(unsigned char num)
-{
-	size_t idx = busno_to_phbid(num);
-
-	return phb_offsets[idx];
-}
-
-static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
-{
-	unsigned long target = ((unsigned long)bar) | offset;
-	return (void __iomem*)target;
-}
-
-static inline int is_calioc2(unsigned short device)
-{
-	return (device == PCI_DEVICE_ID_IBM_CALIOC2);
-}
-
-static inline int is_calgary(unsigned short device)
-{
-	return (device == PCI_DEVICE_ID_IBM_CALGARY);
-}
-
-static inline int is_cal_pci_dev(unsigned short device)
-{
-	return (is_calgary(device) || is_calioc2(device));
-}
-
-static void calgary_tce_cache_blast(struct iommu_table *tbl)
-{
-	u64 val;
-	u32 aer;
-	int i = 0;
-	void __iomem *bbar = tbl->bbar;
-	void __iomem *target;
-
-	/* disable arbitration on the bus */
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
-	aer = readl(target);
-	writel(0, target);
-
-	/* read plssr to ensure it got there */
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
-	val = readl(target);
-
-	/* poll split queues until all DMA activity is done */
-	target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
-	do {
-		val = readq(target);
-		i++;
-	} while ((val & 0xff) != 0xff && i < 100);
-	if (i == 100)
-		pr_warn("PCI bus not quiesced, continuing anyway\n");
-
-	/* invalidate TCE cache */
-	target = calgary_reg(bbar, tar_offset(tbl->it_busno));
-	writeq(tbl->tar_val, target);
-
-	/* enable arbitration */
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
-	writel(aer, target);
-	(void)readl(target); /* flush */
-}
-
-static void calioc2_tce_cache_blast(struct iommu_table *tbl)
-{
-	void __iomem *bbar = tbl->bbar;
-	void __iomem *target;
-	u64 val64;
-	u32 val;
-	int i = 0;
-	int count = 1;
-	unsigned char bus = tbl->it_busno;
-
-begin:
-	printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast "
-	       "sequence - count %d\n", bus, count);
-
-	/* 1. using the Page Migration Control reg set SoftStop */
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target);
-	val |= PMR_SOFTSTOP;
-	printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target);
-	writel(cpu_to_be32(val), target);
-
-	/* 2. poll split queues until all DMA activity is done */
-	printk(KERN_DEBUG "2a. starting to poll split queues\n");
-	target = calgary_reg(bbar, split_queue_offset(bus));
-	do {
-		val64 = readq(target);
-		i++;
-	} while ((val64 & 0xff) != 0xff && i < 100);
-	if (i == 100)
-		pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
-
-	/* 3. poll Page Migration DEBUG for SoftStopFault */
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target);
-
-	/* 4. if SoftStopFault - goto (1) */
-	if (val & PMR_SOFTSTOPFAULT) {
-		if (++count < 100)
-			goto begin;
-		else {
-			pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
-			return; /* pray for the best */
-		}
-	}
-
-	/* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
-	printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target);
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target);
-
-	/* 6. invalidate TCE cache */
-	printk(KERN_DEBUG "6. invalidating TCE cache\n");
-	target = calgary_reg(bbar, tar_offset(bus));
-	writeq(tbl->tar_val, target);
-
-	/* 7. Re-read PMCR */
-	printk(KERN_DEBUG "7a. Re-reading PMCR\n");
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target);
-
-	/* 8. Remove HardStop */
-	printk(KERN_DEBUG "8a. removing HardStop from PMCR\n");
-	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
-	val = 0;
-	printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target);
-	writel(cpu_to_be32(val), target);
-	val = be32_to_cpu(readl(target));
-	printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target);
-}
-
-static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
-	u64 limit)
-{
-	unsigned int numpages;
-
-	limit = limit | 0xfffff;
-	limit++;
-
-	numpages = ((limit - start) >> PAGE_SHIFT);
-	iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
-}
-
-static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
-{
-	void __iomem *target;
-	u64 low, high, sizelow;
-	u64 start, limit;
-	struct iommu_table *tbl = pci_iommu(dev->bus);
-	unsigned char busnum = dev->bus->number;
-	void __iomem *bbar = tbl->bbar;
-
-	/* peripheral MEM_1 region */
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
-	low = be32_to_cpu(readl(target));
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
-	high = be32_to_cpu(readl(target));
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
-	sizelow = be32_to_cpu(readl(target));
-
-	start = (high << 32) | low;
-	limit = sizelow;
-
-	calgary_reserve_mem_region(dev, start, limit);
-}
-
-static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
-{
-	void __iomem *target;
-	u32 val32;
-	u64 low, high, sizelow, sizehigh;
-	u64 start, limit;
-	struct iommu_table *tbl = pci_iommu(dev->bus);
-	unsigned char busnum = dev->bus->number;
-	void __iomem *bbar = tbl->bbar;
-
-	/* is it enabled? */
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
-	val32 = be32_to_cpu(readl(target));
-	if (!(val32 & PHB_MEM2_ENABLE))
-		return;
-
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
-	low = be32_to_cpu(readl(target));
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
-	high = be32_to_cpu(readl(target));
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
-	sizelow = be32_to_cpu(readl(target));
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
-	sizehigh = be32_to_cpu(readl(target));
-
-	start = (high << 32) | low;
-	limit = (sizehigh << 32) | sizelow;
-
-	calgary_reserve_mem_region(dev, start, limit);
-}
-
-/*
- * some regions of the IO address space do not get translated, so we
- * must not give devices IO addresses in those regions. The regions
- * are the 640KB-1MB region and the two PCI peripheral memory holes.
- * Reserve all of them in the IOMMU bitmap to avoid giving them out
- * later.
- */
-static void __init calgary_reserve_regions(struct pci_dev *dev)
-{
-	unsigned int npages;
-	u64 start;
-	struct iommu_table *tbl = pci_iommu(dev->bus);
-
-	/* avoid the BIOS/VGA first 640KB-1MB region */
-	/* for CalIOC2 - avoid the entire first MB */
-	if (is_calgary(dev->device)) {
-		start = (640 * 1024);
-		npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
-	} else { /* calioc2 */
-		start = 0;
-		npages = (1 * 1024 * 1024) >> PAGE_SHIFT;
-	}
-	iommu_range_reserve(tbl, start, npages);
-
-	/* reserve the two PCI peripheral memory regions in IO space */
-	calgary_reserve_peripheral_mem_1(dev);
-	calgary_reserve_peripheral_mem_2(dev);
-}
-
-static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
-{
-	u64 val64;
-	u64 table_phys;
-	void __iomem *target;
-	int ret;
-	struct iommu_table *tbl;
-
-	/* build TCE tables for each PHB */
-	ret = build_tce_table(dev, bbar);
-	if (ret)
-		return ret;
-
-	tbl = pci_iommu(dev->bus);
-	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-
-	if (is_kdump_kernel())
-		calgary_init_bitmap_from_tce_table(tbl);
-	else
-		tce_free(tbl, 0, tbl->it_size);
-
-	if (is_calgary(dev->device))
-		tbl->chip_ops = &calgary_chip_ops;
-	else if (is_calioc2(dev->device))
-		tbl->chip_ops = &calioc2_chip_ops;
-	else
-		BUG();
-
-	calgary_reserve_regions(dev);
-
-	/* set TARs for each PHB */
-	target = calgary_reg(bbar, tar_offset(dev->bus->number));
-	val64 = be64_to_cpu(readq(target));
-
-	/* zero out all TAR bits under sw control */
-	val64 &= ~TAR_SW_BITS;
-	table_phys = (u64)__pa(tbl->it_base);
-
-	val64 |= table_phys;
-
-	BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
-	val64 |= (u64) specified_table_size;
-
-	tbl->tar_val = cpu_to_be64(val64);
-
-	writeq(tbl->tar_val, target);
-	readq(target); /* flush */
-
-	return 0;
-}
-
-static void __init calgary_free_bus(struct pci_dev *dev)
-{
-	u64 val64;
-	struct iommu_table *tbl = pci_iommu(dev->bus);
-	void __iomem *target;
-	unsigned int bitmapsz;
-
-	target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
-	val64 = be64_to_cpu(readq(target));
-	val64 &= ~TAR_SW_BITS;
-	writeq(cpu_to_be64(val64), target);
-	readq(target); /* flush */
-
-	bitmapsz = tbl->it_size / BITS_PER_BYTE;
-	free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
-	tbl->it_map = NULL;
-
-	kfree(tbl);
-	
-	set_pci_iommu(dev->bus, NULL);
-
-	/* Can't free bootmem allocated memory after system is up :-( */
-	bus_info[dev->bus->number].tce_space = NULL;
-}
-
-static void calgary_dump_error_regs(struct iommu_table *tbl)
-{
-	void __iomem *bbar = tbl->bbar;
-	void __iomem *target;
-	u32 csr, plssr;
-
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
-	csr = be32_to_cpu(readl(target));
-
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
-	plssr = be32_to_cpu(readl(target));
-
-	/* If no error, the agent ID in the CSR is not valid */
-	pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
-		 tbl->it_busno, csr, plssr);
-}
-
-static void calioc2_dump_error_regs(struct iommu_table *tbl)
-{
-	void __iomem *bbar = tbl->bbar;
-	u32 csr, csmr, plssr, mck, rcstat;
-	void __iomem *target;
-	unsigned long phboff = phb_offset(tbl->it_busno);
-	unsigned long erroff;
-	u32 errregs[7];
-	int i;
-
-	/* dump CSR */
-	target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET);
-	csr = be32_to_cpu(readl(target));
-	/* dump PLSSR */
-	target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET);
-	plssr = be32_to_cpu(readl(target));
-	/* dump CSMR */
-	target = calgary_reg(bbar, phboff | 0x290);
-	csmr = be32_to_cpu(readl(target));
-	/* dump mck */
-	target = calgary_reg(bbar, phboff | 0x800);
-	mck = be32_to_cpu(readl(target));
-
-	pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
-
-	pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
-		 csr, plssr, csmr, mck);
-
-	/* dump rest of error regs */
-	pr_emerg("");
-	for (i = 0; i < ARRAY_SIZE(errregs); i++) {
-		/* err regs are at 0x810 - 0x870 */
-		erroff = (0x810 + (i * 0x10));
-		target = calgary_reg(bbar, phboff | erroff);
-		errregs[i] = be32_to_cpu(readl(target));
-		pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
-	}
-	pr_cont("\n");
-
-	/* root complex status */
-	target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
-	rcstat = be32_to_cpu(readl(target));
-	printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat,
-	       PHB_ROOT_COMPLEX_STATUS);
-}
-
-static void calgary_watchdog(struct timer_list *t)
-{
-	struct iommu_table *tbl = from_timer(tbl, t, watchdog_timer);
-	void __iomem *bbar = tbl->bbar;
-	u32 val32;
-	void __iomem *target;
-
-	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
-	val32 = be32_to_cpu(readl(target));
-
-	/* If no error, the agent ID in the CSR is not valid */
-	if (val32 & CSR_AGENT_MASK) {
-		tbl->chip_ops->dump_error_regs(tbl);
-
-		/* reset error */
-		writel(0, target);
-
-		/* Disable bus that caused the error */
-		target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
-				     PHB_CONFIG_RW_OFFSET);
-		val32 = be32_to_cpu(readl(target));
-		val32 |= PHB_SLOT_DISABLE;
-		writel(cpu_to_be32(val32), target);
-		readl(target); /* flush */
-	} else {
-		/* Reset the timer */
-		mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
-	}
-}
-
-static void __init calgary_set_split_completion_timeout(void __iomem *bbar,
-	unsigned char busnum, unsigned long timeout)
-{
-	u64 val64;
-	void __iomem *target;
-	unsigned int phb_shift = ~0; /* silence gcc */
-	u64 mask;
-
-	switch (busno_to_phbid(busnum)) {
-	case 0: phb_shift = (63 - 19);
-		break;
-	case 1: phb_shift = (63 - 23);
-		break;
-	case 2: phb_shift = (63 - 27);
-		break;
-	case 3: phb_shift = (63 - 35);
-		break;
-	default:
-		BUG_ON(busno_to_phbid(busnum));
-	}
-
-	target = calgary_reg(bbar, CALGARY_CONFIG_REG);
-	val64 = be64_to_cpu(readq(target));
-
-	/* zero out this PHB's timer bits */
-	mask = ~(0xFUL << phb_shift);
-	val64 &= mask;
-	val64 |= (timeout << phb_shift);
-	writeq(cpu_to_be64(val64), target);
-	readq(target); /* flush */
-}
-
-static void __init calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
-	unsigned char busnum = dev->bus->number;
-	void __iomem *bbar = tbl->bbar;
-	void __iomem *target;
-	u32 val;
-
-	/*
-	 * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1
-	 */
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2);
-	val = cpu_to_be32(readl(target));
-	val |= 0x00800000;
-	writel(cpu_to_be32(val), target);
-}
-
-static void __init calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
-	unsigned char busnum = dev->bus->number;
-
-	/*
-	 * Give split completion a longer timeout on bus 1 for aic94xx
-	 * http://bugzilla.kernel.org/show_bug.cgi?id=7180
-	 */
-	if (is_calgary(dev->device) && (busnum == 1))
-		calgary_set_split_completion_timeout(tbl->bbar, busnum,
-						     CCR_2SEC_TIMEOUT);
-}
-
-static void __init calgary_enable_translation(struct pci_dev *dev)
-{
-	u32 val32;
-	unsigned char busnum;
-	void __iomem *target;
-	void __iomem *bbar;
-	struct iommu_table *tbl;
-
-	busnum = dev->bus->number;
-	tbl = pci_iommu(dev->bus);
-	bbar = tbl->bbar;
-
-	/* enable TCE in PHB Config Register */
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
-	val32 = be32_to_cpu(readl(target));
-	val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
-
-	printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n",
-	       (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ?
-	       "Calgary" : "CalIOC2", busnum);
-	printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
-	       "bus.\n");
-
-	writel(cpu_to_be32(val32), target);
-	readl(target); /* flush */
-
-	timer_setup(&tbl->watchdog_timer, calgary_watchdog, 0);
-	mod_timer(&tbl->watchdog_timer, jiffies);
-}
-
-static void __init calgary_disable_translation(struct pci_dev *dev)
-{
-	u32 val32;
-	unsigned char busnum;
-	void __iomem *target;
-	void __iomem *bbar;
-	struct iommu_table *tbl;
-
-	busnum = dev->bus->number;
-	tbl = pci_iommu(dev->bus);
-	bbar = tbl->bbar;
-
-	/* disable TCE in PHB Config Register */
-	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
-	val32 = be32_to_cpu(readl(target));
-	val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
-
-	printk(KERN_INFO "Calgary: disabling translation on PHB %#x!\n", busnum);
-	writel(cpu_to_be32(val32), target);
-	readl(target); /* flush */
-
-	del_timer_sync(&tbl->watchdog_timer);
-}
-
-static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
-{
-	pci_dev_get(dev);
-	set_pci_iommu(dev->bus, NULL);
-
-	/* is the device behind a bridge? */
-	if (dev->bus->parent)
-		dev->bus->parent->self = dev;
-	else
-		dev->bus->self = dev;
-}
-
-static int __init calgary_init_one(struct pci_dev *dev)
-{
-	void __iomem *bbar;
-	struct iommu_table *tbl;
-	int ret;
-
-	bbar = busno_to_bbar(dev->bus->number);
-	ret = calgary_setup_tar(dev, bbar);
-	if (ret)
-		goto done;
-
-	pci_dev_get(dev);
-
-	if (dev->bus->parent) {
-		if (dev->bus->parent->self)
-			printk(KERN_WARNING "Calgary: IEEEE, dev %p has "
-			       "bus->parent->self!\n", dev);
-		dev->bus->parent->self = dev;
-	} else
-		dev->bus->self = dev;
-
-	tbl = pci_iommu(dev->bus);
-	tbl->chip_ops->handle_quirks(tbl, dev);
-
-	calgary_enable_translation(dev);
-
-	return 0;
-
-done:
-	return ret;
-}
-
-static int __init calgary_locate_bbars(void)
-{
-	int ret;
-	int rioidx, phb, bus;
-	void __iomem *bbar;
-	void __iomem *target;
-	unsigned long offset;
-	u8 start_bus, end_bus;
-	u32 val;
-
-	ret = -ENODATA;
-	for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
-		struct rio_detail *rio = rio_devs[rioidx];
-
-		if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
-			continue;
-
-		/* map entire 1MB of Calgary config space */
-		bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
-		if (!bbar)
-			goto error;
-
-		for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
-			offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
-			target = calgary_reg(bbar, offset);
-
-			val = be32_to_cpu(readl(target));
-
-			start_bus = (u8)((val & 0x00FF0000) >> 16);
-			end_bus = (u8)((val & 0x0000FF00) >> 8);
-
-			if (end_bus) {
-				for (bus = start_bus; bus <= end_bus; bus++) {
-					bus_info[bus].bbar = bbar;
-					bus_info[bus].phbid = phb;
-				}
-			} else {
-				bus_info[start_bus].bbar = bbar;
-				bus_info[start_bus].phbid = phb;
-			}
-		}
-	}
-
-	return 0;
-
-error:
-	/* scan bus_info and iounmap any bbars we previously ioremap'd */
-	for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
-		if (bus_info[bus].bbar)
-			iounmap(bus_info[bus].bbar);
-
-	return ret;
-}
-
-static int __init calgary_init(void)
-{
-	int ret;
-	struct pci_dev *dev = NULL;
-	struct calgary_bus_info *info;
-
-	ret = calgary_locate_bbars();
-	if (ret)
-		return ret;
-
-	/* Purely for kdump kernel case */
-	if (is_kdump_kernel())
-		get_tce_space_from_tar();
-
-	do {
-		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
-		if (!dev)
-			break;
-		if (!is_cal_pci_dev(dev->device))
-			continue;
-
-		info = &bus_info[dev->bus->number];
-		if (info->translation_disabled) {
-			calgary_init_one_nontraslated(dev);
-			continue;
-		}
-
-		if (!info->tce_space && !translate_empty_slots)
-			continue;
-
-		ret = calgary_init_one(dev);
-		if (ret)
-			goto error;
-	} while (1);
-
-	dev = NULL;
-	for_each_pci_dev(dev) {
-		struct iommu_table *tbl;
-
-		tbl = find_iommu_table(&dev->dev);
-
-		if (translation_enabled(tbl))
-			dev->dev.dma_ops = &calgary_dma_ops;
-	}
-
-	return ret;
-
-error:
-	do {
-		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
-		if (!dev)
-			break;
-		if (!is_cal_pci_dev(dev->device))
-			continue;
-
-		info = &bus_info[dev->bus->number];
-		if (info->translation_disabled) {
-			pci_dev_put(dev);
-			continue;
-		}
-		if (!info->tce_space && !translate_empty_slots)
-			continue;
-
-		calgary_disable_translation(dev);
-		calgary_free_bus(dev);
-		pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
-		dev->dev.dma_ops = NULL;
-	} while (1);
-
-	return ret;
-}
-
-static inline int __init determine_tce_table_size(void)
-{
-	int ret;
-
-	if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
-		return specified_table_size;
-
-	if (is_kdump_kernel() && saved_max_pfn) {
-		/*
-		 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
-		 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
-		 * larger table size has twice as many entries, so shift the
-		 * max ram address by 13 to divide by 8K and then look at the
-		 * order of the result to choose between 0-7.
-		 */
-		ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13);
-		if (ret > TCE_TABLE_SIZE_8M)
-			ret = TCE_TABLE_SIZE_8M;
-	} else {
-		/*
-		 * Use 8M by default (suggested by Muli) if it's not
-		 * kdump kernel and saved_max_pfn isn't set.
-		 */
-		ret = TCE_TABLE_SIZE_8M;
-	}
-
-	return ret;
-}
-
-static int __init build_detail_arrays(void)
-{
-	unsigned long ptr;
-	unsigned numnodes, i;
-	int scal_detail_size, rio_detail_size;
-
-	numnodes = rio_table_hdr->num_scal_dev;
-	if (numnodes > MAX_NUMNODES){
-		printk(KERN_WARNING
-			"Calgary: MAX_NUMNODES too low! Defined as %d, "
-			"but system has %d nodes.\n",
-			MAX_NUMNODES, numnodes);
-		return -ENODEV;
-	}
-
-	switch (rio_table_hdr->version){
-	case 2:
-		scal_detail_size = 11;
-		rio_detail_size = 13;
-		break;
-	case 3:
-		scal_detail_size = 12;
-		rio_detail_size = 15;
-		break;
-	default:
-		printk(KERN_WARNING
-		       "Calgary: Invalid Rio Grande Table Version: %d\n",
-		       rio_table_hdr->version);
-		return -EPROTO;
-	}
-
-	ptr = ((unsigned long)rio_table_hdr) + 3;
-	for (i = 0; i < numnodes; i++, ptr += scal_detail_size)
-		scal_devs[i] = (struct scal_detail *)ptr;
-
-	for (i = 0; i < rio_table_hdr->num_rio_dev;
-		    i++, ptr += rio_detail_size)
-		rio_devs[i] = (struct rio_detail *)ptr;
-
-	return 0;
-}
-
-static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
-{
-	int dev;
-	u32 val;
-
-	if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) {
-		/*
-		 * FIXME: properly scan for devices across the
-		 * PCI-to-PCI bridge on every CalIOC2 port.
-		 */
-		return 1;
-	}
-
-	for (dev = 1; dev < 8; dev++) {
-		val = read_pci_config(bus, dev, 0, 0);
-		if (val != 0xffffffff)
-			break;
-	}
-	return (val != 0xffffffff);
-}
-
-/*
- * calgary_init_bitmap_from_tce_table():
- * Function for kdump case. In the second/kdump kernel initialize
- * the bitmap based on the tce table entries obtained from first kernel
- */
-static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
-{
-	u64 *tp;
-	unsigned int index;
-	tp = ((u64 *)tbl->it_base);
-	for (index = 0 ; index < tbl->it_size; index++) {
-		if (*tp != 0x0)
-			set_bit(index, tbl->it_map);
-		tp++;
-	}
-}
-
-/*
- * get_tce_space_from_tar():
- * Function for kdump case. Get the tce tables from first kernel
- * by reading the contents of the base address register of calgary iommu
- */
-static void __init get_tce_space_from_tar(void)
-{
-	int bus;
-	void __iomem *target;
-	unsigned long tce_space;
-
-	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
-		struct calgary_bus_info *info = &bus_info[bus];
-		unsigned short pci_device;
-		u32 val;
-
-		val = read_pci_config(bus, 0, 0, 0);
-		pci_device = (val & 0xFFFF0000) >> 16;
-
-		if (!is_cal_pci_dev(pci_device))
-			continue;
-		if (info->translation_disabled)
-			continue;
-
-		if (calgary_bus_has_devices(bus, pci_device) ||
-						translate_empty_slots) {
-			target = calgary_reg(bus_info[bus].bbar,
-						tar_offset(bus));
-			tce_space = be64_to_cpu(readq(target));
-			tce_space = tce_space & TAR_SW_BITS;
-
-			tce_space = tce_space & (~specified_table_size);
-			info->tce_space = (u64 *)__va(tce_space);
-		}
-	}
-	return;
-}
-
-static int __init calgary_iommu_init(void)
-{
-	int ret;
-
-	/* ok, we're trying to use Calgary - let's roll */
-	printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
-
-	ret = calgary_init();
-	if (ret) {
-		printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
-		       "falling back to no_iommu\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-int __init detect_calgary(void)
-{
-	int bus;
-	void *tbl;
-	int calgary_found = 0;
-	unsigned long ptr;
-	unsigned int offset, prev_offset;
-	int ret;
-
-	/*
-	 * if the user specified iommu=off or iommu=soft or we found
-	 * another HW IOMMU already, bail out.
-	 */
-	if (no_iommu || iommu_detected)
-		return -ENODEV;
-
-	if (!use_calgary)
-		return -ENODEV;
-
-	if (!early_pci_allowed())
-		return -ENODEV;
-
-	printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
-
-	ptr = (unsigned long)phys_to_virt(get_bios_ebda());
-
-	rio_table_hdr = NULL;
-	prev_offset = 0;
-	offset = 0x180;
-	/*
-	 * The next offset is stored in the 1st word.
-	 * Only parse up until the offset increases:
-	 */
-	while (offset > prev_offset) {
-		/* The block id is stored in the 2nd word */
-		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
-			/* set the pointer past the offset & block id */
-			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
-			break;
-		}
-		prev_offset = offset;
-		offset = *((unsigned short *)(ptr + offset));
-	}
-	if (!rio_table_hdr) {
-		printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
-		       "in EBDA - bailing!\n");
-		return -ENODEV;
-	}
-
-	ret = build_detail_arrays();
-	if (ret) {
-		printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
-		return -ENOMEM;
-	}
-
-	specified_table_size = determine_tce_table_size();
-
-	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
-		struct calgary_bus_info *info = &bus_info[bus];
-		unsigned short pci_device;
-		u32 val;
-
-		val = read_pci_config(bus, 0, 0, 0);
-		pci_device = (val & 0xFFFF0000) >> 16;
-
-		if (!is_cal_pci_dev(pci_device))
-			continue;
-
-		if (info->translation_disabled)
-			continue;
-
-		if (calgary_bus_has_devices(bus, pci_device) ||
-		    translate_empty_slots) {
-			/*
-			 * If it is kdump kernel, find and use tce tables
-			 * from first kernel, else allocate tce tables here
-			 */
-			if (!is_kdump_kernel()) {
-				tbl = alloc_tce_table();
-				if (!tbl)
-					goto cleanup;
-				info->tce_space = tbl;
-			}
-			calgary_found = 1;
-		}
-	}
-
-	printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n",
-	       calgary_found ? "found" : "not found");
-
-	if (calgary_found) {
-		iommu_detected = 1;
-		calgary_detected = 1;
-		printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
-		printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
-		       specified_table_size);
-
-		x86_init.iommu.iommu_init = calgary_iommu_init;
-	}
-	return calgary_found;
-
-cleanup:
-	for (--bus; bus >= 0; --bus) {
-		struct calgary_bus_info *info = &bus_info[bus];
-
-		if (info->tce_space)
-			free_tce_table(info->tce_space);
-	}
-	return -ENOMEM;
-}
-
-static int __init calgary_parse_options(char *p)
-{
-	unsigned int bridge;
-	unsigned long val;
-	size_t len;
-	ssize_t ret;
-
-	while (*p) {
-		if (!strncmp(p, "64k", 3))
-			specified_table_size = TCE_TABLE_SIZE_64K;
-		else if (!strncmp(p, "128k", 4))
-			specified_table_size = TCE_TABLE_SIZE_128K;
-		else if (!strncmp(p, "256k", 4))
-			specified_table_size = TCE_TABLE_SIZE_256K;
-		else if (!strncmp(p, "512k", 4))
-			specified_table_size = TCE_TABLE_SIZE_512K;
-		else if (!strncmp(p, "1M", 2))
-			specified_table_size = TCE_TABLE_SIZE_1M;
-		else if (!strncmp(p, "2M", 2))
-			specified_table_size = TCE_TABLE_SIZE_2M;
-		else if (!strncmp(p, "4M", 2))
-			specified_table_size = TCE_TABLE_SIZE_4M;
-		else if (!strncmp(p, "8M", 2))
-			specified_table_size = TCE_TABLE_SIZE_8M;
-
-		len = strlen("translate_empty_slots");
-		if (!strncmp(p, "translate_empty_slots", len))
-			translate_empty_slots = 1;
-
-		len = strlen("disable");
-		if (!strncmp(p, "disable", len)) {
-			p += len;
-			if (*p == '=')
-				++p;
-			if (*p == '\0')
-				break;
-			ret = kstrtoul(p, 0, &val);
-			if (ret)
-				break;
-
-			bridge = val;
-			if (bridge < MAX_PHB_BUS_NUM) {
-				printk(KERN_INFO "Calgary: disabling "
-				       "translation for PHB %#x\n", bridge);
-				bus_info[bridge].translation_disabled = 1;
-			}
-		}
-
-		p = strpbrk(p, ",");
-		if (!p)
-			break;
-
-		p++; /* skip ',' */
-	}
-	return 1;
-}
-__setup("calgary=", calgary_parse_options);
-
-static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
-{
-	struct iommu_table *tbl;
-	unsigned int npages;
-	int i;
-
-	tbl = pci_iommu(dev->bus);
-
-	for (i = 0; i < 4; i++) {
-		struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i];
-
-		/* Don't give out TCEs that map MEM resources */
-		if (!(r->flags & IORESOURCE_MEM))
-			continue;
-
-		/* 0-based? we reserve the whole 1st MB anyway */
-		if (!r->start)
-			continue;
-
-		/* cover the whole region */
-		npages = resource_size(r) >> PAGE_SHIFT;
-		npages++;
-
-		iommu_range_reserve(tbl, r->start, npages);
-	}
-}
-
-static int __init calgary_fixup_tce_spaces(void)
-{
-	struct pci_dev *dev = NULL;
-	struct calgary_bus_info *info;
-
-	if (no_iommu || swiotlb || !calgary_detected)
-		return -ENODEV;
-
-	printk(KERN_DEBUG "Calgary: fixing up tce spaces\n");
-
-	do {
-		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
-		if (!dev)
-			break;
-		if (!is_cal_pci_dev(dev->device))
-			continue;
-
-		info = &bus_info[dev->bus->number];
-		if (info->translation_disabled)
-			continue;
-
-		if (!info->tce_space)
-			continue;
-
-		calgary_fixup_one_tce_space(dev);
-
-	} while (1);
-
-	return 0;
-}
-
-/*
- * We need to be call after pcibios_assign_resources (fs_initcall level)
- * and before device_initcall.
- */
-rootfs_initcall(calgary_fixup_tce_spaces);
-
-IOMMU_INIT_POST(detect_calgary);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index fa4352dce491..5dcedad21dff 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -12,7 +12,6 @@
 #include <asm/dma.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
-#include <asm/calgary.h>
 #include <asm/x86_init.h>
 #include <asm/iommu_table.h>
 
@@ -112,11 +111,6 @@ static __init int iommu_setup(char *p)
 
 		gart_parse_options(p);
 
-#ifdef CONFIG_CALGARY_IOMMU
-		if (!strncmp(p, "calgary", 7))
-			use_calgary = 1;
-#endif /* CONFIG_CALGARY_IOMMU */
-
 		p += strcspn(p, ",");
 		if (*p == ',')
 			++p;
@@ -146,7 +140,7 @@ rootfs_initcall(pci_iommu_init);
 
 static int via_no_dac_cb(struct pci_dev *pdev, void *data)
 {
-	pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+	pdev->dev.bus_dma_limit = DMA_BIT_MASK(32);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e94c4354d4e..839b5244e3b7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,7 @@
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/spec-ctrl.h>
+#include <asm/io_bitmap.h>
 #include <asm/proto.h>
 
 #include "process.h"
@@ -72,18 +73,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 #ifdef CONFIG_X86_32
 		.ss0 = __KERNEL_DS,
 		.ss1 = __KERNEL_CS,
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
 #endif
+		.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
 	 },
-#ifdef CONFIG_X86_32
-	 /*
-	  * Note that the .io_bitmap member must be extra-big. This is because
-	  * the CPU will access an additional byte beyond the end of the IO
-	  * permission bitmap. The extra byte must be all 1 bits, and must
-	  * be within the limit.
-	  */
-	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
-#endif
 };
 EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
@@ -110,28 +102,89 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 void exit_thread(struct task_struct *tsk)
 {
 	struct thread_struct *t = &tsk->thread;
-	unsigned long *bp = t->io_bitmap_ptr;
 	struct fpu *fpu = &t->fpu;
 
-	if (bp) {
-		struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
-
-		t->io_bitmap_ptr = NULL;
-		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
-		t->io_bitmap_max = 0;
-		put_cpu();
-		kfree(bp);
-	}
+	if (test_thread_flag(TIF_IO_BITMAP))
+		io_bitmap_exit();
 
 	free_vm86(t);
 
 	fpu__drop(fpu);
 }
 
+static int set_new_tls(struct task_struct *p, unsigned long tls)
+{
+	struct user_desc __user *utls = (struct user_desc __user *)tls;
+
+	if (in_ia32_syscall())
+		return do_set_thread_area(p, -1, utls, 0);
+	else
+		return do_set_thread_area_64(p, ARCH_SET_FS, tls);
+}
+
+int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+		    unsigned long arg, struct task_struct *p, unsigned long tls)
+{
+	struct inactive_task_frame *frame;
+	struct fork_frame *fork_frame;
+	struct pt_regs *childregs;
+	int ret = 0;
+
+	childregs = task_pt_regs(p);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
+	p->thread.io_bitmap = NULL;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+#ifdef CONFIG_X86_64
+	savesegment(gs, p->thread.gsindex);
+	p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase;
+	savesegment(fs, p->thread.fsindex);
+	p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase;
+	savesegment(es, p->thread.es);
+	savesegment(ds, p->thread.ds);
+#else
+	p->thread.sp0 = (unsigned long) (childregs + 1);
+	/*
+	 * Clear all status flags including IF and set fixed bit. 64bit
+	 * does not have this initialization as the frame does not contain
+	 * flags. The flags consistency (especially vs. AC) is there
+	 * ensured via objtool, which lacks 32bit support.
+	 */
+	frame->flags = X86_EFLAGS_FIXED;
+#endif
+
+	/* Kernel thread ? */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		kthread_frame_init(frame, sp, arg);
+		return 0;
+	}
+
+	frame->bx = 0;
+	*childregs = *current_pt_regs();
+	childregs->ax = 0;
+	if (sp)
+		childregs->sp = sp;
+
+#ifdef CONFIG_X86_32
+	task_user_gs(p) = get_user_gs(current_pt_regs());
+#endif
+
+	/* Set a new TLS for the child thread? */
+	if (clone_flags & CLONE_SETTLS)
+		ret = set_new_tls(p, tls);
+
+	if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
+		io_bitmap_share(p);
+
+	return ret;
+}
+
 void flush_thread(void)
 {
 	struct task_struct *tsk = current;
@@ -269,31 +322,96 @@ void arch_setup_new_exec(void)
 	}
 }
 
-static inline void switch_to_bitmap(struct thread_struct *prev,
-				    struct thread_struct *next,
-				    unsigned long tifp, unsigned long tifn)
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
+{
+	/*
+	 * Invalidate the I/O bitmap by moving io_bitmap_base outside the
+	 * TSS limit so any subsequent I/O access from user space will
+	 * trigger a #GP.
+	 *
+	 * This is correct even when VMEXIT rewrites the TSS limit
+	 * to 0x67 as the only requirement is that the base points
+	 * outside the limit.
+	 */
+	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+}
+
+static inline void switch_to_bitmap(unsigned long tifp)
+{
+	/*
+	 * Invalidate I/O bitmap if the previous task used it. This prevents
+	 * any possible leakage of an active I/O bitmap.
+	 *
+	 * If the next task has an I/O bitmap it will handle it on exit to
+	 * user mode.
+	 */
+	if (tifp & _TIF_IO_BITMAP)
+		tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw));
+}
+
+static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
+{
+	/*
+	 * Copy at least the byte range of the incoming tasks bitmap which
+	 * covers the permitted I/O ports.
+	 *
+	 * If the previous task which used an I/O bitmap had more bits
+	 * permitted, then the copy needs to cover those as well so they
+	 * get turned off.
+	 */
+	memcpy(tss->io_bitmap.bitmap, iobm->bitmap,
+	       max(tss->io_bitmap.prev_max, iobm->max));
+
+	/*
+	 * Store the new max and the sequence number of this bitmap
+	 * and a pointer to the bitmap itself.
+	 */
+	tss->io_bitmap.prev_max = iobm->max;
+	tss->io_bitmap.prev_sequence = iobm->sequence;
+}
+
+/**
+ * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ */
+void tss_update_io_bitmap(void)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	struct thread_struct *t = &current->thread;
+	u16 *base = &tss->x86_tss.io_bitmap_base;
+
+	if (!test_thread_flag(TIF_IO_BITMAP)) {
+		tss_invalidate_io_bitmap(tss);
+		return;
+	}
+
+	if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
+		*base = IO_BITMAP_OFFSET_VALID_ALL;
+	} else {
+		struct io_bitmap *iobm = t->io_bitmap;
 
-	if (tifn & _TIF_IO_BITMAP) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
-		/*
-		 * Make sure that the TSS limit is correct for the CPU
-		 * to notice the IO bitmap.
-		 */
-		refresh_tss_limit();
-	} else if (tifp & _TIF_IO_BITMAP) {
 		/*
-		 * Clear any possible leftover bits:
+		 * Only copy bitmap data when the sequence number differs. The
+		 * update time is accounted to the incoming task.
 		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+		if (tss->io_bitmap.prev_sequence != iobm->sequence)
+			tss_copy_io_bitmap(tss, iobm);
+
+		/* Enable the bitmap */
+		*base = IO_BITMAP_OFFSET_VALID_MAP;
 	}
+
+	/*
+	 * Make sure that the TSS limit is covering the IO bitmap. It might have
+	 * been cut down by a VMEXIT to 0x67 which would cause a subsequent I/O
+	 * access from user space to trigger a #GP because tbe bitmap is outside
+	 * the TSS limit.
+	 */
+	refresh_tss_limit();
 }
+#else /* CONFIG_X86_IOPL_IOPERM */
+static inline void switch_to_bitmap(unsigned long tifp) { }
+#endif
 
 #ifdef CONFIG_SMP
 
@@ -497,15 +615,12 @@ void speculation_ctrl_update_current(void)
 
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 {
-	struct thread_struct *prev, *next;
 	unsigned long tifp, tifn;
 
-	prev = &prev_p->thread;
-	next = &next_p->thread;
-
 	tifn = READ_ONCE(task_thread_info(next_p)->flags);
 	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
-	switch_to_bitmap(prev, next, tifp, tifn);
+
+	switch_to_bitmap(tifp);
 
 	propagate_user_return_notify(prev_p, next_p);
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b8ceec4974fe..5052ced43373 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -112,74 +112,6 @@ void release_thread(struct task_struct *dead_task)
 	release_vm86_irqs(dead_task);
 }
 
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
-	unsigned long arg, struct task_struct *p, unsigned long tls)
-{
-	struct pt_regs *childregs = task_pt_regs(p);
-	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
-	struct inactive_task_frame *frame = &fork_frame->frame;
-	struct task_struct *tsk;
-	int err;
-
-	/*
-	 * For a new task use the RESET flags value since there is no before.
-	 * All the status flags are zero; DF and all the system flags must also
-	 * be 0, specifically IF must be 0 because we context switch to the new
-	 * task with interrupts disabled.
-	 */
-	frame->flags = X86_EFLAGS_FIXED;
-	frame->bp = 0;
-	frame->ret_addr = (unsigned long) ret_from_fork;
-	p->thread.sp = (unsigned long) fork_frame;
-	p->thread.sp0 = (unsigned long) (childregs+1);
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
-	if (unlikely(p->flags & PF_KTHREAD)) {
-		/* kernel thread */
-		memset(childregs, 0, sizeof(struct pt_regs));
-		frame->bx = sp;		/* function */
-		frame->di = arg;
-		p->thread.io_bitmap_ptr = NULL;
-		return 0;
-	}
-	frame->bx = 0;
-	*childregs = *current_pt_regs();
-	childregs->ax = 0;
-	if (sp)
-		childregs->sp = sp;
-
-	task_user_gs(p) = get_user_gs(current_pt_regs());
-
-	p->thread.io_bitmap_ptr = NULL;
-	tsk = current;
-	err = -ENOMEM;
-
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
-						IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!p->thread.io_bitmap_ptr) {
-			p->thread.io_bitmap_max = 0;
-			return -ENOMEM;
-		}
-		set_tsk_thread_flag(p, TIF_IO_BITMAP);
-	}
-
-	err = 0;
-
-	/*
-	 * Set a new TLS for the child thread?
-	 */
-	if (clone_flags & CLONE_SETTLS)
-		err = do_set_thread_area(p, -1,
-			(struct user_desc __user *)tls, 0);
-
-	if (err && p->thread.io_bitmap_ptr) {
-		kfree(p->thread.io_bitmap_ptr);
-		p->thread.io_bitmap_max = 0;
-	}
-	return err;
-}
-
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
@@ -192,7 +124,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
 	regs->flags		= X86_EFLAGS_IF;
-	force_iret();
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -255,15 +186,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	load_TLS(next, cpu);
 
-	/*
-	 * Restore IOPL if needed.  In normal use, the flags restore
-	 * in the switch assembly will handle this.  But if the kernel
-	 * is running virtualized at a non-zero CPL, the popf will
-	 * not restore flags, so it must be done in a separate step.
-	 */
-	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
-		set_iopl_mask(next->iopl);
-
 	switch_to_extra(prev_p, next_p);
 
 	/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index af64519b2695..ffd497804dbc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -371,81 +371,6 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 	task->thread.gsbase = gsbase;
 }
 
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct *p, unsigned long tls)
-{
-	int err;
-	struct pt_regs *childregs;
-	struct fork_frame *fork_frame;
-	struct inactive_task_frame *frame;
-	struct task_struct *me = current;
-
-	childregs = task_pt_regs(p);
-	fork_frame = container_of(childregs, struct fork_frame, regs);
-	frame = &fork_frame->frame;
-
-	frame->bp = 0;
-	frame->ret_addr = (unsigned long) ret_from_fork;
-	p->thread.sp = (unsigned long) fork_frame;
-	p->thread.io_bitmap_ptr = NULL;
-
-	savesegment(gs, p->thread.gsindex);
-	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
-	savesegment(fs, p->thread.fsindex);
-	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
-	savesegment(es, p->thread.es);
-	savesegment(ds, p->thread.ds);
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
-	if (unlikely(p->flags & PF_KTHREAD)) {
-		/* kernel thread */
-		memset(childregs, 0, sizeof(struct pt_regs));
-		frame->bx = sp;		/* function */
-		frame->r12 = arg;
-		return 0;
-	}
-	frame->bx = 0;
-	*childregs = *current_pt_regs();
-
-	childregs->ax = 0;
-	if (sp)
-		childregs->sp = sp;
-
-	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
-						  IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!p->thread.io_bitmap_ptr) {
-			p->thread.io_bitmap_max = 0;
-			return -ENOMEM;
-		}
-		set_tsk_thread_flag(p, TIF_IO_BITMAP);
-	}
-
-	/*
-	 * Set a new TLS for the child thread?
-	 */
-	if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
-		if (in_ia32_syscall())
-			err = do_set_thread_area(p, -1,
-				(struct user_desc __user *)tls, 0);
-		else
-#endif
-			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
-		if (err)
-			goto out;
-	}
-	err = 0;
-out:
-	if (err && p->thread.io_bitmap_ptr) {
-		kfree(p->thread.io_bitmap_ptr);
-		p->thread.io_bitmap_max = 0;
-	}
-
-	return err;
-}
-
 static void
 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 		    unsigned long new_sp,
@@ -469,7 +394,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
-	force_iret();
 }
 
 void
@@ -572,17 +496,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	switch_to_extra(prev_p, next_p);
 
-#ifdef CONFIG_XEN_PV
-	/*
-	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
-	 * current_pt_regs()->flags may not match the current task's
-	 * intended IOPL.  We need to switch it manually.
-	 */
-	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
-		     prev->iopl != next->iopl))
-		xen_set_iopl_mask(next->iopl);
-#endif
-
 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3c5bbe8e4120..f0e1ddbc2fd7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -42,6 +42,7 @@
 #include <asm/traps.h>
 #include <asm/syscall.h>
 #include <asm/fsgsbase.h>
+#include <asm/io_bitmap.h>
 
 #include "tls.h"
 
@@ -181,6 +182,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 static int set_segment_reg(struct task_struct *task,
 			   unsigned long offset, u16 value)
 {
+	if (WARN_ON_ONCE(task == current))
+		return -EIO;
+
 	/*
 	 * The value argument was already truncated to 16 bits.
 	 */
@@ -208,10 +212,7 @@ static int set_segment_reg(struct task_struct *task,
 		break;
 
 	case offsetof(struct user_regs_struct, gs):
-		if (task == current)
-			set_user_gs(task_pt_regs(task), value);
-		else
-			task_user_gs(task) = value;
+		task_user_gs(task) = value;
 	}
 
 	return 0;
@@ -271,32 +272,41 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 static int set_segment_reg(struct task_struct *task,
 			   unsigned long offset, u16 value)
 {
+	if (WARN_ON_ONCE(task == current))
+		return -EIO;
+
 	/*
 	 * The value argument was already truncated to 16 bits.
 	 */
 	if (invalid_selector(value))
 		return -EIO;
 
+	/*
+	 * This function has some ABI oddities.
+	 *
+	 * A 32-bit ptracer probably expects that writing FS or GS will change
+	 * FSBASE or GSBASE respectively.  In the absence of FSGSBASE support,
+	 * this code indeed has that effect.  When FSGSBASE is added, this
+	 * will require a special case.
+	 *
+	 * For existing 64-bit ptracers, writing FS or GS *also* currently
+	 * changes the base if the selector is nonzero the next time the task
+	 * is run.  This behavior may not be needed, and trying to preserve it
+	 * when FSGSBASE is added would be complicated at best.
+	 */
+
 	switch (offset) {
 	case offsetof(struct user_regs_struct,fs):
 		task->thread.fsindex = value;
-		if (task == current)
-			loadsegment(fs, task->thread.fsindex);
 		break;
 	case offsetof(struct user_regs_struct,gs):
 		task->thread.gsindex = value;
-		if (task == current)
-			load_gs_index(task->thread.gsindex);
 		break;
 	case offsetof(struct user_regs_struct,ds):
 		task->thread.ds = value;
-		if (task == current)
-			loadsegment(ds, task->thread.ds);
 		break;
 	case offsetof(struct user_regs_struct,es):
 		task->thread.es = value;
-		if (task == current)
-			loadsegment(es, task->thread.es);
 		break;
 
 		/*
@@ -374,6 +384,9 @@ static int putreg(struct task_struct *child,
 		 * When changing the FS base, use do_arch_prctl_64()
 		 * to set the index to zero and to set the base
 		 * as requested.
+		 *
+		 * NB: This behavior is nonsensical and likely needs to
+		 * change when FSGSBASE support is added.
 		 */
 		if (child->thread.fsbase != value)
 			return do_arch_prctl_64(child, ARCH_SET_FS, value);
@@ -697,7 +710,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
 static int ioperm_active(struct task_struct *target,
 			 const struct user_regset *regset)
 {
-	return target->thread.io_bitmap_max / regset->size;
+	struct io_bitmap *iobm = target->thread.io_bitmap;
+
+	return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0;
 }
 
 static int ioperm_get(struct task_struct *target,
@@ -705,12 +720,13 @@ static int ioperm_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf)
 {
-	if (!target->thread.io_bitmap_ptr)
+	struct io_bitmap *iobm = target->thread.io_bitmap;
+
+	if (!iobm)
 		return -ENXIO;
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   target->thread.io_bitmap_ptr,
-				   0, IO_BITMAP_BYTES);
+				   iobm->bitmap, 0, IO_BITMAP_BYTES);
 }
 
 /*
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1daf8f2aa21f..896d74cb5081 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -110,7 +110,7 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
 	}
 
 	/* use bits 31:14, 16 kB aligned */
-	rcba_base = ioremap_nocache(rcba, 0x4000);
+	rcba_base = ioremap(rcba, 0x4000);
 	if (rcba_base == NULL) {
 		dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; "
 			"cannot force enable HPET\n");
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index ee26df08002e..94b33885f8d2 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -35,8 +35,7 @@
 #define CP_PA_BACKUP_PAGES_MAP	DATA(0x1c)
 
 	.text
-	.globl relocate_kernel
-relocate_kernel:
+SYM_CODE_START_NOALIGN(relocate_kernel)
 	/* Save the CPU context, used for jumping back */
 
 	pushl	%ebx
@@ -93,8 +92,9 @@ relocate_kernel:
 	addl    $(identity_mapped - relocate_kernel), %eax
 	pushl   %eax
 	ret
+SYM_CODE_END(relocate_kernel)
 
-identity_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	/* set return address to 0 if not preserving context */
 	pushl	$0
 	/* store the start address on the stack */
@@ -191,8 +191,9 @@ identity_mapped:
 	addl	$(virtual_mapped - relocate_kernel), %eax
 	pushl	%eax
 	ret
+SYM_CODE_END(identity_mapped)
 
-virtual_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	movl	CR4(%edi), %eax
 	movl	%eax, %cr4
 	movl	CR3(%edi), %eax
@@ -208,9 +209,10 @@ virtual_mapped:
 	popl	%esi
 	popl	%ebx
 	ret
+SYM_CODE_END(virtual_mapped)
 
 	/* Do the copies */
-swap_pages:
+SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	movl	8(%esp), %edx
 	movl	4(%esp), %ecx
 	pushl	%ebp
@@ -270,6 +272,7 @@ swap_pages:
 	popl	%ebx
 	popl	%ebp
 	ret
+SYM_CODE_END(swap_pages)
 
 	.globl kexec_control_code_size
 .set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c51ccff5cd01..ef3ba99068d3 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -38,8 +38,7 @@
 	.text
 	.align PAGE_SIZE
 	.code64
-	.globl relocate_kernel
-relocate_kernel:
+SYM_CODE_START_NOALIGN(relocate_kernel)
 	/*
 	 * %rdi indirection_page
 	 * %rsi page_list
@@ -103,8 +102,9 @@ relocate_kernel:
 	addq	$(identity_mapped - relocate_kernel), %r8
 	pushq	%r8
 	ret
+SYM_CODE_END(relocate_kernel)
 
-identity_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	/* set return address to 0 if not preserving context */
 	pushq	$0
 	/* store the start address on the stack */
@@ -209,8 +209,9 @@ identity_mapped:
 	movq	$virtual_mapped, %rax
 	pushq	%rax
 	ret
+SYM_CODE_END(identity_mapped)
 
-virtual_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	movq	RSP(%r8), %rsp
 	movq	CR4(%r8), %rax
 	movq	%rax, %cr4
@@ -228,9 +229,10 @@ virtual_mapped:
 	popq	%rbp
 	popq	%rbx
 	ret
+SYM_CODE_END(virtual_mapped)
 
 	/* Do the copies */
-swap_pages:
+SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	movq	%rdi, %rcx 	/* Put the page_list in %rcx */
 	xorl	%edi, %edi
 	xorl	%esi, %esi
@@ -283,6 +285,7 @@ swap_pages:
 	jmp	0b
 3:
 	ret
+SYM_CODE_END(swap_pages)
 
 	.globl kexec_control_code_size
 .set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6cf206806be0..a74262c71484 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -2,130 +2,54 @@
 /*
  *  Copyright (C) 1995  Linus Torvalds
  *
- *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- *
- *  Memory region support
- *	David Parsons <orc@pell.chi.il.us>, July-August 1999
- *
- *  Added E820 sanitization routine (removes overlapping memory regions);
- *  Brian Moyle <bmoyle@mvista.com>, February 2001
- *
- * Moved CPU detection code to cpu/${cpu}.c
- *    Patrick Mochel <mochel@osdl.org>, March 2002
- *
- *  Provisions for empty E820 memory regions (reported by certain BIOSes).
- *  Alex Achenbach <xela@slit.de>, December 2002.
- *
+ * This file contains the setup_arch() code, which handles the architecture-dependent
+ * parts of early kernel initialization.
  */
-
-/*
- * This file handles the architecture-dependent parts of initialization
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/screen_info.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/sfi.h>
-#include <linux/apm_bios.h>
-#include <linux/initrd.h>
-#include <linux/memblock.h>
-#include <linux/seq_file.h>
 #include <linux/console.h>
-#include <linux/root_dev.h>
-#include <linux/highmem.h>
-#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/dmi.h>
 #include <linux/efi.h>
-#include <linux/init.h>
-#include <linux/edd.h>
+#include <linux/init_ohci1394_dma.h>
+#include <linux/initrd.h>
 #include <linux/iscsi_ibft.h>
-#include <linux/nodemask.h>
-#include <linux/kexec.h>
-#include <linux/dmi.h>
-#include <linux/pfn.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
-#include <asm/pci-direct.h>
-#include <linux/init_ohci1394_dma.h>
-#include <linux/kvm_para.h>
-#include <linux/dma-contiguous.h>
-#include <xen/xen.h>
-#include <uapi/linux/mount.h>
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/delay.h>
-
-#include <linux/kallsyms.h>
-#include <linux/cpufreq.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <linux/uaccess.h>
-
-#include <linux/percpu.h>
-#include <linux/crash_dump.h>
+#include <linux/root_dev.h>
+#include <linux/sfi.h>
 #include <linux/tboot.h>
-#include <linux/jiffies.h>
-#include <linux/mem_encrypt.h>
-#include <linux/sizes.h>
-
 #include <linux/usb/xhci-dbgp.h>
-#include <video/edid.h>
 
-#include <asm/mtrr.h>
+#include <uapi/linux/mount.h>
+
+#include <xen/xen.h>
+
 #include <asm/apic.h>
-#include <asm/realmode.h>
-#include <asm/e820/api.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/efi.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/sections.h>
-#include <asm/io_apic.h>
-#include <asm/ist.h>
-#include <asm/setup_arch.h>
 #include <asm/bios_ebda.h>
-#include <asm/cacheflush.h>
-#include <asm/processor.h>
 #include <asm/bugs.h>
-#include <asm/kasan.h>
-
-#include <asm/vsyscall.h>
 #include <asm/cpu.h>
-#include <asm/desc.h>
-#include <asm/dma.h>
-#include <asm/iommu.h>
+#include <asm/efi.h>
 #include <asm/gart.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-
-#include <asm/paravirt.h>
 #include <asm/hypervisor.h>
-#include <asm/olpc_ofw.h>
-
-#include <asm/percpu.h>
-#include <asm/topology.h>
-#include <asm/apicdef.h>
-#include <asm/amd_nb.h>
+#include <asm/io_apic.h>
+#include <asm/kasan.h>
+#include <asm/kaslr.h>
 #include <asm/mce.h>
-#include <asm/alternative.h>
+#include <asm/mtrr.h>
+#include <asm/realmode.h>
+#include <asm/olpc_ofw.h>
+#include <asm/pci-direct.h>
 #include <asm/prom.h>
-#include <asm/microcode.h>
-#include <asm/kaslr.h>
+#include <asm/proto.h>
 #include <asm/unwind.h>
+#include <asm/vsyscall.h>
+#include <linux/vmalloc.h>
 
 /*
- * max_low_pfn_mapped: highest direct mapped pfn under 4GB
- * max_pfn_mapped:     highest direct mapped pfn over 4GB
+ * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
+ * max_pfn_mapped:     highest directly mapped pfn > 4 GB
  *
  * The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are
- * represented by pfn_mapped
+ * represented by pfn_mapped[].
  */
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
@@ -135,14 +59,30 @@ RESERVE_BRK(dmi_alloc, 65536);
 #endif
 
 
-static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
-unsigned long _brk_end = (unsigned long)__brk_base;
+/*
+ * Range of the BSS area. The size of the BSS area is determined
+ * at link time, with RESERVE_BRK*() facility reserving additional
+ * chunks.
+ */
+static __initdata
+unsigned long _brk_start = (unsigned long)__brk_base;
+unsigned long _brk_end   = (unsigned long)__brk_base;
 
 struct boot_params boot_params;
 
 /*
- * Machine setup..
+ * These are the four main kernel memory regions, we put them into
+ * the resource tree so that kdump tools and other debugging tools
+ * recover it:
  */
+
+static struct resource rodata_resource = {
+	.name	= "Kernel rodata",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
+};
+
 static struct resource data_resource = {
 	.name	= "Kernel data",
 	.start	= 0,
@@ -166,16 +106,16 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-/* cpu data as detected by the assembly code in head_32.S */
+/* CPU data as detected by the assembly code in head_32.S */
 struct cpuinfo_x86 new_cpu_data;
 
-/* common cpu data for all cpus */
+/* Common CPU data for all CPUs */
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
 EXPORT_SYMBOL(boot_cpu_data);
 
 unsigned int def_to_bigsmp;
 
-/* for MCA, but anyone else can use it if they want */
+/* For MCA, but anyone else can use it if they want */
 unsigned int machine_id;
 unsigned int machine_submodel_id;
 unsigned int BIOS_revision;
@@ -438,6 +378,12 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
 		memblock_reserve(pa_data, sizeof(*data) + data->len);
+
+		if (data->type == SETUP_INDIRECT &&
+		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+			memblock_reserve(((struct setup_indirect *)data->data)->addr,
+					 ((struct setup_indirect *)data->data)->len);
+
 		pa_data = data->next;
 		early_memunmap(data, sizeof(*data));
 	}
@@ -455,15 +401,15 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 /*
  * Keep the crash kernel below this limit.
  *
- * On 32 bits earlier kernels would limit the kernel to the low 512 MiB
+ * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
  * due to mapping restrictions.
  *
- * On 64bit, kdump kernel need be restricted to be under 64TB, which is
- * the upper limit of system RAM in 4-level paing mode. Since the kdump
- * jumping could be from 5-level to 4-level, the jumping will fail if
- * kernel is put above 64TB, and there's no way to detect the paging mode
- * of the kernel which will be loaded for dumping during the 1st kernel
- * bootup.
+ * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
+ * the upper limit of system RAM in 4-level paging mode. Since the kdump
+ * jump could be from 5-level paging to 4-level paging, the jump will fail if
+ * the kernel is put above 64 TB, and during the 1st kernel bootup there's
+ * no good way to detect the paging mode of the target kernel which will be
+ * loaded for dumping.
  */
 #ifdef CONFIG_X86_32
 # define CRASH_ADDR_LOW_MAX	SZ_512M
@@ -743,8 +689,8 @@ static void __init trim_bios_range(void)
 	e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
 
 	/*
-	 * special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
+	 * special case: Some BIOSes report the PC BIOS
+	 * area (640Kb -> 1Mb) as RAM even though it is not.
 	 * take them out.
 	 */
 	e820__range_remove(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_TYPE_RAM, 1);
@@ -874,7 +820,7 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Note: Quark X1000 CPUs advertise PGE incorrectly and require
 	 * a cr3 based tlb flush, so the following __flush_tlb_all()
-	 * will not flush anything because the cpu quirk which clears
+	 * will not flush anything because the CPU quirk which clears
 	 * X86_FEATURE_PGE has not been invoked yet. Though due to the
 	 * load_cr3() above the TLB has been flushed already. The
 	 * quirk is invoked before subsequent calls to __flush_tlb_all()
@@ -949,7 +895,9 @@ void __init setup_arch(char **cmdline_p)
 
 	code_resource.start = __pa_symbol(_text);
 	code_resource.end = __pa_symbol(_etext)-1;
-	data_resource.start = __pa_symbol(_etext);
+	rodata_resource.start = __pa_symbol(__start_rodata);
+	rodata_resource.end = __pa_symbol(__end_rodata)-1;
+	data_resource.start = __pa_symbol(_sdata);
 	data_resource.end = __pa_symbol(_edata)-1;
 	bss_resource.start = __pa_symbol(__bss_start);
 	bss_resource.end = __pa_symbol(__bss_stop)-1;
@@ -1038,6 +986,7 @@ void __init setup_arch(char **cmdline_p)
 
 	/* after parse_early_param, so could debug it */
 	insert_resource(&iomem_resource, &code_resource);
+	insert_resource(&iomem_resource, &rodata_resource);
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
@@ -1120,17 +1069,15 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_bios_regions();
 
-	if (efi_enabled(EFI_MEMMAP)) {
-		efi_fake_memmap();
-		efi_find_mirror();
-		efi_esrt_init();
+	efi_fake_memmap();
+	efi_find_mirror();
+	efi_esrt_init();
 
-		/*
-		 * The EFI specification says that boot service code won't be
-		 * called after ExitBootServices(). This is, in fact, a lie.
-		 */
-		efi_reserve_boot_services();
-	}
+	/*
+	 * The EFI specification says that boot service code won't be
+	 * called after ExitBootServices(). This is, in fact, a lie.
+	 */
+	efi_reserve_boot_services();
 
 	/* preallocate 4k for mptable mpc */
 	e820__memblock_alloc_reserved_mpc_new();
@@ -1279,8 +1226,6 @@ void __init setup_arch(char **cmdline_p)
 #if defined(CONFIG_VGA_CONSOLE)
 	if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
 		conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 #endif
 #endif
 	x86_init.oem.banner();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 86663874ef04..e6d7894ad127 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -207,8 +207,8 @@ void __init setup_per_cpu_areas(void)
 					    pcpu_cpu_distance,
 					    pcpu_fc_alloc, pcpu_fc_free);
 		if (rc < 0)
-			pr_warning("%s allocator failed (%d), falling back to page size\n",
-				   pcpu_fc_names[pcpu_chosen_fc], rc);
+			pr_warn("%s allocator failed (%d), falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 8eb7193e158d..8a29573851a3 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -151,8 +151,6 @@ static int restore_sigcontext(struct pt_regs *regs,
 
 	err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
 
-	force_iret();
-
 	return err;
 }
 
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 01f0e2263b86..298fc1edd9c9 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -90,11 +90,11 @@ __init int create_simplefb(const struct screen_info *si,
 	if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
 		size <<= 16;
 	length = mode->height * mode->stride;
-	length = PAGE_ALIGN(length);
 	if (length > size) {
 		printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
 		return -EINVAL;
 	}
+	length = PAGE_ALIGN(length);
 
 	/* setup IORESOURCE_MEM as framebuffer memory */
 	memset(&res, 0, sizeof(res));
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a49fe1dcb47e..b89f6ac6a0c0 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -57,7 +57,7 @@ void __init tboot_probe(void)
 	 */
 	if (!e820__mapped_any(boot_params.tboot_addr,
 			     boot_params.tboot_addr, E820_TYPE_RESERVED)) {
-		pr_warning("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
+		pr_warn("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
 		return;
 	}
 
@@ -65,13 +65,12 @@ void __init tboot_probe(void)
 	set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
 	tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
 	if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
-		pr_warning("tboot at 0x%llx is invalid\n",
-			   boot_params.tboot_addr);
+		pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr);
 		tboot = NULL;
 		return;
 	}
 	if (tboot->version < 5) {
-		pr_warning("tboot version is invalid: %u\n", tboot->version);
+		pr_warn("tboot version is invalid: %u\n", tboot->version);
 		tboot = NULL;
 		return;
 	}
@@ -289,7 +288,7 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
 
 	if (sleep_state >= ACPI_S_STATE_COUNT ||
 	    acpi_shutdown_map[sleep_state] == -1) {
-		pr_warning("unsupported sleep state 0x%x\n", sleep_state);
+		pr_warn("unsupported sleep state 0x%x\n", sleep_state);
 		return -1;
 	}
 
@@ -302,7 +301,7 @@ static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
 	if (!tboot_enabled())
 		return 0;
 
-	pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
+	pr_warn("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
 	return -ENODEV;
 }
 
@@ -320,7 +319,7 @@ static int tboot_wait_for_aps(int num_aps)
 	}
 
 	if (timeout)
-		pr_warning("tboot wait for APs timeout\n");
+		pr_warn("tboot wait for APs timeout\n");
 
 	return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
 }
@@ -355,7 +354,7 @@ static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t c
 	void *kbuf;
 	int ret = -EFAULT;
 
-	log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
+	log_base = ioremap(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
 	if (!log_base)
 		return ret;
 
@@ -516,7 +515,7 @@ int tboot_force_iommu(void)
 		return 1;
 
 	if (no_iommu || swiotlb || dmar_disabled)
-		pr_warning("Forcing Intel-IOMMU to enabled\n");
+		pr_warn("Forcing Intel-IOMMU to enabled\n");
 
 	dmar_disabled = 0;
 #ifdef CONFIG_SWIOTLB
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
deleted file mode 100644
index 6384be751eff..000000000000
--- a/arch/x86/kernel/tce_64.c
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * This file manages the translation entries for the IBM Calgary IOMMU.
- *
- * Derived from arch/powerpc/platforms/pseries/iommu.c
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/memblock.h>
-#include <asm/tce.h>
-#include <asm/calgary.h>
-#include <asm/proto.h>
-#include <asm/cacheflush.h>
-
-/* flush a tce at 'tceaddr' to main memory */
-static inline void flush_tce(void* tceaddr)
-{
-	/* a single tce can't cross a cache line */
-	if (boot_cpu_has(X86_FEATURE_CLFLUSH))
-		clflush(tceaddr);
-	else
-		wbinvd();
-}
-
-void tce_build(struct iommu_table *tbl, unsigned long index,
-	unsigned int npages, unsigned long uaddr, int direction)
-{
-	u64* tp;
-	u64 t;
-	u64 rpn;
-
-	t = (1 << TCE_READ_SHIFT);
-	if (direction != DMA_TO_DEVICE)
-		t |= (1 << TCE_WRITE_SHIFT);
-
-	tp = ((u64*)tbl->it_base) + index;
-
-	while (npages--) {
-		rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
-		t &= ~TCE_RPN_MASK;
-		t |= (rpn << TCE_RPN_SHIFT);
-
-		*tp = cpu_to_be64(t);
-		flush_tce(tp);
-
-		uaddr += PAGE_SIZE;
-		tp++;
-	}
-}
-
-void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
-{
-	u64* tp;
-
-	tp  = ((u64*)tbl->it_base) + index;
-
-	while (npages--) {
-		*tp = cpu_to_be64(0);
-		flush_tce(tp);
-		tp++;
-	}
-}
-
-static inline unsigned int table_size_to_number_of_entries(unsigned char size)
-{
-	/*
-	 * size is the order of the table, 0-7
-	 * smallest table is 8K entries, so shift result by 13 to
-	 * multiply by 8K
-	 */
-	return (1 << size) << 13;
-}
-
-static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
-{
-	unsigned int bitmapsz;
-	unsigned long bmppages;
-	int ret;
-
-	tbl->it_busno = dev->bus->number;
-
-	/* set the tce table size - measured in entries */
-	tbl->it_size = table_size_to_number_of_entries(specified_table_size);
-
-	/*
-	 * number of bytes needed for the bitmap size in number of
-	 * entries; we need one bit per entry
-	 */
-	bitmapsz = tbl->it_size / BITS_PER_BYTE;
-	bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
-	if (!bmppages) {
-		printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	tbl->it_map = (unsigned long*)bmppages;
-
-	memset(tbl->it_map, 0, bitmapsz);
-
-	tbl->it_hint = 0;
-
-	spin_lock_init(&tbl->it_lock);
-
-	return 0;
-
-done:
-	return ret;
-}
-
-int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar)
-{
-	struct iommu_table *tbl;
-	int ret;
-
-	if (pci_iommu(dev->bus)) {
-		printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
-		       dev, pci_iommu(dev->bus));
-		BUG();
-	}
-
-	tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
-	if (!tbl) {
-		printk(KERN_ERR "Calgary: error allocating iommu_table\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	ret = tce_table_setparms(dev, tbl);
-	if (ret)
-		goto free_tbl;
-
-	tbl->bbar = bbar;
-
-	set_pci_iommu(dev->bus, tbl);
-
-	return 0;
-
-free_tbl:
-	kfree(tbl);
-done:
-	return ret;
-}
-
-void * __init alloc_tce_table(void)
-{
-	unsigned int size;
-
-	size = table_size_to_number_of_entries(specified_table_size);
-	size *= TCE_ENTRY_SIZE;
-
-	return memblock_alloc_low(size, size);
-}
-
-void __init free_tce_table(void *tbl)
-{
-	unsigned int size;
-
-	if (!tbl)
-		return;
-
-	size = table_size_to_number_of_entries(specified_table_size);
-	size *= TCE_ENTRY_SIZE;
-
-	memblock_free(__pa(tbl), size);
-}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6fd5b7561444..6ef00eb6fbb9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,11 +37,6 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/io.h>
-
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
 #include <asm/debugreg.h>
@@ -59,6 +54,8 @@
 #include <asm/fpu/xstate.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -309,8 +306,23 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 }
 #endif
 
-#ifdef CONFIG_X86_64
-/* Runs on IST stack */
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+/*
+ * Runs on an IST stack for x86_64 and on a special task stack for x86_32.
+ *
+ * On x86_64, this is more or less a normal kernel entry.  Notwithstanding the
+ * SDM's warnings about double faults being unrecoverable, returning works as
+ * expected.  Presumably what the SDM actually means is that the CPU may get
+ * the register state wrong on entry, so returning could be a bad idea.
+ *
+ * Various CPU engineers have promised that double faults due to an IRET fault
+ * while the stack is read-only are, in fact, recoverable.
+ *
+ * On x86_32, this is entered through a task gate, and regs are synthesized
+ * from the TSS.  Returning is, in principle, okay, but changes to regs will
+ * be lost.  If, for some reason, we need to return to a context with modified
+ * regs, the shim code could be adjusted to synchronize the registers.
+ */
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
 	static const char str[] = "double fault";
@@ -414,15 +426,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
 		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif
 
-#ifdef CONFIG_DOUBLEFAULT
-	df_debug(regs, error_code);
-#endif
-	/*
-	 * This is always a kernel trap and never fixable (and thus must
-	 * never return).
-	 */
-	for (;;)
-		die(str, regs, error_code);
+	pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+	die("double fault", regs, error_code);
+	panic("Machine halted.");
 }
 #endif
 
@@ -440,11 +446,57 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
 }
 
-dotraplinkage void
-do_general_protection(struct pt_regs *regs, long error_code)
+enum kernel_gp_hint {
+	GP_NO_HINT,
+	GP_NON_CANONICAL,
+	GP_CANONICAL
+};
+
+/*
+ * When an uncaught #GP occurs, try to determine the memory address accessed by
+ * the instruction and return that address to the caller. Also, try to figure
+ * out whether any part of the access to that address was non-canonical.
+ */
+static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
+						 unsigned long *addr)
 {
-	const char *desc = "general protection fault";
+	u8 insn_buf[MAX_INSN_SIZE];
+	struct insn insn;
+
+	if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+		return GP_NO_HINT;
+
+	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+	insn_get_modrm(&insn);
+	insn_get_sib(&insn);
+
+	*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+	if (*addr == -1UL)
+		return GP_NO_HINT;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Check that:
+	 *  - the operand is not in the kernel half
+	 *  - the last byte of the operand is not in the user canonical half
+	 */
+	if (*addr < ~__VIRTUAL_MASK &&
+	    *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
+		return GP_NON_CANONICAL;
+#endif
+
+	return GP_CANONICAL;
+}
+
+#define GPFSTR "general protection fault"
+
+dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
+{
+	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
+	enum kernel_gp_hint hint = GP_NO_HINT;
 	struct task_struct *tsk;
+	unsigned long gp_addr;
+	int ret;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	cond_local_irq_enable(regs);
@@ -461,48 +513,61 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	}
 
 	tsk = current;
-	if (!user_mode(regs)) {
-		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
-			return;
 
+	if (user_mode(regs)) {
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 
-		/*
-		 * To be potentially processing a kprobe fault and to
-		 * trust the result from kprobe_running(), we have to
-		 * be non-preemptible.
-		 */
-		if (!preemptible() && kprobe_running() &&
-		    kprobe_fault_handler(regs, X86_TRAP_GP))
-			return;
+		show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+		force_sig(SIGSEGV);
 
-		if (notify_die(DIE_GPF, desc, regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
-			die(desc, regs, error_code);
 		return;
 	}
 
+	if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
+		return;
+
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
 
-	show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+	/*
+	 * To be potentially processing a kprobe fault and to trust the result
+	 * from kprobe_running(), we have to be non-preemptible.
+	 */
+	if (!preemptible() &&
+	    kprobe_running() &&
+	    kprobe_fault_handler(regs, X86_TRAP_GP))
+		return;
+
+	ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	if (error_code)
+		snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
+	else
+		hint = get_kernel_gp_address(regs, &gp_addr);
+
+	if (hint != GP_NO_HINT)
+		snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
+			 (hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
+						    : "maybe for address",
+			 gp_addr);
+
+	/*
+	 * KASAN is interested only in the non-canonical case, clear it
+	 * otherwise.
+	 */
+	if (hint != GP_NON_CANONICAL)
+		gp_addr = 0;
+
+	die_addr(desc, regs, error_code, gp_addr);
 
-	force_sig(SIGSEGV);
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_DYNAMIC_FTRACE
-	/*
-	 * ftrace must be first, everything else may cause a recursive crash.
-	 * See note by declaration of modifying_ftrace_code in ftrace.c
-	 */
-	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
-	    ftrace_int3_handler(regs))
-		return;
-#endif
 	if (poke_int3_handler(regs))
 		return;
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index ec534f978867..32a818764e03 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -233,7 +233,6 @@ static cycles_t check_tsc_warp(unsigned int timeout)
 	 * The measurement runs for 'timeout' msecs:
 	 */
 	end = start + (cycles_t) tsc_khz * timeout;
-	now = start;
 
 	for (i = 0; ; i++) {
 		/*
@@ -364,12 +363,12 @@ retry:
 		/* Force it to 0 if random warps brought us here */
 		atomic_set(&test_runs, 0);
 
-		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+		pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
 			smp_processor_id(), cpu);
-		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
-			   "turning off TSC clock.\n", max_warp);
+		pr_warn("Measured %Ld cycles TSC warp between CPUs, "
+			"turning off TSC clock.\n", max_warp);
 		if (random_warps)
-			pr_warning("TSC warped randomly between CPUs\n");
+			pr_warn("TSC warped randomly between CPUs\n");
 		mark_tsc_unstable("check_tsc_sync_source failed");
 	}
 
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 548fefed71ee..4d732a444711 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -1,6 +1,6 @@
 /*
- * umip.c Emulation for instruction protected by the Intel User-Mode
- * Instruction Prevention feature
+ * umip.c Emulation for instruction protected by the User-Mode Instruction
+ * Prevention feature
  *
  * Copyright (c) 2017, Intel Corporation.
  * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
@@ -18,10 +18,10 @@
 
 /** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
  *
- * The feature User-Mode Instruction Prevention present in recent Intel
- * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR)
- * from being executed with CPL > 0. Otherwise, a general protection fault is
- * issued.
+ * User-Mode Instruction Prevention is a security feature present in recent
+ * x86 processors that, when enabled, prevents a group of instructions (SGDT,
+ * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general
+ * protection fault if the instruction is executed with CPL > 0.
  *
  * Rather than relaying to the user space the general protection fault caused by
  * the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be
@@ -91,7 +91,7 @@ const char * const umip_insns[5] = {
 
 #define umip_pr_err(regs, fmt, ...) \
 	umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
-#define umip_pr_warning(regs, fmt, ...) \
+#define umip_pr_warn(regs, fmt, ...) \
 	umip_printk(regs, KERN_WARNING, fmt,  ##__VA_ARGS__)
 
 /**
@@ -380,14 +380,14 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (umip_inst < 0)
 		return false;
 
-	umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+	umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
 			umip_insns[umip_inst]);
 
 	/* Do not emulate (spoof) SLDT or STR. */
 	if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
 		return false;
 
-	umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+	umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
 
 	if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
 			      user_64bit_mode(regs)))
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 332ae6530fa8..e9cc182aa97e 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -187,6 +187,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 	return orc_ftrace_find(ip);
 }
 
+#ifdef CONFIG_MODULES
+
 static void orc_sort_swap(void *_a, void *_b, int size)
 {
 	struct orc_entry *orc_a, *orc_b;
@@ -229,7 +231,6 @@ static int orc_sort_cmp(const void *_a, const void *_b)
 	return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1;
 }
 
-#ifdef CONFIG_MODULES
 void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
 			void *_orc, size_t orc_size)
 {
@@ -273,9 +274,11 @@ void __init unwind_init(void)
 		return;
 	}
 
-	/* Sort the .orc_unwind and .orc_unwind_ip tables: */
-	sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
-	     orc_sort_swap);
+	/*
+	 * Note, the orc_unwind and orc_unwind_ip tables were already
+	 * sorted at build time via the 'sorttable' tool.
+	 * It's ready for binary search straight away, no need to sort it.
+	 */
 
 	/* Initialize the fast lookup table: */
 	lookup_num_blocks = orc_lookup_end - orc_lookup;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8cd745ef8c7b..15e5aad8ac2c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -842,8 +842,8 @@ static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 
 /**
  * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @auprobe: the probepoint information.
  * @mm: the probed address space.
- * @arch_uprobe: the probepoint information.
  * @addr: virtual address at which to install the probepoint
  * Return 0 on success or a -ve number on error.
  */
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index a024c4f7ba56..641f0fe1e5b4 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -31,7 +31,7 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
-ENTRY(verify_cpu)
+SYM_FUNC_START_LOCAL(verify_cpu)
 	pushf				# Save caller passed flags
 	push	$0			# Kill any dangerous flags
 	popf
@@ -137,4 +137,4 @@ ENTRY(verify_cpu)
 	popf				# Restore caller passed flags
 	xorl %eax, %eax
 	ret
-ENDPROC(verify_cpu)
+SYM_FUNC_END(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index a76c12b38e92..91d55454e702 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -381,7 +381,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 		mark_screen_rdonly(tsk->mm);
 
 	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
-	force_iret();
 	return regs->ax;
 }
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e2feacf921a0..e3296aa028fe 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -21,6 +21,9 @@
 #define LOAD_OFFSET __START_KERNEL_map
 #endif
 
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
@@ -141,17 +144,12 @@ SECTIONS
 		*(.text.__x86.indirect_thunk)
 		__indirect_thunk_end = .;
 #endif
+	} :text =0xcccc
 
-		/* End of text section */
-		_etext = .;
-	} :text = 0x9090
-
-	NOTES :text :note
-
-	EXCEPTION_TABLE(16) :text = 0x9090
-
-	/* .text should occupy whole number of pages */
+	/* End of text section, which should occupy whole number of pages */
+	_etext = .;
 	. = ALIGN(PAGE_SIZE);
+
 	X86_ALIGN_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
 	X86_ALIGN_RODATA_END
@@ -195,12 +193,10 @@ SECTIONS
 		__vvar_beginning_hack = .;
 
 		/* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) 			\
+#define EMIT_VVAR(name, offset)				\
 		. = __vvar_beginning_hack + offset;	\
 		*(.vvar_ ## name)
-#define __VVAR_KERNEL_LDS
 #include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
 		/*
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 18a799c8fa28..23e25f3034c2 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -20,7 +20,7 @@
 #include <asm/irq.h>
 #include <asm/io_apic.h>
 #include <asm/hpet.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/tsc.h>
 #include <asm/iommu.h>
 #include <asm/mach_traps.h>
@@ -31,6 +31,28 @@ static int __init iommu_init_noop(void) { return 0; }
 static void iommu_shutdown_noop(void) { }
 bool __init bool_x86_init_noop(void) { return false; }
 void x86_op_int_noop(int cpu) { }
+static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+static __init void get_rtc_noop(struct timespec64 *now) { }
+
+static __initconst const struct of_device_id of_cmos_match[] = {
+	{ .compatible = "motorola,mc146818" },
+	{}
+};
+
+/*
+ * Allow devicetree configured systems to disable the RTC by setting the
+ * corresponding DT node's status property to disabled. Code is optimized
+ * out for CONFIG_OF=n builds.
+ */
+static __init void x86_wallclock_init(void)
+{
+	struct device_node *node = of_find_matching_node(NULL, of_cmos_match);
+
+	if (node && !of_device_is_available(node)) {
+		x86_platform.get_wallclock = get_rtc_noop;
+		x86_platform.set_wallclock = set_rtc_noop;
+	}
+}
 
 /*
  * The platform setup functions are preset with the default functions
@@ -73,7 +95,7 @@ struct x86_init_ops x86_init __initdata = {
 	.timers = {
 		.setup_percpu_clockev	= setup_boot_APIC_clock,
 		.timer_init		= hpet_time_init,
-		.wallclock_init		= x86_init_noop,
+		.wallclock_init		= x86_wallclock_init,
 	},
 
 	.iommu = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 840e12583b85..991019d5eee1 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -60,13 +60,11 @@ config KVM
 	  If unsure, say N.
 
 config KVM_INTEL
-	tristate "KVM for Intel processors support"
-	depends on KVM
-	# for perf_guest_get_msrs():
-	depends on CPU_SUP_INTEL
+	tristate "KVM for Intel (and compatible) processors support"
+	depends on KVM && IA32_FEAT_CTL
 	---help---
-	  Provides support for KVM on Intel processors equipped with the VT
-	  extensions.
+	  Provides support for KVM on processors equipped with Intel's VT
+	  extensions, a.k.a. Virtual Machine Extensions (VMX).
 
 	  To compile this as a module, choose M here: the module
 	  will be called kvm-intel.
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 31ecf7a76d5a..b19ef421084d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,9 +8,9 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
-kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
+kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
-			   hyperv.o page_track.o debugfs.o
+			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
 kvm-amd-y		+= svm.o pmu_amd.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f68c0c753c38..cf55629ff0ff 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -402,7 +402,8 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 			entry->edx |= F(SPEC_CTRL);
 		if (boot_cpu_has(X86_FEATURE_STIBP))
 			entry->edx |= F(INTEL_STIBP);
-		if (boot_cpu_has(X86_FEATURE_SSBD))
+		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+		    boot_cpu_has(X86_FEATURE_AMD_SSBD))
 			entry->edx |= F(SPEC_CTRL_SSBD);
 		/*
 		 * We emulate ARCH_CAPABILITIES in software even
@@ -504,7 +505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	r = -E2BIG;
 
-	if (*nent >= maxnent)
+	if (WARN_ON(*nent >= maxnent))
 		goto out;
 
 	do_host_cpuid(entry, function, 0);
@@ -759,7 +760,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ebx |= F(AMD_IBRS);
 		if (boot_cpu_has(X86_FEATURE_STIBP))
 			entry->ebx |= F(AMD_STIBP);
-		if (boot_cpu_has(X86_FEATURE_SSBD))
+		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+		    boot_cpu_has(X86_FEATURE_AMD_SSBD))
 			entry->ebx |= F(AMD_SSBD);
 		if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
 			entry->ebx |= F(AMD_SSB_NO);
@@ -778,6 +780,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 0x8000001a:
 	case 0x8000001e:
 		break;
+	/* Support memory encryption cpuid if host supports it */
+	case 0x8000001F:
+		if (!boot_cpu_has(X86_FEATURE_SEV))
+			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+		break;
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
 		/*Just support up to 0xC0000004 now*/
@@ -810,14 +817,15 @@ out:
 static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
 			 int *nent, int maxnent, unsigned int type)
 {
+	if (*nent >= maxnent)
+		return -E2BIG;
+
 	if (type == KVM_GET_EMULATED_CPUID)
 		return __do_cpuid_func_emulated(entry, func, nent, maxnent);
 
 	return __do_cpuid_func(entry, func, nent, maxnent);
 }
 
-#undef F
-
 struct kvm_cpuid_param {
 	u32 func;
 	bool (*qualifier)(const struct kvm_cpuid_param *param);
@@ -1015,6 +1023,12 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
 		*ebx = entry->ebx;
 		*ecx = entry->ecx;
 		*edx = entry->edx;
+		if (function == 7 && index == 0) {
+			u64 data;
+		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+			    (data & TSX_CTRL_CPUID_CLEAR))
+				*ebx &= ~(F(RTM) | F(HLE));
+		}
 	} else {
 		*eax = *ebx = *ecx = *edx = 0;
 		/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 698efb8c3897..952d1a4f4d7e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2770,11 +2770,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		return emulate_ud(ctxt);
 
 	ops->get_msr(ctxt, MSR_EFER, &efer);
-	setup_syscalls_segments(ctxt, &cs, &ss);
-
 	if (!(efer & EFER_SCE))
 		return emulate_ud(ctxt);
 
+	setup_syscalls_segments(ctxt, &cs, &ss);
 	ops->get_msr(ctxt, MSR_STAR, &msr_data);
 	msr_data >>= 32;
 	cs_sel = (u16)(msr_data & 0xfffc);
@@ -2838,12 +2837,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->mode == X86EMUL_MODE_PROT64)
 		return X86EMUL_UNHANDLEABLE;
 
-	setup_syscalls_segments(ctxt, &cs, &ss);
-
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
 	if ((msr_data & 0xfffc) == 0x0)
 		return emulate_gp(ctxt, 0);
 
+	setup_syscalls_segments(ctxt, &cs, &ss);
 	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
 	ss_sel = cs_sel + 8;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index d859ae8890d0..9fd2dd89a1c5 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -271,8 +271,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
 	bool mask_before, mask_after;
-	int old_remote_irr, old_delivery_status;
 	union kvm_ioapic_redirect_entry *e;
+	unsigned long vcpu_bitmap;
+	int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
 
 	switch (ioapic->ioregsel) {
 	case IOAPIC_REG_VERSION:
@@ -296,6 +297,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		/* Preserve read-only fields */
 		old_remote_irr = e->fields.remote_irr;
 		old_delivery_status = e->fields.delivery_status;
+		old_dest_id = e->fields.dest_id;
+		old_dest_mode = e->fields.dest_mode;
 		if (ioapic->ioregsel & 1) {
 			e->bits &= 0xffffffff;
 			e->bits |= (u64) val << 32;
@@ -321,7 +324,34 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index, false);
-		kvm_make_scan_ioapic_request(ioapic->kvm);
+		if (e->fields.delivery_mode == APIC_DM_FIXED) {
+			struct kvm_lapic_irq irq;
+
+			irq.shorthand = 0;
+			irq.vector = e->fields.vector;
+			irq.delivery_mode = e->fields.delivery_mode << 8;
+			irq.dest_id = e->fields.dest_id;
+			irq.dest_mode = e->fields.dest_mode;
+			bitmap_zero(&vcpu_bitmap, 16);
+			kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+						 &vcpu_bitmap);
+			if (old_dest_mode != e->fields.dest_mode ||
+			    old_dest_id != e->fields.dest_id) {
+				/*
+				 * Update vcpu_bitmap with vcpus specified in
+				 * the previous request as well. This is done to
+				 * keep ioapic_handled_vectors synchronized.
+				 */
+				irq.dest_id = old_dest_id;
+				irq.dest_mode = old_dest_mode;
+				kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+							 &vcpu_bitmap);
+			}
+			kvm_make_scan_ioapic_request_mask(ioapic->kvm,
+							  &vcpu_bitmap);
+		} else {
+			kvm_make_scan_ioapic_request(ioapic->kvm);
+		}
 		break;
 	}
 }
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 1cc6c47dc77e..58767020de41 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -37,22 +37,50 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
-static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
-					      enum kvm_reg reg)
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+					     enum kvm_reg reg)
 {
-	if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
+	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+					 enum kvm_reg reg)
+{
+	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+					       enum kvm_reg reg)
+{
+	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+					   enum kvm_reg reg)
+{
+	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
+{
+	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+		return 0;
+
+	if (!kvm_register_is_available(vcpu, reg))
 		kvm_x86_ops->cache_reg(vcpu, reg);
 
 	return vcpu->arch.regs[reg];
 }
 
-static inline void kvm_register_write(struct kvm_vcpu *vcpu,
-				      enum kvm_reg reg,
+static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
 				      unsigned long val)
 {
+	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+		return;
+
 	vcpu->arch.regs[reg] = val;
-	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+	kvm_register_mark_dirty(vcpu, reg);
 }
 
 static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
@@ -79,9 +107,8 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
 {
 	might_sleep();  /* on svm */
 
-	if (!test_bit(VCPU_EXREG_PDPTR,
-		      (unsigned long *)&vcpu->arch.regs_avail))
-		kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
+	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
+		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
 
 	return vcpu->arch.walk_mmu->pdptrs[index];
 }
@@ -109,8 +136,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
 
 static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
 {
-	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
-		kvm_x86_ops->decache_cr3(vcpu);
+	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3);
 	return vcpu->arch.cr3;
 }
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b29d00b661ff..cf9177b4a07f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -557,60 +557,53 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 			irq->level, irq->trig_mode, dest_map);
 }
 
+static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
+			 struct kvm_lapic_irq *irq, u32 min)
+{
+	int i, count = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (min > map->max_apic_id)
+		return 0;
+
+	for_each_set_bit(i, ipi_bitmap,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, irq, NULL);
+		}
+	}
+
+	return count;
+}
+
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit)
 {
-	int i;
 	struct kvm_apic_map *map;
-	struct kvm_vcpu *vcpu;
 	struct kvm_lapic_irq irq = {0};
 	int cluster_size = op_64_bit ? 64 : 32;
-	int count = 0;
+	int count;
+
+	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
+		return -KVM_EINVAL;
 
 	irq.vector = icr & APIC_VECTOR_MASK;
 	irq.delivery_mode = icr & APIC_MODE_MASK;
 	irq.level = (icr & APIC_INT_ASSERT) != 0;
 	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 
-	if (icr & APIC_DEST_MASK)
-		return -KVM_EINVAL;
-	if (icr & APIC_SHORT_MASK)
-		return -KVM_EINVAL;
-
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (unlikely(!map)) {
-		count = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (min > map->max_apic_id)
-		goto out;
-	/* Bits above cluster_size are masked in the caller.  */
-	for_each_set_bit(i, &ipi_bitmap_low,
-		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
-		if (map->phys_map[min + i]) {
-			vcpu = map->phys_map[min + i]->vcpu;
-			count += kvm_apic_set_irq(vcpu, &irq, NULL);
-		}
+	count = -EOPNOTSUPP;
+	if (likely(map)) {
+		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
+		min += cluster_size;
+		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
 	}
 
-	min += cluster_size;
-
-	if (min > map->max_apic_id)
-		goto out;
-
-	for_each_set_bit(i, &ipi_bitmap_high,
-		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
-		if (map->phys_map[min + i]) {
-			vcpu = map->phys_map[min + i]->vcpu;
-			count += kvm_apic_set_irq(vcpu, &irq, NULL);
-		}
-	}
-
-out:
 	rcu_read_unlock();
 	return count;
 }
@@ -1124,6 +1117,50 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }
 
+/*
+ * This routine identifies the destination vcpus mask meant to receive the
+ * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
+ * out the destination vcpus array and set the bitmap or it traverses to
+ * each available vcpu to identify the same.
+ */
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			      unsigned long *vcpu_bitmap)
+{
+	struct kvm_lapic **dest_vcpu = NULL;
+	struct kvm_lapic *src = NULL;
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	unsigned long bitmap;
+	int i, vcpu_idx;
+	bool ret;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
+					  &bitmap);
+	if (ret) {
+		for_each_set_bit(i, &bitmap, 16) {
+			if (!dest_vcpu[i])
+				continue;
+			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
+			__set_bit(vcpu_idx, vcpu_bitmap);
+		}
+	} else {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (!kvm_apic_present(vcpu))
+				continue;
+			if (!kvm_apic_match_dest(vcpu, NULL,
+						 irq->delivery_mode,
+						 irq->dest_id,
+						 irq->dest_mode))
+				continue;
+			__set_bit(i, vcpu_bitmap);
+		}
+	}
+	rcu_read_unlock();
+}
+
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
@@ -2709,7 +2746,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
 	 * and leave the INIT pending.
 	 */
-	if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
+	if (kvm_vcpu_latch_init(vcpu)) {
 		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
 		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
 			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1f5014852e20..39925afdfcdc 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -226,6 +226,9 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
 
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			      unsigned long *vcpu_bitmap);
+
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2ce9da58611e..a32b847a8089 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -40,7 +40,7 @@
 #include <linux/kthread.h>
 
 #include <asm/page.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/cmpxchg.h>
 #include <asm/e820/api.h>
 #include <asm/io.h>
@@ -4395,7 +4395,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 			kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
 			if (!skip_tlb_flush) {
 				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-				kvm_x86_ops->tlb_flush(vcpu, true);
+				kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 			}
 
 			/*
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 3521e2d176f2..3521e2d176f2 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 97b21e7fd013..97b21e7fd013 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 46875bbd0419..bcc6a73d6628 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -62,8 +62,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 
-	if (!test_and_set_bit(pmc->idx,
-			      (unsigned long *)&pmu->reprogram_pmi)) {
+	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
 		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 	}
@@ -76,8 +75,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 
-	if (!test_and_set_bit(pmc->idx,
-			      (unsigned long *)&pmu->reprogram_pmi)) {
+	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
 		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
@@ -137,7 +135,37 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	}
 
 	pmc->perf_event = event;
-	clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
+	pmc_to_pmu(pmc)->event_count++;
+	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+}
+
+static void pmc_pause_counter(struct kvm_pmc *pmc)
+{
+	u64 counter = pmc->counter;
+
+	if (!pmc->perf_event)
+		return;
+
+	/* update counter, reset event value to avoid redundant accumulation */
+	counter += perf_event_pause(pmc->perf_event, true);
+	pmc->counter = counter & pmc_bitmask(pmc);
+}
+
+static bool pmc_resume_counter(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event)
+		return false;
+
+	/* recalibrate sample period and check if it's accepted by perf core */
+	if (perf_event_period(pmc->perf_event,
+			(-pmc->counter) & pmc_bitmask(pmc)))
+		return false;
+
+	/* reuse perf_event to serve as pmc_reprogram_counter() does*/
+	perf_event_enable(pmc->perf_event);
+
+	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+	return true;
 }
 
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
@@ -154,7 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 
 	pmc->eventsel = eventsel;
 
-	pmc_stop_counter(pmc);
+	pmc_pause_counter(pmc);
 
 	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
 		return;
@@ -193,6 +221,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (type == PERF_TYPE_RAW)
 		config = eventsel & X86_RAW_EVENT_MASK;
 
+	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+		return;
+
+	pmc_release_perf_event(pmc);
+
+	pmc->current_config = eventsel;
 	pmc_reprogram_counter(pmc, type, config,
 			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
 			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
@@ -209,7 +243,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 	struct kvm_pmu_event_filter *filter;
 	struct kvm *kvm = pmc->vcpu->kvm;
 
-	pmc_stop_counter(pmc);
+	pmc_pause_counter(pmc);
 
 	if (!en_field || !pmc_is_enabled(pmc))
 		return;
@@ -224,6 +258,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 			return;
 	}
 
+	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
+		return;
+
+	pmc_release_perf_event(pmc);
+
+	pmc->current_config = (u64)ctrl;
 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
 			      kvm_x86_ops->pmu_ops->find_fixed_event(idx),
 			      !(en_field & 0x2), /* exclude user */
@@ -253,27 +293,32 @@ EXPORT_SYMBOL_GPL(reprogram_counter);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	u64 bitmask;
 	int bit;
 
-	bitmask = pmu->reprogram_pmi;
-
-	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
 		struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
 
 		if (unlikely(!pmc || !pmc->perf_event)) {
-			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+			clear_bit(bit, pmu->reprogram_pmi);
 			continue;
 		}
 
 		reprogram_counter(pmu, bit);
 	}
+
+	/*
+	 * Unused perf_events are only released if the corresponding MSRs
+	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
+	 * triggers KVM_REQ_PMU if cleanup is needed.
+	 */
+	if (unlikely(pmu->need_cleanup))
+		kvm_pmu_cleanup(vcpu);
 }
 
 /* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
-	return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
+	return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
 }
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -323,7 +368,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 	if (is_vmware_backdoor_pmc(idx))
 		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
 
-	pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
+	pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
 	if (!pmc)
 		return 1;
 
@@ -339,7 +384,17 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
 
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
-	return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+	return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
+		kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+}
+
+static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr);
+
+	if (pmc)
+		__set_bit(pmc->idx, pmu->pmc_in_use);
 }
 
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -349,6 +404,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
 	return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
 }
 
@@ -376,9 +432,45 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
 	memset(pmu, 0, sizeof(*pmu));
 	kvm_x86_ops->pmu_ops->init(vcpu);
 	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
+	pmu->event_count = 0;
+	pmu->need_cleanup = false;
 	kvm_pmu_refresh(vcpu);
 }
 
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+	if (pmc_is_fixed(pmc))
+		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+			pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
+/* Release perf_events for vPMCs that have been unused for a full time slice.  */
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = NULL;
+	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
+	int i;
+
+	pmu->need_cleanup = false;
+
+	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
+		      pmu->pmc_in_use, X86_PMC_IDX_MAX);
+
+	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i);
+
+		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+			pmc_stop_counter(pmc);
+	}
+
+	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
+}
+
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_pmu_reset(vcpu);
@@ -416,8 +508,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	*filter = tmp;
 
 	mutex_lock(&kvm->lock);
-	rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
-			   mutex_is_locked(&kvm->lock));
+	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
+				     mutex_is_locked(&kvm->lock));
 	mutex_unlock(&kvm->lock);
 
 	synchronize_srcu_expedited(&kvm->srcu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 58265f761c3b..7ebb62326c14 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -25,9 +25,10 @@ struct kvm_pmu_ops {
 	unsigned (*find_fixed_event)(int idx);
 	bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
 	struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
-	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx,
-					  u64 *mask);
-	int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
+	struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
+		unsigned int idx, u64 *mask);
+	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
+	int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
 	bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -55,12 +56,21 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 	return counter & pmc_bitmask(pmc);
 }
 
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
 {
 	if (pmc->perf_event) {
-		pmc->counter = pmc_read_counter(pmc);
 		perf_event_release_kernel(pmc->perf_event);
 		pmc->perf_event = NULL;
+		pmc->current_config = 0;
+		pmc_to_pmu(pmc)->event_count--;
+	}
+}
+
+static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = pmc_read_counter(pmc);
+		pmc_release_perf_event(pmc);
 	}
 }
 
@@ -79,6 +89,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
 	return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc);
 }
 
+static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
+						 u64 data)
+{
+	return !(pmu->global_ctrl_mask & data);
+}
+
 /* returns general purpose PMC with the specified MSR. Note that it can be
  * used for both PERFCTRn and EVNTSELn; that is why it accepts base as a
  * paramenter to tell them apart.
@@ -110,13 +126,14 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx);
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
 void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
 
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index c8388389a3b0..ce0b10fe5e2b 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -174,7 +174,7 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 }
 
 /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
@@ -184,7 +184,8 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
 }
 
 /* idx is the ECX register of RDPMC instruction */
-static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask)
+static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+	unsigned int idx, u64 *mask)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *counters;
@@ -199,13 +200,19 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u
 
 static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
+	/* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough.  */
+	return false;
+}
+
+static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+{
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int ret = false;
+	struct kvm_pmc *pmc;
 
-	ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
-		get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
+	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+	pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
 
-	return ret;
+	return pmc;
 }
 
 static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -272,6 +279,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
 	pmu->nr_arch_fixed_counters = 0;
 	pmu->global_status = 0;
+	bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
 }
 
 static void amd_pmu_init(struct kvm_vcpu *vcpu)
@@ -285,6 +293,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
+		pmu->gp_counters[i].current_config = 0;
 	}
 }
 
@@ -306,8 +315,9 @@ struct kvm_pmu_ops amd_pmu_ops = {
 	.find_fixed_event = amd_find_fixed_event,
 	.pmc_is_enabled = amd_pmc_is_enabled,
 	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
+	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
 	.msr_idx_to_pmc = amd_msr_idx_to_pmc,
-	.is_valid_msr_idx = amd_is_valid_msr_idx,
+	.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
 	.is_valid_msr = amd_is_valid_msr,
 	.get_msr = amd_pmu_get_msr,
 	.set_msr = amd_pmu_set_msr,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c5673bda4b66..122d4ce3b1ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,6 +38,7 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/rwsem.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -418,9 +419,13 @@ enum {
 
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
+static int sev_flush_asids(void);
+static DECLARE_RWSEM(sev_deactivate_lock);
+static DEFINE_MUTEX(sev_bitmap_lock);
 static unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long *sev_asid_bitmap;
+static unsigned long *sev_reclaim_asid_bitmap;
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
 struct enc_region {
@@ -1235,11 +1240,15 @@ static __init int sev_hardware_setup(void)
 	/* Minimum ASID value that should be used for SEV guest */
 	min_sev_asid = cpuid_edx(0x8000001F);
 
-	/* Initialize SEV ASID bitmap */
+	/* Initialize SEV ASID bitmaps */
 	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
 	if (!sev_asid_bitmap)
 		return 1;
 
+	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+	if (!sev_reclaim_asid_bitmap)
+		return 1;
+
 	status = kmalloc(sizeof(*status), GFP_KERNEL);
 	if (!status)
 		return 1;
@@ -1418,8 +1427,12 @@ static __exit void svm_hardware_unsetup(void)
 {
 	int cpu;
 
-	if (svm_sev_enabled())
+	if (svm_sev_enabled()) {
 		bitmap_free(sev_asid_bitmap);
+		bitmap_free(sev_reclaim_asid_bitmap);
+
+		sev_flush_asids();
+	}
 
 	for_each_possible_cpu(cpu)
 		svm_cpu_uninit(cpu);
@@ -1729,25 +1742,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void __sev_asid_free(int asid)
+static void sev_asid_free(int asid)
 {
 	struct svm_cpu_data *sd;
 	int cpu, pos;
 
+	mutex_lock(&sev_bitmap_lock);
+
 	pos = asid - 1;
-	clear_bit(pos, sev_asid_bitmap);
+	__set_bit(pos, sev_reclaim_asid_bitmap);
 
 	for_each_possible_cpu(cpu) {
 		sd = per_cpu(svm_data, cpu);
 		sd->sev_vmcbs[pos] = NULL;
 	}
-}
 
-static void sev_asid_free(struct kvm *kvm)
-{
-	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
-	__sev_asid_free(sev->asid);
+	mutex_unlock(&sev_bitmap_lock);
 }
 
 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
@@ -1764,10 +1774,12 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 
 	/* deactivate handle */
 	data->handle = handle;
+
+	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
+	down_read(&sev_deactivate_lock);
 	sev_guest_deactivate(data, NULL);
+	up_read(&sev_deactivate_lock);
 
-	wbinvd_on_all_cpus();
-	sev_guest_df_flush(NULL);
 	kfree(data);
 
 	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
@@ -1916,7 +1928,7 @@ static void sev_vm_destroy(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 
 	sev_unbind_asid(kvm, sev->handle);
-	sev_asid_free(kvm);
+	sev_asid_free(sev->asid);
 }
 
 static void avic_vm_destroy(struct kvm *kvm)
@@ -2370,7 +2382,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
 		break;
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
 	}
 }
 
@@ -2523,10 +2535,6 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
-static void svm_decache_cr3(struct kvm_vcpu *vcpu)
-{
-}
-
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
@@ -4997,6 +5005,18 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+#ifdef CONFIG_RETPOLINE
+	if (exit_code == SVM_EXIT_MSR)
+		return msr_interception(svm);
+	else if (exit_code == SVM_EXIT_VINTR)
+		return interrupt_window_interception(svm);
+	else if (exit_code == SVM_EXIT_INTR)
+		return intr_interception(svm);
+	else if (exit_code == SVM_EXIT_HLT)
+		return halt_interception(svm);
+	else if (exit_code == SVM_EXIT_NPF)
+		return npf_interception(svm);
+#endif
 	return svm_exit_handlers[exit_code](svm);
 }
 
@@ -5092,8 +5112,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (svm_nested_virtualize_tpr(vcpu) ||
-	    kvm_vcpu_apicv_active(vcpu))
+	if (svm_nested_virtualize_tpr(vcpu))
 		return;
 
 	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -5110,9 +5129,9 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 	return;
 }
 
-static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool svm_get_enable_apicv(struct kvm *kvm)
 {
-	return avic && irqchip_split(vcpu->kvm);
+	return avic && irqchip_split(kvm);
 }
 
 static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
@@ -5634,7 +5653,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
 	clgi();
-	kvm_load_guest_xcr0(vcpu);
+	kvm_load_guest_xsave_state(vcpu);
 
 	if (lapic_in_kernel(vcpu) &&
 		vcpu->arch.apic->lapic_timer.timer_advance_ns)
@@ -5784,7 +5803,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 		kvm_before_interrupt(&svm->vcpu);
 
-	kvm_put_guest_xcr0(vcpu);
+	kvm_load_host_xsave_state(vcpu);
 	stgi();
 
 	/* Any pending NMI will happen here */
@@ -5893,6 +5912,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+				    boot_cpu_has(X86_FEATURE_XSAVES);
+
 	/* Update nrips enabled cache */
 	svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
 
@@ -5936,13 +5958,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 		if (npt_enabled)
 			entry->edx |= F(NPT);
 
-		break;
-	case 0x8000001F:
-		/* Support memory encryption cpuid if host supports it */
-		if (boot_cpu_has(X86_FEATURE_SEV))
-			cpuid(0x8000001f, &entry->eax, &entry->ebx,
-				&entry->ecx, &entry->edx);
-
 	}
 }
 
@@ -5968,7 +5983,7 @@ static bool svm_mpx_supported(void)
 
 static bool svm_xsaves_supported(void)
 {
-	return false;
+	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
 static bool svm_umip_emulated(void)
@@ -6270,18 +6285,73 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int sev_flush_asids(void)
+{
+	int ret, error;
+
+	/*
+	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
+	 * so it must be guarded.
+	 */
+	down_write(&sev_deactivate_lock);
+
+	wbinvd_on_all_cpus();
+	ret = sev_guest_df_flush(&error);
+
+	up_write(&sev_deactivate_lock);
+
+	if (ret)
+		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
+
+	return ret;
+}
+
+/* Must be called with the sev_bitmap_lock held */
+static bool __sev_recycle_asids(void)
+{
+	int pos;
+
+	/* Check if there are any ASIDs to reclaim before performing a flush */
+	pos = find_next_bit(sev_reclaim_asid_bitmap,
+			    max_sev_asid, min_sev_asid - 1);
+	if (pos >= max_sev_asid)
+		return false;
+
+	if (sev_flush_asids())
+		return false;
+
+	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
+		   max_sev_asid);
+	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+
+	return true;
+}
+
 static int sev_asid_new(void)
 {
+	bool retry = true;
 	int pos;
 
+	mutex_lock(&sev_bitmap_lock);
+
 	/*
 	 * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
 	 */
+again:
 	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
-	if (pos >= max_sev_asid)
+	if (pos >= max_sev_asid) {
+		if (retry && __sev_recycle_asids()) {
+			retry = false;
+			goto again;
+		}
+		mutex_unlock(&sev_bitmap_lock);
 		return -EBUSY;
+	}
+
+	__set_bit(pos, sev_asid_bitmap);
+
+	mutex_unlock(&sev_bitmap_lock);
 
-	set_bit(pos, sev_asid_bitmap);
 	return pos + 1;
 }
 
@@ -6309,7 +6379,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	return 0;
 
 e_free:
-	__sev_asid_free(asid);
+	sev_asid_free(asid);
 	return ret;
 }
 
@@ -6319,12 +6389,6 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 	int asid = sev_get_asid(kvm);
 	int ret;
 
-	wbinvd_on_all_cpus();
-
-	ret = sev_guest_df_flush(error);
-	if (ret)
-		return ret;
-
 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
@@ -7214,7 +7278,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.get_cpl = svm_get_cpl,
 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
 	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
-	.decache_cr3 = svm_decache_cr3,
 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
 	.set_cr0 = svm_set_cr0,
 	.set_cr3 = svm_set_cr3,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0e7c9301fe86..6879966b7648 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -10,6 +10,7 @@
 #include "hyperv.h"
 #include "mmu.h"
 #include "nested.h"
+#include "pmu.h"
 #include "trace.h"
 #include "x86.h"
 
@@ -27,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO);
 	failed;								\
 })
 
+#define SET_MSR_OR_WARN(vcpu, idx, data)				\
+({									\
+	bool failed = kvm_set_msr(vcpu, idx, data);			\
+	if (failed)							\
+		pr_warn_ratelimited(					\
+				"%s cannot write MSR (0x%x, 0x%llx)\n",	\
+				__func__, idx, data);			\
+	failed;								\
+})
+
 /*
  * Hyper-V requires all of these, so mark them as supported even though
  * they are just treated the same as all-context.
@@ -257,7 +268,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	vmx->nested.cached_shadow_vmcs12 = NULL;
 	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
-		kvm_release_page_dirty(vmx->nested.apic_access_page);
+		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
@@ -929,6 +940,57 @@ fail:
 	return i + 1;
 }
 
+static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
+					    u32 msr_index,
+					    u64 *data)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	/*
+	 * If the L0 hypervisor stored a more accurate value for the TSC that
+	 * does not include the time taken for emulation of the L2->L1
+	 * VM-exit in L0, use the more accurate value.
+	 */
+	if (msr_index == MSR_IA32_TSC) {
+		int index = vmx_find_msr_index(&vmx->msr_autostore.guest,
+					       MSR_IA32_TSC);
+
+		if (index >= 0) {
+			u64 val = vmx->msr_autostore.guest.val[index].value;
+
+			*data = kvm_read_l1_tsc(vcpu, val);
+			return true;
+		}
+	}
+
+	if (kvm_get_msr(vcpu, msr_index, data)) {
+		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
+			msr_index);
+		return false;
+	}
+	return true;
+}
+
+static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
+				     struct vmx_msr_entry *e)
+{
+	if (kvm_vcpu_read_guest(vcpu,
+				gpa + i * sizeof(*e),
+				e, 2 * sizeof(u32))) {
+		pr_debug_ratelimited(
+			"%s cannot read MSR entry (%u, 0x%08llx)\n",
+			__func__, i, gpa + i * sizeof(*e));
+		return false;
+	}
+	if (nested_vmx_store_msr_check(vcpu, e)) {
+		pr_debug_ratelimited(
+			"%s check failed (%u, 0x%x, 0x%x)\n",
+			__func__, i, e->index, e->reserved);
+		return false;
+	}
+	return true;
+}
+
 static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 {
 	u64 data;
@@ -940,26 +1002,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 		if (unlikely(i >= max_msr_list_size))
 			return -EINVAL;
 
-		if (kvm_vcpu_read_guest(vcpu,
-					gpa + i * sizeof(e),
-					&e, 2 * sizeof(u32))) {
-			pr_debug_ratelimited(
-				"%s cannot read MSR entry (%u, 0x%08llx)\n",
-				__func__, i, gpa + i * sizeof(e));
+		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
 			return -EINVAL;
-		}
-		if (nested_vmx_store_msr_check(vcpu, &e)) {
-			pr_debug_ratelimited(
-				"%s check failed (%u, 0x%x, 0x%x)\n",
-				__func__, i, e.index, e.reserved);
-			return -EINVAL;
-		}
-		if (kvm_get_msr(vcpu, e.index, &data)) {
-			pr_debug_ratelimited(
-				"%s cannot read MSR (%u, 0x%x)\n",
-				__func__, i, e.index);
+
+		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
 			return -EINVAL;
-		}
+
 		if (kvm_vcpu_write_guest(vcpu,
 					 gpa + i * sizeof(e) +
 					     offsetof(struct vmx_msr_entry, value),
@@ -973,6 +1021,60 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 	return 0;
 }
 
+static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u32 count = vmcs12->vm_exit_msr_store_count;
+	u64 gpa = vmcs12->vm_exit_msr_store_addr;
+	struct vmx_msr_entry e;
+	u32 i;
+
+	for (i = 0; i < count; i++) {
+		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
+			return false;
+
+		if (e.index == msr_index)
+			return true;
+	}
+	return false;
+}
+
+static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
+					   u32 msr_index)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
+	bool in_vmcs12_store_list;
+	int msr_autostore_index;
+	bool in_autostore_list;
+	int last;
+
+	msr_autostore_index = vmx_find_msr_index(autostore, msr_index);
+	in_autostore_list = msr_autostore_index >= 0;
+	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
+
+	if (in_vmcs12_store_list && !in_autostore_list) {
+		if (autostore->nr == NR_LOADSTORE_MSRS) {
+			/*
+			 * Emulated VMEntry does not fail here.  Instead a less
+			 * accurate value will be returned by
+			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
+			 * instead of reading the value from the vmcs02 VMExit
+			 * MSR-store area.
+			 */
+			pr_warn_ratelimited(
+				"Not enough msr entries in msr_autostore.  Can't add msr %x\n",
+				msr_index);
+			return;
+		}
+		last = autostore->nr++;
+		autostore->val[last].index = msr_index;
+	} else if (!in_vmcs12_store_list && in_autostore_list) {
+		last = --autostore->nr;
+		autostore->val[msr_autostore_index] = autostore->val[last];
+	}
+}
+
 static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	unsigned long invalid_mask;
@@ -1012,7 +1114,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 		kvm_mmu_new_cr3(vcpu, cr3, false);
 
 	vcpu->arch.cr3 = cr3;
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	kvm_init_mmu(vcpu, false);
 
@@ -1024,7 +1126,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
  * populated by L2 differently than TLB entries populated
  * by L1.
  *
- * If L1 uses EPT, then TLB entries are tagged with different EPTP.
+ * If L0 uses EPT, L1 and L2 run with different EPTP because
+ * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
+ * are tagged with different EPTP.
  *
  * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
  * with different VPID (L1 entries are tagged with vmx->vpid
@@ -1034,7 +1138,7 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-	return nested_cpu_has_ept(vmcs12) ||
+	return enable_ept ||
 	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
 }
 
@@ -2018,7 +2122,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
 	 * addresses are constant (for vmcs02), the counts can change based
 	 * on L2's behavior, e.g. switching to/from long mode.
 	 */
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
@@ -2073,6 +2177,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	exec_control &= ~CPU_BASED_TPR_SHADOW;
 	exec_control |= vmcs12->cpu_based_vm_exec_control;
 
+	vmx->nested.l1_tpr_threshold = -1;
 	if (exec_control & CPU_BASED_TPR_SHADOW)
 		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 #ifdef CONFIG_X86_64
@@ -2285,6 +2390,13 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
 	}
 
+	/*
+	 * Make sure the msr_autostore list is up to date before we set the
+	 * count in the vmcs02.
+	 */
+	prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
+
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 
@@ -2381,9 +2493,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
 	if (nested_cpu_has_ept(vmcs12))
 		nested_ept_init_mmu_context(vcpu);
-	else if (nested_cpu_has2(vmcs12,
-				 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
-		vmx_flush_tlb(vcpu, true);
 
 	/*
 	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
@@ -2418,6 +2527,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 				entry_failure_code))
 		return -EINVAL;
 
+	/*
+	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
+	 * on nested VM-Exit, which can occur without actually running L2 and
+	 * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+	 * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
+	 * transition to HLT instead of running L2.
+	 */
+	if (enable_ept)
+		vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
+
 	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
 	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
 	    is_pae_paging(vcpu)) {
@@ -2430,6 +2549,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (!enable_ept)
 		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
 
+	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+	    SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+			    vmcs12->guest_ia32_perf_global_ctrl))
+		return -EINVAL;
+
 	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
 	kvm_rip_write(vcpu, vmcs12->guest_rip);
 	return 0;
@@ -2664,6 +2788,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 	    CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
 		return -EINVAL;
 
+	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+					   vmcs12->host_ia32_perf_global_ctrl)))
+		return -EINVAL;
+
 #ifdef CONFIG_X86_64
 	ia32e = !!(vcpu->arch.efer & EFER_LMA);
 #else
@@ -2779,6 +2908,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 	}
 
+	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+					   vmcs12->guest_ia32_perf_global_ctrl)))
+		return -EINVAL;
+
 	/*
 	 * If the load IA32_EFER VM-entry control is 1, the following checks
 	 * are performed on the field for the IA32_EFER MSR:
@@ -2933,7 +3067,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 		 * to it so we can release it later.
 		 */
 		if (vmx->nested.apic_access_page) { /* shouldn't happen */
-			kvm_release_page_dirty(vmx->nested.apic_access_page);
+			kvm_release_page_clean(vmx->nested.apic_access_page);
 			vmx->nested.apic_access_page = NULL;
 		}
 		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
@@ -3461,6 +3595,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 		test_bit(KVM_APIC_INIT, &apic->pending_events)) {
 		if (block_nested_events)
 			return -EBUSY;
+		clear_bit(KVM_APIC_INIT, &apic->pending_events);
 		nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
 		return 0;
 	}
@@ -3864,8 +3999,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		vcpu->arch.pat = vmcs12->host_ia32_pat;
 	}
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
-		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
-			vmcs12->host_ia32_perf_global_ctrl);
+		SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+				vmcs12->host_ia32_perf_global_ctrl);
 
 	/* Set L1 segment info according to Intel SDM
 	    27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -3984,7 +4119,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
 
 	nested_ept_uninit_mmu_context(vcpu);
 	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	/*
 	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -4112,6 +4247,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+	if (vmx->nested.l1_tpr_threshold != -1)
+		vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
 
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
@@ -4119,15 +4256,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
 		vmx->nested.change_vmcs01_virtual_apic_mode = false;
 		vmx_set_virtual_apic_mode(vcpu);
-	} else if (!nested_cpu_has_ept(vmcs12) &&
-		   nested_cpu_has2(vmcs12,
-				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-		vmx_flush_tlb(vcpu, true);
 	}
 
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
-		kvm_release_page_dirty(vmx->nested.apic_access_page);
+		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
@@ -4327,6 +4460,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 	return 0;
 }
 
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx;
+
+	if (!nested_vmx_allowed(vcpu))
+		return;
+
+	vmx = to_vmx(vcpu);
+	if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+		vmx->nested.msrs.entry_ctls_high |=
+				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+		vmx->nested.msrs.exit_ctls_high |=
+				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+	} else {
+		vmx->nested.msrs.entry_ctls_high &=
+				~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+		vmx->nested.msrs.exit_ctls_high &=
+				~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+	}
+}
+
 static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 {
 	gva_t gva;
@@ -4434,8 +4588,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	gpa_t vmptr;
 	uint32_t revision;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
-		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+	const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
+		| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 
 	/*
 	 * The Intel VMX Instruction Reference lists a bunch of bits that are
@@ -5766,7 +5920,7 @@ error_guest_mode:
 	return ret;
 }
 
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_set_vmcs_shadowing_bitmap(void)
 {
 	if (enable_shadow_vmcs) {
 		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
@@ -6047,23 +6201,23 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
 		init_vmcs_shadow_fields();
 	}
 
-	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear,
-	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch,
-	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld,
-	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst,
-	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread,
-	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume,
-	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite,
-	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff,
-	exit_handlers[EXIT_REASON_VMON]		= handle_vmon,
-	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept,
-	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid,
-	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc,
+	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear;
+	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch;
+	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld;
+	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst;
+	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread;
+	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume;
+	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite;
+	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff;
+	exit_handlers[EXIT_REASON_VMON]		= handle_vmon;
+	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept;
+	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid;
+	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc;
 
 	kvm_x86_ops->check_nested_events = vmx_check_nested_events;
 	kvm_x86_ops->get_nested_state = vmx_get_nested_state;
 	kvm_x86_ops->set_nested_state = vmx_set_nested_state;
-	kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages,
+	kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
 	kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
 	kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
 
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 6280f33e5fa6..fc874d4ead0f 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -21,7 +21,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
 				bool apicv);
 void nested_vmx_hardware_unsetup(void);
 __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
-void nested_vmx_vcpu_setup(void);
+void nested_vmx_set_vmcs_shadowing_bitmap(void);
 void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
 enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 						     bool from_vmentry);
@@ -33,6 +33,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
 int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 			u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
 
 static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 {
@@ -256,7 +257,7 @@ static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
 	return ((val & fixed1) | fixed0) == val;
 }
 
-static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
@@ -270,7 +271,7 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 	return fixed_bits_valid(val, fixed0, fixed1);
 }
 
-static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
@@ -278,7 +279,7 @@ static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 	return fixed_bits_valid(val, fixed0, fixed1);
 }
 
-static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 3e9c059099e9..7023138b1cb0 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -15,6 +15,7 @@
 #include "x86.h"
 #include "cpuid.h"
 #include "lapic.h"
+#include "nested.h"
 #include "pmu.h"
 
 static struct kvm_event_hw_type_mapping intel_arch_events[] = {
@@ -46,6 +47,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 		if (old_ctrl == new_ctrl)
 			continue;
 
+		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
 		reprogram_fixed_counter(pmc, new_ctrl, i);
 	}
 
@@ -111,7 +113,7 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 }
 
 /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	bool fixed = idx & (1u << 30);
@@ -122,8 +124,8 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
 		(fixed && idx >= pmu->nr_arch_fixed_counters);
 }
 
-static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
-					    unsigned idx, u64 *mask)
+static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+					    unsigned int idx, u64 *mask)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	bool fixed = idx & (1u << 30);
@@ -162,6 +164,18 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	return ret;
 }
 
+static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+
+	pmc = get_fixed_pmc(pmu, msr);
+	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
+	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+
+	return pmc;
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -223,7 +237,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		if (pmu->global_ctrl == data)
 			return 0;
-		if (!(data & pmu->global_ctrl_mask)) {
+		if (kvm_valid_perf_global_ctrl(pmu, data)) {
 			global_ctrl_changed(pmu, data);
 			return 0;
 		}
@@ -317,6 +331,13 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
 	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
 		pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+
+	bitmap_set(pmu->all_valid_pmc_idx,
+		0, pmu->nr_arch_gp_counters);
+	bitmap_set(pmu->all_valid_pmc_idx,
+		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
+
+	nested_vmx_pmu_entry_exit_ctls_update(vcpu);
 }
 
 static void intel_pmu_init(struct kvm_vcpu *vcpu)
@@ -328,12 +349,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
+		pmu->gp_counters[i].current_config = 0;
 	}
 
 	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
 		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
 		pmu->fixed_counters[i].vcpu = vcpu;
 		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+		pmu->fixed_counters[i].current_config = 0;
 	}
 }
 
@@ -366,8 +389,9 @@ struct kvm_pmu_ops intel_pmu_ops = {
 	.find_fixed_event = intel_find_fixed_event,
 	.pmc_is_enabled = intel_pmc_is_enabled,
 	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
+	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
 	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
-	.is_valid_msr_idx = intel_is_valid_msr_idx,
+	.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
 	.is_valid_msr = intel_is_valid_msr,
 	.get_msr = intel_pmu_get_msr,
 	.set_msr = intel_pmu_set_msr,
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 751a384c2eb0..81ada2ce99e7 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -43,7 +43,7 @@
  * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
  * to vmx_vmexit.
  */
-ENTRY(vmx_vmenter)
+SYM_FUNC_START(vmx_vmenter)
 	/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
 	je 2f
 
@@ -65,7 +65,7 @@ ENTRY(vmx_vmenter)
 	_ASM_EXTABLE(1b, 5b)
 	_ASM_EXTABLE(2b, 5b)
 
-ENDPROC(vmx_vmenter)
+SYM_FUNC_END(vmx_vmenter)
 
 /**
  * vmx_vmexit - Handle a VMX VM-Exit
@@ -77,7 +77,7 @@ ENDPROC(vmx_vmenter)
  * here after hardware loads the host's state, i.e. this is the destination
  * referred to by VMCS.HOST_RIP.
  */
-ENTRY(vmx_vmexit)
+SYM_FUNC_START(vmx_vmexit)
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
 	/* Preserve guest's RAX, it's used to stuff the RSB. */
@@ -90,7 +90,7 @@ ENTRY(vmx_vmexit)
 .Lvmexit_skip_rsb:
 #endif
 	ret
-ENDPROC(vmx_vmexit)
+SYM_FUNC_END(vmx_vmexit)
 
 /**
  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
@@ -101,7 +101,7 @@ ENDPROC(vmx_vmexit)
  * Returns:
  *	0 on VM-Exit, 1 on VM-Fail
  */
-ENTRY(__vmx_vcpu_run)
+SYM_FUNC_START(__vmx_vcpu_run)
 	push %_ASM_BP
 	mov  %_ASM_SP, %_ASM_BP
 #ifdef CONFIG_X86_64
@@ -233,4 +233,4 @@ ENTRY(__vmx_vcpu_run)
 	/* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
 2:	mov $1, %eax
 	jmp 1b
-ENDPROC(__vmx_vcpu_run)
+SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 04a8212704c1..cdb4bf50ee14 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -106,8 +106,6 @@ module_param(enable_apicv, bool, S_IRUGO);
 static bool __read_mostly nested = 1;
 module_param(nested, bool, S_IRUGO);
 
-static u64 __read_mostly host_xss;
-
 bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
@@ -450,6 +448,7 @@ const u32 vmx_msr_index[] = {
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
 	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+	MSR_IA32_TSX_CTRL,
 };
 
 #if IS_ENABLED(CONFIG_HYPERV)
@@ -638,6 +637,23 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 	return NULL;
 }
 
+static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
+{
+	int ret = 0;
+
+	u64 old_msr_data = msr->data;
+	msr->data = data;
+	if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+		preempt_disable();
+		ret = kvm_set_shared_msr(msr->index, msr->data,
+					 msr->mask);
+		preempt_enable();
+		if (ret)
+			msr->data = old_msr_data;
+	}
+	return ret;
+}
+
 void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 {
 	vmcs_clear(loaded_vmcs->vmcs);
@@ -726,8 +742,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
 	bool ret;
 	u32 mask = 1 << (seg * SEG_FIELD_NR + field);
 
-	if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
-		vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+	if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
+		kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
 		vmx->segment_cache.bitmask = 0;
 	}
 	ret = vmx->segment_cache.bitmask & mask;
@@ -835,7 +851,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 	vm_exit_controls_clearbit(vmx, exit);
 }
 
-static int find_msr(struct vmx_msrs *m, unsigned int msr)
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
 {
 	unsigned int i;
 
@@ -869,7 +885,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 		}
 		break;
 	}
-	i = find_msr(&m->guest, msr);
+	i = vmx_find_msr_index(&m->guest, msr);
 	if (i < 0)
 		goto skip_guest;
 	--m->guest.nr;
@@ -877,7 +893,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 
 skip_guest:
-	i = find_msr(&m->host, msr);
+	i = vmx_find_msr_index(&m->host, msr);
 	if (i < 0)
 		return;
 
@@ -936,12 +952,12 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 	}
 
-	i = find_msr(&m->guest, msr);
+	i = vmx_find_msr_index(&m->guest, msr);
 	if (!entry_only)
-		j = find_msr(&m->host, msr);
+		j = vmx_find_msr_index(&m->host, msr);
 
-	if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
-		(j < 0 &&  m->host.nr == NR_AUTOLOAD_MSRS)) {
+	if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
+		(j < 0 &&  m->host.nr == NR_LOADSTORE_MSRS)) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
@@ -1352,14 +1368,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
 		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
-		/*
-		 * VM exits change the host TR limit to 0x67 after a VM
-		 * exit.  This is okay, since 0x67 covers everything except
-		 * the IO bitmap and have have code to handle the IO bitmap
-		 * being lost after a VM exit.
-		 */
-		BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
-
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 
@@ -1418,35 +1426,44 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
 
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long rflags, save_rflags;
 
-	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
-		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+	if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
+		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
 		rflags = vmcs_readl(GUEST_RFLAGS);
-		if (to_vmx(vcpu)->rmode.vm86_active) {
+		if (vmx->rmode.vm86_active) {
 			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-			save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+			save_rflags = vmx->rmode.save_rflags;
 			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
 		}
-		to_vmx(vcpu)->rflags = rflags;
+		vmx->rflags = rflags;
 	}
-	return to_vmx(vcpu)->rflags;
+	return vmx->rflags;
 }
 
 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	unsigned long old_rflags = vmx_get_rflags(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long old_rflags;
 
-	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
-	to_vmx(vcpu)->rflags = rflags;
-	if (to_vmx(vcpu)->rmode.vm86_active) {
-		to_vmx(vcpu)->rmode.save_rflags = rflags;
+	if (enable_unrestricted_guest) {
+		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+		vmx->rflags = rflags;
+		vmcs_writel(GUEST_RFLAGS, rflags);
+		return;
+	}
+
+	old_rflags = vmx_get_rflags(vcpu);
+	vmx->rflags = rflags;
+	if (vmx->rmode.vm86_active) {
+		vmx->rmode.save_rflags = rflags;
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	}
 	vmcs_writel(GUEST_RFLAGS, rflags);
 
-	if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
-		to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
+	if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
+		vmx->emulation_required = emulation_required(vcpu);
 }
 
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
@@ -1683,6 +1700,9 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	index = __find_msr_index(vmx, MSR_TSC_AUX);
 	if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
 		move_msr_up(vmx, index, save_nmsrs++);
+	index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
+	if (index >= 0)
+		move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
 	vmx->guest_msrs_ready = false;
@@ -1782,6 +1802,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 #endif
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_info);
+	case MSR_IA32_TSX_CTRL:
+		if (!msr_info->host_initiated &&
+		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+			return 1;
+		goto find_shared_msr;
 	case MSR_IA32_UMWAIT_CONTROL:
 		if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
 			return 1;
@@ -1814,11 +1839,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MCG_EXT_CTL:
 		if (!msr_info->host_initiated &&
 		    !(vmx->msr_ia32_feature_control &
-		      FEATURE_CONTROL_LMCE))
+		      FEAT_CTL_LMCE_ENABLED))
 			return 1;
 		msr_info->data = vcpu->arch.mcg_ext_ctl;
 		break;
-	case MSR_IA32_FEATURE_CONTROL:
+	case MSR_IA32_FEAT_CTL:
 		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
@@ -1826,14 +1851,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
 				       &msr_info->data);
-	case MSR_IA32_XSS:
-		if (!vmx_xsaves_supported() ||
-		    (!msr_info->host_initiated &&
-		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
-			return 1;
-		msr_info->data = vcpu->arch.ia32_xss;
-		break;
 	case MSR_IA32_RTIT_CTL:
 		if (pt_mode != PT_MODE_HOST_GUEST)
 			return 1;
@@ -1884,8 +1901,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
-		/* Else, falls through */
+		goto find_shared_msr;
 	default:
+	find_shared_msr:
 		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
 			msr_info->data = msr->data;
@@ -2001,6 +2019,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					      MSR_IA32_SPEC_CTRL,
 					      MSR_TYPE_RW);
 		break;
+	case MSR_IA32_TSX_CTRL:
+		if (!msr_info->host_initiated &&
+		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
+			return 1;
+		if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
+			return 1;
+		goto find_shared_msr;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2049,15 +2074,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MCG_EXT_CTL:
 		if ((!msr_info->host_initiated &&
 		     !(to_vmx(vcpu)->msr_ia32_feature_control &
-		       FEATURE_CONTROL_LMCE)) ||
+		       FEAT_CTL_LMCE_ENABLED)) ||
 		    (data & ~MCG_EXT_CTL_LMCE_EN))
 			return 1;
 		vcpu->arch.mcg_ext_ctl = data;
 		break;
-	case MSR_IA32_FEATURE_CONTROL:
+	case MSR_IA32_FEAT_CTL:
 		if (!vmx_feature_control_msr_valid(vcpu, data) ||
 		    (to_vmx(vcpu)->msr_ia32_feature_control &
-		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
+		     FEAT_CTL_LOCKED && !msr_info->host_initiated))
 			return 1;
 		vmx->msr_ia32_feature_control = data;
 		if (msr_info->host_initiated && data == 0)
@@ -2069,25 +2094,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_set_vmx_msr(vcpu, msr_index, data);
-	case MSR_IA32_XSS:
-		if (!vmx_xsaves_supported() ||
-		    (!msr_info->host_initiated &&
-		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
-			return 1;
-		/*
-		 * The only supported bit as of Skylake is bit 8, but
-		 * it is not supported on KVM.
-		 */
-		if (data != 0)
-			return 1;
-		vcpu->arch.ia32_xss = data;
-		if (vcpu->arch.ia32_xss != host_xss)
-			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
-				vcpu->arch.ia32_xss, host_xss, false);
-		else
-			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
-		break;
 	case MSR_IA32_RTIT_CTL:
 		if ((pt_mode != PT_MODE_HOST_GUEST) ||
 			vmx_rtit_ctl_check(vcpu, data) ||
@@ -2152,23 +2158,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
 			return 1;
-		/* Else, falls through */
+		goto find_shared_msr;
+
 	default:
+	find_shared_msr:
 		msr = find_msr_entry(vmx, msr_index);
-		if (msr) {
-			u64 old_msr_data = msr->data;
-			msr->data = data;
-			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
-				preempt_disable();
-				ret = kvm_set_shared_msr(msr->index, msr->data,
-							 msr->mask);
-				preempt_enable();
-				if (ret)
-					msr->data = old_msr_data;
-			}
-			break;
-		}
-		ret = kvm_set_msr_common(vcpu, msr_info);
+		if (msr)
+			ret = vmx_set_guest_msr(vmx, msr, data);
+		else
+			ret = kvm_set_msr_common(vcpu, msr_info);
 	}
 
 	return ret;
@@ -2176,7 +2174,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
-	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+	kvm_register_mark_available(vcpu, reg);
+
 	switch (reg) {
 	case VCPU_REGS_RSP:
 		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
@@ -2188,7 +2187,12 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		if (enable_ept)
 			ept_save_pdptrs(vcpu);
 		break;
+	case VCPU_EXREG_CR3:
+		if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
+			vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+		break;
 	default:
+		WARN_ON_ONCE(1);
 		break;
 	}
 }
@@ -2200,29 +2204,8 @@ static __init int cpu_has_kvm_support(void)
 
 static __init int vmx_disabled_by_bios(void)
 {
-	u64 msr;
-
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-	if (msr & FEATURE_CONTROL_LOCKED) {
-		/* launched w/ TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& tboot_enabled())
-			return 1;
-		/* launched w/o TXT and VMX only enabled w/ TXT */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& !tboot_enabled()) {
-			printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
-				"activate TXT before enabling KVM\n");
-			return 1;
-		}
-		/* launched w/o TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& !tboot_enabled())
-			return 1;
-	}
-
-	return 0;
+	return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	       !boot_cpu_has(X86_FEATURE_VMX);
 }
 
 static void kvm_cpu_vmxon(u64 addr)
@@ -2237,7 +2220,6 @@ static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	u64 old, test_bits;
 
 	if (cr4_read_shadow() & X86_CR4_VMXE)
 		return -EBUSY;
@@ -2265,17 +2247,6 @@ static int hardware_enable(void)
 	 */
 	crash_enable_local_vmclear(cpu);
 
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-
-	test_bits = FEATURE_CONTROL_LOCKED;
-	test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
-	if (tboot_enabled())
-		test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
-
-	if ((old & test_bits) != test_bits) {
-		/* enable and lock */
-		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
-	}
 	kvm_cpu_vmxon(phys_addr);
 	if (enable_ept)
 		ept_sync_global();
@@ -2859,13 +2830,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
 }
 
-static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
-{
-	if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
-		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-}
-
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -2878,8 +2842,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
 
-	if (!test_bit(VCPU_EXREG_PDPTR,
-		      (unsigned long *)&vcpu->arch.regs_dirty))
+	if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
 		return;
 
 	if (is_pae_paging(vcpu)) {
@@ -2901,10 +2864,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 		mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
 	}
 
-	__set_bit(VCPU_EXREG_PDPTR,
-		  (unsigned long *)&vcpu->arch.regs_avail);
-	__set_bit(VCPU_EXREG_PDPTR,
-		  (unsigned long *)&vcpu->arch.regs_dirty);
+	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
 }
 
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -2913,8 +2873,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
-		vmx_decache_cr3(vcpu);
+	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+		vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
 	if (!(cr0 & X86_CR0_PG)) {
 		/* From paging/starting to nonpaging */
 		exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
@@ -2995,6 +2955,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
 void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	struct kvm *kvm = vcpu->kvm;
+	bool update_guest_cr3 = true;
 	unsigned long guest_cr3;
 	u64 eptp;
 
@@ -3011,15 +2972,20 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 			spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 		}
 
-		if (enable_unrestricted_guest || is_paging(vcpu) ||
-		    is_guest_mode(vcpu))
-			guest_cr3 = kvm_read_cr3(vcpu);
-		else
+		/* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
+		if (is_guest_mode(vcpu))
+			update_guest_cr3 = false;
+		else if (!enable_unrestricted_guest && !is_paging(vcpu))
 			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
+		else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+			guest_cr3 = vcpu->arch.cr3;
+		else /* vmcs01.GUEST_CR3 is already up-to-date. */
+			update_guest_cr3 = false;
 		ept_load_pdptrs(vcpu);
 	}
 
-	vmcs_writel(GUEST_CR3, guest_cr3);
+	if (update_guest_cr3)
+		vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -3753,7 +3719,7 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
 	}
 }
 
-static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool vmx_get_enable_apicv(struct kvm *kvm)
 {
 	return enable_apicv;
 }
@@ -4046,6 +4012,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
 
+		vcpu->arch.xsaves_enabled = xsaves_enabled;
+
 		if (!xsaves_enabled)
 			exec_control &= ~SECONDARY_EXEC_XSAVES;
 
@@ -4158,14 +4126,13 @@ static void ept_set_mmio_spte_mask(void)
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
- * Sets up the vmcs for emulated real mode.
+ * Noting that the initialization of Guest-state Area of VMCS is in
+ * vmx_vcpu_reset().
  */
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void init_vmcs(struct vcpu_vmx *vmx)
 {
-	int i;
-
 	if (nested)
-		nested_vmx_vcpu_setup();
+		nested_vmx_set_vmcs_shadowing_bitmap();
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -4174,7 +4141,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	/* Control */
 	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
-	vmx->hv_deadline_tsc = -1;
 
 	exec_controls_set(vmx, vmx_exec_control(vmx));
 
@@ -4223,21 +4189,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
-		u32 index = vmx_msr_index[i];
-		u32 data_low, data_high;
-		int j = vmx->nmsrs;
-
-		if (rdmsr_safe(index, &data_low, &data_high) < 0)
-			continue;
-		if (wrmsr_safe(index, data_low, data_high) < 0)
-			continue;
-		vmx->guest_msrs[j].index = i;
-		vmx->guest_msrs[j].data = 0;
-		vmx->guest_msrs[j].mask = -1ull;
-		++vmx->nmsrs;
-	}
-
 	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
@@ -4248,6 +4199,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx->vpid != 0)
+		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
 	if (vmx_xsaves_supported())
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
@@ -4350,9 +4304,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-	if (vmx->vpid != 0)
-		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-
 	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
 	vmx->vcpu.arch.cr0 = cr0;
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
@@ -4707,7 +4658,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.irq_exits;
 	return 1;
@@ -4979,21 +4930,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 	vmcs_writel(GUEST_DR7, val);
 }
 
-static int handle_cpuid(struct kvm_vcpu *vcpu)
-{
-	return kvm_emulate_cpuid(vcpu);
-}
-
-static int handle_rdmsr(struct kvm_vcpu *vcpu)
-{
-	return kvm_emulate_rdmsr(vcpu);
-}
-
-static int handle_wrmsr(struct kvm_vcpu *vcpu)
-{
-	return kvm_emulate_wrmsr(vcpu);
-}
-
 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
 {
 	kvm_apic_update_ppr(vcpu);
@@ -5010,11 +4946,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int handle_halt(struct kvm_vcpu *vcpu)
-{
-	return kvm_emulate_halt(vcpu);
-}
-
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
 	return kvm_emulate_hypercall(vcpu);
@@ -5562,11 +5493,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_IO_INSTRUCTION]          = handle_io,
 	[EXIT_REASON_CR_ACCESS]               = handle_cr,
 	[EXIT_REASON_DR_ACCESS]               = handle_dr,
-	[EXIT_REASON_CPUID]                   = handle_cpuid,
-	[EXIT_REASON_MSR_READ]                = handle_rdmsr,
-	[EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
+	[EXIT_REASON_CPUID]                   = kvm_emulate_cpuid,
+	[EXIT_REASON_MSR_READ]                = kvm_emulate_rdmsr,
+	[EXIT_REASON_MSR_WRITE]               = kvm_emulate_wrmsr,
 	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
-	[EXIT_REASON_HLT]                     = handle_halt,
+	[EXIT_REASON_HLT]                     = kvm_emulate_halt,
 	[EXIT_REASON_INVD]		      = handle_invd,
 	[EXIT_REASON_INVLPG]		      = handle_invlpg,
 	[EXIT_REASON_RDPMC]                   = handle_rdpmc,
@@ -5939,9 +5870,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	}
 
 	if (exit_reason < kvm_vmx_max_exit_handlers
-	    && kvm_vmx_exit_handlers[exit_reason])
+	    && kvm_vmx_exit_handlers[exit_reason]) {
+#ifdef CONFIG_RETPOLINE
+		if (exit_reason == EXIT_REASON_MSR_WRITE)
+			return kvm_emulate_wrmsr(vcpu);
+		else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+			return handle_preemption_timer(vcpu);
+		else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
+			return handle_interrupt_window(vcpu);
+		else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+			return handle_external_interrupt(vcpu);
+		else if (exit_reason == EXIT_REASON_HLT)
+			return kvm_emulate_halt(vcpu);
+		else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
+			return handle_ept_misconfig(vcpu);
+#endif
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
-	else {
+	} else {
 		vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
 				exit_reason);
 		dump_vmcs();
@@ -6027,17 +5972,17 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	int tpr_threshold;
 
 	if (is_guest_mode(vcpu) &&
 		nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
 		return;
 
-	if (irr == -1 || tpr < irr) {
-		vmcs_write32(TPR_THRESHOLD, 0);
-		return;
-	}
-
-	vmcs_write32(TPR_THRESHOLD, irr);
+	tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
+	if (is_guest_mode(vcpu))
+		to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
+	else
+		vmcs_write32(TPR_THRESHOLD, tpr_threshold);
 }
 
 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
@@ -6514,9 +6459,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->nested.need_vmcs12_to_shadow_sync)
 		nested_sync_vmcs12_to_shadow(vcpu);
 
-	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
 		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
-	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
@@ -6539,7 +6484,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
-	kvm_load_guest_xcr0(vcpu);
+	kvm_load_guest_xsave_state(vcpu);
 
 	if (static_cpu_has(X86_FEATURE_PKU) &&
 	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
@@ -6646,7 +6591,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 			__write_pkru(vmx->host_pkru);
 	}
 
-	kvm_put_guest_xcr0(vcpu);
+	kvm_load_host_xsave_state(vcpu);
 
 	vmx->nested.nested_run_pending = 0;
 	vmx->idt_vectoring_info = 0;
@@ -6688,7 +6633,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_vpid(vmx->vpid);
 	nested_vmx_free_vcpu(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
-	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
@@ -6700,7 +6644,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	int err;
 	struct vcpu_vmx *vmx;
 	unsigned long *msr_bitmap;
-	int cpu;
+	int i, cpu;
 
 	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
 		"struct kvm_vcpu must be at offset 0 for arch usercopy region");
@@ -6745,16 +6689,39 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto uninit_vcpu;
 	}
 
-	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
-	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
-		     > PAGE_SIZE);
+	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
 
-	if (!vmx->guest_msrs)
-		goto free_pml;
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
+		u32 index = vmx_msr_index[i];
+		u32 data_low, data_high;
+		int j = vmx->nmsrs;
+
+		if (rdmsr_safe(index, &data_low, &data_high) < 0)
+			continue;
+		if (wrmsr_safe(index, data_low, data_high) < 0)
+			continue;
+
+		vmx->guest_msrs[j].index = i;
+		vmx->guest_msrs[j].data = 0;
+		switch (index) {
+		case MSR_IA32_TSX_CTRL:
+			/*
+			 * No need to pass TSX_CTRL_CPUID_CLEAR through, so
+			 * let's avoid changing CPUID bits under the host
+			 * kernel's feet.
+			 */
+			vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+			break;
+		default:
+			vmx->guest_msrs[j].mask = -1ull;
+			break;
+		}
+		++vmx->nmsrs;
+	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
 	if (err < 0)
-		goto free_msrs;
+		goto free_pml;
 
 	msr_bitmap = vmx->vmcs01.msr_bitmap;
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
@@ -6776,7 +6743,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
-	vmx_vcpu_setup(vmx);
+	init_vmcs(vmx);
 	vmx_vcpu_put(&vmx->vcpu);
 	put_cpu();
 	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
@@ -6801,7 +6768,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 
-	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+	vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
 
 	/*
 	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
@@ -6816,8 +6783,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 free_vmcs:
 	free_loaded_vmcs(vmx->loaded_vmcs);
-free_msrs:
-	kfree(vmx->guest_msrs);
 free_pml:
 	vmx_destroy_pml_buffer(vmx);
 uninit_vcpu:
@@ -6873,6 +6838,12 @@ static int __init vmx_check_processor_compat(void)
 	struct vmcs_config vmcs_conf;
 	struct vmx_capability vmx_cap;
 
+	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	    !this_cpu_has(X86_FEATURE_VMX)) {
+		pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id());
+		return -EIO;
+	}
+
 	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
 		return -EIO;
 	if (nested)
@@ -6996,6 +6967,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
 	cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
 	cr4_fixed1_update(X86_CR4_UMIP,       ecx, bit(X86_FEATURE_UMIP));
+	cr4_fixed1_update(X86_CR4_LA57,       ecx, bit(X86_FEATURE_LA57));
 
 #undef cr4_fixed1_update
 }
@@ -7090,6 +7062,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
+	vcpu->arch.xsaves_enabled = false;
+
 	if (cpu_has_secondary_exec_ctrls()) {
 		vmx_compute_secondary_exec_control(vmx);
 		vmcs_set_secondary_exec_control(vmx);
@@ -7097,10 +7072,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
 	if (nested_vmx_allowed(vcpu))
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
-			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+			FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+			FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 	else
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
-			~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+			~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+			  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
 	if (nested_vmx_allowed(vcpu)) {
 		nested_vmx_cr_fixed1_bits_update(vcpu);
@@ -7110,6 +7087,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
 			guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
 		update_intel_pt_cfg(vcpu);
+
+	if (boot_cpu_has(X86_FEATURE_RTM)) {
+		struct shared_msr_entry *msr;
+		msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
+		if (msr) {
+			bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
+			vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
+		}
+	}
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -7510,10 +7496,10 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.mcg_cap & MCG_LMCE_P)
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
-			FEATURE_CONTROL_LMCE;
+			FEAT_CTL_LMCE_ENABLED;
 	else
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
-			~FEATURE_CONTROL_LMCE;
+			~FEAT_CTL_LMCE_ENABLED;
 }
 
 static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
@@ -7598,9 +7584,6 @@ static __init int hardware_setup(void)
 		WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
 	}
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		rdmsrl(MSR_IA32_XSS, host_xss);
-
 	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
 	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
 		enable_vpid = 0;
@@ -7781,7 +7764,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.get_cpl = vmx_get_cpl,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
 	.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
-	.decache_cr3 = vmx_decache_cr3,
 	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
 	.set_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 5a0f34b1e226..7f42cf3dcd70 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -22,11 +22,17 @@ extern u32 get_umwait_control_msr(void);
 
 #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
 
-#define NR_AUTOLOAD_MSRS 8
+#ifdef CONFIG_X86_64
+#define NR_SHARED_MSRS	7
+#else
+#define NR_SHARED_MSRS	4
+#endif
+
+#define NR_LOADSTORE_MSRS 8
 
 struct vmx_msrs {
 	unsigned int		nr;
-	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS];
+	struct vmx_msr_entry	val[NR_LOADSTORE_MSRS];
 };
 
 struct shared_msr_entry {
@@ -167,6 +173,9 @@ struct nested_vmx {
 	u64 vmcs01_debugctl;
 	u64 vmcs01_guest_bndcfgs;
 
+	/* to migrate it to L1 if L2 writes to L1's CR8 directly */
+	int l1_tpr_threshold;
+
 	u16 vpid02;
 	u16 last_vpid;
 
@@ -203,7 +212,7 @@ struct vcpu_vmx {
 	u32                   idt_vectoring_info;
 	ulong                 rflags;
 
-	struct shared_msr_entry *guest_msrs;
+	struct shared_msr_entry guest_msrs[NR_SHARED_MSRS];
 	int                   nmsrs;
 	int                   save_nmsrs;
 	bool                  guest_msrs_ready;
@@ -230,6 +239,10 @@ struct vcpu_vmx {
 		struct vmx_msrs host;
 	} msr_autoload;
 
+	struct msr_autostore {
+		struct vmx_msrs guest;
+	} msr_autostore;
+
 	struct {
 		int vm86_active;
 		ulong save_rflags;
@@ -276,7 +289,7 @@ struct vcpu_vmx {
 
 	/*
 	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
-	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+	 * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
 	 * in msr_ia32_feature_control_valid_bits.
 	 */
 	u64 msr_ia32_feature_control;
@@ -334,6 +347,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d530521f11d..740d3ee42455 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -68,6 +68,7 @@
 #include <asm/mshyperv.h>
 #include <asm/hypervisor.h>
 #include <asm/intel_pt.h>
+#include <asm/emulate_prefix.h>
 #include <clocksource/hyperv_timer.h>
 
 #define CREATE_TRACE_POINTS
@@ -176,6 +177,8 @@ struct kvm_shared_msrs {
 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
 static struct kvm_shared_msrs __percpu *shared_msrs;
 
+static u64 __read_mostly host_xss;
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
 	{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -260,23 +263,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
-static void shared_msr_update(unsigned slot, u32 msr)
-{
-	u64 value;
-	unsigned int cpu = smp_processor_id();
-	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
-
-	/* only read, and nobody should modify it at this time,
-	 * so don't need lock */
-	if (slot >= shared_msrs_global.nr) {
-		printk(KERN_ERR "kvm: invalid MSR slot!");
-		return;
-	}
-	rdmsrl_safe(msr, &value);
-	smsr->values[slot].host = value;
-	smsr->values[slot].curr = value;
-}
-
 void kvm_define_shared_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
@@ -288,10 +274,16 @@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
 static void kvm_shared_msr_cpu_online(void)
 {
-	unsigned i;
+	unsigned int cpu = smp_processor_id();
+	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	u64 value;
+	int i;
 
-	for (i = 0; i < shared_msrs_global.nr; ++i)
-		shared_msr_update(i, shared_msrs_global.msrs[i]);
+	for (i = 0; i < shared_msrs_global.nr; ++i) {
+		rdmsrl_safe(shared_msrs_global.msrs[i], &value);
+		smsr->values[i].host = value;
+		smsr->values[i].curr = value;
+	}
 }
 
 int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
@@ -300,13 +292,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
 	int err;
 
-	if (((value ^ smsr->values[slot].curr) & mask) == 0)
+	value = (value & mask) | (smsr->values[slot].host & ~mask);
+	if (value == smsr->values[slot].curr)
 		return 0;
-	smsr->values[slot].curr = value;
 	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
 	if (err)
 		return 1;
 
+	smsr->values[slot].curr = value;
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
@@ -709,10 +702,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	ret = 1;
 
 	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
-	__set_bit(VCPU_EXREG_PDPTR,
-		  (unsigned long *)&vcpu->arch.regs_avail);
-	__set_bit(VCPU_EXREG_PDPTR,
-		  (unsigned long *)&vcpu->arch.regs_dirty);
+	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+
 out:
 
 	return ret;
@@ -722,7 +713,6 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
 bool pdptrs_changed(struct kvm_vcpu *vcpu)
 {
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
-	bool changed = true;
 	int offset;
 	gfn_t gfn;
 	int r;
@@ -730,8 +720,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (!is_pae_paging(vcpu))
 		return false;
 
-	if (!test_bit(VCPU_EXREG_PDPTR,
-		      (unsigned long *)&vcpu->arch.regs_avail))
+	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
 		return true;
 
 	gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
@@ -739,11 +728,9 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
 				       PFERR_USER_MASK | PFERR_WRITE_MASK);
 	if (r < 0)
-		goto out;
-	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
-out:
+		return true;
 
-	return changed;
+	return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
 }
 EXPORT_SYMBOL_GPL(pdptrs_changed);
 
@@ -812,27 +799,34 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 {
-	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
-			!vcpu->guest_xcr0_loaded) {
-		/* kvm_set_xcr() also depends on this */
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
 		if (vcpu->arch.xcr0 != host_xcr0)
 			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
-		vcpu->guest_xcr0_loaded = 1;
+
+		if (vcpu->arch.xsaves_enabled &&
+		    vcpu->arch.ia32_xss != host_xss)
+			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
 	}
 }
-EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
 
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->guest_xcr0_loaded) {
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
 		if (vcpu->arch.xcr0 != host_xcr0)
 			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
-		vcpu->guest_xcr0_loaded = 0;
+
+		if (vcpu->arch.xsaves_enabled &&
+		    vcpu->arch.ia32_xss != host_xss)
+			wrmsrl(MSR_IA32_XSS, host_xss);
 	}
+
 }
-EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
 
 static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
@@ -984,7 +978,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
 	vcpu->arch.cr3 = cr3;
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	return 0;
 }
@@ -1148,7 +1142,7 @@ static const u32 msrs_to_save_all[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
-	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
 	MSR_IA32_SPEC_CTRL,
 	MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
 	MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
@@ -1313,23 +1307,15 @@ static u64 kvm_get_arch_capabilities(void)
 		data |= ARCH_CAP_MDS_NO;
 
 	/*
-	 * On TAA affected systems, export MDS_NO=0 when:
-	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
-	 *	- Updated microcode is present. This is detected by
-	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
-	 *	  that VERW clears CPU buffers.
-	 *
-	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
-	 * mitigation and don't complain:
-	 *
-	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
-	 *
-	 * If TSX is disabled on the system, guests are also mitigated against
-	 * TAA and clear CPU buffer mitigation is not required for guests.
+	 * On TAA affected systems:
+	 *      - nothing to do if TSX is disabled on the host.
+	 *      - we emulate TSX_CTRL if present on the host.
+	 *	  This lets the guest use VERW to clear CPU buffers.
 	 */
-	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
-	    (data & ARCH_CAP_TSX_CTRL_MSR))
-		data &= ~ARCH_CAP_MDS_NO;
+	if (!boot_cpu_has(X86_FEATURE_RTM))
+		data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
+	else if (!boot_cpu_has_bug(X86_BUG_TAA))
+		data |= ARCH_CAP_TAA_NO;
 
 	return data;
 }
@@ -1477,8 +1463,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
-			 bool host_initiated)
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+		  bool host_initiated)
 {
 	struct msr_data msr;
 	int ret;
@@ -1553,20 +1539,25 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 }
 
 #ifdef CONFIG_X86_64
+struct pvclock_clock {
+	int vclock_mode;
+	u64 cycle_last;
+	u64 mask;
+	u32 mult;
+	u32 shift;
+};
+
 struct pvclock_gtod_data {
 	seqcount_t	seq;
 
-	struct { /* extract of a clocksource struct */
-		int vclock_mode;
-		u64	cycle_last;
-		u64	mask;
-		u32	mult;
-		u32	shift;
-	} clock;
+	struct pvclock_clock clock; /* extract of a clocksource struct */
+	struct pvclock_clock raw_clock; /* extract of a clocksource struct */
 
+	u64		boot_ns_raw;
 	u64		boot_ns;
 	u64		nsec_base;
 	u64		wall_time_sec;
+	u64		monotonic_raw_nsec;
 };
 
 static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1574,9 +1565,10 @@ static struct pvclock_gtod_data pvclock_gtod_data;
 static void update_pvclock_gtod(struct timekeeper *tk)
 {
 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
-	u64 boot_ns;
+	u64 boot_ns, boot_ns_raw;
 
 	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
+	boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));
 
 	write_seqcount_begin(&vdata->seq);
 
@@ -1587,11 +1579,20 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 	vdata->clock.mult		= tk->tkr_mono.mult;
 	vdata->clock.shift		= tk->tkr_mono.shift;
 
+	vdata->raw_clock.vclock_mode	= tk->tkr_raw.clock->archdata.vclock_mode;
+	vdata->raw_clock.cycle_last	= tk->tkr_raw.cycle_last;
+	vdata->raw_clock.mask		= tk->tkr_raw.mask;
+	vdata->raw_clock.mult		= tk->tkr_raw.mult;
+	vdata->raw_clock.shift		= tk->tkr_raw.shift;
+
 	vdata->boot_ns			= boot_ns;
 	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
 
 	vdata->wall_time_sec            = tk->xtime_sec;
 
+	vdata->boot_ns_raw		= boot_ns_raw;
+	vdata->monotonic_raw_nsec	= tk->tkr_raw.xtime_nsec;
+
 	write_seqcount_end(&vdata->seq);
 }
 #endif
@@ -2015,21 +2016,21 @@ static u64 read_tsc(void)
 	return last;
 }
 
-static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
+static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
+			  int *mode)
 {
 	long v;
-	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	u64 tsc_pg_val;
 
-	switch (gtod->clock.vclock_mode) {
+	switch (clock->vclock_mode) {
 	case VCLOCK_HVCLOCK:
 		tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
 						  tsc_timestamp);
 		if (tsc_pg_val != U64_MAX) {
 			/* TSC page valid */
 			*mode = VCLOCK_HVCLOCK;
-			v = (tsc_pg_val - gtod->clock.cycle_last) &
-				gtod->clock.mask;
+			v = (tsc_pg_val - clock->cycle_last) &
+				clock->mask;
 		} else {
 			/* TSC page invalid */
 			*mode = VCLOCK_NONE;
@@ -2038,8 +2039,8 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
 	case VCLOCK_TSC:
 		*mode = VCLOCK_TSC;
 		*tsc_timestamp = read_tsc();
-		v = (*tsc_timestamp - gtod->clock.cycle_last) &
-			gtod->clock.mask;
+		v = (*tsc_timestamp - clock->cycle_last) &
+			clock->mask;
 		break;
 	default:
 		*mode = VCLOCK_NONE;
@@ -2048,10 +2049,10 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
 	if (*mode == VCLOCK_NONE)
 		*tsc_timestamp = v = 0;
 
-	return v * gtod->clock.mult;
+	return v * clock->mult;
 }
 
-static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
+static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
 {
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	unsigned long seq;
@@ -2060,10 +2061,10 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
 
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
-		ns = gtod->nsec_base;
-		ns += vgettsc(tsc_timestamp, &mode);
+		ns = gtod->monotonic_raw_nsec;
+		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
 		ns >>= gtod->clock.shift;
-		ns += gtod->boot_ns;
+		ns += gtod->boot_ns_raw;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	*t = ns;
 
@@ -2081,7 +2082,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
 		seq = read_seqcount_begin(&gtod->seq);
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->nsec_base;
-		ns += vgettsc(tsc_timestamp, &mode);
+		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
@@ -2098,7 +2099,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
 	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
 		return false;
 
-	return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+	return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
 						      tsc_timestamp));
 }
 
@@ -2721,6 +2722,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr_info);
 		break;
+	case MSR_IA32_XSS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+			return 1;
+		/*
+		 * We do support PT if kvm_x86_ops->pt_supported(), but we do
+		 * not support IA32_XSS[bit 8]. Guests will have to use
+		 * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
+		 * MSRs.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		break;
 	case MSR_SMI_COUNT:
 		if (!msr_info->host_initiated)
 			return 1;
@@ -3048,6 +3063,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
 		return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
 				   msr_info->host_initiated);
+	case MSR_IA32_XSS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+			return 1;
+		msr_info->data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_K7_CLK_CTL:
 		/*
 		 * Provide expected ramp-up count for K7. All other
@@ -3825,12 +3846,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 				vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
 			else
 				vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
-			if (lapic_in_kernel(vcpu)) {
-				if (events->smi.latched_init)
-					set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
-				else
-					clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
-			}
+		}
+
+		if (lapic_in_kernel(vcpu)) {
+			if (events->smi.latched_init)
+				set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+			else
+				clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 		}
 	}
 
@@ -4421,6 +4443,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	case KVM_SET_NESTED_STATE: {
 		struct kvm_nested_state __user *user_kvm_nested_state = argp;
 		struct kvm_nested_state kvm_state;
+		int idx;
 
 		r = -EINVAL;
 		if (!kvm_x86_ops->set_nested_state)
@@ -4444,7 +4467,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		    && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
 			break;
 
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
 	case KVM_GET_SUPPORTED_HV_CPUID: {
@@ -4946,9 +4971,6 @@ set_identity_unlock:
 		if (!irqchip_kernel(kvm))
 			goto set_irqchip_out;
 		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
-		if (r)
-			goto set_irqchip_out;
-		r = 0;
 	set_irqchip_out:
 		kfree(chip);
 		break;
@@ -5471,6 +5493,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 int handle_ud(struct kvm_vcpu *vcpu)
 {
+	static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
 	int emul_type = EMULTYPE_TRAP_UD;
 	char sig[5]; /* ud2; .ascii "kvm" */
 	struct x86_exception e;
@@ -5478,7 +5501,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
 	if (force_emulation_prefix &&
 	    kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
 				sig, sizeof(sig), &e) == 0 &&
-	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+	    memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
 		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
 		emul_type = EMULTYPE_TRAP_UD_FORCED;
 	}
@@ -6136,7 +6159,7 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
 			      u32 pmc)
 {
-	return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+	return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -7866,6 +7889,19 @@ static void process_smi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+				       unsigned long *vcpu_bitmap)
+{
+	cpumask_var_t cpus;
+
+	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+	kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
+				    vcpu_bitmap, cpus);
+
+	free_cpumask_var(cpus);
+}
+
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
 	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
@@ -7943,7 +7979,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 	 */
 	put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
 
 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
 {
@@ -8702,8 +8737,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	    mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
 		goto out;
 
-	/* INITs are latched while in SMM */
-	if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+	/*
+	 * KVM_MP_STATE_INIT_RECEIVED means the processor is in
+	 * INIT state; latched init should be reported using
+	 * KVM_SET_VCPU_EVENTS, so reject it here.
+	 */
+	if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
 	    (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
 	     mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
 		goto out;
@@ -8795,7 +8834,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	vcpu->arch.cr2 = sregs->cr2;
 	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
 	vcpu->arch.cr3 = sregs->cr3;
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
@@ -9322,6 +9361,9 @@ int kvm_arch_hardware_setup(void)
 		kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	kvm_init_msr_list();
 	return 0;
 }
@@ -9375,7 +9417,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		goto fail_free_pio_data;
 
 	if (irqchip_in_kernel(vcpu->kvm)) {
-		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
+		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
 		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
 		if (r < 0)
 			goto fail_mmu_destroy;
@@ -9444,7 +9486,13 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
 	vcpu->arch.l1tf_flush_l1d = true;
+	if (pmu->version && unlikely(pmu->event_count)) {
+		pmu->need_cleanup = true;
+		kvm_make_request(KVM_REQ_PMU, vcpu);
+	}
 	kvm_x86_ops->sched_in(vcpu, cpu);
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index dbf7442a822b..29391af8871d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -238,8 +238,7 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 	return false;
 }
 
-static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
-					       enum kvm_reg reg)
+static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg)
 {
 	unsigned long val = kvm_register_read(vcpu, reg);
 
@@ -247,8 +246,7 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
 }
 
 static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
-				       enum kvm_reg reg,
-				       unsigned long val)
+				       int reg, unsigned long val)
 {
 	if (!is_64_bit_mode(vcpu))
 		val = (u32)val;
@@ -260,6 +258,11 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 	return !(kvm->arch.disabled_quirks & quirk);
 }
 
+static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
+{
+	return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu);
+}
+
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
@@ -366,7 +369,7 @@ static inline bool kvm_pat_valid(u64 data)
 	return (data | ((data & 0x0202020202020202ull) << 1)) == data;
 }
 
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e0788bade5ab..3b6544111ac9 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -20,10 +20,10 @@
 
 #define BEGIN(op) \
 .macro endp; \
-ENDPROC(atomic64_##op##_386); \
+SYM_FUNC_END(atomic64_##op##_386); \
 .purgem endp; \
 .endm; \
-ENTRY(atomic64_##op##_386); \
+SYM_FUNC_START(atomic64_##op##_386); \
 	LOCK v;
 
 #define ENDP endp
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 843d978ee341..1c5c81c16b06 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -16,12 +16,12 @@
 	cmpxchg8b (\reg)
 .endm
 
-ENTRY(atomic64_read_cx8)
+SYM_FUNC_START(atomic64_read_cx8)
 	read64 %ecx
 	ret
-ENDPROC(atomic64_read_cx8)
+SYM_FUNC_END(atomic64_read_cx8)
 
-ENTRY(atomic64_set_cx8)
+SYM_FUNC_START(atomic64_set_cx8)
 1:
 /* we don't need LOCK_PREFIX since aligned 64-bit writes
  * are atomic on 586 and newer */
@@ -29,19 +29,19 @@ ENTRY(atomic64_set_cx8)
 	jne 1b
 
 	ret
-ENDPROC(atomic64_set_cx8)
+SYM_FUNC_END(atomic64_set_cx8)
 
-ENTRY(atomic64_xchg_cx8)
+SYM_FUNC_START(atomic64_xchg_cx8)
 1:
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
 
 	ret
-ENDPROC(atomic64_xchg_cx8)
+SYM_FUNC_END(atomic64_xchg_cx8)
 
 .macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
 	pushl %ebp
 	pushl %ebx
 	pushl %esi
@@ -69,14 +69,14 @@ ENTRY(atomic64_\func\()_return_cx8)
 	popl %ebx
 	popl %ebp
 	ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
 .endm
 
 addsub_return add add adc
 addsub_return sub sub sbb
 
 .macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
 	pushl %ebx
 
 	read64 %esi
@@ -94,13 +94,13 @@ ENTRY(atomic64_\func\()_return_cx8)
 	movl %ecx, %edx
 	popl %ebx
 	ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
 .endm
 
 incdec_return inc add adc
 incdec_return dec sub sbb
 
-ENTRY(atomic64_dec_if_positive_cx8)
+SYM_FUNC_START(atomic64_dec_if_positive_cx8)
 	pushl %ebx
 
 	read64 %esi
@@ -119,9 +119,9 @@ ENTRY(atomic64_dec_if_positive_cx8)
 	movl %ecx, %edx
 	popl %ebx
 	ret
-ENDPROC(atomic64_dec_if_positive_cx8)
+SYM_FUNC_END(atomic64_dec_if_positive_cx8)
 
-ENTRY(atomic64_add_unless_cx8)
+SYM_FUNC_START(atomic64_add_unless_cx8)
 	pushl %ebp
 	pushl %ebx
 /* these just push these two parameters on the stack */
@@ -155,9 +155,9 @@ ENTRY(atomic64_add_unless_cx8)
 	jne 2b
 	xorl %eax, %eax
 	jmp 3b
-ENDPROC(atomic64_add_unless_cx8)
+SYM_FUNC_END(atomic64_add_unless_cx8)
 
-ENTRY(atomic64_inc_not_zero_cx8)
+SYM_FUNC_START(atomic64_inc_not_zero_cx8)
 	pushl %ebx
 
 	read64 %esi
@@ -177,4 +177,4 @@ ENTRY(atomic64_inc_not_zero_cx8)
 3:
 	popl %ebx
 	ret
-ENDPROC(atomic64_inc_not_zero_cx8)
+SYM_FUNC_END(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4df90c9ea383..4742e8fa7ee7 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -46,7 +46,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 	   * alignment for the unrolled loop.
 	   */		
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
 	pushl %esi
 	pushl %ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
@@ -128,13 +128,13 @@ ENTRY(csum_partial)
 	popl %ebx
 	popl %esi
 	ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
 
 #else
 
 /* Version for PentiumII/PPro */
 
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
 	pushl %esi
 	pushl %ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
@@ -246,7 +246,7 @@ ENTRY(csum_partial)
 	popl %ebx
 	popl %esi
 	ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
 				
 #endif
 EXPORT_SYMBOL(csum_partial)
@@ -280,7 +280,7 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
 #define ARGBASE 16		
 #define FP		12
 		
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
 	subl  $4,%esp	
 	pushl %edi
 	pushl %esi
@@ -398,7 +398,7 @@ DST(	movb %cl, (%edi)	)
 	popl %edi
 	popl %ecx			# equivalent to addl $4,%esp
 	ret	
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
 
 #else
 
@@ -416,7 +416,7 @@ ENDPROC(csum_partial_copy_generic)
 
 #define ARGBASE 12
 		
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
 	pushl %ebx
 	pushl %edi
 	pushl %esi
@@ -483,7 +483,7 @@ DST(	movb %dl, (%edi)         )
 	popl %edi
 	popl %ebx
 	ret
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
 				
 #undef ROUND
 #undef ROUND1		
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 75a5a4515fa7..c4c7dd115953 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -13,15 +13,15 @@
  * Zero a page.
  * %rdi	- page
  */
-ENTRY(clear_page_rep)
+SYM_FUNC_START(clear_page_rep)
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
 	ret
-ENDPROC(clear_page_rep)
+SYM_FUNC_END(clear_page_rep)
 EXPORT_SYMBOL_GPL(clear_page_rep)
 
-ENTRY(clear_page_orig)
+SYM_FUNC_START(clear_page_orig)
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -40,13 +40,13 @@ ENTRY(clear_page_orig)
 	jnz	.Lloop
 	nop
 	ret
-ENDPROC(clear_page_orig)
+SYM_FUNC_END(clear_page_orig)
 EXPORT_SYMBOL_GPL(clear_page_orig)
 
-ENTRY(clear_page_erms)
+SYM_FUNC_START(clear_page_erms)
 	movl $4096,%ecx
 	xorl %eax,%eax
 	rep stosb
 	ret
-ENDPROC(clear_page_erms)
+SYM_FUNC_END(clear_page_erms)
 EXPORT_SYMBOL_GPL(clear_page_erms)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index d63185698a23..3542502faa3b 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -13,7 +13,7 @@
  * %rcx : high 64 bits of new value
  * %al  : Operation successful
  */
-ENTRY(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 
 #
 # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
@@ -44,4 +44,4 @@ ENTRY(this_cpu_cmpxchg16b_emu)
 	xor %al,%al
 	ret
 
-ENDPROC(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 691d80e97488..ca01ed6029f4 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -13,7 +13,7 @@
  * %ebx : low 32 bits of new value
  * %ecx : high 32 bits of new value
  */
-ENTRY(cmpxchg8b_emu)
+SYM_FUNC_START(cmpxchg8b_emu)
 
 #
 # Emulate 'cmpxchg8b (%esi)' on UP except we don't
@@ -42,5 +42,5 @@ ENTRY(cmpxchg8b_emu)
 	popfl
 	ret
 
-ENDPROC(cmpxchg8b_emu)
+SYM_FUNC_END(cmpxchg8b_emu)
 EXPORT_SYMBOL(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index fd2d09afa097..2402d4c489d2 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,15 +13,15 @@
  * prefetch distance based on SMP/UP.
  */
 	ALIGN
-ENTRY(copy_page)
+SYM_FUNC_START(copy_page)
 	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
-ENDPROC(copy_page)
+SYM_FUNC_END(copy_page)
 EXPORT_SYMBOL(copy_page)
 
-ENTRY(copy_page_regs)
+SYM_FUNC_START_LOCAL(copy_page_regs)
 	subq	$2*8,	%rsp
 	movq	%rbx,	(%rsp)
 	movq	%r12,	1*8(%rsp)
@@ -86,4 +86,4 @@ ENTRY(copy_page_regs)
 	movq	1*8(%rsp), %r12
 	addq	$2*8, %rsp
 	ret
-ENDPROC(copy_page_regs)
+SYM_FUNC_END(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 86976b55ae74..816f128a6d52 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -53,7 +53,7 @@
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_generic_unrolled)
+SYM_FUNC_START(copy_user_generic_unrolled)
 	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
@@ -136,7 +136,7 @@ ENTRY(copy_user_generic_unrolled)
 	_ASM_EXTABLE_UA(19b, 40b)
 	_ASM_EXTABLE_UA(21b, 50b)
 	_ASM_EXTABLE_UA(22b, 50b)
-ENDPROC(copy_user_generic_unrolled)
+SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
 /* Some CPUs run faster using the string copy instructions.
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_generic_string)
+SYM_FUNC_START(copy_user_generic_string)
 	ASM_STAC
 	cmpl $8,%edx
 	jb 2f		/* less than 8 bytes, go to byte copy loop */
@@ -182,7 +182,7 @@ ENTRY(copy_user_generic_string)
 
 	_ASM_EXTABLE_UA(1b, 11b)
 	_ASM_EXTABLE_UA(3b, 12b)
-ENDPROC(copy_user_generic_string)
+SYM_FUNC_END(copy_user_generic_string)
 EXPORT_SYMBOL(copy_user_generic_string)
 
 /*
@@ -197,7 +197,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_enhanced_fast_string)
+SYM_FUNC_START(copy_user_enhanced_fast_string)
 	ASM_STAC
 	cmpl $64,%edx
 	jb .L_copy_short_string	/* less then 64 bytes, avoid the costly 'rep' */
@@ -214,7 +214,7 @@ ENTRY(copy_user_enhanced_fast_string)
 	.previous
 
 	_ASM_EXTABLE_UA(1b, 12b)
-ENDPROC(copy_user_enhanced_fast_string)
+SYM_FUNC_END(copy_user_enhanced_fast_string)
 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 
 /*
@@ -230,8 +230,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ALIGN;
-.Lcopy_user_handle_tail:
+SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 	movl %edx,%ecx
 1:	rep movsb
 2:	mov %ecx,%eax
@@ -239,7 +238,7 @@ ALIGN;
 	ret
 
 	_ASM_EXTABLE_UA(1b, 2b)
-END(.Lcopy_user_handle_tail)
+SYM_CODE_END(.Lcopy_user_handle_tail)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
@@ -250,7 +249,7 @@ END(.Lcopy_user_handle_tail)
  *  - Require 8-byte alignment when size is 8 bytes or larger.
  *  - Require 4-byte alignment when size is 4 bytes.
  */
-ENTRY(__copy_user_nocache)
+SYM_FUNC_START(__copy_user_nocache)
 	ASM_STAC
 
 	/* If size is less than 8 bytes, go to 4-byte copy */
@@ -389,5 +388,5 @@ ENTRY(__copy_user_nocache)
 	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
 	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
 	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
-ENDPROC(__copy_user_nocache)
+SYM_FUNC_END(__copy_user_nocache)
 EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index a4a379e79259..3394a8ff7fd0 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -49,7 +49,7 @@
 	.endm
 
 
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
 	cmpl	$3*64, %edx
 	jle	.Lignore
 
@@ -225,4 +225,4 @@ ENTRY(csum_partial_copy_generic)
 	jz   .Lende
 	movl $-EFAULT, (%rax)
 	jmp .Lende
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 9578eb88fc87..c8a85b512796 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -36,7 +36,7 @@
 #include <asm/export.h>
 
 	.text
-ENTRY(__get_user_1)
+SYM_FUNC_START(__get_user_1)
 	mov PER_CPU_VAR(current_task), %_ASM_DX
 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
@@ -47,10 +47,10 @@ ENTRY(__get_user_1)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__get_user_1)
+SYM_FUNC_END(__get_user_1)
 EXPORT_SYMBOL(__get_user_1)
 
-ENTRY(__get_user_2)
+SYM_FUNC_START(__get_user_2)
 	add $1,%_ASM_AX
 	jc bad_get_user
 	mov PER_CPU_VAR(current_task), %_ASM_DX
@@ -63,10 +63,10 @@ ENTRY(__get_user_2)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__get_user_2)
+SYM_FUNC_END(__get_user_2)
 EXPORT_SYMBOL(__get_user_2)
 
-ENTRY(__get_user_4)
+SYM_FUNC_START(__get_user_4)
 	add $3,%_ASM_AX
 	jc bad_get_user
 	mov PER_CPU_VAR(current_task), %_ASM_DX
@@ -79,10 +79,10 @@ ENTRY(__get_user_4)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__get_user_4)
+SYM_FUNC_END(__get_user_4)
 EXPORT_SYMBOL(__get_user_4)
 
-ENTRY(__get_user_8)
+SYM_FUNC_START(__get_user_8)
 #ifdef CONFIG_X86_64
 	add $7,%_ASM_AX
 	jc bad_get_user
@@ -111,25 +111,27 @@ ENTRY(__get_user_8)
 	ASM_CLAC
 	ret
 #endif
-ENDPROC(__get_user_8)
+SYM_FUNC_END(__get_user_8)
 EXPORT_SYMBOL(__get_user_8)
 
 
-.Lbad_get_user_clac:
+SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
 	ASM_CLAC
 bad_get_user:
 	xor %edx,%edx
 	mov $(-EFAULT),%_ASM_AX
 	ret
+SYM_CODE_END(.Lbad_get_user_clac)
 
 #ifdef CONFIG_X86_32
-.Lbad_get_user_8_clac:
+SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac)
 	ASM_CLAC
 bad_get_user_8:
 	xor %edx,%edx
 	xor %ecx,%ecx
 	mov $(-EFAULT),%_ASM_AX
 	ret
+SYM_CODE_END(.Lbad_get_user_8_clac)
 #endif
 
 	_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index a14f9939c365..dbf8cc97b7f5 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -8,7 +8,7 @@
  * unsigned int __sw_hweight32(unsigned int w)
  * %rdi: w
  */
-ENTRY(__sw_hweight32)
+SYM_FUNC_START(__sw_hweight32)
 
 #ifdef CONFIG_X86_64
 	movl %edi, %eax				# w
@@ -33,10 +33,10 @@ ENTRY(__sw_hweight32)
 	shrl $24, %eax				# w = w_tmp >> 24
 	__ASM_SIZE(pop,) %__ASM_REG(dx)
 	ret
-ENDPROC(__sw_hweight32)
+SYM_FUNC_END(__sw_hweight32)
 EXPORT_SYMBOL(__sw_hweight32)
 
-ENTRY(__sw_hweight64)
+SYM_FUNC_START(__sw_hweight64)
 #ifdef CONFIG_X86_64
 	pushq   %rdi
 	pushq   %rdx
@@ -79,5 +79,5 @@ ENTRY(__sw_hweight64)
 	popl    %ecx
 	ret
 #endif
-ENDPROC(__sw_hweight64)
+SYM_FUNC_END(__sw_hweight64)
 EXPORT_SYMBOL(__sw_hweight64)
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 306c3a0902ba..31600d851fd8 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
  */
 static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
 {
-	if (user_64bit_mode(regs))
+	if (any_64bit_mode(regs))
 		return INAT_SEG_REG_IGNORE;
 	/*
 	 * Resolve the default segment register as described in Section 3.7.4
@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * which may be invalid at this point.
 	 */
 	if (regoff == offsetof(struct pt_regs, ip)) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			return INAT_SEG_REG_IGNORE;
 		else
 			return INAT_SEG_REG_CS;
@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * In long mode, segment override prefixes are ignored, except for
 	 * overrides for FS and GS.
 	 */
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		if (idx != INAT_SEG_REG_FS &&
 		    idx != INAT_SEG_REG_GS)
 			idx = INAT_SEG_REG_IGNORE;
@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
 		 */
 		return (unsigned long)(sel << 4);
 
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		/*
 		 * Only FS or GS will have a base address, the rest of
 		 * the segments' bases are forced to 0.
 		 */
 		unsigned long base;
 
-		if (seg_reg_idx == INAT_SEG_REG_FS)
+		if (seg_reg_idx == INAT_SEG_REG_FS) {
 			rdmsrl(MSR_FS_BASE, base);
-		else if (seg_reg_idx == INAT_SEG_REG_GS)
+		} else if (seg_reg_idx == INAT_SEG_REG_GS) {
 			/*
 			 * swapgs was called at the kernel entry point. Thus,
 			 * MSR_KERNEL_GS_BASE will have the user-space GS base.
 			 */
-			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		else
+			if (user_mode(regs))
+				rdmsrl(MSR_KERNEL_GS_BASE, base);
+			else
+				rdmsrl(MSR_GS_BASE, base);
+		} else {
 			base = 0;
+		}
 		return base;
 	}
 
@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
 	if (sel < 0)
 		return 0;
 
-	if (user_64bit_mode(regs) || v8086_mode(regs))
+	if (any_64bit_mode(regs) || v8086_mode(regs))
 		return -1L;
 
 	if (!sel)
@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
 	 * following instruction.
 	 */
 	if (*regoff == -EDOM) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			tmp = regs->ip + insn->length;
 		else
 			tmp = 0;
@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
 	 * After computed, the effective address is treated as an unsigned
 	 * quantity.
 	 */
-	if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
+	if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
 		goto out;
 
 	/*
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 0b5862ba6a75..404279563891 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,6 +13,8 @@
 #include <asm/inat.h>
 #include <asm/insn.h>
 
+#include <asm/emulate_prefix.h>
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)	\
 	((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 		insn->addr_bytes = 4;
 }
 
+static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
+static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
+
+static int __insn_get_emulate_prefix(struct insn *insn,
+				     const insn_byte_t *prefix, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
+			goto err_out;
+	}
+
+	insn->emulate_prefix_size = len;
+	insn->next_byte += len;
+
+	return 1;
+
+err_out:
+	return 0;
+}
+
+static void insn_get_emulate_prefix(struct insn *insn)
+{
+	if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
+		return;
+
+	__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
+}
+
 /**
  * insn_get_prefixes - scan x86 instruction prefix bytes
  * @insn:	&struct insn containing instruction
@@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
 	if (prefixes->got)
 		return;
 
+	insn_get_emulate_prefix(insn);
+
 	nb = 0;
 	lb = 0;
 	b = peek_next(insn_byte_t, insn);
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index a9bdf0805be0..cb5a1964506b 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -8,8 +8,8 @@
 /*
  * override generic version in lib/iomap_copy.c
  */
-ENTRY(__iowrite32_copy)
+SYM_FUNC_START(__iowrite32_copy)
 	movl %edx,%ecx
 	rep movsd
 	ret
-ENDPROC(__iowrite32_copy)
+SYM_FUNC_END(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 92748660ba51..56b243b14c3a 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -28,8 +28,8 @@
  * Output:
  * rax original destination
  */
-ENTRY(__memcpy)
-ENTRY(memcpy)
+SYM_FUNC_START_ALIAS(__memcpy)
+SYM_FUNC_START_LOCAL(memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -41,8 +41,8 @@ ENTRY(memcpy)
 	movl %edx, %ecx
 	rep movsb
 	ret
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
+SYM_FUNC_END(memcpy)
+SYM_FUNC_END_ALIAS(__memcpy)
 EXPORT_SYMBOL(memcpy)
 EXPORT_SYMBOL(__memcpy)
 
@@ -50,14 +50,14 @@ EXPORT_SYMBOL(__memcpy)
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
  */
-ENTRY(memcpy_erms)
+SYM_FUNC_START_LOCAL(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-ENDPROC(memcpy_erms)
+SYM_FUNC_END(memcpy_erms)
 
-ENTRY(memcpy_orig)
+SYM_FUNC_START_LOCAL(memcpy_orig)
 	movq %rdi, %rax
 
 	cmpq $0x20, %rdx
@@ -182,7 +182,7 @@ ENTRY(memcpy_orig)
 
 .Lend:
 	retq
-ENDPROC(memcpy_orig)
+SYM_FUNC_END(memcpy_orig)
 
 #ifndef CONFIG_UML
 
@@ -193,7 +193,7 @@ MCSAFE_TEST_CTL
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(__memcpy_mcsafe)
+SYM_FUNC_START(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe)
 	xorl %eax, %eax
 .L_done:
 	ret
-ENDPROC(__memcpy_mcsafe)
+SYM_FUNC_END(__memcpy_mcsafe)
 EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
 	.section .fixup, "ax"
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index bbec69d8223b..7ff00ea64e4f 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -26,13 +26,10 @@
  */
 .weak memmove
 
-ENTRY(memmove)
-ENTRY(__memmove)
+SYM_FUNC_START_ALIAS(memmove)
+SYM_FUNC_START(__memmove)
 
-	/* Handle more 32 bytes in loop */
 	mov %rdi, %rax
-	cmp $0x20, %rdx
-	jb	1f
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
@@ -42,7 +39,9 @@ ENTRY(__memmove)
 	cmp %rdi, %r8
 	jg 2f
 
+	/* FSRM implies ERMS => no length checks, do the copy directly */
 .Lmemmove_begin_forward:
+	ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
 	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
 
 	/*
@@ -114,6 +113,8 @@ ENTRY(__memmove)
 	 */
 	.p2align 4
 2:
+	cmp $0x20, %rdx
+	jb 1f
 	cmp $680, %rdx
 	jb 6f
 	cmp %dil, %sil
@@ -207,7 +208,7 @@ ENTRY(__memmove)
 	movb %r11b, (%rdi)
 13:
 	retq
-ENDPROC(__memmove)
-ENDPROC(memmove)
+SYM_FUNC_END(__memmove)
+SYM_FUNC_END_ALIAS(memmove)
 EXPORT_SYMBOL(__memmove)
 EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 9bc861c71e75..9ff15ee404a4 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,8 +19,8 @@
  *
  * rax   original destination
  */
-ENTRY(memset)
-ENTRY(__memset)
+SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
 	 * to use it when possible. If not available, use fast string instructions.
@@ -43,8 +43,8 @@ ENTRY(__memset)
 	rep stosb
 	movq %r9,%rax
 	ret
-ENDPROC(memset)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_END_ALIAS(memset)
 EXPORT_SYMBOL(memset)
 EXPORT_SYMBOL(__memset)
 
@@ -59,16 +59,16 @@ EXPORT_SYMBOL(__memset)
  *
  * rax   original destination
  */
-ENTRY(memset_erms)
+SYM_FUNC_START_LOCAL(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
-ENDPROC(memset_erms)
+SYM_FUNC_END(memset_erms)
 
-ENTRY(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
 	movq %rdi,%r10
 
 	/* expand byte value  */
@@ -139,4 +139,4 @@ ENTRY(memset_orig)
 	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
-ENDPROC(memset_orig)
+SYM_FUNC_END(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index ed33cbab3958..a2b9caa5274c 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -12,7 +12,7 @@
  *
  */
 .macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
 	pushq %rbx
 	pushq %r12
 	movq	%rdi, %r10	/* Save pointer */
@@ -41,13 +41,13 @@ ENTRY(\op\()_safe_regs)
 	jmp     2b
 
 	_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
 .endm
 
 #else /* X86_32 */
 
 .macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
 	pushl %ebx
 	pushl %ebp
 	pushl %esi
@@ -83,7 +83,7 @@ ENTRY(\op\()_safe_regs)
 	jmp     2b
 
 	_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
 .endm
 
 #endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 126dd6a9ec9b..7c7c92db8497 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -34,7 +34,7 @@
 #define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
 
 .text
-ENTRY(__put_user_1)
+SYM_FUNC_START(__put_user_1)
 	ENTER
 	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
 	jae .Lbad_put_user
@@ -43,10 +43,10 @@ ENTRY(__put_user_1)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__put_user_1)
+SYM_FUNC_END(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)
 
-ENTRY(__put_user_2)
+SYM_FUNC_START(__put_user_2)
 	ENTER
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $1,%_ASM_BX
@@ -57,10 +57,10 @@ ENTRY(__put_user_2)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__put_user_2)
+SYM_FUNC_END(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)
 
-ENTRY(__put_user_4)
+SYM_FUNC_START(__put_user_4)
 	ENTER
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $3,%_ASM_BX
@@ -71,10 +71,10 @@ ENTRY(__put_user_4)
 	xor %eax,%eax
 	ASM_CLAC
 	ret
-ENDPROC(__put_user_4)
+SYM_FUNC_END(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)
 
-ENTRY(__put_user_8)
+SYM_FUNC_START(__put_user_8)
 	ENTER
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $7,%_ASM_BX
@@ -88,14 +88,15 @@ ENTRY(__put_user_8)
 	xor %eax,%eax
 	ASM_CLAC
 	RET
-ENDPROC(__put_user_8)
+SYM_FUNC_END(__put_user_8)
 EXPORT_SYMBOL(__put_user_8)
 
-.Lbad_put_user_clac:
+SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
 	ASM_CLAC
 .Lbad_put_user:
 	movl $-EFAULT,%eax
 	RET
+SYM_CODE_END(.Lbad_put_user_clac)
 
 	_ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
 	_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961e678a..363ec132df7e 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -11,11 +11,11 @@
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
 
-ENTRY(__x86_indirect_thunk_\reg)
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
 	CFI_STARTPROC
 	JMP_NOSPEC %\reg
 	CFI_ENDPROC
-ENDPROC(__x86_indirect_thunk_\reg)
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
 .endm
 
 /*
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index e0b85930dd77..8908c58bd6cd 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -333,7 +333,7 @@ AVXcode: 1
 06: CLTS
 07: SYSRET (o64)
 08: INVD
-09: WBINVD
+09: WBINVD | WBNOINVD (F3)
 0a:
 0b: UD2 (1B)
 0c:
@@ -364,7 +364,7 @@ AVXcode: 1
 # a ModR/M byte.
 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
-1c:
+1c: Grp20 (1A),(1C)
 1d:
 1e:
 1f: NOP Ev
@@ -695,16 +695,28 @@ AVXcode: 2
 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
 4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
-# Skip 0x50-0x57
+50: vpdpbusd Vx,Hx,Wx (66),(ev)
+51: vpdpbusds Vx,Hx,Wx (66),(ev)
+52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66),(ev) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev)
+53: vpdpwssds Vx,Hx,Wx (66),(ev) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev)
+54: vpopcntb/w Vx,Wx (66),(ev)
+55: vpopcntd/q Vx,Wx (66),(ev)
 58: vpbroadcastd Vx,Wx (66),(v)
 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x63
+# Skip 0x5c-0x61
+62: vpexpandb/w Vx,Wx (66),(ev)
+63: vpcompressb/w Wx,Vx (66),(ev)
 64: vpblendmd/q Vx,Hx,Wx (66),(ev)
 65: vblendmps/d Vx,Hx,Wx (66),(ev)
 66: vpblendmb/w Vx,Hx,Wx (66),(ev)
-# Skip 0x67-0x74
+68: vp2intersectd/q Kx,Hx,Wx (F2),(ev)
+# Skip 0x69-0x6f
+70: vpshldvw Vx,Hx,Wx (66),(ev)
+71: vpshldvd/q Vx,Hx,Wx (66),(ev)
+72: vcvtne2ps2bf16 Vx,Hx,Wx (F2),(ev) | vcvtneps2bf16 Vx,Wx (F3),(ev) | vpshrdvw Vx,Hx,Wx (66),(ev)
+73: vpshrdvd/q Vx,Hx,Wx (66),(ev)
 75: vpermi2b/w Vx,Hx,Wx (66),(ev)
 76: vpermi2d/q Vx,Hx,Wx (66),(ev)
 77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
@@ -727,6 +739,7 @@ AVXcode: 2
 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
 8d: vpermb/w Vx,Hx,Wx (66),(ev)
 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+8f: vpshufbitqmb Kx,Hx,Wx (66),(ev)
 # 0x0f 0x38 0x90-0xbf (FMA)
 90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
 91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
@@ -738,8 +751,8 @@ AVXcode: 2
 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
 98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
-9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
-9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) | v4fmaddps Vdqq,Hdqq,Wdq (F2),(ev)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) | v4fmaddss Vdq,Hdq,Wdq (F2),(ev)
 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
@@ -752,8 +765,8 @@ a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
 a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
 a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
 a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
-aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
-ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) | v4fnmaddps Vdqq,Hdqq,Wdq (F2),(ev)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) | v4fnmaddss Vdq,Hdq,Wdq (F2),(ev)
 ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
 ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
 ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
@@ -780,11 +793,12 @@ ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
 cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
 cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
 cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+cf: vgf2p8mulb Vx,Wx (66)
 db: VAESIMC Vdq,Wdq (66),(v1)
-dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
-dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
-de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
-df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+dc: vaesenc Vx,Hx,Wx (66)
+dd: vaesenclast Vx,Hx,Wx (66)
+de: vaesdec Vx,Hx,Wx (66)
+df: vaesdeclast Vx,Hx,Wx (66)
 f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
 f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
 f2: ANDN Gy,By,Ey (v)
@@ -792,6 +806,8 @@ f3: Grp17 (1A)
 f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
 f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
 f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
+f9: MOVDIRI My,Gy
 EndTable
 
 Table: 3-byte opcode 2 (0x0f 0x3a)
@@ -846,7 +862,7 @@ AVXcode: 3
 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
 43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
-44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+44: vpclmulqdq Vx,Hx,Wx,Ib (66)
 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
 4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
@@ -863,7 +879,13 @@ AVXcode: 3
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
 66: vfpclassps/d Vk,Wx,Ib (66),(ev)
 67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
+71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
+72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
+73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
+ce: vgf2p8affineqb Vx,Wx,Ib (66)
+cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v)
 EndTable
@@ -943,9 +965,9 @@ GrpTable: Grp6
 EndTable
 
 GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
 5: rdpkru (110),(11B) | wrpkru (111),(11B)
@@ -1020,7 +1042,7 @@ GrpTable: Grp15
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
 7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
@@ -1051,6 +1073,10 @@ GrpTable: Grp19
 6: vscatterpf1qps/d Wx (66),(ev)
 EndTable
 
+GrpTable: Grp20
+0: cldemote Mb
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S
index ee08449d20fd..951da2ad54bb 100644
--- a/arch/x86/math-emu/div_Xsig.S
+++ b/arch/x86/math-emu/div_Xsig.S
@@ -75,7 +75,7 @@ FPU_result_1:
 
 
 .text
-ENTRY(div_Xsig)
+SYM_FUNC_START(div_Xsig)
 	pushl	%ebp
 	movl	%esp,%ebp
 #ifndef NON_REENTRANT_FPU
@@ -364,4 +364,4 @@ L_bugged_2:
 	pop	%ebx
 	jmp	L_exit
 #endif /* PARANOID */ 
-ENDPROC(div_Xsig)
+SYM_FUNC_END(div_Xsig)
diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S
index 8f5025c80ee0..d047d1816abe 100644
--- a/arch/x86/math-emu/div_small.S
+++ b/arch/x86/math-emu/div_small.S
@@ -19,7 +19,7 @@
 #include "fpu_emu.h"
 
 .text
-ENTRY(FPU_div_small)
+SYM_FUNC_START(FPU_div_small)
 	pushl	%ebp
 	movl	%esp,%ebp
 
@@ -45,4 +45,4 @@ ENTRY(FPU_div_small)
 
 	leave
 	ret
-ENDPROC(FPU_div_small)
+SYM_FUNC_END(FPU_div_small)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index f98a0c956764..9b41391867dc 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -107,6 +107,8 @@ static inline bool seg_writable(struct desc_struct *d)
 #define FPU_access_ok(y,z)	if ( !access_ok(y,z) ) \
 				math_abort(FPU_info,SIGSEGV)
 #define FPU_abort		math_abort(FPU_info, SIGSEGV)
+#define FPU_copy_from_user(to, from, n)	\
+		do { if (copy_from_user(to, from, n)) FPU_abort; } while (0)
 
 #undef FPU_IGNORE_CODE_SEGV
 #ifdef FPU_IGNORE_CODE_SEGV
@@ -122,7 +124,7 @@ static inline bool seg_writable(struct desc_struct *d)
 #define	FPU_code_access_ok(z) FPU_access_ok((void __user *)FPU_EIP,z)
 #endif
 
-#define FPU_get_user(x,y)       get_user((x),(y))
-#define FPU_put_user(x,y)       put_user((x),(y))
+#define FPU_get_user(x,y) do { if (get_user((x),(y))) FPU_abort; } while (0)
+#define FPU_put_user(x,y) do { if (put_user((x),(y))) FPU_abort; } while (0)
 
 #endif
diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S
index 3e489122a2b0..4afc7b1fa6e9 100644
--- a/arch/x86/math-emu/mul_Xsig.S
+++ b/arch/x86/math-emu/mul_Xsig.S
@@ -25,7 +25,7 @@
 #include "fpu_emu.h"
 
 .text
-ENTRY(mul32_Xsig)
+SYM_FUNC_START(mul32_Xsig)
 	pushl %ebp
 	movl %esp,%ebp
 	subl $16,%esp
@@ -63,10 +63,10 @@ ENTRY(mul32_Xsig)
 	popl %esi
 	leave
 	ret
-ENDPROC(mul32_Xsig)
+SYM_FUNC_END(mul32_Xsig)
 
 
-ENTRY(mul64_Xsig)
+SYM_FUNC_START(mul64_Xsig)
 	pushl %ebp
 	movl %esp,%ebp
 	subl $16,%esp
@@ -116,11 +116,11 @@ ENTRY(mul64_Xsig)
 	popl %esi
 	leave
 	ret
-ENDPROC(mul64_Xsig)
+SYM_FUNC_END(mul64_Xsig)
 
 
 
-ENTRY(mul_Xsig_Xsig)
+SYM_FUNC_START(mul_Xsig_Xsig)
 	pushl %ebp
 	movl %esp,%ebp
 	subl $16,%esp
@@ -176,4 +176,4 @@ ENTRY(mul_Xsig_Xsig)
 	popl %esi
 	leave
 	ret
-ENDPROC(mul_Xsig_Xsig)
+SYM_FUNC_END(mul_Xsig_Xsig)
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S
index 604f0b2d17e8..702315eecb86 100644
--- a/arch/x86/math-emu/polynom_Xsig.S
+++ b/arch/x86/math-emu/polynom_Xsig.S
@@ -37,7 +37,7 @@
 #define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
 
 .text
-ENTRY(polynomial_Xsig)
+SYM_FUNC_START(polynomial_Xsig)
 	pushl	%ebp
 	movl	%esp,%ebp
 	subl	$32,%esp
@@ -134,4 +134,4 @@ L_accum_done:
 	popl	%esi
 	leave
 	ret
-ENDPROC(polynomial_Xsig)
+SYM_FUNC_END(polynomial_Xsig)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index f3779743d15e..fe6246ff9887 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -85,7 +85,7 @@ int FPU_load_extended(long double __user *s, int stnr)
 
 	RE_ENTRANT_CHECK_OFF;
 	FPU_access_ok(s, 10);
-	__copy_from_user(sti_ptr, s, 10);
+	FPU_copy_from_user(sti_ptr, s, 10);
 	RE_ENTRANT_CHECK_ON;
 
 	return FPU_tagof(sti_ptr);
@@ -1126,9 +1126,9 @@ void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
 	/* Copy all registers in stack order. */
 	RE_ENTRANT_CHECK_OFF;
 	FPU_access_ok(s, 80);
-	__copy_from_user(register_base + offset, s, other);
+	FPU_copy_from_user(register_base + offset, s, other);
 	if (offset)
-		__copy_from_user(register_base, s + other, offset);
+		FPU_copy_from_user(register_base, s + other, offset);
 	RE_ENTRANT_CHECK_ON;
 
 	for (i = 0; i < 8; i++) {
diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S
index 7f6b4392a15d..cad1d60b1e84 100644
--- a/arch/x86/math-emu/reg_norm.S
+++ b/arch/x86/math-emu/reg_norm.S
@@ -22,7 +22,7 @@
 
 
 .text
-ENTRY(FPU_normalize)
+SYM_FUNC_START(FPU_normalize)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%ebx
@@ -95,12 +95,12 @@ L_overflow:
 	call	arith_overflow
 	pop	%ebx
 	jmp	L_exit
-ENDPROC(FPU_normalize)
+SYM_FUNC_END(FPU_normalize)
 
 
 
 /* Normalise without reporting underflow or overflow */
-ENTRY(FPU_normalize_nuo)
+SYM_FUNC_START(FPU_normalize_nuo)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%ebx
@@ -147,4 +147,4 @@ L_exit_nuo_zero:
 	popl	%ebx
 	leave
 	ret
-ENDPROC(FPU_normalize_nuo)
+SYM_FUNC_END(FPU_normalize_nuo)
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
index 04563421ee7d..11a1f798451b 100644
--- a/arch/x86/math-emu/reg_round.S
+++ b/arch/x86/math-emu/reg_round.S
@@ -109,7 +109,7 @@ FPU_denormal:
 .globl fpu_Arith_exit
 
 /* Entry point when called from C */
-ENTRY(FPU_round)
+SYM_FUNC_START(FPU_round)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -708,4 +708,4 @@ L_exception_exit:
 	jmp	fpu_reg_round_special_exit
 #endif /* PARANOID */ 
 
-ENDPROC(FPU_round)
+SYM_FUNC_END(FPU_round)
diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S
index 50fe9f8c893c..9c9e2c810afe 100644
--- a/arch/x86/math-emu/reg_u_add.S
+++ b/arch/x86/math-emu/reg_u_add.S
@@ -32,7 +32,7 @@
 #include "control_w.h"
 
 .text
-ENTRY(FPU_u_add)
+SYM_FUNC_START(FPU_u_add)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -166,4 +166,4 @@ L_exit:
 	leave
 	ret
 #endif /* PARANOID */
-ENDPROC(FPU_u_add)
+SYM_FUNC_END(FPU_u_add)
diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S
index 94d545e118e4..e2fb5c2644c5 100644
--- a/arch/x86/math-emu/reg_u_div.S
+++ b/arch/x86/math-emu/reg_u_div.S
@@ -75,7 +75,7 @@ FPU_ovfl_flag:
 #define DEST	PARAM3
 
 .text
-ENTRY(FPU_u_div)
+SYM_FUNC_START(FPU_u_div)
 	pushl	%ebp
 	movl	%esp,%ebp
 #ifndef NON_REENTRANT_FPU
@@ -471,4 +471,4 @@ L_exit:
 	ret
 #endif /* PARANOID */ 
 
-ENDPROC(FPU_u_div)
+SYM_FUNC_END(FPU_u_div)
diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S
index 21cde47fb3e5..0c779c87ac5b 100644
--- a/arch/x86/math-emu/reg_u_mul.S
+++ b/arch/x86/math-emu/reg_u_mul.S
@@ -45,7 +45,7 @@ FPU_accum_1:
 
 
 .text
-ENTRY(FPU_u_mul)
+SYM_FUNC_START(FPU_u_mul)
 	pushl	%ebp
 	movl	%esp,%ebp
 #ifndef NON_REENTRANT_FPU
@@ -147,4 +147,4 @@ L_exit:
 	ret
 #endif /* PARANOID */ 
 
-ENDPROC(FPU_u_mul)
+SYM_FUNC_END(FPU_u_mul)
diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S
index f05dea7dec38..e9bb7c248649 100644
--- a/arch/x86/math-emu/reg_u_sub.S
+++ b/arch/x86/math-emu/reg_u_sub.S
@@ -33,7 +33,7 @@
 #include "control_w.h"
 
 .text
-ENTRY(FPU_u_sub)
+SYM_FUNC_START(FPU_u_sub)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -271,4 +271,4 @@ L_exit:
 	popl	%esi
 	leave
 	ret
-ENDPROC(FPU_u_sub)
+SYM_FUNC_END(FPU_u_sub)
diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S
index 226a51e991f1..d9d7de8dbd7b 100644
--- a/arch/x86/math-emu/round_Xsig.S
+++ b/arch/x86/math-emu/round_Xsig.S
@@ -23,7 +23,7 @@
 
 
 .text
-ENTRY(round_Xsig)
+SYM_FUNC_START(round_Xsig)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%ebx		/* Reserve some space */
@@ -79,11 +79,11 @@ L_exit:
 	popl	%ebx
 	leave
 	ret
-ENDPROC(round_Xsig)
+SYM_FUNC_END(round_Xsig)
 
 
 
-ENTRY(norm_Xsig)
+SYM_FUNC_START(norm_Xsig)
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%ebx		/* Reserve some space */
@@ -139,4 +139,4 @@ L_n_exit:
 	popl	%ebx
 	leave
 	ret
-ENDPROC(norm_Xsig)
+SYM_FUNC_END(norm_Xsig)
diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S
index 96f4779aa9c1..726af985f758 100644
--- a/arch/x86/math-emu/shr_Xsig.S
+++ b/arch/x86/math-emu/shr_Xsig.S
@@ -22,7 +22,7 @@
 #include "fpu_emu.h"
 
 .text
-ENTRY(shr_Xsig)
+SYM_FUNC_START(shr_Xsig)
 	push	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -86,4 +86,4 @@ L_more_than_95:
 	popl	%esi
 	leave
 	ret
-ENDPROC(shr_Xsig)
+SYM_FUNC_END(shr_Xsig)
diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S
index d588874eb6fb..4fc89174caf0 100644
--- a/arch/x86/math-emu/wm_shrx.S
+++ b/arch/x86/math-emu/wm_shrx.S
@@ -33,7 +33,7 @@
  |   Results returned in the 64 bit arg and eax.                             |
  +---------------------------------------------------------------------------*/
 
-ENTRY(FPU_shrx)
+SYM_FUNC_START(FPU_shrx)
 	push	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -93,7 +93,7 @@ L_more_than_95:
 	popl	%esi
 	leave
 	ret
-ENDPROC(FPU_shrx)
+SYM_FUNC_END(FPU_shrx)
 
 
 /*---------------------------------------------------------------------------+
@@ -112,7 +112,7 @@ ENDPROC(FPU_shrx)
  |   part which has been shifted out of the arg.                             |
  |   Results returned in the 64 bit arg and eax.                             |
  +---------------------------------------------------------------------------*/
-ENTRY(FPU_shrxs)
+SYM_FUNC_START(FPU_shrxs)
 	push	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
@@ -204,4 +204,4 @@ Ls_more_than_95:
 	popl	%esi
 	leave
 	ret
-ENDPROC(FPU_shrxs)
+SYM_FUNC_END(FPU_shrxs)
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index f031c0e19356..3b2b58164ec1 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -75,7 +75,7 @@ FPU_fsqrt_arg_0:
 
 
 .text
-ENTRY(wm_sqrt)
+SYM_FUNC_START(wm_sqrt)
 	pushl	%ebp
 	movl	%esp,%ebp
 #ifndef NON_REENTRANT_FPU
@@ -469,4 +469,4 @@ sqrt_more_prec_large:
 /* Our estimate is too large */
 	movl	$0x7fffff00,%eax
 	jmp	sqrt_round_result
-ENDPROC(wm_sqrt)
+SYM_FUNC_END(wm_sqrt)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 236e57776ca6..98aecb14fbcc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o		= -pg
 CFLAGS_REMOVE_mem_encrypt_identity.o	= -pg
 endif
 
-obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+obj-y				:=  init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
+				    pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+
+obj-y				+= pat/
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o	:= $(nostackp)
 
 CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
 
-obj-$(CONFIG_X86_PAT)		+= pat_rbtree.o
-
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 752ad11d6868..56f9189bbadb 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -17,6 +17,10 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
 #endif
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+#endif
+
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
 	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
@@ -108,7 +112,15 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
 	cea_map_stack(MCE);
 }
 #else
-static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
+static inline void percpu_setup_exception_stacks(unsigned int cpu)
+{
+#ifdef CONFIG_DOUBLEFAULT
+	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+	cea_map_percpu_pages(&cea->doublefault_stack,
+			     &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
+#endif
+}
 #endif
 
 /* Setup the fixmap mappings only once per-processor */
@@ -161,6 +173,14 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
 	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
 		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
 	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+	/*
+	 * VMX changes the host TR limit to 0x67 after a VM exit. This is
+	 * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
+	 * that this is correct.
+	 */
+	BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
+	BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);
+
 	cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
 			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
 
@@ -178,7 +198,9 @@ static __init void setup_cpu_entry_area_ptes(void)
 #ifdef CONFIG_X86_32
 	unsigned long start, end;
 
-	BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+	/* The +1 is for the readonly IDT: */
+	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
+	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
 	start = CPU_ENTRY_AREA_BASE;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 4d75bc656f97..30bb0bd3b1b8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -45,55 +45,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
 EXPORT_SYMBOL_GPL(ex_handler_fault);
 
 /*
- * Handler for UD0 exception following a failed test against the
- * result of a refcount inc/dec/add/sub.
- */
-__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
-				   struct pt_regs *regs, int trapnr,
-				   unsigned long error_code,
-				   unsigned long fault_addr)
-{
-	/* First unconditionally saturate the refcount. */
-	*(int *)regs->cx = INT_MIN / 2;
-
-	/*
-	 * Strictly speaking, this reports the fixup destination, not
-	 * the fault location, and not the actually overflowing
-	 * instruction, which is the instruction before the "js", but
-	 * since that instruction could be a variety of lengths, just
-	 * report the location after the overflow, which should be close
-	 * enough for finding the overflow, as it's at least back in
-	 * the function, having returned from .text.unlikely.
-	 */
-	regs->ip = ex_fixup_addr(fixup);
-
-	/*
-	 * This function has been called because either a negative refcount
-	 * value was seen by any of the refcount functions, or a zero
-	 * refcount value was seen by refcount_dec().
-	 *
-	 * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
-	 * wrapped around) will be set. Additionally, seeing the refcount
-	 * reach 0 will set ZF (Zero Flag: result was zero). In each of
-	 * these cases we want a report, since it's a boundary condition.
-	 * The SF case is not reported since it indicates post-boundary
-	 * manipulations below zero or above INT_MAX. And if none of the
-	 * flags are set, something has gone very wrong, so report it.
-	 */
-	if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
-		bool zero = regs->flags & X86_EFLAGS_ZF;
-
-		refcount_error_report(regs, zero ? "hit zero" : "overflow");
-	} else if ((regs->flags & X86_EFLAGS_SF) == 0) {
-		/* Report if none of OF, ZF, nor SF are set. */
-		refcount_error_report(regs, "unexpected saturation");
-	}
-
-	return true;
-}
-EXPORT_SYMBOL(ex_handler_refcount);
-
-/*
  * Handler for when we fail to restore a task's FPU state.  We should never get
  * here because the FPU state of a task using the FPU (task->thread.fpu.state)
  * should always be valid.  However, past bugs have allowed userspace to set
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9ceacd1156db..fa4ea09593ab 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -29,6 +29,7 @@
 #include <asm/efi.h>			/* efi_recover_from_page_fault()*/
 #include <asm/desc.h>			/* store_idt(), ...		*/
 #include <asm/cpu_entry_area.h>		/* exception stack		*/
+#include <asm/pgtable_areas.h>		/* VMALLOC_START, ...		*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -197,7 +198,7 @@ void vmalloc_sync_all(void)
 		return;
 
 	for (address = VMALLOC_START & PMD_MASK;
-	     address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
+	     address >= TASK_SIZE_MAX && address < VMALLOC_END;
 	     address += PMD_SIZE) {
 		struct page *page;
 
@@ -1486,27 +1487,6 @@ good_area:
 }
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
-/*
- * Explicitly marked noinline such that the function tracer sees this as the
- * page_fault entry point.
- */
-static noinline void
-__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
-		unsigned long address)
-{
-	prefetchw(&current->mm->mmap_sem);
-
-	if (unlikely(kmmio_fault(regs, address)))
-		return;
-
-	/* Was the fault on kernel-controlled part of the address space? */
-	if (unlikely(fault_in_kernel_space(address)))
-		do_kern_addr_fault(regs, hw_error_code, address);
-	else
-		do_user_addr_fault(regs, hw_error_code, address);
-}
-NOKPROBE_SYMBOL(__do_page_fault);
-
 static __always_inline void
 trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
 			 unsigned long address)
@@ -1521,13 +1501,19 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
 }
 
 dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
+		unsigned long address)
 {
-	enum ctx_state prev_state;
+	prefetchw(&current->mm->mmap_sem);
+	trace_page_fault_entries(regs, hw_error_code, address);
 
-	prev_state = exception_enter();
-	trace_page_fault_entries(regs, error_code, address);
-	__do_page_fault(regs, error_code, address);
-	exception_exit(prev_state);
+	if (unlikely(kmmio_fault(regs, address)))
+		return;
+
+	/* Was the fault on kernel-controlled part of the address space? */
+	if (unlikely(fault_in_kernel_space(address)))
+		do_kern_addr_fault(regs, hw_error_code, address);
+	else
+		do_user_addr_fault(regs, hw_error_code, address);
 }
 NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd10d91a6115..e7bb483557c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -829,14 +829,13 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
  * used for the kernel image only.  free_init_pages() will do the
  * right thing for either kind of address.
  */
-void free_kernel_image_pages(void *begin, void *end)
+void free_kernel_image_pages(const char *what, void *begin, void *end)
 {
 	unsigned long begin_ul = (unsigned long)begin;
 	unsigned long end_ul = (unsigned long)end;
 	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
 
-
-	free_init_pages("unused kernel image", begin_ul, end_ul);
+	free_init_pages(what, begin_ul, end_ul);
 
 	/*
 	 * PTI maps some of the kernel into userspace.  For performance,
@@ -865,7 +864,8 @@ void __ref free_initmem(void)
 
 	mem_encrypt_free_decrypted_mem();
 
-	free_kernel_image_pages(&__init_begin, &__init_end);
+	free_kernel_image_pages("unused kernel image (initmem)",
+				&__init_begin, &__init_end);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 930edeb41ec3..23df4885bbed 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -52,6 +52,7 @@
 #include <asm/page_types.h>
 #include <asm/cpu_entry_area.h>
 #include <asm/init.h>
+#include <asm/pgtable_areas.h>
 
 #include "mm_internal.h"
 
@@ -865,43 +866,13 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
 
 int kernel_set_to_readonly __read_mostly;
 
-void set_kernel_text_rw(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long size = PFN_ALIGN(_etext) - start;
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read write\n",
-		 start, start+size);
-
-	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long size = PFN_ALIGN(_etext) - start;
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read only\n",
-		 start, start+size);
-
-	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-}
-
 static void mark_nxdata_nx(void)
 {
 	/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a6b5c653727b..abbdecb75fad 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
-	struct zone *zone = page_zone(page);
 
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 	kernel_physical_mapping_remove(start, start + size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
@@ -1260,49 +1258,13 @@ void __init mem_init(void)
 
 int kernel_set_to_readonly;
 
-void set_kernel_text_rw(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(__stop___ex_table);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read write\n",
-		 start, end);
-
-	/*
-	 * Make the kernel identity mapping for text RW. Kernel text
-	 * mapping will always be RO. Refer to the comment in
-	 * static_protections() in pageattr.c
-	 */
-	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(__stop___ex_table);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read only\n",
-		 start, end);
-
-	/*
-	 * Set the kernel identity mapping for text RO.
-	 */
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
-}
-
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
-	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
-	unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
-	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+	unsigned long end = (unsigned long)__end_rodata_hpage_align;
+	unsigned long text_end = PFN_ALIGN(_etext);
+	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
 	unsigned long all_end;
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
@@ -1334,8 +1296,10 @@ void mark_rodata_ro(void)
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 #endif
 
-	free_kernel_image_pages((void *)text_end, (void *)rodata_start);
-	free_kernel_image_pages((void *)rodata_end, (void *)_sdata);
+	free_kernel_image_pages("unused kernel image (text/rodata gap)",
+				(void *)text_end, (void *)rodata_start);
+	free_kernel_image_pages("unused kernel image (rodata/data gap)",
+				(void *)rodata_end, (void *)_sdata);
 
 	debug_checkwx();
 }
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 6748b4c2baff..f60398aeb644 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -4,7 +4,7 @@
  */
 
 #include <asm/iomap.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <linux/export.h>
 #include <linux/highmem.h>
 
@@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
 	if (!is_io_mapping_possible(base, size))
 		return -EINVAL;
 
-	ret = io_reserve_memtype(base, base + size, &pcm);
+	ret = memtype_reserve_io(base, base + size, &pcm);
 	if (ret)
 		return ret;
 
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc);
 
 void iomap_free(resource_size_t base, unsigned long size)
 {
-	io_free_memtype(base, base + size);
+	memtype_free_io(base, base + size);
 }
 EXPORT_SYMBOL_GPL(iomap_free);
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a39dcdb5ae34..44e4beb4239f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,7 +24,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/setup.h>
 
 #include "physaddr.h"
@@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
 	phys_addr &= PHYSICAL_PAGE_MASK;
 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
 
-	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
+	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
 						pcm, &new_pcm);
 	if (retval) {
-		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
+		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
 		return NULL;
 	}
 
@@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
 	area->phys_addr = phys_addr;
 	vaddr = (unsigned long) area->addr;
 
-	if (kernel_map_sync_memtype(phys_addr, size, pcm))
+	if (memtype_kernel_map_sync(phys_addr, size, pcm))
 		goto err_free_area;
 
 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
@@ -275,16 +275,16 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
 err_free_area:
 	free_vm_area(area);
 err_free_memtype:
-	free_memtype(phys_addr, phys_addr + size);
+	memtype_free(phys_addr, phys_addr + size);
 	return NULL;
 }
 
 /**
- * ioremap_nocache     -   map bus memory into CPU space
+ * ioremap     -   map bus memory into CPU space
  * @phys_addr:    bus address of the memory
  * @size:      size of the resource to map
  *
- * ioremap_nocache performs a platform specific sequence of operations to
+ * ioremap performs a platform specific sequence of operations to
  * make bus memory CPU accessible via the readb/readw/readl/writeb/
  * writew/writel functions and the other mmio helpers. The returned
  * address is not guaranteed to be usable directly as a virtual
@@ -300,7 +300,7 @@ err_free_memtype:
  *
  * Must be freed with iounmap.
  */
-void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
+void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
 {
 	/*
 	 * Ideally, this should be:
@@ -315,7 +315,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 	return __ioremap_caller(phys_addr, size, pcm,
 				__builtin_return_address(0), false);
 }
-EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(ioremap);
 
 /**
  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
@@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
-	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
 
 	/* Finally remove it */
 	o = remove_vm_area((void __force *)addr);
@@ -626,6 +626,17 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
 		paddr_next = data->next;
 		len = data->len;
 
+		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+			memunmap(data);
+			return true;
+		}
+
+		if (data->type == SETUP_INDIRECT &&
+		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+			paddr = ((struct setup_indirect *)data->data)->addr;
+			len = ((struct setup_indirect *)data->data)->len;
+		}
+
 		memunmap(data);
 
 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 296da58f3013..763e71abc0fe 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -245,22 +245,48 @@ static void __init kasan_map_early_shadow(pgd_t *pgd)
 	} while (pgd++, addr = next, addr != end);
 }
 
-#ifdef CONFIG_KASAN_INLINE
-static int kasan_die_handler(struct notifier_block *self,
-			     unsigned long val,
-			     void *data)
+static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
+					       unsigned long addr,
+					       unsigned long end)
 {
-	if (val == DIE_GPF) {
-		pr_emerg("CONFIG_KASAN_INLINE enabled\n");
-		pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
-	}
-	return NOTIFY_OK;
+	p4d_t *p4d;
+	unsigned long next;
+	void *p;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (p4d_none(*p4d)) {
+			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
+			p4d_populate(&init_mm, p4d, p);
+		}
+	} while (p4d++, addr = next, addr != end);
 }
 
-static struct notifier_block kasan_die_notifier = {
-	.notifier_call = kasan_die_handler,
-};
-#endif
+static void __init kasan_shallow_populate_pgds(void *start, void *end)
+{
+	unsigned long addr, next;
+	pgd_t *pgd;
+	void *p;
+
+	addr = (unsigned long)start;
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, (unsigned long)end);
+
+		if (pgd_none(*pgd)) {
+			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
+			pgd_populate(&init_mm, pgd, p);
+		}
+
+		/*
+		 * we need to populate p4ds to be synced when running in
+		 * four level mode - see sync_global_pgds_l4()
+		 */
+		kasan_shallow_populate_p4ds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != (unsigned long)end);
+}
 
 void __init kasan_early_init(void)
 {
@@ -298,10 +324,6 @@ void __init kasan_init(void)
 	int i;
 	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
-#ifdef CONFIG_KASAN_INLINE
-	register_die_notifier(&kasan_die_notifier);
-#endif
-
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
 
 	/*
@@ -354,6 +376,24 @@ void __init kasan_init(void)
 
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+		kasan_mem_to_shadow((void *)VMALLOC_START));
+
+	/*
+	 * If we're in full vmalloc mode, don't back vmalloc space with early
+	 * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
+	 * the global table and we can populate the lower levels on demand.
+	 */
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+		kasan_shallow_populate_pgds(
+			kasan_mem_to_shadow((void *)VMALLOC_START),
+			kasan_mem_to_shadow((void *)VMALLOC_END));
+	else
+		kasan_populate_early_shadow(
+			kasan_mem_to_shadow((void *)VMALLOC_START),
+			kasan_mem_to_shadow((void *)VMALLOC_END));
+
+	kasan_populate_early_shadow(
+		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
 		shadow_cpu_entry_begin);
 
 	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 79eb55ce69a9..49d7814b59a9 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
-			   f->addr, f->count, !!f->old_presence);
+		pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
 	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
@@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 		 * something external causing them (f.e. using a debugger while
 		 * mmio tracing enabled), or erroneous behaviour
 		 */
-		pr_warning("unexpected debug trap on CPU %d.\n",
-			   smp_processor_id());
+		pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
 		goto out;
 	}
 
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+	return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+	/*
+	 * Range covering the highest possible canonical userspace address
+	 * as well as non-canonical address range. For the canonical range
+	 * we also need to include the userspace guard page.
+	 */
+	return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+	       canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+	return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+	if (unlikely(invalid_probe_range((unsigned long)src)))
+		return -EFAULT;
+
+	return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+	if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+		return -EFAULT;
+
+	return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 9268c12458c8..a03614bd3e1a 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -367,7 +367,7 @@ bool force_dma_unencrypted(struct device *dev)
 	if (sme_active()) {
 		u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
 		u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
-						dev->bus_dma_mask);
+						dev->bus_dma_limit);
 
 		if (dma_dev_mask <= dma_enc_mask)
 			return true;
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 6d71481a1e70..106ead05bbe3 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -16,7 +16,7 @@
 
 	.text
 	.code64
-ENTRY(sme_encrypt_execute)
+SYM_FUNC_START(sme_encrypt_execute)
 
 	/*
 	 * Entry parameters:
@@ -66,9 +66,9 @@ ENTRY(sme_encrypt_execute)
 	pop	%rbp
 
 	ret
-ENDPROC(sme_encrypt_execute)
+SYM_FUNC_END(sme_encrypt_execute)
 
-ENTRY(__enc_copy)
+SYM_FUNC_START(__enc_copy)
 /*
  * Routine used to encrypt memory in place.
  *   This routine must be run outside of the kernel proper since
@@ -153,4 +153,4 @@ ENTRY(__enc_copy)
 
 	ret
 .L__enc_copy_end:
-ENDPROC(__enc_copy)
+SYM_FUNC_END(__enc_copy)
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index b8ef8557d4b3..673de6063345 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -394,7 +394,7 @@ static void enter_uniprocessor(void)
 	}
 out:
 	if (num_online_cpus() > 1)
-		pr_warning("multiple CPUs still online, may miss events.\n");
+		pr_warn("multiple CPUs still online, may miss events.\n");
 }
 
 static void leave_uniprocessor(void)
@@ -418,8 +418,8 @@ static void leave_uniprocessor(void)
 static void enter_uniprocessor(void)
 {
 	if (num_online_cpus() > 1)
-		pr_warning("multiple CPUs are online, may miss events. "
-			   "Suggest booting with maxcpus=1 kernel argument.\n");
+		pr_warn("multiple CPUs are online, may miss events. "
+			"Suggest booting with maxcpus=1 kernel argument.\n");
 }
 
 static void leave_uniprocessor(void)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 4123100e0eaf..99f7a68738f0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -699,7 +699,7 @@ static int __init dummy_numa_init(void)
  * x86_numa_init - Initialize NUMA
  *
  * Try each configured NUMA initialization method until one succeeds.  The
- * last fallback is dummy single node config encomapssing whole memory and
+ * last fallback is dummy single node config encompassing whole memory and
  * never fails.
  */
 void __init x86_numa_init(void)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index abffa0be80da..7f1d2034df1e 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 		goto no_emu;
 
 	if (numa_cleanup_meminfo(&ei) < 0) {
-		pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+		pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
 		goto no_emu;
 	}
 
@@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
 		if (!phys) {
-			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+			pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
 			goto no_emu;
 		}
 		memblock_reserve(phys, phys_size);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
deleted file mode 100644
index 0d09cc5aad61..000000000000
--- a/arch/x86/mm/pageattr.c
+++ /dev/null
@@ -1,2285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/pfn.h>
-#include <linux/percpu.h>
-#include <linux/gfp.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include <asm/e820/api.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/proto.h>
-#include <asm/pat.h>
-#include <asm/set_memory.h>
-
-#include "mm_internal.h"
-
-/*
- * The current flushing context - we pass it instead of 5 arguments:
- */
-struct cpa_data {
-	unsigned long	*vaddr;
-	pgd_t		*pgd;
-	pgprot_t	mask_set;
-	pgprot_t	mask_clr;
-	unsigned long	numpages;
-	unsigned long	curpage;
-	unsigned long	pfn;
-	unsigned int	flags;
-	unsigned int	force_split		: 1,
-			force_static_prot	: 1;
-	struct page	**pages;
-};
-
-enum cpa_warn {
-	CPA_CONFLICT,
-	CPA_PROTECT,
-	CPA_DETECT,
-};
-
-static const int cpa_warn_level = CPA_PROTECT;
-
-/*
- * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
- * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
- * entries change the page attribute in parallel to some other cpu
- * splitting a large page entry along with changing the attribute.
- */
-static DEFINE_SPINLOCK(cpa_lock);
-
-#define CPA_FLUSHTLB 1
-#define CPA_ARRAY 2
-#define CPA_PAGES_ARRAY 4
-#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
-
-#ifdef CONFIG_PROC_FS
-static unsigned long direct_pages_count[PG_LEVEL_NUM];
-
-void update_page_count(int level, unsigned long pages)
-{
-	/* Protect against CPA */
-	spin_lock(&pgd_lock);
-	direct_pages_count[level] += pages;
-	spin_unlock(&pgd_lock);
-}
-
-static void split_page_count(int level)
-{
-	if (direct_pages_count[level] == 0)
-		return;
-
-	direct_pages_count[level]--;
-	direct_pages_count[level - 1] += PTRS_PER_PTE;
-}
-
-void arch_report_meminfo(struct seq_file *m)
-{
-	seq_printf(m, "DirectMap4k:    %8lu kB\n",
-			direct_pages_count[PG_LEVEL_4K] << 2);
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-	seq_printf(m, "DirectMap2M:    %8lu kB\n",
-			direct_pages_count[PG_LEVEL_2M] << 11);
-#else
-	seq_printf(m, "DirectMap4M:    %8lu kB\n",
-			direct_pages_count[PG_LEVEL_2M] << 12);
-#endif
-	if (direct_gbpages)
-		seq_printf(m, "DirectMap1G:    %8lu kB\n",
-			direct_pages_count[PG_LEVEL_1G] << 20);
-}
-#else
-static inline void split_page_count(int level) { }
-#endif
-
-#ifdef CONFIG_X86_CPA_STATISTICS
-
-static unsigned long cpa_1g_checked;
-static unsigned long cpa_1g_sameprot;
-static unsigned long cpa_1g_preserved;
-static unsigned long cpa_2m_checked;
-static unsigned long cpa_2m_sameprot;
-static unsigned long cpa_2m_preserved;
-static unsigned long cpa_4k_install;
-
-static inline void cpa_inc_1g_checked(void)
-{
-	cpa_1g_checked++;
-}
-
-static inline void cpa_inc_2m_checked(void)
-{
-	cpa_2m_checked++;
-}
-
-static inline void cpa_inc_4k_install(void)
-{
-	cpa_4k_install++;
-}
-
-static inline void cpa_inc_lp_sameprot(int level)
-{
-	if (level == PG_LEVEL_1G)
-		cpa_1g_sameprot++;
-	else
-		cpa_2m_sameprot++;
-}
-
-static inline void cpa_inc_lp_preserved(int level)
-{
-	if (level == PG_LEVEL_1G)
-		cpa_1g_preserved++;
-	else
-		cpa_2m_preserved++;
-}
-
-static int cpastats_show(struct seq_file *m, void *p)
-{
-	seq_printf(m, "1G pages checked:     %16lu\n", cpa_1g_checked);
-	seq_printf(m, "1G pages sameprot:    %16lu\n", cpa_1g_sameprot);
-	seq_printf(m, "1G pages preserved:   %16lu\n", cpa_1g_preserved);
-	seq_printf(m, "2M pages checked:     %16lu\n", cpa_2m_checked);
-	seq_printf(m, "2M pages sameprot:    %16lu\n", cpa_2m_sameprot);
-	seq_printf(m, "2M pages preserved:   %16lu\n", cpa_2m_preserved);
-	seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
-	return 0;
-}
-
-static int cpastats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cpastats_show, NULL);
-}
-
-static const struct file_operations cpastats_fops = {
-	.open		= cpastats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init cpa_stats_init(void)
-{
-	debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
-			    &cpastats_fops);
-	return 0;
-}
-late_initcall(cpa_stats_init);
-#else
-static inline void cpa_inc_1g_checked(void) { }
-static inline void cpa_inc_2m_checked(void) { }
-static inline void cpa_inc_4k_install(void) { }
-static inline void cpa_inc_lp_sameprot(int level) { }
-static inline void cpa_inc_lp_preserved(int level) { }
-#endif
-
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
-{
-	return addr >= start && addr < end;
-}
-
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
-{
-	return addr >= start && addr <= end;
-}
-
-#ifdef CONFIG_X86_64
-
-static inline unsigned long highmap_start_pfn(void)
-{
-	return __pa_symbol(_text) >> PAGE_SHIFT;
-}
-
-static inline unsigned long highmap_end_pfn(void)
-{
-	/* Do not reference physical address outside the kernel. */
-	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
-}
-
-static bool __cpa_pfn_in_highmap(unsigned long pfn)
-{
-	/*
-	 * Kernel text has an alias mapping at a high address, known
-	 * here as "highmap".
-	 */
-	return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
-}
-
-#else
-
-static bool __cpa_pfn_in_highmap(unsigned long pfn)
-{
-	/* There is no highmap on 32-bit */
-	return false;
-}
-
-#endif
-
-/*
- * See set_mce_nospec().
- *
- * Machine check recovery code needs to change cache mode of poisoned pages to
- * UC to avoid speculative access logging another error. But passing the
- * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
- * speculative access. So we cheat and flip the top bit of the address. This
- * works fine for the code that updates the page tables. But at the end of the
- * process we need to flush the TLB and cache and the non-canonical address
- * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long fix_addr(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
-	return (long)(addr << 1) >> 1;
-#else
-	return addr;
-#endif
-}
-
-static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
-{
-	if (cpa->flags & CPA_PAGES_ARRAY) {
-		struct page *page = cpa->pages[idx];
-
-		if (unlikely(PageHighMem(page)))
-			return 0;
-
-		return (unsigned long)page_address(page);
-	}
-
-	if (cpa->flags & CPA_ARRAY)
-		return cpa->vaddr[idx];
-
-	return *cpa->vaddr + idx * PAGE_SIZE;
-}
-
-/*
- * Flushing functions
- */
-
-static void clflush_cache_range_opt(void *vaddr, unsigned int size)
-{
-	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
-	void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
-	void *vend = vaddr + size;
-
-	if (p >= vend)
-		return;
-
-	for (; p < vend; p += clflush_size)
-		clflushopt(p);
-}
-
-/**
- * clflush_cache_range - flush a cache range with clflush
- * @vaddr:	virtual start address
- * @size:	number of bytes to flush
- *
- * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
- * SFENCE to avoid ordering issues.
- */
-void clflush_cache_range(void *vaddr, unsigned int size)
-{
-	mb();
-	clflush_cache_range_opt(vaddr, size);
-	mb();
-}
-EXPORT_SYMBOL_GPL(clflush_cache_range);
-
-void arch_invalidate_pmem(void *addr, size_t size)
-{
-	clflush_cache_range(addr, size);
-}
-EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
-
-static void __cpa_flush_all(void *arg)
-{
-	unsigned long cache = (unsigned long)arg;
-
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
-
-	if (cache && boot_cpu_data.x86 >= 4)
-		wbinvd();
-}
-
-static void cpa_flush_all(unsigned long cache)
-{
-	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
-
-	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
-}
-
-void __cpa_flush_tlb(void *data)
-{
-	struct cpa_data *cpa = data;
-	unsigned int i;
-
-	for (i = 0; i < cpa->numpages; i++)
-		__flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
-}
-
-static void cpa_flush(struct cpa_data *data, int cache)
-{
-	struct cpa_data *cpa = data;
-	unsigned int i;
-
-	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
-
-	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		cpa_flush_all(cache);
-		return;
-	}
-
-	if (cpa->numpages <= tlb_single_page_flush_ceiling)
-		on_each_cpu(__cpa_flush_tlb, cpa, 1);
-	else
-		flush_tlb_all();
-
-	if (!cache)
-		return;
-
-	mb();
-	for (i = 0; i < cpa->numpages; i++) {
-		unsigned long addr = __cpa_addr(cpa, i);
-		unsigned int level;
-
-		pte_t *pte = lookup_address(addr, &level);
-
-		/*
-		 * Only flush present addresses:
-		 */
-		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
-			clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
-	}
-	mb();
-}
-
-static bool overlaps(unsigned long r1_start, unsigned long r1_end,
-		     unsigned long r2_start, unsigned long r2_end)
-{
-	return (r1_start <= r2_end && r1_end >= r2_start) ||
-		(r2_start <= r1_end && r2_end >= r1_start);
-}
-
-#ifdef CONFIG_PCI_BIOS
-/*
- * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
- * based config access (CONFIG_PCI_GOBIOS) support.
- */
-#define BIOS_PFN	PFN_DOWN(BIOS_BEGIN)
-#define BIOS_PFN_END	PFN_DOWN(BIOS_END - 1)
-
-static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
-{
-	if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
-		return _PAGE_NX;
-	return 0;
-}
-#else
-static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
-{
-	return 0;
-}
-#endif
-
-/*
- * The .rodata section needs to be read-only. Using the pfn catches all
- * aliases.  This also includes __ro_after_init, so do not enforce until
- * kernel_set_to_readonly is true.
- */
-static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
-{
-	unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
-
-	/*
-	 * Note: __end_rodata is at page aligned and not inclusive, so
-	 * subtract 1 to get the last enforced PFN in the rodata area.
-	 */
-	epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
-
-	if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
-		return _PAGE_RW;
-	return 0;
-}
-
-/*
- * Protect kernel text against becoming non executable by forbidding
- * _PAGE_NX.  This protects only the high kernel mapping (_text -> _etext)
- * out of which the kernel actually executes.  Do not protect the low
- * mapping.
- *
- * This does not cover __inittext since that is gone after boot.
- */
-static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
-{
-	unsigned long t_end = (unsigned long)_etext - 1;
-	unsigned long t_start = (unsigned long)_text;
-
-	if (overlaps(start, end, t_start, t_end))
-		return _PAGE_NX;
-	return 0;
-}
-
-#if defined(CONFIG_X86_64)
-/*
- * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
- * kernel text mappings for the large page aligned text, rodata sections
- * will be always read-only. For the kernel identity mappings covering the
- * holes caused by this alignment can be anything that user asks.
- *
- * This will preserve the large page mappings for kernel text/data at no
- * extra cost.
- */
-static pgprotval_t protect_kernel_text_ro(unsigned long start,
-					  unsigned long end)
-{
-	unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
-	unsigned long t_start = (unsigned long)_text;
-	unsigned int level;
-
-	if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
-		return 0;
-	/*
-	 * Don't enforce the !RW mapping for the kernel text mapping, if
-	 * the current mapping is already using small page mapping.  No
-	 * need to work hard to preserve large page mappings in this case.
-	 *
-	 * This also fixes the Linux Xen paravirt guest boot failure caused
-	 * by unexpected read-only mappings for kernel identity
-	 * mappings. In this paravirt guest case, the kernel text mapping
-	 * and the kernel identity mapping share the same page-table pages,
-	 * so the protections for kernel text and identity mappings have to
-	 * be the same.
-	 */
-	if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
-		return _PAGE_RW;
-	return 0;
-}
-#else
-static pgprotval_t protect_kernel_text_ro(unsigned long start,
-					  unsigned long end)
-{
-	return 0;
-}
-#endif
-
-static inline bool conflicts(pgprot_t prot, pgprotval_t val)
-{
-	return (pgprot_val(prot) & ~val) != pgprot_val(prot);
-}
-
-static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
-				  unsigned long start, unsigned long end,
-				  unsigned long pfn, const char *txt)
-{
-	static const char *lvltxt[] = {
-		[CPA_CONFLICT]	= "conflict",
-		[CPA_PROTECT]	= "protect",
-		[CPA_DETECT]	= "detect",
-	};
-
-	if (warnlvl > cpa_warn_level || !conflicts(prot, val))
-		return;
-
-	pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
-		lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
-		(unsigned long long)val);
-}
-
-/*
- * Certain areas of memory on x86 require very specific protection flags,
- * for example the BIOS area or kernel text. Callers don't always get this
- * right (again, ioremap() on BIOS memory is not uncommon) so this function
- * checks and fixes these known static required protection bits.
- */
-static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
-					  unsigned long pfn, unsigned long npg,
-					  unsigned long lpsize, int warnlvl)
-{
-	pgprotval_t forbidden, res;
-	unsigned long end;
-
-	/*
-	 * There is no point in checking RW/NX conflicts when the requested
-	 * mapping is setting the page !PRESENT.
-	 */
-	if (!(pgprot_val(prot) & _PAGE_PRESENT))
-		return prot;
-
-	/* Operate on the virtual address */
-	end = start + npg * PAGE_SIZE - 1;
-
-	res = protect_kernel_text(start, end);
-	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
-	forbidden = res;
-
-	/*
-	 * Special case to preserve a large page. If the change spawns the
-	 * full large page mapping then there is no point to split it
-	 * up. Happens with ftrace and is going to be removed once ftrace
-	 * switched to text_poke().
-	 */
-	if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
-		res = protect_kernel_text_ro(start, end);
-		check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
-		forbidden |= res;
-	}
-
-	/* Check the PFN directly */
-	res = protect_pci_bios(pfn, pfn + npg - 1);
-	check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
-	forbidden |= res;
-
-	res = protect_rodata(pfn, pfn + npg - 1);
-	check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
-	forbidden |= res;
-
-	return __pgprot(pgprot_val(prot) & ~forbidden);
-}
-
-/*
- * Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry and the level of the mapping.
- */
-pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
-			     unsigned int *level)
-{
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	*level = PG_LEVEL_NONE;
-
-	if (pgd_none(*pgd))
-		return NULL;
-
-	p4d = p4d_offset(pgd, address);
-	if (p4d_none(*p4d))
-		return NULL;
-
-	*level = PG_LEVEL_512G;
-	if (p4d_large(*p4d) || !p4d_present(*p4d))
-		return (pte_t *)p4d;
-
-	pud = pud_offset(p4d, address);
-	if (pud_none(*pud))
-		return NULL;
-
-	*level = PG_LEVEL_1G;
-	if (pud_large(*pud) || !pud_present(*pud))
-		return (pte_t *)pud;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		return NULL;
-
-	*level = PG_LEVEL_2M;
-	if (pmd_large(*pmd) || !pmd_present(*pmd))
-		return (pte_t *)pmd;
-
-	*level = PG_LEVEL_4K;
-
-	return pte_offset_kernel(pmd, address);
-}
-
-/*
- * Lookup the page table entry for a virtual address. Return a pointer
- * to the entry and the level of the mapping.
- *
- * Note: We return pud and pmd either when the entry is marked large
- * or when the present bit is not set. Otherwise we would return a
- * pointer to a nonexisting mapping.
- */
-pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
-	return lookup_address_in_pgd(pgd_offset_k(address), address, level);
-}
-EXPORT_SYMBOL_GPL(lookup_address);
-
-static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
-				  unsigned int *level)
-{
-	if (cpa->pgd)
-		return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
-					       address, level);
-
-	return lookup_address(address, level);
-}
-
-/*
- * Lookup the PMD entry for a virtual address. Return a pointer to the entry
- * or NULL if not present.
- */
-pmd_t *lookup_pmd_address(unsigned long address)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-
-	pgd = pgd_offset_k(address);
-	if (pgd_none(*pgd))
-		return NULL;
-
-	p4d = p4d_offset(pgd, address);
-	if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
-		return NULL;
-
-	pud = pud_offset(p4d, address);
-	if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
-		return NULL;
-
-	return pmd_offset(pud, address);
-}
-
-/*
- * This is necessary because __pa() does not work on some
- * kinds of memory, like vmalloc() or the alloc_remap()
- * areas on 32-bit NUMA systems.  The percpu areas can
- * end up in this kind of memory, for instance.
- *
- * This could be optimized, but it is only intended to be
- * used at inititalization time, and keeping it
- * unoptimized should increase the testing coverage for
- * the more obscure platforms.
- */
-phys_addr_t slow_virt_to_phys(void *__virt_addr)
-{
-	unsigned long virt_addr = (unsigned long)__virt_addr;
-	phys_addr_t phys_addr;
-	unsigned long offset;
-	enum pg_level level;
-	pte_t *pte;
-
-	pte = lookup_address(virt_addr, &level);
-	BUG_ON(!pte);
-
-	/*
-	 * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
-	 * before being left-shifted PAGE_SHIFT bits -- this trick is to
-	 * make 32-PAE kernel work correctly.
-	 */
-	switch (level) {
-	case PG_LEVEL_1G:
-		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
-		offset = virt_addr & ~PUD_PAGE_MASK;
-		break;
-	case PG_LEVEL_2M:
-		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
-		offset = virt_addr & ~PMD_PAGE_MASK;
-		break;
-	default:
-		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
-		offset = virt_addr & ~PAGE_MASK;
-	}
-
-	return (phys_addr_t)(phys_addr | offset);
-}
-EXPORT_SYMBOL_GPL(slow_virt_to_phys);
-
-/*
- * Set the new pmd in all the pgds we know about:
- */
-static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
-{
-	/* change init_mm */
-	set_pte_atomic(kpte, pte);
-#ifdef CONFIG_X86_32
-	if (!SHARED_KERNEL_PMD) {
-		struct page *page;
-
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			p4d_t *p4d;
-			pud_t *pud;
-			pmd_t *pmd;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			p4d = p4d_offset(pgd, address);
-			pud = pud_offset(p4d, address);
-			pmd = pmd_offset(pud, address);
-			set_pte_atomic((pte_t *)pmd, pte);
-		}
-	}
-#endif
-}
-
-static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
-{
-	/*
-	 * _PAGE_GLOBAL means "global page" for present PTEs.
-	 * But, it is also used to indicate _PAGE_PROTNONE
-	 * for non-present PTEs.
-	 *
-	 * This ensures that a _PAGE_GLOBAL PTE going from
-	 * present to non-present is not confused as
-	 * _PAGE_PROTNONE.
-	 */
-	if (!(pgprot_val(prot) & _PAGE_PRESENT))
-		pgprot_val(prot) &= ~_PAGE_GLOBAL;
-
-	return prot;
-}
-
-static int __should_split_large_page(pte_t *kpte, unsigned long address,
-				     struct cpa_data *cpa)
-{
-	unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
-	pgprot_t old_prot, new_prot, req_prot, chk_prot;
-	pte_t new_pte, *tmp;
-	enum pg_level level;
-
-	/*
-	 * Check for races, another CPU might have split this page
-	 * up already:
-	 */
-	tmp = _lookup_address_cpa(cpa, address, &level);
-	if (tmp != kpte)
-		return 1;
-
-	switch (level) {
-	case PG_LEVEL_2M:
-		old_prot = pmd_pgprot(*(pmd_t *)kpte);
-		old_pfn = pmd_pfn(*(pmd_t *)kpte);
-		cpa_inc_2m_checked();
-		break;
-	case PG_LEVEL_1G:
-		old_prot = pud_pgprot(*(pud_t *)kpte);
-		old_pfn = pud_pfn(*(pud_t *)kpte);
-		cpa_inc_1g_checked();
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	psize = page_level_size(level);
-	pmask = page_level_mask(level);
-
-	/*
-	 * Calculate the number of pages, which fit into this large
-	 * page starting at address:
-	 */
-	lpaddr = (address + psize) & pmask;
-	numpages = (lpaddr - address) >> PAGE_SHIFT;
-	if (numpages < cpa->numpages)
-		cpa->numpages = numpages;
-
-	/*
-	 * We are safe now. Check whether the new pgprot is the same:
-	 * Convert protection attributes to 4k-format, as cpa->mask* are set
-	 * up accordingly.
-	 */
-
-	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
-	req_prot = pgprot_large_2_4k(old_prot);
-
-	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
-
-	/*
-	 * req_prot is in format of 4k pages. It must be converted to large
-	 * page format: the caching mode includes the PAT bit located at
-	 * different bit positions in the two formats.
-	 */
-	req_prot = pgprot_4k_2_large(req_prot);
-	req_prot = pgprot_clear_protnone_bits(req_prot);
-	if (pgprot_val(req_prot) & _PAGE_PRESENT)
-		pgprot_val(req_prot) |= _PAGE_PSE;
-
-	/*
-	 * old_pfn points to the large page base pfn. So we need to add the
-	 * offset of the virtual address:
-	 */
-	pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
-	cpa->pfn = pfn;
-
-	/*
-	 * Calculate the large page base address and the number of 4K pages
-	 * in the large page
-	 */
-	lpaddr = address & pmask;
-	numpages = psize >> PAGE_SHIFT;
-
-	/*
-	 * Sanity check that the existing mapping is correct versus the static
-	 * protections. static_protections() guards against !PRESENT, so no
-	 * extra conditional required here.
-	 */
-	chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
-				      psize, CPA_CONFLICT);
-
-	if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
-		/*
-		 * Split the large page and tell the split code to
-		 * enforce static protections.
-		 */
-		cpa->force_static_prot = 1;
-		return 1;
-	}
-
-	/*
-	 * Optimization: If the requested pgprot is the same as the current
-	 * pgprot, then the large page can be preserved and no updates are
-	 * required independent of alignment and length of the requested
-	 * range. The above already established that the current pgprot is
-	 * correct, which in consequence makes the requested pgprot correct
-	 * as well if it is the same. The static protection scan below will
-	 * not come to a different conclusion.
-	 */
-	if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
-		cpa_inc_lp_sameprot(level);
-		return 0;
-	}
-
-	/*
-	 * If the requested range does not cover the full page, split it up
-	 */
-	if (address != lpaddr || cpa->numpages != numpages)
-		return 1;
-
-	/*
-	 * Check whether the requested pgprot is conflicting with a static
-	 * protection requirement in the large page.
-	 */
-	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
-				      psize, CPA_DETECT);
-
-	/*
-	 * If there is a conflict, split the large page.
-	 *
-	 * There used to be a 4k wise evaluation trying really hard to
-	 * preserve the large pages, but experimentation has shown, that this
-	 * does not help at all. There might be corner cases which would
-	 * preserve one large page occasionally, but it's really not worth the
-	 * extra code and cycles for the common case.
-	 */
-	if (pgprot_val(req_prot) != pgprot_val(new_prot))
-		return 1;
-
-	/* All checks passed. Update the large page mapping. */
-	new_pte = pfn_pte(old_pfn, new_prot);
-	__set_pmd_pte(kpte, address, new_pte);
-	cpa->flags |= CPA_FLUSHTLB;
-	cpa_inc_lp_preserved(level);
-	return 0;
-}
-
-static int should_split_large_page(pte_t *kpte, unsigned long address,
-				   struct cpa_data *cpa)
-{
-	int do_split;
-
-	if (cpa->force_split)
-		return 1;
-
-	spin_lock(&pgd_lock);
-	do_split = __should_split_large_page(kpte, address, cpa);
-	spin_unlock(&pgd_lock);
-
-	return do_split;
-}
-
-static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
-			  pgprot_t ref_prot, unsigned long address,
-			  unsigned long size)
-{
-	unsigned int npg = PFN_DOWN(size);
-	pgprot_t prot;
-
-	/*
-	 * If should_split_large_page() discovered an inconsistent mapping,
-	 * remove the invalid protection in the split mapping.
-	 */
-	if (!cpa->force_static_prot)
-		goto set;
-
-	/* Hand in lpsize = 0 to enforce the protection mechanism */
-	prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
-
-	if (pgprot_val(prot) == pgprot_val(ref_prot))
-		goto set;
-
-	/*
-	 * If this is splitting a PMD, fix it up. PUD splits cannot be
-	 * fixed trivially as that would require to rescan the newly
-	 * installed PMD mappings after returning from split_large_page()
-	 * so an eventual further split can allocate the necessary PTE
-	 * pages. Warn for now and revisit it in case this actually
-	 * happens.
-	 */
-	if (size == PAGE_SIZE)
-		ref_prot = prot;
-	else
-		pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
-set:
-	set_pte(pte, pfn_pte(pfn, ref_prot));
-}
-
-static int
-__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
-		   struct page *base)
-{
-	unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
-	pte_t *pbase = (pte_t *)page_address(base);
-	unsigned int i, level;
-	pgprot_t ref_prot;
-	pte_t *tmp;
-
-	spin_lock(&pgd_lock);
-	/*
-	 * Check for races, another CPU might have split this page
-	 * up for us already:
-	 */
-	tmp = _lookup_address_cpa(cpa, address, &level);
-	if (tmp != kpte) {
-		spin_unlock(&pgd_lock);
-		return 1;
-	}
-
-	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
-
-	switch (level) {
-	case PG_LEVEL_2M:
-		ref_prot = pmd_pgprot(*(pmd_t *)kpte);
-		/*
-		 * Clear PSE (aka _PAGE_PAT) and move
-		 * PAT bit to correct position.
-		 */
-		ref_prot = pgprot_large_2_4k(ref_prot);
-		ref_pfn = pmd_pfn(*(pmd_t *)kpte);
-		lpaddr = address & PMD_MASK;
-		lpinc = PAGE_SIZE;
-		break;
-
-	case PG_LEVEL_1G:
-		ref_prot = pud_pgprot(*(pud_t *)kpte);
-		ref_pfn = pud_pfn(*(pud_t *)kpte);
-		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
-		lpaddr = address & PUD_MASK;
-		lpinc = PMD_SIZE;
-		/*
-		 * Clear the PSE flags if the PRESENT flag is not set
-		 * otherwise pmd_present/pmd_huge will return true
-		 * even on a non present pmd.
-		 */
-		if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
-			pgprot_val(ref_prot) &= ~_PAGE_PSE;
-		break;
-
-	default:
-		spin_unlock(&pgd_lock);
-		return 1;
-	}
-
-	ref_prot = pgprot_clear_protnone_bits(ref_prot);
-
-	/*
-	 * Get the target pfn from the original entry:
-	 */
-	pfn = ref_pfn;
-	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
-		split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
-
-	if (virt_addr_valid(address)) {
-		unsigned long pfn = PFN_DOWN(__pa(address));
-
-		if (pfn_range_is_mapped(pfn, pfn + 1))
-			split_page_count(level);
-	}
-
-	/*
-	 * Install the new, split up pagetable.
-	 *
-	 * We use the standard kernel pagetable protections for the new
-	 * pagetable protections, the actual ptes set above control the
-	 * primary protection behavior:
-	 */
-	__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
-
-	/*
-	 * Do a global flush tlb after splitting the large page
-	 * and before we do the actual change page attribute in the PTE.
-	 *
-	 * Without this, we violate the TLB application note, that says:
-	 * "The TLBs may contain both ordinary and large-page
-	 *  translations for a 4-KByte range of linear addresses. This
-	 *  may occur if software modifies the paging structures so that
-	 *  the page size used for the address range changes. If the two
-	 *  translations differ with respect to page frame or attributes
-	 *  (e.g., permissions), processor behavior is undefined and may
-	 *  be implementation-specific."
-	 *
-	 * We do this global tlb flush inside the cpa_lock, so that we
-	 * don't allow any other cpu, with stale tlb entries change the
-	 * page attribute in parallel, that also falls into the
-	 * just split large page entry.
-	 */
-	flush_tlb_all();
-	spin_unlock(&pgd_lock);
-
-	return 0;
-}
-
-static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
-			    unsigned long address)
-{
-	struct page *base;
-
-	if (!debug_pagealloc_enabled())
-		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL, 0);
-	if (!debug_pagealloc_enabled())
-		spin_lock(&cpa_lock);
-	if (!base)
-		return -ENOMEM;
-
-	if (__split_large_page(cpa, kpte, address, base))
-		__free_page(base);
-
-	return 0;
-}
-
-static bool try_to_free_pte_page(pte_t *pte)
-{
-	int i;
-
-	for (i = 0; i < PTRS_PER_PTE; i++)
-		if (!pte_none(pte[i]))
-			return false;
-
-	free_page((unsigned long)pte);
-	return true;
-}
-
-static bool try_to_free_pmd_page(pmd_t *pmd)
-{
-	int i;
-
-	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_none(pmd[i]))
-			return false;
-
-	free_page((unsigned long)pmd);
-	return true;
-}
-
-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
-{
-	pte_t *pte = pte_offset_kernel(pmd, start);
-
-	while (start < end) {
-		set_pte(pte, __pte(0));
-
-		start += PAGE_SIZE;
-		pte++;
-	}
-
-	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
-		pmd_clear(pmd);
-		return true;
-	}
-	return false;
-}
-
-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
-			      unsigned long start, unsigned long end)
-{
-	if (unmap_pte_range(pmd, start, end))
-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
-			pud_clear(pud);
-}
-
-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
-{
-	pmd_t *pmd = pmd_offset(pud, start);
-
-	/*
-	 * Not on a 2MB page boundary?
-	 */
-	if (start & (PMD_SIZE - 1)) {
-		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
-		unsigned long pre_end = min_t(unsigned long, end, next_page);
-
-		__unmap_pmd_range(pud, pmd, start, pre_end);
-
-		start = pre_end;
-		pmd++;
-	}
-
-	/*
-	 * Try to unmap in 2M chunks.
-	 */
-	while (end - start >= PMD_SIZE) {
-		if (pmd_large(*pmd))
-			pmd_clear(pmd);
-		else
-			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
-
-		start += PMD_SIZE;
-		pmd++;
-	}
-
-	/*
-	 * 4K leftovers?
-	 */
-	if (start < end)
-		return __unmap_pmd_range(pud, pmd, start, end);
-
-	/*
-	 * Try again to free the PMD page if haven't succeeded above.
-	 */
-	if (!pud_none(*pud))
-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
-			pud_clear(pud);
-}
-
-static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
-{
-	pud_t *pud = pud_offset(p4d, start);
-
-	/*
-	 * Not on a GB page boundary?
-	 */
-	if (start & (PUD_SIZE - 1)) {
-		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
-		unsigned long pre_end	= min_t(unsigned long, end, next_page);
-
-		unmap_pmd_range(pud, start, pre_end);
-
-		start = pre_end;
-		pud++;
-	}
-
-	/*
-	 * Try to unmap in 1G chunks?
-	 */
-	while (end - start >= PUD_SIZE) {
-
-		if (pud_large(*pud))
-			pud_clear(pud);
-		else
-			unmap_pmd_range(pud, start, start + PUD_SIZE);
-
-		start += PUD_SIZE;
-		pud++;
-	}
-
-	/*
-	 * 2M leftovers?
-	 */
-	if (start < end)
-		unmap_pmd_range(pud, start, end);
-
-	/*
-	 * No need to try to free the PUD page because we'll free it in
-	 * populate_pgd's error path
-	 */
-}
-
-static int alloc_pte_page(pmd_t *pmd)
-{
-	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
-	if (!pte)
-		return -1;
-
-	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
-	return 0;
-}
-
-static int alloc_pmd_page(pud_t *pud)
-{
-	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-	if (!pmd)
-		return -1;
-
-	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-	return 0;
-}
-
-static void populate_pte(struct cpa_data *cpa,
-			 unsigned long start, unsigned long end,
-			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
-{
-	pte_t *pte;
-
-	pte = pte_offset_kernel(pmd, start);
-
-	pgprot = pgprot_clear_protnone_bits(pgprot);
-
-	while (num_pages-- && start < end) {
-		set_pte(pte, pfn_pte(cpa->pfn, pgprot));
-
-		start	 += PAGE_SIZE;
-		cpa->pfn++;
-		pte++;
-	}
-}
-
-static long populate_pmd(struct cpa_data *cpa,
-			 unsigned long start, unsigned long end,
-			 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
-{
-	long cur_pages = 0;
-	pmd_t *pmd;
-	pgprot_t pmd_pgprot;
-
-	/*
-	 * Not on a 2M boundary?
-	 */
-	if (start & (PMD_SIZE - 1)) {
-		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
-		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
-
-		pre_end   = min_t(unsigned long, pre_end, next_page);
-		cur_pages = (pre_end - start) >> PAGE_SHIFT;
-		cur_pages = min_t(unsigned int, num_pages, cur_pages);
-
-		/*
-		 * Need a PTE page?
-		 */
-		pmd = pmd_offset(pud, start);
-		if (pmd_none(*pmd))
-			if (alloc_pte_page(pmd))
-				return -1;
-
-		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
-
-		start = pre_end;
-	}
-
-	/*
-	 * We mapped them all?
-	 */
-	if (num_pages == cur_pages)
-		return cur_pages;
-
-	pmd_pgprot = pgprot_4k_2_large(pgprot);
-
-	while (end - start >= PMD_SIZE) {
-
-		/*
-		 * We cannot use a 1G page so allocate a PMD page if needed.
-		 */
-		if (pud_none(*pud))
-			if (alloc_pmd_page(pud))
-				return -1;
-
-		pmd = pmd_offset(pud, start);
-
-		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
-					canon_pgprot(pmd_pgprot))));
-
-		start	  += PMD_SIZE;
-		cpa->pfn  += PMD_SIZE >> PAGE_SHIFT;
-		cur_pages += PMD_SIZE >> PAGE_SHIFT;
-	}
-
-	/*
-	 * Map trailing 4K pages.
-	 */
-	if (start < end) {
-		pmd = pmd_offset(pud, start);
-		if (pmd_none(*pmd))
-			if (alloc_pte_page(pmd))
-				return -1;
-
-		populate_pte(cpa, start, end, num_pages - cur_pages,
-			     pmd, pgprot);
-	}
-	return num_pages;
-}
-
-static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
-			pgprot_t pgprot)
-{
-	pud_t *pud;
-	unsigned long end;
-	long cur_pages = 0;
-	pgprot_t pud_pgprot;
-
-	end = start + (cpa->numpages << PAGE_SHIFT);
-
-	/*
-	 * Not on a Gb page boundary? => map everything up to it with
-	 * smaller pages.
-	 */
-	if (start & (PUD_SIZE - 1)) {
-		unsigned long pre_end;
-		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
-
-		pre_end   = min_t(unsigned long, end, next_page);
-		cur_pages = (pre_end - start) >> PAGE_SHIFT;
-		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
-
-		pud = pud_offset(p4d, start);
-
-		/*
-		 * Need a PMD page?
-		 */
-		if (pud_none(*pud))
-			if (alloc_pmd_page(pud))
-				return -1;
-
-		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
-					 pud, pgprot);
-		if (cur_pages < 0)
-			return cur_pages;
-
-		start = pre_end;
-	}
-
-	/* We mapped them all? */
-	if (cpa->numpages == cur_pages)
-		return cur_pages;
-
-	pud = pud_offset(p4d, start);
-	pud_pgprot = pgprot_4k_2_large(pgprot);
-
-	/*
-	 * Map everything starting from the Gb boundary, possibly with 1G pages
-	 */
-	while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
-		set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
-				   canon_pgprot(pud_pgprot))));
-
-		start	  += PUD_SIZE;
-		cpa->pfn  += PUD_SIZE >> PAGE_SHIFT;
-		cur_pages += PUD_SIZE >> PAGE_SHIFT;
-		pud++;
-	}
-
-	/* Map trailing leftover */
-	if (start < end) {
-		long tmp;
-
-		pud = pud_offset(p4d, start);
-		if (pud_none(*pud))
-			if (alloc_pmd_page(pud))
-				return -1;
-
-		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
-				   pud, pgprot);
-		if (tmp < 0)
-			return cur_pages;
-
-		cur_pages += tmp;
-	}
-	return cur_pages;
-}
-
-/*
- * Restrictions for kernel page table do not necessarily apply when mapping in
- * an alternate PGD.
- */
-static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
-{
-	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
-	pud_t *pud = NULL;	/* shut up gcc */
-	p4d_t *p4d;
-	pgd_t *pgd_entry;
-	long ret;
-
-	pgd_entry = cpa->pgd + pgd_index(addr);
-
-	if (pgd_none(*pgd_entry)) {
-		p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
-		if (!p4d)
-			return -1;
-
-		set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
-	}
-
-	/*
-	 * Allocate a PUD page and hand it down for mapping.
-	 */
-	p4d = p4d_offset(pgd_entry, addr);
-	if (p4d_none(*p4d)) {
-		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
-		if (!pud)
-			return -1;
-
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
-	}
-
-	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
-
-	ret = populate_pud(cpa, addr, p4d, pgprot);
-	if (ret < 0) {
-		/*
-		 * Leave the PUD page in place in case some other CPU or thread
-		 * already found it, but remove any useless entries we just
-		 * added to it.
-		 */
-		unmap_pud_range(p4d, addr,
-				addr + (cpa->numpages << PAGE_SHIFT));
-		return ret;
-	}
-
-	cpa->numpages = ret;
-	return 0;
-}
-
-static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
-			       int primary)
-{
-	if (cpa->pgd) {
-		/*
-		 * Right now, we only execute this code path when mapping
-		 * the EFI virtual memory map regions, no other users
-		 * provide a ->pgd value. This may change in the future.
-		 */
-		return populate_pgd(cpa, vaddr);
-	}
-
-	/*
-	 * Ignore all non primary paths.
-	 */
-	if (!primary) {
-		cpa->numpages = 1;
-		return 0;
-	}
-
-	/*
-	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
-	 * to have holes.
-	 * Also set numpages to '1' indicating that we processed cpa req for
-	 * one virtual address page and its pfn. TBD: numpages can be set based
-	 * on the initial value and the level returned by lookup_address().
-	 */
-	if (within(vaddr, PAGE_OFFSET,
-		   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
-		cpa->numpages = 1;
-		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
-		return 0;
-
-	} else if (__cpa_pfn_in_highmap(cpa->pfn)) {
-		/* Faults in the highmap are OK, so do not warn: */
-		return -EFAULT;
-	} else {
-		WARN(1, KERN_WARNING "CPA: called for zero pte. "
-			"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
-			*cpa->vaddr);
-
-		return -EFAULT;
-	}
-}
-
-static int __change_page_attr(struct cpa_data *cpa, int primary)
-{
-	unsigned long address;
-	int do_split, err;
-	unsigned int level;
-	pte_t *kpte, old_pte;
-
-	address = __cpa_addr(cpa, cpa->curpage);
-repeat:
-	kpte = _lookup_address_cpa(cpa, address, &level);
-	if (!kpte)
-		return __cpa_process_fault(cpa, address, primary);
-
-	old_pte = *kpte;
-	if (pte_none(old_pte))
-		return __cpa_process_fault(cpa, address, primary);
-
-	if (level == PG_LEVEL_4K) {
-		pte_t new_pte;
-		pgprot_t new_prot = pte_pgprot(old_pte);
-		unsigned long pfn = pte_pfn(old_pte);
-
-		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
-
-		cpa_inc_4k_install();
-		/* Hand in lpsize = 0 to enforce the protection mechanism */
-		new_prot = static_protections(new_prot, address, pfn, 1, 0,
-					      CPA_PROTECT);
-
-		new_prot = pgprot_clear_protnone_bits(new_prot);
-
-		/*
-		 * We need to keep the pfn from the existing PTE,
-		 * after all we're only going to change it's attributes
-		 * not the memory it points to
-		 */
-		new_pte = pfn_pte(pfn, new_prot);
-		cpa->pfn = pfn;
-		/*
-		 * Do we really change anything ?
-		 */
-		if (pte_val(old_pte) != pte_val(new_pte)) {
-			set_pte_atomic(kpte, new_pte);
-			cpa->flags |= CPA_FLUSHTLB;
-		}
-		cpa->numpages = 1;
-		return 0;
-	}
-
-	/*
-	 * Check, whether we can keep the large page intact
-	 * and just change the pte:
-	 */
-	do_split = should_split_large_page(kpte, address, cpa);
-	/*
-	 * When the range fits into the existing large page,
-	 * return. cp->numpages and cpa->tlbflush have been updated in
-	 * try_large_page:
-	 */
-	if (do_split <= 0)
-		return do_split;
-
-	/*
-	 * We have to split the large page:
-	 */
-	err = split_large_page(cpa, kpte, address);
-	if (!err)
-		goto repeat;
-
-	return err;
-}
-
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
-
-static int cpa_process_alias(struct cpa_data *cpa)
-{
-	struct cpa_data alias_cpa;
-	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr;
-	int ret;
-
-	if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
-		return 0;
-
-	/*
-	 * No need to redo, when the primary call touched the direct
-	 * mapping already:
-	 */
-	vaddr = __cpa_addr(cpa, cpa->curpage);
-	if (!(within(vaddr, PAGE_OFFSET,
-		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
-
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &laddr;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		alias_cpa.curpage = 0;
-
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
-#ifdef CONFIG_X86_64
-	/*
-	 * If the primary call didn't touch the high mapping already
-	 * and the physical address is inside the kernel map, we need
-	 * to touch the high mapped kernel as well:
-	 */
-	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-	    __cpa_pfn_in_highmap(cpa->pfn)) {
-		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
-					       __START_KERNEL_map - phys_base;
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &temp_cpa_vaddr;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		alias_cpa.curpage = 0;
-
-		/*
-		 * The high mapping range is imprecise, so ignore the
-		 * return value.
-		 */
-		__change_page_attr_set_clr(&alias_cpa, 0);
-	}
-#endif
-
-	return 0;
-}
-
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
-{
-	unsigned long numpages = cpa->numpages;
-	unsigned long rempages = numpages;
-	int ret = 0;
-
-	while (rempages) {
-		/*
-		 * Store the remaining nr of pages for the large page
-		 * preservation check.
-		 */
-		cpa->numpages = rempages;
-		/* for array changes, we can't use large page */
-		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
-			cpa->numpages = 1;
-
-		if (!debug_pagealloc_enabled())
-			spin_lock(&cpa_lock);
-		ret = __change_page_attr(cpa, checkalias);
-		if (!debug_pagealloc_enabled())
-			spin_unlock(&cpa_lock);
-		if (ret)
-			goto out;
-
-		if (checkalias) {
-			ret = cpa_process_alias(cpa);
-			if (ret)
-				goto out;
-		}
-
-		/*
-		 * Adjust the number of pages with the result of the
-		 * CPA operation. Either a large page has been
-		 * preserved or a single page update happened.
-		 */
-		BUG_ON(cpa->numpages > rempages || !cpa->numpages);
-		rempages -= cpa->numpages;
-		cpa->curpage += cpa->numpages;
-	}
-
-out:
-	/* Restore the original numpages */
-	cpa->numpages = numpages;
-	return ret;
-}
-
-static int change_page_attr_set_clr(unsigned long *addr, int numpages,
-				    pgprot_t mask_set, pgprot_t mask_clr,
-				    int force_split, int in_flag,
-				    struct page **pages)
-{
-	struct cpa_data cpa;
-	int ret, cache, checkalias;
-
-	memset(&cpa, 0, sizeof(cpa));
-
-	/*
-	 * Check, if we are requested to set a not supported
-	 * feature.  Clearing non-supported features is OK.
-	 */
-	mask_set = canon_pgprot(mask_set);
-
-	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
-		return 0;
-
-	/* Ensure we are PAGE_SIZE aligned */
-	if (in_flag & CPA_ARRAY) {
-		int i;
-		for (i = 0; i < numpages; i++) {
-			if (addr[i] & ~PAGE_MASK) {
-				addr[i] &= PAGE_MASK;
-				WARN_ON_ONCE(1);
-			}
-		}
-	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
-		/*
-		 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
-		 * No need to check in that case
-		 */
-		if (*addr & ~PAGE_MASK) {
-			*addr &= PAGE_MASK;
-			/*
-			 * People should not be passing in unaligned addresses:
-			 */
-			WARN_ON_ONCE(1);
-		}
-	}
-
-	/* Must avoid aliasing mappings in the highmem code */
-	kmap_flush_unused();
-
-	vm_unmap_aliases();
-
-	cpa.vaddr = addr;
-	cpa.pages = pages;
-	cpa.numpages = numpages;
-	cpa.mask_set = mask_set;
-	cpa.mask_clr = mask_clr;
-	cpa.flags = 0;
-	cpa.curpage = 0;
-	cpa.force_split = force_split;
-
-	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
-		cpa.flags |= in_flag;
-
-	/* No alias checking for _NX bit modifications */
-	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
-	/* Has caller explicitly disabled alias checking? */
-	if (in_flag & CPA_NO_CHECK_ALIAS)
-		checkalias = 0;
-
-	ret = __change_page_attr_set_clr(&cpa, checkalias);
-
-	/*
-	 * Check whether we really changed something:
-	 */
-	if (!(cpa.flags & CPA_FLUSHTLB))
-		goto out;
-
-	/*
-	 * No need to flush, when we did not set any of the caching
-	 * attributes:
-	 */
-	cache = !!pgprot2cachemode(mask_set);
-
-	/*
-	 * On error; flush everything to be sure.
-	 */
-	if (ret) {
-		cpa_flush_all(cache);
-		goto out;
-	}
-
-	cpa_flush(&cpa, cache);
-out:
-	return ret;
-}
-
-static inline int change_page_attr_set(unsigned long *addr, int numpages,
-				       pgprot_t mask, int array)
-{
-	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
-		(array ? CPA_ARRAY : 0), NULL);
-}
-
-static inline int change_page_attr_clear(unsigned long *addr, int numpages,
-					 pgprot_t mask, int array)
-{
-	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
-		(array ? CPA_ARRAY : 0), NULL);
-}
-
-static inline int cpa_set_pages_array(struct page **pages, int numpages,
-				       pgprot_t mask)
-{
-	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
-		CPA_PAGES_ARRAY, pages);
-}
-
-static inline int cpa_clear_pages_array(struct page **pages, int numpages,
-					 pgprot_t mask)
-{
-	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
-		CPA_PAGES_ARRAY, pages);
-}
-
-int _set_memory_uc(unsigned long addr, int numpages)
-{
-	/*
-	 * for now UC MINUS. see comments in ioremap_nocache()
-	 * If you really need strong UC use ioremap_uc(), but note
-	 * that you cannot override IO areas with set_memory_*() as
-	 * these helpers cannot work with IO memory.
-	 */
-	return change_page_attr_set(&addr, numpages,
-				    cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
-				    0);
-}
-
-int set_memory_uc(unsigned long addr, int numpages)
-{
-	int ret;
-
-	/*
-	 * for now UC MINUS. see comments in ioremap_nocache()
-	 */
-	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-			      _PAGE_CACHE_MODE_UC_MINUS, NULL);
-	if (ret)
-		goto out_err;
-
-	ret = _set_memory_uc(addr, numpages);
-	if (ret)
-		goto out_free;
-
-	return 0;
-
-out_free:
-	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-out_err:
-	return ret;
-}
-EXPORT_SYMBOL(set_memory_uc);
-
-int _set_memory_wc(unsigned long addr, int numpages)
-{
-	int ret;
-
-	ret = change_page_attr_set(&addr, numpages,
-				   cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
-				   0);
-	if (!ret) {
-		ret = change_page_attr_set_clr(&addr, numpages,
-					       cachemode2pgprot(_PAGE_CACHE_MODE_WC),
-					       __pgprot(_PAGE_CACHE_MASK),
-					       0, 0, NULL);
-	}
-	return ret;
-}
-
-int set_memory_wc(unsigned long addr, int numpages)
-{
-	int ret;
-
-	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-		_PAGE_CACHE_MODE_WC, NULL);
-	if (ret)
-		return ret;
-
-	ret = _set_memory_wc(addr, numpages);
-	if (ret)
-		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-
-	return ret;
-}
-EXPORT_SYMBOL(set_memory_wc);
-
-int _set_memory_wt(unsigned long addr, int numpages)
-{
-	return change_page_attr_set(&addr, numpages,
-				    cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
-}
-
-int _set_memory_wb(unsigned long addr, int numpages)
-{
-	/* WB cache mode is hard wired to all cache attribute bits being 0 */
-	return change_page_attr_clear(&addr, numpages,
-				      __pgprot(_PAGE_CACHE_MASK), 0);
-}
-
-int set_memory_wb(unsigned long addr, int numpages)
-{
-	int ret;
-
-	ret = _set_memory_wb(addr, numpages);
-	if (ret)
-		return ret;
-
-	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-	return 0;
-}
-EXPORT_SYMBOL(set_memory_wb);
-
-int set_memory_x(unsigned long addr, int numpages)
-{
-	if (!(__supported_pte_mask & _PAGE_NX))
-		return 0;
-
-	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
-}
-
-int set_memory_nx(unsigned long addr, int numpages)
-{
-	if (!(__supported_pte_mask & _PAGE_NX))
-		return 0;
-
-	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
-}
-
-int set_memory_ro(unsigned long addr, int numpages)
-{
-	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
-}
-
-int set_memory_rw(unsigned long addr, int numpages)
-{
-	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
-}
-
-int set_memory_np(unsigned long addr, int numpages)
-{
-	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-}
-
-int set_memory_np_noalias(unsigned long addr, int numpages)
-{
-	int cpa_flags = CPA_NO_CHECK_ALIAS;
-
-	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
-					__pgprot(_PAGE_PRESENT), 0,
-					cpa_flags, NULL);
-}
-
-int set_memory_4k(unsigned long addr, int numpages)
-{
-	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
-					__pgprot(0), 1, 0, NULL);
-}
-
-int set_memory_nonglobal(unsigned long addr, int numpages)
-{
-	return change_page_attr_clear(&addr, numpages,
-				      __pgprot(_PAGE_GLOBAL), 0);
-}
-
-int set_memory_global(unsigned long addr, int numpages)
-{
-	return change_page_attr_set(&addr, numpages,
-				    __pgprot(_PAGE_GLOBAL), 0);
-}
-
-static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
-{
-	struct cpa_data cpa;
-	int ret;
-
-	/* Nothing to do if memory encryption is not active */
-	if (!mem_encrypt_active())
-		return 0;
-
-	/* Should not be working on unaligned addresses */
-	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
-		addr &= PAGE_MASK;
-
-	memset(&cpa, 0, sizeof(cpa));
-	cpa.vaddr = &addr;
-	cpa.numpages = numpages;
-	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
-	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
-	cpa.pgd = init_mm.pgd;
-
-	/* Must avoid aliasing mappings in the highmem code */
-	kmap_flush_unused();
-	vm_unmap_aliases();
-
-	/*
-	 * Before changing the encryption attribute, we need to flush caches.
-	 */
-	cpa_flush(&cpa, 1);
-
-	ret = __change_page_attr_set_clr(&cpa, 1);
-
-	/*
-	 * After changing the encryption attribute, we need to flush TLBs again
-	 * in case any speculative TLB caching occurred (but no need to flush
-	 * caches again).  We could just use cpa_flush_all(), but in case TLB
-	 * flushing gets optimized in the cpa_flush() path use the same logic
-	 * as above.
-	 */
-	cpa_flush(&cpa, 0);
-
-	return ret;
-}
-
-int set_memory_encrypted(unsigned long addr, int numpages)
-{
-	return __set_memory_enc_dec(addr, numpages, true);
-}
-EXPORT_SYMBOL_GPL(set_memory_encrypted);
-
-int set_memory_decrypted(unsigned long addr, int numpages)
-{
-	return __set_memory_enc_dec(addr, numpages, false);
-}
-EXPORT_SYMBOL_GPL(set_memory_decrypted);
-
-int set_pages_uc(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_uc(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_uc);
-
-static int _set_pages_array(struct page **pages, int numpages,
-		enum page_cache_mode new_type)
-{
-	unsigned long start;
-	unsigned long end;
-	enum page_cache_mode set_type;
-	int i;
-	int free_idx;
-	int ret;
-
-	for (i = 0; i < numpages; i++) {
-		if (PageHighMem(pages[i]))
-			continue;
-		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
-		end = start + PAGE_SIZE;
-		if (reserve_memtype(start, end, new_type, NULL))
-			goto err_out;
-	}
-
-	/* If WC, set to UC- first and then WC */
-	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
-				_PAGE_CACHE_MODE_UC_MINUS : new_type;
-
-	ret = cpa_set_pages_array(pages, numpages,
-				  cachemode2pgprot(set_type));
-	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
-		ret = change_page_attr_set_clr(NULL, numpages,
-					       cachemode2pgprot(
-						_PAGE_CACHE_MODE_WC),
-					       __pgprot(_PAGE_CACHE_MASK),
-					       0, CPA_PAGES_ARRAY, pages);
-	if (ret)
-		goto err_out;
-	return 0; /* Success */
-err_out:
-	free_idx = i;
-	for (i = 0; i < free_idx; i++) {
-		if (PageHighMem(pages[i]))
-			continue;
-		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
-		end = start + PAGE_SIZE;
-		free_memtype(start, end);
-	}
-	return -EINVAL;
-}
-
-int set_pages_array_uc(struct page **pages, int numpages)
-{
-	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
-}
-EXPORT_SYMBOL(set_pages_array_uc);
-
-int set_pages_array_wc(struct page **pages, int numpages)
-{
-	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
-}
-EXPORT_SYMBOL(set_pages_array_wc);
-
-int set_pages_array_wt(struct page **pages, int numpages)
-{
-	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_pages_array_wt);
-
-int set_pages_wb(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_wb(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_wb);
-
-int set_pages_array_wb(struct page **pages, int numpages)
-{
-	int retval;
-	unsigned long start;
-	unsigned long end;
-	int i;
-
-	/* WB cache mode is hard wired to all cache attribute bits being 0 */
-	retval = cpa_clear_pages_array(pages, numpages,
-			__pgprot(_PAGE_CACHE_MASK));
-	if (retval)
-		return retval;
-
-	for (i = 0; i < numpages; i++) {
-		if (PageHighMem(pages[i]))
-			continue;
-		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
-		end = start + PAGE_SIZE;
-		free_memtype(start, end);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(set_pages_array_wb);
-
-int set_pages_ro(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_ro(addr, numpages);
-}
-
-int set_pages_rw(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_rw(addr, numpages);
-}
-
-static int __set_pages_p(struct page *page, int numpages)
-{
-	unsigned long tempaddr = (unsigned long) page_address(page);
-	struct cpa_data cpa = { .vaddr = &tempaddr,
-				.pgd = NULL,
-				.numpages = numpages,
-				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-				.mask_clr = __pgprot(0),
-				.flags = 0};
-
-	/*
-	 * No alias checking needed for setting present flag. otherwise,
-	 * we may need to break large pages for 64-bit kernel text
-	 * mappings (this adds to complexity if we want to do this from
-	 * atomic context especially). Let's keep it simple!
-	 */
-	return __change_page_attr_set_clr(&cpa, 0);
-}
-
-static int __set_pages_np(struct page *page, int numpages)
-{
-	unsigned long tempaddr = (unsigned long) page_address(page);
-	struct cpa_data cpa = { .vaddr = &tempaddr,
-				.pgd = NULL,
-				.numpages = numpages,
-				.mask_set = __pgprot(0),
-				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-				.flags = 0};
-
-	/*
-	 * No alias checking needed for setting not present flag. otherwise,
-	 * we may need to break large pages for 64-bit kernel text
-	 * mappings (this adds to complexity if we want to do this from
-	 * atomic context especially). Let's keep it simple!
-	 */
-	return __change_page_attr_set_clr(&cpa, 0);
-}
-
-int set_direct_map_invalid_noflush(struct page *page)
-{
-	return __set_pages_np(page, 1);
-}
-
-int set_direct_map_default_noflush(struct page *page)
-{
-	return __set_pages_p(page, 1);
-}
-
-void __kernel_map_pages(struct page *page, int numpages, int enable)
-{
-	if (PageHighMem(page))
-		return;
-	if (!enable) {
-		debug_check_no_locks_freed(page_address(page),
-					   numpages * PAGE_SIZE);
-	}
-
-	/*
-	 * The return value is ignored as the calls cannot fail.
-	 * Large pages for identity mappings are not used at boot time
-	 * and hence no memory allocations during large page split.
-	 */
-	if (enable)
-		__set_pages_p(page, numpages);
-	else
-		__set_pages_np(page, numpages);
-
-	/*
-	 * We should perform an IPI and flush all tlbs,
-	 * but that can deadlock->flush only current cpu.
-	 * Preemption needs to be disabled around __flush_tlb_all() due to
-	 * CR3 reload in __native_flush_tlb().
-	 */
-	preempt_disable();
-	__flush_tlb_all();
-	preempt_enable();
-
-	arch_flush_lazy_mmu_mode();
-}
-
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
-	unsigned int level;
-	pte_t *pte;
-
-	if (PageHighMem(page))
-		return false;
-
-	pte = lookup_address((unsigned long)page_address(page), &level);
-	return (pte_val(*pte) & _PAGE_PRESENT);
-}
-#endif /* CONFIG_HIBERNATION */
-
-int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
-				   unsigned numpages, unsigned long page_flags)
-{
-	int retval = -EINVAL;
-
-	struct cpa_data cpa = {
-		.vaddr = &address,
-		.pfn = pfn,
-		.pgd = pgd,
-		.numpages = numpages,
-		.mask_set = __pgprot(0),
-		.mask_clr = __pgprot(0),
-		.flags = 0,
-	};
-
-	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
-
-	if (!(__supported_pte_mask & _PAGE_NX))
-		goto out;
-
-	if (!(page_flags & _PAGE_NX))
-		cpa.mask_clr = __pgprot(_PAGE_NX);
-
-	if (!(page_flags & _PAGE_RW))
-		cpa.mask_clr = __pgprot(_PAGE_RW);
-
-	if (!(page_flags & _PAGE_ENC))
-		cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
-
-	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
-
-	retval = __change_page_attr_set_clr(&cpa, 0);
-	__flush_tlb_all();
-
-out:
-	return retval;
-}
-
-/*
- * __flush_tlb_all() flushes mappings only on current CPU and hence this
- * function shouldn't be used in an SMP environment. Presently, it's used only
- * during boot (way before smp_init()) by EFI subsystem and hence is ok.
- */
-int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
-				     unsigned long numpages)
-{
-	int retval;
-
-	/*
-	 * The typical sequence for unmapping is to find a pte through
-	 * lookup_address_in_pgd() (ideally, it should never return NULL because
-	 * the address is already mapped) and change it's protections. As pfn is
-	 * the *target* of a mapping, it's not useful while unmapping.
-	 */
-	struct cpa_data cpa = {
-		.vaddr		= &address,
-		.pfn		= 0,
-		.pgd		= pgd,
-		.numpages	= numpages,
-		.mask_set	= __pgprot(0),
-		.mask_clr	= __pgprot(_PAGE_PRESENT | _PAGE_RW),
-		.flags		= 0,
-	};
-
-	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
-
-	retval = __change_page_attr_set_clr(&cpa, 0);
-	__flush_tlb_all();
-
-	return retval;
-}
-
-/*
- * The testcases use internal knowledge of the implementation that shouldn't
- * be exposed to the rest of the kernel. Include these directly here.
- */
-#ifdef CONFIG_CPA_DEBUG
-#include "pageattr-test.c"
-#endif
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
deleted file mode 100644
index d9fbd4f69920..000000000000
--- a/arch/x86/mm/pat.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *          Suresh B Siddha <suresh.b.siddha@intel.com>
- *
- * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
- */
-
-#include <linux/seq_file.h>
-#include <linux/memblock.h>
-#include <linux/debugfs.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/pfn_t.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/rbtree.h>
-
-#include <asm/cacheflush.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/x86_init.h>
-#include <asm/pgtable.h>
-#include <asm/fcntl.h>
-#include <asm/e820/api.h>
-#include <asm/mtrr.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-#include <asm/pat.h>
-#include <asm/io.h>
-
-#include "pat_internal.h"
-#include "mm_internal.h"
-
-#undef pr_fmt
-#define pr_fmt(fmt) "" fmt
-
-static bool __read_mostly boot_cpu_done;
-static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
-static bool __read_mostly pat_initialized;
-static bool __read_mostly init_cm_done;
-
-void pat_disable(const char *reason)
-{
-	if (pat_disabled)
-		return;
-
-	if (boot_cpu_done) {
-		WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
-		return;
-	}
-
-	pat_disabled = true;
-	pr_info("x86/PAT: %s\n", reason);
-}
-
-static int __init nopat(char *str)
-{
-	pat_disable("PAT support disabled.");
-	return 0;
-}
-early_param("nopat", nopat);
-
-bool pat_enabled(void)
-{
-	return pat_initialized;
-}
-EXPORT_SYMBOL_GPL(pat_enabled);
-
-int pat_debug_enable;
-
-static int __init pat_debug_setup(char *str)
-{
-	pat_debug_enable = 1;
-	return 0;
-}
-__setup("debugpat", pat_debug_setup);
-
-#ifdef CONFIG_X86_PAT
-/*
- * X86 PAT uses page flags arch_1 and uncached together to keep track of
- * memory type of pages that have backing page struct.
- *
- * X86 PAT supports 4 different memory types:
- *  - _PAGE_CACHE_MODE_WB
- *  - _PAGE_CACHE_MODE_WC
- *  - _PAGE_CACHE_MODE_UC_MINUS
- *  - _PAGE_CACHE_MODE_WT
- *
- * _PAGE_CACHE_MODE_WB is the default type.
- */
-
-#define _PGMT_WB		0
-#define _PGMT_WC		(1UL << PG_arch_1)
-#define _PGMT_UC_MINUS		(1UL << PG_uncached)
-#define _PGMT_WT		(1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_MASK		(1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)
-
-static inline enum page_cache_mode get_page_memtype(struct page *pg)
-{
-	unsigned long pg_flags = pg->flags & _PGMT_MASK;
-
-	if (pg_flags == _PGMT_WB)
-		return _PAGE_CACHE_MODE_WB;
-	else if (pg_flags == _PGMT_WC)
-		return _PAGE_CACHE_MODE_WC;
-	else if (pg_flags == _PGMT_UC_MINUS)
-		return _PAGE_CACHE_MODE_UC_MINUS;
-	else
-		return _PAGE_CACHE_MODE_WT;
-}
-
-static inline void set_page_memtype(struct page *pg,
-				    enum page_cache_mode memtype)
-{
-	unsigned long memtype_flags;
-	unsigned long old_flags;
-	unsigned long new_flags;
-
-	switch (memtype) {
-	case _PAGE_CACHE_MODE_WC:
-		memtype_flags = _PGMT_WC;
-		break;
-	case _PAGE_CACHE_MODE_UC_MINUS:
-		memtype_flags = _PGMT_UC_MINUS;
-		break;
-	case _PAGE_CACHE_MODE_WT:
-		memtype_flags = _PGMT_WT;
-		break;
-	case _PAGE_CACHE_MODE_WB:
-	default:
-		memtype_flags = _PGMT_WB;
-		break;
-	}
-
-	do {
-		old_flags = pg->flags;
-		new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
-	} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
-}
-#else
-static inline enum page_cache_mode get_page_memtype(struct page *pg)
-{
-	return -1;
-}
-static inline void set_page_memtype(struct page *pg,
-				    enum page_cache_mode memtype)
-{
-}
-#endif
-
-enum {
-	PAT_UC = 0,		/* uncached */
-	PAT_WC = 1,		/* Write combining */
-	PAT_WT = 4,		/* Write Through */
-	PAT_WP = 5,		/* Write Protected */
-	PAT_WB = 6,		/* Write Back (default) */
-	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
-};
-
-#define CM(c) (_PAGE_CACHE_MODE_ ## c)
-
-static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
-{
-	enum page_cache_mode cache;
-	char *cache_mode;
-
-	switch (pat_val) {
-	case PAT_UC:       cache = CM(UC);       cache_mode = "UC  "; break;
-	case PAT_WC:       cache = CM(WC);       cache_mode = "WC  "; break;
-	case PAT_WT:       cache = CM(WT);       cache_mode = "WT  "; break;
-	case PAT_WP:       cache = CM(WP);       cache_mode = "WP  "; break;
-	case PAT_WB:       cache = CM(WB);       cache_mode = "WB  "; break;
-	case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
-	default:           cache = CM(WB);       cache_mode = "WB  "; break;
-	}
-
-	memcpy(msg, cache_mode, 4);
-
-	return cache;
-}
-
-#undef CM
-
-/*
- * Update the cache mode to pgprot translation tables according to PAT
- * configuration.
- * Using lower indices is preferred, so we start with highest index.
- */
-static void __init_cache_modes(u64 pat)
-{
-	enum page_cache_mode cache;
-	char pat_msg[33];
-	int i;
-
-	pat_msg[32] = 0;
-	for (i = 7; i >= 0; i--) {
-		cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
-					   pat_msg + 4 * i);
-		update_cache_mode_entry(i, cache);
-	}
-	pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
-
-	init_cm_done = true;
-}
-
-#define PAT(x, y)	((u64)PAT_ ## y << ((x)*8))
-
-static void pat_bsp_init(u64 pat)
-{
-	u64 tmp_pat;
-
-	if (!boot_cpu_has(X86_FEATURE_PAT)) {
-		pat_disable("PAT not supported by CPU.");
-		return;
-	}
-
-	rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
-	if (!tmp_pat) {
-		pat_disable("PAT MSR is 0, disabled.");
-		return;
-	}
-
-	wrmsrl(MSR_IA32_CR_PAT, pat);
-	pat_initialized = true;
-
-	__init_cache_modes(pat);
-}
-
-static void pat_ap_init(u64 pat)
-{
-	if (!boot_cpu_has(X86_FEATURE_PAT)) {
-		/*
-		 * If this happens we are on a secondary CPU, but switched to
-		 * PAT on the boot CPU. We have no way to undo PAT.
-		 */
-		panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
-	}
-
-	wrmsrl(MSR_IA32_CR_PAT, pat);
-}
-
-void init_cache_modes(void)
-{
-	u64 pat = 0;
-
-	if (init_cm_done)
-		return;
-
-	if (boot_cpu_has(X86_FEATURE_PAT)) {
-		/*
-		 * CPU supports PAT. Set PAT table to be consistent with
-		 * PAT MSR. This case supports "nopat" boot option, and
-		 * virtual machine environments which support PAT without
-		 * MTRRs. In specific, Xen has unique setup to PAT MSR.
-		 *
-		 * If PAT MSR returns 0, it is considered invalid and emulates
-		 * as No PAT.
-		 */
-		rdmsrl(MSR_IA32_CR_PAT, pat);
-	}
-
-	if (!pat) {
-		/*
-		 * No PAT. Emulate the PAT table that corresponds to the two
-		 * cache bits, PWT (Write Through) and PCD (Cache Disable).
-		 * This setup is also the same as the BIOS default setup.
-		 *
-		 * PTE encoding:
-		 *
-		 *       PCD
-		 *       |PWT  PAT
-		 *       ||    slot
-		 *       00    0    WB : _PAGE_CACHE_MODE_WB
-		 *       01    1    WT : _PAGE_CACHE_MODE_WT
-		 *       10    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
-		 *       11    3    UC : _PAGE_CACHE_MODE_UC
-		 *
-		 * NOTE: When WC or WP is used, it is redirected to UC- per
-		 * the default setup in __cachemode2pte_tbl[].
-		 */
-		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
-		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
-	}
-
-	__init_cache_modes(pat);
-}
-
-/**
- * pat_init - Initialize PAT MSR and PAT table
- *
- * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC, WT and WP.
- *
- * This function must be called on all CPUs using the specific sequence of
- * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
- * procedure for PAT.
- */
-void pat_init(void)
-{
-	u64 pat;
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	if (pat_disabled)
-		return;
-
-	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
-	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
-	     ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
-		/*
-		 * PAT support with the lower four entries. Intel Pentium 2,
-		 * 3, M, and 4 are affected by PAT errata, which makes the
-		 * upper four entries unusable. To be on the safe side, we don't
-		 * use those.
-		 *
-		 *  PTE encoding:
-		 *      PAT
-		 *      |PCD
-		 *      ||PWT  PAT
-		 *      |||    slot
-		 *      000    0    WB : _PAGE_CACHE_MODE_WB
-		 *      001    1    WC : _PAGE_CACHE_MODE_WC
-		 *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
-		 *      011    3    UC : _PAGE_CACHE_MODE_UC
-		 * PAT bit unused
-		 *
-		 * NOTE: When WT or WP is used, it is redirected to UC- per
-		 * the default setup in __cachemode2pte_tbl[].
-		 */
-		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
-		      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
-	} else {
-		/*
-		 * Full PAT support.  We put WT in slot 7 to improve
-		 * robustness in the presence of errata that might cause
-		 * the high PAT bit to be ignored.  This way, a buggy slot 7
-		 * access will hit slot 3, and slot 3 is UC, so at worst
-		 * we lose performance without causing a correctness issue.
-		 * Pentium 4 erratum N46 is an example for such an erratum,
-		 * although we try not to use PAT at all on affected CPUs.
-		 *
-		 *  PTE encoding:
-		 *      PAT
-		 *      |PCD
-		 *      ||PWT  PAT
-		 *      |||    slot
-		 *      000    0    WB : _PAGE_CACHE_MODE_WB
-		 *      001    1    WC : _PAGE_CACHE_MODE_WC
-		 *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
-		 *      011    3    UC : _PAGE_CACHE_MODE_UC
-		 *      100    4    WB : Reserved
-		 *      101    5    WP : _PAGE_CACHE_MODE_WP
-		 *      110    6    UC-: Reserved
-		 *      111    7    WT : _PAGE_CACHE_MODE_WT
-		 *
-		 * The reserved slots are unused, but mapped to their
-		 * corresponding types in the presence of PAT errata.
-		 */
-		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
-		      PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
-	}
-
-	if (!boot_cpu_done) {
-		pat_bsp_init(pat);
-		boot_cpu_done = true;
-	} else {
-		pat_ap_init(pat);
-	}
-}
-
-#undef PAT
-
-static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype accesses */
-
-/*
- * Does intersection of PAT memory type and MTRR memory type and returns
- * the resulting memory type as PAT understands it.
- * (Type in pat and mtrr will not have same value)
- * The intersection is based on "Effective Memory Type" tables in IA-32
- * SDM vol 3a
- */
-static unsigned long pat_x_mtrr_type(u64 start, u64 end,
-				     enum page_cache_mode req_type)
-{
-	/*
-	 * Look for MTRR hint to get the effective type in case where PAT
-	 * request is for WB.
-	 */
-	if (req_type == _PAGE_CACHE_MODE_WB) {
-		u8 mtrr_type, uniform;
-
-		mtrr_type = mtrr_type_lookup(start, end, &uniform);
-		if (mtrr_type != MTRR_TYPE_WRBACK)
-			return _PAGE_CACHE_MODE_UC_MINUS;
-
-		return _PAGE_CACHE_MODE_WB;
-	}
-
-	return req_type;
-}
-
-struct pagerange_state {
-	unsigned long		cur_pfn;
-	int			ram;
-	int			not_ram;
-};
-
-static int
-pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
-{
-	struct pagerange_state *state = arg;
-
-	state->not_ram	|= initial_pfn > state->cur_pfn;
-	state->ram	|= total_nr_pages > 0;
-	state->cur_pfn	 = initial_pfn + total_nr_pages;
-
-	return state->ram && state->not_ram;
-}
-
-static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
-{
-	int ret = 0;
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct pagerange_state state = {start_pfn, 0, 0};
-
-	/*
-	 * For legacy reasons, physical address range in the legacy ISA
-	 * region is tracked as non-RAM. This will allow users of
-	 * /dev/mem to map portions of legacy ISA region, even when
-	 * some of those portions are listed(or not even listed) with
-	 * different e820 types(RAM/reserved/..)
-	 */
-	if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
-		start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
-
-	if (start_pfn < end_pfn) {
-		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
-				&state, pagerange_is_ram_callback);
-	}
-
-	return (ret > 0) ? -1 : (state.ram ? 1 : 0);
-}
-
-/*
- * For RAM pages, we use page flags to mark the pages with appropriate type.
- * The page flags are limited to four types, WB (default), WC, WT and UC-.
- * WP request fails with -EINVAL, and UC gets redirected to UC-.  Setting
- * a new memory type is only allowed for a page mapped with the default WB
- * type.
- *
- * Here we do two passes:
- * - Find the memtype of all the pages in the range, look for any conflicts.
- * - In case of no conflicts, set the new memtype for pages in the range.
- */
-static int reserve_ram_pages_type(u64 start, u64 end,
-				  enum page_cache_mode req_type,
-				  enum page_cache_mode *new_type)
-{
-	struct page *page;
-	u64 pfn;
-
-	if (req_type == _PAGE_CACHE_MODE_WP) {
-		if (new_type)
-			*new_type = _PAGE_CACHE_MODE_UC_MINUS;
-		return -EINVAL;
-	}
-
-	if (req_type == _PAGE_CACHE_MODE_UC) {
-		/* We do not support strong UC */
-		WARN_ON_ONCE(1);
-		req_type = _PAGE_CACHE_MODE_UC_MINUS;
-	}
-
-	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
-		enum page_cache_mode type;
-
-		page = pfn_to_page(pfn);
-		type = get_page_memtype(page);
-		if (type != _PAGE_CACHE_MODE_WB) {
-			pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
-				start, end - 1, type, req_type);
-			if (new_type)
-				*new_type = type;
-
-			return -EBUSY;
-		}
-	}
-
-	if (new_type)
-		*new_type = req_type;
-
-	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
-		page = pfn_to_page(pfn);
-		set_page_memtype(page, req_type);
-	}
-	return 0;
-}
-
-static int free_ram_pages_type(u64 start, u64 end)
-{
-	struct page *page;
-	u64 pfn;
-
-	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
-		page = pfn_to_page(pfn);
-		set_page_memtype(page, _PAGE_CACHE_MODE_WB);
-	}
-	return 0;
-}
-
-static u64 sanitize_phys(u64 address)
-{
-	/*
-	 * When changing the memtype for pages containing poison allow
-	 * for a "decoy" virtual address (bit 63 clear) passed to
-	 * set_memory_X(). __pa() on a "decoy" address results in a
-	 * physical address with bit 63 set.
-	 *
-	 * Decoy addresses are not present for 32-bit builds, see
-	 * set_mce_nospec().
-	 */
-	if (IS_ENABLED(CONFIG_X86_64))
-		return address & __PHYSICAL_MASK;
-	return address;
-}
-
-/*
- * req_type typically has one of the:
- * - _PAGE_CACHE_MODE_WB
- * - _PAGE_CACHE_MODE_WC
- * - _PAGE_CACHE_MODE_UC_MINUS
- * - _PAGE_CACHE_MODE_UC
- * - _PAGE_CACHE_MODE_WT
- *
- * If new_type is NULL, function will return an error if it cannot reserve the
- * region with req_type. If new_type is non-NULL, function will return
- * available type in new_type in case of no error. In case of any error
- * it will return a negative return value.
- */
-int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
-		    enum page_cache_mode *new_type)
-{
-	struct memtype *new;
-	enum page_cache_mode actual_type;
-	int is_range_ram;
-	int err = 0;
-
-	start = sanitize_phys(start);
-	end = sanitize_phys(end);
-	if (start >= end) {
-		WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
-				start, end - 1, cattr_name(req_type));
-		return -EINVAL;
-	}
-
-	if (!pat_enabled()) {
-		/* This is identical to page table setting without PAT */
-		if (new_type)
-			*new_type = req_type;
-		return 0;
-	}
-
-	/* Low ISA region is always mapped WB in page table. No need to track */
-	if (x86_platform.is_untracked_pat_range(start, end)) {
-		if (new_type)
-			*new_type = _PAGE_CACHE_MODE_WB;
-		return 0;
-	}
-
-	/*
-	 * Call mtrr_lookup to get the type hint. This is an
-	 * optimization for /dev/mem mmap'ers into WB memory (BIOS
-	 * tools and ACPI tools). Use WB request for WB memory and use
-	 * UC_MINUS otherwise.
-	 */
-	actual_type = pat_x_mtrr_type(start, end, req_type);
-
-	if (new_type)
-		*new_type = actual_type;
-
-	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1) {
-
-		err = reserve_ram_pages_type(start, end, req_type, new_type);
-
-		return err;
-	} else if (is_range_ram < 0) {
-		return -EINVAL;
-	}
-
-	new  = kzalloc(sizeof(struct memtype), GFP_KERNEL);
-	if (!new)
-		return -ENOMEM;
-
-	new->start	= start;
-	new->end	= end;
-	new->type	= actual_type;
-
-	spin_lock(&memtype_lock);
-
-	err = rbt_memtype_check_insert(new, new_type);
-	if (err) {
-		pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
-			start, end - 1,
-			cattr_name(new->type), cattr_name(req_type));
-		kfree(new);
-		spin_unlock(&memtype_lock);
-
-		return err;
-	}
-
-	spin_unlock(&memtype_lock);
-
-	dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
-		start, end - 1, cattr_name(new->type), cattr_name(req_type),
-		new_type ? cattr_name(*new_type) : "-");
-
-	return err;
-}
-
-int free_memtype(u64 start, u64 end)
-{
-	int err = -EINVAL;
-	int is_range_ram;
-	struct memtype *entry;
-
-	if (!pat_enabled())
-		return 0;
-
-	start = sanitize_phys(start);
-	end = sanitize_phys(end);
-
-	/* Low ISA region is always mapped WB. No need to track */
-	if (x86_platform.is_untracked_pat_range(start, end))
-		return 0;
-
-	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1) {
-
-		err = free_ram_pages_type(start, end);
-
-		return err;
-	} else if (is_range_ram < 0) {
-		return -EINVAL;
-	}
-
-	spin_lock(&memtype_lock);
-	entry = rbt_memtype_erase(start, end);
-	spin_unlock(&memtype_lock);
-
-	if (IS_ERR(entry)) {
-		pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
-			current->comm, current->pid, start, end - 1);
-		return -EINVAL;
-	}
-
-	kfree(entry);
-
-	dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
-
-	return 0;
-}
-
-
-/**
- * lookup_memtype - Looksup the memory type for a physical address
- * @paddr: physical address of which memory type needs to be looked up
- *
- * Only to be called when PAT is enabled
- *
- * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
- * or _PAGE_CACHE_MODE_WT.
- */
-static enum page_cache_mode lookup_memtype(u64 paddr)
-{
-	enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB;
-	struct memtype *entry;
-
-	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
-		return rettype;
-
-	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
-		struct page *page;
-
-		page = pfn_to_page(paddr >> PAGE_SHIFT);
-		return get_page_memtype(page);
-	}
-
-	spin_lock(&memtype_lock);
-
-	entry = rbt_memtype_lookup(paddr);
-	if (entry != NULL)
-		rettype = entry->type;
-	else
-		rettype = _PAGE_CACHE_MODE_UC_MINUS;
-
-	spin_unlock(&memtype_lock);
-	return rettype;
-}
-
-/**
- * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
- * of @pfn cannot be overridden by UC MTRR memory type.
- *
- * Only to be called when PAT is enabled.
- *
- * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC.
- * Returns false in other cases.
- */
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
-{
-	enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));
-
-	return cm == _PAGE_CACHE_MODE_UC ||
-	       cm == _PAGE_CACHE_MODE_UC_MINUS ||
-	       cm == _PAGE_CACHE_MODE_WC;
-}
-EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
-
-/**
- * io_reserve_memtype - Request a memory type mapping for a region of memory
- * @start: start (physical address) of the region
- * @end: end (physical address) of the region
- * @type: A pointer to memtype, with requested type. On success, requested
- * or any other compatible type that was available for the region is returned
- *
- * On success, returns 0
- * On failure, returns non-zero
- */
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
-			enum page_cache_mode *type)
-{
-	resource_size_t size = end - start;
-	enum page_cache_mode req_type = *type;
-	enum page_cache_mode new_type;
-	int ret;
-
-	WARN_ON_ONCE(iomem_map_sanity_check(start, size));
-
-	ret = reserve_memtype(start, end, req_type, &new_type);
-	if (ret)
-		goto out_err;
-
-	if (!is_new_memtype_allowed(start, size, req_type, new_type))
-		goto out_free;
-
-	if (kernel_map_sync_memtype(start, size, new_type) < 0)
-		goto out_free;
-
-	*type = new_type;
-	return 0;
-
-out_free:
-	free_memtype(start, end);
-	ret = -EBUSY;
-out_err:
-	return ret;
-}
-
-/**
- * io_free_memtype - Release a memory type mapping for a region of memory
- * @start: start (physical address) of the region
- * @end: end (physical address) of the region
- */
-void io_free_memtype(resource_size_t start, resource_size_t end)
-{
-	free_memtype(start, end);
-}
-
-int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
-{
-	enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
-
-	return io_reserve_memtype(start, start + size, &type);
-}
-EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
-
-void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
-{
-	io_free_memtype(start, start + size);
-}
-EXPORT_SYMBOL(arch_io_free_memtype_wc);
-
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-				unsigned long size, pgprot_t vma_prot)
-{
-	if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size))
-		vma_prot = pgprot_decrypted(vma_prot);
-
-	return vma_prot;
-}
-
-#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
-	return 1;
-}
-#else
-/* This check is needed to avoid cache aliasing when PAT is enabled */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
-	u64 from = ((u64)pfn) << PAGE_SHIFT;
-	u64 to = from + size;
-	u64 cursor = from;
-
-	if (!pat_enabled())
-		return 1;
-
-	while (cursor < to) {
-		if (!devmem_is_allowed(pfn))
-			return 0;
-		cursor += PAGE_SIZE;
-		pfn++;
-	}
-	return 1;
-}
-#endif /* CONFIG_STRICT_DEVMEM */
-
-int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
-				unsigned long size, pgprot_t *vma_prot)
-{
-	enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
-
-	if (!range_is_allowed(pfn, size))
-		return 0;
-
-	if (file->f_flags & O_DSYNC)
-		pcm = _PAGE_CACHE_MODE_UC_MINUS;
-
-	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
-			     cachemode2protval(pcm));
-	return 1;
-}
-
-/*
- * Change the memory type for the physial address range in kernel identity
- * mapping space if that range is a part of identity map.
- */
-int kernel_map_sync_memtype(u64 base, unsigned long size,
-			    enum page_cache_mode pcm)
-{
-	unsigned long id_sz;
-
-	if (base > __pa(high_memory-1))
-		return 0;
-
-	/*
-	 * some areas in the middle of the kernel identity range
-	 * are not mapped, like the PCI space.
-	 */
-	if (!page_is_ram(base >> PAGE_SHIFT))
-		return 0;
-
-	id_sz = (__pa(high_memory-1) <= base + size) ?
-				__pa(high_memory) - base :
-				size;
-
-	if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
-		pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
-			current->comm, current->pid,
-			cattr_name(pcm),
-			base, (unsigned long long)(base + size-1));
-		return -EINVAL;
-	}
-	return 0;
-}
-
-/*
- * Internal interface to reserve a range of physical memory with prot.
- * Reserved non RAM regions only and after successful reserve_memtype,
- * this func also keeps identity mapping (if any) in sync with this new prot.
- */
-static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
-				int strict_prot)
-{
-	int is_ram = 0;
-	int ret;
-	enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot);
-	enum page_cache_mode pcm = want_pcm;
-
-	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
-
-	/*
-	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
-	 * track of number of mappings of RAM pages. We can assert that
-	 * the type requested matches the type of first page in the range.
-	 */
-	if (is_ram) {
-		if (!pat_enabled())
-			return 0;
-
-		pcm = lookup_memtype(paddr);
-		if (want_pcm != pcm) {
-			pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
-				current->comm, current->pid,
-				cattr_name(want_pcm),
-				(unsigned long long)paddr,
-				(unsigned long long)(paddr + size - 1),
-				cattr_name(pcm));
-			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
-					     (~_PAGE_CACHE_MASK)) |
-					     cachemode2protval(pcm));
-		}
-		return 0;
-	}
-
-	ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
-	if (ret)
-		return ret;
-
-	if (pcm != want_pcm) {
-		if (strict_prot ||
-		    !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
-			free_memtype(paddr, paddr + size);
-			pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
-			       current->comm, current->pid,
-			       cattr_name(want_pcm),
-			       (unsigned long long)paddr,
-			       (unsigned long long)(paddr + size - 1),
-			       cattr_name(pcm));
-			return -EINVAL;
-		}
-		/*
-		 * We allow returning different type than the one requested in
-		 * non strict case.
-		 */
-		*vma_prot = __pgprot((pgprot_val(*vma_prot) &
-				      (~_PAGE_CACHE_MASK)) |
-				     cachemode2protval(pcm));
-	}
-
-	if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
-		free_memtype(paddr, paddr + size);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-/*
- * Internal interface to free a range of physical memory.
- * Frees non RAM regions only.
- */
-static void free_pfn_range(u64 paddr, unsigned long size)
-{
-	int is_ram;
-
-	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
-	if (is_ram == 0)
-		free_memtype(paddr, paddr + size);
-}
-
-/*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- *
- * If the vma has a linear pfn mapping for the entire range, we get the prot
- * from pte and reserve the entire vma range with single reserve_pfn_range call.
- */
-int track_pfn_copy(struct vm_area_struct *vma)
-{
-	resource_size_t paddr;
-	unsigned long prot;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
-	pgprot_t pgprot;
-
-	if (vma->vm_flags & VM_PAT) {
-		/*
-		 * reserve the whole chunk covered by vma. We need the
-		 * starting address and protection from pte.
-		 */
-		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
-			WARN_ON_ONCE(1);
-			return -EINVAL;
-		}
-		pgprot = __pgprot(prot);
-		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
-	}
-
-	return 0;
-}
-
-/*
- * prot is passed in as a parameter for the new mapping. If the vma has
- * a linear pfn mapping for the entire range, or no vma is provided,
- * reserve the entire pfn + size range with single reserve_pfn_range
- * call.
- */
-int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-		    unsigned long pfn, unsigned long addr, unsigned long size)
-{
-	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
-	enum page_cache_mode pcm;
-
-	/* reserve the whole chunk starting from paddr */
-	if (!vma || (addr == vma->vm_start
-				&& size == (vma->vm_end - vma->vm_start))) {
-		int ret;
-
-		ret = reserve_pfn_range(paddr, size, prot, 0);
-		if (ret == 0 && vma)
-			vma->vm_flags |= VM_PAT;
-		return ret;
-	}
-
-	if (!pat_enabled())
-		return 0;
-
-	/*
-	 * For anything smaller than the vma size we set prot based on the
-	 * lookup.
-	 */
-	pcm = lookup_memtype(paddr);
-
-	/* Check memtype for the remaining pages */
-	while (size > PAGE_SIZE) {
-		size -= PAGE_SIZE;
-		paddr += PAGE_SIZE;
-		if (pcm != lookup_memtype(paddr))
-			return -EINVAL;
-	}
-
-	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
-			 cachemode2protval(pcm));
-
-	return 0;
-}
-
-void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
-{
-	enum page_cache_mode pcm;
-
-	if (!pat_enabled())
-		return;
-
-	/* Set prot based on lookup */
-	pcm = lookup_memtype(pfn_t_to_phys(pfn));
-	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
-			 cachemode2protval(pcm));
-}
-
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
- */
-void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
-		 unsigned long size)
-{
-	resource_size_t paddr;
-	unsigned long prot;
-
-	if (vma && !(vma->vm_flags & VM_PAT))
-		return;
-
-	/* free the chunk starting from pfn or the whole chunk */
-	paddr = (resource_size_t)pfn << PAGE_SHIFT;
-	if (!paddr && !size) {
-		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
-			WARN_ON_ONCE(1);
-			return;
-		}
-
-		size = vma->vm_end - vma->vm_start;
-	}
-	free_pfn_range(paddr, size);
-	if (vma)
-		vma->vm_flags &= ~VM_PAT;
-}
-
-/*
- * untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
- * with the old vma after its pfnmap page table has been removed.  The new
- * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
- */
-void untrack_pfn_moved(struct vm_area_struct *vma)
-{
-	vma->vm_flags &= ~VM_PAT;
-}
-
-pgprot_t pgprot_writecombine(pgprot_t prot)
-{
-	return __pgprot(pgprot_val(prot) |
-				cachemode2protval(_PAGE_CACHE_MODE_WC));
-}
-EXPORT_SYMBOL_GPL(pgprot_writecombine);
-
-pgprot_t pgprot_writethrough(pgprot_t prot)
-{
-	return __pgprot(pgprot_val(prot) |
-				cachemode2protval(_PAGE_CACHE_MODE_WT));
-}
-EXPORT_SYMBOL_GPL(pgprot_writethrough);
-
-#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
-
-static struct memtype *memtype_get_idx(loff_t pos)
-{
-	struct memtype *print_entry;
-	int ret;
-
-	print_entry  = kzalloc(sizeof(struct memtype), GFP_KERNEL);
-	if (!print_entry)
-		return NULL;
-
-	spin_lock(&memtype_lock);
-	ret = rbt_memtype_copy_nth_element(print_entry, pos);
-	spin_unlock(&memtype_lock);
-
-	if (!ret) {
-		return print_entry;
-	} else {
-		kfree(print_entry);
-		return NULL;
-	}
-}
-
-static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	if (*pos == 0) {
-		++*pos;
-		seq_puts(seq, "PAT memtype list:\n");
-	}
-
-	return memtype_get_idx(*pos);
-}
-
-static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return memtype_get_idx(*pos);
-}
-
-static void memtype_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int memtype_seq_show(struct seq_file *seq, void *v)
-{
-	struct memtype *print_entry = (struct memtype *)v;
-
-	seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
-			print_entry->start, print_entry->end);
-	kfree(print_entry);
-
-	return 0;
-}
-
-static const struct seq_operations memtype_seq_ops = {
-	.start = memtype_seq_start,
-	.next  = memtype_seq_next,
-	.stop  = memtype_seq_stop,
-	.show  = memtype_seq_show,
-};
-
-static int memtype_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &memtype_seq_ops);
-}
-
-static const struct file_operations memtype_fops = {
-	.open    = memtype_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-static int __init pat_memtype_list_init(void)
-{
-	if (pat_enabled()) {
-		debugfs_create_file("pat_memtype_list", S_IRUSR,
-				    arch_debugfs_dir, NULL, &memtype_fops);
-	}
-	return 0;
-}
-
-late_initcall(pat_memtype_list_init);
-
-#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile
new file mode 100644
index 000000000000..ea464c995161
--- /dev/null
+++ b/arch/x86/mm/pat/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y				:= set_memory.o memtype.o
+
+obj-$(CONFIG_X86_PAT)		+= memtype_interval.o
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pat/cpa-test.c
index facce271e8b9..facce271e8b9 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
new file mode 100644
index 000000000000..394be8611748
--- /dev/null
+++ b/arch/x86/mm/pat/memtype.c
@@ -0,0 +1,1219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Page Attribute Table (PAT) support: handle memory caching attributes in page tables.
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
+ *
+ * Basic principles:
+ *
+ * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and
+ * the kernel to set one of a handful of 'caching type' attributes for physical
+ * memory ranges: uncached, write-combining, write-through, write-protected,
+ * and the most commonly used and default attribute: write-back caching.
+ *
+ * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * a hardware interface to enumerate a limited number of physical memory ranges
+ * and set their caching attributes explicitly, programmed into the CPU via MSRs.
+ * Even modern CPUs have MTRRs enabled - but these are typically not touched
+ * by the kernel or by user-space (such as the X server), we rely on PAT for any
+ * additional cache attribute logic.
+ *
+ * PAT doesn't work via explicit memory ranges, but uses page table entries to add
+ * cache attribute information to the mapped memory range: there's 3 bits used,
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the
+ * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT).
+ *
+ * ( There's a metric ton of finer details, such as compatibility with CPU quirks
+ *   that only support 4 types of PAT entries, and interaction with MTRRs, see
+ *   below for details. )
+ */
+
+#include <linux/seq_file.h>
+#include <linux/memblock.h>
+#include <linux/debugfs.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/pfn_t.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/rbtree.h>
+
+#include <asm/cacheflush.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/x86_init.h>
+#include <asm/pgtable.h>
+#include <asm/fcntl.h>
+#include <asm/e820/api.h>
+#include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/memtype.h>
+#include <asm/io.h>
+
+#include "memtype.h"
+#include "../mm_internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static bool __read_mostly pat_bp_initialized;
+static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __read_mostly pat_bp_enabled;
+static bool __read_mostly pat_cm_initialized;
+
+/*
+ * PAT support is enabled by default, but can be disabled for
+ * various user-requested or hardware-forced reasons:
+ */
+void pat_disable(const char *msg_reason)
+{
+	if (pat_disabled)
+		return;
+
+	if (pat_bp_initialized) {
+		WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+		return;
+	}
+
+	pat_disabled = true;
+	pr_info("x86/PAT: %s\n", msg_reason);
+}
+
+static int __init nopat(char *str)
+{
+	pat_disable("PAT support disabled via boot option.");
+	return 0;
+}
+early_param("nopat", nopat);
+
+bool pat_enabled(void)
+{
+	return pat_bp_enabled;
+}
+EXPORT_SYMBOL_GPL(pat_enabled);
+
+int pat_debug_enable;
+
+static int __init pat_debug_setup(char *str)
+{
+	pat_debug_enable = 1;
+	return 0;
+}
+__setup("debugpat", pat_debug_setup);
+
+#ifdef CONFIG_X86_PAT
+/*
+ * X86 PAT uses page flags arch_1 and uncached together to keep track of
+ * memory type of pages that have backing page struct.
+ *
+ * X86 PAT supports 4 different memory types:
+ *  - _PAGE_CACHE_MODE_WB
+ *  - _PAGE_CACHE_MODE_WC
+ *  - _PAGE_CACHE_MODE_UC_MINUS
+ *  - _PAGE_CACHE_MODE_WT
+ *
+ * _PAGE_CACHE_MODE_WB is the default type.
+ */
+
+#define _PGMT_WB		0
+#define _PGMT_WC		(1UL << PG_arch_1)
+#define _PGMT_UC_MINUS		(1UL << PG_uncached)
+#define _PGMT_WT		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_MASK		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)
+
+static inline enum page_cache_mode get_page_memtype(struct page *pg)
+{
+	unsigned long pg_flags = pg->flags & _PGMT_MASK;
+
+	if (pg_flags == _PGMT_WB)
+		return _PAGE_CACHE_MODE_WB;
+	else if (pg_flags == _PGMT_WC)
+		return _PAGE_CACHE_MODE_WC;
+	else if (pg_flags == _PGMT_UC_MINUS)
+		return _PAGE_CACHE_MODE_UC_MINUS;
+	else
+		return _PAGE_CACHE_MODE_WT;
+}
+
+static inline void set_page_memtype(struct page *pg,
+				    enum page_cache_mode memtype)
+{
+	unsigned long memtype_flags;
+	unsigned long old_flags;
+	unsigned long new_flags;
+
+	switch (memtype) {
+	case _PAGE_CACHE_MODE_WC:
+		memtype_flags = _PGMT_WC;
+		break;
+	case _PAGE_CACHE_MODE_UC_MINUS:
+		memtype_flags = _PGMT_UC_MINUS;
+		break;
+	case _PAGE_CACHE_MODE_WT:
+		memtype_flags = _PGMT_WT;
+		break;
+	case _PAGE_CACHE_MODE_WB:
+	default:
+		memtype_flags = _PGMT_WB;
+		break;
+	}
+
+	do {
+		old_flags = pg->flags;
+		new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
+	} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
+}
+#else
+static inline enum page_cache_mode get_page_memtype(struct page *pg)
+{
+	return -1;
+}
+static inline void set_page_memtype(struct page *pg,
+				    enum page_cache_mode memtype)
+{
+}
+#endif
+
+enum {
+	PAT_UC = 0,		/* uncached */
+	PAT_WC = 1,		/* Write combining */
+	PAT_WT = 4,		/* Write Through */
+	PAT_WP = 5,		/* Write Protected */
+	PAT_WB = 6,		/* Write Back (default) */
+	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
+};
+
+#define CM(c) (_PAGE_CACHE_MODE_ ## c)
+
+static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
+{
+	enum page_cache_mode cache;
+	char *cache_mode;
+
+	switch (pat_val) {
+	case PAT_UC:       cache = CM(UC);       cache_mode = "UC  "; break;
+	case PAT_WC:       cache = CM(WC);       cache_mode = "WC  "; break;
+	case PAT_WT:       cache = CM(WT);       cache_mode = "WT  "; break;
+	case PAT_WP:       cache = CM(WP);       cache_mode = "WP  "; break;
+	case PAT_WB:       cache = CM(WB);       cache_mode = "WB  "; break;
+	case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
+	default:           cache = CM(WB);       cache_mode = "WB  "; break;
+	}
+
+	memcpy(msg, cache_mode, 4);
+
+	return cache;
+}
+
+#undef CM
+
+/*
+ * Update the cache mode to pgprot translation tables according to PAT
+ * configuration.
+ * Using lower indices is preferred, so we start with highest index.
+ */
+static void __init_cache_modes(u64 pat)
+{
+	enum page_cache_mode cache;
+	char pat_msg[33];
+	int i;
+
+	WARN_ON_ONCE(pat_cm_initialized);
+
+	pat_msg[32] = 0;
+	for (i = 7; i >= 0; i--) {
+		cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
+					   pat_msg + 4 * i);
+		update_cache_mode_entry(i, cache);
+	}
+	pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
+
+	pat_cm_initialized = true;
+}
+
+#define PAT(x, y)	((u64)PAT_ ## y << ((x)*8))
+
+static void pat_bp_init(u64 pat)
+{
+	u64 tmp_pat;
+
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
+		pat_disable("PAT not supported by the CPU.");
+		return;
+	}
+
+	rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+	if (!tmp_pat) {
+		pat_disable("PAT support disabled by the firmware.");
+		return;
+	}
+
+	wrmsrl(MSR_IA32_CR_PAT, pat);
+	pat_bp_enabled = true;
+
+	__init_cache_modes(pat);
+}
+
+static void pat_ap_init(u64 pat)
+{
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
+		/*
+		 * If this happens we are on a secondary CPU, but switched to
+		 * PAT on the boot CPU. We have no way to undo PAT.
+		 */
+		panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
+	}
+
+	wrmsrl(MSR_IA32_CR_PAT, pat);
+}
+
+void init_cache_modes(void)
+{
+	u64 pat = 0;
+
+	if (pat_cm_initialized)
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PAT)) {
+		/*
+		 * CPU supports PAT. Set PAT table to be consistent with
+		 * PAT MSR. This case supports "nopat" boot option, and
+		 * virtual machine environments which support PAT without
+		 * MTRRs. In specific, Xen has unique setup to PAT MSR.
+		 *
+		 * If PAT MSR returns 0, it is considered invalid and emulates
+		 * as No PAT.
+		 */
+		rdmsrl(MSR_IA32_CR_PAT, pat);
+	}
+
+	if (!pat) {
+		/*
+		 * No PAT. Emulate the PAT table that corresponds to the two
+		 * cache bits, PWT (Write Through) and PCD (Cache Disable).
+		 * This setup is also the same as the BIOS default setup.
+		 *
+		 * PTE encoding:
+		 *
+		 *       PCD
+		 *       |PWT  PAT
+		 *       ||    slot
+		 *       00    0    WB : _PAGE_CACHE_MODE_WB
+		 *       01    1    WT : _PAGE_CACHE_MODE_WT
+		 *       10    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
+		 *       11    3    UC : _PAGE_CACHE_MODE_UC
+		 *
+		 * NOTE: When WC or WP is used, it is redirected to UC- per
+		 * the default setup in __cachemode2pte_tbl[].
+		 */
+		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+	}
+
+	__init_cache_modes(pat);
+}
+
+/**
+ * pat_init - Initialize the PAT MSR and PAT table on the current CPU
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC, WT and WP.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+	u64 pat;
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+#ifndef CONFIG_X86_PAT
+	pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
+#endif
+
+	if (pat_disabled)
+		return;
+
+	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+	     ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+		/*
+		 * PAT support with the lower four entries. Intel Pentium 2,
+		 * 3, M, and 4 are affected by PAT errata, which makes the
+		 * upper four entries unusable. To be on the safe side, we don't
+		 * use those.
+		 *
+		 *  PTE encoding:
+		 *      PAT
+		 *      |PCD
+		 *      ||PWT  PAT
+		 *      |||    slot
+		 *      000    0    WB : _PAGE_CACHE_MODE_WB
+		 *      001    1    WC : _PAGE_CACHE_MODE_WC
+		 *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
+		 *      011    3    UC : _PAGE_CACHE_MODE_UC
+		 * PAT bit unused
+		 *
+		 * NOTE: When WT or WP is used, it is redirected to UC- per
+		 * the default setup in __cachemode2pte_tbl[].
+		 */
+		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+		      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+	} else {
+		/*
+		 * Full PAT support.  We put WT in slot 7 to improve
+		 * robustness in the presence of errata that might cause
+		 * the high PAT bit to be ignored.  This way, a buggy slot 7
+		 * access will hit slot 3, and slot 3 is UC, so at worst
+		 * we lose performance without causing a correctness issue.
+		 * Pentium 4 erratum N46 is an example for such an erratum,
+		 * although we try not to use PAT at all on affected CPUs.
+		 *
+		 *  PTE encoding:
+		 *      PAT
+		 *      |PCD
+		 *      ||PWT  PAT
+		 *      |||    slot
+		 *      000    0    WB : _PAGE_CACHE_MODE_WB
+		 *      001    1    WC : _PAGE_CACHE_MODE_WC
+		 *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
+		 *      011    3    UC : _PAGE_CACHE_MODE_UC
+		 *      100    4    WB : Reserved
+		 *      101    5    WP : _PAGE_CACHE_MODE_WP
+		 *      110    6    UC-: Reserved
+		 *      111    7    WT : _PAGE_CACHE_MODE_WT
+		 *
+		 * The reserved slots are unused, but mapped to their
+		 * corresponding types in the presence of PAT errata.
+		 */
+		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+		      PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
+	}
+
+	if (!pat_bp_initialized) {
+		pat_bp_init(pat);
+		pat_bp_initialized = true;
+	} else {
+		pat_ap_init(pat);
+	}
+}
+
+#undef PAT
+
+static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype accesses */
+
+/*
+ * Does intersection of PAT memory type and MTRR memory type and returns
+ * the resulting memory type as PAT understands it.
+ * (Type in pat and mtrr will not have same value)
+ * The intersection is based on "Effective Memory Type" tables in IA-32
+ * SDM vol 3a
+ */
+static unsigned long pat_x_mtrr_type(u64 start, u64 end,
+				     enum page_cache_mode req_type)
+{
+	/*
+	 * Look for MTRR hint to get the effective type in case where PAT
+	 * request is for WB.
+	 */
+	if (req_type == _PAGE_CACHE_MODE_WB) {
+		u8 mtrr_type, uniform;
+
+		mtrr_type = mtrr_type_lookup(start, end, &uniform);
+		if (mtrr_type != MTRR_TYPE_WRBACK)
+			return _PAGE_CACHE_MODE_UC_MINUS;
+
+		return _PAGE_CACHE_MODE_WB;
+	}
+
+	return req_type;
+}
+
+struct pagerange_state {
+	unsigned long		cur_pfn;
+	int			ram;
+	int			not_ram;
+};
+
+static int
+pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
+{
+	struct pagerange_state *state = arg;
+
+	state->not_ram	|= initial_pfn > state->cur_pfn;
+	state->ram	|= total_nr_pages > 0;
+	state->cur_pfn	 = initial_pfn + total_nr_pages;
+
+	return state->ram && state->not_ram;
+}
+
+static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
+{
+	int ret = 0;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	struct pagerange_state state = {start_pfn, 0, 0};
+
+	/*
+	 * For legacy reasons, physical address range in the legacy ISA
+	 * region is tracked as non-RAM. This will allow users of
+	 * /dev/mem to map portions of legacy ISA region, even when
+	 * some of those portions are listed(or not even listed) with
+	 * different e820 types(RAM/reserved/..)
+	 */
+	if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
+		start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
+
+	if (start_pfn < end_pfn) {
+		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
+				&state, pagerange_is_ram_callback);
+	}
+
+	return (ret > 0) ? -1 : (state.ram ? 1 : 0);
+}
+
+/*
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * The page flags are limited to four types, WB (default), WC, WT and UC-.
+ * WP request fails with -EINVAL, and UC gets redirected to UC-.  Setting
+ * a new memory type is only allowed for a page mapped with the default WB
+ * type.
+ *
+ * Here we do two passes:
+ * - Find the memtype of all the pages in the range, look for any conflicts.
+ * - In case of no conflicts, set the new memtype for pages in the range.
+ */
+static int reserve_ram_pages_type(u64 start, u64 end,
+				  enum page_cache_mode req_type,
+				  enum page_cache_mode *new_type)
+{
+	struct page *page;
+	u64 pfn;
+
+	if (req_type == _PAGE_CACHE_MODE_WP) {
+		if (new_type)
+			*new_type = _PAGE_CACHE_MODE_UC_MINUS;
+		return -EINVAL;
+	}
+
+	if (req_type == _PAGE_CACHE_MODE_UC) {
+		/* We do not support strong UC */
+		WARN_ON_ONCE(1);
+		req_type = _PAGE_CACHE_MODE_UC_MINUS;
+	}
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		enum page_cache_mode type;
+
+		page = pfn_to_page(pfn);
+		type = get_page_memtype(page);
+		if (type != _PAGE_CACHE_MODE_WB) {
+			pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
+				start, end - 1, type, req_type);
+			if (new_type)
+				*new_type = type;
+
+			return -EBUSY;
+		}
+	}
+
+	if (new_type)
+		*new_type = req_type;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		set_page_memtype(page, req_type);
+	}
+	return 0;
+}
+
+static int free_ram_pages_type(u64 start, u64 end)
+{
+	struct page *page;
+	u64 pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		set_page_memtype(page, _PAGE_CACHE_MODE_WB);
+	}
+	return 0;
+}
+
+static u64 sanitize_phys(u64 address)
+{
+	/*
+	 * When changing the memtype for pages containing poison allow
+	 * for a "decoy" virtual address (bit 63 clear) passed to
+	 * set_memory_X(). __pa() on a "decoy" address results in a
+	 * physical address with bit 63 set.
+	 *
+	 * Decoy addresses are not present for 32-bit builds, see
+	 * set_mce_nospec().
+	 */
+	if (IS_ENABLED(CONFIG_X86_64))
+		return address & __PHYSICAL_MASK;
+	return address;
+}
+
+/*
+ * req_type typically has one of the:
+ * - _PAGE_CACHE_MODE_WB
+ * - _PAGE_CACHE_MODE_WC
+ * - _PAGE_CACHE_MODE_UC_MINUS
+ * - _PAGE_CACHE_MODE_UC
+ * - _PAGE_CACHE_MODE_WT
+ *
+ * If new_type is NULL, function will return an error if it cannot reserve the
+ * region with req_type. If new_type is non-NULL, function will return
+ * available type in new_type in case of no error. In case of any error
+ * it will return a negative return value.
+ */
+int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type,
+		    enum page_cache_mode *new_type)
+{
+	struct memtype *entry_new;
+	enum page_cache_mode actual_type;
+	int is_range_ram;
+	int err = 0;
+
+	start = sanitize_phys(start);
+	end = sanitize_phys(end);
+	if (start >= end) {
+		WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
+				start, end - 1, cattr_name(req_type));
+		return -EINVAL;
+	}
+
+	if (!pat_enabled()) {
+		/* This is identical to page table setting without PAT */
+		if (new_type)
+			*new_type = req_type;
+		return 0;
+	}
+
+	/* Low ISA region is always mapped WB in page table. No need to track */
+	if (x86_platform.is_untracked_pat_range(start, end)) {
+		if (new_type)
+			*new_type = _PAGE_CACHE_MODE_WB;
+		return 0;
+	}
+
+	/*
+	 * Call mtrr_lookup to get the type hint. This is an
+	 * optimization for /dev/mem mmap'ers into WB memory (BIOS
+	 * tools and ACPI tools). Use WB request for WB memory and use
+	 * UC_MINUS otherwise.
+	 */
+	actual_type = pat_x_mtrr_type(start, end, req_type);
+
+	if (new_type)
+		*new_type = actual_type;
+
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1) {
+
+		err = reserve_ram_pages_type(start, end, req_type, new_type);
+
+		return err;
+	} else if (is_range_ram < 0) {
+		return -EINVAL;
+	}
+
+	entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+	if (!entry_new)
+		return -ENOMEM;
+
+	entry_new->start = start;
+	entry_new->end	 = end;
+	entry_new->type	 = actual_type;
+
+	spin_lock(&memtype_lock);
+
+	err = memtype_check_insert(entry_new, new_type);
+	if (err) {
+		pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+			start, end - 1,
+			cattr_name(entry_new->type), cattr_name(req_type));
+		kfree(entry_new);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	}
+
+	spin_unlock(&memtype_lock);
+
+	dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+		start, end - 1, cattr_name(entry_new->type), cattr_name(req_type),
+		new_type ? cattr_name(*new_type) : "-");
+
+	return err;
+}
+
+int memtype_free(u64 start, u64 end)
+{
+	int is_range_ram;
+	struct memtype *entry_old;
+
+	if (!pat_enabled())
+		return 0;
+
+	start = sanitize_phys(start);
+	end = sanitize_phys(end);
+
+	/* Low ISA region is always mapped WB. No need to track */
+	if (x86_platform.is_untracked_pat_range(start, end))
+		return 0;
+
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	if (is_range_ram < 0)
+		return -EINVAL;
+
+	spin_lock(&memtype_lock);
+	entry_old = memtype_erase(start, end);
+	spin_unlock(&memtype_lock);
+
+	if (IS_ERR(entry_old)) {
+		pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+			current->comm, current->pid, start, end - 1);
+		return -EINVAL;
+	}
+
+	kfree(entry_old);
+
+	dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1);
+
+	return 0;
+}
+
+
+/**
+ * lookup_memtype - Looksup the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
+ * or _PAGE_CACHE_MODE_WT.
+ */
+static enum page_cache_mode lookup_memtype(u64 paddr)
+{
+	enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB;
+	struct memtype *entry;
+
+	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
+		return rettype;
+
+	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+		struct page *page;
+
+		page = pfn_to_page(paddr >> PAGE_SHIFT);
+		return get_page_memtype(page);
+	}
+
+	spin_lock(&memtype_lock);
+
+	entry = memtype_lookup(paddr);
+	if (entry != NULL)
+		rettype = entry->type;
+	else
+		rettype = _PAGE_CACHE_MODE_UC_MINUS;
+
+	spin_unlock(&memtype_lock);
+
+	return rettype;
+}
+
+/**
+ * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
+ * of @pfn cannot be overridden by UC MTRR memory type.
+ *
+ * Only to be called when PAT is enabled.
+ *
+ * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC.
+ * Returns false in other cases.
+ */
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
+{
+	enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));
+
+	return cm == _PAGE_CACHE_MODE_UC ||
+	       cm == _PAGE_CACHE_MODE_UC_MINUS ||
+	       cm == _PAGE_CACHE_MODE_WC;
+}
+EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
+
+/**
+ * memtype_reserve_io - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: A pointer to memtype, with requested type. On success, requested
+ * or any other compatible type that was available for the region is returned
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int memtype_reserve_io(resource_size_t start, resource_size_t end,
+			enum page_cache_mode *type)
+{
+	resource_size_t size = end - start;
+	enum page_cache_mode req_type = *type;
+	enum page_cache_mode new_type;
+	int ret;
+
+	WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+	ret = memtype_reserve(start, end, req_type, &new_type);
+	if (ret)
+		goto out_err;
+
+	if (!is_new_memtype_allowed(start, size, req_type, new_type))
+		goto out_free;
+
+	if (memtype_kernel_map_sync(start, size, new_type) < 0)
+		goto out_free;
+
+	*type = new_type;
+	return 0;
+
+out_free:
+	memtype_free(start, end);
+	ret = -EBUSY;
+out_err:
+	return ret;
+}
+
+/**
+ * memtype_free_io - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void memtype_free_io(resource_size_t start, resource_size_t end)
+{
+	memtype_free(start, end);
+}
+
+int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
+
+	return memtype_reserve_io(start, start + size, &type);
+}
+EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
+
+void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	memtype_free_io(start, start + size);
+}
+EXPORT_SYMBOL(arch_io_free_memtype_wc);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t vma_prot)
+{
+	if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size))
+		vma_prot = pgprot_decrypted(vma_prot);
+
+	return vma_prot;
+}
+
+#ifdef CONFIG_STRICT_DEVMEM
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	return 1;
+}
+#else
+/* This check is needed to avoid cache aliasing when PAT is enabled */
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	u64 from = ((u64)pfn) << PAGE_SHIFT;
+	u64 to = from + size;
+	u64 cursor = from;
+
+	if (!pat_enabled())
+		return 1;
+
+	while (cursor < to) {
+		if (!devmem_is_allowed(pfn))
+			return 0;
+		cursor += PAGE_SIZE;
+		pfn++;
+	}
+	return 1;
+}
+#endif /* CONFIG_STRICT_DEVMEM */
+
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t *vma_prot)
+{
+	enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
+
+	if (!range_is_allowed(pfn, size))
+		return 0;
+
+	if (file->f_flags & O_DSYNC)
+		pcm = _PAGE_CACHE_MODE_UC_MINUS;
+
+	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+			     cachemode2protval(pcm));
+	return 1;
+}
+
+/*
+ * Change the memory type for the physical address range in kernel identity
+ * mapping space if that range is a part of identity map.
+ */
+int memtype_kernel_map_sync(u64 base, unsigned long size,
+			    enum page_cache_mode pcm)
+{
+	unsigned long id_sz;
+
+	if (base > __pa(high_memory-1))
+		return 0;
+
+	/*
+	 * Some areas in the middle of the kernel identity range
+	 * are not mapped, for example the PCI space.
+	 */
+	if (!page_is_ram(base >> PAGE_SHIFT))
+		return 0;
+
+	id_sz = (__pa(high_memory-1) <= base + size) ?
+				__pa(high_memory) - base : size;
+
+	if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
+		pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
+			current->comm, current->pid,
+			cattr_name(pcm),
+			base, (unsigned long long)(base + size-1));
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Internal interface to reserve a range of physical memory with prot.
+ * Reserved non RAM regions only and after successful memtype_reserve,
+ * this func also keeps identity mapping (if any) in sync with this new prot.
+ */
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
+				int strict_prot)
+{
+	int is_ram = 0;
+	int ret;
+	enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot);
+	enum page_cache_mode pcm = want_pcm;
+
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
+
+	/*
+	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
+	 * track of number of mappings of RAM pages. We can assert that
+	 * the type requested matches the type of first page in the range.
+	 */
+	if (is_ram) {
+		if (!pat_enabled())
+			return 0;
+
+		pcm = lookup_memtype(paddr);
+		if (want_pcm != pcm) {
+			pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
+				current->comm, current->pid,
+				cattr_name(want_pcm),
+				(unsigned long long)paddr,
+				(unsigned long long)(paddr + size - 1),
+				cattr_name(pcm));
+			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
+					     (~_PAGE_CACHE_MASK)) |
+					     cachemode2protval(pcm));
+		}
+		return 0;
+	}
+
+	ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm);
+	if (ret)
+		return ret;
+
+	if (pcm != want_pcm) {
+		if (strict_prot ||
+		    !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
+			memtype_free(paddr, paddr + size);
+			pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
+			       current->comm, current->pid,
+			       cattr_name(want_pcm),
+			       (unsigned long long)paddr,
+			       (unsigned long long)(paddr + size - 1),
+			       cattr_name(pcm));
+			return -EINVAL;
+		}
+		/*
+		 * We allow returning different type than the one requested in
+		 * non strict case.
+		 */
+		*vma_prot = __pgprot((pgprot_val(*vma_prot) &
+				      (~_PAGE_CACHE_MASK)) |
+				     cachemode2protval(pcm));
+	}
+
+	if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
+		memtype_free(paddr, paddr + size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Internal interface to free a range of physical memory.
+ * Frees non RAM regions only.
+ */
+static void free_pfn_range(u64 paddr, unsigned long size)
+{
+	int is_ram;
+
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
+	if (is_ram == 0)
+		memtype_free(paddr, paddr + size);
+}
+
+/*
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ *
+ * If the vma has a linear pfn mapping for the entire range, we get the prot
+ * from pte and reserve the entire vma range with single reserve_pfn_range call.
+ */
+int track_pfn_copy(struct vm_area_struct *vma)
+{
+	resource_size_t paddr;
+	unsigned long prot;
+	unsigned long vma_size = vma->vm_end - vma->vm_start;
+	pgprot_t pgprot;
+
+	if (vma->vm_flags & VM_PAT) {
+		/*
+		 * reserve the whole chunk covered by vma. We need the
+		 * starting address and protection from pte.
+		 */
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+			WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
+		pgprot = __pgprot(prot);
+		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
+	}
+
+	return 0;
+}
+
+/*
+ * prot is passed in as a parameter for the new mapping. If the vma has
+ * a linear pfn mapping for the entire range, or no vma is provided,
+ * reserve the entire pfn + size range with single reserve_pfn_range
+ * call.
+ */
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+		    unsigned long pfn, unsigned long addr, unsigned long size)
+{
+	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
+	enum page_cache_mode pcm;
+
+	/* reserve the whole chunk starting from paddr */
+	if (!vma || (addr == vma->vm_start
+				&& size == (vma->vm_end - vma->vm_start))) {
+		int ret;
+
+		ret = reserve_pfn_range(paddr, size, prot, 0);
+		if (ret == 0 && vma)
+			vma->vm_flags |= VM_PAT;
+		return ret;
+	}
+
+	if (!pat_enabled())
+		return 0;
+
+	/*
+	 * For anything smaller than the vma size we set prot based on the
+	 * lookup.
+	 */
+	pcm = lookup_memtype(paddr);
+
+	/* Check memtype for the remaining pages */
+	while (size > PAGE_SIZE) {
+		size -= PAGE_SIZE;
+		paddr += PAGE_SIZE;
+		if (pcm != lookup_memtype(paddr))
+			return -EINVAL;
+	}
+
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
+			 cachemode2protval(pcm));
+
+	return 0;
+}
+
+void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
+{
+	enum page_cache_mode pcm;
+
+	if (!pat_enabled())
+		return;
+
+	/* Set prot based on lookup */
+	pcm = lookup_memtype(pfn_t_to_phys(pfn));
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
+			 cachemode2protval(pcm));
+}
+
+/*
+ * untrack_pfn is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case pfn, size are zero).
+ */
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+		 unsigned long size)
+{
+	resource_size_t paddr;
+	unsigned long prot;
+
+	if (vma && !(vma->vm_flags & VM_PAT))
+		return;
+
+	/* free the chunk starting from pfn or the whole chunk */
+	paddr = (resource_size_t)pfn << PAGE_SHIFT;
+	if (!paddr && !size) {
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		size = vma->vm_end - vma->vm_start;
+	}
+	free_pfn_range(paddr, size);
+	if (vma)
+		vma->vm_flags &= ~VM_PAT;
+}
+
+/*
+ * untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
+ * with the old vma after its pfnmap page table has been removed.  The new
+ * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
+ */
+void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+	vma->vm_flags &= ~VM_PAT;
+}
+
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) |
+				cachemode2protval(_PAGE_CACHE_MODE_WC));
+}
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
+
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) |
+				cachemode2protval(_PAGE_CACHE_MODE_WT));
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
+
+/*
+ * We are allocating a temporary printout-entry to be passed
+ * between seq_start()/next() and seq_show():
+ */
+static struct memtype *memtype_get_idx(loff_t pos)
+{
+	struct memtype *entry_print;
+	int ret;
+
+	entry_print  = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+	if (!entry_print)
+		return NULL;
+
+	spin_lock(&memtype_lock);
+	ret = memtype_copy_nth_element(entry_print, pos);
+	spin_unlock(&memtype_lock);
+
+	/* Free it on error: */
+	if (ret) {
+		kfree(entry_print);
+		return NULL;
+	}
+
+	return entry_print;
+}
+
+static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos == 0) {
+		++*pos;
+		seq_puts(seq, "PAT memtype list:\n");
+	}
+
+	return memtype_get_idx(*pos);
+}
+
+static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return memtype_get_idx(*pos);
+}
+
+static void memtype_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int memtype_seq_show(struct seq_file *seq, void *v)
+{
+	struct memtype *entry_print = (struct memtype *)v;
+
+	seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n",
+			entry_print->start,
+			entry_print->end,
+			cattr_name(entry_print->type));
+
+	kfree(entry_print);
+
+	return 0;
+}
+
+static const struct seq_operations memtype_seq_ops = {
+	.start = memtype_seq_start,
+	.next  = memtype_seq_next,
+	.stop  = memtype_seq_stop,
+	.show  = memtype_seq_show,
+};
+
+static int memtype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &memtype_seq_ops);
+}
+
+static const struct file_operations memtype_fops = {
+	.open    = memtype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static int __init pat_memtype_list_init(void)
+{
+	if (pat_enabled()) {
+		debugfs_create_file("pat_memtype_list", S_IRUSR,
+				    arch_debugfs_dir, NULL, &memtype_fops);
+	}
+	return 0;
+}
+late_initcall(pat_memtype_list_init);
+
+#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pat/memtype.h b/arch/x86/mm/pat/memtype.h
new file mode 100644
index 000000000000..cacecdbceb55
--- /dev/null
+++ b/arch/x86/mm/pat/memtype.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MEMTYPE_H_
+#define __MEMTYPE_H_
+
+extern int pat_debug_enable;
+
+#define dprintk(fmt, arg...) \
+	do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0)
+
+struct memtype {
+	u64			start;
+	u64			end;
+	u64			subtree_max_end;
+	enum page_cache_mode	type;
+	struct rb_node		rb;
+};
+
+static inline char *cattr_name(enum page_cache_mode pcm)
+{
+	switch (pcm) {
+	case _PAGE_CACHE_MODE_UC:		return "uncached";
+	case _PAGE_CACHE_MODE_UC_MINUS:		return "uncached-minus";
+	case _PAGE_CACHE_MODE_WB:		return "write-back";
+	case _PAGE_CACHE_MODE_WC:		return "write-combining";
+	case _PAGE_CACHE_MODE_WT:		return "write-through";
+	case _PAGE_CACHE_MODE_WP:		return "write-protected";
+	default:				return "broken";
+	}
+}
+
+#ifdef CONFIG_X86_PAT
+extern int memtype_check_insert(struct memtype *entry_new,
+				enum page_cache_mode *new_type);
+extern struct memtype *memtype_erase(u64 start, u64 end);
+extern struct memtype *memtype_lookup(u64 addr);
+extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos);
+#else
+static inline int memtype_check_insert(struct memtype *entry_new,
+				       enum page_cache_mode *new_type)
+{ return 0; }
+static inline struct memtype *memtype_erase(u64 start, u64 end)
+{ return NULL; }
+static inline struct memtype *memtype_lookup(u64 addr)
+{ return NULL; }
+static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{ return 0; }
+#endif
+
+#endif /* __MEMTYPE_H_ */
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
new file mode 100644
index 000000000000..a07e4882bf36
--- /dev/null
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/memtype.h>
+
+#include "memtype.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) which tree is ordered
+ * by the starting address. The tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course, as enforced by the PAT logic.
+ *
+ * memtype_lock protects the rbtree.
+ */
+
+static inline u64 interval_start(struct memtype *entry)
+{
+	return entry->start;
+}
+
+static inline u64 interval_end(struct memtype *entry)
+{
+	return entry->end - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+		     interval_start, interval_end,
+		     static, interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+	MEMTYPE_EXACT_MATCH	= 0,
+	MEMTYPE_END_MATCH	= 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+	struct memtype *entry_match;
+
+	entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+
+	while (entry_match != NULL && entry_match->start < end) {
+		if ((match_type == MEMTYPE_EXACT_MATCH) &&
+		    (entry_match->start == start) && (entry_match->end == end))
+			return entry_match;
+
+		if ((match_type == MEMTYPE_END_MATCH) &&
+		    (entry_match->start < start) && (entry_match->end == end))
+			return entry_match;
+
+		entry_match = interval_iter_next(entry_match, start, end-1);
+	}
+
+	return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+				  enum page_cache_mode reqtype,
+				  enum page_cache_mode *newtype)
+{
+	struct memtype *entry_match;
+	enum page_cache_mode found_type = reqtype;
+
+	entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+	if (entry_match == NULL)
+		goto success;
+
+	if (entry_match->type != found_type && newtype == NULL)
+		goto failure;
+
+	dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end);
+	found_type = entry_match->type;
+
+	entry_match = interval_iter_next(entry_match, start, end-1);
+	while (entry_match) {
+		if (entry_match->type != found_type)
+			goto failure;
+
+		entry_match = interval_iter_next(entry_match, start, end-1);
+	}
+success:
+	if (newtype)
+		*newtype = found_type;
+
+	return 0;
+
+failure:
+	pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+		current->comm, current->pid, start, end,
+		cattr_name(found_type), cattr_name(entry_match->type));
+
+	return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type)
+{
+	int err = 0;
+
+	err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type);
+	if (err)
+		return err;
+
+	if (ret_type)
+		entry_new->type = *ret_type;
+
+	interval_insert(entry_new, &memtype_rbroot);
+	return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+	struct memtype *entry_old;
+
+	/*
+	 * Since the memtype_rbroot tree allows overlapping ranges,
+	 * memtype_erase() checks with EXACT_MATCH first, i.e. free
+	 * a whole node for the munmap case.  If no such entry is found,
+	 * it then checks with END_MATCH, i.e. shrink the size of a node
+	 * from the end for the mremap case.
+	 */
+	entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+	if (!entry_old) {
+		entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
+		if (!entry_old)
+			return ERR_PTR(-EINVAL);
+	}
+
+	if (entry_old->start == start) {
+		/* munmap: erase this node */
+		interval_remove(entry_old, &memtype_rbroot);
+	} else {
+		/* mremap: update the end value of this node */
+		interval_remove(entry_old, &memtype_rbroot);
+		entry_old->end = start;
+		interval_insert(entry_old, &memtype_rbroot);
+
+		return NULL;
+	}
+
+	return entry_old;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+	return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1);
+}
+
+/*
+ * Debugging helper, copy the Nth entry of the tree into a
+ * a copy for printout. This allows us to print out the tree
+ * via debugfs, without holding the memtype_lock too long:
+ */
+#ifdef CONFIG_DEBUG_FS
+int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos)
+{
+	struct memtype *entry_match;
+	int i = 1;
+
+	entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+
+	while (entry_match && pos != i) {
+		entry_match = interval_iter_next(entry_match, 0, ULONG_MAX);
+		i++;
+	}
+
+	if (entry_match) { /* pos == i */
+		*entry_out = *entry_match;
+		return 0;
+	} else {
+		return 1;
+	}
+}
+#endif
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
new file mode 100644
index 000000000000..62a8ebe72a52
--- /dev/null
+++ b/arch/x86/mm/pat/set_memory.c
@@ -0,0 +1,2279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include <asm/e820/api.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <linux/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/proto.h>
+#include <asm/memtype.h>
+#include <asm/set_memory.h>
+
+#include "../mm_internal.h"
+
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long	*vaddr;
+	pgd_t		*pgd;
+	pgprot_t	mask_set;
+	pgprot_t	mask_clr;
+	unsigned long	numpages;
+	unsigned long	curpage;
+	unsigned long	pfn;
+	unsigned int	flags;
+	unsigned int	force_split		: 1,
+			force_static_prot	: 1;
+	struct page	**pages;
+};
+
+enum cpa_warn {
+	CPA_CONFLICT,
+	CPA_PROTECT,
+	CPA_DETECT,
+};
+
+static const int cpa_warn_level = CPA_PROTECT;
+
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
+ * entries change the page attribute in parallel to some other cpu
+ * splitting a large page entry along with changing the attribute.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+#define CPA_PAGES_ARRAY 4
+#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
+
+#ifdef CONFIG_PROC_FS
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+	/* Protect against CPA */
+	spin_lock(&pgd_lock);
+	direct_pages_count[level] += pages;
+	spin_unlock(&pgd_lock);
+}
+
+static void split_page_count(int level)
+{
+	if (direct_pages_count[level] == 0)
+		return;
+
+	direct_pages_count[level]--;
+	direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+void arch_report_meminfo(struct seq_file *m)
+{
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
+			direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+	seq_printf(m, "DirectMap2M:    %8lu kB\n",
+			direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+	seq_printf(m, "DirectMap4M:    %8lu kB\n",
+			direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
+	if (direct_gbpages)
+		seq_printf(m, "DirectMap1G:    %8lu kB\n",
+			direct_pages_count[PG_LEVEL_1G] << 20);
+}
+#else
+static inline void split_page_count(int level) { }
+#endif
+
+#ifdef CONFIG_X86_CPA_STATISTICS
+
+static unsigned long cpa_1g_checked;
+static unsigned long cpa_1g_sameprot;
+static unsigned long cpa_1g_preserved;
+static unsigned long cpa_2m_checked;
+static unsigned long cpa_2m_sameprot;
+static unsigned long cpa_2m_preserved;
+static unsigned long cpa_4k_install;
+
+static inline void cpa_inc_1g_checked(void)
+{
+	cpa_1g_checked++;
+}
+
+static inline void cpa_inc_2m_checked(void)
+{
+	cpa_2m_checked++;
+}
+
+static inline void cpa_inc_4k_install(void)
+{
+	cpa_4k_install++;
+}
+
+static inline void cpa_inc_lp_sameprot(int level)
+{
+	if (level == PG_LEVEL_1G)
+		cpa_1g_sameprot++;
+	else
+		cpa_2m_sameprot++;
+}
+
+static inline void cpa_inc_lp_preserved(int level)
+{
+	if (level == PG_LEVEL_1G)
+		cpa_1g_preserved++;
+	else
+		cpa_2m_preserved++;
+}
+
+static int cpastats_show(struct seq_file *m, void *p)
+{
+	seq_printf(m, "1G pages checked:     %16lu\n", cpa_1g_checked);
+	seq_printf(m, "1G pages sameprot:    %16lu\n", cpa_1g_sameprot);
+	seq_printf(m, "1G pages preserved:   %16lu\n", cpa_1g_preserved);
+	seq_printf(m, "2M pages checked:     %16lu\n", cpa_2m_checked);
+	seq_printf(m, "2M pages sameprot:    %16lu\n", cpa_2m_sameprot);
+	seq_printf(m, "2M pages preserved:   %16lu\n", cpa_2m_preserved);
+	seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
+	return 0;
+}
+
+static int cpastats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cpastats_show, NULL);
+}
+
+static const struct file_operations cpastats_fops = {
+	.open		= cpastats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init cpa_stats_init(void)
+{
+	debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
+			    &cpastats_fops);
+	return 0;
+}
+late_initcall(cpa_stats_init);
+#else
+static inline void cpa_inc_1g_checked(void) { }
+static inline void cpa_inc_2m_checked(void) { }
+static inline void cpa_inc_4k_install(void) { }
+static inline void cpa_inc_lp_sameprot(int level) { }
+static inline void cpa_inc_lp_preserved(int level) { }
+#endif
+
+
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr <= end;
+}
+
+#ifdef CONFIG_X86_64
+
+static inline unsigned long highmap_start_pfn(void)
+{
+	return __pa_symbol(_text) >> PAGE_SHIFT;
+}
+
+static inline unsigned long highmap_end_pfn(void)
+{
+	/* Do not reference physical address outside the kernel. */
+	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
+}
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
+{
+	/*
+	 * Kernel text has an alias mapping at a high address, known
+	 * here as "highmap".
+	 */
+	return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
+}
+
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
+{
+	/* There is no highmap on 32-bit */
+	return false;
+}
+
+#endif
+
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+	return (long)(addr << 1) >> 1;
+#else
+	return addr;
+#endif
+}
+
+static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
+{
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[idx];
+
+		if (unlikely(PageHighMem(page)))
+			return 0;
+
+		return (unsigned long)page_address(page);
+	}
+
+	if (cpa->flags & CPA_ARRAY)
+		return cpa->vaddr[idx];
+
+	return *cpa->vaddr + idx * PAGE_SIZE;
+}
+
+/*
+ * Flushing functions
+ */
+
+static void clflush_cache_range_opt(void *vaddr, unsigned int size)
+{
+	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+	void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
+	void *vend = vaddr + size;
+
+	if (p >= vend)
+		return;
+
+	for (; p < vend; p += clflush_size)
+		clflushopt(p);
+}
+
+/**
+ * clflush_cache_range - flush a cache range with clflush
+ * @vaddr:	virtual start address
+ * @size:	number of bytes to flush
+ *
+ * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
+ * SFENCE to avoid ordering issues.
+ */
+void clflush_cache_range(void *vaddr, unsigned int size)
+{
+	mb();
+	clflush_cache_range_opt(vaddr, size);
+	mb();
+}
+EXPORT_SYMBOL_GPL(clflush_cache_range);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	clflush_cache_range(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+static void __cpa_flush_all(void *arg)
+{
+	unsigned long cache = (unsigned long)arg;
+
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
+	if (cache && boot_cpu_data.x86 >= 4)
+		wbinvd();
+}
+
+static void cpa_flush_all(unsigned long cache)
+{
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
+
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
+}
+
+static void __cpa_flush_tlb(void *data)
+{
+	struct cpa_data *cpa = data;
+	unsigned int i;
+
+	for (i = 0; i < cpa->numpages; i++)
+		__flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
+}
+
+static void cpa_flush(struct cpa_data *data, int cache)
+{
+	struct cpa_data *cpa = data;
+	unsigned int i;
+
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
+
+	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		cpa_flush_all(cache);
+		return;
+	}
+
+	if (cpa->numpages <= tlb_single_page_flush_ceiling)
+		on_each_cpu(__cpa_flush_tlb, cpa, 1);
+	else
+		flush_tlb_all();
+
+	if (!cache)
+		return;
+
+	mb();
+	for (i = 0; i < cpa->numpages; i++) {
+		unsigned long addr = __cpa_addr(cpa, i);
+		unsigned int level;
+
+		pte_t *pte = lookup_address(addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+			clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
+	}
+	mb();
+}
+
+static bool overlaps(unsigned long r1_start, unsigned long r1_end,
+		     unsigned long r2_start, unsigned long r2_end)
+{
+	return (r1_start <= r2_end && r1_end >= r2_start) ||
+		(r2_start <= r1_end && r2_end >= r1_start);
+}
+
+#ifdef CONFIG_PCI_BIOS
+/*
+ * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
+ * based config access (CONFIG_PCI_GOBIOS) support.
+ */
+#define BIOS_PFN	PFN_DOWN(BIOS_BEGIN)
+#define BIOS_PFN_END	PFN_DOWN(BIOS_END - 1)
+
+static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
+{
+	if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
+		return _PAGE_NX;
+	return 0;
+}
+#else
+static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
+{
+	return 0;
+}
+#endif
+
+/*
+ * The .rodata section needs to be read-only. Using the pfn catches all
+ * aliases.  This also includes __ro_after_init, so do not enforce until
+ * kernel_set_to_readonly is true.
+ */
+static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
+{
+	unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
+
+	/*
+	 * Note: __end_rodata is at page aligned and not inclusive, so
+	 * subtract 1 to get the last enforced PFN in the rodata area.
+	 */
+	epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
+
+	if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
+		return _PAGE_RW;
+	return 0;
+}
+
+/*
+ * Protect kernel text against becoming non executable by forbidding
+ * _PAGE_NX.  This protects only the high kernel mapping (_text -> _etext)
+ * out of which the kernel actually executes.  Do not protect the low
+ * mapping.
+ *
+ * This does not cover __inittext since that is gone after boot.
+ */
+static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
+{
+	unsigned long t_end = (unsigned long)_etext - 1;
+	unsigned long t_start = (unsigned long)_text;
+
+	if (overlaps(start, end, t_start, t_end))
+		return _PAGE_NX;
+	return 0;
+}
+
+#if defined(CONFIG_X86_64)
+/*
+ * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+ * kernel text mappings for the large page aligned text, rodata sections
+ * will be always read-only. For the kernel identity mappings covering the
+ * holes caused by this alignment can be anything that user asks.
+ *
+ * This will preserve the large page mappings for kernel text/data at no
+ * extra cost.
+ */
+static pgprotval_t protect_kernel_text_ro(unsigned long start,
+					  unsigned long end)
+{
+	unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
+	unsigned long t_start = (unsigned long)_text;
+	unsigned int level;
+
+	if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
+		return 0;
+	/*
+	 * Don't enforce the !RW mapping for the kernel text mapping, if
+	 * the current mapping is already using small page mapping.  No
+	 * need to work hard to preserve large page mappings in this case.
+	 *
+	 * This also fixes the Linux Xen paravirt guest boot failure caused
+	 * by unexpected read-only mappings for kernel identity
+	 * mappings. In this paravirt guest case, the kernel text mapping
+	 * and the kernel identity mapping share the same page-table pages,
+	 * so the protections for kernel text and identity mappings have to
+	 * be the same.
+	 */
+	if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
+		return _PAGE_RW;
+	return 0;
+}
+#else
+static pgprotval_t protect_kernel_text_ro(unsigned long start,
+					  unsigned long end)
+{
+	return 0;
+}
+#endif
+
+static inline bool conflicts(pgprot_t prot, pgprotval_t val)
+{
+	return (pgprot_val(prot) & ~val) != pgprot_val(prot);
+}
+
+static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
+				  unsigned long start, unsigned long end,
+				  unsigned long pfn, const char *txt)
+{
+	static const char *lvltxt[] = {
+		[CPA_CONFLICT]	= "conflict",
+		[CPA_PROTECT]	= "protect",
+		[CPA_DETECT]	= "detect",
+	};
+
+	if (warnlvl > cpa_warn_level || !conflicts(prot, val))
+		return;
+
+	pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
+		lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
+		(unsigned long long)val);
+}
+
+/*
+ * Certain areas of memory on x86 require very specific protection flags,
+ * for example the BIOS area or kernel text. Callers don't always get this
+ * right (again, ioremap() on BIOS memory is not uncommon) so this function
+ * checks and fixes these known static required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
+					  unsigned long pfn, unsigned long npg,
+					  unsigned long lpsize, int warnlvl)
+{
+	pgprotval_t forbidden, res;
+	unsigned long end;
+
+	/*
+	 * There is no point in checking RW/NX conflicts when the requested
+	 * mapping is setting the page !PRESENT.
+	 */
+	if (!(pgprot_val(prot) & _PAGE_PRESENT))
+		return prot;
+
+	/* Operate on the virtual address */
+	end = start + npg * PAGE_SIZE - 1;
+
+	res = protect_kernel_text(start, end);
+	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
+	forbidden = res;
+
+	/*
+	 * Special case to preserve a large page. If the change spawns the
+	 * full large page mapping then there is no point to split it
+	 * up. Happens with ftrace and is going to be removed once ftrace
+	 * switched to text_poke().
+	 */
+	if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
+		res = protect_kernel_text_ro(start, end);
+		check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
+		forbidden |= res;
+	}
+
+	/* Check the PFN directly */
+	res = protect_pci_bios(pfn, pfn + npg - 1);
+	check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
+	forbidden |= res;
+
+	res = protect_rodata(pfn, pfn + npg - 1);
+	check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
+	forbidden |= res;
+
+	return __pgprot(pgprot_val(prot) & ~forbidden);
+}
+
+/*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+			     unsigned int *level)
+{
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	*level = PG_LEVEL_NONE;
+
+	if (pgd_none(*pgd))
+		return NULL;
+
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d))
+		return NULL;
+
+	*level = PG_LEVEL_512G;
+	if (p4d_large(*p4d) || !p4d_present(*p4d))
+		return (pte_t *)p4d;
+
+	pud = pud_offset(p4d, address);
+	if (pud_none(*pud))
+		return NULL;
+
+	*level = PG_LEVEL_1G;
+	if (pud_large(*pud) || !pud_present(*pud))
+		return (pte_t *)pud;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		return NULL;
+
+	*level = PG_LEVEL_2M;
+	if (pmd_large(*pmd) || !pmd_present(*pmd))
+		return (pte_t *)pmd;
+
+	*level = PG_LEVEL_4K;
+
+	return pte_offset_kernel(pmd, address);
+}
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+	return lookup_address_in_pgd(pgd_offset_k(address), address, level);
+}
+EXPORT_SYMBOL_GPL(lookup_address);
+
+static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
+				  unsigned int *level)
+{
+	if (cpa->pgd)
+		return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+					       address, level);
+
+	return lookup_address(address, level);
+}
+
+/*
+ * Lookup the PMD entry for a virtual address. Return a pointer to the entry
+ * or NULL if not present.
+ */
+pmd_t *lookup_pmd_address(unsigned long address)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	pgd = pgd_offset_k(address);
+	if (pgd_none(*pgd))
+		return NULL;
+
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, address);
+	if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+		return NULL;
+
+	return pmd_offset(pud, address);
+}
+
+/*
+ * This is necessary because __pa() does not work on some
+ * kinds of memory, like vmalloc() or the alloc_remap()
+ * areas on 32-bit NUMA systems.  The percpu areas can
+ * end up in this kind of memory, for instance.
+ *
+ * This could be optimized, but it is only intended to be
+ * used at inititalization time, and keeping it
+ * unoptimized should increase the testing coverage for
+ * the more obscure platforms.
+ */
+phys_addr_t slow_virt_to_phys(void *__virt_addr)
+{
+	unsigned long virt_addr = (unsigned long)__virt_addr;
+	phys_addr_t phys_addr;
+	unsigned long offset;
+	enum pg_level level;
+	pte_t *pte;
+
+	pte = lookup_address(virt_addr, &level);
+	BUG_ON(!pte);
+
+	/*
+	 * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+	 * before being left-shifted PAGE_SHIFT bits -- this trick is to
+	 * make 32-PAE kernel work correctly.
+	 */
+	switch (level) {
+	case PG_LEVEL_1G:
+		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PUD_PAGE_MASK;
+		break;
+	case PG_LEVEL_2M:
+		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PMD_PAGE_MASK;
+		break;
+	default:
+		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PAGE_MASK;
+	}
+
+	return (phys_addr_t)(phys_addr | offset);
+}
+EXPORT_SYMBOL_GPL(slow_virt_to_phys);
+
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+{
+	/* change init_mm */
+	set_pte_atomic(kpte, pte);
+#ifdef CONFIG_X86_32
+	if (!SHARED_KERNEL_PMD) {
+		struct page *page;
+
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			p4d_t *p4d;
+			pud_t *pud;
+			pmd_t *pmd;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			p4d = p4d_offset(pgd, address);
+			pud = pud_offset(p4d, address);
+			pmd = pmd_offset(pud, address);
+			set_pte_atomic((pte_t *)pmd, pte);
+		}
+	}
+#endif
+}
+
+static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
+{
+	/*
+	 * _PAGE_GLOBAL means "global page" for present PTEs.
+	 * But, it is also used to indicate _PAGE_PROTNONE
+	 * for non-present PTEs.
+	 *
+	 * This ensures that a _PAGE_GLOBAL PTE going from
+	 * present to non-present is not confused as
+	 * _PAGE_PROTNONE.
+	 */
+	if (!(pgprot_val(prot) & _PAGE_PRESENT))
+		pgprot_val(prot) &= ~_PAGE_GLOBAL;
+
+	return prot;
+}
+
+static int __should_split_large_page(pte_t *kpte, unsigned long address,
+				     struct cpa_data *cpa)
+{
+	unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
+	pgprot_t old_prot, new_prot, req_prot, chk_prot;
+	pte_t new_pte, *tmp;
+	enum pg_level level;
+
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = _lookup_address_cpa(cpa, address, &level);
+	if (tmp != kpte)
+		return 1;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		old_prot = pmd_pgprot(*(pmd_t *)kpte);
+		old_pfn = pmd_pfn(*(pmd_t *)kpte);
+		cpa_inc_2m_checked();
+		break;
+	case PG_LEVEL_1G:
+		old_prot = pud_pgprot(*(pud_t *)kpte);
+		old_pfn = pud_pfn(*(pud_t *)kpte);
+		cpa_inc_1g_checked();
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	psize = page_level_size(level);
+	pmask = page_level_mask(level);
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	lpaddr = (address + psize) & pmask;
+	numpages = (lpaddr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	/*
+	 * We are safe now. Check whether the new pgprot is the same:
+	 * Convert protection attributes to 4k-format, as cpa->mask* are set
+	 * up accordingly.
+	 */
+
+	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
+	req_prot = pgprot_large_2_4k(old_prot);
+
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
+
+	/*
+	 * req_prot is in format of 4k pages. It must be converted to large
+	 * page format: the caching mode includes the PAT bit located at
+	 * different bit positions in the two formats.
+	 */
+	req_prot = pgprot_4k_2_large(req_prot);
+	req_prot = pgprot_clear_protnone_bits(req_prot);
+	if (pgprot_val(req_prot) & _PAGE_PRESENT)
+		pgprot_val(req_prot) |= _PAGE_PSE;
+
+	/*
+	 * old_pfn points to the large page base pfn. So we need to add the
+	 * offset of the virtual address:
+	 */
+	pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
+	cpa->pfn = pfn;
+
+	/*
+	 * Calculate the large page base address and the number of 4K pages
+	 * in the large page
+	 */
+	lpaddr = address & pmask;
+	numpages = psize >> PAGE_SHIFT;
+
+	/*
+	 * Sanity check that the existing mapping is correct versus the static
+	 * protections. static_protections() guards against !PRESENT, so no
+	 * extra conditional required here.
+	 */
+	chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
+				      psize, CPA_CONFLICT);
+
+	if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
+		/*
+		 * Split the large page and tell the split code to
+		 * enforce static protections.
+		 */
+		cpa->force_static_prot = 1;
+		return 1;
+	}
+
+	/*
+	 * Optimization: If the requested pgprot is the same as the current
+	 * pgprot, then the large page can be preserved and no updates are
+	 * required independent of alignment and length of the requested
+	 * range. The above already established that the current pgprot is
+	 * correct, which in consequence makes the requested pgprot correct
+	 * as well if it is the same. The static protection scan below will
+	 * not come to a different conclusion.
+	 */
+	if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
+		cpa_inc_lp_sameprot(level);
+		return 0;
+	}
+
+	/*
+	 * If the requested range does not cover the full page, split it up
+	 */
+	if (address != lpaddr || cpa->numpages != numpages)
+		return 1;
+
+	/*
+	 * Check whether the requested pgprot is conflicting with a static
+	 * protection requirement in the large page.
+	 */
+	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
+				      psize, CPA_DETECT);
+
+	/*
+	 * If there is a conflict, split the large page.
+	 *
+	 * There used to be a 4k wise evaluation trying really hard to
+	 * preserve the large pages, but experimentation has shown, that this
+	 * does not help at all. There might be corner cases which would
+	 * preserve one large page occasionally, but it's really not worth the
+	 * extra code and cycles for the common case.
+	 */
+	if (pgprot_val(req_prot) != pgprot_val(new_prot))
+		return 1;
+
+	/* All checks passed. Update the large page mapping. */
+	new_pte = pfn_pte(old_pfn, new_prot);
+	__set_pmd_pte(kpte, address, new_pte);
+	cpa->flags |= CPA_FLUSHTLB;
+	cpa_inc_lp_preserved(level);
+	return 0;
+}
+
+static int should_split_large_page(pte_t *kpte, unsigned long address,
+				   struct cpa_data *cpa)
+{
+	int do_split;
+
+	if (cpa->force_split)
+		return 1;
+
+	spin_lock(&pgd_lock);
+	do_split = __should_split_large_page(kpte, address, cpa);
+	spin_unlock(&pgd_lock);
+
+	return do_split;
+}
+
+static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
+			  pgprot_t ref_prot, unsigned long address,
+			  unsigned long size)
+{
+	unsigned int npg = PFN_DOWN(size);
+	pgprot_t prot;
+
+	/*
+	 * If should_split_large_page() discovered an inconsistent mapping,
+	 * remove the invalid protection in the split mapping.
+	 */
+	if (!cpa->force_static_prot)
+		goto set;
+
+	/* Hand in lpsize = 0 to enforce the protection mechanism */
+	prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
+
+	if (pgprot_val(prot) == pgprot_val(ref_prot))
+		goto set;
+
+	/*
+	 * If this is splitting a PMD, fix it up. PUD splits cannot be
+	 * fixed trivially as that would require to rescan the newly
+	 * installed PMD mappings after returning from split_large_page()
+	 * so an eventual further split can allocate the necessary PTE
+	 * pages. Warn for now and revisit it in case this actually
+	 * happens.
+	 */
+	if (size == PAGE_SIZE)
+		ref_prot = prot;
+	else
+		pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
+set:
+	set_pte(pte, pfn_pte(pfn, ref_prot));
+}
+
+static int
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+		   struct page *base)
+{
+	unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
+	pte_t *pbase = (pte_t *)page_address(base);
+	unsigned int i, level;
+	pgprot_t ref_prot;
+	pte_t *tmp;
+
+	spin_lock(&pgd_lock);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = _lookup_address_cpa(cpa, address, &level);
+	if (tmp != kpte) {
+		spin_unlock(&pgd_lock);
+		return 1;
+	}
+
+	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		ref_prot = pmd_pgprot(*(pmd_t *)kpte);
+		/*
+		 * Clear PSE (aka _PAGE_PAT) and move
+		 * PAT bit to correct position.
+		 */
+		ref_prot = pgprot_large_2_4k(ref_prot);
+		ref_pfn = pmd_pfn(*(pmd_t *)kpte);
+		lpaddr = address & PMD_MASK;
+		lpinc = PAGE_SIZE;
+		break;
+
+	case PG_LEVEL_1G:
+		ref_prot = pud_pgprot(*(pud_t *)kpte);
+		ref_pfn = pud_pfn(*(pud_t *)kpte);
+		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+		lpaddr = address & PUD_MASK;
+		lpinc = PMD_SIZE;
+		/*
+		 * Clear the PSE flags if the PRESENT flag is not set
+		 * otherwise pmd_present/pmd_huge will return true
+		 * even on a non present pmd.
+		 */
+		if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
+			pgprot_val(ref_prot) &= ~_PAGE_PSE;
+		break;
+
+	default:
+		spin_unlock(&pgd_lock);
+		return 1;
+	}
+
+	ref_prot = pgprot_clear_protnone_bits(ref_prot);
+
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = ref_pfn;
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
+		split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
+
+	if (virt_addr_valid(address)) {
+		unsigned long pfn = PFN_DOWN(__pa(address));
+
+		if (pfn_range_is_mapped(pfn, pfn + 1))
+			split_page_count(level);
+	}
+
+	/*
+	 * Install the new, split up pagetable.
+	 *
+	 * We use the standard kernel pagetable protections for the new
+	 * pagetable protections, the actual ptes set above control the
+	 * primary protection behavior:
+	 */
+	__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
+
+	/*
+	 * Do a global flush tlb after splitting the large page
+	 * and before we do the actual change page attribute in the PTE.
+	 *
+	 * Without this, we violate the TLB application note, that says:
+	 * "The TLBs may contain both ordinary and large-page
+	 *  translations for a 4-KByte range of linear addresses. This
+	 *  may occur if software modifies the paging structures so that
+	 *  the page size used for the address range changes. If the two
+	 *  translations differ with respect to page frame or attributes
+	 *  (e.g., permissions), processor behavior is undefined and may
+	 *  be implementation-specific."
+	 *
+	 * We do this global tlb flush inside the cpa_lock, so that we
+	 * don't allow any other cpu, with stale tlb entries change the
+	 * page attribute in parallel, that also falls into the
+	 * just split large page entry.
+	 */
+	flush_tlb_all();
+	spin_unlock(&pgd_lock);
+
+	return 0;
+}
+
+static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+			    unsigned long address)
+{
+	struct page *base;
+
+	if (!debug_pagealloc_enabled())
+		spin_unlock(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	if (!debug_pagealloc_enabled())
+		spin_lock(&cpa_lock);
+	if (!base)
+		return -ENOMEM;
+
+	if (__split_large_page(cpa, kpte, address, base))
+		__free_page(base);
+
+	return 0;
+}
+
+static bool try_to_free_pte_page(pte_t *pte)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		if (!pte_none(pte[i]))
+			return false;
+
+	free_page((unsigned long)pte);
+	return true;
+}
+
+static bool try_to_free_pmd_page(pmd_t *pmd)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		if (!pmd_none(pmd[i]))
+			return false;
+
+	free_page((unsigned long)pmd);
+	return true;
+}
+
+static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+{
+	pte_t *pte = pte_offset_kernel(pmd, start);
+
+	while (start < end) {
+		set_pte(pte, __pte(0));
+
+		start += PAGE_SIZE;
+		pte++;
+	}
+
+	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+		pmd_clear(pmd);
+		return true;
+	}
+	return false;
+}
+
+static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+			      unsigned long start, unsigned long end)
+{
+	if (unmap_pte_range(pmd, start, end))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, start);
+
+	/*
+	 * Not on a 2MB page boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		__unmap_pmd_range(pud, pmd, start, pre_end);
+
+		start = pre_end;
+		pmd++;
+	}
+
+	/*
+	 * Try to unmap in 2M chunks.
+	 */
+	while (end - start >= PMD_SIZE) {
+		if (pmd_large(*pmd))
+			pmd_clear(pmd);
+		else
+			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+
+		start += PMD_SIZE;
+		pmd++;
+	}
+
+	/*
+	 * 4K leftovers?
+	 */
+	if (start < end)
+		return __unmap_pmd_range(pud, pmd, start, end);
+
+	/*
+	 * Try again to free the PMD page if haven't succeeded above.
+	 */
+	if (!pud_none(*pud))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
+{
+	pud_t *pud = pud_offset(p4d, start);
+
+	/*
+	 * Not on a GB page boundary?
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+		unsigned long pre_end	= min_t(unsigned long, end, next_page);
+
+		unmap_pmd_range(pud, start, pre_end);
+
+		start = pre_end;
+		pud++;
+	}
+
+	/*
+	 * Try to unmap in 1G chunks?
+	 */
+	while (end - start >= PUD_SIZE) {
+
+		if (pud_large(*pud))
+			pud_clear(pud);
+		else
+			unmap_pmd_range(pud, start, start + PUD_SIZE);
+
+		start += PUD_SIZE;
+		pud++;
+	}
+
+	/*
+	 * 2M leftovers?
+	 */
+	if (start < end)
+		unmap_pmd_range(pud, start, end);
+
+	/*
+	 * No need to try to free the PUD page because we'll free it in
+	 * populate_pgd's error path
+	 */
+}
+
+static int alloc_pte_page(pmd_t *pmd)
+{
+	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	if (!pte)
+		return -1;
+
+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+	return 0;
+}
+
+static int alloc_pmd_page(pud_t *pud)
+{
+	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+	if (!pmd)
+		return -1;
+
+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	return 0;
+}
+
+static void populate_pte(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, start);
+
+	pgprot = pgprot_clear_protnone_bits(pgprot);
+
+	while (num_pages-- && start < end) {
+		set_pte(pte, pfn_pte(cpa->pfn, pgprot));
+
+		start	 += PAGE_SIZE;
+		cpa->pfn++;
+		pte++;
+	}
+}
+
+static long populate_pmd(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+{
+	long cur_pages = 0;
+	pmd_t *pmd;
+	pgprot_t pmd_pgprot;
+
+	/*
+	 * Not on a 2M boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+
+		pre_end   = min_t(unsigned long, pre_end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(unsigned int, num_pages, cur_pages);
+
+		/*
+		 * Need a PTE page?
+		 */
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
+
+		start = pre_end;
+	}
+
+	/*
+	 * We mapped them all?
+	 */
+	if (num_pages == cur_pages)
+		return cur_pages;
+
+	pmd_pgprot = pgprot_4k_2_large(pgprot);
+
+	while (end - start >= PMD_SIZE) {
+
+		/*
+		 * We cannot use a 1G page so allocate a PMD page if needed.
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		pmd = pmd_offset(pud, start);
+
+		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
+					canon_pgprot(pmd_pgprot))));
+
+		start	  += PMD_SIZE;
+		cpa->pfn  += PMD_SIZE >> PAGE_SHIFT;
+		cur_pages += PMD_SIZE >> PAGE_SHIFT;
+	}
+
+	/*
+	 * Map trailing 4K pages.
+	 */
+	if (start < end) {
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, end, num_pages - cur_pages,
+			     pmd, pgprot);
+	}
+	return num_pages;
+}
+
+static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
+			pgprot_t pgprot)
+{
+	pud_t *pud;
+	unsigned long end;
+	long cur_pages = 0;
+	pgprot_t pud_pgprot;
+
+	end = start + (cpa->numpages << PAGE_SHIFT);
+
+	/*
+	 * Not on a Gb page boundary? => map everything up to it with
+	 * smaller pages.
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long pre_end;
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+
+		pre_end   = min_t(unsigned long, end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
+
+		pud = pud_offset(p4d, start);
+
+		/*
+		 * Need a PMD page?
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
+					 pud, pgprot);
+		if (cur_pages < 0)
+			return cur_pages;
+
+		start = pre_end;
+	}
+
+	/* We mapped them all? */
+	if (cpa->numpages == cur_pages)
+		return cur_pages;
+
+	pud = pud_offset(p4d, start);
+	pud_pgprot = pgprot_4k_2_large(pgprot);
+
+	/*
+	 * Map everything starting from the Gb boundary, possibly with 1G pages
+	 */
+	while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
+		set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
+				   canon_pgprot(pud_pgprot))));
+
+		start	  += PUD_SIZE;
+		cpa->pfn  += PUD_SIZE >> PAGE_SHIFT;
+		cur_pages += PUD_SIZE >> PAGE_SHIFT;
+		pud++;
+	}
+
+	/* Map trailing leftover */
+	if (start < end) {
+		long tmp;
+
+		pud = pud_offset(p4d, start);
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
+				   pud, pgprot);
+		if (tmp < 0)
+			return cur_pages;
+
+		cur_pages += tmp;
+	}
+	return cur_pages;
+}
+
+/*
+ * Restrictions for kernel page table do not necessarily apply when mapping in
+ * an alternate PGD.
+ */
+static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+{
+	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+	pud_t *pud = NULL;	/* shut up gcc */
+	p4d_t *p4d;
+	pgd_t *pgd_entry;
+	long ret;
+
+	pgd_entry = cpa->pgd + pgd_index(addr);
+
+	if (pgd_none(*pgd_entry)) {
+		p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+		if (!p4d)
+			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
+	}
+
+	/*
+	 * Allocate a PUD page and hand it down for mapping.
+	 */
+	p4d = p4d_offset(pgd_entry, addr);
+	if (p4d_none(*p4d)) {
+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+		if (!pud)
+			return -1;
+
+		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+	}
+
+	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
+
+	ret = populate_pud(cpa, addr, p4d, pgprot);
+	if (ret < 0) {
+		/*
+		 * Leave the PUD page in place in case some other CPU or thread
+		 * already found it, but remove any useless entries we just
+		 * added to it.
+		 */
+		unmap_pud_range(p4d, addr,
+				addr + (cpa->numpages << PAGE_SHIFT));
+		return ret;
+	}
+
+	cpa->numpages = ret;
+	return 0;
+}
+
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+			       int primary)
+{
+	if (cpa->pgd) {
+		/*
+		 * Right now, we only execute this code path when mapping
+		 * the EFI virtual memory map regions, no other users
+		 * provide a ->pgd value. This may change in the future.
+		 */
+		return populate_pgd(cpa, vaddr);
+	}
+
+	/*
+	 * Ignore all non primary paths.
+	 */
+	if (!primary) {
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
+	 * to have holes.
+	 * Also set numpages to '1' indicating that we processed cpa req for
+	 * one virtual address page and its pfn. TBD: numpages can be set based
+	 * on the initial value and the level returned by lookup_address().
+	 */
+	if (within(vaddr, PAGE_OFFSET,
+		   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+		cpa->numpages = 1;
+		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+		return 0;
+
+	} else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+		/* Faults in the highmap are OK, so do not warn: */
+		return -EFAULT;
+	} else {
+		WARN(1, KERN_WARNING "CPA: called for zero pte. "
+			"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+			*cpa->vaddr);
+
+		return -EFAULT;
+	}
+}
+
+static int __change_page_attr(struct cpa_data *cpa, int primary)
+{
+	unsigned long address;
+	int do_split, err;
+	unsigned int level;
+	pte_t *kpte, old_pte;
+
+	address = __cpa_addr(cpa, cpa->curpage);
+repeat:
+	kpte = _lookup_address_cpa(cpa, address, &level);
+	if (!kpte)
+		return __cpa_process_fault(cpa, address, primary);
+
+	old_pte = *kpte;
+	if (pte_none(old_pte))
+		return __cpa_process_fault(cpa, address, primary);
+
+	if (level == PG_LEVEL_4K) {
+		pte_t new_pte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+		unsigned long pfn = pte_pfn(old_pte);
+
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+
+		cpa_inc_4k_install();
+		/* Hand in lpsize = 0 to enforce the protection mechanism */
+		new_prot = static_protections(new_prot, address, pfn, 1, 0,
+					      CPA_PROTECT);
+
+		new_prot = pgprot_clear_protnone_bits(new_prot);
+
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change it's attributes
+		 * not the memory it points to
+		 */
+		new_pte = pfn_pte(pfn, new_prot);
+		cpa->pfn = pfn;
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_atomic(kpte, new_pte);
+			cpa->flags |= CPA_FLUSHTLB;
+		}
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	do_split = should_split_large_page(kpte, address, cpa);
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cp->numpages and cpa->tlbflush have been updated in
+	 * try_large_page:
+	 */
+	if (do_split <= 0)
+		return do_split;
+
+	/*
+	 * We have to split the large page:
+	 */
+	err = split_large_page(cpa, kpte, address);
+	if (!err)
+		goto repeat;
+
+	return err;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+
+static int cpa_process_alias(struct cpa_data *cpa)
+{
+	struct cpa_data alias_cpa;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr;
+	int ret;
+
+	if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
+		return 0;
+
+	/*
+	 * No need to redo, when the primary call touched the direct
+	 * mapping already:
+	 */
+	vaddr = __cpa_addr(cpa, cpa->curpage);
+	if (!(within(vaddr, PAGE_OFFSET,
+		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
+
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &laddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		alias_cpa.curpage = 0;
+
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_X86_64
+	/*
+	 * If the primary call didn't touch the high mapping already
+	 * and the physical address is inside the kernel map, we need
+	 * to touch the high mapped kernel as well:
+	 */
+	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+	    __cpa_pfn_in_highmap(cpa->pfn)) {
+		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+					       __START_KERNEL_map - phys_base;
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		alias_cpa.curpage = 0;
+
+		/*
+		 * The high mapping range is imprecise, so ignore the
+		 * return value.
+		 */
+		__change_page_attr_set_clr(&alias_cpa, 0);
+	}
+#endif
+
+	return 0;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+{
+	unsigned long numpages = cpa->numpages;
+	unsigned long rempages = numpages;
+	int ret = 0;
+
+	while (rempages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = rempages;
+		/* for array changes, we can't use large page */
+		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
+			cpa->numpages = 1;
+
+		if (!debug_pagealloc_enabled())
+			spin_lock(&cpa_lock);
+		ret = __change_page_attr(cpa, checkalias);
+		if (!debug_pagealloc_enabled())
+			spin_unlock(&cpa_lock);
+		if (ret)
+			goto out;
+
+		if (checkalias) {
+			ret = cpa_process_alias(cpa);
+			if (ret)
+				goto out;
+		}
+
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > rempages || !cpa->numpages);
+		rempages -= cpa->numpages;
+		cpa->curpage += cpa->numpages;
+	}
+
+out:
+	/* Restore the original numpages */
+	cpa->numpages = numpages;
+	return ret;
+}
+
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
+				    pgprot_t mask_set, pgprot_t mask_clr,
+				    int force_split, int in_flag,
+				    struct page **pages)
+{
+	struct cpa_data cpa;
+	int ret, cache, checkalias;
+
+	memset(&cpa, 0, sizeof(cpa));
+
+	/*
+	 * Check, if we are requested to set a not supported
+	 * feature.  Clearing non-supported features is OK.
+	 */
+	mask_set = canon_pgprot(mask_set);
+
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+		return 0;
+
+	/* Ensure we are PAGE_SIZE aligned */
+	if (in_flag & CPA_ARRAY) {
+		int i;
+		for (i = 0; i < numpages; i++) {
+			if (addr[i] & ~PAGE_MASK) {
+				addr[i] &= PAGE_MASK;
+				WARN_ON_ONCE(1);
+			}
+		}
+	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
+		/*
+		 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+		 * No need to check in that case
+		 */
+		if (*addr & ~PAGE_MASK) {
+			*addr &= PAGE_MASK;
+			/*
+			 * People should not be passing in unaligned addresses:
+			 */
+			WARN_ON_ONCE(1);
+		}
+	}
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+
+	vm_unmap_aliases();
+
+	cpa.vaddr = addr;
+	cpa.pages = pages;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+	cpa.flags = 0;
+	cpa.curpage = 0;
+	cpa.force_split = force_split;
+
+	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+		cpa.flags |= in_flag;
+
+	/* No alias checking for _NX bit modifications */
+	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
+	/* Has caller explicitly disabled alias checking? */
+	if (in_flag & CPA_NO_CHECK_ALIAS)
+		checkalias = 0;
+
+	ret = __change_page_attr_set_clr(&cpa, checkalias);
+
+	/*
+	 * Check whether we really changed something:
+	 */
+	if (!(cpa.flags & CPA_FLUSHTLB))
+		goto out;
+
+	/*
+	 * No need to flush, when we did not set any of the caching
+	 * attributes:
+	 */
+	cache = !!pgprot2cachemode(mask_set);
+
+	/*
+	 * On error; flush everything to be sure.
+	 */
+	if (ret) {
+		cpa_flush_all(cache);
+		goto out;
+	}
+
+	cpa_flush(&cpa, cache);
+out:
+	return ret;
+}
+
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+				       pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+		(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+					 pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+		(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+				       pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+		CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+					 pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+		CPA_PAGES_ARRAY, pages);
+}
+
+int _set_memory_uc(unsigned long addr, int numpages)
+{
+	/*
+	 * for now UC MINUS. see comments in ioremap()
+	 * If you really need strong UC use ioremap_uc(), but note
+	 * that you cannot override IO areas with set_memory_*() as
+	 * these helpers cannot work with IO memory.
+	 */
+	return change_page_attr_set(&addr, numpages,
+				    cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
+				    0);
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+	int ret;
+
+	/*
+	 * for now UC MINUS. see comments in ioremap()
+	 */
+	ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+			      _PAGE_CACHE_MODE_UC_MINUS, NULL);
+	if (ret)
+		goto out_err;
+
+	ret = _set_memory_uc(addr, numpages);
+	if (ret)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+	return ret;
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int _set_memory_wc(unsigned long addr, int numpages)
+{
+	int ret;
+
+	ret = change_page_attr_set(&addr, numpages,
+				   cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
+				   0);
+	if (!ret) {
+		ret = change_page_attr_set_clr(&addr, numpages,
+					       cachemode2pgprot(_PAGE_CACHE_MODE_WC),
+					       __pgprot(_PAGE_CACHE_MASK),
+					       0, 0, NULL);
+	}
+	return ret;
+}
+
+int set_memory_wc(unsigned long addr, int numpages)
+{
+	int ret;
+
+	ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+		_PAGE_CACHE_MODE_WC, NULL);
+	if (ret)
+		return ret;
+
+	ret = _set_memory_wc(addr, numpages);
+	if (ret)
+		memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+
+	return ret;
+}
+EXPORT_SYMBOL(set_memory_wc);
+
+int _set_memory_wt(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages,
+				    cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
+}
+
+int _set_memory_wb(unsigned long addr, int numpages)
+{
+	/* WB cache mode is hard wired to all cache attribute bits being 0 */
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_CACHE_MASK), 0);
+}
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+	int ret;
+
+	ret = _set_memory_wb(addr, numpages);
+	if (ret)
+		return ret;
+
+	memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+	return 0;
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return 0;
+
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return 0;
+
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
+}
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+
+int set_memory_np_noalias(unsigned long addr, int numpages)
+{
+	int cpa_flags = CPA_NO_CHECK_ALIAS;
+
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(_PAGE_PRESENT), 0,
+					cpa_flags, NULL);
+}
+
+int set_memory_4k(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(0), 1, 0, NULL);
+}
+
+int set_memory_nonglobal(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_GLOBAL), 0);
+}
+
+int set_memory_global(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(_PAGE_GLOBAL), 0);
+}
+
+static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+{
+	struct cpa_data cpa;
+	int ret;
+
+	/* Nothing to do if memory encryption is not active */
+	if (!mem_encrypt_active())
+		return 0;
+
+	/* Should not be working on unaligned addresses */
+	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
+		addr &= PAGE_MASK;
+
+	memset(&cpa, 0, sizeof(cpa));
+	cpa.vaddr = &addr;
+	cpa.numpages = numpages;
+	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
+	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+	cpa.pgd = init_mm.pgd;
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+	vm_unmap_aliases();
+
+	/*
+	 * Before changing the encryption attribute, we need to flush caches.
+	 */
+	cpa_flush(&cpa, 1);
+
+	ret = __change_page_attr_set_clr(&cpa, 1);
+
+	/*
+	 * After changing the encryption attribute, we need to flush TLBs again
+	 * in case any speculative TLB caching occurred (but no need to flush
+	 * caches again).  We could just use cpa_flush_all(), but in case TLB
+	 * flushing gets optimized in the cpa_flush() path use the same logic
+	 * as above.
+	 */
+	cpa_flush(&cpa, 0);
+
+	return ret;
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_enc_dec(addr, numpages, true);
+}
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_enc_dec(addr, numpages, false);
+}
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
+
+int set_pages_uc(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	return set_memory_uc(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_uc);
+
+static int _set_pages_array(struct page **pages, int numpages,
+		enum page_cache_mode new_type)
+{
+	unsigned long start;
+	unsigned long end;
+	enum page_cache_mode set_type;
+	int i;
+	int free_idx;
+	int ret;
+
+	for (i = 0; i < numpages; i++) {
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
+		end = start + PAGE_SIZE;
+		if (memtype_reserve(start, end, new_type, NULL))
+			goto err_out;
+	}
+
+	/* If WC, set to UC- first and then WC */
+	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
+				_PAGE_CACHE_MODE_UC_MINUS : new_type;
+
+	ret = cpa_set_pages_array(pages, numpages,
+				  cachemode2pgprot(set_type));
+	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
+		ret = change_page_attr_set_clr(NULL, numpages,
+					       cachemode2pgprot(
+						_PAGE_CACHE_MODE_WC),
+					       __pgprot(_PAGE_CACHE_MASK),
+					       0, CPA_PAGES_ARRAY, pages);
+	if (ret)
+		goto err_out;
+	return 0; /* Success */
+err_out:
+	free_idx = i;
+	for (i = 0; i < free_idx; i++) {
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
+		end = start + PAGE_SIZE;
+		memtype_free(start, end);
+	}
+	return -EINVAL;
+}
+
+int set_pages_array_uc(struct page **pages, int numpages)
+{
+	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
+int set_pages_array_wc(struct page **pages, int numpages)
+{
+	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
+}
+EXPORT_SYMBOL(set_pages_array_wc);
+
+int set_pages_array_wt(struct page **pages, int numpages)
+{
+	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
+}
+EXPORT_SYMBOL_GPL(set_pages_array_wt);
+
+int set_pages_wb(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	return set_memory_wb(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_wb);
+
+int set_pages_array_wb(struct page **pages, int numpages)
+{
+	int retval;
+	unsigned long start;
+	unsigned long end;
+	int i;
+
+	/* WB cache mode is hard wired to all cache attribute bits being 0 */
+	retval = cpa_clear_pages_array(pages, numpages,
+			__pgprot(_PAGE_CACHE_MASK));
+	if (retval)
+		return retval;
+
+	for (i = 0; i < numpages; i++) {
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
+		end = start + PAGE_SIZE;
+		memtype_free(start, end);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
+int set_pages_ro(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	return set_memory_ro(addr, numpages);
+}
+
+int set_pages_rw(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	return set_memory_rw(addr, numpages);
+}
+
+static int __set_pages_p(struct page *page, int numpages)
+{
+	unsigned long tempaddr = (unsigned long) page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
+				.numpages = numpages,
+				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.mask_clr = __pgprot(0),
+				.flags = 0};
+
+	/*
+	 * No alias checking needed for setting present flag. otherwise,
+	 * we may need to break large pages for 64-bit kernel text
+	 * mappings (this adds to complexity if we want to do this from
+	 * atomic context especially). Let's keep it simple!
+	 */
+	return __change_page_attr_set_clr(&cpa, 0);
+}
+
+static int __set_pages_np(struct page *page, int numpages)
+{
+	unsigned long tempaddr = (unsigned long) page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
+				.numpages = numpages,
+				.mask_set = __pgprot(0),
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.flags = 0};
+
+	/*
+	 * No alias checking needed for setting not present flag. otherwise,
+	 * we may need to break large pages for 64-bit kernel text
+	 * mappings (this adds to complexity if we want to do this from
+	 * atomic context especially). Let's keep it simple!
+	 */
+	return __change_page_attr_set_clr(&cpa, 0);
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+	return __set_pages_np(page, 1);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+	return __set_pages_p(page, 1);
+}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	if (!enable) {
+		debug_check_no_locks_freed(page_address(page),
+					   numpages * PAGE_SIZE);
+	}
+
+	/*
+	 * The return value is ignored as the calls cannot fail.
+	 * Large pages for identity mappings are not used at boot time
+	 * and hence no memory allocations during large page split.
+	 */
+	if (enable)
+		__set_pages_p(page, numpages);
+	else
+		__set_pages_np(page, numpages);
+
+	/*
+	 * We should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 * Preemption needs to be disabled around __flush_tlb_all() due to
+	 * CR3 reload in __native_flush_tlb().
+	 */
+	preempt_disable();
+	__flush_tlb_all();
+	preempt_enable();
+
+	arch_flush_lazy_mmu_mode();
+}
+
+#ifdef CONFIG_HIBERNATION
+bool kernel_page_present(struct page *page)
+{
+	unsigned int level;
+	pte_t *pte;
+
+	if (PageHighMem(page))
+		return false;
+
+	pte = lookup_address((unsigned long)page_address(page), &level);
+	return (pte_val(*pte) & _PAGE_PRESENT);
+}
+#endif /* CONFIG_HIBERNATION */
+
+int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+				   unsigned numpages, unsigned long page_flags)
+{
+	int retval = -EINVAL;
+
+	struct cpa_data cpa = {
+		.vaddr = &address,
+		.pfn = pfn,
+		.pgd = pgd,
+		.numpages = numpages,
+		.mask_set = __pgprot(0),
+		.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
+		.flags = 0,
+	};
+
+	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		goto out;
+
+	if (!(page_flags & _PAGE_ENC))
+		cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
+
+	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
+
+	retval = __change_page_attr_set_clr(&cpa, 0);
+	__flush_tlb_all();
+
+out:
+	return retval;
+}
+
+/*
+ * __flush_tlb_all() flushes mappings only on current CPU and hence this
+ * function shouldn't be used in an SMP environment. Presently, it's used only
+ * during boot (way before smp_init()) by EFI subsystem and hence is ok.
+ */
+int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
+				     unsigned long numpages)
+{
+	int retval;
+
+	/*
+	 * The typical sequence for unmapping is to find a pte through
+	 * lookup_address_in_pgd() (ideally, it should never return NULL because
+	 * the address is already mapped) and change it's protections. As pfn is
+	 * the *target* of a mapping, it's not useful while unmapping.
+	 */
+	struct cpa_data cpa = {
+		.vaddr		= &address,
+		.pfn		= 0,
+		.pgd		= pgd,
+		.numpages	= numpages,
+		.mask_set	= __pgprot(0),
+		.mask_clr	= __pgprot(_PAGE_PRESENT | _PAGE_RW),
+		.flags		= 0,
+	};
+
+	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
+
+	retval = __change_page_attr_set_clr(&cpa, 0);
+	__flush_tlb_all();
+
+	return retval;
+}
+
+/*
+ * The testcases use internal knowledge of the implementation that shouldn't
+ * be exposed to the rest of the kernel. Include these directly here.
+ */
+#ifdef CONFIG_CPA_DEBUG
+#include "cpa-test.c"
+#endif
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
deleted file mode 100644
index eeb5caeb089b..000000000000
--- a/arch/x86/mm/pat_internal.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PAT_INTERNAL_H_
-#define __PAT_INTERNAL_H_
-
-extern int pat_debug_enable;
-
-#define dprintk(fmt, arg...) \
-	do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0)
-
-struct memtype {
-	u64			start;
-	u64			end;
-	u64			subtree_max_end;
-	enum page_cache_mode	type;
-	struct rb_node		rb;
-};
-
-static inline char *cattr_name(enum page_cache_mode pcm)
-{
-	switch (pcm) {
-	case _PAGE_CACHE_MODE_UC:		return "uncached";
-	case _PAGE_CACHE_MODE_UC_MINUS:		return "uncached-minus";
-	case _PAGE_CACHE_MODE_WB:		return "write-back";
-	case _PAGE_CACHE_MODE_WC:		return "write-combining";
-	case _PAGE_CACHE_MODE_WT:		return "write-through";
-	case _PAGE_CACHE_MODE_WP:		return "write-protected";
-	default:				return "broken";
-	}
-}
-
-#ifdef CONFIG_X86_PAT
-extern int rbt_memtype_check_insert(struct memtype *new,
-					enum page_cache_mode *new_type);
-extern struct memtype *rbt_memtype_erase(u64 start, u64 end);
-extern struct memtype *rbt_memtype_lookup(u64 addr);
-extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
-#else
-static inline int rbt_memtype_check_insert(struct memtype *new,
-					enum page_cache_mode *new_type)
-{ return 0; }
-static inline struct memtype *rbt_memtype_erase(u64 start, u64 end)
-{ return NULL; }
-static inline struct memtype *rbt_memtype_lookup(u64 addr)
-{ return NULL; }
-static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{ return 0; }
-#endif
-
-#endif /* __PAT_INTERNAL_H_ */
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
deleted file mode 100644
index 65ebe4b88f7c..000000000000
--- a/arch/x86/mm/pat_rbtree.c
+++ /dev/null
@@ -1,268 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *          Suresh B Siddha <suresh.b.siddha@intel.com>
- *
- * Interval tree (augmented rbtree) used to store the PAT memory type
- * reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/rbtree_augmented.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-
-static int is_node_overlap(struct memtype *node, u64 start, u64 end)
-{
-	if (node->start >= end || node->end <= start)
-		return 0;
-
-	return 1;
-}
-
-static u64 get_subtree_max_end(struct rb_node *node)
-{
-	u64 ret = 0;
-	if (node) {
-		struct memtype *data = rb_entry(node, struct memtype, rb);
-		ret = data->subtree_max_end;
-	}
-	return ret;
-}
-
-#define NODE_END(node) ((node)->end)
-
-RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
-			 struct memtype, rb, u64, subtree_max_end, NODE_END)
-
-/* Find the first (lowest start addr) overlapping range from rb tree */
-static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
-				u64 start, u64 end)
-{
-	struct rb_node *node = root->rb_node;
-	struct memtype *last_lower = NULL;
-
-	while (node) {
-		struct memtype *data = rb_entry(node, struct memtype, rb);
-
-		if (get_subtree_max_end(node->rb_left) > start) {
-			/* Lowest overlap if any must be on left side */
-			node = node->rb_left;
-		} else if (is_node_overlap(data, start, end)) {
-			last_lower = data;
-			break;
-		} else if (start >= data->start) {
-			/* Lowest overlap if any must be on right side */
-			node = node->rb_right;
-		} else {
-			break;
-		}
-	}
-	return last_lower; /* Returns NULL if there is no overlap */
-}
-
-enum {
-	MEMTYPE_EXACT_MATCH	= 0,
-	MEMTYPE_END_MATCH	= 1
-};
-
-static struct memtype *memtype_rb_match(struct rb_root *root,
-				u64 start, u64 end, int match_type)
-{
-	struct memtype *match;
-
-	match = memtype_rb_lowest_match(root, start, end);
-	while (match != NULL && match->start < end) {
-		struct rb_node *node;
-
-		if ((match_type == MEMTYPE_EXACT_MATCH) &&
-		    (match->start == start) && (match->end == end))
-			return match;
-
-		if ((match_type == MEMTYPE_END_MATCH) &&
-		    (match->start < start) && (match->end == end))
-			return match;
-
-		node = rb_next(&match->rb);
-		if (node)
-			match = rb_entry(node, struct memtype, rb);
-		else
-			match = NULL;
-	}
-
-	return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_rb_check_conflict(struct rb_root *root,
-				u64 start, u64 end,
-				enum page_cache_mode reqtype,
-				enum page_cache_mode *newtype)
-{
-	struct rb_node *node;
-	struct memtype *match;
-	enum page_cache_mode found_type = reqtype;
-
-	match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
-	if (match == NULL)
-		goto success;
-
-	if (match->type != found_type && newtype == NULL)
-		goto failure;
-
-	dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
-	found_type = match->type;
-
-	node = rb_next(&match->rb);
-	while (node) {
-		match = rb_entry(node, struct memtype, rb);
-
-		if (match->start >= end) /* Checked all possible matches */
-			goto success;
-
-		if (is_node_overlap(match, start, end) &&
-		    match->type != found_type) {
-			goto failure;
-		}
-
-		node = rb_next(&match->rb);
-	}
-success:
-	if (newtype)
-		*newtype = found_type;
-
-	return 0;
-
-failure:
-	pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
-		current->comm, current->pid, start, end,
-		cattr_name(found_type), cattr_name(match->type));
-	return -EBUSY;
-}
-
-static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
-{
-	struct rb_node **node = &(root->rb_node);
-	struct rb_node *parent = NULL;
-
-	while (*node) {
-		struct memtype *data = rb_entry(*node, struct memtype, rb);
-
-		parent = *node;
-		if (data->subtree_max_end < newdata->end)
-			data->subtree_max_end = newdata->end;
-		if (newdata->start <= data->start)
-			node = &((*node)->rb_left);
-		else if (newdata->start > data->start)
-			node = &((*node)->rb_right);
-	}
-
-	newdata->subtree_max_end = newdata->end;
-	rb_link_node(&newdata->rb, parent, node);
-	rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
-}
-
-int rbt_memtype_check_insert(struct memtype *new,
-			     enum page_cache_mode *ret_type)
-{
-	int err = 0;
-
-	err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
-						new->type, ret_type);
-
-	if (!err) {
-		if (ret_type)
-			new->type = *ret_type;
-
-		new->subtree_max_end = new->end;
-		memtype_rb_insert(&memtype_rbroot, new);
-	}
-	return err;
-}
-
-struct memtype *rbt_memtype_erase(u64 start, u64 end)
-{
-	struct memtype *data;
-
-	/*
-	 * Since the memtype_rbroot tree allows overlapping ranges,
-	 * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
-	 * a whole node for the munmap case.  If no such entry is found,
-	 * it then checks with END_MATCH, i.e. shrink the size of a node
-	 * from the end for the mremap case.
-	 */
-	data = memtype_rb_match(&memtype_rbroot, start, end,
-				MEMTYPE_EXACT_MATCH);
-	if (!data) {
-		data = memtype_rb_match(&memtype_rbroot, start, end,
-					MEMTYPE_END_MATCH);
-		if (!data)
-			return ERR_PTR(-EINVAL);
-	}
-
-	if (data->start == start) {
-		/* munmap: erase this node */
-		rb_erase_augmented(&data->rb, &memtype_rbroot,
-					&memtype_rb_augment_cb);
-	} else {
-		/* mremap: update the end value of this node */
-		rb_erase_augmented(&data->rb, &memtype_rbroot,
-					&memtype_rb_augment_cb);
-		data->end = start;
-		data->subtree_max_end = data->end;
-		memtype_rb_insert(&memtype_rbroot, data);
-		return NULL;
-	}
-
-	return data;
-}
-
-struct memtype *rbt_memtype_lookup(u64 addr)
-{
-	return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
-	struct rb_node *node;
-	int i = 1;
-
-	node = rb_first(&memtype_rbroot);
-	while (node && pos != i) {
-		node = rb_next(node);
-		i++;
-	}
-
-	if (node) { /* pos == i */
-		struct memtype *this = rb_entry(node, struct memtype, rb);
-		*out = *this;
-		return 0;
-	} else {
-		return 1;
-	}
-}
-#endif
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3e4b9035bb9a..7bd2c3a52297 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -643,8 +643,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 	fixmaps_set++;
 }
 
-void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
-		       pgprot_t flags)
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
+		       phys_addr_t phys, pgprot_t flags)
 {
 	/* Sanitize 'prot' against any unsupported bits: */
 	pgprot_val(flags) &= __default_kernel_pte_mask;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 9bb7f0ab9fe6..0e6700eaa4f9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -18,6 +18,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
+#include <linux/vmalloc.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index bdc98150d4db..fc3f3d3e2ef2 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 
 #include <asm/page.h>
+#include <linux/vmalloc.h>
 
 #include "physaddr.h"
 
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 7f2140414440..44a9f068eee0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -574,7 +574,7 @@ static void pti_clone_kernel_text(void)
 	 */
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long end_clone  = (unsigned long)__end_rodata_aligned;
-	unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);
+	unsigned long end_global = PFN_ALIGN((unsigned long)_etext);
 
 	if (!pti_kernel_image_global_ok())
 		return;
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index a8bd952e136d..bda73cb7a044 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -79,7 +79,7 @@ static void do_read_far_test(void __iomem *p)
 
 static void do_test(unsigned long size)
 {
-	void __iomem *p = ioremap_nocache(mmio_address, size);
+	void __iomem *p = ioremap(mmio_address, size);
 	if (!p) {
 		pr_err("could not ioremap, aborting.\n");
 		return;
@@ -104,7 +104,7 @@ static void do_test_bulk_ioremapping(void)
 	int i;
 
 	for (i = 0; i < 10; ++i) {
-		p = ioremap_nocache(mmio_address, PAGE_SIZE);
+		p = ioremap(mmio_address, PAGE_SIZE);
 		if (p)
 			iounmap(p);
 	}
@@ -127,9 +127,9 @@ static int __init init(void)
 		return -ENXIO;
 	}
 
-	pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
-		   "and writing 16 kB of rubbish in there.\n",
-		   size >> 10, mmio_address);
+	pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
+		"and writing 16 kB of rubbish in there.\n",
+		size >> 10, mmio_address);
 	do_test(size);
 	do_test_bulk_ioremapping();
 	pr_info("All done.\n");
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e6a9edc5baaf..66f96f21a7b6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -708,7 +708,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 			       (void *)info, 1);
 	else
 		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
-				(void *)info, 1, GFP_ATOMIC, cpumask);
+				(void *)info, 1, cpumask);
 }
 
 /*
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 991549a1c5f3..9ba08e9abc09 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,13 @@
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
-
+#include <linux/memory.h>
+#include <linux/sort.h>
+#include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
+#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -96,6 +100,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
 
 /*
  * The following table maps BPF registers to x86-64 registers.
@@ -104,8 +109,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
  * register in load/store instructions, it always needs an
  * extra byte of encoding and is callee saved.
  *
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
  */
 static const int reg2hex[] = {
 	[BPF_REG_0] = 0,  /* RAX */
@@ -121,6 +126,20 @@ static const int reg2hex[] = {
 	[BPF_REG_FP] = 5, /* RBP readonly */
 	[BPF_REG_AX] = 2, /* R10 temp register */
 	[AUX_REG] = 3,    /* R11 temp register */
+	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+};
+
+static const int reg2pt_regs[] = {
+	[BPF_REG_0] = offsetof(struct pt_regs, ax),
+	[BPF_REG_1] = offsetof(struct pt_regs, di),
+	[BPF_REG_2] = offsetof(struct pt_regs, si),
+	[BPF_REG_3] = offsetof(struct pt_regs, dx),
+	[BPF_REG_4] = offsetof(struct pt_regs, cx),
+	[BPF_REG_5] = offsetof(struct pt_regs, r8),
+	[BPF_REG_6] = offsetof(struct pt_regs, bx),
+	[BPF_REG_7] = offsetof(struct pt_regs, r13),
+	[BPF_REG_8] = offsetof(struct pt_regs, r14),
+	[BPF_REG_9] = offsetof(struct pt_regs, r15),
 };
 
 /*
@@ -135,6 +154,7 @@ static bool is_ereg(u32 reg)
 			     BIT(BPF_REG_7) |
 			     BIT(BPF_REG_8) |
 			     BIT(BPF_REG_9) |
+			     BIT(X86_REG_R9) |
 			     BIT(BPF_REG_AX));
 }
 
@@ -186,7 +206,10 @@ struct jit_context {
 #define BPF_MAX_INSN_SIZE	128
 #define BPF_INSN_SAFETY		64
 
-#define PROLOGUE_SIZE		20
+/* Number of bytes emit_patch() needs to generate instructions */
+#define X86_PATCH_SIZE		5
+
+#define PROLOGUE_SIZE		25
 
 /*
  * Emit x86-64 prologue code for BPF program and check its size.
@@ -195,8 +218,13 @@ struct jit_context {
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
+	int cnt = X86_PATCH_SIZE;
 
+	/* BPF trampoline can be made to work without these nops,
+	 * but let's waste 5 bytes for now and optimize later
+	 */
+	memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+	prog += cnt;
 	EMIT1(0x55);             /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
 	/* sub rsp, rounded_stack_depth */
@@ -213,6 +241,89 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 	*pprog = prog;
 }
 
+static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+	s64 offset;
+
+	offset = func - (ip + X86_PATCH_SIZE);
+	if (!is_simm32(offset)) {
+		pr_err("Target call %p is out of range\n", func);
+		return -ERANGE;
+	}
+	EMIT1_off32(opcode, offset);
+	*pprog = prog;
+	return 0;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+	return emit_patch(pprog, func, ip, 0xE8);
+}
+
+static int emit_jump(u8 **pprog, void *func, void *ip)
+{
+	return emit_patch(pprog, func, ip, 0xE9);
+}
+
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+				void *old_addr, void *new_addr,
+				const bool text_live)
+{
+	const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+	u8 old_insn[X86_PATCH_SIZE];
+	u8 new_insn[X86_PATCH_SIZE];
+	u8 *prog;
+	int ret;
+
+	memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
+	if (old_addr) {
+		prog = old_insn;
+		ret = t == BPF_MOD_CALL ?
+		      emit_call(&prog, old_addr, ip) :
+		      emit_jump(&prog, old_addr, ip);
+		if (ret)
+			return ret;
+	}
+
+	memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
+	if (new_addr) {
+		prog = new_insn;
+		ret = t == BPF_MOD_CALL ?
+		      emit_call(&prog, new_addr, ip) :
+		      emit_jump(&prog, new_addr, ip);
+		if (ret)
+			return ret;
+	}
+
+	ret = -EBUSY;
+	mutex_lock(&text_mutex);
+	if (memcmp(ip, old_insn, X86_PATCH_SIZE))
+		goto out;
+	if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
+		if (text_live)
+			text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+		else
+			memcpy(ip, new_insn, X86_PATCH_SIZE);
+	}
+	ret = 0;
+out:
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+		       void *old_addr, void *new_addr)
+{
+	if (!is_kernel_text((long)ip) &&
+	    !is_bpf_text_address((long)ip))
+		/* BPF poking in modules is not supported */
+		return -EINVAL;
+
+	return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+}
+
 /*
  * Generate the following code:
  *
@@ -227,7 +338,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
  *   goto *(prog->bpf_func + prologue_size);
  * out:
  */
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog)
 {
 	u8 *prog = *pprog;
 	int label1, label2, label3;
@@ -294,6 +405,68 @@ static void emit_bpf_tail_call(u8 **pprog)
 	*pprog = prog;
 }
 
+static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+				      u8 **pprog, int addr, u8 *image)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	/*
+	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+	 *	goto out;
+	 */
+	EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);         /* cmp eax, MAX_TAIL_CALL_CNT */
+	EMIT2(X86_JA, 14);                            /* ja out */
+	EMIT3(0x83, 0xC0, 0x01);                      /* add eax, 1 */
+	EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+
+	poke->ip = image + (addr - X86_PATCH_SIZE);
+	poke->adj_off = PROLOGUE_SIZE;
+
+	memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+	prog += X86_PATCH_SIZE;
+	/* out: */
+
+	*pprog = prog;
+}
+
+static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
+{
+	struct bpf_jit_poke_descriptor *poke;
+	struct bpf_array *array;
+	struct bpf_prog *target;
+	int i, ret;
+
+	for (i = 0; i < prog->aux->size_poke_tab; i++) {
+		poke = &prog->aux->poke_tab[i];
+		WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+
+		if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+			continue;
+
+		array = container_of(poke->tail_call.map, struct bpf_array, map);
+		mutex_lock(&array->aux->poke_mutex);
+		target = array->ptrs[poke->tail_call.key];
+		if (target) {
+			/* Plain memcpy is used when image is not live yet
+			 * and still not locked as read-only. Once poke
+			 * location is active (poke->ip_stable), any parallel
+			 * bpf_arch_text_poke() might occur still on the
+			 * read-write image until we finally locked it as
+			 * read-only. Both modifications on the given image
+			 * are under text_mutex to avoid interference.
+			 */
+			ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+						   (u8 *)target->bpf_func +
+						   poke->adj_off, false);
+			BUG_ON(ret < 0);
+		}
+		WRITE_ONCE(poke->ip_stable, true);
+		mutex_unlock(&array->aux->poke_mutex);
+	}
+}
+
 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
 			   u32 dst_reg, const u32 imm32)
 {
@@ -377,6 +550,96 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
 	*pprog = prog;
 }
 
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	switch (size) {
+	case BPF_B:
+		/* Emit 'movzx rax, byte ptr [rax + off]' */
+		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+		break;
+	case BPF_H:
+		/* Emit 'movzx rax, word ptr [rax + off]' */
+		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+		break;
+	case BPF_W:
+		/* Emit 'mov eax, dword ptr [rax+0x14]' */
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+		else
+			EMIT1(0x8B);
+		break;
+	case BPF_DW:
+		/* Emit 'mov rax, qword ptr [rax+0x14]' */
+		EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+		break;
+	}
+	/*
+	 * If insn->off == 0 we can save one extra byte, but
+	 * special case of x86 R13 which always needs an offset
+	 * is not worth the hassle
+	 */
+	if (is_imm8(off))
+		EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+	else
+		EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+	*pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	switch (size) {
+	case BPF_B:
+		/* Emit 'mov byte ptr [rax + off], al' */
+		if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+		    /* We have to add extra byte for x86 SIL, DIL regs */
+		    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+		else
+			EMIT1(0x88);
+		break;
+	case BPF_H:
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+		else
+			EMIT2(0x66, 0x89);
+		break;
+	case BPF_W:
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+		else
+			EMIT1(0x89);
+		break;
+	case BPF_DW:
+		EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+		break;
+	}
+	if (is_imm8(off))
+		EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+	else
+		EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+	*pprog = prog;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+			   struct pt_regs *regs, int trapnr,
+			   unsigned long error_code, unsigned long fault_addr)
+{
+	u32 reg = x->fixup >> 8;
+
+	/* jump over faulting load and clear dest register */
+	*(unsigned long *)((void *)regs + reg) = 0;
+	regs->ip += x->fixup & 0xff;
+	return true;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -384,7 +647,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	int insn_cnt = bpf_prog->len;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
-	int i, cnt = 0;
+	int i, cnt = 0, excnt = 0;
 	int proglen = 0;
 	u8 *prog = temp;
 
@@ -747,64 +1010,64 @@ st:			if (is_imm8(insn->off))
 
 			/* STX: *(u8*)(dst_reg + off) = src_reg */
 		case BPF_STX | BPF_MEM | BPF_B:
-			/* Emit 'mov byte ptr [rax + off], al' */
-			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
-			    /* We have to add extra byte for x86 SIL, DIL regs */
-			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
-				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
-			else
-				EMIT1(0x88);
-			goto stx;
 		case BPF_STX | BPF_MEM | BPF_H:
-			if (is_ereg(dst_reg) || is_ereg(src_reg))
-				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
-			else
-				EMIT2(0x66, 0x89);
-			goto stx;
 		case BPF_STX | BPF_MEM | BPF_W:
-			if (is_ereg(dst_reg) || is_ereg(src_reg))
-				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
-			else
-				EMIT1(0x89);
-			goto stx;
 		case BPF_STX | BPF_MEM | BPF_DW:
-			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx:			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
-			else
-				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-					    insn->off);
+			emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 			break;
 
 			/* LDX: dst_reg = *(u8*)(src_reg + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
-			/* Emit 'movzx rax, byte ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
-			goto ldx;
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		case BPF_LDX | BPF_MEM | BPF_H:
-			/* Emit 'movzx rax, word ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
-			goto ldx;
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
 		case BPF_LDX | BPF_MEM | BPF_W:
-			/* Emit 'mov eax, dword ptr [rax+0x14]' */
-			if (is_ereg(dst_reg) || is_ereg(src_reg))
-				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
-			else
-				EMIT1(0x8B);
-			goto ldx;
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		case BPF_LDX | BPF_MEM | BPF_DW:
-			/* Emit 'mov rax, qword ptr [rax+0x14]' */
-			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx:			/*
-			 * If insn->off == 0 we can save one extra byte, but
-			 * special case of x86 R13 which always needs an offset
-			 * is not worth the hassle
-			 */
-			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
-			else
-				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
-					    insn->off);
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+				struct exception_table_entry *ex;
+				u8 *_insn = image + proglen;
+				s64 delta;
+
+				if (!bpf_prog->aux->extable)
+					break;
+
+				if (excnt >= bpf_prog->aux->num_exentries) {
+					pr_err("ex gen bug\n");
+					return -EFAULT;
+				}
+				ex = &bpf_prog->aux->extable[excnt++];
+
+				delta = _insn - (u8 *)&ex->insn;
+				if (!is_simm32(delta)) {
+					pr_err("extable->insn doesn't fit into 32-bit\n");
+					return -EFAULT;
+				}
+				ex->insn = delta;
+
+				delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+				if (!is_simm32(delta)) {
+					pr_err("extable->handler doesn't fit into 32-bit\n");
+					return -EFAULT;
+				}
+				ex->handler = delta;
+
+				if (dst_reg > BPF_REG_9) {
+					pr_err("verifier error\n");
+					return -EFAULT;
+				}
+				/*
+				 * Compute size of x86 insn and its target dest x86 register.
+				 * ex_handler_bpf() will use lower 8 bits to adjust
+				 * pt_regs->ip to jump over this x86 instruction
+				 * and upper bits to figure out which pt_regs to zero out.
+				 * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+				 * of 4 bytes will be ignored and rbx will be zero inited.
+				 */
+				ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+			}
 			break;
 
 			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -827,17 +1090,16 @@ xadd:			if (is_imm8(insn->off))
 			/* call */
 		case BPF_JMP | BPF_CALL:
 			func = (u8 *) __bpf_call_base + imm32;
-			jmp_offset = func - (image + addrs[i]);
-			if (!imm32 || !is_simm32(jmp_offset)) {
-				pr_err("unsupported BPF func %d addr %p image %p\n",
-				       imm32, func, image);
+			if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
 				return -EINVAL;
-			}
-			EMIT1_off32(0xE8, jmp_offset);
 			break;
 
 		case BPF_JMP | BPF_TAIL_CALL:
-			emit_bpf_tail_call(&prog);
+			if (imm32)
+				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+							  &prog, addrs[i], image);
+			else
+				emit_bpf_tail_call_indirect(&prog);
 			break;
 
 			/* cond jump */
@@ -909,6 +1171,16 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP32 | BPF_JSLT | BPF_K:
 		case BPF_JMP32 | BPF_JSGE | BPF_K:
 		case BPF_JMP32 | BPF_JSLE | BPF_K:
+			/* test dst_reg, dst_reg to save one extra byte */
+			if (imm32 == 0) {
+				if (BPF_CLASS(insn->code) == BPF_JMP)
+					EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+				else if (is_ereg(dst_reg))
+					EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+				EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+				goto emit_cond_jmp;
+			}
+
 			/* cmp dst_reg, imm8/32 */
 			if (BPF_CLASS(insn->code) == BPF_JMP)
 				EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,9 +1320,364 @@ emit_jmp:
 		addrs[i] = proglen;
 		prog = temp;
 	}
+
+	if (image && excnt != bpf_prog->aux->num_exentries) {
+		pr_err("extable is not populated\n");
+		return -EFAULT;
+	}
 	return proglen;
 }
 
+static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
+		      int stack_size)
+{
+	int i;
+	/* Store function arguments to stack.
+	 * For a function that accepts two pointers the sequence will be:
+	 * mov QWORD PTR [rbp-0x10],rdi
+	 * mov QWORD PTR [rbp-0x8],rsi
+	 */
+	for (i = 0; i < min(nr_args, 6); i++)
+		emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+			 BPF_REG_FP,
+			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+			 -(stack_size - i * 8));
+}
+
+static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
+			 int stack_size)
+{
+	int i;
+
+	/* Restore function arguments from stack.
+	 * For a function that accepts two pointers the sequence will be:
+	 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+	 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+	 */
+	for (i = 0; i < min(nr_args, 6); i++)
+		emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+			 BPF_REG_FP,
+			 -(stack_size - i * 8));
+}
+
+static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
+		      struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+	u8 *prog = *pprog;
+	int cnt = 0, i;
+
+	for (i = 0; i < prog_cnt; i++) {
+		if (emit_call(&prog, __bpf_prog_enter, prog))
+			return -EINVAL;
+		/* remember prog start time returned by __bpf_prog_enter */
+		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+		/* arg1: lea rdi, [rbp - stack_size] */
+		EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+		/* arg2: progs[i]->insnsi for interpreter */
+		if (!progs[i]->jited)
+			emit_mov_imm64(&prog, BPF_REG_2,
+				       (long) progs[i]->insnsi >> 32,
+				       (u32) (long) progs[i]->insnsi);
+		/* call JITed bpf program or interpreter */
+		if (emit_call(&prog, progs[i]->bpf_func, prog))
+			return -EINVAL;
+
+		/* arg1: mov rdi, progs[i] */
+		emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+			       (u32) (long) progs[i]);
+		/* arg2: mov rsi, rbx <- start time in nsec */
+		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+		if (emit_call(&prog, __bpf_prog_exit, prog))
+			return -EINVAL;
+	}
+	*pprog = prog;
+	return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16                     // space for skb and dev
+ * push rbx                        // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi   // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi    // save dev pointer to stack
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 16]             // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16]   // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8]    // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns
+ * eth_type_trans will continue executing with original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24                     // space for skb, dev, return value
+ * push rbx                        // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog  // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24]   // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16]   // restore dev pointer from stack
+ * call eth_type_trans+5           // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax    // save return value
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog   // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8]    // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8                      // skip eth_type_trans's frame
+ * ret                             // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, void *image_end,
+				const struct btf_func_model *m, u32 flags,
+				struct bpf_prog **fentry_progs, int fentry_cnt,
+				struct bpf_prog **fexit_progs, int fexit_cnt,
+				void *orig_call)
+{
+	int cnt = 0, nr_args = m->nr_args;
+	int stack_size = nr_args * 8;
+	u8 *prog;
+
+	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+	if (nr_args > 6)
+		return -ENOTSUPP;
+
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		stack_size += 8; /* room for return value of orig_call */
+
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* skip patched call instruction and point orig_call to actual
+		 * body of the kernel function.
+		 */
+		orig_call += X86_PATCH_SIZE;
+
+	prog = image;
+
+	EMIT1(0x55);		 /* push rbp */
+	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+	EMIT1(0x53);		 /* push rbx */
+
+	save_regs(m, &prog, nr_args, stack_size);
+
+	if (fentry_cnt)
+		if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		if (fentry_cnt)
+			restore_regs(m, &prog, nr_args, stack_size);
+
+		/* call original function */
+		if (emit_call(&prog, orig_call, prog))
+			return -EINVAL;
+		/* remember return value in a stack for bpf prog to access */
+		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+	}
+
+	if (fexit_cnt)
+		if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_RESTORE_REGS)
+		restore_regs(m, &prog, nr_args, stack_size);
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		/* restore original return value back into RAX */
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+	EMIT1(0x5B); /* pop rbx */
+	EMIT1(0xC9); /* leave */
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* skip our return address and return to parent */
+		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+	EMIT1(0xC3); /* ret */
+	/* Make sure the trampoline generation logic doesn't overflow */
+	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY))
+		return -EFAULT;
+	return prog - (u8 *)image;
+}
+
+static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+	s64 offset;
+
+	offset = func - (ip + 2 + 4);
+	if (!is_simm32(offset)) {
+		pr_err("Target %p is out of range\n", func);
+		return -EINVAL;
+	}
+	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
+	*pprog = prog;
+	return 0;
+}
+
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+	unsigned int i, noplen;
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	while (len > 0) {
+		noplen = len;
+
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+
+		for (i = 0; i < noplen; i++)
+			EMIT1(ideal_nops[noplen][i]);
+		len -= noplen;
+	}
+
+	*pprog = prog;
+}
+
+static int emit_fallback_jump(u8 **pprog)
+{
+	u8 *prog = *pprog;
+	int err = 0;
+
+#ifdef CONFIG_RETPOLINE
+	/* Note that this assumes the the compiler uses external
+	 * thunks for indirect calls. Both clang and GCC use the same
+	 * naming convention for external thunks.
+	 */
+	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+#else
+	int cnt = 0;
+
+	EMIT2(0xFF, 0xE2);	/* jmp rdx */
+#endif
+	*pprog = prog;
+	return err;
+}
+
+static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+{
+	u8 *jg_reloc, *jg_target, *prog = *pprog;
+	int pivot, err, jg_bytes = 1, cnt = 0;
+	s64 jg_offset;
+
+	if (a == b) {
+		/* Leaf node of recursion, i.e. not a range of indices
+		 * anymore.
+		 */
+		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
+		if (!is_simm32(progs[a]))
+			return -1;
+		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
+			    progs[a]);
+		err = emit_cond_near_jump(&prog,	/* je func */
+					  (void *)progs[a], prog,
+					  X86_JE);
+		if (err)
+			return err;
+
+		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
+		if (err)
+			return err;
+
+		*pprog = prog;
+		return 0;
+	}
+
+	/* Not a leaf node, so we pivot, and recursively descend into
+	 * the lower and upper ranges.
+	 */
+	pivot = (b - a) / 2;
+	EMIT1(add_1mod(0x48, BPF_REG_3));		/* cmp rdx,func */
+	if (!is_simm32(progs[a + pivot]))
+		return -1;
+	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
+
+	if (pivot > 2) {				/* jg upper_part */
+		/* Require near jump. */
+		jg_bytes = 4;
+		EMIT2_off32(0x0F, X86_JG + 0x10, 0);
+	} else {
+		EMIT2(X86_JG, 0);
+	}
+	jg_reloc = prog;
+
+	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
+				  progs);
+	if (err)
+		return err;
+
+	/* From Intel 64 and IA-32 Architectures Optimization
+	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+	 * Coding Rule 11: All branch targets should be 16-byte
+	 * aligned.
+	 */
+	jg_target = PTR_ALIGN(prog, 16);
+	if (jg_target != prog)
+		emit_nops(&prog, jg_target - prog);
+	jg_offset = prog - jg_reloc;
+	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
+
+	err = emit_bpf_dispatcher(&prog, a + pivot + 1,	/* emit upper_part */
+				  b, progs);
+	if (err)
+		return err;
+
+	*pprog = prog;
+	return 0;
+}
+
+static int cmp_ips(const void *a, const void *b)
+{
+	const s64 *ipa = a;
+	const s64 *ipb = b;
+
+	if (*ipa > *ipb)
+		return 1;
+	if (*ipa < *ipb)
+		return -1;
+	return 0;
+}
+
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
+{
+	u8 *prog = image;
+
+	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
+	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *header;
 	int *addrs;
@@ -1148,12 +1775,24 @@ out_image:
 			break;
 		}
 		if (proglen == oldproglen) {
-			header = bpf_jit_binary_alloc(proglen, &image,
-						      1, jit_fill_hole);
+			/*
+			 * The number of entries in extable is the number of BPF_LDX
+			 * insns that access kernel memory via "pointer to BTF type".
+			 * The verifier changed their opcode from LDX|MEM|size
+			 * to LDX|PROBE_MEM|size to make JITing easier.
+			 */
+			u32 align = __alignof__(struct exception_table_entry);
+			u32 extable_size = prog->aux->num_exentries *
+				sizeof(struct exception_table_entry);
+
+			/* allocate module memory for x86 insns and extable */
+			header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+						      &image, align, jit_fill_hole);
 			if (!header) {
 				prog = orig_prog;
 				goto out_addrs;
 			}
+			prog->aux->extable = (void *) image + roundup(proglen, align);
 		}
 		oldproglen = proglen;
 		cond_resched();
@@ -1164,6 +1803,7 @@ out_image:
 
 	if (image) {
 		if (!prog->is_func || extra_pass) {
+			bpf_tail_call_direct_fixup(prog);
 			bpf_jit_binary_lock_ro(header);
 		} else {
 			jit_data->addrs = addrs;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 71e8a67337e2..276cf79b5d24 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -67,13 +67,13 @@ static inline void op_x86_warn_in_use(int counter)
 	 * cannot be monitored by any other counter, contact your
 	 * hardware or BIOS vendor.
 	 */
-	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
-		   counter, smp_processor_id());
+	pr_warn("oprofile: counter #%d on cpu #%d may already be used\n",
+		counter, smp_processor_id());
 }
 
 static inline void op_x86_warn_reserved(int counter)
 {
-	pr_warning("oprofile: counter #%d is already reserved\n", counter);
+	pr_warn("oprofile: counter #%d is already reserved\n", counter);
 }
 
 extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index c806b57d3f22..48bcada5cabe 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -24,6 +24,4 @@ obj-y				+= bus_numa.o
 obj-$(CONFIG_AMD_NB)		+= amd_bus.o
 obj-$(CONFIG_PCI_CNB20LE_QUIRK)	+= broadcom_bus.o
 
-ifeq ($(CONFIG_PCI_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_PCI_DEBUG)	+= -DDEBUG
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 9acab6ac28f5..1e59df041456 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -135,7 +135,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev)
 		* resource so the kernel doesn't attempt to assign
 		* it later on in pci_assign_unassigned_resources
 		*/
-		for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) {
+		for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
 			bar_r = &dev->resource[bar];
 			if (bar_r->start == 0 && bar_r->end != 0) {
 				bar_r->flags = 0;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 527e69b12002..e723559c386a 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -589,6 +589,17 @@ static void pci_fixup_amd_ehci_pme(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme);
 
 /*
+ * Device [1022:7914]
+ * When in D0, PME# doesn't get asserted when plugging USB 2.0 device.
+ */
+static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev)
+{
+	dev_info(&dev->dev, "PME# does not work under D0, disabling it\n");
+	dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme);
+
+/*
  * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff]
  *
  * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 9df652d3d927..fa855bbaebaf 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <linux/memblock.h>
 
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/e820/api.h>
 #include <asm/pci_x86.h>
 #include <asm/io_apic.h>
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 43867bc85368..00c62115f39c 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -382,7 +382,7 @@ static void pci_fixed_bar_fixup(struct pci_dev *dev)
 	    PCI_DEVFN(2, 2) == dev->devfn)
 		return;
 
-	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
 		dev->resource[i].end = dev->resource[i].start + size - 1;
 		dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 887d181b769b..0c7b6e66c644 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -105,7 +105,7 @@ static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
 	start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
 	num_buses = cfg->end_bus - cfg->start_bus + 1;
 	size = PCI_MMCFG_BUS_OFFSET(num_buses);
-	addr = ioremap_nocache(start, size);
+	addr = ioremap(start, size);
 	if (addr)
 		addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
 	return addr;
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
index 2e565e65c893..01a085d9135a 100644
--- a/arch/x86/pci/numachip.c
+++ b/arch/x86/pci/numachip.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
  * Numascale NumaConnect-specific PCI code
  *
  * Copyright (C) 2012 Numascale AS. All rights reserved.
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6269a175385d..c313d784efab 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -30,7 +30,6 @@ struct sta2x11_ahb_regs { /* saved during suspend */
 };
 
 struct sta2x11_mapping {
-	u32 amba_base;
 	int is_suspended;
 	struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS];
 };
@@ -92,18 +91,6 @@ static int sta2x11_pdev_to_ep(struct pci_dev *pdev)
 	return pdev->bus->number - instance->bus0;
 }
 
-static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev)
-{
-	struct sta2x11_instance *instance;
-	int ep;
-
-	instance = sta2x11_pdev_to_instance(pdev);
-	if (!instance)
-		return NULL;
-	ep = sta2x11_pdev_to_ep(pdev);
-	return instance->map + ep;
-}
-
 /* This is exported, as some devices need to access the MFD registers */
 struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev)
 {
@@ -111,39 +98,6 @@ struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(sta2x11_get_instance);
 
-
-/**
- * p2a - Translate physical address to STA2x11 AMBA address,
- *       used for DMA transfers to STA2x11
- * @p: Physical address
- * @pdev: PCI device (must be hosted within the connext)
- */
-static dma_addr_t p2a(dma_addr_t p, struct pci_dev *pdev)
-{
-	struct sta2x11_mapping *map;
-	dma_addr_t a;
-
-	map = sta2x11_pdev_to_mapping(pdev);
-	a = p + map->amba_base;
-	return a;
-}
-
-/**
- * a2p - Translate STA2x11 AMBA address to physical address
- *       used for DMA transfers from STA2x11
- * @a: STA2x11 AMBA address
- * @pdev: PCI device (must be hosted within the connext)
- */
-static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
-{
-	struct sta2x11_mapping *map;
-	dma_addr_t p;
-
-	map = sta2x11_pdev_to_mapping(pdev);
-	p = a - map->amba_base;
-	return p;
-}
-
 /* At setup time, we use our own ops if the device is a ConneXt one */
 static void sta2x11_setup_pdev(struct pci_dev *pdev)
 {
@@ -151,9 +105,6 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 
 	if (!instance) /* either a sta2x11 bridge or another ST device */
 		return;
-	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pdev->dev.archdata.is_sta2x11 = true;
 
 	/* We must enable all devices as master, for audio DMA to work */
 	pci_set_master(pdev);
@@ -161,61 +112,6 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev);
 
 /*
- * The following three functions are exported (used in swiotlb: FIXME)
- */
-/**
- * dma_capable - Check if device can manage DMA transfers (FIXME: kill it)
- * @dev: device for a PCI device
- * @addr: DMA address
- * @size: DMA size
- */
-bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	struct sta2x11_mapping *map;
-
-	if (!dev->archdata.is_sta2x11) {
-		if (!dev->dma_mask)
-			return false;
-		return addr + size - 1 <= *dev->dma_mask;
-	}
-
-	map = sta2x11_pdev_to_mapping(to_pci_dev(dev));
-
-	if (!map || (addr < map->amba_base))
-		return false;
-	if (addr + size >= map->amba_base + STA2X11_AMBA_SIZE) {
-		return false;
-	}
-
-	return true;
-}
-
-/**
- * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
- * @dev: device for a PCI device
- * @paddr: Physical address
- */
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
-	if (!dev->archdata.is_sta2x11)
-		return paddr;
-	return p2a(paddr, to_pci_dev(dev));
-}
-
-/**
- * dma_to_phys - Return the physical address used for this STA2x11 DMA address
- * @dev: device for a PCI device
- * @daddr: STA2x11 AMBA DMA address
- */
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
-	if (!dev->archdata.is_sta2x11)
-		return daddr;
-	return a2p(daddr, to_pci_dev(dev));
-}
-
-
-/*
  * At boot we must set up the mappings for the pcie-to-amba bridge.
  * It involves device access, and the same happens at suspend/resume time
  */
@@ -234,12 +130,22 @@ phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 /* At probe time, enable mapping for each endpoint, using the pdev */
 static void sta2x11_map_ep(struct pci_dev *pdev)
 {
-	struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
+	struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev);
+	struct device *dev = &pdev->dev;
+	u32 amba_base, max_amba_addr;
 	int i;
 
-	if (!map)
+	if (!instance)
 		return;
-	pci_read_config_dword(pdev, AHB_BASE(0), &map->amba_base);
+
+	pci_read_config_dword(pdev, AHB_BASE(0), &amba_base);
+	max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1;
+
+	dev->dma_pfn_offset = PFN_DOWN(-amba_base);
+
+	dev->bus_dma_limit = max_amba_addr;
+	pci_set_consistent_dma_mask(pdev, max_amba_addr);
+	pci_set_dma_mask(pdev, max_amba_addr);
 
 	/* Configure AHB mapping */
 	pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0);
@@ -253,13 +159,24 @@ static void sta2x11_map_ep(struct pci_dev *pdev)
 
 	dev_info(&pdev->dev,
 		 "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n",
-		 sta2x11_pdev_to_ep(pdev),  map->amba_base,
-		 map->amba_base + STA2X11_AMBA_SIZE - 1);
+		 sta2x11_pdev_to_ep(pdev), amba_base, max_amba_addr);
 }
 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep);
 
 #ifdef CONFIG_PM /* Some register values must be saved and restored */
 
+static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev)
+{
+	struct sta2x11_instance *instance;
+	int ep;
+
+	instance = sta2x11_pdev_to_instance(pdev);
+	if (!instance)
+		return NULL;
+	ep = sta2x11_pdev_to_ep(pdev);
+	return instance->map + ep;
+}
+
 static void suspend_mapping(struct pci_dev *pdev)
 {
 	struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index fe29f3f5d384..84b09c230cbd 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
-OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
+KASAN_SANITIZE := n
+GCOV_PROFILE := n
 
 obj-$(CONFIG_EFI) 		+= quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_EFI_MIXED)		+= efi_thunk_$(BITS).o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 425e025341db..59f7f6d60cf6 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,8 +54,8 @@
 #include <asm/x86_init.h>
 #include <asm/uv/uv.h>
 
-static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
+static u64 efi_systab_phys __initdata;
 
 static efi_config_table_type_t arch_tables[] __initdata = {
 #ifdef CONFIG_X86_UV
@@ -97,37 +97,14 @@ static int __init setup_add_efi_memmap(char *arg)
 }
 early_param("add_efi_memmap", setup_add_efi_memmap);
 
-static efi_status_t __init phys_efi_set_virtual_address_map(
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	efi_status_t status;
-	unsigned long flags;
-	pgd_t *save_pgd;
-
-	save_pgd = efi_call_phys_prolog();
-	if (!save_pgd)
-		return EFI_ABORTED;
-
-	/* Disable interrupts around EFI calls: */
-	local_irq_save(flags);
-	status = efi_call_phys(efi_phys.set_virtual_address_map,
-			       memory_map_size, descriptor_size,
-			       descriptor_version, virtual_map);
-	local_irq_restore(flags);
-
-	efi_call_phys_epilog(save_pgd);
-
-	return status;
-}
-
 void __init efi_find_mirror(void)
 {
 	efi_memory_desc_t *md;
 	u64 mirror_size = 0, total_size = 0;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -145,14 +122,18 @@ void __init efi_find_mirror(void)
 
 /*
  * Tell the kernel about the EFI memory map.  This might include
- * more than the max 128 entries that can fit in the e820 legacy
- * (zeropage) memory map.
+ * more than the max 128 entries that can fit in the passed in e820
+ * legacy (zeropage) memory map, but the kernel's e820 table can hold
+ * E820_MAX_ENTRIES.
  */
 
 static void __init do_add_efi_memmap(void)
 {
 	efi_memory_desc_t *md;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -164,7 +145,10 @@ static void __init do_add_efi_memmap(void)
 		case EFI_BOOT_SERVICES_CODE:
 		case EFI_BOOT_SERVICES_DATA:
 		case EFI_CONVENTIONAL_MEMORY:
-			if (md->attribute & EFI_MEMORY_WB)
+			if (efi_soft_reserve_enabled()
+			    && (md->attribute & EFI_MEMORY_SP))
+				e820_type = E820_TYPE_SOFT_RESERVED;
+			else if (md->attribute & EFI_MEMORY_WB)
 				e820_type = E820_TYPE_RAM;
 			else
 				e820_type = E820_TYPE_RESERVED;
@@ -190,11 +174,36 @@ static void __init do_add_efi_memmap(void)
 			e820_type = E820_TYPE_RESERVED;
 			break;
 		}
+
 		e820__range_add(start, size, e820_type);
 	}
 	e820__update_table(e820_table);
 }
 
+/*
+ * Given add_efi_memmap defaults to 0 and there there is no alternative
+ * e820 mechanism for soft-reserved memory, import the full EFI memory
+ * map if soft reservations are present and enabled. Otherwise, the
+ * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is
+ * the efi=nosoftreserve option.
+ */
+static bool do_efi_soft_reserve(void)
+{
+	efi_memory_desc_t *md;
+
+	if (!efi_enabled(EFI_MEMMAP))
+		return false;
+
+	if (!efi_soft_reserve_enabled())
+		return false;
+
+	for_each_efi_memory_desc(md)
+		if (md->type == EFI_CONVENTIONAL_MEMORY &&
+		    (md->attribute & EFI_MEMORY_SP))
+			return true;
+	return false;
+}
+
 int __init efi_memblock_x86_reserve_range(void)
 {
 	struct efi_info *e = &boot_params.efi_info;
@@ -224,9 +233,11 @@ int __init efi_memblock_x86_reserve_range(void)
 	if (rv)
 		return rv;
 
-	if (add_efi_memmap)
+	if (add_efi_memmap || do_efi_soft_reserve())
 		do_add_efi_memmap();
 
+	efi_fake_memmap_early();
+
 	WARN(efi.memmap.desc_version != 1,
 	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
 	     efi.memmap.desc_version);
@@ -293,10 +304,16 @@ static void __init efi_clean_memmap(void)
 	}
 
 	if (n_removal > 0) {
-		u64 size = efi.memmap.nr_map - n_removal;
+		struct efi_memory_map_data data = {
+			.phys_map = efi.memmap.phys_map,
+			.desc_version = efi.memmap.desc_version,
+			.desc_size = efi.memmap.desc_size,
+			.size = data.desc_size * (efi.memmap.nr_map - n_removal),
+			.flags = 0,
+		};
 
 		pr_warn("Removing %d invalid memory map entries.\n", n_removal);
-		efi_memmap_install(efi.memmap.phys_map, size);
+		efi_memmap_install(&data);
 	}
 }
 
@@ -316,89 +333,90 @@ void __init efi_print_memmap(void)
 	}
 }
 
-static int __init efi_systab_init(void *phys)
+static int __init efi_systab_init(u64 phys)
 {
+	int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t)
+					  : sizeof(efi_system_table_32_t);
+	bool over4g = false;
+	void *p;
+
+	p = early_memremap_ro(phys, size);
+	if (p == NULL) {
+		pr_err("Couldn't map the system table!\n");
+		return -ENOMEM;
+	}
+
 	if (efi_enabled(EFI_64BIT)) {
-		efi_system_table_64_t *systab64;
-		struct efi_setup_data *data = NULL;
-		u64 tmp = 0;
+		const efi_system_table_64_t *systab64 = p;
+
+		efi_systab.hdr			= systab64->hdr;
+		efi_systab.fw_vendor		= systab64->fw_vendor;
+		efi_systab.fw_revision		= systab64->fw_revision;
+		efi_systab.con_in_handle	= systab64->con_in_handle;
+		efi_systab.con_in		= systab64->con_in;
+		efi_systab.con_out_handle	= systab64->con_out_handle;
+		efi_systab.con_out		= (void *)(unsigned long)systab64->con_out;
+		efi_systab.stderr_handle	= systab64->stderr_handle;
+		efi_systab.stderr		= systab64->stderr;
+		efi_systab.runtime		= (void *)(unsigned long)systab64->runtime;
+		efi_systab.boottime		= (void *)(unsigned long)systab64->boottime;
+		efi_systab.nr_tables		= systab64->nr_tables;
+		efi_systab.tables		= systab64->tables;
+
+		over4g = systab64->con_in_handle	> U32_MAX ||
+			 systab64->con_in		> U32_MAX ||
+			 systab64->con_out_handle	> U32_MAX ||
+			 systab64->con_out		> U32_MAX ||
+			 systab64->stderr_handle	> U32_MAX ||
+			 systab64->stderr		> U32_MAX ||
+			 systab64->boottime		> U32_MAX;
 
 		if (efi_setup) {
-			data = early_memremap(efi_setup, sizeof(*data));
-			if (!data)
+			struct efi_setup_data *data;
+
+			data = early_memremap_ro(efi_setup, sizeof(*data));
+			if (!data) {
+				early_memunmap(p, size);
 				return -ENOMEM;
-		}
-		systab64 = early_memremap((unsigned long)phys,
-					 sizeof(*systab64));
-		if (systab64 == NULL) {
-			pr_err("Couldn't map the system table!\n");
-			if (data)
-				early_memunmap(data, sizeof(*data));
-			return -ENOMEM;
-		}
+			}
+
+			efi_systab.fw_vendor	= (unsigned long)data->fw_vendor;
+			efi_systab.runtime	= (void *)(unsigned long)data->runtime;
+			efi_systab.tables	= (unsigned long)data->tables;
+
+			over4g |= data->fw_vendor	> U32_MAX ||
+				  data->runtime		> U32_MAX ||
+				  data->tables		> U32_MAX;
 
-		efi_systab.hdr = systab64->hdr;
-		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
-					      systab64->fw_vendor;
-		tmp |= data ? data->fw_vendor : systab64->fw_vendor;
-		efi_systab.fw_revision = systab64->fw_revision;
-		efi_systab.con_in_handle = systab64->con_in_handle;
-		tmp |= systab64->con_in_handle;
-		efi_systab.con_in = systab64->con_in;
-		tmp |= systab64->con_in;
-		efi_systab.con_out_handle = systab64->con_out_handle;
-		tmp |= systab64->con_out_handle;
-		efi_systab.con_out = systab64->con_out;
-		tmp |= systab64->con_out;
-		efi_systab.stderr_handle = systab64->stderr_handle;
-		tmp |= systab64->stderr_handle;
-		efi_systab.stderr = systab64->stderr;
-		tmp |= systab64->stderr;
-		efi_systab.runtime = data ?
-				     (void *)(unsigned long)data->runtime :
-				     (void *)(unsigned long)systab64->runtime;
-		tmp |= data ? data->runtime : systab64->runtime;
-		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
-		tmp |= systab64->boottime;
-		efi_systab.nr_tables = systab64->nr_tables;
-		efi_systab.tables = data ? (unsigned long)data->tables :
-					   systab64->tables;
-		tmp |= data ? data->tables : systab64->tables;
-
-		early_memunmap(systab64, sizeof(*systab64));
-		if (data)
 			early_memunmap(data, sizeof(*data));
-#ifdef CONFIG_X86_32
-		if (tmp >> 32) {
-			pr_err("EFI data located above 4GB, disabling EFI.\n");
-			return -EINVAL;
+		} else {
+			over4g |= systab64->fw_vendor	> U32_MAX ||
+				  systab64->runtime	> U32_MAX ||
+				  systab64->tables	> U32_MAX;
 		}
-#endif
 	} else {
-		efi_system_table_32_t *systab32;
-
-		systab32 = early_memremap((unsigned long)phys,
-					 sizeof(*systab32));
-		if (systab32 == NULL) {
-			pr_err("Couldn't map the system table!\n");
-			return -ENOMEM;
-		}
-
-		efi_systab.hdr = systab32->hdr;
-		efi_systab.fw_vendor = systab32->fw_vendor;
-		efi_systab.fw_revision = systab32->fw_revision;
-		efi_systab.con_in_handle = systab32->con_in_handle;
-		efi_systab.con_in = systab32->con_in;
-		efi_systab.con_out_handle = systab32->con_out_handle;
-		efi_systab.con_out = systab32->con_out;
-		efi_systab.stderr_handle = systab32->stderr_handle;
-		efi_systab.stderr = systab32->stderr;
-		efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
-		efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
-		efi_systab.nr_tables = systab32->nr_tables;
-		efi_systab.tables = systab32->tables;
-
-		early_memunmap(systab32, sizeof(*systab32));
+		const efi_system_table_32_t *systab32 = p;
+
+		efi_systab.hdr			= systab32->hdr;
+		efi_systab.fw_vendor		= systab32->fw_vendor;
+		efi_systab.fw_revision		= systab32->fw_revision;
+		efi_systab.con_in_handle	= systab32->con_in_handle;
+		efi_systab.con_in		= systab32->con_in;
+		efi_systab.con_out_handle	= systab32->con_out_handle;
+		efi_systab.con_out		= (void *)(unsigned long)systab32->con_out;
+		efi_systab.stderr_handle	= systab32->stderr_handle;
+		efi_systab.stderr		= systab32->stderr;
+		efi_systab.runtime		= (void *)(unsigned long)systab32->runtime;
+		efi_systab.boottime		= (void *)(unsigned long)systab32->boottime;
+		efi_systab.nr_tables		= systab32->nr_tables;
+		efi_systab.tables		= systab32->tables;
+	}
+
+	early_memunmap(p, size);
+
+	if (IS_ENABLED(CONFIG_X86_32) && over4g) {
+		pr_err("EFI data located above 4GB, disabling EFI.\n");
+		return -EINVAL;
 	}
 
 	efi.systab = &efi_systab;
@@ -418,108 +436,23 @@ static int __init efi_systab_init(void *phys)
 	return 0;
 }
 
-static int __init efi_runtime_init32(void)
-{
-	efi_runtime_services_32_t *runtime;
-
-	runtime = early_memremap((unsigned long)efi.systab->runtime,
-			sizeof(efi_runtime_services_32_t));
-	if (!runtime) {
-		pr_err("Could not map the runtime service table!\n");
-		return -ENOMEM;
-	}
-
-	/*
-	 * We will only need *early* access to the SetVirtualAddressMap
-	 * EFI runtime service. All other runtime services will be called
-	 * via the virtual mapping.
-	 */
-	efi_phys.set_virtual_address_map =
-			(efi_set_virtual_address_map_t *)
-			(unsigned long)runtime->set_virtual_address_map;
-	early_memunmap(runtime, sizeof(efi_runtime_services_32_t));
-
-	return 0;
-}
-
-static int __init efi_runtime_init64(void)
-{
-	efi_runtime_services_64_t *runtime;
-
-	runtime = early_memremap((unsigned long)efi.systab->runtime,
-			sizeof(efi_runtime_services_64_t));
-	if (!runtime) {
-		pr_err("Could not map the runtime service table!\n");
-		return -ENOMEM;
-	}
-
-	/*
-	 * We will only need *early* access to the SetVirtualAddressMap
-	 * EFI runtime service. All other runtime services will be called
-	 * via the virtual mapping.
-	 */
-	efi_phys.set_virtual_address_map =
-			(efi_set_virtual_address_map_t *)
-			(unsigned long)runtime->set_virtual_address_map;
-	early_memunmap(runtime, sizeof(efi_runtime_services_64_t));
-
-	return 0;
-}
-
-static int __init efi_runtime_init(void)
-{
-	int rv;
-
-	/*
-	 * Check out the runtime services table. We need to map
-	 * the runtime services table so that we can grab the physical
-	 * address of several of the EFI runtime functions, needed to
-	 * set the firmware into virtual mode.
-	 *
-	 * When EFI_PARAVIRT is in force then we could not map runtime
-	 * service memory region because we do not have direct access to it.
-	 * However, runtime services are available through proxy functions
-	 * (e.g. in case of Xen dom0 EFI implementation they call special
-	 * hypercall which executes relevant EFI functions) and that is why
-	 * they are always enabled.
-	 */
-
-	if (!efi_enabled(EFI_PARAVIRT)) {
-		if (efi_enabled(EFI_64BIT))
-			rv = efi_runtime_init64();
-		else
-			rv = efi_runtime_init32();
-
-		if (rv)
-			return rv;
-	}
-
-	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-
-	return 0;
-}
-
 void __init efi_init(void)
 {
 	efi_char16_t *c16;
 	char vendor[100] = "unknown";
 	int i = 0;
-	void *tmp;
 
-#ifdef CONFIG_X86_32
-	if (boot_params.efi_info.efi_systab_hi ||
-	    boot_params.efi_info.efi_memmap_hi) {
+	if (IS_ENABLED(CONFIG_X86_32) &&
+	    (boot_params.efi_info.efi_systab_hi ||
+	     boot_params.efi_info.efi_memmap_hi)) {
 		pr_info("Table located above 4GB, disabling EFI.\n");
 		return;
 	}
-	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-#else
-	efi_phys.systab = (efi_system_table_t *)
-			  (boot_params.efi_info.efi_systab |
-			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
-#endif
 
-	if (efi_systab_init(efi_phys.systab))
+	efi_systab_phys = boot_params.efi_info.efi_systab |
+			  ((__u64)boot_params.efi_info.efi_systab_hi << 32);
+
+	if (efi_systab_init(efi_systab_phys))
 		return;
 
 	efi.config_table = (unsigned long)efi.systab->tables;
@@ -529,14 +462,16 @@ void __init efi_init(void)
 	/*
 	 * Show what we know for posterity
 	 */
-	c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
+	c16 = early_memremap_ro(efi.systab->fw_vendor,
+				sizeof(vendor) * sizeof(efi_char16_t));
 	if (c16) {
-		for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
-			vendor[i] = *c16++;
+		for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
+			vendor[i] = c16[i];
 		vendor[i] = '\0';
-	} else
+		early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
+	} else {
 		pr_err("Could not map the firmware vendor!\n");
-	early_memunmap(tmp, 2);
+	}
 
 	pr_info("EFI v%u.%.02u by %s\n",
 		efi.systab->hdr.revision >> 16,
@@ -555,19 +490,21 @@ void __init efi_init(void)
 
 	if (!efi_runtime_supported())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
-	else {
-		if (efi_runtime_disabled() || efi_runtime_init()) {
-			efi_memmap_unmap();
-			return;
-		}
+
+	if (!efi_runtime_supported() || efi_runtime_disabled()) {
+		efi_memmap_unmap();
+		return;
 	}
 
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 	efi_clean_memmap();
 
 	if (efi_enabled(EFI_DBG))
 		efi_print_memmap();
 }
 
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV)
+
 void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
 {
 	u64 addr, npages;
@@ -632,6 +569,8 @@ void __init old_map_region(efi_memory_desc_t *md)
 		       (unsigned long long)md->phys_addr);
 }
 
+#endif
+
 /* Merge contiguous regions of the same type and attribute */
 static void __init efi_merge_regions(void)
 {
@@ -670,7 +609,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
 
 	size = md->num_pages << EFI_PAGE_SHIFT;
 	end = md->phys_addr + size;
-	systab = (u64)(unsigned long)efi_phys.systab;
+	systab = efi_systab_phys;
 	if (md->phys_addr <= systab && systab < end) {
 		systab += md->virt_addr - md->phys_addr;
 		efi.systab = (efi_system_table_t *)(unsigned long)systab;
@@ -730,7 +669,7 @@ static inline void *efi_map_next_entry_reverse(void *entry)
  */
 static void *efi_map_next_entry(void *entry)
 {
-	if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
+	if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) {
 		/*
 		 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
 		 * config table feature requires us to map all entries
@@ -779,10 +718,19 @@ static bool should_map_region(efi_memory_desc_t *md)
 		return false;
 
 	/*
+	 * EFI specific purpose memory may be reserved by default
+	 * depending on kernel config and boot options.
+	 */
+	if (md->type == EFI_CONVENTIONAL_MEMORY &&
+	    efi_soft_reserve_enabled() &&
+	    (md->attribute & EFI_MEMORY_SP))
+		return false;
+
+	/*
 	 * Map all of RAM so that we can access arguments in the 1:1
 	 * mapping when making EFI runtime calls.
 	 */
-	if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) {
+	if (efi_is_mixed()) {
 		if (md->type == EFI_CONVENTIONAL_MEMORY ||
 		    md->type == EFI_LOADER_DATA ||
 		    md->type == EFI_LOADER_CODE)
@@ -853,11 +801,11 @@ static void __init kexec_enter_virtual_mode(void)
 
 	/*
 	 * We don't do virtual mode, since we don't do runtime services, on
-	 * non-native EFI. With efi=old_map, we don't do runtime services in
+	 * non-native EFI. With the UV1 memmap, we don't do runtime services in
 	 * kexec kernel because in the initial boot something else might
 	 * have been mapped at these virtual addresses.
 	 */
-	if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
+	if (efi_is_mixed() || efi_have_uv1_memmap()) {
 		efi_memmap_unmap();
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
@@ -912,11 +860,6 @@ static void __init kexec_enter_virtual_mode(void)
 	efi.runtime_version = efi_systab.hdr.revision;
 
 	efi_native_runtime_setup();
-
-	efi.set_virtual_address_map = NULL;
-
-	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
-		runtime_code_page_mkexec();
 #endif
 }
 
@@ -928,9 +871,9 @@ static void __init kexec_enter_virtual_mode(void)
  *
  * The old method which used to update that memory descriptor with the
  * virtual address obtained from ioremap() is still supported when the
- * kernel is booted with efi=old_map on its command line. Same old
- * method enabled the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
+ * kernel is booted on SG1 UV1 hardware. Same old method enabled the
+ * runtime services to be called without having to thunk back into
+ * physical mode for every invocation.
  *
  * The new method does a pagetable switch in a preemption-safe manner
  * so that we're in a different address space when calling a runtime
@@ -953,16 +896,14 @@ static void __init __efi_enter_virtual_mode(void)
 
 	if (efi_alloc_page_tables()) {
 		pr_err("Failed to allocate EFI page tables\n");
-		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-		return;
+		goto err;
 	}
 
 	efi_merge_regions();
 	new_memmap = efi_map_regions(&count, &pg_shift);
 	if (!new_memmap) {
 		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
-		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-		return;
+		goto err;
 	}
 
 	pa = __pa(new_memmap);
@@ -976,8 +917,7 @@ static void __init __efi_enter_virtual_mode(void)
 
 	if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
 		pr_err("Failed to remap late EFI memory map\n");
-		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-		return;
+		goto err;
 	}
 
 	if (efi_enabled(EFI_DBG)) {
@@ -985,34 +925,22 @@ static void __init __efi_enter_virtual_mode(void)
 		efi_print_memmap();
 	}
 
-	BUG_ON(!efi.systab);
+	if (WARN_ON(!efi.systab))
+		goto err;
 
-	if (efi_setup_page_tables(pa, 1 << pg_shift)) {
-		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-		return;
-	}
+	if (efi_setup_page_tables(pa, 1 << pg_shift))
+		goto err;
 
 	efi_sync_low_kernel_mappings();
 
-	if (efi_is_native()) {
-		status = phys_efi_set_virtual_address_map(
-				efi.memmap.desc_size * count,
-				efi.memmap.desc_size,
-				efi.memmap.desc_version,
-				(efi_memory_desc_t *)pa);
-	} else {
-		status = efi_thunk_set_virtual_address_map(
-				efi_phys.set_virtual_address_map,
-				efi.memmap.desc_size * count,
-				efi.memmap.desc_size,
-				efi.memmap.desc_version,
-				(efi_memory_desc_t *)pa);
-	}
-
+	status = efi_set_virtual_address_map(efi.memmap.desc_size * count,
+					     efi.memmap.desc_size,
+					     efi.memmap.desc_version,
+					     (efi_memory_desc_t *)pa);
 	if (status != EFI_SUCCESS) {
-		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
-			 status);
-		panic("EFI call to SetVirtualAddressMap() failed!");
+		pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n",
+		       status);
+		goto err;
 	}
 
 	efi_free_boot_services();
@@ -1025,13 +953,11 @@ static void __init __efi_enter_virtual_mode(void)
 	 */
 	efi.runtime_version = efi_systab.hdr.revision;
 
-	if (efi_is_native())
+	if (!efi_is_mixed())
 		efi_native_runtime_setup();
 	else
 		efi_thunk_runtime_setup();
 
-	efi.set_virtual_address_map = NULL;
-
 	/*
 	 * Apply more restrictive page table mapping attributes now that
 	 * SVAM() has been called and the firmware has performed all
@@ -1041,6 +967,10 @@ static void __init __efi_enter_virtual_mode(void)
 
 	/* clean DUMMY object */
 	efi_delete_dummy_variable();
+	return;
+
+err:
+	clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 }
 
 void __init efi_enter_virtual_mode(void)
@@ -1056,20 +986,6 @@ void __init efi_enter_virtual_mode(void)
 	efi_dump_pagetable();
 }
 
-static int __init arch_parse_efi_cmdline(char *str)
-{
-	if (!str) {
-		pr_warn("need at least one option\n");
-		return -EINVAL;
-	}
-
-	if (parse_option_str(str, "old_map"))
-		set_bit(EFI_OLD_MEMMAP, &efi.flags);
-
-	return 0;
-}
-early_param("efi", arch_parse_efi_cmdline);
-
 bool efi_is_table_address(unsigned long phys_addr)
 {
 	unsigned int i;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9959657127f4..71dddd1620f9 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -66,9 +66,17 @@ void __init efi_map_region(efi_memory_desc_t *md)
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-pgd_t * __init efi_call_phys_prolog(void)
+efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *,
+			   u32, u32, u32, void *);
+
+efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
+						unsigned long descriptor_size,
+						u32 descriptor_version,
+						efi_memory_desc_t *virtual_map)
 {
 	struct desc_ptr gdt_descr;
+	efi_status_t status;
+	unsigned long flags;
 	pgd_t *save_pgd;
 
 	/* Current pgd is swapper_pg_dir, we'll restore it later: */
@@ -80,14 +88,18 @@ pgd_t * __init efi_call_phys_prolog(void)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
-	return save_pgd;
-}
+	/* Disable interrupts around EFI calls: */
+	local_irq_save(flags);
+	status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map,
+			       memory_map_size, descriptor_size,
+			       descriptor_version, virtual_map);
+	local_irq_restore(flags);
 
-void __init efi_call_phys_epilog(pgd_t *save_pgd)
-{
 	load_fixmap_gdt(0);
 	load_cr3(save_pgd);
 	__flush_tlb_all();
+
+	return status;
 }
 
 void __init efi_runtime_update_mappings(void)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 08ce8177c3af..e2accfe636bd 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -57,142 +57,6 @@ static u64 efi_va = EFI_VA_START;
 
 struct efi_scratch efi_scratch;
 
-static void __init early_code_mapping_set_exec(int executable)
-{
-	efi_memory_desc_t *md;
-
-	if (!(__supported_pte_mask & _PAGE_NX))
-		return;
-
-	/* Make EFI service code area executable */
-	for_each_efi_memory_desc(md) {
-		if (md->type == EFI_RUNTIME_SERVICES_CODE ||
-		    md->type == EFI_BOOT_SERVICES_CODE)
-			efi_set_executable(md, executable);
-	}
-}
-
-pgd_t * __init efi_call_phys_prolog(void)
-{
-	unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
-	pgd_t *save_pgd, *pgd_k, *pgd_efi;
-	p4d_t *p4d, *p4d_k, *p4d_efi;
-	pud_t *pud;
-
-	int pgd;
-	int n_pgds, i, j;
-
-	if (!efi_enabled(EFI_OLD_MEMMAP)) {
-		efi_switch_mm(&efi_mm);
-		return efi_mm.pgd;
-	}
-
-	early_code_mapping_set_exec(1);
-
-	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
-	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
-	if (!save_pgd)
-		return NULL;
-
-	/*
-	 * Build 1:1 identity mapping for efi=old_map usage. Note that
-	 * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
-	 * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
-	 * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
-	 * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
-	 * This means here we can only reuse the PMD tables of the direct mapping.
-	 */
-	for (pgd = 0; pgd < n_pgds; pgd++) {
-		addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
-		vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
-		pgd_efi = pgd_offset_k(addr_pgd);
-		save_pgd[pgd] = *pgd_efi;
-
-		p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
-		if (!p4d) {
-			pr_err("Failed to allocate p4d table!\n");
-			goto out;
-		}
-
-		for (i = 0; i < PTRS_PER_P4D; i++) {
-			addr_p4d = addr_pgd + i * P4D_SIZE;
-			p4d_efi = p4d + p4d_index(addr_p4d);
-
-			pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
-			if (!pud) {
-				pr_err("Failed to allocate pud table!\n");
-				goto out;
-			}
-
-			for (j = 0; j < PTRS_PER_PUD; j++) {
-				addr_pud = addr_p4d + j * PUD_SIZE;
-
-				if (addr_pud > (max_pfn << PAGE_SHIFT))
-					break;
-
-				vaddr = (unsigned long)__va(addr_pud);
-
-				pgd_k = pgd_offset_k(vaddr);
-				p4d_k = p4d_offset(pgd_k, vaddr);
-				pud[j] = *pud_offset(p4d_k, vaddr);
-			}
-		}
-		pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
-	}
-
-	__flush_tlb_all();
-	return save_pgd;
-out:
-	efi_call_phys_epilog(save_pgd);
-	return NULL;
-}
-
-void __init efi_call_phys_epilog(pgd_t *save_pgd)
-{
-	/*
-	 * After the lock is released, the original page table is restored.
-	 */
-	int pgd_idx, i;
-	int nr_pgds;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-
-	if (!efi_enabled(EFI_OLD_MEMMAP)) {
-		efi_switch_mm(efi_scratch.prev_mm);
-		return;
-	}
-
-	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
-
-	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
-		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
-		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
-
-		if (!pgd_present(*pgd))
-			continue;
-
-		for (i = 0; i < PTRS_PER_P4D; i++) {
-			p4d = p4d_offset(pgd,
-					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
-
-			if (!p4d_present(*p4d))
-				continue;
-
-			pud = (pud_t *)p4d_page_vaddr(*p4d);
-			pud_free(&init_mm, pud);
-		}
-
-		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
-		p4d_free(&init_mm, p4d);
-	}
-
-	kfree(save_pgd);
-
-	__flush_tlb_all();
-	early_code_mapping_set_exec(0);
-}
-
 EXPORT_SYMBOL_GPL(efi_mm);
 
 /*
@@ -211,7 +75,7 @@ int __init efi_alloc_page_tables(void)
 	pud_t *pud;
 	gfp_t gfp_mask;
 
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return 0;
 
 	gfp_mask = GFP_KERNEL | __GFP_ZERO;
@@ -252,7 +116,7 @@ void efi_sync_low_kernel_mappings(void)
 	pud_t *pud_k, *pud_efi;
 	pgd_t *efi_pgd = efi_mm.pgd;
 
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return;
 
 	/*
@@ -346,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	unsigned npages;
 	pgd_t *pgd = efi_mm.pgd;
 
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return 0;
 
 	/*
@@ -373,10 +237,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	 * as trim_bios_range() will reserve the first page and isolate it away
 	 * from memory allocators anyway.
 	 */
-	pf = _PAGE_RW;
-	if (sev_active())
-		pf |= _PAGE_ENC;
-
 	if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) {
 		pr_err("Failed to create 1:1 mapping for the first page!\n");
 		return 1;
@@ -388,21 +248,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	 * text and allocate a new stack because we can't rely on the
 	 * stack pointer being < 4GB.
 	 */
-	if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native())
+	if (!efi_is_mixed())
 		return 0;
 
 	page = alloc_page(GFP_KERNEL|__GFP_DMA32);
-	if (!page)
-		panic("Unable to allocate EFI runtime stack < 4GB\n");
+	if (!page) {
+		pr_err("Unable to allocate EFI runtime stack < 4GB\n");
+		return 1;
+	}
 
-	efi_scratch.phys_stack = virt_to_phys(page_address(page));
-	efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */
+	efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */
 
-	npages = (_etext - _text) >> PAGE_SHIFT;
+	npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT;
 	text = __pa(_text);
 	pfn = text >> PAGE_SHIFT;
 
-	pf = _PAGE_RW | _PAGE_ENC;
+	pf = _PAGE_ENC;
 	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
 		pr_err("Failed to map kernel text 1:1\n");
 		return 1;
@@ -417,6 +278,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
 	unsigned long pfn;
 	pgd_t *pgd = efi_mm.pgd;
 
+	/*
+	 * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF
+	 * executable images in memory that consist of both R-X and
+	 * RW- sections, so we cannot apply read-only or non-exec
+	 * permissions just yet. However, modern EFI systems provide
+	 * a memory attributes table that describes those sections
+	 * with the appropriate restricted permissions, which are
+	 * applied in efi_runtime_update_mappings() below. All other
+	 * regions can be mapped non-executable at this point, with
+	 * the exception of boot services code regions, but those will
+	 * be unmapped again entirely in efi_free_boot_services().
+	 */
+	if (md->type != EFI_BOOT_SERVICES_CODE &&
+	    md->type != EFI_RUNTIME_SERVICES_CODE)
+		flags |= _PAGE_NX;
+
 	if (!(md->attribute & EFI_MEMORY_WB))
 		flags |= _PAGE_PCD;
 
@@ -434,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
 	unsigned long size = md->num_pages << PAGE_SHIFT;
 	u64 pa = md->phys_addr;
 
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return old_map_region(md);
 
 	/*
@@ -449,7 +326,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
 	 * booting in EFI mixed mode, because even though we may be
 	 * running a 64-bit kernel, the firmware may only be 32-bit.
 	 */
-	if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) {
+	if (efi_is_mixed()) {
 		md->virt_addr = md->phys_addr;
 		return;
 	}
@@ -491,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md)
 	__map_region(md, md->virt_addr);
 }
 
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
-				 u32 type, u64 attribute)
-{
-	unsigned long last_map_pfn;
-
-	if (type == EFI_MEMORY_MAPPED_IO)
-		return ioremap(phys_addr, size);
-
-	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
-		unsigned long top = last_map_pfn << PAGE_SHIFT;
-		efi_ioremap(top, size - (top - phys_addr), type, attribute);
-	}
-
-	if (!(attribute & EFI_MEMORY_WB))
-		efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
-
-	return (void __iomem *)__va(phys_addr);
-}
-
 void __init parse_efi_setup(u64 phys_addr, u32 data_len)
 {
 	efi_setup = phys_addr + sizeof(struct setup_data);
@@ -559,7 +416,7 @@ void __init efi_runtime_update_mappings(void)
 {
 	efi_memory_desc_t *md;
 
-	if (efi_enabled(EFI_OLD_MEMMAP)) {
+	if (efi_have_uv1_memmap()) {
 		if (__supported_pte_mask & _PAGE_NX)
 			runtime_code_page_mkexec();
 		return;
@@ -613,7 +470,7 @@ void __init efi_runtime_update_mappings(void)
 void __init efi_dump_pagetable(void)
 {
 #ifdef CONFIG_EFI_PGT_DUMP
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		ptdump_walk_pgd_level(NULL, swapper_pg_dir);
 	else
 		ptdump_walk_pgd_level(NULL, efi_mm.pgd);
@@ -634,63 +491,74 @@ void efi_switch_mm(struct mm_struct *mm)
 	switch_mm(efi_scratch.prev_mm, mm, NULL);
 }
 
-#ifdef CONFIG_EFI_MIXED
-extern efi_status_t efi64_thunk(u32, ...);
-
 static DEFINE_SPINLOCK(efi_runtime_lock);
 
-#define runtime_service32(func)						 \
-({									 \
-	u32 table = (u32)(unsigned long)efi.systab;			 \
-	u32 *rt, *___f;							 \
-									 \
-	rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime));	 \
-	___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \
-	*___f;								 \
+/*
+ * DS and ES contain user values.  We need to save them.
+ * The 32-bit EFI code needs a valid DS, ES, and SS.  There's no
+ * need to save the old SS: __KERNEL_DS is always acceptable.
+ */
+#define __efi_thunk(func, ...)						\
+({									\
+	efi_runtime_services_32_t *__rt;				\
+	unsigned short __ds, __es;					\
+	efi_status_t ____s;						\
+									\
+	__rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime;	\
+									\
+	savesegment(ds, __ds);						\
+	savesegment(es, __es);						\
+									\
+	loadsegment(ss, __KERNEL_DS);					\
+	loadsegment(ds, __KERNEL_DS);					\
+	loadsegment(es, __KERNEL_DS);					\
+									\
+	____s = efi64_thunk(__rt->func, __VA_ARGS__);			\
+									\
+	loadsegment(ds, __ds);						\
+	loadsegment(es, __es);						\
+									\
+	____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32;	\
+	____s;								\
 })
 
 /*
  * Switch to the EFI page tables early so that we can access the 1:1
  * runtime services mappings which are not mapped in any other page
- * tables. This function must be called before runtime_service32().
+ * tables.
  *
  * Also, disable interrupts because the IDT points to 64-bit handlers,
  * which aren't going to function correctly when we switch to 32-bit.
  */
-#define efi_thunk(f, ...)						\
+#define efi_thunk(func...)						\
 ({									\
 	efi_status_t __s;						\
-	u32 __func;							\
 									\
 	arch_efi_call_virt_setup();					\
 									\
-	__func = runtime_service32(f);					\
-	__s = efi64_thunk(__func, __VA_ARGS__);				\
+	__s = __efi_thunk(func);					\
 									\
 	arch_efi_call_virt_teardown();					\
 									\
 	__s;								\
 })
 
-efi_status_t efi_thunk_set_virtual_address_map(
-	void *phys_set_virtual_address_map,
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
+static efi_status_t __init __no_sanitize_address
+efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
+				  unsigned long descriptor_size,
+				  u32 descriptor_version,
+				  efi_memory_desc_t *virtual_map)
 {
 	efi_status_t status;
 	unsigned long flags;
-	u32 func;
 
 	efi_sync_low_kernel_mappings();
 	local_irq_save(flags);
 
 	efi_switch_mm(&efi_mm);
 
-	func = (u32)(unsigned long)phys_set_virtual_address_map;
-	status = efi64_thunk(func, memory_map_size, descriptor_size,
-			     descriptor_version, virtual_map);
+	status = __efi_thunk(set_virtual_address_map, memory_map_size,
+			     descriptor_size, descriptor_version, virtual_map);
 
 	efi_switch_mm(efi_scratch.prev_mm);
 	local_irq_restore(flags);
@@ -993,8 +861,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules,
 	return EFI_UNSUPPORTED;
 }
 
-void efi_thunk_runtime_setup(void)
+void __init efi_thunk_runtime_setup(void)
 {
+	if (!IS_ENABLED(CONFIG_EFI_MIXED))
+		return;
+
 	efi.get_time = efi_thunk_get_time;
 	efi.set_time = efi_thunk_set_time;
 	efi.get_wakeup_time = efi_thunk_get_wakeup_time;
@@ -1010,4 +881,46 @@ void efi_thunk_runtime_setup(void)
 	efi.update_capsule = efi_thunk_update_capsule;
 	efi.query_capsule_caps = efi_thunk_query_capsule_caps;
 }
-#endif /* CONFIG_EFI_MIXED */
+
+efi_status_t __init __no_sanitize_address
+efi_set_virtual_address_map(unsigned long memory_map_size,
+			    unsigned long descriptor_size,
+			    u32 descriptor_version,
+			    efi_memory_desc_t *virtual_map)
+{
+	efi_status_t status;
+	unsigned long flags;
+	pgd_t *save_pgd = NULL;
+
+	if (efi_is_mixed())
+		return efi_thunk_set_virtual_address_map(memory_map_size,
+							 descriptor_size,
+							 descriptor_version,
+							 virtual_map);
+
+	if (efi_have_uv1_memmap()) {
+		save_pgd = efi_uv1_memmap_phys_prolog();
+		if (!save_pgd)
+			return EFI_ABORTED;
+	} else {
+		efi_switch_mm(&efi_mm);
+	}
+
+	kernel_fpu_begin();
+
+	/* Disable interrupts around EFI calls: */
+	local_irq_save(flags);
+	status = efi_call(efi.systab->runtime->set_virtual_address_map,
+			  memory_map_size, descriptor_size,
+			  descriptor_version, virtual_map);
+	local_irq_restore(flags);
+
+	kernel_fpu_end();
+
+	if (save_pgd)
+		efi_uv1_memmap_phys_epilog(save_pgd);
+	else
+		efi_switch_mm(efi_scratch.prev_mm);
+
+	return status;
+}
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index ab2e91e76894..75c46e7a809f 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -7,118 +7,43 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/page_types.h>
 
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
+	__INIT
+SYM_FUNC_START(efi_call_svam)
+	push	8(%esp)
+	push	8(%esp)
+	push	%ecx
+	push	%edx
 
-.text
-ENTRY(efi_call_phys)
 	/*
-	 * 0. The function can only be called in Linux kernel. So CS has been
-	 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
-	 * the values of these registers are the same. And, the corresponding
-	 * GDT entries are identical. So I will do nothing about segment reg
-	 * and GDT, but change GDT base register in prolog and epilog.
-	 */
-
-	/*
-	 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
-	 * But to make it smoothly switch from virtual mode to flat mode.
-	 * The mapping of lower virtual memory has been created in prolog and
-	 * epilog.
+	 * Switch to the flat mapped alias of this routine, by jumping to the
+	 * address of label '1' after subtracting PAGE_OFFSET from it.
 	 */
 	movl	$1f, %edx
 	subl	$__PAGE_OFFSET, %edx
 	jmp	*%edx
 1:
 
-	/*
-	 * 2. Now on the top of stack is the return
-	 * address in the caller of efi_call_phys(), then parameter 1,
-	 * parameter 2, ..., param n. To make things easy, we save the return
-	 * address of efi_call_phys in a global variable.
-	 */
-	popl	%edx
-	movl	%edx, saved_return_addr
-	/* get the function pointer into ECX*/
-	popl	%ecx
-	movl	%ecx, efi_rt_function_ptr
-	movl	$2f, %edx
-	subl	$__PAGE_OFFSET, %edx
-	pushl	%edx
-
-	/*
-	 * 3. Clear PG bit in %CR0.
-	 */
+	/* disable paging */
 	movl	%cr0, %edx
 	andl	$0x7fffffff, %edx
 	movl	%edx, %cr0
-	jmp	1f
-1:
 
-	/*
-	 * 4. Adjust stack pointer.
-	 */
+	/* convert the stack pointer to a flat mapped address */
 	subl	$__PAGE_OFFSET, %esp
 
-	/*
-	 * 5. Call the physical function.
-	 */
-	jmp	*%ecx
+	/* call the EFI routine */
+	call	*(%eax)
 
-2:
-	/*
-	 * 6. After EFI runtime service returns, control will return to
-	 * following instruction. We'd better readjust stack pointer first.
-	 */
-	addl	$__PAGE_OFFSET, %esp
+	/* convert ESP back to a kernel VA, and pop the outgoing args */
+	addl	$__PAGE_OFFSET + 16, %esp
 
-	/*
-	 * 7. Restore PG bit
-	 */
+	/* re-enable paging */
 	movl	%cr0, %edx
 	orl	$0x80000000, %edx
 	movl	%edx, %cr0
-	jmp	1f
-1:
-	/*
-	 * 8. Now restore the virtual mode from flat mode by
-	 * adding EIP with PAGE_OFFSET.
-	 */
-	movl	$1f, %edx
-	jmp	*%edx
-1:
-
-	/*
-	 * 9. Balance the stack. And because EAX contain the return value,
-	 * we'd better not clobber it.
-	 */
-	leal	efi_rt_function_ptr, %edx
-	movl	(%edx), %ecx
-	pushl	%ecx
 
-	/*
-	 * 10. Push the saved return address onto the stack and return.
-	 */
-	leal	saved_return_addr, %edx
-	movl	(%edx), %ecx
-	pushl	%ecx
 	ret
-ENDPROC(efi_call_phys)
-.previous
-
-.data
-saved_return_addr:
-	.long 0
-efi_rt_function_ptr:
-	.long 0
+SYM_FUNC_END(efi_call_svam)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 74628ec78f29..15da118f04f0 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -8,41 +8,12 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/msr.h>
-#include <asm/processor-flags.h>
-#include <asm/page_types.h>
+#include <asm/nospec-branch.h>
 
-#define SAVE_XMM			\
-	mov %rsp, %rax;			\
-	subq $0x70, %rsp;		\
-	and $~0xf, %rsp;		\
-	mov %rax, (%rsp);		\
-	mov %cr0, %rax;			\
-	clts;				\
-	mov %rax, 0x8(%rsp);		\
-	movaps %xmm0, 0x60(%rsp);	\
-	movaps %xmm1, 0x50(%rsp);	\
-	movaps %xmm2, 0x40(%rsp);	\
-	movaps %xmm3, 0x30(%rsp);	\
-	movaps %xmm4, 0x20(%rsp);	\
-	movaps %xmm5, 0x10(%rsp)
-
-#define RESTORE_XMM			\
-	movaps 0x60(%rsp), %xmm0;	\
-	movaps 0x50(%rsp), %xmm1;	\
-	movaps 0x40(%rsp), %xmm2;	\
-	movaps 0x30(%rsp), %xmm3;	\
-	movaps 0x20(%rsp), %xmm4;	\
-	movaps 0x10(%rsp), %xmm5;	\
-	mov 0x8(%rsp), %rsi;		\
-	mov %rsi, %cr0;			\
-	mov (%rsp), %rsp
-
-ENTRY(efi_call)
+SYM_FUNC_START(__efi_call)
 	pushq %rbp
 	movq %rsp, %rbp
-	SAVE_XMM
+	and $~0xf, %rsp
 	mov 16(%rbp), %rax
 	subq $48, %rsp
 	mov %r9, 32(%rsp)
@@ -50,9 +21,7 @@ ENTRY(efi_call)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
-	call *%rdi
-	addq $48, %rsp
-	RESTORE_XMM
-	popq %rbp
+	CALL_NOSPEC %rdi
+	leave
 	ret
-ENDPROC(efi_call)
+SYM_FUNC_END(__efi_call)
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 46c58b08739c..26f0da238c1c 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -25,15 +25,16 @@
 
 	.text
 	.code64
-ENTRY(efi64_thunk)
+SYM_CODE_START(__efi64_thunk)
 	push	%rbp
 	push	%rbx
 
 	/*
 	 * Switch to 1:1 mapped 32-bit stack pointer.
 	 */
-	movq	%rsp, efi_saved_sp(%rip)
+	movq	%rsp, %rax
 	movq	efi_scratch(%rip), %rsp
+	push	%rax
 
 	/*
 	 * Calculate the physical address of the kernel text.
@@ -41,113 +42,31 @@ ENTRY(efi64_thunk)
 	movq	$__START_KERNEL_map, %rax
 	subq	phys_base(%rip), %rax
 
-	/*
-	 * Push some physical addresses onto the stack. This is easier
-	 * to do now in a code64 section while the assembler can address
-	 * 64-bit values. Note that all the addresses on the stack are
-	 * 32-bit.
-	 */
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rbx
-	subq	%rax, %rbx
-	movl	%ebx, 8(%rsp)
-
-	leaq	__efi64_thunk(%rip), %rbx
+	leaq	1f(%rip), %rbp
+	leaq	2f(%rip), %rbx
+	subq	%rax, %rbp
 	subq	%rax, %rbx
-	call	*%rbx
-
-	movq	efi_saved_sp(%rip), %rsp
-	pop	%rbx
-	pop	%rbp
-	retq
-ENDPROC(efi64_thunk)
 
-/*
- * We run this function from the 1:1 mapping.
- *
- * This function must be invoked with a 1:1 mapped stack.
- */
-ENTRY(__efi64_thunk)
-	movl	%ds, %eax
-	push	%rax
-	movl	%es, %eax
-	push	%rax
-	movl	%ss, %eax
-	push	%rax
-
-	subq	$32, %rsp
-	movl	%esi, 0x0(%rsp)
-	movl	%edx, 0x4(%rsp)
-	movl	%ecx, 0x8(%rsp)
-	movq	%r8, %rsi
-	movl	%esi, 0xc(%rsp)
-	movq	%r9, %rsi
-	movl	%esi,  0x10(%rsp)
-
-	leaq	1f(%rip), %rbx
-	movq	%rbx, func_rt_ptr(%rip)
+	subq	$28, %rsp
+	movl	%ebx, 0x0(%rsp)		/* return address */
+	movl	%esi, 0x4(%rsp)
+	movl	%edx, 0x8(%rsp)
+	movl	%ecx, 0xc(%rsp)
+	movl	%r8d, 0x10(%rsp)
+	movl	%r9d, 0x14(%rsp)
 
 	/* Switch to 32-bit descriptor */
 	pushq	$__KERNEL32_CS
-	leaq	efi_enter32(%rip), %rax
-	pushq	%rax
+	pushq	%rdi			/* EFI runtime service address */
 	lretq
 
-1:	addq	$32, %rsp
-
+1:	movq	24(%rsp), %rsp
 	pop	%rbx
-	movl	%ebx, %ss
-	pop	%rbx
-	movl	%ebx, %es
-	pop	%rbx
-	movl	%ebx, %ds
-
-	/*
-	 * Convert 32-bit status code into 64-bit.
-	 */
-	test	%rax, %rax
-	jz	1f
-	movl	%eax, %ecx
-	andl	$0x0fffffff, %ecx
-	andl	$0xf0000000, %eax
-	shl	$32, %rax
-	or	%rcx, %rax
-1:
-	ret
-ENDPROC(__efi64_thunk)
-
-ENTRY(efi_exit32)
-	movq	func_rt_ptr(%rip), %rax
-	push	%rax
-	mov	%rdi, %rax
-	ret
-ENDPROC(efi_exit32)
+	pop	%rbp
+	retq
 
 	.code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-ENTRY(efi_enter32)
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	%eax, %ss
-
-	call	*%edi
-
-	/* We must preserve return value */
-	movl	%eax, %edi
-
-	movl	72(%esp), %eax
-	pushl	$__KERNEL_CS
-	pushl	%eax
-
+2:	pushl	$__KERNEL_CS
+	pushl	%ebp
 	lret
-ENDPROC(efi_enter32)
-
-	.data
-	.balign	8
-func_rt_ptr:		.quad 0
-efi_saved_sp:		.quad 0
+SYM_CODE_END(__efi64_thunk)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 3b9fd679cea9..88d32c06cffa 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -16,6 +16,7 @@
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 #include <asm/cpu_device_id.h>
+#include <asm/realmode.h>
 #include <asm/reboot.h>
 
 #define EFI_MIN_RESERVE 5120
@@ -243,7 +244,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store);
  */
 void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 {
-	phys_addr_t new_phys, new_size;
+	struct efi_memory_map_data data = { 0 };
 	struct efi_mem_range mr;
 	efi_memory_desc_t md;
 	int num_entries;
@@ -260,10 +261,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 		return;
 	}
 
-	/* No need to reserve regions that will never be freed. */
-	if (md.attribute & EFI_MEMORY_RUNTIME)
-		return;
-
 	size += addr % EFI_PAGE_SIZE;
 	size = round_up(size, EFI_PAGE_SIZE);
 	addr = round_down(addr, EFI_PAGE_SIZE);
@@ -275,24 +272,23 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 	num_entries = efi_memmap_split_count(&md, &mr.range);
 	num_entries += efi.memmap.nr_map;
 
-	new_size = efi.memmap.desc_size * num_entries;
-
-	new_phys = efi_memmap_alloc(num_entries);
-	if (!new_phys) {
+	if (efi_memmap_alloc(num_entries, &data) != 0) {
 		pr_err("Could not allocate boot services memmap\n");
 		return;
 	}
 
-	new = early_memremap(new_phys, new_size);
+	new = early_memremap(data.phys_map, data.size);
 	if (!new) {
 		pr_err("Failed to map new boot services memmap\n");
 		return;
 	}
 
 	efi_memmap_insert(&efi.memmap, new, &mr);
-	early_memunmap(new, new_size);
+	early_memunmap(new, data.size);
 
-	efi_memmap_install(new_phys, num_entries);
+	efi_memmap_install(&data);
+	e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+	e820__update_table(e820_table);
 }
 
 /*
@@ -320,6 +316,9 @@ void __init efi_reserve_boot_services(void)
 {
 	efi_memory_desc_t *md;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	for_each_efi_memory_desc(md) {
 		u64 start = md->phys_addr;
 		u64 size = md->num_pages << EFI_PAGE_SHIFT;
@@ -383,10 +382,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
 
 	/*
 	 * To Do: Remove this check after adding functionality to unmap EFI boot
-	 * services code/data regions from direct mapping area because
-	 * "efi=old_map" maps EFI regions in swapper_pg_dir.
+	 * services code/data regions from direct mapping area because the UV1
+	 * memory map maps EFI regions in swapper_pg_dir.
 	 */
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_have_uv1_memmap())
 		return;
 
 	/*
@@ -394,7 +393,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
 	 * EFI runtime calls, hence don't unmap EFI boot services code/data
 	 * regions.
 	 */
-	if (!efi_is_native())
+	if (efi_is_mixed())
 		return;
 
 	if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages))
@@ -406,7 +405,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
 
 void __init efi_free_boot_services(void)
 {
-	phys_addr_t new_phys, new_size;
+	struct efi_memory_map_data data = { 0 };
 	efi_memory_desc_t *md;
 	int num_entries = 0;
 	void *new, *new_md;
@@ -461,14 +460,12 @@ void __init efi_free_boot_services(void)
 	if (!num_entries)
 		return;
 
-	new_size = efi.memmap.desc_size * num_entries;
-	new_phys = efi_memmap_alloc(num_entries);
-	if (!new_phys) {
+	if (efi_memmap_alloc(num_entries, &data) != 0) {
 		pr_err("Failed to allocate new EFI memmap\n");
 		return;
 	}
 
-	new = memremap(new_phys, new_size, MEMREMAP_WB);
+	new = memremap(data.phys_map, data.size, MEMREMAP_WB);
 	if (!new) {
 		pr_err("Failed to map new EFI memmap\n");
 		return;
@@ -492,7 +489,7 @@ void __init efi_free_boot_services(void)
 
 	memunmap(new);
 
-	if (efi_memmap_install(new_phys, num_entries)) {
+	if (efi_memmap_install(&data) != 0) {
 		pr_err("Could not install new EFI memmap\n");
 		return;
 	}
@@ -557,7 +554,7 @@ out:
 	return ret;
 }
 
-static const struct dmi_system_id sgi_uv1_dmi[] = {
+static const struct dmi_system_id sgi_uv1_dmi[] __initconst = {
 	{ NULL, "SGI UV1",
 		{	DMI_MATCH(DMI_PRODUCT_NAME,	"Stoutland Platform"),
 			DMI_MATCH(DMI_PRODUCT_VERSION,	"1.0"),
@@ -580,8 +577,15 @@ void __init efi_apply_memmap_quirks(void)
 	}
 
 	/* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
-	if (dmi_check_system(sgi_uv1_dmi))
-		set_bit(EFI_OLD_MEMMAP, &efi.flags);
+	if (dmi_check_system(sgi_uv1_dmi)) {
+		if (IS_ENABLED(CONFIG_X86_UV)) {
+			set_bit(EFI_UV1_MEMMAP, &efi.flags);
+		} else {
+			pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n");
+			clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+			efi_memmap_unmap();
+		}
+	}
 }
 
 /*
@@ -719,7 +723,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
 	/*
 	 * Make sure that an efi runtime service caused the page fault.
 	 * "efi_mm" cannot be used to check if the page fault had occurred
-	 * in the firmware context because efi=old_map doesn't use efi_pgd.
+	 * in the firmware context because the UV1 memmap doesn't use efi_pgd.
 	 */
 	if (efi_rts_work.efi_rts_id == EFI_NONE)
 		return;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
index 44d1f884c3d3..139738bbdd36 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
@@ -6,21 +6,31 @@
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
  */
 
-#include <linux/gpio.h>
-#include <linux/platform_data/tc35876x.h>
+#include <linux/gpio/machine.h>
 #include <asm/intel-mid.h>
 
+static struct gpiod_lookup_table tc35876x_gpio_table = {
+	.dev_id	= "i2c_disp_brig",
+	.table	= {
+		GPIO_LOOKUP("0000:00:0c.0", -1, "bridge-reset", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("0000:00:0c.0", -1, "bl-en", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("0000:00:0c.0", -1, "vadd", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 /*tc35876x DSI_LVDS bridge chip and panel platform data*/
 static void *tc35876x_platform_data(void *data)
 {
-	static struct tc35876x_platform_data pdata;
+	struct gpiod_lookup_table *table = &tc35876x_gpio_table;
+	struct gpiod_lookup *lookup = table->table;
 
-	/* gpio pins set to -1 will not be used by the driver */
-	pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
-	pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
-	pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+	lookup[0].chip_hwnum = get_gpio_by_name("LCMB_RXEN");
+	lookup[1].chip_hwnum = get_gpio_by_name("6S6P_BL_EN");
+	lookup[2].chip_hwnum = get_gpio_by_name("EN_VREG_LCD_V3P3");
+	gpiod_add_lookup_table(table);
 
-	return &pdata;
+	return NULL;
 }
 
 static const struct devs_id tc35876x_dev_id __initconst = {
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 6dd25dc5f027..e9d97d52475e 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -29,6 +29,8 @@
 #include <asm/cpu_device_id.h>
 #include <asm/imr.h>
 #include <asm/iosf_mbi.h>
+#include <asm/io.h>
+
 #include <linux/debugfs.h>
 #include <linux/init.h>
 #include <linux/mm.h>
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 42f879b75f9b..4307830e1b6f 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -14,6 +14,8 @@
 #include <asm-generic/sections.h>
 #include <asm/cpu_device_id.h>
 #include <asm/imr.h>
+#include <asm/io.h>
+
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/types.h>
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index e1a32062a375..f067ac780ba7 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -12,7 +12,6 @@
 #include <linux/platform_device.h>
 #include <linux/export.h>
 #include <linux/pm.h>
-#include <linux/mfd/core.h>
 #include <linux/suspend.h>
 #include <linux/olpc-ec.h>
 
@@ -120,16 +119,11 @@ static const struct platform_suspend_ops xo1_suspend_ops = {
 static int xo1_pm_probe(struct platform_device *pdev)
 {
 	struct resource *res;
-	int err;
 
 	/* don't run on non-XOs */
 	if (!machine_is_olpc())
 		return -ENODEV;
 
-	err = mfd_cell_enable(pdev);
-	if (err)
-		return err;
-
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "can't fetch device resource info\n");
@@ -152,8 +146,6 @@ static int xo1_pm_probe(struct platform_device *pdev)
 
 static int xo1_pm_remove(struct platform_device *pdev)
 {
-	mfd_cell_disable(pdev);
-
 	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = 0;
 	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 99a28ce2244c..933dd4fe3a97 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -15,7 +15,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_wakeup.h>
-#include <linux/mfd/core.h>
 #include <linux/power_supply.h>
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
@@ -537,10 +536,6 @@ static int xo1_sci_probe(struct platform_device *pdev)
 	if (!machine_is_olpc())
 		return -ENODEV;
 
-	r = mfd_cell_enable(pdev);
-	if (r)
-		return r;
-
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "can't fetch device resource info\n");
@@ -605,7 +600,6 @@ err_ebook:
 
 static int xo1_sci_remove(struct platform_device *pdev)
 {
-	mfd_cell_disable(pdev);
 	free_irq(sci_irq, pdev);
 	cancel_work_sync(&sci_work);
 	free_ec_sci();
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 6d193bb36021..089413cd944e 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -39,7 +39,7 @@ static int set_lid_wake_behavior(bool wake_on_close)
 
 	status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
 	if (ACPI_FAILURE(status)) {
-		pr_warning(PFX "failed to set lid behavior\n");
+		pr_warn(PFX "failed to set lid behavior\n");
 		return 1;
 	}
 
diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S
index 5fee3a2c2fd4..75f4faff8468 100644
--- a/arch/x86/platform/olpc/xo1-wakeup.S
+++ b/arch/x86/platform/olpc/xo1-wakeup.S
@@ -90,7 +90,7 @@ restore_registers:
 
 	ret
 
-ENTRY(do_olpc_suspend_lowlevel)
+SYM_CODE_START(do_olpc_suspend_lowlevel)
 	call	save_processor_state
 	call	save_registers
 
@@ -110,6 +110,7 @@ ret_point:
 	call	restore_registers
 	call	restore_processor_state
 	ret
+SYM_CODE_END(do_olpc_suspend_lowlevel)
 
 .data
 saved_gdt:             .long   0,0
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 1f8825bbaffb..43b4d864817e 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -50,7 +50,7 @@
 #define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)
 #define PVH_CANARY_SEL		(PVH_GDT_ENTRY_CANARY * 8)
 
-ENTRY(pvh_start_xen)
+SYM_CODE_START_LOCAL(pvh_start_xen)
 	cld
 
 	lgdt (_pa(gdt))
@@ -146,15 +146,16 @@ ENTRY(pvh_start_xen)
 
 	ljmp $PVH_CS_SEL, $_pa(startup_32)
 #endif
-END(pvh_start_xen)
+SYM_CODE_END(pvh_start_xen)
 
 	.section ".init.data","aw"
 	.balign 8
-gdt:
+SYM_DATA_START_LOCAL(gdt)
 	.word gdt_end - gdt_start
 	.long _pa(gdt_start)
 	.word 0
-gdt_start:
+SYM_DATA_END(gdt)
+SYM_DATA_START_LOCAL(gdt_start)
 	.quad 0x0000000000000000            /* NULL descriptor */
 #ifdef CONFIG_X86_64
 	.quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */
@@ -163,15 +164,14 @@ gdt_start:
 #endif
 	.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
 	.quad GDT_ENTRY(0x4090, 0, 0x18)    /* PVH_CANARY_SEL */
-gdt_end:
+SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
 
 	.balign 16
-canary:
-	.fill 48, 1, 0
+SYM_DATA_LOCAL(canary, .fill 48, 1, 0)
 
-early_stack:
+SYM_DATA_START_LOCAL(early_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
-early_stack_end:
+SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
 
 	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
 	             _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bf6016f8db4e..6259563760f9 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -26,8 +26,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 static void __init mp_sfi_register_lapic(u8 id)
 {
 	if (MAX_LOCAL_APIC - id <= 0) {
-		pr_warning("Processor #%d invalid (max %d)\n",
-			id, MAX_LOCAL_APIC);
+		pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC);
 		return;
 	}
 
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index c2ee31953372..607f58147311 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -31,13 +31,16 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 		return BIOS_STATUS_UNIMPLEMENTED;
 
 	/*
-	 * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+	 * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI
 	 * callback method, which uses efi_call() directly, with the kernel page tables:
 	 */
-	if (unlikely(efi_enabled(EFI_OLD_MEMMAP)))
+	if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) {
+		kernel_fpu_begin();
 		ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
-	else
+		kernel_fpu_end();
+	} else {
 		ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+	}
 
 	return ret;
 }
@@ -184,20 +187,20 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
 }
 EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
 
-void uv_bios_init(void)
+int uv_bios_init(void)
 {
 	uv_systab = NULL;
 	if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
 	    !uv_systab_phys || efi_runtime_disabled()) {
 		pr_crit("UV: UVsystab: missing\n");
-		return;
+		return -EEXIST;
 	}
 
 	uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
 	if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
 		pr_err("UV: UVsystab: bad signature!\n");
 		iounmap(uv_systab);
-		return;
+		return -EINVAL;
 	}
 
 	/* Starting with UV4 the UV systab size is variable */
@@ -208,8 +211,169 @@ void uv_bios_init(void)
 		uv_systab = ioremap(uv_systab_phys, size);
 		if (!uv_systab) {
 			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
-			return;
+			return -EFAULT;
 		}
 	}
 	pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
+	return 0;
+}
+
+static void __init early_code_mapping_set_exec(int executable)
+{
+	efi_memory_desc_t *md;
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return;
+
+	/* Make EFI service code area executable */
+	for_each_efi_memory_desc(md) {
+		if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+		    md->type == EFI_BOOT_SERVICES_CODE)
+			efi_set_executable(md, executable);
+	}
+}
+
+void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd)
+{
+	/*
+	 * After the lock is released, the original page table is restored.
+	 */
+	int pgd_idx, i;
+	int nr_pgds;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
+		if (!pgd_present(*pgd))
+			continue;
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			p4d = p4d_offset(pgd,
+					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+			if (!p4d_present(*p4d))
+				continue;
+
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
+			pud_free(&init_mm, pud);
+		}
+
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		p4d_free(&init_mm, p4d);
+	}
+
+	kfree(save_pgd);
+
+	__flush_tlb_all();
+	early_code_mapping_set_exec(0);
+}
+
+pgd_t * __init efi_uv1_memmap_phys_prolog(void)
+{
+	unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+	pgd_t *save_pgd, *pgd_k, *pgd_efi;
+	p4d_t *p4d, *p4d_k, *p4d_efi;
+	pud_t *pud;
+
+	int pgd;
+	int n_pgds, i, j;
+
+	early_code_mapping_set_exec(1);
+
+	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+	if (!save_pgd)
+		return NULL;
+
+	/*
+	 * Build 1:1 identity mapping for UV1 memmap usage. Note that
+	 * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+	 * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+	 * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+	 * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+	 * This means here we can only reuse the PMD tables of the direct mapping.
+	 */
+	for (pgd = 0; pgd < n_pgds; pgd++) {
+		addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+		vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+		pgd_efi = pgd_offset_k(addr_pgd);
+		save_pgd[pgd] = *pgd_efi;
+
+		p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+		if (!p4d) {
+			pr_err("Failed to allocate p4d table!\n");
+			goto out;
+		}
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			addr_p4d = addr_pgd + i * P4D_SIZE;
+			p4d_efi = p4d + p4d_index(addr_p4d);
+
+			pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+			if (!pud) {
+				pr_err("Failed to allocate pud table!\n");
+				goto out;
+			}
+
+			for (j = 0; j < PTRS_PER_PUD; j++) {
+				addr_pud = addr_p4d + j * PUD_SIZE;
+
+				if (addr_pud > (max_pfn << PAGE_SHIFT))
+					break;
+
+				vaddr = (unsigned long)__va(addr_pud);
+
+				pgd_k = pgd_offset_k(vaddr);
+				p4d_k = p4d_offset(pgd_k, vaddr);
+				pud[j] = *pud_offset(p4d_k, vaddr);
+			}
+		}
+		pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
+	}
+
+	__flush_tlb_all();
+	return save_pgd;
+out:
+	efi_uv1_memmap_phys_epilog(save_pgd);
+	return NULL;
+}
+
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+				 u32 type, u64 attribute)
+{
+	unsigned long last_map_pfn;
+
+	if (type == EFI_MEMORY_MAPPED_IO)
+		return ioremap(phys_addr, size);
+
+	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+		unsigned long top = last_map_pfn << PAGE_SHIFT;
+		efi_ioremap(top, size - (top - phys_addr), type, attribute);
+	}
+
+	if (!(attribute & EFI_MEMORY_WB))
+		efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+
+	return (void __iomem *)__va(phys_addr);
+}
+
+static int __init arch_parse_efi_cmdline(char *str)
+{
+	if (!str) {
+		pr_warn("need at least one option\n");
+		return -EINVAL;
+	}
+
+	if (!efi_is_mixed() && parse_option_str(str, "old_map"))
+		set_bit(EFI_UV1_MEMMAP, &efi.flags);
+
+	return 0;
 }
+early_param("efi", arch_parse_efi_cmdline);
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6fe383002125..8786653ad3c0 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -16,7 +16,7 @@
 
 .text
 
-ENTRY(swsusp_arch_suspend)
+SYM_FUNC_START(swsusp_arch_suspend)
 	movl %esp, saved_context_esp
 	movl %ebx, saved_context_ebx
 	movl %ebp, saved_context_ebp
@@ -33,9 +33,9 @@ ENTRY(swsusp_arch_suspend)
 	call swsusp_save
 	FRAME_END
 	ret
-ENDPROC(swsusp_arch_suspend)
+SYM_FUNC_END(swsusp_arch_suspend)
 
-ENTRY(restore_image)
+SYM_CODE_START(restore_image)
 	/* prepare to jump to the image kernel */
 	movl	restore_jump_address, %ebx
 	movl	restore_cr3, %ebp
@@ -45,9 +45,10 @@ ENTRY(restore_image)
 	/* jump to relocated restore code */
 	movl	relocated_restore_code, %eax
 	jmpl	*%eax
+SYM_CODE_END(restore_image)
 
 /* code below has been relocated to a safe page */
-ENTRY(core_restore_code)
+SYM_CODE_START(core_restore_code)
 	movl	temp_pgt, %eax
 	movl	%eax, %cr3
 
@@ -77,10 +78,11 @@ copy_loop:
 
 done:
 	jmpl	*%ebx
+SYM_CODE_END(core_restore_code)
 
 	/* code below belongs to the image kernel */
 	.align PAGE_SIZE
-ENTRY(restore_registers)
+SYM_FUNC_START(restore_registers)
 	/* go back to the original page tables */
 	movl	%ebp, %cr3
 	movl	mmu_cr4_features, %ecx
@@ -107,4 +109,4 @@ ENTRY(restore_registers)
 	movl	%eax, in_suspend
 
 	ret
-ENDPROC(restore_registers)
+SYM_FUNC_END(restore_registers)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index a4d5eb0a7ece..7918b8415f13 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -22,7 +22,7 @@
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
 
-ENTRY(swsusp_arch_suspend)
+SYM_FUNC_START(swsusp_arch_suspend)
 	movq	$saved_context, %rax
 	movq	%rsp, pt_regs_sp(%rax)
 	movq	%rbp, pt_regs_bp(%rax)
@@ -50,9 +50,9 @@ ENTRY(swsusp_arch_suspend)
 	call swsusp_save
 	FRAME_END
 	ret
-ENDPROC(swsusp_arch_suspend)
+SYM_FUNC_END(swsusp_arch_suspend)
 
-ENTRY(restore_image)
+SYM_CODE_START(restore_image)
 	/* prepare to jump to the image kernel */
 	movq	restore_jump_address(%rip), %r8
 	movq	restore_cr3(%rip), %r9
@@ -67,9 +67,10 @@ ENTRY(restore_image)
 	/* jump to relocated restore code */
 	movq	relocated_restore_code(%rip), %rcx
 	jmpq	*%rcx
+SYM_CODE_END(restore_image)
 
 	/* code below has been relocated to a safe page */
-ENTRY(core_restore_code)
+SYM_CODE_START(core_restore_code)
 	/* switch to temporary page tables */
 	movq	%rax, %cr3
 	/* flush TLB */
@@ -97,10 +98,11 @@ ENTRY(core_restore_code)
 .Ldone:
 	/* jump to the restore_registers address from the image header */
 	jmpq	*%r8
+SYM_CODE_END(core_restore_code)
 
 	 /* code below belongs to the image kernel */
 	.align PAGE_SIZE
-ENTRY(restore_registers)
+SYM_FUNC_START(restore_registers)
 	/* go back to the original page tables */
 	movq    %r9, %cr3
 
@@ -142,4 +144,4 @@ ENTRY(restore_registers)
 	movq	%rax, in_suspend(%rip)
 
 	ret
-ENDPROC(restore_registers)
+SYM_FUNC_END(restore_registers)
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
index 275a646d1048..0b4390ce586b 100644
--- a/arch/x86/purgatory/entry64.S
+++ b/arch/x86/purgatory/entry64.S
@@ -8,13 +8,13 @@
  * This code has been taken from kexec-tools.
  */
 
+#include <linux/linkage.h>
+
 	.text
 	.balign 16
 	.code64
-	.globl entry64, entry64_regs
-
 
-entry64:
+SYM_CODE_START(entry64)
 	/* Setup a gdt that should be preserved */
 	lgdt gdt(%rip)
 
@@ -54,10 +54,11 @@ new_cs_exit:
 
 	/* Jump to the new code... */
 	jmpq	*rip(%rip)
+SYM_CODE_END(entry64)
 
 	.section ".rodata"
 	.balign 4
-entry64_regs:
+SYM_DATA_START(entry64_regs)
 rax:	.quad 0x0
 rcx:	.quad 0x0
 rdx:	.quad 0x0
@@ -75,13 +76,14 @@ r13:	.quad 0x0
 r14:	.quad 0x0
 r15:	.quad 0x0
 rip:	.quad 0x0
-	.size entry64_regs, . - entry64_regs
+SYM_DATA_END(entry64_regs)
 
 	/* GDT */
 	.section ".rodata"
 	.balign 16
-gdt:
-	/* 0x00 unusable segment
+SYM_DATA_START_LOCAL(gdt)
+	/*
+	 * 0x00 unusable segment
 	 * 0x08 unused
 	 * so use them as gdt ptr
 	 */
@@ -94,6 +96,8 @@ gdt:
 
 	/* 0x18 4GB flat data segment */
 	.word 0xFFFF, 0x0000, 0x9200, 0x00CF
-gdt_end:
-stack:	.quad   0, 0
-stack_init:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START_LOCAL(stack)
+	.quad   0, 0
+SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, stack_init)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 3b95410ff0f8..2961234d0795 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -14,28 +14,10 @@
 
 #include "../boot/string.h"
 
-unsigned long purgatory_backup_dest __section(.kexec-purgatory);
-unsigned long purgatory_backup_src __section(.kexec-purgatory);
-unsigned long purgatory_backup_sz __section(.kexec-purgatory);
-
 u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
 
 struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
 
-/*
- * On x86, second kernel requries first 640K of memory to boot. Copy
- * first 640K to a backup region in reserved memory range so that second
- * kernel can use first 640K.
- */
-static int copy_backup_region(void)
-{
-	if (purgatory_backup_dest) {
-		memcpy((void *)purgatory_backup_dest,
-		       (void *)purgatory_backup_src, purgatory_backup_sz);
-	}
-	return 0;
-}
-
 static int verify_sha256_digest(void)
 {
 	struct kexec_sha_region *ptr, *end;
@@ -66,7 +48,6 @@ void purgatory(void)
 		for (;;)
 			;
 	}
-	copy_backup_region();
 }
 
 /*
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index 321146be741d..89d9e9e53fcd 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -7,14 +7,14 @@
  *
  * This code has been taken from kexec-tools.
  */
+#include <linux/linkage.h>
 #include <asm/purgatory.h>
 
 	.text
-	.globl purgatory_start
 	.balign 16
-purgatory_start:
 	.code64
 
+SYM_CODE_START(purgatory_start)
 	/* Load a gdt so I know what the segment registers are */
 	lgdt	gdt(%rip)
 
@@ -32,10 +32,12 @@ purgatory_start:
 	/* Call the C code */
 	call purgatory
 	jmp	entry64
+SYM_CODE_END(purgatory_start)
 
 	.section ".rodata"
 	.balign 16
-gdt:	/* 0x00 unusable segment
+SYM_DATA_START_LOCAL(gdt)
+	/* 0x00 unusable segment
 	 * 0x08 unused
 	 * so use them as the gdt ptr
 	 */
@@ -48,10 +50,10 @@ gdt:	/* 0x00 unusable segment
 
 	/* 0x18 4GB flat data segment */
 	.word	0xFFFF, 0x0000, 0x9200, 0x00CF
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
 
 	.bss
 	.balign 4096
-lstack:
+SYM_DATA_START_LOCAL(lstack)
 	.skip 4096
-lstack_end:
+SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end)
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
index 8b1427422dfc..1ef507ca50a5 100644
--- a/arch/x86/purgatory/stack.S
+++ b/arch/x86/purgatory/stack.S
@@ -5,13 +5,14 @@
  * Copyright (C) 2014 Red Hat Inc.
  */
 
+#include <linux/linkage.h>
+
 	/* A stack for the loaded kernel.
 	 * Separate and in the data section so it can be prepopulated.
 	 */
 	.data
 	.balign 4096
-	.globl stack, stack_end
 
-stack:
+SYM_DATA_START(stack)
 	.skip 4096
-stack_end:
+SYM_DATA_END_LABEL(stack, SYM_L_GLOBAL, stack_end)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 7dce39c8c034..262f83cad355 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,6 +8,7 @@
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
 #include <asm/tlbflush.h>
+#include <asm/crash.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -34,6 +35,7 @@ void __init reserve_real_mode(void)
 
 	memblock_reserve(mem, size);
 	set_real_mode_mem(mem);
+	crash_reserve_low_1M();
 }
 
 static void __init setup_real_mode(void)
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 6363761cc74c..af04512c02d9 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -14,7 +14,7 @@
 	.section ".header", "a"
 
 	.balign	16
-GLOBAL(real_mode_header)
+SYM_DATA_START(real_mode_header)
 	.long	pa_text_start
 	.long	pa_ro_end
 	/* SMP trampoline */
@@ -33,11 +33,9 @@ GLOBAL(real_mode_header)
 #ifdef CONFIG_X86_64
 	.long	__KERNEL32_CS
 #endif
-END(real_mode_header)
+SYM_DATA_END(real_mode_header)
 
 	/* End signature, used to verify integrity */
 	.section ".signature","a"
 	.balign 4
-GLOBAL(end_signature)
-	.long	REALMODE_END_SIGNATURE
-END(end_signature)
+SYM_DATA(end_signature, .long REALMODE_END_SIGNATURE)
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
index 3bb980800c58..64d135d1ee63 100644
--- a/arch/x86/realmode/rm/realmode.lds.S
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -11,6 +11,7 @@
 
 OUTPUT_FORMAT("elf32-i386")
 OUTPUT_ARCH(i386)
+ENTRY(pa_text_start)
 
 SECTIONS
 {
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
index cd2f97b9623b..f10515b10e0a 100644
--- a/arch/x86/realmode/rm/reboot.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -19,7 +19,7 @@
  */
 	.section ".text32", "ax"
 	.code32
-ENTRY(machine_real_restart_asm)
+SYM_CODE_START(machine_real_restart_asm)
 
 #ifdef CONFIG_X86_64
 	/* Switch to trampoline GDT as it is guaranteed < 4 GiB */
@@ -33,7 +33,7 @@ ENTRY(machine_real_restart_asm)
 	movl	%eax, %cr0
 	ljmpl	$__KERNEL32_CS, $pa_machine_real_restart_paging_off
 
-GLOBAL(machine_real_restart_paging_off)
+SYM_INNER_LABEL(machine_real_restart_paging_off, SYM_L_GLOBAL)
 	xorl	%eax, %eax
 	xorl	%edx, %edx
 	movl	$MSR_EFER, %ecx
@@ -63,6 +63,7 @@ GLOBAL(machine_real_restart_paging_off)
 	movl	%ecx, %gs
 	movl	%ecx, %ss
 	ljmpw	$8, $1f
+SYM_CODE_END(machine_real_restart_asm)
 
 /*
  * This is 16-bit protected mode code to disable paging and the cache,
@@ -127,13 +128,13 @@ bios:
 	.section ".rodata", "a"
 
 	.balign	16
-GLOBAL(machine_real_restart_idt)
+SYM_DATA_START(machine_real_restart_idt)
 	.word	0xffff		/* Length - real mode default value */
 	.long	0		/* Base - real mode default value */
-END(machine_real_restart_idt)
+SYM_DATA_END(machine_real_restart_idt)
 
 	.balign	16
-GLOBAL(machine_real_restart_gdt)
+SYM_DATA_START(machine_real_restart_gdt)
 	/* Self-pointer */
 	.word	0xffff		/* Length - real mode default value */
 	.long	pa_machine_real_restart_gdt
@@ -153,4 +154,4 @@ GLOBAL(machine_real_restart_gdt)
 	 * semantics we don't have to reload the segments once CR0.PE = 0.
 	 */
 	.quad	GDT_ENTRY(0x0093, 0x100, 0xffff)
-END(machine_real_restart_gdt)
+SYM_DATA_END(machine_real_restart_gdt)
diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S
index 8d4cb64799ea..0fca64061ad2 100644
--- a/arch/x86/realmode/rm/stack.S
+++ b/arch/x86/realmode/rm/stack.S
@@ -6,15 +6,13 @@
 #include <linux/linkage.h>
 
 	.data
-GLOBAL(HEAP)
-	.long	rm_heap
-GLOBAL(heap_end)
-	.long	rm_stack
+SYM_DATA(HEAP,		.long rm_heap)
+SYM_DATA(heap_end,	.long rm_stack)
 
 	.bss
 	.balign	16
-GLOBAL(rm_heap)
-	.space	2048
-GLOBAL(rm_stack)
+SYM_DATA(rm_heap,	.space 2048)
+
+SYM_DATA_START(rm_stack)
 	.space	2048
-GLOBAL(rm_stack_end)
+SYM_DATA_END_LABEL(rm_stack, SYM_L_GLOBAL, rm_stack_end)
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S
index 1868b158480d..3fad907a179f 100644
--- a/arch/x86/realmode/rm/trampoline_32.S
+++ b/arch/x86/realmode/rm/trampoline_32.S
@@ -29,7 +29,7 @@
 	.code16
 
 	.balign	PAGE_SIZE
-ENTRY(trampoline_start)
+SYM_CODE_START(trampoline_start)
 	wbinvd			# Needed for NUMA-Q should be harmless for others
 
 	LJMPW_RM(1f)
@@ -54,18 +54,20 @@ ENTRY(trampoline_start)
 	lmsw	%dx			# into protected mode
 
 	ljmpl	$__BOOT_CS, $pa_startup_32
+SYM_CODE_END(trampoline_start)
 
 	.section ".text32","ax"
 	.code32
-ENTRY(startup_32)			# note: also used from wakeup_asm.S
+SYM_CODE_START(startup_32)			# note: also used from wakeup_asm.S
 	jmp	*%eax
+SYM_CODE_END(startup_32)
 
 	.bss
 	.balign 8
-GLOBAL(trampoline_header)
-	tr_start:		.space	4
-	tr_gdt_pad:		.space	2
-	tr_gdt:			.space	6
-END(trampoline_header)
+SYM_DATA_START(trampoline_header)
+	SYM_DATA_LOCAL(tr_start,	.space 4)
+	SYM_DATA_LOCAL(tr_gdt_pad,	.space 2)
+	SYM_DATA_LOCAL(tr_gdt,		.space 6)
+SYM_DATA_END(trampoline_header)
 	
 #include "trampoline_common.S"
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index aee2b45d83b8..251758ed7443 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -38,7 +38,7 @@
 	.code16
 
 	.balign	PAGE_SIZE
-ENTRY(trampoline_start)
+SYM_CODE_START(trampoline_start)
 	cli			# We should be safe anyway
 	wbinvd
 
@@ -78,12 +78,14 @@ ENTRY(trampoline_start)
 no_longmode:
 	hlt
 	jmp no_longmode
+SYM_CODE_END(trampoline_start)
+
 #include "../kernel/verify_cpu.S"
 
 	.section ".text32","ax"
 	.code32
 	.balign 4
-ENTRY(startup_32)
+SYM_CODE_START(startup_32)
 	movl	%edx, %ss
 	addl	$pa_real_mode_base, %esp
 	movl	%edx, %ds
@@ -137,38 +139,39 @@ ENTRY(startup_32)
 	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
 	 */
 	ljmpl	$__KERNEL_CS, $pa_startup_64
+SYM_CODE_END(startup_32)
 
 	.section ".text64","ax"
 	.code64
 	.balign 4
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
 	# Now jump into the kernel using virtual addresses
 	jmpq	*tr_start(%rip)
+SYM_CODE_END(startup_64)
 
 	.section ".rodata","a"
 	# Duplicate the global descriptor table
 	# so the kernel can live anywhere
 	.balign	16
-	.globl tr_gdt
-tr_gdt:
+SYM_DATA_START(tr_gdt)
 	.short	tr_gdt_end - tr_gdt - 1	# gdt limit
 	.long	pa_tr_gdt
 	.short	0
 	.quad	0x00cf9b000000ffff	# __KERNEL32_CS
 	.quad	0x00af9b000000ffff	# __KERNEL_CS
 	.quad	0x00cf93000000ffff	# __KERNEL_DS
-tr_gdt_end:
+SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)
 
 	.bss
 	.balign	PAGE_SIZE
-GLOBAL(trampoline_pgd)		.space	PAGE_SIZE
+SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
 
 	.balign	8
-GLOBAL(trampoline_header)
-	tr_start:		.space	8
-	GLOBAL(tr_efer)		.space	8
-	GLOBAL(tr_cr4)		.space	4
-	GLOBAL(tr_flags)	.space	4
-END(trampoline_header)
+SYM_DATA_START(trampoline_header)
+	SYM_DATA_LOCAL(tr_start,	.space 8)
+	SYM_DATA(tr_efer,		.space 8)
+	SYM_DATA(tr_cr4,		.space 4)
+	SYM_DATA(tr_flags,		.space 4)
+SYM_DATA_END(trampoline_header)
 
 #include "trampoline_common.S"
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
index 8d8208dcca24..5033e640f957 100644
--- a/arch/x86/realmode/rm/trampoline_common.S
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -1,4 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 	.section ".rodata","a"
 	.balign	16
-tr_idt: .fill 1, 6, 0
+SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S
index 05ac9c17c811..02d0ba16ae33 100644
--- a/arch/x86/realmode/rm/wakeup_asm.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -17,7 +17,7 @@
 	.section ".data", "aw"
 
 	.balign	16
-GLOBAL(wakeup_header)
+SYM_DATA_START(wakeup_header)
 	video_mode:	.short	0	/* Video mode number */
 	pmode_entry:	.long	0
 	pmode_cs:	.short	__KERNEL_CS
@@ -31,13 +31,13 @@ GLOBAL(wakeup_header)
 	realmode_flags:	.long	0
 	real_magic:	.long	0
 	signature:	.long	WAKEUP_HEADER_SIGNATURE
-END(wakeup_header)
+SYM_DATA_END(wakeup_header)
 
 	.text
 	.code16
 
 	.balign	16
-ENTRY(wakeup_start)
+SYM_CODE_START(wakeup_start)
 	cli
 	cld
 
@@ -73,7 +73,7 @@ ENTRY(wakeup_start)
 	movw	%ax, %fs
 	movw	%ax, %gs
 
-	lidtl	wakeup_idt
+	lidtl	.Lwakeup_idt
 
 	/* Clear the EFLAGS */
 	pushl $0
@@ -135,6 +135,7 @@ ENTRY(wakeup_start)
 #else
 	jmp	trampoline_start
 #endif
+SYM_CODE_END(wakeup_start)
 
 bogus_real_magic:
 1:
@@ -152,7 +153,7 @@ bogus_real_magic:
 	 */
 
 	.balign	16
-GLOBAL(wakeup_gdt)
+SYM_DATA_START(wakeup_gdt)
 	.word	3*8-1		/* Self-descriptor */
 	.long	pa_wakeup_gdt
 	.word	0
@@ -164,15 +165,15 @@ GLOBAL(wakeup_gdt)
 	.word	0xffff		/* 16-bit data segment @ real_mode_base */
 	.long	0x93000000 + pa_real_mode_base
 	.word	0x008f		/* big real mode */
-END(wakeup_gdt)
+SYM_DATA_END(wakeup_gdt)
 
 	.section ".rodata","a"
 	.balign	8
 
 	/* This is the standard real-mode IDT */
 	.balign	16
-GLOBAL(wakeup_idt)
+SYM_DATA_START_LOCAL(.Lwakeup_idt)
 	.word	0xffff		/* limit */
 	.long	0		/* address */
 	.word	0
-END(wakeup_idt)
+SYM_DATA_END(.Lwakeup_idt)
diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S
index c078dba40cef..c8fef76743f6 100644
--- a/arch/x86/realmode/rmpiggy.S
+++ b/arch/x86/realmode/rmpiggy.S
@@ -10,12 +10,10 @@
 
 	.balign PAGE_SIZE
 
-GLOBAL(real_mode_blob)
+SYM_DATA_START(real_mode_blob)
 	.incbin	"arch/x86/realmode/rm/realmode.bin"
-END(real_mode_blob)
+SYM_DATA_END_LABEL(real_mode_blob, SYM_L_GLOBAL, real_mode_blob_end)
 
-GLOBAL(real_mode_blob_end);
-
-GLOBAL(real_mode_relocs)
+SYM_DATA_START(real_mode_relocs)
 	.incbin	"arch/x86/realmode/rm/realmode.relocs"
-END(real_mode_relocs)
+SYM_DATA_END(real_mode_relocs)
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index b02a36b2c14f..a42015b305f4 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -69,7 +69,7 @@ BEGIN {
 
 	lprefix1_expr = "\\((66|!F3)\\)"
 	lprefix2_expr = "\\(F3\\)"
-	lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
 	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
@@ -257,7 +257,7 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 	return add_flags(imm, mod)
 }
 
-/^[0-9a-f]+\:/ {
+/^[0-9a-f]+:/ {
 	if (NR == 1)
 		next
 	# get index
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index 5bd949da7a4a..ac8eee093f9c 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -215,14 +215,12 @@ static int set_tls_entry(struct task_struct* task, struct user_desc *info,
 	return 0;
 }
 
-int arch_copy_tls(struct task_struct *new)
+int arch_set_tls(struct task_struct *new, unsigned long tls)
 {
 	struct user_desc info;
 	int idx, ret = -EFAULT;
 
-	if (copy_from_user(&info,
-			   (void __user *) UPT_SI(&new->thread.regs.regs),
-			   sizeof(info)))
+	if (copy_from_user(&info, (void __user *) tls, sizeof(info)))
 		goto out;
 
 	ret = -EINVAL;
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index 3a621e0d3925..ebd3855d9b13 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -6,14 +6,13 @@ void clear_flushed_tls(struct task_struct *task)
 {
 }
 
-int arch_copy_tls(struct task_struct *t)
+int arch_set_tls(struct task_struct *t, unsigned long tls)
 {
 	/*
 	 * If CLONE_SETTLS is set, we need to save the thread id
-	 * (which is argument 5, child_tid, of clone) so it can be set
-	 * during context switches.
+	 * so it can be set during context switches.
 	 */
-	t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)];
+	t->thread.arch.fs = tls;
 
 	return 0;
 }
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
index 891868756a51..2112b8d14668 100644
--- a/arch/x86/um/vdso/um_vdso.c
+++ b/arch/x86/um/vdso/um_vdso.c
@@ -13,7 +13,7 @@
 #include <linux/getcpu.h>
 #include <asm/unistd.h>
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
 {
 	long ret;
 
@@ -22,10 +22,10 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 
 	return ret;
 }
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct __kernel_old_timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	long ret;
 
@@ -34,10 +34,10 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 
 	return ret;
 }
-int gettimeofday(struct timeval *, struct timezone *)
+int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
 
-time_t __vdso_time(time_t *t)
+__kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
 {
 	long secs;
 
@@ -47,7 +47,7 @@ time_t __vdso_time(time_t *t)
 
 	return secs;
 }
-time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time")));
 
 long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
index a4a3870dc059..a6eaf293a73b 100644
--- a/arch/x86/um/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -1,11 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/init.h>
+#include <linux/linkage.h>
 
 __INITDATA
 
-	.globl vdso_start, vdso_end
-vdso_start:
+SYM_DATA_START(vdso_start)
 	.incbin "arch/x86/um/vdso/vdso.so"
-vdso_end:
+SYM_DATA_END_LABEL(vdso_start, SYM_L_GLOBAL, vdso_end)
 
 __FINIT
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index ba5a41828e9d..1aded63a95cb 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -62,10 +62,10 @@ config XEN_512GB
 	  boot parameter "xen_512gb_limit".
 
 config XEN_SAVE_RESTORE
-       bool
-       depends on XEN
-       select HIBERNATE_CALLBACKS
-       default y
+	bool
+	depends on XEN
+	select HIBERNATE_CALLBACKS
+	default y
 
 config XEN_DEBUG_FS
 	bool "Enable Xen debug and tuning parameters in debugfs"
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a04551ee5568..1abe455d926a 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -31,7 +31,7 @@ static efi_system_table_t efi_systab_xen __initdata = {
 	.con_in_handle	= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
 	.con_in		= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
 	.con_out_handle	= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
-	.con_out	= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+	.con_out	= NULL, 		  /* Not used under Xen. */
 	.stderr_handle	= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
 	.stderr		= EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
 	.runtime	= (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5bfea374a160..ae4a41ca19f6 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -837,15 +837,6 @@ static void xen_load_sp0(unsigned long sp0)
 	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
-void xen_set_iopl_mask(unsigned mask)
-{
-	struct physdev_set_iopl set_iopl;
-
-	/* Force the change at ring 0. */
-	set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
-	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-}
-
 static void xen_io_delay(void)
 {
 }
@@ -1055,7 +1046,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.write_idt_entry = xen_write_idt_entry,
 	.load_sp0 = xen_load_sp0,
 
-	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
 
 	/* Xen takes care of %gs when switching to usermode for us */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..bbba8b17829a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -67,7 +67,7 @@
 #include <asm/linkage.h>
 #include <asm/page.h>
 #include <asm/init.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/smp.h>
 #include <asm/tlb.h>
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 548d1e0a5ba1..33b0e20df7fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -412,7 +412,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 
 		remap_range_size = xen_find_pfn_range(&remap_pfn);
 		if (!remap_range_size) {
-			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+			pr_warn("Unable to find available pfn range, not remapping identity pages\n");
 			xen_set_identity_and_release_chunk(cur_pfn,
 						cur_pfn + left, nr_pages);
 			break;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index be104eef80be..508fe204520b 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -19,7 +19,7 @@
  * event status with one and operation.  If there are pending events,
  * then enter the hypervisor to get them handled.
  */
-ENTRY(xen_irq_enable_direct)
+SYM_FUNC_START(xen_irq_enable_direct)
 	FRAME_BEGIN
 	/* Unmask events */
 	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
@@ -38,17 +38,17 @@ ENTRY(xen_irq_enable_direct)
 1:
 	FRAME_END
 	ret
-	ENDPROC(xen_irq_enable_direct)
+SYM_FUNC_END(xen_irq_enable_direct)
 
 
 /*
  * Disabling events is simply a matter of making the event mask
  * non-zero.
  */
-ENTRY(xen_irq_disable_direct)
+SYM_FUNC_START(xen_irq_disable_direct)
 	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	ret
-ENDPROC(xen_irq_disable_direct)
+SYM_FUNC_END(xen_irq_disable_direct)
 
 /*
  * (xen_)save_fl is used to get the current interrupt enable status.
@@ -59,12 +59,12 @@ ENDPROC(xen_irq_disable_direct)
  * undefined.  We need to toggle the state of the bit, because Xen and
  * x86 use opposite senses (mask vs enable).
  */
-ENTRY(xen_save_fl_direct)
+SYM_FUNC_START(xen_save_fl_direct)
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
 	addb %ah, %ah
 	ret
-	ENDPROC(xen_save_fl_direct)
+SYM_FUNC_END(xen_save_fl_direct)
 
 
 /*
@@ -74,7 +74,7 @@ ENTRY(xen_save_fl_direct)
  * interrupt mask state, it checks for unmasked pending events and
  * enters the hypervisor to get them delivered if so.
  */
-ENTRY(xen_restore_fl_direct)
+SYM_FUNC_START(xen_restore_fl_direct)
 	FRAME_BEGIN
 #ifdef CONFIG_X86_64
 	testw $X86_EFLAGS_IF, %di
@@ -95,14 +95,14 @@ ENTRY(xen_restore_fl_direct)
 1:
 	FRAME_END
 	ret
-	ENDPROC(xen_restore_fl_direct)
+SYM_FUNC_END(xen_restore_fl_direct)
 
 
 /*
  * Force an event check by making a hypercall, but preserve regs
  * before making the call.
  */
-ENTRY(check_events)
+SYM_FUNC_START(check_events)
 	FRAME_BEGIN
 #ifdef CONFIG_X86_32
 	push %eax
@@ -135,19 +135,19 @@ ENTRY(check_events)
 #endif
 	FRAME_END
 	ret
-ENDPROC(check_events)
+SYM_FUNC_END(check_events)
 
-ENTRY(xen_read_cr2)
+SYM_FUNC_START(xen_read_cr2)
 	FRAME_BEGIN
 	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
 	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
 	FRAME_END
 	ret
-	ENDPROC(xen_read_cr2);
+SYM_FUNC_END(xen_read_cr2);
 
-ENTRY(xen_read_cr2_direct)
+SYM_FUNC_START(xen_read_cr2_direct)
 	FRAME_BEGIN
 	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
 	FRAME_END
 	ret
-	ENDPROC(xen_read_cr2_direct);
+SYM_FUNC_END(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index c15db060a242..2712e9155306 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -56,7 +56,7 @@
 	_ASM_EXTABLE(1b,2b)
 .endm
 
-ENTRY(xen_iret)
+SYM_CODE_START(xen_iret)
 	/* test eflags for special cases */
 	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
 	jnz hyper_iret
@@ -122,14 +122,14 @@ xen_iret_end_crit:
 hyper_iret:
 	/* put this out of line since its very rarely used */
 	jmp hypercall_page + __HYPERVISOR_iret * 32
+SYM_CODE_END(xen_iret)
 
 	.globl xen_iret_start_crit, xen_iret_end_crit
 
 /*
- * This is called by xen_hypervisor_callback in entry.S when it sees
+ * This is called by xen_hypervisor_callback in entry_32.S when it sees
  * that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit.  We're passed the EIP in
- * %eax so we can do a more refined determination of what to do.
+ * xen_iret_start_crit and xen_iret_end_crit.
  *
  * The stack format at this point is:
  *	----------------
@@ -138,70 +138,46 @@ hyper_iret:
  *	 eflags		}  outer exception info
  *	 cs		}
  *	 eip		}
- *	---------------- <- edi (copy dest)
- *	 eax		:  outer eax if it hasn't been restored
  *	----------------
- *	 eflags		}  nested exception info
- *	 cs		}   (no ss/esp because we're nested
- *	 eip		}    from the same ring)
- *	 orig_eax	}<- esi (copy src)
- *	 - - - - - - - -
- *	 fs		}
- *	 es		}
- *	 ds		}  SAVE_ALL state
- *	 eax		}
- *	  :		:
- *	 ebx		}<- esp
+ *	 eax		:  outer eax if it hasn't been restored
  *	----------------
+ *	 eflags		}
+ *	 cs		}  nested exception info
+ *	 eip		}
+ *	 return address	: (into xen_hypervisor_callback)
  *
- * In order to deliver the nested exception properly, we need to shift
- * everything from the return addr up to the error code so it sits
- * just under the outer exception info.  This means that when we
- * handle the exception, we do it in the context of the outer
- * exception rather than starting a new one.
+ * In order to deliver the nested exception properly, we need to discard the
+ * nested exception frame such that when we handle the exception, we do it
+ * in the context of the outer exception rather than starting a new one.
  *
- * The only caveat is that if the outer eax hasn't been restored yet
- * (ie, it's still on stack), we need to insert its value into the
- * SAVE_ALL state before going on, since it's usermode state which we
- * eventually need to restore.
+ * The only caveat is that if the outer eax hasn't been restored yet (i.e.
+ * it's still on stack), we need to restore its value here.
  */
-ENTRY(xen_iret_crit_fixup)
+SYM_CODE_START(xen_iret_crit_fixup)
 	/*
 	 * Paranoia: Make sure we're really coming from kernel space.
 	 * One could imagine a case where userspace jumps into the
 	 * critical range address, but just before the CPU delivers a
-	 * GP, it decides to deliver an interrupt instead.  Unlikely?
-	 * Definitely.  Easy to avoid?  Yes.  The Intel documents
-	 * explicitly say that the reported EIP for a bad jump is the
-	 * jump instruction itself, not the destination, but some
-	 * virtual environments get this wrong.
+	 * PF, it decides to deliver an interrupt instead.  Unlikely?
+	 * Definitely.  Easy to avoid?  Yes.
 	 */
-	movl PT_CS(%esp), %ecx
-	andl $SEGMENT_RPL_MASK, %ecx
-	cmpl $USER_RPL, %ecx
-	je 2f
-
-	lea PT_ORIG_EAX(%esp), %esi
-	lea PT_EFLAGS(%esp), %edi
+	testb $2, 2*4(%esp)		/* nested CS */
+	jnz 2f
 
 	/*
 	 * If eip is before iret_restore_end then stack
 	 * hasn't been restored yet.
 	 */
-	cmp $iret_restore_end, %eax
+	cmpl $iret_restore_end, 1*4(%esp)
 	jae 1f
 
-	movl 0+4(%edi), %eax		/* copy EAX (just above top of frame) */
-	movl %eax, PT_EAX(%esp)
+	movl 4*4(%esp), %eax		/* load outer EAX */
+	ret $4*4			/* discard nested EIP, CS, and EFLAGS as
+					 * well as the just restored EAX */
 
-	lea ESP_OFFSET(%edi), %edi	/* move dest up over saved regs */
-
-	/* set up the copy */
-1:	std
-	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
-	rep movsl
-	cld
-
-	lea 4(%edi), %esp		/* point esp to new frame */
-2:	jmp xen_do_upcall
+1:
+	ret $3*4			/* discard nested EIP, CS, and EFLAGS */
 
+2:
+	ret
+SYM_CODE_END(xen_iret_crit_fixup)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index ebf610b49c06..0a0fd168683a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -20,11 +20,11 @@
 #include <linux/linkage.h>
 
 .macro xen_pv_trap name
-ENTRY(xen_\name)
+SYM_CODE_START(xen_\name)
 	pop %rcx
 	pop %r11
 	jmp  \name
-END(xen_\name)
+SYM_CODE_END(xen_\name)
 _ASM_NOKPROBE(xen_\name)
 .endm
 
@@ -57,7 +57,7 @@ xen_pv_trap entry_INT80_compat
 xen_pv_trap hypervisor_callback
 
 	__INIT
-ENTRY(xen_early_idt_handler_array)
+SYM_CODE_START(xen_early_idt_handler_array)
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
 	pop %rcx
@@ -66,7 +66,7 @@ ENTRY(xen_early_idt_handler_array)
 	i = i + 1
 	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-END(xen_early_idt_handler_array)
+SYM_CODE_END(xen_early_idt_handler_array)
 	__FINIT
 
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
@@ -85,11 +85,12 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
  *	r11		}<-- pushed by hypercall page
  * rsp->rax		}
  */
-ENTRY(xen_iret)
+SYM_CODE_START(xen_iret)
 	pushq $0
 	jmp hypercall_iret
+SYM_CODE_END(xen_iret)
 
-ENTRY(xen_sysret64)
+SYM_CODE_START(xen_sysret64)
 	/*
 	 * We're already on the usermode stack at this point, but
 	 * still with the kernel gs, so we can easily switch back.
@@ -107,6 +108,7 @@ ENTRY(xen_sysret64)
 
 	pushq $VGCF_in_syscall
 	jmp hypercall_iret
+SYM_CODE_END(xen_sysret64)
 
 /*
  * Xen handles syscall callbacks much like ordinary exceptions, which
@@ -124,7 +126,7 @@ ENTRY(xen_sysret64)
  */
 
 /* Normal 64-bit system call target */
-ENTRY(xen_syscall_target)
+SYM_FUNC_START(xen_syscall_target)
 	popq %rcx
 	popq %r11
 
@@ -137,12 +139,12 @@ ENTRY(xen_syscall_target)
 	movq $__USER_CS, 1*8(%rsp)
 
 	jmp entry_SYSCALL_64_after_hwframe
-ENDPROC(xen_syscall_target)
+SYM_FUNC_END(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
-ENTRY(xen_syscall32_target)
+SYM_FUNC_START(xen_syscall32_target)
 	popq %rcx
 	popq %r11
 
@@ -155,25 +157,25 @@ ENTRY(xen_syscall32_target)
 	movq $__USER32_CS, 1*8(%rsp)
 
 	jmp entry_SYSCALL_compat_after_hwframe
-ENDPROC(xen_syscall32_target)
+SYM_FUNC_END(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
-ENTRY(xen_sysenter_target)
+SYM_FUNC_START(xen_sysenter_target)
 	mov 0*8(%rsp), %rcx
 	mov 1*8(%rsp), %r11
 	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
-ENDPROC(xen_sysenter_target)
+SYM_FUNC_END(xen_sysenter_target)
 
 #else /* !CONFIG_IA32_EMULATION */
 
-ENTRY(xen_syscall32_target)
-ENTRY(xen_sysenter_target)
+SYM_FUNC_START_ALIAS(xen_syscall32_target)
+SYM_FUNC_START(xen_sysenter_target)
 	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
 	pushq $0
 	jmp hypercall_iret
-ENDPROC(xen_syscall32_target)
-ENDPROC(xen_sysenter_target)
+SYM_FUNC_END(xen_sysenter_target)
+SYM_FUNC_END_ALIAS(xen_syscall32_target)
 
 #endif	/* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index c1d8b90aa4e2..1d0cee3163e4 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -22,7 +22,7 @@
 
 #ifdef CONFIG_XEN_PV
 	__INIT
-ENTRY(startup_xen)
+SYM_CODE_START(startup_xen)
 	UNWIND_HINT_EMPTY
 	cld
 
@@ -52,13 +52,13 @@ ENTRY(startup_xen)
 #endif
 
 	jmp xen_start_kernel
-END(startup_xen)
+SYM_CODE_END(startup_xen)
 	__FINIT
 #endif
 
 .pushsection .text
 	.balign PAGE_SIZE
-ENTRY(hypercall_page)
+SYM_CODE_START(hypercall_page)
 	.rept (PAGE_SIZE / 32)
 		UNWIND_HINT_EMPTY
 		.skip 32
@@ -69,7 +69,7 @@ ENTRY(hypercall_page)
 	.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
 #include <asm/xen-hypercalls.h>
 #undef HYPERCALL
-END(hypercall_page)
+SYM_CODE_END(hypercall_page)
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index a8e7beb6b7b5..1c645172b4b5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -3,13 +3,15 @@ config XTENSA
 	def_bool y
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_BINFMT_FLAT if !MMU
-	select ARCH_HAS_SYNC_DMA_FOR_CPU
-	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select ARCH_HAS_DMA_PREP_COHERENT if MMU
+	select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
+	select ARCH_HAS_UNCACHED_SEGMENT if MMU
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_REMAP if MMU
@@ -19,9 +21,10 @@ config XTENSA
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_STRNCPY_FROM_USER if KASAN
-	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_ARCH_KASAN if MMU
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_COPY_THREAD_TLS
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EXIT_THREAD
@@ -213,151 +216,6 @@ config HOTPLUG_CPU
 
 	  Say N if you want to disable CPU hotplug.
 
-config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
-	bool "Initialize Xtensa MMU inside the Linux kernel code"
-	depends on !XTENSA_VARIANT_FSF && !XTENSA_VARIANT_DC232B
-	default y if XTENSA_VARIANT_DC233C || XTENSA_VARIANT_CUSTOM
-	help
-	  Earlier version initialized the MMU in the exception vector
-	  before jumping to _startup in head.S and had an advantage that
-	  it was possible to place a software breakpoint at 'reset' and
-	  then enter your normal kernel breakpoints once the MMU was mapped
-	  to the kernel mappings (0XC0000000).
-
-	  This unfortunately won't work for U-Boot and likely also wont
-	  work for using KEXEC to have a hot kernel ready for doing a
-	  KDUMP.
-
-	  So now the MMU is initialized in head.S but it's necessary to
-	  use hardware breakpoints (gdb 'hbreak' cmd) to break at _startup.
-	  xt-gdb can't place a Software Breakpoint in the  0XD region prior
-	  to mapping the MMU and after mapping even if the area of low memory
-	  was mapped gdb wouldn't remove the breakpoint on hitting it as the
-	  PC wouldn't match. Since Hardware Breakpoints are recommended for
-	  Linux configurations it seems reasonable to just assume they exist
-	  and leave this older mechanism for unfortunate souls that choose
-	  not to follow Tensilica's recommendation.
-
-	  Selecting this will cause U-Boot to set the KERNEL Load and Entry
-	  address at 0x00003000 instead of the mapped std of 0xD0003000.
-
-	  If in doubt, say Y.
-
-config MEMMAP_CACHEATTR
-	hex "Cache attributes for the memory address space"
-	depends on !MMU
-	default 0x22222222
-	help
-	  These cache attributes are set up for noMMU systems. Each hex digit
-	  specifies cache attributes for the corresponding 512MB memory
-	  region: bits 0..3 -- for addresses 0x00000000..0x1fffffff,
-	  bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on.
-
-	  Cache attribute values are specific for the MMU type.
-	  For region protection MMUs:
-	    1: WT cached,
-	    2: cache bypass,
-	    4: WB cached,
-	    f: illegal.
-	  For ful MMU:
-	    bit 0: executable,
-	    bit 1: writable,
-	    bits 2..3:
-	      0: cache bypass,
-	      1: WB cache,
-	      2: WT cache,
-	      3: special (c and e are illegal, f is reserved).
-	  For MPU:
-	    0: illegal,
-	    1: WB cache,
-	    2: WB, no-write-allocate cache,
-	    3: WT cache,
-	    4: cache bypass.
-
-config KSEG_PADDR
-	hex "Physical address of the KSEG mapping"
-	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX && MMU
-	default 0x00000000
-	help
-	  This is the physical address where KSEG is mapped. Please refer to
-	  the chosen KSEG layout help for the required address alignment.
-	  Unpacked kernel image (including vectors) must be located completely
-	  within KSEG.
-	  Physical memory below this address is not available to linux.
-
-	  If unsure, leave the default value here.
-
-config KERNEL_LOAD_ADDRESS
-	hex "Kernel load address"
-	default 0x60003000 if !MMU
-	default 0x00003000 if MMU && INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
-	default 0xd0003000 if MMU && !INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
-	help
-	  This is the address where the kernel is loaded.
-	  It is virtual address for MMUv2 configurations and physical address
-	  for all other configurations.
-
-	  If unsure, leave the default value here.
-
-config VECTORS_OFFSET
-	hex "Kernel vectors offset"
-	default 0x00003000
-	help
-	  This is the offset of the kernel image from the relocatable vectors
-	  base.
-
-	  If unsure, leave the default value here.
-
-choice
-	prompt "KSEG layout"
-	depends on MMU
-	default XTENSA_KSEG_MMU_V2
-
-config XTENSA_KSEG_MMU_V2
-	bool "MMUv2: 128MB cached + 128MB uncached"
-	help
-	  MMUv2 compatible kernel memory map: TLB way 5 maps 128MB starting
-	  at KSEG_PADDR to 0xd0000000 with cache and to 0xd8000000
-	  without cache.
-	  KSEG_PADDR must be aligned to 128MB.
-
-config XTENSA_KSEG_256M
-	bool "256MB cached + 256MB uncached"
-	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
-	help
-	  TLB way 6 maps 256MB starting at KSEG_PADDR to 0xb0000000
-	  with cache and to 0xc0000000 without cache.
-	  KSEG_PADDR must be aligned to 256MB.
-
-config XTENSA_KSEG_512M
-	bool "512MB cached + 512MB uncached"
-	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
-	help
-	  TLB way 6 maps 512MB starting at KSEG_PADDR to 0xa0000000
-	  with cache and to 0xc0000000 without cache.
-	  KSEG_PADDR must be aligned to 256MB.
-
-endchoice
-
-config HIGHMEM
-	bool "High Memory Support"
-	depends on MMU
-	help
-	  Linux can use the full amount of RAM in the system by
-	  default. However, the default MMUv2 setup only maps the
-	  lowermost 128 MB of memory linearly to the areas starting
-	  at 0xd0000000 (cached) and 0xd8000000 (uncached).
-	  When there are more than 128 MB memory in the system not
-	  all of it can be "permanently mapped" by the kernel.
-	  The physical memory that's not permanently mapped is called
-	  "high memory".
-
-	  If you are compiling a kernel which will never run on a
-	  machine with more than 128 MB total physical RAM, answer
-	  N here.
-
-	  If unsure, say Y.
-
 config FAST_SYSCALL_XTENSA
 	bool "Enable fast atomic syscalls"
 	default n
@@ -444,6 +302,9 @@ config XTENSA_CALIBRATE_CCOUNT
 config SERIAL_CONSOLE
 	def_bool n
 
+config PLATFORM_HAVE_XIP
+	def_bool n
+
 menu "Platform options"
 
 choice
@@ -470,6 +331,7 @@ config XTENSA_PLATFORM_XTFPGA
 	select PLATFORM_WANT_DEFAULT_MEM if !MMU
 	select SERIAL_CONSOLE
 	select XTENSA_CALIBRATE_CCOUNT
+	select PLATFORM_HAVE_XIP
 	help
 	  XTFPGA is the name of Tensilica board family (LX60, LX110, LX200, ML605).
 	  This hardware is capable of running a full Linux distribution.
@@ -561,34 +423,6 @@ config SIMDISK1_FILENAME
 	  Another simulated disk in a host file for a buildroot-independent
 	  storage.
 
-config FORCE_MAX_ZONEORDER
-	int "Maximum zone order"
-	default "11"
-	help
-	  The kernel memory allocator divides physically contiguous memory
-	  blocks into "zones", where each zone is a power of two number of
-	  pages.  This option selects the largest power of two that the kernel
-	  keeps in the memory allocator.  If you need to allocate very large
-	  blocks of physically contiguous memory, then you may need to
-	  increase this value.
-
-	  This config option is actually maximum order plus one. For example,
-	  a value of 11 means that the largest free memory block is 2^10 pages.
-
-config PLATFORM_WANT_DEFAULT_MEM
-	def_bool n
-
-config DEFAULT_MEM_START
-	hex
-	prompt "PAGE_OFFSET/PHYS_OFFSET" if !MMU && PLATFORM_WANT_DEFAULT_MEM
-	default 0x60000000 if PLATFORM_WANT_DEFAULT_MEM
-	default 0x00000000
-	help
-	  This is the base address used for both PAGE_OFFSET and PHYS_OFFSET
-	  in noMMU configurations.
-
-	  If unsure, leave the default value here.
-
 config XTFPGA_LCD
 	bool "Enable XTFPGA LCD driver"
 	depends on XTENSA_PLATFORM_XTFPGA
@@ -619,6 +453,221 @@ config XTFPGA_LCD_8BIT_ACCESS
 	  only be used with 8-bit interface. Please consult prototyping user
 	  guide for your board for the correct interface width.
 
+comment "Kernel memory layout"
+
+config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	bool "Initialize Xtensa MMU inside the Linux kernel code"
+	depends on !XTENSA_VARIANT_FSF && !XTENSA_VARIANT_DC232B
+	default y if XTENSA_VARIANT_DC233C || XTENSA_VARIANT_CUSTOM
+	help
+	  Earlier version initialized the MMU in the exception vector
+	  before jumping to _startup in head.S and had an advantage that
+	  it was possible to place a software breakpoint at 'reset' and
+	  then enter your normal kernel breakpoints once the MMU was mapped
+	  to the kernel mappings (0XC0000000).
+
+	  This unfortunately won't work for U-Boot and likely also wont
+	  work for using KEXEC to have a hot kernel ready for doing a
+	  KDUMP.
+
+	  So now the MMU is initialized in head.S but it's necessary to
+	  use hardware breakpoints (gdb 'hbreak' cmd) to break at _startup.
+	  xt-gdb can't place a Software Breakpoint in the  0XD region prior
+	  to mapping the MMU and after mapping even if the area of low memory
+	  was mapped gdb wouldn't remove the breakpoint on hitting it as the
+	  PC wouldn't match. Since Hardware Breakpoints are recommended for
+	  Linux configurations it seems reasonable to just assume they exist
+	  and leave this older mechanism for unfortunate souls that choose
+	  not to follow Tensilica's recommendation.
+
+	  Selecting this will cause U-Boot to set the KERNEL Load and Entry
+	  address at 0x00003000 instead of the mapped std of 0xD0003000.
+
+	  If in doubt, say Y.
+
+config XIP_KERNEL
+	bool "Kernel Execute-In-Place from ROM"
+	depends on PLATFORM_HAVE_XIP
+	help
+	  Execute-In-Place allows the kernel to run from non-volatile storage
+	  directly addressable by the CPU, such as NOR flash. This saves RAM
+	  space since the text section of the kernel is not loaded from flash
+	  to RAM. Read-write sections, such as the data section and stack,
+	  are still copied to RAM. The XIP kernel is not compressed since
+	  it has to run directly from flash, so it will take more space to
+	  store it. The flash address used to link the kernel object files,
+	  and for storing it, is configuration dependent. Therefore, if you
+	  say Y here, you must know the proper physical address where to
+	  store the kernel image depending on your own flash memory usage.
+
+	  Also note that the make target becomes "make xipImage" rather than
+	  "make Image" or "make uImage". The final kernel binary to put in
+	  ROM memory will be arch/xtensa/boot/xipImage.
+
+	  If unsure, say N.
+
+config MEMMAP_CACHEATTR
+	hex "Cache attributes for the memory address space"
+	depends on !MMU
+	default 0x22222222
+	help
+	  These cache attributes are set up for noMMU systems. Each hex digit
+	  specifies cache attributes for the corresponding 512MB memory
+	  region: bits 0..3 -- for addresses 0x00000000..0x1fffffff,
+	  bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on.
+
+	  Cache attribute values are specific for the MMU type.
+	  For region protection MMUs:
+	    1: WT cached,
+	    2: cache bypass,
+	    4: WB cached,
+	    f: illegal.
+	  For ful MMU:
+	    bit 0: executable,
+	    bit 1: writable,
+	    bits 2..3:
+	      0: cache bypass,
+	      1: WB cache,
+	      2: WT cache,
+	      3: special (c and e are illegal, f is reserved).
+	  For MPU:
+	    0: illegal,
+	    1: WB cache,
+	    2: WB, no-write-allocate cache,
+	    3: WT cache,
+	    4: cache bypass.
+
+config KSEG_PADDR
+	hex "Physical address of the KSEG mapping"
+	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX && MMU
+	default 0x00000000
+	help
+	  This is the physical address where KSEG is mapped. Please refer to
+	  the chosen KSEG layout help for the required address alignment.
+	  Unpacked kernel image (including vectors) must be located completely
+	  within KSEG.
+	  Physical memory below this address is not available to linux.
+
+	  If unsure, leave the default value here.
+
+config KERNEL_VIRTUAL_ADDRESS
+	hex "Kernel virtual address"
+	depends on MMU && XIP_KERNEL
+	default 0xd0003000
+	help
+	  This is the virtual address where the XIP kernel is mapped.
+	  XIP kernel may be mapped into KSEG or KIO region, virtual address
+	  provided here must match kernel load address provided in
+	  KERNEL_LOAD_ADDRESS.
+
+config KERNEL_LOAD_ADDRESS
+	hex "Kernel load address"
+	default 0x60003000 if !MMU
+	default 0x00003000 if MMU && INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	default 0xd0003000 if MMU && !INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	help
+	  This is the address where the kernel is loaded.
+	  It is virtual address for MMUv2 configurations and physical address
+	  for all other configurations.
+
+	  If unsure, leave the default value here.
+
+config VECTORS_OFFSET
+	hex "Kernel vectors offset"
+	default 0x00003000
+	depends on !XIP_KERNEL
+	help
+	  This is the offset of the kernel image from the relocatable vectors
+	  base.
+
+	  If unsure, leave the default value here.
+
+config XIP_DATA_ADDR
+	hex "XIP kernel data virtual address"
+	depends on XIP_KERNEL
+	default 0x00000000
+	help
+	  This is the virtual address where XIP kernel data is copied.
+	  It must be within KSEG if MMU is used.
+
+config PLATFORM_WANT_DEFAULT_MEM
+	def_bool n
+
+config DEFAULT_MEM_START
+	hex
+	prompt "PAGE_OFFSET/PHYS_OFFSET" if !MMU && PLATFORM_WANT_DEFAULT_MEM
+	default 0x60000000 if PLATFORM_WANT_DEFAULT_MEM
+	default 0x00000000
+	help
+	  This is the base address used for both PAGE_OFFSET and PHYS_OFFSET
+	  in noMMU configurations.
+
+	  If unsure, leave the default value here.
+
+choice
+	prompt "KSEG layout"
+	depends on MMU
+	default XTENSA_KSEG_MMU_V2
+
+config XTENSA_KSEG_MMU_V2
+	bool "MMUv2: 128MB cached + 128MB uncached"
+	help
+	  MMUv2 compatible kernel memory map: TLB way 5 maps 128MB starting
+	  at KSEG_PADDR to 0xd0000000 with cache and to 0xd8000000
+	  without cache.
+	  KSEG_PADDR must be aligned to 128MB.
+
+config XTENSA_KSEG_256M
+	bool "256MB cached + 256MB uncached"
+	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	help
+	  TLB way 6 maps 256MB starting at KSEG_PADDR to 0xb0000000
+	  with cache and to 0xc0000000 without cache.
+	  KSEG_PADDR must be aligned to 256MB.
+
+config XTENSA_KSEG_512M
+	bool "512MB cached + 512MB uncached"
+	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
+	help
+	  TLB way 6 maps 512MB starting at KSEG_PADDR to 0xa0000000
+	  with cache and to 0xc0000000 without cache.
+	  KSEG_PADDR must be aligned to 256MB.
+
+endchoice
+
+config HIGHMEM
+	bool "High Memory Support"
+	depends on MMU
+	help
+	  Linux can use the full amount of RAM in the system by
+	  default. However, the default MMUv2 setup only maps the
+	  lowermost 128 MB of memory linearly to the areas starting
+	  at 0xd0000000 (cached) and 0xd8000000 (uncached).
+	  When there are more than 128 MB memory in the system not
+	  all of it can be "permanently mapped" by the kernel.
+	  The physical memory that's not permanently mapped is called
+	  "high memory".
+
+	  If you are compiling a kernel which will never run on a
+	  machine with more than 128 MB total physical RAM, answer
+	  N here.
+
+	  If unsure, say Y.
+
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
 endmenu
 
 menu "Power management options"
diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug
index 39de98e20018..83cc8d12fa0e 100644
--- a/arch/xtensa/Kconfig.debug
+++ b/arch/xtensa/Kconfig.debug
@@ -31,3 +31,10 @@ config S32C1I_SELFTEST
 	  It is easy to make wrong hardware configuration, this test should catch it early.
 
 	  Say 'N' on stable hardware.
+
+config PRINT_STACK_DEPTH
+	int "Stack depth to print" if DEBUG_KERNEL
+	default 64
+	help
+	  This option allows you to set the stack depth that the kernel
+	  prints in stack traces.
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index 1542018c9e57..67a7d151d1e7 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -87,7 +87,7 @@ drivers-$(CONFIG_OPROFILE)	+= arch/xtensa/oprofile/
 
 boot		:= arch/xtensa/boot
 
-all Image zImage uImage: vmlinux
+all Image zImage uImage xipImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
 archheaders:
@@ -97,4 +97,5 @@ define archhelp
   @echo '* Image       - Kernel ELF image with reset vector'
   @echo '* zImage      - Compressed kernel image (arch/xtensa/boot/images/zImage.*)'
   @echo '* uImage      - U-Boot wrapped image'
+  @echo '  xipImage    - XIP image'
 endef
diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile
index 294846117fc2..efb91bfda2b4 100644
--- a/arch/xtensa/boot/Makefile
+++ b/arch/xtensa/boot/Makefile
@@ -29,6 +29,7 @@ all: $(boot-y)
 Image: boot-elf
 zImage: boot-redboot
 uImage: $(obj)/uImage
+xipImage: $(obj)/xipImage
 
 boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y))
 	$(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
@@ -50,3 +51,7 @@ UIMAGE_COMPRESSION = gzip
 $(obj)/uImage: vmlinux.bin.gz FORCE
 	$(call if_changed,uimage)
 	$(Q)$(kecho) '  Kernel: $@ is ready'
+
+$(obj)/xipImage: vmlinux FORCE
+	$(call if_changed,objcopy)
+	$(Q)$(kecho) '  Kernel: $@ is ready'
diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig
index f378e56f9ce6..b6367af71d65 100644
--- a/arch/xtensa/configs/audio_kc705_defconfig
+++ b/arch/xtensa/configs/audio_kc705_defconfig
@@ -16,7 +16,6 @@ CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig
index 62f32a902568..f4eef6decd2a 100644
--- a/arch/xtensa/configs/cadence_csp_defconfig
+++ b/arch/xtensa/configs/cadence_csp_defconfig
@@ -21,7 +21,6 @@ CONFIG_INITRAMFS_SOURCE="$$KERNEL_INITRAMFS_SOURCE"
 # CONFIG_RD_LZO is not set
 # CONFIG_RD_LZ4 is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PROFILING=y
 CONFIG_MODULES=y
diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig
index 8bebe07f1060..c925165cf760 100644
--- a/arch/xtensa/configs/generic_kc705_defconfig
+++ b/arch/xtensa/configs/generic_kc705_defconfig
@@ -16,7 +16,6 @@ CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index 4bb5b76d9524..d1c01742baf4 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 # CONFIG_PCI is not set
diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig
index 933ab2adf434..380e366730d5 100644
--- a/arch/xtensa/configs/nommu_kc705_defconfig
+++ b/arch/xtensa/configs/nommu_kc705_defconfig
@@ -21,7 +21,6 @@ CONFIG_BLK_DEV_INITRD=y
 # CONFIG_RD_LZO is not set
 # CONFIG_RD_LZ4 is not set
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PERF_EVENTS=y
 CONFIG_MODULES=y
diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig
index e29c5b179a5b..d46b58f34098 100644
--- a/arch/xtensa/configs/smp_lx200_defconfig
+++ b/arch/xtensa/configs/smp_lx200_defconfig
@@ -16,7 +16,6 @@ CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig
index bfc45a138e72..4fddd8512350 100644
--- a/arch/xtensa/configs/virt_defconfig
+++ b/arch/xtensa/configs/virt_defconfig
@@ -15,7 +15,6 @@ CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PERF_EVENTS=y
 CONFIG_XTENSA_VARIANT_DC233C=y
diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig
new file mode 100644
index 000000000000..f9e85c082afc
--- /dev/null
+++ b/arch/xtensa/configs/xip_kc705_defconfig
@@ -0,0 +1,119 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_IRQ_TIME_ACCOUNTING=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_MEMCG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PROFILING=y
+CONFIG_XTENSA_VARIANT_DC233C=y
+CONFIG_XTENSA_UNALIGNED_USER=y
+CONFIG_XIP_KERNEL=y
+CONFIG_XIP_DATA_ADDR=0xd0000000
+CONFIG_KERNEL_VIRTUAL_ADDRESS=0xe6000000
+CONFIG_KERNEL_LOAD_ADDRESS=0xf6000000
+CONFIG_XTENSA_KSEG_512M=y
+CONFIG_HIGHMEM=y
+CONFIG_XTENSA_PLATFORM_XTFPGA=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0"
+CONFIG_USE_OF=y
+CONFIG_BUILTIN_DTB_SOURCE="kc705"
+# CONFIG_PARSE_BOOTPARAM is not set
+CONFIG_OPROFILE=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_AURORA is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_MARVELL_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVKMEM=y
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_SWAP=y
+CONFIG_ROOT_NFS=y
+CONFIG_SUNRPC_DEBUG=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRYPTO_ECHAINIV=y
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZO=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_STACKTRACE=y
+CONFIG_RCU_TRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_S32C1I_SELFTEST is not set
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index ffa0cf7f66c3..3acc31e55e02 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
 generic-y += hardirq.h
+generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
@@ -30,6 +31,7 @@ generic-y += qspinlock.h
 generic-y += sections.h
 generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 7b00d26f472e..3e7c6134ed32 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -64,13 +64,13 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32ex   %1, %3\n"			\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32ex   %0, %3\n"			\
-			"       getex   %0\n"				\
-			"       beqz    %0, 1b\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32ex   %[tmp], %[addr]\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32ex   %[result], %[addr]\n"		\
+			"       getex   %[result]\n"			\
+			"       beqz    %[result], 1b\n"		\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp)	\
+			: [i] "a" (i), [addr] "a" (v)			\
 			: "memory"					\
 			);						\
 }									\
@@ -82,14 +82,14 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32ex   %1, %3\n"			\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32ex   %0, %3\n"			\
-			"       getex   %0\n"				\
-			"       beqz    %0, 1b\n"			\
-			"       " #op " %0, %1, %2\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32ex   %[tmp], %[addr]\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32ex   %[result], %[addr]\n"		\
+			"       getex   %[result]\n"			\
+			"       beqz    %[result], 1b\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp)	\
+			: [i] "a" (i), [addr] "a" (v)			\
 			: "memory"					\
 			);						\
 									\
@@ -103,13 +103,13 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32ex   %1, %3\n"			\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32ex   %0, %3\n"			\
-			"       getex   %0\n"				\
-			"       beqz    %0, 1b\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32ex   %[tmp], %[addr]\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32ex   %[result], %[addr]\n"		\
+			"       getex   %[result]\n"			\
+			"       beqz    %[result], 1b\n"		\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp)	\
+			: [i] "a" (i), [addr] "a" (v)			\
 			: "memory"					\
 			);						\
 									\
@@ -124,13 +124,14 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32i    %1, %3, 0\n"			\
-			"       wsr     %1, scompare1\n"		\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32c1i  %0, %3, 0\n"			\
-			"       bne     %0, %1, 1b\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32i    %[tmp], %[mem]\n"		\
+			"       wsr     %[tmp], scompare1\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32c1i  %[result], %[mem]\n"		\
+			"       bne     %[result], %[tmp], 1b\n"	\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp),	\
+			  [mem] "+m" (*v)				\
+			: [i] "a" (i)					\
 			: "memory"					\
 			);						\
 }									\
@@ -142,14 +143,15 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32i    %1, %3, 0\n"			\
-			"       wsr     %1, scompare1\n"		\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32c1i  %0, %3, 0\n"			\
-			"       bne     %0, %1, 1b\n"			\
-			"       " #op " %0, %0, %2\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32i    %[tmp], %[mem]\n"		\
+			"       wsr     %[tmp], scompare1\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32c1i  %[result], %[mem]\n"		\
+			"       bne     %[result], %[tmp], 1b\n"	\
+			"       " #op " %[result], %[result], %[i]\n"	\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp),	\
+			  [mem] "+m" (*v)				\
+			: [i] "a" (i)					\
 			: "memory"					\
 			);						\
 									\
@@ -163,13 +165,14 @@ static inline int atomic_fetch_##op(int i, atomic_t * v)		\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-			"1:     l32i    %1, %3, 0\n"			\
-			"       wsr     %1, scompare1\n"		\
-			"       " #op " %0, %1, %2\n"			\
-			"       s32c1i  %0, %3, 0\n"			\
-			"       bne     %0, %1, 1b\n"			\
-			: "=&a" (result), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			"1:     l32i    %[tmp], %[mem]\n"		\
+			"       wsr     %[tmp], scompare1\n"		\
+			"       " #op " %[result], %[tmp], %[i]\n"	\
+			"       s32c1i  %[result], %[mem]\n"		\
+			"       bne     %[result], %[tmp], 1b\n"	\
+			: [result] "=&a" (result), [tmp] "=&a" (tmp),	\
+			  [mem] "+m" (*v)				\
+			: [i] "a" (i)					\
 			: "memory"					\
 			);						\
 									\
@@ -184,14 +187,14 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15, "__stringify(TOPLEVEL)"\n"\
-			"       l32i    %0, %2, 0\n"			\
-			"       " #op " %0, %0, %1\n"			\
-			"       s32i    %0, %2, 0\n"			\
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"	\
+			"       l32i    %[result], %[mem]\n"		\
+			"       " #op " %[result], %[result], %[i]\n"	\
+			"       s32i    %[result], %[mem]\n"		\
 			"       wsr     a15, ps\n"			\
 			"       rsync\n"				\
-			: "=&a" (vval)					\
-			: "a" (i), "a" (v)				\
+			: [result] "=&a" (vval), [mem] "+m" (*v)	\
+			: [i] "a" (i)					\
 			: "a15", "memory"				\
 			);						\
 }									\
@@ -203,13 +206,13 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 									\
 	__asm__ __volatile__(						\
 			"       rsil    a15,"__stringify(TOPLEVEL)"\n"	\
-			"       l32i    %0, %2, 0\n"			\
-			"       " #op " %0, %0, %1\n"			\
-			"       s32i    %0, %2, 0\n"			\
+			"       l32i    %[result], %[mem]\n"		\
+			"       " #op " %[result], %[result], %[i]\n"	\
+			"       s32i    %[result], %[mem]\n"		\
 			"       wsr     a15, ps\n"			\
 			"       rsync\n"				\
-			: "=&a" (vval)					\
-			: "a" (i), "a" (v)				\
+			: [result] "=&a" (vval), [mem] "+m" (*v)	\
+			: [i] "a" (i)					\
 			: "a15", "memory"				\
 			);						\
 									\
@@ -223,13 +226,14 @@ static inline int atomic_fetch_##op(int i, atomic_t * v)		\
 									\
 	__asm__ __volatile__(						\
 			"       rsil    a15,"__stringify(TOPLEVEL)"\n"	\
-			"       l32i    %0, %3, 0\n"			\
-			"       " #op " %1, %0, %2\n"			\
-			"       s32i    %1, %3, 0\n"			\
+			"       l32i    %[result], %[mem]\n"		\
+			"       " #op " %[tmp], %[result], %[i]\n"	\
+			"       s32i    %[tmp], %[mem]\n"		\
 			"       wsr     a15, ps\n"			\
 			"       rsync\n"				\
-			: "=&a" (vval), "=&a" (tmp)			\
-			: "a" (i), "a" (v)				\
+			: [result] "=&a" (vval), [tmp] "=&a" (tmp),	\
+			  [mem] "+m" (*v)				\
+			: [i] "a" (i)					\
 			: "a15", "memory"				\
 			);						\
 									\
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index be8b2be5a98b..3f71d364ba90 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -98,247 +98,112 @@ static inline unsigned long __fls(unsigned long word)
 
 #if XCHAL_HAVE_EXCLUSIVE
 
-static inline void set_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %0, %2\n"
-			"       or      %0, %0, %1\n"
-			"       s32ex   %0, %2\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp)
-			: "a" (mask), "a" (p)
-			: "memory");
-}
-
-static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %0, %2\n"
-			"       and     %0, %0, %1\n"
-			"       s32ex   %0, %2\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp)
-			: "a" (~mask), "a" (p)
-			: "memory");
-}
-
-static inline void change_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %0, %2\n"
-			"       xor     %0, %0, %1\n"
-			"       s32ex   %0, %2\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp)
-			: "a" (mask), "a" (p)
-			: "memory");
-}
-
-static inline int
-test_and_set_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %1, %3\n"
-			"       or      %0, %1, %2\n"
-			"       s32ex   %0, %3\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-
-	return value & mask;
-}
-
-static inline int
-test_and_clear_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %1, %3\n"
-			"       and     %0, %1, %2\n"
-			"       s32ex   %0, %3\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (~mask), "a" (p)
-			: "memory");
-
-	return value & mask;
-}
-
-static inline int
-test_and_change_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32ex   %1, %3\n"
-			"       xor     %0, %1, %2\n"
-			"       s32ex   %0, %3\n"
-			"       getex   %0\n"
-			"       beqz    %0, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-
-	return value & mask;
+#define BIT_OP(op, insn, inv)						\
+static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+{									\
+	unsigned long tmp;						\
+	unsigned long mask = 1UL << (bit & 31);				\
+									\
+	p += bit >> 5;							\
+									\
+	__asm__ __volatile__(						\
+			"1:     l32ex   %[tmp], %[addr]\n"		\
+			"      "insn"   %[tmp], %[tmp], %[mask]\n"	\
+			"       s32ex   %[tmp], %[addr]\n"		\
+			"       getex   %[tmp]\n"			\
+			"       beqz    %[tmp], 1b\n"			\
+			: [tmp] "=&a" (tmp)				\
+			: [mask] "a" (inv mask), [addr] "a" (p)		\
+			: "memory");					\
+}
+
+#define TEST_AND_BIT_OP(op, insn, inv)					\
+static inline int							\
+test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)	\
+{									\
+	unsigned long tmp, value;					\
+	unsigned long mask = 1UL << (bit & 31);				\
+									\
+	p += bit >> 5;							\
+									\
+	__asm__ __volatile__(						\
+			"1:     l32ex   %[value], %[addr]\n"		\
+			"      "insn"   %[tmp], %[value], %[mask]\n"	\
+			"       s32ex   %[tmp], %[addr]\n"		\
+			"       getex   %[tmp]\n"			\
+			"       beqz    %[tmp], 1b\n"			\
+			: [tmp] "=&a" (tmp), [value] "=&a" (value)	\
+			: [mask] "a" (inv mask), [addr] "a" (p)		\
+			: "memory");					\
+									\
+	return value & mask;						\
 }
 
 #elif XCHAL_HAVE_S32C1I
 
-static inline void set_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       or      %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-}
-
-static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       and     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (~mask), "a" (p)
-			: "memory");
+#define BIT_OP(op, insn, inv)						\
+static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+{									\
+	unsigned long tmp, value;					\
+	unsigned long mask = 1UL << (bit & 31);				\
+									\
+	p += bit >> 5;							\
+									\
+	__asm__ __volatile__(						\
+			"1:     l32i    %[value], %[mem]\n"		\
+			"       wsr     %[value], scompare1\n"		\
+			"      "insn"   %[tmp], %[value], %[mask]\n"	\
+			"       s32c1i  %[tmp], %[mem]\n"		\
+			"       bne     %[tmp], %[value], 1b\n"		\
+			: [tmp] "=&a" (tmp), [value] "=&a" (value),	\
+			  [mem] "+m" (*p)				\
+			: [mask] "a" (inv mask)				\
+			: "memory");					\
+}
+
+#define TEST_AND_BIT_OP(op, insn, inv)					\
+static inline int							\
+test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)	\
+{									\
+	unsigned long tmp, value;					\
+	unsigned long mask = 1UL << (bit & 31);				\
+									\
+	p += bit >> 5;							\
+									\
+	__asm__ __volatile__(						\
+			"1:     l32i    %[value], %[mem]\n"		\
+			"       wsr     %[value], scompare1\n"		\
+			"      "insn"   %[tmp], %[value], %[mask]\n"	\
+			"       s32c1i  %[tmp], %[mem]\n"		\
+			"       bne     %[tmp], %[value], 1b\n"		\
+			: [tmp] "=&a" (tmp), [value] "=&a" (value),	\
+			  [mem] "+m" (*p)				\
+			: [mask] "a" (inv mask)				\
+			: "memory");					\
+									\
+	return tmp & mask;						\
 }
 
-static inline void change_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       xor     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-}
+#else
 
-static inline int
-test_and_set_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       or      %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-
-	return tmp & mask;
-}
+#define BIT_OP(op, insn, inv)
+#define TEST_AND_BIT_OP(op, insn, inv)
 
-static inline int
-test_and_clear_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       and     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (~mask), "a" (p)
-			: "memory");
-
-	return tmp & mask;
-}
+#include <asm-generic/bitops/atomic.h>
 
-static inline int
-test_and_change_bit(unsigned int bit, volatile unsigned long *p)
-{
-	unsigned long tmp, value;
-	unsigned long mask = 1UL << (bit & 31);
-
-	p += bit >> 5;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       xor     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (tmp), "=&a" (value)
-			: "a" (mask), "a" (p)
-			: "memory");
-
-	return tmp & mask;
-}
+#endif /* XCHAL_HAVE_S32C1I */
 
-#else
+#define BIT_OPS(op, insn, inv)		\
+	BIT_OP(op, insn, inv)		\
+	TEST_AND_BIT_OP(op, insn, inv)
 
-#include <asm-generic/bitops/atomic.h>
+BIT_OPS(set, "or", )
+BIT_OPS(clear, "and", ~)
+BIT_OPS(change, "xor", )
 
-#endif /* XCHAL_HAVE_S32C1I */
+#undef BIT_OPS
+#undef BIT_OP
+#undef TEST_AND_BIT_OP
 
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/le.h>
diff --git a/arch/xtensa/include/asm/cache.h b/arch/xtensa/include/asm/cache.h
index b21fd133ff62..54e147ac26bf 100644
--- a/arch/xtensa/include/asm/cache.h
+++ b/arch/xtensa/include/asm/cache.h
@@ -31,4 +31,10 @@
 
 #define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
 
+/*
+ * R/O after init is actually writable, it cannot go to .rodata
+ * according to vmlinux linker script.
+ */
+#define __ro_after_init __read_mostly
+
 #endif	/* _XTENSA_CACHE_H */
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 7ccc5cbf441b..a175f8aec3fb 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -27,25 +27,25 @@ __cmpxchg_u32(volatile int *p, int old, int new)
 	unsigned long tmp, result;
 
 	__asm__ __volatile__(
-			"1:     l32ex   %0, %3\n"
-			"       bne     %0, %4, 2f\n"
-			"       mov     %1, %2\n"
-			"       s32ex   %1, %3\n"
-			"       getex   %1\n"
-			"       beqz    %1, 1b\n"
+			"1:     l32ex   %[result], %[addr]\n"
+			"       bne     %[result], %[cmp], 2f\n"
+			"       mov     %[tmp], %[new]\n"
+			"       s32ex   %[tmp], %[addr]\n"
+			"       getex   %[tmp]\n"
+			"       beqz    %[tmp], 1b\n"
 			"2:\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (new), "a" (p), "a" (old)
+			: [result] "=&a" (result), [tmp] "=&a" (tmp)
+			: [new] "a" (new), [addr] "a" (p), [cmp] "a" (old)
 			: "memory"
 			);
 
 	return result;
 #elif XCHAL_HAVE_S32C1I
 	__asm__ __volatile__(
-			"       wsr     %2, scompare1\n"
-			"       s32c1i  %0, %1, 0\n"
-			: "+a" (new)
-			: "a" (p), "a" (old)
+			"       wsr     %[cmp], scompare1\n"
+			"       s32c1i  %[new], %[mem]\n"
+			: [new] "+a" (new), [mem] "+m" (*p)
+			: [cmp] "a" (old)
 			: "memory"
 			);
 
@@ -53,14 +53,14 @@ __cmpxchg_u32(volatile int *p, int old, int new)
 #else
 	__asm__ __volatile__(
 			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
-			"       l32i    %0, %1, 0\n"
-			"       bne     %0, %2, 1f\n"
-			"       s32i    %3, %1, 0\n"
+			"       l32i    %[old], %[mem]\n"
+			"       bne     %[old], %[cmp], 1f\n"
+			"       s32i    %[new], %[mem]\n"
 			"1:\n"
 			"       wsr     a15, ps\n"
 			"       rsync\n"
-			: "=&a" (old)
-			: "a" (p), "a" (old), "r" (new)
+			: [old] "=&a" (old), [mem] "+m" (*p)
+			: [cmp] "a" (old), [new] "r" (new)
 			: "a15", "memory");
 	return old;
 #endif
@@ -129,13 +129,13 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 	unsigned long tmp, result;
 
 	__asm__ __volatile__(
-			"1:     l32ex   %0, %3\n"
-			"       mov     %1, %2\n"
-			"       s32ex   %1, %3\n"
-			"       getex   %1\n"
-			"       beqz    %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (val), "a" (m)
+			"1:     l32ex   %[result], %[addr]\n"
+			"       mov     %[tmp], %[val]\n"
+			"       s32ex   %[tmp], %[addr]\n"
+			"       getex   %[tmp]\n"
+			"       beqz    %[tmp], 1b\n"
+			: [result] "=&a" (result), [tmp] "=&a" (tmp)
+			: [val] "a" (val), [addr] "a" (m)
 			: "memory"
 			);
 
@@ -143,13 +143,14 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 #elif XCHAL_HAVE_S32C1I
 	unsigned long tmp, result;
 	__asm__ __volatile__(
-			"1:     l32i    %1, %2, 0\n"
-			"       mov     %0, %3\n"
-			"       wsr     %1, scompare1\n"
-			"       s32c1i  %0, %2, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (m), "a" (val)
+			"1:     l32i    %[tmp], %[mem]\n"
+			"       mov     %[result], %[val]\n"
+			"       wsr     %[tmp], scompare1\n"
+			"       s32c1i  %[result], %[mem]\n"
+			"       bne     %[result], %[tmp], 1b\n"
+			: [result] "=&a" (result), [tmp] "=&a" (tmp),
+			  [mem] "+m" (*m)
+			: [val] "a" (val)
 			: "memory"
 			);
 	return result;
@@ -157,12 +158,12 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 	unsigned long tmp;
 	__asm__ __volatile__(
 			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
-			"       l32i    %0, %1, 0\n"
-			"       s32i    %2, %1, 0\n"
+			"       l32i    %[tmp], %[mem]\n"
+			"       s32i    %[val], %[mem]\n"
 			"       wsr     a15, ps\n"
 			"       rsync\n"
-			: "=&a" (tmp)
-			: "a" (m), "a" (val)
+			: [tmp] "=&a" (tmp), [mem] "+m" (*m)
+			: [val] "a" (val)
 			: "a15", "memory");
 	return tmp;
 #endif
diff --git a/arch/xtensa/include/asm/fixmap.h b/arch/xtensa/include/asm/fixmap.h
index 7e25c1b50ac0..cfb8696917e9 100644
--- a/arch/xtensa/include/asm/fixmap.h
+++ b/arch/xtensa/include/asm/fixmap.h
@@ -78,8 +78,10 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 
 #define kmap_get_fixmap_pte(vaddr) \
 	pte_offset_kernel( \
-		pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \
-		(vaddr) \
-	)
+		pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), \
+						 (vaddr)), \
+				      (vaddr)), \
+			   (vaddr)), \
+		(vaddr))
 
 #endif
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index 0c4457ca0a85..964611083224 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -43,10 +43,10 @@
 #elif XCHAL_HAVE_S32C1I
 #define __futex_atomic_op(insn, ret, old, uaddr, arg)	\
 	__asm__ __volatile(				\
-	"1:	l32i	%[oldval], %[addr], 0\n"	\
+	"1:	l32i	%[oldval], %[mem]\n"		\
 		insn "\n"				\
 	"	wsr	%[oldval], scompare1\n"		\
-	"2:	s32c1i	%[newval], %[addr], 0\n"	\
+	"2:	s32c1i	%[newval], %[mem]\n"		\
 	"	bne	%[newval], %[oldval], 1b\n"	\
 	"	movi	%[newval], 0\n"			\
 	"3:\n"						\
@@ -60,9 +60,9 @@
 	"	.section __ex_table,\"a\"\n"		\
 	"	.long 1b, 5b, 2b, 5b\n"			\
 	"	.previous\n"				\
-	: [oldval] "=&r" (old), [newval] "=&r" (ret)	\
-	: [addr] "r" (uaddr), [oparg] "r" (arg),	\
-	  [fault] "I" (-EFAULT)				\
+	: [oldval] "=&r" (old), [newval] "=&r" (ret),	\
+	  [mem] "+m" (*(uaddr))				\
+	: [oparg] "r" (arg), [fault] "I" (-EFAULT)	\
 	: "memory")
 #endif
 
diff --git a/arch/xtensa/include/asm/hw_irq.h b/arch/xtensa/include/asm/hw_irq.h
deleted file mode 100644
index 3ddbea759b2b..000000000000
--- a/arch/xtensa/include/asm/hw_irq.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * include/asm-xtensa/hw_irq.h
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file "COPYING" in the main directory of
- * this archive for more details.
- *
- * Copyright (C) 2002 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_HW_IRQ_H
-#define _XTENSA_HW_IRQ_H
-
-#endif
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 3b054d2bede0..e3e1d9a1ef69 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -23,6 +23,7 @@
 #ifndef _XTENSA_INITIALIZE_MMU_H
 #define _XTENSA_INITIALIZE_MMU_H
 
+#include <linux/init.h>
 #include <asm/pgtable.h>
 #include <asm/vectors.h>
 
@@ -183,7 +184,7 @@
 #endif
 
 #if XCHAL_HAVE_MPU
-	.data
+	__REFCONST
 	.align	4
 .Lattribute_table:
 	.long 0x000000, 0x1fff00, 0x1ddf00, 0x1eef00
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index 988e08530a5c..54188e69b988 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -32,8 +32,7 @@ void xtensa_iounmap(volatile void __iomem *addr);
 /*
  * Return the virtual address for the specified bus memory.
  */
-static inline void __iomem *ioremap_nocache(unsigned long offset,
-		unsigned long size)
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 {
 	if (offset >= XCHAL_KIO_PADDR
 	    && offset - XCHAL_KIO_PADDR < XCHAL_KIO_SIZE)
@@ -52,15 +51,6 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
 		return xtensa_ioremap_cache(offset, size);
 }
 #define ioremap_cache ioremap_cache
-#define ioremap_nocache ioremap_nocache
-
-#define ioremap_wc ioremap_nocache
-#define ioremap_wt ioremap_nocache
-
-static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
-{
-	return ioremap_nocache(offset, size);
-}
 
 static inline void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h
index 9c12babc016c..7cbf68ca7106 100644
--- a/arch/xtensa/include/asm/kmem_layout.h
+++ b/arch/xtensa/include/asm/kmem_layout.h
@@ -11,6 +11,7 @@
 #ifndef _XTENSA_KMEM_LAYOUT_H
 #define _XTENSA_KMEM_LAYOUT_H
 
+#include <asm/core.h>
 #include <asm/types.h>
 
 #ifdef CONFIG_MMU
@@ -65,6 +66,34 @@
 
 #endif
 
+/* KIO definition */
+
+#if XCHAL_HAVE_PTP_MMU
+#define XCHAL_KIO_CACHED_VADDR		0xe0000000
+#define XCHAL_KIO_BYPASS_VADDR		0xf0000000
+#define XCHAL_KIO_DEFAULT_PADDR		0xf0000000
+#else
+#define XCHAL_KIO_BYPASS_VADDR		XCHAL_KIO_PADDR
+#define XCHAL_KIO_DEFAULT_PADDR		0x90000000
+#endif
+#define XCHAL_KIO_SIZE			0x10000000
+
+#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF)
+#define XCHAL_KIO_PADDR			xtensa_get_kio_paddr()
+#ifndef __ASSEMBLY__
+extern unsigned long xtensa_kio_paddr;
+
+static inline unsigned long xtensa_get_kio_paddr(void)
+{
+	return xtensa_kio_paddr;
+}
+#endif
+#else
+#define XCHAL_KIO_PADDR			XCHAL_KIO_DEFAULT_PADDR
+#endif
+
+/* KERNEL_STACK definition */
+
 #ifndef CONFIG_KASAN
 #define KERNEL_STACK_SHIFT	13
 #else
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 09c56cba442e..f4771c29c7e9 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -169,7 +169,18 @@ static inline unsigned long ___pa(unsigned long va)
 	if (off >= XCHAL_KSEG_SIZE)
 		off -= XCHAL_KSEG_SIZE;
 
+#ifndef CONFIG_XIP_KERNEL
 	return off + PHYS_OFFSET;
+#else
+	if (off < XCHAL_KSEG_SIZE)
+		return off + PHYS_OFFSET;
+
+	off -= XCHAL_KSEG_SIZE;
+	if (off >= XCHAL_KIO_SIZE)
+		off -= XCHAL_KIO_SIZE;
+
+	return off + XCHAL_KIO_PADDR;
+#endif
 }
 #define __pa(x)	___pa((unsigned long)(x))
 #else
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 3f7fe5a8c286..27ac17c9da09 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -8,7 +8,6 @@
 #ifndef _XTENSA_PGTABLE_H
 #define _XTENSA_PGTABLE_H
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm/page.h>
 #include <asm/kmem_layout.h>
 #include <asm-generic/pgtable-nopmd.h>
@@ -371,9 +370,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 #define pgd_index(address)	((address) >> PGDIR_SHIFT)
 
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir,address) ((pmd_t*)(dir))
-
 /* Find an entry in the third-level page table.. */
 #define pte_index(address)	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 #define pte_offset_kernel(dir,addr) 					\
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index 913826dfa838..f2c48522c5a1 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -65,31 +65,4 @@ extern void platform_calibrate_ccount (void);
  */
 void cpu_reset(void) __attribute__((noreturn));
 
-/*
- * Memory caching is platform-dependent in noMMU xtensa configurations.
- * The following set of functions should be implemented in platform code
- * in order to enable coherent DMA memory operations when CONFIG_MMU is not
- * enabled. Default implementations do nothing and issue a warning.
- */
-
-/*
- * Check whether p points to a cached memory.
- */
-bool platform_vaddr_cached(const void *p);
-
-/*
- * Check whether p points to an uncached memory.
- */
-bool platform_vaddr_uncached(const void *p);
-
-/*
- * Return pointer to an uncached view of the cached sddress p.
- */
-void *platform_vaddr_to_uncached(void *p);
-
-/*
- * Return pointer to a cached view of the uncached sddress p.
- */
-void *platform_vaddr_to_cached(void *p);
-
 #endif	/* _XTENSA_PLATFORM_H */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 7495520d7a3e..6fa903daf2a2 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -195,6 +195,7 @@ struct thread_struct {
 /* Clearing a0 terminates the backtrace. */
 #define start_thread(regs, new_pc, new_sp) \
 	do { \
+		unsigned long syscall = (regs)->syscall; \
 		memset((regs), 0, sizeof(*(regs))); \
 		(regs)->pc = (new_pc); \
 		(regs)->ps = USER_PS_VALUE; \
@@ -204,7 +205,7 @@ struct thread_struct {
 		(regs)->depc = 0; \
 		(regs)->windowbase = 0; \
 		(regs)->windowstart = 1; \
-		(regs)->syscall = NO_SYSCALL; \
+		(regs)->syscall = syscall; \
 	} while (0)
 
 /* Forward declaration */
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 359ab40e935a..f9a671cbf933 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -51,7 +51,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
 					    struct pt_regs *regs,
 					    int error, long val)
 {
-	regs->areg[0] = (long) error ? error : val;
+	regs->areg[2] = (long) error ? error : val;
 }
 
 #define SYSCALL_MAX_ARGS 6
@@ -79,7 +79,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 		regs->areg[reg[i]] = args[i];
 }
 
-asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
+asmlinkage long xtensa_rt_sigreturn(void);
 asmlinkage long xtensa_shmat(int, char __user *, int);
 asmlinkage long xtensa_fadvise64_64(int, int,
 				    unsigned long long, unsigned long long);
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 3f80386f1883..47b7702aaa40 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -132,13 +132,13 @@ do {									\
 #define __check_align_1  ""
 
 #define __check_align_2				\
-	"   _bbci.l %[addr], 0, 1f	\n"	\
+	"   _bbci.l %[mem] * 0, 1f	\n"	\
 	"   movi    %[err], %[efault]	\n"	\
 	"   _j      2f			\n"
 
 #define __check_align_4				\
-	"   _bbsi.l %[addr], 0, 0f	\n"	\
-	"   _bbci.l %[addr], 1, 1f	\n"	\
+	"   _bbsi.l %[mem] * 0, 0f	\n"	\
+	"   _bbci.l %[mem] * 0 + 1, 1f	\n"	\
 	"0: movi    %[err], %[efault]	\n"	\
 	"   _j      2f			\n"
 
@@ -154,7 +154,7 @@ do {									\
 #define __put_user_asm(x_, addr_, err_, align, insn, cb)\
 __asm__ __volatile__(					\
 	__check_align_##align				\
-	"1: "insn"  %[x], %[addr], 0	\n"		\
+	"1: "insn"  %[x], %[mem]	\n"		\
 	"2:				\n"		\
 	"   .section  .fixup,\"ax\"	\n"		\
 	"   .align 4			\n"		\
@@ -167,8 +167,8 @@ __asm__ __volatile__(					\
 	"   .section  __ex_table,\"a\"	\n"		\
 	"   .long	1b, 5b		\n"		\
 	"   .previous"					\
-	:[err] "+r"(err_), [tmp] "=r"(cb)		\
-	:[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT))
+	:[err] "+r"(err_), [tmp] "=r"(cb), [mem] "=m"(*(addr_))		\
+	:[x] "r"(x_), [efault] "i"(-EFAULT))
 
 #define __get_user_nocheck(x, ptr, size)			\
 ({								\
@@ -222,7 +222,7 @@ do {							\
 	u32 __x = 0;					\
 	__asm__ __volatile__(				\
 		__check_align_##align			\
-		"1: "insn"  %[x], %[addr], 0	\n"	\
+		"1: "insn"  %[x], %[mem]	\n"	\
 		"2:				\n"	\
 		"   .section  .fixup,\"ax\"	\n"	\
 		"   .align 4			\n"	\
@@ -236,7 +236,7 @@ do {							\
 		"   .long	1b, 5b		\n"	\
 		"   .previous"				\
 		:[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \
-		:[addr] "r"(addr_), [efault] "i"(-EFAULT)); \
+		:[mem] "m"(*(addr_)), [efault] "i"(-EFAULT)); \
 	(x_) = (__force __typeof__(*(addr_)))__x;	\
 } while (0)
 
diff --git a/arch/xtensa/include/asm/user.h b/arch/xtensa/include/asm/user.h
deleted file mode 100644
index 2c3ed23354a8..000000000000
--- a/arch/xtensa/include/asm/user.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * include/asm-xtensa/user.h
- *
- * Xtensa Processor version.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_USER_H
-#define _XTENSA_USER_H
-
-/* This file usually defines a 'struct user' structure. However, it it only
- * used for a.out file, which are not supported on Xtensa.
- */
-
-#endif	/* _XTENSA_USER_H */
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index 79fe3007919e..fd99b25037a7 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -21,50 +21,18 @@
 #include <asm/core.h>
 #include <asm/kmem_layout.h>
 
-#if XCHAL_HAVE_PTP_MMU
-#define XCHAL_KIO_CACHED_VADDR		0xe0000000
-#define XCHAL_KIO_BYPASS_VADDR		0xf0000000
-#define XCHAL_KIO_DEFAULT_PADDR		0xf0000000
+#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+#ifdef CONFIG_KERNEL_VIRTUAL_ADDRESS
+#define KERNELOFFSET			CONFIG_KERNEL_VIRTUAL_ADDRESS
 #else
-#define XCHAL_KIO_BYPASS_VADDR		XCHAL_KIO_PADDR
-#define XCHAL_KIO_DEFAULT_PADDR		0x90000000
-#endif
-#define XCHAL_KIO_SIZE			0x10000000
-
-#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF)
-#define XCHAL_KIO_PADDR			xtensa_get_kio_paddr()
-#ifndef __ASSEMBLY__
-extern unsigned long xtensa_kio_paddr;
-
-static inline unsigned long xtensa_get_kio_paddr(void)
-{
-	return xtensa_kio_paddr;
-}
-#endif
-#else
-#define XCHAL_KIO_PADDR			XCHAL_KIO_DEFAULT_PADDR
-#endif
-
-#if defined(CONFIG_MMU)
-
-#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
-/* Image Virtual Start Address */
-#define KERNELOFFSET			(XCHAL_KSEG_CACHED_VADDR + \
-					 CONFIG_KERNEL_LOAD_ADDRESS - \
+#define KERNELOFFSET			(CONFIG_KERNEL_LOAD_ADDRESS + \
+					 XCHAL_KSEG_CACHED_VADDR - \
 					 XCHAL_KSEG_PADDR)
+#endif
 #else
 #define KERNELOFFSET			CONFIG_KERNEL_LOAD_ADDRESS
 #endif
 
-#else /* !defined(CONFIG_MMU) */
-  /* MMU Not being used - Virtual == Physical */
-
-/* Location of the start of the kernel text, _start */
-#define KERNELOFFSET			CONFIG_KERNEL_LOAD_ADDRESS
-
-
-#endif /* CONFIG_MMU */
-
 #define RESET_VECTOR1_VADDR		(XCHAL_RESET_VECTOR1_VADDR)
 #ifdef CONFIG_VECTORS_OFFSET
 #define VECBASE_VADDR			(KERNELOFFSET - CONFIG_VECTORS_OFFSET)
diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h
new file mode 100644
index 000000000000..0eb94b70be55
--- /dev/null
+++ b/arch/xtensa/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_XTENSA_VMALLOC_H
+#define _ASM_XTENSA_VMALLOC_H
+
+#endif /* _ASM_XTENSA_VMALLOC_H */
diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h
index a57afa0b606f..3bd0642f6660 100644
--- a/arch/xtensa/include/uapi/asm/ipcbuf.h
+++ b/arch/xtensa/include/uapi/asm/ipcbuf.h
@@ -12,6 +12,8 @@
 #ifndef _XTENSA_IPCBUF_H
 #define _XTENSA_IPCBUF_H
 
+#include <linux/posix_types.h>
+
 /*
  * Pad space is left for:
  * - 32-bit mode_t and seq
diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h
index d6915e9f071c..1ed2c85b693a 100644
--- a/arch/xtensa/include/uapi/asm/msgbuf.h
+++ b/arch/xtensa/include/uapi/asm/msgbuf.h
@@ -17,6 +17,8 @@
 #ifndef _XTENSA_MSGBUF_H
 #define _XTENSA_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #ifdef __XTENSA_EB__
diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h
index 09f348d643f1..3b9cdd406dfe 100644
--- a/arch/xtensa/include/uapi/asm/sembuf.h
+++ b/arch/xtensa/include/uapi/asm/sembuf.h
@@ -22,6 +22,7 @@
 #define _XTENSA_SEMBUF_H
 
 #include <asm/byteorder.h>
+#include <asm/ipcbuf.h>
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index 6f629027ac7d..d4082c6a121b 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -5,10 +5,11 @@
 
 extra-y := head.o vmlinux.lds
 
-obj-y := align.o coprocessor.o entry.o irq.o pci-dma.o platform.o process.o \
+obj-y := align.o coprocessor.o entry.o irq.o platform.o process.o \
 	 ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \
 	 vectors.o
 
+obj-$(CONFIG_MMU) += pci-dma.o
 obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 80828b95a51f..bb8e499b9900 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -15,17 +15,9 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/asmmacro.h>
-#include <asm/processor.h>
 #include <asm/coprocessor.h>
-#include <asm/thread_info.h>
-#include <asm/asm-uaccess.h>
-#include <asm/unistd.h>
-#include <asm/ptrace.h>
 #include <asm/current.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/signal.h>
-#include <asm/tlbflush.h>
+#include <asm/regs.h>
 
 #if XTENSA_HAVE_COPROCESSORS
 
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 9e3676879168..2c9e48566e48 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -520,7 +520,7 @@ common_exception_return:
 	call4	schedule	# void schedule (void)
 	j	1b
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 6:
 	_bbci.l	a4, TIF_NEED_RESCHED, 4f
 
@@ -529,7 +529,7 @@ common_exception_return:
 	l32i	a4, a2, TI_PRE_COUNT
 	bnez	a4, 4f
 	call4	preempt_schedule_irq
-	j	1b
+	j	4f
 #endif
 
 #if XTENSA_FAKE_NMI
@@ -1876,8 +1876,7 @@ ENDPROC(fast_store_prohibited)
 
 ENTRY(system_call)
 
-	/* reserve 4 bytes on stack for function parameter */
-	abi_entry(4)
+	abi_entry_default
 
 	/* regs->syscall = regs->areg[2] */
 
@@ -1892,11 +1891,10 @@ ENTRY(system_call)
 
 	mov	a6, a2
 	call4	do_syscall_trace_enter
+	beqz	a6, .Lsyscall_exit
 	l32i	a7, a2, PT_SYSCALL
 
 1:
-	s32i	a7, a1, 4
-
 	/* syscall = sys_call_table[syscall_nr] */
 
 	movi	a4, sys_call_table
@@ -1906,8 +1904,6 @@ ENTRY(system_call)
 
 	addx4	a4, a7, a4
 	l32i	a4, a4, 0
-	movi	a5, sys_ni_syscall;
-	beq	a4, a5, 1f
 
 	/* Load args: arg0 - arg5 are passed via regs. */
 
@@ -1918,25 +1914,19 @@ ENTRY(system_call)
 	l32i	a10, a2, PT_AREG8
 	l32i	a11, a2, PT_AREG9
 
-	/* Pass one additional argument to the syscall: pt_regs (on stack) */
-	s32i	a2, a1, 0
-
 	callx4	a4
 
 1:	/* regs->areg[2] = return_value */
 
 	s32i	a6, a2, PT_AREG2
 	bnez	a3, 1f
-	abi_ret(4)
+.Lsyscall_exit:
+	abi_ret_default
 
 1:
-	l32i	a4, a1, 4
-	l32i	a3, a2, PT_SYSCALL
-	s32i	a4, a2, PT_SYSCALL
 	mov	a6, a2
 	call4	do_syscall_trace_leave
-	s32i	a3, a2, PT_SYSCALL
-	abi_ret(4)
+	abi_ret_default
 
 ENDPROC(system_call)
 
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 4ae998b5a348..e0c1fac0910f 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -260,6 +260,13 @@ ENTRY(_startup)
 	___invalidate_icache_all a2 a3
 	isync
 
+#ifdef CONFIG_XIP_KERNEL
+	/* Setup bootstrap CPU stack in XIP kernel */
+
+	movi	a1, start_info
+	l32i	a1, a1, 0
+#endif
+
 	movi	a6, 0
 	xsr	a6, excsave1
 
@@ -355,10 +362,10 @@ ENDPROC(cpu_restart)
  * DATA section
  */
 
-        .section ".data.init.refok"
-        .align  4
+	__REFDATA
+	.align  4
 ENTRY(start_info)
-        .long   init_thread_union + KERNEL_STACK_SIZE
+	.long	init_thread_union + KERNEL_STACK_SIZE
 
 /*
  * BSS section
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 154979d62b73..72b6222daa0b 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -44,8 +44,8 @@ static void do_cache_op(phys_addr_t paddr, size_t size,
 		}
 }
 
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
@@ -62,8 +62,8 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
-		size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
@@ -81,122 +81,25 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 	}
 }
 
-#ifdef CONFIG_MMU
-bool platform_vaddr_cached(const void *p)
-{
-	unsigned long addr = (unsigned long)p;
-
-	return addr >= XCHAL_KSEG_CACHED_VADDR &&
-	       addr - XCHAL_KSEG_CACHED_VADDR < XCHAL_KSEG_SIZE;
-}
-
-bool platform_vaddr_uncached(const void *p)
-{
-	unsigned long addr = (unsigned long)p;
-
-	return addr >= XCHAL_KSEG_BYPASS_VADDR &&
-	       addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE;
-}
-
-void *platform_vaddr_to_uncached(void *p)
-{
-	return p + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
-}
-
-void *platform_vaddr_to_cached(void *p)
-{
-	return p + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
-}
-#else
-bool __attribute__((weak)) platform_vaddr_cached(const void *p)
-{
-	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
-	return true;
-}
-
-bool __attribute__((weak)) platform_vaddr_uncached(const void *p)
-{
-	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
-	return false;
-}
-
-void __attribute__((weak)) *platform_vaddr_to_uncached(void *p)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
-	return p;
-}
-
-void __attribute__((weak)) *platform_vaddr_to_cached(void *p)
-{
-	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
-	return p;
+	__invalidate_dcache_range((unsigned long)page_address(page), size);
 }
-#endif
 
 /*
- * Note: We assume that the full memory space is always mapped to 'kseg'
- *	 Otherwise we have to use page attributes (not implemented).
+ * Memory caching is platform-dependent in noMMU xtensa configurations.
+ * The following two functions should be implemented in platform code
+ * in order to enable coherent DMA memory operations when CONFIG_MMU is not
+ * enabled.
  */
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t flag, unsigned long attrs)
-{
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct page *page = NULL;
-
-	/* ignore region speicifiers */
-
-	flag &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
-	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
-		flag |= GFP_DMA;
-
-	if (gfpflags_allow_blocking(flag))
-		page = dma_alloc_from_contiguous(dev, count, get_order(size),
-						 flag & __GFP_NOWARN);
-
-	if (!page)
-		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-
-	if (!page)
-		return NULL;
-
-	*handle = phys_to_dma(dev, page_to_phys(page));
-
 #ifdef CONFIG_MMU
-	if (PageHighMem(page)) {
-		void *p;
-
-		p = dma_common_contiguous_remap(page, size,
-						pgprot_noncached(PAGE_KERNEL),
-						__builtin_return_address(0));
-		if (!p) {
-			if (!dma_release_from_contiguous(dev, page, count))
-				__free_pages(page, get_order(size));
-		}
-		return p;
-	}
-#endif
-	BUG_ON(!platform_vaddr_cached(page_address(page)));
-	__invalidate_dcache_range((unsigned long)page_address(page), size);
-	return platform_vaddr_to_uncached(page_address(page));
+void *uncached_kernel_address(void *p)
+{
+	return p + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
 }
 
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, unsigned long attrs)
+void *cached_kernel_address(void *p)
 {
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct page *page;
-
-	if (platform_vaddr_uncached(vaddr)) {
-		page = virt_to_page(platform_vaddr_to_cached(vaddr));
-	} else {
-#ifdef CONFIG_MMU
-		dma_common_free_remap(vaddr, size);
-#endif
-		page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
-	}
-
-	if (!dma_release_from_contiguous(dev, page, count))
-		__free_pages(page, get_order(size));
+	return p + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
 }
+#endif /* CONFIG_MMU */
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index db278a9e80c7..3edecc41ef8c 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -202,8 +202,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  * involved.  Much simpler to just not copy those live frames across.
  */
 
-int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
-		unsigned long thread_fn_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp_thread_fn,
+		unsigned long thread_fn_arg, struct task_struct *p,
+		unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -264,9 +265,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
 			       &regs->areg[XCHAL_NUM_AREGS - len/4], len);
 		}
 
-		/* The thread pointer is passed in the '4th argument' (= a5) */
+		childregs->syscall = regs->syscall;
+
 		if (clone_flags & CLONE_SETTLS)
-			childregs->threadptr = childregs->areg[5];
+			childregs->threadptr = tls;
 	} else {
 		p->thread.ra = MAKE_RA_FOR_CALL(
 				(unsigned long)ret_from_kernel_thread, 1);
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index b964f0b2d886..145742d70a9f 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -542,14 +542,28 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-void do_syscall_trace_enter(struct pt_regs *regs)
+void do_syscall_trace_leave(struct pt_regs *regs);
+int do_syscall_trace_enter(struct pt_regs *regs)
 {
+	if (regs->syscall == NO_SYSCALL)
+		regs->areg[2] = -ENOSYS;
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    tracehook_report_syscall_entry(regs))
+	    tracehook_report_syscall_entry(regs)) {
+		regs->areg[2] = -ENOSYS;
 		regs->syscall = NO_SYSCALL;
+		return 0;
+	}
+
+	if (regs->syscall == NO_SYSCALL) {
+		do_syscall_trace_leave(regs);
+		return 0;
+	}
 
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, syscall_get_nr(current, regs));
+
+	return 1;
 }
 
 void do_syscall_trace_leave(struct pt_regs *regs)
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index e0e1e1892b86..adead45debe8 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -308,6 +308,10 @@ extern char _Level6InterruptVector_text_end;
 extern char _SecondaryResetVector_text_start;
 extern char _SecondaryResetVector_text_end;
 #endif
+#ifdef CONFIG_XIP_KERNEL
+extern char _xip_start[];
+extern char _xip_end[];
+#endif
 
 static inline int __init_memblock mem_reserve(unsigned long start,
 					      unsigned long end)
@@ -339,6 +343,9 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	mem_reserve(__pa(_stext), __pa(_end));
+#ifdef CONFIG_XIP_KERNEL
+	mem_reserve(__pa(_xip_start), __pa(_xip_end));
+#endif
 
 #ifdef CONFIG_VECTORS_OFFSET
 	mem_reserve(__pa(&_WindowVectors_text_start),
@@ -398,8 +405,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_VT
 # if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
-# elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
 # endif
 #endif
 }
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index dae83cddd6ca..76cee341507b 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -236,9 +236,9 @@ restore_sigcontext(struct pt_regs *regs, struct rt_sigframe __user *frame)
  * Do a signal return; undo the signal stack.
  */
 
-asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
-				    long a4, long a5, struct pt_regs *regs)
+asmlinkage long xtensa_rt_sigreturn(void)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
 	sigset_t set;
 	int ret;
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 25f4de729a6d..85a9ab1bc04d 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -406,3 +406,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 4a6c495ce9b6..0976e27b8d5d 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -491,32 +491,27 @@ void show_trace(struct task_struct *task, unsigned long *sp)
 
 	pr_info("Call Trace:\n");
 	walk_stackframe(sp, show_trace_cb, NULL);
-#ifndef CONFIG_KALLSYMS
-	pr_cont("\n");
-#endif
 }
 
-static int kstack_depth_to_print = 24;
+#define STACK_DUMP_ENTRY_SIZE 4
+#define STACK_DUMP_LINE_SIZE 32
+static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	int i = 0;
-	unsigned long *stack;
+	size_t len;
 
 	if (!sp)
 		sp = stack_pointer(task);
-	stack = sp;
 
-	pr_info("Stack:\n");
+	len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE),
+		  kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
 
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (kstack_end(sp))
-			break;
-		pr_cont(" %08lx", *sp++);
-		if (i % 8 == 7)
-			pr_cont("\n");
-	}
-	show_trace(task, stack);
+	pr_info("Stack:\n");
+	print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_NONE,
+		       STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
+		       sp, len, false);
+	show_trace(task, sp);
 }
 
 DEFINE_SPINLOCK(die_lock);
@@ -524,12 +519,15 @@ DEFINE_SPINLOCK(die_lock);
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static int die_counter;
+	const char *pr = "";
+
+	if (IS_ENABLED(CONFIG_PREEMPTION))
+		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
 
-	pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter,
-		IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "");
+	pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr);
 	show_regs(regs);
 	if (!user_mode(regs))
 		show_stack(NULL, (unsigned long*)regs->areg[1]);
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 943f10639a93..409c05cac15e 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -14,6 +14,8 @@
  * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
  */
 
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
@@ -117,25 +119,22 @@ SECTIONS
     SCHED_TEXT
     CPUIDLE_TEXT
     LOCK_TEXT
-
+    *(.fixup)
   }
   _etext = .;
   PROVIDE (etext = .);
 
   . = ALIGN(16);
 
-  RODATA
-
-  /*  Relocation table */
+  RO_DATA(4096)
 
-  .fixup   : { *(.fixup) }
-
-  EXCEPTION_TABLE(16)
-  NOTES
   /* Data section */
 
+#ifdef CONFIG_XIP_KERNEL
+  INIT_TEXT_SECTION(PAGE_SIZE)
+#else
   _sdata = .;
-  RW_DATA_SECTION(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
+  RW_DATA(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
   _edata = .;
 
   /* Initialization code and data: */
@@ -147,6 +146,11 @@ SECTIONS
   .init.data :
   {
     INIT_DATA
+  }
+#endif
+
+  .init.rodata :
+  {
     . = ALIGN(0x4);
     __tagtable_begin = .;
     *(.taglist)
@@ -187,12 +191,16 @@ SECTIONS
     RELOCATE_ENTRY(_DebugInterruptVector_text,
 		   .DebugInterruptVector.text);
 #endif
+#ifdef CONFIG_XIP_KERNEL
+    RELOCATE_ENTRY(_xip_data, .data);
+    RELOCATE_ENTRY(_xip_init_data, .init.data);
+#else
 #if defined(CONFIG_SMP)
     RELOCATE_ENTRY(_SecondaryResetVector_text,
 		   .SecondaryResetVector.text);
 #endif
+#endif
 
-  
     __boot_reloc_table_end = ABSOLUTE(.) ;
 
     INIT_SETUP(XCHAL_ICACHE_LINESIZE)
@@ -278,7 +286,7 @@ SECTIONS
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
 
 #endif
-#if defined(CONFIG_SMP)
+#if !defined(CONFIG_XIP_KERNEL) && defined(CONFIG_SMP)
 
   SECTION_VECTOR (_SecondaryResetVector_text,
 		  .SecondaryResetVector.text,
@@ -291,12 +299,48 @@ SECTIONS
 
   . = ALIGN(PAGE_SIZE);
 
+#ifndef CONFIG_XIP_KERNEL
   __init_end = .;
 
   BSS_SECTION(0, 8192, 0)
+#endif
 
   _end = .;
 
+#ifdef CONFIG_XIP_KERNEL
+  . = CONFIG_XIP_DATA_ADDR;
+
+  _xip_start = .;
+
+#undef LOAD_OFFSET
+#define LOAD_OFFSET \
+  (CONFIG_XIP_DATA_ADDR - (LOADADDR(.dummy) + SIZEOF(.dummy) + 3) & ~ 3)
+
+  _xip_data_start = .;
+  _sdata = .;
+  RW_DATA(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
+  _edata = .;
+  _xip_data_end = .;
+
+  /* Initialization data: */
+
+  STRUCT_ALIGN();
+
+  _xip_init_data_start = .;
+  __init_begin = .;
+  .init.data :
+  {
+    INIT_DATA
+  }
+  _xip_init_data_end = .;
+  __init_end = .;
+  BSS_SECTION(0, 8192, 0)
+
+  _xip_end = .;
+
+#undef LOAD_OFFSET
+#endif
+
   DWARF_DEBUG
 
   .xt.prop 0 : { KEEP(*(.xt.prop .xt.prop.* .gnu.linkonce.prop.*)) }
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index f81b1478da61..bee30a77cd70 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -197,6 +197,8 @@ vmalloc_fault:
 		struct mm_struct *act_mm = current->active_mm;
 		int index = pgd_index(address);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
+		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 		pte_t *pte_k;
 
@@ -211,8 +213,18 @@ vmalloc_fault:
 
 		pgd_val(*pgd) = pgd_val(*pgd_k);
 
-		pmd = pmd_offset(pgd, address);
-		pmd_k = pmd_offset(pgd_k, address);
+		p4d = p4d_offset(pgd, address);
+		p4d_k = p4d_offset(pgd_k, address);
+		if (!p4d_present(*p4d) || !p4d_present(*p4d_k))
+			goto bad_page_fault;
+
+		pud = pud_offset(p4d, address);
+		pud_k = pud_offset(p4d_k, address);
+		if (!pud_present(*pud) || !pud_present(*pud_k))
+			goto bad_page_fault;
+
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
 		if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
 			goto bad_page_fault;
 
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index d898ed67d890..19c625e6d81f 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -193,8 +193,8 @@ void __init mem_init(void)
 		((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20,
 		(unsigned long)_text, (unsigned long)_etext,
 		(unsigned long)(_etext - _text) >> 10,
-		(unsigned long)__start_rodata, (unsigned long)_sdata,
-		(unsigned long)(_sdata - __start_rodata) >> 10,
+		(unsigned long)__start_rodata, (unsigned long)__end_rodata,
+		(unsigned long)(__end_rodata - __start_rodata) >> 10,
 		(unsigned long)_sdata, (unsigned long)_edata,
 		(unsigned long)(_edata - _sdata) >> 10,
 		(unsigned long)__init_begin, (unsigned long)__init_end,
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index af7152560bc3..e3baa21ff24c 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -20,7 +20,9 @@ void __init kasan_early_init(void)
 {
 	unsigned long vaddr = KASAN_SHADOW_START;
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	int i;
 
 	for (i = 0; i < PTRS_PER_PTE; ++i)
@@ -42,7 +44,9 @@ static void __init populate(void *start, void *end)
 	unsigned long i, j;
 	unsigned long vaddr = (unsigned long)start;
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
 
 	if (!pte)
@@ -56,7 +60,9 @@ static void __init populate(void *start, void *end)
 
 		for (k = 0; k < PTRS_PER_PTE; ++k, ++j) {
 			phys_addr_t phys =
-				memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+				memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE,
+							  0,
+							  MEMBLOCK_ALLOC_ANYWHERE);
 
 			if (!phys)
 				panic("Failed to allocate page table page\n");
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 03678c4afc39..37e478a27877 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -22,7 +22,9 @@
 static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
 {
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte;
 	unsigned long i;
 
diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
index 59153d0aa890..f436cf2efd8b 100644
--- a/arch/xtensa/mm/tlb.c
+++ b/arch/xtensa/mm/tlb.c
@@ -169,6 +169,8 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
 	struct task_struct *task = get_current();
 	struct mm_struct *mm = task->mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
@@ -177,7 +179,13 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
 	pgd = pgd_offset(mm, vaddr);
 	if (pgd_none_or_clear_bad(pgd))
 		return 0;
-	pmd = pmd_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	if (p4d_none_or_clear_bad(p4d))
+		return 0;
+	pud = pud_offset(p4d, vaddr);
+	if (pud_none_or_clear_bad(pud))
+		return 0;
+	pmd = pmd_offset(pud, vaddr);
 	if (pmd_none_or_clear_bad(pmd))
 		return 0;
 	pte = pte_offset_map(pmd, vaddr);
@@ -216,6 +224,8 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb)
 	unsigned tlbidx = w | (e << PAGE_SHIFT);
 	unsigned r0 = dtlb ?
 		read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx);
+	unsigned r1 = dtlb ?
+		read_dtlb_translation(tlbidx) : read_itlb_translation(tlbidx);
 	unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT);
 	unsigned pte = get_pte_for_vaddr(vpn);
 	unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK;
@@ -231,8 +241,6 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb)
 	}
 
 	if (tlb_asid == mm_asid) {
-		unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) :
-			read_itlb_translation(tlbidx);
 		if ((pte ^ r1) & PAGE_MASK) {
 			pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n",
 					dtlb ? 'D' : 'I', w, e, r0, r1, pte);
diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h
index 2ba45858e50a..4e2a48380dbf 100644
--- a/arch/xtensa/platforms/iss/include/platform/simcall.h
+++ b/arch/xtensa/platforms/iss/include/platform/simcall.h
@@ -113,9 +113,9 @@ static inline int simc_write(int fd, const void *buf, size_t count)
 
 static inline int simc_poll(int fd)
 {
-	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+	long timeval[2] = { 0, 0 };
 
-	return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv);
+	return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&timeval);
 }
 
 static inline int simc_lseek(int fd, uint32_t off, int whence)
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index fa9f3893b002..4986226a5ab2 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -455,7 +455,7 @@ static void iss_net_set_multicast_list(struct net_device *dev)
 {
 }
 
-static void iss_net_tx_timeout(struct net_device *dev)
+static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 }
 
diff --git a/block/Kconfig b/block/Kconfig
index 41c0917ce622..3bc76bb113a0 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME
 config BLK_SCSI_REQUEST
 	bool
 
+config BLK_CGROUP_RWSTAT
+	bool
+
 config BLK_DEV_BSG
 	bool "Block layer SG support v4"
 	default y
@@ -63,7 +66,6 @@ config BLK_DEV_BSGLIB
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
-	select CRC_T10DIF if BLK_DEV_INTEGRITY
 	---help---
 	Some storage devices allow extra information to be
 	stored/retrieved to help protect the data.  The block layer
@@ -74,6 +76,11 @@ config BLK_DEV_INTEGRITY
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
+config BLK_DEV_INTEGRITY_T10
+	tristate
+	depends on BLK_DEV_INTEGRITY
+	select CRC_T10DIF
+
 config BLK_DEV_ZONED
 	bool "Zoned block device support"
 	select MQ_IOSCHED_DEADLINE
@@ -86,6 +93,7 @@ config BLK_DEV_ZONED
 config BLK_DEV_THROTTLING
 	bool "Block layer bio throttling support"
 	depends on BLK_CGROUP=y
+	select BLK_CGROUP_RWSTAT
 	---help---
 	Block layer bio throttling support. It can be used to limit
 	the IO rate to a device. IO rate policies are per cgroup and
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index b89310a022ad..7df14133adc8 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -31,6 +31,7 @@ config IOSCHED_BFQ
 config BFQ_GROUP_IOSCHED
        bool "BFQ hierarchical scheduling support"
        depends on IOSCHED_BFQ && BLK_CGROUP
+       select BLK_CGROUP_RWSTAT
        ---help---
 
        Enable hierarchical scheduling in BFQ, using the blkio
diff --git a/block/Makefile b/block/Makefile
index 9ef57ace90d4..1a43750f4b01 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
+obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
@@ -24,9 +25,9 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o
 
-obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY_T10)	+= t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
 obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
 obj-$(CONFIG_BLK_MQ_RDMA)	+= blk-mq-rdma.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 86a607cf19a1..e1419edde2ec 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -347,6 +347,17 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
 	bfqg_put(bfqg);
 }
 
+void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
+{
+	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
+
+	if (!bfqg)
+		return;
+
+	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
+	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
+}
+
 /* @stats = 0 */
 static void bfqg_stats_reset(struct bfqg_stats *stats)
 {
@@ -431,6 +442,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
 
 static void bfqg_stats_exit(struct bfqg_stats *stats)
 {
+	blkg_rwstat_exit(&stats->bytes);
+	blkg_rwstat_exit(&stats->ios);
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 	blkg_rwstat_exit(&stats->merged);
 	blkg_rwstat_exit(&stats->service_time);
@@ -448,6 +461,10 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
 
 static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
 {
+	if (blkg_rwstat_init(&stats->bytes, gfp) ||
+	    blkg_rwstat_init(&stats->ios, gfp))
+		return -ENOMEM;
+
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 	if (blkg_rwstat_init(&stats->merged, gfp) ||
 	    blkg_rwstat_init(&stats->service_time, gfp) ||
@@ -1057,18 +1074,35 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
 	return bfq_io_set_device_weight(of, buf, nbytes, off);
 }
 
-#ifdef CONFIG_BFQ_CGROUP_DEBUG
-static int bfqg_print_stat(struct seq_file *sf, void *v)
+static int bfqg_print_rwstat(struct seq_file *sf, void *v)
 {
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
-			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
+			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
 	return 0;
 }
 
-static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+					struct blkg_policy_data *pd, int off)
 {
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
-			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
+	struct blkg_rwstat_sample sum;
+
+	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
+	return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
+			  seq_cft(sf)->private, true);
+	return 0;
+}
+
+#ifdef CONFIG_BFQ_CGROUP_DEBUG
+static int bfqg_print_stat(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
+			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
 	return 0;
 }
 
@@ -1097,15 +1131,6 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
 	return __blkg_prfill_u64(sf, pd, sum);
 }
 
-static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
-					struct blkg_policy_data *pd, int off)
-{
-	struct blkg_rwstat_sample sum;
-
-	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
-	return __blkg_prfill_rwstat(sf, pd, &sum);
-}
-
 static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
 {
 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
@@ -1114,18 +1139,11 @@ static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
-			  seq_cft(sf)->private, true);
-	return 0;
-}
-
 static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
 			       int off)
 {
-	u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
+	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
+	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
 
 	return __blkg_prfill_u64(sf, pd, sum >> 9);
 }
@@ -1142,8 +1160,8 @@ static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
 {
 	struct blkg_rwstat_sample tmp;
 
-	blkg_rwstat_recursive_sum(pd->blkg, NULL,
-			offsetof(struct blkcg_gq, stat_bytes), &tmp);
+	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
+			offsetof(struct bfq_group, stats.bytes), &tmp);
 
 	return __blkg_prfill_u64(sf, pd,
 		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
@@ -1226,13 +1244,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
 	/* statistics, covers only the tasks in the bfqg */
 	{
 		.name = "bfq.io_service_bytes",
-		.private = (unsigned long)&blkcg_policy_bfq,
-		.seq_show = blkg_print_stat_bytes,
+		.private = offsetof(struct bfq_group, stats.bytes),
+		.seq_show = bfqg_print_rwstat,
 	},
 	{
 		.name = "bfq.io_serviced",
-		.private = (unsigned long)&blkcg_policy_bfq,
-		.seq_show = blkg_print_stat_ios,
+		.private = offsetof(struct bfq_group, stats.ios),
+		.seq_show = bfqg_print_rwstat,
 	},
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 	{
@@ -1269,13 +1287,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
 	/* the same statistics which cover the bfqg and its descendants */
 	{
 		.name = "bfq.io_service_bytes_recursive",
-		.private = (unsigned long)&blkcg_policy_bfq,
-		.seq_show = blkg_print_stat_bytes_recursive,
+		.private = offsetof(struct bfq_group, stats.bytes),
+		.seq_show = bfqg_print_rwstat_recursive,
 	},
 	{
 		.name = "bfq.io_serviced_recursive",
-		.private = (unsigned long)&blkcg_policy_bfq,
-		.seq_show = blkg_print_stat_ios_recursive,
+		.private = offsetof(struct bfq_group, stats.ios),
+		.seq_show = bfqg_print_rwstat_recursive,
 	},
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 	{
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0c6214497fcc..4686b68b48b4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -427,7 +427,6 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd)
 }
 
 #define bfq_class_idle(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define bfq_class_rt(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
 #define bfq_sample_valid(samples)	((samples) > 80)
 
@@ -5484,6 +5483,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
 
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
+		bfqg_stats_update_legacy_io(q, rq);
+#endif
 	spin_lock_irq(&bfqd->lock);
 	if (blk_mq_sched_try_insert_merge(q, rq)) {
 		spin_unlock_irq(&bfqd->lock);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 5d1a519640f6..8526f20c53bc 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -10,6 +10,8 @@
 #include <linux/hrtimer.h>
 #include <linux/blk-cgroup.h>
 
+#include "blk-cgroup-rwstat.h"
+
 #define BFQ_IOPRIO_CLASSES	3
 #define BFQ_CL_IDLE_TIMEOUT	(HZ/5)
 
@@ -809,6 +811,9 @@ struct bfq_stat {
 };
 
 struct bfqg_stats {
+	/* basic stats */
+	struct blkg_rwstat		bytes;
+	struct blkg_rwstat		ios;
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 	/* number of ios merged */
 	struct blkg_rwstat		merged;
@@ -956,6 +961,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 
 /* ---------------- cgroups-support interface ---------------- */
 
+void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
 			      unsigned int op);
 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
@@ -1062,6 +1068,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
 #define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
 	char pid_str[MAX_PID_STR_LENGTH];	\
+	if (likely(!blk_trace_note_message_enabled((bfqd)->queue)))	\
+		break;							\
 	bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH);	\
 	blk_add_cgroup_trace_msg((bfqd)->queue,				\
 			bfqg_to_blkg(bfqq_group(bfqq))->blkcg,		\
@@ -1078,6 +1086,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
 #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do {	\
 	char pid_str[MAX_PID_STR_LENGTH];	\
+	if (likely(!blk_trace_note_message_enabled((bfqd)->queue)))	\
+		break;							\
 	bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH);	\
 	blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str,	\
 			bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 05f0bf4a1144..ffe9ce9faa89 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -277,10 +277,7 @@ struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
  */
 static u64 bfq_delta(unsigned long service, unsigned long weight)
 {
-	u64 d = (u64)service << WFQ_SERVICE_SHIFT;
-
-	do_div(d, weight);
-	return d;
+	return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight);
 }
 
 /**
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index fb95dbb21dd8..bf62c25cde8f 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-static void bio_integrity_free(struct bio *bio)
+void bio_integrity_free(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
diff --git a/block/bio.c b/block/bio.c
index b1170ec18464..94d697217887 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -233,6 +233,9 @@ fallback:
 void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_blkg(bio);
+
+	if (bio_integrity(bio))
+		bio_integrity_free(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -536,6 +539,55 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 EXPORT_SYMBOL(zero_fill_bio_iter);
 
 /**
+ * bio_truncate - truncate the bio to small size of @new_size
+ * @bio:	the bio to be truncated
+ * @new_size:	new size for truncating the bio
+ *
+ * Description:
+ *   Truncate the bio to new size of @new_size. If bio_op(bio) is
+ *   REQ_OP_READ, zero the truncated part. This function should only
+ *   be used for handling corner cases, such as bio eod.
+ */
+void bio_truncate(struct bio *bio, unsigned new_size)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	unsigned int done = 0;
+	bool truncated = false;
+
+	if (new_size >= bio->bi_iter.bi_size)
+		return;
+
+	if (bio_op(bio) != REQ_OP_READ)
+		goto exit;
+
+	bio_for_each_segment(bv, bio, iter) {
+		if (done + bv.bv_len > new_size) {
+			unsigned offset;
+
+			if (!truncated)
+				offset = new_size - done;
+			else
+				offset = 0;
+			zero_user(bv.bv_page, offset, bv.bv_len - offset);
+			truncated = true;
+		}
+		done += bv.bv_len;
+	}
+
+ exit:
+	/*
+	 * Don't touch bvec table here and make it really immutable, since
+	 * fs bio user has to retrieve all pages via bio_for_each_segment_all
+	 * in its .end_bio() callback.
+	 *
+	 * It is enough to truncate bio by updating .bi_size since we can make
+	 * correct bvec with the updated .bi_size for drivers.
+	 */
+	bio->bi_iter.bi_size = new_size;
+}
+
+/**
  * bio_put - release a reference to a bio
  * @bio:   bio to release reference to
  *
@@ -751,10 +803,12 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;
 
-	if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
+	if (bio->bi_vcnt > 0) {
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {
+			if (bio->bi_iter.bi_size > UINT_MAX - len)
+				return false;
 			bv->bv_len += len;
 			bio->bi_iter.bi_size += len;
 			return true;
diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c
new file mode 100644
index 000000000000..85d5790ac49b
--- /dev/null
+++ b/block/blk-cgroup-rwstat.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
+ * Do not use in new code.
+ */
+#include "blk-cgroup-rwstat.h"
+
+int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
+{
+	int i, ret;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
+		if (ret) {
+			while (--i >= 0)
+				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+			return ret;
+		}
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_init);
+
+void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
+{
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_exit);
+
+/**
+ * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
+ * @sf: seq_file to print to
+ * @pd: policy private data of interest
+ * @rwstat: rwstat to print
+ *
+ * Print @rwstat to @sf for the device assocaited with @pd.
+ */
+u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+			 const struct blkg_rwstat_sample *rwstat)
+{
+	static const char *rwstr[] = {
+		[BLKG_RWSTAT_READ]	= "Read",
+		[BLKG_RWSTAT_WRITE]	= "Write",
+		[BLKG_RWSTAT_SYNC]	= "Sync",
+		[BLKG_RWSTAT_ASYNC]	= "Async",
+		[BLKG_RWSTAT_DISCARD]	= "Discard",
+	};
+	const char *dname = blkg_dev_name(pd->blkg);
+	u64 v;
+	int i;
+
+	if (!dname)
+		return 0;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
+			   rwstat->cnt[i]);
+
+	v = rwstat->cnt[BLKG_RWSTAT_READ] +
+		rwstat->cnt[BLKG_RWSTAT_WRITE] +
+		rwstat->cnt[BLKG_RWSTAT_DISCARD];
+	seq_printf(sf, "%s Total %llu\n", dname, v);
+	return v;
+}
+EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
+
+/**
+ * blkg_prfill_rwstat - prfill callback for blkg_rwstat
+ * @sf: seq_file to print to
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_rwstat in @pd
+ *
+ * prfill callback for printing a blkg_rwstat.
+ */
+u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+		       int off)
+{
+	struct blkg_rwstat_sample rwstat = { };
+
+	blkg_rwstat_read((void *)pd + off, &rwstat);
+	return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
+
+/**
+ * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_rwstat
+ * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
+ * @sum: blkg_rwstat_sample structure containing the results
+ *
+ * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts.  The caller must be holding the
+ * queue lock for online tests.
+ *
+ * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
+ * is at @off bytes into @blkg's blkg_policy_data of the policy.
+ */
+void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
+		int off, struct blkg_rwstat_sample *sum)
+{
+	struct blkcg_gq *pos_blkg;
+	struct cgroup_subsys_state *pos_css;
+	unsigned int i;
+
+	lockdep_assert_held(&blkg->q->queue_lock);
+
+	rcu_read_lock();
+	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+		struct blkg_rwstat *rwstat;
+
+		if (!pos_blkg->online)
+			continue;
+
+		if (pol)
+			rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+		else
+			rwstat = (void *)pos_blkg + off;
+
+		for (i = 0; i < BLKG_RWSTAT_NR; i++)
+			sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
new file mode 100644
index 000000000000..ee746919c41f
--- /dev/null
+++ b/block/blk-cgroup-rwstat.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
+ * Do not use in new code.
+ */
+#ifndef _BLK_CGROUP_RWSTAT_H
+#define _BLK_CGROUP_RWSTAT_H
+
+#include <linux/blk-cgroup.h>
+
+enum blkg_rwstat_type {
+	BLKG_RWSTAT_READ,
+	BLKG_RWSTAT_WRITE,
+	BLKG_RWSTAT_SYNC,
+	BLKG_RWSTAT_ASYNC,
+	BLKG_RWSTAT_DISCARD,
+
+	BLKG_RWSTAT_NR,
+	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
+};
+
+/*
+ * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
+ * recursive.  Used to carry stats of dead children.
+ */
+struct blkg_rwstat {
+	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
+	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
+};
+
+struct blkg_rwstat_sample {
+	u64				cnt[BLKG_RWSTAT_NR];
+};
+
+static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
+		unsigned int idx)
+{
+	return atomic64_read(&rwstat->aux_cnt[idx]) +
+		percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
+}
+
+int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp);
+void blkg_rwstat_exit(struct blkg_rwstat *rwstat);
+u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+			 const struct blkg_rwstat_sample *rwstat);
+u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+		       int off);
+void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
+		int off, struct blkg_rwstat_sample *sum);
+
+
+/**
+ * blkg_rwstat_add - add a value to a blkg_rwstat
+ * @rwstat: target blkg_rwstat
+ * @op: REQ_OP and flags
+ * @val: value to add
+ *
+ * Add @val to @rwstat.  The counters are chosen according to @rw.  The
+ * caller is responsible for synchronizing calls to this function.
+ */
+static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
+				   unsigned int op, uint64_t val)
+{
+	struct percpu_counter *cnt;
+
+	if (op_is_discard(op))
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+	else if (op_is_write(op))
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
+	else
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
+
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
+
+	if (op_is_sync(op))
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
+	else
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
+
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
+}
+
+/**
+ * blkg_rwstat_read - read the current values of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Read the current snapshot of @rwstat and return it in the aux counts.
+ */
+static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
+		struct blkg_rwstat_sample *result)
+{
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		result->cnt[i] =
+			percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
+}
+
+/**
+ * blkg_rwstat_total - read the total count of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Return the total count of @rwstat regardless of the IO direction.  This
+ * function can be called without synchronization and takes care of u64
+ * atomicity.
+ */
+static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
+{
+	struct blkg_rwstat_sample tmp = { };
+
+	blkg_rwstat_read(rwstat, &tmp);
+	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+}
+
+/**
+ * blkg_rwstat_reset - reset a blkg_rwstat
+ * @rwstat: blkg_rwstat to reset
+ */
+static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
+{
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
+}
+
+/**
+ * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
+ * @to: the destination blkg_rwstat
+ * @from: the source
+ *
+ * Add @from's count including the aux one to @to's aux count.
+ */
+static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
+				       struct blkg_rwstat *from)
+{
+	u64 sum[BLKG_RWSTAT_NR];
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
+			     &to->aux_cnt[i]);
+}
+#endif	/* _BLK_CGROUP_RWSTAT_H */
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1eb8895be4c6..a229b94d5390 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -80,8 +80,7 @@ static void blkg_free(struct blkcg_gq *blkg)
 		if (blkg->pd[i])
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
-	blkg_rwstat_exit(&blkg->stat_ios);
-	blkg_rwstat_exit(&blkg->stat_bytes);
+	free_percpu(blkg->iostat_cpu);
 	percpu_ref_exit(&blkg->refcnt);
 	kfree(blkg);
 }
@@ -146,7 +145,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 				   gfp_t gfp_mask)
 {
 	struct blkcg_gq *blkg;
-	int i;
+	int i, cpu;
 
 	/* alloc and init base part */
 	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
@@ -156,8 +155,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
 		goto err_free;
 
-	if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
-	    blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+	blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
+	if (!blkg->iostat_cpu)
 		goto err_free;
 
 	blkg->q = q;
@@ -167,6 +166,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
 	blkg->blkcg = blkcg;
 
+	u64_stats_init(&blkg->iostat.sync);
+	for_each_possible_cpu(cpu)
+		u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
+
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkcg_policy *pol = blkcg_policy[i];
 		struct blkg_policy_data *pd;
@@ -393,7 +396,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
-	struct blkcg_gq *parent = blkg->parent;
 	int i;
 
 	lockdep_assert_held(&blkg->q->queue_lock);
@@ -410,11 +412,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 			pol->pd_offline_fn(blkg->pd[i]);
 	}
 
-	if (parent) {
-		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
-		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
-	}
-
 	blkg->online = false;
 
 	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
@@ -464,7 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
-	int i;
+	int i, cpu;
 
 	mutex_lock(&blkcg_pol_mutex);
 	spin_lock_irq(&blkcg->lock);
@@ -475,8 +472,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 	 * anyway.  If you get hit by a race, retry.
 	 */
 	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		blkg_rwstat_reset(&blkg->stat_bytes);
-		blkg_rwstat_reset(&blkg->stat_ios);
+		for_each_possible_cpu(cpu) {
+			struct blkg_iostat_set *bis =
+				per_cpu_ptr(blkg->iostat_cpu, cpu);
+			memset(bis, 0, sizeof(*bis));
+		}
+		memset(&blkg->iostat, 0, sizeof(blkg->iostat));
 
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
@@ -560,186 +561,6 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
 }
 EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
 
-/**
- * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @rwstat: rwstat to print
- *
- * Print @rwstat to @sf for the device assocaited with @pd.
- */
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-			 const struct blkg_rwstat_sample *rwstat)
-{
-	static const char *rwstr[] = {
-		[BLKG_RWSTAT_READ]	= "Read",
-		[BLKG_RWSTAT_WRITE]	= "Write",
-		[BLKG_RWSTAT_SYNC]	= "Sync",
-		[BLKG_RWSTAT_ASYNC]	= "Async",
-		[BLKG_RWSTAT_DISCARD]	= "Discard",
-	};
-	const char *dname = blkg_dev_name(pd->blkg);
-	u64 v;
-	int i;
-
-	if (!dname)
-		return 0;
-
-	for (i = 0; i < BLKG_RWSTAT_NR; i++)
-		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
-			   rwstat->cnt[i]);
-
-	v = rwstat->cnt[BLKG_RWSTAT_READ] +
-		rwstat->cnt[BLKG_RWSTAT_WRITE] +
-		rwstat->cnt[BLKG_RWSTAT_DISCARD];
-	seq_printf(sf, "%s Total %llu\n", dname, v);
-	return v;
-}
-EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
-
-/**
- * blkg_prfill_rwstat - prfill callback for blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @off: offset to the blkg_rwstat in @pd
- *
- * prfill callback for printing a blkg_rwstat.
- */
-u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-		       int off)
-{
-	struct blkg_rwstat_sample rwstat = { };
-
-	blkg_rwstat_read((void *)pd + off, &rwstat);
-	return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
-
-static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
-				    struct blkg_policy_data *pd, int off)
-{
-	struct blkg_rwstat_sample rwstat = { };
-
-	blkg_rwstat_read((void *)pd->blkg + off, &rwstat);
-	return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_bytes.
- * cftype->private must be set to the blkcg_policy.
- */
-int blkg_print_stat_bytes(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
-			  offsetof(struct blkcg_gq, stat_bytes), true);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_ios.  cftype->private
- * must be set to the blkcg_policy.
- */
-int blkg_print_stat_ios(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
-			  offsetof(struct blkcg_gq, stat_ios), true);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
-
-static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
-					      struct blkg_policy_data *pd,
-					      int off)
-{
-	struct blkg_rwstat_sample rwstat;
-
-	blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat);
-	return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  blkg_prfill_rwstat_field_recursive,
-			  (void *)seq_cft(sf)->private,
-			  offsetof(struct blkcg_gq, stat_bytes), true);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
-
-/**
- * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  blkg_prfill_rwstat_field_recursive,
-			  (void *)seq_cft(sf)->private,
-			  offsetof(struct blkcg_gq, stat_ios), true);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
-
-/**
- * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
- * @blkg: blkg of interest
- * @pol: blkcg_policy which contains the blkg_rwstat
- * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
- * @sum: blkg_rwstat_sample structure containing the results
- *
- * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
- * online descendants and their aux counts.  The caller must be holding the
- * queue lock for online tests.
- *
- * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
- * is at @off bytes into @blkg's blkg_policy_data of the policy.
- */
-void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
-		int off, struct blkg_rwstat_sample *sum)
-{
-	struct blkcg_gq *pos_blkg;
-	struct cgroup_subsys_state *pos_css;
-	unsigned int i;
-
-	lockdep_assert_held(&blkg->q->queue_lock);
-
-	rcu_read_lock();
-	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
-		struct blkg_rwstat *rwstat;
-
-		if (!pos_blkg->online)
-			continue;
-
-		if (pol)
-			rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
-		else
-			rwstat = (void *)pos_blkg + off;
-
-		for (i = 0; i < BLKG_RWSTAT_NR; i++)
-			sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
-	}
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
-
 /* Performs queue bypass and policy enabled checks then looks up blkg. */
 static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
 					  const struct blkcg_policy *pol,
@@ -923,16 +744,18 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
 	struct blkcg_gq *blkg;
 
+	cgroup_rstat_flush(blkcg->css.cgroup);
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct blkg_iostat_set *bis = &blkg->iostat;
 		const char *dname;
 		char *buf;
-		struct blkg_rwstat_sample rwstat;
 		u64 rbytes, wbytes, rios, wios, dbytes, dios;
 		size_t size = seq_get_buf(sf, &buf), off = 0;
 		int i;
 		bool has_stats = false;
+		unsigned seq;
 
 		spin_lock_irq(&blkg->q->queue_lock);
 
@@ -951,17 +774,16 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 		 */
 		off += scnprintf(buf+off, size-off, "%s ", dname);
 
-		blkg_rwstat_recursive_sum(blkg, NULL,
-				offsetof(struct blkcg_gq, stat_bytes), &rwstat);
-		rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
-		wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE];
-		dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD];
+		do {
+			seq = u64_stats_fetch_begin(&bis->sync);
 
-		blkg_rwstat_recursive_sum(blkg, NULL,
-					offsetof(struct blkcg_gq, stat_ios), &rwstat);
-		rios = rwstat.cnt[BLKG_RWSTAT_READ];
-		wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
-		dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
+			rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+			wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+			dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+			rios = bis->cur.ios[BLKG_IOSTAT_READ];
+			wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+			dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+		} while (u64_stats_fetch_retry(&bis->sync, seq));
 
 		if (rbytes || wbytes || rios || wios) {
 			has_stats = true;
@@ -1240,26 +1062,6 @@ err_unlock:
 }
 
 /**
- * blkcg_drain_queue - drain blkcg part of request_queue
- * @q: request_queue to drain
- *
- * Called from blk_drain_queue().  Responsible for draining blkcg part.
- */
-void blkcg_drain_queue(struct request_queue *q)
-{
-	lockdep_assert_held(&q->queue_lock);
-
-	/*
-	 * @q could be exiting and already have destroyed all blkgs as
-	 * indicated by NULL root_blkg.  If so, don't confuse policies.
-	 */
-	if (!q->root_blkg)
-		return;
-
-	blk_throtl_drain(q);
-}
-
-/**
  * blkcg_exit_queue - exit and release blkcg part of request_queue
  * @q: request_queue being released
  *
@@ -1297,6 +1099,77 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
 	return ret;
 }
 
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+	int i;
+
+	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+		dst->bytes[i] = src->bytes[i];
+		dst->ios[i] = src->ios[i];
+	}
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+	int i;
+
+	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+		dst->bytes[i] += src->bytes[i];
+		dst->ios[i] += src->ios[i];
+	}
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+	int i;
+
+	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+		dst->bytes[i] -= src->bytes[i];
+		dst->ios[i] -= src->ios[i];
+	}
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct blkcg_gq *parent = blkg->parent;
+		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+		struct blkg_iostat cur, delta;
+		unsigned seq;
+
+		/* fetch the current per-cpu values */
+		do {
+			seq = u64_stats_fetch_begin(&bisc->sync);
+			blkg_iostat_set(&cur, &bisc->cur);
+		} while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+		/* propagate percpu delta to global */
+		u64_stats_update_begin(&blkg->iostat.sync);
+		blkg_iostat_set(&delta, &cur);
+		blkg_iostat_sub(&delta, &bisc->last);
+		blkg_iostat_add(&blkg->iostat.cur, &delta);
+		blkg_iostat_add(&bisc->last, &delta);
+		u64_stats_update_end(&blkg->iostat.sync);
+
+		/* propagate global delta to parent */
+		if (parent) {
+			u64_stats_update_begin(&parent->iostat.sync);
+			blkg_iostat_set(&delta, &blkg->iostat.cur);
+			blkg_iostat_sub(&delta, &blkg->iostat.last);
+			blkg_iostat_add(&parent->iostat.cur, &delta);
+			blkg_iostat_add(&blkg->iostat.last, &delta);
+			u64_stats_update_end(&parent->iostat.sync);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
 static void blkcg_bind(struct cgroup_subsys_state *root_css)
 {
 	int i;
@@ -1329,6 +1202,7 @@ struct cgroup_subsys io_cgrp_subsys = {
 	.css_offline = blkcg_css_offline,
 	.css_free = blkcg_css_free,
 	.can_attach = blkcg_can_attach,
+	.css_rstat_flush = blkcg_rstat_flush,
 	.bind = blkcg_bind,
 	.dfl_cftypes = blkcg_files,
 	.legacy_cftypes = blkcg_legacy_files,
diff --git a/block/blk-core.c b/block/blk-core.c
index d5e668ec751b..089e890ab208 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
 	REQ_OP_NAME(SECURE_ERASE),
 	REQ_OP_NAME(ZONE_RESET),
 	REQ_OP_NAME(ZONE_RESET_ALL),
+	REQ_OP_NAME(ZONE_OPEN),
+	REQ_OP_NAME(ZONE_CLOSE),
+	REQ_OP_NAME(ZONE_FINISH),
 	REQ_OP_NAME(WRITE_SAME),
 	REQ_OP_NAME(WRITE_ZEROES),
 	REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
+	WARN_ON_ONCE(blk_queue_registered(q));
+
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
-	mutex_lock(&q->sysfs_lock);
 	blk_set_queue_dying(q);
 
 	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
 	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-	mutex_unlock(&q->sysfs_lock);
 
 	/*
 	 * Drain all requests queued before DYING marking. Set DEAD flag to
@@ -848,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio)
 	if (unlikely(bio_check_ro(bio, p)))
 		goto out;
 
-	/*
-	 * Zone reset does not include bi_size so bio_sectors() is always 0.
-	 * Include a test for the reset op code and perform the remap if needed.
-	 */
-	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+	if (bio_sectors(bio)) {
 		if (bio_check_eod(bio, part_nr_sects_read(p)))
 			goto out;
 		bio->bi_iter.bi_sector += p->start_sect;
@@ -886,11 +885,14 @@ generic_make_request_checks(struct bio *bio)
 	}
 
 	/*
-	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-	 * if queue is not a request based queue.
+	 * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
+	 * with BLK_STS_AGAIN status in order to catch -EAGAIN and
+	 * to give a chance to the caller to repeat request gracefully.
 	 */
-	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
-		goto not_supported;
+	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
+		status = BLK_STS_AGAIN;
+		goto end_io;
+	}
 
 	if (should_fail_bio(bio))
 		goto end_io;
@@ -936,6 +938,9 @@ generic_make_request_checks(struct bio *bio)
 			goto not_supported;
 		break;
 	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
 		if (!blk_queue_is_zoned(q))
 			goto not_supported;
 		break;
@@ -1308,7 +1313,7 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	if (blk_do_io_stat(req)) {
+	if (req->part && blk_do_io_stat(req)) {
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 
@@ -1326,7 +1331,8 @@ void blk_account_io_done(struct request *req, u64 now)
 	 * normal IO on queueing nor completion.  Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
+	if (req->part && blk_do_io_stat(req) &&
+	    !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 
@@ -1790,9 +1796,9 @@ int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
-			FIELD_SIZEOF(struct request, cmd_flags));
+			sizeof_field(struct request, cmd_flags));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
-			FIELD_SIZEOF(struct bio, bi_opf));
+			sizeof_field(struct bio, bi_opf));
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 1db44ca0f4a6..e20a852ae432 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
 
+	blk_account_io_start(rq, true);
+
 	/*
 	 * don't check dying flag for MQ because the request won't
 	 * be reused after dying flag is set
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 1eec9cbe5a0a..3f977c517960 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -69,6 +69,7 @@
 #include <linux/blkdev.h>
 #include <linux/gfp.h>
 #include <linux/blk-mq.h>
+#include <linux/lockdep.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -136,6 +137,17 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
 	blk_mq_add_to_requeue_list(rq, add_front, true);
 }
 
+static void blk_account_io_flush(struct request *rq)
+{
+	struct hd_struct *part = &rq->rq_disk->part0;
+
+	part_stat_lock();
+	part_stat_inc(part, ios[STAT_FLUSH]);
+	part_stat_add(part, nsecs[STAT_FLUSH],
+		      ktime_get_ns() - rq->start_time_ns);
+	part_stat_unlock();
+}
+
 /**
  * blk_flush_complete_seq - complete flush sequence
  * @rq: PREFLUSH/FUA request being sequenced
@@ -185,7 +197,7 @@ static void blk_flush_complete_seq(struct request *rq,
 
 	case REQ_FSEQ_DONE:
 		/*
-		 * @rq was previously adjusted by blk_flush_issue() for
+		 * @rq was previously adjusted by blk_insert_flush() for
 		 * flush sequencing and may already have gone through the
 		 * flush data request completion path.  Restore @rq for
 		 * normal completion and end it.
@@ -212,6 +224,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
 	struct blk_mq_hw_ctx *hctx;
 
+	blk_account_io_flush(flush_rq);
+
 	/* release the tag's ownership to the req cloned from */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
@@ -492,6 +506,9 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
 	INIT_LIST_HEAD(&fq->flush_queue[1]);
 	INIT_LIST_HEAD(&fq->flush_data_in_flight);
 
+	lockdep_register_key(&fq->key);
+	lockdep_set_class(&fq->mq_flush_lock, &fq->key);
+
 	return fq;
 
  fail_rq:
@@ -506,6 +523,7 @@ void blk_free_flush_queue(struct blk_flush_queue *fq)
 	if (!fq)
 		return;
 
+	lockdep_unregister_key(&fq->key);
 	kfree(fq->flush_rq);
 	kfree(fq);
 }
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index e01267f99183..27ca68621137 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1212,7 +1212,7 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
 {
 	struct ioc *ioc = iocg->ioc;
 	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
@@ -1229,11 +1229,11 @@ static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
 	/* clear or maintain depending on the overage */
 	if (time_before_eq64(vtime, now->vnow)) {
 		blkcg_clear_delay(blkg);
-		return;
+		return false;
 	}
 	if (!atomic_read(&blkg->use_delay) &&
 	    time_before_eq64(vtime, now->vnow + vmargin))
-		return;
+		return false;
 
 	/* use delay */
 	if (cost) {
@@ -1250,10 +1250,11 @@ static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
 	oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
 	if (hrtimer_is_queued(&iocg->delay_timer) &&
 	    abs(oexpires - expires) <= margin_ns / 4)
-		return;
+		return true;
 
 	hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
 			       margin_ns / 4, HRTIMER_MODE_ABS);
+	return true;
 }
 
 static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
@@ -1739,7 +1740,9 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	 */
 	if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
 		atomic64_add(abs_cost, &iocg->abs_vdebt);
-		iocg_kick_delay(iocg, &now, cost);
+		if (iocg_kick_delay(iocg, &now, cost))
+			blkcg_schedule_throttle(rqos->q,
+					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
 		return;
 	}
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 3a62e471d81b..b0790268ed9d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -151,7 +151,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	return 0;
 
 unmap_rq:
-	__blk_rq_unmap_user(bio);
+	blk_rq_unmap_user(bio);
 fail:
 	rq->bio = NULL;
 	return ret;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 48e6725b32ee..1534ed736363 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -157,17 +157,20 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 	return sectors & (lbs - 1);
 }
 
-static unsigned get_max_segment_size(const struct request_queue *q,
-				     unsigned offset)
+static inline unsigned get_max_segment_size(const struct request_queue *q,
+					    struct page *start_page,
+					    unsigned long offset)
 {
 	unsigned long mask = queue_segment_boundary(q);
 
-	/* default segment boundary mask means no boundary limit */
-	if (mask == BLK_SEG_BOUNDARY_MASK)
-		return queue_max_segment_size(q);
+	offset = mask & (page_to_phys(start_page) + offset);
 
-	return min_t(unsigned long, mask - (mask & offset) + 1,
-		     queue_max_segment_size(q));
+	/*
+	 * overflow may be triggered in case of zero page physical address
+	 * on 32bit arch, use queue's max segment size when that happens.
+	 */
+	return min_not_zero(mask - offset + 1,
+			(unsigned long)queue_max_segment_size(q));
 }
 
 /**
@@ -201,7 +204,8 @@ static bool bvec_split_segs(const struct request_queue *q,
 	unsigned seg_size = 0;
 
 	while (len && *nsegs < max_segs) {
-		seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
+		seg_size = get_max_segment_size(q, bv->bv_page,
+						bv->bv_offset + total_len);
 		seg_size = min(seg_size, len);
 
 		(*nsegs)++;
@@ -293,7 +297,7 @@ split:
 void __blk_queue_split(struct request_queue *q, struct bio **bio,
 		unsigned int *nr_segs)
 {
-	struct bio *split;
+	struct bio *split = NULL;
 
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
@@ -309,6 +313,21 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
 				nr_segs);
 		break;
 	default:
+		/*
+		 * All drivers must accept single-segments bios that are <=
+		 * PAGE_SIZE.  This is a quick and dirty check that relies on
+		 * the fact that bi_io_vec[0] is always valid if a bio has data.
+		 * The check might lead to occasional false negatives when bios
+		 * are cloned, but compared to the performance impact of cloned
+		 * bios themselves the loop below doesn't matter anyway.
+		 */
+		if (!q->limits.chunk_sectors &&
+		    (*bio)->bi_vcnt == 1 &&
+		    ((*bio)->bi_io_vec[0].bv_len +
+		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
+			*nr_segs = 1;
+			break;
+		}
 		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
 		break;
 	}
@@ -404,7 +423,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
 
 	while (nbytes > 0) {
 		unsigned offset = bvec->bv_offset + total;
-		unsigned len = min(get_max_segment_size(q, offset), nbytes);
+		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
+					offset), nbytes);
 		struct page *page = bvec->bv_page;
 
 		/*
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a0d3ce30fa08..062229395a50 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
 	if (!entry->show)
 		return -EIO;
 
-	res = -ENOENT;
 	mutex_lock(&q->sysfs_lock);
-	if (!blk_queue_dying(q))
-		res = entry->show(ctx, page);
+	res = entry->show(ctx, page);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
 }
@@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
 	if (!entry->store)
 		return -EIO;
 
-	res = -ENOENT;
 	mutex_lock(&q->sysfs_lock);
-	if (!blk_queue_dying(q))
-		res = entry->store(ctx, page, length);
+	res = entry->store(ctx, page, length);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
 }
@@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
 	if (!entry->show)
 		return -EIO;
 
-	res = -ENOENT;
 	mutex_lock(&q->sysfs_lock);
-	if (!blk_queue_dying(q))
-		res = entry->show(hctx, page);
+	res = entry->show(hctx, page);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
 }
@@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
 	if (!entry->store)
 		return -EIO;
 
-	res = -ENOENT;
 	mutex_lock(&q->sysfs_lock);
-	if (!blk_queue_dying(q))
-		res = entry->store(hctx, page, length);
+	res = entry->store(hctx, page, length);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
 }
@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
 
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
+	const size_t size = PAGE_SIZE - 1;
 	unsigned int i, first = 1;
-	ssize_t ret = 0;
+	int ret = 0, pos = 0;
 
 	for_each_cpu(i, hctx->cpumask) {
 		if (first)
-			ret += sprintf(ret + page, "%u", i);
+			ret = snprintf(pos + page, size - pos, "%u", i);
 		else
-			ret += sprintf(ret + page, ", %u", i);
+			ret = snprintf(pos + page, size - pos, ", %u", i);
+
+		if (ret >= size - pos)
+			break;
 
 		first = 0;
+		pos += ret;
 	}
 
-	ret += sprintf(ret + page, "\n");
-	return ret;
+	ret = snprintf(pos + page, size + 1 - pos, "\n");
+	return pos + ret;
 }
 
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 008388e82b5c..fbacde454718 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -15,14 +15,6 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
-{
-	if (!tags)
-		return true;
-
-	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
-}
-
 /*
  * If a previously inactive queue goes active, bump the active user count.
  * We need to do this before try to allocate driver tag, then even if fail
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 61deab0b5a5a..15bc74acb57e 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -28,7 +28,6 @@ extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
 			   struct blk_mq_ctx *ctx, unsigned int tag);
-extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ec791156e9cc..a12b1763508d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 
 struct mq_inflight {
 	struct hd_struct *part;
-	unsigned int *inflight;
+	unsigned int inflight[2];
 };
 
 static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
@@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 {
 	struct mq_inflight *mi = priv;
 
-	/*
-	 * index[0] counts the specific partition that was asked for.
-	 */
 	if (rq->part == mi->part)
-		mi->inflight[0]++;
+		mi->inflight[rq_data_dir(rq)]++;
 
 	return true;
 }
 
 unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
 {
-	unsigned inflight[2];
-	struct mq_inflight mi = { .part = part, .inflight = inflight, };
+	struct mq_inflight mi = { .part = part };
 
-	inflight[0] = inflight[1] = 0;
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
 
-	return inflight[0];
-}
-
-static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
-				     struct request *rq, void *priv,
-				     bool reserved)
-{
-	struct mq_inflight *mi = priv;
-
-	if (rq->part == mi->part)
-		mi->inflight[rq_data_dir(rq)]++;
-
-	return true;
+	return mi.inflight[0] + mi.inflight[1];
 }
 
 void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
 			 unsigned int inflight[2])
 {
-	struct mq_inflight mi = { .part = part, .inflight = inflight, };
+	struct mq_inflight mi = { .part = part };
 
-	inflight[0] = inflight[1] = 0;
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
+	inflight[0] = mi.inflight[0];
+	inflight[1] = mi.inflight[1];
 }
 
 void blk_freeze_queue_start(struct request_queue *q)
@@ -276,12 +260,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
 			blk_mq_tag_wakeup_all(hctx->tags, true);
 }
 
-bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
-{
-	return blk_mq_has_free_tags(hctx->tags);
-}
-EXPORT_SYMBOL(blk_mq_can_queue);
-
 /*
  * Only need start/end time stamping if we have iostat or
  * blk stats enabled, or using an IO scheduler.
@@ -663,18 +641,14 @@ bool blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-int blk_mq_request_started(struct request *rq)
-{
-	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_started);
-
-int blk_mq_request_completed(struct request *rq)
-{
-	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_completed);
-
+/**
+ * blk_mq_start_request - Start processing a request
+ * @rq: Pointer to request to be started
+ *
+ * Function used by device drivers to notify the block layer that a request
+ * is going to be processed now, so blk layer can do proper initializations
+ * such as starting the timeout timer.
+ */
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -1064,7 +1038,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
 	bool shared;
 
 	if (rq->tag != -1)
-		goto done;
+		return true;
 
 	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
 		data.flags |= BLK_MQ_REQ_RESERVED;
@@ -1079,7 +1053,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
 		data.hctx->tags->rqs[rq->tag] = rq;
 	}
 
-done:
 	return rq->tag != -1;
 }
 
@@ -1362,6 +1335,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 	return (queued + errors) != 0;
 }
 
+/**
+ * __blk_mq_run_hw_queue - Run a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ *
+ * Send pending requests to the hardware.
+ */
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	int srcu_idx;
@@ -1459,6 +1438,15 @@ select_cpu:
 	return next_cpu;
 }
 
+/**
+ * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ * @async: If we want to run the queue asynchronously.
+ * @msecs: Microseconds of delay to wait before running the queue.
+ *
+ * If !@async, try to run the queue now. Else, run the queue asynchronously and
+ * with a delay of @msecs.
+ */
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
@@ -1480,13 +1468,29 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 				    msecs_to_jiffies(msecs));
 }
 
+/**
+ * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
+ * @hctx: Pointer to the hardware queue to run.
+ * @msecs: Microseconds of delay to wait before running the queue.
+ *
+ * Run a hardware queue asynchronously with a delay of @msecs.
+ */
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
 	__blk_mq_delay_run_hw_queue(hctx, true, msecs);
 }
 EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
 
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+/**
+ * blk_mq_run_hw_queue - Start to run a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ * @async: If we want to run the queue asynchronously.
+ *
+ * Check if the request queue is not in a quiesced state and if there are
+ * pending requests to be sent. If this is true, run the queue to send requests
+ * to hardware.
+ */
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
 	int srcu_idx;
 	bool need_run;
@@ -1504,15 +1508,16 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 		blk_mq_hctx_has_pending(hctx);
 	hctx_unlock(hctx, srcu_idx);
 
-	if (need_run) {
+	if (need_run)
 		__blk_mq_delay_run_hw_queue(hctx, async, 0);
-		return true;
-	}
-
-	return false;
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
+/**
+ * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * @q: Pointer to the request queue to run.
+ * @async: If we want to run the queue asynchronously.
+ */
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -1664,7 +1669,11 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
-/*
+/**
+ * blk_mq_request_bypass_insert - Insert a request at dispatch list.
+ * @rq: Pointer to request to be inserted.
+ * @run_queue: If we should run the hardware queue after inserting the request.
+ *
  * Should only be used carefully, when the caller knows we want to
  * bypass a potential IO scheduler on the target device.
  */
@@ -1707,28 +1716,20 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 	struct request *rqa = container_of(a, struct request, queuelist);
 	struct request *rqb = container_of(b, struct request, queuelist);
 
-	if (rqa->mq_ctx < rqb->mq_ctx)
-		return -1;
-	else if (rqa->mq_ctx > rqb->mq_ctx)
-		return 1;
-	else if (rqa->mq_hctx < rqb->mq_hctx)
-		return -1;
-	else if (rqa->mq_hctx > rqb->mq_hctx)
-		return 1;
+	if (rqa->mq_ctx != rqb->mq_ctx)
+		return rqa->mq_ctx > rqb->mq_ctx;
+	if (rqa->mq_hctx != rqb->mq_hctx)
+		return rqa->mq_hctx > rqb->mq_hctx;
 
 	return blk_rq_pos(rqa) > blk_rq_pos(rqb);
 }
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
-	struct blk_mq_hw_ctx *this_hctx;
-	struct blk_mq_ctx *this_ctx;
-	struct request_queue *this_q;
-	struct request *rq;
 	LIST_HEAD(list);
-	LIST_HEAD(rq_list);
-	unsigned int depth;
 
+	if (list_empty(&plug->mq_list))
+		return;
 	list_splice_init(&plug->mq_list, &list);
 
 	if (plug->rq_count > 2 && plug->multiple_queues)
@@ -1736,42 +1737,27 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 	plug->rq_count = 0;
 
-	this_q = NULL;
-	this_hctx = NULL;
-	this_ctx = NULL;
-	depth = 0;
-
-	while (!list_empty(&list)) {
-		rq = list_entry_rq(list.next);
-		list_del_init(&rq->queuelist);
-		BUG_ON(!rq->q);
-		if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) {
-			if (this_hctx) {
-				trace_block_unplug(this_q, depth, !from_schedule);
-				blk_mq_sched_insert_requests(this_hctx, this_ctx,
-								&rq_list,
-								from_schedule);
-			}
-
-			this_q = rq->q;
-			this_ctx = rq->mq_ctx;
-			this_hctx = rq->mq_hctx;
-			depth = 0;
+	do {
+		struct list_head rq_list;
+		struct request *rq, *head_rq = list_entry_rq(list.next);
+		struct list_head *pos = &head_rq->queuelist; /* skip first */
+		struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
+		struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
+		unsigned int depth = 1;
+
+		list_for_each_continue(pos, &list) {
+			rq = list_entry_rq(pos);
+			BUG_ON(!rq->q);
+			if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
+				break;
+			depth++;
 		}
 
-		depth++;
-		list_add_tail(&rq->queuelist, &rq_list);
-	}
-
-	/*
-	 * If 'this_hctx' is set, we know we have entries to complete
-	 * on 'rq_list'. Do those.
-	 */
-	if (this_hctx) {
-		trace_block_unplug(this_q, depth, !from_schedule);
+		list_cut_before(&rq_list, &list, pos);
+		trace_block_unplug(head_rq->q, depth, !from_schedule);
 		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
 						from_schedule);
-	}
+	} while(!list_empty(&list));
 }
 
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -1867,6 +1853,17 @@ insert:
 	return BLK_STS_OK;
 }
 
+/**
+ * blk_mq_try_issue_directly - Try to send a request directly to device driver.
+ * @hctx: Pointer of the associated hardware queue.
+ * @rq: Pointer to request to be sent.
+ * @cookie: Request queue cookie.
+ *
+ * If the device has enough resources to accept a new request now, send the
+ * request directly to device driver. Else, insert at hctx->dispatch queue, so
+ * we can try send it another time in the future. Requests inserted at this
+ * queue have higher priority.
+ */
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, blk_qc_t *cookie)
 {
@@ -1944,6 +1941,22 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 	}
 }
 
+/**
+ * blk_mq_make_request - Create and send a request to block device.
+ * @q: Request queue pointer.
+ * @bio: Bio pointer.
+ *
+ * Builds up a request structure from @q and @bio and send to the device. The
+ * request may not be queued directly to hardware if:
+ * * This request can be merged with another one
+ * * We want to place request at plug queue for possible future merging
+ * * There is an IO scheduler active at this queue
+ *
+ * It will not queue the request if there is an error with the bio, or at the
+ * request creation.
+ *
+ * Returns: Request queue cookie.
+ */
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
@@ -1989,7 +2002,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	plug = blk_mq_plug(q, bio);
 	if (unlikely(is_flush_fua)) {
-		/* bypass scheduler for flush rq */
+		/* Bypass scheduler for flush requests */
 		blk_insert_flush(rq);
 		blk_mq_run_hw_queue(data.hctx, true);
 	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
@@ -2017,6 +2030,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 		blk_add_rq_to_plug(plug, rq);
 	} else if (q->elevator) {
+		/* Insert the request at the IO scheduler queue */
 		blk_mq_sched_insert_request(rq, false, true, true);
 	} else if (plug && !blk_queue_nomerges(q)) {
 		/*
@@ -2043,8 +2057,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		}
 	} else if ((q->nr_hw_queues > 1 && is_sync) ||
 			!data.hctx->dispatch_busy) {
+		/*
+		 * There is no scheduler and we can try to send directly
+		 * to the hardware.
+		 */
 		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
 	} else {
+		/* Default case. */
 		blk_mq_sched_insert_request(rq, false, true, true);
 	}
 
@@ -2789,6 +2808,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	int i, j, end;
 	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
 
+	if (q->nr_hw_queues < set->nr_hw_queues) {
+		struct blk_mq_hw_ctx **new_hctxs;
+
+		new_hctxs = kcalloc_node(set->nr_hw_queues,
+				       sizeof(*new_hctxs), GFP_KERNEL,
+				       set->numa_node);
+		if (!new_hctxs)
+			return;
+		if (hctxs)
+			memcpy(new_hctxs, hctxs, q->nr_hw_queues *
+			       sizeof(*hctxs));
+		q->queue_hw_ctx = new_hctxs;
+		q->nr_hw_queues = set->nr_hw_queues;
+		kfree(hctxs);
+		hctxs = new_hctxs;
+	}
+
 	/* protect against switching io scheduler  */
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2844,19 +2880,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	mutex_unlock(&q->sysfs_lock);
 }
 
-/*
- * Maximum number of hardware queues we support. For single sets, we'll never
- * have more than the CPUs (software queues). For multiple sets, the tag_set
- * user may have set ->nr_hw_queues larger.
- */
-static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
-{
-	if (set->nr_maps == 1)
-		return nr_cpu_ids;
-
-	return max(set->nr_hw_queues, nr_cpu_ids);
-}
-
 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 						  struct request_queue *q,
 						  bool elevator_init)
@@ -2876,12 +2899,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	/* init q->mq_kobj and sw queues' kobjects */
 	blk_mq_sysfs_init(q);
 
-	q->nr_queues = nr_hw_queues(set);
-	q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
-						GFP_KERNEL, set->numa_node);
-	if (!q->queue_hw_ctx)
-		goto err_sys_init;
-
 	INIT_LIST_HEAD(&q->unused_hctx_list);
 	spin_lock_init(&q->unused_hctx_lock);
 
@@ -2929,7 +2946,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 err_hctxs:
 	kfree(q->queue_hw_ctx);
 	q->nr_hw_queues = 0;
-err_sys_init:
 	blk_mq_sysfs_deinit(q);
 err_poll:
 	blk_stat_free_callback(q->poll_cb);
@@ -3030,6 +3046,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
 	}
 }
 
+static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
+				  int cur_nr_hw_queues, int new_nr_hw_queues)
+{
+	struct blk_mq_tags **new_tags;
+
+	if (cur_nr_hw_queues >= new_nr_hw_queues)
+		return 0;
+
+	new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
+				GFP_KERNEL, set->numa_node);
+	if (!new_tags)
+		return -ENOMEM;
+
+	if (set->tags)
+		memcpy(new_tags, set->tags, cur_nr_hw_queues *
+		       sizeof(*set->tags));
+	kfree(set->tags);
+	set->tags = new_tags;
+	set->nr_hw_queues = new_nr_hw_queues;
+
+	return 0;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -3083,9 +3122,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
 		set->nr_hw_queues = nr_cpu_ids;
 
-	set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
-				 GFP_KERNEL, set->numa_node);
-	if (!set->tags)
+	if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
@@ -3126,7 +3163,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
 	int i, j;
 
-	for (i = 0; i < nr_hw_queues(set); i++)
+	for (i = 0; i < set->nr_hw_queues; i++)
 		blk_mq_free_map_and_requests(set, i);
 
 	for (j = 0; j < set->nr_maps; j++) {
@@ -3271,10 +3308,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue(q);
 	/*
-	 * Sync with blk_mq_queue_tag_busy_iter.
-	 */
-	synchronize_rcu();
-	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
 	 * updating the new sw to hw queue mappings.
@@ -3288,6 +3321,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister(q);
 	}
 
+	if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
+	    0)
+		goto reregister;
+
 	prev_nr_hw_queues = set->nr_hw_queues;
 	set->nr_hw_queues = nr_hw_queues;
 	blk_mq_update_queue_map(set);
@@ -3304,6 +3341,7 @@ fallback:
 		blk_mq_map_swqueue(q);
 	}
 
+reregister:
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register(q);
 		blk_mq_debugfs_register_hctxs(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 32c62c64e6c2..eaaca8fc1c28 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_release(struct request_queue *q);
 
-/**
- * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
- * @rq: target request.
- */
-static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
-{
-	return READ_ONCE(rq->state);
-}
-
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
 					   unsigned int cpu)
 {
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5f6dcc7a47bd..c8eda2e7b91e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -328,7 +328,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
  *   storage device can address.  The default of 512 covers most
  *   hardware.
  **/
-void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
+void blk_queue_logical_block_size(struct request_queue *q, unsigned int size)
 {
 	q->limits.logical_block_size = size;
 
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 457d9ba3eb20..6e7ec87d49fa 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -42,17 +42,13 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 static void trigger_softirq(void *data)
 {
 	struct request *rq = data;
-	unsigned long flags;
 	struct list_head *list;
 
-	local_irq_save(flags);
 	list = this_cpu_ptr(&blk_cpu_done);
 	list_add_tail(&rq->ipi_list, list);
 
 	if (list->next == &rq->ipi_list)
 		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
 }
 
 /*
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 940f15d600f8..7da302ff88d0 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now)
 	struct request_queue *q = rq->q;
 	struct blk_stat_callback *cb;
 	struct blk_rq_stat *stat;
-	int bucket;
+	int bucket, cpu;
 	u64 value;
 
 	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
@@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now)
 	blk_throtl_stat_add(rq, value);
 
 	rcu_read_lock();
+	cpu = get_cpu();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
 		if (!blk_stat_is_active(cb))
 			continue;
@@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now)
 		if (bucket < 0)
 			continue;
 
-		stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+		stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
 		blk_rq_stat_add(stat, value);
-		put_cpu_ptr(cb->cpu_stat);
 	}
+	put_cpu();
 	rcu_read_unlock();
 }
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 46f5198be017..fca9b158f4a0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dying(q)) {
-		mutex_unlock(&q->sysfs_lock);
-		return -ENOENT;
-	}
 	res = entry->show(q, page);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
@@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dying(q)) {
-		mutex_unlock(&q->sysfs_lock);
-		return -ENOENT;
-	}
 	res = entry->store(q, page, length);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 18f773e52dfb..98233c9c65a8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -12,6 +12,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/blk-cgroup.h>
 #include "blk.h"
+#include "blk-cgroup-rwstat.h"
 
 /* Max dispatch from a group in 1 round */
 static int throtl_grp_quantum = 8;
@@ -176,6 +177,9 @@ struct throtl_grp {
 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
 	unsigned long bio_cnt_reset_time;
+
+	struct blkg_rwstat stat_bytes;
+	struct blkg_rwstat stat_ios;
 };
 
 /* We measure latency for request size from <= 4k to >= 1M */
@@ -489,6 +493,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
 	if (!tg)
 		return NULL;
 
+	if (blkg_rwstat_init(&tg->stat_bytes, gfp))
+		goto err_free_tg;
+
+	if (blkg_rwstat_init(&tg->stat_ios, gfp))
+		goto err_exit_stat_bytes;
+
 	throtl_service_queue_init(&tg->service_queue);
 
 	for (rw = READ; rw <= WRITE; rw++) {
@@ -513,6 +523,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
 	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
+
+err_exit_stat_bytes:
+	blkg_rwstat_exit(&tg->stat_bytes);
+err_free_tg:
+	kfree(tg);
+	return NULL;
 }
 
 static void throtl_pd_init(struct blkg_policy_data *pd)
@@ -611,6 +627,8 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
 	struct throtl_grp *tg = pd_to_tg(pd);
 
 	del_timer_sync(&tg->service_queue.pending_timer);
+	blkg_rwstat_exit(&tg->stat_bytes);
+	blkg_rwstat_exit(&tg->stat_ios);
 	kfree(tg);
 }
 
@@ -1464,6 +1482,32 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
 	return tg_set_conf(of, buf, nbytes, off, false);
 }
 
+static int tg_print_rwstat(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  blkg_prfill_rwstat, &blkcg_policy_throtl,
+			  seq_cft(sf)->private, true);
+	return 0;
+}
+
+static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
+				      struct blkg_policy_data *pd, int off)
+{
+	struct blkg_rwstat_sample sum;
+
+	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
+				  &sum);
+	return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
+			  seq_cft(sf)->private, true);
+	return 0;
+}
+
 static struct cftype throtl_legacy_files[] = {
 	{
 		.name = "throttle.read_bps_device",
@@ -1491,23 +1535,23 @@ static struct cftype throtl_legacy_files[] = {
 	},
 	{
 		.name = "throttle.io_service_bytes",
-		.private = (unsigned long)&blkcg_policy_throtl,
-		.seq_show = blkg_print_stat_bytes,
+		.private = offsetof(struct throtl_grp, stat_bytes),
+		.seq_show = tg_print_rwstat,
 	},
 	{
 		.name = "throttle.io_service_bytes_recursive",
-		.private = (unsigned long)&blkcg_policy_throtl,
-		.seq_show = blkg_print_stat_bytes_recursive,
+		.private = offsetof(struct throtl_grp, stat_bytes),
+		.seq_show = tg_print_rwstat_recursive,
 	},
 	{
 		.name = "throttle.io_serviced",
-		.private = (unsigned long)&blkcg_policy_throtl,
-		.seq_show = blkg_print_stat_ios,
+		.private = offsetof(struct throtl_grp, stat_ios),
+		.seq_show = tg_print_rwstat,
 	},
 	{
 		.name = "throttle.io_serviced_recursive",
-		.private = (unsigned long)&blkcg_policy_throtl,
-		.seq_show = blkg_print_stat_ios_recursive,
+		.private = offsetof(struct throtl_grp, stat_ios),
+		.seq_show = tg_print_rwstat_recursive,
 	},
 	{ }	/* terminate */
 };
@@ -2127,7 +2171,16 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* see throtl_charge_bio() */
-	if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
+	if (bio_flagged(bio, BIO_THROTTLED))
+		goto out;
+
+	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
+		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
+				bio->bi_iter.bi_size);
+		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
+	}
+
+	if (!tg->has_rules[rw])
 		goto out;
 
 	spin_lock_irq(&q->queue_lock);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 4bc5f260248a..05741c6f618b 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -70,195 +70,98 @@ void __blk_req_zone_write_unlock(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
 
-static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
-					     sector_t nr_sectors)
-{
-	sector_t zone_sectors = blk_queue_zone_sectors(q);
-
-	return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
-}
-
 /**
  * blkdev_nr_zones - Get number of zones
- * @bdev:	Target block device
+ * @disk:	Target gendisk
  *
- * Description:
- *    Return the total number of zones of a zoned block device.
- *    For a regular block device, the number of zones is always 0.
+ * Return the total number of zones of a zoned block device.  For a block
+ * device without zone capabilities, the number of zones is always 0.
  */
-unsigned int blkdev_nr_zones(struct block_device *bdev)
+unsigned int blkdev_nr_zones(struct gendisk *disk)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);
 
-	if (!blk_queue_is_zoned(q))
+	if (!blk_queue_is_zoned(disk->queue))
 		return 0;
-
-	return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
 }
 EXPORT_SYMBOL_GPL(blkdev_nr_zones);
 
-/*
- * Check that a zone report belongs to this partition, and if yes, fix its start
- * sector and write pointer and return true. Return false otherwise.
- */
-static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
-{
-	sector_t offset = get_start_sect(bdev);
-
-	if (rep->start < offset)
-		return false;
-
-	rep->start -= offset;
-	if (rep->start + rep->len > bdev->bd_part->nr_sects)
-		return false;
-
-	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
-		rep->wp = rep->start + rep->len;
-	else
-		rep->wp -= offset;
-	return true;
-}
-
-static int blk_report_zones(struct gendisk *disk, sector_t sector,
-			    struct blk_zone *zones, unsigned int *nr_zones)
-{
-	struct request_queue *q = disk->queue;
-	unsigned int z = 0, n, nrz = *nr_zones;
-	sector_t capacity = get_capacity(disk);
-	int ret;
-
-	while (z < nrz && sector < capacity) {
-		n = nrz - z;
-		ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
-		if (ret)
-			return ret;
-		if (!n)
-			break;
-		sector += blk_queue_zone_sectors(q) * n;
-		z += n;
-	}
-
-	WARN_ON(z > *nr_zones);
-	*nr_zones = z;
-
-	return 0;
-}
-
 /**
  * blkdev_report_zones - Get zones information
  * @bdev:	Target block device
  * @sector:	Sector from which to report zones
- * @zones:	Array of zone structures where to return the zones information
- * @nr_zones:	Number of zone structures in the zone array
+ * @nr_zones:	Maximum number of zones to report
+ * @cb:		Callback function called for each reported zone
+ * @data:	Private data for the callback
  *
  * Description:
- *    Get zone information starting from the zone containing @sector.
- *    The number of zone information reported may be less than the number
- *    requested by @nr_zones. The number of zones actually reported is
- *    returned in @nr_zones.
- *    The caller must use memalloc_noXX_save/restore() calls to control
- *    memory allocations done within this function (zone array and command
- *    buffer allocation by the device driver).
+ *    Get zone information starting from the zone containing @sector for at most
+ *    @nr_zones, and call @cb for each zone reported by the device.
+ *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
+ *    constant can be passed to @nr_zones.
+ *    Returns the number of zones reported by the device, or a negative errno
+ *    value in case of failure.
+ *
+ *    Note: The caller must use memalloc_noXX_save/restore() calls to control
+ *    memory allocations done within this function.
  */
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
-			struct blk_zone *zones, unsigned int *nr_zones)
+			unsigned int nr_zones, report_zones_cb cb, void *data)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	unsigned int i, nrz;
-	int ret;
-
-	if (!blk_queue_is_zoned(q))
-		return -EOPNOTSUPP;
+	struct gendisk *disk = bdev->bd_disk;
+	sector_t capacity = get_capacity(disk);
 
-	/*
-	 * A block device that advertized itself as zoned must have a
-	 * report_zones method. If it does not have one defined, the device
-	 * driver has a bug. So warn about that.
-	 */
-	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
+	if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
+	    WARN_ON_ONCE(!disk->fops->report_zones))
 		return -EOPNOTSUPP;
 
-	if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
-		*nr_zones = 0;
+	if (!nr_zones || sector >= capacity)
 		return 0;
-	}
-
-	nrz = min(*nr_zones,
-		  __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
-	ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
-			       zones, &nrz);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < nrz; i++) {
-		if (!blkdev_report_zone(bdev, zones))
-			break;
-		zones++;
-	}
-
-	*nr_zones = i;
 
-	return 0;
+	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
-/*
- * Special case of zone reset operation to reset all zones in one command,
- * useful for applications like mkfs.
- */
-static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
-{
-	struct bio *bio = bio_alloc(gfp_mask, 0);
-	int ret;
-
-	/* across the zones operations, don't need any sectors */
-	bio_set_dev(bio, bdev);
-	bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);
-
-	ret = submit_bio_wait(bio);
-	bio_put(bio);
-
-	return ret;
-}
-
 static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
+						sector_t sector,
 						sector_t nr_sectors)
 {
 	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
 		return false;
 
-	if (nr_sectors != part_nr_sects_read(bdev->bd_part))
-		return false;
 	/*
-	 * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
-	 * the entire disk, that is, if the blocks device start offset is 0 and
-	 * its capacity is the same as the entire disk.
+	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
+	 * of the applicable zone range is the entire disk.
 	 */
-	return get_start_sect(bdev) == 0 &&
-	       part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk);
+	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
 }
 
 /**
- * blkdev_reset_zones - Reset zones write pointer
+ * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
  * @bdev:	Target block device
- * @sector:	Start sector of the first zone to reset
- * @nr_sectors:	Number of sectors, at least the length of one zone
+ * @op:		Operation to be performed on the zones
+ * @sector:	Start sector of the first zone to operate on
+ * @nr_sectors:	Number of sectors, should be at least the length of one zone and
+ *		must be zone size aligned.
  * @gfp_mask:	Memory allocation flags (for bio_alloc)
  *
  * Description:
- *    Reset the write pointer of the zones contained in the range
+ *    Perform the specified operation on the range of zones specified by
  *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
  *    is valid, but the specified range should not contain conventional zones.
+ *    The operation to execute on each zone can be a zone reset, open, close
+ *    or finish request.
  */
-int blkdev_reset_zones(struct block_device *bdev,
-		       sector_t sector, sector_t nr_sectors,
-		       gfp_t gfp_mask)
+int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+		     sector_t sector, sector_t nr_sectors,
+		     gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t zone_sectors;
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	sector_t capacity = get_capacity(bdev->bd_disk);
 	sector_t end_sector = sector + nr_sectors;
 	struct bio *bio = NULL;
-	struct blk_plug plug;
 	int ret;
 
 	if (!blk_queue_is_zoned(q))
@@ -267,45 +170,62 @@ int blkdev_reset_zones(struct block_device *bdev,
 	if (bdev_read_only(bdev))
 		return -EPERM;
 
-	if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
+	if (!op_is_zone_mgmt(op))
+		return -EOPNOTSUPP;
+
+	if (!nr_sectors || end_sector > capacity)
 		/* Out of range */
 		return -EINVAL;
 
-	if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
-		return  __blkdev_reset_all_zones(bdev, gfp_mask);
-
 	/* Check alignment (handle eventual smaller last zone) */
-	zone_sectors = blk_queue_zone_sectors(q);
 	if (sector & (zone_sectors - 1))
 		return -EINVAL;
 
-	if ((nr_sectors & (zone_sectors - 1)) &&
-	    end_sector != bdev->bd_part->nr_sects)
+	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
 		return -EINVAL;
 
-	blk_start_plug(&plug);
 	while (sector < end_sector) {
-
 		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
 		bio_set_dev(bio, bdev);
-		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
 
+		/*
+		 * Special case for the zone reset operation that reset all
+		 * zones, this is useful for applications like mkfs.
+		 */
+		if (op == REQ_OP_ZONE_RESET &&
+		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
+			bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
+			break;
+		}
+
+		bio->bi_opf = op | REQ_SYNC;
+		bio->bi_iter.bi_sector = sector;
 		sector += zone_sectors;
 
 		/* This may take a while, so be nice to others */
 		cond_resched();
-
 	}
 
 	ret = submit_bio_wait(bio);
 	bio_put(bio);
 
-	blk_finish_plug(&plug);
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_reset_zones);
+EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
+
+struct zone_report_args {
+	struct blk_zone __user *zones;
+};
+
+static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
+				    void *data)
+{
+	struct zone_report_args *args = data;
+
+	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
+		return -EFAULT;
+	return 0;
+}
 
 /*
  * BLKREPORTZONE ioctl processing.
@@ -315,9 +235,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 			      unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
+	struct zone_report_args args;
 	struct request_queue *q;
 	struct blk_zone_report rep;
-	struct blk_zone *zones;
 	int ret;
 
 	if (!argp)
@@ -339,44 +259,29 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!rep.nr_zones)
 		return -EINVAL;
 
-	rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
-
-	zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
-			       GFP_KERNEL | __GFP_ZERO);
-	if (!zones)
-		return -ENOMEM;
-
-	ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
-	if (ret)
-		goto out;
-
-	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	if (rep.nr_zones) {
-		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
-				 sizeof(struct blk_zone) * rep.nr_zones))
-			ret = -EFAULT;
-	}
-
- out:
-	kvfree(zones);
+	args.zones = argp + sizeof(struct blk_zone_report);
+	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
+				  blkdev_copy_zone_to_user, &args);
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	rep.nr_zones = ret;
+	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
+		return -EFAULT;
+	return 0;
 }
 
 /*
- * BLKRESETZONE ioctl processing.
+ * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
  * Called from blkdev_ioctl.
  */
-int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long arg)
+int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+			   unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
 	struct request_queue *q;
 	struct blk_zone_range zrange;
+	enum req_opf op;
 
 	if (!argp)
 		return -EINVAL;
@@ -397,8 +302,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
 		return -EFAULT;
 
-	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
-				  GFP_KERNEL);
+	switch (cmd) {
+	case BLKRESETZONE:
+		op = REQ_OP_ZONE_RESET;
+		break;
+	case BLKOPENZONE:
+		op = REQ_OP_ZONE_OPEN;
+		break;
+	case BLKCLOSEZONE:
+		op = REQ_OP_ZONE_CLOSE;
+		break;
+	case BLKFINISHZONE:
+		op = REQ_OP_ZONE_FINISH;
+		break;
+	default:
+		return -ENOTTY;
+	}
+
+	return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+				GFP_KERNEL);
 }
 
 static inline unsigned long *blk_alloc_zone_bitmap(int node,
@@ -408,37 +330,96 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
 			    GFP_NOIO, node);
 }
 
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+	kfree(q->conv_zones_bitmap);
+	q->conv_zones_bitmap = NULL;
+	kfree(q->seq_zones_wlock);
+	q->seq_zones_wlock = NULL;
+}
+
+struct blk_revalidate_zone_args {
+	struct gendisk	*disk;
+	unsigned long	*conv_zones_bitmap;
+	unsigned long	*seq_zones_wlock;
+	unsigned int	nr_zones;
+	sector_t	zone_sectors;
+	sector_t	sector;
+};
+
 /*
- * Allocate an array of struct blk_zone to get nr_zones zone information.
- * The allocated array may be smaller than nr_zones.
+ * Helper function to check the validity of zones of a zoned block device.
  */
-static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
+static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
+				  void *data)
 {
-	struct blk_zone *zones;
-	size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
+	struct blk_revalidate_zone_args *args = data;
+	struct gendisk *disk = args->disk;
+	struct request_queue *q = disk->queue;
+	sector_t capacity = get_capacity(disk);
 
 	/*
-	 * GFP_KERNEL here is meaningless as the caller task context has
-	 * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
-	 * with memalloc_noio_save().
+	 * All zones must have the same size, with the exception on an eventual
+	 * smaller last zone.
 	 */
-	zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
-	if (!zones) {
-		*nr_zones = 0;
-		return NULL;
+	if (zone->start == 0) {
+		if (zone->len == 0 || !is_power_of_2(zone->len)) {
+			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
+				disk->disk_name, zone->len);
+			return -ENODEV;
+		}
+
+		args->zone_sectors = zone->len;
+		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
+	} else if (zone->start + args->zone_sectors < capacity) {
+		if (zone->len != args->zone_sectors) {
+			pr_warn("%s: Invalid zoned device with non constant zone size\n",
+				disk->disk_name);
+			return -ENODEV;
+		}
+	} else {
+		if (zone->len > args->zone_sectors) {
+			pr_warn("%s: Invalid zoned device with larger last zone size\n",
+				disk->disk_name);
+			return -ENODEV;
+		}
 	}
 
-	*nr_zones = nrz;
+	/* Check for holes in the zone report */
+	if (zone->start != args->sector) {
+		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
+			disk->disk_name, args->sector, zone->start);
+		return -ENODEV;
+	}
 
-	return zones;
-}
+	/* Check zone type */
+	switch (zone->type) {
+	case BLK_ZONE_TYPE_CONVENTIONAL:
+		if (!args->conv_zones_bitmap) {
+			args->conv_zones_bitmap =
+				blk_alloc_zone_bitmap(q->node, args->nr_zones);
+			if (!args->conv_zones_bitmap)
+				return -ENOMEM;
+		}
+		set_bit(idx, args->conv_zones_bitmap);
+		break;
+	case BLK_ZONE_TYPE_SEQWRITE_REQ:
+	case BLK_ZONE_TYPE_SEQWRITE_PREF:
+		if (!args->seq_zones_wlock) {
+			args->seq_zones_wlock =
+				blk_alloc_zone_bitmap(q->node, args->nr_zones);
+			if (!args->seq_zones_wlock)
+				return -ENOMEM;
+		}
+		break;
+	default:
+		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
+			disk->disk_name, (int)zone->type, zone->start);
+		return -ENODEV;
+	}
 
-void blk_queue_free_zone_bitmaps(struct request_queue *q)
-{
-	kfree(q->seq_zones_bitmap);
-	q->seq_zones_bitmap = NULL;
-	kfree(q->seq_zones_wlock);
-	q->seq_zones_wlock = NULL;
+	args->sector += zone->len;
+	return 0;
 }
 
 /**
@@ -447,102 +428,53 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q)
  *
  * Helper function for low-level device drivers to (re) allocate and initialize
  * a disk request queue zone bitmaps. This functions should normally be called
- * within the disk ->revalidate method. For BIO based queues, no zone bitmap
- * is allocated.
+ * within the disk ->revalidate method for blk-mq based drivers.  For BIO based
+ * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
+ * is correct.
  */
 int blk_revalidate_disk_zones(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
-	unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
-	unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
-	unsigned int i, rep_nr_zones = 0, z = 0, nrz;
-	struct blk_zone *zones = NULL;
+	struct blk_revalidate_zone_args args = {
+		.disk		= disk,
+	};
 	unsigned int noio_flag;
-	sector_t sector = 0;
-	int ret = 0;
+	int ret;
 
-	/*
-	 * BIO based queues do not use a scheduler so only q->nr_zones
-	 * needs to be updated so that the sysfs exposed value is correct.
-	 */
-	if (!queue_is_mq(q)) {
-		q->nr_zones = nr_zones;
-		return 0;
-	}
+	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+		return -EIO;
+	if (WARN_ON_ONCE(!queue_is_mq(q)))
+		return -EIO;
 
 	/*
-	 * Ensure that all memory allocations in this context are done as
-	 * if GFP_NOIO was specified.
+	 * Ensure that all memory allocations in this context are done as if
+	 * GFP_NOIO was specified.
 	 */
 	noio_flag = memalloc_noio_save();
+	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
+				       blk_revalidate_zone_cb, &args);
+	memalloc_noio_restore(noio_flag);
 
-	if (!blk_queue_is_zoned(q) || !nr_zones) {
-		nr_zones = 0;
-		goto update;
-	}
-
-	/* Allocate bitmaps */
-	ret = -ENOMEM;
-	seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
-	if (!seq_zones_wlock)
-		goto out;
-	seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
-	if (!seq_zones_bitmap)
-		goto out;
-
-	/* Get zone information and initialize seq_zones_bitmap */
-	rep_nr_zones = nr_zones;
-	zones = blk_alloc_zones(&rep_nr_zones);
-	if (!zones)
-		goto out;
-
-	while (z < nr_zones) {
-		nrz = min(nr_zones - z, rep_nr_zones);
-		ret = blk_report_zones(disk, sector, zones, &nrz);
-		if (ret)
-			goto out;
-		if (!nrz)
-			break;
-		for (i = 0; i < nrz; i++) {
-			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
-				set_bit(z, seq_zones_bitmap);
-			z++;
-		}
-		sector += nrz * blk_queue_zone_sectors(q);
-	}
-
-	if (WARN_ON(z != nr_zones)) {
-		ret = -EIO;
-		goto out;
-	}
-
-update:
 	/*
-	 * Install the new bitmaps, making sure the queue is stopped and
-	 * all I/Os are completed (i.e. a scheduler is not referencing the
-	 * bitmaps).
+	 * Install the new bitmaps and update nr_zones only once the queue is
+	 * stopped and all I/Os are completed (i.e. a scheduler is not
+	 * referencing the bitmaps).
 	 */
 	blk_mq_freeze_queue(q);
-	q->nr_zones = nr_zones;
-	swap(q->seq_zones_wlock, seq_zones_wlock);
-	swap(q->seq_zones_bitmap, seq_zones_bitmap);
-	blk_mq_unfreeze_queue(q);
-
-out:
-	memalloc_noio_restore(noio_flag);
-
-	kvfree(zones);
-	kfree(seq_zones_wlock);
-	kfree(seq_zones_bitmap);
-
-	if (ret) {
+	if (ret >= 0) {
+		blk_queue_chunk_sectors(q, args.zone_sectors);
+		q->nr_zones = args.nr_zones;
+		swap(q->seq_zones_wlock, args.seq_zones_wlock);
+		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
+		ret = 0;
+	} else {
 		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
-		blk_mq_freeze_queue(q);
 		blk_queue_free_zone_bitmaps(q);
-		blk_mq_unfreeze_queue(q);
 	}
+	blk_mq_unfreeze_queue(q);
 
+	kfree(args.seq_zones_wlock);
+	kfree(args.conv_zones_bitmap);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
-
diff --git a/block/blk.h b/block/blk.h
index 47fba9362e60..0b8884353f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -30,6 +30,7 @@ struct blk_flush_queue {
 	 * at the same time
 	 */
 	struct request		*orig_rq;
+	struct lock_class_key	key;
 	spinlock_t		mq_flush_lock;
 };
 
@@ -121,6 +122,7 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
+void bio_integrity_free(struct bio *bio);
 static inline bool bio_integrity_endio(struct bio *bio)
 {
 	if (bio_integrity(bio))
@@ -166,6 +168,9 @@ static inline bool bio_integrity_endio(struct bio *bio)
 {
 	return true;
 }
+static inline void bio_integrity_free(struct bio *bio)
+{
+}
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 unsigned long blk_rq_timeout(unsigned long timeout);
@@ -242,14 +247,11 @@ int blk_dev_init(void);
  * Contribute to IO statistics IFF:
  *
  *	a) it's attached to a gendisk, and
- *	b) the queue had IO stats enabled when this request was started, and
- *	c) it's a file system request
+ *	b) the queue had IO stats enabled when this request was started
  */
 static inline bool blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk &&
-	       (rq->rq_flags & RQF_IO_STAT) &&
-		!blk_rq_is_passthrough(rq);
+	return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
 }
 
 static inline void req_set_nomerge(struct request_queue *q, struct request *req)
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 347dda16c2f4..6cbb7926534c 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -266,7 +266,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *req = bd->rq;
 	struct bsg_set *bset =
 		container_of(q->tag_set, struct bsg_set, tag_set);
-	int sts = BLK_STS_IOERR;
+	blk_status_t sts = BLK_STS_IOERR;
 	int ret;
 
 	blk_mq_start_request(req);
diff --git a/block/bsg.c b/block/bsg.c
index 833c44b3d458..d7bae94b64d9 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -382,6 +382,7 @@ static const struct file_operations bsg_fops = {
 	.open		=	bsg_open,
 	.release	=	bsg_release,
 	.unlocked_ioctl	=	bsg_ioctl,
+	.compat_ioctl	=	compat_ptr_ioctl,
 	.owner		=	THIS_MODULE,
 	.llseek		=	default_llseek,
 };
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
deleted file mode 100644
index 6ca015f92766..000000000000
--- a/block/compat_ioctl.c
+++ /dev/null
@@ -1,411 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-#include <linux/blktrace_api.h>
-#include <linux/cdrom.h>
-#include <linux/compat.h>
-#include <linux/elevator.h>
-#include <linux/hdreg.h>
-#include <linux/slab.h>
-#include <linux/syscalls.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-
-static int compat_put_ushort(unsigned long arg, unsigned short val)
-{
-	return put_user(val, (unsigned short __user *)compat_ptr(arg));
-}
-
-static int compat_put_int(unsigned long arg, int val)
-{
-	return put_user(val, (compat_int_t __user *)compat_ptr(arg));
-}
-
-static int compat_put_uint(unsigned long arg, unsigned int val)
-{
-	return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
-}
-
-static int compat_put_long(unsigned long arg, long val)
-{
-	return put_user(val, (compat_long_t __user *)compat_ptr(arg));
-}
-
-static int compat_put_ulong(unsigned long arg, compat_ulong_t val)
-{
-	return put_user(val, (compat_ulong_t __user *)compat_ptr(arg));
-}
-
-static int compat_put_u64(unsigned long arg, u64 val)
-{
-	return put_user(val, (compat_u64 __user *)compat_ptr(arg));
-}
-
-struct compat_hd_geometry {
-	unsigned char heads;
-	unsigned char sectors;
-	unsigned short cylinders;
-	u32 start;
-};
-
-static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev,
-			struct compat_hd_geometry __user *ugeo)
-{
-	struct hd_geometry geo;
-	int ret;
-
-	if (!ugeo)
-		return -EINVAL;
-	if (!disk->fops->getgeo)
-		return -ENOTTY;
-
-	memset(&geo, 0, sizeof(geo));
-	/*
-	 * We need to set the startsect first, the driver may
-	 * want to override it.
-	 */
-	geo.start = get_start_sect(bdev);
-	ret = disk->fops->getgeo(bdev, &geo);
-	if (ret)
-		return ret;
-
-	ret = copy_to_user(ugeo, &geo, 4);
-	ret |= put_user(geo.start, &ugeo->start);
-	if (ret)
-		ret = -EFAULT;
-
-	return ret;
-}
-
-static int compat_hdio_ioctl(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
-{
-	unsigned long __user *p;
-	int error;
-
-	p = compat_alloc_user_space(sizeof(unsigned long));
-	error = __blkdev_driver_ioctl(bdev, mode,
-				cmd, (unsigned long)p);
-	if (error == 0) {
-		unsigned int __user *uvp = compat_ptr(arg);
-		unsigned long v;
-		if (get_user(v, p) || put_user(v, uvp))
-			error = -EFAULT;
-	}
-	return error;
-}
-
-struct compat_cdrom_read_audio {
-	union cdrom_addr	addr;
-	u8			addr_format;
-	compat_int_t		nframes;
-	compat_caddr_t		buf;
-};
-
-struct compat_cdrom_generic_command {
-	unsigned char	cmd[CDROM_PACKET_SIZE];
-	compat_caddr_t	buffer;
-	compat_uint_t	buflen;
-	compat_int_t	stat;
-	compat_caddr_t	sense;
-	unsigned char	data_direction;
-	compat_int_t	quiet;
-	compat_int_t	timeout;
-	compat_caddr_t	reserved[1];
-};
-
-static int compat_cdrom_read_audio(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
-{
-	struct cdrom_read_audio __user *cdread_audio;
-	struct compat_cdrom_read_audio __user *cdread_audio32;
-	__u32 data;
-	void __user *datap;
-
-	cdread_audio = compat_alloc_user_space(sizeof(*cdread_audio));
-	cdread_audio32 = compat_ptr(arg);
-
-	if (copy_in_user(&cdread_audio->addr,
-			 &cdread_audio32->addr,
-			 (sizeof(*cdread_audio32) -
-			  sizeof(compat_caddr_t))))
-		return -EFAULT;
-
-	if (get_user(data, &cdread_audio32->buf))
-		return -EFAULT;
-	datap = compat_ptr(data);
-	if (put_user(datap, &cdread_audio->buf))
-		return -EFAULT;
-
-	return __blkdev_driver_ioctl(bdev, mode, cmd,
-			(unsigned long)cdread_audio);
-}
-
-static int compat_cdrom_generic_command(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
-{
-	struct cdrom_generic_command __user *cgc;
-	struct compat_cdrom_generic_command __user *cgc32;
-	u32 data;
-	unsigned char dir;
-	int itmp;
-
-	cgc = compat_alloc_user_space(sizeof(*cgc));
-	cgc32 = compat_ptr(arg);
-
-	if (copy_in_user(&cgc->cmd, &cgc32->cmd, sizeof(cgc->cmd)) ||
-	    get_user(data, &cgc32->buffer) ||
-	    put_user(compat_ptr(data), &cgc->buffer) ||
-	    copy_in_user(&cgc->buflen, &cgc32->buflen,
-			 (sizeof(unsigned int) + sizeof(int))) ||
-	    get_user(data, &cgc32->sense) ||
-	    put_user(compat_ptr(data), &cgc->sense) ||
-	    get_user(dir, &cgc32->data_direction) ||
-	    put_user(dir, &cgc->data_direction) ||
-	    get_user(itmp, &cgc32->quiet) ||
-	    put_user(itmp, &cgc->quiet) ||
-	    get_user(itmp, &cgc32->timeout) ||
-	    put_user(itmp, &cgc->timeout) ||
-	    get_user(data, &cgc32->reserved[0]) ||
-	    put_user(compat_ptr(data), &cgc->reserved[0]))
-		return -EFAULT;
-
-	return __blkdev_driver_ioctl(bdev, mode, cmd, (unsigned long)cgc);
-}
-
-struct compat_blkpg_ioctl_arg {
-	compat_int_t op;
-	compat_int_t flags;
-	compat_int_t datalen;
-	compat_caddr_t data;
-};
-
-static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, struct compat_blkpg_ioctl_arg __user *ua32)
-{
-	struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a));
-	compat_caddr_t udata;
-	compat_int_t n;
-	int err;
-
-	err = get_user(n, &ua32->op);
-	err |= put_user(n, &a->op);
-	err |= get_user(n, &ua32->flags);
-	err |= put_user(n, &a->flags);
-	err |= get_user(n, &ua32->datalen);
-	err |= put_user(n, &a->datalen);
-	err |= get_user(udata, &ua32->data);
-	err |= put_user(compat_ptr(udata), &a->data);
-	if (err)
-		return err;
-
-	return blkdev_ioctl(bdev, mode, cmd, (unsigned long)a);
-}
-
-#define BLKBSZGET_32		_IOR(0x12, 112, int)
-#define BLKBSZSET_32		_IOW(0x12, 113, int)
-#define BLKGETSIZE64_32		_IOR(0x12, 114, int)
-
-static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case HDIO_GET_UNMASKINTR:
-	case HDIO_GET_MULTCOUNT:
-	case HDIO_GET_KEEPSETTINGS:
-	case HDIO_GET_32BIT:
-	case HDIO_GET_NOWERR:
-	case HDIO_GET_DMA:
-	case HDIO_GET_NICE:
-	case HDIO_GET_WCACHE:
-	case HDIO_GET_ACOUSTIC:
-	case HDIO_GET_ADDRESS:
-	case HDIO_GET_BUSSTATE:
-		return compat_hdio_ioctl(bdev, mode, cmd, arg);
-	case CDROMREADAUDIO:
-		return compat_cdrom_read_audio(bdev, mode, cmd, arg);
-	case CDROM_SEND_PACKET:
-		return compat_cdrom_generic_command(bdev, mode, cmd, arg);
-
-	/*
-	 * No handler required for the ones below, we just need to
-	 * convert arg to a 64 bit pointer.
-	 */
-	case BLKSECTSET:
-	/*
-	 * 0x03 -- HD/IDE ioctl's used by hdparm and friends.
-	 *         Some need translations, these do not.
-	 */
-	case HDIO_GET_IDENTITY:
-	case HDIO_DRIVE_TASK:
-	case HDIO_DRIVE_CMD:
-	/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */
-	case 0x330:
-	/* CDROM stuff */
-	case CDROMPAUSE:
-	case CDROMRESUME:
-	case CDROMPLAYMSF:
-	case CDROMPLAYTRKIND:
-	case CDROMREADTOCHDR:
-	case CDROMREADTOCENTRY:
-	case CDROMSTOP:
-	case CDROMSTART:
-	case CDROMEJECT:
-	case CDROMVOLCTRL:
-	case CDROMSUBCHNL:
-	case CDROMMULTISESSION:
-	case CDROM_GET_MCN:
-	case CDROMRESET:
-	case CDROMVOLREAD:
-	case CDROMSEEK:
-	case CDROMPLAYBLK:
-	case CDROMCLOSETRAY:
-	case CDROM_DISC_STATUS:
-	case CDROM_CHANGER_NSLOTS:
-	case CDROM_GET_CAPABILITY:
-	/* Ignore cdrom.h about these next 5 ioctls, they absolutely do
-	 * not take a struct cdrom_read, instead they take a struct cdrom_msf
-	 * which is compatible.
-	 */
-	case CDROMREADMODE2:
-	case CDROMREADMODE1:
-	case CDROMREADRAW:
-	case CDROMREADCOOKED:
-	case CDROMREADALL:
-	/* DVD ioctls */
-	case DVD_READ_STRUCT:
-	case DVD_WRITE_STRUCT:
-	case DVD_AUTH:
-		arg = (unsigned long)compat_ptr(arg);
-	/* These intepret arg as an unsigned long, not as a pointer,
-	 * so we must not do compat_ptr() conversion. */
-	case HDIO_SET_MULTCOUNT:
-	case HDIO_SET_UNMASKINTR:
-	case HDIO_SET_KEEPSETTINGS:
-	case HDIO_SET_32BIT:
-	case HDIO_SET_NOWERR:
-	case HDIO_SET_DMA:
-	case HDIO_SET_PIO_MODE:
-	case HDIO_SET_NICE:
-	case HDIO_SET_WCACHE:
-	case HDIO_SET_ACOUSTIC:
-	case HDIO_SET_BUSSTATE:
-	case HDIO_SET_ADDRESS:
-	case CDROMEJECT_SW:
-	case CDROM_SET_OPTIONS:
-	case CDROM_CLEAR_OPTIONS:
-	case CDROM_SELECT_SPEED:
-	case CDROM_SELECT_DISC:
-	case CDROM_MEDIA_CHANGED:
-	case CDROM_DRIVE_STATUS:
-	case CDROM_LOCKDOOR:
-	case CDROM_DEBUG:
-		break;
-	default:
-		/* unknown ioctl number */
-		return -ENOIOCTLCMD;
-	}
-
-	return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
-}
-
-/* Most of the generic ioctls are handled in the normal fallback path.
-   This assumes the blkdev's low level compat_ioctl always returns
-   ENOIOCTLCMD for unknown ioctls. */
-long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-{
-	int ret = -ENOIOCTLCMD;
-	struct inode *inode = file->f_mapping->host;
-	struct block_device *bdev = inode->i_bdev;
-	struct gendisk *disk = bdev->bd_disk;
-	fmode_t mode = file->f_mode;
-	loff_t size;
-	unsigned int max_sectors;
-
-	/*
-	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
-	 * to updated it before every ioctl.
-	 */
-	if (file->f_flags & O_NDELAY)
-		mode |= FMODE_NDELAY;
-	else
-		mode &= ~FMODE_NDELAY;
-
-	switch (cmd) {
-	case HDIO_GETGEO:
-		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
-	case BLKPBSZGET:
-		return compat_put_uint(arg, bdev_physical_block_size(bdev));
-	case BLKIOMIN:
-		return compat_put_uint(arg, bdev_io_min(bdev));
-	case BLKIOOPT:
-		return compat_put_uint(arg, bdev_io_opt(bdev));
-	case BLKALIGNOFF:
-		return compat_put_int(arg, bdev_alignment_offset(bdev));
-	case BLKDISCARDZEROES:
-		return compat_put_uint(arg, 0);
-	case BLKFLSBUF:
-	case BLKROSET:
-	case BLKDISCARD:
-	case BLKSECDISCARD:
-	case BLKZEROOUT:
-	/*
-	 * the ones below are implemented in blkdev_locked_ioctl,
-	 * but we call blkdev_ioctl, which gets the lock for us
-	 */
-	case BLKRRPART:
-		return blkdev_ioctl(bdev, mode, cmd,
-				(unsigned long)compat_ptr(arg));
-	case BLKBSZSET_32:
-		return blkdev_ioctl(bdev, mode, BLKBSZSET,
-				(unsigned long)compat_ptr(arg));
-	case BLKPG:
-		return compat_blkpg_ioctl(bdev, mode, cmd, compat_ptr(arg));
-	case BLKRAGET:
-	case BLKFRAGET:
-		if (!arg)
-			return -EINVAL;
-		return compat_put_long(arg,
-			       (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
-	case BLKROGET: /* compatible */
-		return compat_put_int(arg, bdev_read_only(bdev) != 0);
-	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
-		return compat_put_int(arg, block_size(bdev));
-	case BLKSSZGET: /* get block device hardware sector size */
-		return compat_put_int(arg, bdev_logical_block_size(bdev));
-	case BLKSECTGET:
-		max_sectors = min_t(unsigned int, USHRT_MAX,
-				    queue_max_sectors(bdev_get_queue(bdev)));
-		return compat_put_ushort(arg, max_sectors);
-	case BLKROTATIONAL:
-		return compat_put_ushort(arg,
-					 !blk_queue_nonrot(bdev_get_queue(bdev)));
-	case BLKRASET: /* compatible, but no compat_ptr (!) */
-	case BLKFRASET:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
-		return 0;
-	case BLKGETSIZE:
-		size = i_size_read(bdev->bd_inode);
-		if ((size >> 9) > ~0UL)
-			return -EFBIG;
-		return compat_put_ulong(arg, size >> 9);
-
-	case BLKGETSIZE64_32:
-		return compat_put_u64(arg, i_size_read(bdev->bd_inode));
-
-	case BLKTRACESETUP32:
-	case BLKTRACESTART: /* compatible */
-	case BLKTRACESTOP:  /* compatible */
-	case BLKTRACETEARDOWN: /* compatible */
-		ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
-		return ret;
-	default:
-		if (disk->fops->compat_ioctl)
-			ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
-		if (ret == -ENOIOCTLCMD)
-			ret = compat_blkdev_driver_ioctl(bdev, mode, cmd, arg);
-		return ret;
-	}
-}
diff --git a/block/elevator.c b/block/elevator.c
index 076ba7308e65..4eab3d70e880 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -832,3 +832,12 @@ struct request *elv_rb_latter_request(struct request_queue *q,
 	return NULL;
 }
 EXPORT_SYMBOL(elv_rb_latter_request);
+
+static int __init elevator_setup(char *str)
+{
+	pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
+		"Please use sysfs to set IO scheduler for individual devices.\n");
+	return 1;
+}
+
+__setup("elevator=", elevator_setup);
diff --git a/block/genhd.c b/block/genhd.c
index 26b31fcae217..ff6268970ddc 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1385,7 +1385,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%lu %lu %lu %u "
 			   "%lu %lu %lu %u "
 			   "%u %u %u "
-			   "%lu %lu %lu %u\n",
+			   "%lu %lu %lu %u "
+			   "%lu %u"
+			   "\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[STAT_READ]),
@@ -1402,7 +1404,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   part_stat_read(hd, ios[STAT_DISCARD]),
 			   part_stat_read(hd, merges[STAT_DISCARD]),
 			   part_stat_read(hd, sectors[STAT_DISCARD]),
-			   (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
+			   (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
+			   part_stat_read(hd, ios[STAT_FLUSH]),
+			   (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
 			);
 	}
 	disk_part_iter_exit(&piter);
diff --git a/block/ioctl.c b/block/ioctl.c
index 15a0eb80ada9..127194b9f9bd 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/capability.h>
+#include <linux/compat.h>
 #include <linux/blkdev.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
@@ -11,12 +12,12 @@
 #include <linux/pr.h>
 #include <linux/uaccess.h>
 
-static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
+static int blkpg_do_ioctl(struct block_device *bdev,
+			  struct blkpg_partition __user *upart, int op)
 {
 	struct block_device *bdevp;
 	struct gendisk *disk;
 	struct hd_struct *part, *lpart;
-	struct blkpg_ioctl_arg a;
 	struct blkpg_partition p;
 	struct disk_part_iter piter;
 	long long start, length;
@@ -24,9 +25,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
-		return -EFAULT;
-	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+	if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
 		return -EFAULT;
 	disk = bdev->bd_disk;
 	if (bdev != bdev->bd_contains)
@@ -34,7 +33,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 	partno = p.pno;
 	if (partno <= 0)
 		return -EINVAL;
-	switch (a.op) {
+	switch (op) {
 		case BLKPG_ADD_PARTITION:
 			start = p.start >> 9;
 			length = p.length >> 9;
@@ -155,48 +154,54 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 	}
 }
 
-/*
- * This is an exported API for the block driver, and will not
- * acquire bd_mutex. This API should be used in case that
- * caller has held bd_mutex already.
- */
-int __blkdev_reread_part(struct block_device *bdev)
+static int blkpg_ioctl(struct block_device *bdev,
+		       struct blkpg_ioctl_arg __user *arg)
 {
-	struct gendisk *disk = bdev->bd_disk;
+	struct blkpg_partition __user *udata;
+	int op;
 
-	if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if (get_user(op, &arg->op) || get_user(udata, &arg->data))
+		return -EFAULT;
+
+	return blkpg_do_ioctl(bdev, udata, op);
+}
 
-	lockdep_assert_held(&bdev->bd_mutex);
+#ifdef CONFIG_COMPAT
+struct compat_blkpg_ioctl_arg {
+	compat_int_t op;
+	compat_int_t flags;
+	compat_int_t datalen;
+	compat_caddr_t data;
+};
 
-	return rescan_partitions(disk, bdev);
+static int compat_blkpg_ioctl(struct block_device *bdev,
+			      struct compat_blkpg_ioctl_arg __user *arg)
+{
+	compat_caddr_t udata;
+	int op;
+
+	if (get_user(op, &arg->op) || get_user(udata, &arg->data))
+		return -EFAULT;
+
+	return blkpg_do_ioctl(bdev, compat_ptr(udata), op);
 }
-EXPORT_SYMBOL(__blkdev_reread_part);
+#endif
 
-/*
- * This is an exported API for the block driver, and will
- * try to acquire bd_mutex. If bd_mutex has been held already
- * in current context, please call __blkdev_reread_part().
- *
- * Make sure the held locks in current context aren't required
- * in open()/close() handler and I/O path for avoiding ABBA deadlock:
- * - bd_mutex is held before calling block driver's open/close
- *   handler
- * - reading partition table may submit I/O to the block device
- */
-int blkdev_reread_part(struct block_device *bdev)
+static int blkdev_reread_part(struct block_device *bdev)
 {
-	int res;
+	int ret;
+
+	if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains)
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 
 	mutex_lock(&bdev->bd_mutex);
-	res = __blkdev_reread_part(bdev);
+	ret = bdev_disk_changed(bdev, false);
 	mutex_unlock(&bdev->bd_mutex);
 
-	return res;
+	return ret;
 }
-EXPORT_SYMBOL(blkdev_reread_part);
 
 static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 		unsigned long arg, unsigned long flags)
@@ -265,36 +270,48 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 			BLKDEV_ZERO_NOUNMAP);
 }
 
-static int put_ushort(unsigned long arg, unsigned short val)
+static int put_ushort(unsigned short __user *argp, unsigned short val)
+{
+	return put_user(val, argp);
+}
+
+static int put_int(int __user *argp, int val)
 {
-	return put_user(val, (unsigned short __user *)arg);
+	return put_user(val, argp);
 }
 
-static int put_int(unsigned long arg, int val)
+static int put_uint(unsigned int __user *argp, unsigned int val)
 {
-	return put_user(val, (int __user *)arg);
+	return put_user(val, argp);
 }
 
-static int put_uint(unsigned long arg, unsigned int val)
+static int put_long(long __user *argp, long val)
 {
-	return put_user(val, (unsigned int __user *)arg);
+	return put_user(val, argp);
 }
 
-static int put_long(unsigned long arg, long val)
+static int put_ulong(unsigned long __user *argp, unsigned long val)
 {
-	return put_user(val, (long __user *)arg);
+	return put_user(val, argp);
 }
 
-static int put_ulong(unsigned long arg, unsigned long val)
+static int put_u64(u64 __user *argp, u64 val)
 {
-	return put_user(val, (unsigned long __user *)arg);
+	return put_user(val, argp);
 }
 
-static int put_u64(unsigned long arg, u64 val)
+#ifdef CONFIG_COMPAT
+static int compat_put_long(compat_long_t *argp, long val)
 {
-	return put_user(val, (u64 __user *)arg);
+	return put_user(val, argp);
 }
 
+static int compat_put_ulong(compat_ulong_t *argp, compat_ulong_t val)
+{
+	return put_user(val, argp);
+}
+#endif
+
 int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned cmd, unsigned long arg)
 {
@@ -312,6 +329,26 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
  */
 EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
 
+#ifdef CONFIG_COMPAT
+/*
+ * This is the equivalent of compat_ptr_ioctl(), to be used by block
+ * drivers that implement only commands that are completely compatible
+ * between 32-bit and 64-bit user space
+ */
+int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode,
+			unsigned cmd, unsigned long arg)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	if (disk->fops->ioctl)
+		return disk->fops->ioctl(bdev, mode, cmd,
+					 (unsigned long)compat_ptr(arg));
+
+	return -ENOIOCTLCMD;
+}
+EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
+#endif
+
 static int blkdev_pr_register(struct block_device *bdev,
 		struct pr_registration __user *arg)
 {
@@ -482,6 +519,45 @@ static int blkdev_getgeo(struct block_device *bdev,
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_hd_geometry {
+	unsigned char heads;
+	unsigned char sectors;
+	unsigned short cylinders;
+	u32 start;
+};
+
+static int compat_hdio_getgeo(struct block_device *bdev,
+			      struct compat_hd_geometry __user *ugeo)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	struct hd_geometry geo;
+	int ret;
+
+	if (!ugeo)
+		return -EINVAL;
+	if (!disk->fops->getgeo)
+		return -ENOTTY;
+
+	memset(&geo, 0, sizeof(geo));
+	/*
+	 * We need to set the startsect first, the driver may
+	 * want to override it.
+	 */
+	geo.start = get_start_sect(bdev);
+	ret = disk->fops->getgeo(bdev, &geo);
+	if (ret)
+		return ret;
+
+	ret = copy_to_user(ugeo, &geo, 4);
+	ret |= put_user(geo.start, &ugeo->start);
+	if (ret)
+		ret = -EFAULT;
+
+	return ret;
+}
+#endif
+
 /* set the logical block size */
 static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
 		int __user *argp)
@@ -508,13 +584,13 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
 }
 
 /*
- * always keep this in sync with compat_blkdev_ioctl()
+ * Common commands that are handled the same way on native and compat
+ * user space. Note the separate arg/argp parameters that are needed
+ * to deal with the compat_ptr() conversion.
  */
-int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
-			unsigned long arg)
+static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
+				unsigned cmd, unsigned long arg, void __user *argp)
 {
-	void __user *argp = (void __user *)arg;
-	loff_t size;
 	unsigned int max_sectors;
 
 	switch (cmd) {
@@ -532,62 +608,44 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKREPORTZONE:
 		return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
 	case BLKRESETZONE:
-		return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+	case BLKOPENZONE:
+	case BLKCLOSEZONE:
+	case BLKFINISHZONE:
+		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
 	case BLKGETZONESZ:
-		return put_uint(arg, bdev_zone_sectors(bdev));
+		return put_uint(argp, bdev_zone_sectors(bdev));
 	case BLKGETNRZONES:
-		return put_uint(arg, blkdev_nr_zones(bdev));
-	case HDIO_GETGEO:
-		return blkdev_getgeo(bdev, argp);
-	case BLKRAGET:
-	case BLKFRAGET:
-		if (!arg)
-			return -EINVAL;
-		return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
+		return put_uint(argp, blkdev_nr_zones(bdev->bd_disk));
 	case BLKROGET:
-		return put_int(arg, bdev_read_only(bdev) != 0);
-	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
-		return put_int(arg, block_size(bdev));
+		return put_int(argp, bdev_read_only(bdev) != 0);
 	case BLKSSZGET: /* get block device logical block size */
-		return put_int(arg, bdev_logical_block_size(bdev));
+		return put_int(argp, bdev_logical_block_size(bdev));
 	case BLKPBSZGET: /* get block device physical block size */
-		return put_uint(arg, bdev_physical_block_size(bdev));
+		return put_uint(argp, bdev_physical_block_size(bdev));
 	case BLKIOMIN:
-		return put_uint(arg, bdev_io_min(bdev));
+		return put_uint(argp, bdev_io_min(bdev));
 	case BLKIOOPT:
-		return put_uint(arg, bdev_io_opt(bdev));
+		return put_uint(argp, bdev_io_opt(bdev));
 	case BLKALIGNOFF:
-		return put_int(arg, bdev_alignment_offset(bdev));
+		return put_int(argp, bdev_alignment_offset(bdev));
 	case BLKDISCARDZEROES:
-		return put_uint(arg, 0);
+		return put_uint(argp, 0);
 	case BLKSECTGET:
 		max_sectors = min_t(unsigned int, USHRT_MAX,
 				    queue_max_sectors(bdev_get_queue(bdev)));
-		return put_ushort(arg, max_sectors);
+		return put_ushort(argp, max_sectors);
 	case BLKROTATIONAL:
-		return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
+		return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 		return 0;
-	case BLKBSZSET:
-		return blkdev_bszset(bdev, mode, argp);
-	case BLKPG:
-		return blkpg_ioctl(bdev, argp);
 	case BLKRRPART:
 		return blkdev_reread_part(bdev);
-	case BLKGETSIZE:
-		size = i_size_read(bdev->bd_inode);
-		if ((size >> 9) > ~0UL)
-			return -EFBIG;
-		return put_ulong(arg, size >> 9);
-	case BLKGETSIZE64:
-		return put_u64(arg, i_size_read(bdev->bd_inode));
 	case BLKTRACESTART:
 	case BLKTRACESTOP:
-	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
 	case IOC_PR_REGISTER:
@@ -603,7 +661,132 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case IOC_PR_CLEAR:
 		return blkdev_pr_clear(bdev, argp);
 	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+/*
+ * Always keep this in sync with compat_blkdev_ioctl()
+ * to handle all incompatible commands in both functions.
+ *
+ * New commands must be compatible and go into blkdev_common_ioctl
+ */
+int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+			unsigned long arg)
+{
+	int ret;
+	loff_t size;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	/* These need separate implementations for the data structure */
+	case HDIO_GETGEO:
+		return blkdev_getgeo(bdev, argp);
+	case BLKPG:
+		return blkpg_ioctl(bdev, argp);
+
+	/* Compat mode returns 32-bit data instead of 'long' */
+	case BLKRAGET:
+	case BLKFRAGET:
+		if (!argp)
+			return -EINVAL;
+		return put_long(argp, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
+	case BLKGETSIZE:
+		size = i_size_read(bdev->bd_inode);
+		if ((size >> 9) > ~0UL)
+			return -EFBIG;
+		return put_ulong(argp, size >> 9);
+
+	/* The data is compatible, but the command number is different */
+	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
+		return put_int(argp, block_size(bdev));
+	case BLKBSZSET:
+		return blkdev_bszset(bdev, mode, argp);
+	case BLKGETSIZE64:
+		return put_u64(argp, i_size_read(bdev->bd_inode));
+
+	/* Incompatible alignment on i386 */
+	case BLKTRACESETUP:
+		return blk_trace_ioctl(bdev, cmd, argp);
+	default:
+		break;
+	}
+
+	ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
+	if (ret == -ENOIOCTLCMD)
 		return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */
+
+#ifdef CONFIG_COMPAT
+
+#define BLKBSZGET_32		_IOR(0x12, 112, int)
+#define BLKBSZSET_32		_IOW(0x12, 113, int)
+#define BLKGETSIZE64_32		_IOR(0x12, 114, int)
+
+/* Most of the generic ioctls are handled in the normal fallback path.
+   This assumes the blkdev's low level compat_ioctl always returns
+   ENOIOCTLCMD for unknown ioctls. */
+long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	int ret;
+	void __user *argp = compat_ptr(arg);
+	struct inode *inode = file->f_mapping->host;
+	struct block_device *bdev = inode->i_bdev;
+	struct gendisk *disk = bdev->bd_disk;
+	fmode_t mode = file->f_mode;
+	loff_t size;
+
+	/*
+	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+	 * to updated it before every ioctl.
+	 */
+	if (file->f_flags & O_NDELAY)
+		mode |= FMODE_NDELAY;
+	else
+		mode &= ~FMODE_NDELAY;
+
+	switch (cmd) {
+	/* These need separate implementations for the data structure */
+	case HDIO_GETGEO:
+		return compat_hdio_getgeo(bdev, argp);
+	case BLKPG:
+		return compat_blkpg_ioctl(bdev, argp);
+
+	/* Compat mode returns 32-bit data instead of 'long' */
+	case BLKRAGET:
+	case BLKFRAGET:
+		if (!argp)
+			return -EINVAL;
+		return compat_put_long(argp,
+			       (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
+	case BLKGETSIZE:
+		size = i_size_read(bdev->bd_inode);
+		if ((size >> 9) > ~0UL)
+			return -EFBIG;
+		return compat_put_ulong(argp, size >> 9);
+
+	/* The data is compatible, but the command number is different */
+	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
+		return put_int(argp, bdev_logical_block_size(bdev));
+	case BLKBSZSET_32:
+		return blkdev_bszset(bdev, mode, argp);
+	case BLKGETSIZE64_32:
+		return put_u64(argp, i_size_read(bdev->bd_inode));
+
+	/* Incompatible alignment on i386 */
+	case BLKTRACESETUP32:
+		return blk_trace_ioctl(bdev, cmd, argp);
+	default:
+		break;
 	}
+
+	ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
+	if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl)
+		ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_ioctl);
+#endif
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 5532412d567c..325cbba2465f 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -76,7 +76,6 @@ enum opal_response_token {
  * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
  * Section: 6.3 Assigned UIDs
  */
-#define OPAL_UID_LENGTH 8
 #define OPAL_METHOD_LENGTH 8
 #define OPAL_MSID_KEYLEN 15
 #define OPAL_UID_LENGTH_HALF 4
@@ -108,6 +107,7 @@ enum opal_uid {
 	OPAL_C_PIN_TABLE,
 	OPAL_LOCKING_INFO_TABLE,
 	OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
+	OPAL_DATASTORE,
 	/* C_PIN_TABLE object ID's */
 	OPAL_C_PIN_MSID,
 	OPAL_C_PIN_SID,
@@ -205,6 +205,10 @@ enum opal_lockingstate {
 	OPAL_LOCKING_LOCKED = 0x03,
 };
 
+enum opal_parameter {
+	OPAL_SUM_SET_LIST = 0x060000,
+};
+
 /* Packets derived from:
  * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
  * Secion: 3.2.3 ComPackets, Packets & Subpackets
diff --git a/block/partition-generic.c b/block/partition-generic.c
index aee643ce13d1..564fae77711d 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -127,7 +127,8 @@ ssize_t part_stat_show(struct device *dev,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
 		"%8u %8u %8u "
-		"%8lu %8lu %8llu %8u"
+		"%8lu %8lu %8llu %8u "
+		"%8lu %8u"
 		"\n",
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
@@ -143,7 +144,9 @@ ssize_t part_stat_show(struct device *dev,
 		part_stat_read(p, ios[STAT_DISCARD]),
 		part_stat_read(p, merges[STAT_DISCARD]),
 		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
-		(unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
+		(unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
+		part_stat_read(p, ios[STAT_FLUSH]),
+		(unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
@@ -318,6 +321,24 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	const char *dname;
 	int err;
 
+	/*
+	 * Partitions are not supported on zoned block devices that are used as
+	 * such.
+	 */
+	switch (disk->queue->limits.zoned) {
+	case BLK_ZONED_HM:
+		pr_warn("%s: partitions not supported on host managed zoned block device\n",
+			disk->disk_name);
+		return ERR_PTR(-ENXIO);
+	case BLK_ZONED_HA:
+		pr_info("%s: disabling host aware zoned block device support due to partitions\n",
+			disk->disk_name);
+		disk->queue->limits.zoned = BLK_ZONED_NONE;
+		break;
+	case BLK_ZONED_NONE:
+		break;
+	}
+
 	err = disk_expand_part_tbl(disk, partno);
 	if (err)
 		return ERR_PTR(err);
@@ -439,12 +460,14 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
 	}
 }
 
-static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
+int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
 {
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 	int res;
 
+	if (!disk_part_scan_enabled(disk))
+		return 0;
 	if (bdev->bd_part_count || bdev->bd_super)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
@@ -459,204 +482,124 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 	return 0;
 }
 
-static bool part_zone_aligned(struct gendisk *disk,
-			      struct block_device *bdev,
-			      sector_t from, sector_t size)
+static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
+		struct parsed_partitions *state, int p)
 {
-	unsigned int zone_sectors = bdev_zone_sectors(bdev);
+	sector_t size = state->parts[p].size;
+	sector_t from = state->parts[p].from;
+	struct hd_struct *part;
 
-	/*
-	 * If this function is called, then the disk is a zoned block device
-	 * (host-aware or host-managed). This can be detected even if the
-	 * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
-	 * set). In this case, however, only host-aware devices will be seen
-	 * as a block device is not created for host-managed devices. Without
-	 * zoned block device support, host-aware drives can still be used as
-	 * regular block devices (no zone operation) and their zone size will
-	 * be reported as 0. Allow this case.
-	 */
-	if (!zone_sectors)
+	if (!size)
 		return true;
 
-	/*
-	 * Check partition start and size alignement. If the drive has a
-	 * smaller last runt zone, ignore it and allow the partition to
-	 * use it. Check the zone size too: it should be a power of 2 number
-	 * of sectors.
-	 */
-	if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
-		u32 rem;
-
-		div_u64_rem(from, zone_sectors, &rem);
-		if (rem)
+	if (from >= get_capacity(disk)) {
+		printk(KERN_WARNING
+		       "%s: p%d start %llu is beyond EOD, ",
+		       disk->disk_name, p, (unsigned long long) from);
+		if (disk_unlock_native_capacity(disk))
 			return false;
-		if ((from + size) < get_capacity(disk)) {
-			div_u64_rem(size, zone_sectors, &rem);
-			if (rem)
-				return false;
-		}
+		return true;
+	}
 
-	} else {
+	if (from + size > get_capacity(disk)) {
+		printk(KERN_WARNING
+		       "%s: p%d size %llu extends beyond EOD, ",
+		       disk->disk_name, p, (unsigned long long) size);
 
-		if (from & (zone_sectors - 1))
-			return false;
-		if ((from + size) < get_capacity(disk) &&
-		    (size & (zone_sectors - 1)))
+		if (disk_unlock_native_capacity(disk))
 			return false;
 
+		/*
+		 * We can not ignore partitions of broken tables created by for
+		 * example camera firmware, but we limit them to the end of the
+		 * disk to avoid creating invalid block devices.
+		 */
+		size = get_capacity(disk) - from;
 	}
 
+	part = add_partition(disk, p, from, size, state->parts[p].flags,
+			     &state->parts[p].info);
+	if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
+		printk(KERN_ERR " %s: p%d could not be added: %ld\n",
+		       disk->disk_name, p, -PTR_ERR(part));
+		return true;
+	}
+
+#ifdef CONFIG_BLK_DEV_MD
+	if (state->parts[p].flags & ADDPART_FLAG_RAID)
+		md_autodetect_dev(part_to_dev(part)->devt);
+#endif
 	return true;
 }
 
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
 {
-	struct parsed_partitions *state = NULL;
-	struct hd_struct *part;
-	int p, highest, res;
-rescan:
-	if (state && !IS_ERR(state)) {
-		free_partitions(state);
-		state = NULL;
-	}
+	struct parsed_partitions *state;
+	int ret = -EAGAIN, p, highest;
 
-	res = drop_partitions(disk, bdev);
-	if (res)
-		return res;
+	if (!disk_part_scan_enabled(disk))
+		return 0;
 
-	if (disk->fops->revalidate_disk)
-		disk->fops->revalidate_disk(disk);
-	check_disk_size_change(disk, bdev, true);
-	bdev->bd_invalidated = 0;
-	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
+	state = check_partition(disk, bdev);
+	if (!state)
 		return 0;
 	if (IS_ERR(state)) {
 		/*
-		 * I/O error reading the partition table.  If any
-		 * partition code tried to read beyond EOD, retry
-		 * after unlocking native capacity.
+		 * I/O error reading the partition table.  If we tried to read
+		 * beyond EOD, retry after unlocking the native capacity.
 		 */
 		if (PTR_ERR(state) == -ENOSPC) {
 			printk(KERN_WARNING "%s: partition table beyond EOD, ",
 			       disk->disk_name);
 			if (disk_unlock_native_capacity(disk))
-				goto rescan;
+				return -EAGAIN;
 		}
 		return -EIO;
 	}
+
 	/*
-	 * If any partition code tried to read beyond EOD, try
-	 * unlocking native capacity even if partition table is
-	 * successfully read as we could be missing some partitions.
+	 * Partitions are not supported on host managed zoned block devices.
+	 */
+	if (disk->queue->limits.zoned == BLK_ZONED_HM) {
+		pr_warn("%s: ignoring partition table on host managed zoned block device\n",
+			disk->disk_name);
+		ret = 0;
+		goto out_free_state;
+	}
+
+	/*
+	 * If we read beyond EOD, try unlocking native capacity even if the
+	 * partition table was successfully read as we could be missing some
+	 * partitions.
 	 */
 	if (state->access_beyond_eod) {
 		printk(KERN_WARNING
 		       "%s: partition table partially beyond EOD, ",
 		       disk->disk_name);
 		if (disk_unlock_native_capacity(disk))
-			goto rescan;
+			goto out_free_state;
 	}
 
 	/* tell userspace that the media / partition table may have changed */
 	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 
-	/* Detect the highest partition number and preallocate
-	 * disk->part_tbl.  This is an optimization and not strictly
-	 * necessary.
+	/*
+	 * Detect the highest partition number and preallocate disk->part_tbl.
+	 * This is an optimization and not strictly necessary.
 	 */
 	for (p = 1, highest = 0; p < state->limit; p++)
 		if (state->parts[p].size)
 			highest = p;
-
 	disk_expand_part_tbl(disk, highest);
 
-	/* add partitions */
-	for (p = 1; p < state->limit; p++) {
-		sector_t size, from;
-
-		size = state->parts[p].size;
-		if (!size)
-			continue;
-
-		from = state->parts[p].from;
-		if (from >= get_capacity(disk)) {
-			printk(KERN_WARNING
-			       "%s: p%d start %llu is beyond EOD, ",
-			       disk->disk_name, p, (unsigned long long) from);
-			if (disk_unlock_native_capacity(disk))
-				goto rescan;
-			continue;
-		}
+	for (p = 1; p < state->limit; p++)
+		if (!blk_add_partition(disk, bdev, state, p))
+			goto out_free_state;
 
-		if (from + size > get_capacity(disk)) {
-			printk(KERN_WARNING
-			       "%s: p%d size %llu extends beyond EOD, ",
-			       disk->disk_name, p, (unsigned long long) size);
-
-			if (disk_unlock_native_capacity(disk)) {
-				/* free state and restart */
-				goto rescan;
-			} else {
-				/*
-				 * we can not ignore partitions of broken tables
-				 * created by for example camera firmware, but
-				 * we limit them to the end of the disk to avoid
-				 * creating invalid block devices
-				 */
-				size = get_capacity(disk) - from;
-			}
-		}
-
-		/*
-		 * On a zoned block device, partitions should be aligned on the
-		 * device zone size (i.e. zone boundary crossing not allowed).
-		 * Otherwise, resetting the write pointer of the last zone of
-		 * one partition may impact the following partition.
-		 */
-		if (bdev_is_zoned(bdev) &&
-		    !part_zone_aligned(disk, bdev, from, size)) {
-			printk(KERN_WARNING
-			       "%s: p%d start %llu+%llu is not zone aligned\n",
-			       disk->disk_name, p, (unsigned long long) from,
-			       (unsigned long long) size);
-			continue;
-		}
-
-		part = add_partition(disk, p, from, size,
-				     state->parts[p].flags,
-				     &state->parts[p].info);
-		if (IS_ERR(part)) {
-			printk(KERN_ERR " %s: p%d could not be added: %ld\n",
-			       disk->disk_name, p, -PTR_ERR(part));
-			continue;
-		}
-#ifdef CONFIG_BLK_DEV_MD
-		if (state->parts[p].flags & ADDPART_FLAG_RAID)
-			md_autodetect_dev(part_to_dev(part)->devt);
-#endif
-	}
+	ret = 0;
+out_free_state:
 	free_partitions(state);
-	return 0;
-}
-
-int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
-{
-	int res;
-
-	if (!bdev->bd_invalidated)
-		return 0;
-
-	res = drop_partitions(disk, bdev);
-	if (res)
-		return res;
-
-	set_capacity(disk, 0);
-	check_disk_size_change(disk, bdev, false);
-	bdev->bd_invalidated = 0;
-	/* tell userspace that the media / partition table may have changed */
-	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
-
-	return 0;
+	return ret;
 }
 
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index fe5d970e2e60..a2d97ee1908c 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -1233,7 +1233,7 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 	BUG_ON (!data || !frags);
 
 	if (size < 2 * VBLK_SIZE_HEAD) {
-		ldm_error("Value of size is to small.");
+		ldm_error("Value of size is too small.");
 		return false;
 	}
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index f5e0ad65e86a..b4e73d5dd5c2 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
  */
+#include <linux/compat.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
@@ -19,6 +20,7 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_cmnd.h>
+#include <scsi/sg.h>
 
 struct blk_cmd_filter {
 	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
@@ -327,7 +329,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		struct iov_iter i;
 		struct iovec *iov = NULL;
 
-		ret = import_iovec(rq_data_dir(rq),
+#ifdef CONFIG_COMPAT
+		if (in_compat_syscall())
+			ret = compat_import_iovec(rq_data_dir(rq),
+				   hdr->dxferp, hdr->iovec_count,
+				   0, &iov, &i);
+		else
+#endif
+			ret = import_iovec(rq_data_dir(rq),
 				   hdr->dxferp, hdr->iovec_count,
 				   0, &iov, &i);
 		if (ret < 0)
@@ -542,6 +551,224 @@ static inline int blk_send_start_stop(struct request_queue *q,
 	return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data);
 }
 
+int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_sg_io_hdr hdr32 =  {
+			.interface_id	 = hdr->interface_id,
+			.dxfer_direction = hdr->dxfer_direction,
+			.cmd_len	 = hdr->cmd_len,
+			.mx_sb_len	 = hdr->mx_sb_len,
+			.iovec_count	 = hdr->iovec_count,
+			.dxfer_len	 = hdr->dxfer_len,
+			.dxferp		 = (uintptr_t)hdr->dxferp,
+			.cmdp		 = (uintptr_t)hdr->cmdp,
+			.sbp		 = (uintptr_t)hdr->sbp,
+			.timeout	 = hdr->timeout,
+			.flags		 = hdr->flags,
+			.pack_id	 = hdr->pack_id,
+			.usr_ptr	 = (uintptr_t)hdr->usr_ptr,
+			.status		 = hdr->status,
+			.masked_status	 = hdr->masked_status,
+			.msg_status	 = hdr->msg_status,
+			.sb_len_wr	 = hdr->sb_len_wr,
+			.host_status	 = hdr->host_status,
+			.driver_status	 = hdr->driver_status,
+			.resid		 = hdr->resid,
+			.duration	 = hdr->duration,
+			.info		 = hdr->info,
+		};
+
+		if (copy_to_user(argp, &hdr32, sizeof(hdr32)))
+			return -EFAULT;
+
+		return 0;
+	}
+#endif
+
+	if (copy_to_user(argp, hdr, sizeof(*hdr)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(put_sg_io_hdr);
+
+int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp)
+{
+#ifdef CONFIG_COMPAT
+	struct compat_sg_io_hdr hdr32;
+
+	if (in_compat_syscall()) {
+		if (copy_from_user(&hdr32, argp, sizeof(hdr32)))
+			return -EFAULT;
+
+		*hdr = (struct sg_io_hdr) {
+			.interface_id	 = hdr32.interface_id,
+			.dxfer_direction = hdr32.dxfer_direction,
+			.cmd_len	 = hdr32.cmd_len,
+			.mx_sb_len	 = hdr32.mx_sb_len,
+			.iovec_count	 = hdr32.iovec_count,
+			.dxfer_len	 = hdr32.dxfer_len,
+			.dxferp		 = compat_ptr(hdr32.dxferp),
+			.cmdp		 = compat_ptr(hdr32.cmdp),
+			.sbp		 = compat_ptr(hdr32.sbp),
+			.timeout	 = hdr32.timeout,
+			.flags		 = hdr32.flags,
+			.pack_id	 = hdr32.pack_id,
+			.usr_ptr	 = compat_ptr(hdr32.usr_ptr),
+			.status		 = hdr32.status,
+			.masked_status	 = hdr32.masked_status,
+			.msg_status	 = hdr32.msg_status,
+			.sb_len_wr	 = hdr32.sb_len_wr,
+			.host_status	 = hdr32.host_status,
+			.driver_status	 = hdr32.driver_status,
+			.resid		 = hdr32.resid,
+			.duration	 = hdr32.duration,
+			.info		 = hdr32.info,
+		};
+
+		return 0;
+	}
+#endif
+
+	if (copy_from_user(hdr, argp, sizeof(*hdr)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(get_sg_io_hdr);
+
+#ifdef CONFIG_COMPAT
+struct compat_cdrom_generic_command {
+	unsigned char	cmd[CDROM_PACKET_SIZE];
+	compat_caddr_t	buffer;
+	compat_uint_t	buflen;
+	compat_int_t	stat;
+	compat_caddr_t	sense;
+	unsigned char	data_direction;
+	compat_int_t	quiet;
+	compat_int_t	timeout;
+	compat_caddr_t	reserved[1];
+};
+#endif
+
+static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc,
+				      const void __user *arg)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_cdrom_generic_command cgc32;
+
+		if (copy_from_user(&cgc32, arg, sizeof(cgc32)))
+			return -EFAULT;
+
+		*cgc = (struct cdrom_generic_command) {
+			.buffer		= compat_ptr(cgc32.buffer),
+			.buflen		= cgc32.buflen,
+			.stat		= cgc32.stat,
+			.sense		= compat_ptr(cgc32.sense),
+			.data_direction	= cgc32.data_direction,
+			.quiet		= cgc32.quiet,
+			.timeout	= cgc32.timeout,
+			.reserved[0]	= compat_ptr(cgc32.reserved[0]),
+		};
+		memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE);
+		return 0;
+	}
+#endif
+	if (copy_from_user(cgc, arg, sizeof(*cgc)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
+				      void __user *arg)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_cdrom_generic_command cgc32 = {
+			.buffer		= (uintptr_t)(cgc->buffer),
+			.buflen		= cgc->buflen,
+			.stat		= cgc->stat,
+			.sense		= (uintptr_t)(cgc->sense),
+			.data_direction	= cgc->data_direction,
+			.quiet		= cgc->quiet,
+			.timeout	= cgc->timeout,
+			.reserved[0]	= (uintptr_t)(cgc->reserved[0]),
+		};
+		memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE);
+
+		if (copy_to_user(arg, &cgc32, sizeof(cgc32)))
+			return -EFAULT;
+
+		return 0;
+	}
+#endif
+	if (copy_to_user(arg, cgc, sizeof(*cgc)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scsi_cdrom_send_packet(struct request_queue *q,
+				  struct gendisk *bd_disk,
+				  fmode_t mode, void __user *arg)
+{
+	struct cdrom_generic_command cgc;
+	struct sg_io_hdr hdr;
+	int err;
+
+	err = scsi_get_cdrom_generic_arg(&cgc, arg);
+	if (err)
+		return err;
+
+	cgc.timeout = clock_t_to_jiffies(cgc.timeout);
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.interface_id = 'S';
+	hdr.cmd_len = sizeof(cgc.cmd);
+	hdr.dxfer_len = cgc.buflen;
+	switch (cgc.data_direction) {
+		case CGC_DATA_UNKNOWN:
+			hdr.dxfer_direction = SG_DXFER_UNKNOWN;
+			break;
+		case CGC_DATA_WRITE:
+			hdr.dxfer_direction = SG_DXFER_TO_DEV;
+			break;
+		case CGC_DATA_READ:
+			hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+			break;
+		case CGC_DATA_NONE:
+			hdr.dxfer_direction = SG_DXFER_NONE;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	hdr.dxferp = cgc.buffer;
+	hdr.sbp = cgc.sense;
+	if (hdr.sbp)
+		hdr.mx_sb_len = sizeof(struct request_sense);
+	hdr.timeout = jiffies_to_msecs(cgc.timeout);
+	hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd;
+	hdr.cmd_len = sizeof(cgc.cmd);
+
+	err = sg_io(q, bd_disk, &hdr, mode);
+	if (err == -EFAULT)
+		return -EFAULT;
+
+	if (hdr.status)
+		return -EIO;
+
+	cgc.stat = err;
+	cgc.buflen = hdr.resid;
+	if (scsi_put_cdrom_generic_arg(&cgc, arg))
+		return -EFAULT;
+
+	return err;
+}
+
 int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mode,
 		   unsigned int cmd, void __user *arg)
 {
@@ -581,71 +808,20 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 		case SG_IO: {
 			struct sg_io_hdr hdr;
 
-			err = -EFAULT;
-			if (copy_from_user(&hdr, arg, sizeof(hdr)))
-				break;
-			err = sg_io(q, bd_disk, &hdr, mode);
-			if (err == -EFAULT)
-				break;
-
-			if (copy_to_user(arg, &hdr, sizeof(hdr)))
-				err = -EFAULT;
-			break;
-		}
-		case CDROM_SEND_PACKET: {
-			struct cdrom_generic_command cgc;
-			struct sg_io_hdr hdr;
-
-			err = -EFAULT;
-			if (copy_from_user(&cgc, arg, sizeof(cgc)))
-				break;
-			cgc.timeout = clock_t_to_jiffies(cgc.timeout);
-			memset(&hdr, 0, sizeof(hdr));
-			hdr.interface_id = 'S';
-			hdr.cmd_len = sizeof(cgc.cmd);
-			hdr.dxfer_len = cgc.buflen;
-			err = 0;
-			switch (cgc.data_direction) {
-				case CGC_DATA_UNKNOWN:
-					hdr.dxfer_direction = SG_DXFER_UNKNOWN;
-					break;
-				case CGC_DATA_WRITE:
-					hdr.dxfer_direction = SG_DXFER_TO_DEV;
-					break;
-				case CGC_DATA_READ:
-					hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-					break;
-				case CGC_DATA_NONE:
-					hdr.dxfer_direction = SG_DXFER_NONE;
-					break;
-				default:
-					err = -EINVAL;
-			}
+			err = get_sg_io_hdr(&hdr, arg);
 			if (err)
 				break;
-
-			hdr.dxferp = cgc.buffer;
-			hdr.sbp = cgc.sense;
-			if (hdr.sbp)
-				hdr.mx_sb_len = sizeof(struct request_sense);
-			hdr.timeout = jiffies_to_msecs(cgc.timeout);
-			hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd;
-			hdr.cmd_len = sizeof(cgc.cmd);
-
 			err = sg_io(q, bd_disk, &hdr, mode);
 			if (err == -EFAULT)
 				break;
 
-			if (hdr.status)
-				err = -EIO;
-
-			cgc.stat = err;
-			cgc.buflen = hdr.resid;
-			if (copy_to_user(arg, &cgc, sizeof(cgc)))
+			if (put_sg_io_hdr(&hdr, arg))
 				err = -EFAULT;
-
 			break;
 		}
+		case CDROM_SEND_PACKET:
+			err = scsi_cdrom_send_packet(q, bd_disk, mode, arg);
+			break;
 
 		/*
 		 * old junk scsi send command ioctl
diff --git a/block/sed-opal.c b/block/sed-opal.c
index b4c761973ac1..880cc57a5f6b 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -149,6 +149,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
 		{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
 	[OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
 		{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
+	[OPAL_DATASTORE] =
+		{ 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },
 
 	/* C_PIN_TABLE object ID's */
 	[OPAL_C_PIN_MSID] =
@@ -1139,11 +1141,11 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
  *
  * the result is provided in dev->resp->tok[4]
  */
-static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
+static int generic_get_table_info(struct opal_dev *dev, const u8 *table_uid,
 				  u64 column)
 {
 	u8 uid[OPAL_UID_LENGTH];
-	const unsigned int half = OPAL_UID_LENGTH/2;
+	const unsigned int half = OPAL_UID_LENGTH_HALF;
 
 	/* sed-opal UIDs can be split in two halves:
 	 *  first:  actual table index
@@ -1152,7 +1154,7 @@ static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
 	 * first part of the target table as relative index into that table
 	 */
 	memcpy(uid, opaluid[OPAL_TABLE_TABLE], half);
-	memcpy(uid+half, opaluid[table], half);
+	memcpy(uid + half, table_uid, half);
 
 	return generic_get_column(dev, uid, column);
 }
@@ -1221,6 +1223,75 @@ static int get_active_key(struct opal_dev *dev, void *data)
 	return get_active_key_cont(dev);
 }
 
+static int generic_table_write_data(struct opal_dev *dev, const u64 data,
+				    u64 offset, u64 size, const u8 *uid)
+{
+	const u8 __user *src = (u8 __user *)(uintptr_t)data;
+	u8 *dst;
+	u64 len;
+	size_t off = 0;
+	int err;
+
+	/* do we fit in the available space? */
+	err = generic_get_table_info(dev, uid, OPAL_TABLE_ROWS);
+	if (err) {
+		pr_debug("Couldn't get the table size\n");
+		return err;
+	}
+
+	len = response_get_u64(&dev->parsed, 4);
+	if (size > len || offset > len - size) {
+		pr_debug("Does not fit in the table (%llu vs. %llu)\n",
+			  offset + size, len);
+		return -ENOSPC;
+	}
+
+	/* do the actual transmission(s) */
+	while (off < size) {
+		err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u8(&err, dev, OPAL_WHERE);
+		add_token_u64(&err, dev, offset + off);
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u8(&err, dev, OPAL_VALUES);
+
+		/*
+		 * The bytestring header is either 1 or 2 bytes, so assume 2.
+		 * There also needs to be enough space to accommodate the
+		 * trailing OPAL_ENDNAME (1 byte) and tokens added by
+		 * cmd_finalize.
+		 */
+		len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
+			  (size_t)(size - off));
+		pr_debug("Write bytes %zu+%llu/%llu\n", off, len, size);
+
+		dst = add_bytestring_header(&err, dev, len);
+		if (!dst)
+			break;
+
+		if (copy_from_user(dst, src + off, len)) {
+			err = -EFAULT;
+			break;
+		}
+
+		dev->pos += len;
+
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+		if (err)
+			break;
+
+		err = finalize_and_send(dev, parse_and_check_status);
+		if (err)
+			break;
+
+		off += len;
+	}
+
+	return err;
+}
+
 static int generic_lr_enable_disable(struct opal_dev *dev,
 				     u8 *uid, bool rle, bool wle,
 				     bool rl, bool wl)
@@ -1583,68 +1654,9 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
 static int write_shadow_mbr(struct opal_dev *dev, void *data)
 {
 	struct opal_shadow_mbr *shadow = data;
-	const u8 __user *src;
-	u8 *dst;
-	size_t off = 0;
-	u64 len;
-	int err = 0;
-
-	/* do we fit in the available shadow mbr space? */
-	err = generic_get_table_info(dev, OPAL_MBR, OPAL_TABLE_ROWS);
-	if (err) {
-		pr_debug("MBR: could not get shadow size\n");
-		return err;
-	}
-
-	len = response_get_u64(&dev->parsed, 4);
-	if (shadow->size > len || shadow->offset > len - shadow->size) {
-		pr_debug("MBR: does not fit in shadow (%llu vs. %llu)\n",
-			 shadow->offset + shadow->size, len);
-		return -ENOSPC;
-	}
-
-	/* do the actual transmission(s) */
-	src = (u8 __user *)(uintptr_t)shadow->data;
-	while (off < shadow->size) {
-		err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]);
-		add_token_u8(&err, dev, OPAL_STARTNAME);
-		add_token_u8(&err, dev, OPAL_WHERE);
-		add_token_u64(&err, dev, shadow->offset + off);
-		add_token_u8(&err, dev, OPAL_ENDNAME);
-
-		add_token_u8(&err, dev, OPAL_STARTNAME);
-		add_token_u8(&err, dev, OPAL_VALUES);
-
-		/*
-		 * The bytestring header is either 1 or 2 bytes, so assume 2.
-		 * There also needs to be enough space to accommodate the
-		 * trailing OPAL_ENDNAME (1 byte) and tokens added by
-		 * cmd_finalize.
-		 */
-		len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
-			  (size_t)(shadow->size - off));
-		pr_debug("MBR: write bytes %zu+%llu/%llu\n",
-			 off, len, shadow->size);
-
-		dst = add_bytestring_header(&err, dev, len);
-		if (!dst)
-			break;
-		if (copy_from_user(dst, src + off, len))
-			err = -EFAULT;
-		dev->pos += len;
-
-		add_token_u8(&err, dev, OPAL_ENDNAME);
-		if (err)
-			break;
-
-		err = finalize_and_send(dev, parse_and_check_status);
-		if (err)
-			break;
-
-		off += len;
-	}
 
-	return err;
+	return generic_table_write_data(dev, shadow->data, shadow->offset,
+					shadow->size, opaluid[OPAL_MBR]);
 }
 
 static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
@@ -1874,7 +1886,6 @@ static int activate_lsp(struct opal_dev *dev, void *data)
 {
 	struct opal_lr_act *opal_act = data;
 	u8 user_lr[OPAL_UID_LENGTH];
-	u8 uint_3 = 0x83;
 	int err, i;
 
 	err = cmd_start(dev, opaluid[OPAL_LOCKINGSP_UID],
@@ -1887,10 +1898,7 @@ static int activate_lsp(struct opal_dev *dev, void *data)
 			return err;
 
 		add_token_u8(&err, dev, OPAL_STARTNAME);
-		add_token_u8(&err, dev, uint_3);
-		add_token_u8(&err, dev, 6);
-		add_token_u8(&err, dev, 0);
-		add_token_u8(&err, dev, 0);
+		add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
 
 		add_token_u8(&err, dev, OPAL_STARTLIST);
 		add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
@@ -1957,6 +1965,113 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
 	return 0;
 }
 
+static int write_table_data(struct opal_dev *dev, void *data)
+{
+	struct opal_read_write_table *write_tbl = data;
+
+	return generic_table_write_data(dev, write_tbl->data, write_tbl->offset,
+					write_tbl->size, write_tbl->table_uid);
+}
+
+static int read_table_data_cont(struct opal_dev *dev)
+{
+	int err;
+	const char *data_read;
+
+	err = parse_and_check_status(dev);
+	if (err)
+		return err;
+
+	dev->prev_d_len = response_get_string(&dev->parsed, 1, &data_read);
+	dev->prev_data = (void *)data_read;
+	if (!dev->prev_data) {
+		pr_debug("%s: Couldn't read data from the table.\n", __func__);
+		return OPAL_INVAL_PARAM;
+	}
+
+	return 0;
+}
+
+/*
+ * IO_BUFFER_LENGTH = 2048
+ * sizeof(header) = 56
+ * No. of Token Bytes in the Response = 11
+ * MAX size of data that can be carried in response buffer
+ * at a time is : 2048 - (56 + 11) = 1981 = 0x7BD.
+ */
+#define OPAL_MAX_READ_TABLE (0x7BD)
+
+static int read_table_data(struct opal_dev *dev, void *data)
+{
+	struct opal_read_write_table *read_tbl = data;
+	int err;
+	size_t off = 0, max_read_size = OPAL_MAX_READ_TABLE;
+	u64 table_len, len;
+	u64 offset = read_tbl->offset, read_size = read_tbl->size - 1;
+	u8 __user *dst;
+
+	err = generic_get_table_info(dev, read_tbl->table_uid, OPAL_TABLE_ROWS);
+	if (err) {
+		pr_debug("Couldn't get the table size\n");
+		return err;
+	}
+
+	table_len = response_get_u64(&dev->parsed, 4);
+
+	/* Check if the user is trying to read from the table limits */
+	if (read_size > table_len || offset > table_len - read_size) {
+		pr_debug("Read size exceeds the Table size limits (%llu vs. %llu)\n",
+			  offset + read_size, table_len);
+		return -EINVAL;
+	}
+
+	while (off < read_size) {
+		err = cmd_start(dev, read_tbl->table_uid, opalmethod[OPAL_GET]);
+
+		add_token_u8(&err, dev, OPAL_STARTLIST);
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u8(&err, dev, OPAL_STARTROW);
+		add_token_u64(&err, dev, offset + off); /* start row value */
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u8(&err, dev, OPAL_ENDROW);
+
+		len = min(max_read_size, (size_t)(read_size - off));
+		add_token_u64(&err, dev, offset + off + len); /* end row value
+							       */
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+		add_token_u8(&err, dev, OPAL_ENDLIST);
+
+		if (err) {
+			pr_debug("Error building read table data command.\n");
+			break;
+		}
+
+		err = finalize_and_send(dev, read_table_data_cont);
+		if (err)
+			break;
+
+		/* len+1: This includes the NULL terminator at the end*/
+		if (dev->prev_d_len > len + 1) {
+			err = -EOVERFLOW;
+			break;
+		}
+
+		dst = (u8 __user *)(uintptr_t)read_tbl->data;
+		if (copy_to_user(dst + off, dev->prev_data, dev->prev_d_len)) {
+			pr_debug("Error copying data to userspace\n");
+			err = -EFAULT;
+			break;
+		}
+		dev->prev_data = NULL;
+
+		off += len;
+	}
+
+	return err;
+}
+
 static int end_opal_session(struct opal_dev *dev, void *data)
 {
 	int err = 0;
@@ -2443,6 +2558,68 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
 }
 EXPORT_SYMBOL(opal_unlock_from_suspend);
 
+static int opal_read_table(struct opal_dev *dev,
+			   struct opal_read_write_table *rw_tbl)
+{
+	const struct opal_step read_table_steps[] = {
+		{ start_admin1LSP_opal_session, &rw_tbl->key },
+		{ read_table_data, rw_tbl },
+		{ end_opal_session, }
+	};
+	int ret = 0;
+
+	if (!rw_tbl->size)
+		return ret;
+
+	return execute_steps(dev, read_table_steps,
+			     ARRAY_SIZE(read_table_steps));
+}
+
+static int opal_write_table(struct opal_dev *dev,
+			    struct opal_read_write_table *rw_tbl)
+{
+	const struct opal_step write_table_steps[] = {
+		{ start_admin1LSP_opal_session, &rw_tbl->key },
+		{ write_table_data, rw_tbl },
+		{ end_opal_session, }
+	};
+	int ret = 0;
+
+	if (!rw_tbl->size)
+		return ret;
+
+	return execute_steps(dev, write_table_steps,
+			     ARRAY_SIZE(write_table_steps));
+}
+
+static int opal_generic_read_write_table(struct opal_dev *dev,
+					 struct opal_read_write_table *rw_tbl)
+{
+	int ret, bit_set;
+
+	mutex_lock(&dev->dev_lock);
+	setup_opal_dev(dev);
+
+	bit_set = fls64(rw_tbl->flags) - 1;
+	switch (bit_set) {
+	case OPAL_READ_TABLE:
+		ret = opal_read_table(dev, rw_tbl);
+		break;
+	case OPAL_WRITE_TABLE:
+		ret = opal_write_table(dev, rw_tbl);
+		break;
+	default:
+		pr_debug("Invalid bit set in the flag (%016llx).\n",
+			 rw_tbl->flags);
+		ret = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&dev->dev_lock);
+
+	return ret;
+}
+
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 {
 	void *p;
@@ -2505,6 +2682,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_PSID_REVERT_TPR:
 		ret = opal_reverttper(dev, p, true);
 		break;
+	case IOC_OPAL_GENERIC_TABLE_RW:
+		ret = opal_generic_read_write_table(dev, p);
+		break;
 	default:
 		break;
 	}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 9803c7e0376e..d910534b3a41 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -7,6 +7,7 @@
 #include <linux/t10-pi.h>
 #include <linux/blkdev.h>
 #include <linux/crc-t10dif.h>
+#include <linux/module.h>
 #include <net/checksum.h>
 
 typedef __be16 (csum_fn) (void *, unsigned int);
@@ -235,16 +236,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 	return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
 }
 
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
 static void t10_pi_type3_prepare(struct request *rq)
 {
 }
 
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
 static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
 {
 }
@@ -284,3 +281,5 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
 	.complete_fn		= t10_pi_type3_complete,
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
+
+MODULE_LICENSE("GPL");
diff --git a/certs/blacklist.c b/certs/blacklist.c
index ec00bf337eb6..6514f9ebc943 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -135,6 +135,15 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
 }
 EXPORT_SYMBOL_GPL(is_hash_blacklisted);
 
+int is_binary_blacklisted(const u8 *hash, size_t hash_len)
+{
+	if (is_hash_blacklisted(hash, hash_len, "bin") == -EKEYREJECTED)
+		return -EPERM;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(is_binary_blacklisted);
+
 /*
  * Initialise the blacklist
  */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 9e524044d312..cdb51d4272d0 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -52,12 +52,12 @@ config CRYPTO_AEAD2
 	select CRYPTO_NULL2
 	select CRYPTO_RNG2
 
-config CRYPTO_BLKCIPHER
+config CRYPTO_SKCIPHER
 	tristate
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_SKCIPHER2
 	select CRYPTO_ALGAPI
 
-config CRYPTO_BLKCIPHER2
+config CRYPTO_SKCIPHER2
 	tristate
 	select CRYPTO_ALGAPI2
 	select CRYPTO_RNG2
@@ -123,7 +123,7 @@ config CRYPTO_MANAGER2
 	def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y)
 	select CRYPTO_AEAD2
 	select CRYPTO_HASH2
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_SKCIPHER2
 	select CRYPTO_AKCIPHER2
 	select CRYPTO_KPP2
 	select CRYPTO_ACOMP2
@@ -169,7 +169,7 @@ config CRYPTO_NULL
 config CRYPTO_NULL2
 	tristate
 	select CRYPTO_ALGAPI2
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_SKCIPHER2
 	select CRYPTO_HASH2
 
 config CRYPTO_PCRYPT
@@ -184,7 +184,7 @@ config CRYPTO_PCRYPT
 
 config CRYPTO_CRYPTD
 	tristate "Software async crypto daemon"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_MANAGER
 	help
@@ -195,7 +195,7 @@ config CRYPTO_CRYPTD
 config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_HASH
 	select CRYPTO_NULL
@@ -217,7 +217,7 @@ config CRYPTO_SIMD
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 
 config CRYPTO_ENGINE
 	tristate
@@ -264,6 +264,17 @@ config CRYPTO_ECRDSA
 	  standard algorithms (called GOST algorithms). Only signature verification
 	  is implemented.
 
+config CRYPTO_CURVE25519
+	tristate "Curve25519 algorithm"
+	select CRYPTO_KPP
+	select CRYPTO_LIB_CURVE25519_GENERIC
+
+config CRYPTO_CURVE25519_X86
+	tristate "x86_64 accelerated Curve25519 scalar multiplication library"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_CURVE25519_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
 comment "Authenticated Encryption with Associated Data"
 
 config CRYPTO_CCM
@@ -309,6 +320,7 @@ config CRYPTO_AEGIS128
 config CRYPTO_AEGIS128_SIMD
 	bool "Support SIMD acceleration for AEGIS-128"
 	depends on CRYPTO_AEGIS128 && ((ARM || ARM64) && KERNEL_MODE_NEON)
+	depends on !ARM || CC_IS_CLANG || GCC_VERSION >= 40800
 	default y
 
 config CRYPTO_AEGIS128_AESNI_SSE2
@@ -322,7 +334,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_NULL
 	select CRYPTO_RNG_DEFAULT
 	select CRYPTO_MANAGER
@@ -345,7 +357,7 @@ comment "Block modes"
 
 config CRYPTO_CBC
 	tristate "CBC support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  CBC: Cipher Block Chaining mode
@@ -353,7 +365,7 @@ config CRYPTO_CBC
 
 config CRYPTO_CFB
 	tristate "CFB support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  CFB: Cipher FeedBack mode
@@ -361,7 +373,7 @@ config CRYPTO_CFB
 
 config CRYPTO_CTR
 	tristate "CTR support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_SEQIV
 	select CRYPTO_MANAGER
 	help
@@ -370,7 +382,7 @@ config CRYPTO_CTR
 
 config CRYPTO_CTS
 	tristate "CTS support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  CTS: Cipher Text Stealing
@@ -385,7 +397,7 @@ config CRYPTO_CTS
 
 config CRYPTO_ECB
 	tristate "ECB support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  ECB: Electronic CodeBook mode
@@ -394,7 +406,7 @@ config CRYPTO_ECB
 
 config CRYPTO_LRW
 	tristate "LRW support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_GF128MUL
 	help
@@ -406,7 +418,7 @@ config CRYPTO_LRW
 
 config CRYPTO_OFB
 	tristate "OFB support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  OFB: the Output Feedback mode makes a block cipher into a synchronous
@@ -418,7 +430,7 @@ config CRYPTO_OFB
 
 config CRYPTO_PCBC
 	tristate "PCBC support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  PCBC: Propagating Cipher Block Chaining mode
@@ -426,7 +438,7 @@ config CRYPTO_PCBC
 
 config CRYPTO_XTS
 	tristate "XTS support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_ECB
 	help
@@ -436,7 +448,7 @@ config CRYPTO_XTS
 
 config CRYPTO_KEYWRAP
 	tristate "Key wrapping support"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  Support for key wrapping (NIST SP800-38F / RFC3394) without
@@ -445,7 +457,7 @@ config CRYPTO_KEYWRAP
 config CRYPTO_NHPOLY1305
 	tristate
 	select CRYPTO_HASH
-	select CRYPTO_POLY1305
+	select CRYPTO_LIB_POLY1305_GENERIC
 
 config CRYPTO_NHPOLY1305_SSE2
 	tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
@@ -466,7 +478,7 @@ config CRYPTO_NHPOLY1305_AVX2
 config CRYPTO_ADIANTUM
 	tristate "Adiantum support"
 	select CRYPTO_CHACHA20
-	select CRYPTO_POLY1305
+	select CRYPTO_LIB_POLY1305_GENERIC
 	select CRYPTO_NHPOLY1305
 	select CRYPTO_MANAGER
 	help
@@ -499,10 +511,10 @@ config CRYPTO_ESSIV
 	  encryption.
 
 	  This driver implements a crypto API template that can be
-	  instantiated either as a skcipher or as a aead (depending on the
+	  instantiated either as an skcipher or as an AEAD (depending on the
 	  type of the first template argument), and which defers encryption
 	  and decryption requests to the encapsulated cipher after applying
-	  ESSIV to the input IV. Note that in the aead case, it is assumed
+	  ESSIV to the input IV. Note that in the AEAD case, it is assumed
 	  that the keys are presented in the same format used by the authenc
 	  template, and that the IV appears at the end of the authenticated
 	  associated data (AAD) region (which is how dm-crypt uses it.)
@@ -638,6 +650,47 @@ config CRYPTO_XXHASH
 	  xxHash non-cryptographic hash algorithm. Extremely fast, working at
 	  speeds close to RAM limits.
 
+config CRYPTO_BLAKE2B
+	tristate "BLAKE2b digest algorithm"
+	select CRYPTO_HASH
+	help
+	  Implementation of cryptographic hash function BLAKE2b (or just BLAKE2),
+	  optimized for 64bit platforms and can produce digests of any size
+	  between 1 to 64.  The keyed hash is also implemented.
+
+	  This module provides the following algorithms:
+
+	  - blake2b-160
+	  - blake2b-256
+	  - blake2b-384
+	  - blake2b-512
+
+	  See https://blake2.net for further information.
+
+config CRYPTO_BLAKE2S
+	tristate "BLAKE2s digest algorithm"
+	select CRYPTO_LIB_BLAKE2S_GENERIC
+	select CRYPTO_HASH
+	help
+	  Implementation of cryptographic hash function BLAKE2s
+	  optimized for 8-32bit platforms and can produce digests of any size
+	  between 1 to 32.  The keyed hash is also implemented.
+
+	  This module provides the following algorithms:
+
+	  - blake2s-128
+	  - blake2s-160
+	  - blake2s-224
+	  - blake2s-256
+
+	  See https://blake2.net for further information.
+
+config CRYPTO_BLAKE2S_X86
+	tristate "BLAKE2s digest algorithm (x86 accelerated version)"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_BLAKE2S_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
@@ -685,6 +738,7 @@ config CRYPTO_GHASH
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
 	select CRYPTO_HASH
+	select CRYPTO_LIB_POLY1305_GENERIC
 	help
 	  Poly1305 authenticator algorithm, RFC7539.
 
@@ -695,7 +749,8 @@ config CRYPTO_POLY1305
 config CRYPTO_POLY1305_X86_64
 	tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
 	depends on X86 && 64BIT
-	select CRYPTO_POLY1305
+	select CRYPTO_LIB_POLY1305_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
 	help
 	  Poly1305 authenticator algorithm, RFC7539.
 
@@ -704,6 +759,11 @@ config CRYPTO_POLY1305_X86_64
 	  in IETF protocols. This is the x86_64 assembler implementation using SIMD
 	  instructions.
 
+config CRYPTO_POLY1305_MIPS
+	tristate "Poly1305 authenticator algorithm (MIPS optimized)"
+	depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
 config CRYPTO_MD4
 	tristate "MD4 digest algorithm"
 	select CRYPTO_HASH
@@ -877,9 +937,6 @@ config CRYPTO_SHA1_PPC_SPE
 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
 	  using powerpc SPE SIMD instruction set.
 
-config CRYPTO_LIB_SHA256
-	tristate
-
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
@@ -1018,9 +1075,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
 
 comment "Ciphers"
 
-config CRYPTO_LIB_AES
-	tristate
-
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1067,7 +1121,7 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_AEAD
 	select CRYPTO_LIB_AES
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86 if 64BIT
 	select CRYPTO_SIMD
 	help
@@ -1097,8 +1151,7 @@ config CRYPTO_AES_NI_INTEL
 config CRYPTO_AES_SPARC64
 	tristate "AES cipher algorithms (SPARC64)"
 	depends on SPARC64
-	select CRYPTO_CRYPTD
-	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
 	help
 	  Use SPARC64 crypto opcodes for AES algorithm.
 
@@ -1125,6 +1178,7 @@ config CRYPTO_AES_SPARC64
 config CRYPTO_AES_PPC_SPE
 	tristate "AES cipher algorithms (PPC SPE)"
 	depends on PPC && SPE
+	select CRYPTO_SKCIPHER
 	help
 	  AES cipher algorithms (FIPS-197). Additionally the acceleration
 	  for popular block cipher modes ECB, CBC, CTR and XTS is supported.
@@ -1149,12 +1203,9 @@ config CRYPTO_ANUBIS
 	  <https://www.cosic.esat.kuleuven.be/nessie/reports/>
 	  <http://www.larc.usp.br/~pbarreto/AnubisPage.html>
 
-config CRYPTO_LIB_ARC4
-	tristate
-
 config CRYPTO_ARC4
 	tristate "ARC4 cipher algorithm"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_ARC4
 	help
 	  ARC4 cipher algorithm.
@@ -1190,7 +1241,7 @@ config CRYPTO_BLOWFISH_COMMON
 config CRYPTO_BLOWFISH_X86_64
 	tristate "Blowfish cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_BLOWFISH_COMMON
 	help
 	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
@@ -1221,7 +1272,7 @@ config CRYPTO_CAMELLIA_X86_64
 	tristate "Camellia cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	help
 	  Camellia cipher algorithm module (x86_64).
@@ -1238,7 +1289,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_CAMELLIA_X86_64
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SIMD
@@ -1275,6 +1326,7 @@ config CRYPTO_CAMELLIA_SPARC64
 	depends on SPARC64
 	depends on CRYPTO
 	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
 	help
 	  Camellia cipher algorithm module (SPARC64).
 
@@ -1303,7 +1355,7 @@ config CRYPTO_CAST5
 config CRYPTO_CAST5_AVX_X86_64
 	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_CAST5
 	select CRYPTO_CAST_COMMON
 	select CRYPTO_SIMD
@@ -1325,7 +1377,7 @@ config CRYPTO_CAST6
 config CRYPTO_CAST6_AVX_X86_64
 	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_CAST6
 	select CRYPTO_CAST_COMMON
 	select CRYPTO_GLUE_HELPER_X86
@@ -1338,9 +1390,6 @@ config CRYPTO_CAST6_AVX_X86_64
 	  This module provides the Cast6 cipher algorithm that processes
 	  eight blocks parallel using the AVX instruction set.
 
-config CRYPTO_LIB_DES
-	tristate
-
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1353,6 +1402,7 @@ config CRYPTO_DES_SPARC64
 	depends on SPARC64
 	select CRYPTO_ALGAPI
 	select CRYPTO_LIB_DES
+	select CRYPTO_SKCIPHER
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3),
 	  optimized using SPARC64 crypto opcodes.
@@ -1360,7 +1410,7 @@ config CRYPTO_DES_SPARC64
 config CRYPTO_DES3_EDE_X86_64
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
@@ -1373,7 +1423,7 @@ config CRYPTO_DES3_EDE_X86_64
 config CRYPTO_FCRYPT
 	tristate "FCrypt cipher algorithm"
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  FCrypt algorithm used by RxRPC.
 
@@ -1392,7 +1442,7 @@ config CRYPTO_KHAZAD
 
 config CRYPTO_SALSA20
 	tristate "Salsa20 stream cipher algorithm"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  Salsa20 stream cipher algorithm.
 
@@ -1404,7 +1454,8 @@ config CRYPTO_SALSA20
 
 config CRYPTO_CHACHA20
 	tristate "ChaCha stream cipher algorithms"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_SKCIPHER
 	help
 	  The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
 
@@ -1426,12 +1477,19 @@ config CRYPTO_CHACHA20
 config CRYPTO_CHACHA20_X86_64
 	tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
 	help
 	  SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
 	  XChaCha20, and XChaCha12 stream ciphers.
 
+config CRYPTO_CHACHA_MIPS
+	tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)"
+	depends on CPU_MIPS32_R2
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
 config CRYPTO_SEED
 	tristate "SEED cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -1462,7 +1520,7 @@ config CRYPTO_SERPENT
 config CRYPTO_SERPENT_SSE2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/SSE2)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
 	select CRYPTO_SIMD
@@ -1481,7 +1539,7 @@ config CRYPTO_SERPENT_SSE2_X86_64
 config CRYPTO_SERPENT_SSE2_586
 	tristate "Serpent cipher algorithm (i586/SSE2)"
 	depends on X86 && !64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
 	select CRYPTO_SIMD
@@ -1500,7 +1558,7 @@ config CRYPTO_SERPENT_SSE2_586
 config CRYPTO_SERPENT_AVX_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
 	select CRYPTO_SIMD
@@ -1631,7 +1689,7 @@ config CRYPTO_TWOFISH_X86_64
 config CRYPTO_TWOFISH_X86_64_3WAY
 	tristate "Twofish cipher algorithm (x86_64, 3-way parallel)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_GLUE_HELPER_X86
@@ -1652,7 +1710,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 config CRYPTO_TWOFISH_AVX_X86_64
 	tristate "Twofish cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SIMD
 	select CRYPTO_TWOFISH_COMMON
@@ -1803,7 +1861,7 @@ config CRYPTO_USER_API_HASH
 config CRYPTO_USER_API_SKCIPHER
 	tristate "User-space interface for symmetric key cipher algorithms"
 	depends on NET
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_USER_API
 	help
 	  This option enables the user-spaces interface for symmetric
@@ -1822,7 +1880,7 @@ config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_NULL
 	select CRYPTO_USER_API
 	help
@@ -1844,6 +1902,7 @@ config CRYPTO_STATS
 config CRYPTO_HASH_INFO
 	bool
 
+source "lib/crypto/Kconfig"
 source "drivers/crypto/Kconfig"
 source "crypto/asymmetric_keys/Kconfig"
 source "certs/Kconfig"
diff --git a/crypto/Makefile b/crypto/Makefile
index fcb1ee679782..4ca12b6044f7 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -14,11 +14,9 @@ crypto_algapi-y := algapi.o scatterwalk.o $(crypto_algapi-y)
 obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o
 
 obj-$(CONFIG_CRYPTO_AEAD2) += aead.o
+obj-$(CONFIG_CRYPTO_AEAD2) += geniv.o
 
-crypto_blkcipher-y := ablkcipher.o
-crypto_blkcipher-y += blkcipher.o
-crypto_blkcipher-y += skcipher.o
-obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
+obj-$(CONFIG_CRYPTO_SKCIPHER2) += skcipher.o
 obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
 obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o
 
@@ -74,6 +72,8 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
+obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
+obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
@@ -93,7 +93,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
 aegis128-y := aegis128-core.o
 
 ifeq ($(ARCH),arm)
-CFLAGS_aegis128-neon-inner.o += -ffreestanding -march=armv7-a -mfloat-abi=softfp
+CFLAGS_aegis128-neon-inner.o += -ffreestanding -march=armv8-a -mfloat-abi=softfp
 CFLAGS_aegis128-neon-inner.o += -mfpu=crypto-neon-fp-armv8
 aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
 endif
@@ -166,6 +166,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
 obj-$(CONFIG_CRYPTO_OFB) += ofb.o
 obj-$(CONFIG_CRYPTO_ECC) += ecc.o
 obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o
+obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o
 
 ecdh_generic-y += ecdh.o
 ecdh_generic-y += ecdh_helper.o
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
deleted file mode 100644
index 072b5646a0a3..000000000000
--- a/crypto/ablkcipher.c
+++ /dev/null
@@ -1,407 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Asynchronous block chaining cipher operations.
- *
- * This is the asynchronous version of blkcipher.c indicating completion
- * via a callback.
- *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <crypto/internal/skcipher.h>
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/seq_file.h>
-#include <linux/cryptouser.h>
-#include <linux/compiler.h>
-#include <net/netlink.h>
-
-#include <crypto/scatterwalk.h>
-
-#include "internal.h"
-
-struct ablkcipher_buffer {
-	struct list_head	entry;
-	struct scatter_walk	dst;
-	unsigned int		len;
-	void			*data;
-};
-
-enum {
-	ABLKCIPHER_WALK_SLOW = 1 << 0,
-};
-
-static inline void ablkcipher_buffer_write(struct ablkcipher_buffer *p)
-{
-	scatterwalk_copychunks(p->data, &p->dst, p->len, 1);
-}
-
-void __ablkcipher_walk_complete(struct ablkcipher_walk *walk)
-{
-	struct ablkcipher_buffer *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &walk->buffers, entry) {
-		ablkcipher_buffer_write(p);
-		list_del(&p->entry);
-		kfree(p);
-	}
-}
-EXPORT_SYMBOL_GPL(__ablkcipher_walk_complete);
-
-static inline void ablkcipher_queue_write(struct ablkcipher_walk *walk,
-					  struct ablkcipher_buffer *p)
-{
-	p->dst = walk->out;
-	list_add_tail(&p->entry, &walk->buffers);
-}
-
-/* Get a spot of the specified length that does not straddle a page.
- * The caller needs to ensure that there is enough space for this operation.
- */
-static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len)
-{
-	u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
-
-	return max(start, end_page);
-}
-
-static inline void ablkcipher_done_slow(struct ablkcipher_walk *walk,
-					unsigned int n)
-{
-	for (;;) {
-		unsigned int len_this_page = scatterwalk_pagelen(&walk->out);
-
-		if (len_this_page > n)
-			len_this_page = n;
-		scatterwalk_advance(&walk->out, n);
-		if (n == len_this_page)
-			break;
-		n -= len_this_page;
-		scatterwalk_start(&walk->out, sg_next(walk->out.sg));
-	}
-}
-
-static inline void ablkcipher_done_fast(struct ablkcipher_walk *walk,
-					unsigned int n)
-{
-	scatterwalk_advance(&walk->in, n);
-	scatterwalk_advance(&walk->out, n);
-}
-
-static int ablkcipher_walk_next(struct ablkcipher_request *req,
-				struct ablkcipher_walk *walk);
-
-int ablkcipher_walk_done(struct ablkcipher_request *req,
-			 struct ablkcipher_walk *walk, int err)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	unsigned int n; /* bytes processed */
-	bool more;
-
-	if (unlikely(err < 0))
-		goto finish;
-
-	n = walk->nbytes - err;
-	walk->total -= n;
-	more = (walk->total != 0);
-
-	if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) {
-		ablkcipher_done_fast(walk, n);
-	} else {
-		if (WARN_ON(err)) {
-			/* unexpected case; didn't process all bytes */
-			err = -EINVAL;
-			goto finish;
-		}
-		ablkcipher_done_slow(walk, n);
-	}
-
-	scatterwalk_done(&walk->in, 0, more);
-	scatterwalk_done(&walk->out, 1, more);
-
-	if (more) {
-		crypto_yield(req->base.flags);
-		return ablkcipher_walk_next(req, walk);
-	}
-	err = 0;
-finish:
-	walk->nbytes = 0;
-	if (walk->iv != req->info)
-		memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
-	kfree(walk->iv_buffer);
-	return err;
-}
-EXPORT_SYMBOL_GPL(ablkcipher_walk_done);
-
-static inline int ablkcipher_next_slow(struct ablkcipher_request *req,
-				       struct ablkcipher_walk *walk,
-				       unsigned int bsize,
-				       unsigned int alignmask,
-				       void **src_p, void **dst_p)
-{
-	unsigned aligned_bsize = ALIGN(bsize, alignmask + 1);
-	struct ablkcipher_buffer *p;
-	void *src, *dst, *base;
-	unsigned int n;
-
-	n = ALIGN(sizeof(struct ablkcipher_buffer), alignmask + 1);
-	n += (aligned_bsize * 3 - (alignmask + 1) +
-	      (alignmask & ~(crypto_tfm_ctx_alignment() - 1)));
-
-	p = kmalloc(n, GFP_ATOMIC);
-	if (!p)
-		return ablkcipher_walk_done(req, walk, -ENOMEM);
-
-	base = p + 1;
-
-	dst = (u8 *)ALIGN((unsigned long)base, alignmask + 1);
-	src = dst = ablkcipher_get_spot(dst, bsize);
-
-	p->len = bsize;
-	p->data = dst;
-
-	scatterwalk_copychunks(src, &walk->in, bsize, 0);
-
-	ablkcipher_queue_write(walk, p);
-
-	walk->nbytes = bsize;
-	walk->flags |= ABLKCIPHER_WALK_SLOW;
-
-	*src_p = src;
-	*dst_p = dst;
-
-	return 0;
-}
-
-static inline int ablkcipher_copy_iv(struct ablkcipher_walk *walk,
-				     struct crypto_tfm *tfm,
-				     unsigned int alignmask)
-{
-	unsigned bs = walk->blocksize;
-	unsigned int ivsize = tfm->crt_ablkcipher.ivsize;
-	unsigned aligned_bs = ALIGN(bs, alignmask + 1);
-	unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) -
-			    (alignmask + 1);
-	u8 *iv;
-
-	size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
-	walk->iv_buffer = kmalloc(size, GFP_ATOMIC);
-	if (!walk->iv_buffer)
-		return -ENOMEM;
-
-	iv = (u8 *)ALIGN((unsigned long)walk->iv_buffer, alignmask + 1);
-	iv = ablkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = ablkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = ablkcipher_get_spot(iv, ivsize);
-
-	walk->iv = memcpy(iv, walk->iv, ivsize);
-	return 0;
-}
-
-static inline int ablkcipher_next_fast(struct ablkcipher_request *req,
-				       struct ablkcipher_walk *walk)
-{
-	walk->src.page = scatterwalk_page(&walk->in);
-	walk->src.offset = offset_in_page(walk->in.offset);
-	walk->dst.page = scatterwalk_page(&walk->out);
-	walk->dst.offset = offset_in_page(walk->out.offset);
-
-	return 0;
-}
-
-static int ablkcipher_walk_next(struct ablkcipher_request *req,
-				struct ablkcipher_walk *walk)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	unsigned int alignmask, bsize, n;
-	void *src, *dst;
-	int err;
-
-	alignmask = crypto_tfm_alg_alignmask(tfm);
-	n = walk->total;
-	if (unlikely(n < crypto_tfm_alg_blocksize(tfm))) {
-		req->base.flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		return ablkcipher_walk_done(req, walk, -EINVAL);
-	}
-
-	walk->flags &= ~ABLKCIPHER_WALK_SLOW;
-	src = dst = NULL;
-
-	bsize = min(walk->blocksize, n);
-	n = scatterwalk_clamp(&walk->in, n);
-	n = scatterwalk_clamp(&walk->out, n);
-
-	if (n < bsize ||
-	    !scatterwalk_aligned(&walk->in, alignmask) ||
-	    !scatterwalk_aligned(&walk->out, alignmask)) {
-		err = ablkcipher_next_slow(req, walk, bsize, alignmask,
-					   &src, &dst);
-		goto set_phys_lowmem;
-	}
-
-	walk->nbytes = n;
-
-	return ablkcipher_next_fast(req, walk);
-
-set_phys_lowmem:
-	if (err >= 0) {
-		walk->src.page = virt_to_page(src);
-		walk->dst.page = virt_to_page(dst);
-		walk->src.offset = ((unsigned long)src & (PAGE_SIZE - 1));
-		walk->dst.offset = ((unsigned long)dst & (PAGE_SIZE - 1));
-	}
-
-	return err;
-}
-
-static int ablkcipher_walk_first(struct ablkcipher_request *req,
-				 struct ablkcipher_walk *walk)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	unsigned int alignmask;
-
-	alignmask = crypto_tfm_alg_alignmask(tfm);
-	if (WARN_ON_ONCE(in_irq()))
-		return -EDEADLK;
-
-	walk->iv = req->info;
-	walk->nbytes = walk->total;
-	if (unlikely(!walk->total))
-		return 0;
-
-	walk->iv_buffer = NULL;
-	if (unlikely(((unsigned long)walk->iv & alignmask))) {
-		int err = ablkcipher_copy_iv(walk, tfm, alignmask);
-
-		if (err)
-			return err;
-	}
-
-	scatterwalk_start(&walk->in, walk->in.sg);
-	scatterwalk_start(&walk->out, walk->out.sg);
-
-	return ablkcipher_walk_next(req, walk);
-}
-
-int ablkcipher_walk_phys(struct ablkcipher_request *req,
-			 struct ablkcipher_walk *walk)
-{
-	walk->blocksize = crypto_tfm_alg_blocksize(req->base.tfm);
-	return ablkcipher_walk_first(req, walk);
-}
-EXPORT_SYMBOL_GPL(ablkcipher_walk_phys);
-
-static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key,
-			    unsigned int keylen)
-{
-	struct ablkcipher_alg *cipher = crypto_ablkcipher_alg(tfm);
-	unsigned long alignmask = crypto_ablkcipher_alignmask(tfm);
-	int ret;
-	u8 *buffer, *alignbuffer;
-	unsigned long absize;
-
-	absize = keylen + alignmask;
-	buffer = kmalloc(absize, GFP_ATOMIC);
-	if (!buffer)
-		return -ENOMEM;
-
-	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-	memcpy(alignbuffer, key, keylen);
-	ret = cipher->setkey(tfm, alignbuffer, keylen);
-	memset(alignbuffer, 0, keylen);
-	kfree(buffer);
-	return ret;
-}
-
-static int setkey(struct crypto_ablkcipher *tfm, const u8 *key,
-		  unsigned int keylen)
-{
-	struct ablkcipher_alg *cipher = crypto_ablkcipher_alg(tfm);
-	unsigned long alignmask = crypto_ablkcipher_alignmask(tfm);
-
-	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	if ((unsigned long)key & alignmask)
-		return setkey_unaligned(tfm, key, keylen);
-
-	return cipher->setkey(tfm, key, keylen);
-}
-
-static unsigned int crypto_ablkcipher_ctxsize(struct crypto_alg *alg, u32 type,
-					      u32 mask)
-{
-	return alg->cra_ctxsize;
-}
-
-static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type,
-				      u32 mask)
-{
-	struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
-	struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher;
-
-	if (alg->ivsize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	crt->setkey = setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-	crt->base = __crypto_ablkcipher_cast(tfm);
-	crt->ivsize = alg->ivsize;
-
-	return 0;
-}
-
-#ifdef CONFIG_NET
-static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_blkcipher rblkcipher;
-
-	memset(&rblkcipher, 0, sizeof(rblkcipher));
-
-	strscpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type));
-	strscpy(rblkcipher.geniv, "<default>", sizeof(rblkcipher.geniv));
-
-	rblkcipher.blocksize = alg->cra_blocksize;
-	rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
-	rblkcipher.max_keysize = alg->cra_ablkcipher.max_keysize;
-	rblkcipher.ivsize = alg->cra_ablkcipher.ivsize;
-
-	return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
-		       sizeof(rblkcipher), &rblkcipher);
-}
-#else
-static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	return -ENOSYS;
-}
-#endif
-
-static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
-	__maybe_unused;
-static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
-{
-	struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
-
-	seq_printf(m, "type         : ablkcipher\n");
-	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
-					     "yes" : "no");
-	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
-	seq_printf(m, "min keysize  : %u\n", ablkcipher->min_keysize);
-	seq_printf(m, "max keysize  : %u\n", ablkcipher->max_keysize);
-	seq_printf(m, "ivsize       : %u\n", ablkcipher->ivsize);
-	seq_printf(m, "geniv        : <default>\n");
-}
-
-const struct crypto_type crypto_ablkcipher_type = {
-	.ctxsize = crypto_ablkcipher_ctxsize,
-	.init = crypto_init_ablkcipher_ops,
-#ifdef CONFIG_PROC_FS
-	.show = crypto_ablkcipher_show,
-#endif
-	.report = crypto_ablkcipher_report,
-};
-EXPORT_SYMBOL_GPL(crypto_ablkcipher_type);
diff --git a/crypto/acompress.c b/crypto/acompress.c
index abadcb035a41..84a76723e851 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -151,9 +151,9 @@ int crypto_register_acomp(struct acomp_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_register_acomp);
 
-int crypto_unregister_acomp(struct acomp_alg *alg)
+void crypto_unregister_acomp(struct acomp_alg *alg)
 {
-	return crypto_unregister_alg(&alg->base);
+	crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_acomp);
 
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
index 395a3ddd3707..cf2b9f4103dd 100644
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@ -33,13 +33,12 @@
 #include <crypto/b128ops.h>
 #include <crypto/chacha.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/nhpoly1305.h>
 #include <crypto/scatterwalk.h>
 #include <linux/module.h>
 
-#include "internal.h"
-
 /*
  * Size of right-hand part of input data, in bytes; also the size of the block
  * cipher's block size and the hash function's output.
@@ -63,7 +62,7 @@
 
 struct adiantum_instance_ctx {
 	struct crypto_skcipher_spawn streamcipher_spawn;
-	struct crypto_spawn blockcipher_spawn;
+	struct crypto_cipher_spawn blockcipher_spawn;
 	struct crypto_shash_spawn hash_spawn;
 };
 
@@ -71,7 +70,7 @@ struct adiantum_tfm_ctx {
 	struct crypto_skcipher *streamcipher;
 	struct crypto_cipher *blockcipher;
 	struct crypto_shash *hash;
-	struct poly1305_key header_hash_key;
+	struct poly1305_core_key header_hash_key;
 };
 
 struct adiantum_request_ctx {
@@ -134,9 +133,6 @@ static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				  crypto_skcipher_get_flags(tfm) &
 				  CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen);
-	crypto_skcipher_set_flags(tfm,
-				crypto_skcipher_get_flags(tctx->streamcipher) &
-				CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -166,9 +162,6 @@ static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(tctx->blockcipher, keyp,
 				   BLOCKCIPHER_KEY_SIZE);
-	crypto_skcipher_set_flags(tfm,
-				  crypto_cipher_get_flags(tctx->blockcipher) &
-				  CRYPTO_TFM_RES_MASK);
 	if (err)
 		goto out;
 	keyp += BLOCKCIPHER_KEY_SIZE;
@@ -181,8 +174,6 @@ static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) &
 					   CRYPTO_TFM_REQ_MASK);
 	err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE);
-	crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) &
-				       CRYPTO_TFM_RES_MASK);
 	keyp += NHPOLY1305_KEY_SIZE;
 	WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]);
 out:
@@ -242,13 +233,13 @@ static void adiantum_hash_header(struct skcipher_request *req)
 
 	BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
 	poly1305_core_blocks(&state, &tctx->header_hash_key,
-			     &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+			     &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
 
 	BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
 	poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
-			     TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+			     TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
 
-	poly1305_core_emit(&state, &rctx->header_hash);
+	poly1305_core_emit(&state, NULL, &rctx->header_hash);
 }
 
 /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
@@ -435,10 +426,10 @@ static int adiantum_init_tfm(struct crypto_skcipher *tfm)
 
 	BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
 		     sizeof(struct adiantum_request_ctx));
-	subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+	subreq_size = max(sizeof_field(struct adiantum_request_ctx,
 				       u.hash_desc) +
 			  crypto_shash_descsize(hash),
-			  FIELD_SIZEOF(struct adiantum_request_ctx,
+			  sizeof_field(struct adiantum_request_ctx,
 				       u.streamcipher_req) +
 			  crypto_skcipher_reqsize(streamcipher));
 
@@ -468,7 +459,7 @@ static void adiantum_free_instance(struct skcipher_instance *inst)
 	struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
 
 	crypto_drop_skcipher(&ictx->streamcipher_spawn);
-	crypto_drop_spawn(&ictx->blockcipher_spawn);
+	crypto_drop_cipher(&ictx->blockcipher_spawn);
 	crypto_drop_shash(&ictx->hash_spawn);
 	kfree(inst);
 }
@@ -500,14 +491,12 @@ static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg,
 static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	const char *streamcipher_name;
-	const char *blockcipher_name;
+	u32 mask;
 	const char *nhpoly1305_name;
 	struct skcipher_instance *inst;
 	struct adiantum_instance_ctx *ictx;
 	struct skcipher_alg *streamcipher_alg;
 	struct crypto_alg *blockcipher_alg;
-	struct crypto_alg *_hash_alg;
 	struct shash_alg *hash_alg;
 	int err;
 
@@ -518,19 +507,7 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
 		return -EINVAL;
 
-	streamcipher_name = crypto_attr_alg_name(tb[1]);
-	if (IS_ERR(streamcipher_name))
-		return PTR_ERR(streamcipher_name);
-
-	blockcipher_name = crypto_attr_alg_name(tb[2]);
-	if (IS_ERR(blockcipher_name))
-		return PTR_ERR(blockcipher_name);
-
-	nhpoly1305_name = crypto_attr_alg_name(tb[3]);
-	if (nhpoly1305_name == ERR_PTR(-ENOENT))
-		nhpoly1305_name = "nhpoly1305";
-	if (IS_ERR(nhpoly1305_name))
-		return PTR_ERR(nhpoly1305_name);
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
 	if (!inst)
@@ -538,37 +515,31 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 	ictx = skcipher_instance_ctx(inst);
 
 	/* Stream cipher, e.g. "xchacha12" */
-	crypto_set_skcipher_spawn(&ictx->streamcipher_spawn,
-				  skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name,
-				   0, crypto_requires_sync(algt->type,
-							   algt->mask));
+	err = crypto_grab_skcipher(&ictx->streamcipher_spawn,
+				   skcipher_crypto_instance(inst),
+				   crypto_attr_alg_name(tb[1]), 0, mask);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 	streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn);
 
 	/* Block cipher, e.g. "aes" */
-	crypto_set_spawn(&ictx->blockcipher_spawn,
-			 skcipher_crypto_instance(inst));
-	err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name,
-				CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK);
+	err = crypto_grab_cipher(&ictx->blockcipher_spawn,
+				 skcipher_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[2]), 0, mask);
 	if (err)
-		goto out_drop_streamcipher;
-	blockcipher_alg = ictx->blockcipher_spawn.alg;
+		goto err_free_inst;
+	blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn);
 
 	/* NHPoly1305 ε-∆U hash function */
-	_hash_alg = crypto_alg_mod_lookup(nhpoly1305_name,
-					  CRYPTO_ALG_TYPE_SHASH,
-					  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(_hash_alg)) {
-		err = PTR_ERR(_hash_alg);
-		goto out_drop_blockcipher;
-	}
-	hash_alg = __crypto_shash_alg(_hash_alg);
-	err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg,
-				      skcipher_crypto_instance(inst));
+	nhpoly1305_name = crypto_attr_alg_name(tb[3]);
+	if (nhpoly1305_name == ERR_PTR(-ENOENT))
+		nhpoly1305_name = "nhpoly1305";
+	err = crypto_grab_shash(&ictx->hash_spawn,
+				skcipher_crypto_instance(inst),
+				nhpoly1305_name, 0, mask);
 	if (err)
-		goto out_put_hash;
+		goto err_free_inst;
+	hash_alg = crypto_spawn_shash_alg(&ictx->hash_spawn);
 
 	/* Check the set of algorithms */
 	if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg,
@@ -577,7 +548,7 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 			streamcipher_alg->base.cra_name,
 			blockcipher_alg->cra_name, hash_alg->base.cra_name);
 		err = -EINVAL;
-		goto out_drop_hash;
+		goto err_free_inst;
 	}
 
 	/* Instance fields */
@@ -586,13 +557,13 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
 		     "adiantum(%s,%s)", streamcipher_alg->base.cra_name,
 		     blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_drop_hash;
+		goto err_free_inst;
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "adiantum(%s,%s,%s)",
 		     streamcipher_alg->base.cra_driver_name,
 		     blockcipher_alg->cra_driver_name,
 		     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_drop_hash;
+		goto err_free_inst;
 
 	inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags &
 				   CRYPTO_ALG_ASYNC;
@@ -622,22 +593,10 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->free = adiantum_free_instance;
 
 	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_drop_hash;
-
-	crypto_mod_put(_hash_alg);
-	return 0;
-
-out_drop_hash:
-	crypto_drop_shash(&ictx->hash_spawn);
-out_put_hash:
-	crypto_mod_put(_hash_alg);
-out_drop_blockcipher:
-	crypto_drop_spawn(&ictx->blockcipher_spawn);
-out_drop_streamcipher:
-	crypto_drop_skcipher(&ictx->streamcipher_spawn);
-out_free_inst:
-	kfree(inst);
+	if (err) {
+err_free_inst:
+		adiantum_free_instance(inst);
+	}
 	return err;
 }
 
diff --git a/crypto/aead.c b/crypto/aead.c
index ce035589cf57..16991095270d 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -7,19 +7,14 @@
  * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au>
  */
 
-#include <crypto/internal/geniv.h>
-#include <crypto/internal/rng.h>
-#include <crypto/null.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
+#include <crypto/internal/aead.h>
+#include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/cryptouser.h>
-#include <linux/compiler.h>
 #include <net/netlink.h>
 
 #include "internal.h"
@@ -190,11 +185,6 @@ static void crypto_aead_free_instance(struct crypto_instance *inst)
 {
 	struct aead_instance *aead = aead_instance(inst);
 
-	if (!aead->free) {
-		inst->tmpl->free(inst);
-		return;
-	}
-
 	aead->free(aead);
 }
 
@@ -212,167 +202,12 @@ static const struct crypto_type crypto_aead_type = {
 	.tfmsize = offsetof(struct crypto_aead, base),
 };
 
-static int aead_geniv_setkey(struct crypto_aead *tfm,
-			     const u8 *key, unsigned int keylen)
-{
-	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
-
-	return crypto_aead_setkey(ctx->child, key, keylen);
-}
-
-static int aead_geniv_setauthsize(struct crypto_aead *tfm,
-				  unsigned int authsize)
-{
-	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
-
-	return crypto_aead_setauthsize(ctx->child, authsize);
-}
-
-struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-				       struct rtattr **tb, u32 type, u32 mask)
-{
-	const char *name;
-	struct crypto_aead_spawn *spawn;
-	struct crypto_attr_type *algt;
-	struct aead_instance *inst;
-	struct aead_alg *alg;
-	unsigned int ivsize;
-	unsigned int maxauthsize;
-	int err;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return ERR_CAST(algt);
-
-	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-		return ERR_PTR(-EINVAL);
-
-	name = crypto_attr_alg_name(tb[1]);
-	if (IS_ERR(name))
-		return ERR_CAST(name);
-
-	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
-	if (!inst)
-		return ERR_PTR(-ENOMEM);
-
-	spawn = aead_instance_ctx(inst);
-
-	/* Ignore async algorithms if necessary. */
-	mask |= crypto_requires_sync(algt->type, algt->mask);
-
-	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
-	err = crypto_grab_aead(spawn, name, type, mask);
-	if (err)
-		goto err_free_inst;
-
-	alg = crypto_spawn_aead_alg(spawn);
-
-	ivsize = crypto_aead_alg_ivsize(alg);
-	maxauthsize = crypto_aead_alg_maxauthsize(alg);
-
-	err = -EINVAL;
-	if (ivsize < sizeof(u64))
-		goto err_drop_alg;
-
-	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
-		     "%s(%s)", tmpl->name, alg->base.cra_name) >=
-	    CRYPTO_MAX_ALG_NAME)
-		goto err_drop_alg;
-	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "%s(%s)", tmpl->name, alg->base.cra_driver_name) >=
-	    CRYPTO_MAX_ALG_NAME)
-		goto err_drop_alg;
-
-	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.base.cra_priority = alg->base.cra_priority;
-	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
-	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
-	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
-
-	inst->alg.setkey = aead_geniv_setkey;
-	inst->alg.setauthsize = aead_geniv_setauthsize;
-
-	inst->alg.ivsize = ivsize;
-	inst->alg.maxauthsize = maxauthsize;
-
-out:
-	return inst;
-
-err_drop_alg:
-	crypto_drop_aead(spawn);
-err_free_inst:
-	kfree(inst);
-	inst = ERR_PTR(err);
-	goto out;
-}
-EXPORT_SYMBOL_GPL(aead_geniv_alloc);
-
-void aead_geniv_free(struct aead_instance *inst)
-{
-	crypto_drop_aead(aead_instance_ctx(inst));
-	kfree(inst);
-}
-EXPORT_SYMBOL_GPL(aead_geniv_free);
-
-int aead_init_geniv(struct crypto_aead *aead)
-{
-	struct aead_geniv_ctx *ctx = crypto_aead_ctx(aead);
-	struct aead_instance *inst = aead_alg_instance(aead);
-	struct crypto_aead *child;
-	int err;
-
-	spin_lock_init(&ctx->lock);
-
-	err = crypto_get_default_rng();
-	if (err)
-		goto out;
-
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
-				   crypto_aead_ivsize(aead));
-	crypto_put_default_rng();
-	if (err)
-		goto out;
-
-	ctx->sknull = crypto_get_default_null_skcipher();
-	err = PTR_ERR(ctx->sknull);
-	if (IS_ERR(ctx->sknull))
-		goto out;
-
-	child = crypto_spawn_aead(aead_instance_ctx(inst));
-	err = PTR_ERR(child);
-	if (IS_ERR(child))
-		goto drop_null;
-
-	ctx->child = child;
-	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(child) +
-				      sizeof(struct aead_request));
-
-	err = 0;
-
-out:
-	return err;
-
-drop_null:
-	crypto_put_default_null_skcipher();
-	goto out;
-}
-EXPORT_SYMBOL_GPL(aead_init_geniv);
-
-void aead_exit_geniv(struct crypto_aead *tfm)
-{
-	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
-
-	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher();
-}
-EXPORT_SYMBOL_GPL(aead_exit_geniv);
-
-int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name,
-		     u32 type, u32 mask)
+int crypto_grab_aead(struct crypto_aead_spawn *spawn,
+		     struct crypto_instance *inst,
+		     const char *name, u32 type, u32 mask)
 {
 	spawn->base.frontend = &crypto_aead_type;
-	return crypto_grab_spawn(&spawn->base, name, type, mask);
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_grab_aead);
 
@@ -453,6 +288,9 @@ int aead_register_instance(struct crypto_template *tmpl,
 {
 	int err;
 
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
 	err = aead_prepare_alg(&inst->alg);
 	if (err)
 		return err;
diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c
index 80e73611bd5c..44fb4956f0dd 100644
--- a/crypto/aegis128-core.c
+++ b/crypto/aegis128-core.c
@@ -13,6 +13,7 @@
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
@@ -35,15 +36,7 @@ struct aegis_ctx {
 	union aegis_block key;
 };
 
-struct aegis128_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
-			    const u8 *src, unsigned int size);
-};
-
-static bool have_simd;
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_simd);
 
 static const union aegis_block crypto_aegis_const[2] = {
 	{ .words64 = {
@@ -59,7 +52,7 @@ static const union aegis_block crypto_aegis_const[2] = {
 static bool aegis128_do_simd(void)
 {
 #ifdef CONFIG_CRYPTO_AEGIS128_SIMD
-	if (have_simd)
+	if (static_branch_likely(&have_simd))
 		return crypto_simd_usable();
 #endif
 	return false;
@@ -67,10 +60,16 @@ static bool aegis128_do_simd(void)
 
 bool crypto_aegis128_have_simd(void);
 void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
+void crypto_aegis128_init_simd(struct aegis_state *state,
+			       const union aegis_block *key,
+			       const u8 *iv);
 void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
 					const u8 *src, unsigned int size);
 void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
 					const u8 *src, unsigned int size);
+void crypto_aegis128_final_simd(struct aegis_state *state,
+				union aegis_block *tag_xor,
+				u64 assoclen, u64 cryptlen);
 
 static void crypto_aegis128_update(struct aegis_state *state)
 {
@@ -323,25 +322,27 @@ static void crypto_aegis128_process_ad(struct aegis_state *state,
 	}
 }
 
-static void crypto_aegis128_process_crypt(struct aegis_state *state,
-					  struct aead_request *req,
-					  const struct aegis128_ops *ops)
+static __always_inline
+int crypto_aegis128_process_crypt(struct aegis_state *state,
+				  struct aead_request *req,
+				  struct skcipher_walk *walk,
+				  void (*crypt)(struct aegis_state *state,
+					        u8 *dst, const u8 *src,
+					        unsigned int size))
 {
-	struct skcipher_walk walk;
+	int err = 0;
 
-	ops->skcipher_walk_init(&walk, req, false);
+	while (walk->nbytes) {
+		unsigned int nbytes = walk->nbytes;
 
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
+		if (nbytes < walk->total)
+			nbytes = round_down(nbytes, walk->stride);
 
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
+		crypt(state, walk->dst.virt.addr, walk->src.virt.addr, nbytes);
 
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
 	}
+	return err;
 }
 
 static void crypto_aegis128_final(struct aegis_state *state,
@@ -371,10 +372,8 @@ static int crypto_aegis128_setkey(struct crypto_aead *aead, const u8 *key,
 {
 	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
 
-	if (keylen != AEGIS128_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != AEGIS128_KEY_SIZE)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
 	return 0;
@@ -390,39 +389,31 @@ static int crypto_aegis128_setauthsize(struct crypto_aead *tfm,
 	return 0;
 }
 
-static void crypto_aegis128_crypt(struct aead_request *req,
-				  union aegis_block *tag_xor,
-				  unsigned int cryptlen,
-				  const struct aegis128_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aegis_state state;
-
-	crypto_aegis128_init(&state, &ctx->key, req->iv);
-	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128_process_crypt(&state, req, ops);
-	crypto_aegis128_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
 static int crypto_aegis128_encrypt(struct aead_request *req)
 {
-	const struct aegis128_ops *ops = &(struct aegis128_ops){
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_aegis128_encrypt_chunk,
-	};
-
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	union aegis_block tag = {};
 	unsigned int authsize = crypto_aead_authsize(tfm);
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
 	unsigned int cryptlen = req->cryptlen;
+	struct skcipher_walk walk;
+	struct aegis_state state;
 
-	if (aegis128_do_simd())
-		ops = &(struct aegis128_ops){
-			.skcipher_walk_init = skcipher_walk_aead_encrypt,
-			.crypt_chunk = crypto_aegis128_encrypt_chunk_simd };
-
-	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
+	skcipher_walk_aead_encrypt(&walk, req, false);
+	if (aegis128_do_simd()) {
+		crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_encrypt_chunk_simd);
+		crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+					   cryptlen);
+	} else {
+		crypto_aegis128_init(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_encrypt_chunk);
+		crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+	}
 
 	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
 				 authsize, 1);
@@ -431,26 +422,33 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
 
 static int crypto_aegis128_decrypt(struct aead_request *req)
 {
-	const struct aegis128_ops *ops = &(struct aegis128_ops){
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_aegis128_decrypt_chunk,
-	};
 	static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {};
-
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	union aegis_block tag;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	unsigned int cryptlen = req->cryptlen - authsize;
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	struct skcipher_walk walk;
+	struct aegis_state state;
 
 	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
 				 authsize, 0);
 
-	if (aegis128_do_simd())
-		ops = &(struct aegis128_ops){
-			.skcipher_walk_init = skcipher_walk_aead_decrypt,
-			.crypt_chunk = crypto_aegis128_decrypt_chunk_simd };
-
-	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
+	skcipher_walk_aead_decrypt(&walk, req, false);
+	if (aegis128_do_simd()) {
+		crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_decrypt_chunk_simd);
+		crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+					   cryptlen);
+	} else {
+		crypto_aegis128_init(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_decrypt_chunk);
+		crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+	}
 
 	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
 }
@@ -481,8 +479,9 @@ static struct aead_alg crypto_aegis128_alg = {
 
 static int __init crypto_aegis128_module_init(void)
 {
-	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD))
-		have_simd = crypto_aegis128_have_simd();
+	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) &&
+	    crypto_aegis128_have_simd())
+		static_branch_enable(&have_simd);
 
 	return crypto_register_aead(&crypto_aegis128_alg);
 }
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index f05310ca22aa..2a660ac1bc3a 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -132,6 +132,36 @@ void preload_sbox(void)
 	    :: "r"(crypto_aes_sbox));
 }
 
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
+{
+	static const uint8_t const0[] = {
+		0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+		0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+	};
+	static const uint8_t const1[] = {
+		0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+		0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+	};
+	uint8x16_t k = vld1q_u8(key);
+	uint8x16_t kiv = k ^ vld1q_u8(iv);
+	struct aegis128_state st = {{
+		kiv,
+		vld1q_u8(const1),
+		vld1q_u8(const0),
+		k ^ vld1q_u8(const0),
+		k ^ vld1q_u8(const1),
+	}};
+	int i;
+
+	preload_sbox();
+
+	for (i = 0; i < 5; i++) {
+		st = aegis128_update_neon(st, k);
+		st = aegis128_update_neon(st, kiv);
+	}
+	aegis128_save_state_neon(st, state);
+}
+
 void crypto_aegis128_update_neon(void *state, const void *msg)
 {
 	struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 
 	aegis128_save_state_neon(st, state);
 }
+
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+				uint64_t cryptlen)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	uint8x16_t v;
+	int i;
+
+	preload_sbox();
+
+	v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
+					       vmov_n_u64(8 * cryptlen));
+
+	for (i = 0; i < 7; i++)
+		st = aegis128_update_neon(st, v);
+
+	v = vld1q_u8(tag_xor);
+	v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
+	vst1q_u8(tag_xor, v);
+}
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index 751f9c195aa4..8271b1fa0fbc 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -8,11 +8,14 @@
 
 #include "aegis.h"
 
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv);
 void crypto_aegis128_update_neon(void *state, const void *msg);
 void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
 void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+				uint64_t cryptlen);
 
 int aegis128_have_aes_insn __ro_after_init;
 
@@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
 	return IS_ENABLED(CONFIG_ARM64);
 }
 
+void crypto_aegis128_init_simd(union aegis_block *state,
+			       const union aegis_block *key,
+			       const u8 *iv)
+{
+	kernel_neon_begin();
+	crypto_aegis128_init_neon(state, key, iv);
+	kernel_neon_end();
+}
+
 void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
 {
 	kernel_neon_begin();
@@ -47,3 +59,12 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
 	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
+
+void crypto_aegis128_final_simd(union aegis_block *state,
+				union aegis_block *tag_xor,
+				u64 assoclen, u64 cryptlen)
+{
+	kernel_neon_begin();
+	crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen);
+	kernel_neon_end();
+}
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 22e5867177f1..27ab27931813 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -1127,24 +1127,18 @@ EXPORT_SYMBOL_GPL(crypto_it_tab);
  * @in_key:	The input key.
  * @key_len:	The size of the key.
  *
- * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
- * is set. The function uses aes_expand_key() to expand the key.
- * &crypto_aes_ctx _must_ be the private data embedded in @tfm which is
- * retrieved with crypto_tfm_ctx().
+ * This function uses aes_expand_key() to expand the key.  &crypto_aes_ctx
+ * _must_ be the private data embedded in @tfm which is retrieved with
+ * crypto_tfm_ctx().
+ *
+ * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
  */
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int ret;
-
-	ret = aes_expandkey(ctx, in_key, key_len);
-	if (!ret)
-		return 0;
 
-	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-	return -EINVAL;
+	return aes_expandkey(ctx, in_key, key_len);
 }
 EXPORT_SYMBOL_GPL(crypto_aes_set_key);
 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 879cf23f7489..3d8e53010cda 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -134,11 +134,13 @@ void af_alg_release_parent(struct sock *sk)
 	sk = ask->parent;
 	ask = alg_sk(sk);
 
-	lock_sock(sk);
+	local_bh_disable();
+	bh_lock_sock(sk);
 	ask->nokey_refcnt -= nokey;
 	if (!last)
 		last = !--ask->refcnt;
-	release_sock(sk);
+	bh_unlock_sock(sk);
+	local_bh_enable();
 
 	if (last)
 		sock_put(sk);
@@ -1043,7 +1045,7 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
 	af_alg_free_resources(areq);
 	sock_put(sk);
 
-	iocb->ki_complete(iocb, err ? err : resultlen, 0);
+	iocb->ki_complete(iocb, err ? err : (int)resultlen, 0);
 }
 EXPORT_SYMBOL_GPL(af_alg_async_cb);
 
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 3815b363a693..68a0f0cb75c4 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -23,6 +23,8 @@
 
 #include "internal.h"
 
+static const struct crypto_type crypto_ahash_type;
+
 struct ahash_request_priv {
 	crypto_completion_t complete;
 	void *data;
@@ -509,6 +511,13 @@ static unsigned int crypto_ahash_extsize(struct crypto_alg *alg)
 	return crypto_alg_extsize(alg);
 }
 
+static void crypto_ahash_free_instance(struct crypto_instance *inst)
+{
+	struct ahash_instance *ahash = ahash_instance(inst);
+
+	ahash->free(ahash);
+}
+
 #ifdef CONFIG_NET
 static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -542,9 +551,10 @@ static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
 		   __crypto_hash_alg_common(alg)->digestsize);
 }
 
-const struct crypto_type crypto_ahash_type = {
+static const struct crypto_type crypto_ahash_type = {
 	.extsize = crypto_ahash_extsize,
 	.init_tfm = crypto_ahash_init_tfm,
+	.free = crypto_ahash_free_instance,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_ahash_show,
 #endif
@@ -554,7 +564,15 @@ const struct crypto_type crypto_ahash_type = {
 	.type = CRYPTO_ALG_TYPE_AHASH,
 	.tfmsize = offsetof(struct crypto_ahash, base),
 };
-EXPORT_SYMBOL_GPL(crypto_ahash_type);
+
+int crypto_grab_ahash(struct crypto_ahash_spawn *spawn,
+		      struct crypto_instance *inst,
+		      const char *name, u32 type, u32 mask)
+{
+	spawn->base.frontend = &crypto_ahash_type;
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_ahash);
 
 struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type,
 					u32 mask)
@@ -598,9 +616,9 @@ int crypto_register_ahash(struct ahash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_register_ahash);
 
-int crypto_unregister_ahash(struct ahash_alg *alg)
+void crypto_unregister_ahash(struct ahash_alg *alg)
 {
-	return crypto_unregister_alg(&alg->halg.base);
+	crypto_unregister_alg(&alg->halg.base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_ahash);
 
@@ -638,6 +656,9 @@ int ahash_register_instance(struct crypto_template *tmpl,
 {
 	int err;
 
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
 	err = ahash_prepare_alg(&inst->alg);
 	if (err)
 		return err;
@@ -646,31 +667,6 @@ int ahash_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(ahash_register_instance);
 
-void ahash_free_instance(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
-	kfree(ahash_instance(inst));
-}
-EXPORT_SYMBOL_GPL(ahash_free_instance);
-
-int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
-			    struct hash_alg_common *alg,
-			    struct crypto_instance *inst)
-{
-	return crypto_init_spawn2(&spawn->base, &alg->base, inst,
-				  &crypto_ahash_type);
-}
-EXPORT_SYMBOL_GPL(crypto_init_ahash_spawn);
-
-struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask)
-{
-	struct crypto_alg *alg;
-
-	alg = crypto_attr_alg2(rta, &crypto_ahash_type, type, mask);
-	return IS_ERR(alg) ? ERR_CAST(alg) : __crypto_hash_alg_common(alg);
-}
-EXPORT_SYMBOL_GPL(ahash_attr_alg);
-
 bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
 {
 	struct crypto_alg *alg = &halg->base;
diff --git a/crypto/akcipher.c b/crypto/akcipher.c
index 7d5cf4939423..f866085c8a4a 100644
--- a/crypto/akcipher.c
+++ b/crypto/akcipher.c
@@ -90,11 +90,12 @@ static const struct crypto_type crypto_akcipher_type = {
 	.tfmsize = offsetof(struct crypto_akcipher, base),
 };
 
-int crypto_grab_akcipher(struct crypto_akcipher_spawn *spawn, const char *name,
-			 u32 type, u32 mask)
+int crypto_grab_akcipher(struct crypto_akcipher_spawn *spawn,
+			 struct crypto_instance *inst,
+			 const char *name, u32 type, u32 mask)
 {
 	spawn->base.frontend = &crypto_akcipher_type;
-	return crypto_grab_spawn(&spawn->base, name, type, mask);
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_grab_akcipher);
 
@@ -146,6 +147,8 @@ EXPORT_SYMBOL_GPL(crypto_unregister_akcipher);
 int akcipher_register_instance(struct crypto_template *tmpl,
 			       struct akcipher_instance *inst)
 {
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
 	akcipher_prepare_alg(&inst->alg);
 	return crypto_register_instance(tmpl, akcipher_crypto_instance(inst));
 }
diff --git a/crypto/algapi.c b/crypto/algapi.c
index de30ddc952d8..69605e21af92 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -65,11 +65,6 @@ static int crypto_check_alg(struct crypto_alg *alg)
 
 static void crypto_free_instance(struct crypto_instance *inst)
 {
-	if (!inst->alg.cra_type->free) {
-		inst->tmpl->free(inst);
-		return;
-	}
-
 	inst->alg.cra_type->free(inst);
 }
 
@@ -82,6 +77,15 @@ static void crypto_destroy_instance(struct crypto_alg *alg)
 	crypto_tmpl_put(tmpl);
 }
 
+/*
+ * This function adds a spawn to the list secondary_spawns which
+ * will be used at the end of crypto_remove_spawns to unregister
+ * instances, unless the spawn happens to be one that is depended
+ * on by the new algorithm (nalg in crypto_remove_spawns).
+ *
+ * This function is also responsible for resurrecting any algorithms
+ * in the dependency chain of nalg by unsetting n->dead.
+ */
 static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
 					    struct list_head *stack,
 					    struct list_head *top,
@@ -93,15 +97,17 @@ static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
 	if (!spawn)
 		return NULL;
 
-	n = list_next_entry(spawn, list);
+	n = list_prev_entry(spawn, list);
+	list_move(&spawn->list, secondary_spawns);
 
-	if (spawn->alg && &n->list != stack && !n->alg)
-		n->alg = (n->list.next == stack) ? alg :
-			 &list_next_entry(n, list)->inst->alg;
+	if (list_is_last(&n->list, stack))
+		return top;
 
-	list_move(&spawn->list, secondary_spawns);
+	n = list_next_entry(n, list);
+	if (!spawn->dead)
+		n->dead = false;
 
-	return &n->list == stack ? top : &n->inst->alg.cra_users;
+	return &n->inst->alg.cra_users;
 }
 
 static void crypto_remove_instance(struct crypto_instance *inst,
@@ -113,8 +119,6 @@ static void crypto_remove_instance(struct crypto_instance *inst,
 		return;
 
 	inst->alg.cra_flags |= CRYPTO_ALG_DEAD;
-	if (hlist_unhashed(&inst->list))
-		return;
 
 	if (!tmpl || !crypto_tmpl_get(tmpl))
 		return;
@@ -126,6 +130,12 @@ static void crypto_remove_instance(struct crypto_instance *inst,
 	BUG_ON(!list_empty(&inst->alg.cra_users));
 }
 
+/*
+ * Given an algorithm alg, remove all algorithms that depend on it
+ * through spawns.  If nalg is not null, then exempt any algorithms
+ * that is depended on by nalg.  This is useful when nalg itself
+ * depends on alg.
+ */
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 			  struct crypto_alg *nalg)
 {
@@ -144,6 +154,11 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 		list_move(&spawn->list, &top);
 	}
 
+	/*
+	 * Perform a depth-first walk starting from alg through
+	 * the cra_users tree.  The list stack records the path
+	 * from alg to the current spawn.
+	 */
 	spawns = &top;
 	do {
 		while (!list_empty(spawns)) {
@@ -153,17 +168,26 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 						 list);
 			inst = spawn->inst;
 
-			BUG_ON(&inst->alg == alg);
-
 			list_move(&spawn->list, &stack);
+			spawn->dead = !spawn->registered || &inst->alg != nalg;
+
+			if (!spawn->registered)
+				break;
+
+			BUG_ON(&inst->alg == alg);
 
 			if (&inst->alg == nalg)
 				break;
 
-			spawn->alg = NULL;
 			spawns = &inst->alg.cra_users;
 
 			/*
+			 * Even if spawn->registered is true, the
+			 * instance itself may still be unregistered.
+			 * This is because it may have failed during
+			 * registration.  Therefore we still need to
+			 * make the following test.
+			 *
 			 * We may encounter an unregistered instance here, since
 			 * an instance's spawns are set up prior to the instance
 			 * being registered.  An unregistered instance will have
@@ -178,10 +202,15 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
 					      &secondary_spawns)));
 
+	/*
+	 * Remove all instances that are marked as dead.  Also
+	 * complete the resurrection of the others by moving them
+	 * back to the cra_users list.
+	 */
 	list_for_each_entry_safe(spawn, n, &secondary_spawns, list) {
-		if (spawn->alg)
+		if (!spawn->dead)
 			list_move(&spawn->list, &spawn->alg->cra_users);
-		else
+		else if (spawn->registered)
 			crypto_remove_instance(spawn->inst, list);
 	}
 }
@@ -257,6 +286,7 @@ void crypto_alg_tested(const char *name, int err)
 	struct crypto_alg *alg;
 	struct crypto_alg *q;
 	LIST_HEAD(list);
+	bool best;
 
 	down_write(&crypto_alg_sem);
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
@@ -280,6 +310,21 @@ found:
 
 	alg->cra_flags |= CRYPTO_ALG_TESTED;
 
+	/* Only satisfy larval waiters if we are the best. */
+	best = true;
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		if (crypto_is_moribund(q) || !crypto_is_larval(q))
+			continue;
+
+		if (strcmp(alg->cra_name, q->cra_name))
+			continue;
+
+		if (q->cra_priority > alg->cra_priority) {
+			best = false;
+			break;
+		}
+	}
+
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		if (q == alg)
 			continue;
@@ -303,10 +348,12 @@ found:
 				continue;
 			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
 				continue;
-			if (!crypto_mod_get(alg))
-				continue;
 
-			larval->adult = alg;
+			if (best && crypto_mod_get(alg))
+				larval->adult = alg;
+			else
+				larval->adult = ERR_PTR(-EAGAIN);
+
 			continue;
 		}
 
@@ -397,7 +444,7 @@ static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
 	return 0;
 }
 
-int crypto_unregister_alg(struct crypto_alg *alg)
+void crypto_unregister_alg(struct crypto_alg *alg)
 {
 	int ret;
 	LIST_HEAD(list);
@@ -406,15 +453,14 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	ret = crypto_remove_alg(alg, &list);
 	up_write(&crypto_alg_sem);
 
-	if (ret)
-		return ret;
+	if (WARN(ret, "Algorithm %s is not registered", alg->cra_driver_name))
+		return;
 
 	BUG_ON(refcount_read(&alg->cra_refcnt) != 1);
 	if (alg->cra_destroy)
 		alg->cra_destroy(alg);
 
 	crypto_remove_final(&list);
-	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_alg);
 
@@ -438,18 +484,12 @@ err:
 }
 EXPORT_SYMBOL_GPL(crypto_register_algs);
 
-int crypto_unregister_algs(struct crypto_alg *algs, int count)
+void crypto_unregister_algs(struct crypto_alg *algs, int count)
 {
-	int i, ret;
-
-	for (i = 0; i < count; i++) {
-		ret = crypto_unregister_alg(&algs[i]);
-		if (ret)
-			pr_err("Failed to unregister %s %s: %d\n",
-			       algs[i].cra_driver_name, algs[i].cra_name, ret);
-	}
+	int i;
 
-	return 0;
+	for (i = 0; i < count; i++)
+		crypto_unregister_alg(&algs[i]);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_algs);
 
@@ -561,6 +601,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst)
 {
 	struct crypto_larval *larval;
+	struct crypto_spawn *spawn;
 	int err;
 
 	err = crypto_check_alg(&inst->alg);
@@ -572,6 +613,22 @@ int crypto_register_instance(struct crypto_template *tmpl,
 
 	down_write(&crypto_alg_sem);
 
+	larval = ERR_PTR(-EAGAIN);
+	for (spawn = inst->spawns; spawn;) {
+		struct crypto_spawn *next;
+
+		if (spawn->dead)
+			goto unlock;
+
+		next = spawn->next;
+		spawn->inst = inst;
+		spawn->registered = true;
+
+		crypto_mod_put(spawn->alg);
+
+		spawn = next;
+	}
+
 	larval = __crypto_register_alg(&inst->alg);
 	if (IS_ERR(larval))
 		goto unlock;
@@ -594,7 +651,7 @@ err:
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);
 
-int crypto_unregister_instance(struct crypto_instance *inst)
+void crypto_unregister_instance(struct crypto_instance *inst)
 {
 	LIST_HEAD(list);
 
@@ -606,97 +663,70 @@ int crypto_unregister_instance(struct crypto_instance *inst)
 	up_write(&crypto_alg_sem);
 
 	crypto_remove_final(&list);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_instance);
 
-int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
-		      struct crypto_instance *inst, u32 mask)
+int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
+		      const char *name, u32 type, u32 mask)
 {
+	struct crypto_alg *alg;
 	int err = -EAGAIN;
 
 	if (WARN_ON_ONCE(inst == NULL))
 		return -EINVAL;
 
-	spawn->inst = inst;
-	spawn->mask = mask;
+	/* Allow the result of crypto_attr_alg_name() to be passed directly */
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	alg = crypto_find_alg(name, spawn->frontend, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
 
 	down_write(&crypto_alg_sem);
 	if (!crypto_is_moribund(alg)) {
 		list_add(&spawn->list, &alg->cra_users);
 		spawn->alg = alg;
+		spawn->mask = mask;
+		spawn->next = inst->spawns;
+		inst->spawns = spawn;
 		err = 0;
 	}
 	up_write(&crypto_alg_sem);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(crypto_init_spawn);
-
-int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg,
-		       struct crypto_instance *inst,
-		       const struct crypto_type *frontend)
-{
-	int err = -EINVAL;
-
-	if ((alg->cra_flags ^ frontend->type) & frontend->maskset)
-		goto out;
-
-	spawn->frontend = frontend;
-	err = crypto_init_spawn(spawn, alg, inst, frontend->maskset);
-
-out:
-	return err;
-}
-EXPORT_SYMBOL_GPL(crypto_init_spawn2);
-
-int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name,
-		      u32 type, u32 mask)
-{
-	struct crypto_alg *alg;
-	int err;
-
-	alg = crypto_find_alg(name, spawn->frontend, type, mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
-
-	err = crypto_init_spawn(spawn, alg, spawn->inst, mask);
-	crypto_mod_put(alg);
+	if (err)
+		crypto_mod_put(alg);
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_grab_spawn);
 
 void crypto_drop_spawn(struct crypto_spawn *spawn)
 {
-	if (!spawn->alg)
+	if (!spawn->alg) /* not yet initialized? */
 		return;
 
 	down_write(&crypto_alg_sem);
-	list_del(&spawn->list);
+	if (!spawn->dead)
+		list_del(&spawn->list);
 	up_write(&crypto_alg_sem);
+
+	if (!spawn->registered)
+		crypto_mod_put(spawn->alg);
 }
 EXPORT_SYMBOL_GPL(crypto_drop_spawn);
 
 static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn)
 {
 	struct crypto_alg *alg;
-	struct crypto_alg *alg2;
 
 	down_read(&crypto_alg_sem);
 	alg = spawn->alg;
-	alg2 = alg;
-	if (alg2)
-		alg2 = crypto_mod_get(alg2);
-	up_read(&crypto_alg_sem);
-
-	if (!alg2) {
-		if (alg)
-			crypto_shoot_alg(alg);
-		return ERR_PTR(-EAGAIN);
+	if (!spawn->dead && !crypto_mod_get(alg)) {
+		alg->cra_flags |= CRYPTO_ALG_DYING;
+		alg = NULL;
 	}
+	up_read(&crypto_alg_sem);
 
-	return alg;
+	return alg ?: ERR_PTR(-EAGAIN);
 }
 
 struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
@@ -809,20 +839,6 @@ const char *crypto_attr_alg_name(struct rtattr *rta)
 }
 EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
 
-struct crypto_alg *crypto_attr_alg2(struct rtattr *rta,
-				    const struct crypto_type *frontend,
-				    u32 type, u32 mask)
-{
-	const char *name;
-
-	name = crypto_attr_alg_name(rta);
-	if (IS_ERR(name))
-		return ERR_CAST(name);
-
-	return crypto_find_alg(name, frontend, type, mask);
-}
-EXPORT_SYMBOL_GPL(crypto_attr_alg2);
-
 int crypto_attr_u32(struct rtattr *rta, u32 *num)
 {
 	struct crypto_attr_u32 *nu32;
@@ -856,32 +872,6 @@ int crypto_inst_setname(struct crypto_instance *inst, const char *name,
 }
 EXPORT_SYMBOL_GPL(crypto_inst_setname);
 
-void *crypto_alloc_instance(const char *name, struct crypto_alg *alg,
-			    unsigned int head)
-{
-	struct crypto_instance *inst;
-	char *p;
-	int err;
-
-	p = kzalloc(head + sizeof(*inst) + sizeof(struct crypto_spawn),
-		    GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	inst = (void *)(p + head);
-
-	err = crypto_inst_setname(inst, name, alg);
-	if (err)
-		goto err_free_inst;
-
-	return p;
-
-err_free_inst:
-	kfree(p);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(crypto_alloc_instance);
-
 void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen)
 {
 	INIT_LIST_HEAD(&queue->list);
@@ -1052,32 +1042,6 @@ void crypto_stats_get(struct crypto_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_stats_get);
 
-void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret,
-				     struct crypto_alg *alg)
-{
-	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
-		atomic64_inc(&alg->stats.cipher.err_cnt);
-	} else {
-		atomic64_inc(&alg->stats.cipher.encrypt_cnt);
-		atomic64_add(nbytes, &alg->stats.cipher.encrypt_tlen);
-	}
-	crypto_alg_put(alg);
-}
-EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_encrypt);
-
-void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret,
-				     struct crypto_alg *alg)
-{
-	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
-		atomic64_inc(&alg->stats.cipher.err_cnt);
-	} else {
-		atomic64_inc(&alg->stats.cipher.decrypt_cnt);
-		atomic64_add(nbytes, &alg->stats.cipher.decrypt_tlen);
-	}
-	crypto_alg_put(alg);
-}
-EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_decrypt);
-
 void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg,
 			       int ret)
 {
diff --git a/crypto/algboss.c b/crypto/algboss.c
index a62149d6c839..535f1f87e6c1 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -58,7 +58,6 @@ static int cryptomgr_probe(void *data)
 {
 	struct cryptomgr_param *param = data;
 	struct crypto_template *tmpl;
-	struct crypto_instance *inst;
 	int err;
 
 	tmpl = crypto_lookup_template(param->template);
@@ -66,16 +65,7 @@ static int cryptomgr_probe(void *data)
 		goto out;
 
 	do {
-		if (tmpl->create) {
-			err = tmpl->create(tmpl, param->tb);
-			continue;
-		}
-
-		inst = tmpl->alloc(param->tb);
-		if (IS_ERR(inst))
-			err = PTR_ERR(inst);
-		else if ((err = crypto_register_instance(tmpl, inst)))
-			tmpl->free(inst);
+		err = tmpl->create(tmpl, param->tb);
 	} while (err == -EAGAIN && !signal_pending(current));
 
 	crypto_tmpl_put(tmpl);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c1601edd70e3..e2c8ab408bed 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -56,7 +56,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *pask = alg_sk(psk);
 	struct af_alg_ctx *ctx = ask->private;
 	struct crypto_skcipher *tfm = pask->private;
-	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	unsigned int bs = crypto_skcipher_chunksize(tfm);
 	struct af_alg_async_req *areq;
 	int err = 0;
 	size_t len = 0;
diff --git a/crypto/anubis.c b/crypto/anubis.c
index f9ce78fde6ee..5da0241ef453 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -464,7 +464,6 @@ static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 {
 	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
-	u32 *flags = &tfm->crt_flags;
 	int N, R, i, r;
 	u32 kappa[ANUBIS_MAX_N];
 	u32 inter[ANUBIS_MAX_N];
@@ -474,7 +473,6 @@ static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 		case 32: case 36: case 40:
 			break;
 		default:
-			*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 			return -EINVAL;
 	}
 
diff --git a/crypto/api.c b/crypto/api.c
index d8ba54142620..7d71a9b10e5f 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -97,7 +97,7 @@ static void crypto_larval_destroy(struct crypto_alg *alg)
 	struct crypto_larval *larval = (void *)alg;
 
 	BUG_ON(!crypto_is_larval(alg));
-	if (larval->adult)
+	if (!IS_ERR_OR_NULL(larval->adult))
 		crypto_mod_put(larval->adult);
 	kfree(larval);
 }
@@ -178,6 +178,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 		alg = ERR_PTR(-ETIMEDOUT);
 	else if (!alg)
 		alg = ERR_PTR(-ENOENT);
+	else if (IS_ERR(alg))
+		;
 	else if (crypto_is_test_larval(larval) &&
 		 !(alg->cra_flags & CRYPTO_ALG_TESTED))
 		alg = ERR_PTR(-EAGAIN);
@@ -295,20 +297,7 @@ static int crypto_init_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 
 	if (type_obj)
 		return type_obj->init(tfm, type, mask);
-
-	switch (crypto_tfm_alg_type(tfm)) {
-	case CRYPTO_ALG_TYPE_CIPHER:
-		return crypto_init_cipher_ops(tfm);
-
-	case CRYPTO_ALG_TYPE_COMPRESS:
-		return crypto_init_compress_ops(tfm);
-
-	default:
-		break;
-	}
-
-	BUG();
-	return -EINVAL;
+	return 0;
 }
 
 static void crypto_exit_ops(struct crypto_tfm *tfm)
@@ -344,13 +333,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
 	return len;
 }
 
-void crypto_shoot_alg(struct crypto_alg *alg)
+static void crypto_shoot_alg(struct crypto_alg *alg)
 {
 	down_write(&crypto_alg_sem);
 	alg->cra_flags |= CRYPTO_ALG_DYING;
 	up_write(&crypto_alg_sem);
 }
-EXPORT_SYMBOL_GPL(crypto_shoot_alg);
 
 struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
 				      u32 mask)
@@ -406,7 +394,7 @@ EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
  *
  *	The returned transform is of a non-determinate type.  Most people
  *	should use one of the more specific allocation functions such as
- *	crypto_alloc_blkcipher.
+ *	crypto_alloc_skcipher().
  *
  *	In case of error the return value is an error pointer.
  */
@@ -516,7 +504,7 @@ EXPORT_SYMBOL_GPL(crypto_find_alg);
  *
  *	The returned transform is of a non-determinate type.  Most people
  *	should use one of the more specific allocation functions such as
- *	crypto_alloc_blkcipher.
+ *	crypto_alloc_skcipher().
  *
  *	In case of error the return value is an error pointer.
  */
@@ -608,3 +596,4 @@ EXPORT_SYMBOL_GPL(crypto_req_done);
 
 MODULE_DESCRIPTION("Cryptographic core API");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: cryptomgr");
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 76d2ce3a1b5b..378b18b9bc34 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -13,7 +13,7 @@
 #include <crypto/sha.h>
 #include <asm/unaligned.h>
 #include <keys/asymmetric-subtype.h>
-#include <keys/trusted.h>
+#include <keys/trusted_tpm.h>
 #include <crypto/asym_tpm_subtype.h>
 #include <crypto/public_key.h>
 
@@ -21,10 +21,6 @@
 #define TPM_ORD_LOADKEY2	65
 #define TPM_ORD_UNBIND		30
 #define TPM_ORD_SIGN		60
-#define TPM_LOADKEY2_SIZE		59
-#define TPM_FLUSHSPECIFIC_SIZE		18
-#define TPM_UNBIND_SIZE			63
-#define TPM_SIGN_SIZE			63
 
 #define TPM_RT_KEY                      0x00000001
 
@@ -68,16 +64,13 @@ static int tpm_loadkey2(struct tpm_buf *tb,
 		return ret;
 
 	/* build the request buffer */
-	INIT_BUF(tb);
-	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
-	store32(tb, TPM_LOADKEY2_SIZE + keybloblen);
-	store32(tb, TPM_ORD_LOADKEY2);
-	store32(tb, keyhandle);
-	storebytes(tb, keyblob, keybloblen);
-	store32(tb, authhandle);
-	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
-	store8(tb, cont);
-	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2);
+	tpm_buf_append_u32(tb, keyhandle);
+	tpm_buf_append(tb, keyblob, keybloblen);
+	tpm_buf_append_u32(tb, authhandle);
+	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+	tpm_buf_append_u8(tb, cont);
+	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
 
 	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
 	if (ret < 0) {
@@ -101,12 +94,9 @@ static int tpm_loadkey2(struct tpm_buf *tb,
  */
 static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
 {
-	INIT_BUF(tb);
-	store16(tb, TPM_TAG_RQU_COMMAND);
-	store32(tb, TPM_FLUSHSPECIFIC_SIZE);
-	store32(tb, TPM_ORD_FLUSHSPECIFIC);
-	store32(tb, handle);
-	store32(tb, TPM_RT_KEY);
+	tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC);
+	tpm_buf_append_u32(tb, handle);
+	tpm_buf_append_u32(tb, TPM_RT_KEY);
 
 	return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
 }
@@ -155,17 +145,14 @@ static int tpm_unbind(struct tpm_buf *tb,
 		return ret;
 
 	/* build the request buffer */
-	INIT_BUF(tb);
-	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
-	store32(tb, TPM_UNBIND_SIZE + bloblen);
-	store32(tb, TPM_ORD_UNBIND);
-	store32(tb, keyhandle);
-	store32(tb, bloblen);
-	storebytes(tb, blob, bloblen);
-	store32(tb, authhandle);
-	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
-	store8(tb, cont);
-	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND);
+	tpm_buf_append_u32(tb, keyhandle);
+	tpm_buf_append_u32(tb, bloblen);
+	tpm_buf_append(tb, blob, bloblen);
+	tpm_buf_append_u32(tb, authhandle);
+	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+	tpm_buf_append_u8(tb, cont);
+	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
 
 	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
 	if (ret < 0) {
@@ -241,17 +228,14 @@ static int tpm_sign(struct tpm_buf *tb,
 		return ret;
 
 	/* build the request buffer */
-	INIT_BUF(tb);
-	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
-	store32(tb, TPM_SIGN_SIZE + bloblen);
-	store32(tb, TPM_ORD_SIGN);
-	store32(tb, keyhandle);
-	store32(tb, bloblen);
-	storebytes(tb, blob, bloblen);
-	store32(tb, authhandle);
-	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
-	store8(tb, cont);
-	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN);
+	tpm_buf_append_u32(tb, keyhandle);
+	tpm_buf_append_u32(tb, bloblen);
+	tpm_buf_append(tb, blob, bloblen);
+	tpm_buf_append_u32(tb, authhandle);
+	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+	tpm_buf_append_u8(tb, cont);
+	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
 
 	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
 	if (ret < 0) {
@@ -486,6 +470,7 @@ static int tpm_key_encrypt(struct tpm_key *tk,
 	if (ret < 0)
 		goto error_free_tfm;
 
+	ret = -ENOMEM;
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
 		goto error_free_tfm;
@@ -519,7 +504,7 @@ static int tpm_key_decrypt(struct tpm_key *tk,
 			   struct kernel_pkey_params *params,
 			   const void *in, void *out)
 {
-	struct tpm_buf *tb;
+	struct tpm_buf tb;
 	uint32_t keyhandle;
 	uint8_t srkauth[SHA1_DIGEST_SIZE];
 	uint8_t keyauth[SHA1_DIGEST_SIZE];
@@ -533,14 +518,14 @@ static int tpm_key_decrypt(struct tpm_key *tk,
 	if (strcmp(params->encoding, "pkcs1"))
 		return -ENOPKG;
 
-	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
-	if (!tb)
-		return -ENOMEM;
+	r = tpm_buf_init(&tb, 0, 0);
+	if (r)
+		return r;
 
 	/* TODO: Handle a non-all zero SRK authorization */
 	memset(srkauth, 0, sizeof(srkauth));
 
-	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
 				tk->blob, tk->blob_len, &keyhandle);
 	if (r < 0) {
 		pr_devel("loadkey2 failed (%d)\n", r);
@@ -550,16 +535,16 @@ static int tpm_key_decrypt(struct tpm_key *tk,
 	/* TODO: Handle a non-all zero key authorization */
 	memset(keyauth, 0, sizeof(keyauth));
 
-	r = tpm_unbind(tb, keyhandle, keyauth,
+	r = tpm_unbind(&tb, keyhandle, keyauth,
 		       in, params->in_len, out, params->out_len);
 	if (r < 0)
 		pr_devel("tpm_unbind failed (%d)\n", r);
 
-	if (tpm_flushspecific(tb, keyhandle) < 0)
+	if (tpm_flushspecific(&tb, keyhandle) < 0)
 		pr_devel("flushspecific failed (%d)\n", r);
 
 error:
-	kzfree(tb);
+	tpm_buf_destroy(&tb);
 	pr_devel("<==%s() = %d\n", __func__, r);
 	return r;
 }
@@ -643,7 +628,7 @@ static int tpm_key_sign(struct tpm_key *tk,
 			struct kernel_pkey_params *params,
 			const void *in, void *out)
 {
-	struct tpm_buf *tb;
+	struct tpm_buf tb;
 	uint32_t keyhandle;
 	uint8_t srkauth[SHA1_DIGEST_SIZE];
 	uint8_t keyauth[SHA1_DIGEST_SIZE];
@@ -681,15 +666,14 @@ static int tpm_key_sign(struct tpm_key *tk,
 		goto error_free_asn1_wrapped;
 	}
 
-	r = -ENOMEM;
-	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
-	if (!tb)
+	r = tpm_buf_init(&tb, 0, 0);
+	if (r)
 		goto error_free_asn1_wrapped;
 
 	/* TODO: Handle a non-all zero SRK authorization */
 	memset(srkauth, 0, sizeof(srkauth));
 
-	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
 			 tk->blob, tk->blob_len, &keyhandle);
 	if (r < 0) {
 		pr_devel("loadkey2 failed (%d)\n", r);
@@ -699,15 +683,15 @@ static int tpm_key_sign(struct tpm_key *tk,
 	/* TODO: Handle a non-all zero key authorization */
 	memset(keyauth, 0, sizeof(keyauth));
 
-	r = tpm_sign(tb, keyhandle, keyauth, in, in_len, out, params->out_len);
+	r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len);
 	if (r < 0)
 		pr_devel("tpm_sign failed (%d)\n", r);
 
-	if (tpm_flushspecific(tb, keyhandle) < 0)
+	if (tpm_flushspecific(&tb, keyhandle) < 0)
 		pr_devel("flushspecific failed (%d)\n", r);
 
 error_free_tb:
-	kzfree(tb);
+	tpm_buf_destroy(&tb);
 error_free_asn1_wrapped:
 	kfree(asn1_wrapped);
 	pr_devel("<==%s() = %d\n", __func__, r);
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 364b9df9d631..d7f43d4ea925 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -184,6 +184,7 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
+	ret = -ENOMEM;
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
 		goto error_free_tfm;
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 3f0ed9402582..775e7138fd10 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -91,15 +91,12 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	int err = -EINVAL;
 
 	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
-		goto badkey;
+		goto out;
 
 	crypto_ahash_clear_flags(auth, CRYPTO_TFM_REQ_MASK);
 	crypto_ahash_set_flags(auth, crypto_aead_get_flags(authenc) &
 				    CRYPTO_TFM_REQ_MASK);
 	err = crypto_ahash_setkey(auth, keys.authkey, keys.authkeylen);
-	crypto_aead_set_flags(authenc, crypto_ahash_get_flags(auth) &
-				       CRYPTO_TFM_RES_MASK);
-
 	if (err)
 		goto out;
 
@@ -107,16 +104,9 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen);
-	crypto_aead_set_flags(authenc, crypto_skcipher_get_flags(enc) &
-				       CRYPTO_TFM_RES_MASK);
-
 out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static void authenc_geniv_ahash_done(struct crypto_async_request *areq, int err)
@@ -383,12 +373,12 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
 				 struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
+	struct authenc_instance_ctx *ctx;
 	struct hash_alg_common *auth;
 	struct crypto_alg *auth_base;
 	struct skcipher_alg *enc;
-	struct authenc_instance_ctx *ctx;
-	const char *enc_name;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -398,38 +388,24 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH,
-			      CRYPTO_ALG_TYPE_AHASH_MASK |
-			      crypto_requires_sync(algt->type, algt->mask));
-	if (IS_ERR(auth))
-		return PTR_ERR(auth);
-
-	auth_base = &auth->base;
-
-	enc_name = crypto_attr_alg_name(tb[2]);
-	err = PTR_ERR(enc_name);
-	if (IS_ERR(enc_name))
-		goto out_put_auth;
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
-	err = -ENOMEM;
 	if (!inst)
-		goto out_put_auth;
-
+		return -ENOMEM;
 	ctx = aead_instance_ctx(inst);
 
-	err = crypto_init_ahash_spawn(&ctx->auth, auth,
-				      aead_crypto_instance(inst));
+	err = crypto_grab_ahash(&ctx->auth, aead_crypto_instance(inst),
+				crypto_attr_alg_name(tb[1]), 0, mask);
 	if (err)
 		goto err_free_inst;
+	auth = crypto_spawn_ahash_alg(&ctx->auth);
+	auth_base = &auth->base;
 
-	crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst),
+				   crypto_attr_alg_name(tb[2]), 0, mask);
 	if (err)
-		goto err_drop_auth;
-
+		goto err_free_inst;
 	enc = crypto_spawn_skcipher_alg(&ctx->enc);
 
 	ctx->reqoff = ALIGN(2 * auth->digestsize + auth_base->cra_alignmask,
@@ -440,12 +416,12 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
 		     "authenc(%s,%s)", auth_base->cra_name,
 		     enc->base.cra_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		goto err_drop_enc;
+		goto err_free_inst;
 
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "authenc(%s,%s)", auth_base->cra_driver_name,
 		     enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto err_drop_enc;
+		goto err_free_inst;
 
 	inst->alg.base.cra_flags = (auth_base->cra_flags |
 				    enc->base.cra_flags) & CRYPTO_ALG_ASYNC;
@@ -470,21 +446,11 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
 	inst->free = crypto_authenc_free;
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto err_drop_enc;
-
-out:
-	crypto_mod_put(auth_base);
-	return err;
-
-err_drop_enc:
-	crypto_drop_skcipher(&ctx->enc);
-err_drop_auth:
-	crypto_drop_ahash(&ctx->auth);
+	if (err) {
 err_free_inst:
-	kfree(inst);
-out_put_auth:
-	goto out;
+		crypto_authenc_free(inst);
+	}
+	return err;
 }
 
 static struct crypto_template crypto_authenc_tmpl = {
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index adb7554fca29..589008146fce 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -65,15 +65,12 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
 	int err = -EINVAL;
 
 	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
-		goto badkey;
+		goto out;
 
 	crypto_ahash_clear_flags(auth, CRYPTO_TFM_REQ_MASK);
 	crypto_ahash_set_flags(auth, crypto_aead_get_flags(authenc_esn) &
 				     CRYPTO_TFM_REQ_MASK);
 	err = crypto_ahash_setkey(auth, keys.authkey, keys.authkeylen);
-	crypto_aead_set_flags(authenc_esn, crypto_ahash_get_flags(auth) &
-					   CRYPTO_TFM_RES_MASK);
-
 	if (err)
 		goto out;
 
@@ -81,16 +78,9 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
 	crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) &
 					 CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen);
-	crypto_aead_set_flags(authenc_esn, crypto_skcipher_get_flags(enc) &
-					   CRYPTO_TFM_RES_MASK);
-
 out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(authenc_esn, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static int crypto_authenc_esn_genicv_tail(struct aead_request *req,
@@ -401,12 +391,12 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
 				     struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
+	struct authenc_esn_instance_ctx *ctx;
 	struct hash_alg_common *auth;
 	struct crypto_alg *auth_base;
 	struct skcipher_alg *enc;
-	struct authenc_esn_instance_ctx *ctx;
-	const char *enc_name;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -416,50 +406,36 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH,
-			      CRYPTO_ALG_TYPE_AHASH_MASK |
-			      crypto_requires_sync(algt->type, algt->mask));
-	if (IS_ERR(auth))
-		return PTR_ERR(auth);
-
-	auth_base = &auth->base;
-
-	enc_name = crypto_attr_alg_name(tb[2]);
-	err = PTR_ERR(enc_name);
-	if (IS_ERR(enc_name))
-		goto out_put_auth;
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
-	err = -ENOMEM;
 	if (!inst)
-		goto out_put_auth;
-
+		return -ENOMEM;
 	ctx = aead_instance_ctx(inst);
 
-	err = crypto_init_ahash_spawn(&ctx->auth, auth,
-				      aead_crypto_instance(inst));
+	err = crypto_grab_ahash(&ctx->auth, aead_crypto_instance(inst),
+				crypto_attr_alg_name(tb[1]), 0, mask);
 	if (err)
 		goto err_free_inst;
+	auth = crypto_spawn_ahash_alg(&ctx->auth);
+	auth_base = &auth->base;
 
-	crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst),
+				   crypto_attr_alg_name(tb[2]), 0, mask);
 	if (err)
-		goto err_drop_auth;
-
+		goto err_free_inst;
 	enc = crypto_spawn_skcipher_alg(&ctx->enc);
 
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
 		     "authencesn(%s,%s)", auth_base->cra_name,
 		     enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
-		goto err_drop_enc;
+		goto err_free_inst;
 
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "authencesn(%s,%s)", auth_base->cra_driver_name,
 		     enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto err_drop_enc;
+		goto err_free_inst;
 
 	inst->alg.base.cra_flags = (auth_base->cra_flags |
 				    enc->base.cra_flags) & CRYPTO_ALG_ASYNC;
@@ -485,21 +461,11 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
 	inst->free = crypto_authenc_esn_free,
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto err_drop_enc;
-
-out:
-	crypto_mod_put(auth_base);
-	return err;
-
-err_drop_enc:
-	crypto_drop_skcipher(&ctx->enc);
-err_drop_auth:
-	crypto_drop_ahash(&ctx->auth);
+	if (err) {
 err_free_inst:
-	kfree(inst);
-out_put_auth:
-	goto out;
+		crypto_authenc_esn_free(inst);
+	}
+	return err;
 }
 
 static struct crypto_template crypto_authenc_esn_tmpl = {
diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c
new file mode 100644
index 000000000000..1d262374fa4e
--- /dev/null
+++ b/crypto/blake2b_generic.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR Apache-2.0)
+/*
+ * BLAKE2b reference source code package - reference C implementations
+ *
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
+ * terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+ * your option.  The terms of these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - OpenSSL license   : https://www.openssl.org/source/license.html
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * More information about the BLAKE2 hash function can be found at
+ * https://blake2.net.
+ *
+ * Note: the original sources have been modified for inclusion in linux kernel
+ * in terms of coding style, using generic helpers and simplifications of error
+ * handling.
+ */
+
+#include <asm/unaligned.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <crypto/internal/hash.h>
+
+#define BLAKE2B_160_DIGEST_SIZE		(160 / 8)
+#define BLAKE2B_256_DIGEST_SIZE		(256 / 8)
+#define BLAKE2B_384_DIGEST_SIZE		(384 / 8)
+#define BLAKE2B_512_DIGEST_SIZE		(512 / 8)
+
+enum blake2b_constant {
+	BLAKE2B_BLOCKBYTES    = 128,
+	BLAKE2B_KEYBYTES      = 64,
+};
+
+struct blake2b_state {
+	u64      h[8];
+	u64      t[2];
+	u64      f[2];
+	u8       buf[BLAKE2B_BLOCKBYTES];
+	size_t   buflen;
+};
+
+static const u64 blake2b_IV[8] = {
+	0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+	0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+	0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+	0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+static const u8 blake2b_sigma[12][16] = {
+	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
+	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
+};
+
+static void blake2b_increment_counter(struct blake2b_state *S, const u64 inc)
+{
+	S->t[0] += inc;
+	S->t[1] += (S->t[0] < inc);
+}
+
+#define G(r,i,a,b,c,d)                                  \
+	do {                                            \
+		a = a + b + m[blake2b_sigma[r][2*i+0]]; \
+		d = ror64(d ^ a, 32);                   \
+		c = c + d;                              \
+		b = ror64(b ^ c, 24);                   \
+		a = a + b + m[blake2b_sigma[r][2*i+1]]; \
+		d = ror64(d ^ a, 16);                   \
+		c = c + d;                              \
+		b = ror64(b ^ c, 63);                   \
+	} while (0)
+
+#define ROUND(r)                                \
+	do {                                    \
+		G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+		G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+		G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+		G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+		G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+		G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+		G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+		G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+	} while (0)
+
+static void blake2b_compress(struct blake2b_state *S,
+			     const u8 block[BLAKE2B_BLOCKBYTES])
+{
+	u64 m[16];
+	u64 v[16];
+	size_t i;
+
+	for (i = 0; i < 16; ++i)
+		m[i] = get_unaligned_le64(block + i * sizeof(m[i]));
+
+	for (i = 0; i < 8; ++i)
+		v[i] = S->h[i];
+
+	v[ 8] = blake2b_IV[0];
+	v[ 9] = blake2b_IV[1];
+	v[10] = blake2b_IV[2];
+	v[11] = blake2b_IV[3];
+	v[12] = blake2b_IV[4] ^ S->t[0];
+	v[13] = blake2b_IV[5] ^ S->t[1];
+	v[14] = blake2b_IV[6] ^ S->f[0];
+	v[15] = blake2b_IV[7] ^ S->f[1];
+
+	ROUND(0);
+	ROUND(1);
+	ROUND(2);
+	ROUND(3);
+	ROUND(4);
+	ROUND(5);
+	ROUND(6);
+	ROUND(7);
+	ROUND(8);
+	ROUND(9);
+	ROUND(10);
+	ROUND(11);
+
+	for (i = 0; i < 8; ++i)
+		S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+}
+
+#undef G
+#undef ROUND
+
+struct blake2b_tfm_ctx {
+	u8 key[BLAKE2B_KEYBYTES];
+	unsigned int keylen;
+};
+
+static int blake2b_setkey(struct crypto_shash *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+	if (keylen == 0 || keylen > BLAKE2B_KEYBYTES)
+		return -EINVAL;
+
+	memcpy(tctx->key, key, keylen);
+	tctx->keylen = keylen;
+
+	return 0;
+}
+
+static int blake2b_init(struct shash_desc *desc)
+{
+	struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct blake2b_state *state = shash_desc_ctx(desc);
+	const int digestsize = crypto_shash_digestsize(desc->tfm);
+
+	memset(state, 0, sizeof(*state));
+	memcpy(state->h, blake2b_IV, sizeof(state->h));
+
+	/* Parameter block is all zeros except index 0, no xor for 1..7 */
+	state->h[0] ^= 0x01010000 | tctx->keylen << 8 | digestsize;
+
+	if (tctx->keylen) {
+		/*
+		 * Prefill the buffer with the key, next call to _update or
+		 * _final will process it
+		 */
+		memcpy(state->buf, tctx->key, tctx->keylen);
+		state->buflen = BLAKE2B_BLOCKBYTES;
+	}
+	return 0;
+}
+
+static int blake2b_update(struct shash_desc *desc, const u8 *in,
+			  unsigned int inlen)
+{
+	struct blake2b_state *state = shash_desc_ctx(desc);
+	const size_t left = state->buflen;
+	const size_t fill = BLAKE2B_BLOCKBYTES - left;
+
+	if (!inlen)
+		return 0;
+
+	if (inlen > fill) {
+		state->buflen = 0;
+		/* Fill buffer */
+		memcpy(state->buf + left, in, fill);
+		blake2b_increment_counter(state, BLAKE2B_BLOCKBYTES);
+		/* Compress */
+		blake2b_compress(state, state->buf);
+		in += fill;
+		inlen -= fill;
+		while (inlen > BLAKE2B_BLOCKBYTES) {
+			blake2b_increment_counter(state, BLAKE2B_BLOCKBYTES);
+			blake2b_compress(state, in);
+			in += BLAKE2B_BLOCKBYTES;
+			inlen -= BLAKE2B_BLOCKBYTES;
+		}
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
+
+	return 0;
+}
+
+static int blake2b_final(struct shash_desc *desc, u8 *out)
+{
+	struct blake2b_state *state = shash_desc_ctx(desc);
+	const int digestsize = crypto_shash_digestsize(desc->tfm);
+	size_t i;
+
+	blake2b_increment_counter(state, state->buflen);
+	/* Set last block */
+	state->f[0] = (u64)-1;
+	/* Padding */
+	memset(state->buf + state->buflen, 0, BLAKE2B_BLOCKBYTES - state->buflen);
+	blake2b_compress(state, state->buf);
+
+	/* Avoid temporary buffer and switch the internal output to LE order */
+	for (i = 0; i < ARRAY_SIZE(state->h); i++)
+		__cpu_to_le64s(&state->h[i]);
+
+	memcpy(out, state->h, digestsize);
+	return 0;
+}
+
+static struct shash_alg blake2b_algs[] = {
+	{
+		.base.cra_name		= "blake2b-160",
+		.base.cra_driver_name	= "blake2b-160-generic",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+		.base.cra_blocksize	= BLAKE2B_BLOCKBYTES,
+		.base.cra_ctxsize	= sizeof(struct blake2b_tfm_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= BLAKE2B_160_DIGEST_SIZE,
+		.setkey			= blake2b_setkey,
+		.init			= blake2b_init,
+		.update			= blake2b_update,
+		.final			= blake2b_final,
+		.descsize		= sizeof(struct blake2b_state),
+	}, {
+		.base.cra_name		= "blake2b-256",
+		.base.cra_driver_name	= "blake2b-256-generic",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+		.base.cra_blocksize	= BLAKE2B_BLOCKBYTES,
+		.base.cra_ctxsize	= sizeof(struct blake2b_tfm_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= BLAKE2B_256_DIGEST_SIZE,
+		.setkey			= blake2b_setkey,
+		.init			= blake2b_init,
+		.update			= blake2b_update,
+		.final			= blake2b_final,
+		.descsize		= sizeof(struct blake2b_state),
+	}, {
+		.base.cra_name		= "blake2b-384",
+		.base.cra_driver_name	= "blake2b-384-generic",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+		.base.cra_blocksize	= BLAKE2B_BLOCKBYTES,
+		.base.cra_ctxsize	= sizeof(struct blake2b_tfm_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= BLAKE2B_384_DIGEST_SIZE,
+		.setkey			= blake2b_setkey,
+		.init			= blake2b_init,
+		.update			= blake2b_update,
+		.final			= blake2b_final,
+		.descsize		= sizeof(struct blake2b_state),
+	}, {
+		.base.cra_name		= "blake2b-512",
+		.base.cra_driver_name	= "blake2b-512-generic",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+		.base.cra_blocksize	= BLAKE2B_BLOCKBYTES,
+		.base.cra_ctxsize	= sizeof(struct blake2b_tfm_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= BLAKE2B_512_DIGEST_SIZE,
+		.setkey			= blake2b_setkey,
+		.init			= blake2b_init,
+		.update			= blake2b_update,
+		.final			= blake2b_final,
+		.descsize		= sizeof(struct blake2b_state),
+	}
+};
+
+static int __init blake2b_mod_init(void)
+{
+	return crypto_register_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs));
+}
+
+static void __exit blake2b_mod_fini(void)
+{
+	crypto_unregister_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs));
+}
+
+subsys_initcall(blake2b_mod_init);
+module_exit(blake2b_mod_fini);
+
+MODULE_AUTHOR("David Sterba <kdave@kernel.org>");
+MODULE_DESCRIPTION("BLAKE2b generic implementation");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("blake2b-160");
+MODULE_ALIAS_CRYPTO("blake2b-160-generic");
+MODULE_ALIAS_CRYPTO("blake2b-256");
+MODULE_ALIAS_CRYPTO("blake2b-256-generic");
+MODULE_ALIAS_CRYPTO("blake2b-384");
+MODULE_ALIAS_CRYPTO("blake2b-384-generic");
+MODULE_ALIAS_CRYPTO("blake2b-512");
+MODULE_ALIAS_CRYPTO("blake2b-512-generic");
diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
new file mode 100644
index 000000000000..005783ff45ad
--- /dev/null
+++ b/crypto/blake2s_generic.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+	if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
+		return -EINVAL;
+
+	memcpy(tctx->key, key, keylen);
+	tctx->keylen = keylen;
+
+	return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct blake2s_state *state = shash_desc_ctx(desc);
+	const int outlen = crypto_shash_digestsize(desc->tfm);
+
+	if (tctx->keylen)
+		blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+	else
+		blake2s_init(state, outlen);
+
+	return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+				 unsigned int inlen)
+{
+	struct blake2s_state *state = shash_desc_ctx(desc);
+	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+	if (unlikely(!inlen))
+		return 0;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCK_SIZE) {
+		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+		/* Hash one less (full) block than strictly possible */
+		blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
+
+	return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+	struct blake2s_state *state = shash_desc_ctx(desc);
+
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+	blake2s_compress_generic(state, state->buf, 1, state->buflen);
+	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+	memcpy(out, state->h, state->outlen);
+	memzero_explicit(state, sizeof(*state));
+
+	return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+	.base.cra_name		= "blake2s-128",
+	.base.cra_driver_name	= "blake2s-128-generic",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_128_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-160",
+	.base.cra_driver_name	= "blake2s-160-generic",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_160_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-224",
+	.base.cra_driver_name	= "blake2s-224-generic",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_224_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}, {
+	.base.cra_name		= "blake2s-256",
+	.base.cra_driver_name	= "blake2s-256-generic",
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx),
+	.base.cra_priority	= 200,
+	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= BLAKE2S_256_HASH_SIZE,
+	.setkey			= crypto_blake2s_setkey,
+	.init			= crypto_blake2s_init,
+	.update			= crypto_blake2s_update,
+	.final			= crypto_blake2s_final,
+	.descsize		= sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+	return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+	crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+subsys_initcall(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-generic");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-generic");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-generic");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-generic");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
deleted file mode 100644
index 48a33817de11..000000000000
--- a/crypto/blkcipher.c
+++ /dev/null
@@ -1,548 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Block chaining cipher operations.
- *
- * Generic encrypt/decrypt wrapper for ciphers, handles operations across
- * multiple page boundaries by using temporary blocks.  In user context,
- * the kernel is given a chance to schedule us once per page.
- *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <crypto/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/cryptouser.h>
-#include <linux/compiler.h>
-#include <net/netlink.h>
-
-#include "internal.h"
-
-enum {
-	BLKCIPHER_WALK_PHYS = 1 << 0,
-	BLKCIPHER_WALK_SLOW = 1 << 1,
-	BLKCIPHER_WALK_COPY = 1 << 2,
-	BLKCIPHER_WALK_DIFF = 1 << 3,
-};
-
-static int blkcipher_walk_next(struct blkcipher_desc *desc,
-			       struct blkcipher_walk *walk);
-static int blkcipher_walk_first(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk);
-
-static inline void blkcipher_map_src(struct blkcipher_walk *walk)
-{
-	walk->src.virt.addr = scatterwalk_map(&walk->in);
-}
-
-static inline void blkcipher_map_dst(struct blkcipher_walk *walk)
-{
-	walk->dst.virt.addr = scatterwalk_map(&walk->out);
-}
-
-static inline void blkcipher_unmap_src(struct blkcipher_walk *walk)
-{
-	scatterwalk_unmap(walk->src.virt.addr);
-}
-
-static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk)
-{
-	scatterwalk_unmap(walk->dst.virt.addr);
-}
-
-/* Get a spot of the specified length that does not straddle a page.
- * The caller needs to ensure that there is enough space for this operation.
- */
-static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
-{
-	u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
-	return max(start, end_page);
-}
-
-static inline void blkcipher_done_slow(struct blkcipher_walk *walk,
-				       unsigned int bsize)
-{
-	u8 *addr;
-
-	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
-	addr = blkcipher_get_spot(addr, bsize);
-	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
-}
-
-static inline void blkcipher_done_fast(struct blkcipher_walk *walk,
-				       unsigned int n)
-{
-	if (walk->flags & BLKCIPHER_WALK_COPY) {
-		blkcipher_map_dst(walk);
-		memcpy(walk->dst.virt.addr, walk->page, n);
-		blkcipher_unmap_dst(walk);
-	} else if (!(walk->flags & BLKCIPHER_WALK_PHYS)) {
-		if (walk->flags & BLKCIPHER_WALK_DIFF)
-			blkcipher_unmap_dst(walk);
-		blkcipher_unmap_src(walk);
-	}
-
-	scatterwalk_advance(&walk->in, n);
-	scatterwalk_advance(&walk->out, n);
-}
-
-int blkcipher_walk_done(struct blkcipher_desc *desc,
-			struct blkcipher_walk *walk, int err)
-{
-	unsigned int n; /* bytes processed */
-	bool more;
-
-	if (unlikely(err < 0))
-		goto finish;
-
-	n = walk->nbytes - err;
-	walk->total -= n;
-	more = (walk->total != 0);
-
-	if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) {
-		blkcipher_done_fast(walk, n);
-	} else {
-		if (WARN_ON(err)) {
-			/* unexpected case; didn't process all bytes */
-			err = -EINVAL;
-			goto finish;
-		}
-		blkcipher_done_slow(walk, n);
-	}
-
-	scatterwalk_done(&walk->in, 0, more);
-	scatterwalk_done(&walk->out, 1, more);
-
-	if (more) {
-		crypto_yield(desc->flags);
-		return blkcipher_walk_next(desc, walk);
-	}
-	err = 0;
-finish:
-	walk->nbytes = 0;
-	if (walk->iv != desc->info)
-		memcpy(desc->info, walk->iv, walk->ivsize);
-	if (walk->buffer != walk->page)
-		kfree(walk->buffer);
-	if (walk->page)
-		free_page((unsigned long)walk->page);
-	return err;
-}
-EXPORT_SYMBOL_GPL(blkcipher_walk_done);
-
-static inline int blkcipher_next_slow(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      unsigned int bsize,
-				      unsigned int alignmask)
-{
-	unsigned int n;
-	unsigned aligned_bsize = ALIGN(bsize, alignmask + 1);
-
-	if (walk->buffer)
-		goto ok;
-
-	walk->buffer = walk->page;
-	if (walk->buffer)
-		goto ok;
-
-	n = aligned_bsize * 3 - (alignmask + 1) +
-	    (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
-	walk->buffer = kmalloc(n, GFP_ATOMIC);
-	if (!walk->buffer)
-		return blkcipher_walk_done(desc, walk, -ENOMEM);
-
-ok:
-	walk->dst.virt.addr = (u8 *)ALIGN((unsigned long)walk->buffer,
-					  alignmask + 1);
-	walk->dst.virt.addr = blkcipher_get_spot(walk->dst.virt.addr, bsize);
-	walk->src.virt.addr = blkcipher_get_spot(walk->dst.virt.addr +
-						 aligned_bsize, bsize);
-
-	scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);
-
-	walk->nbytes = bsize;
-	walk->flags |= BLKCIPHER_WALK_SLOW;
-
-	return 0;
-}
-
-static inline int blkcipher_next_copy(struct blkcipher_walk *walk)
-{
-	u8 *tmp = walk->page;
-
-	blkcipher_map_src(walk);
-	memcpy(tmp, walk->src.virt.addr, walk->nbytes);
-	blkcipher_unmap_src(walk);
-
-	walk->src.virt.addr = tmp;
-	walk->dst.virt.addr = tmp;
-
-	return 0;
-}
-
-static inline int blkcipher_next_fast(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk)
-{
-	unsigned long diff;
-
-	walk->src.phys.page = scatterwalk_page(&walk->in);
-	walk->src.phys.offset = offset_in_page(walk->in.offset);
-	walk->dst.phys.page = scatterwalk_page(&walk->out);
-	walk->dst.phys.offset = offset_in_page(walk->out.offset);
-
-	if (walk->flags & BLKCIPHER_WALK_PHYS)
-		return 0;
-
-	diff = walk->src.phys.offset - walk->dst.phys.offset;
-	diff |= walk->src.virt.page - walk->dst.virt.page;
-
-	blkcipher_map_src(walk);
-	walk->dst.virt.addr = walk->src.virt.addr;
-
-	if (diff) {
-		walk->flags |= BLKCIPHER_WALK_DIFF;
-		blkcipher_map_dst(walk);
-	}
-
-	return 0;
-}
-
-static int blkcipher_walk_next(struct blkcipher_desc *desc,
-			       struct blkcipher_walk *walk)
-{
-	unsigned int bsize;
-	unsigned int n;
-	int err;
-
-	n = walk->total;
-	if (unlikely(n < walk->cipher_blocksize)) {
-		desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		return blkcipher_walk_done(desc, walk, -EINVAL);
-	}
-
-	bsize = min(walk->walk_blocksize, n);
-
-	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
-			 BLKCIPHER_WALK_DIFF);
-	if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
-	    !scatterwalk_aligned(&walk->out, walk->alignmask)) {
-		walk->flags |= BLKCIPHER_WALK_COPY;
-		if (!walk->page) {
-			walk->page = (void *)__get_free_page(GFP_ATOMIC);
-			if (!walk->page)
-				n = 0;
-		}
-	}
-
-	n = scatterwalk_clamp(&walk->in, n);
-	n = scatterwalk_clamp(&walk->out, n);
-
-	if (unlikely(n < bsize)) {
-		err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask);
-		goto set_phys_lowmem;
-	}
-
-	walk->nbytes = n;
-	if (walk->flags & BLKCIPHER_WALK_COPY) {
-		err = blkcipher_next_copy(walk);
-		goto set_phys_lowmem;
-	}
-
-	return blkcipher_next_fast(desc, walk);
-
-set_phys_lowmem:
-	if (walk->flags & BLKCIPHER_WALK_PHYS) {
-		walk->src.phys.page = virt_to_page(walk->src.virt.addr);
-		walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
-		walk->src.phys.offset &= PAGE_SIZE - 1;
-		walk->dst.phys.offset &= PAGE_SIZE - 1;
-	}
-	return err;
-}
-
-static inline int blkcipher_copy_iv(struct blkcipher_walk *walk)
-{
-	unsigned bs = walk->walk_blocksize;
-	unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1);
-	unsigned int size = aligned_bs * 2 +
-			    walk->ivsize + max(aligned_bs, walk->ivsize) -
-			    (walk->alignmask + 1);
-	u8 *iv;
-
-	size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1);
-	walk->buffer = kmalloc(size, GFP_ATOMIC);
-	if (!walk->buffer)
-		return -ENOMEM;
-
-	iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
-	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = blkcipher_get_spot(iv, walk->ivsize);
-
-	walk->iv = memcpy(iv, walk->iv, walk->ivsize);
-	return 0;
-}
-
-int blkcipher_walk_virt(struct blkcipher_desc *desc,
-			struct blkcipher_walk *walk)
-{
-	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
-	walk->cipher_blocksize = walk->walk_blocksize;
-	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
-	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
-	return blkcipher_walk_first(desc, walk);
-}
-EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
-
-int blkcipher_walk_phys(struct blkcipher_desc *desc,
-			struct blkcipher_walk *walk)
-{
-	walk->flags |= BLKCIPHER_WALK_PHYS;
-	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
-	walk->cipher_blocksize = walk->walk_blocksize;
-	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
-	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
-	return blkcipher_walk_first(desc, walk);
-}
-EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
-
-static int blkcipher_walk_first(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
-{
-	if (WARN_ON_ONCE(in_irq()))
-		return -EDEADLK;
-
-	walk->iv = desc->info;
-	walk->nbytes = walk->total;
-	if (unlikely(!walk->total))
-		return 0;
-
-	walk->buffer = NULL;
-	if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
-		int err = blkcipher_copy_iv(walk);
-		if (err)
-			return err;
-	}
-
-	scatterwalk_start(&walk->in, walk->in.sg);
-	scatterwalk_start(&walk->out, walk->out.sg);
-	walk->page = NULL;
-
-	return blkcipher_walk_next(desc, walk);
-}
-
-int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
-			      struct blkcipher_walk *walk,
-			      unsigned int blocksize)
-{
-	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->walk_blocksize = blocksize;
-	walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm);
-	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
-	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
-	return blkcipher_walk_first(desc, walk);
-}
-EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block);
-
-int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
-				   struct blkcipher_walk *walk,
-				   struct crypto_aead *tfm,
-				   unsigned int blocksize)
-{
-	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->walk_blocksize = blocksize;
-	walk->cipher_blocksize = crypto_aead_blocksize(tfm);
-	walk->ivsize = crypto_aead_ivsize(tfm);
-	walk->alignmask = crypto_aead_alignmask(tfm);
-	return blkcipher_walk_first(desc, walk);
-}
-EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block);
-
-static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
-			    unsigned int keylen)
-{
-	struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher;
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-	int ret;
-	u8 *buffer, *alignbuffer;
-	unsigned long absize;
-
-	absize = keylen + alignmask;
-	buffer = kmalloc(absize, GFP_ATOMIC);
-	if (!buffer)
-		return -ENOMEM;
-
-	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-	memcpy(alignbuffer, key, keylen);
-	ret = cipher->setkey(tfm, alignbuffer, keylen);
-	memset(alignbuffer, 0, keylen);
-	kfree(buffer);
-	return ret;
-}
-
-static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
-{
-	struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher;
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-
-	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
-	if ((unsigned long)key & alignmask)
-		return setkey_unaligned(tfm, key, keylen);
-
-	return cipher->setkey(tfm, key, keylen);
-}
-
-static int async_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	return setkey(crypto_ablkcipher_tfm(tfm), key, keylen);
-}
-
-static int async_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-	struct blkcipher_desc desc = {
-		.tfm = __crypto_blkcipher_cast(tfm),
-		.info = req->info,
-		.flags = req->base.flags,
-	};
-
-
-	return alg->encrypt(&desc, req->dst, req->src, req->nbytes);
-}
-
-static int async_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-	struct blkcipher_desc desc = {
-		.tfm = __crypto_blkcipher_cast(tfm),
-		.info = req->info,
-		.flags = req->base.flags,
-	};
-
-	return alg->decrypt(&desc, req->dst, req->src, req->nbytes);
-}
-
-static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg, u32 type,
-					     u32 mask)
-{
-	struct blkcipher_alg *cipher = &alg->cra_blkcipher;
-	unsigned int len = alg->cra_ctxsize;
-
-	if ((mask & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_MASK &&
-	    cipher->ivsize) {
-		len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1);
-		len += cipher->ivsize;
-	}
-
-	return len;
-}
-
-static int crypto_init_blkcipher_ops_async(struct crypto_tfm *tfm)
-{
-	struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher;
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-
-	crt->setkey = async_setkey;
-	crt->encrypt = async_encrypt;
-	crt->decrypt = async_decrypt;
-	crt->base = __crypto_ablkcipher_cast(tfm);
-	crt->ivsize = alg->ivsize;
-
-	return 0;
-}
-
-static int crypto_init_blkcipher_ops_sync(struct crypto_tfm *tfm)
-{
-	struct blkcipher_tfm *crt = &tfm->crt_blkcipher;
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-	unsigned long align = crypto_tfm_alg_alignmask(tfm) + 1;
-	unsigned long addr;
-
-	crt->setkey = setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-
-	addr = (unsigned long)crypto_tfm_ctx(tfm);
-	addr = ALIGN(addr, align);
-	addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align);
-	crt->iv = (void *)addr;
-
-	return 0;
-}
-
-static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
-{
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-
-	if (alg->ivsize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	if ((mask & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_MASK)
-		return crypto_init_blkcipher_ops_sync(tfm);
-	else
-		return crypto_init_blkcipher_ops_async(tfm);
-}
-
-#ifdef CONFIG_NET
-static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_blkcipher rblkcipher;
-
-	memset(&rblkcipher, 0, sizeof(rblkcipher));
-
-	strscpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type));
-	strscpy(rblkcipher.geniv, "<default>", sizeof(rblkcipher.geniv));
-
-	rblkcipher.blocksize = alg->cra_blocksize;
-	rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-	rblkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-	rblkcipher.ivsize = alg->cra_blkcipher.ivsize;
-
-	return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
-		       sizeof(rblkcipher), &rblkcipher);
-}
-#else
-static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	return -ENOSYS;
-}
-#endif
-
-static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
-	__maybe_unused;
-static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
-{
-	seq_printf(m, "type         : blkcipher\n");
-	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
-	seq_printf(m, "min keysize  : %u\n", alg->cra_blkcipher.min_keysize);
-	seq_printf(m, "max keysize  : %u\n", alg->cra_blkcipher.max_keysize);
-	seq_printf(m, "ivsize       : %u\n", alg->cra_blkcipher.ivsize);
-	seq_printf(m, "geniv        : <default>\n");
-}
-
-const struct crypto_type crypto_blkcipher_type = {
-	.ctxsize = crypto_blkcipher_ctxsize,
-	.init = crypto_init_blkcipher_ops,
-#ifdef CONFIG_PROC_FS
-	.show = crypto_blkcipher_show,
-#endif
-	.report = crypto_blkcipher_report,
-};
-EXPORT_SYMBOL_GPL(crypto_blkcipher_type);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Generic block chaining cipher type");
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index b6a1121e2478..9a5783e5196a 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -970,12 +970,9 @@ camellia_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 {
 	struct camellia_ctx *cctx = crypto_tfm_ctx(tfm);
 	const unsigned char *key = (const unsigned char *)in_key;
-	u32 *flags = &tfm->crt_flags;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (key_len != 16 && key_len != 24 && key_len != 32)
 		return -EINVAL;
-	}
 
 	cctx->key_length = key_len;
 
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index a8248f8e2777..c77ff6c8a2b2 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -103,17 +103,14 @@ static inline void W(u32 *key, unsigned int i)
 	key[7] ^= F2(key[0], Tr[i % 4][7], Tm[i][7]);
 }
 
-int __cast6_setkey(struct cast6_ctx *c, const u8 *in_key,
-		   unsigned key_len, u32 *flags)
+int __cast6_setkey(struct cast6_ctx *c, const u8 *in_key, unsigned int key_len)
 {
 	int i;
 	u32 key[8];
 	__be32 p_key[8]; /* padded key */
 
-	if (key_len % 4 != 0) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (key_len % 4 != 0)
 		return -EINVAL;
-	}
 
 	memset(p_key, 0, 32);
 	memcpy(p_key, in_key, key_len);
@@ -148,13 +145,12 @@ EXPORT_SYMBOL_GPL(__cast6_setkey);
 
 int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 {
-	return __cast6_setkey(crypto_tfm_ctx(tfm), key, keylen,
-			      &tfm->crt_flags);
+	return __cast6_setkey(crypto_tfm_ctx(tfm), key, keylen);
 }
 EXPORT_SYMBOL_GPL(cast6_setkey);
 
 /*forward quad round*/
-static inline void Q(u32 *block, u8 *Kr, u32 *Km)
+static inline void Q(u32 *block, const u8 *Kr, const u32 *Km)
 {
 	u32 I;
 	block[2] ^= F1(block[3], Kr[0], Km[0]);
@@ -164,7 +160,7 @@ static inline void Q(u32 *block, u8 *Kr, u32 *Km)
 }
 
 /*reverse quad round*/
-static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
+static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km)
 {
 	u32 I;
 	block[3] ^= F1(block[0], Kr[3], Km[3]);
@@ -173,13 +169,14 @@ static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
 	block[2] ^= F1(block[3], Kr[0], Km[0]);
 }
 
-void __cast6_encrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
+void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
 {
+	const struct cast6_ctx *c = ctx;
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
-	u32 *Km;
-	u8 *Kr;
+	const u32 *Km;
+	const u8 *Kr;
 
 	block[0] = be32_to_cpu(src[0]);
 	block[1] = be32_to_cpu(src[1]);
@@ -211,13 +208,14 @@ static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	__cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
 }
 
-void __cast6_decrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
+void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
 {
+	const struct cast6_ctx *c = ctx;
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
-	u32 *Km;
-	u8 *Kr;
+	const u32 *Km;
+	const u8 *Kr;
 
 	block[0] = be32_to_cpu(src[0]);
 	block[1] = be32_to_cpu(src[1]);
diff --git a/crypto/cbc.c b/crypto/cbc.c
index dd96bcf4d4b6..e6f6273a7d39 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -54,10 +54,12 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
+	alg = skcipher_ialg_simple(inst);
+
 	err = -EINVAL;
 	if (!is_power_of_2(alg->cra_blocksize))
 		goto out_free_inst;
@@ -66,14 +68,11 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.decrypt = crypto_cbc_decrypt;
 
 	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_free_inst;
-	goto out_put_alg;
-
+	if (err) {
 out_free_inst:
-	inst->free(inst);
-out_put_alg:
-	crypto_mod_put(alg);
+		inst->free(inst);
+	}
+
 	return err;
 }
 
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 380eb619f657..241ecdc5c4e0 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -15,8 +15,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
-#include "internal.h"
-
 struct ccm_instance_ctx {
 	struct crypto_skcipher_spawn ctr;
 	struct crypto_ahash_spawn mac;
@@ -91,26 +89,19 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_skcipher *ctr = ctx->ctr;
 	struct crypto_ahash *mac = ctx->mac;
-	int err = 0;
+	int err;
 
 	crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(ctr, key, keylen);
-	crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) &
-			      CRYPTO_TFM_RES_MASK);
 	if (err)
-		goto out;
+		return err;
 
 	crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
 	crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
 				    CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(mac, key, keylen);
-	crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
-			      CRYPTO_TFM_RES_MASK);
-
-out:
-	return err;
+	return crypto_ahash_setkey(mac, key, keylen);
 }
 
 static int crypto_ccm_setauthsize(struct crypto_aead *tfm,
@@ -457,11 +448,11 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 				    const char *mac_name)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
+	struct ccm_instance_ctx *ictx;
 	struct skcipher_alg *ctr;
-	struct crypto_alg *mac_alg;
 	struct hash_alg_common *mac;
-	struct ccm_instance_ctx *ictx;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -471,37 +462,28 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
-				  CRYPTO_ALG_TYPE_HASH,
-				  CRYPTO_ALG_TYPE_AHASH_MASK |
-				  CRYPTO_ALG_ASYNC);
-	if (IS_ERR(mac_alg))
-		return PTR_ERR(mac_alg);
-
-	mac = __crypto_hash_alg_common(mac_alg);
-	err = -EINVAL;
-	if (strncmp(mac->base.cra_name, "cbcmac(", 7) != 0 ||
-	    mac->digestsize != 16)
-		goto out_put_mac;
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
-	err = -ENOMEM;
 	if (!inst)
-		goto out_put_mac;
-
+		return -ENOMEM;
 	ictx = aead_instance_ctx(inst);
-	err = crypto_init_ahash_spawn(&ictx->mac, mac,
-				      aead_crypto_instance(inst));
+
+	err = crypto_grab_ahash(&ictx->mac, aead_crypto_instance(inst),
+				mac_name, 0, CRYPTO_ALG_ASYNC);
 	if (err)
 		goto err_free_inst;
+	mac = crypto_spawn_ahash_alg(&ictx->mac);
 
-	crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
-	if (err)
-		goto err_drop_mac;
+	err = -EINVAL;
+	if (strncmp(mac->base.cra_name, "cbcmac(", 7) != 0 ||
+	    mac->digestsize != 16)
+		goto err_free_inst;
 
+	err = crypto_grab_skcipher(&ictx->ctr, aead_crypto_instance(inst),
+				   ctr_name, 0, mask);
+	if (err)
+		goto err_free_inst;
 	ctr = crypto_spawn_skcipher_alg(&ictx->ctr);
 
 	/* The skcipher algorithm must be CTR mode, using 16-byte blocks. */
@@ -509,21 +491,21 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 ||
 	    crypto_skcipher_alg_ivsize(ctr) != 16 ||
 	    ctr->base.cra_blocksize != 1)
-		goto err_drop_ctr;
+		goto err_free_inst;
 
 	/* ctr and cbcmac must use the same underlying block cipher. */
 	if (strcmp(ctr->base.cra_name + 4, mac->base.cra_name + 7) != 0)
-		goto err_drop_ctr;
+		goto err_free_inst;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
 		     "ccm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME)
-		goto err_drop_ctr;
+		goto err_free_inst;
 
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "ccm_base(%s,%s)", ctr->base.cra_driver_name,
 		     mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto err_drop_ctr;
+		goto err_free_inst;
 
 	inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
 	inst->alg.base.cra_priority = (mac->base.cra_priority +
@@ -545,20 +527,11 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	inst->free = crypto_ccm_free;
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto err_drop_ctr;
-
-out_put_mac:
-	crypto_mod_put(mac_alg);
-	return err;
-
-err_drop_ctr:
-	crypto_drop_skcipher(&ictx->ctr);
-err_drop_mac:
-	crypto_drop_ahash(&ictx->mac);
+	if (err) {
 err_free_inst:
-	kfree(inst);
-	goto out_put_mac;
+		crypto_ccm_free(inst);
+	}
+	return err;
 }
 
 static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -604,7 +577,6 @@ static int crypto_rfc4309_setkey(struct crypto_aead *parent, const u8 *key,
 {
 	struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(parent);
 	struct crypto_aead *child = ctx->child;
-	int err;
 
 	if (keylen < 3)
 		return -EINVAL;
@@ -615,11 +587,7 @@ static int crypto_rfc4309_setkey(struct crypto_aead *parent, const u8 *key,
 	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_aead_set_flags(child, crypto_aead_get_flags(parent) &
 				     CRYPTO_TFM_REQ_MASK);
-	err = crypto_aead_setkey(child, key, keylen);
-	crypto_aead_set_flags(parent, crypto_aead_get_flags(child) &
-				      CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_aead_setkey(child, key, keylen);
 }
 
 static int crypto_rfc4309_setauthsize(struct crypto_aead *parent,
@@ -745,6 +713,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
 				 struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
 	struct crypto_aead_spawn *spawn;
 	struct aead_alg *alg;
@@ -758,6 +727,8 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	ccm_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ccm_name))
 		return PTR_ERR(ccm_name);
@@ -767,9 +738,8 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
 		return -ENOMEM;
 
 	spawn = aead_instance_ctx(inst);
-	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
-	err = crypto_grab_aead(spawn, ccm_name, 0,
-			       crypto_requires_sync(algt->type, algt->mask));
+	err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
+			       ccm_name, 0, mask);
 	if (err)
 		goto out_free_inst;
 
@@ -896,7 +866,7 @@ static int cbcmac_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_cipher *cipher;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst);
 	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -917,6 +887,7 @@ static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
 static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct shash_instance *inst;
+	struct crypto_cipher_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
@@ -924,21 +895,20 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		return err;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	spawn = shash_instance_ctx(inst);
 
-	inst = shash_alloc_instance("cbcmac", alg);
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[1]), 0, 0);
+	if (err)
+		goto err_free_inst;
+	alg = crypto_spawn_cipher_alg(spawn);
 
-	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
-				shash_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = 1;
@@ -957,14 +927,13 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.final = crypto_cbcmac_digest_final;
 	inst->alg.setkey = crypto_cbcmac_digest_setkey;
 
-	err = shash_register_instance(tmpl, inst);
+	inst->free = shash_free_singlespawn_instance;
 
-out_free_inst:
-	if (err)
-		shash_free_instance(shash_crypto_instance(inst));
-
-out_put_alg:
-	crypto_mod_put(alg);
+	err = shash_register_instance(tmpl, inst);
+	if (err) {
+err_free_inst:
+		shash_free_singlespawn_instance(inst);
+	}
 	return err;
 }
 
@@ -972,7 +941,6 @@ static struct crypto_template crypto_ccm_tmpls[] = {
 	{
 		.name = "cbcmac",
 		.create = cbcmac_create,
-		.free = shash_free_instance,
 		.module = THIS_MODULE,
 	}, {
 		.name = "ccm_base",
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 7b68fbb61732..4e5219bbcd19 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -203,10 +203,12 @@ static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
+	alg = skcipher_ialg_simple(inst);
+
 	/* CFB mode is a stream cipher. */
 	inst->alg.base.cra_blocksize = 1;
 
@@ -223,7 +225,6 @@ static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		inst->free(inst);
 
-	crypto_mod_put(alg);
 	return err;
 }
 
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 74e824e537e6..ccaea5cb66d1 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -16,8 +16,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#include "internal.h"
-
 struct chachapoly_instance_ctx {
 	struct crypto_skcipher_spawn chacha;
 	struct crypto_ahash_spawn poly;
@@ -477,7 +475,6 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 			     unsigned int keylen)
 {
 	struct chachapoly_ctx *ctx = crypto_aead_ctx(aead);
-	int err;
 
 	if (keylen != ctx->saltlen + CHACHA_KEY_SIZE)
 		return -EINVAL;
@@ -488,11 +485,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_skcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) &
 					       CRYPTO_TFM_REQ_MASK);
-
-	err = crypto_skcipher_setkey(ctx->chacha, key, keylen);
-	crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctx->chacha) &
-				    CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_skcipher_setkey(ctx->chacha, key, keylen);
 }
 
 static int chachapoly_setauthsize(struct crypto_aead *tfm,
@@ -563,12 +556,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 			     const char *name, unsigned int ivsize)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
-	struct skcipher_alg *chacha;
-	struct crypto_alg *poly;
-	struct hash_alg_common *poly_hash;
 	struct chachapoly_instance_ctx *ctx;
-	const char *chacha_name, *poly_name;
+	struct skcipher_alg *chacha;
+	struct hash_alg_common *poly;
 	int err;
 
 	if (ivsize > CHACHAPOLY_IV_SIZE)
@@ -581,72 +573,53 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	chacha_name = crypto_attr_alg_name(tb[1]);
-	if (IS_ERR(chacha_name))
-		return PTR_ERR(chacha_name);
-	poly_name = crypto_attr_alg_name(tb[2]);
-	if (IS_ERR(poly_name))
-		return PTR_ERR(poly_name);
-
-	poly = crypto_find_alg(poly_name, &crypto_ahash_type,
-			       CRYPTO_ALG_TYPE_HASH,
-			       CRYPTO_ALG_TYPE_AHASH_MASK |
-			       crypto_requires_sync(algt->type,
-						    algt->mask));
-	if (IS_ERR(poly))
-		return PTR_ERR(poly);
-	poly_hash = __crypto_hash_alg_common(poly);
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
-	err = -EINVAL;
-	if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
-		goto out_put_poly;
-
-	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
 	if (!inst)
-		goto out_put_poly;
-
+		return -ENOMEM;
 	ctx = aead_instance_ctx(inst);
 	ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-	err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
-				      aead_crypto_instance(inst));
+
+	err = crypto_grab_skcipher(&ctx->chacha, aead_crypto_instance(inst),
+				   crypto_attr_alg_name(tb[1]), 0, mask);
 	if (err)
 		goto err_free_inst;
+	chacha = crypto_spawn_skcipher_alg(&ctx->chacha);
 
-	crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_ahash(&ctx->poly, aead_crypto_instance(inst),
+				crypto_attr_alg_name(tb[2]), 0, mask);
 	if (err)
-		goto err_drop_poly;
-
-	chacha = crypto_spawn_skcipher_alg(&ctx->chacha);
+		goto err_free_inst;
+	poly = crypto_spawn_ahash_alg(&ctx->poly);
 
 	err = -EINVAL;
+	if (poly->digestsize != POLY1305_DIGEST_SIZE)
+		goto err_free_inst;
 	/* Need 16-byte IV size, including Initial Block Counter value */
 	if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE)
-		goto out_drop_chacha;
+		goto err_free_inst;
 	/* Not a stream cipher? */
 	if (chacha->base.cra_blocksize != 1)
-		goto out_drop_chacha;
+		goto err_free_inst;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
 		     "%s(%s,%s)", name, chacha->base.cra_name,
-		     poly->cra_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_drop_chacha;
+		     poly->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "%s(%s,%s)", name, chacha->base.cra_driver_name,
-		     poly->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_drop_chacha;
+		     poly->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
 
-	inst->alg.base.cra_flags = (chacha->base.cra_flags | poly->cra_flags) &
-				   CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_flags = (chacha->base.cra_flags |
+				    poly->base.cra_flags) & CRYPTO_ALG_ASYNC;
 	inst->alg.base.cra_priority = (chacha->base.cra_priority +
-				       poly->cra_priority) / 2;
+				       poly->base.cra_priority) / 2;
 	inst->alg.base.cra_blocksize = 1;
 	inst->alg.base.cra_alignmask = chacha->base.cra_alignmask |
-				       poly->cra_alignmask;
+				       poly->base.cra_alignmask;
 	inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) +
 				     ctx->saltlen;
 	inst->alg.ivsize = ivsize;
@@ -662,20 +635,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->free = chachapoly_free;
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto out_drop_chacha;
-
-out_put_poly:
-	crypto_mod_put(poly);
-	return err;
-
-out_drop_chacha:
-	crypto_drop_skcipher(&ctx->chacha);
-err_drop_poly:
-	crypto_drop_ahash(&ctx->poly);
+	if (err) {
 err_free_inst:
-	kfree(inst);
-	goto out_put_poly;
+		chachapoly_free(inst);
+	}
+	return err;
 }
 
 static int rfc7539_create(struct crypto_template *tmpl, struct rtattr **tb)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
index 085d8d219987..8beea79ab117 100644
--- a/crypto/chacha_generic.c
+++ b/crypto/chacha_generic.c
@@ -8,29 +8,10 @@
 
 #include <asm/unaligned.h>
 #include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
-static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
-			   unsigned int bytes, int nrounds)
-{
-	/* aligned to potentially speed up crypto_xor() */
-	u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
-
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block(state, stream, nrounds);
-		crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
-		bytes -= CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-	}
-	if (bytes) {
-		chacha_block(state, stream, nrounds);
-		crypto_xor_cpy(dst, src, stream, bytes);
-	}
-}
-
 static int chacha_stream_xor(struct skcipher_request *req,
 			     const struct chacha_ctx *ctx, const u8 *iv)
 {
@@ -40,7 +21,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	crypto_chacha_init(state, ctx, iv);
+	chacha_init_generic(state, ctx->key, iv);
 
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
@@ -48,75 +29,23 @@ static int chacha_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
 
-		chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-			       nbytes, ctx->nrounds);
+		chacha_crypt_generic(state, walk.dst.virt.addr,
+				     walk.src.virt.addr, nbytes, ctx->nrounds);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
-{
-	state[0]  = 0x61707865; /* "expa" */
-	state[1]  = 0x3320646e; /* "nd 3" */
-	state[2]  = 0x79622d32; /* "2-by" */
-	state[3]  = 0x6b206574; /* "te k" */
-	state[4]  = ctx->key[0];
-	state[5]  = ctx->key[1];
-	state[6]  = ctx->key[2];
-	state[7]  = ctx->key[3];
-	state[8]  = ctx->key[4];
-	state[9]  = ctx->key[5];
-	state[10] = ctx->key[6];
-	state[11] = ctx->key[7];
-	state[12] = get_unaligned_le32(iv +  0);
-	state[13] = get_unaligned_le32(iv +  4);
-	state[14] = get_unaligned_le32(iv +  8);
-	state[15] = get_unaligned_le32(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha_init);
-
-static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			 unsigned int keysize, int nrounds)
-{
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int i;
-
-	if (keysize != CHACHA_KEY_SIZE)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
-	ctx->nrounds = nrounds;
-	return 0;
-}
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize)
-{
-	return chacha_setkey(tfm, key, keysize, 20);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize)
-{
-	return chacha_setkey(tfm, key, keysize, 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
-
-int crypto_chacha_crypt(struct skcipher_request *req)
+static int crypto_chacha_crypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	return chacha_stream_xor(req, ctx, req->iv);
 }
-EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
 
-int crypto_xchacha_crypt(struct skcipher_request *req)
+static int crypto_xchacha_crypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -125,8 +54,8 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
 	u8 real_iv[16];
 
 	/* Compute the subkey given the original key and first 128 nonce bits */
-	crypto_chacha_init(state, ctx, req->iv);
-	hchacha_block(state, subctx.key, ctx->nrounds);
+	chacha_init_generic(state, ctx->key, req->iv);
+	hchacha_block_generic(state, subctx.key, ctx->nrounds);
 	subctx.nrounds = ctx->nrounds;
 
 	/* Build the real IV */
@@ -136,7 +65,6 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
 	/* Generate the stream and XOR it with the data */
 	return chacha_stream_xor(req, &subctx, real_iv);
 }
-EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
 
 static struct skcipher_alg algs[] = {
 	{
@@ -151,7 +79,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= CHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= crypto_chacha_crypt,
 		.decrypt		= crypto_chacha_crypt,
 	}, {
@@ -166,7 +94,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= crypto_xchacha_crypt,
 		.decrypt		= crypto_xchacha_crypt,
 	}, {
@@ -181,7 +109,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
+		.setkey			= chacha12_setkey,
 		.encrypt		= crypto_xchacha_crypt,
 		.decrypt		= crypto_xchacha_crypt,
 	}
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 108427026e7c..fd78150deb1c 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -2,7 +2,7 @@
 /*
  * Cryptographic API.
  *
- * Cipher operations.
+ * Single-block cipher operations.
  *
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
@@ -16,11 +16,11 @@
 #include <linux/string.h>
 #include "internal.h"
 
-static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
+static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct cipher_alg *cia = crypto_cipher_alg(tfm);
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
 	int ret;
 	u8 *buffer, *alignbuffer;
 	unsigned long absize;
@@ -32,83 +32,60 @@ static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
 
 	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	memcpy(alignbuffer, key, keylen);
-	ret = cia->cia_setkey(tfm, alignbuffer, keylen);
+	ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen);
 	memset(alignbuffer, 0, keylen);
 	kfree(buffer);
 	return ret;
 
 }
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+int crypto_cipher_setkey(struct crypto_cipher *tfm,
+			 const u8 *key, unsigned int keylen)
 {
-	struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct cipher_alg *cia = crypto_cipher_alg(tfm);
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
 
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize)
 		return -EINVAL;
-	}
 
 	if ((unsigned long)key & alignmask)
 		return setkey_unaligned(tfm, key, keylen);
 
-	return cia->cia_setkey(tfm, key, keylen);
-}
-
-static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
-					      const u8 *),
-				   struct crypto_tfm *tfm,
-				   u8 *dst, const u8 *src)
-{
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-	unsigned int size = crypto_tfm_alg_blocksize(tfm);
-	u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
-	u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-
-	memcpy(tmp, src, size);
-	fn(tfm, tmp, tmp);
-	memcpy(dst, tmp, size);
+	return cia->cia_setkey(crypto_cipher_tfm(tfm), key, keylen);
 }
+EXPORT_SYMBOL_GPL(crypto_cipher_setkey);
 
-static void cipher_encrypt_unaligned(struct crypto_tfm *tfm,
-				     u8 *dst, const u8 *src)
+static inline void cipher_crypt_one(struct crypto_cipher *tfm,
+				    u8 *dst, const u8 *src, bool enc)
 {
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
+	struct cipher_alg *cia = crypto_cipher_alg(tfm);
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		enc ? cia->cia_encrypt : cia->cia_decrypt;
 
 	if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
-		cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src);
-		return;
+		unsigned int bs = crypto_cipher_blocksize(tfm);
+		u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
+		u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+
+		memcpy(tmp, src, bs);
+		fn(crypto_cipher_tfm(tfm), tmp, tmp);
+		memcpy(dst, tmp, bs);
+	} else {
+		fn(crypto_cipher_tfm(tfm), dst, src);
 	}
-
-	cipher->cia_encrypt(tfm, dst, src);
 }
 
-static void cipher_decrypt_unaligned(struct crypto_tfm *tfm,
-				     u8 *dst, const u8 *src)
+void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
+			       u8 *dst, const u8 *src)
 {
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
-
-	if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
-		cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src);
-		return;
-	}
-
-	cipher->cia_decrypt(tfm, dst, src);
+	cipher_crypt_one(tfm, dst, src, true);
 }
+EXPORT_SYMBOL_GPL(crypto_cipher_encrypt_one);
 
-int crypto_init_cipher_ops(struct crypto_tfm *tfm)
+void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
+			       u8 *dst, const u8 *src)
 {
-	struct cipher_tfm *ops = &tfm->crt_cipher;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
-
-	ops->cit_setkey = setkey;
-	ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ?
-		cipher_encrypt_unaligned : cipher->cia_encrypt;
-	ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ?
-		cipher_decrypt_unaligned : cipher->cia_decrypt;
-
-	return 0;
+	cipher_crypt_one(tfm, dst, src, false);
 }
+EXPORT_SYMBOL_GPL(crypto_cipher_decrypt_one);
diff --git a/crypto/cmac.c b/crypto/cmac.c
index 0928aebc6205..143a6544c873 100644
--- a/crypto/cmac.c
+++ b/crypto/cmac.c
@@ -201,7 +201,7 @@ static int cmac_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_cipher *cipher;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst);
 	struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -222,6 +222,7 @@ static void cmac_exit_tfm(struct crypto_tfm *tfm)
 static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct shash_instance *inst;
+	struct crypto_cipher_spawn *spawn;
 	struct crypto_alg *alg;
 	unsigned long alignmask;
 	int err;
@@ -230,10 +231,16 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		return err;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	spawn = shash_instance_ctx(inst);
+
+	err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[1]), 0, 0);
+	if (err)
+		goto err_free_inst;
+	alg = crypto_spawn_cipher_alg(spawn);
 
 	switch (alg->cra_blocksize) {
 	case 16:
@@ -241,19 +248,12 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 		break;
 	default:
 		err = -EINVAL;
-		goto out_put_alg;
+		goto err_free_inst;
 	}
 
-	inst = shash_alloc_instance("cmac", alg);
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
-				shash_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 
 	alignmask = alg->cra_alignmask;
 	inst->alg.base.cra_alignmask = alignmask;
@@ -280,21 +280,19 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.final = crypto_cmac_digest_final;
 	inst->alg.setkey = crypto_cmac_digest_setkey;
 
+	inst->free = shash_free_singlespawn_instance;
+
 	err = shash_register_instance(tmpl, inst);
 	if (err) {
-out_free_inst:
-		shash_free_instance(shash_crypto_instance(inst));
+err_free_inst:
+		shash_free_singlespawn_instance(inst);
 	}
-
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
 static struct crypto_template crypto_cmac_tmpl = {
 	.name = "cmac",
 	.create = cmac_create,
-	.free = shash_free_instance,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/compress.c b/crypto/compress.c
index e9edf8524787..9048fe390c46 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -6,34 +6,27 @@
  *
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  */
-#include <linux/types.h>
 #include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/string.h>
 #include "internal.h"
 
-static int crypto_compress(struct crypto_tfm *tfm,
-                            const u8 *src, unsigned int slen,
-                            u8 *dst, unsigned int *dlen)
+int crypto_comp_compress(struct crypto_comp *comp,
+			 const u8 *src, unsigned int slen,
+			 u8 *dst, unsigned int *dlen)
 {
+	struct crypto_tfm *tfm = crypto_comp_tfm(comp);
+
 	return tfm->__crt_alg->cra_compress.coa_compress(tfm, src, slen, dst,
 	                                                 dlen);
 }
+EXPORT_SYMBOL_GPL(crypto_comp_compress);
 
-static int crypto_decompress(struct crypto_tfm *tfm,
-                             const u8 *src, unsigned int slen,
-                             u8 *dst, unsigned int *dlen)
+int crypto_comp_decompress(struct crypto_comp *comp,
+			   const u8 *src, unsigned int slen,
+			   u8 *dst, unsigned int *dlen)
 {
+	struct crypto_tfm *tfm = crypto_comp_tfm(comp);
+
 	return tfm->__crt_alg->cra_compress.coa_decompress(tfm, src, slen, dst,
 	                                                   dlen);
 }
-
-int crypto_init_compress_ops(struct crypto_tfm *tfm)
-{
-	struct compress_tfm *ops = &tfm->crt_compress;
-
-	ops->cot_compress = crypto_compress;
-	ops->cot_decompress = crypto_decompress;
-
-	return 0;
-}
+EXPORT_SYMBOL_GPL(crypto_comp_decompress);
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
index 9e97912280bd..0e103fb5dd77 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -60,10 +60,8 @@ static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
 {
 	u32 *mctx = crypto_shash_ctx(hash);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 	*mctx = get_unaligned_le32(key);
 	return 0;
 }
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 7b25fe82072c..7fa9b0788685 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -74,10 +74,8 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (keylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = get_unaligned_le32(key);
 	return 0;
 }
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 927760b316a4..d94c75c840a5 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -221,48 +221,17 @@ static int cryptd_init_instance(struct crypto_instance *inst,
 	return 0;
 }
 
-static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
-				   unsigned int tail)
-{
-	char *p;
-	struct crypto_instance *inst;
-	int err;
-
-	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	inst = (void *)(p + head);
-
-	err = cryptd_init_instance(inst, alg);
-	if (err)
-		goto out_free_inst;
-
-out:
-	return p;
-
-out_free_inst:
-	kfree(p);
-	p = ERR_PTR(err);
-	goto out;
-}
-
 static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
 				  const u8 *key, unsigned int keylen)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_sync_skcipher *child = ctx->child;
-	int err;
 
 	crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_sync_skcipher_set_flags(child,
 				       crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_sync_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent,
-				  crypto_sync_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_sync_skcipher_setkey(child, key, keylen);
 }
 
 static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
@@ -421,8 +390,8 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
 	ctx = skcipher_instance_ctx(inst);
 	ctx->queue = queue;
 
-	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ctx->spawn, name, type, mask);
+	err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
+				   name, type, mask);
 	if (err)
 		goto out_free_inst;
 
@@ -491,15 +460,11 @@ static int cryptd_hash_setkey(struct crypto_ahash *parent,
 {
 	struct cryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
 	struct crypto_shash *child = ctx->child;
-	int err;
 
 	crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) &
 				      CRYPTO_TFM_REQ_MASK);
-	err = crypto_shash_setkey(child, key, keylen);
-	crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) &
-				       CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_shash_setkey(child, key, keylen);
 }
 
 static int cryptd_hash_enqueue(struct ahash_request *req,
@@ -666,44 +631,49 @@ static int cryptd_hash_import(struct ahash_request *req, const void *in)
 	return crypto_shash_import(desc, in);
 }
 
+static void cryptd_hash_free(struct ahash_instance *inst)
+{
+	struct hashd_instance_ctx *ctx = ahash_instance_ctx(inst);
+
+	crypto_drop_shash(&ctx->spawn);
+	kfree(inst);
+}
+
 static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 			      struct cryptd_queue *queue)
 {
 	struct hashd_instance_ctx *ctx;
 	struct ahash_instance *inst;
-	struct shash_alg *salg;
-	struct crypto_alg *alg;
+	struct shash_alg *alg;
 	u32 type = 0;
 	u32 mask = 0;
 	int err;
 
 	cryptd_check_internal(tb, &type, &mask);
 
-	salg = shash_attr_alg(tb[1], type, mask);
-	if (IS_ERR(salg))
-		return PTR_ERR(salg);
-
-	alg = &salg->base;
-	inst = cryptd_alloc_instance(alg, ahash_instance_headroom(),
-				     sizeof(*ctx));
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
 
 	ctx = ahash_instance_ctx(inst);
 	ctx->queue = queue;
 
-	err = crypto_init_shash_spawn(&ctx->spawn, salg,
-				      ahash_crypto_instance(inst));
+	err = crypto_grab_shash(&ctx->spawn, ahash_crypto_instance(inst),
+				crypto_attr_alg_name(tb[1]), type, mask);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
+	alg = crypto_spawn_shash_alg(&ctx->spawn);
+
+	err = cryptd_init_instance(ahash_crypto_instance(inst), &alg->base);
+	if (err)
+		goto err_free_inst;
 
 	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
-		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
-				   CRYPTO_ALG_OPTIONAL_KEY));
+		(alg->base.cra_flags & (CRYPTO_ALG_INTERNAL |
+					CRYPTO_ALG_OPTIONAL_KEY));
 
-	inst->alg.halg.digestsize = salg->digestsize;
-	inst->alg.halg.statesize = salg->statesize;
+	inst->alg.halg.digestsize = alg->digestsize;
+	inst->alg.halg.statesize = alg->statesize;
 	inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);
 
 	inst->alg.halg.base.cra_init = cryptd_hash_init_tfm;
@@ -715,19 +685,18 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.finup  = cryptd_hash_finup_enqueue;
 	inst->alg.export = cryptd_hash_export;
 	inst->alg.import = cryptd_hash_import;
-	if (crypto_shash_alg_has_setkey(salg))
+	if (crypto_shash_alg_has_setkey(alg))
 		inst->alg.setkey = cryptd_hash_setkey;
 	inst->alg.digest = cryptd_hash_digest_enqueue;
 
+	inst->free = cryptd_hash_free;
+
 	err = ahash_register_instance(tmpl, inst);
 	if (err) {
+err_free_inst:
 		crypto_drop_shash(&ctx->spawn);
-out_free_inst:
 		kfree(inst);
 	}
-
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
@@ -849,6 +818,14 @@ static void cryptd_aead_exit_tfm(struct crypto_aead *tfm)
 	crypto_free_aead(ctx->child);
 }
 
+static void cryptd_aead_free(struct aead_instance *inst)
+{
+	struct aead_instance_ctx *ctx = aead_instance_ctx(inst);
+
+	crypto_drop_aead(&ctx->aead_spawn);
+	kfree(inst);
+}
+
 static int cryptd_create_aead(struct crypto_template *tmpl,
 		              struct rtattr **tb,
 			      struct cryptd_queue *queue)
@@ -874,8 +851,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
 	ctx = aead_instance_ctx(inst);
 	ctx->queue = queue;
 
-	crypto_set_aead_spawn(&ctx->aead_spawn, aead_crypto_instance(inst));
-	err = crypto_grab_aead(&ctx->aead_spawn, name, type, mask);
+	err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst),
+			       name, type, mask);
 	if (err)
 		goto out_free_inst;
 
@@ -898,6 +875,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
 	inst->alg.encrypt = cryptd_aead_encrypt_enqueue;
 	inst->alg.decrypt = cryptd_aead_decrypt_enqueue;
 
+	inst->free = cryptd_aead_free;
+
 	err = aead_register_instance(tmpl, inst);
 	if (err) {
 out_drop_aead:
@@ -919,7 +898,7 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 		return PTR_ERR(algt);
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_BLKCIPHER:
+	case CRYPTO_ALG_TYPE_SKCIPHER:
 		return cryptd_create_skcipher(tmpl, tb, &queue);
 	case CRYPTO_ALG_TYPE_HASH:
 		return cryptd_create_hash(tmpl, tb, &queue);
@@ -930,31 +909,9 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 	return -EINVAL;
 }
 
-static void cryptd_free(struct crypto_instance *inst)
-{
-	struct cryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
-	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
-	struct aead_instance_ctx *aead_ctx = crypto_instance_ctx(inst);
-
-	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		crypto_drop_shash(&hctx->spawn);
-		kfree(ahash_instance(inst));
-		return;
-	case CRYPTO_ALG_TYPE_AEAD:
-		crypto_drop_aead(&aead_ctx->aead_spawn);
-		kfree(aead_instance(inst));
-		return;
-	default:
-		crypto_drop_spawn(&ctx->spawn);
-		kfree(inst);
-	}
-}
-
 static struct crypto_template cryptd_tmpl = {
 	.name = "cryptd",
 	.create = cryptd_create,
-	.free = cryptd_free,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 055d17977280..eb029ff1e05a 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -214,20 +214,6 @@ static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
 }
 
 /**
- * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
- * to list into the engine queue
- * @engine: the hardware engine
- * @req: the request need to be listed into the engine queue
- * TODO: Remove this function when skcipher conversion is finished
- */
-int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
-						 struct ablkcipher_request *req)
-{
-	return crypto_transfer_request_to_engine(engine, &req->base);
-}
-EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
-
-/**
  * crypto_transfer_aead_request_to_engine - transfer one aead_request
  * to list into the engine queue
  * @engine: the hardware engine
@@ -280,21 +266,6 @@ int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
 EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
 
 /**
- * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
- * the request is done
- * @engine: the hardware engine
- * @req: the request need to be finalized
- * @err: error number
- * TODO: Remove this function when skcipher conversion is finished
- */
-void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
-					struct ablkcipher_request *req, int err)
-{
-	return crypto_finalize_request(engine, &req->base, err);
-}
-EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
-
-/**
  * crypto_finalize_aead_request - finalize one aead_request if
  * the request is done
  * @engine: the hardware engine
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index 910e0b46012e..3fa20f12989f 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -213,8 +213,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 drop_alg:
 	crypto_mod_put(alg);
 
-	if (err)
+	if (err) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
@@ -321,7 +323,8 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (refcount_read(&alg->cra_refcnt) > 2)
 		goto drop_alg;
 
-	err = crypto_unregister_instance((struct crypto_instance *)alg);
+	crypto_unregister_instance((struct crypto_instance *)alg);
+	err = 0;
 
 drop_alg:
 	crypto_mod_put(alg);
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index 8bad88413de1..154884bf9275 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -213,10 +213,6 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
 		if (crypto_report_cipher(skb, alg))
 			goto nla_put_failure;
 		break;
-	case CRYPTO_ALG_TYPE_BLKCIPHER:
-		if (crypto_report_cipher(skb, alg))
-			goto nla_put_failure;
-		break;
 	case CRYPTO_ALG_TYPE_CIPHER:
 		if (crypto_report_cipher(skb, alg))
 			goto nla_put_failure;
@@ -328,8 +324,10 @@ int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 drop_alg:
 	crypto_mod_put(alg);
 
-	if (err)
+	if (err) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 70a3fccb82f3..a8feab621c6c 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -129,10 +129,12 @@ static int crypto_ctr_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
+	alg = skcipher_ialg_simple(inst);
+
 	/* Block size must be >= 4 bytes. */
 	err = -EINVAL;
 	if (alg->cra_blocksize < 4)
@@ -155,14 +157,11 @@ static int crypto_ctr_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.decrypt = crypto_ctr_crypt;
 
 	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_free_inst;
-	goto out_put_alg;
-
+	if (err) {
 out_free_inst:
-	inst->free(inst);
-out_put_alg:
-	crypto_mod_put(alg);
+		inst->free(inst);
+	}
+
 	return err;
 }
 
@@ -171,7 +170,6 @@ static int crypto_rfc3686_setkey(struct crypto_skcipher *parent,
 {
 	struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_skcipher *child = ctx->child;
-	int err;
 
 	/* the nonce is stored in bytes at end of key */
 	if (keylen < CTR_RFC3686_NONCE_SIZE)
@@ -185,11 +183,7 @@ static int crypto_rfc3686_setkey(struct crypto_skcipher *parent,
 	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_skcipher_setkey(child, key, keylen);
 }
 
 static int crypto_rfc3686_crypt(struct skcipher_request *req)
@@ -292,8 +286,8 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
 
 	spawn = skcipher_instance_ctx(inst);
 
-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(spawn, cipher_name, 0, mask);
+	err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
+				   cipher_name, 0, mask);
 	if (err)
 		goto err_free_inst;
 
diff --git a/crypto/cts.c b/crypto/cts.c
index 6b6087dbb62a..48188adc8e91 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -78,15 +78,11 @@ static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key,
 {
 	struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_skcipher *child = ctx->child;
-	int err;
 
 	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_skcipher_setkey(child, key, keylen);
 }
 
 static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err)
@@ -332,6 +328,7 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_attr_type *algt;
 	struct skcipher_alg *alg;
 	const char *cipher_name;
+	u32 mask;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -341,6 +338,8 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	cipher_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(cipher_name))
 		return PTR_ERR(cipher_name);
@@ -351,10 +350,8 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	spawn = skcipher_instance_ctx(inst);
 
-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(spawn, cipher_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
+				   cipher_name, 0, mask);
 	if (err)
 		goto err_free_inst;
 
diff --git a/crypto/curve25519-generic.c b/crypto/curve25519-generic.c
new file mode 100644
index 000000000000..bd88fd571393
--- /dev/null
+++ b/crypto/curve25519-generic.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/kpp.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	u8 *secret = kpp_tfm_ctx(tfm);
+
+	if (!len)
+		curve25519_generate_secret(secret);
+	else if (len == CURVE25519_KEY_SIZE &&
+		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+		memcpy(secret, buf, CURVE25519_KEY_SIZE);
+	else
+		return -EINVAL;
+	return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	const u8 *secret = kpp_tfm_ctx(tfm);
+	u8 public_key[CURVE25519_KEY_SIZE];
+	u8 buf[CURVE25519_KEY_SIZE];
+	int copied, nbytes;
+	u8 const *bp;
+
+	if (req->src) {
+		copied = sg_copy_to_buffer(req->src,
+					   sg_nents_for_len(req->src,
+							    CURVE25519_KEY_SIZE),
+					   public_key, CURVE25519_KEY_SIZE);
+		if (copied != CURVE25519_KEY_SIZE)
+			return -EINVAL;
+		bp = public_key;
+	} else {
+		bp = curve25519_base_point;
+	}
+
+	curve25519_generic(buf, secret, bp);
+
+	/* might want less than we've got */
+	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
+	if (copied != nbytes)
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+	return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+	.base.cra_name		= "curve25519",
+	.base.cra_driver_name	= "curve25519-generic",
+	.base.cra_priority	= 100,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,
+
+	.set_secret		= curve25519_set_secret,
+	.generate_public_key	= curve25519_compute_value,
+	.compute_shared_secret	= curve25519_compute_value,
+	.max_size		= curve25519_max_size,
+};
+
+static int curve25519_init(void)
+{
+	return crypto_register_kpp(&curve25519_alg);
+}
+
+static void curve25519_exit(void)
+{
+	crypto_unregister_kpp(&curve25519_alg);
+}
+
+subsys_initcall(curve25519_init);
+module_exit(curve25519_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-generic");
+MODULE_LICENSE("GPL");
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index 6e13a4a29ecb..c85354a5e94c 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -29,11 +29,8 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		else
 			err = 0;
 	}
-
-	if (err) {
+	if (err)
 		memset(dctx, 0, sizeof(*dctx));
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-	}
 	return err;
 }
 
@@ -64,11 +61,8 @@ static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
 		else
 			err = 0;
 	}
-
-	if (err) {
+	if (err)
 		memset(dctx, 0, sizeof(*dctx));
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-	}
 	return err;
 }
 
diff --git a/crypto/ecb.c b/crypto/ecb.c
index 9d6981ca7d5d..69a687cbdf21 100644
--- a/crypto/ecb.c
+++ b/crypto/ecb.c
@@ -61,10 +61,9 @@ static int crypto_ecb_decrypt(struct skcipher_request *req)
 static int crypto_ecb_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct skcipher_instance *inst;
-	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
@@ -76,7 +75,7 @@ static int crypto_ecb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	err = skcipher_register_instance(tmpl, inst);
 	if (err)
 		inst->free(inst);
-	crypto_mod_put(alg);
+
 	return err;
 }
 
diff --git a/crypto/ecc.c b/crypto/ecc.c
index dfe114bc0c4a..02d35be7702b 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -336,7 +336,7 @@ static u64 vli_usub(u64 *result, const u64 *left, u64 right,
 static uint128_t mul_64_64(u64 left, u64 right)
 {
 	uint128_t result;
-#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+#if defined(CONFIG_ARCH_SUPPORTS_INT128)
 	unsigned __int128 m = (unsigned __int128)left * right;
 
 	result.m_low  = m;
@@ -1284,10 +1284,11 @@ EXPORT_SYMBOL(ecc_point_mult_shamir);
 static inline void ecc_swap_digits(const u64 *in, u64 *out,
 				   unsigned int ndigits)
 {
+	const __be64 *src = (__force __be64 *)in;
 	int i;
 
 	for (i = 0; i < ndigits; i++)
-		out[i] = __swab64(in[ndigits - 1 - i]);
+		out[i] = be64_to_cpu(src[ndigits - 1 - i]);
 }
 
 static int __ecc_is_key_valid(const struct ecc_curve *curve,
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index a49cbf7b0929..4a2f02baba14 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -133,29 +133,17 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
 	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
-	inst->free = aead_geniv_free;
-
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto free_inst;
-
-out:
-	return err;
-
+	if (err) {
 free_inst:
-	aead_geniv_free(inst);
-	goto out;
-}
-
-static void echainiv_free(struct crypto_instance *inst)
-{
-	aead_geniv_free(aead_instance(inst));
+		inst->free(inst);
+	}
+	return err;
 }
 
 static struct crypto_template echainiv_tmpl = {
 	.name = "echainiv",
 	.create = echainiv_aead_create,
-	.free = echainiv_free,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/essiv.c b/crypto/essiv.c
index a8befc8fb06e..465a89c9d1ef 100644
--- a/crypto/essiv.c
+++ b/crypto/essiv.c
@@ -75,9 +75,6 @@ static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
 				  crypto_skcipher_get_flags(tfm) &
 				  CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(tctx->u.skcipher, key, keylen);
-	crypto_skcipher_set_flags(tfm,
-				  crypto_skcipher_get_flags(tctx->u.skcipher) &
-				  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -90,13 +87,8 @@ static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
 	crypto_cipher_set_flags(tctx->essiv_cipher,
 				crypto_skcipher_get_flags(tfm) &
 				CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(tctx->essiv_cipher, salt,
-				   crypto_shash_digestsize(tctx->hash));
-	crypto_skcipher_set_flags(tfm,
-				  crypto_cipher_get_flags(tctx->essiv_cipher) &
-				  CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_cipher_setkey(tctx->essiv_cipher, salt,
+				    crypto_shash_digestsize(tctx->hash));
 }
 
 static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -112,15 +104,11 @@ static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	crypto_aead_set_flags(tctx->u.aead, crypto_aead_get_flags(tfm) &
 					    CRYPTO_TFM_REQ_MASK);
 	err = crypto_aead_setkey(tctx->u.aead, key, keylen);
-	crypto_aead_set_flags(tfm, crypto_aead_get_flags(tctx->u.aead) &
-				   CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
-	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) {
-		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
 		return -EINVAL;
-	}
 
 	desc->tfm = tctx->hash;
 	err = crypto_shash_init(desc) ?:
@@ -132,12 +120,8 @@ static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK);
 	crypto_cipher_set_flags(tctx->essiv_cipher, crypto_aead_get_flags(tfm) &
 						    CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(tctx->essiv_cipher, salt,
-				   crypto_shash_digestsize(tctx->hash));
-	crypto_aead_set_flags(tfm, crypto_cipher_get_flags(tctx->essiv_cipher) &
-				   CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_cipher_setkey(tctx->essiv_cipher, salt,
+				    crypto_shash_digestsize(tctx->hash));
 }
 
 static int essiv_aead_setauthsize(struct crypto_aead *tfm,
@@ -188,8 +172,7 @@ static void essiv_aead_done(struct crypto_async_request *areq, int err)
 	struct aead_request *req = areq->data;
 	struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
 
-	if (rctx->assoc)
-		kfree(rctx->assoc);
+	kfree(rctx->assoc);
 	aead_request_complete(req, err);
 }
 
@@ -348,7 +331,7 @@ static int essiv_aead_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	subreq_size = FIELD_SIZEOF(struct essiv_aead_request_ctx, aead_req) +
+	subreq_size = sizeof_field(struct essiv_aead_request_ctx, aead_req) +
 		      crypto_aead_reqsize(aead);
 
 	tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) +
@@ -443,7 +426,7 @@ static bool essiv_supported_algorithms(const char *essiv_cipher_name,
 	if (ivsize != alg->cra_blocksize)
 		goto out;
 
-	if (crypto_shash_alg_has_setkey(hash_alg))
+	if (crypto_shash_alg_needs_key(hash_alg))
 		goto out;
 
 	ret = true;
@@ -469,6 +452,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct shash_alg *hash_alg;
 	int ivsize;
 	u32 type;
+	u32 mask;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -484,9 +468,10 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 		return PTR_ERR(shash_name);
 
 	type = algt->type & algt->mask;
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
 	switch (type) {
-	case CRYPTO_ALG_TYPE_BLKCIPHER:
+	case CRYPTO_ALG_TYPE_SKCIPHER:
 		skcipher_inst = kzalloc(sizeof(*skcipher_inst) +
 					sizeof(*ictx), GFP_KERNEL);
 		if (!skcipher_inst)
@@ -496,11 +481,8 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 		ictx = crypto_instance_ctx(inst);
 
 		/* Symmetric cipher, e.g., "cbc(aes)" */
-		crypto_set_skcipher_spawn(&ictx->u.skcipher_spawn, inst);
-		err = crypto_grab_skcipher(&ictx->u.skcipher_spawn,
-					   inner_cipher_name, 0,
-					   crypto_requires_sync(algt->type,
-								algt->mask));
+		err = crypto_grab_skcipher(&ictx->u.skcipher_spawn, inst,
+					   inner_cipher_name, 0, mask);
 		if (err)
 			goto out_free_inst;
 		skcipher_alg = crypto_spawn_skcipher_alg(&ictx->u.skcipher_spawn);
@@ -518,11 +500,8 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 		ictx = crypto_instance_ctx(inst);
 
 		/* AEAD cipher, e.g., "authenc(hmac(sha256),cbc(aes))" */
-		crypto_set_aead_spawn(&ictx->u.aead_spawn, inst);
-		err = crypto_grab_aead(&ictx->u.aead_spawn,
-				       inner_cipher_name, 0,
-				       crypto_requires_sync(algt->type,
-							    algt->mask));
+		err = crypto_grab_aead(&ictx->u.aead_spawn, inst,
+				       inner_cipher_name, 0, mask);
 		if (err)
 			goto out_free_inst;
 		aead_alg = crypto_spawn_aead_alg(&ictx->u.aead_spawn);
@@ -586,7 +565,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 	base->cra_alignmask	= block_base->cra_alignmask;
 	base->cra_priority	= block_base->cra_priority;
 
-	if (type == CRYPTO_ALG_TYPE_BLKCIPHER) {
+	if (type == CRYPTO_ALG_TYPE_SKCIPHER) {
 		skcipher_inst->alg.setkey	= essiv_skcipher_setkey;
 		skcipher_inst->alg.encrypt	= essiv_skcipher_encrypt;
 		skcipher_inst->alg.decrypt	= essiv_skcipher_decrypt;
@@ -628,7 +607,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 out_free_hash:
 	crypto_mod_put(_hash_alg);
 out_drop_skcipher:
-	if (type == CRYPTO_ALG_TYPE_BLKCIPHER)
+	if (type == CRYPTO_ALG_TYPE_SKCIPHER)
 		crypto_drop_skcipher(&ictx->u.skcipher_spawn);
 	else
 		crypto_drop_aead(&ictx->u.aead_spawn);
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 73884208f075..8e5c0ac65661 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -13,7 +13,6 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/gcm.h>
 #include <crypto/hash.h>
-#include "internal.h"
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -111,8 +110,6 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(ctr, key, keylen);
-	crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) &
-				    CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -141,9 +138,6 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_ahash_set_flags(ghash, crypto_aead_get_flags(aead) &
 			       CRYPTO_TFM_REQ_MASK);
 	err = crypto_ahash_setkey(ghash, (u8 *)&data->hash, sizeof(be128));
-	crypto_aead_set_flags(aead, crypto_ahash_get_flags(ghash) &
-			      CRYPTO_TFM_RES_MASK);
-
 out:
 	kzfree(data);
 	return err;
@@ -585,11 +579,11 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 				    const char *ghash_name)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
+	struct gcm_instance_ctx *ctx;
 	struct skcipher_alg *ctr;
-	struct crypto_alg *ghash_alg;
 	struct hash_alg_common *ghash;
-	struct gcm_instance_ctx *ctx;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -599,39 +593,28 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
-				    CRYPTO_ALG_TYPE_HASH,
-				    CRYPTO_ALG_TYPE_AHASH_MASK |
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
-	if (IS_ERR(ghash_alg))
-		return PTR_ERR(ghash_alg);
-
-	ghash = __crypto_hash_alg_common(ghash_alg);
+	mask = crypto_requires_sync(algt->type, algt->mask);
 
-	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
 	if (!inst)
-		goto out_put_ghash;
-
+		return -ENOMEM;
 	ctx = aead_instance_ctx(inst);
-	err = crypto_init_ahash_spawn(&ctx->ghash, ghash,
-				      aead_crypto_instance(inst));
+
+	err = crypto_grab_ahash(&ctx->ghash, aead_crypto_instance(inst),
+				ghash_name, 0, mask);
 	if (err)
 		goto err_free_inst;
+	ghash = crypto_spawn_ahash_alg(&ctx->ghash);
 
 	err = -EINVAL;
 	if (strcmp(ghash->base.cra_name, "ghash") != 0 ||
 	    ghash->digestsize != 16)
-		goto err_drop_ghash;
+		goto err_free_inst;
 
-	crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_skcipher(&ctx->ctr, aead_crypto_instance(inst),
+				   ctr_name, 0, mask);
 	if (err)
-		goto err_drop_ghash;
-
+		goto err_free_inst;
 	ctr = crypto_spawn_skcipher_alg(&ctx->ctr);
 
 	/* The skcipher algorithm must be CTR mode, using 16-byte blocks. */
@@ -639,18 +622,18 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 	if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 ||
 	    crypto_skcipher_alg_ivsize(ctr) != 16 ||
 	    ctr->base.cra_blocksize != 1)
-		goto out_put_ctr;
+		goto err_free_inst;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
 		     "gcm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME)
-		goto out_put_ctr;
+		goto err_free_inst;
 
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "gcm_base(%s,%s)", ctr->base.cra_driver_name,
-		     ghash_alg->cra_driver_name) >=
+		     ghash->base.cra_driver_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		goto out_put_ctr;
+		goto err_free_inst;
 
 	inst->alg.base.cra_flags = (ghash->base.cra_flags |
 				    ctr->base.cra_flags) & CRYPTO_ALG_ASYNC;
@@ -673,20 +656,11 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 	inst->free = crypto_gcm_free;
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto out_put_ctr;
-
-out_put_ghash:
-	crypto_mod_put(ghash_alg);
-	return err;
-
-out_put_ctr:
-	crypto_drop_skcipher(&ctx->ctr);
-err_drop_ghash:
-	crypto_drop_ahash(&ctx->ghash);
+	if (err) {
 err_free_inst:
-	kfree(inst);
-	goto out_put_ghash;
+		crypto_gcm_free(inst);
+	}
+	return err;
 }
 
 static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -727,7 +701,6 @@ static int crypto_rfc4106_setkey(struct crypto_aead *parent, const u8 *key,
 {
 	struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(parent);
 	struct crypto_aead *child = ctx->child;
-	int err;
 
 	if (keylen < 4)
 		return -EINVAL;
@@ -738,11 +711,7 @@ static int crypto_rfc4106_setkey(struct crypto_aead *parent, const u8 *key,
 	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_aead_set_flags(child, crypto_aead_get_flags(parent) &
 				     CRYPTO_TFM_REQ_MASK);
-	err = crypto_aead_setkey(child, key, keylen);
-	crypto_aead_set_flags(parent, crypto_aead_get_flags(child) &
-				      CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_aead_setkey(child, key, keylen);
 }
 
 static int crypto_rfc4106_setauthsize(struct crypto_aead *parent,
@@ -867,6 +836,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
 				 struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
 	struct crypto_aead_spawn *spawn;
 	struct aead_alg *alg;
@@ -880,6 +850,8 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	ccm_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ccm_name))
 		return PTR_ERR(ccm_name);
@@ -889,9 +861,8 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
 		return -ENOMEM;
 
 	spawn = aead_instance_ctx(inst);
-	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
-	err = crypto_grab_aead(spawn, ccm_name, 0,
-			       crypto_requires_sync(algt->type, algt->mask));
+	err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
+			       ccm_name, 0, mask);
 	if (err)
 		goto out_free_inst;
 
@@ -956,7 +927,6 @@ static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key,
 {
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(parent);
 	struct crypto_aead *child = ctx->child;
-	int err;
 
 	if (keylen < 4)
 		return -EINVAL;
@@ -967,11 +937,7 @@ static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key,
 	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_aead_set_flags(child, crypto_aead_get_flags(parent) &
 				     CRYPTO_TFM_REQ_MASK);
-	err = crypto_aead_setkey(child, key, keylen);
-	crypto_aead_set_flags(parent, crypto_aead_get_flags(child) &
-				      CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_aead_setkey(child, key, keylen);
 }
 
 static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
@@ -1103,6 +1069,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
 				struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct aead_instance *inst;
 	struct crypto_aead_spawn *spawn;
 	struct aead_alg *alg;
@@ -1117,6 +1084,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	ccm_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ccm_name))
 		return PTR_ERR(ccm_name);
@@ -1127,9 +1096,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
 
 	ctx = aead_instance_ctx(inst);
 	spawn = &ctx->aead;
-	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
-	err = crypto_grab_aead(spawn, ccm_name, 0,
-			       crypto_requires_sync(algt->type, algt->mask));
+	err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
+			       ccm_name, 0, mask);
 	if (err)
 		goto out_free_inst;
 
diff --git a/crypto/geniv.c b/crypto/geniv.c
new file mode 100644
index 000000000000..dbcc640274cd
--- /dev/null
+++ b/crypto/geniv.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * geniv: Shared IV generator code
+ *
+ * This file provides common code to IV generators such as seqiv.
+ *
+ * Copyright (c) 2007-2019 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/internal/rng.h>
+#include <crypto/null.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+static int aead_geniv_setkey(struct crypto_aead *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setkey(ctx->child, key, keylen);
+}
+
+static int aead_geniv_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setauthsize(ctx->child, authsize);
+}
+
+static void aead_geniv_free(struct aead_instance *inst)
+{
+	crypto_drop_aead(aead_instance_ctx(inst));
+	kfree(inst);
+}
+
+struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+				       struct rtattr **tb, u32 type, u32 mask)
+{
+	const char *name;
+	struct crypto_aead_spawn *spawn;
+	struct crypto_attr_type *algt;
+	struct aead_instance *inst;
+	struct aead_alg *alg;
+	unsigned int ivsize;
+	unsigned int maxauthsize;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return ERR_CAST(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+		return ERR_PTR(-EINVAL);
+
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return ERR_CAST(name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	spawn = aead_instance_ctx(inst);
+
+	/* Ignore async algorithms if necessary. */
+	mask |= crypto_requires_sync(algt->type, algt->mask);
+
+	err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
+			       name, type, mask);
+	if (err)
+		goto err_free_inst;
+
+	alg = crypto_spawn_aead_alg(spawn);
+
+	ivsize = crypto_aead_alg_ivsize(alg);
+	maxauthsize = crypto_aead_alg_maxauthsize(alg);
+
+	err = -EINVAL;
+	if (ivsize < sizeof(u64))
+		goto err_drop_alg;
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_driver_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
+
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
+
+	inst->alg.setkey = aead_geniv_setkey;
+	inst->alg.setauthsize = aead_geniv_setauthsize;
+
+	inst->alg.ivsize = ivsize;
+	inst->alg.maxauthsize = maxauthsize;
+
+	inst->free = aead_geniv_free;
+
+out:
+	return inst;
+
+err_drop_alg:
+	crypto_drop_aead(spawn);
+err_free_inst:
+	kfree(inst);
+	inst = ERR_PTR(err);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(aead_geniv_alloc);
+
+int aead_init_geniv(struct crypto_aead *aead)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(aead);
+	struct aead_instance *inst = aead_alg_instance(aead);
+	struct crypto_aead *child;
+	int err;
+
+	spin_lock_init(&ctx->lock);
+
+	err = crypto_get_default_rng();
+	if (err)
+		goto out;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(aead));
+	crypto_put_default_rng();
+	if (err)
+		goto out;
+
+	ctx->sknull = crypto_get_default_null_skcipher();
+	err = PTR_ERR(ctx->sknull);
+	if (IS_ERR(ctx->sknull))
+		goto out;
+
+	child = crypto_spawn_aead(aead_instance_ctx(inst));
+	err = PTR_ERR(child);
+	if (IS_ERR(child))
+		goto drop_null;
+
+	ctx->child = child;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(child) +
+				      sizeof(struct aead_request));
+
+	err = 0;
+
+out:
+	return err;
+
+drop_null:
+	crypto_put_default_null_skcipher();
+	goto out;
+}
+EXPORT_SYMBOL_GPL(aead_init_geniv);
+
+void aead_exit_geniv(struct crypto_aead *tfm)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	crypto_free_aead(ctx->child);
+	crypto_put_default_null_skcipher();
+}
+EXPORT_SYMBOL_GPL(aead_exit_geniv);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Shared IV generator code");
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index 5027b3461c92..c70d163c1ac9 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -58,10 +58,8 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
 	be128 k;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	if (ctx->gf128)
 		gf128mul_free_4k(ctx->gf128);
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 8b2a212eb0ad..e38bfb948278 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -138,12 +138,11 @@ static int hmac_finup(struct shash_desc *pdesc, const u8 *data,
 	       crypto_shash_finup(desc, out, ds, out);
 }
 
-static int hmac_init_tfm(struct crypto_tfm *tfm)
+static int hmac_init_tfm(struct crypto_shash *parent)
 {
-	struct crypto_shash *parent = __crypto_shash_cast(tfm);
 	struct crypto_shash *hash;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_shash_spawn *spawn = crypto_instance_ctx(inst);
+	struct shash_instance *inst = shash_alg_instance(parent);
+	struct crypto_shash_spawn *spawn = shash_instance_ctx(inst);
 	struct hmac_ctx *ctx = hmac_ctx(parent);
 
 	hash = crypto_spawn_shash(spawn);
@@ -152,24 +151,21 @@ static int hmac_init_tfm(struct crypto_tfm *tfm)
 
 	parent->descsize = sizeof(struct shash_desc) +
 			   crypto_shash_descsize(hash);
-	if (WARN_ON(parent->descsize > HASH_MAX_DESCSIZE)) {
-		crypto_free_shash(hash);
-		return -EINVAL;
-	}
 
 	ctx->hash = hash;
 	return 0;
 }
 
-static void hmac_exit_tfm(struct crypto_tfm *tfm)
+static void hmac_exit_tfm(struct crypto_shash *parent)
 {
-	struct hmac_ctx *ctx = hmac_ctx(__crypto_shash_cast(tfm));
+	struct hmac_ctx *ctx = hmac_ctx(parent);
 	crypto_free_shash(ctx->hash);
 }
 
 static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct shash_instance *inst;
+	struct crypto_shash_spawn *spawn;
 	struct crypto_alg *alg;
 	struct shash_alg *salg;
 	int err;
@@ -180,31 +176,32 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		return err;
 
-	salg = shash_attr_alg(tb[1], 0, 0);
-	if (IS_ERR(salg))
-		return PTR_ERR(salg);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	spawn = shash_instance_ctx(inst);
+
+	err = crypto_grab_shash(spawn, shash_crypto_instance(inst),
+				crypto_attr_alg_name(tb[1]), 0, 0);
+	if (err)
+		goto err_free_inst;
+	salg = crypto_spawn_shash_alg(spawn);
 	alg = &salg->base;
 
-	/* The underlying hash algorithm must be unkeyed */
+	/* The underlying hash algorithm must not require a key */
 	err = -EINVAL;
-	if (crypto_shash_alg_has_setkey(salg))
-		goto out_put_alg;
+	if (crypto_shash_alg_needs_key(salg))
+		goto err_free_inst;
 
 	ds = salg->digestsize;
 	ss = salg->statesize;
 	if (ds > alg->cra_blocksize ||
 	    ss < alg->cra_blocksize)
-		goto out_put_alg;
+		goto err_free_inst;
 
-	inst = shash_alloc_instance("hmac", alg);
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	err = crypto_init_shash_spawn(shash_instance_ctx(inst), salg,
-				      shash_crypto_instance(inst));
+	err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
@@ -217,9 +214,6 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_ctxsize = sizeof(struct hmac_ctx) +
 				     ALIGN(ss * 2, crypto_tfm_ctx_alignment());
 
-	inst->alg.base.cra_init = hmac_init_tfm;
-	inst->alg.base.cra_exit = hmac_exit_tfm;
-
 	inst->alg.init = hmac_init;
 	inst->alg.update = hmac_update;
 	inst->alg.final = hmac_final;
@@ -227,22 +221,22 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.export = hmac_export;
 	inst->alg.import = hmac_import;
 	inst->alg.setkey = hmac_setkey;
+	inst->alg.init_tfm = hmac_init_tfm;
+	inst->alg.exit_tfm = hmac_exit_tfm;
+
+	inst->free = shash_free_singlespawn_instance;
 
 	err = shash_register_instance(tmpl, inst);
 	if (err) {
-out_free_inst:
-		shash_free_instance(shash_crypto_instance(inst));
+err_free_inst:
+		shash_free_singlespawn_instance(inst);
 	}
-
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
 static struct crypto_template hmac_tmpl = {
 	.name = "hmac",
 	.create = hmac_create,
-	.free = shash_free_instance,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/internal.h b/crypto/internal.h
index 93df7bec844a..d5ebc60c5143 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -58,9 +58,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg)
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
 struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 
-int crypto_init_cipher_ops(struct crypto_tfm *tfm);
-int crypto_init_compress_ops(struct crypto_tfm *tfm);
-
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
 void crypto_alg_tested(const char *name, int err);
@@ -68,7 +65,6 @@ void crypto_alg_tested(const char *name, int err);
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 			  struct crypto_alg *nalg);
 void crypto_remove_final(struct list_head *list);
-void crypto_shoot_alg(struct crypto_alg *alg);
 struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
 				      u32 mask);
 void *crypto_create_tfm(struct crypto_alg *alg,
diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c
index 701b8d86ab49..a5ce8f96790f 100644
--- a/crypto/jitterentropy-kcapi.c
+++ b/crypto/jitterentropy-kcapi.c
@@ -44,13 +44,7 @@
 #include <linux/crypto.h>
 #include <crypto/internal/rng.h>
 
-struct rand_data;
-int jent_read_entropy(struct rand_data *ec, unsigned char *data,
-		      unsigned int len);
-int jent_entropy_init(void);
-struct rand_data *jent_entropy_collector_alloc(unsigned int osr,
-					       unsigned int flags);
-void jent_entropy_collector_free(struct rand_data *entropy_collector);
+#include "jitterentropy.h"
 
 /***************************************************************************
  * Helper function
diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
index 77fa2120fe0c..042157f0d28b 100644
--- a/crypto/jitterentropy.c
+++ b/crypto/jitterentropy.c
@@ -103,12 +103,7 @@ struct rand_data {
  * Helper functions
  ***************************************************************************/
 
-void jent_get_nstime(__u64 *out);
-void *jent_zalloc(unsigned int len);
-void jent_zfree(void *ptr);
-int jent_fips_enabled(void);
-void jent_panic(char *s);
-void jent_memcpy(void *dest, const void *src, unsigned int n);
+#include "jitterentropy.h"
 
 /**
  * Update of the loop count used for the next round of
@@ -172,7 +167,7 @@ static __u64 jent_loop_shuffle(struct rand_data *ec,
  * implies that careful retesting must be done.
  *
  * Input:
- * @ec entropy collector struct -- may be NULL
+ * @ec entropy collector struct
  * @time time stamp to be injected
  * @loop_cnt if a value not equal to 0 is set, use the given value as number of
  *	     loops to perform the folding
@@ -400,8 +395,8 @@ static void jent_gen_entropy(struct rand_data *ec)
  * primes the test if needed.
  *
  * Return:
- * 0 if FIPS test passed
- * < 0 if FIPS test failed
+ * returns normally if FIPS test passed
+ * panics the kernel if FIPS test failed
  */
 static void jent_fips_test(struct rand_data *ec)
 {
diff --git a/crypto/jitterentropy.h b/crypto/jitterentropy.h
new file mode 100644
index 000000000000..c83fff32d130
--- /dev/null
+++ b/crypto/jitterentropy.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+extern void *jent_zalloc(unsigned int len);
+extern void jent_zfree(void *ptr);
+extern int jent_fips_enabled(void);
+extern void jent_panic(char *s);
+extern void jent_memcpy(void *dest, const void *src, unsigned int n);
+extern void jent_get_nstime(__u64 *out);
+
+struct rand_data;
+extern int jent_entropy_init(void);
+extern int jent_read_entropy(struct rand_data *ec, unsigned char *data,
+			     unsigned int len);
+
+extern struct rand_data *jent_entropy_collector_alloc(unsigned int osr,
+						      unsigned int flags);
+extern void jent_entropy_collector_free(struct rand_data *entropy_collector);
diff --git a/crypto/keywrap.c b/crypto/keywrap.c
index a155c88105ea..0355cce21b1e 100644
--- a/crypto/keywrap.c
+++ b/crypto/keywrap.c
@@ -266,10 +266,12 @@ static int crypto_kw_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
+	alg = skcipher_ialg_simple(inst);
+
 	err = -EINVAL;
 	/* Section 5.1 requirement for KW */
 	if (alg->cra_blocksize != sizeof(struct crypto_kw_block))
@@ -283,14 +285,11 @@ static int crypto_kw_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.decrypt = crypto_kw_decrypt;
 
 	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_free_inst;
-	goto out_put_alg;
-
+	if (err) {
 out_free_inst:
-	inst->free(inst);
-out_put_alg:
-	crypto_mod_put(alg);
+		inst->free(inst);
+	}
+
 	return err;
 }
 
diff --git a/crypto/lrw.c b/crypto/lrw.c
index be829f6afc8e..63c485c0d8a6 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -79,8 +79,6 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(child, key, keylen - bsize);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -303,6 +301,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct skcipher_alg *alg;
 	const char *cipher_name;
 	char ecb_name[CRYPTO_MAX_ALG_NAME];
+	u32 mask;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -312,6 +311,8 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	cipher_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(cipher_name))
 		return PTR_ERR(cipher_name);
@@ -322,19 +323,17 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	spawn = skcipher_instance_ctx(inst);
 
-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(spawn, cipher_name, 0,
-				   crypto_requires_sync(algt->type,
-							algt->mask));
+	err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
+				   cipher_name, 0, mask);
 	if (err == -ENOENT) {
 		err = -ENAMETOOLONG;
 		if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
 			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
 			goto err_free_inst;
 
-		err = crypto_grab_skcipher(spawn, ecb_name, 0,
-					   crypto_requires_sync(algt->type,
-								algt->mask));
+		err = crypto_grab_skcipher(spawn,
+					   skcipher_crypto_instance(inst),
+					   ecb_name, 0, mask);
 	}
 
 	if (err)
diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c
index 20e6220f46f6..63350c4ad461 100644
--- a/crypto/michael_mic.c
+++ b/crypto/michael_mic.c
@@ -137,10 +137,8 @@ static int michael_setkey(struct crypto_shash *tfm, const u8 *key,
 
 	const __le32 *data = (const __le32 *)key;
 
-	if (keylen != 8) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != 8)
 		return -EINVAL;
-	}
 
 	mctx->l = le32_to_cpu(data[0]);
 	mctx->r = le32_to_cpu(data[1]);
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
index 9ab4e07cde4d..8a3006c3b51b 100644
--- a/crypto/nhpoly1305.c
+++ b/crypto/nhpoly1305.c
@@ -33,6 +33,7 @@
 #include <asm/unaligned.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
 #include <crypto/nhpoly1305.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
@@ -78,7 +79,7 @@ static void process_nh_hash_value(struct nhpoly1305_state *state,
 	BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
 
 	poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
-			     NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+			     NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
 }
 
 /*
@@ -209,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
 	if (state->nh_remaining)
 		process_nh_hash_value(state, key);
 
-	poly1305_core_emit(&state->poly_state, dst);
+	poly1305_core_emit(&state->poly_state, NULL, dst);
 	return 0;
 }
 EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
diff --git a/crypto/ofb.c b/crypto/ofb.c
index 133ff4c7f2c6..2ec68e3f2c55 100644
--- a/crypto/ofb.c
+++ b/crypto/ofb.c
@@ -55,10 +55,12 @@ static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
+	alg = skcipher_ialg_simple(inst);
+
 	/* OFB mode is a stream cipher. */
 	inst->alg.base.cra_blocksize = 1;
 
@@ -75,7 +77,6 @@ static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		inst->free(inst);
 
-	crypto_mod_put(alg);
 	return err;
 }
 
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index 862cdb8d8b6c..ae921fb74dc9 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -153,10 +153,9 @@ static int crypto_pcbc_decrypt(struct skcipher_request *req)
 static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct skcipher_instance *inst;
-	struct crypto_alg *alg;
 	int err;
 
-	inst = skcipher_alloc_instance_simple(tmpl, tb, &alg);
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
@@ -166,7 +165,7 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	err = skcipher_register_instance(tmpl, inst);
 	if (err)
 		inst->free(inst);
-	crypto_mod_put(alg);
+
 	return err;
 }
 
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 543792e0ebf0..1b632139a8c1 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/notifier.h>
 #include <linux/kobject.h>
 #include <linux/cpu.h>
 #include <crypto/pcrypt.h>
@@ -24,6 +23,8 @@ static struct kset           *pcrypt_kset;
 
 struct pcrypt_instance_ctx {
 	struct crypto_aead_spawn spawn;
+	struct padata_shell *psenc;
+	struct padata_shell *psdec;
 	atomic_t tfm_count;
 };
 
@@ -32,6 +33,12 @@ struct pcrypt_aead_ctx {
 	unsigned int cb_cpu;
 };
 
+static inline struct pcrypt_instance_ctx *pcrypt_tfm_ictx(
+	struct crypto_aead *tfm)
+{
+	return aead_instance_ctx(aead_alg_instance(tfm));
+}
+
 static int pcrypt_aead_setkey(struct crypto_aead *parent,
 			      const u8 *key, unsigned int keylen)
 {
@@ -63,7 +70,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err)
 	struct padata_priv *padata = pcrypt_request_padata(preq);
 
 	padata->info = err;
-	req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	padata_do_serial(padata);
 }
@@ -90,6 +96,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead);
 	u32 flags = aead_request_flags(req);
+	struct pcrypt_instance_ctx *ictx;
+
+	ictx = pcrypt_tfm_ictx(aead);
 
 	memset(padata, 0, sizeof(struct padata_priv));
 
@@ -103,7 +112,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu);
+	err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -132,6 +141,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead);
 	u32 flags = aead_request_flags(req);
+	struct pcrypt_instance_ctx *ictx;
+
+	ictx = pcrypt_tfm_ictx(aead);
 
 	memset(padata, 0, sizeof(struct padata_priv));
 
@@ -145,7 +157,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu);
+	err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -192,6 +204,8 @@ static void pcrypt_free(struct aead_instance *inst)
 	struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
 
 	crypto_drop_aead(&ctx->spawn);
+	padata_free_shell(ctx->psdec);
+	padata_free_shell(ctx->psenc);
 	kfree(inst);
 }
 
@@ -233,12 +247,21 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
 	if (!inst)
 		return -ENOMEM;
 
+	err = -ENOMEM;
+
 	ctx = aead_instance_ctx(inst);
-	crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst));
+	ctx->psenc = padata_alloc_shell(pencrypt);
+	if (!ctx->psenc)
+		goto out_free_inst;
+
+	ctx->psdec = padata_alloc_shell(pdecrypt);
+	if (!ctx->psdec)
+		goto out_free_psenc;
 
-	err = crypto_grab_aead(&ctx->spawn, name, 0, 0);
+	err = crypto_grab_aead(&ctx->spawn, aead_crypto_instance(inst),
+			       name, 0, 0);
 	if (err)
-		goto out_free_inst;
+		goto out_free_psdec;
 
 	alg = crypto_spawn_aead_alg(&ctx->spawn);
 	err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base);
@@ -271,6 +294,10 @@ out:
 
 out_drop_aead:
 	crypto_drop_aead(&ctx->spawn);
+out_free_psdec:
+	padata_free_shell(ctx->psdec);
+out_free_psenc:
+	padata_free_shell(ctx->psenc);
 out_free_inst:
 	kfree(inst);
 	goto out;
@@ -362,11 +389,12 @@ err:
 
 static void __exit pcrypt_exit(void)
 {
+	crypto_unregister_template(&pcrypt_tmpl);
+
 	pcrypt_fini_padata(pencrypt);
 	pcrypt_fini_padata(pdecrypt);
 
 	kset_unregister(pcrypt_kset);
-	crypto_unregister_template(&pcrypt_tmpl);
 }
 
 subsys_initcall(pcrypt_init);
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index adc40298c749..94af47eb6fa6 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -13,65 +13,33 @@
 
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
+#include <crypto/internal/poly1305.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <asm/unaligned.h>
 
-static inline u64 mlt(u64 a, u64 b)
-{
-	return a * b;
-}
-
-static inline u32 sr(u64 v, u_char n)
-{
-	return v >> n;
-}
-
-static inline u32 and(u32 v, u32 mask)
-{
-	return v & mask;
-}
-
-int crypto_poly1305_init(struct shash_desc *desc)
+static int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
 	poly1305_core_init(&dctx->h);
 	dctx->buflen = 0;
-	dctx->rset = false;
+	dctx->rset = 0;
 	dctx->sset = false;
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
-{
-	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	key->r[0] = (get_unaligned_le32(raw_key +  0) >> 0) & 0x3ffffff;
-	key->r[1] = (get_unaligned_le32(raw_key +  3) >> 2) & 0x3ffff03;
-	key->r[2] = (get_unaligned_le32(raw_key +  6) >> 4) & 0x3ffc0ff;
-	key->r[3] = (get_unaligned_le32(raw_key +  9) >> 6) & 0x3f03fff;
-	key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
-}
-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
-
-/*
- * Poly1305 requires a unique key for each tag, which implies that we can't set
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
- * expect the key as the first 32 bytes in the update() call.
- */
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
-					const u8 *src, unsigned int srclen)
+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+					       const u8 *src, unsigned int srclen)
 {
 	if (!dctx->sset) {
 		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_core_setkey(&dctx->r, src);
+			poly1305_core_setkey(&dctx->core_r, src);
 			src += POLY1305_BLOCK_SIZE;
 			srclen -= POLY1305_BLOCK_SIZE;
-			dctx->rset = true;
+			dctx->rset = 2;
 		}
 		if (srclen >= POLY1305_BLOCK_SIZE) {
 			dctx->s[0] = get_unaligned_le32(src +  0);
@@ -85,86 +53,9 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 	}
 	return srclen;
 }
-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
-
-static void poly1305_blocks_internal(struct poly1305_state *state,
-				     const struct poly1305_key *key,
-				     const void *src, unsigned int nblocks,
-				     u32 hibit)
-{
-	u32 r0, r1, r2, r3, r4;
-	u32 s1, s2, s3, s4;
-	u32 h0, h1, h2, h3, h4;
-	u64 d0, d1, d2, d3, d4;
-
-	if (!nblocks)
-		return;
-
-	r0 = key->r[0];
-	r1 = key->r[1];
-	r2 = key->r[2];
-	r3 = key->r[3];
-	r4 = key->r[4];
-
-	s1 = r1 * 5;
-	s2 = r2 * 5;
-	s3 = r3 * 5;
-	s4 = r4 * 5;
-
-	h0 = state->h[0];
-	h1 = state->h[1];
-	h2 = state->h[2];
-	h3 = state->h[3];
-	h4 = state->h[4];
-
-	do {
-		/* h += m[i] */
-		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
-		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
-		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
-		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
-		h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
-
-		/* h *= r */
-		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
-		     mlt(h3, s2) + mlt(h4, s1);
-		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
-		     mlt(h3, s3) + mlt(h4, s2);
-		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
-		     mlt(h3, s4) + mlt(h4, s3);
-		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
-		     mlt(h3, r0) + mlt(h4, s4);
-		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
-		     mlt(h3, r1) + mlt(h4, r0);
-
-		/* (partial) h %= p */
-		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
-		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
-		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
-		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
-		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
-		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
-
-		src += POLY1305_BLOCK_SIZE;
-	} while (--nblocks);
-
-	state->h[0] = h0;
-	state->h[1] = h1;
-	state->h[2] = h2;
-	state->h[3] = h3;
-	state->h[4] = h4;
-}
 
-void poly1305_core_blocks(struct poly1305_state *state,
-			  const struct poly1305_key *key,
-			  const void *src, unsigned int nblocks)
-{
-	poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
-}
-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
-
-static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
-			    const u8 *src, unsigned int srclen, u32 hibit)
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+			    unsigned int srclen)
 {
 	unsigned int datalen;
 
@@ -174,12 +65,12 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		srclen = datalen;
 	}
 
-	poly1305_blocks_internal(&dctx->h, &dctx->r,
-				 src, srclen / POLY1305_BLOCK_SIZE, hibit);
+	poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
+			     srclen / POLY1305_BLOCK_SIZE, 1);
 }
 
-int crypto_poly1305_update(struct shash_desc *desc,
-			   const u8 *src, unsigned int srclen)
+static int crypto_poly1305_update(struct shash_desc *desc,
+				  const u8 *src, unsigned int srclen)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 	unsigned int bytes;
@@ -193,13 +84,13 @@ int crypto_poly1305_update(struct shash_desc *desc,
 
 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
 			poly1305_blocks(dctx, dctx->buf,
-					POLY1305_BLOCK_SIZE, 1 << 24);
+					POLY1305_BLOCK_SIZE);
 			dctx->buflen = 0;
 		}
 	}
 
 	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		poly1305_blocks(dctx, src, srclen, 1 << 24);
+		poly1305_blocks(dctx, src, srclen);
 		src += srclen - (srclen % POLY1305_BLOCK_SIZE);
 		srclen %= POLY1305_BLOCK_SIZE;
 	}
@@ -211,87 +102,17 @@ int crypto_poly1305_update(struct shash_desc *desc,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 
-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
-{
-	u32 h0, h1, h2, h3, h4;
-	u32 g0, g1, g2, g3, g4;
-	u32 mask;
-
-	/* fully carry h */
-	h0 = state->h[0];
-	h1 = state->h[1];
-	h2 = state->h[2];
-	h3 = state->h[3];
-	h4 = state->h[4];
-
-	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
-	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
-	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
-	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
-	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
-
-	/* compute h + -p */
-	g0 = h0 + 5;
-	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
-	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
-	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
-	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
-	/* select h if h < p, or h + -p if h >= p */
-	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
-	g0 &= mask;
-	g1 &= mask;
-	g2 &= mask;
-	g3 &= mask;
-	g4 &= mask;
-	mask = ~mask;
-	h0 = (h0 & mask) | g0;
-	h1 = (h1 & mask) | g1;
-	h2 = (h2 & mask) | g2;
-	h3 = (h3 & mask) | g3;
-	h4 = (h4 & mask) | g4;
-
-	/* h = h % (2^128) */
-	put_unaligned_le32((h0 >>  0) | (h1 << 26), dst +  0);
-	put_unaligned_le32((h1 >>  6) | (h2 << 20), dst +  4);
-	put_unaligned_le32((h2 >> 12) | (h3 << 14), dst +  8);
-	put_unaligned_le32((h3 >> 18) | (h4 <<  8), dst + 12);
-}
-EXPORT_SYMBOL_GPL(poly1305_core_emit);
-
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	__le32 digest[4];
-	u64 f = 0;
 
 	if (unlikely(!dctx->sset))
 		return -ENOKEY;
 
-	if (unlikely(dctx->buflen)) {
-		dctx->buf[dctx->buflen++] = 1;
-		memset(dctx->buf + dctx->buflen, 0,
-		       POLY1305_BLOCK_SIZE - dctx->buflen);
-		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
-	}
-
-	poly1305_core_emit(&dctx->h, digest);
-
-	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
-	put_unaligned_le32(f, dst + 0);
-	f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
-	put_unaligned_le32(f, dst + 4);
-	f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
-	put_unaligned_le32(f, dst + 8);
-	f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
-	put_unaligned_le32(f, dst + 12);
-
+	poly1305_final_generic(dctx, dst);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
 
 static struct shash_alg poly1305_alg = {
 	.digestsize	= POLY1305_DIGEST_SIZE,
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 0aa489711ec4..176b63afec8d 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -598,6 +598,7 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	const struct rsa_asn1_template *digest_info;
 	struct crypto_attr_type *algt;
+	u32 mask;
 	struct akcipher_instance *inst;
 	struct pkcs1pad_inst_ctx *ctx;
 	struct crypto_akcipher_spawn *spawn;
@@ -613,6 +614,8 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AKCIPHER) & algt->mask)
 		return -EINVAL;
 
+	mask = crypto_requires_sync(algt->type, algt->mask);
+
 	rsa_alg_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(rsa_alg_name))
 		return PTR_ERR(rsa_alg_name);
@@ -636,9 +639,8 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 	spawn = &ctx->spawn;
 	ctx->digest_info = digest_info;
 
-	crypto_set_spawn(&spawn->base, akcipher_crypto_instance(inst));
-	err = crypto_grab_akcipher(spawn, rsa_alg_name, 0,
-			crypto_requires_sync(algt->type, algt->mask));
+	err = crypto_grab_akcipher(spawn, akcipher_crypto_instance(inst),
+				   rsa_alg_name, 0, mask);
 	if (err)
 		goto out_free_inst;
 
diff --git a/crypto/scompress.c b/crypto/scompress.c
index 4d50750d01c6..738f4f8f0f41 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -266,9 +266,9 @@ int crypto_register_scomp(struct scomp_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 
-int crypto_unregister_scomp(struct scomp_alg *alg)
+void crypto_unregister_scomp(struct scomp_alg *alg)
 {
-	return crypto_unregister_alg(&alg->base);
+	crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
 
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 96d222c32acc..f124b9b54e15 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -18,8 +18,6 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 
-static void seqiv_free(struct crypto_instance *inst);
-
 static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
 {
 	struct aead_request *subreq = aead_request_ctx(req);
@@ -159,15 +157,11 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
 	err = aead_register_instance(tmpl, inst);
-	if (err)
-		goto free_inst;
-
-out:
-	return err;
-
+	if (err) {
 free_inst:
-	aead_geniv_free(inst);
-	goto out;
+		inst->free(inst);
+	}
+	return err;
 }
 
 static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -184,15 +178,9 @@ static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 	return seqiv_aead_create(tmpl, tb);
 }
 
-static void seqiv_free(struct crypto_instance *inst)
-{
-	aead_geniv_free(aead_instance(inst));
-}
-
 static struct crypto_template seqiv_tmpl = {
 	.name = "seqiv",
 	.create = seqiv_create,
-	.free = seqiv_free,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index 56fa665a4f01..492c1d0bfe06 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -449,8 +449,9 @@ int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 }
 EXPORT_SYMBOL_GPL(serpent_setkey);
 
-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
+void __serpent_encrypt(const void *c, u8 *dst, const u8 *src)
 {
+	const struct serpent_ctx *ctx = c;
 	const u32 *k = ctx->expkey;
 	const __le32 *s = (const __le32 *)src;
 	__le32	*d = (__le32 *)dst;
@@ -514,8 +515,9 @@ static void serpent_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	__serpent_encrypt(ctx, dst, src);
 }
 
-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
+void __serpent_decrypt(const void *c, u8 *dst, const u8 *src)
 {
+	const struct serpent_ctx *ctx = c;
 	const u32 *k = ctx->expkey;
 	const __le32 *s = (const __le32 *)src;
 	__le32	*d = (__le32 *)dst;
diff --git a/crypto/shash.c b/crypto/shash.c
index e83c5124f6eb..c075b26c2a1d 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -50,8 +50,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 
 static void shash_set_needkey(struct crypto_shash *tfm, struct shash_alg *alg)
 {
-	if (crypto_shash_alg_has_setkey(alg) &&
-	    !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
+	if (crypto_shash_alg_needs_key(alg))
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_NEED_KEY);
 }
 
@@ -386,18 +385,51 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static void crypto_shash_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_shash *hash = __crypto_shash_cast(tfm);
+	struct shash_alg *alg = crypto_shash_alg(hash);
+
+	alg->exit_tfm(hash);
+}
+
 static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_shash *hash = __crypto_shash_cast(tfm);
 	struct shash_alg *alg = crypto_shash_alg(hash);
+	int err;
 
 	hash->descsize = alg->descsize;
 
 	shash_set_needkey(hash, alg);
 
+	if (alg->exit_tfm)
+		tfm->exit = crypto_shash_exit_tfm;
+
+	if (!alg->init_tfm)
+		return 0;
+
+	err = alg->init_tfm(hash);
+	if (err)
+		return err;
+
+	/* ->init_tfm() may have increased the descsize. */
+	if (WARN_ON_ONCE(hash->descsize > HASH_MAX_DESCSIZE)) {
+		if (alg->exit_tfm)
+			alg->exit_tfm(hash);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
+static void crypto_shash_free_instance(struct crypto_instance *inst)
+{
+	struct shash_instance *shash = shash_instance(inst);
+
+	shash->free(shash);
+}
+
 #ifdef CONFIG_NET
 static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -434,6 +466,7 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
 static const struct crypto_type crypto_shash_type = {
 	.extsize = crypto_alg_extsize,
 	.init_tfm = crypto_shash_init_tfm,
+	.free = crypto_shash_free_instance,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_shash_show,
 #endif
@@ -444,6 +477,15 @@ static const struct crypto_type crypto_shash_type = {
 	.tfmsize = offsetof(struct crypto_shash, base),
 };
 
+int crypto_grab_shash(struct crypto_shash_spawn *spawn,
+		      struct crypto_instance *inst,
+		      const char *name, u32 type, u32 mask)
+{
+	spawn->base.frontend = &crypto_shash_type;
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_shash);
+
 struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
 					u32 mask)
 {
@@ -495,9 +537,9 @@ int crypto_register_shash(struct shash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_register_shash);
 
-int crypto_unregister_shash(struct shash_alg *alg)
+void crypto_unregister_shash(struct shash_alg *alg)
 {
-	return crypto_unregister_alg(&alg->base);
+	crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_shash);
 
@@ -521,19 +563,12 @@ err:
 }
 EXPORT_SYMBOL_GPL(crypto_register_shashes);
 
-int crypto_unregister_shashes(struct shash_alg *algs, int count)
+void crypto_unregister_shashes(struct shash_alg *algs, int count)
 {
-	int i, ret;
+	int i;
 
-	for (i = count - 1; i >= 0; --i) {
-		ret = crypto_unregister_shash(&algs[i]);
-		if (ret)
-			pr_err("Failed to unregister %s %s: %d\n",
-			       algs[i].base.cra_driver_name,
-			       algs[i].base.cra_name, ret);
-	}
-
-	return 0;
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_shash(&algs[i]);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_shashes);
 
@@ -542,6 +577,9 @@ int shash_register_instance(struct crypto_template *tmpl,
 {
 	int err;
 
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
 	err = shash_prepare_alg(&inst->alg);
 	if (err)
 		return err;
@@ -550,31 +588,12 @@ int shash_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(shash_register_instance);
 
-void shash_free_instance(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
-	kfree(shash_instance(inst));
-}
-EXPORT_SYMBOL_GPL(shash_free_instance);
-
-int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn,
-			    struct shash_alg *alg,
-			    struct crypto_instance *inst)
+void shash_free_singlespawn_instance(struct shash_instance *inst)
 {
-	return crypto_init_spawn2(&spawn->base, &alg->base, inst,
-				  &crypto_shash_type);
-}
-EXPORT_SYMBOL_GPL(crypto_init_shash_spawn);
-
-struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask)
-{
-	struct crypto_alg *alg;
-
-	alg = crypto_attr_alg2(rta, &crypto_shash_type, type, mask);
-	return IS_ERR(alg) ? ERR_CAST(alg) :
-	       container_of(alg, struct shash_alg, base);
+	crypto_drop_spawn(shash_instance_ctx(inst));
+	kfree(inst);
 }
-EXPORT_SYMBOL_GPL(shash_attr_alg);
+EXPORT_SYMBOL_GPL(shash_free_singlespawn_instance);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Synchronous cryptographic hash type");
diff --git a/crypto/simd.c b/crypto/simd.c
index 48876266cf2d..56885af49c24 100644
--- a/crypto/simd.c
+++ b/crypto/simd.c
@@ -52,15 +52,11 @@ static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 {
 	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_skcipher *child = &ctx->cryptd_tfm->base;
-	int err;
 
 	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, key_len);
-	crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) &
-				       CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_skcipher_setkey(child, key, key_len);
 }
 
 static int simd_skcipher_encrypt(struct skcipher_request *req)
@@ -295,15 +291,11 @@ static int simd_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 {
 	struct simd_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *child = &ctx->cryptd_tfm->base;
-	int err;
 
 	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_aead_set_flags(child, crypto_aead_get_flags(tfm) &
 				     CRYPTO_TFM_REQ_MASK);
-	err = crypto_aead_setkey(child, key, key_len);
-	crypto_aead_set_flags(tfm, crypto_aead_get_flags(child) &
-				   CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_aead_setkey(child, key, key_len);
 }
 
 static int simd_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 22753c1c7202..7221def7b9a7 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -549,15 +549,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 	return err;
 }
 
-int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
-		       bool atomic)
-{
-	walk->total = req->cryptlen;
-
-	return skcipher_walk_aead_common(walk, req, atomic);
-}
-EXPORT_SYMBOL_GPL(skcipher_walk_aead);
-
 int skcipher_walk_aead_encrypt(struct skcipher_walk *walk,
 			       struct aead_request *req, bool atomic)
 {
@@ -578,222 +569,12 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
 }
 EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt);
 
-static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
-{
-	if (alg->cra_type == &crypto_blkcipher_type)
-		return sizeof(struct crypto_blkcipher *);
-
-	if (alg->cra_type == &crypto_ablkcipher_type)
-		return sizeof(struct crypto_ablkcipher *);
-
-	return crypto_alg_extsize(alg);
-}
-
 static void skcipher_set_needkey(struct crypto_skcipher *tfm)
 {
-	if (tfm->keysize)
+	if (crypto_skcipher_max_keysize(tfm) != 0)
 		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_NEED_KEY);
 }
 
-static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm,
-				     const u8 *key, unsigned int keylen)
-{
-	struct crypto_blkcipher **ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_blkcipher *blkcipher = *ctx;
-	int err;
-
-	crypto_blkcipher_clear_flags(blkcipher, ~0);
-	crypto_blkcipher_set_flags(blkcipher, crypto_skcipher_get_flags(tfm) &
-					      CRYPTO_TFM_REQ_MASK);
-	err = crypto_blkcipher_setkey(blkcipher, key, keylen);
-	crypto_skcipher_set_flags(tfm, crypto_blkcipher_get_flags(blkcipher) &
-				       CRYPTO_TFM_RES_MASK);
-	if (unlikely(err)) {
-		skcipher_set_needkey(tfm);
-		return err;
-	}
-
-	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
-	return 0;
-}
-
-static int skcipher_crypt_blkcipher(struct skcipher_request *req,
-				    int (*crypt)(struct blkcipher_desc *,
-						 struct scatterlist *,
-						 struct scatterlist *,
-						 unsigned int))
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_blkcipher **ctx = crypto_skcipher_ctx(tfm);
-	struct blkcipher_desc desc = {
-		.tfm = *ctx,
-		.info = req->iv,
-		.flags = req->base.flags,
-	};
-
-
-	return crypt(&desc, req->dst, req->src, req->cryptlen);
-}
-
-static int skcipher_encrypt_blkcipher(struct skcipher_request *req)
-{
-	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-
-	return skcipher_crypt_blkcipher(req, alg->encrypt);
-}
-
-static int skcipher_decrypt_blkcipher(struct skcipher_request *req)
-{
-	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
-	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
-
-	return skcipher_crypt_blkcipher(req, alg->decrypt);
-}
-
-static void crypto_exit_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
-{
-	struct crypto_blkcipher **ctx = crypto_tfm_ctx(tfm);
-
-	crypto_free_blkcipher(*ctx);
-}
-
-static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
-{
-	struct crypto_alg *calg = tfm->__crt_alg;
-	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
-	struct crypto_blkcipher **ctx = crypto_tfm_ctx(tfm);
-	struct crypto_blkcipher *blkcipher;
-	struct crypto_tfm *btfm;
-
-	if (!crypto_mod_get(calg))
-		return -EAGAIN;
-
-	btfm = __crypto_alloc_tfm(calg, CRYPTO_ALG_TYPE_BLKCIPHER,
-					CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(btfm)) {
-		crypto_mod_put(calg);
-		return PTR_ERR(btfm);
-	}
-
-	blkcipher = __crypto_blkcipher_cast(btfm);
-	*ctx = blkcipher;
-	tfm->exit = crypto_exit_skcipher_ops_blkcipher;
-
-	skcipher->setkey = skcipher_setkey_blkcipher;
-	skcipher->encrypt = skcipher_encrypt_blkcipher;
-	skcipher->decrypt = skcipher_decrypt_blkcipher;
-
-	skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
-	skcipher->keysize = calg->cra_blkcipher.max_keysize;
-
-	skcipher_set_needkey(skcipher);
-
-	return 0;
-}
-
-static int skcipher_setkey_ablkcipher(struct crypto_skcipher *tfm,
-				      const u8 *key, unsigned int keylen)
-{
-	struct crypto_ablkcipher **ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_ablkcipher *ablkcipher = *ctx;
-	int err;
-
-	crypto_ablkcipher_clear_flags(ablkcipher, ~0);
-	crypto_ablkcipher_set_flags(ablkcipher,
-				    crypto_skcipher_get_flags(tfm) &
-				    CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(ablkcipher, key, keylen);
-	crypto_skcipher_set_flags(tfm,
-				  crypto_ablkcipher_get_flags(ablkcipher) &
-				  CRYPTO_TFM_RES_MASK);
-	if (unlikely(err)) {
-		skcipher_set_needkey(tfm);
-		return err;
-	}
-
-	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
-	return 0;
-}
-
-static int skcipher_crypt_ablkcipher(struct skcipher_request *req,
-				     int (*crypt)(struct ablkcipher_request *))
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_ablkcipher **ctx = crypto_skcipher_ctx(tfm);
-	struct ablkcipher_request *subreq = skcipher_request_ctx(req);
-
-	ablkcipher_request_set_tfm(subreq, *ctx);
-	ablkcipher_request_set_callback(subreq, skcipher_request_flags(req),
-					req->base.complete, req->base.data);
-	ablkcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
-				     req->iv);
-
-	return crypt(subreq);
-}
-
-static int skcipher_encrypt_ablkcipher(struct skcipher_request *req)
-{
-	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
-	struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
-
-	return skcipher_crypt_ablkcipher(req, alg->encrypt);
-}
-
-static int skcipher_decrypt_ablkcipher(struct skcipher_request *req)
-{
-	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
-	struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
-
-	return skcipher_crypt_ablkcipher(req, alg->decrypt);
-}
-
-static void crypto_exit_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
-{
-	struct crypto_ablkcipher **ctx = crypto_tfm_ctx(tfm);
-
-	crypto_free_ablkcipher(*ctx);
-}
-
-static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
-{
-	struct crypto_alg *calg = tfm->__crt_alg;
-	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
-	struct crypto_ablkcipher **ctx = crypto_tfm_ctx(tfm);
-	struct crypto_ablkcipher *ablkcipher;
-	struct crypto_tfm *abtfm;
-
-	if (!crypto_mod_get(calg))
-		return -EAGAIN;
-
-	abtfm = __crypto_alloc_tfm(calg, 0, 0);
-	if (IS_ERR(abtfm)) {
-		crypto_mod_put(calg);
-		return PTR_ERR(abtfm);
-	}
-
-	ablkcipher = __crypto_ablkcipher_cast(abtfm);
-	*ctx = ablkcipher;
-	tfm->exit = crypto_exit_skcipher_ops_ablkcipher;
-
-	skcipher->setkey = skcipher_setkey_ablkcipher;
-	skcipher->encrypt = skcipher_encrypt_ablkcipher;
-	skcipher->decrypt = skcipher_decrypt_ablkcipher;
-
-	skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
-			    sizeof(struct ablkcipher_request);
-	skcipher->keysize = calg->cra_ablkcipher.max_keysize;
-
-	skcipher_set_needkey(skcipher);
-
-	return 0;
-}
-
 static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm,
 				     const u8 *key, unsigned int keylen)
 {
@@ -815,17 +596,15 @@ static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm,
 	return ret;
 }
 
-static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
 	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
 	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
 	int err;
 
-	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
-		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize)
 		return -EINVAL;
-	}
 
 	if ((unsigned long)key & alignmask)
 		err = skcipher_setkey_unaligned(tfm, key, keylen);
@@ -840,6 +619,7 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_skcipher_setkey);
 
 int crypto_skcipher_encrypt(struct skcipher_request *req)
 {
@@ -852,7 +632,7 @@ int crypto_skcipher_encrypt(struct skcipher_request *req)
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		ret = -ENOKEY;
 	else
-		ret = tfm->encrypt(req);
+		ret = crypto_skcipher_alg(tfm)->encrypt(req);
 	crypto_stats_skcipher_encrypt(cryptlen, ret, alg);
 	return ret;
 }
@@ -869,7 +649,7 @@ int crypto_skcipher_decrypt(struct skcipher_request *req)
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		ret = -ENOKEY;
 	else
-		ret = tfm->decrypt(req);
+		ret = crypto_skcipher_alg(tfm)->decrypt(req);
 	crypto_stats_skcipher_decrypt(cryptlen, ret, alg);
 	return ret;
 }
@@ -888,18 +668,6 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
 	struct skcipher_alg *alg = crypto_skcipher_alg(skcipher);
 
-	if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type)
-		return crypto_init_skcipher_ops_blkcipher(tfm);
-
-	if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type)
-		return crypto_init_skcipher_ops_ablkcipher(tfm);
-
-	skcipher->setkey = skcipher_setkey;
-	skcipher->encrypt = alg->encrypt;
-	skcipher->decrypt = alg->decrypt;
-	skcipher->ivsize = alg->ivsize;
-	skcipher->keysize = alg->max_keysize;
-
 	skcipher_set_needkey(skcipher);
 
 	if (alg->exit)
@@ -964,8 +732,8 @@ static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 }
 #endif
 
-static const struct crypto_type crypto_skcipher_type2 = {
-	.extsize = crypto_skcipher_extsize,
+static const struct crypto_type crypto_skcipher_type = {
+	.extsize = crypto_alg_extsize,
 	.init_tfm = crypto_skcipher_init_tfm,
 	.free = crypto_skcipher_free_instance,
 #ifdef CONFIG_PROC_FS
@@ -973,23 +741,24 @@ static const struct crypto_type crypto_skcipher_type2 = {
 #endif
 	.report = crypto_skcipher_report,
 	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
-	.maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.tfmsize = offsetof(struct crypto_skcipher, base),
 };
 
 int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn,
-			  const char *name, u32 type, u32 mask)
+			 struct crypto_instance *inst,
+			 const char *name, u32 type, u32 mask)
 {
-	spawn->base.frontend = &crypto_skcipher_type2;
-	return crypto_grab_spawn(&spawn->base, name, type, mask);
+	spawn->base.frontend = &crypto_skcipher_type;
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_grab_skcipher);
 
 struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 					      u32 type, u32 mask)
 {
-	return crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
+	return crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
 
@@ -1001,7 +770,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
 	/* Only sync algorithms allowed. */
 	mask |= CRYPTO_ALG_ASYNC;
 
-	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
+	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);
 
 	/*
 	 * Make sure we do not allocate something that might get used with
@@ -1017,12 +786,11 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
 
-int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
+int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask)
 {
-	return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
-				   type, mask);
+	return crypto_type_has_alg(alg_name, &crypto_skcipher_type, type, mask);
 }
-EXPORT_SYMBOL_GPL(crypto_has_skcipher2);
+EXPORT_SYMBOL_GPL(crypto_has_skcipher);
 
 static int skcipher_prepare_alg(struct skcipher_alg *alg)
 {
@@ -1037,7 +805,7 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg)
 	if (!alg->walksize)
 		alg->walksize = alg->chunksize;
 
-	base->cra_type = &crypto_skcipher_type2;
+	base->cra_type = &crypto_skcipher_type;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
 	base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER;
 
@@ -1097,6 +865,9 @@ int skcipher_register_instance(struct crypto_template *tmpl,
 {
 	int err;
 
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
 	err = skcipher_prepare_alg(&inst->alg);
 	if (err)
 		return err;
@@ -1109,21 +880,17 @@ static int skcipher_setkey_simple(struct crypto_skcipher *tfm, const u8 *key,
 				  unsigned int keylen)
 {
 	struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
-	int err;
 
 	crypto_cipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK);
 	crypto_cipher_set_flags(cipher, crypto_skcipher_get_flags(tfm) &
 				CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(cipher, key, keylen);
-	crypto_skcipher_set_flags(tfm, crypto_cipher_get_flags(cipher) &
-				  CRYPTO_TFM_RES_MASK);
-	return err;
+	return crypto_cipher_setkey(cipher, key, keylen);
 }
 
 static int skcipher_init_tfm_simple(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
-	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cipher_spawn *spawn = skcipher_instance_ctx(inst);
 	struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *cipher;
 
@@ -1144,7 +911,7 @@ static void skcipher_exit_tfm_simple(struct crypto_skcipher *tfm)
 
 static void skcipher_free_instance_simple(struct skcipher_instance *inst)
 {
-	crypto_drop_spawn(skcipher_instance_ctx(inst));
+	crypto_drop_cipher(skcipher_instance_ctx(inst));
 	kfree(inst);
 }
 
@@ -1160,21 +927,18 @@ static void skcipher_free_instance_simple(struct skcipher_instance *inst)
  *
  * @tmpl: the template being instantiated
  * @tb: the template parameters
- * @cipher_alg_ret: on success, a pointer to the underlying cipher algorithm is
- *		    returned here.  It must be dropped with crypto_mod_put().
  *
  * Return: a pointer to the new instance, or an ERR_PTR().  The caller still
  *	   needs to register the instance.
  */
-struct skcipher_instance *
-skcipher_alloc_instance_simple(struct crypto_template *tmpl, struct rtattr **tb,
-			       struct crypto_alg **cipher_alg_ret)
+struct skcipher_instance *skcipher_alloc_instance_simple(
+	struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_alg *cipher_alg;
-	struct skcipher_instance *inst;
-	struct crypto_spawn *spawn;
 	u32 mask;
+	struct skcipher_instance *inst;
+	struct crypto_cipher_spawn *spawn;
+	struct crypto_alg *cipher_alg;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -1184,31 +948,25 @@ skcipher_alloc_instance_simple(struct crypto_template *tmpl, struct rtattr **tb,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
-	mask = CRYPTO_ALG_TYPE_MASK |
-		crypto_requires_off(algt->type, algt->mask,
-				    CRYPTO_ALG_NEED_FALLBACK);
-
-	cipher_alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
-	if (IS_ERR(cipher_alg))
-		return ERR_CAST(cipher_alg);
+	mask = crypto_requires_off(algt->type, algt->mask,
+				   CRYPTO_ALG_NEED_FALLBACK);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
-	if (!inst) {
-		err = -ENOMEM;
-		goto err_put_cipher_alg;
-	}
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
 	spawn = skcipher_instance_ctx(inst);
 
-	err = crypto_inst_setname(skcipher_crypto_instance(inst), tmpl->name,
-				  cipher_alg);
+	err = crypto_grab_cipher(spawn, skcipher_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[1]), 0, mask);
 	if (err)
 		goto err_free_inst;
+	cipher_alg = crypto_spawn_cipher_alg(spawn);
 
-	err = crypto_init_spawn(spawn, cipher_alg,
-				skcipher_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), tmpl->name,
+				  cipher_alg);
 	if (err)
 		goto err_free_inst;
+
 	inst->free = skcipher_free_instance_simple;
 
 	/* Default algorithm properties, can be overridden */
@@ -1225,13 +983,10 @@ skcipher_alloc_instance_simple(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.init = skcipher_init_tfm_simple;
 	inst->alg.exit = skcipher_exit_tfm_simple;
 
-	*cipher_alg_ret = cipher_alg;
 	return inst;
 
 err_free_inst:
-	kfree(inst);
-err_put_cipher_alg:
-	crypto_mod_put(cipher_alg);
+	skcipher_free_instance_simple(inst);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(skcipher_alloc_instance_simple);
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index 71ffb343709a..016dbc595705 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -143,29 +143,23 @@ int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
 EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
 
 /**
- * crypto_sm4_set_key - Set the AES key.
+ * crypto_sm4_set_key - Set the SM4 key.
  * @tfm:	The %crypto_tfm that is used in the context.
  * @in_key:	The input key.
  * @key_len:	The size of the key.
  *
- * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
- * is set. The function uses crypto_sm4_expand_key() to expand the key.
+ * This function uses crypto_sm4_expand_key() to expand the key.
  * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
  * retrieved with crypto_tfm_ctx().
+ *
+ * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
  */
 int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
 	struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int ret;
-
-	ret = crypto_sm4_expand_key(ctx, in_key, key_len);
-	if (!ret)
-		return 0;
 
-	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-	return -EINVAL;
+	return crypto_sm4_expand_key(ctx, in_key, key_len);
 }
 EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
 
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 83ad0b1fab30..f42f486e90e8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -65,7 +65,7 @@ static int mode;
 static u32 num_mb = 8;
 static char *tvmem[TVMEMSIZE];
 
-static char *check[] = {
+static const char *check[] = {
 	"des", "md5", "des3_ede", "rot13", "sha1", "sha224", "sha256", "sm3",
 	"blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes",
 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
@@ -1634,7 +1634,7 @@ static void test_cipher_speed(const char *algo, int enc, unsigned int secs,
 
 static void test_available(void)
 {
-	char **name = check;
+	const char **name = check;
 
 	while (*name) {
 		printk("alg %s ", *name);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index c39e39e55dc2..88f33c0efb23 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -82,6 +82,19 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 struct aead_test_suite {
 	const struct aead_testvec *vecs;
 	unsigned int count;
+
+	/*
+	 * Set if trying to decrypt an inauthentic ciphertext with this
+	 * algorithm might result in EINVAL rather than EBADMSG, due to other
+	 * validation the algorithm does on the inputs such as length checks.
+	 */
+	unsigned int einval_allowed : 1;
+
+	/*
+	 * Set if the algorithm intentionally ignores the last 8 bytes of the
+	 * AAD buffer during decryption.
+	 */
+	unsigned int esp_aad : 1;
 };
 
 struct cipher_test_suite {
@@ -259,6 +272,9 @@ struct test_sg_division {
  *	       where 0 is aligned to a 2*(MAX_ALGAPI_ALIGNMASK+1) byte boundary
  * @iv_offset_relative_to_alignmask: if true, add the algorithm's alignmask to
  *				     the @iv_offset
+ * @key_offset: misalignment of the key, where 0 is default alignment
+ * @key_offset_relative_to_alignmask: if true, add the algorithm's alignmask to
+ *				      the @key_offset
  * @finalization_type: what finalization function to use for hashes
  * @nosimd: execute with SIMD disabled?  Requires !CRYPTO_TFM_REQ_MAY_SLEEP.
  */
@@ -269,7 +285,9 @@ struct testvec_config {
 	struct test_sg_division src_divs[XBUFSIZE];
 	struct test_sg_division dst_divs[XBUFSIZE];
 	unsigned int iv_offset;
+	unsigned int key_offset;
 	bool iv_offset_relative_to_alignmask;
+	bool key_offset_relative_to_alignmask;
 	enum finalization_type finalization_type;
 	bool nosimd;
 };
@@ -297,6 +315,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
 		.name = "unaligned buffer, offset=1",
 		.src_divs = { { .proportion_of_total = 10000, .offset = 1 } },
 		.iv_offset = 1,
+		.key_offset = 1,
 	}, {
 		.name = "buffer aligned only to alignmask",
 		.src_divs = {
@@ -308,6 +327,8 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
 		},
 		.iv_offset = 1,
 		.iv_offset_relative_to_alignmask = true,
+		.key_offset = 1,
+		.key_offset_relative_to_alignmask = true,
 	}, {
 		.name = "two even aligned splits",
 		.src_divs = {
@@ -323,6 +344,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
 			{ .proportion_of_total = 4800, .offset = 18 },
 		},
 		.iv_offset = 3,
+		.key_offset = 3,
 	}, {
 		.name = "misaligned splits crossing pages, inplace",
 		.inplace = true,
@@ -355,6 +377,7 @@ static const struct testvec_config default_hash_testvec_configs[] = {
 		.name = "init+update+final misaligned buffer",
 		.src_divs = { { .proportion_of_total = 10000, .offset = 1 } },
 		.finalization_type = FINALIZATION_TYPE_FINAL,
+		.key_offset = 1,
 	}, {
 		.name = "digest buffer aligned only to alignmask",
 		.src_divs = {
@@ -365,6 +388,8 @@ static const struct testvec_config default_hash_testvec_configs[] = {
 			},
 		},
 		.finalization_type = FINALIZATION_TYPE_DIGEST,
+		.key_offset = 1,
+		.key_offset_relative_to_alignmask = true,
 	}, {
 		.name = "init+update+update+final two even splits",
 		.src_divs = {
@@ -740,6 +765,49 @@ static int build_cipher_test_sglists(struct cipher_test_sglists *tsgls,
 				 alignmask, dst_total_len, NULL, NULL);
 }
 
+/*
+ * Support for testing passing a misaligned key to setkey():
+ *
+ * If cfg->key_offset is set, copy the key into a new buffer at that offset,
+ * optionally adding alignmask.  Else, just use the key directly.
+ */
+static int prepare_keybuf(const u8 *key, unsigned int ksize,
+			  const struct testvec_config *cfg,
+			  unsigned int alignmask,
+			  const u8 **keybuf_ret, const u8 **keyptr_ret)
+{
+	unsigned int key_offset = cfg->key_offset;
+	u8 *keybuf = NULL, *keyptr = (u8 *)key;
+
+	if (key_offset != 0) {
+		if (cfg->key_offset_relative_to_alignmask)
+			key_offset += alignmask;
+		keybuf = kmalloc(key_offset + ksize, GFP_KERNEL);
+		if (!keybuf)
+			return -ENOMEM;
+		keyptr = keybuf + key_offset;
+		memcpy(keyptr, key, ksize);
+	}
+	*keybuf_ret = keybuf;
+	*keyptr_ret = keyptr;
+	return 0;
+}
+
+/* Like setkey_f(tfm, key, ksize), but sometimes misalign the key */
+#define do_setkey(setkey_f, tfm, key, ksize, cfg, alignmask)		\
+({									\
+	const u8 *keybuf, *keyptr;					\
+	int err;							\
+									\
+	err = prepare_keybuf((key), (ksize), (cfg), (alignmask),	\
+			     &keybuf, &keyptr);				\
+	if (err == 0) {							\
+		err = setkey_f((tfm), keyptr, (ksize));			\
+		kfree(keybuf);						\
+	}								\
+	err;								\
+})
+
 #ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
 
 /* Generate a random length in range [0, max_len], but prefer smaller values */
@@ -759,27 +827,39 @@ static unsigned int generate_random_length(unsigned int max_len)
 	}
 }
 
-/* Sometimes make some random changes to the given data buffer */
-static void mutate_buffer(u8 *buf, size_t count)
+/* Flip a random bit in the given nonempty data buffer */
+static void flip_random_bit(u8 *buf, size_t size)
+{
+	size_t bitpos;
+
+	bitpos = prandom_u32() % (size * 8);
+	buf[bitpos / 8] ^= 1 << (bitpos % 8);
+}
+
+/* Flip a random byte in the given nonempty data buffer */
+static void flip_random_byte(u8 *buf, size_t size)
+{
+	buf[prandom_u32() % size] ^= 0xff;
+}
+
+/* Sometimes make some random changes to the given nonempty data buffer */
+static void mutate_buffer(u8 *buf, size_t size)
 {
 	size_t num_flips;
 	size_t i;
-	size_t pos;
 
 	/* Sometimes flip some bits */
 	if (prandom_u32() % 4 == 0) {
-		num_flips = min_t(size_t, 1 << (prandom_u32() % 8), count * 8);
-		for (i = 0; i < num_flips; i++) {
-			pos = prandom_u32() % (count * 8);
-			buf[pos / 8] ^= 1 << (pos % 8);
-		}
+		num_flips = min_t(size_t, 1 << (prandom_u32() % 8), size * 8);
+		for (i = 0; i < num_flips; i++)
+			flip_random_bit(buf, size);
 	}
 
 	/* Sometimes flip some bytes */
 	if (prandom_u32() % 4 == 0) {
-		num_flips = min_t(size_t, 1 << (prandom_u32() % 8), count);
+		num_flips = min_t(size_t, 1 << (prandom_u32() % 8), size);
 		for (i = 0; i < num_flips; i++)
-			buf[prandom_u32() % count] ^= 0xff;
+			flip_random_byte(buf, size);
 	}
 }
 
@@ -966,6 +1046,11 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
 		p += scnprintf(p, end - p, " iv_offset=%u", cfg->iv_offset);
 	}
 
+	if (prandom_u32() % 2 == 0) {
+		cfg->key_offset = 1 + (prandom_u32() % MAX_ALGAPI_ALIGNMASK);
+		p += scnprintf(p, end - p, " key_offset=%u", cfg->key_offset);
+	}
+
 	WARN_ON_ONCE(!valid_testvec_config(cfg));
 }
 
@@ -1103,7 +1188,8 @@ static int test_shash_vec_cfg(const char *driver,
 
 	/* Set the key, if specified */
 	if (vec->ksize) {
-		err = crypto_shash_setkey(tfm, vec->key, vec->ksize);
+		err = do_setkey(crypto_shash_setkey, tfm, vec->key, vec->ksize,
+				cfg, alignmask);
 		if (err) {
 			if (err == vec->setkey_error)
 				return 0;
@@ -1290,7 +1376,8 @@ static int test_ahash_vec_cfg(const char *driver,
 
 	/* Set the key, if specified */
 	if (vec->ksize) {
-		err = crypto_ahash_setkey(tfm, vec->key, vec->ksize);
+		err = do_setkey(crypto_ahash_setkey, tfm, vec->key, vec->ksize,
+				cfg, alignmask);
 		if (err) {
 			if (err == vec->setkey_error)
 				return 0;
@@ -1853,7 +1940,6 @@ static int test_aead_vec_cfg(const char *driver, int enc,
 		 cfg->iv_offset +
 		 (cfg->iv_offset_relative_to_alignmask ? alignmask : 0);
 	struct kvec input[2];
-	int expected_error;
 	int err;
 
 	/* Set the key */
@@ -1861,7 +1947,9 @@ static int test_aead_vec_cfg(const char *driver, int enc,
 		crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 	else
 		crypto_aead_clear_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
-	err = crypto_aead_setkey(tfm, vec->key, vec->klen);
+
+	err = do_setkey(crypto_aead_setkey, tfm, vec->key, vec->klen,
+			cfg, alignmask);
 	if (err && err != vec->setkey_error) {
 		pr_err("alg: aead: %s setkey failed on test vector %s; expected_error=%d, actual_error=%d, flags=%#x\n",
 		       driver, vec_name, vec->setkey_error, err,
@@ -1972,20 +2060,31 @@ static int test_aead_vec_cfg(const char *driver, int enc,
 		return -EINVAL;
 	}
 
-	/* Check for success or failure */
-	expected_error = vec->novrfy ? -EBADMSG : vec->crypt_error;
-	if (err) {
-		if (err == expected_error)
-			return 0;
-		pr_err("alg: aead: %s %s failed on test vector %s; expected_error=%d, actual_error=%d, cfg=\"%s\"\n",
-		       driver, op, vec_name, expected_error, err, cfg->name);
-		return err;
-	}
-	if (expected_error) {
-		pr_err("alg: aead: %s %s unexpectedly succeeded on test vector %s; expected_error=%d, cfg=\"%s\"\n",
+	/* Check for unexpected success or failure, or wrong error code */
+	if ((err == 0 && vec->novrfy) ||
+	    (err != vec->crypt_error && !(err == -EBADMSG && vec->novrfy))) {
+		char expected_error[32];
+
+		if (vec->novrfy &&
+		    vec->crypt_error != 0 && vec->crypt_error != -EBADMSG)
+			sprintf(expected_error, "-EBADMSG or %d",
+				vec->crypt_error);
+		else if (vec->novrfy)
+			sprintf(expected_error, "-EBADMSG");
+		else
+			sprintf(expected_error, "%d", vec->crypt_error);
+		if (err) {
+			pr_err("alg: aead: %s %s failed on test vector %s; expected_error=%s, actual_error=%d, cfg=\"%s\"\n",
+			       driver, op, vec_name, expected_error, err,
+			       cfg->name);
+			return err;
+		}
+		pr_err("alg: aead: %s %s unexpectedly succeeded on test vector %s; expected_error=%s, cfg=\"%s\"\n",
 		       driver, op, vec_name, expected_error, cfg->name);
 		return -EINVAL;
 	}
+	if (err) /* Expectedly failed. */
+		return 0;
 
 	/* Check for the correct output (ciphertext or plaintext) */
 	err = verify_correct_output(&tsgls->dst, enc ? vec->ctext : vec->ptext,
@@ -2047,25 +2146,129 @@ static int test_aead_vec(const char *driver, int enc,
 }
 
 #ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
+
+struct aead_extra_tests_ctx {
+	struct aead_request *req;
+	struct crypto_aead *tfm;
+	const char *driver;
+	const struct alg_test_desc *test_desc;
+	struct cipher_test_sglists *tsgls;
+	unsigned int maxdatasize;
+	unsigned int maxkeysize;
+
+	struct aead_testvec vec;
+	char vec_name[64];
+	char cfgname[TESTVEC_CONFIG_NAMELEN];
+	struct testvec_config cfg;
+};
+
 /*
- * Generate an AEAD test vector from the given implementation.
- * Assumes the buffers in 'vec' were already allocated.
+ * Make at least one random change to a (ciphertext, AAD) pair.  "Ciphertext"
+ * here means the full ciphertext including the authentication tag.  The
+ * authentication tag (and hence also the ciphertext) is assumed to be nonempty.
+ */
+static void mutate_aead_message(struct aead_testvec *vec, bool esp_aad)
+{
+	const unsigned int aad_tail_size = esp_aad ? 8 : 0;
+	const unsigned int authsize = vec->clen - vec->plen;
+
+	if (prandom_u32() % 2 == 0 && vec->alen > aad_tail_size) {
+		 /* Mutate the AAD */
+		flip_random_bit((u8 *)vec->assoc, vec->alen - aad_tail_size);
+		if (prandom_u32() % 2 == 0)
+			return;
+	}
+	if (prandom_u32() % 2 == 0) {
+		/* Mutate auth tag (assuming it's at the end of ciphertext) */
+		flip_random_bit((u8 *)vec->ctext + vec->plen, authsize);
+	} else {
+		/* Mutate any part of the ciphertext */
+		flip_random_bit((u8 *)vec->ctext, vec->clen);
+	}
+}
+
+/*
+ * Minimum authentication tag size in bytes at which we assume that we can
+ * reliably generate inauthentic messages, i.e. not generate an authentic
+ * message by chance.
+ */
+#define MIN_COLLISION_FREE_AUTHSIZE 8
+
+static void generate_aead_message(struct aead_request *req,
+				  const struct aead_test_suite *suite,
+				  struct aead_testvec *vec,
+				  bool prefer_inauthentic)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	const unsigned int ivsize = crypto_aead_ivsize(tfm);
+	const unsigned int authsize = vec->clen - vec->plen;
+	const bool inauthentic = (authsize >= MIN_COLLISION_FREE_AUTHSIZE) &&
+				 (prefer_inauthentic || prandom_u32() % 4 == 0);
+
+	/* Generate the AAD. */
+	generate_random_bytes((u8 *)vec->assoc, vec->alen);
+
+	if (inauthentic && prandom_u32() % 2 == 0) {
+		/* Generate a random ciphertext. */
+		generate_random_bytes((u8 *)vec->ctext, vec->clen);
+	} else {
+		int i = 0;
+		struct scatterlist src[2], dst;
+		u8 iv[MAX_IVLEN];
+		DECLARE_CRYPTO_WAIT(wait);
+
+		/* Generate a random plaintext and encrypt it. */
+		sg_init_table(src, 2);
+		if (vec->alen)
+			sg_set_buf(&src[i++], vec->assoc, vec->alen);
+		if (vec->plen) {
+			generate_random_bytes((u8 *)vec->ptext, vec->plen);
+			sg_set_buf(&src[i++], vec->ptext, vec->plen);
+		}
+		sg_init_one(&dst, vec->ctext, vec->alen + vec->clen);
+		memcpy(iv, vec->iv, ivsize);
+		aead_request_set_callback(req, 0, crypto_req_done, &wait);
+		aead_request_set_crypt(req, src, &dst, vec->plen, iv);
+		aead_request_set_ad(req, vec->alen);
+		vec->crypt_error = crypto_wait_req(crypto_aead_encrypt(req),
+						   &wait);
+		/* If encryption failed, we're done. */
+		if (vec->crypt_error != 0)
+			return;
+		memmove((u8 *)vec->ctext, vec->ctext + vec->alen, vec->clen);
+		if (!inauthentic)
+			return;
+		/*
+		 * Mutate the authentic (ciphertext, AAD) pair to get an
+		 * inauthentic one.
+		 */
+		mutate_aead_message(vec, suite->esp_aad);
+	}
+	vec->novrfy = 1;
+	if (suite->einval_allowed)
+		vec->crypt_error = -EINVAL;
+}
+
+/*
+ * Generate an AEAD test vector 'vec' using the implementation specified by
+ * 'req'.  The buffers in 'vec' must already be allocated.
+ *
+ * If 'prefer_inauthentic' is true, then this function will generate inauthentic
+ * test vectors (i.e. vectors with 'vec->novrfy=1') more often.
  */
 static void generate_random_aead_testvec(struct aead_request *req,
 					 struct aead_testvec *vec,
+					 const struct aead_test_suite *suite,
 					 unsigned int maxkeysize,
 					 unsigned int maxdatasize,
-					 char *name, size_t max_namelen)
+					 char *name, size_t max_namelen,
+					 bool prefer_inauthentic)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	const unsigned int ivsize = crypto_aead_ivsize(tfm);
-	unsigned int maxauthsize = crypto_aead_alg(tfm)->maxauthsize;
+	const unsigned int maxauthsize = crypto_aead_maxauthsize(tfm);
 	unsigned int authsize;
 	unsigned int total_len;
-	int i;
-	struct scatterlist src[2], dst;
-	u8 iv[MAX_IVLEN];
-	DECLARE_CRYPTO_WAIT(wait);
 
 	/* Key: length in [0, maxkeysize], but usually choose maxkeysize */
 	vec->klen = maxkeysize;
@@ -2081,81 +2284,101 @@ static void generate_random_aead_testvec(struct aead_request *req,
 	authsize = maxauthsize;
 	if (prandom_u32() % 4 == 0)
 		authsize = prandom_u32() % (maxauthsize + 1);
+	if (prefer_inauthentic && authsize < MIN_COLLISION_FREE_AUTHSIZE)
+		authsize = MIN_COLLISION_FREE_AUTHSIZE;
 	if (WARN_ON(authsize > maxdatasize))
 		authsize = maxdatasize;
 	maxdatasize -= authsize;
 	vec->setauthsize_error = crypto_aead_setauthsize(tfm, authsize);
 
-	/* Plaintext and associated data */
+	/* AAD, plaintext, and ciphertext lengths */
 	total_len = generate_random_length(maxdatasize);
 	if (prandom_u32() % 4 == 0)
 		vec->alen = 0;
 	else
 		vec->alen = generate_random_length(total_len);
 	vec->plen = total_len - vec->alen;
-	generate_random_bytes((u8 *)vec->assoc, vec->alen);
-	generate_random_bytes((u8 *)vec->ptext, vec->plen);
-
 	vec->clen = vec->plen + authsize;
 
 	/*
-	 * If the key or authentication tag size couldn't be set, no need to
-	 * continue to encrypt.
+	 * Generate the AAD, plaintext, and ciphertext.  Not applicable if the
+	 * key or the authentication tag size couldn't be set.
 	 */
-	if (vec->setkey_error || vec->setauthsize_error)
-		goto done;
-
-	/* Ciphertext */
-	sg_init_table(src, 2);
-	i = 0;
-	if (vec->alen)
-		sg_set_buf(&src[i++], vec->assoc, vec->alen);
-	if (vec->plen)
-		sg_set_buf(&src[i++], vec->ptext, vec->plen);
-	sg_init_one(&dst, vec->ctext, vec->alen + vec->clen);
-	memcpy(iv, vec->iv, ivsize);
-	aead_request_set_callback(req, 0, crypto_req_done, &wait);
-	aead_request_set_crypt(req, src, &dst, vec->plen, iv);
-	aead_request_set_ad(req, vec->alen);
-	vec->crypt_error = crypto_wait_req(crypto_aead_encrypt(req), &wait);
-	if (vec->crypt_error == 0)
-		memmove((u8 *)vec->ctext, vec->ctext + vec->alen, vec->clen);
-done:
+	vec->novrfy = 0;
+	vec->crypt_error = 0;
+	if (vec->setkey_error == 0 && vec->setauthsize_error == 0)
+		generate_aead_message(req, suite, vec, prefer_inauthentic);
 	snprintf(name, max_namelen,
-		 "\"random: alen=%u plen=%u authsize=%u klen=%u\"",
-		 vec->alen, vec->plen, authsize, vec->klen);
+		 "\"random: alen=%u plen=%u authsize=%u klen=%u novrfy=%d\"",
+		 vec->alen, vec->plen, authsize, vec->klen, vec->novrfy);
+}
+
+static void try_to_generate_inauthentic_testvec(
+					struct aead_extra_tests_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		generate_random_aead_testvec(ctx->req, &ctx->vec,
+					     &ctx->test_desc->suite.aead,
+					     ctx->maxkeysize, ctx->maxdatasize,
+					     ctx->vec_name,
+					     sizeof(ctx->vec_name), true);
+		if (ctx->vec.novrfy)
+			return;
+	}
 }
 
 /*
- * Test the AEAD algorithm represented by @req against the corresponding generic
- * implementation, if one is available.
+ * Generate inauthentic test vectors (i.e. ciphertext, AAD pairs that aren't the
+ * result of an encryption with the key) and verify that decryption fails.
  */
-static int test_aead_vs_generic_impl(const char *driver,
-				     const struct alg_test_desc *test_desc,
-				     struct aead_request *req,
-				     struct cipher_test_sglists *tsgls)
+static int test_aead_inauthentic_inputs(struct aead_extra_tests_ctx *ctx)
 {
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	const unsigned int ivsize = crypto_aead_ivsize(tfm);
-	const unsigned int maxauthsize = crypto_aead_alg(tfm)->maxauthsize;
-	const unsigned int blocksize = crypto_aead_blocksize(tfm);
-	const unsigned int maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < fuzz_iterations * 8; i++) {
+		/*
+		 * Since this part of the tests isn't comparing the
+		 * implementation to another, there's no point in testing any
+		 * test vectors other than inauthentic ones (vec.novrfy=1) here.
+		 *
+		 * If we're having trouble generating such a test vector, e.g.
+		 * if the algorithm keeps rejecting the generated keys, don't
+		 * retry forever; just continue on.
+		 */
+		try_to_generate_inauthentic_testvec(ctx);
+		if (ctx->vec.novrfy) {
+			generate_random_testvec_config(&ctx->cfg, ctx->cfgname,
+						       sizeof(ctx->cfgname));
+			err = test_aead_vec_cfg(ctx->driver, DECRYPT, &ctx->vec,
+						ctx->vec_name, &ctx->cfg,
+						ctx->req, ctx->tsgls);
+			if (err)
+				return err;
+		}
+		cond_resched();
+	}
+	return 0;
+}
+
+/*
+ * Test the AEAD algorithm against the corresponding generic implementation, if
+ * one is available.
+ */
+static int test_aead_vs_generic_impl(struct aead_extra_tests_ctx *ctx)
+{
+	struct crypto_aead *tfm = ctx->tfm;
 	const char *algname = crypto_aead_alg(tfm)->base.cra_name;
-	const char *generic_driver = test_desc->generic_driver;
+	const char *driver = ctx->driver;
+	const char *generic_driver = ctx->test_desc->generic_driver;
 	char _generic_driver[CRYPTO_MAX_ALG_NAME];
 	struct crypto_aead *generic_tfm = NULL;
 	struct aead_request *generic_req = NULL;
-	unsigned int maxkeysize;
 	unsigned int i;
-	struct aead_testvec vec = { 0 };
-	char vec_name[64];
-	struct testvec_config *cfg;
-	char cfgname[TESTVEC_CONFIG_NAMELEN];
 	int err;
 
-	if (noextratests)
-		return 0;
-
 	if (!generic_driver) { /* Use default naming convention? */
 		err = build_generic_driver_name(algname, _generic_driver);
 		if (err)
@@ -2179,12 +2402,6 @@ static int test_aead_vs_generic_impl(const char *driver,
 		return err;
 	}
 
-	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
-	if (!cfg) {
-		err = -ENOMEM;
-		goto out;
-	}
-
 	generic_req = aead_request_alloc(generic_tfm, GFP_KERNEL);
 	if (!generic_req) {
 		err = -ENOMEM;
@@ -2193,24 +2410,27 @@ static int test_aead_vs_generic_impl(const char *driver,
 
 	/* Check the algorithm properties for consistency. */
 
-	if (maxauthsize != crypto_aead_alg(generic_tfm)->maxauthsize) {
+	if (crypto_aead_maxauthsize(tfm) !=
+	    crypto_aead_maxauthsize(generic_tfm)) {
 		pr_err("alg: aead: maxauthsize for %s (%u) doesn't match generic impl (%u)\n",
-		       driver, maxauthsize,
-		       crypto_aead_alg(generic_tfm)->maxauthsize);
+		       driver, crypto_aead_maxauthsize(tfm),
+		       crypto_aead_maxauthsize(generic_tfm));
 		err = -EINVAL;
 		goto out;
 	}
 
-	if (ivsize != crypto_aead_ivsize(generic_tfm)) {
+	if (crypto_aead_ivsize(tfm) != crypto_aead_ivsize(generic_tfm)) {
 		pr_err("alg: aead: ivsize for %s (%u) doesn't match generic impl (%u)\n",
-		       driver, ivsize, crypto_aead_ivsize(generic_tfm));
+		       driver, crypto_aead_ivsize(tfm),
+		       crypto_aead_ivsize(generic_tfm));
 		err = -EINVAL;
 		goto out;
 	}
 
-	if (blocksize != crypto_aead_blocksize(generic_tfm)) {
+	if (crypto_aead_blocksize(tfm) != crypto_aead_blocksize(generic_tfm)) {
 		pr_err("alg: aead: blocksize for %s (%u) doesn't match generic impl (%u)\n",
-		       driver, blocksize, crypto_aead_blocksize(generic_tfm));
+		       driver, crypto_aead_blocksize(tfm),
+		       crypto_aead_blocksize(generic_tfm));
 		err = -EINVAL;
 		goto out;
 	}
@@ -2219,55 +2439,93 @@ static int test_aead_vs_generic_impl(const char *driver,
 	 * Now generate test vectors using the generic implementation, and test
 	 * the other implementation against them.
 	 */
+	for (i = 0; i < fuzz_iterations * 8; i++) {
+		generate_random_aead_testvec(generic_req, &ctx->vec,
+					     &ctx->test_desc->suite.aead,
+					     ctx->maxkeysize, ctx->maxdatasize,
+					     ctx->vec_name,
+					     sizeof(ctx->vec_name), false);
+		generate_random_testvec_config(&ctx->cfg, ctx->cfgname,
+					       sizeof(ctx->cfgname));
+		if (!ctx->vec.novrfy) {
+			err = test_aead_vec_cfg(driver, ENCRYPT, &ctx->vec,
+						ctx->vec_name, &ctx->cfg,
+						ctx->req, ctx->tsgls);
+			if (err)
+				goto out;
+		}
+		if (ctx->vec.crypt_error == 0 || ctx->vec.novrfy) {
+			err = test_aead_vec_cfg(driver, DECRYPT, &ctx->vec,
+						ctx->vec_name, &ctx->cfg,
+						ctx->req, ctx->tsgls);
+			if (err)
+				goto out;
+		}
+		cond_resched();
+	}
+	err = 0;
+out:
+	crypto_free_aead(generic_tfm);
+	aead_request_free(generic_req);
+	return err;
+}
 
-	maxkeysize = 0;
-	for (i = 0; i < test_desc->suite.aead.count; i++)
-		maxkeysize = max_t(unsigned int, maxkeysize,
-				   test_desc->suite.aead.vecs[i].klen);
+static int test_aead_extra(const char *driver,
+			   const struct alg_test_desc *test_desc,
+			   struct aead_request *req,
+			   struct cipher_test_sglists *tsgls)
+{
+	struct aead_extra_tests_ctx *ctx;
+	unsigned int i;
+	int err;
 
-	vec.key = kmalloc(maxkeysize, GFP_KERNEL);
-	vec.iv = kmalloc(ivsize, GFP_KERNEL);
-	vec.assoc = kmalloc(maxdatasize, GFP_KERNEL);
-	vec.ptext = kmalloc(maxdatasize, GFP_KERNEL);
-	vec.ctext = kmalloc(maxdatasize, GFP_KERNEL);
-	if (!vec.key || !vec.iv || !vec.assoc || !vec.ptext || !vec.ctext) {
+	if (noextratests)
+		return 0;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	ctx->req = req;
+	ctx->tfm = crypto_aead_reqtfm(req);
+	ctx->driver = driver;
+	ctx->test_desc = test_desc;
+	ctx->tsgls = tsgls;
+	ctx->maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN;
+	ctx->maxkeysize = 0;
+	for (i = 0; i < test_desc->suite.aead.count; i++)
+		ctx->maxkeysize = max_t(unsigned int, ctx->maxkeysize,
+					test_desc->suite.aead.vecs[i].klen);
+
+	ctx->vec.key = kmalloc(ctx->maxkeysize, GFP_KERNEL);
+	ctx->vec.iv = kmalloc(crypto_aead_ivsize(ctx->tfm), GFP_KERNEL);
+	ctx->vec.assoc = kmalloc(ctx->maxdatasize, GFP_KERNEL);
+	ctx->vec.ptext = kmalloc(ctx->maxdatasize, GFP_KERNEL);
+	ctx->vec.ctext = kmalloc(ctx->maxdatasize, GFP_KERNEL);
+	if (!ctx->vec.key || !ctx->vec.iv || !ctx->vec.assoc ||
+	    !ctx->vec.ptext || !ctx->vec.ctext) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	for (i = 0; i < fuzz_iterations * 8; i++) {
-		generate_random_aead_testvec(generic_req, &vec,
-					     maxkeysize, maxdatasize,
-					     vec_name, sizeof(vec_name));
-		generate_random_testvec_config(cfg, cfgname, sizeof(cfgname));
+	err = test_aead_inauthentic_inputs(ctx);
+	if (err)
+		goto out;
 
-		err = test_aead_vec_cfg(driver, ENCRYPT, &vec, vec_name, cfg,
-					req, tsgls);
-		if (err)
-			goto out;
-		err = test_aead_vec_cfg(driver, DECRYPT, &vec, vec_name, cfg,
-					req, tsgls);
-		if (err)
-			goto out;
-		cond_resched();
-	}
-	err = 0;
+	err = test_aead_vs_generic_impl(ctx);
 out:
-	kfree(cfg);
-	kfree(vec.key);
-	kfree(vec.iv);
-	kfree(vec.assoc);
-	kfree(vec.ptext);
-	kfree(vec.ctext);
-	crypto_free_aead(generic_tfm);
-	aead_request_free(generic_req);
+	kfree(ctx->vec.key);
+	kfree(ctx->vec.iv);
+	kfree(ctx->vec.assoc);
+	kfree(ctx->vec.ptext);
+	kfree(ctx->vec.ctext);
+	kfree(ctx);
 	return err;
 }
 #else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
-static int test_aead_vs_generic_impl(const char *driver,
-				     const struct alg_test_desc *test_desc,
-				     struct aead_request *req,
-				     struct cipher_test_sglists *tsgls)
+static int test_aead_extra(const char *driver,
+			   const struct alg_test_desc *test_desc,
+			   struct aead_request *req,
+			   struct cipher_test_sglists *tsgls)
 {
 	return 0;
 }
@@ -2336,7 +2594,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
 	if (err)
 		goto out;
 
-	err = test_aead_vs_generic_impl(driver, desc, req, tsgls);
+	err = test_aead_extra(driver, desc, req, tsgls);
 out:
 	free_cipher_test_sglists(tsgls);
 	aead_request_free(req);
@@ -2457,7 +2715,8 @@ static int test_skcipher_vec_cfg(const char *driver, int enc,
 	else
 		crypto_skcipher_clear_flags(tfm,
 					    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
-	err = crypto_skcipher_setkey(tfm, vec->key, vec->klen);
+	err = do_setkey(crypto_skcipher_setkey, tfm, vec->key, vec->klen,
+			cfg, alignmask);
 	if (err) {
 		if (err == vec->setkey_error)
 			return 0;
@@ -2647,7 +2906,7 @@ static void generate_random_cipher_testvec(struct skcipher_request *req,
 					   char *name, size_t max_namelen)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const unsigned int maxkeysize = tfm->keysize;
+	const unsigned int maxkeysize = crypto_skcipher_max_keysize(tfm);
 	const unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	struct scatterlist src, dst;
 	u8 iv[MAX_IVLEN];
@@ -2678,6 +2937,15 @@ static void generate_random_cipher_testvec(struct skcipher_request *req,
 	skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
 	skcipher_request_set_crypt(req, &src, &dst, vec->len, iv);
 	vec->crypt_error = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+	if (vec->crypt_error != 0) {
+		/*
+		 * The only acceptable error here is for an invalid length, so
+		 * skcipher decryption should fail with the same error too.
+		 * We'll test for this.  But to keep the API usage well-defined,
+		 * explicitly initialize the ciphertext buffer too.
+		 */
+		memset((u8 *)vec->ctext, 0, vec->len);
+	}
 done:
 	snprintf(name, max_namelen, "\"random: len=%u klen=%u\"",
 		 vec->len, vec->klen);
@@ -2693,6 +2961,7 @@ static int test_skcipher_vs_generic_impl(const char *driver,
 					 struct cipher_test_sglists *tsgls)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const unsigned int maxkeysize = crypto_skcipher_max_keysize(tfm);
 	const unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	const unsigned int blocksize = crypto_skcipher_blocksize(tfm);
 	const unsigned int maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN;
@@ -2751,9 +3020,19 @@ static int test_skcipher_vs_generic_impl(const char *driver,
 
 	/* Check the algorithm properties for consistency. */
 
-	if (tfm->keysize != generic_tfm->keysize) {
+	if (crypto_skcipher_min_keysize(tfm) !=
+	    crypto_skcipher_min_keysize(generic_tfm)) {
+		pr_err("alg: skcipher: min keysize for %s (%u) doesn't match generic impl (%u)\n",
+		       driver, crypto_skcipher_min_keysize(tfm),
+		       crypto_skcipher_min_keysize(generic_tfm));
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (maxkeysize != crypto_skcipher_max_keysize(generic_tfm)) {
 		pr_err("alg: skcipher: max keysize for %s (%u) doesn't match generic impl (%u)\n",
-		       driver, tfm->keysize, generic_tfm->keysize);
+		       driver, maxkeysize,
+		       crypto_skcipher_max_keysize(generic_tfm));
 		err = -EINVAL;
 		goto out;
 	}
@@ -2778,7 +3057,7 @@ static int test_skcipher_vs_generic_impl(const char *driver,
 	 * the other implementation against them.
 	 */
 
-	vec.key = kmalloc(tfm->keysize, GFP_KERNEL);
+	vec.key = kmalloc(maxkeysize, GFP_KERNEL);
 	vec.iv = kmalloc(ivsize, GFP_KERNEL);
 	vec.ptext = kmalloc(maxdatasize, GFP_KERNEL);
 	vec.ctext = kmalloc(maxdatasize, GFP_KERNEL);
@@ -3862,7 +4141,8 @@ static int alg_test_null(const struct alg_test_desc *desc,
 	return 0;
 }
 
-#define __VECS(tv)	{ .vecs = tv, .count = ARRAY_SIZE(tv) }
+#define ____VECS(tv)	.vecs = tv, .count = ARRAY_SIZE(tv)
+#define __VECS(tv)	{ ____VECS(tv) }
 
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
@@ -4023,6 +4303,58 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "blake2b-160",
+		.test = alg_test_hash,
+		.fips_allowed = 0,
+		.suite = {
+			.hash = __VECS(blake2b_160_tv_template)
+		}
+	}, {
+		.alg = "blake2b-256",
+		.test = alg_test_hash,
+		.fips_allowed = 0,
+		.suite = {
+			.hash = __VECS(blake2b_256_tv_template)
+		}
+	}, {
+		.alg = "blake2b-384",
+		.test = alg_test_hash,
+		.fips_allowed = 0,
+		.suite = {
+			.hash = __VECS(blake2b_384_tv_template)
+		}
+	}, {
+		.alg = "blake2b-512",
+		.test = alg_test_hash,
+		.fips_allowed = 0,
+		.suite = {
+			.hash = __VECS(blake2b_512_tv_template)
+		}
+	}, {
+		.alg = "blake2s-128",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(blakes2s_128_tv_template)
+		}
+	}, {
+		.alg = "blake2s-160",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(blakes2s_160_tv_template)
+		}
+	}, {
+		.alg = "blake2s-224",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(blakes2s_224_tv_template)
+		}
+	}, {
+		.alg = "blake2s-256",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(blakes2s_256_tv_template)
+		}
+	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -4116,7 +4448,10 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_aead,
 		.fips_allowed = 1,
 		.suite = {
-			.aead = __VECS(aes_ccm_tv_template)
+			.aead = {
+				____VECS(aes_ccm_tv_template),
+				.einval_allowed = 1,
+			}
 		}
 	}, {
 		.alg = "cfb(aes)",
@@ -4126,6 +4461,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(aes_cfb_tv_template)
 		},
 	}, {
+		.alg = "cfb(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_cfb_tv_template)
+		}
+	}, {
 		.alg = "chacha20",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -4260,6 +4601,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "curve25519",
+		.test = alg_test_kpp,
+		.suite = {
+			.kpp = __VECS(curve25519_tv_template)
+		}
+	}, {
 		.alg = "deflate",
 		.test = alg_test_comp,
 		.fips_allowed = 1,
@@ -4655,6 +5002,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.hash = __VECS(hmac_sha512_tv_template)
 		}
 	}, {
+		.alg = "hmac(sm3)",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(hmac_sm3_tv_template)
+		}
+	}, {
 		.alg = "hmac(streebog256)",
 		.test = alg_test_hash,
 		.suite = {
@@ -4791,6 +5144,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "ofb(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_ofb_tv_template)
+		}
+	}, {
 		.alg = "pcbc(fcrypt)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -4829,12 +5188,22 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(aes_ctr_rfc3686_tv_template)
 		}
 	}, {
+		.alg = "rfc3686(ctr(sm4))",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_ctr_rfc3686_tv_template)
+		}
+	}, {
 		.alg = "rfc4106(gcm(aes))",
 		.generic_driver = "rfc4106(gcm_base(ctr(aes-generic),ghash-generic))",
 		.test = alg_test_aead,
 		.fips_allowed = 1,
 		.suite = {
-			.aead = __VECS(aes_gcm_rfc4106_tv_template)
+			.aead = {
+				____VECS(aes_gcm_rfc4106_tv_template),
+				.einval_allowed = 1,
+				.esp_aad = 1,
+			}
 		}
 	}, {
 		.alg = "rfc4309(ccm(aes))",
@@ -4842,14 +5211,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.test = alg_test_aead,
 		.fips_allowed = 1,
 		.suite = {
-			.aead = __VECS(aes_ccm_rfc4309_tv_template)
+			.aead = {
+				____VECS(aes_ccm_rfc4309_tv_template),
+				.einval_allowed = 1,
+				.esp_aad = 1,
+			}
 		}
 	}, {
 		.alg = "rfc4543(gcm(aes))",
 		.generic_driver = "rfc4543(gcm_base(ctr(aes-generic),ghash-generic))",
 		.test = alg_test_aead,
 		.suite = {
-			.aead = __VECS(aes_gcm_rfc4543_tv_template)
+			.aead = {
+				____VECS(aes_gcm_rfc4543_tv_template),
+				.einval_allowed = 1,
+			}
 		}
 	}, {
 		.alg = "rfc7539(chacha20,poly1305)",
@@ -4861,7 +5237,11 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.alg = "rfc7539esp(chacha20,poly1305)",
 		.test = alg_test_aead,
 		.suite = {
-			.aead = __VECS(rfc7539esp_tv_template)
+			.aead = {
+				____VECS(rfc7539esp_tv_template),
+				.einval_allowed = 1,
+				.esp_aad = 1,
+			}
 		}
 	}, {
 		.alg = "rmd128",
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index ef7d21f39d4a..d29983908c38 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -85,16 +85,22 @@ struct cipher_testvec {
  * @ctext:	Pointer to the full authenticated ciphertext.  For AEADs that
  *		produce a separate "ciphertext" and "authentication tag", these
  *		two parts are concatenated: ciphertext || tag.
- * @novrfy:	Decryption verification failure expected?
+ * @novrfy:	If set, this is an inauthentic input test: only decryption is
+ *		tested, and it is expected to fail with either -EBADMSG or
+ *		@crypt_error if it is nonzero.
  * @wk:		Does the test need CRYPTO_TFM_REQ_FORBID_WEAK_KEYS?
  *		(e.g. setkey() needs to fail due to a weak key)
  * @klen:	Length of @key in bytes
  * @plen:	Length of @ptext in bytes
  * @alen:	Length of @assoc in bytes
  * @clen:	Length of @ctext in bytes
- * @setkey_error: Expected error from setkey()
- * @setauthsize_error: Expected error from setauthsize()
- * @crypt_error: Expected error from encrypt() and decrypt()
+ * @setkey_error: Expected error from setkey().  If set, neither encryption nor
+ *		  decryption is tested.
+ * @setauthsize_error: Expected error from setauthsize().  If set, neither
+ *		       encryption nor decryption is tested.
+ * @crypt_error: When @novrfy=0, the expected error from encrypt().  When
+ *		 @novrfy=1, an optional alternate error code that is acceptable
+ *		 for decrypt() to return besides -EBADMSG.
  */
 struct aead_testvec {
 	const char *key;
@@ -1030,6 +1036,1231 @@ static const struct kpp_testvec dh_tv_template[] = {
 	}
 };
 
+static const struct kpp_testvec curve25519_tv_template[] = {
+{
+	.secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
+		     0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
+		     0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
+		     0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
+	.b_public = (u8[32]){ 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
+		    0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
+		    0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
+		    0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
+	.expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+		    0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+		    0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+		    0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
+		     0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
+		     0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
+		     0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
+	.b_public = (u8[32]){ 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
+		    0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
+		    0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
+		    0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
+	.expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+		    0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+		    0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+		    0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 1 },
+	.b_public = (u8[32]){ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
+		    0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
+		    0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
+		    0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 1 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
+		    0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
+		    0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
+		    0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+		     0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+		     0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+		     0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
+	.b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+		    0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+		    0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+		    0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+	.expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+		    0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+		    0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+		    0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
+		     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
+	.expected_ss = (u8[32]){ 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
+		    0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
+		    0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
+		    0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+{
+	.secret = (u8[32]){ 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
+	.expected_ss = (u8[32]){ 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
+		    0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
+		    0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
+		    0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - normal case */
+{
+	.secret = (u8[32]){ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
+		     0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
+		     0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
+		     0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
+	.b_public = (u8[32]){ 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
+		    0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
+		    0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
+		    0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
+	.expected_ss = (u8[32]){ 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
+		    0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
+		    0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
+		    0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+	.secret = (u8[32]){ 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
+		     0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
+		     0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
+		     0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
+	.b_public = (u8[32]){ 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
+		    0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
+		    0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
+		    0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
+	.expected_ss = (u8[32]){ 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
+		    0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
+		    0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
+		    0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+	.secret = (u8[32]){ 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
+		     0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
+		     0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
+		     0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
+	.b_public = (u8[32]){ 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
+		    0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
+		    0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
+		    0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
+	.expected_ss = (u8[32]){ 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
+		    0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
+		    0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
+		    0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+	.secret = (u8[32]){ 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
+		     0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
+		     0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
+		     0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
+	.b_public = (u8[32]){ 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
+		    0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
+		    0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
+		    0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
+	.expected_ss = (u8[32]){ 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
+		    0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
+		    0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
+		    0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+	.secret = (u8[32]){ 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
+		     0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
+		     0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
+		     0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
+	.b_public = (u8[32]){ 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
+		    0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
+		    0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
+		    0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
+	.expected_ss = (u8[32]){ 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
+		    0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
+		    0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
+		    0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key on twist */
+{
+	.secret = (u8[32]){ 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
+		     0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
+		     0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
+		     0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
+	.b_public = (u8[32]){ 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
+		    0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
+		    0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
+		    0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
+	.expected_ss = (u8[32]){ 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
+		    0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
+		    0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
+		    0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
+		     0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
+		     0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
+		     0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
+	.b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
+		    0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
+		    0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
+		    0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
+		     0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
+		     0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
+		     0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
+	.b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
+		    0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
+		    0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
+		    0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
+		     0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
+		     0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
+		     0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
+		    0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
+		    0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
+		    0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
+		    0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
+		    0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
+		    0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
+		     0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
+		     0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
+		     0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
+	.b_public = (u8[32]){ 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
+		    0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
+		    0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
+		    0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
+		    0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
+		    0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
+		    0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
+		     0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
+		     0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
+		     0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
+	.b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+		    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+		    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+		    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
+		    0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
+		    0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
+		    0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case on twist */
+{
+	.secret = (u8[32]){ 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
+		     0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
+		     0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
+		     0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
+	.b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
+		    0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
+		    0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
+		    0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
+		     0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
+		     0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
+		     0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
+	.b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
+		    0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
+		    0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
+		    0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
+		     0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
+		     0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
+		     0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+		    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+		    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+		    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
+	.expected_ss = (u8[32]){ 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
+		    0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
+		    0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
+		    0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
+		     0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
+		     0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
+		     0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+	.expected_ss = (u8[32]){ 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
+		    0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
+		    0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
+		    0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
+		     0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
+		     0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
+		     0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
+		    0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
+		    0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
+		    0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
+	.expected_ss = (u8[32]){ 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
+		    0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
+		    0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
+		    0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
+		     0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
+		     0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
+		     0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+	.expected_ss = (u8[32]){ 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
+		    0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
+		    0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
+		    0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
+		     0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
+		     0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
+		     0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+		    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+		    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
+		    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
+		    0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
+		    0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
+		    0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for public key */
+{
+	.secret = (u8[32]){ 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
+		     0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
+		     0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
+		     0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
+	.b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
+		    0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
+		    0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
+		    0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
+		     0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
+		     0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
+		     0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
+	.b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
+		    0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
+		    0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
+		    0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
+		     0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
+		     0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
+		     0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
+	.b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
+		    0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
+		    0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
+		    0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
+		     0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
+		     0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
+		     0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
+	.b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
+		    0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
+		    0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
+		    0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
+		     0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
+		     0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
+		     0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.expected_ss = (u8[32]){ 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
+		    0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
+		    0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
+		    0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
+		     0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
+		     0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
+		     0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
+	.b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+	.expected_ss = (u8[32]){ 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
+		    0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
+		    0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
+		    0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
+		     0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
+		     0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
+		     0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
+	.b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+	.expected_ss = (u8[32]){ 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
+		    0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
+		    0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
+		    0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
+		     0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
+		     0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
+		     0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
+	.b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+	.expected_ss = (u8[32]){ 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
+		    0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
+		    0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
+		    0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
+		     0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
+		     0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
+		     0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
+	.b_public = (u8[32]){ 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
+		    0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
+		    0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
+		    0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
+		     0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
+		     0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
+		     0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
+	.b_public = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
+		    0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
+		    0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
+		    0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
+		     0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
+		     0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
+		     0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
+	.b_public = (u8[32]){ 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
+		    0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
+		    0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
+		    0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
+		     0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
+		     0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
+		     0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
+	.b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
+		    0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
+		    0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
+		    0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
+		     0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
+		     0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
+		     0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
+	.b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
+		    0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
+		    0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
+		    0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
+		     0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
+		     0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
+		     0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
+	.b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
+		    0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
+		    0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
+		    0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
+		     0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
+		     0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
+		     0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
+	.b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
+		    0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
+		    0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
+		    0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
+		     0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
+		     0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
+		     0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
+	.b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
+		    0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
+		    0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
+		    0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - public key >= p */
+{
+	.secret = (u8[32]){ 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
+		     0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
+		     0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
+		     0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
+	.b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.expected_ss = (u8[32]){ 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
+		    0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
+		    0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
+		    0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - RFC 7748 */
+{
+	.secret = (u8[32]){ 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
+		     0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
+		     0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
+		     0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
+	.b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
+		    0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
+		    0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
+		    0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+	.expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
+		    0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
+		    0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
+		    0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - RFC 7748 */
+{
+	.secret = (u8[32]){ 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
+		     0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
+		     0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
+		     0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
+	.b_public = (u8[32]){ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
+		    0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
+		    0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
+		    0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
+	.expected_ss = (u8[32]){ 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
+		    0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
+		    0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
+		    0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
+		    0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
+		    0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
+		    0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
+	.expected_ss = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
+		    0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
+		    0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
+		    0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
+	.expected_ss = (u8[32]){ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
+		    0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
+		    0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
+		    0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
+	.expected_ss = (u8[32]){ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
+		    0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
+		    0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
+		    0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
+	.expected_ss = (u8[32]){ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
+		    0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
+		    0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
+		    0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
+	.expected_ss = (u8[32]){ 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
+		    0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
+		    0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
+		    0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
+	.expected_ss = (u8[32]){ 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
+		    0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
+		    0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
+		    0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
+	.expected_ss = (u8[32]){ 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
+		    0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
+		    0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
+		    0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
+	.expected_ss = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
+		    0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
+		    0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
+		    0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
+	.expected_ss = (u8[32]){ 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
+		    0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
+		    0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
+		    0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
+	.expected_ss = (u8[32]){ 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
+		    0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
+		    0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
+		    0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
+	.expected_ss = (u8[32]){ 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
+		    0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
+		    0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
+		    0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
+	.expected_ss = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
+		    0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
+		    0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
+		    0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
+	.expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - edge case for shared secret */
+{
+	.secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
+		     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
+		     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
+		     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
+	.b_public = (u8[32]){ 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
+		    0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
+		    0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
+		    0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
+	.expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+	.secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+		     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+		     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+		     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+	.b_public = (u8[32]){ 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
+		    0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
+		    0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
+		    0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
+	.expected_ss = (u8[32]){ 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
+		    0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
+		    0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
+		    0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+	.secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+		     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+		     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+		     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+	.b_public = (u8[32]){ 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
+		    0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
+		    0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
+		    0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
+	.expected_ss = (u8[32]){ 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
+		    0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
+		    0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
+		    0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+	.secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+		     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+		     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+		     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+	.b_public = (u8[32]){ 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
+		    0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
+		    0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
+		    0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
+	.expected_ss = (u8[32]){ 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
+		    0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
+		    0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
+		    0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+	.secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+		     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+		     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+		     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+	.b_public = (u8[32]){ 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
+		    0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
+		    0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
+		    0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
+	.expected_ss = (u8[32]){ 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
+		    0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
+		    0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
+		    0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - checking for overflow */
+{
+	.secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
+		     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
+		     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
+		     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
+	.b_public = (u8[32]){ 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
+		    0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
+		    0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
+		    0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
+	.expected_ss = (u8[32]){ 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
+		    0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
+		    0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
+		    0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - private key == -1 (mod order) */
+{
+	.secret = (u8[32]){ 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
+		     0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
+		     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
+	.b_public = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+		    0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+		    0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+		    0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+	.expected_ss = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
+		    0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
+		    0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
+		    0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+},
+/* wycheproof - private key == 1 (mod order) on twist */
+{
+	.secret = (u8[32]){ 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
+		     0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
+		     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+		     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
+	.b_public = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+		    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+		    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+		    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+	.expected_ss = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
+		    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
+		    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
+		    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
+	.secret_size = 32,
+	.b_public_size = 32,
+	.expected_ss_size = 32,
+
+}
+};
+
 static const struct kpp_testvec ecdh_tv_template[] = {
 	{
 #ifndef CONFIG_CRYPTO_FIPS
@@ -2628,6 +3859,62 @@ static const struct hash_testvec sm3_tv_template[] = {
 	}
 };
 
+/* Example vectors below taken from
+ * GM/T 0042-2015 Appendix D.3
+ */
+static const struct hash_testvec hmac_sm3_tv_template[] = {
+	{
+		.key	= "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			  "\x11\x12\x13\x14\x15\x16\x17\x18"
+			  "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20",
+		.ksize	= 32,
+		.plaintext = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+			     "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+		.psize	= 112,
+		.digest	= "\xca\x05\xe1\x44\xed\x05\xd1\x85"
+			  "\x78\x40\xd1\xf3\x18\xa4\xa8\x66"
+			  "\x9e\x55\x9f\xc8\x39\x1f\x41\x44"
+			  "\x85\xbf\xdf\x7b\xb4\x08\x96\x3a",
+	}, {
+		.key	= "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			  "\x11\x12\x13\x14\x15\x16\x17\x18"
+			  "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			  "\x21\x22\x23\x24\x25",
+		.ksize	= 37,
+		.plaintext = "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+			"\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+			"\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+			"\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd",
+		.psize	= 50,
+		.digest	= "\x22\x0b\xf5\x79\xde\xd5\x55\x39"
+			  "\x3f\x01\x59\xf6\x6c\x99\x87\x78"
+			  "\x22\xa3\xec\xf6\x10\xd1\x55\x21"
+			  "\x54\xb4\x1d\x44\xb9\x4d\xb3\xae",
+	}, {
+		.key	= "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+			  "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+			 "\x0b\x0b\x0b\x0b\x0b\x0b",
+		.ksize	= 32,
+		.plaintext = "Hi There",
+		.psize	= 8,
+		.digest	= "\xc0\xba\x18\xc6\x8b\x90\xc8\x8b"
+			  "\xc0\x7d\xe7\x94\xbf\xc7\xd2\xc8"
+			  "\xd1\x9e\xc3\x1e\xd8\x77\x3b\xc2"
+			  "\xb3\x90\xc9\x60\x4e\x0b\xe1\x1e",
+	}, {
+		.key	= "Jefe",
+		.ksize	= 4,
+		.plaintext = "what do ya want for nothing?",
+		.psize	= 28,
+		.digest	= "\x2e\x87\xf1\xd1\x68\x62\xe6\xd9"
+			  "\x64\xb5\x0a\x52\x00\xbf\x2b\x10"
+			  "\xb7\x64\xfa\xa9\x68\x0a\x29\x6a"
+			  "\x24\x05\xf2\x4b\xec\x39\xf8\x82",
+	},
+};
+
 /*
  * SHA1 test vectors  from from FIPS PUB 180-1
  * Long vector from CAVS 5.0
@@ -11846,6 +13133,133 @@ static const struct cipher_testvec sm4_ctr_tv_template[] = {
 	}
 };
 
+static const struct cipher_testvec sm4_ctr_rfc3686_tv_template[] = {
+	{
+		.key	= "\xae\x68\x52\xf8\x12\x10\x67\xcc"
+			  "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
+			  "\x00\x00\x00\x30",
+		.klen	= 20,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "Single block msg",
+		.ctext	= "\x20\x9b\x77\x31\xd3\x65\xdb\xab"
+			  "\x9e\x48\x74\x7e\xbd\x13\x83\xeb",
+		.len	= 16,
+	}, {
+		.key	= "\x7e\x24\x06\x78\x17\xfa\xe0\xd7"
+			  "\x43\xd6\xce\x1f\x32\x53\x91\x63"
+			  "\x00\x6c\xb6\xdb",
+		.klen	= 20,
+		.iv	= "\xc0\x54\x3b\x59\xda\x48\xd9\x0b",
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.ctext	= "\x33\xe0\x28\x01\x92\xed\xc9\x1e"
+			  "\x97\x35\xd9\x4a\xec\xd4\xbc\x23"
+			  "\x4f\x35\x9f\x1c\x55\x1f\xe0\x27"
+			  "\xe0\xdf\xc5\x43\xbc\xb0\x23\x94",
+		.len	= 32,
+	}
+};
+
+static const struct cipher_testvec sm4_ofb_tv_template[] = {
+	{ /* From: draft-ribose-cfrg-sm4-02, paragraph 12.2.3 */
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.iv	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\x69\x3d\x9a\x53\x5b\xad\x5b\xb1"
+			  "\x78\x6f\x53\xd7\x25\x3a\x70\x56"
+			  "\xf2\x07\x5d\x28\xb5\x23\x5f\x58"
+			  "\xd5\x00\x27\xe4\x17\x7d\x2b\xce",
+		.len	= 32,
+	}, { /* From: draft-ribose-cfrg-sm4-09, appendix A.2.3, Example 1 */
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\xac\x32\x36\xcb\x86\x1d\xd3\x16"
+			  "\xe6\x41\x3b\x4e\x3c\x75\x24\xb7"
+			  "\x1d\x01\xac\xa2\x48\x7c\xa5\x82"
+			  "\xcb\xf5\x46\x3e\x66\x98\x53\x9b",
+		.len	= 32,
+	}, { /* From: draft-ribose-cfrg-sm4-09, appendix A.2.3, Example 2 */
+		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.klen	= 16,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\x5d\xcc\xcd\x25\xa8\x4b\xa1\x65"
+			  "\x60\xd7\xf2\x65\x88\x70\x68\x49"
+			  "\x33\xfa\x16\xbd\x5c\xd9\xc8\x56"
+			  "\xca\xca\xa1\xe1\x01\x89\x7a\x97",
+		.len	= 32,
+	}
+};
+
+static const struct cipher_testvec sm4_cfb_tv_template[] = {
+	{ /* From: draft-ribose-cfrg-sm4-02, paragraph 12.2.4 */
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.iv	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\x69\x3d\x9a\x53\x5b\xad\x5b\xb1"
+			  "\x78\x6f\x53\xd7\x25\x3a\x70\x56"
+			  "\x9e\xd2\x58\xa8\x5a\x04\x67\xcc"
+			  "\x92\xaa\xb3\x93\xdd\x97\x89\x95",
+		.len	= 32,
+	}, { /* From: draft-ribose-cfrg-sm4-09, appendix A.2.4, Example 1 */
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\xac\x32\x36\xcb\x86\x1d\xd3\x16"
+			  "\xe6\x41\x3b\x4e\x3c\x75\x24\xb7"
+			  "\x69\xd4\xc5\x4e\xd4\x33\xb9\xa0"
+			  "\x34\x60\x09\xbe\xb3\x7b\x2b\x3f",
+		.len	= 32,
+	}, { /* From: draft-ribose-cfrg-sm4-09, appendix A.2.4, Example 2 */
+		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.klen	= 16,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\x5d\xcc\xcd\x25\xa8\x4b\xa1\x65"
+			  "\x60\xd7\xf2\x65\x88\x70\x68\x49"
+			  "\x0d\x9b\x86\xff\x20\xc3\xbf\xe1"
+			  "\x15\xff\xa0\x2c\xa6\x19\x2c\xc5",
+		.len	= 32,
+	}
+};
+
 /* Cast6 test vectors from RFC 2612 */
 static const struct cipher_testvec cast6_tv_template[] = {
 	{
@@ -17043,6 +18457,198 @@ static const struct aead_testvec aes_gcm_tv_template[] = {
 			  "\x25\x19\x49\x8e\x80\xf1\x47\x8f"
 			  "\x37\xba\x55\xbd\x6d\x27\x61\x8c",
 		.clen	= 76,
+	}, {
+		.key	= "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6"
+			  "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e"
+			  "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+			  "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+		.klen	= 32,
+		.iv	= "\x00\xff\xff\xff\xff\x00\x00\xff"
+			  "\xff\xff\x00\xff",
+		.ptext	= "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+			  "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+			  "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+			  "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+			  "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+			  "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+			  "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+			  "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+			  "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+			  "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+			  "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+			  "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+			  "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+			  "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+			  "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+			  "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+			  "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+			  "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+			  "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+			  "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+			  "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+			  "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+			  "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+			  "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+			  "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+			  "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+			  "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+			  "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+			  "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+			  "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+			  "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+			  "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+			  "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+			  "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+			  "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+			  "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+			  "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+			  "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+			  "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+			  "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+			  "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+			  "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+			  "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+			  "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+			  "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+			  "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+			  "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+			  "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+			  "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+			  "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+			  "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+			  "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+			  "\x87\x79\x60\x38\x46\xb4\x25\x57"
+			  "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+			  "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+			  "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+			  "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+			  "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+			  "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+			  "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+			  "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+			  "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+			  "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+			  "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+			  "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+			  "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+			  "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+			  "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+			  "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+			  "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+			  "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+			  "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+			  "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+			  "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+			  "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+			  "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+			  "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+			  "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+			  "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+			  "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+			  "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+			  "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+			  "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+			  "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+			  "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+			  "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+			  "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+			  "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+			  "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+			  "\xa4\x78\xdb\x74\x3d\x8b\xb5",
+		.plen	= 719,
+		.ctext	= "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20"
+			  "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0"
+			  "\xa2\xb4\x37\x19\x11\x58\xb6\x0b"
+			  "\x4c\x1d\x38\x05\x54\xd1\x16\x73"
+			  "\x8e\x1c\x20\x90\xa2\x9a\xb7\x74"
+			  "\x47\xe6\xd8\xfc\x18\x3a\xb4\xea"
+			  "\xd5\x16\x5a\x2c\x53\x01\x46\xb3"
+			  "\x18\x33\x74\x6c\x50\xf2\xe8\xc0"
+			  "\x73\xda\x60\x22\xeb\xe3\xe5\x9b"
+			  "\x20\x93\x6c\x4b\x37\x99\xb8\x23"
+			  "\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7"
+			  "\xc3\x8f\xfb\x4a\x37\xd9\x39\x95"
+			  "\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b"
+			  "\x02\xf1\x63\xfc\x9b\xfc\xc5\xab"
+			  "\xb9\x14\x13\x21\xdf\xce\xaa\x88"
+			  "\x44\x30\x1e\xce\x26\x01\x92\xf8"
+			  "\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0"
+			  "\x89\xca\x94\x66\x11\x21\x97\xca"
+			  "\x3e\x83\x74\x2d\xdb\x4d\x11\xeb"
+			  "\x97\xc2\x14\xff\x9e\x1e\xa0\x6b"
+			  "\x08\xb4\x31\x2b\x85\xc6\x85\x6c"
+			  "\x90\xec\x39\xc0\xec\xb3\xb5\x4e"
+			  "\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4"
+			  "\x56\xfe\xce\x18\x33\x6d\x0b\x2d"
+			  "\x33\xda\xc8\x05\x5c\xb4\x09\x2a"
+			  "\xde\x6b\x52\x98\x01\xef\x36\x3d"
+			  "\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1"
+			  "\x01\x2d\x42\x49\xc3\xb6\x84\xbb"
+			  "\x48\x96\xe0\x90\x93\x6c\x48\x64"
+			  "\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8"
+			  "\x7a\x23\x7b\xaa\x20\x56\x12\xae"
+			  "\x16\x9d\x94\x0f\x54\xa1\xec\xca"
+			  "\x51\x4e\xf2\x39\xf4\xf8\x5f\x04"
+			  "\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1"
+			  "\xf5\x3c\xd8\x62\xa3\xed\x47\x89"
+			  "\x85\x4c\xe5\xdb\xac\x9e\x17\x1d"
+			  "\x0c\x09\xe3\x3e\x39\x5b\x4d\x74"
+			  "\x0e\xf5\x34\xee\x70\x11\x4c\xfd"
+			  "\xdb\x34\xb1\xb5\x10\x3f\x73\xb7"
+			  "\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c"
+			  "\x8d\x35\x83\xd4\x11\x44\x6e\x6c"
+			  "\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb"
+			  "\xa9\x57\x24\x37\xe6\x1f\xdd\xcf"
+			  "\x16\x2a\x13\xf9\x6a\x2d\x90\xa0"
+			  "\x03\x60\x7a\xed\x69\xd5\x00\x8b"
+			  "\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37"
+			  "\xc1\x26\xce\x90\x97\x22\x64\x64"
+			  "\xc1\x72\x43\x1b\xf6\xac\xc1\x54"
+			  "\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2"
+			  "\xe4\x85\xda\xe0\x20\x5f\xf4\xb4"
+			  "\x15\xb5\xa0\x8d\x12\x74\x49\x23"
+			  "\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb"
+			  "\xf8\xcc\x62\x7b\xfb\x93\x07\x41"
+			  "\x61\x26\x94\x58\x70\xa6\x3c\xe4"
+			  "\xff\x58\xc4\x13\x3d\xcb\x36\x6b"
+			  "\x32\xe5\xb2\x6d\x03\x74\x6f\x76"
+			  "\x93\x77\xde\x48\xc4\xfa\x30\x4a"
+			  "\xda\x49\x80\x77\x0f\x1c\xbe\x11"
+			  "\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1"
+			  "\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2"
+			  "\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91"
+			  "\xb8\xfb\x86\xdc\x46\x24\x91\x60"
+			  "\x6c\x2f\xc9\x41\x37\x51\x49\x54"
+			  "\x09\x81\x21\xf3\x03\x9f\x2b\xe3"
+			  "\x1f\x39\x63\xaf\xf4\xd7\x53\x60"
+			  "\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d"
+			  "\x75\x54\x65\x93\xfe\xb1\x68\x6b"
+			  "\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf"
+			  "\x01\x12\x27\xb4\xfe\xe4\x79\x7a"
+			  "\x40\x5b\x51\x4b\xdf\x38\xec\xb1"
+			  "\x6a\x56\xff\x35\x4d\x42\x33\xaa"
+			  "\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35"
+			  "\x62\x10\xd4\xec\xeb\xc5\x7e\x45"
+			  "\x1c\x6f\x17\xca\x3b\x8e\x2d\x66"
+			  "\x4f\x4b\x36\x56\xcd\x1b\x59\xaa"
+			  "\xd2\x9b\x17\xb9\x58\xdf\x7b\x64"
+			  "\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e"
+			  "\xaa\xe2\x5d\x09\x71\x32\x5f\xb6"
+			  "\x29\xbe\xe7\xc7\x52\x7e\x91\x82"
+			  "\x6b\x6d\x33\xe1\x34\x06\x36\x21"
+			  "\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea"
+			  "\x49\x2c\xb5\xca\xf7\xb0\x37\xea"
+			  "\x1f\xed\x10\x04\xd9\x48\x0d\x1a"
+			  "\x1c\xfb\xe7\x84\x0e\x83\x53\x74"
+			  "\xc7\x65\xe2\x5c\xe5\xba\x73\x4c"
+			  "\x0e\xe1\xb5\x11\x45\x61\x43\x46"
+			  "\xaa\x25\x8f\xbd\x85\x08\xfa\x4c"
+			  "\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb"
+			  "\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d"
+			  "\x38\x58\x9e\x8a\x43\xdc\x57"
+			  "\xd1\x81\x7d\x2b\xe9\xff\x99\x3a"
+			  "\x4b\x24\x52\x58\x55\xe1\x49\x14",
+		.clen	= 735,
 	}
 };
 
@@ -31567,4 +33173,528 @@ static const struct aead_testvec essiv_hmac_sha256_aes_cbc_tv_temp[] = {
 	},
 };
 
+static const char blake2_ordered_sequence[] =
+	"\x00\x01\x02\x03\x04\x05\x06\x07"
+	"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+	"\x10\x11\x12\x13\x14\x15\x16\x17"
+	"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+	"\x20\x21\x22\x23\x24\x25\x26\x27"
+	"\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+	"\x30\x31\x32\x33\x34\x35\x36\x37"
+	"\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+	"\x40\x41\x42\x43\x44\x45\x46\x47"
+	"\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+	"\x50\x51\x52\x53\x54\x55\x56\x57"
+	"\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+	"\x60\x61\x62\x63\x64\x65\x66\x67"
+	"\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+	"\x70\x71\x72\x73\x74\x75\x76\x77"
+	"\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+	"\x80\x81\x82\x83\x84\x85\x86\x87"
+	"\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+	"\x90\x91\x92\x93\x94\x95\x96\x97"
+	"\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+	"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+	"\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+	"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+	"\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+	"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+	"\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+	"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+	"\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+	"\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+	"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+	"\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
+
+static const struct hash_testvec blake2b_160_tv_template[] = {{
+	.digest = (u8[]){ 0x33, 0x45, 0x52, 0x4a, 0xbf, 0x6b, 0xbe, 0x18,
+			  0x09, 0x44, 0x92, 0x24, 0xb5, 0x97, 0x2c, 0x41,
+			  0x79, 0x0b, 0x6c, 0xf2, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x11, 0xcc, 0x66, 0x61, 0xe9, 0x22, 0xb0, 0xe4,
+			  0x07, 0xe0, 0xa5, 0x72, 0x49, 0xc3, 0x8d, 0x4f,
+			  0xf7, 0x6d, 0x8e, 0xc8, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x31, 0xe3, 0xd9, 0xd5, 0x4e, 0x72, 0xd8, 0x0b,
+			  0x2b, 0x3b, 0xd7, 0x6b, 0x82, 0x7a, 0x1d, 0xfb,
+			  0x56, 0x2f, 0x79, 0x4c, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0x28, 0x20, 0xd1, 0xbe, 0x7f, 0xcc, 0xc1, 0x62,
+			  0xd9, 0x0d, 0x9a, 0x4b, 0x47, 0xd1, 0x5e, 0x04,
+			  0x74, 0x2a, 0x53, 0x17, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0x45, 0xe9, 0x95, 0xb6, 0xc4, 0xe8, 0x22, 0xea,
+			  0xfe, 0xd2, 0x37, 0xdb, 0x46, 0xbf, 0xf1, 0x25,
+			  0xd5, 0x03, 0x1d, 0x81, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x7e, 0xb9, 0xf2, 0x9b, 0x2f, 0xc2, 0x01, 0xd4,
+			  0xb0, 0x4f, 0x08, 0x2b, 0x8e, 0xbd, 0x06, 0xef,
+			  0x1c, 0xc4, 0x25, 0x95, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0x6e, 0x35, 0x01, 0x70, 0xbf, 0xb6, 0xc4, 0xba,
+			  0x33, 0x1b, 0xa6, 0xd3, 0xc2, 0x5d, 0xb4, 0x03,
+			  0x95, 0xaf, 0x29, 0x16, },
+}};
+
+static const struct hash_testvec blake2b_256_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0x9d, 0xf1, 0x4b, 0x72, 0x48, 0x76, 0x4a, 0x86,
+			  0x91, 0x97, 0xc3, 0x5e, 0x39, 0x2d, 0x2a, 0x6d,
+			  0x6f, 0xdc, 0x5b, 0x79, 0xd5, 0x97, 0x29, 0x79,
+			  0x20, 0xfd, 0x3f, 0x14, 0x91, 0xb4, 0x42, 0xd2, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0x39, 0xa7, 0xeb, 0x9f, 0xed, 0xc1, 0x9a, 0xab,
+			  0xc8, 0x34, 0x25, 0xc6, 0x75, 0x5d, 0xd9, 0x0e,
+			  0x6f, 0x9d, 0x0c, 0x80, 0x49, 0x64, 0xa1, 0xf4,
+			  0xaa, 0xee, 0xa3, 0xb9, 0xfb, 0x59, 0x98, 0x35, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.digest = (u8[]){ 0xc3, 0x08, 0xb1, 0xbf, 0xe4, 0xf9, 0xbc, 0xb4,
+			  0x75, 0xaf, 0x3f, 0x59, 0x6e, 0xae, 0xde, 0x6a,
+			  0xa3, 0x8e, 0xb5, 0x94, 0xad, 0x30, 0xf0, 0x17,
+			  0x1c, 0xfb, 0xd8, 0x3e, 0x8a, 0xbe, 0xed, 0x9c, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x34, 0x75, 0x8b, 0x64, 0x71, 0x35, 0x62, 0x82,
+			  0x97, 0xfb, 0x09, 0xc7, 0x93, 0x0c, 0xd0, 0x4e,
+			  0x95, 0x28, 0xe5, 0x66, 0x91, 0x12, 0xf5, 0xb1,
+			  0x31, 0x84, 0x93, 0xe1, 0x4d, 0xe7, 0x7e, 0x55, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0xce, 0x74, 0xa9, 0x2e, 0xe9, 0x40, 0x3d, 0xa2,
+			  0x11, 0x4a, 0x99, 0x25, 0x7a, 0x34, 0x5d, 0x35,
+			  0xdf, 0x6a, 0x48, 0x79, 0x2a, 0x93, 0x93, 0xff,
+			  0x1f, 0x3c, 0x39, 0xd0, 0x71, 0x1f, 0x20, 0x7b, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x2e, 0x84, 0xdb, 0xa2, 0x5f, 0x0e, 0xe9, 0x52,
+			  0x79, 0x50, 0x69, 0x9f, 0xf1, 0xfd, 0xfc, 0x9d,
+			  0x89, 0x83, 0xa9, 0xb6, 0xa4, 0xd5, 0xfa, 0xb5,
+			  0xbe, 0x35, 0x1a, 0x17, 0x8a, 0x2c, 0x7f, 0x7d, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x2e, 0x26, 0xf0, 0x09, 0x02, 0x65, 0x90, 0x09,
+			  0xcc, 0xf5, 0x4c, 0x44, 0x74, 0x0e, 0xa0, 0xa8,
+			  0x25, 0x4a, 0xda, 0x61, 0x56, 0x95, 0x7d, 0x3f,
+			  0x6d, 0xc0, 0x43, 0x17, 0x95, 0x89, 0xcd, 0x9d, },
+}};
+
+static const struct hash_testvec blake2b_384_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0xcc, 0x01, 0x08, 0x85, 0x36, 0xf7, 0x84, 0xf0,
+			  0xbb, 0x76, 0x9e, 0x41, 0xc4, 0x95, 0x7b, 0x6d,
+			  0x0c, 0xde, 0x1f, 0xcc, 0x8c, 0xf1, 0xd9, 0x1f,
+			  0xc4, 0x77, 0xd4, 0xdd, 0x6e, 0x3f, 0xbf, 0xcd,
+			  0x43, 0xd1, 0x69, 0x8d, 0x14, 0x6f, 0x34, 0x8b,
+			  0x2c, 0x36, 0xa3, 0x39, 0x68, 0x2b, 0xec, 0x3f, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0xc8, 0xf8, 0xf0, 0xa2, 0x69, 0xfa, 0xcc, 0x4d,
+			  0x32, 0x5f, 0x13, 0x88, 0xca, 0x71, 0x99, 0x8f,
+			  0xf7, 0x30, 0x41, 0x5d, 0x6e, 0x34, 0xb7, 0x6e,
+			  0x3e, 0xd0, 0x46, 0xb6, 0xca, 0x30, 0x66, 0xb2,
+			  0x6f, 0x0c, 0x35, 0x54, 0x17, 0xcd, 0x26, 0x1b,
+			  0xef, 0x48, 0x98, 0xe0, 0x56, 0x7c, 0x05, 0xd2, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.digest = (u8[]){ 0x15, 0x09, 0x7a, 0x90, 0x13, 0x23, 0xab, 0x0c,
+			  0x0b, 0x43, 0x21, 0x9a, 0xb5, 0xc6, 0x0c, 0x2e,
+			  0x7c, 0x57, 0xfc, 0xcc, 0x4b, 0x0f, 0xf0, 0x57,
+			  0xb7, 0x9c, 0xe7, 0x0f, 0xe1, 0x57, 0xac, 0x37,
+			  0x77, 0xd4, 0xf4, 0x2f, 0x03, 0x3b, 0x64, 0x09,
+			  0x84, 0xa0, 0xb3, 0x24, 0xb7, 0xae, 0x47, 0x5e, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0x0b, 0x82, 0x88, 0xca, 0x05, 0x2f, 0x1b, 0x15,
+			  0xdc, 0xbb, 0x22, 0x27, 0x11, 0x6b, 0xf4, 0xd1,
+			  0xe9, 0x8f, 0x1b, 0x0b, 0x58, 0x3f, 0x5e, 0x86,
+			  0x80, 0x82, 0x6f, 0x8e, 0x54, 0xc1, 0x9f, 0x12,
+			  0xcf, 0xe9, 0x56, 0xc1, 0xfc, 0x1a, 0x08, 0xb9,
+			  0x4a, 0x57, 0x0a, 0x76, 0x3c, 0x15, 0x33, 0x18, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0x4a, 0x81, 0x55, 0xb9, 0x79, 0x42, 0x8c, 0xc6,
+			  0x4f, 0xfe, 0xca, 0x82, 0x3b, 0xb2, 0xf7, 0xbc,
+			  0x5e, 0xfc, 0xab, 0x09, 0x1c, 0xd6, 0x3b, 0xe1,
+			  0x50, 0x82, 0x3b, 0xde, 0xc7, 0x06, 0xee, 0x3b,
+			  0x29, 0xce, 0xe5, 0x68, 0xe0, 0xff, 0xfa, 0xe1,
+			  0x7a, 0xf1, 0xc0, 0xfe, 0x57, 0xf4, 0x60, 0x49, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x34, 0xbd, 0xe1, 0x99, 0x43, 0x9f, 0x82, 0x72,
+			  0xe7, 0xed, 0x94, 0x9e, 0xe1, 0x84, 0xee, 0x82,
+			  0xfd, 0x26, 0x23, 0xc4, 0x17, 0x8d, 0xf5, 0x04,
+			  0xeb, 0xb7, 0xbc, 0xb8, 0xf3, 0x68, 0xb7, 0xad,
+			  0x94, 0x8e, 0x05, 0x3f, 0x8a, 0x5d, 0x8d, 0x81,
+			  0x3e, 0x88, 0xa7, 0x8c, 0xa2, 0xd5, 0xdc, 0x76, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0x22, 0x14, 0xf4, 0xb0, 0x4c, 0xa8, 0xb5, 0x7d,
+			  0xa7, 0x5c, 0x04, 0xeb, 0xd8, 0x8d, 0x04, 0x71,
+			  0xc7, 0x3c, 0xc7, 0x6e, 0x8b, 0x20, 0x36, 0x40,
+			  0x9d, 0xd0, 0x60, 0xc6, 0xe3, 0x0b, 0x6e, 0x50,
+			  0xf5, 0xaf, 0xf5, 0xc6, 0x3b, 0xe3, 0x84, 0x6a,
+			  0x93, 0x1b, 0x12, 0xd6, 0x18, 0x27, 0xba, 0x36, },
+}};
+
+static const struct hash_testvec blake2b_512_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0x44, 0x4b, 0x24, 0x0f, 0xe3, 0xed, 0x86, 0xd0,
+			  0xe2, 0xef, 0x4c, 0xe7, 0xd8, 0x51, 0xed, 0xde,
+			  0x22, 0x15, 0x55, 0x82, 0xaa, 0x09, 0x14, 0x79,
+			  0x7b, 0x72, 0x6c, 0xd0, 0x58, 0xb6, 0xf4, 0x59,
+			  0x32, 0xe0, 0xe1, 0x29, 0x51, 0x68, 0x76, 0x52,
+			  0x7b, 0x1d, 0xd8, 0x8f, 0xc6, 0x6d, 0x71, 0x19,
+			  0xf4, 0xab, 0x3b, 0xed, 0x93, 0xa6, 0x1a, 0x0e,
+			  0x2d, 0x2d, 0x2a, 0xea, 0xc3, 0x36, 0xd9, 0x58, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.digest = (u8[]){ 0x10, 0xeb, 0xb6, 0x77, 0x00, 0xb1, 0x86, 0x8e,
+			  0xfb, 0x44, 0x17, 0x98, 0x7a, 0xcf, 0x46, 0x90,
+			  0xae, 0x9d, 0x97, 0x2f, 0xb7, 0xa5, 0x90, 0xc2,
+			  0xf0, 0x28, 0x71, 0x79, 0x9a, 0xaa, 0x47, 0x86,
+			  0xb5, 0xe9, 0x96, 0xe8, 0xf0, 0xf4, 0xeb, 0x98,
+			  0x1f, 0xc2, 0x14, 0xb0, 0x05, 0xf4, 0x2d, 0x2f,
+			  0xf4, 0x23, 0x34, 0x99, 0x39, 0x16, 0x53, 0xdf,
+			  0x7a, 0xef, 0xcb, 0xc1, 0x3f, 0xc5, 0x15, 0x68, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0xd2, 0x11, 0x31, 0x29, 0x3f, 0xea, 0xca, 0x72,
+			  0x21, 0xe4, 0x06, 0x65, 0x05, 0x2a, 0xd1, 0x02,
+			  0xc0, 0x8d, 0x7b, 0xf1, 0x09, 0x3c, 0xef, 0x88,
+			  0xe1, 0x68, 0x0c, 0xf1, 0x3b, 0xa4, 0xe3, 0x03,
+			  0xed, 0xa0, 0xe3, 0x60, 0x58, 0xa0, 0xdb, 0x52,
+			  0x8a, 0x66, 0x43, 0x09, 0x60, 0x1a, 0xbb, 0x67,
+			  0xc5, 0x84, 0x31, 0x40, 0xfa, 0xde, 0xc1, 0xd0,
+			  0xff, 0x3f, 0x4a, 0x69, 0xd9, 0x92, 0x26, 0x86, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0xa3, 0x3e, 0x50, 0xbc, 0xfb, 0xd9, 0xf0, 0x82,
+			  0xa6, 0xd1, 0xdf, 0xaf, 0x82, 0xd0, 0xcf, 0x84,
+			  0x9a, 0x25, 0x3c, 0xae, 0x6d, 0xb5, 0xaf, 0x01,
+			  0xd7, 0xaf, 0xed, 0x50, 0xdc, 0xe2, 0xba, 0xcc,
+			  0x8c, 0x38, 0xf5, 0x16, 0x89, 0x38, 0x86, 0xce,
+			  0x68, 0x10, 0x63, 0x64, 0xa5, 0x79, 0x53, 0xb5,
+			  0x2e, 0x8e, 0xbc, 0x0a, 0xce, 0x95, 0xc0, 0x1e,
+			  0x69, 0x59, 0x1d, 0x3b, 0xd8, 0x19, 0x90, 0xd7, },
+}, {
+	.ksize = 64,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x65, 0x67, 0x6d, 0x80, 0x06, 0x17, 0x97, 0x2f,
+			  0xbd, 0x87, 0xe4, 0xb9, 0x51, 0x4e, 0x1c, 0x67,
+			  0x40, 0x2b, 0x7a, 0x33, 0x10, 0x96, 0xd3, 0xbf,
+			  0xac, 0x22, 0xf1, 0xab, 0xb9, 0x53, 0x74, 0xab,
+			  0xc9, 0x42, 0xf1, 0x6e, 0x9a, 0xb0, 0xea, 0xd3,
+			  0x3b, 0x87, 0xc9, 0x19, 0x68, 0xa6, 0xe5, 0x09,
+			  0xe1, 0x19, 0xff, 0x07, 0x78, 0x7b, 0x3e, 0xf4,
+			  0x83, 0xe1, 0xdc, 0xdc, 0xcf, 0x6e, 0x30, 0x22, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0xc2, 0x96, 0x2c, 0x6b, 0x84, 0xff, 0xee, 0xea,
+			  0x9b, 0xb8, 0x55, 0x2d, 0x6b, 0xa5, 0xd5, 0xe5,
+			  0xbd, 0xb1, 0x54, 0xb6, 0x1e, 0xfb, 0x63, 0x16,
+			  0x6e, 0x22, 0x04, 0xf0, 0x82, 0x7a, 0xc6, 0x99,
+			  0xf7, 0x4c, 0xff, 0x93, 0x71, 0x57, 0x64, 0xd0,
+			  0x08, 0x60, 0x39, 0x98, 0xb8, 0xd2, 0x2b, 0x4e,
+			  0x81, 0x8d, 0xe4, 0x8f, 0xb2, 0x1e, 0x8f, 0x99,
+			  0x98, 0xf1, 0x02, 0x9b, 0x4c, 0x7c, 0x97, 0x1a, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0x0f, 0x32, 0x05, 0x09, 0xad, 0x9f, 0x25, 0xf7,
+			  0xf2, 0x00, 0x71, 0xc9, 0x9f, 0x08, 0x58, 0xd1,
+			  0x67, 0xc3, 0xa6, 0x2c, 0x0d, 0xe5, 0x7c, 0x15,
+			  0x35, 0x18, 0x5a, 0x68, 0xc1, 0xca, 0x1c, 0x6e,
+			  0x0f, 0xc4, 0xf6, 0x0c, 0x43, 0xe1, 0xb4, 0x3d,
+			  0x28, 0xe4, 0xc7, 0xa1, 0xcf, 0x6b, 0x17, 0x4e,
+			  0xf1, 0x5b, 0xb5, 0x53, 0xd4, 0xa7, 0xd0, 0x5b,
+			  0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, },
+}};
+
+static const struct hash_testvec blakes2s_128_tv_template[] = {{
+	.digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
+			  0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
+			  0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
+			  0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
+			  0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
+			  0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
+			  0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
+			  0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
+}};
+
+static const struct hash_testvec blakes2s_160_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
+			  0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
+			  0xe3, 0xf2, 0x84, 0xff, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
+			  0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
+			  0x9b, 0x2d, 0x35, 0x05, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
+			  0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
+			  0x79, 0x65, 0x32, 0x93, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
+			  0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
+			  0xa2, 0x3a, 0x56, 0x9c, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
+			  0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
+			  0x83, 0x39, 0x0f, 0x30, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
+			  0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
+			  0xac, 0xa6, 0x81, 0x63, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
+			  0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
+			  0x0a, 0xf6, 0x73, 0xe8, },
+}};
+
+static const struct hash_testvec blakes2s_224_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
+			  0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
+			  0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
+			  0x48, 0x21, 0x97, 0xbb, },
+}, {
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
+			  0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
+			  0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
+			  0x2b, 0xa4, 0xd5, 0xf6, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
+			  0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
+			  0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
+			  0xa7, 0x19, 0xfc, 0xb8, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
+			  0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
+			  0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
+			  0x7b, 0x45, 0xfe, 0x05, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
+			  0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
+			  0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
+			  0x25, 0xab, 0xc5, 0x02, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
+			  0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
+			  0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
+			  0x6a, 0x31, 0x83, 0xb5, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
+			  0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
+			  0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
+			  0xb3, 0xd7, 0xec, 0xcc, },
+}};
+
+static const struct hash_testvec blakes2s_256_tv_template[] = {{
+	.plaintext = blake2_ordered_sequence,
+	.psize = 15,
+	.digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+			  0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+			  0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+			  0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+			  0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+			  0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+			  0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 1,
+	.digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
+			  0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
+			  0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
+			  0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 7,
+	.digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
+			  0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
+			  0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
+			  0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
+}, {
+	.ksize = 32,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 64,
+	.digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+			  0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+			  0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+			  0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
+}, {
+	.ksize = 1,
+	.key = "B",
+	.plaintext = blake2_ordered_sequence,
+	.psize = 247,
+	.digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
+			  0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
+			  0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
+			  0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
+}, {
+	.ksize = 16,
+	.key = blake2_ordered_sequence,
+	.plaintext = blake2_ordered_sequence,
+	.psize = 256,
+	.digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
+			  0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
+			  0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
+			  0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
+}};
+
 #endif	/* _CRYPTO_TESTMGR_H */
diff --git a/crypto/tgr192.c b/crypto/tgr192.c
index 052648e24909..aa29c529b44e 100644
--- a/crypto/tgr192.c
+++ b/crypto/tgr192.c
@@ -555,7 +555,7 @@ static int tgr192_final(struct shash_desc *desc, u8 * out)
 	__le32 *le32p;
 	u32 t, msb, lsb;
 
-	tgr192_update(desc, NULL, 0); /* flush */ ;
+	tgr192_update(desc, NULL, 0); /* flush */
 
 	msb = 0;
 	t = tctx->nblocks;
@@ -583,7 +583,7 @@ static int tgr192_final(struct shash_desc *desc, u8 * out)
 		while (tctx->count < 64) {
 			tctx->hash[tctx->count++] = 0;
 		}
-		tgr192_update(desc, NULL, 0); /* flush */ ;
+		tgr192_update(desc, NULL, 0); /* flush */
 		memset(tctx->hash, 0, 56);    /* fill next block with zeroes */
 	}
 	/* append the 64 bit count */
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index 222fc765c57a..d23fa531b91f 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -567,7 +567,7 @@ static const u8 calc_sb_tbl[512] = {
 
 /* Perform the key setup. */
 int __twofish_setkey(struct twofish_ctx *ctx, const u8 *key,
-		     unsigned int key_len, u32 *flags)
+		     unsigned int key_len)
 {
 	int i, j, k;
 
@@ -584,10 +584,7 @@ int __twofish_setkey(struct twofish_ctx *ctx, const u8 *key,
 
 	/* Check key length. */
 	if (key_len % 8)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL; /* unsupported key length */
-	}
 
 	/* Compute the first two words of the S vector.  The magic numbers are
 	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
@@ -688,8 +685,7 @@ EXPORT_SYMBOL_GPL(__twofish_setkey);
 
 int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
 {
-	return __twofish_setkey(crypto_tfm_ctx(tfm), key, key_len,
-				&tfm->crt_flags);
+	return __twofish_setkey(crypto_tfm_ctx(tfm), key, key_len);
 }
 EXPORT_SYMBOL_GPL(twofish_setkey);
 
diff --git a/crypto/vmac.c b/crypto/vmac.c
index f50a85060b39..2d906830df96 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -435,10 +435,8 @@ static int vmac_setkey(struct crypto_shash *tfm,
 	unsigned int i;
 	int err;
 
-	if (keylen != VMAC_KEY_LEN) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != VMAC_KEY_LEN)
 		return -EINVAL;
-	}
 
 	err = crypto_cipher_setkey(tctx->cipher, key, keylen);
 	if (err)
@@ -598,7 +596,7 @@ static int vmac_final(struct shash_desc *desc, u8 *out)
 static int vmac_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst);
 	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 	struct crypto_cipher *cipher;
 
@@ -620,6 +618,7 @@ static void vmac_exit_tfm(struct crypto_tfm *tfm)
 static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct shash_instance *inst;
+	struct crypto_cipher_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
@@ -627,25 +626,24 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		return err;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-			CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	spawn = shash_instance_ctx(inst);
+
+	err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[1]), 0, 0);
+	if (err)
+		goto err_free_inst;
+	alg = crypto_spawn_cipher_alg(spawn);
 
 	err = -EINVAL;
 	if (alg->cra_blocksize != VMAC_NONCEBYTES)
-		goto out_put_alg;
+		goto err_free_inst;
 
-	inst = shash_alloc_instance(tmpl->name, alg);
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
-			shash_crypto_instance(inst),
-			CRYPTO_ALG_TYPE_MASK);
+	err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
@@ -662,21 +660,19 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.final = vmac_final;
 	inst->alg.setkey = vmac_setkey;
 
+	inst->free = shash_free_singlespawn_instance;
+
 	err = shash_register_instance(tmpl, inst);
 	if (err) {
-out_free_inst:
-		shash_free_instance(shash_crypto_instance(inst));
+err_free_inst:
+		shash_free_singlespawn_instance(inst);
 	}
-
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
 static struct crypto_template vmac64_tmpl = {
 	.name = "vmac64",
 	.create = vmac_create,
-	.free = shash_free_instance,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index 0bb26e8f6f5a..598ec88abf0f 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -167,7 +167,7 @@ static int xcbc_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_cipher *cipher;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst);
 	struct xcbc_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -188,6 +188,7 @@ static void xcbc_exit_tfm(struct crypto_tfm *tfm)
 static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct shash_instance *inst;
+	struct crypto_cipher_spawn *spawn;
 	struct crypto_alg *alg;
 	unsigned long alignmask;
 	int err;
@@ -196,28 +197,24 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		return err;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	spawn = shash_instance_ctx(inst);
 
-	switch(alg->cra_blocksize) {
-	case XCBC_BLOCKSIZE:
-		break;
-	default:
-		goto out_put_alg;
-	}
+	err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
+				 crypto_attr_alg_name(tb[1]), 0, 0);
+	if (err)
+		goto err_free_inst;
+	alg = crypto_spawn_cipher_alg(spawn);
 
-	inst = shash_alloc_instance("xcbc", alg);
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (alg->cra_blocksize != XCBC_BLOCKSIZE)
+		goto err_free_inst;
 
-	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
-				shash_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
 	if (err)
-		goto out_free_inst;
+		goto err_free_inst;
 
 	alignmask = alg->cra_alignmask | 3;
 	inst->alg.base.cra_alignmask = alignmask;
@@ -242,21 +239,19 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.final = crypto_xcbc_digest_final;
 	inst->alg.setkey = crypto_xcbc_digest_setkey;
 
+	inst->free = shash_free_singlespawn_instance;
+
 	err = shash_register_instance(tmpl, inst);
 	if (err) {
-out_free_inst:
-		shash_free_instance(shash_crypto_instance(inst));
+err_free_inst:
+		shash_free_singlespawn_instance(inst);
 	}
-
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
 static struct crypto_template crypto_xcbc_tmpl = {
 	.name = "xcbc",
 	.create = xcbc_create,
-	.free = shash_free_instance,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/xts.c b/crypto/xts.c
index ab117633d64e..29efa15f1495 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -61,8 +61,6 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(tweak, key + keylen, keylen);
-	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) &
-					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -71,11 +69,7 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
 	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-
-	return err;
+	return crypto_skcipher_setkey(child, key, keylen);
 }
 
 /*
@@ -361,20 +355,21 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	ctx = skcipher_instance_ctx(inst);
 
-	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
-
 	mask = crypto_requires_off(algt->type, algt->mask,
 				   CRYPTO_ALG_NEED_FALLBACK |
 				   CRYPTO_ALG_ASYNC);
 
-	err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, mask);
+	err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
+				   cipher_name, 0, mask);
 	if (err == -ENOENT) {
 		err = -ENAMETOOLONG;
 		if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
 			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
 			goto err_free_inst;
 
-		err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, mask);
+		err = crypto_grab_skcipher(&ctx->spawn,
+					   skcipher_crypto_instance(inst),
+					   ctx->name, 0, mask);
 	}
 
 	if (err)
diff --git a/crypto/xxhash_generic.c b/crypto/xxhash_generic.c
index 4aad2c0f40a9..55d1c8a76127 100644
--- a/crypto/xxhash_generic.c
+++ b/crypto/xxhash_generic.c
@@ -22,10 +22,8 @@ static int xxhash64_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct xxhash64_tfm_ctx *tctx = crypto_shash_ctx(tfm);
 
-	if (keylen != sizeof(tctx->seed)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(tctx->seed))
 		return -EINVAL;
-	}
 	tctx->seed = get_unaligned_le64(key);
 	return 0;
 }
diff --git a/drivers/Makefile b/drivers/Makefile
index aaef17cc6512..31cf17dee252 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -171,7 +171,7 @@ obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
 obj-$(CONFIG_PERF_EVENTS)	+= perf/
 obj-$(CONFIG_RAS)		+= ras/
-obj-$(CONFIG_THUNDERBOLT)	+= thunderbolt/
+obj-$(CONFIG_USB4)		+= thunderbolt/
 obj-$(CONFIG_CORESIGHT)		+= hwtracing/coresight/
 obj-y				+= hwtracing/intel_th/
 obj-$(CONFIG_STM)		+= hwtracing/stm/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index ebe1e9e5fd81..cc57bab146b5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -104,9 +104,9 @@ config ACPI_PROCFS_POWER
 	depends on X86 && PROC_FS
 	help
 	  For backwards compatibility, this option allows
-          deprecated power /proc/acpi/ directories to exist, even when
-          they have been replaced by functions in /sys.
-          The deprecated directories (and their replacements) include:
+	  deprecated power /proc/acpi/ directories to exist, even when
+	  they have been replaced by functions in /sys.
+	  The deprecated directories (and their replacements) include:
 	  /proc/acpi/battery/* (/sys/class/power_supply/*) and
 	  /proc/acpi/ac_adapter/* (sys/class/power_supply/*).
 	  This option has no effect on /proc/acpi/ directories
@@ -241,6 +241,7 @@ config ACPI_CPU_FREQ_PSS
 
 config ACPI_PROCESSOR_CSTATE
 	def_bool y
+	depends on ACPI_PROCESSOR
 	depends on IA64 || X86
 
 config ACPI_PROCESSOR_IDLE
@@ -319,12 +320,6 @@ config ACPI_THERMAL
 	  To compile this driver as a module, choose M here:
 	  the module will be called thermal.
 
-config ACPI_NUMA
-	bool "NUMA support"
-	depends on NUMA
-	depends on (X86 || IA64 || ARM64)
-	default y if IA64 || ARM64
-
 config ACPI_CUSTOM_DSDT_FILE
 	string "Custom DSDT Table file to include"
 	default ""
@@ -454,7 +449,7 @@ config ACPI_CUSTOM_METHOD
 config ACPI_BGRT
 	bool "Boottime Graphics Resource Table support"
 	depends on EFI && (X86 || ARM64)
-        help
+	help
 	  This driver adds support for exposing the ACPI Boottime Graphics
 	  Resource Table, which allows the operating system to obtain
 	  data from the firmware boot splash. It will appear under
@@ -473,8 +468,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
 	  If you are unsure what to do, do not enable this option.
 
 source "drivers/acpi/nfit/Kconfig"
-source "drivers/acpi/hmat/Kconfig"
-
+source "drivers/acpi/numa/Kconfig"
 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
 
@@ -513,11 +507,19 @@ menuconfig PMIC_OPREGION
 	  PMIC chip.
 
 if PMIC_OPREGION
-config CRC_PMIC_OPREGION
-	bool "ACPI operation region support for CrystalCove PMIC"
+config BYTCRC_PMIC_OPREGION
+	bool "ACPI operation region support for Bay Trail Crystal Cove PMIC"
+	depends on INTEL_SOC_PMIC
+	help
+	  This config adds ACPI operation region support for the Bay Trail
+	  version of the Crystal Cove PMIC.
+
+config CHTCRC_PMIC_OPREGION
+	bool "ACPI operation region support for Cherry Trail Crystal Cove PMIC"
 	depends on INTEL_SOC_PMIC
 	help
-	  This config adds ACPI operation region support for CrystalCove PMIC.
+	  This config adds ACPI operation region support for the Cherry Trail
+	  version of the Crystal Cove PMIC.
 
 config XPOWER_PMIC_OPREGION
 	bool "ACPI operation region support for XPower AXP288 PMIC"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 5d361e4e3405..33fdaf67454e 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -48,14 +48,13 @@ acpi-y				+= acpi_pnp.o
 acpi-$(CONFIG_ARM_AMBA)	+= acpi_amba.o
 acpi-y				+= power.o
 acpi-y				+= event.o
-acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
+acpi-y				+= evged.o
 acpi-y				+= sysfs.o
 acpi-y				+= property.o
 acpi-$(CONFIG_X86)		+= acpi_cmos_rtc.o
 acpi-$(CONFIG_X86)		+= x86/apple.o
 acpi-$(CONFIG_X86)		+= x86/utils.o
 acpi-$(CONFIG_DEBUG_FS)		+= debugfs.o
-acpi-$(CONFIG_ACPI_NUMA)	+= numa.o
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
 acpi-y				+= acpi_lpat.o
 acpi-$(CONFIG_ACPI_LPIT)	+= acpi_lpit.o
@@ -80,7 +79,7 @@ obj-$(CONFIG_ACPI_PROCESSOR)	+= processor.o
 obj-$(CONFIG_ACPI)		+= container.o
 obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
 obj-$(CONFIG_ACPI_NFIT)		+= nfit/
-obj-$(CONFIG_ACPI_HMAT)		+= hmat/
+obj-$(CONFIG_ACPI_NUMA)		+= numa/
 obj-$(CONFIG_ACPI)		+= acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
@@ -109,7 +108,8 @@ obj-$(CONFIG_ACPI_APEI)		+= apei/
 obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
 
 obj-$(CONFIG_PMIC_OPREGION)	+= pmic/intel_pmic.o
-obj-$(CONFIG_CRC_PMIC_OPREGION) += pmic/intel_pmic_crc.o
+obj-$(CONFIG_BYTCRC_PMIC_OPREGION) += pmic/intel_pmic_bytcrc.o
+obj-$(CONFIG_CHTCRC_PMIC_OPREGION) += pmic/intel_pmic_chtcrc.o
 obj-$(CONFIG_XPOWER_PMIC_OPREGION) += pmic/intel_pmic_xpower.o
 obj-$(CONFIG_BXT_WC_PMIC_OPREGION) += pmic/intel_pmic_bxtwc.o
 obj-$(CONFIG_CHT_WC_PMIC_OPREGION) += pmic/intel_pmic_chtwc.o
diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c
index 57d9d574d4dd..ece8c1a921cc 100644
--- a/drivers/acpi/acpi_configfs.c
+++ b/drivers/acpi/acpi_configfs.c
@@ -53,7 +53,7 @@ static ssize_t acpi_table_aml_write(struct config_item *cfg,
 	if (!table->header)
 		return -ENOMEM;
 
-	ret = acpi_load_table(table->header);
+	ret = acpi_load_table(table->header, &table->index);
 	if (ret) {
 		kfree(table->header);
 		table->header = NULL;
@@ -223,7 +223,7 @@ static void acpi_table_drop_item(struct config_group *group,
 	struct acpi_table *table = container_of(cfg, struct acpi_table, cfg);
 
 	ACPI_INFO(("Host-directed Dynamic ACPI Table Unload"));
-	acpi_tb_unload_table(table->index);
+	acpi_unload_table(table->index);
 }
 
 static struct configfs_group_operations acpi_table_group_ops = {
diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c
index 433376e819bb..953437a216f6 100644
--- a/drivers/acpi/acpi_lpit.c
+++ b/drivers/acpi/acpi_lpit.c
@@ -104,7 +104,7 @@ static void lpit_update_residency(struct lpit_residency_info *info,
 
 	info->gaddr = lpit_native->residency_counter;
 	if (info->gaddr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-		info->iomem_addr = ioremap_nocache(info->gaddr.address,
+		info->iomem_addr = ioremap(info->gaddr.address,
 						   info->gaddr.bit_width / 8);
 		if (!info->iomem_addr)
 			return;
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 60bbc5090abe..db18df6cb330 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -10,6 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/clkdev.h>
 #include <linux/clk-provider.h>
+#include <linux/dmi.h>
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/mutex.h>
@@ -68,10 +69,6 @@ ACPI_MODULE_NAME("acpi_lpss");
 #define LPSS_SAVE_CTX			BIT(4)
 #define LPSS_NO_D3_DELAY		BIT(5)
 
-/* Crystal Cove PMIC shares same ACPI ID between different platforms */
-#define BYT_CRC_HRV			2
-#define CHT_CRC_HRV			3
-
 struct lpss_private_data;
 
 struct lpss_device_desc {
@@ -157,7 +154,7 @@ static void lpss_deassert_reset(struct lpss_private_data *pdata)
  */
 static struct pwm_lookup byt_pwm_lookup[] = {
 	PWM_LOOKUP_WITH_MODULE("80860F09:00", 0, "0000:00:02.0",
-			       "pwm_backlight", 0, PWM_POLARITY_NORMAL,
+			       "pwm_soc_backlight", 0, PWM_POLARITY_NORMAL,
 			       "pwm-lpss-platform"),
 };
 
@@ -169,8 +166,7 @@ static void byt_pwm_setup(struct lpss_private_data *pdata)
 	if (!adev->pnp.unique_id || strcmp(adev->pnp.unique_id, "1"))
 		return;
 
-	if (!acpi_dev_present("INT33FD", NULL, BYT_CRC_HRV))
-		pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
+	pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
 }
 
 #define LPSS_I2C_ENABLE			0x6c
@@ -203,7 +199,7 @@ static void byt_i2c_setup(struct lpss_private_data *pdata)
 /* BSW PWM used for backlight control by the i915 driver */
 static struct pwm_lookup bsw_pwm_lookup[] = {
 	PWM_LOOKUP_WITH_MODULE("80862288:00", 0, "0000:00:02.0",
-			       "pwm_backlight", 0, PWM_POLARITY_NORMAL,
+			       "pwm_soc_backlight", 0, PWM_POLARITY_NORMAL,
 			       "pwm-lpss-platform"),
 };
 
@@ -463,6 +459,18 @@ struct lpss_device_links {
 	const char *consumer_hid;
 	const char *consumer_uid;
 	u32 flags;
+	const struct dmi_system_id *dep_missing_ids;
+};
+
+/* Please keep this list sorted alphabetically by vendor and model */
+static const struct dmi_system_id i2c1_dep_missing_dmi_ids[] = {
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "T200TA"),
+		},
+	},
+	{}
 };
 
 /*
@@ -473,36 +481,29 @@ struct lpss_device_links {
  * the supplier is not enumerated until after the consumer is probed.
  */
 static const struct lpss_device_links lpss_device_links[] = {
+	/* CHT External sdcard slot controller depends on PMIC I2C ctrl */
 	{"808622C1", "7", "80860F14", "3", DL_FLAG_PM_RUNTIME},
+	/* CHT iGPU depends on PMIC I2C controller */
 	{"808622C1", "7", "LNXVIDEO", NULL, DL_FLAG_PM_RUNTIME},
+	/* BYT iGPU depends on the Embedded Controller I2C controller (UID 1) */
+	{"80860F41", "1", "LNXVIDEO", NULL, DL_FLAG_PM_RUNTIME,
+	 i2c1_dep_missing_dmi_ids},
+	/* BYT CR iGPU depends on PMIC I2C controller (UID 5 on CR) */
 	{"80860F41", "5", "LNXVIDEO", NULL, DL_FLAG_PM_RUNTIME},
+	/* BYT iGPU depends on PMIC I2C controller (UID 7 on non CR) */
+	{"80860F41", "7", "LNXVIDEO", NULL, DL_FLAG_PM_RUNTIME},
 };
 
-static bool hid_uid_match(struct acpi_device *adev,
-			  const char *hid2, const char *uid2)
-{
-	const char *hid1 = acpi_device_hid(adev);
-	const char *uid1 = acpi_device_uid(adev);
-
-	if (strcmp(hid1, hid2))
-		return false;
-
-	if (!uid2)
-		return true;
-
-	return uid1 && !strcmp(uid1, uid2);
-}
-
 static bool acpi_lpss_is_supplier(struct acpi_device *adev,
 				  const struct lpss_device_links *link)
 {
-	return hid_uid_match(adev, link->supplier_hid, link->supplier_uid);
+	return acpi_dev_hid_uid_match(adev, link->supplier_hid, link->supplier_uid);
 }
 
 static bool acpi_lpss_is_consumer(struct acpi_device *adev,
 				  const struct lpss_device_links *link)
 {
-	return hid_uid_match(adev, link->consumer_hid, link->consumer_uid);
+	return acpi_dev_hid_uid_match(adev, link->consumer_hid, link->consumer_uid);
 }
 
 struct hid_uid {
@@ -518,7 +519,7 @@ static int match_hid_uid(struct device *dev, const void *data)
 	if (!adev)
 		return 0;
 
-	return hid_uid_match(adev, id->hid, id->uid);
+	return acpi_dev_hid_uid_match(adev, id->hid, id->uid);
 }
 
 static struct device *acpi_lpss_find_device(const char *hid, const char *uid)
@@ -570,7 +571,8 @@ static void acpi_lpss_link_consumer(struct device *dev1,
 	if (!dev2)
 		return;
 
-	if (acpi_lpss_dep(ACPI_COMPANION(dev2), ACPI_HANDLE(dev1)))
+	if ((link->dep_missing_ids && dmi_check_system(link->dep_missing_ids))
+	    || acpi_lpss_dep(ACPI_COMPANION(dev2), ACPI_HANDLE(dev1)))
 		device_link_add(dev2, dev1, link->flags);
 
 	put_device(dev2);
@@ -585,7 +587,8 @@ static void acpi_lpss_link_supplier(struct device *dev1,
 	if (!dev2)
 		return;
 
-	if (acpi_lpss_dep(ACPI_COMPANION(dev1), ACPI_HANDLE(dev2)))
+	if ((link->dep_missing_ids && dmi_check_system(link->dep_missing_ids))
+	    || acpi_lpss_dep(ACPI_COMPANION(dev1), ACPI_HANDLE(dev2)))
 		device_link_add(dev1, dev2, link->flags);
 
 	put_device(dev2);
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 00ec4f2bf015..c05050f474cd 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -31,6 +31,44 @@ static const struct acpi_device_id forbidden_id_list[] = {
 	{"", 0},
 };
 
+static struct platform_device *acpi_platform_device_find_by_companion(struct acpi_device *adev)
+{
+	struct device *dev;
+
+	dev = bus_find_device_by_acpi_dev(&platform_bus_type, adev);
+	return dev ? to_platform_device(dev) : NULL;
+}
+
+static int acpi_platform_device_remove_notify(struct notifier_block *nb,
+					      unsigned long value, void *arg)
+{
+	struct acpi_device *adev = arg;
+	struct platform_device *pdev;
+
+	switch (value) {
+	case ACPI_RECONFIG_DEVICE_ADD:
+		/* Nothing to do here */
+		break;
+	case ACPI_RECONFIG_DEVICE_REMOVE:
+		if (!acpi_device_enumerated(adev))
+			break;
+
+		pdev = acpi_platform_device_find_by_companion(adev);
+		if (!pdev)
+			break;
+
+		platform_device_unregister(pdev);
+		put_device(&pdev->dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block acpi_platform_notifier = {
+	.notifier_call = acpi_platform_device_remove_notify,
+};
+
 static void acpi_platform_fill_resource(struct acpi_device *adev,
 	const struct resource *src, struct resource *dest)
 {
@@ -130,3 +168,8 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
 	return pdev;
 }
 EXPORT_SYMBOL_GPL(acpi_create_platform_device);
+
+void __init acpi_platform_init(void)
+{
+	acpi_reconfig_notifier_register(&acpi_platform_notifier);
+}
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 2c4dda0787e8..5379bc3f275d 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -705,3 +705,185 @@ void __init acpi_processor_init(void)
 	acpi_scan_add_handler_with_hotplug(&processor_handler, "processor");
 	acpi_scan_add_handler(&processor_container_handler);
 }
+
+#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
+/**
+ * acpi_processor_claim_cst_control - Request _CST control from the platform.
+ */
+bool acpi_processor_claim_cst_control(void)
+{
+	static bool cst_control_claimed;
+	acpi_status status;
+
+	if (!acpi_gbl_FADT.cst_control || cst_control_claimed)
+		return true;
+
+	status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
+				    acpi_gbl_FADT.cst_control, 8);
+	if (ACPI_FAILURE(status)) {
+		pr_warn("ACPI: Failed to claim processor _CST control\n");
+		return false;
+	}
+
+	cst_control_claimed = true;
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_processor_claim_cst_control);
+
+/**
+ * acpi_processor_evaluate_cst - Evaluate the processor _CST control method.
+ * @handle: ACPI handle of the processor object containing the _CST.
+ * @cpu: The numeric ID of the target CPU.
+ * @info: Object write the C-states information into.
+ *
+ * Extract the C-state information for the given CPU from the output of the _CST
+ * control method under the corresponding ACPI processor object (or processor
+ * device object) and populate @info with it.
+ *
+ * If any ACPI_ADR_SPACE_FIXED_HARDWARE C-states are found, invoke
+ * acpi_processor_ffh_cstate_probe() to verify them and update the
+ * cpu_cstate_entry data for @cpu.
+ */
+int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu,
+				struct acpi_processor_power *info)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *cst;
+	acpi_status status;
+	u64 count;
+	int last_index = 0;
+	int i, ret = 0;
+
+	status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_handle_debug(handle, "No _CST\n");
+		return -ENODEV;
+	}
+
+	cst = buffer.pointer;
+
+	/* There must be at least 2 elements. */
+	if (!cst || cst->type != ACPI_TYPE_PACKAGE || cst->package.count < 2) {
+		acpi_handle_warn(handle, "Invalid _CST output\n");
+		ret = -EFAULT;
+		goto end;
+	}
+
+	count = cst->package.elements[0].integer.value;
+
+	/* Validate the number of C-states. */
+	if (count < 1 || count != cst->package.count - 1) {
+		acpi_handle_warn(handle, "Inconsistent _CST data\n");
+		ret = -EFAULT;
+		goto end;
+	}
+
+	for (i = 1; i <= count; i++) {
+		union acpi_object *element;
+		union acpi_object *obj;
+		struct acpi_power_register *reg;
+		struct acpi_processor_cx cx;
+
+		/*
+		 * If there is not enough space for all C-states, skip the
+		 * excess ones and log a warning.
+		 */
+		if (last_index >= ACPI_PROCESSOR_MAX_POWER - 1) {
+			acpi_handle_warn(handle,
+					 "No room for more idle states (limit: %d)\n",
+					 ACPI_PROCESSOR_MAX_POWER - 1);
+			break;
+		}
+
+		memset(&cx, 0, sizeof(cx));
+
+		element = &cst->package.elements[i];
+		if (element->type != ACPI_TYPE_PACKAGE)
+			continue;
+
+		if (element->package.count != 4)
+			continue;
+
+		obj = &element->package.elements[0];
+
+		if (obj->type != ACPI_TYPE_BUFFER)
+			continue;
+
+		reg = (struct acpi_power_register *)obj->buffer.pointer;
+
+		obj = &element->package.elements[1];
+		if (obj->type != ACPI_TYPE_INTEGER)
+			continue;
+
+		cx.type = obj->integer.value;
+		/*
+		 * There are known cases in which the _CST output does not
+		 * contain C1, so if the type of the first state found is not
+		 * C1, leave an empty slot for C1 to be filled in later.
+		 */
+		if (i == 1 && cx.type != ACPI_STATE_C1)
+			last_index = 1;
+
+		cx.address = reg->address;
+		cx.index = last_index + 1;
+
+		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
+			if (!acpi_processor_ffh_cstate_probe(cpu, &cx, reg)) {
+				/*
+				 * In the majority of cases _CST describes C1 as
+				 * a FIXED_HARDWARE C-state, but if the command
+				 * line forbids using MWAIT, use CSTATE_HALT for
+				 * C1 regardless.
+				 */
+				if (cx.type == ACPI_STATE_C1 &&
+				    boot_option_idle_override == IDLE_NOMWAIT) {
+					cx.entry_method = ACPI_CSTATE_HALT;
+					snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
+				} else {
+					cx.entry_method = ACPI_CSTATE_FFH;
+				}
+			} else if (cx.type == ACPI_STATE_C1) {
+				/*
+				 * In the special case of C1, FIXED_HARDWARE can
+				 * be handled by executing the HLT instruction.
+				 */
+				cx.entry_method = ACPI_CSTATE_HALT;
+				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
+			} else {
+				continue;
+			}
+		} else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+			cx.entry_method = ACPI_CSTATE_SYSTEMIO;
+			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
+				 cx.address);
+		} else {
+			continue;
+		}
+
+		if (cx.type == ACPI_STATE_C1)
+			cx.valid = 1;
+
+		obj = &element->package.elements[2];
+		if (obj->type != ACPI_TYPE_INTEGER)
+			continue;
+
+		cx.latency = obj->integer.value;
+
+		obj = &element->package.elements[3];
+		if (obj->type != ACPI_TYPE_INTEGER)
+			continue;
+
+		memcpy(&info->states[++last_index], &cx, sizeof(cx));
+	}
+
+	acpi_handle_info(handle, "Found %d idle states\n", last_index);
+
+	info->count = last_index;
+
+      end:
+	kfree(buffer.pointer);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(acpi_processor_evaluate_cst);
+#endif /* CONFIG_ACPI_PROCESSOR_CSTATE */
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 4f325e47519f..15c5b272e698 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -699,9 +699,13 @@ acpi_video_device_EDID(struct acpi_video_device *device,
  *			event notify code.
  *	lcd_flag	:
  *		0.	The system BIOS should automatically control the brightness level
- *			of the LCD when the power changes from AC to DC
+ *			of the LCD when:
+ *			- the power changes from AC to DC (ACPI appendix B)
+ *			- a brightness hotkey gets pressed (implied by Win7/8 backlight docs)
  *		1.	The system BIOS should NOT automatically control the brightness
- *			level of the LCD when the power changes from AC to DC.
+ *			level of the LCD when:
+ *			- the power changes from AC to DC (ACPI appendix B)
+ *			- a brightness hotkey gets pressed (implied by Win7/8 backlight docs)
  *  Return Value:
  *		-EINVAL	wrong arg.
  */
@@ -2183,7 +2187,7 @@ int acpi_video_register(void)
 	if (register_count) {
 		/*
 		 * if the function of acpi_video_register is already called,
-		 * don't register the acpi_vide_bus again and return no error.
+		 * don't register the acpi_video_bus again and return no error.
 		 */
 		goto leave;
 	}
diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index 863ade9add6d..173447d50acf 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -17,7 +17,7 @@
 /* Common info for tool signons */
 
 #define ACPICA_NAME                 "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2019 Intel Corporation"
+#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2020 Intel Corporation"
 
 #if ACPI_MACHINE_WIDTH == 64
 #define ACPI_WIDTH          " (64-bit version)"
diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h
index 54f81eac7ec9..89101e53324b 100644
--- a/drivers/acpi/acpica/accommon.h
+++ b/drivers/acpi/acpica/accommon.h
@@ -3,7 +3,7 @@
  *
  * Name: accommon.h - Common include files for generation of ACPICA source
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
index d5478cd4a857..ede4b9cc9e85 100644
--- a/drivers/acpi/acpica/acconvert.h
+++ b/drivers/acpi/acpica/acconvert.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 32f2e38c7570..a676daaa2da5 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -3,7 +3,7 @@
  *
  * Name: acdebug.h - ACPI/AML debugger
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -148,6 +148,8 @@ void acpi_db_find_references(char *object_arg);
 
 void acpi_db_get_bus_info(void);
 
+acpi_status acpi_db_display_fields(u32 address_space_id);
+
 /*
  * dbdisply - debug display commands
  */
diff --git a/drivers/acpi/acpica/acdispat.h b/drivers/acpi/acpica/acdispat.h
index 82f81501566b..7ba6e308f146 100644
--- a/drivers/acpi/acpica/acdispat.h
+++ b/drivers/acpi/acpica/acdispat.h
@@ -3,7 +3,7 @@
  *
  * Name: acdispat.h - dispatcher (parser to interpreter interface)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index c8652f91054e..79f292687bd6 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -3,7 +3,7 @@
  *
  * Name: acevents.h - Event subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index fd3beea93421..38ffa2c0a496 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -3,7 +3,7 @@
  *
  * Name: acglobal.h - Declarations for global variables
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index bcf8f7501db7..67f282e9e0af 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -3,7 +3,7 @@
  *
  * Name: achware.h -- hardware specific interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index 20706adbc148..a6d896cda2a5 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -3,7 +3,7 @@
  *
  * Name: acinterp.h - Interpreter subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 1ea52576f0a2..af58cd2dc9d3 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -3,7 +3,7 @@
  *
  * Name: aclocal.h - Internal data types used across the ACPI subsystem
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 283614e82a20..2269e10bc21b 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -3,7 +3,7 @@
  *
  * Name: acmacros.h - C macros for the entire subsystem.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 7da1864798a0..e618ddfab2fd 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -3,7 +3,7 @@
  *
  * Name: acnamesp.h - Namespace subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index 8def0e3d690f..9f0219a8cb98 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -3,7 +3,7 @@
  *
  * Name: acobject.h - Definition of union acpi_operand_object  (Internal object only)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -260,7 +260,8 @@ struct acpi_object_index_field {
 /* The buffer_field is different in that it is part of a Buffer, not an op_region */
 
 struct acpi_object_buffer_field {
-	ACPI_OBJECT_COMMON_HEADER ACPI_COMMON_FIELD_INFO union acpi_operand_object *buffer_obj;	/* Containing Buffer object */
+	ACPI_OBJECT_COMMON_HEADER ACPI_COMMON_FIELD_INFO u8 is_create_field;	/* Special case for objects created by create_field() */
+	union acpi_operand_object *buffer_obj;	/* Containing Buffer object */
 };
 
 /******************************************************************************
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index 9d78134428e3..8825394be9ab 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -3,7 +3,7 @@
  *
  * Name: acopcode.h - AML opcode information for the AML parser and interpreter
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index 6e32c97cba6c..bc00b85c0a8f 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acparser.h - AML Parser subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 387163b962a7..cd0f5df0ea23 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -3,7 +3,7 @@
  *
  * Name: acpredef - Information table for ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acresrc.h b/drivers/acpi/acpica/acresrc.h
index 422cd8f2b92e..6de8a1650d3d 100644
--- a/drivers/acpi/acpica/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
@@ -3,7 +3,7 @@
  *
  * Name: acresrc.h - Resource Manager function prototypes
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index 218ff4c8b817..4c900c108f3f 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -3,7 +3,7 @@
  *
  * Name: acstruct.h - Internal structs
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -192,6 +192,16 @@ struct acpi_device_walk_info {
 	u32 num_INI;
 };
 
+/* Info used by Acpi  acpi_db_display_fields */
+
+struct acpi_region_walk_info {
+	u32 debug_level;
+	u32 count;
+	acpi_owner_id owner_id;
+	u8 display_type;
+	u32 address_space_id;
+};
+
 /* TBD: [Restructure] Merge with struct above */
 
 struct acpi_walk_info {
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index dfbf1dbd4033..734624facda3 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -3,7 +3,7 @@
  *
  * Name: actables.h - ACPI table management
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index 601808be86d1..7c89b470ec81 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -3,7 +3,7 @@
  *
  * Name: acutils.h -- prototypes for the common (subsystem-wide) procedures
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -142,10 +142,11 @@ struct acpi_pkg_info {
 
 /* acpi_ut_dump_buffer */
 
-#define DB_BYTE_DISPLAY     1
-#define DB_WORD_DISPLAY     2
-#define DB_DWORD_DISPLAY    4
-#define DB_QWORD_DISPLAY    8
+#define DB_BYTE_DISPLAY      0x01
+#define DB_WORD_DISPLAY      0x02
+#define DB_DWORD_DISPLAY     0x04
+#define DB_QWORD_DISPLAY     0x08
+#define DB_DISPLAY_DATA_ONLY 0x10
 
 /*
  * utascii - ASCII utilities
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index 49e412edd7c6..1d541bbac4a3 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -5,7 +5,7 @@
  *                   Declarations and definitions contained herein are derived
  *                   directly from the ACPI specification.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index 7c3bd4ab60fc..e5234e001acf 100644
--- a/drivers/acpi/acpica/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
@@ -3,7 +3,7 @@
  *
  * Module Name: amlresrc.h - AML resource descriptors
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 9fd9a98a9cbe..2b84ac093698 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -106,6 +106,10 @@ acpi_db_convert_to_buffer(char *string, union acpi_object *object)
 	u8 *buffer;
 	acpi_status status;
 
+	/* Skip all preceding white space */
+
+	acpi_ut_remove_whitespace(&string);
+
 	/* Generate the final buffer length */
 
 	for (i = 0, length = 0; string[i];) {
diff --git a/drivers/acpi/acpica/dbdisply.c b/drivers/acpi/acpica/dbdisply.c
index 30ab62b0fec8..f2df416d0d2d 100644
--- a/drivers/acpi/acpica/dbdisply.c
+++ b/drivers/acpi/acpica/dbdisply.c
@@ -513,7 +513,6 @@ void acpi_db_display_results(void)
 		return;
 	}
 
-	obj_desc = walk_state->method_desc;
 	node = walk_state->method_node;
 
 	if (walk_state->results) {
@@ -565,7 +564,6 @@ void acpi_db_display_calling_tree(void)
 		return;
 	}
 
-	node = walk_state->method_node;
 	acpi_os_printf("Current Control Method Call Tree\n");
 
 	while (walk_state) {
diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c
index c6e25734dc5c..e1b6e54a96ac 100644
--- a/drivers/acpi/acpica/dbfileio.c
+++ b/drivers/acpi/acpica/dbfileio.c
@@ -93,7 +93,7 @@ acpi_status acpi_db_load_tables(struct acpi_new_table_desc *list_head)
 	while (table_list_head) {
 		table = table_list_head->table;
 
-		status = acpi_load_table(table);
+		status = acpi_load_table(table, NULL);
 		if (ACPI_FAILURE(status)) {
 			if (status == AE_ALREADY_EXISTS) {
 				acpi_os_printf
diff --git a/drivers/acpi/acpica/dbhistry.c b/drivers/acpi/acpica/dbhistry.c
index 47d2e5059849..bb9600b867ee 100644
--- a/drivers/acpi/acpica/dbhistry.c
+++ b/drivers/acpi/acpica/dbhistry.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dbhistry - debugger HISTORY command
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 55a7e10998d8..aa71f65395d2 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -50,6 +50,7 @@ enum acpi_ex_debugger_commands {
 	CMD_EVALUATE,
 	CMD_EXECUTE,
 	CMD_EXIT,
+	CMD_FIELDS,
 	CMD_FIND,
 	CMD_GO,
 	CMD_HANDLERS,
@@ -127,6 +128,7 @@ static const struct acpi_db_command_info acpi_gbl_db_commands[] = {
 	{"EVALUATE", 1},
 	{"EXECUTE", 1},
 	{"EXIT", 0},
+	{"FIELDS", 1},
 	{"FIND", 1},
 	{"GO", 0},
 	{"HANDLERS", 0},
@@ -200,6 +202,8 @@ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = {
 	 "Find ACPI name(s) with wildcards\n"},
 	{1, "  Integrity", "Validate namespace integrity\n"},
 	{1, "  Methods", "Display list of loaded control methods\n"},
+	{1, "  Fields <AddressSpaceId>",
+	 "Display list of loaded field units by space ID\n"},
 	{1, "  Namespace [Object] [Depth]",
 	 "Display loaded namespace tree/subtree\n"},
 	{1, "  Notify <Object> <Value>", "Send a notification on Object\n"},
@@ -507,6 +511,21 @@ char *acpi_db_get_next_token(char *string,
 		}
 		break;
 
+	case '{':
+
+		/* This is the start of a field unit, scan until closing brace */
+
+		string++;
+		start = string;
+		type = ACPI_TYPE_FIELD_UNIT;
+
+		/* Find end of buffer */
+
+		while (*string && (*string != '}')) {
+			string++;
+		}
+		break;
+
 	case '[':
 
 		/* This is the start of a package, scan until closing bracket */
@@ -674,6 +693,7 @@ acpi_db_command_dispatch(char *input_buffer,
 			 union acpi_parse_object *op)
 {
 	u32 temp;
+	u64 temp64;
 	u32 command_index;
 	u32 param_count;
 	char *command_line;
@@ -689,7 +709,6 @@ acpi_db_command_dispatch(char *input_buffer,
 
 	param_count = acpi_db_get_line(input_buffer);
 	command_index = acpi_db_match_command(acpi_gbl_db_args[0]);
-	temp = 0;
 
 	/*
 	 * We don't want to add the !! command to the history buffer. It
@@ -790,6 +809,21 @@ acpi_db_command_dispatch(char *input_buffer,
 		status = acpi_db_find_name_in_namespace(acpi_gbl_db_args[1]);
 		break;
 
+	case CMD_FIELDS:
+
+		status = acpi_ut_strtoul64(acpi_gbl_db_args[1], &temp64);
+
+		if (ACPI_FAILURE(status)
+		    || temp64 >= ACPI_NUM_PREDEFINED_REGIONS) {
+			acpi_os_printf
+			    ("Invalid address space ID: must be between 0 and %u inclusive\n",
+			     ACPI_NUM_PREDEFINED_REGIONS - 1);
+			return (AE_OK);
+		}
+
+		status = acpi_db_display_fields((u32)temp64);
+		break;
+
 	case CMD_GO:
 
 		acpi_gbl_cm_single_step = FALSE;
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index 76a15b6ffc5d..4e48a7de7413 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -321,6 +321,10 @@ acpi_status acpi_db_disassemble_method(char *name)
 	walk_state->parse_flags |= ACPI_PARSE_DISASSEMBLE;
 
 	status = acpi_ps_parse_aml(walk_state);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
 	(void)acpi_dm_parse_deferred_ops(op);
 
 	/* Now we can disassemble the method */
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index 63fe30e86807..3615e1a6efd8 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -10,6 +10,7 @@
 #include "acnamesp.h"
 #include "acdebug.h"
 #include "acpredef.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbnames")
@@ -504,6 +505,86 @@ acpi_db_walk_for_object_counts(acpi_handle obj_handle,
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_db_walk_for_fields
+ *
+ * PARAMETERS:  Callback from walk_namespace
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Display short info about objects in the namespace
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_db_walk_for_fields(acpi_handle obj_handle,
+			u32 nesting_level, void *context, void **return_value)
+{
+	union acpi_object *ret_value;
+	struct acpi_region_walk_info *info =
+	    (struct acpi_region_walk_info *)context;
+	struct acpi_buffer buffer;
+	acpi_status status;
+	struct acpi_namespace_node *node = acpi_ns_validate_handle(obj_handle);
+
+	if (!node) {
+		return (AE_OK);
+	}
+	if (node->object->field.region_obj->region.space_id !=
+	    info->address_space_id) {
+		return (AE_OK);
+	}
+
+	info->count++;
+
+	/* Get and display the full pathname to this object */
+
+	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
+	status = acpi_ns_handle_to_pathname(obj_handle, &buffer, TRUE);
+	if (ACPI_FAILURE(status)) {
+		acpi_os_printf("Could Not get pathname for object %p\n",
+			       obj_handle);
+		return (AE_OK);
+	}
+
+	acpi_os_printf("%s ", (char *)buffer.pointer);
+	ACPI_FREE(buffer.pointer);
+
+	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
+	acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+
+	/*
+	 * Since this is a field unit, surround the output in braces
+	 */
+	acpi_os_printf("{");
+
+	ret_value = (union acpi_object *)buffer.pointer;
+	switch (ret_value->type) {
+	case ACPI_TYPE_INTEGER:
+
+		acpi_os_printf("%8.8X%8.8X",
+			       ACPI_FORMAT_UINT64(ret_value->integer.value));
+		break;
+
+	case ACPI_TYPE_BUFFER:
+
+		acpi_ut_dump_buffer(ret_value->buffer.pointer,
+				    ret_value->buffer.length,
+				    DB_DISPLAY_DATA_ONLY | DB_BYTE_DISPLAY, 0);
+		break;
+
+	default:
+
+		break;
+	}
+	acpi_os_printf("}\n");
+
+	ACPI_FREE(buffer.pointer);
+
+	return (AE_OK);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_db_walk_for_specific_objects
  *
  * PARAMETERS:  Callback from walk_namespace
@@ -630,6 +711,39 @@ acpi_status acpi_db_display_objects(char *obj_type_arg, char *display_count_arg)
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_db_display_fields
+ *
+ * PARAMETERS:  obj_type_arg        - Type of object to display
+ *              display_count_arg   - Max depth to display
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Display objects in the namespace of the requested type
+ *
+ ******************************************************************************/
+
+acpi_status acpi_db_display_fields(u32 address_space_id)
+{
+	struct acpi_region_walk_info info;
+
+	info.count = 0;
+	info.owner_id = ACPI_OWNER_ID_MAX;
+	info.debug_level = ACPI_UINT32_MAX;
+	info.display_type = ACPI_DISPLAY_SUMMARY | ACPI_DISPLAY_SHORT;
+	info.address_space_id = address_space_id;
+
+	/* Walk the namespace from the root */
+
+	(void)acpi_walk_namespace(ACPI_TYPE_LOCAL_REGION_FIELD,
+				  ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+				  acpi_db_walk_for_fields, NULL, (void *)&info,
+				  NULL);
+
+	return (AE_OK);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_db_integrity_walk
  *
  * PARAMETERS:  Callback from walk_namespace
diff --git a/drivers/acpi/acpica/dbobject.c b/drivers/acpi/acpica/dbobject.c
index f9fc84bc3e84..4b4c530a0654 100644
--- a/drivers/acpi/acpica/dbobject.c
+++ b/drivers/acpi/acpica/dbobject.c
@@ -464,7 +464,6 @@ void acpi_db_decode_arguments(struct acpi_walk_state *walk_state)
 	u8 display_args = FALSE;
 
 	node = walk_state->method_node;
-	obj_desc = walk_state->method_desc;
 
 	/* There are no arguments for the module-level code case */
 
diff --git a/drivers/acpi/acpica/dsargs.c b/drivers/acpi/acpica/dsargs.c
index 85b34d02233e..ad17f62e51d9 100644
--- a/drivers/acpi/acpica/dsargs.c
+++ b/drivers/acpi/acpica/dsargs.c
@@ -4,7 +4,7 @@
  * Module Name: dsargs - Support for execution of dynamic arguments for static
  *                       objects (regions, fields, buffer fields, etc.)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index 4847f89c678c..4b5b6e859f62 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -4,7 +4,7 @@
  * Module Name: dscontrol - Support for execution control opcodes -
  *                          if/else/while/return
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -85,7 +85,7 @@ acpi_ds_exec_begin_control_op(struct acpi_walk_state *walk_state,
 		    walk_state->parser_state.pkg_end;
 		control_state->control.opcode = op->common.aml_opcode;
 		control_state->control.loop_timeout = acpi_os_get_timer() +
-		    (u64)(acpi_gbl_max_loop_iterations * ACPI_100NSEC_PER_SEC);
+		    ((u64)acpi_gbl_max_loop_iterations * ACPI_100NSEC_PER_SEC);
 
 		/* Push the control state on this walk's control stack */
 
diff --git a/drivers/acpi/acpica/dsdebug.c b/drivers/acpi/acpica/dsdebug.c
index 0d3e1ced1f57..63bc5f19fb82 100644
--- a/drivers/acpi/acpica/dsdebug.c
+++ b/drivers/acpi/acpica/dsdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsdebug - Parser/Interpreter interface - debugging
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index cf4e061bb0f0..c901f5aec739 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsfield - Dispatcher field routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -149,7 +149,6 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op,
 
 	if (walk_state->deferred_node) {
 		node = walk_state->deferred_node;
-		status = AE_OK;
 	} else {
 		/* Execute flag should always be set when this function is entered */
 
@@ -244,7 +243,7 @@ cleanup:
  * FUNCTION:    acpi_ds_get_field_names
  *
  * PARAMETERS:  info            - create_field info structure
- *  `           walk_state      - Current method state
+ *              walk_state      - Current method state
  *              arg             - First parser arg for the field name list
  *
  * RETURN:      Status
@@ -264,7 +263,6 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
 	union acpi_parse_object *child;
 
 #ifdef ACPI_EXEC_APP
-	u64 value = 0;
 	union acpi_operand_object *result_desc;
 	union acpi_operand_object *obj_desc;
 	char *name_path;
@@ -406,19 +404,17 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
 					name_path =
 					    acpi_ns_get_external_pathname(info->
 									  field_node);
-					obj_desc =
-					    acpi_ut_create_integer_object
-					    (value);
 					if (ACPI_SUCCESS
 					    (ae_lookup_init_file_entry
-					     (name_path, &value))) {
+					     (name_path, &obj_desc))) {
 						acpi_ex_write_data_to_field
 						    (obj_desc,
 						     acpi_ns_get_attached_object
 						     (info->field_node),
 						     &result_desc);
+						acpi_ut_remove_reference
+						    (obj_desc);
 					}
-					acpi_ut_remove_reference(obj_desc);
 					ACPI_FREE(name_path);
 #endif
 				}
@@ -636,8 +632,6 @@ acpi_ds_init_field_objects(union acpi_parse_object *op,
 				}
 
 				/* Name already exists, just ignore this error */
-
-				status = AE_OK;
 			}
 
 			arg->common.node = node;
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index a1ffed29903b..9be2a309424c 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsinit - Object initialization namespace walk
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index f59b4d944f7f..cf67caff878a 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsmethod - Parser/Interpreter interface - control method parsing
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 179129a2deb1..c0a14a6a2c20 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsobject - Dispatcher object management routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 10f32b62608e..d9c26e720cb7 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsopcode - Dispatcher support for regions and fields
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -217,6 +217,8 @@ acpi_ds_init_buffer_field(u16 aml_opcode,
 	}
 
 	obj_desc->buffer_field.buffer_obj = buffer_desc;
+	obj_desc->buffer_field.is_create_field =
+	    aml_opcode == AML_CREATE_FIELD_OP;
 
 	/* Reference count for buffer_desc inherits obj_desc count */
 
diff --git a/drivers/acpi/acpica/dspkginit.c b/drivers/acpi/acpica/dspkginit.c
index 997faa10f615..d869568d55c2 100644
--- a/drivers/acpi/acpica/dspkginit.c
+++ b/drivers/acpi/acpica/dspkginit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dspkginit - Completion of deferred package initialization
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index d75aae304595..5e81a1ae44cf 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -4,7 +4,7 @@
  * Module Name: dswexec - Dispatcher method execution callbacks;
  *                        dispatch to interpreter.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index c88fd31208a5..697974e37edf 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload - Dispatcher first pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -410,6 +410,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state)
 	ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op,
 			  walk_state));
 
+	/*
+	 * Disassembler: handle create field operators here.
+	 *
+	 * create_buffer_field is a deferred op that is typically processed in load
+	 * pass 2. However, disassembly of control method contents walk the parse
+	 * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed
+	 * in a later walk. This is a problem when there is a control method that
+	 * has the same name as the AML_CREATE object. In this case, any use of the
+	 * name segment will be detected as a method call rather than a reference
+	 * to a buffer field.
+	 *
+	 * This earlier creation during disassembly solves this issue by inserting
+	 * the named object in the ACPI namespace so that references to this name
+	 * would be a name string rather than a method call.
+	 */
+	if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) &&
+	    (walk_state->op_info->flags & AML_CREATE)) {
+		status = acpi_ds_create_buffer_field(op, walk_state);
+		return_ACPI_STATUS(status);
+	}
+
 	/* We are only interested in opcodes that have an associated name */
 
 	if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) {
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 935a8e2623e4..b31457ca926c 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload2 - Dispatcher second pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswscope.c b/drivers/acpi/acpica/dswscope.c
index 39acf7b286da..9c397642fed7 100644
--- a/drivers/acpi/acpica/dswscope.c
+++ b/drivers/acpi/acpica/dswscope.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswscope - Scope stack manipulation
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index de79f835a373..809a0c0536b5 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswstate - Dispatcher parse tree walk management routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index 9e2f5a05c066..8c83d8c620dc 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evevent - Fixed Event handling and dispatch
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c
index 5c77bee5d31f..0ced84ae13e4 100644
--- a/drivers/acpi/acpica/evglock.c
+++ b/drivers/acpi/acpica/evglock.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evglock - Global Lock support
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 344feba29063..3e39907fedd9 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpe - General Purpose Event handling and dispatch
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index fb15e9e2373b..132adff1e131 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeblk - GPE block creation and initialization.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -110,6 +110,9 @@ acpi_status acpi_ev_delete_gpe_block(struct acpi_gpe_block_info *gpe_block)
 
 	status =
 	    acpi_hw_disable_gpe_block(gpe_block->xrupt_block, gpe_block, NULL);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
 
 	if (!gpe_block->previous && !gpe_block->next) {
 
@@ -359,10 +362,10 @@ acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device,
 	walk_info.gpe_device = gpe_device;
 	walk_info.execute_by_owner_id = FALSE;
 
-	status = acpi_ns_walk_namespace(ACPI_TYPE_METHOD, gpe_device,
-					ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK,
-					acpi_ev_match_gpe_method, NULL,
-					&walk_info, NULL);
+	(void)acpi_ns_walk_namespace(ACPI_TYPE_METHOD, gpe_device,
+				     ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK,
+				     acpi_ev_match_gpe_method, NULL, &walk_info,
+				     NULL);
 
 	/* Return the new block */
 
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index b04f982e59fa..6effd8076dcc 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeinit - System GPE initialization and update
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -156,8 +156,6 @@ acpi_status acpi_ev_gpe_initialize(void)
 			 * GPE0 and GPE1 do not have to be contiguous in the GPE number
 			 * space. However, GPE0 always starts at GPE number zero.
 			 */
-			gpe_number_max = acpi_gbl_FADT.gpe1_base +
-			    ((register_count1 * ACPI_GPE_REGISTER_WIDTH) - 1);
 		}
 	}
 
@@ -169,7 +167,6 @@ acpi_status acpi_ev_gpe_initialize(void)
 
 		ACPI_DEBUG_PRINT((ACPI_DB_INIT,
 				  "There are no GPE blocks defined in the FADT\n"));
-		status = AE_OK;
 		goto cleanup;
 	}
 
diff --git a/drivers/acpi/acpica/evgpeutil.c b/drivers/acpi/acpica/evgpeutil.c
index 917892227e09..738873e876ca 100644
--- a/drivers/acpi/acpica/evgpeutil.c
+++ b/drivers/acpi/acpica/evgpeutil.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeutil - GPE utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 3ef4e27995f0..5884eba047f7 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evhandler - Support for Address Space handlers
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index d45f7639f7ee..ce1eda6beb84 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evmisc - Miscellaneous event manager support functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -230,11 +230,15 @@ void acpi_ev_terminate(void)
 		/* Disable all GPEs in all GPE blocks */
 
 		status = acpi_ev_walk_gpe_list(acpi_hw_disable_gpe_block, NULL);
+		if (ACPI_FAILURE(status)) {
+			ACPI_EXCEPTION((AE_INFO, status,
+					"Could not disable GPEs in GPE block"));
+		}
 
 		status = acpi_ev_remove_global_lock_handler();
 		if (ACPI_FAILURE(status)) {
-			ACPI_ERROR((AE_INFO,
-				    "Could not remove Global Lock handler"));
+			ACPI_EXCEPTION((AE_INFO, status,
+					"Could not remove Global Lock handler"));
 		}
 
 		acpi_gbl_events_initialized = FALSE;
@@ -250,6 +254,10 @@ void acpi_ev_terminate(void)
 	/* Deallocate all handler objects installed within GPE info structs */
 
 	status = acpi_ev_walk_gpe_list(acpi_ev_delete_gpe_handlers, NULL);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status,
+				"Could not delete GPE handlers"));
+	}
 
 	/* Return to original mode if necessary */
 
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 45dc797df05d..738d4b231f34 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evregion - Operation Region support
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -836,11 +836,11 @@ acpi_ev_orphan_ec_reg_method(struct acpi_namespace_node *ec_device_node)
 	objects[1].type = ACPI_TYPE_INTEGER;
 	objects[1].integer.value = ACPI_REG_CONNECT;
 
-	status = acpi_evaluate_object(reg_method, NULL, &args, NULL);
+	(void)acpi_evaluate_object(reg_method, NULL, &args, NULL);
 
 exit:
 	/* We ignore all errors from above, don't care */
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	(void)acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
 	return_VOID;
 }
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 0b47bbcd2a23..aefc0145e583 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evrgnini- ACPI address_space (op_region) init
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -198,7 +198,6 @@ acpi_ev_pci_config_region_setup(acpi_handle handle,
 						 * root bridge. Still need to return a context object
 						 * for the new PCI_Config operation region, however.
 						 */
-						status = AE_OK;
 					} else {
 						ACPI_EXCEPTION((AE_INFO, status,
 								"Could not install PciConfig handler "
diff --git a/drivers/acpi/acpica/evxface.c b/drivers/acpi/acpica/evxface.c
index 279ef0557aa3..e4e012297eee 100644
--- a/drivers/acpi/acpica/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxface - External interfaces for ACPI events
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index e528fe56b755..1a15b0087379 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfevnt - External Interfaces, ACPI event disable/enable
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 04a40d563dd6..2c39ff2a7406 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfgpe - External Interfaces for General Purpose Events (GPEs)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index 47265b073e6f..da97fd0c6b51 100644
--- a/drivers/acpi/acpica/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c
@@ -4,7 +4,7 @@
  * Module Name: evxfregn - External Interfaces, ACPI Operation Regions and
  *                         Address Spaces.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
index c7af07566b7b..43711412722f 100644
--- a/drivers/acpi/acpica/exconcat.c
+++ b/drivers/acpi/acpica/exconcat.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconcat - Concatenate-type AML operators
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 46a8baf28bd0..68efd704e2dc 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconfig - Namespace reconfiguration (Load/Unload opcodes)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index ca2966bacb50..50c7aad2e86d 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconvrt - Object conversion routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index f376fc00064e..a17482428b46 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -3,7 +3,7 @@
  *
  * Module Name: excreate - Named object creation
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c
index b1aeec8cac55..a5223dcaee70 100644
--- a/drivers/acpi/acpica/exdebug.c
+++ b/drivers/acpi/acpica/exdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exdebug - Support for stores to the AML Debug Object
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index a9bc938a3b55..47a4d9a40d6b 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exdump - Interpreter debug output routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index d3d2dbfba680..e85eb31e5075 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exfield - AML execution - field_unit read/write
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -96,7 +96,8 @@ acpi_ex_get_protocol_buffer_length(u32 protocol_id, u32 *return_length)
  * RETURN:      Status
  *
  * DESCRIPTION: Read from a named field. Returns either an Integer or a
- *              Buffer, depending on the size of the field.
+ *              Buffer, depending on the size of the field and whether if a
+ *              field is created by the create_field() operator.
  *
  ******************************************************************************/
 
@@ -154,12 +155,17 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 	 * the use of arithmetic operators on the returned value if the
 	 * field size is equal or smaller than an Integer.
 	 *
+	 * However, all buffer fields created by create_field operator needs to
+	 * remain as a buffer to match other AML interpreter implementations.
+	 *
 	 * Note: Field.length is in bits.
 	 */
 	buffer_length =
 	    (acpi_size)ACPI_ROUND_BITS_UP_TO_BYTES(obj_desc->field.bit_length);
 
-	if (buffer_length > acpi_gbl_integer_byte_width) {
+	if (buffer_length > acpi_gbl_integer_byte_width ||
+	    (obj_desc->common.type == ACPI_TYPE_BUFFER_FIELD &&
+	     obj_desc->buffer_field.is_create_field)) {
 
 		/* Field is too large for an Integer, create a Buffer instead */
 
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index 95a0dcb4f7b9..ade35ff1c7ba 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exfldio - Aml Field I/O
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 60e854965af9..717e3998fd77 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exmisc - ACPI AML (p-code) execution - specific opcodes
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exmutex.c b/drivers/acpi/acpica/exmutex.c
index 775cd62af5b3..9ff247cba571 100644
--- a/drivers/acpi/acpica/exmutex.c
+++ b/drivers/acpi/acpica/exmutex.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exmutex - ASL Mutex Acquire/Release functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index 6b76be5212a4..74f8b0d0452b 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exnames - interpreter/scanner name load/execute
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index 06e35ea09823..a46d685a3ffc 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg1 - AML execution - opcodes with 1 argument
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 5e4a31a11df4..03241d18ac1d 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg2 - AML execution - opcodes with 2 arguments
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index a4ebce417930..c8d0d75fc450 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg3 - AML execution - opcodes with 3 arguments
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 31385a0b2dab..55d0fa056fe7 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg6 - AML execution - opcodes with 6 arguments
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exprep.c b/drivers/acpi/acpica/exprep.c
index 728d752f7adc..a4e306690a21 100644
--- a/drivers/acpi/acpica/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exprep - ACPI AML field prep utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index c08521194b29..d15a66de26c0 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exregion - ACPI default op_region (address space) handlers
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresnte.c b/drivers/acpi/acpica/exresnte.c
index b223d01e6bf8..3e4018678c09 100644
--- a/drivers/acpi/acpica/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresnte - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 36da5c0ef69c..912a078c60a4 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresolv - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index bdfe4d33b483..4d1b22971d58 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresop - AML Interpreter operand/object resolution
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index c5aa4b0deb70..760bc7cef55a 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exserial - field_unit support for serial address spaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index 7f3c3571c292..3adc0a29d890 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exstore - AML Interpreter object store support
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index 4e43c8277f07..8c34f4e2ab8f 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -4,7 +4,7 @@
  * Module Name: exstoren - AML Interpreter object store support,
  *                        Store to Node (namespace object)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c
index dc9e2b1c1ad9..dc66696080a5 100644
--- a/drivers/acpi/acpica/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exstorob - AML object store support, store to object
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exsystem.c b/drivers/acpi/acpica/exsystem.c
index a538f7799b78..f329b01672bb 100644
--- a/drivers/acpi/acpica/exsystem.c
+++ b/drivers/acpi/acpica/exsystem.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exsystem - Interface to OS services
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/extrace.c b/drivers/acpi/acpica/extrace.c
index db7f93ca539f..832a47885b99 100644
--- a/drivers/acpi/acpica/extrace.c
+++ b/drivers/acpi/acpica/extrace.c
@@ -3,7 +3,7 @@
  *
  * Module Name: extrace - Support for interpreter execution tracing
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 75380be1c2ef..8fefa6feac2f 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exutils - interpreter/scanner utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwacpi.c b/drivers/acpi/acpica/hwacpi.c
index 926f7e080f22..9b9aac27ff7e 100644
--- a/drivers/acpi/acpica/hwacpi.c
+++ b/drivers/acpi/acpica/hwacpi.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwacpi - ACPI Hardware Initialization/Mode Interface
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c
index dee3affaca49..d9be5d0545d4 100644
--- a/drivers/acpi/acpica/hwesleep.c
+++ b/drivers/acpi/acpica/hwesleep.c
@@ -4,7 +4,7 @@
  * Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                    extended FADT-V5 sleep registers.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 565bd3f29f31..1b4252bdcd0b 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwgpe - Low level GPE enable/disable/clear functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index b62db8ec446f..243a25add28f 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -4,7 +4,7 @@
  * Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                   original/legacy sleep/PM registers.
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index 2fb9f75d71c5..07473ddfa9a9 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -3,7 +3,7 @@
  *
  * Name: hwtimer.c - ACPI Power Management Timer Interface
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index cd576153257c..4d94861e6093 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwvalid - I/O request validation
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index c4fd97104024..134dbfadcd15 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwxface - Public ACPICA hardware interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c
index abbf9702aa7f..a4b66f4b2714 100644
--- a/drivers/acpi/acpica/hwxfsleep.c
+++ b/drivers/acpi/acpica/hwxfsleep.c
@@ -3,7 +3,7 @@
  *
  * Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -166,6 +166,9 @@ acpi_status acpi_enter_sleep_state_s4bios(void)
 
 	status = acpi_hw_write_port(acpi_gbl_FADT.smi_command,
 				    (u32)acpi_gbl_FADT.s4_bios_request, 8);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
 
 	do {
 		acpi_os_stall(ACPI_USEC_PER_MSEC);
diff --git a/drivers/acpi/acpica/nsarguments.c b/drivers/acpi/acpica/nsarguments.c
index 0e97ed38973f..d5e8405e9d8f 100644
--- a/drivers/acpi/acpica/nsarguments.c
+++ b/drivers/acpi/acpica/nsarguments.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsarguments - Validation of args for ACPI predefined methods
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index 14cbf63f1991..c86c82939ebb 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -4,7 +4,7 @@
  * Module Name: nsconvert - Object conversions for objects returned by
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -486,5 +486,5 @@ acpi_ns_convert_to_reference(struct acpi_namespace_node *scope,
 error_exit:
 	ACPI_FREE(name);
 	*return_object = new_object;
-	return (AE_OK);
+	return (status);
 }
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index 9731d7cf1b83..994f0b556c60 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -291,7 +291,7 @@ acpi_ns_dump_one_object(acpi_handle obj_handle,
 					for (i = 0;
 					     (i < obj_desc->buffer.length
 					      && i < 12); i++) {
-						acpi_os_printf(" %.2hX",
+						acpi_os_printf(" %2.2X",
 							       obj_desc->buffer.
 							       pointer[i]);
 					}
@@ -404,7 +404,7 @@ acpi_ns_dump_one_object(acpi_handle obj_handle,
 		case ACPI_TYPE_LOCAL_BANK_FIELD:
 		case ACPI_TYPE_LOCAL_INDEX_FIELD:
 
-			acpi_os_printf(" Off %.3X Len %.2X Acc %.2hd\n",
+			acpi_os_printf(" Off %.3X Len %.2X Acc %.2X\n",
 				       (obj_desc->common_field.
 					base_byte_offset * 8)
 				       +
@@ -589,8 +589,6 @@ acpi_ns_dump_one_object(acpi_handle obj_handle,
 
 			goto cleanup;
 		}
-
-		obj_type = ACPI_TYPE_INVALID;	/* Terminate loop after next pass */
 	}
 
 cleanup:
diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c
index 73e5c83c8c9f..b691fe20e384 100644
--- a/drivers/acpi/acpica/nsdumpdv.c
+++ b/drivers/acpi/acpica/nsdumpdv.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 61e9dfc9fe8c..e16f6a0c2c3f 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsinit - namespace initialization
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index d7c4d6e8e21e..9ba17891edb6 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsload - namespace loading/expanding/contracting procedures
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f16cf5e4742c..7e74a765e785 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsparse - namespace interface to AML parser
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 2f9d93122d0c..0cea9c363ace 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nspredef - Validation of ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsprepkg.c b/drivers/acpi/acpica/nsprepkg.c
index 9a80e3b23496..237b3ddeb075 100644
--- a/drivers/acpi/acpica/nsprepkg.c
+++ b/drivers/acpi/acpica/nsprepkg.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsprepkg - Validation of package objects for predefined names
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index be86fea8e4d4..90db2d85e7f5 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsrepair - Repair for objects returned by predefined methods
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 663d85e0adba..125143c41bb8 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -4,7 +4,7 @@
  * Module Name: nsrepair2 - Repair for objects returned by specific
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index b8d007c84d32..e66abdab8f31 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -4,7 +4,7 @@
  * Module Name: nsutils - Utilities for accessing ACPI namespace, accessing
  *                        parents and siblings and Scope manipulation
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index ceea6af79d12..b7f3e8603ad8 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nswalk - Functions for walking the ACPI namespace
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 55b4a5b3331f..984129dcaa0c 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -4,7 +4,7 @@
  * Module Name: nsxfname - Public interfaces to the ACPI subsystem
  *                         ACPI Namespace oriented interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -425,8 +425,8 @@ acpi_get_object_info(acpi_handle handle,
 	}
 
 	if (cls) {
-		next_id_string = acpi_ns_copy_device_id(&info->class_code,
-							cls, next_id_string);
+		(void)acpi_ns_copy_device_id(&info->class_code,
+					     cls, next_id_string);
 	}
 
 	/* Copy the fixed-length data */
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index e62c7897fdf1..3b40db4ad9f3 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psargs - Parse AML opcode arguments
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 207805047bc4..3cf0687b9915 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psloop - Main AML parse loop
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index 98e5c7400e54..2480c26c5171 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psobject - Support for parse objects
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -481,8 +481,7 @@ acpi_ps_complete_op(struct acpi_walk_state *walk_state,
 			walk_state->opcode = (*op)->common.aml_opcode;
 
 			status = walk_state->ascending_callback(walk_state);
-			status =
-			    acpi_ps_next_parse_state(walk_state, *op, status);
+			(void)acpi_ps_next_parse_state(walk_state, *op, status);
 
 			status2 = acpi_ps_complete_this_op(walk_state, *op);
 			if (ACPI_FAILURE(status2)) {
@@ -490,7 +489,6 @@ acpi_ps_complete_op(struct acpi_walk_state *walk_state,
 			}
 		}
 
-		status = AE_OK;
 		break;
 
 	case AE_CTRL_BREAK:
@@ -512,14 +510,13 @@ acpi_ps_complete_op(struct acpi_walk_state *walk_state,
 		walk_state->opcode = (*op)->common.aml_opcode;
 
 		status = walk_state->ascending_callback(walk_state);
-		status = acpi_ps_next_parse_state(walk_state, *op, status);
+		(void)acpi_ps_next_parse_state(walk_state, *op, status);
 
 		status2 = acpi_ps_complete_this_op(walk_state, *op);
 		if (ACPI_FAILURE(status2)) {
 			return_ACPI_STATUS(status2);
 		}
 
-		status = AE_OK;
 		break;
 
 	case AE_CTRL_TERMINATE:
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 43775c5ce17c..28af49263ebf 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psopcode - Parser/Interpreter opcode information table
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index 15e7563829f1..ab9327f6a63c 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psopinfo - AML opcode information functions and dispatch tables
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 9b386530ffbe..c780046bf294 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psparse - Parser top level AML parse routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psscope.c b/drivers/acpi/acpica/psscope.c
index f153ca804740..fceb311995e9 100644
--- a/drivers/acpi/acpica/psscope.c
+++ b/drivers/acpi/acpica/psscope.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psscope - Parser scope stack management routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 22d8a2becdd0..c8aef0694864 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -3,7 +3,7 @@
  *
  * Module Name: pstree - Parser op tree manipulation/traversal/search
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index 2512f584fa3c..00efae2f95ba 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psutils - Parser miscellaneous utilities (Parser only)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/pswalk.c b/drivers/acpi/acpica/pswalk.c
index cf91841297c2..0fe3adf6b0e5 100644
--- a/drivers/acpi/acpica/pswalk.c
+++ b/drivers/acpi/acpica/pswalk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: pswalk - Parser routines to walk parsed op tree(s)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index ee2ee2c858f2..1bbfc8def388 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psxface - Parser external interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c
index 570ea0df8a1b..c659b54985a5 100644
--- a/drivers/acpi/acpica/rscreate.c
+++ b/drivers/acpi/acpica/rscreate.c
@@ -312,6 +312,9 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
 				path_buffer.pointer = user_prt->source;
 
 				status = acpi_ns_handle_to_pathname((acpi_handle)node, &path_buffer, FALSE);
+				if (ACPI_FAILURE(status)) {
+					return_ACPI_STATUS(status);
+				}
 
 				/* +1 to include null terminator */
 
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 309440010ab2..523b1e9b98d4 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbdata - Table manager data structure functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -933,6 +933,9 @@ acpi_tb_load_table(u32 table_index, struct acpi_namespace_node *parent_node)
 	}
 
 	status = acpi_ns_load_table(table_index, parent_node);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
 
 	/*
 	 * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 0041bfba9abc..907edc5edba7 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfadt   - FADT table utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index b2abb40023a6..56d81e490a5c 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfind   - find table
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index ef1ffd36ab3f..0bb15add2245 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbinstal - ACPI table installation and removal
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c
index 4764f849cb78..0b3494ad9a70 100644
--- a/drivers/acpi/acpica/tbprint.c
+++ b/drivers/acpi/acpica/tbprint.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbprint - Table output utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index c5f0b8ec70cc..dfe1ac3ae34a 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbutils - ACPI Table utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 1640685bf4ae..f8403d480318 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxface - ACPI table-oriented external interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 86f1693f6d29..bcba993d4dac 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfload - Table load/unload external interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -268,6 +268,8 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table)
  *
  * PARAMETERS:  table               - Pointer to a buffer containing the ACPI
  *                                    table to be loaded.
+ *              table_idx           - Pointer to a u32 for storing the table
+ *                                    index, might be NULL
  *
  * RETURN:      Status
  *
@@ -278,7 +280,7 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table)
  *              to ensure that the table is not deleted or unmapped.
  *
  ******************************************************************************/
-acpi_status acpi_load_table(struct acpi_table_header *table)
+acpi_status acpi_load_table(struct acpi_table_header *table, u32 *table_idx)
 {
 	acpi_status status;
 	u32 table_index;
@@ -297,6 +299,10 @@ acpi_status acpi_load_table(struct acpi_table_header *table)
 	status = acpi_tb_install_and_load_table(ACPI_PTR_TO_PHYSADDR(table),
 						ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL,
 						FALSE, &table_index);
+	if (table_idx) {
+		*table_idx = table_index;
+	}
+
 	if (ACPI_SUCCESS(status)) {
 
 		/* Complete the initialization/resolution of new objects */
@@ -390,3 +396,35 @@ acpi_status acpi_unload_parent_table(acpi_handle object)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_unload_parent_table)
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_unload_table
+ *
+ * PARAMETERS:  table_index         - Index as returned by acpi_load_table
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Via the table_index representing an SSDT or OEMx table, unloads
+ *              the table and deletes all namespace objects associated with
+ *              that table. Unloading of the DSDT is not allowed.
+ *              Note: Mainly intended to support hotplug removal of SSDTs.
+ *
+ ******************************************************************************/
+acpi_status acpi_unload_table(u32 table_index)
+{
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(acpi_unload_table);
+
+	if (table_index == 1) {
+
+		/* table_index==1 means DSDT is the owner. DSDT cannot be unloaded */
+
+		return_ACPI_STATUS(AE_TYPE);
+	}
+
+	status = acpi_tb_unload_table(table_index);
+	return_ACPI_STATUS(status);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_unload_table)
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index e2859d09ca2e..0edc6ef5d46d 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfroot - Find the root ACPI table (RSDT)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index bb260376bd59..99fa48722cf6 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utaddress - op_region address range check
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index d64da4d9e8d0..303ab51b4fcf 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utalloc - local memory allocation routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c
index f6cd7d4f698b..d78656d960e8 100644
--- a/drivers/acpi/acpica/utascii.c
+++ b/drivers/acpi/acpica/utascii.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utascii - Utility ascii functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index 61db9967ebe4..f2ec427f4e29 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utbuffer - Buffer dump routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -37,7 +37,9 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset)
 	u32 j;
 	u32 temp32;
 	u8 buf_char;
+	u32 display_data_only = display & DB_DISPLAY_DATA_ONLY;
 
+	display &= ~DB_DISPLAY_DATA_ONLY;
 	if (!buffer) {
 		acpi_os_printf("Null Buffer Pointer in DumpBuffer!\n");
 		return;
@@ -53,7 +55,9 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset)
 
 		/* Print current offset */
 
-		acpi_os_printf("%8.4X: ", (base_offset + i));
+		if (!display_data_only) {
+			acpi_os_printf("%8.4X: ", (base_offset + i));
+		}
 
 		/* Print 16 hex chars */
 
@@ -109,32 +113,34 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset)
 		 * Print the ASCII equivalent characters but watch out for the bad
 		 * unprintable ones (printable chars are 0x20 through 0x7E)
 		 */
-		acpi_os_printf(" ");
-		for (j = 0; j < 16; j++) {
-			if (i + j >= count) {
-				acpi_os_printf("\n");
-				return;
+		if (!display_data_only) {
+			acpi_os_printf(" ");
+			for (j = 0; j < 16; j++) {
+				if (i + j >= count) {
+					acpi_os_printf("\n");
+					return;
+				}
+
+				/*
+				 * Add comment characters so rest of line is ignored when
+				 * compiled
+				 */
+				if (j == 0) {
+					acpi_os_printf("// ");
+				}
+
+				buf_char = buffer[(acpi_size)i + j];
+				if (isprint(buf_char)) {
+					acpi_os_printf("%c", buf_char);
+				} else {
+					acpi_os_printf(".");
+				}
 			}
 
-			/*
-			 * Add comment characters so rest of line is ignored when
-			 * compiled
-			 */
-			if (j == 0) {
-				acpi_os_printf("// ");
-			}
+			/* Done with that line. */
 
-			buf_char = buffer[(acpi_size)i + j];
-			if (isprint(buf_char)) {
-				acpi_os_printf("%c", buf_char);
-			} else {
-				acpi_os_printf(".");
-			}
+			acpi_os_printf("\n");
 		}
-
-		/* Done with that line. */
-
-		acpi_os_printf("\n");
 		i += 16;
 	}
 
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 8533fce7fa93..1b03a2747401 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utcache - local cache allocation routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index 1fb8327f3c3b..41bdd0278dd8 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utcopy - Internal to external object translation utilities
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 5b169b5f0f1a..0c8cb0612414 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utdebug - Debug print/trace routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 65beaa237669..befdd13b403b 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utdecode - Utility decoding routines (value-to-string)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 558a9f3b0678..8180d1a458f5 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uteval - Object evaluation
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index b0622ec4bb85..e6dcbdc3fc6e 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utglobal - Global variables for the ACPI subsystem
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uthex.c b/drivers/acpi/acpica/uthex.c
index b6da135d5f41..0e02f12513dc 100644
--- a/drivers/acpi/acpica/uthex.c
+++ b/drivers/acpi/acpica/uthex.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uthex -- Hex/ASCII support functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
index e805abdd95b8..3bb06935a2ad 100644
--- a/drivers/acpi/acpica/utids.c
+++ b/drivers/acpi/acpica/utids.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -289,9 +289,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
 						  value);
 			length = ACPI_EISAID_STRING_SIZE;
 		} else {	/* ACPI_TYPE_STRING */
-
 			/* Copy the String CID from the returned object */
-
 			strcpy(next_id_string, cid_objects[i]->string.pointer);
 			length = cid_objects[i]->string.length + 1;
 		}
diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index 6f33e7c72327..fdbc397c038d 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utinit - Common ACPI subsystem initialization
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utlock.c b/drivers/acpi/acpica/utlock.c
index 8b4ff11d617a..46be549539e7 100644
--- a/drivers/acpi/acpica/utlock.c
+++ b/drivers/acpi/acpica/utlock.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utlock - Reader/Writer lock interfaces
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index eee97a902696..3e60bdac2200 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utobject - ACPI object create/delete/size/cache routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index ad2b218039d0..0a01c08dad8a 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utosi - Support for the _OSI predefined control method
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c
index 1b0f68f5ed8c..05fe3470fb93 100644
--- a/drivers/acpi/acpica/utpredef.c
+++ b/drivers/acpi/acpica/utpredef.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utpredef - support functions for predefined names
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 5839f2fa7400..a874dac7db5c 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utprint - Formatted printing routines
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index 8052f7ef5025..d366be431a84 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uttrack - Memory allocation tracking routines (debug only)
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
@@ -660,7 +660,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module)
 					case ACPI_DESC_TYPE_PARSER:
 
 						acpi_os_printf
-						    ("AmlOpcode 0x%04hX\n",
+						    ("AmlOpcode 0x%04X\n",
 						     descriptor->op.asl.
 						     aml_opcode);
 						break;
diff --git a/drivers/acpi/acpica/utuuid.c b/drivers/acpi/acpica/utuuid.c
index 0a7cf8007643..b8039954b0d1 100644
--- a/drivers/acpi/acpica/utuuid.c
+++ b/drivers/acpi/acpica/utuuid.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utuuid -- UUID support functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index f497c4b30e65..ca7c9f0144ef 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utxface - External interfaces, miscellaneous utility functions
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index cf769e94fe0f..653e3bb20036 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utxfinit - External interfaces for ACPICA initialization
  *
- * Copyright (C) 2000 - 2019, Intel Corp.
+ * Copyright (C) 2000 - 2020, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 131c35ee9ed3..e358d0046494 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -170,9 +170,9 @@ rewind:
 		if (ip == ctx->ip) {
 			if (entry->instruction >= ctx->instructions ||
 			    !ctx->ins_table[entry->instruction].run) {
-				pr_warning(FW_WARN APEI_PFX
-			"Invalid action table, unknown instruction type: %d\n",
-					   entry->instruction);
+				pr_warn(FW_WARN APEI_PFX
+					"Invalid action table, unknown instruction type: %d\n",
+					entry->instruction);
 				return -EINVAL;
 			}
 			run = ctx->ins_table[entry->instruction].run;
@@ -211,9 +211,9 @@ static int apei_exec_for_each_entry(struct apei_exec_context *ctx,
 		if (end)
 			*end = i;
 		if (ins >= ctx->instructions || !ins_table[ins].run) {
-			pr_warning(FW_WARN APEI_PFX
-			"Invalid action table, unknown instruction type: %d\n",
-				   ins);
+			pr_warn(FW_WARN APEI_PFX
+				"Invalid action table, unknown instruction type: %d\n",
+				ins);
 			return -EINVAL;
 		}
 		rc = func(ctx, entry, data);
@@ -579,18 +579,18 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
 	space_id = reg->space_id;
 	*paddr = get_unaligned(&reg->address);
 	if (!*paddr) {
-		pr_warning(FW_BUG APEI_PFX
-			   "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
-			   *paddr, bit_width, bit_offset, access_size_code,
-			   space_id);
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
 		return -EINVAL;
 	}
 
 	if (access_size_code < 1 || access_size_code > 4) {
-		pr_warning(FW_BUG APEI_PFX
-			   "Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
-			   *paddr, bit_width, bit_offset, access_size_code,
-			   space_id);
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
 		return -EINVAL;
 	}
 	*access_bit_width = 1UL << (access_size_code + 2);
@@ -604,19 +604,19 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
 		*access_bit_width = 64;
 
 	if ((bit_width + bit_offset) > *access_bit_width) {
-		pr_warning(FW_BUG APEI_PFX
-			   "Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
-			   *paddr, bit_width, bit_offset, access_size_code,
-			   space_id);
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
 		return -EINVAL;
 	}
 
 	if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
 	    space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
-		pr_warning(FW_BUG APEI_PFX
-			   "Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
-			   *paddr, bit_width, bit_offset, access_size_code,
-			   space_id);
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index e430cf4caec2..086373f8ccb1 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -172,7 +172,7 @@ static int einj_get_available_error_type(u32 *type)
 static int einj_timedout(u64 *t)
 {
 	if ((s64)*t < SPIN_UNIT) {
-		pr_warning(FW_WARN "Firmware does not respond in time\n");
+		pr_warn(FW_WARN "Firmware does not respond in time\n");
 		return 1;
 	}
 	*t -= SPIN_UNIT;
@@ -312,7 +312,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
 	}
 	rc = einj_check_trigger_header(trigger_tab);
 	if (rc) {
-		pr_warning(FW_BUG "Invalid trigger error action table.\n");
+		pr_warn(FW_BUG "Invalid trigger error action table.\n");
 		goto out_rel_header;
 	}
 
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index d0f3a46716e9..c740f0faad39 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -118,9 +118,8 @@ retry:
 	if (rc < 0)
 		goto out;
 	if (len > ERST_DBG_RECORD_LEN_MAX) {
-		pr_warning(ERST_DBG_PFX
-			   "Record (ID: 0x%llx) length is too long: %zd\n",
-			   id, len);
+		pr_warn(ERST_DBG_PFX
+			"Record (ID: 0x%llx) length is too long: %zd\n", id, len);
 		rc = -EIO;
 		goto out;
 	}
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 777f6f7122b4..103acbbfcf9a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -235,10 +235,10 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 		goto err_unmap_read_ack_addr;
 	error_block_length = generic->error_block_length;
 	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
-		pr_warning(FW_WARN GHES_PFX
-			   "Error status block length is too long: %u for "
-			   "generic hardware error source: %d.\n",
-			   error_block_length, generic->header.source_id);
+		pr_warn(FW_WARN GHES_PFX
+			"Error status block length is too long: %u for "
+			"generic hardware error source: %d.\n",
+			error_block_length, generic->header.source_id);
 		error_block_length = GHES_ESTATUS_MAX_SIZE;
 	}
 	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
@@ -748,8 +748,8 @@ static void ghes_add_timer(struct ghes *ghes)
 	unsigned long expire;
 
 	if (!g->notify.poll_interval) {
-		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
-			   g->header.source_id);
+		pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+			g->header.source_id);
 		return;
 	}
 	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
@@ -1155,21 +1155,20 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		}
 		break;
 	case ACPI_HEST_NOTIFY_LOCAL:
-		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
-			   generic->header.source_id);
+		pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
+			generic->header.source_id);
 		goto err;
 	default:
-		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
-			   generic->notify.type, generic->header.source_id);
+		pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+			generic->notify.type, generic->header.source_id);
 		goto err;
 	}
 
 	rc = -EIO;
 	if (generic->error_block_length <
 	    sizeof(struct acpi_hest_generic_status)) {
-		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
-			   generic->error_block_length,
-			   generic->header.source_id);
+		pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+			generic->error_block_length, generic->header.source_id);
 		goto err;
 	}
 	ghes = ghes_new(generic);
@@ -1181,7 +1180,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
 
 	switch (generic->notify.type) {
 	case ACPI_HEST_NOTIFY_POLLED:
-		timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
+		timer_setup(&ghes->timer, ghes_poll_func, 0);
 		ghes_add_timer(ghes);
 		break;
 	case ACPI_HEST_NOTIFY_EXTERNAL:
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 267bdbf6a7bf..822402480f7d 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -92,15 +92,15 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
 	for (i = 0; i < hest_tab->error_source_count; i++) {
 		len = hest_esrc_len(hest_hdr);
 		if (!len) {
-			pr_warning(FW_WARN HEST_PFX
-				   "Unknown or unused hardware error source "
-				   "type: %d for hardware error source: %d.\n",
-				   hest_hdr->type, hest_hdr->source_id);
+			pr_warn(FW_WARN HEST_PFX
+				"Unknown or unused hardware error source "
+				"type: %d for hardware error source: %d.\n",
+				hest_hdr->type, hest_hdr->source_id);
 			return -EINVAL;
 		}
 		if ((void *)hest_hdr + len >
 		    (void *)hest_tab + hest_tab->header.length) {
-			pr_warning(FW_BUG HEST_PFX
+			pr_warn(FW_BUG HEST_PFX
 		"Table contents overflow for hardware error source: %d.\n",
 				hest_hdr->source_id);
 			return -EINVAL;
@@ -164,8 +164,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
 		ghes_dev = ghes_arr->ghes_devs[i];
 		hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
 		if (hdr->source_id == hest_hdr->source_id) {
-			pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
-				   hdr->source_id);
+			pr_warn(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+				hdr->source_id);
 			return -EIO;
 		}
 	}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 5a7551d060f2..6078064684c6 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -298,6 +298,59 @@ out:
 	return status;
 }
 
+struct iort_workaround_oem_info {
+	char oem_id[ACPI_OEM_ID_SIZE + 1];
+	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
+	u32 oem_revision;
+};
+
+static bool apply_id_count_workaround;
+
+static struct iort_workaround_oem_info wa_info[] __initdata = {
+	{
+		.oem_id		= "HISI  ",
+		.oem_table_id	= "HIP07   ",
+		.oem_revision	= 0,
+	}, {
+		.oem_id		= "HISI  ",
+		.oem_table_id	= "HIP08   ",
+		.oem_revision	= 0,
+	}
+};
+
+static void __init
+iort_check_id_count_workaround(struct acpi_table_header *tbl)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wa_info); i++) {
+		if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) &&
+		    !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
+		    wa_info[i].oem_revision == tbl->oem_revision) {
+			apply_id_count_workaround = true;
+			pr_warn(FW_BUG "ID count for ID mapping entry is wrong, applying workaround\n");
+			break;
+		}
+	}
+}
+
+static inline u32 iort_get_map_max(struct acpi_iort_id_mapping *map)
+{
+	u32 map_max = map->input_base + map->id_count;
+
+	/*
+	 * The IORT specification revision D (Section 3, table 4, page 9) says
+	 * Number of IDs = The number of IDs in the range minus one, but the
+	 * IORT code ignored the "minus one", and some firmware did that too,
+	 * so apply a workaround here to keep compatible with both the spec
+	 * compliant and non-spec compliant firmwares.
+	 */
+	if (apply_id_count_workaround)
+		map_max--;
+
+	return map_max;
+}
+
 static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
 		       u32 *rid_out)
 {
@@ -314,8 +367,7 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
 		return -ENXIO;
 	}
 
-	if (rid_in < map->input_base ||
-	    (rid_in >= map->input_base + map->id_count))
+	if (rid_in < map->input_base || rid_in > iort_get_map_max(map))
 		return -ENXIO;
 
 	*rid_out = map->output_base + (rid_in - map->input_base);
@@ -1057,8 +1109,8 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
  */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 {
-	u64 mask, dmaaddr = 0, size = 0, offset = 0;
-	int ret, msb;
+	u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
+	int ret;
 
 	/*
 	 * If @dev is expected to be DMA-capable then the bus code that created
@@ -1085,19 +1137,13 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 	}
 
 	if (!ret) {
-		msb = fls64(dmaaddr + size - 1);
-		/*
-		 * Round-up to the power-of-two mask or set
-		 * the mask to the whole 64-bit address space
-		 * in case the DMA region covers the full
-		 * memory window.
-		 */
-		mask = msb == 64 ? U64_MAX : (1ULL << msb) - 1;
 		/*
-		 * Limit coherent and dma mask based on size
-		 * retrieved from firmware.
+		 * Limit coherent and dma mask based on size retrieved from
+		 * firmware.
 		 */
-		dev->bus_dma_mask = mask;
+		end = dmaaddr + size - 1;
+		mask = DMA_BIT_MASK(ilog2(end) + 1);
+		dev->bus_dma_limit = end;
 		dev->coherent_dma_mask = mask;
 		*dev->dma_mask = mask;
 	}
@@ -1637,5 +1683,6 @@ void __init acpi_iort_init(void)
 		return;
 	}
 
+	iort_check_id_count_workaround(iort_table);
 	iort_init_platform_devices();
 }
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 558fedf8a7a1..15cc7d5a6185 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -38,6 +38,8 @@
 #define PREFIX "ACPI: "
 
 #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
+#define ACPI_BATTERY_CAPACITY_VALID(capacity) \
+	((capacity) != 0 && (capacity) != ACPI_BATTERY_VALUE_UNKNOWN)
 
 #define ACPI_BATTERY_DEVICE_NAME	"Battery"
 
@@ -192,7 +194,8 @@ static int acpi_battery_is_charged(struct acpi_battery *battery)
 
 static bool acpi_battery_is_degraded(struct acpi_battery *battery)
 {
-	return battery->full_charge_capacity && battery->design_capacity &&
+	return ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) &&
+		ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity) &&
 		battery->full_charge_capacity < battery->design_capacity;
 }
 
@@ -214,7 +217,7 @@ static int acpi_battery_get_property(struct power_supply *psy,
 				     enum power_supply_property psp,
 				     union power_supply_propval *val)
 {
-	int ret = 0;
+	int full_capacity = ACPI_BATTERY_VALUE_UNKNOWN, ret = 0;
 	struct acpi_battery *battery = to_acpi_battery(psy);
 
 	if (acpi_battery_present(battery)) {
@@ -263,14 +266,14 @@ static int acpi_battery_get_property(struct power_supply *psy,
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-		if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+		if (!ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
 			ret = -ENODEV;
 		else
 			val->intval = battery->design_capacity * 1000;
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 	case POWER_SUPPLY_PROP_ENERGY_FULL:
-		if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+		if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity))
 			ret = -ENODEV;
 		else
 			val->intval = battery->full_charge_capacity * 1000;
@@ -283,11 +286,17 @@ static int acpi_battery_get_property(struct power_supply *psy,
 			val->intval = battery->capacity_now * 1000;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (battery->capacity_now && battery->full_charge_capacity)
-			val->intval = battery->capacity_now * 100/
-					battery->full_charge_capacity;
+		if (ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity))
+			full_capacity = battery->full_charge_capacity;
+		else if (ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
+			full_capacity = battery->design_capacity;
+
+		if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN ||
+		    full_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+			ret = -ENODEV;
 		else
-			val->intval = 0;
+			val->intval = battery->capacity_now * 100/
+					full_capacity;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
 		if (battery->state & ACPI_BATTERY_STATE_CRITICAL)
@@ -333,6 +342,20 @@ static enum power_supply_property charge_battery_props[] = {
 	POWER_SUPPLY_PROP_SERIAL_NUMBER,
 };
 
+static enum power_supply_property charge_battery_full_cap_broken_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
+};
+
 static enum power_supply_property energy_battery_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
@@ -794,20 +817,34 @@ static void __exit battery_hook_exit(void)
 static int sysfs_add_battery(struct acpi_battery *battery)
 {
 	struct power_supply_config psy_cfg = { .drv_data = battery, };
+	bool full_cap_broken = false;
+
+	if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) &&
+	    !ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
+		full_cap_broken = true;
 
 	if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
-		battery->bat_desc.properties = charge_battery_props;
-		battery->bat_desc.num_properties =
-			ARRAY_SIZE(charge_battery_props);
-	} else if (battery->full_charge_capacity == 0) {
-		battery->bat_desc.properties =
-			energy_battery_full_cap_broken_props;
-		battery->bat_desc.num_properties =
-			ARRAY_SIZE(energy_battery_full_cap_broken_props);
+		if (full_cap_broken) {
+			battery->bat_desc.properties =
+			    charge_battery_full_cap_broken_props;
+			battery->bat_desc.num_properties =
+			    ARRAY_SIZE(charge_battery_full_cap_broken_props);
+		} else {
+			battery->bat_desc.properties = charge_battery_props;
+			battery->bat_desc.num_properties =
+			    ARRAY_SIZE(charge_battery_props);
+		}
 	} else {
-		battery->bat_desc.properties = energy_battery_props;
-		battery->bat_desc.num_properties =
-			ARRAY_SIZE(energy_battery_props);
+		if (full_cap_broken) {
+			battery->bat_desc.properties =
+			    energy_battery_full_cap_broken_props;
+			battery->bat_desc.num_properties =
+			    ARRAY_SIZE(energy_battery_full_cap_broken_props);
+		} else {
+			battery->bat_desc.properties = energy_battery_props;
+			battery->bat_desc.num_properties =
+			    ARRAY_SIZE(energy_battery_props);
+		}
 	}
 
 	battery->bat_desc.name = acpi_device_bid(battery->device);
@@ -1176,7 +1213,7 @@ static const struct file_operations acpi_battery_alarm_fops = {
 
 static int acpi_battery_add_fs(struct acpi_device *device)
 {
-	pr_warning(PREFIX "Deprecated procfs I/F for battery is loaded, please retry with CONFIG_ACPI_PROCFS_POWER cleared\n");
+	pr_warn(PREFIX "Deprecated procfs I/F for battery is loaded, please retry with CONFIG_ACPI_PROCFS_POWER cleared\n");
 	if (!acpi_device_dir(device)) {
 		acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device),
 						     acpi_battery_dir);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 48bc96d45bab..54002670cb7a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -153,7 +153,7 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data)
 {
 	acpi_status status;
 
-	if (!*data)
+	if (!data)
 		return -EINVAL;
 
 	status = acpi_get_data(handle, acpi_bus_private_data_handler, data);
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 4a2cde2c536a..f6925f16c4a2 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -44,9 +44,19 @@
 #define ACPI_BUTTON_DEVICE_NAME_LID	"Lid Switch"
 #define ACPI_BUTTON_TYPE_LID		0x05
 
-#define ACPI_BUTTON_LID_INIT_IGNORE	0x00
-#define ACPI_BUTTON_LID_INIT_OPEN	0x01
-#define ACPI_BUTTON_LID_INIT_METHOD	0x02
+enum {
+	ACPI_BUTTON_LID_INIT_IGNORE,
+	ACPI_BUTTON_LID_INIT_OPEN,
+	ACPI_BUTTON_LID_INIT_METHOD,
+	ACPI_BUTTON_LID_INIT_DISABLED,
+};
+
+static const char * const lid_init_state_str[] = {
+	[ACPI_BUTTON_LID_INIT_IGNORE]		= "ignore",
+	[ACPI_BUTTON_LID_INIT_OPEN]		= "open",
+	[ACPI_BUTTON_LID_INIT_METHOD]		= "method",
+	[ACPI_BUTTON_LID_INIT_DISABLED]		= "disabled",
+};
 
 #define _COMPONENT		ACPI_BUTTON_COMPONENT
 ACPI_MODULE_NAME("button");
@@ -65,18 +75,63 @@ static const struct acpi_device_id button_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, button_device_ids);
 
-/*
- * Some devices which don't even have a lid in anyway have a broken _LID
- * method (e.g. pointing to a floating gpio pin) causing spurious LID events.
- */
-static const struct dmi_system_id lid_blacklst[] = {
+/* Please keep this list sorted alphabetically by vendor and model */
+static const struct dmi_system_id dmi_lid_quirks[] = {
+	{
+		/*
+		 * Acer Switch 10 SW5-012. _LID method messes with home and
+		 * power button GPIO IRQ settings causing an interrupt storm on
+		 * both GPIOs. This is unfixable without a DSDT override, so we
+		 * have to disable the lid-switch functionality altogether :|
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"),
+		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED,
+	},
+	{
+		/*
+		 * Asus T200TA, _LID keeps reporting closed after every second
+		 * openening of the lid. Causing immediate re-suspend after
+		 * opening every other open. Using LID_INIT_OPEN fixes this.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "T200TA"),
+		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
+	},
 	{
-		/* GP-electronic T701 */
+		/* GP-electronic T701, _LID method points to a floating GPIO */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "T701"),
 			DMI_MATCH(DMI_BIOS_VERSION, "BYT70A.YNCHENG.WIN.007"),
 		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED,
+	},
+	{
+		/*
+		 * Medion Akoya E2215T, notification of the LID device only
+		 * happens on close, not on open and _LID always returns closed.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "E2215T MD60198"),
+		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
+	},
+	{
+		/*
+		 * Razer Blade Stealth 13 late 2019, notification of the LID device
+		 * only happens on close, not on open and _LID always returns closed.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Razer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Razer Blade Stealth 13 Late 2019"),
+		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
 	},
 	{}
 };
@@ -116,9 +171,8 @@ struct acpi_button {
 	bool suspended;
 };
 
-static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
 static struct acpi_device *lid_device;
-static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+static long lid_init_state = -1;
 
 static unsigned long lid_report_interval __read_mostly = 500;
 module_param(lid_report_interval, ulong, 0644);
@@ -146,7 +200,6 @@ static int acpi_lid_evaluate_state(struct acpi_device *device)
 static int acpi_lid_notify_state(struct acpi_device *device, int state)
 {
 	struct acpi_button *button = acpi_driver_data(device);
-	int ret;
 	ktime_t next_report;
 	bool do_update;
 
@@ -223,18 +276,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 		button->last_time = ktime_get();
 	}
 
-	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
-	if (ret == NOTIFY_DONE)
-		ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
-						   device);
-	if (ret == NOTIFY_DONE || ret == NOTIFY_OK) {
-		/*
-		 * It is also regarded as success if the notifier_chain
-		 * returns NOTIFY_OK or NOTIFY_DONE.
-		 */
-		ret = 0;
-	}
-	return ret;
+	return 0;
 }
 
 static int __maybe_unused acpi_button_state_seq_show(struct seq_file *seq,
@@ -331,18 +373,6 @@ static int acpi_button_remove_fs(struct acpi_device *device)
 /* --------------------------------------------------------------------------
                                 Driver Interface
    -------------------------------------------------------------------------- */
-int acpi_lid_notifier_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&acpi_lid_notifier, nb);
-}
-EXPORT_SYMBOL(acpi_lid_notifier_register);
-
-int acpi_lid_notifier_unregister(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
-}
-EXPORT_SYMBOL(acpi_lid_notifier_unregister);
-
 int acpi_lid_open(void)
 {
 	if (!lid_device)
@@ -472,7 +502,8 @@ static int acpi_button_add(struct acpi_device *device)
 	char *name, *class;
 	int error;
 
-	if (!strcmp(hid, ACPI_BUTTON_HID_LID) && dmi_check_system(lid_blacklst))
+	if (!strcmp(hid, ACPI_BUTTON_HID_LID) &&
+	     lid_init_state == ACPI_BUTTON_LID_INIT_DISABLED)
 		return -ENODEV;
 
 	button = kzalloc(sizeof(struct acpi_button), GFP_KERNEL);
@@ -578,36 +609,30 @@ static int acpi_button_remove(struct acpi_device *device)
 static int param_set_lid_init_state(const char *val,
 				    const struct kernel_param *kp)
 {
-	int result = 0;
-
-	if (!strncmp(val, "open", sizeof("open") - 1)) {
-		lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
-		pr_info("Notify initial lid state as open\n");
-	} else if (!strncmp(val, "method", sizeof("method") - 1)) {
-		lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
-		pr_info("Notify initial lid state with _LID return value\n");
-	} else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) {
-		lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE;
-		pr_info("Do not notify initial lid state\n");
-	} else
-		result = -EINVAL;
-	return result;
+	int i;
+
+	i = sysfs_match_string(lid_init_state_str, val);
+	if (i < 0)
+		return i;
+
+	lid_init_state = i;
+	pr_info("Initial lid state set to '%s'\n", lid_init_state_str[i]);
+	return 0;
 }
 
-static int param_get_lid_init_state(char *buffer,
-				    const struct kernel_param *kp)
+static int param_get_lid_init_state(char *buf, const struct kernel_param *kp)
 {
-	switch (lid_init_state) {
-	case ACPI_BUTTON_LID_INIT_OPEN:
-		return sprintf(buffer, "open");
-	case ACPI_BUTTON_LID_INIT_METHOD:
-		return sprintf(buffer, "method");
-	case ACPI_BUTTON_LID_INIT_IGNORE:
-		return sprintf(buffer, "ignore");
-	default:
-		return sprintf(buffer, "invalid");
-	}
-	return 0;
+	int i, c = 0;
+
+	for (i = 0; i < ARRAY_SIZE(lid_init_state_str); i++)
+		if (i == lid_init_state)
+			c += sprintf(buf + c, "[%s] ", lid_init_state_str[i]);
+		else
+			c += sprintf(buf + c, "%s ", lid_init_state_str[i]);
+
+	buf[c - 1] = '\n'; /* Replace the final space with a newline */
+
+	return c;
 }
 
 module_param_call(lid_init_state,
@@ -617,6 +642,16 @@ MODULE_PARM_DESC(lid_init_state, "Behavior for reporting LID initial state");
 
 static int acpi_button_register_driver(struct acpi_driver *driver)
 {
+	const struct dmi_system_id *dmi_id;
+
+	if (lid_init_state == -1) {
+		dmi_id = dmi_first_match(dmi_lid_quirks);
+		if (dmi_id)
+			lid_init_state = (long)dmi_id->driver_data;
+		else
+			lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+	}
+
 	/*
 	 * Modules such as nouveau.ko and i915.ko have a link time dependency
 	 * on acpi_lid_open(), and would therefore not be loadable on ACPI
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 08bb9f2f2d23..b64c62bfcea5 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1314,9 +1314,20 @@ static void acpi_dev_pm_detach(struct device *dev, bool power_off)
  */
 int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
+	/*
+	 * Skip devices whose ACPI companions match the device IDs below,
+	 * because they require special power management handling incompatible
+	 * with the generic ACPI PM domain.
+	 */
+	static const struct acpi_device_id special_pm_ids[] = {
+		{"PNP0C0B", }, /* Generic ACPI fan */
+		{"INT1044", }, /* Fan for Tiger Lake generation */
+		{"INT3404", }, /* Fan */
+		{}
+	};
 	struct acpi_device *adev = ACPI_COMPANION(dev);
 
-	if (!adev)
+	if (!adev || !acpi_match_device_ids(adev, special_pm_ids))
 		return 0;
 
 	/*
diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index eb58fc475a03..387f27ef3368 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c
@@ -97,6 +97,7 @@ static int dptf_power_remove(struct platform_device *pdev)
 }
 
 static const struct acpi_device_id int3407_device_ids[] = {
+	{"INT1047", 0},
 	{"INT3407", 0},
 	{"", 0},
 };
diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c
index 5c7a90186e3c..1ec7b6900662 100644
--- a/drivers/acpi/dptf/int340x_thermal.c
+++ b/drivers/acpi/dptf/int340x_thermal.c
@@ -13,6 +13,10 @@
 
 #define INT3401_DEVICE 0X01
 static const struct acpi_device_id int340x_thermal_device_ids[] = {
+	{"INT1040"},
+	{"INT1043"},
+	{"INT1044"},
+	{"INT1047"},
 	{"INT3400"},
 	{"INT3401", INT3401_DEVICE},
 	{"INT3402"},
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index da1e5c5ce150..08bc9751fe66 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -95,12 +95,12 @@ enum {
 	EC_FLAGS_QUERY_ENABLED,		/* Query is enabled */
 	EC_FLAGS_QUERY_PENDING,		/* Query is pending */
 	EC_FLAGS_QUERY_GUARDING,	/* Guard for SCI_EVT check */
-	EC_FLAGS_GPE_HANDLER_INSTALLED,	/* GPE handler installed */
+	EC_FLAGS_EVENT_HANDLER_INSTALLED,	/* Event handler installed */
 	EC_FLAGS_EC_HANDLER_INSTALLED,	/* OpReg handler installed */
-	EC_FLAGS_EVT_HANDLER_INSTALLED, /* _Qxx handlers installed */
+	EC_FLAGS_QUERY_METHODS_INSTALLED, /* _Qxx handlers installed */
 	EC_FLAGS_STARTED,		/* Driver is started */
 	EC_FLAGS_STOPPED,		/* Driver is stopped */
-	EC_FLAGS_GPE_MASKED,		/* GPE masked */
+	EC_FLAGS_EVENTS_MASKED,		/* Events masked */
 };
 
 #define ACPI_EC_COMMAND_POLL		0x01 /* Available for command byte */
@@ -397,8 +397,8 @@ static inline void acpi_ec_clear_gpe(struct acpi_ec *ec)
 static void acpi_ec_submit_request(struct acpi_ec *ec)
 {
 	ec->reference_count++;
-	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
-	    ec->reference_count == 1)
+	if (test_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags) &&
+	    ec->gpe >= 0 && ec->reference_count == 1)
 		acpi_ec_enable_gpe(ec, true);
 }
 
@@ -407,28 +407,36 @@ static void acpi_ec_complete_request(struct acpi_ec *ec)
 	bool flushed = false;
 
 	ec->reference_count--;
-	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
-	    ec->reference_count == 0)
+	if (test_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags) &&
+	    ec->gpe >= 0 && ec->reference_count == 0)
 		acpi_ec_disable_gpe(ec, true);
 	flushed = acpi_ec_flushed(ec);
 	if (flushed)
 		wake_up(&ec->wait);
 }
 
-static void acpi_ec_mask_gpe(struct acpi_ec *ec)
+static void acpi_ec_mask_events(struct acpi_ec *ec)
 {
-	if (!test_bit(EC_FLAGS_GPE_MASKED, &ec->flags)) {
-		acpi_ec_disable_gpe(ec, false);
+	if (!test_bit(EC_FLAGS_EVENTS_MASKED, &ec->flags)) {
+		if (ec->gpe >= 0)
+			acpi_ec_disable_gpe(ec, false);
+		else
+			disable_irq_nosync(ec->irq);
+
 		ec_dbg_drv("Polling enabled");
-		set_bit(EC_FLAGS_GPE_MASKED, &ec->flags);
+		set_bit(EC_FLAGS_EVENTS_MASKED, &ec->flags);
 	}
 }
 
-static void acpi_ec_unmask_gpe(struct acpi_ec *ec)
+static void acpi_ec_unmask_events(struct acpi_ec *ec)
 {
-	if (test_bit(EC_FLAGS_GPE_MASKED, &ec->flags)) {
-		clear_bit(EC_FLAGS_GPE_MASKED, &ec->flags);
-		acpi_ec_enable_gpe(ec, false);
+	if (test_bit(EC_FLAGS_EVENTS_MASKED, &ec->flags)) {
+		clear_bit(EC_FLAGS_EVENTS_MASKED, &ec->flags);
+		if (ec->gpe >= 0)
+			acpi_ec_enable_gpe(ec, false);
+		else
+			enable_irq(ec->irq);
+
 		ec_dbg_drv("Polling disabled");
 	}
 }
@@ -454,7 +462,7 @@ static bool acpi_ec_submit_flushable_request(struct acpi_ec *ec)
 
 static void acpi_ec_submit_query(struct acpi_ec *ec)
 {
-	acpi_ec_mask_gpe(ec);
+	acpi_ec_mask_events(ec);
 	if (!acpi_ec_event_enabled(ec))
 		return;
 	if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
@@ -470,7 +478,7 @@ static void acpi_ec_complete_query(struct acpi_ec *ec)
 	if (test_and_clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags))
 		ec_dbg_evt("Command(%s) unblocked",
 			   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
-	acpi_ec_unmask_gpe(ec);
+	acpi_ec_unmask_events(ec);
 }
 
 static inline void __acpi_ec_enable_event(struct acpi_ec *ec)
@@ -525,26 +533,10 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static bool acpi_ec_query_flushed(struct acpi_ec *ec)
+static void __acpi_ec_flush_work(void)
 {
-	bool flushed;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ec->lock, flags);
-	flushed = !ec->nr_pending_queries;
-	spin_unlock_irqrestore(&ec->lock, flags);
-	return flushed;
-}
-
-static void __acpi_ec_flush_event(struct acpi_ec *ec)
-{
-	/*
-	 * When ec_freeze_events is true, we need to flush events in
-	 * the proper position before entering the noirq stage.
-	 */
-	wait_event(ec->wait, acpi_ec_query_flushed(ec));
-	if (ec_query_wq)
-		flush_workqueue(ec_query_wq);
+	flush_scheduled_work(); /* flush ec->work */
+	flush_workqueue(ec_query_wq); /* flush queries */
 }
 
 static void acpi_ec_disable_event(struct acpi_ec *ec)
@@ -554,15 +546,21 @@ static void acpi_ec_disable_event(struct acpi_ec *ec)
 	spin_lock_irqsave(&ec->lock, flags);
 	__acpi_ec_disable_event(ec);
 	spin_unlock_irqrestore(&ec->lock, flags);
-	__acpi_ec_flush_event(ec);
+
+	/*
+	 * When ec_freeze_events is true, we need to flush events in
+	 * the proper position before entering the noirq stage.
+	 */
+	__acpi_ec_flush_work();
 }
 
 void acpi_ec_flush_work(void)
 {
-	if (first_ec)
-		__acpi_ec_flush_event(first_ec);
+	/* Without ec_query_wq there is nothing to flush. */
+	if (!ec_query_wq)
+		return;
 
-	flush_scheduled_work();
+	__acpi_ec_flush_work();
 }
 #endif /* CONFIG_PM_SLEEP */
 
@@ -648,7 +646,9 @@ static void advance_transaction(struct acpi_ec *ec)
 	 * ensure a hardware STS 0->1 change after this clearing can always
 	 * trigger a GPE interrupt.
 	 */
-	acpi_ec_clear_gpe(ec);
+	if (ec->gpe >= 0)
+		acpi_ec_clear_gpe(ec);
+
 	status = acpi_ec_read_status(ec);
 	t = ec->curr;
 	/*
@@ -717,7 +717,7 @@ err:
 				++t->irq_count;
 			/* Allow triggering on 0 threshold */
 			if (t->irq_count == ec_storm_threshold)
-				acpi_ec_mask_gpe(ec);
+				acpi_ec_mask_events(ec);
 		}
 	}
 out:
@@ -815,7 +815,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
 
 	spin_lock_irqsave(&ec->lock, tmp);
 	if (t->irq_count == ec_storm_threshold)
-		acpi_ec_unmask_gpe(ec);
+		acpi_ec_unmask_events(ec);
 	ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command));
 	ec->curr = NULL;
 	/* Disable GPE for command processing (IBF=0/OBF=1) */
@@ -1053,28 +1053,20 @@ void acpi_ec_unblock_transactions(void)
                                 Event Management
    -------------------------------------------------------------------------- */
 static struct acpi_ec_query_handler *
-acpi_ec_get_query_handler(struct acpi_ec_query_handler *handler)
-{
-	if (handler)
-		kref_get(&handler->kref);
-	return handler;
-}
-
-static struct acpi_ec_query_handler *
 acpi_ec_get_query_handler_by_value(struct acpi_ec *ec, u8 value)
 {
 	struct acpi_ec_query_handler *handler;
-	bool found = false;
 
 	mutex_lock(&ec->mutex);
 	list_for_each_entry(handler, &ec->list, node) {
 		if (value == handler->query_bit) {
-			found = true;
-			break;
+			kref_get(&handler->kref);
+			mutex_unlock(&ec->mutex);
+			return handler;
 		}
 	}
 	mutex_unlock(&ec->mutex);
-	return found ? acpi_ec_get_query_handler(handler) : NULL;
+	return NULL;
 }
 
 static void acpi_ec_query_handler_release(struct kref *kref)
@@ -1275,18 +1267,28 @@ static void acpi_ec_event_handler(struct work_struct *work)
 	acpi_ec_check_event(ec);
 }
 
-static u32 acpi_ec_gpe_handler(acpi_handle gpe_device,
-	u32 gpe_number, void *data)
+static void acpi_ec_handle_interrupt(struct acpi_ec *ec)
 {
 	unsigned long flags;
-	struct acpi_ec *ec = data;
 
 	spin_lock_irqsave(&ec->lock, flags);
 	advance_transaction(ec);
 	spin_unlock_irqrestore(&ec->lock, flags);
+}
+
+static u32 acpi_ec_gpe_handler(acpi_handle gpe_device,
+			       u32 gpe_number, void *data)
+{
+	acpi_ec_handle_interrupt(data);
 	return ACPI_INTERRUPT_HANDLED;
 }
 
+static irqreturn_t acpi_ec_irq_handler(int irq, void *data)
+{
+	acpi_ec_handle_interrupt(data);
+	return IRQ_HANDLED;
+}
+
 /* --------------------------------------------------------------------------
  *                           Address Space Management
  * -------------------------------------------------------------------------- */
@@ -1359,6 +1361,8 @@ static struct acpi_ec *acpi_ec_alloc(void)
 	ec->timestamp = jiffies;
 	ec->busy_polling = true;
 	ec->polling_guard = 0;
+	ec->gpe = -1;
+	ec->irq = -1;
 	return ec;
 }
 
@@ -1406,9 +1410,13 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
 		/* Get GPE bit assignment (EC events). */
 		/* TODO: Add support for _GPE returning a package */
 		status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp);
-		if (ACPI_FAILURE(status))
-			return status;
-		ec->gpe = tmp;
+		if (ACPI_SUCCESS(status))
+			ec->gpe = tmp;
+
+		/*
+		 * Errors are non-fatal, allowing for ACPI Reduced Hardware
+		 * platforms which use GpioInt instead of GPE.
+		 */
 	}
 	/* Use the global lock for all EC transactions? */
 	tmp = 0;
@@ -1418,12 +1426,57 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
 	return AE_CTRL_TERMINATE;
 }
 
+static void install_gpe_event_handler(struct acpi_ec *ec)
+{
+	acpi_status status =
+		acpi_install_gpe_raw_handler(NULL, ec->gpe,
+					     ACPI_GPE_EDGE_TRIGGERED,
+					     &acpi_ec_gpe_handler,
+					     ec);
+	if (ACPI_SUCCESS(status)) {
+		/* This is not fatal as we can poll EC events */
+		set_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags);
+		acpi_ec_leave_noirq(ec);
+		if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
+		    ec->reference_count >= 1)
+			acpi_ec_enable_gpe(ec, true);
+	}
+}
+
+/* ACPI reduced hardware platforms use a GpioInt specified in _CRS. */
+static int install_gpio_irq_event_handler(struct acpi_ec *ec,
+					  struct acpi_device *device)
+{
+	int irq = acpi_dev_gpio_irq_get(device, 0);
+	int ret;
+
+	if (irq < 0)
+		return irq;
+
+	ret = request_irq(irq, acpi_ec_irq_handler, IRQF_SHARED,
+			  "ACPI EC", ec);
+
+	/*
+	 * Unlike the GPE case, we treat errors here as fatal, we'll only
+	 * implement GPIO polling if we find a case that needs it.
+	 */
+	if (ret < 0)
+		return ret;
+
+	ec->irq = irq;
+	set_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags);
+	acpi_ec_leave_noirq(ec);
+
+	return 0;
+}
+
 /*
  * Note: This function returns an error code only when the address space
  *       handler is not installed, which means "not able to handle
  *       transactions".
  */
-static int ec_install_handlers(struct acpi_ec *ec, bool handle_events)
+static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device,
+			       bool handle_events)
 {
 	acpi_status status;
 
@@ -1456,24 +1509,23 @@ static int ec_install_handlers(struct acpi_ec *ec, bool handle_events)
 	if (!handle_events)
 		return 0;
 
-	if (!test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+	if (!test_bit(EC_FLAGS_QUERY_METHODS_INSTALLED, &ec->flags)) {
 		/* Find and register all query methods */
 		acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
 				    acpi_ec_register_query_methods,
 				    NULL, ec, NULL);
-		set_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+		set_bit(EC_FLAGS_QUERY_METHODS_INSTALLED, &ec->flags);
 	}
-	if (!test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
-		status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
-					  ACPI_GPE_EDGE_TRIGGERED,
-					  &acpi_ec_gpe_handler, ec);
-		/* This is not fatal as we can poll EC events */
-		if (ACPI_SUCCESS(status)) {
-			set_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
-			acpi_ec_leave_noirq(ec);
-			if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
-			    ec->reference_count >= 1)
-				acpi_ec_enable_gpe(ec, true);
+	if (!test_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags)) {
+		if (ec->gpe >= 0) {
+			install_gpe_event_handler(ec);
+		} else if (device) {
+			int ret = install_gpio_irq_event_handler(ec, device);
+
+			if (ret)
+				return ret;
+		} else { /* No GPE and no GpioInt? */
+			return -ENODEV;
 		}
 	}
 	/* EC is fully operational, allow queries */
@@ -1504,23 +1556,29 @@ static void ec_remove_handlers(struct acpi_ec *ec)
 	 */
 	acpi_ec_stop(ec, false);
 
-	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
-		if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
-					&acpi_ec_gpe_handler)))
+	if (test_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags)) {
+		if (ec->gpe >= 0 &&
+		    ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
+				 &acpi_ec_gpe_handler)))
 			pr_err("failed to remove gpe handler\n");
-		clear_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
+
+		if (ec->irq >= 0)
+			free_irq(ec->irq, ec);
+
+		clear_bit(EC_FLAGS_EVENT_HANDLER_INSTALLED, &ec->flags);
 	}
-	if (test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+	if (test_bit(EC_FLAGS_QUERY_METHODS_INSTALLED, &ec->flags)) {
 		acpi_ec_remove_query_handlers(ec, true, 0);
-		clear_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+		clear_bit(EC_FLAGS_QUERY_METHODS_INSTALLED, &ec->flags);
 	}
 }
 
-static int acpi_ec_setup(struct acpi_ec *ec, bool handle_events)
+static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device,
+			 bool handle_events)
 {
 	int ret;
 
-	ret = ec_install_handlers(ec, handle_events);
+	ret = ec_install_handlers(ec, device, handle_events);
 	if (ret)
 		return ret;
 
@@ -1531,8 +1589,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, bool handle_events)
 	}
 
 	acpi_handle_info(ec->handle,
-			 "GPE=0x%x, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n",
-			 ec->gpe, ec->command_addr, ec->data_addr);
+			 "GPE=0x%x, IRQ=%d, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n",
+			 ec->gpe, ec->irq, ec->command_addr, ec->data_addr);
 	return ret;
 }
 
@@ -1596,7 +1654,7 @@ static int acpi_ec_add(struct acpi_device *device)
 		}
 	}
 
-	ret = acpi_ec_setup(ec, true);
+	ret = acpi_ec_setup(ec, device, true);
 	if (ret)
 		goto err_query;
 
@@ -1716,7 +1774,7 @@ void __init acpi_ec_dsdt_probe(void)
 	 * At this point, the GPE is not fully initialized, so do not to
 	 * handle the events.
 	 */
-	ret = acpi_ec_setup(ec, false);
+	ret = acpi_ec_setup(ec, NULL, false);
 	if (ret) {
 		acpi_ec_free(ec);
 		return;
@@ -1889,14 +1947,21 @@ void __init acpi_ec_ecdt_probe(void)
 		ec->command_addr = ecdt_ptr->control.address;
 		ec->data_addr = ecdt_ptr->data.address;
 	}
-	ec->gpe = ecdt_ptr->gpe;
+
+	/*
+	 * Ignore the GPE value on Reduced Hardware platforms.
+	 * Some products have this set to an erroneous value.
+	 */
+	if (!acpi_gbl_reduced_hardware)
+		ec->gpe = ecdt_ptr->gpe;
+
 	ec->handle = ACPI_ROOT_OBJECT;
 
 	/*
 	 * At this point, the namespace is not initialized, so do not find
 	 * the namespace objects, or handle the events.
 	 */
-	ret = acpi_ec_setup(ec, false);
+	ret = acpi_ec_setup(ec, NULL, false);
 	if (ret) {
 		acpi_ec_free(ec);
 		return;
@@ -1928,7 +1993,7 @@ static int acpi_ec_suspend_noirq(struct device *dev)
 	 * masked at the low level without side effects.
 	 */
 	if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
-	    ec->reference_count >= 1)
+	    ec->gpe >= 0 && ec->reference_count >= 1)
 		acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE);
 
 	acpi_ec_enter_noirq(ec);
@@ -1943,7 +2008,7 @@ static int acpi_ec_resume_noirq(struct device *dev)
 	acpi_ec_leave_noirq(ec);
 
 	if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
-	    ec->reference_count >= 1)
+	    ec->gpe >= 0 && ec->reference_count >= 1)
 		acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_ENABLE);
 
 	return 0;
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 816b0803f7fb..aaf4e8f348cf 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -25,6 +25,7 @@ static int acpi_fan_remove(struct platform_device *pdev);
 
 static const struct acpi_device_id fan_device_ids[] = {
 	{"PNP0C0B", 0},
+	{"INT1044", 0},
 	{"INT3404", 0},
 	{"", 0},
 };
@@ -44,12 +45,16 @@ static const struct dev_pm_ops acpi_fan_pm = {
 #define FAN_PM_OPS_PTR NULL
 #endif
 
+#define ACPI_FPS_NAME_LEN	20
+
 struct acpi_fan_fps {
 	u64 control;
 	u64 trip_point;
 	u64 speed;
 	u64 noise_level;
 	u64 power;
+	char name[ACPI_FPS_NAME_LEN];
+	struct device_attribute dev_attr;
 };
 
 struct acpi_fan_fif {
@@ -265,6 +270,39 @@ static int acpi_fan_speed_cmp(const void *a, const void *b)
 	return fps1->speed - fps2->speed;
 }
 
+static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
+	int count;
+
+	if (fps->control == 0xFFFFFFFF || fps->control > 100)
+		count = snprintf(buf, PAGE_SIZE, "not-defined:");
+	else
+		count = snprintf(buf, PAGE_SIZE, "%lld:", fps->control);
+
+	if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
+		count += snprintf(&buf[count], PAGE_SIZE, "not-defined:");
+	else
+		count += snprintf(&buf[count], PAGE_SIZE, "%lld:", fps->trip_point);
+
+	if (fps->speed == 0xFFFFFFFF)
+		count += snprintf(&buf[count], PAGE_SIZE, "not-defined:");
+	else
+		count += snprintf(&buf[count], PAGE_SIZE, "%lld:", fps->speed);
+
+	if (fps->noise_level == 0xFFFFFFFF)
+		count += snprintf(&buf[count], PAGE_SIZE, "not-defined:");
+	else
+		count += snprintf(&buf[count], PAGE_SIZE, "%lld:", fps->noise_level * 100);
+
+	if (fps->power == 0xFFFFFFFF)
+		count += snprintf(&buf[count], PAGE_SIZE, "not-defined\n");
+	else
+		count += snprintf(&buf[count], PAGE_SIZE, "%lld\n", fps->power);
+
+	return count;
+}
+
 static int acpi_fan_get_fps(struct acpi_device *device)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
@@ -295,12 +333,13 @@ static int acpi_fan_get_fps(struct acpi_device *device)
 	}
 	for (i = 0; i < fan->fps_count; i++) {
 		struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
-		struct acpi_buffer fps = { sizeof(fan->fps[i]), &fan->fps[i] };
+		struct acpi_buffer fps = { offsetof(struct acpi_fan_fps, name),
+						&fan->fps[i] };
 		status = acpi_extract_package(&obj->package.elements[i + 1],
 					      &format, &fps);
 		if (ACPI_FAILURE(status)) {
 			dev_err(&device->dev, "Invalid _FPS element\n");
-			break;
+			goto err;
 		}
 	}
 
@@ -308,6 +347,24 @@ static int acpi_fan_get_fps(struct acpi_device *device)
 	sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
 	     acpi_fan_speed_cmp, NULL);
 
+	for (i = 0; i < fan->fps_count; ++i) {
+		struct acpi_fan_fps *fps = &fan->fps[i];
+
+		snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
+		fps->dev_attr.show = show_state;
+		fps->dev_attr.store = NULL;
+		fps->dev_attr.attr.name = fps->name;
+		fps->dev_attr.attr.mode = 0444;
+		status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
+		if (status) {
+			int j;
+
+			for (j = 0; j < i; ++j)
+				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
+			break;
+		}
+	}
+
 err:
 	kfree(obj);
 	return status;
@@ -330,14 +387,20 @@ static int acpi_fan_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, fan);
 
 	if (acpi_fan_is_acpi4(device)) {
-		if (acpi_fan_get_fif(device) || acpi_fan_get_fps(device))
-			goto end;
+		result = acpi_fan_get_fif(device);
+		if (result)
+			return result;
+
+		result = acpi_fan_get_fps(device);
+		if (result)
+			return result;
+
 		fan->acpi4 = true;
 	} else {
 		result = acpi_device_update_power(device, NULL);
 		if (result) {
 			dev_err(&device->dev, "Failed to set initial power state\n");
-			goto end;
+			goto err_end;
 		}
 	}
 
@@ -350,7 +413,7 @@ static int acpi_fan_probe(struct platform_device *pdev)
 						&fan_cooling_ops);
 	if (IS_ERR(cdev)) {
 		result = PTR_ERR(cdev);
-		goto end;
+		goto err_end;
 	}
 
 	dev_dbg(&pdev->dev, "registered as cooling_device%d\n", cdev->id);
@@ -365,10 +428,21 @@ static int acpi_fan_probe(struct platform_device *pdev)
 	result = sysfs_create_link(&cdev->device.kobj,
 				   &pdev->dev.kobj,
 				   "device");
-	if (result)
+	if (result) {
 		dev_err(&pdev->dev, "Failed to create sysfs link 'device'\n");
+		goto err_end;
+	}
+
+	return 0;
+
+err_end:
+	if (fan->acpi4) {
+		int i;
+
+		for (i = 0; i < fan->fps_count; ++i)
+			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+	}
 
-end:
 	return result;
 }
 
@@ -376,6 +450,13 @@ static int acpi_fan_remove(struct platform_device *pdev)
 {
 	struct acpi_fan *fan = platform_get_drvdata(pdev);
 
+	if (fan->acpi4) {
+		struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+		int i;
+
+		for (i = 0; i < fan->fps_count; ++i)
+			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+	}
 	sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
 	sysfs_remove_link(&fan->cdev->device.kobj, "device");
 	thermal_cooling_device_unregister(fan->cdev);
diff --git a/drivers/acpi/hmat/Kconfig b/drivers/acpi/hmat/Kconfig
deleted file mode 100644
index 95a29964dbea..000000000000
--- a/drivers/acpi/hmat/Kconfig
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config ACPI_HMAT
-	bool "ACPI Heterogeneous Memory Attribute Table Support"
-	depends on ACPI_NUMA
-	select HMEM_REPORTING
-	help
-	 If set, this option has the kernel parse and report the
-	 platform's ACPI HMAT (Heterogeneous Memory Attributes Table),
-	 register memory initiators with their targets, and export
-	 performance attributes through the node's sysfs device if
-	 provided.
diff --git a/drivers/acpi/hmat/Makefile b/drivers/acpi/hmat/Makefile
deleted file mode 100644
index 1c20ef36a385..000000000000
--- a/drivers/acpi/hmat/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_ACPI_HMAT) := hmat.o
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index afe6636f9ad3..3616daec650b 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -165,7 +165,8 @@ static inline void acpi_early_processor_osc(void) {}
    -------------------------------------------------------------------------- */
 struct acpi_ec {
 	acpi_handle handle;
-	u32 gpe;
+	int gpe;
+	int irq;
 	unsigned long command_addr;
 	unsigned long data_addr;
 	bool global_lock;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 14e68f202f81..a3320f93616d 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1404,7 +1404,6 @@ static const struct attribute_group acpi_nfit_attribute_group = {
 };
 
 static const struct attribute_group *acpi_nfit_attribute_groups[] = {
-	&nvdimm_bus_attribute_group,
 	&acpi_nfit_attribute_group,
 	NULL,
 };
@@ -1698,8 +1697,6 @@ static const struct attribute_group acpi_nfit_dimm_attribute_group = {
 };
 
 static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
-	&nvdimm_attribute_group,
-	&nd_device_attribute_group,
 	&acpi_nfit_dimm_attribute_group,
 	NULL,
 };
@@ -2197,10 +2194,6 @@ static const struct attribute_group acpi_nfit_region_attribute_group = {
 };
 
 static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
-	&nd_region_attribute_group,
-	&nd_mapping_attribute_group,
-	&nd_device_attribute_group,
-	&nd_numa_attribute_group,
 	&acpi_nfit_region_attribute_group,
 	NULL,
 };
diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig
new file mode 100644
index 000000000000..fcf2e556d69d
--- /dev/null
+++ b/drivers/acpi/numa/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACPI_NUMA
+	bool "NUMA support"
+	depends on NUMA
+	depends on (X86 || IA64 || ARM64)
+	default y if IA64 || ARM64
+
+config ACPI_HMAT
+	bool "ACPI Heterogeneous Memory Attribute Table Support"
+	depends on ACPI_NUMA
+	select HMEM_REPORTING
+	select MEMREGION
+	help
+	 If set, this option has the kernel parse and report the
+	 platform's ACPI HMAT (Heterogeneous Memory Attributes Table),
+	 register memory initiators with their targets, and export
+	 performance attributes through the node's sysfs device if
+	 provided.
diff --git a/drivers/acpi/numa/Makefile b/drivers/acpi/numa/Makefile
new file mode 100644
index 000000000000..517a6c689a94
--- /dev/null
+++ b/drivers/acpi/numa/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_ACPI_NUMA) += srat.o
+obj-$(CONFIG_ACPI_HMAT) += hmat.o
diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/numa/hmat.c
index 8b0de8a3c647..2c32cfb72370 100644
--- a/drivers/acpi/hmat/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -8,12 +8,18 @@
  * the applicable attributes with the node's interfaces.
  */
 
+#define pr_fmt(fmt) "acpi/hmat: " fmt
+#define dev_fmt(fmt) "acpi/hmat: " fmt
+
 #include <linux/acpi.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
 #include <linux/list_sort.h>
+#include <linux/memregion.h>
 #include <linux/memory.h>
 #include <linux/mutex.h>
 #include <linux/node.h>
@@ -49,6 +55,7 @@ struct memory_target {
 	struct list_head node;
 	unsigned int memory_pxm;
 	unsigned int processor_pxm;
+	struct resource memregions;
 	struct node_hmem_attrs hmem_attrs;
 	struct list_head caches;
 	struct node_cache_attrs cache_attrs;
@@ -104,22 +111,36 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm)
 	list_add_tail(&initiator->node, &initiators);
 }
 
-static __init void alloc_memory_target(unsigned int mem_pxm)
+static __init void alloc_memory_target(unsigned int mem_pxm,
+		resource_size_t start, resource_size_t len)
 {
 	struct memory_target *target;
 
 	target = find_mem_target(mem_pxm);
-	if (target)
-		return;
-
-	target = kzalloc(sizeof(*target), GFP_KERNEL);
-	if (!target)
-		return;
+	if (!target) {
+		target = kzalloc(sizeof(*target), GFP_KERNEL);
+		if (!target)
+			return;
+		target->memory_pxm = mem_pxm;
+		target->processor_pxm = PXM_INVAL;
+		target->memregions = (struct resource) {
+			.name	= "ACPI mem",
+			.start	= 0,
+			.end	= -1,
+			.flags	= IORESOURCE_MEM,
+		};
+		list_add_tail(&target->node, &targets);
+		INIT_LIST_HEAD(&target->caches);
+	}
 
-	target->memory_pxm = mem_pxm;
-	target->processor_pxm = PXM_INVAL;
-	list_add_tail(&target->node, &targets);
-	INIT_LIST_HEAD(&target->caches);
+	/*
+	 * There are potentially multiple ranges per PXM, so record each
+	 * in the per-target memregions resource tree.
+	 */
+	if (!__request_region(&target->memregions, start, len, "memory target",
+				IORESOURCE_MEM))
+		pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n",
+				start, start + len, mem_pxm);
 }
 
 static __init const char *hmat_data_type(u8 type)
@@ -272,7 +293,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
 	u8 type, mem_hier;
 
 	if (hmat_loc->header.length < sizeof(*hmat_loc)) {
-		pr_notice("HMAT: Unexpected locality header length: %d\n",
+		pr_notice("HMAT: Unexpected locality header length: %u\n",
 			 hmat_loc->header.length);
 		return -EINVAL;
 	}
@@ -284,12 +305,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
 	total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds +
 		     sizeof(*inits) * ipds + sizeof(*targs) * tpds;
 	if (hmat_loc->header.length < total_size) {
-		pr_notice("HMAT: Unexpected locality header length:%d, minimum required:%d\n",
+		pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n",
 			 hmat_loc->header.length, total_size);
 		return -EINVAL;
 	}
 
-	pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n",
+	pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
 		hmat_loc->flags, hmat_data_type(type), ipds, tpds,
 		hmat_loc->entry_base_unit);
 
@@ -302,7 +323,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
 			value = hmat_normalize(entries[init * tpds + targ],
 					       hmat_loc->entry_base_unit,
 					       type);
-			pr_info("  Initiator-Target[%d-%d]:%d%s\n",
+			pr_info("  Initiator-Target[%u-%u]:%u%s\n",
 				inits[init], targs[targ], value,
 				hmat_data_type_suffix(type));
 
@@ -329,13 +350,13 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header,
 	u32 attrs;
 
 	if (cache->header.length < sizeof(*cache)) {
-		pr_notice("HMAT: Unexpected cache header length: %d\n",
+		pr_notice("HMAT: Unexpected cache header length: %u\n",
 			 cache->header.length);
 		return -EINVAL;
 	}
 
 	attrs = cache->cache_attributes;
-	pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
+	pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
 		cache->memory_PD, cache->cache_size, attrs,
 		cache->number_of_SMBIOShandles);
 
@@ -390,17 +411,17 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
 	struct memory_target *target = NULL;
 
 	if (p->header.length != sizeof(*p)) {
-		pr_notice("HMAT: Unexpected address range header length: %d\n",
+		pr_notice("HMAT: Unexpected address range header length: %u\n",
 			 p->header.length);
 		return -EINVAL;
 	}
 
 	if (hmat_revision == 1)
-		pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+		pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
 			p->reserved3, p->reserved4, p->flags, p->processor_PD,
 			p->memory_PD);
 	else
-		pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+		pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
 			p->flags, p->processor_PD, p->memory_PD);
 
 	if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
@@ -417,7 +438,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
 			pr_debug("HMAT: Invalid Processor Domain\n");
 			return -EINVAL;
 		}
-		target->processor_pxm = p_node;
+		target->processor_pxm = p->processor_PD;
 	}
 
 	return 0;
@@ -452,7 +473,7 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
 		return -EINVAL;
 	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
 		return 0;
-	alloc_memory_target(ma->proximity_domain);
+	alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length);
 	return 0;
 }
 
@@ -613,11 +634,92 @@ static void hmat_register_target_perf(struct memory_target *target)
 	node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
 }
 
+static void hmat_register_target_device(struct memory_target *target,
+		struct resource *r)
+{
+	/* define a clean / non-busy resource for the platform device */
+	struct resource res = {
+		.start = r->start,
+		.end = r->end,
+		.flags = IORESOURCE_MEM,
+	};
+	struct platform_device *pdev;
+	struct memregion_info info;
+	int rc, id;
+
+	rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
+			IORES_DESC_SOFT_RESERVED);
+	if (rc != REGION_INTERSECTS)
+		return;
+
+	id = memregion_alloc(GFP_KERNEL);
+	if (id < 0) {
+		pr_err("memregion allocation failure for %pr\n", &res);
+		return;
+	}
+
+	pdev = platform_device_alloc("hmem", id);
+	if (!pdev) {
+		pr_err("hmem device allocation failure for %pr\n", &res);
+		goto out_pdev;
+	}
+
+	pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
+	info = (struct memregion_info) {
+		.target_node = acpi_map_pxm_to_node(target->memory_pxm),
+	};
+	rc = platform_device_add_data(pdev, &info, sizeof(info));
+	if (rc < 0) {
+		pr_err("hmem memregion_info allocation failure for %pr\n", &res);
+		goto out_pdev;
+	}
+
+	rc = platform_device_add_resources(pdev, &res, 1);
+	if (rc < 0) {
+		pr_err("hmem resource allocation failure for %pr\n", &res);
+		goto out_resource;
+	}
+
+	rc = platform_device_add(pdev);
+	if (rc < 0) {
+		dev_err(&pdev->dev, "device add failed for %pr\n", &res);
+		goto out_resource;
+	}
+
+	return;
+
+out_resource:
+	put_device(&pdev->dev);
+out_pdev:
+	memregion_free(id);
+}
+
+static void hmat_register_target_devices(struct memory_target *target)
+{
+	struct resource *res;
+
+	/*
+	 * Do not bother creating devices if no driver is available to
+	 * consume them.
+	 */
+	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
+		return;
+
+	for (res = target->memregions.child; res; res = res->sibling)
+		hmat_register_target_device(target, res);
+}
+
 static void hmat_register_target(struct memory_target *target)
 {
 	int nid = pxm_to_node(target->memory_pxm);
 
 	/*
+	 * Devices may belong to either an offline or online
+	 * node, so unconditionally add them.
+	 */
+	hmat_register_target_devices(target);
+
+	/*
 	 * Skip offline nodes. This can happen when memory
 	 * marked EFI_MEMORY_SP, "specific purpose", is applied
 	 * to all the memory in a promixity domain leading to
@@ -677,11 +779,21 @@ static __init void hmat_free_structures(void)
 	struct target_cache *tcache, *cnext;
 
 	list_for_each_entry_safe(target, tnext, &targets, node) {
+		struct resource *res, *res_next;
+
 		list_for_each_entry_safe(tcache, cnext, &target->caches, node) {
 			list_del(&tcache->node);
 			kfree(tcache);
 		}
+
 		list_del(&target->node);
+		res = target->memregions.child;
+		while (res) {
+			res_next = res->sibling;
+			__release_region(&target->memregions, res->start,
+					resource_size(res));
+			res = res_next;
+		}
 		kfree(target);
 	}
 
@@ -748,4 +860,4 @@ out_put:
 	acpi_put_table(tbl);
 	return 0;
 }
-subsys_initcall(hmat_init);
+device_initcall(hmat_init);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa/srat.c
index eadbf90e65d1..eadbf90e65d1 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa/srat.c
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
index bec0bebc7f52..9f6853809138 100644
--- a/drivers/acpi/osi.c
+++ b/drivers/acpi/osi.c
@@ -473,9 +473,9 @@ static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = {
 	 */
 
 	/*
-	 * Without this this EEEpc exports a non working WMI interface, with
-	 * this it exports a working "good old" eeepc_laptop interface, fixing
-	 * both brightness control, and rfkill not working.
+	 * Without this EEEpc exports a non working WMI interface, with
+	 * this it exports a working "good old" eeepc_laptop interface,
+	 * fixing both brightness control, and rfkill not working.
 	 */
 	{
 	.callback = dmi_enable_osi_linux,
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index a2e844a8e9ed..41168c027a5a 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -374,19 +374,21 @@ void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
 }
 EXPORT_SYMBOL_GPL(acpi_os_map_memory);
 
-static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
+/* Must be called with mutex_lock(&acpi_ioremap_lock) */
+static unsigned long acpi_os_drop_map_ref(struct acpi_ioremap *map)
 {
-	if (!--map->refcount)
+	unsigned long refcount = --map->refcount;
+
+	if (!refcount)
 		list_del_rcu(&map->list);
+	return refcount;
 }
 
 static void acpi_os_map_cleanup(struct acpi_ioremap *map)
 {
-	if (!map->refcount) {
-		synchronize_rcu_expedited();
-		acpi_unmap(map->phys, map->virt);
-		kfree(map);
-	}
+	synchronize_rcu_expedited();
+	acpi_unmap(map->phys, map->virt);
+	kfree(map);
 }
 
 /**
@@ -406,6 +408,7 @@ static void acpi_os_map_cleanup(struct acpi_ioremap *map)
 void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size)
 {
 	struct acpi_ioremap *map;
+	unsigned long refcount;
 
 	if (!acpi_permanent_mmap) {
 		__acpi_unmap_table(virt, size);
@@ -419,10 +422,11 @@ void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size)
 		WARN(true, PREFIX "%s: bad address %p\n", __func__, virt);
 		return;
 	}
-	acpi_os_drop_map_ref(map);
+	refcount = acpi_os_drop_map_ref(map);
 	mutex_unlock(&acpi_ioremap_lock);
 
-	acpi_os_map_cleanup(map);
+	if (!refcount)
+		acpi_os_map_cleanup(map);
 }
 EXPORT_SYMBOL_GPL(acpi_os_unmap_iomem);
 
@@ -457,6 +461,7 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	struct acpi_ioremap *map;
+	unsigned long refcount;
 
 	if (gas->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
 		return;
@@ -472,10 +477,11 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 		mutex_unlock(&acpi_ioremap_lock);
 		return;
 	}
-	acpi_os_drop_map_ref(map);
+	refcount = acpi_os_drop_map_ref(map);
 	mutex_unlock(&acpi_ioremap_lock);
 
-	acpi_os_map_cleanup(map);
+	if (!refcount)
+		acpi_os_map_cleanup(map);
 }
 EXPORT_SYMBOL(acpi_os_unmap_generic_address);
 
diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c
index 452041398b34..a371f273f99d 100644
--- a/drivers/acpi/pmic/intel_pmic.c
+++ b/drivers/acpi/pmic/intel_pmic.c
@@ -252,7 +252,7 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle,
 					struct regmap *regmap,
 					struct intel_pmic_opregion_data *d)
 {
-	acpi_status status;
+	acpi_status status = AE_OK;
 	struct intel_pmic_opregion *opregion;
 	int ret;
 
@@ -270,7 +270,8 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle,
 	opregion->regmap = regmap;
 	opregion->lpat_table = acpi_lpat_get_conversion_table(handle);
 
-	status = acpi_install_address_space_handler(handle,
+	if (d->power_table_count)
+		status = acpi_install_address_space_handler(handle,
 						    PMIC_POWER_OPREGION_ID,
 						    intel_pmic_power_handler,
 						    NULL, opregion);
@@ -279,7 +280,8 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle,
 		goto out_error;
 	}
 
-	status = acpi_install_address_space_handler(handle,
+	if (d->thermal_table_count)
+		status = acpi_install_address_space_handler(handle,
 						    PMIC_THERMAL_OPREGION_ID,
 						    intel_pmic_thermal_handler,
 						    NULL, opregion);
@@ -301,12 +303,16 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle,
 	return 0;
 
 out_remove_thermal_handler:
-	acpi_remove_address_space_handler(handle, PMIC_THERMAL_OPREGION_ID,
-					  intel_pmic_thermal_handler);
+	if (d->thermal_table_count)
+		acpi_remove_address_space_handler(handle,
+						  PMIC_THERMAL_OPREGION_ID,
+						  intel_pmic_thermal_handler);
 
 out_remove_power_handler:
-	acpi_remove_address_space_handler(handle, PMIC_POWER_OPREGION_ID,
-					  intel_pmic_power_handler);
+	if (d->power_table_count)
+		acpi_remove_address_space_handler(handle,
+						  PMIC_POWER_OPREGION_ID,
+						  intel_pmic_power_handler);
 
 out_error:
 	acpi_lpat_free_conversion_table(opregion->lpat_table);
diff --git a/drivers/acpi/pmic/intel_pmic_bytcrc.c b/drivers/acpi/pmic/intel_pmic_bytcrc.c
new file mode 100644
index 000000000000..2a692cc4b7ae
--- /dev/null
+++ b/drivers/acpi/pmic/intel_pmic_bytcrc.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Bay Trail Crystal Cove PMIC operation region driver
+ *
+ * Copyright (C) 2014 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/mfd/intel_soc_pmic.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include "intel_pmic.h"
+
+#define PWR_SOURCE_SELECT	BIT(1)
+
+#define PMIC_A0LOCK_REG		0xc5
+
+static struct pmic_table power_table[] = {
+/*	{
+		.address = 0x00,
+		.reg = ??,
+		.bit = ??,
+	}, ** VSYS */
+	{
+		.address = 0x04,
+		.reg = 0x63,
+		.bit = 0x00,
+	}, /* SYSX -> VSYS_SX */
+	{
+		.address = 0x08,
+		.reg = 0x62,
+		.bit = 0x00,
+	}, /* SYSU -> VSYS_U */
+	{
+		.address = 0x0c,
+		.reg = 0x64,
+		.bit = 0x00,
+	}, /* SYSS -> VSYS_S */
+	{
+		.address = 0x10,
+		.reg = 0x6a,
+		.bit = 0x00,
+	}, /* V50S -> V5P0S */
+	{
+		.address = 0x14,
+		.reg = 0x6b,
+		.bit = 0x00,
+	}, /* HOST -> VHOST, USB2/3 host */
+	{
+		.address = 0x18,
+		.reg = 0x6c,
+		.bit = 0x00,
+	}, /* VBUS -> VBUS, USB2/3 OTG */
+	{
+		.address = 0x1c,
+		.reg = 0x6d,
+		.bit = 0x00,
+	}, /* HDMI -> VHDMI */
+/*	{
+		.address = 0x20,
+		.reg = ??,
+		.bit = ??,
+	}, ** S285 */
+	{
+		.address = 0x24,
+		.reg = 0x66,
+		.bit = 0x00,
+	}, /* X285 -> V2P85SX, camera */
+/*	{
+		.address = 0x28,
+		.reg = ??,
+		.bit = ??,
+	}, ** V33A */
+	{
+		.address = 0x2c,
+		.reg = 0x69,
+		.bit = 0x00,
+	}, /* V33S -> V3P3S, display/ssd/audio */
+	{
+		.address = 0x30,
+		.reg = 0x68,
+		.bit = 0x00,
+	}, /* V33U -> V3P3U, SDIO wifi&bt */
+/*	{
+		.address = 0x34 .. 0x40,
+		.reg = ??,
+		.bit = ??,
+	}, ** V33I, V18A, REFQ, V12A */
+	{
+		.address = 0x44,
+		.reg = 0x5c,
+		.bit = 0x00,
+	}, /* V18S -> V1P8S, SOC/USB PHY/SIM */
+	{
+		.address = 0x48,
+		.reg = 0x5d,
+		.bit = 0x00,
+	}, /* V18X -> V1P8SX, eMMC/camara/audio */
+	{
+		.address = 0x4c,
+		.reg = 0x5b,
+		.bit = 0x00,
+	}, /* V18U -> V1P8U, LPDDR */
+	{
+		.address = 0x50,
+		.reg = 0x61,
+		.bit = 0x00,
+	}, /* V12X -> V1P2SX, SOC SFR */
+	{
+		.address = 0x54,
+		.reg = 0x60,
+		.bit = 0x00,
+	}, /* V12S -> V1P2S, MIPI */
+/*	{
+		.address = 0x58,
+		.reg = ??,
+		.bit = ??,
+	}, ** V10A */
+	{
+		.address = 0x5c,
+		.reg = 0x56,
+		.bit = 0x00,
+	}, /* V10S -> V1P0S, SOC GFX */
+	{
+		.address = 0x60,
+		.reg = 0x57,
+		.bit = 0x00,
+	}, /* V10X -> V1P0SX, SOC display/DDR IO/PCIe */
+	{
+		.address = 0x64,
+		.reg = 0x59,
+		.bit = 0x00,
+	}, /* V105 -> V1P05S, L2 SRAM */
+};
+
+static struct pmic_table thermal_table[] = {
+	{
+		.address = 0x00,
+		.reg = 0x75
+	},
+	{
+		.address = 0x04,
+		.reg = 0x95
+	},
+	{
+		.address = 0x08,
+		.reg = 0x97
+	},
+	{
+		.address = 0x0c,
+		.reg = 0x77
+	},
+	{
+		.address = 0x10,
+		.reg = 0x9a
+	},
+	{
+		.address = 0x14,
+		.reg = 0x9c
+	},
+	{
+		.address = 0x18,
+		.reg = 0x79
+	},
+	{
+		.address = 0x1c,
+		.reg = 0x9f
+	},
+	{
+		.address = 0x20,
+		.reg = 0xa1
+	},
+	{
+		.address = 0x48,
+		.reg = 0x94
+	},
+	{
+		.address = 0x4c,
+		.reg = 0x99
+	},
+	{
+		.address = 0x50,
+		.reg = 0x9e
+	},
+};
+
+static int intel_crc_pmic_get_power(struct regmap *regmap, int reg,
+				    int bit, u64 *value)
+{
+	int data;
+
+	if (regmap_read(regmap, reg, &data))
+		return -EIO;
+
+	*value = (data & PWR_SOURCE_SELECT) && (data & BIT(bit)) ? 1 : 0;
+	return 0;
+}
+
+static int intel_crc_pmic_update_power(struct regmap *regmap, int reg,
+				       int bit, bool on)
+{
+	int data;
+
+	if (regmap_read(regmap, reg, &data))
+		return -EIO;
+
+	if (on) {
+		data |= PWR_SOURCE_SELECT | BIT(bit);
+	} else {
+		data &= ~BIT(bit);
+		data |= PWR_SOURCE_SELECT;
+	}
+
+	if (regmap_write(regmap, reg, data))
+		return -EIO;
+	return 0;
+}
+
+static int intel_crc_pmic_get_raw_temp(struct regmap *regmap, int reg)
+{
+	int temp_l, temp_h;
+
+	/*
+	 * Raw temperature value is 10bits: 8bits in reg
+	 * and 2bits in reg-1: bit0,1
+	 */
+	if (regmap_read(regmap, reg, &temp_l) ||
+	    regmap_read(regmap, reg - 1, &temp_h))
+		return -EIO;
+
+	return temp_l | (temp_h & 0x3) << 8;
+}
+
+static int intel_crc_pmic_update_aux(struct regmap *regmap, int reg, int raw)
+{
+	return regmap_write(regmap, reg, raw) ||
+		regmap_update_bits(regmap, reg - 1, 0x3, raw >> 8) ? -EIO : 0;
+}
+
+static int intel_crc_pmic_get_policy(struct regmap *regmap,
+					int reg, int bit, u64 *value)
+{
+	int pen;
+
+	if (regmap_read(regmap, reg, &pen))
+		return -EIO;
+	*value = pen >> 7;
+	return 0;
+}
+
+static int intel_crc_pmic_update_policy(struct regmap *regmap,
+					int reg, int bit, int enable)
+{
+	int alert0;
+
+	/* Update to policy enable bit requires unlocking a0lock */
+	if (regmap_read(regmap, PMIC_A0LOCK_REG, &alert0))
+		return -EIO;
+
+	if (regmap_update_bits(regmap, PMIC_A0LOCK_REG, 0x01, 0))
+		return -EIO;
+
+	if (regmap_update_bits(regmap, reg, 0x80, enable << 7))
+		return -EIO;
+
+	/* restore alert0 */
+	if (regmap_write(regmap, PMIC_A0LOCK_REG, alert0))
+		return -EIO;
+
+	return 0;
+}
+
+static struct intel_pmic_opregion_data intel_crc_pmic_opregion_data = {
+	.get_power	= intel_crc_pmic_get_power,
+	.update_power	= intel_crc_pmic_update_power,
+	.get_raw_temp	= intel_crc_pmic_get_raw_temp,
+	.update_aux	= intel_crc_pmic_update_aux,
+	.get_policy	= intel_crc_pmic_get_policy,
+	.update_policy	= intel_crc_pmic_update_policy,
+	.power_table	= power_table,
+	.power_table_count= ARRAY_SIZE(power_table),
+	.thermal_table	= thermal_table,
+	.thermal_table_count = ARRAY_SIZE(thermal_table),
+};
+
+static int intel_crc_pmic_opregion_probe(struct platform_device *pdev)
+{
+	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
+	return intel_pmic_install_opregion_handler(&pdev->dev,
+			ACPI_HANDLE(pdev->dev.parent), pmic->regmap,
+			&intel_crc_pmic_opregion_data);
+}
+
+static struct platform_driver intel_crc_pmic_opregion_driver = {
+	.probe = intel_crc_pmic_opregion_probe,
+	.driver = {
+		.name = "byt_crystal_cove_pmic",
+	},
+};
+builtin_platform_driver(intel_crc_pmic_opregion_driver);
diff --git a/drivers/acpi/pmic/intel_pmic_chtcrc.c b/drivers/acpi/pmic/intel_pmic_chtcrc.c
new file mode 100644
index 000000000000..ebf8d3187df1
--- /dev/null
+++ b/drivers/acpi/pmic/intel_pmic_chtcrc.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Cherry Trail Crystal Cove PMIC operation region driver
+ *
+ * Copyright (C) 2019 Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/mfd/intel_soc_pmic.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include "intel_pmic.h"
+
+/*
+ * We have no docs for the CHT Crystal Cove PMIC. The Asus Zenfone-2 kernel
+ * code has 2 Crystal Cove regulator drivers, one calls the PMIC a "Crystal
+ * Cove Plus" PMIC and talks about Cherry Trail, so presuambly that one
+ * could be used to get register info for the regulators if we need to
+ * implement regulator support in the future.
+ *
+ * For now the sole purpose of this driver is to make
+ * intel_soc_pmic_exec_mipi_pmic_seq_element work on devices with a
+ * CHT Crystal Cove PMIC.
+ */
+static struct intel_pmic_opregion_data intel_chtcrc_pmic_opregion_data = {
+	.pmic_i2c_address = 0x6e,
+};
+
+static int intel_chtcrc_pmic_opregion_probe(struct platform_device *pdev)
+{
+	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
+	return intel_pmic_install_opregion_handler(&pdev->dev,
+			ACPI_HANDLE(pdev->dev.parent), pmic->regmap,
+			&intel_chtcrc_pmic_opregion_data);
+}
+
+static struct platform_driver intel_chtcrc_pmic_opregion_driver = {
+	.probe = intel_chtcrc_pmic_opregion_probe,
+	.driver = {
+		.name = "cht_crystal_cove_pmic",
+	},
+};
+builtin_platform_driver(intel_chtcrc_pmic_opregion_driver);
diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c
deleted file mode 100644
index a0f411a6e5ac..000000000000
--- a/drivers/acpi/pmic/intel_pmic_crc.c
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Intel CrystalCove PMIC operation region driver
- *
- * Copyright (C) 2014 Intel Corporation. All rights reserved.
- */
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/mfd/intel_soc_pmic.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include "intel_pmic.h"
-
-#define PWR_SOURCE_SELECT	BIT(1)
-
-#define PMIC_A0LOCK_REG		0xc5
-
-static struct pmic_table power_table[] = {
-/*	{
-		.address = 0x00,
-		.reg = ??,
-		.bit = ??,
-	}, ** VSYS */
-	{
-		.address = 0x04,
-		.reg = 0x63,
-		.bit = 0x00,
-	}, /* SYSX -> VSYS_SX */
-	{
-		.address = 0x08,
-		.reg = 0x62,
-		.bit = 0x00,
-	}, /* SYSU -> VSYS_U */
-	{
-		.address = 0x0c,
-		.reg = 0x64,
-		.bit = 0x00,
-	}, /* SYSS -> VSYS_S */
-	{
-		.address = 0x10,
-		.reg = 0x6a,
-		.bit = 0x00,
-	}, /* V50S -> V5P0S */
-	{
-		.address = 0x14,
-		.reg = 0x6b,
-		.bit = 0x00,
-	}, /* HOST -> VHOST, USB2/3 host */
-	{
-		.address = 0x18,
-		.reg = 0x6c,
-		.bit = 0x00,
-	}, /* VBUS -> VBUS, USB2/3 OTG */
-	{
-		.address = 0x1c,
-		.reg = 0x6d,
-		.bit = 0x00,
-	}, /* HDMI -> VHDMI */
-/*	{
-		.address = 0x20,
-		.reg = ??,
-		.bit = ??,
-	}, ** S285 */
-	{
-		.address = 0x24,
-		.reg = 0x66,
-		.bit = 0x00,
-	}, /* X285 -> V2P85SX, camera */
-/*	{
-		.address = 0x28,
-		.reg = ??,
-		.bit = ??,
-	}, ** V33A */
-	{
-		.address = 0x2c,
-		.reg = 0x69,
-		.bit = 0x00,
-	}, /* V33S -> V3P3S, display/ssd/audio */
-	{
-		.address = 0x30,
-		.reg = 0x68,
-		.bit = 0x00,
-	}, /* V33U -> V3P3U, SDIO wifi&bt */
-/*	{
-		.address = 0x34 .. 0x40,
-		.reg = ??,
-		.bit = ??,
-	}, ** V33I, V18A, REFQ, V12A */
-	{
-		.address = 0x44,
-		.reg = 0x5c,
-		.bit = 0x00,
-	}, /* V18S -> V1P8S, SOC/USB PHY/SIM */
-	{
-		.address = 0x48,
-		.reg = 0x5d,
-		.bit = 0x00,
-	}, /* V18X -> V1P8SX, eMMC/camara/audio */
-	{
-		.address = 0x4c,
-		.reg = 0x5b,
-		.bit = 0x00,
-	}, /* V18U -> V1P8U, LPDDR */
-	{
-		.address = 0x50,
-		.reg = 0x61,
-		.bit = 0x00,
-	}, /* V12X -> V1P2SX, SOC SFR */
-	{
-		.address = 0x54,
-		.reg = 0x60,
-		.bit = 0x00,
-	}, /* V12S -> V1P2S, MIPI */
-/*	{
-		.address = 0x58,
-		.reg = ??,
-		.bit = ??,
-	}, ** V10A */
-	{
-		.address = 0x5c,
-		.reg = 0x56,
-		.bit = 0x00,
-	}, /* V10S -> V1P0S, SOC GFX */
-	{
-		.address = 0x60,
-		.reg = 0x57,
-		.bit = 0x00,
-	}, /* V10X -> V1P0SX, SOC display/DDR IO/PCIe */
-	{
-		.address = 0x64,
-		.reg = 0x59,
-		.bit = 0x00,
-	}, /* V105 -> V1P05S, L2 SRAM */
-};
-
-static struct pmic_table thermal_table[] = {
-	{
-		.address = 0x00,
-		.reg = 0x75
-	},
-	{
-		.address = 0x04,
-		.reg = 0x95
-	},
-	{
-		.address = 0x08,
-		.reg = 0x97
-	},
-	{
-		.address = 0x0c,
-		.reg = 0x77
-	},
-	{
-		.address = 0x10,
-		.reg = 0x9a
-	},
-	{
-		.address = 0x14,
-		.reg = 0x9c
-	},
-	{
-		.address = 0x18,
-		.reg = 0x79
-	},
-	{
-		.address = 0x1c,
-		.reg = 0x9f
-	},
-	{
-		.address = 0x20,
-		.reg = 0xa1
-	},
-	{
-		.address = 0x48,
-		.reg = 0x94
-	},
-	{
-		.address = 0x4c,
-		.reg = 0x99
-	},
-	{
-		.address = 0x50,
-		.reg = 0x9e
-	},
-};
-
-static int intel_crc_pmic_get_power(struct regmap *regmap, int reg,
-				    int bit, u64 *value)
-{
-	int data;
-
-	if (regmap_read(regmap, reg, &data))
-		return -EIO;
-
-	*value = (data & PWR_SOURCE_SELECT) && (data & BIT(bit)) ? 1 : 0;
-	return 0;
-}
-
-static int intel_crc_pmic_update_power(struct regmap *regmap, int reg,
-				       int bit, bool on)
-{
-	int data;
-
-	if (regmap_read(regmap, reg, &data))
-		return -EIO;
-
-	if (on) {
-		data |= PWR_SOURCE_SELECT | BIT(bit);
-	} else {
-		data &= ~BIT(bit);
-		data |= PWR_SOURCE_SELECT;
-	}
-
-	if (regmap_write(regmap, reg, data))
-		return -EIO;
-	return 0;
-}
-
-static int intel_crc_pmic_get_raw_temp(struct regmap *regmap, int reg)
-{
-	int temp_l, temp_h;
-
-	/*
-	 * Raw temperature value is 10bits: 8bits in reg
-	 * and 2bits in reg-1: bit0,1
-	 */
-	if (regmap_read(regmap, reg, &temp_l) ||
-	    regmap_read(regmap, reg - 1, &temp_h))
-		return -EIO;
-
-	return temp_l | (temp_h & 0x3) << 8;
-}
-
-static int intel_crc_pmic_update_aux(struct regmap *regmap, int reg, int raw)
-{
-	return regmap_write(regmap, reg, raw) ||
-		regmap_update_bits(regmap, reg - 1, 0x3, raw >> 8) ? -EIO : 0;
-}
-
-static int intel_crc_pmic_get_policy(struct regmap *regmap,
-					int reg, int bit, u64 *value)
-{
-	int pen;
-
-	if (regmap_read(regmap, reg, &pen))
-		return -EIO;
-	*value = pen >> 7;
-	return 0;
-}
-
-static int intel_crc_pmic_update_policy(struct regmap *regmap,
-					int reg, int bit, int enable)
-{
-	int alert0;
-
-	/* Update to policy enable bit requires unlocking a0lock */
-	if (regmap_read(regmap, PMIC_A0LOCK_REG, &alert0))
-		return -EIO;
-
-	if (regmap_update_bits(regmap, PMIC_A0LOCK_REG, 0x01, 0))
-		return -EIO;
-
-	if (regmap_update_bits(regmap, reg, 0x80, enable << 7))
-		return -EIO;
-
-	/* restore alert0 */
-	if (regmap_write(regmap, PMIC_A0LOCK_REG, alert0))
-		return -EIO;
-
-	return 0;
-}
-
-static struct intel_pmic_opregion_data intel_crc_pmic_opregion_data = {
-	.get_power	= intel_crc_pmic_get_power,
-	.update_power	= intel_crc_pmic_update_power,
-	.get_raw_temp	= intel_crc_pmic_get_raw_temp,
-	.update_aux	= intel_crc_pmic_update_aux,
-	.get_policy	= intel_crc_pmic_get_policy,
-	.update_policy	= intel_crc_pmic_update_policy,
-	.power_table	= power_table,
-	.power_table_count= ARRAY_SIZE(power_table),
-	.thermal_table	= thermal_table,
-	.thermal_table_count = ARRAY_SIZE(thermal_table),
-};
-
-static int intel_crc_pmic_opregion_probe(struct platform_device *pdev)
-{
-	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
-	return intel_pmic_install_opregion_handler(&pdev->dev,
-			ACPI_HANDLE(pdev->dev.parent), pmic->regmap,
-			&intel_crc_pmic_opregion_data);
-}
-
-static struct platform_driver intel_crc_pmic_opregion_driver = {
-	.probe = intel_crc_pmic_opregion_probe,
-	.driver = {
-		.name = "crystal_cove_pmic",
-	},
-};
-builtin_platform_driver(intel_crc_pmic_opregion_driver);
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index f31544d3656e..4ae93350b70d 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -98,11 +98,11 @@ static inline bool acpi_pptt_match_type(int table_type, int type)
  *
  * Return: The cache structure and the level we terminated with.
  */
-static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
-				int local_level,
-				struct acpi_subtable_header *res,
-				struct acpi_pptt_cache **found,
-				int level, int type)
+static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
+					 unsigned int local_level,
+					 struct acpi_subtable_header *res,
+					 struct acpi_pptt_cache **found,
+					 unsigned int level, int type)
 {
 	struct acpi_pptt_cache *cache;
 
@@ -119,7 +119,7 @@ static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
 			if (*found != NULL && cache != *found)
 				pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
 
-			pr_debug("Found cache @ level %d\n", level);
+			pr_debug("Found cache @ level %u\n", level);
 			*found = cache;
 			/*
 			 * continue looking at this node's resource list
@@ -132,16 +132,17 @@ static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
 	return local_level;
 }
 
-static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr,
-						     struct acpi_pptt_processor *cpu_node,
-						     int *starting_level, int level,
-						     int type)
+static struct acpi_pptt_cache *
+acpi_find_cache_level(struct acpi_table_header *table_hdr,
+		      struct acpi_pptt_processor *cpu_node,
+		      unsigned int *starting_level, unsigned int level,
+		      int type)
 {
 	struct acpi_subtable_header *res;
-	int number_of_levels = *starting_level;
+	unsigned int number_of_levels = *starting_level;
 	int resource = 0;
 	struct acpi_pptt_cache *ret = NULL;
-	int local_level;
+	unsigned int local_level;
 
 	/* walk down from processor node */
 	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
@@ -321,12 +322,12 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
 						    unsigned int level,
 						    struct acpi_pptt_processor **node)
 {
-	int total_levels = 0;
+	unsigned int total_levels = 0;
 	struct acpi_pptt_cache *found = NULL;
 	struct acpi_pptt_processor *cpu_node;
 	u8 acpi_type = acpi_cache_type(type);
 
-	pr_debug("Looking for CPU %d's level %d cache type %d\n",
+	pr_debug("Looking for CPU %d's level %u cache type %d\n",
 		 acpi_cpu_id, level, acpi_type);
 
 	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..dcc289e30166 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -299,164 +299,24 @@ static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
 
 static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 {
-	acpi_status status;
-	u64 count;
-	int current_count;
-	int i, ret = 0;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *cst;
+	int ret;
 
 	if (nocst)
 		return -ENODEV;
 
-	current_count = 0;
-
-	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
-	if (ACPI_FAILURE(status)) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
-		return -ENODEV;
-	}
-
-	cst = buffer.pointer;
-
-	/* There must be at least 2 elements */
-	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
-		pr_err("not enough elements in _CST\n");
-		ret = -EFAULT;
-		goto end;
-	}
-
-	count = cst->package.elements[0].integer.value;
+	ret = acpi_processor_evaluate_cst(pr->handle, pr->id, &pr->power);
+	if (ret)
+		return ret;
 
-	/* Validate number of power states. */
-	if (count < 1 || count != cst->package.count - 1) {
-		pr_err("count given by _CST is not valid\n");
-		ret = -EFAULT;
-		goto end;
-	}
+	/*
+	 * It is expected that there will be at least 2 states, C1 and
+	 * something else (C2 or C3), so fail if that is not the case.
+	 */
+	if (pr->power.count < 2)
+		return -EFAULT;
 
-	/* Tell driver that at least _CST is supported. */
 	pr->flags.has_cst = 1;
-
-	for (i = 1; i <= count; i++) {
-		union acpi_object *element;
-		union acpi_object *obj;
-		struct acpi_power_register *reg;
-		struct acpi_processor_cx cx;
-
-		memset(&cx, 0, sizeof(cx));
-
-		element = &(cst->package.elements[i]);
-		if (element->type != ACPI_TYPE_PACKAGE)
-			continue;
-
-		if (element->package.count != 4)
-			continue;
-
-		obj = &(element->package.elements[0]);
-
-		if (obj->type != ACPI_TYPE_BUFFER)
-			continue;
-
-		reg = (struct acpi_power_register *)obj->buffer.pointer;
-
-		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
-		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
-			continue;
-
-		/* There should be an easy way to extract an integer... */
-		obj = &(element->package.elements[1]);
-		if (obj->type != ACPI_TYPE_INTEGER)
-			continue;
-
-		cx.type = obj->integer.value;
-		/*
-		 * Some buggy BIOSes won't list C1 in _CST -
-		 * Let acpi_processor_get_power_info_default() handle them later
-		 */
-		if (i == 1 && cx.type != ACPI_STATE_C1)
-			current_count++;
-
-		cx.address = reg->address;
-		cx.index = current_count + 1;
-
-		cx.entry_method = ACPI_CSTATE_SYSTEMIO;
-		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
-			if (acpi_processor_ffh_cstate_probe
-					(pr->id, &cx, reg) == 0) {
-				cx.entry_method = ACPI_CSTATE_FFH;
-			} else if (cx.type == ACPI_STATE_C1) {
-				/*
-				 * C1 is a special case where FIXED_HARDWARE
-				 * can be handled in non-MWAIT way as well.
-				 * In that case, save this _CST entry info.
-				 * Otherwise, ignore this info and continue.
-				 */
-				cx.entry_method = ACPI_CSTATE_HALT;
-				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
-			} else {
-				continue;
-			}
-			if (cx.type == ACPI_STATE_C1 &&
-			    (boot_option_idle_override == IDLE_NOMWAIT)) {
-				/*
-				 * In most cases the C1 space_id obtained from
-				 * _CST object is FIXED_HARDWARE access mode.
-				 * But when the option of idle=halt is added,
-				 * the entry_method type should be changed from
-				 * CSTATE_FFH to CSTATE_HALT.
-				 * When the option of idle=nomwait is added,
-				 * the C1 entry_method type should be
-				 * CSTATE_HALT.
-				 */
-				cx.entry_method = ACPI_CSTATE_HALT;
-				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
-			}
-		} else {
-			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
-				 cx.address);
-		}
-
-		if (cx.type == ACPI_STATE_C1) {
-			cx.valid = 1;
-		}
-
-		obj = &(element->package.elements[2]);
-		if (obj->type != ACPI_TYPE_INTEGER)
-			continue;
-
-		cx.latency = obj->integer.value;
-
-		obj = &(element->package.elements[3]);
-		if (obj->type != ACPI_TYPE_INTEGER)
-			continue;
-
-		current_count++;
-		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
-
-		/*
-		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
-		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
-		 */
-		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
-			pr_warn("Limiting number of power states to max (%d)\n",
-				ACPI_PROCESSOR_MAX_POWER);
-			pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
-			break;
-		}
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
-			  current_count));
-
-	/* Validate number of power states discovered */
-	if (current_count < 2)
-		ret = -EFAULT;
-
-      end:
-	kfree(buffer.pointer);
-
-	return ret;
+	return 0;
 }
 
 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
@@ -642,6 +502,19 @@ static int acpi_idle_bm_check(void)
 	return bm_status;
 }
 
+static void wait_for_freeze(void)
+{
+#ifdef	CONFIG_X86
+	/* No delay is needed if we are in guest */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		return;
+#endif
+	/* Dummy wait op - must do something useless after P_LVL2 read
+	   because chipsets cannot guarantee that STPCLK# signal
+	   gets asserted in time to freeze execution properly. */
+	inl(acpi_gbl_FADT.xpm_timer_block.address);
+}
+
 /**
  * acpi_idle_do_entry - enter idle state using the appropriate method
  * @cx: cstate data
@@ -658,10 +531,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 	} else {
 		/* IO port based C-state */
 		inb(cx->address);
-		/* Dummy wait op - must do something useless after P_LVL2 read
-		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		wait_for_freeze();
 	}
 }
 
@@ -682,8 +552,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 			safe_halt();
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			inb(cx->address);
-			/* See comment in acpi_idle_do_entry() */
-			inl(acpi_gbl_FADT.xpm_timer_block.address);
+			wait_for_freeze();
 		} else
 			return -ENODEV;
 	}
@@ -900,7 +769,6 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
 
 static inline void acpi_processor_cstate_first_run_checks(void)
 {
-	acpi_status status;
 	static int first_run;
 
 	if (first_run)
@@ -912,13 +780,10 @@ static inline void acpi_processor_cstate_first_run_checks(void)
 			  max_cstate);
 	first_run++;
 
-	if (acpi_gbl_FADT.cst_control && !nocst) {
-		status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
-					    acpi_gbl_FADT.cst_control, 8);
-		if (ACPI_FAILURE(status))
-			ACPI_EXCEPTION((AE_INFO, status,
-					"Notifying BIOS of _CST ability failed"));
-	}
+	if (nocst)
+		return;
+
+	acpi_processor_claim_cst_control();
 }
 #else
 
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 3eacf474e1e3..e601c4511a8b 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1317,6 +1317,52 @@ acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 						  args_count, args);
 }
 
+static const char *acpi_fwnode_get_name(const struct fwnode_handle *fwnode)
+{
+	const struct acpi_device *adev;
+	struct fwnode_handle *parent;
+
+	/* Is this the root node? */
+	parent = fwnode_get_parent(fwnode);
+	if (!parent)
+		return "\\";
+
+	fwnode_handle_put(parent);
+
+	if (is_acpi_data_node(fwnode)) {
+		const struct acpi_data_node *dn = to_acpi_data_node(fwnode);
+
+		return dn->name;
+	}
+
+	adev = to_acpi_device_node(fwnode);
+	if (WARN_ON(!adev))
+		return NULL;
+
+	return acpi_device_bid(adev);
+}
+
+static const char *
+acpi_fwnode_get_name_prefix(const struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *parent;
+
+	/* Is this the root node? */
+	parent = fwnode_get_parent(fwnode);
+	if (!parent)
+		return "";
+
+	/* Is this 2nd node from the root? */
+	parent = fwnode_get_next_parent(parent);
+	if (!parent)
+		return "";
+
+	fwnode_handle_put(parent);
+
+	/* ACPI device or data node. */
+	return ".";
+}
+
 static struct fwnode_handle *
 acpi_fwnode_get_parent(struct fwnode_handle *fwnode)
 {
@@ -1357,6 +1403,8 @@ acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
 		.get_parent = acpi_node_get_parent,			\
 		.get_next_child_node = acpi_get_next_subnode,		\
 		.get_named_child_node = acpi_fwnode_get_named_child_node, \
+		.get_name = acpi_fwnode_get_name,			\
+		.get_name_prefix = acpi_fwnode_get_name_prefix,		\
 		.get_reference_args = acpi_fwnode_get_reference_args,	\
 		.graph_get_next_endpoint =				\
 			acpi_graph_get_next_endpoint,			\
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 2a3e392751e0..3b4448972374 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -413,8 +413,8 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
 		u8 pol = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
 
 		if (triggering != trig || polarity != pol) {
-			pr_warning("ACPI: IRQ %d override to %s, %s\n", gsi,
-				   t ? "level" : "edge", p ? "low" : "high");
+			pr_warn("ACPI: IRQ %d override to %s, %s\n", gsi,
+				t ? "level" : "edge", p ? "low" : "high");
 			triggering = trig;
 			polarity = pol;
 		}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index aad6be5c0af0..915650bf519f 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2174,6 +2174,7 @@ int __init acpi_scan_init(void)
 	acpi_pci_root_init();
 	acpi_pci_link_init();
 	acpi_processor_init();
+	acpi_platform_init();
 	acpi_lpss_init();
 	acpi_apd_init();
 	acpi_cmos_rtc_init();
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2af937a8b1c5..439880629839 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -61,8 +61,11 @@ static struct notifier_block tts_notifier = {
 static int acpi_sleep_prepare(u32 acpi_state)
 {
 #ifdef CONFIG_ACPI_SLEEP
+	unsigned long acpi_wakeup_address;
+
 	/* do we have a wakeup address for S2 and S3? */
 	if (acpi_state == ACPI_STATE_S3) {
+		acpi_wakeup_address = acpi_get_wakeup_address();
 		if (!acpi_wakeup_address)
 			return -EFAULT;
 		acpi_set_waking_vector(acpi_wakeup_address);
@@ -977,6 +980,16 @@ static int acpi_s2idle_prepare_late(void)
 	return 0;
 }
 
+static void acpi_s2idle_sync(void)
+{
+	/*
+	 * The EC driver uses the system workqueue and an additional special
+	 * one, so those need to be flushed too.
+	 */
+	acpi_ec_flush_work();
+	acpi_os_wait_events_complete(); /* synchronize Notify handling */
+}
+
 static void acpi_s2idle_wake(void)
 {
 	/*
@@ -1001,13 +1014,8 @@ static void acpi_s2idle_wake(void)
 		 * should be missed by canceling the wakeup here.
 		 */
 		pm_system_cancel_wakeup();
-		/*
-		 * The EC driver uses the system workqueue and an additional
-		 * special one, so those need to be flushed too.
-		 */
-		acpi_os_wait_events_complete(); /* synchronize EC GPE processing */
-		acpi_ec_flush_work();
-		acpi_os_wait_events_complete(); /* synchronize Notify handling */
+
+		acpi_s2idle_sync();
 
 		rearm_wake_irq(acpi_sci_irq);
 	}
@@ -1024,6 +1032,13 @@ static void acpi_s2idle_restore_early(void)
 
 static void acpi_s2idle_restore(void)
 {
+	/*
+	 * Drain pending events before restoring the working-state configuration
+	 * of GPEs.
+	 */
+	acpi_os_wait_events_complete(); /* synchronize GPE processing */
+	acpi_s2idle_sync();
+
 	s2idle_wakeup = false;
 
 	acpi_enable_all_runtime_gpes();
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 75948a3f1a20..c60d2c6d31d6 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -819,14 +819,14 @@ end:
  * interface:
  *   echo unmask > /sys/firmware/acpi/interrupts/gpe00
  */
-#define ACPI_MASKABLE_GPE_MAX	0xFF
+#define ACPI_MASKABLE_GPE_MAX	0x100
 static DECLARE_BITMAP(acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) __initdata;
 
 static int __init acpi_gpe_set_masked_gpes(char *val)
 {
 	u8 gpe;
 
-	if (kstrtou8(val, 0, &gpe) || gpe > ACPI_MASKABLE_GPE_MAX)
+	if (kstrtou8(val, 0, &gpe))
 		return -EINVAL;
 	set_bit(gpe, acpi_masked_gpes_map);
 
@@ -838,7 +838,7 @@ void __init acpi_gpe_apply_masked_gpes(void)
 {
 	acpi_handle handle;
 	acpi_status status;
-	u8 gpe;
+	u16 gpe;
 
 	for_each_set_bit(gpe, acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) {
 		status = acpi_get_gpe_device(gpe, &handle);
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index e3974a8f8fd4..804ac0df58ec 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -455,6 +455,7 @@ EXPORT_SYMBOL(acpi_evaluate_ost);
 
 /**
  * acpi_handle_path: Return the object path of handle
+ * @handle: ACPI device handle
  *
  * Caller must free the returned buffer
  */
@@ -473,6 +474,9 @@ static char *acpi_handle_path(acpi_handle handle)
 
 /**
  * acpi_handle_printk: Print message with ACPI prefix and object path
+ * @level: log level
+ * @handle: ACPI device handle
+ * @fmt: format string
  *
  * This function is called through acpi_handle_<level> macros and prints
  * a message with ACPI prefix and object path.  This function acquires
@@ -501,6 +505,9 @@ EXPORT_SYMBOL(acpi_handle_printk);
 #if defined(CONFIG_DYNAMIC_DEBUG)
 /**
  * __acpi_handle_debug: pr_debug with ACPI prefix and object path
+ * @descriptor: Dynamic Debug descriptor
+ * @handle: ACPI device handle
+ * @fmt: format string
  *
  * This function is called through acpi_handle_debug macro and debug
  * prints a message with ACPI prefix and object path. This function
@@ -695,6 +702,31 @@ bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
 EXPORT_SYMBOL(acpi_check_dsm);
 
 /**
+ * acpi_dev_hid_uid_match - Match device by supplied HID and UID
+ * @adev: ACPI device to match.
+ * @hid2: Hardware ID of the device.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
+ *
+ * Matches HID and UID in @adev with given @hid2 and @uid2.
+ * Returns true if matches.
+ */
+bool acpi_dev_hid_uid_match(struct acpi_device *adev,
+			    const char *hid2, const char *uid2)
+{
+	const char *hid1 = acpi_device_hid(adev);
+	const char *uid1 = acpi_device_uid(adev);
+
+	if (strcmp(hid1, hid2))
+		return false;
+
+	if (!uid2)
+		return true;
+
+	return uid1 && !strcmp(uid1, uid2);
+}
+EXPORT_SYMBOL(acpi_dev_hid_uid_match);
+
+/**
  * acpi_dev_found - Detect presence of a given ACPI device in the namespace.
  * @hid: Hardware ID of the device.
  *
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 31014c7d3793..419f814d596a 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -303,6 +303,22 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		},
 	},
 	{
+	 .callback = video_detect_force_native,
+	 .ident = "Lenovo E41-25",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "81FS"),
+		},
+	},
+	{
+	 .callback = video_detect_force_native,
+	 .ident = "Lenovo E41-45",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "82BK"),
+		},
+	},
+	{
 	 /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
 	 .callback = video_detect_force_native,
 	 .ident = "Apple MacBook Pro 12,1",
@@ -336,6 +352,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
 		},
 	},
+
+	/*
+	 * Desktops which falsely report a backlight and which our heuristics
+	 * for this do not catch.
+	 */
 	{
 	 .callback = video_detect_force_none,
 	 .ident = "Dell OptiPlex 9020M",
@@ -344,6 +365,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"),
 		},
 	},
+	{
+	 .callback = video_detect_force_none,
+	 .ident = "MSI MS-7721",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "MSI"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"),
+		},
+	},
 	{ },
 };
 
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 265d9dd46a5e..a6b2082c24f8 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -65,6 +65,7 @@
 #include <linux/ratelimit.h>
 #include <linux/syscalls.h>
 #include <linux/task_work.h>
+#include <linux/sizes.h>
 
 #include <uapi/linux/android/binder.h>
 #include <uapi/linux/android/binderfs.h>
@@ -92,11 +93,6 @@ static atomic_t binder_last_id;
 static int proc_show(struct seq_file *m, void *unused);
 DEFINE_SHOW_ATTRIBUTE(proc);
 
-/* This is only defined in include/asm-arm/sizes.h */
-#ifndef SZ_1K
-#define SZ_1K                               0x400
-#endif
-
 #define FORBIDDEN_MMAP_FLAGS                (VM_WRITE)
 
 enum {
@@ -2253,10 +2249,12 @@ static void binder_deferred_fd_close(int fd)
 		return;
 	init_task_work(&twcb->twork, binder_do_fd_close);
 	__close_fd_get_file(fd, &twcb->file);
-	if (twcb->file)
+	if (twcb->file) {
+		filp_close(twcb->file, current->files);
 		task_work_add(current, &twcb->twork, true);
-	else
+	} else {
 		kfree(twcb);
+	}
 }
 
 static void binder_transaction_buffer_release(struct binder_proc *proc,
@@ -3314,7 +3312,7 @@ static void binder_transaction(struct binder_proc *proc,
 			binder_size_t parent_offset;
 			struct binder_fd_array_object *fda =
 				to_binder_fd_array_object(hdr);
-			size_t num_valid = (buffer_offset - off_start_offset) *
+			size_t num_valid = (buffer_offset - off_start_offset) /
 						sizeof(binder_size_t);
 			struct binder_buffer_object *parent =
 				binder_validate_ptr(target_proc, t->buffer,
@@ -3388,7 +3386,7 @@ static void binder_transaction(struct binder_proc *proc,
 				t->buffer->user_data + sg_buf_offset;
 			sg_buf_offset += ALIGN(bp->length, sizeof(u64));
 
-			num_valid = (buffer_offset - off_start_offset) *
+			num_valid = (buffer_offset - off_start_offset) /
 					sizeof(binder_size_t);
 			ret = binder_fixup_parent(t, thread, bp,
 						  off_start_offset,
@@ -5203,10 +5201,11 @@ err_bad_arg:
 
 static int binder_open(struct inode *nodp, struct file *filp)
 {
-	struct binder_proc *proc;
+	struct binder_proc *proc, *itr;
 	struct binder_device *binder_dev;
 	struct binderfs_info *info;
 	struct dentry *binder_binderfs_dir_entry_proc = NULL;
+	bool existing_pid = false;
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
 		     current->group_leader->pid, current->pid);
@@ -5239,19 +5238,24 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	filp->private_data = proc;
 
 	mutex_lock(&binder_procs_lock);
+	hlist_for_each_entry(itr, &binder_procs, proc_node) {
+		if (itr->pid == proc->pid) {
+			existing_pid = true;
+			break;
+		}
+	}
 	hlist_add_head(&proc->proc_node, &binder_procs);
 	mutex_unlock(&binder_procs_lock);
 
-	if (binder_debugfs_dir_entry_proc) {
+	if (binder_debugfs_dir_entry_proc && !existing_pid) {
 		char strbuf[11];
 
 		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
 		/*
-		 * proc debug entries are shared between contexts, so
-		 * this will fail if the process tries to open the driver
-		 * again with a different context. The priting code will
-		 * anyway print all contexts that a given PID has, so this
-		 * is not a problem.
+		 * proc debug entries are shared between contexts.
+		 * Only create for the first PID to avoid debugfs log spamming
+		 * The printing code will anyway print all contexts for a given
+		 * PID so this is not a problem.
 		 */
 		proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
 			binder_debugfs_dir_entry_proc,
@@ -5259,19 +5263,16 @@ static int binder_open(struct inode *nodp, struct file *filp)
 			&proc_fops);
 	}
 
-	if (binder_binderfs_dir_entry_proc) {
+	if (binder_binderfs_dir_entry_proc && !existing_pid) {
 		char strbuf[11];
 		struct dentry *binderfs_entry;
 
 		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
 		/*
 		 * Similar to debugfs, the process specific log file is shared
-		 * between contexts. If the file has already been created for a
-		 * process, the following binderfs_create_file() call will
-		 * fail with error code EEXIST if another context of the same
-		 * process invoked binder_open(). This is ok since same as
-		 * debugfs, the log file will contain information on all
-		 * contexts of a given PID.
+		 * between contexts. Only create for the first PID.
+		 * This is ok since same as debugfs, the log file will contain
+		 * information on all contexts of a given PID.
 		 */
 		binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
 			strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
@@ -5281,10 +5282,8 @@ static int binder_open(struct inode *nodp, struct file *filp)
 			int error;
 
 			error = PTR_ERR(binderfs_entry);
-			if (error != -EEXIST) {
-				pr_warn("Unable to create file %s in binderfs (error %d)\n",
-					strbuf, error);
-			}
+			pr_warn("Unable to create file %s in binderfs (error %d)\n",
+				strbuf, error);
 		}
 	}
 
@@ -6058,7 +6057,7 @@ const struct file_operations binder_fops = {
 	.owner = THIS_MODULE,
 	.poll = binder_poll,
 	.unlocked_ioctl = binder_ioctl,
-	.compat_ioctl = binder_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.mmap = binder_mmap,
 	.open = binder_open,
 	.flush = binder_flush,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index eb76a823fbb2..2d8b9b91dee0 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -268,7 +268,6 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
 			alloc->pages_high = index + 1;
 
 		trace_binder_alloc_page_end(alloc, index);
-		/* vm_insert_page does not seem to increment the refcount */
 	}
 	if (mm) {
 		up_read(&mm->mmap_sem);
@@ -277,8 +276,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
 	return 0;
 
 free_range:
-	for (page_addr = end - PAGE_SIZE; page_addr >= start;
-	     page_addr -= PAGE_SIZE) {
+	for (page_addr = end - PAGE_SIZE; 1; page_addr -= PAGE_SIZE) {
 		bool ret;
 		size_t index;
 
@@ -291,6 +289,8 @@ free_range:
 		WARN_ON(!ret);
 
 		trace_binder_free_lru_end(alloc, index);
+		if (page_addr == start)
+			break;
 		continue;
 
 err_vm_insert_page_failed:
@@ -298,7 +298,8 @@ err_vm_insert_page_failed:
 		page->page_ptr = NULL;
 err_alloc_page_failed:
 err_page_ptr_cleared:
-		;
+		if (page_addr == start)
+			break;
 	}
 err_no_vma:
 	if (mm) {
@@ -681,17 +682,17 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
 	struct binder_buffer *buffer;
 
 	mutex_lock(&binder_alloc_mmap_lock);
-	if (alloc->buffer) {
+	if (alloc->buffer_size) {
 		ret = -EBUSY;
 		failure_string = "already mapped";
 		goto err_already_mapped;
 	}
+	alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
+				   SZ_4M);
+	mutex_unlock(&binder_alloc_mmap_lock);
 
 	alloc->buffer = (void __user *)vma->vm_start;
-	mutex_unlock(&binder_alloc_mmap_lock);
 
-	alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
-				   SZ_4M);
 	alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE,
 			       sizeof(alloc->pages[0]),
 			       GFP_KERNEL);
@@ -722,8 +723,9 @@ err_alloc_buf_struct_failed:
 	kfree(alloc->pages);
 	alloc->pages = NULL;
 err_alloc_pages_failed:
-	mutex_lock(&binder_alloc_mmap_lock);
 	alloc->buffer = NULL;
+	mutex_lock(&binder_alloc_mmap_lock);
+	alloc->buffer_size = 0;
 err_already_mapped:
 	mutex_unlock(&binder_alloc_mmap_lock);
 	binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
@@ -841,14 +843,20 @@ void binder_alloc_print_pages(struct seq_file *m,
 	int free = 0;
 
 	mutex_lock(&alloc->mutex);
-	for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
-		page = &alloc->pages[i];
-		if (!page->page_ptr)
-			free++;
-		else if (list_empty(&page->lru))
-			active++;
-		else
-			lru++;
+	/*
+	 * Make sure the binder_alloc is fully initialized, otherwise we might
+	 * read inconsistent state.
+	 */
+	if (binder_alloc_get_vma(alloc) != NULL) {
+		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+			page = &alloc->pages[i];
+			if (!page->page_ptr)
+				free++;
+			else if (list_empty(&page->lru))
+				active++;
+			else
+				lru++;
+		}
 	}
 	mutex_unlock(&alloc->mutex);
 	seq_printf(m, "  pages: %d:%d:%d\n", active, lru, free);
diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index 753985c01517..2a04e8abd397 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -56,7 +56,7 @@ struct acard_sg {
 	__le32			size;	 /* bit 31 (EOT) max==0x10000 (64k) */
 };
 
-static void acard_ahci_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors acard_ahci_qc_prep(struct ata_queued_cmd *qc);
 static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
 static int acard_ahci_port_start(struct ata_port *ap);
 static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
@@ -210,7 +210,7 @@ static unsigned int acard_ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl)
 	return si;
 }
 
-static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors acard_ahci_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct ahci_port_priv *pp = ap->private_data;
@@ -218,7 +218,6 @@ static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
 	void *cmd_tbl;
 	u32 opts;
 	const u32 cmd_fis_len = 5; /* five dwords */
-	unsigned int n_elem;
 
 	/*
 	 * Fill in command table information.  First, the header,
@@ -232,9 +231,8 @@ static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
 		memcpy(cmd_tbl + AHCI_CMD_TBL_CDB, qc->cdb, qc->dev->cdb_len);
 	}
 
-	n_elem = 0;
 	if (qc->flags & ATA_QCFLAG_DMAMAP)
-		n_elem = acard_ahci_fill_sg(qc, cmd_tbl);
+		acard_ahci_fill_sg(qc, cmd_tbl);
 
 	/*
 	 * Fill in command slot information.
@@ -248,6 +246,8 @@ static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
 		opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
 
 	ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
+
+	return AC_ERR_OK;
 }
 
 static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 05c2b32dcc4d..4bfd1b14b390 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -56,6 +56,7 @@ enum board_ids {
 	board_ahci_yes_fbs,
 
 	/* board IDs for specific chipsets in alphabetical order */
+	board_ahci_al,
 	board_ahci_avn,
 	board_ahci_mcp65,
 	board_ahci_mcp77,
@@ -167,6 +168,13 @@ static const struct ata_port_info ahci_port_info[] = {
 		.port_ops	= &ahci_ops,
 	},
 	/* by chipsets */
+	[board_ahci_al] = {
+		AHCI_HFLAGS	(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_NO_MSI),
+		.flags		= AHCI_FLAG_COMMON,
+		.pio_mask	= ATA_PIO4,
+		.udma_mask	= ATA_UDMA6,
+		.port_ops	= &ahci_ops,
+	},
 	[board_ahci_avn] = {
 		.flags		= AHCI_FLAG_COMMON,
 		.pio_mask	= ATA_PIO4,
@@ -415,6 +423,11 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(ATI, 0x4394), board_ahci_sb700 }, /* ATI SB700/800 */
 	{ PCI_VDEVICE(ATI, 0x4395), board_ahci_sb700 }, /* ATI SB700/800 */
 
+	/* Amazon's Annapurna Labs support */
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031),
+		.class = PCI_CLASS_STORAGE_SATA_AHCI,
+		.class_mask = 0xffffff,
+		board_ahci_al },
 	/* AMD */
 	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
 	{ PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
@@ -897,7 +910,7 @@ static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac)
 	 * value, don't extend it here. This happens on STA2X11, for example.
 	 *
 	 * XXX: manipulating the DMA mask from platform code is completely
-	 * bogus, platform code should use dev->bus_dma_mask instead..
+	 * bogus, platform code should use dev->bus_dma_limit instead..
 	 */
 	if (pdev->dma_mask && pdev->dma_mask < DMA_BIT_MASK(32))
 		return 0;
diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index f41744b9b38a..6853dbb4131d 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -73,11 +73,11 @@ enum brcm_ahci_version {
 	BRCM_SATA_BCM7425 = 1,
 	BRCM_SATA_BCM7445,
 	BRCM_SATA_NSP,
+	BRCM_SATA_BCM7216,
 };
 
 enum brcm_ahci_quirks {
-	BRCM_AHCI_QUIRK_NO_NCQ		= BIT(0),
-	BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE	= BIT(1),
+	BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE	= BIT(0),
 };
 
 struct brcm_ahci_priv {
@@ -213,19 +213,12 @@ static void brcm_sata_phys_disable(struct brcm_ahci_priv *priv)
 			brcm_sata_phy_disable(priv, i);
 }
 
-static u32 brcm_ahci_get_portmask(struct platform_device *pdev,
+static u32 brcm_ahci_get_portmask(struct ahci_host_priv *hpriv,
 				  struct brcm_ahci_priv *priv)
 {
-	void __iomem *ahci;
-	struct resource *res;
 	u32 impl;
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ahci");
-	ahci = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(ahci))
-		return 0;
-
-	impl = readl(ahci + HOST_PORTS_IMPL);
+	impl = readl(hpriv->mmio + HOST_PORTS_IMPL);
 
 	if (fls(impl) > SATA_TOP_MAX_PHYS)
 		dev_warn(priv->dev, "warning: more ports than PHYs (%#x)\n",
@@ -233,9 +226,6 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev,
 	else if (!impl)
 		dev_info(priv->dev, "no ports found\n");
 
-	devm_iounmap(&pdev->dev, ahci);
-	devm_release_mem_region(&pdev->dev, res->start, resource_size(res));
-
 	return impl;
 }
 
@@ -285,6 +275,13 @@ static unsigned int brcm_ahci_read_id(struct ata_device *dev,
 	/* Perform the SATA PHY reset sequence */
 	brcm_sata_phy_disable(priv, ap->port_no);
 
+	/* Reset the SATA clock */
+	ahci_platform_disable_clks(hpriv);
+	msleep(10);
+
+	ahci_platform_enable_clks(hpriv);
+	msleep(10);
+
 	/* Bring the PHY back on */
 	brcm_sata_phy_enable(priv, ap->port_no);
 
@@ -341,7 +338,6 @@ static const struct ata_port_info ahci_brcm_port_info = {
 	.port_ops	= &ahci_brcm_platform_ops,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int brcm_ahci_suspend(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
@@ -349,23 +345,70 @@ static int brcm_ahci_suspend(struct device *dev)
 	struct brcm_ahci_priv *priv = hpriv->plat_data;
 	int ret;
 
-	ret = ahci_platform_suspend(dev);
 	brcm_sata_phys_disable(priv);
+
+	if (IS_ENABLED(CONFIG_PM_SLEEP))
+		ret = ahci_platform_suspend(dev);
+	else
+		ret = 0;
+
+	if (priv->version != BRCM_SATA_BCM7216)
+		reset_control_assert(priv->rcdev);
+
 	return ret;
 }
 
-static int brcm_ahci_resume(struct device *dev)
+static int __maybe_unused brcm_ahci_resume(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	struct brcm_ahci_priv *priv = hpriv->plat_data;
+	int ret = 0;
+
+	if (priv->version == BRCM_SATA_BCM7216)
+		ret = reset_control_reset(priv->rcdev);
+	else
+		ret = reset_control_deassert(priv->rcdev);
+	if (ret)
+		return ret;
+
+	/* Make sure clocks are turned on before re-configuration */
+	ret = ahci_platform_enable_clks(hpriv);
+	if (ret)
+		return ret;
 
 	brcm_sata_init(priv);
 	brcm_sata_phys_enable(priv);
 	brcm_sata_alpm_init(hpriv);
-	return ahci_platform_resume(dev);
+
+	/* Since we had to enable clocks earlier on, we cannot use
+	 * ahci_platform_resume() as-is since a second call to
+	 * ahci_platform_enable_resources() would bump up the resources
+	 * (regulators, clocks, PHYs) count artificially so we copy the part
+	 * after ahci_platform_enable_resources().
+	 */
+	ret = ahci_platform_enable_phys(hpriv);
+	if (ret)
+		goto out_disable_phys;
+
+	ret = ahci_platform_resume_host(dev);
+	if (ret)
+		goto out_disable_platform_phys;
+
+	/* We resumed so update PM runtime state */
+	pm_runtime_disable(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	return 0;
+
+out_disable_platform_phys:
+	ahci_platform_disable_phys(hpriv);
+out_disable_phys:
+	brcm_sata_phys_disable(priv);
+	ahci_platform_disable_clks(hpriv);
+	return ret;
 }
-#endif
 
 static struct scsi_host_template ahci_platform_sht = {
 	AHCI_SHT(DRV_NAME),
@@ -376,6 +419,7 @@ static const struct of_device_id ahci_of_match[] = {
 	{.compatible = "brcm,bcm7445-ahci", .data = (void *)BRCM_SATA_BCM7445},
 	{.compatible = "brcm,bcm63138-ahci", .data = (void *)BRCM_SATA_BCM7445},
 	{.compatible = "brcm,bcm-nsp-ahci", .data = (void *)BRCM_SATA_NSP},
+	{.compatible = "brcm,bcm7216-ahci", .data = (void *)BRCM_SATA_BCM7216},
 	{},
 };
 MODULE_DEVICE_TABLE(of, ahci_of_match);
@@ -384,6 +428,7 @@ static int brcm_ahci_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
 	struct device *dev = &pdev->dev;
+	const char *reset_name = NULL;
 	struct brcm_ahci_priv *priv;
 	struct ahci_host_priv *hpriv;
 	struct resource *res;
@@ -405,49 +450,86 @@ static int brcm_ahci_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->top_ctrl))
 		return PTR_ERR(priv->top_ctrl);
 
-	/* Reset is optional depending on platform */
-	priv->rcdev = devm_reset_control_get(&pdev->dev, "ahci");
-	if (!IS_ERR_OR_NULL(priv->rcdev))
-		reset_control_deassert(priv->rcdev);
+	/* Reset is optional depending on platform and named differently */
+	if (priv->version == BRCM_SATA_BCM7216)
+		reset_name = "rescal";
+	else
+		reset_name = "ahci";
+
+	priv->rcdev = devm_reset_control_get_optional(&pdev->dev, reset_name);
+	if (IS_ERR(priv->rcdev))
+		return PTR_ERR(priv->rcdev);
+
+	hpriv = ahci_platform_get_resources(pdev, 0);
+	if (IS_ERR(hpriv))
+		return PTR_ERR(hpriv);
 
-	if ((priv->version == BRCM_SATA_BCM7425) ||
-		(priv->version == BRCM_SATA_NSP)) {
-		priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ;
+	hpriv->plat_data = priv;
+	hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP | AHCI_HFLAG_NO_WRITE_TO_RO;
+
+	switch (priv->version) {
+	case BRCM_SATA_BCM7425:
+		hpriv->flags |= AHCI_HFLAG_DELAY_ENGINE;
+		/* fall through */
+	case BRCM_SATA_NSP:
+		hpriv->flags |= AHCI_HFLAG_NO_NCQ;
 		priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE;
+		break;
+	default:
+		break;
 	}
 
+	if (priv->version == BRCM_SATA_BCM7216)
+		ret = reset_control_reset(priv->rcdev);
+	else
+		ret = reset_control_deassert(priv->rcdev);
+	if (ret)
+		return ret;
+
+	ret = ahci_platform_enable_clks(hpriv);
+	if (ret)
+		goto out_reset;
+
+	/* Must be first so as to configure endianness including that
+	 * of the standard AHCI register space.
+	 */
 	brcm_sata_init(priv);
 
-	priv->port_mask = brcm_ahci_get_portmask(pdev, priv);
-	if (!priv->port_mask)
-		return -ENODEV;
+	/* Initializes priv->port_mask which is used below */
+	priv->port_mask = brcm_ahci_get_portmask(hpriv, priv);
+	if (!priv->port_mask) {
+		ret = -ENODEV;
+		goto out_disable_clks;
+	}
 
+	/* Must be done before ahci_platform_enable_phys() */
 	brcm_sata_phys_enable(priv);
 
-	hpriv = ahci_platform_get_resources(pdev, 0);
-	if (IS_ERR(hpriv))
-		return PTR_ERR(hpriv);
-	hpriv->plat_data = priv;
-	hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP;
-
 	brcm_sata_alpm_init(hpriv);
 
-	ret = ahci_platform_enable_resources(hpriv);
+	ret = ahci_platform_enable_phys(hpriv);
 	if (ret)
-		return ret;
-
-	if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ)
-		hpriv->flags |= AHCI_HFLAG_NO_NCQ;
-	hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO;
+		goto out_disable_phys;
 
 	ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info,
 				      &ahci_platform_sht);
 	if (ret)
-		return ret;
+		goto out_disable_platform_phys;
 
 	dev_info(dev, "Broadcom AHCI SATA3 registered\n");
 
 	return 0;
+
+out_disable_platform_phys:
+	ahci_platform_disable_phys(hpriv);
+out_disable_phys:
+	brcm_sata_phys_disable(priv);
+out_disable_clks:
+	ahci_platform_disable_clks(hpriv);
+out_reset:
+	if (priv->version != BRCM_SATA_BCM7216)
+		reset_control_assert(priv->rcdev);
+	return ret;
 }
 
 static int brcm_ahci_remove(struct platform_device *pdev)
@@ -457,20 +539,35 @@ static int brcm_ahci_remove(struct platform_device *pdev)
 	struct brcm_ahci_priv *priv = hpriv->plat_data;
 	int ret;
 
+	brcm_sata_phys_disable(priv);
+
 	ret = ata_platform_remove_one(pdev);
 	if (ret)
 		return ret;
 
-	brcm_sata_phys_disable(priv);
-
 	return 0;
 }
 
+static void brcm_ahci_shutdown(struct platform_device *pdev)
+{
+	int ret;
+
+	/* All resources releasing happens via devres, but our device, unlike a
+	 * proper remove is not disappearing, therefore using
+	 * brcm_ahci_suspend() here which does explicit power management is
+	 * appropriate.
+	 */
+	ret = brcm_ahci_suspend(&pdev->dev);
+	if (ret)
+		dev_err(&pdev->dev, "failed to shutdown\n");
+}
+
 static SIMPLE_DEV_PM_OPS(ahci_brcm_pm_ops, brcm_ahci_suspend, brcm_ahci_resume);
 
 static struct platform_driver brcm_ahci_driver = {
 	.probe = brcm_ahci_probe,
 	.remove = brcm_ahci_remove,
+	.shutdown = brcm_ahci_shutdown,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index bfc617cc8ac5..948d2c6557f3 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -11,8 +11,8 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/ahci_platform.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 #include <linux/libata.h>
@@ -100,7 +100,7 @@ struct imx_ahci_priv {
 	struct clk *phy_pclk0;
 	struct clk *phy_pclk1;
 	void __iomem *phy_base;
-	int clkreq_gpio;
+	struct gpio_desc *clkreq_gpiod;
 	struct regmap *gpr;
 	bool no_device;
 	bool first_time;
@@ -980,7 +980,6 @@ static struct scsi_host_template ahci_platform_sht = {
 
 static int imx8_sata_probe(struct device *dev, struct imx_ahci_priv *imxpriv)
 {
-	int ret;
 	struct resource *phy_res;
 	struct platform_device *pdev = imxpriv->ahci_pdev;
 	struct device_node *np = dev->of_node;
@@ -1033,20 +1032,12 @@ static int imx8_sata_probe(struct device *dev, struct imx_ahci_priv *imxpriv)
 	}
 
 	/* Fetch GPIO, then enable the external OSC */
-	imxpriv->clkreq_gpio = of_get_named_gpio(np, "clkreq-gpio", 0);
-	if (gpio_is_valid(imxpriv->clkreq_gpio)) {
-		ret = devm_gpio_request_one(dev, imxpriv->clkreq_gpio,
-					    GPIOF_OUT_INIT_LOW,
-					    "SATA CLKREQ");
-		if (ret == -EBUSY) {
-			dev_info(dev, "clkreq had been initialized.\n");
-		} else if (ret) {
-			dev_err(dev, "%d unable to get clkreq.\n", ret);
-			return ret;
-		}
-	} else if (imxpriv->clkreq_gpio == -EPROBE_DEFER) {
-		return imxpriv->clkreq_gpio;
-	}
+	imxpriv->clkreq_gpiod = devm_gpiod_get_optional(dev, "clkreq",
+				GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE);
+	if (IS_ERR(imxpriv->clkreq_gpiod))
+		return PTR_ERR(imxpriv->clkreq_gpiod);
+	if (imxpriv->clkreq_gpiod)
+		gpiod_set_consumer_name(imxpriv->clkreq_gpiod, "SATA CLKREQ");
 
 	return 0;
 }
diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c
index e3163dae5e85..cb55ebc1725b 100644
--- a/drivers/ata/ahci_tegra.c
+++ b/drivers/ata/ahci_tegra.c
@@ -483,7 +483,6 @@ static int tegra_ahci_probe(struct platform_device *pdev)
 	struct tegra_ahci_priv *tegra;
 	struct resource *res;
 	int ret;
-	unsigned int i;
 
 	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
@@ -543,8 +542,9 @@ static int tegra_ahci_probe(struct platform_device *pdev)
 	if (!tegra->supplies)
 		return -ENOMEM;
 
-	for (i = 0; i < tegra->soc->num_supplies; i++)
-		tegra->supplies[i].supply = tegra->soc->supply_names[i];
+	regulator_bulk_set_supply_names(tegra->supplies,
+					tegra->soc->supply_names,
+					tegra->soc->num_supplies);
 
 	ret = devm_regulator_bulk_get(&pdev->dev,
 				      tegra->soc->num_supplies,
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index e4da725381d3..3ca7720e7d8f 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -841,6 +841,12 @@ static int piix_broken_suspend(void)
 			},
 		},
 		{
+			.ident = "TECRA M3",
+			.matches = {
+				DMI_MATCH(DMI_OEM_STRING, "Tecra M3,"),
+			},
+		},
+		{
 			.ident = "TECRA M4",
 			.matches = {
 				DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
@@ -955,18 +961,10 @@ static int piix_broken_suspend(void)
 
 		{ }	/* terminate list */
 	};
-	static const char *oemstrs[] = {
-		"Tecra M3,",
-	};
-	int i;
 
 	if (dmi_check_system(sysids))
 		return 1;
 
-	for (i = 0; i < ARRAY_SIZE(oemstrs); i++)
-		if (dmi_find_device(DMI_DEV_TYPE_OEM_STRING, oemstrs[i], NULL))
-			return 1;
-
 	/* TECRA M4 sometimes forgets its identify and reports bogus
 	 * DMI information.  As the bogus information is a bit
 	 * generic, match as many entries as possible.  This manual
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index bff369d9a1a7..ea5bf5f4cbed 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -57,7 +57,7 @@ static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
 static int ahci_port_start(struct ata_port *ap);
 static void ahci_port_stop(struct ata_port *ap);
-static void ahci_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors ahci_qc_prep(struct ata_queued_cmd *qc);
 static int ahci_pmp_qc_defer(struct ata_queued_cmd *qc);
 static void ahci_freeze(struct ata_port *ap);
 static void ahci_thaw(struct ata_port *ap);
@@ -1624,7 +1624,7 @@ static int ahci_pmp_qc_defer(struct ata_queued_cmd *qc)
 		return sata_pmp_qc_defer_cmd_switch(qc);
 }
 
-static void ahci_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors ahci_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct ahci_port_priv *pp = ap->private_data;
@@ -1660,6 +1660,8 @@ static void ahci_qc_prep(struct ata_queued_cmd *qc)
 		opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
 
 	ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
+
+	return AC_ERR_OK;
 }
 
 static void ahci_fbs_dec_intr(struct ata_port *ap)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 8befce036af8..129556fcf6be 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_ops);
  * RETURNS:
  * 0 on success otherwise a negative error code
  */
-static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
+int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
 {
 	int rc, i;
 
@@ -74,6 +74,7 @@ disable_phys:
 	}
 	return rc;
 }
+EXPORT_SYMBOL_GPL(ahci_platform_enable_phys);
 
 /**
  * ahci_platform_disable_phys - Disable PHYs
@@ -81,7 +82,7 @@ disable_phys:
  *
  * This function disables all PHYs found in hpriv->phys.
  */
-static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
+void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
 {
 	int i;
 
@@ -90,6 +91,7 @@ static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
 		phy_exit(hpriv->phys[i]);
 	}
 }
+EXPORT_SYMBOL_GPL(ahci_platform_disable_phys);
 
 /**
  * ahci_platform_enable_clks - Enable platform clocks
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 28c492be0a57..6f4ab5c5b52d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4980,7 +4980,10 @@ int ata_std_qc_defer(struct ata_queued_cmd *qc)
 	return ATA_DEFER_LINK;
 }
 
-void ata_noop_qc_prep(struct ata_queued_cmd *qc) { }
+enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc)
+{
+	return AC_ERR_OK;
+}
 
 /**
  *	ata_sg_init - Associate command with scatter-gather table.
@@ -5326,6 +5329,30 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
 }
 
 /**
+ *	ata_qc_get_active - get bitmask of active qcs
+ *	@ap: port in question
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ *
+ *	RETURNS:
+ *	Bitmask of active qcs
+ */
+u64 ata_qc_get_active(struct ata_port *ap)
+{
+	u64 qc_active = ap->qc_active;
+
+	/* ATA_TAG_INTERNAL is sent to hw as tag 0 */
+	if (qc_active & (1ULL << ATA_TAG_INTERNAL)) {
+		qc_active |= (1 << 0);
+		qc_active &= ~(1ULL << ATA_TAG_INTERNAL);
+	}
+
+	return qc_active;
+}
+EXPORT_SYMBOL_GPL(ata_qc_get_active);
+
+/**
  *	ata_qc_complete_multiple - Complete multiple qcs successfully
  *	@ap: port in question
  *	@qc_active: new qc_active mask
@@ -5443,7 +5470,9 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
 		return;
 	}
 
-	ap->ops->qc_prep(qc);
+	qc->err_mask |= ap->ops->qc_prep(qc);
+	if (unlikely(qc->err_mask))
+		goto err;
 	trace_ata_qc_issue(qc);
 	qc->err_mask |= ap->ops->qc_issue(qc);
 	if (unlikely(qc->err_mask))
@@ -6708,6 +6737,9 @@ void ata_host_detach(struct ata_host *host)
 {
 	int i;
 
+	/* Ensure ata_port probe has completed */
+	async_synchronize_full();
+
 	for (i = 0; i < host->n_ports; i++)
 		ata_port_detach(host->ports[i]);
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 58e09ffe8b9c..eb2eb599e602 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -17,6 +17,7 @@
  *  - http://www.t13.org/
  */
 
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
@@ -761,6 +762,10 @@ static int ata_ioc32(struct ata_port *ap)
 	return 0;
 }
 
+/*
+ * This handles both native and compat commands, so anything added
+ * here must have a compatible argument, or check in_compat_syscall()
+ */
 int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
 		     unsigned int cmd, void __user *arg)
 {
@@ -773,6 +778,10 @@ int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
 		spin_lock_irqsave(ap->lock, flags);
 		val = ata_ioc32(ap);
 		spin_unlock_irqrestore(ap->lock, flags);
+#ifdef CONFIG_COMPAT
+		if (in_compat_syscall())
+			return put_user(val, (compat_ulong_t __user *)arg);
+#endif
 		return put_user(val, (unsigned long __user *)arg);
 
 	case HDIO_SET_32BIT:
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 4ed682da52ae..038db94216a9 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2679,12 +2679,14 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
  *	LOCKING:
  *	spin_lock_irqsave(host lock)
  */
-void ata_bmdma_qc_prep(struct ata_queued_cmd *qc)
+enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	ata_bmdma_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 EXPORT_SYMBOL_GPL(ata_bmdma_qc_prep);
 
@@ -2697,12 +2699,14 @@ EXPORT_SYMBOL_GPL(ata_bmdma_qc_prep);
  *	LOCKING:
  *	spin_lock_irqsave(host lock)
  */
-void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
+enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	ata_bmdma_fill_sg_dumb(qc);
+
+	return AC_ERR_OK;
 }
 EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep);
 
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index ebecab8c3f36..391dff0f25a2 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -219,7 +219,6 @@ struct arasan_cf_dev {
 
 static struct scsi_host_template arasan_cf_sht = {
 	ATA_BASE_SHT(DRIVER_NAME),
-	.sg_tablesize = SG_NONE,
 	.dma_boundary = 0xFFFFFFFFUL,
 };
 
@@ -825,7 +824,7 @@ static int arasan_cf_probe(struct platform_device *pdev)
 		quirk |= CF_BROKEN_MWDMA | CF_BROKEN_UDMA;
 
 	acdev->pbase = res->start;
-	acdev->vbase = devm_ioremap_nocache(&pdev->dev, res->start,
+	acdev->vbase = devm_ioremap(&pdev->dev, res->start,
 			resource_size(res));
 	if (!acdev->vbase) {
 		dev_warn(&pdev->dev, "ioremap fail\n");
diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c
index 3aa006c5ed0c..6bd2228bb6ff 100644
--- a/drivers/ata/pata_artop.c
+++ b/drivers/ata/pata_artop.c
@@ -100,7 +100,7 @@ static void artop6210_load_piomode(struct ata_port *ap, struct ata_device *adev,
 {
 	struct pci_dev *pdev	= to_pci_dev(ap->host->dev);
 	int dn = adev->devno + 2 * ap->port_no;
-	const u16 timing[2][5] = {
+	static const u16 timing[2][5] = {
 		{ 0x0000, 0x000A, 0x0008, 0x0303, 0x0301 },
 		{ 0x0700, 0x070A, 0x0708, 0x0403, 0x0401 }
 
@@ -154,7 +154,7 @@ static void artop6260_load_piomode (struct ata_port *ap, struct ata_device *adev
 {
 	struct pci_dev *pdev	= to_pci_dev(ap->host->dev);
 	int dn = adev->devno + 2 * ap->port_no;
-	const u8 timing[2][5] = {
+	static const u8 timing[2][5] = {
 		{ 0x00, 0x0A, 0x08, 0x33, 0x31 },
 		{ 0x70, 0x7A, 0x78, 0x43, 0x41 }
 
diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c
index cfd0cf2cbca6..e01a3a6e4d46 100644
--- a/drivers/ata/pata_atp867x.c
+++ b/drivers/ata/pata_atp867x.c
@@ -422,7 +422,7 @@ static int atp867x_ata_pci_sff_init_host(struct ata_host *host)
 #ifdef	ATP867X_DEBUG
 	atp867x_check_res(pdev);
 
-	for (i = 0; i < PCI_ROM_RESOURCE; i++)
+	for (i = 0; i < PCI_STD_NUM_BARS; i++)
 		printk(KERN_DEBUG "ATP867X: iomap[%d]=0x%llx\n", i,
 			(unsigned long long)(host->iomap[i]));
 #endif
diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c
index 41e0d6a6cd05..27b0952fde6b 100644
--- a/drivers/ata/pata_falcon.c
+++ b/drivers/ata/pata_falcon.c
@@ -33,7 +33,6 @@
 #define DRV_NAME "pata_falcon"
 #define DRV_VERSION "0.1.0"
 
-#define ATA_HD_BASE	0xfff00000
 #define ATA_HD_CONTROL	0x39
 
 static struct scsi_host_template pata_falcon_sht = {
@@ -120,24 +119,22 @@ static struct ata_port_operations pata_falcon_ops = {
 	.set_mode	= pata_falcon_set_mode,
 };
 
-static int pata_falcon_init_one(void)
+static int __init pata_falcon_init_one(struct platform_device *pdev)
 {
+	struct resource *res;
 	struct ata_host *host;
 	struct ata_port *ap;
-	struct platform_device *pdev;
 	void __iomem *base;
 
-	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
-		return -ENODEV;
-
-	pr_info(DRV_NAME ": Atari Falcon PATA controller\n");
+	dev_info(&pdev->dev, "Atari Falcon PATA controller\n");
 
-	pdev = platform_device_register_simple(DRV_NAME, 0, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
 
-	if (!devm_request_mem_region(&pdev->dev, ATA_HD_BASE, 0x40, DRV_NAME)) {
-		pr_err(DRV_NAME ": resources busy\n");
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				     resource_size(res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
 		return -EBUSY;
 	}
 
@@ -152,7 +149,7 @@ static int pata_falcon_init_one(void)
 	ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
 	ap->flags |= ATA_FLAG_PIO_POLLING;
 
-	base = (void __iomem *)ATA_HD_BASE;
+	base = (void __iomem *)res->start;
 	ap->ioaddr.data_addr		= base;
 	ap->ioaddr.error_addr		= base + 1 + 1 * 4;
 	ap->ioaddr.feature_addr		= base + 1 + 1 * 4;
@@ -174,9 +171,26 @@ static int pata_falcon_init_one(void)
 	return ata_host_activate(host, 0, NULL, 0, &pata_falcon_sht);
 }
 
-module_init(pata_falcon_init_one);
+static int __exit pata_falcon_remove_one(struct platform_device *pdev)
+{
+	struct ata_host *host = platform_get_drvdata(pdev);
+
+	ata_host_detach(host);
+
+	return 0;
+}
+
+static struct platform_driver pata_falcon_driver = {
+	.remove = __exit_p(pata_falcon_remove_one),
+	.driver   = {
+		.name	= "atari-falcon-ide",
+	},
+};
+
+module_platform_driver_probe(pata_falcon_driver, pata_falcon_init_one);
 
 MODULE_AUTHOR("Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("low-level driver for Atari Falcon PATA");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:atari-falcon-ide");
 MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 57f2ec71cfc3..e47a28271f5b 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -510,7 +510,7 @@ static int pata_macio_cable_detect(struct ata_port *ap)
 	return ATA_CBL_PATA40;
 }
 
-static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc)
 {
 	unsigned int write = (qc->tf.flags & ATA_TFLAG_WRITE);
 	struct ata_port *ap = qc->ap;
@@ -523,7 +523,7 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
 		   __func__, qc, qc->flags, write, qc->dev->devno);
 
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	table = (struct dbdma_cmd *) priv->dma_table_cpu;
 
@@ -568,6 +568,8 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
 	table->command = cpu_to_le16(DBDMA_STOP);
 
 	dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
+
+	return AC_ERR_OK;
 }
 
 
@@ -977,7 +979,7 @@ static void pata_macio_invariants(struct pata_macio_priv *priv)
 	priv->aapl_bus_id =  bidp ? *bidp : 0;
 
 	/* Fixup missing Apple bus ID in case of media-bay */
-	if (priv->mediabay && bidp == 0)
+	if (priv->mediabay && !bidp)
 		priv->aapl_bus_id = 1;
 }
 
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index d3d851b014a3..bd87476ab481 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -891,7 +891,7 @@ static int octeon_cf_probe(struct platform_device *pdev)
 					of_node_put(dma_node);
 					return -EINVAL;
 				}
-				cf_port->dma_base = (u64)devm_ioremap_nocache(&pdev->dev, res_dma->start,
+				cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start,
 									 resource_size(res_dma));
 				if (!cf_port->dma_base) {
 					of_node_put(dma_node);
@@ -909,7 +909,7 @@ static int octeon_cf_probe(struct platform_device *pdev)
 		if (!res_cs1)
 			return -EINVAL;
 
-		cs1 = devm_ioremap_nocache(&pdev->dev, res_cs1->start,
+		cs1 = devm_ioremap(&pdev->dev, res_cs1->start,
 					   resource_size(res_cs1));
 		if (!cs1)
 			return rv;
@@ -925,7 +925,7 @@ static int octeon_cf_probe(struct platform_device *pdev)
 	if (!res_cs0)
 		return -EINVAL;
 
-	cs0 = devm_ioremap_nocache(&pdev->dev, res_cs0->start,
+	cs0 = devm_ioremap(&pdev->dev, res_cs0->start,
 				   resource_size(res_cs0));
 	if (!cs0)
 		return rv;
diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
index 4afcb8e63e21..41430f79663c 100644
--- a/drivers/ata/pata_pxa.c
+++ b/drivers/ata/pata_pxa.c
@@ -44,25 +44,27 @@ static void pxa_ata_dma_irq(void *d)
 /*
  * Prepare taskfile for submission.
  */
-static void pxa_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pxa_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct pata_pxa_data *pd = qc->ap->private_data;
 	struct dma_async_tx_descriptor *tx;
 	enum dma_transfer_direction dir;
 
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	dir = (qc->dma_dir == DMA_TO_DEVICE ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
 	tx = dmaengine_prep_slave_sg(pd->dma_chan, qc->sg, qc->n_elem, dir,
 				     DMA_PREP_INTERRUPT);
 	if (!tx) {
 		ata_dev_err(qc->dev, "prep_slave_sg() failed\n");
-		return;
+		return AC_ERR_OK;
 	}
 	tx->callback = pxa_ata_dma_irq;
 	tx->callback_param = pd;
 	pd->dma_cookie = dmaengine_submit(tx);
+
+	return AC_ERR_OK;
 }
 
 /*
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index deae466395de..479c4b29b856 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -140,7 +140,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
 	info->gpio_line = gpiod;
 	info->irq = irq;
 
-	info->iobase = devm_ioremap_nocache(&pdev->dev, res->start,
+	info->iobase = devm_ioremap(&pdev->dev, res->start,
 				resource_size(res));
 	if (!info->iobase)
 		return -ENOMEM;
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index cb490531b62e..5db55e1e2a61 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -116,7 +116,7 @@ static int adma_ata_init_one(struct pci_dev *pdev,
 				const struct pci_device_id *ent);
 static int adma_port_start(struct ata_port *ap);
 static void adma_port_stop(struct ata_port *ap);
-static void adma_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int adma_qc_issue(struct ata_queued_cmd *qc);
 static int adma_check_atapi_dma(struct ata_queued_cmd *qc);
 static void adma_freeze(struct ata_port *ap);
@@ -295,7 +295,7 @@ static int adma_fill_sg(struct ata_queued_cmd *qc)
 	return i;
 }
 
-static void adma_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct adma_port_priv *pp = qc->ap->private_data;
 	u8  *buf = pp->pkt;
@@ -306,7 +306,7 @@ static void adma_qc_prep(struct ata_queued_cmd *qc)
 
 	adma_enter_reg_mode(qc->ap);
 	if (qc->tf.protocol != ATA_PROT_DMA)
-		return;
+		return AC_ERR_OK;
 
 	buf[i++] = 0;	/* Response flags */
 	buf[i++] = 0;	/* reserved */
@@ -371,6 +371,7 @@ static void adma_qc_prep(struct ata_queued_cmd *qc)
 			printk("%s\n", obuf);
 	}
 #endif
+	return AC_ERR_OK;
 }
 
 static inline void adma_packet_start(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 8e9cb198fcd1..d55ee244d693 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -502,7 +502,7 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
 	return num_prde;
 }
 
-static void sata_fsl_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct sata_fsl_port_priv *pp = ap->private_data;
@@ -548,6 +548,8 @@ static void sata_fsl_qc_prep(struct ata_queued_cmd *qc)
 
 	VPRINTK("SATA FSL : xx_qc_prep, di = 0x%x, ttl = %d, num_prde = %d\n",
 		desc_info, ttl_dwords, num_prde);
+
+	return AC_ERR_OK;
 }
 
 static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc)
@@ -1278,7 +1280,7 @@ static void sata_fsl_host_intr(struct ata_port *ap)
 				     i, ioread32(hcr_base + CC),
 				     ioread32(hcr_base + CA));
 		}
-		ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+		ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
 		return;
 
 	} else if ((ap->qc_active & (1ULL << ATA_TAG_INTERNAL))) {
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 7f99e23bff88..a6b76cc12a66 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -478,7 +478,7 @@ static void inic_fill_sg(struct inic_prd *prd, struct ata_queued_cmd *qc)
 	prd[-1].flags |= PRD_END;
 }
 
-static void inic_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors inic_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct inic_port_priv *pp = qc->ap->private_data;
 	struct inic_pkt *pkt = pp->pkt;
@@ -538,6 +538,8 @@ static void inic_qc_prep(struct ata_queued_cmd *qc)
 		inic_fill_sg(prd, qc);
 
 	pp->cpb_tbl[0] = pp->pkt_dma;
+
+	return AC_ERR_OK;
 }
 
 static unsigned int inic_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index ad385a113391..d7228f8e9297 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -592,8 +592,8 @@ static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
 static int mv_port_start(struct ata_port *ap);
 static void mv_port_stop(struct ata_port *ap);
 static int mv_qc_defer(struct ata_queued_cmd *qc);
-static void mv_qc_prep(struct ata_queued_cmd *qc);
-static void mv_qc_prep_iie(struct ata_queued_cmd *qc);
+static enum ata_completion_errors mv_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors mv_qc_prep_iie(struct ata_queued_cmd *qc);
 static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
 static int mv_hardreset(struct ata_link *link, unsigned int *class,
 			unsigned long deadline);
@@ -2031,7 +2031,7 @@ static void mv_rw_multi_errata_sata24(struct ata_queued_cmd *qc)
  *      LOCKING:
  *      Inherited from caller.
  */
-static void mv_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors mv_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct mv_port_priv *pp = ap->private_data;
@@ -2043,15 +2043,15 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
 	switch (tf->protocol) {
 	case ATA_PROT_DMA:
 		if (tf->command == ATA_CMD_DSM)
-			return;
+			return AC_ERR_OK;
 		/* fall-thru */
 	case ATA_PROT_NCQ:
 		break;	/* continue below */
 	case ATA_PROT_PIO:
 		mv_rw_multi_errata_sata24(qc);
-		return;
+		return AC_ERR_OK;
 	default:
-		return;
+		return AC_ERR_OK;
 	}
 
 	/* Fill in command request block
@@ -2098,12 +2098,10 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
 		 * non-NCQ mode are: [RW] STREAM DMA and W DMA FUA EXT, none
 		 * of which are defined/used by Linux.  If we get here, this
 		 * driver needs work.
-		 *
-		 * FIXME: modify libata to give qc_prep a return value and
-		 * return error here.
 		 */
-		BUG_ON(tf->command);
-		break;
+		ata_port_err(ap, "%s: unsupported command: %.2x\n", __func__,
+				tf->command);
+		return AC_ERR_INVALID;
 	}
 	mv_crqb_pack_cmd(cw++, tf->nsect, ATA_REG_NSECT, 0);
 	mv_crqb_pack_cmd(cw++, tf->hob_lbal, ATA_REG_LBAL, 0);
@@ -2116,8 +2114,10 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
 	mv_crqb_pack_cmd(cw++, tf->command, ATA_REG_CMD, 1);	/* last */
 
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 	mv_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 
 /**
@@ -2132,7 +2132,7 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
  *      LOCKING:
  *      Inherited from caller.
  */
-static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
+static enum ata_completion_errors mv_qc_prep_iie(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct mv_port_priv *pp = ap->private_data;
@@ -2143,9 +2143,9 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
 
 	if ((tf->protocol != ATA_PROT_DMA) &&
 	    (tf->protocol != ATA_PROT_NCQ))
-		return;
+		return AC_ERR_OK;
 	if (tf->command == ATA_CMD_DSM)
-		return;  /* use bmdma for this */
+		return AC_ERR_OK;  /* use bmdma for this */
 
 	/* Fill in Gen IIE command request block */
 	if (!(tf->flags & ATA_TFLAG_WRITE))
@@ -2186,8 +2186,10 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
 		);
 
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 	mv_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 
 /**
@@ -2827,7 +2829,7 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
 	}
 
 	if (work_done) {
-		ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+		ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
 
 		/* Update the software queue position index in hardware */
 		writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) |
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 56946012d113..eb9dc14e5147 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -297,7 +297,7 @@ static void nv_ck804_freeze(struct ata_port *ap);
 static void nv_ck804_thaw(struct ata_port *ap);
 static int nv_adma_slave_config(struct scsi_device *sdev);
 static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc);
-static void nv_adma_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc);
 static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance);
 static void nv_adma_irq_clear(struct ata_port *ap);
@@ -319,7 +319,7 @@ static void nv_mcp55_freeze(struct ata_port *ap);
 static void nv_swncq_error_handler(struct ata_port *ap);
 static int nv_swncq_slave_config(struct scsi_device *sdev);
 static int nv_swncq_port_start(struct ata_port *ap);
-static void nv_swncq_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors nv_swncq_qc_prep(struct ata_queued_cmd *qc);
 static void nv_swncq_fill_sg(struct ata_queued_cmd *qc);
 static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc);
 static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis);
@@ -984,7 +984,7 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance)
 					check_commands = 0;
 				check_commands &= ~(1 << pos);
 			}
-			ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+			ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
 		}
 	}
 
@@ -1344,7 +1344,7 @@ static int nv_adma_use_reg_mode(struct ata_queued_cmd *qc)
 	return 1;
 }
 
-static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct nv_adma_port_priv *pp = qc->ap->private_data;
 	struct nv_adma_cpb *cpb = &pp->cpb[qc->hw_tag];
@@ -1356,7 +1356,7 @@ static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
 			(qc->flags & ATA_QCFLAG_DMAMAP));
 		nv_adma_register_mode(qc->ap);
 		ata_bmdma_qc_prep(qc);
-		return;
+		return AC_ERR_OK;
 	}
 
 	cpb->resp_flags = NV_CPB_RESP_DONE;
@@ -1388,6 +1388,8 @@ static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
 	cpb->ctl_flags = ctl_flags;
 	wmb();
 	cpb->resp_flags = 0;
+
+	return AC_ERR_OK;
 }
 
 static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
@@ -1950,17 +1952,19 @@ static int nv_swncq_port_start(struct ata_port *ap)
 	return 0;
 }
 
-static void nv_swncq_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors nv_swncq_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (qc->tf.protocol != ATA_PROT_NCQ) {
 		ata_bmdma_qc_prep(qc);
-		return;
+		return AC_ERR_OK;
 	}
 
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	nv_swncq_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 
 static void nv_swncq_fill_sg(struct ata_queued_cmd *qc)
@@ -2325,7 +2329,7 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         // Make sure this is a SATA controller by counting the number of bars
         // (NVIDIA SATA controllers will always have six bars).  Otherwise,
         // it's an IDE controller and we ignore it.
-	for (bar = 0; bar < 6; bar++)
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
 		if (pci_resource_start(pdev, bar) == 0)
 			return -ENODEV;
 
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 5fd464765ddc..c451d7d1c817 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -139,7 +139,7 @@ static int pdc_sata_scr_write(struct ata_link *link, unsigned int sc_reg, u32 va
 static int pdc_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int pdc_common_port_start(struct ata_port *ap);
 static int pdc_sata_port_start(struct ata_port *ap);
-static void pdc_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc);
 static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
 static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
 static int pdc_check_atapi_dma(struct ata_queued_cmd *qc);
@@ -633,7 +633,7 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
 	prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
 }
 
-static void pdc_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct pdc_port_priv *pp = qc->ap->private_data;
 	unsigned int i;
@@ -665,6 +665,8 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc)
 	default:
 		break;
 	}
+
+	return AC_ERR_OK;
 }
 
 static int pdc_is_sataii_tx4(unsigned long flags)
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index c53c5a47204d..ef00ab644afb 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -100,7 +100,7 @@ static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int qs_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int qs_port_start(struct ata_port *ap);
 static void qs_host_stop(struct ata_host *host);
-static void qs_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int qs_qc_issue(struct ata_queued_cmd *qc);
 static int qs_check_atapi_dma(struct ata_queued_cmd *qc);
 static void qs_freeze(struct ata_port *ap);
@@ -260,7 +260,7 @@ static unsigned int qs_fill_sg(struct ata_queued_cmd *qc)
 	return si;
 }
 
-static void qs_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct qs_port_priv *pp = qc->ap->private_data;
 	u8 dflags = QS_DF_PORD, *buf = pp->pkt;
@@ -272,7 +272,7 @@ static void qs_qc_prep(struct ata_queued_cmd *qc)
 
 	qs_enter_reg_mode(qc->ap);
 	if (qc->tf.protocol != ATA_PROT_DMA)
-		return;
+		return AC_ERR_OK;
 
 	nelem = qs_fill_sg(qc);
 
@@ -295,6 +295,8 @@ static void qs_qc_prep(struct ata_queued_cmd *qc)
 
 	/* frame information structure (FIS) */
 	ata_tf_to_fis(&qc->tf, 0, 1, &buf[32]);
+
+	return AC_ERR_OK;
 }
 
 static inline void qs_packet_start(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 3495e1733a8e..980aacdbcf3b 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -550,12 +550,14 @@ static void sata_rcar_bmdma_fill_sg(struct ata_queued_cmd *qc)
 	prd[si - 1].addr |= cpu_to_le32(SATA_RCAR_DTEND);
 }
 
-static void sata_rcar_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sata_rcar_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	sata_rcar_bmdma_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 
 static void sata_rcar_bmdma_setup(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index e6fbae2f645a..75321f1ceba5 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -103,7 +103,7 @@ static void sil_dev_config(struct ata_device *dev);
 static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
 static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed);
-static void sil_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors sil_qc_prep(struct ata_queued_cmd *qc);
 static void sil_bmdma_setup(struct ata_queued_cmd *qc);
 static void sil_bmdma_start(struct ata_queued_cmd *qc);
 static void sil_bmdma_stop(struct ata_queued_cmd *qc);
@@ -317,12 +317,14 @@ static void sil_fill_sg(struct ata_queued_cmd *qc)
 		last_prd->flags_len |= cpu_to_le32(ATA_PRD_EOT);
 }
 
-static void sil_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sil_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
+		return AC_ERR_OK;
 
 	sil_fill_sg(qc);
+
+	return AC_ERR_OK;
 }
 
 static unsigned char sil_get_device_cache_line(struct pci_dev *pdev)
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 7bef82de53ca..560070d4f1d0 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -326,7 +326,7 @@ static void sil24_dev_config(struct ata_device *dev);
 static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val);
 static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val);
 static int sil24_qc_defer(struct ata_queued_cmd *qc);
-static void sil24_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc);
 static bool sil24_qc_fill_rtf(struct ata_queued_cmd *qc);
 static void sil24_pmp_attach(struct ata_port *ap);
@@ -830,7 +830,7 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc)
 	return ata_std_qc_defer(qc);
 }
 
-static void sil24_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct sil24_port_priv *pp = ap->private_data;
@@ -874,6 +874,8 @@ static void sil24_qc_prep(struct ata_queued_cmd *qc)
 
 	if (qc->flags & ATA_QCFLAG_DMAMAP)
 		sil24_fill_sg(qc, sge);
+
+	return AC_ERR_OK;
 }
 
 static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 2277ba0c9c7f..2c7b30c5ea3d 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -202,7 +202,7 @@ static void pdc_error_handler(struct ata_port *ap);
 static void pdc_freeze(struct ata_port *ap);
 static void pdc_thaw(struct ata_port *ap);
 static int pdc_port_start(struct ata_port *ap);
-static void pdc20621_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc);
 static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
 static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
 static unsigned int pdc20621_dimm_init(struct ata_host *host);
@@ -530,7 +530,7 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
 	VPRINTK("ata pkt buf ofs %u, mmio copied\n", i);
 }
 
-static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc)
 {
 	switch (qc->tf.protocol) {
 	case ATA_PROT_DMA:
@@ -542,6 +542,8 @@ static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
 	default:
 		break;
 	}
+
+	return AC_ERR_OK;
 }
 
 static void __pdc20621_push_hdma(struct ata_queued_cmd *qc,
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index b23d1e4bad33..17d47ad03ab7 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -31,12 +31,6 @@
 #include "suni.h"
 #include "eni.h"
 
-#if !defined(__i386__) && !defined(__x86_64__)
-#ifndef ioremap_nocache
-#define ioremap_nocache(X,Y) ioremap(X,Y)
-#endif 
-#endif
-
 /*
  * TODO:
  *
@@ -374,7 +368,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
 		here = (eni_vcc->descr+skip) & (eni_vcc->words-1);
 		dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci
 		    << MID_DMA_VCI_SHIFT) | MID_DT_JK;
-		j++;
+		dma[j++] = 0;
 	}
 	here = (eni_vcc->descr+size+skip) & (eni_vcc->words-1);
 	if (!eff) size += skip;
@@ -447,7 +441,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
 	if (size != eff) {
 		dma[j++] = (here << MID_DMA_COUNT_SHIFT) |
 		    (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK;
-		j++;
+		dma[j++] = 0;
 	}
 	if (!j || j > 2*RX_DMA_BUF) {
 		printk(KERN_CRIT DEV_LABEL "!j or j too big!!!\n");
@@ -1725,7 +1719,7 @@ static int eni_do_init(struct atm_dev *dev)
 	}
 	printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d,base=0x%lx,irq=%d,",
 	    dev->number,pci_dev->revision,real_base,eni_dev->irq);
-	if (!(base = ioremap_nocache(real_base,MAP_MAX_SIZE))) {
+	if (!(base = ioremap(real_base,MAP_MAX_SIZE))) {
 		printk("\n");
 		printk(KERN_ERR DEV_LABEL "(itf %d): can't set up page "
 		    "mapping\n",dev->number);
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 2bbab0230aeb..cc87004d5e2d 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -912,6 +912,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
 			}
 			if (!to) {
 				printk ("No more free channels for FS50..\n");
+				kfree(vcc);
 				return -EBUSY;
 			}
 			vcc->channo = dev->channo;
@@ -922,6 +923,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
 			if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) ||
 			    ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) {
 				printk ("Channel is in use for FS155.\n");
+				kfree(vcc);
 				return -EBUSY;
 			}
 		}
@@ -935,6 +937,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
 			    tc, sizeof (struct fs_transmit_config));
 		if (!tc) {
 			fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n");
+			kfree(vcc);
 			return -ENOMEM;
 		}
 
@@ -1070,7 +1073,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
 					RC_FLAGS_BFPS_BFP * bfp |
 					RC_FLAGS_RXBM_PSB, 0, 0);
 			break;
-		};
+		}
 		if (IS_FS50 (dev)) {
 			submit_command (dev, &dev->hp_txq, 
 					QE_CMD_REG_WR | QE_CMD_IMM_INQ,
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index f1a500205313..8fbd36eb8941 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1414,12 +1414,14 @@ fore200e_open(struct atm_vcc *vcc)
 static void
 fore200e_close(struct atm_vcc* vcc)
 {
-    struct fore200e*        fore200e = FORE200E_DEV(vcc->dev);
     struct fore200e_vcc*    fore200e_vcc;
+    struct fore200e*        fore200e;
     struct fore200e_vc_map* vc_map;
     unsigned long           flags;
 
     ASSERT(vcc);
+    fore200e = FORE200E_DEV(vcc->dev);
+
     ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS));
     ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS));
 
@@ -1464,10 +1466,10 @@ fore200e_close(struct atm_vcc* vcc)
 static int
 fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
 {
-    struct fore200e*        fore200e     = FORE200E_DEV(vcc->dev);
-    struct fore200e_vcc*    fore200e_vcc = FORE200E_VCC(vcc);
+    struct fore200e*        fore200e;
+    struct fore200e_vcc*    fore200e_vcc;
     struct fore200e_vc_map* vc_map;
-    struct host_txq*        txq          = &fore200e->host_txq;
+    struct host_txq*        txq;
     struct host_txq_entry*  entry;
     struct tpd*             tpd;
     struct tpd_haddr        tpd_haddr;
@@ -1480,9 +1482,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
     unsigned char*          data;
     unsigned long           flags;
 
-    ASSERT(vcc);
-    ASSERT(fore200e);
-    ASSERT(fore200e_vcc);
+    if (!vcc)
+        return -EINVAL;
+
+    fore200e = FORE200E_DEV(vcc->dev);
+    fore200e_vcc = FORE200E_VCC(vcc);
+
+    if (!fore200e)
+        return -EINVAL;
+
+    txq = &fore200e->host_txq;
+    if (!fore200e_vcc)
+        return -EINVAL;
 
     if (!test_bit(ATM_VF_READY, &vcc->flags)) {
 	DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi);
diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index 4074886b7bc8..2002291ab338 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -57,7 +57,7 @@ static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	return vm_map_pages_zero(vma, &pages, 1);
 }
 
-static struct fb_ops cfag12864bfb_ops = {
+static const struct fb_ops cfag12864bfb_ops = {
 	.owner = THIS_MODULE,
 	.fb_read = fb_sys_read,
 	.fb_write = fb_sys_write,
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index bef6b85778b6..874c259a8829 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -288,31 +288,6 @@ static int charlcd_init_display(struct charlcd *lcd)
 }
 
 /*
- * Parses an unsigned integer from a string, until a non-digit character
- * is found. The empty string is not accepted. No overflow checks are done.
- *
- * Returns whether the parsing was successful. Only in that case
- * the output parameters are written to.
- *
- * TODO: If the kernel adds an inplace version of kstrtoul(), this function
- * could be easily replaced by that.
- */
-static bool parse_n(const char *s, unsigned long *res, const char **next_s)
-{
-	if (!isdigit(*s))
-		return false;
-
-	*res = 0;
-	while (isdigit(*s)) {
-		*res = *res * 10 + (*s - '0');
-		++s;
-	}
-
-	*next_s = s;
-	return true;
-}
-
-/*
  * Parses a movement command of the form "(.*);", where the group can be
  * any number of subcommands of the form "(x|y)[0-9]+".
  *
@@ -336,6 +311,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
 {
 	unsigned long new_x = *x;
 	unsigned long new_y = *y;
+	char *p;
 
 	for (;;) {
 		if (!*s)
@@ -345,11 +321,15 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
 			break;
 
 		if (*s == 'x') {
-			if (!parse_n(s + 1, &new_x, &s))
+			new_x = simple_strtoul(s + 1, &p, 10);
+			if (p == s + 1)
 				return false;
+			s = p;
 		} else if (*s == 'y') {
-			if (!parse_n(s + 1, &new_y, &s))
+			new_y = simple_strtoul(s + 1, &p, 10);
+			if (p == s + 1)
 				return false;
+			s = p;
 		} else {
 			return false;
 		}
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index a2fcde582e2a..d951d54b26f5 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -228,7 +228,7 @@ static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	return vm_map_pages_zero(vma, &pages, 1);
 }
 
-static struct fb_ops ht16k33_fb_ops = {
+static const struct fb_ops ht16k33_fb_ops = {
 	.owner = THIS_MODULE,
 	.fb_read = fb_sys_read,
 	.fb_write = fb_sys_write,
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 28b92e3cc570..5f0bc74d2409 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -148,6 +148,10 @@ config DEBUG_TEST_DRIVER_REMOVE
 	  unusable. You should say N here unless you are explicitly looking to
 	  test this functionality.
 
+config PM_QOS_KUNIT_TEST
+	bool "KUnit Test for PM QoS features"
+	depends on KUNIT=y
+
 config HMEM_REPORTING
 	bool
 	default n
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1eb81f113786..6119e11a9f95 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -248,6 +248,16 @@ core_initcall(free_raw_capacity);
 #endif
 
 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+/*
+ * This function returns the logic cpu number of the node.
+ * There are basically three kinds of return values:
+ * (1) logic cpu number which is > 0.
+ * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
+ * there is no possible logical CPU in the kernel to match. This happens
+ * when CONFIG_NR_CPUS is configure to be smaller than the number of
+ * CPU nodes in DT. We need to just ignore this case.
+ * (3) -1 if the node does not exist in the device tree
+ */
 static int __init get_cpu_for_node(struct device_node *node)
 {
 	struct device_node *cpu_node;
@@ -261,7 +271,8 @@ static int __init get_cpu_for_node(struct device_node *node)
 	if (cpu >= 0)
 		topology_parse_cpu_capacity(cpu_node, cpu);
 	else
-		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
+		pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
+			cpu_node, cpumask_pr_args(cpu_possible_mask));
 
 	of_node_put(cpu_node);
 	return cpu;
@@ -286,9 +297,8 @@ static int __init parse_core(struct device_node *core, int package_id,
 				cpu_topology[cpu].package_id = package_id;
 				cpu_topology[cpu].core_id = core_id;
 				cpu_topology[cpu].thread_id = i;
-			} else {
-				pr_err("%pOF: Can't get CPU for thread\n",
-				       t);
+			} else if (cpu != -ENODEV) {
+				pr_err("%pOF: Can't get CPU for thread\n", t);
 				of_node_put(t);
 				return -EINVAL;
 			}
@@ -307,7 +317,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 
 		cpu_topology[cpu].package_id = package_id;
 		cpu_topology[cpu].core_id = core_id;
-	} else if (leaf) {
+	} else if (leaf && cpu != -ENODEV) {
 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
 		return -EINVAL;
 	}
diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c
index 20736aaa0e69..f7bd0f4db13d 100644
--- a/drivers/base/attribute_container.c
+++ b/drivers/base/attribute_container.c
@@ -236,6 +236,109 @@ attribute_container_remove_device(struct device *dev,
 	mutex_unlock(&attribute_container_mutex);
 }
 
+static int
+do_attribute_container_device_trigger_safe(struct device *dev,
+					   struct attribute_container *cont,
+					   int (*fn)(struct attribute_container *,
+						     struct device *, struct device *),
+					   int (*undo)(struct attribute_container *,
+						       struct device *, struct device *))
+{
+	int ret;
+	struct internal_container *ic, *failed;
+	struct klist_iter iter;
+
+	if (attribute_container_no_classdevs(cont))
+		return fn(cont, dev, NULL);
+
+	klist_for_each_entry(ic, &cont->containers, node, &iter) {
+		if (dev == ic->classdev.parent) {
+			ret = fn(cont, dev, &ic->classdev);
+			if (ret) {
+				failed = ic;
+				klist_iter_exit(&iter);
+				goto fail;
+			}
+		}
+	}
+	return 0;
+
+fail:
+	if (!undo)
+		return ret;
+
+	/* Attempt to undo the work partially done. */
+	klist_for_each_entry(ic, &cont->containers, node, &iter) {
+		if (ic == failed) {
+			klist_iter_exit(&iter);
+			break;
+		}
+		if (dev == ic->classdev.parent)
+			undo(cont, dev, &ic->classdev);
+	}
+	return ret;
+}
+
+/**
+ * attribute_container_device_trigger_safe - execute a trigger for each
+ * matching classdev or fail all of them.
+ *
+ * @dev:  The generic device to run the trigger for
+ * @fn	  the function to execute for each classdev.
+ * @undo  A function to undo the work previously done in case of error
+ *
+ * This function is a safe version of
+ * attribute_container_device_trigger. It stops on the first error and
+ * undo the partial work that has been done, on previous classdev.  It
+ * is guaranteed that either they all succeeded, or none of them
+ * succeeded.
+ */
+int
+attribute_container_device_trigger_safe(struct device *dev,
+					int (*fn)(struct attribute_container *,
+						  struct device *,
+						  struct device *),
+					int (*undo)(struct attribute_container *,
+						    struct device *,
+						    struct device *))
+{
+	struct attribute_container *cont, *failed = NULL;
+	int ret = 0;
+
+	mutex_lock(&attribute_container_mutex);
+
+	list_for_each_entry(cont, &attribute_container_list, node) {
+
+		if (!cont->match(cont, dev))
+			continue;
+
+		ret = do_attribute_container_device_trigger_safe(dev, cont,
+								 fn, undo);
+		if (ret) {
+			failed = cont;
+			break;
+		}
+	}
+
+	if (ret && !WARN_ON(!undo)) {
+		list_for_each_entry(cont, &attribute_container_list, node) {
+
+			if (failed == cont)
+				break;
+
+			if (!cont->match(cont, dev))
+				continue;
+
+			do_attribute_container_device_trigger_safe(dev, cont,
+								   undo, NULL);
+		}
+	}
+
+	mutex_unlock(&attribute_container_mutex);
+	return ret;
+
+}
+
 /**
  * attribute_container_device_trigger - execute a trigger for each matching classdev
  *
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 0d32544b6f91..40fb069a8a7e 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -1,4 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org>
+ * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de>
+ * Copyright (c) 2008-2012 Novell Inc.
+ * Copyright (c) 2012-2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (c) 2012-2019 Linux Foundation
+ *
+ * Core driver model functions and structures that should not be
+ * shared outside of the drivers/base/ directory.
+ *
+ */
 #include <linux/notifier.h>
 
 /**
@@ -175,3 +186,11 @@ extern void device_links_unbind_consumers(struct device *dev);
 
 /* device pm support */
 void device_pm_move_to_tail(struct device *dev);
+
+#ifdef CONFIG_DEVTMPFS
+int devtmpfs_create_node(struct device *dev);
+int devtmpfs_delete_node(struct device *dev);
+#else
+static inline int devtmpfs_create_node(struct device *dev) { return 0; }
+static inline int devtmpfs_delete_node(struct device *dev) { return 0; }
+#endif
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index a1d1e8256324..886e9054999a 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/async.h>
+#include <linux/device/bus.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/errno.h>
diff --git a/drivers/base/class.c b/drivers/base/class.c
index d8a6a5864c2e..bcd410e6d70a 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2003-2004 IBM Corp.
  */
 
+#include <linux/device/class.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/drivers/base/component.c b/drivers/base/component.c
index 532a3a5d8f63..c7879f5ae2fb 100644
--- a/drivers/base/component.c
+++ b/drivers/base/component.c
@@ -11,7 +11,6 @@
 #include <linux/device.h>
 #include <linux/kref.h>
 #include <linux/list.h>
-#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -102,11 +101,11 @@ static int component_devices_show(struct seq_file *s, void *data)
 	seq_printf(s, "%-40s %20s\n", "device name", "status");
 	seq_puts(s, "-------------------------------------------------------------\n");
 	for (i = 0; i < match->num; i++) {
-		struct device *d = (struct device *)match->compare[i].data;
+		struct component *component = match->compare[i].component;
 
-		seq_printf(s, "%-40s %20s\n", dev_name(d),
-			   match->compare[i].component ?
-			   "registered" : "not registered");
+		seq_printf(s, "%-40s %20s\n",
+			   component ? dev_name(component->dev) : "(unknown)",
+			   component ? (component->bound ? "bound" : "not bound") : "not registered");
 	}
 	mutex_unlock(&component_mutex);
 
@@ -775,5 +774,3 @@ void component_del(struct device *dev, const struct component_ops *ops)
 	kfree(component);
 }
 EXPORT_SYMBOL_GPL(component_del);
-
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 7bd9cd366d41..42a672456432 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -45,6 +45,10 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
 #endif
 
 /* Device links support. */
+static LIST_HEAD(wait_for_suppliers);
+static DEFINE_MUTEX(wfs_lock);
+static LIST_HEAD(deferred_sync);
+static unsigned int defer_sync_state_count = 1;
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
@@ -127,6 +131,9 @@ static int device_is_dependent(struct device *dev, void *target)
 		return ret;
 
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
+			continue;
+
 		if (link->consumer == target)
 			return 1;
 
@@ -196,8 +203,11 @@ static int device_reorder_to_tail(struct device *dev, void *not_used)
 		device_pm_move_last(dev);
 
 	device_for_each_child(dev, NULL, device_reorder_to_tail);
-	list_for_each_entry(link, &dev->links.consumers, s_node)
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
+			continue;
 		device_reorder_to_tail(link->consumer, NULL);
+	}
 
 	return 0;
 }
@@ -224,7 +234,8 @@ void device_pm_move_to_tail(struct device *dev)
 
 #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \
 			       DL_FLAG_AUTOREMOVE_SUPPLIER | \
-			       DL_FLAG_AUTOPROBE_CONSUMER)
+			       DL_FLAG_AUTOPROBE_CONSUMER  | \
+			       DL_FLAG_SYNC_STATE_ONLY)
 
 #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \
 			    DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)
@@ -292,6 +303,8 @@ struct device_link *device_link_add(struct device *consumer,
 
 	if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS ||
 	    (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) ||
+	    (flags & DL_FLAG_SYNC_STATE_ONLY &&
+	     flags != DL_FLAG_SYNC_STATE_ONLY) ||
 	    (flags & DL_FLAG_AUTOPROBE_CONSUMER &&
 	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
 		      DL_FLAG_AUTOREMOVE_SUPPLIER)))
@@ -312,11 +325,14 @@ struct device_link *device_link_add(struct device *consumer,
 
 	/*
 	 * If the supplier has not been fully registered yet or there is a
-	 * reverse dependency between the consumer and the supplier already in
-	 * the graph, return NULL.
+	 * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and
+	 * the supplier already in the graph, return NULL. If the link is a
+	 * SYNC_STATE_ONLY link, we don't check for reverse dependencies
+	 * because it only affects sync_state() callbacks.
 	 */
 	if (!device_pm_initialized(supplier)
-	    || device_is_dependent(consumer, supplier)) {
+	    || (!(flags & DL_FLAG_SYNC_STATE_ONLY) &&
+		  device_is_dependent(consumer, supplier))) {
 		link = NULL;
 		goto out;
 	}
@@ -343,9 +359,14 @@ struct device_link *device_link_add(struct device *consumer,
 		}
 
 		if (flags & DL_FLAG_STATELESS) {
-			link->flags |= DL_FLAG_STATELESS;
 			kref_get(&link->kref);
-			goto out;
+			if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
+			    !(link->flags & DL_FLAG_STATELESS)) {
+				link->flags |= DL_FLAG_STATELESS;
+				goto reorder;
+			} else {
+				goto out;
+			}
 		}
 
 		/*
@@ -367,6 +388,12 @@ struct device_link *device_link_add(struct device *consumer,
 			link->flags |= DL_FLAG_MANAGED;
 			device_link_init_status(link, consumer, supplier);
 		}
+		if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
+		    !(flags & DL_FLAG_SYNC_STATE_ONLY)) {
+			link->flags &= ~DL_FLAG_SYNC_STATE_ONLY;
+			goto reorder;
+		}
+
 		goto out;
 	}
 
@@ -406,6 +433,13 @@ struct device_link *device_link_add(struct device *consumer,
 	    flags & DL_FLAG_PM_RUNTIME)
 		pm_runtime_resume(supplier);
 
+	if (flags & DL_FLAG_SYNC_STATE_ONLY) {
+		dev_dbg(consumer,
+			"Linked as a sync state only consumer to %s\n",
+			dev_name(supplier));
+		goto out;
+	}
+reorder:
 	/*
 	 * Move the consumer and all of the devices depending on it to the end
 	 * of dpm_list and the devices_kset list.
@@ -431,6 +465,70 @@ struct device_link *device_link_add(struct device *consumer,
 }
 EXPORT_SYMBOL_GPL(device_link_add);
 
+/**
+ * device_link_wait_for_supplier - Add device to wait_for_suppliers list
+ * @consumer: Consumer device
+ *
+ * Marks the @consumer device as waiting for suppliers to become available by
+ * adding it to the wait_for_suppliers list. The consumer device will never be
+ * probed until it's removed from the wait_for_suppliers list.
+ *
+ * The caller is responsible for adding the links to the supplier devices once
+ * they are available and removing the @consumer device from the
+ * wait_for_suppliers list once links to all the suppliers have been created.
+ *
+ * This function is NOT meant to be called from the probe function of the
+ * consumer but rather from code that creates/adds the consumer device.
+ */
+static void device_link_wait_for_supplier(struct device *consumer,
+					  bool need_for_probe)
+{
+	mutex_lock(&wfs_lock);
+	list_add_tail(&consumer->links.needs_suppliers, &wait_for_suppliers);
+	consumer->links.need_for_probe = need_for_probe;
+	mutex_unlock(&wfs_lock);
+}
+
+static void device_link_wait_for_mandatory_supplier(struct device *consumer)
+{
+	device_link_wait_for_supplier(consumer, true);
+}
+
+static void device_link_wait_for_optional_supplier(struct device *consumer)
+{
+	device_link_wait_for_supplier(consumer, false);
+}
+
+/**
+ * device_link_add_missing_supplier_links - Add links from consumer devices to
+ *					    supplier devices, leaving any
+ *					    consumer with inactive suppliers on
+ *					    the wait_for_suppliers list
+ *
+ * Loops through all consumers waiting on suppliers and tries to add all their
+ * supplier links. If that succeeds, the consumer device is removed from
+ * wait_for_suppliers list. Otherwise, they are left in the wait_for_suppliers
+ * list.  Devices left on the wait_for_suppliers list will not be probed.
+ *
+ * The fwnode add_links callback is expected to return 0 if it has found and
+ * added all the supplier links for the consumer device. It should return an
+ * error if it isn't able to do so.
+ *
+ * The caller of device_link_wait_for_supplier() is expected to call this once
+ * it's aware of potential suppliers becoming available.
+ */
+static void device_link_add_missing_supplier_links(void)
+{
+	struct device *dev, *tmp;
+
+	mutex_lock(&wfs_lock);
+	list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
+				 links.needs_suppliers)
+		if (!fwnode_call_int_op(dev->fwnode, add_links, dev))
+			list_del_init(&dev->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
+}
+
 static void device_link_free(struct device_link *link)
 {
 	while (refcount_dec_not_one(&link->rpm_active))
@@ -565,10 +663,23 @@ int device_links_check_suppliers(struct device *dev)
 	struct device_link *link;
 	int ret = 0;
 
+	/*
+	 * Device waiting for supplier to become available is not allowed to
+	 * probe.
+	 */
+	mutex_lock(&wfs_lock);
+	if (!list_empty(&dev->links.needs_suppliers) &&
+	    dev->links.need_for_probe) {
+		mutex_unlock(&wfs_lock);
+		return -EPROBE_DEFER;
+	}
+	mutex_unlock(&wfs_lock);
+
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
-		if (!(link->flags & DL_FLAG_MANAGED))
+		if (!(link->flags & DL_FLAG_MANAGED) ||
+		    link->flags & DL_FLAG_SYNC_STATE_ONLY)
 			continue;
 
 		if (link->status != DL_STATE_AVAILABLE) {
@@ -585,6 +696,128 @@ int device_links_check_suppliers(struct device *dev)
 }
 
 /**
+ * __device_links_queue_sync_state - Queue a device for sync_state() callback
+ * @dev: Device to call sync_state() on
+ * @list: List head to queue the @dev on
+ *
+ * Queues a device for a sync_state() callback when the device links write lock
+ * isn't held. This allows the sync_state() execution flow to use device links
+ * APIs.  The caller must ensure this function is called with
+ * device_links_write_lock() held.
+ *
+ * This function does a get_device() to make sure the device is not freed while
+ * on this list.
+ *
+ * So the caller must also ensure that device_links_flush_sync_list() is called
+ * as soon as the caller releases device_links_write_lock().  This is necessary
+ * to make sure the sync_state() is called in a timely fashion and the
+ * put_device() is called on this device.
+ */
+static void __device_links_queue_sync_state(struct device *dev,
+					    struct list_head *list)
+{
+	struct device_link *link;
+
+	if (dev->state_synced)
+		return;
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (!(link->flags & DL_FLAG_MANAGED))
+			continue;
+		if (link->status != DL_STATE_ACTIVE)
+			return;
+	}
+
+	/*
+	 * Set the flag here to avoid adding the same device to a list more
+	 * than once. This can happen if new consumers get added to the device
+	 * and probed before the list is flushed.
+	 */
+	dev->state_synced = true;
+
+	if (WARN_ON(!list_empty(&dev->links.defer_sync)))
+		return;
+
+	get_device(dev);
+	list_add_tail(&dev->links.defer_sync, list);
+}
+
+/**
+ * device_links_flush_sync_list - Call sync_state() on a list of devices
+ * @list: List of devices to call sync_state() on
+ *
+ * Calls sync_state() on all the devices that have been queued for it. This
+ * function is used in conjunction with __device_links_queue_sync_state().
+ */
+static void device_links_flush_sync_list(struct list_head *list)
+{
+	struct device *dev, *tmp;
+
+	list_for_each_entry_safe(dev, tmp, list, links.defer_sync) {
+		list_del_init(&dev->links.defer_sync);
+
+		device_lock(dev);
+
+		if (dev->bus->sync_state)
+			dev->bus->sync_state(dev);
+		else if (dev->driver && dev->driver->sync_state)
+			dev->driver->sync_state(dev);
+
+		device_unlock(dev);
+
+		put_device(dev);
+	}
+}
+
+void device_links_supplier_sync_state_pause(void)
+{
+	device_links_write_lock();
+	defer_sync_state_count++;
+	device_links_write_unlock();
+}
+
+void device_links_supplier_sync_state_resume(void)
+{
+	struct device *dev, *tmp;
+	LIST_HEAD(sync_list);
+
+	device_links_write_lock();
+	if (!defer_sync_state_count) {
+		WARN(true, "Unmatched sync_state pause/resume!");
+		goto out;
+	}
+	defer_sync_state_count--;
+	if (defer_sync_state_count)
+		goto out;
+
+	list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) {
+		/*
+		 * Delete from deferred_sync list before queuing it to
+		 * sync_list because defer_sync is used for both lists.
+		 */
+		list_del_init(&dev->links.defer_sync);
+		__device_links_queue_sync_state(dev, &sync_list);
+	}
+out:
+	device_links_write_unlock();
+
+	device_links_flush_sync_list(&sync_list);
+}
+
+static int sync_state_resume_initcall(void)
+{
+	device_links_supplier_sync_state_resume();
+	return 0;
+}
+late_initcall(sync_state_resume_initcall);
+
+static void __device_links_supplier_defer_sync(struct device *sup)
+{
+	if (list_empty(&sup->links.defer_sync))
+		list_add_tail(&sup->links.defer_sync, &deferred_sync);
+}
+
+/**
  * device_links_driver_bound - Update device links after probing its driver.
  * @dev: Device to update the links for.
  *
@@ -598,6 +831,16 @@ int device_links_check_suppliers(struct device *dev)
 void device_links_driver_bound(struct device *dev)
 {
 	struct device_link *link;
+	LIST_HEAD(sync_list);
+
+	/*
+	 * If a device probes successfully, it's expected to have created all
+	 * the device links it needs to or make new device links as it needs
+	 * them. So, it no longer needs to wait on any suppliers.
+	 */
+	mutex_lock(&wfs_lock);
+	list_del_init(&dev->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
 
 	device_links_write_lock();
 
@@ -628,11 +871,19 @@ void device_links_driver_bound(struct device *dev)
 
 		WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
 		WRITE_ONCE(link->status, DL_STATE_ACTIVE);
+
+		if (defer_sync_state_count)
+			__device_links_supplier_defer_sync(link->supplier);
+		else
+			__device_links_queue_sync_state(link->supplier,
+							&sync_list);
 	}
 
 	dev->links.status = DL_DEV_DRIVER_BOUND;
 
 	device_links_write_unlock();
+
+	device_links_flush_sync_list(&sync_list);
 }
 
 static void device_link_drop_managed(struct device_link *link)
@@ -744,6 +995,7 @@ void device_links_driver_cleanup(struct device *dev)
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
 
+	list_del_init(&dev->links.defer_sync);
 	__device_links_no_driver(dev);
 
 	device_links_write_unlock();
@@ -813,7 +1065,8 @@ void device_links_unbind_consumers(struct device *dev)
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
 		enum device_link_state status;
 
-		if (!(link->flags & DL_FLAG_MANAGED))
+		if (!(link->flags & DL_FLAG_MANAGED) ||
+		    link->flags & DL_FLAG_SYNC_STATE_ONLY)
 			continue;
 
 		status = link->status;
@@ -849,6 +1102,10 @@ static void device_links_purge(struct device *dev)
 {
 	struct device_link *link, *ln;
 
+	mutex_lock(&wfs_lock);
+	list_del(&dev->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
+
 	/*
 	 * Delete all of the remaining links from this device to any other
 	 * devices (either consumers or suppliers).
@@ -1713,6 +1970,8 @@ void device_initialize(struct device *dev)
 #endif
 	INIT_LIST_HEAD(&dev->links.consumers);
 	INIT_LIST_HEAD(&dev->links.suppliers);
+	INIT_LIST_HEAD(&dev->links.needs_suppliers);
+	INIT_LIST_HEAD(&dev->links.defer_sync);
 	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
@@ -2101,7 +2360,7 @@ int device_add(struct device *dev)
 	struct device *parent;
 	struct kobject *kobj;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error = -EINVAL, fw_ret;
 	struct kobject *glue_dir = NULL;
 
 	dev = get_device(dev);
@@ -2199,6 +2458,32 @@ int device_add(struct device *dev)
 					     BUS_NOTIFY_ADD_DEVICE, dev);
 
 	kobject_uevent(&dev->kobj, KOBJ_ADD);
+
+	if (dev->fwnode && !dev->fwnode->dev)
+		dev->fwnode->dev = dev;
+
+	/*
+	 * Check if any of the other devices (consumers) have been waiting for
+	 * this device (supplier) to be added so that they can create a device
+	 * link to it.
+	 *
+	 * This needs to happen after device_pm_add() because device_link_add()
+	 * requires the supplier be registered before it's called.
+	 *
+	 * But this also needs to happe before bus_probe_device() to make sure
+	 * waiting consumers can link to it before the driver is bound to the
+	 * device and the driver sync_state callback is called for this device.
+	 */
+	device_link_add_missing_supplier_links();
+
+	if (fwnode_has_op(dev->fwnode, add_links)) {
+		fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
+		if (fw_ret == -ENODEV)
+			device_link_wait_for_mandatory_supplier(dev);
+		else if (fw_ret)
+			device_link_wait_for_optional_supplier(dev);
+	}
+
 	bus_probe_device(dev);
 	if (parent)
 		klist_add_tail(&dev->p->knode_parent,
@@ -2343,6 +2628,9 @@ void device_del(struct device *dev)
 	kill_device(dev);
 	device_unlock(dev);
 
+	if (dev->fwnode && dev->fwnode->dev == dev)
+		dev->fwnode->dev = NULL;
+
 	/* Notify clients of device removal.  This call must come
 	 * before dpm_sysfs_remove().
 	 */
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d811e60610d3..b25bcab2a26b 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -516,7 +516,10 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 	atomic_inc(&probe_count);
 	pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
 		 drv->bus->name, __func__, drv->name, dev_name(dev));
-	WARN_ON(!list_empty(&dev->devres_head));
+	if (!list_empty(&dev->devres_head)) {
+		dev_crit(dev, "Resources present before probing\n");
+		return -EBUSY;
+	}
 
 re_probe:
 	dev->driver = drv;
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 30d0523014e0..5995c437cbdf 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -30,11 +30,7 @@
 
 static struct task_struct *thread;
 
-#if defined CONFIG_DEVTMPFS_MOUNT
-static int mount_dev = 1;
-#else
-static int mount_dev;
-#endif
+static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT);
 
 static DEFINE_SPINLOCK(req_lock);
 
@@ -93,6 +89,23 @@ static inline int is_blockdev(struct device *dev)
 static inline int is_blockdev(struct device *dev) { return 0; }
 #endif
 
+static int devtmpfs_submit_req(struct req *req, const char *tmp)
+{
+	init_completion(&req->done);
+
+	spin_lock(&req_lock);
+	req->next = requests;
+	requests = req;
+	spin_unlock(&req_lock);
+
+	wake_up_process(thread);
+	wait_for_completion(&req->done);
+
+	kfree(tmp);
+
+	return req->err;
+}
+
 int devtmpfs_create_node(struct device *dev)
 {
 	const char *tmp = NULL;
@@ -117,19 +130,7 @@ int devtmpfs_create_node(struct device *dev)
 
 	req.dev = dev;
 
-	init_completion(&req.done);
-
-	spin_lock(&req_lock);
-	req.next = requests;
-	requests = &req;
-	spin_unlock(&req_lock);
-
-	wake_up_process(thread);
-	wait_for_completion(&req.done);
-
-	kfree(tmp);
-
-	return req.err;
+	return devtmpfs_submit_req(&req, tmp);
 }
 
 int devtmpfs_delete_node(struct device *dev)
@@ -147,18 +148,7 @@ int devtmpfs_delete_node(struct device *dev)
 	req.mode = 0;
 	req.dev = dev;
 
-	init_completion(&req.done);
-
-	spin_lock(&req_lock);
-	req.next = requests;
-	requests = &req;
-	spin_unlock(&req_lock);
-
-	wake_up_process(thread);
-	wait_for_completion(&req.done);
-
-	kfree(tmp);
-	return req.err;
+	return devtmpfs_submit_req(&req, tmp);
 }
 
 static int dev_mkdir(const char *name, umode_t mode)
@@ -359,7 +349,7 @@ static int handle_remove(const char *nodename, struct device *dev)
  * If configured, or requested by the commandline, devtmpfs will be
  * auto-mounted after the kernel mounted the root filesystem.
  */
-int devtmpfs_mount(const char *mntdir)
+int __init devtmpfs_mount(void)
 {
 	int err;
 
@@ -369,7 +359,7 @@ int devtmpfs_mount(const char *mntdir)
 	if (!thread)
 		return 0;
 
-	err = ksys_mount("devtmpfs", mntdir, "devtmpfs", MS_SILENT, NULL);
+	err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL);
 	if (err)
 		printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
 	else
@@ -388,18 +378,30 @@ static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid,
 		return handle_remove(name, dev);
 }
 
-static int devtmpfsd(void *p)
+static int devtmpfs_setup(void *p)
 {
-	int *err = p;
-	*err = ksys_unshare(CLONE_NEWNS);
-	if (*err)
+	int err;
+
+	err = ksys_unshare(CLONE_NEWNS);
+	if (err)
 		goto out;
-	*err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
-	if (*err)
+	err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
+	if (err)
 		goto out;
 	ksys_chdir("/.."); /* will traverse into overmounted root */
 	ksys_chroot(".");
+out:
+	*(int *)p = err;
 	complete(&setup_done);
+	return err;
+}
+
+static int devtmpfsd(void *p)
+{
+	int err = devtmpfs_setup(p);
+
+	if (err)
+		return err;
 	while (1) {
 		spin_lock(&req_lock);
 		while (requests) {
@@ -420,9 +422,6 @@ static int devtmpfsd(void *p)
 		schedule();
 	}
 	return 0;
-out:
-	complete(&setup_done);
-	return *err;
 }
 
 /*
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index 4e5ca632f35e..57c68769e157 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2007 Novell Inc.
  */
 
+#include <linux/device/driver.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/errno.h>
diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig
index 3f9e274e2ed3..5b24f3959255 100644
--- a/drivers/base/firmware_loader/Kconfig
+++ b/drivers/base/firmware_loader/Kconfig
@@ -148,7 +148,7 @@ config FW_LOADER_USER_HELPER_FALLBACK
 	  to be used for all firmware requests which explicitly do not disable a
 	  a fallback mechanism. Firmware calls which do prohibit a fallback
 	  mechanism is request_firmware_direct(). This option is kept for
-          backward compatibility purposes given this precise mechanism can also
+	  backward compatibility purposes given this precise mechanism can also
 	  be enabled by setting the proc sysctl value to true:
 
 	       /proc/sys/kernel/firmware_config/force_sysfs_fallback
@@ -169,5 +169,17 @@ config FW_LOADER_COMPRESS
 	  be compressed with either none or crc32 integrity check type (pass
 	  "-C crc32" option to xz command).
 
+config FW_CACHE
+	bool "Enable firmware caching during suspend"
+	depends on PM_SLEEP
+	default y if PM_SLEEP
+	help
+	  Because firmware caching generates uevent messages that are sent
+	  over a netlink socket, it can prevent suspend on many platforms.
+	  It is also not always useful, so on such platforms we have the
+	  option.
+
+	  If unsure, say Y.
+
 endif # FW_LOADER
 endmenu
diff --git a/drivers/base/firmware_loader/builtin/Makefile b/drivers/base/firmware_loader/builtin/Makefile
index 37e5ae387400..5fa7ce3745a0 100644
--- a/drivers/base/firmware_loader/builtin/Makefile
+++ b/drivers/base/firmware_loader/builtin/Makefile
@@ -8,7 +8,8 @@ fwdir := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir))
 obj-y  := $(addsuffix .gen.o, $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE)))
 
 FWNAME    = $(patsubst $(obj)/%.gen.S,%,$@)
-FWSTR     = $(subst /,_,$(subst .,_,$(subst -,_,$(FWNAME))))
+comma     := ,
+FWSTR     = $(subst $(comma),_,$(subst /,_,$(subst .,_,$(subst -,_,$(FWNAME)))))
 ASM_WORD  = $(if $(CONFIG_64BIT),.quad,.long)
 ASM_ALIGN = $(if $(CONFIG_64BIT),3,2)
 PROGBITS  = $(if $(CONFIG_ARM),%,@)progbits
@@ -16,7 +17,7 @@ PROGBITS  = $(if $(CONFIG_ARM),%,@)progbits
 filechk_fwbin = \
 	echo "/* Generated by $(src)/Makefile */"		;\
 	echo "    .section .rodata"				;\
-	echo "    .p2align $(ASM_ALIGN)"			;\
+	echo "    .p2align 4"					;\
 	echo "_fw_$(FWSTR)_bin:"				;\
 	echo "    .incbin \"$(fwdir)/$(FWNAME)\""		;\
 	echo "_fw_end:"						;\
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 62ee90b4db56..8704e1bae175 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -606,7 +606,7 @@ static bool fw_run_sysfs_fallback(enum fw_opt opt_flags)
 		return false;
 	}
 
-	if ((opt_flags & FW_OPT_NOFALLBACK))
+	if ((opt_flags & FW_OPT_NOFALLBACK_SYSFS))
 		return false;
 
 	/* Also permit LSMs and IMA to fail firmware sysfs fallback */
@@ -630,10 +630,11 @@ static bool fw_run_sysfs_fallback(enum fw_opt opt_flags)
  * interface. Userspace is in charge of loading the firmware through the sysfs
  * loading interface. This sysfs fallback mechanism may be disabled completely
  * on a system by setting the proc sysctl value ignore_sysfs_fallback to true.
- * If this false we check if the internal API caller set the @FW_OPT_NOFALLBACK
- * flag, if so it would also disable the fallback mechanism. A system may want
- * to enfoce the sysfs fallback mechanism at all times, it can do this by
- * setting ignore_sysfs_fallback to false and force_sysfs_fallback to true.
+ * If this is false we check if the internal API caller set the
+ * @FW_OPT_NOFALLBACK_SYSFS flag, if so it would also disable the fallback
+ * mechanism. A system may want to enforce the sysfs fallback mechanism at all
+ * times, it can do this by setting ignore_sysfs_fallback to false and
+ * force_sysfs_fallback to true.
  * Enabling force_sysfs_fallback is functionally equivalent to build a kernel
  * with CONFIG_FW_LOADER_USER_HELPER_FALLBACK.
  **/
diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index 7ecd590e67fe..8656e5239a80 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -27,16 +27,16 @@
  *	firmware file lookup on storage is avoided. Used for calls where the
  *	file may be too big, or where the driver takes charge of its own
  *	firmware caching mechanism.
- * @FW_OPT_NOFALLBACK: Disable the fallback mechanism. Takes precedence over
- *	&FW_OPT_UEVENT and &FW_OPT_USERHELPER.
+ * @FW_OPT_NOFALLBACK_SYSFS: Disable the sysfs fallback mechanism. Takes
+ *	precedence over &FW_OPT_UEVENT and &FW_OPT_USERHELPER.
  */
 enum fw_opt {
-	FW_OPT_UEVENT =         BIT(0),
-	FW_OPT_NOWAIT =         BIT(1),
-	FW_OPT_USERHELPER =     BIT(2),
-	FW_OPT_NO_WARN =        BIT(3),
-	FW_OPT_NOCACHE =        BIT(4),
-	FW_OPT_NOFALLBACK =     BIT(5),
+	FW_OPT_UEVENT			= BIT(0),
+	FW_OPT_NOWAIT			= BIT(1),
+	FW_OPT_USERHELPER		= BIT(2),
+	FW_OPT_NO_WARN			= BIT(3),
+	FW_OPT_NOCACHE			= BIT(4),
+	FW_OPT_NOFALLBACK_SYSFS		= BIT(5),
 };
 
 enum fw_status {
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index bf44c79beae9..57133a9dad09 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -4,7 +4,7 @@
  *
  * Copyright (c) 2003 Manuel Estrada Sainz
  *
- * Please see Documentation/firmware_class/ for more information.
+ * Please see Documentation/driver-api/firmware/ for more information.
  *
  */
 
@@ -51,7 +51,7 @@ struct firmware_cache {
 	struct list_head head;
 	int state;
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FW_CACHE
 	/*
 	 * Names of firmware images which have been cached successfully
 	 * will be added into the below list so that device uncache
@@ -504,6 +504,7 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
 					 path);
 			continue;
 		}
+		dev_dbg(device, "Loading firmware from %s\n", path);
 		if (decompress) {
 			dev_dbg(device, "f/w decompressing %s\n",
 				fw_priv->fw_name);
@@ -556,7 +557,7 @@ static void fw_set_page_data(struct fw_priv *fw_priv, struct firmware *fw)
 		 (unsigned int)fw_priv->size);
 }
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FW_CACHE
 static void fw_name_devm_release(struct device *dev, void *res)
 {
 	struct fw_name_devm *fwn = res;
@@ -876,7 +877,7 @@ int request_firmware_direct(const struct firmware **firmware_p,
 	__module_get(THIS_MODULE);
 	ret = _request_firmware(firmware_p, name, device, NULL, 0,
 				FW_OPT_UEVENT | FW_OPT_NO_WARN |
-				FW_OPT_NOFALLBACK);
+				FW_OPT_NOFALLBACK_SYSFS);
 	module_put(THIS_MODULE);
 	return ret;
 }
@@ -1046,7 +1047,7 @@ request_firmware_nowait(
 }
 EXPORT_SYMBOL(request_firmware_nowait);
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FW_CACHE
 static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain);
 
 /**
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 84c4e1f72cbd..799b43191dea 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -19,15 +19,12 @@
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
 
 #include <linux/atomic.h>
 #include <linux/uaccess.h>
 
-static DEFINE_MUTEX(mem_sysfs_mutex);
-
 #define MEMORY_CLASS_NAME	"memory"
 
 #define to_memory_block(dev) container_of(dev, struct memory_block, dev)
@@ -538,12 +535,7 @@ static ssize_t soft_offline_page_store(struct device *dev,
 	if (kstrtoull(buf, 0, &pfn) < 0)
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
-	if (!pfn_valid(pfn))
-		return -ENXIO;
-	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
-	if (!pfn_to_online_page(pfn))
-		return -EIO;
-	ret = soft_offline_page(pfn_to_page(pfn), 0);
+	ret = soft_offline_page(pfn, 0);
 	return ret == 0 ? count : ret;
 }
 
@@ -705,6 +697,8 @@ static void unregister_memory(struct memory_block *memory)
  * Create memory block devices for the given memory area. Start and size
  * have to be aligned to memory block granularity. Memory block devices
  * will be initialized as offline.
+ *
+ * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size)
 {
@@ -718,7 +712,6 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
 			 !IS_ALIGNED(size, memory_block_size_bytes())))
 		return -EINVAL;
 
-	mutex_lock(&mem_sysfs_mutex);
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
 		ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
 		if (ret)
@@ -730,11 +723,12 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
 		for (block_id = start_block_id; block_id != end_block_id;
 		     block_id++) {
 			mem = find_memory_block_by_id(block_id);
+			if (WARN_ON_ONCE(!mem))
+				continue;
 			mem->section_count = 0;
 			unregister_memory(mem);
 		}
 	}
-	mutex_unlock(&mem_sysfs_mutex);
 	return ret;
 }
 
@@ -742,6 +736,8 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
  * Remove memory block devices for the given memory area. Start and size
  * have to be aligned to memory block granularity. Memory block devices
  * have to be offline.
+ *
+ * Called under device_hotplug_lock.
  */
 void remove_memory_block_devices(unsigned long start, unsigned long size)
 {
@@ -754,7 +750,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 			 !IS_ALIGNED(size, memory_block_size_bytes())))
 		return;
 
-	mutex_lock(&mem_sysfs_mutex);
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
@@ -763,7 +758,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		unregister_memory_block_under_nodes(mem);
 		unregister_memory(mem);
 	}
-	mutex_unlock(&mem_sysfs_mutex);
 }
 
 /* return true if the memory block is offlined, otherwise, return false */
@@ -797,12 +791,13 @@ static const struct attribute_group *memory_root_attr_groups[] = {
 };
 
 /*
- * Initialize the sysfs support for memory devices...
+ * Initialize the sysfs support for memory devices. At the time this function
+ * is called, we cannot have concurrent creation/deletion of memory block
+ * devices, the device_hotplug_lock is not needed.
  */
 void __init memory_dev_init(void)
 {
 	int ret;
-	int err;
 	unsigned long block_sz, nr;
 
 	/* Validate the configured memory block size */
@@ -813,24 +808,19 @@ void __init memory_dev_init(void)
 
 	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
 	if (ret)
-		goto out;
+		panic("%s() failed to register subsystem: %d\n", __func__, ret);
 
 	/*
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
 	 */
-	mutex_lock(&mem_sysfs_mutex);
 	for (nr = 0; nr <= __highest_present_section_nr;
 	     nr += sections_per_block) {
-		err = add_memory_block(nr);
-		if (!ret)
-			ret = err;
+		ret = add_memory_block(nr);
+		if (ret)
+			panic("%s() failed to add memory block: %d\n", __func__,
+			      ret);
 	}
-	mutex_unlock(&mem_sysfs_mutex);
-
-out:
-	if (ret)
-		panic("%s() failed: %d\n", __func__, ret);
 }
 
 /**
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..98a31bafc8a2 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -496,20 +496,17 @@ static ssize_t node_read_vmstat(struct device *dev,
 	int n = 0;
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+		n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i),
 			     sum_zone_node_page_state(nid, i));
 
 #ifdef CONFIG_NUMA
 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n",
-			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+		n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i),
 			     sum_zone_numa_state(nid, i));
 #endif
 
 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n",
-			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
-			     NR_VM_NUMA_STAT_ITEMS],
+		n += sprintf(buf+n, "%s %lu\n", node_stat_name(i),
 			     node_page_state(pgdat, i));
 
 	return n;
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index b230beb6ccb4..7fa654f1288b 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -27,6 +27,7 @@
 #include <linux/limits.h>
 #include <linux/property.h>
 #include <linux/kmemleak.h>
+#include <linux/types.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -48,7 +49,7 @@ EXPORT_SYMBOL_GPL(platform_bus);
 struct resource *platform_get_resource(struct platform_device *dev,
 				       unsigned int type, unsigned int num)
 {
-	int i;
+	u32 i;
 
 	for (i = 0; i < dev->num_resources; i++) {
 		struct resource *r = &dev->resource[i];
@@ -60,6 +61,7 @@ struct resource *platform_get_resource(struct platform_device *dev,
 }
 EXPORT_SYMBOL_GPL(platform_get_resource);
 
+#ifdef CONFIG_HAS_IOMEM
 /**
  * devm_platform_ioremap_resource - call devm_ioremap_resource() for a platform
  *				    device
@@ -68,7 +70,6 @@ EXPORT_SYMBOL_GPL(platform_get_resource);
  *        resource management
  * @index: resource index
  */
-#ifdef CONFIG_HAS_IOMEM
 void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
 					     unsigned int index)
 {
@@ -78,9 +79,63 @@ void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
 	return devm_ioremap_resource(&pdev->dev, res);
 }
 EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource);
+
+/**
+ * devm_platform_ioremap_resource_wc - write-combined variant of
+ *                                     devm_platform_ioremap_resource()
+ *
+ * @pdev: platform device to use both for memory resource lookup as well as
+ *        resource management
+ * @index: resource index
+ */
+void __iomem *devm_platform_ioremap_resource_wc(struct platform_device *pdev,
+						unsigned int index)
+{
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	return devm_ioremap_resource_wc(&pdev->dev, res);
+}
+
+/**
+ * devm_platform_ioremap_resource_byname - call devm_ioremap_resource for
+ *					   a platform device, retrieve the
+ *					   resource by name
+ *
+ * @pdev: platform device to use both for memory resource lookup as well as
+ *	  resource management
+ * @name: name of the resource
+ */
+void __iomem *
+devm_platform_ioremap_resource_byname(struct platform_device *pdev,
+				      const char *name)
+{
+	struct resource *res;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	return devm_ioremap_resource(&pdev->dev, res);
+}
+EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname);
 #endif /* CONFIG_HAS_IOMEM */
 
-static int __platform_get_irq(struct platform_device *dev, unsigned int num)
+/**
+ * platform_get_irq_optional - get an optional IRQ for a device
+ * @dev: platform device
+ * @num: IRQ number index
+ *
+ * Gets an IRQ for a platform device. Device drivers should check the return
+ * value for errors so as to not pass a negative integer value to the
+ * request_irq() APIs. This is the same as platform_get_irq(), except that it
+ * does not print an error message if an IRQ can not be obtained.
+ *
+ * Example:
+ *		int irq = platform_get_irq_optional(pdev, 0);
+ *		if (irq < 0)
+ *			return irq;
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_optional(struct platform_device *dev, unsigned int num)
 {
 #ifdef CONFIG_SPARC
 	/* sparc does not have irqs represented as IORESOURCE_IRQ resources */
@@ -89,9 +144,9 @@ static int __platform_get_irq(struct platform_device *dev, unsigned int num)
 	return dev->archdata.irqs[num];
 #else
 	struct resource *r;
-	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
-		int ret;
+	int ret;
 
+	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
 		ret = of_irq_get(dev->dev.of_node, num);
 		if (ret > 0 || ret == -EPROBE_DEFER)
 			return ret;
@@ -100,8 +155,6 @@ static int __platform_get_irq(struct platform_device *dev, unsigned int num)
 	r = platform_get_resource(dev, IORESOURCE_IRQ, num);
 	if (has_acpi_companion(&dev->dev)) {
 		if (r && r->flags & IORESOURCE_DISABLED) {
-			int ret;
-
 			ret = acpi_irq_get(ACPI_HANDLE(&dev->dev), num, r);
 			if (ret)
 				return ret;
@@ -134,8 +187,7 @@ static int __platform_get_irq(struct platform_device *dev, unsigned int num)
 	 * allows a common code path across either kind of resource.
 	 */
 	if (num == 0 && has_acpi_companion(&dev->dev)) {
-		int ret = acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num);
-
+		ret = acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num);
 		/* Our callers expect -ENXIO for missing IRQs. */
 		if (ret >= 0 || ret == -EPROBE_DEFER)
 			return ret;
@@ -144,6 +196,7 @@ static int __platform_get_irq(struct platform_device *dev, unsigned int num)
 	return -ENXIO;
 #endif
 }
+EXPORT_SYMBOL_GPL(platform_get_irq_optional);
 
 /**
  * platform_get_irq - get an IRQ for a device
@@ -165,7 +218,7 @@ int platform_get_irq(struct platform_device *dev, unsigned int num)
 {
 	int ret;
 
-	ret = __platform_get_irq(dev, num);
+	ret = platform_get_irq_optional(dev, num);
 	if (ret < 0 && ret != -EPROBE_DEFER)
 		dev_err(&dev->dev, "IRQ index %u not found\n", num);
 
@@ -174,29 +227,6 @@ int platform_get_irq(struct platform_device *dev, unsigned int num)
 EXPORT_SYMBOL_GPL(platform_get_irq);
 
 /**
- * platform_get_irq_optional - get an optional IRQ for a device
- * @dev: platform device
- * @num: IRQ number index
- *
- * Gets an IRQ for a platform device. Device drivers should check the return
- * value for errors so as to not pass a negative integer value to the
- * request_irq() APIs. This is the same as platform_get_irq(), except that it
- * does not print an error message if an IRQ can not be obtained.
- *
- * Example:
- *		int irq = platform_get_irq_optional(pdev, 0);
- *		if (irq < 0)
- *			return irq;
- *
- * Return: IRQ number on success, negative error number on failure.
- */
-int platform_get_irq_optional(struct platform_device *dev, unsigned int num)
-{
-	return __platform_get_irq(dev, num);
-}
-EXPORT_SYMBOL_GPL(platform_get_irq_optional);
-
-/**
  * platform_irq_count - Count the number of IRQs a platform device uses
  * @dev: platform device
  *
@@ -206,7 +236,7 @@ int platform_irq_count(struct platform_device *dev)
 {
 	int ret, nr = 0;
 
-	while ((ret = __platform_get_irq(dev, nr)) >= 0)
+	while ((ret = platform_get_irq_optional(dev, nr)) >= 0)
 		nr++;
 
 	if (ret == -EPROBE_DEFER)
@@ -226,7 +256,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
 					      unsigned int type,
 					      const char *name)
 {
-	int i;
+	u32 i;
 
 	for (i = 0; i < dev->num_resources; i++) {
 		struct resource *r = &dev->resource[i];
@@ -245,10 +275,9 @@ static int __platform_get_irq_byname(struct platform_device *dev,
 				     const char *name)
 {
 	struct resource *r;
+	int ret;
 
 	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
-		int ret;
-
 		ret = of_irq_get_byname(dev->dev.of_node, name);
 		if (ret > 0 || ret == -EPROBE_DEFER)
 			return ret;
@@ -473,7 +502,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties);
  */
 int platform_device_add(struct platform_device *pdev)
 {
-	int i, ret;
+	u32 i;
+	int ret;
 
 	if (!pdev)
 		return -EINVAL;
@@ -541,7 +571,7 @@ int platform_device_add(struct platform_device *pdev)
 		pdev->id = PLATFORM_DEVID_AUTO;
 	}
 
-	while (--i >= 0) {
+	while (i--) {
 		struct resource *r = &pdev->resource[i];
 		if (r->parent)
 			release_resource(r);
@@ -562,7 +592,7 @@ EXPORT_SYMBOL_GPL(platform_device_add);
  */
 void platform_device_del(struct platform_device *pdev)
 {
-	int i;
+	u32 i;
 
 	if (!IS_ERR_OR_NULL(pdev)) {
 		device_del(&pdev->dev);
@@ -1278,6 +1308,11 @@ struct bus_type platform_bus_type = {
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
+static inline int __platform_match(struct device *dev, const void *drv)
+{
+	return platform_match(dev, (struct device_driver *)drv);
+}
+
 /**
  * platform_find_device_by_driver - Find a platform device with a given
  * driver.
@@ -1288,10 +1323,12 @@ struct device *platform_find_device_by_driver(struct device *start,
 					      const struct device_driver *drv)
 {
 	return bus_find_device(&platform_bus_type, start, drv,
-			       (void *)platform_match);
+			       __platform_match);
 }
 EXPORT_SYMBOL_GPL(platform_find_device_by_driver);
 
+void __weak __init early_platform_cleanup(void) { }
+
 int __init platform_bus_init(void)
 {
 	int error;
@@ -1309,289 +1346,3 @@ int __init platform_bus_init(void)
 	of_platform_register_reconfig_notifier();
 	return error;
 }
-
-static __initdata LIST_HEAD(early_platform_driver_list);
-static __initdata LIST_HEAD(early_platform_device_list);
-
-/**
- * early_platform_driver_register - register early platform driver
- * @epdrv: early_platform driver structure
- * @buf: string passed from early_param()
- *
- * Helper function for early_platform_init() / early_platform_init_buffer()
- */
-int __init early_platform_driver_register(struct early_platform_driver *epdrv,
-					  char *buf)
-{
-	char *tmp;
-	int n;
-
-	/* Simply add the driver to the end of the global list.
-	 * Drivers will by default be put on the list in compiled-in order.
-	 */
-	if (!epdrv->list.next) {
-		INIT_LIST_HEAD(&epdrv->list);
-		list_add_tail(&epdrv->list, &early_platform_driver_list);
-	}
-
-	/* If the user has specified device then make sure the driver
-	 * gets prioritized. The driver of the last device specified on
-	 * command line will be put first on the list.
-	 */
-	n = strlen(epdrv->pdrv->driver.name);
-	if (buf && !strncmp(buf, epdrv->pdrv->driver.name, n)) {
-		list_move(&epdrv->list, &early_platform_driver_list);
-
-		/* Allow passing parameters after device name */
-		if (buf[n] == '\0' || buf[n] == ',')
-			epdrv->requested_id = -1;
-		else {
-			epdrv->requested_id = simple_strtoul(&buf[n + 1],
-							     &tmp, 10);
-
-			if (buf[n] != '.' || (tmp == &buf[n + 1])) {
-				epdrv->requested_id = EARLY_PLATFORM_ID_ERROR;
-				n = 0;
-			} else
-				n += strcspn(&buf[n + 1], ",") + 1;
-		}
-
-		if (buf[n] == ',')
-			n++;
-
-		if (epdrv->bufsize) {
-			memcpy(epdrv->buffer, &buf[n],
-			       min_t(int, epdrv->bufsize, strlen(&buf[n]) + 1));
-			epdrv->buffer[epdrv->bufsize - 1] = '\0';
-		}
-	}
-
-	return 0;
-}
-
-/**
- * early_platform_add_devices - adds a number of early platform devices
- * @devs: array of early platform devices to add
- * @num: number of early platform devices in array
- *
- * Used by early architecture code to register early platform devices and
- * their platform data.
- */
-void __init early_platform_add_devices(struct platform_device **devs, int num)
-{
-	struct device *dev;
-	int i;
-
-	/* simply add the devices to list */
-	for (i = 0; i < num; i++) {
-		dev = &devs[i]->dev;
-
-		if (!dev->devres_head.next) {
-			pm_runtime_early_init(dev);
-			INIT_LIST_HEAD(&dev->devres_head);
-			list_add_tail(&dev->devres_head,
-				      &early_platform_device_list);
-		}
-	}
-}
-
-/**
- * early_platform_driver_register_all - register early platform drivers
- * @class_str: string to identify early platform driver class
- *
- * Used by architecture code to register all early platform drivers
- * for a certain class. If omitted then only early platform drivers
- * with matching kernel command line class parameters will be registered.
- */
-void __init early_platform_driver_register_all(char *class_str)
-{
-	/* The "class_str" parameter may or may not be present on the kernel
-	 * command line. If it is present then there may be more than one
-	 * matching parameter.
-	 *
-	 * Since we register our early platform drivers using early_param()
-	 * we need to make sure that they also get registered in the case
-	 * when the parameter is missing from the kernel command line.
-	 *
-	 * We use parse_early_options() to make sure the early_param() gets
-	 * called at least once. The early_param() may be called more than
-	 * once since the name of the preferred device may be specified on
-	 * the kernel command line. early_platform_driver_register() handles
-	 * this case for us.
-	 */
-	parse_early_options(class_str);
-}
-
-/**
- * early_platform_match - find early platform device matching driver
- * @epdrv: early platform driver structure
- * @id: id to match against
- */
-static struct platform_device * __init
-early_platform_match(struct early_platform_driver *epdrv, int id)
-{
-	struct platform_device *pd;
-
-	list_for_each_entry(pd, &early_platform_device_list, dev.devres_head)
-		if (platform_match(&pd->dev, &epdrv->pdrv->driver))
-			if (pd->id == id)
-				return pd;
-
-	return NULL;
-}
-
-/**
- * early_platform_left - check if early platform driver has matching devices
- * @epdrv: early platform driver structure
- * @id: return true if id or above exists
- */
-static int __init early_platform_left(struct early_platform_driver *epdrv,
-				       int id)
-{
-	struct platform_device *pd;
-
-	list_for_each_entry(pd, &early_platform_device_list, dev.devres_head)
-		if (platform_match(&pd->dev, &epdrv->pdrv->driver))
-			if (pd->id >= id)
-				return 1;
-
-	return 0;
-}
-
-/**
- * early_platform_driver_probe_id - probe drivers matching class_str and id
- * @class_str: string to identify early platform driver class
- * @id: id to match against
- * @nr_probe: number of platform devices to successfully probe before exiting
- */
-static int __init early_platform_driver_probe_id(char *class_str,
-						 int id,
-						 int nr_probe)
-{
-	struct early_platform_driver *epdrv;
-	struct platform_device *match;
-	int match_id;
-	int n = 0;
-	int left = 0;
-
-	list_for_each_entry(epdrv, &early_platform_driver_list, list) {
-		/* only use drivers matching our class_str */
-		if (strcmp(class_str, epdrv->class_str))
-			continue;
-
-		if (id == -2) {
-			match_id = epdrv->requested_id;
-			left = 1;
-
-		} else {
-			match_id = id;
-			left += early_platform_left(epdrv, id);
-
-			/* skip requested id */
-			switch (epdrv->requested_id) {
-			case EARLY_PLATFORM_ID_ERROR:
-			case EARLY_PLATFORM_ID_UNSET:
-				break;
-			default:
-				if (epdrv->requested_id == id)
-					match_id = EARLY_PLATFORM_ID_UNSET;
-			}
-		}
-
-		switch (match_id) {
-		case EARLY_PLATFORM_ID_ERROR:
-			pr_warn("%s: unable to parse %s parameter\n",
-				class_str, epdrv->pdrv->driver.name);
-			/* fall-through */
-		case EARLY_PLATFORM_ID_UNSET:
-			match = NULL;
-			break;
-		default:
-			match = early_platform_match(epdrv, match_id);
-		}
-
-		if (match) {
-			/*
-			 * Set up a sensible init_name to enable
-			 * dev_name() and others to be used before the
-			 * rest of the driver core is initialized.
-			 */
-			if (!match->dev.init_name && slab_is_available()) {
-				if (match->id != -1)
-					match->dev.init_name =
-						kasprintf(GFP_KERNEL, "%s.%d",
-							  match->name,
-							  match->id);
-				else
-					match->dev.init_name =
-						kasprintf(GFP_KERNEL, "%s",
-							  match->name);
-
-				if (!match->dev.init_name)
-					return -ENOMEM;
-			}
-
-			if (epdrv->pdrv->probe(match))
-				pr_warn("%s: unable to probe %s early.\n",
-					class_str, match->name);
-			else
-				n++;
-		}
-
-		if (n >= nr_probe)
-			break;
-	}
-
-	if (left)
-		return n;
-	else
-		return -ENODEV;
-}
-
-/**
- * early_platform_driver_probe - probe a class of registered drivers
- * @class_str: string to identify early platform driver class
- * @nr_probe: number of platform devices to successfully probe before exiting
- * @user_only: only probe user specified early platform devices
- *
- * Used by architecture code to probe registered early platform drivers
- * within a certain class. For probe to happen a registered early platform
- * device matching a registered early platform driver is needed.
- */
-int __init early_platform_driver_probe(char *class_str,
-				       int nr_probe,
-				       int user_only)
-{
-	int k, n, i;
-
-	n = 0;
-	for (i = -2; n < nr_probe; i++) {
-		k = early_platform_driver_probe_id(class_str, i, nr_probe - n);
-
-		if (k < 0)
-			break;
-
-		n += k;
-
-		if (user_only)
-			break;
-	}
-
-	return n;
-}
-
-/**
- * early_platform_cleanup - clean up early platform code
- */
-void __init early_platform_cleanup(void)
-{
-	struct platform_device *pd, *pd2;
-
-	/* clean up the devres list used to chain devices */
-	list_for_each_entry_safe(pd, pd2, &early_platform_device_list,
-				 dev.devres_head) {
-		list_del(&pd->dev.devres_head);
-		memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head));
-	}
-}
-
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index ec5bb190b9d0..8fdd0073eeeb 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -4,5 +4,6 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o wakeup_stats.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o domain_governor.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
+obj-$(CONFIG_PM_QOS_KUNIT_TEST) += qos-test.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 8db98a1f83dc..bbddb267c2e6 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -188,6 +188,26 @@ void dev_pm_domain_detach(struct device *dev, bool power_off)
 EXPORT_SYMBOL_GPL(dev_pm_domain_detach);
 
 /**
+ * dev_pm_domain_start - Start the device through its PM domain.
+ * @dev: Device to start.
+ *
+ * This function should typically be called during probe by a subsystem/driver,
+ * when it needs to start its device from the PM domain's perspective. Note
+ * that, it's assumed that the PM domain is already powered on when this
+ * function is called.
+ *
+ * Returns 0 on success and negative error values on failures.
+ */
+int dev_pm_domain_start(struct device *dev)
+{
+	if (dev->pm_domain && dev->pm_domain->start)
+		return dev->pm_domain->start(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_start);
+
+/**
  * dev_pm_domain_set - Set PM domain of a device.
  * @dev: Device whose PM domain is to be set.
  * @pd: PM domain to be set, or NULL.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index cc85e87eaf05..8e5725b11ee8 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -634,6 +634,13 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
 	return ret;
 }
 
+static int genpd_dev_pm_start(struct device *dev)
+{
+	struct generic_pm_domain *genpd = dev_to_genpd(dev);
+
+	return genpd_start_dev(genpd, dev);
+}
+
 static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 				     unsigned long val, void *ptr)
 {
@@ -922,24 +929,6 @@ static int __init genpd_power_off_unused(void)
 }
 late_initcall(genpd_power_off_unused);
 
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_GENERIC_DOMAINS_OF)
-
-static bool genpd_present(const struct generic_pm_domain *genpd)
-{
-	const struct generic_pm_domain *gpd;
-
-	if (IS_ERR_OR_NULL(genpd))
-		return false;
-
-	list_for_each_entry(gpd, &gpd_list, gpd_list_node)
-		if (gpd == genpd)
-			return true;
-
-	return false;
-}
-
-#endif
-
 #ifdef CONFIG_PM_SLEEP
 
 /**
@@ -1354,8 +1343,8 @@ static void genpd_syscore_switch(struct device *dev, bool suspend)
 {
 	struct generic_pm_domain *genpd;
 
-	genpd = dev_to_genpd(dev);
-	if (!genpd_present(genpd))
+	genpd = dev_to_genpd_safe(dev);
+	if (!genpd)
 		return;
 
 	if (suspend) {
@@ -1805,6 +1794,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.poweroff_noirq = genpd_poweroff_noirq;
 	genpd->domain.ops.restore_noirq = genpd_restore_noirq;
 	genpd->domain.ops.complete = genpd_complete;
+	genpd->domain.start = genpd_dev_pm_start;
 
 	if (genpd->flags & GENPD_FLAG_PM_CLK) {
 		genpd->dev_ops.stop = pm_clk_suspend;
@@ -2020,6 +2010,16 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
 	return 0;
 }
 
+static bool genpd_present(const struct generic_pm_domain *genpd)
+{
+	const struct generic_pm_domain *gpd;
+
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node)
+		if (gpd == genpd)
+			return true;
+	return false;
+}
+
 /**
  * of_genpd_add_provider_simple() - Register a simple PM domain provider
  * @np: Device node pointer associated with the PM domain provider.
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 39a06a0cfdaa..444f5c169a0b 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -117,6 +117,13 @@ static inline bool device_pm_initialized(struct device *dev)
 	return dev->power.in_dpm_list;
 }
 
+/* drivers/base/power/wakeup_stats.c */
+extern int wakeup_source_sysfs_add(struct device *parent,
+				   struct wakeup_source *ws);
+extern void wakeup_source_sysfs_remove(struct wakeup_source *ws);
+
+extern int pm_wakeup_source_sysfs_add(struct device *parent);
+
 #else /* !CONFIG_PM_SLEEP */
 
 static inline void device_pm_sleep_init(struct device *dev) {}
@@ -141,6 +148,11 @@ static inline bool device_pm_initialized(struct device *dev)
 	return device_is_registered(dev);
 }
 
+static inline int pm_wakeup_source_sysfs_add(struct device *parent)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_PM_SLEEP */
 
 static inline void device_pm_init(struct device *dev)
@@ -149,21 +161,3 @@ static inline void device_pm_init(struct device *dev)
 	device_pm_sleep_init(dev);
 	pm_runtime_init(dev);
 }
-
-#ifdef CONFIG_PM_SLEEP
-
-/* drivers/base/power/wakeup_stats.c */
-extern int wakeup_source_sysfs_add(struct device *parent,
-				   struct wakeup_source *ws);
-extern void wakeup_source_sysfs_remove(struct wakeup_source *ws);
-
-extern int pm_wakeup_source_sysfs_add(struct device *parent);
-
-#else /* !CONFIG_PM_SLEEP */
-
-static inline int pm_wakeup_source_sysfs_add(struct device *parent)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/base/power/qos-test.c b/drivers/base/power/qos-test.c
new file mode 100644
index 000000000000..79fc6c4418da
--- /dev/null
+++ b/drivers/base/power/qos-test.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 NXP
+ */
+#include <kunit/test.h>
+#include <linux/pm_qos.h>
+
+/* Basic test for aggregating two "min" requests */
+static void freq_qos_test_min(struct kunit *test)
+{
+	struct freq_constraints	qos;
+	struct freq_qos_request	req1, req2;
+	int ret;
+
+	freq_constraints_init(&qos);
+	memset(&req1, 0, sizeof(req1));
+	memset(&req2, 0, sizeof(req2));
+
+	ret = freq_qos_add_request(&qos, &req1, FREQ_QOS_MIN, 1000);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	ret = freq_qos_add_request(&qos, &req2, FREQ_QOS_MIN, 2000);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN), 2000);
+
+	ret = freq_qos_remove_request(&req2);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN), 1000);
+
+	ret = freq_qos_remove_request(&req1);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN),
+			FREQ_QOS_MIN_DEFAULT_VALUE);
+}
+
+/* Test that requests for MAX_DEFAULT_VALUE have no effect */
+static void freq_qos_test_maxdef(struct kunit *test)
+{
+	struct freq_constraints	qos;
+	struct freq_qos_request	req1, req2;
+	int ret;
+
+	freq_constraints_init(&qos);
+	memset(&req1, 0, sizeof(req1));
+	memset(&req2, 0, sizeof(req2));
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MAX),
+			FREQ_QOS_MAX_DEFAULT_VALUE);
+
+	ret = freq_qos_add_request(&qos, &req1, FREQ_QOS_MAX,
+			FREQ_QOS_MAX_DEFAULT_VALUE);
+	KUNIT_EXPECT_EQ(test, ret, 0);
+	ret = freq_qos_add_request(&qos, &req2, FREQ_QOS_MAX,
+			FREQ_QOS_MAX_DEFAULT_VALUE);
+	KUNIT_EXPECT_EQ(test, ret, 0);
+
+	/* Add max 1000 */
+	ret = freq_qos_update_request(&req1, 1000);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MAX), 1000);
+
+	/* Add max 2000, no impact */
+	ret = freq_qos_update_request(&req2, 2000);
+	KUNIT_EXPECT_EQ(test, ret, 0);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MAX), 1000);
+
+	/* Remove max 1000, new max 2000 */
+	ret = freq_qos_remove_request(&req1);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MAX), 2000);
+}
+
+/*
+ * Test that a freq_qos_request can be added again after removal
+ *
+ * This issue was solved by commit 05ff1ba412fd ("PM: QoS: Invalidate frequency
+ * QoS requests after removal")
+ */
+static void freq_qos_test_readd(struct kunit *test)
+{
+	struct freq_constraints	qos;
+	struct freq_qos_request	req;
+	int ret;
+
+	freq_constraints_init(&qos);
+	memset(&req, 0, sizeof(req));
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN),
+			FREQ_QOS_MIN_DEFAULT_VALUE);
+
+	/* Add */
+	ret = freq_qos_add_request(&qos, &req, FREQ_QOS_MIN, 1000);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN), 1000);
+
+	/* Remove */
+	ret = freq_qos_remove_request(&req);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN),
+			FREQ_QOS_MIN_DEFAULT_VALUE);
+
+	/* Add again */
+	ret = freq_qos_add_request(&qos, &req, FREQ_QOS_MIN, 2000);
+	KUNIT_EXPECT_EQ(test, ret, 1);
+	KUNIT_EXPECT_EQ(test, freq_qos_read_value(&qos, FREQ_QOS_MIN), 2000);
+}
+
+static struct kunit_case pm_qos_test_cases[] = {
+	KUNIT_CASE(freq_qos_test_min),
+	KUNIT_CASE(freq_qos_test_maxdef),
+	KUNIT_CASE(freq_qos_test_readd),
+	{},
+};
+
+static struct kunit_suite pm_qos_test_module = {
+	.name = "qos-kunit-test",
+	.test_cases = pm_qos_test_cases,
+};
+kunit_test_suites(&pm_qos_test_module);
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 350dcafd751f..8e93167f1783 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -115,10 +115,20 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type)
 
 	spin_lock_irqsave(&dev->power.lock, flags);
 
-	if (type == DEV_PM_QOS_RESUME_LATENCY) {
+	switch (type) {
+	case DEV_PM_QOS_RESUME_LATENCY:
 		ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT
 			: pm_qos_read_value(&qos->resume_latency);
-	} else {
+		break;
+	case DEV_PM_QOS_MIN_FREQUENCY:
+		ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE
+			: freq_qos_read_value(&qos->freq, FREQ_QOS_MIN);
+		break;
+	case DEV_PM_QOS_MAX_FREQUENCY:
+		ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE
+			: freq_qos_read_value(&qos->freq, FREQ_QOS_MAX);
+		break;
+	default:
 		WARN_ON(1);
 		ret = 0;
 	}
@@ -159,6 +169,10 @@ static int apply_constraint(struct dev_pm_qos_request *req,
 			req->dev->power.set_latency_tolerance(req->dev, value);
 		}
 		break;
+	case DEV_PM_QOS_MIN_FREQUENCY:
+	case DEV_PM_QOS_MAX_FREQUENCY:
+		ret = freq_qos_apply(&req->data.freq, action, value);
+		break;
 	case DEV_PM_QOS_FLAGS:
 		ret = pm_qos_update_flags(&qos->flags, &req->data.flr,
 					  action, value);
@@ -209,6 +223,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
 	c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
 	c->type = PM_QOS_MIN;
 
+	freq_constraints_init(&qos->freq);
+
 	INIT_LIST_HEAD(&qos->flags.list);
 
 	spin_lock_irq(&dev->power.lock);
@@ -269,6 +285,20 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 		memset(req, 0, sizeof(*req));
 	}
 
+	c = &qos->freq.min_freq;
+	plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) {
+		apply_constraint(req, PM_QOS_REMOVE_REQ,
+				 PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE);
+		memset(req, 0, sizeof(*req));
+	}
+
+	c = &qos->freq.max_freq;
+	plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) {
+		apply_constraint(req, PM_QOS_REMOVE_REQ,
+				 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+		memset(req, 0, sizeof(*req));
+	}
+
 	f = &qos->flags;
 	list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) {
 		apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
@@ -314,11 +344,22 @@ static int __dev_pm_qos_add_request(struct device *dev,
 		ret = dev_pm_qos_constraints_allocate(dev);
 
 	trace_dev_pm_qos_add_request(dev_name(dev), type, value);
-	if (!ret) {
-		req->dev = dev;
-		req->type = type;
+	if (ret)
+		return ret;
+
+	req->dev = dev;
+	req->type = type;
+	if (req->type == DEV_PM_QOS_MIN_FREQUENCY)
+		ret = freq_qos_add_request(&dev->power.qos->freq,
+					   &req->data.freq,
+					   FREQ_QOS_MIN, value);
+	else if (req->type == DEV_PM_QOS_MAX_FREQUENCY)
+		ret = freq_qos_add_request(&dev->power.qos->freq,
+					   &req->data.freq,
+					   FREQ_QOS_MAX, value);
+	else
 		ret = apply_constraint(req, PM_QOS_ADD_REQ, value);
-	}
+
 	return ret;
 }
 
@@ -382,6 +423,10 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 	case DEV_PM_QOS_LATENCY_TOLERANCE:
 		curr_value = req->data.pnode.prio;
 		break;
+	case DEV_PM_QOS_MIN_FREQUENCY:
+	case DEV_PM_QOS_MAX_FREQUENCY:
+		curr_value = req->data.freq.pnode.prio;
+		break;
 	case DEV_PM_QOS_FLAGS:
 		curr_value = req->data.flr.flags;
 		break;
@@ -507,6 +552,14 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier,
 		ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers,
 						       notifier);
 		break;
+	case DEV_PM_QOS_MIN_FREQUENCY:
+		ret = freq_qos_add_notifier(&dev->power.qos->freq,
+					    FREQ_QOS_MIN, notifier);
+		break;
+	case DEV_PM_QOS_MAX_FREQUENCY:
+		ret = freq_qos_add_notifier(&dev->power.qos->freq,
+					    FREQ_QOS_MAX, notifier);
+		break;
 	default:
 		WARN_ON(1);
 		ret = -EINVAL;
@@ -546,6 +599,14 @@ int dev_pm_qos_remove_notifier(struct device *dev,
 		ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers,
 							 notifier);
 		break;
+	case DEV_PM_QOS_MIN_FREQUENCY:
+		ret = freq_qos_remove_notifier(&dev->power.qos->freq,
+					       FREQ_QOS_MIN, notifier);
+		break;
+	case DEV_PM_QOS_MAX_FREQUENCY:
+		ret = freq_qos_remove_notifier(&dev->power.qos->freq,
+					       FREQ_QOS_MAX, notifier);
+		break;
 	default:
 		WARN_ON(1);
 		ret = -EINVAL;
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 48616f358854..16134a69bf6f 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1006,8 +1006,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
 	int retval;
 
 	if (rpmflags & RPM_GET_PUT) {
-		if (!atomic_dec_and_test(&dev->power.usage_count))
+		if (!atomic_dec_and_test(&dev->power.usage_count)) {
+			trace_rpm_usage_rcuidle(dev, rpmflags);
 			return 0;
+		}
 	}
 
 	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
@@ -1038,8 +1040,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
 	int retval;
 
 	if (rpmflags & RPM_GET_PUT) {
-		if (!atomic_dec_and_test(&dev->power.usage_count))
+		if (!atomic_dec_and_test(&dev->power.usage_count)) {
+			trace_rpm_usage_rcuidle(dev, rpmflags);
 			return 0;
+		}
 	}
 
 	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
@@ -1101,6 +1105,7 @@ int pm_runtime_get_if_in_use(struct device *dev)
 	retval = dev->power.disable_depth > 0 ? -EINVAL :
 		dev->power.runtime_status == RPM_ACTIVE
 			&& atomic_inc_not_zero(&dev->power.usage_count);
+	trace_rpm_usage_rcuidle(dev, 0);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 	return retval;
 }
@@ -1434,6 +1439,8 @@ void pm_runtime_allow(struct device *dev)
 	dev->power.runtime_auto = true;
 	if (atomic_dec_and_test(&dev->power.usage_count))
 		rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
+	else
+		trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
 
  out:
 	spin_unlock_irq(&dev->power.lock);
@@ -1501,6 +1508,8 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
 		if (!old_use || old_delay >= 0) {
 			atomic_inc(&dev->power.usage_count);
 			rpm_resume(dev, 0);
+		} else {
+			trace_rpm_usage_rcuidle(dev, 0);
 		}
 	}
 
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 5ce77d1ef9fc..8e021082dba8 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -272,7 +272,7 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
 {
 	struct wake_irq *wirq = dev->power.wakeirq;
 
-	if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK)))
+	if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
 		return;
 
 	if (likely(wirq->status & WAKE_IRQ_DEDICATED_MANAGED)) {
@@ -299,7 +299,7 @@ void dev_pm_disable_wake_irq_check(struct device *dev)
 {
 	struct wake_irq *wirq = dev->power.wakeirq;
 
-	if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK)))
+	if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
 		return;
 
 	if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 5817b51d2b15..27f3e60608e5 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -248,6 +248,60 @@ void wakeup_source_unregister(struct wakeup_source *ws)
 EXPORT_SYMBOL_GPL(wakeup_source_unregister);
 
 /**
+ * wakeup_sources_read_lock - Lock wakeup source list for read.
+ *
+ * Returns an index of srcu lock for struct wakeup_srcu.
+ * This index must be passed to the matching wakeup_sources_read_unlock().
+ */
+int wakeup_sources_read_lock(void)
+{
+	return srcu_read_lock(&wakeup_srcu);
+}
+EXPORT_SYMBOL_GPL(wakeup_sources_read_lock);
+
+/**
+ * wakeup_sources_read_unlock - Unlock wakeup source list.
+ * @idx: return value from corresponding wakeup_sources_read_lock()
+ */
+void wakeup_sources_read_unlock(int idx)
+{
+	srcu_read_unlock(&wakeup_srcu, idx);
+}
+EXPORT_SYMBOL_GPL(wakeup_sources_read_unlock);
+
+/**
+ * wakeup_sources_walk_start - Begin a walk on wakeup source list
+ *
+ * Returns first object of the list of wakeup sources.
+ *
+ * Note that to be safe, wakeup sources list needs to be locked by calling
+ * wakeup_source_read_lock() for this.
+ */
+struct wakeup_source *wakeup_sources_walk_start(void)
+{
+	struct list_head *ws_head = &wakeup_sources;
+
+	return list_entry_rcu(ws_head->next, struct wakeup_source, entry);
+}
+EXPORT_SYMBOL_GPL(wakeup_sources_walk_start);
+
+/**
+ * wakeup_sources_walk_next - Get next wakeup source from the list
+ * @ws: Previous wakeup source object
+ *
+ * Note that to be safe, wakeup sources list needs to be locked by calling
+ * wakeup_source_read_lock() for this.
+ */
+struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws)
+{
+	struct list_head *ws_head = &wakeup_sources;
+
+	return list_next_or_null_rcu(ws_head, &ws->entry,
+				struct wakeup_source, entry);
+}
+EXPORT_SYMBOL_GPL(wakeup_sources_walk_next);
+
+/**
  * device_wakeup_attach - Attach a wakeup source object to a device object.
  * @dev: Device to handle.
  * @ws: Wakeup source object to attach to @dev.
@@ -1071,6 +1125,9 @@ static void *wakeup_sources_stats_seq_next(struct seq_file *m,
 		break;
 	}
 
+	if (!next_ws)
+		print_wakeup_source_stats(m, &deleted_ws);
+
 	return next_ws;
 }
 
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 81bd01ed4042..511f6d7acdfe 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -557,6 +557,42 @@ int device_add_properties(struct device *dev,
 EXPORT_SYMBOL_GPL(device_add_properties);
 
 /**
+ * fwnode_get_name - Return the name of a node
+ * @fwnode: The firmware node
+ *
+ * Returns a pointer to the node name.
+ */
+const char *fwnode_get_name(const struct fwnode_handle *fwnode)
+{
+	return fwnode_call_ptr_op(fwnode, get_name);
+}
+
+/**
+ * fwnode_get_name_prefix - Return the prefix of node for printing purposes
+ * @fwnode: The firmware node
+ *
+ * Returns the prefix of a node, intended to be printed right before the node.
+ * The prefix works also as a separator between the nodes.
+ */
+const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode)
+{
+	return fwnode_call_ptr_op(fwnode, get_name_prefix);
+}
+
+/**
+ * fwnode_get_parent - Return parent firwmare node
+ * @fwnode: Firmware whose parent is retrieved
+ *
+ * Return parent firmware node of the given node if possible or %NULL if no
+ * parent was available.
+ */
+struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode)
+{
+	return fwnode_call_ptr_op(fwnode, get_parent);
+}
+EXPORT_SYMBOL_GPL(fwnode_get_parent);
+
+/**
  * fwnode_get_next_parent - Iterate to the node's parent
  * @fwnode: Firmware whose parent is retrieved
  *
@@ -578,17 +614,50 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode)
 EXPORT_SYMBOL_GPL(fwnode_get_next_parent);
 
 /**
- * fwnode_get_parent - Return parent firwmare node
- * @fwnode: Firmware whose parent is retrieved
+ * fwnode_count_parents - Return the number of parents a node has
+ * @fwnode: The node the parents of which are to be counted
  *
- * Return parent firmware node of the given node if possible or %NULL if no
- * parent was available.
+ * Returns the number of parents a node has.
  */
-struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode)
+unsigned int fwnode_count_parents(const struct fwnode_handle *fwnode)
 {
-	return fwnode_call_ptr_op(fwnode, get_parent);
+	struct fwnode_handle *__fwnode;
+	unsigned int count;
+
+	__fwnode = fwnode_get_parent(fwnode);
+
+	for (count = 0; __fwnode; count++)
+		__fwnode = fwnode_get_next_parent(__fwnode);
+
+	return count;
 }
-EXPORT_SYMBOL_GPL(fwnode_get_parent);
+EXPORT_SYMBOL_GPL(fwnode_count_parents);
+
+/**
+ * fwnode_get_nth_parent - Return an nth parent of a node
+ * @fwnode: The node the parent of which is requested
+ * @depth: Distance of the parent from the node
+ *
+ * Returns the nth parent of a node. If there is no parent at the requested
+ * @depth, %NULL is returned. If @depth is 0, the functionality is equivalent to
+ * fwnode_handle_get(). For @depth == 1, it is fwnode_get_parent() and so on.
+ *
+ * The caller is responsible for calling fwnode_handle_put() for the returned
+ * node.
+ */
+struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode,
+					    unsigned int depth)
+{
+	unsigned int i;
+
+	fwnode_handle_get(fwnode);
+
+	for (i = 0; i < depth && fwnode; i++)
+		fwnode = fwnode_get_next_parent(fwnode);
+
+	return fwnode;
+}
+EXPORT_SYMBOL_GPL(fwnode_get_nth_parent);
 
 /**
  * fwnode_get_next_child_node - Return the next child node handle for a node
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index ac9b31c57967..008f8da69d97 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -43,7 +43,7 @@ static int regmap_smbus_byte_reg_write(void *context, unsigned int reg,
 	return i2c_smbus_write_byte_data(i2c, reg, val);
 }
 
-static struct regmap_bus regmap_smbus_byte = {
+static const struct regmap_bus regmap_smbus_byte = {
 	.reg_write = regmap_smbus_byte_reg_write,
 	.reg_read = regmap_smbus_byte_reg_read,
 };
@@ -79,7 +79,7 @@ static int regmap_smbus_word_reg_write(void *context, unsigned int reg,
 	return i2c_smbus_write_word_data(i2c, reg, val);
 }
 
-static struct regmap_bus regmap_smbus_word = {
+static const struct regmap_bus regmap_smbus_word = {
 	.reg_write = regmap_smbus_word_reg_write,
 	.reg_read = regmap_smbus_word_reg_read,
 };
@@ -115,7 +115,7 @@ static int regmap_smbus_word_write_swapped(void *context, unsigned int reg,
 	return i2c_smbus_write_word_swapped(i2c, reg, val);
 }
 
-static struct regmap_bus regmap_smbus_word_swapped = {
+static const struct regmap_bus regmap_smbus_word_swapped = {
 	.reg_write = regmap_smbus_word_write_swapped,
 	.reg_read = regmap_smbus_word_read_swapped,
 };
@@ -197,7 +197,7 @@ static int regmap_i2c_read(void *context,
 		return -EIO;
 }
 
-static struct regmap_bus regmap_i2c = {
+static const struct regmap_bus regmap_i2c = {
 	.write = regmap_i2c_write,
 	.gather_write = regmap_i2c_gather_write,
 	.read = regmap_i2c_read,
@@ -239,7 +239,7 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg,
 		return -EIO;
 }
 
-static struct regmap_bus regmap_i2c_smbus_i2c_block = {
+static const struct regmap_bus regmap_i2c_smbus_i2c_block = {
 	.write = regmap_i2c_smbus_i2c_write,
 	.read = regmap_i2c_smbus_i2c_read,
 	.max_raw_read = I2C_SMBUS_BLOCK_MAX,
diff --git a/drivers/base/regmap/regmap-w1.c b/drivers/base/regmap/regmap-w1.c
index 3a7d30b8c3ac..1fbaaad71ca5 100644
--- a/drivers/base/regmap/regmap-w1.c
+++ b/drivers/base/regmap/regmap-w1.c
@@ -215,8 +215,6 @@ struct regmap *__regmap_init_w1(struct device *w1_dev,
 
 	return __regmap_init(w1_dev, bus, w1_dev, config,
 			 lock_key, lock_name);
-
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(__regmap_init_w1);
 
@@ -233,8 +231,6 @@ struct regmap *__devm_regmap_init_w1(struct device *w1_dev,
 
 	return __devm_regmap_init(w1_dev, bus, w1_dev, config,
 				 lock_key, lock_name);
-
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(__devm_regmap_init_w1);
 
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 19f57ccfbe1d..59f911e57719 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1488,11 +1488,18 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 
 	WARN_ON(!map->bus);
 
-	/* Check for unwritable registers before we start */
-	for (i = 0; i < val_len / map->format.val_bytes; i++)
-		if (!regmap_writeable(map,
-				     reg + regmap_get_offset(map, i)))
-			return -EINVAL;
+	/* Check for unwritable or noinc registers in range
+	 * before we start
+	 */
+	if (!regmap_writeable_noinc(map, reg)) {
+		for (i = 0; i < val_len / map->format.val_bytes; i++) {
+			unsigned int element =
+				reg + regmap_get_offset(map, i);
+			if (!regmap_writeable(map, element) ||
+				regmap_writeable_noinc(map, element))
+				return -EINVAL;
+		}
+	}
 
 	if (!map->cache_bypass && map->format.parse_val) {
 		unsigned int ival;
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 7c0c5ca5953d..4af11a423475 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -104,15 +104,12 @@ static const struct attribute_group soc_attr_group = {
 	.is_visible = soc_attribute_mode,
 };
 
-static const struct attribute_group *soc_attr_groups[] = {
-	&soc_attr_group,
-	NULL,
-};
-
 static void soc_release(struct device *dev)
 {
 	struct soc_device *soc_dev = container_of(dev, struct soc_device, dev);
 
+	ida_simple_remove(&soc_ida, soc_dev->soc_dev_num);
+	kfree(soc_dev->dev.groups);
 	kfree(soc_dev);
 }
 
@@ -121,6 +118,7 @@ static struct soc_device_attribute *early_soc_dev_attr;
 struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr)
 {
 	struct soc_device *soc_dev;
+	const struct attribute_group **soc_attr_groups;
 	int ret;
 
 	if (!soc_bus_type.p) {
@@ -136,10 +134,18 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr
 		goto out1;
 	}
 
+	soc_attr_groups = kcalloc(3, sizeof(*soc_attr_groups), GFP_KERNEL);
+	if (!soc_attr_groups) {
+		ret = -ENOMEM;
+		goto out2;
+	}
+	soc_attr_groups[0] = &soc_attr_group;
+	soc_attr_groups[1] = soc_dev_attr->custom_attr_group;
+
 	/* Fetch a unique (reclaimable) SOC ID. */
 	ret = ida_simple_get(&soc_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0)
-		goto out2;
+		goto out3;
 	soc_dev->soc_dev_num = ret;
 
 	soc_dev->attr = soc_dev_attr;
@@ -150,15 +156,15 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr
 	dev_set_name(&soc_dev->dev, "soc%d", soc_dev->soc_dev_num);
 
 	ret = device_register(&soc_dev->dev);
-	if (ret)
-		goto out3;
+	if (ret) {
+		put_device(&soc_dev->dev);
+		return ERR_PTR(ret);
+	}
 
 	return soc_dev;
 
 out3:
-	ida_simple_remove(&soc_ida, soc_dev->soc_dev_num);
-	put_device(&soc_dev->dev);
-	soc_dev = NULL;
+	kfree(soc_attr_groups);
 out2:
 	kfree(soc_dev);
 out1:
@@ -169,8 +175,6 @@ EXPORT_SYMBOL_GPL(soc_device_register);
 /* Ensure soc_dev->attr is freed prior to calling soc_device_unregister. */
 void soc_device_unregister(struct soc_device *soc_dev)
 {
-	ida_simple_remove(&soc_ida, soc_dev->soc_dev_num);
-
 	device_unregister(&soc_dev->dev);
 	early_soc_dev_attr = NULL;
 }
diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index a1f3f0994f9f..0b081dee1e95 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -71,9 +71,9 @@ software_node_to_swnode(const struct software_node *node)
 	return swnode;
 }
 
-const struct software_node *to_software_node(struct fwnode_handle *fwnode)
+const struct software_node *to_software_node(const struct fwnode_handle *fwnode)
 {
-	struct swnode *swnode = to_swnode(fwnode);
+	const struct swnode *swnode = to_swnode(fwnode);
 
 	return swnode ? swnode->node : NULL;
 }
@@ -103,71 +103,12 @@ property_entry_get(const struct property_entry *prop, const char *name)
 	return NULL;
 }
 
-static void
-property_set_pointer(struct property_entry *prop, const void *pointer)
-{
-	switch (prop->type) {
-	case DEV_PROP_U8:
-		if (prop->is_array)
-			prop->pointer.u8_data = pointer;
-		else
-			prop->value.u8_data = *((u8 *)pointer);
-		break;
-	case DEV_PROP_U16:
-		if (prop->is_array)
-			prop->pointer.u16_data = pointer;
-		else
-			prop->value.u16_data = *((u16 *)pointer);
-		break;
-	case DEV_PROP_U32:
-		if (prop->is_array)
-			prop->pointer.u32_data = pointer;
-		else
-			prop->value.u32_data = *((u32 *)pointer);
-		break;
-	case DEV_PROP_U64:
-		if (prop->is_array)
-			prop->pointer.u64_data = pointer;
-		else
-			prop->value.u64_data = *((u64 *)pointer);
-		break;
-	case DEV_PROP_STRING:
-		if (prop->is_array)
-			prop->pointer.str = pointer;
-		else
-			prop->value.str = pointer;
-		break;
-	default:
-		break;
-	}
-}
-
 static const void *property_get_pointer(const struct property_entry *prop)
 {
-	switch (prop->type) {
-	case DEV_PROP_U8:
-		if (prop->is_array)
-			return prop->pointer.u8_data;
-		return &prop->value.u8_data;
-	case DEV_PROP_U16:
-		if (prop->is_array)
-			return prop->pointer.u16_data;
-		return &prop->value.u16_data;
-	case DEV_PROP_U32:
-		if (prop->is_array)
-			return prop->pointer.u32_data;
-		return &prop->value.u32_data;
-	case DEV_PROP_U64:
-		if (prop->is_array)
-			return prop->pointer.u64_data;
-		return &prop->value.u64_data;
-	case DEV_PROP_STRING:
-		if (prop->is_array)
-			return prop->pointer.str;
-		return &prop->value.str;
-	default:
+	if (!prop->length)
 		return NULL;
-	}
+
+	return prop->is_inline ? &prop->value : prop->pointer;
 }
 
 static const void *property_entry_find(const struct property_entry *props,
@@ -187,66 +128,6 @@ static const void *property_entry_find(const struct property_entry *props,
 	return pointer;
 }
 
-static int property_entry_read_u8_array(const struct property_entry *props,
-					const char *propname,
-					u8 *values, size_t nval)
-{
-	const void *pointer;
-	size_t length = nval * sizeof(*values);
-
-	pointer = property_entry_find(props, propname, length);
-	if (IS_ERR(pointer))
-		return PTR_ERR(pointer);
-
-	memcpy(values, pointer, length);
-	return 0;
-}
-
-static int property_entry_read_u16_array(const struct property_entry *props,
-					 const char *propname,
-					 u16 *values, size_t nval)
-{
-	const void *pointer;
-	size_t length = nval * sizeof(*values);
-
-	pointer = property_entry_find(props, propname, length);
-	if (IS_ERR(pointer))
-		return PTR_ERR(pointer);
-
-	memcpy(values, pointer, length);
-	return 0;
-}
-
-static int property_entry_read_u32_array(const struct property_entry *props,
-					 const char *propname,
-					 u32 *values, size_t nval)
-{
-	const void *pointer;
-	size_t length = nval * sizeof(*values);
-
-	pointer = property_entry_find(props, propname, length);
-	if (IS_ERR(pointer))
-		return PTR_ERR(pointer);
-
-	memcpy(values, pointer, length);
-	return 0;
-}
-
-static int property_entry_read_u64_array(const struct property_entry *props,
-					 const char *propname,
-					 u64 *values, size_t nval)
-{
-	const void *pointer;
-	size_t length = nval * sizeof(*values);
-
-	pointer = property_entry_find(props, propname, length);
-	if (IS_ERR(pointer))
-		return PTR_ERR(pointer);
-
-	memcpy(values, pointer, length);
-	return 0;
-}
-
 static int
 property_entry_count_elems_of_size(const struct property_entry *props,
 				   const char *propname, size_t length)
@@ -265,49 +146,45 @@ static int property_entry_read_int_array(const struct property_entry *props,
 					 unsigned int elem_size, void *val,
 					 size_t nval)
 {
+	const void *pointer;
+	size_t length;
+
 	if (!val)
 		return property_entry_count_elems_of_size(props, name,
 							  elem_size);
-	switch (elem_size) {
-	case sizeof(u8):
-		return property_entry_read_u8_array(props, name, val, nval);
-	case sizeof(u16):
-		return property_entry_read_u16_array(props, name, val, nval);
-	case sizeof(u32):
-		return property_entry_read_u32_array(props, name, val, nval);
-	case sizeof(u64):
-		return property_entry_read_u64_array(props, name, val, nval);
-	}
 
-	return -ENXIO;
+	if (!is_power_of_2(elem_size) || elem_size > sizeof(u64))
+		return -ENXIO;
+
+	length = nval * elem_size;
+
+	pointer = property_entry_find(props, name, length);
+	if (IS_ERR(pointer))
+		return PTR_ERR(pointer);
+
+	memcpy(val, pointer, length);
+	return 0;
 }
 
 static int property_entry_read_string_array(const struct property_entry *props,
 					    const char *propname,
 					    const char **strings, size_t nval)
 {
-	const struct property_entry *prop;
 	const void *pointer;
-	size_t array_len, length;
+	size_t length;
+	int array_len;
 
 	/* Find out the array length. */
-	prop = property_entry_get(props, propname);
-	if (!prop)
-		return -EINVAL;
-
-	if (prop->is_array)
-		/* Find the length of an array. */
-		array_len = property_entry_count_elems_of_size(props, propname,
-							  sizeof(const char *));
-	else
-		/* The array length for a non-array string property is 1. */
-		array_len = 1;
+	array_len = property_entry_count_elems_of_size(props, propname,
+						       sizeof(const char *));
+	if (array_len < 0)
+		return array_len;
 
 	/* Return how many there are if strings is NULL. */
 	if (!strings)
 		return array_len;
 
-	array_len = min(nval, array_len);
+	array_len = min_t(size_t, nval, array_len);
 	length = array_len * sizeof(*strings);
 
 	pointer = property_entry_find(props, propname, length);
@@ -321,91 +198,91 @@ static int property_entry_read_string_array(const struct property_entry *props,
 
 static void property_entry_free_data(const struct property_entry *p)
 {
-	const void *pointer = property_get_pointer(p);
+	const char * const *src_str;
 	size_t i, nval;
 
-	if (p->is_array) {
-		if (p->type == DEV_PROP_STRING && p->pointer.str) {
-			nval = p->length / sizeof(const char *);
-			for (i = 0; i < nval; i++)
-				kfree(p->pointer.str[i]);
-		}
-		kfree(pointer);
-	} else if (p->type == DEV_PROP_STRING) {
-		kfree(p->value.str);
+	if (p->type == DEV_PROP_STRING) {
+		src_str = property_get_pointer(p);
+		nval = p->length / sizeof(*src_str);
+		for (i = 0; i < nval; i++)
+			kfree(src_str[i]);
 	}
+
+	if (!p->is_inline)
+		kfree(p->pointer);
+
 	kfree(p->name);
 }
 
-static int property_copy_string_array(struct property_entry *dst,
-				      const struct property_entry *src)
+static bool property_copy_string_array(const char **dst_ptr,
+				       const char * const *src_ptr,
+				       size_t nval)
 {
-	const char **d;
-	size_t nval = src->length / sizeof(*d);
 	int i;
 
-	d = kcalloc(nval, sizeof(*d), GFP_KERNEL);
-	if (!d)
-		return -ENOMEM;
-
 	for (i = 0; i < nval; i++) {
-		d[i] = kstrdup(src->pointer.str[i], GFP_KERNEL);
-		if (!d[i] && src->pointer.str[i]) {
+		dst_ptr[i] = kstrdup(src_ptr[i], GFP_KERNEL);
+		if (!dst_ptr[i] && src_ptr[i]) {
 			while (--i >= 0)
-				kfree(d[i]);
-			kfree(d);
-			return -ENOMEM;
+				kfree(dst_ptr[i]);
+			return false;
 		}
 	}
 
-	dst->pointer.str = d;
-	return 0;
+	return true;
 }
 
 static int property_entry_copy_data(struct property_entry *dst,
 				    const struct property_entry *src)
 {
 	const void *pointer = property_get_pointer(src);
-	const void *new;
-	int error;
+	void *dst_ptr;
+	size_t nval;
+
+	/*
+	 * Properties with no data should not be marked as stored
+	 * out of line.
+	 */
+	if (!src->is_inline && !src->length)
+		return -ENODATA;
+
+	/*
+	 * Reference properties are never stored inline as
+	 * they are too big.
+	 */
+	if (src->type == DEV_PROP_REF && src->is_inline)
+		return -EINVAL;
 
-	if (src->is_array) {
-		if (!src->length)
-			return -ENODATA;
-
-		if (src->type == DEV_PROP_STRING) {
-			error = property_copy_string_array(dst, src);
-			if (error)
-				return error;
-			new = dst->pointer.str;
-		} else {
-			new = kmemdup(pointer, src->length, GFP_KERNEL);
-			if (!new)
-				return -ENOMEM;
-		}
-	} else if (src->type == DEV_PROP_STRING) {
-		new = kstrdup(src->value.str, GFP_KERNEL);
-		if (!new && src->value.str)
+	if (src->length <= sizeof(dst->value)) {
+		dst_ptr = &dst->value;
+		dst->is_inline = true;
+	} else {
+		dst_ptr = kmalloc(src->length, GFP_KERNEL);
+		if (!dst_ptr)
 			return -ENOMEM;
+		dst->pointer = dst_ptr;
+	}
+
+	if (src->type == DEV_PROP_STRING) {
+		nval = src->length / sizeof(const char *);
+		if (!property_copy_string_array(dst_ptr, pointer, nval)) {
+			if (!dst->is_inline)
+				kfree(dst->pointer);
+			return -ENOMEM;
+		}
 	} else {
-		new = pointer;
+		memcpy(dst_ptr, pointer, src->length);
 	}
 
 	dst->length = src->length;
-	dst->is_array = src->is_array;
 	dst->type = src->type;
-
-	property_set_pointer(dst, new);
-
 	dst->name = kstrdup(src->name, GFP_KERNEL);
-	if (!dst->name)
-		goto out_free_data;
+	if (!dst->name) {
+		property_entry_free_data(dst);
+		return -ENOMEM;
+	}
 
 	return 0;
-
-out_free_data:
-	property_entry_free_data(dst);
-	return -ENOMEM;
 }
 
 /**
@@ -515,12 +392,47 @@ static int software_node_read_string_array(const struct fwnode_handle *fwnode,
 						propname, val, nval);
 }
 
+static const char *
+software_node_get_name(const struct fwnode_handle *fwnode)
+{
+	const struct swnode *swnode = to_swnode(fwnode);
+
+	if (!swnode)
+		return "(null)";
+
+	return kobject_name(&swnode->kobj);
+}
+
+static const char *
+software_node_get_name_prefix(const struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *parent;
+	const char *prefix;
+
+	parent = fwnode_get_parent(fwnode);
+	if (!parent)
+		return "";
+
+	/* Figure out the prefix from the parents. */
+	while (is_software_node(parent))
+		parent = fwnode_get_next_parent(parent);
+
+	prefix = fwnode_get_name_prefix(parent);
+	fwnode_handle_put(parent);
+
+	/* Guess something if prefix was NULL. */
+	return prefix ?: "/";
+}
+
 static struct fwnode_handle *
 software_node_get_parent(const struct fwnode_handle *fwnode)
 {
 	struct swnode *swnode = to_swnode(fwnode);
 
-	return swnode ? (swnode->parent ? &swnode->parent->fwnode : NULL) : NULL;
+	if (!swnode || !swnode->parent)
+		return NULL;
+
+	return fwnode_handle_get(&swnode->parent->fwnode);
 }
 
 static struct fwnode_handle *
@@ -567,31 +479,49 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode,
 				 struct fwnode_reference_args *args)
 {
 	struct swnode *swnode = to_swnode(fwnode);
-	const struct software_node_reference *ref;
+	const struct software_node_ref_args *ref_array;
+	const struct software_node_ref_args *ref;
 	const struct property_entry *prop;
 	struct fwnode_handle *refnode;
+	u32 nargs_prop_val;
+	int error;
 	int i;
 
-	if (!swnode || !swnode->node->references)
+	if (!swnode)
 		return -ENOENT;
 
-	for (ref = swnode->node->references; ref->name; ref++)
-		if (!strcmp(ref->name, propname))
-			break;
+	prop = property_entry_get(swnode->node->properties, propname);
+	if (!prop)
+		return -ENOENT;
+
+	if (prop->type != DEV_PROP_REF)
+		return -EINVAL;
 
-	if (!ref->name || index > (ref->nrefs - 1))
+	/*
+	 * We expect that references are never stored inline, even
+	 * single ones, as they are too big.
+	 */
+	if (prop->is_inline)
+		return -EINVAL;
+
+	if (index * sizeof(*ref) >= prop->length)
 		return -ENOENT;
 
-	refnode = software_node_fwnode(ref->refs[index].node);
+	ref_array = prop->pointer;
+	ref = &ref_array[index];
+
+	refnode = software_node_fwnode(ref->node);
 	if (!refnode)
 		return -ENOENT;
 
 	if (nargs_prop) {
-		prop = property_entry_get(swnode->node->properties, nargs_prop);
-		if (!prop)
-			return -EINVAL;
+		error = property_entry_read_int_array(swnode->node->properties,
+						      nargs_prop, sizeof(u32),
+						      &nargs_prop_val, 1);
+		if (error)
+			return error;
 
-		nargs = prop->value.u32_data;
+		nargs = nargs_prop_val;
 	}
 
 	if (nargs > NR_FWNODE_REFERENCE_ARGS)
@@ -601,7 +531,7 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode,
 	args->nargs = nargs;
 
 	for (i = 0; i < nargs; i++)
-		args->args[i] = ref->refs[index].args[i];
+		args->args[i] = ref->args[i];
 
 	return 0;
 }
@@ -612,6 +542,8 @@ static const struct fwnode_operations software_node_ops = {
 	.property_present = software_node_property_present,
 	.property_read_int_array = software_node_read_int_array,
 	.property_read_string_array = software_node_read_string_array,
+	.get_name = software_node_get_name,
+	.get_name_prefix = software_node_get_name_prefix,
 	.get_parent = software_node_get_parent,
 	.get_next_child_node = software_node_get_next_child,
 	.get_named_child_node = software_node_get_named_child_node,
diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig
index 86e85daa80bf..305c7751184a 100644
--- a/drivers/base/test/Kconfig
+++ b/drivers/base/test/Kconfig
@@ -8,3 +8,6 @@ config TEST_ASYNC_DRIVER_PROBE
 	  The module name will be test_async_driver_probe.ko
 
 	  If unsure say N.
+config KUNIT_DRIVER_PE_TEST
+	bool "KUnit Tests for property entry API"
+	depends on KUNIT=y
diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile
index 0f1f7277a013..3ca56367c84b 100644
--- a/drivers/base/test/Makefile
+++ b/drivers/base/test/Makefile
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE)	+= test_async_driver_probe.o
+
+obj-$(CONFIG_KUNIT_DRIVER_PE_TEST) += property-entry-test.o
diff --git a/drivers/base/test/property-entry-test.c b/drivers/base/test/property-entry-test.c
new file mode 100644
index 000000000000..abe03315180f
--- /dev/null
+++ b/drivers/base/test/property-entry-test.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Unit tests for property entries API
+//
+// Copyright 2019 Google LLC.
+
+#include <kunit/test.h>
+#include <linux/property.h>
+#include <linux/types.h>
+
+static void pe_test_uints(struct kunit *test)
+{
+	static const struct property_entry entries[] = {
+		PROPERTY_ENTRY_U8("prop-u8", 8),
+		PROPERTY_ENTRY_U16("prop-u16", 16),
+		PROPERTY_ENTRY_U32("prop-u32", 32),
+		PROPERTY_ENTRY_U64("prop-u64", 64),
+		{ }
+	};
+
+	struct fwnode_handle *node;
+	u8 val_u8, array_u8[2];
+	u16 val_u16, array_u16[2];
+	u32 val_u32, array_u32[2];
+	u64 val_u64, array_u64[2];
+	int error;
+
+	node = fwnode_create_software_node(entries, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, node);
+
+	error = fwnode_property_read_u8(node, "prop-u8", &val_u8);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u8, 8);
+
+	error = fwnode_property_read_u8_array(node, "prop-u8", array_u8, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u8[0], 8);
+
+	error = fwnode_property_read_u8_array(node, "prop-u8", array_u8, 2);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u8(node, "no-prop-u8", &val_u8);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u8_array(node, "no-prop-u8", array_u8, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16(node, "prop-u16", &val_u16);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u16, 16);
+
+	error = fwnode_property_read_u16_array(node, "prop-u16", array_u16, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u16[0], 16);
+
+	error = fwnode_property_read_u16_array(node, "prop-u16", array_u16, 2);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16(node, "no-prop-u16", &val_u16);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16_array(node, "no-prop-u16", array_u16, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32(node, "prop-u32", &val_u32);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u32, 32);
+
+	error = fwnode_property_read_u32_array(node, "prop-u32", array_u32, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u32[0], 32);
+
+	error = fwnode_property_read_u32_array(node, "prop-u32", array_u32, 2);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32(node, "no-prop-u32", &val_u32);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32_array(node, "no-prop-u32", array_u32, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64(node, "prop-u64", &val_u64);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u64, 64);
+
+	error = fwnode_property_read_u64_array(node, "prop-u64", array_u64, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u64[0], 64);
+
+	error = fwnode_property_read_u64_array(node, "prop-u64", array_u64, 2);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64(node, "no-prop-u64", &val_u64);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64_array(node, "no-prop-u64", array_u64, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	fwnode_remove_software_node(node);
+}
+
+static void pe_test_uint_arrays(struct kunit *test)
+{
+	static const u8 a_u8[16] = { 8, 9 };
+	static const u16 a_u16[16] = { 16, 17 };
+	static const u32 a_u32[16] = { 32, 33 };
+	static const u64 a_u64[16] = { 64, 65 };
+	static const struct property_entry entries[] = {
+		PROPERTY_ENTRY_U8_ARRAY("prop-u8", a_u8),
+		PROPERTY_ENTRY_U16_ARRAY("prop-u16", a_u16),
+		PROPERTY_ENTRY_U32_ARRAY("prop-u32", a_u32),
+		PROPERTY_ENTRY_U64_ARRAY("prop-u64", a_u64),
+		{ }
+	};
+
+	struct fwnode_handle *node;
+	u8 val_u8, array_u8[32];
+	u16 val_u16, array_u16[32];
+	u32 val_u32, array_u32[32];
+	u64 val_u64, array_u64[32];
+	int error;
+
+	node = fwnode_create_software_node(entries, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, node);
+
+	error = fwnode_property_read_u8(node, "prop-u8", &val_u8);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u8, 8);
+
+	error = fwnode_property_read_u8_array(node, "prop-u8", array_u8, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u8[0], 8);
+
+	error = fwnode_property_read_u8_array(node, "prop-u8", array_u8, 2);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u8[0], 8);
+	KUNIT_EXPECT_EQ(test, (int)array_u8[1], 9);
+
+	error = fwnode_property_read_u8_array(node, "prop-u8", array_u8, 17);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u8(node, "no-prop-u8", &val_u8);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u8_array(node, "no-prop-u8", array_u8, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16(node, "prop-u16", &val_u16);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u16, 16);
+
+	error = fwnode_property_read_u16_array(node, "prop-u16", array_u16, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u16[0], 16);
+
+	error = fwnode_property_read_u16_array(node, "prop-u16", array_u16, 2);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u16[0], 16);
+	KUNIT_EXPECT_EQ(test, (int)array_u16[1], 17);
+
+	error = fwnode_property_read_u16_array(node, "prop-u16", array_u16, 17);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16(node, "no-prop-u16", &val_u16);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u16_array(node, "no-prop-u16", array_u16, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32(node, "prop-u32", &val_u32);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u32, 32);
+
+	error = fwnode_property_read_u32_array(node, "prop-u32", array_u32, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u32[0], 32);
+
+	error = fwnode_property_read_u32_array(node, "prop-u32", array_u32, 2);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u32[0], 32);
+	KUNIT_EXPECT_EQ(test, (int)array_u32[1], 33);
+
+	error = fwnode_property_read_u32_array(node, "prop-u32", array_u32, 17);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32(node, "no-prop-u32", &val_u32);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u32_array(node, "no-prop-u32", array_u32, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64(node, "prop-u64", &val_u64);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)val_u64, 64);
+
+	error = fwnode_property_read_u64_array(node, "prop-u64", array_u64, 1);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u64[0], 64);
+
+	error = fwnode_property_read_u64_array(node, "prop-u64", array_u64, 2);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_EQ(test, (int)array_u64[0], 64);
+	KUNIT_EXPECT_EQ(test, (int)array_u64[1], 65);
+
+	error = fwnode_property_read_u64_array(node, "prop-u64", array_u64, 17);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64(node, "no-prop-u64", &val_u64);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_u64_array(node, "no-prop-u64", array_u64, 1);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	fwnode_remove_software_node(node);
+}
+
+static void pe_test_strings(struct kunit *test)
+{
+	static const char *strings[] = {
+		"string-a",
+		"string-b",
+	};
+
+	static const struct property_entry entries[] = {
+		PROPERTY_ENTRY_STRING("str", "single"),
+		PROPERTY_ENTRY_STRING("empty", ""),
+		PROPERTY_ENTRY_STRING_ARRAY("strs", strings),
+		{ }
+	};
+
+	struct fwnode_handle *node;
+	const char *str;
+	const char *strs[10];
+	int error;
+
+	node = fwnode_create_software_node(entries, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, node);
+
+	error = fwnode_property_read_string(node, "str", &str);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_STREQ(test, str, "single");
+
+	error = fwnode_property_read_string_array(node, "str", strs, 1);
+	KUNIT_EXPECT_EQ(test, error, 1);
+	KUNIT_EXPECT_STREQ(test, strs[0], "single");
+
+	/* asking for more data returns what we have */
+	error = fwnode_property_read_string_array(node, "str", strs, 2);
+	KUNIT_EXPECT_EQ(test, error, 1);
+	KUNIT_EXPECT_STREQ(test, strs[0], "single");
+
+	error = fwnode_property_read_string(node, "no-str", &str);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_read_string_array(node, "no-str", strs, 1);
+	KUNIT_EXPECT_LT(test, error, 0);
+
+	error = fwnode_property_read_string(node, "empty", &str);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_STREQ(test, str, "");
+
+	error = fwnode_property_read_string_array(node, "strs", strs, 3);
+	KUNIT_EXPECT_EQ(test, error, 2);
+	KUNIT_EXPECT_STREQ(test, strs[0], "string-a");
+	KUNIT_EXPECT_STREQ(test, strs[1], "string-b");
+
+	error = fwnode_property_read_string_array(node, "strs", strs, 1);
+	KUNIT_EXPECT_EQ(test, error, 1);
+	KUNIT_EXPECT_STREQ(test, strs[0], "string-a");
+
+	/* NULL argument -> returns size */
+	error = fwnode_property_read_string_array(node, "strs", NULL, 0);
+	KUNIT_EXPECT_EQ(test, error, 2);
+
+	/* accessing array as single value */
+	error = fwnode_property_read_string(node, "strs", &str);
+	KUNIT_EXPECT_EQ(test, error, 0);
+	KUNIT_EXPECT_STREQ(test, str, "string-a");
+
+	fwnode_remove_software_node(node);
+}
+
+static void pe_test_bool(struct kunit *test)
+{
+	static const struct property_entry entries[] = {
+		PROPERTY_ENTRY_BOOL("prop"),
+		{ }
+	};
+
+	struct fwnode_handle *node;
+
+	node = fwnode_create_software_node(entries, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, node);
+
+	KUNIT_EXPECT_TRUE(test, fwnode_property_read_bool(node, "prop"));
+	KUNIT_EXPECT_FALSE(test, fwnode_property_read_bool(node, "not-prop"));
+
+	fwnode_remove_software_node(node);
+}
+
+/* Verifies that small U8 array is stored inline when property is copied */
+static void pe_test_move_inline_u8(struct kunit *test)
+{
+	static const u8 u8_array_small[8] = { 1, 2, 3, 4 };
+	static const u8 u8_array_big[128] = { 5, 6, 7, 8 };
+	static const struct property_entry entries[] = {
+		PROPERTY_ENTRY_U8_ARRAY("small", u8_array_small),
+		PROPERTY_ENTRY_U8_ARRAY("big", u8_array_big),
+		{ }
+	};
+
+	struct property_entry *copy;
+	const u8 *data_ptr;
+
+	copy = property_entries_dup(entries);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, copy);
+
+	KUNIT_EXPECT_TRUE(test, copy[0].is_inline);
+	data_ptr = (u8 *)&copy[0].value;
+	KUNIT_EXPECT_EQ(test, (int)data_ptr[0], 1);
+	KUNIT_EXPECT_EQ(test, (int)data_ptr[1], 2);
+
+	KUNIT_EXPECT_FALSE(test, copy[1].is_inline);
+	data_ptr = copy[1].pointer;
+	KUNIT_EXPECT_EQ(test, (int)data_ptr[0], 5);
+	KUNIT_EXPECT_EQ(test, (int)data_ptr[1], 6);
+
+	property_entries_free(copy);
+}
+
+/* Verifies that single string array is stored inline when property is copied */
+static void pe_test_move_inline_str(struct kunit *test)
+{
+	static char *str_array_small[] = { "a" };
+	static char *str_array_big[] = { "b", "c", "d", "e" };
+	static char *str_array_small_empty[] = { "" };
+	static struct property_entry entries[] = {
+		PROPERTY_ENTRY_STRING_ARRAY("small", str_array_small),
+		PROPERTY_ENTRY_STRING_ARRAY("big", str_array_big),
+		PROPERTY_ENTRY_STRING_ARRAY("small-empty", str_array_small_empty),
+		{ }
+	};
+
+	struct property_entry *copy;
+	const char * const *data_ptr;
+
+	copy = property_entries_dup(entries);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, copy);
+
+	KUNIT_EXPECT_TRUE(test, copy[0].is_inline);
+	KUNIT_EXPECT_STREQ(test, copy[0].value.str[0], "a");
+
+	KUNIT_EXPECT_FALSE(test, copy[1].is_inline);
+	data_ptr = copy[1].pointer;
+	KUNIT_EXPECT_STREQ(test, data_ptr[0], "b");
+	KUNIT_EXPECT_STREQ(test, data_ptr[1], "c");
+
+	KUNIT_EXPECT_TRUE(test, copy[2].is_inline);
+	KUNIT_EXPECT_STREQ(test, copy[2].value.str[0], "");
+
+	property_entries_free(copy);
+}
+
+/* Handling of reference properties */
+static void pe_test_reference(struct kunit *test)
+{
+	static const struct software_node nodes[] = {
+		{ .name = "1", },
+		{ .name = "2", },
+		{ }
+	};
+
+	static const struct software_node_ref_args refs[] = {
+		{
+			.node = &nodes[0],
+			.nargs = 0,
+		},
+		{
+			.node = &nodes[1],
+			.nargs = 2,
+			.args = { 3, 4 },
+		},
+	};
+
+	const struct property_entry entries[] = {
+		PROPERTY_ENTRY_REF("ref-1", &nodes[0]),
+		PROPERTY_ENTRY_REF("ref-2", &nodes[1], 1, 2),
+		PROPERTY_ENTRY_REF_ARRAY("ref-3", refs),
+		{ }
+	};
+
+	struct fwnode_handle *node;
+	struct fwnode_reference_args ref;
+	int error;
+
+	error = software_node_register_nodes(nodes);
+	KUNIT_ASSERT_EQ(test, error, 0);
+
+	node = fwnode_create_software_node(entries, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, node);
+
+	error = fwnode_property_get_reference_args(node, "ref-1", NULL,
+						   0, 0, &ref);
+	KUNIT_ASSERT_EQ(test, error, 0);
+	KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[0]);
+	KUNIT_EXPECT_EQ(test, ref.nargs, 0U);
+
+	/* wrong index */
+	error = fwnode_property_get_reference_args(node, "ref-1", NULL,
+						   0, 1, &ref);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	error = fwnode_property_get_reference_args(node, "ref-2", NULL,
+						   1, 0, &ref);
+	KUNIT_ASSERT_EQ(test, error, 0);
+	KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]);
+	KUNIT_EXPECT_EQ(test, ref.nargs, 1U);
+	KUNIT_EXPECT_EQ(test, ref.args[0], 1LLU);
+
+	/* asking for more args, padded with zero data */
+	error = fwnode_property_get_reference_args(node, "ref-2", NULL,
+						   3, 0, &ref);
+	KUNIT_ASSERT_EQ(test, error, 0);
+	KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]);
+	KUNIT_EXPECT_EQ(test, ref.nargs, 3U);
+	KUNIT_EXPECT_EQ(test, ref.args[0], 1LLU);
+	KUNIT_EXPECT_EQ(test, ref.args[1], 2LLU);
+	KUNIT_EXPECT_EQ(test, ref.args[2], 0LLU);
+
+	/* wrong index */
+	error = fwnode_property_get_reference_args(node, "ref-2", NULL,
+						   2, 1, &ref);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	/* array of references */
+	error = fwnode_property_get_reference_args(node, "ref-3", NULL,
+						   0, 0, &ref);
+	KUNIT_ASSERT_EQ(test, error, 0);
+	KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[0]);
+	KUNIT_EXPECT_EQ(test, ref.nargs, 0U);
+
+	/* second reference in the array */
+	error = fwnode_property_get_reference_args(node, "ref-3", NULL,
+						   2, 1, &ref);
+	KUNIT_ASSERT_EQ(test, error, 0);
+	KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]);
+	KUNIT_EXPECT_EQ(test, ref.nargs, 2U);
+	KUNIT_EXPECT_EQ(test, ref.args[0], 3LLU);
+	KUNIT_EXPECT_EQ(test, ref.args[1], 4LLU);
+
+	/* wrong index */
+	error = fwnode_property_get_reference_args(node, "ref-1", NULL,
+						   0, 2, &ref);
+	KUNIT_EXPECT_NE(test, error, 0);
+
+	fwnode_remove_software_node(node);
+	software_node_unregister_nodes(nodes);
+}
+
+static struct kunit_case property_entry_test_cases[] = {
+	KUNIT_CASE(pe_test_uints),
+	KUNIT_CASE(pe_test_uint_arrays),
+	KUNIT_CASE(pe_test_strings),
+	KUNIT_CASE(pe_test_bool),
+	KUNIT_CASE(pe_test_move_inline_u8),
+	KUNIT_CASE(pe_test_move_inline_str),
+	KUNIT_CASE(pe_test_reference),
+	{ }
+};
+
+static struct kunit_suite property_entry_test_suite = {
+	.name = "property-entry",
+	.test_cases = property_entry_test_cases,
+};
+
+kunit_test_suite(property_entry_test_suite);
diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c
index f4b1d8e54daf..3bb7beb127a9 100644
--- a/drivers/base/test/test_async_driver_probe.c
+++ b/drivers/base/test/test_async_driver_probe.c
@@ -44,7 +44,8 @@ static int test_probe(struct platform_device *pdev)
 	 * performing an async init on that node.
 	 */
 	if (dev->driver->probe_type == PROBE_PREFER_ASYNCHRONOUS) {
-		if (dev_to_node(dev) != numa_node_id()) {
+		if (IS_ENABLED(CONFIG_NUMA) &&
+		    dev_to_node(dev) != numa_node_id()) {
 			dev_warn(dev, "NUMA node mismatch %d != %d\n",
 				 dev_to_node(dev), numa_node_id());
 			atomic_inc(&warnings);
diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c
index 5ed86ded6e6b..ccc86206e508 100644
--- a/drivers/base/transport_class.c
+++ b/drivers/base/transport_class.c
@@ -30,6 +30,10 @@
 #include <linux/attribute_container.h>
 #include <linux/transport_class.h>
 
+static int transport_remove_classdev(struct attribute_container *cont,
+				     struct device *dev,
+				     struct device *classdev);
+
 /**
  * transport_class_register - register an initial transport class
  *
@@ -172,10 +176,11 @@ static int transport_add_class_device(struct attribute_container *cont,
  * routine is simply a trigger point used to add the device to the
  * system and register attributes for it.
  */
-
-void transport_add_device(struct device *dev)
+int transport_add_device(struct device *dev)
 {
-	attribute_container_device_trigger(dev, transport_add_class_device);
+	return attribute_container_device_trigger_safe(dev,
+					transport_add_class_device,
+					transport_remove_classdev);
 }
 EXPORT_SYMBOL_GPL(transport_add_device);
 
diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c
index 57f10b58b47c..c153c96a6145 100644
--- a/drivers/bcma/driver_chipcommon_b.c
+++ b/drivers/bcma/driver_chipcommon_b.c
@@ -48,7 +48,7 @@ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb)
 		return 0;
 
 	ccb->setup_done = 1;
-	ccb->mii = ioremap_nocache(ccb->core->addr_s[1], BCMA_CORE_SIZE);
+	ccb->mii = ioremap(ccb->core->addr_s[1], BCMA_CORE_SIZE);
 	if (!ccb->mii)
 		return -ENOMEM;
 
diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index f4161064365c..3056f81efca4 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -233,8 +233,10 @@ static void bcma_pmu_workarounds(struct bcma_drv_cc *cc)
 
 	switch (bus->chipinfo.id) {
 	case BCMA_CHIP_ID_BCM4313:
-		/* enable 12 mA drive strenth for 4313 and set chipControl
-		   register bit 1 */
+		/*
+		 * enable 12 mA drive strenth for 4313 and set chipControl
+		 * register bit 1
+		 */
 		bcma_chipco_chipctl_maskset(cc, 0,
 					    ~BCMA_CCTRL_4313_12MA_LED_DRIVE,
 					    BCMA_CCTRL_4313_12MA_LED_DRIVE);
@@ -246,8 +248,10 @@ static void bcma_pmu_workarounds(struct bcma_drv_cc *cc)
 		break;
 	case BCMA_CHIP_ID_BCM43224:
 	case BCMA_CHIP_ID_BCM43421:
-		/* enable 12 mA drive strenth for 43224 and set chipControl
-		   register bit 15 */
+		/*
+		 * enable 12 mA drive strenth for 43224 and set chipControl
+		 * register bit 15
+		 */
 		if (bus->chipinfo.rev == 0) {
 			bcma_cc_maskset32(cc, BCMA_CC_CHIPCTL,
 					  ~BCMA_CCTRL_43224_GPIO_TOGGLE,
@@ -500,8 +504,10 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid)
 	case BCMA_CHIP_ID_BCM53572:
 		/* 5357[ab]0, 43236[ab]0, and 6362b0 */
 
-		/* BCM5357 needs to touch PLL1_PLLCTL[02],
-		   so offset PLL0_PLLCTL[02] by 6 */
+		/*
+		 * BCM5357 needs to touch PLL1_PLLCTL[02],
+		 * so offset PLL0_PLLCTL[02] by 6
+		 */
 		phypll_offset = (bus->chipinfo.id == BCMA_CHIP_ID_BCM5357 ||
 		       bus->chipinfo.id == BCMA_CHIP_ID_BCM4749 ||
 		       bus->chipinfo.id == BCMA_CHIP_ID_BCM53572) ? 6 : 0;
@@ -619,8 +625,10 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid)
 	case BCMA_CHIP_ID_BCM43228:
 	case BCMA_CHIP_ID_BCM43428:
 		/* LCNXN */
-		/* PLL Settings for spur avoidance on/off mode,
-		   no on2 support for 43228A0 */
+		/*
+		 * PLL Settings for spur avoidance on/off mode,
+		 * no on2 support for 43228A0
+		 */
 		if (spuravoid == 1) {
 			bcma_pmu_spuravoid_pll_write(cc, BCMA_CC_PMU_PLL_CTL0,
 						     0x01100014);
diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index c42cec7c7ecc..88a93c266c19 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -115,7 +115,7 @@ static int bcma_extpci_read_config(struct bcma_drv_pci *pc, unsigned int dev,
 		if (unlikely(!addr))
 			goto out;
 		err = -ENOMEM;
-		mmio = ioremap_nocache(addr, sizeof(val));
+		mmio = ioremap(addr, sizeof(val));
 		if (!mmio)
 			goto out;
 
@@ -180,7 +180,7 @@ static int bcma_extpci_write_config(struct bcma_drv_pci *pc, unsigned int dev,
 		if (unlikely(!addr))
 			goto out;
 		err = -ENOMEM;
-		mmio = ioremap_nocache(addr, sizeof(val));
+		mmio = ioremap(addr, sizeof(val));
 		if (!mmio)
 			goto out;
 
@@ -515,7 +515,7 @@ void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 	/* Ok, ready to run, register it to the system.
 	 * The following needs change, if we want to port hostmode
 	 * to non-MIPS platform. */
-	io_map_base = (unsigned long)ioremap_nocache(pc_host->mem_resource.start,
+	io_map_base = (unsigned long)ioremap(pc_host->mem_resource.start,
 						     resource_size(&pc_host->mem_resource));
 	pc_host->pci_controller.io_map_base = io_map_base;
 	set_io_port_base(pc_host->pci_controller.io_map_base);
diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c
index c8073b509a2b..90d5bdc12e03 100644
--- a/drivers/bcma/host_soc.c
+++ b/drivers/bcma/host_soc.c
@@ -172,7 +172,7 @@ int __init bcma_host_soc_register(struct bcma_soc *soc)
 	/* iomap only first core. We have to read some register on this core
 	 * to scan the bus.
 	 */
-	bus->mmio = ioremap_nocache(BCMA_ADDR_BASE, BCMA_CORE_SIZE * 1);
+	bus->mmio = ioremap(BCMA_ADDR_BASE, BCMA_CORE_SIZE * 1);
 	if (!bus->mmio)
 		return -ENOMEM;
 
diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index 4a2d1b235fb5..1a942f734188 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -219,7 +219,7 @@ static s32 bcma_erom_get_mst_port(struct bcma_bus *bus, u32 __iomem **eromptr)
 static u32 bcma_erom_get_addr_desc(struct bcma_bus *bus, u32 __iomem **eromptr,
 				  u32 type, u8 port)
 {
-	u32 addrl, addrh, sizel, sizeh = 0;
+	u32 addrl, addrh, sizeh = 0;
 	u32 size;
 
 	u32 ent = bcma_erom_get_ent(bus, eromptr);
@@ -239,12 +239,9 @@ static u32 bcma_erom_get_addr_desc(struct bcma_bus *bus, u32 __iomem **eromptr,
 
 	if ((ent & SCAN_ADDR_SZ) == SCAN_ADDR_SZ_SZD) {
 		size = bcma_erom_get_ent(bus, eromptr);
-		sizel = size & SCAN_SIZE_SZ;
 		if (size & SCAN_SIZE_SG32)
 			sizeh = bcma_erom_get_ent(bus, eromptr);
-	} else
-		sizel = SCAN_ADDR_SZ_BASE <<
-				((ent & SCAN_ADDR_SZ) >> SCAN_ADDR_SZ_SHIFT);
+	}
 
 	return addrl;
 }
@@ -425,11 +422,11 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
 		}
 	}
 	if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
-		core->io_addr = ioremap_nocache(core->addr, BCMA_CORE_SIZE);
+		core->io_addr = ioremap(core->addr, BCMA_CORE_SIZE);
 		if (!core->io_addr)
 			return -ENOMEM;
 		if (core->wrap) {
-			core->io_wrap = ioremap_nocache(core->wrap,
+			core->io_wrap = ioremap(core->wrap,
 							BCMA_CORE_SIZE);
 			if (!core->io_wrap) {
 				iounmap(core->io_addr);
@@ -472,7 +469,7 @@ int bcma_bus_scan(struct bcma_bus *bus)
 
 	erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM);
 	if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
-		eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE);
+		eromptr = ioremap(erombase, BCMA_CORE_SIZE);
 		if (!eromptr)
 			return -ENOMEM;
 	} else {
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index bd19f8af950b..7b32fb673375 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -329,6 +329,7 @@ static const struct block_device_operations aoe_bdops = {
 	.open = aoeblk_open,
 	.release = aoeblk_release,
 	.ioctl = aoeblk_ioctl,
+	.compat_ioctl = blkdev_compat_ptr_ioctl,
 	.getgeo = aoeblk_getgeo,
 	.owner = THIS_MODULE,
 };
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index bd7d3bb8b890..1553d41f0b91 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -857,7 +857,7 @@ static void fd_calibrate( void )
 	}
 
 	if (ATARIHW_PRESENT(FDCSPEED))
-		dma_wd.fdc_speed = 0; 	/* always seek with 8 Mhz */;
+		dma_wd.fdc_speed = 0;   /* always seek with 8 Mhz */
 	DPRINT(("fd_calibrate\n"));
 	SET_IRQ_HANDLER( fd_calibrate_done );
 	/* we can't verify, since the speed may be incorrect */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index c548a5a6c1a0..a8730cc4db10 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		unsigned int len = bvec.bv_len;
 		int err;
 
+		/* Don't support un-aligned buffer */
+		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
+				(len & (SECTOR_SIZE - 1)));
+
 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
 				  bio_op(bio), sector);
 		if (err)
@@ -382,7 +386,6 @@ static struct brd_device *brd_alloc(int i)
 		goto out_free_dev;
 
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
-	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 
 	/* This is so fdisk will align partitions on 4k, because of
 	 * direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 5d52a2d32155..de2f94d0103a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -268,19 +268,18 @@ static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
 	/* some more paranoia, if the request was over-determined */
 	if (adm_ctx->device && adm_ctx->resource &&
 	    adm_ctx->device->resource != adm_ctx->resource) {
-		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
-				adm_ctx->minor, adm_ctx->resource->name,
-				adm_ctx->device->resource->name);
+		pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
+			adm_ctx->minor, adm_ctx->resource->name,
+			adm_ctx->device->resource->name);
 		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
 		return ERR_INVALID_REQUEST;
 	}
 	if (adm_ctx->device &&
 	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
 	    adm_ctx->volume != adm_ctx->device->vnr) {
-		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
-				adm_ctx->minor, adm_ctx->volume,
-				adm_ctx->device->vnr,
-				adm_ctx->device->resource->name);
+		pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+			adm_ctx->minor, adm_ctx->volume,
+			adm_ctx->device->vnr, adm_ctx->device->resource->name);
 		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
 		return ERR_INVALID_REQUEST;
 	}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f86cea4c0f8d..840c3aef3c5c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -884,7 +884,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		start_new_tl_epoch(connection);
 		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
 		break;
-	};
+	}
 
 	return rv;
 }
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 485865fd0412..cd3612e4e2e1 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3879,6 +3879,9 @@ static int fd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 {
 	int drive = (long)bdev->bd_disk->private_data;
 	switch (cmd) {
+	case CDROMEJECT: /* CD-ROM eject */
+	case 0x6470:	 /* SunOS floppy eject */
+
 	case FDMSGON:
 	case FDMSGOFF:
 	case FDSETEMSGTRESH:
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f6f77eaa7217..739b372a5112 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -417,18 +417,20 @@ out_free_page:
 	return ret;
 }
 
-static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
+static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
+			int mode)
 {
 	/*
-	 * We use punch hole to reclaim the free space used by the
-	 * image a.k.a. discard. However we do not support discard if
-	 * encryption is enabled, because it may give an attacker
-	 * useful information.
+	 * We use fallocate to manipulate the space mappings used by the image
+	 * a.k.a. discard/zerorange. However we do not support this if
+	 * encryption is enabled, because it may give an attacker useful
+	 * information.
 	 */
 	struct file *file = lo->lo_backing_file;
-	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
 	int ret;
 
+	mode |= FALLOC_FL_KEEP_SIZE;
+
 	if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
 		ret = -EOPNOTSUPP;
 		goto out;
@@ -596,9 +598,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 	switch (req_op(rq)) {
 	case REQ_OP_FLUSH:
 		return lo_req_flush(lo, rq);
-	case REQ_OP_DISCARD:
 	case REQ_OP_WRITE_ZEROES:
-		return lo_discard(lo, rq, pos);
+		/*
+		 * If the caller doesn't want deallocation, call zeroout to
+		 * write zeroes the range.  Otherwise, punch them out.
+		 */
+		return lo_fallocate(lo, rq, pos,
+			(rq->cmd_flags & REQ_NOUNMAP) ?
+				FALLOC_FL_ZERO_RANGE :
+				FALLOC_FL_PUNCH_HOLE);
+	case REQ_OP_DISCARD:
+		return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
 	case REQ_OP_WRITE:
 		if (lo->transfer)
 			return lo_write_transfer(lo, rq, pos);
@@ -630,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo,
 {
 	int rc;
 
-	rc = blkdev_reread_part(bdev);
+	mutex_lock(&bdev->bd_mutex);
+	rc = bdev_disk_changed(bdev, false);
+	mutex_unlock(&bdev->bd_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -1154,10 +1166,11 @@ out_unlock:
 		 * must be at least one and it can only become zero when the
 		 * current holder is released.
 		 */
-		if (release)
-			err = __blkdev_reread_part(bdev);
-		else
-			err = blkdev_reread_part(bdev);
+		if (!release)
+			mutex_lock(&bdev->bd_mutex);
+		err = bdev_disk_changed(bdev, false);
+		if (!release)
+			mutex_unlock(&bdev->bd_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo_number, err);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 964f78cfffa0..f6bafa9a68b9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -129,7 +129,7 @@ struct mtip_compat_ide_task_request_s {
 /*
  * This function check_for_surprise_removal is called
  * while card is removed from the system and it will
- * read the vendor id from the configration space
+ * read the vendor id from the configuration space
  *
  * @pdev Pointer to the pci_dev structure.
  *
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 19e75999bb15..b4607dd96185 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1032,14 +1032,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 		sockfd_put(sock);
 		return -ENOMEM;
 	}
+
+	config->socks = socks;
+
 	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
 	if (!nsock) {
 		sockfd_put(sock);
 		return -ENOMEM;
 	}
 
-	config->socks = socks;
-
 	nsock->fallback_index = -1;
 	nsock->dead = false;
 	mutex_init(&nsock->tx_lock);
@@ -1295,10 +1296,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	mutex_unlock(&nbd->config_lock);
 	ret = wait_event_interruptible(config->recv_wq,
 					 atomic_read(&config->recv_threads) == 0);
-	if (ret) {
+	if (ret)
 		sock_shutdown(nbd);
-		flush_workqueue(nbd->recv_workq);
-	}
+	flush_workqueue(nbd->recv_workq);
+
 	mutex_lock(&nbd->config_lock);
 	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index a235c45e22a7..bc837862b767 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -91,11 +91,13 @@ struct nullb {
 #ifdef CONFIG_BLK_DEV_ZONED
 int null_zone_init(struct nullb_device *dev);
 void null_zone_exit(struct nullb_device *dev);
-int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones);
+int null_report_zones(struct gendisk *disk, sector_t sector,
+		      unsigned int nr_zones, report_zones_cb cb, void *data);
 blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
 				enum req_opf op, sector_t sector,
 				sector_t nr_sectors);
+size_t null_zone_valid_read_len(struct nullb *nullb,
+				sector_t sector, unsigned int len);
 #else
 static inline int null_zone_init(struct nullb_device *dev)
 {
@@ -103,17 +105,18 @@ static inline int null_zone_init(struct nullb_device *dev)
 	return -EINVAL;
 }
 static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline int null_zone_report(struct gendisk *disk, sector_t sector,
-				   struct blk_zone *zones,
-				   unsigned int *nr_zones)
-{
-	return -EOPNOTSUPP;
-}
 static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
 					     enum req_opf op, sector_t sector,
 					     sector_t nr_sectors)
 {
 	return BLK_STS_NOTSUPP;
 }
+static inline size_t null_zone_valid_read_len(struct nullb *nullb,
+					      sector_t sector,
+					      unsigned int len)
+{
+	return len;
+}
+#define null_report_zones	NULL
 #endif /* CONFIG_BLK_DEV_ZONED */
 #endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 0e7da5015ccd..ae8d4bc532b0 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -227,7 +227,7 @@ static ssize_t nullb_device_uint_attr_store(unsigned int *val,
 	int result;
 
 	result = kstrtouint(page, 0, &tmp);
-	if (result)
+	if (result < 0)
 		return result;
 
 	*val = tmp;
@@ -241,7 +241,7 @@ static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
 	unsigned long tmp;
 
 	result = kstrtoul(page, 0, &tmp);
-	if (result)
+	if (result < 0)
 		return result;
 
 	*val = tmp;
@@ -255,7 +255,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
 	int result;
 
 	result = kstrtobool(page,  &tmp);
-	if (result)
+	if (result < 0)
 		return result;
 
 	*val = tmp;
@@ -263,7 +263,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
 }
 
 /* The following macro should only be used with TYPE = {uint, ulong, bool}. */
-#define NULLB_DEVICE_ATTR(NAME, TYPE)						\
+#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY)					\
 static ssize_t									\
 nullb_device_##NAME##_show(struct config_item *item, char *page)		\
 {										\
@@ -274,31 +274,57 @@ static ssize_t									\
 nullb_device_##NAME##_store(struct config_item *item, const char *page,		\
 			    size_t count)					\
 {										\
-	if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags))	\
-		return -EBUSY;							\
-	return nullb_device_##TYPE##_attr_store(				\
-			&to_nullb_device(item)->NAME, page, count);		\
+	int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;	\
+	struct nullb_device *dev = to_nullb_device(item);			\
+	TYPE new_value;								\
+	int ret;								\
+										\
+	ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);	\
+	if (ret < 0)								\
+		return ret;							\
+	if (apply_fn)								\
+		ret = apply_fn(dev, new_value);					\
+	else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) 		\
+		ret = -EBUSY;							\
+	if (ret < 0)								\
+		return ret;							\
+	dev->NAME = new_value;							\
+	return count;								\
 }										\
 CONFIGFS_ATTR(nullb_device_, NAME);
 
-NULLB_DEVICE_ATTR(size, ulong);
-NULLB_DEVICE_ATTR(completion_nsec, ulong);
-NULLB_DEVICE_ATTR(submit_queues, uint);
-NULLB_DEVICE_ATTR(home_node, uint);
-NULLB_DEVICE_ATTR(queue_mode, uint);
-NULLB_DEVICE_ATTR(blocksize, uint);
-NULLB_DEVICE_ATTR(irqmode, uint);
-NULLB_DEVICE_ATTR(hw_queue_depth, uint);
-NULLB_DEVICE_ATTR(index, uint);
-NULLB_DEVICE_ATTR(blocking, bool);
-NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
-NULLB_DEVICE_ATTR(memory_backed, bool);
-NULLB_DEVICE_ATTR(discard, bool);
-NULLB_DEVICE_ATTR(mbps, uint);
-NULLB_DEVICE_ATTR(cache_size, ulong);
-NULLB_DEVICE_ATTR(zoned, bool);
-NULLB_DEVICE_ATTR(zone_size, ulong);
-NULLB_DEVICE_ATTR(zone_nr_conv, uint);
+static int nullb_apply_submit_queues(struct nullb_device *dev,
+				     unsigned int submit_queues)
+{
+	struct nullb *nullb = dev->nullb;
+	struct blk_mq_tag_set *set;
+
+	if (!nullb)
+		return 0;
+
+	set = nullb->tag_set;
+	blk_mq_update_nr_hw_queues(set, submit_queues);
+	return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
+}
+
+NULLB_DEVICE_ATTR(size, ulong, NULL);
+NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
+NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
+NULLB_DEVICE_ATTR(home_node, uint, NULL);
+NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
+NULLB_DEVICE_ATTR(blocksize, uint, NULL);
+NULLB_DEVICE_ATTR(irqmode, uint, NULL);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
+NULLB_DEVICE_ATTR(index, uint, NULL);
+NULLB_DEVICE_ATTR(blocking, bool, NULL);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
+NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
+NULLB_DEVICE_ATTR(discard, bool, NULL);
+NULLB_DEVICE_ATTR(mbps, uint, NULL);
+NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zoned, bool, NULL);
+NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -467,7 +493,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
 
 static ssize_t memb_group_features_show(struct config_item *item, char *page)
 {
-	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n");
+	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
 }
 
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -996,6 +1022,16 @@ next:
 	return 0;
 }
 
+static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
+			       unsigned int len, unsigned int off)
+{
+	void *dst;
+
+	dst = kmap_atomic(page);
+	memset(dst + off, 0xFF, len);
+	kunmap_atomic(dst);
+}
+
 static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
 {
 	size_t temp;
@@ -1036,10 +1072,24 @@ static int null_transfer(struct nullb *nullb, struct page *page,
 	unsigned int len, unsigned int off, bool is_write, sector_t sector,
 	bool is_fua)
 {
+	struct nullb_device *dev = nullb->dev;
+	unsigned int valid_len = len;
 	int err = 0;
 
 	if (!is_write) {
-		err = copy_from_nullb(nullb, page, off, sector, len);
+		if (dev->zoned)
+			valid_len = null_zone_valid_read_len(nullb,
+				sector, len);
+
+		if (valid_len) {
+			err = copy_from_nullb(nullb, page, off,
+				sector, valid_len);
+			off += valid_len;
+			len -= valid_len;
+		}
+
+		if (len)
+			nullb_fill_pattern(nullb, page, len, off);
 		flush_dcache_page(page);
 	} else {
 		flush_dcache_page(page);
@@ -1418,20 +1468,9 @@ static void null_config_discard(struct nullb *nullb)
 	blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
 }
 
-static int null_open(struct block_device *bdev, fmode_t mode)
-{
-	return 0;
-}
-
-static void null_release(struct gendisk *disk, fmode_t mode)
-{
-}
-
-static const struct block_device_operations null_fops = {
-	.owner =	THIS_MODULE,
-	.open =		null_open,
-	.release =	null_release,
-	.report_zones =	null_zone_report,
+static const struct block_device_operations null_ops = {
+	.owner		= THIS_MODULE,
+	.report_zones	= null_report_zones,
 };
 
 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
@@ -1520,29 +1559,35 @@ static int init_driver_queues(struct nullb *nullb)
 
 static int null_gendisk_register(struct nullb *nullb)
 {
+	sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
 	struct gendisk *disk;
-	sector_t size;
 
 	disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
 	if (!disk)
 		return -ENOMEM;
-	size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
-	set_capacity(disk, size >> 9);
+	set_capacity(disk, size);
 
 	disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
 	disk->major		= null_major;
 	disk->first_minor	= nullb->index;
-	disk->fops		= &null_fops;
+	disk->fops		= &null_ops;
 	disk->private_data	= nullb;
 	disk->queue		= nullb->q;
 	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
+#ifdef CONFIG_BLK_DEV_ZONED
 	if (nullb->dev->zoned) {
-		int ret = blk_revalidate_disk_zones(disk);
-
-		if (ret != 0)
-			return ret;
+		if (queue_is_mq(nullb->q)) {
+			int ret = blk_revalidate_disk_zones(disk);
+			if (ret)
+				return ret;
+		} else {
+			blk_queue_chunk_sectors(nullb->q,
+					nullb->dev->zone_size_sects);
+			nullb->q->nr_zones = blkdev_nr_zones(disk);
+		}
 	}
+#endif
 
 	add_disk(disk);
 	return 0;
@@ -1568,7 +1613,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
 	return blk_mq_alloc_tag_set(set);
 }
 
-static void null_validate_conf(struct nullb_device *dev)
+static int null_validate_conf(struct nullb_device *dev)
 {
 	dev->blocksize = round_down(dev->blocksize, 512);
 	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
@@ -1595,6 +1640,14 @@ static void null_validate_conf(struct nullb_device *dev)
 	/* can not stop a queue */
 	if (dev->queue_mode == NULL_Q_BIO)
 		dev->mbps = 0;
+
+	if (dev->zoned &&
+	    (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
+		pr_err("zone_size must be power-of-two\n");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
@@ -1627,7 +1680,9 @@ static int null_add_dev(struct nullb_device *dev)
 	struct nullb *nullb;
 	int rv;
 
-	null_validate_conf(dev);
+	rv = null_validate_conf(dev);
+	if (rv)
+		return rv;
 
 	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
 	if (!nullb) {
@@ -1692,7 +1747,6 @@ static int null_add_dev(struct nullb_device *dev)
 		if (rv)
 			goto out_cleanup_blk_queue;
 
-		blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
 		nullb->q->limits.zoned = BLK_ZONED_HM;
 		blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
 		blk_queue_required_elevator_features(nullb->q,
@@ -1753,11 +1807,6 @@ static int __init null_init(void)
 		g_bs = PAGE_SIZE;
 	}
 
-	if (!is_power_of_2(g_zone_size)) {
-		pr_err("zone_size must be power-of-two\n");
-		return -EINVAL;
-	}
-
 	if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
 		pr_err("invalid home_node value\n");
 		g_home_node = NUMA_NO_NODE;
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 3d7fdea872f8..ed34785dd64b 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -66,22 +66,53 @@ void null_zone_exit(struct nullb_device *dev)
 	kvfree(dev->zones);
 }
 
-int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones)
+int null_report_zones(struct gendisk *disk, sector_t sector,
+		unsigned int nr_zones, report_zones_cb cb, void *data)
 {
 	struct nullb *nullb = disk->private_data;
 	struct nullb_device *dev = nullb->dev;
-	unsigned int zno, nrz = 0;
-
-	zno = null_zone_no(dev, sector);
-	if (zno < dev->nr_zones) {
-		nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
-		memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
+	unsigned int first_zone, i;
+	struct blk_zone zone;
+	int error;
+
+	first_zone = null_zone_no(dev, sector);
+	if (first_zone >= dev->nr_zones)
+		return 0;
+
+	nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+	for (i = 0; i < nr_zones; i++) {
+		/*
+		 * Stacked DM target drivers will remap the zone information by
+		 * modifying the zone information passed to the report callback.
+		 * So use a local copy to avoid corruption of the device zone
+		 * array.
+		 */
+		memcpy(&zone, &dev->zones[first_zone + i],
+		       sizeof(struct blk_zone));
+		error = cb(&zone, i, data);
+		if (error)
+			return error;
 	}
 
-	*nr_zones = nrz;
+	return nr_zones;
+}
 
-	return 0;
+size_t null_zone_valid_read_len(struct nullb *nullb,
+				sector_t sector, unsigned int len)
+{
+	struct nullb_device *dev = nullb->dev;
+	struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+	unsigned int nr_sectors = len >> SECTOR_SHIFT;
+
+	/* Read must be below the write pointer position */
+	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
+	    sector + nr_sectors <= zone->wp)
+		return len;
+
+	if (sector > zone->wp)
+		return 0;
+
+	return (zone->wp - sector) << SECTOR_SHIFT;
 }
 
 static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
@@ -98,11 +129,13 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 		return BLK_STS_IOERR;
 	case BLK_ZONE_COND_EMPTY:
 	case BLK_ZONE_COND_IMP_OPEN:
+	case BLK_ZONE_COND_EXP_OPEN:
+	case BLK_ZONE_COND_CLOSED:
 		/* Writes must be at the write pointer position */
 		if (sector != zone->wp)
 			return BLK_STS_IOERR;
 
-		if (zone->cond == BLK_ZONE_COND_EMPTY)
+		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
 			zone->cond = BLK_ZONE_COND_IMP_OPEN;
 
 		zone->wp += nr_sectors;
@@ -118,14 +151,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 	return BLK_STS_OK;
 }
 
-static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+				   sector_t sector)
 {
 	struct nullb_device *dev = cmd->nq->dev;
-	unsigned int zno = null_zone_no(dev, sector);
-	struct blk_zone *zone = &dev->zones[zno];
+	struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
 	size_t i;
 
-	switch (req_op(cmd->rq)) {
+	switch (op) {
 	case REQ_OP_ZONE_RESET_ALL:
 		for (i = 0; i < dev->nr_zones; i++) {
 			if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
@@ -141,6 +174,32 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
 		zone->cond = BLK_ZONE_COND_EMPTY;
 		zone->wp = zone->start;
 		break;
+	case REQ_OP_ZONE_OPEN:
+		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+			return BLK_STS_IOERR;
+		if (zone->cond == BLK_ZONE_COND_FULL)
+			return BLK_STS_IOERR;
+
+		zone->cond = BLK_ZONE_COND_EXP_OPEN;
+		break;
+	case REQ_OP_ZONE_CLOSE:
+		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+			return BLK_STS_IOERR;
+		if (zone->cond == BLK_ZONE_COND_FULL)
+			return BLK_STS_IOERR;
+
+		if (zone->wp == zone->start)
+			zone->cond = BLK_ZONE_COND_EMPTY;
+		else
+			zone->cond = BLK_ZONE_COND_CLOSED;
+		break;
+	case REQ_OP_ZONE_FINISH:
+		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+			return BLK_STS_IOERR;
+
+		zone->cond = BLK_ZONE_COND_FULL;
+		zone->wp = zone->start + zone->len;
+		break;
 	default:
 		return BLK_STS_NOTSUPP;
 	}
@@ -155,7 +214,10 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op,
 		return null_zone_write(cmd, sector, nr_sectors);
 	case REQ_OP_ZONE_RESET:
 	case REQ_OP_ZONE_RESET_ALL:
-		return null_zone_reset(cmd, sector);
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
+		return null_zone_mgmt(cmd, op, sector);
 	default:
 		return BLK_STS_OK;
 	}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 636bfea2de6f..117cfc8cd05a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -275,6 +275,9 @@ static const struct block_device_operations pcd_bdops = {
 	.open		= pcd_block_open,
 	.release	= pcd_block_release,
 	.ioctl		= pcd_block_ioctl,
+#ifdef CONFIG_COMPAT
+	.ioctl		= blkdev_compat_ptr_ioctl,
+#endif
 	.check_events	= pcd_block_check_events,
 };
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 6f9ad3fc716f..c0967507d085 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -874,6 +874,7 @@ static const struct block_device_operations pd_fops = {
 	.open		= pd_open,
 	.release	= pd_release,
 	.ioctl		= pd_ioctl,
+	.compat_ioctl	= pd_ioctl,
 	.getgeo		= pd_getgeo,
 	.check_events	= pd_check_events,
 	.revalidate_disk= pd_revalidate
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 6b7d4cab3687..bb09f21ce21a 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -276,6 +276,7 @@ static const struct block_device_operations pf_fops = {
 	.open		= pf_open,
 	.release	= pf_release,
 	.ioctl		= pf_ioctl,
+	.compat_ioctl	= pf_ioctl,
 	.getgeo		= pf_getgeo,
 	.check_events	= pf_check_events,
 };
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 76457003f140..5f970a7d32c0 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2684,6 +2684,7 @@ static const struct block_device_operations pktcdvd_ops = {
 	.open =			pkt_open,
 	.release =		pkt_close,
 	.ioctl =		pkt_ioctl,
+	.compat_ioctl =		blkdev_compat_ptr_ioctl,
 	.check_events =		pkt_check_events,
 };
 
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 13527a0b4e44..2b184563cd32 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -34,7 +34,7 @@
 #include <linux/ceph/cls_lock_client.h>
 #include <linux/ceph/striper.h>
 #include <linux/ceph/decode.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
 #include <linux/bsearch.h>
 
 #include <linux/kernel.h>
@@ -377,7 +377,6 @@ struct rbd_client_id {
 
 struct rbd_mapping {
 	u64                     size;
-	u64                     features;
 };
 
 /*
@@ -462,8 +461,9 @@ struct rbd_device {
  *   by rbd_dev->lock
  */
 enum rbd_dev_flags {
-	RBD_DEV_FLAG_EXISTS,	/* mapped snapshot has not been deleted */
+	RBD_DEV_FLAG_EXISTS,	/* rbd_dev_device_setup() ran */
 	RBD_DEV_FLAG_REMOVING,	/* this mapping is being removed */
+	RBD_DEV_FLAG_READONLY,  /* -o ro or snapshot */
 };
 
 static DEFINE_MUTEX(client_mutex);	/* Serialize client creation */
@@ -514,6 +514,16 @@ static int minor_to_rbd_dev_id(int minor)
 	return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
 }
 
+static bool rbd_is_ro(struct rbd_device *rbd_dev)
+{
+	return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+}
+
+static bool rbd_is_snap(struct rbd_device *rbd_dev)
+{
+	return rbd_dev->spec->snap_id != CEPH_NOSNAP;
+}
+
 static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
 {
 	lockdep_assert_held(&rbd_dev->lock_rwsem);
@@ -633,8 +643,6 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
 					u64 snap_id);
 static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
 				u8 *order, u64 *snap_size);
-static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
-		u64 *snap_features);
 static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);
 
 static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
@@ -695,9 +703,16 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
 	if (get_user(ro, (int __user *)arg))
 		return -EFAULT;
 
-	/* Snapshots can't be marked read-write */
-	if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
-		return -EROFS;
+	/*
+	 * Both images mapped read-only and snapshots can't be marked
+	 * read-write.
+	 */
+	if (!ro) {
+		if (rbd_is_ro(rbd_dev))
+			return -EROFS;
+
+		rbd_assert(!rbd_is_snap(rbd_dev));
+	}
 
 	/* Let blkdev_roset() handle it */
 	return -ENOTTY;
@@ -823,34 +838,34 @@ enum {
 	Opt_queue_depth,
 	Opt_alloc_size,
 	Opt_lock_timeout,
-	Opt_last_int,
 	/* int args above */
 	Opt_pool_ns,
-	Opt_last_string,
 	/* string args above */
 	Opt_read_only,
 	Opt_read_write,
 	Opt_lock_on_read,
 	Opt_exclusive,
 	Opt_notrim,
-	Opt_err
 };
 
-static match_table_t rbd_opts_tokens = {
-	{Opt_queue_depth, "queue_depth=%d"},
-	{Opt_alloc_size, "alloc_size=%d"},
-	{Opt_lock_timeout, "lock_timeout=%d"},
-	/* int args above */
-	{Opt_pool_ns, "_pool_ns=%s"},
-	/* string args above */
-	{Opt_read_only, "read_only"},
-	{Opt_read_only, "ro"},		/* Alternate spelling */
-	{Opt_read_write, "read_write"},
-	{Opt_read_write, "rw"},		/* Alternate spelling */
-	{Opt_lock_on_read, "lock_on_read"},
-	{Opt_exclusive, "exclusive"},
-	{Opt_notrim, "notrim"},
-	{Opt_err, NULL}
+static const struct fs_parameter_spec rbd_param_specs[] = {
+	fsparam_u32	("alloc_size",			Opt_alloc_size),
+	fsparam_flag	("exclusive",			Opt_exclusive),
+	fsparam_flag	("lock_on_read",		Opt_lock_on_read),
+	fsparam_u32	("lock_timeout",		Opt_lock_timeout),
+	fsparam_flag	("notrim",			Opt_notrim),
+	fsparam_string	("_pool_ns",			Opt_pool_ns),
+	fsparam_u32	("queue_depth",			Opt_queue_depth),
+	fsparam_flag	("read_only",			Opt_read_only),
+	fsparam_flag	("read_write",			Opt_read_write),
+	fsparam_flag	("ro",				Opt_read_only),
+	fsparam_flag	("rw",				Opt_read_write),
+	{}
+};
+
+static const struct fs_parameter_description rbd_parameters = {
+	.name		= "rbd",
+	.specs		= rbd_param_specs,
 };
 
 struct rbd_options {
@@ -871,87 +886,12 @@ struct rbd_options {
 #define RBD_EXCLUSIVE_DEFAULT	false
 #define RBD_TRIM_DEFAULT	true
 
-struct parse_rbd_opts_ctx {
+struct rbd_parse_opts_ctx {
 	struct rbd_spec		*spec;
+	struct ceph_options	*copts;
 	struct rbd_options	*opts;
 };
 
-static int parse_rbd_opts_token(char *c, void *private)
-{
-	struct parse_rbd_opts_ctx *pctx = private;
-	substring_t argstr[MAX_OPT_ARGS];
-	int token, intval, ret;
-
-	token = match_token(c, rbd_opts_tokens, argstr);
-	if (token < Opt_last_int) {
-		ret = match_int(&argstr[0], &intval);
-		if (ret < 0) {
-			pr_err("bad option arg (not int) at '%s'\n", c);
-			return ret;
-		}
-		dout("got int token %d val %d\n", token, intval);
-	} else if (token > Opt_last_int && token < Opt_last_string) {
-		dout("got string token %d val %s\n", token, argstr[0].from);
-	} else {
-		dout("got token %d\n", token);
-	}
-
-	switch (token) {
-	case Opt_queue_depth:
-		if (intval < 1) {
-			pr_err("queue_depth out of range\n");
-			return -EINVAL;
-		}
-		pctx->opts->queue_depth = intval;
-		break;
-	case Opt_alloc_size:
-		if (intval < SECTOR_SIZE) {
-			pr_err("alloc_size out of range\n");
-			return -EINVAL;
-		}
-		if (!is_power_of_2(intval)) {
-			pr_err("alloc_size must be a power of 2\n");
-			return -EINVAL;
-		}
-		pctx->opts->alloc_size = intval;
-		break;
-	case Opt_lock_timeout:
-		/* 0 is "wait forever" (i.e. infinite timeout) */
-		if (intval < 0 || intval > INT_MAX / 1000) {
-			pr_err("lock_timeout out of range\n");
-			return -EINVAL;
-		}
-		pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
-		break;
-	case Opt_pool_ns:
-		kfree(pctx->spec->pool_ns);
-		pctx->spec->pool_ns = match_strdup(argstr);
-		if (!pctx->spec->pool_ns)
-			return -ENOMEM;
-		break;
-	case Opt_read_only:
-		pctx->opts->read_only = true;
-		break;
-	case Opt_read_write:
-		pctx->opts->read_only = false;
-		break;
-	case Opt_lock_on_read:
-		pctx->opts->lock_on_read = true;
-		break;
-	case Opt_exclusive:
-		pctx->opts->exclusive = true;
-		break;
-	case Opt_notrim:
-		pctx->opts->trim = false;
-		break;
-	default:
-		/* libceph prints "bad option" msg */
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static char* obj_op_name(enum obj_operation_type op_type)
 {
 	switch (op_type) {
@@ -1302,51 +1242,23 @@ static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
 	return 0;
 }
 
-static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
-			u64 *snap_features)
-{
-	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
-	if (snap_id == CEPH_NOSNAP) {
-		*snap_features = rbd_dev->header.features;
-	} else if (rbd_dev->image_format == 1) {
-		*snap_features = 0;	/* No features for format 1 */
-	} else {
-		u64 features = 0;
-		int ret;
-
-		ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
-		if (ret)
-			return ret;
-
-		*snap_features = features;
-	}
-	return 0;
-}
-
 static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
 {
 	u64 snap_id = rbd_dev->spec->snap_id;
 	u64 size = 0;
-	u64 features = 0;
 	int ret;
 
 	ret = rbd_snap_size(rbd_dev, snap_id, &size);
 	if (ret)
 		return ret;
-	ret = rbd_snap_features(rbd_dev, snap_id, &features);
-	if (ret)
-		return ret;
 
 	rbd_dev->mapping.size = size;
-	rbd_dev->mapping.features = features;
-
 	return 0;
 }
 
 static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 {
 	rbd_dev->mapping.size = 0;
-	rbd_dev->mapping.features = 0;
 }
 
 static void zero_bvec(struct bio_vec *bv)
@@ -1832,6 +1744,17 @@ static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
 
 static bool use_object_map(struct rbd_device *rbd_dev)
 {
+	/*
+	 * An image mapped read-only can't use the object map -- it isn't
+	 * loaded because the header lock isn't acquired.  Someone else can
+	 * write to the image and update the object map behind our back.
+	 *
+	 * A snapshot can't be written to, so using the object map is always
+	 * safe.
+	 */
+	if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev))
+		return false;
+
 	return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
 		!(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
 }
@@ -3555,7 +3478,7 @@ static bool need_exclusive_lock(struct rbd_img_request *img_req)
 	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
 		return false;
 
-	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+	if (rbd_is_ro(rbd_dev))
 		return false;
 
 	rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
@@ -4230,7 +4153,7 @@ again:
 		 * lock owner acked, but resend if we don't see them
 		 * release the lock
 		 */
-		dout("%s rbd_dev %p requeueing lock_dwork\n", __func__,
+		dout("%s rbd_dev %p requeuing lock_dwork\n", __func__,
 		     rbd_dev);
 		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
 		    msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
@@ -4826,24 +4749,14 @@ static void rbd_queue_workfn(struct work_struct *work)
 		goto err_rq;
 	}
 
-	if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) {
-		rbd_warn(rbd_dev, "%s on read-only snapshot",
-			 obj_op_name(op_type));
-		result = -EIO;
-		goto err;
-	}
-
-	/*
-	 * Quit early if the mapped snapshot no longer exists.  It's
-	 * still possible the snapshot will have disappeared by the
-	 * time our request arrives at the osd, but there's no sense in
-	 * sending it if we already know.
-	 */
-	if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
-		dout("request for non-existent snapshot");
-		rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
-		result = -ENXIO;
-		goto err_rq;
+	if (op_type != OBJ_OP_READ) {
+		if (rbd_is_ro(rbd_dev)) {
+			rbd_warn(rbd_dev, "%s on read-only mapping",
+				 obj_op_name(op_type));
+			result = -EIO;
+			goto err;
+		}
+		rbd_assert(!rbd_is_snap(rbd_dev));
 	}
 
 	if (offset && length > U64_MAX - offset + 1) {
@@ -5025,25 +4938,6 @@ out:
 	return ret;
 }
 
-/*
- * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
- * has disappeared from the (just updated) snapshot context.
- */
-static void rbd_exists_validate(struct rbd_device *rbd_dev)
-{
-	u64 snap_id;
-
-	if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
-		return;
-
-	snap_id = rbd_dev->spec->snap_id;
-	if (snap_id == CEPH_NOSNAP)
-		return;
-
-	if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX)
-		clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-}
-
 static void rbd_dev_update_size(struct rbd_device *rbd_dev)
 {
 	sector_t size;
@@ -5084,12 +4978,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
 			goto out;
 	}
 
-	if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
-		rbd_dev->mapping.size = rbd_dev->header.image_size;
-	} else {
-		/* validate mapped snapshot's EXISTS flag */
-		rbd_exists_validate(rbd_dev);
-	}
+	rbd_assert(!rbd_is_snap(rbd_dev));
+	rbd_dev->mapping.size = rbd_dev->header.image_size;
 
 out:
 	up_write(&rbd_dev->header_rwsem);
@@ -5211,17 +5101,12 @@ static ssize_t rbd_size_show(struct device *dev,
 		(unsigned long long)rbd_dev->mapping.size);
 }
 
-/*
- * Note this shows the features for whatever's mapped, which is not
- * necessarily the base image.
- */
 static ssize_t rbd_features_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "0x%016llx\n",
-			(unsigned long long)rbd_dev->mapping.features);
+	return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
 }
 
 static ssize_t rbd_major_show(struct device *dev,
@@ -5709,9 +5594,12 @@ out:
 }
 
 static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
-		u64 *snap_features)
+				     bool read_only, u64 *snap_features)
 {
-	__le64 snapid = cpu_to_le64(snap_id);
+	struct {
+		__le64 snap_id;
+		u8 read_only;
+	} features_in;
 	struct {
 		__le64 features;
 		__le64 incompat;
@@ -5719,9 +5607,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
 	u64 unsup;
 	int ret;
 
+	features_in.snap_id = cpu_to_le64(snap_id);
+	features_in.read_only = read_only;
+
 	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
 				  &rbd_dev->header_oloc, "get_features",
-				  &snapid, sizeof(snapid),
+				  &features_in, sizeof(features_in),
 				  &features_buf, sizeof(features_buf));
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
@@ -5749,7 +5640,8 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
 static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
 {
 	return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
-						&rbd_dev->header.features);
+					 rbd_is_ro(rbd_dev),
+					 &rbd_dev->header.features);
 }
 
 /*
@@ -6456,6 +6348,122 @@ static inline char *dup_token(const char **buf, size_t *lenp)
 	return dup;
 }
 
+static int rbd_parse_param(struct fs_parameter *param,
+			    struct rbd_parse_opts_ctx *pctx)
+{
+	struct rbd_options *opt = pctx->opts;
+	struct fs_parse_result result;
+	int token, ret;
+
+	ret = ceph_parse_param(param, pctx->copts, NULL);
+	if (ret != -ENOPARAM)
+		return ret;
+
+	token = fs_parse(NULL, &rbd_parameters, param, &result);
+	dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
+	if (token < 0) {
+		if (token == -ENOPARAM) {
+			return invalf(NULL, "rbd: Unknown parameter '%s'",
+				      param->key);
+		}
+		return token;
+	}
+
+	switch (token) {
+	case Opt_queue_depth:
+		if (result.uint_32 < 1)
+			goto out_of_range;
+		opt->queue_depth = result.uint_32;
+		break;
+	case Opt_alloc_size:
+		if (result.uint_32 < SECTOR_SIZE)
+			goto out_of_range;
+		if (!is_power_of_2(result.uint_32)) {
+			return invalf(NULL, "rbd: alloc_size must be a power of 2");
+		}
+		opt->alloc_size = result.uint_32;
+		break;
+	case Opt_lock_timeout:
+		/* 0 is "wait forever" (i.e. infinite timeout) */
+		if (result.uint_32 > INT_MAX / 1000)
+			goto out_of_range;
+		opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
+		break;
+	case Opt_pool_ns:
+		kfree(pctx->spec->pool_ns);
+		pctx->spec->pool_ns = param->string;
+		param->string = NULL;
+		break;
+	case Opt_read_only:
+		opt->read_only = true;
+		break;
+	case Opt_read_write:
+		opt->read_only = false;
+		break;
+	case Opt_lock_on_read:
+		opt->lock_on_read = true;
+		break;
+	case Opt_exclusive:
+		opt->exclusive = true;
+		break;
+	case Opt_notrim:
+		opt->trim = false;
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
+
+out_of_range:
+	return invalf(NULL, "rbd: %s out of range", param->key);
+}
+
+/*
+ * This duplicates most of generic_parse_monolithic(), untying it from
+ * fs_context and skipping standard superblock and security options.
+ */
+static int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx)
+{
+	char *key;
+	int ret = 0;
+
+	dout("%s '%s'\n", __func__, options);
+	while ((key = strsep(&options, ",")) != NULL) {
+		if (*key) {
+			struct fs_parameter param = {
+				.key	= key,
+				.type	= fs_value_is_string,
+			};
+			char *value = strchr(key, '=');
+			size_t v_len = 0;
+
+			if (value) {
+				if (value == key)
+					continue;
+				*value++ = 0;
+				v_len = strlen(value);
+			}
+
+
+			if (v_len > 0) {
+				param.string = kmemdup_nul(value, v_len,
+							   GFP_KERNEL);
+				if (!param.string)
+					return -ENOMEM;
+			}
+			param.size = v_len;
+
+			ret = rbd_parse_param(&param, pctx);
+			kfree(param.string);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
 /*
  * Parse the options provided for an "rbd add" (i.e., rbd image
  * mapping) request.  These arrive via a write to /sys/bus/rbd/add,
@@ -6507,8 +6515,7 @@ static int rbd_add_parse_args(const char *buf,
 	const char *mon_addrs;
 	char *snap_name;
 	size_t mon_addrs_size;
-	struct parse_rbd_opts_ctx pctx = { 0 };
-	struct ceph_options *copts;
+	struct rbd_parse_opts_ctx pctx = { 0 };
 	int ret;
 
 	/* The first four tokens are required */
@@ -6519,7 +6526,7 @@ static int rbd_add_parse_args(const char *buf,
 		return -EINVAL;
 	}
 	mon_addrs = buf;
-	mon_addrs_size = len + 1;
+	mon_addrs_size = len;
 	buf += len;
 
 	ret = -EINVAL;
@@ -6569,6 +6576,10 @@ static int rbd_add_parse_args(const char *buf,
 	*(snap_name + len) = '\0';
 	pctx.spec->snap_name = snap_name;
 
+	pctx.copts = ceph_alloc_options();
+	if (!pctx.copts)
+		goto out_mem;
+
 	/* Initialize all rbd options to the defaults */
 
 	pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
@@ -6583,27 +6594,27 @@ static int rbd_add_parse_args(const char *buf,
 	pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
 	pctx.opts->trim = RBD_TRIM_DEFAULT;
 
-	copts = ceph_parse_options(options, mon_addrs,
-				   mon_addrs + mon_addrs_size - 1,
-				   parse_rbd_opts_token, &pctx);
-	if (IS_ERR(copts)) {
-		ret = PTR_ERR(copts);
+	ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+	if (ret)
+		goto out_err;
+
+	ret = rbd_parse_options(options, &pctx);
+	if (ret)
 		goto out_err;
-	}
-	kfree(options);
 
-	*ceph_opts = copts;
+	*ceph_opts = pctx.copts;
 	*opts = pctx.opts;
 	*rbd_spec = pctx.spec;
-
+	kfree(options);
 	return 0;
+
 out_mem:
 	ret = -ENOMEM;
 out_err:
 	kfree(pctx.opts);
+	ceph_destroy_options(pctx.copts);
 	rbd_spec_put(pctx.spec);
 	kfree(options);
-
 	return ret;
 }
 
@@ -6632,7 +6643,7 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
 		return -EINVAL;
 	}
 
-	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+	if (rbd_is_ro(rbd_dev))
 		return 0;
 
 	rbd_assert(!rbd_is_lock_owner(rbd_dev));
@@ -6838,6 +6849,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
 	__rbd_get_client(rbd_dev->rbd_client);
 	rbd_spec_get(rbd_dev->parent_spec);
 
+	__set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
+
 	ret = rbd_dev_image_probe(parent, depth);
 	if (ret < 0)
 		goto out_err;
@@ -6889,7 +6902,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 		goto err_out_blkdev;
 
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
-	set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only);
+	set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
 
 	ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
 	if (ret)
@@ -6927,6 +6940,24 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
 	return ret;
 }
 
+static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
+{
+	if (!is_snap) {
+		pr_info("image %s/%s%s%s does not exist\n",
+			rbd_dev->spec->pool_name,
+			rbd_dev->spec->pool_ns ?: "",
+			rbd_dev->spec->pool_ns ? "/" : "",
+			rbd_dev->spec->image_name);
+	} else {
+		pr_info("snap %s/%s%s%s@%s does not exist\n",
+			rbd_dev->spec->pool_name,
+			rbd_dev->spec->pool_ns ?: "",
+			rbd_dev->spec->pool_ns ? "/" : "",
+			rbd_dev->spec->image_name,
+			rbd_dev->spec->snap_name);
+	}
+}
+
 static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 {
 	rbd_dev_unprobe(rbd_dev);
@@ -6945,6 +6976,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
  */
 static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
 {
+	bool need_watch = !rbd_is_ro(rbd_dev);
 	int ret;
 
 	/*
@@ -6961,22 +6993,21 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
 	if (ret)
 		goto err_out_format;
 
-	if (!depth) {
+	if (need_watch) {
 		ret = rbd_register_watch(rbd_dev);
 		if (ret) {
 			if (ret == -ENOENT)
-				pr_info("image %s/%s%s%s does not exist\n",
-					rbd_dev->spec->pool_name,
-					rbd_dev->spec->pool_ns ?: "",
-					rbd_dev->spec->pool_ns ? "/" : "",
-					rbd_dev->spec->image_name);
+				rbd_print_dne(rbd_dev, false);
 			goto err_out_format;
 		}
 	}
 
 	ret = rbd_dev_header_info(rbd_dev);
-	if (ret)
+	if (ret) {
+		if (ret == -ENOENT && !need_watch)
+			rbd_print_dne(rbd_dev, false);
 		goto err_out_watch;
+	}
 
 	/*
 	 * If this image is the one being mapped, we have pool name and
@@ -6990,12 +7021,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
 		ret = rbd_spec_fill_names(rbd_dev);
 	if (ret) {
 		if (ret == -ENOENT)
-			pr_info("snap %s/%s%s%s@%s does not exist\n",
-				rbd_dev->spec->pool_name,
-				rbd_dev->spec->pool_ns ?: "",
-				rbd_dev->spec->pool_ns ? "/" : "",
-				rbd_dev->spec->image_name,
-				rbd_dev->spec->snap_name);
+			rbd_print_dne(rbd_dev, true);
 		goto err_out_probe;
 	}
 
@@ -7003,7 +7029,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
 	if (ret)
 		goto err_out_probe;
 
-	if (rbd_dev->spec->snap_id != CEPH_NOSNAP &&
+	if (rbd_is_snap(rbd_dev) &&
 	    (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
 		ret = rbd_object_map_load(rbd_dev);
 		if (ret)
@@ -7027,7 +7053,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
 err_out_probe:
 	rbd_dev_unprobe(rbd_dev);
 err_out_watch:
-	if (!depth)
+	if (need_watch)
 		rbd_unregister_watch(rbd_dev);
 err_out_format:
 	rbd_dev->image_format = 0;
@@ -7079,6 +7105,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	spec = NULL;		/* rbd_dev now owns this */
 	rbd_opts = NULL;	/* rbd_dev now owns this */
 
+	/* if we are mapping a snapshot it will be a read-only mapping */
+	if (rbd_dev->opts->read_only ||
+	    strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
+		__set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+
 	rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
 	if (!rbd_dev->config_info) {
 		rc = -ENOMEM;
@@ -7092,10 +7123,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 		goto err_out_rbd_dev;
 	}
 
-	/* If we are mapping a snapshot it must be marked read-only */
-	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
-		rbd_dev->opts->read_only = true;
-
 	if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
 		rbd_warn(rbd_dev, "alloc_size adjusted to %u",
 			 rbd_dev->layout.object_size);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b2fd630de85..39aeebc6837d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -171,6 +171,7 @@ static const struct block_device_operations vdc_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= vdc_getgeo,
 	.ioctl		= vdc_ioctl,
+	.compat_ioctl	= blkdev_compat_ptr_ioctl,
 };
 
 static void vdc_blk_queue_start(struct vdc_port *port)
@@ -634,7 +635,7 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
 	case VD_OP_GET_EFI:
 	case VD_OP_SET_EFI:
 		return -EOPNOTSUPP;
-	};
+	}
 
 	map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
 
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 1f3f9e0f02a8..4eaf97d7a170 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -827,7 +827,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		goto failed_req_csr;
 	}
 
-	card->csr_remap = ioremap_nocache(csr_base, csr_len);
+	card->csr_remap = ioremap(csr_base, csr_len);
 	if (!card->csr_remap) {
 		dev_printk(KERN_ERR, &card->dev->dev,
 			"Unable to remap memory region\n");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7ffd719d89de..fbbf18ac1d5d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -405,6 +405,9 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 
 static const struct block_device_operations virtblk_fops = {
 	.ioctl  = virtblk_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = blkdev_compat_ptr_ioctl,
+#endif
 	.owner  = THIS_MODULE,
 	.getgeo = virtblk_getgeo,
 };
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fd1e19f1a49f..716b99aa2307 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -936,6 +936,8 @@ next:
 out_of_memory:
 	pr_alert("%s: out of memory\n", __func__);
 	put_free_pages(ring, pages_to_gnt, segs_to_map);
+	for (i = last_map; i < num; i++)
+		pages[i]->handle = BLKBACK_INVALID_HANDLE;
 	return -ENOMEM;
 }
 
@@ -1504,5 +1506,13 @@ static int __init xen_blkif_init(void)
 
 module_init(xen_blkif_init);
 
+static void __exit xen_blkif_fini(void)
+{
+	xen_blkif_xenbus_fini();
+	xen_blkif_interface_fini();
+}
+
+module_exit(xen_blkif_fini);
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS("xen-backend:vbd");
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 1d3002d773f7..49132b0adbbe 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -375,9 +375,12 @@ struct phys_req {
 	struct block_device	*bdev;
 	blkif_sector_t		sector_number;
 };
+
 int xen_blkif_interface_init(void);
+void xen_blkif_interface_fini(void);
 
 int xen_blkif_xenbus_init(void);
+void xen_blkif_xenbus_fini(void);
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index b90dbcd99c03..4c5d99f87813 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -171,6 +171,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->domid = domid;
 	atomic_set(&blkif->refcnt, 1);
 	init_completion(&blkif->drain_complete);
+
+	/*
+	 * Because freeing back to the cache may be deferred, it is not
+	 * safe to unload the module (and hence destroy the cache) until
+	 * this has completed. To prevent premature unloading, take an
+	 * extra module reference here and release only when the object
+	 * has been freed back to the cache.
+	 */
+	__module_get(THIS_MODULE);
 	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
 
 	return blkif;
@@ -181,6 +190,9 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 {
 	int err;
 	struct xen_blkif *blkif = ring->blkif;
+	const struct blkif_common_sring *sring_common;
+	RING_IDX rsp_prod, req_prod;
+	unsigned int size;
 
 	/* Already connected through? */
 	if (ring->irq)
@@ -191,46 +203,62 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 	if (err < 0)
 		return err;
 
+	sring_common = (struct blkif_common_sring *)ring->blk_ring;
+	rsp_prod = READ_ONCE(sring_common->rsp_prod);
+	req_prod = READ_ONCE(sring_common->req_prod);
+
 	switch (blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
 	{
-		struct blkif_sring *sring;
-		sring = (struct blkif_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.native, sring,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_sring *sring_native =
+			(struct blkif_sring *)ring->blk_ring;
+
+		BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		size = __RING_SIZE(sring_native, XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_32:
 	{
-		struct blkif_x86_32_sring *sring_x86_32;
-		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_x86_32_sring *sring_x86_32 =
+			(struct blkif_x86_32_sring *)ring->blk_ring;
+
+		BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		size = __RING_SIZE(sring_x86_32, XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_64:
 	{
-		struct blkif_x86_64_sring *sring_x86_64;
-		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_x86_64_sring *sring_x86_64 =
+			(struct blkif_x86_64_sring *)ring->blk_ring;
+
+		BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		size = __RING_SIZE(sring_x86_64, XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
 	default:
 		BUG();
 	}
 
+	err = -EIO;
+	if (req_prod - rsp_prod > size)
+		goto fail;
+
 	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
 						    xen_blkif_be_int, 0,
 						    "blkif-backend", ring);
-	if (err < 0) {
-		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
-		ring->blk_rings.common.sring = NULL;
-		return err;
-	}
+	if (err < 0)
+		goto fail;
 	ring->irq = err;
 
 	return 0;
+
+fail:
+	xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+	ring->blk_rings.common.sring = NULL;
+	return err;
 }
 
 static int xen_blkif_disconnect(struct xen_blkif *blkif)
@@ -320,6 +348,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 
 	/* Make sure everything is drained before shutting down */
 	kmem_cache_free(xen_blkif_cachep, blkif);
+	module_put(THIS_MODULE);
 }
 
 int __init xen_blkif_interface_init(void)
@@ -333,6 +362,12 @@ int __init xen_blkif_interface_init(void)
 	return 0;
 }
 
+void xen_blkif_interface_fini(void)
+{
+	kmem_cache_destroy(xen_blkif_cachep);
+	xen_blkif_cachep = NULL;
+}
+
 /*
  *  sysfs interface for VBD I/O requests
  */
@@ -1115,10 +1150,16 @@ static struct xenbus_driver xen_blkbk_driver = {
 	.ids  = xen_blkbk_ids,
 	.probe = xen_blkbk_probe,
 	.remove = xen_blkbk_remove,
-	.otherend_changed = frontend_changed
+	.otherend_changed = frontend_changed,
+	.allow_rebind = true,
 };
 
 int xen_blkif_xenbus_init(void)
 {
 	return xenbus_register_backend(&xen_blkbk_driver);
 }
+
+void xen_blkif_xenbus_fini(void)
+{
+	xenbus_unregister_driver(&xen_blkbk_driver);
+}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a74d03913822..57d50c5ba309 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1113,8 +1113,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	if (!VDEV_IS_EXTENDED(info->vdevice)) {
 		err = xen_translate_vdev(info->vdevice, &minor, &offset);
 		if (err)
-			return err;		
- 		nr_parts = PARTS_PER_DISK;
+			return err;
+		nr_parts = PARTS_PER_DISK;
 	} else {
 		minor = BLKIF_MINOR_EXT(info->vdevice);
 		nr_parts = PARTS_PER_EXT_DISK;
@@ -2632,6 +2632,7 @@ static const struct block_device_operations xlvbd_block_fops =
 	.release = blkif_release,
 	.getgeo = blkif_getgeo,
 	.ioctl = blkif_ioctl,
+	.compat_ioctl = blkdev_compat_ptr_ioctl,
 };
 
 
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index aae665a3a254..f7aa2dc1ff85 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -380,17 +380,6 @@ config BT_ATH3K
 	  Say Y here to compile support for "Atheros firmware download driver"
 	  into the kernel or say M to compile it as module (ath3k).
 
-config BT_WILINK
-	tristate "Texas Instruments WiLink7 driver"
-	depends on TI_ST
-	help
-	  This enables the Bluetooth driver for Texas Instrument's BT/FM/GPS
-	  combo devices. This makes use of shared transport line discipline
-	  core driver to communicate with the BT core of the combo chip.
-
-	  Say Y here to compile support for Texas Instrument's WiLink7 driver
-	  into the kernel or say M to compile it as module (btwilink).
-
 config BT_MTKSDIO
 	tristate "MediaTek HCI SDIO driver"
 	depends on MMC
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 34887b9b3a85..1a58a3ae142c 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_BT_INTEL)		+= btintel.o
 obj-$(CONFIG_BT_ATH3K)		+= ath3k.o
 obj-$(CONFIG_BT_MRVL)		+= btmrvl.o
 obj-$(CONFIG_BT_MRVL_SDIO)	+= btmrvl_sdio.o
-obj-$(CONFIG_BT_WILINK)		+= btwilink.o
 obj-$(CONFIG_BT_MTKSDIO)	+= btmtksdio.o
 obj-$(CONFIG_BT_MTKUART)	+= btmtkuart.o
 obj-$(CONFIG_BT_QCOMSMD)	+= btqcomsmd.o
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 2d2e6d862068..1f498f358f60 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -23,6 +23,7 @@
 #define BDADDR_BCM43430A0 (&(bdaddr_t) {{0xac, 0x1f, 0x12, 0xa0, 0x43, 0x43}})
 #define BDADDR_BCM4324B3 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb3, 0x24, 0x43}})
 #define BDADDR_BCM4330B1 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb1, 0x30, 0x43}})
+#define BDADDR_BCM4334B0 (&(bdaddr_t) {{0x00, 0x00, 0x00, 0xb0, 0x34, 0x43}})
 #define BDADDR_BCM4345C5 (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0xc5, 0x45, 0x43}})
 #define BDADDR_BCM43341B (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0x1b, 0x34, 0x43}})
 
@@ -35,6 +36,7 @@ int btbcm_check_bdaddr(struct hci_dev *hdev)
 			     HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		int err = PTR_ERR(skb);
+
 		bt_dev_err(hdev, "BCM: Reading device address failed (%d)", err);
 		return err;
 	}
@@ -74,6 +76,7 @@ int btbcm_check_bdaddr(struct hci_dev *hdev)
 	    !bacmp(&bda->bdaddr, BDADDR_BCM2076B1) ||
 	    !bacmp(&bda->bdaddr, BDADDR_BCM4324B3) ||
 	    !bacmp(&bda->bdaddr, BDADDR_BCM4330B1) ||
+	    !bacmp(&bda->bdaddr, BDADDR_BCM4334B0) ||
 	    !bacmp(&bda->bdaddr, BDADDR_BCM4345C5) ||
 	    !bacmp(&bda->bdaddr, BDADDR_BCM43430A0) ||
 	    !bacmp(&bda->bdaddr, BDADDR_BCM43341B)) {
@@ -105,6 +108,52 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 }
 EXPORT_SYMBOL_GPL(btbcm_set_bdaddr);
 
+int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+			      struct bcm_set_pcm_int_params *params)
+{
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = __hci_cmd_sync(hdev, 0xfc1d, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		bt_dev_err(hdev, "BCM: Read PCM int params failed (%d)", err);
+		return err;
+	}
+
+	if (skb->len != 6 || skb->data[0]) {
+		bt_dev_err(hdev, "BCM: Read PCM int params length mismatch");
+		kfree_skb(skb);
+		return -EIO;
+	}
+
+	if (params)
+		memcpy(params, skb->data + 1, 5);
+
+	kfree_skb(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btbcm_read_pcm_int_params);
+
+int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+			       const struct bcm_set_pcm_int_params *params)
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = __hci_cmd_sync(hdev, 0xfc1c, 5, params, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		bt_dev_err(hdev, "BCM: Write PCM int params failed (%d)", err);
+		return err;
+	}
+	kfree_skb(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btbcm_write_pcm_int_params);
+
 int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw)
 {
 	const struct hci_command_hdr *cmd;
@@ -175,6 +224,7 @@ static int btbcm_reset(struct hci_dev *hdev)
 	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		int err = PTR_ERR(skb);
+
 		bt_dev_err(hdev, "BCM: Reset failed (%d)", err);
 		return err;
 	}
@@ -326,6 +376,7 @@ struct bcm_subver_table {
 
 static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x4103, "BCM4330B1"	},	/* 002.001.003 */
+	{ 0x410d, "BCM4334B0"	},	/* 002.001.013 */
 	{ 0x410e, "BCM43341B0"	},	/* 002.001.014 */
 	{ 0x4204, "BCM2076B1"	},	/* 002.002.004 */
 	{ 0x4406, "BCM4324B3"	},	/* 002.004.006 */
@@ -339,6 +390,7 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x220e, "BCM20702A1"  },	/* 001.002.014 */
 	{ 0x4217, "BCM4329B1"   },	/* 002.002.023 */
 	{ 0x6106, "BCM4359C0"	},	/* 003.001.006 */
+	{ 0x4106, "BCM4335A0"	},	/* 002.001.006 */
 	{ }
 };
 
@@ -440,6 +492,12 @@ int btbcm_finalize(struct hci_dev *hdev)
 
 	set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
 
+	/* Some devices ship with the controller default address.
+	 * Allow the bootloader to set a valid address through the
+	 * device tree.
+	 */
+	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(btbcm_finalize);
diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h
index d204be8a84bf..014ef847a486 100644
--- a/drivers/bluetooth/btbcm.h
+++ b/drivers/bluetooth/btbcm.h
@@ -54,6 +54,10 @@ struct bcm_set_pcm_format_params {
 int btbcm_check_bdaddr(struct hci_dev *hdev);
 int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
 int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw);
+int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+			      struct bcm_set_pcm_int_params *params);
+int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+			       const struct bcm_set_pcm_int_params *params);
 
 int btbcm_setup_patchram(struct hci_dev *hdev);
 int btbcm_setup_apple(struct hci_dev *hdev);
@@ -74,6 +78,18 @@ static inline int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 	return -EOPNOTSUPP;
 }
 
+static inline int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+			      struct bcm_set_pcm_int_params *params)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+			       const struct bcm_set_pcm_int_params *params)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw)
 {
 	return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index bb99c8653aab..62e781a18bf0 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -709,6 +709,51 @@ done:
 }
 EXPORT_SYMBOL_GPL(btintel_download_firmware);
 
+void btintel_reset_to_bootloader(struct hci_dev *hdev)
+{
+	struct intel_reset params;
+	struct sk_buff *skb;
+
+	/* Send Intel Reset command. This will result in
+	 * re-enumeration of BT controller.
+	 *
+	 * Intel Reset parameter description:
+	 * reset_type :   0x00 (Soft reset),
+	 *		  0x01 (Hard reset)
+	 * patch_enable : 0x00 (Do not enable),
+	 *		  0x01 (Enable)
+	 * ddc_reload :   0x00 (Do not reload),
+	 *		  0x01 (Reload)
+	 * boot_option:   0x00 (Current image),
+	 *                0x01 (Specified boot address)
+	 * boot_param:    Boot address
+	 *
+	 */
+	params.reset_type = 0x01;
+	params.patch_enable = 0x01;
+	params.ddc_reload = 0x01;
+	params.boot_option = 0x00;
+	params.boot_param = cpu_to_le32(0x00000000);
+
+	skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(params),
+			     &params, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		bt_dev_err(hdev, "FW download error recovery failed (%ld)",
+			   PTR_ERR(skb));
+		return;
+	}
+	bt_dev_info(hdev, "Intel reset sent to retry FW download");
+	kfree_skb(skb);
+
+	/* Current Intel BT controllers(ThP/JfP) hold the USB reset
+	 * lines for 2ms when it receives Intel Reset in bootloader mode.
+	 * Whereas, the upcoming Intel BT controllers will hold USB reset
+	 * for 150ms. To keep the delay generic, 150ms is chosen here.
+	 */
+	msleep(150);
+}
+EXPORT_SYMBOL_GPL(btintel_reset_to_bootloader);
+
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
 MODULE_VERSION(VERSION);
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 3d846190f2bf..a69ea8a87b9b 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -87,6 +87,7 @@ int btintel_read_boot_params(struct hci_dev *hdev,
 			     struct intel_boot_params *params);
 int btintel_download_firmware(struct hci_dev *dev, const struct firmware *fw,
 			      u32 *boot_param);
+void btintel_reset_to_bootloader(struct hci_dev *hdev);
 #else
 
 static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -181,4 +182,8 @@ static inline int btintel_download_firmware(struct hci_dev *dev,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline void btintel_reset_to_bootloader(struct hci_dev *hdev)
+{
+}
 #endif
diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c
index 813338288453..519788c442ca 100644
--- a/drivers/bluetooth/btmtksdio.c
+++ b/drivers/bluetooth/btmtksdio.c
@@ -57,6 +57,7 @@ static const struct sdio_device_id btmtksdio_table[] = {
 	 .driver_data = (kernel_ulong_t)&mt7668_data },
 	{ }	/* Terminating entry */
 };
+MODULE_DEVICE_TABLE(sdio, btmtksdio_table);
 
 #define MTK_REG_CHLPCR		0x4	/* W1S */
 #define C_INT_EN_SET		BIT(0)
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 8cc21ad7cf29..ec69e5dd7bd3 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -14,19 +14,33 @@
 
 #define VERSION "0.1"
 
-int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
+int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
+			 enum qca_btsoc_type soc_type)
 {
 	struct sk_buff *skb;
 	struct edl_event_hdr *edl;
-	struct rome_version *ver;
+	struct qca_btsoc_version *ver;
 	char cmd;
 	int err = 0;
+	u8 event_type = HCI_EV_VENDOR;
+	u8 rlen = sizeof(*edl) + sizeof(*ver);
+	u8 rtype = EDL_APP_VER_RES_EVT;
 
 	bt_dev_dbg(hdev, "QCA Version Request");
 
+	/* Unlike other SoC's sending version command response as payload to
+	 * VSE event. WCN3991 sends version command response as a payload to
+	 * command complete event.
+	 */
+	if (soc_type == QCA_WCN3991) {
+		event_type = 0;
+		rlen += 1;
+		rtype = EDL_PATCH_VER_REQ_CMD;
+	}
+
 	cmd = EDL_PATCH_VER_REQ_CMD;
 	skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN,
-				&cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+				&cmd, event_type, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		bt_dev_err(hdev, "Reading QCA version information failed (%d)",
@@ -34,7 +48,7 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
 		return err;
 	}
 
-	if (skb->len != sizeof(*edl) + sizeof(*ver)) {
+	if (skb->len != rlen) {
 		bt_dev_err(hdev, "QCA Version size mismatch len %d", skb->len);
 		err = -EILSEQ;
 		goto out;
@@ -48,18 +62,21 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
 	}
 
 	if (edl->cresp != EDL_CMD_REQ_RES_EVT ||
-	    edl->rtype != EDL_APP_VER_RES_EVT) {
+	    edl->rtype != rtype) {
 		bt_dev_err(hdev, "QCA Wrong packet received %d %d", edl->cresp,
 			   edl->rtype);
 		err = -EIO;
 		goto out;
 	}
 
-	ver = (struct rome_version *)(edl->data);
+	if (soc_type == QCA_WCN3991)
+		memmove(&edl->data, &edl->data[1], sizeof(*ver));
+
+	ver = (struct qca_btsoc_version *)(edl->data);
 
 	BT_DBG("%s: Product:0x%08x", hdev->name, le32_to_cpu(ver->product_id));
 	BT_DBG("%s: Patch  :0x%08x", hdev->name, le16_to_cpu(ver->patch_ver));
-	BT_DBG("%s: ROM    :0x%08x", hdev->name, le16_to_cpu(ver->rome_ver));
+	BT_DBG("%s: ROM    :0x%08x", hdev->name, le16_to_cpu(ver->rom_ver));
 	BT_DBG("%s: SOC    :0x%08x", hdev->name, le32_to_cpu(ver->soc_id));
 
 	/* QCA chipset version can be decided by patch and SoC
@@ -67,7 +84,7 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
 	 * and lower 2 bytes from patch will be used.
 	 */
 	*soc_version = (le32_to_cpu(ver->soc_id) << 16) |
-			(le16_to_cpu(ver->rome_ver) & 0x0000ffff);
+			(le16_to_cpu(ver->rom_ver) & 0x0000ffff);
 	if (*soc_version == 0)
 		err = -EILSEQ;
 
@@ -121,7 +138,7 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
 }
 EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd);
 
-static void qca_tlv_check_data(struct rome_config *config,
+static void qca_tlv_check_data(struct qca_fw_config *config,
 				const struct firmware *fw)
 {
 	const u8 *data;
@@ -140,8 +157,8 @@ static void qca_tlv_check_data(struct rome_config *config,
 	BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff);
 	BT_DBG("Length\t\t : %d bytes", length);
 
-	config->dnld_mode = ROME_SKIP_EVT_NONE;
-	config->dnld_type = ROME_SKIP_EVT_NONE;
+	config->dnld_mode = QCA_SKIP_EVT_NONE;
+	config->dnld_type = QCA_SKIP_EVT_NONE;
 
 	switch (config->type) {
 	case TLV_TYPE_PATCH:
@@ -223,31 +240,45 @@ static void qca_tlv_check_data(struct rome_config *config,
 }
 
 static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size,
-				 const u8 *data, enum rome_tlv_dnld_mode mode)
+				const u8 *data, enum qca_tlv_dnld_mode mode,
+				enum qca_btsoc_type soc_type)
 {
 	struct sk_buff *skb;
 	struct edl_event_hdr *edl;
 	struct tlv_seg_resp *tlv_resp;
 	u8 cmd[MAX_SIZE_PER_TLV_SEGMENT + 2];
 	int err = 0;
+	u8 event_type = HCI_EV_VENDOR;
+	u8 rlen = (sizeof(*edl) + sizeof(*tlv_resp));
+	u8 rtype = EDL_TVL_DNLD_RES_EVT;
 
 	cmd[0] = EDL_PATCH_TLV_REQ_CMD;
 	cmd[1] = seg_size;
 	memcpy(cmd + 2, data, seg_size);
 
-	if (mode == ROME_SKIP_EVT_VSE_CC || mode == ROME_SKIP_EVT_VSE)
+	if (mode == QCA_SKIP_EVT_VSE_CC || mode == QCA_SKIP_EVT_VSE)
 		return __hci_cmd_send(hdev, EDL_PATCH_CMD_OPCODE, seg_size + 2,
 				      cmd);
 
+	/* Unlike other SoC's sending version command response as payload to
+	 * VSE event. WCN3991 sends version command response as a payload to
+	 * command complete event.
+	 */
+	if (soc_type == QCA_WCN3991) {
+		event_type = 0;
+		rlen = sizeof(*edl);
+		rtype = EDL_PATCH_TLV_REQ_CMD;
+	}
+
 	skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, seg_size + 2, cmd,
-				HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+				event_type, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		bt_dev_err(hdev, "QCA Failed to send TLV segment (%d)", err);
 		return err;
 	}
 
-	if (skb->len != sizeof(*edl) + sizeof(*tlv_resp)) {
+	if (skb->len != rlen) {
 		bt_dev_err(hdev, "QCA TLV response size mismatch");
 		err = -EILSEQ;
 		goto out;
@@ -260,13 +291,19 @@ static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size,
 		goto out;
 	}
 
-	tlv_resp = (struct tlv_seg_resp *)(edl->data);
+	if (edl->cresp != EDL_CMD_REQ_RES_EVT || edl->rtype != rtype) {
+		bt_dev_err(hdev, "QCA TLV with error stat 0x%x rtype 0x%x",
+			   edl->cresp, edl->rtype);
+		err = -EIO;
+	}
 
-	if (edl->cresp != EDL_CMD_REQ_RES_EVT ||
-	    edl->rtype != EDL_TVL_DNLD_RES_EVT || tlv_resp->result != 0x00) {
+	if (soc_type == QCA_WCN3991)
+		goto out;
+
+	tlv_resp = (struct tlv_seg_resp *)(edl->data);
+	if (tlv_resp->result) {
 		bt_dev_err(hdev, "QCA TLV with error stat 0x%x rtype 0x%x (0x%x)",
 			   edl->cresp, edl->rtype, tlv_resp->result);
-		err = -EIO;
 	}
 
 out:
@@ -301,7 +338,8 @@ static int qca_inject_cmd_complete_event(struct hci_dev *hdev)
 }
 
 static int qca_download_firmware(struct hci_dev *hdev,
-				  struct rome_config *config)
+				 struct qca_fw_config *config,
+				 enum qca_btsoc_type soc_type)
 {
 	const struct firmware *fw;
 	const u8 *segment;
@@ -328,10 +366,10 @@ static int qca_download_firmware(struct hci_dev *hdev,
 		remain -= segsize;
 		/* The last segment is always acked regardless download mode */
 		if (!remain || segsize < MAX_SIZE_PER_TLV_SEGMENT)
-			config->dnld_mode = ROME_SKIP_EVT_NONE;
+			config->dnld_mode = QCA_SKIP_EVT_NONE;
 
 		ret = qca_tlv_send_segment(hdev, segsize, segment,
-					    config->dnld_mode);
+					   config->dnld_mode, soc_type);
 		if (ret)
 			goto out;
 
@@ -344,8 +382,8 @@ static int qca_download_firmware(struct hci_dev *hdev,
 	 * decrease the BT in initialization time. Here we will inject a command
 	 * complete event to avoid a command timeout error message.
 	 */
-	if (config->dnld_type == ROME_SKIP_EVT_VSE_CC ||
-	    config->dnld_type == ROME_SKIP_EVT_VSE)
+	if (config->dnld_type == QCA_SKIP_EVT_VSE_CC ||
+	    config->dnld_type == QCA_SKIP_EVT_VSE)
 		ret = qca_inject_cmd_complete_event(hdev);
 
 out:
@@ -382,7 +420,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 		   enum qca_btsoc_type soc_type, u32 soc_ver,
 		   const char *firmware_name)
 {
-	struct rome_config config;
+	struct qca_fw_config config;
 	int err;
 	u8 rom_ver = 0;
 
@@ -405,7 +443,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 			 "qca/rampatch_%08x.bin", soc_ver);
 	}
 
-	err = qca_download_firmware(hdev, &config);
+	err = qca_download_firmware(hdev, &config, soc_type);
 	if (err < 0) {
 		bt_dev_err(hdev, "QCA Failed to download patch (%d)", err);
 		return err;
@@ -426,7 +464,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/nvm_%08x.bin", soc_ver);
 
-	err = qca_download_firmware(hdev, &config);
+	err = qca_download_firmware(hdev, &config, soc_type);
 	if (err < 0) {
 		bt_dev_err(hdev, "QCA Failed to download NVM (%d)", err);
 		return err;
diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
index 69c5315a65fd..f5795b1a3779 100644
--- a/drivers/bluetooth/btqca.h
+++ b/drivers/bluetooth/btqca.h
@@ -56,24 +56,24 @@ enum qca_baudrate {
 	QCA_BAUDRATE_RESERVED
 };
 
-enum rome_tlv_dnld_mode {
-	ROME_SKIP_EVT_NONE,
-	ROME_SKIP_EVT_VSE,
-	ROME_SKIP_EVT_CC,
-	ROME_SKIP_EVT_VSE_CC
+enum qca_tlv_dnld_mode {
+	QCA_SKIP_EVT_NONE,
+	QCA_SKIP_EVT_VSE,
+	QCA_SKIP_EVT_CC,
+	QCA_SKIP_EVT_VSE_CC
 };
 
-enum rome_tlv_type {
+enum qca_tlv_type {
 	TLV_TYPE_PATCH = 1,
 	TLV_TYPE_NVM
 };
 
-struct rome_config {
+struct qca_fw_config {
 	u8 type;
 	char fwname[64];
 	uint8_t user_baud_rate;
-	enum rome_tlv_dnld_mode dnld_mode;
-	enum rome_tlv_dnld_mode dnld_type;
+	enum qca_tlv_dnld_mode dnld_mode;
+	enum qca_tlv_dnld_mode dnld_type;
 };
 
 struct edl_event_hdr {
@@ -82,10 +82,10 @@ struct edl_event_hdr {
 	__u8 data[0];
 } __packed;
 
-struct rome_version {
+struct qca_btsoc_version {
 	__le32 product_id;
 	__le16 patch_ver;
-	__le16 rome_ver;
+	__le16 rom_ver;
 	__le32 soc_id;
 } __packed;
 
@@ -125,6 +125,7 @@ enum qca_btsoc_type {
 	QCA_AR3002,
 	QCA_ROME,
 	QCA_WCN3990,
+	QCA_WCN3991,
 	QCA_WCN3998,
 };
 
@@ -134,12 +135,14 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr);
 int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 		   enum qca_btsoc_type soc_type, u32 soc_ver,
 		   const char *firmware_name);
-int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version);
+int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
+			 enum qca_btsoc_type);
 int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
 int qca_send_pre_shutdown_cmd(struct hci_dev *hdev);
 static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
 {
-	return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3998;
+	return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 ||
+	       soc_type == QCA_WCN3998;
 }
 #else
 
@@ -155,7 +158,8 @@ static inline int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 	return -EOPNOTSUPP;
 }
 
-static inline int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
+static inline int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
+				       enum qca_btsoc_type)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index bf3c02be6930..577cfa3329db 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -370,11 +370,11 @@ static int rtlbt_parse_firmware(struct hci_dev *hdev,
 	 * the end.
 	 */
 	len = patch_length;
-	buf = kmemdup(btrtl_dev->fw_data + patch_offset, patch_length,
-		      GFP_KERNEL);
+	buf = kvmalloc(patch_length, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
+	memcpy(buf, btrtl_dev->fw_data + patch_offset, patch_length - 4);
 	memcpy(buf + patch_length - 4, &epatch_info->fw_version, 4);
 
 	*_buf = buf;
@@ -418,7 +418,7 @@ static int rtl_download_firmware(struct hci_dev *hdev,
 		if (IS_ERR(skb)) {
 			rtl_dev_err(hdev, "download fw command failed (%ld)",
 				    PTR_ERR(skb));
-			ret = -PTR_ERR(skb);
+			ret = PTR_ERR(skb);
 			goto out;
 		}
 
@@ -460,8 +460,10 @@ static int rtl_load_file(struct hci_dev *hdev, const char *name, u8 **buff)
 	if (ret < 0)
 		return ret;
 	ret = fw->size;
-	*buff = kmemdup(fw->data, ret, GFP_KERNEL);
-	if (!*buff)
+	*buff = kvmalloc(fw->size, GFP_KERNEL);
+	if (*buff)
+		memcpy(*buff, fw->data, ret);
+	else
 		ret = -ENOMEM;
 
 	release_firmware(fw);
@@ -499,14 +501,14 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev,
 		goto out;
 
 	if (btrtl_dev->cfg_len > 0) {
-		tbuff = kzalloc(ret + btrtl_dev->cfg_len, GFP_KERNEL);
+		tbuff = kvzalloc(ret + btrtl_dev->cfg_len, GFP_KERNEL);
 		if (!tbuff) {
 			ret = -ENOMEM;
 			goto out;
 		}
 
 		memcpy(tbuff, fw_data, ret);
-		kfree(fw_data);
+		kvfree(fw_data);
 
 		memcpy(tbuff + ret, btrtl_dev->cfg_data, btrtl_dev->cfg_len);
 		ret += btrtl_dev->cfg_len;
@@ -519,14 +521,14 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev,
 	ret = rtl_download_firmware(hdev, fw_data, ret);
 
 out:
-	kfree(fw_data);
+	kvfree(fw_data);
 	return ret;
 }
 
 void btrtl_free(struct btrtl_device_info *btrtl_dev)
 {
-	kfree(btrtl_dev->fw_data);
-	kfree(btrtl_dev->cfg_data);
+	kvfree(btrtl_dev->fw_data);
+	kvfree(btrtl_dev->cfg_data);
 	kfree(btrtl_dev);
 }
 EXPORT_SYMBOL_GPL(btrtl_free);
@@ -778,7 +780,7 @@ int btrtl_get_uart_settings(struct hci_dev *hdev,
 			rtl_dev_dbg(hdev, "skipping config entry 0x%x (len %u)",
 				   le16_to_cpu(entry->offset), entry->len);
 			break;
-		};
+		}
 
 		i += sizeof(*entry) + entry->len;
 	}
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index fd9571d5fdac..199e8f7d426d 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -145,11 +145,20 @@ static int btsdio_rx_packet(struct btsdio_data *data)
 
 	data->hdev->stat.byte_rx += len;
 
-	hci_skb_pkt_type(skb) = hdr[3];
-
-	err = hci_recv_frame(data->hdev, skb);
-	if (err < 0)
-		return err;
+	switch (hdr[3]) {
+	case HCI_EVENT_PKT:
+	case HCI_ACLDATA_PKT:
+	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
+		hci_skb_pkt_type(skb) = hdr[3];
+		err = hci_recv_frame(data->hdev, skb);
+		if (err < 0)
+			return err;
+		break;
+	default:
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 
 	sdio_writeb(data->func, 0x00, REG_PC_RRT, NULL);
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a9c35ebb30f8..f5924f3e8b8d 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -266,6 +266,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x3015), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x04ca, 0x301a), .driver_info = BTUSB_QCA_ROME },
+	{ USB_DEVICE(0x04ca, 0x3021), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x13d3, 0x3491), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x13d3, 0x3496), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x13d3, 0x3501), .driver_info = BTUSB_QCA_ROME },
@@ -552,9 +553,9 @@ static void btusb_rtl_cmd_timeout(struct hci_dev *hdev)
 	}
 
 	bt_dev_err(hdev, "Reset Realtek device via gpio");
-	gpiod_set_value_cansleep(reset_gpio, 0);
-	msleep(200);
 	gpiod_set_value_cansleep(reset_gpio, 1);
+	msleep(200);
+	gpiod_set_value_cansleep(reset_gpio, 0);
 }
 
 static inline void btusb_free_frags(struct btusb_data *data)
@@ -1200,7 +1201,7 @@ static int btusb_open(struct hci_dev *hdev)
 	if (data->setup_on_usb) {
 		err = data->setup_on_usb(hdev);
 		if (err < 0)
-			return err;
+			goto setup_fail;
 	}
 
 	data->intf->needs_remote_wakeup = 1;
@@ -1239,6 +1240,7 @@ done:
 
 failed:
 	clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+setup_fail:
 	usb_autopm_put_interface(data->intf);
 	return err;
 }
@@ -2182,8 +2184,11 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	 * loaded.
 	 */
 	err = btintel_read_version(hdev, &ver);
-	if (err)
+	if (err) {
+		bt_dev_err(hdev, "Intel Read version failed (%d)", err);
+		btintel_reset_to_bootloader(hdev);
 		return err;
+	}
 
 	/* The hardware platform number has a fixed value of 0x37 and
 	 * for now only accept this single value.
@@ -2326,9 +2331,13 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 
 	/* Start firmware downloading and get boot parameter */
 	err = btintel_download_firmware(hdev, fw, &boot_param);
-	if (err < 0)
+	if (err < 0) {
+		/* When FW download fails, send Intel Reset to retry
+		 * FW download.
+		 */
+		btintel_reset_to_bootloader(hdev);
 		goto done;
-
+	}
 	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 
 	bt_dev_info(hdev, "Waiting for firmware download to complete");
@@ -2355,6 +2364,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	if (err) {
 		bt_dev_err(hdev, "Firmware loading timeout");
 		err = -ETIMEDOUT;
+		btintel_reset_to_bootloader(hdev);
 		goto done;
 	}
 
@@ -2381,8 +2391,11 @@ done:
 	set_bit(BTUSB_BOOTING, &data->flags);
 
 	err = btintel_send_intel_reset(hdev, boot_param);
-	if (err)
+	if (err) {
+		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
+		btintel_reset_to_bootloader(hdev);
 		return err;
+	}
 
 	/* The bootloader will not indicate when the device is ready. This
 	 * is done by the operational firmware sending bootup notification.
@@ -2404,6 +2417,7 @@ done:
 
 	if (err) {
 		bt_dev_err(hdev, "Device boot timeout");
+		btintel_reset_to_bootloader(hdev);
 		return -ETIMEDOUT;
 	}
 
@@ -2432,6 +2446,13 @@ done:
 	 */
 	btintel_set_event_mask(hdev, false);
 
+	/* Read the Intel version information after loading the FW  */
+	err = btintel_read_version(hdev, &ver);
+	if (err)
+		return err;
+
+	btintel_version_info(hdev, &ver);
+
 	return 0;
 }
 
@@ -2489,8 +2510,6 @@ static int btusb_shutdown_intel_new(struct hci_dev *hdev)
 	return 0;
 }
 
-#ifdef CONFIG_BT_HCIBTUSB_MTK
-
 #define FIRMWARE_MT7663		"mediatek/mt7663pr2h.bin"
 #define FIRMWARE_MT7668		"mediatek/mt7668pr2h.bin"
 
@@ -2584,7 +2603,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
 		 * and being processed the events from there then.
 		 */
 		if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) {
-			data->evt_skb = skb_clone(skb, GFP_KERNEL);
+			data->evt_skb = skb_clone(skb, GFP_ATOMIC);
 			if (!data->evt_skb)
 				goto err_out;
 		}
@@ -2849,7 +2868,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname)
 	err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params);
 	if (err < 0) {
 		bt_dev_err(hdev, "Failed to send wmt rst (%d)", err);
-		return err;
+		goto err_release_fw;
 	}
 
 	/* Wait a few moments for firmware activation done */
@@ -3051,7 +3070,6 @@ static int btusb_mtk_shutdown(struct hci_dev *hdev)
 
 MODULE_FIRMWARE(FIRMWARE_MT7663);
 MODULE_FIRMWARE(FIRMWARE_MT7668);
-#endif
 
 #ifdef CONFIG_PM
 /* Configure an out-of-band gpio as wake-up pin, if specified in device tree */
@@ -3411,7 +3429,6 @@ static int btusb_setup_qca(struct hci_dev *hdev)
 	return 0;
 }
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
 static inline int __set_diag_interface(struct hci_dev *hdev)
 {
 	struct btusb_data *data = hci_get_drvdata(hdev);
@@ -3498,7 +3515,6 @@ static int btusb_bcm_set_diag(struct hci_dev *hdev, bool enable)
 
 	return submit_or_queue_tx_urb(hdev, urb);
 }
-#endif
 
 #ifdef CONFIG_PM
 static irqreturn_t btusb_oob_wake_handler(int irq, void *priv)
@@ -3724,8 +3740,8 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_BCM92035)
 		hdev->setup = btusb_setup_bcm92035;
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
-	if (id->driver_info & BTUSB_BCM_PATCHRAM) {
+	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) &&
+	    (id->driver_info & BTUSB_BCM_PATCHRAM)) {
 		hdev->manufacturer = 15;
 		hdev->setup = btbcm_setup_patchram;
 		hdev->set_diag = btusb_bcm_set_diag;
@@ -3735,7 +3751,8 @@ static int btusb_probe(struct usb_interface *intf,
 		data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
 	}
 
-	if (id->driver_info & BTUSB_BCM_APPLE) {
+	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) &&
+	    (id->driver_info & BTUSB_BCM_APPLE)) {
 		hdev->manufacturer = 15;
 		hdev->setup = btbcm_setup_apple;
 		hdev->set_diag = btusb_bcm_set_diag;
@@ -3743,7 +3760,6 @@ static int btusb_probe(struct usb_interface *intf,
 		/* Broadcom LM_DIAG Interface numbers are hardcoded */
 		data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
 	}
-#endif
 
 	if (id->driver_info & BTUSB_INTEL) {
 		hdev->manufacturer = 2;
@@ -3774,14 +3790,13 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_MARVELL)
 		hdev->set_bdaddr = btusb_set_bdaddr_marvell;
 
-#ifdef CONFIG_BT_HCIBTUSB_MTK
-	if (id->driver_info & BTUSB_MEDIATEK) {
+	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) &&
+	    (id->driver_info & BTUSB_MEDIATEK)) {
 		hdev->setup = btusb_mtk_setup;
 		hdev->shutdown = btusb_mtk_shutdown;
 		hdev->manufacturer = 70;
 		set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
 	}
-#endif
 
 	if (id->driver_info & BTUSB_SWAVE) {
 		set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks);
@@ -3807,8 +3822,8 @@ static int btusb_probe(struct usb_interface *intf,
 		btusb_check_needs_reset_resume(intf);
 	}
 
-#ifdef CONFIG_BT_HCIBTUSB_RTL
-	if (id->driver_info & BTUSB_REALTEK) {
+	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_RTL) &&
+	    (id->driver_info & BTUSB_REALTEK)) {
 		hdev->setup = btrtl_setup_realtek;
 		hdev->shutdown = btrtl_shutdown_realtek;
 		hdev->cmd_timeout = btusb_rtl_cmd_timeout;
@@ -3818,8 +3833,11 @@ static int btusb_probe(struct usb_interface *intf,
 		 * (DEVICE_REMOTE_WAKEUP)
 		 */
 		set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
+
+		err = usb_autopm_get_interface(intf);
+		if (err < 0)
+			goto out_free_dev;
 	}
-#endif
 
 	if (id->driver_info & BTUSB_AMP) {
 		/* AMP controllers do not support SCO packets */
@@ -3887,15 +3905,13 @@ static int btusb_probe(struct usb_interface *intf,
 			goto out_free_dev;
 	}
 
-#ifdef CONFIG_BT_HCIBTUSB_BCM
-	if (data->diag) {
+	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) && data->diag) {
 		if (!usb_driver_claim_interface(&btusb_driver,
 						data->diag, data))
 			__set_diag_interface(hdev);
 		else
 			data->diag = NULL;
 	}
-#endif
 
 	if (enable_autosuspend)
 		usb_enable_autosuspend(data->udev);
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
deleted file mode 100644
index e55f06e4270f..000000000000
--- a/drivers/bluetooth/btwilink.c
+++ /dev/null
@@ -1,337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Texas Instrument's Bluetooth Driver For Shared Transport.
- *
- *  Bluetooth Driver acts as interface between HCI core and
- *  TI Shared Transport Layer.
- *
- *  Copyright (C) 2009-2010 Texas Instruments
- *  Author: Raja Mani <raja_mani@ti.com>
- *	Pavan Savoy <pavan_savoy@ti.com>
- */
-
-#include <linux/platform_device.h>
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci_core.h>
-#include <net/bluetooth/hci.h>
-
-#include <linux/ti_wilink_st.h>
-#include <linux/module.h>
-
-/* Bluetooth Driver Version */
-#define VERSION               "1.0"
-#define MAX_BT_CHNL_IDS		3
-
-/* Number of seconds to wait for registration completion
- * when ST returns PENDING status.
- */
-#define BT_REGISTER_TIMEOUT   6000	/* 6 sec */
-
-/**
- * struct ti_st - driver operation structure
- * @hdev: hci device pointer which binds to bt driver
- * @reg_status: ST registration callback status
- * @st_write: write function provided by the ST driver
- *	to be used by the driver during send_frame.
- * @wait_reg_completion - completion sync between ti_st_open
- *	and st_reg_completion_cb.
- */
-struct ti_st {
-	struct hci_dev *hdev;
-	int reg_status;
-	long (*st_write) (struct sk_buff *);
-	struct completion wait_reg_completion;
-};
-
-/* Increments HCI counters based on pocket ID (cmd,acl,sco) */
-static inline void ti_st_tx_complete(struct ti_st *hst, int pkt_type)
-{
-	struct hci_dev *hdev = hst->hdev;
-
-	/* Update HCI stat counters */
-	switch (pkt_type) {
-	case HCI_COMMAND_PKT:
-		hdev->stat.cmd_tx++;
-		break;
-
-	case HCI_ACLDATA_PKT:
-		hdev->stat.acl_tx++;
-		break;
-
-	case HCI_SCODATA_PKT:
-		hdev->stat.sco_tx++;
-		break;
-	}
-}
-
-/* ------- Interfaces to Shared Transport ------ */
-
-/* Called by ST layer to indicate protocol registration completion
- * status.ti_st_open() function will wait for signal from this
- * API when st_register() function returns ST_PENDING.
- */
-static void st_reg_completion_cb(void *priv_data, int data)
-{
-	struct ti_st *lhst = priv_data;
-
-	/* Save registration status for use in ti_st_open() */
-	lhst->reg_status = data;
-	/* complete the wait in ti_st_open() */
-	complete(&lhst->wait_reg_completion);
-}
-
-/* Called by Shared Transport layer when receive data is available */
-static long st_receive(void *priv_data, struct sk_buff *skb)
-{
-	struct ti_st *lhst = priv_data;
-	int err;
-
-	if (!skb)
-		return -EFAULT;
-
-	if (!lhst) {
-		kfree_skb(skb);
-		return -EFAULT;
-	}
-
-	/* Forward skb to HCI core layer */
-	err = hci_recv_frame(lhst->hdev, skb);
-	if (err < 0) {
-		BT_ERR("Unable to push skb to HCI core(%d)", err);
-		return err;
-	}
-
-	lhst->hdev->stat.byte_rx += skb->len;
-
-	return 0;
-}
-
-/* ------- Interfaces to HCI layer ------ */
-/* protocol structure registered with shared transport */
-static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = {
-	{
-		.chnl_id = HCI_EVENT_PKT, /* HCI Events */
-		.hdr_len = sizeof(struct hci_event_hdr),
-		.offset_len_in_hdr = offsetof(struct hci_event_hdr, plen),
-		.len_size = 1, /* sizeof(plen) in struct hci_event_hdr */
-		.reserve = 8,
-	},
-	{
-		.chnl_id = HCI_ACLDATA_PKT, /* ACL */
-		.hdr_len = sizeof(struct hci_acl_hdr),
-		.offset_len_in_hdr = offsetof(struct hci_acl_hdr, dlen),
-		.len_size = 2,	/* sizeof(dlen) in struct hci_acl_hdr */
-		.reserve = 8,
-	},
-	{
-		.chnl_id = HCI_SCODATA_PKT, /* SCO */
-		.hdr_len = sizeof(struct hci_sco_hdr),
-		.offset_len_in_hdr = offsetof(struct hci_sco_hdr, dlen),
-		.len_size = 1, /* sizeof(dlen) in struct hci_sco_hdr */
-		.reserve = 8,
-	},
-};
-
-/* Called from HCI core to initialize the device */
-static int ti_st_open(struct hci_dev *hdev)
-{
-	unsigned long timeleft;
-	struct ti_st *hst;
-	int err, i;
-
-	BT_DBG("%s %p", hdev->name, hdev);
-
-	/* provide contexts for callbacks from ST */
-	hst = hci_get_drvdata(hdev);
-
-	for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
-		ti_st_proto[i].priv_data = hst;
-		ti_st_proto[i].max_frame_size = HCI_MAX_FRAME_SIZE;
-		ti_st_proto[i].recv = st_receive;
-		ti_st_proto[i].reg_complete_cb = st_reg_completion_cb;
-
-		/* Prepare wait-for-completion handler */
-		init_completion(&hst->wait_reg_completion);
-		/* Reset ST registration callback status flag,
-		 * this value will be updated in
-		 * st_reg_completion_cb()
-		 * function whenever it called from ST driver.
-		 */
-		hst->reg_status = -EINPROGRESS;
-
-		err = st_register(&ti_st_proto[i]);
-		if (!err)
-			goto done;
-
-		if (err != -EINPROGRESS) {
-			BT_ERR("st_register failed %d", err);
-			return err;
-		}
-
-		/* ST is busy with either protocol
-		 * registration or firmware download.
-		 */
-		BT_DBG("waiting for registration "
-				"completion signal from ST");
-		timeleft = wait_for_completion_timeout
-			(&hst->wait_reg_completion,
-			 msecs_to_jiffies(BT_REGISTER_TIMEOUT));
-		if (!timeleft) {
-			BT_ERR("Timeout(%d sec),didn't get reg "
-					"completion signal from ST",
-					BT_REGISTER_TIMEOUT / 1000);
-			return -ETIMEDOUT;
-		}
-
-		/* Is ST registration callback
-		 * called with ERROR status?
-		 */
-		if (hst->reg_status != 0) {
-			BT_ERR("ST registration completed with invalid "
-					"status %d", hst->reg_status);
-			return -EAGAIN;
-		}
-
-done:
-		hst->st_write = ti_st_proto[i].write;
-		if (!hst->st_write) {
-			BT_ERR("undefined ST write function");
-			for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
-				/* Undo registration with ST */
-				err = st_unregister(&ti_st_proto[i]);
-				if (err)
-					BT_ERR("st_unregister() failed with "
-							"error %d", err);
-				hst->st_write = NULL;
-			}
-			return -EIO;
-		}
-	}
-	return 0;
-}
-
-/* Close device */
-static int ti_st_close(struct hci_dev *hdev)
-{
-	int err, i;
-	struct ti_st *hst = hci_get_drvdata(hdev);
-
-	for (i = MAX_BT_CHNL_IDS-1; i >= 0; i--) {
-		err = st_unregister(&ti_st_proto[i]);
-		if (err)
-			BT_ERR("st_unregister(%d) failed with error %d",
-					ti_st_proto[i].chnl_id, err);
-	}
-
-	hst->st_write = NULL;
-
-	return err;
-}
-
-static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
-{
-	struct ti_st *hst;
-	long len;
-	int pkt_type;
-
-	hst = hci_get_drvdata(hdev);
-
-	/* Prepend skb with frame type */
-	memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1);
-
-	BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb),
-	       skb->len);
-
-	/* Insert skb to shared transport layer's transmit queue.
-	 * Freeing skb memory is taken care in shared transport layer,
-	 * so don't free skb memory here.
-	 */
-	pkt_type = hci_skb_pkt_type(skb);
-	len = hst->st_write(skb);
-	if (len < 0) {
-		BT_ERR("ST write failed (%ld)", len);
-		/* Try Again, would only fail if UART has gone bad */
-		return -EAGAIN;
-	}
-
-	/* ST accepted our skb. So, Go ahead and do rest */
-	hdev->stat.byte_tx += len;
-	ti_st_tx_complete(hst, pkt_type);
-
-	return 0;
-}
-
-static int bt_ti_probe(struct platform_device *pdev)
-{
-	struct ti_st *hst;
-	struct hci_dev *hdev;
-	int err;
-
-	hst = devm_kzalloc(&pdev->dev, sizeof(struct ti_st), GFP_KERNEL);
-	if (!hst)
-		return -ENOMEM;
-
-	/* Expose "hciX" device to user space */
-	hdev = hci_alloc_dev();
-	if (!hdev)
-		return -ENOMEM;
-
-	BT_DBG("hdev %p", hdev);
-
-	hst->hdev = hdev;
-	hdev->bus = HCI_UART;
-	hci_set_drvdata(hdev, hst);
-	hdev->open = ti_st_open;
-	hdev->close = ti_st_close;
-	hdev->flush = NULL;
-	hdev->send = ti_st_send_frame;
-
-	err = hci_register_dev(hdev);
-	if (err < 0) {
-		BT_ERR("Can't register HCI device error %d", err);
-		hci_free_dev(hdev);
-		return err;
-	}
-
-	BT_DBG("HCI device registered (hdev %p)", hdev);
-
-	dev_set_drvdata(&pdev->dev, hst);
-	return 0;
-}
-
-static int bt_ti_remove(struct platform_device *pdev)
-{
-	struct hci_dev *hdev;
-	struct ti_st *hst = dev_get_drvdata(&pdev->dev);
-
-	if (!hst)
-		return -EFAULT;
-
-	BT_DBG("%s", hst->hdev->name);
-
-	hdev = hst->hdev;
-	ti_st_close(hdev);
-	hci_unregister_dev(hdev);
-
-	hci_free_dev(hdev);
-
-	dev_set_drvdata(&pdev->dev, NULL);
-	return 0;
-}
-
-static struct platform_driver btwilink_driver = {
-	.probe = bt_ti_probe,
-	.remove = bt_ti_remove,
-	.driver = {
-		.name = "btwilink",
-	},
-};
-
-module_platform_driver(btwilink_driver);
-
-/* ------ Module Info ------ */
-
-MODULE_AUTHOR("Raja Mani <raja_mani@ti.com>");
-MODULE_DESCRIPTION("Bluetooth Driver for TI Shared Transport" VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 7646636f2d18..b236cb11c0dc 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/acpi.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/property.h>
 #include <linux/platform_data/x86/apple.h>
 #include <linux/platform_device.h>
@@ -48,6 +49,15 @@
 #define BCM_NUM_SUPPLIES 2
 
 /**
+ * struct bcm_device_data - device specific data
+ * @no_early_set_baudrate: Disallow set baudrate before driver setup()
+ */
+struct bcm_device_data {
+	bool	no_early_set_baudrate;
+	bool	drive_rts_on_open;
+};
+
+/**
  * struct bcm_device - device driver resources
  * @serdev_hu: HCI UART controller struct
  * @list: bcm_device_list node
@@ -79,6 +89,7 @@
  * @hu: pointer to HCI UART controller struct,
  *	used to disable flow control during runtime suspend and system sleep
  * @is_suspended: whether flow control is currently disabled
+ * @no_early_set_baudrate: don't set_baudrate before setup()
  */
 struct bcm_device {
 	/* Must be the first member, hci_serdev.c expects this. */
@@ -112,6 +123,9 @@ struct bcm_device {
 	struct hci_uart		*hu;
 	bool			is_suspended;
 #endif
+	bool			no_early_set_baudrate;
+	bool			drive_rts_on_open;
+	u8			pcm_int_params[5];
 };
 
 /* generic bcm uart resources */
@@ -445,9 +459,22 @@ static int bcm_open(struct hci_uart *hu)
 
 out:
 	if (bcm->dev) {
+		if (bcm->dev->drive_rts_on_open)
+			hci_uart_set_flow_control(hu, true);
+
 		hu->init_speed = bcm->dev->init_speed;
-		hu->oper_speed = bcm->dev->oper_speed;
+
+		/* If oper_speed is set, ldisc/serdev will set the baudrate
+		 * before calling setup()
+		 */
+		if (!bcm->dev->no_early_set_baudrate)
+			hu->oper_speed = bcm->dev->oper_speed;
+
 		err = bcm_gpio_set_power(bcm->dev, true);
+
+		if (bcm->dev->drive_rts_on_open)
+			hci_uart_set_flow_control(hu, false);
+
 		if (err)
 			goto err_unset_hu;
 	}
@@ -563,6 +590,8 @@ static int bcm_setup(struct hci_uart *hu)
 	/* Operational speed if any */
 	if (hu->oper_speed)
 		speed = hu->oper_speed;
+	else if (bcm->dev && bcm->dev->oper_speed)
+		speed = bcm->dev->oper_speed;
 	else if (hu->proto->oper_speed)
 		speed = hu->proto->oper_speed;
 	else
@@ -574,6 +603,16 @@ static int bcm_setup(struct hci_uart *hu)
 			host_set_baudrate(hu, speed);
 	}
 
+	/* PCM parameters if provided */
+	if (bcm->dev && bcm->dev->pcm_int_params[0] != 0xff) {
+		struct bcm_set_pcm_int_params params;
+
+		btbcm_read_pcm_int_params(hu->hdev, &params);
+
+		memcpy(&params, bcm->dev->pcm_int_params, 5);
+		btbcm_write_pcm_int_params(hu->hdev, &params);
+	}
+
 finalize:
 	release_firmware(fw);
 
@@ -1111,6 +1150,10 @@ static int bcm_acpi_probe(struct bcm_device *dev)
 static int bcm_of_probe(struct bcm_device *bdev)
 {
 	device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed);
+	device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params",
+				      bdev->pcm_int_params, 5);
+	bdev->irq = of_irq_get_byname(bdev->dev->of_node, "host-wakeup");
+
 	return 0;
 }
 
@@ -1126,6 +1169,9 @@ static int bcm_probe(struct platform_device *pdev)
 	dev->dev = &pdev->dev;
 	dev->irq = platform_get_irq(pdev, 0);
 
+	/* Initialize routing field to an unused value */
+	dev->pcm_int_params[0] = 0xff;
+
 	if (has_acpi_companion(&pdev->dev)) {
 		ret = bcm_acpi_probe(dev);
 		if (ret)
@@ -1372,6 +1418,7 @@ static struct platform_driver bcm_driver = {
 static int bcm_serdev_probe(struct serdev_device *serdev)
 {
 	struct bcm_device *bcmdev;
+	const struct bcm_device_data *data;
 	int err;
 
 	bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL);
@@ -1385,6 +1432,9 @@ static int bcm_serdev_probe(struct serdev_device *serdev)
 	bcmdev->serdev_hu.serdev = serdev;
 	serdev_device_set_drvdata(serdev, bcmdev);
 
+	/* Initialize routing field to an unused value */
+	bcmdev->pcm_int_params[0] = 0xff;
+
 	if (has_acpi_companion(&serdev->dev))
 		err = bcm_acpi_probe(bcmdev);
 	else
@@ -1406,6 +1456,12 @@ static int bcm_serdev_probe(struct serdev_device *serdev)
 	if (err)
 		dev_err(&serdev->dev, "Failed to power down\n");
 
+	data = device_get_match_data(bcmdev->dev);
+	if (data) {
+		bcmdev->no_early_set_baudrate = data->no_early_set_baudrate;
+		bcmdev->drive_rts_on_open = data->drive_rts_on_open;
+	}
+
 	return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto);
 }
 
@@ -1417,11 +1473,22 @@ static void bcm_serdev_remove(struct serdev_device *serdev)
 }
 
 #ifdef CONFIG_OF
+static struct bcm_device_data bcm4354_device_data = {
+	.no_early_set_baudrate = true,
+};
+
+static struct bcm_device_data bcm43438_device_data = {
+	.drive_rts_on_open = true,
+};
+
 static const struct of_device_id bcm_bluetooth_of_match[] = {
 	{ .compatible = "brcm,bcm20702a1" },
+	{ .compatible = "brcm,bcm4329-bt" },
 	{ .compatible = "brcm,bcm4345c5" },
 	{ .compatible = "brcm,bcm4330-bt" },
-	{ .compatible = "brcm,bcm43438-bt" },
+	{ .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
+	{ .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
+	{ .compatible = "brcm,bcm4335a0" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index fe2e307009f4..cf4a56095817 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -591,6 +591,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count)
 			if (*ptr == 0xc0) {
 				BT_ERR("Short BCSP packet");
 				kfree_skb(bcsp->rx_skb);
+				bcsp->rx_skb = NULL;
 				bcsp->rx_state = BCSP_W4_PKT_START;
 				bcsp->rx_count = 0;
 			} else
@@ -606,6 +607,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count)
 			    bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) {
 				BT_ERR("Error in BCSP hdr checksum");
 				kfree_skb(bcsp->rx_skb);
+				bcsp->rx_skb = NULL;
 				bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
 				bcsp->rx_count = 0;
 				continue;
@@ -630,6 +632,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count)
 				       bscp_get_crc(bcsp));
 
 				kfree_skb(bcsp->rx_skb);
+				bcsp->rx_skb = NULL;
 				bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
 				bcsp->rx_count = 0;
 				continue;
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 19ba52005009..6dc1fbeb564b 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -103,6 +103,7 @@ static const struct h4_recv_pkt h4_recv_pkts[] = {
 	{ H4_RECV_ACL,   .recv = hci_recv_frame },
 	{ H4_RECV_SCO,   .recv = hci_recv_frame },
 	{ H4_RECV_EVENT, .recv = hci_recv_frame },
+	{ H4_RECV_ISO,   .recv = hci_recv_frame },
 };
 
 /* Recv data */
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index dacf297baf59..0b14547482a7 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -385,6 +385,7 @@ static void h5_complete_rx_pkt(struct hci_uart *hu)
 	case HCI_EVENT_PKT:
 	case HCI_ACLDATA_PKT:
 	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
 		hci_skb_pkt_type(h5->rx_skb) = H5_HDR_PKT_TYPE(hdr);
 
 		/* Remove Three-wire header */
@@ -594,6 +595,7 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 		break;
 
 	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
 		skb_queue_tail(&h5->unrel, skb);
 		break;
 
@@ -636,6 +638,7 @@ static bool valid_packet_type(u8 type)
 	case HCI_ACLDATA_PKT:
 	case HCI_COMMAND_PKT:
 	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
 	case HCI_3WIRE_LINK_PKT:
 	case HCI_3WIRE_ACK_PKT:
 		return true;
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 285706618f8a..d9a4c6c691e0 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -621,13 +621,6 @@ static int ll_setup(struct hci_uart *hu)
 
 	serdev_device_set_flow_control(serdev, true);
 
-	if (hu->oper_speed)
-		speed = hu->oper_speed;
-	else if (hu->proto->oper_speed)
-		speed = hu->proto->oper_speed;
-	else
-		speed = 0;
-
 	do {
 		/* Reset the Bluetooth device */
 		gpiod_set_value_cansleep(lldev->enable_gpio, 0);
@@ -639,20 +632,6 @@ static int ll_setup(struct hci_uart *hu)
 			return err;
 		}
 
-		if (speed) {
-			__le32 speed_le = cpu_to_le32(speed);
-			struct sk_buff *skb;
-
-			skb = __hci_cmd_sync(hu->hdev,
-					     HCI_VS_UPDATE_UART_HCI_BAUDRATE,
-					     sizeof(speed_le), &speed_le,
-					     HCI_INIT_TIMEOUT);
-			if (!IS_ERR(skb)) {
-				kfree_skb(skb);
-				serdev_device_set_baudrate(serdev, speed);
-			}
-		}
-
 		err = download_firmware(lldev);
 		if (!err)
 			break;
@@ -677,7 +656,25 @@ static int ll_setup(struct hci_uart *hu)
 	}
 
 	/* Operational speed if any */
+	if (hu->oper_speed)
+		speed = hu->oper_speed;
+	else if (hu->proto->oper_speed)
+		speed = hu->proto->oper_speed;
+	else
+		speed = 0;
+
+	if (speed) {
+		__le32 speed_le = cpu_to_le32(speed);
+		struct sk_buff *skb;
 
+		skb = __hci_cmd_sync(hu->hdev, HCI_VS_UPDATE_UART_HCI_BAUDRATE,
+				     sizeof(speed_le), &speed_le,
+				     HCI_INIT_TIMEOUT);
+		if (!IS_ERR(skb)) {
+			kfree_skb(skb);
+			serdev_device_set_baudrate(serdev, speed);
+		}
+	}
 
 	return 0;
 }
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index 6463350b7977..05f7f6de6863 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -520,7 +520,7 @@ static int nokia_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 		err = skb_pad(skb, 1);
 		if (err)
 			return err;
-		skb_put_u8(skb, 0x00);
+		skb_put(skb, 1);
 	}
 
 	skb_queue_tail(&btdev->txq, skb);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e3164c200eac..d6e0c99ee5eb 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -20,6 +20,7 @@
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
+#include <linux/devcoredump.h>
 #include <linux/device.h>
 #include <linux/gpio/consumer.h>
 #include <linux/mod_devicetable.h>
@@ -43,8 +44,10 @@
 #define HCI_MAX_IBS_SIZE	10
 
 #define IBS_WAKE_RETRANS_TIMEOUT_MS	100
-#define IBS_TX_IDLE_TIMEOUT_MS		2000
+#define IBS_BTSOC_TX_IDLE_TIMEOUT_MS	40
+#define IBS_HOST_TX_IDLE_TIMEOUT_MS	2000
 #define CMD_TRANS_TIMEOUT_MS		100
+#define MEMDUMP_TIMEOUT_MS		8000
 
 /* susclk rate */
 #define SUSCLK_RATE_32KHZ	32768
@@ -52,11 +55,24 @@
 /* Controller debug log header */
 #define QCA_DEBUG_HANDLE	0x2EDC
 
+/* max retry count when init fails */
+#define MAX_INIT_RETRIES 3
+
+/* Controller dump header */
+#define QCA_SSR_DUMP_HANDLE		0x0108
+#define QCA_DUMP_PACKET_SIZE		255
+#define QCA_LAST_SEQUENCE_NUM		0xFFFF
+#define QCA_CRASHBYTE_PACKET_LEN	1096
+#define QCA_MEMDUMP_BYTE		0xFB
+
 enum qca_flags {
 	QCA_IBS_ENABLED,
 	QCA_DROP_VENDOR_EVENT,
+	QCA_SUSPENDING,
+	QCA_MEMDUMP_COLLECTION
 };
 
+
 /* HCI_IBS transmit side sleep protocol states */
 enum tx_ibs_states {
 	HCI_IBS_TX_ASLEEP,
@@ -79,11 +95,40 @@ enum hci_ibs_clock_state_vote {
 	HCI_IBS_RX_VOTE_CLOCK_OFF,
 };
 
+/* Controller memory dump states */
+enum qca_memdump_states {
+	QCA_MEMDUMP_IDLE,
+	QCA_MEMDUMP_COLLECTING,
+	QCA_MEMDUMP_COLLECTED,
+	QCA_MEMDUMP_TIMEOUT,
+};
+
+struct qca_memdump_data {
+	char *memdump_buf_head;
+	char *memdump_buf_tail;
+	u32 current_seq_no;
+	u32 received_dump;
+};
+
+struct qca_memdump_event_hdr {
+	__u8    evt;
+	__u8    plen;
+	__u16   opcode;
+	__u16   seq_no;
+	__u8    reserved;
+} __packed;
+
+
+struct qca_dump_size {
+	u32 dump_size;
+} __packed;
+
 struct qca_data {
 	struct hci_uart *hu;
 	struct sk_buff *rx_skb;
 	struct sk_buff_head txq;
 	struct sk_buff_head tx_wait_q;	/* HCI_IBS wait queue	*/
+	struct sk_buff_head rx_memdump_q;	/* Memdump wait queue	*/
 	spinlock_t hci_ibs_lock;	/* HCI_IBS state lock	*/
 	u8 tx_ibs_state;	/* HCI_IBS transmit side power state*/
 	u8 rx_ibs_state;	/* HCI_IBS receive side power state */
@@ -93,13 +138,18 @@ struct qca_data {
 	u32 tx_idle_delay;
 	struct timer_list wake_retrans_timer;
 	u32 wake_retrans;
+	struct timer_list memdump_timer;
 	struct workqueue_struct *workqueue;
 	struct work_struct ws_awake_rx;
 	struct work_struct ws_awake_device;
 	struct work_struct ws_rx_vote_off;
 	struct work_struct ws_tx_vote_off;
+	struct work_struct ctrl_memdump_evt;
+	struct qca_memdump_data *qca_memdump;
 	unsigned long flags;
 	struct completion drop_ev_comp;
+	wait_queue_head_t suspend_wait_q;
+	enum qca_memdump_states memdump_state;
 
 	/* For debugging purpose */
 	u64 ibs_sent_wacks;
@@ -130,8 +180,6 @@ enum qca_speed_type {
  */
 struct qca_vreg {
 	const char *name;
-	unsigned int min_uV;
-	unsigned int max_uV;
 	unsigned int load_uA;
 };
 
@@ -146,8 +194,8 @@ struct qca_vreg_data {
  */
 struct qca_power {
 	struct device *dev;
-	const struct qca_vreg_data *vreg_data;
 	struct regulator_bulk_data *vreg_bulk;
+	int num_vregs;
 	bool vregs_on;
 };
 
@@ -162,9 +210,11 @@ struct qca_serdev {
 	const char *firmware_name;
 };
 
-static int qca_power_setup(struct hci_uart *hu, bool on);
+static int qca_regulator_enable(struct qca_serdev *qcadev);
+static void qca_regulator_disable(struct qca_serdev *qcadev);
 static void qca_power_shutdown(struct hci_uart *hu);
 static int qca_power_off(struct hci_dev *hdev);
+static void qca_controller_memdump(struct work_struct *work);
 
 static enum qca_btsoc_type qca_soc_type(struct hci_uart *hu)
 {
@@ -438,6 +488,12 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
 	spin_lock_irqsave_nested(&qca->hci_ibs_lock,
 				 flags, SINGLE_DEPTH_NESTING);
 
+	/* Don't retransmit the HCI_IBS_WAKE_IND when suspending. */
+	if (test_bit(QCA_SUSPENDING, &qca->flags)) {
+		spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+		return;
+	}
+
 	switch (qca->tx_ibs_state) {
 	case HCI_IBS_TX_WAKING:
 		/* No WAKE_ACK, retransmit WAKE */
@@ -466,12 +522,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
 		hci_uart_tx_wakeup(hu);
 }
 
+static void hci_memdump_timeout(struct timer_list *t)
+{
+	struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
+	struct hci_uart *hu = qca->hu;
+	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+	char *memdump_buf = qca_memdump->memdump_buf_tail;
+
+	bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
+	/* Inject hw error event to reset the device and driver. */
+	hci_reset_dev(hu->hdev);
+	vfree(memdump_buf);
+	kfree(qca_memdump);
+	qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+	del_timer(&qca->memdump_timer);
+	cancel_work_sync(&qca->ctrl_memdump_evt);
+}
+
 /* Initialize protocol */
 static int qca_open(struct hci_uart *hu)
 {
 	struct qca_serdev *qcadev;
 	struct qca_data *qca;
-	int ret;
 
 	BT_DBG("hu %p qca_open", hu);
 
@@ -484,6 +556,7 @@ static int qca_open(struct hci_uart *hu)
 
 	skb_queue_head_init(&qca->txq);
 	skb_queue_head_init(&qca->tx_wait_q);
+	skb_queue_head_init(&qca->rx_memdump_q);
 	spin_lock_init(&qca->hci_ibs_lock);
 	qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
 	if (!qca->workqueue) {
@@ -496,6 +569,8 @@ static int qca_open(struct hci_uart *hu)
 	INIT_WORK(&qca->ws_awake_device, qca_wq_awake_device);
 	INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
 	INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
+	INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
+	init_waitqueue_head(&qca->suspend_wait_q);
 
 	qca->hu = hu;
 	init_completion(&qca->drop_ev_comp);
@@ -509,23 +584,10 @@ static int qca_open(struct hci_uart *hu)
 	hu->priv = qca;
 
 	if (hu->serdev) {
-
 		qcadev = serdev_device_get_drvdata(hu->serdev);
-		if (!qca_is_wcn399x(qcadev->btsoc_type)) {
-			gpiod_set_value_cansleep(qcadev->bt_en, 1);
-			/* Controller needs time to bootup. */
-			msleep(150);
-		} else {
+		if (qca_is_wcn399x(qcadev->btsoc_type)) {
 			hu->init_speed = qcadev->init_speed;
 			hu->oper_speed = qcadev->oper_speed;
-			ret = qca_power_setup(hu, true);
-			if (ret) {
-				destroy_workqueue(qca->workqueue);
-				kfree_skb(qca->rx_skb);
-				hu->priv = NULL;
-				kfree(qca);
-				return ret;
-			}
 		}
 	}
 
@@ -533,7 +595,8 @@ static int qca_open(struct hci_uart *hu)
 	qca->wake_retrans = IBS_WAKE_RETRANS_TIMEOUT_MS;
 
 	timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
-	qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS;
+	qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS;
+	timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
 
 	BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
 	       qca->tx_idle_delay, qca->wake_retrans);
@@ -603,7 +666,6 @@ static int qca_flush(struct hci_uart *hu)
 /* Close protocol */
 static int qca_close(struct hci_uart *hu)
 {
-	struct qca_serdev *qcadev;
 	struct qca_data *qca = hu->priv;
 
 	BT_DBG("hu %p qca close", hu);
@@ -612,19 +674,14 @@ static int qca_close(struct hci_uart *hu)
 
 	skb_queue_purge(&qca->tx_wait_q);
 	skb_queue_purge(&qca->txq);
+	skb_queue_purge(&qca->rx_memdump_q);
 	del_timer(&qca->tx_idle_timer);
 	del_timer(&qca->wake_retrans_timer);
+	del_timer(&qca->memdump_timer);
 	destroy_workqueue(qca->workqueue);
 	qca->hu = NULL;
 
-	if (hu->serdev) {
-		qcadev = serdev_device_get_drvdata(hu->serdev);
-		if (qca_is_wcn399x(qcadev->btsoc_type))
-			qca_power_shutdown(hu);
-		else
-			gpiod_set_value_cansleep(qcadev->bt_en, 0);
-
-	}
+	qca_power_shutdown(hu);
 
 	kfree_skb(qca->rx_skb);
 
@@ -648,6 +705,12 @@ static void device_want_to_wakeup(struct hci_uart *hu)
 
 	qca->ibs_recv_wakes++;
 
+	/* Don't wake the rx up when suspending. */
+	if (test_bit(QCA_SUSPENDING, &qca->flags)) {
+		spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+		return;
+	}
+
 	switch (qca->rx_ibs_state) {
 	case HCI_IBS_RX_ASLEEP:
 		/* Make sure clock is on - we may have turned clock off since
@@ -712,6 +775,8 @@ static void device_want_to_sleep(struct hci_uart *hu)
 		break;
 	}
 
+	wake_up_interruptible(&qca->suspend_wait_q);
+
 	spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
 }
 
@@ -729,6 +794,12 @@ static void device_woke_up(struct hci_uart *hu)
 
 	qca->ibs_recv_wacks++;
 
+	/* Don't react to the wake-up-acknowledgment when suspending. */
+	if (test_bit(QCA_SUSPENDING, &qca->flags)) {
+		spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+		return;
+	}
+
 	switch (qca->tx_ibs_state) {
 	case HCI_IBS_TX_AWAKE:
 		/* Expect one if we send 2 WAKEs */
@@ -781,8 +852,10 @@ static int qca_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 
 	/* Don't go to sleep in middle of patch download or
 	 * Out-Of-Band(GPIOs control) sleep is selected.
+	 * Don't wake the device up when suspending.
 	 */
-	if (!test_bit(QCA_IBS_ENABLED, &qca->flags)) {
+	if (!test_bit(QCA_IBS_ENABLED, &qca->flags) ||
+	    test_bit(QCA_SUSPENDING, &qca->flags)) {
 		skb_queue_tail(&qca->txq, skb);
 		spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
 		return 0;
@@ -874,6 +947,125 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb)
 	return hci_recv_frame(hdev, skb);
 }
 
+static void qca_controller_memdump(struct work_struct *work)
+{
+	struct qca_data *qca = container_of(work, struct qca_data,
+					    ctrl_memdump_evt);
+	struct hci_uart *hu = qca->hu;
+	struct sk_buff *skb;
+	struct qca_memdump_event_hdr *cmd_hdr;
+	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+	struct qca_dump_size *dump;
+	char *memdump_buf;
+	char nullBuff[QCA_DUMP_PACKET_SIZE] = { 0 };
+	u16 seq_no;
+	u32 dump_size;
+
+	while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
+
+		if (!qca_memdump) {
+			qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
+					      GFP_ATOMIC);
+			if (!qca_memdump)
+				return;
+
+			qca->qca_memdump = qca_memdump;
+		}
+
+		qca->memdump_state = QCA_MEMDUMP_COLLECTING;
+		cmd_hdr = (void *) skb->data;
+		seq_no = __le16_to_cpu(cmd_hdr->seq_no);
+		skb_pull(skb, sizeof(struct qca_memdump_event_hdr));
+
+		if (!seq_no) {
+
+			/* This is the first frame of memdump packet from
+			 * the controller, Disable IBS to recevie dump
+			 * with out any interruption, ideally time required for
+			 * the controller to send the dump is 8 seconds. let us
+			 * start timer to handle this asynchronous activity.
+			 */
+			clear_bit(QCA_IBS_ENABLED, &qca->flags);
+			set_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
+			dump = (void *) skb->data;
+			dump_size = __le32_to_cpu(dump->dump_size);
+			if (!(dump_size)) {
+				bt_dev_err(hu->hdev, "Rx invalid memdump size");
+				kfree_skb(skb);
+				return;
+			}
+
+			bt_dev_info(hu->hdev, "QCA collecting dump of size:%u",
+				    dump_size);
+			mod_timer(&qca->memdump_timer, (jiffies +
+				  msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)));
+
+			skb_pull(skb, sizeof(dump_size));
+			memdump_buf = vmalloc(dump_size);
+			qca_memdump->memdump_buf_head = memdump_buf;
+			qca_memdump->memdump_buf_tail = memdump_buf;
+		}
+
+		memdump_buf = qca_memdump->memdump_buf_tail;
+
+		/* If sequence no 0 is missed then there is no point in
+		 * accepting the other sequences.
+		 */
+		if (!memdump_buf) {
+			bt_dev_err(hu->hdev, "QCA: Discarding other packets");
+			kfree(qca_memdump);
+			kfree_skb(skb);
+			qca->qca_memdump = NULL;
+			return;
+		}
+
+		/* There could be chance of missing some packets from
+		 * the controller. In such cases let us store the dummy
+		 * packets in the buffer.
+		 */
+		while ((seq_no > qca_memdump->current_seq_no + 1) &&
+			seq_no != QCA_LAST_SEQUENCE_NUM) {
+			bt_dev_err(hu->hdev, "QCA controller missed packet:%d",
+				   qca_memdump->current_seq_no);
+			memcpy(memdump_buf, nullBuff, QCA_DUMP_PACKET_SIZE);
+			memdump_buf = memdump_buf + QCA_DUMP_PACKET_SIZE;
+			qca_memdump->received_dump += QCA_DUMP_PACKET_SIZE;
+			qca_memdump->current_seq_no++;
+		}
+
+		memcpy(memdump_buf, (unsigned char *) skb->data, skb->len);
+		memdump_buf = memdump_buf + skb->len;
+		qca_memdump->memdump_buf_tail = memdump_buf;
+		qca_memdump->current_seq_no = seq_no + 1;
+		qca_memdump->received_dump += skb->len;
+		qca->qca_memdump = qca_memdump;
+		kfree_skb(skb);
+		if (seq_no == QCA_LAST_SEQUENCE_NUM) {
+			bt_dev_info(hu->hdev, "QCA writing crash dump of size %d bytes",
+				   qca_memdump->received_dump);
+			memdump_buf = qca_memdump->memdump_buf_head;
+			dev_coredumpv(&hu->serdev->dev, memdump_buf,
+				      qca_memdump->received_dump, GFP_KERNEL);
+			del_timer(&qca->memdump_timer);
+			kfree(qca->qca_memdump);
+			qca->qca_memdump = NULL;
+			qca->memdump_state = QCA_MEMDUMP_COLLECTED;
+		}
+	}
+
+}
+
+int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+
+	skb_queue_tail(&qca->rx_memdump_q, skb);
+	queue_work(qca->workqueue, &qca->ctrl_memdump_evt);
+
+	return 0;
+}
+
 static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
@@ -899,6 +1091,14 @@ static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
 
 		return 0;
 	}
+	/* We receive chip memory dump as an event packet, With a dedicated
+	 * handler followed by a hardware error event. When this event is
+	 * received we store dump into a file before closing hci. This
+	 * dump will help in triaging the issues.
+	 */
+	if ((skb->data[0] == HCI_VENDOR_PKT) &&
+	    (get_unaligned_be16(skb->data + 2) == QCA_SSR_DUMP_HANDLE))
+		return qca_controller_memdump_event(hdev, skb);
 
 	return hci_recv_frame(hdev, skb);
 }
@@ -1177,6 +1377,91 @@ error:
 	return ret;
 }
 
+static int qca_send_crashbuffer(struct hci_uart *hu)
+{
+	struct qca_data *qca = hu->priv;
+	struct sk_buff *skb;
+
+	skb = bt_skb_alloc(QCA_CRASHBYTE_PACKET_LEN, GFP_KERNEL);
+	if (!skb) {
+		bt_dev_err(hu->hdev, "Failed to allocate memory for skb packet");
+		return -ENOMEM;
+	}
+
+	/* We forcefully crash the controller, by sending 0xfb byte for
+	 * 1024 times. We also might have chance of losing data, To be
+	 * on safer side we send 1096 bytes to the SoC.
+	 */
+	memset(skb_put(skb, QCA_CRASHBYTE_PACKET_LEN), QCA_MEMDUMP_BYTE,
+	       QCA_CRASHBYTE_PACKET_LEN);
+	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
+	bt_dev_info(hu->hdev, "crash the soc to collect controller dump");
+	skb_queue_tail(&qca->txq, skb);
+	hci_uart_tx_wakeup(hu);
+
+	return 0;
+}
+
+static void qca_wait_for_dump_collection(struct hci_dev *hdev)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+	char *memdump_buf = NULL;
+
+	wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
+			    TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
+
+	clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
+	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
+		bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
+		if (qca_memdump)
+			memdump_buf = qca_memdump->memdump_buf_tail;
+		vfree(memdump_buf);
+		kfree(qca_memdump);
+		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+		del_timer(&qca->memdump_timer);
+		cancel_work_sync(&qca->ctrl_memdump_evt);
+	}
+}
+
+static void qca_hw_error(struct hci_dev *hdev, u8 code)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+
+	bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
+
+	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
+		/* If hardware error event received for other than QCA
+		 * soc memory dump event, then we need to crash the SOC
+		 * and wait here for 8 seconds to get the dump packets.
+		 * This will block main thread to be on hold until we
+		 * collect dump.
+		 */
+		set_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
+		qca_send_crashbuffer(hu);
+		qca_wait_for_dump_collection(hdev);
+	} else if (qca->memdump_state == QCA_MEMDUMP_COLLECTING) {
+		/* Let us wait here until memory dump collected or
+		 * memory dump timer expired.
+		 */
+		bt_dev_info(hdev, "waiting for dump to complete");
+		qca_wait_for_dump_collection(hdev);
+	}
+}
+
+static void qca_cmd_timeout(struct hci_dev *hdev)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+
+	if (qca->memdump_state == QCA_MEMDUMP_IDLE)
+		qca_send_crashbuffer(hu);
+	else
+		bt_dev_info(hdev, "Dump collection is in process");
+}
+
 static int qca_wcn3990_init(struct hci_uart *hu)
 {
 	struct qca_serdev *qcadev;
@@ -1188,7 +1473,7 @@ static int qca_wcn3990_init(struct hci_uart *hu)
 	qcadev = serdev_device_get_drvdata(hu->serdev);
 	if (!qcadev->bt_power->vregs_on) {
 		serdev_device_close(hu->serdev);
-		ret = qca_power_setup(hu, true);
+		ret = qca_regulator_enable(qcadev);
 		if (ret)
 			return ret;
 
@@ -1227,11 +1512,37 @@ static int qca_wcn3990_init(struct hci_uart *hu)
 	return 0;
 }
 
+static int qca_power_on(struct hci_dev *hdev)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	enum qca_btsoc_type soc_type = qca_soc_type(hu);
+	struct qca_serdev *qcadev;
+	int ret = 0;
+
+	/* Non-serdev device usually is powered by external power
+	 * and don't need additional action in driver for power on
+	 */
+	if (!hu->serdev)
+		return 0;
+
+	if (qca_is_wcn399x(soc_type)) {
+		ret = qca_wcn3990_init(hu);
+	} else {
+		qcadev = serdev_device_get_drvdata(hu->serdev);
+		gpiod_set_value_cansleep(qcadev->bt_en, 1);
+		/* Controller needs time to bootup. */
+		msleep(150);
+	}
+
+	return ret;
+}
+
 static int qca_setup(struct hci_uart *hu)
 {
 	struct hci_dev *hdev = hu->hdev;
 	struct qca_data *qca = hu->priv;
 	unsigned int speed, qca_baudrate = QCA_BAUDRATE_115200;
+	unsigned int retries = 0;
 	enum qca_btsoc_type soc_type = qca_soc_type(hu);
 	const char *firmware_name = qca_get_firmware_name(hu);
 	int ret;
@@ -1249,24 +1560,21 @@ static int qca_setup(struct hci_uart *hu)
 	 */
 	set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
 
-	if (qca_is_wcn399x(soc_type)) {
-		bt_dev_info(hdev, "setting up wcn3990");
+	bt_dev_info(hdev, "setting up %s",
+		qca_is_wcn399x(soc_type) ? "wcn399x" : "ROME");
 
-		/* Enable NON_PERSISTENT_SETUP QUIRK to ensure to execute
-		 * setup for every hci up.
-		 */
-		set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+retry:
+	ret = qca_power_on(hdev);
+	if (ret)
+		return ret;
+
+	if (qca_is_wcn399x(soc_type)) {
 		set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
-		hu->hdev->shutdown = qca_power_off;
-		ret = qca_wcn3990_init(hu);
-		if (ret)
-			return ret;
 
-		ret = qca_read_soc_version(hdev, &soc_ver);
+		ret = qca_read_soc_version(hdev, &soc_ver, soc_type);
 		if (ret)
 			return ret;
 	} else {
-		bt_dev_info(hdev, "ROME setup");
 		qca_set_speed(hu, QCA_INIT_SPEED);
 	}
 
@@ -1282,7 +1590,7 @@ static int qca_setup(struct hci_uart *hu)
 
 	if (!qca_is_wcn399x(soc_type)) {
 		/* Get QCA version information */
-		ret = qca_read_soc_version(hdev, &soc_ver);
+		ret = qca_read_soc_version(hdev, &soc_ver, soc_type);
 		if (ret)
 			return ret;
 	}
@@ -1294,6 +1602,8 @@ static int qca_setup(struct hci_uart *hu)
 	if (!ret) {
 		set_bit(QCA_IBS_ENABLED, &qca->flags);
 		qca_debugfs_init(hdev);
+		hu->hdev->hw_error = qca_hw_error;
+		hu->hdev->cmd_timeout = qca_cmd_timeout;
 	} else if (ret == -ENOENT) {
 		/* No patch/nvm-config found, run with original fw/config */
 		ret = 0;
@@ -1303,6 +1613,20 @@ static int qca_setup(struct hci_uart *hu)
 		 * patch/nvm-config is found, so run with original fw/config.
 		 */
 		ret = 0;
+	} else {
+		if (retries < MAX_INIT_RETRIES) {
+			qca_power_shutdown(hu);
+			if (hu->serdev) {
+				serdev_device_close(hu->serdev);
+				ret = serdev_device_open(hu->serdev);
+				if (ret) {
+					bt_dev_err(hdev, "failed to open port");
+					return ret;
+				}
+			}
+			retries++;
+			goto retry;
+		}
 	}
 
 	/* Setup bdaddr */
@@ -1332,10 +1656,21 @@ static const struct hci_uart_proto qca_proto = {
 static const struct qca_vreg_data qca_soc_data_wcn3990 = {
 	.soc_type = QCA_WCN3990,
 	.vregs = (struct qca_vreg []) {
-		{ "vddio",   1800000, 1900000,  15000  },
-		{ "vddxo",   1800000, 1900000,  80000  },
-		{ "vddrf",   1300000, 1350000,  300000 },
-		{ "vddch0",  3300000, 3400000,  450000 },
+		{ "vddio", 15000  },
+		{ "vddxo", 80000  },
+		{ "vddrf", 300000 },
+		{ "vddch0", 450000 },
+	},
+	.num_vregs = 4,
+};
+
+static const struct qca_vreg_data qca_soc_data_wcn3991 = {
+	.soc_type = QCA_WCN3991,
+	.vregs = (struct qca_vreg []) {
+		{ "vddio", 15000  },
+		{ "vddxo", 80000  },
+		{ "vddrf", 300000 },
+		{ "vddch0", 450000 },
 	},
 	.num_vregs = 4,
 };
@@ -1343,18 +1678,22 @@ static const struct qca_vreg_data qca_soc_data_wcn3990 = {
 static const struct qca_vreg_data qca_soc_data_wcn3998 = {
 	.soc_type = QCA_WCN3998,
 	.vregs = (struct qca_vreg []) {
-		{ "vddio",   1800000, 1900000,  10000  },
-		{ "vddxo",   1800000, 1900000,  80000  },
-		{ "vddrf",   1300000, 1352000,  300000 },
-		{ "vddch0",  3300000, 3300000,  450000 },
+		{ "vddio", 10000  },
+		{ "vddxo", 80000  },
+		{ "vddrf", 300000 },
+		{ "vddch0", 450000 },
 	},
 	.num_vregs = 4,
 };
 
 static void qca_power_shutdown(struct hci_uart *hu)
 {
+	struct qca_serdev *qcadev;
 	struct qca_data *qca = hu->priv;
 	unsigned long flags;
+	enum qca_btsoc_type soc_type = qca_soc_type(hu);
+
+	qcadev = serdev_device_get_drvdata(hu->serdev);
 
 	/* From this point we go into power off state. But serial port is
 	 * still open, stop queueing the IBS data and flush all the buffered
@@ -1365,120 +1704,111 @@ static void qca_power_shutdown(struct hci_uart *hu)
 	qca_flush(hu);
 	spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
 
-	host_set_baudrate(hu, 2400);
-	qca_send_power_pulse(hu, false);
-	qca_power_setup(hu, false);
+	hu->hdev->hw_error = NULL;
+	hu->hdev->cmd_timeout = NULL;
+
+	/* Non-serdev device usually is powered by external power
+	 * and don't need additional action in driver for power down
+	 */
+	if (!hu->serdev)
+		return;
+
+	if (qca_is_wcn399x(soc_type)) {
+		host_set_baudrate(hu, 2400);
+		qca_send_power_pulse(hu, false);
+		qca_regulator_disable(qcadev);
+	} else {
+		gpiod_set_value_cansleep(qcadev->bt_en, 0);
+	}
 }
 
 static int qca_power_off(struct hci_dev *hdev)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
 
-	/* Perform pre shutdown command */
-	qca_send_pre_shutdown_cmd(hdev);
-
-	usleep_range(8000, 10000);
+	/* Stop sending shutdown command if soc crashes. */
+	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
+		qca_send_pre_shutdown_cmd(hdev);
+		usleep_range(8000, 10000);
+	}
 
+	qca->memdump_state = QCA_MEMDUMP_IDLE;
 	qca_power_shutdown(hu);
 	return 0;
 }
 
-static int qca_enable_regulator(struct qca_vreg vregs,
-				struct regulator *regulator)
+static int qca_regulator_enable(struct qca_serdev *qcadev)
 {
+	struct qca_power *power = qcadev->bt_power;
 	int ret;
 
-	ret = regulator_set_voltage(regulator, vregs.min_uV,
-				    vregs.max_uV);
-	if (ret)
-		return ret;
+	/* Already enabled */
+	if (power->vregs_on)
+		return 0;
 
-	if (vregs.load_uA)
-		ret = regulator_set_load(regulator,
-					 vregs.load_uA);
+	BT_DBG("enabling %d regulators)", power->num_vregs);
 
+	ret = regulator_bulk_enable(power->num_vregs, power->vreg_bulk);
 	if (ret)
 		return ret;
 
-	return regulator_enable(regulator);
+	power->vregs_on = true;
 
+	return 0;
 }
 
-static void qca_disable_regulator(struct qca_vreg vregs,
-				  struct regulator *regulator)
+static void qca_regulator_disable(struct qca_serdev *qcadev)
 {
-	regulator_disable(regulator);
-	regulator_set_voltage(regulator, 0, vregs.max_uV);
-	if (vregs.load_uA)
-		regulator_set_load(regulator, 0);
-
-}
+	struct qca_power *power;
 
-static int qca_power_setup(struct hci_uart *hu, bool on)
-{
-	struct qca_vreg *vregs;
-	struct regulator_bulk_data *vreg_bulk;
-	struct qca_serdev *qcadev;
-	int i, num_vregs, ret = 0;
-
-	qcadev = serdev_device_get_drvdata(hu->serdev);
-	if (!qcadev || !qcadev->bt_power || !qcadev->bt_power->vreg_data ||
-	    !qcadev->bt_power->vreg_bulk)
-		return -EINVAL;
+	if (!qcadev)
+		return;
 
-	vregs = qcadev->bt_power->vreg_data->vregs;
-	vreg_bulk = qcadev->bt_power->vreg_bulk;
-	num_vregs = qcadev->bt_power->vreg_data->num_vregs;
-	BT_DBG("on: %d", on);
-	if (on && !qcadev->bt_power->vregs_on) {
-		for (i = 0; i < num_vregs; i++) {
-			ret = qca_enable_regulator(vregs[i],
-						   vreg_bulk[i].consumer);
-			if (ret)
-				break;
-		}
+	power = qcadev->bt_power;
 
-		if (ret) {
-			BT_ERR("failed to enable regulator:%s", vregs[i].name);
-			/* turn off regulators which are enabled */
-			for (i = i - 1; i >= 0; i--)
-				qca_disable_regulator(vregs[i],
-						      vreg_bulk[i].consumer);
-		} else {
-			qcadev->bt_power->vregs_on = true;
-		}
-	} else if (!on && qcadev->bt_power->vregs_on) {
-		/* turn off regulator in reverse order */
-		i = qcadev->bt_power->vreg_data->num_vregs - 1;
-		for ( ; i >= 0; i--)
-			qca_disable_regulator(vregs[i], vreg_bulk[i].consumer);
-
-		qcadev->bt_power->vregs_on = false;
-	}
+	/* Already disabled? */
+	if (!power->vregs_on)
+		return;
 
-	return ret;
+	regulator_bulk_disable(power->num_vregs, power->vreg_bulk);
+	power->vregs_on = false;
 }
 
 static int qca_init_regulators(struct qca_power *qca,
 				const struct qca_vreg *vregs, size_t num_vregs)
 {
+	struct regulator_bulk_data *bulk;
+	int ret;
 	int i;
 
-	qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs,
-				      sizeof(struct regulator_bulk_data),
-				      GFP_KERNEL);
-	if (!qca->vreg_bulk)
+	bulk = devm_kcalloc(qca->dev, num_vregs, sizeof(*bulk), GFP_KERNEL);
+	if (!bulk)
 		return -ENOMEM;
 
 	for (i = 0; i < num_vregs; i++)
-		qca->vreg_bulk[i].supply = vregs[i].name;
+		bulk[i].supply = vregs[i].name;
+
+	ret = devm_regulator_bulk_get(qca->dev, num_vregs, bulk);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < num_vregs; i++) {
+		ret = regulator_set_load(bulk[i].consumer, vregs[i].load_uA);
+		if (ret)
+			return ret;
+	}
+
+	qca->vreg_bulk = bulk;
+	qca->num_vregs = num_vregs;
 
-	return devm_regulator_bulk_get(qca->dev, num_vregs, qca->vreg_bulk);
+	return 0;
 }
 
 static int qca_serdev_probe(struct serdev_device *serdev)
 {
 	struct qca_serdev *qcadev;
+	struct hci_dev *hdev;
 	const struct qca_vreg_data *data;
 	int err;
 
@@ -1487,7 +1817,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 		return -ENOMEM;
 
 	qcadev->serdev_hu.serdev = serdev;
-	data = of_device_get_match_data(&serdev->dev);
+	data = device_get_match_data(&serdev->dev);
 	serdev_device_set_drvdata(serdev, qcadev);
 	device_property_read_string(&serdev->dev, "firmware-name",
 					 &qcadev->firmware_name);
@@ -1500,12 +1830,11 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 			return -ENOMEM;
 
 		qcadev->bt_power->dev = &serdev->dev;
-		qcadev->bt_power->vreg_data = data;
 		err = qca_init_regulators(qcadev->bt_power, data->vregs,
 					  data->num_vregs);
 		if (err) {
 			BT_ERR("Failed to init regulators:%d", err);
-			goto out;
+			return err;
 		}
 
 		qcadev->bt_power->vregs_on = false;
@@ -1518,7 +1847,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 		err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto);
 		if (err) {
 			BT_ERR("wcn3990 serdev registration failed");
-			goto out;
+			return err;
 		}
 	} else {
 		qcadev->btsoc_type = QCA_ROME;
@@ -1544,12 +1873,18 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 			return err;
 
 		err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto);
-		if (err)
+		if (err) {
+			BT_ERR("Rome serdev registration failed");
 			clk_disable_unprepare(qcadev->susclk);
+			return err;
+		}
 	}
 
-out:	return err;
+	hdev = qcadev->serdev_hu.hdev;
+	set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+	hdev->shutdown = qca_power_off;
 
+	return 0;
 }
 
 static void qca_serdev_remove(struct serdev_device *serdev)
@@ -1564,9 +1899,103 @@ static void qca_serdev_remove(struct serdev_device *serdev)
 	hci_uart_unregister_device(&qcadev->serdev_hu);
 }
 
+static int __maybe_unused qca_suspend(struct device *dev)
+{
+	struct hci_dev *hdev = container_of(dev, struct hci_dev, dev);
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+	unsigned long flags;
+	int ret = 0;
+	u8 cmd;
+
+	set_bit(QCA_SUSPENDING, &qca->flags);
+
+	/* Device is downloading patch or doesn't support in-band sleep. */
+	if (!test_bit(QCA_IBS_ENABLED, &qca->flags))
+		return 0;
+
+	cancel_work_sync(&qca->ws_awake_device);
+	cancel_work_sync(&qca->ws_awake_rx);
+
+	spin_lock_irqsave_nested(&qca->hci_ibs_lock,
+				 flags, SINGLE_DEPTH_NESTING);
+
+	switch (qca->tx_ibs_state) {
+	case HCI_IBS_TX_WAKING:
+		del_timer(&qca->wake_retrans_timer);
+		/* Fall through */
+	case HCI_IBS_TX_AWAKE:
+		del_timer(&qca->tx_idle_timer);
+
+		serdev_device_write_flush(hu->serdev);
+		cmd = HCI_IBS_SLEEP_IND;
+		ret = serdev_device_write_buf(hu->serdev, &cmd, sizeof(cmd));
+
+		if (ret < 0) {
+			BT_ERR("Failed to send SLEEP to device");
+			break;
+		}
+
+		qca->tx_ibs_state = HCI_IBS_TX_ASLEEP;
+		qca->ibs_sent_slps++;
+
+		qca_wq_serial_tx_clock_vote_off(&qca->ws_tx_vote_off);
+		break;
+
+	case HCI_IBS_TX_ASLEEP:
+		break;
+
+	default:
+		BT_ERR("Spurious tx state %d", qca->tx_ibs_state);
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+
+	if (ret < 0)
+		goto error;
+
+	serdev_device_wait_until_sent(hu->serdev,
+				      msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
+
+	/* Wait for HCI_IBS_SLEEP_IND sent by device to indicate its Tx is going
+	 * to sleep, so that the packet does not wake the system later.
+	 */
+
+	ret = wait_event_interruptible_timeout(qca->suspend_wait_q,
+			qca->rx_ibs_state == HCI_IBS_RX_ASLEEP,
+			msecs_to_jiffies(IBS_BTSOC_TX_IDLE_TIMEOUT_MS));
+
+	if (ret > 0)
+		return 0;
+
+	if (ret == 0)
+		ret = -ETIMEDOUT;
+
+error:
+	clear_bit(QCA_SUSPENDING, &qca->flags);
+
+	return ret;
+}
+
+static int __maybe_unused qca_resume(struct device *dev)
+{
+	struct hci_dev *hdev = container_of(dev, struct hci_dev, dev);
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct qca_data *qca = hu->priv;
+
+	clear_bit(QCA_SUSPENDING, &qca->flags);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(qca_pm_ops, qca_suspend, qca_resume);
+
 static const struct of_device_id qca_bluetooth_of_match[] = {
 	{ .compatible = "qcom,qca6174-bt" },
 	{ .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
+	{ .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
 	{ .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
 	{ /* sentinel */ }
 };
@@ -1578,6 +2007,7 @@ static struct serdev_device_driver qca_serdev_driver = {
 	.driver = {
 		.name = "hci_uart_qca",
 		.of_match_table = qca_bluetooth_of_match,
+		.pm = &qca_pm_ops,
 	},
 };
 
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 6ab631101019..4e039d7a16f8 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -143,6 +143,13 @@ struct h4_recv_pkt {
 	.lsize = 1, \
 	.maxlen = HCI_MAX_EVENT_SIZE
 
+#define H4_RECV_ISO \
+	.type = HCI_ISODATA_PKT, \
+	.hlen = HCI_ISO_HDR_SIZE, \
+	.loff = 2, \
+	.lsize = 2, \
+	.maxlen = HCI_MAX_FRAME_SIZE \
+
 struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb,
 			    const unsigned char *buffer, int count,
 			    const struct h4_recv_pkt *pkts, int pkts_count);
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 65e41c1d760f..8ab26dec5f6e 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -178,6 +178,7 @@ static inline ssize_t vhci_get_user(struct vhci_data *data,
 	case HCI_EVENT_PKT:
 	case HCI_ACLDATA_PKT:
 	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
 		if (!data->hdev) {
 			kfree_skb(skb);
 			return -ENODEV;
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 6b331061d34b..50200d1c06ea 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -41,8 +41,9 @@ config MOXTET
 
 config HISILICON_LPC
 	bool "Support for ISA I/O space on HiSilicon Hip06/7"
-	depends on ARM64 && (ARCH_HISI || COMPILE_TEST)
-	select INDIRECT_PIO
+	depends on (ARM64 && ARCH_HISI) || (COMPILE_TEST && !ALPHA && !HEXAGON && !PARISC && !C6X)
+	depends on HAS_IOMEM
+	select INDIRECT_PIO if ARM64
 	help
 	  Driver to enable I/O access to devices attached to the Low Pin
 	  Count bus on the HiSilicon Hip06/7 SoC.
@@ -150,6 +151,15 @@ config TEGRA_GMI
 	  Driver for the Tegra Generic Memory Interface bus which can be used
 	  to attach devices such as NOR, UART, FPGA and more.
 
+config  TI_PWMSS
+	bool
+	default y if (ARCH_OMAP2PLUS) && (PWM_TIECAP || PWM_TIEHRPWM || TI_EQEP)
+	help
+	  PWM Subsystem driver support for AM33xx SOC.
+
+	  PWM submodules require PWM config space access from submodule
+	  drivers and require common parent driver support.
+
 config TI_SYSC
 	bool "TI sysc interconnect target module driver"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 16b43d3468c6..1320bcf9fa9d 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_SUNXI_RSB)		+= sunxi-rsb.o
 obj-$(CONFIG_SIMPLE_PM_BUS)	+= simple-pm-bus.o
 obj-$(CONFIG_TEGRA_ACONNECT)	+= tegra-aconnect.o
 obj-$(CONFIG_TEGRA_GMI)		+= tegra-gmi.o
+obj-$(CONFIG_TI_PWMSS)		+= ti-pwmss.o
 obj-$(CONFIG_TI_SYSC)		+= ti-sysc.o
 obj-$(CONFIG_TS_NBUS)		+= ts-nbus.o
 obj-$(CONFIG_UNIPHIER_SYSTEM_BUS)	+= uniphier-system-bus.o
diff --git a/drivers/bus/fsl-mc/dprc-driver.c b/drivers/bus/fsl-mc/dprc-driver.c
index 52c7e15143d6..c8b1c3842c1a 100644
--- a/drivers/bus/fsl-mc/dprc-driver.c
+++ b/drivers/bus/fsl-mc/dprc-driver.c
@@ -104,10 +104,8 @@ static int __fsl_mc_device_match(struct device *dev, void *data)
 	return fsl_mc_device_match(mc_dev, obj_desc);
 }
 
-static struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc
-								*obj_desc,
-						  struct fsl_mc_device
-								*mc_bus_dev)
+struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
+					   struct fsl_mc_device *mc_bus_dev)
 {
 	struct device *dev;
 
diff --git a/drivers/bus/fsl-mc/dprc.c b/drivers/bus/fsl-mc/dprc.c
index 0fe3f52ae0de..602f030d84eb 100644
--- a/drivers/bus/fsl-mc/dprc.c
+++ b/drivers/bus/fsl-mc/dprc.c
@@ -554,3 +554,56 @@ int dprc_get_container_id(struct fsl_mc_io *mc_io,
 
 	return 0;
 }
+
+/**
+ * dprc_get_connection() - Get connected endpoint and link status if connection
+ *			exists.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRC object
+ * @endpoint1:	Endpoint 1 configuration parameters
+ * @endpoint2:	Returned endpoint 2 configuration parameters
+ * @state:	Returned link state:
+ *		1 - link is up;
+ *		0 - link is down;
+ *		-1 - no connection (endpoint2 information is irrelevant)
+ *
+ * Return:     '0' on Success; -ENOTCONN if connection does not exist.
+ */
+int dprc_get_connection(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			const struct dprc_endpoint *endpoint1,
+			struct dprc_endpoint *endpoint2,
+			int *state)
+{
+	struct dprc_cmd_get_connection *cmd_params;
+	struct dprc_rsp_get_connection *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err, i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_CONNECTION,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprc_cmd_get_connection *)cmd.params;
+	cmd_params->ep1_id = cpu_to_le32(endpoint1->id);
+	cmd_params->ep1_interface_id = cpu_to_le16(endpoint1->if_id);
+	for (i = 0; i < 16; i++)
+		cmd_params->ep1_type[i] = endpoint1->type[i];
+
+	/* send command to mc */
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return -ENOTCONN;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dprc_rsp_get_connection *)cmd.params;
+	endpoint2->id = le32_to_cpu(rsp_params->ep2_id);
+	endpoint2->if_id = le16_to_cpu(rsp_params->ep2_interface_id);
+	*state = le32_to_cpu(rsp_params->state);
+	for (i = 0; i < 16; i++)
+		endpoint2->type[i] = rsp_params->ep2_type[i];
+
+	return 0;
+}
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 5c9bf2e06552..c78d10ea641f 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -166,42 +166,52 @@ EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
 struct device_type fsl_mc_bus_dprc_type = {
 	.name = "fsl_mc_bus_dprc"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dprc_type);
 
 struct device_type fsl_mc_bus_dpni_type = {
 	.name = "fsl_mc_bus_dpni"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpni_type);
 
 struct device_type fsl_mc_bus_dpio_type = {
 	.name = "fsl_mc_bus_dpio"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpio_type);
 
 struct device_type fsl_mc_bus_dpsw_type = {
 	.name = "fsl_mc_bus_dpsw"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpsw_type);
 
 struct device_type fsl_mc_bus_dpbp_type = {
 	.name = "fsl_mc_bus_dpbp"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpbp_type);
 
 struct device_type fsl_mc_bus_dpcon_type = {
 	.name = "fsl_mc_bus_dpcon"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpcon_type);
 
 struct device_type fsl_mc_bus_dpmcp_type = {
 	.name = "fsl_mc_bus_dpmcp"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmcp_type);
 
 struct device_type fsl_mc_bus_dpmac_type = {
 	.name = "fsl_mc_bus_dpmac"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmac_type);
 
 struct device_type fsl_mc_bus_dprtc_type = {
 	.name = "fsl_mc_bus_dprtc"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dprtc_type);
 
 struct device_type fsl_mc_bus_dpseci_type = {
 	.name = "fsl_mc_bus_dpseci"
 };
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpseci_type);
 
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
@@ -702,6 +712,39 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
 
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+{
+	struct fsl_mc_device *mc_bus_dev, *endpoint;
+	struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
+	struct dprc_endpoint endpoint1 = {{ 0 }};
+	struct dprc_endpoint endpoint2 = {{ 0 }};
+	int state, err;
+
+	mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
+	strcpy(endpoint1.type, mc_dev->obj_desc.type);
+	endpoint1.id = mc_dev->obj_desc.id;
+
+	err = dprc_get_connection(mc_bus_dev->mc_io, 0,
+				  mc_bus_dev->mc_handle,
+				  &endpoint1, &endpoint2,
+				  &state);
+
+	if (err == -ENOTCONN || state == -1)
+		return ERR_PTR(-ENOTCONN);
+
+	if (err < 0) {
+		dev_err(&mc_bus_dev->dev, "dprc_get_connection() = %d\n", err);
+		return ERR_PTR(err);
+	}
+
+	strcpy(endpoint_desc.type, endpoint2.type);
+	endpoint_desc.id = endpoint2.id;
+	endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev);
+
+	return endpoint;
+}
+EXPORT_SYMBOL_GPL(fsl_mc_get_endpoint);
+
 static int parse_mc_ranges(struct device *dev,
 			   int *paddr_cells,
 			   int *mc_addr_cells,
diff --git a/drivers/bus/fsl-mc/fsl-mc-private.h b/drivers/bus/fsl-mc/fsl-mc-private.h
index 020fcc04ec8b..21ca8c756ee7 100644
--- a/drivers/bus/fsl-mc/fsl-mc-private.h
+++ b/drivers/bus/fsl-mc/fsl-mc-private.h
@@ -105,6 +105,8 @@ int dpmcp_reset(struct fsl_mc_io *mc_io,
 #define DPRC_CMDID_GET_OBJ_REG_V2               DPRC_CMD_V2(0x15E)
 #define DPRC_CMDID_SET_OBJ_IRQ                  DPRC_CMD(0x15F)
 
+#define DPRC_CMDID_GET_CONNECTION               DPRC_CMD(0x16C)
+
 struct dprc_cmd_open {
 	__le32 container_id;
 };
@@ -228,6 +230,22 @@ struct dprc_cmd_set_obj_irq {
 	u8 obj_type[16];
 };
 
+struct dprc_cmd_get_connection {
+	__le32 ep1_id;
+	__le16 ep1_interface_id;
+	u8 pad[2];
+	u8 ep1_type[16];
+};
+
+struct dprc_rsp_get_connection {
+	__le64 pad[3];
+	__le32 ep2_id;
+	__le16 ep2_interface_id;
+	__le16 pad1;
+	u8 ep2_type[16];
+	__le32 state;
+};
+
 /*
  * DPRC API for managing and querying DPAA resources
  */
@@ -392,6 +410,27 @@ int dprc_get_container_id(struct fsl_mc_io *mc_io,
 			  u32 cmd_flags,
 			  int *container_id);
 
+/**
+ * struct dprc_endpoint - Endpoint description for link connect/disconnect
+ *			operations
+ * @type:	Endpoint object type: NULL terminated string
+ * @id:		Endpoint object ID
+ * @if_id:	Interface ID; should be set for endpoints with multiple
+ *		interfaces ("dpsw", "dpdmux"); for others, always set to 0
+ */
+struct dprc_endpoint {
+	char type[16];
+	int id;
+	u16 if_id;
+};
+
+int dprc_get_connection(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			const struct dprc_endpoint *endpoint1,
+			struct dprc_endpoint *endpoint2,
+			int *state);
+
 /*
  * Data Path Buffer Pool (DPBP) API
  */
@@ -574,4 +613,7 @@ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io);
 
 bool fsl_mc_is_root_dprc(struct device *dev);
 
+struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
+					   struct fsl_mc_device *mc_bus_dev);
+
 #endif /* _FSL_MC_PRIVATE_H_ */
diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c
index d9629fc13a15..6ae48ad80409 100644
--- a/drivers/bus/fsl-mc/mc-io.c
+++ b/drivers/bus/fsl-mc/mc-io.c
@@ -97,12 +97,12 @@ int __must_check fsl_create_mc_io(struct device *dev,
 		return -EBUSY;
 	}
 
-	mc_portal_virt_addr = devm_ioremap_nocache(dev,
+	mc_portal_virt_addr = devm_ioremap(dev,
 						   mc_portal_phys_addr,
 						   mc_portal_size);
 	if (!mc_portal_virt_addr) {
 		dev_err(dev,
-			"devm_ioremap_nocache failed for MC portal %pa\n",
+			"devm_ioremap failed for MC portal %pa\n",
 			&mc_portal_phys_addr);
 		return -ENXIO;
 	}
diff --git a/drivers/bus/hisi_lpc.c b/drivers/bus/hisi_lpc.c
index 20c957185af2..8101df901830 100644
--- a/drivers/bus/hisi_lpc.c
+++ b/drivers/bus/hisi_lpc.c
@@ -74,7 +74,7 @@ struct hisi_lpc_dev {
 /* About 10us. This is specific for single IO operations, such as inb */
 #define LPC_PEROP_WAITCNT	100
 
-static int wait_lpc_idle(unsigned char *mbase, unsigned int waitcnt)
+static int wait_lpc_idle(void __iomem *mbase, unsigned int waitcnt)
 {
 	u32 status;
 
@@ -209,7 +209,7 @@ static u32 hisi_lpc_comm_in(void *hostdata, unsigned long pio, size_t dwidth)
 	struct hisi_lpc_dev *lpcdev = hostdata;
 	struct lpc_cycle_para iopara;
 	unsigned long addr;
-	u32 rd_data = 0;
+	__le32 rd_data = 0;
 	int ret;
 
 	if (!lpcdev || !dwidth || dwidth > LPC_MAX_DWIDTH)
@@ -244,13 +244,12 @@ static void hisi_lpc_comm_out(void *hostdata, unsigned long pio,
 	struct lpc_cycle_para iopara;
 	const unsigned char *buf;
 	unsigned long addr;
+	__le32 _val = cpu_to_le32(val);
 
 	if (!lpcdev || !dwidth || dwidth > LPC_MAX_DWIDTH)
 		return;
 
-	val = cpu_to_le32(val);
-
-	buf = (const unsigned char *)&val;
+	buf = (const unsigned char *)&_val;
 	addr = hisi_lpc_pio_to_addr(lpcdev, pio);
 
 	iopara.opflags = FG_INCRADDR_LPC;
diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/bus/ti-pwmss.c
index e9c26c94251b..e9c26c94251b 100644
--- a/drivers/pwm/pwm-tipwmss.c
+++ b/drivers/bus/ti-pwmss.c
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 2b6670daf7fc..ccb44fe790a7 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -343,6 +343,12 @@ static int sysc_get_clocks(struct sysc *ddata)
 		return -EINVAL;
 	}
 
+	/* Always add a slot for main clocks fck and ick even if unused */
+	if (!nr_fck)
+		ddata->nr_clocks++;
+	if (!nr_ick)
+		ddata->nr_clocks++;
+
 	ddata->clocks = devm_kcalloc(ddata->dev,
 				     ddata->nr_clocks, sizeof(*ddata->clocks),
 				     GFP_KERNEL);
@@ -421,7 +427,7 @@ static int sysc_enable_opt_clocks(struct sysc *ddata)
 	struct clk *clock;
 	int i, error;
 
-	if (!ddata->clocks)
+	if (!ddata->clocks || ddata->nr_clocks < SYSC_OPTFCK0 + 1)
 		return 0;
 
 	for (i = SYSC_OPTFCK0; i < SYSC_MAX_CLOCKS; i++) {
@@ -455,7 +461,7 @@ static void sysc_disable_opt_clocks(struct sysc *ddata)
 	struct clk *clock;
 	int i;
 
-	if (!ddata->clocks)
+	if (!ddata->clocks || ddata->nr_clocks < SYSC_OPTFCK0 + 1)
 		return;
 
 	for (i = SYSC_OPTFCK0; i < SYSC_MAX_CLOCKS; i++) {
@@ -917,6 +923,9 @@ set_midle:
 		return -EINVAL;
 	}
 
+	if (ddata->cfg.quirks & SYSC_QUIRK_SWSUP_MSTANDBY)
+		best_mode = SYSC_IDLE_NO;
+
 	reg &= ~(SYSC_IDLE_MASK << regbits->midle_shift);
 	reg |= best_mode << regbits->midle_shift;
 	sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg);
@@ -978,6 +987,10 @@ static int sysc_disable_module(struct device *dev)
 		return ret;
 	}
 
+	if (ddata->cfg.quirks & (SYSC_QUIRK_SWSUP_MSTANDBY) ||
+	    ddata->cfg.quirks & (SYSC_QUIRK_FORCE_MSTANDBY))
+		best_mode = SYSC_IDLE_FORCE;
+
 	reg &= ~(SYSC_IDLE_MASK << regbits->midle_shift);
 	reg |= best_mode << regbits->midle_shift;
 	sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg);
@@ -1037,8 +1050,6 @@ static int __maybe_unused sysc_runtime_resume_legacy(struct device *dev,
 	struct ti_sysc_platform_data *pdata;
 	int error;
 
-	reset_control_deassert(ddata->rsts);
-
 	pdata = dev_get_platdata(ddata->dev);
 	if (!pdata)
 		return 0;
@@ -1051,6 +1062,8 @@ static int __maybe_unused sysc_runtime_resume_legacy(struct device *dev,
 		dev_err(dev, "%s: could not enable: %i\n",
 			__func__, error);
 
+	reset_control_deassert(ddata->rsts);
+
 	return 0;
 }
 
@@ -1104,8 +1117,6 @@ static int __maybe_unused sysc_runtime_resume(struct device *dev)
 
 	sysc_clkdm_deny_idle(ddata);
 
-	reset_control_deassert(ddata->rsts);
-
 	if (sysc_opt_clks_needed(ddata)) {
 		error = sysc_enable_opt_clocks(ddata);
 		if (error)
@@ -1116,6 +1127,8 @@ static int __maybe_unused sysc_runtime_resume(struct device *dev)
 	if (error)
 		goto err_opt_clocks;
 
+	reset_control_deassert(ddata->rsts);
+
 	if (ddata->legacy_mode) {
 		error = sysc_runtime_resume_legacy(dev, ddata);
 		if (error)
@@ -1236,6 +1249,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 		   SYSC_QUIRK_SWSUP_SIDLE),
 
 	/* Quirks that need to be set based on detected module */
+	SYSC_QUIRK("aess", 0, 0, 0x10, -1, 0x40000000, 0xffffffff,
+		   SYSC_MODULE_QUIRK_AESS),
 	SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff,
 		   SYSC_MODULE_QUIRK_HDQ1W),
 	SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x0000000a, 0xffffffff,
@@ -1251,6 +1266,10 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("gpu", 0x50000000, 0x14, -1, -1, 0x00010201, 0xffffffff, 0),
 	SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff,
 		   SYSC_MODULE_QUIRK_SGX),
+	SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
+		   0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
+	SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -1, 0x4ea2080d, 0xffffffff,
+		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
 		   SYSC_MODULE_QUIRK_WDT),
 	/* Watchdog on am3 and am4 */
@@ -1260,7 +1279,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 #ifdef DEBUG
 	SYSC_QUIRK("adc", 0, 0, 0x10, -1, 0x47300001, 0xffffffff, 0),
 	SYSC_QUIRK("atl", 0, 0, -1, -1, 0x0a070100, 0xffffffff, 0),
-	SYSC_QUIRK("aess", 0, 0, 0x10, -1, 0x40000000, 0xffffffff, 0),
 	SYSC_QUIRK("cm", 0, 0, -1, -1, 0x40000301, 0xffffffff, 0),
 	SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0),
 	SYSC_QUIRK("cpgmac", 0, 0x1200, 0x1208, 0x1204, 0x4edb1902,
@@ -1309,8 +1327,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("usbhstll", 0, 0, 0x10, 0x14, 0x00000008, 0xffffffff, 0),
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, 0x14, 0x50700100, 0xffffffff, 0),
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, -1, 0x50700101, 0xffffffff, 0),
-	SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
-		   0xffffffff, 0),
 	SYSC_QUIRK("vfpe", 0, 0, 0x104, -1, 0x4d001200, 0xffffffff, 0),
 #endif
 };
@@ -1394,6 +1410,14 @@ static void sysc_clk_enable_quirk_hdq1w(struct sysc *ddata)
 	sysc_write(ddata, offset, val);
 }
 
+/* AESS (Audio Engine SubSystem) needs autogating set after enable */
+static void sysc_module_enable_quirk_aess(struct sysc *ddata)
+{
+	int offset = 0x7c;	/* AESS_AUTO_GATING_ENABLE */
+
+	sysc_write(ddata, offset, 1);
+}
+
 /* I2C needs extra enable bit toggling for reset */
 static void sysc_clk_quirk_i2c(struct sysc *ddata, bool enable)
 {
@@ -1476,6 +1500,9 @@ static void sysc_init_module_quirks(struct sysc *ddata)
 		return;
 	}
 
+	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_AESS)
+		ddata->module_enable_quirk = sysc_module_enable_quirk_aess;
+
 	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
 		ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
 
@@ -1532,37 +1559,6 @@ static int sysc_legacy_init(struct sysc *ddata)
 	return error;
 }
 
-/**
- * sysc_rstctrl_reset_deassert - deassert rstctrl reset
- * @ddata: device driver data
- * @reset: reset before deassert
- *
- * A module can have both OCP softreset control and external rstctrl.
- * If more complicated rstctrl resets are needed, please handle these
- * directly from the child device driver and map only the module reset
- * for the parent interconnect target module device.
- *
- * Automatic reset of the module on init can be skipped with the
- * "ti,no-reset-on-init" device tree property.
- */
-static int sysc_rstctrl_reset_deassert(struct sysc *ddata, bool reset)
-{
-	int error;
-
-	if (!ddata->rsts)
-		return 0;
-
-	if (reset) {
-		error = reset_control_assert(ddata->rsts);
-		if (error)
-			return error;
-	}
-
-	reset_control_deassert(ddata->rsts);
-
-	return 0;
-}
-
 /*
  * Note that the caller must ensure the interconnect target module is enabled
  * before calling reset. Otherwise reset will not complete.
@@ -1594,6 +1590,10 @@ static int sysc_reset(struct sysc *ddata)
 	sysc_val |= sysc_mask;
 	sysc_write(ddata, sysc_offset, sysc_val);
 
+	if (ddata->cfg.srst_udelay)
+		usleep_range(ddata->cfg.srst_udelay,
+			     ddata->cfg.srst_udelay * 2);
+
 	if (ddata->clk_enable_quirk)
 		ddata->clk_enable_quirk(ddata);
 
@@ -1625,15 +1625,6 @@ static int sysc_reset(struct sysc *ddata)
 static int sysc_init_module(struct sysc *ddata)
 {
 	int error = 0;
-	bool manage_clocks = true;
-
-	error = sysc_rstctrl_reset_deassert(ddata, false);
-	if (error)
-		return error;
-
-	if (ddata->cfg.quirks &
-	    (SYSC_QUIRK_NO_IDLE | SYSC_QUIRK_NO_IDLE_ON_INIT))
-		manage_clocks = false;
 
 	error = sysc_clockdomain_init(ddata);
 	if (error)
@@ -1654,7 +1645,7 @@ static int sysc_init_module(struct sysc *ddata)
 		goto err_opt_clocks;
 
 	if (!(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT)) {
-		error = sysc_rstctrl_reset_deassert(ddata, true);
+		error = reset_control_deassert(ddata->rsts);
 		if (error)
 			goto err_main_clocks;
 	}
@@ -1666,28 +1657,32 @@ static int sysc_init_module(struct sysc *ddata)
 	if (ddata->legacy_mode) {
 		error = sysc_legacy_init(ddata);
 		if (error)
-			goto err_main_clocks;
+			goto err_reset;
 	}
 
 	if (!ddata->legacy_mode) {
 		error = sysc_enable_module(ddata->dev);
 		if (error)
-			goto err_main_clocks;
+			goto err_reset;
 	}
 
 	error = sysc_reset(ddata);
 	if (error)
 		dev_err(ddata->dev, "Reset failed with %d\n", error);
 
-	if (!ddata->legacy_mode && manage_clocks)
+	if (error && !ddata->legacy_mode)
 		sysc_disable_module(ddata->dev);
 
+err_reset:
+	if (error && !(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT))
+		reset_control_assert(ddata->rsts);
+
 err_main_clocks:
-	if (manage_clocks)
+	if (error)
 		sysc_disable_main_clocks(ddata);
 err_opt_clocks:
 	/* No re-enable of clockdomain autoidle to prevent module autoidle */
-	if (manage_clocks) {
+	if (error) {
 		sysc_disable_opt_clocks(ddata);
 		sysc_clkdm_allow_idle(ddata);
 	}
@@ -1794,9 +1789,8 @@ static int sysc_child_add_named_clock(struct sysc *ddata,
 
 	clk = clk_get(child, name);
 	if (!IS_ERR(clk)) {
-		clk_put(clk);
-
-		return -EEXIST;
+		error = -EEXIST;
+		goto put_clk;
 	}
 
 	clk = clk_get(ddata->dev, name);
@@ -1806,7 +1800,7 @@ static int sysc_child_add_named_clock(struct sysc *ddata,
 	l = clkdev_create(clk, name, dev_name(child));
 	if (!l)
 		error = -ENOMEM;
-
+put_clk:
 	clk_put(clk);
 
 	return error;
@@ -2460,10 +2454,17 @@ static int sysc_probe(struct platform_device *pdev)
 		goto unprepare;
 	}
 
-	/* Balance reset counts */
-	if (ddata->rsts)
+	/* Balance use counts as PM runtime should have enabled these all */
+	if (!(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT))
 		reset_control_assert(ddata->rsts);
 
+	if (!(ddata->cfg.quirks &
+	      (SYSC_QUIRK_NO_IDLE | SYSC_QUIRK_NO_IDLE_ON_INIT))) {
+		sysc_disable_main_clocks(ddata);
+		sysc_disable_opt_clocks(ddata);
+		sysc_clkdm_allow_idle(ddata);
+	}
+
 	sysc_show_registers(ddata);
 
 	ddata->dev->type = &sysc_device_type;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index ac42ae4651ce..faca0f346fff 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -996,6 +996,12 @@ static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks)
 	tracks->xa = 0;
 	tracks->error = 0;
 	cd_dbg(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n");
+
+	if (!CDROM_CAN(CDC_PLAY_AUDIO)) {
+		tracks->error = CDS_NO_INFO;
+		return;
+	}
+
 	/* Grab the TOC header so we can see how many tracks there are */
 	ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header);
 	if (ret) {
@@ -1162,7 +1168,8 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
 		ret = open_for_data(cdi);
 		if (ret)
 			goto err;
-		cdrom_mmc3_profile(cdi);
+		if (CDROM_CAN(CDC_GENERIC_PACKET))
+			cdrom_mmc3_profile(cdi);
 		if (mode & FMODE_WRITE) {
 			ret = -EROFS;
 			if (cdrom_open_write(cdi))
@@ -2882,6 +2889,9 @@ int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written)
 	   it doesn't give enough information or fails. then we return
 	   the toc contents. */
 use_toc:
+	if (!CDROM_CAN(CDC_PLAY_AUDIO))
+		return -ENOSYS;
+
 	toc.cdte_format = CDROM_MSF;
 	toc.cdte_track = CDROM_LEADOUT;
 	if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc)))
@@ -3007,9 +3017,31 @@ static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi,
 	struct cdrom_read_audio ra;
 	int lba;
 
-	if (copy_from_user(&ra, (struct cdrom_read_audio __user *)arg,
-			   sizeof(ra)))
-		return -EFAULT;
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_cdrom_read_audio {
+			union cdrom_addr	addr;
+			u8			addr_format;
+			compat_int_t		nframes;
+			compat_caddr_t		buf;
+		} ra32;
+
+		if (copy_from_user(&ra32, arg, sizeof(ra32)))
+			return -EFAULT;
+
+		ra = (struct cdrom_read_audio) {
+			.addr		= ra32.addr,
+			.addr_format	= ra32.addr_format,
+			.nframes	= ra32.nframes,
+			.buf		= compat_ptr(ra32.buf),
+		};
+	} else
+#endif
+	{
+		if (copy_from_user(&ra, (struct cdrom_read_audio __user *)arg,
+				   sizeof(ra)))
+			return -EFAULT;
+	}
 
 	if (ra.addr_format == CDROM_MSF)
 		lba = msf_to_lba(ra.addr.msf.minute,
@@ -3261,9 +3293,10 @@ static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi,
 	ret = cdrom_get_last_written(cdi, &last);
 	if (ret)
 		return ret;
-	if (copy_to_user((long __user *)arg, &last, sizeof(last)))
-		return -EFAULT;
-	return 0;
+	if (in_compat_syscall())
+		return put_user(last, (__s32 __user *)arg);
+
+	return put_user(last, (long __user *)arg);
 }
 
 static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 6626c84f66d1..886b2638c730 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -518,6 +518,9 @@ static const struct block_device_operations gdrom_bdops = {
 	.release		= gdrom_bdops_release,
 	.check_events		= gdrom_bdops_check_events,
 	.ioctl			= gdrom_bdops_ioctl,
+#ifdef CONFIG_COMPAT
+	.ioctl			= blkdev_compat_ptr_ioctl,
+#endif
 };
 
 static irqreturn_t gdrom_command_interrupt(int irq, void *dev_id)
@@ -742,7 +745,7 @@ static int probe_gdrom(struct platform_device *devptr)
 	int err;
 	/* Start the device */
 	if (gdrom_execute_diagnostic() != 1) {
-		pr_warning("ATA Probe for GDROM failed\n");
+		pr_warn("ATA Probe for GDROM failed\n");
 		return -ENODEV;
 	}
 	/* Print out firmware ID */
@@ -814,7 +817,7 @@ probe_fail_no_disk:
 probe_fail_no_mem:
 	unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
 	gdrom_major = 0;
-	pr_warning("Probe failed - error is 0x%X\n", err);
+	pr_warn("Probe failed - error is 0x%X\n", err);
 	return err;
 }
 
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index df0fc997dc3e..26956c006987 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -439,8 +439,8 @@ config RAW_DRIVER
 	  Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
 	  See the raw(8) manpage for more details.
 
-          Applications should preferably open the device (eg /dev/hda1)
-          with the O_DIRECT flag.
+	  Applications should preferably open the device (eg /dev/hda1)
+	  with the O_DIRECT flag.
 
 config MAX_RAW_DEVS
 	int "Maximum number of RAW devices to support (1-65536)"
@@ -559,4 +559,4 @@ config RANDOM_TRUST_BOOTLOADER
 	device randomness. Say Y here to assume the entropy provided by the
 	booloader is trustworthy so it will be added to the kernel's entropy
 	pool. Otherwise, say N here so it will be regarded as device input that
-	only mixes the entropy pool.
-\ No newline at end of file
+	only mixes the entropy pool.
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index 812d6aa6e013..bc54235a7022 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -63,7 +63,7 @@ config AGP_AMD64
 	  This option gives you AGP support for the GLX component of
 	  X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
 	  You still need an external AGP bridge like the AMD 8151, VIA
-          K8T400M, SiS755. It may also support other AGP bridges when loaded
+	  K8T400M, SiS755. It may also support other AGP bridges when loaded
 	  with agp_try_unsupported=1.
 
 config AGP_INTEL
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index f6955888e676..47098648502d 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -102,14 +102,13 @@ agp_segment_priv *agp_find_seg_in_client(const struct agp_client *client,
 					    int size, pgprot_t page_prot)
 {
 	struct agp_segment_priv *seg;
-	int num_segments, i;
+	int i;
 	off_t pg_start;
 	size_t pg_count;
 
 	pg_start = offset / 4096;
 	pg_count = size / 4096;
 	seg = *(client->segments);
-	num_segments = client->num_segments;
 
 	for (i = 0; i < client->num_segments; i++) {
 		if ((seg[i].pg_start == pg_start) &&
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index df1edb5ec0ad..9e84239f88d4 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -207,6 +207,7 @@ EXPORT_SYMBOL(agp_free_memory);
 /**
  *	agp_allocate_memory  -  allocate a group of pages of a certain type.
  *
+ *	@bridge: an agp_bridge_data struct allocated for the AGP host bridge.
  *	@page_count:	size_t argument of the number of pages
  *	@type:	u32 argument of the type of memory to be allocated.
  *
@@ -355,6 +356,7 @@ EXPORT_SYMBOL_GPL(agp_num_entries);
 /**
  *	agp_copy_info  -  copy bridge state information
  *
+ *	@bridge: an agp_bridge_data struct allocated for the AGP host bridge.
  *	@info:		agp_kern_info pointer.  The caller should insure that this pointer is valid.
  *
  *	This function copies information about the agp bridge device and the state of
@@ -850,7 +852,6 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 {
 	char *table;
 	char *table_end;
-	int size;
 	int page_order;
 	int num_entries;
 	int i;
@@ -864,25 +865,22 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 	table = NULL;
 	i = bridge->aperture_size_idx;
 	temp = bridge->current_size;
-	size = page_order = num_entries = 0;
+	page_order = num_entries = 0;
 
 	if (bridge->driver->size_type != FIXED_APER_SIZE) {
 		do {
 			switch (bridge->driver->size_type) {
 			case U8_APER_SIZE:
-				size = A_SIZE_8(temp)->size;
 				page_order =
 				    A_SIZE_8(temp)->page_order;
 				num_entries =
 				    A_SIZE_8(temp)->num_entries;
 				break;
 			case U16_APER_SIZE:
-				size = A_SIZE_16(temp)->size;
 				page_order = A_SIZE_16(temp)->page_order;
 				num_entries = A_SIZE_16(temp)->num_entries;
 				break;
 			case U32_APER_SIZE:
-				size = A_SIZE_32(temp)->size;
 				page_order = A_SIZE_32(temp)->page_order;
 				num_entries = A_SIZE_32(temp)->num_entries;
 				break;
@@ -890,7 +888,7 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 			case FIXED_APER_SIZE:
 			case LVL2_APER_SIZE:
 			default:
-				size = page_order = num_entries = 0;
+				page_order = num_entries = 0;
 				break;
 			}
 
@@ -920,7 +918,6 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 			}
 		} while (!table && (i < bridge->driver->num_aperture_sizes));
 	} else {
-		size = ((struct aper_size_info_fixed *) temp)->size;
 		page_order = ((struct aper_size_info_fixed *) temp)->page_order;
 		num_entries = ((struct aper_size_info_fixed *) temp)->num_entries;
 		table = alloc_gatt_pages(page_order);
@@ -944,7 +941,7 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 
 	bridge->gatt_table = (u32 __iomem *)table;
 #else
-	bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
+	bridge->gatt_table = ioremap(virt_to_phys(table),
 					(PAGE_SIZE * (1 << page_order)));
 	bridge->driver->cache_flush();
 #endif
@@ -1282,6 +1279,7 @@ EXPORT_SYMBOL(agp_generic_destroy_page);
 /**
  * agp_enable  -  initialise the agp point-to-point connection.
  *
+ * @bridge: an agp_bridge_data struct allocated for the AGP host bridge.
  * @mode:	agp mode register value to configure with.
  */
 void agp_enable(struct agp_bridge_data *bridge, u32 mode)
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index c6271ce250b3..66a62d17a3f5 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -1087,7 +1087,7 @@ static void intel_i9xx_setup_flush(void)
 	}
 
 	if (intel_private.ifp_resource.start)
-		intel_private.i9xx_flush_page = ioremap_nocache(intel_private.ifp_resource.start, PAGE_SIZE);
+		intel_private.i9xx_flush_page = ioremap(intel_private.ifp_resource.start, PAGE_SIZE);
 	if (!intel_private.i9xx_flush_page)
 		dev_err(&intel_private.pcidev->dev,
 			"can't ioremap flush page - no chipset flushing\n");
diff --git a/drivers/char/agp/isoch.c b/drivers/char/agp/isoch.c
index 31c374b1b91b..7ecf20a6d19c 100644
--- a/drivers/char/agp/isoch.c
+++ b/drivers/char/agp/isoch.c
@@ -84,7 +84,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 	unsigned int cdev = 0;
 	u32 mnistat, tnistat, tstatus, mcmd;
 	u16 tnicmd, mnicmd;
-	u8 mcapndx;
 	u32 tot_bw = 0, tot_n = 0, tot_rq = 0, y_max, rq_isoch, rq_async;
 	u32 step, rem, rem_isoch, rem_async;
 	int ret = 0;
@@ -138,8 +137,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 		cur = list_entry(pos, struct agp_3_5_dev, list);
 		dev = cur->dev;
 
-		mcapndx = cur->capndx;
-
 		pci_read_config_dword(dev, cur->capndx+AGPNISTAT, &mnistat);
 
 		master[cdev].maxbw = (mnistat >> 16) & 0xff;
@@ -251,8 +248,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 		cur = master[cdev].dev;
 		dev = cur->dev;
 
-		mcapndx = cur->capndx;
-
 		master[cdev].rq += (cdev == ndevs - 1)
 		              ? (rem_async + rem_isoch) : step;
 
@@ -319,7 +314,7 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
 {
 	struct pci_dev *td = bridge->dev, *dev = NULL;
 	u8 mcapndx;
-	u32 isoch, arqsz;
+	u32 isoch;
 	u32 tstatus, mstatus, ncapid;
 	u32 mmajor;
 	u16 mpstat;
@@ -334,8 +329,6 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
 	if (isoch == 0)	/* isoch xfers not available, bail out. */
 		return -ENODEV;
 
-	arqsz     = (tstatus >> 13) & 0x7;
-
 	/*
 	 * Allocate a head for our AGP 3.5 device list
 	 * (multiple AGP v3 devices are allowed behind a single bridge).
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index eb108b3c619a..51121a4b82c7 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -204,7 +204,7 @@ static int __init applicom_init(void)
 		if (pci_enable_device(dev))
 			return -EIO;
 
-		RamIO = ioremap_nocache(pci_resource_start(dev, 0), LEN_RAM_IO);
+		RamIO = ioremap(pci_resource_start(dev, 0), LEN_RAM_IO);
 
 		if (!RamIO) {
 			printk(KERN_INFO "ac.o: Failed to ioremap PCI memory "
@@ -259,7 +259,7 @@ static int __init applicom_init(void)
 	/* Now try the specified ISA cards */
 
 	for (i = 0; i < MAX_ISA_BOARD; i++) {
-		RamIO = ioremap_nocache(mem + (LEN_RAM_IO * i), LEN_RAM_IO);
+		RamIO = ioremap(mem + (LEN_RAM_IO * i), LEN_RAM_IO);
 
 		if (!RamIO) {
 			printk(KERN_INFO "ac.o: Failed to ioremap the ISA card's memory space (slot #%d)\n", i + 1);
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 9ac6671bb514..aed2c45f7968 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -110,7 +110,7 @@ struct hpets {
 	unsigned long hp_delta;
 	unsigned int hp_ntimer;
 	unsigned int hp_which;
-	struct hpet_dev hp_dev[1];
+	struct hpet_dev hp_dev[];
 };
 
 static struct hpets *hpets;
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 59f25286befe..914e293ba62b 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -90,7 +90,7 @@ config HW_RANDOM_BCM2835
 
 config HW_RANDOM_IPROC_RNG200
 	tristate "Broadcom iProc/STB RNG200 support"
-	depends on ARCH_BCM_IPROC || ARCH_BRCMSTB
+	depends on ARCH_BCM_IPROC || ARCH_BCM2835 || ARCH_BRCMSTB
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the RNG200
@@ -308,6 +308,19 @@ config HW_RANDOM_HISI
 
 	  If unsure, say Y.
 
+config HW_RANDOM_HISI_V2
+	tristate "HiSilicon True Random Number Generator V2 support"
+	depends on HW_RANDOM && ARM64 && ACPI
+	default HW_RANDOM
+	help
+	  This driver provides kernel-side support for the True Random Number
+	  Generator V2 hardware found on HiSilicon Hi1620 SoC.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called hisi-trng-v2.
+
+	  If unsure, say Y.
+
 config HW_RANDOM_ST
 	tristate "ST Microelectronics HW Random Number Generator support"
 	depends on HW_RANDOM && ARCH_STI
@@ -373,17 +386,17 @@ config HW_RANDOM_MESON
 	  If unsure, say Y.
 
 config HW_RANDOM_CAVIUM
-       tristate "Cavium ThunderX Random Number Generator support"
-       depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
-       default HW_RANDOM
-       ---help---
-         This driver provides kernel-side support for the Random Number
-         Generator hardware found on Cavium SoCs.
+	tristate "Cavium ThunderX Random Number Generator support"
+	depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
+	default HW_RANDOM
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on Cavium SoCs.
 
-         To compile this driver as a module, choose M here: the
-         module will be called cavium_rng.
+	  To compile this driver as a module, choose M here: the
+	  module will be called cavium_rng.
 
-         If unsure, say Y.
+	  If unsure, say Y.
 
 config HW_RANDOM_MTK
 	tristate "Mediatek Random Number Generator support"
@@ -440,6 +453,19 @@ config HW_RANDOM_OPTEE
 
 	  If unsure, say Y.
 
+config HW_RANDOM_NPCM
+	tristate "NPCM Random Number Generator support"
+	depends on ARCH_NPCM || COMPILE_TEST
+	default HW_RANDOM
+	help
+ 	  This driver provides support for the Random Number
+	  Generator hardware available in Nuvoton NPCM SoCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called npcm-rng.
+
+ 	  If unsure, say Y.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
@@ -458,7 +484,7 @@ config UML_RANDOM
 	  /dev/hwrng and injects the entropy into /dev/random.
 
 config HW_RANDOM_KEYSTONE
-	depends on ARCH_KEYSTONE
+	depends on ARCH_KEYSTONE || COMPILE_TEST
 	default HW_RANDOM
 	tristate "TI Keystone NETCP SA Hardware random number generator"
 	help
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 7c9ef4a7667f..a7801b49ce6c 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
 obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o
 obj-$(CONFIG_HW_RANDOM_HISI)	+= hisi-rng.o
+obj-$(CONFIG_HW_RANDOM_HISI_V2) += hisi-trng-v2.o
 obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
 obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
 obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o
@@ -39,3 +40,4 @@ obj-$(CONFIG_HW_RANDOM_MTK)	+= mtk-rng.o
 obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
 obj-$(CONFIG_HW_RANDOM_KEYSTONE) += ks-sa-rng.o
 obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
+obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index e55705745d5e..ecb71c4317a5 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -14,14 +14,22 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/hw_random.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #define TRNG_CR		0x00
+#define TRNG_MR		0x04
 #define TRNG_ISR	0x1c
 #define TRNG_ODATA	0x50
 
 #define TRNG_KEY	0x524e4700 /* RNG */
 
+#define TRNG_HALFR	BIT(0) /* generate RN every 168 cycles */
+
+struct atmel_trng_data {
+	bool has_half_rate;
+};
+
 struct atmel_trng {
 	struct clk *clk;
 	void __iomem *base;
@@ -62,21 +70,31 @@ static void atmel_trng_disable(struct atmel_trng *trng)
 static int atmel_trng_probe(struct platform_device *pdev)
 {
 	struct atmel_trng *trng;
-	struct resource *res;
+	const struct atmel_trng_data *data;
 	int ret;
 
 	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
 	if (!trng)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	trng->base = devm_ioremap_resource(&pdev->dev, res);
+	trng->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(trng->base))
 		return PTR_ERR(trng->base);
 
 	trng->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(trng->clk))
 		return PTR_ERR(trng->clk);
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data)
+		return -ENODEV;
+
+	if (data->has_half_rate) {
+		unsigned long rate = clk_get_rate(trng->clk);
+
+		/* if peripheral clk is above 100MHz, set HALFR */
+		if (rate > 100000000)
+			writel(TRNG_HALFR, trng->base + TRNG_MR);
+	}
 
 	ret = clk_prepare_enable(trng->clk);
 	if (ret)
@@ -141,9 +159,24 @@ static const struct dev_pm_ops atmel_trng_pm_ops = {
 };
 #endif /* CONFIG_PM */
 
+static const struct atmel_trng_data at91sam9g45_config = {
+	.has_half_rate = false,
+};
+
+static const struct atmel_trng_data sam9x60_config = {
+	.has_half_rate = true,
+};
+
 static const struct of_device_id atmel_trng_dt_ids[] = {
-	{ .compatible = "atmel,at91sam9g45-trng" },
-	{ /* sentinel */ }
+	{
+		.compatible = "atmel,at91sam9g45-trng",
+		.data = &at91sam9g45_config,
+	}, {
+		.compatible = "microchip,sam9x60-trng",
+		.data = &sam9x60_config,
+	}, {
+		/* sentinel */
+	}
 };
 MODULE_DEVICE_TABLE(of, atmel_trng_dt_ids);
 
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index f759790c3cdb..d2a5791eb49f 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -142,7 +142,6 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	const struct of_device_id *rng_id;
 	struct bcm2835_rng_priv *priv;
-	struct resource *r;
 	int err;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -151,10 +150,8 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
 	/* map peripheral */
-	priv->base = devm_ioremap_resource(dev, r);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 8d53b8ef545c..d2d7a42d7e0d 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -111,6 +111,14 @@ static void drop_current_rng(void)
 }
 
 /* Returns ERR_PTR(), NULL or refcounted hwrng */
+static struct hwrng *get_current_rng_nolock(void)
+{
+	if (current_rng)
+		kref_get(&current_rng->ref);
+
+	return current_rng;
+}
+
 static struct hwrng *get_current_rng(void)
 {
 	struct hwrng *rng;
@@ -118,9 +126,7 @@ static struct hwrng *get_current_rng(void)
 	if (mutex_lock_interruptible(&rng_mutex))
 		return ERR_PTR(-ERESTARTSYS);
 
-	rng = current_rng;
-	if (rng)
-		kref_get(&rng->ref);
+	rng = get_current_rng_nolock();
 
 	mutex_unlock(&rng_mutex);
 	return rng;
@@ -155,8 +161,6 @@ static int hwrng_init(struct hwrng *rng)
 	reinit_completion(&rng->cleanup_done);
 
 skip_init:
-	add_early_randomness(rng);
-
 	current_quality = rng->quality ? : default_quality;
 	if (current_quality > 1024)
 		current_quality = 1024;
@@ -320,12 +324,13 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 					const char *buf, size_t len)
 {
 	int err = -ENODEV;
-	struct hwrng *rng;
+	struct hwrng *rng, *old_rng, *new_rng;
 
 	err = mutex_lock_interruptible(&rng_mutex);
 	if (err)
 		return -ERESTARTSYS;
 
+	old_rng = current_rng;
 	if (sysfs_streq(buf, "")) {
 		err = enable_best_rng();
 	} else {
@@ -337,9 +342,15 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 			}
 		}
 	}
-
+	new_rng = get_current_rng_nolock();
 	mutex_unlock(&rng_mutex);
 
+	if (new_rng) {
+		if (new_rng != old_rng)
+			add_early_randomness(new_rng);
+		put_rng(new_rng);
+	}
+
 	return err ? : len;
 }
 
@@ -457,13 +468,15 @@ static void start_khwrngd(void)
 int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
-	struct hwrng *old_rng, *tmp;
+	struct hwrng *tmp;
 	struct list_head *rng_list_ptr;
+	bool is_new_current = false;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
 		goto out;
 
 	mutex_lock(&rng_mutex);
+
 	/* Must not register two RNGs with the same name. */
 	err = -EEXIST;
 	list_for_each_entry(tmp, &rng_list, list) {
@@ -482,10 +495,8 @@ int hwrng_register(struct hwrng *rng)
 	}
 	list_add_tail(&rng->list, rng_list_ptr);
 
-	old_rng = current_rng;
-	err = 0;
-	if (!old_rng ||
-	    (!cur_rng_set_by_user && rng->quality > old_rng->quality)) {
+	if (!current_rng ||
+	    (!cur_rng_set_by_user && rng->quality > current_rng->quality)) {
 		/*
 		 * Set new rng as current as the new rng source
 		 * provides better entropy quality and was not
@@ -494,19 +505,26 @@ int hwrng_register(struct hwrng *rng)
 		err = set_current_rng(rng);
 		if (err)
 			goto out_unlock;
+		/* to use current_rng in add_early_randomness() we need
+		 * to take a ref
+		 */
+		is_new_current = true;
+		kref_get(&rng->ref);
 	}
-
-	if (old_rng && !rng->init) {
+	mutex_unlock(&rng_mutex);
+	if (is_new_current || !rng->init) {
 		/*
 		 * Use a new device's input to add some randomness to
 		 * the system.  If this rng device isn't going to be
 		 * used right away, its init function hasn't been
-		 * called yet; so only use the randomness from devices
-		 * that don't need an init callback.
+		 * called yet by set_current_rng(); so only use the
+		 * randomness from devices that don't need an init callback
 		 */
 		add_early_randomness(rng);
 	}
-
+	if (is_new_current)
+		put_rng(rng);
+	return 0;
 out_unlock:
 	mutex_unlock(&rng_mutex);
 out:
@@ -516,10 +534,12 @@ EXPORT_SYMBOL_GPL(hwrng_register);
 
 void hwrng_unregister(struct hwrng *rng)
 {
+	struct hwrng *old_rng, *new_rng;
 	int err;
 
 	mutex_lock(&rng_mutex);
 
+	old_rng = current_rng;
 	list_del(&rng->list);
 	if (current_rng == rng) {
 		err = enable_best_rng();
@@ -529,6 +549,7 @@ void hwrng_unregister(struct hwrng *rng)
 		}
 	}
 
+	new_rng = get_current_rng_nolock();
 	if (list_empty(&rng_list)) {
 		mutex_unlock(&rng_mutex);
 		if (hwrng_fill)
@@ -536,6 +557,12 @@ void hwrng_unregister(struct hwrng *rng)
 	} else
 		mutex_unlock(&rng_mutex);
 
+	if (new_rng) {
+		if (old_rng != new_rng)
+			add_early_randomness(new_rng);
+		put_rng(new_rng);
+	}
+
 	wait_for_completion(&rng->cleanup_done);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index b4b52ab23b6b..8e1fe3f8dd2d 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -109,7 +109,6 @@ static int exynos_trng_init(struct hwrng *rng)
 static int exynos_trng_probe(struct platform_device *pdev)
 {
 	struct exynos_trng_dev *trng;
-	struct resource *res;
 	int ret = -ENOMEM;
 
 	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
@@ -128,8 +127,7 @@ static int exynos_trng_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, trng);
 	trng->dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	trng->mem = devm_ioremap_resource(&pdev->dev, res);
+	trng->mem = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(trng->mem))
 		return PTR_ERR(trng->mem);
 
diff --git a/drivers/char/hw_random/hisi-rng.c b/drivers/char/hw_random/hisi-rng.c
index c663d5dd85bb..6815e17a9834 100644
--- a/drivers/char/hw_random/hisi-rng.c
+++ b/drivers/char/hw_random/hisi-rng.c
@@ -73,7 +73,6 @@ static int hisi_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 static int hisi_rng_probe(struct platform_device *pdev)
 {
 	struct hisi_rng *rng;
-	struct resource *res;
 	int ret;
 
 	rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
@@ -82,8 +81,7 @@ static int hisi_rng_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, rng);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rng->base = devm_ioremap_resource(&pdev->dev, res);
+	rng->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rng->base))
 		return PTR_ERR(rng->base);
 
diff --git a/drivers/char/hw_random/hisi-trng-v2.c b/drivers/char/hw_random/hisi-trng-v2.c
new file mode 100644
index 000000000000..6a65b8232ce0
--- /dev/null
+++ b/drivers/char/hw_random/hisi-trng-v2.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+
+#include <linux/acpi.h>
+#include <linux/err.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+
+#define HISI_TRNG_REG		0x00F0
+#define HISI_TRNG_BYTES		4
+#define HISI_TRNG_QUALITY	512
+#define SLEEP_US		10
+#define TIMEOUT_US		10000
+
+struct hisi_trng {
+	void __iomem *base;
+	struct hwrng rng;
+};
+
+static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+	struct hisi_trng *trng;
+	int currsize = 0;
+	u32 val = 0;
+	u32 ret;
+
+	trng = container_of(rng, struct hisi_trng, rng);
+
+	do {
+		ret = readl_poll_timeout(trng->base + HISI_TRNG_REG, val,
+					 val, SLEEP_US, TIMEOUT_US);
+		if (ret)
+			return currsize;
+
+		if (max - currsize >= HISI_TRNG_BYTES) {
+			memcpy(buf + currsize, &val, HISI_TRNG_BYTES);
+			currsize += HISI_TRNG_BYTES;
+			if (currsize == max)
+				return currsize;
+			continue;
+		}
+
+		/* copy remaining bytes */
+		memcpy(buf + currsize, &val, max - currsize);
+		currsize = max;
+	} while (currsize < max);
+
+	return currsize;
+}
+
+static int hisi_trng_probe(struct platform_device *pdev)
+{
+	struct hisi_trng *trng;
+	int ret;
+
+	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+	if (!trng)
+		return -ENOMEM;
+
+	trng->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(trng->base))
+		return PTR_ERR(trng->base);
+
+	trng->rng.name = pdev->name;
+	trng->rng.read = hisi_trng_read;
+	trng->rng.quality = HISI_TRNG_QUALITY;
+
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+	if (ret)
+		dev_err(&pdev->dev, "failed to register hwrng!\n");
+
+	return ret;
+}
+
+static const struct acpi_device_id hisi_trng_acpi_match[] = {
+	{ "HISI02B3", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_trng_acpi_match);
+
+static struct platform_driver hisi_trng_driver = {
+	.probe		= hisi_trng_probe,
+	.driver		= {
+		.name	= "hisi-trng-v2",
+		.acpi_match_table = ACPI_PTR(hisi_trng_acpi_match),
+	},
+};
+
+module_platform_driver(hisi_trng_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Weili Qian <qianweili@huawei.com>");
+MODULE_AUTHOR("Zaibo Xu <xuzaibo@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon true random number generator V2 driver");
diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c
index 290c880266bf..9f205bd1acc0 100644
--- a/drivers/char/hw_random/intel-rng.c
+++ b/drivers/char/hw_random/intel-rng.c
@@ -317,7 +317,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n";
 		return -EBUSY;
 	}
 
-	intel_rng_hw->mem = ioremap_nocache(INTEL_FWH_ADDR, INTEL_FWH_ADDR_LEN);
+	intel_rng_hw->mem = ioremap(INTEL_FWH_ADDR, INTEL_FWH_ADDR_LEN);
 	if (intel_rng_hw->mem == NULL)
 		return -EBUSY;
 
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
index 92be1c0ab99f..32d9fe61a225 100644
--- a/drivers/char/hw_random/iproc-rng200.c
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -181,7 +181,6 @@ static void iproc_rng200_cleanup(struct hwrng *rng)
 static int iproc_rng200_probe(struct platform_device *pdev)
 {
 	struct iproc_rng200_dev *priv;
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 	int ret;
 
@@ -190,13 +189,7 @@ static int iproc_rng200_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	/* Map peripheral */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "failed to get rng resources\n");
-		return -EINVAL;
-	}
-
-	priv->base = devm_ioremap_resource(dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		dev_err(dev, "failed to remap rng regs\n");
 		return PTR_ERR(priv->base);
@@ -220,6 +213,7 @@ static int iproc_rng200_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id iproc_rng200_of_match[] = {
+	{ .compatible = "brcm,bcm2711-rng200", },
 	{ .compatible = "brcm,bcm7211-rng200", },
 	{ .compatible = "brcm,bcm7278-rng200", },
 	{ .compatible = "brcm,iproc-rng200", },
diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c
index a67430010aa6..e2330e757f1f 100644
--- a/drivers/char/hw_random/ks-sa-rng.c
+++ b/drivers/char/hw_random/ks-sa-rng.c
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/delay.h>
+#include <linux/timekeeping.h>
 
 #define SA_CMD_STATUS_OFS			0x8
 
@@ -84,14 +85,37 @@ struct ks_sa_rng {
 	struct hwrng	rng;
 	struct clk	*clk;
 	struct regmap	*regmap_cfg;
-	struct trng_regs *reg_rng;
+	struct trng_regs __iomem *reg_rng;
+	u64 ready_ts;
+	unsigned int refill_delay_ns;
 };
 
+static unsigned int cycles_to_ns(unsigned long clk_rate, unsigned int cycles)
+{
+	return DIV_ROUND_UP_ULL((TRNG_DEF_CLK_DIV_CYCLES + 1) * 1000000000ull *
+				cycles, clk_rate);
+}
+
+static unsigned int startup_delay_ns(unsigned long clk_rate)
+{
+	if (!TRNG_DEF_STARTUP_CYCLES)
+		return cycles_to_ns(clk_rate, BIT(24));
+	return cycles_to_ns(clk_rate, 256 * TRNG_DEF_STARTUP_CYCLES);
+}
+
+static unsigned int refill_delay_ns(unsigned long clk_rate)
+{
+	if (!TRNG_DEF_MAX_REFILL_CYCLES)
+		return cycles_to_ns(clk_rate, BIT(24));
+	return cycles_to_ns(clk_rate, 256 * TRNG_DEF_MAX_REFILL_CYCLES);
+}
+
 static int ks_sa_rng_init(struct hwrng *rng)
 {
 	u32 value;
 	struct device *dev = (struct device *)rng->priv;
 	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+	unsigned long clk_rate = clk_get_rate(ks_sa_rng->clk);
 
 	/* Enable RNG module */
 	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
@@ -120,6 +144,10 @@ static int ks_sa_rng_init(struct hwrng *rng)
 	value |= TRNG_CNTL_REG_TRNG_ENABLE;
 	writel(value, &ks_sa_rng->reg_rng->control);
 
+	ks_sa_rng->refill_delay_ns = refill_delay_ns(clk_rate);
+	ks_sa_rng->ready_ts = ktime_get_ns() +
+			      startup_delay_ns(clk_rate);
+
 	return 0;
 }
 
@@ -144,6 +172,7 @@ static int ks_sa_rng_data_read(struct hwrng *rng, u32 *data)
 	data[1] = readl(&ks_sa_rng->reg_rng->output_h);
 
 	writel(TRNG_INTACK_REG_READY, &ks_sa_rng->reg_rng->intack);
+	ks_sa_rng->ready_ts = ktime_get_ns() + ks_sa_rng->refill_delay_ns;
 
 	return sizeof(u32) * 2;
 }
@@ -152,10 +181,19 @@ static int ks_sa_rng_data_present(struct hwrng *rng, int wait)
 {
 	struct device *dev = (struct device *)rng->priv;
 	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+	u64 now = ktime_get_ns();
 
 	u32	ready;
 	int	j;
 
+	if (wait && now < ks_sa_rng->ready_ts) {
+		/* Max delay expected here is 81920000 ns */
+		unsigned long min_delay =
+			DIV_ROUND_UP((u32)(ks_sa_rng->ready_ts - now), 1000);
+
+		usleep_range(min_delay, min_delay + SA_RNG_DATA_RETRY_DELAY);
+	}
+
 	for (j = 0; j < SA_MAX_RNG_DATA_RETRIES; j++) {
 		ready = readl(&ks_sa_rng->reg_rng->status);
 		ready &= TRNG_STATUS_REG_READY;
@@ -174,7 +212,6 @@ static int ks_sa_rng_probe(struct platform_device *pdev)
 	struct ks_sa_rng	*ks_sa_rng;
 	struct device		*dev = &pdev->dev;
 	int			ret;
-	struct resource		*mem;
 
 	ks_sa_rng = devm_kzalloc(dev, sizeof(*ks_sa_rng), GFP_KERNEL);
 	if (!ks_sa_rng)
@@ -190,8 +227,7 @@ static int ks_sa_rng_probe(struct platform_device *pdev)
 	};
 	ks_sa_rng->rng.priv = (unsigned long)dev;
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ks_sa_rng->reg_rng = devm_ioremap_resource(dev, mem);
+	ks_sa_rng->reg_rng = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ks_sa_rng->reg_rng))
 		return PTR_ERR(ks_sa_rng->reg_rng);
 
diff --git a/drivers/char/hw_random/meson-rng.c b/drivers/char/hw_random/meson-rng.c
index 76e693da5dde..e446236e81f2 100644
--- a/drivers/char/hw_random/meson-rng.c
+++ b/drivers/char/hw_random/meson-rng.c
@@ -42,7 +42,6 @@ static int meson_rng_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct meson_rng_data *data;
-	struct resource *res;
 	int ret;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -51,8 +50,7 @@ static int meson_rng_probe(struct platform_device *pdev)
 
 	data->pdev = pdev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->base = devm_ioremap_resource(dev, res);
+	data->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(data->base))
 		return PTR_ERR(data->base);
 
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index e649be5a5f13..8ad7b515a51b 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -105,16 +105,9 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 
 static int mtk_rng_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	int ret;
 	struct mtk_rng *priv;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no iomem resource\n");
-		return -ENXIO;
-	}
-
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -135,7 +128,7 @@ static int mtk_rng_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
new file mode 100644
index 000000000000..01d04404d8c0
--- /dev/null
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Nuvoton Technology corporation.
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/hw_random.h>
+#include <linux/delay.h>
+#include <linux/of_irq.h>
+#include <linux/pm_runtime.h>
+
+#define NPCM_RNGCS_REG		0x00	/* Control and status register */
+#define NPCM_RNGD_REG		0x04	/* Data register */
+#define NPCM_RNGMODE_REG	0x08	/* Mode register */
+
+#define NPCM_RNG_CLK_SET_25MHZ	GENMASK(4, 3) /* 20-25 MHz */
+#define NPCM_RNG_DATA_VALID	BIT(1)
+#define NPCM_RNG_ENABLE		BIT(0)
+#define NPCM_RNG_M1ROSEL	BIT(1)
+
+#define NPCM_RNG_TIMEOUT_USEC	20000
+#define NPCM_RNG_POLL_USEC	1000
+
+#define to_npcm_rng(p)	container_of(p, struct npcm_rng, rng)
+
+struct npcm_rng {
+	void __iomem *base;
+	struct hwrng rng;
+};
+
+static int npcm_rng_init(struct hwrng *rng)
+{
+	struct npcm_rng *priv = to_npcm_rng(rng);
+
+	writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE,
+	       priv->base + NPCM_RNGCS_REG);
+
+	return 0;
+}
+
+static void npcm_rng_cleanup(struct hwrng *rng)
+{
+	struct npcm_rng *priv = to_npcm_rng(rng);
+
+	writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG);
+}
+
+static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+	struct npcm_rng *priv = to_npcm_rng(rng);
+	int retval = 0;
+	int ready;
+
+	pm_runtime_get_sync((struct device *)priv->rng.priv);
+
+	while (max >= sizeof(u32)) {
+		if (wait) {
+			if (readl_poll_timeout(priv->base + NPCM_RNGCS_REG,
+					       ready,
+					       ready & NPCM_RNG_DATA_VALID,
+					       NPCM_RNG_POLL_USEC,
+					       NPCM_RNG_TIMEOUT_USEC))
+				break;
+		} else {
+			if ((readl(priv->base + NPCM_RNGCS_REG) &
+			    NPCM_RNG_DATA_VALID) == 0)
+				break;
+		}
+
+		*(u32 *)buf = readl(priv->base + NPCM_RNGD_REG);
+		retval += sizeof(u32);
+		buf += sizeof(u32);
+		max -= sizeof(u32);
+	}
+
+	pm_runtime_mark_last_busy((struct device *)priv->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *)priv->rng.priv);
+
+	return retval || !wait ? retval : -EIO;
+}
+
+static int npcm_rng_probe(struct platform_device *pdev)
+{
+	struct npcm_rng *priv;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	dev_set_drvdata(&pdev->dev, priv);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+#ifndef CONFIG_PM
+	priv->rng.init = npcm_rng_init;
+	priv->rng.cleanup = npcm_rng_cleanup;
+#endif
+	priv->rng.name = pdev->name;
+	priv->rng.read = npcm_rng_read;
+	priv->rng.priv = (unsigned long)&pdev->dev;
+	priv->rng.quality = 1000;
+
+	writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
+
+	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to register rng device: %d\n",
+			ret);
+		pm_runtime_disable(&pdev->dev);
+		pm_runtime_set_suspended(&pdev->dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int npcm_rng_remove(struct platform_device *pdev)
+{
+	struct npcm_rng *priv = platform_get_drvdata(pdev);
+
+	devm_hwrng_unregister(&pdev->dev, &priv->rng);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int npcm_rng_runtime_suspend(struct device *dev)
+{
+	struct npcm_rng *priv = dev_get_drvdata(dev);
+
+	npcm_rng_cleanup(&priv->rng);
+
+	return 0;
+}
+
+static int npcm_rng_runtime_resume(struct device *dev)
+{
+	struct npcm_rng *priv = dev_get_drvdata(dev);
+
+	return npcm_rng_init(&priv->rng);
+}
+#endif
+
+static const struct dev_pm_ops npcm_rng_pm_ops = {
+	SET_RUNTIME_PM_OPS(npcm_rng_runtime_suspend,
+			   npcm_rng_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
+static const struct of_device_id rng_dt_id[] = {
+	{ .compatible = "nuvoton,npcm750-rng",  },
+	{},
+};
+MODULE_DEVICE_TABLE(of, rng_dt_id);
+
+static struct platform_driver npcm_rng_driver = {
+	.driver = {
+		.name		= "npcm-rng",
+		.pm		= &npcm_rng_pm_ops,
+		.of_match_table = of_match_ptr(rng_dt_id),
+	},
+	.probe		= npcm_rng_probe,
+	.remove		= npcm_rng_remove,
+};
+
+module_platform_driver(npcm_rng_driver);
+
+MODULE_DESCRIPTION("Nuvoton NPCM Random Number Generator Driver");
+MODULE_AUTHOR("Tomer Maimon <tomer.maimon@nuvoton.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c
index 8c78aa090492..7be8067ac4e8 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -81,13 +81,13 @@ static int octeon_rng_probe(struct platform_device *pdev)
 		return -ENOENT;
 
 
-	rng->control_status = devm_ioremap_nocache(&pdev->dev,
+	rng->control_status = devm_ioremap(&pdev->dev,
 						   res_ports->start,
 						   sizeof(u64));
 	if (!rng->control_status)
 		return -ENOENT;
 
-	rng->result = devm_ioremap_nocache(&pdev->dev,
+	rng->result = devm_ioremap(&pdev->dev,
 					   res_result->start,
 					   sizeof(u64));
 	if (!rng->result)
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index b27f39688b5e..0ed07d16ec8e 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -66,6 +66,13 @@
 #define OMAP4_RNG_OUTPUT_SIZE			0x8
 #define EIP76_RNG_OUTPUT_SIZE			0x10
 
+/*
+ * EIP76 RNG takes approx. 700us to produce 16 bytes of output data
+ * as per testing results. And to account for the lack of udelay()'s
+ * reliability, we keep the timeout as 1000us.
+ */
+#define RNG_DATA_FILL_TIMEOUT			100
+
 enum {
 	RNG_OUTPUT_0_REG = 0,
 	RNG_OUTPUT_1_REG,
@@ -176,7 +183,7 @@ static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max,
 	if (max < priv->pdata->data_size)
 		return 0;
 
-	for (i = 0; i < 20; i++) {
+	for (i = 0; i < RNG_DATA_FILL_TIMEOUT; i++) {
 		present = priv->pdata->data_present(priv);
 		if (present || !wait)
 			break;
@@ -432,7 +439,6 @@ static int get_omap_rng_device_details(struct omap_rng_dev *omap_rng)
 static int omap_rng_probe(struct platform_device *pdev)
 {
 	struct omap_rng_dev *priv;
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 	int ret;
 
@@ -449,8 +455,7 @@ static int omap_rng_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, priv);
 	priv->dev = dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto err_ioremap;
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index 38b719017186..e08a8887e718 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -11,8 +11,6 @@
  * warranty of any kind, whether express or implied.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/random.h>
@@ -20,117 +18,159 @@
 #include <linux/workqueue.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #define RNG_RESET			0x01
 #define RNG_GEN_PRNG_HW_INIT		0x02
 #define RNG_GEN_HW			0x08
 
-/* param1: ptr, param2: count, param3: flag */
-static u32 (*omap3_rom_rng_call)(u32, u32, u32);
-
-static struct delayed_work idle_work;
-static int rng_idle;
-static struct clk *rng_clk;
+struct omap_rom_rng {
+	struct clk *clk;
+	struct device *dev;
+	struct hwrng ops;
+	u32 (*rom_rng_call)(u32 ptr, u32 count, u32 flag);
+};
 
-static void omap3_rom_rng_idle(struct work_struct *work)
+static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w)
 {
+	struct omap_rom_rng *ddata;
+	u32 ptr;
 	int r;
 
-	r = omap3_rom_rng_call(0, 0, RNG_RESET);
-	if (r != 0) {
-		pr_err("reset failed: %d\n", r);
-		return;
+	ddata = (struct omap_rom_rng *)rng->priv;
+
+	r = pm_runtime_get_sync(ddata->dev);
+	if (r < 0) {
+		pm_runtime_put_noidle(ddata->dev);
+
+		return r;
 	}
-	clk_disable_unprepare(rng_clk);
-	rng_idle = 1;
+
+	ptr = virt_to_phys(data);
+	r = ddata->rom_rng_call(ptr, 4, RNG_GEN_HW);
+	if (r != 0)
+		r = -EINVAL;
+	else
+		r = 4;
+
+	pm_runtime_mark_last_busy(ddata->dev);
+	pm_runtime_put_autosuspend(ddata->dev);
+
+	return r;
 }
 
-static int omap3_rom_rng_get_random(void *buf, unsigned int count)
+static int __maybe_unused omap_rom_rng_runtime_suspend(struct device *dev)
 {
-	u32 r;
-	u32 ptr;
+	struct omap_rom_rng *ddata;
+	int r;
 
-	cancel_delayed_work_sync(&idle_work);
-	if (rng_idle) {
-		r = clk_prepare_enable(rng_clk);
-		if (r)
-			return r;
-
-		r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
-		if (r != 0) {
-			clk_disable_unprepare(rng_clk);
-			pr_err("HW init failed: %d\n", r);
-			return -EIO;
-		}
-		rng_idle = 0;
-	}
+	ddata = dev_get_drvdata(dev);
 
-	ptr = virt_to_phys(buf);
-	r = omap3_rom_rng_call(ptr, count, RNG_GEN_HW);
-	schedule_delayed_work(&idle_work, msecs_to_jiffies(500));
+	r = ddata->rom_rng_call(0, 0, RNG_RESET);
 	if (r != 0)
-		return -EINVAL;
+		dev_err(dev, "reset failed: %d\n", r);
+
+	clk_disable_unprepare(ddata->clk);
+
 	return 0;
 }
 
-static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w)
+static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev)
 {
+	struct omap_rom_rng *ddata;
 	int r;
 
-	r = omap3_rom_rng_get_random(data, 4);
+	ddata = dev_get_drvdata(dev);
+
+	r = clk_prepare_enable(ddata->clk);
 	if (r < 0)
 		return r;
-	return 4;
+
+	r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
+	if (r != 0) {
+		clk_disable(ddata->clk);
+		dev_err(dev, "HW init failed: %d\n", r);
+
+		return -EIO;
+	}
+
+	return 0;
 }
 
-static struct hwrng omap3_rom_rng_ops = {
-	.name		= "omap3-rom",
-	.read		= omap3_rom_rng_read,
-};
+static void omap_rom_rng_finish(void *data)
+{
+	struct omap_rom_rng *ddata = data;
+
+	pm_runtime_dont_use_autosuspend(ddata->dev);
+	pm_runtime_disable(ddata->dev);
+}
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)
 {
+	struct omap_rom_rng *ddata;
 	int ret = 0;
 
-	pr_info("initializing\n");
+	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
+	if (!ddata)
+		return -ENOMEM;
 
-	omap3_rom_rng_call = pdev->dev.platform_data;
-	if (!omap3_rom_rng_call) {
-		pr_err("omap3_rom_rng_call is NULL\n");
+	ddata->dev = &pdev->dev;
+	ddata->ops.priv = (unsigned long)ddata;
+	ddata->ops.name = "omap3-rom";
+	ddata->ops.read = of_device_get_match_data(&pdev->dev);
+	ddata->ops.quality = 900;
+	if (!ddata->ops.read) {
+		dev_err(&pdev->dev, "missing rom code handler\n");
+
+		return -ENODEV;
+	}
+	dev_set_drvdata(ddata->dev, ddata);
+
+	ddata->rom_rng_call = pdev->dev.platform_data;
+	if (!ddata->rom_rng_call) {
+		dev_err(ddata->dev, "rom_rng_call is NULL\n");
 		return -EINVAL;
 	}
 
-	INIT_DELAYED_WORK(&idle_work, omap3_rom_rng_idle);
-	rng_clk = devm_clk_get(&pdev->dev, "ick");
-	if (IS_ERR(rng_clk)) {
-		pr_err("unable to get RNG clock\n");
-		return PTR_ERR(rng_clk);
+	ddata->clk = devm_clk_get(ddata->dev, "ick");
+	if (IS_ERR(ddata->clk)) {
+		dev_err(ddata->dev, "unable to get RNG clock\n");
+		return PTR_ERR(ddata->clk);
 	}
 
-	/* Leave the RNG in reset state. */
-	ret = clk_prepare_enable(rng_clk);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
+	pm_runtime_use_autosuspend(&pdev->dev);
+
+	ret = devm_add_action_or_reset(ddata->dev, omap_rom_rng_finish,
+				       ddata);
 	if (ret)
 		return ret;
-	omap3_rom_rng_idle(0);
 
-	return hwrng_register(&omap3_rom_rng_ops);
+	return devm_hwrng_register(ddata->dev, &ddata->ops);
 }
 
-static int omap3_rom_rng_remove(struct platform_device *pdev)
-{
-	cancel_delayed_work_sync(&idle_work);
-	hwrng_unregister(&omap3_rom_rng_ops);
-	clk_disable_unprepare(rng_clk);
-	return 0;
-}
+static const struct of_device_id omap_rom_rng_match[] = {
+	{ .compatible = "nokia,n900-rom-rng", .data = omap3_rom_rng_read, },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, omap_rom_rng_match);
+
+static const struct dev_pm_ops omap_rom_rng_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(omap_rom_rng_runtime_suspend,
+				omap_rom_rng_runtime_resume)
+};
 
 static struct platform_driver omap3_rom_rng_driver = {
 	.driver = {
 		.name		= "omap3-rom-rng",
+		.of_match_table = omap_rom_rng_match,
+		.pm = &omap_rom_rng_pm_ops,
 	},
 	.probe		= omap3_rom_rng_probe,
-	.remove		= omap3_rom_rng_remove,
 };
 
 module_platform_driver(omap3_rom_rng_driver);
diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c
index 24b1460b49d4..2498d4ef9fe2 100644
--- a/drivers/char/hw_random/pasemi-rng.c
+++ b/drivers/char/hw_random/pasemi-rng.c
@@ -86,10 +86,8 @@ static struct hwrng pasemi_rng = {
 static int rng_probe(struct platform_device *pdev)
 {
 	void __iomem *rng_regs;
-	struct resource *res;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rng_regs = devm_ioremap_resource(&pdev->dev, res);
+	rng_regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rng_regs))
 		return PTR_ERR(rng_regs);
 
diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index 90f498c98947..81080cb2294e 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -70,7 +70,6 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 static int pic32_rng_probe(struct platform_device *pdev)
 {
 	struct pic32_rng *priv;
-	struct resource *res;
 	u32 v;
 	int ret;
 
@@ -78,8 +77,7 @@ static int pic32_rng_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c
index 863448360a7d..783c24e3f8b7 100644
--- a/drivers/char/hw_random/st-rng.c
+++ b/drivers/char/hw_random/st-rng.c
@@ -72,7 +72,6 @@ static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 static int st_rng_probe(struct platform_device *pdev)
 {
 	struct st_rng_data *ddata;
-	struct resource *res;
 	struct clk *clk;
 	void __iomem *base;
 	int ret;
@@ -81,8 +80,7 @@ static int st_rng_probe(struct platform_device *pdev)
 	if (!ddata)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/char/hw_random/tx4939-rng.c b/drivers/char/hw_random/tx4939-rng.c
index 1093583b579c..c8bd34e740fd 100644
--- a/drivers/char/hw_random/tx4939-rng.c
+++ b/drivers/char/hw_random/tx4939-rng.c
@@ -107,14 +107,12 @@ static int tx4939_rng_data_read(struct hwrng *rng, u32 *buffer)
 static int __init tx4939_rng_probe(struct platform_device *dev)
 {
 	struct tx4939_rng *rngdev;
-	struct resource *r;
 	int i;
 
 	rngdev = devm_kzalloc(&dev->dev, sizeof(*rngdev), GFP_KERNEL);
 	if (!rngdev)
 		return -ENOMEM;
-	r = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	rngdev->base = devm_ioremap_resource(&dev->dev, r);
+	rngdev->base = devm_platform_ioremap_resource(dev, 0);
 	if (IS_ERR(rngdev->base))
 		return PTR_ERR(rngdev->base);
 
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index 7e568db87ae2..d7516a446987 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -313,7 +313,6 @@ static struct hwrng xgene_rng_func = {
 
 static int xgene_rng_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct xgene_rng_dev *ctx;
 	int rc = 0;
 
@@ -324,8 +323,7 @@ static int xgene_rng_probe(struct platform_device *pdev)
 	ctx->dev = &pdev->dev;
 	platform_set_drvdata(pdev, ctx);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ctx->csr_base = devm_ioremap_resource(&pdev->dev, res);
+	ctx->csr_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ctx->csr_base))
 		return PTR_ERR(ctx->csr_base);
 
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 4bad0614109b..7dc2c3ec4051 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -4,38 +4,38 @@
 #
 
 menuconfig IPMI_HANDLER
-       tristate 'IPMI top-level message handler'
-       depends on HAS_IOMEM
-       select IPMI_DMI_DECODE if DMI
-       help
-         This enables the central IPMI message handler, required for IPMI
-	 to work.
+	tristate 'IPMI top-level message handler'
+	depends on HAS_IOMEM
+	select IPMI_DMI_DECODE if DMI
+	help
+	  This enables the central IPMI message handler, required for IPMI
+	  to work.
 
-         IPMI is a standard for managing sensors (temperature,
-         voltage, etc.) in a system.
+	  IPMI is a standard for managing sensors (temperature,
+	  voltage, etc.) in a system.
 
-         See <file:Documentation/IPMI.txt> for more details on the driver.
+	  See <file:Documentation/IPMI.txt> for more details on the driver.
 
-	 If unsure, say N.
+	  If unsure, say N.
 
 config IPMI_DMI_DECODE
-       select IPMI_PLAT_DATA
-       bool
+	select IPMI_PLAT_DATA
+	bool
 
 config IPMI_PLAT_DATA
-       bool
+	bool
 
 if IPMI_HANDLER
 
 config IPMI_PANIC_EVENT
-       bool 'Generate a panic event to all BMCs on a panic'
-       help
-	 When a panic occurs, this will cause the IPMI message handler to,
-	 by default, generate an IPMI event describing the panic to each
-	 interface registered with the message handler.  This is always
-	 available, the module parameter for ipmi_msghandler named
-	 panic_op can be set to "event" to chose this value, this config
-	 simply causes the default value to be set to "event".
+	bool 'Generate a panic event to all BMCs on a panic'
+	help
+	  When a panic occurs, this will cause the IPMI message handler to,
+	  by default, generate an IPMI event describing the panic to each
+	  interface registered with the message handler.  This is always
+	  available, the module parameter for ipmi_msghandler named
+	  panic_op can be set to "event" to chose this value, this config
+	  simply causes the default value to be set to "event".
 
 config IPMI_PANIC_STRING
 	bool 'Generate OEM events containing the panic string'
@@ -54,43 +54,43 @@ config IPMI_PANIC_STRING
 	  causes the default value to be set to "string".
 
 config IPMI_DEVICE_INTERFACE
-       tristate 'Device interface for IPMI'
-       help
-         This provides an IOCTL interface to the IPMI message handler so
-	 userland processes may use IPMI.  It supports poll() and select().
+	tristate 'Device interface for IPMI'
+	help
+	  This provides an IOCTL interface to the IPMI message handler so
+	  userland processes may use IPMI.  It supports poll() and select().
 
 config IPMI_SI
-       tristate 'IPMI System Interface handler'
-       select IPMI_PLAT_DATA
-       help
-         Provides a driver for System Interfaces (KCS, SMIC, BT).
-	 Currently, only KCS and SMIC are supported.  If
-	 you are using IPMI, you should probably say "y" here.
+	tristate 'IPMI System Interface handler'
+	select IPMI_PLAT_DATA
+	help
+	  Provides a driver for System Interfaces (KCS, SMIC, BT).
+	  Currently, only KCS and SMIC are supported.  If
+	  you are using IPMI, you should probably say "y" here.
 
 config IPMI_SSIF
-       tristate 'IPMI SMBus handler (SSIF)'
-       select I2C
-       help
-         Provides a driver for a SMBus interface to a BMC, meaning that you
-	 have a driver that must be accessed over an I2C bus instead of a
-	 standard interface.  This module requires I2C support.
+	tristate 'IPMI SMBus handler (SSIF)'
+	select I2C
+	help
+	  Provides a driver for a SMBus interface to a BMC, meaning that you
+	  have a driver that must be accessed over an I2C bus instead of a
+	  standard interface.  This module requires I2C support.
 
 config IPMI_POWERNV
-       depends on PPC_POWERNV
-       tristate 'POWERNV (OPAL firmware) IPMI interface'
-       help
-         Provides a driver for OPAL firmware-based IPMI interfaces.
+	depends on PPC_POWERNV
+	tristate 'POWERNV (OPAL firmware) IPMI interface'
+	help
+	  Provides a driver for OPAL firmware-based IPMI interfaces.
 
 config IPMI_WATCHDOG
-       tristate 'IPMI Watchdog Timer'
-       help
-         This enables the IPMI watchdog timer.
+	tristate 'IPMI Watchdog Timer'
+	help
+	  This enables the IPMI watchdog timer.
 
 config IPMI_POWEROFF
-       tristate 'IPMI Poweroff'
-       help
-         This enables a function to power off the system with IPMI if
-	 the IPMI management controller is capable of this.
+	tristate 'IPMI Poweroff'
+	help
+	  This enables a function to power off the system with IPMI if
+	  the IPMI management controller is capable of this.
 
 endif # IPMI_HANDLER
 
@@ -126,7 +126,7 @@ config NPCM7XX_KCS_IPMI_BMC
 
 config ASPEED_BT_IPMI_BMC
 	depends on ARCH_ASPEED || COMPILE_TEST
-       depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
+	depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
 	tristate "BT IPMI bmc driver"
 	help
 	  Provides a driver for the BT (Block Transfer) IPMI interface
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
index 40b9927c072c..d36aeacb290e 100644
--- a/drivers/char/ipmi/bt-bmc.c
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -444,15 +444,13 @@ static int bt_bmc_probe(struct platform_device *pdev)
 
 	bt_bmc->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
 	if (IS_ERR(bt_bmc->map)) {
-		struct resource *res;
 		void __iomem *base;
 
 		/*
 		 * Assume it's not the MFD-based devicetree description, in
 		 * which case generate a regmap ourselves
 		 */
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		base = devm_ioremap_resource(&pdev->dev, res);
+		base = devm_platform_ioremap_resource(pdev, 0);
 		if (IS_ERR(base))
 			return PTR_ERR(base);
 
diff --git a/drivers/char/ipmi/ipmb_dev_int.c b/drivers/char/ipmi/ipmb_dev_int.c
index 285e0b8f9a97..1ff4fb1def7c 100644
--- a/drivers/char/ipmi/ipmb_dev_int.c
+++ b/drivers/char/ipmi/ipmb_dev_int.c
@@ -133,9 +133,6 @@ static ssize_t ipmb_write(struct file *file, const char __user *buf,
 	rq_sa = GET_7BIT_ADDR(msg[RQ_SA_8BIT_IDX]);
 	netf_rq_lun = msg[NETFN_LUN_IDX];
 
-	if (!(netf_rq_lun & NETFN_RSP_BIT_MASK))
-		return -EINVAL;
-
 	/*
 	 * subtract rq_sa and netf_rq_lun from the length of the msg passed to
 	 * i2c_smbus_xfer
@@ -154,16 +151,16 @@ static ssize_t ipmb_write(struct file *file, const char __user *buf,
 	return ret ? : count;
 }
 
-static unsigned int ipmb_poll(struct file *file, poll_table *wait)
+static __poll_t ipmb_poll(struct file *file, poll_table *wait)
 {
 	struct ipmb_dev *ipmb_dev = to_ipmb_dev(file);
-	unsigned int mask = POLLOUT;
+	__poll_t mask = EPOLLOUT;
 
 	mutex_lock(&ipmb_dev->file_mutex);
 	poll_wait(file, &ipmb_dev->wait_queue, wait);
 
 	if (atomic_read(&ipmb_dev->request_queue_len))
-		mask |= POLLIN;
+		mask |= EPOLLIN;
 	mutex_unlock(&ipmb_dev->file_mutex);
 
 	return mask;
@@ -203,25 +200,16 @@ static u8 ipmb_verify_checksum1(struct ipmb_dev *ipmb_dev, u8 rs_sa)
 		ipmb_dev->request.checksum1);
 }
 
-static bool is_ipmb_request(struct ipmb_dev *ipmb_dev, u8 rs_sa)
+/*
+ * Verify if message has proper ipmb header with minimum length
+ * and correct checksum byte.
+ */
+static bool is_ipmb_msg(struct ipmb_dev *ipmb_dev, u8 rs_sa)
 {
-	if (ipmb_dev->msg_idx >= IPMB_REQUEST_LEN_MIN) {
-		if (ipmb_verify_checksum1(ipmb_dev, rs_sa))
-			return false;
+	if ((ipmb_dev->msg_idx >= IPMB_REQUEST_LEN_MIN) &&
+	   (!ipmb_verify_checksum1(ipmb_dev, rs_sa)))
+		return true;
 
-		/*
-		 * Check whether this is an IPMB request or
-		 * response.
-		 * The 6 MSB of netfn_rs_lun are dedicated to the netfn
-		 * while the remaining bits are dedicated to the lun.
-		 * If the LSB of the netfn is cleared, it is associated
-		 * with an IPMB request.
-		 * If the LSB of the netfn is set, it is associated with
-		 * an IPMB response.
-		 */
-		if (!(ipmb_dev->request.netfn_rs_lun & NETFN_RSP_BIT_MASK))
-			return true;
-	}
 	return false;
 }
 
@@ -273,8 +261,7 @@ static int ipmb_slave_cb(struct i2c_client *client,
 
 	case I2C_SLAVE_STOP:
 		ipmb_dev->request.len = ipmb_dev->msg_idx;
-
-		if (is_ipmb_request(ipmb_dev, GET_8BIT_ADDR(client->addr)))
+		if (is_ipmb_msg(ipmb_dev, GET_8BIT_ADDR(client->addr)))
 			ipmb_handle_request(ipmb_dev);
 		break;
 
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 2aab80e19ae0..cad9563f8f48 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -44,25 +44,6 @@ static void need_waiter(struct ipmi_smi *intf);
 static int handle_one_recv_msg(struct ipmi_smi *intf,
 			       struct ipmi_smi_msg *msg);
 
-#ifdef DEBUG
-static void ipmi_debug_msg(const char *title, unsigned char *data,
-			   unsigned int len)
-{
-	int i, pos;
-	char buf[100];
-
-	pos = snprintf(buf, sizeof(buf), "%s: ", title);
-	for (i = 0; i < len; i++)
-		pos += snprintf(buf + pos, sizeof(buf) - pos,
-				" %2.2x", data[i]);
-	pr_debug("%s\n", buf);
-}
-#else
-static void ipmi_debug_msg(const char *title, unsigned char *data,
-			   unsigned int len)
-{ }
-#endif
-
 static bool initialized;
 static bool drvregistered;
 
@@ -448,6 +429,8 @@ enum ipmi_stat_indexes {
 
 #define IPMI_IPMB_NUM_SEQ	64
 struct ipmi_smi {
+	struct module *owner;
+
 	/* What interface number are we? */
 	int intf_num;
 
@@ -1220,6 +1203,11 @@ int ipmi_create_user(unsigned int          if_num,
 	if (rv)
 		goto out_kfree;
 
+	if (!try_module_get(intf->owner)) {
+		rv = -ENODEV;
+		goto out_kfree;
+	}
+
 	/* Note that each existing user holds a refcount to the interface. */
 	kref_get(&intf->refcount);
 
@@ -1349,6 +1337,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
 	}
 
 	kref_put(&intf->refcount, intf_free);
+	module_put(intf->owner);
 }
 
 int ipmi_destroy_user(struct ipmi_user *user)
@@ -2267,7 +2256,7 @@ out_err:
 		ipmi_free_smi_msg(smi_msg);
 		ipmi_free_recv_msg(recv_msg);
 	} else {
-		ipmi_debug_msg("Send", smi_msg->data, smi_msg->data_size);
+		pr_debug("Send: %*ph\n", smi_msg->data_size, smi_msg->data);
 
 		smi_send(intf, intf->handlers, smi_msg, priority);
 	}
@@ -2459,7 +2448,7 @@ static int __get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc)
  * been recently fetched, this will just use the cached data.  Otherwise
  * it will run a new fetch.
  *
- * Except for the first time this is called (in ipmi_register_smi()),
+ * Except for the first time this is called (in ipmi_add_smi()),
  * this will always return good data;
  */
 static int __bmc_get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc,
@@ -3031,8 +3020,11 @@ static int __ipmi_bmc_register(struct ipmi_smi *intf,
 		bmc->pdev.name = "ipmi_bmc";
 
 		rv = ida_simple_get(&ipmi_bmc_ida, 0, 0, GFP_KERNEL);
-		if (rv < 0)
+		if (rv < 0) {
+			kfree(bmc);
 			goto out;
+		}
+
 		bmc->pdev.dev.driver = &ipmidriver.driver;
 		bmc->pdev.id = rv;
 		bmc->pdev.dev.release = release_bmc_device;
@@ -3377,10 +3369,11 @@ static void redo_bmc_reg(struct work_struct *work)
 	kref_put(&intf->refcount, intf_free);
 }
 
-int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
-		      void		       *send_info,
-		      struct device            *si_dev,
-		      unsigned char            slave_addr)
+int ipmi_add_smi(struct module         *owner,
+		 const struct ipmi_smi_handlers *handlers,
+		 void		       *send_info,
+		 struct device         *si_dev,
+		 unsigned char         slave_addr)
 {
 	int              i, j;
 	int              rv;
@@ -3406,7 +3399,7 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
 		return rv;
 	}
 
-
+	intf->owner = owner;
 	intf->bmc = &intf->tmp_bmc;
 	INIT_LIST_HEAD(&intf->bmc->intfs);
 	mutex_init(&intf->bmc->dyn_mutex);
@@ -3514,7 +3507,7 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
 
 	return rv;
 }
-EXPORT_SYMBOL(ipmi_register_smi);
+EXPORT_SYMBOL(ipmi_add_smi);
 
 static void deliver_smi_err_response(struct ipmi_smi *intf,
 				     struct ipmi_smi_msg *msg,
@@ -3730,7 +3723,7 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
 		msg->data[10] = ipmb_checksum(&msg->data[6], 4);
 		msg->data_size = 11;
 
-		ipmi_debug_msg("Invalid command:", msg->data, msg->data_size);
+		pr_debug("Invalid command: %*ph\n", msg->data_size, msg->data);
 
 		rcu_read_lock();
 		if (!intf->in_shutdown) {
@@ -4217,7 +4210,7 @@ static int handle_one_recv_msg(struct ipmi_smi *intf,
 	int requeue;
 	int chan;
 
-	ipmi_debug_msg("Recv:", msg->rsp, msg->rsp_size);
+	pr_debug("Recv: %*ph\n", msg->rsp_size, msg->rsp);
 
 	if ((msg->data_size >= 2)
 	    && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
@@ -4576,7 +4569,7 @@ smi_from_recv_msg(struct ipmi_smi *intf, struct ipmi_recv_msg *recv_msg,
 	smi_msg->data_size = recv_msg->msg.data_len;
 	smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid);
 
-	ipmi_debug_msg("Resend: ", smi_msg->data, smi_msg->data_size);
+	pr_debug("Resend: %*ph\n", smi_msg->data_size, smi_msg->data);
 
 	return smi_msg;
 }
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 6b9a0593d2eb..c7cc8538b84a 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -265,10 +265,10 @@ static void cleanup_ipmi_si(void);
 #ifdef DEBUG_TIMING
 void debug_timestamp(char *msg)
 {
-	struct timespec t;
+	struct timespec64 t;
 
-	ktime_get_ts(&t);
-	pr_debug("**%s: %ld.%9.9ld\n", msg, (long) t.tv_sec, t.tv_nsec);
+	ktime_get_ts64(&t);
+	pr_debug("**%s: %lld.%9.9ld\n", msg, t.tv_sec, t.tv_nsec);
 }
 #else
 #define debug_timestamp(x)
@@ -935,38 +935,25 @@ static void set_run_to_completion(void *send_info, bool i_run_to_completion)
 }
 
 /*
- * Use -1 in the nsec value of the busy waiting timespec to tell that
- * we are spinning in kipmid looking for something and not delaying
- * between checks
+ * Use -1 as a special constant to tell that we are spinning in kipmid
+ * looking for something and not delaying between checks
  */
-static inline void ipmi_si_set_not_busy(struct timespec *ts)
-{
-	ts->tv_nsec = -1;
-}
-static inline int ipmi_si_is_busy(struct timespec *ts)
-{
-	return ts->tv_nsec != -1;
-}
-
+#define IPMI_TIME_NOT_BUSY ns_to_ktime(-1ull)
 static inline bool ipmi_thread_busy_wait(enum si_sm_result smi_result,
 					 const struct smi_info *smi_info,
-					 struct timespec *busy_until)
+					 ktime_t *busy_until)
 {
 	unsigned int max_busy_us = 0;
 
 	if (smi_info->si_num < num_max_busy_us)
 		max_busy_us = kipmid_max_busy_us[smi_info->si_num];
 	if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY)
-		ipmi_si_set_not_busy(busy_until);
-	else if (!ipmi_si_is_busy(busy_until)) {
-		ktime_get_ts(busy_until);
-		timespec_add_ns(busy_until, max_busy_us * NSEC_PER_USEC);
+		*busy_until = IPMI_TIME_NOT_BUSY;
+	else if (*busy_until == IPMI_TIME_NOT_BUSY) {
+		*busy_until = ktime_get() + max_busy_us * NSEC_PER_USEC;
 	} else {
-		struct timespec now;
-
-		ktime_get_ts(&now);
-		if (unlikely(timespec_compare(&now, busy_until) > 0)) {
-			ipmi_si_set_not_busy(busy_until);
+		if (unlikely(ktime_get() > *busy_until)) {
+			*busy_until = IPMI_TIME_NOT_BUSY;
 			return false;
 		}
 	}
@@ -988,9 +975,8 @@ static int ipmi_thread(void *data)
 	struct smi_info *smi_info = data;
 	unsigned long flags;
 	enum si_sm_result smi_result;
-	struct timespec busy_until = { 0, 0 };
+	ktime_t busy_until = IPMI_TIME_NOT_BUSY;
 
-	ipmi_si_set_not_busy(&busy_until);
 	set_user_nice(current, MAX_NICE);
 	while (!kthread_should_stop()) {
 		int busy_wait;
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 74c6d1f34132..55986e10a124 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -893,6 +893,7 @@ static const struct file_operations ipmi_wdog_fops = {
 	.poll    = ipmi_poll,
 	.write   = ipmi_write,
 	.unlocked_ioctl = ipmi_unlocked_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.open    = ipmi_open,
 	.release = ipmi_close,
 	.fasync  = ipmi_fasync,
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 7c9269e3477a..bd95aba1f9fe 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -713,6 +713,10 @@ static int lp_set_timeout64(unsigned int minor, void __user *arg)
 	if (copy_from_user(karg, arg, sizeof(karg)))
 		return -EFAULT;
 
+	/* sparc64 suseconds_t is 32-bit only */
+	if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall())
+		karg[1] >>= 32;
+
 	return lp_set_timeout(minor, karg[0], karg[1]);
 }
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 82f9a6a814ae..e342daa73d1b 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4169,7 +4169,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  *
  * dev  pointer to network device structure
  */
-static void hdlcdev_tx_timeout(struct net_device *dev)
+static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
 	unsigned long flags;
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index c86f18aa8985..2c2381a806ae 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -619,20 +619,27 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(time32, argp, sizeof(time32)))
 			return -EFAULT;
 
+		if ((time32[0] < 0) || (time32[1] < 0))
+			return -EINVAL;
+
 		return pp_set_timeout(pp->pdev, time32[0], time32[1]);
 
 	case PPSETTIME64:
 		if (copy_from_user(time64, argp, sizeof(time64)))
 			return -EFAULT;
 
+		if ((time64[0] < 0) || (time64[1] < 0))
+			return -EINVAL;
+
+		if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall())
+			time64[1] >>= 32;
+
 		return pp_set_timeout(pp->pdev, time64[0], time64[1]);
 
 	case PPGETTIME32:
 		jiffies_to_timespec64(pp->pdev->timeout, &ts);
 		time32[0] = ts.tv_sec;
 		time32[1] = ts.tv_nsec / NSEC_PER_USEC;
-		if ((time32[0] < 0) || (time32[1] < 0))
-			return -EINVAL;
 
 		if (copy_to_user(argp, time32, sizeof(time32)))
 			return -EFAULT;
@@ -643,8 +650,9 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		jiffies_to_timespec64(pp->pdev->timeout, &ts);
 		time64[0] = ts.tv_sec;
 		time64[1] = ts.tv_nsec / NSEC_PER_USEC;
-		if ((time64[0] < 0) || (time64[1] < 0))
-			return -EINVAL;
+
+		if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall())
+			time64[1] <<= 32;
 
 		if (copy_to_user(argp, time64, sizeof(time64)))
 			return -EFAULT;
@@ -670,14 +678,6 @@ static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
-static long pp_compat_ioctl(struct file *file, unsigned int cmd,
-			    unsigned long arg)
-{
-	return pp_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
-}
-#endif
-
 static int pp_open(struct inode *inode, struct file *file)
 {
 	unsigned int minor = iminor(inode);
@@ -786,9 +786,7 @@ static const struct file_operations pp_fops = {
 	.write		= pp_write,
 	.poll		= pp_poll,
 	.unlocked_ioctl	= pp_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl   = pp_compat_ioctl,
-#endif
+	.compat_ioctl   = compat_ptr_ioctl,
 	.open		= pp_open,
 	.release	= pp_release,
 };
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 01b8868b9bed..cda12933a17d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2166,6 +2166,7 @@ const struct file_operations random_fops = {
 	.write = random_write,
 	.poll  = random_poll,
 	.unlocked_ioctl = random_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
 };
@@ -2174,6 +2175,7 @@ const struct file_operations urandom_fops = {
 	.read  = urandom_read,
 	.write = random_write,
 	.unlocked_ioctl = random_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
 };
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 9c37047f4b56..aacdeed93320 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -67,6 +67,13 @@ config TCG_TIS_SPI
 	  within Linux. To compile this driver as a module, choose  M here;
 	  the module will be called tpm_tis_spi.
 
+config TCG_TIS_SPI_CR50
+	bool "Cr50 SPI Interface"
+	depends on TCG_TIS_SPI
+	help
+	  If you have a H1 secure module running Cr50 firmware on SPI bus,
+	  say Yes and it will be accessible from within Linux.
+
 config TCG_TIS_I2C_ATMEL
 	tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
 	depends on I2C
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index c354cdff9c62..5a0d99d4fec0 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -21,7 +21,9 @@ tpm-$(CONFIG_EFI) += eventlog/efi.o
 tpm-$(CONFIG_OF) += eventlog/of.o
 obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o
 obj-$(CONFIG_TCG_TIS) += tpm_tis.o
-obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o
+obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi_mod.o
+tpm_tis_spi_mod-y := tpm_tis_spi.o
+tpm_tis_spi_mod-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o
 obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
 obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
 obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index 2ec47a69a2a6..87f449340202 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -61,6 +61,12 @@ static void tpm_dev_async_work(struct work_struct *work)
 
 	mutex_lock(&priv->buffer_mutex);
 	priv->command_enqueued = false;
+	ret = tpm_try_get_ops(priv->chip);
+	if (ret) {
+		priv->response_length = ret;
+		goto out;
+	}
+
 	ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
 			       sizeof(priv->data_buffer));
 	tpm_put_ops(priv->chip);
@@ -68,6 +74,7 @@ static void tpm_dev_async_work(struct work_struct *work)
 		priv->response_length = ret;
 		mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
 	}
+out:
 	mutex_unlock(&priv->buffer_mutex);
 	wake_up_interruptible(&priv->async_wait);
 }
@@ -123,7 +130,7 @@ ssize_t tpm_common_read(struct file *file, char __user *buf,
 		priv->response_read = true;
 
 		ret_size = min_t(ssize_t, size, priv->response_length);
-		if (!ret_size) {
+		if (ret_size <= 0) {
 			priv->response_length = 0;
 			goto out;
 		}
@@ -204,6 +211,7 @@ ssize_t tpm_common_write(struct file *file, const char __user *buf,
 	if (file->f_flags & O_NONBLOCK) {
 		priv->command_enqueued = true;
 		queue_work(tpm_dev_wq, &priv->async_work);
+		tpm_put_ops(priv->chip);
 		mutex_unlock(&priv->buffer_mutex);
 		return size;
 	}
diff --git a/drivers/char/tpm/tpm-dev.h b/drivers/char/tpm/tpm-dev.h
index 1089fc0bb290..f3742bcc73e3 100644
--- a/drivers/char/tpm/tpm-dev.h
+++ b/drivers/char/tpm/tpm-dev.h
@@ -14,7 +14,7 @@ struct file_priv {
 	struct work_struct timeout_work;
 	struct work_struct async_work;
 	wait_queue_head_t async_wait;
-	size_t response_length;
+	ssize_t response_length;
 	bool response_read;
 	bool command_enqueued;
 
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index d7a3888ad80f..a438b1206fcb 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/suspend.h>
 #include <linux/freezer.h>
 #include <linux/tpm_eventlog.h>
 
@@ -394,7 +395,11 @@ int tpm_pm_suspend(struct device *dev)
 		return -ENODEV;
 
 	if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
-		return 0;
+		goto suspended;
+
+	if ((chip->flags & TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED) &&
+	    !pm_suspend_via_firmware())
+		goto suspended;
 
 	if (!tpm_chip_start(chip)) {
 		if (chip->flags & TPM_CHIP_FLAG_TPM2)
@@ -405,6 +410,7 @@ int tpm_pm_suspend(struct device *dev)
 		tpm_chip_stop(chip);
 	}
 
+suspended:
 	return rc;
 }
 EXPORT_SYMBOL_GPL(tpm_pm_suspend);
@@ -453,62 +459,6 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
 }
 EXPORT_SYMBOL_GPL(tpm_get_random);
 
-/**
- * tpm_seal_trusted() - seal a trusted key payload
- * @chip:	a &struct tpm_chip instance, %NULL for the default chip
- * @options:	authentication values and other options
- * @payload:	the key data in clear and encrypted form
- *
- * Note: only TPM 2.0 chip are supported. TPM 1.x implementation is located in
- * the keyring subsystem.
- *
- * Return: same as with tpm_transmit_cmd()
- */
-int tpm_seal_trusted(struct tpm_chip *chip, struct trusted_key_payload *payload,
-		     struct trusted_key_options *options)
-{
-	int rc;
-
-	chip = tpm_find_get_ops(chip);
-	if (!chip || !(chip->flags & TPM_CHIP_FLAG_TPM2))
-		return -ENODEV;
-
-	rc = tpm2_seal_trusted(chip, payload, options);
-
-	tpm_put_ops(chip);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_seal_trusted);
-
-/**
- * tpm_unseal_trusted() - unseal a trusted key
- * @chip:	a &struct tpm_chip instance, %NULL for the default chip
- * @options:	authentication values and other options
- * @payload:	the key data in clear and encrypted form
- *
- * Note: only TPM 2.0 chip are supported. TPM 1.x implementation is located in
- * the keyring subsystem.
- *
- * Return: same as with tpm_transmit_cmd()
- */
-int tpm_unseal_trusted(struct tpm_chip *chip,
-		       struct trusted_key_payload *payload,
-		       struct trusted_key_options *options)
-{
-	int rc;
-
-	chip = tpm_find_get_ops(chip);
-	if (!chip || !(chip->flags & TPM_CHIP_FLAG_TPM2))
-		return -ENODEV;
-
-	rc = tpm2_unseal_trusted(chip, payload, options);
-
-	tpm_put_ops(chip);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_unseal_trusted);
-
 static int __init tpm_init(void)
 {
 	int rc;
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index edfa89160010..d52bf4df0bca 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -217,6 +217,7 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct tpm_chip *chip = to_tpm_chip(dev);
+	struct tpm1_version *version;
 	ssize_t rc = 0;
 	char *str = buf;
 	cap_t cap;
@@ -232,31 +233,31 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
 	str += sprintf(str, "Manufacturer: 0x%x\n",
 		       be32_to_cpu(cap.manufacturer_id));
 
-	/* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
-	rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
+	/* TPM 1.2 */
+	if (!tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
 			 "attempting to determine the 1.2 version",
-			 sizeof(cap.tpm_version_1_2));
-	if (!rc) {
-		str += sprintf(str,
-			       "TCG version: %d.%d\nFirmware version: %d.%d\n",
-			       cap.tpm_version_1_2.Major,
-			       cap.tpm_version_1_2.Minor,
-			       cap.tpm_version_1_2.revMajor,
-			       cap.tpm_version_1_2.revMinor);
-	} else {
-		/* Otherwise just use TPM_STRUCT_VER */
-		if (tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
-				"attempting to determine the 1.1 version",
-				sizeof(cap.tpm_version)))
-			goto out_ops;
-		str += sprintf(str,
-			       "TCG version: %d.%d\nFirmware version: %d.%d\n",
-			       cap.tpm_version.Major,
-			       cap.tpm_version.Minor,
-			       cap.tpm_version.revMajor,
-			       cap.tpm_version.revMinor);
+			 sizeof(cap.version2))) {
+		version = &cap.version2.version;
+		goto out_print;
 	}
+
+	/* TPM 1.1 */
+	if (tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
+			"attempting to determine the 1.1 version",
+			sizeof(cap.version1))) {
+		goto out_ops;
+	}
+
+	version = &cap.version1;
+
+out_print:
+	str += sprintf(str,
+		       "TCG version: %d.%d\nFirmware version: %d.%d\n",
+		       version->major, version->minor,
+		       version->rev_major, version->rev_minor);
+
 	rc = str - buf;
+
 out_ops:
 	tpm_put_ops(chip);
 	return rc;
@@ -309,7 +310,17 @@ static ssize_t timeouts_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(timeouts);
 
-static struct attribute *tpm_dev_attrs[] = {
+static ssize_t tpm_version_major_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct tpm_chip *chip = to_tpm_chip(dev);
+
+	return sprintf(buf, "%s\n", chip->flags & TPM_CHIP_FLAG_TPM2
+		       ? "2" : "1");
+}
+static DEVICE_ATTR_RO(tpm_version_major);
+
+static struct attribute *tpm1_dev_attrs[] = {
 	&dev_attr_pubek.attr,
 	&dev_attr_pcrs.attr,
 	&dev_attr_enabled.attr,
@@ -320,18 +331,28 @@ static struct attribute *tpm_dev_attrs[] = {
 	&dev_attr_cancel.attr,
 	&dev_attr_durations.attr,
 	&dev_attr_timeouts.attr,
+	&dev_attr_tpm_version_major.attr,
 	NULL,
 };
 
-static const struct attribute_group tpm_dev_group = {
-	.attrs = tpm_dev_attrs,
+static struct attribute *tpm2_dev_attrs[] = {
+	&dev_attr_tpm_version_major.attr,
+	NULL
+};
+
+static const struct attribute_group tpm1_dev_group = {
+	.attrs = tpm1_dev_attrs,
+};
+
+static const struct attribute_group tpm2_dev_group = {
+	.attrs = tpm2_dev_attrs,
 };
 
 void tpm_sysfs_add_device(struct tpm_chip *chip)
 {
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		return;
-
 	WARN_ON(chip->groups_cnt != 0);
-	chip->groups[chip->groups_cnt++] = &tpm_dev_group;
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		chip->groups[chip->groups_cnt++] = &tpm2_dev_group;
+	else
+		chip->groups[chip->groups_cnt++] = &tpm1_dev_group;
 }
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index a7fea3e0ca86..5620747da0cf 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -25,7 +25,6 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/tpm.h>
-#include <linux/highmem.h>
 #include <linux/tpm_eventlog.h>
 
 #ifdef CONFIG_X86
@@ -58,123 +57,6 @@ enum tpm_addr {
 #define TPM_ERR_DISABLED        0x7
 #define TPM_ERR_INVALID_POSTINIT 38
 
-#define TPM_HEADER_SIZE		10
-
-enum tpm2_const {
-	TPM2_PLATFORM_PCR       =     24,
-	TPM2_PCR_SELECT_MIN     = ((TPM2_PLATFORM_PCR + 7) / 8),
-};
-
-enum tpm2_timeouts {
-	TPM2_TIMEOUT_A          =    750,
-	TPM2_TIMEOUT_B          =   2000,
-	TPM2_TIMEOUT_C          =    200,
-	TPM2_TIMEOUT_D          =     30,
-	TPM2_DURATION_SHORT     =     20,
-	TPM2_DURATION_MEDIUM    =    750,
-	TPM2_DURATION_LONG      =   2000,
-	TPM2_DURATION_LONG_LONG = 300000,
-	TPM2_DURATION_DEFAULT   = 120000,
-};
-
-enum tpm2_structures {
-	TPM2_ST_NO_SESSIONS	= 0x8001,
-	TPM2_ST_SESSIONS	= 0x8002,
-};
-
-/* Indicates from what layer of the software stack the error comes from */
-#define TSS2_RC_LAYER_SHIFT	 16
-#define TSS2_RESMGR_TPM_RC_LAYER (11 << TSS2_RC_LAYER_SHIFT)
-
-enum tpm2_return_codes {
-	TPM2_RC_SUCCESS		= 0x0000,
-	TPM2_RC_HASH		= 0x0083, /* RC_FMT1 */
-	TPM2_RC_HANDLE		= 0x008B,
-	TPM2_RC_INITIALIZE	= 0x0100, /* RC_VER1 */
-	TPM2_RC_FAILURE		= 0x0101,
-	TPM2_RC_DISABLED	= 0x0120,
-	TPM2_RC_COMMAND_CODE    = 0x0143,
-	TPM2_RC_TESTING		= 0x090A, /* RC_WARN */
-	TPM2_RC_REFERENCE_H0	= 0x0910,
-	TPM2_RC_RETRY		= 0x0922,
-};
-
-enum tpm2_command_codes {
-	TPM2_CC_FIRST		        = 0x011F,
-	TPM2_CC_HIERARCHY_CONTROL       = 0x0121,
-	TPM2_CC_HIERARCHY_CHANGE_AUTH   = 0x0129,
-	TPM2_CC_CREATE_PRIMARY          = 0x0131,
-	TPM2_CC_SEQUENCE_COMPLETE       = 0x013E,
-	TPM2_CC_SELF_TEST	        = 0x0143,
-	TPM2_CC_STARTUP		        = 0x0144,
-	TPM2_CC_SHUTDOWN	        = 0x0145,
-	TPM2_CC_NV_READ                 = 0x014E,
-	TPM2_CC_CREATE		        = 0x0153,
-	TPM2_CC_LOAD		        = 0x0157,
-	TPM2_CC_SEQUENCE_UPDATE         = 0x015C,
-	TPM2_CC_UNSEAL		        = 0x015E,
-	TPM2_CC_CONTEXT_LOAD	        = 0x0161,
-	TPM2_CC_CONTEXT_SAVE	        = 0x0162,
-	TPM2_CC_FLUSH_CONTEXT	        = 0x0165,
-	TPM2_CC_VERIFY_SIGNATURE        = 0x0177,
-	TPM2_CC_GET_CAPABILITY	        = 0x017A,
-	TPM2_CC_GET_RANDOM	        = 0x017B,
-	TPM2_CC_PCR_READ	        = 0x017E,
-	TPM2_CC_PCR_EXTEND	        = 0x0182,
-	TPM2_CC_EVENT_SEQUENCE_COMPLETE = 0x0185,
-	TPM2_CC_HASH_SEQUENCE_START     = 0x0186,
-	TPM2_CC_CREATE_LOADED           = 0x0191,
-	TPM2_CC_LAST		        = 0x0193, /* Spec 1.36 */
-};
-
-enum tpm2_permanent_handles {
-	TPM2_RS_PW		= 0x40000009,
-};
-
-enum tpm2_capabilities {
-	TPM2_CAP_HANDLES	= 1,
-	TPM2_CAP_COMMANDS	= 2,
-	TPM2_CAP_PCRS		= 5,
-	TPM2_CAP_TPM_PROPERTIES = 6,
-};
-
-enum tpm2_properties {
-	TPM_PT_TOTAL_COMMANDS	= 0x0129,
-};
-
-enum tpm2_startup_types {
-	TPM2_SU_CLEAR	= 0x0000,
-	TPM2_SU_STATE	= 0x0001,
-};
-
-enum tpm2_cc_attrs {
-	TPM2_CC_ATTR_CHANDLES	= 25,
-	TPM2_CC_ATTR_RHANDLE	= 28,
-};
-
-#define TPM_VID_INTEL    0x8086
-#define TPM_VID_WINBOND  0x1050
-#define TPM_VID_STM      0x104A
-
-enum tpm_chip_flags {
-	TPM_CHIP_FLAG_TPM2		= BIT(1),
-	TPM_CHIP_FLAG_IRQ		= BIT(2),
-	TPM_CHIP_FLAG_VIRTUAL		= BIT(3),
-	TPM_CHIP_FLAG_HAVE_TIMEOUTS	= BIT(4),
-	TPM_CHIP_FLAG_ALWAYS_POWERED	= BIT(5),
-};
-
-#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
-
-struct tpm_header {
-	__be16 tag;
-	__be32 length;
-	union {
-		__be32 ordinal;
-		__be32 return_code;
-	};
-} __packed;
-
 #define TPM_TAG_RQU_COMMAND 193
 
 struct	stclear_flags_t {
@@ -186,19 +68,16 @@ struct	stclear_flags_t {
 	u8	bGlobalLock;
 } __packed;
 
-struct	tpm_version_t {
-	u8	Major;
-	u8	Minor;
-	u8	revMajor;
-	u8	revMinor;
+struct tpm1_version {
+	u8 major;
+	u8 minor;
+	u8 rev_major;
+	u8 rev_minor;
 } __packed;
 
-struct	tpm_version_1_2_t {
-	__be16	tag;
-	u8	Major;
-	u8	Minor;
-	u8	revMajor;
-	u8	revMinor;
+struct tpm1_version2 {
+	__be16 tag;
+	struct tpm1_version version;
 } __packed;
 
 struct	timeout_t {
@@ -243,8 +122,8 @@ typedef union {
 	struct	stclear_flags_t	stclear_flags;
 	__u8	owned;
 	__be32	num_pcrs;
-	struct	tpm_version_t	tpm_version;
-	struct	tpm_version_1_2_t tpm_version_1_2;
+	struct tpm1_version version1;
+	struct tpm1_version2 version2;
 	__be32	manufacturer_id;
 	struct timeout_t  timeout;
 	struct duration_t duration;
@@ -274,102 +153,6 @@ enum tpm_sub_capabilities {
  * compiler warnings about stack frame size. */
 #define TPM_MAX_RNG_DATA	128
 
-/* A string buffer type for constructing TPM commands. This is based on the
- * ideas of string buffer code in security/keys/trusted.h but is heap based
- * in order to keep the stack usage minimal.
- */
-
-enum tpm_buf_flags {
-	TPM_BUF_OVERFLOW	= BIT(0),
-};
-
-struct tpm_buf {
-	struct page *data_page;
-	unsigned int flags;
-	u8 *data;
-};
-
-static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal)
-{
-	struct tpm_header *head = (struct tpm_header *)buf->data;
-
-	head->tag = cpu_to_be16(tag);
-	head->length = cpu_to_be32(sizeof(*head));
-	head->ordinal = cpu_to_be32(ordinal);
-}
-
-static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal)
-{
-	buf->data_page = alloc_page(GFP_HIGHUSER);
-	if (!buf->data_page)
-		return -ENOMEM;
-
-	buf->flags = 0;
-	buf->data = kmap(buf->data_page);
-	tpm_buf_reset(buf, tag, ordinal);
-	return 0;
-}
-
-static inline void tpm_buf_destroy(struct tpm_buf *buf)
-{
-	kunmap(buf->data_page);
-	__free_page(buf->data_page);
-}
-
-static inline u32 tpm_buf_length(struct tpm_buf *buf)
-{
-	struct tpm_header *head = (struct tpm_header *)buf->data;
-
-	return be32_to_cpu(head->length);
-}
-
-static inline u16 tpm_buf_tag(struct tpm_buf *buf)
-{
-	struct tpm_header *head = (struct tpm_header *)buf->data;
-
-	return be16_to_cpu(head->tag);
-}
-
-static inline void tpm_buf_append(struct tpm_buf *buf,
-				  const unsigned char *new_data,
-				  unsigned int new_len)
-{
-	struct tpm_header *head = (struct tpm_header *)buf->data;
-	u32 len = tpm_buf_length(buf);
-
-	/* Return silently if overflow has already happened. */
-	if (buf->flags & TPM_BUF_OVERFLOW)
-		return;
-
-	if ((len + new_len) > PAGE_SIZE) {
-		WARN(1, "tpm_buf: overflow\n");
-		buf->flags |= TPM_BUF_OVERFLOW;
-		return;
-	}
-
-	memcpy(&buf->data[len], new_data, new_len);
-	head->length = cpu_to_be32(len + new_len);
-}
-
-static inline void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value)
-{
-	tpm_buf_append(buf, &value, 1);
-}
-
-static inline void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value)
-{
-	__be16 value2 = cpu_to_be16(value);
-
-	tpm_buf_append(buf, (u8 *) &value2, 2);
-}
-
-static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value)
-{
-	__be32 value2 = cpu_to_be32(value);
-
-	tpm_buf_append(buf, (u8 *) &value2, 4);
-}
-
 extern struct class *tpm_class;
 extern struct class *tpmrm_class;
 extern dev_t tpm_devt;
@@ -429,24 +212,12 @@ static inline void tpm_add_ppi(struct tpm_chip *chip)
 }
 #endif
 
-static inline u32 tpm2_rc_value(u32 rc)
-{
-	return (rc & BIT(7)) ? rc & 0xff : rc;
-}
-
 int tpm2_get_timeouts(struct tpm_chip *chip);
 int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
 		  struct tpm_digest *digest, u16 *digest_size_ptr);
 int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
 		    struct tpm_digest *digests);
 int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
-void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
-int tpm2_seal_trusted(struct tpm_chip *chip,
-		      struct trusted_key_payload *payload,
-		      struct trusted_key_options *options);
-int tpm2_unseal_trusted(struct tpm_chip *chip,
-			struct trusted_key_payload *payload,
-			struct trusted_key_options *options);
 ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
 			u32 *value, const char *desc);
 
diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index 149e953ca369..ca7158fa6e6c 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -343,6 +343,7 @@ int tpm1_get_timeouts(struct tpm_chip *chip)
 {
 	cap_t cap;
 	unsigned long timeout_old[4], timeout_chip[4], timeout_eff[4];
+	unsigned long durations[3];
 	ssize_t rc;
 
 	rc = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, NULL,
@@ -427,6 +428,20 @@ int tpm1_get_timeouts(struct tpm_chip *chip)
 		usecs_to_jiffies(be32_to_cpu(cap.duration.tpm_long));
 	chip->duration[TPM_LONG_LONG] = 0; /* not used under 1.2 */
 
+	/*
+	 * Provide the ability for vendor overrides of duration values in case
+	 * of misreporting.
+	 */
+	if (chip->ops->update_durations)
+		chip->ops->update_durations(chip, durations);
+
+	if (chip->duration_adjusted) {
+		dev_info(&chip->dev, HW_ERR "Adjusting reported durations.");
+		chip->duration[TPM_SHORT] = durations[0];
+		chip->duration[TPM_MEDIUM] = durations[1];
+		chip->duration[TPM_LONG] = durations[2];
+	}
+
 	/* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
 	 * value wrong and apparently reports msecs rather than usecs. So we
 	 * fix up the resulting too-small TPM_SHORT value to make things work.
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index ba9acae83bff..13696deceae8 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -13,20 +13,6 @@
 
 #include "tpm.h"
 #include <crypto/hash_info.h>
-#include <keys/trusted-type.h>
-
-enum tpm2_object_attributes {
-	TPM2_OA_USER_WITH_AUTH		= BIT(6),
-};
-
-enum tpm2_session_attributes {
-	TPM2_SA_CONTINUE_SESSION	= BIT(0),
-};
-
-struct tpm2_hash {
-	unsigned int crypto_id;
-	unsigned int tpm_id;
-};
 
 static struct tpm2_hash tpm2_hash_map[] = {
 	{HASH_ALGO_SHA1, TPM_ALG_SHA1},
@@ -376,299 +362,7 @@ void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
 	tpm_transmit_cmd(chip, &buf, 0, "flushing context");
 	tpm_buf_destroy(&buf);
 }
-
-/**
- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
- *
- * @buf: an allocated tpm_buf instance
- * @session_handle: session handle
- * @nonce: the session nonce, may be NULL if not used
- * @nonce_len: the session nonce length, may be 0 if not used
- * @attributes: the session attributes
- * @hmac: the session HMAC or password, may be NULL if not used
- * @hmac_len: the session HMAC or password length, maybe 0 if not used
- */
-static void tpm2_buf_append_auth(struct tpm_buf *buf, u32 session_handle,
-				 const u8 *nonce, u16 nonce_len,
-				 u8 attributes,
-				 const u8 *hmac, u16 hmac_len)
-{
-	tpm_buf_append_u32(buf, 9 + nonce_len + hmac_len);
-	tpm_buf_append_u32(buf, session_handle);
-	tpm_buf_append_u16(buf, nonce_len);
-
-	if (nonce && nonce_len)
-		tpm_buf_append(buf, nonce, nonce_len);
-
-	tpm_buf_append_u8(buf, attributes);
-	tpm_buf_append_u16(buf, hmac_len);
-
-	if (hmac && hmac_len)
-		tpm_buf_append(buf, hmac, hmac_len);
-}
-
-/**
- * tpm2_seal_trusted() - seal the payload of a trusted key
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- *
- * Return: < 0 on error and 0 on success.
- */
-int tpm2_seal_trusted(struct tpm_chip *chip,
-		      struct trusted_key_payload *payload,
-		      struct trusted_key_options *options)
-{
-	unsigned int blob_len;
-	struct tpm_buf buf;
-	u32 hash;
-	int i;
-	int rc;
-
-	for (i = 0; i < ARRAY_SIZE(tpm2_hash_map); i++) {
-		if (options->hash == tpm2_hash_map[i].crypto_id) {
-			hash = tpm2_hash_map[i].tpm_id;
-			break;
-		}
-	}
-
-	if (i == ARRAY_SIZE(tpm2_hash_map))
-		return -EINVAL;
-
-	rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE);
-	if (rc)
-		return rc;
-
-	tpm_buf_append_u32(&buf, options->keyhandle);
-	tpm2_buf_append_auth(&buf, TPM2_RS_PW,
-			     NULL /* nonce */, 0,
-			     0 /* session_attributes */,
-			     options->keyauth /* hmac */,
-			     TPM_DIGEST_SIZE);
-
-	/* sensitive */
-	tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len + 1);
-
-	tpm_buf_append_u16(&buf, TPM_DIGEST_SIZE);
-	tpm_buf_append(&buf, options->blobauth, TPM_DIGEST_SIZE);
-	tpm_buf_append_u16(&buf, payload->key_len + 1);
-	tpm_buf_append(&buf, payload->key, payload->key_len);
-	tpm_buf_append_u8(&buf, payload->migratable);
-
-	/* public */
-	tpm_buf_append_u16(&buf, 14 + options->policydigest_len);
-	tpm_buf_append_u16(&buf, TPM_ALG_KEYEDHASH);
-	tpm_buf_append_u16(&buf, hash);
-
-	/* policy */
-	if (options->policydigest_len) {
-		tpm_buf_append_u32(&buf, 0);
-		tpm_buf_append_u16(&buf, options->policydigest_len);
-		tpm_buf_append(&buf, options->policydigest,
-			       options->policydigest_len);
-	} else {
-		tpm_buf_append_u32(&buf, TPM2_OA_USER_WITH_AUTH);
-		tpm_buf_append_u16(&buf, 0);
-	}
-
-	/* public parameters */
-	tpm_buf_append_u16(&buf, TPM_ALG_NULL);
-	tpm_buf_append_u16(&buf, 0);
-
-	/* outside info */
-	tpm_buf_append_u16(&buf, 0);
-
-	/* creation PCR */
-	tpm_buf_append_u32(&buf, 0);
-
-	if (buf.flags & TPM_BUF_OVERFLOW) {
-		rc = -E2BIG;
-		goto out;
-	}
-
-	rc = tpm_transmit_cmd(chip, &buf, 4, "sealing data");
-	if (rc)
-		goto out;
-
-	blob_len = be32_to_cpup((__be32 *) &buf.data[TPM_HEADER_SIZE]);
-	if (blob_len > MAX_BLOB_SIZE) {
-		rc = -E2BIG;
-		goto out;
-	}
-	if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 4 + blob_len) {
-		rc = -EFAULT;
-		goto out;
-	}
-
-	memcpy(payload->blob, &buf.data[TPM_HEADER_SIZE + 4], blob_len);
-	payload->blob_len = blob_len;
-
-out:
-	tpm_buf_destroy(&buf);
-
-	if (rc > 0) {
-		if (tpm2_rc_value(rc) == TPM2_RC_HASH)
-			rc = -EINVAL;
-		else
-			rc = -EPERM;
-	}
-
-	return rc;
-}
-
-/**
- * tpm2_load_cmd() - execute a TPM2_Load command
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- * @blob_handle: returned blob handle
- *
- * Return: 0 on success.
- *        -E2BIG on wrong payload size.
- *        -EPERM on tpm error status.
- *        < 0 error from tpm_transmit_cmd.
- */
-static int tpm2_load_cmd(struct tpm_chip *chip,
-			 struct trusted_key_payload *payload,
-			 struct trusted_key_options *options,
-			 u32 *blob_handle)
-{
-	struct tpm_buf buf;
-	unsigned int private_len;
-	unsigned int public_len;
-	unsigned int blob_len;
-	int rc;
-
-	private_len = be16_to_cpup((__be16 *) &payload->blob[0]);
-	if (private_len > (payload->blob_len - 2))
-		return -E2BIG;
-
-	public_len = be16_to_cpup((__be16 *) &payload->blob[2 + private_len]);
-	blob_len = private_len + public_len + 4;
-	if (blob_len > payload->blob_len)
-		return -E2BIG;
-
-	rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_LOAD);
-	if (rc)
-		return rc;
-
-	tpm_buf_append_u32(&buf, options->keyhandle);
-	tpm2_buf_append_auth(&buf, TPM2_RS_PW,
-			     NULL /* nonce */, 0,
-			     0 /* session_attributes */,
-			     options->keyauth /* hmac */,
-			     TPM_DIGEST_SIZE);
-
-	tpm_buf_append(&buf, payload->blob, blob_len);
-
-	if (buf.flags & TPM_BUF_OVERFLOW) {
-		rc = -E2BIG;
-		goto out;
-	}
-
-	rc = tpm_transmit_cmd(chip, &buf, 4, "loading blob");
-	if (!rc)
-		*blob_handle = be32_to_cpup(
-			(__be32 *) &buf.data[TPM_HEADER_SIZE]);
-
-out:
-	tpm_buf_destroy(&buf);
-
-	if (rc > 0)
-		rc = -EPERM;
-
-	return rc;
-}
-
-/**
- * tpm2_unseal_cmd() - execute a TPM2_Unload command
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- * @blob_handle: blob handle
- *
- * Return: 0 on success
- *         -EPERM on tpm error status
- *         < 0 error from tpm_transmit_cmd
- */
-static int tpm2_unseal_cmd(struct tpm_chip *chip,
-			   struct trusted_key_payload *payload,
-			   struct trusted_key_options *options,
-			   u32 blob_handle)
-{
-	struct tpm_buf buf;
-	u16 data_len;
-	u8 *data;
-	int rc;
-
-	rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_UNSEAL);
-	if (rc)
-		return rc;
-
-	tpm_buf_append_u32(&buf, blob_handle);
-	tpm2_buf_append_auth(&buf,
-			     options->policyhandle ?
-			     options->policyhandle : TPM2_RS_PW,
-			     NULL /* nonce */, 0,
-			     TPM2_SA_CONTINUE_SESSION,
-			     options->blobauth /* hmac */,
-			     TPM_DIGEST_SIZE);
-
-	rc = tpm_transmit_cmd(chip, &buf, 6, "unsealing");
-	if (rc > 0)
-		rc = -EPERM;
-
-	if (!rc) {
-		data_len = be16_to_cpup(
-			(__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
-		if (data_len < MIN_KEY_SIZE ||  data_len > MAX_KEY_SIZE + 1) {
-			rc = -EFAULT;
-			goto out;
-		}
-
-		if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 6 + data_len) {
-			rc = -EFAULT;
-			goto out;
-		}
-		data = &buf.data[TPM_HEADER_SIZE + 6];
-
-		memcpy(payload->key, data, data_len - 1);
-		payload->key_len = data_len - 1;
-		payload->migratable = data[data_len - 1];
-	}
-
-out:
-	tpm_buf_destroy(&buf);
-	return rc;
-}
-
-/**
- * tpm2_unseal_trusted() - unseal the payload of a trusted key
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- *
- * Return: Same as with tpm_transmit_cmd.
- */
-int tpm2_unseal_trusted(struct tpm_chip *chip,
-			struct trusted_key_payload *payload,
-			struct trusted_key_options *options)
-{
-	u32 blob_handle;
-	int rc;
-
-	rc = tpm2_load_cmd(chip, payload, options, &blob_handle);
-	if (rc)
-		return rc;
-
-	rc = tpm2_unseal_cmd(chip, payload, options, blob_handle);
-	tpm2_flush_context(chip, blob_handle);
-	return rc;
-}
+EXPORT_SYMBOL_GPL(tpm2_flush_context);
 
 struct tpm2_get_cap_out {
 	u8 more_data;
@@ -939,6 +633,10 @@ static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
 
 	chip->cc_attrs_tbl = devm_kcalloc(&chip->dev, 4, nr_commands,
 					  GFP_KERNEL);
+	if (!chip->cc_attrs_tbl) {
+		rc = -ENOMEM;
+		goto out;
+	}
 
 	rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_CAPABILITY);
 	if (rc)
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index e59f1f91d7f3..a9dcf31eadd2 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -22,6 +22,7 @@
 #include "tpm.h"
 
 #define ACPI_SIG_TPM2 "TPM2"
+#define TPM_CRB_MAX_RESOURCES 3
 
 static const guid_t crb_acpi_start_guid =
 	GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
@@ -91,7 +92,6 @@ enum crb_status {
 struct crb_priv {
 	u32 sm;
 	const char *hid;
-	void __iomem *iobase;
 	struct crb_regs_head __iomem *regs_h;
 	struct crb_regs_tail __iomem *regs_t;
 	u8 __iomem *cmd;
@@ -434,21 +434,27 @@ static const struct tpm_class_ops tpm_crb = {
 
 static int crb_check_resource(struct acpi_resource *ares, void *data)
 {
-	struct resource *io_res = data;
+	struct resource *iores_array = data;
 	struct resource_win win;
 	struct resource *res = &(win.res);
+	int i;
 
 	if (acpi_dev_resource_memory(ares, res) ||
 	    acpi_dev_resource_address_space(ares, &win)) {
-		*io_res = *res;
-		io_res->name = NULL;
+		for (i = 0; i < TPM_CRB_MAX_RESOURCES + 1; ++i) {
+			if (resource_type(iores_array + i) != IORESOURCE_MEM) {
+				iores_array[i] = *res;
+				iores_array[i].name = NULL;
+				break;
+			}
+		}
 	}
 
 	return 1;
 }
 
-static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
-				 struct resource *io_res, u64 start, u32 size)
+static void __iomem *crb_map_res(struct device *dev, struct resource *iores,
+				 void __iomem **iobase_ptr, u64 start, u32 size)
 {
 	struct resource new_res = {
 		.start	= start,
@@ -460,10 +466,16 @@ static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
 	if (start != new_res.start)
 		return (void __iomem *) ERR_PTR(-EINVAL);
 
-	if (!resource_contains(io_res, &new_res))
+	if (!iores)
 		return devm_ioremap_resource(dev, &new_res);
 
-	return priv->iobase + (new_res.start - io_res->start);
+	if (!*iobase_ptr) {
+		*iobase_ptr = devm_ioremap_resource(dev, iores);
+		if (IS_ERR(*iobase_ptr))
+			return *iobase_ptr;
+	}
+
+	return *iobase_ptr + (new_res.start - iores->start);
 }
 
 /*
@@ -490,9 +502,13 @@ static u64 crb_fixup_cmd_size(struct device *dev, struct resource *io_res,
 static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 		      struct acpi_table_tpm2 *buf)
 {
-	struct list_head resources;
-	struct resource io_res;
+	struct list_head acpi_resource_list;
+	struct resource iores_array[TPM_CRB_MAX_RESOURCES + 1] = { {0} };
+	void __iomem *iobase_array[TPM_CRB_MAX_RESOURCES] = {NULL};
 	struct device *dev = &device->dev;
+	struct resource *iores;
+	void __iomem **iobase_ptr;
+	int i;
 	u32 pa_high, pa_low;
 	u64 cmd_pa;
 	u32 cmd_size;
@@ -501,21 +517,41 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	u32 rsp_size;
 	int ret;
 
-	INIT_LIST_HEAD(&resources);
-	ret = acpi_dev_get_resources(device, &resources, crb_check_resource,
-				     &io_res);
+	INIT_LIST_HEAD(&acpi_resource_list);
+	ret = acpi_dev_get_resources(device, &acpi_resource_list,
+				     crb_check_resource, iores_array);
 	if (ret < 0)
 		return ret;
-	acpi_dev_free_resource_list(&resources);
+	acpi_dev_free_resource_list(&acpi_resource_list);
 
-	if (resource_type(&io_res) != IORESOURCE_MEM) {
+	if (resource_type(iores_array) != IORESOURCE_MEM) {
 		dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
 		return -EINVAL;
+	} else if (resource_type(iores_array + TPM_CRB_MAX_RESOURCES) ==
+		IORESOURCE_MEM) {
+		dev_warn(dev, "TPM2 ACPI table defines too many memory resources\n");
+		memset(iores_array + TPM_CRB_MAX_RESOURCES,
+		       0, sizeof(*iores_array));
+		iores_array[TPM_CRB_MAX_RESOURCES].flags = 0;
 	}
 
-	priv->iobase = devm_ioremap_resource(dev, &io_res);
-	if (IS_ERR(priv->iobase))
-		return PTR_ERR(priv->iobase);
+	iores = NULL;
+	iobase_ptr = NULL;
+	for (i = 0; resource_type(iores_array + i) == IORESOURCE_MEM; ++i) {
+		if (buf->control_address >= iores_array[i].start &&
+		    buf->control_address + sizeof(struct crb_regs_tail) - 1 <=
+		    iores_array[i].end) {
+			iores = iores_array + i;
+			iobase_ptr = iobase_array + i;
+			break;
+		}
+	}
+
+	priv->regs_t = crb_map_res(dev, iores, iobase_ptr, buf->control_address,
+				   sizeof(struct crb_regs_tail));
+
+	if (IS_ERR(priv->regs_t))
+		return PTR_ERR(priv->regs_t);
 
 	/* The ACPI IO region starts at the head area and continues to include
 	 * the control area, as one nice sane region except for some older
@@ -523,9 +559,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	 */
 	if ((priv->sm == ACPI_TPM2_COMMAND_BUFFER) ||
 	    (priv->sm == ACPI_TPM2_MEMORY_MAPPED)) {
-		if (buf->control_address == io_res.start +
+		if (iores &&
+		    buf->control_address == iores->start +
 		    sizeof(*priv->regs_h))
-			priv->regs_h = priv->iobase;
+			priv->regs_h = *iobase_ptr;
 		else
 			dev_warn(dev, FW_BUG "Bad ACPI memory layout");
 	}
@@ -534,13 +571,6 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	if (ret)
 		return ret;
 
-	priv->regs_t = crb_map_res(dev, priv, &io_res, buf->control_address,
-				   sizeof(struct crb_regs_tail));
-	if (IS_ERR(priv->regs_t)) {
-		ret = PTR_ERR(priv->regs_t);
-		goto out_relinquish_locality;
-	}
-
 	/*
 	 * PTT HW bug w/a: wake up the device to access
 	 * possibly not retained registers.
@@ -552,13 +582,26 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	pa_high = ioread32(&priv->regs_t->ctrl_cmd_pa_high);
 	pa_low  = ioread32(&priv->regs_t->ctrl_cmd_pa_low);
 	cmd_pa = ((u64)pa_high << 32) | pa_low;
-	cmd_size = crb_fixup_cmd_size(dev, &io_res, cmd_pa,
-				      ioread32(&priv->regs_t->ctrl_cmd_size));
+	cmd_size = ioread32(&priv->regs_t->ctrl_cmd_size);
+
+	iores = NULL;
+	iobase_ptr = NULL;
+	for (i = 0; iores_array[i].end; ++i) {
+		if (cmd_pa >= iores_array[i].start &&
+		    cmd_pa <= iores_array[i].end) {
+			iores = iores_array + i;
+			iobase_ptr = iobase_array + i;
+			break;
+		}
+	}
+
+	if (iores)
+		cmd_size = crb_fixup_cmd_size(dev, iores, cmd_pa, cmd_size);
 
 	dev_dbg(dev, "cmd_hi = %X cmd_low = %X cmd_size %X\n",
 		pa_high, pa_low, cmd_size);
 
-	priv->cmd = crb_map_res(dev, priv, &io_res, cmd_pa, cmd_size);
+	priv->cmd = crb_map_res(dev, iores, iobase_ptr,	cmd_pa, cmd_size);
 	if (IS_ERR(priv->cmd)) {
 		ret = PTR_ERR(priv->cmd);
 		goto out;
@@ -566,11 +609,25 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 
 	memcpy_fromio(&__rsp_pa, &priv->regs_t->ctrl_rsp_pa, 8);
 	rsp_pa = le64_to_cpu(__rsp_pa);
-	rsp_size = crb_fixup_cmd_size(dev, &io_res, rsp_pa,
-				      ioread32(&priv->regs_t->ctrl_rsp_size));
+	rsp_size = ioread32(&priv->regs_t->ctrl_rsp_size);
+
+	iores = NULL;
+	iobase_ptr = NULL;
+	for (i = 0; resource_type(iores_array + i) == IORESOURCE_MEM; ++i) {
+		if (rsp_pa >= iores_array[i].start &&
+		    rsp_pa <= iores_array[i].end) {
+			iores = iores_array + i;
+			iobase_ptr = iobase_array + i;
+			break;
+		}
+	}
+
+	if (iores)
+		rsp_size = crb_fixup_cmd_size(dev, iores, rsp_pa, rsp_size);
 
 	if (cmd_pa != rsp_pa) {
-		priv->rsp = crb_map_res(dev, priv, &io_res, rsp_pa, rsp_size);
+		priv->rsp = crb_map_res(dev, iores, iobase_ptr,
+					rsp_pa, rsp_size);
 		ret = PTR_ERR_OR_ZERO(priv->rsp);
 		goto out;
 	}
diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
index 6640a14dbe48..22bf553ccf9d 100644
--- a/drivers/char/tpm/tpm_ftpm_tee.c
+++ b/drivers/char/tpm/tpm_ftpm_tee.c
@@ -32,7 +32,7 @@ static const uuid_t ftpm_ta_uuid =
 		  0x82, 0xCB, 0x34, 0x3F, 0xB7, 0xF3, 0x78, 0x96);
 
 /**
- * ftpm_tee_tpm_op_recv - retrieve fTPM response.
+ * ftpm_tee_tpm_op_recv() - retrieve fTPM response.
  * @chip:	the tpm_chip description as specified in driver/char/tpm/tpm.h.
  * @buf:	the buffer to store data.
  * @count:	the number of bytes to read.
@@ -61,7 +61,7 @@ static int ftpm_tee_tpm_op_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 }
 
 /**
- * ftpm_tee_tpm_op_send - send TPM commands through the TEE shared memory.
+ * ftpm_tee_tpm_op_send() - send TPM commands through the TEE shared memory.
  * @chip:	the tpm_chip description as specified in driver/char/tpm/tpm.h
  * @buf:	the buffer to send.
  * @len:	the number of bytes to send.
@@ -208,7 +208,7 @@ static int ftpm_tee_match(struct tee_ioctl_version_data *ver, const void *data)
 }
 
 /**
- * ftpm_tee_probe - initialize the fTPM
+ * ftpm_tee_probe() - initialize the fTPM
  * @pdev: the platform_device description.
  *
  * Return:
@@ -298,7 +298,7 @@ out_tee_session:
 }
 
 /**
- * ftpm_tee_remove - remove the TPM device
+ * ftpm_tee_remove() - remove the TPM device
  * @pdev: the platform_device description.
  *
  * Return:
@@ -328,6 +328,19 @@ static int ftpm_tee_remove(struct platform_device *pdev)
 	return 0;
 }
 
+/**
+ * ftpm_tee_shutdown() - shutdown the TPM device
+ * @pdev: the platform_device description.
+ */
+static void ftpm_tee_shutdown(struct platform_device *pdev)
+{
+	struct ftpm_tee_private *pvt_data = dev_get_drvdata(&pdev->dev);
+
+	tee_shm_free(pvt_data->shm);
+	tee_client_close_session(pvt_data->ctx, pvt_data->session);
+	tee_client_close_context(pvt_data->ctx);
+}
+
 static const struct of_device_id of_ftpm_tee_ids[] = {
 	{ .compatible = "microsoft,ftpm" },
 	{ }
@@ -341,6 +354,7 @@ static struct platform_driver ftpm_tee_driver = {
 	},
 	.probe = ftpm_tee_probe,
 	.remove = ftpm_tee_remove,
+	.shutdown = ftpm_tee_shutdown,
 };
 
 module_platform_driver(ftpm_tee_driver);
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index e4fdde93ed4c..e7df342a317d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -286,7 +286,7 @@ static int tpm_tis_plat_probe(struct platform_device *pdev)
 	}
 	tpm_info.res = *res;
 
-	tpm_info.irq = platform_get_irq(pdev, 0);
+	tpm_info.irq = platform_get_irq_optional(pdev, 0);
 	if (tpm_info.irq <= 0) {
 		if (pdev != force_pdev)
 			tpm_info.irq = -1;
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 270f43acbb77..27c6ca031e23 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -506,6 +506,84 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 	return rc;
 }
 
+struct tis_vendor_durations_override {
+	u32 did_vid;
+	struct tpm1_version version;
+	unsigned long durations[3];
+};
+
+static const struct  tis_vendor_durations_override vendor_dur_overrides[] = {
+	/* STMicroelectronics 0x104a */
+	{ 0x0000104a,
+	  { 1, 2, 8, 28 },
+	  { (2 * 60 * HZ), (2 * 60 * HZ), (2 * 60 * HZ) } },
+};
+
+static void tpm_tis_update_durations(struct tpm_chip *chip,
+				     unsigned long *duration_cap)
+{
+	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+	struct tpm1_version *version;
+	u32 did_vid;
+	int i, rc;
+	cap_t cap;
+
+	chip->duration_adjusted = false;
+
+	if (chip->ops->clk_enable != NULL)
+		chip->ops->clk_enable(chip, true);
+
+	rc = tpm_tis_read32(priv, TPM_DID_VID(0), &did_vid);
+	if (rc < 0) {
+		dev_warn(&chip->dev, "%s: failed to read did_vid. %d\n",
+			 __func__, rc);
+		goto out;
+	}
+
+	/* Try to get a TPM version 1.2 or 1.1 TPM_CAP_VERSION_INFO */
+	rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
+			 "attempting to determine the 1.2 version",
+			 sizeof(cap.version2));
+	if (!rc) {
+		version = &cap.version2.version;
+	} else {
+		rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
+				 "attempting to determine the 1.1 version",
+				 sizeof(cap.version1));
+
+		if (rc)
+			goto out;
+
+		version = &cap.version1;
+	}
+
+	for (i = 0; i != ARRAY_SIZE(vendor_dur_overrides); i++) {
+		if (vendor_dur_overrides[i].did_vid != did_vid)
+			continue;
+
+		if ((version->major ==
+		     vendor_dur_overrides[i].version.major) &&
+		    (version->minor ==
+		     vendor_dur_overrides[i].version.minor) &&
+		    (version->rev_major ==
+		     vendor_dur_overrides[i].version.rev_major) &&
+		    (version->rev_minor ==
+		     vendor_dur_overrides[i].version.rev_minor)) {
+
+			memcpy(duration_cap,
+			       vendor_dur_overrides[i].durations,
+			       sizeof(vendor_dur_overrides[i].durations));
+
+			chip->duration_adjusted = true;
+			goto out;
+		}
+	}
+
+out:
+	if (chip->ops->clk_enable != NULL)
+		chip->ops->clk_enable(chip, false);
+}
+
 struct tis_vendor_timeout_override {
 	u32 did_vid;
 	unsigned long timeout_us[4];
@@ -842,6 +920,7 @@ static const struct tpm_class_ops tpm_tis = {
 	.send = tpm_tis_send,
 	.cancel = tpm_tis_ready,
 	.update_timeouts = tpm_tis_update_timeouts,
+	.update_durations = tpm_tis_update_durations,
 	.req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
 	.req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
 	.req_canceled = tpm_tis_req_canceled,
@@ -980,8 +1059,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 			goto out_err;
 		}
 
-		tpm_chip_start(chip);
-		chip->flags |= TPM_CHIP_FLAG_IRQ;
 		if (irq) {
 			tpm_tis_probe_irq_single(chip, intmask, IRQF_SHARED,
 						 irq);
@@ -991,7 +1068,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 		} else {
 			tpm_tis_probe_irq(chip, intmask);
 		}
-		tpm_chip_stop(chip);
 	}
 
 	rc = tpm_chip_register(chip);
diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c
index 19513e622053..d1754fd6c573 100644
--- a/drivers/char/tpm/tpm_tis_spi.c
+++ b/drivers/char/tpm/tpm_tis_spi.c
@@ -20,42 +20,64 @@
  * Dorn and Kyleen Hall and Jarko Sakkinnen.
  */
 
+#include <linux/acpi.h>
+#include <linux/completion.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/wait.h>
-#include <linux/acpi.h>
-#include <linux/freezer.h>
 
+#include <linux/of_device.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/tpm.h>
+
 #include "tpm.h"
 #include "tpm_tis_core.h"
+#include "tpm_tis_spi.h"
 
 #define MAX_SPI_FRAMESIZE 64
 
-struct tpm_tis_spi_phy {
-	struct tpm_tis_data priv;
-	struct spi_device *spi_device;
-	u8 *iobuf;
-};
-
-static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *data)
+/*
+ * TCG SPI flow control is documented in section 6.4 of the spec[1]. In short,
+ * keep trying to read from the device until MISO goes high indicating the
+ * wait state has ended.
+ *
+ * [1] https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
+ */
+static int tpm_tis_spi_flow_control(struct tpm_tis_spi_phy *phy,
+				    struct spi_transfer *spi_xfer)
 {
-	return container_of(data, struct tpm_tis_spi_phy, priv);
+	struct spi_message m;
+	int ret, i;
+
+	if ((phy->iobuf[3] & 0x01) == 0) {
+		// handle SPI wait states
+		phy->iobuf[0] = 0;
+
+		for (i = 0; i < TPM_RETRY; i++) {
+			spi_xfer->len = 1;
+			spi_message_init(&m);
+			spi_message_add_tail(spi_xfer, &m);
+			ret = spi_sync_locked(phy->spi_device, &m);
+			if (ret < 0)
+				return ret;
+			if (phy->iobuf[0] & 0x01)
+				break;
+		}
+
+		if (i == TPM_RETRY)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
 }
 
-static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
-				u8 *in, const u8 *out)
+int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+			 u8 *in, const u8 *out)
 {
 	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
 	int ret = 0;
-	int i;
 	struct spi_message m;
 	struct spi_transfer spi_xfer;
 	u8 transfer_len;
@@ -82,26 +104,9 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
 		if (ret < 0)
 			goto exit;
 
-		if ((phy->iobuf[3] & 0x01) == 0) {
-			// handle SPI wait states
-			phy->iobuf[0] = 0;
-
-			for (i = 0; i < TPM_RETRY; i++) {
-				spi_xfer.len = 1;
-				spi_message_init(&m);
-				spi_message_add_tail(&spi_xfer, &m);
-				ret = spi_sync_locked(phy->spi_device, &m);
-				if (ret < 0)
-					goto exit;
-				if (phy->iobuf[0] & 0x01)
-					break;
-			}
-
-			if (i == TPM_RETRY) {
-				ret = -ETIMEDOUT;
-				goto exit;
-			}
-		}
+		ret = phy->flow_control(phy, &spi_xfer);
+		if (ret < 0)
+			goto exit;
 
 		spi_xfer.cs_change = 0;
 		spi_xfer.len = transfer_len;
@@ -117,6 +122,7 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
 
 		spi_message_init(&m);
 		spi_message_add_tail(&spi_xfer, &m);
+		reinit_completion(&phy->ready);
 		ret = spi_sync_locked(phy->spi_device, &m);
 		if (ret < 0)
 			goto exit;
@@ -146,7 +152,7 @@ static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr,
 	return tpm_tis_spi_transfer(data, addr, len, NULL, value);
 }
 
-static int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
+int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
 {
 	__le16 result_le;
 	int rc;
@@ -159,7 +165,7 @@ static int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
 	return rc;
 }
 
-static int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
+int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
 {
 	__le32 result_le;
 	int rc;
@@ -172,7 +178,7 @@ static int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
 	return rc;
 }
 
-static int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
+int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
 {
 	__le32 value_le;
 	int rc;
@@ -184,6 +190,18 @@ static int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
 	return rc;
 }
 
+int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
+		     int irq, const struct tpm_tis_phy_ops *phy_ops)
+{
+	phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
+	if (!phy->iobuf)
+		return -ENOMEM;
+
+	phy->spi_device = spi;
+
+	return tpm_tis_core_init(&spi->dev, &phy->priv, irq, phy_ops, NULL);
+}
+
 static const struct tpm_tis_phy_ops tpm_spi_phy_ops = {
 	.read_bytes = tpm_tis_spi_read_bytes,
 	.write_bytes = tpm_tis_spi_write_bytes,
@@ -202,11 +220,7 @@ static int tpm_tis_spi_probe(struct spi_device *dev)
 	if (!phy)
 		return -ENOMEM;
 
-	phy->spi_device = dev;
-
-	phy->iobuf = devm_kmalloc(&dev->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
-	if (!phy->iobuf)
-		return -ENOMEM;
+	phy->flow_control = tpm_tis_spi_flow_control;
 
 	/* If the SPI device has an IRQ then use that */
 	if (dev->irq > 0)
@@ -214,11 +228,27 @@ static int tpm_tis_spi_probe(struct spi_device *dev)
 	else
 		irq = -1;
 
-	return tpm_tis_core_init(&dev->dev, &phy->priv, irq, &tpm_spi_phy_ops,
-				 NULL);
+	init_completion(&phy->ready);
+	return tpm_tis_spi_init(dev, phy, irq, &tpm_spi_phy_ops);
+}
+
+typedef int (*tpm_tis_spi_probe_func)(struct spi_device *);
+
+static int tpm_tis_spi_driver_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *spi_dev_id = spi_get_device_id(spi);
+	tpm_tis_spi_probe_func probe_func;
+
+	probe_func = of_device_get_match_data(&spi->dev);
+	if (!probe_func && spi_dev_id)
+		probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data;
+	if (!probe_func)
+		return -ENODEV;
+
+	return probe_func(spi);
 }
 
-static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume);
+static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume);
 
 static int tpm_tis_spi_remove(struct spi_device *dev)
 {
@@ -230,15 +260,17 @@ static int tpm_tis_spi_remove(struct spi_device *dev)
 }
 
 static const struct spi_device_id tpm_tis_spi_id[] = {
-	{"tpm_tis_spi", 0},
+	{ "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe },
+	{ "cr50", (unsigned long)cr50_spi_probe },
 	{}
 };
 MODULE_DEVICE_TABLE(spi, tpm_tis_spi_id);
 
 static const struct of_device_id of_tis_spi_match[] = {
-	{ .compatible = "st,st33htpm-spi", },
-	{ .compatible = "infineon,slb9670", },
-	{ .compatible = "tcg,tpm_tis-spi", },
+	{ .compatible = "st,st33htpm-spi", .data = tpm_tis_spi_probe },
+	{ .compatible = "infineon,slb9670", .data = tpm_tis_spi_probe },
+	{ .compatible = "tcg,tpm_tis-spi", .data = tpm_tis_spi_probe },
+	{ .compatible = "google,cr50", .data = cr50_spi_probe },
 	{}
 };
 MODULE_DEVICE_TABLE(of, of_tis_spi_match);
@@ -251,13 +283,12 @@ MODULE_DEVICE_TABLE(acpi, acpi_tis_spi_match);
 
 static struct spi_driver tpm_tis_spi_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "tpm_tis_spi",
 		.pm = &tpm_tis_pm,
 		.of_match_table = of_match_ptr(of_tis_spi_match),
 		.acpi_match_table = ACPI_PTR(acpi_tis_spi_match),
 	},
-	.probe = tpm_tis_spi_probe,
+	.probe = tpm_tis_spi_driver_probe,
 	.remove = tpm_tis_spi_remove,
 	.id_table = tpm_tis_spi_id,
 };
diff --git a/drivers/char/tpm/tpm_tis_spi.h b/drivers/char/tpm/tpm_tis_spi.h
new file mode 100644
index 000000000000..bba73979c368
--- /dev/null
+++ b/drivers/char/tpm/tpm_tis_spi.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 Infineon Technologies AG
+ * Copyright (C) 2016 STMicroelectronics SAS
+ */
+
+#ifndef TPM_TIS_SPI_H
+#define TPM_TIS_SPI_H
+
+#include "tpm_tis_core.h"
+
+struct tpm_tis_spi_phy {
+	struct tpm_tis_data priv;
+	struct spi_device *spi_device;
+	int (*flow_control)(struct tpm_tis_spi_phy *phy,
+			     struct spi_transfer *xfer);
+	struct completion ready;
+	unsigned long wake_after;
+
+	u8 *iobuf;
+};
+
+static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *data)
+{
+	return container_of(data, struct tpm_tis_spi_phy, priv);
+}
+
+extern int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
+			    int irq, const struct tpm_tis_phy_ops *phy_ops);
+
+extern int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+				u8 *in, const u8 *out);
+
+extern int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result);
+extern int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result);
+extern int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value);
+
+#ifdef CONFIG_TCG_TIS_SPI_CR50
+extern int cr50_spi_probe(struct spi_device *spi);
+#else
+static inline int cr50_spi_probe(struct spi_device *spi)
+{
+	return -ENODEV;
+}
+#endif
+
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_TCG_TIS_SPI_CR50)
+extern int tpm_tis_spi_resume(struct device *dev);
+#else
+#define tpm_tis_spi_resume	NULL
+#endif
+
+#endif
diff --git a/drivers/char/tpm/tpm_tis_spi_cr50.c b/drivers/char/tpm/tpm_tis_spi_cr50.c
new file mode 100644
index 000000000000..37d72e818335
--- /dev/null
+++ b/drivers/char/tpm/tpm_tis_spi_cr50.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This device driver implements a TCG PTP FIFO interface over SPI for chips
+ * with Cr50 firmware.
+ * It is based on tpm_tis_spi driver by Peter Huewe and Christophe Ricard.
+ */
+
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm.h>
+#include <linux/spi/spi.h>
+#include <linux/wait.h>
+
+#include "tpm_tis_core.h"
+#include "tpm_tis_spi.h"
+
+/*
+ * Cr50 timing constants:
+ * - can go to sleep not earlier than after CR50_SLEEP_DELAY_MSEC.
+ * - needs up to CR50_WAKE_START_DELAY_USEC to wake after sleep.
+ * - requires waiting for "ready" IRQ, if supported; or waiting for at least
+ *   CR50_NOIRQ_ACCESS_DELAY_MSEC between transactions, if IRQ is not supported.
+ * - waits for up to CR50_FLOW_CONTROL for flow control 'ready' indication.
+ */
+#define CR50_SLEEP_DELAY_MSEC			1000
+#define CR50_WAKE_START_DELAY_USEC		1000
+#define CR50_NOIRQ_ACCESS_DELAY			msecs_to_jiffies(2)
+#define CR50_READY_IRQ_TIMEOUT			msecs_to_jiffies(TPM2_TIMEOUT_A)
+#define CR50_FLOW_CONTROL			msecs_to_jiffies(TPM2_TIMEOUT_A)
+#define MAX_IRQ_CONFIRMATION_ATTEMPTS		3
+
+#define TPM_CR50_FW_VER(l)			(0x0f90 | ((l) << 12))
+#define TPM_CR50_MAX_FW_VER_LEN			64
+
+struct cr50_spi_phy {
+	struct tpm_tis_spi_phy spi_phy;
+
+	struct mutex time_track_mutex;
+	unsigned long last_access;
+
+	unsigned long access_delay;
+
+	unsigned int irq_confirmation_attempt;
+	bool irq_needs_confirmation;
+	bool irq_confirmed;
+};
+
+static inline struct cr50_spi_phy *to_cr50_spi_phy(struct tpm_tis_spi_phy *phy)
+{
+	return container_of(phy, struct cr50_spi_phy, spi_phy);
+}
+
+/*
+ * The cr50 interrupt handler just signals waiting threads that the
+ * interrupt was asserted.  It does not do any processing triggered
+ * by interrupts but is instead used to avoid fixed delays.
+ */
+static irqreturn_t cr50_spi_irq_handler(int dummy, void *dev_id)
+{
+	struct cr50_spi_phy *cr50_phy = dev_id;
+
+	cr50_phy->irq_confirmed = true;
+	complete(&cr50_phy->spi_phy.ready);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Cr50 needs to have at least some delay between consecutive
+ * transactions. Make sure we wait.
+ */
+static void cr50_ensure_access_delay(struct cr50_spi_phy *phy)
+{
+	unsigned long allowed_access = phy->last_access + phy->access_delay;
+	unsigned long time_now = jiffies;
+	struct device *dev = &phy->spi_phy.spi_device->dev;
+
+	/*
+	 * Note: There is a small chance, if Cr50 is not accessed in a few days,
+	 * that time_in_range will not provide the correct result after the wrap
+	 * around for jiffies. In this case, we'll have an unneeded short delay,
+	 * which is fine.
+	 */
+	if (time_in_range_open(time_now, phy->last_access, allowed_access)) {
+		unsigned long remaining, timeout = allowed_access - time_now;
+
+		remaining = wait_for_completion_timeout(&phy->spi_phy.ready,
+							timeout);
+		if (!remaining && phy->irq_confirmed)
+			dev_warn(dev, "Timeout waiting for TPM ready IRQ\n");
+	}
+
+	if (phy->irq_needs_confirmation) {
+		unsigned int attempt = ++phy->irq_confirmation_attempt;
+
+		if (phy->irq_confirmed) {
+			phy->irq_needs_confirmation = false;
+			phy->access_delay = CR50_READY_IRQ_TIMEOUT;
+			dev_info(dev, "TPM ready IRQ confirmed on attempt %u\n",
+				 attempt);
+		} else if (attempt > MAX_IRQ_CONFIRMATION_ATTEMPTS) {
+			phy->irq_needs_confirmation = false;
+			dev_warn(dev, "IRQ not confirmed - will use delays\n");
+		}
+	}
+}
+
+/*
+ * Cr50 might go to sleep if there is no SPI activity for some time and
+ * miss the first few bits/bytes on the bus. In such case, wake it up
+ * by asserting CS and give it time to start up.
+ */
+static bool cr50_needs_waking(struct cr50_spi_phy *phy)
+{
+	/*
+	 * Note: There is a small chance, if Cr50 is not accessed in a few days,
+	 * that time_in_range will not provide the correct result after the wrap
+	 * around for jiffies. In this case, we'll probably timeout or read
+	 * incorrect value from TPM_STS and just retry the operation.
+	 */
+	return !time_in_range_open(jiffies, phy->last_access,
+				   phy->spi_phy.wake_after);
+}
+
+static void cr50_wake_if_needed(struct cr50_spi_phy *cr50_phy)
+{
+	struct tpm_tis_spi_phy *phy = &cr50_phy->spi_phy;
+
+	if (cr50_needs_waking(cr50_phy)) {
+		/* Assert CS, wait 1 msec, deassert CS */
+		struct spi_transfer spi_cs_wake = { .delay_usecs = 1000 };
+
+		spi_sync_transfer(phy->spi_device, &spi_cs_wake, 1);
+		/* Wait for it to fully wake */
+		usleep_range(CR50_WAKE_START_DELAY_USEC,
+			     CR50_WAKE_START_DELAY_USEC * 2);
+	}
+
+	/* Reset the time when we need to wake Cr50 again */
+	phy->wake_after = jiffies + msecs_to_jiffies(CR50_SLEEP_DELAY_MSEC);
+}
+
+/*
+ * Flow control: clock the bus and wait for cr50 to set LSB before
+ * sending/receiving data. TCG PTP spec allows it to happen during
+ * the last byte of header, but cr50 never does that in practice,
+ * and earlier versions had a bug when it was set too early, so don't
+ * check for it during header transfer.
+ */
+static int cr50_spi_flow_control(struct tpm_tis_spi_phy *phy,
+				 struct spi_transfer *spi_xfer)
+{
+	struct device *dev = &phy->spi_device->dev;
+	unsigned long timeout = jiffies + CR50_FLOW_CONTROL;
+	struct spi_message m;
+	int ret;
+
+	spi_xfer->len = 1;
+
+	do {
+		spi_message_init(&m);
+		spi_message_add_tail(spi_xfer, &m);
+		ret = spi_sync_locked(phy->spi_device, &m);
+		if (ret < 0)
+			return ret;
+
+		if (time_after(jiffies, timeout)) {
+			dev_warn(dev, "Timeout during flow control\n");
+			return -EBUSY;
+		}
+	} while (!(phy->iobuf[0] & 0x01));
+
+	return 0;
+}
+
+static int tpm_tis_spi_cr50_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+				     u8 *in, const u8 *out)
+{
+	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+	struct cr50_spi_phy *cr50_phy = to_cr50_spi_phy(phy);
+	int ret;
+
+	mutex_lock(&cr50_phy->time_track_mutex);
+	/*
+	 * Do this outside of spi_bus_lock in case cr50 is not the
+	 * only device on that spi bus.
+	 */
+	cr50_ensure_access_delay(cr50_phy);
+	cr50_wake_if_needed(cr50_phy);
+
+	ret = tpm_tis_spi_transfer(data, addr, len, in, out);
+
+	cr50_phy->last_access = jiffies;
+	mutex_unlock(&cr50_phy->time_track_mutex);
+
+	return ret;
+}
+
+static int tpm_tis_spi_cr50_read_bytes(struct tpm_tis_data *data, u32 addr,
+				       u16 len, u8 *result)
+{
+	return tpm_tis_spi_cr50_transfer(data, addr, len, result, NULL);
+}
+
+static int tpm_tis_spi_cr50_write_bytes(struct tpm_tis_data *data, u32 addr,
+					u16 len, const u8 *value)
+{
+	return tpm_tis_spi_cr50_transfer(data, addr, len, NULL, value);
+}
+
+static const struct tpm_tis_phy_ops tpm_spi_cr50_phy_ops = {
+	.read_bytes = tpm_tis_spi_cr50_read_bytes,
+	.write_bytes = tpm_tis_spi_cr50_write_bytes,
+	.read16 = tpm_tis_spi_read16,
+	.read32 = tpm_tis_spi_read32,
+	.write32 = tpm_tis_spi_write32,
+};
+
+static void cr50_print_fw_version(struct tpm_tis_data *data)
+{
+	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+	int i, len = 0;
+	char fw_ver[TPM_CR50_MAX_FW_VER_LEN + 1];
+	char fw_ver_block[4];
+
+	/*
+	 * Write anything to TPM_CR50_FW_VER to start from the beginning
+	 * of the version string
+	 */
+	tpm_tis_write8(data, TPM_CR50_FW_VER(data->locality), 0);
+
+	/* Read the string, 4 bytes at a time, until we get '\0' */
+	do {
+		tpm_tis_read_bytes(data, TPM_CR50_FW_VER(data->locality), 4,
+				   fw_ver_block);
+		for (i = 0; i < 4 && fw_ver_block[i]; ++len, ++i)
+			fw_ver[len] = fw_ver_block[i];
+	} while (i == 4 && len < TPM_CR50_MAX_FW_VER_LEN);
+	fw_ver[len] = '\0';
+
+	dev_info(&phy->spi_device->dev, "Cr50 firmware version: %s\n", fw_ver);
+}
+
+int cr50_spi_probe(struct spi_device *spi)
+{
+	struct tpm_tis_spi_phy *phy;
+	struct cr50_spi_phy *cr50_phy;
+	int ret;
+	struct tpm_chip *chip;
+
+	cr50_phy = devm_kzalloc(&spi->dev, sizeof(*cr50_phy), GFP_KERNEL);
+	if (!cr50_phy)
+		return -ENOMEM;
+
+	phy = &cr50_phy->spi_phy;
+	phy->flow_control = cr50_spi_flow_control;
+	phy->wake_after = jiffies;
+	init_completion(&phy->ready);
+
+	cr50_phy->access_delay = CR50_NOIRQ_ACCESS_DELAY;
+	cr50_phy->last_access = jiffies;
+	mutex_init(&cr50_phy->time_track_mutex);
+
+	if (spi->irq > 0) {
+		ret = devm_request_irq(&spi->dev, spi->irq,
+				       cr50_spi_irq_handler,
+				       IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+				       "cr50_spi", cr50_phy);
+		if (ret < 0) {
+			if (ret == -EPROBE_DEFER)
+				return ret;
+			dev_warn(&spi->dev, "Requesting IRQ %d failed: %d\n",
+				 spi->irq, ret);
+			/*
+			 * This is not fatal, the driver will fall back to
+			 * delays automatically, since ready will never
+			 * be completed without a registered irq handler.
+			 * So, just fall through.
+			 */
+		} else {
+			/*
+			 * IRQ requested, let's verify that it is actually
+			 * triggered, before relying on it.
+			 */
+			cr50_phy->irq_needs_confirmation = true;
+		}
+	} else {
+		dev_warn(&spi->dev,
+			 "No IRQ - will use delays between transactions.\n");
+	}
+
+	ret = tpm_tis_spi_init(spi, phy, -1, &tpm_spi_cr50_phy_ops);
+	if (ret)
+		return ret;
+
+	cr50_print_fw_version(&phy->priv);
+
+	chip = dev_get_drvdata(&spi->dev);
+	chip->flags |= TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED;
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+int tpm_tis_spi_resume(struct device *dev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_tis_data *data = dev_get_drvdata(&chip->dev);
+	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+	/*
+	 * Jiffies not increased during suspend, so we need to reset
+	 * the time to wake Cr50 after resume.
+	 */
+	phy->wake_after = jiffies;
+
+	return tpm_tis_resume(dev);
+}
+#endif
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 2f6e087ec496..91c772e38bb5 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -670,20 +670,10 @@ static long vtpmx_fops_ioctl(struct file *f, unsigned int ioctl,
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long vtpmx_fops_compat_ioctl(struct file *f, unsigned int ioctl,
-					  unsigned long arg)
-{
-	return vtpmx_fops_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
 static const struct file_operations vtpmx_fops = {
 	.owner = THIS_MODULE,
 	.unlocked_ioctl = vtpmx_fops_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = vtpmx_fops_compat_ioctl,
-#endif
+	.compat_ioctl = compat_ptr_ioctl,
 	.llseek = noop_llseek,
 };
 
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index 4f24e46ebe7c..56db949a7b70 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -15,10 +15,11 @@
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/module.h>
+#include <linux/spinlock.h>
 
 struct ttyprintk_port {
 	struct tty_port port;
-	struct mutex port_write_mutex;
+	spinlock_t spinlock;
 };
 
 static struct ttyprintk_port tpk_port;
@@ -99,11 +100,12 @@ static int tpk_open(struct tty_struct *tty, struct file *filp)
 static void tpk_close(struct tty_struct *tty, struct file *filp)
 {
 	struct ttyprintk_port *tpkp = tty->driver_data;
+	unsigned long flags;
 
-	mutex_lock(&tpkp->port_write_mutex);
+	spin_lock_irqsave(&tpkp->spinlock, flags);
 	/* flush tpk_printk buffer */
 	tpk_printk(NULL, 0);
-	mutex_unlock(&tpkp->port_write_mutex);
+	spin_unlock_irqrestore(&tpkp->spinlock, flags);
 
 	tty_port_close(&tpkp->port, tty, filp);
 }
@@ -115,13 +117,14 @@ static int tpk_write(struct tty_struct *tty,
 		const unsigned char *buf, int count)
 {
 	struct ttyprintk_port *tpkp = tty->driver_data;
+	unsigned long flags;
 	int ret;
 
 
 	/* exclusive use of tpk_printk within this tty */
-	mutex_lock(&tpkp->port_write_mutex);
+	spin_lock_irqsave(&tpkp->spinlock, flags);
 	ret = tpk_printk(buf, count);
-	mutex_unlock(&tpkp->port_write_mutex);
+	spin_unlock_irqrestore(&tpkp->spinlock, flags);
 
 	return ret;
 }
@@ -171,7 +174,7 @@ static int __init ttyprintk_init(void)
 {
 	int ret = -ENOMEM;
 
-	mutex_init(&tpk_port.port_write_mutex);
+	spin_lock_init(&tpk_port.spinlock);
 
 	ttyprintk_driver = tty_alloc_driver(1,
 			TTY_DRIVER_RESET_TERMIOS |
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 3259426f01dc..4df9b40d6342 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -919,6 +919,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
 		.pos = *ppos,
 		.u.data = &sgl,
 	};
+	unsigned int occupancy;
 
 	/*
 	 * Rproc_serial does not yet support splice. To support splice
@@ -929,21 +930,18 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
 	if (is_rproc_serial(port->out_vq->vdev))
 		return -EINVAL;
 
-	/*
-	 * pipe->nrbufs == 0 means there are no data to transfer,
-	 * so this returns just 0 for no data.
-	 */
 	pipe_lock(pipe);
-	if (!pipe->nrbufs) {
-		ret = 0;
+	ret = 0;
+	if (pipe_empty(pipe->head, pipe->tail))
 		goto error_out;
-	}
 
 	ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
 	if (ret < 0)
 		goto error_out;
 
-	buf = alloc_buf(port->portdev->vdev, 0, pipe->nrbufs);
+	occupancy = pipe_occupancy(pipe->head, pipe->tail);
+	buf = alloc_buf(port->portdev->vdev, 0, occupancy);
+
 	if (!buf) {
 		ret = -ENOMEM;
 		goto error_out;
@@ -951,7 +949,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
 
 	sgl.n = 0;
 	sgl.len = 0;
-	sgl.size = pipe->nrbufs;
+	sgl.size = occupancy;
 	sgl.sg = buf->sg;
 	sg_init_table(sgl.sg, sgl.size);
 	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
diff --git a/drivers/char/xillybus/xillybus_of.c b/drivers/char/xillybus/xillybus_of.c
index bfafd8f5e826..96b6de8a30e5 100644
--- a/drivers/char/xillybus/xillybus_of.c
+++ b/drivers/char/xillybus/xillybus_of.c
@@ -116,7 +116,6 @@ static int xilly_drv_probe(struct platform_device *op)
 	struct xilly_endpoint *endpoint;
 	int rc;
 	int irq;
-	struct resource *res;
 	struct xilly_endpoint_hardware *ephw = &of_hw;
 
 	if (of_property_read_bool(dev->of_node, "dma-coherent"))
@@ -129,9 +128,7 @@ static int xilly_drv_probe(struct platform_device *op)
 
 	dev_set_drvdata(dev, endpoint);
 
-	res = platform_get_resource(op, IORESOURCE_MEM, 0);
-	endpoint->registers = devm_ioremap_resource(dev, res);
-
+	endpoint->registers = devm_platform_ioremap_resource(op, 0);
 	if (IS_ERR(endpoint->registers))
 		return PTR_ERR(endpoint->registers);
 
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index c44247d0b83e..45653a0e6ecd 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -136,6 +136,13 @@ config COMMON_CLK_SI570
 	  This driver supports Silicon Labs 570/571/598/599 programmable
 	  clock generators.
 
+config COMMON_CLK_BM1880
+	bool "Clock driver for Bitmain BM1880 SoC"
+	depends on ARCH_BITMAIN || COMPILE_TEST
+	default ARCH_BITMAIN
+	help
+	  This driver supports the clocks on Bitmain BM1880 SoC.
+
 config COMMON_CLK_CDCE706
 	tristate "Clock driver for TI CDCE706 clock synthesizer"
 	depends on I2C
@@ -292,6 +299,11 @@ config COMMON_CLK_STM32H7
 	help
 	  Support for stm32h7 SoC family clocks
 
+config COMMON_CLK_MMP2
+	def_bool COMMON_CLK && (MACH_MMP2_DT || MACH_MMP3_DT)
+	help
+	  Support for Marvell MMP2 and MMP3 SoC clocks
+
 config COMMON_CLK_BD718XX
 	tristate "Clock driver for ROHM BD718x7 PMIC"
 	depends on MFD_ROHM_BD718XX || MFD_ROHM_BD70528
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 0138fb14e6f8..0696a0c1ab58 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_MACH_ASM9260)		+= clk-asm9260.o
 obj-$(CONFIG_COMMON_CLK_AXI_CLKGEN)	+= clk-axi-clkgen.o
 obj-$(CONFIG_ARCH_AXXIA)		+= clk-axm5516.o
 obj-$(CONFIG_COMMON_CLK_BD718XX)	+= clk-bd718x7.o
+obj-$(CONFIG_COMMON_CLK_BM1880)		+= clk-bm1880.o
 obj-$(CONFIG_COMMON_CLK_CDCE706)	+= clk-cdce706.o
 obj-$(CONFIG_COMMON_CLK_CDCE925)	+= clk-cdce925.o
 obj-$(CONFIG_ARCH_CLPS711X)		+= clk-clps711x.o
diff --git a/drivers/clk/at91/at91sam9260.c b/drivers/clk/at91/at91sam9260.c
index 0aabe49aed09..a9d4234758d7 100644
--- a/drivers/clk/at91/at91sam9260.c
+++ b/drivers/clk/at91/at91sam9260.c
@@ -348,7 +348,7 @@ static void __init at91sam926x_pmc_setup(struct device_node *np,
 		return;
 	mainxtal_name = of_clk_get_parent_name(np, i);
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap))
 		return;
 
diff --git a/drivers/clk/at91/at91sam9rl.c b/drivers/clk/at91/at91sam9rl.c
index 0ac34cdaa106..77fe83a73bf4 100644
--- a/drivers/clk/at91/at91sam9rl.c
+++ b/drivers/clk/at91/at91sam9rl.c
@@ -83,7 +83,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np)
 		return;
 	mainxtal_name = of_clk_get_parent_name(np, i);
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap))
 		return;
 
diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c
index 0855f3a80cc7..086cf0b4955c 100644
--- a/drivers/clk/at91/at91sam9x5.c
+++ b/drivers/clk/at91/at91sam9x5.c
@@ -146,7 +146,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
 		return;
 	mainxtal_name = of_clk_get_parent_name(np, i);
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap))
 		return;
 
diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
index 0b03cfae3a9d..b71515acdec1 100644
--- a/drivers/clk/at91/pmc.c
+++ b/drivers/clk/at91/pmc.c
@@ -275,7 +275,7 @@ static int __init pmc_register_ops(void)
 
 	np = of_find_matching_node(NULL, sama5d2_pmc_dt_ids);
 
-	pmcreg = syscon_node_to_regmap(np);
+	pmcreg = device_node_to_regmap(np);
 	if (IS_ERR(pmcreg))
 		return PTR_ERR(pmcreg);
 
diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c
index 0de1108737db..ff7e3f727082 100644
--- a/drivers/clk/at91/sama5d2.c
+++ b/drivers/clk/at91/sama5d2.c
@@ -162,7 +162,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
 		return;
 	mainxtal_name = of_clk_get_parent_name(np, i);
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap))
 		return;
 
diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c
index 25b156d4e645..a6dee4a3b6e4 100644
--- a/drivers/clk/at91/sama5d4.c
+++ b/drivers/clk/at91/sama5d4.c
@@ -136,7 +136,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np)
 		return;
 	mainxtal_name = of_clk_get_parent_name(np, i);
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap))
 		return;
 
diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c
index fac0ca56d42d..15dc4cd86d76 100644
--- a/drivers/clk/at91/sckc.c
+++ b/drivers/clk/at91/sckc.c
@@ -487,8 +487,7 @@ static void __init of_sam9x60_sckc_setup(struct device_node *np)
 	if (IS_ERR(slow_osc))
 		goto unregister_slow_rc;
 
-	clk_data = kzalloc(sizeof(*clk_data) + (2 * sizeof(struct clk_hw *)),
-			   GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, 2), GFP_KERNEL);
 	if (!clk_data)
 		goto unregister_slow_osc;
 
diff --git a/drivers/clk/axs10x/i2s_pll_clock.c b/drivers/clk/axs10x/i2s_pll_clock.c
index 71c2e9519ca8..e9da0e69bf6c 100644
--- a/drivers/clk/axs10x/i2s_pll_clock.c
+++ b/drivers/clk/axs10x/i2s_pll_clock.c
@@ -172,14 +172,12 @@ static int i2s_pll_clk_probe(struct platform_device *pdev)
 	struct clk *clk;
 	struct i2s_pll_clk *pll_clk;
 	struct clk_init_data init;
-	struct resource *mem;
 
 	pll_clk = devm_kzalloc(dev, sizeof(*pll_clk), GFP_KERNEL);
 	if (!pll_clk)
 		return -ENOMEM;
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pll_clk->base = devm_ioremap_resource(dev, mem);
+	pll_clk->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pll_clk->base))
 		return PTR_ERR(pll_clk->base);
 
diff --git a/drivers/clk/axs10x/pll_clock.c b/drivers/clk/axs10x/pll_clock.c
index aba787b2e771..500345d99adb 100644
--- a/drivers/clk/axs10x/pll_clock.c
+++ b/drivers/clk/axs10x/pll_clock.c
@@ -221,7 +221,6 @@ static int axs10x_pll_clk_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	const char *parent_name;
 	struct axs10x_pll_clk *pll_clk;
-	struct resource *mem;
 	struct clk_init_data init = { };
 	int ret;
 
@@ -229,13 +228,11 @@ static int axs10x_pll_clk_probe(struct platform_device *pdev)
 	if (!pll_clk)
 		return -ENOMEM;
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pll_clk->base = devm_ioremap_resource(dev, mem);
+	pll_clk->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pll_clk->base))
 		return PTR_ERR(pll_clk->base);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	pll_clk->lock = devm_ioremap_resource(dev, mem);
+	pll_clk->lock = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(pll_clk->lock))
 		return PTR_ERR(pll_clk->lock);
 
diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index b6d07ca0164f..290a2846a86b 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -19,7 +19,6 @@ static int bcm2835_aux_clk_probe(struct platform_device *pdev)
 	struct clk_hw_onecell_data *onecell;
 	const char *parent;
 	struct clk *parent_clk;
-	struct resource *res;
 	void __iomem *reg, *gate;
 
 	parent_clk = devm_clk_get(dev, NULL);
@@ -27,8 +26,7 @@ static int bcm2835_aux_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(parent_clk);
 	parent = __clk_get_name(parent_clk);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	reg = devm_ioremap_resource(dev, res);
+	reg = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(reg))
 		return PTR_ERR(reg);
 
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 802e488fd3c3..ded13ccf768e 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -2192,7 +2192,6 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct clk_hw **hws;
 	struct bcm2835_cprman *cprman;
-	struct resource *res;
 	const struct bcm2835_clk_desc *desc;
 	const size_t asize = ARRAY_SIZE(clk_desc_array);
 	const struct cprman_plat_data *pdata;
@@ -2211,8 +2210,7 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
 
 	spin_lock_init(&cprman->regs_lock);
 	cprman->dev = dev;
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	cprman->regs = devm_ioremap_resource(dev, res);
+	cprman->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cprman->regs))
 		return PTR_ERR(cprman->regs);
 
diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index abf06fb6453e..411ff5fb2c07 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -14,7 +14,7 @@
 
 #include "clk-aspeed.h"
 
-#define ASPEED_NUM_CLKS		36
+#define ASPEED_NUM_CLKS		38
 
 #define ASPEED_RESET2_OFFSET	32
 
@@ -28,6 +28,7 @@
 #define  AST2400_HPLL_BYPASS_EN	BIT(17)
 #define ASPEED_MISC_CTRL	0x2c
 #define  UART_DIV13_EN		BIT(12)
+#define ASPEED_MAC_CLK_DLY	0x48
 #define ASPEED_STRAP		0x70
 #define  CLKIN_25MHZ_EN		BIT(23)
 #define  AST2400_CLK_SOURCE_SEL	BIT(18)
@@ -462,6 +463,30 @@ static int aspeed_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(hw);
 	aspeed_clk_data->hws[ASPEED_CLK_MAC] = hw;
 
+	if (of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2500-scu")) {
+		/* RMII 50MHz RCLK */
+		hw = clk_hw_register_fixed_rate(dev, "mac12rclk", "hpll", 0,
+						50000000);
+		if (IS_ERR(hw))
+			return PTR_ERR(hw);
+
+		/* RMII1 50MHz (RCLK) output enable */
+		hw = clk_hw_register_gate(dev, "mac1rclk", "mac12rclk", 0,
+				scu_base + ASPEED_MAC_CLK_DLY, 29, 0,
+				&aspeed_clk_lock);
+		if (IS_ERR(hw))
+			return PTR_ERR(hw);
+		aspeed_clk_data->hws[ASPEED_CLK_MAC1RCLK] = hw;
+
+		/* RMII2 50MHz (RCLK) output enable */
+		hw = clk_hw_register_gate(dev, "mac2rclk", "mac12rclk", 0,
+				scu_base + ASPEED_MAC_CLK_DLY, 30, 0,
+				&aspeed_clk_lock);
+		if (IS_ERR(hw))
+			return PTR_ERR(hw);
+		aspeed_clk_data->hws[ASPEED_CLK_MAC2RCLK] = hw;
+	}
+
 	/* LPC Host (LHCLK) clock divider */
 	hw = clk_hw_register_divider_table(dev, "lhclk", "hpll", 0,
 			scu_base + ASPEED_CLK_SELECTION, 20, 3, 0,
diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
index b1318e6b655b..392d01705b97 100644
--- a/drivers/clk/clk-ast2600.c
+++ b/drivers/clk/clk-ast2600.c
@@ -15,7 +15,7 @@
 
 #include "clk-aspeed.h"
 
-#define ASPEED_G6_NUM_CLKS		67
+#define ASPEED_G6_NUM_CLKS		71
 
 #define ASPEED_G6_SILICON_REV		0x004
 
@@ -40,6 +40,9 @@
 
 #define ASPEED_G6_STRAP1		0x500
 
+#define ASPEED_MAC12_CLK_DLY		0x340
+#define ASPEED_MAC34_CLK_DLY		0x350
+
 /* Globally visible clocks */
 static DEFINE_SPINLOCK(aspeed_g6_clk_lock);
 
@@ -116,8 +119,6 @@ static const struct aspeed_gate_data aspeed_g6_gates[] = {
 	[ASPEED_CLK_GATE_FSICLK]	= { 62,  59, "fsiclk-gate",	NULL,	 0 },	/* FSI */
 };
 
-static const char * const eclk_parent_names[] = { "mpll", "hpll", "dpll" };
-
 static const struct clk_div_table ast2600_eclk_div_table[] = {
 	{ 0x0, 2 },
 	{ 0x1, 2 },
@@ -486,6 +487,11 @@ static int aspeed_g6_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(hw);
 	aspeed_g6_clk_data->hws[ASPEED_CLK_SDIO] = hw;
 
+	/* MAC1/2 RMII 50MHz RCLK */
+	hw = clk_hw_register_fixed_rate(dev, "mac12rclk", "hpll", 0, 50000000);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
 	/* MAC1/2 AHB bus clock divider */
 	hw = clk_hw_register_divider_table(dev, "mac12", "hpll", 0,
 			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 16, 3, 0,
@@ -495,6 +501,27 @@ static int aspeed_g6_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(hw);
 	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC12] = hw;
 
+	/* RMII1 50MHz (RCLK) output enable */
+	hw = clk_hw_register_gate(dev, "mac1rclk", "mac12rclk", 0,
+			scu_g6_base + ASPEED_MAC12_CLK_DLY, 29, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC1RCLK] = hw;
+
+	/* RMII2 50MHz (RCLK) output enable */
+	hw = clk_hw_register_gate(dev, "mac2rclk", "mac12rclk", 0,
+			scu_g6_base + ASPEED_MAC12_CLK_DLY, 30, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC2RCLK] = hw;
+
+	/* MAC1/2 RMII 50MHz RCLK */
+	hw = clk_hw_register_fixed_rate(dev, "mac34rclk", "hclk", 0, 50000000);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
 	/* MAC3/4 AHB bus clock divider */
 	hw = clk_hw_register_divider_table(dev, "mac34", "hpll", 0,
 			scu_g6_base + 0x310, 24, 3, 0,
@@ -504,6 +531,22 @@ static int aspeed_g6_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(hw);
 	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC34] = hw;
 
+	/* RMII3 50MHz (RCLK) output enable */
+	hw = clk_hw_register_gate(dev, "mac3rclk", "mac34rclk", 0,
+			scu_g6_base + ASPEED_MAC34_CLK_DLY, 29, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC3RCLK] = hw;
+
+	/* RMII4 50MHz (RCLK) output enable */
+	hw = clk_hw_register_gate(dev, "mac4rclk", "mac34rclk", 0,
+			scu_g6_base + ASPEED_MAC34_CLK_DLY, 30, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC4RCLK] = hw;
+
 	/* LPC Host (LHCLK) clock divider */
 	hw = clk_hw_register_divider_table(dev, "lhclk", "hpll", 0,
 			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
diff --git a/drivers/clk/clk-bd718x7.c b/drivers/clk/clk-bd718x7.c
index ae6e5baee330..00926c587390 100644
--- a/drivers/clk/clk-bd718x7.c
+++ b/drivers/clk/clk-bd718x7.c
@@ -133,3 +133,4 @@ module_platform_driver(bd71837_clk);
 MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
 MODULE_DESCRIPTION("BD71837/BD71847/BD70528 chip clk driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bd718xx-clk");
diff --git a/drivers/clk/clk-bm1880.c b/drivers/clk/clk-bm1880.c
new file mode 100644
index 000000000000..4cd175afce9b
--- /dev/null
+++ b/drivers/clk/clk-bm1880.c
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Bitmain BM1880 SoC clock driver
+ *
+ * Copyright (c) 2019 Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/clock/bm1880-clock.h>
+
+#define BM1880_CLK_MPLL_CTL	0x00
+#define BM1880_CLK_SPLL_CTL	0x04
+#define BM1880_CLK_FPLL_CTL	0x08
+#define BM1880_CLK_DDRPLL_CTL	0x0c
+
+#define BM1880_CLK_ENABLE0	0x00
+#define BM1880_CLK_ENABLE1	0x04
+#define BM1880_CLK_SELECT	0x20
+#define BM1880_CLK_DIV0		0x40
+#define BM1880_CLK_DIV1		0x44
+#define BM1880_CLK_DIV2		0x48
+#define BM1880_CLK_DIV3		0x4c
+#define BM1880_CLK_DIV4		0x50
+#define BM1880_CLK_DIV5		0x54
+#define BM1880_CLK_DIV6		0x58
+#define BM1880_CLK_DIV7		0x5c
+#define BM1880_CLK_DIV8		0x60
+#define BM1880_CLK_DIV9		0x64
+#define BM1880_CLK_DIV10	0x68
+#define BM1880_CLK_DIV11	0x6c
+#define BM1880_CLK_DIV12	0x70
+#define BM1880_CLK_DIV13	0x74
+#define BM1880_CLK_DIV14	0x78
+#define BM1880_CLK_DIV15	0x7c
+#define BM1880_CLK_DIV16	0x80
+#define BM1880_CLK_DIV17	0x84
+#define BM1880_CLK_DIV18	0x88
+#define BM1880_CLK_DIV19	0x8c
+#define BM1880_CLK_DIV20	0x90
+#define BM1880_CLK_DIV21	0x94
+#define BM1880_CLK_DIV22	0x98
+#define BM1880_CLK_DIV23	0x9c
+#define BM1880_CLK_DIV24	0xa0
+#define BM1880_CLK_DIV25	0xa4
+#define BM1880_CLK_DIV26	0xa8
+#define BM1880_CLK_DIV27	0xac
+#define BM1880_CLK_DIV28	0xb0
+
+#define to_bm1880_pll_clk(_hw) container_of(_hw, struct bm1880_pll_hw_clock, hw)
+#define to_bm1880_div_clk(_hw) container_of(_hw, struct bm1880_div_hw_clock, hw)
+
+static DEFINE_SPINLOCK(bm1880_clk_lock);
+
+struct bm1880_clock_data {
+	void __iomem *pll_base;
+	void __iomem *sys_base;
+	struct clk_hw_onecell_data hw_data;
+};
+
+struct bm1880_gate_clock {
+	unsigned int	id;
+	const char	*name;
+	const char      *parent;
+	u32		gate_reg;
+	s8		gate_shift;
+	unsigned long	flags;
+};
+
+struct bm1880_mux_clock {
+	unsigned int	id;
+	const char	*name;
+	const char      * const *parents;
+	s8		num_parents;
+	u32		reg;
+	s8		shift;
+	unsigned long	flags;
+};
+
+struct bm1880_div_clock {
+	unsigned int	id;
+	const char	*name;
+	u32		reg;
+	u8		shift;
+	u8		width;
+	u32		initval;
+	const struct clk_div_table *table;
+	unsigned long flags;
+};
+
+struct bm1880_div_hw_clock {
+	struct bm1880_div_clock div;
+	void __iomem *base;
+	spinlock_t *lock;
+	struct clk_hw hw;
+	struct clk_init_data init;
+};
+
+struct bm1880_composite_clock {
+	unsigned int	id;
+	const char	*name;
+	const char	*parent;
+	const char      * const *parents;
+	unsigned int	num_parents;
+	unsigned long	flags;
+
+	u32		gate_reg;
+	u32		mux_reg;
+	u32		div_reg;
+
+	s8		gate_shift;
+	s8		mux_shift;
+	s8		div_shift;
+	s8		div_width;
+	s16		div_initval;
+	const struct clk_div_table *table;
+};
+
+struct bm1880_pll_clock {
+	unsigned int	id;
+	const char	*name;
+	u32		reg;
+	unsigned long	flags;
+};
+
+struct bm1880_pll_hw_clock {
+	struct bm1880_pll_clock pll;
+	void __iomem *base;
+	struct clk_hw hw;
+	struct clk_init_data init;
+};
+
+static const struct clk_ops bm1880_pll_ops;
+static const struct clk_ops bm1880_clk_div_ops;
+
+#define GATE_DIV(_id, _name, _parent, _gate_reg, _gate_shift, _div_reg,	\
+			_div_shift, _div_width, _div_initval, _table,	\
+			_flags) {					\
+		.id = _id,						\
+		.parent = _parent,					\
+		.name = _name,						\
+		.gate_reg = _gate_reg,					\
+		.gate_shift = _gate_shift,				\
+		.div_reg = _div_reg,					\
+		.div_shift = _div_shift,				\
+		.div_width = _div_width,				\
+		.div_initval = _div_initval,				\
+		.table = _table,					\
+		.mux_shift = -1,					\
+		.flags = _flags,					\
+	}
+
+#define GATE_MUX(_id, _name, _parents, _gate_reg, _gate_shift,		\
+			_mux_reg, _mux_shift, _flags) {			\
+		.id = _id,						\
+		.parents = _parents,					\
+		.num_parents = ARRAY_SIZE(_parents),			\
+		.name = _name,						\
+		.gate_reg = _gate_reg,					\
+		.gate_shift = _gate_shift,				\
+		.div_shift = -1,					\
+		.mux_reg = _mux_reg,					\
+		.mux_shift = _mux_shift,				\
+		.flags = _flags,					\
+	}
+
+#define CLK_PLL(_id, _name, _parent, _reg, _flags) {			\
+		.pll.id = _id,						\
+		.pll.name = _name,					\
+		.pll.reg = _reg,					\
+		.hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent,	\
+						    &bm1880_pll_ops,	\
+						    _flags),		\
+	}
+
+#define CLK_DIV(_id, _name, _parent, _reg, _shift, _width, _initval,	\
+				_table,	_flags) {			\
+		.div.id = _id,						\
+		.div.name = _name,					\
+		.div.reg = _reg,					\
+		.div.shift = _shift,					\
+		.div.width = _width,					\
+		.div.initval = _initval,				\
+		.div.table = _table,					\
+		.hw.init = CLK_HW_INIT_HW(_name, _parent,		\
+					  &bm1880_clk_div_ops,		\
+					  _flags),			\
+	}
+
+static struct clk_parent_data bm1880_pll_parent[] = {
+	{ .fw_name = "osc", .name = "osc" },
+};
+
+/*
+ * All PLL clocks are marked as CRITICAL, hence they are very crucial
+ * for the functioning of the SoC
+ */
+static struct bm1880_pll_hw_clock bm1880_pll_clks[] = {
+	CLK_PLL(BM1880_CLK_MPLL, "clk_mpll", bm1880_pll_parent,
+		BM1880_CLK_MPLL_CTL, 0),
+	CLK_PLL(BM1880_CLK_SPLL, "clk_spll", bm1880_pll_parent,
+		BM1880_CLK_SPLL_CTL, 0),
+	CLK_PLL(BM1880_CLK_FPLL, "clk_fpll", bm1880_pll_parent,
+		BM1880_CLK_FPLL_CTL, 0),
+	CLK_PLL(BM1880_CLK_DDRPLL, "clk_ddrpll", bm1880_pll_parent,
+		BM1880_CLK_DDRPLL_CTL, 0),
+};
+
+/*
+ * Clocks marked as CRITICAL are needed for the proper functioning
+ * of the SoC.
+ */
+static const struct bm1880_gate_clock bm1880_gate_clks[] = {
+	{ BM1880_CLK_AHB_ROM, "clk_ahb_rom", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 2, 0 },
+	{ BM1880_CLK_AXI_SRAM, "clk_axi_sram", "clk_axi1",
+	  BM1880_CLK_ENABLE0, 3, 0 },
+	/*
+	 * Since this clock is sourcing the DDR memory, let's mark it as
+	 * critical to avoid gating.
+	 */
+	{ BM1880_CLK_DDR_AXI, "clk_ddr_axi", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 4, CLK_IS_CRITICAL },
+	{ BM1880_CLK_APB_EFUSE, "clk_apb_efuse", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 6, 0 },
+	{ BM1880_CLK_AXI5_EMMC, "clk_axi5_emmc", "clk_axi5",
+	  BM1880_CLK_ENABLE0, 7, 0 },
+	{ BM1880_CLK_AXI5_SD, "clk_axi5_sd", "clk_axi5",
+	  BM1880_CLK_ENABLE0, 10, 0 },
+	{ BM1880_CLK_AXI4_ETH0, "clk_axi4_eth0", "clk_axi4",
+	  BM1880_CLK_ENABLE0, 14, 0 },
+	{ BM1880_CLK_AXI4_ETH1, "clk_axi4_eth1", "clk_axi4",
+	  BM1880_CLK_ENABLE0, 16, 0 },
+	{ BM1880_CLK_AXI1_GDMA, "clk_axi1_gdma", "clk_axi1",
+	  BM1880_CLK_ENABLE0, 17, 0 },
+	/* Don't gate GPIO clocks as it is not owned by the GPIO driver */
+	{ BM1880_CLK_APB_GPIO, "clk_apb_gpio", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 18, CLK_IGNORE_UNUSED },
+	{ BM1880_CLK_APB_GPIO_INTR, "clk_apb_gpio_intr", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 19, CLK_IGNORE_UNUSED },
+	{ BM1880_CLK_AXI1_MINER, "clk_axi1_miner", "clk_axi1",
+	  BM1880_CLK_ENABLE0, 21, 0 },
+	{ BM1880_CLK_AHB_SF, "clk_ahb_sf", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 22, 0 },
+	/*
+	 * Not sure which module this clock is sourcing but gating this clock
+	 * prevents the system from booting. So, let's mark it as critical.
+	 */
+	{ BM1880_CLK_SDMA_AXI, "clk_sdma_axi", "clk_axi5",
+	  BM1880_CLK_ENABLE0, 23, CLK_IS_CRITICAL },
+	{ BM1880_CLK_APB_I2C, "clk_apb_i2c", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 25, 0 },
+	{ BM1880_CLK_APB_WDT, "clk_apb_wdt", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE0, 26, 0 },
+	{ BM1880_CLK_APB_JPEG, "clk_apb_jpeg", "clk_axi6",
+	  BM1880_CLK_ENABLE0, 27, 0 },
+	{ BM1880_CLK_AXI5_NF, "clk_axi5_nf", "clk_axi5",
+	  BM1880_CLK_ENABLE0, 29, 0 },
+	{ BM1880_CLK_APB_NF, "clk_apb_nf", "clk_axi6",
+	  BM1880_CLK_ENABLE0, 30, 0 },
+	{ BM1880_CLK_APB_PWM, "clk_apb_pwm", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE1, 0, 0 },
+	{ BM1880_CLK_RV, "clk_rv", "clk_mux_rv",
+	  BM1880_CLK_ENABLE1, 1, 0 },
+	{ BM1880_CLK_APB_SPI, "clk_apb_spi", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE1, 2, 0 },
+	{ BM1880_CLK_UART_500M, "clk_uart_500m", "clk_div_uart_500m",
+	  BM1880_CLK_ENABLE1, 4, 0 },
+	{ BM1880_CLK_APB_UART, "clk_apb_uart", "clk_axi6",
+	  BM1880_CLK_ENABLE1, 5, 0 },
+	{ BM1880_CLK_APB_I2S, "clk_apb_i2s", "clk_axi6",
+	  BM1880_CLK_ENABLE1, 6, 0 },
+	{ BM1880_CLK_AXI4_USB, "clk_axi4_usb", "clk_axi4",
+	  BM1880_CLK_ENABLE1, 7, 0 },
+	{ BM1880_CLK_APB_USB, "clk_apb_usb", "clk_axi6",
+	  BM1880_CLK_ENABLE1, 8, 0 },
+	{ BM1880_CLK_12M_USB, "clk_12m_usb", "clk_div_12m_usb",
+	  BM1880_CLK_ENABLE1, 11, 0 },
+	{ BM1880_CLK_APB_VIDEO, "clk_apb_video", "clk_axi6",
+	  BM1880_CLK_ENABLE1, 12, 0 },
+	{ BM1880_CLK_APB_VPP, "clk_apb_vpp", "clk_axi6",
+	  BM1880_CLK_ENABLE1, 15, 0 },
+	{ BM1880_CLK_AXI6, "clk_axi6", "clk_mux_axi6",
+	  BM1880_CLK_ENABLE1, 21, 0 },
+};
+
+static const char * const clk_a53_parents[] = { "clk_spll", "clk_mpll" };
+static const char * const clk_rv_parents[] = { "clk_div_1_rv", "clk_div_0_rv" };
+static const char * const clk_axi1_parents[] = { "clk_div_1_axi1", "clk_div_0_axi1" };
+static const char * const clk_axi6_parents[] = { "clk_div_1_axi6", "clk_div_0_axi6" };
+
+static const struct bm1880_mux_clock bm1880_mux_clks[] = {
+	{ BM1880_CLK_MUX_RV, "clk_mux_rv", clk_rv_parents, 2,
+	  BM1880_CLK_SELECT, 1, 0 },
+	{ BM1880_CLK_MUX_AXI6, "clk_mux_axi6", clk_axi6_parents, 2,
+	  BM1880_CLK_SELECT, 3, 0 },
+};
+
+static const struct clk_div_table bm1880_div_table_0[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 5 }, { 5, 6 }, { 6, 7 }, { 7, 8 },
+	{ 8, 9 }, { 9, 10 }, { 10, 11 }, { 11, 12 },
+	{ 12, 13 }, { 13, 14 }, { 14, 15 }, { 15, 16 },
+	{ 16, 17 }, { 17, 18 }, { 18, 19 }, { 19, 20 },
+	{ 20, 21 }, { 21, 22 }, { 22, 23 }, { 23, 24 },
+	{ 24, 25 }, { 25, 26 }, { 26, 27 }, { 27, 28 },
+	{ 28, 29 }, { 29, 30 }, { 30, 31 }, { 31, 32 },
+	{ 0, 0 }
+};
+
+static const struct clk_div_table bm1880_div_table_1[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 5 }, { 5, 6 }, { 6, 7 }, { 7, 8 },
+	{ 8, 9 }, { 9, 10 }, { 10, 11 }, { 11, 12 },
+	{ 12, 13 }, { 13, 14 }, { 14, 15 }, { 15, 16 },
+	{ 16, 17 }, { 17, 18 }, { 18, 19 }, { 19, 20 },
+	{ 20, 21 }, { 21, 22 }, { 22, 23 }, { 23, 24 },
+	{ 24, 25 }, { 25, 26 }, { 26, 27 }, { 27, 28 },
+	{ 28, 29 }, { 29, 30 }, { 30, 31 }, { 31, 32 },
+	{ 127, 128 }, { 0, 0 }
+};
+
+static const struct clk_div_table bm1880_div_table_2[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 5 }, { 5, 6 }, { 6, 7 }, { 7, 8 },
+	{ 8, 9 }, { 9, 10 }, { 10, 11 }, { 11, 12 },
+	{ 12, 13 }, { 13, 14 }, { 14, 15 }, { 15, 16 },
+	{ 16, 17 }, { 17, 18 }, { 18, 19 }, { 19, 20 },
+	{ 20, 21 }, { 21, 22 }, { 22, 23 }, { 23, 24 },
+	{ 24, 25 }, { 25, 26 }, { 26, 27 }, { 27, 28 },
+	{ 28, 29 }, { 29, 30 }, { 30, 31 }, { 31, 32 },
+	{ 127, 128 }, { 255, 256 }, { 0, 0 }
+};
+
+static const struct clk_div_table bm1880_div_table_3[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 5 }, { 5, 6 }, { 6, 7 }, { 7, 8 },
+	{ 8, 9 }, { 9, 10 }, { 10, 11 }, { 11, 12 },
+	{ 12, 13 }, { 13, 14 }, { 14, 15 }, { 15, 16 },
+	{ 16, 17 }, { 17, 18 }, { 18, 19 }, { 19, 20 },
+	{ 20, 21 }, { 21, 22 }, { 22, 23 }, { 23, 24 },
+	{ 24, 25 }, { 25, 26 }, { 26, 27 }, { 27, 28 },
+	{ 28, 29 }, { 29, 30 }, { 30, 31 }, { 31, 32 },
+	{ 127, 128 }, { 255, 256 }, { 511, 512 }, { 0, 0 }
+};
+
+static const struct clk_div_table bm1880_div_table_4[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 5 }, { 5, 6 }, { 6, 7 }, { 7, 8 },
+	{ 8, 9 }, { 9, 10 }, { 10, 11 }, { 11, 12 },
+	{ 12, 13 }, { 13, 14 }, { 14, 15 }, { 15, 16 },
+	{ 16, 17 }, { 17, 18 }, { 18, 19 }, { 19, 20 },
+	{ 20, 21 }, { 21, 22 }, { 22, 23 }, { 23, 24 },
+	{ 24, 25 }, { 25, 26 }, { 26, 27 }, { 27, 28 },
+	{ 28, 29 }, { 29, 30 }, { 30, 31 }, { 31, 32 },
+	{ 127, 128 }, { 255, 256 }, { 511, 512 }, { 65535, 65536 },
+	{ 0, 0 }
+};
+
+/*
+ * Clocks marked as CRITICAL are needed for the proper functioning
+ * of the SoC.
+ */
+static struct bm1880_div_hw_clock bm1880_div_clks[] = {
+	CLK_DIV(BM1880_CLK_DIV_0_RV, "clk_div_0_rv", &bm1880_pll_clks[1].hw,
+		BM1880_CLK_DIV12, 16, 5, 1, bm1880_div_table_0, 0),
+	CLK_DIV(BM1880_CLK_DIV_1_RV, "clk_div_1_rv", &bm1880_pll_clks[2].hw,
+		BM1880_CLK_DIV13, 16, 5, 1, bm1880_div_table_0, 0),
+	CLK_DIV(BM1880_CLK_DIV_UART_500M, "clk_div_uart_500m", &bm1880_pll_clks[2].hw,
+		BM1880_CLK_DIV15, 16, 7, 3, bm1880_div_table_1, 0),
+	CLK_DIV(BM1880_CLK_DIV_0_AXI1, "clk_div_0_axi1", &bm1880_pll_clks[0].hw,
+		BM1880_CLK_DIV21, 16, 5, 2, bm1880_div_table_0,
+		0),
+	CLK_DIV(BM1880_CLK_DIV_1_AXI1, "clk_div_1_axi1", &bm1880_pll_clks[2].hw,
+		BM1880_CLK_DIV22, 16, 5, 3, bm1880_div_table_0,
+		0),
+	CLK_DIV(BM1880_CLK_DIV_0_AXI6, "clk_div_0_axi6", &bm1880_pll_clks[2].hw,
+		BM1880_CLK_DIV27, 16, 5, 15, bm1880_div_table_0,
+		0),
+	CLK_DIV(BM1880_CLK_DIV_1_AXI6, "clk_div_1_axi6", &bm1880_pll_clks[0].hw,
+		BM1880_CLK_DIV28, 16, 5, 11, bm1880_div_table_0,
+		0),
+	CLK_DIV(BM1880_CLK_DIV_12M_USB, "clk_div_12m_usb", &bm1880_pll_clks[2].hw,
+		BM1880_CLK_DIV18, 16, 7, 125, bm1880_div_table_1, 0),
+};
+
+/*
+ * Clocks marked as CRITICAL are all needed for the proper functioning
+ * of the SoC.
+ */
+static struct bm1880_composite_clock bm1880_composite_clks[] = {
+	/*
+	 * Since clk_a53 and clk_50m_a53 clocks are sourcing the CPU core,
+	 * let's mark them as critical to avoid gating.
+	 */
+	GATE_MUX(BM1880_CLK_A53, "clk_a53", clk_a53_parents,
+		 BM1880_CLK_ENABLE0, 0, BM1880_CLK_SELECT, 0,
+		 CLK_IS_CRITICAL),
+	GATE_DIV(BM1880_CLK_50M_A53, "clk_50m_a53", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 1, BM1880_CLK_DIV0, 16, 5, 30,
+		 bm1880_div_table_0, CLK_IS_CRITICAL),
+	GATE_DIV(BM1880_CLK_EFUSE, "clk_efuse", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 5, BM1880_CLK_DIV1, 16, 7, 60,
+		 bm1880_div_table_1, 0),
+	GATE_DIV(BM1880_CLK_EMMC, "clk_emmc", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 8, BM1880_CLK_DIV2, 16, 5, 15,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_100K_EMMC, "clk_100k_emmc", "clk_div_12m_usb",
+		 BM1880_CLK_ENABLE0, 9, BM1880_CLK_DIV3, 16, 8, 120,
+		 bm1880_div_table_2, 0),
+	GATE_DIV(BM1880_CLK_SD, "clk_sd", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 11, BM1880_CLK_DIV4, 16, 5, 15,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_100K_SD, "clk_100k_sd", "clk_div_12m_usb",
+		 BM1880_CLK_ENABLE0, 12, BM1880_CLK_DIV5, 16, 8, 120,
+		 bm1880_div_table_2, 0),
+	GATE_DIV(BM1880_CLK_500M_ETH0, "clk_500m_eth0", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 13, BM1880_CLK_DIV6, 16, 5, 3,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_500M_ETH1, "clk_500m_eth1", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 15, BM1880_CLK_DIV7, 16, 5, 3,
+		 bm1880_div_table_0, 0),
+	/* Don't gate GPIO clocks as it is not owned by the GPIO driver */
+	GATE_DIV(BM1880_CLK_GPIO_DB, "clk_gpio_db", "clk_div_12m_usb",
+		 BM1880_CLK_ENABLE0, 20, BM1880_CLK_DIV8, 16, 16, 120,
+		 bm1880_div_table_4, CLK_IGNORE_UNUSED),
+	GATE_DIV(BM1880_CLK_SDMA_AUD, "clk_sdma_aud", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 24, BM1880_CLK_DIV9, 16, 7, 61,
+		 bm1880_div_table_1, 0),
+	GATE_DIV(BM1880_CLK_JPEG_AXI, "clk_jpeg_axi", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 28, BM1880_CLK_DIV10, 16, 5, 4,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_NF, "clk_nf", "clk_fpll",
+		 BM1880_CLK_ENABLE0, 31, BM1880_CLK_DIV11, 16, 5, 30,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_TPU_AXI, "clk_tpu_axi", "clk_spll",
+		 BM1880_CLK_ENABLE1, 3, BM1880_CLK_DIV14, 16, 5, 1,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_125M_USB, "clk_125m_usb", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 9, BM1880_CLK_DIV16, 16, 5, 12,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_33K_USB, "clk_33k_usb", "clk_div_12m_usb",
+		 BM1880_CLK_ENABLE1, 10, BM1880_CLK_DIV17, 16, 9, 363,
+		 bm1880_div_table_3, 0),
+	GATE_DIV(BM1880_CLK_VIDEO_AXI, "clk_video_axi", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 13, BM1880_CLK_DIV19, 16, 5, 4,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_VPP_AXI, "clk_vpp_axi", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 14, BM1880_CLK_DIV20, 16, 5, 4,
+		 bm1880_div_table_0, 0),
+	GATE_MUX(BM1880_CLK_AXI1, "clk_axi1", clk_axi1_parents,
+		 BM1880_CLK_ENABLE1, 15, BM1880_CLK_SELECT, 2, 0),
+	GATE_DIV(BM1880_CLK_AXI2, "clk_axi2", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 17, BM1880_CLK_DIV23, 16, 5, 3,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_AXI3, "clk_axi3", "clk_mux_rv",
+		 BM1880_CLK_ENABLE1, 18, BM1880_CLK_DIV24, 16, 5, 2,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_AXI4, "clk_axi4", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 19, BM1880_CLK_DIV25, 16, 5, 6,
+		 bm1880_div_table_0, 0),
+	GATE_DIV(BM1880_CLK_AXI5, "clk_axi5", "clk_fpll",
+		 BM1880_CLK_ENABLE1, 20, BM1880_CLK_DIV26, 16, 5, 15,
+		 bm1880_div_table_0, 0),
+};
+
+static unsigned long bm1880_pll_rate_calc(u32 regval, unsigned long parent_rate)
+{
+	u64 numerator;
+	u32 fbdiv, fref, refdiv;
+	u32 postdiv1, postdiv2, denominator;
+
+	fbdiv = (regval >> 16) & 0xfff;
+	fref = parent_rate;
+	refdiv = regval & 0x1f;
+	postdiv1 = (regval >> 8) & 0x7;
+	postdiv2 = (regval >> 12) & 0x7;
+
+	numerator = parent_rate * fbdiv;
+	denominator = refdiv * postdiv1 * postdiv2;
+	do_div(numerator, denominator);
+
+	return (unsigned long)numerator;
+}
+
+static unsigned long bm1880_pll_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct bm1880_pll_hw_clock *pll_hw = to_bm1880_pll_clk(hw);
+	unsigned long rate;
+	u32 regval;
+
+	regval = readl(pll_hw->base + pll_hw->pll.reg);
+	rate = bm1880_pll_rate_calc(regval, parent_rate);
+
+	return rate;
+}
+
+static const struct clk_ops bm1880_pll_ops = {
+	.recalc_rate	= bm1880_pll_recalc_rate,
+};
+
+static struct clk_hw *bm1880_clk_register_pll(struct bm1880_pll_hw_clock *pll_clk,
+					      void __iomem *sys_base)
+{
+	struct clk_hw *hw;
+	int err;
+
+	pll_clk->base = sys_base;
+	hw = &pll_clk->hw;
+
+	err = clk_hw_register(NULL, hw);
+	if (err)
+		return ERR_PTR(err);
+
+	return hw;
+}
+
+static void bm1880_clk_unregister_pll(struct clk_hw *hw)
+{
+	struct bm1880_pll_hw_clock *pll_hw = to_bm1880_pll_clk(hw);
+
+	clk_hw_unregister(hw);
+	kfree(pll_hw);
+}
+
+static int bm1880_clk_register_plls(struct bm1880_pll_hw_clock *clks,
+				    int num_clks,
+				    struct bm1880_clock_data *data)
+{
+	struct clk_hw *hw;
+	void __iomem *pll_base = data->pll_base;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		struct bm1880_pll_hw_clock *bm1880_clk = &clks[i];
+
+		hw = bm1880_clk_register_pll(bm1880_clk, pll_base);
+		if (IS_ERR(hw)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, bm1880_clk->pll.name);
+			goto err_clk;
+		}
+
+		data->hw_data.hws[clks[i].pll.id] = hw;
+	}
+
+	return 0;
+
+err_clk:
+	while (i--)
+		bm1880_clk_unregister_pll(data->hw_data.hws[clks[i].pll.id]);
+
+	return PTR_ERR(hw);
+}
+
+static int bm1880_clk_register_mux(const struct bm1880_mux_clock *clks,
+				   int num_clks,
+				   struct bm1880_clock_data *data)
+{
+	struct clk_hw *hw;
+	void __iomem *sys_base = data->sys_base;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		hw = clk_hw_register_mux(NULL, clks[i].name,
+					 clks[i].parents,
+					 clks[i].num_parents,
+					 clks[i].flags,
+					 sys_base + clks[i].reg,
+					 clks[i].shift, 1, 0,
+					 &bm1880_clk_lock);
+		if (IS_ERR(hw)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			goto err_clk;
+		}
+
+		data->hw_data.hws[clks[i].id] = hw;
+	}
+
+	return 0;
+
+err_clk:
+	while (i--)
+		clk_hw_unregister_mux(data->hw_data.hws[clks[i].id]);
+
+	return PTR_ERR(hw);
+}
+
+static unsigned long bm1880_clk_div_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw);
+	struct bm1880_div_clock *div = &div_hw->div;
+	void __iomem *reg_addr = div_hw->base + div->reg;
+	unsigned int val;
+	unsigned long rate;
+
+	if (!(readl(reg_addr) & BIT(3))) {
+		val = div->initval;
+	} else {
+		val = readl(reg_addr) >> div->shift;
+		val &= clk_div_mask(div->width);
+	}
+
+	rate = divider_recalc_rate(hw, parent_rate, val, div->table,
+				   div->flags, div->width);
+
+	return rate;
+}
+
+static long bm1880_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long *prate)
+{
+	struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw);
+	struct bm1880_div_clock *div = &div_hw->div;
+	void __iomem *reg_addr = div_hw->base + div->reg;
+
+	if (div->flags & CLK_DIVIDER_READ_ONLY) {
+		u32 val;
+
+		val = readl(reg_addr) >> div->shift;
+		val &= clk_div_mask(div->width);
+
+		return divider_ro_round_rate(hw, rate, prate, div->table,
+					     div->width, div->flags,
+					     val);
+	}
+
+	return divider_round_rate(hw, rate, prate, div->table,
+				  div->width, div->flags);
+}
+
+static int bm1880_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
+				   unsigned long parent_rate)
+{
+	struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw);
+	struct bm1880_div_clock *div = &div_hw->div;
+	void __iomem *reg_addr = div_hw->base + div->reg;
+	unsigned long flags = 0;
+	int value;
+	u32 val;
+
+	value = divider_get_val(rate, parent_rate, div->table,
+				div->width, div_hw->div.flags);
+	if (value < 0)
+		return value;
+
+	if (div_hw->lock)
+		spin_lock_irqsave(div_hw->lock, flags);
+	else
+		__acquire(div_hw->lock);
+
+	val = readl(reg_addr);
+	val &= ~(clk_div_mask(div->width) << div_hw->div.shift);
+	val |= (u32)value << div->shift;
+	writel(val, reg_addr);
+
+	if (div_hw->lock)
+		spin_unlock_irqrestore(div_hw->lock, flags);
+	else
+		__release(div_hw->lock);
+
+	return 0;
+}
+
+static const struct clk_ops bm1880_clk_div_ops = {
+	.recalc_rate = bm1880_clk_div_recalc_rate,
+	.round_rate = bm1880_clk_div_round_rate,
+	.set_rate = bm1880_clk_div_set_rate,
+};
+
+static struct clk_hw *bm1880_clk_register_div(struct bm1880_div_hw_clock *div_clk,
+					      void __iomem *sys_base)
+{
+	struct clk_hw *hw;
+	int err;
+
+	div_clk->div.flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO;
+	div_clk->base = sys_base;
+	div_clk->lock = &bm1880_clk_lock;
+
+	hw = &div_clk->hw;
+	err = clk_hw_register(NULL, hw);
+	if (err)
+		return ERR_PTR(err);
+
+	return hw;
+}
+
+static void bm1880_clk_unregister_div(struct clk_hw *hw)
+{
+	struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw);
+
+	clk_hw_unregister(hw);
+	kfree(div_hw);
+}
+
+static int bm1880_clk_register_divs(struct bm1880_div_hw_clock *clks,
+				    int num_clks,
+				    struct bm1880_clock_data *data)
+{
+	struct clk_hw *hw;
+	void __iomem *sys_base = data->sys_base;
+	unsigned int i, id;
+
+	for (i = 0; i < num_clks; i++) {
+		struct bm1880_div_hw_clock *bm1880_clk = &clks[i];
+
+		hw = bm1880_clk_register_div(bm1880_clk, sys_base);
+		if (IS_ERR(hw)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, bm1880_clk->div.name);
+			goto err_clk;
+		}
+
+		id = clks[i].div.id;
+		data->hw_data.hws[id] = hw;
+	}
+
+	return 0;
+
+err_clk:
+	while (i--)
+		bm1880_clk_unregister_div(data->hw_data.hws[clks[i].div.id]);
+
+	return PTR_ERR(hw);
+}
+
+static int bm1880_clk_register_gate(const struct bm1880_gate_clock *clks,
+				    int num_clks,
+				    struct bm1880_clock_data *data)
+{
+	struct clk_hw *hw;
+	void __iomem *sys_base = data->sys_base;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		hw = clk_hw_register_gate(NULL, clks[i].name,
+					  clks[i].parent,
+					  clks[i].flags,
+					  sys_base + clks[i].gate_reg,
+					  clks[i].gate_shift, 0,
+					  &bm1880_clk_lock);
+		if (IS_ERR(hw)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			goto err_clk;
+		}
+
+		data->hw_data.hws[clks[i].id] = hw;
+	}
+
+	return 0;
+
+err_clk:
+	while (i--)
+		clk_hw_unregister_gate(data->hw_data.hws[clks[i].id]);
+
+	return PTR_ERR(hw);
+}
+
+static struct clk_hw *bm1880_clk_register_composite(struct bm1880_composite_clock *clks,
+						    void __iomem *sys_base)
+{
+	struct clk_hw *hw;
+	struct clk_mux *mux = NULL;
+	struct clk_gate *gate = NULL;
+	struct bm1880_div_hw_clock *div_hws = NULL;
+	struct clk_hw *mux_hw = NULL, *gate_hw = NULL, *div_hw = NULL;
+	const struct clk_ops *mux_ops = NULL, *gate_ops = NULL, *div_ops = NULL;
+	const char * const *parent_names;
+	const char *parent;
+	int num_parents;
+	int ret;
+
+	if (clks->mux_shift >= 0) {
+		mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+		if (!mux)
+			return ERR_PTR(-ENOMEM);
+
+		mux->reg = sys_base + clks->mux_reg;
+		mux->mask = 1;
+		mux->shift = clks->mux_shift;
+		mux_hw = &mux->hw;
+		mux_ops = &clk_mux_ops;
+		mux->lock = &bm1880_clk_lock;
+
+		parent_names = clks->parents;
+		num_parents = clks->num_parents;
+	} else {
+		parent = clks->parent;
+		parent_names = &parent;
+		num_parents = 1;
+	}
+
+	if (clks->gate_shift >= 0) {
+		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+		if (!gate) {
+			ret = -ENOMEM;
+			goto err_out;
+		}
+
+		gate->reg = sys_base + clks->gate_reg;
+		gate->bit_idx = clks->gate_shift;
+		gate->lock = &bm1880_clk_lock;
+
+		gate_hw = &gate->hw;
+		gate_ops = &clk_gate_ops;
+	}
+
+	if (clks->div_shift >= 0) {
+		div_hws = kzalloc(sizeof(*div_hws), GFP_KERNEL);
+		if (!div_hws) {
+			ret = -ENOMEM;
+			goto err_out;
+		}
+
+		div_hws->base = sys_base;
+		div_hws->div.reg = clks->div_reg;
+		div_hws->div.shift = clks->div_shift;
+		div_hws->div.width = clks->div_width;
+		div_hws->div.table = clks->table;
+		div_hws->div.initval = clks->div_initval;
+		div_hws->lock = &bm1880_clk_lock;
+		div_hws->div.flags = CLK_DIVIDER_ONE_BASED |
+				     CLK_DIVIDER_ALLOW_ZERO;
+
+		div_hw = &div_hws->hw;
+		div_ops = &bm1880_clk_div_ops;
+	}
+
+	hw = clk_hw_register_composite(NULL, clks->name, parent_names,
+				       num_parents, mux_hw, mux_ops, div_hw,
+				       div_ops, gate_hw, gate_ops,
+				       clks->flags);
+
+	if (IS_ERR(hw)) {
+		ret = PTR_ERR(hw);
+		goto err_out;
+	}
+
+	return hw;
+
+err_out:
+	kfree(div_hws);
+	kfree(gate);
+	kfree(mux);
+
+	return ERR_PTR(ret);
+}
+
+static int bm1880_clk_register_composites(struct bm1880_composite_clock *clks,
+					  int num_clks,
+					  struct bm1880_clock_data *data)
+{
+	struct clk_hw *hw;
+	void __iomem *sys_base = data->sys_base;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		struct bm1880_composite_clock *bm1880_clk = &clks[i];
+
+		hw = bm1880_clk_register_composite(bm1880_clk, sys_base);
+		if (IS_ERR(hw)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, bm1880_clk->name);
+			goto err_clk;
+		}
+
+		data->hw_data.hws[clks[i].id] = hw;
+	}
+
+	return 0;
+
+err_clk:
+	while (i--)
+		clk_hw_unregister_composite(data->hw_data.hws[clks[i].id]);
+
+	return PTR_ERR(hw);
+}
+
+static int bm1880_clk_probe(struct platform_device *pdev)
+{
+	struct bm1880_clock_data *clk_data;
+	void __iomem *pll_base, *sys_base;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int num_clks, i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pll_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pll_base))
+		return PTR_ERR(pll_base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	sys_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sys_base))
+		return PTR_ERR(sys_base);
+
+	num_clks = ARRAY_SIZE(bm1880_pll_clks) +
+		   ARRAY_SIZE(bm1880_div_clks) +
+		   ARRAY_SIZE(bm1880_mux_clks) +
+		   ARRAY_SIZE(bm1880_composite_clks) +
+		   ARRAY_SIZE(bm1880_gate_clks);
+
+	clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws,
+						 num_clks), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->pll_base = pll_base;
+	clk_data->sys_base = sys_base;
+
+	for (i = 0; i < num_clks; i++)
+		clk_data->hw_data.hws[i] = ERR_PTR(-ENOENT);
+
+	clk_data->hw_data.num = num_clks;
+
+	bm1880_clk_register_plls(bm1880_pll_clks,
+				 ARRAY_SIZE(bm1880_pll_clks),
+				 clk_data);
+
+	bm1880_clk_register_divs(bm1880_div_clks,
+				 ARRAY_SIZE(bm1880_div_clks),
+				 clk_data);
+
+	bm1880_clk_register_mux(bm1880_mux_clks,
+				ARRAY_SIZE(bm1880_mux_clks),
+				clk_data);
+
+	bm1880_clk_register_composites(bm1880_composite_clks,
+				       ARRAY_SIZE(bm1880_composite_clks),
+				       clk_data);
+
+	bm1880_clk_register_gate(bm1880_gate_clks,
+				 ARRAY_SIZE(bm1880_gate_clks),
+				 clk_data);
+
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+				      &clk_data->hw_data);
+}
+
+static const struct of_device_id bm1880_of_match[] = {
+	{ .compatible = "bitmain,bm1880-clk", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, bm1880_of_match);
+
+static struct platform_driver bm1880_clk_driver = {
+	.driver = {
+		.name = "bm1880-clk",
+		.of_match_table = bm1880_of_match,
+	},
+	.probe = bm1880_clk_probe,
+};
+module_platform_driver(bm1880_clk_driver);
+
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Clock driver for Bitmain BM1880 SoC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index 4f13a681ddfc..3e9c3e608769 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -207,7 +207,7 @@ struct clk_hw *clk_hw_register_composite(struct device *dev, const char *name,
 			unsigned long flags)
 {
 	struct clk_hw *hw;
-	struct clk_init_data init;
+	struct clk_init_data init = {};
 	struct clk_composite *composite;
 	struct clk_ops *clk_composite_ops;
 	int ret;
@@ -343,3 +343,14 @@ void clk_unregister_composite(struct clk *clk)
 	clk_unregister(clk);
 	kfree(composite);
 }
+
+void clk_hw_unregister_composite(struct clk_hw *hw)
+{
+	struct clk_composite *composite;
+
+	composite = to_clk_composite(hw);
+
+	clk_hw_unregister(hw);
+	kfree(composite);
+}
+EXPORT_SYMBOL_GPL(clk_hw_unregister_composite);
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 3f9ff78c4a2a..098b2b01f0af 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -471,7 +471,7 @@ static struct clk_hw *_register_divider(struct device *dev, const char *name,
 {
 	struct clk_divider *div;
 	struct clk_hw *hw;
-	struct clk_init_data init;
+	struct clk_init_data init = {};
 	int ret;
 
 	if (clk_divider_flags & CLK_DIVIDER_HIWORD_MASK) {
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index a7e4aef7a376..2c4486c09040 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -58,7 +58,7 @@ struct clk_hw *clk_hw_register_fixed_rate_with_accuracy(struct device *dev,
 {
 	struct clk_fixed_rate *fixed;
 	struct clk_hw *hw;
-	struct clk_init_data init;
+	struct clk_init_data init = {};
 	int ret;
 
 	/* allocate fixed-rate clock */
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index 1b99fc962745..670053c58c1a 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -141,7 +141,7 @@ struct clk_hw *clk_hw_register_gate(struct device *dev, const char *name,
 {
 	struct clk_gate *gate;
 	struct clk_hw *hw;
-	struct clk_init_data init;
+	struct clk_init_data init = {};
 	int ret;
 
 	if (clk_gate_flags & CLK_GATE_HIWORD_MASK) {
diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 9d930edd6516..13304cf5f2a8 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -280,7 +280,7 @@ static int gpio_clk_driver_probe(struct platform_device *pdev)
 	else
 		clk = clk_register_gpio_gate(&pdev->dev, node->name,
 				parent_names ?  parent_names[0] : NULL, gpiod,
-				0);
+				CLK_SET_RATE_PARENT);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 66e91f740508..570b6e5b603b 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -153,7 +153,7 @@ struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name,
 {
 	struct clk_mux *mux;
 	struct clk_hw *hw;
-	struct clk_init_data init;
+	struct clk_init_data init = {};
 	u8 width = 0;
 	int ret;
 
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 1c677d7f7f53..772258de2d1f 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1187,7 +1187,7 @@ static void clk_core_disable_unprepare(struct clk_core *core)
 	clk_core_unprepare_lock(core);
 }
 
-static void clk_unprepare_unused_subtree(struct clk_core *core)
+static void __init clk_unprepare_unused_subtree(struct clk_core *core)
 {
 	struct clk_core *child;
 
@@ -1217,7 +1217,7 @@ static void clk_unprepare_unused_subtree(struct clk_core *core)
 	clk_pm_runtime_put(core);
 }
 
-static void clk_disable_unused_subtree(struct clk_core *core)
+static void __init clk_disable_unused_subtree(struct clk_core *core)
 {
 	struct clk_core *child;
 	unsigned long flags;
@@ -1263,7 +1263,7 @@ unprepare_out:
 		clk_core_disable_unprepare(core->parent);
 }
 
-static bool clk_ignore_unused;
+static bool clk_ignore_unused __initdata;
 static int __init clk_ignore_unused_setup(char *__unused)
 {
 	clk_ignore_unused = true;
@@ -1271,7 +1271,7 @@ static int __init clk_ignore_unused_setup(char *__unused)
 }
 __setup("clk_ignore_unused", clk_ignore_unused_setup);
 
-static int clk_disable_unused(void)
+static int __init clk_disable_unused(void)
 {
 	struct clk_core *core;
 
@@ -1674,6 +1674,24 @@ static int clk_fetch_parent_index(struct clk_core *core,
 	return i;
 }
 
+/**
+ * clk_hw_get_parent_index - return the index of the parent clock
+ * @hw: clk_hw associated with the clk being consumed
+ *
+ * Fetches and returns the index of parent clock. Returns -EINVAL if the given
+ * clock does not have a current parent.
+ */
+int clk_hw_get_parent_index(struct clk_hw *hw)
+{
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+
+	if (WARN_ON(parent == NULL))
+		return -EINVAL;
+
+	return clk_fetch_parent_index(hw->core, parent->core);
+}
+EXPORT_SYMBOL_GPL(clk_hw_get_parent_index);
+
 /*
  * Update the orphan status of @core and all its children.
  */
@@ -3231,6 +3249,34 @@ static inline void clk_debug_unregister(struct clk_core *core)
 }
 #endif
 
+static void clk_core_reparent_orphans_nolock(void)
+{
+	struct clk_core *orphan;
+	struct hlist_node *tmp2;
+
+	/*
+	 * walk the list of orphan clocks and reparent any that newly finds a
+	 * parent.
+	 */
+	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
+		struct clk_core *parent = __clk_init_parent(orphan);
+
+		/*
+		 * We need to use __clk_set_parent_before() and _after() to
+		 * to properly migrate any prepare/enable count of the orphan
+		 * clock. This is important for CLK_IS_CRITICAL clocks, which
+		 * are enabled during init but might not have a parent yet.
+		 */
+		if (parent) {
+			/* update the clk tree topology */
+			__clk_set_parent_before(orphan, parent);
+			__clk_set_parent_after(orphan, parent, NULL);
+			__clk_recalc_accuracies(orphan);
+			__clk_recalc_rates(orphan, 0);
+		}
+	}
+}
+
 /**
  * __clk_core_init - initialize the data structures in a struct clk_core
  * @core:	clk_core being initialized
@@ -3241,8 +3287,6 @@ static inline void clk_debug_unregister(struct clk_core *core)
 static int __clk_core_init(struct clk_core *core)
 {
 	int ret;
-	struct clk_core *orphan;
-	struct hlist_node *tmp2;
 	unsigned long rate;
 
 	if (!core)
@@ -3382,34 +3426,21 @@ static int __clk_core_init(struct clk_core *core)
 	if (core->flags & CLK_IS_CRITICAL) {
 		unsigned long flags;
 
-		clk_core_prepare(core);
+		ret = clk_core_prepare(core);
+		if (ret)
+			goto out;
 
 		flags = clk_enable_lock();
-		clk_core_enable(core);
+		ret = clk_core_enable(core);
 		clk_enable_unlock(flags);
+		if (ret) {
+			clk_core_unprepare(core);
+			goto out;
+		}
 	}
 
-	/*
-	 * walk the list of orphan clocks and reparent any that newly finds a
-	 * parent.
-	 */
-	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
-		struct clk_core *parent = __clk_init_parent(orphan);
+	clk_core_reparent_orphans_nolock();
 
-		/*
-		 * We need to use __clk_set_parent_before() and _after() to
-		 * to properly migrate any prepare/enable count of the orphan
-		 * clock. This is important for CLK_IS_CRITICAL clocks, which
-		 * are enabled during init but might not have a parent yet.
-		 */
-		if (parent) {
-			/* update the clk tree topology */
-			__clk_set_parent_before(orphan, parent);
-			__clk_set_parent_after(orphan, parent, NULL);
-			__clk_recalc_accuracies(orphan);
-			__clk_recalc_rates(orphan, 0);
-		}
-	}
 
 	kref_init(&core->ref);
 out:
@@ -3879,6 +3910,7 @@ void clk_unregister(struct clk *clk)
 					__func__, clk->core->name);
 
 	kref_put(&clk->core->ref, __clk_release);
+	free_clk(clk);
 unlock:
 	clk_prepare_unlock();
 }
@@ -4160,6 +4192,13 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(clk_notifier_unregister);
 
 #ifdef CONFIG_OF
+static void clk_core_reparent_orphans(void)
+{
+	clk_prepare_lock();
+	clk_core_reparent_orphans_nolock();
+	clk_prepare_unlock();
+}
+
 /**
  * struct of_clk_provider - Clock provider registration structure
  * @link: Entry in global list of clock providers
@@ -4255,6 +4294,8 @@ int of_clk_add_provider(struct device_node *np,
 	mutex_unlock(&of_clk_mutex);
 	pr_debug("Added clock from %pOF\n", np);
 
+	clk_core_reparent_orphans();
+
 	ret = of_clk_set_defaults(np, true);
 	if (ret < 0)
 		of_clk_del_provider(np);
@@ -4290,6 +4331,8 @@ int of_clk_add_hw_provider(struct device_node *np,
 	mutex_unlock(&of_clk_mutex);
 	pr_debug("Added clk_hw provider from %pOF\n", np);
 
+	clk_core_reparent_orphans();
+
 	ret = of_clk_set_defaults(np, true);
 	if (ret < 0)
 		of_clk_del_provider(np);
diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c
index 1ac11b6a47a3..8a23d5dfd1f8 100644
--- a/drivers/clk/davinci/pll.c
+++ b/drivers/clk/davinci/pll.c
@@ -910,7 +910,6 @@ static int davinci_pll_probe(struct platform_device *pdev)
 	struct davinci_pll_platform_data *pdata;
 	const struct of_device_id *of_id;
 	davinci_pll_init pll_init = NULL;
-	struct resource *res;
 	void __iomem *base;
 
 	of_id = of_match_device(davinci_pll_of_match, dev);
@@ -930,8 +929,7 @@ static int davinci_pll_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/davinci/psc.c b/drivers/clk/davinci/psc.c
index 5b69e24a224f..7387e7f6276e 100644
--- a/drivers/clk/davinci/psc.c
+++ b/drivers/clk/davinci/psc.c
@@ -531,7 +531,6 @@ static int davinci_psc_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	const struct of_device_id *of_id;
 	const struct davinci_psc_init_data *init_data = NULL;
-	struct resource *res;
 	void __iomem *base;
 	int ret;
 
@@ -546,8 +545,7 @@ static int davinci_psc_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/hisilicon/clk-hi3660.c b/drivers/clk/hisilicon/clk-hi3660.c
index 5b3ad26dcc77..41f61726ab19 100644
--- a/drivers/clk/hisilicon/clk-hi3660.c
+++ b/drivers/clk/hisilicon/clk-hi3660.c
@@ -333,49 +333,49 @@ static const struct hisi_mux_clock hi3660_crgctrl_mux_clks[] = {
 
 static const struct hisi_divider_clock hi3660_crgctrl_divider_clks[] = {
 	{ HI3660_CLK_DIV_UART0, "clk_div_uart0", "clk_andgt_uart0",
-	  CLK_SET_RATE_PARENT, 0xb0, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb0, 4, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_UART1, "clk_div_uart1", "clk_andgt_uart1",
-	  CLK_SET_RATE_PARENT, 0xb0, 8, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb0, 8, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_UARTH, "clk_div_uarth", "clk_andgt_uarth",
-	  CLK_SET_RATE_PARENT, 0xb0, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb0, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_MMC, "clk_div_mmc", "clk_andgt_mmc",
-	  CLK_SET_RATE_PARENT, 0xb4, 3, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb4, 3, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_SD, "clk_div_sd", "clk_andgt_sd",
-	  CLK_SET_RATE_PARENT, 0xb8, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb8, 0, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_EDC0, "clk_div_edc0", "clk_andgt_edc0",
-	  CLK_SET_RATE_PARENT, 0xbc, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xbc, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_LDI0, "clk_div_ldi0", "clk_andgt_ldi0",
-	  CLK_SET_RATE_PARENT, 0xbc, 10, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xbc, 10, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_SDIO, "clk_div_sdio", "clk_andgt_sdio",
-	  CLK_SET_RATE_PARENT, 0xc0, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xc0, 0, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_LDI1, "clk_div_ldi1", "clk_andgt_ldi1",
-	  CLK_SET_RATE_PARENT, 0xc0, 8, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xc0, 8, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_SPI, "clk_div_spi", "clk_andgt_spi",
-	  CLK_SET_RATE_PARENT, 0xc4, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xc4, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_VENC, "clk_div_venc", "clk_andgt_venc",
-	  CLK_SET_RATE_PARENT, 0xc8, 6, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xc8, 6, 5, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_VDEC, "clk_div_vdec", "clk_andgt_vdec",
-	  CLK_SET_RATE_PARENT, 0xcc, 0, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xcc, 0, 5, CLK_DIVIDER_HIWORD_MASK,  },
 	{ HI3660_CLK_DIV_VIVOBUS, "clk_div_vivobus", "clk_vivobus_andgt",
-	  CLK_SET_RATE_PARENT, 0xd0, 7, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xd0, 7, 5, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_I2C, "clk_div_i2c", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xe8, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xe8, 4, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_UFSPHY, "clk_div_ufsphy_cfg", "clk_gate_ufsphy_gt",
-	  CLK_SET_RATE_PARENT, 0xe8, 9, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xe8, 9, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_CFGBUS, "clk_div_cfgbus", "clk_div_sysbus",
-	  CLK_SET_RATE_PARENT, 0xec, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xec, 0, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_MMC0BUS, "clk_div_mmc0bus", "autodiv_emmc0bus",
-	  CLK_SET_RATE_PARENT, 0xec, 2, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xec, 2, 1, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_MMC1BUS, "clk_div_mmc1bus", "clk_div_sysbus",
-	  CLK_SET_RATE_PARENT, 0xec, 3, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xec, 3, 1, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_UFSPERI, "clk_div_ufsperi", "clk_gate_ufs_subsys",
-	  CLK_SET_RATE_PARENT, 0xec, 14, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xec, 14, 1, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_AOMM, "clk_div_aomm", "clk_aomm_andgt",
-	  CLK_SET_RATE_PARENT, 0x100, 7, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x100, 7, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_ISP_SNCLK, "clk_isp_snclk_div", "clk_isp_snclk_fac",
-	  CLK_SET_RATE_PARENT, 0x108, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x108, 0, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_IOPERI, "clk_div_ioperi", "clk_mux_ioperi",
-	  CLK_SET_RATE_PARENT, 0x108, 11, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x108, 11, 4, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 /* clk_pmuctrl */
@@ -420,13 +420,13 @@ static const struct hisi_gate_clock hi3660_sctrl_gate_clks[] = {
 	{ HI3660_PCLK_MMBUF_ANDGT, "pclk_mmbuf_andgt", "clk_sw_mmbuf",
 	  CLK_SET_RATE_PARENT, 0x258, 7, CLK_GATE_HIWORD_MASK, },
 	{ HI3660_CLK_MMBUF_PLL_ANDGT, "clk_mmbuf_pll_andgt", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x260, 11, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x260, 11, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_FLL_MMBUF_ANDGT, "clk_fll_mmbuf_andgt", "clk_fll_src",
-	  CLK_SET_RATE_PARENT, 0x260, 12, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x260, 12, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_SYS_MMBUF_ANDGT, "clk_sys_mmbuf_andgt", "clkin_sys",
-	  CLK_SET_RATE_PARENT, 0x260, 13, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x260, 13, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_GATE_PCIEPHY_GT, "clk_gate_pciephy_gt", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x268, 11, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x268, 11, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 static const char *const
@@ -446,13 +446,13 @@ static const struct hisi_mux_clock hi3660_sctrl_mux_clks[] = {
 
 static const struct hisi_divider_clock hi3660_sctrl_divider_clks[] = {
 	{ HI3660_CLK_DIV_AOBUS, "clk_div_aobus", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x254, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x254, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_PCLK_DIV_MMBUF, "pclk_div_mmbuf", "pclk_mmbuf_andgt",
-	  CLK_SET_RATE_PARENT, 0x258, 10, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x258, 10, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_ACLK_DIV_MMBUF, "aclk_div_mmbuf", "clk_mmbuf_pll_andgt",
-	  CLK_SET_RATE_PARENT, 0x258, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x258, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3660_CLK_DIV_PCIEPHY, "clk_div_pciephy", "clk_gate_pciephy_gt",
-	  CLK_SET_RATE_PARENT, 0x268, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x268, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 /* clk_iomcu */
diff --git a/drivers/clk/hisilicon/clk-hi3670.c b/drivers/clk/hisilicon/clk-hi3670.c
index fd8c837a6ea3..4d05a71683a5 100644
--- a/drivers/clk/hisilicon/clk-hi3670.c
+++ b/drivers/clk/hisilicon/clk-hi3670.c
@@ -295,61 +295,61 @@ static const struct hisi_gate_clock hi3670_crgctrl_gate_sep_clks[] = {
 
 static const struct hisi_gate_clock hi3670_crgctrl_gate_clks[] = {
 	{ HI3670_AUTODIV_SYSBUS, "autodiv_sysbus", "clk_div_sysbus",
-	  CLK_SET_RATE_PARENT, 0x404, 5, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x404, 5, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_AUTODIV_EMMC0BUS, "autodiv_emmc0bus", "autodiv_sysbus",
-	  CLK_SET_RATE_PARENT, 0x404, 1, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x404, 1, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_PCLK_ANDGT_MMC1_PCIE, "pclk_andgt_mmc1_pcie", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xf8, 13, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xf8, 13, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_GATE_VCODECBUS_GT, "clk_gate_vcodecbus_gt", "clk_mux_vcodecbus",
-	  CLK_SET_RATE_PARENT, 0x0F0, 8, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0F0, 8, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_SD, "clk_andgt_sd", "clk_mux_sd_pll",
-	  CLK_SET_RATE_PARENT, 0xF4, 3, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 3, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_SD_SYS_GT, "clk_sd_sys_gt", "clkin_sys",
-	  CLK_SET_RATE_PARENT, 0xF4, 5, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 5, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_SDIO, "clk_andgt_sdio", "clk_mux_sdio_pll",
-	  CLK_SET_RATE_PARENT, 0xF4, 8, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 8, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_SDIO_SYS_GT, "clk_sdio_sys_gt", "clkin_sys",
-	  CLK_SET_RATE_PARENT, 0xF4, 6, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 6, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_A53HPM_ANDGT, "clk_a53hpm_andgt", "clk_mux_a53hpm",
-	  CLK_SET_RATE_PARENT, 0x0F4, 7, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0F4, 7, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_320M_PLL_GT, "clk_320m_pll_gt", "clk_mux_320m",
-	  CLK_SET_RATE_PARENT, 0xF8, 10, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF8, 10, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_UARTH, "clk_andgt_uarth", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xF4, 11, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 11, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_UARTL, "clk_andgt_uartl", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xF4, 10, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 10, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_UART0, "clk_andgt_uart0", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xF4, 9, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 9, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_SPI, "clk_andgt_spi", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xF4, 13, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 13, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_PCIEAXI, "clk_andgt_pcieaxi", "clk_mux_pcieaxi",
-	  CLK_SET_RATE_PARENT, 0xfc, 15, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xfc, 15, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_AO_ASP_GT, "clk_div_ao_asp_gt", "clk_mux_ao_asp",
-	  CLK_SET_RATE_PARENT, 0xF4, 4, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 4, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_GATE_CSI_TRANS, "clk_gate_csi_trans", "clk_ppll2",
-	  CLK_SET_RATE_PARENT, 0xF4, 14, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 14, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_GATE_DSI_TRANS, "clk_gate_dsi_trans", "clk_ppll2",
-	  CLK_SET_RATE_PARENT, 0xF4, 1, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF4, 1, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_PTP, "clk_andgt_ptp", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xF8, 5, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF8, 5, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_OUT0, "clk_andgt_out0", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0xF0, 10, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF0, 10, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_OUT1, "clk_andgt_out1", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0xF0, 11, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF0, 11, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLKGT_DP_AUDIO_PLL_AO, "clkgt_dp_audio_pll_ao", "clk_ppll6",
-	  CLK_SET_RATE_PARENT, 0xF8, 15, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF8, 15, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_VDEC, "clk_andgt_vdec", "clk_mux_vdec",
-	  CLK_SET_RATE_PARENT, 0xF0, 13, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF0, 13, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_VENC, "clk_andgt_venc", "clk_mux_venc",
-	  CLK_SET_RATE_PARENT, 0xF0, 9, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xF0, 9, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ISP_SNCLK_ANGT, "clk_isp_snclk_angt", "clk_div_a53hpm",
-	  CLK_SET_RATE_PARENT, 0x108, 2, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x108, 2, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_RXDPHY, "clk_andgt_rxdphy", "clk_div_a53hpm",
-	  CLK_SET_RATE_PARENT, 0x0F0, 12, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0F0, 12, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_ICS, "clk_andgt_ics", "clk_mux_ics",
-	  CLK_SET_RATE_PARENT, 0xf0, 14, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xf0, 14, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_AUTODIV_DMABUS, "autodiv_dmabus", "autodiv_sysbus",
-	  CLK_SET_RATE_PARENT, 0x404, 3, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x404, 3, CLK_GATE_HIWORD_MASK, },
 };
 
 static const char *const
@@ -485,57 +485,57 @@ static const struct hisi_mux_clock hi3670_crgctrl_mux_clks[] = {
 
 static const struct hisi_divider_clock hi3670_crgctrl_divider_clks[] = {
 	{ HI3670_CLK_DIV_CFGBUS, "clk_div_cfgbus", "clk_div_sysbus",
-	  CLK_SET_RATE_PARENT, 0xEC, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xEC, 0, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_MMC0BUS, "clk_div_mmc0bus", "autodiv_emmc0bus",
-	  CLK_SET_RATE_PARENT, 0x0EC, 2, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0EC, 2, 1, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_MMC1BUS, "clk_div_mmc1bus", "clk_div_sysbus",
-	  CLK_SET_RATE_PARENT, 0x0EC, 3, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0EC, 3, 1, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_PCLK_DIV_MMC1_PCIE, "pclk_div_mmc1_pcie", "pclk_andgt_mmc1_pcie",
-	  CLK_SET_RATE_PARENT, 0xb4, 6, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb4, 6, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_VCODECBUS, "clk_div_vcodecbus", "clk_gate_vcodecbus_gt",
-	  CLK_SET_RATE_PARENT, 0x0BC, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x0BC, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_SD, "clk_div_sd", "clk_andgt_sd",
-	  CLK_SET_RATE_PARENT, 0xB8, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xB8, 0, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_SDIO, "clk_div_sdio", "clk_andgt_sdio",
-	  CLK_SET_RATE_PARENT, 0xC0, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xC0, 0, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_UARTH, "clk_div_uarth", "clk_andgt_uarth",
-	  CLK_SET_RATE_PARENT, 0xB0, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xB0, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_UARTL, "clk_div_uartl", "clk_andgt_uartl",
-	  CLK_SET_RATE_PARENT, 0xB0, 8, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xB0, 8, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_UART0, "clk_div_uart0", "clk_andgt_uart0",
-	  CLK_SET_RATE_PARENT, 0xB0, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xB0, 4, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_I2C, "clk_div_i2c", "clk_div_320m",
-	  CLK_SET_RATE_PARENT, 0xE8, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xE8, 4, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_SPI, "clk_div_spi", "clk_andgt_spi",
-	  CLK_SET_RATE_PARENT, 0xC4, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xC4, 12, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_PCIEAXI, "clk_div_pcieaxi", "clk_andgt_pcieaxi",
-	  CLK_SET_RATE_PARENT, 0xb4, 0, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xb4, 0, 5, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_AO_ASP, "clk_div_ao_asp", "clk_div_ao_asp_gt",
-	  CLK_SET_RATE_PARENT, 0x108, 6, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x108, 6, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_CSI_TRANS, "clk_div_csi_trans", "clk_gate_csi_trans",
-	  CLK_SET_RATE_PARENT, 0xD4, 0, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xD4, 0, 5, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_DSI_TRANS, "clk_div_dsi_trans", "clk_gate_dsi_trans",
-	  CLK_SET_RATE_PARENT, 0xD4, 10, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xD4, 10, 5, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_PTP, "clk_div_ptp", "clk_andgt_ptp",
-	  CLK_SET_RATE_PARENT, 0xD8, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xD8, 0, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_CLKOUT0_PLL, "clk_div_clkout0_pll", "clk_andgt_out0",
-	  CLK_SET_RATE_PARENT, 0xe0, 4, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xe0, 4, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_CLKOUT1_PLL, "clk_div_clkout1_pll", "clk_andgt_out1",
-	  CLK_SET_RATE_PARENT, 0xe0, 10, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xe0, 10, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLKDIV_DP_AUDIO_PLL_AO, "clkdiv_dp_audio_pll_ao", "clkgt_dp_audio_pll_ao",
-	  CLK_SET_RATE_PARENT, 0xBC, 11, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xBC, 11, 4, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_VDEC, "clk_div_vdec", "clk_andgt_vdec",
-	  CLK_SET_RATE_PARENT, 0xC4, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xC4, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_VENC, "clk_div_venc", "clk_andgt_venc",
-	  CLK_SET_RATE_PARENT, 0xC0, 8, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xC0, 8, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_ISP_SNCLK_DIV0, "clk_isp_snclk_div0", "clk_isp_snclk_fac",
-	  CLK_SET_RATE_PARENT, 0x108, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x108, 0, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_ISP_SNCLK_DIV1, "clk_isp_snclk_div1", "clk_isp_snclk_fac",
-	  CLK_SET_RATE_PARENT, 0x10C, 14, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x10C, 14, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_ISP_SNCLK_DIV2, "clk_isp_snclk_div2", "clk_isp_snclk_fac",
-	  CLK_SET_RATE_PARENT, 0x10C, 11, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x10C, 11, 2, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_ICS, "clk_div_ics", "clk_andgt_ics",
-	  CLK_SET_RATE_PARENT, 0xE4, 9, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0xE4, 9, 6, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 /* clk_pmuctrl */
@@ -608,12 +608,12 @@ static const struct hisi_gate_clock hi3670_sctrl_gate_sep_clks[] = {
 
 static const struct hisi_gate_clock hi3670_sctrl_gate_clks[] = {
 	{ HI3670_CLK_ANDGT_IOPERI, "clk_andgt_ioperi", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x270, 6, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x270, 6, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLKANDGT_ASP_SUBSYS_PERI, "clkandgt_asp_subsys_peri",
 	  "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x268, 3, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x268, 3, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANGT_ASP_SUBSYS, "clk_angt_asp_subsys", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x258, 0, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x258, 0, CLK_GATE_HIWORD_MASK, },
 };
 
 static const char *const
@@ -650,19 +650,19 @@ static const struct hisi_mux_clock hi3670_sctrl_mux_clks[] = {
 
 static const struct hisi_divider_clock hi3670_sctrl_divider_clks[] = {
 	{ HI3670_CLK_DIV_AOBUS, "clk_div_aobus", "clk_ppll0",
-	  CLK_SET_RATE_PARENT, 0x254, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x254, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_UFS_SUBSYS, "clk_div_ufs_subsys", "clk_mux_ufs_subsys",
-	  CLK_SET_RATE_PARENT, 0x274, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x274, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_IOPERI, "clk_div_ioperi", "clk_andgt_ioperi",
-	  CLK_SET_RATE_PARENT, 0x270, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x270, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_CLKOUT0_TCXO, "clk_div_clkout0_tcxo", "clkin_sys",
-	  CLK_SET_RATE_PARENT, 0x254, 6, 3, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x254, 6, 3, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_CLKOUT1_TCXO, "clk_div_clkout1_tcxo", "clkin_sys",
-	  CLK_SET_RATE_PARENT, 0x254, 9, 3, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x254, 9, 3, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_ASP_SUBSYS_PERI_DIV, "clk_asp_subsys_peri_div", "clkandgt_asp_subsys_peri",
-	  CLK_SET_RATE_PARENT, 0x268, 0, 3, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x268, 0, 3, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_ASP_SUBSYS, "clk_div_asp_subsys", "clk_angt_asp_subsys",
-	  CLK_SET_RATE_PARENT, 0x250, 0, 3, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x250, 0, 3, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 /* clk_iomcu */
@@ -732,17 +732,17 @@ static const struct hisi_gate_clock hi3670_media1_gate_sep_clks[] = {
 
 static const struct hisi_gate_clock hi3670_media1_gate_clks[] = {
 	{ HI3670_CLK_GATE_VIVOBUS_ANDGT, "clk_gate_vivobus_andgt", "clk_mux_vivobus",
-	  CLK_SET_RATE_PARENT, 0x84, 3, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 3, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_EDC0, "clk_andgt_edc0", "clk_mux_edc0",
-	  CLK_SET_RATE_PARENT, 0x84, 7, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 7, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_LDI0, "clk_andgt_ldi0", "clk_mux_ldi0",
-	  CLK_SET_RATE_PARENT, 0x84, 9, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 9, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_ANDGT_LDI1, "clk_andgt_ldi1", "clk_mux_ldi1",
-	  CLK_SET_RATE_PARENT, 0x84, 8, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 8, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_CLK_MMBUF_PLL_ANDGT, "clk_mmbuf_pll_andgt", "clk_sw_mmbuf",
-	  CLK_SET_RATE_PARENT, 0x84, 14, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 14, CLK_GATE_HIWORD_MASK, },
 	{ HI3670_PCLK_MMBUF_ANDGT, "pclk_mmbuf_andgt", "aclk_div_mmbuf",
-	  CLK_SET_RATE_PARENT, 0x84, 15, CLK_GATE_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x84, 15, CLK_GATE_HIWORD_MASK, },
 };
 
 static const char *const
@@ -799,17 +799,17 @@ static const struct hisi_mux_clock hi3670_media1_mux_clks[] = {
 
 static const struct hisi_divider_clock hi3670_media1_divider_clks[] = {
 	{ HI3670_CLK_DIV_VIVOBUS, "clk_div_vivobus", "clk_gate_vivobus_andgt",
-	  CLK_SET_RATE_PARENT, 0x74, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x74, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_EDC0, "clk_div_edc0", "clk_andgt_edc0",
-	  CLK_SET_RATE_PARENT, 0x68, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x68, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_LDI0, "clk_div_ldi0", "clk_andgt_ldi0",
-	  CLK_SET_RATE_PARENT, 0x60, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x60, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_CLK_DIV_LDI1, "clk_div_ldi1", "clk_andgt_ldi1",
-	  CLK_SET_RATE_PARENT, 0x64, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x64, 0, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_ACLK_DIV_MMBUF, "aclk_div_mmbuf", "clk_mmbuf_pll_andgt",
-	  CLK_SET_RATE_PARENT, 0x7C, 10, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x7C, 10, 6, CLK_DIVIDER_HIWORD_MASK, },
 	{ HI3670_PCLK_DIV_MMBUF, "pclk_div_mmbuf", "pclk_mmbuf_andgt",
-	  CLK_SET_RATE_PARENT, 0x78, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+	  CLK_SET_RATE_PARENT, 0x78, 0, 2, CLK_DIVIDER_HIWORD_MASK, },
 };
 
 /* clk_media2 */
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index b2c5b6bbb1c1..e7cdf72d4b06 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -86,7 +86,8 @@ static void __init hi6220_clk_ao_init(struct device_node *np)
 	hisi_clk_register_gate_sep(hi6220_separated_gate_clks_ao,
 				ARRAY_SIZE(hi6220_separated_gate_clks_ao), clk_data_ao);
 }
-CLK_OF_DECLARE(hi6220_clk_ao, "hisilicon,hi6220-aoctrl", hi6220_clk_ao_init);
+/* Allow reset driver to probe as well */
+CLK_OF_DECLARE_DRIVER(hi6220_clk_ao, "hisilicon,hi6220-aoctrl", hi6220_clk_ao_init);
 
 
 /* clocks in sysctrl */
diff --git a/drivers/clk/hisilicon/reset.c b/drivers/clk/hisilicon/reset.c
index 2e22fea2a2e7..93cee17db8b1 100644
--- a/drivers/clk/hisilicon/reset.c
+++ b/drivers/clk/hisilicon/reset.c
@@ -90,14 +90,12 @@ static const struct reset_control_ops hisi_reset_ops = {
 struct hisi_reset_controller *hisi_reset_init(struct platform_device *pdev)
 {
 	struct hisi_reset_controller *rstc;
-	struct resource *res;
 
 	rstc = devm_kmalloc(&pdev->dev, sizeof(*rstc), GFP_KERNEL);
 	if (!rstc)
 		return NULL;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rstc->membase = devm_ioremap_resource(&pdev->dev, res);
+	rstc->membase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rstc->membase))
 		return NULL;
 
diff --git a/drivers/clk/imgtec/clk-boston.c b/drivers/clk/imgtec/clk-boston.c
index 33ab4ff61165..b00cbd045af5 100644
--- a/drivers/clk/imgtec/clk-boston.c
+++ b/drivers/clk/imgtec/clk-boston.c
@@ -58,8 +58,7 @@ static void __init clk_boston_setup(struct device_node *np)
 	cpu_div = ext_field(mmcmdiv, BOSTON_PLAT_MMCMDIV_CLK1DIV);
 	cpu_freq = mult_frac(in_freq, mul, cpu_div);
 
-	onecell = kzalloc(sizeof(*onecell) +
-			  (BOSTON_CLK_COUNT * sizeof(struct clk_hw *)),
+	onecell = kzalloc(struct_size(onecell, hws, BOSTON_CLK_COUNT),
 			  GFP_KERNEL);
 	if (!onecell)
 		return;
diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c
index 388bdb94f841..d3486ee79ab5 100644
--- a/drivers/clk/imx/clk-composite-8m.c
+++ b/drivers/clk/imx/clk-composite-8m.c
@@ -142,6 +142,7 @@ struct clk *imx8m_clk_composite_flags(const char *name,
 	mux->reg = reg;
 	mux->shift = PCG_PCS_SHIFT;
 	mux->mask = PCG_PCS_MASK;
+	mux->lock = &imx_ccm_lock;
 
 	div = kzalloc(sizeof(*div), GFP_KERNEL);
 	if (!div)
@@ -161,6 +162,7 @@ struct clk *imx8m_clk_composite_flags(const char *name,
 	gate_hw = &gate->hw;
 	gate->reg = reg;
 	gate->bit_idx = PCG_CGC_SHIFT;
+	gate->lock = &imx_ccm_lock;
 
 	hw = clk_hw_register_composite(NULL, name, parent_names, num_parents,
 			mux_hw, &clk_mux_ops, div_hw,
diff --git a/drivers/clk/imx/clk-imx6sll.c b/drivers/clk/imx/clk-imx6sll.c
index 5f3e92c09a5e..8e8288bda4d0 100644
--- a/drivers/clk/imx/clk-imx6sll.c
+++ b/drivers/clk/imx/clk-imx6sll.c
@@ -107,12 +107,12 @@ static void __init imx6sll_clocks_init(struct device_node *ccm_node)
 
 	hws[IMX6SLL_CLK_DUMMY] = imx_clk_hw_fixed("dummy", 0);
 
-	hws[IMX6SLL_CLK_CKIL] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ckil"));
-	hws[IMX6SLL_CLK_OSC] = __clk_get_hw(of_clk_get_by_name(ccm_node, "osc"));
+	hws[IMX6SLL_CLK_CKIL] = imx_obtain_fixed_clk_hw(ccm_node, "ckil");
+	hws[IMX6SLL_CLK_OSC] = imx_obtain_fixed_clk_hw(ccm_node, "osc");
 
 	/* ipp_di clock is external input */
-	hws[IMX6SLL_CLK_IPP_DI0] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di0"));
-	hws[IMX6SLL_CLK_IPP_DI1] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di1"));
+	hws[IMX6SLL_CLK_IPP_DI0] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di0");
+	hws[IMX6SLL_CLK_IPP_DI1] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di1");
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6sll-anatop");
 	base = of_iomap(np, 0);
diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c
index c4685c01929a..89ba71271e5c 100644
--- a/drivers/clk/imx/clk-imx6sx.c
+++ b/drivers/clk/imx/clk-imx6sx.c
@@ -139,16 +139,16 @@ static void __init imx6sx_clocks_init(struct device_node *ccm_node)
 
 	hws[IMX6SX_CLK_DUMMY] = imx_clk_hw_fixed("dummy", 0);
 
-	hws[IMX6SX_CLK_CKIL] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ckil"));
-	hws[IMX6SX_CLK_OSC] = __clk_get_hw(of_clk_get_by_name(ccm_node, "osc"));
+	hws[IMX6SX_CLK_CKIL] = imx_obtain_fixed_clk_hw(ccm_node, "ckil");
+	hws[IMX6SX_CLK_OSC] = imx_obtain_fixed_clk_hw(ccm_node, "osc");
 
 	/* ipp_di clock is external input */
-	hws[IMX6SX_CLK_IPP_DI0] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di0"));
-	hws[IMX6SX_CLK_IPP_DI1] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di1"));
+	hws[IMX6SX_CLK_IPP_DI0] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di0");
+	hws[IMX6SX_CLK_IPP_DI1] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di1");
 
 	/* Clock source from external clock via CLK1/2 PAD */
-	hws[IMX6SX_CLK_ANACLK1] = __clk_get_hw(of_clk_get_by_name(ccm_node, "anaclk1"));
-	hws[IMX6SX_CLK_ANACLK2] = __clk_get_hw(of_clk_get_by_name(ccm_node, "anaclk2"));
+	hws[IMX6SX_CLK_ANACLK1] = imx_obtain_fixed_clk_hw(ccm_node, "anaclk1");
+	hws[IMX6SX_CLK_ANACLK2] = imx_obtain_fixed_clk_hw(ccm_node, "anaclk2");
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6sx-anatop");
 	base = of_iomap(np, 0);
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index bc931988fe7b..dafc8806b03e 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -126,12 +126,12 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 
 	hws[IMX6UL_CLK_DUMMY] = imx_clk_hw_fixed("dummy", 0);
 
-	hws[IMX6UL_CLK_CKIL] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ckil"));
-	hws[IMX6UL_CLK_OSC] = __clk_get_hw(of_clk_get_by_name(ccm_node, "osc"));
+	hws[IMX6UL_CLK_CKIL] = imx_obtain_fixed_clk_hw(ccm_node, "ckil");
+	hws[IMX6UL_CLK_OSC] = imx_obtain_fixed_clk_hw(ccm_node, "osc");
 
 	/* ipp_di clock is external input */
-	hws[IMX6UL_CLK_IPP_DI0] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di0"));
-	hws[IMX6UL_CLK_IPP_DI1] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ipp_di1"));
+	hws[IMX6UL_CLK_IPP_DI0] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di0");
+	hws[IMX6UL_CLK_IPP_DI1] = imx_obtain_fixed_clk_hw(ccm_node, "ipp_di1");
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-anatop");
 	base = of_iomap(np, 0);
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index fbea774ef687..0c9f7adb41ae 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -403,8 +403,8 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	hws = clk_hw_data->hws;
 
 	hws[IMX7D_CLK_DUMMY] = imx_clk_hw_fixed("dummy", 0);
-	hws[IMX7D_OSC_24M_CLK] = __clk_get_hw(of_clk_get_by_name(ccm_node, "osc"));
-	hws[IMX7D_CKIL] = __clk_get_hw(of_clk_get_by_name(ccm_node, "ckil"));
+	hws[IMX7D_OSC_24M_CLK] = imx_obtain_fixed_clk_hw(ccm_node, "osc");
+	hws[IMX7D_CKIL] = imx_obtain_fixed_clk_hw(ccm_node, "ckil");
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx7d-anatop");
 	base = of_iomap(np, 0);
diff --git a/drivers/clk/imx/clk-imx7ulp.c b/drivers/clk/imx/clk-imx7ulp.c
index 2022d9bead91..281191b55b3a 100644
--- a/drivers/clk/imx/clk-imx7ulp.c
+++ b/drivers/clk/imx/clk-imx7ulp.c
@@ -24,11 +24,11 @@ static const char * const spll_pfd_sels[]	= { "spll_pfd0", "spll_pfd1", "spll_pf
 static const char * const spll_sels[]		= { "spll", "spll_pfd_sel", };
 static const char * const apll_pfd_sels[]	= { "apll_pfd0", "apll_pfd1", "apll_pfd2", "apll_pfd3", };
 static const char * const apll_sels[]		= { "apll", "apll_pfd_sel", };
-static const char * const scs_sels[]		= { "dummy", "sosc", "sirc", "firc", "dummy", "apll_sel", "spll_sel", "upll", };
-static const char * const ddr_sels[]		= { "apll_pfd_sel", "upll", };
+static const char * const scs_sels[]		= { "dummy", "sosc", "sirc", "firc", "dummy", "apll_sel", "spll_sel", "dummy", };
+static const char * const ddr_sels[]		= { "apll_pfd_sel", "dummy", "dummy", "dummy", };
 static const char * const nic_sels[]		= { "firc", "ddr_clk", };
 static const char * const periph_plat_sels[]	= { "dummy", "nic1_bus_clk", "nic1_clk", "ddr_clk", "apll_pfd2", "apll_pfd1", "apll_pfd0", "upll", };
-static const char * const periph_bus_sels[]	= { "dummy", "sosc_bus_clk", "mpll", "firc_bus_clk", "rosc", "nic1_bus_clk", "nic1_clk", "spll_bus_clk", };
+static const char * const periph_bus_sels[]	= { "dummy", "sosc_bus_clk", "dummy", "firc_bus_clk", "rosc", "nic1_bus_clk", "nic1_clk", "spll_bus_clk", };
 static const char * const arm_sels[]		= { "divcore", "dummy", "dummy", "hsrun_divcore", };
 
 /* used by sosc/sirc/firc/ddr/spll/apll dividers */
@@ -40,6 +40,7 @@ static const struct clk_div_table ulp_div_table[] = {
 	{ .val = 5, .div = 16, },
 	{ .val = 6, .div = 32, },
 	{ .val = 7, .div = 64, },
+	{ /* sentinel */ },
 };
 
 static const int pcc2_uart_clk_ids[] __initconst = {
@@ -75,7 +76,6 @@ static void __init imx7ulp_clk_scg1_init(struct device_node *np)
 	clks[IMX7ULP_CLK_SOSC]		= imx_obtain_fixed_clk_hw(np, "sosc");
 	clks[IMX7ULP_CLK_SIRC]		= imx_obtain_fixed_clk_hw(np, "sirc");
 	clks[IMX7ULP_CLK_FIRC]		= imx_obtain_fixed_clk_hw(np, "firc");
-	clks[IMX7ULP_CLK_MIPI_PLL]	= imx_obtain_fixed_clk_hw(np, "mpll");
 	clks[IMX7ULP_CLK_UPLL]		= imx_obtain_fixed_clk_hw(np, "upll");
 
 	/* SCG1 */
@@ -118,7 +118,7 @@ static void __init imx7ulp_clk_scg1_init(struct device_node *np)
 	clks[IMX7ULP_CLK_SYS_SEL]	= imx_clk_hw_mux2("scs_sel", base + 0x14, 24, 4, scs_sels, ARRAY_SIZE(scs_sels));
 	clks[IMX7ULP_CLK_HSRUN_SYS_SEL] = imx_clk_hw_mux2("hsrun_scs_sel", base + 0x1c, 24, 4, scs_sels, ARRAY_SIZE(scs_sels));
 	clks[IMX7ULP_CLK_NIC_SEL]	= imx_clk_hw_mux2("nic_sel", base + 0x40, 28, 1, nic_sels, ARRAY_SIZE(nic_sels));
-	clks[IMX7ULP_CLK_DDR_SEL]	= imx_clk_hw_mux_flags("ddr_sel", base + 0x30, 24, 1, ddr_sels, ARRAY_SIZE(ddr_sels), CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE);
+	clks[IMX7ULP_CLK_DDR_SEL]	= imx_clk_hw_mux_flags("ddr_sel", base + 0x30, 24, 2, ddr_sels, ARRAY_SIZE(ddr_sels), CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE);
 
 	clks[IMX7ULP_CLK_CORE_DIV]	= imx_clk_hw_divider_flags("divcore",	"scs_sel",  base + 0x14, 16, 4, CLK_SET_RATE_PARENT);
 	clks[IMX7ULP_CLK_HSRUN_CORE_DIV] = imx_clk_hw_divider_flags("hsrun_divcore", "hsrun_scs_sel", base + 0x1c, 16, 4, CLK_SET_RATE_PARENT);
diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c
index 172589e94f60..030b15d7c0ce 100644
--- a/drivers/clk/imx/clk-imx8mm.c
+++ b/drivers/clk/imx/clk-imx8mm.c
@@ -26,73 +26,6 @@ static u32 share_count_disp;
 static u32 share_count_pdm;
 static u32 share_count_nand;
 
-static const struct imx_pll14xx_rate_table imx8mm_pll1416x_tbl[] = {
-	PLL_1416X_RATE(1800000000U, 225, 3, 0),
-	PLL_1416X_RATE(1600000000U, 200, 3, 0),
-	PLL_1416X_RATE(1200000000U, 300, 3, 1),
-	PLL_1416X_RATE(1000000000U, 250, 3, 1),
-	PLL_1416X_RATE(800000000U,  200, 3, 1),
-	PLL_1416X_RATE(750000000U,  250, 2, 2),
-	PLL_1416X_RATE(700000000U,  350, 3, 2),
-	PLL_1416X_RATE(600000000U,  300, 3, 2),
-};
-
-static const struct imx_pll14xx_rate_table imx8mm_audiopll_tbl[] = {
-	PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
-	PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
-};
-
-static const struct imx_pll14xx_rate_table imx8mm_videopll_tbl[] = {
-	PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
-	PLL_1443X_RATE(594000000U, 198, 2, 2, 0),
-};
-
-static const struct imx_pll14xx_rate_table imx8mm_drampll_tbl[] = {
-	PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
-};
-
-static struct imx_pll14xx_clk imx8mm_audio_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mm_audiopll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_audiopll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_video_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mm_videopll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_videopll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_dram_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mm_drampll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_drampll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_arm_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mm_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_gpu_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mm_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_vpu_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mm_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mm_sys_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mm_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
-};
-
 static const char *pll_ref_sels[] = { "osc_24m", "dummy", "dummy", "dummy", };
 static const char *audio_pll1_bypass_sels[] = {"audio_pll1", "audio_pll1_ref_sel", };
 static const char *audio_pll2_bypass_sels[] = {"audio_pll2", "audio_pll2_ref_sel", };
@@ -101,8 +34,6 @@ static const char *dram_pll_bypass_sels[] = {"dram_pll", "dram_pll_ref_sel", };
 static const char *gpu_pll_bypass_sels[] = {"gpu_pll", "gpu_pll_ref_sel", };
 static const char *vpu_pll_bypass_sels[] = {"vpu_pll", "vpu_pll_ref_sel", };
 static const char *arm_pll_bypass_sels[] = {"arm_pll", "arm_pll_ref_sel", };
-static const char *sys_pll1_bypass_sels[] = {"sys_pll1", "sys_pll1_ref_sel", };
-static const char *sys_pll2_bypass_sels[] = {"sys_pll2", "sys_pll2_ref_sel", };
 static const char *sys_pll3_bypass_sels[] = {"sys_pll3", "sys_pll3_ref_sel", };
 
 /* CCM ROOT */
@@ -392,20 +323,18 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MM_GPU_PLL_REF_SEL] = imx_clk_mux("gpu_pll_ref_sel", base + 0x64, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MM_VPU_PLL_REF_SEL] = imx_clk_mux("vpu_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MM_ARM_PLL_REF_SEL] = imx_clk_mux("arm_pll_ref_sel", base + 0x84, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MM_SYS_PLL1_REF_SEL] = imx_clk_mux("sys_pll1_ref_sel", base + 0x94, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MM_SYS_PLL2_REF_SEL] = imx_clk_mux("sys_pll2_ref_sel", base + 0x104, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MM_SYS_PLL3_REF_SEL] = imx_clk_mux("sys_pll3_ref_sel", base + 0x114, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 
-	clks[IMX8MM_AUDIO_PLL1] = imx_clk_pll14xx("audio_pll1", "audio_pll1_ref_sel", base, &imx8mm_audio_pll);
-	clks[IMX8MM_AUDIO_PLL2] = imx_clk_pll14xx("audio_pll2", "audio_pll2_ref_sel", base + 0x14, &imx8mm_audio_pll);
-	clks[IMX8MM_VIDEO_PLL1] = imx_clk_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx8mm_video_pll);
-	clks[IMX8MM_DRAM_PLL] = imx_clk_pll14xx("dram_pll", "dram_pll_ref_sel", base + 0x50, &imx8mm_dram_pll);
-	clks[IMX8MM_GPU_PLL] = imx_clk_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx8mm_gpu_pll);
-	clks[IMX8MM_VPU_PLL] = imx_clk_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx8mm_vpu_pll);
-	clks[IMX8MM_ARM_PLL] = imx_clk_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx8mm_arm_pll);
-	clks[IMX8MM_SYS_PLL1] = imx_clk_pll14xx("sys_pll1", "sys_pll1_ref_sel", base + 0x94, &imx8mm_sys_pll);
-	clks[IMX8MM_SYS_PLL2] = imx_clk_pll14xx("sys_pll2", "sys_pll2_ref_sel", base + 0x104, &imx8mm_sys_pll);
-	clks[IMX8MM_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx8mm_sys_pll);
+	clks[IMX8MM_AUDIO_PLL1] = imx_clk_pll14xx("audio_pll1", "audio_pll1_ref_sel", base, &imx_1443x_pll);
+	clks[IMX8MM_AUDIO_PLL2] = imx_clk_pll14xx("audio_pll2", "audio_pll2_ref_sel", base + 0x14, &imx_1443x_pll);
+	clks[IMX8MM_VIDEO_PLL1] = imx_clk_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx_1443x_pll);
+	clks[IMX8MM_DRAM_PLL] = imx_clk_pll14xx("dram_pll", "dram_pll_ref_sel", base + 0x50, &imx_1443x_pll);
+	clks[IMX8MM_GPU_PLL] = imx_clk_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx_1416x_pll);
+	clks[IMX8MM_VPU_PLL] = imx_clk_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx_1416x_pll);
+	clks[IMX8MM_ARM_PLL] = imx_clk_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx_1416x_pll);
+	clks[IMX8MM_SYS_PLL1] = imx_clk_fixed("sys_pll1", 800000000);
+	clks[IMX8MM_SYS_PLL2] = imx_clk_fixed("sys_pll2", 1000000000);
+	clks[IMX8MM_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx_1416x_pll);
 
 	/* PLL bypass out */
 	clks[IMX8MM_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 16, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
@@ -415,8 +344,6 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MM_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MM_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MM_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
-	clks[IMX8MM_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 28, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-	clks[IMX8MM_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 28, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MM_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
 
 	/* PLL out gate */
@@ -427,29 +354,48 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MM_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11);
 	clks[IMX8MM_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11);
 	clks[IMX8MM_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11);
-	clks[IMX8MM_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 11);
-	clks[IMX8MM_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 11);
 	clks[IMX8MM_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11);
 
-	/* SYS PLL fixed output */
-	clks[IMX8MM_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_out", 1, 20);
-	clks[IMX8MM_SYS_PLL1_80M] = imx_clk_fixed_factor("sys_pll1_80m", "sys_pll1_out", 1, 10);
-	clks[IMX8MM_SYS_PLL1_100M] = imx_clk_fixed_factor("sys_pll1_100m", "sys_pll1_out", 1, 8);
-	clks[IMX8MM_SYS_PLL1_133M] = imx_clk_fixed_factor("sys_pll1_133m", "sys_pll1_out", 1, 6);
-	clks[IMX8MM_SYS_PLL1_160M] = imx_clk_fixed_factor("sys_pll1_160m", "sys_pll1_out", 1, 5);
-	clks[IMX8MM_SYS_PLL1_200M] = imx_clk_fixed_factor("sys_pll1_200m", "sys_pll1_out", 1, 4);
-	clks[IMX8MM_SYS_PLL1_266M] = imx_clk_fixed_factor("sys_pll1_266m", "sys_pll1_out", 1, 3);
-	clks[IMX8MM_SYS_PLL1_400M] = imx_clk_fixed_factor("sys_pll1_400m", "sys_pll1_out", 1, 2);
+	/* SYS PLL1 fixed output */
+	clks[IMX8MM_SYS_PLL1_40M_CG] = imx_clk_gate("sys_pll1_40m_cg", "sys_pll1", base + 0x94, 27);
+	clks[IMX8MM_SYS_PLL1_80M_CG] = imx_clk_gate("sys_pll1_80m_cg", "sys_pll1", base + 0x94, 25);
+	clks[IMX8MM_SYS_PLL1_100M_CG] = imx_clk_gate("sys_pll1_100m_cg", "sys_pll1", base + 0x94, 23);
+	clks[IMX8MM_SYS_PLL1_133M_CG] = imx_clk_gate("sys_pll1_133m_cg", "sys_pll1", base + 0x94, 21);
+	clks[IMX8MM_SYS_PLL1_160M_CG] = imx_clk_gate("sys_pll1_160m_cg", "sys_pll1", base + 0x94, 19);
+	clks[IMX8MM_SYS_PLL1_200M_CG] = imx_clk_gate("sys_pll1_200m_cg", "sys_pll1", base + 0x94, 17);
+	clks[IMX8MM_SYS_PLL1_266M_CG] = imx_clk_gate("sys_pll1_266m_cg", "sys_pll1", base + 0x94, 15);
+	clks[IMX8MM_SYS_PLL1_400M_CG] = imx_clk_gate("sys_pll1_400m_cg", "sys_pll1", base + 0x94, 13);
+	clks[IMX8MM_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1", base + 0x94, 11);
+
+	clks[IMX8MM_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_40m_cg", 1, 20);
+	clks[IMX8MM_SYS_PLL1_80M] = imx_clk_fixed_factor("sys_pll1_80m", "sys_pll1_80m_cg", 1, 10);
+	clks[IMX8MM_SYS_PLL1_100M] = imx_clk_fixed_factor("sys_pll1_100m", "sys_pll1_100m_cg", 1, 8);
+	clks[IMX8MM_SYS_PLL1_133M] = imx_clk_fixed_factor("sys_pll1_133m", "sys_pll1_133m_cg", 1, 6);
+	clks[IMX8MM_SYS_PLL1_160M] = imx_clk_fixed_factor("sys_pll1_160m", "sys_pll1_160m_cg", 1, 5);
+	clks[IMX8MM_SYS_PLL1_200M] = imx_clk_fixed_factor("sys_pll1_200m", "sys_pll1_200m_cg", 1, 4);
+	clks[IMX8MM_SYS_PLL1_266M] = imx_clk_fixed_factor("sys_pll1_266m", "sys_pll1_266m_cg", 1, 3);
+	clks[IMX8MM_SYS_PLL1_400M] = imx_clk_fixed_factor("sys_pll1_400m", "sys_pll1_400m_cg", 1, 2);
 	clks[IMX8MM_SYS_PLL1_800M] = imx_clk_fixed_factor("sys_pll1_800m", "sys_pll1_out", 1, 1);
 
-	clks[IMX8MM_SYS_PLL2_50M] = imx_clk_fixed_factor("sys_pll2_50m", "sys_pll2_out", 1, 20);
-	clks[IMX8MM_SYS_PLL2_100M] = imx_clk_fixed_factor("sys_pll2_100m", "sys_pll2_out", 1, 10);
-	clks[IMX8MM_SYS_PLL2_125M] = imx_clk_fixed_factor("sys_pll2_125m", "sys_pll2_out", 1, 8);
-	clks[IMX8MM_SYS_PLL2_166M] = imx_clk_fixed_factor("sys_pll2_166m", "sys_pll2_out", 1, 6);
-	clks[IMX8MM_SYS_PLL2_200M] = imx_clk_fixed_factor("sys_pll2_200m", "sys_pll2_out", 1, 5);
-	clks[IMX8MM_SYS_PLL2_250M] = imx_clk_fixed_factor("sys_pll2_250m", "sys_pll2_out", 1, 4);
-	clks[IMX8MM_SYS_PLL2_333M] = imx_clk_fixed_factor("sys_pll2_333m", "sys_pll2_out", 1, 3);
-	clks[IMX8MM_SYS_PLL2_500M] = imx_clk_fixed_factor("sys_pll2_500m", "sys_pll2_out", 1, 2);
+	/* SYS PLL2 fixed output */
+	clks[IMX8MM_SYS_PLL2_50M_CG] = imx_clk_gate("sys_pll2_50m_cg", "sys_pll2", base + 0x104, 27);
+	clks[IMX8MM_SYS_PLL2_100M_CG] = imx_clk_gate("sys_pll2_100m_cg", "sys_pll2", base + 0x104, 25);
+	clks[IMX8MM_SYS_PLL2_125M_CG] = imx_clk_gate("sys_pll2_125m_cg", "sys_pll2", base + 0x104, 23);
+	clks[IMX8MM_SYS_PLL2_166M_CG] = imx_clk_gate("sys_pll2_166m_cg", "sys_pll2", base + 0x104, 21);
+	clks[IMX8MM_SYS_PLL2_200M_CG] = imx_clk_gate("sys_pll2_200m_cg", "sys_pll2", base + 0x104, 19);
+	clks[IMX8MM_SYS_PLL2_250M_CG] = imx_clk_gate("sys_pll2_250m_cg", "sys_pll2", base + 0x104, 17);
+	clks[IMX8MM_SYS_PLL2_333M_CG] = imx_clk_gate("sys_pll2_333m_cg", "sys_pll2", base + 0x104, 15);
+	clks[IMX8MM_SYS_PLL2_500M_CG] = imx_clk_gate("sys_pll2_500m_cg", "sys_pll2", base + 0x104, 13);
+	clks[IMX8MM_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2", base + 0x104, 11);
+
+	clks[IMX8MM_SYS_PLL2_50M] = imx_clk_fixed_factor("sys_pll2_50m", "sys_pll2_50m_cg", 1, 20);
+	clks[IMX8MM_SYS_PLL2_100M] = imx_clk_fixed_factor("sys_pll2_100m", "sys_pll2_100m_cg", 1, 10);
+	clks[IMX8MM_SYS_PLL2_125M] = imx_clk_fixed_factor("sys_pll2_125m", "sys_pll2_125m_cg", 1, 8);
+	clks[IMX8MM_SYS_PLL2_166M] = imx_clk_fixed_factor("sys_pll2_166m", "sys_pll2_166m_cg", 1, 6);
+	clks[IMX8MM_SYS_PLL2_200M] = imx_clk_fixed_factor("sys_pll2_200m", "sys_pll2_200m_cg", 1, 5);
+	clks[IMX8MM_SYS_PLL2_250M] = imx_clk_fixed_factor("sys_pll2_250m", "sys_pll2_250m_cg", 1, 4);
+	clks[IMX8MM_SYS_PLL2_333M] = imx_clk_fixed_factor("sys_pll2_333m", "sys_pll2_333m_cg", 1, 3);
+	clks[IMX8MM_SYS_PLL2_500M] = imx_clk_fixed_factor("sys_pll2_500m", "sys_pll2_500m_cg", 1, 2);
 	clks[IMX8MM_SYS_PLL2_1000M] = imx_clk_fixed_factor("sys_pll2_1000m", "sys_pll2_out", 1, 1);
 
 	np = dev->of_node;
diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
index 58b5acee3830..9f5a5a56b45e 100644
--- a/drivers/clk/imx/clk-imx8mn.c
+++ b/drivers/clk/imx/clk-imx8mn.c
@@ -25,89 +25,6 @@ static u32 share_count_disp;
 static u32 share_count_pdm;
 static u32 share_count_nand;
 
-enum {
-	ARM_PLL,
-	GPU_PLL,
-	VPU_PLL,
-	SYS_PLL1,
-	SYS_PLL2,
-	SYS_PLL3,
-	DRAM_PLL,
-	AUDIO_PLL1,
-	AUDIO_PLL2,
-	VIDEO_PLL2,
-	NR_PLLS,
-};
-
-static const struct imx_pll14xx_rate_table imx8mn_pll1416x_tbl[] = {
-	PLL_1416X_RATE(1800000000U, 225, 3, 0),
-	PLL_1416X_RATE(1600000000U, 200, 3, 0),
-	PLL_1416X_RATE(1500000000U, 375, 3, 1),
-	PLL_1416X_RATE(1400000000U, 350, 3, 1),
-	PLL_1416X_RATE(1200000000U, 300, 3, 1),
-	PLL_1416X_RATE(1000000000U, 250, 3, 1),
-	PLL_1416X_RATE(800000000U,  200, 3, 1),
-	PLL_1416X_RATE(750000000U,  250, 2, 2),
-	PLL_1416X_RATE(700000000U,  350, 3, 2),
-	PLL_1416X_RATE(600000000U,  300, 3, 2),
-};
-
-static const struct imx_pll14xx_rate_table imx8mn_audiopll_tbl[] = {
-	PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
-	PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
-};
-
-static const struct imx_pll14xx_rate_table imx8mn_videopll_tbl[] = {
-	PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
-	PLL_1443X_RATE(594000000U, 198, 2, 2, 0),
-};
-
-static const struct imx_pll14xx_rate_table imx8mn_drampll_tbl[] = {
-	PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
-};
-
-static struct imx_pll14xx_clk imx8mn_audio_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mn_audiopll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_audiopll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_video_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mn_videopll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_videopll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_dram_pll = {
-		.type = PLL_1443X,
-		.rate_table = imx8mn_drampll_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_drampll_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_arm_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mn_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_gpu_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mn_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_vpu_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mn_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
-};
-
-static struct imx_pll14xx_clk imx8mn_sys_pll = {
-		.type = PLL_1416X,
-		.rate_table = imx8mn_pll1416x_tbl,
-		.rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
-};
-
 static const char * const pll_ref_sels[] = { "osc_24m", "dummy", "dummy", "dummy", };
 static const char * const audio_pll1_bypass_sels[] = {"audio_pll1", "audio_pll1_ref_sel", };
 static const char * const audio_pll2_bypass_sels[] = {"audio_pll2", "audio_pll2_ref_sel", };
@@ -116,8 +33,6 @@ static const char * const dram_pll_bypass_sels[] = {"dram_pll", "dram_pll_ref_se
 static const char * const gpu_pll_bypass_sels[] = {"gpu_pll", "gpu_pll_ref_sel", };
 static const char * const vpu_pll_bypass_sels[] = {"vpu_pll", "vpu_pll_ref_sel", };
 static const char * const arm_pll_bypass_sels[] = {"arm_pll", "arm_pll_ref_sel", };
-static const char * const sys_pll1_bypass_sels[] = {"sys_pll1", "sys_pll1_ref_sel", };
-static const char * const sys_pll2_bypass_sels[] = {"sys_pll2", "sys_pll2_ref_sel", };
 static const char * const sys_pll3_bypass_sels[] = {"sys_pll3", "sys_pll3_ref_sel", };
 
 static const char * const imx8mn_a53_sels[] = {"osc_24m", "arm_pll_out", "sys_pll2_500m",
@@ -405,20 +320,18 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MN_GPU_PLL_REF_SEL] = imx_clk_mux("gpu_pll_ref_sel", base + 0x64, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MN_VPU_PLL_REF_SEL] = imx_clk_mux("vpu_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MN_ARM_PLL_REF_SEL] = imx_clk_mux("arm_pll_ref_sel", base + 0x84, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MN_SYS_PLL1_REF_SEL] = imx_clk_mux("sys_pll1_ref_sel", base + 0x94, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MN_SYS_PLL2_REF_SEL] = imx_clk_mux("sys_pll2_ref_sel", base + 0x104, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MN_SYS_PLL3_REF_SEL] = imx_clk_mux("sys_pll3_ref_sel", base + 0x114, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 
-	clks[IMX8MN_AUDIO_PLL1] = imx_clk_pll14xx("audio_pll1", "audio_pll1_ref_sel", base, &imx8mn_audio_pll);
-	clks[IMX8MN_AUDIO_PLL2] = imx_clk_pll14xx("audio_pll2", "audio_pll2_ref_sel", base + 0x14, &imx8mn_audio_pll);
-	clks[IMX8MN_VIDEO_PLL1] = imx_clk_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx8mn_video_pll);
-	clks[IMX8MN_DRAM_PLL] = imx_clk_pll14xx("dram_pll", "dram_pll_ref_sel", base + 0x50, &imx8mn_dram_pll);
-	clks[IMX8MN_GPU_PLL] = imx_clk_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx8mn_gpu_pll);
-	clks[IMX8MN_VPU_PLL] = imx_clk_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx8mn_vpu_pll);
-	clks[IMX8MN_ARM_PLL] = imx_clk_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx8mn_arm_pll);
-	clks[IMX8MN_SYS_PLL1] = imx_clk_pll14xx("sys_pll1", "sys_pll1_ref_sel", base + 0x94, &imx8mn_sys_pll);
-	clks[IMX8MN_SYS_PLL2] = imx_clk_pll14xx("sys_pll2", "sys_pll2_ref_sel", base + 0x104, &imx8mn_sys_pll);
-	clks[IMX8MN_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx8mn_sys_pll);
+	clks[IMX8MN_AUDIO_PLL1] = imx_clk_pll14xx("audio_pll1", "audio_pll1_ref_sel", base, &imx_1443x_pll);
+	clks[IMX8MN_AUDIO_PLL2] = imx_clk_pll14xx("audio_pll2", "audio_pll2_ref_sel", base + 0x14, &imx_1443x_pll);
+	clks[IMX8MN_VIDEO_PLL1] = imx_clk_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx_1443x_pll);
+	clks[IMX8MN_DRAM_PLL] = imx_clk_pll14xx("dram_pll", "dram_pll_ref_sel", base + 0x50, &imx_1443x_pll);
+	clks[IMX8MN_GPU_PLL] = imx_clk_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx_1416x_pll);
+	clks[IMX8MN_VPU_PLL] = imx_clk_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx_1416x_pll);
+	clks[IMX8MN_ARM_PLL] = imx_clk_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx_1416x_pll);
+	clks[IMX8MN_SYS_PLL1] = imx_clk_fixed("sys_pll1", 800000000);
+	clks[IMX8MN_SYS_PLL2] = imx_clk_fixed("sys_pll2", 1000000000);
+	clks[IMX8MN_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx_1416x_pll);
 
 	/* PLL bypass out */
 	clks[IMX8MN_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 16, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
@@ -428,8 +341,6 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MN_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MN_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MN_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
-	clks[IMX8MN_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 28, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-	clks[IMX8MN_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 28, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
 	clks[IMX8MN_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
 
 	/* PLL out gate */
@@ -440,29 +351,48 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MN_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11);
 	clks[IMX8MN_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11);
 	clks[IMX8MN_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11);
-	clks[IMX8MN_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 11);
-	clks[IMX8MN_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 11);
 	clks[IMX8MN_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11);
 
-	/* SYS PLL fixed output */
-	clks[IMX8MN_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_out", 1, 20);
-	clks[IMX8MN_SYS_PLL1_80M] = imx_clk_fixed_factor("sys_pll1_80m", "sys_pll1_out", 1, 10);
-	clks[IMX8MN_SYS_PLL1_100M] = imx_clk_fixed_factor("sys_pll1_100m", "sys_pll1_out", 1, 8);
-	clks[IMX8MN_SYS_PLL1_133M] = imx_clk_fixed_factor("sys_pll1_133m", "sys_pll1_out", 1, 6);
-	clks[IMX8MN_SYS_PLL1_160M] = imx_clk_fixed_factor("sys_pll1_160m", "sys_pll1_out", 1, 5);
-	clks[IMX8MN_SYS_PLL1_200M] = imx_clk_fixed_factor("sys_pll1_200m", "sys_pll1_out", 1, 4);
-	clks[IMX8MN_SYS_PLL1_266M] = imx_clk_fixed_factor("sys_pll1_266m", "sys_pll1_out", 1, 3);
-	clks[IMX8MN_SYS_PLL1_400M] = imx_clk_fixed_factor("sys_pll1_400m", "sys_pll1_out", 1, 2);
+	/* SYS PLL1 fixed output */
+	clks[IMX8MN_SYS_PLL1_40M_CG] = imx_clk_gate("sys_pll1_40m_cg", "sys_pll1", base + 0x94, 27);
+	clks[IMX8MN_SYS_PLL1_80M_CG] = imx_clk_gate("sys_pll1_80m_cg", "sys_pll1", base + 0x94, 25);
+	clks[IMX8MN_SYS_PLL1_100M_CG] = imx_clk_gate("sys_pll1_100m_cg", "sys_pll1", base + 0x94, 23);
+	clks[IMX8MN_SYS_PLL1_133M_CG] = imx_clk_gate("sys_pll1_133m_cg", "sys_pll1", base + 0x94, 21);
+	clks[IMX8MN_SYS_PLL1_160M_CG] = imx_clk_gate("sys_pll1_160m_cg", "sys_pll1", base + 0x94, 19);
+	clks[IMX8MN_SYS_PLL1_200M_CG] = imx_clk_gate("sys_pll1_200m_cg", "sys_pll1", base + 0x94, 17);
+	clks[IMX8MN_SYS_PLL1_266M_CG] = imx_clk_gate("sys_pll1_266m_cg", "sys_pll1", base + 0x94, 15);
+	clks[IMX8MN_SYS_PLL1_400M_CG] = imx_clk_gate("sys_pll1_400m_cg", "sys_pll1", base + 0x94, 13);
+	clks[IMX8MN_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1", base + 0x94, 11);
+
+	clks[IMX8MN_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_40m_cg", 1, 20);
+	clks[IMX8MN_SYS_PLL1_80M] = imx_clk_fixed_factor("sys_pll1_80m", "sys_pll1_80m_cg", 1, 10);
+	clks[IMX8MN_SYS_PLL1_100M] = imx_clk_fixed_factor("sys_pll1_100m", "sys_pll1_100m_cg", 1, 8);
+	clks[IMX8MN_SYS_PLL1_133M] = imx_clk_fixed_factor("sys_pll1_133m", "sys_pll1_133m_cg", 1, 6);
+	clks[IMX8MN_SYS_PLL1_160M] = imx_clk_fixed_factor("sys_pll1_160m", "sys_pll1_160m_cg", 1, 5);
+	clks[IMX8MN_SYS_PLL1_200M] = imx_clk_fixed_factor("sys_pll1_200m", "sys_pll1_200m_cg", 1, 4);
+	clks[IMX8MN_SYS_PLL1_266M] = imx_clk_fixed_factor("sys_pll1_266m", "sys_pll1_266m_cg", 1, 3);
+	clks[IMX8MN_SYS_PLL1_400M] = imx_clk_fixed_factor("sys_pll1_400m", "sys_pll1_400m_cg", 1, 2);
 	clks[IMX8MN_SYS_PLL1_800M] = imx_clk_fixed_factor("sys_pll1_800m", "sys_pll1_out", 1, 1);
 
-	clks[IMX8MN_SYS_PLL2_50M] = imx_clk_fixed_factor("sys_pll2_50m", "sys_pll2_out", 1, 20);
-	clks[IMX8MN_SYS_PLL2_100M] = imx_clk_fixed_factor("sys_pll2_100m", "sys_pll2_out", 1, 10);
-	clks[IMX8MN_SYS_PLL2_125M] = imx_clk_fixed_factor("sys_pll2_125m", "sys_pll2_out", 1, 8);
-	clks[IMX8MN_SYS_PLL2_166M] = imx_clk_fixed_factor("sys_pll2_166m", "sys_pll2_out", 1, 6);
-	clks[IMX8MN_SYS_PLL2_200M] = imx_clk_fixed_factor("sys_pll2_200m", "sys_pll2_out", 1, 5);
-	clks[IMX8MN_SYS_PLL2_250M] = imx_clk_fixed_factor("sys_pll2_250m", "sys_pll2_out", 1, 4);
-	clks[IMX8MN_SYS_PLL2_333M] = imx_clk_fixed_factor("sys_pll2_333m", "sys_pll2_out", 1, 3);
-	clks[IMX8MN_SYS_PLL2_500M] = imx_clk_fixed_factor("sys_pll2_500m", "sys_pll2_out", 1, 2);
+	/* SYS PLL2 fixed output */
+	clks[IMX8MN_SYS_PLL2_50M_CG] = imx_clk_gate("sys_pll2_50m_cg", "sys_pll2", base + 0x104, 27);
+	clks[IMX8MN_SYS_PLL2_100M_CG] = imx_clk_gate("sys_pll2_100m_cg", "sys_pll2", base + 0x104, 25);
+	clks[IMX8MN_SYS_PLL2_125M_CG] = imx_clk_gate("sys_pll2_125m_cg", "sys_pll2", base + 0x104, 23);
+	clks[IMX8MN_SYS_PLL2_166M_CG] = imx_clk_gate("sys_pll2_166m_cg", "sys_pll2", base + 0x104, 21);
+	clks[IMX8MN_SYS_PLL2_200M_CG] = imx_clk_gate("sys_pll2_200m_cg", "sys_pll2", base + 0x104, 19);
+	clks[IMX8MN_SYS_PLL2_250M_CG] = imx_clk_gate("sys_pll2_250m_cg", "sys_pll2", base + 0x104, 17);
+	clks[IMX8MN_SYS_PLL2_333M_CG] = imx_clk_gate("sys_pll2_333m_cg", "sys_pll2", base + 0x104, 15);
+	clks[IMX8MN_SYS_PLL2_500M_CG] = imx_clk_gate("sys_pll2_500m_cg", "sys_pll2", base + 0x104, 13);
+	clks[IMX8MN_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2", base + 0x104, 11);
+
+	clks[IMX8MN_SYS_PLL2_50M] = imx_clk_fixed_factor("sys_pll2_50m", "sys_pll2_50m_cg", 1, 20);
+	clks[IMX8MN_SYS_PLL2_100M] = imx_clk_fixed_factor("sys_pll2_100m", "sys_pll2_100m_cg", 1, 10);
+	clks[IMX8MN_SYS_PLL2_125M] = imx_clk_fixed_factor("sys_pll2_125m", "sys_pll2_125m_cg", 1, 8);
+	clks[IMX8MN_SYS_PLL2_166M] = imx_clk_fixed_factor("sys_pll2_166m", "sys_pll2_166m_cg", 1, 6);
+	clks[IMX8MN_SYS_PLL2_200M] = imx_clk_fixed_factor("sys_pll2_200m", "sys_pll2_200m_cg", 1, 5);
+	clks[IMX8MN_SYS_PLL2_250M] = imx_clk_fixed_factor("sys_pll2_250m", "sys_pll2_250m_cg", 1, 4);
+	clks[IMX8MN_SYS_PLL2_333M] = imx_clk_fixed_factor("sys_pll2_333m", "sys_pll2_333m_cg", 1, 3);
+	clks[IMX8MN_SYS_PLL2_500M] = imx_clk_fixed_factor("sys_pll2_500m", "sys_pll2_500m_cg", 1, 2);
 	clks[IMX8MN_SYS_PLL2_1000M] = imx_clk_fixed_factor("sys_pll2_1000m", "sys_pll2_out", 1, 1);
 
 	np = dev->of_node;
diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c
index 41fc9c63356e..5f10a606d836 100644
--- a/drivers/clk/imx/clk-imx8mq.c
+++ b/drivers/clk/imx/clk-imx8mq.c
@@ -34,10 +34,9 @@ static const char * const audio_pll1_bypass_sels[] = {"audio_pll1", "audio_pll1_
 static const char * const audio_pll2_bypass_sels[] = {"audio_pll2", "audio_pll2_ref_sel", };
 static const char * const video_pll1_bypass_sels[] = {"video_pll1", "video_pll1_ref_sel", };
 
-static const char * const sys1_pll_out_sels[] = {"sys1_pll1_ref_sel", };
-static const char * const sys2_pll_out_sels[] = {"sys1_pll1_ref_sel", "sys2_pll1_ref_sel", };
-static const char * const sys3_pll_out_sels[] = {"sys3_pll1_ref_sel", "sys2_pll1_ref_sel", };
+static const char * const sys3_pll_out_sels[] = {"sys3_pll1_ref_sel", };
 static const char * const dram_pll_out_sels[] = {"dram_pll1_ref_sel", };
+static const char * const video2_pll_out_sels[] = {"video2_pll1_ref_sel", };
 
 /* CCM ROOT */
 static const char * const imx8mq_a53_sels[] = {"osc_25m", "arm_pll_out", "sys2_pll_500m", "sys2_pll_1000m",
@@ -307,10 +306,9 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MQ_AUDIO_PLL1_REF_SEL] = imx_clk_mux("audio_pll1_ref_sel", base + 0x0, 16, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MQ_AUDIO_PLL2_REF_SEL] = imx_clk_mux("audio_pll2_ref_sel", base + 0x8, 16, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MQ_VIDEO_PLL1_REF_SEL] = imx_clk_mux("video_pll1_ref_sel", base + 0x10, 16, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MQ_SYS1_PLL1_REF_SEL]	= imx_clk_mux("sys1_pll1_ref_sel", base + 0x30, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
-	clks[IMX8MQ_SYS2_PLL1_REF_SEL]	= imx_clk_mux("sys2_pll1_ref_sel", base + 0x3c, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MQ_SYS3_PLL1_REF_SEL]	= imx_clk_mux("sys3_pll1_ref_sel", base + 0x48, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 	clks[IMX8MQ_DRAM_PLL1_REF_SEL]	= imx_clk_mux("dram_pll1_ref_sel", base + 0x60, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+	clks[IMX8MQ_VIDEO2_PLL1_REF_SEL] = imx_clk_mux("video2_pll1_ref_sel", base + 0x54, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
 
 	clks[IMX8MQ_ARM_PLL_REF_DIV]	= imx_clk_divider("arm_pll_ref_div", "arm_pll_ref_sel", base + 0x28, 5, 6);
 	clks[IMX8MQ_GPU_PLL_REF_DIV]	= imx_clk_divider("gpu_pll_ref_div", "gpu_pll_ref_sel", base + 0x18, 5, 6);
@@ -342,30 +340,53 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MQ_AUDIO_PLL2_OUT] = imx_clk_gate("audio_pll2_out", "audio_pll2_bypass", base + 0x8, 21);
 	clks[IMX8MQ_VIDEO_PLL1_OUT] = imx_clk_gate("video_pll1_out", "video_pll1_bypass", base + 0x10, 21);
 
-	clks[IMX8MQ_SYS1_PLL_OUT] = imx_clk_sccg_pll("sys1_pll_out", sys1_pll_out_sels, ARRAY_SIZE(sys1_pll_out_sels), 0, 0, 0, base + 0x30, CLK_IS_CRITICAL);
-	clks[IMX8MQ_SYS2_PLL_OUT] = imx_clk_sccg_pll("sys2_pll_out", sys2_pll_out_sels, ARRAY_SIZE(sys2_pll_out_sels), 0, 0, 1, base + 0x3c, CLK_IS_CRITICAL);
-	clks[IMX8MQ_SYS3_PLL_OUT] = imx_clk_sccg_pll("sys3_pll_out", sys3_pll_out_sels, ARRAY_SIZE(sys3_pll_out_sels), 0, 0, 1, base + 0x48, CLK_IS_CRITICAL);
+	clks[IMX8MQ_SYS1_PLL_OUT] = imx_clk_fixed("sys1_pll_out", 800000000);
+	clks[IMX8MQ_SYS2_PLL_OUT] = imx_clk_fixed("sys2_pll_out", 1000000000);
+	clks[IMX8MQ_SYS3_PLL_OUT] = imx_clk_sccg_pll("sys3_pll_out", sys3_pll_out_sels, ARRAY_SIZE(sys3_pll_out_sels), 0, 0, 0, base + 0x48, CLK_IS_CRITICAL);
 	clks[IMX8MQ_DRAM_PLL_OUT] = imx_clk_sccg_pll("dram_pll_out", dram_pll_out_sels, ARRAY_SIZE(dram_pll_out_sels), 0, 0, 0, base + 0x60, CLK_IS_CRITICAL);
-	/* SYS PLL fixed output */
-	clks[IMX8MQ_SYS1_PLL_40M] = imx_clk_fixed_factor("sys1_pll_40m", "sys1_pll_out", 1, 20);
-	clks[IMX8MQ_SYS1_PLL_80M] = imx_clk_fixed_factor("sys1_pll_80m", "sys1_pll_out", 1, 10);
-	clks[IMX8MQ_SYS1_PLL_100M] = imx_clk_fixed_factor("sys1_pll_100m", "sys1_pll_out", 1, 8);
-	clks[IMX8MQ_SYS1_PLL_133M] = imx_clk_fixed_factor("sys1_pll_133m", "sys1_pll_out", 1, 6);
-	clks[IMX8MQ_SYS1_PLL_160M] = imx_clk_fixed_factor("sys1_pll_160m", "sys1_pll_out", 1, 5);
-	clks[IMX8MQ_SYS1_PLL_200M] = imx_clk_fixed_factor("sys1_pll_200m", "sys1_pll_out", 1, 4);
-	clks[IMX8MQ_SYS1_PLL_266M] = imx_clk_fixed_factor("sys1_pll_266m", "sys1_pll_out", 1, 3);
-	clks[IMX8MQ_SYS1_PLL_400M] = imx_clk_fixed_factor("sys1_pll_400m", "sys1_pll_out", 1, 2);
-	clks[IMX8MQ_SYS1_PLL_800M] = imx_clk_fixed_factor("sys1_pll_800m", "sys1_pll_out", 1, 1);
-
-	clks[IMX8MQ_SYS2_PLL_50M] = imx_clk_fixed_factor("sys2_pll_50m", "sys2_pll_out", 1, 20);
-	clks[IMX8MQ_SYS2_PLL_100M] = imx_clk_fixed_factor("sys2_pll_100m", "sys2_pll_out", 1, 10);
-	clks[IMX8MQ_SYS2_PLL_125M] = imx_clk_fixed_factor("sys2_pll_125m", "sys2_pll_out", 1, 8);
-	clks[IMX8MQ_SYS2_PLL_166M] = imx_clk_fixed_factor("sys2_pll_166m", "sys2_pll_out", 1, 6);
-	clks[IMX8MQ_SYS2_PLL_200M] = imx_clk_fixed_factor("sys2_pll_200m", "sys2_pll_out", 1, 5);
-	clks[IMX8MQ_SYS2_PLL_250M] = imx_clk_fixed_factor("sys2_pll_250m", "sys2_pll_out", 1, 4);
-	clks[IMX8MQ_SYS2_PLL_333M] = imx_clk_fixed_factor("sys2_pll_333m", "sys2_pll_out", 1, 3);
-	clks[IMX8MQ_SYS2_PLL_500M] = imx_clk_fixed_factor("sys2_pll_500m", "sys2_pll_out", 1, 2);
-	clks[IMX8MQ_SYS2_PLL_1000M] = imx_clk_fixed_factor("sys2_pll_1000m", "sys2_pll_out", 1, 1);
+	clks[IMX8MQ_VIDEO2_PLL_OUT] = imx_clk_sccg_pll("video2_pll_out", video2_pll_out_sels, ARRAY_SIZE(video2_pll_out_sels), 0, 0, 0, base + 0x54, 0);
+
+	/* SYS PLL1 fixed output */
+	clks[IMX8MQ_SYS1_PLL_40M_CG] = imx_clk_gate("sys1_pll_40m_cg", "sys1_pll_out", base + 0x30, 9);
+	clks[IMX8MQ_SYS1_PLL_80M_CG] = imx_clk_gate("sys1_pll_80m_cg", "sys1_pll_out", base + 0x30, 11);
+	clks[IMX8MQ_SYS1_PLL_100M_CG] = imx_clk_gate("sys1_pll_100m_cg", "sys1_pll_out", base + 0x30, 13);
+	clks[IMX8MQ_SYS1_PLL_133M_CG] = imx_clk_gate("sys1_pll_133m_cg", "sys1_pll_out", base + 0x30, 15);
+	clks[IMX8MQ_SYS1_PLL_160M_CG] = imx_clk_gate("sys1_pll_160m_cg", "sys1_pll_out", base + 0x30, 17);
+	clks[IMX8MQ_SYS1_PLL_200M_CG] = imx_clk_gate("sys1_pll_200m_cg", "sys1_pll_out", base + 0x30, 19);
+	clks[IMX8MQ_SYS1_PLL_266M_CG] = imx_clk_gate("sys1_pll_266m_cg", "sys1_pll_out", base + 0x30, 21);
+	clks[IMX8MQ_SYS1_PLL_400M_CG] = imx_clk_gate("sys1_pll_400m_cg", "sys1_pll_out", base + 0x30, 23);
+	clks[IMX8MQ_SYS1_PLL_800M_CG] = imx_clk_gate("sys1_pll_800m_cg", "sys1_pll_out", base + 0x30, 25);
+
+	clks[IMX8MQ_SYS1_PLL_40M] = imx_clk_fixed_factor("sys1_pll_40m", "sys1_pll_40m_cg", 1, 20);
+	clks[IMX8MQ_SYS1_PLL_80M] = imx_clk_fixed_factor("sys1_pll_80m", "sys1_pll_80m_cg", 1, 10);
+	clks[IMX8MQ_SYS1_PLL_100M] = imx_clk_fixed_factor("sys1_pll_100m", "sys1_pll_100m_cg", 1, 8);
+	clks[IMX8MQ_SYS1_PLL_133M] = imx_clk_fixed_factor("sys1_pll_133m", "sys1_pll_133m_cg", 1, 6);
+	clks[IMX8MQ_SYS1_PLL_160M] = imx_clk_fixed_factor("sys1_pll_160m", "sys1_pll_160m_cg", 1, 5);
+	clks[IMX8MQ_SYS1_PLL_200M] = imx_clk_fixed_factor("sys1_pll_200m", "sys1_pll_200m_cg", 1, 4);
+	clks[IMX8MQ_SYS1_PLL_266M] = imx_clk_fixed_factor("sys1_pll_266m", "sys1_pll_266m_cg", 1, 3);
+	clks[IMX8MQ_SYS1_PLL_400M] = imx_clk_fixed_factor("sys1_pll_400m", "sys1_pll_400m_cg", 1, 2);
+	clks[IMX8MQ_SYS1_PLL_800M] = imx_clk_fixed_factor("sys1_pll_800m", "sys1_pll_800m_cg", 1, 1);
+
+	/* SYS PLL2 fixed output */
+	clks[IMX8MQ_SYS2_PLL_50M_CG] = imx_clk_gate("sys2_pll_50m_cg", "sys2_pll_out", base + 0x3c, 9);
+	clks[IMX8MQ_SYS2_PLL_100M_CG] = imx_clk_gate("sys2_pll_100m_cg", "sys2_pll_out", base + 0x3c, 11);
+	clks[IMX8MQ_SYS2_PLL_125M_CG] = imx_clk_gate("sys2_pll_125m_cg", "sys2_pll_out", base + 0x3c, 13);
+	clks[IMX8MQ_SYS2_PLL_166M_CG] = imx_clk_gate("sys2_pll_166m_cg", "sys2_pll_out", base + 0x3c, 15);
+	clks[IMX8MQ_SYS2_PLL_200M_CG] = imx_clk_gate("sys2_pll_200m_cg", "sys2_pll_out", base + 0x3c, 17);
+	clks[IMX8MQ_SYS2_PLL_250M_CG] = imx_clk_gate("sys2_pll_250m_cg", "sys2_pll_out", base + 0x3c, 19);
+	clks[IMX8MQ_SYS2_PLL_333M_CG] = imx_clk_gate("sys2_pll_333m_cg", "sys2_pll_out", base + 0x3c, 21);
+	clks[IMX8MQ_SYS2_PLL_500M_CG] = imx_clk_gate("sys2_pll_500m_cg", "sys2_pll_out", base + 0x3c, 23);
+	clks[IMX8MQ_SYS2_PLL_1000M_CG] = imx_clk_gate("sys2_pll_1000m_cg", "sys2_pll_out", base + 0x3c, 25);
+
+	clks[IMX8MQ_SYS2_PLL_50M] = imx_clk_fixed_factor("sys2_pll_50m", "sys2_pll_50m_cg", 1, 20);
+	clks[IMX8MQ_SYS2_PLL_100M] = imx_clk_fixed_factor("sys2_pll_100m", "sys2_pll_100m_cg", 1, 10);
+	clks[IMX8MQ_SYS2_PLL_125M] = imx_clk_fixed_factor("sys2_pll_125m", "sys2_pll_125m_cg", 1, 8);
+	clks[IMX8MQ_SYS2_PLL_166M] = imx_clk_fixed_factor("sys2_pll_166m", "sys2_pll_166m_cg", 1, 6);
+	clks[IMX8MQ_SYS2_PLL_200M] = imx_clk_fixed_factor("sys2_pll_200m", "sys2_pll_200m_cg", 1, 5);
+	clks[IMX8MQ_SYS2_PLL_250M] = imx_clk_fixed_factor("sys2_pll_250m", "sys2_pll_250m_cg", 1, 4);
+	clks[IMX8MQ_SYS2_PLL_333M] = imx_clk_fixed_factor("sys2_pll_333m", "sys2_pll_333m_cg", 1, 3);
+	clks[IMX8MQ_SYS2_PLL_500M] = imx_clk_fixed_factor("sys2_pll_500m", "sys2_pll_500m_cg", 1, 2);
+	clks[IMX8MQ_SYS2_PLL_1000M] = imx_clk_fixed_factor("sys2_pll_1000m", "sys2_pll_1000m_cg", 1, 1);
 
 	np = dev->of_node;
 	base = devm_platform_ioremap_resource(pdev, 0);
diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c
index 7a815ec76aa5..3636c8035c7d 100644
--- a/drivers/clk/imx/clk-pll14xx.c
+++ b/drivers/clk/imx/clk-pll14xx.c
@@ -41,6 +41,38 @@ struct clk_pll14xx {
 
 #define to_clk_pll14xx(_hw) container_of(_hw, struct clk_pll14xx, hw)
 
+static const struct imx_pll14xx_rate_table imx_pll1416x_tbl[] = {
+	PLL_1416X_RATE(1800000000U, 225, 3, 0),
+	PLL_1416X_RATE(1600000000U, 200, 3, 0),
+	PLL_1416X_RATE(1500000000U, 375, 3, 1),
+	PLL_1416X_RATE(1400000000U, 350, 3, 1),
+	PLL_1416X_RATE(1200000000U, 300, 3, 1),
+	PLL_1416X_RATE(1000000000U, 250, 3, 1),
+	PLL_1416X_RATE(800000000U,  200, 3, 1),
+	PLL_1416X_RATE(750000000U,  250, 2, 2),
+	PLL_1416X_RATE(700000000U,  350, 3, 2),
+	PLL_1416X_RATE(600000000U,  300, 3, 2),
+};
+
+static const struct imx_pll14xx_rate_table imx_pll1443x_tbl[] = {
+	PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
+	PLL_1443X_RATE(594000000U, 198, 2, 2, 0),
+	PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
+	PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
+};
+
+struct imx_pll14xx_clk imx_1443x_pll = {
+	.type = PLL_1443X,
+	.rate_table = imx_pll1443x_tbl,
+	.rate_count = ARRAY_SIZE(imx_pll1443x_tbl),
+};
+
+struct imx_pll14xx_clk imx_1416x_pll = {
+	.type = PLL_1416X,
+	.rate_table = imx_pll1416x_tbl,
+	.rate_count = ARRAY_SIZE(imx_pll1416x_tbl),
+};
+
 static const struct imx_pll14xx_rate_table *imx_get_pll_settings(
 		struct clk_pll14xx *pll, unsigned long rate)
 {
@@ -112,48 +144,22 @@ static unsigned long clk_pll1443x_recalc_rate(struct clk_hw *hw,
 	return fvco;
 }
 
-static inline bool clk_pll1416x_mp_change(const struct imx_pll14xx_rate_table *rate,
+static inline bool clk_pll14xx_mp_change(const struct imx_pll14xx_rate_table *rate,
 					  u32 pll_div)
 {
 	u32 old_mdiv, old_pdiv;
 
-	old_mdiv = (pll_div >> MDIV_SHIFT) & MDIV_MASK;
-	old_pdiv = (pll_div >> PDIV_SHIFT) & PDIV_MASK;
+	old_mdiv = (pll_div & MDIV_MASK) >> MDIV_SHIFT;
+	old_pdiv = (pll_div & PDIV_MASK) >> PDIV_SHIFT;
 
 	return rate->mdiv != old_mdiv || rate->pdiv != old_pdiv;
 }
 
-static inline bool clk_pll1443x_mpk_change(const struct imx_pll14xx_rate_table *rate,
-					  u32 pll_div_ctl0, u32 pll_div_ctl1)
-{
-	u32 old_mdiv, old_pdiv, old_kdiv;
-
-	old_mdiv = (pll_div_ctl0 >> MDIV_SHIFT) & MDIV_MASK;
-	old_pdiv = (pll_div_ctl0 >> PDIV_SHIFT) & PDIV_MASK;
-	old_kdiv = (pll_div_ctl1 >> KDIV_SHIFT) & KDIV_MASK;
-
-	return rate->mdiv != old_mdiv || rate->pdiv != old_pdiv ||
-		rate->kdiv != old_kdiv;
-}
-
-static inline bool clk_pll1443x_mp_change(const struct imx_pll14xx_rate_table *rate,
-					  u32 pll_div_ctl0, u32 pll_div_ctl1)
-{
-	u32 old_mdiv, old_pdiv, old_kdiv;
-
-	old_mdiv = (pll_div_ctl0 >> MDIV_SHIFT) & MDIV_MASK;
-	old_pdiv = (pll_div_ctl0 >> PDIV_SHIFT) & PDIV_MASK;
-	old_kdiv = (pll_div_ctl1 >> KDIV_SHIFT) & KDIV_MASK;
-
-	return rate->mdiv != old_mdiv || rate->pdiv != old_pdiv ||
-		rate->kdiv != old_kdiv;
-}
-
 static int clk_pll14xx_wait_lock(struct clk_pll14xx *pll)
 {
 	u32 val;
 
-	return readl_poll_timeout(pll->base, val, val & LOCK_TIMEOUT_US, 0,
+	return readl_poll_timeout(pll->base, val, val & LOCK_STATUS, 0,
 			LOCK_TIMEOUT_US);
 }
 
@@ -174,7 +180,7 @@ static int clk_pll1416x_set_rate(struct clk_hw *hw, unsigned long drate,
 
 	tmp = readl_relaxed(pll->base + 4);
 
-	if (!clk_pll1416x_mp_change(rate, tmp)) {
+	if (!clk_pll14xx_mp_change(rate, tmp)) {
 		tmp &= ~(SDIV_MASK) << SDIV_SHIFT;
 		tmp |= rate->sdiv << SDIV_SHIFT;
 		writel_relaxed(tmp, pll->base + 4);
@@ -239,13 +245,15 @@ static int clk_pll1443x_set_rate(struct clk_hw *hw, unsigned long drate,
 	}
 
 	tmp = readl_relaxed(pll->base + 4);
-	div_val = readl_relaxed(pll->base + 8);
 
-	if (!clk_pll1443x_mpk_change(rate, tmp, div_val)) {
+	if (!clk_pll14xx_mp_change(rate, tmp)) {
 		tmp &= ~(SDIV_MASK) << SDIV_SHIFT;
 		tmp |= rate->sdiv << SDIV_SHIFT;
 		writel_relaxed(tmp, pll->base + 4);
 
+		tmp = rate->kdiv << KDIV_SHIFT;
+		writel_relaxed(tmp, pll->base + 8);
+
 		return 0;
 	}
 
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index f7a389a50401..bc5bb6ac8636 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -50,6 +50,9 @@ struct imx_pll14xx_clk {
 	int flags;
 };
 
+extern struct imx_pll14xx_clk imx_1416x_pll;
+extern struct imx_pll14xx_clk imx_1443x_pll;
+
 #define imx_clk_cpu(name, parent_name, div, mux, pll, step) \
 	imx_clk_hw_cpu(name, parent_name, div, mux, pll, step)->clk
 
diff --git a/drivers/clk/ingenic/Kconfig b/drivers/clk/ingenic/Kconfig
index 1cb489959a99..b4555b465ea6 100644
--- a/drivers/clk/ingenic/Kconfig
+++ b/drivers/clk/ingenic/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "Ingenic SoCs drivers"
-	depends on MIPS
+	depends on MIPS || COMPILE_TEST
 
 config INGENIC_CGU_COMMON
 	bool
@@ -45,6 +45,16 @@ config INGENIC_CGU_JZ4780
 
 	  If building for a JZ4780 SoC, you want to say Y here.
 
+config INGENIC_CGU_X1000
+	bool "Ingenic X1000 CGU driver"
+	default MACH_X1000
+	select INGENIC_CGU_COMMON
+	help
+	  Support the clocks provided by the CGU hardware on Ingenic X1000
+	  and compatible SoCs.
+
+	  If building for a X1000 SoC, you want to say Y here.
+
 config INGENIC_TCU_CLK
 	bool "Ingenic JZ47xx TCU clocks driver"
 	default MACH_INGENIC
diff --git a/drivers/clk/ingenic/Makefile b/drivers/clk/ingenic/Makefile
index 097220b05131..8b1dad9b74a7 100644
--- a/drivers/clk/ingenic/Makefile
+++ b/drivers/clk/ingenic/Makefile
@@ -4,4 +4,5 @@ obj-$(CONFIG_INGENIC_CGU_JZ4740)	+= jz4740-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4725B)	+= jz4725b-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4770)	+= jz4770-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4780)	+= jz4780-cgu.o
+obj-$(CONFIG_INGENIC_CGU_X1000)		+= x1000-cgu.o
 obj-$(CONFIG_INGENIC_TCU_CLK)		+= tcu.o
diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c
index a1a5f9cb439e..ad7daa494fd4 100644
--- a/drivers/clk/ingenic/tcu.c
+++ b/drivers/clk/ingenic/tcu.c
@@ -358,8 +358,7 @@ static int __init ingenic_tcu_probe(struct device_node *np)
 		}
 	}
 
-	tcu->clocks = kzalloc(sizeof(*tcu->clocks) +
-			      sizeof(*tcu->clocks->hws) * TCU_CLK_COUNT,
+	tcu->clocks = kzalloc(struct_size(tcu->clocks, hws, TCU_CLK_COUNT),
 			      GFP_KERNEL);
 	if (!tcu->clocks) {
 		ret = -ENOMEM;
diff --git a/drivers/clk/ingenic/x1000-cgu.c b/drivers/clk/ingenic/x1000-cgu.c
new file mode 100644
index 000000000000..b22d87b3f555
--- /dev/null
+++ b/drivers/clk/ingenic/x1000-cgu.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * X1000 SoC CGU driver
+ * Copyright (c) 2019 Zhou Yanjie <zhouyanjie@zoho.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <dt-bindings/clock/x1000-cgu.h>
+#include "cgu.h"
+#include "pm.h"
+
+/* CGU register offsets */
+#define CGU_REG_CPCCR		0x00
+#define CGU_REG_APLL		0x10
+#define CGU_REG_MPLL		0x14
+#define CGU_REG_CLKGR		0x20
+#define CGU_REG_OPCR		0x24
+#define CGU_REG_DDRCDR		0x2c
+#define CGU_REG_MACCDR		0x54
+#define CGU_REG_I2SCDR		0x60
+#define CGU_REG_LPCDR		0x64
+#define CGU_REG_MSC0CDR		0x68
+#define CGU_REG_I2SCDR1		0x70
+#define CGU_REG_SSICDR		0x74
+#define CGU_REG_CIMCDR		0x7c
+#define CGU_REG_PCMCDR		0x84
+#define CGU_REG_MSC1CDR		0xa4
+#define CGU_REG_CMP_INTR	0xb0
+#define CGU_REG_CMP_INTRE	0xb4
+#define CGU_REG_DRCG		0xd0
+#define CGU_REG_CPCSR		0xd4
+#define CGU_REG_PCMCDR1		0xe0
+#define CGU_REG_MACPHYC		0xe8
+
+/* bits within the OPCR register */
+#define OPCR_SPENDN0		BIT(7)
+#define OPCR_SPENDN1		BIT(6)
+
+static struct ingenic_cgu *cgu;
+
+static const s8 pll_od_encoding[8] = {
+	0x0, 0x1, -1, 0x2, -1, -1, -1, 0x3,
+};
+
+static const struct ingenic_cgu_clk_info x1000_cgu_clocks[] = {
+
+	/* External clocks */
+
+	[X1000_CLK_EXCLK] = { "ext", CGU_CLK_EXT },
+	[X1000_CLK_RTCLK] = { "rtc", CGU_CLK_EXT },
+
+	/* PLLs */
+
+	[X1000_CLK_APLL] = {
+		"apll", CGU_CLK_PLL,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.pll = {
+			.reg = CGU_REG_APLL,
+			.m_shift = 24,
+			.m_bits = 7,
+			.m_offset = 1,
+			.n_shift = 18,
+			.n_bits = 5,
+			.n_offset = 1,
+			.od_shift = 16,
+			.od_bits = 2,
+			.od_max = 8,
+			.od_encoding = pll_od_encoding,
+			.bypass_bit = 9,
+			.enable_bit = 8,
+			.stable_bit = 10,
+		},
+	},
+
+	[X1000_CLK_MPLL] = {
+		"mpll", CGU_CLK_PLL,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.pll = {
+			.reg = CGU_REG_MPLL,
+			.m_shift = 24,
+			.m_bits = 7,
+			.m_offset = 1,
+			.n_shift = 18,
+			.n_bits = 5,
+			.n_offset = 1,
+			.od_shift = 16,
+			.od_bits = 2,
+			.od_max = 8,
+			.od_encoding = pll_od_encoding,
+			.bypass_bit = 6,
+			.enable_bit = 7,
+			.stable_bit = 0,
+		},
+	},
+
+	/* Muxes & dividers */
+
+	[X1000_CLK_SCLKA] = {
+		"sclk_a", CGU_CLK_MUX,
+		.parents = { -1, X1000_CLK_EXCLK, X1000_CLK_APLL, -1 },
+		.mux = { CGU_REG_CPCCR, 30, 2 },
+	},
+
+	[X1000_CLK_CPUMUX] = {
+		"cpu_mux", CGU_CLK_MUX,
+		.parents = { -1, X1000_CLK_SCLKA, X1000_CLK_MPLL, -1 },
+		.mux = { CGU_REG_CPCCR, 28, 2 },
+	},
+
+	[X1000_CLK_CPU] = {
+		"cpu", CGU_CLK_DIV,
+		.parents = { X1000_CLK_CPUMUX, -1, -1, -1 },
+		.div = { CGU_REG_CPCCR, 0, 1, 4, 22, -1, -1 },
+	},
+
+	[X1000_CLK_L2CACHE] = {
+		"l2cache", CGU_CLK_DIV,
+		.parents = { X1000_CLK_CPUMUX, -1, -1, -1 },
+		.div = { CGU_REG_CPCCR, 4, 1, 4, 22, -1, -1 },
+	},
+
+	[X1000_CLK_AHB0] = {
+		"ahb0", CGU_CLK_MUX | CGU_CLK_DIV,
+		.parents = { -1, X1000_CLK_SCLKA, X1000_CLK_MPLL, -1 },
+		.mux = { CGU_REG_CPCCR, 26, 2 },
+		.div = { CGU_REG_CPCCR, 8, 1, 4, 21, -1, -1 },
+	},
+
+	[X1000_CLK_AHB2PMUX] = {
+		"ahb2_apb_mux", CGU_CLK_MUX,
+		.parents = { -1, X1000_CLK_SCLKA, X1000_CLK_MPLL, -1 },
+		.mux = { CGU_REG_CPCCR, 24, 2 },
+	},
+
+	[X1000_CLK_AHB2] = {
+		"ahb2", CGU_CLK_DIV,
+		.parents = { X1000_CLK_AHB2PMUX, -1, -1, -1 },
+		.div = { CGU_REG_CPCCR, 12, 1, 4, 20, -1, -1 },
+	},
+
+	[X1000_CLK_PCLK] = {
+		"pclk", CGU_CLK_DIV,
+		.parents = { X1000_CLK_AHB2PMUX, -1, -1, -1 },
+		.div = { CGU_REG_CPCCR, 16, 1, 4, 20, -1, -1 },
+	},
+
+	[X1000_CLK_DDR] = {
+		"ddr", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE,
+		.parents = { -1, X1000_CLK_SCLKA, X1000_CLK_MPLL, -1 },
+		.mux = { CGU_REG_DDRCDR, 30, 2 },
+		.div = { CGU_REG_DDRCDR, 0, 1, 4, 29, 28, 27 },
+		.gate = { CGU_REG_CLKGR, 31 },
+	},
+
+	[X1000_CLK_MAC] = {
+		"mac", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE,
+		.parents = { X1000_CLK_SCLKA, X1000_CLK_MPLL},
+		.mux = { CGU_REG_MACCDR, 31, 1 },
+		.div = { CGU_REG_MACCDR, 0, 1, 8, 29, 28, 27 },
+		.gate = { CGU_REG_CLKGR, 25 },
+	},
+
+	[X1000_CLK_MSCMUX] = {
+		"msc_mux", CGU_CLK_MUX,
+		.parents = { X1000_CLK_SCLKA, X1000_CLK_MPLL},
+		.mux = { CGU_REG_MSC0CDR, 31, 1 },
+	},
+
+	[X1000_CLK_MSC0] = {
+		"msc0", CGU_CLK_DIV | CGU_CLK_GATE,
+		.parents = { X1000_CLK_MSCMUX, -1, -1, -1 },
+		.div = { CGU_REG_MSC0CDR, 0, 2, 8, 29, 28, 27 },
+		.gate = { CGU_REG_CLKGR, 4 },
+	},
+
+	[X1000_CLK_MSC1] = {
+		"msc1", CGU_CLK_DIV | CGU_CLK_GATE,
+		.parents = { X1000_CLK_MSCMUX, -1, -1, -1 },
+		.div = { CGU_REG_MSC1CDR, 0, 2, 8, 29, 28, 27 },
+		.gate = { CGU_REG_CLKGR, 5 },
+	},
+
+	[X1000_CLK_SSIPLL] = {
+		"ssi_pll", CGU_CLK_MUX | CGU_CLK_DIV,
+		.parents = { X1000_CLK_SCLKA, X1000_CLK_MPLL, -1, -1 },
+		.mux = { CGU_REG_SSICDR, 31, 1 },
+		.div = { CGU_REG_SSICDR, 0, 1, 8, 29, 28, 27 },
+	},
+
+	[X1000_CLK_SSIMUX] = {
+		"ssi_mux", CGU_CLK_MUX,
+		.parents = { X1000_CLK_EXCLK, X1000_CLK_SSIPLL, -1, -1 },
+		.mux = { CGU_REG_SSICDR, 30, 1 },
+	},
+
+	/* Gate-only clocks */
+
+	[X1000_CLK_SFC] = {
+		"sfc", CGU_CLK_GATE,
+		.parents = { X1000_CLK_SSIPLL, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 2 },
+	},
+
+	[X1000_CLK_I2C0] = {
+		"i2c0", CGU_CLK_GATE,
+		.parents = { X1000_CLK_PCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 7 },
+	},
+
+	[X1000_CLK_I2C1] = {
+		"i2c1", CGU_CLK_GATE,
+		.parents = { X1000_CLK_PCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 8 },
+	},
+
+	[X1000_CLK_I2C2] = {
+		"i2c2", CGU_CLK_GATE,
+		.parents = { X1000_CLK_PCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 9 },
+	},
+
+	[X1000_CLK_UART0] = {
+		"uart0", CGU_CLK_GATE,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 14 },
+	},
+
+	[X1000_CLK_UART1] = {
+		"uart1", CGU_CLK_GATE,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 15 },
+	},
+
+	[X1000_CLK_UART2] = {
+		"uart2", CGU_CLK_GATE,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 16 },
+	},
+
+	[X1000_CLK_SSI] = {
+		"ssi", CGU_CLK_GATE,
+		.parents = { X1000_CLK_SSIMUX, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 19 },
+	},
+
+	[X1000_CLK_PDMA] = {
+		"pdma", CGU_CLK_GATE,
+		.parents = { X1000_CLK_EXCLK, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 21 },
+	},
+};
+
+static void __init x1000_cgu_init(struct device_node *np)
+{
+	int retval;
+
+	cgu = ingenic_cgu_new(x1000_cgu_clocks,
+			      ARRAY_SIZE(x1000_cgu_clocks), np);
+	if (!cgu) {
+		pr_err("%s: failed to initialise CGU\n", __func__);
+		return;
+	}
+
+	retval = ingenic_cgu_register_clocks(cgu);
+	if (retval) {
+		pr_err("%s: failed to register CGU Clocks\n", __func__);
+		return;
+	}
+
+	ingenic_cgu_register_syscore_ops(cgu);
+}
+CLK_OF_DECLARE(x1000_cgu, "ingenic,x1000-cgu", x1000_cgu_init);
diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c
index 354c26f663b4..a3bd9a107209 100644
--- a/drivers/clk/mediatek/clk-mt2712.c
+++ b/drivers/clk/mediatek/clk-mt2712.c
@@ -1306,9 +1306,8 @@ static int clk_mt2712_top_probe(struct platform_device *pdev)
 	int r, i;
 	struct device_node *node = pdev->dev.of_node;
 	void __iomem *base;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base)) {
 		pr_err("%s(): ioremap failed\n", __func__);
 		return PTR_ERR(base);
@@ -1394,9 +1393,8 @@ static int clk_mt2712_mcu_probe(struct platform_device *pdev)
 	int r;
 	struct device_node *node = pdev->dev.of_node;
 	void __iomem *base;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base)) {
 		pr_err("%s(): ioremap failed\n", __func__);
 		return PTR_ERR(base);
diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c
index 608a9a6621a3..9766cccf5844 100644
--- a/drivers/clk/mediatek/clk-mt6779.c
+++ b/drivers/clk/mediatek/clk-mt6779.c
@@ -1225,12 +1225,11 @@ static int clk_mt6779_apmixed_probe(struct platform_device *pdev)
 
 static int clk_mt6779_top_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	void __iomem *base;
 	struct clk_onecell_data *clk_data;
 	struct device_node *node = pdev->dev.of_node;
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c
index f62b0428da0e..f35389a11af1 100644
--- a/drivers/clk/mediatek/clk-mt6797.c
+++ b/drivers/clk/mediatek/clk-mt6797.c
@@ -385,9 +385,8 @@ static int mtk_topckgen_init(struct platform_device *pdev)
 	struct clk_onecell_data *clk_data;
 	void __iomem *base;
 	struct device_node *node = pdev->dev.of_node;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/mediatek/clk-mt7622.c b/drivers/clk/mediatek/clk-mt7622.c
index 8190dab179d7..ef5947e15c75 100644
--- a/drivers/clk/mediatek/clk-mt7622.c
+++ b/drivers/clk/mediatek/clk-mt7622.c
@@ -614,9 +614,8 @@ static int mtk_topckgen_init(struct platform_device *pdev)
 	struct clk_onecell_data *clk_data;
 	void __iomem *base;
 	struct device_node *node = pdev->dev.of_node;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
@@ -695,9 +694,8 @@ static int mtk_pericfg_init(struct platform_device *pdev)
 	void __iomem *base;
 	int r;
 	struct device_node *node = pdev->dev.of_node;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/mediatek/clk-mt7629.c b/drivers/clk/mediatek/clk-mt7629.c
index d6233994af5a..b73bdf152836 100644
--- a/drivers/clk/mediatek/clk-mt7629.c
+++ b/drivers/clk/mediatek/clk-mt7629.c
@@ -574,9 +574,8 @@ static int mtk_topckgen_init(struct platform_device *pdev)
 	struct clk_onecell_data *clk_data;
 	void __iomem *base;
 	struct device_node *node = pdev->dev.of_node;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
@@ -626,9 +625,8 @@ static int mtk_pericfg_init(struct platform_device *pdev)
 	void __iomem *base;
 	int r;
 	struct device_node *node = pdev->dev.of_node;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c
index 51c8d5c9a030..5046852eb0fd 100644
--- a/drivers/clk/mediatek/clk-mt8183.c
+++ b/drivers/clk/mediatek/clk-mt8183.c
@@ -1189,11 +1189,10 @@ CLK_OF_DECLARE_DRIVER(mt8183_topckgen, "mediatek,mt8183-topckgen",
 
 static int clk_mt8183_top_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	void __iomem *base;
 	struct device_node *node = pdev->dev.of_node;
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
@@ -1262,9 +1261,8 @@ static int clk_mt8183_mcu_probe(struct platform_device *pdev)
 	struct clk_onecell_data *clk_data;
 	struct device_node *node = pdev->dev.of_node;
 	void __iomem *base;
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/clk/meson/axg-audio.c b/drivers/clk/meson/axg-audio.c
index 18b23cdf679c..53715e36326c 100644
--- a/drivers/clk/meson/axg-audio.c
+++ b/drivers/clk/meson/axg-audio.c
@@ -20,12 +20,7 @@
 #include "clk-phase.h"
 #include "sclk-div.h"
 
-#define AUD_MST_IN_COUNT	8
-#define AUD_SLV_SCLK_COUNT	10
-#define AUD_SLV_LRCLK_COUNT	10
-
-#define AUD_GATE(_name, _reg, _bit, _phws, _iflags)			\
-struct clk_regmap aud_##_name = {					\
+#define AUD_GATE(_name, _reg, _bit, _pname, _iflags) {			\
 	.data = &(struct clk_regmap_gate_data){				\
 		.offset = (_reg),					\
 		.bit_idx = (_bit),					\
@@ -33,14 +28,13 @@ struct clk_regmap aud_##_name = {					\
 	.hw.init = &(struct clk_init_data) {				\
 		.name = "aud_"#_name,					\
 		.ops = &clk_regmap_gate_ops,				\
-		.parent_hws = (const struct clk_hw *[]) { &_phws.hw },	\
+		.parent_names = (const char *[]){ #_pname },		\
 		.num_parents = 1,					\
 		.flags = CLK_DUTY_CYCLE_PARENT | (_iflags),		\
 	},								\
 }
 
-#define AUD_MUX(_name, _reg, _mask, _shift, _dflags, _pdata, _iflags)	\
-struct clk_regmap aud_##_name = {					\
+#define AUD_MUX(_name, _reg, _mask, _shift, _dflags, _pdata, _iflags) {	\
 	.data = &(struct clk_regmap_mux_data){				\
 		.offset = (_reg),					\
 		.mask = (_mask),					\
@@ -56,8 +50,7 @@ struct clk_regmap aud_##_name = {					\
 	},								\
 }
 
-#define AUD_DIV(_name, _reg, _shift, _width, _dflags, _phws, _iflags)	\
-struct clk_regmap aud_##_name = {					\
+#define AUD_DIV(_name, _reg, _shift, _width, _dflags, _pname, _iflags) { \
 	.data = &(struct clk_regmap_div_data){				\
 		.offset = (_reg),					\
 		.shift = (_shift),					\
@@ -67,137 +60,27 @@ struct clk_regmap aud_##_name = {					\
 	.hw.init = &(struct clk_init_data){				\
 		.name = "aud_"#_name,					\
 		.ops = &clk_regmap_divider_ops,				\
-		.parent_hws = (const struct clk_hw *[]) { &_phws.hw },	\
+		.parent_names = (const char *[]){ #_pname },		\
 		.num_parents = 1,					\
 		.flags = (_iflags),					\
 	},								\
 }
 
-#define AUD_PCLK_GATE(_name, _bit)				\
-struct clk_regmap aud_##_name = {					\
+#define AUD_PCLK_GATE(_name, _reg, _bit) {				\
 	.data = &(struct clk_regmap_gate_data){				\
-		.offset = (AUDIO_CLK_GATE_EN),				\
+		.offset = (_reg),					\
 		.bit_idx = (_bit),					\
 	},								\
 	.hw.init = &(struct clk_init_data) {				\
 		.name = "aud_"#_name,					\
 		.ops = &clk_regmap_gate_ops,				\
-		.parent_data = &(const struct clk_parent_data) {	\
-			.fw_name = "pclk",				\
-		},							\
+		.parent_names = (const char *[]){ "aud_top" },		\
 		.num_parents = 1,					\
 	},								\
 }
-/* Audio peripheral clocks */
-static AUD_PCLK_GATE(ddr_arb,	   0);
-static AUD_PCLK_GATE(pdm,	   1);
-static AUD_PCLK_GATE(tdmin_a,	   2);
-static AUD_PCLK_GATE(tdmin_b,	   3);
-static AUD_PCLK_GATE(tdmin_c,	   4);
-static AUD_PCLK_GATE(tdmin_lb,	   5);
-static AUD_PCLK_GATE(tdmout_a,	   6);
-static AUD_PCLK_GATE(tdmout_b,	   7);
-static AUD_PCLK_GATE(tdmout_c,	   8);
-static AUD_PCLK_GATE(frddr_a,	   9);
-static AUD_PCLK_GATE(frddr_b,	   10);
-static AUD_PCLK_GATE(frddr_c,	   11);
-static AUD_PCLK_GATE(toddr_a,	   12);
-static AUD_PCLK_GATE(toddr_b,	   13);
-static AUD_PCLK_GATE(toddr_c,	   14);
-static AUD_PCLK_GATE(loopback,	   15);
-static AUD_PCLK_GATE(spdifin,	   16);
-static AUD_PCLK_GATE(spdifout,	   17);
-static AUD_PCLK_GATE(resample,	   18);
-static AUD_PCLK_GATE(power_detect, 19);
-static AUD_PCLK_GATE(spdifout_b,   21);
-
-/* Audio Master Clocks */
-static const struct clk_parent_data mst_mux_parent_data[] = {
-	{ .fw_name = "mst_in0", },
-	{ .fw_name = "mst_in1", },
-	{ .fw_name = "mst_in2", },
-	{ .fw_name = "mst_in3", },
-	{ .fw_name = "mst_in4", },
-	{ .fw_name = "mst_in5", },
-	{ .fw_name = "mst_in6", },
-	{ .fw_name = "mst_in7", },
-};
-
-#define AUD_MST_MUX(_name, _reg, _flag)				\
-	AUD_MUX(_name##_sel, _reg, 0x7, 24, _flag,		\
-		mst_mux_parent_data, 0)
-
-#define AUD_MST_MCLK_MUX(_name, _reg)				\
-	AUD_MST_MUX(_name, _reg, CLK_MUX_ROUND_CLOSEST)
-
-#define AUD_MST_SYS_MUX(_name, _reg)				\
-	AUD_MST_MUX(_name, _reg, 0)
-
-static AUD_MST_MCLK_MUX(mst_a_mclk,   AUDIO_MCLK_A_CTRL);
-static AUD_MST_MCLK_MUX(mst_b_mclk,   AUDIO_MCLK_B_CTRL);
-static AUD_MST_MCLK_MUX(mst_c_mclk,   AUDIO_MCLK_C_CTRL);
-static AUD_MST_MCLK_MUX(mst_d_mclk,   AUDIO_MCLK_D_CTRL);
-static AUD_MST_MCLK_MUX(mst_e_mclk,   AUDIO_MCLK_E_CTRL);
-static AUD_MST_MCLK_MUX(mst_f_mclk,   AUDIO_MCLK_F_CTRL);
-static AUD_MST_MCLK_MUX(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
-static AUD_MST_MCLK_MUX(pdm_dclk,     AUDIO_CLK_PDMIN_CTRL0);
-static AUD_MST_SYS_MUX(spdifin_clk,   AUDIO_CLK_SPDIFIN_CTRL);
-static AUD_MST_SYS_MUX(pdm_sysclk,    AUDIO_CLK_PDMIN_CTRL1);
-static AUD_MST_MCLK_MUX(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
-
-#define AUD_MST_DIV(_name, _reg, _flag)				\
-	AUD_DIV(_name##_div, _reg, 0, 16, _flag,		\
-		    aud_##_name##_sel, CLK_SET_RATE_PARENT)	\
-
-#define AUD_MST_MCLK_DIV(_name, _reg)				\
-	AUD_MST_DIV(_name, _reg, CLK_DIVIDER_ROUND_CLOSEST)
-
-#define AUD_MST_SYS_DIV(_name, _reg)				\
-	AUD_MST_DIV(_name, _reg, 0)
-
-static AUD_MST_MCLK_DIV(mst_a_mclk,   AUDIO_MCLK_A_CTRL);
-static AUD_MST_MCLK_DIV(mst_b_mclk,   AUDIO_MCLK_B_CTRL);
-static AUD_MST_MCLK_DIV(mst_c_mclk,   AUDIO_MCLK_C_CTRL);
-static AUD_MST_MCLK_DIV(mst_d_mclk,   AUDIO_MCLK_D_CTRL);
-static AUD_MST_MCLK_DIV(mst_e_mclk,   AUDIO_MCLK_E_CTRL);
-static AUD_MST_MCLK_DIV(mst_f_mclk,   AUDIO_MCLK_F_CTRL);
-static AUD_MST_MCLK_DIV(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
-static AUD_MST_MCLK_DIV(pdm_dclk,     AUDIO_CLK_PDMIN_CTRL0);
-static AUD_MST_SYS_DIV(spdifin_clk,   AUDIO_CLK_SPDIFIN_CTRL);
-static AUD_MST_SYS_DIV(pdm_sysclk,    AUDIO_CLK_PDMIN_CTRL1);
-static AUD_MST_MCLK_DIV(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
-
-#define AUD_MST_MCLK_GATE(_name, _reg)				\
-	AUD_GATE(_name, _reg, 31,  aud_##_name##_div,		\
-		 CLK_SET_RATE_PARENT)
-
-static AUD_MST_MCLK_GATE(mst_a_mclk,   AUDIO_MCLK_A_CTRL);
-static AUD_MST_MCLK_GATE(mst_b_mclk,   AUDIO_MCLK_B_CTRL);
-static AUD_MST_MCLK_GATE(mst_c_mclk,   AUDIO_MCLK_C_CTRL);
-static AUD_MST_MCLK_GATE(mst_d_mclk,   AUDIO_MCLK_D_CTRL);
-static AUD_MST_MCLK_GATE(mst_e_mclk,   AUDIO_MCLK_E_CTRL);
-static AUD_MST_MCLK_GATE(mst_f_mclk,   AUDIO_MCLK_F_CTRL);
-static AUD_MST_MCLK_GATE(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
-static AUD_MST_MCLK_GATE(spdifin_clk,  AUDIO_CLK_SPDIFIN_CTRL);
-static AUD_MST_MCLK_GATE(pdm_dclk,     AUDIO_CLK_PDMIN_CTRL0);
-static AUD_MST_MCLK_GATE(pdm_sysclk,   AUDIO_CLK_PDMIN_CTRL1);
-static AUD_MST_MCLK_GATE(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
-
-/* Sample Clocks */
-#define AUD_MST_SCLK_PRE_EN(_name, _reg)			\
-	AUD_GATE(mst_##_name##_sclk_pre_en, _reg, 31,		\
-		 aud_mst_##_name##_mclk, 0)
-
-static AUD_MST_SCLK_PRE_EN(a, AUDIO_MST_A_SCLK_CTRL0);
-static AUD_MST_SCLK_PRE_EN(b, AUDIO_MST_B_SCLK_CTRL0);
-static AUD_MST_SCLK_PRE_EN(c, AUDIO_MST_C_SCLK_CTRL0);
-static AUD_MST_SCLK_PRE_EN(d, AUDIO_MST_D_SCLK_CTRL0);
-static AUD_MST_SCLK_PRE_EN(e, AUDIO_MST_E_SCLK_CTRL0);
-static AUD_MST_SCLK_PRE_EN(f, AUDIO_MST_F_SCLK_CTRL0);
 
 #define AUD_SCLK_DIV(_name, _reg, _div_shift, _div_width,		\
-			 _hi_shift, _hi_width, _phws, _iflags)		\
-struct clk_regmap aud_##_name = {					\
+		     _hi_shift, _hi_width, _pname, _iflags) {		\
 	.data = &(struct meson_sclk_div_data) {				\
 		.div = {						\
 			.reg_off = (_reg),				\
@@ -213,38 +96,14 @@ struct clk_regmap aud_##_name = {					\
 	.hw.init = &(struct clk_init_data) {				\
 		.name = "aud_"#_name,					\
 		.ops = &meson_sclk_div_ops,				\
-		.parent_hws = (const struct clk_hw *[]) { &_phws.hw },	\
+		.parent_names = (const char *[]){ #_pname },		\
 		.num_parents = 1,					\
 		.flags = (_iflags),					\
 	},								\
 }
 
-#define AUD_MST_SCLK_DIV(_name, _reg)					\
-	AUD_SCLK_DIV(mst_##_name##_sclk_div, _reg, 20, 10, 0, 0,	\
-		     aud_mst_##_name##_sclk_pre_en,			\
-		     CLK_SET_RATE_PARENT)
-
-static AUD_MST_SCLK_DIV(a, AUDIO_MST_A_SCLK_CTRL0);
-static AUD_MST_SCLK_DIV(b, AUDIO_MST_B_SCLK_CTRL0);
-static AUD_MST_SCLK_DIV(c, AUDIO_MST_C_SCLK_CTRL0);
-static AUD_MST_SCLK_DIV(d, AUDIO_MST_D_SCLK_CTRL0);
-static AUD_MST_SCLK_DIV(e, AUDIO_MST_E_SCLK_CTRL0);
-static AUD_MST_SCLK_DIV(f, AUDIO_MST_F_SCLK_CTRL0);
-
-#define AUD_MST_SCLK_POST_EN(_name, _reg)				\
-	AUD_GATE(mst_##_name##_sclk_post_en, _reg, 30,			\
-		 aud_mst_##_name##_sclk_div, CLK_SET_RATE_PARENT)
-
-static AUD_MST_SCLK_POST_EN(a, AUDIO_MST_A_SCLK_CTRL0);
-static AUD_MST_SCLK_POST_EN(b, AUDIO_MST_B_SCLK_CTRL0);
-static AUD_MST_SCLK_POST_EN(c, AUDIO_MST_C_SCLK_CTRL0);
-static AUD_MST_SCLK_POST_EN(d, AUDIO_MST_D_SCLK_CTRL0);
-static AUD_MST_SCLK_POST_EN(e, AUDIO_MST_E_SCLK_CTRL0);
-static AUD_MST_SCLK_POST_EN(f, AUDIO_MST_F_SCLK_CTRL0);
-
 #define AUD_TRIPHASE(_name, _reg, _width, _shift0, _shift1, _shift2,	\
-			 _phws, _iflags)				\
-struct clk_regmap aud_##_name = {					\
+		     _pname, _iflags) {					\
 	.data = &(struct meson_clk_triphase_data) {			\
 		.ph0 = {						\
 			.reg_off = (_reg),				\
@@ -265,52 +124,91 @@ struct clk_regmap aud_##_name = {					\
 	.hw.init = &(struct clk_init_data) {				\
 		.name = "aud_"#_name,					\
 		.ops = &meson_clk_triphase_ops,				\
-		.parent_hws = (const struct clk_hw *[]) { &_phws.hw },	\
+		.parent_names = (const char *[]){ #_pname },		\
 		.num_parents = 1,					\
 		.flags = CLK_DUTY_CYCLE_PARENT | (_iflags),		\
 	},								\
 }
 
+#define AUD_PHASE(_name, _reg, _width, _shift, _pname, _iflags) {	\
+	.data = &(struct meson_clk_phase_data) {			\
+		.ph = {							\
+			.reg_off = (_reg),				\
+			.shift   = (_shift),				\
+			.width   = (_width),				\
+		},							\
+	},								\
+	.hw.init = &(struct clk_init_data) {				\
+		.name = "aud_"#_name,					\
+		.ops = &meson_clk_phase_ops,				\
+		.parent_names = (const char *[]){ #_pname },		\
+		.num_parents = 1,					\
+		.flags = (_iflags),					\
+	},								\
+}
+
+/* Audio Master Clocks */
+static const struct clk_parent_data mst_mux_parent_data[] = {
+	{ .fw_name = "mst_in0", },
+	{ .fw_name = "mst_in1", },
+	{ .fw_name = "mst_in2", },
+	{ .fw_name = "mst_in3", },
+	{ .fw_name = "mst_in4", },
+	{ .fw_name = "mst_in5", },
+	{ .fw_name = "mst_in6", },
+	{ .fw_name = "mst_in7", },
+};
+
+#define AUD_MST_MUX(_name, _reg, _flag)					\
+	AUD_MUX(_name##_sel, _reg, 0x7, 24, _flag,			\
+		mst_mux_parent_data, 0)
+#define AUD_MST_DIV(_name, _reg, _flag)					\
+	AUD_DIV(_name##_div, _reg, 0, 16, _flag,			\
+		aud_##_name##_sel, CLK_SET_RATE_PARENT)
+#define AUD_MST_MCLK_GATE(_name, _reg)					\
+	AUD_GATE(_name, _reg, 31, aud_##_name##_div,			\
+		 CLK_SET_RATE_PARENT)
+
+#define AUD_MST_MCLK_MUX(_name, _reg)					\
+	AUD_MST_MUX(_name, _reg, CLK_MUX_ROUND_CLOSEST)
+#define AUD_MST_MCLK_DIV(_name, _reg)					\
+	AUD_MST_DIV(_name, _reg, CLK_DIVIDER_ROUND_CLOSEST)
+
+#define AUD_MST_SYS_MUX(_name, _reg)					\
+	AUD_MST_MUX(_name, _reg, 0)
+#define AUD_MST_SYS_DIV(_name, _reg)					\
+	AUD_MST_DIV(_name, _reg, 0)
+
+/* Sample Clocks */
+#define AUD_MST_SCLK_PRE_EN(_name, _reg)				\
+	AUD_GATE(mst_##_name##_sclk_pre_en, _reg, 31,			\
+		 aud_mst_##_name##_mclk, 0)
+#define AUD_MST_SCLK_DIV(_name, _reg)					\
+	AUD_SCLK_DIV(mst_##_name##_sclk_div, _reg, 20, 10, 0, 0,	\
+		     aud_mst_##_name##_sclk_pre_en,			\
+		     CLK_SET_RATE_PARENT)
+#define AUD_MST_SCLK_POST_EN(_name, _reg)				\
+	AUD_GATE(mst_##_name##_sclk_post_en, _reg, 30,			\
+		 aud_mst_##_name##_sclk_div, CLK_SET_RATE_PARENT)
 #define AUD_MST_SCLK(_name, _reg)					\
 	AUD_TRIPHASE(mst_##_name##_sclk, _reg, 1, 0, 2, 4,		\
 		     aud_mst_##_name##_sclk_post_en, CLK_SET_RATE_PARENT)
 
-static AUD_MST_SCLK(a, AUDIO_MST_A_SCLK_CTRL1);
-static AUD_MST_SCLK(b, AUDIO_MST_B_SCLK_CTRL1);
-static AUD_MST_SCLK(c, AUDIO_MST_C_SCLK_CTRL1);
-static AUD_MST_SCLK(d, AUDIO_MST_D_SCLK_CTRL1);
-static AUD_MST_SCLK(e, AUDIO_MST_E_SCLK_CTRL1);
-static AUD_MST_SCLK(f, AUDIO_MST_F_SCLK_CTRL1);
-
 #define AUD_MST_LRCLK_DIV(_name, _reg)					\
 	AUD_SCLK_DIV(mst_##_name##_lrclk_div, _reg, 0, 10, 10, 10,	\
-		     aud_mst_##_name##_sclk_post_en, 0)			\
-
-static AUD_MST_LRCLK_DIV(a, AUDIO_MST_A_SCLK_CTRL0);
-static AUD_MST_LRCLK_DIV(b, AUDIO_MST_B_SCLK_CTRL0);
-static AUD_MST_LRCLK_DIV(c, AUDIO_MST_C_SCLK_CTRL0);
-static AUD_MST_LRCLK_DIV(d, AUDIO_MST_D_SCLK_CTRL0);
-static AUD_MST_LRCLK_DIV(e, AUDIO_MST_E_SCLK_CTRL0);
-static AUD_MST_LRCLK_DIV(f, AUDIO_MST_F_SCLK_CTRL0);
-
+		     aud_mst_##_name##_sclk_post_en, 0)
 #define AUD_MST_LRCLK(_name, _reg)					\
 	AUD_TRIPHASE(mst_##_name##_lrclk, _reg, 1, 1, 3, 5,		\
 		     aud_mst_##_name##_lrclk_div, CLK_SET_RATE_PARENT)
 
-static AUD_MST_LRCLK(a, AUDIO_MST_A_SCLK_CTRL1);
-static AUD_MST_LRCLK(b, AUDIO_MST_B_SCLK_CTRL1);
-static AUD_MST_LRCLK(c, AUDIO_MST_C_SCLK_CTRL1);
-static AUD_MST_LRCLK(d, AUDIO_MST_D_SCLK_CTRL1);
-static AUD_MST_LRCLK(e, AUDIO_MST_E_SCLK_CTRL1);
-static AUD_MST_LRCLK(f, AUDIO_MST_F_SCLK_CTRL1);
-
+/* TDM bit clock sources */
 static const struct clk_parent_data tdm_sclk_parent_data[] = {
-	{ .hw = &aud_mst_a_sclk.hw, },
-	{ .hw = &aud_mst_b_sclk.hw, },
-	{ .hw = &aud_mst_c_sclk.hw, },
-	{ .hw = &aud_mst_d_sclk.hw, },
-	{ .hw = &aud_mst_e_sclk.hw, },
-	{ .hw = &aud_mst_f_sclk.hw, },
+	{ .name = "aud_mst_a_sclk", .index = -1, },
+	{ .name = "aud_mst_b_sclk", .index = -1, },
+	{ .name = "aud_mst_c_sclk", .index = -1, },
+	{ .name = "aud_mst_d_sclk", .index = -1, },
+	{ .name = "aud_mst_e_sclk", .index = -1, },
+	{ .name = "aud_mst_f_sclk", .index = -1, },
 	{ .fw_name = "slv_sclk0", },
 	{ .fw_name = "slv_sclk1", },
 	{ .fw_name = "slv_sclk2", },
@@ -323,78 +221,14 @@ static const struct clk_parent_data tdm_sclk_parent_data[] = {
 	{ .fw_name = "slv_sclk9", },
 };
 
-#define AUD_TDM_SCLK_MUX(_name, _reg)				\
-	AUD_MUX(tdm##_name##_sclk_sel, _reg, 0xf, 24,		\
-		    CLK_MUX_ROUND_CLOSEST,			\
-		    tdm_sclk_parent_data, 0)
-
-static AUD_TDM_SCLK_MUX(in_a,  AUDIO_CLK_TDMIN_A_CTRL);
-static AUD_TDM_SCLK_MUX(in_b,  AUDIO_CLK_TDMIN_B_CTRL);
-static AUD_TDM_SCLK_MUX(in_c,  AUDIO_CLK_TDMIN_C_CTRL);
-static AUD_TDM_SCLK_MUX(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
-static AUD_TDM_SCLK_MUX(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
-static AUD_TDM_SCLK_MUX(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
-static AUD_TDM_SCLK_MUX(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
-
-#define AUD_TDM_SCLK_PRE_EN(_name, _reg)				\
-	AUD_GATE(tdm##_name##_sclk_pre_en, _reg, 31,			\
-		 aud_tdm##_name##_sclk_sel, CLK_SET_RATE_PARENT)
-
-static AUD_TDM_SCLK_PRE_EN(in_a,  AUDIO_CLK_TDMIN_A_CTRL);
-static AUD_TDM_SCLK_PRE_EN(in_b,  AUDIO_CLK_TDMIN_B_CTRL);
-static AUD_TDM_SCLK_PRE_EN(in_c,  AUDIO_CLK_TDMIN_C_CTRL);
-static AUD_TDM_SCLK_PRE_EN(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
-static AUD_TDM_SCLK_PRE_EN(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
-static AUD_TDM_SCLK_PRE_EN(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
-static AUD_TDM_SCLK_PRE_EN(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
-
-#define AUD_TDM_SCLK_POST_EN(_name, _reg)				\
-	AUD_GATE(tdm##_name##_sclk_post_en, _reg, 30,			\
-		 aud_tdm##_name##_sclk_pre_en, CLK_SET_RATE_PARENT)
-
-static AUD_TDM_SCLK_POST_EN(in_a,  AUDIO_CLK_TDMIN_A_CTRL);
-static AUD_TDM_SCLK_POST_EN(in_b,  AUDIO_CLK_TDMIN_B_CTRL);
-static AUD_TDM_SCLK_POST_EN(in_c,  AUDIO_CLK_TDMIN_C_CTRL);
-static AUD_TDM_SCLK_POST_EN(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
-static AUD_TDM_SCLK_POST_EN(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
-static AUD_TDM_SCLK_POST_EN(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
-static AUD_TDM_SCLK_POST_EN(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
-
-#define AUD_TDM_SCLK(_name, _reg)					\
-	struct clk_regmap aud_tdm##_name##_sclk = {			\
-	.data = &(struct meson_clk_phase_data) {			\
-		.ph = {							\
-			.reg_off = (_reg),				\
-			.shift   = 29,					\
-			.width   = 1,					\
-		},							\
-	},								\
-	.hw.init = &(struct clk_init_data) {				\
-		.name = "aud_tdm"#_name"_sclk",				\
-		.ops = &meson_clk_phase_ops,				\
-		.parent_hws = (const struct clk_hw *[]) {		\
-			&aud_tdm##_name##_sclk_post_en.hw		\
-		},							\
-		.num_parents = 1,					\
-		.flags = CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT,	\
-	},								\
-}
-
-static AUD_TDM_SCLK(in_a,  AUDIO_CLK_TDMIN_A_CTRL);
-static AUD_TDM_SCLK(in_b,  AUDIO_CLK_TDMIN_B_CTRL);
-static AUD_TDM_SCLK(in_c,  AUDIO_CLK_TDMIN_C_CTRL);
-static AUD_TDM_SCLK(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
-static AUD_TDM_SCLK(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
-static AUD_TDM_SCLK(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
-static AUD_TDM_SCLK(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
-
+/* TDM sample clock sources */
 static const struct clk_parent_data tdm_lrclk_parent_data[] = {
-	{ .hw = &aud_mst_a_lrclk.hw, },
-	{ .hw = &aud_mst_b_lrclk.hw, },
-	{ .hw = &aud_mst_c_lrclk.hw, },
-	{ .hw = &aud_mst_d_lrclk.hw, },
-	{ .hw = &aud_mst_e_lrclk.hw, },
-	{ .hw = &aud_mst_f_lrclk.hw, },
+	{ .name = "aud_mst_a_lrclk", .index = -1, },
+	{ .name = "aud_mst_b_lrclk", .index = -1, },
+	{ .name = "aud_mst_c_lrclk", .index = -1, },
+	{ .name = "aud_mst_d_lrclk", .index = -1, },
+	{ .name = "aud_mst_e_lrclk", .index = -1, },
+	{ .name = "aud_mst_f_lrclk", .index = -1, },
 	{ .fw_name = "slv_lrclk0", },
 	{ .fw_name = "slv_lrclk1", },
 	{ .fw_name = "slv_lrclk2", },
@@ -407,69 +241,536 @@ static const struct clk_parent_data tdm_lrclk_parent_data[] = {
 	{ .fw_name = "slv_lrclk9", },
 };
 
-#define AUD_TDM_LRLCK(_name, _reg)			\
-	AUD_MUX(tdm##_name##_lrclk, _reg, 0xf, 20,	\
-		CLK_MUX_ROUND_CLOSEST,			\
-		tdm_lrclk_parent_data, 0)
+#define AUD_TDM_SCLK_MUX(_name, _reg)					\
+	AUD_MUX(tdm##_name##_sclk_sel, _reg, 0xf, 24,			\
+		CLK_MUX_ROUND_CLOSEST, tdm_sclk_parent_data, 0)
+#define AUD_TDM_SCLK_PRE_EN(_name, _reg)				\
+	AUD_GATE(tdm##_name##_sclk_pre_en, _reg, 31,			\
+		 aud_tdm##_name##_sclk_sel, CLK_SET_RATE_PARENT)
+#define AUD_TDM_SCLK_POST_EN(_name, _reg)				\
+	AUD_GATE(tdm##_name##_sclk_post_en, _reg, 30,			\
+		 aud_tdm##_name##_sclk_pre_en, CLK_SET_RATE_PARENT)
+#define AUD_TDM_SCLK(_name, _reg)					\
+	AUD_PHASE(tdm##_name##_sclk, _reg, 1, 29,			\
+		  aud_tdm##_name##_sclk_post_en,			\
+		  CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT)
+
+#define AUD_TDM_LRLCK(_name, _reg)					\
+	AUD_MUX(tdm##_name##_lrclk, _reg, 0xf, 20,			\
+		CLK_MUX_ROUND_CLOSEST, tdm_lrclk_parent_data, 0)
+
+/* Pad master clock sources */
+static const struct clk_parent_data mclk_pad_ctrl_parent_data[] = {
+	{ .name = "aud_mst_a_mclk", .index = -1,  },
+	{ .name = "aud_mst_b_mclk", .index = -1,  },
+	{ .name = "aud_mst_c_mclk", .index = -1,  },
+	{ .name = "aud_mst_d_mclk", .index = -1,  },
+	{ .name = "aud_mst_e_mclk", .index = -1,  },
+	{ .name = "aud_mst_f_mclk", .index = -1,  },
+};
+
+/* Pad bit clock sources */
+static const struct clk_parent_data sclk_pad_ctrl_parent_data[] = {
+	{ .name = "aud_mst_a_sclk", .index = -1, },
+	{ .name = "aud_mst_b_sclk", .index = -1, },
+	{ .name = "aud_mst_c_sclk", .index = -1, },
+	{ .name = "aud_mst_d_sclk", .index = -1, },
+	{ .name = "aud_mst_e_sclk", .index = -1, },
+	{ .name = "aud_mst_f_sclk", .index = -1, },
+};
 
-static AUD_TDM_LRLCK(in_a,  AUDIO_CLK_TDMIN_A_CTRL);
-static AUD_TDM_LRLCK(in_b,  AUDIO_CLK_TDMIN_B_CTRL);
-static AUD_TDM_LRLCK(in_c,  AUDIO_CLK_TDMIN_C_CTRL);
-static AUD_TDM_LRLCK(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
-static AUD_TDM_LRLCK(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
-static AUD_TDM_LRLCK(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
-static AUD_TDM_LRLCK(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+/* Pad sample clock sources */
+static const struct clk_parent_data lrclk_pad_ctrl_parent_data[] = {
+	{ .name = "aud_mst_a_lrclk", .index = -1, },
+	{ .name = "aud_mst_b_lrclk", .index = -1, },
+	{ .name = "aud_mst_c_lrclk", .index = -1, },
+	{ .name = "aud_mst_d_lrclk", .index = -1, },
+	{ .name = "aud_mst_e_lrclk", .index = -1, },
+	{ .name = "aud_mst_f_lrclk", .index = -1, },
+};
 
-/* G12a Pad control */
 #define AUD_TDM_PAD_CTRL(_name, _reg, _shift, _parents)		\
-	AUD_MUX(tdm_##_name, _reg, 0x7, _shift, 0, _parents,	\
+	AUD_MUX(_name, _reg, 0x7, _shift, 0, _parents,		\
 		CLK_SET_RATE_NO_REPARENT)
 
-static const struct clk_parent_data mclk_pad_ctrl_parent_data[] = {
-	{ .hw = &aud_mst_a_mclk.hw },
-	{ .hw = &aud_mst_b_mclk.hw },
-	{ .hw = &aud_mst_c_mclk.hw },
-	{ .hw = &aud_mst_d_mclk.hw },
-	{ .hw = &aud_mst_e_mclk.hw },
-	{ .hw = &aud_mst_f_mclk.hw },
+/* Common Clocks */
+static struct clk_regmap ddr_arb =
+	AUD_PCLK_GATE(ddr_arb, AUDIO_CLK_GATE_EN, 0);
+static struct clk_regmap pdm =
+	AUD_PCLK_GATE(pdm, AUDIO_CLK_GATE_EN, 1);
+static struct clk_regmap tdmin_a =
+	AUD_PCLK_GATE(tdmin_a, AUDIO_CLK_GATE_EN, 2);
+static struct clk_regmap tdmin_b =
+	AUD_PCLK_GATE(tdmin_b, AUDIO_CLK_GATE_EN, 3);
+static struct clk_regmap tdmin_c =
+	AUD_PCLK_GATE(tdmin_c, AUDIO_CLK_GATE_EN, 4);
+static struct clk_regmap tdmin_lb =
+	AUD_PCLK_GATE(tdmin_lb, AUDIO_CLK_GATE_EN, 5);
+static struct clk_regmap tdmout_a =
+	AUD_PCLK_GATE(tdmout_a, AUDIO_CLK_GATE_EN, 6);
+static struct clk_regmap tdmout_b =
+	AUD_PCLK_GATE(tdmout_b, AUDIO_CLK_GATE_EN, 7);
+static struct clk_regmap tdmout_c =
+	AUD_PCLK_GATE(tdmout_c, AUDIO_CLK_GATE_EN, 8);
+static struct clk_regmap frddr_a =
+	AUD_PCLK_GATE(frddr_a, AUDIO_CLK_GATE_EN, 9);
+static struct clk_regmap frddr_b =
+	AUD_PCLK_GATE(frddr_b, AUDIO_CLK_GATE_EN, 10);
+static struct clk_regmap frddr_c =
+	AUD_PCLK_GATE(frddr_c, AUDIO_CLK_GATE_EN, 11);
+static struct clk_regmap toddr_a =
+	AUD_PCLK_GATE(toddr_a, AUDIO_CLK_GATE_EN, 12);
+static struct clk_regmap toddr_b =
+	AUD_PCLK_GATE(toddr_b, AUDIO_CLK_GATE_EN, 13);
+static struct clk_regmap toddr_c =
+	AUD_PCLK_GATE(toddr_c, AUDIO_CLK_GATE_EN, 14);
+static struct clk_regmap loopback =
+	AUD_PCLK_GATE(loopback, AUDIO_CLK_GATE_EN, 15);
+static struct clk_regmap spdifin =
+	AUD_PCLK_GATE(spdifin, AUDIO_CLK_GATE_EN, 16);
+static struct clk_regmap spdifout =
+	AUD_PCLK_GATE(spdifout, AUDIO_CLK_GATE_EN, 17);
+static struct clk_regmap resample =
+	AUD_PCLK_GATE(resample, AUDIO_CLK_GATE_EN, 18);
+static struct clk_regmap power_detect =
+	AUD_PCLK_GATE(power_detect, AUDIO_CLK_GATE_EN, 19);
+
+static struct clk_regmap spdifout_clk_sel =
+	AUD_MST_MCLK_MUX(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
+static struct clk_regmap pdm_dclk_sel =
+	AUD_MST_MCLK_MUX(pdm_dclk, AUDIO_CLK_PDMIN_CTRL0);
+static struct clk_regmap spdifin_clk_sel =
+	AUD_MST_SYS_MUX(spdifin_clk, AUDIO_CLK_SPDIFIN_CTRL);
+static struct clk_regmap pdm_sysclk_sel =
+	AUD_MST_SYS_MUX(pdm_sysclk, AUDIO_CLK_PDMIN_CTRL1);
+static struct clk_regmap spdifout_b_clk_sel =
+	AUD_MST_MCLK_MUX(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
+
+static struct clk_regmap spdifout_clk_div =
+	AUD_MST_MCLK_DIV(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
+static struct clk_regmap pdm_dclk_div =
+	AUD_MST_MCLK_DIV(pdm_dclk, AUDIO_CLK_PDMIN_CTRL0);
+static struct clk_regmap spdifin_clk_div =
+	AUD_MST_SYS_DIV(spdifin_clk, AUDIO_CLK_SPDIFIN_CTRL);
+static struct clk_regmap pdm_sysclk_div =
+	AUD_MST_SYS_DIV(pdm_sysclk, AUDIO_CLK_PDMIN_CTRL1);
+static struct clk_regmap spdifout_b_clk_div =
+	AUD_MST_MCLK_DIV(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
+
+static struct clk_regmap spdifout_clk =
+	AUD_MST_MCLK_GATE(spdifout_clk, AUDIO_CLK_SPDIFOUT_CTRL);
+static struct clk_regmap spdifin_clk =
+	AUD_MST_MCLK_GATE(spdifin_clk, AUDIO_CLK_SPDIFIN_CTRL);
+static struct clk_regmap pdm_dclk =
+	AUD_MST_MCLK_GATE(pdm_dclk, AUDIO_CLK_PDMIN_CTRL0);
+static struct clk_regmap pdm_sysclk =
+	AUD_MST_MCLK_GATE(pdm_sysclk, AUDIO_CLK_PDMIN_CTRL1);
+static struct clk_regmap spdifout_b_clk =
+	AUD_MST_MCLK_GATE(spdifout_b_clk, AUDIO_CLK_SPDIFOUT_B_CTRL);
+
+static struct clk_regmap mst_a_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(a, AUDIO_MST_A_SCLK_CTRL0);
+static struct clk_regmap mst_b_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(b, AUDIO_MST_B_SCLK_CTRL0);
+static struct clk_regmap mst_c_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(c, AUDIO_MST_C_SCLK_CTRL0);
+static struct clk_regmap mst_d_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(d, AUDIO_MST_D_SCLK_CTRL0);
+static struct clk_regmap mst_e_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(e, AUDIO_MST_E_SCLK_CTRL0);
+static struct clk_regmap mst_f_sclk_pre_en =
+	AUD_MST_SCLK_PRE_EN(f, AUDIO_MST_F_SCLK_CTRL0);
+
+static struct clk_regmap mst_a_sclk_div =
+	AUD_MST_SCLK_DIV(a, AUDIO_MST_A_SCLK_CTRL0);
+static struct clk_regmap mst_b_sclk_div =
+	AUD_MST_SCLK_DIV(b, AUDIO_MST_B_SCLK_CTRL0);
+static struct clk_regmap mst_c_sclk_div =
+	AUD_MST_SCLK_DIV(c, AUDIO_MST_C_SCLK_CTRL0);
+static struct clk_regmap mst_d_sclk_div =
+	AUD_MST_SCLK_DIV(d, AUDIO_MST_D_SCLK_CTRL0);
+static struct clk_regmap mst_e_sclk_div =
+	AUD_MST_SCLK_DIV(e, AUDIO_MST_E_SCLK_CTRL0);
+static struct clk_regmap mst_f_sclk_div =
+	AUD_MST_SCLK_DIV(f, AUDIO_MST_F_SCLK_CTRL0);
+
+static struct clk_regmap mst_a_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(a, AUDIO_MST_A_SCLK_CTRL0);
+static struct clk_regmap mst_b_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(b, AUDIO_MST_B_SCLK_CTRL0);
+static struct clk_regmap mst_c_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(c, AUDIO_MST_C_SCLK_CTRL0);
+static struct clk_regmap mst_d_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(d, AUDIO_MST_D_SCLK_CTRL0);
+static struct clk_regmap mst_e_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(e, AUDIO_MST_E_SCLK_CTRL0);
+static struct clk_regmap mst_f_sclk_post_en =
+	AUD_MST_SCLK_POST_EN(f, AUDIO_MST_F_SCLK_CTRL0);
+
+static struct clk_regmap mst_a_sclk =
+	AUD_MST_SCLK(a, AUDIO_MST_A_SCLK_CTRL1);
+static struct clk_regmap mst_b_sclk =
+	AUD_MST_SCLK(b, AUDIO_MST_B_SCLK_CTRL1);
+static struct clk_regmap mst_c_sclk =
+	AUD_MST_SCLK(c, AUDIO_MST_C_SCLK_CTRL1);
+static struct clk_regmap mst_d_sclk =
+	AUD_MST_SCLK(d, AUDIO_MST_D_SCLK_CTRL1);
+static struct clk_regmap mst_e_sclk =
+	AUD_MST_SCLK(e, AUDIO_MST_E_SCLK_CTRL1);
+static struct clk_regmap mst_f_sclk =
+	AUD_MST_SCLK(f, AUDIO_MST_F_SCLK_CTRL1);
+
+static struct clk_regmap mst_a_lrclk_div =
+	AUD_MST_LRCLK_DIV(a, AUDIO_MST_A_SCLK_CTRL0);
+static struct clk_regmap mst_b_lrclk_div =
+	AUD_MST_LRCLK_DIV(b, AUDIO_MST_B_SCLK_CTRL0);
+static struct clk_regmap mst_c_lrclk_div =
+	AUD_MST_LRCLK_DIV(c, AUDIO_MST_C_SCLK_CTRL0);
+static struct clk_regmap mst_d_lrclk_div =
+	AUD_MST_LRCLK_DIV(d, AUDIO_MST_D_SCLK_CTRL0);
+static struct clk_regmap mst_e_lrclk_div =
+	AUD_MST_LRCLK_DIV(e, AUDIO_MST_E_SCLK_CTRL0);
+static struct clk_regmap mst_f_lrclk_div =
+	AUD_MST_LRCLK_DIV(f, AUDIO_MST_F_SCLK_CTRL0);
+
+static struct clk_regmap mst_a_lrclk =
+	AUD_MST_LRCLK(a, AUDIO_MST_A_SCLK_CTRL1);
+static struct clk_regmap mst_b_lrclk =
+	AUD_MST_LRCLK(b, AUDIO_MST_B_SCLK_CTRL1);
+static struct clk_regmap mst_c_lrclk =
+	AUD_MST_LRCLK(c, AUDIO_MST_C_SCLK_CTRL1);
+static struct clk_regmap mst_d_lrclk =
+	AUD_MST_LRCLK(d, AUDIO_MST_D_SCLK_CTRL1);
+static struct clk_regmap mst_e_lrclk =
+	AUD_MST_LRCLK(e, AUDIO_MST_E_SCLK_CTRL1);
+static struct clk_regmap mst_f_lrclk =
+	AUD_MST_LRCLK(f, AUDIO_MST_F_SCLK_CTRL1);
+
+static struct clk_regmap tdmin_a_sclk_sel =
+	AUD_TDM_SCLK_MUX(in_a, AUDIO_CLK_TDMIN_A_CTRL);
+static struct clk_regmap tdmin_b_sclk_sel =
+	AUD_TDM_SCLK_MUX(in_b, AUDIO_CLK_TDMIN_B_CTRL);
+static struct clk_regmap tdmin_c_sclk_sel =
+	AUD_TDM_SCLK_MUX(in_c, AUDIO_CLK_TDMIN_C_CTRL);
+static struct clk_regmap tdmin_lb_sclk_sel =
+	AUD_TDM_SCLK_MUX(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
+static struct clk_regmap tdmout_a_sclk_sel =
+	AUD_TDM_SCLK_MUX(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
+static struct clk_regmap tdmout_b_sclk_sel =
+	AUD_TDM_SCLK_MUX(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
+static struct clk_regmap tdmout_c_sclk_sel =
+	AUD_TDM_SCLK_MUX(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+
+static struct clk_regmap tdmin_a_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(in_a, AUDIO_CLK_TDMIN_A_CTRL);
+static struct clk_regmap tdmin_b_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(in_b, AUDIO_CLK_TDMIN_B_CTRL);
+static struct clk_regmap tdmin_c_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(in_c, AUDIO_CLK_TDMIN_C_CTRL);
+static struct clk_regmap tdmin_lb_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
+static struct clk_regmap tdmout_a_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
+static struct clk_regmap tdmout_b_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
+static struct clk_regmap tdmout_c_sclk_pre_en =
+	AUD_TDM_SCLK_PRE_EN(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+
+static struct clk_regmap tdmin_a_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(in_a, AUDIO_CLK_TDMIN_A_CTRL);
+static struct clk_regmap tdmin_b_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(in_b, AUDIO_CLK_TDMIN_B_CTRL);
+static struct clk_regmap tdmin_c_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(in_c, AUDIO_CLK_TDMIN_C_CTRL);
+static struct clk_regmap tdmin_lb_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
+static struct clk_regmap tdmout_a_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
+static struct clk_regmap tdmout_b_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
+static struct clk_regmap tdmout_c_sclk_post_en =
+	AUD_TDM_SCLK_POST_EN(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+
+static struct clk_regmap tdmin_a_sclk =
+	AUD_TDM_SCLK(in_a, AUDIO_CLK_TDMIN_A_CTRL);
+static struct clk_regmap tdmin_b_sclk =
+	AUD_TDM_SCLK(in_b, AUDIO_CLK_TDMIN_B_CTRL);
+static struct clk_regmap tdmin_c_sclk =
+	AUD_TDM_SCLK(in_c, AUDIO_CLK_TDMIN_C_CTRL);
+static struct clk_regmap tdmin_lb_sclk =
+	AUD_TDM_SCLK(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
+static struct clk_regmap tdmout_a_sclk =
+	AUD_TDM_SCLK(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
+static struct clk_regmap tdmout_b_sclk =
+	AUD_TDM_SCLK(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
+static struct clk_regmap tdmout_c_sclk =
+	AUD_TDM_SCLK(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+
+static struct clk_regmap tdmin_a_lrclk =
+	AUD_TDM_LRLCK(in_a, AUDIO_CLK_TDMIN_A_CTRL);
+static struct clk_regmap tdmin_b_lrclk =
+	AUD_TDM_LRLCK(in_b, AUDIO_CLK_TDMIN_B_CTRL);
+static struct clk_regmap tdmin_c_lrclk =
+	AUD_TDM_LRLCK(in_c, AUDIO_CLK_TDMIN_C_CTRL);
+static struct clk_regmap tdmin_lb_lrclk =
+	AUD_TDM_LRLCK(in_lb, AUDIO_CLK_TDMIN_LB_CTRL);
+static struct clk_regmap tdmout_a_lrclk =
+	AUD_TDM_LRLCK(out_a, AUDIO_CLK_TDMOUT_A_CTRL);
+static struct clk_regmap tdmout_b_lrclk =
+	AUD_TDM_LRLCK(out_b, AUDIO_CLK_TDMOUT_B_CTRL);
+static struct clk_regmap tdmout_c_lrclk =
+	AUD_TDM_LRLCK(out_c, AUDIO_CLK_TDMOUT_C_CTRL);
+
+/* AXG/G12A Clocks */
+static struct clk_hw axg_aud_top = {
+	.init = &(struct clk_init_data) {
+		/* Provide aud_top signal name on axg and g12a */
+		.name = "aud_top",
+		.ops = &(const struct clk_ops) {},
+		.parent_data = &(const struct clk_parent_data) {
+			.fw_name = "pclk",
+		},
+		.num_parents = 1,
+	},
 };
 
-static AUD_TDM_PAD_CTRL(mclk_pad_0, AUDIO_MST_PAD_CTRL0, 0,
-			mclk_pad_ctrl_parent_data);
-static AUD_TDM_PAD_CTRL(mclk_pad_1, AUDIO_MST_PAD_CTRL0, 4,
-			mclk_pad_ctrl_parent_data);
+static struct clk_regmap mst_a_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_a_mclk, AUDIO_MCLK_A_CTRL);
+static struct clk_regmap mst_b_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_b_mclk, AUDIO_MCLK_B_CTRL);
+static struct clk_regmap mst_c_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_c_mclk, AUDIO_MCLK_C_CTRL);
+static struct clk_regmap mst_d_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_d_mclk, AUDIO_MCLK_D_CTRL);
+static struct clk_regmap mst_e_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_e_mclk, AUDIO_MCLK_E_CTRL);
+static struct clk_regmap mst_f_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_f_mclk, AUDIO_MCLK_F_CTRL);
+
+static struct clk_regmap mst_a_mclk_div =
+	AUD_MST_MCLK_DIV(mst_a_mclk, AUDIO_MCLK_A_CTRL);
+static struct clk_regmap mst_b_mclk_div =
+	AUD_MST_MCLK_DIV(mst_b_mclk, AUDIO_MCLK_B_CTRL);
+static struct clk_regmap mst_c_mclk_div =
+	AUD_MST_MCLK_DIV(mst_c_mclk, AUDIO_MCLK_C_CTRL);
+static struct clk_regmap mst_d_mclk_div =
+	AUD_MST_MCLK_DIV(mst_d_mclk, AUDIO_MCLK_D_CTRL);
+static struct clk_regmap mst_e_mclk_div =
+	AUD_MST_MCLK_DIV(mst_e_mclk, AUDIO_MCLK_E_CTRL);
+static struct clk_regmap mst_f_mclk_div =
+	AUD_MST_MCLK_DIV(mst_f_mclk, AUDIO_MCLK_F_CTRL);
+
+static struct clk_regmap mst_a_mclk =
+	AUD_MST_MCLK_GATE(mst_a_mclk, AUDIO_MCLK_A_CTRL);
+static struct clk_regmap mst_b_mclk =
+	AUD_MST_MCLK_GATE(mst_b_mclk, AUDIO_MCLK_B_CTRL);
+static struct clk_regmap mst_c_mclk =
+	AUD_MST_MCLK_GATE(mst_c_mclk, AUDIO_MCLK_C_CTRL);
+static struct clk_regmap mst_d_mclk =
+	AUD_MST_MCLK_GATE(mst_d_mclk, AUDIO_MCLK_D_CTRL);
+static struct clk_regmap mst_e_mclk =
+	AUD_MST_MCLK_GATE(mst_e_mclk, AUDIO_MCLK_E_CTRL);
+static struct clk_regmap mst_f_mclk =
+	AUD_MST_MCLK_GATE(mst_f_mclk, AUDIO_MCLK_F_CTRL);
+
+/* G12a clocks */
+static struct clk_regmap g12a_tdm_mclk_pad_0 = AUD_TDM_PAD_CTRL(
+	mclk_pad_0, AUDIO_MST_PAD_CTRL0, 0, mclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_mclk_pad_1 = AUD_TDM_PAD_CTRL(
+	mclk_pad_1, AUDIO_MST_PAD_CTRL0, 4, mclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_lrclk_pad_0 = AUD_TDM_PAD_CTRL(
+	lrclk_pad_0, AUDIO_MST_PAD_CTRL1, 16, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_lrclk_pad_1 = AUD_TDM_PAD_CTRL(
+	lrclk_pad_1, AUDIO_MST_PAD_CTRL1, 20, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_lrclk_pad_2 = AUD_TDM_PAD_CTRL(
+	lrclk_pad_2, AUDIO_MST_PAD_CTRL1, 24, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_sclk_pad_0 = AUD_TDM_PAD_CTRL(
+	sclk_pad_0, AUDIO_MST_PAD_CTRL1, 0, sclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_sclk_pad_1 = AUD_TDM_PAD_CTRL(
+	sclk_pad_1, AUDIO_MST_PAD_CTRL1, 4, sclk_pad_ctrl_parent_data);
+static struct clk_regmap g12a_tdm_sclk_pad_2 = AUD_TDM_PAD_CTRL(
+	sclk_pad_2, AUDIO_MST_PAD_CTRL1, 8, sclk_pad_ctrl_parent_data);
+
+/* G12a/SM1 clocks */
+static struct clk_regmap toram =
+	AUD_PCLK_GATE(toram, AUDIO_CLK_GATE_EN, 20);
+static struct clk_regmap spdifout_b =
+	AUD_PCLK_GATE(spdifout_b, AUDIO_CLK_GATE_EN, 21);
+static struct clk_regmap eqdrc =
+	AUD_PCLK_GATE(eqdrc, AUDIO_CLK_GATE_EN, 22);
+
+/* SM1 Clocks */
+static struct clk_regmap sm1_clk81_en = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = AUDIO_CLK81_EN,
+		.bit_idx = 31,
+	},
+	.hw.init = &(struct clk_init_data) {
+		.name = "aud_clk81_en",
+		.ops = &clk_regmap_gate_ops,
+		.parent_data = &(const struct clk_parent_data) {
+			.fw_name = "pclk",
+		},
+		.num_parents = 1,
+	},
+};
 
-static const struct clk_parent_data lrclk_pad_ctrl_parent_data[] = {
-	{ .hw = &aud_mst_a_lrclk.hw },
-	{ .hw = &aud_mst_b_lrclk.hw },
-	{ .hw = &aud_mst_c_lrclk.hw },
-	{ .hw = &aud_mst_d_lrclk.hw },
-	{ .hw = &aud_mst_e_lrclk.hw },
-	{ .hw = &aud_mst_f_lrclk.hw },
+static struct clk_regmap sm1_sysclk_a_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = AUDIO_CLK81_CTRL,
+		.shift = 0,
+		.width = 8,
+	},
+	.hw.init = &(struct clk_init_data) {
+		.name = "aud_sysclk_a_div",
+		.ops = &clk_regmap_divider_ops,
+		.parent_hws = (const struct clk_hw *[]) {
+			&sm1_clk81_en.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
 };
 
-static AUD_TDM_PAD_CTRL(lrclk_pad_0, AUDIO_MST_PAD_CTRL1, 16,
-			lrclk_pad_ctrl_parent_data);
-static AUD_TDM_PAD_CTRL(lrclk_pad_1, AUDIO_MST_PAD_CTRL1, 20,
-			lrclk_pad_ctrl_parent_data);
-static AUD_TDM_PAD_CTRL(lrclk_pad_2, AUDIO_MST_PAD_CTRL1, 24,
-			lrclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_sysclk_a_en = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = AUDIO_CLK81_CTRL,
+		.bit_idx = 8,
+	},
+	.hw.init = &(struct clk_init_data) {
+		.name = "aud_sysclk_a_en",
+		.ops = &clk_regmap_gate_ops,
+		.parent_hws = (const struct clk_hw *[]) {
+			&sm1_sysclk_a_div.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
 
-static const struct clk_parent_data sclk_pad_ctrl_parent_data[] = {
-	{ .hw = &aud_mst_a_sclk.hw },
-	{ .hw = &aud_mst_b_sclk.hw },
-	{ .hw = &aud_mst_c_sclk.hw },
-	{ .hw = &aud_mst_d_sclk.hw },
-	{ .hw = &aud_mst_e_sclk.hw },
-	{ .hw = &aud_mst_f_sclk.hw },
+static struct clk_regmap sm1_sysclk_b_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = AUDIO_CLK81_CTRL,
+		.shift = 16,
+		.width = 8,
+	},
+	.hw.init = &(struct clk_init_data) {
+		.name = "aud_sysclk_b_div",
+		.ops = &clk_regmap_divider_ops,
+		.parent_hws = (const struct clk_hw *[]) {
+			&sm1_clk81_en.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
 };
 
-static AUD_TDM_PAD_CTRL(sclk_pad_0, AUDIO_MST_PAD_CTRL1, 0,
-			sclk_pad_ctrl_parent_data);
-static AUD_TDM_PAD_CTRL(sclk_pad_1, AUDIO_MST_PAD_CTRL1, 4,
-			sclk_pad_ctrl_parent_data);
-static AUD_TDM_PAD_CTRL(sclk_pad_2, AUDIO_MST_PAD_CTRL1, 8,
-			sclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_sysclk_b_en = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = AUDIO_CLK81_CTRL,
+		.bit_idx = 24,
+	},
+	.hw.init = &(struct clk_init_data) {
+		.name = "aud_sysclk_b_en",
+		.ops = &clk_regmap_gate_ops,
+		.parent_hws = (const struct clk_hw *[]) {
+			&sm1_sysclk_b_div.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct clk_hw *sm1_aud_top_parents[] = {
+	&sm1_sysclk_a_en.hw,
+	&sm1_sysclk_b_en.hw,
+};
+
+static struct clk_regmap sm1_aud_top = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = AUDIO_CLK81_CTRL,
+		.mask = 0x1,
+		.shift = 31,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "aud_top",
+		.ops = &clk_regmap_mux_ops,
+		.parent_hws = sm1_aud_top_parents,
+		.num_parents = ARRAY_SIZE(sm1_aud_top_parents),
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_regmap resample_b =
+	AUD_PCLK_GATE(resample_b, AUDIO_CLK_GATE_EN, 26);
+static struct clk_regmap tovad =
+	AUD_PCLK_GATE(tovad, AUDIO_CLK_GATE_EN, 27);
+static struct clk_regmap locker =
+	AUD_PCLK_GATE(locker, AUDIO_CLK_GATE_EN, 28);
+static struct clk_regmap spdifin_lb =
+	AUD_PCLK_GATE(spdifin_lb, AUDIO_CLK_GATE_EN, 29);
+static struct clk_regmap frddr_d =
+	AUD_PCLK_GATE(frddr_d, AUDIO_CLK_GATE_EN1, 0);
+static struct clk_regmap toddr_d =
+	AUD_PCLK_GATE(toddr_d, AUDIO_CLK_GATE_EN1, 1);
+static struct clk_regmap loopback_b =
+	AUD_PCLK_GATE(loopback_b, AUDIO_CLK_GATE_EN1, 2);
+
+static struct clk_regmap sm1_mst_a_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_a_mclk, AUDIO_SM1_MCLK_A_CTRL);
+static struct clk_regmap sm1_mst_b_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_b_mclk, AUDIO_SM1_MCLK_B_CTRL);
+static struct clk_regmap sm1_mst_c_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_c_mclk, AUDIO_SM1_MCLK_C_CTRL);
+static struct clk_regmap sm1_mst_d_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_d_mclk, AUDIO_SM1_MCLK_D_CTRL);
+static struct clk_regmap sm1_mst_e_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_e_mclk, AUDIO_SM1_MCLK_E_CTRL);
+static struct clk_regmap sm1_mst_f_mclk_sel =
+	AUD_MST_MCLK_MUX(mst_f_mclk, AUDIO_SM1_MCLK_F_CTRL);
+
+static struct clk_regmap sm1_mst_a_mclk_div =
+	AUD_MST_MCLK_DIV(mst_a_mclk, AUDIO_SM1_MCLK_A_CTRL);
+static struct clk_regmap sm1_mst_b_mclk_div =
+	AUD_MST_MCLK_DIV(mst_b_mclk, AUDIO_SM1_MCLK_B_CTRL);
+static struct clk_regmap sm1_mst_c_mclk_div =
+	AUD_MST_MCLK_DIV(mst_c_mclk, AUDIO_SM1_MCLK_C_CTRL);
+static struct clk_regmap sm1_mst_d_mclk_div =
+	AUD_MST_MCLK_DIV(mst_d_mclk, AUDIO_SM1_MCLK_D_CTRL);
+static struct clk_regmap sm1_mst_e_mclk_div =
+	AUD_MST_MCLK_DIV(mst_e_mclk, AUDIO_SM1_MCLK_E_CTRL);
+static struct clk_regmap sm1_mst_f_mclk_div =
+	AUD_MST_MCLK_DIV(mst_f_mclk, AUDIO_SM1_MCLK_F_CTRL);
+
+static struct clk_regmap sm1_mst_a_mclk =
+	AUD_MST_MCLK_GATE(mst_a_mclk, AUDIO_SM1_MCLK_A_CTRL);
+static struct clk_regmap sm1_mst_b_mclk =
+	AUD_MST_MCLK_GATE(mst_b_mclk, AUDIO_SM1_MCLK_B_CTRL);
+static struct clk_regmap sm1_mst_c_mclk =
+	AUD_MST_MCLK_GATE(mst_c_mclk, AUDIO_SM1_MCLK_C_CTRL);
+static struct clk_regmap sm1_mst_d_mclk =
+	AUD_MST_MCLK_GATE(mst_d_mclk, AUDIO_SM1_MCLK_D_CTRL);
+static struct clk_regmap sm1_mst_e_mclk =
+	AUD_MST_MCLK_GATE(mst_e_mclk, AUDIO_SM1_MCLK_E_CTRL);
+static struct clk_regmap sm1_mst_f_mclk =
+	AUD_MST_MCLK_GATE(mst_f_mclk, AUDIO_SM1_MCLK_F_CTRL);
+
+static struct clk_regmap sm1_tdm_mclk_pad_0 = AUD_TDM_PAD_CTRL(
+	tdm_mclk_pad_0, AUDIO_SM1_MST_PAD_CTRL0, 0, mclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_mclk_pad_1 = AUD_TDM_PAD_CTRL(
+	tdm_mclk_pad_1, AUDIO_SM1_MST_PAD_CTRL0, 4, mclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_lrclk_pad_0 = AUD_TDM_PAD_CTRL(
+	tdm_lrclk_pad_0, AUDIO_SM1_MST_PAD_CTRL1, 16, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_lrclk_pad_1 = AUD_TDM_PAD_CTRL(
+	tdm_lrclk_pad_1, AUDIO_SM1_MST_PAD_CTRL1, 20, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_lrclk_pad_2 = AUD_TDM_PAD_CTRL(
+	tdm_lrclk_pad_2, AUDIO_SM1_MST_PAD_CTRL1, 24, lrclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_sclk_pad_0 = AUD_TDM_PAD_CTRL(
+	tdm_sclk_pad_0, AUDIO_SM1_MST_PAD_CTRL1, 0, sclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_sclk_pad_1 = AUD_TDM_PAD_CTRL(
+	tdm_sclk_pad_1, AUDIO_SM1_MST_PAD_CTRL1, 4, sclk_pad_ctrl_parent_data);
+static struct clk_regmap sm1_tdm_sclk_pad_2 = AUD_TDM_PAD_CTRL(
+	tdm_sclk_pad_2, AUDIO_SM1_MST_PAD_CTRL1, 8, sclk_pad_ctrl_parent_data);
 
 /*
  * Array of all clocks provided by this provider
@@ -477,127 +778,128 @@ static AUD_TDM_PAD_CTRL(sclk_pad_2, AUDIO_MST_PAD_CTRL1, 8,
  */
 static struct clk_hw_onecell_data axg_audio_hw_onecell_data = {
 	.hws = {
-		[AUD_CLKID_DDR_ARB]		= &aud_ddr_arb.hw,
-		[AUD_CLKID_PDM]			= &aud_pdm.hw,
-		[AUD_CLKID_TDMIN_A]		= &aud_tdmin_a.hw,
-		[AUD_CLKID_TDMIN_B]		= &aud_tdmin_b.hw,
-		[AUD_CLKID_TDMIN_C]		= &aud_tdmin_c.hw,
-		[AUD_CLKID_TDMIN_LB]		= &aud_tdmin_lb.hw,
-		[AUD_CLKID_TDMOUT_A]		= &aud_tdmout_a.hw,
-		[AUD_CLKID_TDMOUT_B]		= &aud_tdmout_b.hw,
-		[AUD_CLKID_TDMOUT_C]		= &aud_tdmout_c.hw,
-		[AUD_CLKID_FRDDR_A]		= &aud_frddr_a.hw,
-		[AUD_CLKID_FRDDR_B]		= &aud_frddr_b.hw,
-		[AUD_CLKID_FRDDR_C]		= &aud_frddr_c.hw,
-		[AUD_CLKID_TODDR_A]		= &aud_toddr_a.hw,
-		[AUD_CLKID_TODDR_B]		= &aud_toddr_b.hw,
-		[AUD_CLKID_TODDR_C]		= &aud_toddr_c.hw,
-		[AUD_CLKID_LOOPBACK]		= &aud_loopback.hw,
-		[AUD_CLKID_SPDIFIN]		= &aud_spdifin.hw,
-		[AUD_CLKID_SPDIFOUT]		= &aud_spdifout.hw,
-		[AUD_CLKID_RESAMPLE]		= &aud_resample.hw,
-		[AUD_CLKID_POWER_DETECT]	= &aud_power_detect.hw,
-		[AUD_CLKID_MST_A_MCLK_SEL]	= &aud_mst_a_mclk_sel.hw,
-		[AUD_CLKID_MST_B_MCLK_SEL]	= &aud_mst_b_mclk_sel.hw,
-		[AUD_CLKID_MST_C_MCLK_SEL]	= &aud_mst_c_mclk_sel.hw,
-		[AUD_CLKID_MST_D_MCLK_SEL]	= &aud_mst_d_mclk_sel.hw,
-		[AUD_CLKID_MST_E_MCLK_SEL]	= &aud_mst_e_mclk_sel.hw,
-		[AUD_CLKID_MST_F_MCLK_SEL]	= &aud_mst_f_mclk_sel.hw,
-		[AUD_CLKID_MST_A_MCLK_DIV]	= &aud_mst_a_mclk_div.hw,
-		[AUD_CLKID_MST_B_MCLK_DIV]	= &aud_mst_b_mclk_div.hw,
-		[AUD_CLKID_MST_C_MCLK_DIV]	= &aud_mst_c_mclk_div.hw,
-		[AUD_CLKID_MST_D_MCLK_DIV]	= &aud_mst_d_mclk_div.hw,
-		[AUD_CLKID_MST_E_MCLK_DIV]	= &aud_mst_e_mclk_div.hw,
-		[AUD_CLKID_MST_F_MCLK_DIV]	= &aud_mst_f_mclk_div.hw,
-		[AUD_CLKID_MST_A_MCLK]		= &aud_mst_a_mclk.hw,
-		[AUD_CLKID_MST_B_MCLK]		= &aud_mst_b_mclk.hw,
-		[AUD_CLKID_MST_C_MCLK]		= &aud_mst_c_mclk.hw,
-		[AUD_CLKID_MST_D_MCLK]		= &aud_mst_d_mclk.hw,
-		[AUD_CLKID_MST_E_MCLK]		= &aud_mst_e_mclk.hw,
-		[AUD_CLKID_MST_F_MCLK]		= &aud_mst_f_mclk.hw,
-		[AUD_CLKID_SPDIFOUT_CLK_SEL]	= &aud_spdifout_clk_sel.hw,
-		[AUD_CLKID_SPDIFOUT_CLK_DIV]	= &aud_spdifout_clk_div.hw,
-		[AUD_CLKID_SPDIFOUT_CLK]	= &aud_spdifout_clk.hw,
-		[AUD_CLKID_SPDIFIN_CLK_SEL]	= &aud_spdifin_clk_sel.hw,
-		[AUD_CLKID_SPDIFIN_CLK_DIV]	= &aud_spdifin_clk_div.hw,
-		[AUD_CLKID_SPDIFIN_CLK]		= &aud_spdifin_clk.hw,
-		[AUD_CLKID_PDM_DCLK_SEL]	= &aud_pdm_dclk_sel.hw,
-		[AUD_CLKID_PDM_DCLK_DIV]	= &aud_pdm_dclk_div.hw,
-		[AUD_CLKID_PDM_DCLK]		= &aud_pdm_dclk.hw,
-		[AUD_CLKID_PDM_SYSCLK_SEL]	= &aud_pdm_sysclk_sel.hw,
-		[AUD_CLKID_PDM_SYSCLK_DIV]	= &aud_pdm_sysclk_div.hw,
-		[AUD_CLKID_PDM_SYSCLK]		= &aud_pdm_sysclk.hw,
-		[AUD_CLKID_MST_A_SCLK_PRE_EN]	= &aud_mst_a_sclk_pre_en.hw,
-		[AUD_CLKID_MST_B_SCLK_PRE_EN]	= &aud_mst_b_sclk_pre_en.hw,
-		[AUD_CLKID_MST_C_SCLK_PRE_EN]	= &aud_mst_c_sclk_pre_en.hw,
-		[AUD_CLKID_MST_D_SCLK_PRE_EN]	= &aud_mst_d_sclk_pre_en.hw,
-		[AUD_CLKID_MST_E_SCLK_PRE_EN]	= &aud_mst_e_sclk_pre_en.hw,
-		[AUD_CLKID_MST_F_SCLK_PRE_EN]	= &aud_mst_f_sclk_pre_en.hw,
-		[AUD_CLKID_MST_A_SCLK_DIV]	= &aud_mst_a_sclk_div.hw,
-		[AUD_CLKID_MST_B_SCLK_DIV]	= &aud_mst_b_sclk_div.hw,
-		[AUD_CLKID_MST_C_SCLK_DIV]	= &aud_mst_c_sclk_div.hw,
-		[AUD_CLKID_MST_D_SCLK_DIV]	= &aud_mst_d_sclk_div.hw,
-		[AUD_CLKID_MST_E_SCLK_DIV]	= &aud_mst_e_sclk_div.hw,
-		[AUD_CLKID_MST_F_SCLK_DIV]	= &aud_mst_f_sclk_div.hw,
-		[AUD_CLKID_MST_A_SCLK_POST_EN]	= &aud_mst_a_sclk_post_en.hw,
-		[AUD_CLKID_MST_B_SCLK_POST_EN]	= &aud_mst_b_sclk_post_en.hw,
-		[AUD_CLKID_MST_C_SCLK_POST_EN]	= &aud_mst_c_sclk_post_en.hw,
-		[AUD_CLKID_MST_D_SCLK_POST_EN]	= &aud_mst_d_sclk_post_en.hw,
-		[AUD_CLKID_MST_E_SCLK_POST_EN]	= &aud_mst_e_sclk_post_en.hw,
-		[AUD_CLKID_MST_F_SCLK_POST_EN]	= &aud_mst_f_sclk_post_en.hw,
-		[AUD_CLKID_MST_A_SCLK]		= &aud_mst_a_sclk.hw,
-		[AUD_CLKID_MST_B_SCLK]		= &aud_mst_b_sclk.hw,
-		[AUD_CLKID_MST_C_SCLK]		= &aud_mst_c_sclk.hw,
-		[AUD_CLKID_MST_D_SCLK]		= &aud_mst_d_sclk.hw,
-		[AUD_CLKID_MST_E_SCLK]		= &aud_mst_e_sclk.hw,
-		[AUD_CLKID_MST_F_SCLK]		= &aud_mst_f_sclk.hw,
-		[AUD_CLKID_MST_A_LRCLK_DIV]	= &aud_mst_a_lrclk_div.hw,
-		[AUD_CLKID_MST_B_LRCLK_DIV]	= &aud_mst_b_lrclk_div.hw,
-		[AUD_CLKID_MST_C_LRCLK_DIV]	= &aud_mst_c_lrclk_div.hw,
-		[AUD_CLKID_MST_D_LRCLK_DIV]	= &aud_mst_d_lrclk_div.hw,
-		[AUD_CLKID_MST_E_LRCLK_DIV]	= &aud_mst_e_lrclk_div.hw,
-		[AUD_CLKID_MST_F_LRCLK_DIV]	= &aud_mst_f_lrclk_div.hw,
-		[AUD_CLKID_MST_A_LRCLK]		= &aud_mst_a_lrclk.hw,
-		[AUD_CLKID_MST_B_LRCLK]		= &aud_mst_b_lrclk.hw,
-		[AUD_CLKID_MST_C_LRCLK]		= &aud_mst_c_lrclk.hw,
-		[AUD_CLKID_MST_D_LRCLK]		= &aud_mst_d_lrclk.hw,
-		[AUD_CLKID_MST_E_LRCLK]		= &aud_mst_e_lrclk.hw,
-		[AUD_CLKID_MST_F_LRCLK]		= &aud_mst_f_lrclk.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_SEL]	= &aud_tdmin_a_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_SEL]	= &aud_tdmin_b_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_SEL]	= &aud_tdmin_c_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_SEL]	= &aud_tdmin_lb_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_SEL]	= &aud_tdmout_a_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_SEL]	= &aud_tdmout_b_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_SEL]	= &aud_tdmout_c_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_PRE_EN]	= &aud_tdmin_a_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_PRE_EN]	= &aud_tdmin_b_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_PRE_EN]	= &aud_tdmin_c_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_PRE_EN] = &aud_tdmin_lb_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_PRE_EN] = &aud_tdmout_a_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_PRE_EN] = &aud_tdmout_b_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_PRE_EN] = &aud_tdmout_c_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_POST_EN] = &aud_tdmin_a_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_POST_EN] = &aud_tdmin_b_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_POST_EN] = &aud_tdmin_c_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_POST_EN] = &aud_tdmin_lb_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_POST_EN] = &aud_tdmout_a_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_POST_EN] = &aud_tdmout_b_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_POST_EN] = &aud_tdmout_c_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_A_SCLK]	= &aud_tdmin_a_sclk.hw,
-		[AUD_CLKID_TDMIN_B_SCLK]	= &aud_tdmin_b_sclk.hw,
-		[AUD_CLKID_TDMIN_C_SCLK]	= &aud_tdmin_c_sclk.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK]	= &aud_tdmin_lb_sclk.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK]	= &aud_tdmout_a_sclk.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK]	= &aud_tdmout_b_sclk.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK]	= &aud_tdmout_c_sclk.hw,
-		[AUD_CLKID_TDMIN_A_LRCLK]	= &aud_tdmin_a_lrclk.hw,
-		[AUD_CLKID_TDMIN_B_LRCLK]	= &aud_tdmin_b_lrclk.hw,
-		[AUD_CLKID_TDMIN_C_LRCLK]	= &aud_tdmin_c_lrclk.hw,
-		[AUD_CLKID_TDMIN_LB_LRCLK]	= &aud_tdmin_lb_lrclk.hw,
-		[AUD_CLKID_TDMOUT_A_LRCLK]	= &aud_tdmout_a_lrclk.hw,
-		[AUD_CLKID_TDMOUT_B_LRCLK]	= &aud_tdmout_b_lrclk.hw,
-		[AUD_CLKID_TDMOUT_C_LRCLK]	= &aud_tdmout_c_lrclk.hw,
+		[AUD_CLKID_DDR_ARB]		= &ddr_arb.hw,
+		[AUD_CLKID_PDM]			= &pdm.hw,
+		[AUD_CLKID_TDMIN_A]		= &tdmin_a.hw,
+		[AUD_CLKID_TDMIN_B]		= &tdmin_b.hw,
+		[AUD_CLKID_TDMIN_C]		= &tdmin_c.hw,
+		[AUD_CLKID_TDMIN_LB]		= &tdmin_lb.hw,
+		[AUD_CLKID_TDMOUT_A]		= &tdmout_a.hw,
+		[AUD_CLKID_TDMOUT_B]		= &tdmout_b.hw,
+		[AUD_CLKID_TDMOUT_C]		= &tdmout_c.hw,
+		[AUD_CLKID_FRDDR_A]		= &frddr_a.hw,
+		[AUD_CLKID_FRDDR_B]		= &frddr_b.hw,
+		[AUD_CLKID_FRDDR_C]		= &frddr_c.hw,
+		[AUD_CLKID_TODDR_A]		= &toddr_a.hw,
+		[AUD_CLKID_TODDR_B]		= &toddr_b.hw,
+		[AUD_CLKID_TODDR_C]		= &toddr_c.hw,
+		[AUD_CLKID_LOOPBACK]		= &loopback.hw,
+		[AUD_CLKID_SPDIFIN]		= &spdifin.hw,
+		[AUD_CLKID_SPDIFOUT]		= &spdifout.hw,
+		[AUD_CLKID_RESAMPLE]		= &resample.hw,
+		[AUD_CLKID_POWER_DETECT]	= &power_detect.hw,
+		[AUD_CLKID_MST_A_MCLK_SEL]	= &mst_a_mclk_sel.hw,
+		[AUD_CLKID_MST_B_MCLK_SEL]	= &mst_b_mclk_sel.hw,
+		[AUD_CLKID_MST_C_MCLK_SEL]	= &mst_c_mclk_sel.hw,
+		[AUD_CLKID_MST_D_MCLK_SEL]	= &mst_d_mclk_sel.hw,
+		[AUD_CLKID_MST_E_MCLK_SEL]	= &mst_e_mclk_sel.hw,
+		[AUD_CLKID_MST_F_MCLK_SEL]	= &mst_f_mclk_sel.hw,
+		[AUD_CLKID_MST_A_MCLK_DIV]	= &mst_a_mclk_div.hw,
+		[AUD_CLKID_MST_B_MCLK_DIV]	= &mst_b_mclk_div.hw,
+		[AUD_CLKID_MST_C_MCLK_DIV]	= &mst_c_mclk_div.hw,
+		[AUD_CLKID_MST_D_MCLK_DIV]	= &mst_d_mclk_div.hw,
+		[AUD_CLKID_MST_E_MCLK_DIV]	= &mst_e_mclk_div.hw,
+		[AUD_CLKID_MST_F_MCLK_DIV]	= &mst_f_mclk_div.hw,
+		[AUD_CLKID_MST_A_MCLK]		= &mst_a_mclk.hw,
+		[AUD_CLKID_MST_B_MCLK]		= &mst_b_mclk.hw,
+		[AUD_CLKID_MST_C_MCLK]		= &mst_c_mclk.hw,
+		[AUD_CLKID_MST_D_MCLK]		= &mst_d_mclk.hw,
+		[AUD_CLKID_MST_E_MCLK]		= &mst_e_mclk.hw,
+		[AUD_CLKID_MST_F_MCLK]		= &mst_f_mclk.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_SEL]	= &spdifout_clk_sel.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_DIV]	= &spdifout_clk_div.hw,
+		[AUD_CLKID_SPDIFOUT_CLK]	= &spdifout_clk.hw,
+		[AUD_CLKID_SPDIFIN_CLK_SEL]	= &spdifin_clk_sel.hw,
+		[AUD_CLKID_SPDIFIN_CLK_DIV]	= &spdifin_clk_div.hw,
+		[AUD_CLKID_SPDIFIN_CLK]		= &spdifin_clk.hw,
+		[AUD_CLKID_PDM_DCLK_SEL]	= &pdm_dclk_sel.hw,
+		[AUD_CLKID_PDM_DCLK_DIV]	= &pdm_dclk_div.hw,
+		[AUD_CLKID_PDM_DCLK]		= &pdm_dclk.hw,
+		[AUD_CLKID_PDM_SYSCLK_SEL]	= &pdm_sysclk_sel.hw,
+		[AUD_CLKID_PDM_SYSCLK_DIV]	= &pdm_sysclk_div.hw,
+		[AUD_CLKID_PDM_SYSCLK]		= &pdm_sysclk.hw,
+		[AUD_CLKID_MST_A_SCLK_PRE_EN]	= &mst_a_sclk_pre_en.hw,
+		[AUD_CLKID_MST_B_SCLK_PRE_EN]	= &mst_b_sclk_pre_en.hw,
+		[AUD_CLKID_MST_C_SCLK_PRE_EN]	= &mst_c_sclk_pre_en.hw,
+		[AUD_CLKID_MST_D_SCLK_PRE_EN]	= &mst_d_sclk_pre_en.hw,
+		[AUD_CLKID_MST_E_SCLK_PRE_EN]	= &mst_e_sclk_pre_en.hw,
+		[AUD_CLKID_MST_F_SCLK_PRE_EN]	= &mst_f_sclk_pre_en.hw,
+		[AUD_CLKID_MST_A_SCLK_DIV]	= &mst_a_sclk_div.hw,
+		[AUD_CLKID_MST_B_SCLK_DIV]	= &mst_b_sclk_div.hw,
+		[AUD_CLKID_MST_C_SCLK_DIV]	= &mst_c_sclk_div.hw,
+		[AUD_CLKID_MST_D_SCLK_DIV]	= &mst_d_sclk_div.hw,
+		[AUD_CLKID_MST_E_SCLK_DIV]	= &mst_e_sclk_div.hw,
+		[AUD_CLKID_MST_F_SCLK_DIV]	= &mst_f_sclk_div.hw,
+		[AUD_CLKID_MST_A_SCLK_POST_EN]	= &mst_a_sclk_post_en.hw,
+		[AUD_CLKID_MST_B_SCLK_POST_EN]	= &mst_b_sclk_post_en.hw,
+		[AUD_CLKID_MST_C_SCLK_POST_EN]	= &mst_c_sclk_post_en.hw,
+		[AUD_CLKID_MST_D_SCLK_POST_EN]	= &mst_d_sclk_post_en.hw,
+		[AUD_CLKID_MST_E_SCLK_POST_EN]	= &mst_e_sclk_post_en.hw,
+		[AUD_CLKID_MST_F_SCLK_POST_EN]	= &mst_f_sclk_post_en.hw,
+		[AUD_CLKID_MST_A_SCLK]		= &mst_a_sclk.hw,
+		[AUD_CLKID_MST_B_SCLK]		= &mst_b_sclk.hw,
+		[AUD_CLKID_MST_C_SCLK]		= &mst_c_sclk.hw,
+		[AUD_CLKID_MST_D_SCLK]		= &mst_d_sclk.hw,
+		[AUD_CLKID_MST_E_SCLK]		= &mst_e_sclk.hw,
+		[AUD_CLKID_MST_F_SCLK]		= &mst_f_sclk.hw,
+		[AUD_CLKID_MST_A_LRCLK_DIV]	= &mst_a_lrclk_div.hw,
+		[AUD_CLKID_MST_B_LRCLK_DIV]	= &mst_b_lrclk_div.hw,
+		[AUD_CLKID_MST_C_LRCLK_DIV]	= &mst_c_lrclk_div.hw,
+		[AUD_CLKID_MST_D_LRCLK_DIV]	= &mst_d_lrclk_div.hw,
+		[AUD_CLKID_MST_E_LRCLK_DIV]	= &mst_e_lrclk_div.hw,
+		[AUD_CLKID_MST_F_LRCLK_DIV]	= &mst_f_lrclk_div.hw,
+		[AUD_CLKID_MST_A_LRCLK]		= &mst_a_lrclk.hw,
+		[AUD_CLKID_MST_B_LRCLK]		= &mst_b_lrclk.hw,
+		[AUD_CLKID_MST_C_LRCLK]		= &mst_c_lrclk.hw,
+		[AUD_CLKID_MST_D_LRCLK]		= &mst_d_lrclk.hw,
+		[AUD_CLKID_MST_E_LRCLK]		= &mst_e_lrclk.hw,
+		[AUD_CLKID_MST_F_LRCLK]		= &mst_f_lrclk.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_SEL]	= &tdmin_a_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_SEL]	= &tdmin_b_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_SEL]	= &tdmin_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_SEL]	= &tdmin_lb_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_SEL]	= &tdmout_a_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_SEL]	= &tdmout_b_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_SEL]	= &tdmout_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_PRE_EN]	= &tdmin_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_PRE_EN]	= &tdmin_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_PRE_EN]	= &tdmin_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_PRE_EN] = &tdmin_lb_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_PRE_EN] = &tdmout_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_PRE_EN] = &tdmout_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_PRE_EN] = &tdmout_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_POST_EN] = &tdmin_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_POST_EN] = &tdmin_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_POST_EN] = &tdmin_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_POST_EN] = &tdmin_lb_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_POST_EN] = &tdmout_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_POST_EN] = &tdmout_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_POST_EN] = &tdmout_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK]	= &tdmin_a_sclk.hw,
+		[AUD_CLKID_TDMIN_B_SCLK]	= &tdmin_b_sclk.hw,
+		[AUD_CLKID_TDMIN_C_SCLK]	= &tdmin_c_sclk.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK]	= &tdmin_lb_sclk.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK]	= &tdmout_a_sclk.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK]	= &tdmout_b_sclk.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK]	= &tdmout_c_sclk.hw,
+		[AUD_CLKID_TDMIN_A_LRCLK]	= &tdmin_a_lrclk.hw,
+		[AUD_CLKID_TDMIN_B_LRCLK]	= &tdmin_b_lrclk.hw,
+		[AUD_CLKID_TDMIN_C_LRCLK]	= &tdmin_c_lrclk.hw,
+		[AUD_CLKID_TDMIN_LB_LRCLK]	= &tdmin_lb_lrclk.hw,
+		[AUD_CLKID_TDMOUT_A_LRCLK]	= &tdmout_a_lrclk.hw,
+		[AUD_CLKID_TDMOUT_B_LRCLK]	= &tdmout_b_lrclk.hw,
+		[AUD_CLKID_TDMOUT_C_LRCLK]	= &tdmout_c_lrclk.hw,
+		[AUD_CLKID_TOP]			= &axg_aud_top,
 		[NR_CLKS] = NULL,
 	},
 	.num = NR_CLKS,
@@ -609,284 +911,596 @@ static struct clk_hw_onecell_data axg_audio_hw_onecell_data = {
  */
 static struct clk_hw_onecell_data g12a_audio_hw_onecell_data = {
 	.hws = {
-		[AUD_CLKID_DDR_ARB]		= &aud_ddr_arb.hw,
-		[AUD_CLKID_PDM]			= &aud_pdm.hw,
-		[AUD_CLKID_TDMIN_A]		= &aud_tdmin_a.hw,
-		[AUD_CLKID_TDMIN_B]		= &aud_tdmin_b.hw,
-		[AUD_CLKID_TDMIN_C]		= &aud_tdmin_c.hw,
-		[AUD_CLKID_TDMIN_LB]		= &aud_tdmin_lb.hw,
-		[AUD_CLKID_TDMOUT_A]		= &aud_tdmout_a.hw,
-		[AUD_CLKID_TDMOUT_B]		= &aud_tdmout_b.hw,
-		[AUD_CLKID_TDMOUT_C]		= &aud_tdmout_c.hw,
-		[AUD_CLKID_FRDDR_A]		= &aud_frddr_a.hw,
-		[AUD_CLKID_FRDDR_B]		= &aud_frddr_b.hw,
-		[AUD_CLKID_FRDDR_C]		= &aud_frddr_c.hw,
-		[AUD_CLKID_TODDR_A]		= &aud_toddr_a.hw,
-		[AUD_CLKID_TODDR_B]		= &aud_toddr_b.hw,
-		[AUD_CLKID_TODDR_C]		= &aud_toddr_c.hw,
-		[AUD_CLKID_LOOPBACK]		= &aud_loopback.hw,
-		[AUD_CLKID_SPDIFIN]		= &aud_spdifin.hw,
-		[AUD_CLKID_SPDIFOUT]		= &aud_spdifout.hw,
-		[AUD_CLKID_RESAMPLE]		= &aud_resample.hw,
-		[AUD_CLKID_POWER_DETECT]	= &aud_power_detect.hw,
-		[AUD_CLKID_SPDIFOUT_B]		= &aud_spdifout_b.hw,
-		[AUD_CLKID_MST_A_MCLK_SEL]	= &aud_mst_a_mclk_sel.hw,
-		[AUD_CLKID_MST_B_MCLK_SEL]	= &aud_mst_b_mclk_sel.hw,
-		[AUD_CLKID_MST_C_MCLK_SEL]	= &aud_mst_c_mclk_sel.hw,
-		[AUD_CLKID_MST_D_MCLK_SEL]	= &aud_mst_d_mclk_sel.hw,
-		[AUD_CLKID_MST_E_MCLK_SEL]	= &aud_mst_e_mclk_sel.hw,
-		[AUD_CLKID_MST_F_MCLK_SEL]	= &aud_mst_f_mclk_sel.hw,
-		[AUD_CLKID_MST_A_MCLK_DIV]	= &aud_mst_a_mclk_div.hw,
-		[AUD_CLKID_MST_B_MCLK_DIV]	= &aud_mst_b_mclk_div.hw,
-		[AUD_CLKID_MST_C_MCLK_DIV]	= &aud_mst_c_mclk_div.hw,
-		[AUD_CLKID_MST_D_MCLK_DIV]	= &aud_mst_d_mclk_div.hw,
-		[AUD_CLKID_MST_E_MCLK_DIV]	= &aud_mst_e_mclk_div.hw,
-		[AUD_CLKID_MST_F_MCLK_DIV]	= &aud_mst_f_mclk_div.hw,
-		[AUD_CLKID_MST_A_MCLK]		= &aud_mst_a_mclk.hw,
-		[AUD_CLKID_MST_B_MCLK]		= &aud_mst_b_mclk.hw,
-		[AUD_CLKID_MST_C_MCLK]		= &aud_mst_c_mclk.hw,
-		[AUD_CLKID_MST_D_MCLK]		= &aud_mst_d_mclk.hw,
-		[AUD_CLKID_MST_E_MCLK]		= &aud_mst_e_mclk.hw,
-		[AUD_CLKID_MST_F_MCLK]		= &aud_mst_f_mclk.hw,
-		[AUD_CLKID_SPDIFOUT_CLK_SEL]	= &aud_spdifout_clk_sel.hw,
-		[AUD_CLKID_SPDIFOUT_CLK_DIV]	= &aud_spdifout_clk_div.hw,
-		[AUD_CLKID_SPDIFOUT_CLK]	= &aud_spdifout_clk.hw,
-		[AUD_CLKID_SPDIFOUT_B_CLK_SEL]	= &aud_spdifout_b_clk_sel.hw,
-		[AUD_CLKID_SPDIFOUT_B_CLK_DIV]	= &aud_spdifout_b_clk_div.hw,
-		[AUD_CLKID_SPDIFOUT_B_CLK]	= &aud_spdifout_b_clk.hw,
-		[AUD_CLKID_SPDIFIN_CLK_SEL]	= &aud_spdifin_clk_sel.hw,
-		[AUD_CLKID_SPDIFIN_CLK_DIV]	= &aud_spdifin_clk_div.hw,
-		[AUD_CLKID_SPDIFIN_CLK]		= &aud_spdifin_clk.hw,
-		[AUD_CLKID_PDM_DCLK_SEL]	= &aud_pdm_dclk_sel.hw,
-		[AUD_CLKID_PDM_DCLK_DIV]	= &aud_pdm_dclk_div.hw,
-		[AUD_CLKID_PDM_DCLK]		= &aud_pdm_dclk.hw,
-		[AUD_CLKID_PDM_SYSCLK_SEL]	= &aud_pdm_sysclk_sel.hw,
-		[AUD_CLKID_PDM_SYSCLK_DIV]	= &aud_pdm_sysclk_div.hw,
-		[AUD_CLKID_PDM_SYSCLK]		= &aud_pdm_sysclk.hw,
-		[AUD_CLKID_MST_A_SCLK_PRE_EN]	= &aud_mst_a_sclk_pre_en.hw,
-		[AUD_CLKID_MST_B_SCLK_PRE_EN]	= &aud_mst_b_sclk_pre_en.hw,
-		[AUD_CLKID_MST_C_SCLK_PRE_EN]	= &aud_mst_c_sclk_pre_en.hw,
-		[AUD_CLKID_MST_D_SCLK_PRE_EN]	= &aud_mst_d_sclk_pre_en.hw,
-		[AUD_CLKID_MST_E_SCLK_PRE_EN]	= &aud_mst_e_sclk_pre_en.hw,
-		[AUD_CLKID_MST_F_SCLK_PRE_EN]	= &aud_mst_f_sclk_pre_en.hw,
-		[AUD_CLKID_MST_A_SCLK_DIV]	= &aud_mst_a_sclk_div.hw,
-		[AUD_CLKID_MST_B_SCLK_DIV]	= &aud_mst_b_sclk_div.hw,
-		[AUD_CLKID_MST_C_SCLK_DIV]	= &aud_mst_c_sclk_div.hw,
-		[AUD_CLKID_MST_D_SCLK_DIV]	= &aud_mst_d_sclk_div.hw,
-		[AUD_CLKID_MST_E_SCLK_DIV]	= &aud_mst_e_sclk_div.hw,
-		[AUD_CLKID_MST_F_SCLK_DIV]	= &aud_mst_f_sclk_div.hw,
-		[AUD_CLKID_MST_A_SCLK_POST_EN]	= &aud_mst_a_sclk_post_en.hw,
-		[AUD_CLKID_MST_B_SCLK_POST_EN]	= &aud_mst_b_sclk_post_en.hw,
-		[AUD_CLKID_MST_C_SCLK_POST_EN]	= &aud_mst_c_sclk_post_en.hw,
-		[AUD_CLKID_MST_D_SCLK_POST_EN]	= &aud_mst_d_sclk_post_en.hw,
-		[AUD_CLKID_MST_E_SCLK_POST_EN]	= &aud_mst_e_sclk_post_en.hw,
-		[AUD_CLKID_MST_F_SCLK_POST_EN]	= &aud_mst_f_sclk_post_en.hw,
-		[AUD_CLKID_MST_A_SCLK]		= &aud_mst_a_sclk.hw,
-		[AUD_CLKID_MST_B_SCLK]		= &aud_mst_b_sclk.hw,
-		[AUD_CLKID_MST_C_SCLK]		= &aud_mst_c_sclk.hw,
-		[AUD_CLKID_MST_D_SCLK]		= &aud_mst_d_sclk.hw,
-		[AUD_CLKID_MST_E_SCLK]		= &aud_mst_e_sclk.hw,
-		[AUD_CLKID_MST_F_SCLK]		= &aud_mst_f_sclk.hw,
-		[AUD_CLKID_MST_A_LRCLK_DIV]	= &aud_mst_a_lrclk_div.hw,
-		[AUD_CLKID_MST_B_LRCLK_DIV]	= &aud_mst_b_lrclk_div.hw,
-		[AUD_CLKID_MST_C_LRCLK_DIV]	= &aud_mst_c_lrclk_div.hw,
-		[AUD_CLKID_MST_D_LRCLK_DIV]	= &aud_mst_d_lrclk_div.hw,
-		[AUD_CLKID_MST_E_LRCLK_DIV]	= &aud_mst_e_lrclk_div.hw,
-		[AUD_CLKID_MST_F_LRCLK_DIV]	= &aud_mst_f_lrclk_div.hw,
-		[AUD_CLKID_MST_A_LRCLK]		= &aud_mst_a_lrclk.hw,
-		[AUD_CLKID_MST_B_LRCLK]		= &aud_mst_b_lrclk.hw,
-		[AUD_CLKID_MST_C_LRCLK]		= &aud_mst_c_lrclk.hw,
-		[AUD_CLKID_MST_D_LRCLK]		= &aud_mst_d_lrclk.hw,
-		[AUD_CLKID_MST_E_LRCLK]		= &aud_mst_e_lrclk.hw,
-		[AUD_CLKID_MST_F_LRCLK]		= &aud_mst_f_lrclk.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_SEL]	= &aud_tdmin_a_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_SEL]	= &aud_tdmin_b_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_SEL]	= &aud_tdmin_c_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_SEL]	= &aud_tdmin_lb_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_SEL]	= &aud_tdmout_a_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_SEL]	= &aud_tdmout_b_sclk_sel.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_SEL]	= &aud_tdmout_c_sclk_sel.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_PRE_EN]	= &aud_tdmin_a_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_PRE_EN]	= &aud_tdmin_b_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_PRE_EN]	= &aud_tdmin_c_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_PRE_EN] = &aud_tdmin_lb_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_PRE_EN] = &aud_tdmout_a_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_PRE_EN] = &aud_tdmout_b_sclk_pre_en.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_PRE_EN] = &aud_tdmout_c_sclk_pre_en.hw,
-		[AUD_CLKID_TDMIN_A_SCLK_POST_EN] = &aud_tdmin_a_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_B_SCLK_POST_EN] = &aud_tdmin_b_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_C_SCLK_POST_EN] = &aud_tdmin_c_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK_POST_EN] = &aud_tdmin_lb_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK_POST_EN] = &aud_tdmout_a_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK_POST_EN] = &aud_tdmout_b_sclk_post_en.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK_POST_EN] = &aud_tdmout_c_sclk_post_en.hw,
-		[AUD_CLKID_TDMIN_A_SCLK]	= &aud_tdmin_a_sclk.hw,
-		[AUD_CLKID_TDMIN_B_SCLK]	= &aud_tdmin_b_sclk.hw,
-		[AUD_CLKID_TDMIN_C_SCLK]	= &aud_tdmin_c_sclk.hw,
-		[AUD_CLKID_TDMIN_LB_SCLK]	= &aud_tdmin_lb_sclk.hw,
-		[AUD_CLKID_TDMOUT_A_SCLK]	= &aud_tdmout_a_sclk.hw,
-		[AUD_CLKID_TDMOUT_B_SCLK]	= &aud_tdmout_b_sclk.hw,
-		[AUD_CLKID_TDMOUT_C_SCLK]	= &aud_tdmout_c_sclk.hw,
-		[AUD_CLKID_TDMIN_A_LRCLK]	= &aud_tdmin_a_lrclk.hw,
-		[AUD_CLKID_TDMIN_B_LRCLK]	= &aud_tdmin_b_lrclk.hw,
-		[AUD_CLKID_TDMIN_C_LRCLK]	= &aud_tdmin_c_lrclk.hw,
-		[AUD_CLKID_TDMIN_LB_LRCLK]	= &aud_tdmin_lb_lrclk.hw,
-		[AUD_CLKID_TDMOUT_A_LRCLK]	= &aud_tdmout_a_lrclk.hw,
-		[AUD_CLKID_TDMOUT_B_LRCLK]	= &aud_tdmout_b_lrclk.hw,
-		[AUD_CLKID_TDMOUT_C_LRCLK]	= &aud_tdmout_c_lrclk.hw,
-		[AUD_CLKID_TDM_MCLK_PAD0]	= &aud_tdm_mclk_pad_0.hw,
-		[AUD_CLKID_TDM_MCLK_PAD1]	= &aud_tdm_mclk_pad_1.hw,
-		[AUD_CLKID_TDM_LRCLK_PAD0]	= &aud_tdm_lrclk_pad_0.hw,
-		[AUD_CLKID_TDM_LRCLK_PAD1]	= &aud_tdm_lrclk_pad_1.hw,
-		[AUD_CLKID_TDM_LRCLK_PAD2]	= &aud_tdm_lrclk_pad_2.hw,
-		[AUD_CLKID_TDM_SCLK_PAD0]	= &aud_tdm_sclk_pad_0.hw,
-		[AUD_CLKID_TDM_SCLK_PAD1]	= &aud_tdm_sclk_pad_1.hw,
-		[AUD_CLKID_TDM_SCLK_PAD2]	= &aud_tdm_sclk_pad_2.hw,
+		[AUD_CLKID_DDR_ARB]		= &ddr_arb.hw,
+		[AUD_CLKID_PDM]			= &pdm.hw,
+		[AUD_CLKID_TDMIN_A]		= &tdmin_a.hw,
+		[AUD_CLKID_TDMIN_B]		= &tdmin_b.hw,
+		[AUD_CLKID_TDMIN_C]		= &tdmin_c.hw,
+		[AUD_CLKID_TDMIN_LB]		= &tdmin_lb.hw,
+		[AUD_CLKID_TDMOUT_A]		= &tdmout_a.hw,
+		[AUD_CLKID_TDMOUT_B]		= &tdmout_b.hw,
+		[AUD_CLKID_TDMOUT_C]		= &tdmout_c.hw,
+		[AUD_CLKID_FRDDR_A]		= &frddr_a.hw,
+		[AUD_CLKID_FRDDR_B]		= &frddr_b.hw,
+		[AUD_CLKID_FRDDR_C]		= &frddr_c.hw,
+		[AUD_CLKID_TODDR_A]		= &toddr_a.hw,
+		[AUD_CLKID_TODDR_B]		= &toddr_b.hw,
+		[AUD_CLKID_TODDR_C]		= &toddr_c.hw,
+		[AUD_CLKID_LOOPBACK]		= &loopback.hw,
+		[AUD_CLKID_SPDIFIN]		= &spdifin.hw,
+		[AUD_CLKID_SPDIFOUT]		= &spdifout.hw,
+		[AUD_CLKID_RESAMPLE]		= &resample.hw,
+		[AUD_CLKID_POWER_DETECT]	= &power_detect.hw,
+		[AUD_CLKID_SPDIFOUT_B]		= &spdifout_b.hw,
+		[AUD_CLKID_MST_A_MCLK_SEL]	= &mst_a_mclk_sel.hw,
+		[AUD_CLKID_MST_B_MCLK_SEL]	= &mst_b_mclk_sel.hw,
+		[AUD_CLKID_MST_C_MCLK_SEL]	= &mst_c_mclk_sel.hw,
+		[AUD_CLKID_MST_D_MCLK_SEL]	= &mst_d_mclk_sel.hw,
+		[AUD_CLKID_MST_E_MCLK_SEL]	= &mst_e_mclk_sel.hw,
+		[AUD_CLKID_MST_F_MCLK_SEL]	= &mst_f_mclk_sel.hw,
+		[AUD_CLKID_MST_A_MCLK_DIV]	= &mst_a_mclk_div.hw,
+		[AUD_CLKID_MST_B_MCLK_DIV]	= &mst_b_mclk_div.hw,
+		[AUD_CLKID_MST_C_MCLK_DIV]	= &mst_c_mclk_div.hw,
+		[AUD_CLKID_MST_D_MCLK_DIV]	= &mst_d_mclk_div.hw,
+		[AUD_CLKID_MST_E_MCLK_DIV]	= &mst_e_mclk_div.hw,
+		[AUD_CLKID_MST_F_MCLK_DIV]	= &mst_f_mclk_div.hw,
+		[AUD_CLKID_MST_A_MCLK]		= &mst_a_mclk.hw,
+		[AUD_CLKID_MST_B_MCLK]		= &mst_b_mclk.hw,
+		[AUD_CLKID_MST_C_MCLK]		= &mst_c_mclk.hw,
+		[AUD_CLKID_MST_D_MCLK]		= &mst_d_mclk.hw,
+		[AUD_CLKID_MST_E_MCLK]		= &mst_e_mclk.hw,
+		[AUD_CLKID_MST_F_MCLK]		= &mst_f_mclk.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_SEL]	= &spdifout_clk_sel.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_DIV]	= &spdifout_clk_div.hw,
+		[AUD_CLKID_SPDIFOUT_CLK]	= &spdifout_clk.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK_SEL]	= &spdifout_b_clk_sel.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK_DIV]	= &spdifout_b_clk_div.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK]	= &spdifout_b_clk.hw,
+		[AUD_CLKID_SPDIFIN_CLK_SEL]	= &spdifin_clk_sel.hw,
+		[AUD_CLKID_SPDIFIN_CLK_DIV]	= &spdifin_clk_div.hw,
+		[AUD_CLKID_SPDIFIN_CLK]		= &spdifin_clk.hw,
+		[AUD_CLKID_PDM_DCLK_SEL]	= &pdm_dclk_sel.hw,
+		[AUD_CLKID_PDM_DCLK_DIV]	= &pdm_dclk_div.hw,
+		[AUD_CLKID_PDM_DCLK]		= &pdm_dclk.hw,
+		[AUD_CLKID_PDM_SYSCLK_SEL]	= &pdm_sysclk_sel.hw,
+		[AUD_CLKID_PDM_SYSCLK_DIV]	= &pdm_sysclk_div.hw,
+		[AUD_CLKID_PDM_SYSCLK]		= &pdm_sysclk.hw,
+		[AUD_CLKID_MST_A_SCLK_PRE_EN]	= &mst_a_sclk_pre_en.hw,
+		[AUD_CLKID_MST_B_SCLK_PRE_EN]	= &mst_b_sclk_pre_en.hw,
+		[AUD_CLKID_MST_C_SCLK_PRE_EN]	= &mst_c_sclk_pre_en.hw,
+		[AUD_CLKID_MST_D_SCLK_PRE_EN]	= &mst_d_sclk_pre_en.hw,
+		[AUD_CLKID_MST_E_SCLK_PRE_EN]	= &mst_e_sclk_pre_en.hw,
+		[AUD_CLKID_MST_F_SCLK_PRE_EN]	= &mst_f_sclk_pre_en.hw,
+		[AUD_CLKID_MST_A_SCLK_DIV]	= &mst_a_sclk_div.hw,
+		[AUD_CLKID_MST_B_SCLK_DIV]	= &mst_b_sclk_div.hw,
+		[AUD_CLKID_MST_C_SCLK_DIV]	= &mst_c_sclk_div.hw,
+		[AUD_CLKID_MST_D_SCLK_DIV]	= &mst_d_sclk_div.hw,
+		[AUD_CLKID_MST_E_SCLK_DIV]	= &mst_e_sclk_div.hw,
+		[AUD_CLKID_MST_F_SCLK_DIV]	= &mst_f_sclk_div.hw,
+		[AUD_CLKID_MST_A_SCLK_POST_EN]	= &mst_a_sclk_post_en.hw,
+		[AUD_CLKID_MST_B_SCLK_POST_EN]	= &mst_b_sclk_post_en.hw,
+		[AUD_CLKID_MST_C_SCLK_POST_EN]	= &mst_c_sclk_post_en.hw,
+		[AUD_CLKID_MST_D_SCLK_POST_EN]	= &mst_d_sclk_post_en.hw,
+		[AUD_CLKID_MST_E_SCLK_POST_EN]	= &mst_e_sclk_post_en.hw,
+		[AUD_CLKID_MST_F_SCLK_POST_EN]	= &mst_f_sclk_post_en.hw,
+		[AUD_CLKID_MST_A_SCLK]		= &mst_a_sclk.hw,
+		[AUD_CLKID_MST_B_SCLK]		= &mst_b_sclk.hw,
+		[AUD_CLKID_MST_C_SCLK]		= &mst_c_sclk.hw,
+		[AUD_CLKID_MST_D_SCLK]		= &mst_d_sclk.hw,
+		[AUD_CLKID_MST_E_SCLK]		= &mst_e_sclk.hw,
+		[AUD_CLKID_MST_F_SCLK]		= &mst_f_sclk.hw,
+		[AUD_CLKID_MST_A_LRCLK_DIV]	= &mst_a_lrclk_div.hw,
+		[AUD_CLKID_MST_B_LRCLK_DIV]	= &mst_b_lrclk_div.hw,
+		[AUD_CLKID_MST_C_LRCLK_DIV]	= &mst_c_lrclk_div.hw,
+		[AUD_CLKID_MST_D_LRCLK_DIV]	= &mst_d_lrclk_div.hw,
+		[AUD_CLKID_MST_E_LRCLK_DIV]	= &mst_e_lrclk_div.hw,
+		[AUD_CLKID_MST_F_LRCLK_DIV]	= &mst_f_lrclk_div.hw,
+		[AUD_CLKID_MST_A_LRCLK]		= &mst_a_lrclk.hw,
+		[AUD_CLKID_MST_B_LRCLK]		= &mst_b_lrclk.hw,
+		[AUD_CLKID_MST_C_LRCLK]		= &mst_c_lrclk.hw,
+		[AUD_CLKID_MST_D_LRCLK]		= &mst_d_lrclk.hw,
+		[AUD_CLKID_MST_E_LRCLK]		= &mst_e_lrclk.hw,
+		[AUD_CLKID_MST_F_LRCLK]		= &mst_f_lrclk.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_SEL]	= &tdmin_a_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_SEL]	= &tdmin_b_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_SEL]	= &tdmin_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_SEL]	= &tdmin_lb_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_SEL]	= &tdmout_a_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_SEL]	= &tdmout_b_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_SEL]	= &tdmout_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_PRE_EN]	= &tdmin_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_PRE_EN]	= &tdmin_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_PRE_EN]	= &tdmin_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_PRE_EN] = &tdmin_lb_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_PRE_EN] = &tdmout_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_PRE_EN] = &tdmout_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_PRE_EN] = &tdmout_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_POST_EN] = &tdmin_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_POST_EN] = &tdmin_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_POST_EN] = &tdmin_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_POST_EN] = &tdmin_lb_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_POST_EN] = &tdmout_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_POST_EN] = &tdmout_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_POST_EN] = &tdmout_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK]	= &tdmin_a_sclk.hw,
+		[AUD_CLKID_TDMIN_B_SCLK]	= &tdmin_b_sclk.hw,
+		[AUD_CLKID_TDMIN_C_SCLK]	= &tdmin_c_sclk.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK]	= &tdmin_lb_sclk.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK]	= &tdmout_a_sclk.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK]	= &tdmout_b_sclk.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK]	= &tdmout_c_sclk.hw,
+		[AUD_CLKID_TDMIN_A_LRCLK]	= &tdmin_a_lrclk.hw,
+		[AUD_CLKID_TDMIN_B_LRCLK]	= &tdmin_b_lrclk.hw,
+		[AUD_CLKID_TDMIN_C_LRCLK]	= &tdmin_c_lrclk.hw,
+		[AUD_CLKID_TDMIN_LB_LRCLK]	= &tdmin_lb_lrclk.hw,
+		[AUD_CLKID_TDMOUT_A_LRCLK]	= &tdmout_a_lrclk.hw,
+		[AUD_CLKID_TDMOUT_B_LRCLK]	= &tdmout_b_lrclk.hw,
+		[AUD_CLKID_TDMOUT_C_LRCLK]	= &tdmout_c_lrclk.hw,
+		[AUD_CLKID_TDM_MCLK_PAD0]	= &g12a_tdm_mclk_pad_0.hw,
+		[AUD_CLKID_TDM_MCLK_PAD1]	= &g12a_tdm_mclk_pad_1.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD0]	= &g12a_tdm_lrclk_pad_0.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD1]	= &g12a_tdm_lrclk_pad_1.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD2]	= &g12a_tdm_lrclk_pad_2.hw,
+		[AUD_CLKID_TDM_SCLK_PAD0]	= &g12a_tdm_sclk_pad_0.hw,
+		[AUD_CLKID_TDM_SCLK_PAD1]	= &g12a_tdm_sclk_pad_1.hw,
+		[AUD_CLKID_TDM_SCLK_PAD2]	= &g12a_tdm_sclk_pad_2.hw,
+		[AUD_CLKID_TOP]			= &axg_aud_top,
 		[NR_CLKS] = NULL,
 	},
 	.num = NR_CLKS,
 };
 
+/*
+ * Array of all SM1 clocks provided by this provider
+ * The input clocks of the controller will be populated at runtime
+ */
+static struct clk_hw_onecell_data sm1_audio_hw_onecell_data = {
+	.hws = {
+		[AUD_CLKID_DDR_ARB]		= &ddr_arb.hw,
+		[AUD_CLKID_PDM]			= &pdm.hw,
+		[AUD_CLKID_TDMIN_A]		= &tdmin_a.hw,
+		[AUD_CLKID_TDMIN_B]		= &tdmin_b.hw,
+		[AUD_CLKID_TDMIN_C]		= &tdmin_c.hw,
+		[AUD_CLKID_TDMIN_LB]		= &tdmin_lb.hw,
+		[AUD_CLKID_TDMOUT_A]		= &tdmout_a.hw,
+		[AUD_CLKID_TDMOUT_B]		= &tdmout_b.hw,
+		[AUD_CLKID_TDMOUT_C]		= &tdmout_c.hw,
+		[AUD_CLKID_FRDDR_A]		= &frddr_a.hw,
+		[AUD_CLKID_FRDDR_B]		= &frddr_b.hw,
+		[AUD_CLKID_FRDDR_C]		= &frddr_c.hw,
+		[AUD_CLKID_TODDR_A]		= &toddr_a.hw,
+		[AUD_CLKID_TODDR_B]		= &toddr_b.hw,
+		[AUD_CLKID_TODDR_C]		= &toddr_c.hw,
+		[AUD_CLKID_LOOPBACK]		= &loopback.hw,
+		[AUD_CLKID_SPDIFIN]		= &spdifin.hw,
+		[AUD_CLKID_SPDIFOUT]		= &spdifout.hw,
+		[AUD_CLKID_RESAMPLE]		= &resample.hw,
+		[AUD_CLKID_SPDIFOUT_B]		= &spdifout_b.hw,
+		[AUD_CLKID_MST_A_MCLK_SEL]	= &sm1_mst_a_mclk_sel.hw,
+		[AUD_CLKID_MST_B_MCLK_SEL]	= &sm1_mst_b_mclk_sel.hw,
+		[AUD_CLKID_MST_C_MCLK_SEL]	= &sm1_mst_c_mclk_sel.hw,
+		[AUD_CLKID_MST_D_MCLK_SEL]	= &sm1_mst_d_mclk_sel.hw,
+		[AUD_CLKID_MST_E_MCLK_SEL]	= &sm1_mst_e_mclk_sel.hw,
+		[AUD_CLKID_MST_F_MCLK_SEL]	= &sm1_mst_f_mclk_sel.hw,
+		[AUD_CLKID_MST_A_MCLK_DIV]	= &sm1_mst_a_mclk_div.hw,
+		[AUD_CLKID_MST_B_MCLK_DIV]	= &sm1_mst_b_mclk_div.hw,
+		[AUD_CLKID_MST_C_MCLK_DIV]	= &sm1_mst_c_mclk_div.hw,
+		[AUD_CLKID_MST_D_MCLK_DIV]	= &sm1_mst_d_mclk_div.hw,
+		[AUD_CLKID_MST_E_MCLK_DIV]	= &sm1_mst_e_mclk_div.hw,
+		[AUD_CLKID_MST_F_MCLK_DIV]	= &sm1_mst_f_mclk_div.hw,
+		[AUD_CLKID_MST_A_MCLK]		= &sm1_mst_a_mclk.hw,
+		[AUD_CLKID_MST_B_MCLK]		= &sm1_mst_b_mclk.hw,
+		[AUD_CLKID_MST_C_MCLK]		= &sm1_mst_c_mclk.hw,
+		[AUD_CLKID_MST_D_MCLK]		= &sm1_mst_d_mclk.hw,
+		[AUD_CLKID_MST_E_MCLK]		= &sm1_mst_e_mclk.hw,
+		[AUD_CLKID_MST_F_MCLK]		= &sm1_mst_f_mclk.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_SEL]	= &spdifout_clk_sel.hw,
+		[AUD_CLKID_SPDIFOUT_CLK_DIV]	= &spdifout_clk_div.hw,
+		[AUD_CLKID_SPDIFOUT_CLK]	= &spdifout_clk.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK_SEL]	= &spdifout_b_clk_sel.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK_DIV]	= &spdifout_b_clk_div.hw,
+		[AUD_CLKID_SPDIFOUT_B_CLK]	= &spdifout_b_clk.hw,
+		[AUD_CLKID_SPDIFIN_CLK_SEL]	= &spdifin_clk_sel.hw,
+		[AUD_CLKID_SPDIFIN_CLK_DIV]	= &spdifin_clk_div.hw,
+		[AUD_CLKID_SPDIFIN_CLK]		= &spdifin_clk.hw,
+		[AUD_CLKID_PDM_DCLK_SEL]	= &pdm_dclk_sel.hw,
+		[AUD_CLKID_PDM_DCLK_DIV]	= &pdm_dclk_div.hw,
+		[AUD_CLKID_PDM_DCLK]		= &pdm_dclk.hw,
+		[AUD_CLKID_PDM_SYSCLK_SEL]	= &pdm_sysclk_sel.hw,
+		[AUD_CLKID_PDM_SYSCLK_DIV]	= &pdm_sysclk_div.hw,
+		[AUD_CLKID_PDM_SYSCLK]		= &pdm_sysclk.hw,
+		[AUD_CLKID_MST_A_SCLK_PRE_EN]	= &mst_a_sclk_pre_en.hw,
+		[AUD_CLKID_MST_B_SCLK_PRE_EN]	= &mst_b_sclk_pre_en.hw,
+		[AUD_CLKID_MST_C_SCLK_PRE_EN]	= &mst_c_sclk_pre_en.hw,
+		[AUD_CLKID_MST_D_SCLK_PRE_EN]	= &mst_d_sclk_pre_en.hw,
+		[AUD_CLKID_MST_E_SCLK_PRE_EN]	= &mst_e_sclk_pre_en.hw,
+		[AUD_CLKID_MST_F_SCLK_PRE_EN]	= &mst_f_sclk_pre_en.hw,
+		[AUD_CLKID_MST_A_SCLK_DIV]	= &mst_a_sclk_div.hw,
+		[AUD_CLKID_MST_B_SCLK_DIV]	= &mst_b_sclk_div.hw,
+		[AUD_CLKID_MST_C_SCLK_DIV]	= &mst_c_sclk_div.hw,
+		[AUD_CLKID_MST_D_SCLK_DIV]	= &mst_d_sclk_div.hw,
+		[AUD_CLKID_MST_E_SCLK_DIV]	= &mst_e_sclk_div.hw,
+		[AUD_CLKID_MST_F_SCLK_DIV]	= &mst_f_sclk_div.hw,
+		[AUD_CLKID_MST_A_SCLK_POST_EN]	= &mst_a_sclk_post_en.hw,
+		[AUD_CLKID_MST_B_SCLK_POST_EN]	= &mst_b_sclk_post_en.hw,
+		[AUD_CLKID_MST_C_SCLK_POST_EN]	= &mst_c_sclk_post_en.hw,
+		[AUD_CLKID_MST_D_SCLK_POST_EN]	= &mst_d_sclk_post_en.hw,
+		[AUD_CLKID_MST_E_SCLK_POST_EN]	= &mst_e_sclk_post_en.hw,
+		[AUD_CLKID_MST_F_SCLK_POST_EN]	= &mst_f_sclk_post_en.hw,
+		[AUD_CLKID_MST_A_SCLK]		= &mst_a_sclk.hw,
+		[AUD_CLKID_MST_B_SCLK]		= &mst_b_sclk.hw,
+		[AUD_CLKID_MST_C_SCLK]		= &mst_c_sclk.hw,
+		[AUD_CLKID_MST_D_SCLK]		= &mst_d_sclk.hw,
+		[AUD_CLKID_MST_E_SCLK]		= &mst_e_sclk.hw,
+		[AUD_CLKID_MST_F_SCLK]		= &mst_f_sclk.hw,
+		[AUD_CLKID_MST_A_LRCLK_DIV]	= &mst_a_lrclk_div.hw,
+		[AUD_CLKID_MST_B_LRCLK_DIV]	= &mst_b_lrclk_div.hw,
+		[AUD_CLKID_MST_C_LRCLK_DIV]	= &mst_c_lrclk_div.hw,
+		[AUD_CLKID_MST_D_LRCLK_DIV]	= &mst_d_lrclk_div.hw,
+		[AUD_CLKID_MST_E_LRCLK_DIV]	= &mst_e_lrclk_div.hw,
+		[AUD_CLKID_MST_F_LRCLK_DIV]	= &mst_f_lrclk_div.hw,
+		[AUD_CLKID_MST_A_LRCLK]		= &mst_a_lrclk.hw,
+		[AUD_CLKID_MST_B_LRCLK]		= &mst_b_lrclk.hw,
+		[AUD_CLKID_MST_C_LRCLK]		= &mst_c_lrclk.hw,
+		[AUD_CLKID_MST_D_LRCLK]		= &mst_d_lrclk.hw,
+		[AUD_CLKID_MST_E_LRCLK]		= &mst_e_lrclk.hw,
+		[AUD_CLKID_MST_F_LRCLK]		= &mst_f_lrclk.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_SEL]	= &tdmin_a_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_SEL]	= &tdmin_b_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_SEL]	= &tdmin_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_SEL]	= &tdmin_lb_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_SEL]	= &tdmout_a_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_SEL]	= &tdmout_b_sclk_sel.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_SEL]	= &tdmout_c_sclk_sel.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_PRE_EN]	= &tdmin_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_PRE_EN]	= &tdmin_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_PRE_EN]	= &tdmin_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_PRE_EN] = &tdmin_lb_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_PRE_EN] = &tdmout_a_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_PRE_EN] = &tdmout_b_sclk_pre_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_PRE_EN] = &tdmout_c_sclk_pre_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK_POST_EN] = &tdmin_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_B_SCLK_POST_EN] = &tdmin_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_C_SCLK_POST_EN] = &tdmin_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK_POST_EN] = &tdmin_lb_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK_POST_EN] = &tdmout_a_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK_POST_EN] = &tdmout_b_sclk_post_en.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK_POST_EN] = &tdmout_c_sclk_post_en.hw,
+		[AUD_CLKID_TDMIN_A_SCLK]	= &tdmin_a_sclk.hw,
+		[AUD_CLKID_TDMIN_B_SCLK]	= &tdmin_b_sclk.hw,
+		[AUD_CLKID_TDMIN_C_SCLK]	= &tdmin_c_sclk.hw,
+		[AUD_CLKID_TDMIN_LB_SCLK]	= &tdmin_lb_sclk.hw,
+		[AUD_CLKID_TDMOUT_A_SCLK]	= &tdmout_a_sclk.hw,
+		[AUD_CLKID_TDMOUT_B_SCLK]	= &tdmout_b_sclk.hw,
+		[AUD_CLKID_TDMOUT_C_SCLK]	= &tdmout_c_sclk.hw,
+		[AUD_CLKID_TDMIN_A_LRCLK]	= &tdmin_a_lrclk.hw,
+		[AUD_CLKID_TDMIN_B_LRCLK]	= &tdmin_b_lrclk.hw,
+		[AUD_CLKID_TDMIN_C_LRCLK]	= &tdmin_c_lrclk.hw,
+		[AUD_CLKID_TDMIN_LB_LRCLK]	= &tdmin_lb_lrclk.hw,
+		[AUD_CLKID_TDMOUT_A_LRCLK]	= &tdmout_a_lrclk.hw,
+		[AUD_CLKID_TDMOUT_B_LRCLK]	= &tdmout_b_lrclk.hw,
+		[AUD_CLKID_TDMOUT_C_LRCLK]	= &tdmout_c_lrclk.hw,
+		[AUD_CLKID_TDM_MCLK_PAD0]	= &sm1_tdm_mclk_pad_0.hw,
+		[AUD_CLKID_TDM_MCLK_PAD1]	= &sm1_tdm_mclk_pad_1.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD0]	= &sm1_tdm_lrclk_pad_0.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD1]	= &sm1_tdm_lrclk_pad_1.hw,
+		[AUD_CLKID_TDM_LRCLK_PAD2]	= &sm1_tdm_lrclk_pad_2.hw,
+		[AUD_CLKID_TDM_SCLK_PAD0]	= &sm1_tdm_sclk_pad_0.hw,
+		[AUD_CLKID_TDM_SCLK_PAD1]	= &sm1_tdm_sclk_pad_1.hw,
+		[AUD_CLKID_TDM_SCLK_PAD2]	= &sm1_tdm_sclk_pad_2.hw,
+		[AUD_CLKID_TOP]			= &sm1_aud_top.hw,
+		[AUD_CLKID_TORAM]		= &toram.hw,
+		[AUD_CLKID_EQDRC]		= &eqdrc.hw,
+		[AUD_CLKID_RESAMPLE_B]		= &resample_b.hw,
+		[AUD_CLKID_TOVAD]		= &tovad.hw,
+		[AUD_CLKID_LOCKER]		= &locker.hw,
+		[AUD_CLKID_SPDIFIN_LB]		= &spdifin_lb.hw,
+		[AUD_CLKID_FRDDR_D]		= &frddr_d.hw,
+		[AUD_CLKID_TODDR_D]		= &toddr_d.hw,
+		[AUD_CLKID_LOOPBACK_B]		= &loopback_b.hw,
+		[AUD_CLKID_CLK81_EN]		= &sm1_clk81_en.hw,
+		[AUD_CLKID_SYSCLK_A_DIV]	= &sm1_sysclk_a_div.hw,
+		[AUD_CLKID_SYSCLK_A_EN]		= &sm1_sysclk_a_en.hw,
+		[AUD_CLKID_SYSCLK_B_DIV]	= &sm1_sysclk_b_div.hw,
+		[AUD_CLKID_SYSCLK_B_EN]		= &sm1_sysclk_b_en.hw,
+		[NR_CLKS] = NULL,
+	},
+	.num = NR_CLKS,
+};
+
+
 /* Convenience table to populate regmap in .probe()
  * Note that this table is shared between both AXG and G12A,
  * with spdifout_b clocks being exclusive to G12A. Since those
  * clocks are not declared within the AXG onecell table, we do not
  * feel the need to have separate AXG/G12A regmap tables.
  */
-static struct clk_regmap *const aud_clk_regmaps[] = {
-	&aud_ddr_arb,
-	&aud_pdm,
-	&aud_tdmin_a,
-	&aud_tdmin_b,
-	&aud_tdmin_c,
-	&aud_tdmin_lb,
-	&aud_tdmout_a,
-	&aud_tdmout_b,
-	&aud_tdmout_c,
-	&aud_frddr_a,
-	&aud_frddr_b,
-	&aud_frddr_c,
-	&aud_toddr_a,
-	&aud_toddr_b,
-	&aud_toddr_c,
-	&aud_loopback,
-	&aud_spdifin,
-	&aud_spdifout,
-	&aud_resample,
-	&aud_power_detect,
-	&aud_spdifout_b,
-	&aud_mst_a_mclk_sel,
-	&aud_mst_b_mclk_sel,
-	&aud_mst_c_mclk_sel,
-	&aud_mst_d_mclk_sel,
-	&aud_mst_e_mclk_sel,
-	&aud_mst_f_mclk_sel,
-	&aud_mst_a_mclk_div,
-	&aud_mst_b_mclk_div,
-	&aud_mst_c_mclk_div,
-	&aud_mst_d_mclk_div,
-	&aud_mst_e_mclk_div,
-	&aud_mst_f_mclk_div,
-	&aud_mst_a_mclk,
-	&aud_mst_b_mclk,
-	&aud_mst_c_mclk,
-	&aud_mst_d_mclk,
-	&aud_mst_e_mclk,
-	&aud_mst_f_mclk,
-	&aud_spdifout_clk_sel,
-	&aud_spdifout_clk_div,
-	&aud_spdifout_clk,
-	&aud_spdifin_clk_sel,
-	&aud_spdifin_clk_div,
-	&aud_spdifin_clk,
-	&aud_pdm_dclk_sel,
-	&aud_pdm_dclk_div,
-	&aud_pdm_dclk,
-	&aud_pdm_sysclk_sel,
-	&aud_pdm_sysclk_div,
-	&aud_pdm_sysclk,
-	&aud_mst_a_sclk_pre_en,
-	&aud_mst_b_sclk_pre_en,
-	&aud_mst_c_sclk_pre_en,
-	&aud_mst_d_sclk_pre_en,
-	&aud_mst_e_sclk_pre_en,
-	&aud_mst_f_sclk_pre_en,
-	&aud_mst_a_sclk_div,
-	&aud_mst_b_sclk_div,
-	&aud_mst_c_sclk_div,
-	&aud_mst_d_sclk_div,
-	&aud_mst_e_sclk_div,
-	&aud_mst_f_sclk_div,
-	&aud_mst_a_sclk_post_en,
-	&aud_mst_b_sclk_post_en,
-	&aud_mst_c_sclk_post_en,
-	&aud_mst_d_sclk_post_en,
-	&aud_mst_e_sclk_post_en,
-	&aud_mst_f_sclk_post_en,
-	&aud_mst_a_sclk,
-	&aud_mst_b_sclk,
-	&aud_mst_c_sclk,
-	&aud_mst_d_sclk,
-	&aud_mst_e_sclk,
-	&aud_mst_f_sclk,
-	&aud_mst_a_lrclk_div,
-	&aud_mst_b_lrclk_div,
-	&aud_mst_c_lrclk_div,
-	&aud_mst_d_lrclk_div,
-	&aud_mst_e_lrclk_div,
-	&aud_mst_f_lrclk_div,
-	&aud_mst_a_lrclk,
-	&aud_mst_b_lrclk,
-	&aud_mst_c_lrclk,
-	&aud_mst_d_lrclk,
-	&aud_mst_e_lrclk,
-	&aud_mst_f_lrclk,
-	&aud_tdmin_a_sclk_sel,
-	&aud_tdmin_b_sclk_sel,
-	&aud_tdmin_c_sclk_sel,
-	&aud_tdmin_lb_sclk_sel,
-	&aud_tdmout_a_sclk_sel,
-	&aud_tdmout_b_sclk_sel,
-	&aud_tdmout_c_sclk_sel,
-	&aud_tdmin_a_sclk_pre_en,
-	&aud_tdmin_b_sclk_pre_en,
-	&aud_tdmin_c_sclk_pre_en,
-	&aud_tdmin_lb_sclk_pre_en,
-	&aud_tdmout_a_sclk_pre_en,
-	&aud_tdmout_b_sclk_pre_en,
-	&aud_tdmout_c_sclk_pre_en,
-	&aud_tdmin_a_sclk_post_en,
-	&aud_tdmin_b_sclk_post_en,
-	&aud_tdmin_c_sclk_post_en,
-	&aud_tdmin_lb_sclk_post_en,
-	&aud_tdmout_a_sclk_post_en,
-	&aud_tdmout_b_sclk_post_en,
-	&aud_tdmout_c_sclk_post_en,
-	&aud_tdmin_a_sclk,
-	&aud_tdmin_b_sclk,
-	&aud_tdmin_c_sclk,
-	&aud_tdmin_lb_sclk,
-	&aud_tdmout_a_sclk,
-	&aud_tdmout_b_sclk,
-	&aud_tdmout_c_sclk,
-	&aud_tdmin_a_lrclk,
-	&aud_tdmin_b_lrclk,
-	&aud_tdmin_c_lrclk,
-	&aud_tdmin_lb_lrclk,
-	&aud_tdmout_a_lrclk,
-	&aud_tdmout_b_lrclk,
-	&aud_tdmout_c_lrclk,
-	&aud_spdifout_b_clk_sel,
-	&aud_spdifout_b_clk_div,
-	&aud_spdifout_b_clk,
-	&aud_tdm_mclk_pad_0,
-	&aud_tdm_mclk_pad_1,
-	&aud_tdm_lrclk_pad_0,
-	&aud_tdm_lrclk_pad_1,
-	&aud_tdm_lrclk_pad_2,
-	&aud_tdm_sclk_pad_0,
-	&aud_tdm_sclk_pad_1,
-	&aud_tdm_sclk_pad_2,
+static struct clk_regmap *const axg_clk_regmaps[] = {
+	&ddr_arb,
+	&pdm,
+	&tdmin_a,
+	&tdmin_b,
+	&tdmin_c,
+	&tdmin_lb,
+	&tdmout_a,
+	&tdmout_b,
+	&tdmout_c,
+	&frddr_a,
+	&frddr_b,
+	&frddr_c,
+	&toddr_a,
+	&toddr_b,
+	&toddr_c,
+	&loopback,
+	&spdifin,
+	&spdifout,
+	&resample,
+	&power_detect,
+	&spdifout_b,
+	&mst_a_mclk_sel,
+	&mst_b_mclk_sel,
+	&mst_c_mclk_sel,
+	&mst_d_mclk_sel,
+	&mst_e_mclk_sel,
+	&mst_f_mclk_sel,
+	&mst_a_mclk_div,
+	&mst_b_mclk_div,
+	&mst_c_mclk_div,
+	&mst_d_mclk_div,
+	&mst_e_mclk_div,
+	&mst_f_mclk_div,
+	&mst_a_mclk,
+	&mst_b_mclk,
+	&mst_c_mclk,
+	&mst_d_mclk,
+	&mst_e_mclk,
+	&mst_f_mclk,
+	&spdifout_clk_sel,
+	&spdifout_clk_div,
+	&spdifout_clk,
+	&spdifin_clk_sel,
+	&spdifin_clk_div,
+	&spdifin_clk,
+	&pdm_dclk_sel,
+	&pdm_dclk_div,
+	&pdm_dclk,
+	&pdm_sysclk_sel,
+	&pdm_sysclk_div,
+	&pdm_sysclk,
+	&mst_a_sclk_pre_en,
+	&mst_b_sclk_pre_en,
+	&mst_c_sclk_pre_en,
+	&mst_d_sclk_pre_en,
+	&mst_e_sclk_pre_en,
+	&mst_f_sclk_pre_en,
+	&mst_a_sclk_div,
+	&mst_b_sclk_div,
+	&mst_c_sclk_div,
+	&mst_d_sclk_div,
+	&mst_e_sclk_div,
+	&mst_f_sclk_div,
+	&mst_a_sclk_post_en,
+	&mst_b_sclk_post_en,
+	&mst_c_sclk_post_en,
+	&mst_d_sclk_post_en,
+	&mst_e_sclk_post_en,
+	&mst_f_sclk_post_en,
+	&mst_a_sclk,
+	&mst_b_sclk,
+	&mst_c_sclk,
+	&mst_d_sclk,
+	&mst_e_sclk,
+	&mst_f_sclk,
+	&mst_a_lrclk_div,
+	&mst_b_lrclk_div,
+	&mst_c_lrclk_div,
+	&mst_d_lrclk_div,
+	&mst_e_lrclk_div,
+	&mst_f_lrclk_div,
+	&mst_a_lrclk,
+	&mst_b_lrclk,
+	&mst_c_lrclk,
+	&mst_d_lrclk,
+	&mst_e_lrclk,
+	&mst_f_lrclk,
+	&tdmin_a_sclk_sel,
+	&tdmin_b_sclk_sel,
+	&tdmin_c_sclk_sel,
+	&tdmin_lb_sclk_sel,
+	&tdmout_a_sclk_sel,
+	&tdmout_b_sclk_sel,
+	&tdmout_c_sclk_sel,
+	&tdmin_a_sclk_pre_en,
+	&tdmin_b_sclk_pre_en,
+	&tdmin_c_sclk_pre_en,
+	&tdmin_lb_sclk_pre_en,
+	&tdmout_a_sclk_pre_en,
+	&tdmout_b_sclk_pre_en,
+	&tdmout_c_sclk_pre_en,
+	&tdmin_a_sclk_post_en,
+	&tdmin_b_sclk_post_en,
+	&tdmin_c_sclk_post_en,
+	&tdmin_lb_sclk_post_en,
+	&tdmout_a_sclk_post_en,
+	&tdmout_b_sclk_post_en,
+	&tdmout_c_sclk_post_en,
+	&tdmin_a_sclk,
+	&tdmin_b_sclk,
+	&tdmin_c_sclk,
+	&tdmin_lb_sclk,
+	&tdmout_a_sclk,
+	&tdmout_b_sclk,
+	&tdmout_c_sclk,
+	&tdmin_a_lrclk,
+	&tdmin_b_lrclk,
+	&tdmin_c_lrclk,
+	&tdmin_lb_lrclk,
+	&tdmout_a_lrclk,
+	&tdmout_b_lrclk,
+	&tdmout_c_lrclk,
+	&spdifout_b_clk_sel,
+	&spdifout_b_clk_div,
+	&spdifout_b_clk,
+	&g12a_tdm_mclk_pad_0,
+	&g12a_tdm_mclk_pad_1,
+	&g12a_tdm_lrclk_pad_0,
+	&g12a_tdm_lrclk_pad_1,
+	&g12a_tdm_lrclk_pad_2,
+	&g12a_tdm_sclk_pad_0,
+	&g12a_tdm_sclk_pad_1,
+	&g12a_tdm_sclk_pad_2,
+	&toram,
+	&eqdrc,
+};
+
+static struct clk_regmap *const sm1_clk_regmaps[] = {
+	&ddr_arb,
+	&pdm,
+	&tdmin_a,
+	&tdmin_b,
+	&tdmin_c,
+	&tdmin_lb,
+	&tdmout_a,
+	&tdmout_b,
+	&tdmout_c,
+	&frddr_a,
+	&frddr_b,
+	&frddr_c,
+	&toddr_a,
+	&toddr_b,
+	&toddr_c,
+	&loopback,
+	&spdifin,
+	&spdifout,
+	&resample,
+	&spdifout_b,
+	&sm1_mst_a_mclk_sel,
+	&sm1_mst_b_mclk_sel,
+	&sm1_mst_c_mclk_sel,
+	&sm1_mst_d_mclk_sel,
+	&sm1_mst_e_mclk_sel,
+	&sm1_mst_f_mclk_sel,
+	&sm1_mst_a_mclk_div,
+	&sm1_mst_b_mclk_div,
+	&sm1_mst_c_mclk_div,
+	&sm1_mst_d_mclk_div,
+	&sm1_mst_e_mclk_div,
+	&sm1_mst_f_mclk_div,
+	&sm1_mst_a_mclk,
+	&sm1_mst_b_mclk,
+	&sm1_mst_c_mclk,
+	&sm1_mst_d_mclk,
+	&sm1_mst_e_mclk,
+	&sm1_mst_f_mclk,
+	&spdifout_clk_sel,
+	&spdifout_clk_div,
+	&spdifout_clk,
+	&spdifin_clk_sel,
+	&spdifin_clk_div,
+	&spdifin_clk,
+	&pdm_dclk_sel,
+	&pdm_dclk_div,
+	&pdm_dclk,
+	&pdm_sysclk_sel,
+	&pdm_sysclk_div,
+	&pdm_sysclk,
+	&mst_a_sclk_pre_en,
+	&mst_b_sclk_pre_en,
+	&mst_c_sclk_pre_en,
+	&mst_d_sclk_pre_en,
+	&mst_e_sclk_pre_en,
+	&mst_f_sclk_pre_en,
+	&mst_a_sclk_div,
+	&mst_b_sclk_div,
+	&mst_c_sclk_div,
+	&mst_d_sclk_div,
+	&mst_e_sclk_div,
+	&mst_f_sclk_div,
+	&mst_a_sclk_post_en,
+	&mst_b_sclk_post_en,
+	&mst_c_sclk_post_en,
+	&mst_d_sclk_post_en,
+	&mst_e_sclk_post_en,
+	&mst_f_sclk_post_en,
+	&mst_a_sclk,
+	&mst_b_sclk,
+	&mst_c_sclk,
+	&mst_d_sclk,
+	&mst_e_sclk,
+	&mst_f_sclk,
+	&mst_a_lrclk_div,
+	&mst_b_lrclk_div,
+	&mst_c_lrclk_div,
+	&mst_d_lrclk_div,
+	&mst_e_lrclk_div,
+	&mst_f_lrclk_div,
+	&mst_a_lrclk,
+	&mst_b_lrclk,
+	&mst_c_lrclk,
+	&mst_d_lrclk,
+	&mst_e_lrclk,
+	&mst_f_lrclk,
+	&tdmin_a_sclk_sel,
+	&tdmin_b_sclk_sel,
+	&tdmin_c_sclk_sel,
+	&tdmin_lb_sclk_sel,
+	&tdmout_a_sclk_sel,
+	&tdmout_b_sclk_sel,
+	&tdmout_c_sclk_sel,
+	&tdmin_a_sclk_pre_en,
+	&tdmin_b_sclk_pre_en,
+	&tdmin_c_sclk_pre_en,
+	&tdmin_lb_sclk_pre_en,
+	&tdmout_a_sclk_pre_en,
+	&tdmout_b_sclk_pre_en,
+	&tdmout_c_sclk_pre_en,
+	&tdmin_a_sclk_post_en,
+	&tdmin_b_sclk_post_en,
+	&tdmin_c_sclk_post_en,
+	&tdmin_lb_sclk_post_en,
+	&tdmout_a_sclk_post_en,
+	&tdmout_b_sclk_post_en,
+	&tdmout_c_sclk_post_en,
+	&tdmin_a_sclk,
+	&tdmin_b_sclk,
+	&tdmin_c_sclk,
+	&tdmin_lb_sclk,
+	&tdmout_a_sclk,
+	&tdmout_b_sclk,
+	&tdmout_c_sclk,
+	&tdmin_a_lrclk,
+	&tdmin_b_lrclk,
+	&tdmin_c_lrclk,
+	&tdmin_lb_lrclk,
+	&tdmout_a_lrclk,
+	&tdmout_b_lrclk,
+	&tdmout_c_lrclk,
+	&spdifout_b_clk_sel,
+	&spdifout_b_clk_div,
+	&spdifout_b_clk,
+	&sm1_tdm_mclk_pad_0,
+	&sm1_tdm_mclk_pad_1,
+	&sm1_tdm_lrclk_pad_0,
+	&sm1_tdm_lrclk_pad_1,
+	&sm1_tdm_lrclk_pad_2,
+	&sm1_tdm_sclk_pad_0,
+	&sm1_tdm_sclk_pad_1,
+	&sm1_tdm_sclk_pad_2,
+	&sm1_aud_top,
+	&toram,
+	&eqdrc,
+	&resample_b,
+	&tovad,
+	&locker,
+	&spdifin_lb,
+	&frddr_d,
+	&toddr_d,
+	&loopback_b,
+	&sm1_clk81_en,
+	&sm1_sysclk_a_div,
+	&sm1_sysclk_a_en,
+	&sm1_sysclk_b_div,
+	&sm1_sysclk_b_en,
 };
 
 static int devm_clk_get_enable(struct device *dev, char *id)
@@ -1001,10 +1615,12 @@ static const struct regmap_config axg_audio_regmap_cfg = {
 	.reg_bits	= 32,
 	.val_bits	= 32,
 	.reg_stride	= 4,
-	.max_register	= AUDIO_CLK_PDMIN_CTRL1,
+	.max_register	= AUDIO_CLK_SPDIFOUT_B_CTRL,
 };
 
 struct audioclk_data {
+	struct clk_regmap *const *regmap_clks;
+	unsigned int regmap_clk_num;
 	struct clk_hw_onecell_data *hw_onecell_data;
 	unsigned int reset_offset;
 	unsigned int reset_num;
@@ -1016,7 +1632,6 @@ static int axg_audio_clkc_probe(struct platform_device *pdev)
 	const struct audioclk_data *data;
 	struct axg_audio_reset_data *rst;
 	struct regmap *map;
-	struct resource *res;
 	void __iomem *regs;
 	struct clk_hw *hw;
 	int ret, i;
@@ -1025,8 +1640,7 @@ static int axg_audio_clkc_probe(struct platform_device *pdev)
 	if (!data)
 		return -EINVAL;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(dev, res);
+	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
@@ -1048,8 +1662,8 @@ static int axg_audio_clkc_probe(struct platform_device *pdev)
 	}
 
 	/* Populate regmap for the regmap backed clocks */
-	for (i = 0; i < ARRAY_SIZE(aud_clk_regmaps); i++)
-		aud_clk_regmaps[i]->map = map;
+	for (i = 0; i < data->regmap_clk_num; i++)
+		data->regmap_clks[i]->map = map;
 
 	/* Take care to skip the registered input clocks */
 	for (i = AUD_CLKID_DDR_ARB; i < data->hw_onecell_data->num; i++) {
@@ -1093,15 +1707,27 @@ static int axg_audio_clkc_probe(struct platform_device *pdev)
 }
 
 static const struct audioclk_data axg_audioclk_data = {
+	.regmap_clks = axg_clk_regmaps,
+	.regmap_clk_num = ARRAY_SIZE(axg_clk_regmaps),
 	.hw_onecell_data = &axg_audio_hw_onecell_data,
 };
 
 static const struct audioclk_data g12a_audioclk_data = {
+	.regmap_clks = axg_clk_regmaps,
+	.regmap_clk_num = ARRAY_SIZE(axg_clk_regmaps),
 	.hw_onecell_data = &g12a_audio_hw_onecell_data,
 	.reset_offset = AUDIO_SW_RESET,
 	.reset_num = 26,
 };
 
+static const struct audioclk_data sm1_audioclk_data = {
+	.regmap_clks = sm1_clk_regmaps,
+	.regmap_clk_num = ARRAY_SIZE(sm1_clk_regmaps),
+	.hw_onecell_data = &sm1_audio_hw_onecell_data,
+	.reset_offset = AUDIO_SM1_SW_RESET0,
+	.reset_num = 39,
+};
+
 static const struct of_device_id clkc_match_table[] = {
 	{
 		.compatible = "amlogic,axg-audio-clkc",
@@ -1109,6 +1735,9 @@ static const struct of_device_id clkc_match_table[] = {
 	}, {
 		.compatible = "amlogic,g12a-audio-clkc",
 		.data = &g12a_audioclk_data
+	}, {
+		.compatible = "amlogic,sm1-audio-clkc",
+		.data = &sm1_audioclk_data
 	}, {}
 };
 MODULE_DEVICE_TABLE(of, clkc_match_table);
@@ -1122,6 +1751,6 @@ static struct platform_driver axg_audio_driver = {
 };
 module_platform_driver(axg_audio_driver);
 
-MODULE_DESCRIPTION("Amlogic AXG/G12A Audio Clock driver");
+MODULE_DESCRIPTION("Amlogic AXG/G12A/SM1 Audio Clock driver");
 MODULE_AUTHOR("Jerome Brunet <jbrunet@baylibre.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/meson/axg-audio.h b/drivers/clk/meson/axg-audio.h
index c00e28b2e1a9..fd65a7d0704b 100644
--- a/drivers/clk/meson/axg-audio.h
+++ b/drivers/clk/meson/axg-audio.h
@@ -50,6 +50,20 @@
 #define AUDIO_CLK_PDMIN_CTRL1	0x0B0
 #define AUDIO_CLK_SPDIFOUT_B_CTRL 0x0B4
 
+/* SM1 introduce new register and some shifts :( */
+#define AUDIO_CLK_GATE_EN1	0x004
+#define AUDIO_SM1_MCLK_A_CTRL	0x008
+#define AUDIO_SM1_MCLK_B_CTRL	0x00C
+#define AUDIO_SM1_MCLK_C_CTRL	0x010
+#define AUDIO_SM1_MCLK_D_CTRL	0x014
+#define AUDIO_SM1_MCLK_E_CTRL	0x018
+#define AUDIO_SM1_MCLK_F_CTRL	0x01C
+#define AUDIO_SM1_MST_PAD_CTRL0	0x020
+#define AUDIO_SM1_MST_PAD_CTRL1	0x024
+#define AUDIO_SM1_SW_RESET0	0x028
+#define AUDIO_SM1_SW_RESET1	0x02C
+#define AUDIO_CLK81_CTRL	0x030
+#define AUDIO_CLK81_EN		0x034
 /*
  * CLKID index values
  * These indices are entirely contrived and do not map onto the hardware.
@@ -115,10 +129,15 @@
 #define AUD_CLKID_TDMOUT_C_SCLK_POST_EN	150
 #define AUD_CLKID_SPDIFOUT_B_CLK_SEL	153
 #define AUD_CLKID_SPDIFOUT_B_CLK_DIV	154
+#define AUD_CLKID_CLK81_EN		173
+#define AUD_CLKID_SYSCLK_A_DIV		174
+#define AUD_CLKID_SYSCLK_B_DIV		175
+#define AUD_CLKID_SYSCLK_A_EN		176
+#define AUD_CLKID_SYSCLK_B_EN		177
 
 /* include the CLKIDs which are part of the DT bindings */
 #include <dt-bindings/clock/axg-audio-clkc.h>
 
-#define NR_CLKS	163
+#define NR_CLKS	178
 
 #endif /*__AXG_AUDIO_CLKC_H */
diff --git a/drivers/clk/mmp/Makefile b/drivers/clk/mmp/Makefile
index 7bc7ac69391e..acc141adf087 100644
--- a/drivers/clk/mmp/Makefile
+++ b/drivers/clk/mmp/Makefile
@@ -8,7 +8,7 @@ obj-y += clk-apbc.o clk-apmu.o clk-frac.o clk-mix.o clk-gate.o clk.o
 obj-$(CONFIG_RESET_CONTROLLER) += reset.o
 
 obj-$(CONFIG_MACH_MMP_DT) += clk-of-pxa168.o clk-of-pxa910.o
-obj-$(CONFIG_MACH_MMP2_DT) += clk-of-mmp2.o
+obj-$(CONFIG_COMMON_CLK_MMP2) += clk-of-mmp2.o
 
 obj-$(CONFIG_CPU_PXA168) += clk-pxa168.o
 obj-$(CONFIG_CPU_PXA910) += clk-pxa910.o
diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c
index a60a1be937ad..b4a95cbbda98 100644
--- a/drivers/clk/mmp/clk-of-mmp2.c
+++ b/drivers/clk/mmp/clk-of-mmp2.c
@@ -134,7 +134,7 @@ static DEFINE_SPINLOCK(ssp3_lock);
 static const char *ssp_parent_names[] = {"vctcxo_4", "vctcxo_2", "vctcxo", "pll1_16"};
 
 static DEFINE_SPINLOCK(timer_lock);
-static const char *timer_parent_names[] = {"clk32", "vctcxo_2", "vctcxo_4", "vctcxo"};
+static const char *timer_parent_names[] = {"clk32", "vctcxo_4", "vctcxo_2", "vctcxo"};
 
 static DEFINE_SPINLOCK(reset_lock);
 
diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
index af5e5acad370..6b394302c76a 100644
--- a/drivers/clk/mvebu/ap-cpu-clk.c
+++ b/drivers/clk/mvebu/ap-cpu-clk.c
@@ -274,8 +274,8 @@ static int ap_cpu_clock_probe(struct platform_device *pdev)
 	if (!ap_cpu_clk)
 		return -ENOMEM;
 
-	ap_cpu_data = devm_kzalloc(dev, sizeof(*ap_cpu_data) +
-				sizeof(struct clk_hw *) * nclusters,
+	ap_cpu_data = devm_kzalloc(dev, struct_size(ap_cpu_data, hws,
+						    nclusters),
 				GFP_KERNEL);
 	if (!ap_cpu_data)
 		return -ENOMEM;
diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c
index 5fc6d486a381..f5746f9ea929 100644
--- a/drivers/clk/mvebu/armada-37xx-periph.c
+++ b/drivers/clk/mvebu/armada-37xx-periph.c
@@ -303,6 +303,7 @@ PERIPH_CLK_GATE_DIV(gbe_bm, 12, DIV_SEL1, 0, clk_table1);
 PERIPH_CLK_FULL_DD(sdio, 11, 14, DIV_SEL0, DIV_SEL0, 3, 6);
 PERIPH_CLK_FULL_DD(usb32_usb2_sys, 16, 16, DIV_SEL0, DIV_SEL0, 9, 12);
 PERIPH_CLK_FULL_DD(usb32_ss_sys, 17, 18, DIV_SEL0, DIV_SEL0, 15, 18);
+static PERIPH_GATE(pcie, 14);
 
 static struct clk_periph_data data_sb[] = {
 	REF_CLK_MUX_DD(gbe_50),
@@ -318,6 +319,7 @@ static struct clk_periph_data data_sb[] = {
 	REF_CLK_FULL_DD(sdio),
 	REF_CLK_FULL_DD(usb32_usb2_sys),
 	REF_CLK_FULL_DD(usb32_ss_sys),
+	REF_CLK_GATE(pcie, "gbe_core"),
 	{ },
 };
 
@@ -712,8 +714,8 @@ static int __maybe_unused armada_3700_periph_clock_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops armada_3700_periph_clock_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(armada_3700_periph_clock_suspend,
-				armada_3700_periph_clock_resume)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(armada_3700_periph_clock_suspend,
+				      armada_3700_periph_clock_resume)
 };
 
 static int armada_3700_periph_clock_probe(struct platform_device *pdev)
diff --git a/drivers/clk/mvebu/armada-xp.c b/drivers/clk/mvebu/armada-xp.c
index fa1568279c23..45665655a258 100644
--- a/drivers/clk/mvebu/armada-xp.c
+++ b/drivers/clk/mvebu/armada-xp.c
@@ -50,12 +50,6 @@ static u32 __init axp_get_tclk_freq(void __iomem *sar)
 	return 250000000;
 }
 
-/* MV98DX3236 TCLK frequency is fixed to 200MHz */
-static u32 __init mv98dx3236_get_tclk_freq(void __iomem *sar)
-{
-	return 200000000;
-}
-
 static const u32 axp_cpu_freqs[] __initconst = {
 	1000000000,
 	1066000000,
@@ -93,12 +87,6 @@ static u32 __init axp_get_cpu_freq(void __iomem *sar)
 	return cpu_freq;
 }
 
-/* MV98DX3236 CLK frequency is fixed to 800MHz */
-static u32 __init mv98dx3236_get_cpu_freq(void __iomem *sar)
-{
-	return 800000000;
-}
-
 static const int axp_nbclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 2}, {2, 2},
 	{1, 2}, {1, 2}, {1, 1}, {2, 3},
@@ -168,11 +156,6 @@ static const struct coreclk_soc_desc axp_coreclks = {
 	.num_ratios = ARRAY_SIZE(axp_coreclk_ratios),
 };
 
-static const struct coreclk_soc_desc mv98dx3236_coreclks = {
-	.get_tclk_freq = mv98dx3236_get_tclk_freq,
-	.get_cpu_freq = mv98dx3236_get_cpu_freq,
-};
-
 /*
  * Clock Gating Control
  */
@@ -210,15 +193,6 @@ static const struct clk_gating_soc_desc axp_gating_desc[] __initconst = {
 	{ }
 };
 
-static const struct clk_gating_soc_desc mv98dx3236_gating_desc[] __initconst = {
-	{ "ge1", NULL, 3, 0 },
-	{ "ge0", NULL, 4, 0 },
-	{ "pex00", NULL, 5, 0 },
-	{ "sdio", NULL, 17, 0 },
-	{ "xor0", NULL, 22, 0 },
-	{ }
-};
-
 static void __init axp_clk_init(struct device_node *np)
 {
 	struct device_node *cgnp =
diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c
index 808463276145..84c8900542e4 100644
--- a/drivers/clk/mvebu/cp110-system-controller.c
+++ b/drivers/clk/mvebu/cp110-system-controller.c
@@ -235,8 +235,8 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	cp110_clk_data = devm_kzalloc(dev, sizeof(*cp110_clk_data) +
-				      sizeof(struct clk_hw *) * CP110_CLK_NUM,
+	cp110_clk_data = devm_kzalloc(dev, struct_size(cp110_clk_data, hws,
+						       CP110_CLK_NUM),
 				      GFP_KERNEL);
 	if (!cp110_clk_data)
 		return -ENOMEM;
diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c
index 287fdeae7c7c..7b123105b5de 100644
--- a/drivers/clk/pxa/clk-pxa27x.c
+++ b/drivers/clk/pxa/clk-pxa27x.c
@@ -459,6 +459,7 @@ struct dummy_clk {
 };
 static struct dummy_clk dummy_clks[] __initdata = {
 	DUMMY_CLK(NULL, "pxa27x-gpio", "osc_32_768khz"),
+	DUMMY_CLK(NULL, "pxa-rtc", "osc_32_768khz"),
 	DUMMY_CLK(NULL, "sa1100-rtc", "osc_32_768khz"),
 	DUMMY_CLK("UARTCLK", "pxa2xx-ir", "STUART"),
 };
diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 32dbb4f09492..3b33ef129274 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -220,6 +220,15 @@ config MSM_GCC_8998
 	  Say Y if you want to use peripheral devices such as UART, SPI,
 	  i2c, USB, UFS, SD/eMMC, PCIe, etc.
 
+config MSM_GPUCC_8998
+	tristate "MSM8998 Graphics Clock Controller"
+	select MSM_GCC_8998
+	select QCOM_GDSC
+	help
+	  Support for the graphics clock controller on MSM8998 devices.
+	  Say Y if you want to support graphics controller devices and
+	  functionality such as 3D graphics.
+
 config QCS_GCC_404
 	tristate "QCS404 Global Clock Controller"
 	help
@@ -227,6 +236,15 @@ config QCS_GCC_404
 	  Say Y if you want to use multimedia devices or peripheral
 	  devices such as UART, SPI, I2C, USB, SD/eMMC, PCIe etc.
 
+config SC_GCC_7180
+	tristate "SC7180 Global Clock Controller"
+	select QCOM_GDSC
+	depends on COMMON_CLK_QCOM
+	help
+	  Support for the global clock controller on SC7180 devices.
+	  Say Y if you want to use peripheral devices such as UART, SPI,
+	  I2C, USB, UFS, SDCC, etc.
+
 config SDM_CAMCC_845
 	tristate "SDM845 Camera Clock Controller"
 	select SDM_GCC_845
@@ -248,6 +266,14 @@ config QCS_TURING_404
 	  Support for the Turing Clock Controller on QCS404, provides clocks
 	  and resets for the Turing subsystem.
 
+config QCS_Q6SSTOP_404
+	tristate "QCS404 Q6SSTOP Clock Controller"
+	select QCS_GCC_404
+	help
+	  Support for the Q6SSTOP clock controller on QCS404 devices.
+	  Say Y if you want to use the Q6SSTOP branch clocks of the WCSS clock
+	  controller to reset the Q6SSTOP subsystem.
+
 config SDM_GCC_845
 	tristate "SDM845 Global Clock Controller"
 	select QCOM_GDSC
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 4a813b4055d0..d899661d0f44 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_MSM_GCC_8994) += gcc-msm8994.o
 obj-$(CONFIG_MSM_GCC_8996) += gcc-msm8996.o
 obj-$(CONFIG_MSM_LCC_8960) += lcc-msm8960.o
 obj-$(CONFIG_MSM_GCC_8998) += gcc-msm8998.o
+obj-$(CONFIG_MSM_GPUCC_8998) += gpucc-msm8998.o
 obj-$(CONFIG_MSM_MMCC_8960) += mmcc-msm8960.o
 obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
@@ -42,7 +43,9 @@ obj-$(CONFIG_QCOM_CLK_RPM) += clk-rpm.o
 obj-$(CONFIG_QCOM_CLK_RPMH) += clk-rpmh.o
 obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_QCS_GCC_404) += gcc-qcs404.o
+obj-$(CONFIG_QCS_Q6SSTOP_404) += q6sstop-qcs404.o
 obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o
+obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o
 obj-$(CONFIG_SDM_CAMCC_845) += camcc-sdm845.o
 obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o
 obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o
diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h
index c25b57c3cbc8..78358b81d249 100644
--- a/drivers/clk/qcom/clk-rcg.h
+++ b/drivers/clk/qcom/clk-rcg.h
@@ -168,7 +168,7 @@ struct clk_rcg_dfs_data {
 };
 
 #define DEFINE_RCG_DFS(r) \
-	{ .rcg = &r##_src, .init = &r##_init }
+	{ .rcg = &r, .init = &r##_init }
 
 extern int qcom_cc_register_rcg_dfs(struct regmap *regmap,
 				    const struct clk_rcg_dfs_data *rcgs,
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index b98b81ef43a1..8f4b9bec2956 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -206,7 +206,7 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f,
 		break;
 	default:
 		return -EINVAL;
-	};
+	}
 
 	if (!f)
 		return -EINVAL;
@@ -220,6 +220,8 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f,
 	if (clk_flags & CLK_SET_RATE_PARENT) {
 		rate = f->freq;
 		if (f->pre_div) {
+			if (!rate)
+				rate = req->rate;
 			rate /= 2;
 			rate *= f->pre_div + 1;
 		}
@@ -319,7 +321,7 @@ static int __clk_rcg2_set_rate(struct clk_hw *hw, unsigned long rate,
 		break;
 	default:
 		return -EINVAL;
-	};
+	}
 
 	if (!f)
 		return -EINVAL;
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index 96a36f6ff667..7ed313ad6e43 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -334,13 +334,14 @@ static const struct clk_ops clk_rpmh_bcm_ops = {
 	.recalc_rate	= clk_rpmh_bcm_recalc_rate,
 };
 
-/* Resource name must match resource id present in cmd-db. */
+/* Resource name must match resource id present in cmd-db */
 DEFINE_CLK_RPMH_ARC(sdm845, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 2);
 DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk2, ln_bb_clk2_ao, "lnbclka2", 2);
 DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk3, ln_bb_clk3_ao, "lnbclka3", 2);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk1, rf_clk1_ao, "rfclka1", 1);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk2, rf_clk2_ao, "rfclka2", 1);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk3, rf_clk3_ao, "rfclka3", 1);
+DEFINE_CLK_RPMH_VRM(sm8150, rf_clk3, rf_clk3_ao, "rfclka3", 1);
 DEFINE_CLK_RPMH_BCM(sdm845, ipa, "IP0");
 
 static struct clk_hw *sdm845_rpmh_clocks[] = {
@@ -364,26 +365,19 @@ static const struct clk_rpmh_desc clk_rpmh_sdm845 = {
 	.num_clks = ARRAY_SIZE(sdm845_rpmh_clocks),
 };
 
-DEFINE_CLK_RPMH_ARC(sm8150, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 2);
-DEFINE_CLK_RPMH_VRM(sm8150, ln_bb_clk2, ln_bb_clk2_ao, "lnbclka2", 2);
-DEFINE_CLK_RPMH_VRM(sm8150, ln_bb_clk3, ln_bb_clk3_ao, "lnbclka3", 2);
-DEFINE_CLK_RPMH_VRM(sm8150, rf_clk1, rf_clk1_ao, "rfclka1", 1);
-DEFINE_CLK_RPMH_VRM(sm8150, rf_clk2, rf_clk2_ao, "rfclka2", 1);
-DEFINE_CLK_RPMH_VRM(sm8150, rf_clk3, rf_clk3_ao, "rfclka3", 1);
-
 static struct clk_hw *sm8150_rpmh_clocks[] = {
-	[RPMH_CXO_CLK]		= &sm8150_bi_tcxo.hw,
-	[RPMH_CXO_CLK_A]	= &sm8150_bi_tcxo_ao.hw,
-	[RPMH_LN_BB_CLK2]	= &sm8150_ln_bb_clk2.hw,
-	[RPMH_LN_BB_CLK2_A]	= &sm8150_ln_bb_clk2_ao.hw,
-	[RPMH_LN_BB_CLK3]	= &sm8150_ln_bb_clk3.hw,
-	[RPMH_LN_BB_CLK3_A]	= &sm8150_ln_bb_clk3_ao.hw,
-	[RPMH_RF_CLK1]		= &sm8150_rf_clk1.hw,
-	[RPMH_RF_CLK1_A]	= &sm8150_rf_clk1_ao.hw,
-	[RPMH_RF_CLK2]		= &sm8150_rf_clk2.hw,
-	[RPMH_RF_CLK2_A]	= &sm8150_rf_clk2_ao.hw,
-	[RPMH_RF_CLK3]		= &sm8150_rf_clk3.hw,
-	[RPMH_RF_CLK3_A]	= &sm8150_rf_clk3_ao.hw,
+	[RPMH_CXO_CLK]		= &sdm845_bi_tcxo.hw,
+	[RPMH_CXO_CLK_A]	= &sdm845_bi_tcxo_ao.hw,
+	[RPMH_LN_BB_CLK2]	= &sdm845_ln_bb_clk2.hw,
+	[RPMH_LN_BB_CLK2_A]	= &sdm845_ln_bb_clk2_ao.hw,
+	[RPMH_LN_BB_CLK3]	= &sdm845_ln_bb_clk3.hw,
+	[RPMH_LN_BB_CLK3_A]	= &sdm845_ln_bb_clk3_ao.hw,
+	[RPMH_RF_CLK1]		= &sdm845_rf_clk1.hw,
+	[RPMH_RF_CLK1_A]	= &sdm845_rf_clk1_ao.hw,
+	[RPMH_RF_CLK2]		= &sdm845_rf_clk2.hw,
+	[RPMH_RF_CLK2_A]	= &sdm845_rf_clk2_ao.hw,
+	[RPMH_RF_CLK3]		= &sdm845_rf_clk3.hw,
+	[RPMH_RF_CLK3_A]	= &sdm845_rf_clk3_ao.hw,
 };
 
 static const struct clk_rpmh_desc clk_rpmh_sm8150 = {
@@ -391,6 +385,24 @@ static const struct clk_rpmh_desc clk_rpmh_sm8150 = {
 	.num_clks = ARRAY_SIZE(sm8150_rpmh_clocks),
 };
 
+static struct clk_hw *sc7180_rpmh_clocks[] = {
+	[RPMH_CXO_CLK]		= &sdm845_bi_tcxo.hw,
+	[RPMH_CXO_CLK_A]	= &sdm845_bi_tcxo_ao.hw,
+	[RPMH_LN_BB_CLK2]	= &sdm845_ln_bb_clk2.hw,
+	[RPMH_LN_BB_CLK2_A]	= &sdm845_ln_bb_clk2_ao.hw,
+	[RPMH_LN_BB_CLK3]	= &sdm845_ln_bb_clk3.hw,
+	[RPMH_LN_BB_CLK3_A]	= &sdm845_ln_bb_clk3_ao.hw,
+	[RPMH_RF_CLK1]		= &sdm845_rf_clk1.hw,
+	[RPMH_RF_CLK1_A]	= &sdm845_rf_clk1_ao.hw,
+	[RPMH_RF_CLK2]		= &sdm845_rf_clk2.hw,
+	[RPMH_RF_CLK2_A]	= &sdm845_rf_clk2_ao.hw,
+};
+
+static const struct clk_rpmh_desc clk_rpmh_sc7180 = {
+	.clks = sc7180_rpmh_clocks,
+	.num_clks = ARRAY_SIZE(sc7180_rpmh_clocks),
+};
+
 static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec,
 					 void *data)
 {
@@ -471,6 +483,7 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 static const struct of_device_id clk_rpmh_match_table[] = {
 	{ .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
 	{ .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
+	{ .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, clk_rpmh_match_table);
@@ -487,7 +500,7 @@ static int __init clk_rpmh_init(void)
 {
 	return platform_driver_register(&clk_rpmh_driver);
 }
-subsys_initcall(clk_rpmh_init);
+core_initcall(clk_rpmh_init);
 
 static void __exit clk_rpmh_exit(void)
 {
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index fef5e8157061..930fa4a4c52a 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -648,6 +648,7 @@ static const struct rpm_smd_clk_desc rpm_clk_qcs404 = {
 };
 
 /* msm8998 */
+DEFINE_CLK_SMD_RPM(msm8998, pcnoc_clk, pcnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 0);
 DEFINE_CLK_SMD_RPM(msm8998, snoc_clk, snoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 1);
 DEFINE_CLK_SMD_RPM(msm8998, cnoc_clk, cnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 2);
 DEFINE_CLK_SMD_RPM(msm8998, ce1_clk, ce1_a_clk, QCOM_SMD_RPM_CE_CLK, 0);
@@ -670,6 +671,8 @@ DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk2_pin, rf_clk2_a_pin, 5);
 DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, rf_clk3, rf_clk3_a, 6);
 DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk3_pin, rf_clk3_a_pin, 6);
 static struct clk_smd_rpm *msm8998_clks[] = {
+	[RPM_SMD_PCNOC_CLK] = &msm8998_pcnoc_clk,
+	[RPM_SMD_PCNOC_A_CLK] = &msm8998_pcnoc_a_clk,
 	[RPM_SMD_SNOC_CLK] = &msm8998_snoc_clk,
 	[RPM_SMD_SNOC_A_CLK] = &msm8998_snoc_a_clk,
 	[RPM_SMD_CNOC_CLK] = &msm8998_cnoc_clk,
diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index 28ddc747d703..60d2a78d1395 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -29,6 +29,9 @@ struct freq_tbl *qcom_find_freq(const struct freq_tbl *f, unsigned long rate)
 	if (!f)
 		return NULL;
 
+	if (!f->freq)
+		return f;
+
 	for (; f->freq; f++)
 		if (rate <= f->freq)
 			return f;
@@ -218,7 +221,7 @@ static struct clk_hw *qcom_cc_clk_hw_get(struct of_phandle_args *clkspec,
 		return ERR_PTR(-EINVAL);
 	}
 
-	return cc->rclks[idx] ? &cc->rclks[idx]->hw : ERR_PTR(-ENOENT);
+	return cc->rclks[idx] ? &cc->rclks[idx]->hw : NULL;
 }
 
 int qcom_cc_really_probe(struct platform_device *pdev,
diff --git a/drivers/clk/qcom/gcc-msm8998.c b/drivers/clk/qcom/gcc-msm8998.c
index 091acd59c1d6..cf31b5d03270 100644
--- a/drivers/clk/qcom/gcc-msm8998.c
+++ b/drivers/clk/qcom/gcc-msm8998.c
@@ -1266,6 +1266,72 @@ static struct clk_branch gcc_bimc_mss_q6_axi_clk = {
 	},
 };
 
+static struct clk_branch gcc_mss_cfg_ahb_clk = {
+	.halt_reg = 0x8a000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8a000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_mss_snoc_axi_clk = {
+	.halt_reg = 0x8a03c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8a03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_snoc_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_mss_mnoc_bimc_axi_clk = {
+	.halt_reg = 0x8a004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8a004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_mnoc_bimc_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x38004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x38004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52004,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_mss_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x5200c,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_gpll0_div_clk_src",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
 static struct clk_branch gcc_blsp1_ahb_clk = {
 	.halt_reg = 0x17004,
 	.halt_check = BRANCH_HALT_VOTED,
@@ -2832,6 +2898,11 @@ static struct clk_regmap *gcc_msm8998_clocks[] = {
 	[GCC_USB3_CLKREF_CLK] = &gcc_usb3_clkref_clk.clkr,
 	[GCC_PCIE_CLKREF_CLK] = &gcc_pcie_clkref_clk.clkr,
 	[GCC_RX1_USB2_CLKREF_CLK] = &gcc_rx1_usb2_clkref_clk.clkr,
+	[GCC_MSS_CFG_AHB_CLK] = &gcc_mss_cfg_ahb_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_MSS_GPLL0_DIV_CLK_SRC] = &gcc_mss_gpll0_div_clk_src.clkr,
+	[GCC_MSS_SNOC_AXI_CLK] = &gcc_mss_snoc_axi_clk.clkr,
+	[GCC_MSS_MNOC_BIMC_AXI_CLK] = &gcc_mss_mnoc_bimc_axi_clk.clkr,
 };
 
 static struct gdsc *gcc_msm8998_gdscs[] = {
@@ -2928,6 +2999,7 @@ static const struct qcom_reset_map gcc_msm8998_resets[] = {
 	[GCC_GPU_BCR] = { 0x71000 },
 	[GCC_SPSS_BCR] = { 0x72000 },
 	[GCC_OBT_ODT_BCR] = { 0x73000 },
+	[GCC_MSS_RESTART] = { 0x79000 },
 	[GCC_VS_BCR] = { 0x7a000 },
 	[GCC_MSS_VS_RESET] = { 0x7a100 },
 	[GCC_GPU_VS_RESET] = { 0x7a104 },
diff --git a/drivers/clk/qcom/gcc-qcs404.c b/drivers/clk/qcom/gcc-qcs404.c
index bd32212f37e6..9b0c4ce2ef4e 100644
--- a/drivers/clk/qcom/gcc-qcs404.c
+++ b/drivers/clk/qcom/gcc-qcs404.c
@@ -2855,7 +2855,7 @@ static int __init gcc_qcs404_init(void)
 {
 	return platform_driver_register(&gcc_qcs404_driver);
 }
-subsys_initcall(gcc_qcs404_init);
+core_initcall(gcc_qcs404_init);
 
 static void __exit gcc_qcs404_exit(void)
 {
diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c
new file mode 100644
index 000000000000..7f59fb8da033
--- /dev/null
+++ b/drivers/clk/qcom/gcc-sc7180.c
@@ -0,0 +1,2452 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,gcc-sc7180.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+	P_BI_TCXO,
+	P_CORE_BI_PLL_TEST_SE,
+	P_GPLL0_OUT_EVEN,
+	P_GPLL0_OUT_MAIN,
+	P_GPLL1_OUT_MAIN,
+	P_GPLL4_OUT_MAIN,
+	P_GPLL6_OUT_MAIN,
+	P_GPLL7_OUT_MAIN,
+	P_SLEEP_CLK,
+};
+
+static struct clk_alpha_pll gpll0 = {
+	.offset = 0x0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+				.name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll0_out_even[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll0_out_even = {
+	.offset = 0x0,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll0_out_even,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_even),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0_out_even",
+		.parent_data = &(const struct clk_parent_data){
+			.hw = &gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_fabia_ops,
+	},
+};
+
+static struct clk_fixed_factor gcc_pll0_main_div_cdiv = {
+	.mult = 1,
+	.div = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "gcc_pll0_main_div_cdiv",
+		.parent_data = &(const struct clk_parent_data){
+			.hw = &gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_fixed_factor_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll1 = {
+	.offset = 0x01000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll1",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+				.name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll gpll4 = {
+	.offset = 0x76000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll4",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+				.name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll gpll6 = {
+	.offset = 0x13000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll6",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+				.name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll gpll7 = {
+	.offset = 0x27000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll7",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+				.name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static const struct parent_map gcc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_0[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct clk_parent_data gcc_parent_data_0_ao[] = {
+	{ .fw_name = "bi_tcxo_ao", .name = "bi_tcxo_ao" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_1[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL6_OUT_MAIN, 2 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_1[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_2[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL1_OUT_MAIN, 4 },
+	{ P_GPLL4_OUT_MAIN, 5 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_2[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll1.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_3[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_3[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_4[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_SLEEP_CLK, 5 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_4[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .fw_name = "sleep_clk", .name = "sleep_clk" },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_5[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL7_OUT_MAIN, 3 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_5[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll7.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_6[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_SLEEP_CLK, 5 },
+	{ P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parent_data_6[] = {
+	{ .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .fw_name = "sleep_clk", .name = "sleep_clk" },
+	{ .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct freq_tbl ftbl_gcc_cpuss_ahb_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_cpuss_ahb_clk_src = {
+	.cmd_rcgr = 0x48014,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_cpuss_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_cpuss_ahb_clk_src",
+		.parent_data = gcc_parent_data_0_ao,
+		.num_parents = 4,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+		},
+};
+
+static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_EVEN, 1.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_gp1_clk_src = {
+	.cmd_rcgr = 0x64004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp1_clk_src",
+		.parent_data = gcc_parent_data_4,
+		.num_parents = 5,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp2_clk_src = {
+	.cmd_rcgr = 0x65004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp2_clk_src",
+		.parent_data = gcc_parent_data_4,
+		.num_parents = 5,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp3_clk_src = {
+	.cmd_rcgr = 0x66004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp3_clk_src",
+		.parent_data = gcc_parent_data_4,
+		.num_parents = 5,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(60000000, P_GPLL0_OUT_EVEN, 5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_pdm2_clk_src = {
+	.cmd_rcgr = 0x33010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_pdm2_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_pdm2_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_qspi_core_clk_src[] = {
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	F(300000000, P_GPLL0_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_qspi_core_clk_src = {
+	.cmd_rcgr = 0x4b00c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_2,
+	.freq_tbl = ftbl_gcc_qspi_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_qspi_core_clk_src",
+		.parent_data = gcc_parent_data_2,
+		.num_parents = 6,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
+	F(7372800, P_GPLL0_OUT_EVEN, 1, 384, 15625),
+	F(14745600, P_GPLL0_OUT_EVEN, 1, 768, 15625),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(29491200, P_GPLL0_OUT_EVEN, 1, 1536, 15625),
+	F(32000000, P_GPLL0_OUT_EVEN, 1, 8, 75),
+	F(48000000, P_GPLL0_OUT_EVEN, 1, 4, 25),
+	F(64000000, P_GPLL0_OUT_EVEN, 1, 16, 75),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(80000000, P_GPLL0_OUT_EVEN, 1, 4, 15),
+	F(96000000, P_GPLL0_OUT_EVEN, 1, 8, 25),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(102400000, P_GPLL0_OUT_EVEN, 1, 128, 375),
+	F(112000000, P_GPLL0_OUT_EVEN, 1, 28, 75),
+	F(117964800, P_GPLL0_OUT_EVEN, 1, 6144, 15625),
+	F(120000000, P_GPLL0_OUT_EVEN, 2.5, 0, 0),
+	F(128000000, P_GPLL6_OUT_MAIN, 3, 0, 0),
+	{ }
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s0_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s0_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
+	.cmd_rcgr = 0x17034,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s1_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s1_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
+	.cmd_rcgr = 0x17164,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s2_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s2_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
+	.cmd_rcgr = 0x17294,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s3_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s3_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
+	.cmd_rcgr = 0x173c4,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s4_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s4_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
+	.cmd_rcgr = 0x174f4,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s5_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s5_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
+	.cmd_rcgr = 0x17624,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s0_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s0_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
+	.cmd_rcgr = 0x18018,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s1_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s1_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
+	.cmd_rcgr = 0x18148,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s2_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s2_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
+	.cmd_rcgr = 0x18278,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s3_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s3_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
+	.cmd_rcgr = 0x183a8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s4_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s4_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
+	.cmd_rcgr = 0x184d8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s5_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s5_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = 4,
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
+	.cmd_rcgr = 0x18608,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s5_clk_src_init,
+};
+
+
+static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk_src[] = {
+	F(144000, P_BI_TCXO, 16, 3, 25),
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(20000000, P_GPLL0_OUT_EVEN, 5, 1, 3),
+	F(25000000, P_GPLL0_OUT_EVEN, 6, 1, 2),
+	F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(192000000, P_GPLL6_OUT_MAIN, 2, 0, 0),
+	F(384000000, P_GPLL6_OUT_MAIN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x12028,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_sdcc1_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_apps_clk_src",
+		.parent_data = gcc_parent_data_1,
+		.num_parents = 5,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_ice_core_clk_src[] = {
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+	F(300000000, P_GPLL0_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
+	.cmd_rcgr = 0x12010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_sdcc1_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_ice_core_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(9600000, P_BI_TCXO, 2, 0, 0),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(202000000, P_GPLL7_OUT_MAIN, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+	.cmd_rcgr = 0x1400c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_sdcc2_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc2_apps_clk_src",
+		.parent_data = gcc_parent_data_5,
+		.num_parents = 5,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_axi_clk_src[] = {
+	F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+	F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_axi_clk_src = {
+	.cmd_rcgr = 0x77020,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_axi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_axi_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_ice_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	F(300000000, P_GPLL0_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_ice_core_clk_src = {
+	.cmd_rcgr = 0x77048,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_ice_core_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_phy_aux_clk_src[] = {
+	F(9600000, P_BI_TCXO, 2, 0, 0),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_phy_aux_clk_src = {
+	.cmd_rcgr = 0x77098,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_3,
+	.freq_tbl = ftbl_gcc_ufs_phy_phy_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_phy_aux_clk_src",
+		.parent_data = gcc_parent_data_3,
+		.num_parents = 3,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_unipro_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_unipro_core_clk_src = {
+	.cmd_rcgr = 0x77060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_unipro_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_unipro_core_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_master_clk_src[] = {
+	F(66666667, P_GPLL0_OUT_EVEN, 4.5, 0, 0),
+	F(133333333, P_GPLL0_OUT_MAIN, 4.5, 0, 0),
+	F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+	F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_master_clk_src = {
+	.cmd_rcgr = 0xf01c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_usb30_prim_master_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_master_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_mock_utmi_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(20000000, P_GPLL0_OUT_EVEN, 15, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_mock_utmi_clk_src = {
+	.cmd_rcgr = 0xf034,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_mock_utmi_clk_src",
+		.parent_data = gcc_parent_data_0,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb3_prim_phy_aux_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb3_prim_phy_aux_clk_src = {
+	.cmd_rcgr = 0xf060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_6,
+	.freq_tbl = ftbl_gcc_usb3_prim_phy_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb3_prim_phy_aux_clk_src",
+		.parent_data = gcc_parent_data_6,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_aggre_ufs_phy_axi_clk = {
+	.halt_reg = 0x82024,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x82024,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x82024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_aggre_ufs_phy_axi_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_aggre_usb3_prim_axi_clk = {
+	.halt_reg = 0x8201c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8201c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_aggre_usb3_prim_axi_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x38004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x38004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_ahb_clk = {
+	.halt_reg = 0xb008,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0xb008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0xb008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_hf_axi_clk = {
+	.halt_reg = 0xb020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_hf_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_throttle_hf_axi_clk = {
+	.halt_reg = 0xb080,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0xb080,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0xb080,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_throttle_hf_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_xo_clk = {
+	.halt_reg = 0xb02c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb02c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_ahb_clk = {
+	.halt_reg = 0x4100c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x4100c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(3),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_axi_clk = {
+	.halt_reg = 0x41008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_clk = {
+	.halt_reg = 0x41004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cfg_noc_usb3_prim_axi_clk = {
+	.halt_reg = 0x502c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x502c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cfg_noc_usb3_prim_axi_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+/* For CPUSS functionality the AHB clock needs to be left enabled */
+static struct clk_branch gcc_cpuss_ahb_clk = {
+	.halt_reg = 0x48000,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(21),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_ahb_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_cpuss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_rbcpr_clk = {
+	.halt_reg = 0x48008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x48008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_rbcpr_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ddrss_gpu_axi_clk = {
+	.halt_reg = 0x4452c,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x4452c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ddrss_gpu_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_gpll0_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(18),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_gpll0_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(19),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_gpll0_div_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_pll0_main_div_cdiv.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_hf_axi_clk = {
+	.halt_reg = 0xb024,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_hf_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_throttle_hf_axi_clk = {
+	.halt_reg = 0xb084,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0xb084,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0xb084,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_throttle_hf_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_xo_clk = {
+	.halt_reg = 0xb030,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x64000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x64000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp1_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_gp1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x65000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x65000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp2_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_gp2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0x66000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x66000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp3_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_gp3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(16),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_div_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_pll0_main_div_cdiv.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_memnoc_gfx_clk = {
+	.halt_reg = 0x7100c,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x7100c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_memnoc_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_snoc_dvm_gfx_clk = {
+	.halt_reg = 0x71018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x71018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_snoc_dvm_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_axi_clk = {
+	.halt_reg = 0x4d008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_axi_clk = {
+	.halt_reg = 0x73008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x73008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_dma_cfg_ahb_clk = {
+	.halt_reg = 0x73018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x73018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_dma_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_dsp_cfg_ahb_clk = {
+	.halt_reg = 0x7301c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x7301c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_dsp_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_cfg_ahb_clk = {
+	.halt_reg = 0x4d004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x4d004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4d004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_dma_clk = {
+	.halt_reg = 0x4d1a0,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x4d1a0,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4d1a0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_dma_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_gpll0_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(25),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_gpll0_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(26),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_gpll0_div_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_pll0_main_div_cdiv.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm2_clk = {
+	.halt_reg = 0x3300c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm2_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_pdm2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = {
+	.halt_reg = 0x33004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x33004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x33004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_xo4_clk = {
+	.halt_reg = 0x33008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x33008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_xo4_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x34004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x34004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_prng_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qspi_cnoc_periph_ahb_clk = {
+	.halt_reg = 0x4b004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x4b004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4b004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qspi_cnoc_periph_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qspi_core_clk = {
+	.halt_reg = 0x4b008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4b008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qspi_core_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qspi_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_2x_clk = {
+	.halt_reg = 0x17014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_2x_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_clk = {
+	.halt_reg = 0x1700c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s0_clk = {
+	.halt_reg = 0x17030,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s0_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s1_clk = {
+	.halt_reg = 0x17160,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s1_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s2_clk = {
+	.halt_reg = 0x17290,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(12),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s2_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s3_clk = {
+	.halt_reg = 0x173c0,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s3_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s4_clk = {
+	.halt_reg = 0x174f0,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s4_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s4_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s5_clk = {
+	.halt_reg = 0x17620,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s5_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap0_s5_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_core_2x_clk = {
+	.halt_reg = 0x18004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(18),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_core_2x_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_core_clk = {
+	.halt_reg = 0x18008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(19),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s0_clk = {
+	.halt_reg = 0x18014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(22),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s0_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s1_clk = {
+	.halt_reg = 0x18144,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(23),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s1_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s2_clk = {
+	.halt_reg = 0x18274,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(24),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s2_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s3_clk = {
+	.halt_reg = 0x183a4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(25),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s3_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s4_clk = {
+	.halt_reg = 0x184d4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(26),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s4_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s4_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s5_clk = {
+	.halt_reg = 0x18604,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(27),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s5_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_qupv3_wrap1_s5_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_m_ahb_clk = {
+	.halt_reg = 0x17004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_m_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_s_ahb_clk = {
+	.halt_reg = 0x17008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_s_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_m_ahb_clk = {
+	.halt_reg = 0x1800c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(20),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_1_m_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_s_ahb_clk = {
+	.halt_reg = 0x18010,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x18010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(21),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_1_s_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x12008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x12008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x1200c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1200c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_sdcc1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ice_core_clk = {
+	.halt_reg = 0x12040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x12040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ice_core_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_sdcc1_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_ahb_clk = {
+	.halt_reg = 0x14008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x14008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_apps_clk = {
+	.halt_reg = 0x14004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x14004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_apps_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_sdcc2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+/* For CPUSS functionality the SYS NOC clock needs to be left enabled */
+static struct clk_branch gcc_sys_noc_cpuss_ahb_clk = {
+	.halt_reg = 0x4144,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sys_noc_cpuss_ahb_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_cpuss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_mem_clkref_clk = {
+	.halt_reg = 0x8c000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8c000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_mem_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ahb_clk = {
+	.halt_reg = 0x77014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x77014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x77014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_axi_clk = {
+	.halt_reg = 0x77038,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x77038,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x77038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_axi_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_clk = {
+	.halt_reg = 0x77090,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x77090,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x77090,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ice_core_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_ufs_phy_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_clk = {
+	.halt_reg = 0x77094,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x77094,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x77094,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_phy_aux_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_ufs_phy_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_rx_symbol_0_clk = {
+	.halt_reg = 0x7701c,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x7701c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_rx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_tx_symbol_0_clk = {
+	.halt_reg = 0x77018,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x77018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_tx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_clk = {
+	.halt_reg = 0x7708c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x7708c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7708c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_unipro_core_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_ufs_phy_unipro_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_master_clk = {
+	.halt_reg = 0xf010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xf010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_master_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_mock_utmi_clk = {
+	.halt_reg = 0xf018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xf018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_mock_utmi_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw =
+				&gcc_usb30_prim_mock_utmi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_sleep_clk = {
+	.halt_reg = 0xf014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xf014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_clkref_clk = {
+	.halt_reg = 0x8c010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8c010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_aux_clk = {
+	.halt_reg = 0xf050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xf050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_aux_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_usb3_prim_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_com_aux_clk = {
+	.halt_reg = 0xf054,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xf054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_com_aux_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_usb3_prim_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_pipe_clk = {
+	.halt_reg = 0xf058,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0xf058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_pipe_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb_phy_cfg_ahb2phy_clk = {
+	.halt_reg = 0x6a004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x6a004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x6a004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb_phy_cfg_ahb2phy_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_axi_clk = {
+	.halt_reg = 0xb01c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb01c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(20),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_gpll0_div_clk_src",
+			.parent_data = &(const struct clk_parent_data){
+				.hw = &gcc_pll0_main_div_cdiv.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_throttle_axi_clk = {
+	.halt_reg = 0xb07c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0xb07c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0xb07c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_throttle_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_xo_clk = {
+	.halt_reg = 0xb028,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0xb028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc ufs_phy_gdsc = {
+	.gdscr = 0x77004,
+	.pd = {
+		.name = "ufs_phy_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc usb30_prim_gdsc = {
+	.gdscr = 0x0f004,
+	.pd = {
+		.name = "usb30_prim_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
+	.gdscr = 0x7d040,
+	.pd = {
+		.name = "hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_mmnoc_mmu_tbu_sf_gdsc = {
+	.gdscr = 0x7d044,
+	.pd = {
+		.name = "hlos1_vote_mmnoc_mmu_tbu_sf_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc *gcc_sc7180_gdscs[] = {
+	[UFS_PHY_GDSC] = &ufs_phy_gdsc,
+	[USB30_PRIM_GDSC] = &usb30_prim_gdsc,
+	[HLOS1_VOTE_MMNOC_MMU_TBU_HF0_GDSC] =
+					&hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc,
+	[HLOS1_VOTE_MMNOC_MMU_TBU_SF_GDSC] =
+					&hlos1_vote_mmnoc_mmu_tbu_sf_gdsc,
+};
+
+
+static struct clk_hw *gcc_sc7180_hws[] = {
+	[GCC_GPLL0_MAIN_DIV_CDIV] = &gcc_pll0_main_div_cdiv.hw,
+};
+
+static struct clk_regmap *gcc_sc7180_clocks[] = {
+	[GCC_AGGRE_UFS_PHY_AXI_CLK] = &gcc_aggre_ufs_phy_axi_clk.clkr,
+	[GCC_AGGRE_USB3_PRIM_AXI_CLK] = &gcc_aggre_usb3_prim_axi_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CAMERA_AHB_CLK] = &gcc_camera_ahb_clk.clkr,
+	[GCC_CAMERA_HF_AXI_CLK] = &gcc_camera_hf_axi_clk.clkr,
+	[GCC_CAMERA_THROTTLE_HF_AXI_CLK] = &gcc_camera_throttle_hf_axi_clk.clkr,
+	[GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr,
+	[GCC_CE1_AHB_CLK] = &gcc_ce1_ahb_clk.clkr,
+	[GCC_CE1_AXI_CLK] = &gcc_ce1_axi_clk.clkr,
+	[GCC_CE1_CLK] = &gcc_ce1_clk.clkr,
+	[GCC_CFG_NOC_USB3_PRIM_AXI_CLK] = &gcc_cfg_noc_usb3_prim_axi_clk.clkr,
+	[GCC_CPUSS_AHB_CLK] = &gcc_cpuss_ahb_clk.clkr,
+	[GCC_CPUSS_AHB_CLK_SRC] = &gcc_cpuss_ahb_clk_src.clkr,
+	[GCC_CPUSS_RBCPR_CLK] = &gcc_cpuss_rbcpr_clk.clkr,
+	[GCC_DDRSS_GPU_AXI_CLK] = &gcc_ddrss_gpu_axi_clk.clkr,
+	[GCC_DISP_GPLL0_CLK_SRC] = &gcc_disp_gpll0_clk_src.clkr,
+	[GCC_DISP_GPLL0_DIV_CLK_SRC] = &gcc_disp_gpll0_div_clk_src.clkr,
+	[GCC_DISP_HF_AXI_CLK] = &gcc_disp_hf_axi_clk.clkr,
+	[GCC_DISP_THROTTLE_HF_AXI_CLK] = &gcc_disp_throttle_hf_axi_clk.clkr,
+	[GCC_DISP_XO_CLK] = &gcc_disp_xo_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr,
+	[GCC_GPU_GPLL0_CLK_SRC] = &gcc_gpu_gpll0_clk_src.clkr,
+	[GCC_GPU_GPLL0_DIV_CLK_SRC] = &gcc_gpu_gpll0_div_clk_src.clkr,
+	[GCC_GPU_MEMNOC_GFX_CLK] = &gcc_gpu_memnoc_gfx_clk.clkr,
+	[GCC_GPU_SNOC_DVM_GFX_CLK] = &gcc_gpu_snoc_dvm_gfx_clk.clkr,
+	[GCC_NPU_AXI_CLK] = &gcc_npu_axi_clk.clkr,
+	[GCC_NPU_BWMON_AXI_CLK] = &gcc_npu_bwmon_axi_clk.clkr,
+	[GCC_NPU_BWMON_DMA_CFG_AHB_CLK] = &gcc_npu_bwmon_dma_cfg_ahb_clk.clkr,
+	[GCC_NPU_BWMON_DSP_CFG_AHB_CLK] = &gcc_npu_bwmon_dsp_cfg_ahb_clk.clkr,
+	[GCC_NPU_CFG_AHB_CLK] = &gcc_npu_cfg_ahb_clk.clkr,
+	[GCC_NPU_DMA_CLK] = &gcc_npu_dma_clk.clkr,
+	[GCC_NPU_GPLL0_CLK_SRC] = &gcc_npu_gpll0_clk_src.clkr,
+	[GCC_NPU_GPLL0_DIV_CLK_SRC] = &gcc_npu_gpll0_div_clk_src.clkr,
+	[GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+	[GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr,
+	[GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+	[GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QSPI_CNOC_PERIPH_AHB_CLK] = &gcc_qspi_cnoc_periph_ahb_clk.clkr,
+	[GCC_QSPI_CORE_CLK] = &gcc_qspi_core_clk.clkr,
+	[GCC_QSPI_CORE_CLK_SRC] = &gcc_qspi_core_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_CORE_2X_CLK] = &gcc_qupv3_wrap0_core_2x_clk.clkr,
+	[GCC_QUPV3_WRAP0_CORE_CLK] = &gcc_qupv3_wrap0_core_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK] = &gcc_qupv3_wrap0_s0_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK_SRC] = &gcc_qupv3_wrap0_s0_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK] = &gcc_qupv3_wrap0_s1_clk.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK_SRC] = &gcc_qupv3_wrap0_s1_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK] = &gcc_qupv3_wrap0_s2_clk.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK_SRC] = &gcc_qupv3_wrap0_s2_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK] = &gcc_qupv3_wrap0_s3_clk.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK_SRC] = &gcc_qupv3_wrap0_s3_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK] = &gcc_qupv3_wrap0_s4_clk.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK_SRC] = &gcc_qupv3_wrap0_s4_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK] = &gcc_qupv3_wrap0_s5_clk.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK_SRC] = &gcc_qupv3_wrap0_s5_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_CORE_2X_CLK] = &gcc_qupv3_wrap1_core_2x_clk.clkr,
+	[GCC_QUPV3_WRAP1_CORE_CLK] = &gcc_qupv3_wrap1_core_clk.clkr,
+	[GCC_QUPV3_WRAP1_S0_CLK] = &gcc_qupv3_wrap1_s0_clk.clkr,
+	[GCC_QUPV3_WRAP1_S0_CLK_SRC] = &gcc_qupv3_wrap1_s0_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S1_CLK] = &gcc_qupv3_wrap1_s1_clk.clkr,
+	[GCC_QUPV3_WRAP1_S1_CLK_SRC] = &gcc_qupv3_wrap1_s1_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S2_CLK] = &gcc_qupv3_wrap1_s2_clk.clkr,
+	[GCC_QUPV3_WRAP1_S2_CLK_SRC] = &gcc_qupv3_wrap1_s2_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S3_CLK] = &gcc_qupv3_wrap1_s3_clk.clkr,
+	[GCC_QUPV3_WRAP1_S3_CLK_SRC] = &gcc_qupv3_wrap1_s3_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S4_CLK] = &gcc_qupv3_wrap1_s4_clk.clkr,
+	[GCC_QUPV3_WRAP1_S4_CLK_SRC] = &gcc_qupv3_wrap1_s4_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S5_CLK] = &gcc_qupv3_wrap1_s5_clk.clkr,
+	[GCC_QUPV3_WRAP1_S5_CLK_SRC] = &gcc_qupv3_wrap1_s5_clk_src.clkr,
+	[GCC_QUPV3_WRAP_0_M_AHB_CLK] = &gcc_qupv3_wrap_0_m_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_0_S_AHB_CLK] = &gcc_qupv3_wrap_0_s_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_1_M_AHB_CLK] = &gcc_qupv3_wrap_1_m_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_1_S_AHB_CLK] = &gcc_qupv3_wrap_1_s_ahb_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_SDCC1_APPS_CLK_SRC] = &gcc_sdcc1_apps_clk_src.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK] = &gcc_sdcc1_ice_core_clk.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK_SRC] = &gcc_sdcc1_ice_core_clk_src.clkr,
+	[GCC_SDCC2_AHB_CLK] = &gcc_sdcc2_ahb_clk.clkr,
+	[GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
+	[GCC_SDCC2_APPS_CLK_SRC] = &gcc_sdcc2_apps_clk_src.clkr,
+	[GCC_SYS_NOC_CPUSS_AHB_CLK] = &gcc_sys_noc_cpuss_ahb_clk.clkr,
+	[GCC_UFS_MEM_CLKREF_CLK] = &gcc_ufs_mem_clkref_clk.clkr,
+	[GCC_UFS_PHY_AHB_CLK] = &gcc_ufs_phy_ahb_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK] = &gcc_ufs_phy_axi_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK_SRC] = &gcc_ufs_phy_axi_clk_src.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK] = &gcc_ufs_phy_ice_core_clk.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK_SRC] = &gcc_ufs_phy_ice_core_clk_src.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK] = &gcc_ufs_phy_phy_aux_clk.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK_SRC] = &gcc_ufs_phy_phy_aux_clk_src.clkr,
+	[GCC_UFS_PHY_RX_SYMBOL_0_CLK] = &gcc_ufs_phy_rx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_TX_SYMBOL_0_CLK] = &gcc_ufs_phy_tx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK] = &gcc_ufs_phy_unipro_core_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC] =
+		&gcc_ufs_phy_unipro_core_clk_src.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK] = &gcc_usb30_prim_master_clk.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK_SRC] = &gcc_usb30_prim_master_clk_src.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK] = &gcc_usb30_prim_mock_utmi_clk.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC] =
+		&gcc_usb30_prim_mock_utmi_clk_src.clkr,
+	[GCC_USB30_PRIM_SLEEP_CLK] = &gcc_usb30_prim_sleep_clk.clkr,
+	[GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr,
+	[GCC_USB3_PRIM_PHY_AUX_CLK] = &gcc_usb3_prim_phy_aux_clk.clkr,
+	[GCC_USB3_PRIM_PHY_AUX_CLK_SRC] = &gcc_usb3_prim_phy_aux_clk_src.clkr,
+	[GCC_USB3_PRIM_PHY_COM_AUX_CLK] = &gcc_usb3_prim_phy_com_aux_clk.clkr,
+	[GCC_USB3_PRIM_PHY_PIPE_CLK] = &gcc_usb3_prim_phy_pipe_clk.clkr,
+	[GCC_USB_PHY_CFG_AHB2PHY_CLK] = &gcc_usb_phy_cfg_ahb2phy_clk.clkr,
+	[GCC_VIDEO_AXI_CLK] = &gcc_video_axi_clk.clkr,
+	[GCC_VIDEO_GPLL0_DIV_CLK_SRC] = &gcc_video_gpll0_div_clk_src.clkr,
+	[GCC_VIDEO_THROTTLE_AXI_CLK] = &gcc_video_throttle_axi_clk.clkr,
+	[GCC_VIDEO_XO_CLK] = &gcc_video_xo_clk.clkr,
+	[GPLL0] = &gpll0.clkr,
+	[GPLL0_OUT_EVEN] = &gpll0_out_even.clkr,
+	[GPLL6] = &gpll6.clkr,
+	[GPLL7] = &gpll7.clkr,
+	[GPLL4] = &gpll4.clkr,
+	[GPLL1] = &gpll1.clkr,
+};
+
+static const struct qcom_reset_map gcc_sc7180_resets[] = {
+	[GCC_QUSB2PHY_PRIM_BCR] = { 0x26000 },
+	[GCC_QUSB2PHY_SEC_BCR] = { 0x26004 },
+	[GCC_UFS_PHY_BCR] = { 0x77000 },
+	[GCC_USB30_PRIM_BCR] = { 0xf000 },
+	[GCC_USB3_PHY_PRIM_BCR] = { 0x50000 },
+	[GCC_USB3PHY_PHY_PRIM_BCR] = { 0x50004 },
+	[GCC_USB3_PHY_SEC_BCR] = { 0x5000c },
+	[GCC_USB3_DP_PHY_PRIM_BCR] = { 0x50008 },
+	[GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
+	[GCC_USB3_DP_PHY_SEC_BCR] = { 0x50014 },
+	[GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
+};
+
+static struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s5_clk_src),
+};
+
+static const struct regmap_config gcc_sc7180_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0x18208c,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc gcc_sc7180_desc = {
+	.config = &gcc_sc7180_regmap_config,
+	.clk_hws = gcc_sc7180_hws,
+	.num_clk_hws = ARRAY_SIZE(gcc_sc7180_hws),
+	.clks = gcc_sc7180_clocks,
+	.num_clks = ARRAY_SIZE(gcc_sc7180_clocks),
+	.resets = gcc_sc7180_resets,
+	.num_resets = ARRAY_SIZE(gcc_sc7180_resets),
+	.gdscs = gcc_sc7180_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_sc7180_gdscs),
+};
+
+static const struct of_device_id gcc_sc7180_match_table[] = {
+	{ .compatible = "qcom,gcc-sc7180" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_sc7180_match_table);
+
+static int gcc_sc7180_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+	int ret;
+
+	regmap = qcom_cc_map(pdev, &gcc_sc7180_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/*
+	 * Disable the GPLL0 active input to MM blocks, NPU
+	 * and GPU via MISC registers.
+	 */
+	regmap_update_bits(regmap, 0x09ffc, 0x3, 0x3);
+	regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
+	regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
+
+	/*
+	 * Keep the clocks always-ON
+	 * GCC_CPUSS_GNOC_CLK, GCC_VIDEO_AHB_CLK, GCC_DISP_AHB_CLK
+	 * GCC_GPU_CFG_AHB_CLK
+	 */
+	regmap_update_bits(regmap, 0x48004, BIT(0), BIT(0));
+	regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0));
+	regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0));
+	regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
+
+	ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
+					ARRAY_SIZE(gcc_dfs_clocks));
+	if (ret)
+		return ret;
+
+	return qcom_cc_really_probe(pdev, &gcc_sc7180_desc, regmap);
+}
+
+static struct platform_driver gcc_sc7180_driver = {
+	.probe = gcc_sc7180_probe,
+	.driver = {
+		.name = "gcc-sc7180",
+		.of_match_table = gcc_sc7180_match_table,
+	},
+};
+
+static int __init gcc_sc7180_init(void)
+{
+	return platform_driver_register(&gcc_sc7180_driver);
+}
+core_initcall(gcc_sc7180_init);
+
+static void __exit gcc_sc7180_exit(void)
+{
+	platform_driver_unregister(&gcc_sc7180_driver);
+}
+module_exit(gcc_sc7180_exit);
+
+MODULE_DESCRIPTION("QTI GCC SC7180 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index 95be125c3bdd..f6ce888098be 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -408,7 +408,7 @@ static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
 	{ }
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s0_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s0_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s0_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -421,10 +421,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s1_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s1_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s1_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -437,10 +437,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s2_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s2_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s2_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -453,10 +453,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s3_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s3_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s3_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -469,10 +469,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s4_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s4_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s4_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -485,10 +485,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s5_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s5_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s5_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -501,10 +501,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s6_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s6_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s6_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -517,10 +517,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s6_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s6_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s6_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap0_s7_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap0_s7_clk_src_init = {
 	.name = "gcc_qupv3_wrap0_s7_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -533,10 +533,10 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s7_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap0_s7_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s7_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s0_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s0_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s0_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -549,10 +549,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s0_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s0_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s1_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s1_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s1_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -565,10 +565,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s1_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s1_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s2_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s2_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s2_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -581,10 +581,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s2_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s2_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s3_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s3_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s3_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -597,10 +597,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s3_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s3_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s4_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s4_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s4_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -613,10 +613,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s4_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s4_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s5_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s5_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s5_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -629,10 +629,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s5_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s5_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s6_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s6_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s6_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -645,10 +645,10 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s6_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s6_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s6_clk_src_init,
 };
 
-static struct clk_init_data gcc_qupv3_wrap1_s7_clk_init = {
+static struct clk_init_data gcc_qupv3_wrap1_s7_clk_src_init = {
 	.name = "gcc_qupv3_wrap1_s7_clk_src",
 	.parent_names = gcc_parent_names_0,
 	.num_parents = 4,
@@ -661,7 +661,7 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s7_clk_src = {
 	.hid_width = 5,
 	.parent_map = gcc_parent_map_0,
 	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-	.clkr.hw.init = &gcc_qupv3_wrap1_s7_clk_init,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s7_clk_src_init,
 };
 
 static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
@@ -3255,6 +3255,7 @@ static struct gdsc hlos1_vote_aggre_noc_mmu_audio_tbu_gdsc = {
 		.name = "hlos1_vote_aggre_noc_mmu_audio_tbu_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_aggre_noc_mmu_pcie_tbu_gdsc = {
@@ -3263,6 +3264,7 @@ static struct gdsc hlos1_vote_aggre_noc_mmu_pcie_tbu_gdsc = {
 		.name = "hlos1_vote_aggre_noc_mmu_pcie_tbu_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_aggre_noc_mmu_tbu1_gdsc = {
@@ -3271,6 +3273,7 @@ static struct gdsc hlos1_vote_aggre_noc_mmu_tbu1_gdsc = {
 		.name = "hlos1_vote_aggre_noc_mmu_tbu1_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_aggre_noc_mmu_tbu2_gdsc = {
@@ -3279,6 +3282,7 @@ static struct gdsc hlos1_vote_aggre_noc_mmu_tbu2_gdsc = {
 		.name = "hlos1_vote_aggre_noc_mmu_tbu2_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
@@ -3287,6 +3291,7 @@ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
 		.name = "hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc = {
@@ -3295,6 +3300,7 @@ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc = {
 		.name = "hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct gdsc hlos1_vote_mmnoc_mmu_tbu_sf_gdsc = {
@@ -3303,6 +3309,7 @@ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_sf_gdsc = {
 		.name = "hlos1_vote_mmnoc_mmu_tbu_sf_gdsc",
 	},
 	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
 };
 
 static struct clk_regmap *gcc_sdm845_clocks[] = {
@@ -3577,22 +3584,22 @@ static const struct of_device_id gcc_sdm845_match_table[] = {
 MODULE_DEVICE_TABLE(of, gcc_sdm845_match_table);
 
 static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s6_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s7_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s0_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s1_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s2_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s3_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s4_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s5_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s6_clk),
-	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s7_clk),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s6_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s7_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s5_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s6_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s7_clk_src),
 };
 
 static int gcc_sdm845_probe(struct platform_device *pdev)
@@ -3628,7 +3635,7 @@ static int __init gcc_sdm845_init(void)
 {
 	return platform_driver_register(&gcc_sdm845_driver);
 }
-subsys_initcall(gcc_sdm845_init);
+core_initcall(gcc_sdm845_init);
 
 static void __exit gcc_sdm845_exit(void)
 {
diff --git a/drivers/clk/qcom/gpucc-msm8998.c b/drivers/clk/qcom/gpucc-msm8998.c
new file mode 100644
index 000000000000..9b3923af02a1
--- /dev/null
+++ b/drivers/clk/qcom/gpucc-msm8998.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, Jeffrey Hugo
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gpucc-msm8998.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "clk-alpha-pll.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+#include "gdsc.h"
+
+enum {
+	P_XO,
+	P_GPLL0,
+	P_GPUPLL0_OUT_EVEN,
+};
+
+/* Instead of going directly to the block, XO is routed through this branch */
+static struct clk_branch gpucc_cxo_clk = {
+	.halt_reg = 0x1020,
+	.clkr = {
+		.enable_reg = 0x1020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpucc_cxo_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+				.name = "xo"
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_IS_CRITICAL,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_fabia_even[] = {
+	{ 0x0, 1 },
+	{ 0x1, 2 },
+	{ 0x3, 4 },
+	{ 0x7, 8 },
+	{ }
+};
+
+static struct clk_alpha_pll gpupll0 = {
+	.offset = 0x0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpupll0",
+		.parent_hws = (const struct clk_hw *[]){ &gpucc_cxo_clk.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_fixed_fabia_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpupll0_out_even = {
+	.offset = 0x0,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_fabia_even,
+	.num_post_div = ARRAY_SIZE(post_div_table_fabia_even),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpupll0_out_even",
+		.parent_hws = (const struct clk_hw *[]){ &gpupll0.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_fabia_ops,
+	},
+};
+
+static const struct parent_map gpu_xo_gpll0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 },
+};
+
+static const struct clk_parent_data gpu_xo_gpll0[] = {
+	{ .hw = &gpucc_cxo_clk.clkr.hw },
+	{ .fw_name = "gpll0", .name = "gpll0" },
+};
+
+static const struct parent_map gpu_xo_gpupll0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPUPLL0_OUT_EVEN, 1 },
+};
+
+static const struct clk_parent_data gpu_xo_gpupll0[] = {
+	{ .hw = &gpucc_cxo_clk.clkr.hw },
+	{ .hw = &gpupll0_out_even.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_rbcpr_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(50000000, P_GPLL0, 12, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 rbcpr_clk_src = {
+	.cmd_rcgr = 0x1030,
+	.hid_width = 5,
+	.parent_map = gpu_xo_gpll0_map,
+	.freq_tbl = ftbl_rbcpr_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "rbcpr_clk_src",
+		.parent_data = gpu_xo_gpll0,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gfx3d_clk_src[] = {
+	{ .src = P_GPUPLL0_OUT_EVEN, .pre_div = 3 },
+	{ }
+};
+
+static struct clk_rcg2 gfx3d_clk_src = {
+	.cmd_rcgr = 0x1070,
+	.hid_width = 5,
+	.parent_map = gpu_xo_gpupll0_map,
+	.freq_tbl = ftbl_gfx3d_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gfx3d_clk_src",
+		.parent_data = gpu_xo_gpupll0,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+	},
+};
+
+static const struct freq_tbl ftbl_rbbmtimer_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 rbbmtimer_clk_src = {
+	.cmd_rcgr = 0x10b0,
+	.hid_width = 5,
+	.parent_map = gpu_xo_gpll0_map,
+	.freq_tbl = ftbl_rbbmtimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "rbbmtimer_clk_src",
+		.parent_data = gpu_xo_gpll0,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gfx3d_isense_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(40000000, P_GPLL0, 15, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(300000000, P_GPLL0, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gfx3d_isense_clk_src = {
+	.cmd_rcgr = 0x1100,
+	.hid_width = 5,
+	.parent_map = gpu_xo_gpll0_map,
+	.freq_tbl = ftbl_gfx3d_isense_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gfx3d_isense_clk_src",
+		.parent_data = gpu_xo_gpll0,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch rbcpr_clk = {
+	.halt_reg = 0x1054,
+	.clkr = {
+		.enable_reg = 0x1054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "rbcpr_clk",
+			.parent_hws = (const struct clk_hw *[]){ &rbcpr_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gfx3d_clk = {
+	.halt_reg = 0x1098,
+	.clkr = {
+		.enable_reg = 0x1098,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gfx3d_clk",
+			.parent_hws = (const struct clk_hw *[]){ &gfx3d_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch rbbmtimer_clk = {
+	.halt_reg = 0x10d0,
+	.clkr = {
+		.enable_reg = 0x10d0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "rbbmtimer_clk",
+			.parent_hws = (const struct clk_hw *[]){ &rbbmtimer_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gfx3d_isense_clk = {
+	.halt_reg = 0x1124,
+	.clkr = {
+		.enable_reg = 0x1124,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gfx3d_isense_clk",
+			.parent_hws = (const struct clk_hw *[]){ &gfx3d_isense_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc gpu_cx_gdsc = {
+	.gdscr = 0x1004,
+	.gds_hw_ctrl = 0x1008,
+	.pd = {
+		.name = "gpu_cx",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc gpu_gx_gdsc = {
+	.gdscr = 0x1094,
+	.clamp_io_ctrl = 0x130,
+	.pd = {
+		.name = "gpu_gx",
+	},
+	.parent = &gpu_cx_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = CLAMP_IO | AON_RESET,
+};
+
+static struct clk_regmap *gpucc_msm8998_clocks[] = {
+	[GPUPLL0] = &gpupll0.clkr,
+	[GPUPLL0_OUT_EVEN] = &gpupll0_out_even.clkr,
+	[RBCPR_CLK_SRC] = &rbcpr_clk_src.clkr,
+	[GFX3D_CLK_SRC] = &gfx3d_clk_src.clkr,
+	[RBBMTIMER_CLK_SRC] = &rbbmtimer_clk_src.clkr,
+	[GFX3D_ISENSE_CLK_SRC] = &gfx3d_isense_clk_src.clkr,
+	[RBCPR_CLK] = &rbcpr_clk.clkr,
+	[GFX3D_CLK] = &gfx3d_clk.clkr,
+	[RBBMTIMER_CLK] = &rbbmtimer_clk.clkr,
+	[GFX3D_ISENSE_CLK] = &gfx3d_isense_clk.clkr,
+	[GPUCC_CXO_CLK] = &gpucc_cxo_clk.clkr,
+};
+
+static struct gdsc *gpucc_msm8998_gdscs[] = {
+	[GPU_CX_GDSC] = &gpu_cx_gdsc,
+	[GPU_GX_GDSC] = &gpu_gx_gdsc,
+};
+
+static const struct qcom_reset_map gpucc_msm8998_resets[] = {
+	[GPU_CX_BCR] = { 0x1000 },
+	[RBCPR_BCR] = { 0x1050 },
+	[GPU_GX_BCR] = { 0x1090 },
+	[GPU_ISENSE_BCR] = { 0x1120 },
+};
+
+static const struct regmap_config gpucc_msm8998_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x9000,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc gpucc_msm8998_desc = {
+	.config = &gpucc_msm8998_regmap_config,
+	.clks = gpucc_msm8998_clocks,
+	.num_clks = ARRAY_SIZE(gpucc_msm8998_clocks),
+	.resets = gpucc_msm8998_resets,
+	.num_resets = ARRAY_SIZE(gpucc_msm8998_resets),
+	.gdscs = gpucc_msm8998_gdscs,
+	.num_gdscs = ARRAY_SIZE(gpucc_msm8998_gdscs),
+};
+
+static const struct of_device_id gpucc_msm8998_match_table[] = {
+	{ .compatible = "qcom,msm8998-gpucc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gpucc_msm8998_match_table);
+
+static int gpucc_msm8998_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	regmap = qcom_cc_map(pdev, &gpucc_msm8998_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/* force periph logic on to avoid perf counter corruption */
+	regmap_write_bits(regmap, gfx3d_clk.clkr.enable_reg, BIT(13), BIT(13));
+	/* tweak droop detector (GPUCC_GPU_DD_WRAP_CTRL) to reduce leakage */
+	regmap_write_bits(regmap, gfx3d_clk.clkr.enable_reg, BIT(0), BIT(0));
+
+	return qcom_cc_really_probe(pdev, &gpucc_msm8998_desc, regmap);
+}
+
+static struct platform_driver gpucc_msm8998_driver = {
+	.probe		= gpucc_msm8998_probe,
+	.driver		= {
+		.name	= "gpucc-msm8998",
+		.of_match_table = gpucc_msm8998_match_table,
+	},
+};
+module_platform_driver(gpucc_msm8998_driver);
+
+MODULE_DESCRIPTION("QCOM GPUCC MSM8998 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/q6sstop-qcs404.c b/drivers/clk/qcom/q6sstop-qcs404.c
new file mode 100644
index 000000000000..723f932fbf7d
--- /dev/null
+++ b/drivers/clk/qcom/q6sstop-qcs404.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,q6sstopcc-qcs404.h>
+
+#include "clk-regmap.h"
+#include "clk-branch.h"
+#include "common.h"
+#include "reset.h"
+
+static struct clk_branch lcc_ahbfabric_cbc_clk = {
+	.halt_reg = 0x1b004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1b004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_ahbfabric_cbc_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch lcc_q6ss_ahbs_cbc_clk = {
+	.halt_reg = 0x22000,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x22000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_q6ss_ahbs_cbc_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch lcc_q6ss_tcm_slave_cbc_clk = {
+	.halt_reg = 0x1c000,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x1c000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_q6ss_tcm_slave_cbc_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch lcc_q6ss_ahbm_cbc_clk = {
+	.halt_reg = 0x22004,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x22004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_q6ss_ahbm_cbc_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch lcc_q6ss_axim_cbc_clk = {
+	.halt_reg = 0x1c004,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x1c004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_q6ss_axim_cbc_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch lcc_q6ss_bcr_sleep_clk = {
+	.halt_reg = 0x6004,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x6004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "lcc_q6ss_bcr_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+/* TCSR clock */
+static struct clk_branch tcsr_lcc_csr_cbcr_clk = {
+	.halt_reg = 0x8008,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x8008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "tcsr_lcc_csr_cbcr_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct regmap_config q6sstop_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.fast_io	= true,
+};
+
+static struct clk_regmap *q6sstop_qcs404_clocks[] = {
+	[LCC_AHBFABRIC_CBC_CLK] = &lcc_ahbfabric_cbc_clk.clkr,
+	[LCC_Q6SS_AHBS_CBC_CLK] = &lcc_q6ss_ahbs_cbc_clk.clkr,
+	[LCC_Q6SS_TCM_SLAVE_CBC_CLK] = &lcc_q6ss_tcm_slave_cbc_clk.clkr,
+	[LCC_Q6SS_AHBM_CBC_CLK] = &lcc_q6ss_ahbm_cbc_clk.clkr,
+	[LCC_Q6SS_AXIM_CBC_CLK] = &lcc_q6ss_axim_cbc_clk.clkr,
+	[LCC_Q6SS_BCR_SLEEP_CLK] = &lcc_q6ss_bcr_sleep_clk.clkr,
+};
+
+static const struct qcom_reset_map q6sstop_qcs404_resets[] = {
+	[Q6SSTOP_BCR_RESET] = { 0x6000 },
+};
+
+static const struct qcom_cc_desc q6sstop_qcs404_desc = {
+	.config = &q6sstop_regmap_config,
+	.clks = q6sstop_qcs404_clocks,
+	.num_clks = ARRAY_SIZE(q6sstop_qcs404_clocks),
+	.resets = q6sstop_qcs404_resets,
+	.num_resets = ARRAY_SIZE(q6sstop_qcs404_resets),
+};
+
+static struct clk_regmap *tcsr_qcs404_clocks[] = {
+	[TCSR_Q6SS_LCC_CBCR_CLK] = &tcsr_lcc_csr_cbcr_clk.clkr,
+};
+
+static const struct qcom_cc_desc tcsr_qcs404_desc = {
+	.config = &q6sstop_regmap_config,
+	.clks = tcsr_qcs404_clocks,
+	.num_clks = ARRAY_SIZE(tcsr_qcs404_clocks),
+};
+
+static const struct of_device_id q6sstopcc_qcs404_match_table[] = {
+	{ .compatible = "qcom,qcs404-q6sstopcc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, q6sstopcc_qcs404_match_table);
+
+static int q6sstopcc_qcs404_probe(struct platform_device *pdev)
+{
+	const struct qcom_cc_desc *desc;
+	int ret;
+
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_clk_create(&pdev->dev);
+	if (ret)
+		goto disable_pm_runtime;
+
+	ret = pm_clk_add(&pdev->dev, NULL);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to acquire iface clock\n");
+		goto destroy_pm_clk;
+	}
+
+	q6sstop_regmap_config.name = "q6sstop_tcsr";
+	desc = &tcsr_qcs404_desc;
+
+	ret = qcom_cc_probe_by_index(pdev, 1, desc);
+	if (ret)
+		goto destroy_pm_clk;
+
+	q6sstop_regmap_config.name = "q6sstop_cc";
+	desc = &q6sstop_qcs404_desc;
+
+	ret = qcom_cc_probe_by_index(pdev, 0, desc);
+	if (ret)
+		goto destroy_pm_clk;
+
+	return 0;
+
+destroy_pm_clk:
+	pm_clk_destroy(&pdev->dev);
+
+disable_pm_runtime:
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static int q6sstopcc_qcs404_remove(struct platform_device *pdev)
+{
+	pm_clk_destroy(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops q6sstopcc_pm_ops = {
+	SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
+};
+
+static struct platform_driver q6sstopcc_qcs404_driver = {
+	.probe		= q6sstopcc_qcs404_probe,
+	.remove		= q6sstopcc_qcs404_remove,
+	.driver		= {
+		.name	= "qcs404-q6sstopcc",
+		.of_match_table = q6sstopcc_qcs404_match_table,
+		.pm = &q6sstopcc_pm_ops,
+	},
+};
+
+module_platform_driver(q6sstopcc_qcs404_driver);
+
+MODULE_DESCRIPTION("QTI QCS404 Q6SSTOP Clock Controller Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/renesas/Kconfig b/drivers/clk/renesas/Kconfig
index b879e3e3a6b4..4cd846bc98cc 100644
--- a/drivers/clk/renesas/Kconfig
+++ b/drivers/clk/renesas/Kconfig
@@ -12,6 +12,7 @@ config CLK_RENESAS
 	select CLK_R8A7745 if ARCH_R8A7745
 	select CLK_R8A77470 if ARCH_R8A77470
 	select CLK_R8A774A1 if ARCH_R8A774A1
+	select CLK_R8A774B1 if ARCH_R8A774B1
 	select CLK_R8A774C0 if ARCH_R8A774C0
 	select CLK_R8A7778 if ARCH_R8A7778
 	select CLK_R8A7779 if ARCH_R8A7779
@@ -20,7 +21,8 @@ config CLK_RENESAS
 	select CLK_R8A7792 if ARCH_R8A7792
 	select CLK_R8A7794 if ARCH_R8A7794
 	select CLK_R8A7795 if ARCH_R8A7795
-	select CLK_R8A7796 if ARCH_R8A7796
+	select CLK_R8A77960 if ARCH_R8A77960 || ARCH_R8A7796
+	select CLK_R8A77961 if ARCH_R8A77961
 	select CLK_R8A77965 if ARCH_R8A77965
 	select CLK_R8A77970 if ARCH_R8A77970
 	select CLK_R8A77980 if ARCH_R8A77980
@@ -31,17 +33,6 @@ config CLK_RENESAS
 
 if CLK_RENESAS
 
-config CLK_RENESAS_LEGACY
-	bool "Legacy DT clock support"
-	depends on CLK_R8A7790 || CLK_R8A7791 || CLK_R8A7792 || CLK_R8A7794
-	help
-	  Enable backward compatibility with old device trees describing a
-	  hierarchical representation of the various CPG and MSTP clocks.
-
-	  Say Y if you want your kernel to work with old DTBs.
-	  It is safe to say N if you use the DTS that is supplied with the
-	  current kernel source tree.
-
 # SoC
 config CLK_EMEV2
 	bool "Emma Mobile EV2 clock support" if COMPILE_TEST
@@ -80,6 +71,10 @@ config CLK_R8A774A1
 	bool "RZ/G2M clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
 
+config CLK_R8A774B1
+	bool "RZ/G2N clock support" if COMPILE_TEST
+	select CLK_RCAR_GEN3_CPG
+
 config CLK_R8A774C0
 	bool "RZ/G2E clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
@@ -94,24 +89,20 @@ config CLK_R8A7779
 
 config CLK_R8A7790
 	bool "R-Car H2 clock support" if COMPILE_TEST
-	select CLK_RCAR_GEN2 if CLK_RENESAS_LEGACY
 	select CLK_RCAR_GEN2_CPG
 	select CLK_RENESAS_DIV6
 
 config CLK_R8A7791
 	bool "R-Car M2-W/N clock support" if COMPILE_TEST
-	select CLK_RCAR_GEN2 if CLK_RENESAS_LEGACY
 	select CLK_RCAR_GEN2_CPG
 	select CLK_RENESAS_DIV6
 
 config CLK_R8A7792
 	bool "R-Car V2H clock support" if COMPILE_TEST
-	select CLK_RCAR_GEN2 if CLK_RENESAS_LEGACY
 	select CLK_RCAR_GEN2_CPG
 
 config CLK_R8A7794
 	bool "R-Car E2 clock support" if COMPILE_TEST
-	select CLK_RCAR_GEN2 if CLK_RENESAS_LEGACY
 	select CLK_RCAR_GEN2_CPG
 	select CLK_RENESAS_DIV6
 
@@ -119,10 +110,14 @@ config CLK_R8A7795
 	bool "R-Car H3 clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
 
-config CLK_R8A7796
+config CLK_R8A77960
 	bool "R-Car M3-W clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
 
+config CLK_R8A77961
+	bool "R-Car M3-W+ clock support" if COMPILE_TEST
+	select CLK_RCAR_GEN3_CPG
+
 config CLK_R8A77965
 	bool "R-Car M3-N clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
@@ -155,11 +150,6 @@ config CLK_SH73A0
 
 
 # Family
-config CLK_RCAR_GEN2
-	bool "R-Car Gen2 legacy clock support" if COMPILE_TEST
-	select CLK_RENESAS_CPG_MSTP
-	select CLK_RENESAS_DIV6
-
 config CLK_RCAR_GEN2_CPG
 	bool "R-Car Gen2 CPG clock support" if COMPILE_TEST
 	select CLK_RENESAS_CPG_MSSR
diff --git a/drivers/clk/renesas/Makefile b/drivers/clk/renesas/Makefile
index c793e3cc9452..4a722bc5aac7 100644
--- a/drivers/clk/renesas/Makefile
+++ b/drivers/clk/renesas/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CLK_R8A7743)		+= r8a7743-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7745)		+= r8a7745-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77470)		+= r8a77470-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A774A1)		+= r8a774a1-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A774B1)		+= r8a774b1-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A774C0)		+= r8a774c0-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7778)		+= clk-r8a7778.o
 obj-$(CONFIG_CLK_R8A7779)		+= clk-r8a7779.o
@@ -17,7 +18,8 @@ obj-$(CONFIG_CLK_R8A7791)		+= r8a7791-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7792)		+= r8a7792-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7794)		+= r8a7794-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7795)		+= r8a7795-cpg-mssr.o
-obj-$(CONFIG_CLK_R8A7796)		+= r8a7796-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A77960)		+= r8a7796-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A77961)		+= r8a7796-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77965)		+= r8a77965-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77970)		+= r8a77970-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77980)		+= r8a77980-cpg-mssr.o
@@ -27,7 +29,6 @@ obj-$(CONFIG_CLK_R9A06G032)		+= r9a06g032-clocks.o
 obj-$(CONFIG_CLK_SH73A0)		+= clk-sh73a0.o
 
 # Family
-obj-$(CONFIG_CLK_RCAR_GEN2)		+= clk-rcar-gen2.o
 obj-$(CONFIG_CLK_RCAR_GEN2_CPG)		+= rcar-gen2-cpg.o
 obj-$(CONFIG_CLK_RCAR_GEN3_CPG)		+= rcar-gen3-cpg.o
 obj-$(CONFIG_CLK_RCAR_USB2_CLOCK_SEL)	+= rcar-usb2-clock-sel.o
diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
index e326e6dc09fc..003e9ce45757 100644
--- a/drivers/clk/renesas/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c
@@ -189,10 +189,8 @@ static void __init cpg_mstp_clocks_init(struct device_node *np)
 	unsigned int i;
 
 	group = kzalloc(struct_size(group, clks, MSTP_MAX_CLOCKS), GFP_KERNEL);
-	if (group == NULL) {
-		kfree(group);
+	if (!group)
 		return;
-	}
 
 	clks = group->clks;
 	spin_lock_init(&group->lock);
diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
deleted file mode 100644
index da9fe3f032eb..000000000000
--- a/drivers/clk/renesas/clk-rcar-gen2.c
+++ /dev/null
@@ -1,457 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * rcar_gen2 Core CPG Clocks
- *
- * Copyright (C) 2013  Ideas On Board SPRL
- *
- * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- */
-
-#include <linux/clk-provider.h>
-#include <linux/clk/renesas.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/math64.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/soc/renesas/rcar-rst.h>
-
-struct rcar_gen2_cpg {
-	struct clk_onecell_data data;
-	spinlock_t lock;
-	void __iomem *reg;
-};
-
-#define CPG_FRQCRB			0x00000004
-#define CPG_FRQCRB_KICK			BIT(31)
-#define CPG_SDCKCR			0x00000074
-#define CPG_PLL0CR			0x000000d8
-#define CPG_FRQCRC			0x000000e0
-#define CPG_FRQCRC_ZFC_MASK		(0x1f << 8)
-#define CPG_FRQCRC_ZFC_SHIFT		8
-#define CPG_ADSPCKCR			0x0000025c
-#define CPG_RCANCKCR			0x00000270
-
-/* -----------------------------------------------------------------------------
- * Z Clock
- *
- * Traits of this clock:
- * prepare - clk_prepare only ensures that parents are prepared
- * enable - clk_enable only ensures that parents are enabled
- * rate - rate is adjustable.  clk->rate = parent->rate * mult / 32
- * parent - fixed parent.  No clk_set_parent support
- */
-
-struct cpg_z_clk {
-	struct clk_hw hw;
-	void __iomem *reg;
-	void __iomem *kick_reg;
-};
-
-#define to_z_clk(_hw)	container_of(_hw, struct cpg_z_clk, hw)
-
-static unsigned long cpg_z_clk_recalc_rate(struct clk_hw *hw,
-					   unsigned long parent_rate)
-{
-	struct cpg_z_clk *zclk = to_z_clk(hw);
-	unsigned int mult;
-	unsigned int val;
-
-	val = (readl(zclk->reg) & CPG_FRQCRC_ZFC_MASK) >> CPG_FRQCRC_ZFC_SHIFT;
-	mult = 32 - val;
-
-	return div_u64((u64)parent_rate * mult, 32);
-}
-
-static long cpg_z_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				 unsigned long *parent_rate)
-{
-	unsigned long prate  = *parent_rate;
-	unsigned int mult;
-
-	if (!prate)
-		prate = 1;
-
-	mult = div_u64((u64)rate * 32, prate);
-	mult = clamp(mult, 1U, 32U);
-
-	return *parent_rate / 32 * mult;
-}
-
-static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-			      unsigned long parent_rate)
-{
-	struct cpg_z_clk *zclk = to_z_clk(hw);
-	unsigned int mult;
-	u32 val, kick;
-	unsigned int i;
-
-	mult = div_u64((u64)rate * 32, parent_rate);
-	mult = clamp(mult, 1U, 32U);
-
-	if (readl(zclk->kick_reg) & CPG_FRQCRB_KICK)
-		return -EBUSY;
-
-	val = readl(zclk->reg);
-	val &= ~CPG_FRQCRC_ZFC_MASK;
-	val |= (32 - mult) << CPG_FRQCRC_ZFC_SHIFT;
-	writel(val, zclk->reg);
-
-	/*
-	 * Set KICK bit in FRQCRB to update hardware setting and wait for
-	 * clock change completion.
-	 */
-	kick = readl(zclk->kick_reg);
-	kick |= CPG_FRQCRB_KICK;
-	writel(kick, zclk->kick_reg);
-
-	/*
-	 * Note: There is no HW information about the worst case latency.
-	 *
-	 * Using experimental measurements, it seems that no more than
-	 * ~10 iterations are needed, independently of the CPU rate.
-	 * Since this value might be dependent on external xtal rate, pll1
-	 * rate or even the other emulation clocks rate, use 1000 as a
-	 * "super" safe value.
-	 */
-	for (i = 1000; i; i--) {
-		if (!(readl(zclk->kick_reg) & CPG_FRQCRB_KICK))
-			return 0;
-
-		cpu_relax();
-	}
-
-	return -ETIMEDOUT;
-}
-
-static const struct clk_ops cpg_z_clk_ops = {
-	.recalc_rate = cpg_z_clk_recalc_rate,
-	.round_rate = cpg_z_clk_round_rate,
-	.set_rate = cpg_z_clk_set_rate,
-};
-
-static struct clk * __init cpg_z_clk_register(struct rcar_gen2_cpg *cpg)
-{
-	static const char *parent_name = "pll0";
-	struct clk_init_data init;
-	struct cpg_z_clk *zclk;
-	struct clk *clk;
-
-	zclk = kzalloc(sizeof(*zclk), GFP_KERNEL);
-	if (!zclk)
-		return ERR_PTR(-ENOMEM);
-
-	init.name = "z";
-	init.ops = &cpg_z_clk_ops;
-	init.flags = 0;
-	init.parent_names = &parent_name;
-	init.num_parents = 1;
-
-	zclk->reg = cpg->reg + CPG_FRQCRC;
-	zclk->kick_reg = cpg->reg + CPG_FRQCRB;
-	zclk->hw.init = &init;
-
-	clk = clk_register(NULL, &zclk->hw);
-	if (IS_ERR(clk))
-		kfree(zclk);
-
-	return clk;
-}
-
-static struct clk * __init cpg_rcan_clk_register(struct rcar_gen2_cpg *cpg,
-						 struct device_node *np)
-{
-	const char *parent_name = of_clk_get_parent_name(np, 1);
-	struct clk_fixed_factor *fixed;
-	struct clk_gate *gate;
-	struct clk *clk;
-
-	fixed = kzalloc(sizeof(*fixed), GFP_KERNEL);
-	if (!fixed)
-		return ERR_PTR(-ENOMEM);
-
-	fixed->mult = 1;
-	fixed->div = 6;
-
-	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
-	if (!gate) {
-		kfree(fixed);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	gate->reg = cpg->reg + CPG_RCANCKCR;
-	gate->bit_idx = 8;
-	gate->flags = CLK_GATE_SET_TO_DISABLE;
-	gate->lock = &cpg->lock;
-
-	clk = clk_register_composite(NULL, "rcan", &parent_name, 1, NULL, NULL,
-				     &fixed->hw, &clk_fixed_factor_ops,
-				     &gate->hw, &clk_gate_ops, 0);
-	if (IS_ERR(clk)) {
-		kfree(gate);
-		kfree(fixed);
-	}
-
-	return clk;
-}
-
-/* ADSP divisors */
-static const struct clk_div_table cpg_adsp_div_table[] = {
-	{  1,  3 }, {  2,  4 }, {  3,  6 }, {  4,  8 },
-	{  5, 12 }, {  6, 16 }, {  7, 18 }, {  8, 24 },
-	{ 10, 36 }, { 11, 48 }, {  0,  0 },
-};
-
-static struct clk * __init cpg_adsp_clk_register(struct rcar_gen2_cpg *cpg)
-{
-	const char *parent_name = "pll1";
-	struct clk_divider *div;
-	struct clk_gate *gate;
-	struct clk *clk;
-
-	div = kzalloc(sizeof(*div), GFP_KERNEL);
-	if (!div)
-		return ERR_PTR(-ENOMEM);
-
-	div->reg = cpg->reg + CPG_ADSPCKCR;
-	div->width = 4;
-	div->table = cpg_adsp_div_table;
-	div->lock = &cpg->lock;
-
-	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
-	if (!gate) {
-		kfree(div);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	gate->reg = cpg->reg + CPG_ADSPCKCR;
-	gate->bit_idx = 8;
-	gate->flags = CLK_GATE_SET_TO_DISABLE;
-	gate->lock = &cpg->lock;
-
-	clk = clk_register_composite(NULL, "adsp", &parent_name, 1, NULL, NULL,
-				     &div->hw, &clk_divider_ops,
-				     &gate->hw, &clk_gate_ops, 0);
-	if (IS_ERR(clk)) {
-		kfree(gate);
-		kfree(div);
-	}
-
-	return clk;
-}
-
-/* -----------------------------------------------------------------------------
- * CPG Clock Data
- */
-
-/*
- *   MD		EXTAL		PLL0	PLL1	PLL3
- * 14 13 19	(MHz)		*1	*1
- *---------------------------------------------------
- * 0  0  0	15 x 1		x172/2	x208/2	x106
- * 0  0  1	15 x 1		x172/2	x208/2	x88
- * 0  1  0	20 x 1		x130/2	x156/2	x80
- * 0  1  1	20 x 1		x130/2	x156/2	x66
- * 1  0  0	26 / 2		x200/2	x240/2	x122
- * 1  0  1	26 / 2		x200/2	x240/2	x102
- * 1  1  0	30 / 2		x172/2	x208/2	x106
- * 1  1  1	30 / 2		x172/2	x208/2	x88
- *
- * *1 :	Table 7.6 indicates VCO output (PLLx = VCO/2)
- */
-#define CPG_PLL_CONFIG_INDEX(md)	((((md) & BIT(14)) >> 12) | \
-					 (((md) & BIT(13)) >> 12) | \
-					 (((md) & BIT(19)) >> 19))
-struct cpg_pll_config {
-	unsigned int extal_div;
-	unsigned int pll1_mult;
-	unsigned int pll3_mult;
-	unsigned int pll0_mult;		/* For R-Car V2H and E2 only */
-};
-
-static const struct cpg_pll_config cpg_pll_configs[8] __initconst = {
-	{ 1, 208, 106, 200 }, { 1, 208,  88, 200 },
-	{ 1, 156,  80, 150 }, { 1, 156,  66, 150 },
-	{ 2, 240, 122, 230 }, { 2, 240, 102, 230 },
-	{ 2, 208, 106, 200 }, { 2, 208,  88, 200 },
-};
-
-/* SDHI divisors */
-static const struct clk_div_table cpg_sdh_div_table[] = {
-	{  0,  2 }, {  1,  3 }, {  2,  4 }, {  3,  6 },
-	{  4,  8 }, {  5, 12 }, {  6, 16 }, {  7, 18 },
-	{  8, 24 }, { 10, 36 }, { 11, 48 }, {  0,  0 },
-};
-
-static const struct clk_div_table cpg_sd01_div_table[] = {
-	{  4,  8 },
-	{  5, 12 }, {  6, 16 }, {  7, 18 }, {  8, 24 },
-	{ 10, 36 }, { 11, 48 }, { 12, 10 }, {  0,  0 },
-};
-
-/* -----------------------------------------------------------------------------
- * Initialization
- */
-
-static u32 cpg_mode __initdata;
-
-static const char * const pll0_mult_match[] = {
-	"renesas,r8a7792-cpg-clocks",
-	"renesas,r8a7794-cpg-clocks",
-	NULL
-};
-
-static struct clk * __init
-rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg,
-			     const struct cpg_pll_config *config,
-			     const char *name)
-{
-	const struct clk_div_table *table = NULL;
-	const char *parent_name;
-	unsigned int shift;
-	unsigned int mult = 1;
-	unsigned int div = 1;
-
-	if (!strcmp(name, "main")) {
-		parent_name = of_clk_get_parent_name(np, 0);
-		div = config->extal_div;
-	} else if (!strcmp(name, "pll0")) {
-		/* PLL0 is a configurable multiplier clock. Register it as a
-		 * fixed factor clock for now as there's no generic multiplier
-		 * clock implementation and we currently have no need to change
-		 * the multiplier value.
-		 */
-		if (of_device_compatible_match(np, pll0_mult_match)) {
-			/* R-Car V2H and E2 do not have PLL0CR */
-			mult = config->pll0_mult;
-			div = 3;
-		} else {
-			u32 value = readl(cpg->reg + CPG_PLL0CR);
-			mult = ((value >> 24) & ((1 << 7) - 1)) + 1;
-		}
-		parent_name = "main";
-	} else if (!strcmp(name, "pll1")) {
-		parent_name = "main";
-		mult = config->pll1_mult / 2;
-	} else if (!strcmp(name, "pll3")) {
-		parent_name = "main";
-		mult = config->pll3_mult;
-	} else if (!strcmp(name, "lb")) {
-		parent_name = "pll1";
-		div = cpg_mode & BIT(18) ? 36 : 24;
-	} else if (!strcmp(name, "qspi")) {
-		parent_name = "pll1_div2";
-		div = (cpg_mode & (BIT(3) | BIT(2) | BIT(1))) == BIT(2)
-		    ? 8 : 10;
-	} else if (!strcmp(name, "sdh")) {
-		parent_name = "pll1";
-		table = cpg_sdh_div_table;
-		shift = 8;
-	} else if (!strcmp(name, "sd0")) {
-		parent_name = "pll1";
-		table = cpg_sd01_div_table;
-		shift = 4;
-	} else if (!strcmp(name, "sd1")) {
-		parent_name = "pll1";
-		table = cpg_sd01_div_table;
-		shift = 0;
-	} else if (!strcmp(name, "z")) {
-		return cpg_z_clk_register(cpg);
-	} else if (!strcmp(name, "rcan")) {
-		return cpg_rcan_clk_register(cpg, np);
-	} else if (!strcmp(name, "adsp")) {
-		return cpg_adsp_clk_register(cpg);
-	} else {
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (!table)
-		return clk_register_fixed_factor(NULL, name, parent_name, 0,
-						 mult, div);
-	else
-		return clk_register_divider_table(NULL, name, parent_name, 0,
-						 cpg->reg + CPG_SDCKCR, shift,
-						 4, 0, table, &cpg->lock);
-}
-
-/*
- * Reset register definitions.
- */
-#define MODEMR	0xe6160060
-
-static u32 __init rcar_gen2_read_mode_pins(void)
-{
-	void __iomem *modemr = ioremap_nocache(MODEMR, 4);
-	u32 mode;
-
-	BUG_ON(!modemr);
-	mode = ioread32(modemr);
-	iounmap(modemr);
-
-	return mode;
-}
-
-static void __init rcar_gen2_cpg_clocks_init(struct device_node *np)
-{
-	const struct cpg_pll_config *config;
-	struct rcar_gen2_cpg *cpg;
-	struct clk **clks;
-	unsigned int i;
-	int num_clks;
-
-	if (rcar_rst_read_mode_pins(&cpg_mode)) {
-		/* Backward-compatibility with old DT */
-		pr_warn("%pOF: failed to obtain mode pins from RST\n", np);
-		cpg_mode = rcar_gen2_read_mode_pins();
-	}
-
-	num_clks = of_property_count_strings(np, "clock-output-names");
-	if (num_clks < 0) {
-		pr_err("%s: failed to count clocks\n", __func__);
-		return;
-	}
-
-	cpg = kzalloc(sizeof(*cpg), GFP_KERNEL);
-	clks = kcalloc(num_clks, sizeof(*clks), GFP_KERNEL);
-	if (cpg == NULL || clks == NULL) {
-		/* We're leaking memory on purpose, there's no point in cleaning
-		 * up as the system won't boot anyway.
-		 */
-		return;
-	}
-
-	spin_lock_init(&cpg->lock);
-
-	cpg->data.clks = clks;
-	cpg->data.clk_num = num_clks;
-
-	cpg->reg = of_iomap(np, 0);
-	if (WARN_ON(cpg->reg == NULL))
-		return;
-
-	config = &cpg_pll_configs[CPG_PLL_CONFIG_INDEX(cpg_mode)];
-
-	for (i = 0; i < num_clks; ++i) {
-		const char *name;
-		struct clk *clk;
-
-		of_property_read_string_index(np, "clock-output-names", i,
-					      &name);
-
-		clk = rcar_gen2_cpg_register_clock(np, cpg, config, name);
-		if (IS_ERR(clk))
-			pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
-			       __func__, np, name, PTR_ERR(clk));
-		else
-			cpg->data.clks[i] = clk;
-	}
-
-	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
-
-	cpg_mstp_add_clk_domain(np);
-}
-CLK_OF_DECLARE(rcar_gen2_cpg_clks, "renesas,rcar-gen2-cpg-clocks",
-	       rcar_gen2_cpg_clocks_init);
diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c
index fbc34beafc78..7b703f14e20b 100644
--- a/drivers/clk/renesas/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c
@@ -37,8 +37,8 @@ static u16 __init rz_cpg_read_mode_pins(void)
 	void __iomem *ppr0, *pibc0;
 	u16 modes;
 
-	ppr0 = ioremap_nocache(PPR0, 2);
-	pibc0 = ioremap_nocache(PIBC0, 2);
+	ppr0 = ioremap(PPR0, 2);
+	pibc0 = ioremap(PIBC0, 2);
 	BUG_ON(!ppr0 || !pibc0);
 	iowrite16(4, pibc0);	/* enable input buffer */
 	modes = ioread16(ppr0);
diff --git a/drivers/clk/renesas/r8a774b1-cpg-mssr.c b/drivers/clk/renesas/r8a774b1-cpg-mssr.c
new file mode 100644
index 000000000000..c9af70917312
--- /dev/null
+++ b/drivers/clk/renesas/r8a774b1-cpg-mssr.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r8a774b1 Clock Pulse Generator / Module Standby and Software Reset
+ *
+ * Copyright (C) 2019 Renesas Electronics Corp.
+ *
+ * Based on r8a7796-cpg-mssr.c
+ *
+ * Copyright (C) 2016 Glider bvba
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/soc/renesas/rcar-rst.h>
+
+#include <dt-bindings/clock/r8a774b1-cpg-mssr.h>
+
+#include "renesas-cpg-mssr.h"
+#include "rcar-gen3-cpg.h"
+
+enum clk_ids {
+	/* Core Clock Outputs exported to DT */
+	LAST_DT_CORE_CLK = R8A774B1_CLK_CANFD,
+
+	/* External Input Clocks */
+	CLK_EXTAL,
+	CLK_EXTALR,
+
+	/* Internal Core Clocks */
+	CLK_MAIN,
+	CLK_PLL0,
+	CLK_PLL1,
+	CLK_PLL3,
+	CLK_PLL4,
+	CLK_PLL1_DIV2,
+	CLK_PLL1_DIV4,
+	CLK_S0,
+	CLK_S1,
+	CLK_S2,
+	CLK_S3,
+	CLK_SDSRC,
+	CLK_RINT,
+
+	/* Module Clocks */
+	MOD_CLK_BASE
+};
+
+static const struct cpg_core_clk r8a774b1_core_clks[] __initconst = {
+	/* External Clock Inputs */
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
+
+	/* Internal Core Clocks */
+	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
+	DEF_BASE(".pll0",       CLK_PLL0, CLK_TYPE_GEN3_PLL0, CLK_MAIN),
+	DEF_BASE(".pll1",       CLK_PLL1, CLK_TYPE_GEN3_PLL1, CLK_MAIN),
+	DEF_BASE(".pll3",       CLK_PLL3, CLK_TYPE_GEN3_PLL3, CLK_MAIN),
+	DEF_BASE(".pll4",       CLK_PLL4, CLK_TYPE_GEN3_PLL4, CLK_MAIN),
+
+	DEF_FIXED(".pll1_div2", CLK_PLL1_DIV2,     CLK_PLL1,       2, 1),
+	DEF_FIXED(".pll1_div4", CLK_PLL1_DIV4,     CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED(".s0",        CLK_S0,            CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED(".s1",        CLK_S1,            CLK_PLL1_DIV2,  3, 1),
+	DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
+	DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
+
+	DEF_GEN3_OSC(".r",      CLK_RINT,          CLK_EXTAL,      32),
+
+	/* Core Clock Outputs */
+	DEF_GEN3_Z("z",         R8A774B1_CLK_Z,     CLK_TYPE_GEN3_Z,  CLK_PLL0, 2, 8),
+	DEF_FIXED("ztr",        R8A774B1_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED("ztrd2",      R8A774B1_CLK_ZTRD2, CLK_PLL1_DIV2, 12, 1),
+	DEF_FIXED("zt",         R8A774B1_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
+	DEF_FIXED("zx",         R8A774B1_CLK_ZX,    CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED("s0d1",       R8A774B1_CLK_S0D1,  CLK_S0,         1, 1),
+	DEF_FIXED("s0d2",       R8A774B1_CLK_S0D2,  CLK_S0,         2, 1),
+	DEF_FIXED("s0d3",       R8A774B1_CLK_S0D3,  CLK_S0,         3, 1),
+	DEF_FIXED("s0d4",       R8A774B1_CLK_S0D4,  CLK_S0,         4, 1),
+	DEF_FIXED("s0d6",       R8A774B1_CLK_S0D6,  CLK_S0,         6, 1),
+	DEF_FIXED("s0d8",       R8A774B1_CLK_S0D8,  CLK_S0,         8, 1),
+	DEF_FIXED("s0d12",      R8A774B1_CLK_S0D12, CLK_S0,        12, 1),
+	DEF_FIXED("s1d2",       R8A774B1_CLK_S1D2,  CLK_S1,         2, 1),
+	DEF_FIXED("s1d4",       R8A774B1_CLK_S1D4,  CLK_S1,         4, 1),
+	DEF_FIXED("s2d1",       R8A774B1_CLK_S2D1,  CLK_S2,         1, 1),
+	DEF_FIXED("s2d2",       R8A774B1_CLK_S2D2,  CLK_S2,         2, 1),
+	DEF_FIXED("s2d4",       R8A774B1_CLK_S2D4,  CLK_S2,         4, 1),
+	DEF_FIXED("s3d1",       R8A774B1_CLK_S3D1,  CLK_S3,         1, 1),
+	DEF_FIXED("s3d2",       R8A774B1_CLK_S3D2,  CLK_S3,         2, 1),
+	DEF_FIXED("s3d4",       R8A774B1_CLK_S3D4,  CLK_S3,         4, 1),
+
+	DEF_GEN3_SD("sd0",      R8A774B1_CLK_SD0,   CLK_SDSRC,     0x074),
+	DEF_GEN3_SD("sd1",      R8A774B1_CLK_SD1,   CLK_SDSRC,     0x078),
+	DEF_GEN3_SD("sd2",      R8A774B1_CLK_SD2,   CLK_SDSRC,     0x268),
+	DEF_GEN3_SD("sd3",      R8A774B1_CLK_SD3,   CLK_SDSRC,     0x26c),
+
+	DEF_FIXED("cl",         R8A774B1_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
+	DEF_FIXED("cp",         R8A774B1_CLK_CP,    CLK_EXTAL,      2, 1),
+	DEF_FIXED("cpex",       R8A774B1_CLK_CPEX,  CLK_EXTAL,      2, 1),
+
+	DEF_DIV6P1("canfd",     R8A774B1_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
+	DEF_DIV6P1("csi0",      R8A774B1_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
+	DEF_DIV6P1("mso",       R8A774B1_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
+	DEF_DIV6P1("hdmi",      R8A774B1_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
+
+	DEF_GEN3_OSC("osc",     R8A774B1_CLK_OSC,   CLK_EXTAL,     8),
+
+	DEF_BASE("r",           R8A774B1_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
+};
+
+static const struct mssr_mod_clk r8a774b1_mod_clks[] __initconst = {
+	DEF_MOD("tmu4",			 121,	R8A774B1_CLK_S0D6),
+	DEF_MOD("tmu3",			 122,	R8A774B1_CLK_S3D2),
+	DEF_MOD("tmu2",			 123,	R8A774B1_CLK_S3D2),
+	DEF_MOD("tmu1",			 124,	R8A774B1_CLK_S3D2),
+	DEF_MOD("tmu0",			 125,	R8A774B1_CLK_CP),
+	DEF_MOD("fdp1-0",		 119,	R8A774B1_CLK_S0D1),
+	DEF_MOD("scif5",		 202,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scif4",		 203,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scif3",		 204,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scif1",		 206,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scif0",		 207,	R8A774B1_CLK_S3D4),
+	DEF_MOD("msiof3",		 208,	R8A774B1_CLK_MSO),
+	DEF_MOD("msiof2",		 209,	R8A774B1_CLK_MSO),
+	DEF_MOD("msiof1",		 210,	R8A774B1_CLK_MSO),
+	DEF_MOD("msiof0",		 211,	R8A774B1_CLK_MSO),
+	DEF_MOD("sys-dmac2",		 217,	R8A774B1_CLK_S3D1),
+	DEF_MOD("sys-dmac1",		 218,	R8A774B1_CLK_S3D1),
+	DEF_MOD("sys-dmac0",		 219,	R8A774B1_CLK_S0D3),
+	DEF_MOD("cmt3",			 300,	R8A774B1_CLK_R),
+	DEF_MOD("cmt2",			 301,	R8A774B1_CLK_R),
+	DEF_MOD("cmt1",			 302,	R8A774B1_CLK_R),
+	DEF_MOD("cmt0",			 303,	R8A774B1_CLK_R),
+	DEF_MOD("tpu0",			 304,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scif2",		 310,	R8A774B1_CLK_S3D4),
+	DEF_MOD("sdif3",		 311,	R8A774B1_CLK_SD3),
+	DEF_MOD("sdif2",		 312,	R8A774B1_CLK_SD2),
+	DEF_MOD("sdif1",		 313,	R8A774B1_CLK_SD1),
+	DEF_MOD("sdif0",		 314,	R8A774B1_CLK_SD0),
+	DEF_MOD("pcie1",		 318,	R8A774B1_CLK_S3D1),
+	DEF_MOD("pcie0",		 319,	R8A774B1_CLK_S3D1),
+	DEF_MOD("usb3-if0",		 328,	R8A774B1_CLK_S3D1),
+	DEF_MOD("usb-dmac0",		 330,	R8A774B1_CLK_S3D1),
+	DEF_MOD("usb-dmac1",		 331,	R8A774B1_CLK_S3D1),
+	DEF_MOD("rwdt",			 402,	R8A774B1_CLK_R),
+	DEF_MOD("intc-ex",		 407,	R8A774B1_CLK_CP),
+	DEF_MOD("intc-ap",		 408,	R8A774B1_CLK_S0D3),
+	DEF_MOD("audmac1",		 501,	R8A774B1_CLK_S1D2),
+	DEF_MOD("audmac0",		 502,	R8A774B1_CLK_S1D2),
+	DEF_MOD("hscif4",		 516,	R8A774B1_CLK_S3D1),
+	DEF_MOD("hscif3",		 517,	R8A774B1_CLK_S3D1),
+	DEF_MOD("hscif2",		 518,	R8A774B1_CLK_S3D1),
+	DEF_MOD("hscif1",		 519,	R8A774B1_CLK_S3D1),
+	DEF_MOD("hscif0",		 520,	R8A774B1_CLK_S3D1),
+	DEF_MOD("thermal",		 522,	R8A774B1_CLK_CP),
+	DEF_MOD("pwm",			 523,	R8A774B1_CLK_S0D12),
+	DEF_MOD("fcpvd1",		 602,	R8A774B1_CLK_S0D2),
+	DEF_MOD("fcpvd0",		 603,	R8A774B1_CLK_S0D2),
+	DEF_MOD("fcpvb0",		 607,	R8A774B1_CLK_S0D1),
+	DEF_MOD("fcpvi0",		 611,	R8A774B1_CLK_S0D1),
+	DEF_MOD("fcpf0",		 615,	R8A774B1_CLK_S0D1),
+	DEF_MOD("fcpcs",		 619,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vspd1",		 622,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vspd0",		 623,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vspb",			 626,	R8A774B1_CLK_S0D1),
+	DEF_MOD("vspi0",		 631,	R8A774B1_CLK_S0D1),
+	DEF_MOD("ehci1",		 702,	R8A774B1_CLK_S3D2),
+	DEF_MOD("ehci0",		 703,	R8A774B1_CLK_S3D2),
+	DEF_MOD("hsusb",		 704,	R8A774B1_CLK_S3D2),
+	DEF_MOD("csi20",		 714,	R8A774B1_CLK_CSI0),
+	DEF_MOD("csi40",		 716,	R8A774B1_CLK_CSI0),
+	DEF_MOD("du3",			 721,	R8A774B1_CLK_S2D1),
+	DEF_MOD("du1",			 723,	R8A774B1_CLK_S2D1),
+	DEF_MOD("du0",			 724,	R8A774B1_CLK_S2D1),
+	DEF_MOD("lvds",			 727,	R8A774B1_CLK_S2D1),
+	DEF_MOD("hdmi0",		 729,	R8A774B1_CLK_HDMI),
+	DEF_MOD("vin7",			 804,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin6",			 805,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin5",			 806,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin4",			 807,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin3",			 808,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin2",			 809,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin1",			 810,	R8A774B1_CLK_S0D2),
+	DEF_MOD("vin0",			 811,	R8A774B1_CLK_S0D2),
+	DEF_MOD("etheravb",		 812,	R8A774B1_CLK_S0D6),
+	DEF_MOD("sata0",		 815,	R8A774B1_CLK_S3D2),
+	DEF_MOD("gpio7",		 905,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio6",		 906,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio5",		 907,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio4",		 908,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio3",		 909,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio2",		 910,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio1",		 911,	R8A774B1_CLK_S3D4),
+	DEF_MOD("gpio0",		 912,	R8A774B1_CLK_S3D4),
+	DEF_MOD("can-fd",		 914,	R8A774B1_CLK_S3D2),
+	DEF_MOD("can-if1",		 915,	R8A774B1_CLK_S3D4),
+	DEF_MOD("can-if0",		 916,	R8A774B1_CLK_S3D4),
+	DEF_MOD("i2c6",			 918,	R8A774B1_CLK_S0D6),
+	DEF_MOD("i2c5",			 919,	R8A774B1_CLK_S0D6),
+	DEF_MOD("i2c-dvfs",		 926,	R8A774B1_CLK_CP),
+	DEF_MOD("i2c4",			 927,	R8A774B1_CLK_S0D6),
+	DEF_MOD("i2c3",			 928,	R8A774B1_CLK_S0D6),
+	DEF_MOD("i2c2",			 929,	R8A774B1_CLK_S3D2),
+	DEF_MOD("i2c1",			 930,	R8A774B1_CLK_S3D2),
+	DEF_MOD("i2c0",			 931,	R8A774B1_CLK_S3D2),
+	DEF_MOD("ssi-all",		1005,	R8A774B1_CLK_S3D4),
+	DEF_MOD("ssi9",			1006,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi8",			1007,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi7",			1008,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi6",			1009,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi5",			1010,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi4",			1011,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi3",			1012,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi2",			1013,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi1",			1014,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi0",			1015,	MOD_CLK_ID(1005)),
+	DEF_MOD("scu-all",		1017,	R8A774B1_CLK_S3D4),
+	DEF_MOD("scu-dvc1",		1018,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-dvc0",		1019,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-ctu1-mix1",	1020,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-ctu0-mix0",	1021,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src9",		1022,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src8",		1023,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src7",		1024,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src6",		1025,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src5",		1026,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src4",		1027,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src3",		1028,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src2",		1029,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src1",		1030,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src0",		1031,	MOD_CLK_ID(1017)),
+};
+
+static const unsigned int r8a774b1_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(408),	/* INTC-AP (GIC) */
+};
+
+/*
+ * CPG Clock Data
+ */
+
+/*
+ *   MD		EXTAL		PLL0	PLL1	PLL3	PLL4	OSC
+ * 14 13 19 17	(MHz)
+ *-----------------------------------------------------------------
+ * 0  0  0  0	16.66 x 1	x180	x192	x192	x144	/16
+ * 0  0  0  1	16.66 x 1	x180	x192	x128	x144	/16
+ * 0  0  1  0	Prohibited setting
+ * 0  0  1  1	16.66 x 1	x180	x192	x192	x144	/16
+ * 0  1  0  0	20    x 1	x150	x160	x160	x120	/19
+ * 0  1  0  1	20    x 1	x150	x160	x106	x120	/19
+ * 0  1  1  0	Prohibited setting
+ * 0  1  1  1	20    x 1	x150	x160	x160	x120	/19
+ * 1  0  0  0	25    x 1	x120	x128	x128	x96	/24
+ * 1  0  0  1	25    x 1	x120	x128	x84	x96	/24
+ * 1  0  1  0	Prohibited setting
+ * 1  0  1  1	25    x 1	x120	x128	x128	x96	/24
+ * 1  1  0  0	33.33 / 2	x180	x192	x192	x144	/32
+ * 1  1  0  1	33.33 / 2	x180	x192	x128	x144	/32
+ * 1  1  1  0	Prohibited setting
+ * 1  1  1  1	33.33 / 2	x180	x192	x192	x144	/32
+ */
+#define CPG_PLL_CONFIG_INDEX(md)	((((md) & BIT(14)) >> 11) | \
+					 (((md) & BIT(13)) >> 11) | \
+					 (((md) & BIT(19)) >> 18) | \
+					 (((md) & BIT(17)) >> 17))
+
+static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[16] __initconst = {
+	/* EXTAL div	PLL1 mult/div	PLL3 mult/div	OSC prediv */
+	{ 1,		192,	1,	192,	1,	16,	},
+	{ 1,		192,	1,	128,	1,	16,	},
+	{ 0, /* Prohibited setting */				},
+	{ 1,		192,	1,	192,	1,	16,	},
+	{ 1,		160,	1,	160,	1,	19,	},
+	{ 1,		160,	1,	106,	1,	19,	},
+	{ 0, /* Prohibited setting */				},
+	{ 1,		160,	1,	160,	1,	19,	},
+	{ 1,		128,	1,	128,	1,	24,	},
+	{ 1,		128,	1,	84,	1,	24,	},
+	{ 0, /* Prohibited setting */				},
+	{ 1,		128,	1,	128,	1,	24,	},
+	{ 2,		192,	1,	192,	1,	32,	},
+	{ 2,		192,	1,	128,	1,	32,	},
+	{ 0, /* Prohibited setting */				},
+	{ 2,		192,	1,	192,	1,	32,	},
+};
+
+static int __init r8a774b1_cpg_mssr_init(struct device *dev)
+{
+	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
+	u32 cpg_mode;
+	int error;
+
+	error = rcar_rst_read_mode_pins(&cpg_mode);
+	if (error)
+		return error;
+
+	cpg_pll_config = &cpg_pll_configs[CPG_PLL_CONFIG_INDEX(cpg_mode)];
+	if (!cpg_pll_config->extal_div) {
+		dev_err(dev, "Prohibited setting (cpg_mode=0x%x)\n", cpg_mode);
+		return -EINVAL;
+	}
+
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
+}
+
+const struct cpg_mssr_info r8a774b1_cpg_mssr_info __initconst = {
+	/* Core Clocks */
+	.core_clks = r8a774b1_core_clks,
+	.num_core_clks = ARRAY_SIZE(r8a774b1_core_clks),
+	.last_dt_core_clk = LAST_DT_CORE_CLK,
+	.num_total_core_clks = MOD_CLK_BASE,
+
+	/* Module Clocks */
+	.mod_clks = r8a774b1_mod_clks,
+	.num_mod_clks = ARRAY_SIZE(r8a774b1_mod_clks),
+	.num_hw_mod_clks = 12 * 32,
+
+	/* Critical Module Clocks */
+	.crit_mod_clks = r8a774b1_crit_mod_clks,
+	.num_crit_mod_clks = ARRAY_SIZE(r8a774b1_crit_mod_clks),
+
+	/* Callbacks */
+	.init = r8a774b1_cpg_mssr_init,
+	.cpg_clk_register = rcar_gen3_cpg_clk_register,
+};
diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 90cc6a102602..e8420d3ada94 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * r8a7796 Clock Pulse Generator / Module Standby and Software Reset
+ * r8a7796 (R-Car M3-W/W+) Clock Pulse Generator / Module Standby and Software
+ * Reset
  *
- * Copyright (C) 2016 Glider bvba
- * Copyright (C) 2018 Renesas Electronics Corp.
+ * Copyright (C) 2016-2019 Glider bvba
+ * Copyright (C) 2018-2019 Renesas Electronics Corp.
  *
  * Based on r8a7795-cpg-mssr.c
  *
@@ -14,6 +15,7 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/of.h>
 #include <linux/soc/renesas/rcar-rst.h>
 
 #include <dt-bindings/clock/r8a7796-cpg-mssr.h>
@@ -116,7 +118,7 @@ static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	DEF_BASE("r",           R8A7796_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
 };
 
-static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
+static struct mssr_mod_clk r8a7796_mod_clks[] __initdata = {
 	DEF_MOD("fdp1-0",		 119,	R8A7796_CLK_S0D1),
 	DEF_MOD("scif5",		 202,	R8A7796_CLK_S3D4),
 	DEF_MOD("scif4",		 203,	R8A7796_CLK_S3D4),
@@ -304,6 +306,14 @@ static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[16] __initconst = {
 	{ 2,		192,	1,	192,	1,	32,	},
 };
 
+	/*
+	 * Fixups for R-Car M3-W+
+	 */
+
+static const unsigned int r8a77961_mod_nullify[] __initconst = {
+	MOD_CLK_ID(617),			/* FCPCI0  */
+};
+
 static int __init r8a7796_cpg_mssr_init(struct device *dev)
 {
 	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
@@ -320,6 +330,12 @@ static int __init r8a7796_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
+	if (of_device_is_compatible(dev->of_node, "renesas,r8a77961-cpg-mssr"))
+		mssr_mod_nullify(r8a7796_mod_clks,
+				 ARRAY_SIZE(r8a7796_mod_clks),
+				 r8a77961_mod_nullify,
+				 ARRAY_SIZE(r8a77961_mod_nullify));
+
 	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
diff --git a/drivers/clk/renesas/r8a77965-cpg-mssr.c b/drivers/clk/renesas/r8a77965-cpg-mssr.c
index b4e8c5b7d515..b3af4da2ca74 100644
--- a/drivers/clk/renesas/r8a77965-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a77965-cpg-mssr.c
@@ -323,7 +323,7 @@ static int __init r8a77965_cpg_mssr_init(struct device *dev)
 	}
 
 	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
-};
+}
 
 const struct cpg_mssr_info r8a77965_cpg_mssr_info __initconst = {
 	/* Core Clocks */
diff --git a/drivers/clk/renesas/rcar-gen2-cpg.c b/drivers/clk/renesas/rcar-gen2-cpg.c
index f596a2dafcf4..d4fa3dc3e2a2 100644
--- a/drivers/clk/renesas/rcar-gen2-cpg.c
+++ b/drivers/clk/renesas/rcar-gen2-cpg.c
@@ -63,19 +63,22 @@ static unsigned long cpg_z_clk_recalc_rate(struct clk_hw *hw,
 	return div_u64((u64)parent_rate * mult, 32);
 }
 
-static long cpg_z_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				 unsigned long *parent_rate)
+static int cpg_z_clk_determine_rate(struct clk_hw *hw,
+				    struct clk_rate_request *req)
 {
-	unsigned long prate  = *parent_rate;
-	unsigned int mult;
+	unsigned long prate = req->best_parent_rate;
+	unsigned int min_mult, max_mult, mult;
 
-	if (!prate)
-		prate = 1;
+	min_mult = max(div64_ul(req->min_rate * 32ULL, prate), 1ULL);
+	max_mult = min(div64_ul(req->max_rate * 32ULL, prate), 32ULL);
+	if (max_mult < min_mult)
+		return -EINVAL;
 
-	mult = div_u64((u64)rate * 32, prate);
-	mult = clamp(mult, 1U, 32U);
+	mult = div64_ul(req->rate * 32ULL, prate);
+	mult = clamp(mult, min_mult, max_mult);
 
-	return *parent_rate / 32 * mult;
+	req->rate = div_u64((u64)prate * mult, 32);
+	return 0;
 }
 
 static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -86,7 +89,7 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 	u32 val, kick;
 	unsigned int i;
 
-	mult = div_u64((u64)rate * 32, parent_rate);
+	mult = div64_ul(rate * 32ULL, parent_rate);
 	mult = clamp(mult, 1U, 32U);
 
 	if (readl(zclk->kick_reg) & CPG_FRQCRB_KICK)
@@ -126,7 +129,7 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static const struct clk_ops cpg_z_clk_ops = {
 	.recalc_rate = cpg_z_clk_recalc_rate,
-	.round_rate = cpg_z_clk_round_rate,
+	.determine_rate = cpg_z_clk_determine_rate,
 	.set_rate = cpg_z_clk_set_rate,
 };
 
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index d25c8ba00a65..c97b647db9b6 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -114,18 +114,24 @@ static unsigned long cpg_z_clk_recalc_rate(struct clk_hw *hw,
 				     32 * zclk->fixed_div);
 }
 
-static long cpg_z_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				 unsigned long *parent_rate)
+static int cpg_z_clk_determine_rate(struct clk_hw *hw,
+				    struct clk_rate_request *req)
 {
 	struct cpg_z_clk *zclk = to_z_clk(hw);
+	unsigned int min_mult, max_mult, mult;
 	unsigned long prate;
-	unsigned int mult;
 
-	prate = *parent_rate / zclk->fixed_div;
-	mult = div_u64(rate * 32ULL, prate);
-	mult = clamp(mult, 1U, 32U);
+	prate = req->best_parent_rate / zclk->fixed_div;
+	min_mult = max(div64_ul(req->min_rate * 32ULL, prate), 1ULL);
+	max_mult = min(div64_ul(req->max_rate * 32ULL, prate), 32ULL);
+	if (max_mult < min_mult)
+		return -EINVAL;
+
+	mult = div64_ul(req->rate * 32ULL, prate);
+	mult = clamp(mult, min_mult, max_mult);
 
-	return (u64)prate * mult / 32;
+	req->rate = div_u64((u64)prate * mult, 32);
+	return 0;
 }
 
 static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -172,7 +178,7 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static const struct clk_ops cpg_z_clk_ops = {
 	.recalc_rate = cpg_z_clk_recalc_rate,
-	.round_rate = cpg_z_clk_round_rate,
+	.determine_rate = cpg_z_clk_determine_rate,
 	.set_rate = cpg_z_clk_set_rate,
 };
 
@@ -309,44 +315,44 @@ static unsigned long cpg_sd_clock_recalc_rate(struct clk_hw *hw,
 				 clock->div_table[clock->cur_div_idx].div);
 }
 
-static unsigned int cpg_sd_clock_calc_div(struct sd_clock *clock,
-					  unsigned long rate,
-					  unsigned long parent_rate)
+static int cpg_sd_clock_determine_rate(struct clk_hw *hw,
+				       struct clk_rate_request *req)
 {
-	unsigned long calc_rate, diff, diff_min = ULONG_MAX;
-	unsigned int i, best_div = 0;
+	unsigned long best_rate = ULONG_MAX, diff_min = ULONG_MAX;
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned long calc_rate, diff;
+	unsigned int i;
 
 	for (i = 0; i < clock->div_num; i++) {
-		calc_rate = DIV_ROUND_CLOSEST(parent_rate,
+		calc_rate = DIV_ROUND_CLOSEST(req->best_parent_rate,
 					      clock->div_table[i].div);
-		diff = calc_rate > rate ? calc_rate - rate : rate - calc_rate;
+		if (calc_rate < req->min_rate || calc_rate > req->max_rate)
+			continue;
+
+		diff = calc_rate > req->rate ? calc_rate - req->rate
+					     : req->rate - calc_rate;
 		if (diff < diff_min) {
-			best_div = clock->div_table[i].div;
+			best_rate = calc_rate;
 			diff_min = diff;
 		}
 	}
 
-	return best_div;
-}
-
-static long cpg_sd_clock_round_rate(struct clk_hw *hw, unsigned long rate,
-				      unsigned long *parent_rate)
-{
-	struct sd_clock *clock = to_sd_clock(hw);
-	unsigned int div = cpg_sd_clock_calc_div(clock, rate, *parent_rate);
+	if (best_rate == ULONG_MAX)
+		return -EINVAL;
 
-	return DIV_ROUND_CLOSEST(*parent_rate, div);
+	req->rate = best_rate;
+	return 0;
 }
 
 static int cpg_sd_clock_set_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long parent_rate)
+				 unsigned long parent_rate)
 {
 	struct sd_clock *clock = to_sd_clock(hw);
-	unsigned int div = cpg_sd_clock_calc_div(clock, rate, parent_rate);
 	unsigned int i;
 
 	for (i = 0; i < clock->div_num; i++)
-		if (div == clock->div_table[i].div)
+		if (rate == DIV_ROUND_CLOSEST(parent_rate,
+					      clock->div_table[i].div))
 			break;
 
 	if (i >= clock->div_num)
@@ -366,7 +372,7 @@ static const struct clk_ops cpg_sd_clock_ops = {
 	.disable = cpg_sd_clock_disable,
 	.is_enabled = cpg_sd_clock_is_enabled,
 	.recalc_rate = cpg_sd_clock_recalc_rate,
-	.round_rate = cpg_sd_clock_round_rate,
+	.determine_rate = cpg_sd_clock_determine_rate,
 	.set_rate = cpg_sd_clock_set_rate,
 };
 
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index 132cc96895e3..a2663fbbd7a5 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -702,6 +702,12 @@ static const struct of_device_id cpg_mssr_match[] = {
 		.data = &r8a774a1_cpg_mssr_info,
 	},
 #endif
+#ifdef CONFIG_CLK_R8A774B1
+	{
+		.compatible = "renesas,r8a774b1-cpg-mssr",
+		.data = &r8a774b1_cpg_mssr_info,
+	},
+#endif
 #ifdef CONFIG_CLK_R8A774C0
 	{
 		.compatible = "renesas,r8a774c0-cpg-mssr",
@@ -743,12 +749,18 @@ static const struct of_device_id cpg_mssr_match[] = {
 		.data = &r8a7795_cpg_mssr_info,
 	},
 #endif
-#ifdef CONFIG_CLK_R8A7796
+#ifdef CONFIG_CLK_R8A77960
 	{
 		.compatible = "renesas,r8a7796-cpg-mssr",
 		.data = &r8a7796_cpg_mssr_info,
 	},
 #endif
+#ifdef CONFIG_CLK_R8A77961
+	{
+		.compatible = "renesas,r8a77961-cpg-mssr",
+		.data = &r8a7796_cpg_mssr_info,
+	},
+#endif
 #ifdef CONFIG_CLK_R8A77965
 	{
 		.compatible = "renesas,r8a77965-cpg-mssr",
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index 4ddcdf3bfb95..3b852ba0ecec 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -159,6 +159,7 @@ extern const struct cpg_mssr_info r8a7743_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7745_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a77470_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a774a1_cpg_mssr_info;
+extern const struct cpg_mssr_info r8a774b1_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a774c0_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7790_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7791_cpg_mssr_info;
diff --git a/drivers/clk/rockchip/clk-half-divider.c b/drivers/clk/rockchip/clk-half-divider.c
index ba9f00dc9740..b333fc28c94b 100644
--- a/drivers/clk/rockchip/clk-half-divider.c
+++ b/drivers/clk/rockchip/clk-half-divider.c
@@ -139,12 +139,11 @@ static int clk_half_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	return 0;
 }
 
-const struct clk_ops clk_half_divider_ops = {
+static const struct clk_ops clk_half_divider_ops = {
 	.recalc_rate = clk_half_divider_recalc_rate,
 	.round_rate = clk_half_divider_round_rate,
 	.set_rate = clk_half_divider_set_rate,
 };
-EXPORT_SYMBOL_GPL(clk_half_divider_ops);
 
 /**
  * Register a clock branch.
diff --git a/drivers/clk/rockchip/clk-px30.c b/drivers/clk/rockchip/clk-px30.c
index 3a501896b280..6fb9c98b7d24 100644
--- a/drivers/clk/rockchip/clk-px30.c
+++ b/drivers/clk/rockchip/clk-px30.c
@@ -167,6 +167,10 @@ PNAME(mux_uart5_p)		= { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac" };
 PNAME(mux_cif_out_p)		= { "xin24m", "dummy_cpll", "npll", "usb480m" };
 PNAME(mux_dclk_vopb_p)		= { "dclk_vopb_src", "dclk_vopb_frac", "xin24m" };
 PNAME(mux_dclk_vopl_p)		= { "dclk_vopl_src", "dclk_vopl_frac", "xin24m" };
+PNAME(mux_nandc_p)		= { "clk_nandc_div", "clk_nandc_div50" };
+PNAME(mux_sdio_p)		= { "clk_sdio_div", "clk_sdio_div50" };
+PNAME(mux_emmc_p)		= { "clk_emmc_div", "clk_emmc_div50" };
+PNAME(mux_sdmmc_p)		= { "clk_sdmmc_div", "clk_sdmmc_div50" };
 PNAME(mux_gmac_p)		= { "clk_gmac_src", "gmac_clkin" };
 PNAME(mux_gmac_rmii_sel_p)	= { "clk_gmac_rx_tx_div20", "clk_gmac_rx_tx_div2" };
 PNAME(mux_rtc32k_pmu_p)		= { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac", };
@@ -460,16 +464,40 @@ static struct rockchip_clk_branch px30_clk_branches[] __initdata = {
 	/* PD_MMC_NAND */
 	GATE(HCLK_MMC_NAND, "hclk_mmc_nand", "hclk_peri_pre", 0,
 			PX30_CLKGATE_CON(6), 0, GFLAGS),
-	COMPOSITE(SCLK_NANDC, "clk_nandc", mux_gpll_cpll_npll_p, 0,
+	COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_gpll_cpll_npll_p, 0,
 			PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS,
+			PX30_CLKGATE_CON(5), 11, GFLAGS),
+	COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_gpll_cpll_npll_p, 0,
+			PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 8, 5, DFLAGS,
+			PX30_CLKGATE_CON(5), 12, GFLAGS),
+	COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			PX30_CLKSEL_CON(15), 15, 1, MFLAGS,
 			PX30_CLKGATE_CON(5), 13, GFLAGS),
 
-	COMPOSITE(SCLK_SDIO, "clk_sdio", mux_gpll_cpll_npll_xin24m_p, 0,
+	COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_xin24m_p, 0,
 			PX30_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 1, GFLAGS),
+	COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50",
+			mux_gpll_cpll_npll_xin24m_p, 0,
+			PX30_CLKSEL_CON(18), 14, 2, MFLAGS,
+			PX30_CLKSEL_CON(19), 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 2, GFLAGS),
+	COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			PX30_CLKSEL_CON(19), 15, 1, MFLAGS,
 			PX30_CLKGATE_CON(6), 3, GFLAGS),
 
-	COMPOSITE(SCLK_EMMC, "clk_emmc", mux_gpll_cpll_npll_xin24m_p, 0,
+	COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_gpll_cpll_npll_xin24m_p, 0,
 			PX30_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 4, GFLAGS),
+	COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_xin24m_p, 0,
+			PX30_CLKSEL_CON(20), 14, 2, MFLAGS,
+			PX30_CLKSEL_CON(21), 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 5, GFLAGS),
+	COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			PX30_CLKSEL_CON(21), 15, 1, MFLAGS,
 			PX30_CLKGATE_CON(6), 6, GFLAGS),
 
 	COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0,
@@ -494,8 +522,16 @@ static struct rockchip_clk_branch px30_clk_branches[] __initdata = {
 	/* PD_SDCARD */
 	GATE(0, "hclk_sdmmc_pre", "hclk_peri_pre", 0,
 			PX30_CLKGATE_CON(6), 12, GFLAGS),
-	COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_gpll_cpll_npll_xin24m_p, 0,
+	COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_xin24m_p, 0,
 			PX30_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 13, GFLAGS),
+	COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_gpll_cpll_npll_xin24m_p, 0,
+			PX30_CLKSEL_CON(16), 14, 2, MFLAGS,
+			PX30_CLKSEL_CON(17), 0, 8, DFLAGS,
+			PX30_CLKGATE_CON(6), 14, GFLAGS),
+	COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			PX30_CLKSEL_CON(17), 15, 1, MFLAGS,
 			PX30_CLKGATE_CON(6), 15, GFLAGS),
 
 	/* PD_USB */
@@ -763,29 +799,29 @@ static struct rockchip_clk_branch px30_clk_branches[] __initdata = {
 	GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 3, GFLAGS),
 	GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 4, GFLAGS),
 	GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 5, GFLAGS),
-	GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 6, GFLAGS),
+	GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 6, GFLAGS),
 	GATE(0, "pclk_cpu_hoost", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 7, GFLAGS),
 
 	/* PD_VI */
-	GATE(0, "aclk_vi_niu", "aclk_vi_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(4), 15, GFLAGS),
+	GATE(0, "aclk_vi_niu", "aclk_vi_pre", 0, PX30_CLKGATE_CON(4), 15, GFLAGS),
 	GATE(ACLK_CIF, "aclk_cif", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 1, GFLAGS),
 	GATE(ACLK_ISP, "aclk_isp", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 3, GFLAGS),
-	GATE(0, "hclk_vi_niu", "hclk_vi_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(5), 0, GFLAGS),
+	GATE(0, "hclk_vi_niu", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 0, GFLAGS),
 	GATE(HCLK_CIF, "hclk_cif", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 2, GFLAGS),
 	GATE(HCLK_ISP, "hclk_isp", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 4, GFLAGS),
 
 	/* PD_VO */
-	GATE(0, "aclk_vo_niu", "aclk_vo_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(3), 0, GFLAGS),
+	GATE(0, "aclk_vo_niu", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 0, GFLAGS),
 	GATE(ACLK_VOPB, "aclk_vopb", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 3, GFLAGS),
 	GATE(ACLK_RGA, "aclk_rga", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 7, GFLAGS),
 	GATE(ACLK_VOPL, "aclk_vopl", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 5, GFLAGS),
 
-	GATE(0, "hclk_vo_niu", "hclk_vo_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(3), 1, GFLAGS),
+	GATE(0, "hclk_vo_niu", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 1, GFLAGS),
 	GATE(HCLK_VOPB, "hclk_vopb", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 4, GFLAGS),
 	GATE(HCLK_RGA, "hclk_rga", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 8, GFLAGS),
 	GATE(HCLK_VOPL, "hclk_vopl", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 6, GFLAGS),
 
-	GATE(0, "pclk_vo_niu", "pclk_vo_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(3), 2, GFLAGS),
+	GATE(0, "pclk_vo_niu", "pclk_vo_pre", 0, PX30_CLKGATE_CON(3), 2, GFLAGS),
 	GATE(PCLK_MIPI_DSI, "pclk_mipi_dsi", "pclk_vo_pre", 0, PX30_CLKGATE_CON(3), 9, GFLAGS),
 
 	/* PD_BUS */
@@ -940,7 +976,7 @@ static struct rockchip_clk_branch px30_clk_pmu_branches[] __initdata = {
 	GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 8, GFLAGS),
 };
 
-static const char *const px30_pmucru_critical_clocks[] __initconst = {
+static const char *const px30_cru_critical_clocks[] __initconst = {
 	"aclk_bus_pre",
 	"pclk_bus_pre",
 	"hclk_bus_pre",
@@ -950,10 +986,16 @@ static const char *const px30_pmucru_critical_clocks[] __initconst = {
 	"pclk_top_pre",
 	"pclk_pmu_pre",
 	"hclk_usb_niu",
+	"pclk_vo_niu",
+	"aclk_vo_niu",
+	"hclk_vo_niu",
+	"aclk_vi_niu",
+	"hclk_vi_niu",
 	"pll_npll",
 	"usb480m",
 	"clk_uart2",
 	"pclk_uart2",
+	"pclk_usb_grf",
 };
 
 static void __init px30_clk_init(struct device_node *np)
@@ -985,6 +1027,9 @@ static void __init px30_clk_init(struct device_node *np)
 				     &px30_cpuclk_data, px30_cpuclk_rates,
 				     ARRAY_SIZE(px30_cpuclk_rates));
 
+	rockchip_clk_protect_critical(px30_cru_critical_clocks,
+				      ARRAY_SIZE(px30_cru_critical_clocks));
+
 	rockchip_register_softrst(np, 12, reg_base + PX30_SOFTRST_CON(0),
 				  ROCKCHIP_SOFTRST_HIWORD_MASK);
 
@@ -1017,9 +1062,6 @@ static void __init px30_pmu_clk_init(struct device_node *np)
 	rockchip_clk_register_branches(ctx, px30_clk_pmu_branches,
 				       ARRAY_SIZE(px30_clk_pmu_branches));
 
-	rockchip_clk_protect_critical(px30_pmucru_critical_clocks,
-				      ARRAY_SIZE(px30_pmucru_critical_clocks));
-
 	rockchip_clk_of_add_provider(np, ctx);
 }
 CLK_OF_DECLARE(px30_cru_pmu, "rockchip,px30-pmucru", px30_pmu_clk_init);
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 31466cd1842f..c9e5a1fb6653 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -12,6 +12,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/clk.h>
 
 #include "clk.h"
 #include "clk-cpu.h"
@@ -165,6 +166,8 @@ static const unsigned long exynos5x_clk_regs[] __initconst = {
 	GATE_BUS_CPU,
 	GATE_SCLK_CPU,
 	CLKOUT_CMU_CPU,
+	APLL_CON0,
+	KPLL_CON0,
 	CPLL_CON0,
 	DPLL_CON0,
 	EPLL_CON0,
@@ -611,7 +614,8 @@ static const struct samsung_mux_clock exynos5x_mux_clks[] __initconst = {
 	MUX(0, "mout_aclk66", mout_group1_p, SRC_TOP1, 8, 2),
 	MUX(0, "mout_aclk166", mout_group1_p, SRC_TOP1, 24, 2),
 
-	MUX(0, "mout_aclk_g3d", mout_group5_p, SRC_TOP2, 16, 1),
+	MUX_F(0, "mout_aclk_g3d", mout_group5_p, SRC_TOP2, 16, 1,
+	      CLK_SET_RATE_PARENT, 0),
 
 	MUX(0, "mout_user_aclk400_isp", mout_user_aclk400_isp_p,
 			SRC_TOP3, 0, 1),
@@ -653,8 +657,8 @@ static const struct samsung_mux_clock exynos5x_mux_clks[] __initconst = {
 			SRC_TOP5, 8, 1),
 	MUX(0, "mout_user_aclk266_g2d", mout_user_aclk266_g2d_p,
 			SRC_TOP5, 12, 1),
-	MUX(CLK_MOUT_G3D, "mout_user_aclk_g3d", mout_user_aclk_g3d_p,
-			SRC_TOP5, 16, 1),
+	MUX_F(CLK_MOUT_G3D, "mout_user_aclk_g3d", mout_user_aclk_g3d_p,
+			SRC_TOP5, 16, 1, CLK_SET_RATE_PARENT, 0),
 	MUX(0, "mout_user_aclk300_jpeg", mout_user_aclk300_jpeg_p,
 			SRC_TOP5, 20, 1),
 	MUX(CLK_MOUT_USER_ACLK300_DISP1, "mout_user_aclk300_disp1",
@@ -663,7 +667,8 @@ static const struct samsung_mux_clock exynos5x_mux_clks[] __initconst = {
 			mout_user_aclk300_gscl_p, SRC_TOP5, 28, 1),
 
 	MUX(0, "mout_sclk_mpll", mout_mpll_p, SRC_TOP6, 0, 1),
-	MUX(CLK_MOUT_VPLL, "mout_sclk_vpll", mout_vpll_p, SRC_TOP6, 4, 1),
+	MUX_F(CLK_MOUT_VPLL, "mout_sclk_vpll", mout_vpll_p, SRC_TOP6, 4, 1,
+	      CLK_SET_RATE_PARENT, 0),
 	MUX(CLK_MOUT_SCLK_SPLL, "mout_sclk_spll", mout_spll_p, SRC_TOP6, 8, 1),
 	MUX(0, "mout_sclk_ipll", mout_ipll_p, SRC_TOP6, 12, 1),
 	MUX(0, "mout_sclk_rpll", mout_rpll_p, SRC_TOP6, 16, 1),
@@ -707,7 +712,8 @@ static const struct samsung_mux_clock exynos5x_mux_clks[] __initconst = {
 			SRC_TOP12, 8, 1),
 	MUX(0, "mout_sw_aclk266_g2d", mout_sw_aclk266_g2d_p,
 			SRC_TOP12, 12, 1),
-	MUX(0, "mout_sw_aclk_g3d", mout_sw_aclk_g3d_p, SRC_TOP12, 16, 1),
+	MUX_F(0, "mout_sw_aclk_g3d", mout_sw_aclk_g3d_p, SRC_TOP12, 16, 1,
+	      CLK_SET_RATE_PARENT, 0),
 	MUX(0, "mout_sw_aclk300_jpeg", mout_sw_aclk300_jpeg_p,
 			SRC_TOP12, 20, 1),
 	MUX(CLK_MOUT_SW_ACLK300, "mout_sw_aclk300_disp1",
@@ -804,8 +810,8 @@ static const struct samsung_div_clock exynos5x_div_clks[] __initconst = {
 			DIV_TOP2, 8, 3),
 	DIV(CLK_DOUT_ACLK266_G2D, "dout_aclk266_g2d", "mout_aclk266_g2d",
 			DIV_TOP2, 12, 3),
-	DIV(CLK_DOUT_ACLK_G3D, "dout_aclk_g3d", "mout_aclk_g3d", DIV_TOP2,
-			16, 3),
+	DIV_F(CLK_DOUT_ACLK_G3D, "dout_aclk_g3d", "mout_aclk_g3d", DIV_TOP2,
+			16, 3, CLK_SET_RATE_PARENT, 0),
 	DIV(CLK_DOUT_ACLK300_JPEG, "dout_aclk300_jpeg", "mout_aclk300_jpeg",
 			DIV_TOP2, 20, 3),
 	DIV(CLK_DOUT_ACLK300_DISP1, "dout_aclk300_disp1",
@@ -1253,7 +1259,8 @@ static struct exynos5_subcmu_reg_dump exynos5x_gsc_suspend_regs[] = {
 };
 
 static const struct samsung_gate_clock exynos5x_g3d_gate_clks[] __initconst = {
-	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
+	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9,
+	     CLK_SET_RATE_PARENT, 0),
 };
 
 static struct exynos5_subcmu_reg_dump exynos5x_g3d_suspend_regs[] = {
@@ -1437,6 +1444,17 @@ static const struct samsung_pll_rate_table exynos5420_epll_24mhz_tbl[] = {
 	PLL_36XX_RATE(24 * MHZ,  32768001U, 131, 3, 5, 4719),
 };
 
+static const struct samsung_pll_rate_table exynos5420_vpll_24mhz_tbl[] = {
+	PLL_35XX_RATE(24 * MHZ, 600000000U,  200, 2, 2),
+	PLL_35XX_RATE(24 * MHZ, 543000000U,  181, 2, 2),
+	PLL_35XX_RATE(24 * MHZ, 480000000U,  160, 2, 2),
+	PLL_35XX_RATE(24 * MHZ, 420000000U,  140, 2, 2),
+	PLL_35XX_RATE(24 * MHZ, 350000000U,  175, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 266000000U,  266, 3, 3),
+	PLL_35XX_RATE(24 * MHZ, 177000000U,  118, 2, 3),
+	PLL_35XX_RATE(24 * MHZ, 100000000U,  200, 3, 4),
+};
+
 static struct samsung_pll_clock exynos5x_plls[nr_plls] __initdata = {
 	[apll] = PLL(pll_2550, CLK_FOUT_APLL, "fout_apll", "fin_pll", APLL_LOCK,
 		APLL_CON0, NULL),
@@ -1561,6 +1579,7 @@ static void __init exynos5x_clk_init(struct device_node *np,
 		exynos5x_plls[apll].rate_table = exynos5420_pll2550x_24mhz_tbl;
 		exynos5x_plls[epll].rate_table = exynos5420_epll_24mhz_tbl;
 		exynos5x_plls[kpll].rate_table = exynos5420_pll2550x_24mhz_tbl;
+		exynos5x_plls[vpll].rate_table = exynos5420_vpll_24mhz_tbl;
 	}
 
 	if (soc == EXYNOS5420)
@@ -1628,6 +1647,13 @@ static void __init exynos5x_clk_init(struct device_node *np,
 				     exynos5x_subcmus);
 	}
 
+	/*
+	 * Keep top part of G3D clock path enabled permanently to ensure
+	 * that the internal busses get their clock regardless of the
+	 * main G3D clock enablement status.
+	 */
+	clk_prepare_enable(__clk_lookup("mout_sw_aclk_g3d"));
+
 	samsung_clk_of_add_provider(np, ctx);
 }
 
diff --git a/drivers/clk/samsung/clk-s3c2410-dclk.c b/drivers/clk/samsung/clk-s3c2410-dclk.c
index 1281672cb00e..7dad9098e897 100644
--- a/drivers/clk/samsung/clk-s3c2410-dclk.c
+++ b/drivers/clk/samsung/clk-s3c2410-dclk.c
@@ -238,7 +238,6 @@ static SIMPLE_DEV_PM_OPS(s3c24xx_dclk_pm_ops,
 static int s3c24xx_dclk_probe(struct platform_device *pdev)
 {
 	struct s3c24xx_dclk *s3c24xx_dclk;
-	struct resource *mem;
 	struct s3c24xx_dclk_drv_data *dclk_variant;
 	struct clk_hw **clk_table;
 	int ret, i;
@@ -257,8 +256,7 @@ static int s3c24xx_dclk_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, s3c24xx_dclk);
 	spin_lock_init(&s3c24xx_dclk->dclk_lock);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	s3c24xx_dclk->base = devm_ioremap_resource(&pdev->dev, mem);
+	s3c24xx_dclk->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(s3c24xx_dclk->base))
 		return PTR_ERR(s3c24xx_dclk->base);
 
diff --git a/drivers/clk/samsung/clk.c b/drivers/clk/samsung/clk.c
index e544a38106dd..dad31308c071 100644
--- a/drivers/clk/samsung/clk.c
+++ b/drivers/clk/samsung/clk.c
@@ -60,8 +60,7 @@ struct samsung_clk_provider *__init samsung_clk_init(struct device_node *np,
 	struct samsung_clk_provider *ctx;
 	int i;
 
-	ctx = kzalloc(sizeof(struct samsung_clk_provider) +
-		      sizeof(*ctx->clk_data.hws) * nr_clks, GFP_KERNEL);
+	ctx = kzalloc(struct_size(ctx, clk_data.hws, nr_clks), GFP_KERNEL);
 	if (!ctx)
 		panic("could not allocate clock provider context.\n");
 
diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c
index 9d56eac43832..c0af4779892b 100644
--- a/drivers/clk/sprd/common.c
+++ b/drivers/clk/sprd/common.c
@@ -42,17 +42,15 @@ int sprd_clk_regmap_init(struct platform_device *pdev,
 	void __iomem *base;
 	struct device_node *node = pdev->dev.of_node;
 	struct regmap *regmap;
-	struct resource *res;
 
 	if (of_find_property(node, "sprd,syscon", NULL)) {
 		regmap = syscon_regmap_lookup_by_phandle(node, "sprd,syscon");
-		if (IS_ERR_OR_NULL(regmap)) {
+		if (IS_ERR(regmap)) {
 			pr_err("%s: failed to get syscon regmap\n", __func__);
 			return PTR_ERR(regmap);
 		}
 	} else {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		base = devm_ioremap_resource(&pdev->dev, res);
+		base = devm_platform_ioremap_resource(pdev, 0);
 		if (IS_ERR(base))
 			return PTR_ERR(base);
 
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
index 45a1ed3fe674..50f8d1bc7046 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
@@ -23,9 +23,9 @@
  */
 
 static const char * const ar100_r_apb2_parents[] = { "osc24M", "osc32k",
-					     "pll-periph0", "iosc" };
+						     "iosc", "pll-periph0" };
 static const struct ccu_mux_var_prediv ar100_r_apb2_predivs[] = {
-	{ .index = 2, .shift = 0, .width = 5 },
+	{ .index = 3, .shift = 0, .width = 5 },
 };
 
 static struct ccu_div ar100_clk = {
@@ -51,17 +51,7 @@ static struct ccu_div ar100_clk = {
 
 static CLK_FIXED_FACTOR_HW(r_ahb_clk, "r-ahb", &ar100_clk.common.hw, 1, 1, 0);
 
-static struct ccu_div r_apb1_clk = {
-	.div		= _SUNXI_CCU_DIV(0, 2),
-
-	.common		= {
-		.reg		= 0x00c,
-		.hw.init	= CLK_HW_INIT("r-apb1",
-					      "r-ahb",
-					      &ccu_div_ops,
-					      0),
-	},
-};
+static SUNXI_CCU_M(r_apb1_clk, "r-apb1", "r-ahb", 0x00c, 0, 2, 0);
 
 static struct ccu_div r_apb2_clk = {
 	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
index d89353a3cdec..f2497d0a4683 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
@@ -203,12 +203,21 @@ static struct ccu_nkmp pll_hsic_clk = {
  * hardcode it to match with the clock names.
  */
 #define SUN50I_H6_PLL_AUDIO_REG		0x078
+
+static struct ccu_sdm_setting pll_audio_sdm_table[] = {
+	{ .rate = 541900800, .pattern = 0xc001288d, .m = 1, .n = 22 },
+	{ .rate = 589824000, .pattern = 0xc00126e9, .m = 1, .n = 24 },
+};
+
 static struct ccu_nm pll_audio_base_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(28),
 	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
 	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.sdm		= _SUNXI_CCU_SDM(pll_audio_sdm_table,
+					 BIT(24), 0x178, BIT(31)),
 	.common		= {
+		.features	= CCU_FEATURE_SIGMA_DELTA_MOD,
 		.reg		= 0x078,
 		.hw.init	= CLK_HW_INIT("pll-audio-base", "osc24M",
 					      &ccu_nm_ops,
@@ -290,7 +299,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
 				       0, 3,	/* M */
 				       24, 1,	/* mux */
 				       BIT(31),	/* gate */
-				       0);
+				       CLK_SET_RATE_PARENT);
 
 static SUNXI_CCU_GATE(bus_gpu_clk, "bus-gpu", "psi-ahb1-ahb2",
 		      0x67c, BIT(0), 0);
@@ -753,12 +762,12 @@ static const struct clk_hw *clk_parent_pll_audio[] = {
 };
 
 /*
- * The divider of pll-audio is fixed to 8 now, as pll-audio-4x has a
- * fixed post-divider 2.
+ * The divider of pll-audio is fixed to 24 for now, so 24576000 and 22579200
+ * rates can be set exactly in conjunction with sigma-delta modulation.
  */
 static CLK_FIXED_FACTOR_HWS(pll_audio_clk, "pll-audio",
 			    clk_parent_pll_audio,
-			    8, 1, CLK_SET_RATE_PARENT);
+			    24, 1, CLK_SET_RATE_PARENT);
 static CLK_FIXED_FACTOR_HWS(pll_audio_2x_clk, "pll-audio-2x",
 			    clk_parent_pll_audio,
 			    4, 1, CLK_SET_RATE_PARENT);
@@ -1215,12 +1224,12 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev)
 	}
 
 	/*
-	 * Force the post-divider of pll-audio to 8 and the output divider
-	 * of it to 1, to make the clock name represents the real frequency.
+	 * Force the post-divider of pll-audio to 12 and the output divider
+	 * of it to 2, so 24576000 and 22579200 rates can be set exactly.
 	 */
 	val = readl(reg + SUN50I_H6_PLL_AUDIO_REG);
 	val &= ~(GENMASK(21, 16) | BIT(0));
-	writel(val | (7 << 16), reg + SUN50I_H6_PLL_AUDIO_REG);
+	writel(val | (11 << 16) | BIT(0), reg + SUN50I_H6_PLL_AUDIO_REG);
 
 	/*
 	 * First clock parent (osc32K) is unusable for CEC. But since there
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
index b6e2680ef354..d8c38447e11b 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
@@ -48,10 +48,6 @@
 
 /* Some more module clocks are exported */
 
-#define CLK_MBUS		113
-
-/* And the GPU module clock is exported */
-
 #define CLK_NUMBER_H3		(CLK_GPU + 1)
 #define CLK_NUMBER_H5		(CLK_BUS_SCR1 + 1)
 
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.c b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
index 4646fdc61053..4c8c491b87c2 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
@@ -51,19 +51,7 @@ static struct ccu_div ar100_clk = {
 
 static CLK_FIXED_FACTOR_HW(ahb0_clk, "ahb0", &ar100_clk.common.hw, 1, 1, 0);
 
-static struct ccu_div apb0_clk = {
-	.div		= _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
-
-	.common		= {
-		.reg		= 0x0c,
-		.hw.init	= CLK_HW_INIT_HW("apb0",
-						 &ahb0_clk.hw,
-						 &ccu_div_ops,
-						 0),
-	},
-};
-
-static SUNXI_CCU_M(a83t_apb0_clk, "apb0", "ahb0", 0x0c, 0, 2, 0);
+static SUNXI_CCU_M(apb0_clk, "apb0", "ahb0", 0x0c, 0, 2, 0);
 
 /*
  * Define the parent as an array that can be reused to save space
@@ -127,7 +115,7 @@ static struct ccu_mp a83t_ir_clk = {
 
 static struct ccu_common *sun8i_a83t_r_ccu_clks[] = {
 	&ar100_clk.common,
-	&a83t_apb0_clk.common,
+	&apb0_clk.common,
 	&apb0_pio_clk.common,
 	&apb0_ir_clk.common,
 	&apb0_timer_clk.common,
@@ -167,7 +155,7 @@ static struct clk_hw_onecell_data sun8i_a83t_r_hw_clks = {
 	.hws	= {
 		[CLK_AR100]		= &ar100_clk.common.hw,
 		[CLK_AHB0]		= &ahb0_clk.hw,
-		[CLK_APB0]		= &a83t_apb0_clk.common.hw,
+		[CLK_APB0]		= &apb0_clk.common.hw,
 		[CLK_APB0_PIO]		= &apb0_pio_clk.common.hw,
 		[CLK_APB0_IR]		= &apb0_ir_clk.common.hw,
 		[CLK_APB0_TIMER]	= &apb0_timer_clk.common.hw,
@@ -282,9 +270,6 @@ static void __init sunxi_r_ccu_init(struct device_node *node,
 
 static void __init sun8i_a83t_r_ccu_setup(struct device_node *node)
 {
-	/* Fix apb0 bus gate parents here */
-	apb0_gate_parent[0] = &a83t_apb0_clk.common.hw;
-
 	sunxi_r_ccu_init(node, &sun8i_a83t_r_ccu_desc);
 }
 CLK_OF_DECLARE(sun8i_a83t_r_ccu, "allwinner,sun8i-a83t-r-ccu",
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r40.c b/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
index 897490800102..23bfe1d12f21 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
@@ -761,7 +761,8 @@ static struct ccu_mp outa_clk = {
 		.reg		= 0x1f0,
 		.features	= CCU_FEATURE_FIXED_PREDIV,
 		.hw.init	= CLK_HW_INIT_PARENTS("outa", out_parents,
-						      &ccu_mp_ops, 0),
+						      &ccu_mp_ops,
+						      CLK_SET_RATE_PARENT),
 	}
 };
 
@@ -779,7 +780,8 @@ static struct ccu_mp outb_clk = {
 		.reg		= 0x1f4,
 		.features	= CCU_FEATURE_FIXED_PREDIV,
 		.hw.init	= CLK_HW_INIT_PARENTS("outb", out_parents,
-						      &ccu_mp_ops, 0),
+						      &ccu_mp_ops,
+						      CLK_SET_RATE_PARENT),
 	}
 };
 
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index 5c779eec454b..0e36ca3bf3d5 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -618,7 +618,7 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
 		[CLK_MBUS]		= &mbus_clk.common.hw,
 		[CLK_MIPI_CSI]		= &mipi_csi_clk.common.hw,
 	},
-	.num	= CLK_NUMBER,
+	.num	= CLK_PLL_DDR1 + 1,
 };
 
 static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
@@ -700,7 +700,7 @@ static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
 		[CLK_MBUS]		= &mbus_clk.common.hw,
 		[CLK_MIPI_CSI]		= &mipi_csi_clk.common.hw,
 	},
-	.num	= CLK_NUMBER,
+	.num	= CLK_I2S0 + 1,
 };
 
 static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
index b0160d305a67..108eeeedcbf7 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
@@ -51,6 +51,4 @@
 
 #define CLK_PLL_DDR1		74
 
-#define CLK_NUMBER		(CLK_I2S0 + 1)
-
 #endif /* _CCU_SUN8I_H3_H_ */
diff --git a/drivers/clk/tegra/Makefile b/drivers/clk/tegra/Makefile
index 4812e45c2214..df966ca06788 100644
--- a/drivers/clk/tegra/Makefile
+++ b/drivers/clk/tegra/Makefile
@@ -17,7 +17,9 @@ obj-y					+= clk-tegra-fixed.o
 obj-y					+= clk-tegra-super-gen4.o
 obj-$(CONFIG_TEGRA_CLK_EMC)		+= clk-emc.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)         += clk-tegra20.o
+obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= clk-tegra20-emc.o
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)         += clk-tegra30.o
+obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= clk-tegra20-emc.o
 obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= clk-tegra114.o
 obj-$(CONFIG_ARCH_TEGRA_124_SOC)	+= clk-tegra124.o
 obj-$(CONFIG_TEGRA_CLK_DFLL)		+= clk-tegra124-dfll-fcpu.o
diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c
index f8688c2ddf1a..c051d92c2bbf 100644
--- a/drivers/clk/tegra/clk-dfll.c
+++ b/drivers/clk/tegra/clk-dfll.c
@@ -1487,6 +1487,7 @@ static int dfll_init(struct tegra_dfll *td)
 	td->last_unrounded_rate = 0;
 
 	pm_runtime_enable(td->dev);
+	pm_runtime_irq_safe(td->dev);
 	pm_runtime_get_sync(td->dev);
 
 	dfll_set_mode(td, DFLL_DISABLED);
@@ -1513,6 +1514,61 @@ di_err1:
 	return ret;
 }
 
+/**
+ * tegra_dfll_suspend - check DFLL is disabled
+ * @dev: DFLL device *
+ *
+ * DFLL clock should be disabled by the CPUFreq driver. So, make
+ * sure it is disabled and disable all clocks needed by the DFLL.
+ */
+int tegra_dfll_suspend(struct device *dev)
+{
+	struct tegra_dfll *td = dev_get_drvdata(dev);
+
+	if (dfll_is_running(td)) {
+		dev_err(td->dev, "DFLL still enabled while suspending\n");
+		return -EBUSY;
+	}
+
+	reset_control_assert(td->dvco_rst);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_dfll_suspend);
+
+/**
+ * tegra_dfll_resume - reinitialize DFLL on resume
+ * @dev: DFLL instance
+ *
+ * DFLL is disabled and reset during suspend and resume.
+ * So, reinitialize the DFLL IP block back for use.
+ * DFLL clock is enabled later in closed loop mode by CPUFreq
+ * driver before switching its clock source to DFLL output.
+ */
+int tegra_dfll_resume(struct device *dev)
+{
+	struct tegra_dfll *td = dev_get_drvdata(dev);
+
+	reset_control_deassert(td->dvco_rst);
+
+	pm_runtime_get_sync(td->dev);
+
+	dfll_set_mode(td, DFLL_DISABLED);
+	dfll_set_default_params(td);
+
+	if (td->soc->init_clock_trimmers)
+		td->soc->init_clock_trimmers();
+
+	dfll_set_open_loop_config(td);
+
+	dfll_init_out_if(td);
+
+	pm_runtime_put_sync(td->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_dfll_resume);
+
 /*
  * DT data fetch
  */
diff --git a/drivers/clk/tegra/clk-dfll.h b/drivers/clk/tegra/clk-dfll.h
index 1b14ebe7268b..fb209eb5f365 100644
--- a/drivers/clk/tegra/clk-dfll.h
+++ b/drivers/clk/tegra/clk-dfll.h
@@ -42,5 +42,7 @@ int tegra_dfll_register(struct platform_device *pdev,
 struct tegra_dfll_soc_data *tegra_dfll_unregister(struct platform_device *pdev);
 int tegra_dfll_runtime_suspend(struct device *dev);
 int tegra_dfll_runtime_resume(struct device *dev);
+int tegra_dfll_suspend(struct device *dev);
+int tegra_dfll_resume(struct device *dev);
 
 #endif /* __DRIVERS_CLK_TEGRA_CLK_DFLL_H */
diff --git a/drivers/clk/tegra/clk-divider.c b/drivers/clk/tegra/clk-divider.c
index e76731fb7d69..ca0de5f11f84 100644
--- a/drivers/clk/tegra/clk-divider.c
+++ b/drivers/clk/tegra/clk-divider.c
@@ -109,10 +109,21 @@ static int clk_frac_div_set_rate(struct clk_hw *hw, unsigned long rate,
 	return 0;
 }
 
+static void clk_divider_restore_context(struct clk_hw *hw)
+{
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+	unsigned long parent_rate = clk_hw_get_rate(parent);
+	unsigned long rate = clk_hw_get_rate(hw);
+
+	if (clk_frac_div_set_rate(hw, rate, parent_rate) < 0)
+		WARN_ON(1);
+}
+
 const struct clk_ops tegra_clk_frac_div_ops = {
 	.recalc_rate = clk_frac_div_recalc_rate,
 	.set_rate = clk_frac_div_set_rate,
 	.round_rate = clk_frac_div_round_rate,
+	.restore_context = clk_divider_restore_context,
 };
 
 struct clk *tegra_clk_register_divider(const char *name,
diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c
index ea39caf3d762..745f9faa98d8 100644
--- a/drivers/clk/tegra/clk-emc.c
+++ b/drivers/clk/tegra/clk-emc.c
@@ -403,20 +403,16 @@ static int load_one_timing_from_dt(struct tegra_clk_emc *tegra,
 	}
 
 	timing->parent_index = 0xff;
-	for (i = 0; i < ARRAY_SIZE(emc_parent_clk_names); i++) {
-		if (!strcmp(emc_parent_clk_names[i],
-			    __clk_get_name(timing->parent))) {
-			timing->parent_index = i;
-			break;
-		}
-	}
-	if (timing->parent_index == 0xff) {
+	i = match_string(emc_parent_clk_names, ARRAY_SIZE(emc_parent_clk_names),
+			 __clk_get_name(timing->parent));
+	if (i < 0) {
 		pr_err("timing %pOF: %s is not a valid parent\n",
 		       node, __clk_get_name(timing->parent));
 		clk_put(timing->parent);
 		return -EINVAL;
 	}
 
+	timing->parent_index = i;
 	return 0;
 }
 
diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index de466b4446da..c4faebd32760 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -236,9 +236,9 @@ enum clk_id {
 	tegra_clk_soc_therm,
 	tegra_clk_soc_therm_8,
 	tegra_clk_sor0,
-	tegra_clk_sor0_lvds,
+	tegra_clk_sor0_out,
 	tegra_clk_sor1,
-	tegra_clk_sor1_src,
+	tegra_clk_sor1_out,
 	tegra_clk_spdif,
 	tegra_clk_spdif_2x,
 	tegra_clk_spdif_in,
diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c
index 58437da25156..67620c7ecd9e 100644
--- a/drivers/clk/tegra/clk-periph.c
+++ b/drivers/clk/tegra/clk-periph.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -99,6 +100,23 @@ static void clk_periph_disable(struct clk_hw *hw)
 	gate_ops->disable(gate_hw);
 }
 
+static void clk_periph_restore_context(struct clk_hw *hw)
+{
+	struct tegra_clk_periph *periph = to_clk_periph(hw);
+	const struct clk_ops *div_ops = periph->div_ops;
+	struct clk_hw *div_hw = &periph->divider.hw;
+	int parent_id;
+
+	parent_id = clk_hw_get_parent_index(hw);
+	if (WARN_ON(parent_id < 0))
+		return;
+
+	if (!(periph->gate.flags & TEGRA_PERIPH_NO_DIV))
+		div_ops->restore_context(div_hw);
+
+	clk_periph_set_parent(hw, parent_id);
+}
+
 const struct clk_ops tegra_clk_periph_ops = {
 	.get_parent = clk_periph_get_parent,
 	.set_parent = clk_periph_set_parent,
@@ -108,6 +126,7 @@ const struct clk_ops tegra_clk_periph_ops = {
 	.is_enabled = clk_periph_is_enabled,
 	.enable = clk_periph_enable,
 	.disable = clk_periph_disable,
+	.restore_context = clk_periph_restore_context,
 };
 
 static const struct clk_ops tegra_clk_periph_nodiv_ops = {
@@ -116,6 +135,7 @@ static const struct clk_ops tegra_clk_periph_nodiv_ops = {
 	.is_enabled = clk_periph_is_enabled,
 	.enable = clk_periph_enable,
 	.disable = clk_periph_disable,
+	.restore_context = clk_periph_restore_context,
 };
 
 static const struct clk_ops tegra_clk_periph_no_gate_ops = {
@@ -124,6 +144,7 @@ static const struct clk_ops tegra_clk_periph_no_gate_ops = {
 	.recalc_rate = clk_periph_recalc_rate,
 	.round_rate = clk_periph_round_rate,
 	.set_rate = clk_periph_set_rate,
+	.restore_context = clk_periph_restore_context,
 };
 
 static struct clk *_tegra_clk_register_periph(const char *name,
diff --git a/drivers/clk/tegra/clk-pll-out.c b/drivers/clk/tegra/clk-pll-out.c
index 35f2bf00e1e6..d8bf89a81e6d 100644
--- a/drivers/clk/tegra/clk-pll-out.c
+++ b/drivers/clk/tegra/clk-pll-out.c
@@ -69,10 +69,19 @@ static void clk_pll_out_disable(struct clk_hw *hw)
 		spin_unlock_irqrestore(pll_out->lock, flags);
 }
 
+static void tegra_clk_pll_out_restore_context(struct clk_hw *hw)
+{
+	if (!__clk_get_enable_count(hw->clk))
+		clk_pll_out_disable(hw);
+	else
+		clk_pll_out_enable(hw);
+}
+
 const struct clk_ops tegra_clk_pll_out_ops = {
 	.is_enabled = clk_pll_out_is_enabled,
 	.enable = clk_pll_out_enable,
 	.disable = clk_pll_out_disable,
+	.restore_context = tegra_clk_pll_out_restore_context,
 };
 
 struct clk *tegra_clk_register_pll_out(const char *name,
diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index 1583f5fc992f..531c2b3d814e 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -1008,6 +1008,27 @@ static unsigned long clk_plle_recalc_rate(struct clk_hw *hw,
 	return rate;
 }
 
+static void tegra_clk_pll_restore_context(struct clk_hw *hw)
+{
+	struct tegra_clk_pll *pll = to_clk_pll(hw);
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+	unsigned long parent_rate = clk_hw_get_rate(parent);
+	unsigned long rate = clk_hw_get_rate(hw);
+
+	if (clk_pll_is_enabled(hw))
+		return;
+
+	if (pll->params->set_defaults)
+		pll->params->set_defaults(pll);
+
+	clk_pll_set_rate(hw, rate, parent_rate);
+
+	if (!__clk_get_enable_count(hw->clk))
+		clk_pll_disable(hw);
+	else
+		clk_pll_enable(hw);
+}
+
 const struct clk_ops tegra_clk_pll_ops = {
 	.is_enabled = clk_pll_is_enabled,
 	.enable = clk_pll_enable,
@@ -1015,6 +1036,7 @@ const struct clk_ops tegra_clk_pll_ops = {
 	.recalc_rate = clk_pll_recalc_rate,
 	.round_rate = clk_pll_round_rate,
 	.set_rate = clk_pll_set_rate,
+	.restore_context = tegra_clk_pll_restore_context,
 };
 
 const struct clk_ops tegra_clk_plle_ops = {
@@ -1802,6 +1824,27 @@ out:
 
 	return ret;
 }
+
+static void _clk_plle_tegra_init_parent(struct tegra_clk_pll *pll)
+{
+	u32 val, val_aux;
+
+	/* ensure parent is set to pll_ref */
+	val = pll_readl_base(pll);
+	val_aux = pll_readl(pll->params->aux_reg, pll);
+
+	if (val & PLL_BASE_ENABLE) {
+		if ((val_aux & PLLE_AUX_PLLRE_SEL) ||
+		    (val_aux & PLLE_AUX_PLLP_SEL))
+			WARN(1, "pll_e enabled with unsupported parent %s\n",
+			     (val_aux & PLLE_AUX_PLLP_SEL) ? "pllp_out0" :
+			     "pll_re_vco");
+	} else {
+		val_aux &= ~(PLLE_AUX_PLLRE_SEL | PLLE_AUX_PLLP_SEL);
+		pll_writel(val_aux, pll->params->aux_reg, pll);
+		fence_udelay(1, pll->clk_base);
+	}
+}
 #endif
 
 static struct tegra_clk_pll *_tegra_init_pll(void __iomem *clk_base,
@@ -2214,27 +2257,12 @@ struct clk *tegra_clk_register_plle_tegra114(const char *name,
 {
 	struct tegra_clk_pll *pll;
 	struct clk *clk;
-	u32 val, val_aux;
 
 	pll = _tegra_init_pll(clk_base, NULL, pll_params, lock);
 	if (IS_ERR(pll))
 		return ERR_CAST(pll);
 
-	/* ensure parent is set to pll_re_vco */
-
-	val = pll_readl_base(pll);
-	val_aux = pll_readl(pll_params->aux_reg, pll);
-
-	if (val & PLL_BASE_ENABLE) {
-		if ((val_aux & PLLE_AUX_PLLRE_SEL) ||
-			(val_aux & PLLE_AUX_PLLP_SEL))
-			WARN(1, "pll_e enabled with unsupported parent %s\n",
-			  (val_aux & PLLE_AUX_PLLP_SEL) ? "pllp_out0" :
-					"pll_re_vco");
-	} else {
-		val_aux &= ~(PLLE_AUX_PLLRE_SEL | PLLE_AUX_PLLP_SEL);
-		pll_writel(val_aux, pll_params->aux_reg, pll);
-	}
+	_clk_plle_tegra_init_parent(pll);
 
 	clk = _tegra_clk_register_pll(pll, name, parent_name, flags,
 				      &tegra_clk_plle_tegra114_ops);
@@ -2276,6 +2304,7 @@ static const struct clk_ops tegra_clk_pllss_ops = {
 	.recalc_rate = clk_pll_recalc_rate,
 	.round_rate = clk_pll_ramp_round_rate,
 	.set_rate = clk_pllxc_set_rate,
+	.restore_context = tegra_clk_pll_restore_context,
 };
 
 struct clk *tegra_clk_register_pllss(const char *name, const char *parent_name,
@@ -2520,11 +2549,19 @@ out:
 		spin_unlock_irqrestore(pll->lock, flags);
 }
 
+static void tegra_clk_plle_t210_restore_context(struct clk_hw *hw)
+{
+	struct tegra_clk_pll *pll = to_clk_pll(hw);
+
+	_clk_plle_tegra_init_parent(pll);
+}
+
 static const struct clk_ops tegra_clk_plle_tegra210_ops = {
 	.is_enabled =  clk_plle_tegra210_is_enabled,
 	.enable = clk_plle_tegra210_enable,
 	.disable = clk_plle_tegra210_disable,
 	.recalc_rate = clk_pll_recalc_rate,
+	.restore_context = tegra_clk_plle_t210_restore_context,
 };
 
 struct clk *tegra_clk_register_plle_tegra210(const char *name,
@@ -2535,27 +2572,12 @@ struct clk *tegra_clk_register_plle_tegra210(const char *name,
 {
 	struct tegra_clk_pll *pll;
 	struct clk *clk;
-	u32 val, val_aux;
 
 	pll = _tegra_init_pll(clk_base, NULL, pll_params, lock);
 	if (IS_ERR(pll))
 		return ERR_CAST(pll);
 
-	/* ensure parent is set to pll_re_vco */
-
-	val = pll_readl_base(pll);
-	val_aux = pll_readl(pll_params->aux_reg, pll);
-
-	if (val & PLLE_BASE_ENABLE) {
-		if ((val_aux & PLLE_AUX_PLLRE_SEL) ||
-			(val_aux & PLLE_AUX_PLLP_SEL))
-			WARN(1, "pll_e enabled with unsupported parent %s\n",
-			  (val_aux & PLLE_AUX_PLLP_SEL) ? "pllp_out0" :
-					"pll_re_vco");
-	} else {
-		val_aux &= ~(PLLE_AUX_PLLRE_SEL | PLLE_AUX_PLLP_SEL);
-		pll_writel(val_aux, pll_params->aux_reg, pll);
-	}
+	_clk_plle_tegra_init_parent(pll);
 
 	clk = _tegra_clk_register_pll(pll, name, parent_name, flags,
 				      &tegra_clk_plle_tegra210_ops);
diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c
index a5cd3e31dbae..316912d3b1a4 100644
--- a/drivers/clk/tegra/clk-sdmmc-mux.c
+++ b/drivers/clk/tegra/clk-sdmmc-mux.c
@@ -194,6 +194,21 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw)
 	gate_ops->disable(gate_hw);
 }
 
+static void clk_sdmmc_mux_restore_context(struct clk_hw *hw)
+{
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+	unsigned long parent_rate = clk_hw_get_rate(parent);
+	unsigned long rate = clk_hw_get_rate(hw);
+	int parent_id;
+
+	parent_id = clk_hw_get_parent_index(hw);
+	if (WARN_ON(parent_id < 0))
+		return;
+
+	clk_sdmmc_mux_set_parent(hw, parent_id);
+	clk_sdmmc_mux_set_rate(hw, rate, parent_rate);
+}
+
 static const struct clk_ops tegra_clk_sdmmc_mux_ops = {
 	.get_parent = clk_sdmmc_mux_get_parent,
 	.set_parent = clk_sdmmc_mux_set_parent,
@@ -203,6 +218,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = {
 	.is_enabled = clk_sdmmc_mux_is_enabled,
 	.enable = clk_sdmmc_mux_enable,
 	.disable = clk_sdmmc_mux_disable,
+	.restore_context = clk_sdmmc_mux_restore_context,
 };
 
 struct clk *tegra_clk_register_sdmmc_mux_div(const char *name,
diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c
index 39ef31b46df5..6099c6e9acd4 100644
--- a/drivers/clk/tegra/clk-super.c
+++ b/drivers/clk/tegra/clk-super.c
@@ -28,6 +28,9 @@
 #define super_state_to_src_shift(m, s) ((m->width * s))
 #define super_state_to_src_mask(m) (((1 << m->width) - 1))
 
+#define CCLK_SRC_PLLP_OUT0 4
+#define CCLK_SRC_PLLP_OUT4 5
+
 static u8 clk_super_get_parent(struct clk_hw *hw)
 {
 	struct tegra_clk_super_mux *mux = to_clk_super_mux(hw);
@@ -97,12 +100,23 @@ static int clk_super_set_parent(struct clk_hw *hw, u8 index)
 		if (index == mux->div2_index)
 			index = mux->pllx_index;
 	}
+
+	/* enable PLLP branches to CPU before selecting PLLP source */
+	if ((mux->flags & TEGRA210_CPU_CLK) &&
+	    (index == CCLK_SRC_PLLP_OUT0 || index == CCLK_SRC_PLLP_OUT4))
+		tegra_clk_set_pllp_out_cpu(true);
+
 	val &= ~((super_state_to_src_mask(mux)) << shift);
 	val |= (index & (super_state_to_src_mask(mux))) << shift;
 
 	writel_relaxed(val, mux->reg);
 	udelay(2);
 
+	/* disable PLLP branches to CPU if not used */
+	if ((mux->flags & TEGRA210_CPU_CLK) &&
+	    index != CCLK_SRC_PLLP_OUT0 && index != CCLK_SRC_PLLP_OUT4)
+		tegra_clk_set_pllp_out_cpu(false);
+
 out:
 	if (mux->lock)
 		spin_unlock_irqrestore(mux->lock, flags);
@@ -110,9 +124,21 @@ out:
 	return err;
 }
 
+static void clk_super_mux_restore_context(struct clk_hw *hw)
+{
+	int parent_id;
+
+	parent_id = clk_hw_get_parent_index(hw);
+	if (WARN_ON(parent_id < 0))
+		return;
+
+	clk_super_set_parent(hw, parent_id);
+}
+
 static const struct clk_ops tegra_clk_super_mux_ops = {
 	.get_parent = clk_super_get_parent,
 	.set_parent = clk_super_set_parent,
+	.restore_context = clk_super_mux_restore_context,
 };
 
 static long clk_super_round_rate(struct clk_hw *hw, unsigned long rate,
@@ -148,12 +174,27 @@ static int clk_super_set_rate(struct clk_hw *hw, unsigned long rate,
 	return super->div_ops->set_rate(div_hw, rate, parent_rate);
 }
 
+static void clk_super_restore_context(struct clk_hw *hw)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+	int parent_id;
+
+	parent_id = clk_hw_get_parent_index(hw);
+	if (WARN_ON(parent_id < 0))
+		return;
+
+	super->div_ops->restore_context(div_hw);
+	clk_super_set_parent(hw, parent_id);
+}
+
 const struct clk_ops tegra_clk_super_ops = {
 	.get_parent = clk_super_get_parent,
 	.set_parent = clk_super_set_parent,
 	.set_rate = clk_super_set_rate,
 	.round_rate = clk_super_round_rate,
 	.recalc_rate = clk_super_recalc_rate,
+	.restore_context = clk_super_restore_context,
 };
 
 struct clk *tegra_clk_register_super_mux(const char *name,
diff --git a/drivers/clk/tegra/clk-tegra-fixed.c b/drivers/clk/tegra/clk-tegra-fixed.c
index 8d91b2b191cf..7c6c8abfcde6 100644
--- a/drivers/clk/tegra/clk-tegra-fixed.c
+++ b/drivers/clk/tegra/clk-tegra-fixed.c
@@ -17,6 +17,10 @@
 #define OSC_CTRL			0x50
 #define OSC_CTRL_OSC_FREQ_SHIFT		28
 #define OSC_CTRL_PLL_REF_DIV_SHIFT	26
+#define OSC_CTRL_MASK			(0x3f2 |	\
+					(0xf << OSC_CTRL_OSC_FREQ_SHIFT))
+
+static u32 osc_ctrl_ctx;
 
 int __init tegra_osc_clk_init(void __iomem *clk_base, struct tegra_clk *clks,
 			      unsigned long *input_freqs, unsigned int num,
@@ -29,6 +33,7 @@ int __init tegra_osc_clk_init(void __iomem *clk_base, struct tegra_clk *clks,
 	unsigned osc_idx;
 
 	val = readl_relaxed(clk_base + OSC_CTRL);
+	osc_ctrl_ctx = val & OSC_CTRL_MASK;
 	osc_idx = val >> OSC_CTRL_OSC_FREQ_SHIFT;
 
 	if (osc_idx < num)
@@ -96,3 +101,13 @@ void __init tegra_fixed_clk_init(struct tegra_clk *tegra_clks)
 		*dt_clk = clk;
 	}
 }
+
+void tegra_clk_osc_resume(void __iomem *clk_base)
+{
+	u32 val;
+
+	val = readl_relaxed(clk_base + OSC_CTRL) & ~OSC_CTRL_MASK;
+	val |= osc_ctrl_ctx;
+	writel_relaxed(val, clk_base + OSC_CTRL);
+	fence_udelay(2, clk_base);
+}
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 1ed85f120a1b..0d07c0ba49b6 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -262,7 +262,6 @@
 static DEFINE_SPINLOCK(PLLP_OUTA_lock);
 static DEFINE_SPINLOCK(PLLP_OUTB_lock);
 static DEFINE_SPINLOCK(PLLP_OUTC_lock);
-static DEFINE_SPINLOCK(sor0_lock);
 
 #define MUX_I2S_SPDIF(_id)						\
 static const char *mux_pllaout0_##_id##_2x_pllp_clkm[] = { "pll_a_out0", \
@@ -587,11 +586,6 @@ static u32 mux_pllp_pllre_clkm_idx[] = {
 	[0] = 0, [1] = 2, [2] = 3,
 };
 
-static const char *mux_clkm_plldp_sor0lvds[] = {
-	"clk_m", "pll_dp", "sor0_lvds",
-};
-#define mux_clkm_plldp_sor0lvds_idx NULL
-
 static const char * const mux_dmic1[] = {
 	"pll_a_out0", "dmic1_sync_clk", "pll_p", "clk_m"
 };
@@ -731,14 +725,12 @@ static struct tegra_periph_init_data periph_clks[] = {
 	MUX8("hdmi_audio", mux_pllp3_pllc_clkm, CLK_SOURCE_HDMI_AUDIO, 176, TEGRA_PERIPH_NO_RESET, tegra_clk_hdmi_audio),
 	MUX8("clk72mhz", mux_pllp3_pllc_clkm, CLK_SOURCE_CLK72MHZ, 177, TEGRA_PERIPH_NO_RESET, tegra_clk_clk72Mhz),
 	MUX8("clk72mhz", mux_pllp_out3_pllp_pllc_clkm, CLK_SOURCE_CLK72MHZ, 177, TEGRA_PERIPH_NO_RESET, tegra_clk_clk72Mhz_8),
-	MUX8_NOGATE_LOCK("sor0_lvds", mux_pllp_pllm_plld_plla_pllc_plld2_clkm, CLK_SOURCE_SOR0, tegra_clk_sor0_lvds, &sor0_lock),
 	MUX_FLAGS("csite", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_CSITE, 73, TEGRA_PERIPH_ON_APB, tegra_clk_csite, CLK_IGNORE_UNUSED),
 	MUX_FLAGS("csite", mux_pllp_pllre_clkm, CLK_SOURCE_CSITE, 73, TEGRA_PERIPH_ON_APB, tegra_clk_csite_8, CLK_IGNORE_UNUSED),
 	NODIV("disp1", mux_pllp_pllm_plld_plla_pllc_plld2_clkm, CLK_SOURCE_DISP1, 29, 7, 27, 0, tegra_clk_disp1, NULL),
 	NODIV("disp1", mux_pllp_plld_plld2_clkm, CLK_SOURCE_DISP1, 29, 7, 27, 0, tegra_clk_disp1_8, NULL),
 	NODIV("disp2", mux_pllp_pllm_plld_plla_pllc_plld2_clkm, CLK_SOURCE_DISP2, 29, 7, 26, 0, tegra_clk_disp2, NULL),
 	NODIV("disp2", mux_pllp_plld_plld2_clkm, CLK_SOURCE_DISP2, 29, 7, 26, 0, tegra_clk_disp2_8, NULL),
-	NODIV("sor0", mux_clkm_plldp_sor0lvds, CLK_SOURCE_SOR0, 14, 3, 182, 0, tegra_clk_sor0, &sor0_lock),
 	UART("uarta", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTA, 6, tegra_clk_uarta),
 	UART("uartb", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTB, 7, tegra_clk_uartb),
 	UART("uartc", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTC, 55, tegra_clk_uartc),
diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c b/drivers/clk/tegra/clk-tegra-super-gen4.c
index cdfe7c9697e1..5760c978bef7 100644
--- a/drivers/clk/tegra/clk-tegra-super-gen4.c
+++ b/drivers/clk/tegra/clk-tegra-super-gen4.c
@@ -180,7 +180,7 @@ static void __init tegra_super_clk_init(void __iomem *clk_base,
 					gen_info->num_cclk_g_parents,
 					CLK_SET_RATE_PARENT,
 					clk_base + CCLKG_BURST_POLICY,
-					0, 4, 8, 0, NULL);
+					TEGRA210_CPU_CLK, 4, 8, 0, NULL);
 		} else {
 			clk = tegra_clk_register_super_mux("cclk_g",
 					gen_info->cclk_g_parents,
@@ -196,6 +196,11 @@ static void __init tegra_super_clk_init(void __iomem *clk_base,
 	dt_clk = tegra_lookup_dt_id(tegra_clk_cclk_lp, tegra_clks);
 	if (dt_clk) {
 		if (gen_info->gen == gen5) {
+			/*
+			 * TEGRA210_CPU_CLK flag is not needed for cclk_lp as
+			 * cluster switching is not currently supported on
+			 * Tegra210 and also cpu_lp is not used.
+			 */
 			clk = tegra_clk_register_super_mux("cclk_lp",
 					gen_info->cclk_lp_parents,
 					gen_info->num_cclk_lp_parents,
diff --git a/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c b/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c
index e84b6d52cbbd..2ac2679d696d 100644
--- a/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c
+++ b/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c
@@ -631,6 +631,7 @@ static int tegra124_dfll_fcpu_remove(struct platform_device *pdev)
 static const struct dev_pm_ops tegra124_dfll_pm_ops = {
 	SET_RUNTIME_PM_OPS(tegra_dfll_runtime_suspend,
 			   tegra_dfll_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_dfll_suspend, tegra_dfll_resume)
 };
 
 static struct platform_driver tegra124_dfll_fcpu_driver = {
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index 0224fdc4766f..b3110d5b5a6c 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -27,6 +27,7 @@
 
 #define CLK_SOURCE_CSITE 0x1d4
 #define CLK_SOURCE_EMC 0x19c
+#define CLK_SOURCE_SOR0 0x414
 
 #define RST_DFLL_DVCO			0x2f4
 #define DVFS_DFLL_RESET_SHIFT		0
@@ -91,6 +92,22 @@
 /* Tegra CPU clock and reset control regs */
 #define CLK_RST_CONTROLLER_CPU_CMPLX_STATUS	0x470
 
+#define MASK(x) (BIT(x) - 1)
+
+#define MUX8_NOGATE_LOCK(_name, _parents, _offset, _clk_id, _lock)	\
+	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,	\
+			      29, MASK(3), 0, 0, 8, 1, TEGRA_DIVIDER_ROUND_UP,\
+			      0, TEGRA_PERIPH_NO_GATE, _clk_id,\
+			      _parents##_idx, 0, _lock)
+
+#define NODIV(_name, _parents, _offset, \
+			      _mux_shift, _mux_mask, _clk_num, \
+			      _gate_flags, _clk_id, _lock)		\
+	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,\
+			_mux_shift, _mux_mask, 0, 0, 0, 0, 0,\
+			_clk_num, (_gate_flags) | TEGRA_PERIPH_NO_DIV,\
+			_clk_id, _parents##_idx, 0, _lock)
+
 #ifdef CONFIG_PM_SLEEP
 static struct cpu_clk_suspend_context {
 	u32 clk_csite_src;
@@ -110,6 +127,7 @@ static DEFINE_SPINLOCK(pll_e_lock);
 static DEFINE_SPINLOCK(pll_re_lock);
 static DEFINE_SPINLOCK(pll_u_lock);
 static DEFINE_SPINLOCK(emc_lock);
+static DEFINE_SPINLOCK(sor0_lock);
 
 /* possible OSC frequencies in Hz */
 static unsigned long tegra124_input_freq[] = {
@@ -829,7 +847,7 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_adx1] = { .dt_id = TEGRA124_CLK_ADX1, .present = true },
 	[tegra_clk_dpaux] = { .dt_id = TEGRA124_CLK_DPAUX, .present = true },
 	[tegra_clk_sor0] = { .dt_id = TEGRA124_CLK_SOR0, .present = true },
-	[tegra_clk_sor0_lvds] = { .dt_id = TEGRA124_CLK_SOR0_LVDS, .present = true },
+	[tegra_clk_sor0_out] = { .dt_id = TEGRA124_CLK_SOR0_OUT, .present = true },
 	[tegra_clk_gpu] = { .dt_id = TEGRA124_CLK_GPU, .present = true },
 	[tegra_clk_amx1] = { .dt_id = TEGRA124_CLK_AMX1, .present = true },
 	[tegra_clk_uartb] = { .dt_id = TEGRA124_CLK_UARTB, .present = true },
@@ -987,12 +1005,33 @@ static struct tegra_devclk devclks[] __initdata = {
 	{ .con_id = "hda2hdmi", .dt_id = TEGRA124_CLK_HDA2HDMI },
 };
 
+static const char * const sor0_parents[] = {
+	"pll_p_out0", "pll_m_out0", "pll_d_out0", "pll_a_out0", "pll_c_out0",
+	"pll_d2_out0", "clk_m",
+};
+
+static const char * const sor0_out_parents[] = {
+	"clk_m", "sor0_pad_clkout",
+};
+
+static struct tegra_periph_init_data tegra124_periph[] = {
+	TEGRA_INIT_DATA_TABLE("sor0", NULL, NULL, sor0_parents,
+			      CLK_SOURCE_SOR0, 29, 0x7, 0, 0, 0, 0,
+			      0, 182, 0, tegra_clk_sor0, NULL, 0,
+			      &sor0_lock),
+	TEGRA_INIT_DATA_TABLE("sor0_out", NULL, NULL, sor0_out_parents,
+			      CLK_SOURCE_SOR0, 14, 0x1, 0, 0, 0, 0,
+			      0, 0, TEGRA_PERIPH_NO_GATE, tegra_clk_sor0_out,
+			      NULL, 0, &sor0_lock),
+};
+
 static struct clk **clks;
 
 static __init void tegra124_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
 	struct clk *clk;
+	unsigned int i;
 
 	/* xusb_ss_div2 */
 	clk = clk_register_fixed_factor(NULL, "xusb_ss_div2", "xusb_ss_src", 0,
@@ -1033,6 +1072,20 @@ static __init void tegra124_periph_clk_init(void __iomem *clk_base,
 	clk_register_clkdev(clk, "cml1", NULL);
 	clks[TEGRA124_CLK_CML1] = clk;
 
+	for (i = 0; i < ARRAY_SIZE(tegra124_periph); i++) {
+		struct tegra_periph_init_data *init = &tegra124_periph[i];
+		struct clk **clkp;
+
+		clkp = tegra_lookup_dt_id(init->clk_id, tegra124_clks);
+		if (!clkp) {
+			pr_warn("clock %u not found\n", init->clk_id);
+			continue;
+		}
+
+		clk = tegra_clk_register_periph_data(clk_base, init);
+		*clkp = clk;
+	}
+
 	tegra_periph_clk_init(clk_base, pmc_base, tegra124_clks, &pll_p_params);
 }
 
diff --git a/drivers/clk/tegra/clk-tegra20-emc.c b/drivers/clk/tegra/clk-tegra20-emc.c
new file mode 100644
index 000000000000..03bf0009a33c
--- /dev/null
+++ b/drivers/clk/tegra/clk-tegra20-emc.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Based on drivers/clk/tegra/clk-emc.c
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Author: Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2019 GRATE-DRIVER project
+ */
+
+#define pr_fmt(fmt)	"tegra-emc-clk: " fmt
+
+#include <linux/bits.h>
+#include <linux/clk-provider.h>
+#include <linux/clk/tegra.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "clk.h"
+
+#define CLK_SOURCE_EMC_2X_CLK_DIVISOR_MASK	GENMASK(7, 0)
+#define CLK_SOURCE_EMC_2X_CLK_SRC_MASK		GENMASK(31, 30)
+#define CLK_SOURCE_EMC_2X_CLK_SRC_SHIFT		30
+
+#define MC_EMC_SAME_FREQ	BIT(16)
+#define USE_PLLM_UD		BIT(29)
+
+#define EMC_SRC_PLL_M		0
+#define EMC_SRC_PLL_C		1
+#define EMC_SRC_PLL_P		2
+#define EMC_SRC_CLK_M		3
+
+static const char * const emc_parent_clk_names[] = {
+	"pll_m", "pll_c", "pll_p", "clk_m",
+};
+
+struct tegra_clk_emc {
+	struct clk_hw hw;
+	void __iomem *reg;
+	bool mc_same_freq;
+	bool want_low_jitter;
+
+	tegra20_clk_emc_round_cb *round_cb;
+	void *cb_arg;
+};
+
+static inline struct tegra_clk_emc *to_tegra_clk_emc(struct clk_hw *hw)
+{
+	return container_of(hw, struct tegra_clk_emc, hw);
+}
+
+static unsigned long emc_recalc_rate(struct clk_hw *hw,
+				     unsigned long parent_rate)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+	u32 val, div;
+
+	val = readl_relaxed(emc->reg);
+	div = val & CLK_SOURCE_EMC_2X_CLK_DIVISOR_MASK;
+
+	return DIV_ROUND_UP(parent_rate * 2, div + 2);
+}
+
+static u8 emc_get_parent(struct clk_hw *hw)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+
+	return readl_relaxed(emc->reg) >> CLK_SOURCE_EMC_2X_CLK_SRC_SHIFT;
+}
+
+static int emc_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+	u32 val, div;
+
+	val = readl_relaxed(emc->reg);
+	val &= ~CLK_SOURCE_EMC_2X_CLK_SRC_MASK;
+	val |= index << CLK_SOURCE_EMC_2X_CLK_SRC_SHIFT;
+
+	div = val & CLK_SOURCE_EMC_2X_CLK_DIVISOR_MASK;
+
+	if (index == EMC_SRC_PLL_M && div == 0 && emc->want_low_jitter)
+		val |= USE_PLLM_UD;
+	else
+		val &= ~USE_PLLM_UD;
+
+	if (emc->mc_same_freq)
+		val |= MC_EMC_SAME_FREQ;
+	else
+		val &= ~MC_EMC_SAME_FREQ;
+
+	writel_relaxed(val, emc->reg);
+
+	fence_udelay(1, emc->reg);
+
+	return 0;
+}
+
+static int emc_set_rate(struct clk_hw *hw, unsigned long rate,
+			unsigned long parent_rate)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+	unsigned int index;
+	u32 val, div;
+
+	div = div_frac_get(rate, parent_rate, 8, 1, 0);
+
+	val = readl_relaxed(emc->reg);
+	val &= ~CLK_SOURCE_EMC_2X_CLK_DIVISOR_MASK;
+	val |= div;
+
+	index = val >> CLK_SOURCE_EMC_2X_CLK_SRC_SHIFT;
+
+	if (index == EMC_SRC_PLL_M && div == 0 && emc->want_low_jitter)
+		val |= USE_PLLM_UD;
+	else
+		val &= ~USE_PLLM_UD;
+
+	if (emc->mc_same_freq)
+		val |= MC_EMC_SAME_FREQ;
+	else
+		val &= ~MC_EMC_SAME_FREQ;
+
+	writel_relaxed(val, emc->reg);
+
+	fence_udelay(1, emc->reg);
+
+	return 0;
+}
+
+static int emc_set_rate_and_parent(struct clk_hw *hw,
+				   unsigned long rate,
+				   unsigned long parent_rate,
+				   u8 index)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+	u32 val, div;
+
+	div = div_frac_get(rate, parent_rate, 8, 1, 0);
+
+	val = readl_relaxed(emc->reg);
+
+	val &= ~CLK_SOURCE_EMC_2X_CLK_SRC_MASK;
+	val |= index << CLK_SOURCE_EMC_2X_CLK_SRC_SHIFT;
+
+	val &= ~CLK_SOURCE_EMC_2X_CLK_DIVISOR_MASK;
+	val |= div;
+
+	if (index == EMC_SRC_PLL_M && div == 0 && emc->want_low_jitter)
+		val |= USE_PLLM_UD;
+	else
+		val &= ~USE_PLLM_UD;
+
+	if (emc->mc_same_freq)
+		val |= MC_EMC_SAME_FREQ;
+	else
+		val &= ~MC_EMC_SAME_FREQ;
+
+	writel_relaxed(val, emc->reg);
+
+	fence_udelay(1, emc->reg);
+
+	return 0;
+}
+
+static int emc_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
+{
+	struct tegra_clk_emc *emc = to_tegra_clk_emc(hw);
+	struct clk_hw *parent_hw;
+	unsigned long divided_rate;
+	unsigned long parent_rate;
+	unsigned int i;
+	long emc_rate;
+	int div;
+
+	emc_rate = emc->round_cb(req->rate, req->min_rate, req->max_rate,
+				 emc->cb_arg);
+	if (emc_rate < 0)
+		return emc_rate;
+
+	for (i = 0; i < ARRAY_SIZE(emc_parent_clk_names); i++) {
+		parent_hw = clk_hw_get_parent_by_index(hw, i);
+
+		if (req->best_parent_hw == parent_hw)
+			parent_rate = req->best_parent_rate;
+		else
+			parent_rate = clk_hw_get_rate(parent_hw);
+
+		if (emc_rate > parent_rate)
+			continue;
+
+		div = div_frac_get(emc_rate, parent_rate, 8, 1, 0);
+		divided_rate = DIV_ROUND_UP(parent_rate * 2, div + 2);
+
+		if (divided_rate != emc_rate)
+			continue;
+
+		req->best_parent_rate = parent_rate;
+		req->best_parent_hw = parent_hw;
+		req->rate = emc_rate;
+		break;
+	}
+
+	if (i == ARRAY_SIZE(emc_parent_clk_names)) {
+		pr_err_once("can't find parent for rate %lu emc_rate %lu\n",
+			    req->rate, emc_rate);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct clk_ops tegra_clk_emc_ops = {
+	.recalc_rate = emc_recalc_rate,
+	.get_parent = emc_get_parent,
+	.set_parent = emc_set_parent,
+	.set_rate = emc_set_rate,
+	.set_rate_and_parent = emc_set_rate_and_parent,
+	.determine_rate = emc_determine_rate,
+};
+
+void tegra20_clk_set_emc_round_callback(tegra20_clk_emc_round_cb *round_cb,
+					void *cb_arg)
+{
+	struct clk *clk = __clk_lookup("emc");
+	struct tegra_clk_emc *emc;
+	struct clk_hw *hw;
+
+	if (clk) {
+		hw = __clk_get_hw(clk);
+		emc = to_tegra_clk_emc(hw);
+
+		emc->round_cb = round_cb;
+		emc->cb_arg = cb_arg;
+	}
+}
+
+bool tegra20_clk_emc_driver_available(struct clk_hw *emc_hw)
+{
+	return to_tegra_clk_emc(emc_hw)->round_cb != NULL;
+}
+
+struct clk *tegra20_clk_register_emc(void __iomem *ioaddr, bool low_jitter)
+{
+	struct tegra_clk_emc *emc;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	emc = kzalloc(sizeof(*emc), GFP_KERNEL);
+	if (!emc)
+		return NULL;
+
+	/*
+	 * EMC stands for External Memory Controller.
+	 *
+	 * We don't want EMC clock to be disabled ever by gating its
+	 * parent and whatnot because system is busted immediately in that
+	 * case, hence the clock is marked as critical.
+	 */
+	init.name = "emc";
+	init.ops = &tegra_clk_emc_ops;
+	init.flags = CLK_IS_CRITICAL;
+	init.parent_names = emc_parent_clk_names;
+	init.num_parents = ARRAY_SIZE(emc_parent_clk_names);
+
+	emc->reg = ioaddr;
+	emc->hw.init = &init;
+	emc->want_low_jitter = low_jitter;
+
+	clk = clk_register(NULL, &emc->hw);
+	if (IS_ERR(clk)) {
+		kfree(emc);
+		return NULL;
+	}
+
+	return clk;
+}
+
+int tegra20_clk_prepare_emc_mc_same_freq(struct clk *emc_clk, bool same)
+{
+	struct tegra_clk_emc *emc;
+	struct clk_hw *hw;
+
+	if (!emc_clk)
+		return -EINVAL;
+
+	hw = __clk_get_hw(emc_clk);
+	emc = to_tegra_clk_emc(hw);
+	emc->mc_same_freq = same;
+
+	return 0;
+}
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index bcd871134f45..4d8222f5c638 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -130,8 +130,6 @@ static struct cpu_clk_suspend_context {
 static void __iomem *clk_base;
 static void __iomem *pmc_base;
 
-static DEFINE_SPINLOCK(emc_lock);
-
 #define TEGRA_INIT_DATA_MUX(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
 	TEGRA_INIT_DATA(_name, NULL, NULL, _parents, _offset,	\
@@ -760,7 +758,6 @@ static const char *pwm_parents[] = { "pll_p", "pll_c", "audio", "clk_m",
 static const char *mux_pllpcm_clkm[] = { "pll_p", "pll_c", "pll_m", "clk_m" };
 static const char *mux_pllpdc_clkm[] = { "pll_p", "pll_d_out0", "pll_c",
 					 "clk_m" };
-static const char *mux_pllmcp_clkm[] = { "pll_m", "pll_c", "pll_p", "clk_m" };
 
 static struct tegra_periph_init_data tegra_periph_clk_list[] = {
 	TEGRA_INIT_DATA_MUX("i2s1", i2s1_parents,     CLK_SOURCE_I2S1,   11, TEGRA_PERIPH_ON_APB, TEGRA20_CLK_I2S1),
@@ -787,41 +784,6 @@ static struct tegra_periph_init_data tegra_periph_nodiv_clk_list[] = {
 	TEGRA_INIT_DATA_NODIV("disp2",	mux_pllpdc_clkm, CLK_SOURCE_DISP2, 30, 2, 26,  0, TEGRA20_CLK_DISP2),
 };
 
-static void __init tegra20_emc_clk_init(void)
-{
-	const u32 use_pllm_ud = BIT(29);
-	struct clk *clk;
-	u32 emc_reg;
-
-	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
-			       ARRAY_SIZE(mux_pllmcp_clkm),
-			       CLK_SET_RATE_NO_REPARENT,
-			       clk_base + CLK_SOURCE_EMC,
-			       30, 2, 0, &emc_lock);
-
-	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
-				    &emc_lock);
-	clks[TEGRA20_CLK_MC] = clk;
-
-	/* un-divided pll_m_out0 is currently unsupported */
-	emc_reg = readl_relaxed(clk_base + CLK_SOURCE_EMC);
-	if (emc_reg & use_pllm_ud) {
-		pr_err("%s: un-divided PllM_out0 used as clock source\n",
-		       __func__);
-		return;
-	}
-
-	/*
-	 * Note that 'emc_mux' source and 'emc' rate shouldn't be changed at
-	 * the same time due to a HW bug, this won't happen because we're
-	 * defining 'emc_mux' and 'emc' as distinct clocks.
-	 */
-	clk = tegra_clk_register_divider("emc", "emc_mux",
-				clk_base + CLK_SOURCE_EMC, CLK_IS_CRITICAL,
-				TEGRA_DIVIDER_INT, 0, 8, 1, &emc_lock);
-	clks[TEGRA20_CLK_EMC] = clk;
-}
-
 static void __init tegra20_periph_clk_init(void)
 {
 	struct tegra_periph_init_data *data;
@@ -835,7 +797,13 @@ static void __init tegra20_periph_clk_init(void)
 	clks[TEGRA20_CLK_AC97] = clk;
 
 	/* emc */
-	tegra20_emc_clk_init();
+	clk = tegra20_clk_register_emc(clk_base + CLK_SOURCE_EMC, false);
+
+	clks[TEGRA20_CLK_EMC] = clk;
+
+	clk = tegra_clk_register_mc("mc", "emc", clk_base + CLK_SOURCE_EMC,
+				    NULL);
+	clks[TEGRA20_CLK_MC] = clk;
 
 	/* dsi */
 	clk = tegra_clk_register_periph_gate("dsi", "pll_d", 0, clk_base, 0,
@@ -987,6 +955,7 @@ static void tegra20_cpu_clock_suspend(void)
 static void tegra20_cpu_clock_resume(void)
 {
 	unsigned int reg, policy;
+	u32 misc, base;
 
 	/* Is CPU complex already running on PLLX? */
 	reg = readl(clk_base + CCLK_BURST_POLICY);
@@ -1000,15 +969,21 @@ static void tegra20_cpu_clock_resume(void)
 		BUG();
 
 	if (reg != CCLK_BURST_POLICY_PLLX) {
-		/* restore PLLX settings if CPU is on different PLL */
-		writel(tegra20_cpu_clk_sctx.pllx_misc,
-					clk_base + PLLX_MISC);
-		writel(tegra20_cpu_clk_sctx.pllx_base,
-					clk_base + PLLX_BASE);
-
-		/* wait for PLL stabilization if PLLX was enabled */
-		if (tegra20_cpu_clk_sctx.pllx_base & (1 << 30))
-			udelay(300);
+		misc = readl_relaxed(clk_base + PLLX_MISC);
+		base = readl_relaxed(clk_base + PLLX_BASE);
+
+		if (misc != tegra20_cpu_clk_sctx.pllx_misc ||
+		    base != tegra20_cpu_clk_sctx.pllx_base) {
+			/* restore PLLX settings if CPU is on different PLL */
+			writel(tegra20_cpu_clk_sctx.pllx_misc,
+						clk_base + PLLX_MISC);
+			writel(tegra20_cpu_clk_sctx.pllx_base,
+						clk_base + PLLX_BASE);
+
+			/* wait for PLL stabilization if PLLX was enabled */
+			if (tegra20_cpu_clk_sctx.pllx_base & (1 << 30))
+				udelay(300);
+		}
 	}
 
 	/*
@@ -1115,6 +1090,8 @@ static struct clk *tegra20_clk_src_onecell_get(struct of_phandle_args *clkspec,
 	if (IS_ERR(clk))
 		return clk;
 
+	hw = __clk_get_hw(clk);
+
 	/*
 	 * Tegra20 CDEV1 and CDEV2 clocks are a bit special case, their parent
 	 * clock is created by the pinctrl driver. It is possible for clk user
@@ -1124,13 +1101,16 @@ static struct clk *tegra20_clk_src_onecell_get(struct of_phandle_args *clkspec,
 	 */
 	if (clkspec->args[0] == TEGRA20_CLK_CDEV1 ||
 	    clkspec->args[0] == TEGRA20_CLK_CDEV2) {
-		hw = __clk_get_hw(clk);
-
 		parent_hw = clk_hw_get_parent(hw);
 		if (!parent_hw)
 			return ERR_PTR(-EPROBE_DEFER);
 	}
 
+	if (clkspec->args[0] == TEGRA20_CLK_EMC) {
+		if (!tegra20_clk_emc_driver_available(hw))
+			return ERR_PTR(-EPROBE_DEFER);
+	}
+
 	return clk;
 }
 
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index df172d5772d7..762cd186f714 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -9,13 +9,13 @@
 #include <linux/clkdev.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/clk/tegra.h>
 #include <dt-bindings/clock/tegra210-car.h>
 #include <dt-bindings/reset/tegra210-car.h>
-#include <linux/iopoll.h>
 #include <linux/sizes.h>
 #include <soc/tegra/pmc.h>
 
@@ -33,6 +33,7 @@
 #define CLK_SOURCE_CSITE 0x1d4
 #define CLK_SOURCE_EMC 0x19c
 #define CLK_SOURCE_SOR1 0x410
+#define CLK_SOURCE_SOR0 0x414
 #define CLK_SOURCE_LA 0x1f8
 #define CLK_SOURCE_SDMMC2 0x154
 #define CLK_SOURCE_SDMMC4 0x164
@@ -220,11 +221,15 @@
 #define CLK_M_DIVISOR_SHIFT 2
 #define CLK_M_DIVISOR_MASK 0x3
 
+#define CLK_MASK_ARM	0x44
+#define MISC_CLK_ENB	0x48
+
 #define RST_DFLL_DVCO 0x2f4
 #define DVFS_DFLL_RESET_SHIFT 0
 
 #define CLK_RST_CONTROLLER_RST_DEV_Y_SET 0x2a8
 #define CLK_RST_CONTROLLER_RST_DEV_Y_CLR 0x2ac
+#define CPU_SOFTRST_CTRL 0x380
 
 #define LVL2_CLK_GATE_OVRA 0xf8
 #define LVL2_CLK_GATE_OVRC 0x3a0
@@ -298,6 +303,7 @@ static DEFINE_SPINLOCK(pll_d_lock);
 static DEFINE_SPINLOCK(pll_e_lock);
 static DEFINE_SPINLOCK(pll_re_lock);
 static DEFINE_SPINLOCK(pll_u_lock);
+static DEFINE_SPINLOCK(sor0_lock);
 static DEFINE_SPINLOCK(sor1_lock);
 static DEFINE_SPINLOCK(emc_lock);
 static DEFINE_MUTEX(lvl2_ovr_lock);
@@ -2351,9 +2357,9 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_dpaux] = { .dt_id = TEGRA210_CLK_DPAUX, .present = true },
 	[tegra_clk_dpaux1] = { .dt_id = TEGRA210_CLK_DPAUX1, .present = true },
 	[tegra_clk_sor0] = { .dt_id = TEGRA210_CLK_SOR0, .present = true },
-	[tegra_clk_sor0_lvds] = { .dt_id = TEGRA210_CLK_SOR0_LVDS, .present = true },
+	[tegra_clk_sor0_out] = { .dt_id = TEGRA210_CLK_SOR0_OUT, .present = true },
 	[tegra_clk_sor1] = { .dt_id = TEGRA210_CLK_SOR1, .present = true },
-	[tegra_clk_sor1_src] = { .dt_id = TEGRA210_CLK_SOR1_SRC, .present = true },
+	[tegra_clk_sor1_out] = { .dt_id = TEGRA210_CLK_SOR1_OUT, .present = true },
 	[tegra_clk_gpu] = { .dt_id = TEGRA210_CLK_GPU, .present = true },
 	[tegra_clk_pll_g_ref] = { .dt_id = TEGRA210_CLK_PLL_G_REF, .present = true, },
 	[tegra_clk_uartb_8] = { .dt_id = TEGRA210_CLK_UARTB, .present = true },
@@ -2551,7 +2557,6 @@ static struct tegra_devclk devclks[] __initdata = {
 	{ .con_id = "pll_c4_out2", .dt_id = TEGRA210_CLK_PLL_C4_OUT2 },
 	{ .con_id = "pll_c4_out3", .dt_id = TEGRA210_CLK_PLL_C4_OUT3 },
 	{ .con_id = "dpaux", .dt_id = TEGRA210_CLK_DPAUX },
-	{ .con_id = "sor0", .dt_id = TEGRA210_CLK_SOR0 },
 };
 
 static struct tegra_audio_clk_info tegra210_audio_plls[] = {
@@ -2825,6 +2830,7 @@ static int tegra210_enable_pllu(void)
 	struct tegra_clk_pll_freq_table *fentry;
 	struct tegra_clk_pll pllu;
 	u32 reg;
+	int ret;
 
 	for (fentry = pll_u_freq_table; fentry->input_rate; fentry++) {
 		if (fentry->input_rate == pll_ref_freq)
@@ -2841,7 +2847,7 @@ static int tegra210_enable_pllu(void)
 	reg = readl_relaxed(clk_base + pllu.params->ext_misc_reg[0]);
 	reg &= ~BIT(pllu.params->iddq_bit_idx);
 	writel_relaxed(reg, clk_base + pllu.params->ext_misc_reg[0]);
-	udelay(5);
+	fence_udelay(5, clk_base);
 
 	reg = readl_relaxed(clk_base + PLLU_BASE);
 	reg &= ~GENMASK(20, 0);
@@ -2849,13 +2855,18 @@ static int tegra210_enable_pllu(void)
 	reg |= fentry->n << 8;
 	reg |= fentry->p << 16;
 	writel(reg, clk_base + PLLU_BASE);
-	udelay(1);
+	fence_udelay(1, clk_base);
 	reg |= PLL_ENABLE;
 	writel(reg, clk_base + PLLU_BASE);
 
-	readl_relaxed_poll_timeout_atomic(clk_base + PLLU_BASE, reg,
-					  reg & PLL_BASE_LOCK, 2, 1000);
-	if (!(reg & PLL_BASE_LOCK)) {
+	/*
+	 * During clocks resume, same PLLU init and enable sequence get
+	 * executed. So, readx_poll_timeout_atomic can't be used here as it
+	 * uses ktime_get() and timekeeping resume doesn't happen by that
+	 * time. So, using tegra210_wait_for_mask for PLL LOCK.
+	 */
+	ret = tegra210_wait_for_mask(&pllu, PLLU_BASE, PLL_BASE_LOCK);
+	if (ret) {
 		pr_err("Timed out waiting for PLL_U to lock\n");
 		return -ETIMEDOUT;
 	}
@@ -2895,12 +2906,12 @@ static int tegra210_init_pllu(void)
 		reg = readl_relaxed(clk_base + XUSB_PLL_CFG0);
 		reg &= ~XUSB_PLL_CFG0_PLLU_LOCK_DLY_MASK;
 		writel_relaxed(reg, clk_base + XUSB_PLL_CFG0);
-		udelay(1);
+		fence_udelay(1, clk_base);
 
 		reg = readl_relaxed(clk_base + PLLU_HW_PWRDN_CFG0);
 		reg |= PLLU_HW_PWRDN_CFG0_SEQ_ENABLE;
 		writel_relaxed(reg, clk_base + PLLU_HW_PWRDN_CFG0);
-		udelay(1);
+		fence_udelay(1, clk_base);
 
 		reg = readl_relaxed(clk_base + PLLU_BASE);
 		reg &= ~PLLU_BASE_CLKENABLE_USB;
@@ -2915,6 +2926,39 @@ static int tegra210_init_pllu(void)
 	return 0;
 }
 
+/*
+ * The SOR hardware blocks are driven by two clocks: a module clock that is
+ * used to access registers and a pixel clock that is sourced from the same
+ * pixel clock that also drives the head attached to the SOR. The module
+ * clock is typically called sorX (with X being the SOR instance) and the
+ * pixel clock is called sorX_out. The source for the SOR pixel clock is
+ * referred to as the "parent" clock.
+ *
+ * On Tegra186 and newer, clocks are provided by the BPMP. Unfortunately the
+ * BPMP implementation for the SOR clocks doesn't exactly match the above in
+ * some aspects. For example, the SOR module is really clocked by the pad or
+ * sor_safe clocks, but BPMP models the sorX clock as being sourced by the
+ * pixel clocks. Conversely the sorX_out clock is sourced by the sor_safe or
+ * pad clocks on BPMP.
+ *
+ * In order to allow the display driver to deal with all SoC generations in
+ * a unified way, implement the BPMP semantics in this driver.
+ */
+
+static const char * const sor0_parents[] = {
+	"pll_d_out0",
+};
+
+static const char * const sor0_out_parents[] = {
+	"sor_safe", "sor0_pad_clkout",
+};
+
+static const char * const sor1_parents[] = {
+	"pll_p", "pll_d_out0", "pll_d2_out0", "clk_m",
+};
+
+static u32 sor1_parents_idx[] = { 0, 2, 5, 6 };
+
 static const char * const sor1_out_parents[] = {
 	/*
 	 * Bit 0 of the mux selects sor1_pad_clkout, irrespective of bit 1, so
@@ -2923,20 +2967,31 @@ static const char * const sor1_out_parents[] = {
 	 * these bits to 0b11. While not an invalid setting, code should
 	 * always set the bits to 0b01 to select sor1_pad_clkout.
 	 */
-	"sor_safe", "sor1_pad_clkout", "sor1", "sor1_pad_clkout",
+	"sor_safe", "sor1_pad_clkout", "sor1_out", "sor1_pad_clkout",
 };
 
-static const char * const sor1_parents[] = {
-	"pll_p", "pll_d_out0", "pll_d2_out0", "clk_m",
-};
-
-static u32 sor1_parents_idx[] = { 0, 2, 5, 6 };
-
 static struct tegra_periph_init_data tegra210_periph[] = {
+	/*
+	 * On Tegra210, the sor0 clock doesn't have a mux it bitfield 31:29,
+	 * but it is hardwired to the pll_d_out0 clock.
+	 */
+	TEGRA_INIT_DATA_TABLE("sor0", NULL, NULL, sor0_parents,
+			      CLK_SOURCE_SOR0, 29, 0x0, 0, 0, 0, 0,
+			      0, 182, 0, tegra_clk_sor0, NULL, 0,
+			      &sor0_lock),
+	TEGRA_INIT_DATA_TABLE("sor0_out", NULL, NULL, sor0_out_parents,
+			      CLK_SOURCE_SOR0, 14, 0x1, 0, 0, 0, 0,
+			      0, 0, TEGRA_PERIPH_NO_GATE, tegra_clk_sor0_out,
+			      NULL, 0, &sor0_lock),
 	TEGRA_INIT_DATA_TABLE("sor1", NULL, NULL, sor1_parents,
 			      CLK_SOURCE_SOR1, 29, 0x7, 0, 0, 8, 1,
-			      TEGRA_DIVIDER_ROUND_UP, 183, 0, tegra_clk_sor1,
-			      sor1_parents_idx, 0, &sor1_lock),
+			      TEGRA_DIVIDER_ROUND_UP, 183, 0,
+			      tegra_clk_sor1, sor1_parents_idx, 0,
+			      &sor1_lock),
+	TEGRA_INIT_DATA_TABLE("sor1_out", NULL, NULL, sor1_out_parents,
+			      CLK_SOURCE_SOR1, 14, 0x3, 0, 0, 0, 0,
+			      0, 0, TEGRA_PERIPH_NO_GATE,
+			      tegra_clk_sor1_out, NULL, 0, &sor1_lock),
 };
 
 static const char * const la_parents[] = {
@@ -2969,12 +3024,6 @@ static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					      1, 17, 207);
 	clks[TEGRA210_CLK_DPAUX1] = clk;
 
-	clk = clk_register_mux_table(NULL, "sor1_out", sor1_out_parents,
-				     ARRAY_SIZE(sor1_out_parents), 0,
-				     clk_base + CLK_SOURCE_SOR1, 14, 0x3,
-				     0, NULL, &sor1_lock);
-	clks[TEGRA210_CLK_SOR1_OUT] = clk;
-
 	/* pll_d_dsi_out */
 	clk = clk_register_gate(NULL, "pll_d_dsi_out", "pll_d_out0", 0,
 				clk_base + PLLD_MISC0, 21, 0, &pll_d_lock);
@@ -3287,6 +3336,77 @@ static void tegra210_disable_cpu_clock(u32 cpu)
 }
 
 #ifdef CONFIG_PM_SLEEP
+#define car_readl(_base, _off) readl_relaxed(clk_base + (_base) + ((_off) * 4))
+#define car_writel(_val, _base, _off) \
+		writel_relaxed(_val, clk_base + (_base) + ((_off) * 4))
+
+static u32 spare_reg_ctx, misc_clk_enb_ctx, clk_msk_arm_ctx;
+static u32 cpu_softrst_ctx[3];
+
+static int tegra210_clk_suspend(void)
+{
+	unsigned int i;
+
+	clk_save_context();
+
+	/*
+	 * Save the bootloader configured clock registers SPARE_REG0,
+	 * MISC_CLK_ENB, CLK_MASK_ARM, CPU_SOFTRST_CTRL.
+	 */
+	spare_reg_ctx = readl_relaxed(clk_base + SPARE_REG0);
+	misc_clk_enb_ctx = readl_relaxed(clk_base + MISC_CLK_ENB);
+	clk_msk_arm_ctx = readl_relaxed(clk_base + CLK_MASK_ARM);
+
+	for (i = 0; i < ARRAY_SIZE(cpu_softrst_ctx); i++)
+		cpu_softrst_ctx[i] = car_readl(CPU_SOFTRST_CTRL, i);
+
+	tegra_clk_periph_suspend();
+	return 0;
+}
+
+static void tegra210_clk_resume(void)
+{
+	unsigned int i;
+
+	tegra_clk_osc_resume(clk_base);
+
+	/*
+	 * Restore the bootloader configured clock registers SPARE_REG0,
+	 * MISC_CLK_ENB, CLK_MASK_ARM, CPU_SOFTRST_CTRL from saved context.
+	 */
+	writel_relaxed(spare_reg_ctx, clk_base + SPARE_REG0);
+	writel_relaxed(misc_clk_enb_ctx, clk_base + MISC_CLK_ENB);
+	writel_relaxed(clk_msk_arm_ctx, clk_base + CLK_MASK_ARM);
+
+	for (i = 0; i < ARRAY_SIZE(cpu_softrst_ctx); i++)
+		car_writel(cpu_softrst_ctx[i], CPU_SOFTRST_CTRL, i);
+
+	/*
+	 * Tegra clock programming sequence recommends peripheral clock to
+	 * be enabled prior to changing its clock source and divider to
+	 * prevent glitchless frequency switch.
+	 * So, enable all peripheral clocks before restoring their source
+	 * and dividers.
+	 */
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_L, clk_base + CLK_OUT_ENB_L);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_H, clk_base + CLK_OUT_ENB_H);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_U, clk_base + CLK_OUT_ENB_U);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_V, clk_base + CLK_OUT_ENB_V);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_W, clk_base + CLK_OUT_ENB_W);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_X, clk_base + CLK_OUT_ENB_X);
+	writel_relaxed(TEGRA210_CLK_ENB_VLD_MSK_Y, clk_base + CLK_OUT_ENB_Y);
+
+	/* wait for all writes to happen to have all the clocks enabled */
+	fence_udelay(2, clk_base);
+
+	/* restore PLLs and all peripheral clock rates */
+	tegra210_init_pllu();
+	clk_restore_context();
+
+	/* restore saved context of peripheral clocks and reset state */
+	tegra_clk_periph_resume();
+}
+
 static void tegra210_cpu_clock_suspend(void)
 {
 	/* switch coresite to clk_m, save off original source */
@@ -3302,6 +3422,13 @@ static void tegra210_cpu_clock_resume(void)
 }
 #endif
 
+static struct syscore_ops tegra_clk_syscore_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.suspend = tegra210_clk_suspend,
+	.resume = tegra210_clk_resume,
+#endif
+};
+
 static struct tegra_cpu_car_ops tegra210_cpu_car_ops = {
 	.wait_for_reset	= tegra210_wait_cpu_in_reset,
 	.disable_clock	= tegra210_disable_cpu_clock,
@@ -3586,5 +3713,7 @@ static void __init tegra210_clock_init(struct device_node *np)
 	tegra210_mbist_clk_init();
 
 	tegra_cpu_car_ops = &tegra210_cpu_car_ops;
+
+	register_syscore_ops(&tegra_clk_syscore_ops);
 }
 CLK_OF_DECLARE(tegra210, "nvidia,tegra210-car", tegra210_clock_init);
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 7b4c6a488527..c8bc18e4d7e5 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -151,7 +151,6 @@ static unsigned long input_freq;
 
 static DEFINE_SPINLOCK(cml_lock);
 static DEFINE_SPINLOCK(pll_d_lock);
-static DEFINE_SPINLOCK(emc_lock);
 
 #define TEGRA_INIT_DATA_MUX(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
@@ -808,7 +807,7 @@ static struct tegra_clk tegra30_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_a] = { .dt_id = TEGRA30_CLK_PLL_A, .present = true },
 	[tegra_clk_pll_a_out0] = { .dt_id = TEGRA30_CLK_PLL_A_OUT0, .present = true },
 	[tegra_clk_cec] = { .dt_id = TEGRA30_CLK_CEC, .present = true },
-	[tegra_clk_emc] = { .dt_id = TEGRA30_CLK_EMC, .present = true },
+	[tegra_clk_emc] = { .dt_id = TEGRA30_CLK_EMC, .present = false },
 };
 
 static const char *pll_e_parents[] = { "pll_ref", "pll_p" };
@@ -995,7 +994,6 @@ static void __init tegra30_super_clk_init(void)
 static const char *mux_pllacp_clkm[] = { "pll_a_out0", "unused", "pll_p",
 					 "clk_m" };
 static const char *mux_pllpcm_clkm[] = { "pll_p", "pll_c", "pll_m", "clk_m" };
-static const char *mux_pllmcp_clkm[] = { "pll_m", "pll_c", "pll_p", "clk_m" };
 static const char *spdif_out_parents[] = { "pll_a_out0", "spdif_2x", "pll_p",
 					   "clk_m" };
 static const char *mux_pllmcpa[] = { "pll_m", "pll_c", "pll_p", "pll_a_out0" };
@@ -1044,14 +1042,12 @@ static void __init tegra30_periph_clk_init(void)
 	clks[TEGRA30_CLK_AFI] = clk;
 
 	/* emc */
-	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
-			       ARRAY_SIZE(mux_pllmcp_clkm),
-			       CLK_SET_RATE_NO_REPARENT,
-			       clk_base + CLK_SOURCE_EMC,
-			       30, 2, 0, &emc_lock);
+	clk = tegra20_clk_register_emc(clk_base + CLK_SOURCE_EMC, true);
+
+	clks[TEGRA30_CLK_EMC] = clk;
 
-	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
-				    &emc_lock);
+	clk = tegra_clk_register_mc("mc", "emc", clk_base + CLK_SOURCE_EMC,
+				    NULL);
 	clks[TEGRA30_CLK_MC] = clk;
 
 	/* cml0 */
@@ -1167,6 +1163,7 @@ static void tegra30_cpu_clock_suspend(void)
 static void tegra30_cpu_clock_resume(void)
 {
 	unsigned int reg, policy;
+	u32 misc, base;
 
 	/* Is CPU complex already running on PLLX? */
 	reg = readl(clk_base + CLK_RESET_CCLK_BURST);
@@ -1180,15 +1177,21 @@ static void tegra30_cpu_clock_resume(void)
 		BUG();
 
 	if (reg != CLK_RESET_CCLK_BURST_POLICY_PLLX) {
-		/* restore PLLX settings if CPU is on different PLL */
-		writel(tegra30_cpu_clk_sctx.pllx_misc,
-					clk_base + CLK_RESET_PLLX_MISC);
-		writel(tegra30_cpu_clk_sctx.pllx_base,
-					clk_base + CLK_RESET_PLLX_BASE);
-
-		/* wait for PLL stabilization if PLLX was enabled */
-		if (tegra30_cpu_clk_sctx.pllx_base & (1 << 30))
-			udelay(300);
+		misc = readl_relaxed(clk_base + CLK_RESET_PLLX_MISC);
+		base = readl_relaxed(clk_base + CLK_RESET_PLLX_BASE);
+
+		if (misc != tegra30_cpu_clk_sctx.pllx_misc ||
+		    base != tegra30_cpu_clk_sctx.pllx_base) {
+			/* restore PLLX settings if CPU is on different PLL */
+			writel(tegra30_cpu_clk_sctx.pllx_misc,
+						clk_base + CLK_RESET_PLLX_MISC);
+			writel(tegra30_cpu_clk_sctx.pllx_base,
+						clk_base + CLK_RESET_PLLX_BASE);
+
+			/* wait for PLL stabilization if PLLX was enabled */
+			if (tegra30_cpu_clk_sctx.pllx_base & (1 << 30))
+				udelay(300);
+		}
 	}
 
 	/*
@@ -1302,6 +1305,26 @@ static struct tegra_audio_clk_info tegra30_audio_plls[] = {
 	{ "pll_a", &pll_a_params, tegra_clk_pll_a, "pll_p_out1" },
 };
 
+static struct clk *tegra30_clk_src_onecell_get(struct of_phandle_args *clkspec,
+					       void *data)
+{
+	struct clk_hw *hw;
+	struct clk *clk;
+
+	clk = of_clk_src_onecell_get(clkspec, data);
+	if (IS_ERR(clk))
+		return clk;
+
+	hw = __clk_get_hw(clk);
+
+	if (clkspec->args[0] == TEGRA30_CLK_EMC) {
+		if (!tegra20_clk_emc_driver_available(hw))
+			return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	return clk;
+}
+
 static void __init tegra30_clock_init(struct device_node *np)
 {
 	struct device_node *node;
@@ -1345,7 +1368,7 @@ static void __init tegra30_clock_init(struct device_node *np)
 
 	tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA30_CLK_CLK_MAX);
 
-	tegra_add_of_provider(np, of_clk_src_onecell_get);
+	tegra_add_of_provider(np, tegra30_clk_src_onecell_get);
 	tegra_register_devclks(devclks, ARRAY_SIZE(devclks));
 
 	tegra_clk_apply_init_table = tegra30_clock_apply_init_table;
diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index 573e3c967ae1..f6cdce441cf7 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c
@@ -16,56 +16,13 @@
 
 #include "clk.h"
 
-#define CLK_OUT_ENB_L			0x010
-#define CLK_OUT_ENB_H			0x014
-#define CLK_OUT_ENB_U			0x018
-#define CLK_OUT_ENB_V			0x360
-#define CLK_OUT_ENB_W			0x364
-#define CLK_OUT_ENB_X			0x280
-#define CLK_OUT_ENB_Y			0x298
-#define CLK_OUT_ENB_SET_L		0x320
-#define CLK_OUT_ENB_CLR_L		0x324
-#define CLK_OUT_ENB_SET_H		0x328
-#define CLK_OUT_ENB_CLR_H		0x32c
-#define CLK_OUT_ENB_SET_U		0x330
-#define CLK_OUT_ENB_CLR_U		0x334
-#define CLK_OUT_ENB_SET_V		0x440
-#define CLK_OUT_ENB_CLR_V		0x444
-#define CLK_OUT_ENB_SET_W		0x448
-#define CLK_OUT_ENB_CLR_W		0x44c
-#define CLK_OUT_ENB_SET_X		0x284
-#define CLK_OUT_ENB_CLR_X		0x288
-#define CLK_OUT_ENB_SET_Y		0x29c
-#define CLK_OUT_ENB_CLR_Y		0x2a0
-
-#define RST_DEVICES_L			0x004
-#define RST_DEVICES_H			0x008
-#define RST_DEVICES_U			0x00C
-#define RST_DEVICES_V			0x358
-#define RST_DEVICES_W			0x35C
-#define RST_DEVICES_X			0x28C
-#define RST_DEVICES_Y			0x2a4
-#define RST_DEVICES_SET_L		0x300
-#define RST_DEVICES_CLR_L		0x304
-#define RST_DEVICES_SET_H		0x308
-#define RST_DEVICES_CLR_H		0x30c
-#define RST_DEVICES_SET_U		0x310
-#define RST_DEVICES_CLR_U		0x314
-#define RST_DEVICES_SET_V		0x430
-#define RST_DEVICES_CLR_V		0x434
-#define RST_DEVICES_SET_W		0x438
-#define RST_DEVICES_CLR_W		0x43c
-#define RST_DEVICES_SET_X		0x290
-#define RST_DEVICES_CLR_X		0x294
-#define RST_DEVICES_SET_Y		0x2a8
-#define RST_DEVICES_CLR_Y		0x2ac
-
 /* Global data of Tegra CPU CAR ops */
 static struct tegra_cpu_car_ops dummy_car_ops;
 struct tegra_cpu_car_ops *tegra_cpu_car_ops = &dummy_car_ops;
 
 int *periph_clk_enb_refcnt;
 static int periph_banks;
+static u32 *periph_state_ctx;
 static struct clk **clks;
 static int clk_num;
 static struct clk_onecell_data clk_data;
@@ -199,6 +156,65 @@ const struct tegra_clk_periph_regs *get_reg_bank(int clkid)
 	}
 }
 
+void tegra_clk_set_pllp_out_cpu(bool enable)
+{
+	u32 val;
+
+	val = readl_relaxed(clk_base + CLK_OUT_ENB_Y);
+	if (enable)
+		val |= CLK_ENB_PLLP_OUT_CPU;
+	else
+		val &= ~CLK_ENB_PLLP_OUT_CPU;
+
+	writel_relaxed(val, clk_base + CLK_OUT_ENB_Y);
+}
+
+void tegra_clk_periph_suspend(void)
+{
+	unsigned int i, idx;
+
+	idx = 0;
+	for (i = 0; i < periph_banks; i++, idx++)
+		periph_state_ctx[idx] =
+			readl_relaxed(clk_base + periph_regs[i].enb_reg);
+
+	for (i = 0; i < periph_banks; i++, idx++)
+		periph_state_ctx[idx] =
+			readl_relaxed(clk_base + periph_regs[i].rst_reg);
+}
+
+void tegra_clk_periph_resume(void)
+{
+	unsigned int i, idx;
+
+	idx = 0;
+	for (i = 0; i < periph_banks; i++, idx++)
+		writel_relaxed(periph_state_ctx[idx],
+			       clk_base + periph_regs[i].enb_reg);
+	/*
+	 * All non-boot peripherals will be in reset state on resume.
+	 * Wait for 5us of reset propagation delay before de-asserting
+	 * the peripherals based on the saved context.
+	 */
+	fence_udelay(5, clk_base);
+
+	for (i = 0; i < periph_banks; i++, idx++)
+		writel_relaxed(periph_state_ctx[idx],
+			       clk_base + periph_regs[i].rst_reg);
+
+	fence_udelay(2, clk_base);
+}
+
+static int tegra_clk_periph_ctx_init(int banks)
+{
+	periph_state_ctx = kcalloc(2 * banks, sizeof(*periph_state_ctx),
+				   GFP_KERNEL);
+	if (!periph_state_ctx)
+		return -ENOMEM;
+
+	return 0;
+}
+
 struct clk ** __init tegra_clk_init(void __iomem *regs, int num, int banks)
 {
 	clk_base = regs;
@@ -215,11 +231,21 @@ struct clk ** __init tegra_clk_init(void __iomem *regs, int num, int banks)
 	periph_banks = banks;
 
 	clks = kcalloc(num, sizeof(struct clk *), GFP_KERNEL);
-	if (!clks)
+	if (!clks) {
 		kfree(periph_clk_enb_refcnt);
+		return NULL;
+	}
 
 	clk_num = num;
 
+	if (IS_ENABLED(CONFIG_PM_SLEEP)) {
+		if (tegra_clk_periph_ctx_init(banks)) {
+			kfree(periph_clk_enb_refcnt);
+			kfree(clks);
+			return NULL;
+		}
+	}
+
 	return clks;
 }
 
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 905bf1096558..416a6b09f6a3 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -10,6 +10,65 @@
 #include <linux/clkdev.h>
 #include <linux/delay.h>
 
+#define CLK_OUT_ENB_L			0x010
+#define CLK_OUT_ENB_H			0x014
+#define CLK_OUT_ENB_U			0x018
+#define CLK_OUT_ENB_V			0x360
+#define CLK_OUT_ENB_W			0x364
+#define CLK_OUT_ENB_X			0x280
+#define CLK_OUT_ENB_Y			0x298
+#define CLK_ENB_PLLP_OUT_CPU		BIT(31)
+#define CLK_OUT_ENB_SET_L		0x320
+#define CLK_OUT_ENB_CLR_L		0x324
+#define CLK_OUT_ENB_SET_H		0x328
+#define CLK_OUT_ENB_CLR_H		0x32c
+#define CLK_OUT_ENB_SET_U		0x330
+#define CLK_OUT_ENB_CLR_U		0x334
+#define CLK_OUT_ENB_SET_V		0x440
+#define CLK_OUT_ENB_CLR_V		0x444
+#define CLK_OUT_ENB_SET_W		0x448
+#define CLK_OUT_ENB_CLR_W		0x44c
+#define CLK_OUT_ENB_SET_X		0x284
+#define CLK_OUT_ENB_CLR_X		0x288
+#define CLK_OUT_ENB_SET_Y		0x29c
+#define CLK_OUT_ENB_CLR_Y		0x2a0
+
+#define RST_DEVICES_L			0x004
+#define RST_DEVICES_H			0x008
+#define RST_DEVICES_U			0x00C
+#define RST_DEVICES_V			0x358
+#define RST_DEVICES_W			0x35C
+#define RST_DEVICES_X			0x28C
+#define RST_DEVICES_Y			0x2a4
+#define RST_DEVICES_SET_L		0x300
+#define RST_DEVICES_CLR_L		0x304
+#define RST_DEVICES_SET_H		0x308
+#define RST_DEVICES_CLR_H		0x30c
+#define RST_DEVICES_SET_U		0x310
+#define RST_DEVICES_CLR_U		0x314
+#define RST_DEVICES_SET_V		0x430
+#define RST_DEVICES_CLR_V		0x434
+#define RST_DEVICES_SET_W		0x438
+#define RST_DEVICES_CLR_W		0x43c
+#define RST_DEVICES_SET_X		0x290
+#define RST_DEVICES_CLR_X		0x294
+#define RST_DEVICES_SET_Y		0x2a8
+#define RST_DEVICES_CLR_Y		0x2ac
+
+/*
+ * Tegra CLK_OUT_ENB registers have some undefined bits which are not used and
+ * any accidental write of 1 to these bits can cause PSLVERR.
+ * So below are the valid mask defines for each CLK_OUT_ENB register used to
+ * turn ON only the valid clocks.
+ */
+#define TEGRA210_CLK_ENB_VLD_MSK_L	0xdcd7dff9
+#define TEGRA210_CLK_ENB_VLD_MSK_H	0x87d1f3e7
+#define TEGRA210_CLK_ENB_VLD_MSK_U	0xf3fed3fa
+#define TEGRA210_CLK_ENB_VLD_MSK_V	0xffc18cfb
+#define TEGRA210_CLK_ENB_VLD_MSK_W	0x793fb7ff
+#define TEGRA210_CLK_ENB_VLD_MSK_X	0x3fe66fff
+#define TEGRA210_CLK_ENB_VLD_MSK_Y	0xfc1fc7ff
+
 /**
  * struct tegra_clk_sync_source - external clock source from codec
  *
@@ -669,6 +728,9 @@ struct clk *tegra_clk_register_periph_data(void __iomem *clk_base,
  * Flags:
  * TEGRA_DIVIDER_2 - LP cluster has additional divider. This flag indicates
  *     that this is LP cluster clock.
+ * TEGRA210_CPU_CLK - This flag is used to identify CPU cluster for gen5
+ * super mux parent using PLLP branches. To use PLLP branches to CPU, need
+ * to configure additional bit PLLP_OUT_CPU in the clock registers.
  */
 struct tegra_clk_super_mux {
 	struct clk_hw	hw;
@@ -685,6 +747,7 @@ struct tegra_clk_super_mux {
 #define to_clk_super_mux(_hw) container_of(_hw, struct tegra_clk_super_mux, hw)
 
 #define TEGRA_DIVIDER_2 BIT(0)
+#define TEGRA210_CPU_CLK BIT(1)
 
 extern const struct clk_ops tegra_clk_super_ops;
 struct clk *tegra_clk_register_super_mux(const char *name,
@@ -829,6 +892,10 @@ u16 tegra_pll_get_fixed_mdiv(struct clk_hw *hw, unsigned long input_rate);
 int tegra_pll_p_div_to_hw(struct tegra_clk_pll *pll, u8 p_div);
 int div_frac_get(unsigned long rate, unsigned parent_rate, u8 width,
 		 u8 frac_width, u8 flags);
+void tegra_clk_osc_resume(void __iomem *clk_base);
+void tegra_clk_set_pllp_out_cpu(bool enable);
+void tegra_clk_periph_suspend(void);
+void tegra_clk_periph_resume(void);
 
 
 /* Combined read fence with delay */
@@ -838,4 +905,7 @@ int div_frac_get(unsigned long rate, unsigned parent_rate, u8 width,
 		udelay(delay);		\
 	} while (0)
 
+bool tegra20_clk_emc_driver_available(struct clk_hw *emc_hw);
+struct clk *tegra20_clk_register_emc(void __iomem *ioaddr, bool low_jitter);
+
 #endif /* TEGRA_CLK_H */
diff --git a/drivers/clk/ti/adpll.c b/drivers/clk/ti/adpll.c
index fdfb90058504..bb2f2836dab2 100644
--- a/drivers/clk/ti/adpll.c
+++ b/drivers/clk/ti/adpll.c
@@ -194,15 +194,8 @@ static const char *ti_adpll_clk_get_name(struct ti_adpll_data *d,
 		if (err)
 			return NULL;
 	} else {
-		const char *base_name = "adpll";
-		char *buf;
-
-		buf = devm_kzalloc(d->dev, 8 + 1 + strlen(base_name) + 1 +
-				    strlen(postfix), GFP_KERNEL);
-		if (!buf)
-			return NULL;
-		sprintf(buf, "%08lx.%s.%s", d->pa, base_name, postfix);
-		name = buf;
+		name = devm_kasprintf(d->dev, GFP_KERNEL, "%08lx.adpll.%s",
+				      d->pa, postfix);
 	}
 
 	return name;
diff --git a/drivers/clk/ti/clk-33xx.c b/drivers/clk/ti/clk-33xx.c
index a360d3109555..e001b9bcb6bf 100644
--- a/drivers/clk/ti/clk-33xx.c
+++ b/drivers/clk/ti/clk-33xx.c
@@ -107,7 +107,7 @@ static const struct omap_clkctrl_reg_data am3_l4hs_clkctrl_regs[] __initconst =
 };
 
 static const struct omap_clkctrl_reg_data am3_pruss_ocp_clkctrl_regs[] __initconst = {
-	{ AM3_PRUSS_OCP_PRUSS_CLKCTRL, NULL, CLKF_SW_SUP, "pruss_ocp_gclk" },
+	{ AM3_PRUSS_OCP_PRUSS_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_NO_IDLEST, "pruss_ocp_gclk" },
 	{ 0 },
 };
 
@@ -217,7 +217,7 @@ static const struct omap_clkctrl_reg_data am3_l4_rtc_clkctrl_regs[] __initconst
 };
 
 static const struct omap_clkctrl_reg_data am3_gfx_l3_clkctrl_regs[] __initconst = {
-	{ AM3_GFX_L3_GFX_CLKCTRL, NULL, CLKF_SW_SUP, "gfx_fck_div_ck" },
+	{ AM3_GFX_L3_GFX_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_NO_IDLEST, "gfx_fck_div_ck" },
 	{ 0 },
 };
 
diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index 2782d91838ac..af3e7805769e 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c
@@ -73,7 +73,7 @@ static const struct omap_clkctrl_reg_data am4_mpu_clkctrl_regs[] __initconst = {
 };
 
 static const struct omap_clkctrl_reg_data am4_gfx_l3_clkctrl_regs[] __initconst = {
-	{ AM4_GFX_L3_GFX_CLKCTRL, NULL, CLKF_SW_SUP, "gfx_fck_div_ck" },
+	{ AM4_GFX_L3_GFX_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_NO_IDLEST, "gfx_fck_div_ck" },
 	{ 0 },
 };
 
@@ -126,7 +126,7 @@ static const struct omap_clkctrl_reg_data am4_l3s_clkctrl_regs[] __initconst = {
 };
 
 static const struct omap_clkctrl_reg_data am4_pruss_ocp_clkctrl_regs[] __initconst = {
-	{ AM4_PRUSS_OCP_PRUSS_CLKCTRL, NULL, CLKF_SW_SUP, "pruss_ocp_gclk" },
+	{ AM4_PRUSS_OCP_PRUSS_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_NO_IDLEST, "pruss_ocp_gclk" },
 	{ 0 },
 };
 
diff --git a/drivers/clk/ti/clk-44xx.c b/drivers/clk/ti/clk-44xx.c
index b10ed0429091..2b4dab632318 100644
--- a/drivers/clk/ti/clk-44xx.c
+++ b/drivers/clk/ti/clk-44xx.c
@@ -37,7 +37,7 @@ static const struct omap_clkctrl_reg_data omap4_mpuss_clkctrl_regs[] __initconst
 };
 
 static const struct omap_clkctrl_reg_data omap4_tesla_clkctrl_regs[] __initconst = {
-	{ OMAP4_DSP_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_iva_m4x2_ck" },
+	{ OMAP4_DSP_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_iva_m4x2_ck" },
 	{ 0 },
 };
 
@@ -219,7 +219,7 @@ static const struct omap_clkctrl_reg_data omap4_l3_2_clkctrl_regs[] __initconst
 };
 
 static const struct omap_clkctrl_reg_data omap4_ducati_clkctrl_regs[] __initconst = {
-	{ OMAP4_IPU_CLKCTRL, NULL, CLKF_HW_SUP, "ducati_clk_mux_ck" },
+	{ OMAP4_IPU_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "ducati_clk_mux_ck" },
 	{ 0 },
 };
 
diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c
index e675e27f1203..c9608e5d993a 100644
--- a/drivers/clk/ti/clk-54xx.c
+++ b/drivers/clk/ti/clk-54xx.c
@@ -31,7 +31,7 @@ static const struct omap_clkctrl_reg_data omap5_mpu_clkctrl_regs[] __initconst =
 };
 
 static const struct omap_clkctrl_reg_data omap5_dsp_clkctrl_regs[] __initconst = {
-	{ OMAP5_MMU_DSP_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_iva_h11x2_ck" },
+	{ OMAP5_MMU_DSP_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_iva_h11x2_ck" },
 	{ 0 },
 };
 
@@ -145,7 +145,7 @@ static const struct omap_clkctrl_reg_data omap5_l3main2_clkctrl_regs[] __initcon
 };
 
 static const struct omap_clkctrl_reg_data omap5_ipu_clkctrl_regs[] __initconst = {
-	{ OMAP5_MMU_IPU_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_core_h22x2_ck" },
+	{ OMAP5_MMU_IPU_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_core_h22x2_ck" },
 	{ 0 },
 };
 
@@ -286,6 +286,12 @@ static const struct omap_clkctrl_reg_data omap5_l4per_clkctrl_regs[] __initconst
 	{ 0 },
 };
 
+static const struct omap_clkctrl_reg_data omap5_iva_clkctrl_regs[] __initconst = {
+	{ OMAP5_IVA_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_iva_h12x2_ck" },
+	{ OMAP5_SL2IF_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_iva_h12x2_ck" },
+	{ 0 },
+};
+
 static const char * const omap5_dss_dss_clk_parents[] __initconst = {
 	"dpll_per_h12x2_ck",
 	NULL,
@@ -502,6 +508,7 @@ const struct omap_clkctrl_data omap5_clkctrl_data[] __initconst = {
 	{ 0x4a008d20, omap5_l4cfg_clkctrl_regs },
 	{ 0x4a008e20, omap5_l3instr_clkctrl_regs },
 	{ 0x4a009020, omap5_l4per_clkctrl_regs },
+	{ 0x4a009220, omap5_iva_clkctrl_regs },
 	{ 0x4a009420, omap5_dss_clkctrl_regs },
 	{ 0x4a009520, omap5_gpu_clkctrl_regs },
 	{ 0x4a009620, omap5_l3init_clkctrl_regs },
diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
index 9dd6185a4b4e..5f46782cebeb 100644
--- a/drivers/clk/ti/clk-7xx.c
+++ b/drivers/clk/ti/clk-7xx.c
@@ -25,7 +25,7 @@ static const struct omap_clkctrl_reg_data dra7_mpu_clkctrl_regs[] __initconst =
 };
 
 static const struct omap_clkctrl_reg_data dra7_dsp1_clkctrl_regs[] __initconst = {
-	{ DRA7_DSP1_MMU0_DSP1_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_dsp_m2_ck" },
+	{ DRA7_DSP1_MMU0_DSP1_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_dsp_m2_ck" },
 	{ 0 },
 };
 
@@ -41,7 +41,7 @@ static const struct omap_clkctrl_bit_data dra7_mmu_ipu1_bit_data[] __initconst =
 };
 
 static const struct omap_clkctrl_reg_data dra7_ipu1_clkctrl_regs[] __initconst = {
-	{ DRA7_IPU1_MMU_IPU1_CLKCTRL, dra7_mmu_ipu1_bit_data, CLKF_HW_SUP, "ipu1-clkctrl:0000:24" },
+	{ DRA7_IPU1_MMU_IPU1_CLKCTRL, dra7_mmu_ipu1_bit_data, CLKF_HW_SUP | CLKF_NO_IDLEST, "ipu1-clkctrl:0000:24" },
 	{ 0 },
 };
 
@@ -137,7 +137,7 @@ static const struct omap_clkctrl_reg_data dra7_ipu_clkctrl_regs[] __initconst =
 };
 
 static const struct omap_clkctrl_reg_data dra7_dsp2_clkctrl_regs[] __initconst = {
-	{ DRA7_DSP2_MMU0_DSP2_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_dsp_m2_ck" },
+	{ DRA7_DSP2_MMU0_DSP2_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_dsp_m2_ck" },
 	{ 0 },
 };
 
@@ -164,7 +164,7 @@ static const struct omap_clkctrl_reg_data dra7_l3main1_clkctrl_regs[] __initcons
 };
 
 static const struct omap_clkctrl_reg_data dra7_ipu2_clkctrl_regs[] __initconst = {
-	{ DRA7_IPU2_MMU_IPU2_CLKCTRL, NULL, CLKF_HW_SUP, "dpll_core_h22x2_ck" },
+	{ DRA7_IPU2_MMU_IPU2_CLKCTRL, NULL, CLKF_HW_SUP | CLKF_NO_IDLEST, "dpll_core_h22x2_ck" },
 	{ 0 },
 };
 
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
index f65e16c4f3c4..8d4c08b034bd 100644
--- a/drivers/clk/ti/clk-dra7-atl.c
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -233,7 +233,6 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev)
 	cinfo->iobase = of_iomap(node, 0);
 	cinfo->dev = &pdev->dev;
 	pm_runtime_enable(cinfo->dev);
-	pm_runtime_irq_safe(cinfo->dev);
 
 	pm_runtime_get_sync(cinfo->dev);
 	atl_write(cinfo, DRA7_ATL_PCLKMUX_REG(0), DRA7_ATL_PCLKMUX);
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index b0c0690a5a12..17b9a761242f 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -24,7 +24,7 @@
 #include <linux/timekeeping.h>
 #include "clock.h"
 
-#define NO_IDLEST			0x1
+#define NO_IDLEST			0
 
 #define OMAP4_MODULEMODE_MASK		0x3
 
@@ -34,6 +34,9 @@
 #define OMAP4_IDLEST_MASK		(0x3 << 16)
 #define OMAP4_IDLEST_SHIFT		16
 
+#define OMAP4_STBYST_MASK		BIT(18)
+#define OMAP4_STBYST_SHIFT		18
+
 #define CLKCTRL_IDLEST_FUNCTIONAL	0x0
 #define CLKCTRL_IDLEST_INTERFACE_IDLE	0x2
 #define CLKCTRL_IDLEST_DISABLED		0x3
@@ -159,7 +162,7 @@ static int _omap4_clkctrl_clk_enable(struct clk_hw *hw)
 
 	ti_clk_ll_ops->clk_writel(val, &clk->enable_reg);
 
-	if (clk->flags & NO_IDLEST)
+	if (test_bit(NO_IDLEST, &clk->flags))
 		return 0;
 
 	/* Wait until module is enabled */
@@ -188,7 +191,7 @@ static void _omap4_clkctrl_clk_disable(struct clk_hw *hw)
 
 	ti_clk_ll_ops->clk_writel(val, &clk->enable_reg);
 
-	if (clk->flags & NO_IDLEST)
+	if (test_bit(NO_IDLEST, &clk->flags))
 		goto exit;
 
 	/* Wait until module is disabled */
@@ -381,7 +384,7 @@ _ti_clkctrl_setup_div(struct omap_clkctrl_provider *provider,
 
 	if (ti_clk_parse_divider_data((int *)div_data->dividers, 0,
 				      div_data->max_div, div_flags,
-				      &div->width, &div->table)) {
+				      div)) {
 		pr_err("%s: Data parsing for %pOF:%04x:%d failed\n", __func__,
 		       node, offset, data->bit);
 		kfree(div);
@@ -597,7 +600,7 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
 		if (reg_data->flags & CLKF_HW_SUP)
 			hw->enable_bit = MODULEMODE_HWCTRL;
 		if (reg_data->flags & CLKF_NO_IDLEST)
-			hw->flags |= NO_IDLEST;
+			set_bit(NO_IDLEST, &hw->flags);
 
 		if (reg_data->clkdm_name)
 			hw->clkdm_name = reg_data->clkdm_name;
@@ -623,7 +626,7 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
 		init.ops = &omap4_clkctrl_clk_ops;
 		hw->hw.init = &init;
 
-		clk = ti_clk_register(NULL, &hw->hw, init.name);
+		clk = ti_clk_register_omap_hw(NULL, &hw->hw, init.name);
 		if (IS_ERR_OR_NULL(clk))
 			goto cleanup;
 
@@ -648,3 +651,33 @@ cleanup:
 }
 CLK_OF_DECLARE(ti_omap4_clkctrl_clock, "ti,clkctrl",
 	       _ti_omap4_clkctrl_setup);
+
+/**
+ * ti_clk_is_in_standby - Check if clkctrl clock is in standby or not
+ * @clk: clock to check standby status for
+ *
+ * Finds whether the provided clock is in standby mode or not. Returns
+ * true if the provided clock is a clkctrl type clock and it is in standby,
+ * false otherwise.
+ */
+bool ti_clk_is_in_standby(struct clk *clk)
+{
+	struct clk_hw *hw;
+	struct clk_hw_omap *hwclk;
+	u32 val;
+
+	hw = __clk_get_hw(clk);
+
+	if (!omap2_clk_is_hw_omap(hw))
+		return false;
+
+	hwclk = to_clk_hw_omap(hw);
+
+	val = ti_clk_ll_ops->clk_readl(&hwclk->enable_reg);
+
+	if (val & OMAP4_STBYST_MASK)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(ti_clk_is_in_standby);
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index e4b8392ff63c..e6995c04001e 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -20,9 +20,11 @@ struct clk_omap_divider {
 	struct clk_hw		hw;
 	struct clk_omap_reg	reg;
 	u8			shift;
-	u8			width;
 	u8			flags;
 	s8			latch;
+	u16			min;
+	u16			max;
+	u16			mask;
 	const struct clk_div_table	*table;
 	u32		context;
 };
@@ -220,8 +222,7 @@ void ti_clk_latch(struct clk_omap_reg *reg, s8 shift);
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup);
 
 int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
-			      u8 flags, u8 *width,
-			      const struct clk_div_table **table);
+			      u8 flags, struct clk_omap_divider *div);
 
 int ti_clk_get_reg_addr(struct device_node *node, int index,
 			struct clk_omap_reg *reg);
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 6cb863c13648..28080df92f72 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -26,30 +26,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#define div_mask(d)	((1 << ((d)->width)) - 1)
-
-static unsigned int _get_table_maxdiv(const struct clk_div_table *table)
-{
-	unsigned int maxdiv = 0;
-	const struct clk_div_table *clkt;
-
-	for (clkt = table; clkt->div; clkt++)
-		if (clkt->div > maxdiv)
-			maxdiv = clkt->div;
-	return maxdiv;
-}
-
-static unsigned int _get_maxdiv(struct clk_omap_divider *divider)
-{
-	if (divider->flags & CLK_DIVIDER_ONE_BASED)
-		return div_mask(divider);
-	if (divider->flags & CLK_DIVIDER_POWER_OF_TWO)
-		return 1 << div_mask(divider);
-	if (divider->table)
-		return _get_table_maxdiv(divider->table);
-	return div_mask(divider) + 1;
-}
-
 static unsigned int _get_table_div(const struct clk_div_table *table,
 				   unsigned int val)
 {
@@ -61,6 +37,34 @@ static unsigned int _get_table_div(const struct clk_div_table *table,
 	return 0;
 }
 
+static void _setup_mask(struct clk_omap_divider *divider)
+{
+	u16 mask;
+	u32 max_val;
+	const struct clk_div_table *clkt;
+
+	if (divider->table) {
+		max_val = 0;
+
+		for (clkt = divider->table; clkt->div; clkt++)
+			if (clkt->val > max_val)
+				max_val = clkt->val;
+	} else {
+		max_val = divider->max;
+
+		if (!(divider->flags & CLK_DIVIDER_ONE_BASED) &&
+		    !(divider->flags & CLK_DIVIDER_POWER_OF_TWO))
+			max_val--;
+	}
+
+	if (divider->flags & CLK_DIVIDER_POWER_OF_TWO)
+		mask = fls(max_val) - 1;
+	else
+		mask = max_val;
+
+	divider->mask = (1 << fls(mask)) - 1;
+}
+
 static unsigned int _get_div(struct clk_omap_divider *divider, unsigned int val)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
@@ -101,7 +105,7 @@ static unsigned long ti_clk_divider_recalc_rate(struct clk_hw *hw,
 	unsigned int div, val;
 
 	val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift;
-	val &= div_mask(divider);
+	val &= divider->mask;
 
 	div = _get_div(divider, val);
 	if (!div) {
@@ -180,7 +184,7 @@ static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 	if (!rate)
 		rate = 1;
 
-	maxdiv = _get_maxdiv(divider);
+	maxdiv = divider->max;
 
 	if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
 		parent_rate = *best_parent_rate;
@@ -219,7 +223,7 @@ static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 	}
 
 	if (!bestdiv) {
-		bestdiv = _get_maxdiv(divider);
+		bestdiv = divider->max;
 		*best_parent_rate =
 			clk_hw_round_rate(clk_hw_get_parent(hw), 1);
 	}
@@ -249,17 +253,16 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	divider = to_clk_omap_divider(hw);
 
 	div = DIV_ROUND_UP(parent_rate, rate);
-	value = _get_val(divider, div);
 
-	if (value > div_mask(divider))
-		value = div_mask(divider);
+	if (div > divider->max)
+		div = divider->max;
+	if (div < divider->min)
+		div = divider->min;
 
-	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
-		val = div_mask(divider) << (divider->shift + 16);
-	} else {
-		val = ti_clk_ll_ops->clk_readl(&divider->reg);
-		val &= ~(div_mask(divider) << divider->shift);
-	}
+	value = _get_val(divider, div);
+
+	val = ti_clk_ll_ops->clk_readl(&divider->reg);
+	val &= ~(divider->mask << divider->shift);
 	val |= value << divider->shift;
 	ti_clk_ll_ops->clk_writel(val, &divider->reg);
 
@@ -280,7 +283,7 @@ static int clk_divider_save_context(struct clk_hw *hw)
 	u32 val;
 
 	val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift;
-	divider->context = val & div_mask(divider);
+	divider->context = val & divider->mask;
 
 	return 0;
 }
@@ -297,7 +300,7 @@ static void clk_divider_restore_context(struct clk_hw *hw)
 	u32 val;
 
 	val = ti_clk_ll_ops->clk_readl(&divider->reg);
-	val &= ~(div_mask(divider) << divider->shift);
+	val &= ~(divider->mask << divider->shift);
 	val |= divider->context << divider->shift;
 	ti_clk_ll_ops->clk_writel(val, &divider->reg);
 }
@@ -310,47 +313,26 @@ const struct clk_ops ti_clk_divider_ops = {
 	.restore_context = clk_divider_restore_context,
 };
 
-static struct clk *_register_divider(struct device *dev, const char *name,
-				     const char *parent_name,
-				     unsigned long flags,
-				     struct clk_omap_reg *reg,
-				     u8 shift, u8 width, s8 latch,
-				     u8 clk_divider_flags,
-				     const struct clk_div_table *table)
+static struct clk *_register_divider(struct device_node *node,
+				     u32 flags,
+				     struct clk_omap_divider *div)
 {
-	struct clk_omap_divider *div;
 	struct clk *clk;
 	struct clk_init_data init;
+	const char *parent_name;
 
-	if (clk_divider_flags & CLK_DIVIDER_HIWORD_MASK) {
-		if (width + shift > 16) {
-			pr_warn("divider value exceeds LOWORD field\n");
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	/* allocate the divider */
-	div = kzalloc(sizeof(*div), GFP_KERNEL);
-	if (!div)
-		return ERR_PTR(-ENOMEM);
+	parent_name = of_clk_get_parent_name(node, 0);
 
-	init.name = name;
+	init.name = node->name;
 	init.ops = &ti_clk_divider_ops;
 	init.flags = flags;
 	init.parent_names = (parent_name ? &parent_name : NULL);
 	init.num_parents = (parent_name ? 1 : 0);
 
-	/* struct clk_divider assignments */
-	memcpy(&div->reg, reg, sizeof(*reg));
-	div->shift = shift;
-	div->width = width;
-	div->latch = latch;
-	div->flags = clk_divider_flags;
 	div->hw.init = &init;
-	div->table = table;
 
 	/* register the clock */
-	clk = ti_clk_register(dev, &div->hw, name);
+	clk = ti_clk_register(NULL, &div->hw, node->name);
 
 	if (IS_ERR(clk))
 		kfree(div);
@@ -359,34 +341,17 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 }
 
 int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
-			      u8 flags, u8 *width,
-			      const struct clk_div_table **table)
+			      u8 flags, struct clk_omap_divider *divider)
 {
 	int valid_div = 0;
-	u32 val;
-	int div;
 	int i;
 	struct clk_div_table *tmp;
+	u16 min_div = 0;
 
 	if (!div_table) {
-		if (flags & CLKF_INDEX_STARTS_AT_ONE)
-			val = 1;
-		else
-			val = 0;
-
-		div = 1;
-
-		while (div < max_div) {
-			if (flags & CLKF_INDEX_POWER_OF_TWO)
-				div <<= 1;
-			else
-				div++;
-			val++;
-		}
-
-		*width = fls(val);
-		*table = NULL;
-
+		divider->min = 1;
+		divider->max = max_div;
+		_setup_mask(divider);
 		return 0;
 	}
 
@@ -403,30 +368,32 @@ int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
 	num_dividers = i;
 
 	tmp = kcalloc(valid_div + 1, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp) {
-		*table = ERR_PTR(-ENOMEM);
+	if (!tmp)
 		return -ENOMEM;
-	}
 
 	valid_div = 0;
-	*width = 0;
 
 	for (i = 0; i < num_dividers; i++)
 		if (div_table[i] > 0) {
 			tmp[valid_div].div = div_table[i];
 			tmp[valid_div].val = i;
 			valid_div++;
-			*width = i;
+			if (div_table[i] > max_div)
+				max_div = div_table[i];
+			if (!min_div || div_table[i] < min_div)
+				min_div = div_table[i];
 		}
 
-	*width = fls(*width);
-	*table = tmp;
+	divider->min = min_div;
+	divider->max = max_div;
+	divider->table = tmp;
+	_setup_mask(divider);
 
 	return 0;
 }
 
-static struct clk_div_table *
-__init ti_clk_get_div_table(struct device_node *node)
+static int __init ti_clk_get_div_table(struct device_node *node,
+				       struct clk_omap_divider *div)
 {
 	struct clk_div_table *table;
 	const __be32 *divspec;
@@ -438,7 +405,7 @@ __init ti_clk_get_div_table(struct device_node *node)
 	divspec = of_get_property(node, "ti,dividers", &num_div);
 
 	if (!divspec)
-		return NULL;
+		return 0;
 
 	num_div /= 4;
 
@@ -453,13 +420,12 @@ __init ti_clk_get_div_table(struct device_node *node)
 
 	if (!valid_div) {
 		pr_err("no valid dividers for %pOFn table\n", node);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	table = kcalloc(valid_div + 1, sizeof(*table), GFP_KERNEL);
-
 	if (!table)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	valid_div = 0;
 
@@ -472,19 +438,20 @@ __init ti_clk_get_div_table(struct device_node *node)
 		}
 	}
 
-	return table;
+	div->table = table;
+
+	return 0;
 }
 
-static int _get_divider_width(struct device_node *node,
-			      const struct clk_div_table *table,
-			      u8 flags)
+static int _populate_divider_min_max(struct device_node *node,
+				     struct clk_omap_divider *divider)
 {
-	u32 min_div;
-	u32 max_div;
-	u32 val = 0;
-	u32 div;
+	u32 min_div = 0;
+	u32 max_div = 0;
+	u32 val;
+	const struct clk_div_table *clkt;
 
-	if (!table) {
+	if (!divider->table) {
 		/* Clk divider table not provided, determine min/max divs */
 		if (of_property_read_u32(node, "ti,min-div", &min_div))
 			min_div = 1;
@@ -493,75 +460,62 @@ static int _get_divider_width(struct device_node *node,
 			pr_err("no max-div for %pOFn!\n", node);
 			return -EINVAL;
 		}
-
-		/* Determine bit width for the field */
-		if (flags & CLK_DIVIDER_ONE_BASED)
-			val = 1;
-
-		div = min_div;
-
-		while (div < max_div) {
-			if (flags & CLK_DIVIDER_POWER_OF_TWO)
-				div <<= 1;
-			else
-				div++;
-			val++;
-		}
 	} else {
-		div = 0;
 
-		while (table[div].div) {
-			val = table[div].val;
-			div++;
+		for (clkt = divider->table; clkt->div; clkt++) {
+			val = clkt->div;
+			if (val > max_div)
+				max_div = val;
+			if (!min_div || val < min_div)
+				min_div = val;
 		}
 	}
 
-	return fls(val);
+	divider->min = min_div;
+	divider->max = max_div;
+	_setup_mask(divider);
+
+	return 0;
 }
 
 static int __init ti_clk_divider_populate(struct device_node *node,
-	struct clk_omap_reg *reg, const struct clk_div_table **table,
-	u32 *flags, u8 *div_flags, u8 *width, u8 *shift, s8 *latch)
+					  struct clk_omap_divider *div,
+					  u32 *flags)
 {
 	u32 val;
 	int ret;
 
-	ret = ti_clk_get_reg_addr(node, 0, reg);
+	ret = ti_clk_get_reg_addr(node, 0, &div->reg);
 	if (ret)
 		return ret;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
-		*shift = val;
+		div->shift = val;
 	else
-		*shift = 0;
+		div->shift = 0;
 
-	if (latch) {
-		if (!of_property_read_u32(node, "ti,latch-bit", &val))
-			*latch = val;
-		else
-			*latch = -EINVAL;
-	}
+	if (!of_property_read_u32(node, "ti,latch-bit", &val))
+		div->latch = val;
+	else
+		div->latch = -EINVAL;
 
 	*flags = 0;
-	*div_flags = 0;
+	div->flags = 0;
 
 	if (of_property_read_bool(node, "ti,index-starts-at-one"))
-		*div_flags |= CLK_DIVIDER_ONE_BASED;
+		div->flags |= CLK_DIVIDER_ONE_BASED;
 
 	if (of_property_read_bool(node, "ti,index-power-of-two"))
-		*div_flags |= CLK_DIVIDER_POWER_OF_TWO;
+		div->flags |= CLK_DIVIDER_POWER_OF_TWO;
 
 	if (of_property_read_bool(node, "ti,set-rate-parent"))
 		*flags |= CLK_SET_RATE_PARENT;
 
-	*table = ti_clk_get_div_table(node);
-
-	if (IS_ERR(*table))
-		return PTR_ERR(*table);
-
-	*width = _get_divider_width(node, *table, *div_flags);
+	ret = ti_clk_get_div_table(node, div);
+	if (ret)
+		return ret;
 
-	return 0;
+	return _populate_divider_min_max(node, div);
 }
 
 /**
@@ -573,24 +527,17 @@ static int __init ti_clk_divider_populate(struct device_node *node,
 static void __init of_ti_divider_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
-	const char *parent_name;
-	struct clk_omap_reg reg;
-	u8 clk_divider_flags = 0;
-	u8 width = 0;
-	u8 shift = 0;
-	s8 latch = -EINVAL;
-	const struct clk_div_table *table = NULL;
 	u32 flags = 0;
+	struct clk_omap_divider *div;
 
-	parent_name = of_clk_get_parent_name(node, 0);
+	div = kzalloc(sizeof(*div), GFP_KERNEL);
+	if (!div)
+		return;
 
-	if (ti_clk_divider_populate(node, &reg, &table, &flags,
-				    &clk_divider_flags, &width, &shift, &latch))
+	if (ti_clk_divider_populate(node, div, &flags))
 		goto cleanup;
 
-	clk = _register_divider(NULL, node->name, parent_name, flags, &reg,
-				shift, width, latch, clk_divider_flags, table);
-
+	clk = _register_divider(node, flags, div);
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		of_ti_clk_autoidle_setup(node);
@@ -598,22 +545,21 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 	}
 
 cleanup:
-	kfree(table);
+	kfree(div->table);
+	kfree(div);
 }
 CLK_OF_DECLARE(divider_clk, "ti,divider-clock", of_ti_divider_clk_setup);
 
 static void __init of_ti_composite_divider_clk_setup(struct device_node *node)
 {
 	struct clk_omap_divider *div;
-	u32 val;
+	u32 tmp;
 
 	div = kzalloc(sizeof(*div), GFP_KERNEL);
 	if (!div)
 		return;
 
-	if (ti_clk_divider_populate(node, &div->reg, &div->table, &val,
-				    &div->flags, &div->width, &div->shift,
-				    NULL) < 0)
+	if (ti_clk_divider_populate(node, div, &tmp))
 		goto cleanup;
 
 	if (!ti_clk_add_component(node, &div->hw, CLK_COMPONENT_TYPE_DIVIDER))
diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c
index c6aaca73cf86..12380236d7ab 100644
--- a/drivers/clk/uniphier/clk-uniphier-core.c
+++ b/drivers/clk/uniphier/clk-uniphier-core.c
@@ -64,8 +64,7 @@ static int uniphier_clk_probe(struct platform_device *pdev)
 	for (p = data; p->name; p++)
 		clk_num = max(clk_num, p->idx + 1);
 
-	hw_data = devm_kzalloc(dev,
-			sizeof(*hw_data) + clk_num * sizeof(struct clk_hw *),
+	hw_data = devm_kzalloc(dev, struct_size(hw_data, hws, clk_num),
 			GFP_KERNEL);
 	if (!hw_data)
 		return -ENOMEM;
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index f35a53ce8988..cc909e465823 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -88,7 +88,7 @@ config ROCKCHIP_TIMER
 	select TIMER_OF
 	select CLKSRC_MMIO
 	help
-	  Enables the support for the rockchip timer driver.
+	  Enables the support for the Rockchip timer driver.
 
 config ARMADA_370_XP_TIMER
 	bool "Armada 370 and XP timer driver" if COMPILE_TEST
@@ -162,13 +162,13 @@ config NPCM7XX_TIMER
 	select CLKSRC_MMIO
 	help
 	  Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
-	  While TIMER0 serves as clockevent and TIMER1 serves as clocksource.
+	  where TIMER0 serves as clockevent and TIMER1 serves as clocksource.
 
 config CADENCE_TTC_TIMER
 	bool "Cadence TTC timer driver" if COMPILE_TEST
 	depends on COMMON_CLK
 	help
-	  Enables support for the cadence ttc driver.
+	  Enables support for the Cadence TTC driver.
 
 config ASM9260_TIMER
 	bool "ASM9260 timer driver" if COMPILE_TEST
@@ -190,10 +190,10 @@ config CLKSRC_DBX500_PRCMU
 	bool "Clocksource PRCMU Timer" if COMPILE_TEST
 	depends on HAS_IOMEM
 	help
-	  Use the always on PRCMU Timer as clocksource
+	  Use the always on PRCMU Timer as clocksource.
 
 config CLPS711X_TIMER
-	bool "Cirrus logic timer driver" if COMPILE_TEST
+	bool "Cirrus Logic timer driver" if COMPILE_TEST
 	select CLKSRC_MMIO
 	help
 	  Enables support for the Cirrus Logic PS711 timer.
@@ -205,11 +205,11 @@ config ATLAS7_TIMER
 	  Enables support for the Atlas7 timer.
 
 config MXS_TIMER
-	bool "Mxs timer driver" if COMPILE_TEST
+	bool "MXS timer driver" if COMPILE_TEST
 	select CLKSRC_MMIO
 	select STMP_DEVICE
 	help
-	  Enables support for the Mxs timer.
+	  Enables support for the MXS timer.
 
 config PRIMA2_TIMER
 	bool "Prima2 timer driver" if COMPILE_TEST
@@ -238,10 +238,10 @@ config KEYSTONE_TIMER
 	  Enables support for the Keystone timer.
 
 config INTEGRATOR_AP_TIMER
-	bool "Integrator-ap timer driver" if COMPILE_TEST
+	bool "Integrator-AP timer driver" if COMPILE_TEST
 	select CLKSRC_MMIO
 	help
-	  Enables support for the Integrator-ap timer.
+	  Enables support for the Integrator-AP timer.
 
 config CLKSRC_EFM32
 	bool "Clocksource for Energy Micro's EFM32 SoCs" if !ARCH_EFM32
@@ -283,8 +283,8 @@ config CLKSRC_NPS
 	select TIMER_OF if OF
 	help
 	  NPS400 clocksource support.
-	  Got 64 bit counter with update rate up to 1000MHz.
-	  This counter is accessed via couple of 32 bit memory mapped registers.
+	  It has a 64-bit counter with update rate up to 1000MHz.
+	  This counter is accessed via couple of 32-bit memory-mapped registers.
 
 config CLKSRC_STM32
 	bool "Clocksource for STM32 SoCs" if !ARCH_STM32
@@ -305,14 +305,14 @@ config ARC_TIMERS
 	help
 	  These are legacy 32-bit TIMER0 and TIMER1 counters found on all ARC cores
 	  (ARC700 as well as ARC HS38).
-	  TIMER0 serves as clockevent while TIMER1 provides clocksource
+	  TIMER0 serves as clockevent while TIMER1 provides clocksource.
 
 config ARC_TIMERS_64BIT
 	bool "Support for 64-bit counters in ARC HS38 cores" if COMPILE_TEST
 	depends on ARC_TIMERS
 	select TIMER_OF
 	help
-	  This enables 2 different 64-bit timers: RTC (for UP) and GFRC (for SMP)
+	  This enables 2 different 64-bit timers: RTC (for UP) and GFRC (for SMP).
 	  RTC is implemented inside the core, while GFRC sits outside the core in
 	  ARConnect IP block. Driver automatically picks one of them for clocksource
 	  as appropriate.
@@ -390,7 +390,7 @@ config ARM_GLOBAL_TIMER
 	select TIMER_OF if OF
 	depends on ARM
 	help
-	  This options enables support for the ARM global timer unit
+	  This option enables support for the ARM global timer unit.
 
 config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module" if COMPILE_TEST
@@ -403,14 +403,14 @@ config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
 	depends on ARM_GLOBAL_TIMER
 	default y
 	help
-	 Use ARM global timer clock source as sched_clock
+	  Use ARM global timer clock source as sched_clock.
 
 config ARMV7M_SYSTICK
 	bool "Support for the ARMv7M system time" if COMPILE_TEST
 	select TIMER_OF if OF
 	select CLKSRC_MMIO
 	help
-	  This options enables support for the ARMv7M system timer unit
+	  This option enables support for the ARMv7M system timer unit.
 
 config ATMEL_PIT
 	bool "Atmel PIT support" if COMPILE_TEST
@@ -460,7 +460,7 @@ config VF_PIT_TIMER
 	bool
 	select CLKSRC_MMIO
 	help
-	  Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
+	  Support for Periodic Interrupt Timer on Freescale Vybrid Family SoCs.
 
 config OXNAS_RPS_TIMER
 	bool "Oxford Semiconductor OXNAS RPS Timers driver" if COMPILE_TEST
@@ -470,7 +470,7 @@ config OXNAS_RPS_TIMER
 	  This enables support for the Oxford Semiconductor OXNAS RPS timers.
 
 config SYS_SUPPORTS_SH_CMT
-        bool
+	bool
 
 config MTK_TIMER
 	bool "Mediatek timer driver" if COMPILE_TEST
@@ -490,13 +490,13 @@ config SPRD_TIMER
 	  Enables support for the Spreadtrum timer driver.
 
 config SYS_SUPPORTS_SH_MTU2
-        bool
+	bool
 
 config SYS_SUPPORTS_SH_TMU
-        bool
+	bool
 
 config SYS_SUPPORTS_EM_STI
-        bool
+	bool
 
 config CLKSRC_JCORE_PIT
 	bool "J-Core PIT timer driver" if COMPILE_TEST
@@ -523,11 +523,12 @@ config SH_TIMER_MTU2
 	help
 	  This enables build of a clockevent driver for the Multi-Function
 	  Timer Pulse Unit 2 (MTU2) hardware available on SoCs from Renesas.
-	  This hardware comes with 16 bit-timer registers.
+	  This hardware comes with 16-bit timer registers.
 
 config RENESAS_OSTM
 	bool "Renesas OSTM timer driver" if COMPILE_TEST
 	select CLKSRC_MMIO
+	select TIMER_OF
 	help
 	  Enables the support for the Renesas OSTM.
 
@@ -579,7 +580,7 @@ config CLKSRC_TANGO_XTAL
 	select TIMER_OF
 	select CLKSRC_MMIO
 	help
-	  This enables the clocksource for Tango SoC
+	  This enables the clocksource for Tango SoC.
 
 config CLKSRC_PXA
 	bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
@@ -590,24 +591,24 @@ config CLKSRC_PXA
 	  platforms.
 
 config H8300_TMR8
-        bool "Clockevent timer for the H8300 platform" if COMPILE_TEST
-        depends on HAS_IOMEM
+	bool "Clockevent timer for the H8300 platform" if COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  This enables the 8 bits timer for the H8300 platform.
 
 config H8300_TMR16
-        bool "Clockevent timer for the H83069 platform" if COMPILE_TEST
-        depends on HAS_IOMEM
+	bool "Clockevent timer for the H83069 platform" if COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  This enables the 16 bits timer for the H8300 platform with the
-	  H83069 cpu.
+	  H83069 CPU.
 
 config H8300_TPU
-        bool "Clocksource for the H8300 platform" if COMPILE_TEST
-        depends on HAS_IOMEM
+	bool "Clocksource for the H8300 platform" if COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  This enables the clocksource for the H8300 platform with the
-	  H8S2678 cpu.
+	  H8S2678 CPU.
 
 config CLKSRC_IMX_GPT
 	bool "Clocksource using i.MX GPT" if COMPILE_TEST
@@ -665,8 +666,8 @@ config CSKY_MP_TIMER
 	help
 	  Say yes here to enable C-SKY SMP timer driver used for C-SKY SMP
 	  system.
-	  csky,mptimer is not only used in SMP system, it also could be used
-	  single core system. It's not a mmio reg and it use mtcr/mfcr instruction.
+	  csky,mptimer is not only used in SMP system, it also could be used in
+	  single core system. It's not a mmio reg and it uses mtcr/mfcr instruction.
 
 config GX6605S_TIMER
 	bool "Gx6605s SOC system timer driver" if COMPILE_TEST
@@ -696,4 +697,14 @@ config INGENIC_TIMER
 	help
 	  Support for the timer/counter unit of the Ingenic JZ SoCs.
 
+config MICROCHIP_PIT64B
+	bool "Microchip PIT64B support"
+	depends on OF || COMPILE_TEST
+	select CLKSRC_MMIO
+	help
+	  This option enables Microchip PIT64B timer for Atmel
+	  based system. It supports the oneshot, the periodic
+	  modes and high resolution. It is used as a clocksource
+	  and a clockevent.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 4dfe4225ece7..713686faa549 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -88,3 +88,4 @@ obj-$(CONFIG_RISCV_TIMER)		+= timer-riscv.o
 obj-$(CONFIG_CSKY_MP_TIMER)		+= timer-mp-csky.o
 obj-$(CONFIG_GX6605S_TIMER)		+= timer-gx6605s.o
 obj-$(CONFIG_HYPERV_TIMER)		+= hyperv_timer.o
+obj-$(CONFIG_MICROCHIP_PIT64B)		+= timer-microchip-pit64b.o
diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c
index 9f09a59161e7..5b39d3701fa3 100644
--- a/drivers/clocksource/asm9260_timer.c
+++ b/drivers/clocksource/asm9260_timer.c
@@ -194,6 +194,10 @@ static int __init asm9260_timer_init(struct device_node *np)
 	}
 
 	clk = of_clk_get(np, 0);
+	if (IS_ERR(clk)) {
+		pr_err("Failed to get clk!\n");
+		return PTR_ERR(clk);
+	}
 
 	ret = clk_prepare_enable(clk);
 	if (ret) {
diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
index 2b196cbfadb6..b235f446ee50 100644
--- a/drivers/clocksource/bcm2835_timer.c
+++ b/drivers/clocksource/bcm2835_timer.c
@@ -121,7 +121,7 @@ static int __init bcm2835_timer_init(struct device_node *node)
 	ret = setup_irq(irq, &timer->act);
 	if (ret) {
 		pr_err("Can't set up timer IRQ\n");
-		goto err_iounmap;
+		goto err_timer_free;
 	}
 
 	clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff);
@@ -130,6 +130,9 @@ static int __init bcm2835_timer_init(struct device_node *node)
 
 	return 0;
 
+err_timer_free:
+	kfree(timer);
+
 err_iounmap:
 	iounmap(base);
 	return ret;
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index 9039df4f90e2..ab190dffb1ed 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -279,9 +279,7 @@ static void em_sti_register_clockevent(struct em_sti_priv *p)
 static int em_sti_probe(struct platform_device *pdev)
 {
 	struct em_sti_priv *p;
-	struct resource *res;
-	int irq;
-	int ret;
+	int irq, ret;
 
 	p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
 	if (p == NULL)
@@ -295,8 +293,7 @@ static int em_sti_probe(struct platform_device *pdev)
 		return irq;
 
 	/* map memory, let base point to the STI instance */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	p->base = devm_ioremap_resource(&pdev->dev, res);
+	p->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(p->base))
 		return PTR_ERR(p->base);
 
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 74cb299f5089..a267fe31ef13 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -4,7 +4,7 @@
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * EXYNOS4 MCT(Multi-Core Timer) support
+ * Exynos4 MCT(Multi-Core Timer) support
 */
 
 #include <linux/interrupt.h>
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 2317d4e3daaf..9d808d595ca8 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/sched_clock.h>
 #include <linux/mm.h>
+#include <linux/cpuhotplug.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
@@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
  * mechanism is used when running on older versions of Hyper-V
  * that don't support Direct Mode. While Hyper-V provides
  * four stimer's per CPU, Linux uses only stimer0.
+ *
+ * Because Direct Mode does not require processing a VMbus
+ * message, stimer interrupts can be enabled earlier in the
+ * process of booting a CPU, and consistent with when timer
+ * interrupts are enabled for other clocksource drivers.
+ * However, for legacy versions of Hyper-V when Direct Mode
+ * is not enabled, setting up stimer interrupts must be
+ * delayed until VMbus is initialized and can process the
+ * interrupt message.
  */
 static bool direct_mode_enabled;
 
@@ -56,7 +66,7 @@ static int hv_ce_set_next_event(unsigned long delta,
 {
 	u64 current_tick;
 
-	current_tick = hyperv_cs->read(NULL);
+	current_tick = hv_read_reference_counter();
 	current_tick += delta;
 	hv_init_timer(0, current_tick);
 	return 0;
@@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
 /*
  * hv_stimer_init - Per-cpu initialization of the clockevent
  */
-void hv_stimer_init(unsigned int cpu)
+static int hv_stimer_init(unsigned int cpu)
 {
 	struct clock_event_device *ce;
 
-	/*
-	 * Synthetic timers are always available except on old versions of
-	 * Hyper-V on x86.  In that case, just return as Linux will use a
-	 * clocksource based on emulated PIT or LAPIC timer hardware.
-	 */
-	if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
-		return;
+	if (!hv_clock_event)
+		return 0;
 
 	ce = per_cpu_ptr(hv_clock_event, cpu);
 	ce->name = "Hyper-V clockevent";
@@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
 					HV_CLOCK_HZ,
 					HV_MIN_DELTA_TICKS,
 					HV_MAX_MAX_DELTA_TICKS);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(hv_stimer_init);
 
 /*
  * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
  */
-void hv_stimer_cleanup(unsigned int cpu)
+int hv_stimer_cleanup(unsigned int cpu)
 {
 	struct clock_event_device *ce;
 
-	/* Turn off clockevent device */
-	if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
-		ce = per_cpu_ptr(hv_clock_event, cpu);
+	if (!hv_clock_event)
+		return 0;
+
+	/*
+	 * In the legacy case where Direct Mode is not enabled
+	 * (which can only be on x86/64), stimer cleanup happens
+	 * relatively early in the CPU offlining process. We
+	 * must unbind the stimer-based clockevent device so
+	 * that the LAPIC timer can take over until clockevents
+	 * are no longer needed in the offlining process. Note
+	 * that clockevents_unbind_device() eventually calls
+	 * hv_ce_shutdown().
+	 *
+	 * The unbind should not be done when Direct Mode is
+	 * enabled because we may be on an architecture where
+	 * there are no other clockevent devices to fallback to.
+	 */
+	ce = per_cpu_ptr(hv_clock_event, cpu);
+	if (direct_mode_enabled)
 		hv_ce_shutdown(ce);
-	}
+	else
+		clockevents_unbind_device(ce, cpu);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
 
 /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
-int hv_stimer_alloc(int sint)
+int hv_stimer_alloc(void)
 {
-	int ret;
+	int ret = 0;
+
+	/*
+	 * Synthetic timers are always available except on old versions of
+	 * Hyper-V on x86.  In that case, return as error as Linux will use a
+	 * clockevent based on emulated LAPIC timer hardware.
+	 */
+	if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
+		return -EINVAL;
 
 	hv_clock_event = alloc_percpu(struct clock_event_device);
 	if (!hv_clock_event)
@@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
 	if (direct_mode_enabled) {
 		ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
 				hv_stimer0_isr);
-		if (ret) {
-			free_percpu(hv_clock_event);
-			hv_clock_event = NULL;
-			return ret;
-		}
+		if (ret)
+			goto free_percpu;
+
+		/*
+		 * Since we are in Direct Mode, stimer initialization
+		 * can be done now with a CPUHP value in the same range
+		 * as other clockevent devices.
+		 */
+		ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
+				"clockevents/hyperv/stimer:starting",
+				hv_stimer_init, hv_stimer_cleanup);
+		if (ret < 0)
+			goto free_stimer0_irq;
 	}
+	return ret;
 
-	stimer0_message_sint = sint;
-	return 0;
+free_stimer0_irq:
+	hv_remove_stimer0_irq(stimer0_irq);
+	stimer0_irq = 0;
+free_percpu:
+	free_percpu(hv_clock_event);
+	hv_clock_event = NULL;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hv_stimer_alloc);
 
+/*
+ * hv_stimer_legacy_init -- Called from the VMbus driver to handle
+ * the case when Direct Mode is not enabled, and the stimer
+ * must be initialized late in the CPU onlining process.
+ *
+ */
+void hv_stimer_legacy_init(unsigned int cpu, int sint)
+{
+	if (direct_mode_enabled)
+		return;
+
+	/*
+	 * This function gets called by each vCPU, so setting the
+	 * global stimer_message_sint value each time is conceptually
+	 * not ideal, but the value passed in is always the same and
+	 * it avoids introducing yet another interface into this
+	 * clocksource driver just to set the sint in the legacy case.
+	 */
+	stimer0_message_sint = sint;
+	(void)hv_stimer_init(cpu);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
+
+/*
+ * hv_stimer_legacy_cleanup -- Called from the VMbus driver to
+ * handle the case when Direct Mode is not enabled, and the
+ * stimer must be cleaned up early in the CPU offlining
+ * process.
+ */
+void hv_stimer_legacy_cleanup(unsigned int cpu)
+{
+	if (direct_mode_enabled)
+		return;
+	(void)hv_stimer_cleanup(cpu);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
+
+
 /* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
 void hv_stimer_free(void)
 {
-	if (direct_mode_enabled && (stimer0_irq != 0)) {
+	if (!hv_clock_event)
+		return;
+
+	if (direct_mode_enabled) {
+		cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
 		hv_remove_stimer0_irq(stimer0_irq);
 		stimer0_irq = 0;
 	}
@@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
 void hv_stimer_global_cleanup(void)
 {
 	int	cpu;
-	struct clock_event_device *ce;
 
-	if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
-		for_each_present_cpu(cpu) {
-			ce = per_cpu_ptr(hv_clock_event, cpu);
-			clockevents_unbind_device(ce, cpu);
-		}
+	/*
+	 * hv_stime_legacy_cleanup() will stop the stimer if Direct
+	 * Mode is not enabled, and fallback to the LAPIC timer.
+	 */
+	for_each_present_cpu(cpu) {
+		hv_stimer_legacy_cleanup(cpu);
 	}
+
+	/*
+	 * If Direct Mode is enabled, the cpuhp teardown callback
+	 * (hv_stimer_cleanup) will be run on all CPUs to stop the
+	 * stimers.
+	 */
 	hv_stimer_free();
 }
 EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
@@ -208,22 +302,33 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
  * the other that uses the TSC reference page feature as defined in the
  * TLFS.  The MSR version is for compatibility with old versions of
  * Hyper-V and 32-bit x86.  The TSC reference page version is preferred.
+ *
+ * The Hyper-V clocksource ratings of 250 are chosen to be below the
+ * TSC clocksource rating of 300.  In configurations where Hyper-V offers
+ * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource
+ * is available and preferred.  With the higher rating, it will be the
+ * default.  On older hardware and Hyper-V versions, the TSC is marked
+ * "unstable", so no TSC clocksource is created and the selected Hyper-V
+ * clocksource will be the default.
  */
 
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
+u64 (*hv_read_reference_counter)(void);
+EXPORT_SYMBOL_GPL(hv_read_reference_counter);
 
-static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE);
+static union {
+	struct ms_hyperv_tsc_page page;
+	u8 reserved[PAGE_SIZE];
+} tsc_pg __aligned(PAGE_SIZE);
 
 struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 {
-	return &tsc_pg;
+	return &tsc_pg.page;
 }
 EXPORT_SYMBOL_GPL(hv_get_tsc_page);
 
-static u64 notrace read_hv_clock_tsc(struct clocksource *arg)
+static u64 notrace read_hv_clock_tsc(void)
 {
-	u64 current_tick = hv_read_tsc_page(&tsc_pg);
+	u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());
 
 	if (current_tick == U64_MAX)
 		hv_get_time_ref_count(current_tick);
@@ -231,20 +336,50 @@ static u64 notrace read_hv_clock_tsc(struct clocksource *arg)
 	return current_tick;
 }
 
+static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
+{
+	return read_hv_clock_tsc();
+}
+
 static u64 read_hv_sched_clock_tsc(void)
 {
-	return read_hv_clock_tsc(NULL) - hv_sched_clock_offset;
+	return read_hv_clock_tsc() - hv_sched_clock_offset;
+}
+
+static void suspend_hv_clock_tsc(struct clocksource *arg)
+{
+	u64 tsc_msr;
+
+	/* Disable the TSC page */
+	hv_get_reference_tsc(tsc_msr);
+	tsc_msr &= ~BIT_ULL(0);
+	hv_set_reference_tsc(tsc_msr);
+}
+
+
+static void resume_hv_clock_tsc(struct clocksource *arg)
+{
+	phys_addr_t phys_addr = virt_to_phys(&tsc_pg);
+	u64 tsc_msr;
+
+	/* Re-enable the TSC page */
+	hv_get_reference_tsc(tsc_msr);
+	tsc_msr &= GENMASK_ULL(11, 0);
+	tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
+	hv_set_reference_tsc(tsc_msr);
 }
 
 static struct clocksource hyperv_cs_tsc = {
 	.name	= "hyperv_clocksource_tsc_page",
-	.rating	= 400,
-	.read	= read_hv_clock_tsc,
+	.rating	= 250,
+	.read	= read_hv_clock_tsc_cs,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+	.suspend= suspend_hv_clock_tsc,
+	.resume	= resume_hv_clock_tsc,
 };
 
-static u64 notrace read_hv_clock_msr(struct clocksource *arg)
+static u64 notrace read_hv_clock_msr(void)
 {
 	u64 current_tick;
 	/*
@@ -256,15 +391,20 @@ static u64 notrace read_hv_clock_msr(struct clocksource *arg)
 	return current_tick;
 }
 
+static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
+{
+	return read_hv_clock_msr();
+}
+
 static u64 read_hv_sched_clock_msr(void)
 {
-	return read_hv_clock_msr(NULL) - hv_sched_clock_offset;
+	return read_hv_clock_msr() - hv_sched_clock_offset;
 }
 
 static struct clocksource hyperv_cs_msr = {
 	.name	= "hyperv_clocksource_msr",
-	.rating	= 400,
-	.read	= read_hv_clock_msr,
+	.rating	= 250,
+	.read	= read_hv_clock_msr_cs,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -277,8 +417,8 @@ static bool __init hv_init_tsc_clocksource(void)
 	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
 		return false;
 
-	hyperv_cs = &hyperv_cs_tsc;
-	phys_addr = virt_to_phys(&tsc_pg);
+	hv_read_reference_counter = read_hv_clock_tsc;
+	phys_addr = virt_to_phys(hv_get_tsc_page());
 
 	/*
 	 * The Hyper-V TLFS specifies to preserve the value of reserved
@@ -295,7 +435,7 @@ static bool __init hv_init_tsc_clocksource(void)
 	hv_set_clocksource_vdso(hyperv_cs_tsc);
 	clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 
-	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_sched_clock_offset = hv_read_reference_counter();
 	hv_setup_sched_clock(read_hv_sched_clock_tsc);
 
 	return true;
@@ -317,10 +457,10 @@ void __init hv_init_clocksource(void)
 	if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
 		return;
 
-	hyperv_cs = &hyperv_cs_msr;
+	hv_read_reference_counter = read_hv_clock_msr;
 	clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
 
-	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_sched_clock_offset = hv_read_reference_counter();
 	hv_setup_sched_clock(read_hv_sched_clock_msr);
 }
 EXPORT_SYMBOL_GPL(hv_init_clocksource);
diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c
index 37c39b901bb1..3d06ba66008c 100644
--- a/drivers/clocksource/renesas-ostm.c
+++ b/drivers/clocksource/renesas-ostm.c
@@ -6,14 +6,14 @@
  * Copyright (C) 2017 Chris Brandt
  */
 
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/clk.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/sched_clock.h>
 #include <linux/slab.h>
 
+#include "timer-of.h"
+
 /*
  * The OSTM contains independent channels.
  * The first OSTM channel probed will be set up as a free running
@@ -24,12 +24,6 @@
  * driven clock event.
  */
 
-struct ostm_device {
-	void __iomem *base;
-	unsigned long ticks_per_jiffy;
-	struct clock_event_device ced;
-};
-
 static void __iomem *system_clock;	/* For sched_clock() */
 
 /* OSTM REGISTERS */
@@ -47,41 +41,32 @@ static void __iomem *system_clock;	/* For sched_clock() */
 #define	CTL_ONESHOT		0x02
 #define	CTL_FREERUN		0x02
 
-static struct ostm_device *ced_to_ostm(struct clock_event_device *ced)
-{
-	return container_of(ced, struct ostm_device, ced);
-}
-
-static void ostm_timer_stop(struct ostm_device *ostm)
+static void ostm_timer_stop(struct timer_of *to)
 {
-	if (readb(ostm->base + OSTM_TE) & TE) {
-		writeb(TT, ostm->base + OSTM_TT);
+	if (readb(timer_of_base(to) + OSTM_TE) & TE) {
+		writeb(TT, timer_of_base(to) + OSTM_TT);
 
 		/*
 		 * Read back the register simply to confirm the write operation
 		 * has completed since I/O writes can sometimes get queued by
 		 * the bus architecture.
 		 */
-		while (readb(ostm->base + OSTM_TE) & TE)
+		while (readb(timer_of_base(to) + OSTM_TE) & TE)
 			;
 	}
 }
 
-static int __init ostm_init_clksrc(struct ostm_device *ostm, unsigned long rate)
+static int __init ostm_init_clksrc(struct timer_of *to)
 {
-	/*
-	 * irq not used (clock sources don't use interrupts)
-	 */
-
-	ostm_timer_stop(ostm);
+	ostm_timer_stop(to);
 
-	writel(0, ostm->base + OSTM_CMP);
-	writeb(CTL_FREERUN, ostm->base + OSTM_CTL);
-	writeb(TS, ostm->base + OSTM_TS);
+	writel(0, timer_of_base(to) + OSTM_CMP);
+	writeb(CTL_FREERUN, timer_of_base(to) + OSTM_CTL);
+	writeb(TS, timer_of_base(to) + OSTM_TS);
 
-	return clocksource_mmio_init(ostm->base + OSTM_CNT,
-			"ostm", rate,
-			300, 32, clocksource_mmio_readl_up);
+	return clocksource_mmio_init(timer_of_base(to) + OSTM_CNT,
+				     to->np->full_name, timer_of_rate(to), 300,
+				     32, clocksource_mmio_readl_up);
 }
 
 static u64 notrace ostm_read_sched_clock(void)
@@ -89,87 +74,75 @@ static u64 notrace ostm_read_sched_clock(void)
 	return readl(system_clock);
 }
 
-static void __init ostm_init_sched_clock(struct ostm_device *ostm,
-			unsigned long rate)
+static void __init ostm_init_sched_clock(struct timer_of *to)
 {
-	system_clock = ostm->base + OSTM_CNT;
-	sched_clock_register(ostm_read_sched_clock, 32, rate);
+	system_clock = timer_of_base(to) + OSTM_CNT;
+	sched_clock_register(ostm_read_sched_clock, 32, timer_of_rate(to));
 }
 
 static int ostm_clock_event_next(unsigned long delta,
-				     struct clock_event_device *ced)
+				 struct clock_event_device *ced)
 {
-	struct ostm_device *ostm = ced_to_ostm(ced);
+	struct timer_of *to = to_timer_of(ced);
 
-	ostm_timer_stop(ostm);
+	ostm_timer_stop(to);
 
-	writel(delta, ostm->base + OSTM_CMP);
-	writeb(CTL_ONESHOT, ostm->base + OSTM_CTL);
-	writeb(TS, ostm->base + OSTM_TS);
+	writel(delta, timer_of_base(to) + OSTM_CMP);
+	writeb(CTL_ONESHOT, timer_of_base(to) + OSTM_CTL);
+	writeb(TS, timer_of_base(to) + OSTM_TS);
 
 	return 0;
 }
 
 static int ostm_shutdown(struct clock_event_device *ced)
 {
-	struct ostm_device *ostm = ced_to_ostm(ced);
+	struct timer_of *to = to_timer_of(ced);
 
-	ostm_timer_stop(ostm);
+	ostm_timer_stop(to);
 
 	return 0;
 }
 static int ostm_set_periodic(struct clock_event_device *ced)
 {
-	struct ostm_device *ostm = ced_to_ostm(ced);
+	struct timer_of *to = to_timer_of(ced);
 
 	if (clockevent_state_oneshot(ced) || clockevent_state_periodic(ced))
-		ostm_timer_stop(ostm);
+		ostm_timer_stop(to);
 
-	writel(ostm->ticks_per_jiffy - 1, ostm->base + OSTM_CMP);
-	writeb(CTL_PERIODIC, ostm->base + OSTM_CTL);
-	writeb(TS, ostm->base + OSTM_TS);
+	writel(timer_of_period(to) - 1, timer_of_base(to) + OSTM_CMP);
+	writeb(CTL_PERIODIC, timer_of_base(to) + OSTM_CTL);
+	writeb(TS, timer_of_base(to) + OSTM_TS);
 
 	return 0;
 }
 
 static int ostm_set_oneshot(struct clock_event_device *ced)
 {
-	struct ostm_device *ostm = ced_to_ostm(ced);
+	struct timer_of *to = to_timer_of(ced);
 
-	ostm_timer_stop(ostm);
+	ostm_timer_stop(to);
 
 	return 0;
 }
 
 static irqreturn_t ostm_timer_interrupt(int irq, void *dev_id)
 {
-	struct ostm_device *ostm = dev_id;
+	struct clock_event_device *ced = dev_id;
 
-	if (clockevent_state_oneshot(&ostm->ced))
-		ostm_timer_stop(ostm);
+	if (clockevent_state_oneshot(ced))
+		ostm_timer_stop(to_timer_of(ced));
 
 	/* notify clockevent layer */
-	if (ostm->ced.event_handler)
-		ostm->ced.event_handler(&ostm->ced);
+	if (ced->event_handler)
+		ced->event_handler(ced);
 
 	return IRQ_HANDLED;
 }
 
-static int __init ostm_init_clkevt(struct ostm_device *ostm, int irq,
-			unsigned long rate)
+static int __init ostm_init_clkevt(struct timer_of *to)
 {
-	struct clock_event_device *ced = &ostm->ced;
-	int ret = -ENXIO;
-
-	ret = request_irq(irq, ostm_timer_interrupt,
-			  IRQF_TIMER | IRQF_IRQPOLL,
-			  "ostm", ostm);
-	if (ret) {
-		pr_err("ostm: failed to request irq\n");
-		return ret;
-	}
+	struct clock_event_device *ced = &to->clkevt;
 
-	ced->name = "ostm";
 	ced->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC;
 	ced->set_state_shutdown = ostm_shutdown;
 	ced->set_state_periodic = ostm_set_periodic;
@@ -178,79 +151,61 @@ static int __init ostm_init_clkevt(struct ostm_device *ostm, int irq,
 	ced->shift = 32;
 	ced->rating = 300;
 	ced->cpumask = cpumask_of(0);
-	clockevents_config_and_register(ced, rate, 0xf, 0xffffffff);
+	clockevents_config_and_register(ced, timer_of_rate(to), 0xf,
+					0xffffffff);
 
 	return 0;
 }
 
 static int __init ostm_init(struct device_node *np)
 {
-	struct ostm_device *ostm;
-	int ret = -EFAULT;
-	struct clk *ostm_clk = NULL;
-	int irq;
-	unsigned long rate;
-
-	ostm = kzalloc(sizeof(*ostm), GFP_KERNEL);
-	if (!ostm)
-		return -ENOMEM;
-
-	ostm->base = of_iomap(np, 0);
-	if (!ostm->base) {
-		pr_err("ostm: failed to remap I/O memory\n");
-		goto err;
-	}
-
-	irq = irq_of_parse_and_map(np, 0);
-	if (irq < 0) {
-		pr_err("ostm: Failed to get irq\n");
-		goto err;
-	}
+	struct timer_of *to;
+	int ret;
 
-	ostm_clk = of_clk_get(np, 0);
-	if (IS_ERR(ostm_clk)) {
-		pr_err("ostm: Failed to get clock\n");
-		ostm_clk = NULL;
-		goto err;
-	}
+	to = kzalloc(sizeof(*to), GFP_KERNEL);
+	if (!to)
+		return -ENOMEM;
 
-	ret = clk_prepare_enable(ostm_clk);
-	if (ret) {
-		pr_err("ostm: Failed to enable clock\n");
-		goto err;
+	to->flags = TIMER_OF_BASE | TIMER_OF_CLOCK;
+	if (system_clock) {
+		/*
+		 * clock sources don't use interrupts, clock events do
+		 */
+		to->flags |= TIMER_OF_IRQ;
+		to->of_irq.flags = IRQF_TIMER | IRQF_IRQPOLL;
+		to->of_irq.handler = ostm_timer_interrupt;
 	}
 
-	rate = clk_get_rate(ostm_clk);
-	ostm->ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ);
+	ret = timer_of_init(np, to);
+	if (ret)
+		goto err_free;
 
 	/*
 	 * First probed device will be used as system clocksource. Any
 	 * additional devices will be used as clock events.
 	 */
 	if (!system_clock) {
-		ret = ostm_init_clksrc(ostm, rate);
-
-		if (!ret) {
-			ostm_init_sched_clock(ostm, rate);
-			pr_info("ostm: used for clocksource\n");
-		}
+		ret = ostm_init_clksrc(to);
+		if (ret)
+			goto err_cleanup;
 
+		ostm_init_sched_clock(to);
+		pr_info("%pOF: used for clocksource\n", np);
 	} else {
-		ret = ostm_init_clkevt(ostm, irq, rate);
+		ret = ostm_init_clkevt(to);
+		if (ret)
+			goto err_cleanup;
 
-		if (!ret)
-			pr_info("ostm: used for clock events\n");
-	}
-
-err:
-	if (ret) {
-		clk_disable_unprepare(ostm_clk);
-		iounmap(ostm->base);
-		kfree(ostm);
-		return ret;
+		pr_info("%pOF: used for clock events\n", np);
 	}
 
 	return 0;
+
+err_cleanup:
+	timer_of_cleanup(to);
+err_free:
+	kfree(to);
+	return ret;
 }
 
 TIMER_OF_DECLARE(ostm, "renesas,ostm", ostm_init);
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 895f53eb5771..dae1b2b5a0c5 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -430,8 +430,7 @@ static int __init samsung_pwm_alloc(struct device_node *np,
 
 	of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) {
 		if (val >= SAMSUNG_PWM_NUM) {
-			pr_warning("%s: invalid channel index in samsung,pwm-outputs property\n",
-								__func__);
+			pr_warn("%s: invalid channel index in samsung,pwm-outputs property\n", __func__);
 			continue;
 		}
 		pwm.variant.output_mask |= 1 << val;
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index ef773db080e9..12ac75f7571f 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -25,6 +25,10 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#ifdef CONFIG_SUPERH
+#include <asm/platform_early.h>
+#endif
+
 struct sh_cmt_device;
 
 /*
@@ -901,7 +905,7 @@ static int sh_cmt_map_memory(struct sh_cmt_device *cmt)
 		return -ENXIO;
 	}
 
-	cmt->mapbase = ioremap_nocache(mem->start, resource_size(mem));
+	cmt->mapbase = ioremap(mem->start, resource_size(mem));
 	if (cmt->mapbase == NULL) {
 		dev_err(&cmt->pdev->dev, "failed to remap I/O memory\n");
 		return -ENXIO;
@@ -1052,7 +1056,7 @@ static int sh_cmt_probe(struct platform_device *pdev)
 	struct sh_cmt_device *cmt = platform_get_drvdata(pdev);
 	int ret;
 
-	if (!is_early_platform_device(pdev)) {
+	if (!is_sh_early_platform_device(pdev)) {
 		pm_runtime_set_active(&pdev->dev);
 		pm_runtime_enable(&pdev->dev);
 	}
@@ -1072,7 +1076,7 @@ static int sh_cmt_probe(struct platform_device *pdev)
 		pm_runtime_idle(&pdev->dev);
 		return ret;
 	}
-	if (is_early_platform_device(pdev))
+	if (is_sh_early_platform_device(pdev))
 		return 0;
 
  out:
@@ -1109,7 +1113,10 @@ static void __exit sh_cmt_exit(void)
 	platform_driver_unregister(&sh_cmt_device_driver);
 }
 
-early_platform_init("earlytimer", &sh_cmt_device_driver);
+#ifdef CONFIG_SUPERH
+sh_early_platform_init("earlytimer", &sh_cmt_device_driver);
+#endif
+
 subsys_initcall(sh_cmt_init);
 module_exit(sh_cmt_exit);
 
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 62812f80b5cc..bfccb31e94ad 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -23,6 +23,10 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#ifdef CONFIG_SUPERH
+#include <asm/platform_early.h>
+#endif
+
 struct sh_mtu2_device;
 
 struct sh_mtu2_channel {
@@ -373,7 +377,7 @@ static int sh_mtu2_map_memory(struct sh_mtu2_device *mtu)
 		return -ENXIO;
 	}
 
-	mtu->mapbase = ioremap_nocache(res->start, resource_size(res));
+	mtu->mapbase = ioremap(res->start, resource_size(res));
 	if (mtu->mapbase == NULL)
 		return -ENXIO;
 
@@ -448,7 +452,7 @@ static int sh_mtu2_probe(struct platform_device *pdev)
 	struct sh_mtu2_device *mtu = platform_get_drvdata(pdev);
 	int ret;
 
-	if (!is_early_platform_device(pdev)) {
+	if (!is_sh_early_platform_device(pdev)) {
 		pm_runtime_set_active(&pdev->dev);
 		pm_runtime_enable(&pdev->dev);
 	}
@@ -468,7 +472,7 @@ static int sh_mtu2_probe(struct platform_device *pdev)
 		pm_runtime_idle(&pdev->dev);
 		return ret;
 	}
-	if (is_early_platform_device(pdev))
+	if (is_sh_early_platform_device(pdev))
 		return 0;
 
  out:
@@ -517,7 +521,10 @@ static void __exit sh_mtu2_exit(void)
 	platform_driver_unregister(&sh_mtu2_device_driver);
 }
 
-early_platform_init("earlytimer", &sh_mtu2_device_driver);
+#ifdef CONFIG_SUPERH
+sh_early_platform_init("earlytimer", &sh_mtu2_device_driver);
+#endif
+
 subsys_initcall(sh_mtu2_init);
 module_exit(sh_mtu2_exit);
 
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 8c4f3753b36e..d41df9ba3725 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -24,6 +24,10 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#ifdef CONFIG_SUPERH
+#include <asm/platform_early.h>
+#endif
+
 enum sh_tmu_model {
 	SH_TMU,
 	SH_TMU_SH3,
@@ -482,7 +486,7 @@ static int sh_tmu_map_memory(struct sh_tmu_device *tmu)
 		return -ENXIO;
 	}
 
-	tmu->mapbase = ioremap_nocache(res->start, resource_size(res));
+	tmu->mapbase = ioremap(res->start, resource_size(res));
 	if (tmu->mapbase == NULL)
 		return -ENXIO;
 
@@ -595,7 +599,7 @@ static int sh_tmu_probe(struct platform_device *pdev)
 	struct sh_tmu_device *tmu = platform_get_drvdata(pdev);
 	int ret;
 
-	if (!is_early_platform_device(pdev)) {
+	if (!is_sh_early_platform_device(pdev)) {
 		pm_runtime_set_active(&pdev->dev);
 		pm_runtime_enable(&pdev->dev);
 	}
@@ -615,7 +619,8 @@ static int sh_tmu_probe(struct platform_device *pdev)
 		pm_runtime_idle(&pdev->dev);
 		return ret;
 	}
-	if (is_early_platform_device(pdev))
+
+	if (is_sh_early_platform_device(pdev))
 		return 0;
 
  out:
@@ -665,7 +670,10 @@ static void __exit sh_tmu_exit(void)
 	platform_driver_unregister(&sh_tmu_device_driver);
 }
 
-early_platform_init("earlytimer", &sh_tmu_device_driver);
+#ifdef CONFIG_SUPERH
+sh_early_platform_init("earlytimer", &sh_tmu_device_driver);
+#endif
+
 subsys_initcall(sh_tmu_init);
 module_exit(sh_tmu_exit);
 
diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c
index 88fe2e9ba9a3..38858e141731 100644
--- a/drivers/clocksource/timer-cadence-ttc.c
+++ b/drivers/clocksource/timer-cadence-ttc.c
@@ -15,6 +15,8 @@
 #include <linux/of_irq.h>
 #include <linux/slab.h>
 #include <linux/sched_clock.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
 
 /*
  * This driver configures the 2 16/32-bit count-up timers as follows:
@@ -464,13 +466,7 @@ static int __init ttc_setup_clockevent(struct clk *clk,
 	return 0;
 }
 
-/**
- * ttc_timer_init - Initialize the timer
- *
- * Initializes the timer hardware and register the clock source and clock event
- * timers with Linux kernal timer framework
- */
-static int __init ttc_timer_init(struct device_node *timer)
+static int __init ttc_timer_probe(struct platform_device *pdev)
 {
 	unsigned int irq;
 	void __iomem *timer_baseaddr;
@@ -478,6 +474,7 @@ static int __init ttc_timer_init(struct device_node *timer)
 	static int initialized;
 	int clksel, ret;
 	u32 timer_width = 16;
+	struct device_node *timer = pdev->dev.of_node;
 
 	if (initialized)
 		return 0;
@@ -532,4 +529,17 @@ static int __init ttc_timer_init(struct device_node *timer)
 	return 0;
 }
 
-TIMER_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init);
+static const struct of_device_id ttc_timer_of_match[] = {
+	{.compatible = "cdns,ttc"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, ttc_timer_of_match);
+
+static struct platform_driver ttc_timer_driver = {
+	.driver = {
+		.name	= "cdns_ttc_timer",
+		.of_match_table = ttc_timer_of_match,
+	},
+};
+builtin_platform_driver_probe(ttc_timer_driver, ttc_timer_probe);
diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
new file mode 100644
index 000000000000..bd63d3484838
--- /dev/null
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * 64-bit Periodic Interval Timer driver
+ *
+ * Copyright (C) 2019 Microchip Technology Inc. and its subsidiaries
+ *
+ * Author: Claudiu Beznea <claudiu.beznea@microchip.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+#include <linux/slab.h>
+
+#define MCHP_PIT64B_CR			0x00	/* Control Register */
+#define MCHP_PIT64B_CR_START		BIT(0)
+#define MCHP_PIT64B_CR_SWRST		BIT(8)
+
+#define MCHP_PIT64B_MR			0x04	/* Mode Register */
+#define MCHP_PIT64B_MR_CONT		BIT(0)
+#define MCHP_PIT64B_MR_ONE_SHOT		(0)
+#define MCHP_PIT64B_MR_SGCLK		BIT(3)
+#define MCHP_PIT64B_MR_PRES		GENMASK(11, 8)
+
+#define MCHP_PIT64B_LSB_PR		0x08	/* LSB Period Register */
+
+#define MCHP_PIT64B_MSB_PR		0x0C	/* MSB Period Register */
+
+#define MCHP_PIT64B_IER			0x10	/* Interrupt Enable Register */
+#define MCHP_PIT64B_IER_PERIOD		BIT(0)
+
+#define MCHP_PIT64B_ISR			0x1C	/* Interrupt Status Register */
+
+#define MCHP_PIT64B_TLSBR		0x20	/* Timer LSB Register */
+
+#define MCHP_PIT64B_TMSBR		0x24	/* Timer MSB Register */
+
+#define MCHP_PIT64B_PRES_MAX		0x10
+#define MCHP_PIT64B_LSBMASK		GENMASK_ULL(31, 0)
+#define MCHP_PIT64B_PRES_TO_MODE(p)	(MCHP_PIT64B_MR_PRES & ((p) << 8))
+#define MCHP_PIT64B_MODE_TO_PRES(m)	((MCHP_PIT64B_MR_PRES & (m)) >> 8)
+#define MCHP_PIT64B_DEF_CS_FREQ		5000000UL	/* 5 MHz */
+#define MCHP_PIT64B_DEF_CE_FREQ		32768		/* 32 KHz */
+
+#define MCHP_PIT64B_NAME		"pit64b"
+
+/**
+ * struct mchp_pit64b_timer - PIT64B timer data structure
+ * @base: base address of PIT64B hardware block
+ * @pclk: PIT64B's peripheral clock
+ * @gclk: PIT64B's generic clock
+ * @mode: precomputed value for mode register
+ */
+struct mchp_pit64b_timer {
+	void __iomem	*base;
+	struct clk	*pclk;
+	struct clk	*gclk;
+	u32		mode;
+};
+
+/**
+ * mchp_pit64b_clkevt - PIT64B clockevent data structure
+ * @timer: PIT64B timer
+ * @clkevt: clockevent
+ */
+struct mchp_pit64b_clkevt {
+	struct mchp_pit64b_timer	timer;
+	struct clock_event_device	clkevt;
+};
+
+#define to_mchp_pit64b_timer(x) \
+	((struct mchp_pit64b_timer *)container_of(x,\
+		struct mchp_pit64b_clkevt, clkevt))
+
+/* Base address for clocksource timer. */
+static void __iomem *mchp_pit64b_cs_base;
+/* Default cycles for clockevent timer. */
+static u64 mchp_pit64b_ce_cycles;
+
+static inline u64 mchp_pit64b_cnt_read(void __iomem *base)
+{
+	unsigned long	flags;
+	u32		low, high;
+
+	raw_local_irq_save(flags);
+
+	/*
+	 * When using a 64 bit period TLSB must be read first, followed by the
+	 * read of TMSB. This sequence generates an atomic read of the 64 bit
+	 * timer value whatever the lapse of time between the accesses.
+	 */
+	low = readl_relaxed(base + MCHP_PIT64B_TLSBR);
+	high = readl_relaxed(base + MCHP_PIT64B_TMSBR);
+
+	raw_local_irq_restore(flags);
+
+	return (((u64)high << 32) | low);
+}
+
+static inline void mchp_pit64b_reset(struct mchp_pit64b_timer *timer,
+				     u64 cycles, u32 mode, u32 irqs)
+{
+	u32 low, high;
+
+	low = cycles & MCHP_PIT64B_LSBMASK;
+	high = cycles >> 32;
+
+	writel_relaxed(MCHP_PIT64B_CR_SWRST, timer->base + MCHP_PIT64B_CR);
+	writel_relaxed(mode | timer->mode, timer->base + MCHP_PIT64B_MR);
+	writel_relaxed(high, timer->base + MCHP_PIT64B_MSB_PR);
+	writel_relaxed(low, timer->base + MCHP_PIT64B_LSB_PR);
+	writel_relaxed(irqs, timer->base + MCHP_PIT64B_IER);
+	writel_relaxed(MCHP_PIT64B_CR_START, timer->base + MCHP_PIT64B_CR);
+}
+
+static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
+{
+	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
+}
+
+static u64 mchp_pit64b_sched_read_clk(void)
+{
+	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
+}
+
+static int mchp_pit64b_clkevt_shutdown(struct clock_event_device *cedev)
+{
+	struct mchp_pit64b_timer *timer = to_mchp_pit64b_timer(cedev);
+
+	writel_relaxed(MCHP_PIT64B_CR_SWRST, timer->base + MCHP_PIT64B_CR);
+
+	return 0;
+}
+
+static int mchp_pit64b_clkevt_set_periodic(struct clock_event_device *cedev)
+{
+	struct mchp_pit64b_timer *timer = to_mchp_pit64b_timer(cedev);
+
+	mchp_pit64b_reset(timer, mchp_pit64b_ce_cycles, MCHP_PIT64B_MR_CONT,
+			  MCHP_PIT64B_IER_PERIOD);
+
+	return 0;
+}
+
+static int mchp_pit64b_clkevt_set_next_event(unsigned long evt,
+					     struct clock_event_device *cedev)
+{
+	struct mchp_pit64b_timer *timer = to_mchp_pit64b_timer(cedev);
+
+	mchp_pit64b_reset(timer, evt, MCHP_PIT64B_MR_ONE_SHOT,
+			  MCHP_PIT64B_IER_PERIOD);
+
+	return 0;
+}
+
+static void mchp_pit64b_clkevt_suspend(struct clock_event_device *cedev)
+{
+	struct mchp_pit64b_timer *timer = to_mchp_pit64b_timer(cedev);
+
+	writel_relaxed(MCHP_PIT64B_CR_SWRST, timer->base + MCHP_PIT64B_CR);
+	if (timer->mode & MCHP_PIT64B_MR_SGCLK)
+		clk_disable_unprepare(timer->gclk);
+	clk_disable_unprepare(timer->pclk);
+}
+
+static void mchp_pit64b_clkevt_resume(struct clock_event_device *cedev)
+{
+	struct mchp_pit64b_timer *timer = to_mchp_pit64b_timer(cedev);
+
+	clk_prepare_enable(timer->pclk);
+	if (timer->mode & MCHP_PIT64B_MR_SGCLK)
+		clk_prepare_enable(timer->gclk);
+}
+
+static irqreturn_t mchp_pit64b_interrupt(int irq, void *dev_id)
+{
+	struct mchp_pit64b_clkevt *irq_data = dev_id;
+
+	/* Need to clear the interrupt. */
+	readl_relaxed(irq_data->timer.base + MCHP_PIT64B_ISR);
+
+	irq_data->clkevt.event_handler(&irq_data->clkevt);
+
+	return IRQ_HANDLED;
+}
+
+static void __init mchp_pit64b_pres_compute(u32 *pres, u32 clk_rate,
+					    u32 max_rate)
+{
+	u32 tmp;
+
+	for (*pres = 0; *pres < MCHP_PIT64B_PRES_MAX; (*pres)++) {
+		tmp = clk_rate / (*pres + 1);
+		if (tmp <= max_rate)
+			break;
+	}
+
+	/* Use the bigest prescaler if we didn't match one. */
+	if (*pres == MCHP_PIT64B_PRES_MAX)
+		*pres = MCHP_PIT64B_PRES_MAX - 1;
+}
+
+/**
+ * mchp_pit64b_init_mode - prepare PIT64B mode register value to be used at
+ *			   runtime; this includes prescaler and SGCLK bit
+ *
+ * PIT64B timer may be fed by gclk or pclk. When gclk is used its rate has to
+ * be at least 3 times lower that pclk's rate. pclk rate is fixed, gclk rate
+ * could be changed via clock APIs. The chosen clock (pclk or gclk) could be
+ * divided by the internal PIT64B's divider.
+ *
+ * This function, first tries to use GCLK by requesting the desired rate from
+ * PMC and then using the internal PIT64B prescaler, if any, to reach the
+ * requested rate. If PCLK/GCLK < 3 (condition requested by PIT64B hardware)
+ * then the function falls back on using PCLK as clock source for PIT64B timer
+ * choosing the highest prescaler in case it doesn't locate one to match the
+ * requested frequency.
+ *
+ * Below is presented the PIT64B block in relation with PMC:
+ *
+ *                                PIT64B
+ *  PMC             +------------------------------------+
+ * +----+           |   +-----+                          |
+ * |    |-->gclk -->|-->|     |    +---------+  +-----+  |
+ * |    |           |   | MUX |--->| Divider |->|timer|  |
+ * |    |-->pclk -->|-->|     |    +---------+  +-----+  |
+ * +----+           |   +-----+                          |
+ *                  |      ^                             |
+ *                  |     sel                            |
+ *                  +------------------------------------+
+ *
+ * Where:
+ *	- gclk rate <= pclk rate/3
+ *	- gclk rate could be requested from PMC
+ *	- pclk rate is fixed (cannot be requested from PMC)
+ */
+static int __init mchp_pit64b_init_mode(struct mchp_pit64b_timer *timer,
+					unsigned long max_rate)
+{
+	unsigned long pclk_rate, diff = 0, best_diff = ULONG_MAX;
+	long gclk_round = 0;
+	u32 pres, best_pres = 0;
+
+	pclk_rate = clk_get_rate(timer->pclk);
+	if (!pclk_rate)
+		return -EINVAL;
+
+	timer->mode = 0;
+
+	/* Try using GCLK. */
+	gclk_round = clk_round_rate(timer->gclk, max_rate);
+	if (gclk_round < 0)
+		goto pclk;
+
+	if (pclk_rate / gclk_round < 3)
+		goto pclk;
+
+	mchp_pit64b_pres_compute(&pres, gclk_round, max_rate);
+	best_diff = abs(gclk_round / (pres + 1) - max_rate);
+	best_pres = pres;
+
+	if (!best_diff) {
+		timer->mode |= MCHP_PIT64B_MR_SGCLK;
+		goto done;
+	}
+
+pclk:
+	/* Check if requested rate could be obtained using PCLK. */
+	mchp_pit64b_pres_compute(&pres, pclk_rate, max_rate);
+	diff = abs(pclk_rate / (pres + 1) - max_rate);
+
+	if (best_diff > diff) {
+		/* Use PCLK. */
+		best_pres = pres;
+	} else {
+		/* Use GCLK. */
+		timer->mode |= MCHP_PIT64B_MR_SGCLK;
+		clk_set_rate(timer->gclk, gclk_round);
+	}
+
+done:
+	timer->mode |= MCHP_PIT64B_PRES_TO_MODE(best_pres);
+
+	pr_info("PIT64B: using clk=%s with prescaler %u, freq=%lu [Hz]\n",
+		timer->mode & MCHP_PIT64B_MR_SGCLK ? "gclk" : "pclk", best_pres,
+		timer->mode & MCHP_PIT64B_MR_SGCLK ?
+		gclk_round / (best_pres + 1) : pclk_rate / (best_pres + 1));
+
+	return 0;
+}
+
+static int __init mchp_pit64b_init_clksrc(struct mchp_pit64b_timer *timer,
+					  u32 clk_rate)
+{
+	int ret;
+
+	mchp_pit64b_reset(timer, ULLONG_MAX, MCHP_PIT64B_MR_CONT, 0);
+
+	mchp_pit64b_cs_base = timer->base;
+
+	ret = clocksource_mmio_init(timer->base, MCHP_PIT64B_NAME, clk_rate,
+				    210, 64, mchp_pit64b_clksrc_read);
+	if (ret) {
+		pr_debug("clksrc: Failed to register PIT64B clocksource!\n");
+
+		/* Stop timer. */
+		writel_relaxed(MCHP_PIT64B_CR_SWRST,
+			       timer->base + MCHP_PIT64B_CR);
+
+		return ret;
+	}
+
+	sched_clock_register(mchp_pit64b_sched_read_clk, 64, clk_rate);
+
+	return 0;
+}
+
+static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
+					  u32 clk_rate, u32 irq)
+{
+	struct mchp_pit64b_clkevt *ce;
+	int ret;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return -ENOMEM;
+
+	mchp_pit64b_ce_cycles = DIV_ROUND_CLOSEST(clk_rate, HZ);
+
+	ce->timer.base = timer->base;
+	ce->timer.pclk = timer->pclk;
+	ce->timer.gclk = timer->gclk;
+	ce->timer.mode = timer->mode;
+	ce->clkevt.name = MCHP_PIT64B_NAME;
+	ce->clkevt.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC;
+	ce->clkevt.rating = 150;
+	ce->clkevt.set_state_shutdown = mchp_pit64b_clkevt_shutdown;
+	ce->clkevt.set_state_periodic = mchp_pit64b_clkevt_set_periodic;
+	ce->clkevt.set_next_event = mchp_pit64b_clkevt_set_next_event;
+	ce->clkevt.suspend = mchp_pit64b_clkevt_suspend;
+	ce->clkevt.resume = mchp_pit64b_clkevt_resume;
+	ce->clkevt.cpumask = cpumask_of(0);
+	ce->clkevt.irq = irq;
+
+	ret = request_irq(irq, mchp_pit64b_interrupt, IRQF_TIMER,
+			  "pit64b_tick", ce);
+	if (ret) {
+		pr_debug("clkevt: Failed to setup PIT64B IRQ\n");
+		kfree(ce);
+		return ret;
+	}
+
+	clockevents_config_and_register(&ce->clkevt, clk_rate, 1, ULONG_MAX);
+
+	return 0;
+}
+
+static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
+					    bool clkevt)
+{
+	u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ;
+	struct mchp_pit64b_timer timer;
+	unsigned long clk_rate;
+	u32 irq = 0;
+	int ret;
+
+	/* Parse DT node. */
+	timer.pclk = of_clk_get_by_name(node, "pclk");
+	if (IS_ERR(timer.pclk))
+		return PTR_ERR(timer.pclk);
+
+	timer.gclk = of_clk_get_by_name(node, "gclk");
+	if (IS_ERR(timer.gclk))
+		return PTR_ERR(timer.gclk);
+
+	timer.base = of_iomap(node, 0);
+	if (!timer.base)
+		return -ENXIO;
+
+	if (clkevt) {
+		irq = irq_of_parse_and_map(node, 0);
+		if (!irq) {
+			ret = -ENODEV;
+			goto io_unmap;
+		}
+	}
+
+	/* Initialize mode (prescaler + SGCK bit). To be used at runtime. */
+	ret = mchp_pit64b_init_mode(&timer, freq);
+	if (ret)
+		goto irq_unmap;
+
+	ret = clk_prepare_enable(timer.pclk);
+	if (ret)
+		goto irq_unmap;
+
+	if (timer.mode & MCHP_PIT64B_MR_SGCLK) {
+		ret = clk_prepare_enable(timer.gclk);
+		if (ret)
+			goto pclk_unprepare;
+
+		clk_rate = clk_get_rate(timer.gclk);
+	} else {
+		clk_rate = clk_get_rate(timer.pclk);
+	}
+	clk_rate = clk_rate / (MCHP_PIT64B_MODE_TO_PRES(timer.mode) + 1);
+
+	if (clkevt)
+		ret = mchp_pit64b_init_clkevt(&timer, clk_rate, irq);
+	else
+		ret = mchp_pit64b_init_clksrc(&timer, clk_rate);
+
+	if (ret)
+		goto gclk_unprepare;
+
+	return 0;
+
+gclk_unprepare:
+	if (timer.mode & MCHP_PIT64B_MR_SGCLK)
+		clk_disable_unprepare(timer.gclk);
+pclk_unprepare:
+	clk_disable_unprepare(timer.pclk);
+irq_unmap:
+	irq_dispose_mapping(irq);
+io_unmap:
+	iounmap(timer.base);
+
+	return ret;
+}
+
+static int __init mchp_pit64b_dt_init(struct device_node *node)
+{
+	static int inits;
+
+	switch (inits++) {
+	case 0:
+		/* 1st request, register clockevent. */
+		return mchp_pit64b_dt_init_timer(node, true);
+	case 1:
+		/* 2nd request, register clocksource. */
+		return mchp_pit64b_dt_init_timer(node, false);
+	}
+
+	/* The rest, don't care. */
+	return -EINVAL;
+}
+
+TIMER_OF_DECLARE(mchp_pit64b, "microchip,sam9x60-pit64b", mchp_pit64b_dt_init);
diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index 11ff701ff4bb..572da477c6d3 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -57,8 +57,8 @@ static __init int timer_of_irq_init(struct device_node *np,
 	if (of_irq->name) {
 		of_irq->irq = ret = of_irq_get_byname(np, of_irq->name);
 		if (ret < 0) {
-			pr_err("Failed to get interrupt %s for %s\n",
-			       of_irq->name, np->full_name);
+			pr_err("Failed to get interrupt %s for %pOF\n",
+			       of_irq->name, np);
 			return ret;
 		}
 	} else	{
@@ -192,7 +192,7 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to)
 	}
 
 	if (!to->clkevt.name)
-		to->clkevt.name = np->name;
+		to->clkevt.name = np->full_name;
 
 	to->np = np;
 
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 470c7ef02ea4..c4f15c4068c0 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -3,9 +3,9 @@
  * Copyright (C) 2012 Regents of the University of California
  * Copyright (C) 2017 SiFive
  *
- * All RISC-V systems have a timer attached to every hart.  These timers can be
- * read from the "time" and "timeh" CSRs, and can use the SBI to setup
- * events.
+ * All RISC-V systems have a timer attached to every hart.  These timers can
+ * either be read from the "time" and "timeh" CSRs, and can use the SBI to
+ * setup events, or directly accessed using MMIO registers.
  */
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
@@ -13,14 +13,29 @@
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/sched_clock.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/smp.h>
 #include <asm/sbi.h>
 
+u64 __iomem *riscv_time_cmp;
+u64 __iomem *riscv_time_val;
+
+static inline void mmio_set_timer(u64 val)
+{
+	void __iomem *r;
+
+	r = riscv_time_cmp + cpuid_to_hartid_map(smp_processor_id());
+	writeq_relaxed(val, r);
+}
+
 static int riscv_clock_next_event(unsigned long delta,
 		struct clock_event_device *ce)
 {
-	csr_set(sie, SIE_STIE);
-	sbi_set_timer(get_cycles64() + delta);
+	csr_set(CSR_IE, IE_TIE);
+	if (IS_ENABLED(CONFIG_RISCV_SBI))
+		sbi_set_timer(get_cycles64() + delta);
+	else
+		mmio_set_timer(get_cycles64() + delta);
 	return 0;
 }
 
@@ -41,7 +56,7 @@ static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
 	return get_cycles64();
 }
 
-static u64 riscv_sched_clock(void)
+static u64 notrace riscv_sched_clock(void)
 {
 	return get_cycles64();
 }
@@ -61,13 +76,13 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
 	ce->cpumask = cpumask_of(cpu);
 	clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
 
-	csr_set(sie, SIE_STIE);
+	csr_set(CSR_IE, IE_TIE);
 	return 0;
 }
 
 static int riscv_timer_dying_cpu(unsigned int cpu)
 {
-	csr_clear(sie, SIE_STIE);
+	csr_clear(CSR_IE, IE_TIE);
 	return 0;
 }
 
@@ -76,7 +91,7 @@ void riscv_timer_interrupt(void)
 {
 	struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
 
-	csr_clear(sie, SIE_STIE);
+	csr_clear(CSR_IE, IE_TIE);
 	evdev->event_handler(evdev);
 }
 
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 5394d9dbdfbc..269a994d6a99 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -780,7 +780,6 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 {
 	unsigned long flags;
 	struct omap_dm_timer *timer;
-	struct resource *mem, *irq;
 	struct device *dev = &pdev->dev;
 	const struct dmtimer_platform_data *pdata;
 	int ret;
@@ -796,24 +795,16 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (unlikely(!irq)) {
-		dev_err(dev, "%s: no IRQ resource.\n", __func__);
-		return -ENODEV;
-	}
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (unlikely(!mem)) {
-		dev_err(dev, "%s: no memory resource.\n", __func__);
-		return -ENODEV;
-	}
-
 	timer = devm_kzalloc(dev, sizeof(*timer), GFP_KERNEL);
 	if (!timer)
 		return  -ENOMEM;
 
+	timer->irq = platform_get_irq(pdev, 0);
+	if (timer->irq < 0)
+		return timer->irq;
+
 	timer->fclk = ERR_PTR(-ENODEV);
-	timer->io_base = devm_ioremap_resource(dev, mem);
+	timer->io_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(timer->io_base))
 		return PTR_ERR(timer->io_base);
 
@@ -836,7 +827,6 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 	if (pdata)
 		timer->errata = pdata->timer_errata;
 
-	timer->irq = irq->start;
 	timer->pdev = pdev;
 
 	pm_runtime_enable(dev);
diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c
index 00b113f4b958..17e67a84777d 100644
--- a/drivers/counter/104-quad-8.c
+++ b/drivers/counter/104-quad-8.c
@@ -562,11 +562,10 @@ static const struct iio_chan_spec quad8_channels[] = {
 };
 
 static int quad8_signal_read(struct counter_device *counter,
-	struct counter_signal *signal, struct counter_signal_read_value *val)
+	struct counter_signal *signal, enum counter_signal_value *val)
 {
 	const struct quad8_iio *const priv = counter->priv;
 	unsigned int state;
-	enum counter_signal_level level;
 
 	/* Only Index signal levels can be read */
 	if (signal->id < 16)
@@ -575,22 +574,19 @@ static int quad8_signal_read(struct counter_device *counter,
 	state = inb(priv->base + QUAD8_REG_INDEX_INPUT_LEVELS)
 		& BIT(signal->id - 16);
 
-	level = (state) ? COUNTER_SIGNAL_LEVEL_HIGH : COUNTER_SIGNAL_LEVEL_LOW;
-
-	counter_signal_read_value_set(val, COUNTER_SIGNAL_LEVEL, &level);
+	*val = (state) ? COUNTER_SIGNAL_HIGH : COUNTER_SIGNAL_LOW;
 
 	return 0;
 }
 
 static int quad8_count_read(struct counter_device *counter,
-	struct counter_count *count, struct counter_count_read_value *val)
+	struct counter_count *count, unsigned long *val)
 {
 	const struct quad8_iio *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id;
 	unsigned int flags;
 	unsigned int borrow;
 	unsigned int carry;
-	unsigned long position;
 	int i;
 
 	flags = inb(base_offset + 1);
@@ -598,36 +594,27 @@ static int quad8_count_read(struct counter_device *counter,
 	carry = !!(flags & QUAD8_FLAG_CT);
 
 	/* Borrow XOR Carry effectively doubles count range */
-	position = (unsigned long)(borrow ^ carry) << 24;
+	*val = (unsigned long)(borrow ^ carry) << 24;
 
 	/* Reset Byte Pointer; transfer Counter to Output Latch */
 	outb(QUAD8_CTR_RLD | QUAD8_RLD_RESET_BP | QUAD8_RLD_CNTR_OUT,
 	     base_offset + 1);
 
 	for (i = 0; i < 3; i++)
-		position |= (unsigned long)inb(base_offset) << (8 * i);
-
-	counter_count_read_value_set(val, COUNTER_COUNT_POSITION, &position);
+		*val |= (unsigned long)inb(base_offset) << (8 * i);
 
 	return 0;
 }
 
 static int quad8_count_write(struct counter_device *counter,
-	struct counter_count *count, struct counter_count_write_value *val)
+	struct counter_count *count, unsigned long val)
 {
 	const struct quad8_iio *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id;
-	int err;
-	unsigned long position;
 	int i;
 
-	err = counter_count_write_value_get(&position, COUNTER_COUNT_POSITION,
-					    val);
-	if (err)
-		return err;
-
 	/* Only 24-bit values are supported */
-	if (position > 0xFFFFFF)
+	if (val > 0xFFFFFF)
 		return -EINVAL;
 
 	/* Reset Byte Pointer */
@@ -635,7 +622,7 @@ static int quad8_count_write(struct counter_device *counter,
 
 	/* Counter can only be set via Preset Register */
 	for (i = 0; i < 3; i++)
-		outb(position >> (8 * i), base_offset);
+		outb(val >> (8 * i), base_offset);
 
 	/* Transfer Preset Register to Counter */
 	outb(QUAD8_CTR_RLD | QUAD8_RLD_PRESET_CNTR, base_offset + 1);
@@ -644,9 +631,9 @@ static int quad8_count_write(struct counter_device *counter,
 	outb(QUAD8_CTR_RLD | QUAD8_RLD_RESET_BP, base_offset + 1);
 
 	/* Set Preset Register back to original value */
-	position = priv->preset[count->id];
+	val = priv->preset[count->id];
 	for (i = 0; i < 3; i++)
-		outb(position >> (8 * i), base_offset);
+		outb(val >> (8 * i), base_offset);
 
 	/* Reset Borrow, Carry, Compare, and Sign flags */
 	outb(QUAD8_CTR_RLD | QUAD8_RLD_RESET_FLAGS, base_offset + 1);
diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig
index 2967d0a9ff91..c80fa76bb531 100644
--- a/drivers/counter/Kconfig
+++ b/drivers/counter/Kconfig
@@ -49,6 +49,17 @@ config STM32_LPTIMER_CNT
 	  To compile this driver as a module, choose M here: the
 	  module will be called stm32-lptimer-cnt.
 
+config TI_EQEP
+	tristate "TI eQEP counter driver"
+	depends on (SOC_AM33XX || COMPILE_TEST)
+	select REGMAP_MMIO
+	help
+	  Select this option to enable the Texas Instruments Enhanced Quadrature
+	  Encoder Pulse (eQEP) counter driver.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called ti-eqep.
+
 config FTM_QUADDEC
 	tristate "Flex Timer Module Quadrature decoder driver"
 	depends on HAS_IOMEM && OF
diff --git a/drivers/counter/Makefile b/drivers/counter/Makefile
index 40d35522937d..55142d1f4c43 100644
--- a/drivers/counter/Makefile
+++ b/drivers/counter/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_COUNTER) += counter.o
 obj-$(CONFIG_104_QUAD_8)	+= 104-quad-8.o
 obj-$(CONFIG_STM32_TIMER_CNT)	+= stm32-timer-cnt.o
 obj-$(CONFIG_STM32_LPTIMER_CNT)	+= stm32-lptimer-cnt.o
+obj-$(CONFIG_TI_EQEP)		+= ti-eqep.o
 obj-$(CONFIG_FTM_QUADDEC)	+= ftm-quaddec.o
diff --git a/drivers/counter/counter.c b/drivers/counter/counter.c
index 106bc7180cd8..6a683d086008 100644
--- a/drivers/counter/counter.c
+++ b/drivers/counter/counter.c
@@ -220,86 +220,6 @@ ssize_t counter_device_enum_available_read(struct counter_device *counter,
 }
 EXPORT_SYMBOL_GPL(counter_device_enum_available_read);
 
-static const char *const counter_signal_level_str[] = {
-	[COUNTER_SIGNAL_LEVEL_LOW] = "low",
-	[COUNTER_SIGNAL_LEVEL_HIGH] = "high"
-};
-
-/**
- * counter_signal_read_value_set - set counter_signal_read_value data
- * @val:	counter_signal_read_value structure to set
- * @type:	property Signal data represents
- * @data:	Signal data
- *
- * This function sets an opaque counter_signal_read_value structure with the
- * provided Signal data.
- */
-void counter_signal_read_value_set(struct counter_signal_read_value *const val,
-				   const enum counter_signal_value_type type,
-				   void *const data)
-{
-	if (type == COUNTER_SIGNAL_LEVEL)
-		val->len = sprintf(val->buf, "%s\n",
-				   counter_signal_level_str[*(enum counter_signal_level *)data]);
-	else
-		val->len = 0;
-}
-EXPORT_SYMBOL_GPL(counter_signal_read_value_set);
-
-/**
- * counter_count_read_value_set - set counter_count_read_value data
- * @val:	counter_count_read_value structure to set
- * @type:	property Count data represents
- * @data:	Count data
- *
- * This function sets an opaque counter_count_read_value structure with the
- * provided Count data.
- */
-void counter_count_read_value_set(struct counter_count_read_value *const val,
-				  const enum counter_count_value_type type,
-				  void *const data)
-{
-	switch (type) {
-	case COUNTER_COUNT_POSITION:
-		val->len = sprintf(val->buf, "%lu\n", *(unsigned long *)data);
-		break;
-	default:
-		val->len = 0;
-	}
-}
-EXPORT_SYMBOL_GPL(counter_count_read_value_set);
-
-/**
- * counter_count_write_value_get - get counter_count_write_value data
- * @data:	Count data
- * @type:	property Count data represents
- * @val:	counter_count_write_value structure containing data
- *
- * This function extracts Count data from the provided opaque
- * counter_count_write_value structure and stores it at the address provided by
- * @data.
- *
- * RETURNS:
- * 0 on success, negative error number on failure.
- */
-int counter_count_write_value_get(void *const data,
-				  const enum counter_count_value_type type,
-				  const struct counter_count_write_value *const val)
-{
-	int err;
-
-	switch (type) {
-	case COUNTER_COUNT_POSITION:
-		err = kstrtoul(val->buf, 0, data);
-		if (err)
-			return err;
-		break;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(counter_count_write_value_get);
-
 struct counter_attr_parm {
 	struct counter_device_attr_group *group;
 	const char *prefix;
@@ -369,6 +289,11 @@ struct counter_signal_unit {
 	struct counter_signal *signal;
 };
 
+static const char *const counter_signal_value_str[] = {
+	[COUNTER_SIGNAL_LOW] = "low",
+	[COUNTER_SIGNAL_HIGH] = "high"
+};
+
 static ssize_t counter_signal_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -377,13 +302,13 @@ static ssize_t counter_signal_show(struct device *dev,
 	const struct counter_signal_unit *const component = devattr->component;
 	struct counter_signal *const signal = component->signal;
 	int err;
-	struct counter_signal_read_value val = { .buf = buf };
+	enum counter_signal_value val;
 
 	err = counter->ops->signal_read(counter, signal, &val);
 	if (err)
 		return err;
 
-	return val.len;
+	return sprintf(buf, "%s\n", counter_signal_value_str[val]);
 }
 
 struct counter_name_unit {
@@ -788,13 +713,13 @@ static ssize_t counter_count_show(struct device *dev,
 	const struct counter_count_unit *const component = devattr->component;
 	struct counter_count *const count = component->count;
 	int err;
-	struct counter_count_read_value val = { .buf = buf };
+	unsigned long val;
 
 	err = counter->ops->count_read(counter, count, &val);
 	if (err)
 		return err;
 
-	return val.len;
+	return sprintf(buf, "%lu\n", val);
 }
 
 static ssize_t counter_count_store(struct device *dev,
@@ -806,9 +731,13 @@ static ssize_t counter_count_store(struct device *dev,
 	const struct counter_count_unit *const component = devattr->component;
 	struct counter_count *const count = component->count;
 	int err;
-	struct counter_count_write_value val = { .buf = buf };
+	unsigned long val;
+
+	err = kstrtoul(buf, 0, &val);
+	if (err)
+		return err;
 
-	err = counter->ops->count_write(counter, count, &val);
+	err = counter->ops->count_write(counter, count, val);
 	if (err)
 		return err;
 
diff --git a/drivers/counter/ftm-quaddec.c b/drivers/counter/ftm-quaddec.c
index 4046aa9f9234..c2b3fdfd8b77 100644
--- a/drivers/counter/ftm-quaddec.c
+++ b/drivers/counter/ftm-quaddec.c
@@ -178,31 +178,25 @@ static const enum counter_count_function ftm_quaddec_count_functions[] = {
 
 static int ftm_quaddec_count_read(struct counter_device *counter,
 				  struct counter_count *count,
-				  struct counter_count_read_value *val)
+				  unsigned long *val)
 {
 	struct ftm_quaddec *const ftm = counter->priv;
 	uint32_t cntval;
 
 	ftm_read(ftm, FTM_CNT, &cntval);
 
-	counter_count_read_value_set(val, COUNTER_COUNT_POSITION, &cntval);
+	*val = cntval;
 
 	return 0;
 }
 
 static int ftm_quaddec_count_write(struct counter_device *counter,
 				   struct counter_count *count,
-				   struct counter_count_write_value *val)
+				   const unsigned long val)
 {
 	struct ftm_quaddec *const ftm = counter->priv;
-	u32 cnt;
-	int err;
 
-	err = counter_count_write_value_get(&cnt, COUNTER_COUNT_POSITION, val);
-	if (err)
-		return err;
-
-	if (cnt != 0) {
+	if (val != 0) {
 		dev_warn(&ftm->pdev->dev, "Can only accept '0' as new counter value\n");
 		return -EINVAL;
 	}
diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
index bbc930a5962c..8e276eb655f5 100644
--- a/drivers/counter/stm32-lptimer-cnt.c
+++ b/drivers/counter/stm32-lptimer-cnt.c
@@ -347,7 +347,7 @@ static const struct iio_chan_spec stm32_lptim_cnt_channels = {
 };
 
 /**
- * stm32_lptim_cnt_function - enumerates stm32 LPTimer counter & encoder modes
+ * enum stm32_lptim_cnt_function - enumerates LPTimer counter & encoder modes
  * @STM32_LPTIM_COUNTER_INCREASE: up count on IN1 rising, falling or both edges
  * @STM32_LPTIM_ENCODER_BOTH_EDGE: count on both edges (IN1 & IN2 quadrature)
  */
@@ -377,8 +377,7 @@ static enum counter_synapse_action stm32_lptim_cnt_synapse_actions[] = {
 };
 
 static int stm32_lptim_cnt_read(struct counter_device *counter,
-				struct counter_count *count,
-				struct counter_count_read_value *val)
+				struct counter_count *count, unsigned long *val)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 	u32 cnt;
@@ -388,7 +387,7 @@ static int stm32_lptim_cnt_read(struct counter_device *counter,
 	if (ret)
 		return ret;
 
-	counter_count_read_value_set(val, COUNTER_COUNT_POSITION, &cnt);
+	*val = cnt;
 
 	return 0;
 }
diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c
index 644ba18a72ad..3eafccec3beb 100644
--- a/drivers/counter/stm32-timer-cnt.c
+++ b/drivers/counter/stm32-timer-cnt.c
@@ -28,7 +28,7 @@ struct stm32_timer_cnt {
 };
 
 /**
- * stm32_count_function - enumerates stm32 timer counter encoder modes
+ * enum stm32_count_function - enumerates stm32 timer counter encoder modes
  * @STM32_COUNT_SLAVE_MODE_DISABLED: counts on internal clock when CEN=1
  * @STM32_COUNT_ENCODER_MODE_1: counts TI1FP1 edges, depending on TI2FP2 level
  * @STM32_COUNT_ENCODER_MODE_2: counts TI2FP2 edges, depending on TI1FP1 level
@@ -48,34 +48,27 @@ static enum counter_count_function stm32_count_functions[] = {
 };
 
 static int stm32_count_read(struct counter_device *counter,
-			    struct counter_count *count,
-			    struct counter_count_read_value *val)
+			    struct counter_count *count, unsigned long *val)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 cnt;
 
 	regmap_read(priv->regmap, TIM_CNT, &cnt);
-	counter_count_read_value_set(val, COUNTER_COUNT_POSITION, &cnt);
+	*val = cnt;
 
 	return 0;
 }
 
 static int stm32_count_write(struct counter_device *counter,
 			     struct counter_count *count,
-			     struct counter_count_write_value *val)
+			     const unsigned long val)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
-	u32 cnt;
-	int err;
-
-	err = counter_count_write_value_get(&cnt, COUNTER_COUNT_POSITION, val);
-	if (err)
-		return err;
 
-	if (cnt > priv->ceiling)
+	if (val > priv->ceiling)
 		return -EINVAL;
 
-	return regmap_write(priv->regmap, TIM_CNT, cnt);
+	return regmap_write(priv->regmap, TIM_CNT, val);
 }
 
 static int stm32_count_function_get(struct counter_device *counter,
@@ -219,8 +212,8 @@ static ssize_t stm32_count_enable_write(struct counter_device *counter,
 
 	if (enable) {
 		regmap_read(priv->regmap, TIM_CR1, &cr1);
-			if (!(cr1 & TIM_CR1_CEN))
-				clk_enable(priv->clk);
+		if (!(cr1 & TIM_CR1_CEN))
+			clk_enable(priv->clk);
 
 		regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN,
 				   TIM_CR1_CEN);
diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
new file mode 100644
index 000000000000..1ff07faef27f
--- /dev/null
+++ b/drivers/counter/ti-eqep.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 David Lechner <david@lechnology.com>
+ *
+ * Counter driver for Texas Instruments Enhanced Quadrature Encoder Pulse (eQEP)
+ */
+
+#include <linux/bitops.h>
+#include <linux/counter.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+/* 32-bit registers */
+#define QPOSCNT		0x0
+#define QPOSINIT	0x4
+#define QPOSMAX		0x8
+#define QPOSCMP		0xc
+#define QPOSILAT	0x10
+#define QPOSSLAT	0x14
+#define QPOSLAT		0x18
+#define QUTMR		0x1c
+#define QUPRD		0x20
+
+/* 16-bit registers */
+#define QWDTMR		0x0	/* 0x24 */
+#define QWDPRD		0x2	/* 0x26 */
+#define QDECCTL		0x4	/* 0x28 */
+#define QEPCTL		0x6	/* 0x2a */
+#define QCAPCTL		0x8	/* 0x2c */
+#define QPOSCTL		0xa	/* 0x2e */
+#define QEINT		0xc	/* 0x30 */
+#define QFLG		0xe	/* 0x32 */
+#define QCLR		0x10	/* 0x34 */
+#define QFRC		0x12	/* 0x36 */
+#define QEPSTS		0x14	/* 0x38 */
+#define QCTMR		0x16	/* 0x3a */
+#define QCPRD		0x18	/* 0x3c */
+#define QCTMRLAT	0x1a	/* 0x3e */
+#define QCPRDLAT	0x1c	/* 0x40 */
+
+#define QDECCTL_QSRC_SHIFT	14
+#define QDECCTL_QSRC		GENMASK(15, 14)
+#define QDECCTL_SOEN		BIT(13)
+#define QDECCTL_SPSEL		BIT(12)
+#define QDECCTL_XCR		BIT(11)
+#define QDECCTL_SWAP		BIT(10)
+#define QDECCTL_IGATE		BIT(9)
+#define QDECCTL_QAP		BIT(8)
+#define QDECCTL_QBP		BIT(7)
+#define QDECCTL_QIP		BIT(6)
+#define QDECCTL_QSP		BIT(5)
+
+#define QEPCTL_FREE_SOFT	GENMASK(15, 14)
+#define QEPCTL_PCRM		GENMASK(13, 12)
+#define QEPCTL_SEI		GENMASK(11, 10)
+#define QEPCTL_IEI		GENMASK(9, 8)
+#define QEPCTL_SWI		BIT(7)
+#define QEPCTL_SEL		BIT(6)
+#define QEPCTL_IEL		GENMASK(5, 4)
+#define QEPCTL_PHEN		BIT(3)
+#define QEPCTL_QCLM		BIT(2)
+#define QEPCTL_UTE		BIT(1)
+#define QEPCTL_WDE		BIT(0)
+
+/* EQEP Inputs */
+enum {
+	TI_EQEP_SIGNAL_QEPA,	/* QEPA/XCLK */
+	TI_EQEP_SIGNAL_QEPB,	/* QEPB/XDIR */
+};
+
+/* Position Counter Input Modes */
+enum {
+	TI_EQEP_COUNT_FUNC_QUAD_COUNT,
+	TI_EQEP_COUNT_FUNC_DIR_COUNT,
+	TI_EQEP_COUNT_FUNC_UP_COUNT,
+	TI_EQEP_COUNT_FUNC_DOWN_COUNT,
+};
+
+enum {
+	TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES,
+	TI_EQEP_SYNAPSE_ACTION_RISING_EDGE,
+	TI_EQEP_SYNAPSE_ACTION_NONE,
+};
+
+struct ti_eqep_cnt {
+	struct counter_device counter;
+	struct regmap *regmap32;
+	struct regmap *regmap16;
+};
+
+static int ti_eqep_count_read(struct counter_device *counter,
+			      struct counter_count *count, unsigned long *val)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 cnt;
+
+	regmap_read(priv->regmap32, QPOSCNT, &cnt);
+	*val = cnt;
+
+	return 0;
+}
+
+static int ti_eqep_count_write(struct counter_device *counter,
+			       struct counter_count *count, unsigned long val)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 max;
+
+	regmap_read(priv->regmap32, QPOSMAX, &max);
+	if (val > max)
+		return -EINVAL;
+
+	return regmap_write(priv->regmap32, QPOSCNT, val);
+}
+
+static int ti_eqep_function_get(struct counter_device *counter,
+				struct counter_count *count, size_t *function)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 qdecctl;
+
+	regmap_read(priv->regmap16, QDECCTL, &qdecctl);
+	*function = (qdecctl & QDECCTL_QSRC) >> QDECCTL_QSRC_SHIFT;
+
+	return 0;
+}
+
+static int ti_eqep_function_set(struct counter_device *counter,
+				struct counter_count *count, size_t function)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+
+	return regmap_write_bits(priv->regmap16, QDECCTL, QDECCTL_QSRC,
+				 function << QDECCTL_QSRC_SHIFT);
+}
+
+static int ti_eqep_action_get(struct counter_device *counter,
+			      struct counter_count *count,
+			      struct counter_synapse *synapse, size_t *action)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	size_t function;
+	u32 qdecctl;
+	int err;
+
+	err = ti_eqep_function_get(counter, count, &function);
+	if (err)
+		return err;
+
+	switch (function) {
+	case TI_EQEP_COUNT_FUNC_QUAD_COUNT:
+		/* In quadrature mode, the rising and falling edge of both
+		 * QEPA and QEPB trigger QCLK.
+		 */
+		*action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
+		break;
+	case TI_EQEP_COUNT_FUNC_DIR_COUNT:
+		/* In direction-count mode only rising edge of QEPA is counted
+		 * and QEPB gives direction.
+		 */
+		switch (synapse->signal->id) {
+		case TI_EQEP_SIGNAL_QEPA:
+			*action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
+			break;
+		default:
+			*action = TI_EQEP_SYNAPSE_ACTION_NONE;
+			break;
+		}
+		break;
+	case TI_EQEP_COUNT_FUNC_UP_COUNT:
+	case TI_EQEP_COUNT_FUNC_DOWN_COUNT:
+		/* In up/down-count modes only QEPA is counted and QEPB is not
+		 * used.
+		 */
+		switch (synapse->signal->id) {
+		case TI_EQEP_SIGNAL_QEPA:
+			err = regmap_read(priv->regmap16, QDECCTL, &qdecctl);
+			if (err)
+				return err;
+
+			if (qdecctl & QDECCTL_XCR)
+				*action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
+			else
+				*action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
+			break;
+		default:
+			*action = TI_EQEP_SYNAPSE_ACTION_NONE;
+			break;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static const struct counter_ops ti_eqep_counter_ops = {
+	.count_read	= ti_eqep_count_read,
+	.count_write	= ti_eqep_count_write,
+	.function_get	= ti_eqep_function_get,
+	.function_set	= ti_eqep_function_set,
+	.action_get	= ti_eqep_action_get,
+};
+
+static ssize_t ti_eqep_position_ceiling_read(struct counter_device *counter,
+					     struct counter_count *count,
+					     void *ext_priv, char *buf)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 qposmax;
+
+	regmap_read(priv->regmap32, QPOSMAX, &qposmax);
+
+	return sprintf(buf, "%u\n", qposmax);
+}
+
+static ssize_t ti_eqep_position_ceiling_write(struct counter_device *counter,
+					      struct counter_count *count,
+					      void *ext_priv, const char *buf,
+					      size_t len)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	int err;
+	u32 res;
+
+	err = kstrtouint(buf, 0, &res);
+	if (err < 0)
+		return err;
+
+	regmap_write(priv->regmap32, QPOSMAX, res);
+
+	return len;
+}
+
+static ssize_t ti_eqep_position_floor_read(struct counter_device *counter,
+					   struct counter_count *count,
+					   void *ext_priv, char *buf)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 qposinit;
+
+	regmap_read(priv->regmap32, QPOSINIT, &qposinit);
+
+	return sprintf(buf, "%u\n", qposinit);
+}
+
+static ssize_t ti_eqep_position_floor_write(struct counter_device *counter,
+					    struct counter_count *count,
+					    void *ext_priv, const char *buf,
+					    size_t len)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	int err;
+	u32 res;
+
+	err = kstrtouint(buf, 0, &res);
+	if (err < 0)
+		return err;
+
+	regmap_write(priv->regmap32, QPOSINIT, res);
+
+	return len;
+}
+
+static ssize_t ti_eqep_position_enable_read(struct counter_device *counter,
+					    struct counter_count *count,
+					    void *ext_priv, char *buf)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	u32 qepctl;
+
+	regmap_read(priv->regmap16, QEPCTL, &qepctl);
+
+	return sprintf(buf, "%u\n", !!(qepctl & QEPCTL_PHEN));
+}
+
+static ssize_t ti_eqep_position_enable_write(struct counter_device *counter,
+					     struct counter_count *count,
+					     void *ext_priv, const char *buf,
+					     size_t len)
+{
+	struct ti_eqep_cnt *priv = counter->priv;
+	int err;
+	bool res;
+
+	err = kstrtobool(buf, &res);
+	if (err < 0)
+		return err;
+
+	regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, res ? -1 : 0);
+
+	return len;
+}
+
+static struct counter_count_ext ti_eqep_position_ext[] = {
+	{
+		.name	= "ceiling",
+		.read	= ti_eqep_position_ceiling_read,
+		.write	= ti_eqep_position_ceiling_write,
+	},
+	{
+		.name	= "floor",
+		.read	= ti_eqep_position_floor_read,
+		.write	= ti_eqep_position_floor_write,
+	},
+	{
+		.name	= "enable",
+		.read	= ti_eqep_position_enable_read,
+		.write	= ti_eqep_position_enable_write,
+	},
+};
+
+static struct counter_signal ti_eqep_signals[] = {
+	[TI_EQEP_SIGNAL_QEPA] = {
+		.id = TI_EQEP_SIGNAL_QEPA,
+		.name = "QEPA"
+	},
+	[TI_EQEP_SIGNAL_QEPB] = {
+		.id = TI_EQEP_SIGNAL_QEPB,
+		.name = "QEPB"
+	},
+};
+
+static const enum counter_count_function ti_eqep_position_functions[] = {
+	[TI_EQEP_COUNT_FUNC_QUAD_COUNT]	= COUNTER_COUNT_FUNCTION_QUADRATURE_X4,
+	[TI_EQEP_COUNT_FUNC_DIR_COUNT]	= COUNTER_COUNT_FUNCTION_PULSE_DIRECTION,
+	[TI_EQEP_COUNT_FUNC_UP_COUNT]	= COUNTER_COUNT_FUNCTION_INCREASE,
+	[TI_EQEP_COUNT_FUNC_DOWN_COUNT]	= COUNTER_COUNT_FUNCTION_DECREASE,
+};
+
+static const enum counter_synapse_action ti_eqep_position_synapse_actions[] = {
+	[TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES]	= COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+	[TI_EQEP_SYNAPSE_ACTION_RISING_EDGE]	= COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	[TI_EQEP_SYNAPSE_ACTION_NONE]		= COUNTER_SYNAPSE_ACTION_NONE,
+};
+
+static struct counter_synapse ti_eqep_position_synapses[] = {
+	{
+		.actions_list	= ti_eqep_position_synapse_actions,
+		.num_actions	= ARRAY_SIZE(ti_eqep_position_synapse_actions),
+		.signal		= &ti_eqep_signals[TI_EQEP_SIGNAL_QEPA],
+	},
+	{
+		.actions_list	= ti_eqep_position_synapse_actions,
+		.num_actions	= ARRAY_SIZE(ti_eqep_position_synapse_actions),
+		.signal		= &ti_eqep_signals[TI_EQEP_SIGNAL_QEPB],
+	},
+};
+
+static struct counter_count ti_eqep_counts[] = {
+	{
+		.id		= 0,
+		.name		= "QPOSCNT",
+		.functions_list	= ti_eqep_position_functions,
+		.num_functions	= ARRAY_SIZE(ti_eqep_position_functions),
+		.synapses	= ti_eqep_position_synapses,
+		.num_synapses	= ARRAY_SIZE(ti_eqep_position_synapses),
+		.ext		= ti_eqep_position_ext,
+		.num_ext	= ARRAY_SIZE(ti_eqep_position_ext),
+	},
+};
+
+static const struct regmap_config ti_eqep_regmap32_config = {
+	.name = "32-bit",
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = 0x24,
+};
+
+static const struct regmap_config ti_eqep_regmap16_config = {
+	.name = "16-bit",
+	.reg_bits = 16,
+	.val_bits = 16,
+	.reg_stride = 2,
+	.max_register = 0x1e,
+};
+
+static int ti_eqep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ti_eqep_cnt *priv;
+	void __iomem *base;
+	int err;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	priv->regmap32 = devm_regmap_init_mmio(dev, base,
+					       &ti_eqep_regmap32_config);
+	if (IS_ERR(priv->regmap32))
+		return PTR_ERR(priv->regmap32);
+
+	priv->regmap16 = devm_regmap_init_mmio(dev, base + 0x24,
+					       &ti_eqep_regmap16_config);
+	if (IS_ERR(priv->regmap16))
+		return PTR_ERR(priv->regmap16);
+
+	priv->counter.name = dev_name(dev);
+	priv->counter.parent = dev;
+	priv->counter.ops = &ti_eqep_counter_ops;
+	priv->counter.counts = ti_eqep_counts;
+	priv->counter.num_counts = ARRAY_SIZE(ti_eqep_counts);
+	priv->counter.signals = ti_eqep_signals;
+	priv->counter.num_signals = ARRAY_SIZE(ti_eqep_signals);
+	priv->counter.priv = priv;
+
+	platform_set_drvdata(pdev, priv);
+
+	/*
+	 * Need to make sure power is turned on. On AM33xx, this comes from the
+	 * parent PWMSS bus driver. On AM17xx, this comes from the PSC power
+	 * domain.
+	 */
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	err = counter_register(&priv->counter);
+	if (err < 0) {
+		pm_runtime_put_sync(dev);
+		pm_runtime_disable(dev);
+		return err;
+	}
+
+	return 0;
+}
+
+static int ti_eqep_remove(struct platform_device *pdev)
+{
+	struct ti_eqep_cnt *priv = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+
+	counter_unregister(&priv->counter);
+	pm_runtime_put_sync(dev),
+	pm_runtime_disable(dev);
+
+	return 0;
+}
+
+static const struct of_device_id ti_eqep_of_match[] = {
+	{ .compatible = "ti,am3352-eqep", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ti_eqep_of_match);
+
+static struct platform_driver ti_eqep_driver = {
+	.probe = ti_eqep_probe,
+	.remove = ti_eqep_remove,
+	.driver = {
+		.name = "ti-eqep-cnt",
+		.of_match_table = ti_eqep_of_match,
+	},
+};
+module_platform_driver(ti_eqep_driver);
+
+MODULE_AUTHOR("David Lechner <david@lechnology.com>");
+MODULE_DESCRIPTION("TI eQEP counter driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a905796f7f85..3858d86cf409 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -49,14 +49,6 @@ config ARM_ARMADA_8K_CPUFREQ
 
 	  If in doubt, say N.
 
-# big LITTLE core layer and glue drivers
-config ARM_BIG_LITTLE_CPUFREQ
-	tristate "Generic ARM big LITTLE CPUfreq driver"
-	depends on ARM_CPU_TOPOLOGY && HAVE_CLK
-	select PM_OPP
-	help
-	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
-
 config ARM_SCPI_CPUFREQ
 	tristate "SCPI based CPUfreq driver"
 	depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
@@ -69,7 +61,9 @@ config ARM_SCPI_CPUFREQ
 
 config ARM_VEXPRESS_SPC_CPUFREQ
 	tristate "Versatile Express SPC based CPUfreq driver"
-	depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC
+	depends on ARM_CPU_TOPOLOGY && HAVE_CLK
+	depends on ARCH_VEXPRESS_SPC
+	select PM_OPP
 	help
 	  This add the CPUfreq driver support for Versatile Express
 	  big.LITTLE platforms using SPC for power management.
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index 35b4f700f054..58151ca56695 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -48,9 +48,9 @@ config PPC_PASEMI_CPUFREQ
 	  PWRficient processors.
 
 config POWERNV_CPUFREQ
-       tristate "CPU frequency scaling for IBM POWERNV platform"
-       depends on PPC_POWERNV
-       default y
-       help
+	tristate "CPU frequency scaling for IBM POWERNV platform"
+	depends on PPC_POWERNV
+	default y
+	help
 	 This adds support for CPU frequency switching on IBM POWERNV
 	 platform
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index dfa6457deaf6..a6528388952e 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -4,17 +4,17 @@
 #
 
 config X86_INTEL_PSTATE
-       bool "Intel P state control"
-       depends on X86
-       select ACPI_PROCESSOR if ACPI
-       select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
-       help
-          This driver provides a P state for Intel core processors.
+	bool "Intel P state control"
+	depends on X86
+	select ACPI_PROCESSOR if ACPI
+	select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
+	help
+	  This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become
-          the scaling driver and governor for Sandy bridge processors.
+	  the scaling driver and governor for Sandy bridge processors.
 
 	  When this driver is enabled it will become the preferred
-          scaling driver for Sandy bridge processors.
+	  scaling driver for Sandy bridge processors.
 
 	  If in doubt, say N.
 
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9a9f5ccd13d9..f6670c4abbb0 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_SFI_CPUFREQ)		+= sfi-cpufreq.o
 
 ##################################################################################
 # ARM SoC drivers
-obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ)	+= arm_big_little.o
-
 obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ)	+= armada-37xx-cpufreq.o
 obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ)	+= armada-8k-cpufreq.o
 obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ)	+= brcmstb-avs-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
deleted file mode 100644
index 7fe52fcddcf1..000000000000
--- a/drivers/cpufreq/arm_big_little.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * ARM big.LITTLE Platforms CPUFreq support
- *
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * Copyright (C) 2013 Linaro.
- * Viresh Kumar <viresh.kumar@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/clk.h>
-#include <linux/cpu.h>
-#include <linux/cpufreq.h>
-#include <linux/cpumask.h>
-#include <linux/cpu_cooling.h>
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/of_platform.h>
-#include <linux/pm_opp.h>
-#include <linux/slab.h>
-#include <linux/topology.h>
-#include <linux/types.h>
-
-#include "arm_big_little.h"
-
-/* Currently we support only two clusters */
-#define A15_CLUSTER	0
-#define A7_CLUSTER	1
-#define MAX_CLUSTERS	2
-
-#ifdef CONFIG_BL_SWITCHER
-#include <asm/bL_switcher.h>
-static bool bL_switching_enabled;
-#define is_bL_switching_enabled()	bL_switching_enabled
-#define set_switching_enabled(x)	(bL_switching_enabled = (x))
-#else
-#define is_bL_switching_enabled()	false
-#define set_switching_enabled(x)	do { } while (0)
-#define bL_switch_request(...)		do { } while (0)
-#define bL_switcher_put_enabled()	do { } while (0)
-#define bL_switcher_get_enabled()	do { } while (0)
-#endif
-
-#define ACTUAL_FREQ(cluster, freq)  ((cluster == A7_CLUSTER) ? freq << 1 : freq)
-#define VIRT_FREQ(cluster, freq)    ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
-
-static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
-static const struct cpufreq_arm_bL_ops *arm_bL_ops;
-static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
-static atomic_t cluster_usage[MAX_CLUSTERS + 1];
-
-static unsigned int clk_big_min;	/* (Big) clock frequencies */
-static unsigned int clk_little_max;	/* Maximum clock frequency (Little) */
-
-static DEFINE_PER_CPU(unsigned int, physical_cluster);
-static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
-
-static struct mutex cluster_lock[MAX_CLUSTERS];
-
-static inline int raw_cpu_to_cluster(int cpu)
-{
-	return topology_physical_package_id(cpu);
-}
-
-static inline int cpu_to_cluster(int cpu)
-{
-	return is_bL_switching_enabled() ?
-		MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
-}
-
-static unsigned int find_cluster_maxfreq(int cluster)
-{
-	int j;
-	u32 max_freq = 0, cpu_freq;
-
-	for_each_online_cpu(j) {
-		cpu_freq = per_cpu(cpu_last_req_freq, j);
-
-		if ((cluster == per_cpu(physical_cluster, j)) &&
-				(max_freq < cpu_freq))
-			max_freq = cpu_freq;
-	}
-
-	pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
-			max_freq);
-
-	return max_freq;
-}
-
-static unsigned int clk_get_cpu_rate(unsigned int cpu)
-{
-	u32 cur_cluster = per_cpu(physical_cluster, cpu);
-	u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
-
-	/* For switcher we use virtual A7 clock rates */
-	if (is_bL_switching_enabled())
-		rate = VIRT_FREQ(cur_cluster, rate);
-
-	pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
-			cur_cluster, rate);
-
-	return rate;
-}
-
-static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
-{
-	if (is_bL_switching_enabled()) {
-		pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq,
-					cpu));
-
-		return per_cpu(cpu_last_req_freq, cpu);
-	} else {
-		return clk_get_cpu_rate(cpu);
-	}
-}
-
-static unsigned int
-bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
-{
-	u32 new_rate, prev_rate;
-	int ret;
-	bool bLs = is_bL_switching_enabled();
-
-	mutex_lock(&cluster_lock[new_cluster]);
-
-	if (bLs) {
-		prev_rate = per_cpu(cpu_last_req_freq, cpu);
-		per_cpu(cpu_last_req_freq, cpu) = rate;
-		per_cpu(physical_cluster, cpu) = new_cluster;
-
-		new_rate = find_cluster_maxfreq(new_cluster);
-		new_rate = ACTUAL_FREQ(new_cluster, new_rate);
-	} else {
-		new_rate = rate;
-	}
-
-	pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
-			__func__, cpu, old_cluster, new_cluster, new_rate);
-
-	ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
-	if (!ret) {
-		/*
-		 * FIXME: clk_set_rate hasn't returned an error here however it
-		 * may be that clk_change_rate failed due to hardware or
-		 * firmware issues and wasn't able to report that due to the
-		 * current design of the clk core layer. To work around this
-		 * problem we will read back the clock rate and check it is
-		 * correct. This needs to be removed once clk core is fixed.
-		 */
-		if (clk_get_rate(clk[new_cluster]) != new_rate * 1000)
-			ret = -EIO;
-	}
-
-	if (WARN_ON(ret)) {
-		pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
-				new_cluster);
-		if (bLs) {
-			per_cpu(cpu_last_req_freq, cpu) = prev_rate;
-			per_cpu(physical_cluster, cpu) = old_cluster;
-		}
-
-		mutex_unlock(&cluster_lock[new_cluster]);
-
-		return ret;
-	}
-
-	mutex_unlock(&cluster_lock[new_cluster]);
-
-	/* Recalc freq for old cluster when switching clusters */
-	if (old_cluster != new_cluster) {
-		pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
-				__func__, cpu, old_cluster, new_cluster);
-
-		/* Switch cluster */
-		bL_switch_request(cpu, new_cluster);
-
-		mutex_lock(&cluster_lock[old_cluster]);
-
-		/* Set freq of old cluster if there are cpus left on it */
-		new_rate = find_cluster_maxfreq(old_cluster);
-		new_rate = ACTUAL_FREQ(old_cluster, new_rate);
-
-		if (new_rate) {
-			pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
-					__func__, old_cluster, new_rate);
-
-			if (clk_set_rate(clk[old_cluster], new_rate * 1000))
-				pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
-						__func__, ret, old_cluster);
-		}
-		mutex_unlock(&cluster_lock[old_cluster]);
-	}
-
-	return 0;
-}
-
-/* Set clock frequency */
-static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
-		unsigned int index)
-{
-	u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster;
-	unsigned int freqs_new;
-	int ret;
-
-	cur_cluster = cpu_to_cluster(cpu);
-	new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
-
-	freqs_new = freq_table[cur_cluster][index].frequency;
-
-	if (is_bL_switching_enabled()) {
-		if ((actual_cluster == A15_CLUSTER) &&
-				(freqs_new < clk_big_min)) {
-			new_cluster = A7_CLUSTER;
-		} else if ((actual_cluster == A7_CLUSTER) &&
-				(freqs_new > clk_little_max)) {
-			new_cluster = A15_CLUSTER;
-		}
-	}
-
-	ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs_new);
-
-	if (!ret) {
-		arch_set_freq_scale(policy->related_cpus, freqs_new,
-				    policy->cpuinfo.max_freq);
-	}
-
-	return ret;
-}
-
-static inline u32 get_table_count(struct cpufreq_frequency_table *table)
-{
-	int count;
-
-	for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
-		;
-
-	return count;
-}
-
-/* get the minimum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_min(struct cpufreq_frequency_table *table)
-{
-	struct cpufreq_frequency_table *pos;
-	uint32_t min_freq = ~0;
-	cpufreq_for_each_entry(pos, table)
-		if (pos->frequency < min_freq)
-			min_freq = pos->frequency;
-	return min_freq;
-}
-
-/* get the maximum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_max(struct cpufreq_frequency_table *table)
-{
-	struct cpufreq_frequency_table *pos;
-	uint32_t max_freq = 0;
-	cpufreq_for_each_entry(pos, table)
-		if (pos->frequency > max_freq)
-			max_freq = pos->frequency;
-	return max_freq;
-}
-
-static int merge_cluster_tables(void)
-{
-	int i, j, k = 0, count = 1;
-	struct cpufreq_frequency_table *table;
-
-	for (i = 0; i < MAX_CLUSTERS; i++)
-		count += get_table_count(freq_table[i]);
-
-	table = kcalloc(count, sizeof(*table), GFP_KERNEL);
-	if (!table)
-		return -ENOMEM;
-
-	freq_table[MAX_CLUSTERS] = table;
-
-	/* Add in reverse order to get freqs in increasing order */
-	for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
-		for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
-				j++) {
-			table[k].frequency = VIRT_FREQ(i,
-					freq_table[i][j].frequency);
-			pr_debug("%s: index: %d, freq: %d\n", __func__, k,
-					table[k].frequency);
-			k++;
-		}
-	}
-
-	table[k].driver_data = k;
-	table[k].frequency = CPUFREQ_TABLE_END;
-
-	pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
-
-	return 0;
-}
-
-static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
-					    const struct cpumask *cpumask)
-{
-	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
-
-	if (!freq_table[cluster])
-		return;
-
-	clk_put(clk[cluster]);
-	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
-	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpumask);
-	dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
-}
-
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
-					   const struct cpumask *cpumask)
-{
-	u32 cluster = cpu_to_cluster(cpu_dev->id);
-	int i;
-
-	if (atomic_dec_return(&cluster_usage[cluster]))
-		return;
-
-	if (cluster < MAX_CLUSTERS)
-		return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
-
-	for_each_present_cpu(i) {
-		struct device *cdev = get_cpu_device(i);
-		if (!cdev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__, i);
-			return;
-		}
-
-		_put_cluster_clk_and_freq_table(cdev, cpumask);
-	}
-
-	/* free virtual table */
-	kfree(freq_table[cluster]);
-}
-
-static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
-					   const struct cpumask *cpumask)
-{
-	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
-	int ret;
-
-	if (freq_table[cluster])
-		return 0;
-
-	ret = arm_bL_ops->init_opp_table(cpumask);
-	if (ret) {
-		dev_err(cpu_dev, "%s: init_opp_table failed, cpu: %d, err: %d\n",
-				__func__, cpu_dev->id, ret);
-		goto out;
-	}
-
-	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
-	if (ret) {
-		dev_err(cpu_dev, "%s: failed to init cpufreq table, cpu: %d, err: %d\n",
-				__func__, cpu_dev->id, ret);
-		goto free_opp_table;
-	}
-
-	clk[cluster] = clk_get(cpu_dev, NULL);
-	if (!IS_ERR(clk[cluster])) {
-		dev_dbg(cpu_dev, "%s: clk: %p & freq table: %p, cluster: %d\n",
-				__func__, clk[cluster], freq_table[cluster],
-				cluster);
-		return 0;
-	}
-
-	dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d, cluster: %d\n",
-			__func__, cpu_dev->id, cluster);
-	ret = PTR_ERR(clk[cluster]);
-	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
-
-free_opp_table:
-	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpumask);
-out:
-	dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
-			cluster);
-	return ret;
-}
-
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
-					  const struct cpumask *cpumask)
-{
-	u32 cluster = cpu_to_cluster(cpu_dev->id);
-	int i, ret;
-
-	if (atomic_inc_return(&cluster_usage[cluster]) != 1)
-		return 0;
-
-	if (cluster < MAX_CLUSTERS) {
-		ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
-		if (ret)
-			atomic_dec(&cluster_usage[cluster]);
-		return ret;
-	}
-
-	/*
-	 * Get data for all clusters and fill virtual cluster with a merge of
-	 * both
-	 */
-	for_each_present_cpu(i) {
-		struct device *cdev = get_cpu_device(i);
-		if (!cdev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__, i);
-			return -ENODEV;
-		}
-
-		ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
-		if (ret)
-			goto put_clusters;
-	}
-
-	ret = merge_cluster_tables();
-	if (ret)
-		goto put_clusters;
-
-	/* Assuming 2 cluster, set clk_big_min and clk_little_max */
-	clk_big_min = get_table_min(freq_table[0]);
-	clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
-
-	pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
-			__func__, cluster, clk_big_min, clk_little_max);
-
-	return 0;
-
-put_clusters:
-	for_each_present_cpu(i) {
-		struct device *cdev = get_cpu_device(i);
-		if (!cdev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__, i);
-			return -ENODEV;
-		}
-
-		_put_cluster_clk_and_freq_table(cdev, cpumask);
-	}
-
-	atomic_dec(&cluster_usage[cluster]);
-
-	return ret;
-}
-
-/* Per-CPU initialization */
-static int bL_cpufreq_init(struct cpufreq_policy *policy)
-{
-	u32 cur_cluster = cpu_to_cluster(policy->cpu);
-	struct device *cpu_dev;
-	int ret;
-
-	cpu_dev = get_cpu_device(policy->cpu);
-	if (!cpu_dev) {
-		pr_err("%s: failed to get cpu%d device\n", __func__,
-				policy->cpu);
-		return -ENODEV;
-	}
-
-	if (cur_cluster < MAX_CLUSTERS) {
-		int cpu;
-
-		cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
-
-		for_each_cpu(cpu, policy->cpus)
-			per_cpu(physical_cluster, cpu) = cur_cluster;
-	} else {
-		/* Assumption: during init, we are always running on A15 */
-		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
-	}
-
-	ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
-	if (ret)
-		return ret;
-
-	policy->freq_table = freq_table[cur_cluster];
-	policy->cpuinfo.transition_latency =
-				arm_bL_ops->get_transition_latency(cpu_dev);
-
-	dev_pm_opp_of_register_em(policy->cpus);
-
-	if (is_bL_switching_enabled())
-		per_cpu(cpu_last_req_freq, policy->cpu) = clk_get_cpu_rate(policy->cpu);
-
-	dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
-	return 0;
-}
-
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
-	struct device *cpu_dev;
-	int cur_cluster = cpu_to_cluster(policy->cpu);
-
-	if (cur_cluster < MAX_CLUSTERS) {
-		cpufreq_cooling_unregister(cdev[cur_cluster]);
-		cdev[cur_cluster] = NULL;
-	}
-
-	cpu_dev = get_cpu_device(policy->cpu);
-	if (!cpu_dev) {
-		pr_err("%s: failed to get cpu%d device\n", __func__,
-				policy->cpu);
-		return -ENODEV;
-	}
-
-	put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
-	dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
-	return 0;
-}
-
-static void bL_cpufreq_ready(struct cpufreq_policy *policy)
-{
-	int cur_cluster = cpu_to_cluster(policy->cpu);
-
-	/* Do not register a cpu_cooling device if we are in IKS mode */
-	if (cur_cluster >= MAX_CLUSTERS)
-		return;
-
-	cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
-}
-
-static struct cpufreq_driver bL_cpufreq_driver = {
-	.name			= "arm-big-little",
-	.flags			= CPUFREQ_STICKY |
-					CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
-					CPUFREQ_NEED_INITIAL_FREQ_CHECK,
-	.verify			= cpufreq_generic_frequency_table_verify,
-	.target_index		= bL_cpufreq_set_target,
-	.get			= bL_cpufreq_get_rate,
-	.init			= bL_cpufreq_init,
-	.exit			= bL_cpufreq_exit,
-	.ready			= bL_cpufreq_ready,
-	.attr			= cpufreq_generic_attr,
-};
-
-#ifdef CONFIG_BL_SWITCHER
-static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
-					unsigned long action, void *_arg)
-{
-	pr_debug("%s: action: %ld\n", __func__, action);
-
-	switch (action) {
-	case BL_NOTIFY_PRE_ENABLE:
-	case BL_NOTIFY_PRE_DISABLE:
-		cpufreq_unregister_driver(&bL_cpufreq_driver);
-		break;
-
-	case BL_NOTIFY_POST_ENABLE:
-		set_switching_enabled(true);
-		cpufreq_register_driver(&bL_cpufreq_driver);
-		break;
-
-	case BL_NOTIFY_POST_DISABLE:
-		set_switching_enabled(false);
-		cpufreq_register_driver(&bL_cpufreq_driver);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block bL_switcher_notifier = {
-	.notifier_call = bL_cpufreq_switcher_notifier,
-};
-
-static int __bLs_register_notifier(void)
-{
-	return bL_switcher_register_notifier(&bL_switcher_notifier);
-}
-
-static int __bLs_unregister_notifier(void)
-{
-	return bL_switcher_unregister_notifier(&bL_switcher_notifier);
-}
-#else
-static int __bLs_register_notifier(void) { return 0; }
-static int __bLs_unregister_notifier(void) { return 0; }
-#endif
-
-int bL_cpufreq_register(const struct cpufreq_arm_bL_ops *ops)
-{
-	int ret, i;
-
-	if (arm_bL_ops) {
-		pr_debug("%s: Already registered: %s, exiting\n", __func__,
-				arm_bL_ops->name);
-		return -EBUSY;
-	}
-
-	if (!ops || !strlen(ops->name) || !ops->init_opp_table ||
-	    !ops->get_transition_latency) {
-		pr_err("%s: Invalid arm_bL_ops, exiting\n", __func__);
-		return -ENODEV;
-	}
-
-	arm_bL_ops = ops;
-
-	set_switching_enabled(bL_switcher_get_enabled());
-
-	for (i = 0; i < MAX_CLUSTERS; i++)
-		mutex_init(&cluster_lock[i]);
-
-	ret = cpufreq_register_driver(&bL_cpufreq_driver);
-	if (ret) {
-		pr_info("%s: Failed registering platform driver: %s, err: %d\n",
-				__func__, ops->name, ret);
-		arm_bL_ops = NULL;
-	} else {
-		ret = __bLs_register_notifier();
-		if (ret) {
-			cpufreq_unregister_driver(&bL_cpufreq_driver);
-			arm_bL_ops = NULL;
-		} else {
-			pr_info("%s: Registered platform driver: %s\n",
-					__func__, ops->name);
-		}
-	}
-
-	bL_switcher_put_enabled();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(bL_cpufreq_register);
-
-void bL_cpufreq_unregister(const struct cpufreq_arm_bL_ops *ops)
-{
-	if (arm_bL_ops != ops) {
-		pr_err("%s: Registered with: %s, can't unregister, exiting\n",
-				__func__, arm_bL_ops->name);
-		return;
-	}
-
-	bL_switcher_get_enabled();
-	__bLs_unregister_notifier();
-	cpufreq_unregister_driver(&bL_cpufreq_driver);
-	bL_switcher_put_enabled();
-	pr_info("%s: Un-registered platform driver: %s\n", __func__,
-			arm_bL_ops->name);
-	arm_bL_ops = NULL;
-}
-EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
-
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
-MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
deleted file mode 100644
index 88a176e466c8..000000000000
--- a/drivers/cpufreq/arm_big_little.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * ARM big.LITTLE platform's CPUFreq header file
- *
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * Copyright (C) 2013 Linaro.
- * Viresh Kumar <viresh.kumar@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-#ifndef CPUFREQ_ARM_BIG_LITTLE_H
-#define CPUFREQ_ARM_BIG_LITTLE_H
-
-#include <linux/cpufreq.h>
-#include <linux/device.h>
-#include <linux/types.h>
-
-struct cpufreq_arm_bL_ops {
-	char name[CPUFREQ_NAME_LEN];
-
-	/*
-	 * This must set opp table for cpu_dev in a similar way as done by
-	 * dev_pm_opp_of_add_table().
-	 */
-	int (*init_opp_table)(const struct cpumask *cpumask);
-
-	/* Optional */
-	int (*get_transition_latency)(struct device *cpu_dev);
-	void (*free_opp_table)(const struct cpumask *cpumask);
-};
-
-int bL_cpufreq_register(const struct cpufreq_arm_bL_ops *ops);
-void bL_cpufreq_unregister(const struct cpufreq_arm_bL_ops *ops);
-
-#endif /* CPUFREQ_ARM_BIG_LITTLE_H */
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 77b0e5d0fb13..4f86ce2db34f 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -455,6 +455,8 @@ static unsigned int brcm_avs_cpufreq_get(unsigned int cpu)
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 	struct private_data *priv = policy->driver_data;
 
+	cpufreq_cpu_put(policy);
+
 	return brcm_avs_get_frequency(priv->base);
 }
 
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8d8da763adc5..a06777c35fc0 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -39,7 +39,7 @@
 static struct cppc_cpudata **all_cpu_data;
 
 struct cppc_workaround_oem_info {
-	char oem_id[ACPI_OEM_ID_SIZE +1];
+	char oem_id[ACPI_OEM_ID_SIZE + 1];
 	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 	u32 oem_revision;
 };
@@ -93,9 +93,13 @@ static void cppc_check_hisi_workaround(void)
 	for (i = 0; i < ARRAY_SIZE(wa_info); i++) {
 		if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) &&
 		    !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
-		    wa_info[i].oem_revision == tbl->oem_revision)
+		    wa_info[i].oem_revision == tbl->oem_revision) {
 			apply_hisi_workaround = true;
+			break;
+		}
 	}
+
+	acpi_put_table(tbl);
 }
 
 /* Callback function used to retrieve the max frequency from DMI */
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index bca8d1f47fd2..f2ae9cd455c1 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -86,7 +86,6 @@ static const struct of_device_id whitelist[] __initconst = {
 	{ .compatible = "st-ericsson,u9540", },
 
 	{ .compatible = "ti,omap2", },
-	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
 	{ .compatible = "ti,omap5", },
 
@@ -110,6 +109,7 @@ static const struct of_device_id blacklist[] __initconst = {
 	{ .compatible = "fsl,imx8mq", },
 	{ .compatible = "fsl,imx8mm", },
 	{ .compatible = "fsl,imx8mn", },
+	{ .compatible = "fsl,imx8mp", },
 
 	{ .compatible = "marvell,armadaxp", },
 
@@ -122,6 +122,8 @@ static const struct of_device_id blacklist[] __initconst = {
 	{ .compatible = "mediatek,mt8176", },
 	{ .compatible = "mediatek,mt8183", },
 
+	{ .compatible = "nvidia,tegra20", },
+	{ .compatible = "nvidia,tegra30", },
 	{ .compatible = "nvidia,tegra124", },
 	{ .compatible = "nvidia,tegra210", },
 
@@ -137,6 +139,7 @@ static const struct of_device_id blacklist[] __initconst = {
 	{ .compatible = "ti,am33xx", },
 	{ .compatible = "ti,am43", },
 	{ .compatible = "ti,dra7", },
+	{ .compatible = "ti,omap3", },
 
 	{ }
 };
@@ -180,4 +183,4 @@ create_pdev:
 			       -1, data,
 			       sizeof(struct cpufreq_dt_platform_data)));
 }
-device_initcall(cpufreq_dt_platdev_init);
+core_initcall(cpufreq_dt_platdev_init);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 48a224a6b178..77114a3897fb 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -113,18 +113,21 @@ EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
 
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	u64 idle_time;
+	struct kernel_cpustat kcpustat;
 	u64 cur_wall_time;
+	u64 idle_time;
 	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
 
-	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+	kcpustat_cpu_fetch(&kcpustat, cpu);
+
+	busy_time = kcpustat.cpustat[CPUTIME_USER];
+	busy_time += kcpustat.cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat.cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat.cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat.cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat.cpustat[CPUTIME_NICE];
 
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
@@ -933,6 +936,9 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 	struct freq_attr *fattr = to_attr(attr);
 	ssize_t ret;
 
+	if (!fattr->show)
+		return -EIO;
+
 	down_read(&policy->rwsem);
 	ret = fattr->show(policy, buf);
 	up_read(&policy->rwsem);
@@ -947,6 +953,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct freq_attr *fattr = to_attr(attr);
 	ssize_t ret = -EINVAL;
 
+	if (!fattr->store)
+		return -EIO;
+
 	/*
 	 * cpus_read_trylock() is used here to work around a circular lock
 	 * dependency problem with respect to the cpufreq_register_driver().
@@ -2385,7 +2394,10 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
 	new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
 	new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
 
-	/* verify the cpu speed can be set within this limit */
+	/*
+	 * Verify that the CPU speed can be set within these limits and make sure
+	 * that min <= max.
+	 */
 	ret = cpufreq_driver->verify(new_policy);
 	if (ret)
 		return ret;
@@ -2628,6 +2640,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	if (cpufreq_disabled())
 		return -ENODEV;
 
+	/*
+	 * The cpufreq core depends heavily on the availability of device
+	 * structure, make sure they are available before proceeding further.
+	 */
+	if (!get_cpu_device(0))
+		return -EPROBE_DEFER;
+
 	if (!driver_data || !driver_data->verify || !driver_data->init ||
 	    !(driver_data->setpolicy || driver_data->target_index ||
 		    driver_data->target) ||
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index b66e81c06a57..737ff3b9c2c0 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -346,7 +346,7 @@ struct cpufreq_governor *cpufreq_default_governor(void)
 	return CPU_FREQ_GOV_CONSERVATIVE;
 }
 
-fs_initcall(cpufreq_gov_dbs_init);
+core_initcall(cpufreq_gov_dbs_init);
 #else
 module_init(cpufreq_gov_dbs_init);
 #endif
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 4bb054d0cb43..f99ae45efaea 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -105,7 +105,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
 								  dbs_data->io_is_busy);
 			if (dbs_data->ignore_nice_load)
-				j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+				j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 		}
 	}
 }
@@ -149,7 +149,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 		j_cdbs->prev_cpu_idle = cur_idle_time;
 
 		if (ignore_nice) {
-			u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+			u64 cur_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 
 			idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
 			j_cdbs->prev_cpu_nice = cur_nice;
@@ -530,7 +530,7 @@ int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
 		j_cdbs->prev_load = 0;
 
 		if (ignore_nice)
-			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+			j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 	}
 
 	gov->start(policy);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index dced033875bf..82a4d37ddecb 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -483,7 +483,7 @@ struct cpufreq_governor *cpufreq_default_governor(void)
 	return CPU_FREQ_GOV_ONDEMAND;
 }
 
-fs_initcall(cpufreq_gov_dbs_init);
+core_initcall(cpufreq_gov_dbs_init);
 #else
 module_init(cpufreq_gov_dbs_init);
 #endif
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index aaa04dfcacd9..def9afe0f5b8 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -50,5 +50,5 @@ MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("CPUfreq policy governor 'performance'");
 MODULE_LICENSE("GPL");
 
-fs_initcall(cpufreq_gov_performance_init);
+core_initcall(cpufreq_gov_performance_init);
 module_exit(cpufreq_gov_performance_exit);
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index c143dc237d87..1ae66019eb83 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -43,7 +43,7 @@ struct cpufreq_governor *cpufreq_default_governor(void)
 	return &cpufreq_gov_powersave;
 }
 
-fs_initcall(cpufreq_gov_powersave_init);
+core_initcall(cpufreq_gov_powersave_init);
 #else
 module_init(cpufreq_gov_powersave_init);
 #endif
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index cbd81c58cb8f..b43e7cd502c5 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -147,7 +147,7 @@ struct cpufreq_governor *cpufreq_default_governor(void)
 	return &cpufreq_gov_userspace;
 }
 
-fs_initcall(cpufreq_gov_userspace_init);
+core_initcall(cpufreq_gov_userspace_init);
 #else
 module_init(cpufreq_gov_userspace_init);
 #endif
diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c
index 35db14cf3102..6cb8193421ea 100644
--- a/drivers/cpufreq/imx-cpufreq-dt.c
+++ b/drivers/cpufreq/imx-cpufreq-dt.c
@@ -35,7 +35,8 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (of_machine_is_compatible("fsl,imx8mn"))
+	if (of_machine_is_compatible("fsl,imx8mn") ||
+	    of_machine_is_compatible("fsl,imx8mp"))
 		speed_grade = (cell_value & IMX8MN_OCOTP_CFG3_SPEED_GRADE_MASK)
 			      >> OCOTP_CFG3_SPEED_GRADE_SHIFT;
 	else
@@ -44,19 +45,20 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
 	mkt_segment = (cell_value & OCOTP_CFG3_MKT_SEGMENT_MASK) >> OCOTP_CFG3_MKT_SEGMENT_SHIFT;
 
 	/*
-	 * Early samples without fuses written report "0 0" which means
-	 * consumer segment and minimum speed grading.
-	 *
-	 * According to datasheet minimum speed grading is not supported for
-	 * consumer parts so clamp to 1 to avoid warning for "no OPPs"
+	 * Early samples without fuses written report "0 0" which may NOT
+	 * match any OPP defined in DT. So clamp to minimum OPP defined in
+	 * DT to avoid warning for "no OPPs".
 	 *
 	 * Applies to i.MX8M series SoCs.
 	 */
-	if (mkt_segment == 0 && speed_grade == 0 && (
-			of_machine_is_compatible("fsl,imx8mm") ||
-			of_machine_is_compatible("fsl,imx8mn") ||
-			of_machine_is_compatible("fsl,imx8mq")))
-		speed_grade = 1;
+	if (mkt_segment == 0 && speed_grade == 0) {
+		if (of_machine_is_compatible("fsl,imx8mm") ||
+		    of_machine_is_compatible("fsl,imx8mq"))
+			speed_grade = 1;
+		if (of_machine_is_compatible("fsl,imx8mn") ||
+		    of_machine_is_compatible("fsl,imx8mp"))
+			speed_grade = 0xb;
+	}
 
 	supported_hw[0] = BIT(speed_grade);
 	supported_hw[1] = BIT(mkt_segment);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8ab31702cf6a..ad6a17cf0011 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -172,7 +172,7 @@ struct vid_data {
 /**
  * struct global_params - Global parameters, mostly tunable via sysfs.
  * @no_turbo:		Whether or not to use turbo P-states.
- * @turbo_disabled:	Whethet or not turbo P-states are available at all,
+ * @turbo_disabled:	Whether or not turbo P-states are available at all,
  *			based on the MSR_IA32_MISC_ENABLE value and whether or
  *			not the maximum reported turbo P-state is different from
  *			the maximum reported non-turbo one.
@@ -2662,21 +2662,21 @@ enum {
 
 /* Hardware vendor-specific info that has its own power management modes */
 static struct acpi_platform_list plat_info[] __initdata = {
-	{"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, 0, PSS},
-	{"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
-	{"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
+	{"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, NULL, PSS},
+	{"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+	{"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
 	{ } /* End */
 };
 
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index cb74bdc5baaa..70ad8fe1d78b 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -102,13 +102,11 @@ static struct cpufreq_driver kirkwood_cpufreq_driver = {
 static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
-	struct resource *res;
 	int err;
 
 	priv.dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv.base = devm_ioremap_resource(&pdev->dev, res);
+	priv.base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv.base))
 		return PTR_ERR(priv.base);
 
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 890813e0bb76..909f40fbcde2 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -23,7 +23,7 @@
 #include <asm/clock.h>
 #include <asm/idle.h>
 
-#include <asm/mach-loongson64/loongson.h>
+#include <asm/mach-loongson2ef/loongson.h>
 
 static uint nowait;
 
@@ -144,9 +144,11 @@ static void loongson2_cpu_wait(void)
 	u32 cpu_freq;
 
 	spin_lock_irqsave(&loongson2_wait_lock, flags);
-	cpu_freq = LOONGSON_CHIPCFG(0);
-	LOONGSON_CHIPCFG(0) &= ~0x7;	/* Put CPU into wait mode */
-	LOONGSON_CHIPCFG(0) = cpu_freq;	/* Restore CPU state */
+	cpu_freq = readl(LOONGSON_CHIPCFG);
+	/* Put CPU into wait mode */
+	writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG);
+	/* Restore CPU state */
+	writel(cpu_freq, LOONGSON_CHIPCFG);
 	spin_unlock_irqrestore(&loongson2_wait_lock, flags);
 	local_irq_enable();
 }
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index fdc767fdbe6a..89d4fa8b65e9 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -445,7 +445,7 @@ static int __init pcc_cpufreq_probe(void)
 		goto out_free;
 	}
 
-	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
+	pcch_virt_addr = ioremap(mem_resource->minimum,
 					mem_resource->address_length);
 	if (pcch_virt_addr == NULL) {
 		pr_debug("probe: could not map shared mem region\n");
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 6061850e59c9..56f4bc0d209e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1041,9 +1041,14 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
 
 static int init_chip_info(void)
 {
-	unsigned int chip[256];
+	unsigned int *chip;
 	unsigned int cpu, i;
 	unsigned int prev_chip_id = UINT_MAX;
+	int ret = 0;
+
+	chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
 		unsigned int id = cpu_to_chip_id(cpu);
@@ -1055,8 +1060,10 @@ static int init_chip_info(void)
 	}
 
 	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
-	if (!chips)
-		return -ENOMEM;
+	if (!chips) {
+		ret = -ENOMEM;
+		goto free_and_return;
+	}
 
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
@@ -1066,7 +1073,9 @@ static int init_chip_info(void)
 			per_cpu(chip_info, cpu) =  &chips[i];
 	}
 
-	return 0;
+free_and_return:
+	kfree(chip);
+	return ret;
 }
 
 static inline void clean_chip_info(void)
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index a9ae2f84a4ef..fc92a8842e25 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -334,7 +334,7 @@ static int __init qcom_cpufreq_hw_init(void)
 {
 	return platform_driver_register(&qcom_cpufreq_hw_driver);
 }
-device_initcall(qcom_cpufreq_hw_init);
+postcore_initcall(qcom_cpufreq_hw_init);
 
 static void __exit qcom_cpufreq_hw_exit(void)
 {
diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c
index 106910351c41..5c221bc90210 100644
--- a/drivers/cpufreq/s3c2416-cpufreq.c
+++ b/drivers/cpufreq/s3c2416-cpufreq.c
@@ -304,6 +304,7 @@ static int s3c2416_cpufreq_reboot_notifier_evt(struct notifier_block *this,
 {
 	struct s3c2416_data *s3c_freq = &s3c2416_cpufreq;
 	int ret;
+	struct cpufreq_policy *policy;
 
 	mutex_lock(&cpufreq_lock);
 
@@ -318,7 +319,16 @@ static int s3c2416_cpufreq_reboot_notifier_evt(struct notifier_block *this,
 	 */
 	if (s3c_freq->is_dvs) {
 		pr_debug("cpufreq: leave dvs on reboot\n");
-		ret = cpufreq_driver_target(cpufreq_cpu_get(0), FREQ_SLEEP, 0);
+
+		policy = cpufreq_cpu_get(0);
+		if (!policy) {
+			pr_debug("cpufreq: get no policy for cpu0\n");
+			return NOTIFY_BAD;
+		}
+
+		ret = cpufreq_driver_target(policy, FREQ_SLEEP, 0);
+		cpufreq_cpu_put(policy);
+
 		if (ret < 0)
 			return NOTIFY_BAD;
 	}
diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
index af0c00dabb22..c6bdfc308e99 100644
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -19,7 +19,6 @@
 static struct regulator *vddarm;
 static unsigned long regulator_latency;
 
-#ifdef CONFIG_CPU_S3C6410
 struct s3c64xx_dvfs {
 	unsigned int vddarm_min;
 	unsigned int vddarm_max;
@@ -48,7 +47,6 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
 	{ 0, 4, 800000 },
 	{ 0, 0, CPUFREQ_TABLE_END },
 };
-#endif
 
 static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
 				      unsigned int index)
@@ -149,11 +147,6 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
 	if (policy->cpu != 0)
 		return -EINVAL;
 
-	if (s3c64xx_freq_table == NULL) {
-		pr_err("No frequency information for this CPU\n");
-		return -ENODEV;
-	}
-
 	policy->clk = clk_get(NULL, "armclk");
 	if (IS_ERR(policy->clk)) {
 		pr_err("Unable to obtain ARMCLK: %ld\n",
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 5d10030f2560..e84281e2561d 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -555,8 +555,17 @@ static int s5pv210_cpufreq_reboot_notifier_event(struct notifier_block *this,
 						 unsigned long event, void *ptr)
 {
 	int ret;
+	struct cpufreq_policy *policy;
+
+	policy = cpufreq_cpu_get(0);
+	if (!policy) {
+		pr_debug("cpufreq: get no policy for cpu0\n");
+		return NOTIFY_BAD;
+	}
+
+	ret = cpufreq_driver_target(policy, SLEEP_FREQ, 0);
+	cpufreq_cpu_put(policy);
 
-	ret = cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ, 0);
 	if (ret < 0)
 		return NOTIFY_BAD;
 
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 2b51e0718c9f..20d1f85d5f5a 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -1,8 +1,6 @@
 /*
  * System Control and Power Interface (SCPI) based CPUFreq Interface driver
  *
- * It provides necessary ops to arm_big_little cpufreq driver.
- *
  * Copyright (C) 2015 ARM Ltd.
  * Sudeep Holla <sudeep.holla@arm.com>
  *
diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index eca32e443716..9907a165135b 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -25,7 +25,7 @@
 static struct platform_device *cpufreq_dt_pdev, *sun50i_cpufreq_pdev;
 
 /**
- * sun50i_cpufreq_get_efuse() - Parse and return efuse value present on SoC
+ * sun50i_cpufreq_get_efuse() - Determine speed grade from efuse value
  * @versions: Set to the value parsed from efuse
  *
  * Returns 0 if success.
@@ -69,21 +69,16 @@ static int sun50i_cpufreq_get_efuse(u32 *versions)
 		return PTR_ERR(speedbin);
 
 	efuse_value = (*speedbin >> NVMEM_SHIFT) & NVMEM_MASK;
-	switch (efuse_value) {
-	case 0b0001:
-		*versions = 1;
-		break;
-	case 0b0011:
-		*versions = 2;
-		break;
-	default:
-		/*
-		 * For other situations, we treat it as bin0.
-		 * This vf table can be run for any good cpu.
-		 */
+
+	/*
+	 * We treat unexpected efuse values as if the SoC was from
+	 * the slowest bin. Expected efuse values are 1-3, slowest
+	 * to fastest.
+	 */
+	if (efuse_value >= 1 && efuse_value <= 3)
+		*versions = efuse_value - 1;
+	else
 		*versions = 0;
-		break;
-	}
 
 	kfree(speedbin);
 	return 0;
diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c
index 4f0c637b3b49..7a1ea6fdcab6 100644
--- a/drivers/cpufreq/tegra124-cpufreq.c
+++ b/drivers/cpufreq/tegra124-cpufreq.c
@@ -6,6 +6,7 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/clk.h>
+#include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -128,8 +129,66 @@ out_put_np:
 	return ret;
 }
 
+static int __maybe_unused tegra124_cpufreq_suspend(struct device *dev)
+{
+	struct tegra124_cpufreq_priv *priv = dev_get_drvdata(dev);
+	int err;
+
+	/*
+	 * PLLP rate 408Mhz is below the CPU Fmax at Vmin and is safe to
+	 * use during suspend and resume. So, switch the CPU clock source
+	 * to PLLP and disable DFLL.
+	 */
+	err = clk_set_parent(priv->cpu_clk, priv->pllp_clk);
+	if (err < 0) {
+		dev_err(dev, "failed to reparent to PLLP: %d\n", err);
+		return err;
+	}
+
+	clk_disable_unprepare(priv->dfll_clk);
+
+	return 0;
+}
+
+static int __maybe_unused tegra124_cpufreq_resume(struct device *dev)
+{
+	struct tegra124_cpufreq_priv *priv = dev_get_drvdata(dev);
+	int err;
+
+	/*
+	 * Warmboot code powers up the CPU with PLLP clock source.
+	 * Enable DFLL clock and switch CPU clock source back to DFLL.
+	 */
+	err = clk_prepare_enable(priv->dfll_clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable DFLL clock for CPU: %d\n", err);
+		goto disable_cpufreq;
+	}
+
+	err = clk_set_parent(priv->cpu_clk, priv->dfll_clk);
+	if (err < 0) {
+		dev_err(dev, "failed to reparent to DFLL clock: %d\n", err);
+		goto disable_dfll;
+	}
+
+	return 0;
+
+disable_dfll:
+	clk_disable_unprepare(priv->dfll_clk);
+disable_cpufreq:
+	disable_cpufreq();
+
+	return err;
+}
+
+static const struct dev_pm_ops tegra124_cpufreq_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(tegra124_cpufreq_suspend,
+				tegra124_cpufreq_resume)
+};
+
 static struct platform_driver tegra124_cpufreq_platdrv = {
 	.driver.name	= "cpufreq-tegra124",
+	.driver.pm	= &tegra124_cpufreq_pm_ops,
 	.probe		= tegra124_cpufreq_probe,
 };
 
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
index bcecb068b51b..2e233ad72758 100644
--- a/drivers/cpufreq/tegra186-cpufreq.c
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -187,7 +187,6 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
 {
 	struct tegra186_cpufreq_data *data;
 	struct tegra_bpmp *bpmp;
-	struct resource *res;
 	unsigned int i = 0, err;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
@@ -205,8 +204,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
 	if (IS_ERR(bpmp))
 		return PTR_ERR(bpmp);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	data->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(data->regs)) {
 		err = PTR_ERR(data->regs);
 		goto put_bpmp;
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index aeaa883a8c9d..557cb513bf7f 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -31,11 +31,17 @@
 #define DRA7_EFUSE_OD_MPU_OPP			BIT(1)
 #define DRA7_EFUSE_HIGH_MPU_OPP			BIT(2)
 
+#define OMAP3_CONTROL_DEVICE_STATUS		0x4800244C
+#define OMAP3_CONTROL_IDCODE			0x4830A204
+#define OMAP34xx_ProdID_SKUID			0x4830A20C
+#define OMAP3_SYSCON_BASE	(0x48000000 + 0x2000 + 0x270)
+
 #define VERSION_COUNT				2
 
 struct ti_cpufreq_data;
 
 struct ti_cpufreq_soc_data {
+	const char * const *reg_names;
 	unsigned long (*efuse_xlate)(struct ti_cpufreq_data *opp_data,
 				     unsigned long efuse);
 	unsigned long efuse_fallback;
@@ -85,6 +91,13 @@ static unsigned long dra7_efuse_xlate(struct ti_cpufreq_data *opp_data,
 	return calculated_efuse;
 }
 
+static unsigned long omap3_efuse_xlate(struct ti_cpufreq_data *opp_data,
+				      unsigned long efuse)
+{
+	/* OPP enable bit ("Speed Binned") */
+	return BIT(efuse);
+}
+
 static struct ti_cpufreq_soc_data am3x_soc_data = {
 	.efuse_xlate = amx3_efuse_xlate,
 	.efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ,
@@ -112,6 +125,74 @@ static struct ti_cpufreq_soc_data dra7_soc_data = {
 	.multi_regulator = true,
 };
 
+/*
+ * OMAP35x TRM (SPRUF98K):
+ *  CONTROL_IDCODE (0x4830 A204) describes Silicon revisions.
+ *  Control OMAP Status Register 15:0 (Address 0x4800 244C)
+ *    to separate between omap3503, omap3515, omap3525, omap3530
+ *    and feature presence.
+ *    There are encodings for versions limited to 400/266MHz
+ *    but we ignore.
+ *    Not clear if this also holds for omap34xx.
+ *  some eFuse values e.g. CONTROL_FUSE_OPP1_VDD1
+ *    are stored in the SYSCON register range
+ *  Register 0x4830A20C [ProdID.SKUID] [0:3]
+ *    0x0 for normal 600/430MHz device.
+ *    0x8 for 720/520MHz device.
+ *    Not clear what omap34xx value is.
+ */
+
+static struct ti_cpufreq_soc_data omap34xx_soc_data = {
+	.efuse_xlate = omap3_efuse_xlate,
+	.efuse_offset = OMAP34xx_ProdID_SKUID - OMAP3_SYSCON_BASE,
+	.efuse_shift = 3,
+	.efuse_mask = BIT(3),
+	.rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+	.multi_regulator = false,
+};
+
+/*
+ * AM/DM37x TRM (SPRUGN4M)
+ *  CONTROL_IDCODE (0x4830 A204) describes Silicon revisions.
+ *  Control Device Status Register 15:0 (Address 0x4800 244C)
+ *    to separate between am3703, am3715, dm3725, dm3730
+ *    and feature presence.
+ *   Speed Binned = Bit 9
+ *     0 800/600 MHz
+ *     1 1000/800 MHz
+ *  some eFuse values e.g. CONTROL_FUSE_OPP 1G_VDD1
+ *    are stored in the SYSCON register range.
+ *  There is no 0x4830A20C [ProdID.SKUID] register (exists but
+ *    seems to always read as 0).
+ */
+
+static const char * const omap3_reg_names[] = {"cpu0", "vbb"};
+
+static struct ti_cpufreq_soc_data omap36xx_soc_data = {
+	.reg_names = omap3_reg_names,
+	.efuse_xlate = omap3_efuse_xlate,
+	.efuse_offset = OMAP3_CONTROL_DEVICE_STATUS - OMAP3_SYSCON_BASE,
+	.efuse_shift = 9,
+	.efuse_mask = BIT(9),
+	.rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+	.multi_regulator = true,
+};
+
+/*
+ * AM3517 is quite similar to AM/DM37x except that it has no
+ * high speed grade eFuse and no abb ldo
+ */
+
+static struct ti_cpufreq_soc_data am3517_soc_data = {
+	.efuse_xlate = omap3_efuse_xlate,
+	.efuse_offset = OMAP3_CONTROL_DEVICE_STATUS - OMAP3_SYSCON_BASE,
+	.efuse_shift = 0,
+	.efuse_mask = 0,
+	.rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+	.multi_regulator = false,
+};
+
+
 /**
  * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC
  * @opp_data: pointer to ti_cpufreq_data context
@@ -128,7 +209,17 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data,
 
 	ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset,
 			  &efuse);
-	if (ret) {
+	if (ret == -EIO) {
+		/* not a syscon register! */
+		void __iomem *regs = ioremap(OMAP3_SYSCON_BASE +
+				opp_data->soc_data->efuse_offset, 4);
+
+		if (!regs)
+			return -ENOMEM;
+		efuse = readl(regs);
+		iounmap(regs);
+		}
+	else if (ret) {
 		dev_err(dev,
 			"Failed to read the efuse value from syscon: %d\n",
 			ret);
@@ -159,7 +250,17 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data,
 
 	ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset,
 			  &revision);
-	if (ret) {
+	if (ret == -EIO) {
+		/* not a syscon register! */
+		void __iomem *regs = ioremap(OMAP3_SYSCON_BASE +
+				opp_data->soc_data->rev_offset, 4);
+
+		if (!regs)
+			return -ENOMEM;
+		revision = readl(regs);
+		iounmap(regs);
+		}
+	else if (ret) {
 		dev_err(dev,
 			"Failed to read the revision number from syscon: %d\n",
 			ret);
@@ -189,8 +290,14 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data)
 
 static const struct of_device_id ti_cpufreq_of_match[] = {
 	{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
+	{ .compatible = "ti,am3517", .data = &am3517_soc_data, },
 	{ .compatible = "ti,am43", .data = &am4x_soc_data, },
 	{ .compatible = "ti,dra7", .data = &dra7_soc_data },
+	{ .compatible = "ti,omap34xx", .data = &omap34xx_soc_data, },
+	{ .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, },
+	/* legacy */
+	{ .compatible = "ti,omap3430", .data = &omap34xx_soc_data, },
+	{ .compatible = "ti,omap3630", .data = &omap36xx_soc_data, },
 	{},
 };
 
@@ -212,7 +319,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 	const struct of_device_id *match;
 	struct opp_table *ti_opp_table;
 	struct ti_cpufreq_data *opp_data;
-	const char * const reg_names[] = {"vdd", "vbb"};
+	const char * const default_reg_names[] = {"vdd", "vbb"};
 	int ret;
 
 	match = dev_get_platdata(&pdev->dev);
@@ -268,9 +375,13 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 	opp_data->opp_table = ti_opp_table;
 
 	if (opp_data->soc_data->multi_regulator) {
+		const char * const *reg_names = default_reg_names;
+
+		if (opp_data->soc_data->reg_names)
+			reg_names = opp_data->soc_data->reg_names;
 		ti_opp_table = dev_pm_opp_set_regulators(opp_data->cpu_dev,
 							 reg_names,
-							 ARRAY_SIZE(reg_names));
+							 ARRAY_SIZE(default_reg_names));
 		if (IS_ERR(ti_opp_table)) {
 			dev_pm_opp_put_supported_hw(opp_data->opp_table);
 			ret =  PTR_ERR(ti_opp_table);
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 53237289e606..83c85d3d67e3 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -1,61 +1,592 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Versatile Express SPC CPUFreq Interface driver
  *
- * It provides necessary ops to arm_big_little cpufreq driver.
+ * Copyright (C) 2013 - 2019 ARM Ltd.
+ * Sudeep Holla <sudeep.holla@arm.com>
  *
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Copyright (C) 2013 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/clk.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
+#include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
 #include <linux/types.h>
 
-#include "arm_big_little.h"
+/* Currently we support only two clusters */
+#define A15_CLUSTER	0
+#define A7_CLUSTER	1
+#define MAX_CLUSTERS	2
+
+#ifdef CONFIG_BL_SWITCHER
+#include <asm/bL_switcher.h>
+static bool bL_switching_enabled;
+#define is_bL_switching_enabled()	bL_switching_enabled
+#define set_switching_enabled(x)	(bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled()	false
+#define set_switching_enabled(x)	do { } while (0)
+#define bL_switch_request(...)		do { } while (0)
+#define bL_switcher_put_enabled()	do { } while (0)
+#define bL_switcher_get_enabled()	do { } while (0)
+#endif
+
+#define ACTUAL_FREQ(cluster, freq)  ((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq)    ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
+
+static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
+static struct clk *clk[MAX_CLUSTERS];
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1];
+
+static unsigned int clk_big_min;	/* (Big) clock frequencies */
+static unsigned int clk_little_max;	/* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static inline int raw_cpu_to_cluster(int cpu)
+{
+	return topology_physical_package_id(cpu);
+}
+
+static inline int cpu_to_cluster(int cpu)
+{
+	return is_bL_switching_enabled() ?
+		MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
+}
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+	int j;
+	u32 max_freq = 0, cpu_freq;
+
+	for_each_online_cpu(j) {
+		cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+		if (cluster == per_cpu(physical_cluster, j) &&
+		    max_freq < cpu_freq)
+			max_freq = cpu_freq;
+	}
+
+	return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
+{
+	u32 cur_cluster = per_cpu(physical_cluster, cpu);
+	u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+	/* For switcher we use virtual A7 clock rates */
+	if (is_bL_switching_enabled())
+		rate = VIRT_FREQ(cur_cluster, rate);
+
+	return rate;
+}
+
+static unsigned int ve_spc_cpufreq_get_rate(unsigned int cpu)
+{
+	if (is_bL_switching_enabled())
+		return per_cpu(cpu_last_req_freq, cpu);
+	else
+		return clk_get_cpu_rate(cpu);
+}
+
+static unsigned int
+ve_spc_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+	u32 new_rate, prev_rate;
+	int ret;
+	bool bLs = is_bL_switching_enabled();
+
+	mutex_lock(&cluster_lock[new_cluster]);
+
+	if (bLs) {
+		prev_rate = per_cpu(cpu_last_req_freq, cpu);
+		per_cpu(cpu_last_req_freq, cpu) = rate;
+		per_cpu(physical_cluster, cpu) = new_cluster;
+
+		new_rate = find_cluster_maxfreq(new_cluster);
+		new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+	} else {
+		new_rate = rate;
+	}
+
+	ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+	if (!ret) {
+		/*
+		 * FIXME: clk_set_rate hasn't returned an error here however it
+		 * may be that clk_change_rate failed due to hardware or
+		 * firmware issues and wasn't able to report that due to the
+		 * current design of the clk core layer. To work around this
+		 * problem we will read back the clock rate and check it is
+		 * correct. This needs to be removed once clk core is fixed.
+		 */
+		if (clk_get_rate(clk[new_cluster]) != new_rate * 1000)
+			ret = -EIO;
+	}
+
+	if (WARN_ON(ret)) {
+		if (bLs) {
+			per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+			per_cpu(physical_cluster, cpu) = old_cluster;
+		}
+
+		mutex_unlock(&cluster_lock[new_cluster]);
+
+		return ret;
+	}
+
+	mutex_unlock(&cluster_lock[new_cluster]);
+
+	/* Recalc freq for old cluster when switching clusters */
+	if (old_cluster != new_cluster) {
+		/* Switch cluster */
+		bL_switch_request(cpu, new_cluster);
+
+		mutex_lock(&cluster_lock[old_cluster]);
+
+		/* Set freq of old cluster if there are cpus left on it */
+		new_rate = find_cluster_maxfreq(old_cluster);
+		new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+		if (new_rate &&
+		    clk_set_rate(clk[old_cluster], new_rate * 1000)) {
+			pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+			       __func__, ret, old_cluster);
+		}
+		mutex_unlock(&cluster_lock[old_cluster]);
+	}
+
+	return 0;
+}
+
+/* Set clock frequency */
+static int ve_spc_cpufreq_set_target(struct cpufreq_policy *policy,
+				     unsigned int index)
+{
+	u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster;
+	unsigned int freqs_new;
+	int ret;
+
+	cur_cluster = cpu_to_cluster(cpu);
+	new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
+
+	freqs_new = freq_table[cur_cluster][index].frequency;
+
+	if (is_bL_switching_enabled()) {
+		if (actual_cluster == A15_CLUSTER && freqs_new < clk_big_min)
+			new_cluster = A7_CLUSTER;
+		else if (actual_cluster == A7_CLUSTER &&
+			 freqs_new > clk_little_max)
+			new_cluster = A15_CLUSTER;
+	}
+
+	ret = ve_spc_cpufreq_set_rate(cpu, actual_cluster, new_cluster,
+				      freqs_new);
+
+	if (!ret) {
+		arch_set_freq_scale(policy->related_cpus, freqs_new,
+				    policy->cpuinfo.max_freq);
+	}
+
+	return ret;
+}
+
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+	int count;
+
+	for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
+		;
+
+	return count;
+}
+
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+	struct cpufreq_frequency_table *pos;
+	u32 min_freq = ~0;
+
+	cpufreq_for_each_entry(pos, table)
+		if (pos->frequency < min_freq)
+			min_freq = pos->frequency;
+	return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+	struct cpufreq_frequency_table *pos;
+	u32 max_freq = 0;
+
+	cpufreq_for_each_entry(pos, table)
+		if (pos->frequency > max_freq)
+			max_freq = pos->frequency;
+	return max_freq;
+}
+
+static bool search_frequency(struct cpufreq_frequency_table *table, int size,
+			     unsigned int freq)
+{
+	int count;
+
+	for (count = 0; count < size; count++) {
+		if (table[count].frequency == freq)
+			return true;
+	}
+
+	return false;
+}
+
+static int merge_cluster_tables(void)
+{
+	int i, j, k = 0, count = 1;
+	struct cpufreq_frequency_table *table;
+
+	for (i = 0; i < MAX_CLUSTERS; i++)
+		count += get_table_count(freq_table[i]);
+
+	table = kcalloc(count, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	freq_table[MAX_CLUSTERS] = table;
+
+	/* Add in reverse order to get freqs in increasing order */
+	for (i = MAX_CLUSTERS - 1; i >= 0; i--, count = k) {
+		for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+		     j++) {
+			if (i == A15_CLUSTER &&
+			    search_frequency(table, count, freq_table[i][j].frequency))
+				continue; /* skip duplicates */
+			table[k++].frequency =
+				VIRT_FREQ(i, freq_table[i][j].frequency);
+		}
+	}
+
+	table[k].driver_data = k;
+	table[k].frequency = CPUFREQ_TABLE_END;
+
+	return 0;
+}
 
-static int ve_spc_init_opp_table(const struct cpumask *cpumask)
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					    const struct cpumask *cpumask)
 {
-	struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
+	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
+
+	if (!freq_table[cluster])
+		return;
+
+	clk_put(clk[cluster]);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+}
+
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
+{
+	u32 cluster = cpu_to_cluster(cpu_dev->id);
+	int i;
+
+	if (atomic_dec_return(&cluster_usage[cluster]))
+		return;
+
+	if (cluster < MAX_CLUSTERS)
+		return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
+
+	for_each_present_cpu(i) {
+		struct device *cdev = get_cpu_device(i);
+
+		if (!cdev)
+			return;
+
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
+	}
+
+	/* free virtual table */
+	kfree(freq_table[cluster]);
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
+{
+	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
+	int ret;
+
+	if (freq_table[cluster])
+		return 0;
+
 	/*
 	 * platform specific SPC code must initialise the opp table
 	 * so just check if the OPP count is non-zero
 	 */
-	return dev_pm_opp_get_opp_count(cpu_dev) <= 0;
+	ret = dev_pm_opp_get_opp_count(cpu_dev) <= 0;
+	if (ret)
+		goto out;
+
+	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
+	if (ret)
+		goto out;
+
+	clk[cluster] = clk_get(cpu_dev, NULL);
+	if (!IS_ERR(clk[cluster]))
+		return 0;
+
+	dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d, cluster: %d\n",
+		__func__, cpu_dev->id, cluster);
+	ret = PTR_ERR(clk[cluster]);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+
+out:
+	dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
+		cluster);
+	return ret;
 }
 
-static int ve_spc_get_transition_latency(struct device *cpu_dev)
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					  const struct cpumask *cpumask)
 {
-	return 1000000; /* 1 ms */
+	u32 cluster = cpu_to_cluster(cpu_dev->id);
+	int i, ret;
+
+	if (atomic_inc_return(&cluster_usage[cluster]) != 1)
+		return 0;
+
+	if (cluster < MAX_CLUSTERS) {
+		ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
+		if (ret)
+			atomic_dec(&cluster_usage[cluster]);
+		return ret;
+	}
+
+	/*
+	 * Get data for all clusters and fill virtual cluster with a merge of
+	 * both
+	 */
+	for_each_present_cpu(i) {
+		struct device *cdev = get_cpu_device(i);
+
+		if (!cdev)
+			return -ENODEV;
+
+		ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
+		if (ret)
+			goto put_clusters;
+	}
+
+	ret = merge_cluster_tables();
+	if (ret)
+		goto put_clusters;
+
+	/* Assuming 2 cluster, set clk_big_min and clk_little_max */
+	clk_big_min = get_table_min(freq_table[A15_CLUSTER]);
+	clk_little_max = VIRT_FREQ(A7_CLUSTER,
+				   get_table_max(freq_table[A7_CLUSTER]));
+
+	return 0;
+
+put_clusters:
+	for_each_present_cpu(i) {
+		struct device *cdev = get_cpu_device(i);
+
+		if (!cdev)
+			return -ENODEV;
+
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
+	}
+
+	atomic_dec(&cluster_usage[cluster]);
+
+	return ret;
 }
 
-static const struct cpufreq_arm_bL_ops ve_spc_cpufreq_ops = {
-	.name	= "vexpress-spc",
-	.get_transition_latency = ve_spc_get_transition_latency,
-	.init_opp_table = ve_spc_init_opp_table,
+/* Per-CPU initialization */
+static int ve_spc_cpufreq_init(struct cpufreq_policy *policy)
+{
+	u32 cur_cluster = cpu_to_cluster(policy->cpu);
+	struct device *cpu_dev;
+	int ret;
+
+	cpu_dev = get_cpu_device(policy->cpu);
+	if (!cpu_dev) {
+		pr_err("%s: failed to get cpu%d device\n", __func__,
+		       policy->cpu);
+		return -ENODEV;
+	}
+
+	if (cur_cluster < MAX_CLUSTERS) {
+		int cpu;
+
+		dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus);
+
+		for_each_cpu(cpu, policy->cpus)
+			per_cpu(physical_cluster, cpu) = cur_cluster;
+	} else {
+		/* Assumption: during init, we are always running on A15 */
+		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+	}
+
+	ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+	if (ret)
+		return ret;
+
+	policy->freq_table = freq_table[cur_cluster];
+	policy->cpuinfo.transition_latency = 1000000; /* 1 ms */
+
+	dev_pm_opp_of_register_em(policy->cpus);
+
+	if (is_bL_switching_enabled())
+		per_cpu(cpu_last_req_freq, policy->cpu) =
+						clk_get_cpu_rate(policy->cpu);
+
+	dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
+	return 0;
+}
+
+static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	struct device *cpu_dev;
+	int cur_cluster = cpu_to_cluster(policy->cpu);
+
+	if (cur_cluster < MAX_CLUSTERS) {
+		cpufreq_cooling_unregister(cdev[cur_cluster]);
+		cdev[cur_cluster] = NULL;
+	}
+
+	cpu_dev = get_cpu_device(policy->cpu);
+	if (!cpu_dev) {
+		pr_err("%s: failed to get cpu%d device\n", __func__,
+		       policy->cpu);
+		return -ENODEV;
+	}
+
+	put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
+	return 0;
+}
+
+static void ve_spc_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	int cur_cluster = cpu_to_cluster(policy->cpu);
+
+	/* Do not register a cpu_cooling device if we are in IKS mode */
+	if (cur_cluster >= MAX_CLUSTERS)
+		return;
+
+	cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
+}
+
+static struct cpufreq_driver ve_spc_cpufreq_driver = {
+	.name			= "vexpress-spc",
+	.flags			= CPUFREQ_STICKY |
+					CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
+					CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify			= cpufreq_generic_frequency_table_verify,
+	.target_index		= ve_spc_cpufreq_set_target,
+	.get			= ve_spc_cpufreq_get_rate,
+	.init			= ve_spc_cpufreq_init,
+	.exit			= ve_spc_cpufreq_exit,
+	.ready			= ve_spc_cpufreq_ready,
+	.attr			= cpufreq_generic_attr,
 };
 
+#ifdef CONFIG_BL_SWITCHER
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+					unsigned long action, void *_arg)
+{
+	pr_debug("%s: action: %ld\n", __func__, action);
+
+	switch (action) {
+	case BL_NOTIFY_PRE_ENABLE:
+	case BL_NOTIFY_PRE_DISABLE:
+		cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+		break;
+
+	case BL_NOTIFY_POST_ENABLE:
+		set_switching_enabled(true);
+		cpufreq_register_driver(&ve_spc_cpufreq_driver);
+		break;
+
+	case BL_NOTIFY_POST_DISABLE:
+		set_switching_enabled(false);
+		cpufreq_register_driver(&ve_spc_cpufreq_driver);
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+	.notifier_call = bL_cpufreq_switcher_notifier,
+};
+
+static int __bLs_register_notifier(void)
+{
+	return bL_switcher_register_notifier(&bL_switcher_notifier);
+}
+
+static int __bLs_unregister_notifier(void)
+{
+	return bL_switcher_unregister_notifier(&bL_switcher_notifier);
+}
+#else
+static int __bLs_register_notifier(void) { return 0; }
+static int __bLs_unregister_notifier(void) { return 0; }
+#endif
+
 static int ve_spc_cpufreq_probe(struct platform_device *pdev)
 {
-	return bL_cpufreq_register(&ve_spc_cpufreq_ops);
+	int ret, i;
+
+	set_switching_enabled(bL_switcher_get_enabled());
+
+	for (i = 0; i < MAX_CLUSTERS; i++)
+		mutex_init(&cluster_lock[i]);
+
+	ret = cpufreq_register_driver(&ve_spc_cpufreq_driver);
+	if (ret) {
+		pr_info("%s: Failed registering platform driver: %s, err: %d\n",
+			__func__, ve_spc_cpufreq_driver.name, ret);
+	} else {
+		ret = __bLs_register_notifier();
+		if (ret)
+			cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+		else
+			pr_info("%s: Registered platform driver: %s\n",
+				__func__, ve_spc_cpufreq_driver.name);
+	}
+
+	bL_switcher_put_enabled();
+	return ret;
 }
 
 static int ve_spc_cpufreq_remove(struct platform_device *pdev)
 {
-	bL_cpufreq_unregister(&ve_spc_cpufreq_ops);
+	bL_switcher_get_enabled();
+	__bLs_unregister_notifier();
+	cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+	bL_switcher_put_enabled();
+	pr_info("%s: Un-registered platform driver: %s\n", __func__,
+		ve_spc_cpufreq_driver.name);
 	return 0;
 }
 
@@ -68,4 +599,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = {
 };
 module_platform_driver(ve_spc_cpufreq_platdrv);
 
-MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("Vexpress SPC ARM big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 88727b7c0d59..c0aeedd66f02 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -16,7 +16,7 @@ config CPU_IDLE
 if CPU_IDLE
 
 config CPU_IDLE_MULTIPLE_DRIVERS
-        bool
+	bool
 
 config CPU_IDLE_GOV_LADDER
 	bool "Ladder governor (for periodic timer tick)"
@@ -63,13 +63,13 @@ source "drivers/cpuidle/Kconfig.powerpc"
 endmenu
 
 config HALTPOLL_CPUIDLE
-       tristate "Halt poll cpuidle driver"
-       depends on X86 && KVM_GUEST
-       default y
-       help
-         This option enables halt poll cpuidle driver, which allows to poll
-         before halting in the guest (more efficient than polling in the
-         host via halt_poll_ns for some scenarios).
+	tristate "Halt poll cpuidle driver"
+	depends on X86 && KVM_GUEST
+	default y
+	help
+	 This option enables halt poll cpuidle driver, which allows to poll
+	 before halting in the guest (more efficient than polling in the
+	 host via halt_poll_ns for some scenarios).
 
 endif
 
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index d8530475493c..62272ecfa771 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -3,15 +3,15 @@
 # ARM CPU Idle drivers
 #
 config ARM_CPUIDLE
-        bool "Generic ARM/ARM64 CPU idle Driver"
-        select DT_IDLE_STATES
+	bool "Generic ARM/ARM64 CPU idle Driver"
+	select DT_IDLE_STATES
 	select CPU_IDLE_MULTIPLE_DRIVERS
-        help
-          Select this to enable generic cpuidle driver for ARM.
-          It provides a generic idle driver whose idle states are configured
-          at run-time through DT nodes. The CPUidle suspend backend is
-          initialized by calling the CPU operations init idle hook
-          provided by architecture code.
+	help
+	  Select this to enable generic cpuidle driver for ARM.
+	  It provides a generic idle driver whose idle states are configured
+	  at run-time through DT nodes. The CPUidle suspend backend is
+	  initialized by calling the CPU operations init idle hook
+	  provided by architecture code.
 
 config ARM_PSCI_CPUIDLE
 	bool "PSCI CPU idle Driver"
@@ -25,7 +25,7 @@ config ARM_PSCI_CPUIDLE
 
 config ARM_BIG_LITTLE_CPUIDLE
 	bool "Support for ARM big.LITTLE processors"
-	depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS
+	depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS || COMPILE_TEST
 	depends on MCPM && !ARM64
 	select ARM_CPU_SUSPEND
 	select CPU_IDLE_MULTIPLE_DRIVERS
@@ -51,13 +51,13 @@ config ARM_HIGHBANK_CPUIDLE
 
 config ARM_KIRKWOOD_CPUIDLE
 	bool "CPU Idle Driver for Marvell Kirkwood SoCs"
-	depends on MACH_KIRKWOOD && !ARM64
+	depends on (MACH_KIRKWOOD || COMPILE_TEST) && !ARM64
 	help
 	  This adds the CPU Idle driver for Marvell Kirkwood SoCs.
 
 config ARM_ZYNQ_CPUIDLE
 	bool "CPU Idle Driver for Xilinx Zynq processors"
-	depends on ARCH_ZYNQ && !ARM64
+	depends on (ARCH_ZYNQ || COMPILE_TEST) && !ARM64
 	help
 	  Select this to enable cpuidle on Xilinx Zynq processors.
 
@@ -65,24 +65,24 @@ config ARM_U8500_CPUIDLE
 	bool "Cpu Idle Driver for the ST-E u8500 processors"
 	depends on ARCH_U8500 && !ARM64
 	help
-	  Select this to enable cpuidle for ST-E u8500 processors
+	  Select this to enable cpuidle for ST-E u8500 processors.
 
 config ARM_AT91_CPUIDLE
 	bool "Cpu Idle Driver for the AT91 processors"
 	default y
-	depends on ARCH_AT91 && !ARM64
+	depends on (ARCH_AT91 || COMPILE_TEST) && !ARM64
 	help
-	  Select this to enable cpuidle for AT91 processors
+	  Select this to enable cpuidle for AT91 processors.
 
 config ARM_EXYNOS_CPUIDLE
 	bool "Cpu Idle Driver for the Exynos processors"
-	depends on ARCH_EXYNOS && !ARM64
+	depends on (ARCH_EXYNOS || COMPILE_TEST) && !ARM64
 	select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
 	help
-	  Select this to enable cpuidle for Exynos processors
+	  Select this to enable cpuidle for Exynos processors.
 
 config ARM_MVEBU_V7_CPUIDLE
 	bool "CPU Idle Driver for mvebu v7 family processors"
-	depends on ARCH_MVEBU && !ARM64
+	depends on (ARCH_MVEBU || COMPILE_TEST) && !ARM64
 	help
 	  Select this to enable cpuidle on Armada 370, 38x and XP processors.
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index b607278df25b..04003b90dc49 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -89,6 +89,7 @@
  * @coupled_cpus: mask of cpus that are part of the coupled set
  * @requested_state: array of requested states for cpus in the coupled set
  * @ready_waiting_counts: combined count of cpus  in ready or waiting loops
+ * @abort_barrier: synchronisation point for abort cases
  * @online_count: count of cpus that are online
  * @refcnt: reference count of cpuidle devices that are using this struct
  * @prevent: flag to prevent coupled idle while a cpu is hotplugging
@@ -338,7 +339,7 @@ static void cpuidle_coupled_poke(int cpu)
 
 /**
  * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting
- * @dev: struct cpuidle_device for this cpu
+ * @this_cpu: target cpu
  * @coupled: the struct coupled that contains the current cpu
  *
  * Calls cpuidle_coupled_poke on all other online cpus.
@@ -355,7 +356,7 @@ static void cpuidle_coupled_poke_others(int this_cpu,
 
 /**
  * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop
- * @dev: struct cpuidle_device for this cpu
+ * @cpu: target cpu
  * @coupled: the struct coupled that contains the current cpu
  * @next_state: the index in drv->states of the requested state for this cpu
  *
@@ -376,7 +377,7 @@ static int cpuidle_coupled_set_waiting(int cpu,
 
 /**
  * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop
- * @dev: struct cpuidle_device for this cpu
+ * @cpu: target cpu
  * @coupled: the struct coupled that contains the current cpu
  *
  * Removes the requested idle state for the specified cpuidle device.
@@ -412,7 +413,7 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
 
 /**
  * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed
- * @cpu - this cpu
+ * @cpu: this cpu
  *
  * Turns on interrupts and spins until any outstanding poke interrupts have
  * been processed and the poke bit has been cleared.
diff --git a/drivers/cpuidle/cpuidle-clps711x.c b/drivers/cpuidle/cpuidle-clps711x.c
index 6e36740f5719..fc22c59b6c73 100644
--- a/drivers/cpuidle/cpuidle-clps711x.c
+++ b/drivers/cpuidle/cpuidle-clps711x.c
@@ -37,10 +37,7 @@ static struct cpuidle_driver clps711x_idle_driver = {
 
 static int __init clps711x_cpuidle_probe(struct platform_device *pdev)
 {
-	struct resource *res;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	clps711x_halt = devm_ioremap_resource(&pdev->dev, res);
+	clps711x_halt = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(clps711x_halt))
 		return PTR_ERR(clps711x_halt);
 
diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c
index d23d8f468c12..511c4f46027a 100644
--- a/drivers/cpuidle/cpuidle-kirkwood.c
+++ b/drivers/cpuidle/cpuidle-kirkwood.c
@@ -55,10 +55,7 @@ static struct cpuidle_driver kirkwood_idle_driver = {
 /* Initialize CPU idle by registering the idle states */
 static int kirkwood_cpuidle_probe(struct platform_device *pdev)
 {
-	struct resource *res;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ddr_operation_base = devm_ioremap_resource(&pdev->dev, res);
+	ddr_operation_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ddr_operation_base))
 		return PTR_ERR(ddr_operation_base);
 
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 84b1ebe212b3..1b299e801f74 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -56,13 +56,10 @@ static u64 get_snooze_timeout(struct cpuidle_device *dev,
 		return default_snooze_timeout;
 
 	for (i = index + 1; i < drv->state_count; i++) {
-		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
-
-		if (s->disabled || su->disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
-		return s->target_residency * tb_ticks_per_usec;
+		return drv->states[i].target_residency * tb_ticks_per_usec;
 	}
 
 	return default_snooze_timeout;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0895b988fa92..de81298051b3 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -75,44 +75,45 @@ int cpuidle_play_dead(void)
 
 static int find_deepest_state(struct cpuidle_driver *drv,
 			      struct cpuidle_device *dev,
-			      unsigned int max_latency,
+			      u64 max_latency_ns,
 			      unsigned int forbidden_flags,
 			      bool s2idle)
 {
-	unsigned int latency_req = 0;
+	u64 latency_req = 0;
 	int i, ret = 0;
 
 	for (i = 1; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable || s->exit_latency <= latency_req
-		    || s->exit_latency > max_latency
-		    || (s->flags & forbidden_flags)
-		    || (s2idle && !s->enter_s2idle))
+		if (dev->states_usage[i].disable ||
+		    s->exit_latency_ns <= latency_req ||
+		    s->exit_latency_ns > max_latency_ns ||
+		    (s->flags & forbidden_flags) ||
+		    (s2idle && !s->enter_s2idle))
 			continue;
 
-		latency_req = s->exit_latency;
+		latency_req = s->exit_latency_ns;
 		ret = i;
 	}
 	return ret;
 }
 
 /**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_deepest_state - Set/unset governor override mode.
+ * @latency_limit_ns: Idle state exit latency limit (or no override if 0).
  *
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
+ * state with exit latency within @latency_limit_ns (override governors going
+ * forward), or do not override governors if it is zero.
  */
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_deepest_state(u64 latency_limit_ns)
 {
 	struct cpuidle_device *dev;
 
 	preempt_disable();
 	dev = cpuidle_get_device();
 	if (dev)
-		dev->use_deepest_state = enable;
+		dev->forced_idle_latency_limit_ns = latency_limit_ns;
 	preempt_enable();
 }
 
@@ -120,11 +121,15 @@ void cpuidle_use_deepest_state(bool enable)
  * cpuidle_find_deepest_state - Find the deepest available idle state.
  * @drv: cpuidle driver for the given CPU.
  * @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
+ *
+ * Return: the index of the deepest available idle state.
  */
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-			       struct cpuidle_device *dev)
+			       struct cpuidle_device *dev,
+			       u64 latency_limit_ns)
 {
-	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+	return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
 }
 
 #ifdef CONFIG_SUSPEND
@@ -180,7 +185,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
+	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
 	if (index > 0)
 		enter_s2idle_proper(drv, dev, index);
 
@@ -209,7 +214,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		index = find_deepest_state(drv, dev, target_state->exit_latency,
+		index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
 					   CPUIDLE_FLAG_TIMER_STOP, false);
 		if (index < 0) {
 			default_idle_call();
@@ -247,7 +252,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		local_irq_enable();
 
 	if (entered_state >= 0) {
-		s64 diff, delay = drv->states[entered_state].exit_latency;
+		s64 diff, delay = drv->states[entered_state].exit_latency_ns;
 		int i;
 
 		/*
@@ -255,18 +260,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		 * This can be moved to within driver enter routine,
 		 * but that results in multiple copies of same code.
 		 */
-		diff = ktime_us_delta(time_end, time_start);
-		if (diff > INT_MAX)
-			diff = INT_MAX;
+		diff = ktime_sub(time_end, time_start);
 
-		dev->last_residency = (int)diff;
-		dev->states_usage[entered_state].time += dev->last_residency;
+		dev->last_residency_ns = diff;
+		dev->states_usage[entered_state].time_ns += diff;
 		dev->states_usage[entered_state].usage++;
 
-		if (diff < drv->states[entered_state].target_residency) {
+		if (diff < drv->states[entered_state].target_residency_ns) {
 			for (i = entered_state - 1; i >= 0; i--) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				/* Shallower states are enabled, so update. */
@@ -275,22 +277,21 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 			}
 		} else if (diff > delay) {
 			for (i = entered_state + 1; i < drv->state_count; i++) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				/*
 				 * Update if a deeper state would have been a
 				 * better match for the observed idle duration.
 				 */
-				if (diff - delay >= drv->states[i].target_residency)
+				if (diff - delay >= drv->states[i].target_residency_ns)
 					dev->states_usage[entered_state].below++;
 
 				break;
 			}
 		}
 	} else {
-		dev->last_residency = 0;
+		dev->last_residency_ns = 0;
 	}
 
 	return entered_state;
@@ -380,10 +381,11 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
 
 	limit_ns = TICK_NSEC;
 	for (i = 1; i < drv->state_count; i++) {
-		if (drv->states[i].disabled || dev->states_usage[i].disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
-		limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+		limit_ns = drv->states[i].target_residency_ns;
+		break;
 	}
 
 	dev->poll_limit_ns = limit_ns;
@@ -554,7 +556,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
 static void __cpuidle_device_init(struct cpuidle_device *dev)
 {
 	memset(dev->states_usage, 0, sizeof(dev->states_usage));
-	dev->last_residency = 0;
+	dev->last_residency_ns = 0;
 	dev->next_hrtimer = 0;
 }
 
@@ -567,12 +569,20 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-	int ret;
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+	int i, ret;
 
 	if (!try_module_get(drv->owner))
 		return -EINVAL;
 
+	for (i = 0; i < drv->state_count; i++) {
+		if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE)
+			dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+
+		if (drv->states[i].flags & CPUIDLE_FLAG_OFF)
+			dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER;
+	}
+
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
 
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 80c1a830d991..4070e573bf43 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -62,24 +62,23 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
  * __cpuidle_set_driver - set per CPU driver variables for the given driver.
  * @drv: a valid pointer to a struct cpuidle_driver
  *
- * For each CPU in the driver's cpumask, unset the registered driver per CPU
- * to @drv.
- *
- * Returns 0 on success, -EBUSY if the CPUs have driver(s) already.
+ * Returns 0 on success, -EBUSY if any CPU in the cpumask have a driver
+ * different from drv already.
  */
 static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
 {
 	int cpu;
 
 	for_each_cpu(cpu, drv->cpumask) {
+		struct cpuidle_driver *old_drv;
 
-		if (__cpuidle_get_cpu_driver(cpu)) {
-			__cpuidle_unset_driver(drv);
+		old_drv = __cpuidle_get_cpu_driver(cpu);
+		if (old_drv && old_drv != drv)
 			return -EBUSY;
-		}
+	}
 
+	for_each_cpu(cpu, drv->cpumask)
 		per_cpu(cpuidle_drivers, cpu) = drv;
-	}
 
 	return 0;
 }
@@ -156,8 +155,6 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 {
 	int i;
 
-	drv->refcnt = 0;
-
 	/*
 	 * Use all possible CPUs as the default, because if the kernel boots
 	 * with some CPUs offline and then we online one of them, the CPU
@@ -166,16 +163,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 	if (!drv->cpumask)
 		drv->cpumask = (struct cpumask *)cpu_possible_mask;
 
-	/*
-	 * Look for the timer stop flag in the different states, so that we know
-	 * if the broadcast timer has to be set up.  The loop is in the reverse
-	 * order, because usually one of the deeper states have this flag set.
-	 */
-	for (i = drv->state_count - 1; i >= 0 ; i--) {
-		if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
+	for (i = 0; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+
+		/*
+		 * Look for the timer stop flag in the different states and if
+		 * it is found, indicate that the broadcast timer has to be set
+		 * up.
+		 */
+		if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
 			drv->bctimer = 1;
-			break;
-		}
+
+		/*
+		 * The core will use the target residency and exit latency
+		 * values in nanoseconds, but allow drivers to provide them in
+		 * microseconds too.
+		 */
+		if (s->target_residency > 0)
+			s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+
+		if (s->exit_latency > 0)
+			s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
 	}
 }
 
@@ -230,9 +238,6 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
  */
 static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
 {
-	if (WARN_ON(drv->refcnt > 0))
-		return;
-
 	if (drv->bctimer) {
 		drv->bctimer = 0;
 		on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
@@ -340,42 +345,39 @@ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
 EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
 
 /**
- * cpuidle_driver_ref - get a reference to the driver.
- *
- * Increment the reference counter of the cpuidle driver associated with
- * the current CPU.
- *
- * Returns a pointer to the driver, or NULL if the current CPU has no driver.
+ * cpuidle_driver_state_disabled - Disable or enable an idle state
+ * @drv: cpuidle driver owning the state
+ * @idx: State index
+ * @disable: Whether or not to disable the state
  */
-struct cpuidle_driver *cpuidle_driver_ref(void)
+void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
+				 bool disable)
 {
-	struct cpuidle_driver *drv;
+	unsigned int cpu;
 
-	spin_lock(&cpuidle_driver_lock);
+	mutex_lock(&cpuidle_lock);
 
-	drv = cpuidle_get_driver();
-	if (drv)
-		drv->refcnt++;
+	spin_lock(&cpuidle_driver_lock);
 
-	spin_unlock(&cpuidle_driver_lock);
-	return drv;
-}
+	if (!drv->cpumask) {
+		drv->states[idx].flags |= CPUIDLE_FLAG_UNUSABLE;
+		goto unlock;
+	}
 
-/**
- * cpuidle_driver_unref - puts down the refcount for the driver
- *
- * Decrement the reference counter of the cpuidle driver associated with
- * the current CPU.
- */
-void cpuidle_driver_unref(void)
-{
-	struct cpuidle_driver *drv;
+	for_each_cpu(cpu, drv->cpumask) {
+		struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
 
-	spin_lock(&cpuidle_driver_lock);
+		if (!dev)
+			continue;
 
-	drv = cpuidle_get_driver();
-	if (drv && !WARN_ON(drv->refcnt <= 0))
-		drv->refcnt--;
+		if (disable)
+			dev->states_usage[idx].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+		else
+			dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_DRIVER;
+	}
 
+unlock:
 	spin_unlock(&cpuidle_driver_lock);
+
+	mutex_unlock(&cpuidle_lock);
 }
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index e9801f26c732..e48271e117a3 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
  * cpuidle_governor_latency_req - Compute a latency constraint for CPU
  * @cpu: Target CPU
  */
-int cpuidle_governor_latency_req(unsigned int cpu)
+s64 cpuidle_governor_latency_req(unsigned int cpu)
 {
 	int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	struct device *device = get_cpu_device(cpu);
 	int device_req = dev_pm_qos_raw_resume_latency(device);
 
-	return device_req < global_req ? device_req : global_req;
+	if (device_req > global_req)
+		device_req = global_req;
+
+	return (s64)device_req * NSEC_PER_USEC;
 }
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 7a703d2e0064..cb2a96eafc02 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
 			   struct cpuidle_device *dev,
 			   bool *stop_tick)
 {
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	if (!drv->state_count || latency_req == 0) {
 		*stop_tick = false;
@@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv,
 	return 0;
 }
 
-static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
+static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
 {
 	unsigned int val;
-	u64 block_ns = block_us*NSEC_PER_USEC;
 
 	/* Grow cpu_halt_poll_us if
 	 * cpu_halt_poll_us < block_ns < guest_halt_poll_us
@@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index)
 	dev->last_state_idx = index;
 
 	if (index != 0)
-		adjust_poll_limit(dev, dev->last_residency);
+		adjust_poll_limit(dev, dev->last_residency_ns);
 }
 
 /**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 428eeb832fe7..8e9058c4ea63 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -27,8 +27,8 @@ struct ladder_device_state {
 	struct {
 		u32 promotion_count;
 		u32 demotion_count;
-		u32 promotion_time;
-		u32 demotion_time;
+		u64 promotion_time_ns;
+		u64 demotion_time_ns;
 	} threshold;
 	struct {
 		int promotion_count;
@@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct ladder_device_state *last_state;
-	int last_residency, last_idx = dev->last_state_idx;
+	int last_idx = dev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 last_residency;
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
@@ -80,14 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	last_state = &ldev->states[last_idx];
 
-	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
+	last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&
-	    !drv->states[last_idx + 1].disabled &&
 	    !dev->states_usage[last_idx + 1].disable &&
-	    last_residency > last_state->threshold.promotion_time &&
-	    drv->states[last_idx + 1].exit_latency <= latency_req) {
+	    last_residency > last_state->threshold.promotion_time_ns &&
+	    drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -98,13 +98,12 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	/* consider demotion */
 	if (last_idx > first_idx &&
-	    (drv->states[last_idx].disabled ||
-	    dev->states_usage[last_idx].disable ||
-	    drv->states[last_idx].exit_latency > latency_req)) {
+	    (dev->states_usage[last_idx].disable ||
+	    drv->states[last_idx].exit_latency_ns > latency_req)) {
 		int i;
 
 		for (i = last_idx - 1; i > first_idx; i--) {
-			if (drv->states[i].exit_latency <= latency_req)
+			if (drv->states[i].exit_latency_ns <= latency_req)
 				break;
 		}
 		ladder_do_selection(dev, ldev, last_idx, i);
@@ -112,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 	}
 
 	if (last_idx > first_idx &&
-	    last_residency < last_state->threshold.demotion_time) {
+	    last_residency < last_state->threshold.demotion_time_ns) {
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
 		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
@@ -152,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
 		lstate->threshold.demotion_count = DEMOTION_COUNT;
 
 		if (i < drv->state_count - 1)
-			lstate->threshold.promotion_time = state->exit_latency;
+			lstate->threshold.promotion_time_ns = state->exit_latency_ns;
 		if (i > first_idx)
-			lstate->threshold.demotion_time = state->exit_latency;
+			lstate->threshold.demotion_time_ns = state->exit_latency_ns;
 	}
 
 	return 0;
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e5a5d0c8d66b..b0a7ad566081 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,22 +19,12 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
-/*
- * Please note when changing the tuning values:
- * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
- * a scaling operation multiplication may overflow on 32 bit platforms.
- * In that case, #define RESOLUTION as ULL to get 64 bit result:
- * #define RESOLUTION 1024ULL
- *
- * The default values do not overflow.
- */
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
 #define RESOLUTION 1024
 #define DECAY 8
-#define MAX_INTERESTING 50000
-
+#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
 
 /*
  * Concepts and ideas behind the menu governor
@@ -120,14 +110,14 @@ struct menu_device {
 	int             needs_update;
 	int             tick_wakeup;
 
-	unsigned int	next_timer_us;
+	u64		next_timer_ns;
 	unsigned int	bucket;
 	unsigned int	correction_factor[BUCKETS];
 	unsigned int	intervals[INTERVALS];
 	int		interval_ptr;
 };
 
-static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
 {
 	int bucket = 0;
 
@@ -140,15 +130,15 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
 	if (nr_iowaiters)
 		bucket = BUCKETS/2;
 
-	if (duration < 10)
+	if (duration_ns < 10ULL * NSEC_PER_USEC)
 		return bucket;
-	if (duration < 100)
+	if (duration_ns < 100ULL * NSEC_PER_USEC)
 		return bucket + 1;
-	if (duration < 1000)
+	if (duration_ns < 1000ULL * NSEC_PER_USEC)
 		return bucket + 2;
-	if (duration < 10000)
+	if (duration_ns < 10000ULL * NSEC_PER_USEC)
 		return bucket + 3;
-	if (duration < 100000)
+	if (duration_ns < 100000ULL * NSEC_PER_USEC)
 		return bucket + 4;
 	return bucket + 5;
 }
@@ -276,13 +266,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int i;
-	int idx;
-	unsigned int interactivity_req;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int predicted_us;
+	u64 predicted_ns;
+	u64 interactivity_req;
 	unsigned long nr_iowaiters;
 	ktime_t delta_next;
+	int i, idx;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -290,15 +280,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
+	data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
 
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
-	    ((data->next_timer_us < drv->states[1].target_residency ||
-	      latency_req < drv->states[1].exit_latency) &&
-	     !drv->states[0].disabled && !dev->states_usage[0].disable)) {
+	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
+	      latency_req < drv->states[1].exit_latency_ns) &&
+	     !dev->states_usage[0].disable)) {
 		/*
 		 * In this case state[0] will be used no matter what, so return
 		 * it right away and keep the tick running if state[0] is a
@@ -308,18 +298,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return 0;
 	}
 
-	/*
-	 * Force the result of multiplication to be 64 bits even if both
-	 * operands are 32 bits.
-	 * Make sure to round up for half microseconds.
-	 */
-	predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
-					 data->correction_factor[data->bucket],
-					 RESOLUTION * DECAY);
-	/*
-	 * Use the lowest expected idle interval to pick the idle state.
-	 */
-	predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
+	/* Round up the result for half microseconds. */
+	predicted_us = div_u64(data->next_timer_ns *
+			       data->correction_factor[data->bucket] +
+			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
+			       RESOLUTION * DECAY * NSEC_PER_USEC);
+	/* Use the lowest expected idle interval to pick the idle state. */
+	predicted_ns = (u64)min(predicted_us,
+				get_typical_interval(data, predicted_us)) *
+				NSEC_PER_USEC;
 
 	if (tick_nohz_tick_stopped()) {
 		/*
@@ -330,14 +317,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * the known time till the closest timer event for the idle
 		 * state selection.
 		 */
-		if (predicted_us < TICK_USEC)
-			predicted_us = ktime_to_us(delta_next);
+		if (predicted_ns < TICK_NSEC)
+			predicted_ns = delta_next;
 	} else {
 		/*
 		 * Use the performance multiplier and the user-configurable
 		 * latency_req to determine the maximum exit latency.
 		 */
-		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
+		interactivity_req = div64_u64(predicted_ns,
+					      performance_multiplier(nr_iowaiters));
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
@@ -349,27 +337,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	idx = -1;
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
 		if (idx == -1)
 			idx = i; /* first enabled state */
 
-		if (s->target_residency > predicted_us) {
+		if (s->target_residency_ns > predicted_ns) {
 			/*
 			 * Use a physical idle state, not busy polling, unless
 			 * a timer is going to trigger soon enough.
 			 */
 			if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
-			    s->exit_latency <= latency_req &&
-			    s->target_residency <= data->next_timer_us) {
-				predicted_us = s->target_residency;
+			    s->exit_latency_ns <= latency_req &&
+			    s->target_residency_ns <= data->next_timer_ns) {
+				predicted_ns = s->target_residency_ns;
 				idx = i;
 				break;
 			}
-			if (predicted_us < TICK_USEC)
+			if (predicted_ns < TICK_NSEC)
 				break;
 
 			if (!tick_nohz_tick_stopped()) {
@@ -379,7 +366,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * tick in that case and let the governor run
 				 * again in the next iteration of the loop.
 				 */
-				predicted_us = drv->states[idx].target_residency;
+				predicted_ns = drv->states[idx].target_residency_ns;
 				break;
 			}
 
@@ -389,13 +376,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			 * closest timer event, select this one to avoid getting
 			 * stuck in the shallow one for too long.
 			 */
-			if (drv->states[idx].target_residency < TICK_USEC &&
-			    s->target_residency <= ktime_to_us(delta_next))
+			if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+			    s->target_residency_ns <= delta_next)
 				idx = i;
 
 			return idx;
 		}
-		if (s->exit_latency > latency_req)
+		if (s->exit_latency_ns > latency_req)
 			break;
 
 		idx = i;
@@ -409,12 +396,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	     predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_next_us = ktime_to_us(delta_next);
-
+	     predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
-		if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
 			/*
 			 * The tick is not going to be stopped and the target
 			 * residency of the state to be returned is not within
@@ -422,12 +407,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			 * tick, so try to correct that.
 			 */
 			for (i = idx - 1; i >= 0; i--) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				idx = i;
-				if (drv->states[i].target_residency <= delta_next_us)
+				if (drv->states[i].target_residency_ns <= delta_next)
 					break;
 			}
 		}
@@ -463,7 +447,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	int last_idx = dev->last_state_idx;
 	struct cpuidle_state *target = &drv->states[last_idx];
-	unsigned int measured_us;
+	u64 measured_ns;
 	unsigned int new_factor;
 
 	/*
@@ -481,7 +465,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * assume the state was never reached and the exit latency is 0.
 	 */
 
-	if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+	if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
 		/*
 		 * The nohz code said that there wouldn't be any events within
 		 * the tick boundary (if the tick was stopped), but the idle
@@ -491,7 +475,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * have been idle long (but not forever) to help the idle
 		 * duration predictor do a better job next time.
 		 */
-		measured_us = 9 * MAX_INTERESTING / 10;
+		measured_ns = 9 * MAX_INTERESTING / 10;
 	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
 		   dev->poll_time_limit) {
 		/*
@@ -501,28 +485,29 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * the CPU might have been woken up from idle by the next timer.
 		 * Assume that to be the case.
 		 */
-		measured_us = data->next_timer_us;
+		measured_ns = data->next_timer_ns;
 	} else {
 		/* measured value */
-		measured_us = dev->last_residency;
+		measured_ns = dev->last_residency_ns;
 
 		/* Deduct exit latency */
-		if (measured_us > 2 * target->exit_latency)
-			measured_us -= target->exit_latency;
+		if (measured_ns > 2 * target->exit_latency_ns)
+			measured_ns -= target->exit_latency_ns;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/* Make sure our coefficients do not exceed unity */
-	if (measured_us > data->next_timer_us)
-		measured_us = data->next_timer_us;
+	if (measured_ns > data->next_timer_ns)
+		measured_ns = data->next_timer_ns;
 
 	/* Update our correction ratio */
 	new_factor = data->correction_factor[data->bucket];
 	new_factor -= new_factor / DECAY;
 
-	if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
-		new_factor += RESOLUTION * measured_us / data->next_timer_us;
+	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
+		new_factor += div64_u64(RESOLUTION * measured_ns,
+					data->next_timer_ns);
 	else
 		/*
 		 * we were idle so long that we count it as a perfect
@@ -542,7 +527,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->correction_factor[data->bucket] = new_factor;
 
 	/* update the repeating-pattern data */
-	data->intervals[data->interval_ptr++] = measured_us;
+	data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
 	if (data->interval_ptr >= INTERVALS)
 		data->interval_ptr = 0;
 }
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index b5a0e498f798..6deaaf5f05b5 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -104,7 +104,7 @@ struct teo_cpu {
 	u64 sleep_length_ns;
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
 	int interval_idx;
-	unsigned int intervals[INTERVALS];
+	u64 intervals[INTERVALS];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
 	int i, idx_hit = -1, idx_timer = -1;
-	unsigned int measured_us;
+	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
 		/*
@@ -127,23 +126,28 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * enough to the closest timer event expected at the idle state
 		 * selection time to be discarded.
 		 */
-		measured_us = UINT_MAX;
+		measured_ns = U64_MAX;
 	} else {
-		unsigned int lat;
+		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
 
-		lat = drv->states[dev->last_state_idx].exit_latency;
-
-		measured_us = ktime_to_us(cpu_data->time_span_ns);
+		/*
+		 * The computations below are to determine whether or not the
+		 * (saved) time till the next timer event and the measured idle
+		 * duration fall into the same "bin", so use last_residency_ns
+		 * for that instead of time_span_ns which includes the cpuidle
+		 * overhead.
+		 */
+		measured_ns = dev->last_residency_ns;
 		/*
 		 * The delay between the wakeup and the first instruction
 		 * executed by the CPU is not likely to be worst-case every
 		 * time, so take 1/2 of the exit latency as a very rough
 		 * approximation of the average of it.
 		 */
-		if (measured_us >= lat)
-			measured_us -= lat / 2;
+		if (measured_ns >= lat_ns)
+			measured_ns -= lat_ns / 2;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/*
@@ -155,9 +159,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
 
-		if (drv->states[i].target_residency <= sleep_length_us) {
+		if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
-			if (drv->states[i].target_residency <= measured_us)
+			if (drv->states[i].target_residency_ns <= measured_ns)
 				idx_hit = i;
 		}
 	}
@@ -193,30 +197,35 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * Save idle duration values corresponding to non-timer wakeups for
 	 * pattern detection.
 	 */
-	cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
-	if (cpu_data->interval_idx > INTERVALS)
+	cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
+	if (cpu_data->interval_idx >= INTERVALS)
 		cpu_data->interval_idx = 0;
 }
 
+static bool teo_time_ok(u64 interval_ns)
+{
+	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
+}
+
 /**
  * teo_find_shallower_state - Find shallower idle state matching given duration.
  * @drv: cpuidle driver containing state data.
  * @dev: Target CPU.
  * @state_idx: Index of the capping idle state.
- * @duration_us: Idle duration value to match.
+ * @duration_ns: Idle duration value to match.
  */
 static int teo_find_shallower_state(struct cpuidle_driver *drv,
 				    struct cpuidle_device *dev, int state_idx,
-				    unsigned int duration_us)
+				    u64 duration_ns)
 {
 	int i;
 
 	for (i = state_idx - 1; i >= 0; i--) {
-		if (drv->states[i].disabled || dev->states_usage[i].disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
 		state_idx = i;
-		if (drv->states[i].target_residency <= duration_us)
+		if (drv->states[i].target_residency_ns <= duration_ns)
 			break;
 	}
 	return state_idx;
@@ -232,9 +241,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		      bool *stop_tick)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int duration_us, count;
-	int max_early_idx, constraint_idx, idx, i;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	u64 duration_ns;
+	unsigned int hits, misses, early_hits;
+	int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
 	ktime_t delta_tick;
 
 	if (dev->last_state_idx >= 0) {
@@ -244,50 +254,92 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	cpu_data->time_span_ns = local_clock();
 
-	cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
-	duration_us = ktime_to_us(cpu_data->sleep_length_ns);
+	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+	cpu_data->sleep_length_ns = duration_ns;
 
-	count = 0;
+	hits = 0;
+	misses = 0;
+	early_hits = 0;
 	max_early_idx = -1;
+	prev_max_early_idx = -1;
 	constraint_idx = drv->state_count;
 	idx = -1;
 
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable) {
+		if (dev->states_usage[i].disable) {
+			/*
+			 * Ignore disabled states with target residencies beyond
+			 * the anticipated idle duration.
+			 */
+			if (s->target_residency_ns > duration_ns)
+				continue;
+
+			/*
+			 * This state is disabled, so the range of idle duration
+			 * values corresponding to it is covered by the current
+			 * candidate state, but still the "hits" and "misses"
+			 * metrics of the disabled state need to be used to
+			 * decide whether or not the state covering the range in
+			 * question is good enough.
+			 */
+			hits = cpu_data->states[i].hits;
+			misses = cpu_data->states[i].misses;
+
+			if (early_hits >= cpu_data->states[i].early_hits ||
+			    idx < 0)
+				continue;
+
 			/*
-			 * If the "early hits" metric of a disabled state is
-			 * greater than the current maximum, it should be taken
-			 * into account, because it would be a mistake to select
-			 * a deeper state with lower "early hits" metric.  The
-			 * index cannot be changed to point to it, however, so
-			 * just increase the max count alone and let the index
-			 * still point to a shallower idle state.
+			 * If the current candidate state has been the one with
+			 * the maximum "early hits" metric so far, the "early
+			 * hits" metric of the disabled state replaces the
+			 * current "early hits" count to avoid selecting a
+			 * deeper state with lower "early hits" metric.
 			 */
-			if (max_early_idx >= 0 &&
-			    count < cpu_data->states[i].early_hits)
-				count = cpu_data->states[i].early_hits;
+			if (max_early_idx == idx) {
+				early_hits = cpu_data->states[i].early_hits;
+				continue;
+			}
+
+			/*
+			 * The current candidate state is closer to the disabled
+			 * one than the current maximum "early hits" state, so
+			 * replace the latter with it, but in case the maximum
+			 * "early hits" state index has not been set so far,
+			 * check if the current candidate state is not too
+			 * shallow for that role.
+			 */
+			if (teo_time_ok(drv->states[idx].target_residency_ns)) {
+				prev_max_early_idx = max_early_idx;
+				early_hits = cpu_data->states[i].early_hits;
+				max_early_idx = idx;
+			}
 
 			continue;
 		}
 
-		if (idx < 0)
+		if (idx < 0) {
 			idx = i; /* first enabled state */
+			hits = cpu_data->states[i].hits;
+			misses = cpu_data->states[i].misses;
+		}
 
-		if (s->target_residency > duration_us)
+		if (s->target_residency_ns > duration_ns)
 			break;
 
-		if (s->exit_latency > latency_req && constraint_idx > i)
+		if (s->exit_latency_ns > latency_req && constraint_idx > i)
 			constraint_idx = i;
 
 		idx = i;
+		hits = cpu_data->states[i].hits;
+		misses = cpu_data->states[i].misses;
 
-		if (count < cpu_data->states[i].early_hits &&
-		    !(tick_nohz_tick_stopped() &&
-		      drv->states[i].target_residency < TICK_USEC)) {
-			count = cpu_data->states[i].early_hits;
+		if (early_hits < cpu_data->states[i].early_hits &&
+		    teo_time_ok(drv->states[i].target_residency_ns)) {
+			prev_max_early_idx = max_early_idx;
+			early_hits = cpu_data->states[i].early_hits;
 			max_early_idx = i;
 		}
 	}
@@ -300,10 +352,19 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * "early hits" metric, but if that cannot be determined, just use the
 	 * state selected so far.
 	 */
-	if (cpu_data->states[idx].hits <= cpu_data->states[idx].misses &&
-	    max_early_idx >= 0) {
-		idx = max_early_idx;
-		duration_us = drv->states[idx].target_residency;
+	if (hits <= misses) {
+		/*
+		 * The current candidate state is not suitable, so take the one
+		 * whose "early hits" metric is the maximum for the range of
+		 * shallower states.
+		 */
+		if (idx == max_early_idx)
+			max_early_idx = prev_max_early_idx;
+
+		if (max_early_idx >= 0) {
+			idx = max_early_idx;
+			duration_ns = drv->states[idx].target_residency_ns;
+		}
 	}
 
 	/*
@@ -316,18 +377,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx < 0) {
 		idx = 0; /* No states enabled. Must use 0. */
 	} else if (idx > 0) {
+		unsigned int count = 0;
 		u64 sum = 0;
 
-		count = 0;
-
 		/*
 		 * Count and sum the most recent idle duration values less than
 		 * the current expected idle duration value.
 		 */
 		for (i = 0; i < INTERVALS; i++) {
-			unsigned int val = cpu_data->intervals[i];
+			u64 val = cpu_data->intervals[i];
 
-			if (val >= duration_us)
+			if (val >= duration_ns)
 				continue;
 
 			count++;
@@ -339,17 +399,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * values are in the interesting range.
 		 */
 		if (count > INTERVALS / 2) {
-			unsigned int avg_us = div64_u64(sum, count);
+			u64 avg_ns = div64_u64(sum, count);
 
 			/*
 			 * Avoid spending too much time in an idle state that
 			 * would be too shallow.
 			 */
-			if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
-				duration_us = avg_us;
-				if (drv->states[idx].target_residency > avg_us)
+			if (teo_time_ok(avg_ns)) {
+				duration_ns = avg_ns;
+				if (drv->states[idx].target_residency_ns > avg_ns)
 					idx = teo_find_shallower_state(drv, dev,
-								       idx, avg_us);
+								       idx, avg_ns);
 			}
 		}
 	}
@@ -359,9 +419,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	    duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_tick_us = ktime_to_us(delta_tick);
-
+	    duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
 		/*
@@ -370,8 +428,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * till the closest timer including the tick, try to correct
 		 * that.
 		 */
-		if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
-			idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+			idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
 	}
 
 	return idx;
@@ -415,7 +473,7 @@ static int teo_enable_device(struct cpuidle_driver *drv,
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
 	for (i = 0; i < INTERVALS; i++)
-		cpu_data->intervals[i] = UINT_MAX;
+		cpu_data->intervals[i] = U64_MAX;
 
 	return 0;
 }
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index c8fa5f41dfc4..f7e83613ae94 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -49,9 +49,10 @@ void cpuidle_poll_state_init(struct cpuidle_driver *drv)
 	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
 	state->exit_latency = 0;
 	state->target_residency = 0;
+	state->exit_latency_ns = 0;
+	state->target_residency_ns = 0;
 	state->power_usage = -1;
 	state->enter = poll_idle;
-	state->disabled = false;
 	state->flags = CPUIDLE_FLAG_POLLING;
 }
 EXPORT_SYMBOL_GPL(cpuidle_poll_state_init);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 2bb2683b493c..cdeedbf02646 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -142,6 +142,7 @@ static struct attribute_group cpuidle_attr_group = {
 
 /**
  * cpuidle_add_interface - add CPU global sysfs attributes
+ * @dev: the target device
  */
 int cpuidle_add_interface(struct device *dev)
 {
@@ -153,6 +154,7 @@ int cpuidle_add_interface(struct device *dev)
 
 /**
  * cpuidle_remove_interface - remove CPU global sysfs attributes
+ * @dev: the target device
  */
 void cpuidle_remove_interface(struct device *dev)
 {
@@ -255,25 +257,6 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
 	return sprintf(buf, "%u\n", state->_name);\
 }
 
-#define define_store_state_ull_function(_name) \
-static ssize_t store_state_##_name(struct cpuidle_state *state, \
-				   struct cpuidle_state_usage *state_usage, \
-				   const char *buf, size_t size)	\
-{ \
-	unsigned long long value; \
-	int err; \
-	if (!capable(CAP_SYS_ADMIN)) \
-		return -EPERM; \
-	err = kstrtoull(buf, 0, &value); \
-	if (err) \
-		return err; \
-	if (value) \
-		state_usage->_name = 1; \
-	else \
-		state_usage->_name = 0; \
-	return size; \
-}
-
 #define define_show_state_ull_function(_name) \
 static ssize_t show_state_##_name(struct cpuidle_state *state, \
 				  struct cpuidle_state_usage *state_usage, \
@@ -292,18 +275,68 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
 	return sprintf(buf, "%s\n", state->_name);\
 }
 
-define_show_state_function(exit_latency)
-define_show_state_function(target_residency)
+#define define_show_state_time_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+				  struct cpuidle_state_usage *state_usage, \
+				  char *buf) \
+{ \
+	return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
+}
+
+define_show_state_time_function(exit_latency)
+define_show_state_time_function(target_residency)
 define_show_state_function(power_usage)
 define_show_state_ull_function(usage)
-define_show_state_ull_function(time)
 define_show_state_str_function(name)
 define_show_state_str_function(desc)
-define_show_state_ull_function(disable)
-define_store_state_ull_function(disable)
 define_show_state_ull_function(above)
 define_show_state_ull_function(below)
 
+static ssize_t show_state_time(struct cpuidle_state *state,
+			       struct cpuidle_state_usage *state_usage,
+			       char *buf)
+{
+	return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
+}
+
+static ssize_t show_state_disable(struct cpuidle_state *state,
+				  struct cpuidle_state_usage *state_usage,
+				  char *buf)
+{
+	return sprintf(buf, "%llu\n",
+		       state_usage->disable & CPUIDLE_STATE_DISABLED_BY_USER);
+}
+
+static ssize_t store_state_disable(struct cpuidle_state *state,
+				   struct cpuidle_state_usage *state_usage,
+				   const char *buf, size_t size)
+{
+	unsigned int value;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = kstrtouint(buf, 0, &value);
+	if (err)
+		return err;
+
+	if (value)
+		state_usage->disable |= CPUIDLE_STATE_DISABLED_BY_USER;
+	else
+		state_usage->disable &= ~CPUIDLE_STATE_DISABLED_BY_USER;
+
+	return size;
+}
+
+static ssize_t show_state_default_status(struct cpuidle_state *state,
+					  struct cpuidle_state_usage *state_usage,
+					  char *buf)
+{
+	return sprintf(buf, "%s\n",
+		       state->flags & CPUIDLE_FLAG_OFF ? "disabled" : "enabled");
+}
+
 define_one_state_ro(name, show_state_name);
 define_one_state_ro(desc, show_state_desc);
 define_one_state_ro(latency, show_state_exit_latency);
@@ -314,6 +347,7 @@ define_one_state_ro(time, show_state_time);
 define_one_state_rw(disable, show_state_disable, store_state_disable);
 define_one_state_ro(above, show_state_above);
 define_one_state_ro(below, show_state_below);
+define_one_state_ro(default_status, show_state_default_status);
 
 static struct attribute *cpuidle_state_default_attrs[] = {
 	&attr_name.attr,
@@ -326,6 +360,7 @@ static struct attribute *cpuidle_state_default_attrs[] = {
 	&attr_disable.attr,
 	&attr_above.attr,
 	&attr_below.attr,
+	&attr_default_status.attr,
 	NULL
 };
 
@@ -592,7 +627,7 @@ static struct kobj_type ktype_driver_cpuidle = {
 
 /**
  * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute
- * @device: the target device
+ * @dev: the target device
  */
 static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
 {
@@ -623,7 +658,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
 
 /**
  * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute
- * @device: the target device
+ * @dev: the target device
  */
 static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
 {
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1fb622f2a87d..c2767ed54dfe 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -11,6 +11,8 @@ menuconfig CRYPTO_HW
 
 if CRYPTO_HW
 
+source "drivers/crypto/allwinner/Kconfig"
+
 config CRYPTO_DEV_PADLOCK
 	tristate "Support for VIA PadLock ACE"
 	depends on X86 && !UML
@@ -26,7 +28,7 @@ config CRYPTO_DEV_PADLOCK
 config CRYPTO_DEV_PADLOCK_AES
 	tristate "PadLock driver for AES algorithm"
 	depends on CRYPTO_DEV_PADLOCK
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
 	help
 	  Use VIA PadLock for AES algorithm.
@@ -54,7 +56,7 @@ config CRYPTO_DEV_GEODE
 	tristate "Support for the Geode LX AES engine"
 	depends on X86_32 && PCI
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  Say 'Y' here to use the AMD Geode LX processor on-board AES
 	  engine for the CryptoAPI AES algorithm.
@@ -107,7 +109,7 @@ config CRYPTO_PAES_S390
 	depends on ZCRYPT
 	depends on PKEY
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms for use with protected key.
@@ -169,7 +171,7 @@ config CRYPTO_DES_S390
 	tristate "DES and Triple DES cipher algorithms"
 	depends on S390
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	help
 	  This is the s390 hardware accelerated implementation of the
@@ -182,7 +184,7 @@ config CRYPTO_AES_S390
 	tristate "AES cipher algorithms"
 	depends on S390
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197).
@@ -236,7 +238,7 @@ config CRYPTO_DEV_MARVELL_CESA
 	depends on PLAT_ORION || ARCH_MVEBU
 	select CRYPTO_LIB_AES
 	select CRYPTO_LIB_DES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
 	select SRAM
 	help
@@ -246,15 +248,15 @@ config CRYPTO_DEV_MARVELL_CESA
 	  This driver supports CPU offload through DMA transfers.
 
 config CRYPTO_DEV_NIAGARA2
-       tristate "Niagara2 Stream Processing Unit driver"
-       select CRYPTO_LIB_DES
-       select CRYPTO_BLKCIPHER
-       select CRYPTO_HASH
-       select CRYPTO_MD5
-       select CRYPTO_SHA1
-       select CRYPTO_SHA256
-       depends on SPARC64
-       help
+	tristate "Niagara2 Stream Processing Unit driver"
+	select CRYPTO_LIB_DES
+	select CRYPTO_SKCIPHER
+	select CRYPTO_HASH
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	depends on SPARC64
+	help
 	  Each core of a Niagara2 processor contains a Stream
 	  Processing Unit, which itself contains several cryptographic
 	  sub-units.  One set provides the Modular Arithmetic Unit,
@@ -265,7 +267,7 @@ config CRYPTO_DEV_NIAGARA2
 config CRYPTO_DEV_HIFN_795X
 	tristate "Driver HIFN 795x crypto accelerator chips"
 	select CRYPTO_LIB_DES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG
 	depends on PCI
 	depends on !ARCH_DMA_ADDR_T_64BIT
@@ -285,8 +287,9 @@ config CRYPTO_DEV_TALITOS
 	tristate "Talitos Freescale Security Engine (SEC)"
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
+	select CRYPTO_LIB_DES
 	select HW_RANDOM
 	depends on FSL_SOC
 	help
@@ -323,7 +326,7 @@ config CRYPTO_DEV_IXP4XX
 	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  Driver for the IXP4xx NPE crypto engine.
 
@@ -332,11 +335,12 @@ config CRYPTO_DEV_PPC4XX
 	depends on PPC && 4xx
 	select CRYPTO_HASH
 	select CRYPTO_AEAD
+	select CRYPTO_AES
 	select CRYPTO_LIB_AES
 	select CRYPTO_CCM
 	select CRYPTO_CTR
 	select CRYPTO_GCM
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  This option allows you to have support for AMCC crypto acceleration.
 
@@ -353,7 +357,7 @@ config CRYPTO_DEV_OMAP
 	depends on ARCH_OMAP2PLUS
 	help
 	  OMAP processors have various crypto HW accelerators. Select this if
-          you want to use the OMAP modules for any of the crypto algorithms.
+	  you want to use the OMAP modules for any of the crypto algorithms.
 
 if CRYPTO_DEV_OMAP
 
@@ -373,7 +377,7 @@ config CRYPTO_DEV_OMAP_AES
 	tristate "Support for OMAP AES hw engine"
 	depends on ARCH_OMAP2 || ARCH_OMAP3 || ARCH_OMAP2PLUS
 	select CRYPTO_AES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_ENGINE
 	select CRYPTO_CBC
 	select CRYPTO_ECB
@@ -387,7 +391,7 @@ config CRYPTO_DEV_OMAP_DES
 	tristate "Support for OMAP DES/3DES hw engine"
 	depends on ARCH_OMAP2PLUS
 	select CRYPTO_LIB_DES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_ENGINE
 	help
 	  OMAP processors have DES/3DES module accelerator. Select this if you
@@ -403,7 +407,7 @@ config CRYPTO_DEV_PICOXCELL
 	select CRYPTO_AEAD
 	select CRYPTO_AES
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
@@ -418,7 +422,7 @@ config CRYPTO_DEV_PICOXCELL
 config CRYPTO_DEV_SAHARA
 	tristate "Support for SAHARA crypto accelerator"
 	depends on ARCH_MXC && OF
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AES
 	select CRYPTO_ECB
 	help
@@ -426,7 +430,7 @@ config CRYPTO_DEV_SAHARA
 	  found in some Freescale i.MX chips.
 
 config CRYPTO_DEV_EXYNOS_RNG
-	tristate "EXYNOS HW pseudo random number generator support"
+	tristate "Exynos HW pseudo random number generator support"
 	depends on ARCH_EXYNOS || COMPILE_TEST
 	depends on HAS_IOMEM
 	select CRYPTO_RNG
@@ -445,7 +449,7 @@ config CRYPTO_DEV_S5P
 	depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
 	depends on HAS_IOMEM
 	select CRYPTO_AES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  This option allows you to have support for S5P crypto acceleration.
 	  Select this to offload Samsung S5PV210 or S5PC110, Exynos from AES
@@ -489,11 +493,9 @@ if CRYPTO_DEV_UX500
 endif # if CRYPTO_DEV_UX500
 
 config CRYPTO_DEV_ATMEL_AUTHENC
-	tristate "Support for Atmel IPSEC/SSL hw accelerator"
+	bool "Support for Atmel IPSEC/SSL hw accelerator"
 	depends on ARCH_AT91 || COMPILE_TEST
-	select CRYPTO_AUTHENC
-	select CRYPTO_DEV_ATMEL_AES
-	select CRYPTO_DEV_ATMEL_SHA
+	depends on CRYPTO_DEV_ATMEL_AES
 	help
 	  Some Atmel processors can combine the AES and SHA hw accelerators
 	  to enhance support of IPSEC/SSL.
@@ -505,7 +507,9 @@ config CRYPTO_DEV_ATMEL_AES
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_AES
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
+	select CRYPTO_AUTHENC if CRYPTO_DEV_ATMEL_AUTHENC
+	select CRYPTO_DEV_ATMEL_SHA if CRYPTO_DEV_ATMEL_AUTHENC
 	help
 	  Some Atmel processors have AES hw accelerator.
 	  Select this if you want to use the Atmel module for
@@ -518,7 +522,7 @@ config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_LIB_DES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	help
 	  Some Atmel processors have DES/TDES hw accelerator.
 	  Select this if you want to use the Atmel module for
@@ -590,7 +594,7 @@ config CRYPTO_DEV_MXS_DCP
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_AES
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
 	help
 	  The Freescale i.MX23/i.MX28 has SHA1/SHA256 and AES128 CBC/ECB
@@ -614,17 +618,72 @@ config CRYPTO_DEV_QCE
 	tristate "Qualcomm crypto engine accelerator"
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on HAS_IOMEM
+	help
+	  This driver supports Qualcomm crypto engine accelerator
+	  hardware. To compile this driver as a module, choose M here. The
+	  module will be called qcrypto.
+
+config CRYPTO_DEV_QCE_SKCIPHER
+	bool
+	depends on CRYPTO_DEV_QCE
 	select CRYPTO_AES
 	select CRYPTO_LIB_DES
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_XTS
 	select CRYPTO_CTR
-	select CRYPTO_BLKCIPHER
-	help
-	  This driver supports Qualcomm crypto engine accelerator
-	  hardware. To compile this driver as a module, choose M here. The
-	  module will be called qcrypto.
+	select CRYPTO_SKCIPHER
+
+config CRYPTO_DEV_QCE_SHA
+	bool
+	depends on CRYPTO_DEV_QCE
+
+choice
+	prompt "Algorithms enabled for QCE acceleration"
+	default CRYPTO_DEV_QCE_ENABLE_ALL
+	depends on CRYPTO_DEV_QCE
+	help
+	  This option allows to choose whether to build support for all algorihtms
+	  (default), hashes-only, or skciphers-only.
+
+	  The QCE engine does not appear to scale as well as the CPU to handle
+	  multiple crypto requests.  While the ipq40xx chips have 4-core CPUs, the
+	  QCE handles only 2 requests in parallel.
+
+	  Ipsec throughput seems to improve when disabling either family of
+	  algorithms, sharing the load with the CPU.  Enabling skciphers-only
+	  appears to work best.
+
+	config CRYPTO_DEV_QCE_ENABLE_ALL
+		bool "All supported algorithms"
+		select CRYPTO_DEV_QCE_SKCIPHER
+		select CRYPTO_DEV_QCE_SHA
+		help
+		  Enable all supported algorithms:
+			- AES (CBC, CTR, ECB, XTS)
+			- 3DES (CBC, ECB)
+			- DES (CBC, ECB)
+			- SHA1, HMAC-SHA1
+			- SHA256, HMAC-SHA256
+
+	config CRYPTO_DEV_QCE_ENABLE_SKCIPHER
+		bool "Symmetric-key ciphers only"
+		select CRYPTO_DEV_QCE_SKCIPHER
+		help
+		  Enable symmetric-key ciphers only:
+			- AES (CBC, CTR, ECB, XTS)
+			- 3DES (ECB, CBC)
+			- DES (ECB, CBC)
+
+	config CRYPTO_DEV_QCE_ENABLE_SHA
+		bool "Hash/HMAC only"
+		select CRYPTO_DEV_QCE_SHA
+		help
+		  Enable hashes/HMAC algorithms only:
+			- SHA1, HMAC-SHA1
+			- SHA256, HMAC-SHA256
+
+endchoice
 
 config CRYPTO_DEV_QCOM_RNG
 	tristate "Qualcomm Random Number Generator Driver"
@@ -635,7 +694,7 @@ config CRYPTO_DEV_QCOM_RNG
 	  Generator hardware found on Qualcomm SoCs.
 
 	  To compile this driver as a module, choose M here. The
-          module will be called qcom-rng. If unsure, say N.
+	  module will be called qcom-rng. If unsure, say N.
 
 config CRYPTO_DEV_VMX
 	bool "Support for VMX cryptographic acceleration instructions"
@@ -657,31 +716,6 @@ config CRYPTO_DEV_IMGTEC_HASH
 	  hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
 	  hashing algorithms.
 
-config CRYPTO_DEV_SUN4I_SS
-	tristate "Support for Allwinner Security System cryptographic accelerator"
-	depends on ARCH_SUNXI && !64BIT
-	select CRYPTO_MD5
-	select CRYPTO_SHA1
-	select CRYPTO_AES
-	select CRYPTO_LIB_DES
-	select CRYPTO_BLKCIPHER
-	help
-	  Some Allwinner SoC have a crypto accelerator named
-	  Security System. Select this if you want to use it.
-	  The Security System handle AES/DES/3DES ciphers in CBC mode
-	  and SHA1 and MD5 hash algorithms.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called sun4i-ss.
-
-config CRYPTO_DEV_SUN4I_SS_PRNG
-	bool "Support for Allwinner Security System PRNG"
-	depends on CRYPTO_DEV_SUN4I_SS
-	select CRYPTO_RNG
-	help
-	  Select this option if you want to provide kernel-side support for
-	  the Pseudo-Random Number Generator found in the Security System.
-
 config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
@@ -691,7 +725,7 @@ config CRYPTO_DEV_ROCKCHIP
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_HASH
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 
 	help
 	  This driver interfaces with the hardware crypto accelerator.
@@ -702,7 +736,7 @@ config CRYPTO_DEV_MEDIATEK
 	depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
 	select CRYPTO_AES
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_CTR
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -730,17 +764,17 @@ config CRYPTO_DEV_BCM_SPU
 	select CRYPTO_SHA512
 	help
 	  This driver provides support for Broadcom crypto acceleration using the
-	  Secure Processing Unit (SPU). The SPU driver registers ablkcipher,
+	  Secure Processing Unit (SPU). The SPU driver registers skcipher,
 	  ahash, and aead algorithms with the kernel cryptographic API.
 
 source "drivers/crypto/stm32/Kconfig"
 
 config CRYPTO_DEV_SAFEXCEL
 	tristate "Inside Secure's SafeXcel cryptographic engine driver"
-	depends on OF || PCI || COMPILE_TEST
+	depends on (OF || PCI || COMPILE_TEST) && HAS_IOMEM
 	select CRYPTO_LIB_AES
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	select CRYPTO_HASH
 	select CRYPTO_HMAC
@@ -748,6 +782,8 @@ config CRYPTO_DEV_SAFEXCEL
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
+	select CRYPTO_CHACHA20POLY1305
+	select CRYPTO_SHA3
 	help
 	  This driver interfaces with the SafeXcel EIP-97 and EIP-197 cryptographic
 	  engines designed by Inside Secure. It currently accelerates DES, 3DES and
@@ -762,7 +798,7 @@ config CRYPTO_DEV_ARTPEC6
 	select CRYPTO_AEAD
 	select CRYPTO_AES
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_CTR
 	select CRYPTO_HASH
 	select CRYPTO_SHA1
@@ -779,7 +815,7 @@ config CRYPTO_DEV_CCREE
 	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
 	default n
 	select CRYPTO_HASH
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
@@ -805,4 +841,6 @@ config CRYPTO_DEV_CCREE
 
 source "drivers/crypto/hisilicon/Kconfig"
 
+source "drivers/crypto/amlogic/Kconfig"
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index afc4753b5d28..40229d499476 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_DEV_ALLWINNER) += allwinner/
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
@@ -39,7 +40,6 @@ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
 obj-$(CONFIG_ARCH_STM32) += stm32/
-obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
@@ -48,3 +48,4 @@ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
 obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
 obj-y += hisilicon/
+obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
diff --git a/drivers/crypto/allwinner/Kconfig b/drivers/crypto/allwinner/Kconfig
new file mode 100644
index 000000000000..12e7c6a85a02
--- /dev/null
+++ b/drivers/crypto/allwinner/Kconfig
@@ -0,0 +1,87 @@
+config CRYPTO_DEV_ALLWINNER
+	bool "Support for Allwinner cryptographic offloader"
+	depends on ARCH_SUNXI || COMPILE_TEST
+	default y if ARCH_SUNXI
+	help
+	  Say Y here to get to see options for Allwinner hardware crypto devices
+
+config CRYPTO_DEV_SUN4I_SS
+	tristate "Support for Allwinner Security System cryptographic accelerator"
+	depends on ARCH_SUNXI
+	depends on PM
+	depends on CRYPTO_DEV_ALLWINNER
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_AES
+	select CRYPTO_LIB_DES
+	select CRYPTO_SKCIPHER
+	help
+	  Some Allwinner SoC have a crypto accelerator named
+	  Security System. Select this if you want to use it.
+	  The Security System handle AES/DES/3DES ciphers in CBC mode
+	  and SHA1 and MD5 hash algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sun4i-ss.
+
+config CRYPTO_DEV_SUN4I_SS_PRNG
+	bool "Support for Allwinner Security System PRNG"
+	depends on CRYPTO_DEV_SUN4I_SS
+	select CRYPTO_RNG
+	help
+	  Select this option if you want to provide kernel-side support for
+	  the Pseudo-Random Number Generator found in the Security System.
+
+config CRYPTO_DEV_SUN8I_CE
+	tristate "Support for Allwinner Crypto Engine cryptographic offloader"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ENGINE
+	select CRYPTO_ECB
+	select CRYPTO_CBC
+	select CRYPTO_AES
+	select CRYPTO_DES
+	depends on CRYPTO_DEV_ALLWINNER
+	depends on PM
+	help
+	  Select y here to have support for the crypto Engine availlable on
+	  Allwinner SoC H2+, H3, H5, H6, R40 and A64.
+	  The Crypto Engine handle AES/3DES ciphers in ECB/CBC mode.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sun8i-ce.
+
+config CRYPTO_DEV_SUN8I_CE_DEBUG
+	bool "Enable sun8i-ce stats"
+	depends on CRYPTO_DEV_SUN8I_CE
+	depends on DEBUG_FS
+	help
+	  Say y to enable sun8i-ce debug stats.
+	  This will create /sys/kernel/debug/sun8i-ce/stats for displaying
+	  the number of requests per flow and per algorithm.
+
+config CRYPTO_DEV_SUN8I_SS
+	tristate "Support for Allwinner Security System cryptographic offloader"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ENGINE
+	select CRYPTO_ECB
+	select CRYPTO_CBC
+	select CRYPTO_AES
+	select CRYPTO_DES
+	depends on CRYPTO_DEV_ALLWINNER
+	depends on PM
+	help
+	  Select y here to have support for the Security System available on
+	  Allwinner SoC A80, A83T.
+	  The Security System handle AES/3DES ciphers in ECB/CBC mode.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sun8i-ss.
+
+config CRYPTO_DEV_SUN8I_SS_DEBUG
+	bool "Enable sun8i-ss stats"
+	depends on CRYPTO_DEV_SUN8I_SS
+	depends on DEBUG_FS
+	help
+	  Say y to enable sun8i-ss debug stats.
+	  This will create /sys/kernel/debug/sun8i-ss/stats for displaying
+	  the number of requests per flow and per algorithm.
diff --git a/drivers/crypto/allwinner/Makefile b/drivers/crypto/allwinner/Makefile
new file mode 100644
index 000000000000..6effe864d7ff
--- /dev/null
+++ b/drivers/crypto/allwinner/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sun4i-ss/
+obj-$(CONFIG_CRYPTO_DEV_SUN8I_CE) += sun8i-ce/
+obj-$(CONFIG_CRYPTO_DEV_SUN8I_SS) += sun8i-ss/
diff --git a/drivers/crypto/sunxi-ss/Makefile b/drivers/crypto/allwinner/sun4i-ss/Makefile
index c0a2797d3168..c0a2797d3168 100644
--- a/drivers/crypto/sunxi-ss/Makefile
+++ b/drivers/crypto/allwinner/sun4i-ss/Makefile
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
index 6536fd4bee65..7f22d305178e 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
@@ -72,7 +72,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
 	oi = 0;
 	oo = 0;
 	do {
-		todo = min3(rx_cnt, ileft, (mi.length - oi) / 4);
+		todo = min(rx_cnt, ileft);
+		todo = min_t(size_t, todo, (mi.length - oi) / 4);
 		if (todo) {
 			ileft -= todo;
 			writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo);
@@ -87,7 +88,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
 		rx_cnt = SS_RXFIFO_SPACES(spaces);
 		tx_cnt = SS_TXFIFO_SPACES(spaces);
 
-		todo = min3(tx_cnt, oleft, (mo.length - oo) / 4);
+		todo = min(tx_cnt, oleft);
+		todo = min_t(size_t, todo, (mo.length - oo) / 4);
 		if (todo) {
 			oleft -= todo;
 			readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
@@ -239,7 +241,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 			 * todo is the number of consecutive 4byte word that we
 			 * can read from current SG
 			 */
-			todo = min3(rx_cnt, ileft / 4, (mi.length - oi) / 4);
+			todo = min(rx_cnt, ileft / 4);
+			todo = min_t(size_t, todo, (mi.length - oi) / 4);
 			if (todo && !ob) {
 				writesl(ss->base + SS_RXFIFO, mi.addr + oi,
 					todo);
@@ -253,8 +256,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 				 * we need to be able to write all buf in one
 				 * pass, so it is why we min() with rx_cnt
 				 */
-				todo = min3(rx_cnt * 4 - ob, ileft,
-					    mi.length - oi);
+				todo = min(rx_cnt * 4 - ob, ileft);
+				todo = min_t(size_t, todo, mi.length - oi);
 				memcpy(buf + ob, mi.addr + oi, todo);
 				ileft -= todo;
 				oi += todo;
@@ -274,7 +277,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 		spaces = readl(ss->base + SS_FCSR);
 		rx_cnt = SS_RXFIFO_SPACES(spaces);
 		tx_cnt = SS_TXFIFO_SPACES(spaces);
-		dev_dbg(ss->dev, "%x %u/%u %u/%u cnt=%u %u/%u %u/%u cnt=%u %u\n",
+		dev_dbg(ss->dev,
+			"%x %u/%zu %u/%u cnt=%u %u/%zu %u/%u cnt=%u %u\n",
 			mode,
 			oi, mi.length, ileft, areq->cryptlen, rx_cnt,
 			oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob);
@@ -282,7 +286,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 		if (!tx_cnt)
 			continue;
 		/* todo in 4bytes word */
-		todo = min3(tx_cnt, oleft / 4, (mo.length - oo) / 4);
+		todo = min(tx_cnt, oleft / 4);
+		todo = min_t(size_t, todo, (mo.length - oo) / 4);
 		if (todo) {
 			readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
 			oleft -= todo * 4;
@@ -308,7 +313,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 				 * no more than remaining buffer
 				 * no need to test against oleft
 				 */
-				todo = min(mo.length - oo, obl - obo);
+				todo = min_t(size_t,
+					     mo.length - oo, obl - obo);
 				memcpy(mo.addr + oo, bufo + obo, todo);
 				oleft -= todo;
 				obo += todo;
@@ -480,6 +486,7 @@ int sun4i_ss_cipher_init(struct crypto_tfm *tfm)
 	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
 	struct sun4i_ss_alg_template *algt;
 	const char *name = crypto_tfm_alg_name(tfm);
+	int err;
 
 	memset(op, 0, sizeof(struct sun4i_tfm_ctx));
 
@@ -497,13 +504,22 @@ int sun4i_ss_cipher_init(struct crypto_tfm *tfm)
 		return PTR_ERR(op->fallback_tfm);
 	}
 
+	err = pm_runtime_get_sync(op->ss->dev);
+	if (err < 0)
+		goto error_pm;
+
 	return 0;
+error_pm:
+	crypto_free_sync_skcipher(op->fallback_tfm);
+	return err;
 }
 
 void sun4i_ss_cipher_exit(struct crypto_tfm *tfm)
 {
 	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
 	crypto_free_sync_skcipher(op->fallback_tfm);
+	pm_runtime_put(op->ss->dev);
 }
 
 /* check and set the AES key, prepare the mode to be used */
@@ -524,8 +540,7 @@ int sun4i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		op->keymode = SS_AES_256BITS;
 		break;
 	default:
-		dev_err(ss->dev, "ERROR: Invalid keylen %u\n", keylen);
-		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		dev_dbg(ss->dev, "ERROR: Invalid keylen %u\n", keylen);
 		return -EINVAL;
 	}
 	op->keylen = keylen;
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
index 9aa6fe081a27..a2b67f7f8a81 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <crypto/scatterwalk.h>
 #include <linux/scatterlist.h>
@@ -22,6 +23,14 @@
 
 #include "sun4i-ss.h"
 
+static const struct ss_variant ss_a10_variant = {
+	.sha1_in_be = false,
+};
+
+static const struct ss_variant ss_a33_variant = {
+	.sha1_in_be = true,
+};
+
 static struct sun4i_ss_alg_template ss_algs[] = {
 {       .type = CRYPTO_ALG_TYPE_AHASH,
 	.mode = SS_OP_MD5,
@@ -44,7 +53,8 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun4i_req_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun4i_hash_crainit
+				.cra_init = sun4i_hash_crainit,
+				.cra_exit = sun4i_hash_craexit,
 			}
 		}
 	}
@@ -70,7 +80,8 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun4i_req_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun4i_hash_crainit
+				.cra_init = sun4i_hash_crainit,
+				.cra_exit = sun4i_hash_craexit,
 			}
 		}
 	}
@@ -223,6 +234,82 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 #endif
 };
 
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver.
+ */
+static int sun4i_ss_pm_suspend(struct device *dev)
+{
+	struct sun4i_ss_ctx *ss = dev_get_drvdata(dev);
+
+	if (ss->reset)
+		reset_control_assert(ss->reset);
+
+	clk_disable_unprepare(ss->ssclk);
+	clk_disable_unprepare(ss->busclk);
+	return 0;
+}
+
+static int sun4i_ss_pm_resume(struct device *dev)
+{
+	struct sun4i_ss_ctx *ss = dev_get_drvdata(dev);
+
+	int err;
+
+	err = clk_prepare_enable(ss->busclk);
+	if (err) {
+		dev_err(ss->dev, "Cannot prepare_enable busclk\n");
+		goto err_enable;
+	}
+
+	err = clk_prepare_enable(ss->ssclk);
+	if (err) {
+		dev_err(ss->dev, "Cannot prepare_enable ssclk\n");
+		goto err_enable;
+	}
+
+	if (ss->reset) {
+		err = reset_control_deassert(ss->reset);
+		if (err) {
+			dev_err(ss->dev, "Cannot deassert reset control\n");
+			goto err_enable;
+		}
+	}
+
+	return err;
+err_enable:
+	sun4i_ss_pm_suspend(dev);
+	return err;
+}
+
+static const struct dev_pm_ops sun4i_ss_pm_ops = {
+	SET_RUNTIME_PM_OPS(sun4i_ss_pm_suspend, sun4i_ss_pm_resume, NULL)
+};
+
+/*
+ * When power management is enabled, this function enables the PM and set the
+ * device as suspended
+ * When power management is disabled, this function just enables the device
+ */
+static int sun4i_ss_pm_init(struct sun4i_ss_ctx *ss)
+{
+	int err;
+
+	pm_runtime_use_autosuspend(ss->dev);
+	pm_runtime_set_autosuspend_delay(ss->dev, 2000);
+
+	err = pm_runtime_set_suspended(ss->dev);
+	if (err)
+		return err;
+	pm_runtime_enable(ss->dev);
+	return err;
+}
+
+static void sun4i_ss_pm_exit(struct sun4i_ss_ctx *ss)
+{
+	pm_runtime_disable(ss->dev);
+}
+
 static int sun4i_ss_probe(struct platform_device *pdev)
 {
 	u32 v;
@@ -245,6 +332,12 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 		return PTR_ERR(ss->base);
 	}
 
+	ss->variant = of_device_get_match_data(&pdev->dev);
+	if (!ss->variant) {
+		dev_err(&pdev->dev, "Missing Security System variant\n");
+		return -EINVAL;
+	}
+
 	ss->ssclk = devm_clk_get(&pdev->dev, "mod");
 	if (IS_ERR(ss->ssclk)) {
 		err = PTR_ERR(ss->ssclk);
@@ -269,18 +362,6 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 		ss->reset = NULL;
 	}
 
-	/* Enable both clocks */
-	err = clk_prepare_enable(ss->busclk);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot prepare_enable busclk\n");
-		return err;
-	}
-	err = clk_prepare_enable(ss->ssclk);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot prepare_enable ssclk\n");
-		goto error_ssclk;
-	}
-
 	/*
 	 * Check that clock have the correct rates given in the datasheet
 	 * Try to set the clock to the maximum allowed
@@ -288,16 +369,7 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 	err = clk_set_rate(ss->ssclk, cr_mod);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot set clock rate to ssclk\n");
-		goto error_clk;
-	}
-
-	/* Deassert reset if we have a reset control */
-	if (ss->reset) {
-		err = reset_control_deassert(ss->reset);
-		if (err) {
-			dev_err(&pdev->dev, "Cannot deassert reset control\n");
-			goto error_clk;
-		}
+		return err;
 	}
 
 	/*
@@ -325,12 +397,26 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "Clock ss is at %lu (%lu MHz) (must be <= %lu)\n",
 			 cr, cr / 1000000, cr_mod);
 
+	ss->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ss);
+
+	spin_lock_init(&ss->slock);
+
+	err = sun4i_ss_pm_init(ss);
+	if (err)
+		return err;
+
 	/*
 	 * Datasheet named it "Die Bonding ID"
 	 * I expect to be a sort of Security System Revision number.
 	 * Since the A80 seems to have an other version of SS
 	 * this info could be useful
 	 */
+
+	err = pm_runtime_get_sync(ss->dev);
+	if (err < 0)
+		goto error_pm;
+
 	writel(SS_ENABLED, ss->base + SS_CTL);
 	v = readl(ss->base + SS_CTL);
 	v >>= 16;
@@ -338,9 +424,7 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 	dev_info(&pdev->dev, "Die ID %d\n", v);
 	writel(0, ss->base + SS_CTL);
 
-	ss->dev = &pdev->dev;
-
-	spin_lock_init(&ss->slock);
+	pm_runtime_put_sync(ss->dev);
 
 	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
 		ss_algs[i].ss = ss;
@@ -370,7 +454,6 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 			break;
 		}
 	}
-	platform_set_drvdata(pdev, ss);
 	return 0;
 error_alg:
 	i--;
@@ -387,12 +470,8 @@ error_alg:
 			break;
 		}
 	}
-	if (ss->reset)
-		reset_control_assert(ss->reset);
-error_clk:
-	clk_disable_unprepare(ss->ssclk);
-error_ssclk:
-	clk_disable_unprepare(ss->busclk);
+error_pm:
+	sun4i_ss_pm_exit(ss);
 	return err;
 }
 
@@ -415,16 +494,17 @@ static int sun4i_ss_remove(struct platform_device *pdev)
 		}
 	}
 
-	writel(0, ss->base + SS_CTL);
-	if (ss->reset)
-		reset_control_assert(ss->reset);
-	clk_disable_unprepare(ss->busclk);
-	clk_disable_unprepare(ss->ssclk);
+	sun4i_ss_pm_exit(ss);
 	return 0;
 }
 
 static const struct of_device_id a20ss_crypto_of_match_table[] = {
-	{ .compatible = "allwinner,sun4i-a10-crypto" },
+	{ .compatible = "allwinner,sun4i-a10-crypto",
+	  .data = &ss_a10_variant
+	},
+	{ .compatible = "allwinner,sun8i-a33-crypto",
+	  .data = &ss_a33_variant
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, a20ss_crypto_of_match_table);
@@ -434,6 +514,7 @@ static struct platform_driver sun4i_ss_driver = {
 	.remove         = sun4i_ss_remove,
 	.driver         = {
 		.name           = "sun4i-ss",
+		.pm		= &sun4i_ss_pm_ops,
 		.of_match_table	= a20ss_crypto_of_match_table,
 	},
 };
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c
index fcffba5ef927..dc35edd90034 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c
@@ -19,17 +19,29 @@ int sun4i_hash_crainit(struct crypto_tfm *tfm)
 	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
 	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
 	struct sun4i_ss_alg_template *algt;
+	int err;
 
 	memset(op, 0, sizeof(struct sun4i_tfm_ctx));
 
 	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
 	op->ss = algt->ss;
 
+	err = pm_runtime_get_sync(op->ss->dev);
+	if (err < 0)
+		return err;
+
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct sun4i_req_ctx));
 	return 0;
 }
 
+void sun4i_hash_craexit(struct crypto_tfm *tfm)
+{
+	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
+	pm_runtime_put(op->ss->dev);
+}
+
 /* sun4i_hash_init: initialize request context */
 int sun4i_hash_init(struct ahash_request *areq)
 {
@@ -175,7 +187,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	 */
 	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
 	unsigned int in_i = 0;
-	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0;
+	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0;
 	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
@@ -184,6 +196,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	struct sg_mapping_iter mi;
 	int in_r, err = 0;
 	size_t copied = 0;
+	__le32 wb = 0;
 
 	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
 		__func__, crypto_tfm_alg_name(areq->base.tfm),
@@ -215,7 +228,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	 */
 	if (op->byte_count) {
 		ivmode = SS_IV_ARBITRARY;
-		for (i = 0; i < 5; i++)
+		for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
 			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
 	}
 	/* Enable the device */
@@ -272,8 +285,8 @@ static int sun4i_hash(struct ahash_request *areq)
 			 */
 			while (op->len < 64 && i < end) {
 				/* how many bytes we can read from current SG */
-				in_r = min3(mi.length - in_i, end - i,
-					    64 - op->len);
+				in_r = min(end - i, 64 - op->len);
+				in_r = min_t(size_t, mi.length - in_i, in_r);
 				memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 				op->len += in_r;
 				i += in_r;
@@ -293,8 +306,8 @@ static int sun4i_hash(struct ahash_request *areq)
 		}
 		if (mi.length - in_i > 3 && i < end) {
 			/* how many bytes we can read from current SG */
-			in_r = min3(mi.length - in_i, areq->nbytes - i,
-				    ((mi.length - in_i) / 4) * 4);
+			in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i);
+			in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r);
 			/* how many bytes we can write in the device*/
 			todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
 			writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
@@ -320,8 +333,8 @@ static int sun4i_hash(struct ahash_request *areq)
 	if ((areq->nbytes - i) < 64) {
 		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
 			/* how many bytes we can read from current SG */
-			in_r = min3(mi.length - in_i, areq->nbytes - i,
-				    64 - op->len);
+			in_r = min(areq->nbytes - i, 64 - op->len);
+			in_r = min_t(size_t, mi.length - in_i, in_r);
 			memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 			op->len += in_r;
 			i += in_r;
@@ -395,7 +408,7 @@ hash_final:
 
 		nbw = op->len - 4 * nwait;
 		if (nbw) {
-			wb = *(u32 *)(op->buf + nwait * 4);
+			wb = cpu_to_le32(*(u32 *)(op->buf + nwait * 4));
 			wb &= GENMASK((nbw * 8) - 1, 0);
 
 			op->byte_count += nbw;
@@ -404,7 +417,7 @@ hash_final:
 
 	/* write the remaining bytes of the nbw buffer */
 	wb |= ((1 << 7) << (nbw * 8));
-	bf[j++] = wb;
+	bf[j++] = le32_to_cpu(wb);
 
 	/*
 	 * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1)
@@ -423,13 +436,13 @@ hash_final:
 
 	/* write the length of data */
 	if (op->mode == SS_OP_SHA1) {
-		__be64 bits = cpu_to_be64(op->byte_count << 3);
-		bf[j++] = lower_32_bits(bits);
-		bf[j++] = upper_32_bits(bits);
+		__be64 *bits = (__be64 *)&bf[j];
+		*bits = cpu_to_be64(op->byte_count << 3);
+		j += 2;
 	} else {
-		__le64 bits = op->byte_count << 3;
-		bf[j++] = lower_32_bits(bits);
-		bf[j++] = upper_32_bits(bits);
+		__le64 *bits = (__le64 *)&bf[j];
+		*bits = cpu_to_le64(op->byte_count << 3);
+		j += 2;
 	}
 	writesl(ss->base + SS_RXFIFO, bf, j);
 
@@ -466,12 +479,15 @@ hash_final:
 	/* Get the hash from the device */
 	if (op->mode == SS_OP_SHA1) {
 		for (i = 0; i < 5; i++) {
-			v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
+			if (ss->variant->sha1_in_be)
+				v = cpu_to_le32(readl(ss->base + SS_MD0 + i * 4));
+			else
+				v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
 			memcpy(areq->result + i * 4, &v, 4);
 		}
 	} else {
 		for (i = 0; i < 4; i++) {
-			v = readl(ss->base + SS_MD0 + i * 4);
+			v = cpu_to_le32(readl(ss->base + SS_MD0 + i * 4));
 			memcpy(areq->result + i * 4, &v, 4);
 		}
 	}
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c
index 63d636424161..729aafdbea84 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c
@@ -17,7 +17,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
 {
 	struct sun4i_ss_alg_template *algt;
 	struct rng_alg *alg = crypto_rng_alg(tfm);
-	int i;
+	int i, err;
 	u32 v;
 	u32 *data = (u32 *)dst;
 	const u32 mode = SS_OP_PRNG | SS_PRNG_CONTINUE | SS_ENABLED;
@@ -28,6 +28,10 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
 	algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
 	ss = algt->ss;
 
+	err = pm_runtime_get_sync(ss->dev);
+	if (err < 0)
+		return err;
+
 	spin_lock_bh(&ss->slock);
 
 	writel(mode, ss->base + SS_CTL);
@@ -52,5 +56,8 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
 
 	writel(0, ss->base + SS_CTL);
 	spin_unlock_bh(&ss->slock);
+
+	pm_runtime_put(ss->dev);
+
 	return 0;
 }
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
index 35a27a7145f8..2b4c6333eb67 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss.h
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
@@ -22,6 +22,7 @@
 #include <linux/scatterlist.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/pm_runtime.h>
 #include <crypto/md5.h>
 #include <crypto/skcipher.h>
 #include <crypto/sha.h>
@@ -130,7 +131,16 @@
 #define SS_SEED_LEN 192
 #define SS_DATA_LEN 160
 
+/*
+ * struct ss_variant - Describe SS hardware variant
+ * @sha1_in_be:		The SHA1 digest is given by SS in BE, and so need to be inverted.
+ */
+struct ss_variant {
+	bool sha1_in_be;
+};
+
 struct sun4i_ss_ctx {
+	const struct ss_variant *variant;
 	void __iomem *base;
 	int irq;
 	struct clk *busclk;
@@ -177,6 +187,7 @@ struct sun4i_req_ctx {
 };
 
 int sun4i_hash_crainit(struct crypto_tfm *tfm);
+void sun4i_hash_craexit(struct crypto_tfm *tfm);
 int sun4i_hash_init(struct ahash_request *areq);
 int sun4i_hash_update(struct ahash_request *areq);
 int sun4i_hash_final(struct ahash_request *areq);
diff --git a/drivers/crypto/allwinner/sun8i-ce/Makefile b/drivers/crypto/allwinner/sun8i-ce/Makefile
new file mode 100644
index 000000000000..08b68c3c1ca9
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ce/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SUN8I_CE) += sun8i-ce.o
+sun8i-ce-y += sun8i-ce-core.o sun8i-ce-cipher.o
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
new file mode 100644
index 000000000000..a5fd8975f3d3
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sun8i-ce-cipher.c - hardware cryptographic offloader for
+ * Allwinner H3/A64/H5/H2+/H6/R40 SoC
+ *
+ * Copyright (C) 2016-2019 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This file add support for AES cipher with 128,192,256 bits keysize in
+ * CBC and ECB mode.
+ *
+ * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ */
+
+#include <linux/crypto.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include "sun8i-ce.h"
+
+static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct scatterlist *sg;
+
+	if (sg_nents(areq->src) > MAX_SG || sg_nents(areq->dst) > MAX_SG)
+		return true;
+
+	if (areq->cryptlen < crypto_skcipher_ivsize(tfm))
+		return true;
+
+	if (areq->cryptlen == 0 || areq->cryptlen % 16)
+		return true;
+
+	sg = areq->src;
+	while (sg) {
+		if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+			return true;
+		sg = sg_next(sg);
+	}
+	sg = areq->dst;
+	while (sg) {
+		if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+			return true;
+		sg = sg_next(sg);
+	}
+	return false;
+}
+
+static int sun8i_ce_cipher_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	int err;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sun8i_ce_alg_template *algt;
+#endif
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+	algt->stat_fb++;
+#endif
+
+	skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
+	skcipher_request_set_callback(subreq, areq->base.flags, NULL, NULL);
+	skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (rctx->op_dir & CE_DECRYPTION)
+		err = crypto_skcipher_decrypt(subreq);
+	else
+		err = crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);
+	return err;
+}
+
+static int sun8i_ce_cipher(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_ce_dev *ce = op->ce;
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sun8i_ce_alg_template *algt;
+	struct sun8i_ce_flow *chan;
+	struct ce_task *cet;
+	struct scatterlist *sg;
+	unsigned int todo, len, offset, ivsize;
+	dma_addr_t addr_iv = 0, addr_key = 0;
+	void *backup_iv = NULL;
+	u32 common, sym;
+	int flow, i;
+	int nr_sgs = 0;
+	int nr_sgd = 0;
+	int err = 0;
+
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+
+	dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
+		crypto_tfm_alg_name(areq->base.tfm),
+		areq->cryptlen,
+		rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm),
+		op->keylen);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	algt->stat_req++;
+#endif
+
+	flow = rctx->flow;
+
+	chan = &ce->chanlist[flow];
+
+	cet = chan->tl;
+	memset(cet, 0, sizeof(struct ce_task));
+
+	cet->t_id = cpu_to_le32(flow);
+	common = ce->variant->alg_cipher[algt->ce_algo_id];
+	common |= rctx->op_dir | CE_COMM_INT;
+	cet->t_common_ctl = cpu_to_le32(common);
+	/* CTS and recent CE (H6) need length in bytes, in word otherwise */
+	if (ce->variant->has_t_dlen_in_bytes)
+		cet->t_dlen = cpu_to_le32(areq->cryptlen);
+	else
+		cet->t_dlen = cpu_to_le32(areq->cryptlen / 4);
+
+	sym = ce->variant->op_mode[algt->ce_blockmode];
+	len = op->keylen;
+	switch (len) {
+	case 128 / 8:
+		sym |= CE_AES_128BITS;
+		break;
+	case 192 / 8:
+		sym |= CE_AES_192BITS;
+		break;
+	case 256 / 8:
+		sym |= CE_AES_256BITS;
+		break;
+	}
+
+	cet->t_sym_ctl = cpu_to_le32(sym);
+	cet->t_asym_ctl = 0;
+
+	addr_key = dma_map_single(ce->dev, op->key, op->keylen, DMA_TO_DEVICE);
+	cet->t_key = cpu_to_le32(addr_key);
+	if (dma_mapping_error(ce->dev, addr_key)) {
+		dev_err(ce->dev, "Cannot DMA MAP KEY\n");
+		err = -EFAULT;
+		goto theend;
+	}
+
+	ivsize = crypto_skcipher_ivsize(tfm);
+	if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
+		chan->ivlen = ivsize;
+		chan->bounce_iv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
+		if (!chan->bounce_iv) {
+			err = -ENOMEM;
+			goto theend_key;
+		}
+		if (rctx->op_dir & CE_DECRYPTION) {
+			backup_iv = kzalloc(ivsize, GFP_KERNEL);
+			if (!backup_iv) {
+				err = -ENOMEM;
+				goto theend_key;
+			}
+			offset = areq->cryptlen - ivsize;
+			scatterwalk_map_and_copy(backup_iv, areq->src, offset,
+						 ivsize, 0);
+		}
+		memcpy(chan->bounce_iv, areq->iv, ivsize);
+		addr_iv = dma_map_single(ce->dev, chan->bounce_iv, chan->ivlen,
+					 DMA_TO_DEVICE);
+		cet->t_iv = cpu_to_le32(addr_iv);
+		if (dma_mapping_error(ce->dev, addr_iv)) {
+			dev_err(ce->dev, "Cannot DMA MAP IV\n");
+			err = -ENOMEM;
+			goto theend_iv;
+		}
+	}
+
+	if (areq->src == areq->dst) {
+		nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
+				    DMA_BIDIRECTIONAL);
+		if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
+			dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend_iv;
+		}
+		nr_sgd = nr_sgs;
+	} else {
+		nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
+				    DMA_TO_DEVICE);
+		if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
+			dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend_iv;
+		}
+		nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst),
+				    DMA_FROM_DEVICE);
+		if (nr_sgd <= 0 || nr_sgd > MAX_SG) {
+			dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd);
+			err = -EINVAL;
+			goto theend_sgs;
+		}
+	}
+
+	len = areq->cryptlen;
+	for_each_sg(areq->src, sg, nr_sgs, i) {
+		cet->t_src[i].addr = cpu_to_le32(sg_dma_address(sg));
+		todo = min(len, sg_dma_len(sg));
+		cet->t_src[i].len = cpu_to_le32(todo / 4);
+		dev_dbg(ce->dev, "%s total=%u SG(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, cet->t_src[i].len, sg->offset, todo);
+		len -= todo;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %d\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	len = areq->cryptlen;
+	for_each_sg(areq->dst, sg, nr_sgd, i) {
+		cet->t_dst[i].addr = cpu_to_le32(sg_dma_address(sg));
+		todo = min(len, sg_dma_len(sg));
+		cet->t_dst[i].len = cpu_to_le32(todo / 4);
+		dev_dbg(ce->dev, "%s total=%u SG(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, cet->t_dst[i].len, sg->offset, todo);
+		len -= todo;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %d\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	chan->timeout = areq->cryptlen;
+	err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(areq->base.tfm));
+
+theend_sgs:
+	if (areq->src == areq->dst) {
+		dma_unmap_sg(ce->dev, areq->src, nr_sgs, DMA_BIDIRECTIONAL);
+	} else {
+		if (nr_sgs > 0)
+			dma_unmap_sg(ce->dev, areq->src, nr_sgs, DMA_TO_DEVICE);
+		dma_unmap_sg(ce->dev, areq->dst, nr_sgd, DMA_FROM_DEVICE);
+	}
+
+theend_iv:
+	if (areq->iv && ivsize > 0) {
+		if (addr_iv)
+			dma_unmap_single(ce->dev, addr_iv, chan->ivlen,
+					 DMA_TO_DEVICE);
+		offset = areq->cryptlen - ivsize;
+		if (rctx->op_dir & CE_DECRYPTION) {
+			memcpy(areq->iv, backup_iv, ivsize);
+			kzfree(backup_iv);
+		} else {
+			scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+						 ivsize, 0);
+		}
+		kfree(chan->bounce_iv);
+	}
+
+theend_key:
+	dma_unmap_single(ce->dev, addr_key, op->keylen, DMA_TO_DEVICE);
+
+theend:
+	return err;
+}
+
+static int sun8i_ce_handle_cipher_request(struct crypto_engine *engine, void *areq)
+{
+	int err;
+	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+	err = sun8i_ce_cipher(breq);
+	crypto_finalize_skcipher_request(engine, breq, err);
+
+	return 0;
+}
+
+int sun8i_ce_skdecrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	rctx->op_dir = CE_DECRYPTION;
+	if (sun8i_ce_cipher_need_fallback(areq))
+		return sun8i_ce_cipher_fallback(areq);
+
+	e = sun8i_ce_get_engine_number(op->ce);
+	rctx->flow = e;
+	engine = op->ce->chanlist[e].engine;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int sun8i_ce_skencrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	rctx->op_dir = CE_ENCRYPTION;
+	if (sun8i_ce_cipher_need_fallback(areq))
+		return sun8i_ce_cipher_fallback(areq);
+
+	e = sun8i_ce_get_engine_number(op->ce);
+	rctx->flow = e;
+	engine = op->ce->chanlist[e].engine;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct sun8i_ce_alg_template *algt;
+	const char *name = crypto_tfm_alg_name(tfm);
+	struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(sktfm);
+	int err;
+
+	memset(op, 0, sizeof(struct sun8i_cipher_tfm_ctx));
+
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+	op->ce = algt->ce;
+
+	sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx);
+
+	op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(op->fallback_tfm)) {
+		dev_err(op->ce->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(op->fallback_tfm));
+		return PTR_ERR(op->fallback_tfm);
+	}
+
+	dev_info(op->ce->dev, "Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&sktfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(&op->fallback_tfm->base)));
+
+	op->enginectx.op.do_one_request = sun8i_ce_handle_cipher_request;
+	op->enginectx.op.prepare_request = NULL;
+	op->enginectx.op.unprepare_request = NULL;
+
+	err = pm_runtime_get_sync(op->ce->dev);
+	if (err < 0)
+		goto error_pm;
+
+	return 0;
+error_pm:
+	crypto_free_sync_skcipher(op->fallback_tfm);
+	return err;
+}
+
+void sun8i_ce_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	crypto_free_sync_skcipher(op->fallback_tfm);
+	pm_runtime_put_sync_suspend(op->ce->dev);
+}
+
+int sun8i_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_ce_dev *ce = op->ce;
+
+	switch (keylen) {
+	case 128 / 8:
+		break;
+	case 192 / 8:
+		break;
+	case 256 / 8:
+		break;
+	default:
+		dev_dbg(ce->dev, "ERROR: Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	op->keylen = keylen;
+	op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!op->key)
+		return -ENOMEM;
+
+	crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
+
+int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = verify_skcipher_des3_key(tfm, key);
+	if (err)
+		return err;
+
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	op->keylen = keylen;
+	op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!op->key)
+		return -ENOMEM;
+
+	crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
new file mode 100644
index 000000000000..f72346a44e69
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sun8i-ce-core.c - hardware cryptographic offloader for
+ * Allwinner H3/A64/H5/H2+/H6/R40 SoC
+ *
+ * Copyright (C) 2015-2019 Corentin Labbe <clabbe.montjoie@gmail.com>
+ *
+ * Core file which registers crypto algorithms supported by the CryptoEngine.
+ *
+ * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <crypto/internal/skcipher.h>
+
+#include "sun8i-ce.h"
+
+/*
+ * mod clock is lower on H3 than other SoC due to some DMA timeout occurring
+ * with high value.
+ * If you want to tune mod clock, loading driver and passing selftest is
+ * insufficient, you need to test with some LUKS test (mount and write to it)
+ */
+static const struct ce_variant ce_h3_variant = {
+	.alg_cipher = { CE_ALG_AES, CE_ALG_DES, CE_ALG_3DES,
+	},
+	.op_mode = { CE_OP_ECB, CE_OP_CBC
+	},
+	.ce_clks = {
+		{ "bus", 0, 200000000 },
+		{ "mod", 50000000, 0 },
+		}
+};
+
+static const struct ce_variant ce_h5_variant = {
+	.alg_cipher = { CE_ALG_AES, CE_ALG_DES, CE_ALG_3DES,
+	},
+	.op_mode = { CE_OP_ECB, CE_OP_CBC
+	},
+	.ce_clks = {
+		{ "bus", 0, 200000000 },
+		{ "mod", 300000000, 0 },
+		}
+};
+
+static const struct ce_variant ce_h6_variant = {
+	.alg_cipher = { CE_ALG_AES, CE_ALG_DES, CE_ALG_3DES,
+	},
+	.op_mode = { CE_OP_ECB, CE_OP_CBC
+	},
+	.has_t_dlen_in_bytes = true,
+	.ce_clks = {
+		{ "bus", 0, 200000000 },
+		{ "mod", 300000000, 0 },
+		{ "ram", 0, 400000000 },
+		}
+};
+
+static const struct ce_variant ce_a64_variant = {
+	.alg_cipher = { CE_ALG_AES, CE_ALG_DES, CE_ALG_3DES,
+	},
+	.op_mode = { CE_OP_ECB, CE_OP_CBC
+	},
+	.ce_clks = {
+		{ "bus", 0, 200000000 },
+		{ "mod", 300000000, 0 },
+		}
+};
+
+static const struct ce_variant ce_r40_variant = {
+	.alg_cipher = { CE_ALG_AES, CE_ALG_DES, CE_ALG_3DES,
+	},
+	.op_mode = { CE_OP_ECB, CE_OP_CBC
+	},
+	.ce_clks = {
+		{ "bus", 0, 200000000 },
+		{ "mod", 300000000, 0 },
+		}
+};
+
+/*
+ * sun8i_ce_get_engine_number() get the next channel slot
+ * This is a simple round-robin way of getting the next channel
+ */
+int sun8i_ce_get_engine_number(struct sun8i_ce_dev *ce)
+{
+	return atomic_inc_return(&ce->flow) % MAXFLOW;
+}
+
+int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name)
+{
+	u32 v;
+	int err = 0;
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	ce->chanlist[flow].stat_req++;
+#endif
+
+	mutex_lock(&ce->mlock);
+
+	v = readl(ce->base + CE_ICR);
+	v |= 1 << flow;
+	writel(v, ce->base + CE_ICR);
+
+	reinit_completion(&ce->chanlist[flow].complete);
+	writel(ce->chanlist[flow].t_phy, ce->base + CE_TDQ);
+
+	ce->chanlist[flow].status = 0;
+	/* Be sure all data is written before enabling the task */
+	wmb();
+
+	v = 1 | (ce->chanlist[flow].tl->t_common_ctl & 0x7F) << 8;
+	writel(v, ce->base + CE_TLR);
+	mutex_unlock(&ce->mlock);
+
+	wait_for_completion_interruptible_timeout(&ce->chanlist[flow].complete,
+			msecs_to_jiffies(ce->chanlist[flow].timeout));
+
+	if (ce->chanlist[flow].status == 0) {
+		dev_err(ce->dev, "DMA timeout for %s\n", name);
+		err = -EFAULT;
+	}
+	/* No need to lock for this read, the channel is locked so
+	 * nothing could modify the error value for this channel
+	 */
+	v = readl(ce->base + CE_ESR);
+	if (v) {
+		v >>= (flow * 4);
+		v &= 0xFF;
+		if (v) {
+			dev_err(ce->dev, "CE ERROR: %x for flow %x\n", v, flow);
+			err = -EFAULT;
+		}
+		if (v & CE_ERR_ALGO_NOTSUP)
+			dev_err(ce->dev, "CE ERROR: algorithm not supported\n");
+		if (v & CE_ERR_DATALEN)
+			dev_err(ce->dev, "CE ERROR: data length error\n");
+		if (v & CE_ERR_KEYSRAM)
+			dev_err(ce->dev, "CE ERROR: keysram access error for AES\n");
+		if (v & CE_ERR_ADDR_INVALID)
+			dev_err(ce->dev, "CE ERROR: address invalid\n");
+		}
+
+	return err;
+}
+
+static irqreturn_t ce_irq_handler(int irq, void *data)
+{
+	struct sun8i_ce_dev *ce = (struct sun8i_ce_dev *)data;
+	int flow = 0;
+	u32 p;
+
+	p = readl(ce->base + CE_ISR);
+	for (flow = 0; flow < MAXFLOW; flow++) {
+		if (p & (BIT(flow))) {
+			writel(BIT(flow), ce->base + CE_ISR);
+			ce->chanlist[flow].status = 1;
+			complete(&ce->chanlist[flow].complete);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct sun8i_ce_alg_template ce_algs[] = {
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ce_algo_id = CE_ID_CIPHER_AES,
+	.ce_blockmode = CE_ID_OP_CBC,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-sun8i-ce",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ce_cipher_init,
+			.cra_exit = sun8i_ce_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= sun8i_ce_aes_setkey,
+		.encrypt	= sun8i_ce_skencrypt,
+		.decrypt	= sun8i_ce_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ce_algo_id = CE_ID_CIPHER_AES,
+	.ce_blockmode = CE_ID_OP_ECB,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-sun8i-ce",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ce_cipher_init,
+			.cra_exit = sun8i_ce_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= sun8i_ce_aes_setkey,
+		.encrypt	= sun8i_ce_skencrypt,
+		.decrypt	= sun8i_ce_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ce_algo_id = CE_ID_CIPHER_DES3,
+	.ce_blockmode = CE_ID_OP_CBC,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "cbc(des3_ede)",
+			.cra_driver_name = "cbc-des3-sun8i-ce",
+			.cra_priority = 400,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ce_cipher_init,
+			.cra_exit = sun8i_ce_cipher_exit,
+		},
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+		.ivsize		= DES3_EDE_BLOCK_SIZE,
+		.setkey		= sun8i_ce_des3_setkey,
+		.encrypt	= sun8i_ce_skencrypt,
+		.decrypt	= sun8i_ce_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ce_algo_id = CE_ID_CIPHER_DES3,
+	.ce_blockmode = CE_ID_OP_ECB,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(des3_ede)",
+			.cra_driver_name = "ecb-des3-sun8i-ce",
+			.cra_priority = 400,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ce_cipher_init,
+			.cra_exit = sun8i_ce_cipher_exit,
+		},
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+		.setkey		= sun8i_ce_des3_setkey,
+		.encrypt	= sun8i_ce_skencrypt,
+		.decrypt	= sun8i_ce_skdecrypt,
+	}
+},
+};
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+static int sun8i_ce_dbgfs_read(struct seq_file *seq, void *v)
+{
+	struct sun8i_ce_dev *ce = seq->private;
+	int i;
+
+	for (i = 0; i < MAXFLOW; i++)
+		seq_printf(seq, "Channel %d: nreq %lu\n", i, ce->chanlist[i].stat_req);
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		if (!ce_algs[i].ce)
+			continue;
+		switch (ce_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			seq_printf(seq, "%s %s %lu %lu\n",
+				   ce_algs[i].alg.skcipher.base.cra_driver_name,
+				   ce_algs[i].alg.skcipher.base.cra_name,
+				   ce_algs[i].stat_req, ce_algs[i].stat_fb);
+			break;
+		}
+	}
+	return 0;
+}
+
+static int sun8i_ce_dbgfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sun8i_ce_dbgfs_read, inode->i_private);
+}
+
+static const struct file_operations sun8i_ce_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = sun8i_ce_dbgfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif
+
+static void sun8i_ce_free_chanlist(struct sun8i_ce_dev *ce, int i)
+{
+	while (i >= 0) {
+		crypto_engine_exit(ce->chanlist[i].engine);
+		if (ce->chanlist[i].tl)
+			dma_free_coherent(ce->dev, sizeof(struct ce_task),
+					  ce->chanlist[i].tl,
+					  ce->chanlist[i].t_phy);
+		i--;
+	}
+}
+
+/*
+ * Allocate the channel list structure
+ */
+static int sun8i_ce_allocate_chanlist(struct sun8i_ce_dev *ce)
+{
+	int i, err;
+
+	ce->chanlist = devm_kcalloc(ce->dev, MAXFLOW,
+				    sizeof(struct sun8i_ce_flow), GFP_KERNEL);
+	if (!ce->chanlist)
+		return -ENOMEM;
+
+	for (i = 0; i < MAXFLOW; i++) {
+		init_completion(&ce->chanlist[i].complete);
+
+		ce->chanlist[i].engine = crypto_engine_alloc_init(ce->dev, true);
+		if (!ce->chanlist[i].engine) {
+			dev_err(ce->dev, "Cannot allocate engine\n");
+			i--;
+			err = -ENOMEM;
+			goto error_engine;
+		}
+		err = crypto_engine_start(ce->chanlist[i].engine);
+		if (err) {
+			dev_err(ce->dev, "Cannot start engine\n");
+			goto error_engine;
+		}
+		ce->chanlist[i].tl = dma_alloc_coherent(ce->dev,
+							sizeof(struct ce_task),
+							&ce->chanlist[i].t_phy,
+							GFP_KERNEL);
+		if (!ce->chanlist[i].tl) {
+			dev_err(ce->dev, "Cannot get DMA memory for task %d\n",
+				i);
+			err = -ENOMEM;
+			goto error_engine;
+		}
+	}
+	return 0;
+error_engine:
+	sun8i_ce_free_chanlist(ce, i);
+	return err;
+}
+
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver.
+ */
+static int sun8i_ce_pm_suspend(struct device *dev)
+{
+	struct sun8i_ce_dev *ce = dev_get_drvdata(dev);
+	int i;
+
+	reset_control_assert(ce->reset);
+	for (i = 0; i < CE_MAX_CLOCKS; i++)
+		clk_disable_unprepare(ce->ceclks[i]);
+	return 0;
+}
+
+static int sun8i_ce_pm_resume(struct device *dev)
+{
+	struct sun8i_ce_dev *ce = dev_get_drvdata(dev);
+	int err, i;
+
+	for (i = 0; i < CE_MAX_CLOCKS; i++) {
+		if (!ce->variant->ce_clks[i].name)
+			continue;
+		err = clk_prepare_enable(ce->ceclks[i]);
+		if (err) {
+			dev_err(ce->dev, "Cannot prepare_enable %s\n",
+				ce->variant->ce_clks[i].name);
+			goto error;
+		}
+	}
+	err = reset_control_deassert(ce->reset);
+	if (err) {
+		dev_err(ce->dev, "Cannot deassert reset control\n");
+		goto error;
+	}
+	return 0;
+error:
+	sun8i_ce_pm_suspend(dev);
+	return err;
+}
+
+static const struct dev_pm_ops sun8i_ce_pm_ops = {
+	SET_RUNTIME_PM_OPS(sun8i_ce_pm_suspend, sun8i_ce_pm_resume, NULL)
+};
+
+static int sun8i_ce_pm_init(struct sun8i_ce_dev *ce)
+{
+	int err;
+
+	pm_runtime_use_autosuspend(ce->dev);
+	pm_runtime_set_autosuspend_delay(ce->dev, 2000);
+
+	err = pm_runtime_set_suspended(ce->dev);
+	if (err)
+		return err;
+	pm_runtime_enable(ce->dev);
+	return err;
+}
+
+static void sun8i_ce_pm_exit(struct sun8i_ce_dev *ce)
+{
+	pm_runtime_disable(ce->dev);
+}
+
+static int sun8i_ce_get_clks(struct sun8i_ce_dev *ce)
+{
+	unsigned long cr;
+	int err, i;
+
+	for (i = 0; i < CE_MAX_CLOCKS; i++) {
+		if (!ce->variant->ce_clks[i].name)
+			continue;
+		ce->ceclks[i] = devm_clk_get(ce->dev, ce->variant->ce_clks[i].name);
+		if (IS_ERR(ce->ceclks[i])) {
+			err = PTR_ERR(ce->ceclks[i]);
+			dev_err(ce->dev, "Cannot get %s CE clock err=%d\n",
+				ce->variant->ce_clks[i].name, err);
+			return err;
+		}
+		cr = clk_get_rate(ce->ceclks[i]);
+		if (!cr)
+			return -EINVAL;
+		if (ce->variant->ce_clks[i].freq > 0 &&
+		    cr != ce->variant->ce_clks[i].freq) {
+			dev_info(ce->dev, "Set %s clock to %lu (%lu Mhz) from %lu (%lu Mhz)\n",
+				 ce->variant->ce_clks[i].name,
+				 ce->variant->ce_clks[i].freq,
+				 ce->variant->ce_clks[i].freq / 1000000,
+				 cr, cr / 1000000);
+			err = clk_set_rate(ce->ceclks[i], ce->variant->ce_clks[i].freq);
+			if (err)
+				dev_err(ce->dev, "Fail to set %s clk speed to %lu hz\n",
+					ce->variant->ce_clks[i].name,
+					ce->variant->ce_clks[i].freq);
+		}
+		if (ce->variant->ce_clks[i].max_freq > 0 &&
+		    cr > ce->variant->ce_clks[i].max_freq)
+			dev_warn(ce->dev, "Frequency for %s (%lu hz) is higher than datasheet's recommendation (%lu hz)",
+				 ce->variant->ce_clks[i].name, cr,
+				 ce->variant->ce_clks[i].max_freq);
+	}
+	return 0;
+}
+
+static int sun8i_ce_register_algs(struct sun8i_ce_dev *ce)
+{
+	int ce_method, err, id, i;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		ce_algs[i].ce = ce;
+		switch (ce_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			id = ce_algs[i].ce_algo_id;
+			ce_method = ce->variant->alg_cipher[id];
+			if (ce_method == CE_ID_NOTSUPP) {
+				dev_dbg(ce->dev,
+					"DEBUG: Algo of %s not supported\n",
+					ce_algs[i].alg.skcipher.base.cra_name);
+				ce_algs[i].ce = NULL;
+				break;
+			}
+			id = ce_algs[i].ce_blockmode;
+			ce_method = ce->variant->op_mode[id];
+			if (ce_method == CE_ID_NOTSUPP) {
+				dev_dbg(ce->dev, "DEBUG: Blockmode of %s not supported\n",
+					ce_algs[i].alg.skcipher.base.cra_name);
+				ce_algs[i].ce = NULL;
+				break;
+			}
+			dev_info(ce->dev, "Register %s\n",
+				 ce_algs[i].alg.skcipher.base.cra_name);
+			err = crypto_register_skcipher(&ce_algs[i].alg.skcipher);
+			if (err) {
+				dev_err(ce->dev, "ERROR: Fail to register %s\n",
+					ce_algs[i].alg.skcipher.base.cra_name);
+				ce_algs[i].ce = NULL;
+				return err;
+			}
+			break;
+		default:
+			ce_algs[i].ce = NULL;
+			dev_err(ce->dev, "ERROR: tried to register an unknown algo\n");
+		}
+	}
+	return 0;
+}
+
+static void sun8i_ce_unregister_algs(struct sun8i_ce_dev *ce)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		if (!ce_algs[i].ce)
+			continue;
+		switch (ce_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			dev_info(ce->dev, "Unregister %d %s\n", i,
+				 ce_algs[i].alg.skcipher.base.cra_name);
+			crypto_unregister_skcipher(&ce_algs[i].alg.skcipher);
+			break;
+		}
+	}
+}
+
+static int sun8i_ce_probe(struct platform_device *pdev)
+{
+	struct sun8i_ce_dev *ce;
+	int err, irq;
+	u32 v;
+
+	ce = devm_kzalloc(&pdev->dev, sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return -ENOMEM;
+
+	ce->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ce);
+
+	ce->variant = of_device_get_match_data(&pdev->dev);
+	if (!ce->variant) {
+		dev_err(&pdev->dev, "Missing Crypto Engine variant\n");
+		return -EINVAL;
+	}
+
+	ce->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ce->base))
+		return PTR_ERR(ce->base);
+
+	err = sun8i_ce_get_clks(ce);
+	if (err)
+		return err;
+
+	/* Get Non Secure IRQ */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n");
+		return irq;
+	}
+
+	ce->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(ce->reset)) {
+		if (PTR_ERR(ce->reset) == -EPROBE_DEFER)
+			return PTR_ERR(ce->reset);
+		dev_err(&pdev->dev, "No reset control found\n");
+		return PTR_ERR(ce->reset);
+	}
+
+	mutex_init(&ce->mlock);
+
+	err = sun8i_ce_allocate_chanlist(ce);
+	if (err)
+		return err;
+
+	err = sun8i_ce_pm_init(ce);
+	if (err)
+		goto error_pm;
+
+	err = devm_request_irq(&pdev->dev, irq, ce_irq_handler, 0,
+			       "sun8i-ce-ns", ce);
+	if (err) {
+		dev_err(ce->dev, "Cannot request CryptoEngine Non-secure IRQ (err=%d)\n", err);
+		goto error_irq;
+	}
+
+	err = sun8i_ce_register_algs(ce);
+	if (err)
+		goto error_alg;
+
+	err = pm_runtime_get_sync(ce->dev);
+	if (err < 0)
+		goto error_alg;
+
+	v = readl(ce->base + CE_CTR);
+	v >>= CE_DIE_ID_SHIFT;
+	v &= CE_DIE_ID_MASK;
+	dev_info(&pdev->dev, "CryptoEngine Die ID %x\n", v);
+
+	pm_runtime_put_sync(ce->dev);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	/* Ignore error of debugfs */
+	ce->dbgfs_dir = debugfs_create_dir("sun8i-ce", NULL);
+	ce->dbgfs_stats = debugfs_create_file("stats", 0444,
+					      ce->dbgfs_dir, ce,
+					      &sun8i_ce_debugfs_fops);
+#endif
+
+	return 0;
+error_alg:
+	sun8i_ce_unregister_algs(ce);
+error_irq:
+	sun8i_ce_pm_exit(ce);
+error_pm:
+	sun8i_ce_free_chanlist(ce, MAXFLOW - 1);
+	return err;
+}
+
+static int sun8i_ce_remove(struct platform_device *pdev)
+{
+	struct sun8i_ce_dev *ce = platform_get_drvdata(pdev);
+
+	sun8i_ce_unregister_algs(ce);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	debugfs_remove_recursive(ce->dbgfs_dir);
+#endif
+
+	sun8i_ce_free_chanlist(ce, MAXFLOW - 1);
+
+	sun8i_ce_pm_exit(ce);
+	return 0;
+}
+
+static const struct of_device_id sun8i_ce_crypto_of_match_table[] = {
+	{ .compatible = "allwinner,sun8i-h3-crypto",
+	  .data = &ce_h3_variant },
+	{ .compatible = "allwinner,sun8i-r40-crypto",
+	  .data = &ce_r40_variant },
+	{ .compatible = "allwinner,sun50i-a64-crypto",
+	  .data = &ce_a64_variant },
+	{ .compatible = "allwinner,sun50i-h5-crypto",
+	  .data = &ce_h5_variant },
+	{ .compatible = "allwinner,sun50i-h6-crypto",
+	  .data = &ce_h6_variant },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sun8i_ce_crypto_of_match_table);
+
+static struct platform_driver sun8i_ce_driver = {
+	.probe		 = sun8i_ce_probe,
+	.remove		 = sun8i_ce_remove,
+	.driver		 = {
+		.name		= "sun8i-ce",
+		.pm		= &sun8i_ce_pm_ops,
+		.of_match_table	= sun8i_ce_crypto_of_match_table,
+	},
+};
+
+module_platform_driver(sun8i_ce_driver);
+
+MODULE_DESCRIPTION("Allwinner Crypto Engine cryptographic offloader");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin Labbe <clabbe.montjoie@gmail.com>");
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
new file mode 100644
index 000000000000..8f8404c84a4d
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sun8i-ce.h - hardware cryptographic offloader for
+ * Allwinner H3/A64/H5/H2+/H6 SoC
+ *
+ * Copyright (C) 2016-2019 Corentin LABBE <clabbe.montjoie@gmail.com>
+ */
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/engine.h>
+#include <crypto/skcipher.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/crypto.h>
+
+/* CE Registers */
+#define CE_TDQ	0x00
+#define CE_CTR	0x04
+#define CE_ICR	0x08
+#define CE_ISR	0x0C
+#define CE_TLR	0x10
+#define CE_TSR	0x14
+#define CE_ESR	0x18
+#define CE_CSSGR	0x1C
+#define CE_CDSGR	0x20
+#define CE_CSAR	0x24
+#define CE_CDAR	0x28
+#define CE_TPR	0x2C
+
+/* Used in struct ce_task */
+/* ce_task common */
+#define CE_ENCRYPTION		0
+#define CE_DECRYPTION		BIT(8)
+
+#define CE_COMM_INT		BIT(31)
+
+/* ce_task symmetric */
+#define CE_AES_128BITS 0
+#define CE_AES_192BITS 1
+#define CE_AES_256BITS 2
+
+#define CE_OP_ECB	0
+#define CE_OP_CBC	(1 << 8)
+
+#define CE_ALG_AES		0
+#define CE_ALG_DES		1
+#define CE_ALG_3DES		2
+
+/* Used in ce_variant */
+#define CE_ID_NOTSUPP		0xFF
+
+#define CE_ID_CIPHER_AES	0
+#define CE_ID_CIPHER_DES	1
+#define CE_ID_CIPHER_DES3	2
+#define CE_ID_CIPHER_MAX	3
+
+#define CE_ID_OP_ECB	0
+#define CE_ID_OP_CBC	1
+#define CE_ID_OP_MAX	2
+
+/* Used in CE registers */
+#define CE_ERR_ALGO_NOTSUP	BIT(0)
+#define CE_ERR_DATALEN		BIT(1)
+#define CE_ERR_KEYSRAM		BIT(2)
+#define CE_ERR_ADDR_INVALID	BIT(5)
+#define CE_ERR_KEYLADDER	BIT(6)
+
+#define CE_DIE_ID_SHIFT	16
+#define CE_DIE_ID_MASK	0x07
+
+#define MAX_SG 8
+
+#define CE_MAX_CLOCKS 3
+
+#define MAXFLOW 4
+
+/*
+ * struct ce_clock - Describe clocks used by sun8i-ce
+ * @name:	Name of clock needed by this variant
+ * @freq:	Frequency to set for each clock
+ * @max_freq:	Maximum frequency for each clock (generally given by datasheet)
+ */
+struct ce_clock {
+	const char *name;
+	unsigned long freq;
+	unsigned long max_freq;
+};
+
+/*
+ * struct ce_variant - Describe CE capability for each variant hardware
+ * @alg_cipher:	list of supported ciphers. for each CE_ID_ this will give the
+ *              coresponding CE_ALG_XXX value
+ * @op_mode:	list of supported block modes
+ * @has_t_dlen_in_bytes:	Does the request size for cipher is in
+ *				bytes or words
+ * @ce_clks:	list of clocks needed by this variant
+ */
+struct ce_variant {
+	char alg_cipher[CE_ID_CIPHER_MAX];
+	u32 op_mode[CE_ID_OP_MAX];
+	bool has_t_dlen_in_bytes;
+	struct ce_clock ce_clks[CE_MAX_CLOCKS];
+};
+
+struct sginfo {
+	__le32 addr;
+	__le32 len;
+} __packed;
+
+/*
+ * struct ce_task - CE Task descriptor
+ * The structure of this descriptor could be found in the datasheet
+ */
+struct ce_task {
+	__le32 t_id;
+	__le32 t_common_ctl;
+	__le32 t_sym_ctl;
+	__le32 t_asym_ctl;
+	__le32 t_key;
+	__le32 t_iv;
+	__le32 t_ctr;
+	__le32 t_dlen;
+	struct sginfo t_src[MAX_SG];
+	struct sginfo t_dst[MAX_SG];
+	__le32 next;
+	__le32 reserved[3];
+} __packed __aligned(8);
+
+/*
+ * struct sun8i_ce_flow - Information used by each flow
+ * @engine:	ptr to the crypto_engine for this flow
+ * @bounce_iv:	buffer which contain the IV
+ * @ivlen:	size of bounce_iv
+ * @complete:	completion for the current task on this flow
+ * @status:	set to 1 by interrupt if task is done
+ * @t_phy:	Physical address of task
+ * @tl:		pointer to the current ce_task for this flow
+ * @stat_req:	number of request done by this flow
+ */
+struct sun8i_ce_flow {
+	struct crypto_engine *engine;
+	void *bounce_iv;
+	unsigned int ivlen;
+	struct completion complete;
+	int status;
+	dma_addr_t t_phy;
+	int timeout;
+	struct ce_task *tl;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	unsigned long stat_req;
+#endif
+};
+
+/*
+ * struct sun8i_ce_dev - main container for all this driver information
+ * @base:	base address of CE
+ * @ceclks:	clocks used by CE
+ * @reset:	pointer to reset controller
+ * @dev:	the platform device
+ * @mlock:	Control access to device registers
+ * @chanlist:	array of all flow
+ * @flow:	flow to use in next request
+ * @variant:	pointer to variant specific data
+ * @dbgfs_dir:	Debugfs dentry for statistic directory
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ */
+struct sun8i_ce_dev {
+	void __iomem *base;
+	struct clk *ceclks[CE_MAX_CLOCKS];
+	struct reset_control *reset;
+	struct device *dev;
+	struct mutex mlock;
+	struct sun8i_ce_flow *chanlist;
+	atomic_t flow;
+	const struct ce_variant *variant;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_stats;
+#endif
+};
+
+/*
+ * struct sun8i_cipher_req_ctx - context for a skcipher request
+ * @op_dir:	direction (encrypt vs decrypt) for this request
+ * @flow:	the flow to use for this request
+ */
+struct sun8i_cipher_req_ctx {
+	u32 op_dir;
+	int flow;
+};
+
+/*
+ * struct sun8i_cipher_tfm_ctx - context for a skcipher TFM
+ * @enginectx:		crypto_engine used by this TFM
+ * @key:		pointer to key data
+ * @keylen:		len of the key
+ * @ce:			pointer to the private data of driver handling this TFM
+ * @fallback_tfm:	pointer to the fallback TFM
+ */
+struct sun8i_cipher_tfm_ctx {
+	struct crypto_engine_ctx enginectx;
+	u32 *key;
+	u32 keylen;
+	struct sun8i_ce_dev *ce;
+	struct crypto_sync_skcipher *fallback_tfm;
+};
+
+/*
+ * struct sun8i_ce_alg_template - crypto_alg template
+ * @type:		the CRYPTO_ALG_TYPE for this template
+ * @ce_algo_id:		the CE_ID for this template
+ * @ce_blockmode:	the type of block operation CE_ID
+ * @ce:			pointer to the sun8i_ce_dev structure associated with
+ *			this template
+ * @alg:		one of sub struct must be used
+ * @stat_req:		number of request done on this template
+ * @stat_fb:		total of all data len done on this template
+ */
+struct sun8i_ce_alg_template {
+	u32 type;
+	u32 ce_algo_id;
+	u32 ce_blockmode;
+	struct sun8i_ce_dev *ce;
+	union {
+		struct skcipher_alg skcipher;
+	} alg;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+	unsigned long stat_req;
+	unsigned long stat_fb;
+#endif
+};
+
+int sun8i_ce_enqueue(struct crypto_async_request *areq, u32 type);
+
+int sun8i_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen);
+int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen);
+int sun8i_ce_cipher_init(struct crypto_tfm *tfm);
+void sun8i_ce_cipher_exit(struct crypto_tfm *tfm);
+int sun8i_ce_skdecrypt(struct skcipher_request *areq);
+int sun8i_ce_skencrypt(struct skcipher_request *areq);
+
+int sun8i_ce_get_engine_number(struct sun8i_ce_dev *ce);
+
+int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name);
diff --git a/drivers/crypto/allwinner/sun8i-ss/Makefile b/drivers/crypto/allwinner/sun8i-ss/Makefile
new file mode 100644
index 000000000000..add7b0543fd5
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ss/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SUN8I_SS) += sun8i-ss.o
+sun8i-ss-y += sun8i-ss-core.o sun8i-ss-cipher.o
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
new file mode 100644
index 000000000000..84d52fc3a2da
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sun8i-ss-cipher.c - hardware cryptographic offloader for
+ * Allwinner A80/A83T SoC
+ *
+ * Copyright (C) 2016-2019 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This file add support for AES cipher with 128,192,256 bits keysize in
+ * CBC and ECB mode.
+ *
+ * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ */
+
+#include <linux/crypto.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
+#include "sun8i-ss.h"
+
+static bool sun8i_ss_need_fallback(struct skcipher_request *areq)
+{
+	struct scatterlist *in_sg = areq->src;
+	struct scatterlist *out_sg = areq->dst;
+	struct scatterlist *sg;
+
+	if (areq->cryptlen == 0 || areq->cryptlen % 16)
+		return true;
+
+	if (sg_nents(areq->src) > 8 || sg_nents(areq->dst) > 8)
+		return true;
+
+	sg = areq->src;
+	while (sg) {
+		if ((sg->length % 16) != 0)
+			return true;
+		if ((sg_dma_len(sg) % 16) != 0)
+			return true;
+		if (!IS_ALIGNED(sg->offset, 16))
+			return true;
+		sg = sg_next(sg);
+	}
+	sg = areq->dst;
+	while (sg) {
+		if ((sg->length % 16) != 0)
+			return true;
+		if ((sg_dma_len(sg) % 16) != 0)
+			return true;
+		if (!IS_ALIGNED(sg->offset, 16))
+			return true;
+		sg = sg_next(sg);
+	}
+
+	/* SS need same numbers of SG (with same length) for source and destination */
+	in_sg = areq->src;
+	out_sg = areq->dst;
+	while (in_sg && out_sg) {
+		if (in_sg->length != out_sg->length)
+			return true;
+		in_sg = sg_next(in_sg);
+		out_sg = sg_next(out_sg);
+	}
+	if (in_sg || out_sg)
+		return true;
+	return false;
+}
+
+static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	int err;
+
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sun8i_ss_alg_template *algt;
+
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+	algt->stat_fb++;
+#endif
+	skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
+	skcipher_request_set_callback(subreq, areq->base.flags, NULL, NULL);
+	skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (rctx->op_dir & SS_DECRYPTION)
+		err = crypto_skcipher_decrypt(subreq);
+	else
+		err = crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);
+	return err;
+}
+
+static int sun8i_ss_cipher(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_ss_dev *ss = op->ss;
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sun8i_ss_alg_template *algt;
+	struct scatterlist *sg;
+	unsigned int todo, len, offset, ivsize;
+	void *backup_iv = NULL;
+	int nr_sgs = 0;
+	int nr_sgd = 0;
+	int err = 0;
+	int i;
+
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+
+	dev_dbg(ss->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
+		crypto_tfm_alg_name(areq->base.tfm),
+		areq->cryptlen,
+		rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm),
+		op->keylen);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	algt->stat_req++;
+#endif
+
+	rctx->op_mode = ss->variant->op_mode[algt->ss_blockmode];
+	rctx->method = ss->variant->alg_cipher[algt->ss_algo_id];
+	rctx->keylen = op->keylen;
+
+	rctx->p_key = dma_map_single(ss->dev, op->key, op->keylen, DMA_TO_DEVICE);
+	if (dma_mapping_error(ss->dev, rctx->p_key)) {
+		dev_err(ss->dev, "Cannot DMA MAP KEY\n");
+		err = -EFAULT;
+		goto theend;
+	}
+
+	ivsize = crypto_skcipher_ivsize(tfm);
+	if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
+		rctx->ivlen = ivsize;
+		rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
+		if (!rctx->biv) {
+			err = -ENOMEM;
+			goto theend_key;
+		}
+		if (rctx->op_dir & SS_DECRYPTION) {
+			backup_iv = kzalloc(ivsize, GFP_KERNEL);
+			if (!backup_iv) {
+				err = -ENOMEM;
+				goto theend_key;
+			}
+			offset = areq->cryptlen - ivsize;
+			scatterwalk_map_and_copy(backup_iv, areq->src, offset,
+						 ivsize, 0);
+		}
+		memcpy(rctx->biv, areq->iv, ivsize);
+		rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen,
+					    DMA_TO_DEVICE);
+		if (dma_mapping_error(ss->dev, rctx->p_iv)) {
+			dev_err(ss->dev, "Cannot DMA MAP IV\n");
+			err = -ENOMEM;
+			goto theend_iv;
+		}
+	}
+	if (areq->src == areq->dst) {
+		nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
+				    DMA_BIDIRECTIONAL);
+		if (nr_sgs <= 0 || nr_sgs > 8) {
+			dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend_iv;
+		}
+		nr_sgd = nr_sgs;
+	} else {
+		nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
+				    DMA_TO_DEVICE);
+		if (nr_sgs <= 0 || nr_sgs > 8) {
+			dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend_iv;
+		}
+		nr_sgd = dma_map_sg(ss->dev, areq->dst, sg_nents(areq->dst),
+				    DMA_FROM_DEVICE);
+		if (nr_sgd <= 0 || nr_sgd > 8) {
+			dev_err(ss->dev, "Invalid sg number %d\n", nr_sgd);
+			err = -EINVAL;
+			goto theend_sgs;
+		}
+	}
+
+	len = areq->cryptlen;
+	i = 0;
+	sg = areq->src;
+	while (i < nr_sgs && sg && len) {
+		if (sg_dma_len(sg) == 0)
+			goto sgs_next;
+		rctx->t_src[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_src[i].len = todo / 4;
+		dev_dbg(ss->dev, "%s total=%u SGS(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_src[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+sgs_next:
+		sg = sg_next(sg);
+	}
+	if (len > 0) {
+		dev_err(ss->dev, "remaining len %d\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	len = areq->cryptlen;
+	i = 0;
+	sg = areq->dst;
+	while (i < nr_sgd && sg && len) {
+		if (sg_dma_len(sg) == 0)
+			goto sgd_next;
+		rctx->t_dst[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_dst[i].len = todo / 4;
+		dev_dbg(ss->dev, "%s total=%u SGD(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_dst[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+sgd_next:
+		sg = sg_next(sg);
+	}
+	if (len > 0) {
+		dev_err(ss->dev, "remaining len %d\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	err = sun8i_ss_run_task(ss, rctx, crypto_tfm_alg_name(areq->base.tfm));
+
+theend_sgs:
+	if (areq->src == areq->dst) {
+		dma_unmap_sg(ss->dev, areq->src, nr_sgs, DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(ss->dev, areq->src, nr_sgs, DMA_TO_DEVICE);
+		dma_unmap_sg(ss->dev, areq->dst, nr_sgd, DMA_FROM_DEVICE);
+	}
+
+theend_iv:
+	if (rctx->p_iv)
+		dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen,
+				 DMA_TO_DEVICE);
+
+	if (areq->iv && ivsize > 0) {
+		if (rctx->biv) {
+			offset = areq->cryptlen - ivsize;
+			if (rctx->op_dir & SS_DECRYPTION) {
+				memcpy(areq->iv, backup_iv, ivsize);
+				memzero_explicit(backup_iv, ivsize);
+				kzfree(backup_iv);
+			} else {
+				scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+							 ivsize, 0);
+			}
+			kfree(rctx->biv);
+		}
+	}
+
+theend_key:
+	dma_unmap_single(ss->dev, rctx->p_key, op->keylen, DMA_TO_DEVICE);
+
+theend:
+
+	return err;
+}
+
+static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *areq)
+{
+	int err;
+	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+	err = sun8i_ss_cipher(breq);
+	crypto_finalize_skcipher_request(engine, breq, err);
+
+	return 0;
+}
+
+int sun8i_ss_skdecrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	memset(rctx, 0, sizeof(struct sun8i_cipher_req_ctx));
+	rctx->op_dir = SS_DECRYPTION;
+
+	if (sun8i_ss_need_fallback(areq))
+		return sun8i_ss_cipher_fallback(areq);
+
+	e = sun8i_ss_get_engine_number(op->ss);
+	engine = op->ss->flows[e].engine;
+	rctx->flow = e;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int sun8i_ss_skencrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	memset(rctx, 0, sizeof(struct sun8i_cipher_req_ctx));
+	rctx->op_dir = SS_ENCRYPTION;
+
+	if (sun8i_ss_need_fallback(areq))
+		return sun8i_ss_cipher_fallback(areq);
+
+	e = sun8i_ss_get_engine_number(op->ss);
+	engine = op->ss->flows[e].engine;
+	rctx->flow = e;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct sun8i_ss_alg_template *algt;
+	const char *name = crypto_tfm_alg_name(tfm);
+	struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(sktfm);
+	int err;
+
+	memset(op, 0, sizeof(struct sun8i_cipher_tfm_ctx));
+
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+	op->ss = algt->ss;
+
+	sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx);
+
+	op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(op->fallback_tfm)) {
+		dev_err(op->ss->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(op->fallback_tfm));
+		return PTR_ERR(op->fallback_tfm);
+	}
+
+	dev_info(op->ss->dev, "Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&sktfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(&op->fallback_tfm->base)));
+
+	op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
+	op->enginectx.op.prepare_request = NULL;
+	op->enginectx.op.unprepare_request = NULL;
+
+	err = pm_runtime_get_sync(op->ss->dev);
+	if (err < 0) {
+		dev_err(op->ss->dev, "pm error %d\n", err);
+		goto error_pm;
+	}
+
+	return 0;
+error_pm:
+	crypto_free_sync_skcipher(op->fallback_tfm);
+	return err;
+}
+
+void sun8i_ss_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	crypto_free_sync_skcipher(op->fallback_tfm);
+	pm_runtime_put_sync(op->ss->dev);
+}
+
+int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_ss_dev *ss = op->ss;
+
+	switch (keylen) {
+	case 128 / 8:
+		break;
+	case 192 / 8:
+		break;
+	case 256 / 8:
+		break;
+	default:
+		dev_dbg(ss->dev, "ERROR: Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	op->keylen = keylen;
+	op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!op->key)
+		return -ENOMEM;
+
+	crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
+
+int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun8i_ss_dev *ss = op->ss;
+
+	if (unlikely(keylen != 3 * DES_KEY_SIZE)) {
+		dev_dbg(ss->dev, "Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	op->keylen = keylen;
+	op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!op->key)
+		return -ENOMEM;
+
+	crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
new file mode 100644
index 000000000000..6b301afffd11
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sun8i-ss-core.c - hardware cryptographic offloader for
+ * Allwinner A80/A83T SoC
+ *
+ * Copyright (C) 2015-2019 Corentin Labbe <clabbe.montjoie@gmail.com>
+ *
+ * Core file which registers crypto algorithms supported by the SecuritySystem
+ *
+ * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <crypto/internal/skcipher.h>
+
+#include "sun8i-ss.h"
+
+static const struct ss_variant ss_a80_variant = {
+	.alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
+	},
+	.op_mode = { SS_OP_ECB, SS_OP_CBC,
+	},
+	.ss_clks = {
+		{ "bus", 0, 300 * 1000 * 1000 },
+		{ "mod", 0, 300 * 1000 * 1000 },
+	}
+};
+
+static const struct ss_variant ss_a83t_variant = {
+	.alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
+	},
+	.op_mode = { SS_OP_ECB, SS_OP_CBC,
+	},
+	.ss_clks = {
+		{ "bus", 0, 300 * 1000 * 1000 },
+		{ "mod", 0, 300 * 1000 * 1000 },
+	}
+};
+
+/*
+ * sun8i_ss_get_engine_number() get the next channel slot
+ * This is a simple round-robin way of getting the next channel
+ */
+int sun8i_ss_get_engine_number(struct sun8i_ss_dev *ss)
+{
+	return atomic_inc_return(&ss->flow) % MAXFLOW;
+}
+
+int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx,
+		      const char *name)
+{
+	int flow = rctx->flow;
+	u32 v = 1;
+	int i;
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	ss->flows[flow].stat_req++;
+#endif
+
+	/* choose between stream0/stream1 */
+	if (flow)
+		v |= SS_FLOW1;
+	else
+		v |= SS_FLOW0;
+
+	v |= rctx->op_mode;
+	v |= rctx->method;
+
+	if (rctx->op_dir)
+		v |= SS_DECRYPTION;
+
+	switch (rctx->keylen) {
+	case 128 / 8:
+		v |= SS_AES_128BITS << 7;
+		break;
+	case 192 / 8:
+		v |= SS_AES_192BITS << 7;
+		break;
+	case 256 / 8:
+		v |= SS_AES_256BITS << 7;
+		break;
+	}
+
+	for (i = 0; i < MAX_SG; i++) {
+		if (!rctx->t_dst[i].addr)
+			break;
+
+		mutex_lock(&ss->mlock);
+		writel(rctx->p_key, ss->base + SS_KEY_ADR_REG);
+
+		if (i == 0) {
+			if (rctx->p_iv)
+				writel(rctx->p_iv, ss->base + SS_IV_ADR_REG);
+		} else {
+			if (rctx->biv) {
+				if (rctx->op_dir == SS_ENCRYPTION)
+					writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
+				else
+					writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
+			}
+		}
+
+		dev_dbg(ss->dev,
+			"Processing SG %d on flow %d %s ctl=%x %d to %d method=%x opmode=%x opdir=%x srclen=%d\n",
+			i, flow, name, v,
+			rctx->t_src[i].len, rctx->t_dst[i].len,
+			rctx->method, rctx->op_mode,
+			rctx->op_dir, rctx->t_src[i].len);
+
+		writel(rctx->t_src[i].addr, ss->base + SS_SRC_ADR_REG);
+		writel(rctx->t_dst[i].addr, ss->base + SS_DST_ADR_REG);
+		writel(rctx->t_src[i].len, ss->base + SS_LEN_ADR_REG);
+
+		reinit_completion(&ss->flows[flow].complete);
+		ss->flows[flow].status = 0;
+		wmb();
+
+		writel(v, ss->base + SS_CTL_REG);
+		mutex_unlock(&ss->mlock);
+		wait_for_completion_interruptible_timeout(&ss->flows[flow].complete,
+							  msecs_to_jiffies(2000));
+		if (ss->flows[flow].status == 0) {
+			dev_err(ss->dev, "DMA timeout for %s\n", name);
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t ss_irq_handler(int irq, void *data)
+{
+	struct sun8i_ss_dev *ss = (struct sun8i_ss_dev *)data;
+	int flow = 0;
+	u32 p;
+
+	p = readl(ss->base + SS_INT_STA_REG);
+	for (flow = 0; flow < MAXFLOW; flow++) {
+		if (p & (BIT(flow))) {
+			writel(BIT(flow), ss->base + SS_INT_STA_REG);
+			ss->flows[flow].status = 1;
+			complete(&ss->flows[flow].complete);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct sun8i_ss_alg_template ss_algs[] = {
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ss_algo_id = SS_ID_CIPHER_AES,
+	.ss_blockmode = SS_ID_OP_CBC,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-sun8i-ss",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ss_cipher_init,
+			.cra_exit = sun8i_ss_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= sun8i_ss_aes_setkey,
+		.encrypt	= sun8i_ss_skencrypt,
+		.decrypt	= sun8i_ss_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ss_algo_id = SS_ID_CIPHER_AES,
+	.ss_blockmode = SS_ID_OP_ECB,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-sun8i-ss",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ss_cipher_init,
+			.cra_exit = sun8i_ss_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= sun8i_ss_aes_setkey,
+		.encrypt	= sun8i_ss_skencrypt,
+		.decrypt	= sun8i_ss_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ss_algo_id = SS_ID_CIPHER_DES3,
+	.ss_blockmode = SS_ID_OP_CBC,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "cbc(des3_ede)",
+			.cra_driver_name = "cbc-des3-sun8i-ss",
+			.cra_priority = 400,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ss_cipher_init,
+			.cra_exit = sun8i_ss_cipher_exit,
+		},
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+		.ivsize		= DES3_EDE_BLOCK_SIZE,
+		.setkey		= sun8i_ss_des3_setkey,
+		.encrypt	= sun8i_ss_skencrypt,
+		.decrypt	= sun8i_ss_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.ss_algo_id = SS_ID_CIPHER_DES3,
+	.ss_blockmode = SS_ID_OP_ECB,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(des3_ede)",
+			.cra_driver_name = "ecb-des3-sun8i-ss",
+			.cra_priority = 400,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = sun8i_ss_cipher_init,
+			.cra_exit = sun8i_ss_cipher_exit,
+		},
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+		.setkey		= sun8i_ss_des3_setkey,
+		.encrypt	= sun8i_ss_skencrypt,
+		.decrypt	= sun8i_ss_skdecrypt,
+	}
+},
+};
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+static int sun8i_ss_dbgfs_read(struct seq_file *seq, void *v)
+{
+	struct sun8i_ss_dev *ss = seq->private;
+	int i;
+
+	for (i = 0; i < MAXFLOW; i++)
+		seq_printf(seq, "Channel %d: nreq %lu\n", i, ss->flows[i].stat_req);
+
+	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
+		if (!ss_algs[i].ss)
+			continue;
+		switch (ss_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			seq_printf(seq, "%s %s %lu %lu\n",
+				   ss_algs[i].alg.skcipher.base.cra_driver_name,
+				   ss_algs[i].alg.skcipher.base.cra_name,
+				   ss_algs[i].stat_req, ss_algs[i].stat_fb);
+			break;
+		}
+	}
+	return 0;
+}
+
+static int sun8i_ss_dbgfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sun8i_ss_dbgfs_read, inode->i_private);
+}
+
+static const struct file_operations sun8i_ss_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = sun8i_ss_dbgfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif
+
+static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i)
+{
+	while (i >= 0) {
+		crypto_engine_exit(ss->flows[i].engine);
+		i--;
+	}
+}
+
+/*
+ * Allocate the flow list structure
+ */
+static int allocate_flows(struct sun8i_ss_dev *ss)
+{
+	int i, err;
+
+	ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow),
+				 GFP_KERNEL);
+	if (!ss->flows)
+		return -ENOMEM;
+
+	for (i = 0; i < MAXFLOW; i++) {
+		init_completion(&ss->flows[i].complete);
+
+		ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
+		if (!ss->flows[i].engine) {
+			dev_err(ss->dev, "Cannot allocate engine\n");
+			i--;
+			err = -ENOMEM;
+			goto error_engine;
+		}
+		err = crypto_engine_start(ss->flows[i].engine);
+		if (err) {
+			dev_err(ss->dev, "Cannot start engine\n");
+			goto error_engine;
+		}
+	}
+	return 0;
+error_engine:
+	sun8i_ss_free_flows(ss, i);
+	return err;
+}
+
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver.
+ */
+static int sun8i_ss_pm_suspend(struct device *dev)
+{
+	struct sun8i_ss_dev *ss = dev_get_drvdata(dev);
+	int i;
+
+	reset_control_assert(ss->reset);
+	for (i = 0; i < SS_MAX_CLOCKS; i++)
+		clk_disable_unprepare(ss->ssclks[i]);
+	return 0;
+}
+
+static int sun8i_ss_pm_resume(struct device *dev)
+{
+	struct sun8i_ss_dev *ss = dev_get_drvdata(dev);
+	int err, i;
+
+	for (i = 0; i < SS_MAX_CLOCKS; i++) {
+		if (!ss->variant->ss_clks[i].name)
+			continue;
+		err = clk_prepare_enable(ss->ssclks[i]);
+		if (err) {
+			dev_err(ss->dev, "Cannot prepare_enable %s\n",
+				ss->variant->ss_clks[i].name);
+			goto error;
+		}
+	}
+	err = reset_control_deassert(ss->reset);
+	if (err) {
+		dev_err(ss->dev, "Cannot deassert reset control\n");
+		goto error;
+	}
+	/* enable interrupts for all flows */
+	writel(BIT(0) | BIT(1), ss->base + SS_INT_CTL_REG);
+
+	return 0;
+error:
+	sun8i_ss_pm_suspend(dev);
+	return err;
+}
+
+static const struct dev_pm_ops sun8i_ss_pm_ops = {
+	SET_RUNTIME_PM_OPS(sun8i_ss_pm_suspend, sun8i_ss_pm_resume, NULL)
+};
+
+static int sun8i_ss_pm_init(struct sun8i_ss_dev *ss)
+{
+	int err;
+
+	pm_runtime_use_autosuspend(ss->dev);
+	pm_runtime_set_autosuspend_delay(ss->dev, 2000);
+
+	err = pm_runtime_set_suspended(ss->dev);
+	if (err)
+		return err;
+	pm_runtime_enable(ss->dev);
+	return err;
+}
+
+static void sun8i_ss_pm_exit(struct sun8i_ss_dev *ss)
+{
+	pm_runtime_disable(ss->dev);
+}
+
+static int sun8i_ss_register_algs(struct sun8i_ss_dev *ss)
+{
+	int ss_method, err, id, i;
+
+	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
+		ss_algs[i].ss = ss;
+		switch (ss_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			id = ss_algs[i].ss_algo_id;
+			ss_method = ss->variant->alg_cipher[id];
+			if (ss_method == SS_ID_NOTSUPP) {
+				dev_info(ss->dev,
+					 "DEBUG: Algo of %s not supported\n",
+					 ss_algs[i].alg.skcipher.base.cra_name);
+				ss_algs[i].ss = NULL;
+				break;
+			}
+			id = ss_algs[i].ss_blockmode;
+			ss_method = ss->variant->op_mode[id];
+			if (ss_method == SS_ID_NOTSUPP) {
+				dev_info(ss->dev, "DEBUG: Blockmode of %s not supported\n",
+					 ss_algs[i].alg.skcipher.base.cra_name);
+				ss_algs[i].ss = NULL;
+				break;
+			}
+			dev_info(ss->dev, "DEBUG: Register %s\n",
+				 ss_algs[i].alg.skcipher.base.cra_name);
+			err = crypto_register_skcipher(&ss_algs[i].alg.skcipher);
+			if (err) {
+				dev_err(ss->dev, "Fail to register %s\n",
+					ss_algs[i].alg.skcipher.base.cra_name);
+				ss_algs[i].ss = NULL;
+				return err;
+			}
+			break;
+		default:
+			ss_algs[i].ss = NULL;
+			dev_err(ss->dev, "ERROR: tried to register an unknown algo\n");
+		}
+	}
+	return 0;
+}
+
+static void sun8i_ss_unregister_algs(struct sun8i_ss_dev *ss)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
+		if (!ss_algs[i].ss)
+			continue;
+		switch (ss_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			dev_info(ss->dev, "Unregister %d %s\n", i,
+				 ss_algs[i].alg.skcipher.base.cra_name);
+			crypto_unregister_skcipher(&ss_algs[i].alg.skcipher);
+			break;
+		}
+	}
+}
+
+static int sun8i_ss_get_clks(struct sun8i_ss_dev *ss)
+{
+	unsigned long cr;
+	int err, i;
+
+	for (i = 0; i < SS_MAX_CLOCKS; i++) {
+		if (!ss->variant->ss_clks[i].name)
+			continue;
+		ss->ssclks[i] = devm_clk_get(ss->dev, ss->variant->ss_clks[i].name);
+		if (IS_ERR(ss->ssclks[i])) {
+			err = PTR_ERR(ss->ssclks[i]);
+			dev_err(ss->dev, "Cannot get %s SS clock err=%d\n",
+				ss->variant->ss_clks[i].name, err);
+			return err;
+		}
+		cr = clk_get_rate(ss->ssclks[i]);
+		if (!cr)
+			return -EINVAL;
+		if (ss->variant->ss_clks[i].freq > 0 &&
+		    cr != ss->variant->ss_clks[i].freq) {
+			dev_info(ss->dev, "Set %s clock to %lu (%lu Mhz) from %lu (%lu Mhz)\n",
+				 ss->variant->ss_clks[i].name,
+				 ss->variant->ss_clks[i].freq,
+				 ss->variant->ss_clks[i].freq / 1000000,
+				 cr, cr / 1000000);
+			err = clk_set_rate(ss->ssclks[i], ss->variant->ss_clks[i].freq);
+			if (err)
+				dev_err(ss->dev, "Fail to set %s clk speed to %lu hz\n",
+					ss->variant->ss_clks[i].name,
+					ss->variant->ss_clks[i].freq);
+		}
+		if (ss->variant->ss_clks[i].max_freq > 0 &&
+		    cr > ss->variant->ss_clks[i].max_freq)
+			dev_warn(ss->dev, "Frequency for %s (%lu hz) is higher than datasheet's recommendation (%lu hz)",
+				 ss->variant->ss_clks[i].name, cr,
+				 ss->variant->ss_clks[i].max_freq);
+	}
+	return 0;
+}
+
+static int sun8i_ss_probe(struct platform_device *pdev)
+{
+	struct sun8i_ss_dev *ss;
+	int err, irq;
+	u32 v;
+
+	ss = devm_kzalloc(&pdev->dev, sizeof(*ss), GFP_KERNEL);
+	if (!ss)
+		return -ENOMEM;
+
+	ss->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ss);
+
+	ss->variant = of_device_get_match_data(&pdev->dev);
+	if (!ss->variant) {
+		dev_err(&pdev->dev, "Missing Crypto Engine variant\n");
+		return -EINVAL;
+	}
+
+	ss->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ss->base))
+		return PTR_ERR(ss->base);
+
+	err = sun8i_ss_get_clks(ss);
+	if (err)
+		return err;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(ss->dev, "Cannot get SecuritySystem IRQ\n");
+		return irq;
+	}
+
+	ss->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(ss->reset)) {
+		if (PTR_ERR(ss->reset) == -EPROBE_DEFER)
+			return PTR_ERR(ss->reset);
+		dev_err(&pdev->dev, "No reset control found\n");
+		return PTR_ERR(ss->reset);
+	}
+
+	mutex_init(&ss->mlock);
+
+	err = allocate_flows(ss);
+	if (err)
+		return err;
+
+	err = sun8i_ss_pm_init(ss);
+	if (err)
+		goto error_pm;
+
+	err = devm_request_irq(&pdev->dev, irq, ss_irq_handler, 0, "sun8i-ss", ss);
+	if (err) {
+		dev_err(ss->dev, "Cannot request SecuritySystem IRQ (err=%d)\n", err);
+		goto error_irq;
+	}
+
+	err = sun8i_ss_register_algs(ss);
+	if (err)
+		goto error_alg;
+
+	err = pm_runtime_get_sync(ss->dev);
+	if (err < 0)
+		goto error_alg;
+
+	v = readl(ss->base + SS_CTL_REG);
+	v >>= SS_DIE_ID_SHIFT;
+	v &= SS_DIE_ID_MASK;
+	dev_info(&pdev->dev, "Security System Die ID %x\n", v);
+
+	pm_runtime_put_sync(ss->dev);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	/* Ignore error of debugfs */
+	ss->dbgfs_dir = debugfs_create_dir("sun8i-ss", NULL);
+	ss->dbgfs_stats = debugfs_create_file("stats", 0444,
+					      ss->dbgfs_dir, ss,
+					      &sun8i_ss_debugfs_fops);
+#endif
+
+	return 0;
+error_alg:
+	sun8i_ss_unregister_algs(ss);
+error_irq:
+	sun8i_ss_pm_exit(ss);
+error_pm:
+	sun8i_ss_free_flows(ss, MAXFLOW - 1);
+	return err;
+}
+
+static int sun8i_ss_remove(struct platform_device *pdev)
+{
+	struct sun8i_ss_dev *ss = platform_get_drvdata(pdev);
+
+	sun8i_ss_unregister_algs(ss);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	debugfs_remove_recursive(ss->dbgfs_dir);
+#endif
+
+	sun8i_ss_free_flows(ss, MAXFLOW - 1);
+
+	sun8i_ss_pm_exit(ss);
+
+	return 0;
+}
+
+static const struct of_device_id sun8i_ss_crypto_of_match_table[] = {
+	{ .compatible = "allwinner,sun8i-a83t-crypto",
+	  .data = &ss_a83t_variant },
+	{ .compatible = "allwinner,sun9i-a80-crypto",
+	  .data = &ss_a80_variant },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sun8i_ss_crypto_of_match_table);
+
+static struct platform_driver sun8i_ss_driver = {
+	.probe		 = sun8i_ss_probe,
+	.remove		 = sun8i_ss_remove,
+	.driver		 = {
+		.name		= "sun8i-ss",
+		.pm             = &sun8i_ss_pm_ops,
+		.of_match_table	= sun8i_ss_crypto_of_match_table,
+	},
+};
+
+module_platform_driver(sun8i_ss_driver);
+
+MODULE_DESCRIPTION("Allwinner SecuritySystem cryptographic offloader");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin Labbe <clabbe.montjoie@gmail.com>");
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
new file mode 100644
index 000000000000..b5f855f3de10
--- /dev/null
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sun8i-ss.h - hardware cryptographic offloader for
+ * Allwinner A80/A83T SoC
+ *
+ * Copyright (C) 2016-2019 Corentin LABBE <clabbe.montjoie@gmail.com>
+ */
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/engine.h>
+#include <crypto/skcipher.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/crypto.h>
+
+#define SS_ENCRYPTION		0
+#define SS_DECRYPTION		BIT(6)
+
+#define SS_ALG_AES		0
+#define SS_ALG_DES		(1 << 2)
+#define SS_ALG_3DES		(2 << 2)
+
+#define SS_CTL_REG		0x00
+#define SS_INT_CTL_REG		0x04
+#define SS_INT_STA_REG		0x08
+#define SS_KEY_ADR_REG		0x10
+#define SS_IV_ADR_REG		0x18
+#define SS_SRC_ADR_REG		0x20
+#define SS_DST_ADR_REG		0x28
+#define SS_LEN_ADR_REG		0x30
+
+#define SS_ID_NOTSUPP		0xFF
+
+#define SS_ID_CIPHER_AES	0
+#define SS_ID_CIPHER_DES	1
+#define SS_ID_CIPHER_DES3	2
+#define SS_ID_CIPHER_MAX	3
+
+#define SS_ID_OP_ECB	0
+#define SS_ID_OP_CBC	1
+#define SS_ID_OP_MAX	2
+
+#define SS_AES_128BITS 0
+#define SS_AES_192BITS 1
+#define SS_AES_256BITS 2
+
+#define SS_OP_ECB	0
+#define SS_OP_CBC	(1 << 13)
+
+#define SS_FLOW0	BIT(30)
+#define SS_FLOW1	BIT(31)
+
+#define MAX_SG 8
+
+#define MAXFLOW 2
+
+#define SS_MAX_CLOCKS 2
+
+#define SS_DIE_ID_SHIFT	20
+#define SS_DIE_ID_MASK	0x07
+
+/*
+ * struct ss_clock - Describe clocks used by sun8i-ss
+ * @name:       Name of clock needed by this variant
+ * @freq:       Frequency to set for each clock
+ * @max_freq:   Maximum frequency for each clock
+ */
+struct ss_clock {
+	const char *name;
+	unsigned long freq;
+	unsigned long max_freq;
+};
+
+/*
+ * struct ss_variant - Describe SS capability for each variant hardware
+ * @alg_cipher:	list of supported ciphers. for each SS_ID_ this will give the
+ *              coresponding SS_ALG_XXX value
+ * @op_mode:	list of supported block modes
+ * @ss_clks!	list of clock needed by this variant
+ */
+struct ss_variant {
+	char alg_cipher[SS_ID_CIPHER_MAX];
+	u32 op_mode[SS_ID_OP_MAX];
+	struct ss_clock ss_clks[SS_MAX_CLOCKS];
+};
+
+struct sginfo {
+	u32 addr;
+	u32 len;
+};
+
+/*
+ * struct sun8i_ss_flow - Information used by each flow
+ * @engine:	ptr to the crypto_engine for this flow
+ * @complete:	completion for the current task on this flow
+ * @status:	set to 1 by interrupt if task is done
+ * @stat_req:	number of request done by this flow
+ */
+struct sun8i_ss_flow {
+	struct crypto_engine *engine;
+	struct completion complete;
+	int status;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	unsigned long stat_req;
+#endif
+};
+
+/*
+ * struct sun8i_ss_dev - main container for all this driver information
+ * @base:	base address of SS
+ * @ssclks:	clocks used by SS
+ * @reset:	pointer to reset controller
+ * @dev:	the platform device
+ * @mlock:	Control access to device registers
+ * @flows:	array of all flow
+ * @flow:	flow to use in next request
+ * @variant:	pointer to variant specific data
+ * @dbgfs_dir:	Debugfs dentry for statistic directory
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ */
+struct sun8i_ss_dev {
+	void __iomem *base;
+	struct clk *ssclks[SS_MAX_CLOCKS];
+	struct reset_control *reset;
+	struct device *dev;
+	struct mutex mlock;
+	struct sun8i_ss_flow *flows;
+	atomic_t flow;
+	const struct ss_variant *variant;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_stats;
+#endif
+};
+
+/*
+ * struct sun8i_cipher_req_ctx - context for a skcipher request
+ * @t_src:	list of mapped SGs with their size
+ * @t_dst:	list of mapped SGs with their size
+ * @p_key:	DMA address of the key
+ * @p_iv:	DMA address of the IV
+ * @method:	current algorithm for this request
+ * @op_mode:	op_mode for this request
+ * @op_dir:	direction (encrypt vs decrypt) for this request
+ * @flow:	the flow to use for this request
+ * @ivlen:	size of biv
+ * @keylen:	keylen for this request
+ * @biv:	buffer which contain the IV
+ */
+struct sun8i_cipher_req_ctx {
+	struct sginfo t_src[MAX_SG];
+	struct sginfo t_dst[MAX_SG];
+	u32 p_key;
+	u32 p_iv;
+	u32 method;
+	u32 op_mode;
+	u32 op_dir;
+	int flow;
+	unsigned int ivlen;
+	unsigned int keylen;
+	void *biv;
+};
+
+/*
+ * struct sun8i_cipher_tfm_ctx - context for a skcipher TFM
+ * @enginectx:		crypto_engine used by this TFM
+ * @key:		pointer to key data
+ * @keylen:		len of the key
+ * @ss:			pointer to the private data of driver handling this TFM
+ * @fallback_tfm:	pointer to the fallback TFM
+ */
+struct sun8i_cipher_tfm_ctx {
+	struct crypto_engine_ctx enginectx;
+	u32 *key;
+	u32 keylen;
+	struct sun8i_ss_dev *ss;
+	struct crypto_sync_skcipher *fallback_tfm;
+};
+
+/*
+ * struct sun8i_ss_alg_template - crypto_alg template
+ * @type:		the CRYPTO_ALG_TYPE for this template
+ * @ss_algo_id:		the SS_ID for this template
+ * @ss_blockmode:	the type of block operation SS_ID
+ * @ss:			pointer to the sun8i_ss_dev structure associated with
+ *			this template
+ * @alg:		one of sub struct must be used
+ * @stat_req:		number of request done on this template
+ * @stat_fb:		total of all data len done on this template
+ */
+struct sun8i_ss_alg_template {
+	u32 type;
+	u32 ss_algo_id;
+	u32 ss_blockmode;
+	struct sun8i_ss_dev *ss;
+	union {
+		struct skcipher_alg skcipher;
+	} alg;
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+	unsigned long stat_req;
+	unsigned long stat_fb;
+#endif
+};
+
+int sun8i_ss_enqueue(struct crypto_async_request *areq, u32 type);
+
+int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen);
+int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen);
+int sun8i_ss_cipher_init(struct crypto_tfm *tfm);
+void sun8i_ss_cipher_exit(struct crypto_tfm *tfm);
+int sun8i_ss_skdecrypt(struct skcipher_request *areq);
+int sun8i_ss_skencrypt(struct skcipher_request *areq);
+
+int sun8i_ss_get_engine_number(struct sun8i_ss_dev *ss);
+
+int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx, const char *name);
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index a42f8619589d..f7fc0c464125 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -128,12 +128,9 @@ static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher,
 	struct dynamic_sa_ctl *sa;
 	int    rc;
 
-	if (keylen != AES_KEYSIZE_256 &&
-		keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_128) {
-		crypto_skcipher_set_flags(cipher,
-				CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != AES_KEYSIZE_256 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_128)
 		return -EINVAL;
-	}
 
 	/* Create SA */
 	if (ctx->sa_in || ctx->sa_out)
@@ -292,19 +289,11 @@ static int crypto4xx_sk_setup_fallback(struct crypto4xx_ctx *ctx,
 				       const u8 *key,
 				       unsigned int keylen)
 {
-	int rc;
-
 	crypto_sync_skcipher_clear_flags(ctx->sw_cipher.cipher,
 				    CRYPTO_TFM_REQ_MASK);
 	crypto_sync_skcipher_set_flags(ctx->sw_cipher.cipher,
 		crypto_skcipher_get_flags(cipher) & CRYPTO_TFM_REQ_MASK);
-	rc = crypto_sync_skcipher_setkey(ctx->sw_cipher.cipher, key, keylen);
-	crypto_skcipher_clear_flags(cipher, CRYPTO_TFM_RES_MASK);
-	crypto_skcipher_set_flags(cipher,
-		crypto_sync_skcipher_get_flags(ctx->sw_cipher.cipher) &
-			CRYPTO_TFM_RES_MASK);
-
-	return rc;
+	return crypto_sync_skcipher_setkey(ctx->sw_cipher.cipher, key, keylen);
 }
 
 int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
@@ -379,18 +368,10 @@ static int crypto4xx_aead_setup_fallback(struct crypto4xx_ctx *ctx,
 					 const u8 *key,
 					 unsigned int keylen)
 {
-	int rc;
-
 	crypto_aead_clear_flags(ctx->sw_cipher.aead, CRYPTO_TFM_REQ_MASK);
 	crypto_aead_set_flags(ctx->sw_cipher.aead,
 		crypto_aead_get_flags(cipher) & CRYPTO_TFM_REQ_MASK);
-	rc = crypto_aead_setkey(ctx->sw_cipher.aead, key, keylen);
-	crypto_aead_clear_flags(cipher, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(cipher,
-		crypto_aead_get_flags(ctx->sw_cipher.aead) &
-			CRYPTO_TFM_RES_MASK);
-
-	return rc;
+	return crypto_aead_setkey(ctx->sw_cipher.aead, key, keylen);
 }
 
 /**
@@ -551,10 +532,8 @@ int crypto4xx_setkey_aes_gcm(struct crypto_aead *cipher,
 	struct dynamic_sa_ctl *sa;
 	int    rc = 0;
 
-	if (crypto4xx_aes_gcm_validate_keylen(keylen) != 0) {
-		crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (crypto4xx_aes_gcm_validate_keylen(keylen) != 0)
 		return -EINVAL;
-	}
 
 	rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
 	if (rc)
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index de5e9352e920..981de43ea5e2 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -169,7 +169,7 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev)
 	int i;
 	dev->pdr = dma_alloc_coherent(dev->core_dev->device,
 				      sizeof(struct ce_pd) * PPC4XX_NUM_PD,
-				      &dev->pdr_pa, GFP_ATOMIC);
+				      &dev->pdr_pa, GFP_KERNEL);
 	if (!dev->pdr)
 		return -ENOMEM;
 
@@ -185,13 +185,13 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev)
 	dev->shadow_sa_pool = dma_alloc_coherent(dev->core_dev->device,
 				   sizeof(union shadow_sa_buf) * PPC4XX_NUM_PD,
 				   &dev->shadow_sa_pool_pa,
-				   GFP_ATOMIC);
+				   GFP_KERNEL);
 	if (!dev->shadow_sa_pool)
 		return -ENOMEM;
 
 	dev->shadow_sr_pool = dma_alloc_coherent(dev->core_dev->device,
 			 sizeof(struct sa_state_record) * PPC4XX_NUM_PD,
-			 &dev->shadow_sr_pool_pa, GFP_ATOMIC);
+			 &dev->shadow_sr_pool_pa, GFP_KERNEL);
 	if (!dev->shadow_sr_pool)
 		return -ENOMEM;
 	for (i = 0; i < PPC4XX_NUM_PD; i++) {
@@ -277,7 +277,7 @@ static u32 crypto4xx_build_gdr(struct crypto4xx_device *dev)
 {
 	dev->gdr = dma_alloc_coherent(dev->core_dev->device,
 				      sizeof(struct ce_gd) * PPC4XX_NUM_GD,
-				      &dev->gdr_pa, GFP_ATOMIC);
+				      &dev->gdr_pa, GFP_KERNEL);
 	if (!dev->gdr)
 		return -ENOMEM;
 
@@ -286,7 +286,8 @@ static u32 crypto4xx_build_gdr(struct crypto4xx_device *dev)
 
 static inline void crypto4xx_destroy_gdr(struct crypto4xx_device *dev)
 {
-	dma_free_coherent(dev->core_dev->device,
+	if (dev->gdr)
+		dma_free_coherent(dev->core_dev->device,
 			  sizeof(struct ce_gd) * PPC4XX_NUM_GD,
 			  dev->gdr, dev->gdr_pa);
 }
@@ -354,24 +355,20 @@ static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev)
 {
 	int i;
 
+	dev->scatter_buffer_va =
+		dma_alloc_coherent(dev->core_dev->device,
+			PPC4XX_SD_BUFFER_SIZE * PPC4XX_NUM_SD,
+			&dev->scatter_buffer_pa, GFP_KERNEL);
+	if (!dev->scatter_buffer_va)
+		return -ENOMEM;
+
 	/* alloc memory for scatter descriptor ring */
 	dev->sdr = dma_alloc_coherent(dev->core_dev->device,
 				      sizeof(struct ce_sd) * PPC4XX_NUM_SD,
-				      &dev->sdr_pa, GFP_ATOMIC);
+				      &dev->sdr_pa, GFP_KERNEL);
 	if (!dev->sdr)
 		return -ENOMEM;
 
-	dev->scatter_buffer_va =
-		dma_alloc_coherent(dev->core_dev->device,
-			PPC4XX_SD_BUFFER_SIZE * PPC4XX_NUM_SD,
-			&dev->scatter_buffer_pa, GFP_ATOMIC);
-	if (!dev->scatter_buffer_va) {
-		dma_free_coherent(dev->core_dev->device,
-				  sizeof(struct ce_sd) * PPC4XX_NUM_SD,
-				  dev->sdr, dev->sdr_pa);
-		return -ENOMEM;
-	}
-
 	for (i = 0; i < PPC4XX_NUM_SD; i++) {
 		dev->sdr[i].ptr = dev->scatter_buffer_pa +
 				  PPC4XX_SD_BUFFER_SIZE * i;
@@ -1443,16 +1440,15 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	spin_lock_init(&core_dev->lock);
 	INIT_LIST_HEAD(&core_dev->dev->alg_list);
 	ratelimit_default_init(&core_dev->dev->aead_ratelimit);
+	rc = crypto4xx_build_sdr(core_dev->dev);
+	if (rc)
+		goto err_build_sdr;
 	rc = crypto4xx_build_pdr(core_dev->dev);
 	if (rc)
-		goto err_build_pdr;
+		goto err_build_sdr;
 
 	rc = crypto4xx_build_gdr(core_dev->dev);
 	if (rc)
-		goto err_build_pdr;
-
-	rc = crypto4xx_build_sdr(core_dev->dev);
-	if (rc)
 		goto err_build_sdr;
 
 	/* Init tasklet for bottom half processing */
@@ -1497,7 +1493,6 @@ err_iomap:
 err_build_sdr:
 	crypto4xx_destroy_sdr(core_dev->dev);
 	crypto4xx_destroy_gdr(core_dev->dev);
-err_build_pdr:
 	crypto4xx_destroy_pdr(core_dev->dev);
 	kfree(core_dev->dev);
 err_alloc_dev:
diff --git a/drivers/crypto/amlogic/Kconfig b/drivers/crypto/amlogic/Kconfig
new file mode 100644
index 000000000000..cf9547602670
--- /dev/null
+++ b/drivers/crypto/amlogic/Kconfig
@@ -0,0 +1,25 @@
+config CRYPTO_DEV_AMLOGIC_GXL
+	tristate "Support for amlogic cryptographic offloader"
+	depends on HAS_IOMEM
+	default y if ARCH_MESON
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ENGINE
+	select CRYPTO_ECB
+	select CRYPTO_CBC
+	select CRYPTO_AES
+	help
+	  Select y here to have support for the cryptographic offloader
+	  available on Amlogic GXL SoC.
+	  This hardware handles AES ciphers in ECB/CBC mode.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called amlogic-gxl-crypto.
+
+config CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	bool "Enable amlogic stats"
+	depends on CRYPTO_DEV_AMLOGIC_GXL
+	depends on DEBUG_FS
+	help
+	  Say y to enable amlogic-crypto debug stats.
+	  This will create /sys/kernel/debug/gxl-crypto/stats for displaying
+	  the number of requests per flow and per algorithm.
diff --git a/drivers/crypto/amlogic/Makefile b/drivers/crypto/amlogic/Makefile
new file mode 100644
index 000000000000..39057e62c13e
--- /dev/null
+++ b/drivers/crypto/amlogic/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic-gxl-crypto.o
+amlogic-gxl-crypto-y := amlogic-gxl-core.o amlogic-gxl-cipher.o
diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
new file mode 100644
index 000000000000..9819dd50fbad
--- /dev/null
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * amlogic-cipher.c - hardware cryptographic offloader for Amlogic GXL SoC
+ *
+ * Copyright (C) 2018-2019 Corentin LABBE <clabbe@baylibre.com>
+ *
+ * This file add support for AES cipher with 128,192,256 bits keysize in
+ * CBC and ECB mode.
+ */
+
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <crypto/internal/skcipher.h>
+#include "amlogic-gxl.h"
+
+static int get_engine_number(struct meson_dev *mc)
+{
+	return atomic_inc_return(&mc->flow) % MAXFLOW;
+}
+
+static bool meson_cipher_need_fallback(struct skcipher_request *areq)
+{
+	struct scatterlist *src_sg = areq->src;
+	struct scatterlist *dst_sg = areq->dst;
+
+	if (areq->cryptlen == 0)
+		return true;
+
+	if (sg_nents(src_sg) != sg_nents(dst_sg))
+		return true;
+
+	/* KEY/IV descriptors use 3 desc */
+	if (sg_nents(src_sg) > MAXDESC - 3 || sg_nents(dst_sg) > MAXDESC - 3)
+		return true;
+
+	while (src_sg && dst_sg) {
+		if ((src_sg->length % 16) != 0)
+			return true;
+		if ((dst_sg->length % 16) != 0)
+			return true;
+		if (src_sg->length != dst_sg->length)
+			return true;
+		if (!IS_ALIGNED(src_sg->offset, sizeof(u32)))
+			return true;
+		if (!IS_ALIGNED(dst_sg->offset, sizeof(u32)))
+			return true;
+		src_sg = sg_next(src_sg);
+		dst_sg = sg_next(dst_sg);
+	}
+
+	return false;
+}
+
+static int meson_cipher_do_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	int err;
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct meson_alg_template *algt;
+#endif
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, op->fallback_tfm);
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+	algt->stat_fb++;
+#endif
+	skcipher_request_set_sync_tfm(req, op->fallback_tfm);
+	skcipher_request_set_callback(req, areq->base.flags, NULL, NULL);
+	skcipher_request_set_crypt(req, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (rctx->op_dir == MESON_DECRYPT)
+		err = crypto_skcipher_decrypt(req);
+	else
+		err = crypto_skcipher_encrypt(req);
+	skcipher_request_zero(req);
+	return err;
+}
+
+static int meson_cipher(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct meson_dev *mc = op->mc;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct meson_alg_template *algt;
+	int flow = rctx->flow;
+	unsigned int todo, eat, len;
+	struct scatterlist *src_sg = areq->src;
+	struct scatterlist *dst_sg = areq->dst;
+	struct meson_desc *desc;
+	int nr_sgs, nr_sgd;
+	int i, err = 0;
+	unsigned int keyivlen, ivsize, offset, tloffset;
+	dma_addr_t phykeyiv;
+	void *backup_iv = NULL, *bkeyiv;
+	__le32 v;
+
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+
+	dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u flow=%d\n", __func__,
+		crypto_tfm_alg_name(areq->base.tfm),
+		areq->cryptlen,
+		rctx->op_dir, crypto_skcipher_ivsize(tfm),
+		op->keylen, flow);
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	algt->stat_req++;
+	mc->chanlist[flow].stat_req++;
+#endif
+
+	/*
+	 * The hardware expect a list of meson_desc structures.
+	 * The 2 first structures store key
+	 * The third stores IV
+	 */
+	bkeyiv = kzalloc(48, GFP_KERNEL | GFP_DMA);
+	if (!bkeyiv)
+		return -ENOMEM;
+
+	memcpy(bkeyiv, op->key, op->keylen);
+	keyivlen = op->keylen;
+
+	ivsize = crypto_skcipher_ivsize(tfm);
+	if (areq->iv && ivsize > 0) {
+		if (ivsize > areq->cryptlen) {
+			dev_err(mc->dev, "invalid ivsize=%d vs len=%d\n", ivsize, areq->cryptlen);
+			err = -EINVAL;
+			goto theend;
+		}
+		memcpy(bkeyiv + 32, areq->iv, ivsize);
+		keyivlen = 48;
+		if (rctx->op_dir == MESON_DECRYPT) {
+			backup_iv = kzalloc(ivsize, GFP_KERNEL);
+			if (!backup_iv) {
+				err = -ENOMEM;
+				goto theend;
+			}
+			offset = areq->cryptlen - ivsize;
+			scatterwalk_map_and_copy(backup_iv, areq->src, offset,
+						 ivsize, 0);
+		}
+	}
+	if (keyivlen == 24)
+		keyivlen = 32;
+
+	phykeyiv = dma_map_single(mc->dev, bkeyiv, keyivlen,
+				  DMA_TO_DEVICE);
+	err = dma_mapping_error(mc->dev, phykeyiv);
+	if (err) {
+		dev_err(mc->dev, "Cannot DMA MAP KEY IV\n");
+		goto theend;
+	}
+
+	tloffset = 0;
+	eat = 0;
+	i = 0;
+	while (keyivlen > eat) {
+		desc = &mc->chanlist[flow].tl[tloffset];
+		memset(desc, 0, sizeof(struct meson_desc));
+		todo = min(keyivlen - eat, 16u);
+		desc->t_src = cpu_to_le32(phykeyiv + i * 16);
+		desc->t_dst = cpu_to_le32(i * 16);
+		v = (MODE_KEY << 20) | DESC_OWN | 16;
+		desc->t_status = cpu_to_le32(v);
+
+		eat += todo;
+		i++;
+		tloffset++;
+	}
+
+	if (areq->src == areq->dst) {
+		nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
+				    DMA_BIDIRECTIONAL);
+		if (nr_sgs < 0) {
+			dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend;
+		}
+		nr_sgd = nr_sgs;
+	} else {
+		nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
+				    DMA_TO_DEVICE);
+		if (nr_sgs < 0 || nr_sgs > MAXDESC - 3) {
+			dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
+			err = -EINVAL;
+			goto theend;
+		}
+		nr_sgd = dma_map_sg(mc->dev, areq->dst, sg_nents(areq->dst),
+				    DMA_FROM_DEVICE);
+		if (nr_sgd < 0 || nr_sgd > MAXDESC - 3) {
+			dev_err(mc->dev, "Invalid SG count %d\n", nr_sgd);
+			err = -EINVAL;
+			goto theend;
+		}
+	}
+
+	src_sg = areq->src;
+	dst_sg = areq->dst;
+	len = areq->cryptlen;
+	while (src_sg) {
+		desc = &mc->chanlist[flow].tl[tloffset];
+		memset(desc, 0, sizeof(struct meson_desc));
+
+		desc->t_src = cpu_to_le32(sg_dma_address(src_sg));
+		desc->t_dst = cpu_to_le32(sg_dma_address(dst_sg));
+		todo = min(len, sg_dma_len(src_sg));
+		v = (op->keymode << 20) | DESC_OWN | todo | (algt->blockmode << 26);
+		if (rctx->op_dir)
+			v |= DESC_ENCRYPTION;
+		len -= todo;
+
+		if (!sg_next(src_sg))
+			v |= DESC_LAST;
+		desc->t_status = cpu_to_le32(v);
+		tloffset++;
+		src_sg = sg_next(src_sg);
+		dst_sg = sg_next(dst_sg);
+	}
+
+	reinit_completion(&mc->chanlist[flow].complete);
+	mc->chanlist[flow].status = 0;
+	writel(mc->chanlist[flow].t_phy | 2, mc->base + (flow << 2));
+	wait_for_completion_interruptible_timeout(&mc->chanlist[flow].complete,
+						  msecs_to_jiffies(500));
+	if (mc->chanlist[flow].status == 0) {
+		dev_err(mc->dev, "DMA timeout for flow %d\n", flow);
+		err = -EINVAL;
+	}
+
+	dma_unmap_single(mc->dev, phykeyiv, keyivlen, DMA_TO_DEVICE);
+
+	if (areq->src == areq->dst) {
+		dma_unmap_sg(mc->dev, areq->src, nr_sgs, DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(mc->dev, areq->src, nr_sgs, DMA_TO_DEVICE);
+		dma_unmap_sg(mc->dev, areq->dst, nr_sgd, DMA_FROM_DEVICE);
+	}
+
+	if (areq->iv && ivsize > 0) {
+		if (rctx->op_dir == MESON_DECRYPT) {
+			memcpy(areq->iv, backup_iv, ivsize);
+		} else {
+			scatterwalk_map_and_copy(areq->iv, areq->dst,
+						 areq->cryptlen - ivsize,
+						 ivsize, 0);
+		}
+	}
+theend:
+	kzfree(bkeyiv);
+	kzfree(backup_iv);
+
+	return err;
+}
+
+static int meson_handle_cipher_request(struct crypto_engine *engine,
+				       void *areq)
+{
+	int err;
+	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+	err = meson_cipher(breq);
+	crypto_finalize_skcipher_request(engine, breq, err);
+
+	return 0;
+}
+
+int meson_skdecrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	rctx->op_dir = MESON_DECRYPT;
+	if (meson_cipher_need_fallback(areq))
+		return meson_cipher_do_fallback(areq);
+	e = get_engine_number(op->mc);
+	engine = op->mc->chanlist[e].engine;
+	rctx->flow = e;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int meson_skencrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_engine *engine;
+	int e;
+
+	rctx->op_dir = MESON_ENCRYPT;
+	if (meson_cipher_need_fallback(areq))
+		return meson_cipher_do_fallback(areq);
+	e = get_engine_number(op->mc);
+	engine = op->mc->chanlist[e].engine;
+	rctx->flow = e;
+
+	return crypto_transfer_skcipher_request_to_engine(engine, areq);
+}
+
+int meson_cipher_init(struct crypto_tfm *tfm)
+{
+	struct meson_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct meson_alg_template *algt;
+	const char *name = crypto_tfm_alg_name(tfm);
+	struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(sktfm);
+
+	memset(op, 0, sizeof(struct meson_cipher_tfm_ctx));
+
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+	op->mc = algt->mc;
+
+	sktfm->reqsize = sizeof(struct meson_cipher_req_ctx);
+
+	op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(op->fallback_tfm)) {
+		dev_err(op->mc->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(op->fallback_tfm));
+		return PTR_ERR(op->fallback_tfm);
+	}
+
+	op->enginectx.op.do_one_request = meson_handle_cipher_request;
+	op->enginectx.op.prepare_request = NULL;
+	op->enginectx.op.unprepare_request = NULL;
+
+	return 0;
+}
+
+void meson_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct meson_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	crypto_free_sync_skcipher(op->fallback_tfm);
+}
+
+int meson_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+		     unsigned int keylen)
+{
+	struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct meson_dev *mc = op->mc;
+
+	switch (keylen) {
+	case 128 / 8:
+		op->keymode = MODE_AES_128;
+		break;
+	case 192 / 8:
+		op->keymode = MODE_AES_192;
+		break;
+	case 256 / 8:
+		op->keymode = MODE_AES_256;
+		break;
+	default:
+		dev_dbg(mc->dev, "ERROR: Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+	if (op->key) {
+		memzero_explicit(op->key, op->keylen);
+		kfree(op->key);
+	}
+	op->keylen = keylen;
+	op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!op->key)
+		return -ENOMEM;
+
+	return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
new file mode 100644
index 000000000000..9d4ead2f7ebb
--- /dev/null
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * amlgoic-core.c - hardware cryptographic offloader for Amlogic GXL SoC
+ *
+ * Copyright (C) 2018-2019 Corentin Labbe <clabbe@baylibre.com>
+ *
+ * Core file which registers crypto algorithms supported by the hardware.
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/dma-mapping.h>
+
+#include "amlogic-gxl.h"
+
+static irqreturn_t meson_irq_handler(int irq, void *data)
+{
+	struct meson_dev *mc = (struct meson_dev *)data;
+	int flow;
+	u32 p;
+
+	for (flow = 0; flow < MAXFLOW; flow++) {
+		if (mc->irqs[flow] == irq) {
+			p = readl(mc->base + ((0x04 + flow) << 2));
+			if (p) {
+				writel_relaxed(0xF, mc->base + ((0x4 + flow) << 2));
+				mc->chanlist[flow].status = 1;
+				complete(&mc->chanlist[flow].complete);
+				return IRQ_HANDLED;
+			}
+			dev_err(mc->dev, "%s %d Got irq for flow %d but ctrl is empty\n", __func__, irq, flow);
+		}
+	}
+
+	dev_err(mc->dev, "%s %d from unknown irq\n", __func__, irq);
+	return IRQ_HANDLED;
+}
+
+static struct meson_alg_template mc_algs[] = {
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.blockmode = MESON_OPMODE_CBC,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-gxl",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct meson_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = meson_cipher_init,
+			.cra_exit = meson_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= meson_aes_setkey,
+		.encrypt	= meson_skencrypt,
+		.decrypt	= meson_skdecrypt,
+	}
+},
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.blockmode = MESON_OPMODE_ECB,
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-gxl",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct meson_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,
+			.cra_init = meson_cipher_init,
+			.cra_exit = meson_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= meson_aes_setkey,
+		.encrypt	= meson_skencrypt,
+		.decrypt	= meson_skdecrypt,
+	}
+},
+};
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+static int meson_dbgfs_read(struct seq_file *seq, void *v)
+{
+	struct meson_dev *mc = seq->private;
+	int i;
+
+	for (i = 0; i < MAXFLOW; i++)
+		seq_printf(seq, "Channel %d: nreq %lu\n", i, mc->chanlist[i].stat_req);
+
+	for (i = 0; i < ARRAY_SIZE(mc_algs); i++) {
+		switch (mc_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			seq_printf(seq, "%s %s %lu %lu\n",
+				   mc_algs[i].alg.skcipher.base.cra_driver_name,
+				   mc_algs[i].alg.skcipher.base.cra_name,
+				   mc_algs[i].stat_req, mc_algs[i].stat_fb);
+			break;
+		}
+	}
+	return 0;
+}
+
+static int meson_dbgfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, meson_dbgfs_read, inode->i_private);
+}
+
+static const struct file_operations meson_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = meson_dbgfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif
+
+static void meson_free_chanlist(struct meson_dev *mc, int i)
+{
+	while (i >= 0) {
+		crypto_engine_exit(mc->chanlist[i].engine);
+		if (mc->chanlist[i].tl)
+			dma_free_coherent(mc->dev, sizeof(struct meson_desc) * MAXDESC,
+					  mc->chanlist[i].tl,
+					  mc->chanlist[i].t_phy);
+		i--;
+	}
+}
+
+/*
+ * Allocate the channel list structure
+ */
+static int meson_allocate_chanlist(struct meson_dev *mc)
+{
+	int i, err;
+
+	mc->chanlist = devm_kcalloc(mc->dev, MAXFLOW,
+				    sizeof(struct meson_flow), GFP_KERNEL);
+	if (!mc->chanlist)
+		return -ENOMEM;
+
+	for (i = 0; i < MAXFLOW; i++) {
+		init_completion(&mc->chanlist[i].complete);
+
+		mc->chanlist[i].engine = crypto_engine_alloc_init(mc->dev, true);
+		if (!mc->chanlist[i].engine) {
+			dev_err(mc->dev, "Cannot allocate engine\n");
+			i--;
+			err = -ENOMEM;
+			goto error_engine;
+		}
+		err = crypto_engine_start(mc->chanlist[i].engine);
+		if (err) {
+			dev_err(mc->dev, "Cannot start engine\n");
+			goto error_engine;
+		}
+		mc->chanlist[i].tl = dma_alloc_coherent(mc->dev,
+							sizeof(struct meson_desc) * MAXDESC,
+							&mc->chanlist[i].t_phy,
+							GFP_KERNEL);
+		if (!mc->chanlist[i].tl) {
+			err = -ENOMEM;
+			goto error_engine;
+		}
+	}
+	return 0;
+error_engine:
+	meson_free_chanlist(mc, i);
+	return err;
+}
+
+static int meson_register_algs(struct meson_dev *mc)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(mc_algs); i++) {
+		mc_algs[i].mc = mc;
+		switch (mc_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			err = crypto_register_skcipher(&mc_algs[i].alg.skcipher);
+			if (err) {
+				dev_err(mc->dev, "Fail to register %s\n",
+					mc_algs[i].alg.skcipher.base.cra_name);
+				mc_algs[i].mc = NULL;
+				return err;
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static void meson_unregister_algs(struct meson_dev *mc)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mc_algs); i++) {
+		if (!mc_algs[i].mc)
+			continue;
+		switch (mc_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&mc_algs[i].alg.skcipher);
+			break;
+		}
+	}
+}
+
+static int meson_crypto_probe(struct platform_device *pdev)
+{
+	struct meson_dev *mc;
+	int err, i;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
+	mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	mc->dev = &pdev->dev;
+	platform_set_drvdata(pdev, mc);
+
+	mc->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mc->base)) {
+		err = PTR_ERR(mc->base);
+		dev_err(&pdev->dev, "Cannot request MMIO err=%d\n", err);
+		return err;
+	}
+	mc->busclk = devm_clk_get(&pdev->dev, "blkmv");
+	if (IS_ERR(mc->busclk)) {
+		err = PTR_ERR(mc->busclk);
+		dev_err(&pdev->dev, "Cannot get core clock err=%d\n", err);
+		return err;
+	}
+
+	mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL);
+	for (i = 0; i < MAXFLOW; i++) {
+		mc->irqs[i] = platform_get_irq(pdev, i);
+		if (mc->irqs[i] < 0) {
+			dev_err(mc->dev, "Cannot get IRQ for flow %d\n", i);
+			return mc->irqs[i];
+		}
+
+		err = devm_request_irq(&pdev->dev, mc->irqs[i], meson_irq_handler, 0,
+				       "gxl-crypto", mc);
+		if (err < 0) {
+			dev_err(mc->dev, "Cannot request IRQ for flow %d\n", i);
+			return err;
+		}
+	}
+
+	err = clk_prepare_enable(mc->busclk);
+	if (err != 0) {
+		dev_err(&pdev->dev, "Cannot prepare_enable busclk\n");
+		return err;
+	}
+
+	err = meson_allocate_chanlist(mc);
+	if (err)
+		goto error_flow;
+
+	err = meson_register_algs(mc);
+	if (err)
+		goto error_alg;
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	mc->dbgfs_dir = debugfs_create_dir("gxl-crypto", NULL);
+	debugfs_create_file("stats", 0444, mc->dbgfs_dir, mc, &meson_debugfs_fops);
+#endif
+
+	return 0;
+error_alg:
+	meson_unregister_algs(mc);
+error_flow:
+	meson_free_chanlist(mc, MAXFLOW - 1);
+	clk_disable_unprepare(mc->busclk);
+	return err;
+}
+
+static int meson_crypto_remove(struct platform_device *pdev)
+{
+	struct meson_dev *mc = platform_get_drvdata(pdev);
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	debugfs_remove_recursive(mc->dbgfs_dir);
+#endif
+
+	meson_unregister_algs(mc);
+
+	meson_free_chanlist(mc, MAXFLOW - 1);
+
+	clk_disable_unprepare(mc->busclk);
+	return 0;
+}
+
+static const struct of_device_id meson_crypto_of_match_table[] = {
+	{ .compatible = "amlogic,gxl-crypto", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, meson_crypto_of_match_table);
+
+static struct platform_driver meson_crypto_driver = {
+	.probe		 = meson_crypto_probe,
+	.remove		 = meson_crypto_remove,
+	.driver		 = {
+		.name		   = "gxl-crypto",
+		.of_match_table	= meson_crypto_of_match_table,
+	},
+};
+
+module_platform_driver(meson_crypto_driver);
+
+MODULE_DESCRIPTION("Amlogic GXL cryptographic offloader");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin Labbe <clabbe@baylibre.com>");
diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
new file mode 100644
index 000000000000..b7f2de91ab76
--- /dev/null
+++ b/drivers/crypto/amlogic/amlogic-gxl.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * amlogic.h - hardware cryptographic offloader for Amlogic SoC
+ *
+ * Copyright (C) 2018-2019 Corentin LABBE <clabbe@baylibre.com>
+ */
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/skcipher.h>
+#include <linux/debugfs.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
+#define MODE_KEY 1
+#define MODE_AES_128 0x8
+#define MODE_AES_192 0x9
+#define MODE_AES_256 0xa
+
+#define MESON_DECRYPT 0
+#define MESON_ENCRYPT 1
+
+#define MESON_OPMODE_ECB 0
+#define MESON_OPMODE_CBC 1
+
+#define MAXFLOW 2
+
+#define MAXDESC 64
+
+#define DESC_LAST BIT(18)
+#define DESC_ENCRYPTION BIT(28)
+#define DESC_OWN BIT(31)
+
+/*
+ * struct meson_desc - Descriptor for DMA operations
+ * Note that without datasheet, some are unknown
+ * @t_status:	Descriptor of the cipher operation (see description below)
+ * @t_src:	Physical address of data to read
+ * @t_dst:	Physical address of data to write
+ * t_status is segmented like this:
+ * @len:	0-16	length of data to operate
+ * @irq:	17	Ignored by hardware
+ * @eoc:	18	End means the descriptor is the last
+ * @loop:	19	Unknown
+ * @mode:	20-23	Type of algorithm (AES, SHA)
+ * @begin:	24	Unknown
+ * @end:	25	Unknown
+ * @op_mode:	26-27	Blockmode (CBC, ECB)
+ * @enc:	28	0 means decryption, 1 is for encryption
+ * @block:	29	Unknown
+ * @error:	30	Unknown
+ * @owner:	31	owner of the descriptor, 1 own by HW
+ */
+struct meson_desc {
+	__le32 t_status;
+	__le32 t_src;
+	__le32 t_dst;
+};
+
+/*
+ * struct meson_flow - Information used by each flow
+ * @engine:	ptr to the crypto_engine for this flow
+ * @keylen:	keylen for this flow operation
+ * @complete:	completion for the current task on this flow
+ * @status:	set to 1 by interrupt if task is done
+ * @t_phy:	Physical address of task
+ * @tl:		pointer to the current ce_task for this flow
+ * @stat_req:	number of request done by this flow
+ */
+struct meson_flow {
+	struct crypto_engine *engine;
+	struct completion complete;
+	int status;
+	unsigned int keylen;
+	dma_addr_t t_phy;
+	struct meson_desc *tl;
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	unsigned long stat_req;
+#endif
+};
+
+/*
+ * struct meson_dev - main container for all this driver information
+ * @base:	base address of amlogic-crypto
+ * @busclk:	bus clock for amlogic-crypto
+ * @dev:	the platform device
+ * @chanlist:	array of all flow
+ * @flow:	flow to use in next request
+ * @irqs:	IRQ numbers for amlogic-crypto
+ * @dbgfs_dir:	Debugfs dentry for statistic directory
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ */
+struct meson_dev {
+	void __iomem *base;
+	struct clk *busclk;
+	struct device *dev;
+	struct meson_flow *chanlist;
+	atomic_t flow;
+	int *irqs;
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	struct dentry *dbgfs_dir;
+#endif
+};
+
+/*
+ * struct meson_cipher_req_ctx - context for a skcipher request
+ * @op_dir:	direction (encrypt vs decrypt) for this request
+ * @flow:	the flow to use for this request
+ */
+struct meson_cipher_req_ctx {
+	u32 op_dir;
+	int flow;
+};
+
+/*
+ * struct meson_cipher_tfm_ctx - context for a skcipher TFM
+ * @enginectx:		crypto_engine used by this TFM
+ * @key:		pointer to key data
+ * @keylen:		len of the key
+ * @keymode:		The keymode(type and size of key) associated with this TFM
+ * @mc:			pointer to the private data of driver handling this TFM
+ * @fallback_tfm:	pointer to the fallback TFM
+ */
+struct meson_cipher_tfm_ctx {
+	struct crypto_engine_ctx enginectx;
+	u32 *key;
+	u32 keylen;
+	u32 keymode;
+	struct meson_dev *mc;
+	struct crypto_sync_skcipher *fallback_tfm;
+};
+
+/*
+ * struct meson_alg_template - crypto_alg template
+ * @type:		the CRYPTO_ALG_TYPE for this template
+ * @blockmode:		the type of block operation
+ * @mc:			pointer to the meson_dev structure associated with this template
+ * @alg:		one of sub struct must be used
+ * @stat_req:		number of request done on this template
+ * @stat_fb:		total of all data len done on this template
+ */
+struct meson_alg_template {
+	u32 type;
+	u32 blockmode;
+	union {
+		struct skcipher_alg skcipher;
+	} alg;
+	struct meson_dev *mc;
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+	unsigned long stat_req;
+	unsigned long stat_fb;
+#endif
+};
+
+int meson_enqueue(struct crypto_async_request *areq, u32 type);
+
+int meson_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+		     unsigned int keylen);
+int meson_cipher_init(struct crypto_tfm *tfm);
+void meson_cipher_exit(struct crypto_tfm *tfm);
+int meson_skdecrypt(struct skcipher_request *areq);
+int meson_skencrypt(struct skcipher_request *areq);
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 026f193556f9..a6e14491e080 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -21,6 +21,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
+#include <linux/dmaengine.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -36,8 +37,7 @@
 #include <crypto/gcm.h>
 #include <crypto/xts.h>
 #include <crypto/internal/aead.h>
-#include <linux/platform_data/crypto-atmel.h>
-#include <dt-bindings/dma/at91.h>
+#include <crypto/internal/skcipher.h>
 #include "atmel-aes-regs.h"
 #include "atmel-authenc.h"
 
@@ -88,7 +88,6 @@
 struct atmel_aes_caps {
 	bool			has_dualbuff;
 	bool			has_cfb64;
-	bool			has_ctr32;
 	bool			has_gcm;
 	bool			has_xts;
 	bool			has_authenc;
@@ -117,10 +116,11 @@ struct atmel_aes_ctx {
 struct atmel_aes_ctr_ctx {
 	struct atmel_aes_base_ctx	base;
 
-	u32			iv[AES_BLOCK_SIZE / sizeof(u32)];
+	__be32			iv[AES_BLOCK_SIZE / sizeof(u32)];
 	size_t			offset;
 	struct scatterlist	src[2];
 	struct scatterlist	dst[2];
+	u32			blocks;
 };
 
 struct atmel_aes_gcm_ctx {
@@ -129,13 +129,13 @@ struct atmel_aes_gcm_ctx {
 	struct scatterlist	src[2];
 	struct scatterlist	dst[2];
 
-	u32			j0[AES_BLOCK_SIZE / sizeof(u32)];
+	__be32			j0[AES_BLOCK_SIZE / sizeof(u32)];
 	u32			tag[AES_BLOCK_SIZE / sizeof(u32)];
-	u32			ghash[AES_BLOCK_SIZE / sizeof(u32)];
+	__be32			ghash[AES_BLOCK_SIZE / sizeof(u32)];
 	size_t			textlen;
 
-	const u32		*ghash_in;
-	u32			*ghash_out;
+	const __be32		*ghash_in;
+	__be32			*ghash_out;
 	atmel_aes_fn_t		ghash_resume;
 };
 
@@ -145,7 +145,7 @@ struct atmel_aes_xts_ctx {
 	u32			key2[AES_KEYSIZE_256 / sizeof(u32)];
 };
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 struct atmel_aes_authenc_ctx {
 	struct atmel_aes_base_ctx	base;
 	struct atmel_sha_authenc_ctx	*auth;
@@ -154,10 +154,10 @@ struct atmel_aes_authenc_ctx {
 
 struct atmel_aes_reqctx {
 	unsigned long		mode;
-	u32			lastc[AES_BLOCK_SIZE / sizeof(u32)];
+	u8			lastc[AES_BLOCK_SIZE];
 };
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 struct atmel_aes_authenc_reqctx {
 	struct atmel_aes_reqctx	base;
 
@@ -388,13 +388,13 @@ static void atmel_aes_write_n(struct atmel_aes_dev *dd, u32 offset,
 }
 
 static inline void atmel_aes_read_block(struct atmel_aes_dev *dd, u32 offset,
-					u32 *value)
+					void *value)
 {
 	atmel_aes_read_n(dd, offset, value, SIZE_IN_WORDS(AES_BLOCK_SIZE));
 }
 
 static inline void atmel_aes_write_block(struct atmel_aes_dev *dd, u32 offset,
-					 const u32 *value)
+					 const void *value)
 {
 	atmel_aes_write_n(dd, offset, value, SIZE_IN_WORDS(AES_BLOCK_SIZE));
 }
@@ -486,13 +486,65 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd)
 	return (dd->flags & AES_FLAGS_ENCRYPT);
 }
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err);
 #endif
 
+static void atmel_aes_set_iv_as_last_ciphertext_block(struct atmel_aes_dev *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & AES_FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
+static inline struct atmel_aes_ctr_ctx *
+atmel_aes_ctr_ctx_cast(struct atmel_aes_base_ctx *ctx)
+{
+	return container_of(ctx, struct atmel_aes_ctr_ctx, base);
+}
+
+static void atmel_aes_ctr_update_req_iv(struct atmel_aes_dev *dd)
+{
+	struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx);
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
+	int i;
+
+	/*
+	 * The CTR transfer works in fragments of data of maximum 1 MByte
+	 * because of the 16 bit CTR counter embedded in the IP. When reaching
+	 * here, ctx->blocks contains the number of blocks of the last fragment
+	 * processed, there is no need to explicit cast it to u16.
+	 */
+	for (i = 0; i < ctx->blocks; i++)
+		crypto_inc((u8 *)ctx->iv, AES_BLOCK_SIZE);
+
+	memcpy(req->iv, ctx->iv, ivsize);
+}
+
 static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 {
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
+
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 	if (dd->ctx->is_aead)
 		atmel_aes_authenc_complete(dd, err);
 #endif
@@ -500,25 +552,12 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 	clk_disable(dd->iclk);
 	dd->flags &= ~AES_FLAGS_BUSY;
 
-	if (!dd->ctx->is_aead) {
-		struct ablkcipher_request *req =
-			ablkcipher_request_cast(dd->areq);
-		struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
-		struct crypto_ablkcipher *ablkcipher =
-			crypto_ablkcipher_reqtfm(req);
-		int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-
-		if (rctx->mode & AES_FLAGS_ENCRYPT) {
-			scatterwalk_map_and_copy(req->info, req->dst,
-				req->nbytes - ivsize, ivsize, 0);
-		} else {
-			if (req->src == req->dst) {
-				memcpy(req->info, rctx->lastc, ivsize);
-			} else {
-				scatterwalk_map_and_copy(req->info, req->src,
-					req->nbytes - ivsize, ivsize, 0);
-			}
-		}
+	if (!err && !dd->ctx->is_aead &&
+	    (rctx->mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB) {
+		if ((rctx->mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_CTR)
+			atmel_aes_set_iv_as_last_ciphertext_block(dd);
+		else
+			atmel_aes_ctr_update_req_iv(dd);
 	}
 
 	if (dd->is_async)
@@ -530,7 +569,7 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 }
 
 static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma,
-				     const u32 *iv, const u32 *key, int keylen)
+				     const __be32 *iv, const u32 *key, int keylen)
 {
 	u32 valmr = 0;
 
@@ -561,7 +600,7 @@ static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma,
 }
 
 static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
-					const u32 *iv)
+					const __be32 *iv)
 
 {
 	atmel_aes_write_ctrl_key(dd, use_dma, iv,
@@ -784,7 +823,6 @@ static int atmel_aes_dma_transfer_start(struct atmel_aes_dev *dd,
 	int err;
 
 	memset(&config, 0, sizeof(config));
-	config.direction = dir;
 	config.src_addr_width = addr_width;
 	config.dst_addr_width = addr_width;
 	config.src_maxburst = maxburst;
@@ -824,27 +862,6 @@ static int atmel_aes_dma_transfer_start(struct atmel_aes_dev *dd,
 	return 0;
 }
 
-static void atmel_aes_dma_transfer_stop(struct atmel_aes_dev *dd,
-					enum dma_transfer_direction dir)
-{
-	struct atmel_aes_dma *dma;
-
-	switch (dir) {
-	case DMA_MEM_TO_DEV:
-		dma = &dd->src;
-		break;
-
-	case DMA_DEV_TO_MEM:
-		dma = &dd->dst;
-		break;
-
-	default:
-		return;
-	}
-
-	dmaengine_terminate_all(dma->chan);
-}
-
 static int atmel_aes_dma_start(struct atmel_aes_dev *dd,
 			       struct scatterlist *src,
 			       struct scatterlist *dst,
@@ -903,25 +920,18 @@ static int atmel_aes_dma_start(struct atmel_aes_dev *dd,
 	return -EINPROGRESS;
 
 output_transfer_stop:
-	atmel_aes_dma_transfer_stop(dd, DMA_DEV_TO_MEM);
+	dmaengine_terminate_sync(dd->dst.chan);
 unmap:
 	atmel_aes_unmap(dd);
 exit:
 	return atmel_aes_complete(dd, err);
 }
 
-static void atmel_aes_dma_stop(struct atmel_aes_dev *dd)
-{
-	atmel_aes_dma_transfer_stop(dd, DMA_MEM_TO_DEV);
-	atmel_aes_dma_transfer_stop(dd, DMA_DEV_TO_MEM);
-	atmel_aes_unmap(dd);
-}
-
 static void atmel_aes_dma_callback(void *data)
 {
 	struct atmel_aes_dev *dd = data;
 
-	atmel_aes_dma_stop(dd);
+	atmel_aes_unmap(dd);
 	dd->is_async = true;
 	(void)dd->resume(dd);
 }
@@ -976,9 +986,9 @@ static int atmel_aes_transfer_complete(struct atmel_aes_dev *dd)
 
 static int atmel_aes_start(struct atmel_aes_dev *dd)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
-	struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
-	bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD ||
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
+	bool use_dma = (req->cryptlen >= ATMEL_AES_DMA_THRESHOLD ||
 			dd->ctx->block_size != AES_BLOCK_SIZE);
 	int err;
 
@@ -988,60 +998,46 @@ static int atmel_aes_start(struct atmel_aes_dev *dd)
 	if (err)
 		return atmel_aes_complete(dd, err);
 
-	atmel_aes_write_ctrl(dd, use_dma, req->info);
+	atmel_aes_write_ctrl(dd, use_dma, (void *)req->iv);
 	if (use_dma)
-		return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
+		return atmel_aes_dma_start(dd, req->src, req->dst,
+					   req->cryptlen,
 					   atmel_aes_transfer_complete);
 
-	return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
+	return atmel_aes_cpu_start(dd, req->src, req->dst, req->cryptlen,
 				   atmel_aes_transfer_complete);
 }
 
-static inline struct atmel_aes_ctr_ctx *
-atmel_aes_ctr_ctx_cast(struct atmel_aes_base_ctx *ctx)
-{
-	return container_of(ctx, struct atmel_aes_ctr_ctx, base);
-}
-
 static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd)
 {
 	struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx);
-	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
 	struct scatterlist *src, *dst;
-	u32 ctr, blocks;
 	size_t datalen;
+	u32 ctr;
+	u16 start, end;
 	bool use_dma, fragmented = false;
 
 	/* Check for transfer completion. */
 	ctx->offset += dd->total;
-	if (ctx->offset >= req->nbytes)
+	if (ctx->offset >= req->cryptlen)
 		return atmel_aes_transfer_complete(dd);
 
 	/* Compute data length. */
-	datalen = req->nbytes - ctx->offset;
-	blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE);
+	datalen = req->cryptlen - ctx->offset;
+	ctx->blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE);
 	ctr = be32_to_cpu(ctx->iv[3]);
-	if (dd->caps.has_ctr32) {
-		/* Check 32bit counter overflow. */
-		u32 start = ctr;
-		u32 end = start + blocks - 1;
-
-		if (end < start) {
-			ctr |= 0xffffffff;
-			datalen = AES_BLOCK_SIZE * -start;
-			fragmented = true;
-		}
-	} else {
-		/* Check 16bit counter overflow. */
-		u16 start = ctr & 0xffff;
-		u16 end = start + (u16)blocks - 1;
-
-		if (blocks >> 16 || end < start) {
-			ctr |= 0xffff;
-			datalen = AES_BLOCK_SIZE * (0x10000-start);
-			fragmented = true;
-		}
+
+	/* Check 16bit counter overflow. */
+	start = ctr & 0xffff;
+	end = start + ctx->blocks - 1;
+
+	if (ctx->blocks >> 16 || end < start) {
+		ctr |= 0xffff;
+		datalen = AES_BLOCK_SIZE * (0x10000 - start);
+		fragmented = true;
 	}
+
 	use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD);
 
 	/* Jump to offset. */
@@ -1071,8 +1067,8 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd)
 static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
 {
 	struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx);
-	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
-	struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
 	int err;
 
 	atmel_aes_set_mode(dd, rctx);
@@ -1081,16 +1077,16 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
 	if (err)
 		return atmel_aes_complete(dd, err);
 
-	memcpy(ctx->iv, req->info, AES_BLOCK_SIZE);
+	memcpy(ctx->iv, req->iv, AES_BLOCK_SIZE);
 	ctx->offset = 0;
 	dd->total = 0;
 	return atmel_aes_ctr_transfer(dd);
 }
 
-static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct atmel_aes_base_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct atmel_aes_reqctx *rctx;
 	struct atmel_aes_dev *dd;
 
@@ -1121,30 +1117,31 @@ static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 	if (!dd)
 		return -ENODEV;
 
-	rctx = ablkcipher_request_ctx(req);
+	rctx = skcipher_request_ctx(req);
 	rctx->mode = mode;
 
-	if (!(mode & AES_FLAGS_ENCRYPT) && (req->src == req->dst)) {
-		int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB &&
+	    !(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) {
+		unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 
-		scatterwalk_map_and_copy(rctx->lastc, req->src,
-			(req->nbytes - ivsize), ivsize, 0);
+		if (req->cryptlen >= ivsize)
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
 	}
 
 	return atmel_aes_handle_queue(dd, &req->base);
 }
 
-static int atmel_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	struct atmel_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	if (keylen != AES_KEYSIZE_128 &&
 	    keylen != AES_KEYSIZE_192 &&
-	    keylen != AES_KEYSIZE_256) {
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	    keylen != AES_KEYSIZE_256)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -1152,297 +1149,243 @@ static int atmel_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int atmel_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_ECB);
 }
 
-static int atmel_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CBC | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CBC);
 }
 
-static int atmel_aes_ofb_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_ofb_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_OFB | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_ofb_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_ofb_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_OFB);
 }
 
-static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB128 | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB128);
 }
 
-static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb64_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB64 | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cfb64_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb64_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB64);
 }
 
-static int atmel_aes_cfb32_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb32_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB32 | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cfb32_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb32_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB32);
 }
 
-static int atmel_aes_cfb16_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb16_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB16 | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cfb16_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb16_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB16);
 }
 
-static int atmel_aes_cfb8_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb8_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB8 | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_cfb8_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_cfb8_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CFB8);
 }
 
-static int atmel_aes_ctr_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_ctr_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CTR | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_ctr_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_ctr_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_CTR);
 }
 
-static int atmel_aes_cra_init(struct crypto_tfm *tfm)
+static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct atmel_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
 	ctx->base.start = atmel_aes_start;
 
 	return 0;
 }
 
-static int atmel_aes_ctr_cra_init(struct crypto_tfm *tfm)
+static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct atmel_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
 	ctx->base.start = atmel_aes_ctr_start;
 
 	return 0;
 }
 
-static struct crypto_alg aes_algs[] = {
-{
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "atmel-ecb-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_ecb_encrypt,
-		.decrypt	= atmel_aes_ecb_decrypt,
-	}
+static struct skcipher_alg aes_algs[] = {
+{
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "atmel-ecb-aes",
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_ecb_encrypt,
+	.decrypt		= atmel_aes_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "atmel-cbc-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cbc_encrypt,
-		.decrypt	= atmel_aes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "atmel-cbc-aes",
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cbc_encrypt,
+	.decrypt		= atmel_aes_cbc_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "ofb(aes)",
-	.cra_driver_name	= "atmel-ofb-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_ofb_encrypt,
-		.decrypt	= atmel_aes_ofb_decrypt,
-	}
+	.base.cra_name		= "ofb(aes)",
+	.base.cra_driver_name	= "atmel-ofb-aes",
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_ofb_encrypt,
+	.decrypt		= atmel_aes_ofb_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "cfb(aes)",
-	.cra_driver_name	= "atmel-cfb-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cfb_encrypt,
-		.decrypt	= atmel_aes_cfb_decrypt,
-	}
+	.base.cra_name		= "cfb(aes)",
+	.base.cra_driver_name	= "atmel-cfb-aes",
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cfb_encrypt,
+	.decrypt		= atmel_aes_cfb_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "cfb32(aes)",
-	.cra_driver_name	= "atmel-cfb32-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB32_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0x3,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cfb32_encrypt,
-		.decrypt	= atmel_aes_cfb32_decrypt,
-	}
+	.base.cra_name		= "cfb32(aes)",
+	.base.cra_driver_name	= "atmel-cfb32-aes",
+	.base.cra_blocksize	= CFB32_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cfb32_encrypt,
+	.decrypt		= atmel_aes_cfb32_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "cfb16(aes)",
-	.cra_driver_name	= "atmel-cfb16-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB16_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0x1,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cfb16_encrypt,
-		.decrypt	= atmel_aes_cfb16_decrypt,
-	}
+	.base.cra_name		= "cfb16(aes)",
+	.base.cra_driver_name	= "atmel-cfb16-aes",
+	.base.cra_blocksize	= CFB16_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cfb16_encrypt,
+	.decrypt		= atmel_aes_cfb16_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "cfb8(aes)",
-	.cra_driver_name	= "atmel-cfb8-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB8_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0x0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cfb8_encrypt,
-		.decrypt	= atmel_aes_cfb8_decrypt,
-	}
+	.base.cra_name		= "cfb8(aes)",
+	.base.cra_driver_name	= "atmel-cfb8-aes",
+	.base.cra_blocksize	= CFB8_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cfb8_encrypt,
+	.decrypt		= atmel_aes_cfb8_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "atmel-ctr-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctr_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_ctr_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_ctr_encrypt,
-		.decrypt	= atmel_aes_ctr_decrypt,
-	}
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "atmel-ctr-aes",
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctr_ctx),
+
+	.init			= atmel_aes_ctr_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_ctr_encrypt,
+	.decrypt		= atmel_aes_ctr_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 },
 };
 
-static struct crypto_alg aes_cfb64_alg = {
-	.cra_name		= "cfb64(aes)",
-	.cra_driver_name	= "atmel-cfb64-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB64_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_setkey,
-		.encrypt	= atmel_aes_cfb64_encrypt,
-		.decrypt	= atmel_aes_cfb64_decrypt,
-	}
+static struct skcipher_alg aes_cfb64_alg = {
+	.base.cra_name		= "cfb64(aes)",
+	.base.cra_driver_name	= "atmel-cfb64-aes",
+	.base.cra_blocksize	= CFB64_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_ctx),
+
+	.init			= atmel_aes_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= atmel_aes_setkey,
+	.encrypt		= atmel_aes_cfb64_encrypt,
+	.decrypt		= atmel_aes_cfb64_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
 };
 
 
@@ -1450,7 +1393,7 @@ static struct crypto_alg aes_cfb64_alg = {
 
 static int atmel_aes_gcm_ghash(struct atmel_aes_dev *dd,
 			       const u32 *data, size_t datalen,
-			       const u32 *ghash_in, u32 *ghash_out,
+			       const __be32 *ghash_in, __be32 *ghash_out,
 			       atmel_aes_fn_t resume);
 static int atmel_aes_gcm_ghash_init(struct atmel_aes_dev *dd);
 static int atmel_aes_gcm_ghash_finalize(struct atmel_aes_dev *dd);
@@ -1471,7 +1414,7 @@ atmel_aes_gcm_ctx_cast(struct atmel_aes_base_ctx *ctx)
 
 static int atmel_aes_gcm_ghash(struct atmel_aes_dev *dd,
 			       const u32 *data, size_t datalen,
-			       const u32 *ghash_in, u32 *ghash_out,
+			       const __be32 *ghash_in, __be32 *ghash_out,
 			       atmel_aes_fn_t resume)
 {
 	struct atmel_aes_gcm_ctx *ctx = atmel_aes_gcm_ctx_cast(dd->ctx);
@@ -1558,7 +1501,7 @@ static int atmel_aes_gcm_start(struct atmel_aes_dev *dd)
 
 	memcpy(data, iv, ivsize);
 	memset(data + ivsize, 0, padlen + sizeof(u64));
-	((u64 *)(data + datalen))[-1] = cpu_to_be64(ivsize * 8);
+	((__be64 *)(data + datalen))[-1] = cpu_to_be64(ivsize * 8);
 
 	return atmel_aes_gcm_ghash(dd, (const u32 *)data, datalen,
 				   NULL, ctx->j0, atmel_aes_gcm_process);
@@ -1591,7 +1534,7 @@ static int atmel_aes_gcm_length(struct atmel_aes_dev *dd)
 {
 	struct atmel_aes_gcm_ctx *ctx = atmel_aes_gcm_ctx_cast(dd->ctx);
 	struct aead_request *req = aead_request_cast(dd->areq);
-	u32 j0_lsw, *j0 = ctx->j0;
+	__be32 j0_lsw, *j0 = ctx->j0;
 	size_t padlen;
 
 	/* Write incr32(J0) into IV. */
@@ -1674,7 +1617,7 @@ static int atmel_aes_gcm_tag_init(struct atmel_aes_dev *dd)
 {
 	struct atmel_aes_gcm_ctx *ctx = atmel_aes_gcm_ctx_cast(dd->ctx);
 	struct aead_request *req = aead_request_cast(dd->areq);
-	u64 *data = dd->buf;
+	__be64 *data = dd->buf;
 
 	if (likely(dd->flags & AES_FLAGS_GTAGEN)) {
 		if (!(atmel_aes_read(dd, AES_ISR) & AES_INT_TAGRDY)) {
@@ -1771,10 +1714,8 @@ static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 
 	if (keylen != AES_KEYSIZE_256 &&
 	    keylen != AES_KEYSIZE_192 &&
-	    keylen != AES_KEYSIZE_128) {
-		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	    keylen != AES_KEYSIZE_128)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -1785,21 +1726,7 @@ static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 static int atmel_aes_gcm_setauthsize(struct crypto_aead *tfm,
 				     unsigned int authsize)
 {
-	/* Same as crypto_gcm_authsize() from crypto/gcm.c */
-	switch (authsize) {
-	case 4:
-	case 8:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-	case 16:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
+	return crypto_gcm_check_authsize(authsize);
 }
 
 static int atmel_aes_gcm_encrypt(struct aead_request *req)
@@ -1834,12 +1761,8 @@ static struct aead_alg aes_gcm_alg = {
 	.base = {
 		.cra_name		= "gcm(aes)",
 		.cra_driver_name	= "atmel-gcm-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= 1,
 		.cra_ctxsize		= sizeof(struct atmel_aes_gcm_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 };
 
@@ -1857,8 +1780,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd);
 static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
 {
 	struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx);
-	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
-	struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
 	unsigned long flags;
 	int err;
 
@@ -1868,7 +1791,7 @@ static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
 	if (err)
 		return atmel_aes_complete(dd, err);
 
-	/* Compute the tweak value from req->info with ecb(aes). */
+	/* Compute the tweak value from req->iv with ecb(aes). */
 	flags = dd->flags;
 	dd->flags &= ~AES_FLAGS_MODE_MASK;
 	dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
@@ -1876,16 +1799,16 @@ static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
 				 ctx->key2, ctx->base.keylen);
 	dd->flags = flags;
 
-	atmel_aes_write_block(dd, AES_IDATAR(0), req->info);
+	atmel_aes_write_block(dd, AES_IDATAR(0), req->iv);
 	return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data);
 }
 
 static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
-	bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD);
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	bool use_dma = (req->cryptlen >= ATMEL_AES_DMA_THRESHOLD);
 	u32 tweak[AES_BLOCK_SIZE / sizeof(u32)];
-	static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
+	static const __le32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
 	u8 *tweak_bytes = (u8 *)tweak;
 	int i;
 
@@ -1908,20 +1831,21 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
 	atmel_aes_write_block(dd, AES_TWR(0), tweak);
 	atmel_aes_write_block(dd, AES_ALPHAR(0), one);
 	if (use_dma)
-		return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
+		return atmel_aes_dma_start(dd, req->src, req->dst,
+					   req->cryptlen,
 					   atmel_aes_transfer_complete);
 
-	return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
+	return atmel_aes_cpu_start(dd, req->src, req->dst, req->cryptlen,
 				   atmel_aes_transfer_complete);
 }
 
-static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				unsigned int keylen)
 {
-	struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = xts_check_key(crypto_ablkcipher_tfm(tfm), key, keylen);
+	err = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen);
 	if (err)
 		return err;
 
@@ -1932,48 +1856,42 @@ static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int atmel_aes_xts_encrypt(struct ablkcipher_request *req)
+static int atmel_aes_xts_encrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT);
 }
 
-static int atmel_aes_xts_decrypt(struct ablkcipher_request *req)
+static int atmel_aes_xts_decrypt(struct skcipher_request *req)
 {
 	return atmel_aes_crypt(req, AES_FLAGS_XTS);
 }
 
-static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm)
+static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
 	ctx->base.start = atmel_aes_xts_start;
 
 	return 0;
 }
 
-static struct crypto_alg aes_xts_alg = {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "atmel-xts-aes",
-	.cra_priority		= ATMEL_AES_PRIORITY,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_aes_xts_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_aes_xts_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= atmel_aes_xts_setkey,
-		.encrypt	= atmel_aes_xts_encrypt,
-		.decrypt	= atmel_aes_xts_decrypt,
-	}
+static struct skcipher_alg aes_xts_alg = {
+	.base.cra_name		= "xts(aes)",
+	.base.cra_driver_name	= "atmel-xts-aes",
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct atmel_aes_xts_ctx),
+
+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= atmel_aes_xts_setkey,
+	.encrypt		= atmel_aes_xts_encrypt,
+	.decrypt		= atmel_aes_xts_decrypt,
+	.init			= atmel_aes_xts_init_tfm,
 };
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 /* authenc aead functions */
 
 static int atmel_aes_authenc_start(struct atmel_aes_dev *dd);
@@ -2041,7 +1959,7 @@ static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err,
 	struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req);
 	bool enc = atmel_aes_is_encrypt(dd);
 	struct scatterlist *src, *dst;
-	u32 iv[AES_BLOCK_SIZE / sizeof(u32)];
+	__be32 iv[AES_BLOCK_SIZE / sizeof(u32)];
 	u32 emr;
 
 	if (is_async)
@@ -2123,7 +2041,6 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,
 {
 	struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
@@ -2133,11 +2050,9 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,
 		goto badkey;
 
 	/* Save auth key. */
-	flags = crypto_aead_get_flags(tfm);
 	err = atmel_sha_authenc_setkey(ctx->auth,
 				       keys.authkey, keys.authkeylen,
-				       &flags);
-	crypto_aead_set_flags(tfm, flags & CRYPTO_TFM_RES_MASK);
+				       crypto_aead_get_flags(tfm));
 	if (err) {
 		memzero_explicit(&keys, sizeof(keys));
 		return err;
@@ -2151,7 +2066,6 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,
 	return 0;
 
 badkey:
-	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -2262,12 +2176,8 @@ static struct aead_alg aes_authenc_algs[] = {
 	.base = {
 		.cra_name		= "authenc(hmac(sha1),cbc(aes))",
 		.cra_driver_name	= "atmel-authenc-hmac-sha1-cbc-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct atmel_aes_authenc_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 },
 {
@@ -2282,12 +2192,8 @@ static struct aead_alg aes_authenc_algs[] = {
 	.base = {
 		.cra_name		= "authenc(hmac(sha224),cbc(aes))",
 		.cra_driver_name	= "atmel-authenc-hmac-sha224-cbc-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct atmel_aes_authenc_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 },
 {
@@ -2302,12 +2208,8 @@ static struct aead_alg aes_authenc_algs[] = {
 	.base = {
 		.cra_name		= "authenc(hmac(sha256),cbc(aes))",
 		.cra_driver_name	= "atmel-authenc-hmac-sha256-cbc-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct atmel_aes_authenc_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 },
 {
@@ -2322,12 +2224,8 @@ static struct aead_alg aes_authenc_algs[] = {
 	.base = {
 		.cra_name		= "authenc(hmac(sha384),cbc(aes))",
 		.cra_driver_name	= "atmel-authenc-hmac-sha384-cbc-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct atmel_aes_authenc_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 },
 {
@@ -2342,12 +2240,8 @@ static struct aead_alg aes_authenc_algs[] = {
 	.base = {
 		.cra_name		= "authenc(hmac(sha512),cbc(aes))",
 		.cra_driver_name	= "atmel-authenc-hmac-sha512-cbc-aes",
-		.cra_priority		= ATMEL_AES_PRIORITY,
-		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct atmel_aes_authenc_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
 	},
 },
 };
@@ -2374,47 +2268,30 @@ static void atmel_aes_buff_cleanup(struct atmel_aes_dev *dd)
 	free_page((unsigned long)dd->buf);
 }
 
-static bool atmel_aes_filter(struct dma_chan *chan, void *slave)
-{
-	struct at_dma_slave	*sl = slave;
-
-	if (sl && sl->dma_dev == chan->device->dev) {
-		chan->private = sl;
-		return true;
-	} else {
-		return false;
-	}
-}
-
-static int atmel_aes_dma_init(struct atmel_aes_dev *dd,
-			      struct crypto_platform_data *pdata)
+static int atmel_aes_dma_init(struct atmel_aes_dev *dd)
 {
-	struct at_dma_slave *slave;
-	dma_cap_mask_t mask;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
+	int ret;
 
 	/* Try to grab 2 DMA channels */
-	slave = &pdata->dma_slave->rxdata;
-	dd->src.chan = dma_request_slave_channel_compat(mask, atmel_aes_filter,
-							slave, dd->dev, "tx");
-	if (!dd->src.chan)
+	dd->src.chan = dma_request_chan(dd->dev, "tx");
+	if (IS_ERR(dd->src.chan)) {
+		ret = PTR_ERR(dd->src.chan);
 		goto err_dma_in;
+	}
 
-	slave = &pdata->dma_slave->txdata;
-	dd->dst.chan = dma_request_slave_channel_compat(mask, atmel_aes_filter,
-							slave, dd->dev, "rx");
-	if (!dd->dst.chan)
+	dd->dst.chan = dma_request_chan(dd->dev, "rx");
+	if (IS_ERR(dd->dst.chan)) {
+		ret = PTR_ERR(dd->dst.chan);
 		goto err_dma_out;
+	}
 
 	return 0;
 
 err_dma_out:
 	dma_release_channel(dd->src.chan);
 err_dma_in:
-	dev_warn(dd->dev, "no DMA channel available\n");
-	return -ENODEV;
+	dev_err(dd->dev, "no DMA channel available\n");
+	return ret;
 }
 
 static void atmel_aes_dma_cleanup(struct atmel_aes_dev *dd)
@@ -2460,23 +2337,31 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
 {
 	int i;
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 	if (dd->caps.has_authenc)
 		for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++)
 			crypto_unregister_aead(&aes_authenc_algs[i]);
 #endif
 
 	if (dd->caps.has_xts)
-		crypto_unregister_alg(&aes_xts_alg);
+		crypto_unregister_skcipher(&aes_xts_alg);
 
 	if (dd->caps.has_gcm)
 		crypto_unregister_aead(&aes_gcm_alg);
 
 	if (dd->caps.has_cfb64)
-		crypto_unregister_alg(&aes_cfb64_alg);
+		crypto_unregister_skcipher(&aes_cfb64_alg);
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
-		crypto_unregister_alg(&aes_algs[i]);
+		crypto_unregister_skcipher(&aes_algs[i]);
+}
+
+static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
+{
+	alg->cra_flags = CRYPTO_ALG_ASYNC;
+	alg->cra_alignmask = 0xf;
+	alg->cra_priority = ATMEL_AES_PRIORITY;
+	alg->cra_module = THIS_MODULE;
 }
 
 static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
@@ -2484,32 +2369,42 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
 	int err, i, j;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
-		err = crypto_register_alg(&aes_algs[i]);
+		atmel_aes_crypto_alg_init(&aes_algs[i].base);
+
+		err = crypto_register_skcipher(&aes_algs[i]);
 		if (err)
 			goto err_aes_algs;
 	}
 
 	if (dd->caps.has_cfb64) {
-		err = crypto_register_alg(&aes_cfb64_alg);
+		atmel_aes_crypto_alg_init(&aes_cfb64_alg.base);
+
+		err = crypto_register_skcipher(&aes_cfb64_alg);
 		if (err)
 			goto err_aes_cfb64_alg;
 	}
 
 	if (dd->caps.has_gcm) {
+		atmel_aes_crypto_alg_init(&aes_gcm_alg.base);
+
 		err = crypto_register_aead(&aes_gcm_alg);
 		if (err)
 			goto err_aes_gcm_alg;
 	}
 
 	if (dd->caps.has_xts) {
-		err = crypto_register_alg(&aes_xts_alg);
+		atmel_aes_crypto_alg_init(&aes_xts_alg.base);
+
+		err = crypto_register_skcipher(&aes_xts_alg);
 		if (err)
 			goto err_aes_xts_alg;
 	}
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 	if (dd->caps.has_authenc) {
 		for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) {
+			atmel_aes_crypto_alg_init(&aes_authenc_algs[i].base);
+
 			err = crypto_register_aead(&aes_authenc_algs[i]);
 			if (err)
 				goto err_aes_authenc_alg;
@@ -2519,22 +2414,22 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
 
 	return 0;
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 	/* i = ARRAY_SIZE(aes_authenc_algs); */
 err_aes_authenc_alg:
 	for (j = 0; j < i; j++)
 		crypto_unregister_aead(&aes_authenc_algs[j]);
-	crypto_unregister_alg(&aes_xts_alg);
+	crypto_unregister_skcipher(&aes_xts_alg);
 #endif
 err_aes_xts_alg:
 	crypto_unregister_aead(&aes_gcm_alg);
 err_aes_gcm_alg:
-	crypto_unregister_alg(&aes_cfb64_alg);
+	crypto_unregister_skcipher(&aes_cfb64_alg);
 err_aes_cfb64_alg:
 	i = ARRAY_SIZE(aes_algs);
 err_aes_algs:
 	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&aes_algs[j]);
+		crypto_unregister_skcipher(&aes_algs[j]);
 
 	return err;
 }
@@ -2543,7 +2438,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 {
 	dd->caps.has_dualbuff = 0;
 	dd->caps.has_cfb64 = 0;
-	dd->caps.has_ctr32 = 0;
 	dd->caps.has_gcm = 0;
 	dd->caps.has_xts = 0;
 	dd->caps.has_authenc = 0;
@@ -2554,7 +2448,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 	case 0x500:
 		dd->caps.has_dualbuff = 1;
 		dd->caps.has_cfb64 = 1;
-		dd->caps.has_ctr32 = 1;
 		dd->caps.has_gcm = 1;
 		dd->caps.has_xts = 1;
 		dd->caps.has_authenc = 1;
@@ -2563,7 +2456,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 	case 0x200:
 		dd->caps.has_dualbuff = 1;
 		dd->caps.has_cfb64 = 1;
-		dd->caps.has_ctr32 = 1;
 		dd->caps.has_gcm = 1;
 		dd->caps.max_burst_size = 4;
 		break;
@@ -2587,65 +2479,18 @@ static const struct of_device_id atmel_aes_dt_ids[] = {
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_aes_dt_ids);
-
-static struct crypto_platform_data *atmel_aes_of_init(struct platform_device *pdev)
-{
-	struct device_node *np = pdev->dev.of_node;
-	struct crypto_platform_data *pdata;
-
-	if (!np) {
-		dev_err(&pdev->dev, "device node not found\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
-	pdata->dma_slave = devm_kzalloc(&pdev->dev,
-					sizeof(*(pdata->dma_slave)),
-					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		devm_kfree(&pdev->dev, pdata);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return pdata;
-}
-#else
-static inline struct crypto_platform_data *atmel_aes_of_init(struct platform_device *pdev)
-{
-	return ERR_PTR(-EINVAL);
-}
 #endif
 
 static int atmel_aes_probe(struct platform_device *pdev)
 {
 	struct atmel_aes_dev *aes_dd;
-	struct crypto_platform_data *pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *aes_res;
 	int err;
 
-	pdata = pdev->dev.platform_data;
-	if (!pdata) {
-		pdata = atmel_aes_of_init(pdev);
-		if (IS_ERR(pdata)) {
-			err = PTR_ERR(pdata);
-			goto aes_dd_err;
-		}
-	}
-
-	if (!pdata->dma_slave) {
-		err = -ENXIO;
-		goto aes_dd_err;
-	}
-
 	aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL);
-	if (aes_dd == NULL) {
-		err = -ENOMEM;
-		goto aes_dd_err;
-	}
+	if (!aes_dd)
+		return -ENOMEM;
 
 	aes_dd->dev = dev;
 
@@ -2666,7 +2511,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
 	if (!aes_res) {
 		dev_err(dev, "no MEM resource info\n");
 		err = -ENODEV;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 	aes_dd->phys_base = aes_res->start;
 
@@ -2674,14 +2519,14 @@ static int atmel_aes_probe(struct platform_device *pdev)
 	aes_dd->irq = platform_get_irq(pdev,  0);
 	if (aes_dd->irq < 0) {
 		err = aes_dd->irq;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	err = devm_request_irq(&pdev->dev, aes_dd->irq, atmel_aes_irq,
 			       IRQF_SHARED, "atmel-aes", aes_dd);
 	if (err) {
 		dev_err(dev, "unable to request aes irq.\n");
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	/* Initializing the clock */
@@ -2689,40 +2534,40 @@ static int atmel_aes_probe(struct platform_device *pdev)
 	if (IS_ERR(aes_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(aes_dd->iclk);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res);
 	if (IS_ERR(aes_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
 		err = PTR_ERR(aes_dd->io_base);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	err = clk_prepare(aes_dd->iclk);
 	if (err)
-		goto res_err;
+		goto err_tasklet_kill;
 
 	err = atmel_aes_hw_version_init(aes_dd);
 	if (err)
-		goto iclk_unprepare;
+		goto err_iclk_unprepare;
 
 	atmel_aes_get_cap(aes_dd);
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 	if (aes_dd->caps.has_authenc && !atmel_sha_authenc_is_ready()) {
 		err = -EPROBE_DEFER;
-		goto iclk_unprepare;
+		goto err_iclk_unprepare;
 	}
 #endif
 
 	err = atmel_aes_buff_init(aes_dd);
 	if (err)
-		goto err_aes_buff;
+		goto err_iclk_unprepare;
 
-	err = atmel_aes_dma_init(aes_dd, pdata);
+	err = atmel_aes_dma_init(aes_dd);
 	if (err)
-		goto err_aes_dma;
+		goto err_buff_cleanup;
 
 	spin_lock(&atmel_aes.lock);
 	list_add_tail(&aes_dd->list, &atmel_aes.dev_list);
@@ -2743,17 +2588,13 @@ err_algs:
 	list_del(&aes_dd->list);
 	spin_unlock(&atmel_aes.lock);
 	atmel_aes_dma_cleanup(aes_dd);
-err_aes_dma:
+err_buff_cleanup:
 	atmel_aes_buff_cleanup(aes_dd);
-err_aes_buff:
-iclk_unprepare:
+err_iclk_unprepare:
 	clk_unprepare(aes_dd->iclk);
-res_err:
+err_tasklet_kill:
 	tasklet_kill(&aes_dd->done_task);
 	tasklet_kill(&aes_dd->queue_task);
-aes_dd_err:
-	if (err != -EPROBE_DEFER)
-		dev_err(dev, "initialization failed.\n");
 
 	return err;
 }
diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h
index cbd37a2edada..c6530a1c8c20 100644
--- a/drivers/crypto/atmel-authenc.h
+++ b/drivers/crypto/atmel-authenc.h
@@ -12,7 +12,7 @@
 #ifndef __ATMEL_AUTHENC_H__
 #define __ATMEL_AUTHENC_H__
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 
 #include <crypto/authenc.h>
 #include <crypto/hash.h>
@@ -30,8 +30,7 @@ unsigned int atmel_sha_authenc_get_reqsize(void);
 struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode);
 void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth);
 int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth,
-			     const u8 *key, unsigned int keylen,
-			     u32 *flags);
+			     const u8 *key, unsigned int keylen, u32 flags);
 
 int atmel_sha_authenc_schedule(struct ahash_request *req,
 			       struct atmel_sha_authenc_ctx *auth,
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 84cb8748a795..e536e2a6bbd8 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -21,6 +21,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
+#include <linux/dmaengine.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -36,10 +37,11 @@
 #include <crypto/sha.h>
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
-#include <linux/platform_data/crypto-atmel.h>
 #include "atmel-sha-regs.h"
 #include "atmel-authenc.h"
 
+#define ATMEL_SHA_PRIORITY	300
+
 /* SHA flags */
 #define SHA_FLAGS_BUSY			BIT(0)
 #define	SHA_FLAGS_FINAL			BIT(1)
@@ -134,7 +136,6 @@ struct atmel_sha_dev {
 	void __iomem		*io_base;
 
 	spinlock_t		lock;
-	int			err;
 	struct tasklet_struct	done_task;
 	struct tasklet_struct	queue_task;
 
@@ -360,7 +361,7 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
 static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
 {
 	unsigned int index, padlen;
-	u64 bits[2];
+	__be64 bits[2];
 	u64 size[2];
 
 	size[0] = ctx->digcnt[0];
@@ -851,7 +852,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
 								0, final);
 }
 
-static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+static void atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
 {
 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
 
@@ -870,8 +871,6 @@ static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
 		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
 						ctx->block_size, DMA_TO_DEVICE);
 	}
-
-	return 0;
 }
 
 static int atmel_sha_update_req(struct atmel_sha_dev *dd)
@@ -1025,7 +1024,6 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
 	if (!(SHA_FLAGS_INIT & dd->flags)) {
 		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
 		dd->flags |= SHA_FLAGS_INIT;
-		dd->err = 0;
 	}
 
 	return 0;
@@ -1036,9 +1034,13 @@ static inline unsigned int atmel_sha_get_version(struct atmel_sha_dev *dd)
 	return atmel_sha_read(dd, SHA_HW_VERSION) & 0x00000fff;
 }
 
-static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
+static int atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
 {
-	atmel_sha_hw_init(dd);
+	int err;
+
+	err = atmel_sha_hw_init(dd);
+	if (err)
+		return err;
 
 	dd->hw_version = atmel_sha_get_version(dd);
 
@@ -1046,6 +1048,8 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
 			"version: 0x%x\n", dd->hw_version);
 
 	clk_disable(dd->iclk);
+
+	return 0;
 }
 
 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
@@ -1248,130 +1252,66 @@ static int atmel_sha_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static void atmel_sha_alg_init(struct ahash_alg *alg)
+{
+	alg->halg.base.cra_priority = ATMEL_SHA_PRIORITY;
+	alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+	alg->halg.base.cra_ctxsize = sizeof(struct atmel_sha_ctx);
+	alg->halg.base.cra_module = THIS_MODULE;
+	alg->halg.base.cra_init = atmel_sha_cra_init;
+
+	alg->halg.statesize = sizeof(struct atmel_sha_reqctx);
+
+	alg->init = atmel_sha_init;
+	alg->update = atmel_sha_update;
+	alg->final = atmel_sha_final;
+	alg->finup = atmel_sha_finup;
+	alg->digest = atmel_sha_digest;
+	alg->export = atmel_sha_export;
+	alg->import = atmel_sha_import;
+}
+
 static struct ahash_alg sha_1_256_algs[] = {
 {
-	.init		= atmel_sha_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.finup		= atmel_sha_finup,
-	.digest		= atmel_sha_digest,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA1_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "sha1",
-			.cra_driver_name	= "atmel-sha1",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA1_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_cra_init,
-		}
-	}
+	.halg.base.cra_name		= "sha1",
+	.halg.base.cra_driver_name	= "atmel-sha1",
+	.halg.base.cra_blocksize	= SHA1_BLOCK_SIZE,
+
+	.halg.digestsize = SHA1_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.finup		= atmel_sha_finup,
-	.digest		= atmel_sha_digest,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA256_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "sha256",
-			.cra_driver_name	= "atmel-sha256",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA256_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_cra_init,
-		}
-	}
+	.halg.base.cra_name		= "sha256",
+	.halg.base.cra_driver_name	= "atmel-sha256",
+	.halg.base.cra_blocksize	= SHA256_BLOCK_SIZE,
+
+	.halg.digestsize = SHA256_DIGEST_SIZE,
 },
 };
 
 static struct ahash_alg sha_224_alg = {
-	.init		= atmel_sha_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.finup		= atmel_sha_finup,
-	.digest		= atmel_sha_digest,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA224_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "sha224",
-			.cra_driver_name	= "atmel-sha224",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA224_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_cra_init,
-		}
-	}
+	.halg.base.cra_name		= "sha224",
+	.halg.base.cra_driver_name	= "atmel-sha224",
+	.halg.base.cra_blocksize	= SHA224_BLOCK_SIZE,
+
+	.halg.digestsize = SHA224_DIGEST_SIZE,
 };
 
 static struct ahash_alg sha_384_512_algs[] = {
 {
-	.init		= atmel_sha_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.finup		= atmel_sha_finup,
-	.digest		= atmel_sha_digest,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA384_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "sha384",
-			.cra_driver_name	= "atmel-sha384",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA384_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
-			.cra_alignmask		= 0x3,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_cra_init,
-		}
-	}
+	.halg.base.cra_name		= "sha384",
+	.halg.base.cra_driver_name	= "atmel-sha384",
+	.halg.base.cra_blocksize	= SHA384_BLOCK_SIZE,
+	.halg.base.cra_alignmask	= 0x3,
+
+	.halg.digestsize = SHA384_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.finup		= atmel_sha_finup,
-	.digest		= atmel_sha_digest,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA512_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "sha512",
-			.cra_driver_name	= "atmel-sha512",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA512_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
-			.cra_alignmask		= 0x3,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_cra_init,
-		}
-	}
+	.halg.base.cra_name		= "sha512",
+	.halg.base.cra_driver_name	= "atmel-sha512",
+	.halg.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.halg.base.cra_alignmask	= 0x3,
+
+	.halg.digestsize = SHA512_DIGEST_SIZE,
 },
 };
 
@@ -1395,10 +1335,6 @@ static int atmel_sha_done(struct atmel_sha_dev *dd)
 		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
 			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
 			atmel_sha_update_dma_stop(dd);
-			if (dd->err) {
-				err = dd->err;
-				goto finish;
-			}
 		}
 		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
 			/* hash or semi-hash ready */
@@ -1493,7 +1429,6 @@ static void atmel_sha_dma_callback2(void *data)
 	struct scatterlist *sg;
 	int nents;
 
-	dmaengine_terminate_all(dma->chan);
 	dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE);
 
 	sg = dma->sg;
@@ -1918,12 +1853,7 @@ static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 {
 	struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm);
 
-	if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) {
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return 0;
+	return atmel_sha_hmac_key_set(&hmac->hkey, key, keylen);
 }
 
 static int atmel_sha_hmac_init(struct ahash_request *req)
@@ -2084,135 +2014,65 @@ static void atmel_sha_hmac_cra_exit(struct crypto_tfm *tfm)
 	atmel_sha_hmac_key_release(&hmac->hkey);
 }
 
+static void atmel_sha_hmac_alg_init(struct ahash_alg *alg)
+{
+	alg->halg.base.cra_priority = ATMEL_SHA_PRIORITY;
+	alg->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+	alg->halg.base.cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx);
+	alg->halg.base.cra_module = THIS_MODULE;
+	alg->halg.base.cra_init	= atmel_sha_hmac_cra_init;
+	alg->halg.base.cra_exit	= atmel_sha_hmac_cra_exit;
+
+	alg->halg.statesize = sizeof(struct atmel_sha_reqctx);
+
+	alg->init = atmel_sha_hmac_init;
+	alg->update = atmel_sha_update;
+	alg->final = atmel_sha_final;
+	alg->digest = atmel_sha_hmac_digest;
+	alg->setkey = atmel_sha_hmac_setkey;
+	alg->export = atmel_sha_export;
+	alg->import = atmel_sha_import;
+}
+
 static struct ahash_alg sha_hmac_algs[] = {
 {
-	.init		= atmel_sha_hmac_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.digest		= atmel_sha_hmac_digest,
-	.setkey		= atmel_sha_hmac_setkey,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA1_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "hmac(sha1)",
-			.cra_driver_name	= "atmel-hmac-sha1",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA1_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_hmac_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_hmac_cra_init,
-			.cra_exit		= atmel_sha_hmac_cra_exit,
-		}
-	}
+	.halg.base.cra_name		= "hmac(sha1)",
+	.halg.base.cra_driver_name	= "atmel-hmac-sha1",
+	.halg.base.cra_blocksize	= SHA1_BLOCK_SIZE,
+
+	.halg.digestsize = SHA1_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_hmac_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.digest		= atmel_sha_hmac_digest,
-	.setkey		= atmel_sha_hmac_setkey,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA224_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "hmac(sha224)",
-			.cra_driver_name	= "atmel-hmac-sha224",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA224_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_hmac_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_hmac_cra_init,
-			.cra_exit		= atmel_sha_hmac_cra_exit,
-		}
-	}
+	.halg.base.cra_name		= "hmac(sha224)",
+	.halg.base.cra_driver_name	= "atmel-hmac-sha224",
+	.halg.base.cra_blocksize	= SHA224_BLOCK_SIZE,
+
+	.halg.digestsize = SHA224_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_hmac_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.digest		= atmel_sha_hmac_digest,
-	.setkey		= atmel_sha_hmac_setkey,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA256_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "hmac(sha256)",
-			.cra_driver_name	= "atmel-hmac-sha256",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA256_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_hmac_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_hmac_cra_init,
-			.cra_exit		= atmel_sha_hmac_cra_exit,
-		}
-	}
+	.halg.base.cra_name		= "hmac(sha256)",
+	.halg.base.cra_driver_name	= "atmel-hmac-sha256",
+	.halg.base.cra_blocksize	= SHA256_BLOCK_SIZE,
+
+	.halg.digestsize = SHA256_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_hmac_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.digest		= atmel_sha_hmac_digest,
-	.setkey		= atmel_sha_hmac_setkey,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA384_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "hmac(sha384)",
-			.cra_driver_name	= "atmel-hmac-sha384",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA384_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_hmac_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_hmac_cra_init,
-			.cra_exit		= atmel_sha_hmac_cra_exit,
-		}
-	}
+	.halg.base.cra_name		= "hmac(sha384)",
+	.halg.base.cra_driver_name	= "atmel-hmac-sha384",
+	.halg.base.cra_blocksize	= SHA384_BLOCK_SIZE,
+
+	.halg.digestsize = SHA384_DIGEST_SIZE,
 },
 {
-	.init		= atmel_sha_hmac_init,
-	.update		= atmel_sha_update,
-	.final		= atmel_sha_final,
-	.digest		= atmel_sha_hmac_digest,
-	.setkey		= atmel_sha_hmac_setkey,
-	.export		= atmel_sha_export,
-	.import		= atmel_sha_import,
-	.halg = {
-		.digestsize	= SHA512_DIGEST_SIZE,
-		.statesize	= sizeof(struct atmel_sha_reqctx),
-		.base	= {
-			.cra_name		= "hmac(sha512)",
-			.cra_driver_name	= "atmel-hmac-sha512",
-			.cra_priority		= 100,
-			.cra_flags		= CRYPTO_ALG_ASYNC,
-			.cra_blocksize		= SHA512_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct atmel_sha_hmac_ctx),
-			.cra_alignmask		= 0,
-			.cra_module		= THIS_MODULE,
-			.cra_init		= atmel_sha_hmac_cra_init,
-			.cra_exit		= atmel_sha_hmac_cra_exit,
-		}
-	}
+	.halg.base.cra_name		= "hmac(sha512)",
+	.halg.base.cra_driver_name	= "atmel-hmac-sha512",
+	.halg.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+
+	.halg.digestsize = SHA512_DIGEST_SIZE,
 },
 };
 
-#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC
+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
 /* authenc functions */
 
 static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd);
@@ -2347,18 +2207,13 @@ void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth)
 EXPORT_SYMBOL_GPL(atmel_sha_authenc_free);
 
 int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth,
-			     const u8 *key, unsigned int keylen,
-			     u32 *flags)
+			     const u8 *key, unsigned int keylen, u32 flags)
 {
 	struct crypto_ahash *tfm = auth->tfm;
-	int err;
 
 	crypto_ahash_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
-	crypto_ahash_set_flags(tfm, *flags & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(tfm, key, keylen);
-	*flags = crypto_ahash_get_flags(tfm);
-
-	return err;
+	crypto_ahash_set_flags(tfm, flags & CRYPTO_TFM_REQ_MASK);
+	return crypto_ahash_setkey(tfm, key, keylen);
 }
 EXPORT_SYMBOL_GPL(atmel_sha_authenc_setkey);
 
@@ -2561,12 +2416,16 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
 	int err, i, j;
 
 	for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) {
+		atmel_sha_alg_init(&sha_1_256_algs[i]);
+
 		err = crypto_register_ahash(&sha_1_256_algs[i]);
 		if (err)
 			goto err_sha_1_256_algs;
 	}
 
 	if (dd->caps.has_sha224) {
+		atmel_sha_alg_init(&sha_224_alg);
+
 		err = crypto_register_ahash(&sha_224_alg);
 		if (err)
 			goto err_sha_224_algs;
@@ -2574,6 +2433,8 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
 
 	if (dd->caps.has_sha_384_512) {
 		for (i = 0; i < ARRAY_SIZE(sha_384_512_algs); i++) {
+			atmel_sha_alg_init(&sha_384_512_algs[i]);
+
 			err = crypto_register_ahash(&sha_384_512_algs[i]);
 			if (err)
 				goto err_sha_384_512_algs;
@@ -2582,6 +2443,8 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
 
 	if (dd->caps.has_hmac) {
 		for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) {
+			atmel_sha_hmac_alg_init(&sha_hmac_algs[i]);
+
 			err = crypto_register_ahash(&sha_hmac_algs[i]);
 			if (err)
 				goto err_sha_hmac_algs;
@@ -2608,35 +2471,14 @@ err_sha_1_256_algs:
 	return err;
 }
 
-static bool atmel_sha_filter(struct dma_chan *chan, void *slave)
+static int atmel_sha_dma_init(struct atmel_sha_dev *dd)
 {
-	struct at_dma_slave	*sl = slave;
-
-	if (sl && sl->dma_dev == chan->device->dev) {
-		chan->private = sl;
-		return true;
-	} else {
-		return false;
+	dd->dma_lch_in.chan = dma_request_chan(dd->dev, "tx");
+	if (IS_ERR(dd->dma_lch_in.chan)) {
+		dev_err(dd->dev, "DMA channel is not available\n");
+		return PTR_ERR(dd->dma_lch_in.chan);
 	}
-}
 
-static int atmel_sha_dma_init(struct atmel_sha_dev *dd,
-				struct crypto_platform_data *pdata)
-{
-	dma_cap_mask_t mask_in;
-
-	/* Try to grab DMA channel */
-	dma_cap_zero(mask_in);
-	dma_cap_set(DMA_SLAVE, mask_in);
-
-	dd->dma_lch_in.chan = dma_request_slave_channel_compat(mask_in,
-			atmel_sha_filter, &pdata->dma_slave->rxdata, dd->dev, "tx");
-	if (!dd->dma_lch_in.chan) {
-		dev_warn(dd->dev, "no DMA channel available\n");
-		return -ENODEV;
-	}
-
-	dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
 	dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
 		SHA_REG_DIN(0);
 	dd->dma_lch_in.dma_conf.src_maxburst = 1;
@@ -2709,49 +2551,18 @@ static const struct of_device_id atmel_sha_dt_ids[] = {
 };
 
 MODULE_DEVICE_TABLE(of, atmel_sha_dt_ids);
-
-static struct crypto_platform_data *atmel_sha_of_init(struct platform_device *pdev)
-{
-	struct device_node *np = pdev->dev.of_node;
-	struct crypto_platform_data *pdata;
-
-	if (!np) {
-		dev_err(&pdev->dev, "device node not found\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
-	pdata->dma_slave = devm_kzalloc(&pdev->dev,
-					sizeof(*(pdata->dma_slave)),
-					GFP_KERNEL);
-	if (!pdata->dma_slave)
-		return ERR_PTR(-ENOMEM);
-
-	return pdata;
-}
-#else /* CONFIG_OF */
-static inline struct crypto_platform_data *atmel_sha_of_init(struct platform_device *dev)
-{
-	return ERR_PTR(-EINVAL);
-}
 #endif
 
 static int atmel_sha_probe(struct platform_device *pdev)
 {
 	struct atmel_sha_dev *sha_dd;
-	struct crypto_platform_data	*pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *sha_res;
 	int err;
 
 	sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL);
-	if (sha_dd == NULL) {
-		err = -ENOMEM;
-		goto sha_dd_err;
-	}
+	if (!sha_dd)
+		return -ENOMEM;
 
 	sha_dd->dev = dev;
 
@@ -2772,7 +2583,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
 	if (!sha_res) {
 		dev_err(dev, "no MEM resource info\n");
 		err = -ENODEV;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 	sha_dd->phys_base = sha_res->start;
 
@@ -2780,14 +2591,14 @@ static int atmel_sha_probe(struct platform_device *pdev)
 	sha_dd->irq = platform_get_irq(pdev,  0);
 	if (sha_dd->irq < 0) {
 		err = sha_dd->irq;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	err = devm_request_irq(&pdev->dev, sha_dd->irq, atmel_sha_irq,
 			       IRQF_SHARED, "atmel-sha", sha_dd);
 	if (err) {
 		dev_err(dev, "unable to request sha irq.\n");
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	/* Initializing the clock */
@@ -2795,41 +2606,30 @@ static int atmel_sha_probe(struct platform_device *pdev)
 	if (IS_ERR(sha_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(sha_dd->iclk);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res);
 	if (IS_ERR(sha_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
 		err = PTR_ERR(sha_dd->io_base);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	err = clk_prepare(sha_dd->iclk);
 	if (err)
-		goto res_err;
+		goto err_tasklet_kill;
 
-	atmel_sha_hw_version_init(sha_dd);
+	err = atmel_sha_hw_version_init(sha_dd);
+	if (err)
+		goto err_iclk_unprepare;
 
 	atmel_sha_get_cap(sha_dd);
 
 	if (sha_dd->caps.has_dma) {
-		pdata = pdev->dev.platform_data;
-		if (!pdata) {
-			pdata = atmel_sha_of_init(pdev);
-			if (IS_ERR(pdata)) {
-				dev_err(&pdev->dev, "platform data not available\n");
-				err = PTR_ERR(pdata);
-				goto iclk_unprepare;
-			}
-		}
-		if (!pdata->dma_slave) {
-			err = -ENXIO;
-			goto iclk_unprepare;
-		}
-		err = atmel_sha_dma_init(sha_dd, pdata);
+		err = atmel_sha_dma_init(sha_dd);
 		if (err)
-			goto err_sha_dma;
+			goto err_iclk_unprepare;
 
 		dev_info(dev, "using %s for DMA transfers\n",
 				dma_chan_name(sha_dd->dma_lch_in.chan));
@@ -2855,14 +2655,11 @@ err_algs:
 	spin_unlock(&atmel_sha.lock);
 	if (sha_dd->caps.has_dma)
 		atmel_sha_dma_cleanup(sha_dd);
-err_sha_dma:
-iclk_unprepare:
+err_iclk_unprepare:
 	clk_unprepare(sha_dd->iclk);
-res_err:
+err_tasklet_kill:
 	tasklet_kill(&sha_dd->queue_task);
 	tasklet_kill(&sha_dd->done_task);
-sha_dd_err:
-	dev_err(dev, "initialization failed.\n");
 
 	return err;
 }
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 1a6c86ae6148..ed40dbb98c6b 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -21,6 +21,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
+#include <linux/dmaengine.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -30,30 +31,32 @@
 #include <linux/of_device.h>
 #include <linux/delay.h>
 #include <linux/crypto.h>
-#include <linux/cryptohash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/des.h>
-#include <crypto/hash.h>
-#include <crypto/internal/hash.h>
-#include <linux/platform_data/crypto-atmel.h>
+#include <crypto/internal/skcipher.h>
 #include "atmel-tdes-regs.h"
 
+#define ATMEL_TDES_PRIORITY	300
+
 /* TDES flags  */
-#define TDES_FLAGS_MODE_MASK		0x00ff
-#define TDES_FLAGS_ENCRYPT	BIT(0)
-#define TDES_FLAGS_CBC		BIT(1)
-#define TDES_FLAGS_CFB		BIT(2)
-#define TDES_FLAGS_CFB8		BIT(3)
-#define TDES_FLAGS_CFB16	BIT(4)
-#define TDES_FLAGS_CFB32	BIT(5)
-#define TDES_FLAGS_CFB64	BIT(6)
-#define TDES_FLAGS_OFB		BIT(7)
-
-#define TDES_FLAGS_INIT		BIT(16)
-#define TDES_FLAGS_FAST		BIT(17)
-#define TDES_FLAGS_BUSY		BIT(18)
-#define TDES_FLAGS_DMA		BIT(19)
+/* Reserve bits [17:16], [13:12], [2:0] for AES Mode Register */
+#define TDES_FLAGS_ENCRYPT	TDES_MR_CYPHER_ENC
+#define TDES_FLAGS_OPMODE_MASK	(TDES_MR_OPMOD_MASK | TDES_MR_CFBS_MASK)
+#define TDES_FLAGS_ECB		TDES_MR_OPMOD_ECB
+#define TDES_FLAGS_CBC		TDES_MR_OPMOD_CBC
+#define TDES_FLAGS_OFB		TDES_MR_OPMOD_OFB
+#define TDES_FLAGS_CFB64	(TDES_MR_OPMOD_CFB | TDES_MR_CFBS_64b)
+#define TDES_FLAGS_CFB32	(TDES_MR_OPMOD_CFB | TDES_MR_CFBS_32b)
+#define TDES_FLAGS_CFB16	(TDES_MR_OPMOD_CFB | TDES_MR_CFBS_16b)
+#define TDES_FLAGS_CFB8		(TDES_MR_OPMOD_CFB | TDES_MR_CFBS_8b)
+
+#define TDES_FLAGS_MODE_MASK	(TDES_FLAGS_OPMODE_MASK | TDES_FLAGS_ENCRYPT)
+
+#define TDES_FLAGS_INIT		BIT(3)
+#define TDES_FLAGS_FAST		BIT(4)
+#define TDES_FLAGS_BUSY		BIT(5)
+#define TDES_FLAGS_DMA		BIT(6)
 
 #define ATMEL_TDES_QUEUE_LENGTH	50
 
@@ -72,7 +75,7 @@ struct atmel_tdes_ctx {
 	struct atmel_tdes_dev *dd;
 
 	int		keylen;
-	u32		key[3*DES_KEY_SIZE / sizeof(u32)];
+	u32		key[DES3_EDE_KEY_SIZE / sizeof(u32)];
 	unsigned long	flags;
 
 	u16		block_size;
@@ -80,6 +83,7 @@ struct atmel_tdes_ctx {
 
 struct atmel_tdes_reqctx {
 	unsigned long mode;
+	u8 lastc[DES_BLOCK_SIZE];
 };
 
 struct atmel_tdes_dma {
@@ -98,7 +102,6 @@ struct atmel_tdes_dev {
 	int					irq;
 
 	unsigned long		flags;
-	int			err;
 
 	spinlock_t		lock;
 	struct crypto_queue	queue;
@@ -106,7 +109,7 @@ struct atmel_tdes_dev {
 	struct tasklet_struct	done_task;
 	struct tasklet_struct	queue_task;
 
-	struct ablkcipher_request	*req;
+	struct skcipher_request	*req;
 	size_t				total;
 
 	struct scatterlist	*in_sg;
@@ -187,7 +190,7 @@ static inline void atmel_tdes_write(struct atmel_tdes_dev *dd,
 }
 
 static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
-					u32 *value, int count)
+			       const u32 *value, int count)
 {
 	for (; count--; value++, offset += 4)
 		atmel_tdes_write(dd, offset, *value);
@@ -224,7 +227,6 @@ static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd)
 	if (!(dd->flags & TDES_FLAGS_INIT)) {
 		atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST);
 		dd->flags |= TDES_FLAGS_INIT;
-		dd->err = 0;
 	}
 
 	return 0;
@@ -235,9 +237,13 @@ static inline unsigned int atmel_tdes_get_version(struct atmel_tdes_dev *dd)
 	return atmel_tdes_read(dd, TDES_HW_VERSION) & 0x00000fff;
 }
 
-static void atmel_tdes_hw_version_init(struct atmel_tdes_dev *dd)
+static int atmel_tdes_hw_version_init(struct atmel_tdes_dev *dd)
 {
-	atmel_tdes_hw_init(dd);
+	int err;
+
+	err = atmel_tdes_hw_init(dd);
+	if (err)
+		return err;
 
 	dd->hw_version = atmel_tdes_get_version(dd);
 
@@ -245,6 +251,8 @@ static void atmel_tdes_hw_version_init(struct atmel_tdes_dev *dd)
 			"version: 0x%x\n", dd->hw_version);
 
 	clk_disable_unprepare(dd->iclk);
+
+	return 0;
 }
 
 static void atmel_tdes_dma_callback(void *data)
@@ -258,7 +266,7 @@ static void atmel_tdes_dma_callback(void *data)
 static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
 {
 	int err;
-	u32 valcr = 0, valmr = TDES_MR_SMOD_PDC;
+	u32 valmr = TDES_MR_SMOD_PDC;
 
 	err = atmel_tdes_hw_init(dd);
 
@@ -280,36 +288,15 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
 		valmr |= TDES_MR_TDESMOD_DES;
 	}
 
-	if (dd->flags & TDES_FLAGS_CBC) {
-		valmr |= TDES_MR_OPMOD_CBC;
-	} else if (dd->flags & TDES_FLAGS_CFB) {
-		valmr |= TDES_MR_OPMOD_CFB;
-
-		if (dd->flags & TDES_FLAGS_CFB8)
-			valmr |= TDES_MR_CFBS_8b;
-		else if (dd->flags & TDES_FLAGS_CFB16)
-			valmr |= TDES_MR_CFBS_16b;
-		else if (dd->flags & TDES_FLAGS_CFB32)
-			valmr |= TDES_MR_CFBS_32b;
-		else if (dd->flags & TDES_FLAGS_CFB64)
-			valmr |= TDES_MR_CFBS_64b;
-	} else if (dd->flags & TDES_FLAGS_OFB) {
-		valmr |= TDES_MR_OPMOD_OFB;
-	}
+	valmr |= dd->flags & TDES_FLAGS_MODE_MASK;
 
-	if ((dd->flags & TDES_FLAGS_ENCRYPT) || (dd->flags & TDES_FLAGS_OFB))
-		valmr |= TDES_MR_CYPHER_ENC;
-
-	atmel_tdes_write(dd, TDES_CR, valcr);
 	atmel_tdes_write(dd, TDES_MR, valmr);
 
 	atmel_tdes_write_n(dd, TDES_KEY1W1R, dd->ctx->key,
 						dd->ctx->keylen >> 2);
 
-	if (((dd->flags & TDES_FLAGS_CBC) || (dd->flags & TDES_FLAGS_CFB) ||
-		(dd->flags & TDES_FLAGS_OFB)) && dd->req->info) {
-		atmel_tdes_write_n(dd, TDES_IV1R, dd->req->info, 2);
-	}
+	if (dd->req->iv && (valmr & TDES_MR_OPMOD_MASK) != TDES_MR_OPMOD_ECB)
+		atmel_tdes_write_n(dd, TDES_IV1R, (void *)dd->req->iv, 2);
 
 	return 0;
 }
@@ -395,11 +382,11 @@ static void atmel_tdes_buff_cleanup(struct atmel_tdes_dev *dd)
 	free_page((unsigned long)dd->buf_in);
 }
 
-static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
-			       dma_addr_t dma_addr_out, int length)
+static int atmel_tdes_crypt_pdc(struct atmel_tdes_dev *dd,
+				dma_addr_t dma_addr_in,
+				dma_addr_t dma_addr_out, int length)
 {
-	struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct atmel_tdes_dev *dd = ctx->dd;
+	struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(dd->req);
 	int len32;
 
 	dd->dma_size = length;
@@ -409,12 +396,19 @@ static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
 					   DMA_TO_DEVICE);
 	}
 
-	if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB8))
+	switch (rctx->mode & TDES_FLAGS_OPMODE_MASK) {
+	case TDES_FLAGS_CFB8:
 		len32 = DIV_ROUND_UP(length, sizeof(u8));
-	else if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB16))
+		break;
+
+	case TDES_FLAGS_CFB16:
 		len32 = DIV_ROUND_UP(length, sizeof(u16));
-	else
+		break;
+
+	default:
 		len32 = DIV_ROUND_UP(length, sizeof(u32));
+		break;
+	}
 
 	atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS);
 	atmel_tdes_write(dd, TDES_TPR, dma_addr_in);
@@ -431,13 +425,14 @@ static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
 	return 0;
 }
 
-static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
-			       dma_addr_t dma_addr_out, int length)
+static int atmel_tdes_crypt_dma(struct atmel_tdes_dev *dd,
+				dma_addr_t dma_addr_in,
+				dma_addr_t dma_addr_out, int length)
 {
-	struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct atmel_tdes_dev *dd = ctx->dd;
+	struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(dd->req);
 	struct scatterlist sg[2];
 	struct dma_async_tx_descriptor	*in_desc, *out_desc;
+	enum dma_slave_buswidth addr_width;
 
 	dd->dma_size = length;
 
@@ -446,23 +441,23 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
 					   DMA_TO_DEVICE);
 	}
 
-	if (dd->flags & TDES_FLAGS_CFB8) {
-		dd->dma_lch_in.dma_conf.dst_addr_width =
-			DMA_SLAVE_BUSWIDTH_1_BYTE;
-		dd->dma_lch_out.dma_conf.src_addr_width =
-			DMA_SLAVE_BUSWIDTH_1_BYTE;
-	} else if (dd->flags & TDES_FLAGS_CFB16) {
-		dd->dma_lch_in.dma_conf.dst_addr_width =
-			DMA_SLAVE_BUSWIDTH_2_BYTES;
-		dd->dma_lch_out.dma_conf.src_addr_width =
-			DMA_SLAVE_BUSWIDTH_2_BYTES;
-	} else {
-		dd->dma_lch_in.dma_conf.dst_addr_width =
-			DMA_SLAVE_BUSWIDTH_4_BYTES;
-		dd->dma_lch_out.dma_conf.src_addr_width =
-			DMA_SLAVE_BUSWIDTH_4_BYTES;
+	switch (rctx->mode & TDES_FLAGS_OPMODE_MASK) {
+	case TDES_FLAGS_CFB8:
+		addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		break;
+
+	case TDES_FLAGS_CFB16:
+		addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		break;
+
+	default:
+		addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		break;
 	}
 
+	dd->dma_lch_in.dma_conf.dst_addr_width = addr_width;
+	dd->dma_lch_out.dma_conf.src_addr_width = addr_width;
+
 	dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
 	dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
 
@@ -502,8 +497,6 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
 
 static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
-					crypto_ablkcipher_reqtfm(dd->req));
 	int err, fast = 0, in, out;
 	size_t count;
 	dma_addr_t addr_in, addr_out;
@@ -559,9 +552,9 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
 	dd->total -= count;
 
 	if (dd->caps.has_dma)
-		err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count);
+		err = atmel_tdes_crypt_dma(dd, addr_in, addr_out, count);
 	else
-		err = atmel_tdes_crypt_pdc(tfm, addr_in, addr_out, count);
+		err = atmel_tdes_crypt_pdc(dd, addr_in, addr_out, count);
 
 	if (err && (dd->flags & TDES_FLAGS_FAST)) {
 		dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
@@ -571,19 +564,47 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
 	return err;
 }
 
+static void
+atmel_tdes_set_iv_as_last_ciphertext_block(struct atmel_tdes_dev *dd)
+{
+	struct skcipher_request *req = dd->req;
+	struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & TDES_FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
 static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err)
 {
-	struct ablkcipher_request *req = dd->req;
+	struct skcipher_request *req = dd->req;
+	struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
 
 	clk_disable_unprepare(dd->iclk);
 
 	dd->flags &= ~TDES_FLAGS_BUSY;
 
+	if (!err && (rctx->mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB)
+		atmel_tdes_set_iv_as_last_ciphertext_block(dd);
+
 	req->base.complete(&req->base, err);
 }
 
 static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
-			       struct ablkcipher_request *req)
+			       struct skcipher_request *req)
 {
 	struct crypto_async_request *async_req, *backlog;
 	struct atmel_tdes_ctx *ctx;
@@ -593,7 +614,7 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
 
 	spin_lock_irqsave(&dd->lock, flags);
 	if (req)
-		ret = ablkcipher_enqueue_request(&dd->queue, req);
+		ret = crypto_enqueue_request(&dd->queue, &req->base);
 	if (dd->flags & TDES_FLAGS_BUSY) {
 		spin_unlock_irqrestore(&dd->lock, flags);
 		return ret;
@@ -610,18 +631,18 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
 	if (backlog)
 		backlog->complete(backlog, -EINPROGRESS);
 
-	req = ablkcipher_request_cast(async_req);
+	req = skcipher_request_cast(async_req);
 
 	/* assign new request to device */
 	dd->req = req;
-	dd->total = req->nbytes;
+	dd->total = req->cryptlen;
 	dd->in_offset = 0;
 	dd->in_sg = req->src;
 	dd->out_offset = 0;
 	dd->out_sg = req->dst;
 
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx = skcipher_request_ctx(req);
+	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	rctx->mode &= TDES_FLAGS_MODE_MASK;
 	dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
 	dd->ctx = ctx;
@@ -665,70 +686,72 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
 	return err;
 }
 
-static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
 
-	if (mode & TDES_FLAGS_CFB8) {
-		if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) {
+	switch (mode & TDES_FLAGS_OPMODE_MASK) {
+	case TDES_FLAGS_CFB8:
+		if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) {
 			pr_err("request size is not exact amount of CFB8 blocks\n");
 			return -EINVAL;
 		}
 		ctx->block_size = CFB8_BLOCK_SIZE;
-	} else if (mode & TDES_FLAGS_CFB16) {
-		if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) {
+		break;
+
+	case TDES_FLAGS_CFB16:
+		if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) {
 			pr_err("request size is not exact amount of CFB16 blocks\n");
 			return -EINVAL;
 		}
 		ctx->block_size = CFB16_BLOCK_SIZE;
-	} else if (mode & TDES_FLAGS_CFB32) {
-		if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) {
+		break;
+
+	case TDES_FLAGS_CFB32:
+		if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) {
 			pr_err("request size is not exact amount of CFB32 blocks\n");
 			return -EINVAL;
 		}
 		ctx->block_size = CFB32_BLOCK_SIZE;
-	} else {
-		if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) {
+		break;
+
+	default:
+		if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) {
 			pr_err("request size is not exact amount of DES blocks\n");
 			return -EINVAL;
 		}
 		ctx->block_size = DES_BLOCK_SIZE;
+		break;
 	}
 
 	rctx->mode = mode;
 
-	return atmel_tdes_handle_queue(ctx->dd, req);
-}
-
-static bool atmel_tdes_filter(struct dma_chan *chan, void *slave)
-{
-	struct at_dma_slave	*sl = slave;
+	if ((mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB &&
+	    !(mode & TDES_FLAGS_ENCRYPT) && req->src == req->dst) {
+		unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 
-	if (sl && sl->dma_dev == chan->device->dev) {
-		chan->private = sl;
-		return true;
-	} else {
-		return false;
+		if (req->cryptlen >= ivsize)
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
 	}
+
+	return atmel_tdes_handle_queue(ctx->dd, req);
 }
 
-static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd,
-			struct crypto_platform_data *pdata)
+static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd)
 {
-	dma_cap_mask_t mask;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
+	int ret;
 
 	/* Try to grab 2 DMA channels */
-	dd->dma_lch_in.chan = dma_request_slave_channel_compat(mask,
-			atmel_tdes_filter, &pdata->dma_slave->rxdata, dd->dev, "tx");
-	if (!dd->dma_lch_in.chan)
+	dd->dma_lch_in.chan = dma_request_chan(dd->dev, "tx");
+	if (IS_ERR(dd->dma_lch_in.chan)) {
+		ret = PTR_ERR(dd->dma_lch_in.chan);
 		goto err_dma_in;
+	}
 
-	dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
 	dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
 		TDES_IDATA1R;
 	dd->dma_lch_in.dma_conf.src_maxburst = 1;
@@ -739,12 +762,12 @@ static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd,
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
 	dd->dma_lch_in.dma_conf.device_fc = false;
 
-	dd->dma_lch_out.chan = dma_request_slave_channel_compat(mask,
-			atmel_tdes_filter, &pdata->dma_slave->txdata, dd->dev, "rx");
-	if (!dd->dma_lch_out.chan)
+	dd->dma_lch_out.chan = dma_request_chan(dd->dev, "rx");
+	if (IS_ERR(dd->dma_lch_out.chan)) {
+		ret = PTR_ERR(dd->dma_lch_out.chan);
 		goto err_dma_out;
+	}
 
-	dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM;
 	dd->dma_lch_out.dma_conf.src_addr = dd->phys_base +
 		TDES_ODATA1R;
 	dd->dma_lch_out.dma_conf.src_maxburst = 1;
@@ -760,8 +783,8 @@ static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd,
 err_dma_out:
 	dma_release_channel(dd->dma_lch_in.chan);
 err_dma_in:
-	dev_warn(dd->dev, "no DMA channel available\n");
-	return -ENODEV;
+	dev_err(dd->dev, "no DMA channel available\n");
+	return ret;
 }
 
 static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd)
@@ -770,13 +793,13 @@ static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd)
 	dma_release_channel(dd->dma_lch_out.chan);
 }
 
-static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int atmel_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = verify_ablkcipher_des_key(tfm, key);
+	err = verify_skcipher_des_key(tfm, key);
 	if (err)
 		return err;
 
@@ -786,13 +809,13 @@ static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int atmel_tdes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = verify_ablkcipher_des3_key(tfm, key);
+	err = verify_skcipher_des3_key(tfm, key);
 	if (err)
 		return err;
 
@@ -802,84 +825,81 @@ static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int atmel_tdes_ecb_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_ecb_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT);
+	return atmel_tdes_crypt(req, TDES_FLAGS_ECB | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_ecb_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_ecb_decrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, 0);
+	return atmel_tdes_crypt(req, TDES_FLAGS_ECB);
 }
 
-static int atmel_tdes_cbc_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cbc_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CBC);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CBC | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_cbc_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cbc_decrypt(struct skcipher_request *req)
 {
 	return atmel_tdes_crypt(req, TDES_FLAGS_CBC);
 }
-static int atmel_tdes_cfb_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB64 | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_cfb_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb_decrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_CFB);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB64);
 }
 
-static int atmel_tdes_cfb8_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb8_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
-						TDES_FLAGS_CFB8);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB8 | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_cfb8_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb8_decrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB8);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB8);
 }
 
-static int atmel_tdes_cfb16_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb16_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
-						TDES_FLAGS_CFB16);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB16 | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_cfb16_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb16_decrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB16);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB16);
 }
 
-static int atmel_tdes_cfb32_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb32_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB |
-						TDES_FLAGS_CFB32);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB32 | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_cfb32_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_cfb32_decrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB32);
+	return atmel_tdes_crypt(req, TDES_FLAGS_CFB32);
 }
 
-static int atmel_tdes_ofb_encrypt(struct ablkcipher_request *req)
+static int atmel_tdes_ofb_encrypt(struct skcipher_request *req)
 {
-	return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_OFB);
+	return atmel_tdes_crypt(req, TDES_FLAGS_OFB | TDES_FLAGS_ENCRYPT);
 }
 
-static int atmel_tdes_ofb_decrypt(struct ablkcipher_request *req)
+static int atmel_tdes_ofb_decrypt(struct skcipher_request *req)
 {
 	return atmel_tdes_crypt(req, TDES_FLAGS_OFB);
 }
 
-static int atmel_tdes_cra_init(struct crypto_tfm *tfm)
+static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct atmel_tdes_dev *dd;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
 
 	dd = atmel_tdes_find_dev(ctx);
 	if (!dd)
@@ -888,204 +908,144 @@ static int atmel_tdes_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static struct crypto_alg tdes_algs[] = {
+static void atmel_tdes_skcipher_alg_init(struct skcipher_alg *alg)
 {
-	.cra_name		= "ecb(des)",
-	.cra_driver_name	= "atmel-ecb-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_ecb_encrypt,
-		.decrypt	= atmel_tdes_ecb_decrypt,
-	}
+	alg->base.cra_priority = ATMEL_TDES_PRIORITY;
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+	alg->base.cra_ctxsize = sizeof(struct atmel_tdes_ctx),
+	alg->base.cra_module = THIS_MODULE;
+
+	alg->init = atmel_tdes_init_tfm;
+}
+
+static struct skcipher_alg tdes_algs[] = {
+{
+	.base.cra_name		= "ecb(des)",
+	.base.cra_driver_name	= "atmel-ecb-des",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_ecb_encrypt,
+	.decrypt		= atmel_tdes_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(des)",
-	.cra_driver_name	= "atmel-cbc-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_cbc_encrypt,
-		.decrypt	= atmel_tdes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(des)",
+	.base.cra_driver_name	= "atmel-cbc-des",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_cbc_encrypt,
+	.decrypt		= atmel_tdes_cbc_decrypt,
 },
 {
-	.cra_name		= "cfb(des)",
-	.cra_driver_name	= "atmel-cfb-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_cfb_encrypt,
-		.decrypt	= atmel_tdes_cfb_decrypt,
-	}
+	.base.cra_name		= "cfb(des)",
+	.base.cra_driver_name	= "atmel-cfb-des",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_cfb_encrypt,
+	.decrypt		= atmel_tdes_cfb_decrypt,
 },
 {
-	.cra_name		= "cfb8(des)",
-	.cra_driver_name	= "atmel-cfb8-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB8_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_cfb8_encrypt,
-		.decrypt	= atmel_tdes_cfb8_decrypt,
-	}
+	.base.cra_name		= "cfb8(des)",
+	.base.cra_driver_name	= "atmel-cfb8-des",
+	.base.cra_blocksize	= CFB8_BLOCK_SIZE,
+	.base.cra_alignmask	= 0,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_cfb8_encrypt,
+	.decrypt		= atmel_tdes_cfb8_decrypt,
 },
 {
-	.cra_name		= "cfb16(des)",
-	.cra_driver_name	= "atmel-cfb16-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB16_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x1,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_cfb16_encrypt,
-		.decrypt	= atmel_tdes_cfb16_decrypt,
-	}
+	.base.cra_name		= "cfb16(des)",
+	.base.cra_driver_name	= "atmel-cfb16-des",
+	.base.cra_blocksize	= CFB16_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x1,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_cfb16_encrypt,
+	.decrypt		= atmel_tdes_cfb16_decrypt,
 },
 {
-	.cra_name		= "cfb32(des)",
-	.cra_driver_name	= "atmel-cfb32-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CFB32_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x3,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_cfb32_encrypt,
-		.decrypt	= atmel_tdes_cfb32_decrypt,
-	}
+	.base.cra_name		= "cfb32(des)",
+	.base.cra_driver_name	= "atmel-cfb32-des",
+	.base.cra_blocksize	= CFB32_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x3,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_cfb32_encrypt,
+	.decrypt		= atmel_tdes_cfb32_decrypt,
 },
 {
-	.cra_name		= "ofb(des)",
-	.cra_driver_name	= "atmel-ofb-des",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_des_setkey,
-		.encrypt	= atmel_tdes_ofb_encrypt,
-		.decrypt	= atmel_tdes_ofb_decrypt,
-	}
+	.base.cra_name		= "ofb(des)",
+	.base.cra_driver_name	= "atmel-ofb-des",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= atmel_des_setkey,
+	.encrypt		= atmel_tdes_ofb_encrypt,
+	.decrypt		= atmel_tdes_ofb_decrypt,
 },
 {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "atmel-ecb-tdes",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 3 * DES_KEY_SIZE,
-		.max_keysize	= 3 * DES_KEY_SIZE,
-		.setkey		= atmel_tdes_setkey,
-		.encrypt	= atmel_tdes_ecb_encrypt,
-		.decrypt	= atmel_tdes_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(des3_ede)",
+	.base.cra_driver_name	= "atmel-ecb-tdes",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.setkey			= atmel_tdes_setkey,
+	.encrypt		= atmel_tdes_ecb_encrypt,
+	.decrypt		= atmel_tdes_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "atmel-cbc-tdes",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 3*DES_KEY_SIZE,
-		.max_keysize	= 3*DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_tdes_setkey,
-		.encrypt	= atmel_tdes_cbc_encrypt,
-		.decrypt	= atmel_tdes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(des3_ede)",
+	.base.cra_driver_name	= "atmel-cbc-tdes",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.setkey			= atmel_tdes_setkey,
+	.encrypt		= atmel_tdes_cbc_encrypt,
+	.decrypt		= atmel_tdes_cbc_decrypt,
+	.ivsize			= DES_BLOCK_SIZE,
 },
 {
-	.cra_name		= "ofb(des3_ede)",
-	.cra_driver_name	= "atmel-ofb-tdes",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct atmel_tdes_ctx),
-	.cra_alignmask		= 0x7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= atmel_tdes_cra_init,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 3*DES_KEY_SIZE,
-		.max_keysize	= 3*DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= atmel_tdes_setkey,
-		.encrypt	= atmel_tdes_ofb_encrypt,
-		.decrypt	= atmel_tdes_ofb_decrypt,
-	}
+	.base.cra_name		= "ofb(des3_ede)",
+	.base.cra_driver_name	= "atmel-ofb-tdes",
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0x7,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.setkey			= atmel_tdes_setkey,
+	.encrypt		= atmel_tdes_ofb_encrypt,
+	.decrypt		= atmel_tdes_ofb_decrypt,
+	.ivsize			= DES_BLOCK_SIZE,
 },
 };
 
@@ -1106,8 +1066,6 @@ static void atmel_tdes_done_task(unsigned long data)
 	else
 		err = atmel_tdes_crypt_dma_stop(dd);
 
-	err = dd->err ? : err;
-
 	if (dd->total && !err) {
 		if (dd->flags & TDES_FLAGS_FAST) {
 			dd->in_sg = sg_next(dd->in_sg);
@@ -1148,7 +1106,7 @@ static void atmel_tdes_unregister_algs(struct atmel_tdes_dev *dd)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(tdes_algs); i++)
-		crypto_unregister_alg(&tdes_algs[i]);
+		crypto_unregister_skcipher(&tdes_algs[i]);
 }
 
 static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd)
@@ -1156,7 +1114,9 @@ static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd)
 	int err, i, j;
 
 	for (i = 0; i < ARRAY_SIZE(tdes_algs); i++) {
-		err = crypto_register_alg(&tdes_algs[i]);
+		atmel_tdes_skcipher_alg_init(&tdes_algs[i]);
+
+		err = crypto_register_skcipher(&tdes_algs[i]);
 		if (err)
 			goto err_tdes_algs;
 	}
@@ -1165,7 +1125,7 @@ static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd)
 
 err_tdes_algs:
 	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&tdes_algs[j]);
+		crypto_unregister_skcipher(&tdes_algs[j]);
 
 	return err;
 }
@@ -1197,49 +1157,18 @@ static const struct of_device_id atmel_tdes_dt_ids[] = {
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_tdes_dt_ids);
-
-static struct crypto_platform_data *atmel_tdes_of_init(struct platform_device *pdev)
-{
-	struct device_node *np = pdev->dev.of_node;
-	struct crypto_platform_data *pdata;
-
-	if (!np) {
-		dev_err(&pdev->dev, "device node not found\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
-	pdata->dma_slave = devm_kzalloc(&pdev->dev,
-					sizeof(*(pdata->dma_slave)),
-					GFP_KERNEL);
-	if (!pdata->dma_slave)
-		return ERR_PTR(-ENOMEM);
-
-	return pdata;
-}
-#else /* CONFIG_OF */
-static inline struct crypto_platform_data *atmel_tdes_of_init(struct platform_device *pdev)
-{
-	return ERR_PTR(-EINVAL);
-}
 #endif
 
 static int atmel_tdes_probe(struct platform_device *pdev)
 {
 	struct atmel_tdes_dev *tdes_dd;
-	struct crypto_platform_data	*pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *tdes_res;
 	int err;
 
 	tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL);
-	if (tdes_dd == NULL) {
-		err = -ENOMEM;
-		goto tdes_dd_err;
-	}
+	if (!tdes_dd)
+		return -ENOMEM;
 
 	tdes_dd->dev = dev;
 
@@ -1260,7 +1189,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 	if (!tdes_res) {
 		dev_err(dev, "no MEM resource info\n");
 		err = -ENODEV;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 	tdes_dd->phys_base = tdes_res->start;
 
@@ -1268,14 +1197,14 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 	tdes_dd->irq = platform_get_irq(pdev,  0);
 	if (tdes_dd->irq < 0) {
 		err = tdes_dd->irq;
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	err = devm_request_irq(&pdev->dev, tdes_dd->irq, atmel_tdes_irq,
 			       IRQF_SHARED, "atmel-tdes", tdes_dd);
 	if (err) {
 		dev_err(dev, "unable to request tdes irq.\n");
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	/* Initializing the clock */
@@ -1283,41 +1212,30 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 	if (IS_ERR(tdes_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(tdes_dd->iclk);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
 	tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res);
 	if (IS_ERR(tdes_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
 		err = PTR_ERR(tdes_dd->io_base);
-		goto res_err;
+		goto err_tasklet_kill;
 	}
 
-	atmel_tdes_hw_version_init(tdes_dd);
+	err = atmel_tdes_hw_version_init(tdes_dd);
+	if (err)
+		goto err_tasklet_kill;
 
 	atmel_tdes_get_cap(tdes_dd);
 
 	err = atmel_tdes_buff_init(tdes_dd);
 	if (err)
-		goto err_tdes_buff;
+		goto err_tasklet_kill;
 
 	if (tdes_dd->caps.has_dma) {
-		pdata = pdev->dev.platform_data;
-		if (!pdata) {
-			pdata = atmel_tdes_of_init(pdev);
-			if (IS_ERR(pdata)) {
-				dev_err(&pdev->dev, "platform data not available\n");
-				err = PTR_ERR(pdata);
-				goto err_pdata;
-			}
-		}
-		if (!pdata->dma_slave) {
-			err = -ENXIO;
-			goto err_pdata;
-		}
-		err = atmel_tdes_dma_init(tdes_dd, pdata);
+		err = atmel_tdes_dma_init(tdes_dd);
 		if (err)
-			goto err_tdes_dma;
+			goto err_buff_cleanup;
 
 		dev_info(dev, "using %s, %s for DMA transfers\n",
 				dma_chan_name(tdes_dd->dma_lch_in.chan),
@@ -1342,15 +1260,11 @@ err_algs:
 	spin_unlock(&atmel_tdes.lock);
 	if (tdes_dd->caps.has_dma)
 		atmel_tdes_dma_cleanup(tdes_dd);
-err_tdes_dma:
-err_pdata:
+err_buff_cleanup:
 	atmel_tdes_buff_cleanup(tdes_dd);
-err_tdes_buff:
-res_err:
+err_tasklet_kill:
 	tasklet_kill(&tdes_dd->done_task);
 	tasklet_kill(&tdes_dd->queue_task);
-tdes_dd_err:
-	dev_err(dev, "initialization failed.\n");
 
 	return err;
 }
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index 4b20606983a4..fcf1effc7661 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -1249,10 +1249,8 @@ static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key,
 {
 	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(&tfm->base);
 
-	if (len != 16 && len != 24 && len != 32) {
-		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -1;
-	}
+	if (len != 16 && len != 24 && len != 32)
+		return -EINVAL;
 
 	ctx->key_length = len;
 
@@ -1606,8 +1604,6 @@ artpec6_crypto_cipher_set_key(struct crypto_skcipher *cipher, const u8 *key,
 	case 32:
 		break;
 	default:
-		crypto_skcipher_set_flags(cipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -1634,8 +1630,6 @@ artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key,
 	case 64:
 		break;
 	default:
-		crypto_skcipher_set_flags(cipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index f85356a48e7e..c8b9408541a9 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -110,8 +110,8 @@ static u8 select_channel(void)
 }
 
 /**
- * spu_ablkcipher_rx_sg_create() - Build up the scatterlist of buffers used to
- * receive a SPU response message for an ablkcipher request. Includes buffers to
+ * spu_skcipher_rx_sg_create() - Build up the scatterlist of buffers used to
+ * receive a SPU response message for an skcipher request. Includes buffers to
  * catch SPU message headers and the response data.
  * @mssg:	mailbox message containing the receive sg
  * @rctx:	crypto request context
@@ -130,7 +130,7 @@ static u8 select_channel(void)
  *   < 0 if an error
  */
 static int
-spu_ablkcipher_rx_sg_create(struct brcm_message *mssg,
+spu_skcipher_rx_sg_create(struct brcm_message *mssg,
 			    struct iproc_reqctx_s *rctx,
 			    u8 rx_frag_num,
 			    unsigned int chunksize, u32 stat_pad_len)
@@ -179,8 +179,8 @@ spu_ablkcipher_rx_sg_create(struct brcm_message *mssg,
 }
 
 /**
- * spu_ablkcipher_tx_sg_create() - Build up the scatterlist of buffers used to
- * send a SPU request message for an ablkcipher request. Includes SPU message
+ * spu_skcipher_tx_sg_create() - Build up the scatterlist of buffers used to
+ * send a SPU request message for an skcipher request. Includes SPU message
  * headers and the request data.
  * @mssg:	mailbox message containing the transmit sg
  * @rctx:	crypto request context
@@ -198,7 +198,7 @@ spu_ablkcipher_rx_sg_create(struct brcm_message *mssg,
  *   < 0 if an error
  */
 static int
-spu_ablkcipher_tx_sg_create(struct brcm_message *mssg,
+spu_skcipher_tx_sg_create(struct brcm_message *mssg,
 			    struct iproc_reqctx_s *rctx,
 			    u8 tx_frag_num, unsigned int chunksize, u32 pad_len)
 {
@@ -283,7 +283,7 @@ static int mailbox_send_message(struct brcm_message *mssg, u32 flags,
 }
 
 /**
- * handle_ablkcipher_req() - Submit as much of a block cipher request as fits in
+ * handle_skcipher_req() - Submit as much of a block cipher request as fits in
  * a single SPU request message, starting at the current position in the request
  * data.
  * @rctx:	Crypto request context
@@ -300,12 +300,12 @@ static int mailbox_send_message(struct brcm_message *mssg, u32 flags,
  *			 asynchronously
  *         Any other value indicates an error
  */
-static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx)
+static int handle_skcipher_req(struct iproc_reqctx_s *rctx)
 {
 	struct spu_hw *spu = &iproc_priv.spu;
 	struct crypto_async_request *areq = rctx->parent;
-	struct ablkcipher_request *req =
-	    container_of(areq, struct ablkcipher_request, base);
+	struct skcipher_request *req =
+	    container_of(areq, struct skcipher_request, base);
 	struct iproc_ctx_s *ctx = rctx->ctx;
 	struct spu_cipher_parms cipher_parms;
 	int err = 0;
@@ -468,7 +468,7 @@ static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx)
 	    spu->spu_xts_tweak_in_payload())
 		rx_frag_num++;	/* extra sg to insert tweak */
 
-	err = spu_ablkcipher_rx_sg_create(mssg, rctx, rx_frag_num, chunksize,
+	err = spu_skcipher_rx_sg_create(mssg, rctx, rx_frag_num, chunksize,
 					  stat_pad_len);
 	if (err)
 		return err;
@@ -482,7 +482,7 @@ static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx)
 	    spu->spu_xts_tweak_in_payload())
 		tx_frag_num++;	/* extra sg to insert tweak */
 
-	err = spu_ablkcipher_tx_sg_create(mssg, rctx, tx_frag_num, chunksize,
+	err = spu_skcipher_tx_sg_create(mssg, rctx, tx_frag_num, chunksize,
 					  pad_len);
 	if (err)
 		return err;
@@ -495,16 +495,16 @@ static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx)
 }
 
 /**
- * handle_ablkcipher_resp() - Process a block cipher SPU response. Updates the
+ * handle_skcipher_resp() - Process a block cipher SPU response. Updates the
  * total received count for the request and updates global stats.
  * @rctx:	Crypto request context
  */
-static void handle_ablkcipher_resp(struct iproc_reqctx_s *rctx)
+static void handle_skcipher_resp(struct iproc_reqctx_s *rctx)
 {
 	struct spu_hw *spu = &iproc_priv.spu;
 #ifdef DEBUG
 	struct crypto_async_request *areq = rctx->parent;
-	struct ablkcipher_request *req = ablkcipher_request_cast(areq);
+	struct skcipher_request *req = skcipher_request_cast(areq);
 #endif
 	struct iproc_ctx_s *ctx = rctx->ctx;
 	u32 payload_len;
@@ -1685,8 +1685,8 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg)
 
 	/* Process the SPU response message */
 	switch (rctx->ctx->alg->type) {
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		handle_ablkcipher_resp(rctx);
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		handle_skcipher_resp(rctx);
 		break;
 	case CRYPTO_ALG_TYPE_AHASH:
 		handle_ahash_resp(rctx);
@@ -1708,8 +1708,8 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg)
 		spu_chunk_cleanup(rctx);
 
 		switch (rctx->ctx->alg->type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			err = handle_ablkcipher_req(rctx);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			err = handle_skcipher_req(rctx);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			err = handle_ahash_req(rctx);
@@ -1739,7 +1739,7 @@ cb_finish:
 /* ==================== Kernel Cryptographic API ==================== */
 
 /**
- * ablkcipher_enqueue() - Handle ablkcipher encrypt or decrypt request.
+ * skcipher_enqueue() - Handle skcipher encrypt or decrypt request.
  * @req:	Crypto API request
  * @encrypt:	true if encrypting; false if decrypting
  *
@@ -1747,11 +1747,11 @@ cb_finish:
  *			asynchronously
  *	   < 0 if an error
  */
-static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt)
+static int skcipher_enqueue(struct skcipher_request *req, bool encrypt)
 {
-	struct iproc_reqctx_s *rctx = ablkcipher_request_ctx(req);
+	struct iproc_reqctx_s *rctx = skcipher_request_ctx(req);
 	struct iproc_ctx_s *ctx =
-	    crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	    crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	int err;
 
 	flow_log("%s() enc:%u\n", __func__, encrypt);
@@ -1761,7 +1761,7 @@ static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt)
 	rctx->parent = &req->base;
 	rctx->is_encrypt = encrypt;
 	rctx->bd_suppress = false;
-	rctx->total_todo = req->nbytes;
+	rctx->total_todo = req->cryptlen;
 	rctx->src_sent = 0;
 	rctx->total_sent = 0;
 	rctx->total_received = 0;
@@ -1782,15 +1782,15 @@ static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt)
 	    ctx->cipher.mode == CIPHER_MODE_GCM ||
 	    ctx->cipher.mode == CIPHER_MODE_CCM) {
 		rctx->iv_ctr_len =
-		    crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
-		memcpy(rctx->msg_buf.iv_ctr, req->info, rctx->iv_ctr_len);
+		    crypto_skcipher_ivsize(crypto_skcipher_reqtfm(req));
+		memcpy(rctx->msg_buf.iv_ctr, req->iv, rctx->iv_ctr_len);
 	} else {
 		rctx->iv_ctr_len = 0;
 	}
 
 	/* Choose a SPU to process this request */
 	rctx->chan_idx = select_channel();
-	err = handle_ablkcipher_req(rctx);
+	err = handle_skcipher_req(rctx);
 	if (err != -EINPROGRESS)
 		/* synchronous result */
 		spu_chunk_cleanup(rctx);
@@ -1798,13 +1798,13 @@ static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt)
 	return err;
 }
 
-static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int des_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		      unsigned int keylen)
 {
-	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	struct iproc_ctx_s *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1812,13 +1812,13 @@ static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int threedes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int threedes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			   unsigned int keylen)
 {
-	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	struct iproc_ctx_s *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1826,10 +1826,10 @@ static int threedes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		      unsigned int keylen)
 {
-	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	struct iproc_ctx_s *ctx = crypto_skcipher_ctx(cipher);
 
 	if (ctx->cipher.mode == CIPHER_MODE_XTS)
 		/* XTS includes two keys of equal length */
@@ -1846,7 +1846,6 @@ static int aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		ctx->cipher_type = CIPHER_TYPE_AES256;
 		break;
 	default:
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	WARN_ON((ctx->max_payload != SPU_MAX_PAYLOAD_INF) &&
@@ -1854,10 +1853,10 @@ static int aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int rc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int rc4_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		      unsigned int keylen)
 {
-	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	struct iproc_ctx_s *ctx = crypto_skcipher_ctx(cipher);
 	int i;
 
 	ctx->enckeylen = ARC4_MAX_KEY_SIZE + ARC4_STATE_SIZE;
@@ -1874,16 +1873,16 @@ static int rc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int skcipher_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			     unsigned int keylen)
 {
 	struct spu_hw *spu = &iproc_priv.spu;
-	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	struct iproc_ctx_s *ctx = crypto_skcipher_ctx(cipher);
 	struct spu_cipher_parms cipher_parms;
 	u32 alloc_len = 0;
 	int err;
 
-	flow_log("ablkcipher_setkey() keylen: %d\n", keylen);
+	flow_log("skcipher_setkey() keylen: %d\n", keylen);
 	flow_dump("  key: ", key, keylen);
 
 	switch (ctx->cipher.alg) {
@@ -1926,7 +1925,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		alloc_len = BCM_HDR_LEN + SPU2_HEADER_ALLOC_LEN;
 	memset(ctx->bcm_spu_req_hdr, 0, alloc_len);
 	cipher_parms.iv_buf = NULL;
-	cipher_parms.iv_len = crypto_ablkcipher_ivsize(cipher);
+	cipher_parms.iv_len = crypto_skcipher_ivsize(cipher);
 	flow_log("%s: iv_len %u\n", __func__, cipher_parms.iv_len);
 
 	cipher_parms.alg = ctx->cipher.alg;
@@ -1950,17 +1949,17 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int ablkcipher_encrypt(struct ablkcipher_request *req)
+static int skcipher_encrypt(struct skcipher_request *req)
 {
-	flow_log("ablkcipher_encrypt() nbytes:%u\n", req->nbytes);
+	flow_log("skcipher_encrypt() nbytes:%u\n", req->cryptlen);
 
-	return ablkcipher_enqueue(req, true);
+	return skcipher_enqueue(req, true);
 }
 
-static int ablkcipher_decrypt(struct ablkcipher_request *req)
+static int skcipher_decrypt(struct skcipher_request *req)
 {
-	flow_log("ablkcipher_decrypt() nbytes:%u\n", req->nbytes);
-	return ablkcipher_enqueue(req, false);
+	flow_log("skcipher_decrypt() nbytes:%u\n", req->cryptlen);
+	return skcipher_enqueue(req, false);
 }
 
 static int ahash_enqueue(struct ahash_request *req)
@@ -2894,13 +2893,8 @@ static int aead_authenc_setkey(struct crypto_aead *cipher,
 		ctx->fallback_cipher->base.crt_flags |=
 		    tfm->crt_flags & CRYPTO_TFM_REQ_MASK;
 		ret = crypto_aead_setkey(ctx->fallback_cipher, key, keylen);
-		if (ret) {
+		if (ret)
 			flow_log("  fallback setkey() returned:%d\n", ret);
-			tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-			tfm->crt_flags |=
-			    (ctx->fallback_cipher->base.crt_flags &
-			     CRYPTO_TFM_RES_MASK);
-		}
 	}
 
 	ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen,
@@ -2916,7 +2910,6 @@ badkey:
 	ctx->authkeylen = 0;
 	ctx->digestsize = 0;
 
-	crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -2967,13 +2960,8 @@ static int aead_gcm_ccm_setkey(struct crypto_aead *cipher,
 		    tfm->crt_flags & CRYPTO_TFM_REQ_MASK;
 		ret = crypto_aead_setkey(ctx->fallback_cipher, key,
 					 keylen + ctx->salt_len);
-		if (ret) {
+		if (ret)
 			flow_log("  fallback setkey() returned:%d\n", ret);
-			tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-			tfm->crt_flags |=
-			    (ctx->fallback_cipher->base.crt_flags &
-			     CRYPTO_TFM_RES_MASK);
-		}
 	}
 
 	ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen,
@@ -2992,7 +2980,6 @@ badkey:
 	ctx->authkeylen = 0;
 	ctx->digestsize = 0;
 
-	crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -3585,18 +3572,16 @@ static struct iproc_alg_s driver_algs[] = {
 	 .auth_first = 0,
 	 },
 
-/* ABLKCIPHER algorithms. */
+/* SKCIPHER algorithms. */
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ecb(arc4)",
-			.cra_driver_name = "ecb-arc4-iproc",
-			.cra_blocksize = ARC4_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = ARC4_MIN_KEY_SIZE,
-					   .max_keysize = ARC4_MAX_KEY_SIZE,
-					   .ivsize = 0,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ecb(arc4)",
+			.base.cra_driver_name = "ecb-arc4-iproc",
+			.base.cra_blocksize = ARC4_BLOCK_SIZE,
+			.min_keysize = ARC4_MIN_KEY_SIZE,
+			.max_keysize = ARC4_MAX_KEY_SIZE,
+			.ivsize = 0,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_RC4,
@@ -3608,16 +3593,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ofb(des)",
-			.cra_driver_name = "ofb-des-iproc",
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES_KEY_SIZE,
-					   .max_keysize = DES_KEY_SIZE,
-					   .ivsize = DES_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ofb(des)",
+			.base.cra_driver_name = "ofb-des-iproc",
+			.base.cra_blocksize = DES_BLOCK_SIZE,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_DES,
@@ -3629,16 +3612,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "cbc(des)",
-			.cra_driver_name = "cbc-des-iproc",
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES_KEY_SIZE,
-					   .max_keysize = DES_KEY_SIZE,
-					   .ivsize = DES_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "cbc(des)",
+			.base.cra_driver_name = "cbc-des-iproc",
+			.base.cra_blocksize = DES_BLOCK_SIZE,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_DES,
@@ -3650,16 +3631,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ecb(des)",
-			.cra_driver_name = "ecb-des-iproc",
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES_KEY_SIZE,
-					   .max_keysize = DES_KEY_SIZE,
-					   .ivsize = 0,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ecb(des)",
+			.base.cra_driver_name = "ecb-des-iproc",
+			.base.cra_blocksize = DES_BLOCK_SIZE,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = 0,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_DES,
@@ -3671,16 +3650,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ofb(des3_ede)",
-			.cra_driver_name = "ofb-des3-iproc",
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES3_EDE_KEY_SIZE,
-					   .max_keysize = DES3_EDE_KEY_SIZE,
-					   .ivsize = DES3_EDE_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ofb(des3_ede)",
+			.base.cra_driver_name = "ofb-des3-iproc",
+			.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_3DES,
@@ -3692,16 +3669,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "cbc(des3_ede)",
-			.cra_driver_name = "cbc-des3-iproc",
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES3_EDE_KEY_SIZE,
-					   .max_keysize = DES3_EDE_KEY_SIZE,
-					   .ivsize = DES3_EDE_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "cbc(des3_ede)",
+			.base.cra_driver_name = "cbc-des3-iproc",
+			.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_3DES,
@@ -3713,16 +3688,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ecb(des3_ede)",
-			.cra_driver_name = "ecb-des3-iproc",
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = DES3_EDE_KEY_SIZE,
-					   .max_keysize = DES3_EDE_KEY_SIZE,
-					   .ivsize = 0,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ecb(des3_ede)",
+			.base.cra_driver_name = "ecb-des3-iproc",
+			.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = 0,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_3DES,
@@ -3734,16 +3707,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ofb(aes)",
-			.cra_driver_name = "ofb-aes-iproc",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = AES_MIN_KEY_SIZE,
-					   .max_keysize = AES_MAX_KEY_SIZE,
-					   .ivsize = AES_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ofb(aes)",
+			.base.cra_driver_name = "ofb-aes-iproc",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_AES,
@@ -3755,16 +3726,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "cbc(aes)",
-			.cra_driver_name = "cbc-aes-iproc",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = AES_MIN_KEY_SIZE,
-					   .max_keysize = AES_MAX_KEY_SIZE,
-					   .ivsize = AES_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "cbc(aes)",
+			.base.cra_driver_name = "cbc-aes-iproc",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_AES,
@@ -3776,16 +3745,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ecb(aes)",
-			.cra_driver_name = "ecb-aes-iproc",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = AES_MIN_KEY_SIZE,
-					   .max_keysize = AES_MAX_KEY_SIZE,
-					   .ivsize = 0,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ecb(aes)",
+			.base.cra_driver_name = "ecb-aes-iproc",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = 0,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_AES,
@@ -3797,16 +3764,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 	{
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "ctr(aes)",
-			.cra_driver_name = "ctr-aes-iproc",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-					   .min_keysize = AES_MIN_KEY_SIZE,
-					   .max_keysize = AES_MAX_KEY_SIZE,
-					   .ivsize = AES_BLOCK_SIZE,
-					}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "ctr(aes)",
+			.base.cra_driver_name = "ctr-aes-iproc",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_AES,
@@ -3818,16 +3783,14 @@ static struct iproc_alg_s driver_algs[] = {
 		       },
 	 },
 {
-	 .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	 .alg.crypto = {
-			.cra_name = "xts(aes)",
-			.cra_driver_name = "xts-aes-iproc",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ablkcipher = {
-				.min_keysize = 2 * AES_MIN_KEY_SIZE,
-				.max_keysize = 2 * AES_MAX_KEY_SIZE,
-				.ivsize = AES_BLOCK_SIZE,
-				}
+	 .type = CRYPTO_ALG_TYPE_SKCIPHER,
+	 .alg.skcipher = {
+			.base.cra_name = "xts(aes)",
+			.base.cra_driver_name = "xts-aes-iproc",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.min_keysize = 2 * AES_MIN_KEY_SIZE,
+			.max_keysize = 2 * AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
 			},
 	 .cipher_info = {
 			 .alg = CIPHER_ALG_AES,
@@ -4282,16 +4245,17 @@ static int generic_cra_init(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int ablkcipher_cra_init(struct crypto_tfm *tfm)
+static int skcipher_init_tfm(struct crypto_skcipher *skcipher)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	struct skcipher_alg *alg = crypto_skcipher_alg(skcipher);
 	struct iproc_alg_s *cipher_alg;
 
 	flow_log("%s()\n", __func__);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct iproc_reqctx_s);
+	crypto_skcipher_set_reqsize(skcipher, sizeof(struct iproc_reqctx_s));
 
-	cipher_alg = container_of(alg, struct iproc_alg_s, alg.crypto);
+	cipher_alg = container_of(alg, struct iproc_alg_s, alg.skcipher);
 	return generic_cra_init(tfm, cipher_alg);
 }
 
@@ -4363,6 +4327,11 @@ static void generic_cra_exit(struct crypto_tfm *tfm)
 	atomic_dec(&iproc_priv.session_count);
 }
 
+static void skcipher_exit_tfm(struct crypto_skcipher *tfm)
+{
+	generic_cra_exit(crypto_skcipher_tfm(tfm));
+}
+
 static void aead_cra_exit(struct crypto_aead *aead)
 {
 	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
@@ -4524,10 +4493,10 @@ static void spu_counters_init(void)
 	atomic_set(&iproc_priv.bad_icv, 0);
 }
 
-static int spu_register_ablkcipher(struct iproc_alg_s *driver_alg)
+static int spu_register_skcipher(struct iproc_alg_s *driver_alg)
 {
 	struct spu_hw *spu = &iproc_priv.spu;
-	struct crypto_alg *crypto = &driver_alg->alg.crypto;
+	struct skcipher_alg *crypto = &driver_alg->alg.skcipher;
 	int err;
 
 	/* SPU2 does not support RC4 */
@@ -4535,26 +4504,23 @@ static int spu_register_ablkcipher(struct iproc_alg_s *driver_alg)
 	    (spu->spu_type == SPU_TYPE_SPU2))
 		return 0;
 
-	crypto->cra_module = THIS_MODULE;
-	crypto->cra_priority = cipher_pri;
-	crypto->cra_alignmask = 0;
-	crypto->cra_ctxsize = sizeof(struct iproc_ctx_s);
-
-	crypto->cra_init = ablkcipher_cra_init;
-	crypto->cra_exit = generic_cra_exit;
-	crypto->cra_type = &crypto_ablkcipher_type;
-	crypto->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
-				CRYPTO_ALG_KERN_DRIVER_ONLY;
+	crypto->base.cra_module = THIS_MODULE;
+	crypto->base.cra_priority = cipher_pri;
+	crypto->base.cra_alignmask = 0;
+	crypto->base.cra_ctxsize = sizeof(struct iproc_ctx_s);
+	crypto->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
 
-	crypto->cra_ablkcipher.setkey = ablkcipher_setkey;
-	crypto->cra_ablkcipher.encrypt = ablkcipher_encrypt;
-	crypto->cra_ablkcipher.decrypt = ablkcipher_decrypt;
+	crypto->init = skcipher_init_tfm;
+	crypto->exit = skcipher_exit_tfm;
+	crypto->setkey = skcipher_setkey;
+	crypto->encrypt = skcipher_encrypt;
+	crypto->decrypt = skcipher_decrypt;
 
-	err = crypto_register_alg(crypto);
+	err = crypto_register_skcipher(crypto);
 	/* Mark alg as having been registered, if successful */
 	if (err == 0)
 		driver_alg->registered = true;
-	pr_debug("  registered ablkcipher %s\n", crypto->cra_driver_name);
+	pr_debug("  registered skcipher %s\n", crypto->base.cra_driver_name);
 	return err;
 }
 
@@ -4649,8 +4615,8 @@ static int spu_algs_register(struct device *dev)
 
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		switch (driver_algs[i].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			err = spu_register_ablkcipher(&driver_algs[i]);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			err = spu_register_skcipher(&driver_algs[i]);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			err = spu_register_ahash(&driver_algs[i]);
@@ -4680,8 +4646,8 @@ err_algs:
 		if (!driver_algs[j].registered)
 			continue;
 		switch (driver_algs[j].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			crypto_unregister_alg(&driver_algs[j].alg.crypto);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&driver_algs[j].alg.skcipher);
 			driver_algs[j].registered = false;
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
@@ -4837,10 +4803,10 @@ static int bcm_spu_remove(struct platform_device *pdev)
 			continue;
 
 		switch (driver_algs[i].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			crypto_unregister_alg(&driver_algs[i].alg.crypto);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&driver_algs[i].alg.skcipher);
 			dev_dbg(dev, "  unregistered cipher %s\n",
-				driver_algs[i].alg.crypto.cra_driver_name);
+				driver_algs[i].alg.skcipher.base.cra_driver_name);
 			driver_algs[i].registered = false;
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h
index 766452b24d0a..b6d83e3aa46c 100644
--- a/drivers/crypto/bcm/cipher.h
+++ b/drivers/crypto/bcm/cipher.h
@@ -1,3 +1,4 @@
+
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2016 Broadcom
@@ -11,6 +12,7 @@
 #include <linux/mailbox_client.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/aead.h>
 #include <crypto/arc4.h>
 #include <crypto/gcm.h>
@@ -102,7 +104,7 @@ struct auth_op {
 struct iproc_alg_s {
 	u32 type;
 	union {
-		struct crypto_alg crypto;
+		struct skcipher_alg skcipher;
 		struct ahash_alg hash;
 		struct aead_alg aead;
 	} alg;
@@ -149,7 +151,7 @@ struct spu_msg_buf {
 	u8 rx_stat[ALIGN(SPU_RX_STATUS_LEN, SPU_MSG_ALIGN)];
 
 	union {
-		/* Buffers only used for ablkcipher */
+		/* Buffers only used for skcipher */
 		struct {
 			/*
 			 * Field used for either SUPDT when RC4 is used
@@ -214,7 +216,7 @@ struct iproc_ctx_s {
 
 	/*
 	 * Buffer to hold SPU message header template. Template is created at
-	 * setkey time for ablkcipher requests, since most of the fields in the
+	 * setkey time for skcipher requests, since most of the fields in the
 	 * header are known at that time. At request time, just fill in a few
 	 * missing pieces related to length of data in the request and IVs, etc.
 	 */
@@ -256,7 +258,7 @@ struct iproc_reqctx_s {
 
 	/* total todo, rx'd, and sent for this request */
 	unsigned int total_todo;
-	unsigned int total_received;	/* only valid for ablkcipher */
+	unsigned int total_received;	/* only valid for skcipher */
 	unsigned int total_sent;
 
 	/*
diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c
index 2add51024575..59abb5ecefa4 100644
--- a/drivers/crypto/bcm/spu2.c
+++ b/drivers/crypto/bcm/spu2.c
@@ -542,7 +542,7 @@ void spu2_dump_msg_hdr(u8 *buf, unsigned int buf_len)
 
 /**
  * spu2_fmd_init() - At setkey time, initialize the fixed meta data for
- * subsequent ablkcipher requests for this context.
+ * subsequent skcipher requests for this context.
  * @spu2_cipher_type:  Cipher algorithm
  * @spu2_mode:         Cipher mode
  * @cipher_key_len:    Length of cipher key, in bytes
@@ -1107,13 +1107,13 @@ u32 spu2_create_request(u8 *spu_hdr,
 }
 
 /**
- * spu_cipher_req_init() - Build an ablkcipher SPU2 request message header,
+ * spu_cipher_req_init() - Build an skcipher SPU2 request message header,
  * including FMD and OMD.
  * @spu_hdr:       Location of start of SPU request (FMD field)
  * @cipher_parms:  Parameters describing cipher request
  *
  * Called at setkey time to initialize a msg header that can be reused for all
- * subsequent ablkcipher requests. Construct the message starting at spu_hdr.
+ * subsequent skcipher requests. Construct the message starting at spu_hdr.
  * Caller should allocate this buffer in DMA-able memory at least
  * SPU_HEADER_ALLOC_LEN bytes long.
  *
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 137ed3df0c74..fac5b2e26610 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -97,7 +97,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 	select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	help
 	  Selecting this will offload crypto for users of the
@@ -110,7 +110,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI
 	default y
 	select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_DES
 	help
 	  Selecting this will use CAAM Queue Interface (QI) for sending
@@ -130,13 +130,13 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API
 	  scatterlist crypto API to the SEC4 via job ring.
 
 config CRYPTO_DEV_FSL_CAAM_PKC_API
-        bool "Register public key cryptography implementations with Crypto API"
-        default y
-        select CRYPTO_RSA
-        help
-          Selecting this will allow SEC Public key support for RSA.
-          Supported cryptographic primitives: encryption, decryption,
-          signature and verification.
+	bool "Register public key cryptography implementations with Crypto API"
+	default y
+	select CRYPTO_RSA
+	help
+	  Selecting this will allow SEC Public key support for RSA.
+	  Supported cryptographic primitives: encryption, decryption,
+	  signature and verification.
 
 config CRYPTO_DEV_FSL_CAAM_RNG_API
 	bool "Register caam device for hwrng API"
@@ -158,7 +158,7 @@ config CRYPTO_DEV_FSL_DPAA2_CAAM
 	select CRYPTO_DEV_FSL_CAAM_COMMON
 	select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
 	select CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AUTHENC
 	select CRYPTO_AEAD
 	select CRYPTO_HASH
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 2912006b946b..ef1a65f4fc92 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -548,10 +548,8 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize;
 
-	if (keylen != CHACHA_KEY_SIZE + saltlen) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != CHACHA_KEY_SIZE + saltlen)
 		return -EINVAL;
-	}
 
 	ctx->cdata.key_virt = key;
 	ctx->cdata.keylen = keylen - saltlen;
@@ -619,7 +617,6 @@ skip_split_key:
 	memzero_explicit(&keys, sizeof(keys));
 	return aead_set_sh_desc(aead);
 badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -649,10 +646,8 @@ static int gcm_setkey(struct crypto_aead *aead,
 	int err;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -672,10 +667,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 	int err;
 
 	err = aes_check_keylen(keylen - 4);
-	if (err) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -700,10 +693,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	int err;
 
 	err = aes_check_keylen(keylen - 4);
-	if (err) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -762,11 +753,8 @@ static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
 	int err;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, 0);
 }
@@ -786,11 +774,8 @@ static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
 	keylen -= CTR_RFC3686_NONCE_SIZE;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -809,11 +794,8 @@ static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
 	ctx1_iv_off = 16;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -846,7 +828,6 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 	u32 *desc;
 
 	if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
-		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		dev_err(jrdev, "key size mismatch\n");
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 8e3449670d2f..4a29e0ef9d63 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -268,7 +268,6 @@ skip_split_key:
 	memzero_explicit(&keys, sizeof(keys));
 	return ret;
 badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -356,10 +355,8 @@ static int gcm_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -462,10 +459,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen - 4);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -570,10 +565,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen - 4);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -644,7 +637,7 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 					  ctx->sh_desc_enc);
 		if (ret) {
 			dev_err(jrdev, "driver enc context update failed\n");
-			goto badkey;
+			return -EINVAL;
 		}
 	}
 
@@ -653,14 +646,11 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 					  ctx->sh_desc_dec);
 		if (ret) {
 			dev_err(jrdev, "driver dec context update failed\n");
-			goto badkey;
+			return -EINVAL;
 		}
 	}
 
 	return ret;
-badkey:
-	crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
 }
 
 static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
@@ -669,11 +659,8 @@ static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
 	int err;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, 0);
 }
@@ -693,11 +680,8 @@ static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
 	keylen -= CTR_RFC3686_NONCE_SIZE;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -716,11 +700,8 @@ static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
 	ctx1_iv_off = 16;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -748,7 +729,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 
 	if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
 		dev_err(jrdev, "key size mismatch\n");
-		goto badkey;
+		return -EINVAL;
 	}
 
 	ctx->cdata.keylen = keylen;
@@ -765,7 +746,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 					  ctx->sh_desc_enc);
 		if (ret) {
 			dev_err(jrdev, "driver enc context update failed\n");
-			goto badkey;
+			return -EINVAL;
 		}
 	}
 
@@ -774,14 +755,11 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 					  ctx->sh_desc_dec);
 		if (ret) {
 			dev_err(jrdev, "driver dec context update failed\n");
-			goto badkey;
+			return -EINVAL;
 		}
 	}
 
 	return ret;
-badkey:
-	crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
 }
 
 /*
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 3443f6d6dd83..28669cbecf77 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -313,7 +313,6 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 	memzero_explicit(&keys, sizeof(keys));
 	return aead_set_sh_desc(aead);
 badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -326,11 +325,11 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key,
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
 	if (unlikely(err))
-		goto badkey;
+		goto out;
 
 	err = -EINVAL;
 	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
+		goto out;
 
 	err = crypto_des3_ede_verify_key(crypto_aead_tfm(aead), keys.enckey) ?:
 	      aead_setkey(aead, key, keylen);
@@ -338,10 +337,6 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key,
 out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
@@ -634,10 +629,8 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize;
 
-	if (keylen != CHACHA_KEY_SIZE + saltlen) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != CHACHA_KEY_SIZE + saltlen)
 		return -EINVAL;
-	}
 
 	ctx->cdata.key_virt = key;
 	ctx->cdata.keylen = keylen - saltlen;
@@ -725,10 +718,8 @@ static int gcm_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 
@@ -822,10 +813,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen - 4);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -923,10 +912,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	int ret;
 
 	ret = aes_check_keylen(keylen - 4);
-	if (ret) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -992,11 +979,8 @@ static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
 	int err;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, 0);
 }
@@ -1016,11 +1000,8 @@ static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
 	keylen -= CTR_RFC3686_NONCE_SIZE;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -1039,11 +1020,8 @@ static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
 	ctx1_iv_off = 16;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
@@ -1051,11 +1029,8 @@ static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
 static int chacha20_skcipher_setkey(struct crypto_skcipher *skcipher,
 				    const u8 *key, unsigned int keylen)
 {
-	if (keylen != CHACHA_KEY_SIZE) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != CHACHA_KEY_SIZE)
 		return -EINVAL;
-	}
 
 	return skcipher_setkey(skcipher, key, keylen, 0);
 }
@@ -1084,7 +1059,6 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 
 	if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
 		dev_err(dev, "key size mismatch\n");
-		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -2481,7 +2455,7 @@ static struct caam_aead_alg driver_aeads[] = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des)))",
 				.cra_driver_name = "echainiv-authenc-"
-						   "hmac-sha256-cbc-desi-"
+						   "hmac-sha256-cbc-des-"
 						   "caam-qi2",
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},
@@ -2998,15 +2972,13 @@ struct caam_hash_state {
 	dma_addr_t buf_dma;
 	dma_addr_t ctx_dma;
 	int ctx_dma_len;
-	u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
-	int buflen_0;
-	u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
-	int buflen_1;
+	u8 buf[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen;
+	int next_buflen;
 	u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned;
 	int (*update)(struct ahash_request *req);
 	int (*final)(struct ahash_request *req);
 	int (*finup)(struct ahash_request *req);
-	int current_buf;
 };
 
 struct caam_export_state {
@@ -3018,42 +2990,17 @@ struct caam_export_state {
 	int (*finup)(struct ahash_request *req);
 };
 
-static inline void switch_buf(struct caam_hash_state *state)
-{
-	state->current_buf ^= 1;
-}
-
-static inline u8 *current_buf(struct caam_hash_state *state)
-{
-	return state->current_buf ? state->buf_1 : state->buf_0;
-}
-
-static inline u8 *alt_buf(struct caam_hash_state *state)
-{
-	return state->current_buf ? state->buf_0 : state->buf_1;
-}
-
-static inline int *current_buflen(struct caam_hash_state *state)
-{
-	return state->current_buf ? &state->buflen_1 : &state->buflen_0;
-}
-
-static inline int *alt_buflen(struct caam_hash_state *state)
-{
-	return state->current_buf ? &state->buflen_0 : &state->buflen_1;
-}
-
 /* Map current buffer in state (if length > 0) and put it in link table */
 static inline int buf_map_to_qm_sg(struct device *dev,
 				   struct dpaa2_sg_entry *qm_sg,
 				   struct caam_hash_state *state)
 {
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 
 	if (!buflen)
 		return 0;
 
-	state->buf_dma = dma_map_single(dev, current_buf(state), buflen,
+	state->buf_dma = dma_map_single(dev, state->buf, buflen,
 					DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, state->buf_dma)) {
 		dev_err(dev, "unable to map buf\n");
@@ -3304,7 +3251,6 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key,
 	return ret;
 bad_free_key:
 	kfree(hashed_key);
-	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -3321,7 +3267,7 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc,
 				 DMA_TO_DEVICE);
 
 	if (state->buf_dma) {
-		dma_unmap_single(dev, state->buf_dma, *current_buflen(state),
+		dma_unmap_single(dev, state->buf_dma, state->buflen,
 				 DMA_TO_DEVICE);
 		state->buf_dma = 0;
 	}
@@ -3383,9 +3329,17 @@ static void ahash_done_bi(void *cbk_ctx, u32 status)
 		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL);
-	switch_buf(state);
 	qi_cache_free(edesc);
 
+	scatterwalk_map_and_copy(state->buf, req->src,
+				 req->nbytes - state->next_buflen,
+				 state->next_buflen, 0);
+	state->buflen = state->next_buflen;
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->buf,
+			     state->buflen, 1);
+
 	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
@@ -3440,9 +3394,17 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
 		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE);
-	switch_buf(state);
 	qi_cache_free(edesc);
 
+	scatterwalk_map_and_copy(state->buf, req->src,
+				 req->nbytes - state->next_buflen,
+				 state->next_buflen, 0);
+	state->buflen = state->next_buflen;
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->buf,
+			     state->buflen, 1);
+
 	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
@@ -3464,16 +3426,14 @@ static int ahash_update_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int *buflen = current_buflen(state);
-	u8 *next_buf = alt_buf(state);
-	int *next_buflen = alt_buflen(state), last_buflen;
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int in_len = *buflen + req->nbytes, to_hash;
 	int src_nents, mapped_nents, qm_sg_bytes, qm_sg_src_index;
 	struct ahash_edesc *edesc;
 	int ret = 0;
 
-	last_buflen = *next_buflen;
 	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
 	to_hash = in_len - *next_buflen;
 
@@ -3524,10 +3484,6 @@ static int ahash_update_ctx(struct ahash_request *req)
 		if (mapped_nents) {
 			sg_to_qm_sg_last(req->src, src_len,
 					 sg_table + qm_sg_src_index, 0);
-			if (*next_buflen)
-				scatterwalk_map_and_copy(next_buf, req->src,
-							 to_hash - *buflen,
-							 *next_buflen, 0);
 		} else {
 			dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1,
 					   true);
@@ -3566,14 +3522,11 @@ static int ahash_update_ctx(struct ahash_request *req)
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
 		*buflen = *next_buflen;
-		*next_buflen = last_buflen;
-	}
 
-	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
-	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
-			     1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
 unmap_ctx:
@@ -3592,7 +3545,7 @@ static int ahash_final_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	int qm_sg_bytes;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
@@ -3663,7 +3616,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	int qm_sg_bytes, qm_sg_src_index;
 	int src_nents, mapped_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
@@ -3852,8 +3805,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int buflen = *current_buflen(state);
+	u8 *buf = state->buf;
+	int buflen = state->buflen;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
 	int ret = -ENOMEM;
@@ -3925,10 +3878,9 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int *buflen = current_buflen(state);
-	u8 *next_buf = alt_buf(state);
-	int *next_buflen = alt_buflen(state);
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int in_len = *buflen + req->nbytes, to_hash;
 	int qm_sg_bytes, src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
@@ -3977,11 +3929,6 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 
 		sg_to_qm_sg_last(req->src, src_len, sg_table + 1, 0);
 
-		if (*next_buflen)
-			scatterwalk_map_and_copy(next_buf, req->src,
-						 to_hash - *buflen,
-						 *next_buflen, 0);
-
 		edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
 						  qm_sg_bytes, DMA_TO_DEVICE);
 		if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
@@ -4029,14 +3976,11 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
 		*buflen = *next_buflen;
-		*next_buflen = 0;
-	}
 
-	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
-	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
-			     1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
 unmap_ctx:
@@ -4055,7 +3999,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	int qm_sg_bytes, src_nents, mapped_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
@@ -4151,8 +4095,9 @@ static int ahash_update_first(struct ahash_request *req)
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		      GFP_KERNEL : GFP_ATOMIC;
-	u8 *next_buf = alt_buf(state);
-	int *next_buflen = alt_buflen(state);
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int to_hash;
 	int src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
@@ -4220,10 +4165,6 @@ static int ahash_update_first(struct ahash_request *req)
 			dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src));
 		}
 
-		if (*next_buflen)
-			scatterwalk_map_and_copy(next_buf, req->src, to_hash,
-						 *next_buflen, 0);
-
 		state->ctx_dma_len = ctx->ctx_len;
 		state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx,
 						ctx->ctx_len, DMA_FROM_DEVICE);
@@ -4257,14 +4198,14 @@ static int ahash_update_first(struct ahash_request *req)
 		state->update = ahash_update_no_ctx;
 		state->finup = ahash_finup_no_ctx;
 		state->final = ahash_final_no_ctx;
-		scatterwalk_map_and_copy(next_buf, req->src, 0,
+		scatterwalk_map_and_copy(buf, req->src, 0,
 					 req->nbytes, 0);
-		switch_buf(state);
-	}
+		*buflen = *next_buflen;
 
-	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
-			     1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
 unmap_ctx:
@@ -4288,10 +4229,9 @@ static int ahash_init(struct ahash_request *req)
 
 	state->ctx_dma = 0;
 	state->ctx_dma_len = 0;
-	state->current_buf = 0;
 	state->buf_dma = 0;
-	state->buflen_0 = 0;
-	state->buflen_1 = 0;
+	state->buflen = 0;
+	state->next_buflen = 0;
 
 	return 0;
 }
@@ -4321,16 +4261,8 @@ static int ahash_export(struct ahash_request *req, void *out)
 {
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct caam_export_state *export = out;
-	int len;
-	u8 *buf;
-
-	if (state->current_buf) {
-		buf = state->buf_1;
-		len = state->buflen_1;
-	} else {
-		buf = state->buf_0;
-		len = state->buflen_0;
-	}
+	u8 *buf = state->buf;
+	int len = state->buflen;
 
 	memcpy(export->buf, buf, len);
 	memcpy(export->caam_ctx, state->caam_ctx, sizeof(export->caam_ctx));
@@ -4348,9 +4280,9 @@ static int ahash_import(struct ahash_request *req, const void *in)
 	const struct caam_export_state *export = in;
 
 	memset(state, 0, sizeof(*state));
-	memcpy(state->buf_0, export->buf, export->buflen);
+	memcpy(state->buf, export->buf, export->buflen);
 	memcpy(state->caam_ctx, export->caam_ctx, sizeof(state->caam_ctx));
-	state->buflen_0 = export->buflen;
+	state->buflen = export->buflen;
 	state->update = export->update;
 	state->final = export->final;
 	state->finup = export->finup;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 65399cb2a770..8d9143407fc5 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -107,15 +107,13 @@ struct caam_hash_state {
 	dma_addr_t buf_dma;
 	dma_addr_t ctx_dma;
 	int ctx_dma_len;
-	u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
-	int buflen_0;
-	u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
-	int buflen_1;
+	u8 buf[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen;
+	int next_buflen;
 	u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned;
 	int (*update)(struct ahash_request *req);
 	int (*final)(struct ahash_request *req);
 	int (*finup)(struct ahash_request *req);
-	int current_buf;
 };
 
 struct caam_export_state {
@@ -127,31 +125,6 @@ struct caam_export_state {
 	int (*finup)(struct ahash_request *req);
 };
 
-static inline void switch_buf(struct caam_hash_state *state)
-{
-	state->current_buf ^= 1;
-}
-
-static inline u8 *current_buf(struct caam_hash_state *state)
-{
-	return state->current_buf ? state->buf_1 : state->buf_0;
-}
-
-static inline u8 *alt_buf(struct caam_hash_state *state)
-{
-	return state->current_buf ? state->buf_0 : state->buf_1;
-}
-
-static inline int *current_buflen(struct caam_hash_state *state)
-{
-	return state->current_buf ? &state->buflen_1 : &state->buflen_0;
-}
-
-static inline int *alt_buflen(struct caam_hash_state *state)
-{
-	return state->current_buf ? &state->buflen_0 : &state->buflen_1;
-}
-
 static inline bool is_cmac_aes(u32 algtype)
 {
 	return (algtype & (OP_ALG_ALGSEL_MASK | OP_ALG_AAI_MASK)) ==
@@ -183,12 +156,12 @@ static inline int buf_map_to_sec4_sg(struct device *jrdev,
 				     struct sec4_sg_entry *sec4_sg,
 				     struct caam_hash_state *state)
 {
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 
 	if (!buflen)
 		return 0;
 
-	state->buf_dma = dma_map_single(jrdev, current_buf(state), buflen,
+	state->buf_dma = dma_map_single(jrdev, state->buf, buflen,
 					DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, state->buf_dma)) {
 		dev_err(jrdev, "unable to map buf\n");
@@ -500,7 +473,6 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 	return ahash_set_sh_desc(ahash);
  bad_free_key:
 	kfree(hashed_key);
-	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -510,10 +482,8 @@ static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key,
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct device *jrdev = ctx->jrdev;
 
-	if (keylen != AES_KEYSIZE_128) {
-		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != AES_KEYSIZE_128)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	dma_sync_single_for_device(jrdev, ctx->adata.key_dma, keylen,
@@ -533,10 +503,8 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 	int err;
 
 	err = aes_check_keylen(keylen);
-	if (err) {
-		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (err)
 		return err;
-	}
 
 	/* key is immediate data for all cmac shared descriptors */
 	ctx->adata.key_virt = key;
@@ -578,7 +546,7 @@ static inline void ahash_unmap(struct device *dev,
 				 edesc->sec4_sg_bytes, DMA_TO_DEVICE);
 
 	if (state->buf_dma) {
-		dma_unmap_single(dev, state->buf_dma, *current_buflen(state),
+		dma_unmap_single(dev, state->buf_dma, state->buflen,
 				 DMA_TO_DEVICE);
 		state->buf_dma = 0;
 	}
@@ -643,9 +611,17 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
 		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
-	switch_buf(state);
 	kfree(edesc);
 
+	scatterwalk_map_and_copy(state->buf, req->src,
+				 req->nbytes - state->next_buflen,
+				 state->next_buflen, 0);
+	state->buflen = state->next_buflen;
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->buf,
+			     state->buflen, 1);
+
 	print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
@@ -703,9 +679,17 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
 		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE);
-	switch_buf(state);
 	kfree(edesc);
 
+	scatterwalk_map_and_copy(state->buf, req->src,
+				 req->nbytes - state->next_buflen,
+				 state->next_buflen, 0);
+	state->buflen = state->next_buflen;
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->buf,
+			     state->buflen, 1);
+
 	print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
@@ -786,18 +770,16 @@ static int ahash_update_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int *buflen = current_buflen(state);
-	u8 *next_buf = alt_buf(state);
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int blocksize = crypto_ahash_blocksize(ahash);
-	int *next_buflen = alt_buflen(state), last_buflen;
 	int in_len = *buflen + req->nbytes, to_hash;
 	u32 *desc;
 	int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index;
 	struct ahash_edesc *edesc;
 	int ret = 0;
 
-	last_buflen = *next_buflen;
 	*next_buflen = in_len & (blocksize - 1);
 	to_hash = in_len - *next_buflen;
 
@@ -868,10 +850,6 @@ static int ahash_update_ctx(struct ahash_request *req)
 			sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
 					    1);
 
-		if (*next_buflen)
-			scatterwalk_map_and_copy(next_buf, req->src,
-						 to_hash - *buflen,
-						 *next_buflen, 0);
 		desc = edesc->hw_desc;
 
 		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
@@ -901,14 +879,11 @@ static int ahash_update_ctx(struct ahash_request *req)
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
 		*buflen = *next_buflen;
-		*next_buflen = last_buflen;
-	}
 
-	print_hex_dump_debug("buf@"__stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
-	print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
-			     *next_buflen, 1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
 unmap_ctx:
@@ -925,7 +900,7 @@ static int ahash_final_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	u32 *desc;
 	int sec4_sg_bytes;
 	int digestsize = crypto_ahash_digestsize(ahash);
@@ -991,7 +966,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	u32 *desc;
 	int sec4_sg_src_index;
 	int src_nents, mapped_nents;
@@ -1148,8 +1123,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int buflen = *current_buflen(state);
+	u8 *buf = state->buf;
+	int buflen = state->buflen;
 	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
@@ -1207,11 +1182,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	u8 *buf = current_buf(state);
-	int *buflen = current_buflen(state);
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int blocksize = crypto_ahash_blocksize(ahash);
-	u8 *next_buf = alt_buf(state);
-	int *next_buflen = alt_buflen(state);
 	int in_len = *buflen + req->nbytes, to_hash;
 	int sec4_sg_bytes, src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
@@ -1278,12 +1252,6 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 
 		sg_to_sec4_sg_last(req->src, src_len, edesc->sec4_sg + 1, 0);
 
-		if (*next_buflen) {
-			scatterwalk_map_and_copy(next_buf, req->src,
-						 to_hash - *buflen,
-						 *next_buflen, 0);
-		}
-
 		desc = edesc->hw_desc;
 
 		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
@@ -1317,14 +1285,11 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
 		*buflen = *next_buflen;
-		*next_buflen = 0;
-	}
 
-	print_hex_dump_debug("buf@"__stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
-	print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
-			     1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
  unmap_ctx:
@@ -1342,7 +1307,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	int buflen = *current_buflen(state);
+	int buflen = state->buflen;
 	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
@@ -1428,8 +1393,9 @@ static int ahash_update_first(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
-	u8 *next_buf = alt_buf(state);
-	int *next_buflen = alt_buflen(state);
+	u8 *buf = state->buf;
+	int *buflen = &state->buflen;
+	int *next_buflen = &state->next_buflen;
 	int to_hash;
 	int blocksize = crypto_ahash_blocksize(ahash);
 	u32 *desc;
@@ -1491,10 +1457,6 @@ static int ahash_update_first(struct ahash_request *req)
 		if (ret)
 			goto unmap_ctx;
 
-		if (*next_buflen)
-			scatterwalk_map_and_copy(next_buf, req->src, to_hash,
-						 *next_buflen, 0);
-
 		desc = edesc->hw_desc;
 
 		ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
@@ -1517,14 +1479,14 @@ static int ahash_update_first(struct ahash_request *req)
 		state->update = ahash_update_no_ctx;
 		state->finup = ahash_finup_no_ctx;
 		state->final = ahash_final_no_ctx;
-		scatterwalk_map_and_copy(next_buf, req->src, 0,
+		scatterwalk_map_and_copy(buf, req->src, 0,
 					 req->nbytes, 0);
-		switch_buf(state);
-	}
+		*buflen = *next_buflen;
 
-	print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
-			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
-			     1);
+		print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, buf,
+				     *buflen, 1);
+	}
 
 	return ret;
  unmap_ctx:
@@ -1548,10 +1510,9 @@ static int ahash_init(struct ahash_request *req)
 
 	state->ctx_dma = 0;
 	state->ctx_dma_len = 0;
-	state->current_buf = 0;
 	state->buf_dma = 0;
-	state->buflen_0 = 0;
-	state->buflen_1 = 0;
+	state->buflen = 0;
+	state->next_buflen = 0;
 
 	return 0;
 }
@@ -1581,16 +1542,8 @@ static int ahash_export(struct ahash_request *req, void *out)
 {
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct caam_export_state *export = out;
-	int len;
-	u8 *buf;
-
-	if (state->current_buf) {
-		buf = state->buf_1;
-		len = state->buflen_1;
-	} else {
-		buf = state->buf_0;
-		len = state->buflen_0;
-	}
+	u8 *buf = state->buf;
+	int len = state->buflen;
 
 	memcpy(export->buf, buf, len);
 	memcpy(export->caam_ctx, state->caam_ctx, sizeof(export->caam_ctx));
@@ -1608,9 +1561,9 @@ static int ahash_import(struct ahash_request *req, const void *in)
 	const struct caam_export_state *export = in;
 
 	memset(state, 0, sizeof(*state));
-	memcpy(state->buf_0, export->buf, export->buflen);
+	memcpy(state->buf, export->buf, export->buflen);
 	memcpy(state->caam_ctx, export->caam_ctx, sizeof(state->caam_ctx));
-	state->buflen_0 = export->buflen;
+	state->buflen = export->buflen;
 	state->update = export->update;
 	state->final = export->final;
 	state->finup = export->finup;
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 83f96d4f86e0..6619c512ef1a 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -252,9 +252,9 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int sg_flags = (flags == GFP_ATOMIC) ? SG_MITER_ATOMIC : 0;
-	int sgc;
 	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 	int src_nents, dst_nents;
+	int mapped_src_nents, mapped_dst_nents;
 	unsigned int diff_size = 0;
 	int lzeros;
 
@@ -285,13 +285,27 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 				     req_ctx->fixup_src_len);
 	dst_nents = sg_nents_for_len(req->dst, req->dst_len);
 
-	if (!diff_size && src_nents == 1)
+	mapped_src_nents = dma_map_sg(dev, req_ctx->fixup_src, src_nents,
+				      DMA_TO_DEVICE);
+	if (unlikely(!mapped_src_nents)) {
+		dev_err(dev, "unable to map source\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	mapped_dst_nents = dma_map_sg(dev, req->dst, dst_nents,
+				      DMA_FROM_DEVICE);
+	if (unlikely(!mapped_dst_nents)) {
+		dev_err(dev, "unable to map destination\n");
+		goto src_fail;
+	}
+
+	if (!diff_size && mapped_src_nents == 1)
 		sec4_sg_len = 0; /* no need for an input hw s/g table */
 	else
-		sec4_sg_len = src_nents + !!diff_size;
+		sec4_sg_len = mapped_src_nents + !!diff_size;
 	sec4_sg_index = sec4_sg_len;
-	if (dst_nents > 1)
-		sec4_sg_len += pad_sg_nents(dst_nents);
+
+	if (mapped_dst_nents > 1)
+		sec4_sg_len += pad_sg_nents(mapped_dst_nents);
 	else
 		sec4_sg_len = pad_sg_nents(sec4_sg_len);
 
@@ -301,19 +315,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes,
 			GFP_DMA | flags);
 	if (!edesc)
-		return ERR_PTR(-ENOMEM);
-
-	sgc = dma_map_sg(dev, req_ctx->fixup_src, src_nents, DMA_TO_DEVICE);
-	if (unlikely(!sgc)) {
-		dev_err(dev, "unable to map source\n");
-		goto src_fail;
-	}
-
-	sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE);
-	if (unlikely(!sgc)) {
-		dev_err(dev, "unable to map destination\n");
 		goto dst_fail;
-	}
 
 	edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen;
 	if (diff_size)
@@ -324,7 +326,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 		sg_to_sec4_sg_last(req_ctx->fixup_src, req_ctx->fixup_src_len,
 				   edesc->sec4_sg + !!diff_size, 0);
 
-	if (dst_nents > 1)
+	if (mapped_dst_nents > 1)
 		sg_to_sec4_sg_last(req->dst, req->dst_len,
 				   edesc->sec4_sg + sec4_sg_index, 0);
 
@@ -335,6 +337,9 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	if (!sec4_sg_bytes)
 		return edesc;
 
+	edesc->mapped_src_nents = mapped_src_nents;
+	edesc->mapped_dst_nents = mapped_dst_nents;
+
 	edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, edesc->sec4_sg_dma)) {
@@ -351,11 +356,11 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	return edesc;
 
 sec4_sg_fail:
-	dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE);
+	kfree(edesc);
 dst_fail:
-	dma_unmap_sg(dev, req_ctx->fixup_src, src_nents, DMA_TO_DEVICE);
+	dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE);
 src_fail:
-	kfree(edesc);
+	dma_unmap_sg(dev, req_ctx->fixup_src, src_nents, DMA_TO_DEVICE);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -383,15 +388,15 @@ static int set_rsa_pub_pdb(struct akcipher_request *req,
 		return -ENOMEM;
 	}
 
-	if (edesc->src_nents > 1) {
+	if (edesc->mapped_src_nents > 1) {
 		pdb->sgf |= RSA_PDB_SGF_F;
 		pdb->f_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents;
+		sec4_sg_index += edesc->mapped_src_nents;
 	} else {
 		pdb->f_dma = sg_dma_address(req_ctx->fixup_src);
 	}
 
-	if (edesc->dst_nents > 1) {
+	if (edesc->mapped_dst_nents > 1) {
 		pdb->sgf |= RSA_PDB_SGF_G;
 		pdb->g_dma = edesc->sec4_sg_dma +
 			     sec4_sg_index * sizeof(struct sec4_sg_entry);
@@ -428,17 +433,18 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
 		return -ENOMEM;
 	}
 
-	if (edesc->src_nents > 1) {
+	if (edesc->mapped_src_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
 		pdb->g_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents;
+		sec4_sg_index += edesc->mapped_src_nents;
+
 	} else {
 		struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 
 		pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
 	}
 
-	if (edesc->dst_nents > 1) {
+	if (edesc->mapped_dst_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
 		pdb->f_dma = edesc->sec4_sg_dma +
 			     sec4_sg_index * sizeof(struct sec4_sg_entry);
@@ -493,17 +499,17 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
 		goto unmap_tmp1;
 	}
 
-	if (edesc->src_nents > 1) {
+	if (edesc->mapped_src_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
 		pdb->g_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents;
+		sec4_sg_index += edesc->mapped_src_nents;
 	} else {
 		struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 
 		pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
 	}
 
-	if (edesc->dst_nents > 1) {
+	if (edesc->mapped_dst_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
 		pdb->f_dma = edesc->sec4_sg_dma +
 			     sec4_sg_index * sizeof(struct sec4_sg_entry);
@@ -582,17 +588,17 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
 		goto unmap_tmp1;
 	}
 
-	if (edesc->src_nents > 1) {
+	if (edesc->mapped_src_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
 		pdb->g_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents;
+		sec4_sg_index += edesc->mapped_src_nents;
 	} else {
 		struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 
 		pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
 	}
 
-	if (edesc->dst_nents > 1) {
+	if (edesc->mapped_dst_nents > 1) {
 		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
 		pdb->f_dma = edesc->sec4_sg_dma +
 			     sec4_sg_index * sizeof(struct sec4_sg_entry);
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index 2c488c9a3812..c68fb4c03ee6 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -112,8 +112,10 @@ struct caam_rsa_req_ctx {
 
 /**
  * rsa_edesc - s/w-extended rsa descriptor
- * @src_nents     : number of segments in input scatterlist
- * @dst_nents     : number of segments in output scatterlist
+ * @src_nents     : number of segments in input s/w scatterlist
+ * @dst_nents     : number of segments in output s/w scatterlist
+ * @mapped_src_nents: number of segments in input h/w link table
+ * @mapped_dst_nents: number of segments in output h/w link table
  * @sec4_sg_bytes : length of h/w link table
  * @sec4_sg_dma   : dma address of h/w link table
  * @sec4_sg       : pointer to h/w link table
@@ -123,6 +125,8 @@ struct caam_rsa_req_ctx {
 struct rsa_edesc {
 	int src_nents;
 	int dst_nents;
+	int mapped_src_nents;
+	int mapped_dst_nents;
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
 	struct sec4_sg_entry *sec4_sg;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index db22777d59b4..7139366da016 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -99,10 +99,13 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc,
 
 	if (ctrlpriv->virt_en == 1 ||
 	    /*
-	     * Apparently on i.MX8MQ it doesn't matter if virt_en == 1
+	     * Apparently on i.MX8M{Q,M,N,P} it doesn't matter if virt_en == 1
 	     * and the following steps should be performed regardless
 	     */
-	    of_machine_is_compatible("fsl,imx8mq")) {
+	    of_machine_is_compatible("fsl,imx8mq") ||
+	    of_machine_is_compatible("fsl,imx8mm") ||
+	    of_machine_is_compatible("fsl,imx8mn") ||
+	    of_machine_is_compatible("fsl,imx8mp")) {
 		clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0);
 
 		while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) &&
@@ -176,6 +179,73 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc,
 }
 
 /*
+ * deinstantiate_rng - builds and executes a descriptor on DECO0,
+ *		       which deinitializes the RNG block.
+ * @ctrldev - pointer to device
+ * @state_handle_mask - bitmask containing the instantiation status
+ *			for the RNG4 state handles which exist in
+ *			the RNG4 block: 1 if it's been instantiated
+ *
+ * Return: - 0 if no error occurred
+ *	   - -ENOMEM if there isn't enough memory to allocate the descriptor
+ *	   - -ENODEV if DECO0 couldn't be acquired
+ *	   - -EAGAIN if an error occurred when executing the descriptor
+ */
+static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
+{
+	u32 *desc, status;
+	int sh_idx, ret = 0;
+
+	desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
+		/*
+		 * If the corresponding bit is set, then it means the state
+		 * handle was initialized by us, and thus it needs to be
+		 * deinitialized as well
+		 */
+		if ((1 << sh_idx) & state_handle_mask) {
+			/*
+			 * Create the descriptor for deinstantating this state
+			 * handle
+			 */
+			build_deinstantiation_desc(desc, sh_idx);
+
+			/* Try to run it through DECO0 */
+			ret = run_descriptor_deco0(ctrldev, desc, &status);
+
+			if (ret ||
+			    (status && status != JRSTA_SSRC_JUMP_HALT_CC)) {
+				dev_err(ctrldev,
+					"Failed to deinstantiate RNG4 SH%d\n",
+					sh_idx);
+				break;
+			}
+			dev_info(ctrldev, "Deinstantiated RNG4 SH%d\n", sh_idx);
+		}
+	}
+
+	kfree(desc);
+
+	return ret;
+}
+
+static void devm_deinstantiate_rng(void *data)
+{
+	struct device *ctrldev = data;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
+
+	/*
+	 * De-initialize RNG state handles initialized by this driver.
+	 * In case of SoCs with Management Complex, RNG is managed by MC f/w.
+	 */
+	if (ctrlpriv->rng4_sh_init)
+		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
+}
+
+/*
  * instantiate_rng - builds and executes a descriptor on DECO0,
  *		     which initializes the RNG block.
  * @ctrldev - pointer to device
@@ -247,99 +317,13 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
 
 	kfree(desc);
 
-	return ret;
-}
-
-/*
- * deinstantiate_rng - builds and executes a descriptor on DECO0,
- *		       which deinitializes the RNG block.
- * @ctrldev - pointer to device
- * @state_handle_mask - bitmask containing the instantiation status
- *			for the RNG4 state handles which exist in
- *			the RNG4 block: 1 if it's been instantiated
- *
- * Return: - 0 if no error occurred
- *	   - -ENOMEM if there isn't enough memory to allocate the descriptor
- *	   - -ENODEV if DECO0 couldn't be acquired
- *	   - -EAGAIN if an error occurred when executing the descriptor
- */
-static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
-{
-	u32 *desc, status;
-	int sh_idx, ret = 0;
-
-	desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
-	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
-		/*
-		 * If the corresponding bit is set, then it means the state
-		 * handle was initialized by us, and thus it needs to be
-		 * deinitialized as well
-		 */
-		if ((1 << sh_idx) & state_handle_mask) {
-			/*
-			 * Create the descriptor for deinstantating this state
-			 * handle
-			 */
-			build_deinstantiation_desc(desc, sh_idx);
-
-			/* Try to run it through DECO0 */
-			ret = run_descriptor_deco0(ctrldev, desc, &status);
-
-			if (ret ||
-			    (status && status != JRSTA_SSRC_JUMP_HALT_CC)) {
-				dev_err(ctrldev,
-					"Failed to deinstantiate RNG4 SH%d\n",
-					sh_idx);
-				break;
-			}
-			dev_info(ctrldev, "Deinstantiated RNG4 SH%d\n", sh_idx);
-		}
-	}
-
-	kfree(desc);
+	if (!ret)
+		ret = devm_add_action_or_reset(ctrldev, devm_deinstantiate_rng,
+					       ctrldev);
 
 	return ret;
 }
 
-static int caam_remove(struct platform_device *pdev)
-{
-	struct device *ctrldev;
-	struct caam_drv_private *ctrlpriv;
-	struct caam_ctrl __iomem *ctrl;
-
-	ctrldev = &pdev->dev;
-	ctrlpriv = dev_get_drvdata(ctrldev);
-	ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
-
-	/* Remove platform devices under the crypto node */
-	of_platform_depopulate(ctrldev);
-
-#ifdef CONFIG_CAAM_QI
-	if (ctrlpriv->qi_init)
-		caam_qi_shutdown(ctrldev);
-#endif
-
-	/*
-	 * De-initialize RNG state handles initialized by this driver.
-	 * In case of SoCs with Management Complex, RNG is managed by MC f/w.
-	 */
-	if (!ctrlpriv->mc_en && ctrlpriv->rng4_sh_init)
-		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
-
-	/* Shut down debug views */
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(ctrlpriv->dfs_root);
-#endif
-
-	/* Unmap controller region */
-	iounmap(ctrl);
-
-	return 0;
-}
-
 /*
  * kick_trng - sets the various parameters for enabling the initialization
  *	       of the RNG4 block in CAAM
@@ -527,7 +511,7 @@ static const struct soc_device_attribute caam_imx_soc_table[] = {
 	{ .soc_id = "i.MX6UL", .data = &caam_imx6ul_data },
 	{ .soc_id = "i.MX6*",  .data = &caam_imx6_data },
 	{ .soc_id = "i.MX7*",  .data = &caam_imx7_data },
-	{ .soc_id = "i.MX8MQ", .data = &caam_imx7_data },
+	{ .soc_id = "i.MX8M*", .data = &caam_imx7_data },
 	{ .family = "Freescale i.MX" },
 	{ /* sentinel */ }
 };
@@ -568,6 +552,13 @@ static int init_clocks(struct device *dev, const struct caam_imx_data *data)
 	return devm_add_action_or_reset(dev, disable_clocks, ctrlpriv);
 }
 
+#ifdef CONFIG_DEBUG_FS
+static void caam_remove_debugfs(void *root)
+{
+	debugfs_remove_recursive(root);
+}
+#endif
+
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
 {
@@ -580,6 +571,7 @@ static int caam_probe(struct platform_device *pdev)
 	struct caam_drv_private *ctrlpriv;
 #ifdef CONFIG_DEBUG_FS
 	struct caam_perfmon *perfmon;
+	struct dentry *dfs_root;
 #endif
 	u32 scfgr, comp_params;
 	u8 rng_vid;
@@ -611,10 +603,11 @@ static int caam_probe(struct platform_device *pdev)
 
 	/* Get configuration properties from device tree */
 	/* First, get register page */
-	ctrl = of_iomap(nprop, 0);
-	if (!ctrl) {
+	ctrl = devm_of_iomap(dev, nprop, 0, NULL);
+	ret = PTR_ERR_OR_ZERO(ctrl);
+	if (ret) {
 		dev_err(dev, "caam: of_iomap() failed\n");
-		return -ENOMEM;
+		return ret;
 	}
 
 	caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) &
@@ -632,22 +625,18 @@ static int caam_probe(struct platform_device *pdev)
 	if (ctrlpriv->qi_present && !caam_dpaa2) {
 		ret = qman_is_probed();
 		if (!ret) {
-			ret = -EPROBE_DEFER;
-			goto iounmap_ctrl;
+			return -EPROBE_DEFER;
 		} else if (ret < 0) {
 			dev_err(dev, "failing probe due to qman probe error\n");
-			ret = -ENODEV;
-			goto iounmap_ctrl;
+			return -ENODEV;
 		}
 
 		ret = qman_portals_probed();
 		if (!ret) {
-			ret = -EPROBE_DEFER;
-			goto iounmap_ctrl;
+			return -EPROBE_DEFER;
 		} else if (ret < 0) {
 			dev_err(dev, "failing probe due to qman portals probe error\n");
-			ret = -ENODEV;
-			goto iounmap_ctrl;
+			return -ENODEV;
 		}
 	}
 #endif
@@ -685,11 +674,9 @@ static int caam_probe(struct platform_device *pdev)
 	of_node_put(np);
 
 	if (!ctrlpriv->mc_en)
-		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
+		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK,
 			      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
-			      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
-			      (sizeof(dma_addr_t) == sizeof(u64) ?
-			       MCFGR_LONG_PTR : 0));
+			      MCFGR_WDENABLE | MCFGR_LARGE_BURST);
 
 	handle_imx6_err005766(&ctrl->mcr);
 
@@ -722,7 +709,7 @@ static int caam_probe(struct platform_device *pdev)
 	ret = dma_set_mask_and_coherent(dev, caam_get_dma_mask(dev));
 	if (ret) {
 		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
-		goto iounmap_ctrl;
+		return ret;
 	}
 
 	ctrlpriv->era = caam_get_era(ctrl);
@@ -736,8 +723,12 @@ static int caam_probe(struct platform_device *pdev)
 	 */
 	perfmon = (struct caam_perfmon __force *)&ctrl->perfmon;
 
-	ctrlpriv->dfs_root = debugfs_create_dir(dev_name(dev), NULL);
-	ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root);
+	dfs_root = debugfs_create_dir(dev_name(dev), NULL);
+	ret = devm_add_action_or_reset(dev, caam_remove_debugfs, dfs_root);
+	if (ret)
+		return ret;
+
+	ctrlpriv->ctl = debugfs_create_dir("ctl", dfs_root);
 #endif
 
 	/* Check to see if (DPAA 1.x) QI present. If so, enable */
@@ -757,12 +748,6 @@ static int caam_probe(struct platform_device *pdev)
 #endif
 	}
 
-	ret = of_platform_populate(nprop, caam_match, NULL, dev);
-	if (ret) {
-		dev_err(dev, "JR platform devices creation error\n");
-		goto shutdown_qi;
-	}
-
 	ring = 0;
 	for_each_available_child_of_node(nprop, np)
 		if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
@@ -779,8 +764,7 @@ static int caam_probe(struct platform_device *pdev)
 	/* If no QI and no rings specified, quit and go home */
 	if ((!ctrlpriv->qi_present) && (!ctrlpriv->total_jobrs)) {
 		dev_err(dev, "no queues configured, terminating\n");
-		ret = -ENOMEM;
-		goto caam_remove;
+		return -ENOMEM;
 	}
 
 	if (ctrlpriv->era < 10)
@@ -843,7 +827,7 @@ static int caam_probe(struct platform_device *pdev)
 		} while ((ret == -EAGAIN) && (ent_delay < RTSDCTL_ENT_DLY_MAX));
 		if (ret) {
 			dev_err(dev, "failed to instantiate RNG");
-			goto caam_remove;
+			return ret;
 		}
 		/*
 		 * Set handles init'ed by this module as the complement of the
@@ -916,19 +900,11 @@ static int caam_probe(struct platform_device *pdev)
 	debugfs_create_blob("tdsk", S_IRUSR | S_IRGRP | S_IROTH, ctrlpriv->ctl,
 			    &ctrlpriv->ctl_tdsk_wrap);
 #endif
-	return 0;
 
-caam_remove:
-	caam_remove(pdev);
-	return ret;
+	ret = devm_of_platform_populate(dev);
+	if (ret)
+		dev_err(dev, "JR platform devices creation error\n");
 
-shutdown_qi:
-#ifdef CONFIG_CAAM_QI
-	if (ctrlpriv->qi_init)
-		caam_qi_shutdown(dev);
-#endif
-iounmap_ctrl:
-	iounmap(ctrl);
 	return ret;
 }
 
@@ -938,7 +914,6 @@ static struct platform_driver caam_driver = {
 		.of_match_table = caam_match,
 	},
 	.probe       = caam_probe,
-	.remove      = caam_remove,
 };
 
 module_platform_driver(caam_driver);
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 731b06becd9c..c7c10c90464b 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -81,9 +81,6 @@ struct caam_drv_private {
 	 */
 	u8 total_jobrs;		/* Total Job Rings in device */
 	u8 qi_present;		/* Nonzero if QI present in device */
-#ifdef CONFIG_CAAM_QI
-	u8 qi_init;		/* Nonzero if QI has been initialized */
-#endif
 	u8 mc_en;		/* Nonzero if MC f/w is active */
 	int secvio_irq;		/* Security violation interrupt number */
 	int virt_en;		/* Virtualization enabled in CAAM */
@@ -102,7 +99,6 @@ struct caam_drv_private {
 	 * variables at runtime.
 	 */
 #ifdef CONFIG_DEBUG_FS
-	struct dentry *dfs_root;
 	struct dentry *ctl; /* controller dir */
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 #endif
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 378f627e1d64..dacf2fa4aa8e 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -500,9 +500,10 @@ void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx)
 }
 EXPORT_SYMBOL(caam_drv_ctx_rel);
 
-void caam_qi_shutdown(struct device *qidev)
+static void caam_qi_shutdown(void *data)
 {
 	int i;
+	struct device *qidev = data;
 	struct caam_qi_priv *priv = &qipriv;
 	const cpumask_t *cpus = qman_affine_cpus();
 
@@ -761,7 +762,10 @@ int caam_qi_init(struct platform_device *caam_pdev)
 			    &times_congested, &caam_fops_u64_ro);
 #endif
 
-	ctrlpriv->qi_init = 1;
+	err = devm_add_action_or_reset(qidev, caam_qi_shutdown, ctrlpriv);
+	if (err)
+		return err;
+
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
 	return 0;
 }
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index db0549549e3b..848958951f68 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -147,7 +147,6 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc);
 void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
 
 int caam_qi_init(struct platform_device *pdev);
-void caam_qi_shutdown(struct device *dev);
 
 /**
  * qi_cache_alloc - Allocate buffers from CAAM-QI cache
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index 596ce28b957d..1be1adffff1d 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -92,15 +92,15 @@ static inline void update_output_data(struct cpt_request_info *req_info,
 	}
 }
 
-static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
+static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 				 u32 *argcnt)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
 	struct fc_context *fctx = &rctx->fctx;
 	u64 *offset_control = &rctx->control_word;
-	u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm);
+	u32 enc_iv_len = crypto_skcipher_ivsize(tfm);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u64 *ctrl_flags = NULL;
 
@@ -115,7 +115,7 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 	else
 		req_info->req.opcode.s.minor = 3;
 
-	req_info->req.param1 = req->nbytes; /* Encryption Data length */
+	req_info->req.param1 = req->cryptlen; /* Encryption Data length */
 	req_info->req.param2 = 0; /*Auth data length */
 
 	fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type;
@@ -147,32 +147,32 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 	return 0;
 }
 
-static inline u32 create_input_list(struct ablkcipher_request  *req, u32 enc,
+static inline u32 create_input_list(struct skcipher_request  *req, u32 enc,
 				    u32 enc_iv_len)
 {
-	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 
 	create_ctx_hdr(req, enc, &argcnt);
-	update_input_iv(req_info, req->info, enc_iv_len, &argcnt);
-	update_input_data(req_info, req->src, req->nbytes, &argcnt);
+	update_input_iv(req_info, req->iv, enc_iv_len, &argcnt);
+	update_input_data(req_info, req->src, req->cryptlen, &argcnt);
 	req_info->incnt = argcnt;
 
 	return 0;
 }
 
-static inline void store_cb_info(struct ablkcipher_request *req,
+static inline void store_cb_info(struct skcipher_request *req,
 				 struct cpt_request_info *req_info)
 {
 	req_info->callback = (void *)cvm_callback;
 	req_info->callback_arg = (void *)&req->base;
 }
 
-static inline void create_output_list(struct ablkcipher_request *req,
+static inline void create_output_list(struct skcipher_request *req,
 				      u32 enc_iv_len)
 {
-	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt = 0;
 
@@ -184,16 +184,16 @@ static inline void create_output_list(struct ablkcipher_request *req,
 	 * [ 16 Bytes/     [   Request Enc/Dec/ DATA Len AES CBC ]
 	 */
 	/* Reading IV information */
-	update_output_iv(req_info, req->info, enc_iv_len, &argcnt);
-	update_output_data(req_info, req->dst, req->nbytes, &argcnt);
+	update_output_iv(req_info, req->iv, enc_iv_len, &argcnt);
+	update_output_data(req_info, req->dst, req->cryptlen, &argcnt);
 	req_info->outcnt = argcnt;
 }
 
-static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc)
+static inline int cvm_enc_dec(struct skcipher_request *req, u32 enc)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
-	u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
+	u32 enc_iv_len = crypto_skcipher_ivsize(tfm);
 	struct fc_context *fctx = &rctx->fctx;
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	void *cdev = NULL;
@@ -217,20 +217,20 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc)
 		return -EINPROGRESS;
 }
 
-static int cvm_encrypt(struct ablkcipher_request *req)
+static int cvm_encrypt(struct skcipher_request *req)
 {
 	return cvm_enc_dec(req, true);
 }
 
-static int cvm_decrypt(struct ablkcipher_request *req)
+static int cvm_decrypt(struct skcipher_request *req)
 {
 	return cvm_enc_dec(req, false);
 }
 
-static int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_xts_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		   u32 keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm);
 	int err;
 	const u8 *key1 = key;
@@ -284,10 +284,10 @@ static int cvm_validate_keylen(struct cvm_enc_ctx *ctx, u32 keylen)
 	return -EINVAL;
 }
 
-static int cvm_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		      u32 keylen, u8 cipher_type)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->cipher_type = cipher_type;
@@ -295,183 +295,157 @@ static int cvm_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		memcpy(ctx->enc_key, key, keylen);
 		return 0;
 	} else {
-		crypto_ablkcipher_set_flags(cipher,
-					    CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 }
 
-static int cvm_cbc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_cbc_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			      u32 keylen)
 {
 	return cvm_setkey(cipher, key, keylen, AES_CBC);
 }
 
-static int cvm_ecb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_ecb_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			      u32 keylen)
 {
 	return cvm_setkey(cipher, key, keylen, AES_ECB);
 }
 
-static int cvm_cfb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_cfb_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			      u32 keylen)
 {
 	return cvm_setkey(cipher, key, keylen, AES_CFB);
 }
 
-static int cvm_cbc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_cbc_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			       u32 keylen)
 {
-	return verify_ablkcipher_des3_key(cipher, key) ?:
+	return verify_skcipher_des3_key(cipher, key) ?:
 	       cvm_setkey(cipher, key, keylen, DES3_CBC);
 }
 
-static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_ecb_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			       u32 keylen)
 {
-	return verify_ablkcipher_des3_key(cipher, key) ?:
+	return verify_skcipher_des3_key(cipher, key) ?:
 	       cvm_setkey(cipher, key, keylen, DES3_ECB);
 }
 
-static int cvm_enc_dec_init(struct crypto_tfm *tfm)
+static int cvm_enc_dec_init(struct crypto_skcipher *tfm)
 {
-	tfm->crt_ablkcipher.reqsize = sizeof(struct cvm_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct cvm_req_ctx));
+
 	return 0;
 }
 
-static struct crypto_alg algs[] = { {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "xts(aes)",
-	.cra_driver_name = "cavium-xts-aes",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.ivsize = AES_BLOCK_SIZE,
-			.min_keysize = 2 * AES_MIN_KEY_SIZE,
-			.max_keysize = 2 * AES_MAX_KEY_SIZE,
-			.setkey = cvm_xts_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+static struct skcipher_alg algs[] = { {
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_enc_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "xts(aes)",
+	.base.cra_driver_name	= "cavium-xts-aes",
+	.base.cra_module	= THIS_MODULE,
+
+	.ivsize			= AES_BLOCK_SIZE,
+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+	.setkey			= cvm_xts_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 }, {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "cbc(aes)",
-	.cra_driver_name = "cavium-cbc-aes",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.ivsize = AES_BLOCK_SIZE,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = cvm_cbc_aes_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_enc_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "cavium-cbc-aes",
+	.base.cra_module	= THIS_MODULE,
+
+	.ivsize			= AES_BLOCK_SIZE,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= cvm_cbc_aes_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 }, {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "ecb(aes)",
-	.cra_driver_name = "cavium-ecb-aes",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.ivsize = AES_BLOCK_SIZE,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = cvm_ecb_aes_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_enc_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "cavium-ecb-aes",
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= cvm_ecb_aes_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 }, {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "cfb(aes)",
-	.cra_driver_name = "cavium-cfb-aes",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.ivsize = AES_BLOCK_SIZE,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = cvm_cfb_aes_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_enc_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "cfb(aes)",
+	.base.cra_driver_name	= "cavium-cfb-aes",
+	.base.cra_module	= THIS_MODULE,
+
+	.ivsize			= AES_BLOCK_SIZE,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= cvm_cfb_aes_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 }, {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_des3_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "cbc(des3_ede)",
-	.cra_driver_name = "cavium-cbc-des3_ede",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize = DES3_EDE_KEY_SIZE,
-			.max_keysize = DES3_EDE_KEY_SIZE,
-			.ivsize = DES_BLOCK_SIZE,
-			.setkey = cvm_cbc_des3_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_des3_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "cbc(des3_ede)",
+	.base.cra_driver_name	= "cavium-cbc-des3_ede",
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= cvm_cbc_des3_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 }, {
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct cvm_des3_ctx),
-	.cra_alignmask = 7,
-	.cra_priority = 4001,
-	.cra_name = "ecb(des3_ede)",
-	.cra_driver_name = "cavium-ecb-des3_ede",
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize = DES3_EDE_KEY_SIZE,
-			.max_keysize = DES3_EDE_KEY_SIZE,
-			.ivsize = DES_BLOCK_SIZE,
-			.setkey = cvm_ecb_des3_setkey,
-			.encrypt = cvm_encrypt,
-			.decrypt = cvm_decrypt,
-		},
-	},
-	.cra_init = cvm_enc_dec_init,
-	.cra_module = THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct cvm_des3_ctx),
+	.base.cra_alignmask	= 7,
+	.base.cra_priority	= 4001,
+	.base.cra_name		= "ecb(des3_ede)",
+	.base.cra_driver_name	= "cavium-ecb-des3_ede",
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= cvm_ecb_des3_setkey,
+	.encrypt		= cvm_encrypt,
+	.decrypt		= cvm_decrypt,
+	.init			= cvm_enc_dec_init,
 } };
 
 static inline int cav_register_algs(void)
 {
 	int err = 0;
 
-	err = crypto_register_algs(algs, ARRAY_SIZE(algs));
+	err = crypto_register_skciphers(algs, ARRAY_SIZE(algs));
 	if (err)
 		return err;
 
@@ -480,7 +454,7 @@ static inline int cav_register_algs(void)
 
 static inline void cav_unregister_algs(void)
 {
-	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
 int cvm_crypto_init(struct cpt_vf *cptvf)
diff --git a/drivers/crypto/cavium/nitrox/Kconfig b/drivers/crypto/cavium/nitrox/Kconfig
index 7b1e751bb9cd..7dc008332a81 100644
--- a/drivers/crypto/cavium/nitrox/Kconfig
+++ b/drivers/crypto/cavium/nitrox/Kconfig
@@ -4,7 +4,7 @@
 #
 config CRYPTO_DEV_NITROX
 	tristate
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AES
 	select CRYPTO_LIB_DES
 	select FW_LOADER
diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c
index e4841eb2a09f..dce5423a5883 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_aead.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c
@@ -40,10 +40,8 @@ static int nitrox_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	union fc_ctx_flags flags;
 
 	aes_keylen = flexi_aes_keylen(keylen);
-	if (aes_keylen < 0) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (aes_keylen < 0)
 		return -EINVAL;
-	}
 
 	/* fill crypto context */
 	fctx = nctx->u.fctx;
@@ -74,6 +72,25 @@ static int nitrox_aead_setauthsize(struct crypto_aead *aead,
 	return 0;
 }
 
+static int nitrox_aes_gcm_setauthsize(struct crypto_aead *aead,
+				      unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return nitrox_aead_setauthsize(aead, authsize);
+}
+
 static int alloc_src_sglist(struct nitrox_kcrypt_request *nkreq,
 			    struct scatterlist *src, char *iv, int ivsize,
 			    int buflen)
@@ -186,6 +203,14 @@ static void nitrox_aead_callback(void *arg, int err)
 	areq->base.complete(&areq->base, err);
 }
 
+static inline bool nitrox_aes_gcm_assoclen_supported(unsigned int assoclen)
+{
+	if (assoclen <= 512)
+		return true;
+
+	return false;
+}
+
 static int nitrox_aes_gcm_enc(struct aead_request *areq)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
@@ -195,6 +220,9 @@ static int nitrox_aes_gcm_enc(struct aead_request *areq)
 	struct flexi_crypto_context *fctx = nctx->u.fctx;
 	int ret;
 
+	if (!nitrox_aes_gcm_assoclen_supported(areq->assoclen))
+		return -EINVAL;
+
 	memcpy(fctx->crypto.iv, areq->iv, GCM_AES_SALT_SIZE);
 
 	rctx->cryptlen = areq->cryptlen;
@@ -226,6 +254,9 @@ static int nitrox_aes_gcm_dec(struct aead_request *areq)
 	struct flexi_crypto_context *fctx = nctx->u.fctx;
 	int ret;
 
+	if (!nitrox_aes_gcm_assoclen_supported(areq->assoclen))
+		return -EINVAL;
+
 	memcpy(fctx->crypto.iv, areq->iv, GCM_AES_SALT_SIZE);
 
 	rctx->cryptlen = areq->cryptlen - aead->authsize;
@@ -492,13 +523,13 @@ static struct aead_alg nitrox_aeads[] = { {
 		.cra_driver_name = "n5_aes_gcm",
 		.cra_priority = PRIO,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_blocksize = 1,
 		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
 	},
 	.setkey = nitrox_aes_gcm_setkey,
-	.setauthsize = nitrox_aead_setauthsize,
+	.setauthsize = nitrox_aes_gcm_setauthsize,
 	.encrypt = nitrox_aes_gcm_enc,
 	.decrypt = nitrox_aes_gcm_dec,
 	.init = nitrox_aes_gcm_init,
@@ -511,7 +542,7 @@ static struct aead_alg nitrox_aeads[] = { {
 		.cra_driver_name = "n5_rfc4106",
 		.cra_priority = PRIO,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_blocksize = 1,
 		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h
index 2217a2736c8e..c2d0c23fb81b 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_dev.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h
@@ -109,6 +109,13 @@ struct nitrox_q_vector {
 	};
 };
 
+enum mcode_type {
+	MCODE_TYPE_INVALID,
+	MCODE_TYPE_AE,
+	MCODE_TYPE_SE_SSL,
+	MCODE_TYPE_SE_IPSEC,
+};
+
 /**
  * mbox_msg - Mailbox message data
  * @type: message type
@@ -128,6 +135,14 @@ union mbox_msg {
 		u64 chipid: 8;
 		u64 vfid: 8;
 	} id;
+	struct {
+		u64 type: 2;
+		u64 opcode: 6;
+		u64 count: 4;
+		u64 info: 40;
+		u64 next_se_grp: 3;
+		u64 next_ae_grp: 3;
+	} mcode_info;
 };
 
 /**
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index bc924980e10c..c4632d84c9a1 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -103,8 +103,7 @@ static void write_to_ucd_unit(struct nitrox_device *ndev, u32 ucode_size,
 	offset = UCD_UCODE_LOAD_BLOCK_NUM;
 	nitrox_write_csr(ndev, offset, block_num);
 
-	code_size = ucode_size;
-	code_size = roundup(code_size, 8);
+	code_size = roundup(ucode_size, 16);
 	while (code_size) {
 		data = ucode_data[i];
 		/* write 8 bytes at a time */
@@ -220,11 +219,11 @@ static int nitrox_load_fw(struct nitrox_device *ndev)
 
 	/* write block number and firmware length
 	 * bit:<2:0> block number
-	 * bit:3 is set SE uses 32KB microcode
-	 * bit:3 is clear SE uses 64KB microcode
+	 * bit:3 is set AE uses 32KB microcode
+	 * bit:3 is clear AE uses 64KB microcode
 	 */
 	core_2_eid_val.value = 0ULL;
-	core_2_eid_val.ucode_blk = 0;
+	core_2_eid_val.ucode_blk = 2;
 	if (ucode_size <= CNN55XX_UCD_BLOCK_SIZE)
 		core_2_eid_val.ucode_len = 1;
 	else
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 02ee95064841..b51b0449b478 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -25,6 +25,7 @@ enum mbx_msg_opcode {
 	MSG_OP_VF_UP,
 	MSG_OP_VF_DOWN,
 	MSG_OP_CHIPID_VFID,
+	MSG_OP_MCODE_INFO = 11,
 };
 
 struct pf2vf_work {
@@ -73,6 +74,13 @@ static void pf2vf_send_response(struct nitrox_device *ndev,
 		vfdev->nr_queues = 0;
 		atomic_set(&vfdev->state, __NDEV_NOT_READY);
 		break;
+	case MSG_OP_MCODE_INFO:
+		msg.data = 0;
+		msg.mcode_info.count = 2;
+		msg.mcode_info.info = MCODE_TYPE_SE_SSL | (MCODE_TYPE_AE << 5);
+		msg.mcode_info.next_se_grp = 1;
+		msg.mcode_info.next_ae_grp = 1;
+		break;
 	default:
 		msg.type = MBX_MSG_TYPE_NOP;
 		break;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
index f69ba02c4d25..12282c1b14f5 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_req.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -10,6 +10,8 @@
 #define PENDING_SIG	0xFFFFFFFFFFFFFFFFUL
 #define PRIO 4001
 
+typedef void (*sereq_completion_t)(void *req, int err);
+
 /**
  * struct gphdr - General purpose Header
  * @param0: first parameter.
@@ -203,12 +205,14 @@ struct nitrox_crypto_ctx {
 		struct flexi_crypto_context *fctx;
 	} u;
 	struct crypto_ctx_hdr *chdr;
+	sereq_completion_t callback;
 };
 
 struct nitrox_kcrypt_request {
 	struct se_crypto_request creq;
 	u8 *src;
 	u8 *dst;
+	u8 *iv_out;
 };
 
 /**
diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
index 3cdce1f0f257..18088b0a2257 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
@@ -6,6 +6,7 @@
 
 #include <crypto/aes.h>
 #include <crypto/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/ctr.h>
 #include <crypto/internal/des.h>
 #include <crypto/xts.h>
@@ -47,6 +48,63 @@ static enum flexi_cipher flexi_cipher_type(const char *name)
 	return cipher->value;
 }
 
+static void free_src_sglist(struct skcipher_request *skreq)
+{
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+
+	kfree(nkreq->src);
+}
+
+static void free_dst_sglist(struct skcipher_request *skreq)
+{
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+
+	kfree(nkreq->dst);
+}
+
+static void nitrox_skcipher_callback(void *arg, int err)
+{
+	struct skcipher_request *skreq = arg;
+
+	free_src_sglist(skreq);
+	free_dst_sglist(skreq);
+	if (err) {
+		pr_err_ratelimited("request failed status 0x%0x\n", err);
+		err = -EINVAL;
+	}
+
+	skcipher_request_complete(skreq, err);
+}
+
+static void nitrox_cbc_cipher_callback(void *arg, int err)
+{
+	struct skcipher_request *skreq = arg;
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq);
+	int ivsize = crypto_skcipher_ivsize(cipher);
+	unsigned int start = skreq->cryptlen - ivsize;
+
+	if (err) {
+		nitrox_skcipher_callback(arg, err);
+		return;
+	}
+
+	if (nkreq->creq.ctrl.s.arg == ENCRYPT) {
+		scatterwalk_map_and_copy(skreq->iv, skreq->dst, start, ivsize,
+					 0);
+	} else {
+		if (skreq->src != skreq->dst) {
+			scatterwalk_map_and_copy(skreq->iv, skreq->src, start,
+						 ivsize, 0);
+		} else {
+			memcpy(skreq->iv, nkreq->iv_out, ivsize);
+			kfree(nkreq->iv_out);
+		}
+	}
+
+	nitrox_skcipher_callback(arg, err);
+}
+
 static int nitrox_skcipher_init(struct crypto_skcipher *tfm)
 {
 	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
@@ -63,6 +121,8 @@ static int nitrox_skcipher_init(struct crypto_skcipher *tfm)
 		nitrox_put_device(nctx->ndev);
 		return -ENOMEM;
 	}
+
+	nctx->callback = nitrox_skcipher_callback;
 	nctx->chdr = chdr;
 	nctx->u.ctx_handle = (uintptr_t)((u8 *)chdr->vaddr +
 					 sizeof(struct ctx_hdr));
@@ -71,6 +131,19 @@ static int nitrox_skcipher_init(struct crypto_skcipher *tfm)
 	return 0;
 }
 
+static int nitrox_cbc_init(struct crypto_skcipher *tfm)
+{
+	int err;
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
+
+	err = nitrox_skcipher_init(tfm);
+	if (err)
+		return err;
+
+	nctx->callback = nitrox_cbc_cipher_callback;
+	return 0;
+}
+
 static void nitrox_skcipher_exit(struct crypto_skcipher *tfm)
 {
 	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
@@ -127,10 +200,8 @@ static int nitrox_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 	int aes_keylen;
 
 	aes_keylen = flexi_aes_keylen(keylen);
-	if (aes_keylen < 0) {
-		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (aes_keylen < 0)
 		return -EINVAL;
-	}
 	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
 }
 
@@ -173,34 +244,6 @@ static int alloc_dst_sglist(struct skcipher_request *skreq, int ivsize)
 	return 0;
 }
 
-static void free_src_sglist(struct skcipher_request *skreq)
-{
-	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
-
-	kfree(nkreq->src);
-}
-
-static void free_dst_sglist(struct skcipher_request *skreq)
-{
-	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
-
-	kfree(nkreq->dst);
-}
-
-static void nitrox_skcipher_callback(void *arg, int err)
-{
-	struct skcipher_request *skreq = arg;
-
-	free_src_sglist(skreq);
-	free_dst_sglist(skreq);
-	if (err) {
-		pr_err_ratelimited("request failed status 0x%0x\n", err);
-		err = -EINVAL;
-	}
-
-	skcipher_request_complete(skreq, err);
-}
-
 static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc)
 {
 	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq);
@@ -240,8 +283,28 @@ static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc)
 	}
 
 	/* send the crypto request */
-	return nitrox_process_se_request(nctx->ndev, creq,
-					 nitrox_skcipher_callback, skreq);
+	return nitrox_process_se_request(nctx->ndev, creq, nctx->callback,
+					 skreq);
+}
+
+static int nitrox_cbc_decrypt(struct skcipher_request *skreq)
+{
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq);
+	int ivsize = crypto_skcipher_ivsize(cipher);
+	gfp_t flags = (skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+			GFP_KERNEL : GFP_ATOMIC;
+	unsigned int start = skreq->cryptlen - ivsize;
+
+	if (skreq->src != skreq->dst)
+		return nitrox_skcipher_crypt(skreq, false);
+
+	nkreq->iv_out = kmalloc(ivsize, flags);
+	if (!nkreq->iv_out)
+		return -ENOMEM;
+
+	scatterwalk_map_and_copy(nkreq->iv_out, skreq->src, start, ivsize, 0);
+	return nitrox_skcipher_crypt(skreq, false);
 }
 
 static int nitrox_aes_encrypt(struct skcipher_request *skreq)
@@ -286,10 +349,8 @@ static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher,
 	keylen /= 2;
 
 	aes_keylen = flexi_aes_keylen(keylen);
-	if (aes_keylen < 0) {
-		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (aes_keylen < 0)
 		return -EINVAL;
-	}
 
 	fctx = nctx->u.fctx;
 	/* copy KEY2 */
@@ -317,10 +378,8 @@ static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher,
 	keylen -= CTR_RFC3686_NONCE_SIZE;
 
 	aes_keylen = flexi_aes_keylen(keylen);
-	if (aes_keylen < 0) {
-		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (aes_keylen < 0)
 		return -EINVAL;
-	}
 	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
 }
 
@@ -340,8 +399,8 @@ static struct skcipher_alg nitrox_skciphers[] = { {
 	.ivsize = AES_BLOCK_SIZE,
 	.setkey = nitrox_aes_setkey,
 	.encrypt = nitrox_aes_encrypt,
-	.decrypt = nitrox_aes_decrypt,
-	.init = nitrox_skcipher_init,
+	.decrypt = nitrox_cbc_decrypt,
+	.init = nitrox_cbc_init,
 	.exit = nitrox_skcipher_exit,
 }, {
 	.base = {
@@ -428,7 +487,6 @@ static struct skcipher_alg nitrox_skciphers[] = { {
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
 		.cra_alignmask = 0,
-		.cra_type = &crypto_ablkcipher_type,
 		.cra_module = THIS_MODULE,
 	},
 	.min_keysize = AES_MIN_KEY_SIZE,
@@ -455,8 +513,8 @@ static struct skcipher_alg nitrox_skciphers[] = { {
 	.ivsize = DES3_EDE_BLOCK_SIZE,
 	.setkey = nitrox_3des_setkey,
 	.encrypt = nitrox_3des_encrypt,
-	.decrypt = nitrox_3des_decrypt,
-	.init = nitrox_skcipher_init,
+	.decrypt = nitrox_cbc_decrypt,
+	.init = nitrox_cbc_init,
 	.exit = nitrox_skcipher_exit,
 }, {
 	.base = {
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 8fec733f567f..e0a8bd15aa74 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -27,7 +27,7 @@ config CRYPTO_DEV_CCP_CRYPTO
 	depends on CRYPTO_DEV_CCP_DD
 	depends on CRYPTO_DEV_SP_CCP
 	select CRYPTO_HASH
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AUTHENC
 	select CRYPTO_RSA
 	select CRYPTO_LIB_AES
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 6b86f1e6d634..db362fe472ea 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -8,7 +8,9 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
 	    ccp-dmaengine.o
 ccp-$(CONFIG_CRYPTO_DEV_CCP_DEBUGFS) += ccp-debugfs.o
 ccp-$(CONFIG_PCI) += sp-pci.o
-ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \
+                                   sev-dev.o \
+                                   tee-dev.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
 ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 32f19f402073..5eba7ee49e81 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -276,7 +276,6 @@ static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 		ctx->u.aes.type = CCP_AES_TYPE_256;
 		break;
 	default:
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->u.aes.mode = alg->mode;
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index 94c1ad7eeddf..9e8f07c1afac 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -42,7 +42,6 @@ static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 		ctx->u.aes.type = CCP_AES_TYPE_256;
 		break;
 	default:
-		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -172,14 +171,12 @@ static struct aead_alg ccp_aes_gcm_defaults = {
 	.ivsize = GCM_AES_IV_SIZE,
 	.maxauthsize = AES_BLOCK_SIZE,
 	.base = {
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
+		.cra_flags	= CRYPTO_ALG_ASYNC |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize	= AES_BLOCK_SIZE,
 		.cra_ctxsize	= sizeof(struct ccp_ctx),
 		.cra_priority	= CCP_CRA_PRIORITY,
-		.cra_type	= &crypto_ablkcipher_type,
 		.cra_exit	= ccp_aes_gcm_cra_exit,
 		.cra_module	= THIS_MODULE,
 	},
@@ -229,11 +226,10 @@ static int ccp_register_aes_aead(struct list_head *head,
 	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 		 def->driver_name);
 	alg->base.cra_blocksize = def->blocksize;
-	alg->base.cra_ablkcipher.ivsize = def->ivsize;
 
 	ret = crypto_register_aead(alg);
 	if (ret) {
-		pr_err("%s ablkcipher algorithm registration error (%d)\n",
+		pr_err("%s aead algorithm registration error (%d)\n",
 		       alg->base.cra_name, ret);
 		kfree(ccp_aead);
 		return ret;
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 8e4a531f4f70..04b2517df955 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -24,7 +24,7 @@ struct ccp_aes_xts_def {
 	const char *drv_name;
 };
 
-static struct ccp_aes_xts_def aes_xts_algs[] = {
+static const struct ccp_aes_xts_def aes_xts_algs[] = {
 	{
 		.name		= "xts(aes)",
 		.drv_name	= "xts-aes-ccp",
@@ -61,26 +61,25 @@ static struct ccp_unit_size_map xts_unit_sizes[] = {
 
 static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 
 	if (ret)
 		return ret;
 
-	memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+	memcpy(req->iv, rctx->iv, AES_BLOCK_SIZE);
 
 	return 0;
 }
 
-static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
-	struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	unsigned int ccpversion = ccp_version();
 	int ret;
 
-	ret = xts_check_key(xfm, key, key_len);
+	ret = xts_verify_key(tfm, key, key_len);
 	if (ret)
 		return ret;
 
@@ -102,11 +101,12 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return crypto_sync_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
 }
 
-static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+static int ccp_aes_xts_crypt(struct skcipher_request *req,
 			     unsigned int encrypt)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	unsigned int ccpversion = ccp_version();
 	unsigned int fallback = 0;
 	unsigned int unit;
@@ -116,7 +116,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	if (!ctx->u.aes.key_len)
 		return -EINVAL;
 
-	if (!req->info)
+	if (!req->iv)
 		return -EINVAL;
 
 	/* Check conditions under which the CCP can fulfill a request. The
@@ -127,7 +127,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	 */
 	unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
 	for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
-		if (req->nbytes == xts_unit_sizes[unit].size) {
+		if (req->cryptlen == xts_unit_sizes[unit].size) {
 			unit_size = unit;
 			break;
 		}
@@ -155,14 +155,14 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 		ret = encrypt ? crypto_skcipher_encrypt(subreq) :
 				crypto_skcipher_decrypt(subreq);
 		skcipher_request_zero(subreq);
 		return ret;
 	}
 
-	memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+	memcpy(rctx->iv, req->iv, AES_BLOCK_SIZE);
 	sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
 
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
@@ -177,7 +177,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	rctx->cmd.u.xts.iv = &rctx->iv_sg;
 	rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE;
 	rctx->cmd.u.xts.src = req->src;
-	rctx->cmd.u.xts.src_len = req->nbytes;
+	rctx->cmd.u.xts.src_len = req->cryptlen;
 	rctx->cmd.u.xts.dst = req->dst;
 
 	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
@@ -185,19 +185,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	return ret;
 }
 
-static int ccp_aes_xts_encrypt(struct ablkcipher_request *req)
+static int ccp_aes_xts_encrypt(struct skcipher_request *req)
 {
 	return ccp_aes_xts_crypt(req, 1);
 }
 
-static int ccp_aes_xts_decrypt(struct ablkcipher_request *req)
+static int ccp_aes_xts_decrypt(struct skcipher_request *req)
 {
 	return ccp_aes_xts_crypt(req, 0);
 }
 
-static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
+static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_sync_skcipher *fallback_tfm;
 
 	ctx->complete = ccp_aes_xts_complete;
@@ -212,14 +212,14 @@ static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
 	}
 	ctx->u.aes.tfm_skcipher = fallback_tfm;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
 
 	return 0;
 }
 
-static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
+static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	crypto_free_sync_skcipher(ctx->u.aes.tfm_skcipher);
 }
@@ -227,8 +227,8 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
 static int ccp_register_aes_xts_alg(struct list_head *head,
 				    const struct ccp_aes_xts_def *def)
 {
-	struct ccp_crypto_ablkcipher_alg *ccp_alg;
-	struct crypto_alg *alg;
+	struct ccp_crypto_skcipher_alg *ccp_alg;
+	struct skcipher_alg *alg;
 	int ret;
 
 	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
@@ -239,30 +239,30 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
 
 	alg = &ccp_alg->alg;
 
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 		 def->drv_name);
-	alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
-			 CRYPTO_ALG_KERN_DRIVER_ONLY |
-			 CRYPTO_ALG_NEED_FALLBACK;
-	alg->cra_blocksize = AES_BLOCK_SIZE;
-	alg->cra_ctxsize = sizeof(struct ccp_ctx);
-	alg->cra_priority = CCP_CRA_PRIORITY;
-	alg->cra_type = &crypto_ablkcipher_type;
-	alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey;
-	alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt;
-	alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt;
-	alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2;
-	alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2;
-	alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE;
-	alg->cra_init = ccp_aes_xts_cra_init;
-	alg->cra_exit = ccp_aes_xts_cra_exit;
-	alg->cra_module = THIS_MODULE;
-
-	ret = crypto_register_alg(alg);
+	alg->base.cra_flags	= CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_NEED_FALLBACK;
+	alg->base.cra_blocksize	= AES_BLOCK_SIZE;
+	alg->base.cra_ctxsize	= sizeof(struct ccp_ctx);
+	alg->base.cra_priority	= CCP_CRA_PRIORITY;
+	alg->base.cra_module	= THIS_MODULE;
+
+	alg->setkey		= ccp_aes_xts_setkey;
+	alg->encrypt		= ccp_aes_xts_encrypt;
+	alg->decrypt		= ccp_aes_xts_decrypt;
+	alg->min_keysize	= AES_MIN_KEY_SIZE * 2;
+	alg->max_keysize	= AES_MAX_KEY_SIZE * 2;
+	alg->ivsize		= AES_BLOCK_SIZE;
+	alg->init		= ccp_aes_xts_init_tfm;
+	alg->exit		= ccp_aes_xts_exit_tfm;
+
+	ret = crypto_register_skcipher(alg);
 	if (ret) {
-		pr_err("%s ablkcipher algorithm registration error (%d)\n",
-		       alg->cra_name, ret);
+		pr_err("%s skcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
 		kfree(ccp_alg);
 		return ret;
 	}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index 58c6dddfc5e1..51e12fbd1159 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -21,25 +21,24 @@
 
 static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct skcipher_request *req = skcipher_request_cast(async_req);
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 
 	if (ret)
 		return ret;
 
 	if (ctx->u.aes.mode != CCP_AES_MODE_ECB)
-		memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+		memcpy(req->iv, rctx->iv, AES_BLOCK_SIZE);
 
 	return 0;
 }
 
-static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			  unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
-	struct ccp_crypto_ablkcipher_alg *alg =
-		ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
+	struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	switch (key_len) {
 	case AES_KEYSIZE_128:
@@ -52,7 +51,6 @@ static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 		ctx->u.aes.type = CCP_AES_TYPE_256;
 		break;
 	default:
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->u.aes.mode = alg->mode;
@@ -64,10 +62,11 @@ static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
+static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
 	int ret;
@@ -77,14 +76,14 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
 
 	if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) ||
 	     (ctx->u.aes.mode == CCP_AES_MODE_CBC)) &&
-	    (req->nbytes & (AES_BLOCK_SIZE - 1)))
+	    (req->cryptlen & (AES_BLOCK_SIZE - 1)))
 		return -EINVAL;
 
 	if (ctx->u.aes.mode != CCP_AES_MODE_ECB) {
-		if (!req->info)
+		if (!req->iv)
 			return -EINVAL;
 
-		memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+		memcpy(rctx->iv, req->iv, AES_BLOCK_SIZE);
 		iv_sg = &rctx->iv_sg;
 		iv_len = AES_BLOCK_SIZE;
 		sg_init_one(iv_sg, rctx->iv, iv_len);
@@ -102,7 +101,7 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
 	rctx->cmd.u.aes.iv = iv_sg;
 	rctx->cmd.u.aes.iv_len = iv_len;
 	rctx->cmd.u.aes.src = req->src;
-	rctx->cmd.u.aes.src_len = req->nbytes;
+	rctx->cmd.u.aes.src_len = req->cryptlen;
 	rctx->cmd.u.aes.dst = req->dst;
 
 	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
@@ -110,48 +109,44 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
 	return ret;
 }
 
-static int ccp_aes_encrypt(struct ablkcipher_request *req)
+static int ccp_aes_encrypt(struct skcipher_request *req)
 {
 	return ccp_aes_crypt(req, true);
 }
 
-static int ccp_aes_decrypt(struct ablkcipher_request *req)
+static int ccp_aes_decrypt(struct skcipher_request *req)
 {
 	return ccp_aes_crypt(req, false);
 }
 
-static int ccp_aes_cra_init(struct crypto_tfm *tfm)
+static int ccp_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->complete = ccp_aes_complete;
 	ctx->u.aes.key_len = 0;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
 
 	return 0;
 }
 
-static void ccp_aes_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
 static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
 				    int ret)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 
 	/* Restore the original pointer */
-	req->info = rctx->rfc3686_info;
+	req->iv = rctx->rfc3686_info;
 
 	return ccp_aes_complete(async_req, ret);
 }
 
-static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				  unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	if (key_len < CTR_RFC3686_NONCE_SIZE)
 		return -EINVAL;
@@ -162,10 +157,11 @@ static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return ccp_aes_setkey(tfm, key, key_len);
 }
 
-static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt)
+static int ccp_aes_rfc3686_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	u8 *iv;
 
 	/* Initialize the CTR block */
@@ -173,84 +169,72 @@ static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt)
 	memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE);
 
 	iv += CTR_RFC3686_NONCE_SIZE;
-	memcpy(iv, req->info, CTR_RFC3686_IV_SIZE);
+	memcpy(iv, req->iv, CTR_RFC3686_IV_SIZE);
 
 	iv += CTR_RFC3686_IV_SIZE;
 	*(__be32 *)iv = cpu_to_be32(1);
 
 	/* Point to the new IV */
-	rctx->rfc3686_info = req->info;
-	req->info = rctx->rfc3686_iv;
+	rctx->rfc3686_info = req->iv;
+	req->iv = rctx->rfc3686_iv;
 
 	return ccp_aes_crypt(req, encrypt);
 }
 
-static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req)
+static int ccp_aes_rfc3686_encrypt(struct skcipher_request *req)
 {
 	return ccp_aes_rfc3686_crypt(req, true);
 }
 
-static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req)
+static int ccp_aes_rfc3686_decrypt(struct skcipher_request *req)
 {
 	return ccp_aes_rfc3686_crypt(req, false);
 }
 
-static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm)
+static int ccp_aes_rfc3686_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->complete = ccp_aes_rfc3686_complete;
 	ctx->u.aes.key_len = 0;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
 
 	return 0;
 }
 
-static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
-static struct crypto_alg ccp_aes_defaults = {
-	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-			  CRYPTO_ALG_ASYNC |
-			  CRYPTO_ALG_KERN_DRIVER_ONLY |
-			  CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize	= AES_BLOCK_SIZE,
-	.cra_ctxsize	= sizeof(struct ccp_ctx),
-	.cra_priority	= CCP_CRA_PRIORITY,
-	.cra_type	= &crypto_ablkcipher_type,
-	.cra_init	= ccp_aes_cra_init,
-	.cra_exit	= ccp_aes_cra_exit,
-	.cra_module	= THIS_MODULE,
-	.cra_ablkcipher	= {
-		.setkey		= ccp_aes_setkey,
-		.encrypt	= ccp_aes_encrypt,
-		.decrypt	= ccp_aes_decrypt,
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-	},
+static const struct skcipher_alg ccp_aes_defaults = {
+	.setkey			= ccp_aes_setkey,
+	.encrypt		= ccp_aes_encrypt,
+	.decrypt		= ccp_aes_decrypt,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.init			= ccp_aes_init_tfm,
+
+	.base.cra_flags		= CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_priority	= CCP_CRA_PRIORITY,
+	.base.cra_module	= THIS_MODULE,
 };
 
-static struct crypto_alg ccp_aes_rfc3686_defaults = {
-	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-			   CRYPTO_ALG_ASYNC |
-			   CRYPTO_ALG_KERN_DRIVER_ONLY |
-			   CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE,
-	.cra_ctxsize	= sizeof(struct ccp_ctx),
-	.cra_priority	= CCP_CRA_PRIORITY,
-	.cra_type	= &crypto_ablkcipher_type,
-	.cra_init	= ccp_aes_rfc3686_cra_init,
-	.cra_exit	= ccp_aes_rfc3686_cra_exit,
-	.cra_module	= THIS_MODULE,
-	.cra_ablkcipher	= {
-		.setkey		= ccp_aes_rfc3686_setkey,
-		.encrypt	= ccp_aes_rfc3686_encrypt,
-		.decrypt	= ccp_aes_rfc3686_decrypt,
-		.min_keysize	= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
-	},
+static const struct skcipher_alg ccp_aes_rfc3686_defaults = {
+	.setkey			= ccp_aes_rfc3686_setkey,
+	.encrypt		= ccp_aes_rfc3686_encrypt,
+	.decrypt		= ccp_aes_rfc3686_decrypt,
+	.min_keysize		= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.init			= ccp_aes_rfc3686_init_tfm,
+
+	.base.cra_flags		= CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_priority	= CCP_CRA_PRIORITY,
+	.base.cra_module	= THIS_MODULE,
 };
 
 struct ccp_aes_def {
@@ -260,7 +244,7 @@ struct ccp_aes_def {
 	const char *driver_name;
 	unsigned int blocksize;
 	unsigned int ivsize;
-	struct crypto_alg *alg_defaults;
+	const struct skcipher_alg *alg_defaults;
 };
 
 static struct ccp_aes_def aes_algs[] = {
@@ -323,8 +307,8 @@ static struct ccp_aes_def aes_algs[] = {
 static int ccp_register_aes_alg(struct list_head *head,
 				const struct ccp_aes_def *def)
 {
-	struct ccp_crypto_ablkcipher_alg *ccp_alg;
-	struct crypto_alg *alg;
+	struct ccp_crypto_skcipher_alg *ccp_alg;
+	struct skcipher_alg *alg;
 	int ret;
 
 	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
@@ -338,16 +322,16 @@ static int ccp_register_aes_alg(struct list_head *head,
 	/* Copy the defaults and override as necessary */
 	alg = &ccp_alg->alg;
 	*alg = *def->alg_defaults;
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 		 def->driver_name);
-	alg->cra_blocksize = def->blocksize;
-	alg->cra_ablkcipher.ivsize = def->ivsize;
+	alg->base.cra_blocksize = def->blocksize;
+	alg->ivsize = def->ivsize;
 
-	ret = crypto_register_alg(alg);
+	ret = crypto_register_skcipher(alg);
 	if (ret) {
-		pr_err("%s ablkcipher algorithm registration error (%d)\n",
-		       alg->cra_name, ret);
+		pr_err("%s skcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
 		kfree(ccp_alg);
 		return ret;
 	}
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index d2c49b2f0323..9c129defdb50 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -20,28 +20,27 @@
 
 static int ccp_des3_complete(struct crypto_async_request *async_req, int ret)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct skcipher_request *req = skcipher_request_cast(async_req);
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req);
 
 	if (ret)
 		return ret;
 
 	if (ctx->u.des3.mode != CCP_DES3_MODE_ECB)
-		memcpy(req->info, rctx->iv, DES3_EDE_BLOCK_SIZE);
+		memcpy(req->iv, rctx->iv, DES3_EDE_BLOCK_SIZE);
 
 	return 0;
 }
 
-static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
-	struct ccp_crypto_ablkcipher_alg *alg =
-		ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
+	struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = verify_ablkcipher_des3_key(tfm, key);
+	err = verify_skcipher_des3_key(tfm, key);
 	if (err)
 		return err;
 
@@ -58,10 +57,11 @@ static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
+static int ccp_des3_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
 	int ret;
@@ -71,14 +71,14 @@ static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
 
 	if (((ctx->u.des3.mode == CCP_DES3_MODE_ECB) ||
 	     (ctx->u.des3.mode == CCP_DES3_MODE_CBC)) &&
-	    (req->nbytes & (DES3_EDE_BLOCK_SIZE - 1)))
+	    (req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1)))
 		return -EINVAL;
 
 	if (ctx->u.des3.mode != CCP_DES3_MODE_ECB) {
-		if (!req->info)
+		if (!req->iv)
 			return -EINVAL;
 
-		memcpy(rctx->iv, req->info, DES3_EDE_BLOCK_SIZE);
+		memcpy(rctx->iv, req->iv, DES3_EDE_BLOCK_SIZE);
 		iv_sg = &rctx->iv_sg;
 		iv_len = DES3_EDE_BLOCK_SIZE;
 		sg_init_one(iv_sg, rctx->iv, iv_len);
@@ -97,7 +97,7 @@ static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
 	rctx->cmd.u.des3.iv = iv_sg;
 	rctx->cmd.u.des3.iv_len = iv_len;
 	rctx->cmd.u.des3.src = req->src;
-	rctx->cmd.u.des3.src_len = req->nbytes;
+	rctx->cmd.u.des3.src_len = req->cryptlen;
 	rctx->cmd.u.des3.dst = req->dst;
 
 	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
@@ -105,51 +105,43 @@ static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
 	return ret;
 }
 
-static int ccp_des3_encrypt(struct ablkcipher_request *req)
+static int ccp_des3_encrypt(struct skcipher_request *req)
 {
 	return ccp_des3_crypt(req, true);
 }
 
-static int ccp_des3_decrypt(struct ablkcipher_request *req)
+static int ccp_des3_decrypt(struct skcipher_request *req)
 {
 	return ccp_des3_crypt(req, false);
 }
 
-static int ccp_des3_cra_init(struct crypto_tfm *tfm)
+static int ccp_des3_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->complete = ccp_des3_complete;
 	ctx->u.des3.key_len = 0;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_des3_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_des3_req_ctx));
 
 	return 0;
 }
 
-static void ccp_des3_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
-static struct crypto_alg ccp_des3_defaults = {
-	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-		CRYPTO_ALG_ASYNC |
-		CRYPTO_ALG_KERN_DRIVER_ONLY |
-		CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize	= sizeof(struct ccp_ctx),
-	.cra_priority	= CCP_CRA_PRIORITY,
-	.cra_type	= &crypto_ablkcipher_type,
-	.cra_init	= ccp_des3_cra_init,
-	.cra_exit	= ccp_des3_cra_exit,
-	.cra_module	= THIS_MODULE,
-	.cra_ablkcipher	= {
-		.setkey		= ccp_des3_setkey,
-		.encrypt	= ccp_des3_encrypt,
-		.decrypt	= ccp_des3_decrypt,
-		.min_keysize	= DES3_EDE_KEY_SIZE,
-		.max_keysize	= DES3_EDE_KEY_SIZE,
-	},
+static const struct skcipher_alg ccp_des3_defaults = {
+	.setkey			= ccp_des3_setkey,
+	.encrypt		= ccp_des3_encrypt,
+	.decrypt		= ccp_des3_decrypt,
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.init			= ccp_des3_init_tfm,
+
+	.base.cra_flags		= CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_priority	= CCP_CRA_PRIORITY,
+	.base.cra_module	= THIS_MODULE,
 };
 
 struct ccp_des3_def {
@@ -159,10 +151,10 @@ struct ccp_des3_def {
 	const char *driver_name;
 	unsigned int blocksize;
 	unsigned int ivsize;
-	struct crypto_alg *alg_defaults;
+	const struct skcipher_alg *alg_defaults;
 };
 
-static struct ccp_des3_def des3_algs[] = {
+static const struct ccp_des3_def des3_algs[] = {
 	{
 		.mode		= CCP_DES3_MODE_ECB,
 		.version	= CCP_VERSION(5, 0),
@@ -186,8 +178,8 @@ static struct ccp_des3_def des3_algs[] = {
 static int ccp_register_des3_alg(struct list_head *head,
 				 const struct ccp_des3_def *def)
 {
-	struct ccp_crypto_ablkcipher_alg *ccp_alg;
-	struct crypto_alg *alg;
+	struct ccp_crypto_skcipher_alg *ccp_alg;
+	struct skcipher_alg *alg;
 	int ret;
 
 	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
@@ -201,16 +193,16 @@ static int ccp_register_des3_alg(struct list_head *head,
 	/* Copy the defaults and override as necessary */
 	alg = &ccp_alg->alg;
 	*alg = *def->alg_defaults;
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 			def->driver_name);
-	alg->cra_blocksize = def->blocksize;
-	alg->cra_ablkcipher.ivsize = def->ivsize;
+	alg->base.cra_blocksize = def->blocksize;
+	alg->ivsize = def->ivsize;
 
-	ret = crypto_register_alg(alg);
+	ret = crypto_register_skcipher(alg);
 	if (ret) {
-		pr_err("%s ablkcipher algorithm registration error (%d)\n",
-				alg->cra_name, ret);
+		pr_err("%s skcipher algorithm registration error (%d)\n",
+				alg->base.cra_name, ret);
 		kfree(ccp_alg);
 		return ret;
 	}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 8ee4cb45a3f3..88275b4867ea 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -41,7 +41,7 @@ MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
 
 /* List heads for the supported algorithms */
 static LIST_HEAD(hash_algs);
-static LIST_HEAD(cipher_algs);
+static LIST_HEAD(skcipher_algs);
 static LIST_HEAD(aead_algs);
 static LIST_HEAD(akcipher_algs);
 
@@ -330,7 +330,7 @@ static int ccp_register_algs(void)
 	int ret;
 
 	if (!aes_disable) {
-		ret = ccp_register_aes_algs(&cipher_algs);
+		ret = ccp_register_aes_algs(&skcipher_algs);
 		if (ret)
 			return ret;
 
@@ -338,7 +338,7 @@ static int ccp_register_algs(void)
 		if (ret)
 			return ret;
 
-		ret = ccp_register_aes_xts_algs(&cipher_algs);
+		ret = ccp_register_aes_xts_algs(&skcipher_algs);
 		if (ret)
 			return ret;
 
@@ -348,7 +348,7 @@ static int ccp_register_algs(void)
 	}
 
 	if (!des3_disable) {
-		ret = ccp_register_des3_algs(&cipher_algs);
+		ret = ccp_register_des3_algs(&skcipher_algs);
 		if (ret)
 			return ret;
 	}
@@ -371,7 +371,7 @@ static int ccp_register_algs(void)
 static void ccp_unregister_algs(void)
 {
 	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
-	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
+	struct ccp_crypto_skcipher_alg *ablk_alg, *ablk_tmp;
 	struct ccp_crypto_aead *aead_alg, *aead_tmp;
 	struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp;
 
@@ -381,8 +381,8 @@ static void ccp_unregister_algs(void)
 		kfree(ahash_alg);
 	}
 
-	list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) {
-		crypto_unregister_alg(&ablk_alg->alg);
+	list_for_each_entry_safe(ablk_alg, ablk_tmp, &skcipher_algs, entry) {
+		crypto_unregister_skcipher(&ablk_alg->alg);
 		list_del(&ablk_alg->entry);
 		kfree(ablk_alg);
 	}
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 453b9797f93f..474e6f1a6a84 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -293,10 +293,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 
 		ret = crypto_shash_digest(sdesc, key, key_len,
 					  ctx->u.sha.key);
-		if (ret) {
-			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		if (ret)
 			return -EINVAL;
-		}
 
 		key_len = digest_size;
 	} else {
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 9015b5da6ba3..90a009e6b5c1 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -21,6 +21,7 @@
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/akcipher.h>
+#include <crypto/skcipher.h>
 #include <crypto/internal/rsa.h>
 
 /* We want the module name in front of our messages */
@@ -31,12 +32,12 @@
 
 #define CCP_CRA_PRIORITY	300
 
-struct ccp_crypto_ablkcipher_alg {
+struct ccp_crypto_skcipher_alg {
 	struct list_head entry;
 
 	u32 mode;
 
-	struct crypto_alg alg;
+	struct skcipher_alg alg;
 };
 
 struct ccp_crypto_aead {
@@ -66,12 +67,12 @@ struct ccp_crypto_akcipher_alg {
 	struct akcipher_alg alg;
 };
 
-static inline struct ccp_crypto_ablkcipher_alg *
-	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
+static inline struct ccp_crypto_skcipher_alg *
+	ccp_crypto_skcipher_alg(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 
-	return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg);
+	return container_of(alg, struct ccp_crypto_skcipher_alg, alg);
 }
 
 static inline struct ccp_crypto_ahash_alg *
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 0186b3df4c87..0d5576f6ad21 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -586,6 +586,7 @@ const struct ccp_vdata ccpv3_platform = {
 	.setup = NULL,
 	.perform = &ccp3_actions,
 	.offset = 0,
+	.rsamax = CCP_RSA_MAX_WIDTH,
 };
 
 const struct ccp_vdata ccpv3 = {
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 57eb53b8ac21..82ac4c14c04c 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -789,6 +789,18 @@ static int ccp5_init(struct ccp_device *ccp)
 
 	/* Find available queues */
 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	/*
+	 * Check for a access to the registers.  If this read returns
+	 * 0xffffffff, it's likely that the system is running a broken
+	 * BIOS which disallows access to the device. Stop here and fail
+	 * the initialization (but not the load, as the PSP could get
+	 * properly initialized).
+	 */
+	if (qmr == 0xffffffff) {
+		dev_notice(dev, "ccp: unable to access the device: you might be running a broken BIOS.\n");
+		return 1;
+	}
+
 	for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) {
 		if (!(qmr & (1 << i)))
 			continue;
@@ -854,7 +866,7 @@ static int ccp5_init(struct ccp_device *ccp)
 
 	if (ccp->cmd_q_count == 0) {
 		dev_notice(dev, "no command queues available\n");
-		ret = -EIO;
+		ret = 1;
 		goto e_pool;
 	}
 
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 73acf0fdb793..19ac509ed76e 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -641,18 +641,27 @@ int ccp_dev_init(struct sp_device *sp)
 		ccp->vdata->setup(ccp);
 
 	ret = ccp->vdata->perform->init(ccp);
-	if (ret)
+	if (ret) {
+		/* A positive number means that the device cannot be initialized,
+		 * but no additional message is required.
+		 */
+		if (ret > 0)
+			goto e_quiet;
+
+		/* An unexpected problem occurred, and should be reported in the log */
 		goto e_err;
+	}
 
 	dev_notice(dev, "ccp enabled\n");
 
 	return 0;
 
 e_err:
-	sp->ccp_data = NULL;
-
 	dev_notice(dev, "ccp initialization failed\n");
 
+e_quiet:
+	sp->ccp_data = NULL;
+
 	return ret;
 }
 
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index a54f9367a580..0770a83bf1a5 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -342,6 +342,7 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan,
 	desc->tx_desc.flags = flags;
 	desc->tx_desc.tx_submit = ccp_tx_submit;
 	desc->ccp = chan->ccp;
+	INIT_LIST_HEAD(&desc->entry);
 	INIT_LIST_HEAD(&desc->pending);
 	INIT_LIST_HEAD(&desc->active);
 	desc->status = DMA_IN_PROGRESS;
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index c8da8eb160da..422193690fd4 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1777,8 +1777,9 @@ ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 			       LSB_ITEM_SIZE);
 			break;
 		default:
+			kfree(hmac_buf);
 			ret = -EINVAL;
-			goto e_ctx;
+			goto e_data;
 		}
 
 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 6b17d179ef8a..e95e7aa5dbf1 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -2,55 +2,20 @@
 /*
  * AMD Platform Security Processor (PSP) interface
  *
- * Copyright (C) 2016,2018 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2019 Advanced Micro Devices, Inc.
  *
  * Author: Brijesh Singh <brijesh.singh@amd.com>
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/spinlock_types.h>
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <linux/delay.h>
-#include <linux/hw_random.h>
-#include <linux/ccp.h>
-#include <linux/firmware.h>
+#include <linux/irqreturn.h>
 
 #include "sp-dev.h"
 #include "psp-dev.h"
+#include "sev-dev.h"
+#include "tee-dev.h"
 
-#define DEVICE_NAME		"sev"
-#define SEV_FW_FILE		"amd/sev.fw"
-#define SEV_FW_NAME_SIZE	64
-
-static DEFINE_MUTEX(sev_cmd_mutex);
-static struct sev_misc_dev *misc_dev;
-static struct psp_device *psp_master;
-
-static int psp_cmd_timeout = 100;
-module_param(psp_cmd_timeout, int, 0644);
-MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands");
-
-static int psp_probe_timeout = 5;
-module_param(psp_probe_timeout, int, 0644);
-MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
-
-static bool psp_dead;
-static int psp_timeout;
-
-static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
-{
-	if (psp_master->api_major > maj)
-		return true;
-	if (psp_master->api_major == maj && psp_master->api_minor >= min)
-		return true;
-	return false;
-}
+struct psp_device *psp_master;
 
 static struct psp_device *psp_alloc_struct(struct sp_device *sp)
 {
@@ -73,866 +38,95 @@ static irqreturn_t psp_irq_handler(int irq, void *data)
 {
 	struct psp_device *psp = data;
 	unsigned int status;
-	int reg;
 
 	/* Read the interrupt status: */
 	status = ioread32(psp->io_regs + psp->vdata->intsts_reg);
 
-	/* Check if it is command completion: */
-	if (!(status & PSP_CMD_COMPLETE))
-		goto done;
+	/* invoke subdevice interrupt handlers */
+	if (status) {
+		if (psp->sev_irq_handler)
+			psp->sev_irq_handler(irq, psp->sev_irq_data, status);
 
-	/* Check if it is SEV command completion: */
-	reg = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
-	if (reg & PSP_CMDRESP_RESP) {
-		psp->sev_int_rcvd = 1;
-		wake_up(&psp->sev_int_queue);
+		if (psp->tee_irq_handler)
+			psp->tee_irq_handler(irq, psp->tee_irq_data, status);
 	}
 
-done:
 	/* Clear the interrupt status by writing the same value we read. */
 	iowrite32(status, psp->io_regs + psp->vdata->intsts_reg);
 
 	return IRQ_HANDLED;
 }
 
-static int sev_wait_cmd_ioc(struct psp_device *psp,
-			    unsigned int *reg, unsigned int timeout)
-{
-	int ret;
-
-	ret = wait_event_timeout(psp->sev_int_queue,
-			psp->sev_int_rcvd, timeout * HZ);
-	if (!ret)
-		return -ETIMEDOUT;
-
-	*reg = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
-
-	return 0;
-}
-
-static int sev_cmd_buffer_len(int cmd)
-{
-	switch (cmd) {
-	case SEV_CMD_INIT:			return sizeof(struct sev_data_init);
-	case SEV_CMD_PLATFORM_STATUS:		return sizeof(struct sev_user_data_status);
-	case SEV_CMD_PEK_CSR:			return sizeof(struct sev_data_pek_csr);
-	case SEV_CMD_PEK_CERT_IMPORT:		return sizeof(struct sev_data_pek_cert_import);
-	case SEV_CMD_PDH_CERT_EXPORT:		return sizeof(struct sev_data_pdh_cert_export);
-	case SEV_CMD_LAUNCH_START:		return sizeof(struct sev_data_launch_start);
-	case SEV_CMD_LAUNCH_UPDATE_DATA:	return sizeof(struct sev_data_launch_update_data);
-	case SEV_CMD_LAUNCH_UPDATE_VMSA:	return sizeof(struct sev_data_launch_update_vmsa);
-	case SEV_CMD_LAUNCH_FINISH:		return sizeof(struct sev_data_launch_finish);
-	case SEV_CMD_LAUNCH_MEASURE:		return sizeof(struct sev_data_launch_measure);
-	case SEV_CMD_ACTIVATE:			return sizeof(struct sev_data_activate);
-	case SEV_CMD_DEACTIVATE:		return sizeof(struct sev_data_deactivate);
-	case SEV_CMD_DECOMMISSION:		return sizeof(struct sev_data_decommission);
-	case SEV_CMD_GUEST_STATUS:		return sizeof(struct sev_data_guest_status);
-	case SEV_CMD_DBG_DECRYPT:		return sizeof(struct sev_data_dbg);
-	case SEV_CMD_DBG_ENCRYPT:		return sizeof(struct sev_data_dbg);
-	case SEV_CMD_SEND_START:		return sizeof(struct sev_data_send_start);
-	case SEV_CMD_SEND_UPDATE_DATA:		return sizeof(struct sev_data_send_update_data);
-	case SEV_CMD_SEND_UPDATE_VMSA:		return sizeof(struct sev_data_send_update_vmsa);
-	case SEV_CMD_SEND_FINISH:		return sizeof(struct sev_data_send_finish);
-	case SEV_CMD_RECEIVE_START:		return sizeof(struct sev_data_receive_start);
-	case SEV_CMD_RECEIVE_FINISH:		return sizeof(struct sev_data_receive_finish);
-	case SEV_CMD_RECEIVE_UPDATE_DATA:	return sizeof(struct sev_data_receive_update_data);
-	case SEV_CMD_RECEIVE_UPDATE_VMSA:	return sizeof(struct sev_data_receive_update_vmsa);
-	case SEV_CMD_LAUNCH_UPDATE_SECRET:	return sizeof(struct sev_data_launch_secret);
-	case SEV_CMD_DOWNLOAD_FIRMWARE:		return sizeof(struct sev_data_download_firmware);
-	case SEV_CMD_GET_ID:			return sizeof(struct sev_data_get_id);
-	default:				return 0;
-	}
-
-	return 0;
-}
-
-static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
-{
-	struct psp_device *psp = psp_master;
-	unsigned int phys_lsb, phys_msb;
-	unsigned int reg, ret = 0;
-
-	if (!psp)
-		return -ENODEV;
-
-	if (psp_dead)
-		return -EBUSY;
-
-	/* Get the physical address of the command buffer */
-	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
-	phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
-
-	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
-		cmd, phys_msb, phys_lsb, psp_timeout);
-
-	print_hex_dump_debug("(in):  ", DUMP_PREFIX_OFFSET, 16, 2, data,
-			     sev_cmd_buffer_len(cmd), false);
-
-	iowrite32(phys_lsb, psp->io_regs + psp->vdata->cmdbuff_addr_lo_reg);
-	iowrite32(phys_msb, psp->io_regs + psp->vdata->cmdbuff_addr_hi_reg);
-
-	psp->sev_int_rcvd = 0;
-
-	reg = cmd;
-	reg <<= PSP_CMDRESP_CMD_SHIFT;
-	reg |= PSP_CMDRESP_IOC;
-	iowrite32(reg, psp->io_regs + psp->vdata->cmdresp_reg);
-
-	/* wait for command completion */
-	ret = sev_wait_cmd_ioc(psp, &reg, psp_timeout);
-	if (ret) {
-		if (psp_ret)
-			*psp_ret = 0;
-
-		dev_err(psp->dev, "sev command %#x timed out, disabling PSP \n", cmd);
-		psp_dead = true;
-
-		return ret;
-	}
-
-	psp_timeout = psp_cmd_timeout;
-
-	if (psp_ret)
-		*psp_ret = reg & PSP_CMDRESP_ERR_MASK;
-
-	if (reg & PSP_CMDRESP_ERR_MASK) {
-		dev_dbg(psp->dev, "sev command %#x failed (%#010x)\n",
-			cmd, reg & PSP_CMDRESP_ERR_MASK);
-		ret = -EIO;
-	}
-
-	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
-			     sev_cmd_buffer_len(cmd), false);
-
-	return ret;
-}
-
-static int sev_do_cmd(int cmd, void *data, int *psp_ret)
-{
-	int rc;
-
-	mutex_lock(&sev_cmd_mutex);
-	rc = __sev_do_cmd_locked(cmd, data, psp_ret);
-	mutex_unlock(&sev_cmd_mutex);
-
-	return rc;
-}
-
-static int __sev_platform_init_locked(int *error)
-{
-	struct psp_device *psp = psp_master;
-	int rc = 0;
-
-	if (!psp)
-		return -ENODEV;
-
-	if (psp->sev_state == SEV_STATE_INIT)
-		return 0;
-
-	rc = __sev_do_cmd_locked(SEV_CMD_INIT, &psp->init_cmd_buf, error);
-	if (rc)
-		return rc;
-
-	psp->sev_state = SEV_STATE_INIT;
-	dev_dbg(psp->dev, "SEV firmware initialized\n");
-
-	return rc;
-}
-
-int sev_platform_init(int *error)
-{
-	int rc;
-
-	mutex_lock(&sev_cmd_mutex);
-	rc = __sev_platform_init_locked(error);
-	mutex_unlock(&sev_cmd_mutex);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(sev_platform_init);
-
-static int __sev_platform_shutdown_locked(int *error)
-{
-	int ret;
-
-	ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
-	if (ret)
-		return ret;
-
-	psp_master->sev_state = SEV_STATE_UNINIT;
-	dev_dbg(psp_master->dev, "SEV firmware shutdown\n");
-
-	return ret;
-}
-
-static int sev_platform_shutdown(int *error)
-{
-	int rc;
-
-	mutex_lock(&sev_cmd_mutex);
-	rc = __sev_platform_shutdown_locked(NULL);
-	mutex_unlock(&sev_cmd_mutex);
-
-	return rc;
-}
-
-static int sev_get_platform_state(int *state, int *error)
-{
-	int rc;
-
-	rc = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS,
-				 &psp_master->status_cmd_buf, error);
-	if (rc)
-		return rc;
-
-	*state = psp_master->status_cmd_buf.state;
-	return rc;
-}
-
-static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
+static unsigned int psp_get_capability(struct psp_device *psp)
 {
-	int state, rc;
+	unsigned int val = ioread32(psp->io_regs + psp->vdata->feature_reg);
 
 	/*
-	 * The SEV spec requires that FACTORY_RESET must be issued in
-	 * UNINIT state. Before we go further lets check if any guest is
-	 * active.
-	 *
-	 * If FW is in WORKING state then deny the request otherwise issue
-	 * SHUTDOWN command do INIT -> UNINIT before issuing the FACTORY_RESET.
-	 *
-	 */
-	rc = sev_get_platform_state(&state, &argp->error);
-	if (rc)
-		return rc;
-
-	if (state == SEV_STATE_WORKING)
-		return -EBUSY;
-
-	if (state == SEV_STATE_INIT) {
-		rc = __sev_platform_shutdown_locked(&argp->error);
-		if (rc)
-			return rc;
-	}
-
-	return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
-}
-
-static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
-{
-	struct sev_user_data_status *data = &psp_master->status_cmd_buf;
-	int ret;
-
-	ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, data, &argp->error);
-	if (ret)
-		return ret;
-
-	if (copy_to_user((void __user *)argp->data, data, sizeof(*data)))
-		ret = -EFAULT;
-
-	return ret;
-}
-
-static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
-{
-	int rc;
-
-	if (psp_master->sev_state == SEV_STATE_UNINIT) {
-		rc = __sev_platform_init_locked(&argp->error);
-		if (rc)
-			return rc;
-	}
-
-	return __sev_do_cmd_locked(cmd, NULL, &argp->error);
-}
-
-static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
-{
-	struct sev_user_data_pek_csr input;
-	struct sev_data_pek_csr *data;
-	void *blob = NULL;
-	int ret;
-
-	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
-		return -EFAULT;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	/* userspace wants to query CSR length */
-	if (!input.address || !input.length)
-		goto cmd;
-
-	/* allocate a physically contiguous buffer to store the CSR blob */
-	if (!access_ok(input.address, input.length) ||
-	    input.length > SEV_FW_BLOB_MAX_SIZE) {
-		ret = -EFAULT;
-		goto e_free;
-	}
-
-	blob = kmalloc(input.length, GFP_KERNEL);
-	if (!blob) {
-		ret = -ENOMEM;
-		goto e_free;
-	}
-
-	data->address = __psp_pa(blob);
-	data->len = input.length;
-
-cmd:
-	if (psp_master->sev_state == SEV_STATE_UNINIT) {
-		ret = __sev_platform_init_locked(&argp->error);
-		if (ret)
-			goto e_free_blob;
-	}
-
-	ret = __sev_do_cmd_locked(SEV_CMD_PEK_CSR, data, &argp->error);
-
-	 /* If we query the CSR length, FW responded with expected data. */
-	input.length = data->len;
-
-	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
-		ret = -EFAULT;
-		goto e_free_blob;
-	}
-
-	if (blob) {
-		if (copy_to_user((void __user *)input.address, blob, input.length))
-			ret = -EFAULT;
-	}
-
-e_free_blob:
-	kfree(blob);
-e_free:
-	kfree(data);
-	return ret;
-}
-
-void *psp_copy_user_blob(u64 __user uaddr, u32 len)
-{
-	if (!uaddr || !len)
-		return ERR_PTR(-EINVAL);
-
-	/* verify that blob length does not exceed our limit */
-	if (len > SEV_FW_BLOB_MAX_SIZE)
-		return ERR_PTR(-EINVAL);
-
-	return memdup_user((void __user *)(uintptr_t)uaddr, len);
-}
-EXPORT_SYMBOL_GPL(psp_copy_user_blob);
-
-static int sev_get_api_version(void)
-{
-	struct sev_user_data_status *status;
-	int error = 0, ret;
-
-	status = &psp_master->status_cmd_buf;
-	ret = sev_platform_status(status, &error);
-	if (ret) {
-		dev_err(psp_master->dev,
-			"SEV: failed to get status. Error: %#x\n", error);
-		return 1;
-	}
-
-	psp_master->api_major = status->api_major;
-	psp_master->api_minor = status->api_minor;
-	psp_master->build = status->build;
-	psp_master->sev_state = status->state;
-
-	return 0;
-}
-
-static int sev_get_firmware(struct device *dev,
-			    const struct firmware **firmware)
-{
-	char fw_name_specific[SEV_FW_NAME_SIZE];
-	char fw_name_subset[SEV_FW_NAME_SIZE];
-
-	snprintf(fw_name_specific, sizeof(fw_name_specific),
-		 "amd/amd_sev_fam%.2xh_model%.2xh.sbin",
-		 boot_cpu_data.x86, boot_cpu_data.x86_model);
-
-	snprintf(fw_name_subset, sizeof(fw_name_subset),
-		 "amd/amd_sev_fam%.2xh_model%.1xxh.sbin",
-		 boot_cpu_data.x86, (boot_cpu_data.x86_model & 0xf0) >> 4);
-
-	/* Check for SEV FW for a particular model.
-	 * Ex. amd_sev_fam17h_model00h.sbin for Family 17h Model 00h
-	 *
-	 * or
-	 *
-	 * Check for SEV FW common to a subset of models.
-	 * Ex. amd_sev_fam17h_model0xh.sbin for
-	 *     Family 17h Model 00h -- Family 17h Model 0Fh
-	 *
-	 * or
-	 *
-	 * Fall-back to using generic name: sev.fw
+	 * Check for a access to the registers.  If this read returns
+	 * 0xffffffff, it's likely that the system is running a broken
+	 * BIOS which disallows access to the device. Stop here and
+	 * fail the PSP initialization (but not the load, as the CCP
+	 * could get properly initialized).
 	 */
-	if ((firmware_request_nowarn(firmware, fw_name_specific, dev) >= 0) ||
-	    (firmware_request_nowarn(firmware, fw_name_subset, dev) >= 0) ||
-	    (firmware_request_nowarn(firmware, SEV_FW_FILE, dev) >= 0))
+	if (val == 0xffffffff) {
+		dev_notice(psp->dev, "psp: unable to access the device: you might be running a broken BIOS.\n");
 		return 0;
-
-	return -ENOENT;
-}
-
-/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
-static int sev_update_firmware(struct device *dev)
-{
-	struct sev_data_download_firmware *data;
-	const struct firmware *firmware;
-	int ret, error, order;
-	struct page *p;
-	u64 data_size;
-
-	if (sev_get_firmware(dev, &firmware) == -ENOENT) {
-		dev_dbg(dev, "No SEV firmware file present\n");
-		return -1;
-	}
-
-	/*
-	 * SEV FW expects the physical address given to it to be 32
-	 * byte aligned. Memory allocated has structure placed at the
-	 * beginning followed by the firmware being passed to the SEV
-	 * FW. Allocate enough memory for data structure + alignment
-	 * padding + SEV FW.
-	 */
-	data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
-
-	order = get_order(firmware->size + data_size);
-	p = alloc_pages(GFP_KERNEL, order);
-	if (!p) {
-		ret = -1;
-		goto fw_err;
-	}
-
-	/*
-	 * Copy firmware data to a kernel allocated contiguous
-	 * memory region.
-	 */
-	data = page_address(p);
-	memcpy(page_address(p) + data_size, firmware->data, firmware->size);
-
-	data->address = __psp_pa(page_address(p) + data_size);
-	data->len = firmware->size;
-
-	ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
-	if (ret)
-		dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
-	else
-		dev_info(dev, "SEV firmware update successful\n");
-
-	__free_pages(p, order);
-
-fw_err:
-	release_firmware(firmware);
-
-	return ret;
-}
-
-static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
-{
-	struct sev_user_data_pek_cert_import input;
-	struct sev_data_pek_cert_import *data;
-	void *pek_blob, *oca_blob;
-	int ret;
-
-	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
-		return -EFAULT;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	/* copy PEK certificate blobs from userspace */
-	pek_blob = psp_copy_user_blob(input.pek_cert_address, input.pek_cert_len);
-	if (IS_ERR(pek_blob)) {
-		ret = PTR_ERR(pek_blob);
-		goto e_free;
-	}
-
-	data->pek_cert_address = __psp_pa(pek_blob);
-	data->pek_cert_len = input.pek_cert_len;
-
-	/* copy PEK certificate blobs from userspace */
-	oca_blob = psp_copy_user_blob(input.oca_cert_address, input.oca_cert_len);
-	if (IS_ERR(oca_blob)) {
-		ret = PTR_ERR(oca_blob);
-		goto e_free_pek;
-	}
-
-	data->oca_cert_address = __psp_pa(oca_blob);
-	data->oca_cert_len = input.oca_cert_len;
-
-	/* If platform is not in INIT state then transition it to INIT */
-	if (psp_master->sev_state != SEV_STATE_INIT) {
-		ret = __sev_platform_init_locked(&argp->error);
-		if (ret)
-			goto e_free_oca;
-	}
-
-	ret = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, data, &argp->error);
-
-e_free_oca:
-	kfree(oca_blob);
-e_free_pek:
-	kfree(pek_blob);
-e_free:
-	kfree(data);
-	return ret;
-}
-
-static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
-{
-	struct sev_user_data_get_id2 input;
-	struct sev_data_get_id *data;
-	void *id_blob = NULL;
-	int ret;
-
-	/* SEV GET_ID is available from SEV API v0.16 and up */
-	if (!sev_version_greater_or_equal(0, 16))
-		return -ENOTSUPP;
-
-	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
-		return -EFAULT;
-
-	/* Check if we have write access to the userspace buffer */
-	if (input.address &&
-	    input.length &&
-	    !access_ok(input.address, input.length))
-		return -EFAULT;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	if (input.address && input.length) {
-		id_blob = kmalloc(input.length, GFP_KERNEL);
-		if (!id_blob) {
-			kfree(data);
-			return -ENOMEM;
-		}
-
-		data->address = __psp_pa(id_blob);
-		data->len = input.length;
-	}
-
-	ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
-
-	/*
-	 * Firmware will return the length of the ID value (either the minimum
-	 * required length or the actual length written), return it to the user.
-	 */
-	input.length = data->len;
-
-	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
-		ret = -EFAULT;
-		goto e_free;
-	}
-
-	if (id_blob) {
-		if (copy_to_user((void __user *)input.address,
-				 id_blob, data->len)) {
-			ret = -EFAULT;
-			goto e_free;
-		}
-	}
-
-e_free:
-	kfree(id_blob);
-	kfree(data);
-
-	return ret;
-}
-
-static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
-{
-	struct sev_data_get_id *data;
-	u64 data_size, user_size;
-	void *id_blob, *mem;
-	int ret;
-
-	/* SEV GET_ID available from SEV API v0.16 and up */
-	if (!sev_version_greater_or_equal(0, 16))
-		return -ENOTSUPP;
-
-	/* SEV FW expects the buffer it fills with the ID to be
-	 * 8-byte aligned. Memory allocated should be enough to
-	 * hold data structure + alignment padding + memory
-	 * where SEV FW writes the ID.
-	 */
-	data_size = ALIGN(sizeof(struct sev_data_get_id), 8);
-	user_size = sizeof(struct sev_user_data_get_id);
-
-	mem = kzalloc(data_size + user_size, GFP_KERNEL);
-	if (!mem)
-		return -ENOMEM;
-
-	data = mem;
-	id_blob = mem + data_size;
-
-	data->address = __psp_pa(id_blob);
-	data->len = user_size;
-
-	ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
-	if (!ret) {
-		if (copy_to_user((void __user *)argp->data, id_blob, data->len))
-			ret = -EFAULT;
 	}
 
-	kfree(mem);
-
-	return ret;
+	return val;
 }
 
-static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
+static int psp_check_sev_support(struct psp_device *psp,
+				 unsigned int capability)
 {
-	struct sev_user_data_pdh_cert_export input;
-	void *pdh_blob = NULL, *cert_blob = NULL;
-	struct sev_data_pdh_cert_export *data;
-	int ret;
-
-	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
-		return -EFAULT;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	/* Userspace wants to query the certificate length. */
-	if (!input.pdh_cert_address ||
-	    !input.pdh_cert_len ||
-	    !input.cert_chain_address)
-		goto cmd;
-
-	/* Allocate a physically contiguous buffer to store the PDH blob. */
-	if ((input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) ||
-	    !access_ok(input.pdh_cert_address, input.pdh_cert_len)) {
-		ret = -EFAULT;
-		goto e_free;
-	}
-
-	/* Allocate a physically contiguous buffer to store the cert chain blob. */
-	if ((input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) ||
-	    !access_ok(input.cert_chain_address, input.cert_chain_len)) {
-		ret = -EFAULT;
-		goto e_free;
-	}
-
-	pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
-	if (!pdh_blob) {
-		ret = -ENOMEM;
-		goto e_free;
-	}
-
-	data->pdh_cert_address = __psp_pa(pdh_blob);
-	data->pdh_cert_len = input.pdh_cert_len;
-
-	cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
-	if (!cert_blob) {
-		ret = -ENOMEM;
-		goto e_free_pdh;
-	}
-
-	data->cert_chain_address = __psp_pa(cert_blob);
-	data->cert_chain_len = input.cert_chain_len;
-
-cmd:
-	/* If platform is not in INIT state then transition it to INIT. */
-	if (psp_master->sev_state != SEV_STATE_INIT) {
-		ret = __sev_platform_init_locked(&argp->error);
-		if (ret)
-			goto e_free_cert;
-	}
-
-	ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, data, &argp->error);
-
-	/* If we query the length, FW responded with expected data. */
-	input.cert_chain_len = data->cert_chain_len;
-	input.pdh_cert_len = data->pdh_cert_len;
-
-	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
-		ret = -EFAULT;
-		goto e_free_cert;
-	}
-
-	if (pdh_blob) {
-		if (copy_to_user((void __user *)input.pdh_cert_address,
-				 pdh_blob, input.pdh_cert_len)) {
-			ret = -EFAULT;
-			goto e_free_cert;
-		}
-	}
-
-	if (cert_blob) {
-		if (copy_to_user((void __user *)input.cert_chain_address,
-				 cert_blob, input.cert_chain_len))
-			ret = -EFAULT;
+	/* Check if device supports SEV feature */
+	if (!(capability & 1)) {
+		dev_dbg(psp->dev, "psp does not support SEV\n");
+		return -ENODEV;
 	}
 
-e_free_cert:
-	kfree(cert_blob);
-e_free_pdh:
-	kfree(pdh_blob);
-e_free:
-	kfree(data);
-	return ret;
+	return 0;
 }
 
-static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+static int psp_check_tee_support(struct psp_device *psp,
+				 unsigned int capability)
 {
-	void __user *argp = (void __user *)arg;
-	struct sev_issue_cmd input;
-	int ret = -EFAULT;
-
-	if (!psp_master)
+	/* Check if device supports TEE feature */
+	if (!(capability & 2)) {
+		dev_dbg(psp->dev, "psp does not support TEE\n");
 		return -ENODEV;
-
-	if (ioctl != SEV_ISSUE_CMD)
-		return -EINVAL;
-
-	if (copy_from_user(&input, argp, sizeof(struct sev_issue_cmd)))
-		return -EFAULT;
-
-	if (input.cmd > SEV_MAX)
-		return -EINVAL;
-
-	mutex_lock(&sev_cmd_mutex);
-
-	switch (input.cmd) {
-
-	case SEV_FACTORY_RESET:
-		ret = sev_ioctl_do_reset(&input);
-		break;
-	case SEV_PLATFORM_STATUS:
-		ret = sev_ioctl_do_platform_status(&input);
-		break;
-	case SEV_PEK_GEN:
-		ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input);
-		break;
-	case SEV_PDH_GEN:
-		ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input);
-		break;
-	case SEV_PEK_CSR:
-		ret = sev_ioctl_do_pek_csr(&input);
-		break;
-	case SEV_PEK_CERT_IMPORT:
-		ret = sev_ioctl_do_pek_import(&input);
-		break;
-	case SEV_PDH_CERT_EXPORT:
-		ret = sev_ioctl_do_pdh_export(&input);
-		break;
-	case SEV_GET_ID:
-		pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n");
-		ret = sev_ioctl_do_get_id(&input);
-		break;
-	case SEV_GET_ID2:
-		ret = sev_ioctl_do_get_id2(&input);
-		break;
-	default:
-		ret = -EINVAL;
-		goto out;
 	}
 
-	if (copy_to_user(argp, &input, sizeof(struct sev_issue_cmd)))
-		ret = -EFAULT;
-out:
-	mutex_unlock(&sev_cmd_mutex);
-
-	return ret;
-}
-
-static const struct file_operations sev_fops = {
-	.owner	= THIS_MODULE,
-	.unlocked_ioctl = sev_ioctl,
-};
-
-int sev_platform_status(struct sev_user_data_status *data, int *error)
-{
-	return sev_do_cmd(SEV_CMD_PLATFORM_STATUS, data, error);
-}
-EXPORT_SYMBOL_GPL(sev_platform_status);
-
-int sev_guest_deactivate(struct sev_data_deactivate *data, int *error)
-{
-	return sev_do_cmd(SEV_CMD_DEACTIVATE, data, error);
-}
-EXPORT_SYMBOL_GPL(sev_guest_deactivate);
-
-int sev_guest_activate(struct sev_data_activate *data, int *error)
-{
-	return sev_do_cmd(SEV_CMD_ACTIVATE, data, error);
-}
-EXPORT_SYMBOL_GPL(sev_guest_activate);
-
-int sev_guest_decommission(struct sev_data_decommission *data, int *error)
-{
-	return sev_do_cmd(SEV_CMD_DECOMMISSION, data, error);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(sev_guest_decommission);
 
-int sev_guest_df_flush(int *error)
+static int psp_check_support(struct psp_device *psp,
+			     unsigned int capability)
 {
-	return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
-}
-EXPORT_SYMBOL_GPL(sev_guest_df_flush);
+	int sev_support = psp_check_sev_support(psp, capability);
+	int tee_support = psp_check_tee_support(psp, capability);
 
-static void sev_exit(struct kref *ref)
-{
-	struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount);
+	/* Return error if device neither supports SEV nor TEE */
+	if (sev_support && tee_support)
+		return -ENODEV;
 
-	misc_deregister(&misc_dev->misc);
+	return 0;
 }
 
-static int sev_misc_init(struct psp_device *psp)
+static int psp_init(struct psp_device *psp, unsigned int capability)
 {
-	struct device *dev = psp->dev;
 	int ret;
 
-	/*
-	 * SEV feature support can be detected on multiple devices but the SEV
-	 * FW commands must be issued on the master. During probe, we do not
-	 * know the master hence we create /dev/sev on the first device probe.
-	 * sev_do_cmd() finds the right master device to which to issue the
-	 * command to the firmware.
-	 */
-	if (!misc_dev) {
-		struct miscdevice *misc;
-
-		misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL);
-		if (!misc_dev)
-			return -ENOMEM;
-
-		misc = &misc_dev->misc;
-		misc->minor = MISC_DYNAMIC_MINOR;
-		misc->name = DEVICE_NAME;
-		misc->fops = &sev_fops;
-
-		ret = misc_register(misc);
+	if (!psp_check_sev_support(psp, capability)) {
+		ret = sev_dev_init(psp);
 		if (ret)
 			return ret;
-
-		kref_init(&misc_dev->refcount);
-	} else {
-		kref_get(&misc_dev->refcount);
 	}
 
-	init_waitqueue_head(&psp->sev_int_queue);
-	psp->sev_misc = misc_dev;
-	dev_dbg(dev, "registered SEV device\n");
-
-	return 0;
-}
-
-static int psp_check_sev_support(struct psp_device *psp)
-{
-	/* Check if device supports SEV feature */
-	if (!(ioread32(psp->io_regs + psp->vdata->feature_reg) & 1)) {
-		dev_dbg(psp->dev, "psp does not support SEV\n");
-		return -ENODEV;
+	if (!psp_check_tee_support(psp, capability)) {
+		ret = tee_dev_init(psp);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -942,6 +136,7 @@ int psp_dev_init(struct sp_device *sp)
 {
 	struct device *dev = sp->dev;
 	struct psp_device *psp;
+	unsigned int capability;
 	int ret;
 
 	ret = -ENOMEM;
@@ -960,7 +155,11 @@ int psp_dev_init(struct sp_device *sp)
 
 	psp->io_regs = sp->io_map;
 
-	ret = psp_check_sev_support(psp);
+	capability = psp_get_capability(psp);
+	if (!capability)
+		goto e_disable;
+
+	ret = psp_check_support(psp, capability);
 	if (ret)
 		goto e_disable;
 
@@ -975,7 +174,7 @@ int psp_dev_init(struct sp_device *sp)
 		goto e_err;
 	}
 
-	ret = sev_misc_init(psp);
+	ret = psp_init(psp, capability);
 	if (ret)
 		goto e_irq;
 
@@ -1011,71 +210,52 @@ void psp_dev_destroy(struct sp_device *sp)
 	if (!psp)
 		return;
 
-	if (psp->sev_misc)
-		kref_put(&misc_dev->refcount, sev_exit);
+	sev_dev_destroy(psp);
+
+	tee_dev_destroy(psp);
 
 	sp_free_psp_irq(sp, psp);
 }
 
-int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
-				void *data, int *error)
+void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
+			     void *data)
 {
-	if (!filep || filep->f_op != &sev_fops)
-		return -EBADF;
-
-	return  sev_do_cmd(cmd, data, error);
+	psp->sev_irq_data = data;
+	psp->sev_irq_handler = handler;
 }
-EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
 
-void psp_pci_init(void)
+void psp_clear_sev_irq_handler(struct psp_device *psp)
 {
-	struct sp_device *sp;
-	int error, rc;
-
-	sp = sp_get_psp_master_device();
-	if (!sp)
-		return;
-
-	psp_master = sp->psp_data;
+	psp_set_sev_irq_handler(psp, NULL, NULL);
+}
 
-	psp_timeout = psp_probe_timeout;
+void psp_set_tee_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
+			     void *data)
+{
+	psp->tee_irq_data = data;
+	psp->tee_irq_handler = handler;
+}
 
-	if (sev_get_api_version())
-		goto err;
+void psp_clear_tee_irq_handler(struct psp_device *psp)
+{
+	psp_set_tee_irq_handler(psp, NULL, NULL);
+}
 
-	/*
-	 * If platform is not in UNINIT state then firmware upgrade and/or
-	 * platform INIT command will fail. These command require UNINIT state.
-	 *
-	 * In a normal boot we should never run into case where the firmware
-	 * is not in UNINIT state on boot. But in case of kexec boot, a reboot
-	 * may not go through a typical shutdown sequence and may leave the
-	 * firmware in INIT or WORKING state.
-	 */
+struct psp_device *psp_get_master_device(void)
+{
+	struct sp_device *sp = sp_get_psp_master_device();
 
-	if (psp_master->sev_state != SEV_STATE_UNINIT) {
-		sev_platform_shutdown(NULL);
-		psp_master->sev_state = SEV_STATE_UNINIT;
-	}
+	return sp ? sp->psp_data : NULL;
+}
 
-	if (sev_version_greater_or_equal(0, 15) &&
-	    sev_update_firmware(psp_master->dev) == 0)
-		sev_get_api_version();
+void psp_pci_init(void)
+{
+	psp_master = psp_get_master_device();
 
-	/* Initialize the platform */
-	rc = sev_platform_init(&error);
-	if (rc) {
-		dev_err(sp->dev, "SEV: failed to INIT error %#x\n", error);
+	if (!psp_master)
 		return;
-	}
-
-	dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major,
-		 psp_master->api_minor, psp_master->build);
-
-	return;
 
-err:
-	psp_master = NULL;
+	sev_pci_init();
 }
 
 void psp_pci_exit(void)
@@ -1083,5 +263,5 @@ void psp_pci_exit(void)
 	if (!psp_master)
 		return;
 
-	sev_platform_shutdown(NULL);
+	sev_pci_exit();
 }
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index 82a084f02990..ef38e4135d81 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -2,7 +2,7 @@
 /*
  * AMD Platform Security Processor (PSP) interface driver
  *
- * Copyright (C) 2017-2018 Advanced Micro Devices, Inc.
+ * Copyright (C) 2017-2019 Advanced Micro Devices, Inc.
  *
  * Author: Brijesh Singh <brijesh.singh@amd.com>
  */
@@ -11,34 +11,20 @@
 #define __PSP_DEV_H__
 
 #include <linux/device.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/wait.h>
-#include <linux/dmapool.h>
-#include <linux/hw_random.h>
-#include <linux/bitops.h>
+#include <linux/bits.h>
 #include <linux/interrupt.h>
-#include <linux/irqreturn.h>
-#include <linux/dmaengine.h>
-#include <linux/psp-sev.h>
-#include <linux/miscdevice.h>
 
 #include "sp-dev.h"
 
-#define PSP_CMD_COMPLETE		BIT(1)
-
-#define PSP_CMDRESP_CMD_SHIFT		16
-#define PSP_CMDRESP_IOC			BIT(0)
 #define PSP_CMDRESP_RESP		BIT(31)
 #define PSP_CMDRESP_ERR_MASK		0xffff
 
 #define MAX_PSP_NAME_LEN		16
 
-struct sev_misc_dev {
-	struct kref refcount;
-	struct miscdevice misc;
-};
+extern struct psp_device *psp_master;
+
+typedef void (*psp_irq_handler_t)(int, void *, unsigned int);
 
 struct psp_device {
 	struct list_head entry;
@@ -51,16 +37,24 @@ struct psp_device {
 
 	void __iomem *io_regs;
 
-	int sev_state;
-	unsigned int sev_int_rcvd;
-	wait_queue_head_t sev_int_queue;
-	struct sev_misc_dev *sev_misc;
-	struct sev_user_data_status status_cmd_buf;
-	struct sev_data_init init_cmd_buf;
+	psp_irq_handler_t sev_irq_handler;
+	void *sev_irq_data;
+
+	psp_irq_handler_t tee_irq_handler;
+	void *tee_irq_data;
 
-	u8 api_major;
-	u8 api_minor;
-	u8 build;
+	void *sev_data;
+	void *tee_data;
 };
 
+void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
+			     void *data);
+void psp_clear_sev_irq_handler(struct psp_device *psp);
+
+void psp_set_tee_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
+			     void *data);
+void psp_clear_tee_irq_handler(struct psp_device *psp);
+
+struct psp_device *psp_get_master_device(void);
+
 #endif /* __PSP_DEV_H */
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
new file mode 100644
index 000000000000..e467860f797d
--- /dev/null
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -0,0 +1,1077 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Encrypted Virtualization (SEV) interface
+ *
+ * Copyright (C) 2016,2019 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/ccp.h>
+#include <linux/firmware.h>
+
+#include <asm/smp.h>
+
+#include "psp-dev.h"
+#include "sev-dev.h"
+
+#define DEVICE_NAME		"sev"
+#define SEV_FW_FILE		"amd/sev.fw"
+#define SEV_FW_NAME_SIZE	64
+
+static DEFINE_MUTEX(sev_cmd_mutex);
+static struct sev_misc_dev *misc_dev;
+
+static int psp_cmd_timeout = 100;
+module_param(psp_cmd_timeout, int, 0644);
+MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands");
+
+static int psp_probe_timeout = 5;
+module_param(psp_probe_timeout, int, 0644);
+MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
+
+static bool psp_dead;
+static int psp_timeout;
+
+static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
+{
+	struct sev_device *sev = psp_master->sev_data;
+
+	if (sev->api_major > maj)
+		return true;
+
+	if (sev->api_major == maj && sev->api_minor >= min)
+		return true;
+
+	return false;
+}
+
+static void sev_irq_handler(int irq, void *data, unsigned int status)
+{
+	struct sev_device *sev = data;
+	int reg;
+
+	/* Check if it is command completion: */
+	if (!(status & SEV_CMD_COMPLETE))
+		return;
+
+	/* Check if it is SEV command completion: */
+	reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+	if (reg & PSP_CMDRESP_RESP) {
+		sev->int_rcvd = 1;
+		wake_up(&sev->int_queue);
+	}
+}
+
+static int sev_wait_cmd_ioc(struct sev_device *sev,
+			    unsigned int *reg, unsigned int timeout)
+{
+	int ret;
+
+	ret = wait_event_timeout(sev->int_queue,
+			sev->int_rcvd, timeout * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	*reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+
+	return 0;
+}
+
+static int sev_cmd_buffer_len(int cmd)
+{
+	switch (cmd) {
+	case SEV_CMD_INIT:			return sizeof(struct sev_data_init);
+	case SEV_CMD_PLATFORM_STATUS:		return sizeof(struct sev_user_data_status);
+	case SEV_CMD_PEK_CSR:			return sizeof(struct sev_data_pek_csr);
+	case SEV_CMD_PEK_CERT_IMPORT:		return sizeof(struct sev_data_pek_cert_import);
+	case SEV_CMD_PDH_CERT_EXPORT:		return sizeof(struct sev_data_pdh_cert_export);
+	case SEV_CMD_LAUNCH_START:		return sizeof(struct sev_data_launch_start);
+	case SEV_CMD_LAUNCH_UPDATE_DATA:	return sizeof(struct sev_data_launch_update_data);
+	case SEV_CMD_LAUNCH_UPDATE_VMSA:	return sizeof(struct sev_data_launch_update_vmsa);
+	case SEV_CMD_LAUNCH_FINISH:		return sizeof(struct sev_data_launch_finish);
+	case SEV_CMD_LAUNCH_MEASURE:		return sizeof(struct sev_data_launch_measure);
+	case SEV_CMD_ACTIVATE:			return sizeof(struct sev_data_activate);
+	case SEV_CMD_DEACTIVATE:		return sizeof(struct sev_data_deactivate);
+	case SEV_CMD_DECOMMISSION:		return sizeof(struct sev_data_decommission);
+	case SEV_CMD_GUEST_STATUS:		return sizeof(struct sev_data_guest_status);
+	case SEV_CMD_DBG_DECRYPT:		return sizeof(struct sev_data_dbg);
+	case SEV_CMD_DBG_ENCRYPT:		return sizeof(struct sev_data_dbg);
+	case SEV_CMD_SEND_START:		return sizeof(struct sev_data_send_start);
+	case SEV_CMD_SEND_UPDATE_DATA:		return sizeof(struct sev_data_send_update_data);
+	case SEV_CMD_SEND_UPDATE_VMSA:		return sizeof(struct sev_data_send_update_vmsa);
+	case SEV_CMD_SEND_FINISH:		return sizeof(struct sev_data_send_finish);
+	case SEV_CMD_RECEIVE_START:		return sizeof(struct sev_data_receive_start);
+	case SEV_CMD_RECEIVE_FINISH:		return sizeof(struct sev_data_receive_finish);
+	case SEV_CMD_RECEIVE_UPDATE_DATA:	return sizeof(struct sev_data_receive_update_data);
+	case SEV_CMD_RECEIVE_UPDATE_VMSA:	return sizeof(struct sev_data_receive_update_vmsa);
+	case SEV_CMD_LAUNCH_UPDATE_SECRET:	return sizeof(struct sev_data_launch_secret);
+	case SEV_CMD_DOWNLOAD_FIRMWARE:		return sizeof(struct sev_data_download_firmware);
+	case SEV_CMD_GET_ID:			return sizeof(struct sev_data_get_id);
+	default:				return 0;
+	}
+
+	return 0;
+}
+
+static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
+{
+	struct psp_device *psp = psp_master;
+	struct sev_device *sev;
+	unsigned int phys_lsb, phys_msb;
+	unsigned int reg, ret = 0;
+
+	if (!psp || !psp->sev_data)
+		return -ENODEV;
+
+	if (psp_dead)
+		return -EBUSY;
+
+	sev = psp->sev_data;
+
+	/* Get the physical address of the command buffer */
+	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
+	phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
+
+	dev_dbg(sev->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
+		cmd, phys_msb, phys_lsb, psp_timeout);
+
+	print_hex_dump_debug("(in):  ", DUMP_PREFIX_OFFSET, 16, 2, data,
+			     sev_cmd_buffer_len(cmd), false);
+
+	iowrite32(phys_lsb, sev->io_regs + sev->vdata->cmdbuff_addr_lo_reg);
+	iowrite32(phys_msb, sev->io_regs + sev->vdata->cmdbuff_addr_hi_reg);
+
+	sev->int_rcvd = 0;
+
+	reg = cmd;
+	reg <<= SEV_CMDRESP_CMD_SHIFT;
+	reg |= SEV_CMDRESP_IOC;
+	iowrite32(reg, sev->io_regs + sev->vdata->cmdresp_reg);
+
+	/* wait for command completion */
+	ret = sev_wait_cmd_ioc(sev, &reg, psp_timeout);
+	if (ret) {
+		if (psp_ret)
+			*psp_ret = 0;
+
+		dev_err(sev->dev, "sev command %#x timed out, disabling PSP\n", cmd);
+		psp_dead = true;
+
+		return ret;
+	}
+
+	psp_timeout = psp_cmd_timeout;
+
+	if (psp_ret)
+		*psp_ret = reg & PSP_CMDRESP_ERR_MASK;
+
+	if (reg & PSP_CMDRESP_ERR_MASK) {
+		dev_dbg(sev->dev, "sev command %#x failed (%#010x)\n",
+			cmd, reg & PSP_CMDRESP_ERR_MASK);
+		ret = -EIO;
+	}
+
+	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+			     sev_cmd_buffer_len(cmd), false);
+
+	return ret;
+}
+
+static int sev_do_cmd(int cmd, void *data, int *psp_ret)
+{
+	int rc;
+
+	mutex_lock(&sev_cmd_mutex);
+	rc = __sev_do_cmd_locked(cmd, data, psp_ret);
+	mutex_unlock(&sev_cmd_mutex);
+
+	return rc;
+}
+
+static int __sev_platform_init_locked(int *error)
+{
+	struct psp_device *psp = psp_master;
+	struct sev_device *sev;
+	int rc = 0;
+
+	if (!psp || !psp->sev_data)
+		return -ENODEV;
+
+	sev = psp->sev_data;
+
+	if (sev->state == SEV_STATE_INIT)
+		return 0;
+
+	rc = __sev_do_cmd_locked(SEV_CMD_INIT, &sev->init_cmd_buf, error);
+	if (rc)
+		return rc;
+
+	sev->state = SEV_STATE_INIT;
+
+	/* Prepare for first SEV guest launch after INIT */
+	wbinvd_on_all_cpus();
+	rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error);
+	if (rc)
+		return rc;
+
+	dev_dbg(sev->dev, "SEV firmware initialized\n");
+
+	return rc;
+}
+
+int sev_platform_init(int *error)
+{
+	int rc;
+
+	mutex_lock(&sev_cmd_mutex);
+	rc = __sev_platform_init_locked(error);
+	mutex_unlock(&sev_cmd_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sev_platform_init);
+
+static int __sev_platform_shutdown_locked(int *error)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	int ret;
+
+	ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
+	if (ret)
+		return ret;
+
+	sev->state = SEV_STATE_UNINIT;
+	dev_dbg(sev->dev, "SEV firmware shutdown\n");
+
+	return ret;
+}
+
+static int sev_platform_shutdown(int *error)
+{
+	int rc;
+
+	mutex_lock(&sev_cmd_mutex);
+	rc = __sev_platform_shutdown_locked(NULL);
+	mutex_unlock(&sev_cmd_mutex);
+
+	return rc;
+}
+
+static int sev_get_platform_state(int *state, int *error)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	int rc;
+
+	rc = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS,
+				 &sev->status_cmd_buf, error);
+	if (rc)
+		return rc;
+
+	*state = sev->status_cmd_buf.state;
+	return rc;
+}
+
+static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
+{
+	int state, rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * The SEV spec requires that FACTORY_RESET must be issued in
+	 * UNINIT state. Before we go further lets check if any guest is
+	 * active.
+	 *
+	 * If FW is in WORKING state then deny the request otherwise issue
+	 * SHUTDOWN command do INIT -> UNINIT before issuing the FACTORY_RESET.
+	 *
+	 */
+	rc = sev_get_platform_state(&state, &argp->error);
+	if (rc)
+		return rc;
+
+	if (state == SEV_STATE_WORKING)
+		return -EBUSY;
+
+	if (state == SEV_STATE_INIT) {
+		rc = __sev_platform_shutdown_locked(&argp->error);
+		if (rc)
+			return rc;
+	}
+
+	return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
+}
+
+static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	struct sev_user_data_status *data = &sev->status_cmd_buf;
+	int ret;
+
+	ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, data, &argp->error);
+	if (ret)
+		return ret;
+
+	if (copy_to_user((void __user *)argp->data, data, sizeof(*data)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (sev->state == SEV_STATE_UNINIT) {
+		rc = __sev_platform_init_locked(&argp->error);
+		if (rc)
+			return rc;
+	}
+
+	return __sev_do_cmd_locked(cmd, NULL, &argp->error);
+}
+
+static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	struct sev_user_data_pek_csr input;
+	struct sev_data_pek_csr *data;
+	void *blob = NULL;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* userspace wants to query CSR length */
+	if (!input.address || !input.length)
+		goto cmd;
+
+	/* allocate a physically contiguous buffer to store the CSR blob */
+	if (!access_ok(input.address, input.length) ||
+	    input.length > SEV_FW_BLOB_MAX_SIZE) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	blob = kmalloc(input.length, GFP_KERNEL);
+	if (!blob) {
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	data->address = __psp_pa(blob);
+	data->len = input.length;
+
+cmd:
+	if (sev->state == SEV_STATE_UNINIT) {
+		ret = __sev_platform_init_locked(&argp->error);
+		if (ret)
+			goto e_free_blob;
+	}
+
+	ret = __sev_do_cmd_locked(SEV_CMD_PEK_CSR, data, &argp->error);
+
+	 /* If we query the CSR length, FW responded with expected data. */
+	input.length = data->len;
+
+	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+		ret = -EFAULT;
+		goto e_free_blob;
+	}
+
+	if (blob) {
+		if (copy_to_user((void __user *)input.address, blob, input.length))
+			ret = -EFAULT;
+	}
+
+e_free_blob:
+	kfree(blob);
+e_free:
+	kfree(data);
+	return ret;
+}
+
+void *psp_copy_user_blob(u64 __user uaddr, u32 len)
+{
+	if (!uaddr || !len)
+		return ERR_PTR(-EINVAL);
+
+	/* verify that blob length does not exceed our limit */
+	if (len > SEV_FW_BLOB_MAX_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	return memdup_user((void __user *)(uintptr_t)uaddr, len);
+}
+EXPORT_SYMBOL_GPL(psp_copy_user_blob);
+
+static int sev_get_api_version(void)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	struct sev_user_data_status *status;
+	int error = 0, ret;
+
+	status = &sev->status_cmd_buf;
+	ret = sev_platform_status(status, &error);
+	if (ret) {
+		dev_err(sev->dev,
+			"SEV: failed to get status. Error: %#x\n", error);
+		return 1;
+	}
+
+	sev->api_major = status->api_major;
+	sev->api_minor = status->api_minor;
+	sev->build = status->build;
+	sev->state = status->state;
+
+	return 0;
+}
+
+static int sev_get_firmware(struct device *dev,
+			    const struct firmware **firmware)
+{
+	char fw_name_specific[SEV_FW_NAME_SIZE];
+	char fw_name_subset[SEV_FW_NAME_SIZE];
+
+	snprintf(fw_name_specific, sizeof(fw_name_specific),
+		 "amd/amd_sev_fam%.2xh_model%.2xh.sbin",
+		 boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+	snprintf(fw_name_subset, sizeof(fw_name_subset),
+		 "amd/amd_sev_fam%.2xh_model%.1xxh.sbin",
+		 boot_cpu_data.x86, (boot_cpu_data.x86_model & 0xf0) >> 4);
+
+	/* Check for SEV FW for a particular model.
+	 * Ex. amd_sev_fam17h_model00h.sbin for Family 17h Model 00h
+	 *
+	 * or
+	 *
+	 * Check for SEV FW common to a subset of models.
+	 * Ex. amd_sev_fam17h_model0xh.sbin for
+	 *     Family 17h Model 00h -- Family 17h Model 0Fh
+	 *
+	 * or
+	 *
+	 * Fall-back to using generic name: sev.fw
+	 */
+	if ((firmware_request_nowarn(firmware, fw_name_specific, dev) >= 0) ||
+	    (firmware_request_nowarn(firmware, fw_name_subset, dev) >= 0) ||
+	    (firmware_request_nowarn(firmware, SEV_FW_FILE, dev) >= 0))
+		return 0;
+
+	return -ENOENT;
+}
+
+/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
+static int sev_update_firmware(struct device *dev)
+{
+	struct sev_data_download_firmware *data;
+	const struct firmware *firmware;
+	int ret, error, order;
+	struct page *p;
+	u64 data_size;
+
+	if (sev_get_firmware(dev, &firmware) == -ENOENT) {
+		dev_dbg(dev, "No SEV firmware file present\n");
+		return -1;
+	}
+
+	/*
+	 * SEV FW expects the physical address given to it to be 32
+	 * byte aligned. Memory allocated has structure placed at the
+	 * beginning followed by the firmware being passed to the SEV
+	 * FW. Allocate enough memory for data structure + alignment
+	 * padding + SEV FW.
+	 */
+	data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
+
+	order = get_order(firmware->size + data_size);
+	p = alloc_pages(GFP_KERNEL, order);
+	if (!p) {
+		ret = -1;
+		goto fw_err;
+	}
+
+	/*
+	 * Copy firmware data to a kernel allocated contiguous
+	 * memory region.
+	 */
+	data = page_address(p);
+	memcpy(page_address(p) + data_size, firmware->data, firmware->size);
+
+	data->address = __psp_pa(page_address(p) + data_size);
+	data->len = firmware->size;
+
+	ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
+	if (ret)
+		dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
+	else
+		dev_info(dev, "SEV firmware update successful\n");
+
+	__free_pages(p, order);
+
+fw_err:
+	release_firmware(firmware);
+
+	return ret;
+}
+
+static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	struct sev_user_data_pek_cert_import input;
+	struct sev_data_pek_cert_import *data;
+	void *pek_blob, *oca_blob;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* copy PEK certificate blobs from userspace */
+	pek_blob = psp_copy_user_blob(input.pek_cert_address, input.pek_cert_len);
+	if (IS_ERR(pek_blob)) {
+		ret = PTR_ERR(pek_blob);
+		goto e_free;
+	}
+
+	data->pek_cert_address = __psp_pa(pek_blob);
+	data->pek_cert_len = input.pek_cert_len;
+
+	/* copy PEK certificate blobs from userspace */
+	oca_blob = psp_copy_user_blob(input.oca_cert_address, input.oca_cert_len);
+	if (IS_ERR(oca_blob)) {
+		ret = PTR_ERR(oca_blob);
+		goto e_free_pek;
+	}
+
+	data->oca_cert_address = __psp_pa(oca_blob);
+	data->oca_cert_len = input.oca_cert_len;
+
+	/* If platform is not in INIT state then transition it to INIT */
+	if (sev->state != SEV_STATE_INIT) {
+		ret = __sev_platform_init_locked(&argp->error);
+		if (ret)
+			goto e_free_oca;
+	}
+
+	ret = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, data, &argp->error);
+
+e_free_oca:
+	kfree(oca_blob);
+e_free_pek:
+	kfree(pek_blob);
+e_free:
+	kfree(data);
+	return ret;
+}
+
+static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
+{
+	struct sev_user_data_get_id2 input;
+	struct sev_data_get_id *data;
+	void *id_blob = NULL;
+	int ret;
+
+	/* SEV GET_ID is available from SEV API v0.16 and up */
+	if (!sev_version_greater_or_equal(0, 16))
+		return -ENOTSUPP;
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	/* Check if we have write access to the userspace buffer */
+	if (input.address &&
+	    input.length &&
+	    !access_ok(input.address, input.length))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	if (input.address && input.length) {
+		id_blob = kmalloc(input.length, GFP_KERNEL);
+		if (!id_blob) {
+			kfree(data);
+			return -ENOMEM;
+		}
+
+		data->address = __psp_pa(id_blob);
+		data->len = input.length;
+	}
+
+	ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
+
+	/*
+	 * Firmware will return the length of the ID value (either the minimum
+	 * required length or the actual length written), return it to the user.
+	 */
+	input.length = data->len;
+
+	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	if (id_blob) {
+		if (copy_to_user((void __user *)input.address,
+				 id_blob, data->len)) {
+			ret = -EFAULT;
+			goto e_free;
+		}
+	}
+
+e_free:
+	kfree(id_blob);
+	kfree(data);
+
+	return ret;
+}
+
+static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
+{
+	struct sev_data_get_id *data;
+	u64 data_size, user_size;
+	void *id_blob, *mem;
+	int ret;
+
+	/* SEV GET_ID available from SEV API v0.16 and up */
+	if (!sev_version_greater_or_equal(0, 16))
+		return -ENOTSUPP;
+
+	/* SEV FW expects the buffer it fills with the ID to be
+	 * 8-byte aligned. Memory allocated should be enough to
+	 * hold data structure + alignment padding + memory
+	 * where SEV FW writes the ID.
+	 */
+	data_size = ALIGN(sizeof(struct sev_data_get_id), 8);
+	user_size = sizeof(struct sev_user_data_get_id);
+
+	mem = kzalloc(data_size + user_size, GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	data = mem;
+	id_blob = mem + data_size;
+
+	data->address = __psp_pa(id_blob);
+	data->len = user_size;
+
+	ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
+	if (!ret) {
+		if (copy_to_user((void __user *)argp->data, id_blob, data->len))
+			ret = -EFAULT;
+	}
+
+	kfree(mem);
+
+	return ret;
+}
+
+static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	struct sev_user_data_pdh_cert_export input;
+	void *pdh_blob = NULL, *cert_blob = NULL;
+	struct sev_data_pdh_cert_export *data;
+	int ret;
+
+	/* If platform is not in INIT state then transition it to INIT. */
+	if (sev->state != SEV_STATE_INIT) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		ret = __sev_platform_init_locked(&argp->error);
+		if (ret)
+			return ret;
+	}
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Userspace wants to query the certificate length. */
+	if (!input.pdh_cert_address ||
+	    !input.pdh_cert_len ||
+	    !input.cert_chain_address)
+		goto cmd;
+
+	/* Allocate a physically contiguous buffer to store the PDH blob. */
+	if ((input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) ||
+	    !access_ok(input.pdh_cert_address, input.pdh_cert_len)) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	/* Allocate a physically contiguous buffer to store the cert chain blob. */
+	if ((input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) ||
+	    !access_ok(input.cert_chain_address, input.cert_chain_len)) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
+	if (!pdh_blob) {
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	data->pdh_cert_address = __psp_pa(pdh_blob);
+	data->pdh_cert_len = input.pdh_cert_len;
+
+	cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
+	if (!cert_blob) {
+		ret = -ENOMEM;
+		goto e_free_pdh;
+	}
+
+	data->cert_chain_address = __psp_pa(cert_blob);
+	data->cert_chain_len = input.cert_chain_len;
+
+cmd:
+	ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, data, &argp->error);
+
+	/* If we query the length, FW responded with expected data. */
+	input.cert_chain_len = data->cert_chain_len;
+	input.pdh_cert_len = data->pdh_cert_len;
+
+	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+		ret = -EFAULT;
+		goto e_free_cert;
+	}
+
+	if (pdh_blob) {
+		if (copy_to_user((void __user *)input.pdh_cert_address,
+				 pdh_blob, input.pdh_cert_len)) {
+			ret = -EFAULT;
+			goto e_free_cert;
+		}
+	}
+
+	if (cert_blob) {
+		if (copy_to_user((void __user *)input.cert_chain_address,
+				 cert_blob, input.cert_chain_len))
+			ret = -EFAULT;
+	}
+
+e_free_cert:
+	kfree(cert_blob);
+e_free_pdh:
+	kfree(pdh_blob);
+e_free:
+	kfree(data);
+	return ret;
+}
+
+static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct sev_issue_cmd input;
+	int ret = -EFAULT;
+
+	if (!psp_master || !psp_master->sev_data)
+		return -ENODEV;
+
+	if (ioctl != SEV_ISSUE_CMD)
+		return -EINVAL;
+
+	if (copy_from_user(&input, argp, sizeof(struct sev_issue_cmd)))
+		return -EFAULT;
+
+	if (input.cmd > SEV_MAX)
+		return -EINVAL;
+
+	mutex_lock(&sev_cmd_mutex);
+
+	switch (input.cmd) {
+
+	case SEV_FACTORY_RESET:
+		ret = sev_ioctl_do_reset(&input);
+		break;
+	case SEV_PLATFORM_STATUS:
+		ret = sev_ioctl_do_platform_status(&input);
+		break;
+	case SEV_PEK_GEN:
+		ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input);
+		break;
+	case SEV_PDH_GEN:
+		ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input);
+		break;
+	case SEV_PEK_CSR:
+		ret = sev_ioctl_do_pek_csr(&input);
+		break;
+	case SEV_PEK_CERT_IMPORT:
+		ret = sev_ioctl_do_pek_import(&input);
+		break;
+	case SEV_PDH_CERT_EXPORT:
+		ret = sev_ioctl_do_pdh_export(&input);
+		break;
+	case SEV_GET_ID:
+		pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n");
+		ret = sev_ioctl_do_get_id(&input);
+		break;
+	case SEV_GET_ID2:
+		ret = sev_ioctl_do_get_id2(&input);
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (copy_to_user(argp, &input, sizeof(struct sev_issue_cmd)))
+		ret = -EFAULT;
+out:
+	mutex_unlock(&sev_cmd_mutex);
+
+	return ret;
+}
+
+static const struct file_operations sev_fops = {
+	.owner	= THIS_MODULE,
+	.unlocked_ioctl = sev_ioctl,
+};
+
+int sev_platform_status(struct sev_user_data_status *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_PLATFORM_STATUS, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_platform_status);
+
+int sev_guest_deactivate(struct sev_data_deactivate *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_DEACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_deactivate);
+
+int sev_guest_activate(struct sev_data_activate *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_ACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_activate);
+
+int sev_guest_decommission(struct sev_data_decommission *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_DECOMMISSION, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_decommission);
+
+int sev_guest_df_flush(int *error)
+{
+	return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_df_flush);
+
+static void sev_exit(struct kref *ref)
+{
+	struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount);
+
+	misc_deregister(&misc_dev->misc);
+}
+
+static int sev_misc_init(struct sev_device *sev)
+{
+	struct device *dev = sev->dev;
+	int ret;
+
+	/*
+	 * SEV feature support can be detected on multiple devices but the SEV
+	 * FW commands must be issued on the master. During probe, we do not
+	 * know the master hence we create /dev/sev on the first device probe.
+	 * sev_do_cmd() finds the right master device to which to issue the
+	 * command to the firmware.
+	 */
+	if (!misc_dev) {
+		struct miscdevice *misc;
+
+		misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL);
+		if (!misc_dev)
+			return -ENOMEM;
+
+		misc = &misc_dev->misc;
+		misc->minor = MISC_DYNAMIC_MINOR;
+		misc->name = DEVICE_NAME;
+		misc->fops = &sev_fops;
+
+		ret = misc_register(misc);
+		if (ret)
+			return ret;
+
+		kref_init(&misc_dev->refcount);
+	} else {
+		kref_get(&misc_dev->refcount);
+	}
+
+	init_waitqueue_head(&sev->int_queue);
+	sev->misc = misc_dev;
+	dev_dbg(dev, "registered SEV device\n");
+
+	return 0;
+}
+
+int sev_dev_init(struct psp_device *psp)
+{
+	struct device *dev = psp->dev;
+	struct sev_device *sev;
+	int ret = -ENOMEM;
+
+	sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL);
+	if (!sev)
+		goto e_err;
+
+	psp->sev_data = sev;
+
+	sev->dev = dev;
+	sev->psp = psp;
+
+	sev->io_regs = psp->io_regs;
+
+	sev->vdata = (struct sev_vdata *)psp->vdata->sev;
+	if (!sev->vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "sev: missing driver data\n");
+		goto e_err;
+	}
+
+	psp_set_sev_irq_handler(psp, sev_irq_handler, sev);
+
+	ret = sev_misc_init(sev);
+	if (ret)
+		goto e_irq;
+
+	dev_notice(dev, "sev enabled\n");
+
+	return 0;
+
+e_irq:
+	psp_clear_sev_irq_handler(psp);
+e_err:
+	psp->sev_data = NULL;
+
+	dev_notice(dev, "sev initialization failed\n");
+
+	return ret;
+}
+
+void sev_dev_destroy(struct psp_device *psp)
+{
+	struct sev_device *sev = psp->sev_data;
+
+	if (!sev)
+		return;
+
+	if (sev->misc)
+		kref_put(&misc_dev->refcount, sev_exit);
+
+	psp_clear_sev_irq_handler(psp);
+}
+
+int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
+				void *data, int *error)
+{
+	if (!filep || filep->f_op != &sev_fops)
+		return -EBADF;
+
+	return sev_do_cmd(cmd, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
+
+void sev_pci_init(void)
+{
+	struct sev_device *sev = psp_master->sev_data;
+	int error, rc;
+
+	if (!sev)
+		return;
+
+	psp_timeout = psp_probe_timeout;
+
+	if (sev_get_api_version())
+		goto err;
+
+	/*
+	 * If platform is not in UNINIT state then firmware upgrade and/or
+	 * platform INIT command will fail. These command require UNINIT state.
+	 *
+	 * In a normal boot we should never run into case where the firmware
+	 * is not in UNINIT state on boot. But in case of kexec boot, a reboot
+	 * may not go through a typical shutdown sequence and may leave the
+	 * firmware in INIT or WORKING state.
+	 */
+
+	if (sev->state != SEV_STATE_UNINIT) {
+		sev_platform_shutdown(NULL);
+		sev->state = SEV_STATE_UNINIT;
+	}
+
+	if (sev_version_greater_or_equal(0, 15) &&
+	    sev_update_firmware(sev->dev) == 0)
+		sev_get_api_version();
+
+	/* Initialize the platform */
+	rc = sev_platform_init(&error);
+	if (rc && (error == SEV_RET_SECURE_DATA_INVALID)) {
+		/*
+		 * INIT command returned an integrity check failure
+		 * status code, meaning that firmware load and
+		 * validation of SEV related persistent data has
+		 * failed and persistent state has been erased.
+		 * Retrying INIT command here should succeed.
+		 */
+		dev_dbg(sev->dev, "SEV: retrying INIT command");
+		rc = sev_platform_init(&error);
+	}
+
+	if (rc) {
+		dev_err(sev->dev, "SEV: failed to INIT error %#x\n", error);
+		return;
+	}
+
+	dev_info(sev->dev, "SEV API:%d.%d build:%d\n", sev->api_major,
+		 sev->api_minor, sev->build);
+
+	return;
+
+err:
+	psp_master->sev_data = NULL;
+}
+
+void sev_pci_exit(void)
+{
+	if (!psp_master->sev_data)
+		return;
+
+	sev_platform_shutdown(NULL);
+}
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
new file mode 100644
index 000000000000..dd5c4fe82914
--- /dev/null
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Platform Security Processor (PSP) interface driver
+ *
+ * Copyright (C) 2017-2019 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __SEV_DEV_H__
+#define __SEV_DEV_H__
+
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/psp-sev.h>
+#include <linux/miscdevice.h>
+#include <linux/capability.h>
+
+#define SEV_CMD_COMPLETE		BIT(1)
+#define SEV_CMDRESP_CMD_SHIFT		16
+#define SEV_CMDRESP_IOC			BIT(0)
+
+struct sev_misc_dev {
+	struct kref refcount;
+	struct miscdevice misc;
+};
+
+struct sev_device {
+	struct device *dev;
+	struct psp_device *psp;
+
+	void __iomem *io_regs;
+
+	struct sev_vdata *vdata;
+
+	int state;
+	unsigned int int_rcvd;
+	wait_queue_head_t int_queue;
+	struct sev_misc_dev *misc;
+	struct sev_user_data_status status_cmd_buf;
+	struct sev_data_init init_cmd_buf;
+
+	u8 api_major;
+	u8 api_minor;
+	u8 build;
+};
+
+int sev_dev_init(struct psp_device *psp);
+void sev_dev_destroy(struct psp_device *psp);
+
+void sev_pci_init(void);
+void sev_pci_exit(void);
+
+#endif /* __SEV_DEV_H */
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 53c12562d31e..423594608ad1 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -2,7 +2,7 @@
 /*
  * AMD Secure Processor driver
  *
- * Copyright (C) 2017-2018 Advanced Micro Devices, Inc.
+ * Copyright (C) 2017-2019 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -39,10 +39,23 @@ struct ccp_vdata {
 	const unsigned int rsamax;
 };
 
-struct psp_vdata {
+struct sev_vdata {
 	const unsigned int cmdresp_reg;
 	const unsigned int cmdbuff_addr_lo_reg;
 	const unsigned int cmdbuff_addr_hi_reg;
+};
+
+struct tee_vdata {
+	const unsigned int cmdresp_reg;
+	const unsigned int cmdbuff_addr_lo_reg;
+	const unsigned int cmdbuff_addr_hi_reg;
+	const unsigned int ring_wptr_reg;
+	const unsigned int ring_rptr_reg;
+};
+
+struct psp_vdata {
+	const struct sev_vdata *sev;
+	const struct tee_vdata *tee;
 	const unsigned int feature_reg;
 	const unsigned int inten_reg;
 	const unsigned int intsts_reg;
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index b29d2e663e10..56c1f61c0f84 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -2,7 +2,7 @@
 /*
  * AMD Secure Processor device driver
  *
- * Copyright (C) 2013,2018 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2019 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -262,19 +262,42 @@ static int sp_pci_resume(struct pci_dev *pdev)
 #endif
 
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
-static const struct psp_vdata pspv1 = {
+static const struct sev_vdata sevv1 = {
 	.cmdresp_reg		= 0x10580,
 	.cmdbuff_addr_lo_reg	= 0x105e0,
 	.cmdbuff_addr_hi_reg	= 0x105e4,
+};
+
+static const struct sev_vdata sevv2 = {
+	.cmdresp_reg		= 0x10980,
+	.cmdbuff_addr_lo_reg	= 0x109e0,
+	.cmdbuff_addr_hi_reg	= 0x109e4,
+};
+
+static const struct tee_vdata teev1 = {
+	.cmdresp_reg		= 0x10544,
+	.cmdbuff_addr_lo_reg	= 0x10548,
+	.cmdbuff_addr_hi_reg	= 0x1054c,
+	.ring_wptr_reg          = 0x10550,
+	.ring_rptr_reg          = 0x10554,
+};
+
+static const struct psp_vdata pspv1 = {
+	.sev			= &sevv1,
 	.feature_reg		= 0x105fc,
 	.inten_reg		= 0x10610,
 	.intsts_reg		= 0x10614,
 };
 
 static const struct psp_vdata pspv2 = {
-	.cmdresp_reg		= 0x10980,
-	.cmdbuff_addr_lo_reg	= 0x109e0,
-	.cmdbuff_addr_hi_reg	= 0x109e4,
+	.sev			= &sevv2,
+	.feature_reg		= 0x109fc,
+	.inten_reg		= 0x10690,
+	.intsts_reg		= 0x10694,
+};
+
+static const struct psp_vdata pspv3 = {
+	.tee			= &teev1,
 	.feature_reg		= 0x109fc,
 	.inten_reg		= 0x10690,
 	.intsts_reg		= 0x10694,
@@ -312,12 +335,22 @@ static const struct sp_dev_vdata dev_vdata[] = {
 		.psp_vdata = &pspv2,
 #endif
 	},
+	{	/* 4 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+		.psp_vdata = &pspv3,
+#endif
+	},
 };
 static const struct pci_device_id sp_pci_table[] = {
 	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
 	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
 	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
 	{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
+	{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
 	/* Last entry must be zero */
 	{ 0, }
 };
diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
new file mode 100644
index 000000000000..5e697a90ea7f
--- /dev/null
+++ b/drivers/crypto/ccp/tee-dev.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: MIT
+/*
+ * AMD Trusted Execution Environment (TEE) interface
+ *
+ * Author: Rijo Thomas <Rijo-john.Thomas@amd.com>
+ * Author: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
+ *
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/psp-sev.h>
+#include <linux/psp-tee.h>
+
+#include "psp-dev.h"
+#include "tee-dev.h"
+
+static bool psp_dead;
+
+static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
+{
+	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
+	void *start_addr;
+
+	if (!ring_size)
+		return -EINVAL;
+
+	/* We need actual physical address instead of DMA address, since
+	 * Trusted OS running on AMD Secure Processor will map this region
+	 */
+	start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size));
+	if (!start_addr)
+		return -ENOMEM;
+
+	rb_mgr->ring_start = start_addr;
+	rb_mgr->ring_size = ring_size;
+	rb_mgr->ring_pa = __psp_pa(start_addr);
+	mutex_init(&rb_mgr->mutex);
+
+	return 0;
+}
+
+static void tee_free_ring(struct psp_tee_device *tee)
+{
+	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
+
+	if (!rb_mgr->ring_start)
+		return;
+
+	free_pages((unsigned long)rb_mgr->ring_start,
+		   get_order(rb_mgr->ring_size));
+
+	rb_mgr->ring_start = NULL;
+	rb_mgr->ring_size = 0;
+	rb_mgr->ring_pa = 0;
+	mutex_destroy(&rb_mgr->mutex);
+}
+
+static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout,
+			     unsigned int *reg)
+{
+	/* ~10ms sleep per loop => nloop = timeout * 100 */
+	int nloop = timeout * 100;
+
+	while (--nloop) {
+		*reg = ioread32(tee->io_regs + tee->vdata->cmdresp_reg);
+		if (*reg & PSP_CMDRESP_RESP)
+			return 0;
+
+		usleep_range(10000, 10100);
+	}
+
+	dev_err(tee->dev, "tee: command timed out, disabling PSP\n");
+	psp_dead = true;
+
+	return -ETIMEDOUT;
+}
+
+static
+struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
+{
+	struct tee_init_ring_cmd *cmd;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return NULL;
+
+	cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa);
+	cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa);
+	cmd->size = tee->rb_mgr.ring_size;
+
+	dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n",
+		cmd->hi_addr, cmd->low_addr, cmd->size);
+
+	return cmd;
+}
+
+static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
+{
+	kfree(cmd);
+}
+
+static int tee_init_ring(struct psp_tee_device *tee)
+{
+	int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
+	struct tee_init_ring_cmd *cmd;
+	phys_addr_t cmd_buffer;
+	unsigned int reg;
+	int ret;
+
+	BUILD_BUG_ON(sizeof(struct tee_ring_cmd) != 1024);
+
+	ret = tee_alloc_ring(tee, ring_size);
+	if (ret) {
+		dev_err(tee->dev, "tee: ring allocation failed %d\n", ret);
+		return ret;
+	}
+
+	tee->rb_mgr.wptr = 0;
+
+	cmd = tee_alloc_cmd_buffer(tee);
+	if (!cmd) {
+		tee_free_ring(tee);
+		return -ENOMEM;
+	}
+
+	cmd_buffer = __psp_pa((void *)cmd);
+
+	/* Send command buffer details to Trusted OS by writing to
+	 * CPU-PSP message registers
+	 */
+
+	iowrite32(lower_32_bits(cmd_buffer),
+		  tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg);
+	iowrite32(upper_32_bits(cmd_buffer),
+		  tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg);
+	iowrite32(TEE_RING_INIT_CMD,
+		  tee->io_regs + tee->vdata->cmdresp_reg);
+
+	ret = tee_wait_cmd_poll(tee, TEE_DEFAULT_TIMEOUT, &reg);
+	if (ret) {
+		dev_err(tee->dev, "tee: ring init command timed out\n");
+		tee_free_ring(tee);
+		goto free_buf;
+	}
+
+	if (reg & PSP_CMDRESP_ERR_MASK) {
+		dev_err(tee->dev, "tee: ring init command failed (%#010x)\n",
+			reg & PSP_CMDRESP_ERR_MASK);
+		tee_free_ring(tee);
+		ret = -EIO;
+	}
+
+free_buf:
+	tee_free_cmd_buffer(cmd);
+
+	return ret;
+}
+
+static void tee_destroy_ring(struct psp_tee_device *tee)
+{
+	unsigned int reg;
+	int ret;
+
+	if (!tee->rb_mgr.ring_start)
+		return;
+
+	if (psp_dead)
+		goto free_ring;
+
+	iowrite32(TEE_RING_DESTROY_CMD,
+		  tee->io_regs + tee->vdata->cmdresp_reg);
+
+	ret = tee_wait_cmd_poll(tee, TEE_DEFAULT_TIMEOUT, &reg);
+	if (ret) {
+		dev_err(tee->dev, "tee: ring destroy command timed out\n");
+	} else if (reg & PSP_CMDRESP_ERR_MASK) {
+		dev_err(tee->dev, "tee: ring destroy command failed (%#010x)\n",
+			reg & PSP_CMDRESP_ERR_MASK);
+	}
+
+free_ring:
+	tee_free_ring(tee);
+}
+
+int tee_dev_init(struct psp_device *psp)
+{
+	struct device *dev = psp->dev;
+	struct psp_tee_device *tee;
+	int ret;
+
+	ret = -ENOMEM;
+	tee = devm_kzalloc(dev, sizeof(*tee), GFP_KERNEL);
+	if (!tee)
+		goto e_err;
+
+	psp->tee_data = tee;
+
+	tee->dev = dev;
+	tee->psp = psp;
+
+	tee->io_regs = psp->io_regs;
+
+	tee->vdata = (struct tee_vdata *)psp->vdata->tee;
+	if (!tee->vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "tee: missing driver data\n");
+		goto e_err;
+	}
+
+	ret = tee_init_ring(tee);
+	if (ret) {
+		dev_err(dev, "tee: failed to init ring buffer\n");
+		goto e_err;
+	}
+
+	dev_notice(dev, "tee enabled\n");
+
+	return 0;
+
+e_err:
+	psp->tee_data = NULL;
+
+	dev_notice(dev, "tee initialization failed\n");
+
+	return ret;
+}
+
+void tee_dev_destroy(struct psp_device *psp)
+{
+	struct psp_tee_device *tee = psp->tee_data;
+
+	if (!tee)
+		return;
+
+	tee_destroy_ring(tee);
+}
+
+static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
+			  void *buf, size_t len, struct tee_ring_cmd **resp)
+{
+	struct tee_ring_cmd *cmd;
+	u32 rptr, wptr;
+	int nloop = 1000, ret = 0;
+
+	*resp = NULL;
+
+	mutex_lock(&tee->rb_mgr.mutex);
+
+	wptr = tee->rb_mgr.wptr;
+
+	/* Check if ring buffer is full */
+	do {
+		rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg);
+
+		if (!(wptr + sizeof(struct tee_ring_cmd) == rptr))
+			break;
+
+		dev_info(tee->dev, "tee: ring buffer full. rptr = %u wptr = %u\n",
+			 rptr, wptr);
+
+		/* Wait if ring buffer is full */
+		mutex_unlock(&tee->rb_mgr.mutex);
+		schedule_timeout_interruptible(msecs_to_jiffies(10));
+		mutex_lock(&tee->rb_mgr.mutex);
+
+	} while (--nloop);
+
+	if (!nloop && (wptr + sizeof(struct tee_ring_cmd) == rptr)) {
+		dev_err(tee->dev, "tee: ring buffer full. rptr = %u wptr = %u\n",
+			rptr, wptr);
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	/* Pointer to empty data entry in ring buffer */
+	cmd = (struct tee_ring_cmd *)(tee->rb_mgr.ring_start + wptr);
+
+	/* Write command data into ring buffer */
+	cmd->cmd_id = cmd_id;
+	cmd->cmd_state = TEE_CMD_STATE_INIT;
+	memset(&cmd->buf[0], 0, sizeof(cmd->buf));
+	memcpy(&cmd->buf[0], buf, len);
+
+	/* Update local copy of write pointer */
+	tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd);
+	if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size)
+		tee->rb_mgr.wptr = 0;
+
+	/* Trigger interrupt to Trusted OS */
+	iowrite32(tee->rb_mgr.wptr, tee->io_regs + tee->vdata->ring_wptr_reg);
+
+	/* The response is provided by Trusted OS in same
+	 * location as submitted data entry within ring buffer.
+	 */
+	*resp = cmd;
+
+unlock:
+	mutex_unlock(&tee->rb_mgr.mutex);
+
+	return ret;
+}
+
+static int tee_wait_cmd_completion(struct psp_tee_device *tee,
+				   struct tee_ring_cmd *resp,
+				   unsigned int timeout)
+{
+	/* ~5ms sleep per loop => nloop = timeout * 200 */
+	int nloop = timeout * 200;
+
+	while (--nloop) {
+		if (resp->cmd_state == TEE_CMD_STATE_COMPLETED)
+			return 0;
+
+		usleep_range(5000, 5100);
+	}
+
+	dev_err(tee->dev, "tee: command 0x%x timed out, disabling PSP\n",
+		resp->cmd_id);
+
+	psp_dead = true;
+
+	return -ETIMEDOUT;
+}
+
+int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
+			u32 *status)
+{
+	struct psp_device *psp = psp_get_master_device();
+	struct psp_tee_device *tee;
+	struct tee_ring_cmd *resp;
+	int ret;
+
+	if (!buf || !status || !len || len > sizeof(resp->buf))
+		return -EINVAL;
+
+	*status = 0;
+
+	if (!psp || !psp->tee_data)
+		return -ENODEV;
+
+	if (psp_dead)
+		return -EBUSY;
+
+	tee = psp->tee_data;
+
+	ret = tee_submit_cmd(tee, cmd_id, buf, len, &resp);
+	if (ret)
+		return ret;
+
+	ret = tee_wait_cmd_completion(tee, resp, TEE_DEFAULT_TIMEOUT);
+	if (ret)
+		return ret;
+
+	memcpy(buf, &resp->buf[0], len);
+	*status = resp->status;
+
+	return 0;
+}
+EXPORT_SYMBOL(psp_tee_process_cmd);
+
+int psp_check_tee_status(void)
+{
+	struct psp_device *psp = psp_get_master_device();
+
+	if (!psp || !psp->tee_data)
+		return -ENODEV;
+
+	return 0;
+}
+EXPORT_SYMBOL(psp_check_tee_status);
diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
new file mode 100644
index 000000000000..f09960112115
--- /dev/null
+++ b/drivers/crypto/ccp/tee-dev.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Author: Rijo Thomas <Rijo-john.Thomas@amd.com>
+ * Author: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
+ *
+ */
+
+/* This file describes the TEE communication interface between host and AMD
+ * Secure Processor
+ */
+
+#ifndef __TEE_DEV_H__
+#define __TEE_DEV_H__
+
+#include <linux/device.h>
+#include <linux/mutex.h>
+
+#define TEE_DEFAULT_TIMEOUT		10
+#define MAX_BUFFER_SIZE			992
+
+/**
+ * enum tee_ring_cmd_id - TEE interface commands for ring buffer configuration
+ * @TEE_RING_INIT_CMD:		Initialize ring buffer
+ * @TEE_RING_DESTROY_CMD:	Destroy ring buffer
+ * @TEE_RING_MAX_CMD:		Maximum command id
+ */
+enum tee_ring_cmd_id {
+	TEE_RING_INIT_CMD		= 0x00010000,
+	TEE_RING_DESTROY_CMD		= 0x00020000,
+	TEE_RING_MAX_CMD		= 0x000F0000,
+};
+
+/**
+ * struct tee_init_ring_cmd - Command to init TEE ring buffer
+ * @low_addr:  bits [31:0] of the physical address of ring buffer
+ * @hi_addr:   bits [63:32] of the physical address of ring buffer
+ * @size:      size of ring buffer in bytes
+ */
+struct tee_init_ring_cmd {
+	u32 low_addr;
+	u32 hi_addr;
+	u32 size;
+};
+
+#define MAX_RING_BUFFER_ENTRIES		32
+
+/**
+ * struct ring_buf_manager - Helper structure to manage ring buffer.
+ * @ring_start:  starting address of ring buffer
+ * @ring_size:   size of ring buffer in bytes
+ * @ring_pa:     physical address of ring buffer
+ * @wptr:        index to the last written entry in ring buffer
+ */
+struct ring_buf_manager {
+	struct mutex mutex;	/* synchronizes access to ring buffer */
+	void *ring_start;
+	u32 ring_size;
+	phys_addr_t ring_pa;
+	u32 wptr;
+};
+
+struct psp_tee_device {
+	struct device *dev;
+	struct psp_device *psp;
+	void __iomem *io_regs;
+	struct tee_vdata *vdata;
+	struct ring_buf_manager rb_mgr;
+};
+
+/**
+ * enum tee_cmd_state - TEE command states for the ring buffer interface
+ * @TEE_CMD_STATE_INIT:      initial state of command when sent from host
+ * @TEE_CMD_STATE_PROCESS:   command being processed by TEE environment
+ * @TEE_CMD_STATE_COMPLETED: command processing completed
+ */
+enum tee_cmd_state {
+	TEE_CMD_STATE_INIT,
+	TEE_CMD_STATE_PROCESS,
+	TEE_CMD_STATE_COMPLETED,
+};
+
+/**
+ * struct tee_ring_cmd - Structure of the command buffer in TEE ring
+ * @cmd_id:      refers to &enum tee_cmd_id. Command id for the ring buffer
+ *               interface
+ * @cmd_state:   refers to &enum tee_cmd_state
+ * @status:      status of TEE command execution
+ * @res0:        reserved region
+ * @pdata:       private data (currently unused)
+ * @res1:        reserved region
+ * @buf:         TEE command specific buffer
+ */
+struct tee_ring_cmd {
+	u32 cmd_id;
+	u32 cmd_state;
+	u32 status;
+	u32 res0[1];
+	u64 pdata;
+	u32 res1[2];
+	u8 buf[MAX_BUFFER_SIZE];
+
+	/* Total size: 1024 bytes */
+} __packed;
+
+int tee_dev_init(struct psp_device *psp);
+void tee_dev_destroy(struct psp_device *psp);
+
+#endif /* __TEE_DEV_H__ */
diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
index d3e8faa03f15..2fc0e0da790b 100644
--- a/drivers/crypto/ccree/cc_aead.c
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -237,7 +237,7 @@ static void cc_aead_complete(struct device *dev, void *cc_req, int err)
 			 * revealed the decrypted message --> zero its memory.
 			 */
 			sg_zero_buffer(areq->dst, sg_nents(areq->dst),
-				       areq->cryptlen, 0);
+				       areq->cryptlen, areq->assoclen);
 			err = -EBADMSG;
 		}
 	/*ENCRYPT*/
@@ -293,7 +293,8 @@ static unsigned int xcbc_setkey(struct cc_hw_desc *desc,
 	return 4;
 }
 
-static int hmac_setkey(struct cc_hw_desc *desc, struct cc_aead_ctx *ctx)
+static unsigned int hmac_setkey(struct cc_hw_desc *desc,
+				struct cc_aead_ctx *ctx)
 {
 	unsigned int hmac_pad_const[2] = { HMAC_IPAD_CONST, HMAC_OPAD_CONST };
 	unsigned int digest_ofs = 0;
@@ -384,13 +385,13 @@ static int validate_keys_sizes(struct cc_aead_ctx *ctx)
 			return -EINVAL;
 		break;
 	default:
-		dev_err(dev, "Invalid auth_mode=%d\n", ctx->auth_mode);
+		dev_dbg(dev, "Invalid auth_mode=%d\n", ctx->auth_mode);
 		return -EINVAL;
 	}
 	/* Check cipher key size */
 	if (ctx->flow_mode == S_DIN_to_DES) {
 		if (ctx->enc_keylen != DES3_EDE_KEY_SIZE) {
-			dev_err(dev, "Invalid cipher(3DES) key size: %u\n",
+			dev_dbg(dev, "Invalid cipher(3DES) key size: %u\n",
 				ctx->enc_keylen);
 			return -EINVAL;
 		}
@@ -398,7 +399,7 @@ static int validate_keys_sizes(struct cc_aead_ctx *ctx)
 		if (ctx->enc_keylen != AES_KEYSIZE_128 &&
 		    ctx->enc_keylen != AES_KEYSIZE_192 &&
 		    ctx->enc_keylen != AES_KEYSIZE_256) {
-			dev_err(dev, "Invalid cipher(AES) key size: %u\n",
+			dev_dbg(dev, "Invalid cipher(AES) key size: %u\n",
 				ctx->enc_keylen);
 			return -EINVAL;
 		}
@@ -561,7 +562,7 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 
 		rc = crypto_authenc_extractkeys(&keys, key, keylen);
 		if (rc)
-			goto badkey;
+			return rc;
 		enckey = keys.enckey;
 		authkey = keys.authkey;
 		ctx->enc_keylen = keys.enckeylen;
@@ -569,10 +570,9 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 
 		if (ctx->cipher_mode == DRV_CIPHER_CTR) {
 			/* the nonce is stored in bytes at end of key */
-			rc = -EINVAL;
 			if (ctx->enc_keylen <
 			    (AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE))
-				goto badkey;
+				return -EINVAL;
 			/* Copy nonce from last 4 bytes in CTR key to
 			 *  first 4 bytes in CTR IV
 			 */
@@ -590,7 +590,7 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 
 	rc = validate_keys_sizes(ctx);
 	if (rc)
-		goto badkey;
+		return rc;
 
 	/* STAT_PHASE_1: Copy key to ctx */
 
@@ -604,7 +604,7 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	} else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC */
 		rc = cc_get_plain_hmac_key(tfm, authkey, ctx->auth_keylen);
 		if (rc)
-			goto badkey;
+			return rc;
 	}
 
 	/* STAT_PHASE_2: Create sequence */
@@ -621,8 +621,7 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 		break; /* No auth. key setup */
 	default:
 		dev_err(dev, "Unsupported authenc (%d)\n", ctx->auth_mode);
-		rc = -ENOTSUPP;
-		goto badkey;
+		return -ENOTSUPP;
 	}
 
 	/* STAT_PHASE_3: Submit sequence to HW */
@@ -631,18 +630,12 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 		rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, seq_len);
 		if (rc) {
 			dev_err(dev, "send_request() failed (rc=%d)\n", rc);
-			goto setkey_error;
+			return rc;
 		}
 	}
 
 	/* Update STAT_PHASE_3 */
 	return rc;
-
-badkey:
-	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-
-setkey_error:
-	return rc;
 }
 
 static int cc_des3_aead_setkey(struct crypto_aead *aead, const u8 *key,
@@ -1566,7 +1559,7 @@ static int config_ccm_adata(struct aead_request *req)
 	/* taken from crypto/ccm.c */
 	/* 2 <= L <= 8, so 1 <= L' <= 7. */
 	if (l < 2 || l > 8) {
-		dev_err(dev, "illegal iv value %X\n", req->iv[0]);
+		dev_dbg(dev, "illegal iv value %X\n", req->iv[0]);
 		return -EINVAL;
 	}
 	memcpy(b0, req->iv, AES_BLOCK_SIZE);
@@ -1924,7 +1917,6 @@ static int cc_proc_aead(struct aead_request *req,
 	if (validate_data_size(ctx, direct, req)) {
 		dev_err(dev, "Unsupported crypt/assoc len %d/%d.\n",
 			req->cryptlen, areq_ctx->assoclen);
-		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_BLOCK_LEN);
 		return -EINVAL;
 	}
 
@@ -2064,7 +2056,7 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
@@ -2114,7 +2106,7 @@ static int cc_rfc4309_ccm_decrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
@@ -2233,7 +2225,7 @@ static int cc_rfc4106_gcm_encrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
@@ -2264,7 +2256,7 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
@@ -2298,7 +2290,7 @@ static int cc_rfc4106_gcm_decrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
@@ -2329,7 +2321,7 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
 	int rc = -EINVAL;
 
 	if (!valid_assoclen(req)) {
-		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
 		goto out;
 	}
 
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 254b48797799..7d6252d892d7 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -16,7 +16,7 @@
 #include "cc_cipher.h"
 #include "cc_request_mgr.h"
 
-#define MAX_ABLKCIPHER_SEQ_LEN 6
+#define MAX_SKCIPHER_SEQ_LEN 6
 
 #define template_skcipher	template_u.skcipher
 
@@ -291,7 +291,6 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
 	/* This check the size of the protected key token */
 	if (keylen != sizeof(hki)) {
 		dev_err(dev, "Unsupported protected key size %d.\n", keylen);
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -303,8 +302,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
 	keylen = hki.keylen;
 
 	if (validate_keys_sizes(ctx_p, keylen)) {
-		dev_err(dev, "Unsupported key size %d.\n", keylen);
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		dev_dbg(dev, "Unsupported key size %d.\n", keylen);
 		return -EINVAL;
 	}
 
@@ -394,8 +392,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
 	/* STAT_PHASE_0: Init and sanity checks */
 
 	if (validate_keys_sizes(ctx_p, keylen)) {
-		dev_err(dev, "Unsupported key size %d.\n", keylen);
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		dev_dbg(dev, "Unsupported key size %d.\n", keylen);
 		return -EINVAL;
 	}
 
@@ -523,6 +520,7 @@ static void cc_setup_readiv_desc(struct crypto_tfm *tfm,
 	}
 }
 
+
 static void cc_setup_state_desc(struct crypto_tfm *tfm,
 				 struct cipher_req_ctx *req_ctx,
 				 unsigned int ivsize, unsigned int nbytes,
@@ -534,8 +532,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
 	int cipher_mode = ctx_p->cipher_mode;
 	int flow_mode = ctx_p->flow_mode;
 	int direction = req_ctx->gen_ctx.op_type;
-	dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
-	unsigned int key_len = ctx_p->keylen;
 	dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
 	unsigned int du_size = nbytes;
 
@@ -571,6 +567,47 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
 	case DRV_CIPHER_XTS:
 	case DRV_CIPHER_ESSIV:
 	case DRV_CIPHER_BITLOCKER:
+		break;
+	default:
+		dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode);
+	}
+}
+
+
+static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
+				 struct cipher_req_ctx *req_ctx,
+				 unsigned int ivsize, unsigned int nbytes,
+				 struct cc_hw_desc desc[],
+				 unsigned int *seq_size)
+{
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	int cipher_mode = ctx_p->cipher_mode;
+	int flow_mode = ctx_p->flow_mode;
+	int direction = req_ctx->gen_ctx.op_type;
+	dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
+	unsigned int key_len = ctx_p->keylen;
+	dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
+	unsigned int du_size = nbytes;
+
+	struct cc_crypto_alg *cc_alg =
+		container_of(tfm->__crt_alg, struct cc_crypto_alg,
+			     skcipher_alg.base);
+
+	if (cc_alg->data_unit)
+		du_size = cc_alg->data_unit;
+
+	switch (cipher_mode) {
+	case DRV_CIPHER_ECB:
+		break;
+	case DRV_CIPHER_CBC:
+	case DRV_CIPHER_CBC_CTS:
+	case DRV_CIPHER_CTR:
+	case DRV_CIPHER_OFB:
+		break;
+	case DRV_CIPHER_XTS:
+	case DRV_CIPHER_ESSIV:
+	case DRV_CIPHER_BITLOCKER:
 		/* load XEX key */
 		hw_desc_init(&desc[*seq_size]);
 		set_cipher_mode(&desc[*seq_size], cipher_mode);
@@ -822,7 +859,7 @@ static int cc_cipher_process(struct skcipher_request *req,
 	void *iv = req->iv;
 	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
 	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
-	struct cc_hw_desc desc[MAX_ABLKCIPHER_SEQ_LEN];
+	struct cc_hw_desc desc[MAX_SKCIPHER_SEQ_LEN];
 	struct cc_crypto_req cc_req = {};
 	int rc;
 	unsigned int seq_len = 0;
@@ -836,8 +873,7 @@ static int cc_cipher_process(struct skcipher_request *req,
 
 	/* TODO: check data length according to mode */
 	if (validate_data_size(ctx_p, nbytes)) {
-		dev_err(dev, "Unsupported data size %d.\n", nbytes);
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_BLOCK_LEN);
+		dev_dbg(dev, "Unsupported data size %d.\n", nbytes);
 		rc = -EINVAL;
 		goto exit_process;
 	}
@@ -881,12 +917,14 @@ static int cc_cipher_process(struct skcipher_request *req,
 
 	/* STAT_PHASE_2: Create sequence */
 
-	/* Setup IV and XEX key used */
+	/* Setup state (IV)  */
 	cc_setup_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len);
 	/* Setup MLLI line, if needed */
 	cc_setup_mlli_desc(tfm, req_ctx, dst, src, nbytes, req, desc, &seq_len);
 	/* Setup key */
 	cc_setup_key_desc(tfm, req_ctx, nbytes, desc, &seq_len);
+	/* Setup state (IV and XEX key)  */
+	cc_setup_xex_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len);
 	/* Data processing */
 	cc_setup_flow_desc(tfm, req_ctx, dst, src, nbytes, desc, &seq_len);
 	/* Read next IV */
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 8b8eee513c27..532bc95a8373 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -133,7 +133,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id)
 	u32 imr;
 
 	/* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */
-	/* if driver suspended return, probebly shared interrupt */
+	/* if driver suspended return, probably shared interrupt */
 	if (cc_pm_is_dev_suspended(dev))
 		return IRQ_NONE;
 
@@ -271,6 +271,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
 	const struct cc_hw_data *hw_rev;
 	const struct of_device_id *dev_id;
 	struct clk *clk;
+	int irq;
 	int rc = 0;
 
 	new_drvdata = devm_kzalloc(dev, sizeof(*new_drvdata), GFP_KERNEL);
@@ -337,9 +338,9 @@ static int init_cc_resources(struct platform_device *plat_dev)
 		&req_mem_cc_regs->start, new_drvdata->cc_base);
 
 	/* Then IRQ */
-	new_drvdata->irq = platform_get_irq(plat_dev, 0);
-	if (new_drvdata->irq < 0)
-		return new_drvdata->irq;
+	irq = platform_get_irq(plat_dev, 0);
+	if (irq < 0)
+		return irq;
 
 	init_completion(&new_drvdata->hw_queue_avail);
 
@@ -442,14 +443,13 @@ static int init_cc_resources(struct platform_device *plat_dev)
 	dev_info(dev, "ARM CryptoCell %s Driver: HW version 0x%08X/0x%8X, Driver version %s\n",
 		 hw_rev->name, hw_rev_pidr, sig_cidr, DRV_MODULE_VERSION);
 	/* register the driver isr function */
-	rc = devm_request_irq(dev, new_drvdata->irq, cc_isr,
-			      IRQF_SHARED, "ccree", new_drvdata);
+	rc = devm_request_irq(dev, irq, cc_isr, IRQF_SHARED, "ccree",
+			      new_drvdata);
 	if (rc) {
-		dev_err(dev, "Could not register to interrupt %d\n",
-			new_drvdata->irq);
+		dev_err(dev, "Could not register to interrupt %d\n", irq);
 		goto post_clk_err;
 	}
-	dev_dbg(dev, "Registered to IRQ: %d\n", new_drvdata->irq);
+	dev_dbg(dev, "Registered to IRQ: %d\n", irq);
 
 	rc = init_cc_regs(new_drvdata, true);
 	if (rc) {
@@ -465,7 +465,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
 
 	rc = cc_fips_init(new_drvdata);
 	if (rc) {
-		dev_err(dev, "CC_FIPS_INIT failed 0x%x\n", rc);
+		dev_err(dev, "cc_fips_init failed 0x%x\n", rc);
 		goto post_debugfs_err;
 	}
 	rc = cc_sram_mgr_init(new_drvdata);
@@ -490,13 +490,13 @@ static int init_cc_resources(struct platform_device *plat_dev)
 
 	rc = cc_buffer_mgr_init(new_drvdata);
 	if (rc) {
-		dev_err(dev, "buffer_mgr_init failed\n");
+		dev_err(dev, "cc_buffer_mgr_init failed\n");
 		goto post_req_mgr_err;
 	}
 
 	rc = cc_pm_init(new_drvdata);
 	if (rc) {
-		dev_err(dev, "ssi_power_mgr_init failed\n");
+		dev_err(dev, "cc_pm_init failed\n");
 		goto post_buf_mgr_err;
 	}
 
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index ab31d4a68c80..c227718ba992 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -28,7 +28,6 @@
 
 /* Registers definitions from shared/hw/ree_include */
 #include "cc_host_regs.h"
-#define CC_DEV_SHA_MAX 512
 #include "cc_crypto_ctx.h"
 #include "cc_hw_queue_defs.h"
 #include "cc_sram_mgr.h"
@@ -133,13 +132,11 @@ struct cc_crypto_req {
 /**
  * struct cc_drvdata - driver private data context
  * @cc_base:	virt address of the CC registers
- * @irq:	device IRQ number
- * @irq_mask:	Interrupt mask shadow (1 for masked interrupts)
+ * @irq:	bitmap indicating source of last interrupt
  */
 struct cc_drvdata {
 	void __iomem *cc_base;
 	int irq;
-	u32 irq_mask;
 	struct completion hw_queue_avail; /* wait for HW queue availability */
 	struct platform_device *plat_dev;
 	cc_sram_addr_t mlli_sram_addr;
@@ -161,6 +158,7 @@ struct cc_drvdata {
 	int std_bodies;
 	bool sec_disabled;
 	u32 comp_mask;
+	bool pm_on;
 };
 
 struct cc_crypto_alg {
diff --git a/drivers/crypto/ccree/cc_fips.c b/drivers/crypto/ccree/cc_fips.c
index 4c8bce33abcf..702aefc21447 100644
--- a/drivers/crypto/ccree/cc_fips.c
+++ b/drivers/crypto/ccree/cc_fips.c
@@ -120,7 +120,7 @@ static void fips_dsr(unsigned long devarg)
 		cc_tee_handle_fips_error(drvdata);
 	}
 
-	/* after verifing that there is nothing to do,
+	/* after verifying that there is nothing to do,
 	 * unmask AXI completion interrupt.
 	 */
 	val = (CC_REG(HOST_IMR) & ~irq);
diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c
index bc71bdf44a9f..912e5ce5079d 100644
--- a/drivers/crypto/ccree/cc_hash.c
+++ b/drivers/crypto/ccree/cc_hash.c
@@ -899,9 +899,6 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key,
 	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
 
 out:
-	if (rc)
-		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-
 	if (ctx->key_params.key_dma_addr) {
 		dma_unmap_single(dev, ctx->key_params.key_dma_addr,
 				 ctx->key_params.keylen, DMA_TO_DEVICE);
@@ -990,9 +987,6 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash,
 
 	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
 
-	if (rc)
-		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-
 	dma_unmap_single(dev, ctx->key_params.key_dma_addr,
 			 ctx->key_params.keylen, DMA_TO_DEVICE);
 	dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n",
@@ -2358,11 +2352,9 @@ cc_digest_len_addr(void *drvdata, u32 mode)
 	case DRV_HASH_SHA256:
 	case DRV_HASH_MD5:
 		return digest_len_addr;
-#if (CC_DEV_SHA_MAX > 256)
 	case DRV_HASH_SHA384:
 	case DRV_HASH_SHA512:
 		return  digest_len_addr + sizeof(cc_digest_len_init);
-#endif
 	default:
 		return digest_len_addr; /*to avoid kernel crash*/
 	}
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
index dbc508fb719b..24c368b866f6 100644
--- a/drivers/crypto/ccree/cc_pm.c
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -22,14 +22,8 @@ const struct dev_pm_ops ccree_pm = {
 int cc_pm_suspend(struct device *dev)
 {
 	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
-	int rc;
 
 	dev_dbg(dev, "set HOST_POWER_DOWN_EN\n");
-	rc = cc_suspend_req_queue(drvdata);
-	if (rc) {
-		dev_err(dev, "cc_suspend_req_queue (%x)\n", rc);
-		return rc;
-	}
 	fini_cc_regs(drvdata);
 	cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE);
 	cc_clk_off(drvdata);
@@ -48,7 +42,7 @@ int cc_pm_resume(struct device *dev)
 		dev_err(dev, "failed getting clock back on. We're toast.\n");
 		return rc;
 	}
-	/* wait for Crytpcell reset completion */
+	/* wait for Cryptocell reset completion */
 	if (!cc_wait_for_reset_completion(drvdata)) {
 		dev_err(dev, "Cryptocell reset not completed");
 		return -EBUSY;
@@ -63,13 +57,6 @@ int cc_pm_resume(struct device *dev)
 	/* check if tee fips error occurred during power down */
 	cc_tee_handle_fips_error(drvdata);
 
-	rc = cc_resume_req_queue(drvdata);
-	if (rc) {
-		dev_err(dev, "cc_resume_req_queue (%x)\n", rc);
-		return rc;
-	}
-
-	/* must be after the queue resuming as it uses the HW queue*/
 	cc_init_hash_sram(drvdata);
 
 	return 0;
@@ -80,28 +67,20 @@ int cc_pm_get(struct device *dev)
 	int rc = 0;
 	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
 
-	if (cc_req_queue_suspended(drvdata))
+	if (drvdata->pm_on)
 		rc = pm_runtime_get_sync(dev);
-	else
-		pm_runtime_get_noresume(dev);
 
-	return rc;
+	return (rc == 1 ? 0 : rc);
 }
 
-int cc_pm_put_suspend(struct device *dev)
+void cc_pm_put_suspend(struct device *dev)
 {
-	int rc = 0;
 	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
 
-	if (!cc_req_queue_suspended(drvdata)) {
+	if (drvdata->pm_on) {
 		pm_runtime_mark_last_busy(dev);
-		rc = pm_runtime_put_autosuspend(dev);
-	} else {
-		/* Something wrong happens*/
-		dev_err(dev, "request to suspend already suspended queue");
-		rc = -EBUSY;
+		pm_runtime_put_autosuspend(dev);
 	}
-	return rc;
 }
 
 bool cc_pm_is_dev_suspended(struct device *dev)
@@ -114,10 +93,10 @@ int cc_pm_init(struct cc_drvdata *drvdata)
 {
 	struct device *dev = drvdata_to_dev(drvdata);
 
-	/* must be before the enabling to avoid resdundent suspending */
+	/* must be before the enabling to avoid redundant suspending */
 	pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
 	pm_runtime_use_autosuspend(dev);
-	/* activate the PM module */
+	/* set us as active - note we won't do PM ops until cc_pm_go()! */
 	return pm_runtime_set_active(dev);
 }
 
@@ -125,9 +104,11 @@ int cc_pm_init(struct cc_drvdata *drvdata)
 void cc_pm_go(struct cc_drvdata *drvdata)
 {
 	pm_runtime_enable(drvdata_to_dev(drvdata));
+	drvdata->pm_on = true;
 }
 
 void cc_pm_fini(struct cc_drvdata *drvdata)
 {
 	pm_runtime_disable(drvdata_to_dev(drvdata));
+	drvdata->pm_on = false;
 }
diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h
index a7d98a5da2e1..80a18e11cae4 100644
--- a/drivers/crypto/ccree/cc_pm.h
+++ b/drivers/crypto/ccree/cc_pm.h
@@ -21,7 +21,7 @@ void cc_pm_fini(struct cc_drvdata *drvdata);
 int cc_pm_suspend(struct device *dev);
 int cc_pm_resume(struct device *dev);
 int cc_pm_get(struct device *dev);
-int cc_pm_put_suspend(struct device *dev);
+void cc_pm_put_suspend(struct device *dev);
 bool cc_pm_is_dev_suspended(struct device *dev);
 
 #else
@@ -35,25 +35,12 @@ static inline void cc_pm_go(struct cc_drvdata *drvdata) {}
 
 static inline void cc_pm_fini(struct cc_drvdata *drvdata) {}
 
-static inline int cc_pm_suspend(struct device *dev)
-{
-	return 0;
-}
-
-static inline int cc_pm_resume(struct device *dev)
-{
-	return 0;
-}
-
 static inline int cc_pm_get(struct device *dev)
 {
 	return 0;
 }
 
-static inline int cc_pm_put_suspend(struct device *dev)
-{
-	return 0;
-}
+static inline void cc_pm_put_suspend(struct device *dev) {}
 
 static inline bool cc_pm_is_dev_suspended(struct device *dev)
 {
diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c
index a947d5a2cf35..9d61e6f12478 100644
--- a/drivers/crypto/ccree/cc_request_mgr.c
+++ b/drivers/crypto/ccree/cc_request_mgr.c
@@ -41,7 +41,6 @@ struct cc_req_mgr_handle {
 #else
 	struct tasklet_struct comptask;
 #endif
-	bool is_runtime_suspended;
 };
 
 struct cc_bl_item {
@@ -230,7 +229,7 @@ static int cc_queues_status(struct cc_drvdata *drvdata,
 	struct device *dev = drvdata_to_dev(drvdata);
 
 	/* SW queue is checked only once as it will not
-	 * be chaned during the poll because the spinlock_bh
+	 * be changed during the poll because the spinlock_bh
 	 * is held by the thread
 	 */
 	if (((req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1)) ==
@@ -275,12 +274,11 @@ static int cc_queues_status(struct cc_drvdata *drvdata,
  * \param len The crypto sequence length
  * \param add_comp If "true": add an artificial dout DMA to mark completion
  *
- * \return int Returns -EINPROGRESS or error code
  */
-static int cc_do_send_request(struct cc_drvdata *drvdata,
-			      struct cc_crypto_req *cc_req,
-			      struct cc_hw_desc *desc, unsigned int len,
-				bool add_comp)
+static void cc_do_send_request(struct cc_drvdata *drvdata,
+			       struct cc_crypto_req *cc_req,
+			       struct cc_hw_desc *desc, unsigned int len,
+			       bool add_comp)
 {
 	struct cc_req_mgr_handle *req_mgr_h = drvdata->request_mgr_handle;
 	unsigned int used_sw_slots;
@@ -303,8 +301,8 @@ static int cc_do_send_request(struct cc_drvdata *drvdata,
 
 	/*
 	 * We are about to push command to the HW via the command registers
-	 * that may refernece hsot memory. We need to issue a memory barrier
-	 * to make sure there are no outstnading memory writes
+	 * that may reference host memory. We need to issue a memory barrier
+	 * to make sure there are no outstanding memory writes
 	 */
 	wmb();
 
@@ -328,9 +326,6 @@ static int cc_do_send_request(struct cc_drvdata *drvdata,
 		/* Update the free slots in HW queue */
 		req_mgr_h->q_free_slots -= total_seq_len;
 	}
-
-	/* Operation still in process */
-	return -EINPROGRESS;
 }
 
 static void cc_enqueue_backlog(struct cc_drvdata *drvdata,
@@ -390,20 +385,15 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata)
 			return;
 		}
 
-		rc = cc_do_send_request(drvdata, &bli->creq, bli->desc,
-					bli->len, false);
-
+		cc_do_send_request(drvdata, &bli->creq, bli->desc, bli->len,
+				   false);
 		spin_unlock(&mgr->hw_lock);
 
-		if (rc != -EINPROGRESS) {
-			cc_pm_put_suspend(dev);
-			creq->user_cb(dev, req, rc);
-		}
-
 		/* Remove ourselves from the backlog list */
 		spin_lock(&mgr->bl_lock);
 		list_del(&bli->list);
 		--mgr->bl_len;
+		kfree(bli);
 	}
 
 	spin_unlock(&mgr->bl_lock);
@@ -422,7 +412,7 @@ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req,
 
 	rc = cc_pm_get(dev);
 	if (rc) {
-		dev_err(dev, "ssi_power_mgr_runtime_get returned %x\n", rc);
+		dev_err(dev, "cc_pm_get returned %x\n", rc);
 		return rc;
 	}
 
@@ -451,8 +441,10 @@ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req,
 		return -EBUSY;
 	}
 
-	if (!rc)
-		rc = cc_do_send_request(drvdata, cc_req, desc, len, false);
+	if (!rc) {
+		cc_do_send_request(drvdata, cc_req, desc, len, false);
+		rc = -EINPROGRESS;
+	}
 
 	spin_unlock_bh(&mgr->hw_lock);
 	return rc;
@@ -472,7 +464,7 @@ int cc_send_sync_request(struct cc_drvdata *drvdata,
 
 	rc = cc_pm_get(dev);
 	if (rc) {
-		dev_err(dev, "ssi_power_mgr_runtime_get returned %x\n", rc);
+		dev_err(dev, "cc_pm_get returned %x\n", rc);
 		return rc;
 	}
 
@@ -492,14 +484,8 @@ int cc_send_sync_request(struct cc_drvdata *drvdata,
 		reinit_completion(&drvdata->hw_queue_avail);
 	}
 
-	rc = cc_do_send_request(drvdata, cc_req, desc, len, true);
+	cc_do_send_request(drvdata, cc_req, desc, len, true);
 	spin_unlock_bh(&mgr->hw_lock);
-
-	if (rc != -EINPROGRESS) {
-		cc_pm_put_suspend(dev);
-		return rc;
-	}
-
 	wait_for_completion(&cc_req->seq_compl);
 	return 0;
 }
@@ -532,8 +518,8 @@ int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc,
 
 	/*
 	 * We are about to push command to the HW via the command registers
-	 * that may refernece hsot memory. We need to issue a memory barrier
-	 * to make sure there are no outstnading memory writes
+	 * that may reference host memory. We need to issue a memory barrier
+	 * to make sure there are no outstanding memory writes
 	 */
 	wmb();
 	enqueue_seq(drvdata, desc, len);
@@ -668,7 +654,7 @@ static void comp_handler(unsigned long devarg)
 		request_mgr_handle->axi_completed += cc_axi_comp_count(drvdata);
 	}
 
-	/* after verifing that there is nothing to do,
+	/* after verifying that there is nothing to do,
 	 * unmask AXI completion interrupt
 	 */
 	cc_iowrite(drvdata, CC_REG(HOST_IMR),
@@ -677,52 +663,3 @@ static void comp_handler(unsigned long devarg)
 	cc_proc_backlog(drvdata);
 	dev_dbg(dev, "Comp. handler done.\n");
 }
-
-/*
- * resume the queue configuration - no need to take the lock as this happens
- * inside the spin lock protection
- */
-#if defined(CONFIG_PM)
-int cc_resume_req_queue(struct cc_drvdata *drvdata)
-{
-	struct cc_req_mgr_handle *request_mgr_handle =
-		drvdata->request_mgr_handle;
-
-	spin_lock_bh(&request_mgr_handle->hw_lock);
-	request_mgr_handle->is_runtime_suspended = false;
-	spin_unlock_bh(&request_mgr_handle->hw_lock);
-
-	return 0;
-}
-
-/*
- * suspend the queue configuration. Since it is used for the runtime suspend
- * only verify that the queue can be suspended.
- */
-int cc_suspend_req_queue(struct cc_drvdata *drvdata)
-{
-	struct cc_req_mgr_handle *request_mgr_handle =
-						drvdata->request_mgr_handle;
-
-	/* lock the send_request */
-	spin_lock_bh(&request_mgr_handle->hw_lock);
-	if (request_mgr_handle->req_queue_head !=
-	    request_mgr_handle->req_queue_tail) {
-		spin_unlock_bh(&request_mgr_handle->hw_lock);
-		return -EBUSY;
-	}
-	request_mgr_handle->is_runtime_suspended = true;
-	spin_unlock_bh(&request_mgr_handle->hw_lock);
-
-	return 0;
-}
-
-bool cc_req_queue_suspended(struct cc_drvdata *drvdata)
-{
-	struct cc_req_mgr_handle *request_mgr_handle =
-						drvdata->request_mgr_handle;
-
-	return	request_mgr_handle->is_runtime_suspended;
-}
-
-#endif
diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h
index f46cf766fe4d..ff7746aaaf35 100644
--- a/drivers/crypto/ccree/cc_request_mgr.h
+++ b/drivers/crypto/ccree/cc_request_mgr.h
@@ -40,12 +40,4 @@ void complete_request(struct cc_drvdata *drvdata);
 
 void cc_req_mgr_fini(struct cc_drvdata *drvdata);
 
-#if defined(CONFIG_PM)
-int cc_resume_req_queue(struct cc_drvdata *drvdata);
-
-int cc_suspend_req_queue(struct cc_drvdata *drvdata);
-
-bool cc_req_queue_suspended(struct cc_drvdata *drvdata);
-#endif
-
 #endif /*__REQUEST_MGR_H__*/
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 250150560e68..f078b2686418 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -23,22 +23,22 @@ config CRYPTO_DEV_CHELSIO
 	  will be called chcr.
 
 config CHELSIO_IPSEC_INLINE
-        bool "Chelsio IPSec XFRM Tx crypto offload"
-        depends on CHELSIO_T4
+	bool "Chelsio IPSec XFRM Tx crypto offload"
+	depends on CHELSIO_T4
 	depends on CRYPTO_DEV_CHELSIO
-        depends on XFRM_OFFLOAD
-        depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
-        default n
-        ---help---
-          Enable support for IPSec Tx Inline.
+	depends on XFRM_OFFLOAD
+	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	default n
+	---help---
+	  Enable support for IPSec Tx Inline.
 
 config CRYPTO_DEV_CHELSIO_TLS
-        tristate "Chelsio Crypto Inline TLS Driver"
-        depends on CHELSIO_T4
-        depends on TLS
-        select CRYPTO_DEV_CHELSIO
-        ---help---
-          Support Chelsio Inline TLS with Chelsio crypto accelerator.
+	tristate "Chelsio Crypto Inline TLS Driver"
+	depends on CHELSIO_T4
+	depends on TLS_TOE
+	select CRYPTO_DEV_CHELSIO
+	---help---
+	  Support Chelsio Inline TLS with Chelsio crypto accelerator.
 
-          To compile this driver as a module, choose M here: the module
-          will be called chtls.
+	  To compile this driver as a module, choose M here: the module
+	  will be called chtls.
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 38ee38b37ae6..b4b9b22125d1 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -93,7 +93,7 @@ static u32 round_constant[11] = {
 	0x1B000000, 0x36000000, 0x6C000000
 };
 
-static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+static int chcr_handle_cipher_resp(struct skcipher_request *req,
 				   unsigned char *input, int err);
 
 static inline  struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx)
@@ -568,11 +568,11 @@ static void  ulptx_walk_add_sg(struct ulptx_walk *walk,
 	}
 }
 
-static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
+static inline int get_cryptoalg_subtype(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct chcr_alg_template *chcr_crypto_alg =
-		container_of(alg, struct chcr_alg_template, alg.crypto);
+		container_of(alg, struct chcr_alg_template, alg.skcipher);
 
 	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
 }
@@ -757,14 +757,14 @@ static inline void create_wreq(struct chcr_context *ctx,
  */
 static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
 	struct ulptx_sgl *ulptx;
-	struct chcr_blkcipher_req_ctx *reqctx =
-		ablkcipher_request_ctx(wrparam->req);
+	struct chcr_skcipher_req_ctx *reqctx =
+		skcipher_request_ctx(wrparam->req);
 	unsigned int temp = 0, transhdr_len, dst_size;
 	int error;
 	int nents;
@@ -807,9 +807,9 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 
 	chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr;
 	if ((reqctx->op == CHCR_DECRYPT_OP) &&
-	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    (!(get_cryptoalg_subtype(tfm) ==
 	       CRYPTO_ALG_SUB_TYPE_CTR)) &&
-	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    (!(get_cryptoalg_subtype(tfm) ==
 	       CRYPTO_ALG_SUB_TYPE_CTR_RFC3686))) {
 		generate_copy_rrkey(ablkctx, &chcr_req->key_ctx);
 	} else {
@@ -843,7 +843,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 	if (reqctx->op && (ablkctx->ciph_mode ==
 			   CHCR_SCMD_CIPHER_MODE_AES_CBC))
 		sg_pcopy_to_buffer(wrparam->req->src,
-			sg_nents(wrparam->req->src), wrparam->req->info, 16,
+			sg_nents(wrparam->req->src), wrparam->req->iv, 16,
 			reqctx->processed + wrparam->bytes - AES_BLOCK_SIZE);
 
 	return skb;
@@ -866,27 +866,20 @@ static inline int chcr_keyctx_ck_size(unsigned int keylen)
 
 	return ck_size;
 }
-static int chcr_cipher_fallback_setkey(struct crypto_ablkcipher *cipher,
+static int chcr_cipher_fallback_setkey(struct crypto_skcipher *cipher,
 				       const u8 *key,
 				       unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(cipher));
-	int err = 0;
 
 	crypto_sync_skcipher_clear_flags(ablkctx->sw_cipher,
 				CRYPTO_TFM_REQ_MASK);
 	crypto_sync_skcipher_set_flags(ablkctx->sw_cipher,
 				cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK);
-	err = crypto_sync_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |=
-		crypto_sync_skcipher_get_flags(ablkctx->sw_cipher) &
-		CRYPTO_TFM_RES_MASK;
-	return err;
+	return crypto_sync_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
 }
 
-static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *cipher,
+static int chcr_aes_cbc_setkey(struct crypto_skcipher *cipher,
 			       const u8 *key,
 			       unsigned int keylen)
 {
@@ -912,13 +905,12 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *cipher,
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
 	return 0;
 badkey_err:
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
 
 	return err;
 }
 
-static int chcr_aes_ctr_setkey(struct crypto_ablkcipher *cipher,
+static int chcr_aes_ctr_setkey(struct crypto_skcipher *cipher,
 				   const u8 *key,
 				   unsigned int keylen)
 {
@@ -943,13 +935,12 @@ static int chcr_aes_ctr_setkey(struct crypto_ablkcipher *cipher,
 
 	return 0;
 badkey_err:
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
 
 	return err;
 }
 
-static int chcr_aes_rfc3686_setkey(struct crypto_ablkcipher *cipher,
+static int chcr_aes_rfc3686_setkey(struct crypto_skcipher *cipher,
 				   const u8 *key,
 				   unsigned int keylen)
 {
@@ -981,7 +972,6 @@ static int chcr_aes_rfc3686_setkey(struct crypto_ablkcipher *cipher,
 
 	return 0;
 badkey_err:
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
 
 	return err;
@@ -1017,12 +1007,12 @@ static unsigned int adjust_ctr_overflow(u8 *iv, u32 bytes)
 	return bytes;
 }
 
-static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv,
+static int chcr_update_tweak(struct skcipher_request *req, u8 *iv,
 			     u32 isfinal)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct crypto_aes_ctx aes;
 	int ret, i;
 	u8 *key;
@@ -1051,16 +1041,16 @@ static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv,
 	return 0;
 }
 
-static int chcr_update_cipher_iv(struct ablkcipher_request *req,
+static int chcr_update_cipher_iv(struct skcipher_request *req,
 				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(tfm);
 	int ret = 0;
 
 	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
-		ctr_add_iv(iv, req->info, (reqctx->processed /
+		ctr_add_iv(iv, req->iv, (reqctx->processed /
 			   AES_BLOCK_SIZE));
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_RFC3686)
 		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
@@ -1071,7 +1061,7 @@ static int chcr_update_cipher_iv(struct ablkcipher_request *req,
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
 		if (reqctx->op)
 			/*Updated before sending last WR*/
-			memcpy(iv, req->info, AES_BLOCK_SIZE);
+			memcpy(iv, req->iv, AES_BLOCK_SIZE);
 		else
 			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
 	}
@@ -1085,16 +1075,16 @@ static int chcr_update_cipher_iv(struct ablkcipher_request *req,
  * for subsequent update requests
  */
 
-static int chcr_final_cipher_iv(struct ablkcipher_request *req,
+static int chcr_final_cipher_iv(struct skcipher_request *req,
 				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(tfm);
 	int ret = 0;
 
 	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
-		ctr_add_iv(iv, req->info, DIV_ROUND_UP(reqctx->processed,
+		ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed,
 						       AES_BLOCK_SIZE));
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
 		ret = chcr_update_tweak(req, iv, 1);
@@ -1108,25 +1098,25 @@ static int chcr_final_cipher_iv(struct ablkcipher_request *req,
 
 }
 
-static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+static int chcr_handle_cipher_resp(struct skcipher_request *req,
 				   unsigned char *input, int err)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
 	struct sk_buff *skb;
 	struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input;
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct  cipher_wr_param wrparam;
 	struct chcr_dev *dev = c_ctx(tfm)->dev;
 	int bytes;
 
 	if (err)
 		goto unmap;
-	if (req->nbytes == reqctx->processed) {
+	if (req->cryptlen == reqctx->processed) {
 		chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
 				      req);
-		err = chcr_final_cipher_iv(req, fw6_pld, req->info);
+		err = chcr_final_cipher_iv(req, fw6_pld, req->iv);
 		goto complete;
 	}
 
@@ -1134,13 +1124,13 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 		bytes = chcr_sg_ent_in_wr(reqctx->srcsg, reqctx->dstsg, 0,
 					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  reqctx->src_ofst, reqctx->dst_ofst);
-		if ((bytes + reqctx->processed) >= req->nbytes)
-			bytes  = req->nbytes - reqctx->processed;
+		if ((bytes + reqctx->processed) >= req->cryptlen)
+			bytes  = req->cryptlen - reqctx->processed;
 		else
 			bytes = rounddown(bytes, 16);
 	} else {
 		/*CTR mode counter overfloa*/
-		bytes  = req->nbytes - reqctx->processed;
+		bytes  = req->cryptlen - reqctx->processed;
 	}
 	err = chcr_update_cipher_iv(req, fw6_pld, reqctx->iv);
 	if (err)
@@ -1153,13 +1143,13 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 				     req->base.flags,
 				     req->src,
 				     req->dst,
-				     req->nbytes,
-				     req->info,
+				     req->cryptlen,
+				     req->iv,
 				     reqctx->op);
 		goto complete;
 	}
 
-	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	if (get_cryptoalg_subtype(tfm) ==
 	    CRYPTO_ALG_SUB_TYPE_CTR)
 		bytes = adjust_ctr_overflow(reqctx->iv, bytes);
 	wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx];
@@ -1185,33 +1175,33 @@ complete:
 	return err;
 }
 
-static int process_cipher(struct ablkcipher_request *req,
+static int process_cipher(struct skcipher_request *req,
 				  unsigned short qid,
 				  struct sk_buff **skb,
 				  unsigned short op_type)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
 	struct	cipher_wr_param wrparam;
 	int bytes, err = -EINVAL;
 
 	reqctx->processed = 0;
-	if (!req->info)
+	if (!req->iv)
 		goto error;
 	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
-	    (req->nbytes == 0) ||
-	    (req->nbytes % crypto_ablkcipher_blocksize(tfm))) {
+	    (req->cryptlen == 0) ||
+	    (req->cryptlen % crypto_skcipher_blocksize(tfm))) {
 		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
-		       ablkctx->enckey_len, req->nbytes, ivsize);
+		       ablkctx->enckey_len, req->cryptlen, ivsize);
 		goto error;
 	}
 
 	err = chcr_cipher_dma_map(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req);
 	if (err)
 		goto error;
-	if (req->nbytes < (SGE_MAX_WR_LEN - (sizeof(struct chcr_wr) +
+	if (req->cryptlen < (SGE_MAX_WR_LEN - (sizeof(struct chcr_wr) +
 					    AES_MIN_KEY_SIZE +
 					    sizeof(struct cpl_rx_phys_dsgl) +
 					/*Min dsgl size*/
@@ -1219,14 +1209,14 @@ static int process_cipher(struct ablkcipher_request *req,
 		/* Can be sent as Imm*/
 		unsigned int dnents = 0, transhdr_len, phys_dsgl, kctx_len;
 
-		dnents = sg_nents_xlen(req->dst, req->nbytes,
+		dnents = sg_nents_xlen(req->dst, req->cryptlen,
 				       CHCR_DST_SG_SIZE, 0);
 		phys_dsgl = get_space_for_phys_dsgl(dnents);
 		kctx_len = roundup(ablkctx->enckey_len, 16);
 		transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
-		reqctx->imm = (transhdr_len + IV + req->nbytes) <=
+		reqctx->imm = (transhdr_len + IV + req->cryptlen) <=
 			SGE_MAX_WR_LEN;
-		bytes = IV + req->nbytes;
+		bytes = IV + req->cryptlen;
 
 	} else {
 		reqctx->imm = 0;
@@ -1236,21 +1226,21 @@ static int process_cipher(struct ablkcipher_request *req,
 		bytes = chcr_sg_ent_in_wr(req->src, req->dst, 0,
 					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  0, 0);
-		if ((bytes + reqctx->processed) >= req->nbytes)
-			bytes  = req->nbytes - reqctx->processed;
+		if ((bytes + reqctx->processed) >= req->cryptlen)
+			bytes  = req->cryptlen - reqctx->processed;
 		else
 			bytes = rounddown(bytes, 16);
 	} else {
-		bytes = req->nbytes;
+		bytes = req->cryptlen;
 	}
-	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	if (get_cryptoalg_subtype(tfm) ==
 	    CRYPTO_ALG_SUB_TYPE_CTR) {
-		bytes = adjust_ctr_overflow(req->info, bytes);
+		bytes = adjust_ctr_overflow(req->iv, bytes);
 	}
-	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	if (get_cryptoalg_subtype(tfm) ==
 	    CRYPTO_ALG_SUB_TYPE_CTR_RFC3686) {
 		memcpy(reqctx->iv, ablkctx->nonce, CTR_RFC3686_NONCE_SIZE);
-		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->info,
+		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->iv,
 				CTR_RFC3686_IV_SIZE);
 
 		/* initialize counter portion of counter block */
@@ -1259,7 +1249,7 @@ static int process_cipher(struct ablkcipher_request *req,
 
 	} else {
 
-		memcpy(reqctx->iv, req->info, IV);
+		memcpy(reqctx->iv, req->iv, IV);
 	}
 	if (unlikely(bytes == 0)) {
 		chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
@@ -1268,7 +1258,7 @@ static int process_cipher(struct ablkcipher_request *req,
 					   req->base.flags,
 					   req->src,
 					   req->dst,
-					   req->nbytes,
+					   req->cryptlen,
 					   reqctx->iv,
 					   op_type);
 		goto error;
@@ -1296,9 +1286,9 @@ error:
 	return err;
 }
 
-static int chcr_aes_encrypt(struct ablkcipher_request *req)
+static int chcr_aes_encrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chcr_dev *dev = c_ctx(tfm)->dev;
 	struct sk_buff *skb = NULL;
 	int err, isfull = 0;
@@ -1329,9 +1319,9 @@ error:
 	return err;
 }
 
-static int chcr_aes_decrypt(struct ablkcipher_request *req)
+static int chcr_aes_decrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
 	struct chcr_dev *dev = c_ctx(tfm)->dev;
 	struct sk_buff *skb = NULL;
@@ -1379,7 +1369,8 @@ static int chcr_device_init(struct chcr_context *ctx)
 		txq_perchan = ntxq / u_ctx->lldi.nchan;
 		spin_lock(&ctx->dev->lock_chcr_dev);
 		ctx->tx_chan_id = ctx->dev->tx_channel_id;
-		ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id;
+		ctx->dev->tx_channel_id =
+			(ctx->dev->tx_channel_id + 1) %  u_ctx->lldi.nchan;
 		spin_unlock(&ctx->dev->lock_chcr_dev);
 		rxq_idx = ctx->tx_chan_id * rxq_perchan;
 		rxq_idx += id % rxq_perchan;
@@ -1398,27 +1389,28 @@ out:
 	return err;
 }
 
-static int chcr_cra_init(struct crypto_tfm *tfm)
+static int chcr_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct chcr_context *ctx = crypto_skcipher_ctx(tfm);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
-	ablkctx->sw_cipher = crypto_alloc_sync_skcipher(alg->cra_name, 0,
+	ablkctx->sw_cipher = crypto_alloc_sync_skcipher(alg->base.cra_name, 0,
 				CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ablkctx->sw_cipher)) {
-		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
+		pr_err("failed to allocate fallback for %s\n", alg->base.cra_name);
 		return PTR_ERR(ablkctx->sw_cipher);
 	}
 
-	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
-	return chcr_device_init(crypto_tfm_ctx(tfm));
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx));
+
+	return chcr_device_init(ctx);
 }
 
-static int chcr_rfc3686_init(struct crypto_tfm *tfm)
+static int chcr_rfc3686_init(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct chcr_context *ctx = crypto_skcipher_ctx(tfm);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
 	/*RFC3686 initialises IV counter value to 1, rfc3686(ctr(aes))
@@ -1427,17 +1419,17 @@ static int chcr_rfc3686_init(struct crypto_tfm *tfm)
 	ablkctx->sw_cipher = crypto_alloc_sync_skcipher("ctr(aes)", 0,
 				CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ablkctx->sw_cipher)) {
-		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
+		pr_err("failed to allocate fallback for %s\n", alg->base.cra_name);
 		return PTR_ERR(ablkctx->sw_cipher);
 	}
-	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
-	return chcr_device_init(crypto_tfm_ctx(tfm));
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx));
+	return chcr_device_init(ctx);
 }
 
 
-static void chcr_cra_exit(struct crypto_tfm *tfm)
+static void chcr_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct chcr_context *ctx = crypto_skcipher_ctx(tfm);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
 	crypto_free_sync_skcipher(ablkctx->sw_cipher);
@@ -2056,8 +2048,8 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 		err = chcr_handle_aead_resp(aead_request_cast(req), input, err);
 		break;
 
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		 chcr_handle_cipher_resp(ablkcipher_request_cast(req),
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		 chcr_handle_cipher_resp(skcipher_request_cast(req),
 					       input, err);
 		break;
 	case CRYPTO_ALG_TYPE_AHASH:
@@ -2148,7 +2140,7 @@ out:
 	return err;
 }
 
-static int chcr_aes_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int chcr_aes_xts_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			       unsigned int key_len)
 {
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(cipher));
@@ -2172,7 +2164,6 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
 	return 0;
 badkey_err:
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
 
 	return err;
@@ -2576,12 +2567,12 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
 	dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id);
 }
 
-void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+void chcr_add_cipher_src_ent(struct skcipher_request *req,
 			     void *ulptx,
 			     struct  cipher_wr_param *wrparam)
 {
 	struct ulptx_walk ulp_walk;
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	u8 *buf = ulptx;
 
 	memcpy(buf, reqctx->iv, IV);
@@ -2599,13 +2590,13 @@ void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
 	}
 }
 
-void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+void chcr_add_cipher_dst_ent(struct skcipher_request *req,
 			     struct cpl_rx_phys_dsgl *phys_cpl,
 			     struct  cipher_wr_param *wrparam,
 			     unsigned short qid)
 {
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req);
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req);
 	struct chcr_context *ctx = c_ctx(tfm);
 	struct dsgl_walk dsgl_walk;
 
@@ -2680,7 +2671,7 @@ void chcr_hash_dma_unmap(struct device *dev,
 }
 
 int chcr_cipher_dma_map(struct device *dev,
-			struct ablkcipher_request *req)
+			struct skcipher_request *req)
 {
 	int error;
 
@@ -2709,7 +2700,7 @@ err:
 }
 
 void chcr_cipher_dma_unmap(struct device *dev,
-			   struct ablkcipher_request *req)
+			   struct skcipher_request *req)
 {
 	if (req->src == req->dst) {
 		dma_unmap_sg(dev, req->src, sg_nents(req->src),
@@ -3194,9 +3185,6 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 		aeadctx->mayverify = VERIFY_SW;
 		break;
 	default:
-
-		  crypto_tfm_set_flags((struct crypto_tfm *) tfm,
-			CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize);
@@ -3221,8 +3209,6 @@ static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm,
 		aeadctx->mayverify = VERIFY_HW;
 		break;
 	default:
-		crypto_tfm_set_flags((struct crypto_tfm *)tfm,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize);
@@ -3263,8 +3249,6 @@ static int chcr_ccm_setauthsize(struct crypto_aead *tfm,
 		aeadctx->mayverify = VERIFY_HW;
 		break;
 	default:
-		crypto_tfm_set_flags((struct crypto_tfm *)tfm,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize);
@@ -3289,8 +3273,6 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead,
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
 		mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
 	} else {
-		crypto_tfm_set_flags((struct crypto_tfm *)aead,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
 		aeadctx->enckey_len = 0;
 		return	-EINVAL;
 	}
@@ -3313,9 +3295,6 @@ static int chcr_aead_ccm_setkey(struct crypto_aead *aead,
 	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(aead) &
 			      CRYPTO_TFM_REQ_MASK);
 	error = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(aead, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(aead, crypto_aead_get_flags(aeadctx->sw_cipher) &
-			      CRYPTO_TFM_RES_MASK);
 	if (error)
 		return error;
 	return chcr_ccm_common_setkey(aead, key, keylen);
@@ -3328,8 +3307,6 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 	int error;
 
 	if (keylen < 3) {
-		crypto_tfm_set_flags((struct crypto_tfm *)aead,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
 		aeadctx->enckey_len = 0;
 		return	-EINVAL;
 	}
@@ -3337,9 +3314,6 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(aead) &
 			      CRYPTO_TFM_REQ_MASK);
 	error = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(aead, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(aead, crypto_aead_get_flags(aeadctx->sw_cipher) &
-			      CRYPTO_TFM_RES_MASK);
 	if (error)
 		return error;
 	keylen -= 3;
@@ -3361,9 +3335,6 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(aead)
 			      & CRYPTO_TFM_REQ_MASK);
 	ret = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(aead, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(aead, crypto_aead_get_flags(aeadctx->sw_cipher) &
-			      CRYPTO_TFM_RES_MASK);
 	if (ret)
 		goto out;
 
@@ -3379,8 +3350,6 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	} else if (keylen == AES_KEYSIZE_256) {
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
 	} else {
-		crypto_tfm_set_flags((struct crypto_tfm *)aead,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
 		pr_err("GCM: Invalid key length %d\n", keylen);
 		ret = -EINVAL;
 		goto out;
@@ -3431,16 +3400,11 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(authenc)
 			      & CRYPTO_TFM_REQ_MASK);
 	err = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(authenc, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(authenc, crypto_aead_get_flags(aeadctx->sw_cipher)
-			      & CRYPTO_TFM_RES_MASK);
 	if (err)
 		goto out;
 
-	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) {
-		crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
 		goto out;
-	}
 
 	if (get_alg_config(&param, max_authsize)) {
 		pr_err("chcr : Unsupported digest size\n");
@@ -3561,16 +3525,12 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
 	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(authenc)
 			      & CRYPTO_TFM_REQ_MASK);
 	err = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(authenc, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(authenc, crypto_aead_get_flags(aeadctx->sw_cipher)
-			      & CRYPTO_TFM_RES_MASK);
 	if (err)
 		goto out;
 
-	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) {
-		crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
 		goto out;
-	}
+
 	subtype = get_aead_subtype(authenc);
 	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
 	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
@@ -3712,82 +3672,76 @@ static int chcr_aead_decrypt(struct aead_request *req)
 static struct chcr_alg_template driver_algs[] = {
 	/* AES-CBC */
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CBC,
+		.type = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_SUB_TYPE_CBC,
 		.is_registered = 0,
-		.alg.crypto = {
-			.cra_name		= "cbc(aes)",
-			.cra_driver_name	= "cbc-aes-chcr",
-			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_init		= chcr_cra_init,
-			.cra_exit		= chcr_cra_exit,
-			.cra_u.ablkcipher	= {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.ivsize		= AES_BLOCK_SIZE,
-				.setkey			= chcr_aes_cbc_setkey,
-				.encrypt		= chcr_aes_encrypt,
-				.decrypt		= chcr_aes_decrypt,
+		.alg.skcipher = {
+			.base.cra_name		= "cbc(aes)",
+			.base.cra_driver_name	= "cbc-aes-chcr",
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+
+			.init			= chcr_init_tfm,
+			.exit			= chcr_exit_tfm,
+			.min_keysize		= AES_MIN_KEY_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE,
+			.ivsize			= AES_BLOCK_SIZE,
+			.setkey			= chcr_aes_cbc_setkey,
+			.encrypt		= chcr_aes_encrypt,
+			.decrypt		= chcr_aes_decrypt,
 			}
-		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_XTS,
+		.type = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_SUB_TYPE_XTS,
 		.is_registered = 0,
-		.alg.crypto =   {
-			.cra_name		= "xts(aes)",
-			.cra_driver_name	= "xts-aes-chcr",
-			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_init		= chcr_cra_init,
-			.cra_exit		= NULL,
-			.cra_u .ablkcipher = {
-					.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-					.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-					.ivsize		= AES_BLOCK_SIZE,
-					.setkey		= chcr_aes_xts_setkey,
-					.encrypt	= chcr_aes_encrypt,
-					.decrypt	= chcr_aes_decrypt,
-				}
+		.alg.skcipher = {
+			.base.cra_name		= "xts(aes)",
+			.base.cra_driver_name	= "xts-aes-chcr",
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+
+			.init			= chcr_init_tfm,
+			.exit			= chcr_exit_tfm,
+			.min_keysize		= 2 * AES_MIN_KEY_SIZE,
+			.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+			.ivsize			= AES_BLOCK_SIZE,
+			.setkey			= chcr_aes_xts_setkey,
+			.encrypt		= chcr_aes_encrypt,
+			.decrypt		= chcr_aes_decrypt,
 			}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CTR,
+		.type = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_SUB_TYPE_CTR,
 		.is_registered = 0,
-		.alg.crypto = {
-			.cra_name		= "ctr(aes)",
-			.cra_driver_name	= "ctr-aes-chcr",
-			.cra_blocksize		= 1,
-			.cra_init		= chcr_cra_init,
-			.cra_exit		= chcr_cra_exit,
-			.cra_u.ablkcipher	= {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.ivsize		= AES_BLOCK_SIZE,
-				.setkey		= chcr_aes_ctr_setkey,
-				.encrypt	= chcr_aes_encrypt,
-				.decrypt	= chcr_aes_decrypt,
-			}
+		.alg.skcipher = {
+			.base.cra_name		= "ctr(aes)",
+			.base.cra_driver_name	= "ctr-aes-chcr",
+			.base.cra_blocksize	= 1,
+
+			.init			= chcr_init_tfm,
+			.exit			= chcr_exit_tfm,
+			.min_keysize		= AES_MIN_KEY_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE,
+			.ivsize			= AES_BLOCK_SIZE,
+			.setkey			= chcr_aes_ctr_setkey,
+			.encrypt		= chcr_aes_encrypt,
+			.decrypt		= chcr_aes_decrypt,
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		.type = CRYPTO_ALG_TYPE_SKCIPHER |
 			CRYPTO_ALG_SUB_TYPE_CTR_RFC3686,
 		.is_registered = 0,
-		.alg.crypto = {
-			.cra_name		= "rfc3686(ctr(aes))",
-			.cra_driver_name	= "rfc3686-ctr-aes-chcr",
-			.cra_blocksize		= 1,
-			.cra_init		= chcr_rfc3686_init,
-			.cra_exit		= chcr_cra_exit,
-			.cra_u.ablkcipher	= {
-				.min_keysize	= AES_MIN_KEY_SIZE +
-					CTR_RFC3686_NONCE_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE +
-					CTR_RFC3686_NONCE_SIZE,
-				.ivsize		= CTR_RFC3686_IV_SIZE,
-				.setkey		= chcr_aes_rfc3686_setkey,
-				.encrypt	= chcr_aes_encrypt,
-				.decrypt	= chcr_aes_decrypt,
-			}
+		.alg.skcipher = {
+			.base.cra_name		= "rfc3686(ctr(aes))",
+			.base.cra_driver_name	= "rfc3686-ctr-aes-chcr",
+			.base.cra_blocksize	= 1,
+
+			.init			= chcr_rfc3686_init,
+			.exit			= chcr_exit_tfm,
+			.min_keysize		= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+			.ivsize			= CTR_RFC3686_IV_SIZE,
+			.setkey			= chcr_aes_rfc3686_setkey,
+			.encrypt		= chcr_aes_encrypt,
+			.decrypt		= chcr_aes_decrypt,
 		}
 	},
 	/* SHA */
@@ -4254,10 +4208,10 @@ static int chcr_unregister_alg(void)
 
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+		case CRYPTO_ALG_TYPE_SKCIPHER:
 			if (driver_algs[i].is_registered)
-				crypto_unregister_alg(
-						&driver_algs[i].alg.crypto);
+				crypto_unregister_skcipher(
+						&driver_algs[i].alg.skcipher);
 			break;
 		case CRYPTO_ALG_TYPE_AEAD:
 			if (driver_algs[i].is_registered)
@@ -4293,21 +4247,20 @@ static int chcr_register_alg(void)
 		if (driver_algs[i].is_registered)
 			continue;
 		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			driver_algs[i].alg.crypto.cra_priority =
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			driver_algs[i].alg.skcipher.base.cra_priority =
 				CHCR_CRA_PRIORITY;
-			driver_algs[i].alg.crypto.cra_module = THIS_MODULE;
-			driver_algs[i].alg.crypto.cra_flags =
-				CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+			driver_algs[i].alg.skcipher.base.cra_module = THIS_MODULE;
+			driver_algs[i].alg.skcipher.base.cra_flags =
+				CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
 				CRYPTO_ALG_NEED_FALLBACK;
-			driver_algs[i].alg.crypto.cra_ctxsize =
+			driver_algs[i].alg.skcipher.base.cra_ctxsize =
 				sizeof(struct chcr_context) +
 				sizeof(struct ablk_ctx);
-			driver_algs[i].alg.crypto.cra_alignmask = 0;
-			driver_algs[i].alg.crypto.cra_type =
-				&crypto_ablkcipher_type;
-			err = crypto_register_alg(&driver_algs[i].alg.crypto);
-			name = driver_algs[i].alg.crypto.cra_driver_name;
+			driver_algs[i].alg.skcipher.base.cra_alignmask = 0;
+
+			err = crypto_register_skcipher(&driver_algs[i].alg.skcipher);
+			name = driver_algs[i].alg.skcipher.base.cra_driver_name;
 			break;
 		case CRYPTO_ALG_TYPE_AEAD:
 			driver_algs[i].alg.aead.base.cra_flags =
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index d1e6b51df0ce..f58c2b5c7fc5 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -287,7 +287,7 @@ struct hash_wr_param {
 };
 
 struct cipher_wr_param {
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	char *iv;
 	int bytes;
 	unsigned short qid;
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index 029a7354f541..e937605670ac 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -132,8 +132,6 @@ static void chcr_dev_init(struct uld_ctx *u_ctx)
 
 static int chcr_dev_move(struct uld_ctx *u_ctx)
 {
-	struct adapter *adap;
-
 	 mutex_lock(&drv_data.drv_mutex);
 	if (drv_data.last_dev == u_ctx) {
 		if (list_is_last(&drv_data.last_dev->entry, &drv_data.act_dev))
@@ -146,8 +144,6 @@ static int chcr_dev_move(struct uld_ctx *u_ctx)
 	list_move(&u_ctx->entry, &drv_data.inact_dev);
 	if (list_empty(&drv_data.act_dev))
 		drv_data.last_dev = NULL;
-	adap = padap(&u_ctx->dev);
-	memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats));
 	atomic_dec(&drv_data.dev_count);
 	mutex_unlock(&drv_data.drv_mutex);
 
@@ -299,17 +295,21 @@ static int __init chcr_crypto_init(void)
 static void __exit chcr_crypto_exit(void)
 {
 	struct uld_ctx *u_ctx, *tmp;
+	struct adapter *adap;
 
 	stop_crypto();
-
 	cxgb4_unregister_uld(CXGB4_ULD_CRYPTO);
 	/* Remove all devices from list */
 	mutex_lock(&drv_data.drv_mutex);
 	list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) {
+		adap = padap(&u_ctx->dev);
+		memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats));
 		list_del(&u_ctx->entry);
 		kfree(u_ctx);
 	}
 	list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) {
+		adap = padap(&u_ctx->dev);
+		memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats));
 		list_del(&u_ctx->entry);
 		kfree(u_ctx);
 	}
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 993c97e70565..6db2df8c8a05 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -160,9 +160,9 @@ static inline struct chcr_context *a_ctx(struct crypto_aead *tfm)
 	return crypto_aead_ctx(tfm);
 }
 
-static inline struct chcr_context *c_ctx(struct crypto_ablkcipher *tfm)
+static inline struct chcr_context *c_ctx(struct crypto_skcipher *tfm)
 {
-	return crypto_ablkcipher_ctx(tfm);
+	return crypto_skcipher_ctx(tfm);
 }
 
 static inline struct chcr_context *h_ctx(struct crypto_ahash *tfm)
@@ -285,7 +285,7 @@ struct chcr_ahash_req_ctx {
 	u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128];
 };
 
-struct chcr_blkcipher_req_ctx {
+struct chcr_skcipher_req_ctx {
 	struct sk_buff *skb;
 	struct scatterlist *dstsg;
 	unsigned int processed;
@@ -302,7 +302,7 @@ struct chcr_alg_template {
 	u32 type;
 	u32 is_registered;
 	union {
-		struct crypto_alg crypto;
+		struct skcipher_alg skcipher;
 		struct ahash_alg hash;
 		struct aead_alg aead;
 	} alg;
@@ -321,12 +321,12 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
 			   struct cpl_rx_phys_dsgl *phys_cpl,
 			   unsigned short qid);
 void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx);
-void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+void chcr_add_cipher_src_ent(struct skcipher_request *req,
 			     void *ulptx,
 			     struct  cipher_wr_param *wrparam);
-int chcr_cipher_dma_map(struct device *dev, struct ablkcipher_request *req);
-void chcr_cipher_dma_unmap(struct device *dev, struct ablkcipher_request *req);
-void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+int chcr_cipher_dma_map(struct device *dev, struct skcipher_request *req);
+void chcr_cipher_dma_unmap(struct device *dev, struct skcipher_request *req);
+void chcr_add_cipher_dst_ent(struct skcipher_request *req,
 			     struct cpl_rx_phys_dsgl *phys_cpl,
 			     struct  cipher_wr_param *wrparam,
 			     unsigned short qid);
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index 24355680f30a..9da0f93a330b 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -673,16 +673,16 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n)
 int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
+	unsigned int last_desc, ndesc, flits = 0;
 	struct ipsec_sa_entry *sa_entry;
 	u64 *pos, *end, *before, *sgl;
+	struct tx_sw_desc *sgl_sdesc;
 	int qidx, left, credits;
-	unsigned int flits = 0, ndesc;
-	struct adapter *adap;
+	bool immediate = false;
 	struct sge_eth_txq *q;
+	struct adapter *adap;
 	struct port_info *pi;
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 	struct sec_path *sp;
-	bool immediate = false;
 
 	if (!x->xso.offload_handle)
 		return NETDEV_TX_BUSY;
@@ -715,8 +715,14 @@ out_free:       dev_kfree_skb_any(skb);
 		return NETDEV_TX_BUSY;
 	}
 
+	last_desc = q->q.pidx + ndesc - 1;
+	if (last_desc >= q->q.size)
+		last_desc -= q->q.size;
+	sgl_sdesc = &q->q.sdesc[last_desc];
+
 	if (!immediate &&
-	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		q->mapping_err++;
 		goto out_free;
 	}
@@ -742,17 +748,10 @@ out_free:       dev_kfree_skb_any(skb);
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
 	} else {
-		int last_desc;
-
 		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end,
-				0, addr);
+				0, sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = q->q.pidx + ndesc - 1;
-		if (last_desc >= q->q.size)
-			last_desc -= q->q.size;
-		q->q.sdesc[last_desc].skb = skb;
-		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+		sgl_sdesc->skb = skb;
 	}
 	txq_advance(&q->q, ndesc);
 
diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h
index 025c831d0899..459442704eb1 100644
--- a/drivers/crypto/chelsio/chtls/chtls.h
+++ b/drivers/crypto/chelsio/chtls/chtls.h
@@ -21,6 +21,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/tls.h>
 #include <net/tls.h>
+#include <net/tls_toe.h>
 
 #include "t4fw_api.h"
 #include "t4_msg.h"
@@ -118,7 +119,7 @@ struct tls_scmd {
 };
 
 struct chtls_dev {
-	struct tls_device tlsdev;
+	struct tls_toe_device tlsdev;
 	struct list_head list;
 	struct cxgb4_lld_info *lldi;
 	struct pci_dev *pdev;
@@ -178,7 +179,10 @@ struct chtls_hws {
 	u32 copied_seq;
 	u64 tx_seq_no;
 	struct tls_scmd scmd;
-	struct tls12_crypto_info_aes_gcm_128 crypto_info;
+	union {
+		struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
+		struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
+	} crypto_info;
 };
 
 struct chtls_sock {
@@ -362,7 +366,7 @@ enum {
 #define TCP_PAGE(sk)   (sk->sk_frag.page)
 #define TCP_OFF(sk)    (sk->sk_frag.offset)
 
-static inline struct chtls_dev *to_chtls_dev(struct tls_device *tlsdev)
+static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev)
 {
 	return container_of(tlsdev, struct chtls_dev, tlsdev);
 }
@@ -481,7 +485,7 @@ int send_tx_flowc_wr(struct sock *sk, int compl,
 void chtls_tcp_push(struct sock *sk, int flags);
 int chtls_push_frames(struct chtls_sock *csk, int comp);
 int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val);
-int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode);
+int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type);
 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags);
 unsigned int keyid_to_addr(int start_addr, int keyid);
 void free_tls_keyid(struct sock *sk);
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index aca75237bbcf..9b2745ad9e38 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -727,6 +727,14 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
 	return 0;
 }
 
+static void chtls_purge_wr_queue(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = dequeue_wr(sk)) != NULL)
+		kfree_skb(skb);
+}
+
 static void chtls_release_resources(struct sock *sk)
 {
 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
@@ -741,6 +749,11 @@ static void chtls_release_resources(struct sock *sk)
 	kfree_skb(csk->txdata_skb_cache);
 	csk->txdata_skb_cache = NULL;
 
+	if (csk->wr_credits != csk->wr_max_credits) {
+		chtls_purge_wr_queue(sk);
+		chtls_reset_wr_list(csk);
+	}
+
 	if (csk->l2t_entry) {
 		cxgb4_l2t_release(csk->l2t_entry);
 		csk->l2t_entry = NULL;
@@ -1273,7 +1286,7 @@ static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
 	ctx = (struct listen_ctx *)data;
 	lsk = ctx->lsk;
 
-	if (unlikely(tid >= cdev->tids->ntids)) {
+	if (unlikely(tid_out_of_range(cdev->tids, tid))) {
 		pr_info("passive open TID %u too large\n", tid);
 		return 1;
 	}
@@ -1735,6 +1748,7 @@ static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
 		else
 			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	}
+	kfree_skb(skb);
 }
 
 static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
@@ -1815,6 +1829,20 @@ static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+/*
+ * Add an skb to the deferred skb queue for processing from process context.
+ */
+static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
+			   defer_handler_t handler)
+{
+	DEFERRED_SKB_CB(skb)->handler = handler;
+	spin_lock_bh(&cdev->deferq.lock);
+	__skb_queue_tail(&cdev->deferq, skb);
+	if (skb_queue_len(&cdev->deferq) == 1)
+		schedule_work(&cdev->deferq_task);
+	spin_unlock_bh(&cdev->deferq.lock);
+}
+
 static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
 			   struct chtls_dev *cdev, int status, int queue)
 {
@@ -1829,7 +1857,7 @@ static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
 
 	if (!reply_skb) {
 		req->status = (queue << 1);
-		send_defer_abort_rpl(cdev, skb);
+		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
 		return;
 	}
 
@@ -1848,20 +1876,6 @@ static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
 }
 
-/*
- * Add an skb to the deferred skb queue for processing from process context.
- */
-static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
-			   defer_handler_t handler)
-{
-	DEFERRED_SKB_CB(skb)->handler = handler;
-	spin_lock_bh(&cdev->deferq.lock);
-	__skb_queue_tail(&cdev->deferq, skb);
-	if (skb_queue_len(&cdev->deferq) == 1)
-		schedule_work(&cdev->deferq_task);
-	spin_unlock_bh(&cdev->deferq.lock);
-}
-
 static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
 				 struct chtls_dev *cdev,
 				 int status, int queue)
@@ -2062,19 +2076,6 @@ rel_skb:
 	return 0;
 }
 
-static struct sk_buff *dequeue_wr(struct sock *sk)
-{
-	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
-	struct sk_buff *skb = csk->wr_skb_head;
-
-	if (likely(skb)) {
-	/* Don't bother clearing the tail */
-		csk->wr_skb_head = WR_SKB_CB(skb)->next_wr;
-		WR_SKB_CB(skb)->next_wr = NULL;
-	}
-	return skb;
-}
-
 static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
 {
 	struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.h b/drivers/crypto/chelsio/chtls/chtls_cm.h
index 129d7ac649a9..3fac0c74a41f 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.h
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.h
@@ -185,6 +185,12 @@ static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static inline void chtls_reset_wr_list(struct chtls_sock *csk)
+{
+	csk->wr_skb_head = NULL;
+	csk->wr_skb_tail = NULL;
+}
+
 static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb)
 {
 	WR_SKB_CB(skb)->next_wr = NULL;
@@ -197,4 +203,19 @@ static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb)
 		WR_SKB_CB(csk->wr_skb_tail)->next_wr = skb;
 	csk->wr_skb_tail = skb;
 }
+
+static inline struct sk_buff *dequeue_wr(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct sk_buff *skb = NULL;
+
+	skb = csk->wr_skb_head;
+
+	if (likely(skb)) {
+	 /* Don't bother clearing the tail */
+		csk->wr_skb_head = WR_SKB_CB(skb)->next_wr;
+		WR_SKB_CB(skb)->next_wr = NULL;
+	}
+	return skb;
+}
 #endif
diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c
index 2a34035d3cfb..f1820aca0d33 100644
--- a/drivers/crypto/chelsio/chtls/chtls_hw.c
+++ b/drivers/crypto/chelsio/chtls/chtls_hw.c
@@ -208,28 +208,53 @@ static void chtls_rxkey_ivauth(struct _key_ctx *kctx)
 
 static int chtls_key_info(struct chtls_sock *csk,
 			  struct _key_ctx *kctx,
-			  u32 keylen, u32 optname)
+			  u32 keylen, u32 optname,
+			  int cipher_type)
 {
-	unsigned char key[AES_KEYSIZE_128];
-	struct tls12_crypto_info_aes_gcm_128 *gcm_ctx;
+	unsigned char key[AES_MAX_KEY_SIZE];
+	unsigned char *key_p, *salt;
 	unsigned char ghash_h[AEAD_H_SIZE];
-	int ck_size, key_ctx_size;
+	int ck_size, key_ctx_size, kctx_mackey_size, salt_size;
 	struct crypto_aes_ctx aes;
 	int ret;
 
-	gcm_ctx = (struct tls12_crypto_info_aes_gcm_128 *)
-		  &csk->tlshws.crypto_info;
-
 	key_ctx_size = sizeof(struct _key_ctx) +
 		       roundup(keylen, 16) + AEAD_H_SIZE;
 
-	if (keylen == AES_KEYSIZE_128) {
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
-	} else {
+	/* GCM mode of AES supports 128 and 256 bit encryption, so
+	 * prepare key context base on GCM cipher type
+	 */
+	switch (cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		struct tls12_crypto_info_aes_gcm_128 *gcm_ctx_128 =
+			(struct tls12_crypto_info_aes_gcm_128 *)
+					&csk->tlshws.crypto_info;
+		memcpy(key, gcm_ctx_128->key, keylen);
+
+		key_p            = gcm_ctx_128->key;
+		salt             = gcm_ctx_128->salt;
+		ck_size          = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+		salt_size        = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
+		kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_128;
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		struct tls12_crypto_info_aes_gcm_256 *gcm_ctx_256 =
+			(struct tls12_crypto_info_aes_gcm_256 *)
+					&csk->tlshws.crypto_info;
+		memcpy(key, gcm_ctx_256->key, keylen);
+
+		key_p            = gcm_ctx_256->key;
+		salt             = gcm_ctx_256->salt;
+		ck_size          = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+		salt_size        = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
+		kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+		break;
+	}
+	default:
 		pr_err("GCM: Invalid key length %d\n", keylen);
 		return -EINVAL;
 	}
-	memcpy(key, gcm_ctx->key, keylen);
 
 	/* Calculate the H = CIPH(K, 0 repeated 16 times).
 	 * It will go in key context
@@ -249,20 +274,20 @@ static int chtls_key_info(struct chtls_sock *csk,
 
 		key_ctx = ((key_ctx_size >> 4) << 3);
 		kctx->ctx_hdr = FILL_KEY_CRX_HDR(ck_size,
-						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 kctx_mackey_size,
 						 0, 0, key_ctx);
 		chtls_rxkey_ivauth(kctx);
 	} else {
 		kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
-						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 kctx_mackey_size,
 						 0, 0, key_ctx_size >> 4);
 	}
 
-	memcpy(kctx->salt, gcm_ctx->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-	memcpy(kctx->key, gcm_ctx->key, keylen);
+	memcpy(kctx->salt, salt, salt_size);
+	memcpy(kctx->key, key_p, keylen);
 	memcpy(kctx->key + keylen, ghash_h, AEAD_H_SIZE);
 	/* erase key info from driver */
-	memset(gcm_ctx->key, 0, keylen);
+	memset(key_p, 0, keylen);
 
 	return 0;
 }
@@ -288,7 +313,8 @@ static void chtls_set_scmd(struct chtls_sock *csk)
 		SCMD_TLS_FRAG_ENABLE_V(1);
 }
 
-int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname)
+int chtls_setkey(struct chtls_sock *csk, u32 keylen,
+		 u32 optname, int cipher_type)
 {
 	struct tls_key_req *kwr;
 	struct chtls_dev *cdev;
@@ -350,9 +376,10 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname)
 	kwr->sc_imm.cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM));
 	kwr->sc_imm.len = cpu_to_be32(klen);
 
+	lock_sock(sk);
 	/* key info */
 	kctx = (struct _key_ctx *)(kwr + 1);
-	ret = chtls_key_info(csk, kctx, keylen, optname);
+	ret = chtls_key_info(csk, kctx, keylen, optname, cipher_type);
 	if (ret)
 		goto out_notcb;
 
@@ -388,8 +415,10 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname)
 		csk->tlshws.txkey = keyid;
 	}
 
+	release_sock(sk);
 	return ret;
 out_notcb:
+	release_sock(sk);
 	free_tls_keyid(sk);
 out_nokey:
 	kfree_skb(skb);
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 98bc5a4cd5e7..5cf9b021220b 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -97,7 +97,7 @@ static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
 	if (!skb)
 		return NULL;
 
-	memcpy(__skb_put(skb, flowclen), flowc, flowclen);
+	__skb_put_data(skb, flowc, flowclen);
 	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
 
 	return skb;
@@ -1437,7 +1437,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				      csk->wr_max_credits))
 			sk->sk_write_space(sk);
 
-		if (copied >= target && !sk->sk_backlog.tail)
+		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
 			break;
 
 		if (copied) {
@@ -1470,7 +1470,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				break;
 			}
 		}
-		if (sk->sk_backlog.tail) {
+		if (READ_ONCE(sk->sk_backlog.tail)) {
 			release_sock(sk);
 			lock_sock(sk);
 			chtls_cleanup_rbuf(sk, copied);
@@ -1615,7 +1615,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg,
 			break;
 		}
 
-		if (sk->sk_backlog.tail) {
+		if (READ_ONCE(sk->sk_backlog.tail)) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
 			lock_sock(sk);
@@ -1743,7 +1743,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				      csk->wr_max_credits))
 			sk->sk_write_space(sk);
 
-		if (copied >= target && !sk->sk_backlog.tail)
+		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
 			break;
 
 		if (copied) {
@@ -1774,7 +1774,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			}
 		}
 
-		if (sk->sk_backlog.tail) {
+		if (READ_ONCE(sk->sk_backlog.tail)) {
 			release_sock(sk);
 			lock_sock(sk);
 			chtls_cleanup_rbuf(sk, copied);
@@ -1841,8 +1841,7 @@ skip_copy:
 			tp->urg_data = 0;
 
 		if (avail + offset >= skb->len) {
-			if (likely(skb))
-				chtls_free_skb(sk, skb);
+			chtls_free_skb(sk, skb);
 			buffers_freed++;
 
 			if  (copied >= target &&
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index e6df5b95ed47..a038de90b2ea 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -84,7 +84,6 @@ static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 static int chtls_start_listen(struct chtls_dev *cdev, struct sock *sk)
 {
 	struct chtls_listen *clisten;
-	int err;
 
 	if (sk->sk_protocol != IPPROTO_TCP)
 		return -EPROTONOSUPPORT;
@@ -100,10 +99,10 @@ static int chtls_start_listen(struct chtls_dev *cdev, struct sock *sk)
 	clisten->cdev = cdev;
 	clisten->sk = sk;
 	mutex_lock(&notify_mutex);
-	err = raw_notifier_call_chain(&listen_notify_list,
+	raw_notifier_call_chain(&listen_notify_list,
 				      CHTLS_LISTEN_START, clisten);
 	mutex_unlock(&notify_mutex);
-	return err;
+	return 0;
 }
 
 static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk)
@@ -124,7 +123,7 @@ static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk)
 	mutex_unlock(&notify_mutex);
 }
 
-static int chtls_inline_feature(struct tls_device *dev)
+static int chtls_inline_feature(struct tls_toe_device *dev)
 {
 	struct net_device *netdev;
 	struct chtls_dev *cdev;
@@ -140,7 +139,7 @@ static int chtls_inline_feature(struct tls_device *dev)
 	return 0;
 }
 
-static int chtls_create_hash(struct tls_device *dev, struct sock *sk)
+static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk)
 {
 	struct chtls_dev *cdev = to_chtls_dev(dev);
 
@@ -149,7 +148,7 @@ static int chtls_create_hash(struct tls_device *dev, struct sock *sk)
 	return 0;
 }
 
-static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk)
+static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk)
 {
 	struct chtls_dev *cdev = to_chtls_dev(dev);
 
@@ -161,7 +160,7 @@ static void chtls_free_uld(struct chtls_dev *cdev)
 {
 	int i;
 
-	tls_unregister_device(&cdev->tlsdev);
+	tls_toe_unregister_device(&cdev->tlsdev);
 	kvfree(cdev->kmap.addr);
 	idr_destroy(&cdev->hwtid_idr);
 	for (i = 0; i < (1 << RSPQ_HASH_BITS); i++)
@@ -173,27 +172,27 @@ static void chtls_free_uld(struct chtls_dev *cdev)
 
 static inline void chtls_dev_release(struct kref *kref)
 {
+	struct tls_toe_device *dev;
 	struct chtls_dev *cdev;
-	struct tls_device *dev;
 
-	dev = container_of(kref, struct tls_device, kref);
+	dev = container_of(kref, struct tls_toe_device, kref);
 	cdev = to_chtls_dev(dev);
 	chtls_free_uld(cdev);
 }
 
 static void chtls_register_dev(struct chtls_dev *cdev)
 {
-	struct tls_device *tlsdev = &cdev->tlsdev;
+	struct tls_toe_device *tlsdev = &cdev->tlsdev;
 
-	strlcpy(tlsdev->name, "chtls", TLS_DEVICE_NAME_MAX);
+	strlcpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX);
 	strlcat(tlsdev->name, cdev->lldi->ports[0]->name,
-		TLS_DEVICE_NAME_MAX);
+		TLS_TOE_DEVICE_NAME_MAX);
 	tlsdev->feature = chtls_inline_feature;
 	tlsdev->hash = chtls_create_hash;
 	tlsdev->unhash = chtls_destroy_hash;
 	tlsdev->release = chtls_dev_release;
 	kref_init(&tlsdev->kref);
-	tls_register_device(tlsdev);
+	tls_toe_register_device(tlsdev);
 	cdev->cdev_state = CHTLS_CDEV_STATE_UP;
 }
 
@@ -486,6 +485,7 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
 	struct tls_crypto_info *crypto_info, tmp_crypto_info;
 	struct chtls_sock *csk;
 	int keylen;
+	int cipher_type;
 	int rc = 0;
 
 	csk = rcu_dereference_sk_user_data(sk);
@@ -509,6 +509,9 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
 
 	crypto_info = (struct tls_crypto_info *)&csk->tlshws.crypto_info;
 
+	/* GCM mode of AES supports 128 and 256 bit encryption, so
+	 * copy keys from user based on GCM cipher type.
+	 */
 	switch (tmp_crypto_info.cipher_type) {
 	case TLS_CIPHER_AES_GCM_128: {
 		/* Obtain version and type from previous copy */
@@ -525,13 +528,30 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
 		}
 
 		keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
-		rc = chtls_setkey(csk, keylen, optname);
+		cipher_type = TLS_CIPHER_AES_GCM_128;
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		crypto_info[0] = tmp_crypto_info;
+		rc = copy_from_user((char *)crypto_info + sizeof(*crypto_info),
+				    optval + sizeof(*crypto_info),
+				sizeof(struct tls12_crypto_info_aes_gcm_256)
+				- sizeof(*crypto_info));
+
+		if (rc) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		keylen = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
+		cipher_type = TLS_CIPHER_AES_GCM_256;
 		break;
 	}
 	default:
 		rc = -EINVAL;
 		goto out;
 	}
+	rc = chtls_setkey(csk, keylen, optname, cipher_type);
 out:
 	return rc;
 }
diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index d81a1297cb9e..f4f18bfc2247 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
 
 #include <linux/io.h>
 #include <linux/delay.h>
@@ -23,12 +24,12 @@ static spinlock_t lock;
 
 /* Write a 128 bit field (either a writable key or IV) */
 static inline void
-_writefield(u32 offset, void *value)
+_writefield(u32 offset, const void *value)
 {
 	int i;
 
 	for (i = 0; i < 4; i++)
-		iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4));
+		iowrite32(((const u32 *) value)[i], _iobase + offset + (i * 4));
 }
 
 /* Read a 128 bit field (either a writable key or IV) */
@@ -42,12 +43,12 @@ _readfield(u32 offset, void *value)
 }
 
 static int
-do_crypt(void *src, void *dst, int len, u32 flags)
+do_crypt(const void *src, void *dst, u32 len, u32 flags)
 {
 	u32 status;
 	u32 counter = AES_OP_TIMEOUT;
 
-	iowrite32(virt_to_phys(src), _iobase + AES_SOURCEA_REG);
+	iowrite32(virt_to_phys((void *)src), _iobase + AES_SOURCEA_REG);
 	iowrite32(virt_to_phys(dst), _iobase + AES_DSTA_REG);
 	iowrite32(len,  _iobase + AES_LENA_REG);
 
@@ -64,16 +65,14 @@ do_crypt(void *src, void *dst, int len, u32 flags)
 	return counter ? 0 : 1;
 }
 
-static unsigned int
-geode_aes_crypt(struct geode_aes_op *op)
+static void
+geode_aes_crypt(const struct geode_aes_tfm_ctx *tctx, const void *src,
+		void *dst, u32 len, u8 *iv, int mode, int dir)
 {
 	u32 flags = 0;
 	unsigned long iflags;
 	int ret;
 
-	if (op->len == 0)
-		return 0;
-
 	/* If the source and destination is the same, then
 	 * we need to turn on the coherent flags, otherwise
 	 * we don't need to worry
@@ -81,32 +80,28 @@ geode_aes_crypt(struct geode_aes_op *op)
 
 	flags |= (AES_CTRL_DCA | AES_CTRL_SCA);
 
-	if (op->dir == AES_DIR_ENCRYPT)
+	if (dir == AES_DIR_ENCRYPT)
 		flags |= AES_CTRL_ENCRYPT;
 
 	/* Start the critical section */
 
 	spin_lock_irqsave(&lock, iflags);
 
-	if (op->mode == AES_MODE_CBC) {
+	if (mode == AES_MODE_CBC) {
 		flags |= AES_CTRL_CBC;
-		_writefield(AES_WRITEIV0_REG, op->iv);
+		_writefield(AES_WRITEIV0_REG, iv);
 	}
 
-	if (!(op->flags & AES_FLAGS_HIDDENKEY)) {
-		flags |= AES_CTRL_WRKEY;
-		_writefield(AES_WRITEKEY0_REG, op->key);
-	}
+	flags |= AES_CTRL_WRKEY;
+	_writefield(AES_WRITEKEY0_REG, tctx->key);
 
-	ret = do_crypt(op->src, op->dst, op->len, flags);
+	ret = do_crypt(src, dst, len, flags);
 	BUG_ON(ret);
 
-	if (op->mode == AES_MODE_CBC)
-		_readfield(AES_WRITEIV0_REG, op->iv);
+	if (mode == AES_MODE_CBC)
+		_readfield(AES_WRITEIV0_REG, iv);
 
 	spin_unlock_irqrestore(&lock, iflags);
-
-	return op->len;
 }
 
 /* CRYPTO-API Functions */
@@ -114,154 +109,96 @@ geode_aes_crypt(struct geode_aes_op *op)
 static int geode_setkey_cip(struct crypto_tfm *tfm, const u8 *key,
 		unsigned int len)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
-	unsigned int ret;
+	struct geode_aes_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	op->keylen = len;
+	tctx->keylen = len;
 
 	if (len == AES_KEYSIZE_128) {
-		memcpy(op->key, key, len);
+		memcpy(tctx->key, key, len);
 		return 0;
 	}
 
-	if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) {
+	if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256)
 		/* not supported at all */
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
-	}
 
 	/*
 	 * The requested key size is not supported by HW, do a fallback
 	 */
-	op->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
-	op->fallback.cip->base.crt_flags |= (tfm->crt_flags & CRYPTO_TFM_REQ_MASK);
+	tctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	tctx->fallback.cip->base.crt_flags |=
+		(tfm->crt_flags & CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_cipher_setkey(op->fallback.cip, key, len);
-	if (ret) {
-		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-		tfm->crt_flags |= (op->fallback.cip->base.crt_flags & CRYPTO_TFM_RES_MASK);
-	}
-	return ret;
+	return crypto_cipher_setkey(tctx->fallback.cip, key, len);
 }
 
-static int geode_setkey_blk(struct crypto_tfm *tfm, const u8 *key,
-		unsigned int len)
+static int geode_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				 unsigned int len)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
-	unsigned int ret;
+	struct geode_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
 
-	op->keylen = len;
+	tctx->keylen = len;
 
 	if (len == AES_KEYSIZE_128) {
-		memcpy(op->key, key, len);
+		memcpy(tctx->key, key, len);
 		return 0;
 	}
 
-	if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) {
+	if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256)
 		/* not supported at all */
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
-	}
 
 	/*
 	 * The requested key size is not supported by HW, do a fallback
 	 */
-	op->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
-	op->fallback.blk->base.crt_flags |= (tfm->crt_flags & CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_blkcipher_setkey(op->fallback.blk, key, len);
-	if (ret) {
-		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-		tfm->crt_flags |= (op->fallback.blk->base.crt_flags & CRYPTO_TFM_RES_MASK);
-	}
-	return ret;
-}
-
-static int fallback_blk_dec(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	unsigned int ret;
-	struct crypto_blkcipher *tfm;
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-
-	tfm = desc->tfm;
-	desc->tfm = op->fallback.blk;
-
-	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
-
-	desc->tfm = tfm;
-	return ret;
-}
-static int fallback_blk_enc(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	unsigned int ret;
-	struct crypto_blkcipher *tfm;
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-
-	tfm = desc->tfm;
-	desc->tfm = op->fallback.blk;
-
-	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
-
-	desc->tfm = tfm;
-	return ret;
+	crypto_skcipher_clear_flags(tctx->fallback.skcipher,
+				    CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(tctx->fallback.skcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(tctx->fallback.skcipher, key, len);
 }
 
 static void
 geode_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
+	const struct geode_aes_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(op->keylen != AES_KEYSIZE_128)) {
-		crypto_cipher_encrypt_one(op->fallback.cip, out, in);
+	if (unlikely(tctx->keylen != AES_KEYSIZE_128)) {
+		crypto_cipher_encrypt_one(tctx->fallback.cip, out, in);
 		return;
 	}
 
-	op->src = (void *) in;
-	op->dst = (void *) out;
-	op->mode = AES_MODE_ECB;
-	op->flags = 0;
-	op->len = AES_BLOCK_SIZE;
-	op->dir = AES_DIR_ENCRYPT;
-
-	geode_aes_crypt(op);
+	geode_aes_crypt(tctx, in, out, AES_BLOCK_SIZE, NULL,
+			AES_MODE_ECB, AES_DIR_ENCRYPT);
 }
 
 
 static void
 geode_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
+	const struct geode_aes_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(op->keylen != AES_KEYSIZE_128)) {
-		crypto_cipher_decrypt_one(op->fallback.cip, out, in);
+	if (unlikely(tctx->keylen != AES_KEYSIZE_128)) {
+		crypto_cipher_decrypt_one(tctx->fallback.cip, out, in);
 		return;
 	}
 
-	op->src = (void *) in;
-	op->dst = (void *) out;
-	op->mode = AES_MODE_ECB;
-	op->flags = 0;
-	op->len = AES_BLOCK_SIZE;
-	op->dir = AES_DIR_DECRYPT;
-
-	geode_aes_crypt(op);
+	geode_aes_crypt(tctx, in, out, AES_BLOCK_SIZE, NULL,
+			AES_MODE_ECB, AES_DIR_DECRYPT);
 }
 
 static int fallback_init_cip(struct crypto_tfm *tfm)
 {
 	const char *name = crypto_tfm_alg_name(tfm);
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
+	struct geode_aes_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	op->fallback.cip = crypto_alloc_cipher(name, 0,
-					       CRYPTO_ALG_NEED_FALLBACK);
+	tctx->fallback.cip = crypto_alloc_cipher(name, 0,
+						 CRYPTO_ALG_NEED_FALLBACK);
 
-	if (IS_ERR(op->fallback.cip)) {
+	if (IS_ERR(tctx->fallback.cip)) {
 		printk(KERN_ERR "Error allocating fallback algo %s\n", name);
-		return PTR_ERR(op->fallback.cip);
+		return PTR_ERR(tctx->fallback.cip);
 	}
 
 	return 0;
@@ -269,10 +206,9 @@ static int fallback_init_cip(struct crypto_tfm *tfm)
 
 static void fallback_exit_cip(struct crypto_tfm *tfm)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
+	struct geode_aes_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_cipher(op->fallback.cip);
-	op->fallback.cip = NULL;
+	crypto_free_cipher(tctx->fallback.cip);
 }
 
 static struct crypto_alg geode_alg = {
@@ -285,7 +221,7 @@ static struct crypto_alg geode_alg = {
 	.cra_init			=	fallback_init_cip,
 	.cra_exit			=	fallback_exit_cip,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct geode_aes_op),
+	.cra_ctxsize		=	sizeof(struct geode_aes_tfm_ctx),
 	.cra_module			=	THIS_MODULE,
 	.cra_u				=	{
 		.cipher	=	{
@@ -298,209 +234,126 @@ static struct crypto_alg geode_alg = {
 	}
 };
 
-static int
-geode_cbc_decrypt(struct blkcipher_desc *desc,
-		  struct scatterlist *dst, struct scatterlist *src,
-		  unsigned int nbytes)
+static int geode_init_skcipher(struct crypto_skcipher *tfm)
 {
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err, ret;
-
-	if (unlikely(op->keylen != AES_KEYSIZE_128))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	op->iv = walk.iv;
-
-	while ((nbytes = walk.nbytes)) {
-		op->src = walk.src.virt.addr,
-		op->dst = walk.dst.virt.addr;
-		op->mode = AES_MODE_CBC;
-		op->len = nbytes - (nbytes % AES_BLOCK_SIZE);
-		op->dir = AES_DIR_DECRYPT;
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct geode_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
 
-		ret = geode_aes_crypt(op);
-
-		nbytes -= ret;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	tctx->fallback.skcipher =
+		crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK |
+				      CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tctx->fallback.skcipher)) {
+		printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+		return PTR_ERR(tctx->fallback.skcipher);
 	}
 
-	return err;
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(tctx->fallback.skcipher));
+	return 0;
 }
 
-static int
-geode_cbc_encrypt(struct blkcipher_desc *desc,
-		  struct scatterlist *dst, struct scatterlist *src,
-		  unsigned int nbytes)
+static void geode_exit_skcipher(struct crypto_skcipher *tfm)
 {
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err, ret;
-
-	if (unlikely(op->keylen != AES_KEYSIZE_128))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	op->iv = walk.iv;
-
-	while ((nbytes = walk.nbytes)) {
-		op->src = walk.src.virt.addr,
-		op->dst = walk.dst.virt.addr;
-		op->mode = AES_MODE_CBC;
-		op->len = nbytes - (nbytes % AES_BLOCK_SIZE);
-		op->dir = AES_DIR_ENCRYPT;
-
-		ret = geode_aes_crypt(op);
-		nbytes -= ret;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+	struct geode_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
 
-	return err;
+	crypto_free_skcipher(tctx->fallback.skcipher);
 }
 
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int geode_skcipher_crypt(struct skcipher_request *req, int mode, int dir)
 {
-	const char *name = crypto_tfm_alg_name(tfm);
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct geode_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	if (unlikely(tctx->keylen != AES_KEYSIZE_128)) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+		*subreq = *req;
+		skcipher_request_set_tfm(subreq, tctx->fallback.skcipher);
+		if (dir == AES_DIR_DECRYPT)
+			return crypto_skcipher_decrypt(subreq);
+		else
+			return crypto_skcipher_encrypt(subreq);
+	}
 
-	op->fallback.blk = crypto_alloc_blkcipher(name, 0,
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	if (IS_ERR(op->fallback.blk)) {
-		printk(KERN_ERR "Error allocating fallback algo %s\n", name);
-		return PTR_ERR(op->fallback.blk);
+	while ((nbytes = walk.nbytes) != 0) {
+		geode_aes_crypt(tctx, walk.src.virt.addr, walk.dst.virt.addr,
+				round_down(nbytes, AES_BLOCK_SIZE),
+				walk.iv, mode, dir);
+		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
 	}
 
-	return 0;
+	return err;
 }
 
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static int geode_cbc_encrypt(struct skcipher_request *req)
 {
-	struct geode_aes_op *op = crypto_tfm_ctx(tfm);
-
-	crypto_free_blkcipher(op->fallback.blk);
-	op->fallback.blk = NULL;
+	return geode_skcipher_crypt(req, AES_MODE_CBC, AES_DIR_ENCRYPT);
 }
 
-static struct crypto_alg geode_cbc_alg = {
-	.cra_name		=	"cbc(aes)",
-	.cra_driver_name	=	"cbc-aes-geode",
-	.cra_priority		=	400,
-	.cra_flags			=	CRYPTO_ALG_TYPE_BLKCIPHER |
-						CRYPTO_ALG_KERN_DRIVER_ONLY |
-						CRYPTO_ALG_NEED_FALLBACK,
-	.cra_init			=	fallback_init_blk,
-	.cra_exit			=	fallback_exit_blk,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct geode_aes_op),
-	.cra_alignmask		=	15,
-	.cra_type			=	&crypto_blkcipher_type,
-	.cra_module			=	THIS_MODULE,
-	.cra_u				=	{
-		.blkcipher	=	{
-			.min_keysize	=	AES_MIN_KEY_SIZE,
-			.max_keysize	=	AES_MAX_KEY_SIZE,
-			.setkey			=	geode_setkey_blk,
-			.encrypt		=	geode_cbc_encrypt,
-			.decrypt		=	geode_cbc_decrypt,
-			.ivsize			=	AES_BLOCK_SIZE,
-		}
-	}
-};
-
-static int
-geode_ecb_decrypt(struct blkcipher_desc *desc,
-		  struct scatterlist *dst, struct scatterlist *src,
-		  unsigned int nbytes)
+static int geode_cbc_decrypt(struct skcipher_request *req)
 {
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err, ret;
-
-	if (unlikely(op->keylen != AES_KEYSIZE_128))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		op->src = walk.src.virt.addr,
-		op->dst = walk.dst.virt.addr;
-		op->mode = AES_MODE_ECB;
-		op->len = nbytes - (nbytes % AES_BLOCK_SIZE);
-		op->dir = AES_DIR_DECRYPT;
-
-		ret = geode_aes_crypt(op);
-		nbytes -= ret;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
+	return geode_skcipher_crypt(req, AES_MODE_CBC, AES_DIR_DECRYPT);
 }
 
-static int
-geode_ecb_encrypt(struct blkcipher_desc *desc,
-		  struct scatterlist *dst, struct scatterlist *src,
-		  unsigned int nbytes)
+static int geode_ecb_encrypt(struct skcipher_request *req)
 {
-	struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err, ret;
-
-	if (unlikely(op->keylen != AES_KEYSIZE_128))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		op->src = walk.src.virt.addr,
-		op->dst = walk.dst.virt.addr;
-		op->mode = AES_MODE_ECB;
-		op->len = nbytes - (nbytes % AES_BLOCK_SIZE);
-		op->dir = AES_DIR_ENCRYPT;
-
-		ret = geode_aes_crypt(op);
-		nbytes -= ret;
-		ret =  blkcipher_walk_done(desc, &walk, nbytes);
-	}
+	return geode_skcipher_crypt(req, AES_MODE_ECB, AES_DIR_ENCRYPT);
+}
 
-	return err;
+static int geode_ecb_decrypt(struct skcipher_request *req)
+{
+	return geode_skcipher_crypt(req, AES_MODE_ECB, AES_DIR_DECRYPT);
 }
 
-static struct crypto_alg geode_ecb_alg = {
-	.cra_name			=	"ecb(aes)",
-	.cra_driver_name	=	"ecb-aes-geode",
-	.cra_priority		=	400,
-	.cra_flags			=	CRYPTO_ALG_TYPE_BLKCIPHER |
-						CRYPTO_ALG_KERN_DRIVER_ONLY |
-						CRYPTO_ALG_NEED_FALLBACK,
-	.cra_init			=	fallback_init_blk,
-	.cra_exit			=	fallback_exit_blk,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct geode_aes_op),
-	.cra_alignmask		=	15,
-	.cra_type			=	&crypto_blkcipher_type,
-	.cra_module			=	THIS_MODULE,
-	.cra_u				=	{
-		.blkcipher	=	{
-			.min_keysize	=	AES_MIN_KEY_SIZE,
-			.max_keysize	=	AES_MAX_KEY_SIZE,
-			.setkey			=	geode_setkey_blk,
-			.encrypt		=	geode_ecb_encrypt,
-			.decrypt		=	geode_ecb_decrypt,
-		}
-	}
+static struct skcipher_alg geode_skcipher_algs[] = {
+	{
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-geode",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct geode_aes_tfm_ctx),
+		.base.cra_alignmask	= 15,
+		.base.cra_module	= THIS_MODULE,
+		.init			= geode_init_skcipher,
+		.exit			= geode_exit_skcipher,
+		.setkey			= geode_setkey_skcipher,
+		.encrypt		= geode_cbc_encrypt,
+		.decrypt		= geode_cbc_decrypt,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+	}, {
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-geode",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct geode_aes_tfm_ctx),
+		.base.cra_alignmask	= 15,
+		.base.cra_module	= THIS_MODULE,
+		.init			= geode_init_skcipher,
+		.exit			= geode_exit_skcipher,
+		.setkey			= geode_setkey_skcipher,
+		.encrypt		= geode_ecb_encrypt,
+		.decrypt		= geode_ecb_decrypt,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+	},
 };
 
 static void geode_aes_remove(struct pci_dev *dev)
 {
 	crypto_unregister_alg(&geode_alg);
-	crypto_unregister_alg(&geode_ecb_alg);
-	crypto_unregister_alg(&geode_cbc_alg);
+	crypto_unregister_skciphers(geode_skcipher_algs,
+				    ARRAY_SIZE(geode_skcipher_algs));
 
 	pci_iounmap(dev, _iobase);
 	_iobase = NULL;
@@ -538,20 +391,14 @@ static int geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	if (ret)
 		goto eiomap;
 
-	ret = crypto_register_alg(&geode_ecb_alg);
+	ret = crypto_register_skciphers(geode_skcipher_algs,
+					ARRAY_SIZE(geode_skcipher_algs));
 	if (ret)
 		goto ealg;
 
-	ret = crypto_register_alg(&geode_cbc_alg);
-	if (ret)
-		goto eecb;
-
 	dev_notice(&dev->dev, "GEODE AES engine enabled.\n");
 	return 0;
 
- eecb:
-	crypto_unregister_alg(&geode_ecb_alg);
-
  ealg:
 	crypto_unregister_alg(&geode_alg);
 
diff --git a/drivers/crypto/geode-aes.h b/drivers/crypto/geode-aes.h
index 5c6e131a8f9d..6d0a0cdc7647 100644
--- a/drivers/crypto/geode-aes.h
+++ b/drivers/crypto/geode-aes.h
@@ -46,21 +46,10 @@
 
 #define AES_OP_TIMEOUT    0x50000
 
-struct geode_aes_op {
-
-	void *src;
-	void *dst;
-
-	u32 mode;
-	u32 dir;
-	u32 flags;
-	int len;
-
+struct geode_aes_tfm_ctx {
 	u8 key[AES_KEYSIZE_128];
-	u8 *iv;
-
 	union {
-		struct crypto_blkcipher *blk;
+		struct crypto_skcipher *skcipher;
 		struct crypto_cipher *cip;
 	} fallback;
 	u32 keylen;
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index a18e62df68d9..354836468c5d 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -22,6 +22,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 
 static char hifn_pll_ref[sizeof("extNNN")] = "ext";
 module_param_string(hifn_pll_ref, hifn_pll_ref, sizeof(hifn_pll_ref), 0444);
@@ -596,7 +597,7 @@ struct hifn_crypt_result {
 
 struct hifn_crypto_alg {
 	struct list_head	entry;
-	struct crypto_alg	alg;
+	struct skcipher_alg	alg;
 	struct hifn_device	*dev;
 };
 
@@ -1404,7 +1405,7 @@ static void hifn_cipher_walk_exit(struct hifn_cipher_walk *w)
 	w->num = 0;
 }
 
-static int ablkcipher_add(unsigned int *drestp, struct scatterlist *dst,
+static int skcipher_add(unsigned int *drestp, struct scatterlist *dst,
 		unsigned int size, unsigned int *nbytesp)
 {
 	unsigned int copy, drest = *drestp, nbytes = *nbytesp;
@@ -1433,11 +1434,11 @@ static int ablkcipher_add(unsigned int *drestp, struct scatterlist *dst,
 	return idx;
 }
 
-static int hifn_cipher_walk(struct ablkcipher_request *req,
+static int hifn_cipher_walk(struct skcipher_request *req,
 		struct hifn_cipher_walk *w)
 {
 	struct scatterlist *dst, *t;
-	unsigned int nbytes = req->nbytes, offset, copy, diff;
+	unsigned int nbytes = req->cryptlen, offset, copy, diff;
 	int idx, tidx, err;
 
 	tidx = idx = 0;
@@ -1459,7 +1460,7 @@ static int hifn_cipher_walk(struct ablkcipher_request *req,
 
 			t = &w->cache[idx];
 
-			err = ablkcipher_add(&dlen, dst, slen, &nbytes);
+			err = skcipher_add(&dlen, dst, slen, &nbytes);
 			if (err < 0)
 				return err;
 
@@ -1498,7 +1499,7 @@ static int hifn_cipher_walk(struct ablkcipher_request *req,
 
 				dst = &req->dst[idx];
 
-				err = ablkcipher_add(&dlen, dst, nbytes, &nbytes);
+				err = skcipher_add(&dlen, dst, nbytes, &nbytes);
 				if (err < 0)
 					return err;
 
@@ -1518,13 +1519,13 @@ static int hifn_cipher_walk(struct ablkcipher_request *req,
 	return tidx;
 }
 
-static int hifn_setup_session(struct ablkcipher_request *req)
+static int hifn_setup_session(struct skcipher_request *req)
 {
 	struct hifn_context *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct hifn_request_context *rctx = ablkcipher_request_ctx(req);
+	struct hifn_request_context *rctx = skcipher_request_ctx(req);
 	struct hifn_device *dev = ctx->dev;
 	unsigned long dlen, flags;
-	unsigned int nbytes = req->nbytes, idx = 0;
+	unsigned int nbytes = req->cryptlen, idx = 0;
 	int err = -EINVAL, sg_num;
 	struct scatterlist *dst;
 
@@ -1563,7 +1564,7 @@ static int hifn_setup_session(struct ablkcipher_request *req)
 		goto err_out;
 	}
 
-	err = hifn_setup_dma(dev, ctx, rctx, req->src, req->dst, req->nbytes, req);
+	err = hifn_setup_dma(dev, ctx, rctx, req->src, req->dst, req->cryptlen, req);
 	if (err)
 		goto err_out;
 
@@ -1610,7 +1611,7 @@ static int hifn_start_device(struct hifn_device *dev)
 	return 0;
 }
 
-static int ablkcipher_get(void *saddr, unsigned int *srestp, unsigned int offset,
+static int skcipher_get(void *saddr, unsigned int *srestp, unsigned int offset,
 		struct scatterlist *dst, unsigned int size, unsigned int *nbytesp)
 {
 	unsigned int srest = *srestp, nbytes = *nbytesp, copy;
@@ -1660,12 +1661,12 @@ static inline void hifn_complete_sa(struct hifn_device *dev, int i)
 	BUG_ON(dev->started < 0);
 }
 
-static void hifn_process_ready(struct ablkcipher_request *req, int error)
+static void hifn_process_ready(struct skcipher_request *req, int error)
 {
-	struct hifn_request_context *rctx = ablkcipher_request_ctx(req);
+	struct hifn_request_context *rctx = skcipher_request_ctx(req);
 
 	if (rctx->walk.flags & ASYNC_FLAGS_MISALIGNED) {
-		unsigned int nbytes = req->nbytes;
+		unsigned int nbytes = req->cryptlen;
 		int idx = 0, err;
 		struct scatterlist *dst, *t;
 		void *saddr;
@@ -1688,7 +1689,7 @@ static void hifn_process_ready(struct ablkcipher_request *req, int error)
 
 			saddr = kmap_atomic(sg_page(t));
 
-			err = ablkcipher_get(saddr, &t->length, t->offset,
+			err = skcipher_get(saddr, &t->length, t->offset,
 					dst, nbytes, &nbytes);
 			if (err < 0) {
 				kunmap_atomic(saddr);
@@ -1910,7 +1911,7 @@ static void hifn_flush(struct hifn_device *dev)
 {
 	unsigned long flags;
 	struct crypto_async_request *async_req;
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
 	int i;
 
@@ -1926,7 +1927,7 @@ static void hifn_flush(struct hifn_device *dev)
 
 	spin_lock_irqsave(&dev->lock, flags);
 	while ((async_req = crypto_dequeue_request(&dev->queue))) {
-		req = ablkcipher_request_cast(async_req);
+		req = skcipher_request_cast(async_req);
 		spin_unlock_irqrestore(&dev->lock, flags);
 
 		hifn_process_ready(req, -ENODEV);
@@ -1936,14 +1937,14 @@ static void hifn_flush(struct hifn_device *dev)
 	spin_unlock_irqrestore(&dev->lock, flags);
 }
 
-static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int hifn_setkey(struct crypto_skcipher *cipher, const u8 *key,
 		unsigned int len)
 {
-	struct hifn_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct hifn_context *ctx = crypto_skcipher_ctx(cipher);
 	struct hifn_device *dev = ctx->dev;
 	int err;
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1955,14 +1956,14 @@ static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int hifn_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int hifn_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			    unsigned int len)
 {
-	struct hifn_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct hifn_context *ctx = crypto_skcipher_ctx(cipher);
 	struct hifn_device *dev = ctx->dev;
 	int err;
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1974,36 +1975,36 @@ static int hifn_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int hifn_handle_req(struct ablkcipher_request *req)
+static int hifn_handle_req(struct skcipher_request *req)
 {
 	struct hifn_context *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct hifn_device *dev = ctx->dev;
 	int err = -EAGAIN;
 
-	if (dev->started + DIV_ROUND_UP(req->nbytes, PAGE_SIZE) <= HIFN_QUEUE_LENGTH)
+	if (dev->started + DIV_ROUND_UP(req->cryptlen, PAGE_SIZE) <= HIFN_QUEUE_LENGTH)
 		err = hifn_setup_session(req);
 
 	if (err == -EAGAIN) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dev->lock, flags);
-		err = ablkcipher_enqueue_request(&dev->queue, req);
+		err = crypto_enqueue_request(&dev->queue, &req->base);
 		spin_unlock_irqrestore(&dev->lock, flags);
 	}
 
 	return err;
 }
 
-static int hifn_setup_crypto_req(struct ablkcipher_request *req, u8 op,
+static int hifn_setup_crypto_req(struct skcipher_request *req, u8 op,
 		u8 type, u8 mode)
 {
 	struct hifn_context *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct hifn_request_context *rctx = ablkcipher_request_ctx(req);
+	struct hifn_request_context *rctx = skcipher_request_ctx(req);
 	unsigned ivsize;
 
-	ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
+	ivsize = crypto_skcipher_ivsize(crypto_skcipher_reqtfm(req));
 
-	if (req->info && mode != ACRYPTO_MODE_ECB) {
+	if (req->iv && mode != ACRYPTO_MODE_ECB) {
 		if (type == ACRYPTO_TYPE_AES_128)
 			ivsize = HIFN_AES_IV_LENGTH;
 		else if (type == ACRYPTO_TYPE_DES)
@@ -2022,7 +2023,7 @@ static int hifn_setup_crypto_req(struct ablkcipher_request *req, u8 op,
 	rctx->op = op;
 	rctx->mode = mode;
 	rctx->type = type;
-	rctx->iv = req->info;
+	rctx->iv = req->iv;
 	rctx->ivsize = ivsize;
 
 	/*
@@ -2037,7 +2038,7 @@ static int hifn_setup_crypto_req(struct ablkcipher_request *req, u8 op,
 static int hifn_process_queue(struct hifn_device *dev)
 {
 	struct crypto_async_request *async_req, *backlog;
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	unsigned long flags;
 	int err = 0;
 
@@ -2053,7 +2054,7 @@ static int hifn_process_queue(struct hifn_device *dev)
 		if (backlog)
 			backlog->complete(backlog, -EINPROGRESS);
 
-		req = ablkcipher_request_cast(async_req);
+		req = skcipher_request_cast(async_req);
 
 		err = hifn_handle_req(req);
 		if (err)
@@ -2063,7 +2064,7 @@ static int hifn_process_queue(struct hifn_device *dev)
 	return err;
 }
 
-static int hifn_setup_crypto(struct ablkcipher_request *req, u8 op,
+static int hifn_setup_crypto(struct skcipher_request *req, u8 op,
 		u8 type, u8 mode)
 {
 	int err;
@@ -2083,22 +2084,22 @@ static int hifn_setup_crypto(struct ablkcipher_request *req, u8 op,
 /*
  * AES ecryption functions.
  */
-static inline int hifn_encrypt_aes_ecb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_aes_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_encrypt_aes_cbc(struct ablkcipher_request *req)
+static inline int hifn_encrypt_aes_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_encrypt_aes_cfb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_aes_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_encrypt_aes_ofb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_aes_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_OFB);
@@ -2107,22 +2108,22 @@ static inline int hifn_encrypt_aes_ofb(struct ablkcipher_request *req)
 /*
  * AES decryption functions.
  */
-static inline int hifn_decrypt_aes_ecb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_aes_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_decrypt_aes_cbc(struct ablkcipher_request *req)
+static inline int hifn_decrypt_aes_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_decrypt_aes_cfb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_aes_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_decrypt_aes_ofb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_aes_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_OFB);
@@ -2131,22 +2132,22 @@ static inline int hifn_decrypt_aes_ofb(struct ablkcipher_request *req)
 /*
  * DES ecryption functions.
  */
-static inline int hifn_encrypt_des_ecb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_des_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_encrypt_des_cbc(struct ablkcipher_request *req)
+static inline int hifn_encrypt_des_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_encrypt_des_cfb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_des_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_encrypt_des_ofb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_des_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_OFB);
@@ -2155,22 +2156,22 @@ static inline int hifn_encrypt_des_ofb(struct ablkcipher_request *req)
 /*
  * DES decryption functions.
  */
-static inline int hifn_decrypt_des_ecb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_des_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_decrypt_des_cbc(struct ablkcipher_request *req)
+static inline int hifn_decrypt_des_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_decrypt_des_cfb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_des_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_decrypt_des_ofb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_des_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_DES, ACRYPTO_MODE_OFB);
@@ -2179,44 +2180,44 @@ static inline int hifn_decrypt_des_ofb(struct ablkcipher_request *req)
 /*
  * 3DES ecryption functions.
  */
-static inline int hifn_encrypt_3des_ecb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_3des_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_encrypt_3des_cbc(struct ablkcipher_request *req)
+static inline int hifn_encrypt_3des_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_encrypt_3des_cfb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_3des_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_encrypt_3des_ofb(struct ablkcipher_request *req)
+static inline int hifn_encrypt_3des_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_ENCRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_OFB);
 }
 
 /* 3DES decryption functions. */
-static inline int hifn_decrypt_3des_ecb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_3des_ecb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_ECB);
 }
-static inline int hifn_decrypt_3des_cbc(struct ablkcipher_request *req)
+static inline int hifn_decrypt_3des_cbc(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_CBC);
 }
-static inline int hifn_decrypt_3des_cfb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_3des_cfb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_CFB);
 }
-static inline int hifn_decrypt_3des_ofb(struct ablkcipher_request *req)
+static inline int hifn_decrypt_3des_ofb(struct skcipher_request *req)
 {
 	return hifn_setup_crypto(req, ACRYPTO_OP_DECRYPT,
 			ACRYPTO_TYPE_3DES, ACRYPTO_MODE_OFB);
@@ -2226,16 +2227,16 @@ struct hifn_alg_template {
 	char name[CRYPTO_MAX_ALG_NAME];
 	char drv_name[CRYPTO_MAX_ALG_NAME];
 	unsigned int bsize;
-	struct ablkcipher_alg ablkcipher;
+	struct skcipher_alg skcipher;
 };
 
-static struct hifn_alg_template hifn_alg_templates[] = {
+static const struct hifn_alg_template hifn_alg_templates[] = {
 	/*
 	 * 3DES ECB, CBC, CFB and OFB modes.
 	 */
 	{
 		.name = "cfb(des3_ede)", .drv_name = "cfb-3des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.setkey		=	hifn_des3_setkey,
@@ -2245,7 +2246,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "ofb(des3_ede)", .drv_name = "ofb-3des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.setkey		=	hifn_des3_setkey,
@@ -2255,7 +2256,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "cbc(des3_ede)", .drv_name = "cbc-3des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.ivsize		=	HIFN_IV_LENGTH,
 			.min_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_3DES_KEY_LENGTH,
@@ -2266,7 +2267,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "ecb(des3_ede)", .drv_name = "ecb-3des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_3DES_KEY_LENGTH,
 			.setkey		=	hifn_des3_setkey,
@@ -2280,7 +2281,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	 */
 	{
 		.name = "cfb(des)", .drv_name = "cfb-des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_DES_KEY_LENGTH,
 			.setkey		=	hifn_setkey,
@@ -2290,7 +2291,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "ofb(des)", .drv_name = "ofb-des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_DES_KEY_LENGTH,
 			.setkey		=	hifn_setkey,
@@ -2300,7 +2301,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "cbc(des)", .drv_name = "cbc-des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.ivsize		=	HIFN_IV_LENGTH,
 			.min_keysize	=	HIFN_DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_DES_KEY_LENGTH,
@@ -2311,7 +2312,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "ecb(des)", .drv_name = "ecb-des", .bsize = 8,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	HIFN_DES_KEY_LENGTH,
 			.max_keysize	=	HIFN_DES_KEY_LENGTH,
 			.setkey		=	hifn_setkey,
@@ -2325,7 +2326,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	 */
 	{
 		.name = "ecb(aes)", .drv_name = "ecb-aes", .bsize = 16,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	AES_MIN_KEY_SIZE,
 			.max_keysize	=	AES_MAX_KEY_SIZE,
 			.setkey		=	hifn_setkey,
@@ -2335,7 +2336,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "cbc(aes)", .drv_name = "cbc-aes", .bsize = 16,
-		.ablkcipher = {
+		.skcipher = {
 			.ivsize		=	HIFN_AES_IV_LENGTH,
 			.min_keysize	=	AES_MIN_KEY_SIZE,
 			.max_keysize	=	AES_MAX_KEY_SIZE,
@@ -2346,7 +2347,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "cfb(aes)", .drv_name = "cfb-aes", .bsize = 16,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	AES_MIN_KEY_SIZE,
 			.max_keysize	=	AES_MAX_KEY_SIZE,
 			.setkey		=	hifn_setkey,
@@ -2356,7 +2357,7 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 	{
 		.name = "ofb(aes)", .drv_name = "ofb-aes", .bsize = 16,
-		.ablkcipher = {
+		.skcipher = {
 			.min_keysize	=	AES_MIN_KEY_SIZE,
 			.max_keysize	=	AES_MAX_KEY_SIZE,
 			.setkey		=	hifn_setkey,
@@ -2366,18 +2367,19 @@ static struct hifn_alg_template hifn_alg_templates[] = {
 	},
 };
 
-static int hifn_cra_init(struct crypto_tfm *tfm)
+static int hifn_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct hifn_crypto_alg *ha = crypto_alg_to_hifn(alg);
-	struct hifn_context *ctx = crypto_tfm_ctx(tfm);
+	struct hifn_context *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->dev = ha->dev;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct hifn_request_context);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct hifn_request_context));
+
 	return 0;
 }
 
-static int hifn_alg_alloc(struct hifn_device *dev, struct hifn_alg_template *t)
+static int hifn_alg_alloc(struct hifn_device *dev, const struct hifn_alg_template *t)
 {
 	struct hifn_crypto_alg *alg;
 	int err;
@@ -2386,26 +2388,25 @@ static int hifn_alg_alloc(struct hifn_device *dev, struct hifn_alg_template *t)
 	if (!alg)
 		return -ENOMEM;
 
-	snprintf(alg->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s", t->name);
-	snprintf(alg->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-%s",
+	alg->alg = t->skcipher;
+	alg->alg.init = hifn_init_tfm;
+
+	snprintf(alg->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", t->name);
+	snprintf(alg->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-%s",
 		 t->drv_name, dev->name);
 
-	alg->alg.cra_priority = 300;
-	alg->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC;
-	alg->alg.cra_blocksize = t->bsize;
-	alg->alg.cra_ctxsize = sizeof(struct hifn_context);
-	alg->alg.cra_alignmask = 0;
-	alg->alg.cra_type = &crypto_ablkcipher_type;
-	alg->alg.cra_module = THIS_MODULE;
-	alg->alg.cra_u.ablkcipher = t->ablkcipher;
-	alg->alg.cra_init = hifn_cra_init;
+	alg->alg.base.cra_priority = 300;
+	alg->alg.base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC;
+	alg->alg.base.cra_blocksize = t->bsize;
+	alg->alg.base.cra_ctxsize = sizeof(struct hifn_context);
+	alg->alg.base.cra_alignmask = 0;
+	alg->alg.base.cra_module = THIS_MODULE;
 
 	alg->dev = dev;
 
 	list_add_tail(&alg->entry, &dev->alg_list);
 
-	err = crypto_register_alg(&alg->alg);
+	err = crypto_register_skcipher(&alg->alg);
 	if (err) {
 		list_del(&alg->entry);
 		kfree(alg);
@@ -2420,7 +2421,7 @@ static void hifn_unregister_alg(struct hifn_device *dev)
 
 	list_for_each_entry_safe(a, n, &dev->alg_list, entry) {
 		list_del(&a->entry);
-		crypto_unregister_alg(&a->alg);
+		crypto_unregister_skcipher(&a->alg);
 		kfree(a);
 	}
 }
@@ -2506,7 +2507,7 @@ static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		addr = pci_resource_start(pdev, i);
 		size = pci_resource_len(pdev, i);
 
-		dev->bar[i] = ioremap_nocache(addr, size);
+		dev->bar[i] = ioremap(addr, size);
 		if (!dev->bar[i]) {
 			err = -ENOMEM;
 			goto err_out_unmap_bars;
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index ebaf91e0146d..8851161f722f 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -2,7 +2,7 @@
 
 config CRYPTO_DEV_HISI_SEC
 	tristate "Support for Hisilicon SEC crypto block cipher accelerator"
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_ALGAPI
 	select CRYPTO_LIB_DES
 	select SG_SPLIT
@@ -14,26 +14,52 @@ config CRYPTO_DEV_HISI_SEC
 	  To compile this as a module, choose M here: the module
 	  will be called hisi_sec.
 
+config CRYPTO_DEV_HISI_SEC2
+	tristate "Support for HiSilicon SEC2 crypto block cipher accelerator"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_DES
+	select CRYPTO_DEV_HISI_QM
+	select CRYPTO_AEAD
+	select CRYPTO_AUTHENC
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	depends on PCI && PCI_MSI
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	help
+	  Support for HiSilicon SEC Engine of version 2 in crypto subsystem.
+	  It provides AES, SM4, and 3DES algorithms with ECB
+	  CBC, and XTS cipher mode, and AEAD algorithms.
+
+	  To compile this as a module, choose M here: the module
+          will be called hisi_sec2.
+
 config CRYPTO_DEV_HISI_QM
 	tristate
-	depends on ARM64 && PCI && PCI_MSI
+	depends on ARM64 || COMPILE_TEST
+	depends on PCI && PCI_MSI
 	help
 	  HiSilicon accelerator engines use a common queue management
 	  interface. Specific engine driver may use this module.
 
-config CRYPTO_HISI_SGL
-	tristate
-	depends on ARM64
-	help
-	  HiSilicon accelerator engines use a common hardware scatterlist
-	  interface for data format. Specific engine driver may use this
-	  module.
-
 config CRYPTO_DEV_HISI_ZIP
 	tristate "Support for HiSilicon ZIP accelerator"
-	depends on ARM64 && PCI && PCI_MSI
+	depends on PCI && PCI_MSI
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on !CPU_BIG_ENDIAN || COMPILE_TEST
 	select CRYPTO_DEV_HISI_QM
-	select CRYPTO_HISI_SGL
-	select SG_SPLIT
 	help
 	  Support for HiSilicon ZIP Driver
+
+config CRYPTO_DEV_HISI_HPRE
+	tristate "Support for HISI HPRE accelerator"
+	depends on PCI && PCI_MSI
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	select CRYPTO_DEV_HISI_QM
+	select CRYPTO_DH
+	select CRYPTO_RSA
+	help
+	  Support for HiSilicon HPRE(High Performance RSA Engine)
+	  accelerator, which can accelerate RSA and DH algorithms.
diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile
index 45a279741126..7f5f74c72baa 100644
--- a/drivers/crypto/hisilicon/Makefile
+++ b/drivers/crypto/hisilicon/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hpre/
 obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/
-obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += qm.o
-obj-$(CONFIG_CRYPTO_HISI_SGL) += sgl.o
+obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/
+obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o
+hisi_qm-objs = qm.o sgl.o
 obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/
diff --git a/drivers/crypto/hisilicon/hpre/Makefile b/drivers/crypto/hisilicon/hpre/Makefile
new file mode 100644
index 000000000000..4fd32b789e1e
--- /dev/null
+++ b/drivers/crypto/hisilicon/hpre/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hisi_hpre.o
+hisi_hpre-objs = hpre_main.o hpre_crypto.o
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
new file mode 100644
index 000000000000..ddf13ea9862a
--- /dev/null
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+#ifndef __HISI_HPRE_H
+#define __HISI_HPRE_H
+
+#include <linux/list.h>
+#include "../qm.h"
+
+#define HPRE_SQE_SIZE			sizeof(struct hpre_sqe)
+#define HPRE_PF_DEF_Q_NUM		64
+#define HPRE_PF_DEF_Q_BASE		0
+
+enum {
+	HPRE_CLUSTER0,
+	HPRE_CLUSTER1,
+	HPRE_CLUSTER2,
+	HPRE_CLUSTER3,
+	HPRE_CLUSTERS_NUM,
+};
+
+enum hpre_ctrl_dbgfs_file {
+	HPRE_CURRENT_QM,
+	HPRE_CLEAR_ENABLE,
+	HPRE_CLUSTER_CTRL,
+	HPRE_DEBUG_FILE_NUM,
+};
+
+#define HPRE_DEBUGFS_FILE_NUM    (HPRE_DEBUG_FILE_NUM + HPRE_CLUSTERS_NUM - 1)
+
+struct hpre_debugfs_file {
+	int index;
+	enum hpre_ctrl_dbgfs_file type;
+	spinlock_t lock;
+	struct hpre_debug *debug;
+};
+
+/*
+ * One HPRE controller has one PF and multiple VFs, some global configurations
+ * which PF has need this structure.
+ * Just relevant for PF.
+ */
+struct hpre_debug {
+	struct dentry *debug_root;
+	struct hpre_debugfs_file files[HPRE_DEBUGFS_FILE_NUM];
+};
+
+struct hpre {
+	struct hisi_qm qm;
+	struct list_head list;
+	struct hpre_debug debug;
+	u32 num_vfs;
+	unsigned long status;
+};
+
+enum hpre_alg_type {
+	HPRE_ALG_NC_NCRT = 0x0,
+	HPRE_ALG_NC_CRT = 0x1,
+	HPRE_ALG_KG_STD = 0x2,
+	HPRE_ALG_KG_CRT = 0x3,
+	HPRE_ALG_DH_G2 = 0x4,
+	HPRE_ALG_DH = 0x5,
+};
+
+struct hpre_sqe {
+	__le32 dw0;
+	__u8 task_len1;
+	__u8 task_len2;
+	__u8 mrttest_num;
+	__u8 resv1;
+	__le64 key;
+	__le64 in;
+	__le64 out;
+	__le16 tag;
+	__le16 resv2;
+#define _HPRE_SQE_ALIGN_EXT	7
+	__le32 rsvd1[_HPRE_SQE_ALIGN_EXT];
+};
+
+struct hpre *hpre_find_device(int node);
+int hpre_algs_register(void);
+void hpre_algs_unregister(void);
+
+#endif
diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
new file mode 100644
index 000000000000..5d400d69e8e4
--- /dev/null
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -0,0 +1,1134 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+#include <crypto/akcipher.h>
+#include <crypto/dh.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/kpp.h>
+#include <crypto/scatterwalk.h>
+#include <linux/dma-mapping.h>
+#include <linux/fips.h>
+#include <linux/module.h>
+#include "hpre.h"
+
+struct hpre_ctx;
+
+#define HPRE_CRYPTO_ALG_PRI	1000
+#define HPRE_ALIGN_SZ		64
+#define HPRE_BITS_2_BYTES_SHIFT	3
+#define HPRE_RSA_512BITS_KSZ	64
+#define HPRE_RSA_1536BITS_KSZ	192
+#define HPRE_CRT_PRMS		5
+#define HPRE_CRT_Q		2
+#define HPRE_CRT_P		3
+#define HPRE_CRT_INV		4
+#define HPRE_DH_G_FLAG		0x02
+#define HPRE_TRY_SEND_TIMES	100
+#define HPRE_INVLD_REQ_ID		(-1)
+#define HPRE_DEV(ctx)		(&((ctx)->qp->qm->pdev->dev))
+
+#define HPRE_SQE_ALG_BITS	5
+#define HPRE_SQE_DONE_SHIFT	30
+#define HPRE_DH_MAX_P_SZ	512
+
+typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe);
+
+struct hpre_rsa_ctx {
+	/* low address: e--->n */
+	char *pubkey;
+	dma_addr_t dma_pubkey;
+
+	/* low address: d--->n */
+	char *prikey;
+	dma_addr_t dma_prikey;
+
+	/* low address: dq->dp->q->p->qinv */
+	char *crt_prikey;
+	dma_addr_t dma_crt_prikey;
+
+	struct crypto_akcipher *soft_tfm;
+};
+
+struct hpre_dh_ctx {
+	/*
+	 * If base is g we compute the public key
+	 *	ya = g^xa mod p; [RFC2631 sec 2.1.1]
+	 * else if base if the counterpart public key we
+	 * compute the shared secret
+	 *	ZZ = yb^xa mod p; [RFC2631 sec 2.1.1]
+	 */
+	char *xa_p; /* low address: d--->n, please refer to Hisilicon HPRE UM */
+	dma_addr_t dma_xa_p;
+
+	char *g; /* m */
+	dma_addr_t dma_g;
+};
+
+struct hpre_ctx {
+	struct hisi_qp *qp;
+	struct hpre_asym_request **req_list;
+	spinlock_t req_lock;
+	unsigned int key_sz;
+	bool crt_g2_mode;
+	struct idr req_idr;
+	union {
+		struct hpre_rsa_ctx rsa;
+		struct hpre_dh_ctx dh;
+	};
+};
+
+struct hpre_asym_request {
+	char *src;
+	char *dst;
+	struct hpre_sqe req;
+	struct hpre_ctx *ctx;
+	union {
+		struct akcipher_request *rsa;
+		struct kpp_request *dh;
+	} areq;
+	int err;
+	int req_id;
+	hpre_cb cb;
+};
+
+static DEFINE_MUTEX(hpre_alg_lock);
+static unsigned int hpre_active_devs;
+
+static int hpre_alloc_req_id(struct hpre_ctx *ctx)
+{
+	unsigned long flags;
+	int id;
+
+	spin_lock_irqsave(&ctx->req_lock, flags);
+	id = idr_alloc(&ctx->req_idr, NULL, 0, QM_Q_DEPTH, GFP_ATOMIC);
+	spin_unlock_irqrestore(&ctx->req_lock, flags);
+
+	return id;
+}
+
+static void hpre_free_req_id(struct hpre_ctx *ctx, int req_id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->req_lock, flags);
+	idr_remove(&ctx->req_idr, req_id);
+	spin_unlock_irqrestore(&ctx->req_lock, flags);
+}
+
+static int hpre_add_req_to_ctx(struct hpre_asym_request *hpre_req)
+{
+	struct hpre_ctx *ctx;
+	int id;
+
+	ctx = hpre_req->ctx;
+	id = hpre_alloc_req_id(ctx);
+	if (unlikely(id < 0))
+		return -EINVAL;
+
+	ctx->req_list[id] = hpre_req;
+	hpre_req->req_id = id;
+
+	return id;
+}
+
+static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req)
+{
+	struct hpre_ctx *ctx = hpre_req->ctx;
+	int id = hpre_req->req_id;
+
+	if (hpre_req->req_id >= 0) {
+		hpre_req->req_id = HPRE_INVLD_REQ_ID;
+		ctx->req_list[id] = NULL;
+		hpre_free_req_id(ctx, id);
+	}
+}
+
+static struct hisi_qp *hpre_get_qp_and_start(void)
+{
+	struct hisi_qp *qp;
+	struct hpre *hpre;
+	int ret;
+
+	/* find the proper hpre device, which is near the current CPU core */
+	hpre = hpre_find_device(cpu_to_node(smp_processor_id()));
+	if (!hpre) {
+		pr_err("Can not find proper hpre device!\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	qp = hisi_qm_create_qp(&hpre->qm, 0);
+	if (IS_ERR(qp)) {
+		pci_err(hpre->qm.pdev, "Can not create qp!\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	ret = hisi_qm_start_qp(qp, 0);
+	if (ret < 0) {
+		hisi_qm_release_qp(qp);
+		pci_err(hpre->qm.pdev, "Can not start qp!\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return qp;
+}
+
+static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req,
+				  struct scatterlist *data, unsigned int len,
+				  int is_src, dma_addr_t *tmp)
+{
+	struct hpre_ctx *ctx = hpre_req->ctx;
+	struct device *dev = HPRE_DEV(ctx);
+	enum dma_data_direction dma_dir;
+
+	if (is_src) {
+		hpre_req->src = NULL;
+		dma_dir = DMA_TO_DEVICE;
+	} else {
+		hpre_req->dst = NULL;
+		dma_dir = DMA_FROM_DEVICE;
+	}
+	*tmp = dma_map_single(dev, sg_virt(data),
+			      len, dma_dir);
+	if (unlikely(dma_mapping_error(dev, *tmp))) {
+		dev_err(dev, "dma map data err!\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req,
+				struct scatterlist *data, unsigned int len,
+				int is_src, dma_addr_t *tmp)
+{
+	struct hpre_ctx *ctx = hpre_req->ctx;
+	struct device *dev = HPRE_DEV(ctx);
+	void *ptr;
+	int shift;
+
+	shift = ctx->key_sz - len;
+	if (unlikely(shift < 0))
+		return -EINVAL;
+
+	ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_KERNEL);
+	if (unlikely(!ptr))
+		return -ENOMEM;
+
+	if (is_src) {
+		scatterwalk_map_and_copy(ptr + shift, data, 0, len, 0);
+		hpre_req->src = ptr;
+	} else {
+		hpre_req->dst = ptr;
+	}
+
+	return 0;
+}
+
+static int hpre_hw_data_init(struct hpre_asym_request *hpre_req,
+			     struct scatterlist *data, unsigned int len,
+			     int is_src, int is_dh)
+{
+	struct hpre_sqe *msg = &hpre_req->req;
+	struct hpre_ctx *ctx = hpre_req->ctx;
+	dma_addr_t tmp = 0;
+	int ret;
+
+	/* when the data is dh's source, we should format it */
+	if ((sg_is_last(data) && len == ctx->key_sz) &&
+	    ((is_dh && !is_src) || !is_dh))
+		ret = hpre_get_data_dma_addr(hpre_req, data, len, is_src, &tmp);
+	else
+		ret = hpre_prepare_dma_buf(hpre_req, data, len,
+					  is_src, &tmp);
+	if (unlikely(ret))
+		return ret;
+
+	if (is_src)
+		msg->in = cpu_to_le64(tmp);
+	else
+		msg->out = cpu_to_le64(tmp);
+
+	return 0;
+}
+
+static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
+				 struct hpre_asym_request *req,
+				 struct scatterlist *dst,
+				 struct scatterlist *src)
+{
+	struct device *dev = HPRE_DEV(ctx);
+	struct hpre_sqe *sqe = &req->req;
+	dma_addr_t tmp;
+
+	tmp = le64_to_cpu(sqe->in);
+	if (unlikely(!tmp))
+		return;
+
+	if (src) {
+		if (req->src)
+			dma_free_coherent(dev, ctx->key_sz,
+					  req->src, tmp);
+		else
+			dma_unmap_single(dev, tmp,
+					 ctx->key_sz, DMA_TO_DEVICE);
+	}
+
+	tmp = le64_to_cpu(sqe->out);
+	if (unlikely(!tmp))
+		return;
+
+	if (req->dst) {
+		if (dst)
+			scatterwalk_map_and_copy(req->dst, dst, 0,
+						 ctx->key_sz, 1);
+		dma_free_coherent(dev, ctx->key_sz, req->dst, tmp);
+	} else {
+		dma_unmap_single(dev, tmp, ctx->key_sz, DMA_FROM_DEVICE);
+	}
+}
+
+static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
+				void **kreq)
+{
+	struct hpre_asym_request *req;
+	int err, id, done;
+
+#define HPRE_NO_HW_ERR		0
+#define HPRE_HW_TASK_DONE	3
+#define HREE_HW_ERR_MASK	0x7ff
+#define HREE_SQE_DONE_MASK	0x3
+	id = (int)le16_to_cpu(sqe->tag);
+	req = ctx->req_list[id];
+	hpre_rm_req_from_ctx(req);
+	*kreq = req;
+
+	err = (le32_to_cpu(sqe->dw0) >> HPRE_SQE_ALG_BITS) &
+		HREE_HW_ERR_MASK;
+
+	done = (le32_to_cpu(sqe->dw0) >> HPRE_SQE_DONE_SHIFT) &
+		HREE_SQE_DONE_MASK;
+
+	if (likely(err == HPRE_NO_HW_ERR && done == HPRE_HW_TASK_DONE))
+		return  0;
+
+	return -EINVAL;
+}
+
+static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen)
+{
+	if (!ctx || !qp || qlen < 0)
+		return -EINVAL;
+
+	spin_lock_init(&ctx->req_lock);
+	ctx->qp = qp;
+
+	ctx->req_list = kcalloc(qlen, sizeof(void *), GFP_KERNEL);
+	if (!ctx->req_list)
+		return -ENOMEM;
+	ctx->key_sz = 0;
+	ctx->crt_g2_mode = false;
+	idr_init(&ctx->req_idr);
+
+	return 0;
+}
+
+static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all)
+{
+	if (is_clear_all) {
+		idr_destroy(&ctx->req_idr);
+		kfree(ctx->req_list);
+		hisi_qm_release_qp(ctx->qp);
+	}
+
+	ctx->crt_g2_mode = false;
+	ctx->key_sz = 0;
+}
+
+static void hpre_dh_cb(struct hpre_ctx *ctx, void *resp)
+{
+	struct hpre_asym_request *req;
+	struct kpp_request *areq;
+	int ret;
+
+	ret = hpre_alg_res_post_hf(ctx, resp, (void **)&req);
+	areq = req->areq.dh;
+	areq->dst_len = ctx->key_sz;
+	hpre_hw_data_clr_all(ctx, req, areq->dst, areq->src);
+	kpp_request_complete(areq, ret);
+}
+
+static void hpre_rsa_cb(struct hpre_ctx *ctx, void *resp)
+{
+	struct hpre_asym_request *req;
+	struct akcipher_request *areq;
+	int ret;
+
+	ret = hpre_alg_res_post_hf(ctx, resp, (void **)&req);
+	areq = req->areq.rsa;
+	areq->dst_len = ctx->key_sz;
+	hpre_hw_data_clr_all(ctx, req, areq->dst, areq->src);
+	akcipher_request_complete(areq, ret);
+}
+
+static void hpre_alg_cb(struct hisi_qp *qp, void *resp)
+{
+	struct hpre_ctx *ctx = qp->qp_ctx;
+	struct hpre_sqe *sqe = resp;
+
+	ctx->req_list[le16_to_cpu(sqe->tag)]->cb(ctx, resp);
+}
+
+static int hpre_ctx_init(struct hpre_ctx *ctx)
+{
+	struct hisi_qp *qp;
+
+	qp = hpre_get_qp_and_start();
+	if (IS_ERR(qp))
+		return PTR_ERR(qp);
+
+	qp->qp_ctx = ctx;
+	qp->req_cb = hpre_alg_cb;
+
+	return hpre_ctx_set(ctx, qp, QM_Q_DEPTH);
+}
+
+static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
+{
+	struct hpre_asym_request *h_req;
+	struct hpre_sqe *msg;
+	int req_id;
+	void *tmp;
+
+	if (is_rsa) {
+		struct akcipher_request *akreq = req;
+
+		if (akreq->dst_len < ctx->key_sz) {
+			akreq->dst_len = ctx->key_sz;
+			return -EOVERFLOW;
+		}
+
+		tmp = akcipher_request_ctx(akreq);
+		h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+		h_req->cb = hpre_rsa_cb;
+		h_req->areq.rsa = akreq;
+		msg = &h_req->req;
+		memset(msg, 0, sizeof(*msg));
+	} else {
+		struct kpp_request *kreq = req;
+
+		if (kreq->dst_len < ctx->key_sz) {
+			kreq->dst_len = ctx->key_sz;
+			return -EOVERFLOW;
+		}
+
+		tmp = kpp_request_ctx(kreq);
+		h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+		h_req->cb = hpre_dh_cb;
+		h_req->areq.dh = kreq;
+		msg = &h_req->req;
+		memset(msg, 0, sizeof(*msg));
+		msg->key = cpu_to_le64((u64)ctx->dh.dma_xa_p);
+	}
+
+	msg->dw0 |= cpu_to_le32(0x1 << HPRE_SQE_DONE_SHIFT);
+	msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1;
+	h_req->ctx = ctx;
+
+	req_id = hpre_add_req_to_ctx(h_req);
+	if (req_id < 0)
+		return -EBUSY;
+
+	msg->tag = cpu_to_le16((u16)req_id);
+
+	return 0;
+}
+
+#ifdef CONFIG_CRYPTO_DH
+static int hpre_dh_compute_value(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+	void *tmp = kpp_request_ctx(req);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_sqe *msg = &hpre_req->req;
+	int ctr = 0;
+	int ret;
+
+	ret = hpre_msg_request_set(ctx, req, false);
+	if (unlikely(ret))
+		return ret;
+
+	if (req->src) {
+		ret = hpre_hw_data_init(hpre_req, req->src, req->src_len, 1, 1);
+		if (unlikely(ret))
+			goto clear_all;
+	}
+
+	ret = hpre_hw_data_init(hpre_req, req->dst, req->dst_len, 0, 1);
+	if (unlikely(ret))
+		goto clear_all;
+
+	if (ctx->crt_g2_mode && !req->src)
+		msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_DH_G2);
+	else
+		msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_DH);
+	do {
+		ret = hisi_qp_send(ctx->qp, msg);
+	} while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
+
+	/* success */
+	if (likely(!ret))
+		return -EINPROGRESS;
+
+clear_all:
+	hpre_rm_req_from_ctx(hpre_req);
+	hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);
+
+	return ret;
+}
+
+static int hpre_is_dh_params_length_valid(unsigned int key_sz)
+{
+#define _HPRE_DH_GRP1		768
+#define _HPRE_DH_GRP2		1024
+#define _HPRE_DH_GRP5		1536
+#define _HPRE_DH_GRP14		2048
+#define _HPRE_DH_GRP15		3072
+#define _HPRE_DH_GRP16		4096
+	switch (key_sz) {
+	case _HPRE_DH_GRP1:
+	case _HPRE_DH_GRP2:
+	case _HPRE_DH_GRP5:
+	case _HPRE_DH_GRP14:
+	case _HPRE_DH_GRP15:
+	case _HPRE_DH_GRP16:
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
+{
+	struct device *dev = HPRE_DEV(ctx);
+	unsigned int sz;
+
+	if (params->p_size > HPRE_DH_MAX_P_SZ)
+		return -EINVAL;
+
+	if (hpre_is_dh_params_length_valid(params->p_size <<
+					   HPRE_BITS_2_BYTES_SHIFT))
+		return -EINVAL;
+
+	sz = ctx->key_sz = params->p_size;
+	ctx->dh.xa_p = dma_alloc_coherent(dev, sz << 1,
+					  &ctx->dh.dma_xa_p, GFP_KERNEL);
+	if (!ctx->dh.xa_p)
+		return -ENOMEM;
+
+	memcpy(ctx->dh.xa_p + sz, params->p, sz);
+
+	/* If g equals 2 don't copy it */
+	if (params->g_size == 1 && *(char *)params->g == HPRE_DH_G_FLAG) {
+		ctx->crt_g2_mode = true;
+		return 0;
+	}
+
+	ctx->dh.g = dma_alloc_coherent(dev, sz, &ctx->dh.dma_g, GFP_KERNEL);
+	if (!ctx->dh.g) {
+		dma_free_coherent(dev, sz << 1, ctx->dh.xa_p,
+				  ctx->dh.dma_xa_p);
+		ctx->dh.xa_p = NULL;
+		return -ENOMEM;
+	}
+
+	memcpy(ctx->dh.g + (sz - params->g_size), params->g, params->g_size);
+
+	return 0;
+}
+
+static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
+{
+	struct device *dev = HPRE_DEV(ctx);
+	unsigned int sz = ctx->key_sz;
+
+	if (is_clear_all)
+		hisi_qm_stop_qp(ctx->qp);
+
+	if (ctx->dh.g) {
+		dma_free_coherent(dev, sz, ctx->dh.g, ctx->dh.dma_g);
+		ctx->dh.g = NULL;
+	}
+
+	if (ctx->dh.xa_p) {
+		memzero_explicit(ctx->dh.xa_p, sz);
+		dma_free_coherent(dev, sz << 1, ctx->dh.xa_p,
+				  ctx->dh.dma_xa_p);
+		ctx->dh.xa_p = NULL;
+	}
+
+	hpre_ctx_clear(ctx, is_clear_all);
+}
+
+static int hpre_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
+			      unsigned int len)
+{
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct dh params;
+	int ret;
+
+	if (crypto_dh_decode_key(buf, len, &params) < 0)
+		return -EINVAL;
+
+	/* Free old secret if any */
+	hpre_dh_clear_ctx(ctx, false);
+
+	ret = hpre_dh_set_params(ctx, &params);
+	if (ret < 0)
+		goto err_clear_ctx;
+
+	memcpy(ctx->dh.xa_p + (ctx->key_sz - params.key_size), params.key,
+	       params.key_size);
+
+	return 0;
+
+err_clear_ctx:
+	hpre_dh_clear_ctx(ctx, false);
+	return ret;
+}
+
+static unsigned int hpre_dh_max_size(struct crypto_kpp *tfm)
+{
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	return ctx->key_sz;
+}
+
+static int hpre_dh_init_tfm(struct crypto_kpp *tfm)
+{
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	return hpre_ctx_init(ctx);
+}
+
+static void hpre_dh_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	hpre_dh_clear_ctx(ctx, true);
+}
+#endif
+
+static void hpre_rsa_drop_leading_zeros(const char **ptr, size_t *len)
+{
+	while (!**ptr && *len) {
+		(*ptr)++;
+		(*len)--;
+	}
+}
+
+static bool hpre_rsa_key_size_is_support(unsigned int len)
+{
+	unsigned int bits = len << HPRE_BITS_2_BYTES_SHIFT;
+
+#define _RSA_1024BITS_KEY_WDTH		1024
+#define _RSA_2048BITS_KEY_WDTH		2048
+#define _RSA_3072BITS_KEY_WDTH		3072
+#define _RSA_4096BITS_KEY_WDTH		4096
+
+	switch (bits) {
+	case _RSA_1024BITS_KEY_WDTH:
+	case _RSA_2048BITS_KEY_WDTH:
+	case _RSA_3072BITS_KEY_WDTH:
+	case _RSA_4096BITS_KEY_WDTH:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int hpre_rsa_enc(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+	void *tmp = akcipher_request_ctx(req);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_sqe *msg = &hpre_req->req;
+	int ctr = 0;
+	int ret;
+
+	/* For 512 and 1536 bits key size, use soft tfm instead */
+	if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
+	    ctx->key_sz == HPRE_RSA_1536BITS_KSZ) {
+		akcipher_request_set_tfm(req, ctx->rsa.soft_tfm);
+		ret = crypto_akcipher_encrypt(req);
+		akcipher_request_set_tfm(req, tfm);
+		return ret;
+	}
+
+	if (unlikely(!ctx->rsa.pubkey))
+		return -EINVAL;
+
+	ret = hpre_msg_request_set(ctx, req, true);
+	if (unlikely(ret))
+		return ret;
+
+	msg->dw0 |= cpu_to_le32(HPRE_ALG_NC_NCRT);
+	msg->key = cpu_to_le64((u64)ctx->rsa.dma_pubkey);
+
+	ret = hpre_hw_data_init(hpre_req, req->src, req->src_len, 1, 0);
+	if (unlikely(ret))
+		goto clear_all;
+
+	ret = hpre_hw_data_init(hpre_req, req->dst, req->dst_len, 0, 0);
+	if (unlikely(ret))
+		goto clear_all;
+
+	do {
+		ret = hisi_qp_send(ctx->qp, msg);
+	} while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
+
+	/* success */
+	if (likely(!ret))
+		return -EINPROGRESS;
+
+clear_all:
+	hpre_rm_req_from_ctx(hpre_req);
+	hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);
+
+	return ret;
+}
+
+static int hpre_rsa_dec(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+	void *tmp = akcipher_request_ctx(req);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_sqe *msg = &hpre_req->req;
+	int ctr = 0;
+	int ret;
+
+	/* For 512 and 1536 bits key size, use soft tfm instead */
+	if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
+	    ctx->key_sz == HPRE_RSA_1536BITS_KSZ) {
+		akcipher_request_set_tfm(req, ctx->rsa.soft_tfm);
+		ret = crypto_akcipher_decrypt(req);
+		akcipher_request_set_tfm(req, tfm);
+		return ret;
+	}
+
+	if (unlikely(!ctx->rsa.prikey))
+		return -EINVAL;
+
+	ret = hpre_msg_request_set(ctx, req, true);
+	if (unlikely(ret))
+		return ret;
+
+	if (ctx->crt_g2_mode) {
+		msg->key = cpu_to_le64((u64)ctx->rsa.dma_crt_prikey);
+		msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) |
+				       HPRE_ALG_NC_CRT);
+	} else {
+		msg->key = cpu_to_le64((u64)ctx->rsa.dma_prikey);
+		msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) |
+				       HPRE_ALG_NC_NCRT);
+	}
+
+	ret = hpre_hw_data_init(hpre_req, req->src, req->src_len, 1, 0);
+	if (unlikely(ret))
+		goto clear_all;
+
+	ret = hpre_hw_data_init(hpre_req, req->dst, req->dst_len, 0, 0);
+	if (unlikely(ret))
+		goto clear_all;
+
+	do {
+		ret = hisi_qp_send(ctx->qp, msg);
+	} while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
+
+	/* success */
+	if (likely(!ret))
+		return -EINPROGRESS;
+
+clear_all:
+	hpre_rm_req_from_ctx(hpre_req);
+	hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);
+
+	return ret;
+}
+
+static int hpre_rsa_set_n(struct hpre_ctx *ctx, const char *value,
+			  size_t vlen, bool private)
+{
+	const char *ptr = value;
+
+	hpre_rsa_drop_leading_zeros(&ptr, &vlen);
+
+	ctx->key_sz = vlen;
+
+	/* if invalid key size provided, we use software tfm */
+	if (!hpre_rsa_key_size_is_support(ctx->key_sz))
+		return 0;
+
+	ctx->rsa.pubkey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+					     &ctx->rsa.dma_pubkey,
+					     GFP_KERNEL);
+	if (!ctx->rsa.pubkey)
+		return -ENOMEM;
+
+	if (private) {
+		ctx->rsa.prikey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+						     &ctx->rsa.dma_prikey,
+						     GFP_KERNEL);
+		if (!ctx->rsa.prikey) {
+			dma_free_coherent(HPRE_DEV(ctx), vlen << 1,
+					  ctx->rsa.pubkey,
+					  ctx->rsa.dma_pubkey);
+			ctx->rsa.pubkey = NULL;
+			return -ENOMEM;
+		}
+		memcpy(ctx->rsa.prikey + vlen, ptr, vlen);
+	}
+	memcpy(ctx->rsa.pubkey + vlen, ptr, vlen);
+
+	/* Using hardware HPRE to do RSA */
+	return 1;
+}
+
+static int hpre_rsa_set_e(struct hpre_ctx *ctx, const char *value,
+			  size_t vlen)
+{
+	const char *ptr = value;
+
+	hpre_rsa_drop_leading_zeros(&ptr, &vlen);
+
+	if (!ctx->key_sz || !vlen || vlen > ctx->key_sz)
+		return -EINVAL;
+
+	memcpy(ctx->rsa.pubkey + ctx->key_sz - vlen, ptr, vlen);
+
+	return 0;
+}
+
+static int hpre_rsa_set_d(struct hpre_ctx *ctx, const char *value,
+			  size_t vlen)
+{
+	const char *ptr = value;
+
+	hpre_rsa_drop_leading_zeros(&ptr, &vlen);
+
+	if (!ctx->key_sz || !vlen || vlen > ctx->key_sz)
+		return -EINVAL;
+
+	memcpy(ctx->rsa.prikey + ctx->key_sz - vlen, ptr, vlen);
+
+	return 0;
+}
+
+static int hpre_crt_para_get(char *para, size_t para_sz,
+			     const char *raw, size_t raw_sz)
+{
+	const char *ptr = raw;
+	size_t len = raw_sz;
+
+	hpre_rsa_drop_leading_zeros(&ptr, &len);
+	if (!len || len > para_sz)
+		return -EINVAL;
+
+	memcpy(para + para_sz - len, ptr, len);
+
+	return 0;
+}
+
+static int hpre_rsa_setkey_crt(struct hpre_ctx *ctx, struct rsa_key *rsa_key)
+{
+	unsigned int hlf_ksz = ctx->key_sz >> 1;
+	struct device *dev = HPRE_DEV(ctx);
+	u64 offset;
+	int ret;
+
+	ctx->rsa.crt_prikey = dma_alloc_coherent(dev, hlf_ksz * HPRE_CRT_PRMS,
+					&ctx->rsa.dma_crt_prikey,
+					GFP_KERNEL);
+	if (!ctx->rsa.crt_prikey)
+		return -ENOMEM;
+
+	ret = hpre_crt_para_get(ctx->rsa.crt_prikey, hlf_ksz,
+				rsa_key->dq, rsa_key->dq_sz);
+	if (ret)
+		goto free_key;
+
+	offset = hlf_ksz;
+	ret = hpre_crt_para_get(ctx->rsa.crt_prikey + offset, hlf_ksz,
+				rsa_key->dp, rsa_key->dp_sz);
+	if (ret)
+		goto free_key;
+
+	offset = hlf_ksz * HPRE_CRT_Q;
+	ret = hpre_crt_para_get(ctx->rsa.crt_prikey + offset, hlf_ksz,
+				rsa_key->q, rsa_key->q_sz);
+	if (ret)
+		goto free_key;
+
+	offset = hlf_ksz * HPRE_CRT_P;
+	ret = hpre_crt_para_get(ctx->rsa.crt_prikey + offset, hlf_ksz,
+				rsa_key->p, rsa_key->p_sz);
+	if (ret)
+		goto free_key;
+
+	offset = hlf_ksz * HPRE_CRT_INV;
+	ret = hpre_crt_para_get(ctx->rsa.crt_prikey + offset, hlf_ksz,
+				rsa_key->qinv, rsa_key->qinv_sz);
+	if (ret)
+		goto free_key;
+
+	ctx->crt_g2_mode = true;
+
+	return 0;
+
+free_key:
+	offset = hlf_ksz * HPRE_CRT_PRMS;
+	memzero_explicit(ctx->rsa.crt_prikey, offset);
+	dma_free_coherent(dev, hlf_ksz * HPRE_CRT_PRMS, ctx->rsa.crt_prikey,
+			  ctx->rsa.dma_crt_prikey);
+	ctx->rsa.crt_prikey = NULL;
+	ctx->crt_g2_mode = false;
+
+	return ret;
+}
+
+/* If it is clear all, all the resources of the QP will be cleaned. */
+static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
+{
+	unsigned int half_key_sz = ctx->key_sz >> 1;
+	struct device *dev = HPRE_DEV(ctx);
+
+	if (is_clear_all)
+		hisi_qm_stop_qp(ctx->qp);
+
+	if (ctx->rsa.pubkey) {
+		dma_free_coherent(dev, ctx->key_sz << 1,
+				  ctx->rsa.pubkey, ctx->rsa.dma_pubkey);
+		ctx->rsa.pubkey = NULL;
+	}
+
+	if (ctx->rsa.crt_prikey) {
+		memzero_explicit(ctx->rsa.crt_prikey,
+				 half_key_sz * HPRE_CRT_PRMS);
+		dma_free_coherent(dev, half_key_sz * HPRE_CRT_PRMS,
+				  ctx->rsa.crt_prikey, ctx->rsa.dma_crt_prikey);
+		ctx->rsa.crt_prikey = NULL;
+	}
+
+	if (ctx->rsa.prikey) {
+		memzero_explicit(ctx->rsa.prikey, ctx->key_sz);
+		dma_free_coherent(dev, ctx->key_sz << 1, ctx->rsa.prikey,
+				  ctx->rsa.dma_prikey);
+		ctx->rsa.prikey = NULL;
+	}
+
+	hpre_ctx_clear(ctx, is_clear_all);
+}
+
+/*
+ * we should judge if it is CRT or not,
+ * CRT: return true,  N-CRT: return false .
+ */
+static bool hpre_is_crt_key(struct rsa_key *key)
+{
+	u16 len = key->p_sz + key->q_sz + key->dp_sz + key->dq_sz +
+		  key->qinv_sz;
+
+#define LEN_OF_NCRT_PARA	5
+
+	/* N-CRT less than 5 parameters */
+	return len > LEN_OF_NCRT_PARA;
+}
+
+static int hpre_rsa_setkey(struct hpre_ctx *ctx, const void *key,
+			   unsigned int keylen, bool private)
+{
+	struct rsa_key rsa_key;
+	int ret;
+
+	hpre_rsa_clear_ctx(ctx, false);
+
+	if (private)
+		ret = rsa_parse_priv_key(&rsa_key, key, keylen);
+	else
+		ret = rsa_parse_pub_key(&rsa_key, key, keylen);
+	if (ret < 0)
+		return ret;
+
+	ret = hpre_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz, private);
+	if (ret <= 0)
+		return ret;
+
+	if (private) {
+		ret = hpre_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz);
+		if (ret < 0)
+			goto free;
+
+		if (hpre_is_crt_key(&rsa_key)) {
+			ret = hpre_rsa_setkey_crt(ctx, &rsa_key);
+			if (ret < 0)
+				goto free;
+		}
+	}
+
+	ret = hpre_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz);
+	if (ret < 0)
+		goto free;
+
+	if ((private && !ctx->rsa.prikey) || !ctx->rsa.pubkey) {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	return 0;
+
+free:
+	hpre_rsa_clear_ctx(ctx, false);
+	return ret;
+}
+
+static int hpre_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+	int ret;
+
+	ret = crypto_akcipher_set_pub_key(ctx->rsa.soft_tfm, key, keylen);
+	if (ret)
+		return ret;
+
+	return hpre_rsa_setkey(ctx, key, keylen, false);
+}
+
+static int hpre_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			       unsigned int keylen)
+{
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+	int ret;
+
+	ret = crypto_akcipher_set_priv_key(ctx->rsa.soft_tfm, key, keylen);
+	if (ret)
+		return ret;
+
+	return hpre_rsa_setkey(ctx, key, keylen, true);
+}
+
+static unsigned int hpre_rsa_max_size(struct crypto_akcipher *tfm)
+{
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	/* For 512 and 1536 bits key size, use soft tfm instead */
+	if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
+	    ctx->key_sz == HPRE_RSA_1536BITS_KSZ)
+		return crypto_akcipher_maxsize(ctx->rsa.soft_tfm);
+
+	return ctx->key_sz;
+}
+
+static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+	int ret;
+
+	ctx->rsa.soft_tfm = crypto_alloc_akcipher("rsa-generic", 0, 0);
+	if (IS_ERR(ctx->rsa.soft_tfm)) {
+		pr_err("Can not alloc_akcipher!\n");
+		return PTR_ERR(ctx->rsa.soft_tfm);
+	}
+
+	ret = hpre_ctx_init(ctx);
+	if (ret)
+		crypto_free_akcipher(ctx->rsa.soft_tfm);
+
+	return ret;
+}
+
+static void hpre_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	hpre_rsa_clear_ctx(ctx, true);
+	crypto_free_akcipher(ctx->rsa.soft_tfm);
+}
+
+static struct akcipher_alg rsa = {
+	.sign = hpre_rsa_dec,
+	.verify = hpre_rsa_enc,
+	.encrypt = hpre_rsa_enc,
+	.decrypt = hpre_rsa_dec,
+	.set_pub_key = hpre_rsa_setpubkey,
+	.set_priv_key = hpre_rsa_setprivkey,
+	.max_size = hpre_rsa_max_size,
+	.init = hpre_rsa_init_tfm,
+	.exit = hpre_rsa_exit_tfm,
+	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
+	.base = {
+		.cra_ctxsize = sizeof(struct hpre_ctx),
+		.cra_priority = HPRE_CRYPTO_ALG_PRI,
+		.cra_name = "rsa",
+		.cra_driver_name = "hpre-rsa",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+#ifdef CONFIG_CRYPTO_DH
+static struct kpp_alg dh = {
+	.set_secret = hpre_dh_set_secret,
+	.generate_public_key = hpre_dh_compute_value,
+	.compute_shared_secret = hpre_dh_compute_value,
+	.max_size = hpre_dh_max_size,
+	.init = hpre_dh_init_tfm,
+	.exit = hpre_dh_exit_tfm,
+	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
+	.base = {
+		.cra_ctxsize = sizeof(struct hpre_ctx),
+		.cra_priority = HPRE_CRYPTO_ALG_PRI,
+		.cra_name = "dh",
+		.cra_driver_name = "hpre-dh",
+		.cra_module = THIS_MODULE,
+	},
+};
+#endif
+
+int hpre_algs_register(void)
+{
+	int ret = 0;
+
+	mutex_lock(&hpre_alg_lock);
+	if (++hpre_active_devs == 1) {
+		rsa.base.cra_flags = 0;
+		ret = crypto_register_akcipher(&rsa);
+		if (ret)
+			goto unlock;
+#ifdef CONFIG_CRYPTO_DH
+		ret = crypto_register_kpp(&dh);
+		if (ret) {
+			crypto_unregister_akcipher(&rsa);
+			goto unlock;
+		}
+#endif
+	}
+
+unlock:
+	mutex_unlock(&hpre_alg_lock);
+	return ret;
+}
+
+void hpre_algs_unregister(void)
+{
+	mutex_lock(&hpre_alg_lock);
+	if (--hpre_active_devs == 0) {
+		crypto_unregister_akcipher(&rsa);
+#ifdef CONFIG_CRYPTO_DH
+		crypto_unregister_kpp(&dh);
+#endif
+	}
+	mutex_unlock(&hpre_alg_lock);
+}
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
new file mode 100644
index 000000000000..401747de67a8
--- /dev/null
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -0,0 +1,1038 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2019 HiSilicon Limited. */
+#include <linux/acpi.h>
+#include <linux/aer.h>
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/topology.h>
+#include "hpre.h"
+
+#define HPRE_VF_NUM			63
+#define HPRE_QUEUE_NUM_V2		1024
+#define HPRE_QM_ABNML_INT_MASK		0x100004
+#define HPRE_CTRL_CNT_CLR_CE_BIT	BIT(0)
+#define HPRE_COMM_CNT_CLR_CE		0x0
+#define HPRE_CTRL_CNT_CLR_CE		0x301000
+#define HPRE_FSM_MAX_CNT		0x301008
+#define HPRE_VFG_AXQOS			0x30100c
+#define HPRE_VFG_AXCACHE		0x301010
+#define HPRE_RDCHN_INI_CFG		0x301014
+#define HPRE_AWUSR_FP_CFG		0x301018
+#define HPRE_BD_ENDIAN			0x301020
+#define HPRE_ECC_BYPASS			0x301024
+#define HPRE_RAS_WIDTH_CFG		0x301028
+#define HPRE_POISON_BYPASS		0x30102c
+#define HPRE_BD_ARUSR_CFG		0x301030
+#define HPRE_BD_AWUSR_CFG		0x301034
+#define HPRE_TYPES_ENB			0x301038
+#define HPRE_DATA_RUSER_CFG		0x30103c
+#define HPRE_DATA_WUSER_CFG		0x301040
+#define HPRE_INT_MASK			0x301400
+#define HPRE_INT_STATUS			0x301800
+#define HPRE_CORE_INT_ENABLE		0
+#define HPRE_CORE_INT_DISABLE		0x003fffff
+#define HPRE_RAS_ECC_1BIT_TH		0x30140c
+#define HPRE_RDCHN_INI_ST		0x301a00
+#define HPRE_CLSTR_BASE			0x302000
+#define HPRE_CORE_EN_OFFSET		0x04
+#define HPRE_CORE_INI_CFG_OFFSET	0x20
+#define HPRE_CORE_INI_STATUS_OFFSET	0x80
+#define HPRE_CORE_HTBT_WARN_OFFSET	0x8c
+#define HPRE_CORE_IS_SCHD_OFFSET	0x90
+
+#define HPRE_RAS_CE_ENB			0x301410
+#define HPRE_HAC_RAS_CE_ENABLE		0x3f
+#define HPRE_RAS_NFE_ENB		0x301414
+#define HPRE_HAC_RAS_NFE_ENABLE		0x3fffc0
+#define HPRE_RAS_FE_ENB			0x301418
+#define HPRE_HAC_RAS_FE_ENABLE		0
+
+#define HPRE_CORE_ENB		(HPRE_CLSTR_BASE + HPRE_CORE_EN_OFFSET)
+#define HPRE_CORE_INI_CFG	(HPRE_CLSTR_BASE + HPRE_CORE_INI_CFG_OFFSET)
+#define HPRE_CORE_INI_STATUS (HPRE_CLSTR_BASE + HPRE_CORE_INI_STATUS_OFFSET)
+#define HPRE_HAC_ECC1_CNT		0x301a04
+#define HPRE_HAC_ECC2_CNT		0x301a08
+#define HPRE_HAC_INT_STATUS		0x301800
+#define HPRE_HAC_SOURCE_INT		0x301600
+#define MASTER_GLOBAL_CTRL_SHUTDOWN	1
+#define MASTER_TRANS_RETURN_RW		3
+#define HPRE_MASTER_TRANS_RETURN	0x300150
+#define HPRE_MASTER_GLOBAL_CTRL		0x300000
+#define HPRE_CLSTR_ADDR_INTRVL		0x1000
+#define HPRE_CLUSTER_INQURY		0x100
+#define HPRE_CLSTR_ADDR_INQRY_RSLT	0x104
+#define HPRE_TIMEOUT_ABNML_BIT		6
+#define HPRE_PASID_EN_BIT		9
+#define HPRE_REG_RD_INTVRL_US		10
+#define HPRE_REG_RD_TMOUT_US		1000
+#define HPRE_DBGFS_VAL_MAX_LEN		20
+#define HPRE_PCI_DEVICE_ID		0xa258
+#define HPRE_PCI_VF_DEVICE_ID		0xa259
+#define HPRE_ADDR(qm, offset)		((qm)->io_base + (offset))
+#define HPRE_QM_USR_CFG_MASK		0xfffffffe
+#define HPRE_QM_AXI_CFG_MASK		0xffff
+#define HPRE_QM_VFG_AX_MASK		0xff
+#define HPRE_BD_USR_MASK		0x3
+#define HPRE_CLUSTER_CORE_MASK		0xf
+
+#define HPRE_VIA_MSI_DSM		1
+
+static LIST_HEAD(hpre_list);
+static DEFINE_MUTEX(hpre_list_lock);
+static const char hpre_name[] = "hisi_hpre";
+static struct dentry *hpre_debugfs_root;
+static const struct pci_device_id hpre_dev_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_VF_DEVICE_ID) },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, hpre_dev_ids);
+
+struct hpre_hw_error {
+	u32 int_msk;
+	const char *msg;
+};
+
+static const char * const hpre_debug_file_name[] = {
+	[HPRE_CURRENT_QM]   = "current_qm",
+	[HPRE_CLEAR_ENABLE] = "rdclr_en",
+	[HPRE_CLUSTER_CTRL] = "cluster_ctrl",
+};
+
+static const struct hpre_hw_error hpre_hw_errors[] = {
+	{ .int_msk = BIT(0), .msg = "core_ecc_1bit_err_int_set" },
+	{ .int_msk = BIT(1), .msg = "core_ecc_2bit_err_int_set" },
+	{ .int_msk = BIT(2), .msg = "dat_wb_poison_int_set" },
+	{ .int_msk = BIT(3), .msg = "dat_rd_poison_int_set" },
+	{ .int_msk = BIT(4), .msg = "bd_rd_poison_int_set" },
+	{ .int_msk = BIT(5), .msg = "ooo_ecc_2bit_err_int_set" },
+	{ .int_msk = BIT(6), .msg = "cluster1_shb_timeout_int_set" },
+	{ .int_msk = BIT(7), .msg = "cluster2_shb_timeout_int_set" },
+	{ .int_msk = BIT(8), .msg = "cluster3_shb_timeout_int_set" },
+	{ .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" },
+	{ .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" },
+	{ .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" },
+	{ /* sentinel */ }
+};
+
+static const u64 hpre_cluster_offsets[] = {
+	[HPRE_CLUSTER0] =
+		HPRE_CLSTR_BASE + HPRE_CLUSTER0 * HPRE_CLSTR_ADDR_INTRVL,
+	[HPRE_CLUSTER1] =
+		HPRE_CLSTR_BASE + HPRE_CLUSTER1 * HPRE_CLSTR_ADDR_INTRVL,
+	[HPRE_CLUSTER2] =
+		HPRE_CLSTR_BASE + HPRE_CLUSTER2 * HPRE_CLSTR_ADDR_INTRVL,
+	[HPRE_CLUSTER3] =
+		HPRE_CLSTR_BASE + HPRE_CLUSTER3 * HPRE_CLSTR_ADDR_INTRVL,
+};
+
+static struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
+	{"CORES_EN_STATUS          ",  HPRE_CORE_EN_OFFSET},
+	{"CORES_INI_CFG              ",  HPRE_CORE_INI_CFG_OFFSET},
+	{"CORES_INI_STATUS         ",  HPRE_CORE_INI_STATUS_OFFSET},
+	{"CORES_HTBT_WARN         ",  HPRE_CORE_HTBT_WARN_OFFSET},
+	{"CORES_IS_SCHD               ",  HPRE_CORE_IS_SCHD_OFFSET},
+};
+
+static struct debugfs_reg32 hpre_com_dfx_regs[] = {
+	{"READ_CLR_EN          ",  HPRE_CTRL_CNT_CLR_CE},
+	{"AXQOS                   ",  HPRE_VFG_AXQOS},
+	{"AWUSR_CFG              ",  HPRE_AWUSR_FP_CFG},
+	{"QM_ARUSR_MCFG1           ",  QM_ARUSER_M_CFG_1},
+	{"QM_AWUSR_MCFG1           ",  QM_AWUSER_M_CFG_1},
+	{"BD_ENDIAN               ",  HPRE_BD_ENDIAN},
+	{"ECC_CHECK_CTRL       ",  HPRE_ECC_BYPASS},
+	{"RAS_INT_WIDTH       ",  HPRE_RAS_WIDTH_CFG},
+	{"POISON_BYPASS       ",  HPRE_POISON_BYPASS},
+	{"BD_ARUSER               ",  HPRE_BD_ARUSR_CFG},
+	{"BD_AWUSER               ",  HPRE_BD_AWUSR_CFG},
+	{"DATA_ARUSER            ",  HPRE_DATA_RUSER_CFG},
+	{"DATA_AWUSER           ",  HPRE_DATA_WUSER_CFG},
+	{"INT_STATUS               ",  HPRE_INT_STATUS},
+};
+
+static int hpre_pf_q_num_set(const char *val, const struct kernel_param *kp)
+{
+	struct pci_dev *pdev;
+	u32 n, q_num;
+	u8 rev_id;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID, NULL);
+	if (!pdev) {
+		q_num = HPRE_QUEUE_NUM_V2;
+		pr_info("No device found currently, suppose queue number is %d\n",
+			q_num);
+	} else {
+		rev_id = pdev->revision;
+		if (rev_id != QM_HW_V2)
+			return -EINVAL;
+
+		q_num = HPRE_QUEUE_NUM_V2;
+	}
+
+	ret = kstrtou32(val, 10, &n);
+	if (ret != 0 || n == 0 || n > q_num)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops hpre_pf_q_num_ops = {
+	.set = hpre_pf_q_num_set,
+	.get = param_get_int,
+};
+
+static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM;
+module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444);
+MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)");
+
+static inline void hpre_add_to_list(struct hpre *hpre)
+{
+	mutex_lock(&hpre_list_lock);
+	list_add_tail(&hpre->list, &hpre_list);
+	mutex_unlock(&hpre_list_lock);
+}
+
+static inline void hpre_remove_from_list(struct hpre *hpre)
+{
+	mutex_lock(&hpre_list_lock);
+	list_del(&hpre->list);
+	mutex_unlock(&hpre_list_lock);
+}
+
+struct hpre *hpre_find_device(int node)
+{
+	struct hpre *hpre, *ret = NULL;
+	int min_distance = INT_MAX;
+	struct device *dev;
+	int dev_node = 0;
+
+	mutex_lock(&hpre_list_lock);
+	list_for_each_entry(hpre, &hpre_list, list) {
+		dev = &hpre->qm.pdev->dev;
+#ifdef CONFIG_NUMA
+		dev_node = dev->numa_node;
+		if (dev_node < 0)
+			dev_node = 0;
+#endif
+		if (node_distance(dev_node, node) < min_distance) {
+			ret = hpre;
+			min_distance = node_distance(dev_node, node);
+		}
+	}
+	mutex_unlock(&hpre_list_lock);
+
+	return ret;
+}
+
+static int hpre_cfg_by_dsm(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	union acpi_object *obj;
+	guid_t guid;
+
+	if (guid_parse("b06b81ab-0134-4a45-9b0c-483447b95fa7", &guid)) {
+		dev_err(dev, "Hpre GUID failed\n");
+		return -EINVAL;
+	}
+
+	/* Switch over to MSI handling due to non-standard PCI implementation */
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &guid,
+				0, HPRE_VIA_MSI_DSM, NULL);
+	if (!obj) {
+		dev_err(dev, "ACPI handle failed!\n");
+		return -EIO;
+	}
+
+	ACPI_FREE(obj);
+
+	return 0;
+}
+
+static int hpre_set_user_domain_and_cache(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+	struct device *dev = &qm->pdev->dev;
+	unsigned long offset;
+	int ret, i;
+	u32 val;
+
+	writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_ARUSER_M_CFG_ENABLE));
+	writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE));
+	writel_relaxed(HPRE_QM_AXI_CFG_MASK, HPRE_ADDR(qm, QM_AXI_M_CFG));
+
+	/* disable FLR triggered by BME(bus master enable) */
+	writel(PEH_AXUSER_CFG, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
+	writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE));
+
+	/* HPRE need more time, we close this interrupt */
+	val = readl_relaxed(HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+	val |= BIT(HPRE_TIMEOUT_ABNML_BIT);
+	writel_relaxed(val, HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+
+	writel(0x1, HPRE_ADDR(qm, HPRE_TYPES_ENB));
+	writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE));
+	writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN));
+	writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK));
+	writel(0x0, HPRE_ADDR(qm, HPRE_RAS_ECC_1BIT_TH));
+	writel(0x0, HPRE_ADDR(qm, HPRE_POISON_BYPASS));
+	writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE));
+	writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS));
+
+	writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG));
+	writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG));
+	writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG));
+	ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, HPRE_RDCHN_INI_ST), val,
+			val & BIT(0),
+			HPRE_REG_RD_INTVRL_US,
+			HPRE_REG_RD_TMOUT_US);
+	if (ret) {
+		dev_err(dev, "read rd channel timeout fail!\n");
+		return -ETIMEDOUT;
+	}
+
+	for (i = 0; i < HPRE_CLUSTERS_NUM; i++) {
+		offset = i * HPRE_CLSTR_ADDR_INTRVL;
+
+		/* clusters initiating */
+		writel(HPRE_CLUSTER_CORE_MASK,
+		       HPRE_ADDR(qm, offset + HPRE_CORE_ENB));
+		writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG));
+		ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset +
+					HPRE_CORE_INI_STATUS), val,
+					((val & HPRE_CLUSTER_CORE_MASK) ==
+					HPRE_CLUSTER_CORE_MASK),
+					HPRE_REG_RD_INTVRL_US,
+					HPRE_REG_RD_TMOUT_US);
+		if (ret) {
+			dev_err(dev,
+				"cluster %d int st status timeout!\n", i);
+			return -ETIMEDOUT;
+		}
+	}
+
+	ret = hpre_cfg_by_dsm(qm);
+	if (ret)
+		dev_err(dev, "acpi_evaluate_dsm err.\n");
+
+	return ret;
+}
+
+static void hpre_cnt_regs_clear(struct hisi_qm *qm)
+{
+	unsigned long offset;
+	int i;
+
+	/* clear current_qm */
+	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	/* clear clusterX/cluster_ctrl */
+	for (i = 0; i < HPRE_CLUSTERS_NUM; i++) {
+		offset = HPRE_CLSTR_BASE + i * HPRE_CLSTR_ADDR_INTRVL;
+		writel(0x0, qm->io_base + offset + HPRE_CLUSTER_INQURY);
+	}
+
+	/* clear rdclr_en */
+	writel(0x0, qm->io_base + HPRE_CTRL_CNT_CLR_CE);
+
+	hisi_qm_debug_regs_clear(qm);
+}
+
+static void hpre_hw_error_disable(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+
+	/* disable hpre hw error interrupts */
+	writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK);
+}
+
+static void hpre_hw_error_enable(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+
+	/* enable hpre hw error interrupts */
+	writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
+	writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB);
+	writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB);
+	writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB);
+}
+
+static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file)
+{
+	struct hpre *hpre = container_of(file->debug, struct hpre, debug);
+
+	return &hpre->qm;
+}
+
+static u32 hpre_current_qm_read(struct hpre_debugfs_file *file)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+
+	return readl(qm->io_base + QM_DFX_MB_CNT_VF);
+}
+
+static int hpre_current_qm_write(struct hpre_debugfs_file *file, u32 val)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+	struct hpre_debug *debug = file->debug;
+	struct hpre *hpre = container_of(debug, struct hpre, debug);
+	u32 num_vfs = hpre->num_vfs;
+	u32 vfq_num, tmp;
+
+
+	if (val > num_vfs)
+		return -EINVAL;
+
+	/* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
+	if (val == 0) {
+		qm->debug.curr_qm_qp_num = qm->qp_num;
+	} else {
+		vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs;
+		if (val == num_vfs) {
+			qm->debug.curr_qm_qp_num =
+			qm->ctrl_qp_num - qm->qp_num - (num_vfs - 1) * vfq_num;
+		} else {
+			qm->debug.curr_qm_qp_num = vfq_num;
+		}
+	}
+
+	writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return  0;
+}
+
+static u32 hpre_clear_enable_read(struct hpre_debugfs_file *file)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+
+	return readl(qm->io_base + HPRE_CTRL_CNT_CLR_CE) &
+	       HPRE_CTRL_CNT_CLR_CE_BIT;
+}
+
+static int hpre_clear_enable_write(struct hpre_debugfs_file *file, u32 val)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+	u32 tmp;
+
+	if (val != 1 && val != 0)
+		return -EINVAL;
+
+	tmp = (readl(qm->io_base + HPRE_CTRL_CNT_CLR_CE) &
+	       ~HPRE_CTRL_CNT_CLR_CE_BIT) | val;
+	writel(tmp, qm->io_base + HPRE_CTRL_CNT_CLR_CE);
+
+	return  0;
+}
+
+static u32 hpre_cluster_inqry_read(struct hpre_debugfs_file *file)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+	int cluster_index = file->index - HPRE_CLUSTER_CTRL;
+	unsigned long offset = HPRE_CLSTR_BASE +
+			       cluster_index * HPRE_CLSTR_ADDR_INTRVL;
+
+	return readl(qm->io_base + offset + HPRE_CLSTR_ADDR_INQRY_RSLT);
+}
+
+static int hpre_cluster_inqry_write(struct hpre_debugfs_file *file, u32 val)
+{
+	struct hisi_qm *qm = hpre_file_to_qm(file);
+	int cluster_index = file->index - HPRE_CLUSTER_CTRL;
+	unsigned long offset = HPRE_CLSTR_BASE + cluster_index *
+			       HPRE_CLSTR_ADDR_INTRVL;
+
+	writel(val, qm->io_base + offset + HPRE_CLUSTER_INQURY);
+
+	return  0;
+}
+
+static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct hpre_debugfs_file *file = filp->private_data;
+	char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
+	u32 val;
+	int ret;
+
+	spin_lock_irq(&file->lock);
+	switch (file->type) {
+	case HPRE_CURRENT_QM:
+		val = hpre_current_qm_read(file);
+		break;
+	case HPRE_CLEAR_ENABLE:
+		val = hpre_clear_enable_read(file);
+		break;
+	case HPRE_CLUSTER_CTRL:
+		val = hpre_cluster_inqry_read(file);
+		break;
+	default:
+		spin_unlock_irq(&file->lock);
+		return -EINVAL;
+	}
+	spin_unlock_irq(&file->lock);
+	ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val);
+	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct hpre_debugfs_file *file = filp->private_data;
+	char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
+	unsigned long val;
+	int len, ret;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= HPRE_DBGFS_VAL_MAX_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, HPRE_DBGFS_VAL_MAX_LEN - 1,
+				     pos, buf, count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	spin_lock_irq(&file->lock);
+	switch (file->type) {
+	case HPRE_CURRENT_QM:
+		ret = hpre_current_qm_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	case HPRE_CLEAR_ENABLE:
+		ret = hpre_clear_enable_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	case HPRE_CLUSTER_CTRL:
+		ret = hpre_cluster_inqry_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_input;
+	}
+	spin_unlock_irq(&file->lock);
+
+	return count;
+
+err_input:
+	spin_unlock_irq(&file->lock);
+	return ret;
+}
+
+static const struct file_operations hpre_ctrl_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = hpre_ctrl_debug_read,
+	.write = hpre_ctrl_debug_write,
+};
+
+static int hpre_create_debugfs_file(struct hpre_debug *dbg, struct dentry *dir,
+				    enum hpre_ctrl_dbgfs_file type, int indx)
+{
+	struct dentry *file_dir;
+
+	if (dir)
+		file_dir = dir;
+	else
+		file_dir = dbg->debug_root;
+
+	if (type >= HPRE_DEBUG_FILE_NUM)
+		return -EINVAL;
+
+	spin_lock_init(&dbg->files[indx].lock);
+	dbg->files[indx].debug = dbg;
+	dbg->files[indx].type = type;
+	dbg->files[indx].index = indx;
+	debugfs_create_file(hpre_debug_file_name[type], 0600, file_dir,
+			    dbg->files + indx, &hpre_ctrl_debug_fops);
+
+	return 0;
+}
+
+static int hpre_pf_comm_regs_debugfs_init(struct hpre_debug *debug)
+{
+	struct hpre *hpre = container_of(debug, struct hpre, debug);
+	struct hisi_qm *qm = &hpre->qm;
+	struct device *dev = &qm->pdev->dev;
+	struct debugfs_regset32 *regset;
+
+	regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return -ENOMEM;
+
+	regset->regs = hpre_com_dfx_regs;
+	regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
+	regset->base = qm->io_base;
+
+	debugfs_create_regset32("regs", 0444,  debug->debug_root, regset);
+	return 0;
+}
+
+static int hpre_cluster_debugfs_init(struct hpre_debug *debug)
+{
+	struct hpre *hpre = container_of(debug, struct hpre, debug);
+	struct hisi_qm *qm = &hpre->qm;
+	struct device *dev = &qm->pdev->dev;
+	char buf[HPRE_DBGFS_VAL_MAX_LEN];
+	struct debugfs_regset32 *regset;
+	struct dentry *tmp_d;
+	int i, ret;
+
+	for (i = 0; i < HPRE_CLUSTERS_NUM; i++) {
+		ret = snprintf(buf, HPRE_DBGFS_VAL_MAX_LEN, "cluster%d", i);
+		if (ret < 0)
+			return -EINVAL;
+		tmp_d = debugfs_create_dir(buf, debug->debug_root);
+
+		regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+		if (!regset)
+			return -ENOMEM;
+
+		regset->regs = hpre_cluster_dfx_regs;
+		regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs);
+		regset->base = qm->io_base + hpre_cluster_offsets[i];
+
+		debugfs_create_regset32("regs", 0444, tmp_d, regset);
+		ret = hpre_create_debugfs_file(debug, tmp_d, HPRE_CLUSTER_CTRL,
+					       i + HPRE_CLUSTER_CTRL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hpre_ctrl_debug_init(struct hpre_debug *debug)
+{
+	int ret;
+
+	ret = hpre_create_debugfs_file(debug, NULL, HPRE_CURRENT_QM,
+				       HPRE_CURRENT_QM);
+	if (ret)
+		return ret;
+
+	ret = hpre_create_debugfs_file(debug, NULL, HPRE_CLEAR_ENABLE,
+				       HPRE_CLEAR_ENABLE);
+	if (ret)
+		return ret;
+
+	ret = hpre_pf_comm_regs_debugfs_init(debug);
+	if (ret)
+		return ret;
+
+	return hpre_cluster_debugfs_init(debug);
+}
+
+static int hpre_debugfs_init(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+	struct device *dev = &qm->pdev->dev;
+	struct dentry *dir;
+	int ret;
+
+	dir = debugfs_create_dir(dev_name(dev), hpre_debugfs_root);
+	qm->debug.debug_root = dir;
+
+	ret = hisi_qm_debug_init(qm);
+	if (ret)
+		goto failed_to_create;
+
+	if (qm->pdev->device == HPRE_PCI_DEVICE_ID) {
+		hpre->debug.debug_root = dir;
+		ret = hpre_ctrl_debug_init(&hpre->debug);
+		if (ret)
+			goto failed_to_create;
+	}
+	return 0;
+
+failed_to_create:
+	debugfs_remove_recursive(qm->debug.debug_root);
+	return ret;
+}
+
+static void hpre_debugfs_exit(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+
+	debugfs_remove_recursive(qm->debug.debug_root);
+}
+
+static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev)
+{
+	enum qm_hw_ver rev_id;
+
+	rev_id = hisi_qm_get_hw_version(pdev);
+	if (rev_id < 0)
+		return -ENODEV;
+
+	if (rev_id == QM_HW_V1) {
+		pci_warn(pdev, "HPRE version 1 is not supported!\n");
+		return -EINVAL;
+	}
+
+	qm->pdev = pdev;
+	qm->ver = rev_id;
+	qm->sqe_size = HPRE_SQE_SIZE;
+	qm->dev_name = hpre_name;
+	qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ?
+		       QM_HW_PF : QM_HW_VF;
+	if (pdev->is_physfn) {
+		qm->qp_base = HPRE_PF_DEF_Q_BASE;
+		qm->qp_num = hpre_pf_q_num;
+	}
+	qm->use_dma_api = true;
+
+	return 0;
+}
+
+static void hpre_hw_err_init(struct hpre *hpre)
+{
+	hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE,
+			      0, QM_DB_RANDOM_INVALID);
+	hpre_hw_error_enable(hpre);
+}
+
+static int hpre_pf_probe_init(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+	int ret;
+
+	qm->ctrl_qp_num = HPRE_QUEUE_NUM_V2;
+
+	ret = hpre_set_user_domain_and_cache(hpre);
+	if (ret)
+		return ret;
+
+	hpre_hw_err_init(hpre);
+
+	return 0;
+}
+
+static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hisi_qm *qm;
+	struct hpre *hpre;
+	int ret;
+
+	hpre = devm_kzalloc(&pdev->dev, sizeof(*hpre), GFP_KERNEL);
+	if (!hpre)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, hpre);
+
+	qm = &hpre->qm;
+	ret = hpre_qm_pre_init(qm, pdev);
+	if (ret)
+		return ret;
+
+	ret = hisi_qm_init(qm);
+	if (ret)
+		return ret;
+
+	if (pdev->is_physfn) {
+		ret = hpre_pf_probe_init(hpre);
+		if (ret)
+			goto err_with_qm_init;
+	} else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V2) {
+		/* v2 starts to support get vft by mailbox */
+		ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+		if (ret)
+			goto err_with_qm_init;
+	}
+
+	ret = hisi_qm_start(qm);
+	if (ret)
+		goto err_with_err_init;
+
+	ret = hpre_debugfs_init(hpre);
+	if (ret)
+		dev_warn(&pdev->dev, "init debugfs fail!\n");
+
+	hpre_add_to_list(hpre);
+
+	ret = hpre_algs_register();
+	if (ret < 0) {
+		hpre_remove_from_list(hpre);
+		pci_err(pdev, "fail to register algs to crypto!\n");
+		goto err_with_qm_start;
+	}
+	return 0;
+
+err_with_qm_start:
+	hisi_qm_stop(qm);
+
+err_with_err_init:
+	if (pdev->is_physfn)
+		hpre_hw_error_disable(hpre);
+
+err_with_qm_init:
+	hisi_qm_uninit(qm);
+
+	return ret;
+}
+
+static int hpre_vf_q_assign(struct hpre *hpre, int num_vfs)
+{
+	struct hisi_qm *qm = &hpre->qm;
+	u32 qp_num = qm->qp_num;
+	int q_num, remain_q_num, i;
+	u32 q_base = qp_num;
+	int ret;
+
+	if (!num_vfs)
+		return -EINVAL;
+
+	remain_q_num = qm->ctrl_qp_num - qp_num;
+
+	/* If remaining queues are not enough, return error. */
+	if (remain_q_num < num_vfs)
+		return -EINVAL;
+
+	q_num = remain_q_num / num_vfs;
+	for (i = 1; i <= num_vfs; i++) {
+		if (i == num_vfs)
+			q_num += remain_q_num % num_vfs;
+		ret = hisi_qm_set_vft(qm, i, q_base, (u32)q_num);
+		if (ret)
+			return ret;
+		q_base += q_num;
+	}
+
+	return 0;
+}
+
+static int hpre_clear_vft_config(struct hpre *hpre)
+{
+	struct hisi_qm *qm = &hpre->qm;
+	u32 num_vfs = hpre->num_vfs;
+	int ret;
+	u32 i;
+
+	for (i = 1; i <= num_vfs; i++) {
+		ret = hisi_qm_set_vft(qm, i, 0, 0);
+		if (ret)
+			return ret;
+	}
+	hpre->num_vfs = 0;
+
+	return 0;
+}
+
+static int hpre_sriov_enable(struct pci_dev *pdev, int max_vfs)
+{
+	struct hpre *hpre = pci_get_drvdata(pdev);
+	int pre_existing_vfs, num_vfs, ret;
+
+	pre_existing_vfs = pci_num_vf(pdev);
+	if (pre_existing_vfs) {
+		pci_err(pdev,
+			"Can't enable VF. Please disable pre-enabled VFs!\n");
+		return 0;
+	}
+
+	num_vfs = min_t(int, max_vfs, HPRE_VF_NUM);
+	ret = hpre_vf_q_assign(hpre, num_vfs);
+	if (ret) {
+		pci_err(pdev, "Can't assign queues for VF!\n");
+		return ret;
+	}
+
+	hpre->num_vfs = num_vfs;
+
+	ret = pci_enable_sriov(pdev, num_vfs);
+	if (ret) {
+		pci_err(pdev, "Can't enable VF!\n");
+		hpre_clear_vft_config(hpre);
+		return ret;
+	}
+
+	return num_vfs;
+}
+
+static int hpre_sriov_disable(struct pci_dev *pdev)
+{
+	struct hpre *hpre = pci_get_drvdata(pdev);
+
+	if (pci_vfs_assigned(pdev)) {
+		pci_err(pdev, "Failed to disable VFs while VFs are assigned!\n");
+		return -EPERM;
+	}
+
+	/* remove in hpre_pci_driver will be called to free VF resources */
+	pci_disable_sriov(pdev);
+
+	return hpre_clear_vft_config(hpre);
+}
+
+static int hpre_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	if (num_vfs)
+		return hpre_sriov_enable(pdev, num_vfs);
+	else
+		return hpre_sriov_disable(pdev);
+}
+
+static void hpre_remove(struct pci_dev *pdev)
+{
+	struct hpre *hpre = pci_get_drvdata(pdev);
+	struct hisi_qm *qm = &hpre->qm;
+	int ret;
+
+	hpre_algs_unregister();
+	hpre_remove_from_list(hpre);
+	if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) {
+		ret = hpre_sriov_disable(pdev);
+		if (ret) {
+			pci_err(pdev, "Disable SRIOV fail!\n");
+			return;
+		}
+	}
+	if (qm->fun_type == QM_HW_PF) {
+		hpre_cnt_regs_clear(qm);
+		qm->debug.curr_qm_qp_num = 0;
+	}
+
+	hpre_debugfs_exit(hpre);
+	hisi_qm_stop(qm);
+	if (qm->fun_type == QM_HW_PF)
+		hpre_hw_error_disable(hpre);
+	hisi_qm_uninit(qm);
+}
+
+static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts)
+{
+	const struct hpre_hw_error *err = hpre_hw_errors;
+	struct device *dev = &hpre->qm.pdev->dev;
+
+	while (err->msg) {
+		if (err->int_msk & err_sts)
+			dev_warn(dev, "%s [error status=0x%x] found\n",
+				 err->msg, err->int_msk);
+		err++;
+	}
+}
+
+static pci_ers_result_t hpre_hw_error_handle(struct hpre *hpre)
+{
+	u32 err_sts;
+
+	/* read err sts */
+	err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS);
+	if (err_sts) {
+		hpre_log_hw_error(hpre, err_sts);
+
+		/* clear error interrupts */
+		writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT);
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev)
+{
+	struct hpre *hpre = pci_get_drvdata(pdev);
+	pci_ers_result_t qm_ret, hpre_ret;
+
+	/* log qm error */
+	qm_ret = hisi_qm_hw_error_handle(&hpre->qm);
+
+	/* log hpre error */
+	hpre_ret = hpre_hw_error_handle(hpre);
+
+	return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
+		hpre_ret == PCI_ERS_RESULT_NEED_RESET) ?
+		PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev,
+					    pci_channel_state_t state)
+{
+	pci_info(pdev, "PCI error detected, state(=%d)!!\n", state);
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return hpre_process_hw_error(pdev);
+}
+
+static const struct pci_error_handlers hpre_err_handler = {
+	.error_detected		= hpre_error_detected,
+};
+
+static struct pci_driver hpre_pci_driver = {
+	.name			= hpre_name,
+	.id_table		= hpre_dev_ids,
+	.probe			= hpre_probe,
+	.remove			= hpre_remove,
+	.sriov_configure	= hpre_sriov_configure,
+	.err_handler		= &hpre_err_handler,
+};
+
+static void hpre_register_debugfs(void)
+{
+	if (!debugfs_initialized())
+		return;
+
+	hpre_debugfs_root = debugfs_create_dir(hpre_name, NULL);
+}
+
+static void hpre_unregister_debugfs(void)
+{
+	debugfs_remove_recursive(hpre_debugfs_root);
+}
+
+static int __init hpre_init(void)
+{
+	int ret;
+
+	hpre_register_debugfs();
+
+	ret = pci_register_driver(&hpre_pci_driver);
+	if (ret) {
+		hpre_unregister_debugfs();
+		pr_err("hpre: can't register hisi hpre driver.\n");
+	}
+
+	return ret;
+}
+
+static void __exit hpre_exit(void)
+{
+	pci_unregister_driver(&hpre_pci_driver);
+	hpre_unregister_debugfs();
+}
+
+module_init(hpre_init);
+module_exit(hpre_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Zaibo Xu <xuzaibo@huawei.com>");
+MODULE_DESCRIPTION("Driver for HiSilicon HPRE accelerator");
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index f975c393a603..b57da5ef8b5b 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -59,17 +59,17 @@
 #define QM_CQ_PHASE_SHIFT		0
 #define QM_CQ_FLAG_SHIFT		1
 
-#define QM_CQE_PHASE(cqe)		((cqe)->w7 & 0x1)
+#define QM_CQE_PHASE(cqe)		(le16_to_cpu((cqe)->w7) & 0x1)
 #define QM_QC_CQE_SIZE			4
 
 /* eqc shift */
 #define QM_EQE_AEQE_SIZE		(2UL << 12)
 #define QM_EQC_PHASE_SHIFT		16
 
-#define QM_EQE_PHASE(eqe)		(((eqe)->dw0 >> 16) & 0x1)
+#define QM_EQE_PHASE(eqe)		((le32_to_cpu((eqe)->dw0) >> 16) & 0x1)
 #define QM_EQE_CQN_MASK			GENMASK(15, 0)
 
-#define QM_AEQE_PHASE(aeqe)		(((aeqe)->dw0 >> 16) & 0x1)
+#define QM_AEQE_PHASE(aeqe)		((le32_to_cpu((aeqe)->dw0) >> 16) & 0x1)
 #define QM_AEQE_TYPE_SHIFT		17
 
 #define QM_DOORBELL_CMD_SQ		0
@@ -169,17 +169,17 @@
 #define QM_MK_SQC_DW3_V2(sqe_sz) \
 	((QM_Q_DEPTH - 1) | ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT))
 
-#define INIT_QC_COMMON(qc, base, pasid) do {	\
-	(qc)->head = 0;				\
-	(qc)->tail = 0;				\
-	(qc)->base_l = lower_32_bits(base);	\
-	(qc)->base_h = upper_32_bits(base);	\
-	(qc)->dw3 = 0;				\
-	(qc)->w8 = 0;				\
-	(qc)->rsvd0 = 0;			\
-	(qc)->pasid = pasid;			\
-	(qc)->w11 = 0;				\
-	(qc)->rsvd1 = 0;			\
+#define INIT_QC_COMMON(qc, base, pasid) do {			\
+	(qc)->head = 0;						\
+	(qc)->tail = 0;						\
+	(qc)->base_l = cpu_to_le32(lower_32_bits(base));	\
+	(qc)->base_h = cpu_to_le32(upper_32_bits(base));	\
+	(qc)->dw3 = 0;						\
+	(qc)->w8 = 0;						\
+	(qc)->rsvd0 = 0;					\
+	(qc)->pasid = cpu_to_le16(pasid);			\
+	(qc)->w11 = 0;						\
+	(qc)->rsvd1 = 0;					\
 } while (0)
 
 enum vft_type {
@@ -331,12 +331,18 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
 	void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE;
 	unsigned long tmp0 = 0, tmp1 = 0;
 
+	if (!IS_ENABLED(CONFIG_ARM64)) {
+		memcpy_toio(fun_base, src, 16);
+		wmb();
+		return;
+	}
+
 	asm volatile("ldp %0, %1, %3\n"
 		     "stp %0, %1, %2\n"
 		     "dsb sy\n"
 		     : "=&r" (tmp0),
 		       "=&r" (tmp1),
-		       "+Q" (*((char *)fun_base))
+		       "+Q" (*((char __iomem *)fun_base))
 		     : "Q" (*((char *)src))
 		     : "memory");
 }
@@ -350,12 +356,12 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
 	dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
 		queue, cmd, (unsigned long long)dma_addr);
 
-	mailbox.w0 = cmd |
+	mailbox.w0 = cpu_to_le16(cmd |
 		     (op ? 0x1 << QM_MB_OP_SHIFT : 0) |
-		     (0x1 << QM_MB_BUSY_SHIFT);
-	mailbox.queue_num = queue;
-	mailbox.base_l = lower_32_bits(dma_addr);
-	mailbox.base_h = upper_32_bits(dma_addr);
+		     (0x1 << QM_MB_BUSY_SHIFT));
+	mailbox.queue_num = cpu_to_le16(queue);
+	mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr));
+	mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr));
 	mailbox.rsvd = 0;
 
 	mutex_lock(&qm->mailbox_lock);
@@ -442,7 +448,7 @@ static u32 qm_get_irq_num_v2(struct hisi_qm *qm)
 
 static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
 {
-	u16 cqn = eqe->dw0 & QM_EQE_CQN_MASK;
+	u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
 
 	return qm->qp_array[cqn];
 }
@@ -464,7 +470,8 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
 	if (qp->req_cb) {
 		while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
 			dma_rmb();
-			qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head);
+			qp->req_cb(qp, qp->sqe + qm->sqe_size *
+				   le16_to_cpu(cqe->sq_head));
 			qm_cq_head_update(qp);
 			cqe = qp->cqe + qp->qp_status.cq_head;
 			qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
@@ -542,7 +549,7 @@ static irqreturn_t qm_aeq_irq(int irq, void *data)
 		return IRQ_NONE;
 
 	while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
-		type = aeqe->dw0 >> QM_AEQE_TYPE_SHIFT;
+		type = le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT;
 		if (type < ARRAY_SIZE(qm_fifo_overflow))
 			dev_err(&qm->pdev->dev, "%s overflow\n",
 				qm_fifo_overflow[type]);
@@ -646,7 +653,7 @@ static void qm_init_qp_status(struct hisi_qp *qp)
 
 	qp_status->sq_tail = 0;
 	qp_status->cq_head = 0;
-	qp_status->cqc_phase = 1;
+	qp_status->cqc_phase = true;
 	qp_status->flags = 0;
 }
 
@@ -963,13 +970,11 @@ static const struct file_operations qm_regs_fops = {
 
 static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index)
 {
-	struct dentry *qm_d = qm->debug.qm_d, *tmp;
+	struct dentry *qm_d = qm->debug.qm_d;
 	struct debugfs_file *file = qm->debug.files + index;
 
-	tmp = debugfs_create_file(qm_debug_file_name[index], 0600, qm_d, file,
-				  &qm_debug_fops);
-	if (IS_ERR(tmp))
-		return -ENOENT;
+	debugfs_create_file(qm_debug_file_name[index], 0600, qm_d, file,
+			    &qm_debug_fops);
 
 	file->index = index;
 	mutex_init(&file->lock);
@@ -981,9 +986,6 @@ static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index)
 static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
 				u32 msi)
 {
-	dev_info(&qm->pdev->dev,
-		 "QM v%d does not support hw error handle\n", qm->ver);
-
 	writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
 }
 
@@ -1123,6 +1125,7 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
 	}
 	set_bit(qp_id, qm->qp_bitmap);
 	qm->qp_array[qp_id] = qp;
+	qm->qp_in_used++;
 
 	write_unlock(&qm->qps_lock);
 
@@ -1187,6 +1190,7 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
 	write_lock(&qm->qps_lock);
 	qm->qp_array[qp->qp_id] = NULL;
 	clear_bit(qp->qp_id, qm->qp_bitmap);
+	qm->qp_in_used--;
 	write_unlock(&qm->qps_lock);
 
 	kfree(qp);
@@ -1218,14 +1222,14 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
 
 	INIT_QC_COMMON(sqc, qp->sqe_dma, pasid);
 	if (ver == QM_HW_V1) {
-		sqc->dw3 = QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size);
-		sqc->w8 = QM_Q_DEPTH - 1;
+		sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size));
+		sqc->w8 = cpu_to_le16(QM_Q_DEPTH - 1);
 	} else if (ver == QM_HW_V2) {
-		sqc->dw3 = QM_MK_SQC_DW3_V2(qm->sqe_size);
+		sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size));
 		sqc->w8 = 0; /* rand_qc */
 	}
-	sqc->cq_num = qp_id;
-	sqc->w13 = QM_MK_SQC_W13(0, 1, qp->alg_type);
+	sqc->cq_num = cpu_to_le16(qp_id);
+	sqc->w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type));
 
 	ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
 	dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
@@ -1245,13 +1249,13 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
 
 	INIT_QC_COMMON(cqc, qp->cqe_dma, pasid);
 	if (ver == QM_HW_V1) {
-		cqc->dw3 = QM_MK_CQC_DW3_V1(0, 0, 0, 4);
-		cqc->w8 = QM_Q_DEPTH - 1;
+		cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, 4));
+		cqc->w8 = cpu_to_le16(QM_Q_DEPTH - 1);
 	} else if (ver == QM_HW_V2) {
-		cqc->dw3 = QM_MK_CQC_DW3_V2(4);
+		cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(4));
 		cqc->w8 = 0;
 	}
-	cqc->dw6 = 1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT;
+	cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT);
 
 	ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
 	dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
@@ -1392,6 +1396,24 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
 }
 
 /**
+ * hisi_qm_get_free_qp_num() - Get free number of qp in qm.
+ * @qm: The qm which want to get free qp.
+ *
+ * This function return free number of qp in qm.
+ */
+int hisi_qm_get_free_qp_num(struct hisi_qm *qm)
+{
+	int ret;
+
+	read_lock(&qm->qps_lock);
+	ret = qm->qp_num - qm->qp_in_used;
+	read_unlock(&qm->qps_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
+
+/**
  * hisi_qm_init() - Initialize configures about qm.
  * @qm: The qm needing init.
  *
@@ -1454,6 +1476,7 @@ int hisi_qm_init(struct hisi_qm *qm)
 	if (ret)
 		goto err_free_irq_vectors;
 
+	qm->qp_in_used = 0;
 	mutex_init(&qm->mailbox_lock);
 	rwlock_init(&qm->qps_lock);
 
@@ -1560,8 +1583,8 @@ static void qm_init_eq_aeq_status(struct hisi_qm *qm)
 
 	status->eq_head = 0;
 	status->aeq_head = 0;
-	status->eqc_phase = 1;
-	status->aeqc_phase = 1;
+	status->eqc_phase = true;
+	status->aeqc_phase = true;
 }
 
 static int qm_eq_ctx_cfg(struct hisi_qm *qm)
@@ -1585,11 +1608,11 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}
 
-	eqc->base_l = lower_32_bits(qm->eqe_dma);
-	eqc->base_h = upper_32_bits(qm->eqe_dma);
+	eqc->base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma));
+	eqc->base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma));
 	if (qm->ver == QM_HW_V1)
-		eqc->dw3 = QM_EQE_AEQE_SIZE;
-	eqc->dw6 = (QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT);
+		eqc->dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE);
+	eqc->dw6 = cpu_to_le32((QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT));
 	ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
 	dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
 	kfree(eqc);
@@ -1606,9 +1629,9 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}
 
-	aeqc->base_l = lower_32_bits(qm->aeqe_dma);
-	aeqc->base_h = upper_32_bits(qm->aeqe_dma);
-	aeqc->dw6 = (QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT);
+	aeqc->base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma));
+	aeqc->base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma));
+	aeqc->dw6 = cpu_to_le32((QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT));
 
 	ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
 	dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
@@ -1780,12 +1803,10 @@ EXPORT_SYMBOL_GPL(hisi_qm_stop);
  */
 int hisi_qm_debug_init(struct hisi_qm *qm)
 {
-	struct dentry *qm_d, *qm_regs;
+	struct dentry *qm_d;
 	int i, ret;
 
 	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
-	if (IS_ERR(qm_d))
-		return -ENOENT;
 	qm->debug.qm_d = qm_d;
 
 	/* only show this in PF */
@@ -1796,12 +1817,7 @@ int hisi_qm_debug_init(struct hisi_qm *qm)
 				goto failed_to_create;
 			}
 
-	qm_regs = debugfs_create_file("qm_regs", 0444, qm->debug.qm_d, qm,
-				      &qm_regs_fops);
-	if (IS_ERR(qm_regs)) {
-		ret = -ENOENT;
-		goto failed_to_create;
-	}
+	debugfs_create_file("qm_regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
 
 	return 0;
 
@@ -1862,8 +1878,7 @@ void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
 			   u32 msi)
 {
 	if (!qm->ops->hw_error_init) {
-		dev_err(&qm->pdev->dev, "QM version %d doesn't support hw error handling!\n",
-			qm->ver);
+		dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n");
 		return;
 	}
 
@@ -1877,11 +1892,10 @@ EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init);
  *
  * Accelerators use this function to handle qm non-fatal hardware errors.
  */
-int hisi_qm_hw_error_handle(struct hisi_qm *qm)
+pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm)
 {
 	if (!qm->ops->hw_error_handle) {
-		dev_err(&qm->pdev->dev, "QM version %d doesn't support hw error report!\n",
-			qm->ver);
+		dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n");
 		return PCI_ERS_RESULT_NONE;
 	}
 
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 70e672ae86bf..078b8f1f1b77 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -75,6 +75,8 @@
 
 #define QM_Q_DEPTH			1024
 
+#define HISI_ACC_SGL_SGE_NR_MAX		255
+
 enum qp_state {
 	QP_STOP,
 };
@@ -132,6 +134,7 @@ struct hisi_qm {
 	u32 sqe_size;
 	u32 qp_base;
 	u32 qp_num;
+	u32 qp_in_used;
 	u32 ctrl_qp_num;
 
 	struct qm_dma qdma;
@@ -204,12 +207,24 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
 int hisi_qm_stop_qp(struct hisi_qp *qp);
 void hisi_qm_release_qp(struct hisi_qp *qp);
 int hisi_qp_send(struct hisi_qp *qp, const void *msg);
+int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
 int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
 int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number);
 int hisi_qm_debug_init(struct hisi_qm *qm);
 void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
 			   u32 msi);
-int hisi_qm_hw_error_handle(struct hisi_qm *qm);
+pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm);
 enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
 void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
+
+struct hisi_acc_sgl_pool;
+struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
+	struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool,
+	u32 index, dma_addr_t *hw_sgl_dma);
+void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
+			   struct hisi_acc_hw_sgl *hw_sgl);
+struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
+						   u32 count, u32 sge_nr);
+void hisi_acc_free_sgl_pool(struct device *dev,
+			    struct hisi_acc_sgl_pool *pool);
 #endif
diff --git a/drivers/crypto/hisilicon/sec2/Makefile b/drivers/crypto/hisilicon/sec2/Makefile
new file mode 100644
index 000000000000..b4f6cf14be3a
--- /dev/null
+++ b/drivers/crypto/hisilicon/sec2/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += hisi_sec2.o
+hisi_sec2-objs = sec_main.o sec_crypto.o
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
new file mode 100644
index 000000000000..13e2d8d7be94
--- /dev/null
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+
+#ifndef __HISI_SEC_V2_H
+#define __HISI_SEC_V2_H
+
+#include <linux/list.h>
+
+#include "../qm.h"
+#include "sec_crypto.h"
+
+/* Algorithm resource per hardware SEC queue */
+struct sec_alg_res {
+	u8 *c_ivin;
+	dma_addr_t c_ivin_dma;
+	u8 *out_mac;
+	dma_addr_t out_mac_dma;
+};
+
+/* Cipher request of SEC private */
+struct sec_cipher_req {
+	struct hisi_acc_hw_sgl *c_in;
+	dma_addr_t c_in_dma;
+	struct hisi_acc_hw_sgl *c_out;
+	dma_addr_t c_out_dma;
+	struct skcipher_request *sk_req;
+	u32 c_len;
+	bool encrypt;
+};
+
+struct sec_aead_req {
+	u8 *out_mac;
+	dma_addr_t out_mac_dma;
+	struct aead_request *aead_req;
+};
+
+/* SEC request of Crypto */
+struct sec_req {
+	struct sec_sqe sec_sqe;
+	struct sec_ctx *ctx;
+	struct sec_qp_ctx *qp_ctx;
+
+	struct sec_cipher_req c_req;
+	struct sec_aead_req aead_req;
+
+	int err_type;
+	int req_id;
+
+	/* Status of the SEC request */
+	bool fake_busy;
+};
+
+/**
+ * struct sec_req_op - Operations for SEC request
+ * @buf_map: DMA map the SGL buffers of the request
+ * @buf_unmap: DMA unmap the SGL buffers of the request
+ * @bd_fill: Fill the SEC queue BD
+ * @bd_send: Send the SEC BD into the hardware queue
+ * @callback: Call back for the request
+ * @process: Main processing logic of Skcipher
+ */
+struct sec_req_op {
+	int (*buf_map)(struct sec_ctx *ctx, struct sec_req *req);
+	void (*buf_unmap)(struct sec_ctx *ctx, struct sec_req *req);
+	void (*do_transfer)(struct sec_ctx *ctx, struct sec_req *req);
+	int (*bd_fill)(struct sec_ctx *ctx, struct sec_req *req);
+	int (*bd_send)(struct sec_ctx *ctx, struct sec_req *req);
+	void (*callback)(struct sec_ctx *ctx, struct sec_req *req, int err);
+	int (*process)(struct sec_ctx *ctx, struct sec_req *req);
+};
+
+/* SEC auth context */
+struct sec_auth_ctx {
+	dma_addr_t a_key_dma;
+	u8 *a_key;
+	u8 a_key_len;
+	u8 mac_len;
+	u8 a_alg;
+	struct crypto_shash *hash_tfm;
+};
+
+/* SEC cipher context which cipher's relatives */
+struct sec_cipher_ctx {
+	u8 *c_key;
+	dma_addr_t c_key_dma;
+	sector_t iv_offset;
+	u32 c_gran_size;
+	u32 ivsize;
+	u8 c_mode;
+	u8 c_alg;
+	u8 c_key_len;
+};
+
+/* SEC queue context which defines queue's relatives */
+struct sec_qp_ctx {
+	struct hisi_qp *qp;
+	struct sec_req *req_list[QM_Q_DEPTH];
+	struct idr req_idr;
+	struct sec_alg_res res[QM_Q_DEPTH];
+	struct sec_ctx *ctx;
+	struct mutex req_lock;
+	struct hisi_acc_sgl_pool *c_in_pool;
+	struct hisi_acc_sgl_pool *c_out_pool;
+	atomic_t pending_reqs;
+};
+
+enum sec_alg_type {
+	SEC_SKCIPHER,
+	SEC_AEAD
+};
+
+/* SEC Crypto TFM context which defines queue and cipher .etc relatives */
+struct sec_ctx {
+	struct sec_qp_ctx *qp_ctx;
+	struct sec_dev *sec;
+	const struct sec_req_op *req_op;
+
+	/* Half queues for encipher, and half for decipher */
+	u32 hlf_q_num;
+
+	/* Threshold for fake busy, trigger to return -EBUSY to user */
+	u32 fake_req_limit;
+
+	/* Currrent cyclic index to select a queue for encipher */
+	atomic_t enc_qcyclic;
+
+	 /* Currrent cyclic index to select a queue for decipher */
+	atomic_t dec_qcyclic;
+
+	enum sec_alg_type alg_type;
+	struct sec_cipher_ctx c_ctx;
+	struct sec_auth_ctx a_ctx;
+};
+
+enum sec_endian {
+	SEC_LE = 0,
+	SEC_32BE,
+	SEC_64BE
+};
+
+enum sec_debug_file_index {
+	SEC_CURRENT_QM,
+	SEC_CLEAR_ENABLE,
+	SEC_DEBUG_FILE_NUM,
+};
+
+struct sec_debug_file {
+	enum sec_debug_file_index index;
+	spinlock_t lock;
+	struct hisi_qm *qm;
+};
+
+struct sec_dfx {
+	atomic64_t send_cnt;
+	atomic64_t recv_cnt;
+};
+
+struct sec_debug {
+	struct sec_dfx dfx;
+	struct sec_debug_file files[SEC_DEBUG_FILE_NUM];
+};
+
+struct sec_dev {
+	struct hisi_qm qm;
+	struct list_head list;
+	struct sec_debug debug;
+	u32 ctx_q_num;
+	u32 num_vfs;
+	unsigned long status;
+};
+
+struct sec_dev *sec_find_device(int node);
+int sec_register_to_crypto(void);
+void sec_unregister_from_crypto(void);
+#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
new file mode 100644
index 000000000000..a2cfcc9ccd94
--- /dev/null
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -0,0 +1,1446 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/authenc.h>
+#include <crypto/des.h>
+#include <crypto/hash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/sha.h>
+#include <crypto/skcipher.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+
+#include "sec.h"
+#include "sec_crypto.h"
+
+#define SEC_PRIORITY		4001
+#define SEC_XTS_MIN_KEY_SIZE	(2 * AES_MIN_KEY_SIZE)
+#define SEC_XTS_MAX_KEY_SIZE	(2 * AES_MAX_KEY_SIZE)
+#define SEC_DES3_2KEY_SIZE	(2 * DES_KEY_SIZE)
+#define SEC_DES3_3KEY_SIZE	(3 * DES_KEY_SIZE)
+
+/* SEC sqe(bd) bit operational relative MACRO */
+#define SEC_DE_OFFSET		1
+#define SEC_CIPHER_OFFSET	4
+#define SEC_SCENE_OFFSET	3
+#define SEC_DST_SGL_OFFSET	2
+#define SEC_SRC_SGL_OFFSET	7
+#define SEC_CKEY_OFFSET		9
+#define SEC_CMODE_OFFSET	12
+#define SEC_AKEY_OFFSET         5
+#define SEC_AEAD_ALG_OFFSET     11
+#define SEC_AUTH_OFFSET		6
+
+#define SEC_FLAG_OFFSET		7
+#define SEC_FLAG_MASK		0x0780
+#define SEC_TYPE_MASK		0x0F
+#define SEC_DONE_MASK		0x0001
+
+#define SEC_TOTAL_IV_SZ		(SEC_IV_SIZE * QM_Q_DEPTH)
+#define SEC_SGL_SGE_NR		128
+#define SEC_CTX_DEV(ctx)	(&(ctx)->sec->qm.pdev->dev)
+#define SEC_CIPHER_AUTH		0xfe
+#define SEC_AUTH_CIPHER		0x1
+#define SEC_MAX_MAC_LEN		64
+#define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
+#define SEC_SQE_LEN_RATE	4
+#define SEC_SQE_CFLAG		2
+#define SEC_SQE_AEAD_FLAG	3
+#define SEC_SQE_DONE		0x1
+
+static atomic_t sec_active_devs;
+
+/* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
+static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
+{
+	if (req->c_req.encrypt)
+		return (u32)atomic_inc_return(&ctx->enc_qcyclic) %
+				 ctx->hlf_q_num;
+
+	return (u32)atomic_inc_return(&ctx->dec_qcyclic) % ctx->hlf_q_num +
+				 ctx->hlf_q_num;
+}
+
+static inline void sec_free_queue_id(struct sec_ctx *ctx, struct sec_req *req)
+{
+	if (req->c_req.encrypt)
+		atomic_dec(&ctx->enc_qcyclic);
+	else
+		atomic_dec(&ctx->dec_qcyclic);
+}
+
+static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
+{
+	int req_id;
+
+	mutex_lock(&qp_ctx->req_lock);
+
+	req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL,
+				  0, QM_Q_DEPTH, GFP_ATOMIC);
+	mutex_unlock(&qp_ctx->req_lock);
+	if (unlikely(req_id < 0)) {
+		dev_err(SEC_CTX_DEV(req->ctx), "alloc req id fail!\n");
+		return req_id;
+	}
+
+	req->qp_ctx = qp_ctx;
+	qp_ctx->req_list[req_id] = req;
+	return req_id;
+}
+
+static void sec_free_req_id(struct sec_req *req)
+{
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	int req_id = req->req_id;
+
+	if (unlikely(req_id < 0 || req_id >= QM_Q_DEPTH)) {
+		dev_err(SEC_CTX_DEV(req->ctx), "free request id invalid!\n");
+		return;
+	}
+
+	qp_ctx->req_list[req_id] = NULL;
+	req->qp_ctx = NULL;
+
+	mutex_lock(&qp_ctx->req_lock);
+	idr_remove(&qp_ctx->req_idr, req_id);
+	mutex_unlock(&qp_ctx->req_lock);
+}
+
+static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
+	u8 *mac_out = qp_ctx->res[req->req_id].out_mac;
+	size_t authsize = crypto_aead_authsize(tfm);
+	u8 *mac = mac_out + SEC_MAX_MAC_LEN;
+	struct scatterlist *sgl = aead_req->src;
+	size_t sz;
+
+	sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac, authsize,
+				aead_req->cryptlen + aead_req->assoclen -
+				authsize);
+	if (unlikely(sz != authsize || memcmp(mac_out, mac, sz))) {
+		dev_err(SEC_CTX_DEV(req->ctx), "aead verify failure!\n");
+		return -EBADMSG;
+	}
+
+	return 0;
+}
+
+static void sec_req_cb(struct hisi_qp *qp, void *resp)
+{
+	struct sec_qp_ctx *qp_ctx = qp->qp_ctx;
+	struct sec_sqe *bd = resp;
+	struct sec_ctx *ctx;
+	struct sec_req *req;
+	u16 done, flag;
+	int err = 0;
+	u8 type;
+
+	type = bd->type_cipher_auth & SEC_TYPE_MASK;
+	if (unlikely(type != SEC_BD_TYPE2)) {
+		pr_err("err bd type [%d]\n", type);
+		return;
+	}
+
+	req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)];
+	req->err_type = bd->type2.error_type;
+	ctx = req->ctx;
+	done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
+	flag = (le16_to_cpu(bd->type2.done_flag) &
+		SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
+	if (unlikely(req->err_type || done != SEC_SQE_DONE ||
+	    (ctx->alg_type == SEC_SKCIPHER && flag != SEC_SQE_CFLAG) ||
+	    (ctx->alg_type == SEC_AEAD && flag != SEC_SQE_AEAD_FLAG))) {
+		dev_err(SEC_CTX_DEV(ctx),
+			"err_type[%d],done[%d],flag[%d]\n",
+			req->err_type, done, flag);
+		err = -EIO;
+	}
+
+	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
+		err = sec_aead_verify(req, qp_ctx);
+
+	atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
+
+	ctx->req_op->buf_unmap(ctx, req);
+
+	ctx->req_op->callback(ctx, req, err);
+}
+
+static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	int ret;
+
+	mutex_lock(&qp_ctx->req_lock);
+	ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
+	mutex_unlock(&qp_ctx->req_lock);
+	atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
+
+	if (unlikely(ret == -EBUSY))
+		return -ENOBUFS;
+
+	if (!ret) {
+		if (req->fake_busy)
+			ret = -EBUSY;
+		else
+			ret = -EINPROGRESS;
+	}
+
+	return ret;
+}
+
+/* Get DMA memory resources */
+static int sec_alloc_civ_resource(struct device *dev, struct sec_alg_res *res)
+{
+	int i;
+
+	res->c_ivin = dma_alloc_coherent(dev, SEC_TOTAL_IV_SZ,
+					 &res->c_ivin_dma, GFP_KERNEL);
+	if (!res->c_ivin)
+		return -ENOMEM;
+
+	for (i = 1; i < QM_Q_DEPTH; i++) {
+		res[i].c_ivin_dma = res->c_ivin_dma + i * SEC_IV_SIZE;
+		res[i].c_ivin = res->c_ivin + i * SEC_IV_SIZE;
+	}
+
+	return 0;
+}
+
+static void sec_free_civ_resource(struct device *dev, struct sec_alg_res *res)
+{
+	if (res->c_ivin)
+		dma_free_coherent(dev, SEC_TOTAL_IV_SZ,
+				  res->c_ivin, res->c_ivin_dma);
+}
+
+static int sec_alloc_mac_resource(struct device *dev, struct sec_alg_res *res)
+{
+	int i;
+
+	res->out_mac = dma_alloc_coherent(dev, SEC_TOTAL_MAC_SZ << 1,
+					  &res->out_mac_dma, GFP_KERNEL);
+	if (!res->out_mac)
+		return -ENOMEM;
+
+	for (i = 1; i < QM_Q_DEPTH; i++) {
+		res[i].out_mac_dma = res->out_mac_dma +
+				     i * (SEC_MAX_MAC_LEN << 1);
+		res[i].out_mac = res->out_mac + i * (SEC_MAX_MAC_LEN << 1);
+	}
+
+	return 0;
+}
+
+static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
+{
+	if (res->out_mac)
+		dma_free_coherent(dev, SEC_TOTAL_MAC_SZ << 1,
+				  res->out_mac, res->out_mac_dma);
+}
+
+static int sec_alg_resource_alloc(struct sec_ctx *ctx,
+				  struct sec_qp_ctx *qp_ctx)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+	struct sec_alg_res *res = qp_ctx->res;
+	int ret;
+
+	ret = sec_alloc_civ_resource(dev, res);
+	if (ret)
+		return ret;
+
+	if (ctx->alg_type == SEC_AEAD) {
+		ret = sec_alloc_mac_resource(dev, res);
+		if (ret)
+			goto get_fail;
+	}
+
+	return 0;
+get_fail:
+	sec_free_civ_resource(dev, res);
+
+	return ret;
+}
+
+static void sec_alg_resource_free(struct sec_ctx *ctx,
+				  struct sec_qp_ctx *qp_ctx)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+
+	sec_free_civ_resource(dev, qp_ctx->res);
+
+	if (ctx->alg_type == SEC_AEAD)
+		sec_free_mac_resource(dev, qp_ctx->res);
+}
+
+static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
+			     int qp_ctx_id, int alg_type)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+	struct sec_qp_ctx *qp_ctx;
+	struct hisi_qp *qp;
+	int ret = -ENOMEM;
+
+	qp = hisi_qm_create_qp(qm, alg_type);
+	if (IS_ERR(qp))
+		return PTR_ERR(qp);
+
+	qp_ctx = &ctx->qp_ctx[qp_ctx_id];
+	qp->req_type = 0;
+	qp->qp_ctx = qp_ctx;
+	qp->req_cb = sec_req_cb;
+	qp_ctx->qp = qp;
+	qp_ctx->ctx = ctx;
+
+	mutex_init(&qp_ctx->req_lock);
+	atomic_set(&qp_ctx->pending_reqs, 0);
+	idr_init(&qp_ctx->req_idr);
+
+	qp_ctx->c_in_pool = hisi_acc_create_sgl_pool(dev, QM_Q_DEPTH,
+						     SEC_SGL_SGE_NR);
+	if (IS_ERR(qp_ctx->c_in_pool)) {
+		dev_err(dev, "fail to create sgl pool for input!\n");
+		goto err_destroy_idr;
+	}
+
+	qp_ctx->c_out_pool = hisi_acc_create_sgl_pool(dev, QM_Q_DEPTH,
+						      SEC_SGL_SGE_NR);
+	if (IS_ERR(qp_ctx->c_out_pool)) {
+		dev_err(dev, "fail to create sgl pool for output!\n");
+		goto err_free_c_in_pool;
+	}
+
+	ret = sec_alg_resource_alloc(ctx, qp_ctx);
+	if (ret)
+		goto err_free_c_out_pool;
+
+	ret = hisi_qm_start_qp(qp, 0);
+	if (ret < 0)
+		goto err_queue_free;
+
+	return 0;
+
+err_queue_free:
+	sec_alg_resource_free(ctx, qp_ctx);
+err_free_c_out_pool:
+	hisi_acc_free_sgl_pool(dev, qp_ctx->c_out_pool);
+err_free_c_in_pool:
+	hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool);
+err_destroy_idr:
+	idr_destroy(&qp_ctx->req_idr);
+	hisi_qm_release_qp(qp);
+
+	return ret;
+}
+
+static void sec_release_qp_ctx(struct sec_ctx *ctx,
+			       struct sec_qp_ctx *qp_ctx)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+
+	hisi_qm_stop_qp(qp_ctx->qp);
+	sec_alg_resource_free(ctx, qp_ctx);
+
+	hisi_acc_free_sgl_pool(dev, qp_ctx->c_out_pool);
+	hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool);
+
+	idr_destroy(&qp_ctx->req_idr);
+	hisi_qm_release_qp(qp_ctx->qp);
+}
+
+static int sec_ctx_base_init(struct sec_ctx *ctx)
+{
+	struct sec_dev *sec;
+	int i, ret;
+
+	sec = sec_find_device(cpu_to_node(smp_processor_id()));
+	if (!sec) {
+		pr_err("Can not find proper Hisilicon SEC device!\n");
+		return -ENODEV;
+	}
+	ctx->sec = sec;
+	ctx->hlf_q_num = sec->ctx_q_num >> 1;
+
+	/* Half of queue depth is taken as fake requests limit in the queue. */
+	ctx->fake_req_limit = QM_Q_DEPTH >> 1;
+	ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
+			      GFP_KERNEL);
+	if (!ctx->qp_ctx)
+		return -ENOMEM;
+
+	for (i = 0; i < sec->ctx_q_num; i++) {
+		ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0);
+		if (ret)
+			goto err_sec_release_qp_ctx;
+	}
+
+	return 0;
+err_sec_release_qp_ctx:
+	for (i = i - 1; i >= 0; i--)
+		sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]);
+
+	kfree(ctx->qp_ctx);
+	return ret;
+}
+
+static void sec_ctx_base_uninit(struct sec_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < ctx->sec->ctx_q_num; i++)
+		sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]);
+
+	kfree(ctx->qp_ctx);
+}
+
+static int sec_cipher_init(struct sec_ctx *ctx)
+{
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+
+	c_ctx->c_key = dma_alloc_coherent(SEC_CTX_DEV(ctx), SEC_MAX_KEY_SIZE,
+					  &c_ctx->c_key_dma, GFP_KERNEL);
+	if (!c_ctx->c_key)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void sec_cipher_uninit(struct sec_ctx *ctx)
+{
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+
+	memzero_explicit(c_ctx->c_key, SEC_MAX_KEY_SIZE);
+	dma_free_coherent(SEC_CTX_DEV(ctx), SEC_MAX_KEY_SIZE,
+			  c_ctx->c_key, c_ctx->c_key_dma);
+}
+
+static int sec_auth_init(struct sec_ctx *ctx)
+{
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+
+	a_ctx->a_key = dma_alloc_coherent(SEC_CTX_DEV(ctx), SEC_MAX_KEY_SIZE,
+					  &a_ctx->a_key_dma, GFP_KERNEL);
+	if (!a_ctx->a_key)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void sec_auth_uninit(struct sec_ctx *ctx)
+{
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+
+	memzero_explicit(a_ctx->a_key, SEC_MAX_KEY_SIZE);
+	dma_free_coherent(SEC_CTX_DEV(ctx), SEC_MAX_KEY_SIZE,
+			  a_ctx->a_key, a_ctx->a_key_dma);
+}
+
+static int sec_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int ret;
+
+	ctx = crypto_skcipher_ctx(tfm);
+	ctx->alg_type = SEC_SKCIPHER;
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req));
+	ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm);
+	if (ctx->c_ctx.ivsize > SEC_IV_SIZE) {
+		dev_err(SEC_CTX_DEV(ctx), "get error skcipher iv size!\n");
+		return -EINVAL;
+	}
+
+	ret = sec_ctx_base_init(ctx);
+	if (ret)
+		return ret;
+
+	ret = sec_cipher_init(ctx);
+	if (ret)
+		goto err_cipher_init;
+
+	return 0;
+err_cipher_init:
+	sec_ctx_base_uninit(ctx);
+
+	return ret;
+}
+
+static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
+{
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	sec_cipher_uninit(ctx);
+	sec_ctx_base_uninit(ctx);
+}
+
+static int sec_skcipher_3des_setkey(struct sec_cipher_ctx *c_ctx,
+				    const u32 keylen,
+				    const enum sec_cmode c_mode)
+{
+	switch (keylen) {
+	case SEC_DES3_2KEY_SIZE:
+		c_ctx->c_key_len = SEC_CKEY_3DES_2KEY;
+		break;
+	case SEC_DES3_3KEY_SIZE:
+		c_ctx->c_key_len = SEC_CKEY_3DES_3KEY;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx,
+				       const u32 keylen,
+				       const enum sec_cmode c_mode)
+{
+	if (c_mode == SEC_CMODE_XTS) {
+		switch (keylen) {
+		case SEC_XTS_MIN_KEY_SIZE:
+			c_ctx->c_key_len = SEC_CKEY_128BIT;
+			break;
+		case SEC_XTS_MAX_KEY_SIZE:
+			c_ctx->c_key_len = SEC_CKEY_256BIT;
+			break;
+		default:
+			pr_err("hisi_sec2: xts mode key error!\n");
+			return -EINVAL;
+		}
+	} else {
+		switch (keylen) {
+		case AES_KEYSIZE_128:
+			c_ctx->c_key_len = SEC_CKEY_128BIT;
+			break;
+		case AES_KEYSIZE_192:
+			c_ctx->c_key_len = SEC_CKEY_192BIT;
+			break;
+		case AES_KEYSIZE_256:
+			c_ctx->c_key_len = SEC_CKEY_256BIT;
+			break;
+		default:
+			pr_err("hisi_sec2: aes key error!\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       const u32 keylen, const enum sec_calg c_alg,
+			       const enum sec_cmode c_mode)
+{
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	int ret;
+
+	if (c_mode == SEC_CMODE_XTS) {
+		ret = xts_verify_key(tfm, key, keylen);
+		if (ret) {
+			dev_err(SEC_CTX_DEV(ctx), "xts mode key err!\n");
+			return ret;
+		}
+	}
+
+	c_ctx->c_alg  = c_alg;
+	c_ctx->c_mode = c_mode;
+
+	switch (c_alg) {
+	case SEC_CALG_3DES:
+		ret = sec_skcipher_3des_setkey(c_ctx, keylen, c_mode);
+		break;
+	case SEC_CALG_AES:
+	case SEC_CALG_SM4:
+		ret = sec_skcipher_aes_sm4_setkey(c_ctx, keylen, c_mode);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ret) {
+		dev_err(SEC_CTX_DEV(ctx), "set sec key err!\n");
+		return ret;
+	}
+
+	memcpy(c_ctx->c_key, key, keylen);
+
+	return 0;
+}
+
+#define GEN_SEC_SETKEY_FUNC(name, c_alg, c_mode)			\
+static int sec_setkey_##name(struct crypto_skcipher *tfm, const u8 *key,\
+	u32 keylen)							\
+{									\
+	return sec_skcipher_setkey(tfm, key, keylen, c_alg, c_mode);	\
+}
+
+GEN_SEC_SETKEY_FUNC(aes_ecb, SEC_CALG_AES, SEC_CMODE_ECB)
+GEN_SEC_SETKEY_FUNC(aes_cbc, SEC_CALG_AES, SEC_CMODE_CBC)
+GEN_SEC_SETKEY_FUNC(aes_xts, SEC_CALG_AES, SEC_CMODE_XTS)
+
+GEN_SEC_SETKEY_FUNC(3des_ecb, SEC_CALG_3DES, SEC_CMODE_ECB)
+GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
+
+GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
+GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
+
+static int sec_cipher_map(struct device *dev, struct sec_req *req,
+			  struct scatterlist *src, struct scatterlist *dst)
+{
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+
+	c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
+						    qp_ctx->c_in_pool,
+						    req->req_id,
+						    &c_req->c_in_dma);
+
+	if (IS_ERR(c_req->c_in)) {
+		dev_err(dev, "fail to dma map input sgl buffers!\n");
+		return PTR_ERR(c_req->c_in);
+	}
+
+	if (dst == src) {
+		c_req->c_out = c_req->c_in;
+		c_req->c_out_dma = c_req->c_in_dma;
+	} else {
+		c_req->c_out = hisi_acc_sg_buf_map_to_hw_sgl(dev, dst,
+							     qp_ctx->c_out_pool,
+							     req->req_id,
+							     &c_req->c_out_dma);
+
+		if (IS_ERR(c_req->c_out)) {
+			dev_err(dev, "fail to dma map output sgl buffers!\n");
+			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+			return PTR_ERR(c_req->c_out);
+		}
+	}
+
+	return 0;
+}
+
+static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req,
+			     struct scatterlist *src, struct scatterlist *dst)
+{
+	if (dst != src)
+		hisi_acc_sg_buf_unmap(dev, src, req->c_in);
+
+	hisi_acc_sg_buf_unmap(dev, dst, req->c_out);
+}
+
+static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct skcipher_request *sq = req->c_req.sk_req;
+
+	return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst);
+}
+
+static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct skcipher_request *sk_req = c_req->sk_req;
+
+	sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst);
+}
+
+static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx,
+				struct crypto_authenc_keys *keys)
+{
+	switch (keys->enckeylen) {
+	case AES_KEYSIZE_128:
+		c_ctx->c_key_len = SEC_CKEY_128BIT;
+		break;
+	case AES_KEYSIZE_192:
+		c_ctx->c_key_len = SEC_CKEY_192BIT;
+		break;
+	case AES_KEYSIZE_256:
+		c_ctx->c_key_len = SEC_CKEY_256BIT;
+		break;
+	default:
+		pr_err("hisi_sec2: aead aes key error!\n");
+		return -EINVAL;
+	}
+	memcpy(c_ctx->c_key, keys->enckey, keys->enckeylen);
+
+	return 0;
+}
+
+static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx,
+				 struct crypto_authenc_keys *keys)
+{
+	struct crypto_shash *hash_tfm = ctx->hash_tfm;
+	SHASH_DESC_ON_STACK(shash, hash_tfm);
+	int blocksize, ret;
+
+	if (!keys->authkeylen) {
+		pr_err("hisi_sec2: aead auth key error!\n");
+		return -EINVAL;
+	}
+
+	blocksize = crypto_shash_blocksize(hash_tfm);
+	if (keys->authkeylen > blocksize) {
+		ret = crypto_shash_digest(shash, keys->authkey,
+					  keys->authkeylen, ctx->a_key);
+		if (ret) {
+			pr_err("hisi_sec2: aead auth digest error!\n");
+			return -EINVAL;
+		}
+		ctx->a_key_len = blocksize;
+	} else {
+		memcpy(ctx->a_key, keys->authkey, keys->authkeylen);
+		ctx->a_key_len = keys->authkeylen;
+	}
+
+	return 0;
+}
+
+static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+			   const u32 keylen, const enum sec_hash_alg a_alg,
+			   const enum sec_calg c_alg,
+			   const enum sec_mac_len mac_len,
+			   const enum sec_cmode c_mode)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct crypto_authenc_keys keys;
+	int ret;
+
+	ctx->a_ctx.a_alg = a_alg;
+	ctx->c_ctx.c_alg = c_alg;
+	ctx->a_ctx.mac_len = mac_len;
+	c_ctx->c_mode = c_mode;
+
+	if (crypto_authenc_extractkeys(&keys, key, keylen))
+		goto bad_key;
+
+	ret = sec_aead_aes_set_key(c_ctx, &keys);
+	if (ret) {
+		dev_err(SEC_CTX_DEV(ctx), "set sec cipher key err!\n");
+		goto bad_key;
+	}
+
+	ret = sec_aead_auth_set_key(&ctx->a_ctx, &keys);
+	if (ret) {
+		dev_err(SEC_CTX_DEV(ctx), "set sec auth key err!\n");
+		goto bad_key;
+	}
+
+	return 0;
+bad_key:
+	memzero_explicit(&keys, sizeof(struct crypto_authenc_keys));
+
+	return -EINVAL;
+}
+
+
+#define GEN_SEC_AEAD_SETKEY_FUNC(name, aalg, calg, maclen, cmode)	\
+static int sec_setkey_##name(struct crypto_aead *tfm, const u8 *key,	\
+	u32 keylen)							\
+{									\
+	return sec_aead_setkey(tfm, key, keylen, aalg, calg, maclen, cmode);\
+}
+
+GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha1, SEC_A_HMAC_SHA1,
+			 SEC_CALG_AES, SEC_HMAC_SHA1_MAC, SEC_CMODE_CBC)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha256, SEC_A_HMAC_SHA256,
+			 SEC_CALG_AES, SEC_HMAC_SHA256_MAC, SEC_CMODE_CBC)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha512, SEC_A_HMAC_SHA512,
+			 SEC_CALG_AES, SEC_HMAC_SHA512_MAC, SEC_CMODE_CBC)
+
+static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct aead_request *aq = req->aead_req.aead_req;
+
+	return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst);
+}
+
+static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct device *dev = SEC_CTX_DEV(ctx);
+	struct sec_cipher_req *cq = &req->c_req;
+	struct aead_request *aq = req->aead_req.aead_req;
+
+	sec_cipher_unmap(dev, cq, aq->src, aq->dst);
+}
+
+static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req)
+{
+	int ret;
+
+	ret = ctx->req_op->buf_map(ctx, req);
+	if (unlikely(ret))
+		return ret;
+
+	ctx->req_op->do_transfer(ctx, req);
+
+	ret = ctx->req_op->bd_fill(ctx, req);
+	if (unlikely(ret))
+		goto unmap_req_buf;
+
+	return ret;
+
+unmap_req_buf:
+	ctx->req_op->buf_unmap(ctx, req);
+
+	return ret;
+}
+
+static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req)
+{
+	ctx->req_op->buf_unmap(ctx, req);
+}
+
+static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct skcipher_request *sk_req = req->c_req.sk_req;
+	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+
+	memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
+}
+
+static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_sqe *sec_sqe = &req->sec_sqe;
+	u8 scene, sa_type, da_type;
+	u8 bd_type, cipher;
+	u8 de = 0;
+
+	memset(sec_sqe, 0, sizeof(struct sec_sqe));
+
+	sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
+	sec_sqe->type2.c_ivin_addr =
+		cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma);
+	sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
+	sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
+
+	sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_mode) <<
+						SEC_CMODE_OFFSET);
+	sec_sqe->type2.c_alg = c_ctx->c_alg;
+	sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_key_len) <<
+						SEC_CKEY_OFFSET);
+
+	bd_type = SEC_BD_TYPE2;
+	if (c_req->encrypt)
+		cipher = SEC_CIPHER_ENC << SEC_CIPHER_OFFSET;
+	else
+		cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
+	sec_sqe->type_cipher_auth = bd_type | cipher;
+
+	sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
+	if (c_req->c_in_dma != c_req->c_out_dma)
+		de = 0x1 << SEC_DE_OFFSET;
+
+	sec_sqe->sds_sa_type = (de | scene | sa_type);
+
+	/* Just set DST address type */
+	da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+	sec_sqe->sdm_addr_type |= da_type;
+
+	sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
+	sec_sqe->type2.tag = cpu_to_le16((u16)req->req_id);
+
+	return 0;
+}
+
+static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct skcipher_request *sk_req = req->c_req.sk_req;
+	u32 iv_size = req->ctx->c_ctx.ivsize;
+	struct scatterlist *sgl;
+	unsigned int cryptlen;
+	size_t sz;
+	u8 *iv;
+
+	if (req->c_req.encrypt)
+		sgl = alg_type == SEC_SKCIPHER ? sk_req->dst : aead_req->dst;
+	else
+		sgl = alg_type == SEC_SKCIPHER ? sk_req->src : aead_req->src;
+
+	if (alg_type == SEC_SKCIPHER) {
+		iv = sk_req->iv;
+		cryptlen = sk_req->cryptlen;
+	} else {
+		iv = aead_req->iv;
+		cryptlen = aead_req->cryptlen;
+	}
+
+	sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size,
+				cryptlen - iv_size);
+	if (unlikely(sz != iv_size))
+		dev_err(SEC_CTX_DEV(req->ctx), "copy output iv error!\n");
+}
+
+static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
+				  int err)
+{
+	struct skcipher_request *sk_req = req->c_req.sk_req;
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+
+	atomic_dec(&qp_ctx->pending_reqs);
+	sec_free_req_id(req);
+
+	/* IV output at encrypto of CBC mode */
+	if (!err && ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt)
+		sec_update_iv(req, SEC_SKCIPHER);
+
+	if (req->fake_busy)
+		sk_req->base.complete(&sk_req->base, -EINPROGRESS);
+
+	sk_req->base.complete(&sk_req->base, err);
+}
+
+static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+
+	memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
+}
+
+static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
+			       struct sec_req *req, struct sec_sqe *sec_sqe)
+{
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct aead_request *aq = a_req->aead_req;
+
+	sec_sqe->type2.a_key_addr = cpu_to_le64(ctx->a_key_dma);
+
+	sec_sqe->type2.mac_key_alg =
+			cpu_to_le32(ctx->mac_len / SEC_SQE_LEN_RATE);
+
+	sec_sqe->type2.mac_key_alg |=
+			cpu_to_le32((u32)((ctx->a_key_len) /
+			SEC_SQE_LEN_RATE) << SEC_AKEY_OFFSET);
+
+	sec_sqe->type2.mac_key_alg |=
+			cpu_to_le32((u32)(ctx->a_alg) << SEC_AEAD_ALG_OFFSET);
+
+	sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET;
+
+	if (dir)
+		sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH;
+	else
+		sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER;
+
+	sec_sqe->type2.alen_ivllen = cpu_to_le32(c_req->c_len + aq->assoclen);
+
+	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+
+	sec_sqe->type2.mac_addr =
+		cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma);
+}
+
+static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_auth_ctx *auth_ctx = &ctx->a_ctx;
+	struct sec_sqe *sec_sqe = &req->sec_sqe;
+	int ret;
+
+	ret = sec_skcipher_bd_fill(ctx, req);
+	if (unlikely(ret)) {
+		dev_err(SEC_CTX_DEV(ctx), "skcipher bd fill is error!\n");
+		return ret;
+	}
+
+	sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+
+	return 0;
+}
+
+static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
+{
+	struct aead_request *a_req = req->aead_req.aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
+	struct sec_cipher_req *c_req = &req->c_req;
+	size_t authsize = crypto_aead_authsize(tfm);
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	size_t sz;
+
+	atomic_dec(&qp_ctx->pending_reqs);
+
+	if (!err && c->c_ctx.c_mode == SEC_CMODE_CBC && c_req->encrypt)
+		sec_update_iv(req, SEC_AEAD);
+
+	/* Copy output mac */
+	if (!err && c_req->encrypt) {
+		struct scatterlist *sgl = a_req->dst;
+
+		sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl),
+					  qp_ctx->res[req->req_id].out_mac,
+					  authsize, a_req->cryptlen +
+					  a_req->assoclen);
+
+		if (unlikely(sz != authsize)) {
+			dev_err(SEC_CTX_DEV(req->ctx), "copy out mac err!\n");
+			err = -EINVAL;
+		}
+	}
+
+	sec_free_req_id(req);
+
+	if (req->fake_busy)
+		a_req->base.complete(&a_req->base, -EINPROGRESS);
+
+	a_req->base.complete(&a_req->base, err);
+}
+
+static void sec_request_uninit(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+
+	atomic_dec(&qp_ctx->pending_reqs);
+	sec_free_req_id(req);
+	sec_free_queue_id(ctx, req);
+}
+
+static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_qp_ctx *qp_ctx;
+	int queue_id;
+
+	/* To load balance */
+	queue_id = sec_alloc_queue_id(ctx, req);
+	qp_ctx = &ctx->qp_ctx[queue_id];
+
+	req->req_id = sec_alloc_req_id(req, qp_ctx);
+	if (unlikely(req->req_id < 0)) {
+		sec_free_queue_id(ctx, req);
+		return req->req_id;
+	}
+
+	if (ctx->fake_req_limit <= atomic_inc_return(&qp_ctx->pending_reqs))
+		req->fake_busy = true;
+	else
+		req->fake_busy = false;
+
+	return 0;
+}
+
+static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
+{
+	int ret;
+
+	ret = sec_request_init(ctx, req);
+	if (unlikely(ret))
+		return ret;
+
+	ret = sec_request_transfer(ctx, req);
+	if (unlikely(ret))
+		goto err_uninit_req;
+
+	/* Output IV as decrypto */
+	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt)
+		sec_update_iv(req, ctx->alg_type);
+
+	ret = ctx->req_op->bd_send(ctx, req);
+	if (unlikely(ret != -EBUSY && ret != -EINPROGRESS)) {
+		dev_err_ratelimited(SEC_CTX_DEV(ctx), "send sec request failed!\n");
+		goto err_send_req;
+	}
+
+	return ret;
+
+err_send_req:
+	/* As failing, restore the IV from user */
+	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) {
+		if (ctx->alg_type == SEC_SKCIPHER)
+			memcpy(req->c_req.sk_req->iv,
+			       req->qp_ctx->res[req->req_id].c_ivin,
+			       ctx->c_ctx.ivsize);
+		else
+			memcpy(req->aead_req.aead_req->iv,
+			       req->qp_ctx->res[req->req_id].c_ivin,
+			       ctx->c_ctx.ivsize);
+	}
+
+	sec_request_untransfer(ctx, req);
+err_uninit_req:
+	sec_request_uninit(ctx, req);
+
+	return ret;
+}
+
+static const struct sec_req_op sec_skcipher_req_ops = {
+	.buf_map	= sec_skcipher_sgl_map,
+	.buf_unmap	= sec_skcipher_sgl_unmap,
+	.do_transfer	= sec_skcipher_copy_iv,
+	.bd_fill	= sec_skcipher_bd_fill,
+	.bd_send	= sec_bd_send,
+	.callback	= sec_skcipher_callback,
+	.process	= sec_process,
+};
+
+static const struct sec_req_op sec_aead_req_ops = {
+	.buf_map	= sec_aead_sgl_map,
+	.buf_unmap	= sec_aead_sgl_unmap,
+	.do_transfer	= sec_aead_copy_iv,
+	.bd_fill	= sec_aead_bd_fill,
+	.bd_send	= sec_bd_send,
+	.callback	= sec_aead_callback,
+	.process	= sec_process,
+};
+
+static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm)
+{
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->req_op = &sec_skcipher_req_ops;
+
+	return sec_skcipher_init(tfm);
+}
+
+static void sec_skcipher_ctx_exit(struct crypto_skcipher *tfm)
+{
+	sec_skcipher_uninit(tfm);
+}
+
+static int sec_aead_init(struct crypto_aead *tfm)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct sec_req));
+	ctx->alg_type = SEC_AEAD;
+	ctx->c_ctx.ivsize = crypto_aead_ivsize(tfm);
+	if (ctx->c_ctx.ivsize > SEC_IV_SIZE) {
+		dev_err(SEC_CTX_DEV(ctx), "get error aead iv size!\n");
+		return -EINVAL;
+	}
+
+	ctx->req_op = &sec_aead_req_ops;
+	ret = sec_ctx_base_init(ctx);
+	if (ret)
+		return ret;
+
+	ret = sec_auth_init(ctx);
+	if (ret)
+		goto err_auth_init;
+
+	ret = sec_cipher_init(ctx);
+	if (ret)
+		goto err_cipher_init;
+
+	return ret;
+
+err_cipher_init:
+	sec_auth_uninit(ctx);
+err_auth_init:
+	sec_ctx_base_uninit(ctx);
+
+	return ret;
+}
+
+static void sec_aead_exit(struct crypto_aead *tfm)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+
+	sec_cipher_uninit(ctx);
+	sec_auth_uninit(ctx);
+	sec_ctx_base_uninit(ctx);
+}
+
+static int sec_aead_ctx_init(struct crypto_aead *tfm, const char *hash_name)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	struct sec_auth_ctx *auth_ctx = &ctx->a_ctx;
+	int ret;
+
+	ret = sec_aead_init(tfm);
+	if (ret) {
+		pr_err("hisi_sec2: aead init error!\n");
+		return ret;
+	}
+
+	auth_ctx->hash_tfm = crypto_alloc_shash(hash_name, 0, 0);
+	if (IS_ERR(auth_ctx->hash_tfm)) {
+		dev_err(SEC_CTX_DEV(ctx), "aead alloc shash error!\n");
+		sec_aead_exit(tfm);
+		return PTR_ERR(auth_ctx->hash_tfm);
+	}
+
+	return 0;
+}
+
+static void sec_aead_ctx_exit(struct crypto_aead *tfm)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+
+	crypto_free_shash(ctx->a_ctx.hash_tfm);
+	sec_aead_exit(tfm);
+}
+
+static int sec_aead_sha1_ctx_init(struct crypto_aead *tfm)
+{
+	return sec_aead_ctx_init(tfm, "sha1");
+}
+
+static int sec_aead_sha256_ctx_init(struct crypto_aead *tfm)
+{
+	return sec_aead_ctx_init(tfm, "sha256");
+}
+
+static int sec_aead_sha512_ctx_init(struct crypto_aead *tfm)
+{
+	return sec_aead_ctx_init(tfm, "sha512");
+}
+
+static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
+{
+	struct skcipher_request *sk_req = sreq->c_req.sk_req;
+	struct device *dev = SEC_CTX_DEV(ctx);
+	u8 c_alg = ctx->c_ctx.c_alg;
+
+	if (unlikely(!sk_req->src || !sk_req->dst)) {
+		dev_err(dev, "skcipher input param error!\n");
+		return -EINVAL;
+	}
+	sreq->c_req.c_len = sk_req->cryptlen;
+	if (c_alg == SEC_CALG_3DES) {
+		if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
+			dev_err(dev, "skcipher 3des input length error!\n");
+			return -EINVAL;
+		}
+		return 0;
+	} else if (c_alg == SEC_CALG_AES || c_alg == SEC_CALG_SM4) {
+		if (unlikely(sk_req->cryptlen & (AES_BLOCK_SIZE - 1))) {
+			dev_err(dev, "skcipher aes input length error!\n");
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	dev_err(dev, "skcipher algorithm error!\n");
+	return -EINVAL;
+}
+
+static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(sk_req);
+	struct sec_req *req = skcipher_request_ctx(sk_req);
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int ret;
+
+	if (!sk_req->cryptlen)
+		return 0;
+
+	req->c_req.sk_req = sk_req;
+	req->c_req.encrypt = encrypt;
+	req->ctx = ctx;
+
+	ret = sec_skcipher_param_check(ctx, req);
+	if (unlikely(ret))
+		return -EINVAL;
+
+	return ctx->req_op->process(ctx, req);
+}
+
+static int sec_skcipher_encrypt(struct skcipher_request *sk_req)
+{
+	return sec_skcipher_crypto(sk_req, true);
+}
+
+static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
+{
+	return sec_skcipher_crypto(sk_req, false);
+}
+
+#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \
+	sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\
+{\
+	.base = {\
+		.cra_name = sec_cra_name,\
+		.cra_driver_name = "hisi_sec_"sec_cra_name,\
+		.cra_priority = SEC_PRIORITY,\
+		.cra_flags = CRYPTO_ALG_ASYNC,\
+		.cra_blocksize = blk_size,\
+		.cra_ctxsize = sizeof(struct sec_ctx),\
+		.cra_module = THIS_MODULE,\
+	},\
+	.init = ctx_init,\
+	.exit = ctx_exit,\
+	.setkey = sec_set_key,\
+	.decrypt = sec_skcipher_decrypt,\
+	.encrypt = sec_skcipher_encrypt,\
+	.min_keysize = sec_min_key_size,\
+	.max_keysize = sec_max_key_size,\
+	.ivsize = iv_size,\
+},
+
+#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \
+	max_key_size, blk_size, iv_size) \
+	SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \
+	sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size)
+
+static struct skcipher_alg sec_skciphers[] = {
+	SEC_SKCIPHER_ALG("ecb(aes)", sec_setkey_aes_ecb,
+			 AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+			 AES_BLOCK_SIZE, 0)
+
+	SEC_SKCIPHER_ALG("cbc(aes)", sec_setkey_aes_cbc,
+			 AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("xts(aes)", sec_setkey_aes_xts,
+			 SEC_XTS_MIN_KEY_SIZE, SEC_XTS_MAX_KEY_SIZE,
+			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("ecb(des3_ede)", sec_setkey_3des_ecb,
+			 SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+			 DES3_EDE_BLOCK_SIZE, 0)
+
+	SEC_SKCIPHER_ALG("cbc(des3_ede)", sec_setkey_3des_cbc,
+			 SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+			 DES3_EDE_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("xts(sm4)", sec_setkey_sm4_xts,
+			 SEC_XTS_MIN_KEY_SIZE, SEC_XTS_MIN_KEY_SIZE,
+			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("cbc(sm4)", sec_setkey_sm4_cbc,
+			 AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
+};
+
+static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
+{
+	u8 c_alg = ctx->c_ctx.c_alg;
+	struct aead_request *req = sreq->aead_req.aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	size_t authsize = crypto_aead_authsize(tfm);
+
+	if (unlikely(!req->src || !req->dst || !req->cryptlen)) {
+		dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n");
+		return -EINVAL;
+	}
+
+	/* Support AES only */
+	if (unlikely(c_alg != SEC_CALG_AES)) {
+		dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");
+		return -EINVAL;
+
+	}
+	if (sreq->c_req.encrypt)
+		sreq->c_req.c_len = req->cryptlen;
+	else
+		sreq->c_req.c_len = req->cryptlen - authsize;
+
+	if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) {
+		dev_err(SEC_CTX_DEV(ctx), "aead crypto length error!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
+	struct sec_req *req = aead_request_ctx(a_req);
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
+
+	req->aead_req.aead_req = a_req;
+	req->c_req.encrypt = encrypt;
+	req->ctx = ctx;
+
+	ret = sec_aead_param_check(ctx, req);
+	if (unlikely(ret))
+		return -EINVAL;
+
+	return ctx->req_op->process(ctx, req);
+}
+
+static int sec_aead_encrypt(struct aead_request *a_req)
+{
+	return sec_aead_crypto(a_req, true);
+}
+
+static int sec_aead_decrypt(struct aead_request *a_req)
+{
+	return sec_aead_crypto(a_req, false);
+}
+
+#define SEC_AEAD_GEN_ALG(sec_cra_name, sec_set_key, ctx_init,\
+			 ctx_exit, blk_size, iv_size, max_authsize)\
+{\
+	.base = {\
+		.cra_name = sec_cra_name,\
+		.cra_driver_name = "hisi_sec_"sec_cra_name,\
+		.cra_priority = SEC_PRIORITY,\
+		.cra_flags = CRYPTO_ALG_ASYNC,\
+		.cra_blocksize = blk_size,\
+		.cra_ctxsize = sizeof(struct sec_ctx),\
+		.cra_module = THIS_MODULE,\
+	},\
+	.init = ctx_init,\
+	.exit = ctx_exit,\
+	.setkey = sec_set_key,\
+	.decrypt = sec_aead_decrypt,\
+	.encrypt = sec_aead_encrypt,\
+	.ivsize = iv_size,\
+	.maxauthsize = max_authsize,\
+}
+
+#define SEC_AEAD_ALG(algname, keyfunc, aead_init, blksize, ivsize, authsize)\
+	SEC_AEAD_GEN_ALG(algname, keyfunc, aead_init,\
+			sec_aead_ctx_exit, blksize, ivsize, authsize)
+
+static struct aead_alg sec_aeads[] = {
+	SEC_AEAD_ALG("authenc(hmac(sha1),cbc(aes))",
+		     sec_setkey_aes_cbc_sha1, sec_aead_sha1_ctx_init,
+		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA1_DIGEST_SIZE),
+
+	SEC_AEAD_ALG("authenc(hmac(sha256),cbc(aes))",
+		     sec_setkey_aes_cbc_sha256, sec_aead_sha256_ctx_init,
+		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA256_DIGEST_SIZE),
+
+	SEC_AEAD_ALG("authenc(hmac(sha512),cbc(aes))",
+		     sec_setkey_aes_cbc_sha512, sec_aead_sha512_ctx_init,
+		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA512_DIGEST_SIZE),
+};
+
+int sec_register_to_crypto(void)
+{
+	int ret = 0;
+
+	/* To avoid repeat register */
+	if (atomic_add_return(1, &sec_active_devs) == 1) {
+		ret = crypto_register_skciphers(sec_skciphers,
+						ARRAY_SIZE(sec_skciphers));
+		if (ret)
+			return ret;
+
+		ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
+		if (ret)
+			goto reg_aead_fail;
+	}
+
+	return ret;
+
+reg_aead_fail:
+	crypto_unregister_skciphers(sec_skciphers, ARRAY_SIZE(sec_skciphers));
+
+	return ret;
+}
+
+void sec_unregister_from_crypto(void)
+{
+	if (atomic_sub_return(1, &sec_active_devs) == 0) {
+		crypto_unregister_skciphers(sec_skciphers,
+					    ARRAY_SIZE(sec_skciphers));
+		crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
+	}
+}
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
new file mode 100644
index 000000000000..b2786e17d8fe
--- /dev/null
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+
+#ifndef __HISI_SEC_V2_CRYPTO_H
+#define __HISI_SEC_V2_CRYPTO_H
+
+#define SEC_IV_SIZE		24
+#define SEC_MAX_KEY_SIZE	64
+#define SEC_COMM_SCENE		0
+
+enum sec_calg {
+	SEC_CALG_3DES = 0x1,
+	SEC_CALG_AES  = 0x2,
+	SEC_CALG_SM4  = 0x3,
+};
+
+enum sec_hash_alg {
+	SEC_A_HMAC_SHA1   = 0x10,
+	SEC_A_HMAC_SHA256 = 0x11,
+	SEC_A_HMAC_SHA512 = 0x15,
+};
+
+enum sec_mac_len {
+	SEC_HMAC_SHA1_MAC   = 20,
+	SEC_HMAC_SHA256_MAC = 32,
+	SEC_HMAC_SHA512_MAC = 64,
+};
+
+enum sec_cmode {
+	SEC_CMODE_ECB    = 0x0,
+	SEC_CMODE_CBC    = 0x1,
+	SEC_CMODE_CTR    = 0x4,
+	SEC_CMODE_XTS    = 0x7,
+};
+
+enum sec_ckey_type {
+	SEC_CKEY_128BIT = 0x0,
+	SEC_CKEY_192BIT = 0x1,
+	SEC_CKEY_256BIT = 0x2,
+	SEC_CKEY_3DES_3KEY = 0x1,
+	SEC_CKEY_3DES_2KEY = 0x3,
+};
+
+enum sec_bd_type {
+	SEC_BD_TYPE1 = 0x1,
+	SEC_BD_TYPE2 = 0x2,
+};
+
+enum sec_auth {
+	SEC_NO_AUTH = 0x0,
+	SEC_AUTH_TYPE1 = 0x1,
+	SEC_AUTH_TYPE2 = 0x2,
+};
+
+enum sec_cipher_dir {
+	SEC_CIPHER_ENC = 0x1,
+	SEC_CIPHER_DEC = 0x2,
+};
+
+enum sec_addr_type {
+	SEC_PBUF = 0x0,
+	SEC_SGL  = 0x1,
+	SEC_PRP  = 0x2,
+};
+
+struct sec_sqe_type2 {
+
+	/*
+	 * mac_len: 0~4 bits
+	 * a_key_len: 5~10 bits
+	 * a_alg: 11~16 bits
+	 */
+	__le32 mac_key_alg;
+
+	/*
+	 * c_icv_len: 0~5 bits
+	 * c_width: 6~8 bits
+	 * c_key_len: 9~11 bits
+	 * c_mode: 12~15 bits
+	 */
+	__le16 icvw_kmode;
+
+	/* c_alg: 0~3 bits */
+	__u8 c_alg;
+	__u8 rsvd4;
+
+	/*
+	 * a_len: 0~23 bits
+	 * iv_offset_l: 24~31 bits
+	 */
+	__le32 alen_ivllen;
+
+	/*
+	 * c_len: 0~23 bits
+	 * iv_offset_h: 24~31 bits
+	 */
+	__le32 clen_ivhlen;
+
+	__le16 auth_src_offset;
+	__le16 cipher_src_offset;
+	__le16 cs_ip_header_offset;
+	__le16 cs_udp_header_offset;
+	__le16 pass_word_len;
+	__le16 dk_len;
+	__u8 salt3;
+	__u8 salt2;
+	__u8 salt1;
+	__u8 salt0;
+
+	__le16 tag;
+	__le16 rsvd5;
+
+	/*
+	 * c_pad_type: 0~3 bits
+	 * c_pad_len: 4~11 bits
+	 * c_pad_data_type: 12~15 bits
+	 */
+	__le16 cph_pad;
+
+	/* c_pad_len_field: 0~1 bits */
+	__le16 c_pad_len_field;
+
+
+	__le64 long_a_data_len;
+	__le64 a_ivin_addr;
+	__le64 a_key_addr;
+	__le64 mac_addr;
+	__le64 c_ivin_addr;
+	__le64 c_key_addr;
+
+	__le64 data_src_addr;
+	__le64 data_dst_addr;
+
+	/*
+	 * done: 0 bit
+	 * icv: 1~3 bits
+	 * csc: 4~6 bits
+	 * flag: 7-10 bits
+	 * dif_check: 11~13 bits
+	 */
+	__le16 done_flag;
+
+	__u8 error_type;
+	__u8 warning_type;
+	__u8 mac_i3;
+	__u8 mac_i2;
+	__u8 mac_i1;
+	__u8 mac_i0;
+	__le16 check_sum_i;
+	__u8 tls_pad_len_i;
+	__u8 rsvd12;
+	__le32 counter;
+};
+
+struct sec_sqe {
+	/*
+	 * type:	0~3 bits
+	 * cipher:	4~5 bits
+	 * auth:	6~7 bit s
+	 */
+	__u8 type_cipher_auth;
+
+	/*
+	 * seq:	0 bit
+	 * de:	1~2 bits
+	 * scene:	3~6 bits
+	 * src_addr_type: ~7 bit, with sdm_addr_type 0-1 bits
+	 */
+	__u8 sds_sa_type;
+
+	/*
+	 * src_addr_type: 0~1 bits, not used now,
+	 * if support PRP, set this field, or set zero.
+	 * dst_addr_type: 2~4 bits
+	 * mac_addr_type: 5~7 bits
+	 */
+	__u8 sdm_addr_type;
+	__u8 rsvd0;
+
+	/*
+	 * nonce_len(type2): 0~3 bits
+	 * huk(type2): 4 bit
+	 * key_s(type2): 5 bit
+	 * ci_gen: 6~7 bits
+	 */
+	__u8 huk_key_ci;
+
+	/*
+	 * ai_gen: 0~1 bits
+	 * a_pad(type2): 2~3 bits
+	 * c_s(type2): 4~5 bits
+	 */
+	__u8 ai_apd_cs;
+
+	/*
+	 * rhf(type2): 0 bit
+	 * c_key_type: 1~2 bits
+	 * a_key_type: 3~4 bits
+	 * write_frame_len(type2): 5~7 bits
+	 */
+	__u8 rca_key_frm;
+
+	/*
+	 * cal_iv_addr_en(type2): 0 bit
+	 * tls_up(type2): 1 bit
+	 * inveld: 7 bit
+	 */
+	__u8 iv_tls_ld;
+
+	/* Just using type2 BD now */
+	struct sec_sqe_type2 type2;
+};
+
+int sec_register_to_crypto(void);
+void sec_unregister_from_crypto(void);
+#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
new file mode 100644
index 000000000000..2bbaf1e2dae7
--- /dev/null
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -0,0 +1,1106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+
+#include <linux/acpi.h>
+#include <linux/aer.h>
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <linux/topology.h>
+
+#include "sec.h"
+
+#define SEC_VF_NUM			63
+#define SEC_QUEUE_NUM_V1		4096
+#define SEC_QUEUE_NUM_V2		1024
+#define SEC_PF_PCI_DEVICE_ID		0xa255
+#define SEC_VF_PCI_DEVICE_ID		0xa256
+
+#define SEC_XTS_MIV_ENABLE_REG		0x301384
+#define SEC_XTS_MIV_ENABLE_MSK		0x7FFFFFFF
+#define SEC_XTS_MIV_DISABLE_MSK		0xFFFFFFFF
+#define SEC_BD_ERR_CHK_EN1		0xfffff7fd
+#define SEC_BD_ERR_CHK_EN2		0xffffbfff
+
+#define SEC_SQE_SIZE			128
+#define SEC_SQ_SIZE			(SEC_SQE_SIZE * QM_Q_DEPTH)
+#define SEC_PF_DEF_Q_NUM		64
+#define SEC_PF_DEF_Q_BASE		0
+#define SEC_CTX_Q_NUM_DEF		24
+#define SEC_CTX_Q_NUM_MAX		32
+
+#define SEC_CTRL_CNT_CLR_CE		0x301120
+#define SEC_CTRL_CNT_CLR_CE_BIT		BIT(0)
+#define SEC_ENGINE_PF_CFG_OFF		0x300000
+#define SEC_ACC_COMMON_REG_OFF		0x1000
+#define SEC_CORE_INT_SOURCE		0x301010
+#define SEC_CORE_INT_MASK		0x301000
+#define SEC_CORE_INT_STATUS		0x301008
+#define SEC_CORE_SRAM_ECC_ERR_INFO	0x301C14
+#define SEC_ECC_NUM(err)			(((err) >> 16) & 0xFF)
+#define SEC_ECC_ADDR(err)			((err) >> 0)
+#define SEC_CORE_INT_DISABLE		0x0
+#define SEC_CORE_INT_ENABLE		0x1ff
+
+#define SEC_RAS_CE_REG			0x50
+#define SEC_RAS_FE_REG			0x54
+#define SEC_RAS_NFE_REG			0x58
+#define SEC_RAS_CE_ENB_MSK		0x88
+#define SEC_RAS_FE_ENB_MSK		0x0
+#define SEC_RAS_NFE_ENB_MSK		0x177
+#define SEC_RAS_DISABLE			0x0
+#define SEC_MEM_START_INIT_REG		0x0100
+#define SEC_MEM_INIT_DONE_REG		0x0104
+#define SEC_QM_ABNORMAL_INT_MASK	0x100004
+
+#define SEC_CONTROL_REG			0x0200
+#define SEC_TRNG_EN_SHIFT		8
+#define SEC_CLK_GATE_ENABLE		BIT(3)
+#define SEC_CLK_GATE_DISABLE		(~BIT(3))
+#define SEC_AXI_SHUTDOWN_ENABLE	BIT(12)
+#define SEC_AXI_SHUTDOWN_DISABLE	0xFFFFEFFF
+
+#define SEC_INTERFACE_USER_CTRL0_REG	0x0220
+#define SEC_INTERFACE_USER_CTRL1_REG	0x0224
+#define SEC_BD_ERR_CHK_EN_REG1		0x0384
+#define SEC_BD_ERR_CHK_EN_REG2		0x038c
+
+#define SEC_USER0_SMMU_NORMAL		(BIT(23) | BIT(15))
+#define SEC_USER1_SMMU_NORMAL		(BIT(31) | BIT(23) | BIT(15) | BIT(7))
+#define SEC_CORE_INT_STATUS_M_ECC	BIT(2)
+
+#define SEC_DELAY_10_US			10
+#define SEC_POLL_TIMEOUT_US		1000
+#define SEC_VF_CNT_MASK			0xffffffc0
+#define SEC_DBGFS_VAL_MAX_LEN		20
+
+#define SEC_ADDR(qm, offset) ((qm)->io_base + (offset) + \
+			     SEC_ENGINE_PF_CFG_OFF + SEC_ACC_COMMON_REG_OFF)
+
+struct sec_hw_error {
+	u32 int_msk;
+	const char *msg;
+};
+
+static const char sec_name[] = "hisi_sec2";
+static struct dentry *sec_debugfs_root;
+static LIST_HEAD(sec_list);
+static DEFINE_MUTEX(sec_list_lock);
+
+static const struct sec_hw_error sec_hw_errors[] = {
+	{.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"},
+	{.int_msk = BIT(1), .msg = "sec_axi_bresp_err_rint"},
+	{.int_msk = BIT(2), .msg = "sec_ecc_2bit_err_rint"},
+	{.int_msk = BIT(3), .msg = "sec_ecc_1bit_err_rint"},
+	{.int_msk = BIT(4), .msg = "sec_req_trng_timeout_rint"},
+	{.int_msk = BIT(5), .msg = "sec_fsm_hbeat_rint"},
+	{.int_msk = BIT(6), .msg = "sec_channel_req_rng_timeout_rint"},
+	{.int_msk = BIT(7), .msg = "sec_bd_err_rint"},
+	{.int_msk = BIT(8), .msg = "sec_chain_buff_err_rint"},
+	{ /* sentinel */ }
+};
+
+struct sec_dev *sec_find_device(int node)
+{
+#define SEC_NUMA_MAX_DISTANCE	100
+	int min_distance = SEC_NUMA_MAX_DISTANCE;
+	int dev_node = 0, free_qp_num = 0;
+	struct sec_dev *sec, *ret = NULL;
+	struct hisi_qm *qm;
+	struct device *dev;
+
+	mutex_lock(&sec_list_lock);
+	list_for_each_entry(sec, &sec_list, list) {
+		qm = &sec->qm;
+		dev = &qm->pdev->dev;
+#ifdef CONFIG_NUMA
+		dev_node = dev->numa_node;
+		if (dev_node < 0)
+			dev_node = 0;
+#endif
+		if (node_distance(dev_node, node) < min_distance) {
+			free_qp_num = hisi_qm_get_free_qp_num(qm);
+			if (free_qp_num >= sec->ctx_q_num) {
+				ret = sec;
+				min_distance = node_distance(dev_node, node);
+			}
+		}
+	}
+	mutex_unlock(&sec_list_lock);
+
+	return ret;
+}
+
+static const char * const sec_dbg_file_name[] = {
+	[SEC_CURRENT_QM] = "current_qm",
+	[SEC_CLEAR_ENABLE] = "clear_enable",
+};
+
+static struct debugfs_reg32 sec_dfx_regs[] = {
+	{"SEC_PF_ABNORMAL_INT_SOURCE    ",  0x301010},
+	{"SEC_SAA_EN                    ",  0x301270},
+	{"SEC_BD_LATENCY_MIN            ",  0x301600},
+	{"SEC_BD_LATENCY_MAX            ",  0x301608},
+	{"SEC_BD_LATENCY_AVG            ",  0x30160C},
+	{"SEC_BD_NUM_IN_SAA0            ",  0x301670},
+	{"SEC_BD_NUM_IN_SAA1            ",  0x301674},
+	{"SEC_BD_NUM_IN_SEC             ",  0x301680},
+	{"SEC_ECC_1BIT_CNT              ",  0x301C00},
+	{"SEC_ECC_1BIT_INFO             ",  0x301C04},
+	{"SEC_ECC_2BIT_CNT              ",  0x301C10},
+	{"SEC_ECC_2BIT_INFO             ",  0x301C14},
+	{"SEC_BD_SAA0                   ",  0x301C20},
+	{"SEC_BD_SAA1                   ",  0x301C24},
+	{"SEC_BD_SAA2                   ",  0x301C28},
+	{"SEC_BD_SAA3                   ",  0x301C2C},
+	{"SEC_BD_SAA4                   ",  0x301C30},
+	{"SEC_BD_SAA5                   ",  0x301C34},
+	{"SEC_BD_SAA6                   ",  0x301C38},
+	{"SEC_BD_SAA7                   ",  0x301C3C},
+	{"SEC_BD_SAA8                   ",  0x301C40},
+};
+
+static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
+{
+	struct pci_dev *pdev;
+	u32 n, q_num;
+	u8 rev_id;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
+			      SEC_PF_PCI_DEVICE_ID, NULL);
+	if (!pdev) {
+		q_num = min_t(u32, SEC_QUEUE_NUM_V1, SEC_QUEUE_NUM_V2);
+		pr_info("No device, suppose queue number is %d!\n", q_num);
+	} else {
+		rev_id = pdev->revision;
+
+		switch (rev_id) {
+		case QM_HW_V1:
+			q_num = SEC_QUEUE_NUM_V1;
+			break;
+		case QM_HW_V2:
+			q_num = SEC_QUEUE_NUM_V2;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	ret = kstrtou32(val, 10, &n);
+	if (ret || !n || n > q_num)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops sec_pf_q_num_ops = {
+	.set = sec_pf_q_num_set,
+	.get = param_get_int,
+};
+static u32 pf_q_num = SEC_PF_DEF_Q_NUM;
+module_param_cb(pf_q_num, &sec_pf_q_num_ops, &pf_q_num, 0444);
+MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 0-4096, v2 0-1024)");
+
+static int sec_ctx_q_num_set(const char *val, const struct kernel_param *kp)
+{
+	u32 ctx_q_num;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = kstrtou32(val, 10, &ctx_q_num);
+	if (ret)
+		return -EINVAL;
+
+	if (!ctx_q_num || ctx_q_num > SEC_CTX_Q_NUM_MAX || ctx_q_num & 0x1) {
+		pr_err("ctx queue num[%u] is invalid!\n", ctx_q_num);
+		return -EINVAL;
+	}
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops sec_ctx_q_num_ops = {
+	.set = sec_ctx_q_num_set,
+	.get = param_get_int,
+};
+static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF;
+module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444);
+MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)");
+
+static const struct pci_device_id sec_dev_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, sec_dev_ids);
+
+static inline void sec_add_to_list(struct sec_dev *sec)
+{
+	mutex_lock(&sec_list_lock);
+	list_add_tail(&sec->list, &sec_list);
+	mutex_unlock(&sec_list_lock);
+}
+
+static inline void sec_remove_from_list(struct sec_dev *sec)
+{
+	mutex_lock(&sec_list_lock);
+	list_del(&sec->list);
+	mutex_unlock(&sec_list_lock);
+}
+
+static u8 sec_get_endian(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	u32 reg;
+
+	/*
+	 * As for VF, it is a wrong way to get endian setting by
+	 * reading a register of the engine
+	 */
+	if (qm->pdev->is_virtfn) {
+		dev_err_ratelimited(&qm->pdev->dev,
+				    "cannot access a register in VF!\n");
+		return SEC_LE;
+	}
+	reg = readl_relaxed(qm->io_base + SEC_ENGINE_PF_CFG_OFF +
+			    SEC_ACC_COMMON_REG_OFF + SEC_CONTROL_REG);
+
+	/* BD little endian mode */
+	if (!(reg & BIT(0)))
+		return SEC_LE;
+
+	/* BD 32-bits big endian mode */
+	else if (!(reg & BIT(1)))
+		return SEC_32BE;
+
+	/* BD 64-bits big endian mode */
+	else
+		return SEC_64BE;
+}
+
+static int sec_engine_init(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	int ret;
+	u32 reg;
+
+	/* disable clock gate control */
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
+	reg &= SEC_CLK_GATE_DISABLE;
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
+
+	writel_relaxed(0x1, SEC_ADDR(qm, SEC_MEM_START_INIT_REG));
+
+	ret = readl_relaxed_poll_timeout(SEC_ADDR(qm, SEC_MEM_INIT_DONE_REG),
+					 reg, reg & 0x1, SEC_DELAY_10_US,
+					 SEC_POLL_TIMEOUT_US);
+	if (ret) {
+		dev_err(&qm->pdev->dev, "fail to init sec mem\n");
+		return ret;
+	}
+
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
+	reg |= (0x1 << SEC_TRNG_EN_SHIFT);
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
+
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL0_REG));
+	reg |= SEC_USER0_SMMU_NORMAL;
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL0_REG));
+
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG));
+	reg |= SEC_USER1_SMMU_NORMAL;
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG));
+
+	writel_relaxed(SEC_BD_ERR_CHK_EN1,
+		       SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG1));
+	writel_relaxed(SEC_BD_ERR_CHK_EN2,
+		       SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG2));
+
+	/* enable clock gate control */
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
+	reg |= SEC_CLK_GATE_ENABLE;
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
+
+	/* config endian */
+	reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
+	reg |= sec_get_endian(sec);
+	writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
+
+	/* Enable sm4 xts mode multiple iv */
+	writel_relaxed(SEC_XTS_MIV_ENABLE_MSK,
+		       qm->io_base + SEC_XTS_MIV_ENABLE_REG);
+
+	return 0;
+}
+
+static int sec_set_user_domain_and_cache(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+
+	/* qm user domain */
+	writel(AXUSER_BASE, qm->io_base + QM_ARUSER_M_CFG_1);
+	writel(ARUSER_M_CFG_ENABLE, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
+	writel(AXUSER_BASE, qm->io_base + QM_AWUSER_M_CFG_1);
+	writel(AWUSER_M_CFG_ENABLE, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
+	writel(WUSER_M_CFG_ENABLE, qm->io_base + QM_WUSER_M_CFG_ENABLE);
+
+	/* qm cache */
+	writel(AXI_M_CFG, qm->io_base + QM_AXI_M_CFG);
+	writel(AXI_M_CFG_ENABLE, qm->io_base + QM_AXI_M_CFG_ENABLE);
+
+	/* disable FLR triggered by BME(bus master enable) */
+	writel(PEH_AXUSER_CFG, qm->io_base + QM_PEH_AXUSER_CFG);
+	writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE);
+
+	/* enable sqc,cqc writeback */
+	writel(SQC_CACHE_ENABLE | CQC_CACHE_ENABLE | SQC_CACHE_WB_ENABLE |
+	       CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
+	       FIELD_PREP(CQC_CACHE_WB_THRD, 1), qm->io_base + QM_CACHE_CTL);
+
+	return sec_engine_init(sec);
+}
+
+/* sec_debug_regs_clear() - clear the sec debug regs */
+static void sec_debug_regs_clear(struct hisi_qm *qm)
+{
+	/* clear current_qm */
+	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	/* clear rdclr_en */
+	writel(0x0, qm->io_base + SEC_CTRL_CNT_CLR_CE);
+
+	hisi_qm_debug_regs_clear(qm);
+}
+
+static void sec_hw_error_enable(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	u32 val;
+
+	if (qm->ver == QM_HW_V1) {
+		writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
+		dev_info(&qm->pdev->dev, "V1 not support hw error handle\n");
+		return;
+	}
+
+	val = readl(qm->io_base + SEC_CONTROL_REG);
+
+	/* clear SEC hw error source if having */
+	writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_SOURCE);
+
+	/* enable SEC hw error interrupts */
+	writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
+
+	/* enable RAS int */
+	writel(SEC_RAS_CE_ENB_MSK, qm->io_base + SEC_RAS_CE_REG);
+	writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG);
+	writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG);
+
+	/* enable SEC block master OOO when m-bit error occur */
+	val = val | SEC_AXI_SHUTDOWN_ENABLE;
+
+	writel(val, qm->io_base + SEC_CONTROL_REG);
+}
+
+static void sec_hw_error_disable(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	u32 val;
+
+	val = readl(qm->io_base + SEC_CONTROL_REG);
+
+	/* disable RAS int */
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG);
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
+
+	/* disable SEC hw error interrupts */
+	writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
+
+	/* disable SEC block master OOO when m-bit error occur */
+	val = val & SEC_AXI_SHUTDOWN_DISABLE;
+
+	writel(val, qm->io_base + SEC_CONTROL_REG);
+}
+
+static void sec_hw_error_init(struct sec_dev *sec)
+{
+	if (sec->qm.fun_type == QM_HW_VF)
+		return;
+
+	hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE,
+			      QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT
+			      | QM_ACC_WB_NOT_READY_TIMEOUT, 0,
+			      QM_DB_RANDOM_INVALID);
+	sec_hw_error_enable(sec);
+}
+
+static void sec_hw_error_uninit(struct sec_dev *sec)
+{
+	if (sec->qm.fun_type == QM_HW_VF)
+		return;
+
+	sec_hw_error_disable(sec);
+	writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK);
+}
+
+static u32 sec_current_qm_read(struct sec_debug_file *file)
+{
+	struct hisi_qm *qm = file->qm;
+
+	return readl(qm->io_base + QM_DFX_MB_CNT_VF);
+}
+
+static int sec_current_qm_write(struct sec_debug_file *file, u32 val)
+{
+	struct hisi_qm *qm = file->qm;
+	struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
+	u32 vfq_num;
+	u32 tmp;
+
+	if (val > sec->num_vfs)
+		return -EINVAL;
+
+	/* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
+	if (!val) {
+		qm->debug.curr_qm_qp_num = qm->qp_num;
+	} else {
+		vfq_num = (qm->ctrl_qp_num - qm->qp_num) / sec->num_vfs;
+
+		if (val == sec->num_vfs)
+			qm->debug.curr_qm_qp_num =
+				qm->ctrl_qp_num - qm->qp_num -
+				(sec->num_vfs - 1) * vfq_num;
+		else
+			qm->debug.curr_qm_qp_num = vfq_num;
+	}
+
+	writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return 0;
+}
+
+static u32 sec_clear_enable_read(struct sec_debug_file *file)
+{
+	struct hisi_qm *qm = file->qm;
+
+	return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
+			SEC_CTRL_CNT_CLR_CE_BIT;
+}
+
+static int sec_clear_enable_write(struct sec_debug_file *file, u32 val)
+{
+	struct hisi_qm *qm = file->qm;
+	u32 tmp;
+
+	if (val != 1 && val)
+		return -EINVAL;
+
+	tmp = (readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
+	       ~SEC_CTRL_CNT_CLR_CE_BIT) | val;
+	writel(tmp, qm->io_base + SEC_CTRL_CNT_CLR_CE);
+
+	return 0;
+}
+
+static ssize_t sec_debug_read(struct file *filp, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct sec_debug_file *file = filp->private_data;
+	char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+	u32 val;
+	int ret;
+
+	spin_lock_irq(&file->lock);
+
+	switch (file->index) {
+	case SEC_CURRENT_QM:
+		val = sec_current_qm_read(file);
+		break;
+	case SEC_CLEAR_ENABLE:
+		val = sec_clear_enable_read(file);
+		break;
+	default:
+		spin_unlock_irq(&file->lock);
+		return -EINVAL;
+	}
+
+	spin_unlock_irq(&file->lock);
+	ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
+
+	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct sec_debug_file *file = filp->private_data;
+	char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+	unsigned long val;
+	int len, ret;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= SEC_DBGFS_VAL_MAX_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, SEC_DBGFS_VAL_MAX_LEN - 1,
+				     pos, buf, count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	spin_lock_irq(&file->lock);
+
+	switch (file->index) {
+	case SEC_CURRENT_QM:
+		ret = sec_current_qm_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	case SEC_CLEAR_ENABLE:
+		ret = sec_clear_enable_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_input;
+	}
+
+	spin_unlock_irq(&file->lock);
+
+	return count;
+
+ err_input:
+	spin_unlock_irq(&file->lock);
+	return ret;
+}
+
+static const struct file_operations sec_dbg_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = sec_debug_read,
+	.write = sec_debug_write,
+};
+
+static int sec_debugfs_atomic64_get(void *data, u64 *val)
+{
+	*val = atomic64_read((atomic64_t *)data);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
+			 NULL, "%lld\n");
+
+static int sec_core_debug_init(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	struct device *dev = &qm->pdev->dev;
+	struct sec_dfx *dfx = &sec->debug.dfx;
+	struct debugfs_regset32 *regset;
+	struct dentry *tmp_d;
+
+	tmp_d = debugfs_create_dir("sec_dfx", sec->qm.debug.debug_root);
+
+	regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return -ENOENT;
+
+	regset->regs = sec_dfx_regs;
+	regset->nregs = ARRAY_SIZE(sec_dfx_regs);
+	regset->base = qm->io_base;
+
+	debugfs_create_regset32("regs", 0444, tmp_d, regset);
+
+	debugfs_create_file("send_cnt", 0444, tmp_d,
+			    &dfx->send_cnt, &sec_atomic64_ops);
+
+	debugfs_create_file("recv_cnt", 0444, tmp_d,
+			    &dfx->recv_cnt, &sec_atomic64_ops);
+
+	return 0;
+}
+
+static int sec_debug_init(struct sec_dev *sec)
+{
+	int i;
+
+	for (i = SEC_CURRENT_QM; i < SEC_DEBUG_FILE_NUM; i++) {
+		spin_lock_init(&sec->debug.files[i].lock);
+		sec->debug.files[i].index = i;
+		sec->debug.files[i].qm = &sec->qm;
+
+		debugfs_create_file(sec_dbg_file_name[i], 0600,
+				    sec->qm.debug.debug_root,
+				    sec->debug.files + i,
+				    &sec_dbg_fops);
+	}
+
+	return sec_core_debug_init(sec);
+}
+
+static int sec_debugfs_init(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	struct device *dev = &qm->pdev->dev;
+	int ret;
+
+	qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
+						  sec_debugfs_root);
+	ret = hisi_qm_debug_init(qm);
+	if (ret)
+		goto failed_to_create;
+
+	if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
+		ret = sec_debug_init(sec);
+		if (ret)
+			goto failed_to_create;
+	}
+
+	return 0;
+
+failed_to_create:
+	debugfs_remove_recursive(sec_debugfs_root);
+
+	return ret;
+}
+
+static void sec_debugfs_exit(struct sec_dev *sec)
+{
+	debugfs_remove_recursive(sec->qm.debug.debug_root);
+}
+
+static int sec_pf_probe_init(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	int ret;
+
+	switch (qm->ver) {
+	case QM_HW_V1:
+		qm->ctrl_qp_num = SEC_QUEUE_NUM_V1;
+		break;
+
+	case QM_HW_V2:
+		qm->ctrl_qp_num = SEC_QUEUE_NUM_V2;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	ret = sec_set_user_domain_and_cache(sec);
+	if (ret)
+		return ret;
+
+	sec_hw_error_init(sec);
+	sec_debug_regs_clear(qm);
+
+	return 0;
+}
+
+static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
+{
+	enum qm_hw_ver rev_id;
+
+	rev_id = hisi_qm_get_hw_version(pdev);
+	if (rev_id == QM_HW_UNKNOWN)
+		return -ENODEV;
+
+	qm->pdev = pdev;
+	qm->ver = rev_id;
+
+	qm->sqe_size = SEC_SQE_SIZE;
+	qm->dev_name = sec_name;
+	qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ?
+			QM_HW_PF : QM_HW_VF;
+	qm->use_dma_api = true;
+
+	return hisi_qm_init(qm);
+}
+
+static void sec_qm_uninit(struct hisi_qm *qm)
+{
+	hisi_qm_uninit(qm);
+}
+
+static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
+{
+	if (qm->fun_type == QM_HW_PF) {
+		qm->qp_base = SEC_PF_DEF_Q_BASE;
+		qm->qp_num = pf_q_num;
+		qm->debug.curr_qm_qp_num = pf_q_num;
+
+		return sec_pf_probe_init(sec);
+	} else if (qm->fun_type == QM_HW_VF) {
+		/*
+		 * have no way to get qm configure in VM in v1 hardware,
+		 * so currently force PF to uses SEC_PF_DEF_Q_NUM, and force
+		 * to trigger only one VF in v1 hardware.
+		 * v2 hardware has no such problem.
+		 */
+		if (qm->ver == QM_HW_V1) {
+			qm->qp_base = SEC_PF_DEF_Q_NUM;
+			qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
+		} else if (qm->ver == QM_HW_V2) {
+			/* v2 starts to support get vft by mailbox */
+			return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+		}
+	} else {
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void sec_probe_uninit(struct sec_dev *sec)
+{
+	sec_hw_error_uninit(sec);
+}
+
+static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct sec_dev *sec;
+	struct hisi_qm *qm;
+	int ret;
+
+	sec = devm_kzalloc(&pdev->dev, sizeof(*sec), GFP_KERNEL);
+	if (!sec)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, sec);
+
+	sec->ctx_q_num = ctx_q_num;
+
+	qm = &sec->qm;
+
+	ret = sec_qm_init(qm, pdev);
+	if (ret) {
+		pci_err(pdev, "Failed to pre init qm!\n");
+		return ret;
+	}
+
+	ret = sec_probe_init(qm, sec);
+	if (ret) {
+		pci_err(pdev, "Failed to probe!\n");
+		goto err_qm_uninit;
+	}
+
+	ret = hisi_qm_start(qm);
+	if (ret) {
+		pci_err(pdev, "Failed to start sec qm!\n");
+		goto err_probe_uninit;
+	}
+
+	ret = sec_debugfs_init(sec);
+	if (ret)
+		pci_warn(pdev, "Failed to init debugfs!\n");
+
+	sec_add_to_list(sec);
+
+	ret = sec_register_to_crypto();
+	if (ret < 0) {
+		pr_err("Failed to register driver to crypto.\n");
+		goto err_remove_from_list;
+	}
+
+	return 0;
+
+err_remove_from_list:
+	sec_remove_from_list(sec);
+	sec_debugfs_exit(sec);
+	hisi_qm_stop(qm);
+
+err_probe_uninit:
+	sec_probe_uninit(sec);
+
+err_qm_uninit:
+	sec_qm_uninit(qm);
+
+	return ret;
+}
+
+/* now we only support equal assignment */
+static int sec_vf_q_assign(struct sec_dev *sec, u32 num_vfs)
+{
+	struct hisi_qm *qm = &sec->qm;
+	u32 qp_num = qm->qp_num;
+	u32 q_base = qp_num;
+	u32 q_num, remain_q_num;
+	int i, j, ret;
+
+	if (!num_vfs)
+		return -EINVAL;
+
+	remain_q_num = qm->ctrl_qp_num - qp_num;
+	q_num = remain_q_num / num_vfs;
+
+	for (i = 1; i <= num_vfs; i++) {
+		if (i == num_vfs)
+			q_num += remain_q_num % num_vfs;
+		ret = hisi_qm_set_vft(qm, i, q_base, q_num);
+		if (ret) {
+			for (j = i; j > 0; j--)
+				hisi_qm_set_vft(qm, j, 0, 0);
+			return ret;
+		}
+		q_base += q_num;
+	}
+
+	return 0;
+}
+
+static int sec_clear_vft_config(struct sec_dev *sec)
+{
+	struct hisi_qm *qm = &sec->qm;
+	u32 num_vfs = sec->num_vfs;
+	int ret;
+	u32 i;
+
+	for (i = 1; i <= num_vfs; i++) {
+		ret = hisi_qm_set_vft(qm, i, 0, 0);
+		if (ret)
+			return ret;
+	}
+
+	sec->num_vfs = 0;
+
+	return 0;
+}
+
+static int sec_sriov_enable(struct pci_dev *pdev, int max_vfs)
+{
+	struct sec_dev *sec = pci_get_drvdata(pdev);
+	int pre_existing_vfs, ret;
+	u32 num_vfs;
+
+	pre_existing_vfs = pci_num_vf(pdev);
+
+	if (pre_existing_vfs) {
+		pci_err(pdev, "Can't enable VF. Please disable at first!\n");
+		return 0;
+	}
+
+	num_vfs = min_t(u32, max_vfs, SEC_VF_NUM);
+
+	ret = sec_vf_q_assign(sec, num_vfs);
+	if (ret) {
+		pci_err(pdev, "Can't assign queues for VF!\n");
+		return ret;
+	}
+
+	sec->num_vfs = num_vfs;
+
+	ret = pci_enable_sriov(pdev, num_vfs);
+	if (ret) {
+		pci_err(pdev, "Can't enable VF!\n");
+		sec_clear_vft_config(sec);
+		return ret;
+	}
+
+	return num_vfs;
+}
+
+static int sec_sriov_disable(struct pci_dev *pdev)
+{
+	struct sec_dev *sec = pci_get_drvdata(pdev);
+
+	if (pci_vfs_assigned(pdev)) {
+		pci_err(pdev, "Can't disable VFs while VFs are assigned!\n");
+		return -EPERM;
+	}
+
+	/* remove in sec_pci_driver will be called to free VF resources */
+	pci_disable_sriov(pdev);
+
+	return sec_clear_vft_config(sec);
+}
+
+static int sec_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	if (num_vfs)
+		return sec_sriov_enable(pdev, num_vfs);
+	else
+		return sec_sriov_disable(pdev);
+}
+
+static void sec_remove(struct pci_dev *pdev)
+{
+	struct sec_dev *sec = pci_get_drvdata(pdev);
+	struct hisi_qm *qm = &sec->qm;
+
+	sec_unregister_from_crypto();
+
+	sec_remove_from_list(sec);
+
+	if (qm->fun_type == QM_HW_PF && sec->num_vfs)
+		(void)sec_sriov_disable(pdev);
+
+	sec_debugfs_exit(sec);
+
+	(void)hisi_qm_stop(qm);
+
+	if (qm->fun_type == QM_HW_PF)
+		sec_debug_regs_clear(qm);
+
+	sec_probe_uninit(sec);
+
+	sec_qm_uninit(qm);
+}
+
+static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts)
+{
+	const struct sec_hw_error *errs = sec_hw_errors;
+	struct device *dev = &sec->qm.pdev->dev;
+	u32 err_val;
+
+	while (errs->msg) {
+		if (errs->int_msk & err_sts) {
+			dev_err(dev, "%s [error status=0x%x] found\n",
+				errs->msg, errs->int_msk);
+
+			if (SEC_CORE_INT_STATUS_M_ECC & err_sts) {
+				err_val = readl(sec->qm.io_base +
+						SEC_CORE_SRAM_ECC_ERR_INFO);
+				dev_err(dev, "multi ecc sram num=0x%x\n",
+					SEC_ECC_NUM(err_val));
+				dev_err(dev, "multi ecc sram addr=0x%x\n",
+					SEC_ECC_ADDR(err_val));
+			}
+		}
+		errs++;
+	}
+}
+
+static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec)
+{
+	u32 err_sts;
+
+	/* read err sts */
+	err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS);
+	if (err_sts) {
+		sec_log_hw_error(sec, err_sts);
+
+		/* clear error interrupts */
+		writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE);
+
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t sec_process_hw_error(struct pci_dev *pdev)
+{
+	struct sec_dev *sec = pci_get_drvdata(pdev);
+	pci_ers_result_t qm_ret, sec_ret;
+
+	if (!sec) {
+		pci_err(pdev, "Can't recover error during device init\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	/* log qm error */
+	qm_ret = hisi_qm_hw_error_handle(&sec->qm);
+
+	/* log sec error */
+	sec_ret = sec_hw_error_handle(sec);
+
+	return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
+		sec_ret == PCI_ERS_RESULT_NEED_RESET) ?
+		PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t sec_error_detected(struct pci_dev *pdev,
+					   pci_channel_state_t state)
+{
+	if (pdev->is_virtfn)
+		return PCI_ERS_RESULT_NONE;
+
+	pci_info(pdev, "PCI error detected, state(=%d)!!\n", state);
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return sec_process_hw_error(pdev);
+}
+
+static const struct pci_error_handlers sec_err_handler = {
+	.error_detected = sec_error_detected,
+};
+
+static struct pci_driver sec_pci_driver = {
+	.name = "hisi_sec2",
+	.id_table = sec_dev_ids,
+	.probe = sec_probe,
+	.remove = sec_remove,
+	.err_handler = &sec_err_handler,
+	.sriov_configure = sec_sriov_configure,
+};
+
+static void sec_register_debugfs(void)
+{
+	if (!debugfs_initialized())
+		return;
+
+	sec_debugfs_root = debugfs_create_dir("hisi_sec2", NULL);
+}
+
+static void sec_unregister_debugfs(void)
+{
+	debugfs_remove_recursive(sec_debugfs_root);
+}
+
+static int __init sec_init(void)
+{
+	int ret;
+
+	sec_register_debugfs();
+
+	ret = pci_register_driver(&sec_pci_driver);
+	if (ret < 0) {
+		sec_unregister_debugfs();
+		pr_err("Failed to register pci driver.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit sec_exit(void)
+{
+	pci_unregister_driver(&sec_pci_driver);
+	sec_unregister_debugfs();
+}
+
+module_init(sec_init);
+module_exit(sec_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Zaibo Xu <xuzaibo@huawei.com>");
+MODULE_AUTHOR("Longfang Liu <liulongfang@huawei.com>");
+MODULE_AUTHOR("Wei Zhang <zhangwei375@huawei.com>");
+MODULE_DESCRIPTION("Driver for HiSilicon SEC accelerator");
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index e083d172b618..0e8c7e324fb4 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -2,37 +2,13 @@
 /* Copyright (c) 2019 HiSilicon Limited. */
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
-#include "./sgl.h"
+#include <linux/slab.h>
+#include "qm.h"
 
 #define HISI_ACC_SGL_SGE_NR_MIN		1
-#define HISI_ACC_SGL_SGE_NR_MAX		255
-#define HISI_ACC_SGL_SGE_NR_DEF		10
 #define HISI_ACC_SGL_NR_MAX		256
 #define HISI_ACC_SGL_ALIGN_SIZE		64
-
-static int acc_sgl_sge_set(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	u32 n;
-
-	if (!val)
-		return -EINVAL;
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret != 0 || n > HISI_ACC_SGL_SGE_NR_MAX || n == 0)
-		return -EINVAL;
-
-	return param_set_int(val, kp);
-}
-
-static const struct kernel_param_ops acc_sgl_sge_ops = {
-	.set = acc_sgl_sge_set,
-	.get = param_get_int,
-};
-
-static u32 acc_sgl_sge_nr = HISI_ACC_SGL_SGE_NR_DEF;
-module_param_cb(acc_sgl_sge_nr, &acc_sgl_sge_ops, &acc_sgl_sge_nr, 0444);
-MODULE_PARM_DESC(acc_sgl_sge_nr, "Number of sge in sgl(1-255)");
+#define HISI_ACC_MEM_BLOCK_NR		5
 
 struct acc_hw_sge {
 	dma_addr_t buf;
@@ -55,37 +31,91 @@ struct hisi_acc_hw_sgl {
 	struct acc_hw_sge sge_entries[];
 } __aligned(1);
 
+struct hisi_acc_sgl_pool {
+	struct mem_block {
+		struct hisi_acc_hw_sgl *sgl;
+		dma_addr_t sgl_dma;
+		size_t size;
+	} mem_block[HISI_ACC_MEM_BLOCK_NR];
+	u32 sgl_num_per_block;
+	u32 block_num;
+	u32 count;
+	u32 sge_nr;
+	size_t sgl_size;
+};
+
 /**
  * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
  * @dev: The device which hw sgl pool belongs to.
- * @pool: Pointer of pool.
  * @count: Count of hisi_acc_hw_sgl in pool.
+ * @sge_nr: The count of sge in hw_sgl
  *
  * This function creates a hw sgl pool, after this user can get hw sgl memory
  * from it.
  */
-int hisi_acc_create_sgl_pool(struct device *dev,
-			     struct hisi_acc_sgl_pool *pool, u32 count)
+struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
+						   u32 count, u32 sge_nr)
 {
-	u32 sgl_size;
-	u32 size;
+	u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl = 0;
+	struct hisi_acc_sgl_pool *pool;
+	struct mem_block *block;
+	u32 i, j;
 
-	if (!dev || !pool || !count)
-		return -EINVAL;
+	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
+		return ERR_PTR(-EINVAL);
 
-	sgl_size = sizeof(struct acc_hw_sge) * acc_sgl_sge_nr +
+	sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
 		   sizeof(struct hisi_acc_hw_sgl);
-	size = sgl_size * count;
+	block_size = PAGE_SIZE * (1 << (MAX_ORDER - 1));
+	sgl_num_per_block = block_size / sgl_size;
+	block_num = count / sgl_num_per_block;
+	remain_sgl = count % sgl_num_per_block;
+
+	if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
+	    (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
+		return ERR_PTR(-EINVAL);
 
-	pool->sgl = dma_alloc_coherent(dev, size, &pool->sgl_dma, GFP_KERNEL);
-	if (!pool->sgl)
-		return -ENOMEM;
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+	block = pool->mem_block;
 
-	pool->size = size;
+	for (i = 0; i < block_num; i++) {
+		block[i].sgl = dma_alloc_coherent(dev, block_size,
+						  &block[i].sgl_dma,
+						  GFP_KERNEL);
+		if (!block[i].sgl)
+			goto err_free_mem;
+
+		block[i].size = block_size;
+	}
+
+	if (remain_sgl > 0) {
+		block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
+						  &block[i].sgl_dma,
+						  GFP_KERNEL);
+		if (!block[i].sgl)
+			goto err_free_mem;
+
+		block[i].size = remain_sgl * sgl_size;
+	}
+
+	pool->sgl_num_per_block = sgl_num_per_block;
+	pool->block_num = remain_sgl ? block_num + 1 : block_num;
 	pool->count = count;
 	pool->sgl_size = sgl_size;
+	pool->sge_nr = sge_nr;
+
+	return pool;
 
-	return 0;
+err_free_mem:
+	for (j = 0; j < i; j++) {
+		dma_free_coherent(dev, block_size, block[j].sgl,
+				  block[j].sgl_dma);
+		memset(block + j, 0, sizeof(*block));
+	}
+	kfree(pool);
+	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
 
@@ -98,38 +128,57 @@ EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
  */
 void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
 {
-	dma_free_coherent(dev, pool->size, pool->sgl, pool->sgl_dma);
-	memset(pool, 0, sizeof(struct hisi_acc_sgl_pool));
+	struct mem_block *block;
+	int i;
+
+	if (!dev || !pool)
+		return;
+
+	block = pool->mem_block;
+
+	for (i = 0; i < pool->block_num; i++)
+		dma_free_coherent(dev, block[i].size, block[i].sgl,
+				  block[i].sgl_dma);
+
+	kfree(pool);
 }
 EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
 
-struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool, u32 index,
-				    dma_addr_t *hw_sgl_dma)
+static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
+					   u32 index, dma_addr_t *hw_sgl_dma)
 {
-	if (!pool || !hw_sgl_dma || index >= pool->count || !pool->sgl)
+	struct mem_block *block;
+	u32 block_index, offset;
+
+	if (!pool || !hw_sgl_dma || index >= pool->count)
 		return ERR_PTR(-EINVAL);
 
-	*hw_sgl_dma = pool->sgl_dma + pool->sgl_size * index;
-	return (void *)pool->sgl + pool->sgl_size * index;
-}
+	block = pool->mem_block;
+	block_index = index / pool->sgl_num_per_block;
+	offset = index % pool->sgl_num_per_block;
 
-void acc_put_sgl(struct hisi_acc_sgl_pool *pool, u32 index) {}
+	*hw_sgl_dma = block[block_index].sgl_dma + pool->sgl_size * offset;
+	return (void *)block[block_index].sgl + pool->sgl_size * offset;
+}
 
 static void sg_map_to_hw_sg(struct scatterlist *sgl,
 			    struct acc_hw_sge *hw_sge)
 {
-	hw_sge->buf = sgl->dma_address;
-	hw_sge->len = sgl->dma_length;
+	hw_sge->buf = sg_dma_address(sgl);
+	hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
 }
 
 static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
 {
-	hw_sgl->entry_sum_in_sgl++;
+	u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
+
+	var++;
+	hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
 }
 
 static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
 {
-	hw_sgl->entry_sum_in_chain = sum;
+	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
 }
 
 /**
@@ -153,38 +202,41 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
 	dma_addr_t curr_sgl_dma = 0;
 	struct acc_hw_sge *curr_hw_sge;
 	struct scatterlist *sg;
-	int sg_n = sg_nents(sgl);
-	int i, ret;
+	int i, sg_n, sg_n_mapped;
 
-	if (!dev || !sgl || !pool || !hw_sgl_dma || sg_n > acc_sgl_sge_nr)
+	if (!dev || !sgl || !pool || !hw_sgl_dma)
 		return ERR_PTR(-EINVAL);
 
-	ret = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
-	if (!ret)
+	sg_n = sg_nents(sgl);
+
+	sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+	if (!sg_n_mapped)
 		return ERR_PTR(-EINVAL);
 
+	if (sg_n_mapped > pool->sge_nr) {
+		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+		return ERR_PTR(-EINVAL);
+	}
+
 	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
-	if (!curr_hw_sgl) {
-		ret = -ENOMEM;
-		goto err_unmap_sg;
+	if (IS_ERR(curr_hw_sgl)) {
+		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+		return ERR_PTR(-ENOMEM);
+
 	}
-	curr_hw_sgl->entry_length_in_sgl = acc_sgl_sge_nr;
+	curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
 	curr_hw_sge = curr_hw_sgl->sge_entries;
 
-	for_each_sg(sgl, sg, sg_n, i) {
+	for_each_sg(sgl, sg, sg_n_mapped, i) {
 		sg_map_to_hw_sg(sg, curr_hw_sge);
 		inc_hw_sgl_sge(curr_hw_sgl);
 		curr_hw_sge++;
 	}
 
-	update_hw_sgl_sum_sge(curr_hw_sgl, acc_sgl_sge_nr);
+	update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
 	*hw_sgl_dma = curr_sgl_dma;
 
 	return curr_hw_sgl;
-
-err_unmap_sg:
-	dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
 
@@ -201,6 +253,9 @@ EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
 void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
 			   struct hisi_acc_hw_sgl *hw_sgl)
 {
+	if (!dev || !sgl || !hw_sgl)
+		return;
+
 	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
 
 	hw_sgl->entry_sum_in_chain = 0;
@@ -208,7 +263,3 @@ void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
 	hw_sgl->entry_length_in_sgl = 0;
 }
 EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
-MODULE_DESCRIPTION("HiSilicon Accelerator SGL support");
diff --git a/drivers/crypto/hisilicon/sgl.h b/drivers/crypto/hisilicon/sgl.h
deleted file mode 100644
index 3ac8871c7acf..000000000000
--- a/drivers/crypto/hisilicon/sgl.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2019 HiSilicon Limited. */
-#ifndef HISI_ACC_SGL_H
-#define HISI_ACC_SGL_H
-
-struct hisi_acc_sgl_pool {
-	struct hisi_acc_hw_sgl *sgl;
-	dma_addr_t sgl_dma;
-	size_t size;
-	u32 count;
-	size_t sgl_size;
-};
-
-struct hisi_acc_hw_sgl *
-hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
-			      struct scatterlist *sgl,
-			      struct hisi_acc_sgl_pool *pool,
-			      u32 index, dma_addr_t *hw_sgl_dma);
-void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
-			   struct hisi_acc_hw_sgl *hw_sgl);
-int hisi_acc_create_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool,
-			     u32 count);
-void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool);
-#endif
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index ffb00d987d02..bc1db26598bb 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -8,10 +8,13 @@
 
 #include <linux/list.h>
 #include "../qm.h"
-#include "../sgl.h"
 
 /* hisi_zip_sqe dw3 */
 #define HZIP_BD_STATUS_M			GENMASK(7, 0)
+/* hisi_zip_sqe dw7 */
+#define HZIP_IN_SGE_DATA_OFFSET_M		GENMASK(23, 0)
+/* hisi_zip_sqe dw8 */
+#define HZIP_OUT_SGE_DATA_OFFSET_M		GENMASK(23, 0)
 /* hisi_zip_sqe dw9 */
 #define HZIP_REQ_TYPE_M				GENMASK(7, 0)
 #define HZIP_ALG_TYPE_ZLIB			0x02
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 59023545a1c4..9815d5e3ccd0 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -22,6 +22,7 @@
 #define HZIP_CTX_Q_NUM				2
 #define HZIP_GZIP_HEAD_BUF			256
 #define HZIP_ALG_PRIORITY			300
+#define HZIP_SGL_SGE_NR				10
 
 static const u8 zlib_head[HZIP_ZLIB_HEAD_SIZE] = {0x78, 0x9c};
 static const u8 gzip_head[HZIP_GZIP_HEAD_SIZE] = {0x1f, 0x8b, 0x08, 0x0, 0x0,
@@ -41,14 +42,12 @@ enum hisi_zip_alg_type {
 
 #define TO_HEAD(req_type)						\
 	(((req_type) == HZIP_ALG_TYPE_ZLIB) ? zlib_head :		\
-	 ((req_type) == HZIP_ALG_TYPE_GZIP) ? gzip_head : 0)		\
+	 ((req_type) == HZIP_ALG_TYPE_GZIP) ? gzip_head : NULL)		\
 
 struct hisi_zip_req {
 	struct acomp_req *req;
-	struct scatterlist *src;
-	struct scatterlist *dst;
-	size_t slen;
-	size_t dlen;
+	int sskip;
+	int dskip;
 	struct hisi_acc_hw_sgl *hw_src;
 	struct hisi_acc_hw_sgl *hw_dst;
 	dma_addr_t dma_src;
@@ -67,7 +66,7 @@ struct hisi_zip_qp_ctx {
 	struct hisi_qp *qp;
 	struct hisi_zip_sqe zip_sqe;
 	struct hisi_zip_req_q req_q;
-	struct hisi_acc_sgl_pool sgl_pool;
+	struct hisi_acc_sgl_pool *sgl_pool;
 	struct hisi_zip *zip_dev;
 	struct hisi_zip_ctx *ctx;
 };
@@ -78,6 +77,30 @@ struct hisi_zip_ctx {
 	struct hisi_zip_qp_ctx qp_ctx[HZIP_CTX_Q_NUM];
 };
 
+static int sgl_sge_nr_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+	u16 n;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = kstrtou16(val, 10, &n);
+	if (ret || n == 0 || n > HISI_ACC_SGL_SGE_NR_MAX)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops sgl_sge_nr_ops = {
+	.set = sgl_sge_nr_set,
+	.get = param_get_int,
+};
+
+static u16 sgl_sge_nr = HZIP_SGL_SGE_NR;
+module_param_cb(sgl_sge_nr, &sgl_sge_nr_ops, &sgl_sge_nr, 0444);
+MODULE_PARM_DESC(sgl_sge_nr, "Number of sge in sgl(1-255)");
+
 static void hisi_zip_config_buf_type(struct hisi_zip_sqe *sqe, u8 buf_type)
 {
 	u32 val;
@@ -94,13 +117,15 @@ static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag)
 
 static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type,
 			      dma_addr_t s_addr, dma_addr_t d_addr, u32 slen,
-			      u32 dlen)
+			      u32 dlen, int sskip, int dskip)
 {
 	memset(sqe, 0, sizeof(struct hisi_zip_sqe));
 
-	sqe->input_data_length = slen;
+	sqe->input_data_length = slen - sskip;
+	sqe->dw7 = FIELD_PREP(HZIP_IN_SGE_DATA_OFFSET_M, sskip);
+	sqe->dw8 = FIELD_PREP(HZIP_OUT_SGE_DATA_OFFSET_M, dskip);
 	sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type);
-	sqe->dest_avail_out = dlen;
+	sqe->dest_avail_out = dlen - dskip;
 	sqe->source_addr_l = lower_32_bits(s_addr);
 	sqe->source_addr_h = upper_32_bits(s_addr);
 	sqe->dest_addr_l = lower_32_bits(d_addr);
@@ -265,14 +290,15 @@ static void hisi_zip_release_req_q(struct hisi_zip_ctx *ctx)
 static int hisi_zip_create_sgl_pool(struct hisi_zip_ctx *ctx)
 {
 	struct hisi_zip_qp_ctx *tmp;
-	int i, ret;
+	struct device *dev;
+	int i;
 
 	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
 		tmp = &ctx->qp_ctx[i];
-		ret = hisi_acc_create_sgl_pool(&tmp->qp->qm->pdev->dev,
-					       &tmp->sgl_pool,
-					       QM_Q_DEPTH << 1);
-		if (ret < 0) {
+		dev = &tmp->qp->qm->pdev->dev;
+		tmp->sgl_pool = hisi_acc_create_sgl_pool(dev, QM_Q_DEPTH << 1,
+							 sgl_sge_nr);
+		if (IS_ERR(tmp->sgl_pool)) {
 			if (i == 1)
 				goto err_free_sgl_pool0;
 			return -ENOMEM;
@@ -283,7 +309,7 @@ static int hisi_zip_create_sgl_pool(struct hisi_zip_ctx *ctx)
 
 err_free_sgl_pool0:
 	hisi_acc_free_sgl_pool(&ctx->qp_ctx[QPC_COMP].qp->qm->pdev->dev,
-			       &ctx->qp_ctx[QPC_COMP].sgl_pool);
+			       ctx->qp_ctx[QPC_COMP].sgl_pool);
 	return -ENOMEM;
 }
 
@@ -293,7 +319,7 @@ static void hisi_zip_release_sgl_pool(struct hisi_zip_ctx *ctx)
 
 	for (i = 0; i < HZIP_CTX_Q_NUM; i++)
 		hisi_acc_free_sgl_pool(&ctx->qp_ctx[i].qp->qm->pdev->dev,
-				       &ctx->qp_ctx[i].sgl_pool);
+				       ctx->qp_ctx[i].sgl_pool);
 }
 
 static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx,
@@ -301,11 +327,6 @@ static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx,
 {
 	struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
 
-	if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP)
-		kfree(req->dst);
-	else
-		kfree(req->src);
-
 	write_lock(&req_q->req_lock);
 	clear_bit(req->req_id, req_q->req_bitmap);
 	memset(req, 0, sizeof(struct hisi_zip_req));
@@ -333,8 +354,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
 	}
 	dlen = sqe->produced;
 
-	hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
-	hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+	hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src);
+	hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst);
 
 	head_size = (qp->alg_type == 0) ? TO_HEAD_SIZE(qp->req_type) : 0;
 	acomp_req->dlen = dlen + head_size;
@@ -428,20 +449,6 @@ static size_t get_comp_head_size(struct scatterlist *src, u8 req_type)
 	}
 }
 
-static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes,
-			     size_t remains, struct scatterlist **out)
-{
-#define SPLIT_NUM 2
-	size_t split_sizes[SPLIT_NUM];
-	int out_mapped_nents[SPLIT_NUM];
-
-	split_sizes[0] = bytes;
-	split_sizes[1] = remains;
-
-	return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out,
-			out_mapped_nents, GFP_KERNEL);
-}
-
 static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
 						struct hisi_zip_qp_ctx *qp_ctx,
 						size_t head_size, bool is_comp)
@@ -449,31 +456,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
 	struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
 	struct hisi_zip_req *q = req_q->q;
 	struct hisi_zip_req *req_cache;
-	struct scatterlist *out[2];
-	struct scatterlist *sgl;
-	size_t len;
-	int ret, req_id;
-
-	/*
-	 * remove/add zlib/gzip head, as hardware operations do not include
-	 * comp head. so split req->src to get sgl without heads in acomp, or
-	 * add comp head to req->dst ahead of that hardware output compressed
-	 * data in sgl splited from req->dst without comp head.
-	 */
-	if (is_comp) {
-		sgl = req->dst;
-		len = req->dlen - head_size;
-	} else {
-		sgl = req->src;
-		len = req->slen - head_size;
-	}
-
-	ret = get_sg_skip_bytes(sgl, head_size, len, out);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/* sgl for comp head is useless, so free it now */
-	kfree(out[0]);
+	int req_id;
 
 	write_lock(&req_q->req_lock);
 
@@ -481,7 +464,6 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
 	if (req_id >= req_q->size) {
 		write_unlock(&req_q->req_lock);
 		dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n");
-		kfree(out[1]);
 		return ERR_PTR(-EBUSY);
 	}
 	set_bit(req_id, req_q->req_bitmap);
@@ -489,16 +471,13 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
 	req_cache = q + req_id;
 	req_cache->req_id = req_id;
 	req_cache->req = req;
+
 	if (is_comp) {
-		req_cache->src = req->src;
-		req_cache->dst = out[1];
-		req_cache->slen = req->slen;
-		req_cache->dlen = req->dlen - head_size;
+		req_cache->sskip = 0;
+		req_cache->dskip = head_size;
 	} else {
-		req_cache->src = out[1];
-		req_cache->dst = req->dst;
-		req_cache->slen = req->slen - head_size;
-		req_cache->dlen = req->dlen;
+		req_cache->sskip = head_size;
+		req_cache->dskip = 0;
 	}
 
 	write_unlock(&req_q->req_lock);
@@ -510,23 +489,24 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
 			    struct hisi_zip_qp_ctx *qp_ctx)
 {
 	struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe;
+	struct acomp_req *a_req = req->req;
 	struct hisi_qp *qp = qp_ctx->qp;
 	struct device *dev = &qp->qm->pdev->dev;
-	struct hisi_acc_sgl_pool *pool = &qp_ctx->sgl_pool;
+	struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool;
 	dma_addr_t input;
 	dma_addr_t output;
 	int ret;
 
-	if (!req->src || !req->slen || !req->dst || !req->dlen)
+	if (!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen)
 		return -EINVAL;
 
-	req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool,
+	req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool,
 						    req->req_id << 1, &input);
 	if (IS_ERR(req->hw_src))
 		return PTR_ERR(req->hw_src);
 	req->dma_src = input;
 
-	req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool,
+	req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool,
 						    (req->req_id << 1) + 1,
 						    &output);
 	if (IS_ERR(req->hw_dst)) {
@@ -535,8 +515,8 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
 	}
 	req->dma_dst = output;
 
-	hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen,
-			  req->dlen);
+	hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen,
+			  a_req->dlen, req->sskip, req->dskip);
 	hisi_zip_config_buf_type(zip_sqe, HZIP_SGL);
 	hisi_zip_config_tag(zip_sqe, req->req_id);
 
@@ -548,9 +528,9 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
 	return -EINPROGRESS;
 
 err_unmap_output:
-	hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+	hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst);
 err_unmap_input:
-	hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
+	hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src);
 	return ret;
 }
 
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 1b2ee96c888d..e1bab1a91333 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -79,47 +79,80 @@
 #define HZIP_SOFT_CTRL_CNT_CLR_CE	0x301000
 #define SOFT_CTRL_CNT_CLR_CE_BIT	BIT(0)
 
-#define HZIP_NUMA_DISTANCE		100
 #define HZIP_BUF_SIZE			22
 
 static const char hisi_zip_name[] = "hisi_zip";
 static struct dentry *hzip_debugfs_root;
-LIST_HEAD(hisi_zip_list);
-DEFINE_MUTEX(hisi_zip_list_lock);
+static LIST_HEAD(hisi_zip_list);
+static DEFINE_MUTEX(hisi_zip_list_lock);
 
-#ifdef CONFIG_NUMA
-static struct hisi_zip *find_zip_device_numa(int node)
+struct hisi_zip_resource {
+	struct hisi_zip *hzip;
+	int distance;
+	struct list_head list;
+};
+
+static void free_list(struct list_head *head)
 {
-	struct hisi_zip *zip = NULL;
-	struct hisi_zip *hisi_zip;
-	int min_distance = HZIP_NUMA_DISTANCE;
-	struct device *dev;
+	struct hisi_zip_resource *res, *tmp;
 
-	list_for_each_entry(hisi_zip, &hisi_zip_list, list) {
-		dev = &hisi_zip->qm.pdev->dev;
-		if (node_distance(dev->numa_node, node) < min_distance) {
-			zip = hisi_zip;
-			min_distance = node_distance(dev->numa_node, node);
-		}
+	list_for_each_entry_safe(res, tmp, head, list) {
+		list_del(&res->list);
+		kfree(res);
 	}
-
-	return zip;
 }
-#endif
 
 struct hisi_zip *find_zip_device(int node)
 {
-	struct hisi_zip *zip = NULL;
+	struct hisi_zip_resource *res, *tmp;
+	struct hisi_zip *ret = NULL;
+	struct hisi_zip *hisi_zip;
+	struct list_head *n;
+	struct device *dev;
+	LIST_HEAD(head);
 
 	mutex_lock(&hisi_zip_list_lock);
-#ifdef CONFIG_NUMA
-	zip = find_zip_device_numa(node);
-#else
-	zip = list_first_entry(&hisi_zip_list, struct hisi_zip, list);
-#endif
+
+	if (IS_ENABLED(CONFIG_NUMA)) {
+		list_for_each_entry(hisi_zip, &hisi_zip_list, list) {
+			res = kzalloc(sizeof(*res), GFP_KERNEL);
+			if (!res)
+				goto err;
+
+			dev = &hisi_zip->qm.pdev->dev;
+			res->hzip = hisi_zip;
+			res->distance = node_distance(dev_to_node(dev), node);
+
+			n = &head;
+			list_for_each_entry(tmp, &head, list) {
+				if (res->distance < tmp->distance) {
+					n = &tmp->list;
+					break;
+				}
+			}
+			list_add_tail(&res->list, n);
+		}
+
+		list_for_each_entry(tmp, &head, list) {
+			if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) {
+				ret = tmp->hzip;
+				break;
+			}
+		}
+
+		free_list(&head);
+	} else {
+		ret = list_first_entry(&hisi_zip_list, struct hisi_zip, list);
+	}
+
 	mutex_unlock(&hisi_zip_list_lock);
 
-	return zip;
+	return ret;
+
+err:
+	free_list(&head);
+	mutex_unlock(&hisi_zip_list_lock);
+	return NULL;
 }
 
 struct hisi_zip_hw_error {
@@ -267,6 +300,10 @@ MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)");
 static int uacce_mode;
 module_param(uacce_mode, int, 0);
 
+static u32 vfs_num;
+module_param(vfs_num, uint, 0444);
+MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)");
+
 static const struct pci_device_id hisi_zip_dev_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) },
@@ -335,8 +372,7 @@ static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state)
 
 	if (qm->ver == QM_HW_V1) {
 		writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK);
-		dev_info(&qm->pdev->dev, "ZIP v%d does not support hw error handle\n",
-			 qm->ver);
+		dev_info(&qm->pdev->dev, "Does not support hw error handle\n");
 		return;
 	}
 
@@ -511,7 +547,7 @@ static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
 	struct hisi_qm *qm = &hisi_zip->qm;
 	struct device *dev = &qm->pdev->dev;
 	struct debugfs_regset32 *regset;
-	struct dentry *tmp_d, *tmp;
+	struct dentry *tmp_d;
 	char buf[HZIP_BUF_SIZE];
 	int i;
 
@@ -521,10 +557,6 @@ static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
 		else
 			sprintf(buf, "decomp_core%d", i - HZIP_COMP_CORE_NUM);
 
-		tmp_d = debugfs_create_dir(buf, ctrl->debug_root);
-		if (!tmp_d)
-			return -ENOENT;
-
 		regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
 		if (!regset)
 			return -ENOENT;
@@ -533,9 +565,8 @@ static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
 		regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
 		regset->base = qm->io_base + core_offsets[i];
 
-		tmp = debugfs_create_regset32("regs", 0444, tmp_d, regset);
-		if (!tmp)
-			return -ENOENT;
+		tmp_d = debugfs_create_dir(buf, ctrl->debug_root);
+		debugfs_create_regset32("regs", 0444, tmp_d, regset);
 	}
 
 	return 0;
@@ -543,7 +574,6 @@ static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
 
 static int hisi_zip_ctrl_debug_init(struct hisi_zip_ctrl *ctrl)
 {
-	struct dentry *tmp;
 	int i;
 
 	for (i = HZIP_CURRENT_QM; i < HZIP_DEBUG_FILE_NUM; i++) {
@@ -551,11 +581,9 @@ static int hisi_zip_ctrl_debug_init(struct hisi_zip_ctrl *ctrl)
 		ctrl->files[i].ctrl = ctrl;
 		ctrl->files[i].index = i;
 
-		tmp = debugfs_create_file(ctrl_debug_file_name[i], 0600,
-					  ctrl->debug_root, ctrl->files + i,
-					  &ctrl_debug_fops);
-		if (!tmp)
-			return -ENOENT;
+		debugfs_create_file(ctrl_debug_file_name[i], 0600,
+				    ctrl->debug_root, ctrl->files + i,
+				    &ctrl_debug_fops);
 	}
 
 	return hisi_zip_core_debug_init(ctrl);
@@ -569,8 +597,6 @@ static int hisi_zip_debugfs_init(struct hisi_zip *hisi_zip)
 	int ret;
 
 	dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root);
-	if (!dev_d)
-		return -ENOENT;
 
 	qm->debug.debug_root = dev_d;
 	ret = hisi_qm_debug_init(qm);
@@ -652,90 +678,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
 	return 0;
 }
 
-static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	struct hisi_zip *hisi_zip;
-	enum qm_hw_ver rev_id;
-	struct hisi_qm *qm;
-	int ret;
-
-	rev_id = hisi_qm_get_hw_version(pdev);
-	if (rev_id == QM_HW_UNKNOWN)
-		return -EINVAL;
-
-	hisi_zip = devm_kzalloc(&pdev->dev, sizeof(*hisi_zip), GFP_KERNEL);
-	if (!hisi_zip)
-		return -ENOMEM;
-	pci_set_drvdata(pdev, hisi_zip);
-
-	qm = &hisi_zip->qm;
-	qm->pdev = pdev;
-	qm->ver = rev_id;
-
-	qm->sqe_size = HZIP_SQE_SIZE;
-	qm->dev_name = hisi_zip_name;
-	qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
-								QM_HW_VF;
-	switch (uacce_mode) {
-	case 0:
-		qm->use_dma_api = true;
-		break;
-	case 1:
-		qm->use_dma_api = false;
-		break;
-	case 2:
-		qm->use_dma_api = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = hisi_qm_init(qm);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to init qm!\n");
-		return ret;
-	}
-
-	if (qm->fun_type == QM_HW_PF) {
-		ret = hisi_zip_pf_probe_init(hisi_zip);
-		if (ret)
-			return ret;
-
-		qm->qp_base = HZIP_PF_DEF_Q_BASE;
-		qm->qp_num = pf_q_num;
-	} else if (qm->fun_type == QM_HW_VF) {
-		/*
-		 * have no way to get qm configure in VM in v1 hardware,
-		 * so currently force PF to uses HZIP_PF_DEF_Q_NUM, and force
-		 * to trigger only one VF in v1 hardware.
-		 *
-		 * v2 hardware has no such problem.
-		 */
-		if (qm->ver == QM_HW_V1) {
-			qm->qp_base = HZIP_PF_DEF_Q_NUM;
-			qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
-		} else if (qm->ver == QM_HW_V2)
-			/* v2 starts to support get vft by mailbox */
-			hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
-	}
-
-	ret = hisi_qm_start(qm);
-	if (ret)
-		goto err_qm_uninit;
-
-	ret = hisi_zip_debugfs_init(hisi_zip);
-	if (ret)
-		dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret);
-
-	hisi_zip_add_to_list(hisi_zip);
-
-	return 0;
-
-err_qm_uninit:
-	hisi_qm_uninit(qm);
-	return ret;
-}
-
 /* Currently we only support equal assignment */
 static int hisi_zip_vf_q_assign(struct hisi_zip *hisi_zip, int num_vfs)
 {
@@ -832,6 +774,100 @@ static int hisi_zip_sriov_disable(struct pci_dev *pdev)
 	return hisi_zip_clear_vft_config(hisi_zip);
 }
 
+static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hisi_zip *hisi_zip;
+	enum qm_hw_ver rev_id;
+	struct hisi_qm *qm;
+	int ret;
+
+	rev_id = hisi_qm_get_hw_version(pdev);
+	if (rev_id == QM_HW_UNKNOWN)
+		return -EINVAL;
+
+	hisi_zip = devm_kzalloc(&pdev->dev, sizeof(*hisi_zip), GFP_KERNEL);
+	if (!hisi_zip)
+		return -ENOMEM;
+	pci_set_drvdata(pdev, hisi_zip);
+
+	qm = &hisi_zip->qm;
+	qm->pdev = pdev;
+	qm->ver = rev_id;
+
+	qm->sqe_size = HZIP_SQE_SIZE;
+	qm->dev_name = hisi_zip_name;
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
+								QM_HW_VF;
+	switch (uacce_mode) {
+	case 0:
+		qm->use_dma_api = true;
+		break;
+	case 1:
+		qm->use_dma_api = false;
+		break;
+	case 2:
+		qm->use_dma_api = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hisi_qm_init(qm);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to init qm!\n");
+		return ret;
+	}
+
+	if (qm->fun_type == QM_HW_PF) {
+		ret = hisi_zip_pf_probe_init(hisi_zip);
+		if (ret)
+			return ret;
+
+		qm->qp_base = HZIP_PF_DEF_Q_BASE;
+		qm->qp_num = pf_q_num;
+	} else if (qm->fun_type == QM_HW_VF) {
+		/*
+		 * have no way to get qm configure in VM in v1 hardware,
+		 * so currently force PF to uses HZIP_PF_DEF_Q_NUM, and force
+		 * to trigger only one VF in v1 hardware.
+		 *
+		 * v2 hardware has no such problem.
+		 */
+		if (qm->ver == QM_HW_V1) {
+			qm->qp_base = HZIP_PF_DEF_Q_NUM;
+			qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
+		} else if (qm->ver == QM_HW_V2)
+			/* v2 starts to support get vft by mailbox */
+			hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+	}
+
+	ret = hisi_qm_start(qm);
+	if (ret)
+		goto err_qm_uninit;
+
+	ret = hisi_zip_debugfs_init(hisi_zip);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret);
+
+	hisi_zip_add_to_list(hisi_zip);
+
+	if (qm->fun_type == QM_HW_PF && vfs_num > 0) {
+		ret = hisi_zip_sriov_enable(pdev, vfs_num);
+		if (ret < 0)
+			goto err_remove_from_list;
+	}
+
+	return 0;
+
+err_remove_from_list:
+	hisi_zip_remove_from_list(hisi_zip);
+	hisi_zip_debugfs_exit(hisi_zip);
+	hisi_qm_stop(qm);
+err_qm_uninit:
+	hisi_qm_uninit(qm);
+	return ret;
+}
+
 static int hisi_zip_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	if (num_vfs == 0)
@@ -945,7 +981,7 @@ static struct pci_driver hisi_zip_pci_driver = {
 	.probe			= hisi_zip_probe,
 	.remove			= hisi_zip_remove,
 	.sriov_configure	= IS_ENABLED(CONFIG_PCI_IOV) ?
-					hisi_zip_sriov_configure : 0,
+					hisi_zip_sriov_configure : NULL,
 	.err_handler		= &hisi_zip_err_handler,
 };
 
@@ -955,8 +991,6 @@ static void hisi_zip_register_debugfs(void)
 		return;
 
 	hzip_debugfs_root = debugfs_create_dir("hisi_zip", NULL);
-	if (IS_ERR_OR_NULL(hzip_debugfs_root))
-		hzip_debugfs_root = NULL;
 }
 
 static void hisi_zip_unregister_debugfs(void)
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index fe4cc8babe1c..25d5227f74a1 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -332,10 +332,10 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
 	struct dma_slave_config dma_conf;
 	int err = -EINVAL;
 
-	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
-	if (!hdev->dma_lch) {
+	hdev->dma_lch = dma_request_chan(hdev->dev, "tx");
+	if (IS_ERR(hdev->dma_lch)) {
 		dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
-		return -EBUSY;
+		return PTR_ERR(hdev->dma_lch);
 	}
 	dma_conf.direction = DMA_MEM_TO_DEV;
 	dma_conf.dst_addr = hdev->bus_addr;
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 4ab1bde8dd9b..2cb53fbae841 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -75,9 +75,9 @@ static void eip197_trc_cache_banksel(struct safexcel_crypto_priv *priv,
 }
 
 static u32 eip197_trc_cache_probe(struct safexcel_crypto_priv *priv,
-				  int maxbanks, u32 probemask)
+				  int maxbanks, u32 probemask, u32 stride)
 {
-	u32 val, addrhi, addrlo, addrmid;
+	u32 val, addrhi, addrlo, addrmid, addralias, delta, marker;
 	int actbank;
 
 	/*
@@ -87,32 +87,37 @@ static u32 eip197_trc_cache_probe(struct safexcel_crypto_priv *priv,
 	addrhi = 1 << (16 + maxbanks);
 	addrlo = 0;
 	actbank = min(maxbanks - 1, 0);
-	while ((addrhi - addrlo) > 32) {
+	while ((addrhi - addrlo) > stride) {
 		/* write marker to lowest address in top half */
 		addrmid = (addrhi + addrlo) >> 1;
+		marker = (addrmid ^ 0xabadbabe) & probemask; /* Unique */
 		eip197_trc_cache_banksel(priv, addrmid, &actbank);
-		writel((addrmid | (addrlo << 16)) & probemask,
+		writel(marker,
 			priv->base + EIP197_CLASSIFICATION_RAMS +
 			(addrmid & 0xffff));
 
-		/* write marker to lowest address in bottom half */
-		eip197_trc_cache_banksel(priv, addrlo, &actbank);
-		writel((addrlo | (addrhi << 16)) & probemask,
-			priv->base + EIP197_CLASSIFICATION_RAMS +
-			(addrlo & 0xffff));
+		/* write invalid markers to possible aliases */
+		delta = 1 << __fls(addrmid);
+		while (delta >= stride) {
+			addralias = addrmid - delta;
+			eip197_trc_cache_banksel(priv, addralias, &actbank);
+			writel(~marker,
+			       priv->base + EIP197_CLASSIFICATION_RAMS +
+			       (addralias & 0xffff));
+			delta >>= 1;
+		}
 
 		/* read back marker from top half */
 		eip197_trc_cache_banksel(priv, addrmid, &actbank);
 		val = readl(priv->base + EIP197_CLASSIFICATION_RAMS +
 			    (addrmid & 0xffff));
 
-		if (val == ((addrmid | (addrlo << 16)) & probemask)) {
+		if ((val & probemask) == marker)
 			/* read back correct, continue with top half */
 			addrlo = addrmid;
-		} else {
+		else
 			/* not read back correct, continue with bottom half */
 			addrhi = addrmid;
-		}
 	}
 	return addrhi;
 }
@@ -150,7 +155,7 @@ static void eip197_trc_cache_clear(struct safexcel_crypto_priv *priv,
 		       htable_offset + i * sizeof(u32));
 }
 
-static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
+static int eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
 {
 	u32 val, dsize, asize;
 	int cs_rc_max, cs_ht_wc, cs_trc_rec_wc, cs_trc_lg_rec_wc;
@@ -183,7 +188,7 @@ static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
 	writel(val, priv->base + EIP197_TRC_PARAMS);
 
 	/* Probed data RAM size in bytes */
-	dsize = eip197_trc_cache_probe(priv, maxbanks, 0xffffffff);
+	dsize = eip197_trc_cache_probe(priv, maxbanks, 0xffffffff, 32);
 
 	/*
 	 * Now probe the administration RAM size pretty much the same way
@@ -196,11 +201,18 @@ static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
 	writel(val, priv->base + EIP197_TRC_PARAMS);
 
 	/* Probed admin RAM size in admin words */
-	asize = eip197_trc_cache_probe(priv, 0, 0xbfffffff) >> 4;
+	asize = eip197_trc_cache_probe(priv, 0, 0x3fffffff, 16) >> 4;
 
 	/* Clear any ECC errors detected while probing! */
 	writel(0, priv->base + EIP197_TRC_ECCCTRL);
 
+	/* Sanity check probing results */
+	if (dsize < EIP197_MIN_DSIZE || asize < EIP197_MIN_ASIZE) {
+		dev_err(priv->dev, "Record cache probing failed (%d,%d).",
+			dsize, asize);
+		return -ENODEV;
+	}
+
 	/*
 	 * Determine optimal configuration from RAM sizes
 	 * Note that we assume that the physical RAM configuration is sane
@@ -221,9 +233,9 @@ static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
 	/* Step #3: Determine log2 of hash table size */
 	cs_ht_sz = __fls(asize - cs_rc_max) - 2;
 	/* Step #4: determine current size of hash table in dwords */
-	cs_ht_wc = 16<<cs_ht_sz; /* dwords, not admin words */
+	cs_ht_wc = 16 << cs_ht_sz; /* dwords, not admin words */
 	/* Step #5: add back excess words and see if we can fit more records */
-	cs_rc_max = min_t(uint, cs_rc_abs_max, asize - (cs_ht_wc >> 4));
+	cs_rc_max = min_t(uint, cs_rc_abs_max, asize - (cs_ht_wc >> 2));
 
 	/* Clear the cache RAMs */
 	eip197_trc_cache_clear(priv, cs_rc_max, cs_ht_wc);
@@ -251,6 +263,7 @@ static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
 
 	dev_info(priv->dev, "TRC init: %dd,%da (%dr,%dh)\n",
 		 dsize, asize, cs_rc_max, cs_ht_wc + cs_ht_wc);
+	return 0;
 }
 
 static void eip197_init_firmware(struct safexcel_crypto_priv *priv)
@@ -298,13 +311,14 @@ static void eip197_init_firmware(struct safexcel_crypto_priv *priv)
 static int eip197_write_firmware(struct safexcel_crypto_priv *priv,
 				  const struct firmware *fw)
 {
-	const u32 *data = (const u32 *)fw->data;
+	const __be32 *data = (const __be32 *)fw->data;
 	int i;
 
 	/* Write the firmware */
 	for (i = 0; i < fw->size / sizeof(u32); i++)
 		writel(be32_to_cpu(data[i]),
-		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
+		       priv->base + EIP197_CLASSIFICATION_RAMS +
+		       i * sizeof(__be32));
 
 	/* Exclude final 2 NOPs from size */
 	return i - EIP197_FW_TERMINAL_NOPS;
@@ -471,6 +485,14 @@ static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
 		cd_fetch_cnt = ((1 << priv->hwconfig.hwcfsize) /
 				cd_size_rnd) - 1;
 	}
+	/*
+	 * Since we're using command desc's way larger than formally specified,
+	 * we need to check whether we can fit even 1 for low-end EIP196's!
+	 */
+	if (!cd_fetch_cnt) {
+		dev_err(priv->dev, "Unable to fit even 1 command desc!\n");
+		return -ENODEV;
+	}
 
 	for (i = 0; i < priv->config.rings; i++) {
 		/* ring base address */
@@ -479,12 +501,12 @@ static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
 		writel(upper_32_bits(priv->ring[i].cdr.base_dma),
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
 
-		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.cd_offset << 16) |
-		       priv->config.cd_size,
+		writel(EIP197_xDR_DESC_MODE_64BIT | EIP197_CDR_DESC_MODE_ADCP |
+		       (priv->config.cd_offset << 14) | priv->config.cd_size,
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
 		writel(((cd_fetch_cnt *
 			 (cd_size_rnd << priv->hwconfig.hwdataw)) << 16) |
-		       (cd_fetch_cnt * priv->config.cd_offset),
+		       (cd_fetch_cnt * (priv->config.cd_offset / sizeof(u32))),
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
@@ -527,13 +549,13 @@ static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 		writel(upper_32_bits(priv->ring[i].rdr.base_dma),
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
 
-		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.rd_offset << 16) |
+		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.rd_offset << 14) |
 		       priv->config.rd_size,
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
 
 		writel(((rd_fetch_cnt *
 			 (rd_size_rnd << priv->hwconfig.hwdataw)) << 16) |
-		       (rd_fetch_cnt * priv->config.rd_offset),
+		       (rd_fetch_cnt * (priv->config.rd_offset / sizeof(u32))),
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
@@ -559,7 +581,7 @@ static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 {
 	u32 val;
-	int i, ret, pe;
+	int i, ret, pe, opbuflo, opbufhi;
 
 	dev_dbg(priv->dev, "HW init: using %d pipe(s) and %d ring(s)\n",
 		priv->config.pes, priv->config.rings);
@@ -595,8 +617,8 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 		writel(EIP197_DxE_THR_CTRL_RESET_PE,
 		       EIP197_HIA_DFE_THR(priv) + EIP197_HIA_DFE_THR_CTRL(pe));
 
-		if (priv->flags & SAFEXCEL_HW_EIP197)
-			/* Reset HIA input interface arbiter (EIP197 only) */
+		if (priv->flags & EIP197_PE_ARB)
+			/* Reset HIA input interface arbiter (if present) */
 			writel(EIP197_HIA_RA_PE_CTRL_RESET,
 			       EIP197_HIA_AIC(priv) + EIP197_HIA_RA_PE_CTRL(pe));
 
@@ -639,9 +661,16 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 			;
 
 		/* DMA transfer size to use */
+		if (priv->hwconfig.hwnumpes > 4) {
+			opbuflo = 9;
+			opbufhi = 10;
+		} else {
+			opbuflo = 7;
+			opbufhi = 8;
+		}
 		val = EIP197_HIA_DSE_CFG_DIS_DEBUG;
-		val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(7) |
-		       EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
+		val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(opbuflo) |
+		       EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(opbufhi);
 		val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
 		val |= EIP197_HIA_DSE_CFG_ALWAYS_BUFFERABLE;
 		/* FIXME: instability issues can occur for EIP97 but disabling
@@ -655,8 +684,8 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 		writel(0, EIP197_HIA_DSE_THR(priv) + EIP197_HIA_DSE_THR_CTRL(pe));
 
 		/* Configure the procesing engine thresholds */
-		writel(EIP197_PE_OUT_DBUF_THRES_MIN(7) |
-		       EIP197_PE_OUT_DBUF_THRES_MAX(8),
+		writel(EIP197_PE_OUT_DBUF_THRES_MIN(opbuflo) |
+		       EIP197_PE_OUT_DBUF_THRES_MAX(opbufhi),
 		       EIP197_PE(priv) + EIP197_PE_OUT_DBUF_THRES(pe));
 
 		/* Processing Engine configuration */
@@ -696,7 +725,7 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 		writel(0,
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_PROC_PNTR);
 
-		writel((EIP197_DEFAULT_RING_SIZE * priv->config.cd_offset) << 2,
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.cd_offset),
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_SIZE);
 	}
 
@@ -719,7 +748,7 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_PROC_PNTR);
 
 		/* Ring size */
-		writel((EIP197_DEFAULT_RING_SIZE * priv->config.rd_offset) << 2,
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.rd_offset),
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_SIZE);
 	}
 
@@ -736,19 +765,28 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	/* Clear any HIA interrupt */
 	writel(GENMASK(30, 20), EIP197_HIA_AIC_G(priv) + EIP197_HIA_AIC_G_ACK);
 
-	if (priv->flags & SAFEXCEL_HW_EIP197) {
-		eip197_trc_cache_init(priv);
-		priv->flags |= EIP197_TRC_CACHE;
+	if (priv->flags & EIP197_SIMPLE_TRC) {
+		writel(EIP197_STRC_CONFIG_INIT |
+		       EIP197_STRC_CONFIG_LARGE_REC(EIP197_CS_TRC_REC_WC) |
+		       EIP197_STRC_CONFIG_SMALL_REC(EIP197_CS_TRC_REC_WC),
+		       priv->base + EIP197_STRC_CONFIG);
+		writel(EIP197_PE_EIP96_TOKEN_CTRL2_CTX_DONE,
+		       EIP197_PE(priv) + EIP197_PE_EIP96_TOKEN_CTRL2(0));
+	} else if (priv->flags & SAFEXCEL_HW_EIP197) {
+		ret = eip197_trc_cache_init(priv);
+		if (ret)
+			return ret;
+	}
 
+	if (priv->flags & EIP197_ICE) {
 		ret = eip197_load_firmwares(priv);
 		if (ret)
 			return ret;
 	}
 
-	safexcel_hw_setup_cdesc_rings(priv);
-	safexcel_hw_setup_rdesc_rings(priv);
-
-	return 0;
+	return safexcel_hw_setup_cdesc_rings(priv) ?:
+	       safexcel_hw_setup_rdesc_rings(priv) ?:
+	       0;
 }
 
 /* Called with ring's lock taken */
@@ -836,20 +874,24 @@ finalize:
 	spin_unlock_bh(&priv->ring[ring].lock);
 
 	/* let the RDR know we have pending descriptors */
-	writel((rdesc * priv->config.rd_offset) << 2,
+	writel((rdesc * priv->config.rd_offset),
 	       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 
 	/* let the CDR know we have pending descriptors */
-	writel((cdesc * priv->config.cd_offset) << 2,
+	writel((cdesc * priv->config.cd_offset),
 	       EIP197_HIA_CDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 }
 
 inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
-				       struct safexcel_result_desc *rdesc)
+				       void *rdp)
 {
-	if (likely((!rdesc->descriptor_overflow) &&
-		   (!rdesc->buffer_overflow) &&
-		   (!rdesc->result_data.error_code)))
+	struct safexcel_result_desc *rdesc = rdp;
+	struct result_data_desc *result_data = rdp + priv->config.res_offset;
+
+	if (likely((!rdesc->last_seg) || /* Rest only valid if last seg! */
+		   ((!rdesc->descriptor_overflow) &&
+		    (!rdesc->buffer_overflow) &&
+		    (!result_data->error_code))))
 		return 0;
 
 	if (rdesc->descriptor_overflow)
@@ -858,13 +900,14 @@ inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
 	if (rdesc->buffer_overflow)
 		dev_err(priv->dev, "Buffer overflow detected");
 
-	if (rdesc->result_data.error_code & 0x4066) {
+	if (result_data->error_code & 0x4066) {
 		/* Fatal error (bits 1,2,5,6 & 14) */
 		dev_err(priv->dev,
 			"result descriptor error (%x)",
-			rdesc->result_data.error_code);
+			result_data->error_code);
+
 		return -EIO;
-	} else if (rdesc->result_data.error_code &
+	} else if (result_data->error_code &
 		   (BIT(7) | BIT(4) | BIT(3) | BIT(0))) {
 		/*
 		 * Give priority over authentication fails:
@@ -872,7 +915,7 @@ inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
 		 * something wrong with the input!
 		 */
 		return -EINVAL;
-	} else if (rdesc->result_data.error_code & BIT(9)) {
+	} else if (result_data->error_code & BIT(9)) {
 		/* Authentication failed */
 		return -EBADMSG;
 	}
@@ -931,16 +974,18 @@ int safexcel_invalidate_cache(struct crypto_async_request *async,
 {
 	struct safexcel_command_desc *cdesc;
 	struct safexcel_result_desc *rdesc;
+	struct safexcel_token  *dmmy;
 	int ret = 0;
 
 	/* Prepare command descriptor */
-	cdesc = safexcel_add_cdesc(priv, ring, true, true, 0, 0, 0, ctxr_dma);
+	cdesc = safexcel_add_cdesc(priv, ring, true, true, 0, 0, 0, ctxr_dma,
+				   &dmmy);
 	if (IS_ERR(cdesc))
 		return PTR_ERR(cdesc);
 
 	cdesc->control_data.type = EIP197_TYPE_EXTENDED;
 	cdesc->control_data.options = 0;
-	cdesc->control_data.refresh = 0;
+	cdesc->control_data.context_lo &= ~EIP197_CONTEXT_SIZE_MASK;
 	cdesc->control_data.control0 = CONTEXT_CONTROL_INV_TR;
 
 	/* Prepare result descriptor */
@@ -1003,7 +1048,7 @@ handle_results:
 acknowledge:
 	if (i)
 		writel(EIP197_xDR_PROC_xD_PKT(i) |
-		       EIP197_xDR_PROC_xD_COUNT(tot_descs * priv->config.rd_offset),
+		       (tot_descs * priv->config.rd_offset),
 		       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PROC_COUNT);
 
 	/* If the number of requests overflowed the counter, try to proceed more
@@ -1120,6 +1165,8 @@ static int safexcel_request_ring_irq(void *pdev, int irqid,
 				irq_name, irq);
 			return irq;
 		}
+	} else {
+		return -ENXIO;
 	}
 
 	ret = devm_request_threaded_irq(dev, irq, handler,
@@ -1169,6 +1216,44 @@ static struct safexcel_alg_template *safexcel_algs[] = {
 	&safexcel_alg_xts_aes,
 	&safexcel_alg_gcm,
 	&safexcel_alg_ccm,
+	&safexcel_alg_crc32,
+	&safexcel_alg_cbcmac,
+	&safexcel_alg_xcbcmac,
+	&safexcel_alg_cmac,
+	&safexcel_alg_chacha20,
+	&safexcel_alg_chachapoly,
+	&safexcel_alg_chachapoly_esp,
+	&safexcel_alg_sm3,
+	&safexcel_alg_hmac_sm3,
+	&safexcel_alg_ecb_sm4,
+	&safexcel_alg_cbc_sm4,
+	&safexcel_alg_ofb_sm4,
+	&safexcel_alg_cfb_sm4,
+	&safexcel_alg_ctr_sm4,
+	&safexcel_alg_authenc_hmac_sha1_cbc_sm4,
+	&safexcel_alg_authenc_hmac_sm3_cbc_sm4,
+	&safexcel_alg_authenc_hmac_sha1_ctr_sm4,
+	&safexcel_alg_authenc_hmac_sm3_ctr_sm4,
+	&safexcel_alg_sha3_224,
+	&safexcel_alg_sha3_256,
+	&safexcel_alg_sha3_384,
+	&safexcel_alg_sha3_512,
+	&safexcel_alg_hmac_sha3_224,
+	&safexcel_alg_hmac_sha3_256,
+	&safexcel_alg_hmac_sha3_384,
+	&safexcel_alg_hmac_sha3_512,
+	&safexcel_alg_authenc_hmac_sha1_cbc_des,
+	&safexcel_alg_authenc_hmac_sha256_cbc_des3_ede,
+	&safexcel_alg_authenc_hmac_sha224_cbc_des3_ede,
+	&safexcel_alg_authenc_hmac_sha512_cbc_des3_ede,
+	&safexcel_alg_authenc_hmac_sha384_cbc_des3_ede,
+	&safexcel_alg_authenc_hmac_sha256_cbc_des,
+	&safexcel_alg_authenc_hmac_sha224_cbc_des,
+	&safexcel_alg_authenc_hmac_sha512_cbc_des,
+	&safexcel_alg_authenc_hmac_sha384_cbc_des,
+	&safexcel_alg_rfc4106_gcm,
+	&safexcel_alg_rfc4543_gcm,
+	&safexcel_alg_rfc4309_ccm,
 };
 
 static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
@@ -1238,30 +1323,30 @@ static void safexcel_unregister_algorithms(struct safexcel_crypto_priv *priv)
 
 static void safexcel_configure(struct safexcel_crypto_priv *priv)
 {
-	u32 val, mask = 0;
-
-	val = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
-
-	/* Read number of PEs from the engine */
-	if (priv->flags & SAFEXCEL_HW_EIP197)
-		/* Wider field width for all EIP197 type engines */
-		mask = EIP197_N_PES_MASK;
-	else
-		/* Narrow field width for EIP97 type engine */
-		mask = EIP97_N_PES_MASK;
+	u32 mask = BIT(priv->hwconfig.hwdataw) - 1;
 
-	priv->config.pes = (val >> EIP197_N_PES_OFFSET) & mask;
+	priv->config.pes = priv->hwconfig.hwnumpes;
+	priv->config.rings = min_t(u32, priv->hwconfig.hwnumrings, max_rings);
+	/* Cannot currently support more rings than we have ring AICs! */
+	priv->config.rings = min_t(u32, priv->config.rings,
+					priv->hwconfig.hwnumraic);
 
-	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
-
-	val = (val & GENMASK(27, 25)) >> 25;
-	mask = BIT(val) - 1;
-
-	priv->config.cd_size = (sizeof(struct safexcel_command_desc) / sizeof(u32));
+	priv->config.cd_size = EIP197_CD64_FETCH_SIZE;
 	priv->config.cd_offset = (priv->config.cd_size + mask) & ~mask;
+	priv->config.cdsh_offset = (EIP197_MAX_TOKENS + mask) & ~mask;
 
-	priv->config.rd_size = (sizeof(struct safexcel_result_desc) / sizeof(u32));
+	/* res token is behind the descr, but ofs must be rounded to buswdth */
+	priv->config.res_offset = (EIP197_RD64_FETCH_SIZE + mask) & ~mask;
+	/* now the size of the descr is this 1st part plus the result struct */
+	priv->config.rd_size    = priv->config.res_offset +
+				  EIP197_RD64_RESULT_SIZE;
 	priv->config.rd_offset = (priv->config.rd_size + mask) & ~mask;
+
+	/* convert dwords to bytes */
+	priv->config.cd_offset *= sizeof(u32);
+	priv->config.cdsh_offset *= sizeof(u32);
+	priv->config.rd_offset *= sizeof(u32);
+	priv->config.res_offset *= sizeof(u32);
 }
 
 static void safexcel_init_register_offsets(struct safexcel_crypto_priv *priv)
@@ -1307,7 +1392,7 @@ static int safexcel_probe_generic(void *pdev,
 				  int is_pci_dev)
 {
 	struct device *dev = priv->dev;
-	u32 peid, version, mask, val, hiaopt;
+	u32 peid, version, mask, val, hiaopt, hwopt, peopt;
 	int i, ret, hwctg;
 
 	priv->context_pool = dmam_pool_create("safexcel-context", dev,
@@ -1369,13 +1454,16 @@ static int safexcel_probe_generic(void *pdev,
 	 */
 	version = readl(EIP197_GLOBAL(priv) + EIP197_VERSION);
 	if (((priv->flags & SAFEXCEL_HW_EIP197) &&
-	     (EIP197_REG_LO16(version) != EIP197_VERSION_LE)) ||
+	     (EIP197_REG_LO16(version) != EIP197_VERSION_LE) &&
+	     (EIP197_REG_LO16(version) != EIP196_VERSION_LE)) ||
 	    ((!(priv->flags & SAFEXCEL_HW_EIP197) &&
 	     (EIP197_REG_LO16(version) != EIP97_VERSION_LE)))) {
 		/*
 		 * We did not find the device that matched our initial probing
 		 * (or our initial probing failed) Report appropriate error.
 		 */
+		dev_err(priv->dev, "Probing for EIP97/EIP19x failed - no such device (read %08x)\n",
+			version);
 		return -ENODEV;
 	}
 
@@ -1383,6 +1471,14 @@ static int safexcel_probe_generic(void *pdev,
 	hwctg = version >> 28;
 	peid = version & 255;
 
+	/* Detect EIP206 processing pipe */
+	version = readl(EIP197_PE(priv) + + EIP197_PE_VERSION(0));
+	if (EIP197_REG_LO16(version) != EIP206_VERSION_LE) {
+		dev_err(priv->dev, "EIP%d: EIP206 not detected\n", peid);
+		return -ENODEV;
+	}
+	priv->hwconfig.ppver = EIP197_VERSION_MASK(version);
+
 	/* Detect EIP96 packet engine and version */
 	version = readl(EIP197_PE(priv) + EIP197_PE_EIP96_VERSION(0));
 	if (EIP197_REG_LO16(version) != EIP96_VERSION_LE) {
@@ -1391,10 +1487,13 @@ static int safexcel_probe_generic(void *pdev,
 	}
 	priv->hwconfig.pever = EIP197_VERSION_MASK(version);
 
+	hwopt = readl(EIP197_GLOBAL(priv) + EIP197_OPTIONS);
 	hiaopt = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_OPTIONS);
 
 	if (priv->flags & SAFEXCEL_HW_EIP197) {
 		/* EIP197 */
+		peopt = readl(EIP197_PE(priv) + EIP197_PE_OPTIONS(0));
+
 		priv->hwconfig.hwdataw  = (hiaopt >> EIP197_HWDATAW_OFFSET) &
 					  EIP197_HWDATAW_MASK;
 		priv->hwconfig.hwcfsize = ((hiaopt >> EIP197_CFSIZE_OFFSET) &
@@ -1403,6 +1502,19 @@ static int safexcel_probe_generic(void *pdev,
 		priv->hwconfig.hwrfsize = ((hiaopt >> EIP197_RFSIZE_OFFSET) &
 					   EIP197_RFSIZE_MASK) +
 					  EIP197_RFSIZE_ADJUST;
+		priv->hwconfig.hwnumpes	= (hiaopt >> EIP197_N_PES_OFFSET) &
+					  EIP197_N_PES_MASK;
+		priv->hwconfig.hwnumrings = (hiaopt >> EIP197_N_RINGS_OFFSET) &
+					    EIP197_N_RINGS_MASK;
+		if (hiaopt & EIP197_HIA_OPT_HAS_PE_ARB)
+			priv->flags |= EIP197_PE_ARB;
+		if (EIP206_OPT_ICE_TYPE(peopt) == 1)
+			priv->flags |= EIP197_ICE;
+		/* If not a full TRC, then assume simple TRC */
+		if (!(hwopt & EIP197_OPT_HAS_TRC))
+			priv->flags |= EIP197_SIMPLE_TRC;
+		/* EIP197 always has SOME form of TRC */
+		priv->flags |= EIP197_TRC_CACHE;
 	} else {
 		/* EIP97 */
 		priv->hwconfig.hwdataw  = (hiaopt >> EIP197_HWDATAW_OFFSET) &
@@ -1411,6 +1523,23 @@ static int safexcel_probe_generic(void *pdev,
 					  EIP97_CFSIZE_MASK;
 		priv->hwconfig.hwrfsize = (hiaopt >> EIP97_RFSIZE_OFFSET) &
 					  EIP97_RFSIZE_MASK;
+		priv->hwconfig.hwnumpes	= 1; /* by definition */
+		priv->hwconfig.hwnumrings = (hiaopt >> EIP197_N_RINGS_OFFSET) &
+					    EIP197_N_RINGS_MASK;
+	}
+
+	/* Scan for ring AIC's */
+	for (i = 0; i < EIP197_MAX_RING_AIC; i++) {
+		version = readl(EIP197_HIA_AIC_R(priv) +
+				EIP197_HIA_AIC_R_VERSION(i));
+		if (EIP197_REG_LO16(version) != EIP201_VERSION_LE)
+			break;
+	}
+	priv->hwconfig.hwnumraic = i;
+	/* Low-end EIP196 may not have any ring AIC's ... */
+	if (!priv->hwconfig.hwnumraic) {
+		dev_err(priv->dev, "No ring interrupt controller present!\n");
+		return -ENODEV;
 	}
 
 	/* Get supported algorithms from EIP96 transform engine */
@@ -1418,10 +1547,12 @@ static int safexcel_probe_generic(void *pdev,
 				    EIP197_PE_EIP96_OPTIONS(0));
 
 	/* Print single info line describing what we just detected */
-	dev_info(priv->dev, "EIP%d:%x(%d)-HIA:%x(%d,%d,%d),PE:%x,alg:%08x\n",
-		 peid, priv->hwconfig.hwver, hwctg, priv->hwconfig.hiaver,
-		 priv->hwconfig.hwdataw, priv->hwconfig.hwcfsize,
-		 priv->hwconfig.hwrfsize, priv->hwconfig.pever,
+	dev_info(priv->dev, "EIP%d:%x(%d,%d,%d,%d)-HIA:%x(%d,%d,%d),PE:%x/%x,alg:%08x\n",
+		 peid, priv->hwconfig.hwver, hwctg, priv->hwconfig.hwnumpes,
+		 priv->hwconfig.hwnumrings, priv->hwconfig.hwnumraic,
+		 priv->hwconfig.hiaver, priv->hwconfig.hwdataw,
+		 priv->hwconfig.hwcfsize, priv->hwconfig.hwrfsize,
+		 priv->hwconfig.ppver, priv->hwconfig.pever,
 		 priv->hwconfig.algo_flags);
 
 	safexcel_configure(priv);
@@ -1545,7 +1676,6 @@ static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv)
 	}
 }
 
-#if IS_ENABLED(CONFIG_OF)
 /* for Device Tree platform driver */
 
 static int safexcel_probe(struct platform_device *pdev)
@@ -1623,6 +1753,7 @@ static int safexcel_remove(struct platform_device *pdev)
 	safexcel_unregister_algorithms(priv);
 	safexcel_hw_reset_rings(priv);
 
+	clk_disable_unprepare(priv->reg_clk);
 	clk_disable_unprepare(priv->clk);
 
 	for (i = 0; i < priv->config.rings; i++)
@@ -1664,9 +1795,7 @@ static struct platform_driver  crypto_safexcel = {
 		.of_match_table = safexcel_of_match_table,
 	},
 };
-#endif
 
-#if IS_ENABLED(CONFIG_PCI)
 /* PCIE devices - i.e. Inside Secure development boards */
 
 static int safexcel_pci_probe(struct pci_dev *pdev,
@@ -1757,7 +1886,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev,
 	return rc;
 }
 
-void safexcel_pci_remove(struct pci_dev *pdev)
+static void safexcel_pci_remove(struct pci_dev *pdev)
 {
 	struct safexcel_crypto_priv *priv = pci_get_drvdata(pdev);
 	int i;
@@ -1787,54 +1916,32 @@ static struct pci_driver safexcel_pci_driver = {
 	.probe         = safexcel_pci_probe,
 	.remove        = safexcel_pci_remove,
 };
-#endif
-
-/* Unfortunately, we have to resort to global variables here */
-#if IS_ENABLED(CONFIG_PCI)
-int pcireg_rc = -EINVAL; /* Default safe value */
-#endif
-#if IS_ENABLED(CONFIG_OF)
-int ofreg_rc = -EINVAL; /* Default safe value */
-#endif
 
 static int __init safexcel_init(void)
 {
-#if IS_ENABLED(CONFIG_PCI)
+	int ret;
+
 	/* Register PCI driver */
-	pcireg_rc = pci_register_driver(&safexcel_pci_driver);
-#endif
+	ret = pci_register_driver(&safexcel_pci_driver);
 
-#if IS_ENABLED(CONFIG_OF)
 	/* Register platform driver */
-	ofreg_rc = platform_driver_register(&crypto_safexcel);
- #if IS_ENABLED(CONFIG_PCI)
-	/* Return success if either PCI or OF registered OK */
-	return pcireg_rc ? ofreg_rc : 0;
- #else
-	return ofreg_rc;
- #endif
-#else
- #if IS_ENABLED(CONFIG_PCI)
-	return pcireg_rc;
- #else
-	return -EINVAL;
- #endif
-#endif
+	if (IS_ENABLED(CONFIG_OF) && !ret) {
+		ret = platform_driver_register(&crypto_safexcel);
+		if (ret)
+			pci_unregister_driver(&safexcel_pci_driver);
+	}
+
+	return ret;
 }
 
 static void __exit safexcel_exit(void)
 {
-#if IS_ENABLED(CONFIG_OF)
 	/* Unregister platform driver */
-	if (!ofreg_rc)
+	if (IS_ENABLED(CONFIG_OF))
 		platform_driver_unregister(&crypto_safexcel);
-#endif
 
-#if IS_ENABLED(CONFIG_PCI)
 	/* Unregister PCI driver if successfully registered before */
-	if (!pcireg_rc)
-		pci_unregister_driver(&safexcel_pci_driver);
-#endif
+	pci_unregister_driver(&safexcel_pci_driver);
 }
 
 module_init(safexcel_init);
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 930cc48a6f85..94016c505abb 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -17,8 +17,11 @@
 #define EIP197_HIA_VERSION_BE			0xca35
 #define EIP197_HIA_VERSION_LE			0x35ca
 #define EIP97_VERSION_LE			0x9e61
+#define EIP196_VERSION_LE			0x3bc4
 #define EIP197_VERSION_LE			0x3ac5
 #define EIP96_VERSION_LE			0x9f60
+#define EIP201_VERSION_LE			0x36c9
+#define EIP206_VERSION_LE			0x31ce
 #define EIP197_REG_LO16(reg)			(reg & 0xffff)
 #define EIP197_REG_HI16(reg)			((reg >> 16) & 0xffff)
 #define EIP197_VERSION_MASK(reg)		((reg >> 16) & 0xfff)
@@ -26,12 +29,23 @@
 						((reg >> 4) & 0xf0) | \
 						((reg >> 12) & 0xf))
 
+/* EIP197 HIA OPTIONS ENCODING */
+#define EIP197_HIA_OPT_HAS_PE_ARB		BIT(29)
+
+/* EIP206 OPTIONS ENCODING */
+#define EIP206_OPT_ICE_TYPE(n)			((n>>8)&3)
+
+/* EIP197 OPTIONS ENCODING */
+#define EIP197_OPT_HAS_TRC			BIT(31)
+
 /* Static configuration */
 #define EIP197_DEFAULT_RING_SIZE		400
-#define EIP197_MAX_TOKENS			18
+#define EIP197_EMB_TOKENS			4 /* Pad CD to 16 dwords */
+#define EIP197_MAX_TOKENS			16
 #define EIP197_MAX_RINGS			4
 #define EIP197_FETCH_DEPTH			2
 #define EIP197_MAX_BATCH_SZ			64
+#define EIP197_MAX_RING_AIC			14
 
 #define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
 				 GFP_KERNEL : GFP_ATOMIC)
@@ -138,6 +152,7 @@
 #define EIP197_HIA_AIC_R_ENABLED_STAT(r)	(0xe010 - EIP197_HIA_AIC_R_OFF(r))
 #define EIP197_HIA_AIC_R_ACK(r)			(0xe010 - EIP197_HIA_AIC_R_OFF(r))
 #define EIP197_HIA_AIC_R_ENABLE_CLR(r)		(0xe014 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_VERSION(r)		(0xe01c - EIP197_HIA_AIC_R_OFF(r))
 #define EIP197_HIA_AIC_G_ENABLE_CTRL		0xf808
 #define EIP197_HIA_AIC_G_ENABLED_STAT		0xf810
 #define EIP197_HIA_AIC_G_ACK			0xf810
@@ -157,12 +172,16 @@
 #define EIP197_PE_EIP96_FUNCTION_EN(n)		(0x1004 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_CONTEXT_CTRL(n)		(0x1008 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_CONTEXT_STAT(n)		(0x100c + (0x2000 * (n)))
+#define EIP197_PE_EIP96_TOKEN_CTRL2(n)		(0x102c + (0x2000 * (n)))
 #define EIP197_PE_EIP96_FUNCTION2_EN(n)		(0x1030 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_OPTIONS(n)		(0x13f8 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_VERSION(n)		(0x13fc + (0x2000 * (n)))
 #define EIP197_PE_OUT_DBUF_THRES(n)		(0x1c00 + (0x2000 * (n)))
 #define EIP197_PE_OUT_TBUF_THRES(n)		(0x1d00 + (0x2000 * (n)))
+#define EIP197_PE_OPTIONS(n)			(0x1ff8 + (0x2000 * (n)))
+#define EIP197_PE_VERSION(n)			(0x1ffc + (0x2000 * (n)))
 #define EIP197_MST_CTRL				0xfff4
+#define EIP197_OPTIONS				0xfff8
 #define EIP197_VERSION				0xfffc
 
 /* EIP197-specific registers, no indirection */
@@ -178,6 +197,7 @@
 #define EIP197_TRC_ECCADMINSTAT			0xf0838
 #define EIP197_TRC_ECCDATASTAT			0xf083c
 #define EIP197_TRC_ECCDATA			0xf0840
+#define EIP197_STRC_CONFIG			0xf43f0
 #define EIP197_FLUE_CACHEBASE_LO(n)		(0xf6000 + (32 * (n)))
 #define EIP197_FLUE_CACHEBASE_HI(n)		(0xf6004 + (32 * (n)))
 #define EIP197_FLUE_CONFIG(n)			(0xf6010 + (32 * (n)))
@@ -188,6 +208,7 @@
 
 /* EIP197_HIA_xDR_DESC_SIZE */
 #define EIP197_xDR_DESC_MODE_64BIT		BIT(31)
+#define EIP197_CDR_DESC_MODE_ADCP		BIT(30)
 
 /* EIP197_HIA_xDR_DMA_CFG */
 #define EIP197_HIA_xDR_WR_RES_BUF		BIT(22)
@@ -213,7 +234,6 @@
 /* EIP197_HIA_xDR_PROC_COUNT */
 #define EIP197_xDR_PROC_xD_PKT_OFFSET		24
 #define EIP197_xDR_PROC_xD_PKT_MASK		GENMASK(6, 0)
-#define EIP197_xDR_PROC_xD_COUNT(n)		((n) << 2)
 #define EIP197_xDR_PROC_xD_PKT(n)		((n) << 24)
 #define EIP197_xDR_PROC_CLR_COUNT		BIT(31)
 
@@ -228,6 +248,8 @@
 #define EIP197_HIA_RA_PE_CTRL_EN		BIT(30)
 
 /* EIP197_HIA_OPTIONS */
+#define EIP197_N_RINGS_OFFSET			0
+#define EIP197_N_RINGS_MASK			GENMASK(3, 0)
 #define EIP197_N_PES_OFFSET			4
 #define EIP197_N_PES_MASK			GENMASK(4, 0)
 #define EIP97_N_PES_MASK			GENMASK(2, 0)
@@ -237,13 +259,13 @@
 #define EIP197_CFSIZE_OFFSET			9
 #define EIP197_CFSIZE_ADJUST			4
 #define EIP97_CFSIZE_OFFSET			8
-#define EIP197_CFSIZE_MASK			GENMASK(3, 0)
-#define EIP97_CFSIZE_MASK			GENMASK(4, 0)
+#define EIP197_CFSIZE_MASK			GENMASK(2, 0)
+#define EIP97_CFSIZE_MASK			GENMASK(3, 0)
 #define EIP197_RFSIZE_OFFSET			12
 #define EIP197_RFSIZE_ADJUST			4
 #define EIP97_RFSIZE_OFFSET			12
-#define EIP197_RFSIZE_MASK			GENMASK(3, 0)
-#define EIP97_RFSIZE_MASK			GENMASK(4, 0)
+#define EIP197_RFSIZE_MASK			GENMASK(2, 0)
+#define EIP97_RFSIZE_MASK			GENMASK(3, 0)
 
 /* EIP197_HIA_AIC_R_ENABLE_CTRL */
 #define EIP197_CDR_IRQ(n)			BIT((n) * 2)
@@ -257,9 +279,9 @@
 #define EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(n)	((n) << 16)
 #define EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(n)	(((n) & 0x7) << 20)
 #define EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(n)	((n) << 24)
-#define EIP197_HIA_DFE_CFG_DIS_DEBUG		(BIT(31) | BIT(29))
+#define EIP197_HIA_DFE_CFG_DIS_DEBUG		GENMASK(31, 29)
 #define EIP197_HIA_DSE_CFG_EN_SINGLE_WR		BIT(29)
-#define EIP197_HIA_DSE_CFG_DIS_DEBUG		BIT(31)
+#define EIP197_HIA_DSE_CFG_DIS_DEBUG		GENMASK(31, 30)
 
 /* EIP197_HIA_DFE/DSE_THR_CTRL */
 #define EIP197_DxE_THR_CTRL_EN			BIT(30)
@@ -327,13 +349,21 @@
 #define EIP197_ADDRESS_MODE			BIT(8)
 #define EIP197_CONTROL_MODE			BIT(9)
 
+/* EIP197_PE_EIP96_TOKEN_CTRL2 */
+#define EIP197_PE_EIP96_TOKEN_CTRL2_CTX_DONE	BIT(3)
+
+/* EIP197_STRC_CONFIG */
+#define EIP197_STRC_CONFIG_INIT			BIT(31)
+#define EIP197_STRC_CONFIG_LARGE_REC(s)		(s<<8)
+#define EIP197_STRC_CONFIG_SMALL_REC(s)		(s<<0)
+
 /* EIP197_FLUE_CONFIG */
 #define EIP197_FLUE_CONFIG_MAGIC		0xc7000004
 
 /* Context Control */
 struct safexcel_context_record {
-	u32 control0;
-	u32 control1;
+	__le32 control0;
+	__le32 control1;
 
 	__le32 data[40];
 } __packed;
@@ -358,10 +388,14 @@ struct safexcel_context_record {
 #define CONTEXT_CONTROL_CRYPTO_ALG_AES128	(0x5 << 17)
 #define CONTEXT_CONTROL_CRYPTO_ALG_AES192	(0x6 << 17)
 #define CONTEXT_CONTROL_CRYPTO_ALG_AES256	(0x7 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_CHACHA20	(0x8 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SM4		(0xd << 17)
+#define CONTEXT_CONTROL_DIGEST_INITIAL		(0x0 << 21)
 #define CONTEXT_CONTROL_DIGEST_PRECOMPUTED	(0x1 << 21)
 #define CONTEXT_CONTROL_DIGEST_XCM		(0x2 << 21)
 #define CONTEXT_CONTROL_DIGEST_HMAC		(0x3 << 21)
 #define CONTEXT_CONTROL_CRYPTO_ALG_MD5		(0x0 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_CRC32	(0x0 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA1		(0x2 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA224	(0x4 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA256	(0x3 << 23)
@@ -371,17 +405,25 @@ struct safexcel_context_record {
 #define CONTEXT_CONTROL_CRYPTO_ALG_XCBC128	(0x1 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_XCBC192	(0x2 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_XCBC256	(0x3 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SM3		(0x7 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA3_256	(0xb << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA3_224	(0xc << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA3_512	(0xd << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA3_384	(0xe << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_POLY1305	(0xf << 23)
 #define CONTEXT_CONTROL_INV_FR			(0x5 << 24)
 #define CONTEXT_CONTROL_INV_TR			(0x6 << 24)
 
 /* control1 */
 #define CONTEXT_CONTROL_CRYPTO_MODE_ECB		(0 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_CBC		(1 << 0)
+#define CONTEXT_CONTROL_CHACHA20_MODE_256_32	(2 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_OFB		(4 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_CFB		(5 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD	(6 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_XTS		(7 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_XCM		((6 << 0) | BIT(17))
+#define CONTEXT_CONTROL_CHACHA20_MODE_CALC_OTK	(12 << 0)
 #define CONTEXT_CONTROL_IV0			BIT(5)
 #define CONTEXT_CONTROL_IV1			BIT(6)
 #define CONTEXT_CONTROL_IV2			BIT(7)
@@ -394,6 +436,13 @@ struct safexcel_context_record {
 #define EIP197_XCM_MODE_GCM			1
 #define EIP197_XCM_MODE_CCM			2
 
+#define EIP197_AEAD_TYPE_IPSEC_ESP		2
+#define EIP197_AEAD_TYPE_IPSEC_ESP_GMAC		3
+#define EIP197_AEAD_IPSEC_IV_SIZE		8
+#define EIP197_AEAD_IPSEC_NONCE_SIZE		4
+#define EIP197_AEAD_IPSEC_COUNTER_SIZE		4
+#define EIP197_AEAD_IPSEC_CCM_NONCE_SIZE	3
+
 /* The hash counter given to the engine in the context has a granularity of
  * 64 bits.
  */
@@ -423,6 +472,8 @@ struct safexcel_context_record {
 #define EIP197_TRC_PARAMS2_RC_SZ_SMALL(n)	((n) << 18)
 
 /* Cache helpers */
+#define EIP197_MIN_DSIZE			1024
+#define EIP197_MIN_ASIZE			8
 #define EIP197_CS_TRC_REC_WC			64
 #define EIP197_CS_RC_SIZE			(4 * sizeof(u32))
 #define EIP197_CS_RC_NEXT(x)			(x)
@@ -447,7 +498,7 @@ struct result_data_desc {
 	u16 application_id;
 	u16 rsvd1;
 
-	u32 rsvd2;
+	u32 rsvd2[5];
 } __packed;
 
 
@@ -465,16 +516,15 @@ struct safexcel_result_desc {
 
 	u32 data_lo;
 	u32 data_hi;
-
-	struct result_data_desc result_data;
 } __packed;
 
 /*
  * The EIP(1)97 only needs to fetch the descriptor part of
  * the result descriptor, not the result token part!
  */
-#define EIP197_RD64_FETCH_SIZE		((sizeof(struct safexcel_result_desc) -\
-					  sizeof(struct result_data_desc)) /\
+#define EIP197_RD64_FETCH_SIZE		(sizeof(struct safexcel_result_desc) /\
+					 sizeof(u32))
+#define EIP197_RD64_RESULT_SIZE		(sizeof(struct result_data_desc) /\
 					 sizeof(u32))
 
 struct safexcel_token {
@@ -505,6 +555,8 @@ static inline void eip197_noop_token(struct safexcel_token *token)
 {
 	token->opcode = EIP197_TOKEN_OPCODE_NOOP;
 	token->packet_length = BIT(2);
+	token->stat = 0;
+	token->instructions = 0;
 }
 
 /* Instructions */
@@ -526,14 +578,13 @@ struct safexcel_control_data_desc {
 	u16 application_id;
 	u16 rsvd;
 
-	u8 refresh:2;
-	u32 context_lo:30;
+	u32 context_lo;
 	u32 context_hi;
 
 	u32 control0;
 	u32 control1;
 
-	u32 token[EIP197_MAX_TOKENS];
+	u32 token[EIP197_EMB_TOKENS];
 } __packed;
 
 #define EIP197_OPTION_MAGIC_VALUE	BIT(0)
@@ -543,7 +594,10 @@ struct safexcel_control_data_desc {
 #define EIP197_OPTION_2_TOKEN_IV_CMD	GENMASK(11, 10)
 #define EIP197_OPTION_4_TOKEN_IV_CMD	GENMASK(11, 9)
 
+#define EIP197_TYPE_BCLA		0x0
 #define EIP197_TYPE_EXTENDED		0x3
+#define EIP197_CONTEXT_SMALL		0x2
+#define EIP197_CONTEXT_SIZE_MASK	0x3
 
 /* Basic Command Descriptor format */
 struct safexcel_command_desc {
@@ -551,16 +605,22 @@ struct safexcel_command_desc {
 	u8 rsvd0:5;
 	u8 last_seg:1;
 	u8 first_seg:1;
-	u16 additional_cdata_size:8;
+	u8 additional_cdata_size:8;
 
 	u32 rsvd1;
 
 	u32 data_lo;
 	u32 data_hi;
 
+	u32 atok_lo;
+	u32 atok_hi;
+
 	struct safexcel_control_data_desc control_data;
 } __packed;
 
+#define EIP197_CD64_FETCH_SIZE		(sizeof(struct safexcel_command_desc) /\
+					sizeof(u32))
+
 /*
  * Internal structures & functions
  */
@@ -578,15 +638,20 @@ enum eip197_fw {
 
 struct safexcel_desc_ring {
 	void *base;
+	void *shbase;
 	void *base_end;
+	void *shbase_end;
 	dma_addr_t base_dma;
+	dma_addr_t shbase_dma;
 
 	/* write and read pointers */
 	void *write;
+	void *shwrite;
 	void *read;
 
 	/* descriptor element offset */
-	unsigned offset;
+	unsigned int offset;
+	unsigned int shoffset;
 };
 
 enum safexcel_alg_type {
@@ -601,9 +666,11 @@ struct safexcel_config {
 
 	u32 cd_size;
 	u32 cd_offset;
+	u32 cdsh_offset;
 
 	u32 rd_size;
 	u32 rd_offset;
+	u32 res_offset;
 };
 
 struct safexcel_work_data {
@@ -654,6 +721,12 @@ enum safexcel_eip_version {
 /* Priority we use for advertising our algorithms */
 #define SAFEXCEL_CRA_PRIORITY		300
 
+/* SM3 digest result for zero length message */
+#define EIP197_SM3_ZEROM_HASH	"\x1A\xB2\x1D\x83\x55\xCF\xA1\x7F" \
+				"\x8E\x61\x19\x48\x31\xE8\x1A\x8F" \
+				"\x22\xBE\xC8\xC7\x28\xFE\xFB\x74" \
+				"\x7E\xD0\x35\xEB\x50\x82\xAA\x2B"
+
 /* EIP algorithm presence flags */
 enum safexcel_eip_algorithms {
 	SAFEXCEL_ALG_BC0      = BIT(5),
@@ -697,16 +770,23 @@ struct safexcel_register_offsets {
 enum safexcel_flags {
 	EIP197_TRC_CACHE	= BIT(0),
 	SAFEXCEL_HW_EIP197	= BIT(1),
+	EIP197_PE_ARB		= BIT(2),
+	EIP197_ICE		= BIT(3),
+	EIP197_SIMPLE_TRC	= BIT(4),
 };
 
 struct safexcel_hwconfig {
 	enum safexcel_eip_algorithms algo_flags;
 	int hwver;
 	int hiaver;
+	int ppver;
 	int pever;
 	int hwdataw;
 	int hwcfsize;
 	int hwrfsize;
+	int hwnumpes;
+	int hwnumrings;
+	int hwnumraic;
 };
 
 struct safexcel_crypto_priv {
@@ -778,7 +858,7 @@ struct safexcel_inv_result {
 
 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring);
 int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
-				struct safexcel_result_desc *rdesc);
+				void *rdp);
 void safexcel_complete(struct safexcel_crypto_priv *priv, int ring);
 int safexcel_invalidate_cache(struct crypto_async_request *async,
 			      struct safexcel_crypto_priv *priv,
@@ -797,7 +877,8 @@ struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *pr
 						 bool first, bool last,
 						 dma_addr_t data, u32 len,
 						 u32 full_data_len,
-						 dma_addr_t context);
+						 dma_addr_t context,
+						 struct safexcel_token **atoken);
 struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *priv,
 						 int ring_id,
 						bool first, bool last,
@@ -853,5 +934,43 @@ extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_ctr_aes;
 extern struct safexcel_alg_template safexcel_alg_xts_aes;
 extern struct safexcel_alg_template safexcel_alg_gcm;
 extern struct safexcel_alg_template safexcel_alg_ccm;
+extern struct safexcel_alg_template safexcel_alg_crc32;
+extern struct safexcel_alg_template safexcel_alg_cbcmac;
+extern struct safexcel_alg_template safexcel_alg_xcbcmac;
+extern struct safexcel_alg_template safexcel_alg_cmac;
+extern struct safexcel_alg_template safexcel_alg_chacha20;
+extern struct safexcel_alg_template safexcel_alg_chachapoly;
+extern struct safexcel_alg_template safexcel_alg_chachapoly_esp;
+extern struct safexcel_alg_template safexcel_alg_sm3;
+extern struct safexcel_alg_template safexcel_alg_hmac_sm3;
+extern struct safexcel_alg_template safexcel_alg_ecb_sm4;
+extern struct safexcel_alg_template safexcel_alg_cbc_sm4;
+extern struct safexcel_alg_template safexcel_alg_ofb_sm4;
+extern struct safexcel_alg_template safexcel_alg_cfb_sm4;
+extern struct safexcel_alg_template safexcel_alg_ctr_sm4;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_sm4;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_cbc_sm4;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_sm4;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_ctr_sm4;
+extern struct safexcel_alg_template safexcel_alg_sha3_224;
+extern struct safexcel_alg_template safexcel_alg_sha3_256;
+extern struct safexcel_alg_template safexcel_alg_sha3_384;
+extern struct safexcel_alg_template safexcel_alg_sha3_512;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha3_224;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha3_256;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha3_384;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha3_512;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des3_ede;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des3_ede;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des3_ede;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des3_ede;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des;
+extern struct safexcel_alg_template safexcel_alg_rfc4106_gcm;
+extern struct safexcel_alg_template safexcel_alg_rfc4543_gcm;
+extern struct safexcel_alg_template safexcel_alg_rfc4309_ccm;
 
 #endif
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index ef51f8c2b473..0c5e80c3f6e3 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -5,18 +5,22 @@
  * Antoine Tenart <antoine.tenart@free-electrons.com>
  */
 
+#include <asm/unaligned.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
-
 #include <crypto/aead.h>
 #include <crypto/aes.h>
 #include <crypto/authenc.h>
+#include <crypto/chacha.h>
 #include <crypto/ctr.h>
 #include <crypto/internal/des.h>
 #include <crypto/gcm.h>
 #include <crypto/ghash.h>
+#include <crypto/poly1305.h>
 #include <crypto/sha.h>
+#include <crypto/sm3.h>
+#include <crypto/sm4.h>
 #include <crypto/xts.h>
 #include <crypto/skcipher.h>
 #include <crypto/internal/aead.h>
@@ -33,6 +37,8 @@ enum safexcel_cipher_alg {
 	SAFEXCEL_DES,
 	SAFEXCEL_3DES,
 	SAFEXCEL_AES,
+	SAFEXCEL_CHACHA20,
+	SAFEXCEL_SM4,
 };
 
 struct safexcel_cipher_ctx {
@@ -41,8 +47,12 @@ struct safexcel_cipher_ctx {
 
 	u32 mode;
 	enum safexcel_cipher_alg alg;
-	bool aead;
-	int  xcm; /* 0=authenc, 1=GCM, 2 reserved for CCM */
+	u8 aead; /* !=0=AEAD, 2=IPSec ESP AEAD, 3=IPsec ESP GMAC */
+	u8 xcm;  /* 0=authenc, 1=GCM, 2 reserved for CCM */
+	u8 aadskip;
+	u8 blocksz;
+	u32 ivmask;
+	u32 ctrinit;
 
 	__le32 key[16];
 	u32 nonce;
@@ -51,10 +61,11 @@ struct safexcel_cipher_ctx {
 	/* All the below is AEAD specific */
 	u32 hash_alg;
 	u32 state_sz;
-	u32 ipad[SHA512_DIGEST_SIZE / sizeof(u32)];
-	u32 opad[SHA512_DIGEST_SIZE / sizeof(u32)];
+	__be32 ipad[SHA512_DIGEST_SIZE / sizeof(u32)];
+	__be32 opad[SHA512_DIGEST_SIZE / sizeof(u32)];
 
 	struct crypto_cipher *hkaes;
+	struct crypto_aead *fback;
 };
 
 struct safexcel_cipher_req {
@@ -65,206 +76,298 @@ struct safexcel_cipher_req {
 	int  nr_src, nr_dst;
 };
 
-static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
-				  struct safexcel_command_desc *cdesc)
+static int safexcel_skcipher_iv(struct safexcel_cipher_ctx *ctx, u8 *iv,
+				struct safexcel_command_desc *cdesc)
 {
-	u32 block_sz = 0;
-
 	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD) {
 		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
-
 		/* 32 bit nonce */
 		cdesc->control_data.token[0] = ctx->nonce;
 		/* 64 bit IV part */
 		memcpy(&cdesc->control_data.token[1], iv, 8);
-		/* 32 bit counter, start at 1 (big endian!) */
-		cdesc->control_data.token[3] = cpu_to_be32(1);
-
-		return;
-	} else if (ctx->xcm == EIP197_XCM_MODE_GCM) {
-		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
-
-		/* 96 bit IV part */
-		memcpy(&cdesc->control_data.token[0], iv, 12);
-		/* 32 bit counter, start at 1 (big endian!) */
-		cdesc->control_data.token[3] = cpu_to_be32(1);
-
-		return;
-	} else if (ctx->xcm == EIP197_XCM_MODE_CCM) {
+		/* 32 bit counter, start at 0 or 1 (big endian!) */
+		cdesc->control_data.token[3] =
+			(__force u32)cpu_to_be32(ctx->ctrinit);
+		return 4;
+	}
+	if (ctx->alg == SAFEXCEL_CHACHA20) {
 		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
-
-		/* Variable length IV part */
-		memcpy(&cdesc->control_data.token[0], iv, 15 - iv[0]);
-		/* Start variable length counter at 0 */
-		memset((u8 *)&cdesc->control_data.token[0] + 15 - iv[0],
-		       0, iv[0] + 1);
-
-		return;
+		/* 96 bit nonce part */
+		memcpy(&cdesc->control_data.token[0], &iv[4], 12);
+		/* 32 bit counter */
+		cdesc->control_data.token[3] = *(u32 *)iv;
+		return 4;
 	}
 
-	if (ctx->mode != CONTEXT_CONTROL_CRYPTO_MODE_ECB) {
-		switch (ctx->alg) {
-		case SAFEXCEL_DES:
-			block_sz = DES_BLOCK_SIZE;
-			cdesc->control_data.options |= EIP197_OPTION_2_TOKEN_IV_CMD;
-			break;
-		case SAFEXCEL_3DES:
-			block_sz = DES3_EDE_BLOCK_SIZE;
-			cdesc->control_data.options |= EIP197_OPTION_2_TOKEN_IV_CMD;
-			break;
-		case SAFEXCEL_AES:
-			block_sz = AES_BLOCK_SIZE;
-			cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
-			break;
-		}
-		memcpy(cdesc->control_data.token, iv, block_sz);
-	}
+	cdesc->control_data.options |= ctx->ivmask;
+	memcpy(cdesc->control_data.token, iv, ctx->blocksz);
+	return ctx->blocksz / sizeof(u32);
 }
 
 static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
 				    struct safexcel_command_desc *cdesc,
+				    struct safexcel_token *atoken,
 				    u32 length)
 {
 	struct safexcel_token *token;
+	int ivlen;
 
-	safexcel_cipher_token(ctx, iv, cdesc);
+	ivlen = safexcel_skcipher_iv(ctx, iv, cdesc);
+	if (ivlen == 4) {
+		/* No space in cdesc, instruction moves to atoken */
+		cdesc->additional_cdata_size = 1;
+		token = atoken;
+	} else {
+		/* Everything fits in cdesc */
+		token = (struct safexcel_token *)(cdesc->control_data.token + 2);
+		/* Need to pad with NOP */
+		eip197_noop_token(&token[1]);
+	}
 
-	/* skip over worst case IV of 4 dwords, no need to be exact */
-	token = (struct safexcel_token *)(cdesc->control_data.token + 4);
+	token->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token->packet_length = length;
+	token->stat = EIP197_TOKEN_STAT_LAST_PACKET |
+		      EIP197_TOKEN_STAT_LAST_HASH;
+	token->instructions = EIP197_TOKEN_INS_LAST |
+			      EIP197_TOKEN_INS_TYPE_CRYPTO |
+			      EIP197_TOKEN_INS_TYPE_OUTPUT;
+}
 
-	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
-	token[0].packet_length = length;
-	token[0].stat = EIP197_TOKEN_STAT_LAST_PACKET |
-			EIP197_TOKEN_STAT_LAST_HASH;
-	token[0].instructions = EIP197_TOKEN_INS_LAST |
-				EIP197_TOKEN_INS_TYPE_CRYPTO |
-				EIP197_TOKEN_INS_TYPE_OUTPUT;
+static void safexcel_aead_iv(struct safexcel_cipher_ctx *ctx, u8 *iv,
+			     struct safexcel_command_desc *cdesc)
+{
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD ||
+	    ctx->aead & EIP197_AEAD_TYPE_IPSEC_ESP) { /* _ESP and _ESP_GMAC */
+		/* 32 bit nonce */
+		cdesc->control_data.token[0] = ctx->nonce;
+		/* 64 bit IV part */
+		memcpy(&cdesc->control_data.token[1], iv, 8);
+		/* 32 bit counter, start at 0 or 1 (big endian!) */
+		cdesc->control_data.token[3] =
+			(__force u32)cpu_to_be32(ctx->ctrinit);
+		return;
+	}
+	if (ctx->xcm == EIP197_XCM_MODE_GCM || ctx->alg == SAFEXCEL_CHACHA20) {
+		/* 96 bit IV part */
+		memcpy(&cdesc->control_data.token[0], iv, 12);
+		/* 32 bit counter, start at 0 or 1 (big endian!) */
+		cdesc->control_data.token[3] =
+			(__force u32)cpu_to_be32(ctx->ctrinit);
+		return;
+	}
+	/* CBC */
+	memcpy(cdesc->control_data.token, iv, ctx->blocksz);
 }
 
 static void safexcel_aead_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
 				struct safexcel_command_desc *cdesc,
+				struct safexcel_token *atoken,
 				enum safexcel_cipher_direction direction,
 				u32 cryptlen, u32 assoclen, u32 digestsize)
 {
-	struct safexcel_token *token;
+	struct safexcel_token *aadref;
+	int atoksize = 2; /* Start with minimum size */
+	int assocadj = assoclen - ctx->aadskip, aadalign;
 
-	safexcel_cipher_token(ctx, iv, cdesc);
+	/* Always 4 dwords of embedded IV  for AEAD modes */
+	cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
 
-	if (direction == SAFEXCEL_ENCRYPT) {
-		/* align end of instruction sequence to end of token */
-		token = (struct safexcel_token *)(cdesc->control_data.token +
-			 EIP197_MAX_TOKENS - 13);
-
-		token[12].opcode = EIP197_TOKEN_OPCODE_INSERT;
-		token[12].packet_length = digestsize;
-		token[12].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				 EIP197_TOKEN_STAT_LAST_PACKET;
-		token[12].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
-					 EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
-	} else {
+	if (direction == SAFEXCEL_DECRYPT)
 		cryptlen -= digestsize;
 
-		/* align end of instruction sequence to end of token */
-		token = (struct safexcel_token *)(cdesc->control_data.token +
-			 EIP197_MAX_TOKENS - 14);
-
-		token[12].opcode = EIP197_TOKEN_OPCODE_RETRIEVE;
-		token[12].packet_length = digestsize;
-		token[12].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				 EIP197_TOKEN_STAT_LAST_PACKET;
-		token[12].instructions = EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
-
-		token[13].opcode = EIP197_TOKEN_OPCODE_VERIFY;
-		token[13].packet_length = digestsize |
-					  EIP197_TOKEN_HASH_RESULT_VERIFY;
-		token[13].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				 EIP197_TOKEN_STAT_LAST_PACKET;
-		token[13].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT;
-	}
-
-	token[6].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
-	token[6].packet_length = assoclen;
-
-	if (likely(cryptlen)) {
-		token[6].instructions = EIP197_TOKEN_INS_TYPE_HASH;
-
-		token[10].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
-		token[10].packet_length = cryptlen;
-		token[10].stat = EIP197_TOKEN_STAT_LAST_HASH;
-		token[10].instructions = EIP197_TOKEN_INS_LAST |
-					 EIP197_TOKEN_INS_TYPE_CRYPTO |
-					 EIP197_TOKEN_INS_TYPE_HASH |
-					 EIP197_TOKEN_INS_TYPE_OUTPUT;
-	} else if (ctx->xcm != EIP197_XCM_MODE_CCM) {
-		token[6].stat = EIP197_TOKEN_STAT_LAST_HASH;
-		token[6].instructions = EIP197_TOKEN_INS_LAST |
-					EIP197_TOKEN_INS_TYPE_HASH;
-	}
-
-	if (!ctx->xcm)
-		return;
-
-	token[8].opcode = EIP197_TOKEN_OPCODE_INSERT_REMRES;
-	token[8].packet_length = 0;
-	token[8].instructions = AES_BLOCK_SIZE;
+	if (unlikely(ctx->xcm == EIP197_XCM_MODE_CCM)) {
+		/* Construct IV block B0 for the CBC-MAC */
+		u8 *final_iv = (u8 *)cdesc->control_data.token;
+		u8 *cbcmaciv = (u8 *)&atoken[1];
+		__le32 *aadlen = (__le32 *)&atoken[5];
+
+		if (ctx->aead == EIP197_AEAD_TYPE_IPSEC_ESP) {
+			/* Length + nonce */
+			cdesc->control_data.token[0] = ctx->nonce;
+			/* Fixup flags byte */
+			*(__le32 *)cbcmaciv =
+				cpu_to_le32(ctx->nonce |
+					    ((assocadj > 0) << 6) |
+					    ((digestsize - 2) << 2));
+			/* 64 bit IV part */
+			memcpy(&cdesc->control_data.token[1], iv, 8);
+			memcpy(cbcmaciv + 4, iv, 8);
+			/* Start counter at 0 */
+			cdesc->control_data.token[3] = 0;
+			/* Message length */
+			*(__be32 *)(cbcmaciv + 12) = cpu_to_be32(cryptlen);
+		} else {
+			/* Variable length IV part */
+			memcpy(final_iv, iv, 15 - iv[0]);
+			memcpy(cbcmaciv, iv, 15 - iv[0]);
+			/* Start variable length counter at 0 */
+			memset(final_iv + 15 - iv[0], 0, iv[0] + 1);
+			memset(cbcmaciv + 15 - iv[0], 0, iv[0] - 1);
+			/* fixup flags byte */
+			cbcmaciv[0] |= ((assocadj > 0) << 6) |
+				       ((digestsize - 2) << 2);
+			/* insert lower 2 bytes of message length */
+			cbcmaciv[14] = cryptlen >> 8;
+			cbcmaciv[15] = cryptlen & 255;
+		}
 
-	token[9].opcode = EIP197_TOKEN_OPCODE_INSERT;
-	token[9].packet_length = AES_BLOCK_SIZE;
-	token[9].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
-				EIP197_TOKEN_INS_TYPE_CRYPTO;
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+		atoken->packet_length = AES_BLOCK_SIZE +
+					((assocadj > 0) << 1);
+		atoken->stat = 0;
+		atoken->instructions = EIP197_TOKEN_INS_ORIGIN_TOKEN |
+				       EIP197_TOKEN_INS_TYPE_HASH;
+
+		if (likely(assocadj)) {
+			*aadlen = cpu_to_le32((assocadj >> 8) |
+					      (assocadj & 255) << 8);
+			atoken += 6;
+			atoksize += 7;
+		} else {
+			atoken += 5;
+			atoksize += 6;
+		}
 
-	if (ctx->xcm == EIP197_XCM_MODE_GCM) {
-		token[6].instructions = EIP197_TOKEN_INS_LAST |
-					EIP197_TOKEN_INS_TYPE_HASH;
+		/* Process AAD data */
+		aadref = atoken;
+		atoken->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+		atoken->packet_length = assocadj;
+		atoken->stat = 0;
+		atoken->instructions = EIP197_TOKEN_INS_TYPE_HASH;
+		atoken++;
+
+		/* For CCM only, align AAD data towards hash engine */
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+		aadalign = (assocadj + 2) & 15;
+		atoken->packet_length = assocadj && aadalign ?
+						16 - aadalign :
+						0;
+		if (likely(cryptlen)) {
+			atoken->stat = 0;
+			atoken->instructions = EIP197_TOKEN_INS_TYPE_HASH;
+		} else {
+			atoken->stat = EIP197_TOKEN_STAT_LAST_HASH;
+			atoken->instructions = EIP197_TOKEN_INS_LAST |
+					       EIP197_TOKEN_INS_TYPE_HASH;
+		}
 	} else {
-		u8 *cbcmaciv = (u8 *)&token[1];
-		u32 *aadlen = (u32 *)&token[5];
-
-		/* Construct IV block B0 for the CBC-MAC */
-		token[0].opcode = EIP197_TOKEN_OPCODE_INSERT;
-		token[0].packet_length = AES_BLOCK_SIZE +
-					 ((assoclen > 0) << 1);
-		token[0].instructions = EIP197_TOKEN_INS_ORIGIN_TOKEN |
-					EIP197_TOKEN_INS_TYPE_HASH;
-		/* Variable length IV part */
-		memcpy(cbcmaciv, iv, 15 - iv[0]);
-		/* fixup flags byte */
-		cbcmaciv[0] |= ((assoclen > 0) << 6) | ((digestsize - 2) << 2);
-		/* Clear upper bytes of variable message length to 0 */
-		memset(cbcmaciv + 15 - iv[0], 0, iv[0] - 1);
-		/* insert lower 2 bytes of message length */
-		cbcmaciv[14] = cryptlen >> 8;
-		cbcmaciv[15] = cryptlen & 255;
+		safexcel_aead_iv(ctx, iv, cdesc);
+
+		/* Process AAD data */
+		aadref = atoken;
+		atoken->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+		atoken->packet_length = assocadj;
+		atoken->stat = EIP197_TOKEN_STAT_LAST_HASH;
+		atoken->instructions = EIP197_TOKEN_INS_LAST |
+				       EIP197_TOKEN_INS_TYPE_HASH;
+	}
+	atoken++;
+
+	if (ctx->aead == EIP197_AEAD_TYPE_IPSEC_ESP) {
+		/* For ESP mode (and not GMAC), skip over the IV */
+		atoken->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+		atoken->packet_length = EIP197_AEAD_IPSEC_IV_SIZE;
+		atoken->stat = 0;
+		atoken->instructions = 0;
+		atoken++;
+		atoksize++;
+	} else if (unlikely(ctx->alg == SAFEXCEL_CHACHA20 &&
+			    direction == SAFEXCEL_DECRYPT)) {
+		/* Poly-chacha decryption needs a dummy NOP here ... */
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+		atoken->packet_length = 16; /* According to Op Manual */
+		atoken->stat = 0;
+		atoken->instructions = 0;
+		atoken++;
+		atoksize++;
+	}
 
-		if (assoclen) {
-			*aadlen = cpu_to_le32(cpu_to_be16(assoclen));
-			assoclen += 2;
-		}
+	if  (ctx->xcm) {
+		/* For GCM and CCM, obtain enc(Y0) */
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT_REMRES;
+		atoken->packet_length = 0;
+		atoken->stat = 0;
+		atoken->instructions = AES_BLOCK_SIZE;
+		atoken++;
+
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+		atoken->packet_length = AES_BLOCK_SIZE;
+		atoken->stat = 0;
+		atoken->instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+				       EIP197_TOKEN_INS_TYPE_CRYPTO;
+		atoken++;
+		atoksize += 2;
+	}
 
-		token[6].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+	if (likely(cryptlen || ctx->alg == SAFEXCEL_CHACHA20)) {
+		/* Fixup stat field for AAD direction instruction */
+		aadref->stat = 0;
 
-		/* Align AAD data towards hash engine */
-		token[7].opcode = EIP197_TOKEN_OPCODE_INSERT;
-		assoclen &= 15;
-		token[7].packet_length = assoclen ? 16 - assoclen : 0;
+		/* Process crypto data */
+		atoken->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+		atoken->packet_length = cryptlen;
 
-		if (likely(cryptlen)) {
-			token[7].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+		if (unlikely(ctx->aead == EIP197_AEAD_TYPE_IPSEC_ESP_GMAC)) {
+			/* Fixup instruction field for AAD dir instruction */
+			aadref->instructions = EIP197_TOKEN_INS_TYPE_HASH;
 
-			/* Align crypto data towards hash engine */
-			token[10].stat = 0;
+			/* Do not send to crypt engine in case of GMAC */
+			atoken->instructions = EIP197_TOKEN_INS_LAST |
+					       EIP197_TOKEN_INS_TYPE_HASH |
+					       EIP197_TOKEN_INS_TYPE_OUTPUT;
+		} else {
+			atoken->instructions = EIP197_TOKEN_INS_LAST |
+					       EIP197_TOKEN_INS_TYPE_CRYPTO |
+					       EIP197_TOKEN_INS_TYPE_HASH |
+					       EIP197_TOKEN_INS_TYPE_OUTPUT;
+		}
 
-			token[11].opcode = EIP197_TOKEN_OPCODE_INSERT;
-			cryptlen &= 15;
-			token[11].packet_length = cryptlen ? 16 - cryptlen : 0;
-			token[11].stat = EIP197_TOKEN_STAT_LAST_HASH;
-			token[11].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+		cryptlen &= 15;
+		if (unlikely(ctx->xcm == EIP197_XCM_MODE_CCM && cryptlen)) {
+			atoken->stat = 0;
+			/* For CCM only, pad crypto data to the hash engine */
+			atoken++;
+			atoksize++;
+			atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+			atoken->packet_length = 16 - cryptlen;
+			atoken->stat = EIP197_TOKEN_STAT_LAST_HASH;
+			atoken->instructions = EIP197_TOKEN_INS_TYPE_HASH;
 		} else {
-			token[7].stat = EIP197_TOKEN_STAT_LAST_HASH;
-			token[7].instructions = EIP197_TOKEN_INS_LAST |
-						EIP197_TOKEN_INS_TYPE_HASH;
+			atoken->stat = EIP197_TOKEN_STAT_LAST_HASH;
 		}
+		atoken++;
+		atoksize++;
+	}
+
+	if (direction == SAFEXCEL_ENCRYPT) {
+		/* Append ICV */
+		atoken->opcode = EIP197_TOKEN_OPCODE_INSERT;
+		atoken->packet_length = digestsize;
+		atoken->stat = EIP197_TOKEN_STAT_LAST_HASH |
+			       EIP197_TOKEN_STAT_LAST_PACKET;
+		atoken->instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+				       EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+	} else {
+		/* Extract ICV */
+		atoken->opcode = EIP197_TOKEN_OPCODE_RETRIEVE;
+		atoken->packet_length = digestsize;
+		atoken->stat = EIP197_TOKEN_STAT_LAST_HASH |
+			       EIP197_TOKEN_STAT_LAST_PACKET;
+		atoken->instructions = EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+		atoken++;
+		atoksize++;
+
+		/* Verify ICV */
+		atoken->opcode = EIP197_TOKEN_OPCODE_VERIFY;
+		atoken->packet_length = digestsize |
+					EIP197_TOKEN_HASH_RESULT_VERIFY;
+		atoken->stat = EIP197_TOKEN_STAT_LAST_HASH |
+			       EIP197_TOKEN_STAT_LAST_PACKET;
+		atoken->instructions = EIP197_TOKEN_INS_TYPE_OUTPUT;
 	}
+
+	/* Fixup length of the token in the command descriptor */
+	cdesc->additional_cdata_size = atoksize;
 }
 
 static int safexcel_skcipher_aes_setkey(struct crypto_skcipher *ctfm,
@@ -277,14 +380,12 @@ static int safexcel_skcipher_aes_setkey(struct crypto_skcipher *ctfm,
 	int ret, i;
 
 	ret = aes_expandkey(&aes, key, len);
-	if (ret) {
-		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < len / sizeof(u32); i++) {
-			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i]) != aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -309,43 +410,57 @@ static int safexcel_aead_setkey(struct crypto_aead *ctfm, const u8 *key,
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct crypto_authenc_keys keys;
 	struct crypto_aes_ctx aes;
-	int err = -EINVAL;
+	int err = -EINVAL, i;
 
-	if (crypto_authenc_extractkeys(&keys, key, len) != 0)
+	if (unlikely(crypto_authenc_extractkeys(&keys, key, len)))
 		goto badkey;
 
 	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD) {
-		/* Minimum keysize is minimum AES key size + nonce size */
-		if (keys.enckeylen < (AES_MIN_KEY_SIZE +
-				      CTR_RFC3686_NONCE_SIZE))
+		/* Must have at least space for the nonce here */
+		if (unlikely(keys.enckeylen < CTR_RFC3686_NONCE_SIZE))
 			goto badkey;
 		/* last 4 bytes of key are the nonce! */
 		ctx->nonce = *(u32 *)(keys.enckey + keys.enckeylen -
 				      CTR_RFC3686_NONCE_SIZE);
 		/* exclude the nonce here */
-		keys.enckeylen -= CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
+		keys.enckeylen -= CTR_RFC3686_NONCE_SIZE;
 	}
 
 	/* Encryption key */
 	switch (ctx->alg) {
+	case SAFEXCEL_DES:
+		err = verify_aead_des_key(ctfm, keys.enckey, keys.enckeylen);
+		if (unlikely(err))
+			goto badkey;
+		break;
 	case SAFEXCEL_3DES:
 		err = verify_aead_des3_key(ctfm, keys.enckey, keys.enckeylen);
 		if (unlikely(err))
-			goto badkey_expflags;
+			goto badkey;
 		break;
 	case SAFEXCEL_AES:
 		err = aes_expandkey(&aes, keys.enckey, keys.enckeylen);
 		if (unlikely(err))
 			goto badkey;
 		break;
+	case SAFEXCEL_SM4:
+		if (unlikely(keys.enckeylen != SM4_KEY_SIZE))
+			goto badkey;
+		break;
 	default:
 		dev_err(priv->dev, "aead: unsupported cipher algorithm\n");
 		goto badkey;
 	}
 
-	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma &&
-	    memcmp(ctx->key, keys.enckey, keys.enckeylen))
-		ctx->base.needs_inv = true;
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < keys.enckeylen / sizeof(u32); i++) {
+			if (le32_to_cpu(ctx->key[i]) !=
+			    ((u32 *)keys.enckey)[i]) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
 
 	/* Auth key */
 	switch (ctx->hash_alg) {
@@ -374,21 +489,24 @@ static int safexcel_aead_setkey(struct crypto_aead *ctfm, const u8 *key,
 					 keys.authkeylen, &istate, &ostate))
 			goto badkey;
 		break;
+	case CONTEXT_CONTROL_CRYPTO_ALG_SM3:
+		if (safexcel_hmac_setkey("safexcel-sm3", keys.authkey,
+					 keys.authkeylen, &istate, &ostate))
+			goto badkey;
+		break;
 	default:
 		dev_err(priv->dev, "aead: unsupported hash algorithm\n");
 		goto badkey;
 	}
 
-	crypto_aead_set_flags(ctfm, crypto_aead_get_flags(ctfm) &
-				    CRYPTO_TFM_RES_MASK);
-
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma &&
 	    (memcmp(ctx->ipad, istate.state, ctx->state_sz) ||
 	     memcmp(ctx->opad, ostate.state, ctx->state_sz)))
 		ctx->base.needs_inv = true;
 
 	/* Now copy the keys into the context */
-	memcpy(ctx->key, keys.enckey, keys.enckeylen);
+	for (i = 0; i < keys.enckeylen / sizeof(u32); i++)
+		ctx->key[i] = cpu_to_le32(((u32 *)keys.enckey)[i]);
 	ctx->key_len = keys.enckeylen;
 
 	memcpy(ctx->ipad, &istate.state, ctx->state_sz);
@@ -398,8 +516,6 @@ static int safexcel_aead_setkey(struct crypto_aead *ctfm, const u8 *key,
 	return 0;
 
 badkey:
-	crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-badkey_expflags:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
 }
@@ -423,6 +539,17 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
 				CONTEXT_CONTROL_DIGEST_XCM |
 				ctx->hash_alg |
 				CONTEXT_CONTROL_SIZE(ctrl_size);
+		} else if (ctx->alg == SAFEXCEL_CHACHA20) {
+			/* Chacha20-Poly1305 */
+			cdesc->control_data.control0 =
+				CONTEXT_CONTROL_KEY_EN |
+				CONTEXT_CONTROL_CRYPTO_ALG_CHACHA20 |
+				(sreq->direction == SAFEXCEL_ENCRYPT ?
+					CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT :
+					CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN) |
+				ctx->hash_alg |
+				CONTEXT_CONTROL_SIZE(ctrl_size);
+			return 0;
 		} else {
 			ctrl_size += ctx->state_sz / sizeof(u32) * 2;
 			cdesc->control_data.control0 =
@@ -431,17 +558,21 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
 				ctx->hash_alg |
 				CONTEXT_CONTROL_SIZE(ctrl_size);
 		}
-		if (sreq->direction == SAFEXCEL_ENCRYPT)
-			cdesc->control_data.control0 |=
-				(ctx->xcm == EIP197_XCM_MODE_CCM) ?
-					CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT :
-					CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT;
 
+		if (sreq->direction == SAFEXCEL_ENCRYPT &&
+		    (ctx->xcm == EIP197_XCM_MODE_CCM ||
+		     ctx->aead == EIP197_AEAD_TYPE_IPSEC_ESP_GMAC))
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT;
+		else if (sreq->direction == SAFEXCEL_ENCRYPT)
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT;
+		else if (ctx->xcm == EIP197_XCM_MODE_CCM)
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN;
 		else
 			cdesc->control_data.control0 |=
-				(ctx->xcm == EIP197_XCM_MODE_CCM) ?
-					CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN :
-					CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN;
+				CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN;
 	} else {
 		if (sreq->direction == SAFEXCEL_ENCRYPT)
 			cdesc->control_data.control0 =
@@ -480,6 +611,12 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
 				ctx->key_len >> ctx->xts);
 			return -EINVAL;
 		}
+	} else if (ctx->alg == SAFEXCEL_CHACHA20) {
+		cdesc->control_data.control0 |=
+			CONTEXT_CONTROL_CRYPTO_ALG_CHACHA20;
+	} else if (ctx->alg == SAFEXCEL_SM4) {
+		cdesc->control_data.control0 |=
+			CONTEXT_CONTROL_CRYPTO_ALG_SM4;
 	}
 
 	return 0;
@@ -563,6 +700,7 @@ static int safexcel_send_req(struct crypto_async_request *base, int ring,
 	unsigned int totlen;
 	unsigned int totlen_src = cryptlen + assoclen;
 	unsigned int totlen_dst = totlen_src;
+	struct safexcel_token *atoken;
 	int n_cdesc = 0, n_rdesc = 0;
 	int queued, i, ret = 0;
 	bool first = true;
@@ -637,56 +775,60 @@ static int safexcel_send_req(struct crypto_async_request *base, int ring,
 
 	memcpy(ctx->base.ctxr->data, ctx->key, ctx->key_len);
 
-	/* The EIP cannot deal with zero length input packets! */
-	if (totlen == 0)
-		totlen = 1;
+	if (!totlen) {
+		/*
+		 * The EIP97 cannot deal with zero length input packets!
+		 * So stuff a dummy command descriptor indicating a 1 byte
+		 * (dummy) input packet, using the context record as source.
+		 */
+		first_cdesc = safexcel_add_cdesc(priv, ring,
+						 1, 1, ctx->base.ctxr_dma,
+						 1, 1, ctx->base.ctxr_dma,
+						 &atoken);
+		if (IS_ERR(first_cdesc)) {
+			/* No space left in the command descriptor ring */
+			ret = PTR_ERR(first_cdesc);
+			goto cdesc_rollback;
+		}
+		n_cdesc = 1;
+		goto skip_cdesc;
+	}
 
 	/* command descriptors */
 	for_each_sg(src, sg, sreq->nr_src, i) {
 		int len = sg_dma_len(sg);
 
 		/* Do not overflow the request */
-		if (queued - len < 0)
+		if (queued < len)
 			len = queued;
 
 		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
 					   !(queued - len),
 					   sg_dma_address(sg), len, totlen,
-					   ctx->base.ctxr_dma);
+					   ctx->base.ctxr_dma, &atoken);
 		if (IS_ERR(cdesc)) {
 			/* No space left in the command descriptor ring */
 			ret = PTR_ERR(cdesc);
 			goto cdesc_rollback;
 		}
-		n_cdesc++;
 
-		if (n_cdesc == 1) {
+		if (!n_cdesc)
 			first_cdesc = cdesc;
-		}
 
+		n_cdesc++;
 		queued -= len;
 		if (!queued)
 			break;
 	}
-
-	if (unlikely(!n_cdesc)) {
-		/*
-		 * Special case: zero length input buffer.
-		 * The engine always needs the 1st command descriptor, however!
-		 */
-		first_cdesc = safexcel_add_cdesc(priv, ring, 1, 1, 0, 0, totlen,
-						 ctx->base.ctxr_dma);
-		n_cdesc = 1;
-	}
-
+skip_cdesc:
 	/* Add context control words and token to first command descriptor */
 	safexcel_context_control(ctx, base, sreq, first_cdesc);
 	if (ctx->aead)
-		safexcel_aead_token(ctx, iv, first_cdesc,
+		safexcel_aead_token(ctx, iv, first_cdesc, atoken,
 				    sreq->direction, cryptlen,
 				    assoclen, digestsize);
 	else
-		safexcel_skcipher_token(ctx, iv, first_cdesc,
+		safexcel_skcipher_token(ctx, iv, first_cdesc, atoken,
 					cryptlen);
 
 	/* result descriptors */
@@ -1073,6 +1215,8 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
 
 	ctx->base.send = safexcel_skcipher_send;
 	ctx->base.handle_result = safexcel_skcipher_handle_result;
+	ctx->ivmask = EIP197_OPTION_4_TOKEN_IV_CMD;
+	ctx->ctrinit = 1;
 	return 0;
 }
 
@@ -1137,6 +1281,8 @@ static int safexcel_skcipher_aes_ecb_cra_init(struct crypto_tfm *tfm)
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	ctx->blocksz = 0;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	return 0;
 }
 
@@ -1171,6 +1317,7 @@ static int safexcel_skcipher_aes_cbc_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
+	ctx->blocksz = AES_BLOCK_SIZE;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
 	return 0;
 }
@@ -1207,6 +1354,7 @@ static int safexcel_skcipher_aes_cfb_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
+	ctx->blocksz = AES_BLOCK_SIZE;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CFB;
 	return 0;
 }
@@ -1243,6 +1391,7 @@ static int safexcel_skcipher_aes_ofb_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
+	ctx->blocksz = AES_BLOCK_SIZE;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_OFB;
 	return 0;
 }
@@ -1288,14 +1437,12 @@ static int safexcel_skcipher_aesctr_setkey(struct crypto_skcipher *ctfm,
 	/* exclude the nonce here */
 	keylen = len - CTR_RFC3686_NONCE_SIZE;
 	ret = aes_expandkey(&aes, key, keylen);
-	if (ret) {
-		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < keylen / sizeof(u32); i++) {
-			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i]) != aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -1317,6 +1464,7 @@ static int safexcel_skcipher_aes_ctr_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
+	ctx->blocksz = AES_BLOCK_SIZE;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
 	return 0;
 }
@@ -1352,6 +1500,7 @@ static int safexcel_des_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 			       unsigned int len)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(ctfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
 	ret = verify_skcipher_des_key(ctfm, key);
@@ -1359,7 +1508,7 @@ static int safexcel_des_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 		return ret;
 
 	/* if context exits and key changed, need to invalidate it */
-	if (ctx->base.ctxr_dma)
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma)
 		if (memcmp(ctx->key, key, len))
 			ctx->base.needs_inv = true;
 
@@ -1375,6 +1524,8 @@ static int safexcel_skcipher_des_cbc_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_DES;
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
 	return 0;
 }
@@ -1412,6 +1563,8 @@ static int safexcel_skcipher_des_ecb_cra_init(struct crypto_tfm *tfm)
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_DES;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	ctx->blocksz = 0;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	return 0;
 }
 
@@ -1444,6 +1597,7 @@ static int safexcel_des3_ede_setkey(struct crypto_skcipher *ctfm,
 				   const u8 *key, unsigned int len)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(ctfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
 	int err;
 
 	err = verify_skcipher_des3_key(ctfm, key);
@@ -1451,13 +1605,11 @@ static int safexcel_des3_ede_setkey(struct crypto_skcipher *ctfm,
 		return err;
 
 	/* if context exits and key changed, need to invalidate it */
-	if (ctx->base.ctxr_dma) {
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma)
 		if (memcmp(ctx->key, key, len))
 			ctx->base.needs_inv = true;
-	}
 
 	memcpy(ctx->key, key, len);
-
 	ctx->key_len = len;
 
 	return 0;
@@ -1469,6 +1621,8 @@ static int safexcel_skcipher_des3_cbc_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_3DES;
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
 	return 0;
 }
@@ -1506,6 +1660,8 @@ static int safexcel_skcipher_des3_ecb_cra_init(struct crypto_tfm *tfm)
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_3DES;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	ctx->blocksz = 0;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	return 0;
 }
 
@@ -1561,6 +1717,9 @@ static int safexcel_aead_cra_init(struct crypto_tfm *tfm)
 	ctx->priv = tmpl->priv;
 
 	ctx->alg  = SAFEXCEL_AES; /* default */
+	ctx->blocksz = AES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_4_TOKEN_IV_CMD;
+	ctx->ctrinit = 1;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC; /* default */
 	ctx->aead = true;
 	ctx->base.send = safexcel_aead_send;
@@ -1749,6 +1908,8 @@ static int safexcel_aead_sha1_des3_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_aead_sha1_cra_init(tfm);
 	ctx->alg = SAFEXCEL_3DES; /* override default */
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
 	return 0;
 }
 
@@ -1777,6 +1938,330 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des3_ede = {
 	},
 };
 
+static int safexcel_aead_sha256_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha256_cra_init(tfm);
+	ctx->alg = SAFEXCEL_3DES; /* override default */
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des3_ede = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-des3_ede",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha256_des3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha224_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha224_cra_init(tfm);
+	ctx->alg = SAFEXCEL_3DES; /* override default */
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des3_ede = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize = SHA224_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha224),cbc(des3_ede))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-des3_ede",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha224_des3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha512_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha512_cra_init(tfm);
+	ctx->alg = SAFEXCEL_3DES; /* override default */
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des3_ede = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize = SHA512_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha512),cbc(des3_ede))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-des3_ede",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha512_des3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha384_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha384_cra_init(tfm);
+	ctx->alg = SAFEXCEL_3DES; /* override default */
+	ctx->blocksz = DES3_EDE_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des3_ede = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize = SHA384_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha384),cbc(des3_ede))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-des3_ede",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha384_des3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha1_des_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha1_cra_init(tfm);
+	ctx->alg = SAFEXCEL_DES; /* override default */
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA1,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES_BLOCK_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),cbc(des))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-des",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha1_des_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha256_des_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha256_cra_init(tfm);
+	ctx->alg = SAFEXCEL_DES; /* override default */
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES_BLOCK_SIZE,
+		.maxauthsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha256),cbc(des))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-des",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha256_des_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha224_des_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha224_cra_init(tfm);
+	ctx->alg = SAFEXCEL_DES; /* override default */
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES_BLOCK_SIZE,
+		.maxauthsize = SHA224_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha224),cbc(des))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-des",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha224_des_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha512_des_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha512_cra_init(tfm);
+	ctx->alg = SAFEXCEL_DES; /* override default */
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES_BLOCK_SIZE,
+		.maxauthsize = SHA512_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha512),cbc(des))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-des",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha512_des_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha384_des_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha384_cra_init(tfm);
+	ctx->alg = SAFEXCEL_DES; /* override default */
+	ctx->blocksz = DES_BLOCK_SIZE;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES_BLOCK_SIZE,
+		.maxauthsize = SHA384_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha384),cbc(des))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-des",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha384_des_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
 static int safexcel_aead_sha1_ctr_cra_init(struct crypto_tfm *tfm)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -1965,14 +2450,12 @@ static int safexcel_skcipher_aesxts_setkey(struct crypto_skcipher *ctfm,
 	/* Only half of the key data is cipher key */
 	keylen = (len >> 1);
 	ret = aes_expandkey(&aes, key, keylen);
-	if (ret) {
-		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < keylen / sizeof(u32); i++) {
-			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i]) != aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -1984,15 +2467,13 @@ static int safexcel_skcipher_aesxts_setkey(struct crypto_skcipher *ctfm,
 
 	/* The other half is the tweak key */
 	ret = aes_expandkey(&aes, (u8 *)(key + keylen), keylen);
-	if (ret) {
-		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < keylen / sizeof(u32); i++) {
-			if (ctx->key[i + keylen / sizeof(u32)] !=
-			    cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i + keylen / sizeof(u32)]) !=
+			    aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -2015,6 +2496,7 @@ static int safexcel_skcipher_aes_xts_cra_init(struct crypto_tfm *tfm)
 
 	safexcel_skcipher_cra_init(tfm);
 	ctx->alg  = SAFEXCEL_AES;
+	ctx->blocksz = AES_BLOCK_SIZE;
 	ctx->xts  = 1;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XTS;
 	return 0;
@@ -2075,14 +2557,13 @@ static int safexcel_aead_gcm_setkey(struct crypto_aead *ctfm, const u8 *key,
 
 	ret = aes_expandkey(&aes, key, len);
 	if (ret) {
-		crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		memzero_explicit(&aes, sizeof(aes));
 		return ret;
 	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < len / sizeof(u32); i++) {
-			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i]) != aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -2099,8 +2580,6 @@ static int safexcel_aead_gcm_setkey(struct crypto_aead *ctfm, const u8 *key,
 	crypto_cipher_set_flags(ctx->hkaes, crypto_aead_get_flags(ctfm) &
 				CRYPTO_TFM_REQ_MASK);
 	ret = crypto_cipher_setkey(ctx->hkaes, key, len);
-	crypto_aead_set_flags(ctfm, crypto_cipher_get_flags(ctx->hkaes) &
-			      CRYPTO_TFM_RES_MASK);
 	if (ret)
 		return ret;
 
@@ -2109,7 +2588,7 @@ static int safexcel_aead_gcm_setkey(struct crypto_aead *ctfm, const u8 *key,
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < AES_BLOCK_SIZE / sizeof(u32); i++) {
-			if (ctx->ipad[i] != cpu_to_be32(hashkey[i])) {
+			if (be32_to_cpu(ctx->ipad[i]) != hashkey[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -2135,10 +2614,7 @@ static int safexcel_aead_gcm_cra_init(struct crypto_tfm *tfm)
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XCM; /* override default */
 
 	ctx->hkaes = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->hkaes))
-		return PTR_ERR(ctx->hkaes);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(ctx->hkaes);
 }
 
 static void safexcel_aead_gcm_cra_exit(struct crypto_tfm *tfm)
@@ -2192,14 +2668,13 @@ static int safexcel_aead_ccm_setkey(struct crypto_aead *ctfm, const u8 *key,
 
 	ret = aes_expandkey(&aes, key, len);
 	if (ret) {
-		crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		memzero_explicit(&aes, sizeof(aes));
 		return ret;
 	}
 
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
 		for (i = 0; i < len / sizeof(u32); i++) {
-			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			if (le32_to_cpu(ctx->key[i]) != aes.key_enc[i]) {
 				ctx->base.needs_inv = true;
 				break;
 			}
@@ -2235,6 +2710,7 @@ static int safexcel_aead_ccm_cra_init(struct crypto_tfm *tfm)
 	ctx->state_sz = 3 * AES_BLOCK_SIZE;
 	ctx->xcm = EIP197_XCM_MODE_CCM;
 	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XCM; /* override default */
+	ctx->ctrinit = 0;
 	return 0;
 }
 
@@ -2303,3 +2779,947 @@ struct safexcel_alg_template safexcel_alg_ccm = {
 		},
 	},
 };
+
+static void safexcel_chacha20_setkey(struct safexcel_cipher_ctx *ctx,
+				     const u8 *key)
+{
+	struct safexcel_crypto_priv *priv = ctx->priv;
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma)
+		if (memcmp(ctx->key, key, CHACHA_KEY_SIZE))
+			ctx->base.needs_inv = true;
+
+	memcpy(ctx->key, key, CHACHA_KEY_SIZE);
+	ctx->key_len = CHACHA_KEY_SIZE;
+}
+
+static int safexcel_skcipher_chacha20_setkey(struct crypto_skcipher *ctfm,
+					     const u8 *key, unsigned int len)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(ctfm);
+
+	if (len != CHACHA_KEY_SIZE)
+		return -EINVAL;
+
+	safexcel_chacha20_setkey(ctx, key);
+
+	return 0;
+}
+
+static int safexcel_skcipher_chacha20_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_CHACHA20;
+	ctx->ctrinit = 0;
+	ctx->mode = CONTEXT_CONTROL_CHACHA20_MODE_256_32;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_chacha20 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_CHACHA20,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_chacha20_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = CHACHA_IV_SIZE,
+		.base = {
+			.cra_name = "chacha20",
+			.cra_driver_name = "safexcel-chacha20",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_chacha20_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_chachapoly_setkey(struct crypto_aead *ctfm,
+				    const u8 *key, unsigned int len)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_aead_ctx(ctfm);
+
+	if (ctx->aead  == EIP197_AEAD_TYPE_IPSEC_ESP &&
+	    len > EIP197_AEAD_IPSEC_NONCE_SIZE) {
+		/* ESP variant has nonce appended to key */
+		len -= EIP197_AEAD_IPSEC_NONCE_SIZE;
+		ctx->nonce = *(u32 *)(key + len);
+	}
+	if (len != CHACHA_KEY_SIZE)
+		return -EINVAL;
+
+	safexcel_chacha20_setkey(ctx, key);
+
+	return 0;
+}
+
+static int safexcel_aead_chachapoly_setauthsize(struct crypto_aead *tfm,
+					 unsigned int authsize)
+{
+	if (authsize != POLY1305_DIGEST_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static int safexcel_aead_chachapoly_crypt(struct aead_request *req,
+					  enum safexcel_cipher_direction dir)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aead_request *subreq = aead_request_ctx(req);
+	u32 key[CHACHA_KEY_SIZE / sizeof(u32) + 1];
+	int ret = 0;
+
+	/*
+	 * Instead of wasting time detecting umpteen silly corner cases,
+	 * just dump all "small" requests to the fallback implementation.
+	 * HW would not be faster on such small requests anyway.
+	 */
+	if (likely((ctx->aead != EIP197_AEAD_TYPE_IPSEC_ESP ||
+		    req->assoclen >= EIP197_AEAD_IPSEC_IV_SIZE) &&
+		   req->cryptlen > POLY1305_DIGEST_SIZE)) {
+		return safexcel_queue_req(&req->base, creq, dir);
+	}
+
+	/* HW cannot do full (AAD+payload) zero length, use fallback */
+	memcpy(key, ctx->key, CHACHA_KEY_SIZE);
+	if (ctx->aead == EIP197_AEAD_TYPE_IPSEC_ESP) {
+		/* ESP variant has nonce appended to the key */
+		key[CHACHA_KEY_SIZE / sizeof(u32)] = ctx->nonce;
+		ret = crypto_aead_setkey(ctx->fback, (u8 *)key,
+					 CHACHA_KEY_SIZE +
+					 EIP197_AEAD_IPSEC_NONCE_SIZE);
+	} else {
+		ret = crypto_aead_setkey(ctx->fback, (u8 *)key,
+					 CHACHA_KEY_SIZE);
+	}
+	if (ret) {
+		crypto_aead_clear_flags(aead, CRYPTO_TFM_REQ_MASK);
+		crypto_aead_set_flags(aead, crypto_aead_get_flags(ctx->fback) &
+					    CRYPTO_TFM_REQ_MASK);
+		return ret;
+	}
+
+	aead_request_set_tfm(subreq, ctx->fback);
+	aead_request_set_callback(subreq, req->base.flags, req->base.complete,
+				  req->base.data);
+	aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+			       req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	return (dir ==  SAFEXCEL_ENCRYPT) ?
+		crypto_aead_encrypt(subreq) :
+		crypto_aead_decrypt(subreq);
+}
+
+static int safexcel_aead_chachapoly_encrypt(struct aead_request *req)
+{
+	return safexcel_aead_chachapoly_crypt(req, SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_aead_chachapoly_decrypt(struct aead_request *req)
+{
+	return safexcel_aead_chachapoly_crypt(req, SAFEXCEL_DECRYPT);
+}
+
+static int safexcel_aead_fallback_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *aead = __crypto_aead_cast(tfm);
+	struct aead_alg *alg = crypto_aead_alg(aead);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+
+	/* Allocate fallback implementation */
+	ctx->fback = crypto_alloc_aead(alg->base.cra_name, 0,
+				       CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fback))
+		return PTR_ERR(ctx->fback);
+
+	crypto_aead_set_reqsize(aead, max(sizeof(struct safexcel_cipher_req),
+					  sizeof(struct aead_request) +
+					  crypto_aead_reqsize(ctx->fback)));
+
+	return 0;
+}
+
+static int safexcel_aead_chachapoly_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_fallback_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_CHACHA20;
+	ctx->mode = CONTEXT_CONTROL_CHACHA20_MODE_256_32 |
+		    CONTEXT_CONTROL_CHACHA20_MODE_CALC_OTK;
+	ctx->ctrinit = 0;
+	ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_POLY1305;
+	ctx->state_sz = 0; /* Precomputed by HW */
+	return 0;
+}
+
+static void safexcel_aead_fallback_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->fback);
+	safexcel_aead_cra_exit(tfm);
+}
+
+struct safexcel_alg_template safexcel_alg_chachapoly = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_CHACHA20 | SAFEXCEL_ALG_POLY1305,
+	.alg.aead = {
+		.setkey = safexcel_aead_chachapoly_setkey,
+		.setauthsize = safexcel_aead_chachapoly_setauthsize,
+		.encrypt = safexcel_aead_chachapoly_encrypt,
+		.decrypt = safexcel_aead_chachapoly_decrypt,
+		.ivsize = CHACHAPOLY_IV_SIZE,
+		.maxauthsize = POLY1305_DIGEST_SIZE,
+		.base = {
+			.cra_name = "rfc7539(chacha20,poly1305)",
+			.cra_driver_name = "safexcel-chacha20-poly1305",
+			/* +1 to put it above HW chacha + SW poly */
+			.cra_priority = SAFEXCEL_CRA_PRIORITY + 1,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_chachapoly_cra_init,
+			.cra_exit = safexcel_aead_fallback_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_chachapolyesp_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = safexcel_aead_chachapoly_cra_init(tfm);
+	ctx->aead  = EIP197_AEAD_TYPE_IPSEC_ESP;
+	ctx->aadskip = EIP197_AEAD_IPSEC_IV_SIZE;
+	return ret;
+}
+
+struct safexcel_alg_template safexcel_alg_chachapoly_esp = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_CHACHA20 | SAFEXCEL_ALG_POLY1305,
+	.alg.aead = {
+		.setkey = safexcel_aead_chachapoly_setkey,
+		.setauthsize = safexcel_aead_chachapoly_setauthsize,
+		.encrypt = safexcel_aead_chachapoly_encrypt,
+		.decrypt = safexcel_aead_chachapoly_decrypt,
+		.ivsize = CHACHAPOLY_IV_SIZE - EIP197_AEAD_IPSEC_NONCE_SIZE,
+		.maxauthsize = POLY1305_DIGEST_SIZE,
+		.base = {
+			.cra_name = "rfc7539esp(chacha20,poly1305)",
+			.cra_driver_name = "safexcel-chacha20-poly1305-esp",
+			/* +1 to put it above HW chacha + SW poly */
+			.cra_priority = SAFEXCEL_CRA_PRIORITY + 1,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_chachapolyesp_cra_init,
+			.cra_exit = safexcel_aead_fallback_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_sm4_setkey(struct crypto_skcipher *ctfm,
+					const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+
+	if (len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma)
+		if (memcmp(ctx->key, key, SM4_KEY_SIZE))
+			ctx->base.needs_inv = true;
+
+	memcpy(ctx->key, key, SM4_KEY_SIZE);
+	ctx->key_len = SM4_KEY_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sm4_blk_encrypt(struct skcipher_request *req)
+{
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if (req->cryptlen & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+	else
+		return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
+					  SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_sm4_blk_decrypt(struct skcipher_request *req)
+{
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if (req->cryptlen & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+	else
+		return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
+					  SAFEXCEL_DECRYPT);
+}
+
+static int safexcel_skcipher_sm4_ecb_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_SM4;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	ctx->blocksz = 0;
+	ctx->ivmask = EIP197_OPTION_2_TOKEN_IV_CMD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_ecb_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_SM4,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_sm4_setkey,
+		.encrypt = safexcel_sm4_blk_encrypt,
+		.decrypt = safexcel_sm4_blk_decrypt,
+		.min_keysize = SM4_KEY_SIZE,
+		.max_keysize = SM4_KEY_SIZE,
+		.base = {
+			.cra_name = "ecb(sm4)",
+			.cra_driver_name = "safexcel-ecb-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_sm4_ecb_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_sm4_cbc_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_cbc_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_SM4,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_sm4_setkey,
+		.encrypt = safexcel_sm4_blk_encrypt,
+		.decrypt = safexcel_sm4_blk_decrypt,
+		.min_keysize = SM4_KEY_SIZE,
+		.max_keysize = SM4_KEY_SIZE,
+		.ivsize = SM4_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(sm4)",
+			.cra_driver_name = "safexcel-cbc-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_sm4_cbc_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_sm4_ofb_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_OFB;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_ofb_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_AES_XFB,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_sm4_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		.min_keysize = SM4_KEY_SIZE,
+		.max_keysize = SM4_KEY_SIZE,
+		.ivsize = SM4_BLOCK_SIZE,
+		.base = {
+			.cra_name = "ofb(sm4)",
+			.cra_driver_name = "safexcel-ofb-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_sm4_ofb_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_sm4_cfb_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CFB;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_cfb_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_AES_XFB,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_sm4_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		.min_keysize = SM4_KEY_SIZE,
+		.max_keysize = SM4_KEY_SIZE,
+		.ivsize = SM4_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cfb(sm4)",
+			.cra_driver_name = "safexcel-cfb-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_sm4_cfb_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_sm4ctr_setkey(struct crypto_skcipher *ctfm,
+					   const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/* last 4 bytes of key are the nonce! */
+	ctx->nonce = *(u32 *)(key + len - CTR_RFC3686_NONCE_SIZE);
+	/* exclude the nonce here */
+	len -= CTR_RFC3686_NONCE_SIZE;
+
+	return safexcel_skcipher_sm4_setkey(ctfm, key, len);
+}
+
+static int safexcel_skcipher_sm4_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_ctr_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_SM4,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_sm4ctr_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		/* Add nonce size */
+		.min_keysize = SM4_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.max_keysize = SM4_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.base = {
+			.cra_name = "rfc3686(ctr(sm4))",
+			.cra_driver_name = "safexcel-ctr-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_sm4_ctr_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sm4_blk_encrypt(struct aead_request *req)
+{
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if (req->cryptlen & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, aead_request_ctx(req),
+				  SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_aead_sm4_blk_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if ((req->cryptlen - crypto_aead_authsize(tfm)) & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, aead_request_ctx(req),
+				  SAFEXCEL_DECRYPT);
+}
+
+static int safexcel_aead_sm4cbc_sha1_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->alg = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
+	ctx->state_sz = SHA1_DIGEST_SIZE;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_SHA1,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_sm4_blk_encrypt,
+		.decrypt = safexcel_aead_sm4_blk_decrypt,
+		.ivsize = SM4_BLOCK_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),cbc(sm4))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sm4cbc_sha1_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_fallback_setkey(struct crypto_aead *ctfm,
+					 const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/* Keep fallback cipher synchronized */
+	return crypto_aead_setkey(ctx->fback, (u8 *)key, len) ?:
+	       safexcel_aead_setkey(ctfm, key, len);
+}
+
+static int safexcel_aead_fallback_setauthsize(struct crypto_aead *ctfm,
+					      unsigned int authsize)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/* Keep fallback cipher synchronized */
+	return crypto_aead_setauthsize(ctx->fback, authsize);
+}
+
+static int safexcel_aead_fallback_crypt(struct aead_request *req,
+					enum safexcel_cipher_direction dir)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aead_request *subreq = aead_request_ctx(req);
+
+	aead_request_set_tfm(subreq, ctx->fback);
+	aead_request_set_callback(subreq, req->base.flags, req->base.complete,
+				  req->base.data);
+	aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+			       req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	return (dir ==  SAFEXCEL_ENCRYPT) ?
+		crypto_aead_encrypt(subreq) :
+		crypto_aead_decrypt(subreq);
+}
+
+static int safexcel_aead_sm4cbc_sm3_encrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if (req->cryptlen & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+	else if (req->cryptlen || req->assoclen) /* If input length > 0 only */
+		return safexcel_queue_req(&req->base, creq, SAFEXCEL_ENCRYPT);
+
+	/* HW cannot do full (AAD+payload) zero length, use fallback */
+	return safexcel_aead_fallback_crypt(req, SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_aead_sm4cbc_sm3_decrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	/* Workaround for HW bug: EIP96 4.3 does not report blocksize error */
+	if ((req->cryptlen - crypto_aead_authsize(tfm)) & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+	else if (req->cryptlen > crypto_aead_authsize(tfm) || req->assoclen)
+		/* If input length > 0 only */
+		return safexcel_queue_req(&req->base, creq, SAFEXCEL_DECRYPT);
+
+	/* HW cannot do full (AAD+payload) zero length, use fallback */
+	return safexcel_aead_fallback_crypt(req, SAFEXCEL_DECRYPT);
+}
+
+static int safexcel_aead_sm4cbc_sm3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_fallback_cra_init(tfm);
+	ctx->alg = SAFEXCEL_SM4;
+	ctx->blocksz = SM4_BLOCK_SIZE;
+	ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_SM3;
+	ctx->state_sz = SM3_DIGEST_SIZE;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_cbc_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_SM3,
+	.alg.aead = {
+		.setkey = safexcel_aead_fallback_setkey,
+		.setauthsize = safexcel_aead_fallback_setauthsize,
+		.encrypt = safexcel_aead_sm4cbc_sm3_encrypt,
+		.decrypt = safexcel_aead_sm4cbc_sm3_decrypt,
+		.ivsize = SM4_BLOCK_SIZE,
+		.maxauthsize = SM3_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sm3),cbc(sm4))",
+			.cra_driver_name = "safexcel-authenc-hmac-sm3-cbc-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sm4cbc_sm3_cra_init,
+			.cra_exit = safexcel_aead_fallback_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sm4ctr_sha1_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sm4cbc_sha1_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_SHA1,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),rfc3686(ctr(sm4)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-ctr-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sm4ctr_sha1_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sm4ctr_sm3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sm4cbc_sm3_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_ctr_sm4 = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_SM4 | SAFEXCEL_ALG_SM3,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SM3_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sm3),rfc3686(ctr(sm4)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sm3-ctr-sm4",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sm4ctr_sm3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_rfc4106_gcm_setkey(struct crypto_aead *ctfm, const u8 *key,
+				       unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/* last 4 bytes of key are the nonce! */
+	ctx->nonce = *(u32 *)(key + len - CTR_RFC3686_NONCE_SIZE);
+
+	len -= CTR_RFC3686_NONCE_SIZE;
+	return safexcel_aead_gcm_setkey(ctfm, key, len);
+}
+
+static int safexcel_rfc4106_gcm_setauthsize(struct crypto_aead *tfm,
+					    unsigned int authsize)
+{
+	return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int safexcel_rfc4106_encrypt(struct aead_request *req)
+{
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       safexcel_aead_encrypt(req);
+}
+
+static int safexcel_rfc4106_decrypt(struct aead_request *req)
+{
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       safexcel_aead_decrypt(req);
+}
+
+static int safexcel_rfc4106_gcm_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = safexcel_aead_gcm_cra_init(tfm);
+	ctx->aead  = EIP197_AEAD_TYPE_IPSEC_ESP;
+	ctx->aadskip = EIP197_AEAD_IPSEC_IV_SIZE;
+	return ret;
+}
+
+struct safexcel_alg_template safexcel_alg_rfc4106_gcm = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_GHASH,
+	.alg.aead = {
+		.setkey = safexcel_rfc4106_gcm_setkey,
+		.setauthsize = safexcel_rfc4106_gcm_setauthsize,
+		.encrypt = safexcel_rfc4106_encrypt,
+		.decrypt = safexcel_rfc4106_decrypt,
+		.ivsize = GCM_RFC4106_IV_SIZE,
+		.maxauthsize = GHASH_DIGEST_SIZE,
+		.base = {
+			.cra_name = "rfc4106(gcm(aes))",
+			.cra_driver_name = "safexcel-rfc4106-gcm-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_rfc4106_gcm_cra_init,
+			.cra_exit = safexcel_aead_gcm_cra_exit,
+		},
+	},
+};
+
+static int safexcel_rfc4543_gcm_setauthsize(struct crypto_aead *tfm,
+					    unsigned int authsize)
+{
+	if (authsize != GHASH_DIGEST_SIZE)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int safexcel_rfc4543_gcm_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = safexcel_aead_gcm_cra_init(tfm);
+	ctx->aead  = EIP197_AEAD_TYPE_IPSEC_ESP_GMAC;
+	return ret;
+}
+
+struct safexcel_alg_template safexcel_alg_rfc4543_gcm = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_GHASH,
+	.alg.aead = {
+		.setkey = safexcel_rfc4106_gcm_setkey,
+		.setauthsize = safexcel_rfc4543_gcm_setauthsize,
+		.encrypt = safexcel_rfc4106_encrypt,
+		.decrypt = safexcel_rfc4106_decrypt,
+		.ivsize = GCM_RFC4543_IV_SIZE,
+		.maxauthsize = GHASH_DIGEST_SIZE,
+		.base = {
+			.cra_name = "rfc4543(gcm(aes))",
+			.cra_driver_name = "safexcel-rfc4543-gcm-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_rfc4543_gcm_cra_init,
+			.cra_exit = safexcel_aead_gcm_cra_exit,
+		},
+	},
+};
+
+static int safexcel_rfc4309_ccm_setkey(struct crypto_aead *ctfm, const u8 *key,
+				       unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/* First byte of the nonce = L = always 3 for RFC4309 (4 byte ctr) */
+	*(u8 *)&ctx->nonce = EIP197_AEAD_IPSEC_COUNTER_SIZE - 1;
+	/* last 3 bytes of key are the nonce! */
+	memcpy((u8 *)&ctx->nonce + 1, key + len -
+	       EIP197_AEAD_IPSEC_CCM_NONCE_SIZE,
+	       EIP197_AEAD_IPSEC_CCM_NONCE_SIZE);
+
+	len -= EIP197_AEAD_IPSEC_CCM_NONCE_SIZE;
+	return safexcel_aead_ccm_setkey(ctfm, key, len);
+}
+
+static int safexcel_rfc4309_ccm_setauthsize(struct crypto_aead *tfm,
+					    unsigned int authsize)
+{
+	/* Borrowed from crypto/ccm.c */
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int safexcel_rfc4309_ccm_encrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	/* Borrowed from crypto/ccm.c */
+	if (req->assoclen != 16 && req->assoclen != 20)
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_rfc4309_ccm_decrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	/* Borrowed from crypto/ccm.c */
+	if (req->assoclen != 16 && req->assoclen != 20)
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_DECRYPT);
+}
+
+static int safexcel_rfc4309_ccm_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = safexcel_aead_ccm_cra_init(tfm);
+	ctx->aead  = EIP197_AEAD_TYPE_IPSEC_ESP;
+	ctx->aadskip = EIP197_AEAD_IPSEC_IV_SIZE;
+	return ret;
+}
+
+struct safexcel_alg_template safexcel_alg_rfc4309_ccm = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_CBC_MAC_ALL,
+	.alg.aead = {
+		.setkey = safexcel_rfc4309_ccm_setkey,
+		.setauthsize = safexcel_rfc4309_ccm_setauthsize,
+		.encrypt = safexcel_rfc4309_ccm_encrypt,
+		.decrypt = safexcel_rfc4309_ccm_decrypt,
+		.ivsize = EIP197_AEAD_IPSEC_IV_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "rfc4309(ccm(aes))",
+			.cra_driver_name = "safexcel-rfc4309-ccm-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_rfc4309_ccm_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 2effb6d21e8b..43962bc709c6 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -5,9 +5,13 @@
  * Antoine Tenart <antoine.tenart@free-electrons.com>
  */
 
+#include <crypto/aes.h>
 #include <crypto/hmac.h>
 #include <crypto/md5.h>
 #include <crypto/sha.h>
+#include <crypto/sha3.h>
+#include <crypto/skcipher.h>
+#include <crypto/sm3.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
@@ -19,9 +23,19 @@ struct safexcel_ahash_ctx {
 	struct safexcel_crypto_priv *priv;
 
 	u32 alg;
-
-	u32 ipad[SHA512_DIGEST_SIZE / sizeof(u32)];
-	u32 opad[SHA512_DIGEST_SIZE / sizeof(u32)];
+	u8  key_sz;
+	bool cbcmac;
+	bool do_fallback;
+	bool fb_init_done;
+	bool fb_do_setkey;
+
+	__le32 ipad[SHA3_512_BLOCK_SIZE / sizeof(__le32)];
+	__le32 opad[SHA3_512_BLOCK_SIZE / sizeof(__le32)];
+
+	struct crypto_cipher *kaes;
+	struct crypto_ahash *fback;
+	struct crypto_shash *shpre;
+	struct shash_desc *shdesc;
 };
 
 struct safexcel_ahash_req {
@@ -31,6 +45,8 @@ struct safexcel_ahash_req {
 	bool needs_inv;
 	bool hmac_zlen;
 	bool len_is_le;
+	bool not_first;
+	bool xcbcmac;
 
 	int nents;
 	dma_addr_t result_dma;
@@ -39,7 +55,9 @@ struct safexcel_ahash_req {
 
 	u8 state_sz;    /* expected state size, only set once */
 	u8 block_sz;    /* block size, only set once */
-	u32 state[SHA512_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
+	u8 digest_sz;   /* output digest size, only set once */
+	__le32 state[SHA3_512_BLOCK_SIZE /
+		     sizeof(__le32)] __aligned(sizeof(__le32));
 
 	u64 len;
 	u64 processed;
@@ -57,22 +75,36 @@ static inline u64 safexcel_queued_len(struct safexcel_ahash_req *req)
 }
 
 static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
-				u32 input_length, u32 result_length)
+				u32 input_length, u32 result_length,
+				bool cbcmac)
 {
 	struct safexcel_token *token =
 		(struct safexcel_token *)cdesc->control_data.token;
 
 	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
 	token[0].packet_length = input_length;
-	token[0].stat = EIP197_TOKEN_STAT_LAST_HASH;
 	token[0].instructions = EIP197_TOKEN_INS_TYPE_HASH;
 
-	token[1].opcode = EIP197_TOKEN_OPCODE_INSERT;
-	token[1].packet_length = result_length;
-	token[1].stat = EIP197_TOKEN_STAT_LAST_HASH |
+	input_length &= 15;
+	if (unlikely(cbcmac && input_length)) {
+		token[0].stat =  0;
+		token[1].opcode = EIP197_TOKEN_OPCODE_INSERT;
+		token[1].packet_length = 16 - input_length;
+		token[1].stat = EIP197_TOKEN_STAT_LAST_HASH;
+		token[1].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+	} else {
+		token[0].stat = EIP197_TOKEN_STAT_LAST_HASH;
+		eip197_noop_token(&token[1]);
+	}
+
+	token[2].opcode = EIP197_TOKEN_OPCODE_INSERT;
+	token[2].stat = EIP197_TOKEN_STAT_LAST_HASH |
 			EIP197_TOKEN_STAT_LAST_PACKET;
-	token[1].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+	token[2].packet_length = result_length;
+	token[2].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
 				EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+
+	eip197_noop_token(&token[3]);
 }
 
 static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
@@ -82,29 +114,49 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	u64 count = 0;
 
-	cdesc->control_data.control0 |= ctx->alg;
+	cdesc->control_data.control0 = ctx->alg;
+	cdesc->control_data.control1 = 0;
 
 	/*
 	 * Copy the input digest if needed, and setup the context
 	 * fields. Do this now as we need it to setup the first command
 	 * descriptor.
 	 */
-	if (!req->processed) {
-		/* First - and possibly only - block of basic hash only */
-		if (req->finish) {
+	if (unlikely(req->digest == CONTEXT_CONTROL_DIGEST_XCM)) {
+		if (req->xcbcmac)
+			memcpy(ctx->base.ctxr->data, ctx->ipad, ctx->key_sz);
+		else
+			memcpy(ctx->base.ctxr->data, req->state, req->state_sz);
+
+		if (!req->finish && req->xcbcmac)
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_DIGEST_XCM |
+				CONTEXT_CONTROL_TYPE_HASH_OUT  |
+				CONTEXT_CONTROL_NO_FINISH_HASH |
+				CONTEXT_CONTROL_SIZE(req->state_sz /
+						     sizeof(u32));
+		else
 			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_DIGEST_XCM |
+				CONTEXT_CONTROL_TYPE_HASH_OUT  |
+				CONTEXT_CONTROL_SIZE(req->state_sz /
+						     sizeof(u32));
+		return;
+	} else if (!req->processed) {
+		/* First - and possibly only - block of basic hash only */
+		if (req->finish)
+			cdesc->control_data.control0 |= req->digest |
 				CONTEXT_CONTROL_TYPE_HASH_OUT |
 				CONTEXT_CONTROL_RESTART_HASH  |
 				/* ensure its not 0! */
 				CONTEXT_CONTROL_SIZE(1);
-		} else {
-			cdesc->control_data.control0 |=
+		else
+			cdesc->control_data.control0 |= req->digest |
 				CONTEXT_CONTROL_TYPE_HASH_OUT  |
 				CONTEXT_CONTROL_RESTART_HASH   |
 				CONTEXT_CONTROL_NO_FINISH_HASH |
 				/* ensure its not 0! */
 				CONTEXT_CONTROL_SIZE(1);
-		}
 		return;
 	}
 
@@ -204,7 +256,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv,
 	}
 
 	if (sreq->result_dma) {
-		dma_unmap_single(priv->dev, sreq->result_dma, sreq->state_sz,
+		dma_unmap_single(priv->dev, sreq->result_dma, sreq->digest_sz,
 				 DMA_FROM_DEVICE);
 		sreq->result_dma = 0;
 	}
@@ -223,14 +275,15 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv,
 			memcpy(sreq->cache, sreq->state,
 			       crypto_ahash_digestsize(ahash));
 
-			memcpy(sreq->state, ctx->opad, sreq->state_sz);
+			memcpy(sreq->state, ctx->opad, sreq->digest_sz);
 
 			sreq->len = sreq->block_sz +
 				    crypto_ahash_digestsize(ahash);
 			sreq->processed = sreq->block_sz;
 			sreq->hmac = 0;
 
-			ctx->base.needs_inv = true;
+			if (priv->flags & EIP197_TRC_CACHE)
+				ctx->base.needs_inv = true;
 			areq->nbytes = 0;
 			safexcel_ahash_enqueue(areq);
 
@@ -238,8 +291,14 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv,
 			return 1;
 		}
 
-		memcpy(areq->result, sreq->state,
-		       crypto_ahash_digestsize(ahash));
+		if (unlikely(sreq->digest == CONTEXT_CONTROL_DIGEST_XCM &&
+			     ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_CRC32)) {
+			/* Undo final XOR with 0xffffffff ...*/
+			*(__le32 *)areq->result = ~sreq->state[0];
+		} else {
+			memcpy(areq->result, sreq->state,
+			       crypto_ahash_digestsize(ahash));
+		}
 	}
 
 	cache_len = safexcel_queued_len(sreq);
@@ -261,10 +320,11 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
 	struct safexcel_result_desc *rdesc;
 	struct scatterlist *sg;
-	int i, extra = 0, n_cdesc = 0, ret = 0;
-	u64 queued, len, cache_len;
+	struct safexcel_token *dmmy;
+	int i, extra = 0, n_cdesc = 0, ret = 0, cache_len, skip = 0;
+	u64 queued, len;
 
-	queued = len = safexcel_queued_len(req);
+	queued = safexcel_queued_len(req);
 	if (queued <= HASH_CACHE_SIZE)
 		cache_len = queued;
 	else
@@ -287,15 +347,52 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 				   areq->nbytes - extra);
 
 		queued -= extra;
-		len -= extra;
 
 		if (!queued) {
 			*commands = 0;
 			*results = 0;
 			return 0;
 		}
+
+		extra = 0;
+	}
+
+	if (unlikely(req->xcbcmac && req->processed > AES_BLOCK_SIZE)) {
+		if (unlikely(cache_len < AES_BLOCK_SIZE)) {
+			/*
+			 * Cache contains less than 1 full block, complete.
+			 */
+			extra = AES_BLOCK_SIZE - cache_len;
+			if (queued > cache_len) {
+				/* More data follows: borrow bytes */
+				u64 tmp = queued - cache_len;
+
+				skip = min_t(u64, tmp, extra);
+				sg_pcopy_to_buffer(areq->src,
+					sg_nents(areq->src),
+					req->cache + cache_len,
+					skip, 0);
+			}
+			extra -= skip;
+			memset(req->cache + cache_len + skip, 0, extra);
+			if (!ctx->cbcmac && extra) {
+				// 10- padding for XCBCMAC & CMAC
+				req->cache[cache_len + skip] = 0x80;
+				// HW will use K2 iso K3 - compensate!
+				for (i = 0; i < AES_BLOCK_SIZE / sizeof(u32); i++)
+					((__be32 *)req->cache)[i] ^=
+					  cpu_to_be32(le32_to_cpu(
+					    ctx->ipad[i] ^ ctx->ipad[i + 4]));
+			}
+			cache_len = AES_BLOCK_SIZE;
+			queued = queued + extra;
+		}
+
+		/* XCBC continue: XOR previous result into 1st word */
+		crypto_xor(req->cache, (const u8 *)req->state, AES_BLOCK_SIZE);
 	}
 
+	len = queued;
 	/* Add a command descriptor for the cached data, if any */
 	if (cache_len) {
 		req->cache_dma = dma_map_single(priv->dev, req->cache,
@@ -306,8 +403,9 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 		req->cache_sz = cache_len;
 		first_cdesc = safexcel_add_cdesc(priv, ring, 1,
 						 (cache_len == len),
-						 req->cache_dma, cache_len, len,
-						 ctx->base.ctxr_dma);
+						 req->cache_dma, cache_len,
+						 len, ctx->base.ctxr_dma,
+						 &dmmy);
 		if (IS_ERR(first_cdesc)) {
 			ret = PTR_ERR(first_cdesc);
 			goto unmap_cache;
@@ -319,10 +417,6 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 			goto send_command;
 	}
 
-	/* Skip descriptor generation for zero-length requests */
-	if (!areq->nbytes)
-		goto send_command;
-
 	/* Now handle the current ahash request buffer(s) */
 	req->nents = dma_map_sg(priv->dev, areq->src,
 				sg_nents_for_len(areq->src,
@@ -336,26 +430,34 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	for_each_sg(areq->src, sg, req->nents, i) {
 		int sglen = sg_dma_len(sg);
 
+		if (unlikely(sglen <= skip)) {
+			skip -= sglen;
+			continue;
+		}
+
 		/* Do not overflow the request */
-		if (queued < sglen)
+		if ((queued + skip) <= sglen)
 			sglen = queued;
+		else
+			sglen -= skip;
 
 		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
 					   !(queued - sglen),
-					   sg_dma_address(sg),
-					   sglen, len, ctx->base.ctxr_dma);
+					   sg_dma_address(sg) + skip, sglen,
+					   len, ctx->base.ctxr_dma, &dmmy);
 		if (IS_ERR(cdesc)) {
 			ret = PTR_ERR(cdesc);
 			goto unmap_sg;
 		}
-		n_cdesc++;
 
-		if (n_cdesc == 1)
+		if (!n_cdesc)
 			first_cdesc = cdesc;
+		n_cdesc++;
 
 		queued -= sglen;
 		if (!queued)
 			break;
+		skip = 0;
 	}
 
 send_command:
@@ -363,9 +465,9 @@ send_command:
 	safexcel_context_control(ctx, req, first_cdesc);
 
 	/* Add the token */
-	safexcel_hash_token(first_cdesc, len, req->state_sz);
+	safexcel_hash_token(first_cdesc, len, req->digest_sz, ctx->cbcmac);
 
-	req->result_dma = dma_map_single(priv->dev, req->state, req->state_sz,
+	req->result_dma = dma_map_single(priv->dev, req->state, req->digest_sz,
 					 DMA_FROM_DEVICE);
 	if (dma_mapping_error(priv->dev, req->result_dma)) {
 		ret = -EINVAL;
@@ -374,7 +476,7 @@ send_command:
 
 	/* Add a result descriptor */
 	rdesc = safexcel_add_rdesc(priv, ring, 1, 1, req->result_dma,
-				   req->state_sz);
+				   req->digest_sz);
 	if (IS_ERR(rdesc)) {
 		ret = PTR_ERR(rdesc);
 		goto unmap_result;
@@ -382,17 +484,20 @@ send_command:
 
 	safexcel_rdr_req_set(priv, ring, rdesc, &areq->base);
 
-	req->processed += len;
+	req->processed += len - extra;
 
 	*commands = n_cdesc;
 	*results = 1;
 	return 0;
 
 unmap_result:
-	dma_unmap_single(priv->dev, req->result_dma, req->state_sz,
+	dma_unmap_single(priv->dev, req->result_dma, req->digest_sz,
 			 DMA_FROM_DEVICE);
 unmap_sg:
-	dma_unmap_sg(priv->dev, areq->src, req->nents, DMA_TO_DEVICE);
+	if (req->nents) {
+		dma_unmap_sg(priv->dev, areq->src, req->nents, DMA_TO_DEVICE);
+		req->nents = 0;
+	}
 cdesc_rollback:
 	for (i = 0; i < n_cdesc; i++)
 		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
@@ -590,16 +695,12 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 
 	if (ctx->base.ctxr) {
 		if (priv->flags & EIP197_TRC_CACHE && !ctx->base.needs_inv &&
-		    req->processed &&
-		    (/* invalidate for basic hash continuation finish */
-		     (req->finish &&
-		      (req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)) ||
+		     /* invalidate for *any* non-XCBC continuation */
+		   ((req->not_first && !req->xcbcmac) ||
 		     /* invalidate if (i)digest changed */
 		     memcmp(ctx->base.ctxr->data, req->state, req->state_sz) ||
-		     /* invalidate for HMAC continuation finish */
-		     (req->finish && (req->processed != req->block_sz)) ||
 		     /* invalidate for HMAC finish with odigest changed */
-		     (req->finish &&
+		     (req->finish && req->hmac &&
 		      memcmp(ctx->base.ctxr->data + (req->state_sz>>2),
 			     ctx->opad, req->state_sz))))
 			/*
@@ -622,6 +723,7 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 		if (!ctx->base.ctxr)
 			return -ENOMEM;
 	}
+	req->not_first = true;
 
 	ring = ctx->base.ring;
 
@@ -691,8 +793,34 @@ static int safexcel_ahash_final(struct ahash_request *areq)
 		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA512)
 			memcpy(areq->result, sha512_zero_message_hash,
 			       SHA512_DIGEST_SIZE);
+		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SM3) {
+			memcpy(areq->result,
+			       EIP197_SM3_ZEROM_HASH, SM3_DIGEST_SIZE);
+		}
 
 		return 0;
+	} else if (unlikely(req->digest == CONTEXT_CONTROL_DIGEST_XCM &&
+			    ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_MD5 &&
+			    req->len == sizeof(u32) && !areq->nbytes)) {
+		/* Zero length CRC32 */
+		memcpy(areq->result, ctx->ipad, sizeof(u32));
+		return 0;
+	} else if (unlikely(ctx->cbcmac && req->len == AES_BLOCK_SIZE &&
+			    !areq->nbytes)) {
+		/* Zero length CBC MAC */
+		memset(areq->result, 0, AES_BLOCK_SIZE);
+		return 0;
+	} else if (unlikely(req->xcbcmac && req->len == AES_BLOCK_SIZE &&
+			    !areq->nbytes)) {
+		/* Zero length (X)CBC/CMAC */
+		int i;
+
+		for (i = 0; i < AES_BLOCK_SIZE / sizeof(u32); i++)
+			((__be32 *)areq->result)[i] =
+				cpu_to_be32(le32_to_cpu(ctx->ipad[i + 4]));//K3
+		areq->result[0] ^= 0x80;			// 10- padding
+		crypto_cipher_encrypt_one(ctx->kaes, areq->result, areq->result);
+		return 0;
 	} else if (unlikely(req->hmac &&
 			    (req->len == req->block_sz) &&
 			    !areq->nbytes)) {
@@ -792,6 +920,7 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
 	ctx->priv = tmpl->priv;
 	ctx->base.send = safexcel_ahash_send;
 	ctx->base.handle_result = safexcel_handle_result;
+	ctx->fb_do_setkey = false;
 
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct safexcel_ahash_req));
@@ -808,6 +937,7 @@ static int safexcel_sha1_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA1_DIGEST_SIZE;
+	req->digest_sz = SHA1_DIGEST_SIZE;
 	req->block_sz = SHA1_BLOCK_SIZE;
 
 	return 0;
@@ -889,6 +1019,7 @@ static int safexcel_hmac_sha1_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA1_DIGEST_SIZE;
+	req->digest_sz = SHA1_DIGEST_SIZE;
 	req->block_sz = SHA1_BLOCK_SIZE;
 	req->hmac = true;
 
@@ -1125,6 +1256,7 @@ static int safexcel_sha256_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->digest_sz = SHA256_DIGEST_SIZE;
 	req->block_sz = SHA256_BLOCK_SIZE;
 
 	return 0;
@@ -1180,6 +1312,7 @@ static int safexcel_sha224_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->digest_sz = SHA256_DIGEST_SIZE;
 	req->block_sz = SHA256_BLOCK_SIZE;
 
 	return 0;
@@ -1248,6 +1381,7 @@ static int safexcel_hmac_sha224_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->digest_sz = SHA256_DIGEST_SIZE;
 	req->block_sz = SHA256_BLOCK_SIZE;
 	req->hmac = true;
 
@@ -1318,6 +1452,7 @@ static int safexcel_hmac_sha256_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->digest_sz = SHA256_DIGEST_SIZE;
 	req->block_sz = SHA256_BLOCK_SIZE;
 	req->hmac = true;
 
@@ -1375,6 +1510,7 @@ static int safexcel_sha512_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA512;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->digest_sz = SHA512_DIGEST_SIZE;
 	req->block_sz = SHA512_BLOCK_SIZE;
 
 	return 0;
@@ -1430,6 +1566,7 @@ static int safexcel_sha384_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA384;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->digest_sz = SHA512_DIGEST_SIZE;
 	req->block_sz = SHA512_BLOCK_SIZE;
 
 	return 0;
@@ -1498,6 +1635,7 @@ static int safexcel_hmac_sha512_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA512;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->digest_sz = SHA512_DIGEST_SIZE;
 	req->block_sz = SHA512_BLOCK_SIZE;
 	req->hmac = true;
 
@@ -1568,6 +1706,7 @@ static int safexcel_hmac_sha384_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA384;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->digest_sz = SHA512_DIGEST_SIZE;
 	req->block_sz = SHA512_BLOCK_SIZE;
 	req->hmac = true;
 
@@ -1625,6 +1764,7 @@ static int safexcel_md5_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_MD5;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = MD5_DIGEST_SIZE;
+	req->digest_sz = MD5_DIGEST_SIZE;
 	req->block_sz = MD5_HMAC_BLOCK_SIZE;
 
 	return 0;
@@ -1686,6 +1826,7 @@ static int safexcel_hmac_md5_init(struct ahash_request *areq)
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_MD5;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = MD5_DIGEST_SIZE;
+	req->digest_sz = MD5_DIGEST_SIZE;
 	req->block_sz = MD5_HMAC_BLOCK_SIZE;
 	req->len_is_le = true; /* MD5 is little endian! ... */
 	req->hmac = true;
@@ -1740,3 +1881,1233 @@ struct safexcel_alg_template safexcel_alg_hmac_md5 = {
 		},
 	},
 };
+
+static int safexcel_crc32_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret = safexcel_ahash_cra_init(tfm);
+
+	/* Default 'key' is all zeroes */
+	memset(ctx->ipad, 0, sizeof(u32));
+	return ret;
+}
+
+static int safexcel_crc32_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Start from loaded key */
+	req->state[0]	= (__force __le32)le32_to_cpu(~ctx->ipad[0]);
+	/* Set processed to non-zero to enable invalidation detection */
+	req->len	= sizeof(u32);
+	req->processed	= sizeof(u32);
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_CRC32;
+	req->digest = CONTEXT_CONTROL_DIGEST_XCM;
+	req->state_sz = sizeof(u32);
+	req->digest_sz = sizeof(u32);
+	req->block_sz = sizeof(u32);
+
+	return 0;
+}
+
+static int safexcel_crc32_setkey(struct crypto_ahash *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+
+	if (keylen != sizeof(u32))
+		return -EINVAL;
+
+	memcpy(ctx->ipad, key, sizeof(u32));
+	return 0;
+}
+
+static int safexcel_crc32_digest(struct ahash_request *areq)
+{
+	return safexcel_crc32_init(areq) ?: safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_crc32 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = 0,
+	.alg.ahash = {
+		.init = safexcel_crc32_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_crc32_digest,
+		.setkey = safexcel_crc32_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = sizeof(u32),
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "crc32",
+				.cra_driver_name = "safexcel-crc32",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_OPTIONAL_KEY |
+					     CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = 1,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_crc32_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_cbcmac_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Start from loaded keys */
+	memcpy(req->state, ctx->ipad, ctx->key_sz);
+	/* Set processed to non-zero to enable invalidation detection */
+	req->len	= AES_BLOCK_SIZE;
+	req->processed	= AES_BLOCK_SIZE;
+
+	req->digest   = CONTEXT_CONTROL_DIGEST_XCM;
+	req->state_sz = ctx->key_sz;
+	req->digest_sz = AES_BLOCK_SIZE;
+	req->block_sz = AES_BLOCK_SIZE;
+	req->xcbcmac  = true;
+
+	return 0;
+}
+
+static int safexcel_cbcmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+				 unsigned int len)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct crypto_aes_ctx aes;
+	int ret, i;
+
+	ret = aes_expandkey(&aes, key, len);
+	if (ret)
+		return ret;
+
+	memset(ctx->ipad, 0, 2 * AES_BLOCK_SIZE);
+	for (i = 0; i < len / sizeof(u32); i++)
+		ctx->ipad[i + 8] = (__force __le32)cpu_to_be32(aes.key_enc[i]);
+
+	if (len == AES_KEYSIZE_192) {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC192;
+		ctx->key_sz = AES_MAX_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	} else if (len == AES_KEYSIZE_256) {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC256;
+		ctx->key_sz = AES_MAX_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	} else {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC128;
+		ctx->key_sz = AES_MIN_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	}
+	ctx->cbcmac  = true;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+static int safexcel_cbcmac_digest(struct ahash_request *areq)
+{
+	return safexcel_cbcmac_init(areq) ?: safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_cbcmac = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = 0,
+	.alg.ahash = {
+		.init = safexcel_cbcmac_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_cbcmac_digest,
+		.setkey = safexcel_cbcmac_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = AES_BLOCK_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "cbcmac(aes)",
+				.cra_driver_name = "safexcel-cbcmac-aes",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = 1,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_xcbcmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+				 unsigned int len)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct crypto_aes_ctx aes;
+	u32 key_tmp[3 * AES_BLOCK_SIZE / sizeof(u32)];
+	int ret, i;
+
+	ret = aes_expandkey(&aes, key, len);
+	if (ret)
+		return ret;
+
+	/* precompute the XCBC key material */
+	crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(ctx->kaes, crypto_ahash_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	ret = crypto_cipher_setkey(ctx->kaes, key, len);
+	if (ret)
+		return ret;
+
+	crypto_cipher_encrypt_one(ctx->kaes, (u8 *)key_tmp + 2 * AES_BLOCK_SIZE,
+		"\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1");
+	crypto_cipher_encrypt_one(ctx->kaes, (u8 *)key_tmp,
+		"\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2\x2");
+	crypto_cipher_encrypt_one(ctx->kaes, (u8 *)key_tmp + AES_BLOCK_SIZE,
+		"\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3");
+	for (i = 0; i < 3 * AES_BLOCK_SIZE / sizeof(u32); i++)
+		ctx->ipad[i] =
+			cpu_to_le32((__force u32)cpu_to_be32(key_tmp[i]));
+
+	crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(ctx->kaes, crypto_ahash_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	ret = crypto_cipher_setkey(ctx->kaes,
+				   (u8 *)key_tmp + 2 * AES_BLOCK_SIZE,
+				   AES_MIN_KEY_SIZE);
+	if (ret)
+		return ret;
+
+	ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC128;
+	ctx->key_sz = AES_MIN_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	ctx->cbcmac = false;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+static int safexcel_xcbcmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_ahash_cra_init(tfm);
+	ctx->kaes = crypto_alloc_cipher("aes", 0, 0);
+	return PTR_ERR_OR_ZERO(ctx->kaes);
+}
+
+static void safexcel_xcbcmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(ctx->kaes);
+	safexcel_ahash_cra_exit(tfm);
+}
+
+struct safexcel_alg_template safexcel_alg_xcbcmac = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = 0,
+	.alg.ahash = {
+		.init = safexcel_cbcmac_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_cbcmac_digest,
+		.setkey = safexcel_xcbcmac_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = AES_BLOCK_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "xcbc(aes)",
+				.cra_driver_name = "safexcel-xcbc-aes",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = AES_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_xcbcmac_cra_init,
+				.cra_exit = safexcel_xcbcmac_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+				unsigned int len)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct crypto_aes_ctx aes;
+	__be64 consts[4];
+	u64 _const[2];
+	u8 msb_mask, gfmask;
+	int ret, i;
+
+	ret = aes_expandkey(&aes, key, len);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < len / sizeof(u32); i++)
+		ctx->ipad[i + 8] =
+			cpu_to_le32((__force u32)cpu_to_be32(aes.key_enc[i]));
+
+	/* precompute the CMAC key material */
+	crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(ctx->kaes, crypto_ahash_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	ret = crypto_cipher_setkey(ctx->kaes, key, len);
+	if (ret)
+		return ret;
+
+	/* code below borrowed from crypto/cmac.c */
+	/* encrypt the zero block */
+	memset(consts, 0, AES_BLOCK_SIZE);
+	crypto_cipher_encrypt_one(ctx->kaes, (u8 *)consts, (u8 *)consts);
+
+	gfmask = 0x87;
+	_const[0] = be64_to_cpu(consts[1]);
+	_const[1] = be64_to_cpu(consts[0]);
+
+	/* gf(2^128) multiply zero-ciphertext with u and u^2 */
+	for (i = 0; i < 4; i += 2) {
+		msb_mask = ((s64)_const[1] >> 63) & gfmask;
+		_const[1] = (_const[1] << 1) | (_const[0] >> 63);
+		_const[0] = (_const[0] << 1) ^ msb_mask;
+
+		consts[i + 0] = cpu_to_be64(_const[1]);
+		consts[i + 1] = cpu_to_be64(_const[0]);
+	}
+	/* end of code borrowed from crypto/cmac.c */
+
+	for (i = 0; i < 2 * AES_BLOCK_SIZE / sizeof(u32); i++)
+		ctx->ipad[i] = (__force __le32)cpu_to_be32(((u32 *)consts)[i]);
+
+	if (len == AES_KEYSIZE_192) {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC192;
+		ctx->key_sz = AES_MAX_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	} else if (len == AES_KEYSIZE_256) {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC256;
+		ctx->key_sz = AES_MAX_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	} else {
+		ctx->alg    = CONTEXT_CONTROL_CRYPTO_ALG_XCBC128;
+		ctx->key_sz = AES_MIN_KEY_SIZE + 2 * AES_BLOCK_SIZE;
+	}
+	ctx->cbcmac = false;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_cmac = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = 0,
+	.alg.ahash = {
+		.init = safexcel_cbcmac_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_cbcmac_digest,
+		.setkey = safexcel_cmac_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = AES_BLOCK_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "cmac(aes)",
+				.cra_driver_name = "safexcel-cmac-aes",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = AES_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_xcbcmac_cra_init,
+				.cra_exit = safexcel_xcbcmac_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sm3_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SM3;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SM3_DIGEST_SIZE;
+	req->digest_sz = SM3_DIGEST_SIZE;
+	req->block_sz = SM3_BLOCK_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sm3_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sm3_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_sm3 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SM3,
+	.alg.ahash = {
+		.init = safexcel_sm3_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sm3_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SM3_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sm3",
+				.cra_driver_name = "safexcel-sm3",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SM3_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sm3_setkey(struct crypto_ahash *tfm, const u8 *key,
+				    unsigned int keylen)
+{
+	return safexcel_hmac_alg_setkey(tfm, key, keylen, "safexcel-sm3",
+					SM3_DIGEST_SIZE);
+}
+
+static int safexcel_hmac_sm3_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SM3_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SM3_BLOCK_SIZE;
+	req->processed	= SM3_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SM3;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SM3_DIGEST_SIZE;
+	req->digest_sz = SM3_DIGEST_SIZE;
+	req->block_sz = SM3_BLOCK_SIZE;
+	req->hmac = true;
+
+	return 0;
+}
+
+static int safexcel_hmac_sm3_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_hmac_sm3_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sm3 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SM3,
+	.alg.ahash = {
+		.init = safexcel_hmac_sm3_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_hmac_sm3_digest,
+		.setkey = safexcel_hmac_sm3_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SM3_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sm3)",
+				.cra_driver_name = "safexcel-hmac-sm3",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SM3_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha3_224_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_224;
+	req->digest = CONTEXT_CONTROL_DIGEST_INITIAL;
+	req->state_sz = SHA3_224_DIGEST_SIZE;
+	req->digest_sz = SHA3_224_DIGEST_SIZE;
+	req->block_sz = SHA3_224_BLOCK_SIZE;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_sha3_fbcheck(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+	int ret = 0;
+
+	if (ctx->do_fallback) {
+		ahash_request_set_tfm(subreq, ctx->fback);
+		ahash_request_set_callback(subreq, req->base.flags,
+					   req->base.complete, req->base.data);
+		ahash_request_set_crypt(subreq, req->src, req->result,
+					req->nbytes);
+		if (!ctx->fb_init_done) {
+			if (ctx->fb_do_setkey) {
+				/* Set fallback cipher HMAC key */
+				u8 key[SHA3_224_BLOCK_SIZE];
+
+				memcpy(key, ctx->ipad,
+				       crypto_ahash_blocksize(ctx->fback) / 2);
+				memcpy(key +
+				       crypto_ahash_blocksize(ctx->fback) / 2,
+				       ctx->opad,
+				       crypto_ahash_blocksize(ctx->fback) / 2);
+				ret = crypto_ahash_setkey(ctx->fback, key,
+					crypto_ahash_blocksize(ctx->fback));
+				memzero_explicit(key,
+					crypto_ahash_blocksize(ctx->fback));
+				ctx->fb_do_setkey = false;
+			}
+			ret = ret ?: crypto_ahash_init(subreq);
+			ctx->fb_init_done = true;
+		}
+	}
+	return ret;
+}
+
+static int safexcel_sha3_update(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback = true;
+	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_update(subreq);
+}
+
+static int safexcel_sha3_final(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback = true;
+	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_final(subreq);
+}
+
+static int safexcel_sha3_finup(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback |= !req->nbytes;
+	if (ctx->do_fallback)
+		/* Update or ex/import happened or len 0, cannot use the HW */
+		return safexcel_sha3_fbcheck(req) ?:
+		       crypto_ahash_finup(subreq);
+	else
+		return safexcel_ahash_finup(req);
+}
+
+static int safexcel_sha3_digest_fallback(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback = true;
+	ctx->fb_init_done = false;
+	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_finup(subreq);
+}
+
+static int safexcel_sha3_224_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_sha3_224_init(req) ?: safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length hash, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+static int safexcel_sha3_export(struct ahash_request *req, void *out)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback = true;
+	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_export(subreq, out);
+}
+
+static int safexcel_sha3_import(struct ahash_request *req, const void *in)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *subreq = ahash_request_ctx(req);
+
+	ctx->do_fallback = true;
+	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_import(subreq, in);
+	// return safexcel_ahash_import(req, in);
+}
+
+static int safexcel_sha3_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_ahash_cra_init(tfm);
+
+	/* Allocate fallback implementation */
+	ctx->fback = crypto_alloc_ahash(crypto_tfm_alg_name(tfm), 0,
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fback))
+		return PTR_ERR(ctx->fback);
+
+	/* Update statesize from fallback algorithm! */
+	crypto_hash_alg_common(ahash)->statesize =
+		crypto_ahash_statesize(ctx->fback);
+	crypto_ahash_set_reqsize(ahash, max(sizeof(struct safexcel_ahash_req),
+					    sizeof(struct ahash_request) +
+					    crypto_ahash_reqsize(ctx->fback)));
+	return 0;
+}
+
+static void safexcel_sha3_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_ahash(ctx->fback);
+	safexcel_ahash_cra_exit(tfm);
+}
+
+struct safexcel_alg_template safexcel_alg_sha3_224 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_sha3_224_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_sha3_224_digest,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_224_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha3-224",
+				.cra_driver_name = "safexcel-sha3-224",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_sha3_cra_init,
+				.cra_exit = safexcel_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha3_256_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_256;
+	req->digest = CONTEXT_CONTROL_DIGEST_INITIAL;
+	req->state_sz = SHA3_256_DIGEST_SIZE;
+	req->digest_sz = SHA3_256_DIGEST_SIZE;
+	req->block_sz = SHA3_256_BLOCK_SIZE;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_sha3_256_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_sha3_256_init(req) ?: safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length hash, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+struct safexcel_alg_template safexcel_alg_sha3_256 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_sha3_256_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_sha3_256_digest,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_256_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha3-256",
+				.cra_driver_name = "safexcel-sha3-256",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_sha3_cra_init,
+				.cra_exit = safexcel_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha3_384_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_384;
+	req->digest = CONTEXT_CONTROL_DIGEST_INITIAL;
+	req->state_sz = SHA3_384_DIGEST_SIZE;
+	req->digest_sz = SHA3_384_DIGEST_SIZE;
+	req->block_sz = SHA3_384_BLOCK_SIZE;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_sha3_384_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_sha3_384_init(req) ?: safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length hash, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+struct safexcel_alg_template safexcel_alg_sha3_384 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_sha3_384_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_sha3_384_digest,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_384_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha3-384",
+				.cra_driver_name = "safexcel-sha3-384",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_sha3_cra_init,
+				.cra_exit = safexcel_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha3_512_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_512;
+	req->digest = CONTEXT_CONTROL_DIGEST_INITIAL;
+	req->state_sz = SHA3_512_DIGEST_SIZE;
+	req->digest_sz = SHA3_512_DIGEST_SIZE;
+	req->block_sz = SHA3_512_BLOCK_SIZE;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_sha3_512_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_sha3_512_init(req) ?: safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length hash, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+struct safexcel_alg_template safexcel_alg_sha3_512 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_sha3_512_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_sha3_512_digest,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_512_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha3-512",
+				.cra_driver_name = "safexcel-sha3-512",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_sha3_cra_init,
+				.cra_exit = safexcel_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha3_cra_init(struct crypto_tfm *tfm, const char *alg)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = safexcel_sha3_cra_init(tfm);
+	if (ret)
+		return ret;
+
+	/* Allocate precalc basic digest implementation */
+	ctx->shpre = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->shpre))
+		return PTR_ERR(ctx->shpre);
+
+	ctx->shdesc = kmalloc(sizeof(*ctx->shdesc) +
+			      crypto_shash_descsize(ctx->shpre), GFP_KERNEL);
+	if (!ctx->shdesc) {
+		crypto_free_shash(ctx->shpre);
+		return -ENOMEM;
+	}
+	ctx->shdesc->tfm = ctx->shpre;
+	return 0;
+}
+
+static void safexcel_hmac_sha3_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_ahash(ctx->fback);
+	crypto_free_shash(ctx->shpre);
+	kfree(ctx->shdesc);
+	safexcel_ahash_cra_exit(tfm);
+}
+
+static int safexcel_hmac_sha3_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	int ret = 0;
+
+	if (keylen > crypto_ahash_blocksize(tfm)) {
+		/*
+		 * If the key is larger than the blocksize, then hash it
+		 * first using our fallback cipher
+		 */
+		ret = crypto_shash_digest(ctx->shdesc, key, keylen,
+					  (u8 *)ctx->ipad);
+		keylen = crypto_shash_digestsize(ctx->shpre);
+
+		/*
+		 * If the digest is larger than half the blocksize, we need to
+		 * move the rest to opad due to the way our HMAC infra works.
+		 */
+		if (keylen > crypto_ahash_blocksize(tfm) / 2)
+			/* Buffers overlap, need to use memmove iso memcpy! */
+			memmove(ctx->opad,
+				(u8 *)ctx->ipad +
+					crypto_ahash_blocksize(tfm) / 2,
+				keylen - crypto_ahash_blocksize(tfm) / 2);
+	} else {
+		/*
+		 * Copy the key to our ipad & opad buffers
+		 * Note that ipad and opad each contain one half of the key,
+		 * to match the existing HMAC driver infrastructure.
+		 */
+		if (keylen <= crypto_ahash_blocksize(tfm) / 2) {
+			memcpy(ctx->ipad, key, keylen);
+		} else {
+			memcpy(ctx->ipad, key,
+			       crypto_ahash_blocksize(tfm) / 2);
+			memcpy(ctx->opad,
+			       key + crypto_ahash_blocksize(tfm) / 2,
+			       keylen - crypto_ahash_blocksize(tfm) / 2);
+		}
+	}
+
+	/* Pad key with zeroes */
+	if (keylen <= crypto_ahash_blocksize(tfm) / 2) {
+		memset((u8 *)ctx->ipad + keylen, 0,
+		       crypto_ahash_blocksize(tfm) / 2 - keylen);
+		memset(ctx->opad, 0, crypto_ahash_blocksize(tfm) / 2);
+	} else {
+		memset((u8 *)ctx->opad + keylen -
+		       crypto_ahash_blocksize(tfm) / 2, 0,
+		       crypto_ahash_blocksize(tfm) - keylen);
+	}
+
+	/* If doing fallback, still need to set the new key! */
+	ctx->fb_do_setkey = true;
+	return ret;
+}
+
+static int safexcel_hmac_sha3_224_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Copy (half of) the key */
+	memcpy(req->state, ctx->ipad, SHA3_224_BLOCK_SIZE / 2);
+	/* Start of HMAC should have len == processed == blocksize */
+	req->len	= SHA3_224_BLOCK_SIZE;
+	req->processed	= SHA3_224_BLOCK_SIZE;
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_224;
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	req->state_sz = SHA3_224_BLOCK_SIZE / 2;
+	req->digest_sz = SHA3_224_DIGEST_SIZE;
+	req->block_sz = SHA3_224_BLOCK_SIZE;
+	req->hmac = true;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_hmac_sha3_224_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_hmac_sha3_224_init(req) ?:
+		       safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length HMAC, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+static int safexcel_hmac_sha3_224_cra_init(struct crypto_tfm *tfm)
+{
+	return safexcel_hmac_sha3_cra_init(tfm, "sha3-224");
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha3_224 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha3_224_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_hmac_sha3_224_digest,
+		.setkey = safexcel_hmac_sha3_setkey,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_224_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha3-224)",
+				.cra_driver_name = "safexcel-hmac-sha3-224",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_hmac_sha3_224_cra_init,
+				.cra_exit = safexcel_hmac_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha3_256_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Copy (half of) the key */
+	memcpy(req->state, ctx->ipad, SHA3_256_BLOCK_SIZE / 2);
+	/* Start of HMAC should have len == processed == blocksize */
+	req->len	= SHA3_256_BLOCK_SIZE;
+	req->processed	= SHA3_256_BLOCK_SIZE;
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_256;
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	req->state_sz = SHA3_256_BLOCK_SIZE / 2;
+	req->digest_sz = SHA3_256_DIGEST_SIZE;
+	req->block_sz = SHA3_256_BLOCK_SIZE;
+	req->hmac = true;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_hmac_sha3_256_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_hmac_sha3_256_init(req) ?:
+		       safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length HMAC, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+static int safexcel_hmac_sha3_256_cra_init(struct crypto_tfm *tfm)
+{
+	return safexcel_hmac_sha3_cra_init(tfm, "sha3-256");
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha3_256 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha3_256_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_hmac_sha3_256_digest,
+		.setkey = safexcel_hmac_sha3_setkey,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_256_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha3-256)",
+				.cra_driver_name = "safexcel-hmac-sha3-256",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_hmac_sha3_256_cra_init,
+				.cra_exit = safexcel_hmac_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha3_384_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Copy (half of) the key */
+	memcpy(req->state, ctx->ipad, SHA3_384_BLOCK_SIZE / 2);
+	/* Start of HMAC should have len == processed == blocksize */
+	req->len	= SHA3_384_BLOCK_SIZE;
+	req->processed	= SHA3_384_BLOCK_SIZE;
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_384;
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	req->state_sz = SHA3_384_BLOCK_SIZE / 2;
+	req->digest_sz = SHA3_384_DIGEST_SIZE;
+	req->block_sz = SHA3_384_BLOCK_SIZE;
+	req->hmac = true;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_hmac_sha3_384_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_hmac_sha3_384_init(req) ?:
+		       safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length HMAC, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+static int safexcel_hmac_sha3_384_cra_init(struct crypto_tfm *tfm)
+{
+	return safexcel_hmac_sha3_cra_init(tfm, "sha3-384");
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha3_384 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha3_384_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_hmac_sha3_384_digest,
+		.setkey = safexcel_hmac_sha3_setkey,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_384_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha3-384)",
+				.cra_driver_name = "safexcel-hmac-sha3-384",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_hmac_sha3_384_cra_init,
+				.cra_exit = safexcel_hmac_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha3_512_init(struct ahash_request *areq)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	/* Copy (half of) the key */
+	memcpy(req->state, ctx->ipad, SHA3_512_BLOCK_SIZE / 2);
+	/* Start of HMAC should have len == processed == blocksize */
+	req->len	= SHA3_512_BLOCK_SIZE;
+	req->processed	= SHA3_512_BLOCK_SIZE;
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA3_512;
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	req->state_sz = SHA3_512_BLOCK_SIZE / 2;
+	req->digest_sz = SHA3_512_DIGEST_SIZE;
+	req->block_sz = SHA3_512_BLOCK_SIZE;
+	req->hmac = true;
+	ctx->do_fallback = false;
+	ctx->fb_init_done = false;
+	return 0;
+}
+
+static int safexcel_hmac_sha3_512_digest(struct ahash_request *req)
+{
+	if (req->nbytes)
+		return safexcel_hmac_sha3_512_init(req) ?:
+		       safexcel_ahash_finup(req);
+
+	/* HW cannot do zero length HMAC, use fallback instead */
+	return safexcel_sha3_digest_fallback(req);
+}
+
+static int safexcel_hmac_sha3_512_cra_init(struct crypto_tfm *tfm)
+{
+	return safexcel_hmac_sha3_cra_init(tfm, "sha3-512");
+}
+struct safexcel_alg_template safexcel_alg_hmac_sha3_512 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.algo_mask = SAFEXCEL_ALG_SHA3,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha3_512_init,
+		.update = safexcel_sha3_update,
+		.final = safexcel_sha3_final,
+		.finup = safexcel_sha3_finup,
+		.digest = safexcel_hmac_sha3_512_digest,
+		.setkey = safexcel_hmac_sha3_setkey,
+		.export = safexcel_sha3_export,
+		.import = safexcel_sha3_import,
+		.halg = {
+			.digestsize = SHA3_512_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha3-512)",
+				.cra_driver_name = "safexcel-hmac-sha3-512",
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY |
+					     CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_hmac_sha3_512_cra_init,
+				.cra_exit = safexcel_hmac_sha3_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
diff --git a/drivers/crypto/inside-secure/safexcel_ring.c b/drivers/crypto/inside-secure/safexcel_ring.c
index 0f269b89cfd4..e454c3d44f07 100644
--- a/drivers/crypto/inside-secure/safexcel_ring.c
+++ b/drivers/crypto/inside-secure/safexcel_ring.c
@@ -14,7 +14,12 @@ int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
 				   struct safexcel_desc_ring *cdr,
 				   struct safexcel_desc_ring *rdr)
 {
-	cdr->offset = sizeof(u32) * priv->config.cd_offset;
+	int i;
+	struct safexcel_command_desc *cdesc;
+	dma_addr_t atok;
+
+	/* Actual command descriptor ring */
+	cdr->offset = priv->config.cd_offset;
 	cdr->base = dmam_alloc_coherent(priv->dev,
 					cdr->offset * EIP197_DEFAULT_RING_SIZE,
 					&cdr->base_dma, GFP_KERNEL);
@@ -24,7 +29,34 @@ int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
 	cdr->base_end = cdr->base + cdr->offset * (EIP197_DEFAULT_RING_SIZE - 1);
 	cdr->read = cdr->base;
 
-	rdr->offset = sizeof(u32) * priv->config.rd_offset;
+	/* Command descriptor shadow ring for storing additional token data */
+	cdr->shoffset = priv->config.cdsh_offset;
+	cdr->shbase = dmam_alloc_coherent(priv->dev,
+					  cdr->shoffset *
+					  EIP197_DEFAULT_RING_SIZE,
+					  &cdr->shbase_dma, GFP_KERNEL);
+	if (!cdr->shbase)
+		return -ENOMEM;
+	cdr->shwrite = cdr->shbase;
+	cdr->shbase_end = cdr->shbase + cdr->shoffset *
+					(EIP197_DEFAULT_RING_SIZE - 1);
+
+	/*
+	 * Populate command descriptors with physical pointers to shadow descs.
+	 * Note that we only need to do this once if we don't overwrite them.
+	 */
+	cdesc = cdr->base;
+	atok = cdr->shbase_dma;
+	for (i = 0; i < EIP197_DEFAULT_RING_SIZE; i++) {
+		cdesc->atok_lo = lower_32_bits(atok);
+		cdesc->atok_hi = upper_32_bits(atok);
+		cdesc = (void *)cdesc + cdr->offset;
+		atok += cdr->shoffset;
+	}
+
+	rdr->offset = priv->config.rd_offset;
+	/* Use shoffset for result token offset here */
+	rdr->shoffset = priv->config.res_offset;
 	rdr->base = dmam_alloc_coherent(priv->dev,
 					rdr->offset * EIP197_DEFAULT_RING_SIZE,
 					&rdr->base_dma, GFP_KERNEL);
@@ -42,11 +74,40 @@ inline int safexcel_select_ring(struct safexcel_crypto_priv *priv)
 	return (atomic_inc_return(&priv->ring_used) % priv->config.rings);
 }
 
-static void *safexcel_ring_next_wptr(struct safexcel_crypto_priv *priv,
-				     struct safexcel_desc_ring *ring)
+static void *safexcel_ring_next_cwptr(struct safexcel_crypto_priv *priv,
+				     struct safexcel_desc_ring *ring,
+				     bool first,
+				     struct safexcel_token **atoken)
 {
 	void *ptr = ring->write;
 
+	if (first)
+		*atoken = ring->shwrite;
+
+	if ((ring->write == ring->read - ring->offset) ||
+	    (ring->read == ring->base && ring->write == ring->base_end))
+		return ERR_PTR(-ENOMEM);
+
+	if (ring->write == ring->base_end) {
+		ring->write = ring->base;
+		ring->shwrite = ring->shbase;
+	} else {
+		ring->write += ring->offset;
+		ring->shwrite += ring->shoffset;
+	}
+
+	return ptr;
+}
+
+static void *safexcel_ring_next_rwptr(struct safexcel_crypto_priv *priv,
+				     struct safexcel_desc_ring *ring,
+				     struct result_data_desc **rtoken)
+{
+	void *ptr = ring->write;
+
+	/* Result token at relative offset shoffset */
+	*rtoken = ring->write + ring->shoffset;
+
 	if ((ring->write == ring->read - ring->offset) ||
 	    (ring->read == ring->base && ring->write == ring->base_end))
 		return ERR_PTR(-ENOMEM);
@@ -106,10 +167,13 @@ void safexcel_ring_rollback_wptr(struct safexcel_crypto_priv *priv,
 	if (ring->write == ring->read)
 		return;
 
-	if (ring->write == ring->base)
+	if (ring->write == ring->base) {
 		ring->write = ring->base_end;
-	else
+		ring->shwrite = ring->shbase_end;
+	} else {
 		ring->write -= ring->offset;
+		ring->shwrite -= ring->shoffset;
+	}
 }
 
 struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *priv,
@@ -117,26 +181,26 @@ struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *pr
 						 bool first, bool last,
 						 dma_addr_t data, u32 data_len,
 						 u32 full_data_len,
-						 dma_addr_t context) {
+						 dma_addr_t context,
+						 struct safexcel_token **atoken)
+{
 	struct safexcel_command_desc *cdesc;
-	int i;
 
-	cdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].cdr);
+	cdesc = safexcel_ring_next_cwptr(priv, &priv->ring[ring_id].cdr,
+					 first, atoken);
 	if (IS_ERR(cdesc))
 		return cdesc;
 
-	memset(cdesc, 0, sizeof(struct safexcel_command_desc));
-
-	cdesc->first_seg = first;
-	cdesc->last_seg = last;
 	cdesc->particle_size = data_len;
+	cdesc->rsvd0 = 0;
+	cdesc->last_seg = last;
+	cdesc->first_seg = first;
+	cdesc->additional_cdata_size = 0;
+	cdesc->rsvd1 = 0;
 	cdesc->data_lo = lower_32_bits(data);
 	cdesc->data_hi = upper_32_bits(data);
 
-	if (first && context) {
-		struct safexcel_token *token =
-			(struct safexcel_token *)cdesc->control_data.token;
-
+	if (first) {
 		/*
 		 * Note that the length here MUST be >0 or else the EIP(1)97
 		 * may hang. Newer EIP197 firmware actually incorporates this
@@ -146,20 +210,12 @@ struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *pr
 		cdesc->control_data.packet_length = full_data_len ?: 1;
 		cdesc->control_data.options = EIP197_OPTION_MAGIC_VALUE |
 					      EIP197_OPTION_64BIT_CTX |
-					      EIP197_OPTION_CTX_CTRL_IN_CMD;
-		cdesc->control_data.context_lo =
-			(lower_32_bits(context) & GENMASK(31, 2)) >> 2;
+					      EIP197_OPTION_CTX_CTRL_IN_CMD |
+					      EIP197_OPTION_RC_AUTO;
+		cdesc->control_data.type = EIP197_TYPE_BCLA;
+		cdesc->control_data.context_lo = lower_32_bits(context) |
+						 EIP197_CONTEXT_SMALL;
 		cdesc->control_data.context_hi = upper_32_bits(context);
-
-		if (priv->version == EIP197B_MRVL ||
-		    priv->version == EIP197D_MRVL)
-			cdesc->control_data.options |= EIP197_OPTION_RC_AUTO;
-
-		/* TODO: large xform HMAC with SHA-384/512 uses refresh = 3 */
-		cdesc->control_data.refresh = 2;
-
-		for (i = 0; i < EIP197_MAX_TOKENS; i++)
-			eip197_noop_token(&token[i]);
 	}
 
 	return cdesc;
@@ -171,18 +227,27 @@ struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *pri
 						dma_addr_t data, u32 len)
 {
 	struct safexcel_result_desc *rdesc;
+	struct result_data_desc *rtoken;
 
-	rdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].rdr);
+	rdesc = safexcel_ring_next_rwptr(priv, &priv->ring[ring_id].rdr,
+					 &rtoken);
 	if (IS_ERR(rdesc))
 		return rdesc;
 
-	memset(rdesc, 0, sizeof(struct safexcel_result_desc));
-
-	rdesc->first_seg = first;
-	rdesc->last_seg = last;
 	rdesc->particle_size = len;
+	rdesc->rsvd0 = 0;
+	rdesc->descriptor_overflow = 0;
+	rdesc->buffer_overflow = 0;
+	rdesc->last_seg = last;
+	rdesc->first_seg = first;
+	rdesc->result_size = EIP197_RD64_RESULT_SIZE;
+	rdesc->rsvd1 = 0;
 	rdesc->data_lo = lower_32_bits(data);
 	rdesc->data_hi = upper_32_bits(data);
 
+	/* Clear length & error code in result token */
+	rtoken->packet_length = 0;
+	rtoken->error_code = 0;
+
 	return rdesc;
 }
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 9181523ba760..ad73fc946682 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -23,6 +23,7 @@
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 
@@ -137,7 +138,7 @@ struct crypt_ctl {
 	/* Used by Host: 4*4 bytes*/
 	unsigned ctl_flags;
 	union {
-		struct ablkcipher_request *ablk_req;
+		struct skcipher_request *ablk_req;
 		struct aead_request *aead_req;
 		struct crypto_tfm *tfm;
 	} data;
@@ -186,7 +187,7 @@ struct ixp_ctx {
 };
 
 struct ixp_alg {
-	struct crypto_alg crypto;
+	struct skcipher_alg crypto;
 	const struct ix_hash_algo *hash;
 	u32 cfg_enc;
 	u32 cfg_dec;
@@ -239,17 +240,17 @@ static inline struct crypt_ctl *crypt_phys2virt(dma_addr_t phys)
 
 static inline u32 cipher_cfg_enc(struct crypto_tfm *tfm)
 {
-	return container_of(tfm->__crt_alg, struct ixp_alg,crypto)->cfg_enc;
+	return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_enc;
 }
 
 static inline u32 cipher_cfg_dec(struct crypto_tfm *tfm)
 {
-	return container_of(tfm->__crt_alg, struct ixp_alg,crypto)->cfg_dec;
+	return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_dec;
 }
 
 static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm)
 {
-	return container_of(tfm->__crt_alg, struct ixp_alg, crypto)->hash;
+	return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->hash;
 }
 
 static int setup_crypt_desc(void)
@@ -378,8 +379,8 @@ static void one_packet(dma_addr_t phys)
 		break;
 	}
 	case CTL_FLAG_PERFORM_ABLK: {
-		struct ablkcipher_request *req = crypt->data.ablk_req;
-		struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
+		struct skcipher_request *req = crypt->data.ablk_req;
+		struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
 
 		if (req_ctx->dst) {
 			free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
@@ -571,10 +572,10 @@ static int init_tfm(struct crypto_tfm *tfm)
 	return ret;
 }
 
-static int init_tfm_ablk(struct crypto_tfm *tfm)
+static int init_tfm_ablk(struct crypto_skcipher *tfm)
 {
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablk_ctx);
-	return init_tfm(tfm);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx));
+	return init_tfm(crypto_skcipher_tfm(tfm));
 }
 
 static int init_tfm_aead(struct crypto_aead *tfm)
@@ -590,6 +591,11 @@ static void exit_tfm(struct crypto_tfm *tfm)
 	free_sa_dir(&ctx->decrypt);
 }
 
+static void exit_tfm_ablk(struct crypto_skcipher *tfm)
+{
+	exit_tfm(crypto_skcipher_tfm(tfm));
+}
+
 static void exit_tfm_aead(struct crypto_aead *tfm)
 {
 	exit_tfm(crypto_aead_tfm(tfm));
@@ -734,7 +740,7 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 	u32 keylen_cfg = 0;
 	struct ix_sa_dir *dir;
 	struct ixp_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	int err;
 
 	dir = encrypt ? &ctx->encrypt : &ctx->decrypt;
 	cinfo = dir->npe_ctx;
@@ -751,12 +757,13 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 		case 24: keylen_cfg = MOD_AES192; break;
 		case 32: keylen_cfg = MOD_AES256; break;
 		default:
-			*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 			return -EINVAL;
 		}
 		cipher_cfg |= keylen_cfg;
 	} else {
-		crypto_des_verify_key(tfm, key);
+		err = crypto_des_verify_key(tfm, key);
+		if (err)
+			return err;
 	}
 	/* write cfg word to cryptinfo */
 	*(u32*)cinfo = cpu_to_be32(cipher_cfg);
@@ -809,11 +816,10 @@ static struct buffer_desc *chainup_buffers(struct device *dev,
 	return buf;
 }
 
-static int ablk_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int key_len)
 {
-	struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 *flags = &tfm->base.crt_flags;
+	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
 	init_completion(&ctx->completion);
@@ -829,33 +835,23 @@ static int ablk_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	if (ret)
 		goto out;
 	ret = setup_cipher(&tfm->base, 1, key, key_len);
-	if (ret)
-		goto out;
-
-	if (*flags & CRYPTO_TFM_RES_WEAK_KEY) {
-		if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
-			ret = -EINVAL;
-		} else {
-			*flags &= ~CRYPTO_TFM_RES_WEAK_KEY;
-		}
-	}
 out:
 	if (!atomic_dec_and_test(&ctx->configuring))
 		wait_for_completion(&ctx->completion);
 	return ret;
 }
 
-static int ablk_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			    unsigned int key_len)
 {
-	return verify_ablkcipher_des3_key(tfm, key) ?:
+	return verify_skcipher_des3_key(tfm, key) ?:
 	       ablk_setkey(tfm, key, key_len);
 }
 
-static int ablk_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		unsigned int key_len)
 {
-	struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	/* the nonce is stored in bytes at end of key */
 	if (key_len < CTR_RFC3686_NONCE_SIZE)
@@ -868,16 +864,16 @@ static int ablk_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return ablk_setkey(tfm, key, key_len);
 }
 
-static int ablk_perform(struct ablkcipher_request *req, int encrypt)
+static int ablk_perform(struct skcipher_request *req, int encrypt)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 	struct ix_sa_dir *dir;
 	struct crypt_ctl *crypt;
-	unsigned int nbytes = req->nbytes;
+	unsigned int nbytes = req->cryptlen;
 	enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
-	struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
+	struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
 	struct buffer_desc src_hook;
 	struct device *dev = &pdev->dev;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
@@ -902,8 +898,8 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
 	crypt->crypt_offs = 0;
 	crypt->crypt_len = nbytes;
 
-	BUG_ON(ivsize && !req->info);
-	memcpy(crypt->iv, req->info, ivsize);
+	BUG_ON(ivsize && !req->iv);
+	memcpy(crypt->iv, req->iv, ivsize);
 	if (req->src != req->dst) {
 		struct buffer_desc dst_hook;
 		crypt->mode |= NPE_OP_NOT_IN_PLACE;
@@ -941,22 +937,22 @@ free_buf_dest:
 	return -ENOMEM;
 }
 
-static int ablk_encrypt(struct ablkcipher_request *req)
+static int ablk_encrypt(struct skcipher_request *req)
 {
 	return ablk_perform(req, 1);
 }
 
-static int ablk_decrypt(struct ablkcipher_request *req)
+static int ablk_decrypt(struct skcipher_request *req)
 {
 	return ablk_perform(req, 0);
 }
 
-static int ablk_rfc3686_crypt(struct ablkcipher_request *req)
+static int ablk_rfc3686_crypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 iv[CTR_RFC3686_BLOCK_SIZE];
-	u8 *info = req->info;
+	u8 *info = req->iv;
 	int ret;
 
 	/* set up counter block */
@@ -967,9 +963,9 @@ static int ablk_rfc3686_crypt(struct ablkcipher_request *req)
 	*(__be32 *)(iv + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) =
 		cpu_to_be32(1);
 
-	req->info = iv;
+	req->iv = iv;
 	ret = ablk_perform(req, 1);
-	req->info = info;
+	req->iv = info;
 	return ret;
 }
 
@@ -1090,7 +1086,6 @@ free_buf_src:
 static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
-	u32 *flags = &tfm->base.crt_flags;
 	unsigned digest_len = crypto_aead_maxauthsize(tfm);
 	int ret;
 
@@ -1114,17 +1109,6 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 		goto out;
 	ret = setup_auth(&tfm->base, 1, authsize,  ctx->authkey,
 			ctx->authkey_len, digest_len);
-	if (ret)
-		goto out;
-
-	if (*flags & CRYPTO_TFM_RES_WEAK_KEY) {
-		if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
-			ret = -EINVAL;
-			goto out;
-		} else {
-			*flags &= ~CRYPTO_TFM_RES_WEAK_KEY;
-		}
-	}
 out:
 	if (!atomic_dec_and_test(&ctx->configuring))
 		wait_for_completion(&ctx->completion);
@@ -1163,7 +1147,6 @@ static int aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	memzero_explicit(&keys, sizeof(keys));
 	return aead_setup(tfm, crypto_aead_authsize(tfm));
 badkey:
-	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -1212,107 +1195,91 @@ static int aead_decrypt(struct aead_request *req)
 static struct ixp_alg ixp4xx_algos[] = {
 {
 	.crypto	= {
-		.cra_name	= "cbc(des)",
-		.cra_blocksize	= DES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			.ivsize		= DES_BLOCK_SIZE,
-			}
-		}
+		.base.cra_name		= "cbc(des)",
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_DES | MOD_CBC_ENC | KEYLEN_192,
 	.cfg_dec = CIPH_DECR | MOD_DES | MOD_CBC_DEC | KEYLEN_192,
 
 }, {
 	.crypto	= {
-		.cra_name	= "ecb(des)",
-		.cra_blocksize	= DES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			}
-		}
+		.base.cra_name		= "ecb(des)",
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_DES | MOD_ECB | KEYLEN_192,
 	.cfg_dec = CIPH_DECR | MOD_DES | MOD_ECB | KEYLEN_192,
 }, {
 	.crypto	= {
-		.cra_name	= "cbc(des3_ede)",
-		.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= ablk_des3_setkey,
-			}
-		}
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= ablk_des3_setkey,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_3DES | MOD_CBC_ENC | KEYLEN_192,
 	.cfg_dec = CIPH_DECR | MOD_3DES | MOD_CBC_DEC | KEYLEN_192,
 }, {
 	.crypto	= {
-		.cra_name	= "ecb(des3_ede)",
-		.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.setkey		= ablk_des3_setkey,
-			}
-		}
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= ablk_des3_setkey,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_3DES | MOD_ECB | KEYLEN_192,
 	.cfg_dec = CIPH_DECR | MOD_3DES | MOD_ECB | KEYLEN_192,
 }, {
 	.crypto	= {
-		.cra_name	= "cbc(aes)",
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			}
-		}
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_AES | MOD_CBC_ENC,
 	.cfg_dec = CIPH_DECR | MOD_AES | MOD_CBC_DEC,
 }, {
 	.crypto	= {
-		.cra_name	= "ecb(aes)",
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			}
-		}
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_AES | MOD_ECB,
 	.cfg_dec = CIPH_DECR | MOD_AES | MOD_ECB,
 }, {
 	.crypto	= {
-		.cra_name	= "ctr(aes)",
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			}
-		}
+		.base.cra_name		= "ctr(aes)",
+		.base.cra_blocksize	= 1,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_AES | MOD_CTR,
 	.cfg_dec = CIPH_ENCR | MOD_AES | MOD_CTR,
 }, {
 	.crypto	= {
-		.cra_name	= "rfc3686(ctr(aes))",
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_u		= { .ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_rfc3686_setkey,
-			.encrypt	= ablk_rfc3686_crypt,
-			.decrypt	= ablk_rfc3686_crypt }
-		}
+		.base.cra_name		= "rfc3686(ctr(aes))",
+		.base.cra_blocksize	= 1,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= ablk_rfc3686_setkey,
+		.encrypt		= ablk_rfc3686_crypt,
+		.decrypt		= ablk_rfc3686_crypt,
 	},
 	.cfg_enc = CIPH_ENCR | MOD_AES | MOD_CTR,
 	.cfg_dec = CIPH_ENCR | MOD_AES | MOD_CTR,
@@ -1421,10 +1388,10 @@ static int __init ixp_module_init(void)
 		return err;
 	}
 	for (i=0; i< num; i++) {
-		struct crypto_alg *cra = &ixp4xx_algos[i].crypto;
+		struct skcipher_alg *cra = &ixp4xx_algos[i].crypto;
 
-		if (snprintf(cra->cra_driver_name, CRYPTO_MAX_ALG_NAME,
-			"%s"IXP_POSTFIX, cra->cra_name) >=
+		if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+			"%s"IXP_POSTFIX, cra->base.cra_name) >=
 			CRYPTO_MAX_ALG_NAME)
 		{
 			continue;
@@ -1434,26 +1401,24 @@ static int __init ixp_module_init(void)
 		}
 
 		/* block ciphers */
-		cra->cra_type = &crypto_ablkcipher_type;
-		cra->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				 CRYPTO_ALG_KERN_DRIVER_ONLY |
-				 CRYPTO_ALG_ASYNC;
-		if (!cra->cra_ablkcipher.setkey)
-			cra->cra_ablkcipher.setkey = ablk_setkey;
-		if (!cra->cra_ablkcipher.encrypt)
-			cra->cra_ablkcipher.encrypt = ablk_encrypt;
-		if (!cra->cra_ablkcipher.decrypt)
-			cra->cra_ablkcipher.decrypt = ablk_decrypt;
-		cra->cra_init = init_tfm_ablk;
-
-		cra->cra_ctxsize = sizeof(struct ixp_ctx);
-		cra->cra_module = THIS_MODULE;
-		cra->cra_alignmask = 3;
-		cra->cra_priority = 300;
-		cra->cra_exit = exit_tfm;
-		if (crypto_register_alg(cra))
+		cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
+				      CRYPTO_ALG_ASYNC;
+		if (!cra->setkey)
+			cra->setkey = ablk_setkey;
+		if (!cra->encrypt)
+			cra->encrypt = ablk_encrypt;
+		if (!cra->decrypt)
+			cra->decrypt = ablk_decrypt;
+		cra->init = init_tfm_ablk;
+		cra->exit = exit_tfm_ablk;
+
+		cra->base.cra_ctxsize = sizeof(struct ixp_ctx);
+		cra->base.cra_module = THIS_MODULE;
+		cra->base.cra_alignmask = 3;
+		cra->base.cra_priority = 300;
+		if (crypto_register_skcipher(cra))
 			printk(KERN_ERR "Failed to register '%s'\n",
-				cra->cra_name);
+				cra->base.cra_name);
 		else
 			ixp4xx_algos[i].registered = 1;
 	}
@@ -1504,7 +1469,7 @@ static void __exit ixp_module_exit(void)
 
 	for (i=0; i< num; i++) {
 		if (ixp4xx_algos[i].registered)
-			crypto_unregister_alg(&ixp4xx_algos[i].crypto);
+			crypto_unregister_skcipher(&ixp4xx_algos[i].crypto);
 	}
 	release_ixp_crypto(&pdev->dev);
 	platform_device_unregister(pdev);
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index d63a6ee905c9..f1ed3b85c0d2 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -232,13 +232,13 @@ struct mv_cesa_sec_accel_desc {
 };
 
 /**
- * struct mv_cesa_blkcipher_op_ctx - cipher operation context
+ * struct mv_cesa_skcipher_op_ctx - cipher operation context
  * @key:	cipher key
  * @iv:		cipher IV
  *
  * Context associated to a cipher operation.
  */
-struct mv_cesa_blkcipher_op_ctx {
+struct mv_cesa_skcipher_op_ctx {
 	u32 key[8];
 	u32 iv[4];
 };
@@ -265,7 +265,7 @@ struct mv_cesa_hash_op_ctx {
 struct mv_cesa_op_ctx {
 	struct mv_cesa_sec_accel_desc desc;
 	union {
-		struct mv_cesa_blkcipher_op_ctx blkcipher;
+		struct mv_cesa_skcipher_op_ctx skcipher;
 		struct mv_cesa_hash_op_ctx hash;
 	} ctx;
 };
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index 84ceddfee76b..c24f34a48cef 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -209,7 +209,7 @@ mv_cesa_skcipher_complete(struct crypto_async_request *req)
 		struct mv_cesa_req *basereq;
 
 		basereq = &creq->base;
-		memcpy(skreq->iv, basereq->chain.last->op->ctx.blkcipher.iv,
+		memcpy(skreq->iv, basereq->chain.last->op->ctx.skcipher.iv,
 		       ivsize);
 	} else {
 		memcpy_fromio(skreq->iv,
@@ -255,10 +255,8 @@ static int mv_cesa_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 	int i;
 
 	ret = aes_expandkey(&ctx->aes, key, len);
-	if (ret) {
-		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (ret)
 		return ret;
-	}
 
 	remaining = (ctx->aes.key_length - 16) / 4;
 	offset = ctx->aes.key_length + 24 - remaining;
@@ -470,7 +468,7 @@ static int mv_cesa_des_op(struct skcipher_request *req,
 	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES,
 			      CESA_SA_DESC_CFG_CRYPTM_MSK);
 
-	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE);
+	memcpy(tmpl->ctx.skcipher.key, ctx->key, DES_KEY_SIZE);
 
 	return mv_cesa_skcipher_queue_req(req, tmpl);
 }
@@ -523,7 +521,7 @@ static int mv_cesa_cbc_des_op(struct skcipher_request *req,
 	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC,
 			      CESA_SA_DESC_CFG_CRYPTCM_MSK);
 
-	memcpy(tmpl->ctx.blkcipher.iv, req->iv, DES_BLOCK_SIZE);
+	memcpy(tmpl->ctx.skcipher.iv, req->iv, DES_BLOCK_SIZE);
 
 	return mv_cesa_des_op(req, tmpl);
 }
@@ -575,7 +573,7 @@ static int mv_cesa_des3_op(struct skcipher_request *req,
 	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES,
 			      CESA_SA_DESC_CFG_CRYPTM_MSK);
 
-	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE);
+	memcpy(tmpl->ctx.skcipher.key, ctx->key, DES3_EDE_KEY_SIZE);
 
 	return mv_cesa_skcipher_queue_req(req, tmpl);
 }
@@ -628,7 +626,7 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = {
 static int mv_cesa_cbc_des3_op(struct skcipher_request *req,
 			       struct mv_cesa_op_ctx *tmpl)
 {
-	memcpy(tmpl->ctx.blkcipher.iv, req->iv, DES3_EDE_BLOCK_SIZE);
+	memcpy(tmpl->ctx.skcipher.iv, req->iv, DES3_EDE_BLOCK_SIZE);
 
 	return mv_cesa_des3_op(req, tmpl);
 }
@@ -694,7 +692,7 @@ static int mv_cesa_aes_op(struct skcipher_request *req,
 		key = ctx->aes.key_enc;
 
 	for (i = 0; i < ctx->aes.key_length / sizeof(u32); i++)
-		tmpl->ctx.blkcipher.key[i] = cpu_to_le32(key[i]);
+		tmpl->ctx.skcipher.key[i] = cpu_to_le32(key[i]);
 
 	if (ctx->aes.key_length == 24)
 		cfg |= CESA_SA_DESC_CFG_AES_LEN_192;
@@ -755,7 +753,7 @@ static int mv_cesa_cbc_aes_op(struct skcipher_request *req,
 {
 	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC,
 			      CESA_SA_DESC_CFG_CRYPTCM_MSK);
-	memcpy(tmpl->ctx.blkcipher.iv, req->iv, AES_BLOCK_SIZE);
+	memcpy(tmpl->ctx.skcipher.iv, req->iv, AES_BLOCK_SIZE);
 
 	return mv_cesa_aes_op(req, tmpl);
 }
diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c
index 90c9644fb8a8..78d660d963e2 100644
--- a/drivers/crypto/mediatek/mtk-aes.c
+++ b/drivers/crypto/mediatek/mtk-aes.c
@@ -11,6 +11,7 @@
 
 #include <crypto/aes.h>
 #include <crypto/gcm.h>
+#include <crypto/internal/skcipher.h>
 #include "mtk-platform.h"
 
 #define AES_QUEUE_SIZE		512
@@ -414,7 +415,7 @@ exit:
 static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 			      size_t len)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
+	struct skcipher_request *req = skcipher_request_cast(aes->areq);
 	struct mtk_aes_base_ctx *ctx = aes->ctx;
 	struct mtk_aes_info *info = &ctx->info;
 	u32 cnt = 0;
@@ -450,7 +451,7 @@ static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 		return;
 	}
 
-	mtk_aes_write_state_le(info->state + ctx->keylen, req->info,
+	mtk_aes_write_state_le(info->state + ctx->keylen, (void *)req->iv,
 			       AES_BLOCK_SIZE);
 ctr:
 	info->tfm[0] += AES_TFM_SIZE(SIZE_IN_WORDS(AES_BLOCK_SIZE));
@@ -552,13 +553,13 @@ static int mtk_aes_transfer_complete(struct mtk_cryp *cryp,
 
 static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
-	struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(aes->areq);
+	struct mtk_aes_reqctx *rctx = skcipher_request_ctx(req);
 
 	mtk_aes_set_mode(aes, rctx);
 	aes->resume = mtk_aes_transfer_complete;
 
-	return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes);
+	return mtk_aes_dma(cryp, aes, req->src, req->dst, req->cryptlen);
 }
 
 static inline struct mtk_aes_ctr_ctx *
@@ -571,7 +572,7 @@ static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 {
 	struct mtk_aes_base_ctx *ctx = aes->ctx;
 	struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx);
-	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
+	struct skcipher_request *req = skcipher_request_cast(aes->areq);
 	struct scatterlist *src, *dst;
 	u32 start, end, ctr, blocks;
 	size_t datalen;
@@ -579,11 +580,11 @@ static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 
 	/* Check for transfer completion. */
 	cctx->offset += aes->total;
-	if (cctx->offset >= req->nbytes)
+	if (cctx->offset >= req->cryptlen)
 		return mtk_aes_transfer_complete(cryp, aes);
 
 	/* Compute data length. */
-	datalen = req->nbytes - cctx->offset;
+	datalen = req->cryptlen - cctx->offset;
 	blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE);
 	ctr = be32_to_cpu(cctx->iv[3]);
 
@@ -591,7 +592,7 @@ static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 	start = ctr;
 	end = start + blocks - 1;
 	if (end < start) {
-		ctr |= 0xffffffff;
+		ctr = 0xffffffff;
 		datalen = AES_BLOCK_SIZE * -start;
 		fragmented = true;
 	}
@@ -620,12 +621,12 @@ static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 {
 	struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx);
-	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
-	struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(aes->areq);
+	struct mtk_aes_reqctx *rctx = skcipher_request_ctx(req);
 
 	mtk_aes_set_mode(aes, rctx);
 
-	memcpy(cctx->iv, req->info, AES_BLOCK_SIZE);
+	memcpy(cctx->iv, req->iv, AES_BLOCK_SIZE);
 	cctx->offset = 0;
 	aes->total = 0;
 	aes->resume = mtk_aes_ctr_transfer;
@@ -634,10 +635,10 @@ static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 }
 
 /* Check and set the AES key to transform state buffer */
-static int mtk_aes_setkey(struct crypto_ablkcipher *tfm,
+static int mtk_aes_setkey(struct crypto_skcipher *tfm,
 			  const u8 *key, u32 keylen)
 {
-	struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct mtk_aes_base_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	switch (keylen) {
 	case AES_KEYSIZE_128:
@@ -651,7 +652,6 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm,
 		break;
 
 	default:
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -661,10 +661,10 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm,
 	return 0;
 }
 
-static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode)
+static int mtk_aes_crypt(struct skcipher_request *req, u64 mode)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct mtk_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct mtk_aes_reqctx *rctx;
 	struct mtk_cryp *cryp;
 
@@ -672,185 +672,168 @@ static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode)
 	if (!cryp)
 		return -ENODEV;
 
-	rctx = ablkcipher_request_ctx(req);
+	rctx = skcipher_request_ctx(req);
 	rctx->mode = mode;
 
 	return mtk_aes_handle_queue(cryp, !(mode & AES_FLAGS_ENCRYPT),
 				    &req->base);
 }
 
-static int mtk_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int mtk_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB);
 }
 
-static int mtk_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int mtk_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ECB);
 }
 
-static int mtk_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int mtk_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC);
 }
 
-static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int mtk_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_CBC);
 }
 
-static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req)
+static int mtk_aes_ctr_encrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR);
 }
 
-static int mtk_aes_ctr_decrypt(struct ablkcipher_request *req)
+static int mtk_aes_ctr_decrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_CTR);
 }
 
-static int mtk_aes_ofb_encrypt(struct ablkcipher_request *req)
+static int mtk_aes_ofb_encrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_OFB);
 }
 
-static int mtk_aes_ofb_decrypt(struct ablkcipher_request *req)
+static int mtk_aes_ofb_decrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_OFB);
 }
 
-static int mtk_aes_cfb_encrypt(struct ablkcipher_request *req)
+static int mtk_aes_cfb_encrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CFB128);
 }
 
-static int mtk_aes_cfb_decrypt(struct ablkcipher_request *req)
+static int mtk_aes_cfb_decrypt(struct skcipher_request *req)
 {
 	return mtk_aes_crypt(req, AES_FLAGS_CFB128);
 }
 
-static int mtk_aes_cra_init(struct crypto_tfm *tfm)
+static int mtk_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct mtk_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct mtk_aes_reqctx));
 	ctx->base.start = mtk_aes_start;
 	return 0;
 }
 
-static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm)
+static int mtk_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct mtk_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct mtk_aes_reqctx));
 	ctx->base.start = mtk_aes_ctr_start;
 	return 0;
 }
 
-static struct crypto_alg aes_algs[] = {
+static struct skcipher_alg aes_algs[] = {
 {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-mtk",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_init		= mtk_aes_cra_init,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= mtk_aes_setkey,
-		.encrypt	= mtk_aes_cbc_encrypt,
-		.decrypt	= mtk_aes_cbc_decrypt,
-		.ivsize		= AES_BLOCK_SIZE,
-	}
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "cbc-aes-mtk",
+	.base.cra_priority	= 400,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct mtk_aes_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= mtk_aes_setkey,
+	.encrypt		= mtk_aes_cbc_encrypt,
+	.decrypt		= mtk_aes_cbc_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
+	.init			= mtk_aes_init_tfm,
 },
 {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-mtk",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_init		= mtk_aes_cra_init,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= mtk_aes_setkey,
-		.encrypt	= mtk_aes_ecb_encrypt,
-		.decrypt	= mtk_aes_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "ecb-aes-mtk",
+	.base.cra_priority	= 400,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct mtk_aes_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= mtk_aes_setkey,
+	.encrypt		= mtk_aes_ecb_encrypt,
+	.decrypt		= mtk_aes_ecb_decrypt,
+	.init			= mtk_aes_init_tfm,
 },
 {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-mtk",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_init		= mtk_aes_ctr_cra_init,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct mtk_aes_ctr_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= mtk_aes_setkey,
-		.encrypt	= mtk_aes_ctr_encrypt,
-		.decrypt	= mtk_aes_ctr_decrypt,
-	}
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-mtk",
+	.base.cra_priority	= 400,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct mtk_aes_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= mtk_aes_setkey,
+	.encrypt		= mtk_aes_ctr_encrypt,
+	.decrypt		= mtk_aes_ctr_decrypt,
+	.init			= mtk_aes_ctr_init_tfm,
 },
 {
-	.cra_name		= "ofb(aes)",
-	.cra_driver_name	= "ofb-aes-mtk",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_init		= mtk_aes_cra_init,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= mtk_aes_setkey,
-		.encrypt	= mtk_aes_ofb_encrypt,
-		.decrypt	= mtk_aes_ofb_decrypt,
-	}
+	.base.cra_name		= "ofb(aes)",
+	.base.cra_driver_name	= "ofb-aes-mtk",
+	.base.cra_priority	= 400,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct mtk_aes_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= mtk_aes_setkey,
+	.encrypt		= mtk_aes_ofb_encrypt,
+	.decrypt		= mtk_aes_ofb_decrypt,
 },
 {
-	.cra_name		= "cfb(aes)",
-	.cra_driver_name	= "cfb-aes-mtk",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_init		= mtk_aes_cra_init,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= mtk_aes_setkey,
-		.encrypt	= mtk_aes_cfb_encrypt,
-		.decrypt	= mtk_aes_cfb_decrypt,
-	}
+	.base.cra_name		= "cfb(aes)",
+	.base.cra_driver_name	= "cfb-aes-mtk",
+	.base.cra_priority	= 400,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct mtk_aes_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= mtk_aes_setkey,
+	.encrypt		= mtk_aes_cfb_encrypt,
+	.decrypt		= mtk_aes_cfb_decrypt,
 },
 };
 
@@ -1038,7 +1021,6 @@ static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 		break;
 
 	default:
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -1049,8 +1031,6 @@ static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
 				  CRYPTO_TFM_REQ_MASK);
 	err = crypto_skcipher_setkey(ctr, key, keylen);
-	crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) &
-			      CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -1259,7 +1239,7 @@ static void mtk_aes_unregister_algs(void)
 	crypto_unregister_aead(&aes_gcm_alg);
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
-		crypto_unregister_alg(&aes_algs[i]);
+		crypto_unregister_skcipher(&aes_algs[i]);
 }
 
 static int mtk_aes_register_algs(void)
@@ -1267,7 +1247,7 @@ static int mtk_aes_register_algs(void)
 	int err, i;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
-		err = crypto_register_alg(&aes_algs[i]);
+		err = crypto_register_skcipher(&aes_algs[i]);
 		if (err)
 			goto err_aes_algs;
 	}
@@ -1280,7 +1260,7 @@ static int mtk_aes_register_algs(void)
 
 err_aes_algs:
 	for (; i--; )
-		crypto_unregister_alg(&aes_algs[i]);
+		crypto_unregister_skcipher(&aes_algs[i]);
 
 	return err;
 }
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index bf8d2197bc11..435ac1c83df9 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -211,11 +211,11 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
  * Encryption (AES128)
  */
 static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
-			   struct ablkcipher_request *req, int init)
+			   struct skcipher_request *req, int init)
 {
 	struct dcp *sdcp = global_sdcp;
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
-	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	int ret;
 
 	dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
@@ -274,9 +274,9 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 {
 	struct dcp *sdcp = global_sdcp;
 
-	struct ablkcipher_request *req = ablkcipher_request_cast(arq);
+	struct skcipher_request *req = skcipher_request_cast(arq);
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
-	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 
 	struct scatterlist *dst = req->dst;
 	struct scatterlist *src = req->src;
@@ -305,7 +305,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 
 	if (!rctx->ecb) {
 		/* Copy the CBC IV just past the key. */
-		memcpy(key + AES_KEYSIZE_128, req->info, AES_KEYSIZE_128);
+		memcpy(key + AES_KEYSIZE_128, req->iv, AES_KEYSIZE_128);
 		/* CBC needs the INIT set. */
 		init = 1;
 	} else {
@@ -316,10 +316,10 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 		src_buf = sg_virt(src);
 		len = sg_dma_len(src);
 		tlen += len;
-		limit_hit = tlen > req->nbytes;
+		limit_hit = tlen > req->cryptlen;
 
 		if (limit_hit)
-			len = req->nbytes - (tlen - len);
+			len = req->cryptlen - (tlen - len);
 
 		do {
 			if (actx->fill + len > out_off)
@@ -375,10 +375,10 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 	/* Copy the IV for CBC for chaining */
 	if (!rctx->ecb) {
 		if (rctx->enc)
-			memcpy(req->info, out_buf+(last_out_len-AES_BLOCK_SIZE),
+			memcpy(req->iv, out_buf+(last_out_len-AES_BLOCK_SIZE),
 				AES_BLOCK_SIZE);
 		else
-			memcpy(req->info, in_buf+(last_out_len-AES_BLOCK_SIZE),
+			memcpy(req->iv, in_buf+(last_out_len-AES_BLOCK_SIZE),
 				AES_BLOCK_SIZE);
 	}
 
@@ -422,17 +422,17 @@ static int dcp_chan_thread_aes(void *data)
 	return 0;
 }
 
-static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc)
+static int mxs_dcp_block_fallback(struct skcipher_request *req, int enc)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct dcp_async_ctx *ctx = crypto_skcipher_ctx(tfm);
 	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 	int ret;
 
 	skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 	skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst,
-				   req->nbytes, req->info);
+				   req->cryptlen, req->iv);
 
 	if (enc)
 		ret = crypto_skcipher_encrypt(subreq);
@@ -444,12 +444,12 @@ static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc)
 	return ret;
 }
 
-static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb)
+static int mxs_dcp_aes_enqueue(struct skcipher_request *req, int enc, int ecb)
 {
 	struct dcp *sdcp = global_sdcp;
 	struct crypto_async_request *arq = &req->base;
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
-	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	int ret;
 
 	if (unlikely(actx->key_len != AES_KEYSIZE_128))
@@ -468,31 +468,30 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb)
 	return ret;
 }
 
-static int mxs_dcp_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int mxs_dcp_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return mxs_dcp_aes_enqueue(req, 0, 1);
 }
 
-static int mxs_dcp_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int mxs_dcp_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return mxs_dcp_aes_enqueue(req, 1, 1);
 }
 
-static int mxs_dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int mxs_dcp_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return mxs_dcp_aes_enqueue(req, 0, 0);
 }
 
-static int mxs_dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int mxs_dcp_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return mxs_dcp_aes_enqueue(req, 1, 0);
 }
 
-static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int len)
 {
-	struct dcp_async_ctx *actx = crypto_ablkcipher_ctx(tfm);
-	unsigned int ret;
+	struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
 
 	/*
 	 * AES 128 is supposed by the hardware, store key into temporary
@@ -513,22 +512,13 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	crypto_sync_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK);
 	crypto_sync_skcipher_set_flags(actx->fallback,
 				  tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_sync_skcipher_setkey(actx->fallback, key, len);
-	if (!ret)
-		return 0;
-
-	tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(actx->fallback) &
-			       CRYPTO_TFM_RES_MASK;
-
-	return ret;
+	return crypto_sync_skcipher_setkey(actx->fallback, key, len);
 }
 
-static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm)
+static int mxs_dcp_aes_fallback_init_tfm(struct crypto_skcipher *tfm)
 {
-	const char *name = crypto_tfm_alg_name(tfm);
-	struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm));
+	struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
 	struct crypto_sync_skcipher *blk;
 
 	blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -536,13 +526,13 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm)
 		return PTR_ERR(blk);
 
 	actx->fallback = blk;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_aes_req_ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct dcp_aes_req_ctx));
 	return 0;
 }
 
-static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm)
+static void mxs_dcp_aes_fallback_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
+	struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
 
 	crypto_free_sync_skcipher(actx->fallback);
 }
@@ -854,54 +844,44 @@ static void dcp_sha_cra_exit(struct crypto_tfm *tfm)
 }
 
 /* AES 128 ECB and AES 128 CBC */
-static struct crypto_alg dcp_aes_algs[] = {
+static struct skcipher_alg dcp_aes_algs[] = {
 	{
-		.cra_name		= "ecb(aes)",
-		.cra_driver_name	= "ecb-aes-dcp",
-		.cra_priority		= 400,
-		.cra_alignmask		= 15,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC |
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-dcp",
+		.base.cra_priority	= 400,
+		.base.cra_alignmask	= 15,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_NEED_FALLBACK,
-		.cra_init		= mxs_dcp_aes_fallback_init,
-		.cra_exit		= mxs_dcp_aes_fallback_exit,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct dcp_async_ctx),
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_u	= {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.setkey		= mxs_dcp_aes_setkey,
-				.encrypt	= mxs_dcp_aes_ecb_encrypt,
-				.decrypt	= mxs_dcp_aes_ecb_decrypt
-			},
-		},
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct dcp_async_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= mxs_dcp_aes_setkey,
+		.encrypt		= mxs_dcp_aes_ecb_encrypt,
+		.decrypt		= mxs_dcp_aes_ecb_decrypt,
+		.init			= mxs_dcp_aes_fallback_init_tfm,
+		.exit			= mxs_dcp_aes_fallback_exit_tfm,
 	}, {
-		.cra_name		= "cbc(aes)",
-		.cra_driver_name	= "cbc-aes-dcp",
-		.cra_priority		= 400,
-		.cra_alignmask		= 15,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC |
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-dcp",
+		.base.cra_priority	= 400,
+		.base.cra_alignmask	= 15,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_NEED_FALLBACK,
-		.cra_init		= mxs_dcp_aes_fallback_init,
-		.cra_exit		= mxs_dcp_aes_fallback_exit,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct dcp_async_ctx),
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_u = {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.setkey		= mxs_dcp_aes_setkey,
-				.encrypt	= mxs_dcp_aes_cbc_encrypt,
-				.decrypt	= mxs_dcp_aes_cbc_decrypt,
-				.ivsize		= AES_BLOCK_SIZE,
-			},
-		},
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct dcp_async_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= mxs_dcp_aes_setkey,
+		.encrypt		= mxs_dcp_aes_cbc_encrypt,
+		.decrypt		= mxs_dcp_aes_cbc_decrypt,
+		.ivsize			= AES_BLOCK_SIZE,
+		.init			= mxs_dcp_aes_fallback_init_tfm,
+		.exit			= mxs_dcp_aes_fallback_exit_tfm,
 	},
 };
 
@@ -1104,8 +1084,8 @@ static int mxs_dcp_probe(struct platform_device *pdev)
 	sdcp->caps = readl(sdcp->base + MXS_DCP_CAPABILITY1);
 
 	if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128) {
-		ret = crypto_register_algs(dcp_aes_algs,
-					   ARRAY_SIZE(dcp_aes_algs));
+		ret = crypto_register_skciphers(dcp_aes_algs,
+						ARRAY_SIZE(dcp_aes_algs));
 		if (ret) {
 			/* Failed to register algorithm. */
 			dev_err(dev, "Failed to register AES crypto!\n");
@@ -1139,7 +1119,7 @@ err_unregister_sha1:
 
 err_unregister_aes:
 	if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128)
-		crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
+		crypto_unregister_skciphers(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
 
 err_destroy_aes_thread:
 	kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]);
@@ -1164,7 +1144,7 @@ static int mxs_dcp_remove(struct platform_device *pdev)
 		crypto_unregister_ahash(&dcp_sha1_alg);
 
 	if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128)
-		crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
+		crypto_unregister_skciphers(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
 
 	kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]);
 	kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]);
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index dc15b06e96ab..f5c468f2cc82 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -23,6 +23,7 @@
 #include <linux/sched.h>
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/algapi.h>
 
@@ -381,8 +382,8 @@ static int n2_hash_cra_init(struct crypto_tfm *tfm)
 	fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
 					  CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback_tfm)) {
-		pr_warning("Fallback driver '%s' could not be loaded!\n",
-			   fallback_driver_name);
+		pr_warn("Fallback driver '%s' could not be loaded!\n",
+			fallback_driver_name);
 		err = PTR_ERR(fallback_tfm);
 		goto out;
 	}
@@ -418,16 +419,16 @@ static int n2_hmac_cra_init(struct crypto_tfm *tfm)
 	fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
 					  CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback_tfm)) {
-		pr_warning("Fallback driver '%s' could not be loaded!\n",
-			   fallback_driver_name);
+		pr_warn("Fallback driver '%s' could not be loaded!\n",
+			fallback_driver_name);
 		err = PTR_ERR(fallback_tfm);
 		goto out;
 	}
 
 	child_shash = crypto_alloc_shash(n2alg->child_alg, 0, 0);
 	if (IS_ERR(child_shash)) {
-		pr_warning("Child shash '%s' could not be loaded!\n",
-			   n2alg->child_alg);
+		pr_warn("Child shash '%s' could not be loaded!\n",
+			n2alg->child_alg);
 		err = PTR_ERR(child_shash);
 		goto out_free_fallback;
 	}
@@ -657,7 +658,7 @@ static int n2_hmac_async_digest(struct ahash_request *req)
 				  ctx->hash_key_len);
 }
 
-struct n2_cipher_context {
+struct n2_skcipher_context {
 	int			key_len;
 	int			enc_type;
 	union {
@@ -683,7 +684,7 @@ struct n2_crypto_chunk {
 };
 
 struct n2_request_context {
-	struct ablkcipher_walk	walk;
+	struct skcipher_walk	walk;
 	struct list_head	chunk_list;
 	struct n2_crypto_chunk	chunk;
 	u8			temp_iv[16];
@@ -708,29 +709,29 @@ struct n2_request_context {
  * is not a valid sequence.
  */
 
-struct n2_cipher_alg {
+struct n2_skcipher_alg {
 	struct list_head	entry;
 	u8			enc_type;
-	struct crypto_alg	alg;
+	struct skcipher_alg	skcipher;
 };
 
-static inline struct n2_cipher_alg *n2_cipher_alg(struct crypto_tfm *tfm)
+static inline struct n2_skcipher_alg *n2_skcipher_alg(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 
-	return container_of(alg, struct n2_cipher_alg, alg);
+	return container_of(alg, struct n2_skcipher_alg, skcipher);
 }
 
-struct n2_cipher_request_context {
-	struct ablkcipher_walk	walk;
+struct n2_skcipher_request_context {
+	struct skcipher_walk	walk;
 };
 
-static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int n2_aes_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			 unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
-	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	struct n2_skcipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_skcipher_alg *n2alg = n2_skcipher_alg(skcipher);
 
 	ctx->enc_type = (n2alg->enc_type & ENC_TYPE_CHAINING_MASK);
 
@@ -745,7 +746,6 @@ static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		ctx->enc_type |= ENC_TYPE_ALG_AES256;
 		break;
 	default:
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
@@ -754,15 +754,15 @@ static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int n2_des_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			 unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
-	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	struct n2_skcipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_skcipher_alg *n2alg = n2_skcipher_alg(skcipher);
 	int err;
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(skcipher, key);
 	if (err)
 		return err;
 
@@ -773,15 +773,15 @@ static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int n2_3des_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			  unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
-	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	struct n2_skcipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_skcipher_alg *n2alg = n2_skcipher_alg(skcipher);
 	int err;
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(skcipher, key);
 	if (err)
 		return err;
 
@@ -792,12 +792,12 @@ static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int n2_arc4_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			  unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
-	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	struct n2_skcipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_skcipher_alg *n2alg = n2_skcipher_alg(skcipher);
 	u8 *s = ctx->key.arc4;
 	u8 *x = s + 256;
 	u8 *y = x + 1;
@@ -822,7 +822,7 @@ static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static inline int cipher_descriptor_len(int nbytes, unsigned int block_size)
+static inline int skcipher_descriptor_len(int nbytes, unsigned int block_size)
 {
 	int this_len = nbytes;
 
@@ -830,10 +830,11 @@ static inline int cipher_descriptor_len(int nbytes, unsigned int block_size)
 	return this_len > (1 << 16) ? (1 << 16) : this_len;
 }
 
-static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp,
+static int __n2_crypt_chunk(struct crypto_skcipher *skcipher,
+			    struct n2_crypto_chunk *cp,
 			    struct spu_queue *qp, bool encrypt)
 {
-	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_skcipher_context *ctx = crypto_skcipher_ctx(skcipher);
 	struct cwq_initial_entry *ent;
 	bool in_place;
 	int i;
@@ -877,18 +878,17 @@ static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp,
 	return (spu_queue_submit(qp, ent) != HV_EOK) ? -EINVAL : 0;
 }
 
-static int n2_compute_chunks(struct ablkcipher_request *req)
+static int n2_compute_chunks(struct skcipher_request *req)
 {
-	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
-	struct ablkcipher_walk *walk = &rctx->walk;
+	struct n2_request_context *rctx = skcipher_request_ctx(req);
+	struct skcipher_walk *walk = &rctx->walk;
 	struct n2_crypto_chunk *chunk;
 	unsigned long dest_prev;
 	unsigned int tot_len;
 	bool prev_in_place;
 	int err, nbytes;
 
-	ablkcipher_walk_init(walk, req->dst, req->src, req->nbytes);
-	err = ablkcipher_walk_phys(req, walk);
+	err = skcipher_walk_async(walk, req);
 	if (err)
 		return err;
 
@@ -910,12 +910,12 @@ static int n2_compute_chunks(struct ablkcipher_request *req)
 		bool in_place;
 		int this_len;
 
-		src_paddr = (page_to_phys(walk->src.page) +
-			     walk->src.offset);
-		dest_paddr = (page_to_phys(walk->dst.page) +
-			      walk->dst.offset);
+		src_paddr = (page_to_phys(walk->src.phys.page) +
+			     walk->src.phys.offset);
+		dest_paddr = (page_to_phys(walk->dst.phys.page) +
+			      walk->dst.phys.offset);
 		in_place = (src_paddr == dest_paddr);
-		this_len = cipher_descriptor_len(nbytes, walk->blocksize);
+		this_len = skcipher_descriptor_len(nbytes, walk->blocksize);
 
 		if (chunk->arr_len != 0) {
 			if (in_place != prev_in_place ||
@@ -946,7 +946,7 @@ static int n2_compute_chunks(struct ablkcipher_request *req)
 		prev_in_place = in_place;
 		tot_len += this_len;
 
-		err = ablkcipher_walk_done(req, walk, nbytes - this_len);
+		err = skcipher_walk_done(walk, nbytes - this_len);
 		if (err)
 			break;
 	}
@@ -958,15 +958,14 @@ static int n2_compute_chunks(struct ablkcipher_request *req)
 	return err;
 }
 
-static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv)
+static void n2_chunk_complete(struct skcipher_request *req, void *final_iv)
 {
-	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+	struct n2_request_context *rctx = skcipher_request_ctx(req);
 	struct n2_crypto_chunk *c, *tmp;
 
 	if (final_iv)
 		memcpy(rctx->walk.iv, final_iv, rctx->walk.blocksize);
 
-	ablkcipher_walk_complete(&rctx->walk);
 	list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) {
 		list_del(&c->entry);
 		if (unlikely(c != &rctx->chunk))
@@ -975,10 +974,10 @@ static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv)
 
 }
 
-static int n2_do_ecb(struct ablkcipher_request *req, bool encrypt)
+static int n2_do_ecb(struct skcipher_request *req, bool encrypt)
 {
-	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
-	struct crypto_tfm *tfm = req->base.tfm;
+	struct n2_request_context *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	int err = n2_compute_chunks(req);
 	struct n2_crypto_chunk *c, *tmp;
 	unsigned long flags, hv_ret;
@@ -1017,20 +1016,20 @@ out:
 	return err;
 }
 
-static int n2_encrypt_ecb(struct ablkcipher_request *req)
+static int n2_encrypt_ecb(struct skcipher_request *req)
 {
 	return n2_do_ecb(req, true);
 }
 
-static int n2_decrypt_ecb(struct ablkcipher_request *req)
+static int n2_decrypt_ecb(struct skcipher_request *req)
 {
 	return n2_do_ecb(req, false);
 }
 
-static int n2_do_chaining(struct ablkcipher_request *req, bool encrypt)
+static int n2_do_chaining(struct skcipher_request *req, bool encrypt)
 {
-	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
-	struct crypto_tfm *tfm = req->base.tfm;
+	struct n2_request_context *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	unsigned long flags, hv_ret, iv_paddr;
 	int err = n2_compute_chunks(req);
 	struct n2_crypto_chunk *c, *tmp;
@@ -1107,32 +1106,32 @@ out:
 	return err;
 }
 
-static int n2_encrypt_chaining(struct ablkcipher_request *req)
+static int n2_encrypt_chaining(struct skcipher_request *req)
 {
 	return n2_do_chaining(req, true);
 }
 
-static int n2_decrypt_chaining(struct ablkcipher_request *req)
+static int n2_decrypt_chaining(struct skcipher_request *req)
 {
 	return n2_do_chaining(req, false);
 }
 
-struct n2_cipher_tmpl {
+struct n2_skcipher_tmpl {
 	const char		*name;
 	const char		*drv_name;
 	u8			block_size;
 	u8			enc_type;
-	struct ablkcipher_alg	ablkcipher;
+	struct skcipher_alg	skcipher;
 };
 
-static const struct n2_cipher_tmpl cipher_tmpls[] = {
+static const struct n2_skcipher_tmpl skcipher_tmpls[] = {
 	/* ARC4: only ECB is supported (chaining bits ignored) */
 	{	.name		= "ecb(arc4)",
 		.drv_name	= "ecb-arc4",
 		.block_size	= 1,
 		.enc_type	= (ENC_TYPE_ALG_RC4_STREAM |
 				   ENC_TYPE_CHAINING_ECB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= 1,
 			.max_keysize	= 256,
 			.setkey		= n2_arc4_setkey,
@@ -1147,7 +1146,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_DES |
 				   ENC_TYPE_CHAINING_ECB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
 			.setkey		= n2_des_setkey,
@@ -1160,7 +1159,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_DES |
 				   ENC_TYPE_CHAINING_CBC),
-		.ablkcipher	= {
+		.skcipher	= {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
@@ -1174,7 +1173,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_DES |
 				   ENC_TYPE_CHAINING_CFB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
 			.setkey		= n2_des_setkey,
@@ -1189,7 +1188,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_3DES |
 				   ENC_TYPE_CHAINING_ECB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= 3 * DES_KEY_SIZE,
 			.max_keysize	= 3 * DES_KEY_SIZE,
 			.setkey		= n2_3des_setkey,
@@ -1202,7 +1201,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_3DES |
 				   ENC_TYPE_CHAINING_CBC),
-		.ablkcipher	= {
+		.skcipher	= {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= 3 * DES_KEY_SIZE,
 			.max_keysize	= 3 * DES_KEY_SIZE,
@@ -1216,7 +1215,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= DES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_3DES |
 				   ENC_TYPE_CHAINING_CFB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= 3 * DES_KEY_SIZE,
 			.max_keysize	= 3 * DES_KEY_SIZE,
 			.setkey		= n2_3des_setkey,
@@ -1230,7 +1229,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= AES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_AES128 |
 				   ENC_TYPE_CHAINING_ECB),
-		.ablkcipher	= {
+		.skcipher	= {
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
 			.setkey		= n2_aes_setkey,
@@ -1243,7 +1242,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= AES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_AES128 |
 				   ENC_TYPE_CHAINING_CBC),
-		.ablkcipher	= {
+		.skcipher	= {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -1257,7 +1256,7 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 		.block_size	= AES_BLOCK_SIZE,
 		.enc_type	= (ENC_TYPE_ALG_AES128 |
 				   ENC_TYPE_CHAINING_COUNTER),
-		.ablkcipher	= {
+		.skcipher	= {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -1268,9 +1267,9 @@ static const struct n2_cipher_tmpl cipher_tmpls[] = {
 	},
 
 };
-#define NUM_CIPHER_TMPLS ARRAY_SIZE(cipher_tmpls)
+#define NUM_CIPHER_TMPLS ARRAY_SIZE(skcipher_tmpls)
 
-static LIST_HEAD(cipher_algs);
+static LIST_HEAD(skcipher_algs);
 
 struct n2_hash_tmpl {
 	const char	*name;
@@ -1344,14 +1343,14 @@ static int algs_registered;
 
 static void __n2_unregister_algs(void)
 {
-	struct n2_cipher_alg *cipher, *cipher_tmp;
+	struct n2_skcipher_alg *skcipher, *skcipher_tmp;
 	struct n2_ahash_alg *alg, *alg_tmp;
 	struct n2_hmac_alg *hmac, *hmac_tmp;
 
-	list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) {
-		crypto_unregister_alg(&cipher->alg);
-		list_del(&cipher->entry);
-		kfree(cipher);
+	list_for_each_entry_safe(skcipher, skcipher_tmp, &skcipher_algs, entry) {
+		crypto_unregister_skcipher(&skcipher->skcipher);
+		list_del(&skcipher->entry);
+		kfree(skcipher);
 	}
 	list_for_each_entry_safe(hmac, hmac_tmp, &hmac_algs, derived.entry) {
 		crypto_unregister_ahash(&hmac->derived.alg);
@@ -1365,44 +1364,42 @@ static void __n2_unregister_algs(void)
 	}
 }
 
-static int n2_cipher_cra_init(struct crypto_tfm *tfm)
+static int n2_skcipher_init_tfm(struct crypto_skcipher *tfm)
 {
-	tfm->crt_ablkcipher.reqsize = sizeof(struct n2_request_context);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct n2_request_context));
 	return 0;
 }
 
-static int __n2_register_one_cipher(const struct n2_cipher_tmpl *tmpl)
+static int __n2_register_one_skcipher(const struct n2_skcipher_tmpl *tmpl)
 {
-	struct n2_cipher_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
-	struct crypto_alg *alg;
+	struct n2_skcipher_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	struct skcipher_alg *alg;
 	int err;
 
 	if (!p)
 		return -ENOMEM;
 
-	alg = &p->alg;
+	alg = &p->skcipher;
+	*alg = tmpl->skcipher;
 
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name);
-	alg->cra_priority = N2_CRA_PRIORITY;
-	alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-			 CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC;
-	alg->cra_blocksize = tmpl->block_size;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name);
+	alg->base.cra_priority = N2_CRA_PRIORITY;
+	alg->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC;
+	alg->base.cra_blocksize = tmpl->block_size;
 	p->enc_type = tmpl->enc_type;
-	alg->cra_ctxsize = sizeof(struct n2_cipher_context);
-	alg->cra_type = &crypto_ablkcipher_type;
-	alg->cra_u.ablkcipher = tmpl->ablkcipher;
-	alg->cra_init = n2_cipher_cra_init;
-	alg->cra_module = THIS_MODULE;
-
-	list_add(&p->entry, &cipher_algs);
-	err = crypto_register_alg(alg);
+	alg->base.cra_ctxsize = sizeof(struct n2_skcipher_context);
+	alg->base.cra_module = THIS_MODULE;
+	alg->init = n2_skcipher_init_tfm;
+
+	list_add(&p->entry, &skcipher_algs);
+	err = crypto_register_skcipher(alg);
 	if (err) {
-		pr_err("%s alg registration failed\n", alg->cra_name);
+		pr_err("%s alg registration failed\n", alg->base.cra_name);
 		list_del(&p->entry);
 		kfree(p);
 	} else {
-		pr_info("%s alg registered\n", alg->cra_name);
+		pr_info("%s alg registered\n", alg->base.cra_name);
 	}
 	return err;
 }
@@ -1517,7 +1514,7 @@ static int n2_register_algs(void)
 		}
 	}
 	for (i = 0; i < NUM_CIPHER_TMPLS; i++) {
-		err = __n2_register_one_cipher(&cipher_tmpls[i]);
+		err = __n2_register_one_skcipher(&skcipher_tmpls[i]);
 		if (err) {
 			__n2_unregister_algs();
 			goto out;
diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index e631f9979127..92e921eceed7 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -18,11 +18,11 @@
 #include "nx.h"
 
 
-static int cbc_aes_nx_set_key(struct crypto_tfm *tfm,
-			      const u8          *in_key,
-			      unsigned int       key_len)
+static int cbc_aes_nx_set_key(struct crypto_skcipher *tfm,
+			      const u8               *in_key,
+			      unsigned int            key_len)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
 
 	nx_ctx_init(nx_ctx, HCOP_FC_AES);
@@ -50,13 +50,11 @@ static int cbc_aes_nx_set_key(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int cbc_aes_nx_crypt(struct blkcipher_desc *desc,
-			    struct scatterlist    *dst,
-			    struct scatterlist    *src,
-			    unsigned int           nbytes,
-			    int                    enc)
+static int cbc_aes_nx_crypt(struct skcipher_request *req,
+			    int                      enc)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
 	unsigned long irq_flags;
 	unsigned int processed = 0, to_process;
@@ -70,10 +68,11 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc,
 		NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT;
 
 	do {
-		to_process = nbytes - processed;
+		to_process = req->cryptlen - processed;
 
-		rc = nx_build_sg_lists(nx_ctx, desc, dst, src, &to_process,
-				       processed, csbcpb->cpb.aes_cbc.iv);
+		rc = nx_build_sg_lists(nx_ctx, req->iv, req->dst, req->src,
+				       &to_process, processed,
+				       csbcpb->cpb.aes_cbc.iv);
 		if (rc)
 			goto out;
 
@@ -83,56 +82,46 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc,
 		}
 
 		rc = nx_hcall_sync(nx_ctx, &nx_ctx->op,
-				   desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP);
+				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP);
 		if (rc)
 			goto out;
 
-		memcpy(desc->info, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
+		memcpy(req->iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
 		atomic_inc(&(nx_ctx->stats->aes_ops));
 		atomic64_add(csbcpb->csb.processed_byte_count,
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
-	} while (processed < nbytes);
+	} while (processed < req->cryptlen);
 out:
 	spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
 	return rc;
 }
 
-static int cbc_aes_nx_encrypt(struct blkcipher_desc *desc,
-			      struct scatterlist    *dst,
-			      struct scatterlist    *src,
-			      unsigned int           nbytes)
+static int cbc_aes_nx_encrypt(struct skcipher_request *req)
 {
-	return cbc_aes_nx_crypt(desc, dst, src, nbytes, 1);
+	return cbc_aes_nx_crypt(req, 1);
 }
 
-static int cbc_aes_nx_decrypt(struct blkcipher_desc *desc,
-			      struct scatterlist    *dst,
-			      struct scatterlist    *src,
-			      unsigned int           nbytes)
+static int cbc_aes_nx_decrypt(struct skcipher_request *req)
 {
-	return cbc_aes_nx_crypt(desc, dst, src, nbytes, 0);
+	return cbc_aes_nx_crypt(req, 0);
 }
 
-struct crypto_alg nx_cbc_aes_alg = {
-	.cra_name        = "cbc(aes)",
-	.cra_driver_name = "cbc-aes-nx",
-	.cra_priority    = 300,
-	.cra_flags       = CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize   = AES_BLOCK_SIZE,
-	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
-	.cra_type        = &crypto_blkcipher_type,
-	.cra_alignmask   = 0xf,
-	.cra_module      = THIS_MODULE,
-	.cra_init        = nx_crypto_ctx_aes_cbc_init,
-	.cra_exit        = nx_crypto_ctx_exit,
-	.cra_blkcipher = {
-		.min_keysize = AES_MIN_KEY_SIZE,
-		.max_keysize = AES_MAX_KEY_SIZE,
-		.ivsize      = AES_BLOCK_SIZE,
-		.setkey      = cbc_aes_nx_set_key,
-		.encrypt     = cbc_aes_nx_encrypt,
-		.decrypt     = cbc_aes_nx_decrypt,
-	}
+struct skcipher_alg nx_cbc_aes_alg = {
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "cbc-aes-nx",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct nx_crypto_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+	.init			= nx_crypto_ctx_aes_cbc_init,
+	.exit			= nx_crypto_ctx_skcipher_exit,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= cbc_aes_nx_set_key,
+	.encrypt		= cbc_aes_nx_encrypt,
+	.decrypt		= cbc_aes_nx_decrypt,
 };
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index 5be8f01c5da8..4c9362eebefd 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -327,7 +327,7 @@ static int generate_pat(u8                   *iv,
 }
 
 static int ccm_nx_decrypt(struct aead_request   *req,
-			  struct blkcipher_desc *desc,
+			  u8                    *iv,
 			  unsigned int assoclen)
 {
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
@@ -348,7 +348,7 @@ static int ccm_nx_decrypt(struct aead_request   *req,
 				 req->src, nbytes + req->assoclen, authsize,
 				 SCATTERWALK_FROM_SG);
 
-	rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes, assoclen,
+	rc = generate_pat(iv, req, nx_ctx, authsize, nbytes, assoclen,
 			  csbcpb->cpb.aes_ccm.in_pat_or_b0);
 	if (rc)
 		goto out;
@@ -367,7 +367,7 @@ static int ccm_nx_decrypt(struct aead_request   *req,
 
 		NX_CPB_FDM(nx_ctx->csbcpb) &= ~NX_FDM_ENDE_ENCRYPT;
 
-		rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src,
+		rc = nx_build_sg_lists(nx_ctx, iv, req->dst, req->src,
 				       &to_process, processed + req->assoclen,
 				       csbcpb->cpb.aes_ccm.iv_or_ctr);
 		if (rc)
@@ -381,7 +381,7 @@ static int ccm_nx_decrypt(struct aead_request   *req,
 		/* for partial completion, copy following for next
 		 * entry into loop...
 		 */
-		memcpy(desc->info, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE);
+		memcpy(iv, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_ccm.in_pat_or_b0,
 			csbcpb->cpb.aes_ccm.out_pat_or_mac, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_ccm.in_s0,
@@ -405,7 +405,7 @@ out:
 }
 
 static int ccm_nx_encrypt(struct aead_request   *req,
-			  struct blkcipher_desc *desc,
+			  u8                    *iv,
 			  unsigned int assoclen)
 {
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
@@ -418,7 +418,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
-	rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes, assoclen,
+	rc = generate_pat(iv, req, nx_ctx, authsize, nbytes, assoclen,
 			  csbcpb->cpb.aes_ccm.in_pat_or_b0);
 	if (rc)
 		goto out;
@@ -436,7 +436,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 
 		NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT;
 
-		rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src,
+		rc = nx_build_sg_lists(nx_ctx, iv, req->dst, req->src,
 				       &to_process, processed + req->assoclen,
 				       csbcpb->cpb.aes_ccm.iv_or_ctr);
 		if (rc)
@@ -450,7 +450,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 		/* for partial completion, copy following for next
 		 * entry into loop...
 		 */
-		memcpy(desc->info, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE);
+		memcpy(iv, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_ccm.in_pat_or_b0,
 			csbcpb->cpb.aes_ccm.out_pat_or_mac, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_ccm.in_s0,
@@ -481,67 +481,50 @@ static int ccm4309_aes_nx_encrypt(struct aead_request *req)
 {
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
 	struct nx_gcm_rctx *rctx = aead_request_ctx(req);
-	struct blkcipher_desc desc;
 	u8 *iv = rctx->iv;
 
 	iv[0] = 3;
 	memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3);
 	memcpy(iv + 4, req->iv, 8);
 
-	desc.info = iv;
-
-	return ccm_nx_encrypt(req, &desc, req->assoclen - 8);
+	return ccm_nx_encrypt(req, iv, req->assoclen - 8);
 }
 
 static int ccm_aes_nx_encrypt(struct aead_request *req)
 {
-	struct blkcipher_desc desc;
 	int rc;
 
-	desc.info = req->iv;
-
-	rc = crypto_ccm_check_iv(desc.info);
+	rc = crypto_ccm_check_iv(req->iv);
 	if (rc)
 		return rc;
 
-	return ccm_nx_encrypt(req, &desc, req->assoclen);
+	return ccm_nx_encrypt(req, req->iv, req->assoclen);
 }
 
 static int ccm4309_aes_nx_decrypt(struct aead_request *req)
 {
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
 	struct nx_gcm_rctx *rctx = aead_request_ctx(req);
-	struct blkcipher_desc desc;
 	u8 *iv = rctx->iv;
 
 	iv[0] = 3;
 	memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3);
 	memcpy(iv + 4, req->iv, 8);
 
-	desc.info = iv;
-
-	return ccm_nx_decrypt(req, &desc, req->assoclen - 8);
+	return ccm_nx_decrypt(req, iv, req->assoclen - 8);
 }
 
 static int ccm_aes_nx_decrypt(struct aead_request *req)
 {
-	struct blkcipher_desc desc;
 	int rc;
 
-	desc.info = req->iv;
-
-	rc = crypto_ccm_check_iv(desc.info);
+	rc = crypto_ccm_check_iv(req->iv);
 	if (rc)
 		return rc;
 
-	return ccm_nx_decrypt(req, &desc, req->assoclen);
+	return ccm_nx_decrypt(req, req->iv, req->assoclen);
 }
 
-/* tell the block cipher walk routines that this is a stream cipher by
- * setting cra_blocksize to 1. Even using blkcipher_walk_virt_block
- * during encrypt/decrypt doesn't solve this problem, because it calls
- * blkcipher_walk_done under the covers, which doesn't use walk->blocksize,
- * but instead uses this tfm->blocksize. */
 struct aead_alg nx_ccm_aes_alg = {
 	.base = {
 		.cra_name        = "ccm(aes)",
diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 191e226a11a1..6d5ce1a66f1e 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -19,11 +19,11 @@
 #include "nx.h"
 
 
-static int ctr_aes_nx_set_key(struct crypto_tfm *tfm,
-			      const u8          *in_key,
-			      unsigned int       key_len)
+static int ctr_aes_nx_set_key(struct crypto_skcipher *tfm,
+			      const u8               *in_key,
+			      unsigned int            key_len)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
 
 	nx_ctx_init(nx_ctx, HCOP_FC_AES);
@@ -51,11 +51,11 @@ static int ctr_aes_nx_set_key(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm,
-				  const u8          *in_key,
-				  unsigned int       key_len)
+static int ctr3686_aes_nx_set_key(struct crypto_skcipher *tfm,
+				  const u8               *in_key,
+				  unsigned int            key_len)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 
 	if (key_len < CTR_RFC3686_NONCE_SIZE)
 		return -EINVAL;
@@ -69,12 +69,10 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm,
 	return ctr_aes_nx_set_key(tfm, in_key, key_len);
 }
 
-static int ctr_aes_nx_crypt(struct blkcipher_desc *desc,
-			    struct scatterlist    *dst,
-			    struct scatterlist    *src,
-			    unsigned int           nbytes)
+static int ctr_aes_nx_crypt(struct skcipher_request *req, u8 *iv)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
 	unsigned long irq_flags;
 	unsigned int processed = 0, to_process;
@@ -83,10 +81,11 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc,
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
 	do {
-		to_process = nbytes - processed;
+		to_process = req->cryptlen - processed;
 
-		rc = nx_build_sg_lists(nx_ctx, desc, dst, src, &to_process,
-				       processed, csbcpb->cpb.aes_ctr.iv);
+		rc = nx_build_sg_lists(nx_ctx, iv, req->dst, req->src,
+				       &to_process, processed,
+				       csbcpb->cpb.aes_ctr.iv);
 		if (rc)
 			goto out;
 
@@ -96,59 +95,51 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc,
 		}
 
 		rc = nx_hcall_sync(nx_ctx, &nx_ctx->op,
-				   desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP);
+				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP);
 		if (rc)
 			goto out;
 
-		memcpy(desc->info, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
+		memcpy(iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
 
 		atomic_inc(&(nx_ctx->stats->aes_ops));
 		atomic64_add(csbcpb->csb.processed_byte_count,
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
-	} while (processed < nbytes);
+	} while (processed < req->cryptlen);
 out:
 	spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
 	return rc;
 }
 
-static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc,
-				struct scatterlist    *dst,
-				struct scatterlist    *src,
-				unsigned int           nbytes)
+static int ctr3686_aes_nx_crypt(struct skcipher_request *req)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	u8 iv[16];
 
 	memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE);
-	memcpy(iv + CTR_RFC3686_NONCE_SIZE,
-	       desc->info, CTR_RFC3686_IV_SIZE);
+	memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE);
 	iv[12] = iv[13] = iv[14] = 0;
 	iv[15] = 1;
 
-	desc->info = iv;
-
-	return ctr_aes_nx_crypt(desc, dst, src, nbytes);
+	return ctr_aes_nx_crypt(req, iv);
 }
 
-struct crypto_alg nx_ctr3686_aes_alg = {
-	.cra_name        = "rfc3686(ctr(aes))",
-	.cra_driver_name = "rfc3686-ctr-aes-nx",
-	.cra_priority    = 300,
-	.cra_flags       = CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize   = 1,
-	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
-	.cra_type        = &crypto_blkcipher_type,
-	.cra_module      = THIS_MODULE,
-	.cra_init        = nx_crypto_ctx_aes_ctr_init,
-	.cra_exit        = nx_crypto_ctx_exit,
-	.cra_blkcipher = {
-		.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
-		.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
-		.ivsize      = CTR_RFC3686_IV_SIZE,
-		.setkey      = ctr3686_aes_nx_set_key,
-		.encrypt     = ctr3686_aes_nx_crypt,
-		.decrypt     = ctr3686_aes_nx_crypt,
-	}
+struct skcipher_alg nx_ctr3686_aes_alg = {
+	.base.cra_name		= "rfc3686(ctr(aes))",
+	.base.cra_driver_name	= "rfc3686-ctr-aes-nx",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct nx_crypto_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.init			= nx_crypto_ctx_aes_ctr_init,
+	.exit			= nx_crypto_ctx_skcipher_exit,
+	.min_keysize		= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.ivsize			= CTR_RFC3686_IV_SIZE,
+	.setkey			= ctr3686_aes_nx_set_key,
+	.encrypt		= ctr3686_aes_nx_crypt,
+	.decrypt		= ctr3686_aes_nx_crypt,
+	.chunksize		= AES_BLOCK_SIZE,
 };
diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index c67570470c9d..77e338dc33f1 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -18,11 +18,11 @@
 #include "nx.h"
 
 
-static int ecb_aes_nx_set_key(struct crypto_tfm *tfm,
-			      const u8          *in_key,
-			      unsigned int       key_len)
+static int ecb_aes_nx_set_key(struct crypto_skcipher *tfm,
+			      const u8               *in_key,
+			      unsigned int            key_len)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 
 	nx_ctx_init(nx_ctx, HCOP_FC_AES);
@@ -50,13 +50,11 @@ static int ecb_aes_nx_set_key(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
-			    struct scatterlist    *dst,
-			    struct scatterlist    *src,
-			    unsigned int           nbytes,
-			    int                    enc)
+static int ecb_aes_nx_crypt(struct skcipher_request *req,
+			    int                      enc)
 {
-	struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
 	unsigned long irq_flags;
 	unsigned int processed = 0, to_process;
@@ -70,10 +68,10 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
 		NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT;
 
 	do {
-		to_process = nbytes - processed;
+		to_process = req->cryptlen - processed;
 
-		rc = nx_build_sg_lists(nx_ctx, desc, dst, src, &to_process,
-				processed, NULL);
+		rc = nx_build_sg_lists(nx_ctx, NULL, req->dst, req->src,
+				       &to_process, processed, NULL);
 		if (rc)
 			goto out;
 
@@ -83,7 +81,7 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
 		}
 
 		rc = nx_hcall_sync(nx_ctx, &nx_ctx->op,
-				   desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP);
+				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP);
 		if (rc)
 			goto out;
 
@@ -92,46 +90,36 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
-	} while (processed < nbytes);
+	} while (processed < req->cryptlen);
 
 out:
 	spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
 	return rc;
 }
 
-static int ecb_aes_nx_encrypt(struct blkcipher_desc *desc,
-			      struct scatterlist    *dst,
-			      struct scatterlist    *src,
-			      unsigned int           nbytes)
+static int ecb_aes_nx_encrypt(struct skcipher_request *req)
 {
-	return ecb_aes_nx_crypt(desc, dst, src, nbytes, 1);
+	return ecb_aes_nx_crypt(req, 1);
 }
 
-static int ecb_aes_nx_decrypt(struct blkcipher_desc *desc,
-			      struct scatterlist    *dst,
-			      struct scatterlist    *src,
-			      unsigned int           nbytes)
+static int ecb_aes_nx_decrypt(struct skcipher_request *req)
 {
-	return ecb_aes_nx_crypt(desc, dst, src, nbytes, 0);
+	return ecb_aes_nx_crypt(req, 0);
 }
 
-struct crypto_alg nx_ecb_aes_alg = {
-	.cra_name        = "ecb(aes)",
-	.cra_driver_name = "ecb-aes-nx",
-	.cra_priority    = 300,
-	.cra_flags       = CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize   = AES_BLOCK_SIZE,
-	.cra_alignmask   = 0xf,
-	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
-	.cra_type        = &crypto_blkcipher_type,
-	.cra_module      = THIS_MODULE,
-	.cra_init        = nx_crypto_ctx_aes_ecb_init,
-	.cra_exit        = nx_crypto_ctx_exit,
-	.cra_blkcipher = {
-		.min_keysize = AES_MIN_KEY_SIZE,
-		.max_keysize = AES_MAX_KEY_SIZE,
-		.setkey      = ecb_aes_nx_set_key,
-		.encrypt     = ecb_aes_nx_encrypt,
-		.decrypt     = ecb_aes_nx_decrypt,
-	}
+struct skcipher_alg nx_ecb_aes_alg = {
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "ecb-aes-nx",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_alignmask	= 0xf,
+	.base.cra_ctxsize	= sizeof(struct nx_crypto_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.init			= nx_crypto_ctx_aes_ecb_init,
+	.exit			= nx_crypto_ctx_skcipher_exit,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= ecb_aes_nx_set_key,
+	.encrypt		= ecb_aes_nx_encrypt,
+	.decrypt		= ecb_aes_nx_decrypt,
 };
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index 7d3d67871270..19c6ed5baea4 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -166,8 +166,7 @@ static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
 	return rc;
 }
 
-static int gmac(struct aead_request *req, struct blkcipher_desc *desc,
-		unsigned int assoclen)
+static int gmac(struct aead_request *req, const u8 *iv, unsigned int assoclen)
 {
 	int rc;
 	struct nx_crypto_ctx *nx_ctx =
@@ -190,7 +189,7 @@ static int gmac(struct aead_request *req, struct blkcipher_desc *desc,
 			   nx_ctx->ap->databytelen/NX_PAGE_SIZE);
 
 	/* Copy IV */
-	memcpy(csbcpb->cpb.aes_gcm.iv_or_cnt, desc->info, AES_BLOCK_SIZE);
+	memcpy(csbcpb->cpb.aes_gcm.iv_or_cnt, iv, AES_BLOCK_SIZE);
 
 	do {
 		/*
@@ -240,8 +239,7 @@ out:
 	return rc;
 }
 
-static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
-		     int enc)
+static int gcm_empty(struct aead_request *req, const u8 *iv, int enc)
 {
 	int rc;
 	struct nx_crypto_ctx *nx_ctx =
@@ -268,7 +266,7 @@ static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
 	len = AES_BLOCK_SIZE;
 
 	/* Encrypt the counter/IV */
-	in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) desc->info,
+	in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) iv,
 				 &len, nx_ctx->ap->sglen);
 
 	if (len != AES_BLOCK_SIZE)
@@ -285,7 +283,7 @@ static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
 	nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
 
 	rc = nx_hcall_sync(nx_ctx, &nx_ctx->op,
-			   desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP);
+			   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP);
 	if (rc)
 		goto out;
 	atomic_inc(&(nx_ctx->stats->aes_ops));
@@ -313,7 +311,6 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
 		crypto_aead_ctx(crypto_aead_reqtfm(req));
 	struct nx_gcm_rctx *rctx = aead_request_ctx(req);
 	struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
-	struct blkcipher_desc desc;
 	unsigned int nbytes = req->cryptlen;
 	unsigned int processed = 0, to_process;
 	unsigned long irq_flags;
@@ -321,15 +318,14 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
-	desc.info = rctx->iv;
 	/* initialize the counter */
-	*(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1;
+	*(u32 *)&rctx->iv[NX_GCM_CTR_OFFSET] = 1;
 
 	if (nbytes == 0) {
 		if (assoclen == 0)
-			rc = gcm_empty(req, &desc, enc);
+			rc = gcm_empty(req, rctx->iv, enc);
 		else
-			rc = gmac(req, &desc, assoclen);
+			rc = gmac(req, rctx->iv, assoclen);
 		if (rc)
 			goto out;
 		else
@@ -358,7 +354,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
 		to_process = nbytes - processed;
 
 		csbcpb->cpb.aes_gcm.bit_length_data = nbytes * 8;
-		rc = nx_build_sg_lists(nx_ctx, &desc, req->dst,
+		rc = nx_build_sg_lists(nx_ctx, rctx->iv, req->dst,
 				       req->src, &to_process,
 				       processed + req->assoclen,
 				       csbcpb->cpb.aes_gcm.iv_or_cnt);
@@ -377,7 +373,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
 		if (rc)
 			goto out;
 
-		memcpy(desc.info, csbcpb->cpb.aes_gcm.out_cnt, AES_BLOCK_SIZE);
+		memcpy(rctx->iv, csbcpb->cpb.aes_gcm.out_cnt, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_gcm.in_pat_or_aad,
 			csbcpb->cpb.aes_gcm.out_pat_or_mac, AES_BLOCK_SIZE);
 		memcpy(csbcpb->cpb.aes_gcm.in_s0,
@@ -471,11 +467,6 @@ static int gcm4106_aes_nx_decrypt(struct aead_request *req)
 	return gcm_aes_nx_crypt(req, 0, req->assoclen - 8);
 }
 
-/* tell the block cipher walk routines that this is a stream cipher by
- * setting cra_blocksize to 1. Even using blkcipher_walk_virt_block
- * during encrypt/decrypt doesn't solve this problem, because it calls
- * blkcipher_walk_done under the covers, which doesn't use walk->blocksize,
- * but instead uses this tfm->blocksize. */
 struct aead_alg nx_gcm_aes_alg = {
 	.base = {
 		.cra_name        = "gcm(aes)",
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 28817880c76d..f03c238f5a31 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -243,25 +243,25 @@ static long int trim_sg_list(struct nx_sg *sg,
  *                     scatterlists based on them.
  *
  * @nx_ctx: NX crypto context for the lists we're building
- * @desc: the block cipher descriptor for the operation
+ * @iv: iv data, if the algorithm requires it
  * @dst: destination scatterlist
  * @src: source scatterlist
  * @nbytes: length of data described in the scatterlists
  * @offset: number of bytes to fast-forward past at the beginning of
  *          scatterlists.
- * @iv: destination for the iv data, if the algorithm requires it
+ * @oiv: destination for the iv data, if the algorithm requires it
  *
- * This is common code shared by all the AES algorithms. It uses the block
- * cipher walk routines to traverse input and output scatterlists, building
+ * This is common code shared by all the AES algorithms. It uses the crypto
+ * scatterlist walk routines to traverse input and output scatterlists, building
  * corresponding NX scatterlists
  */
 int nx_build_sg_lists(struct nx_crypto_ctx  *nx_ctx,
-		      struct blkcipher_desc *desc,
+		      const u8              *iv,
 		      struct scatterlist    *dst,
 		      struct scatterlist    *src,
 		      unsigned int          *nbytes,
 		      unsigned int           offset,
-		      u8                    *iv)
+		      u8                    *oiv)
 {
 	unsigned int delta = 0;
 	unsigned int total = *nbytes;
@@ -274,8 +274,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx  *nx_ctx,
 	max_sg_len = min_t(u64, max_sg_len,
 			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
 
-	if (iv)
-		memcpy(iv, desc->info, AES_BLOCK_SIZE);
+	if (oiv)
+		memcpy(oiv, iv, AES_BLOCK_SIZE);
 
 	*nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen);
 
@@ -511,10 +511,10 @@ static bool nx_check_props(struct device *dev, u32 fc, u32 mode)
 	return true;
 }
 
-static int nx_register_alg(struct crypto_alg *alg, u32 fc, u32 mode)
+static int nx_register_skcipher(struct skcipher_alg *alg, u32 fc, u32 mode)
 {
 	return nx_check_props(&nx_driver.viodev->dev, fc, mode) ?
-	       crypto_register_alg(alg) : 0;
+	       crypto_register_skcipher(alg) : 0;
 }
 
 static int nx_register_aead(struct aead_alg *alg, u32 fc, u32 mode)
@@ -531,10 +531,10 @@ static int nx_register_shash(struct shash_alg *alg, u32 fc, u32 mode, int slot)
 	       crypto_register_shash(alg) : 0;
 }
 
-static void nx_unregister_alg(struct crypto_alg *alg, u32 fc, u32 mode)
+static void nx_unregister_skcipher(struct skcipher_alg *alg, u32 fc, u32 mode)
 {
 	if (nx_check_props(NULL, fc, mode))
-		crypto_unregister_alg(alg);
+		crypto_unregister_skcipher(alg);
 }
 
 static void nx_unregister_aead(struct aead_alg *alg, u32 fc, u32 mode)
@@ -573,15 +573,16 @@ static int nx_register_algs(void)
 
 	nx_driver.of.status = NX_OKAY;
 
-	rc = nx_register_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB);
+	rc = nx_register_skcipher(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB);
 	if (rc)
 		goto out;
 
-	rc = nx_register_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC);
+	rc = nx_register_skcipher(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC);
 	if (rc)
 		goto out_unreg_ecb;
 
-	rc = nx_register_alg(&nx_ctr3686_aes_alg, NX_FC_AES, NX_MODE_AES_CTR);
+	rc = nx_register_skcipher(&nx_ctr3686_aes_alg, NX_FC_AES,
+				  NX_MODE_AES_CTR);
 	if (rc)
 		goto out_unreg_cbc;
 
@@ -633,11 +634,11 @@ out_unreg_gcm4106:
 out_unreg_gcm:
 	nx_unregister_aead(&nx_gcm_aes_alg, NX_FC_AES, NX_MODE_AES_GCM);
 out_unreg_ctr3686:
-	nx_unregister_alg(&nx_ctr3686_aes_alg, NX_FC_AES, NX_MODE_AES_CTR);
+	nx_unregister_skcipher(&nx_ctr3686_aes_alg, NX_FC_AES, NX_MODE_AES_CTR);
 out_unreg_cbc:
-	nx_unregister_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC);
+	nx_unregister_skcipher(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC);
 out_unreg_ecb:
-	nx_unregister_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB);
+	nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB);
 out:
 	return rc;
 }
@@ -704,21 +705,21 @@ int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm)
 				  NX_MODE_AES_GCM);
 }
 
-int nx_crypto_ctx_aes_ctr_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_aes_ctr_init(struct crypto_skcipher *tfm)
 {
-	return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES,
+	return nx_crypto_ctx_init(crypto_skcipher_ctx(tfm), NX_FC_AES,
 				  NX_MODE_AES_CTR);
 }
 
-int nx_crypto_ctx_aes_cbc_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_aes_cbc_init(struct crypto_skcipher *tfm)
 {
-	return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES,
+	return nx_crypto_ctx_init(crypto_skcipher_ctx(tfm), NX_FC_AES,
 				  NX_MODE_AES_CBC);
 }
 
-int nx_crypto_ctx_aes_ecb_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_aes_ecb_init(struct crypto_skcipher *tfm)
 {
-	return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES,
+	return nx_crypto_ctx_init(crypto_skcipher_ctx(tfm), NX_FC_AES,
 				  NX_MODE_AES_ECB);
 }
 
@@ -752,6 +753,11 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm)
 	nx_ctx->out_sg = NULL;
 }
 
+void nx_crypto_ctx_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	nx_crypto_ctx_exit(crypto_skcipher_ctx(tfm));
+}
+
 void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm)
 {
 	struct nx_crypto_ctx *nx_ctx = crypto_aead_ctx(tfm);
@@ -798,10 +804,12 @@ static int nx_remove(struct vio_dev *viodev)
 				   NX_FC_AES, NX_MODE_AES_GCM);
 		nx_unregister_aead(&nx_gcm_aes_alg,
 				   NX_FC_AES, NX_MODE_AES_GCM);
-		nx_unregister_alg(&nx_ctr3686_aes_alg,
-				  NX_FC_AES, NX_MODE_AES_CTR);
-		nx_unregister_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC);
-		nx_unregister_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB);
+		nx_unregister_skcipher(&nx_ctr3686_aes_alg,
+				       NX_FC_AES, NX_MODE_AES_CTR);
+		nx_unregister_skcipher(&nx_cbc_aes_alg, NX_FC_AES,
+				       NX_MODE_AES_CBC);
+		nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES,
+				       NX_MODE_AES_ECB);
 	}
 
 	return 0;
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index 7ecca168f8c4..91c54289124a 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -145,19 +145,20 @@ struct crypto_aead;
 int nx_crypto_ctx_aes_ccm_init(struct crypto_aead *tfm);
 int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm);
 int nx_crypto_ctx_aes_xcbc_init(struct crypto_tfm *tfm);
-int nx_crypto_ctx_aes_ctr_init(struct crypto_tfm *tfm);
-int nx_crypto_ctx_aes_cbc_init(struct crypto_tfm *tfm);
-int nx_crypto_ctx_aes_ecb_init(struct crypto_tfm *tfm);
+int nx_crypto_ctx_aes_ctr_init(struct crypto_skcipher *tfm);
+int nx_crypto_ctx_aes_cbc_init(struct crypto_skcipher *tfm);
+int nx_crypto_ctx_aes_ecb_init(struct crypto_skcipher *tfm);
 int nx_crypto_ctx_sha_init(struct crypto_tfm *tfm);
 void nx_crypto_ctx_exit(struct crypto_tfm *tfm);
+void nx_crypto_ctx_skcipher_exit(struct crypto_skcipher *tfm);
 void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm);
 void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function);
 int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op,
 		  u32 may_sleep);
 struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32);
-int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *,
-		      struct scatterlist *, struct scatterlist *, unsigned int *,
-		      unsigned int, u8 *);
+int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, const u8 *iv,
+		      struct scatterlist *dst, struct scatterlist *src,
+		      unsigned int *nbytes, unsigned int offset, u8 *oiv);
 struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int,
 				struct scatterlist *, unsigned int,
 				unsigned int *);
@@ -175,11 +176,11 @@ void nx_debugfs_fini(struct nx_crypto_driver *);
 
 #define NX_PAGE_NUM(x)		((u64)(x) & 0xfffffffffffff000ULL)
 
-extern struct crypto_alg nx_cbc_aes_alg;
-extern struct crypto_alg nx_ecb_aes_alg;
+extern struct skcipher_alg nx_cbc_aes_alg;
+extern struct skcipher_alg nx_ecb_aes_alg;
 extern struct aead_alg nx_gcm_aes_alg;
 extern struct aead_alg nx_gcm4106_aes_alg;
-extern struct crypto_alg nx_ctr3686_aes_alg;
+extern struct skcipher_alg nx_ctr3686_aes_alg;
 extern struct aead_alg nx_ccm_aes_alg;
 extern struct aead_alg nx_ccm4309_aes_alg;
 extern struct shash_alg nx_shash_aes_xcbc_alg;
diff --git a/drivers/crypto/nx/nx_debugfs.c b/drivers/crypto/nx/nx_debugfs.c
index e0d44a5512ab..1975bcbee997 100644
--- a/drivers/crypto/nx/nx_debugfs.c
+++ b/drivers/crypto/nx/nx_debugfs.c
@@ -38,23 +38,23 @@ void nx_debugfs_init(struct nx_crypto_driver *drv)
 	drv->dfs_root = root;
 
 	debugfs_create_u32("aes_ops", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.aes_ops);
+			   root, &drv->stats.aes_ops.counter);
 	debugfs_create_u32("sha256_ops", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.sha256_ops);
+			   root, &drv->stats.sha256_ops.counter);
 	debugfs_create_u32("sha512_ops", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.sha512_ops);
+			   root, &drv->stats.sha512_ops.counter);
 	debugfs_create_u64("aes_bytes", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u64 *)&drv->stats.aes_bytes);
+			   root, &drv->stats.aes_bytes.counter);
 	debugfs_create_u64("sha256_bytes", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u64 *)&drv->stats.sha256_bytes);
+			   root, &drv->stats.sha256_bytes.counter);
 	debugfs_create_u64("sha512_bytes", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u64 *)&drv->stats.sha512_bytes);
+			   root, &drv->stats.sha512_bytes.counter);
 	debugfs_create_u32("errors", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.errors);
+			   root, &drv->stats.errors.counter);
 	debugfs_create_u32("last_error", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.last_error);
+			   root, &drv->stats.last_error.counter);
 	debugfs_create_u32("last_error_pid", S_IRUSR | S_IRGRP | S_IROTH,
-			   root, (u32 *)&drv->stats.last_error_pid);
+			   root, &drv->stats.last_error_pid.counter);
 }
 
 void
diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c
index 9bbedbccfadf..32dc00dc570b 100644
--- a/drivers/crypto/omap-aes-gcm.c
+++ b/drivers/crypto/omap-aes-gcm.c
@@ -13,6 +13,7 @@
 #include <linux/dmaengine.h>
 #include <linux/omap-dma.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 #include <crypto/aes.h>
 #include <crypto/gcm.h>
 #include <crypto/scatterwalk.h>
@@ -29,11 +30,13 @@ static void omap_aes_gcm_finish_req(struct omap_aes_dev *dd, int ret)
 {
 	struct aead_request *req = dd->aead_req;
 
-	dd->flags &= ~FLAGS_BUSY;
 	dd->in_sg = NULL;
 	dd->out_sg = NULL;
 
-	req->base.complete(&req->base, ret);
+	crypto_finalize_aead_request(dd->engine, req, ret);
+
+	pm_runtime_mark_last_busy(dd->dev);
+	pm_runtime_put_autosuspend(dd->dev);
 }
 
 static void omap_aes_gcm_done_task(struct omap_aes_dev *dd)
@@ -81,7 +84,6 @@ static void omap_aes_gcm_done_task(struct omap_aes_dev *dd)
 	}
 
 	omap_aes_gcm_finish_req(dd, ret);
-	omap_aes_gcm_handle_queue(dd, NULL);
 }
 
 static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
@@ -120,11 +122,16 @@ static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
 					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
 					   FLAGS_ASSOC_DATA_ST_SHIFT,
 					   &dd->flags);
+		if (ret)
+			return ret;
 	}
 
 	if (cryptlen) {
 		tmp = scatterwalk_ffwd(sg_arr, req->src, req->assoclen);
 
+		if (nsg)
+			sg_unmark_end(dd->in_sgl);
+
 		ret = omap_crypto_align_sg(&tmp, cryptlen,
 					   AES_BLOCK_SIZE, &dd->in_sgl[nsg],
 					   OMAP_CRYPTO_COPY_DATA |
@@ -132,6 +139,8 @@ static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
 					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
 					   FLAGS_IN_DATA_ST_SHIFT,
 					   &dd->flags);
+		if (ret)
+			return ret;
 	}
 
 	dd->in_sg = dd->in_sgl;
@@ -142,18 +151,20 @@ static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
 	dd->out_sg = req->dst;
 	dd->orig_out = req->dst;
 
-	dd->out_sg = scatterwalk_ffwd(sg_arr, req->dst, assoclen);
+	dd->out_sg = scatterwalk_ffwd(sg_arr, req->dst, req->assoclen);
 
 	flags = 0;
 	if (req->src == req->dst || dd->out_sg == sg_arr)
 		flags |= OMAP_CRYPTO_FORCE_COPY;
 
-	ret = omap_crypto_align_sg(&dd->out_sg, cryptlen,
-				   AES_BLOCK_SIZE, &dd->out_sgl,
-				   flags,
-				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
-	if (ret)
-		return ret;
+	if (cryptlen) {
+		ret = omap_crypto_align_sg(&dd->out_sg, cryptlen,
+					   AES_BLOCK_SIZE, &dd->out_sgl,
+					   flags,
+					   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+		if (ret)
+			return ret;
+	}
 
 	dd->in_sg_len = sg_nents_for_len(dd->in_sg, alen + clen);
 	dd->out_sg_len = sg_nents_for_len(dd->out_sg, clen);
@@ -161,62 +172,12 @@ static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
 	return 0;
 }
 
-static void omap_aes_gcm_complete(struct crypto_async_request *req, int err)
-{
-	struct omap_aes_gcm_result *res = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	res->err = err;
-	complete(&res->completion);
-}
-
 static int do_encrypt_iv(struct aead_request *req, u32 *tag, u32 *iv)
 {
-	struct scatterlist iv_sg, tag_sg;
-	struct skcipher_request *sk_req;
-	struct omap_aes_gcm_result result;
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
-	int ret = 0;
-
-	sk_req = skcipher_request_alloc(ctx->ctr, GFP_KERNEL);
-	if (!sk_req) {
-		pr_err("skcipher: Failed to allocate request\n");
-		return -ENOMEM;
-	}
-
-	init_completion(&result.completion);
-
-	sg_init_one(&iv_sg, iv, AES_BLOCK_SIZE);
-	sg_init_one(&tag_sg, tag, AES_BLOCK_SIZE);
-	skcipher_request_set_callback(sk_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      omap_aes_gcm_complete, &result);
-	ret = crypto_skcipher_setkey(ctx->ctr, (u8 *)ctx->key, ctx->keylen);
-	skcipher_request_set_crypt(sk_req, &iv_sg, &tag_sg, AES_BLOCK_SIZE,
-				   NULL);
-	ret = crypto_skcipher_encrypt(sk_req);
-	switch (ret) {
-	case 0:
-		break;
-	case -EINPROGRESS:
-	case -EBUSY:
-		ret = wait_for_completion_interruptible(&result.completion);
-		if (!ret) {
-			ret = result.err;
-			if (!ret) {
-				reinit_completion(&result.completion);
-				break;
-			}
-		}
-		/* fall through */
-	default:
-		pr_err("Encryption of IV failed for GCM mode\n");
-		break;
-	}
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 
-	skcipher_request_free(sk_req);
-	return ret;
+	aes_encrypt(&ctx->actx, (u8 *)tag, (u8 *)iv);
+	return 0;
 }
 
 void omap_aes_gcm_dma_out_callback(void *data)
@@ -246,37 +207,21 @@ void omap_aes_gcm_dma_out_callback(void *data)
 static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
 				     struct aead_request *req)
 {
-	struct omap_aes_ctx *ctx;
-	struct aead_request *backlog;
-	struct omap_aes_reqctx *rctx;
-	unsigned long flags;
-	int err, ret = 0;
-
-	spin_lock_irqsave(&dd->lock, flags);
 	if (req)
-		ret = aead_enqueue_request(&dd->aead_queue, req);
-	if (dd->flags & FLAGS_BUSY) {
-		spin_unlock_irqrestore(&dd->lock, flags);
-		return ret;
-	}
-
-	backlog = aead_get_backlog(&dd->aead_queue);
-	req = aead_dequeue_request(&dd->aead_queue);
-	if (req)
-		dd->flags |= FLAGS_BUSY;
-	spin_unlock_irqrestore(&dd->lock, flags);
-
-	if (!req)
-		return ret;
+		return crypto_transfer_aead_request_to_engine(dd->engine, req);
 
-	if (backlog)
-		backlog->base.complete(&backlog->base, -EINPROGRESS);
+	return 0;
+}
 
-	ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
-	rctx = aead_request_ctx(req);
+static int omap_aes_gcm_prepare_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	int err;
 
-	dd->ctx = ctx;
-	rctx->dd = dd;
 	dd->aead_req = req;
 
 	rctx->mode &= FLAGS_MODE_MASK;
@@ -286,16 +231,9 @@ static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
 	if (err)
 		return err;
 
-	err = omap_aes_write_ctrl(dd);
-	if (!err)
-		err = omap_aes_crypt_dma_start(dd);
+	dd->ctx = &ctx->octx;
 
-	if (err) {
-		omap_aes_gcm_finish_req(dd, err);
-		omap_aes_gcm_handle_queue(dd, NULL);
-	}
-
-	return ret;
+	return omap_aes_write_ctrl(dd);
 }
 
 static int omap_aes_gcm_crypt(struct aead_request *req, unsigned long mode)
@@ -350,36 +288,39 @@ int omap_aes_gcm_decrypt(struct aead_request *req)
 
 int omap_aes_4106gcm_encrypt(struct aead_request *req)
 {
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
 
-	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv, ctx->octx.nonce, 4);
 	memcpy(rctx->iv + 4, req->iv, 8);
-	return omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM |
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM |
 				  FLAGS_RFC4106_GCM);
 }
 
 int omap_aes_4106gcm_decrypt(struct aead_request *req)
 {
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
 
-	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv, ctx->octx.nonce, 4);
 	memcpy(rctx->iv + 4, req->iv, 8);
-	return omap_aes_gcm_crypt(req, FLAGS_GCM | FLAGS_RFC4106_GCM);
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       omap_aes_gcm_crypt(req, FLAGS_GCM | FLAGS_RFC4106_GCM);
 }
 
 int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 			unsigned int keylen)
 {
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
 
-	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
-	    keylen != AES_KEYSIZE_256)
-		return -EINVAL;
+	ret = aes_expandkey(&ctx->actx, key, keylen);
+	if (ret)
+		return ret;
 
-	memcpy(ctx->key, key, keylen);
-	ctx->keylen = keylen;
+	memcpy(ctx->octx.key, key, keylen);
+	ctx->octx.keylen = keylen;
 
 	return 0;
 }
@@ -387,19 +328,63 @@ int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
 
 	if (keylen < 4)
 		return -EINVAL;
-
 	keylen -= 4;
-	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
-	    keylen != AES_KEYSIZE_256)
-		return -EINVAL;
 
-	memcpy(ctx->key, key, keylen);
-	memcpy(ctx->nonce, key + keylen, 4);
-	ctx->keylen = keylen;
+	ret = aes_expandkey(&ctx->actx, key, keylen);
+	if (ret)
+		return ret;
+
+	memcpy(ctx->octx.key, key, keylen);
+	memcpy(ctx->octx.nonce, key + keylen, 4);
+	ctx->octx.keylen = keylen;
+
+	return 0;
+}
+
+int omap_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	return crypto_gcm_check_authsize(authsize);
+}
+
+int omap_aes_4106gcm_setauthsize(struct crypto_aead *parent,
+				 unsigned int authsize)
+{
+	return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
+	int ret = 0;
+
+	if (!dd)
+		return -ENODEV;
+
+	if (dd->in_sg_len)
+		ret = omap_aes_crypt_dma_start(dd);
+	else
+		omap_aes_gcm_dma_out_callback(dd);
+
+	return ret;
+}
+
+int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	ctx->enginectx.op.prepare_request = omap_aes_gcm_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+	ctx->enginectx.op.do_one_request = omap_aes_gcm_crypt_req;
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct omap_aes_reqctx));
 
 	return 0;
 }
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 2f53fbb74100..824ddf2a66ff 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -142,8 +142,8 @@ int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 			__le32_to_cpu(dd->ctx->key[i]));
 	}
 
-	if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info)
-		omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4);
+	if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->iv)
+		omap_aes_write_n(dd, AES_REG_IV(dd, 0), (void *)dd->req->iv, 4);
 
 	if ((dd->flags & (FLAGS_GCM)) && dd->aead_req->iv) {
 		rctx = aead_request_ctx(dd->aead_req);
@@ -269,13 +269,14 @@ static int omap_aes_crypt_dma(struct omap_aes_dev *dd,
 			      struct scatterlist *out_sg,
 			      int in_sg_len, int out_sg_len)
 {
-	struct dma_async_tx_descriptor *tx_in, *tx_out;
+	struct dma_async_tx_descriptor *tx_in, *tx_out = NULL, *cb_desc;
 	struct dma_slave_config cfg;
 	int ret;
 
 	if (dd->pio_only) {
 		scatterwalk_start(&dd->in_walk, dd->in_sg);
-		scatterwalk_start(&dd->out_walk, dd->out_sg);
+		if (out_sg_len)
+			scatterwalk_start(&dd->out_walk, dd->out_sg);
 
 		/* Enable DATAIN interrupt and let it take
 		   care of the rest */
@@ -312,34 +313,45 @@ static int omap_aes_crypt_dma(struct omap_aes_dev *dd,
 
 	/* No callback necessary */
 	tx_in->callback_param = dd;
+	tx_in->callback = NULL;
 
 	/* OUT */
-	ret = dmaengine_slave_config(dd->dma_lch_out, &cfg);
-	if (ret) {
-		dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
-			ret);
-		return ret;
-	}
+	if (out_sg_len) {
+		ret = dmaengine_slave_config(dd->dma_lch_out, &cfg);
+		if (ret) {
+			dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
+				ret);
+			return ret;
+		}
 
-	tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, out_sg_len,
-					DMA_DEV_TO_MEM,
-					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!tx_out) {
-		dev_err(dd->dev, "OUT prep_slave_sg() failed\n");
-		return -EINVAL;
+		tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg,
+						 out_sg_len,
+						 DMA_DEV_TO_MEM,
+						 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!tx_out) {
+			dev_err(dd->dev, "OUT prep_slave_sg() failed\n");
+			return -EINVAL;
+		}
+
+		cb_desc = tx_out;
+	} else {
+		cb_desc = tx_in;
 	}
 
 	if (dd->flags & FLAGS_GCM)
-		tx_out->callback = omap_aes_gcm_dma_out_callback;
+		cb_desc->callback = omap_aes_gcm_dma_out_callback;
 	else
-		tx_out->callback = omap_aes_dma_out_callback;
-	tx_out->callback_param = dd;
+		cb_desc->callback = omap_aes_dma_out_callback;
+	cb_desc->callback_param = dd;
+
 
 	dmaengine_submit(tx_in);
-	dmaengine_submit(tx_out);
+	if (tx_out)
+		dmaengine_submit(tx_out);
 
 	dma_async_issue_pending(dd->dma_lch_in);
-	dma_async_issue_pending(dd->dma_lch_out);
+	if (out_sg_len)
+		dma_async_issue_pending(dd->dma_lch_out);
 
 	/* start DMA */
 	dd->pdata->trigger(dd, dd->total);
@@ -361,11 +373,13 @@ int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 			return -EINVAL;
 		}
 
-		err = dma_map_sg(dd->dev, dd->out_sg, dd->out_sg_len,
-				 DMA_FROM_DEVICE);
-		if (!err) {
-			dev_err(dd->dev, "dma_map_sg() error\n");
-			return -EINVAL;
+		if (dd->out_sg_len) {
+			err = dma_map_sg(dd->dev, dd->out_sg, dd->out_sg_len,
+					 DMA_FROM_DEVICE);
+			if (!err) {
+				dev_err(dd->dev, "dma_map_sg() error\n");
+				return -EINVAL;
+			}
 		}
 	}
 
@@ -373,8 +387,9 @@ int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 				 dd->out_sg_len);
 	if (err && !dd->pio_only) {
 		dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
-		dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len,
-			     DMA_FROM_DEVICE);
+		if (dd->out_sg_len)
+			dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len,
+				     DMA_FROM_DEVICE);
 	}
 
 	return err;
@@ -382,11 +397,11 @@ int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 
 static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 {
-	struct ablkcipher_request *req = dd->req;
+	struct skcipher_request *req = dd->req;
 
 	pr_debug("err: %d\n", err);
 
-	crypto_finalize_ablkcipher_request(dd->engine, req, err);
+	crypto_finalize_skcipher_request(dd->engine, req, err);
 
 	pm_runtime_mark_last_busy(dd->dev);
 	pm_runtime_put_autosuspend(dd->dev);
@@ -403,10 +418,10 @@ int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 }
 
 static int omap_aes_handle_queue(struct omap_aes_dev *dd,
-				 struct ablkcipher_request *req)
+				 struct skcipher_request *req)
 {
 	if (req)
-		return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
+		return crypto_transfer_skcipher_request_to_engine(dd->engine, req);
 
 	return 0;
 }
@@ -414,10 +429,10 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 static int omap_aes_prepare_req(struct crypto_engine *engine,
 				void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
-	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
+	struct omap_aes_reqctx *rctx = skcipher_request_ctx(req);
 	struct omap_aes_dev *dd = rctx->dd;
 	int ret;
 	u16 flags;
@@ -427,8 +442,8 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 
 	/* assign new request to device */
 	dd->req = req;
-	dd->total = req->nbytes;
-	dd->total_save = req->nbytes;
+	dd->total = req->cryptlen;
+	dd->total_save = req->cryptlen;
 	dd->in_sg = req->src;
 	dd->out_sg = req->dst;
 	dd->orig_out = req->dst;
@@ -469,8 +484,8 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 static int omap_aes_crypt_req(struct crypto_engine *engine,
 			      void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
-	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+	struct omap_aes_reqctx *rctx = skcipher_request_ctx(req);
 	struct omap_aes_dev *dd = rctx->dd;
 
 	if (!dd)
@@ -479,6 +494,14 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 	return omap_aes_crypt_dma_start(dd);
 }
 
+static void omap_aes_copy_ivout(struct omap_aes_dev *dd, u8 *ivbuf)
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		((u32 *)ivbuf)[i] = omap_aes_read(dd, AES_REG_IV(dd, i));
+}
+
 static void omap_aes_done_task(unsigned long data)
 {
 	struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
@@ -494,37 +517,44 @@ static void omap_aes_done_task(unsigned long data)
 		omap_aes_crypt_dma_stop(dd);
 	}
 
-	omap_crypto_cleanup(dd->in_sgl, NULL, 0, dd->total_save,
+	omap_crypto_cleanup(dd->in_sg, NULL, 0, dd->total_save,
 			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
 
-	omap_crypto_cleanup(&dd->out_sgl, dd->orig_out, 0, dd->total_save,
+	omap_crypto_cleanup(dd->out_sg, dd->orig_out, 0, dd->total_save,
 			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
 
+	/* Update IV output */
+	if (dd->flags & (FLAGS_CBC | FLAGS_CTR))
+		omap_aes_copy_ivout(dd, dd->req->iv);
+
 	omap_aes_finish_req(dd, 0);
 
 	pr_debug("exit\n");
 }
 
-static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int omap_aes_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
+	struct omap_aes_reqctx *rctx = skcipher_request_ctx(req);
 	struct omap_aes_dev *dd;
 	int ret;
 
-	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
+	if ((req->cryptlen % AES_BLOCK_SIZE) && !(mode & FLAGS_CTR))
+		return -EINVAL;
+
+	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->cryptlen,
 		  !!(mode & FLAGS_ENCRYPT),
 		  !!(mode & FLAGS_CBC));
 
-	if (req->nbytes < aes_fallback_sz) {
+	if (req->cryptlen < aes_fallback_sz) {
 		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
 		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags, NULL,
 					      NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 
 		if (mode & FLAGS_ENCRYPT)
 			ret = crypto_skcipher_encrypt(subreq);
@@ -545,10 +575,10 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 
 /* ********************** ALG API ************************************ */
 
-static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int omap_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
 	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
@@ -571,32 +601,32 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int omap_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int omap_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, FLAGS_ENCRYPT);
 }
 
-static int omap_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int omap_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, 0);
 }
 
-static int omap_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int omap_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
 }
 
-static int omap_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int omap_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, FLAGS_CBC);
 }
 
-static int omap_aes_ctr_encrypt(struct ablkcipher_request *req)
+static int omap_aes_ctr_encrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CTR);
 }
 
-static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
+static int omap_aes_ctr_decrypt(struct skcipher_request *req)
 {
 	return omap_aes_crypt(req, FLAGS_CTR);
 }
@@ -606,10 +636,10 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 static int omap_aes_crypt_req(struct crypto_engine *engine,
 			      void *req);
 
-static int omap_aes_cra_init(struct crypto_tfm *tfm)
+static int omap_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	const char *name = crypto_tfm_alg_name(tfm);
-	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_sync_skcipher *blk;
 
 	blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -618,7 +648,7 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 
 	ctx->fallback = blk;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_aes_reqctx));
 
 	ctx->enginectx.op.prepare_request = omap_aes_prepare_req;
 	ctx->enginectx.op.unprepare_request = NULL;
@@ -627,39 +657,9 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
-{
-	struct omap_aes_dev *dd = NULL;
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
-	int err;
-
-	/* Find AES device, currently picks the first device */
-	spin_lock_bh(&list_lock);
-	list_for_each_entry(dd, &dev_list, list) {
-		break;
-	}
-	spin_unlock_bh(&list_lock);
-
-	err = pm_runtime_get_sync(dd->dev);
-	if (err < 0) {
-		dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
-			__func__, err);
-		return err;
-	}
-
-	tfm->reqsize = sizeof(struct omap_aes_reqctx);
-	ctx->ctr = crypto_alloc_skcipher("ecb(aes)", 0, 0);
-	if (IS_ERR(ctx->ctr)) {
-		pr_warn("could not load aes driver for encrypting IV\n");
-		return PTR_ERR(ctx->ctr);
-	}
-
-	return 0;
-}
-
-static void omap_aes_cra_exit(struct crypto_tfm *tfm)
+static void omap_aes_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	if (ctx->fallback)
 		crypto_free_sync_skcipher(ctx->fallback);
@@ -667,90 +667,71 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 	ctx->fallback = NULL;
 }
 
-static void omap_aes_gcm_cra_exit(struct crypto_aead *tfm)
-{
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
-
-	omap_aes_cra_exit(crypto_aead_tfm(tfm));
-
-	if (ctx->ctr)
-		crypto_free_skcipher(ctx->ctr);
-}
-
 /* ********************** ALGS ************************************ */
 
-static struct crypto_alg algs_ecb_cbc[] = {
+static struct skcipher_alg algs_ecb_cbc[] = {
 {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-omap",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_aes_cra_init,
-	.cra_exit		= omap_aes_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= omap_aes_setkey,
-		.encrypt	= omap_aes_ecb_encrypt,
-		.decrypt	= omap_aes_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "ecb-aes-omap",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= omap_aes_setkey,
+	.encrypt		= omap_aes_ecb_encrypt,
+	.decrypt		= omap_aes_ecb_decrypt,
+	.init			= omap_aes_init_tfm,
+	.exit			= omap_aes_exit_tfm,
 },
 {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-omap",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_aes_cra_init,
-	.cra_exit		= omap_aes_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= omap_aes_setkey,
-		.encrypt	= omap_aes_cbc_encrypt,
-		.decrypt	= omap_aes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "cbc-aes-omap",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= omap_aes_setkey,
+	.encrypt		= omap_aes_cbc_encrypt,
+	.decrypt		= omap_aes_cbc_decrypt,
+	.init			= omap_aes_init_tfm,
+	.exit			= omap_aes_exit_tfm,
 }
 };
 
-static struct crypto_alg algs_ctr[] = {
+static struct skcipher_alg algs_ctr[] = {
 {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-omap",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_aes_cra_init,
-	.cra_exit		= omap_aes_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= omap_aes_setkey,
-		.encrypt	= omap_aes_ctr_encrypt,
-		.decrypt	= omap_aes_ctr_decrypt,
-	}
-} ,
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-omap",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= omap_aes_setkey,
+	.encrypt		= omap_aes_ctr_encrypt,
+	.decrypt		= omap_aes_ctr_decrypt,
+	.init			= omap_aes_init_tfm,
+	.exit			= omap_aes_exit_tfm,
+}
 };
 
 static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
@@ -769,15 +750,15 @@ static struct aead_alg algs_aead_gcm[] = {
 		.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
 		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
 	.init		= omap_aes_gcm_cra_init,
-	.exit		= omap_aes_gcm_cra_exit,
 	.ivsize		= GCM_AES_IV_SIZE,
 	.maxauthsize	= AES_BLOCK_SIZE,
 	.setkey		= omap_aes_gcm_setkey,
+	.setauthsize	= omap_aes_gcm_setauthsize,
 	.encrypt	= omap_aes_gcm_encrypt,
 	.decrypt	= omap_aes_gcm_decrypt,
 },
@@ -789,15 +770,15 @@ static struct aead_alg algs_aead_gcm[] = {
 		.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
 		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
 	.init		= omap_aes_gcm_cra_init,
-	.exit		= omap_aes_gcm_cra_exit,
 	.maxauthsize	= AES_BLOCK_SIZE,
 	.ivsize		= GCM_RFC4106_IV_SIZE,
 	.setkey		= omap_aes_4106gcm_setkey,
+	.setauthsize	= omap_aes_4106gcm_setauthsize,
 	.encrypt	= omap_aes_4106gcm_encrypt,
 	.decrypt	= omap_aes_4106gcm_decrypt,
 },
@@ -1121,7 +1102,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct omap_aes_dev *dd;
-	struct crypto_alg *algp;
+	struct skcipher_alg *algp;
 	struct aead_alg *aalg;
 	struct resource res;
 	int err = -ENOMEM, i, j, irq = -1;
@@ -1215,9 +1196,9 @@ static int omap_aes_probe(struct platform_device *pdev)
 			for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 				algp = &dd->pdata->algs_info[i].algs_list[j];
 
-				pr_debug("reg alg: %s\n", algp->cra_name);
+				pr_debug("reg alg: %s\n", algp->base.cra_name);
 
-				err = crypto_register_alg(algp);
+				err = crypto_register_skcipher(algp);
 				if (err)
 					goto err_algs;
 
@@ -1230,9 +1211,8 @@ static int omap_aes_probe(struct platform_device *pdev)
 	    !dd->pdata->aead_algs_info->registered) {
 		for (i = 0; i < dd->pdata->aead_algs_info->size; i++) {
 			aalg = &dd->pdata->aead_algs_info->algs_list[i];
-			algp = &aalg->base;
 
-			pr_debug("reg alg: %s\n", algp->cra_name);
+			pr_debug("reg alg: %s\n", aalg->base.cra_name);
 
 			err = crypto_register_aead(aalg);
 			if (err)
@@ -1257,7 +1237,7 @@ err_aead_algs:
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_alg(
+			crypto_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 err_engine:
@@ -1290,7 +1270,7 @@ static int omap_aes_remove(struct platform_device *pdev)
 
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_alg(
+			crypto_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 	for (i = dd->pdata->aead_algs_info->size - 1; i >= 0; i--) {
@@ -1303,7 +1283,8 @@ static int omap_aes_remove(struct platform_device *pdev)
 	tasklet_kill(&dd->done_task);
 	omap_aes_dma_cleanup(dd);
 	pm_runtime_disable(dd->dev);
-	dd = NULL;
+
+	sysfs_remove_group(&dd->dev->kobj, &omap_aes_attr_group);
 
 	return 0;
 }
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
index 2d4b1f87a1c9..2d111bf906e1 100644
--- a/drivers/crypto/omap-aes.h
+++ b/drivers/crypto/omap-aes.h
@@ -9,6 +9,7 @@
 #ifndef __OMAP_AES_H__
 #define __OMAP_AES_H__
 
+#include <crypto/aes.h>
 #include <crypto/engine.h>
 
 #define DST_MAXBURST			4
@@ -79,7 +80,6 @@
 
 #define FLAGS_INIT		BIT(5)
 #define FLAGS_FAST		BIT(6)
-#define FLAGS_BUSY		BIT(7)
 
 #define FLAGS_IN_DATA_ST_SHIFT	8
 #define FLAGS_OUT_DATA_ST_SHIFT	10
@@ -98,7 +98,11 @@ struct omap_aes_ctx {
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	u8		nonce[4];
 	struct crypto_sync_skcipher	*fallback;
-	struct crypto_skcipher	*ctr;
+};
+
+struct omap_aes_gcm_ctx {
+	struct omap_aes_ctx	octx;
+	struct crypto_aes_ctx	actx;
 };
 
 struct omap_aes_reqctx {
@@ -112,7 +116,7 @@ struct omap_aes_reqctx {
 #define OMAP_AES_CACHE_SIZE	0
 
 struct omap_aes_algs_info {
-	struct crypto_alg	*algs_list;
+	struct skcipher_alg	*algs_list;
 	unsigned int		size;
 	unsigned int		registered;
 };
@@ -162,7 +166,7 @@ struct omap_aes_dev {
 	struct aead_queue	aead_queue;
 	spinlock_t		lock;
 
-	struct ablkcipher_request	*req;
+	struct skcipher_request		*req;
 	struct aead_request		*aead_req;
 	struct crypto_engine		*engine;
 
@@ -202,8 +206,12 @@ int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 			    unsigned int keylen);
 int omap_aes_gcm_encrypt(struct aead_request *req);
 int omap_aes_gcm_decrypt(struct aead_request *req);
+int omap_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize);
 int omap_aes_4106gcm_encrypt(struct aead_request *req);
 int omap_aes_4106gcm_decrypt(struct aead_request *req);
+int omap_aes_4106gcm_setauthsize(struct crypto_aead *parent,
+				 unsigned int authsize);
+int omap_aes_gcm_cra_init(struct crypto_aead *tfm);
 int omap_aes_write_ctrl(struct omap_aes_dev *dd);
 int omap_aes_crypt_dma_start(struct omap_aes_dev *dd);
 int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd);
diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c
index 7d592d93bb1c..cc88b7362bc2 100644
--- a/drivers/crypto/omap-crypto.c
+++ b/drivers/crypto/omap-crypto.c
@@ -154,6 +154,41 @@ int omap_crypto_align_sg(struct scatterlist **sg, int total, int bs,
 }
 EXPORT_SYMBOL_GPL(omap_crypto_align_sg);
 
+static void omap_crypto_copy_data(struct scatterlist *src,
+				  struct scatterlist *dst,
+				  int offset, int len)
+{
+	int amt;
+	void *srcb, *dstb;
+	int srco = 0, dsto = offset;
+
+	while (src && dst && len) {
+		if (srco >= src->length) {
+			srco -= src->length;
+			src = sg_next(src);
+			continue;
+		}
+
+		if (dsto >= dst->length) {
+			dsto -= dst->length;
+			dst = sg_next(dst);
+			continue;
+		}
+
+		amt = min(src->length - srco, dst->length - dsto);
+		amt = min(len, amt);
+
+		srcb = sg_virt(src) + srco;
+		dstb = sg_virt(dst) + dsto;
+
+		memcpy(dstb, srcb, amt);
+
+		srco += amt;
+		dsto += amt;
+		len -= amt;
+	}
+}
+
 void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
 			 int offset, int len, u8 flags_shift,
 			 unsigned long flags)
@@ -171,7 +206,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
 	pages = get_order(len);
 
 	if (orig && (flags & OMAP_CRYPTO_COPY_MASK))
-		scatterwalk_map_and_copy(buf, orig, offset, len, 1);
+		omap_crypto_copy_data(sg, orig, offset, len);
 
 	if (flags & OMAP_CRYPTO_DATA_COPIED)
 		free_pages((unsigned long)buf, pages);
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index b19d7e5d55ec..8eda43319204 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -34,6 +34,7 @@
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/algapi.h>
 #include <crypto/engine.h>
 
@@ -98,7 +99,7 @@ struct omap_des_reqctx {
 #define OMAP_DES_CACHE_SIZE	0
 
 struct omap_des_algs_info {
-	struct crypto_alg	*algs_list;
+	struct skcipher_alg	*algs_list;
 	unsigned int		size;
 	unsigned int		registered;
 };
@@ -139,7 +140,7 @@ struct omap_des_dev {
 
 	struct tasklet_struct	done_task;
 
-	struct ablkcipher_request	*req;
+	struct skcipher_request	*req;
 	struct crypto_engine		*engine;
 	/*
 	 * total is used by PIO mode for book keeping so introduce
@@ -261,8 +262,8 @@ static int omap_des_write_ctrl(struct omap_des_dev *dd)
 			       __le32_to_cpu(dd->ctx->key[i]));
 	}
 
-	if ((dd->flags & FLAGS_CBC) && dd->req->info)
-		omap_des_write_n(dd, DES_REG_IV(dd, 0), dd->req->info, 2);
+	if ((dd->flags & FLAGS_CBC) && dd->req->iv)
+		omap_des_write_n(dd, DES_REG_IV(dd, 0), (void *)dd->req->iv, 2);
 
 	if (dd->flags & FLAGS_CBC)
 		val |= DES_REG_CTRL_CBC;
@@ -456,8 +457,8 @@ static int omap_des_crypt_dma(struct crypto_tfm *tfm,
 
 static int omap_des_crypt_dma_start(struct omap_des_dev *dd)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
-					crypto_ablkcipher_reqtfm(dd->req));
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(
+					crypto_skcipher_reqtfm(dd->req));
 	int err;
 
 	pr_debug("total: %d\n", dd->total);
@@ -491,11 +492,11 @@ static int omap_des_crypt_dma_start(struct omap_des_dev *dd)
 
 static void omap_des_finish_req(struct omap_des_dev *dd, int err)
 {
-	struct ablkcipher_request *req = dd->req;
+	struct skcipher_request *req = dd->req;
 
 	pr_debug("err: %d\n", err);
 
-	crypto_finalize_ablkcipher_request(dd->engine, req, err);
+	crypto_finalize_skcipher_request(dd->engine, req, err);
 
 	pm_runtime_mark_last_busy(dd->dev);
 	pm_runtime_put_autosuspend(dd->dev);
@@ -514,10 +515,10 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
 }
 
 static int omap_des_handle_queue(struct omap_des_dev *dd,
-				 struct ablkcipher_request *req)
+				 struct skcipher_request *req)
 {
 	if (req)
-		return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
+		return crypto_transfer_skcipher_request_to_engine(dd->engine, req);
 
 	return 0;
 }
@@ -525,9 +526,9 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
 static int omap_des_prepare_req(struct crypto_engine *engine,
 				void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
+	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
 	struct omap_des_dev *dd = omap_des_find_dev(ctx);
 	struct omap_des_reqctx *rctx;
 	int ret;
@@ -538,8 +539,8 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 
 	/* assign new request to device */
 	dd->req = req;
-	dd->total = req->nbytes;
-	dd->total_save = req->nbytes;
+	dd->total = req->cryptlen;
+	dd->total_save = req->cryptlen;
 	dd->in_sg = req->src;
 	dd->out_sg = req->dst;
 	dd->orig_out = req->dst;
@@ -568,8 +569,8 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 	if (dd->out_sg_len < 0)
 		return dd->out_sg_len;
 
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx = skcipher_request_ctx(req);
+	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	rctx->mode &= FLAGS_MODE_MASK;
 	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
 
@@ -582,9 +583,9 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 static int omap_des_crypt_req(struct crypto_engine *engine,
 			      void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
+	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
 	struct omap_des_dev *dd = omap_des_find_dev(ctx);
 
 	if (!dd)
@@ -596,6 +597,7 @@ static int omap_des_crypt_req(struct crypto_engine *engine,
 static void omap_des_done_task(unsigned long data)
 {
 	struct omap_des_dev *dd = (struct omap_des_dev *)data;
+	int i;
 
 	pr_debug("enter done_task\n");
 
@@ -614,26 +616,32 @@ static void omap_des_done_task(unsigned long data)
 	omap_crypto_cleanup(&dd->out_sgl, dd->orig_out, 0, dd->total_save,
 			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
 
+	if ((dd->flags & FLAGS_CBC) && dd->req->iv)
+		for (i = 0; i < 2; i++)
+			((u32 *)dd->req->iv)[i] =
+				omap_des_read(dd, DES_REG_IV(dd, i));
+
 	omap_des_finish_req(dd, 0);
 
 	pr_debug("exit\n");
 }
 
-static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int omap_des_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct omap_des_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
+	struct omap_des_reqctx *rctx = skcipher_request_ctx(req);
 	struct omap_des_dev *dd;
 
-	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
+	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->cryptlen,
 		 !!(mode & FLAGS_ENCRYPT),
 		 !!(mode & FLAGS_CBC));
 
-	if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) {
-		pr_err("request size is not exact amount of DES blocks\n");
+	if (!req->cryptlen)
+		return 0;
+
+	if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE))
 		return -EINVAL;
-	}
 
 	dd = omap_des_find_dev(ctx);
 	if (!dd)
@@ -646,15 +654,15 @@ static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode)
 
 /* ********************** ALG API ************************************ */
 
-static int omap_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int omap_des_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			   unsigned int keylen)
 {
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
 	pr_debug("enter, keylen: %d\n", keylen);
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(cipher, key);
 	if (err)
 		return err;
 
@@ -664,15 +672,15 @@ static int omap_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int omap_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int omap_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			    unsigned int keylen)
 {
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
 	pr_debug("enter, keylen: %d\n", keylen);
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(cipher, key);
 	if (err)
 		return err;
 
@@ -682,22 +690,22 @@ static int omap_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 }
 
-static int omap_des_ecb_encrypt(struct ablkcipher_request *req)
+static int omap_des_ecb_encrypt(struct skcipher_request *req)
 {
 	return omap_des_crypt(req, FLAGS_ENCRYPT);
 }
 
-static int omap_des_ecb_decrypt(struct ablkcipher_request *req)
+static int omap_des_ecb_decrypt(struct skcipher_request *req)
 {
 	return omap_des_crypt(req, 0);
 }
 
-static int omap_des_cbc_encrypt(struct ablkcipher_request *req)
+static int omap_des_cbc_encrypt(struct skcipher_request *req)
 {
 	return omap_des_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
 }
 
-static int omap_des_cbc_decrypt(struct ablkcipher_request *req)
+static int omap_des_cbc_decrypt(struct skcipher_request *req)
 {
 	return omap_des_crypt(req, FLAGS_CBC);
 }
@@ -707,13 +715,13 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 static int omap_des_crypt_req(struct crypto_engine *engine,
 			      void *areq);
 
-static int omap_des_cra_init(struct crypto_tfm *tfm)
+static int omap_des_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct omap_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct omap_des_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	pr_debug("enter\n");
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_des_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_des_reqctx));
 
 	ctx->enginectx.op.prepare_request = omap_des_prepare_req;
 	ctx->enginectx.op.unprepare_request = NULL;
@@ -722,103 +730,78 @@ static int omap_des_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void omap_des_cra_exit(struct crypto_tfm *tfm)
-{
-	pr_debug("enter\n");
-}
-
 /* ********************** ALGS ************************************ */
 
-static struct crypto_alg algs_ecb_cbc[] = {
+static struct skcipher_alg algs_ecb_cbc[] = {
 {
-	.cra_name		= "ecb(des)",
-	.cra_driver_name	= "ecb-des-omap",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+	.base.cra_name		= "ecb(des)",
+	.base.cra_driver_name	= "ecb-des-omap",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_des_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_des_cra_init,
-	.cra_exit		= omap_des_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.setkey		= omap_des_setkey,
-		.encrypt	= omap_des_ecb_encrypt,
-		.decrypt	= omap_des_ecb_decrypt,
-	}
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.setkey			= omap_des_setkey,
+	.encrypt		= omap_des_ecb_encrypt,
+	.decrypt		= omap_des_ecb_decrypt,
+	.init			= omap_des_init_tfm,
 },
 {
-	.cra_name		= "cbc(des)",
-	.cra_driver_name	= "cbc-des-omap",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+	.base.cra_name		= "cbc(des)",
+	.base.cra_driver_name	= "cbc-des-omap",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_des_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_des_cra_init,
-	.cra_exit		= omap_des_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= omap_des_setkey,
-		.encrypt	= omap_des_cbc_encrypt,
-		.decrypt	= omap_des_cbc_decrypt,
-	}
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES_KEY_SIZE,
+	.max_keysize		= DES_KEY_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= omap_des_setkey,
+	.encrypt		= omap_des_cbc_encrypt,
+	.decrypt		= omap_des_cbc_decrypt,
+	.init			= omap_des_init_tfm,
 },
 {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "ecb-des3-omap",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+	.base.cra_name		= "ecb(des3_ede)",
+	.base.cra_driver_name	= "ecb-des3-omap",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_des_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_des_cra_init,
-	.cra_exit		= omap_des_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 3*DES_KEY_SIZE,
-		.max_keysize	= 3*DES_KEY_SIZE,
-		.setkey		= omap_des3_setkey,
-		.encrypt	= omap_des_ecb_encrypt,
-		.decrypt	= omap_des_ecb_decrypt,
-	}
+	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.setkey			= omap_des3_setkey,
+	.encrypt		= omap_des_ecb_encrypt,
+	.decrypt		= omap_des_ecb_decrypt,
+	.init			= omap_des_init_tfm,
 },
 {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "cbc-des3-omap",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+	.base.cra_name		= "cbc(des3_ede)",
+	.base.cra_driver_name	= "cbc-des3-omap",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct omap_des_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= omap_des_cra_init,
-	.cra_exit		= omap_des_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= 3*DES_KEY_SIZE,
-		.max_keysize	= 3*DES_KEY_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= omap_des3_setkey,
-		.encrypt	= omap_des_cbc_encrypt,
-		.decrypt	= omap_des_cbc_decrypt,
-	}
+	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= DES3_EDE_KEY_SIZE,
+	.max_keysize		= DES3_EDE_KEY_SIZE,
+	.ivsize			= DES3_EDE_BLOCK_SIZE,
+	.setkey			= omap_des3_setkey,
+	.encrypt		= omap_des_cbc_encrypt,
+	.decrypt		= omap_des_cbc_decrypt,
+	.init			= omap_des_init_tfm,
 }
 };
 
@@ -976,7 +959,7 @@ static int omap_des_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct omap_des_dev *dd;
-	struct crypto_alg *algp;
+	struct skcipher_alg *algp;
 	struct resource *res;
 	int err = -ENOMEM, i, j, irq = -1;
 	u32 reg;
@@ -1071,9 +1054,9 @@ static int omap_des_probe(struct platform_device *pdev)
 		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 			algp = &dd->pdata->algs_info[i].algs_list[j];
 
-			pr_debug("reg alg: %s\n", algp->cra_name);
+			pr_debug("reg alg: %s\n", algp->base.cra_name);
 
-			err = crypto_register_alg(algp);
+			err = crypto_register_skcipher(algp);
 			if (err)
 				goto err_algs;
 
@@ -1086,7 +1069,7 @@ static int omap_des_probe(struct platform_device *pdev)
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_alg(
+			crypto_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 err_engine:
@@ -1119,7 +1102,7 @@ static int omap_des_remove(struct platform_device *pdev)
 
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_alg(
+			crypto_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 	tasklet_kill(&dd->done_task);
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index ac80bc6af093..4f915a4ef5b0 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -112,6 +112,8 @@
 #define FLAGS_BE32_SHA1		8
 #define FLAGS_SGS_COPIED	9
 #define FLAGS_SGS_ALLOCED	10
+#define FLAGS_HUGE		11
+
 /* context flags */
 #define FLAGS_FINUP		16
 
@@ -136,6 +138,8 @@
 #define BUFLEN			SHA512_BLOCK_SIZE
 #define OMAP_SHA_DMA_THRESHOLD	256
 
+#define OMAP_SHA_MAX_DMA_LEN	(1024 * 2048)
+
 struct omap_sham_dev;
 
 struct omap_sham_reqctx {
@@ -644,6 +648,8 @@ static int omap_sham_copy_sg_lists(struct omap_sham_reqctx *ctx,
 	struct scatterlist *tmp;
 	int offset = ctx->offset;
 
+	ctx->total = new_len;
+
 	if (ctx->bufcnt)
 		n++;
 
@@ -661,15 +667,16 @@ static int omap_sham_copy_sg_lists(struct omap_sham_reqctx *ctx,
 		sg_set_buf(tmp, ctx->dd->xmit_buf, ctx->bufcnt);
 		tmp = sg_next(tmp);
 		ctx->sg_len++;
+		new_len -= ctx->bufcnt;
 	}
 
 	while (sg && new_len) {
 		int len = sg->length - offset;
 
-		if (offset) {
+		if (len <= 0) {
 			offset -= sg->length;
-			if (offset < 0)
-				offset = 0;
+			sg = sg_next(sg);
+			continue;
 		}
 
 		if (new_len < len)
@@ -677,33 +684,37 @@ static int omap_sham_copy_sg_lists(struct omap_sham_reqctx *ctx,
 
 		if (len > 0) {
 			new_len -= len;
-			sg_set_page(tmp, sg_page(sg), len, sg->offset);
+			sg_set_page(tmp, sg_page(sg), len, sg->offset + offset);
+			offset = 0;
+			ctx->offset = 0;
+			ctx->sg_len++;
 			if (new_len <= 0)
-				sg_mark_end(tmp);
+				break;
 			tmp = sg_next(tmp);
-			ctx->sg_len++;
 		}
 
 		sg = sg_next(sg);
 	}
 
+	if (tmp)
+		sg_mark_end(tmp);
+
 	set_bit(FLAGS_SGS_ALLOCED, &ctx->dd->flags);
 
+	ctx->offset += new_len - ctx->bufcnt;
 	ctx->bufcnt = 0;
 
 	return 0;
 }
 
 static int omap_sham_copy_sgs(struct omap_sham_reqctx *ctx,
-			      struct scatterlist *sg, int bs, int new_len)
+			      struct scatterlist *sg, int bs,
+			      unsigned int new_len)
 {
 	int pages;
 	void *buf;
-	int len;
-
-	len = new_len + ctx->bufcnt;
 
-	pages = get_order(ctx->total);
+	pages = get_order(new_len);
 
 	buf = (void *)__get_free_pages(GFP_ATOMIC, pages);
 	if (!buf) {
@@ -715,14 +726,15 @@ static int omap_sham_copy_sgs(struct omap_sham_reqctx *ctx,
 		memcpy(buf, ctx->dd->xmit_buf, ctx->bufcnt);
 
 	scatterwalk_map_and_copy(buf + ctx->bufcnt, sg, ctx->offset,
-				 ctx->total - ctx->bufcnt, 0);
+				 min(new_len, ctx->total) - ctx->bufcnt, 0);
 	sg_init_table(ctx->sgl, 1);
-	sg_set_buf(ctx->sgl, buf, len);
+	sg_set_buf(ctx->sgl, buf, new_len);
 	ctx->sg = ctx->sgl;
 	set_bit(FLAGS_SGS_COPIED, &ctx->dd->flags);
 	ctx->sg_len = 1;
+	ctx->offset += new_len - ctx->bufcnt;
 	ctx->bufcnt = 0;
-	ctx->offset = 0;
+	ctx->total = new_len;
 
 	return 0;
 }
@@ -737,6 +749,7 @@ static int omap_sham_align_sgs(struct scatterlist *sg,
 	struct scatterlist *sg_tmp = sg;
 	int new_len;
 	int offset = rctx->offset;
+	int bufcnt = rctx->bufcnt;
 
 	if (!sg || !sg->length || !nbytes)
 		return 0;
@@ -751,12 +764,28 @@ static int omap_sham_align_sgs(struct scatterlist *sg,
 	else
 		new_len = (new_len - 1) / bs * bs;
 
+	if (!new_len)
+		return 0;
+
 	if (nbytes != new_len)
 		list_ok = false;
 
 	while (nbytes > 0 && sg_tmp) {
 		n++;
 
+		if (bufcnt) {
+			if (!IS_ALIGNED(bufcnt, bs)) {
+				aligned = false;
+				break;
+			}
+			nbytes -= bufcnt;
+			bufcnt = 0;
+			if (!nbytes)
+				list_ok = false;
+
+			continue;
+		}
+
 #ifdef CONFIG_ZONE_DMA
 		if (page_zonenum(sg_page(sg_tmp)) != ZONE_DMA) {
 			aligned = false;
@@ -794,13 +823,27 @@ static int omap_sham_align_sgs(struct scatterlist *sg,
 		}
 	}
 
+	if (new_len > OMAP_SHA_MAX_DMA_LEN) {
+		new_len = OMAP_SHA_MAX_DMA_LEN;
+		aligned = false;
+	}
+
 	if (!aligned)
 		return omap_sham_copy_sgs(rctx, sg, bs, new_len);
 	else if (!list_ok)
 		return omap_sham_copy_sg_lists(rctx, sg, bs, new_len);
 
+	rctx->total = new_len;
+	rctx->offset += new_len;
 	rctx->sg_len = n;
-	rctx->sg = sg;
+	if (rctx->bufcnt) {
+		sg_init_table(rctx->sgl, 2);
+		sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, rctx->bufcnt);
+		sg_chain(rctx->sgl, 2, sg);
+		rctx->sg = rctx->sgl;
+	} else {
+		rctx->sg = sg;
+	}
 
 	return 0;
 }
@@ -810,31 +853,35 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 	struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
 	int bs;
 	int ret;
-	int nbytes;
+	unsigned int nbytes;
 	bool final = rctx->flags & BIT(FLAGS_FINUP);
-	int xmit_len, hash_later;
+	int hash_later;
 
 	bs = get_block_size(rctx);
 
+	nbytes = rctx->bufcnt;
+
 	if (update)
-		nbytes = req->nbytes;
-	else
-		nbytes = 0;
+		nbytes += req->nbytes - rctx->offset;
 
-	rctx->total = nbytes + rctx->bufcnt;
+	dev_dbg(rctx->dd->dev,
+		"%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n",
+		__func__, nbytes, bs, rctx->total, rctx->offset,
+		rctx->bufcnt);
 
-	if (!rctx->total)
+	if (!nbytes)
 		return 0;
 
-	if (nbytes && (!IS_ALIGNED(rctx->bufcnt, bs))) {
+	rctx->total = nbytes;
+
+	if (update && req->nbytes && (!IS_ALIGNED(rctx->bufcnt, bs))) {
 		int len = bs - rctx->bufcnt % bs;
 
-		if (len > nbytes)
-			len = nbytes;
+		if (len > req->nbytes)
+			len = req->nbytes;
 		scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, req->src,
 					 0, len, 0);
 		rctx->bufcnt += len;
-		nbytes -= len;
 		rctx->offset = len;
 	}
 
@@ -845,64 +892,25 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 	if (ret)
 		return ret;
 
-	xmit_len = rctx->total;
-
-	if (!IS_ALIGNED(xmit_len, bs)) {
-		if (final)
-			xmit_len = DIV_ROUND_UP(xmit_len, bs) * bs;
-		else
-			xmit_len = xmit_len / bs * bs;
-	} else if (!final) {
-		xmit_len -= bs;
-	}
-
-	hash_later = rctx->total - xmit_len;
+	hash_later = nbytes - rctx->total;
 	if (hash_later < 0)
 		hash_later = 0;
 
-	if (rctx->bufcnt && nbytes) {
-		/* have data from previous operation and current */
-		sg_init_table(rctx->sgl, 2);
-		sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, rctx->bufcnt);
-
-		sg_chain(rctx->sgl, 2, req->src);
-
-		rctx->sg = rctx->sgl;
-
-		rctx->sg_len++;
-	} else if (rctx->bufcnt) {
-		/* have buffered data only */
-		sg_init_table(rctx->sgl, 1);
-		sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, xmit_len);
-
-		rctx->sg = rctx->sgl;
-
-		rctx->sg_len = 1;
-	}
-
 	if (hash_later) {
-		int offset = 0;
-
-		if (hash_later > req->nbytes) {
-			memcpy(rctx->buffer, rctx->buffer + xmit_len,
-			       hash_later - req->nbytes);
-			offset = hash_later - req->nbytes;
-		}
-
-		if (req->nbytes) {
-			scatterwalk_map_and_copy(rctx->buffer + offset,
-						 req->src,
-						 offset + req->nbytes -
-						 hash_later, hash_later, 0);
-		}
+		scatterwalk_map_and_copy(rctx->buffer,
+					 req->src,
+					 req->nbytes - hash_later,
+					 hash_later, 0);
 
 		rctx->bufcnt = hash_later;
 	} else {
 		rctx->bufcnt = 0;
 	}
 
-	if (!final)
-		rctx->total = xmit_len;
+	if (hash_later > rctx->buflen)
+		set_bit(FLAGS_HUGE, &rctx->dd->flags);
+
+	rctx->total = min(nbytes, rctx->total);
 
 	return 0;
 }
@@ -998,10 +1006,11 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
 	struct ahash_request *req = dd->req;
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	int err;
-	bool final = ctx->flags & BIT(FLAGS_FINUP);
+	bool final = (ctx->flags & BIT(FLAGS_FINUP)) &&
+			!(dd->flags & BIT(FLAGS_HUGE));
 
-	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
-		 ctx->total, ctx->digcnt, (ctx->flags & BIT(FLAGS_FINUP)) != 0);
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, final: %d",
+		ctx->total, ctx->digcnt, final);
 
 	if (ctx->total < get_block_size(ctx) ||
 	    ctx->total < dd->fallback_sz)
@@ -1024,6 +1033,9 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	int err = 0, use_dma = 1;
 
+	if (dd->flags & BIT(FLAGS_HUGE))
+		return 0;
+
 	if ((ctx->total <= get_block_size(ctx)) || dd->polling_mode)
 		/*
 		 * faster to handle last block with cpu or
@@ -1083,7 +1095,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 
 	if (test_bit(FLAGS_SGS_COPIED, &dd->flags))
 		free_pages((unsigned long)sg_virt(ctx->sg),
-			   get_order(ctx->sg->length + ctx->bufcnt));
+			   get_order(ctx->sg->length));
 
 	if (test_bit(FLAGS_SGS_ALLOCED, &dd->flags))
 		kfree(ctx->sg);
@@ -1092,6 +1104,21 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 
 	dd->flags &= ~(BIT(FLAGS_SGS_ALLOCED) | BIT(FLAGS_SGS_COPIED));
 
+	if (dd->flags & BIT(FLAGS_HUGE)) {
+		dd->flags &= ~(BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) |
+				BIT(FLAGS_OUTPUT_READY) | BIT(FLAGS_HUGE));
+		omap_sham_prepare_request(req, ctx->op == OP_UPDATE);
+		if (ctx->op == OP_UPDATE || (dd->flags & BIT(FLAGS_HUGE))) {
+			err = omap_sham_update_req(dd);
+			if (err != -EINPROGRESS &&
+			    (ctx->flags & BIT(FLAGS_FINUP)))
+				err = omap_sham_final_req(dd);
+		} else if (ctx->op == OP_FINAL) {
+			omap_sham_final_req(dd);
+		}
+		return;
+	}
+
 	if (!err) {
 		dd->pdata->copy_hash(req, 1);
 		if (test_bit(FLAGS_FINAL, &dd->flags))
@@ -1107,6 +1134,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 	pm_runtime_mark_last_busy(dd->dev);
 	pm_runtime_put_autosuspend(dd->dev);
 
+	ctx->offset = 0;
+
 	if (req->base.complete)
 		req->base.complete(&req->base, err);
 }
@@ -1158,7 +1187,7 @@ retry:
 		/* request has changed - restore hash */
 		dd->pdata->copy_hash(req, 0);
 
-	if (ctx->op == OP_UPDATE) {
+	if (ctx->op == OP_UPDATE || (dd->flags & BIT(FLAGS_HUGE))) {
 		err = omap_sham_update_req(dd);
 		if (err != -EINPROGRESS && (ctx->flags & BIT(FLAGS_FINUP)))
 			/* no final() after finup() */
@@ -1730,6 +1759,8 @@ static void omap_sham_done_task(unsigned long data)
 	struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
 	int err = 0;
 
+	dev_dbg(dd->dev, "%s: flags=%lx\n", __func__, dd->flags);
+
 	if (!test_bit(FLAGS_BUSY, &dd->flags)) {
 		omap_sham_handle_queue(dd, NULL);
 		return;
@@ -2223,6 +2254,8 @@ static int omap_sham_remove(struct platform_device *pdev)
 	if (!dd->polling_mode)
 		dma_release_channel(dd->dma_lch);
 
+	sysfs_remove_group(&dd->dev->kobj, &omap_sham_attr_group);
+
 	return 0;
 }
 
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 8a0661250078..594d6b1695d5 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -10,6 +10,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/padlock.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -97,9 +98,9 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 	return aes_ctx_common(crypto_tfm_ctx(tfm));
 }
 
-static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm)
+static inline struct aes_ctx *skcipher_aes_ctx(struct crypto_skcipher *tfm)
 {
-	return aes_ctx_common(crypto_blkcipher_ctx(tfm));
+	return aes_ctx_common(crypto_skcipher_ctx(tfm));
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -107,14 +108,11 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
-	u32 *flags = &tfm->crt_flags;
 	struct crypto_aes_ctx gen_aes;
 	int cpu;
 
-	if (key_len % 8) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (key_len % 8)
 		return -EINVAL;
-	}
 
 	/*
 	 * If the hardware is capable of generating the extended key
@@ -145,10 +143,8 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	ctx->cword.encrypt.keygen = 1;
 	ctx->cword.decrypt.keygen = 1;
 
-	if (aes_expandkey(&gen_aes, in_key, key_len)) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (aes_expandkey(&gen_aes, in_key, key_len))
 		return -EINVAL;
-	}
 
 	memcpy(ctx->E, gen_aes.key_enc, AES_MAX_KEYLENGTH);
 	memcpy(ctx->D, gen_aes.key_dec, AES_MAX_KEYLENGTH);
@@ -162,6 +158,12 @@ ok:
 	return 0;
 }
 
+static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
+				unsigned int key_len)
+{
+	return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
 /* ====== Encryption/decryption routines ====== */
 
 /* These are the real call to PadLock. */
@@ -338,25 +340,24 @@ static struct crypto_alg aes_alg = {
 	}
 };
 
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
 {
-	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
 	padlock_reset_key(&ctx->cword.encrypt);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes) != 0) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->E, &ctx->cword.encrypt,
 				   nbytes / AES_BLOCK_SIZE);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	padlock_store_cword(&ctx->cword.encrypt);
@@ -364,25 +365,24 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 	return err;
 }
 
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
 {
-	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
 	padlock_reset_key(&ctx->cword.decrypt);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes) != 0) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, &ctx->cword.decrypt,
 				   nbytes / AES_BLOCK_SIZE);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	padlock_store_cword(&ctx->cword.encrypt);
@@ -390,48 +390,41 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 	return err;
 }
 
-static struct crypto_alg ecb_aes_alg = {
-	.cra_name		=	"ecb(aes)",
-	.cra_driver_name	=	"ecb-aes-padlock",
-	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct aes_ctx),
-	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.setkey	   		= 	aes_set_key,
-			.encrypt		=	ecb_aes_encrypt,
-			.decrypt		=	ecb_aes_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_aes_alg = {
+	.base.cra_name		=	"ecb(aes)",
+	.base.cra_driver_name	=	"ecb-aes-padlock",
+	.base.cra_priority	=	PADLOCK_COMPOSITE_PRIORITY,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct aes_ctx),
+	.base.cra_alignmask	=	PADLOCK_ALIGNMENT - 1,
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.setkey			=	aes_set_key_skcipher,
+	.encrypt		=	ecb_aes_encrypt,
+	.decrypt		=	ecb_aes_decrypt,
 };
 
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
 {
-	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
 	padlock_reset_key(&ctx->cword.encrypt);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes) != 0) {
 		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
 					    walk.dst.virt.addr, ctx->E,
 					    walk.iv, &ctx->cword.encrypt,
 					    nbytes / AES_BLOCK_SIZE);
 		memcpy(walk.iv, iv, AES_BLOCK_SIZE);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	padlock_store_cword(&ctx->cword.decrypt);
@@ -439,25 +432,24 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 	return err;
 }
 
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
 {
-	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
 	padlock_reset_key(&ctx->cword.encrypt);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes) != 0) {
 		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, walk.iv, &ctx->cword.decrypt,
 				   nbytes / AES_BLOCK_SIZE);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	padlock_store_cword(&ctx->cword.encrypt);
@@ -465,26 +457,20 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 	return err;
 }
 
-static struct crypto_alg cbc_aes_alg = {
-	.cra_name		=	"cbc(aes)",
-	.cra_driver_name	=	"cbc-aes-padlock",
-	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct aes_ctx),
-	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey	   		= 	aes_set_key,
-			.encrypt		=	cbc_aes_encrypt,
-			.decrypt		=	cbc_aes_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_aes_alg = {
+	.base.cra_name		=	"cbc(aes)",
+	.base.cra_driver_name	=	"cbc-aes-padlock",
+	.base.cra_priority	=	PADLOCK_COMPOSITE_PRIORITY,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct aes_ctx),
+	.base.cra_alignmask	=	PADLOCK_ALIGNMENT - 1,
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	aes_set_key_skcipher,
+	.encrypt		=	cbc_aes_encrypt,
+	.decrypt		=	cbc_aes_decrypt,
 };
 
 static const struct x86_cpu_id padlock_cpu_id[] = {
@@ -506,13 +492,13 @@ static int __init padlock_init(void)
 		return -ENODEV;
 	}
 
-	if ((ret = crypto_register_alg(&aes_alg)))
+	if ((ret = crypto_register_alg(&aes_alg)) != 0)
 		goto aes_err;
 
-	if ((ret = crypto_register_alg(&ecb_aes_alg)))
+	if ((ret = crypto_register_skcipher(&ecb_aes_alg)) != 0)
 		goto ecb_aes_err;
 
-	if ((ret = crypto_register_alg(&cbc_aes_alg)))
+	if ((ret = crypto_register_skcipher(&cbc_aes_alg)) != 0)
 		goto cbc_aes_err;
 
 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
@@ -527,7 +513,7 @@ out:
 	return ret;
 
 cbc_aes_err:
-	crypto_unregister_alg(&ecb_aes_alg);
+	crypto_unregister_skcipher(&ecb_aes_alg);
 ecb_aes_err:
 	crypto_unregister_alg(&aes_alg);
 aes_err:
@@ -537,8 +523,8 @@ aes_err:
 
 static void __exit padlock_fini(void)
 {
-	crypto_unregister_alg(&cbc_aes_alg);
-	crypto_unregister_alg(&ecb_aes_alg);
+	crypto_unregister_skcipher(&cbc_aes_alg);
+	crypto_unregister_skcipher(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index ddf1b549fdca..c826abe79e79 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -190,13 +190,11 @@ static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
 	return padlock_sha256_finup(desc, buf, 0, out);
 }
 
-static int padlock_cra_init(struct crypto_tfm *tfm)
+static int padlock_init_tfm(struct crypto_shash *hash)
 {
-	struct crypto_shash *hash = __crypto_shash_cast(tfm);
-	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
-	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	const char *fallback_driver_name = crypto_shash_alg_name(hash);
+	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
 	struct crypto_shash *fallback_tfm;
-	int err = -ENOMEM;
 
 	/* Allocate a fallback and abort if it failed. */
 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
@@ -204,21 +202,17 @@ static int padlock_cra_init(struct crypto_tfm *tfm)
 	if (IS_ERR(fallback_tfm)) {
 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
 		       fallback_driver_name);
-		err = PTR_ERR(fallback_tfm);
-		goto out;
+		return PTR_ERR(fallback_tfm);
 	}
 
 	ctx->fallback = fallback_tfm;
 	hash->descsize += crypto_shash_descsize(fallback_tfm);
 	return 0;
-
-out:
-	return err;
 }
 
-static void padlock_cra_exit(struct crypto_tfm *tfm)
+static void padlock_exit_tfm(struct crypto_shash *hash)
 {
-	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
 
 	crypto_free_shash(ctx->fallback);
 }
@@ -231,6 +225,8 @@ static struct shash_alg sha1_alg = {
 	.final  	=	padlock_sha1_final,
 	.export		=	padlock_sha_export,
 	.import		=	padlock_sha_import,
+	.init_tfm	=	padlock_init_tfm,
+	.exit_tfm	=	padlock_exit_tfm,
 	.descsize	=	sizeof(struct padlock_sha_desc),
 	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
@@ -241,8 +237,6 @@ static struct shash_alg sha1_alg = {
 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
 		.cra_module		=	THIS_MODULE,
-		.cra_init		=	padlock_cra_init,
-		.cra_exit		=	padlock_cra_exit,
 	}
 };
 
@@ -254,6 +248,8 @@ static struct shash_alg sha256_alg = {
 	.final  	=	padlock_sha256_final,
 	.export		=	padlock_sha_export,
 	.import		=	padlock_sha_import,
+	.init_tfm	=	padlock_init_tfm,
+	.exit_tfm	=	padlock_exit_tfm,
 	.descsize	=	sizeof(struct padlock_sha_desc),
 	.statesize	=	sizeof(struct sha256_state),
 	.base		=	{
@@ -264,8 +260,6 @@ static struct shash_alg sha256_alg = {
 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
 		.cra_module		=	THIS_MODULE,
-		.cra_init		=	padlock_cra_init,
-		.cra_exit		=	padlock_cra_exit,
 	}
 };
 
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 3cbefb41b099..7384e91c8b32 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -134,7 +134,7 @@ struct spacc_engine {
 struct spacc_alg {
 	unsigned long			ctrl_default;
 	unsigned long			type;
-	struct crypto_alg		alg;
+	struct skcipher_alg		alg;
 	struct spacc_engine		*engine;
 	struct list_head		entry;
 	int				key_offs;
@@ -173,7 +173,7 @@ struct spacc_aead_ctx {
 
 static int spacc_ablk_submit(struct spacc_req *req);
 
-static inline struct spacc_alg *to_spacc_alg(struct crypto_alg *alg)
+static inline struct spacc_alg *to_spacc_skcipher(struct skcipher_alg *alg)
 {
 	return alg ? container_of(alg, struct spacc_alg, alg) : NULL;
 }
@@ -465,9 +465,6 @@ static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	crypto_aead_set_flags(ctx->sw_cipher, crypto_aead_get_flags(tfm) &
 					      CRYPTO_TFM_REQ_MASK);
 	err = crypto_aead_setkey(ctx->sw_cipher, key, keylen);
-	crypto_aead_clear_flags(tfm, CRYPTO_TFM_RES_MASK);
-	crypto_aead_set_flags(tfm, crypto_aead_get_flags(ctx->sw_cipher) &
-				   CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
@@ -490,7 +487,6 @@ static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	return 0;
 
 badkey:
-	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -733,13 +729,13 @@ static void spacc_aead_cra_exit(struct crypto_aead *tfm)
  * Set the DES key for a block cipher transform. This also performs weak key
  * checking if the transform has requested it.
  */
-static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int spacc_des_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			    unsigned int len)
 {
-	struct spacc_ablk_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(cipher, key);
 	if (err)
 		return err;
 
@@ -753,13 +749,13 @@ static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
  * Set the 3DES key for a block cipher transform. This also performs weak key
  * checking if the transform has requested it.
  */
-static int spacc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int spacc_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			     unsigned int len)
 {
-	struct spacc_ablk_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(cipher, key);
 	if (err)
 		return err;
 
@@ -773,17 +769,15 @@ static int spacc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
  * Set the key for an AES block cipher. Some key lengths are not supported in
  * hardware so this must also check whether a fallback is needed.
  */
-static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int spacc_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			    unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
 	int err = 0;
 
-	if (len > AES_MAX_KEY_SIZE) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (len > AES_MAX_KEY_SIZE)
 		return -EINVAL;
-	}
 
 	/*
 	 * IPSec engine only supports 128 and 256 bit AES keys. If we get a
@@ -805,12 +799,6 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 					  CRYPTO_TFM_REQ_MASK);
 
 		err = crypto_sync_skcipher_setkey(ctx->sw_cipher, key, len);
-
-		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-		tfm->crt_flags |=
-			crypto_sync_skcipher_get_flags(ctx->sw_cipher) &
-			CRYPTO_TFM_RES_MASK;
-
 		if (err)
 			goto sw_setkey_failed;
 	}
@@ -822,15 +810,14 @@ sw_setkey_failed:
 	return err;
 }
 
-static int spacc_kasumi_f8_setkey(struct crypto_ablkcipher *cipher,
+static int spacc_kasumi_f8_setkey(struct crypto_skcipher *cipher,
 				  const u8 *key, unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
 	int err = 0;
 
 	if (len > AES_MAX_KEY_SIZE) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		err = -EINVAL;
 		goto out;
 	}
@@ -844,12 +831,12 @@ out:
 
 static int spacc_ablk_need_fallback(struct spacc_req *req)
 {
+	struct skcipher_request *ablk_req = skcipher_request_cast(req->req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ablk_req);
+	struct spacc_alg *spacc_alg = to_spacc_skcipher(crypto_skcipher_alg(tfm));
 	struct spacc_ablk_ctx *ctx;
-	struct crypto_tfm *tfm = req->req->tfm;
-	struct crypto_alg *alg = req->req->tfm->__crt_alg;
-	struct spacc_alg *spacc_alg = to_spacc_alg(alg);
 
-	ctx = crypto_tfm_ctx(tfm);
+	ctx = crypto_skcipher_ctx(tfm);
 
 	return (spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
 			SPA_CTRL_CIPH_ALG_AES &&
@@ -859,39 +846,39 @@ static int spacc_ablk_need_fallback(struct spacc_req *req)
 
 static void spacc_ablk_complete(struct spacc_req *req)
 {
-	struct ablkcipher_request *ablk_req = ablkcipher_request_cast(req->req);
+	struct skcipher_request *ablk_req = skcipher_request_cast(req->req);
 
 	if (ablk_req->src != ablk_req->dst) {
 		spacc_free_ddt(req, req->src_ddt, req->src_addr, ablk_req->src,
-			       ablk_req->nbytes, DMA_TO_DEVICE);
+			       ablk_req->cryptlen, DMA_TO_DEVICE);
 		spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
-			       ablk_req->nbytes, DMA_FROM_DEVICE);
+			       ablk_req->cryptlen, DMA_FROM_DEVICE);
 	} else
 		spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
-			       ablk_req->nbytes, DMA_BIDIRECTIONAL);
+			       ablk_req->cryptlen, DMA_BIDIRECTIONAL);
 
 	req->req->complete(req->req, req->result);
 }
 
 static int spacc_ablk_submit(struct spacc_req *req)
 {
-	struct crypto_tfm *tfm = req->req->tfm;
-	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct ablkcipher_request *ablk_req = ablkcipher_request_cast(req->req);
-	struct crypto_alg *alg = req->req->tfm->__crt_alg;
-	struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+	struct skcipher_request *ablk_req = skcipher_request_cast(req->req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ablk_req);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct spacc_alg *spacc_alg = to_spacc_skcipher(alg);
+	struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct spacc_engine *engine = ctx->generic.engine;
 	u32 ctrl;
 
 	req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->key,
-		ctx->key_len, ablk_req->info, alg->cra_ablkcipher.ivsize,
+		ctx->key_len, ablk_req->iv, alg->ivsize,
 		NULL, 0);
 
 	writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET);
 	writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET);
 	writel(0, engine->regs + SPA_OFFSET_REG_OFFSET);
 
-	writel(ablk_req->nbytes, engine->regs + SPA_PROC_LEN_REG_OFFSET);
+	writel(ablk_req->cryptlen, engine->regs + SPA_PROC_LEN_REG_OFFSET);
 	writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET);
 	writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET);
 	writel(0, engine->regs + SPA_AAD_LEN_REG_OFFSET);
@@ -907,11 +894,11 @@ static int spacc_ablk_submit(struct spacc_req *req)
 	return -EINPROGRESS;
 }
 
-static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
+static int spacc_ablk_do_fallback(struct skcipher_request *req,
 				  unsigned alg_type, bool is_encrypt)
 {
 	struct crypto_tfm *old_tfm =
-	    crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+	    crypto_skcipher_tfm(crypto_skcipher_reqtfm(req));
 	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm);
 	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher);
 	int err;
@@ -924,7 +911,7 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
 	skcipher_request_set_sync_tfm(subreq, ctx->sw_cipher);
 	skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst,
-				   req->nbytes, req->info);
+				   req->cryptlen, req->iv);
 	err = is_encrypt ? crypto_skcipher_encrypt(subreq) :
 			   crypto_skcipher_decrypt(subreq);
 	skcipher_request_zero(subreq);
@@ -932,12 +919,13 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
 	return err;
 }
 
-static int spacc_ablk_setup(struct ablkcipher_request *req, unsigned alg_type,
+static int spacc_ablk_setup(struct skcipher_request *req, unsigned alg_type,
 			    bool is_encrypt)
 {
-	struct crypto_alg *alg = req->base.tfm->__crt_alg;
-	struct spacc_engine *engine = to_spacc_alg(alg)->engine;
-	struct spacc_req *dev_req = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct spacc_engine *engine = to_spacc_skcipher(alg)->engine;
+	struct spacc_req *dev_req = skcipher_request_ctx(req);
 	unsigned long flags;
 	int err = -ENOMEM;
 
@@ -956,17 +944,17 @@ static int spacc_ablk_setup(struct ablkcipher_request *req, unsigned alg_type,
 	 */
 	if (req->src != req->dst) {
 		dev_req->src_ddt = spacc_sg_to_ddt(engine, req->src,
-			req->nbytes, DMA_TO_DEVICE, &dev_req->src_addr);
+			req->cryptlen, DMA_TO_DEVICE, &dev_req->src_addr);
 		if (!dev_req->src_ddt)
 			goto out;
 
 		dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
-			req->nbytes, DMA_FROM_DEVICE, &dev_req->dst_addr);
+			req->cryptlen, DMA_FROM_DEVICE, &dev_req->dst_addr);
 		if (!dev_req->dst_ddt)
 			goto out_free_src;
 	} else {
 		dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
-			req->nbytes, DMA_BIDIRECTIONAL, &dev_req->dst_addr);
+			req->cryptlen, DMA_BIDIRECTIONAL, &dev_req->dst_addr);
 		if (!dev_req->dst_ddt)
 			goto out;
 
@@ -999,65 +987,65 @@ static int spacc_ablk_setup(struct ablkcipher_request *req, unsigned alg_type,
 
 out_free_ddts:
 	spacc_free_ddt(dev_req, dev_req->dst_ddt, dev_req->dst_addr, req->dst,
-		       req->nbytes, req->src == req->dst ?
+		       req->cryptlen, req->src == req->dst ?
 		       DMA_BIDIRECTIONAL : DMA_FROM_DEVICE);
 out_free_src:
 	if (req->src != req->dst)
 		spacc_free_ddt(dev_req, dev_req->src_ddt, dev_req->src_addr,
-			       req->src, req->nbytes, DMA_TO_DEVICE);
+			       req->src, req->cryptlen, DMA_TO_DEVICE);
 out:
 	return err;
 }
 
-static int spacc_ablk_cra_init(struct crypto_tfm *tfm)
+static int spacc_ablk_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+	struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct spacc_alg *spacc_alg = to_spacc_skcipher(alg);
 	struct spacc_engine *engine = spacc_alg->engine;
 
 	ctx->generic.flags = spacc_alg->type;
 	ctx->generic.engine = engine;
-	if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
+	if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
 		ctx->sw_cipher = crypto_alloc_sync_skcipher(
-			alg->cra_name, 0, CRYPTO_ALG_NEED_FALLBACK);
+			alg->base.cra_name, 0, CRYPTO_ALG_NEED_FALLBACK);
 		if (IS_ERR(ctx->sw_cipher)) {
 			dev_warn(engine->dev, "failed to allocate fallback for %s\n",
-				 alg->cra_name);
+				 alg->base.cra_name);
 			return PTR_ERR(ctx->sw_cipher);
 		}
 	}
 	ctx->generic.key_offs = spacc_alg->key_offs;
 	ctx->generic.iv_offs = spacc_alg->iv_offs;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct spacc_req);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct spacc_req));
 
 	return 0;
 }
 
-static void spacc_ablk_cra_exit(struct crypto_tfm *tfm)
+static void spacc_ablk_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	crypto_free_sync_skcipher(ctx->sw_cipher);
 }
 
-static int spacc_ablk_encrypt(struct ablkcipher_request *req)
+static int spacc_ablk_encrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct skcipher_alg *alg = crypto_skcipher_alg(cipher);
+	struct spacc_alg *spacc_alg = to_spacc_skcipher(alg);
 
-	return spacc_ablk_setup(req, alg->type, 1);
+	return spacc_ablk_setup(req, spacc_alg->type, 1);
 }
 
-static int spacc_ablk_decrypt(struct ablkcipher_request *req)
+static int spacc_ablk_decrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct spacc_alg *alg = to_spacc_alg(tfm->__crt_alg);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct skcipher_alg *alg = crypto_skcipher_alg(cipher);
+	struct spacc_alg *spacc_alg = to_spacc_skcipher(alg);
 
-	return spacc_ablk_setup(req, alg->type, 0);
+	return spacc_ablk_setup(req, spacc_alg->type, 0);
 }
 
 static inline int spacc_fifo_stat_empty(struct spacc_engine *engine)
@@ -1233,27 +1221,24 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.key_offs = 0,
 		.iv_offs = AES_MAX_KEY_SIZE,
 		.alg = {
-			.cra_name = "cbc(aes)",
-			.cra_driver_name = "cbc-aes-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_KERN_DRIVER_ONLY |
-				     CRYPTO_ALG_ASYNC |
-				     CRYPTO_ALG_NEED_FALLBACK,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_aes_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-				.ivsize = AES_BLOCK_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "cbc(aes)",
+			.base.cra_driver_name	= "cbc-aes-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+						  CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_NEED_FALLBACK,
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_aes_setkey,
+			.encrypt 		= spacc_ablk_encrypt,
+			.decrypt 		= spacc_ablk_decrypt,
+			.min_keysize 		= AES_MIN_KEY_SIZE,
+			.max_keysize 		= AES_MAX_KEY_SIZE,
+			.ivsize			= AES_BLOCK_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 	{
@@ -1261,25 +1246,23 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.iv_offs = AES_MAX_KEY_SIZE,
 		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_ECB,
 		.alg = {
-			.cra_name = "ecb(aes)",
-			.cra_driver_name = "ecb-aes-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_KERN_DRIVER_ONLY |
-				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_aes_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "ecb(aes)",
+			.base.cra_driver_name	= "ecb-aes-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+						  CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_NEED_FALLBACK,
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_aes_setkey,
+			.encrypt 		= spacc_ablk_encrypt,
+			.decrypt 		= spacc_ablk_decrypt,
+			.min_keysize 		= AES_MIN_KEY_SIZE,
+			.max_keysize 		= AES_MAX_KEY_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 	{
@@ -1287,26 +1270,23 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.iv_offs = 0,
 		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
 		.alg = {
-			.cra_name = "cbc(des)",
-			.cra_driver_name = "cbc-des-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC |
-					CRYPTO_ALG_KERN_DRIVER_ONLY,
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_des_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = DES_KEY_SIZE,
-				.max_keysize = DES_KEY_SIZE,
-				.ivsize = DES_BLOCK_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "cbc(des)",
+			.base.cra_driver_name	= "cbc-des-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+						  CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_des_setkey,
+			.encrypt		= spacc_ablk_encrypt,
+			.decrypt		= spacc_ablk_decrypt,
+			.min_keysize		= DES_KEY_SIZE,
+			.max_keysize		= DES_KEY_SIZE,
+			.ivsize			= DES_BLOCK_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 	{
@@ -1314,25 +1294,22 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.iv_offs = 0,
 		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
 		.alg = {
-			.cra_name = "ecb(des)",
-			.cra_driver_name = "ecb-des-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC |
-					CRYPTO_ALG_KERN_DRIVER_ONLY,
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_des_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = DES_KEY_SIZE,
-				.max_keysize = DES_KEY_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "ecb(des)",
+			.base.cra_driver_name	= "ecb-des-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+						  CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_des_setkey,
+			.encrypt		= spacc_ablk_encrypt,
+			.decrypt		= spacc_ablk_decrypt,
+			.min_keysize		= DES_KEY_SIZE,
+			.max_keysize		= DES_KEY_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 	{
@@ -1340,26 +1317,23 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.iv_offs = 0,
 		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
 		.alg = {
-			.cra_name = "cbc(des3_ede)",
-			.cra_driver_name = "cbc-des3-ede-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC |
-					CRYPTO_ALG_KERN_DRIVER_ONLY,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_des3_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = DES3_EDE_KEY_SIZE,
-				.max_keysize = DES3_EDE_KEY_SIZE,
-				.ivsize = DES3_EDE_BLOCK_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "cbc(des3_ede)",
+			.base.cra_driver_name	= "cbc-des3-ede-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_des3_setkey,
+			.encrypt		= spacc_ablk_encrypt,
+			.decrypt		= spacc_ablk_decrypt,
+			.min_keysize		= DES3_EDE_KEY_SIZE,
+			.max_keysize		= DES3_EDE_KEY_SIZE,
+			.ivsize			= DES3_EDE_BLOCK_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 	{
@@ -1367,25 +1341,22 @@ static struct spacc_alg ipsec_engine_algs[] = {
 		.iv_offs = 0,
 		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
 		.alg = {
-			.cra_name = "ecb(des3_ede)",
-			.cra_driver_name = "ecb-des3-ede-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC |
-					CRYPTO_ALG_KERN_DRIVER_ONLY,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_des3_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = DES3_EDE_KEY_SIZE,
-				.max_keysize = DES3_EDE_KEY_SIZE,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "ecb(des3_ede)",
+			.base.cra_driver_name	= "ecb-des3-ede-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_des3_setkey,
+			.encrypt		= spacc_ablk_encrypt,
+			.decrypt		= spacc_ablk_decrypt,
+			.min_keysize		= DES3_EDE_KEY_SIZE,
+			.max_keysize		= DES3_EDE_KEY_SIZE,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 };
@@ -1581,25 +1552,23 @@ static struct spacc_alg l2_engine_algs[] = {
 		.ctrl_default = SPA_CTRL_CIPH_ALG_KASUMI |
 				SPA_CTRL_CIPH_MODE_F8,
 		.alg = {
-			.cra_name = "f8(kasumi)",
-			.cra_driver_name = "f8-kasumi-picoxcell",
-			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
-			.cra_flags = CRYPTO_ALG_ASYNC |
-					CRYPTO_ALG_KERN_DRIVER_ONLY,
-			.cra_blocksize = 8,
-			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_module = THIS_MODULE,
-			.cra_ablkcipher = {
-				.setkey = spacc_kasumi_f8_setkey,
-				.encrypt = spacc_ablk_encrypt,
-				.decrypt = spacc_ablk_decrypt,
-				.min_keysize = 16,
-				.max_keysize = 16,
-				.ivsize = 8,
-			},
-			.cra_init = spacc_ablk_cra_init,
-			.cra_exit = spacc_ablk_cra_exit,
+			.base.cra_name		= "f8(kasumi)",
+			.base.cra_driver_name	= "f8-kasumi-picoxcell",
+			.base.cra_priority	= SPACC_CRYPTO_ALG_PRIORITY,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.base.cra_blocksize	= 8,
+			.base.cra_ctxsize	= sizeof(struct spacc_ablk_ctx),
+			.base.cra_module	= THIS_MODULE,
+
+			.setkey			= spacc_kasumi_f8_setkey,
+			.encrypt		= spacc_ablk_encrypt,
+			.decrypt		= spacc_ablk_decrypt,
+			.min_keysize		= 16,
+			.max_keysize		= 16,
+			.ivsize			= 8,
+			.init			= spacc_ablk_init_tfm,
+			.exit			= spacc_ablk_exit_tfm,
 		},
 	},
 };
@@ -1613,6 +1582,11 @@ static const struct of_device_id spacc_of_id_table[] = {
 MODULE_DEVICE_TABLE(of, spacc_of_id_table);
 #endif /* CONFIG_OF */
 
+static void spacc_tasklet_kill(void *data)
+{
+	tasklet_kill(data);
+}
+
 static int spacc_probe(struct platform_device *pdev)
 {
 	int i, err, ret;
@@ -1655,6 +1629,14 @@ static int spacc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
+	tasklet_init(&engine->complete, spacc_spacc_complete,
+		     (unsigned long)engine);
+
+	ret = devm_add_action(&pdev->dev, spacc_tasklet_kill,
+			      &engine->complete);
+	if (ret)
+		return ret;
+
 	if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0,
 			     engine->name, engine)) {
 		dev_err(engine->dev, "failed to request IRQ\n");
@@ -1712,8 +1694,6 @@ static int spacc_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&engine->completed);
 	INIT_LIST_HEAD(&engine->in_progress);
 	engine->in_flight = 0;
-	tasklet_init(&engine->complete, spacc_spacc_complete,
-		     (unsigned long)engine);
 
 	platform_set_drvdata(pdev, engine);
 
@@ -1721,7 +1701,7 @@ static int spacc_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&engine->registered_algs);
 	for (i = 0; i < engine->num_algs; ++i) {
 		engine->algs[i].engine = engine;
-		err = crypto_register_alg(&engine->algs[i].alg);
+		err = crypto_register_skcipher(&engine->algs[i].alg);
 		if (!err) {
 			list_add_tail(&engine->algs[i].entry,
 				      &engine->registered_algs);
@@ -1729,10 +1709,10 @@ static int spacc_probe(struct platform_device *pdev)
 		}
 		if (err)
 			dev_err(engine->dev, "failed to register alg \"%s\"\n",
-				engine->algs[i].alg.cra_name);
+				engine->algs[i].alg.base.cra_name);
 		else
 			dev_dbg(engine->dev, "registered alg \"%s\"\n",
-				engine->algs[i].alg.cra_name);
+				engine->algs[i].alg.base.cra_name);
 	}
 
 	INIT_LIST_HEAD(&engine->registered_aeads);
@@ -1781,7 +1761,7 @@ static int spacc_remove(struct platform_device *pdev)
 
 	list_for_each_entry_safe(alg, next, &engine->registered_algs, entry) {
 		list_del(&alg->entry);
-		crypto_unregister_alg(&alg->alg);
+		crypto_unregister_skcipher(&alg->alg);
 	}
 
 	clk_disable_unprepare(engine->clk);
diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig
index 6ab7e5a88756..2006322345de 100644
--- a/drivers/crypto/qat/Kconfig
+++ b/drivers/crypto/qat/Kconfig
@@ -3,7 +3,7 @@ config CRYPTO_DEV_QAT
 	tristate
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_AKCIPHER
 	select CRYPTO_DH
 	select CRYPTO_HMAC
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index abc7a7f64d64..ef0e482ee04f 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -68,7 +68,7 @@ static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
 static const struct file_operations adf_ctl_ops = {
 	.owner = THIS_MODULE,
 	.unlocked_ioctl = adf_ctl_ioctl,
-	.compat_ioctl = adf_ctl_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 };
 
 struct adf_ctl_drv_info {
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index b50eb55f8f57..833bb1d3a11b 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -48,6 +48,7 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
@@ -122,7 +123,7 @@ struct qat_alg_aead_ctx {
 	char opad[SHA512_BLOCK_SIZE];
 };
 
-struct qat_alg_ablkcipher_ctx {
+struct qat_alg_skcipher_ctx {
 	struct icp_qat_hw_cipher_algo_blk *enc_cd;
 	struct icp_qat_hw_cipher_algo_blk *dec_cd;
 	dma_addr_t enc_cd_paddr;
@@ -130,7 +131,7 @@ struct qat_alg_ablkcipher_ctx {
 	struct icp_qat_fw_la_bulk_req enc_fw_req;
 	struct icp_qat_fw_la_bulk_req dec_fw_req;
 	struct qat_crypto_instance *inst;
-	struct crypto_tfm *tfm;
+	struct crypto_skcipher *tfm;
 };
 
 static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg)
@@ -463,10 +464,10 @@ static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm,
 	return 0;
 }
 
-static void qat_alg_ablkcipher_init_com(struct qat_alg_ablkcipher_ctx *ctx,
-					struct icp_qat_fw_la_bulk_req *req,
-					struct icp_qat_hw_cipher_algo_blk *cd,
-					const uint8_t *key, unsigned int keylen)
+static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx,
+				      struct icp_qat_fw_la_bulk_req *req,
+				      struct icp_qat_hw_cipher_algo_blk *cd,
+				      const uint8_t *key, unsigned int keylen)
 {
 	struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars;
 	struct icp_qat_fw_comn_req_hdr *header = &req->comn_hdr;
@@ -485,28 +486,28 @@ static void qat_alg_ablkcipher_init_com(struct qat_alg_ablkcipher_ctx *ctx,
 	ICP_QAT_FW_COMN_NEXT_ID_SET(cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR);
 }
 
-static void qat_alg_ablkcipher_init_enc(struct qat_alg_ablkcipher_ctx *ctx,
-					int alg, const uint8_t *key,
-					unsigned int keylen, int mode)
+static void qat_alg_skcipher_init_enc(struct qat_alg_skcipher_ctx *ctx,
+				      int alg, const uint8_t *key,
+				      unsigned int keylen, int mode)
 {
 	struct icp_qat_hw_cipher_algo_blk *enc_cd = ctx->enc_cd;
 	struct icp_qat_fw_la_bulk_req *req = &ctx->enc_fw_req;
 	struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars;
 
-	qat_alg_ablkcipher_init_com(ctx, req, enc_cd, key, keylen);
+	qat_alg_skcipher_init_com(ctx, req, enc_cd, key, keylen);
 	cd_pars->u.s.content_desc_addr = ctx->enc_cd_paddr;
 	enc_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg, mode);
 }
 
-static void qat_alg_ablkcipher_init_dec(struct qat_alg_ablkcipher_ctx *ctx,
-					int alg, const uint8_t *key,
-					unsigned int keylen, int mode)
+static void qat_alg_skcipher_init_dec(struct qat_alg_skcipher_ctx *ctx,
+				      int alg, const uint8_t *key,
+				      unsigned int keylen, int mode)
 {
 	struct icp_qat_hw_cipher_algo_blk *dec_cd = ctx->dec_cd;
 	struct icp_qat_fw_la_bulk_req *req = &ctx->dec_fw_req;
 	struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars;
 
-	qat_alg_ablkcipher_init_com(ctx, req, dec_cd, key, keylen);
+	qat_alg_skcipher_init_com(ctx, req, dec_cd, key, keylen);
 	cd_pars->u.s.content_desc_addr = ctx->dec_cd_paddr;
 
 	if (mode != ICP_QAT_HW_CIPHER_CTR_MODE)
@@ -569,7 +570,6 @@ static int qat_alg_aead_init_sessions(struct crypto_aead *tfm, const u8 *key,
 	memzero_explicit(&keys, sizeof(keys));
 	return 0;
 bad_key:
-	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 error:
@@ -577,22 +577,19 @@ error:
 	return -EFAULT;
 }
 
-static int qat_alg_ablkcipher_init_sessions(struct qat_alg_ablkcipher_ctx *ctx,
-					    const uint8_t *key,
-					    unsigned int keylen,
-					    int mode)
+static int qat_alg_skcipher_init_sessions(struct qat_alg_skcipher_ctx *ctx,
+					  const uint8_t *key,
+					  unsigned int keylen,
+					  int mode)
 {
 	int alg;
 
 	if (qat_alg_validate_key(keylen, &alg, mode))
-		goto bad_key;
+		return -EINVAL;
 
-	qat_alg_ablkcipher_init_enc(ctx, alg, key, keylen, mode);
-	qat_alg_ablkcipher_init_dec(ctx, alg, key, keylen, mode);
+	qat_alg_skcipher_init_enc(ctx, alg, key, keylen, mode);
+	qat_alg_skcipher_init_dec(ctx, alg, key, keylen, mode);
 	return 0;
-bad_key:
-	crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
 }
 
 static int qat_alg_aead_rekey(struct crypto_aead *tfm, const uint8_t *key,
@@ -832,12 +829,12 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	areq->base.complete(&areq->base, res);
 }
 
-static void qat_ablkcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
-					struct qat_crypto_request *qat_req)
+static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
+				      struct qat_crypto_request *qat_req)
 {
-	struct qat_alg_ablkcipher_ctx *ctx = qat_req->ablkcipher_ctx;
+	struct qat_alg_skcipher_ctx *ctx = qat_req->skcipher_ctx;
 	struct qat_crypto_instance *inst = ctx->inst;
-	struct ablkcipher_request *areq = qat_req->ablkcipher_req;
+	struct skcipher_request *sreq = qat_req->skcipher_req;
 	uint8_t stat_filed = qat_resp->comn_resp.comn_status;
 	struct device *dev = &GET_DEV(ctx->inst->accel_dev);
 	int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
@@ -846,11 +843,11 @@ static void qat_ablkcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		res = -EINVAL;
 
-	memcpy(areq->info, qat_req->iv, AES_BLOCK_SIZE);
+	memcpy(sreq->iv, qat_req->iv, AES_BLOCK_SIZE);
 	dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
 			  qat_req->iv_paddr);
 
-	areq->base.complete(&areq->base, res);
+	sreq->base.complete(&sreq->base, res);
 }
 
 void qat_alg_callback(void *resp)
@@ -949,21 +946,21 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 	return -EINPROGRESS;
 }
 
-static int qat_alg_ablkcipher_rekey(struct qat_alg_ablkcipher_ctx *ctx,
-				    const u8 *key, unsigned int keylen,
-				    int mode)
+static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx,
+				  const u8 *key, unsigned int keylen,
+				  int mode)
 {
 	memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd));
 	memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd));
 	memset(&ctx->enc_fw_req, 0, sizeof(ctx->enc_fw_req));
 	memset(&ctx->dec_fw_req, 0, sizeof(ctx->dec_fw_req));
 
-	return qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode);
+	return qat_alg_skcipher_init_sessions(ctx, key, keylen, mode);
 }
 
-static int qat_alg_ablkcipher_newkey(struct qat_alg_ablkcipher_ctx *ctx,
-				     const u8 *key, unsigned int keylen,
-				     int mode)
+static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
+				   const u8 *key, unsigned int keylen,
+				   int mode)
 {
 	struct qat_crypto_instance *inst = NULL;
 	struct device *dev;
@@ -990,7 +987,7 @@ static int qat_alg_ablkcipher_newkey(struct qat_alg_ablkcipher_ctx *ctx,
 		goto out_free_enc;
 	}
 
-	ret = qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode);
+	ret = qat_alg_skcipher_init_sessions(ctx, key, keylen, mode);
 	if (ret)
 		goto out_free_all;
 
@@ -1012,51 +1009,51 @@ out_free_instance:
 	return ret;
 }
 
-static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
-				     const u8 *key, unsigned int keylen,
-				     int mode)
+static int qat_alg_skcipher_setkey(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen,
+				   int mode)
 {
-	struct qat_alg_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	if (ctx->enc_cd)
-		return qat_alg_ablkcipher_rekey(ctx, key, keylen, mode);
+		return qat_alg_skcipher_rekey(ctx, key, keylen, mode);
 	else
-		return qat_alg_ablkcipher_newkey(ctx, key, keylen, mode);
+		return qat_alg_skcipher_newkey(ctx, key, keylen, mode);
 }
 
-static int qat_alg_ablkcipher_cbc_setkey(struct crypto_ablkcipher *tfm,
-					 const u8 *key, unsigned int keylen)
+static int qat_alg_skcipher_cbc_setkey(struct crypto_skcipher *tfm,
+				       const u8 *key, unsigned int keylen)
 {
-	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
-					 ICP_QAT_HW_CIPHER_CBC_MODE);
+	return qat_alg_skcipher_setkey(tfm, key, keylen,
+				       ICP_QAT_HW_CIPHER_CBC_MODE);
 }
 
-static int qat_alg_ablkcipher_ctr_setkey(struct crypto_ablkcipher *tfm,
-					 const u8 *key, unsigned int keylen)
+static int qat_alg_skcipher_ctr_setkey(struct crypto_skcipher *tfm,
+				       const u8 *key, unsigned int keylen)
 {
-	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
-					 ICP_QAT_HW_CIPHER_CTR_MODE);
+	return qat_alg_skcipher_setkey(tfm, key, keylen,
+				       ICP_QAT_HW_CIPHER_CTR_MODE);
 }
 
-static int qat_alg_ablkcipher_xts_setkey(struct crypto_ablkcipher *tfm,
-					 const u8 *key, unsigned int keylen)
+static int qat_alg_skcipher_xts_setkey(struct crypto_skcipher *tfm,
+				       const u8 *key, unsigned int keylen)
 {
-	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
-					 ICP_QAT_HW_CIPHER_XTS_MODE);
+	return qat_alg_skcipher_setkey(tfm, key, keylen,
+				       ICP_QAT_HW_CIPHER_XTS_MODE);
 }
 
-static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
+static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(atfm);
-	struct qat_alg_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct qat_crypto_request *qat_req = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm);
+	struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
 	struct icp_qat_fw_la_cipher_req_params *cipher_param;
 	struct icp_qat_fw_la_bulk_req *msg;
 	struct device *dev = &GET_DEV(ctx->inst->accel_dev);
 	int ret, ctr = 0;
 
-	if (req->nbytes == 0)
+	if (req->cryptlen == 0)
 		return 0;
 
 	qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE,
@@ -1073,17 +1070,17 @@ static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
 
 	msg = &qat_req->req;
 	*msg = ctx->enc_fw_req;
-	qat_req->ablkcipher_ctx = ctx;
-	qat_req->ablkcipher_req = req;
-	qat_req->cb = qat_ablkcipher_alg_callback;
+	qat_req->skcipher_ctx = ctx;
+	qat_req->skcipher_req = req;
+	qat_req->cb = qat_skcipher_alg_callback;
 	qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
 	qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
 	qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
 	cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
-	cipher_param->cipher_length = req->nbytes;
+	cipher_param->cipher_length = req->cryptlen;
 	cipher_param->cipher_offset = 0;
 	cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
-	memcpy(qat_req->iv, req->info, AES_BLOCK_SIZE);
+	memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE);
 	do {
 		ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
 	} while (ret == -EAGAIN && ctr++ < 10);
@@ -1097,26 +1094,26 @@ static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
 	return -EINPROGRESS;
 }
 
-static int qat_alg_ablkcipher_blk_encrypt(struct ablkcipher_request *req)
+static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req)
 {
-	if (req->nbytes % AES_BLOCK_SIZE != 0)
+	if (req->cryptlen % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	return qat_alg_ablkcipher_encrypt(req);
+	return qat_alg_skcipher_encrypt(req);
 }
 
-static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
+static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(atfm);
-	struct qat_alg_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct qat_crypto_request *qat_req = ablkcipher_request_ctx(req);
+	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm);
+	struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
 	struct icp_qat_fw_la_cipher_req_params *cipher_param;
 	struct icp_qat_fw_la_bulk_req *msg;
 	struct device *dev = &GET_DEV(ctx->inst->accel_dev);
 	int ret, ctr = 0;
 
-	if (req->nbytes == 0)
+	if (req->cryptlen == 0)
 		return 0;
 
 	qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE,
@@ -1133,17 +1130,17 @@ static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
 
 	msg = &qat_req->req;
 	*msg = ctx->dec_fw_req;
-	qat_req->ablkcipher_ctx = ctx;
-	qat_req->ablkcipher_req = req;
-	qat_req->cb = qat_ablkcipher_alg_callback;
+	qat_req->skcipher_ctx = ctx;
+	qat_req->skcipher_req = req;
+	qat_req->cb = qat_skcipher_alg_callback;
 	qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
 	qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
 	qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
 	cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
-	cipher_param->cipher_length = req->nbytes;
+	cipher_param->cipher_length = req->cryptlen;
 	cipher_param->cipher_offset = 0;
 	cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
-	memcpy(qat_req->iv, req->info, AES_BLOCK_SIZE);
+	memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE);
 	do {
 		ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
 	} while (ret == -EAGAIN && ctr++ < 10);
@@ -1157,12 +1154,12 @@ static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
 	return -EINPROGRESS;
 }
 
-static int qat_alg_ablkcipher_blk_decrypt(struct ablkcipher_request *req)
+static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req)
 {
-	if (req->nbytes % AES_BLOCK_SIZE != 0)
+	if (req->cryptlen % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	return qat_alg_ablkcipher_decrypt(req);
+	return qat_alg_skcipher_decrypt(req);
 }
 static int qat_alg_aead_init(struct crypto_aead *tfm,
 			     enum icp_qat_hw_auth_algo hash,
@@ -1218,18 +1215,18 @@ static void qat_alg_aead_exit(struct crypto_aead *tfm)
 	qat_crypto_put_instance(inst);
 }
 
-static int qat_alg_ablkcipher_init(struct crypto_tfm *tfm)
+static int qat_alg_skcipher_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct qat_alg_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct qat_crypto_request);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct qat_crypto_request));
 	ctx->tfm = tfm;
 	return 0;
 }
 
-static void qat_alg_ablkcipher_exit(struct crypto_tfm *tfm)
+static void qat_alg_skcipher_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct qat_alg_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct qat_crypto_instance *inst = ctx->inst;
 	struct device *dev;
 
@@ -1308,92 +1305,75 @@ static struct aead_alg qat_aeads[] = { {
 	.maxauthsize = SHA512_DIGEST_SIZE,
 } };
 
-static struct crypto_alg qat_algs[] = { {
-	.cra_name = "cbc(aes)",
-	.cra_driver_name = "qat_aes_cbc",
-	.cra_priority = 4001,
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
-	.cra_alignmask = 0,
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_module = THIS_MODULE,
-	.cra_init = qat_alg_ablkcipher_init,
-	.cra_exit = qat_alg_ablkcipher_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.setkey = qat_alg_ablkcipher_cbc_setkey,
-			.decrypt = qat_alg_ablkcipher_blk_decrypt,
-			.encrypt = qat_alg_ablkcipher_blk_encrypt,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.ivsize = AES_BLOCK_SIZE,
-		},
-	},
+static struct skcipher_alg qat_skciphers[] = { {
+	.base.cra_name = "cbc(aes)",
+	.base.cra_driver_name = "qat_aes_cbc",
+	.base.cra_priority = 4001,
+	.base.cra_flags = CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize = AES_BLOCK_SIZE,
+	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
+	.base.cra_alignmask = 0,
+	.base.cra_module = THIS_MODULE,
+
+	.init = qat_alg_skcipher_init_tfm,
+	.exit = qat_alg_skcipher_exit_tfm,
+	.setkey = qat_alg_skcipher_cbc_setkey,
+	.decrypt = qat_alg_skcipher_blk_decrypt,
+	.encrypt = qat_alg_skcipher_blk_encrypt,
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
 }, {
-	.cra_name = "ctr(aes)",
-	.cra_driver_name = "qat_aes_ctr",
-	.cra_priority = 4001,
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = 1,
-	.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
-	.cra_alignmask = 0,
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_module = THIS_MODULE,
-	.cra_init = qat_alg_ablkcipher_init,
-	.cra_exit = qat_alg_ablkcipher_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.setkey = qat_alg_ablkcipher_ctr_setkey,
-			.decrypt = qat_alg_ablkcipher_decrypt,
-			.encrypt = qat_alg_ablkcipher_encrypt,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.ivsize = AES_BLOCK_SIZE,
-		},
-	},
+	.base.cra_name = "ctr(aes)",
+	.base.cra_driver_name = "qat_aes_ctr",
+	.base.cra_priority = 4001,
+	.base.cra_flags = CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize = 1,
+	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
+	.base.cra_alignmask = 0,
+	.base.cra_module = THIS_MODULE,
+
+	.init = qat_alg_skcipher_init_tfm,
+	.exit = qat_alg_skcipher_exit_tfm,
+	.setkey = qat_alg_skcipher_ctr_setkey,
+	.decrypt = qat_alg_skcipher_decrypt,
+	.encrypt = qat_alg_skcipher_encrypt,
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
 }, {
-	.cra_name = "xts(aes)",
-	.cra_driver_name = "qat_aes_xts",
-	.cra_priority = 4001,
-	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize = AES_BLOCK_SIZE,
-	.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
-	.cra_alignmask = 0,
-	.cra_type = &crypto_ablkcipher_type,
-	.cra_module = THIS_MODULE,
-	.cra_init = qat_alg_ablkcipher_init,
-	.cra_exit = qat_alg_ablkcipher_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.setkey = qat_alg_ablkcipher_xts_setkey,
-			.decrypt = qat_alg_ablkcipher_blk_decrypt,
-			.encrypt = qat_alg_ablkcipher_blk_encrypt,
-			.min_keysize = 2 * AES_MIN_KEY_SIZE,
-			.max_keysize = 2 * AES_MAX_KEY_SIZE,
-			.ivsize = AES_BLOCK_SIZE,
-		},
-	},
+	.base.cra_name = "xts(aes)",
+	.base.cra_driver_name = "qat_aes_xts",
+	.base.cra_priority = 4001,
+	.base.cra_flags = CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize = AES_BLOCK_SIZE,
+	.base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
+	.base.cra_alignmask = 0,
+	.base.cra_module = THIS_MODULE,
+
+	.init = qat_alg_skcipher_init_tfm,
+	.exit = qat_alg_skcipher_exit_tfm,
+	.setkey = qat_alg_skcipher_xts_setkey,
+	.decrypt = qat_alg_skcipher_blk_decrypt,
+	.encrypt = qat_alg_skcipher_blk_encrypt,
+	.min_keysize = 2 * AES_MIN_KEY_SIZE,
+	.max_keysize = 2 * AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
 } };
 
 int qat_algs_register(void)
 {
-	int ret = 0, i;
+	int ret = 0;
 
 	mutex_lock(&algs_lock);
 	if (++active_devs != 1)
 		goto unlock;
 
-	for (i = 0; i < ARRAY_SIZE(qat_algs); i++)
-		qat_algs[i].cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
-
-	ret = crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs));
+	ret = crypto_register_skciphers(qat_skciphers,
+					ARRAY_SIZE(qat_skciphers));
 	if (ret)
 		goto unlock;
 
-	for (i = 0; i < ARRAY_SIZE(qat_aeads); i++)
-		qat_aeads[i].base.cra_flags = CRYPTO_ALG_ASYNC;
-
 	ret = crypto_register_aeads(qat_aeads, ARRAY_SIZE(qat_aeads));
 	if (ret)
 		goto unreg_algs;
@@ -1403,7 +1383,7 @@ unlock:
 	return ret;
 
 unreg_algs:
-	crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs));
+	crypto_unregister_skciphers(qat_skciphers, ARRAY_SIZE(qat_skciphers));
 	goto unlock;
 }
 
@@ -1414,7 +1394,7 @@ void qat_algs_unregister(void)
 		goto unlock;
 
 	crypto_unregister_aeads(qat_aeads, ARRAY_SIZE(qat_aeads));
-	crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs));
+	crypto_unregister_skciphers(qat_skciphers, ARRAY_SIZE(qat_skciphers));
 
 unlock:
 	mutex_unlock(&algs_lock);
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index c77a80020cde..300bb919a33a 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -79,11 +79,11 @@ struct qat_crypto_request {
 	struct icp_qat_fw_la_bulk_req req;
 	union {
 		struct qat_alg_aead_ctx *aead_ctx;
-		struct qat_alg_ablkcipher_ctx *ablkcipher_ctx;
+		struct qat_alg_skcipher_ctx *skcipher_ctx;
 	};
 	union {
 		struct aead_request *aead_req;
-		struct ablkcipher_request *ablkcipher_req;
+		struct skcipher_request *skcipher_req;
 	};
 	struct qat_crypto_request_buffs buf;
 	void (*cb)(struct icp_qat_fw_la_resp *resp,
diff --git a/drivers/crypto/qce/Makefile b/drivers/crypto/qce/Makefile
index 19a7f899acff..14ade8a7d664 100644
--- a/drivers/crypto/qce/Makefile
+++ b/drivers/crypto/qce/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_CRYPTO_DEV_QCE) += qcrypto.o
 qcrypto-objs := core.o \
 		common.o \
-		dma.o \
-		sha.o \
-		ablkcipher.o
+		dma.o
+
+qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o
+qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o
diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c
deleted file mode 100644
index 7a98bf5cc967..000000000000
--- a/drivers/crypto/qce/ablkcipher.c
+++ /dev/null
@@ -1,440 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <crypto/aes.h>
-#include <crypto/internal/des.h>
-#include <crypto/internal/skcipher.h>
-
-#include "cipher.h"
-
-static LIST_HEAD(ablkcipher_algs);
-
-static void qce_ablkcipher_done(void *data)
-{
-	struct crypto_async_request *async_req = data;
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
-	struct qce_cipher_reqctx *rctx = ablkcipher_request_ctx(req);
-	struct qce_alg_template *tmpl = to_cipher_tmpl(async_req->tfm);
-	struct qce_device *qce = tmpl->qce;
-	enum dma_data_direction dir_src, dir_dst;
-	u32 status;
-	int error;
-	bool diff_dst;
-
-	diff_dst = (req->src != req->dst) ? true : false;
-	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
-	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
-
-	error = qce_dma_terminate_all(&qce->dma);
-	if (error)
-		dev_dbg(qce->dev, "ablkcipher dma termination error (%d)\n",
-			error);
-
-	if (diff_dst)
-		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
-	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-
-	sg_free_table(&rctx->dst_tbl);
-
-	error = qce_check_status(qce, &status);
-	if (error < 0)
-		dev_dbg(qce->dev, "ablkcipher operation error (%x)\n", status);
-
-	qce->async_req_done(tmpl->qce, error);
-}
-
-static int
-qce_ablkcipher_async_req_handle(struct crypto_async_request *async_req)
-{
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
-	struct qce_cipher_reqctx *rctx = ablkcipher_request_ctx(req);
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct qce_alg_template *tmpl = to_cipher_tmpl(async_req->tfm);
-	struct qce_device *qce = tmpl->qce;
-	enum dma_data_direction dir_src, dir_dst;
-	struct scatterlist *sg;
-	bool diff_dst;
-	gfp_t gfp;
-	int ret;
-
-	rctx->iv = req->info;
-	rctx->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	rctx->cryptlen = req->nbytes;
-
-	diff_dst = (req->src != req->dst) ? true : false;
-	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
-	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
-
-	rctx->src_nents = sg_nents_for_len(req->src, req->nbytes);
-	if (diff_dst)
-		rctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
-	else
-		rctx->dst_nents = rctx->src_nents;
-	if (rctx->src_nents < 0) {
-		dev_err(qce->dev, "Invalid numbers of src SG.\n");
-		return rctx->src_nents;
-	}
-	if (rctx->dst_nents < 0) {
-		dev_err(qce->dev, "Invalid numbers of dst SG.\n");
-		return -rctx->dst_nents;
-	}
-
-	rctx->dst_nents += 1;
-
-	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-						GFP_KERNEL : GFP_ATOMIC;
-
-	ret = sg_alloc_table(&rctx->dst_tbl, rctx->dst_nents, gfp);
-	if (ret)
-		return ret;
-
-	sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
-
-	sg = qce_sgtable_add(&rctx->dst_tbl, req->dst);
-	if (IS_ERR(sg)) {
-		ret = PTR_ERR(sg);
-		goto error_free;
-	}
-
-	sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg);
-	if (IS_ERR(sg)) {
-		ret = PTR_ERR(sg);
-		goto error_free;
-	}
-
-	sg_mark_end(sg);
-	rctx->dst_sg = rctx->dst_tbl.sgl;
-
-	ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-	if (ret < 0)
-		goto error_free;
-
-	if (diff_dst) {
-		ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
-		if (ret < 0)
-			goto error_unmap_dst;
-		rctx->src_sg = req->src;
-	} else {
-		rctx->src_sg = rctx->dst_sg;
-	}
-
-	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents,
-			       rctx->dst_sg, rctx->dst_nents,
-			       qce_ablkcipher_done, async_req);
-	if (ret)
-		goto error_unmap_src;
-
-	qce_dma_issue_pending(&qce->dma);
-
-	ret = qce_start(async_req, tmpl->crypto_alg_type, req->nbytes, 0);
-	if (ret)
-		goto error_terminate;
-
-	return 0;
-
-error_terminate:
-	qce_dma_terminate_all(&qce->dma);
-error_unmap_src:
-	if (diff_dst)
-		dma_unmap_sg(qce->dev, req->src, rctx->src_nents, dir_src);
-error_unmap_dst:
-	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-error_free:
-	sg_free_table(&rctx->dst_tbl);
-	return ret;
-}
-
-static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
-				 unsigned int keylen)
-{
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablk);
-	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	int ret;
-
-	if (!key || !keylen)
-		return -EINVAL;
-
-	switch (keylen) {
-	case AES_KEYSIZE_128:
-	case AES_KEYSIZE_256:
-		break;
-	default:
-		goto fallback;
-	}
-
-	ctx->enc_keylen = keylen;
-	memcpy(ctx->enc_key, key, keylen);
-	return 0;
-fallback:
-	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
-	if (!ret)
-		ctx->enc_keylen = keylen;
-	return ret;
-}
-
-static int qce_des_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
-			  unsigned int keylen)
-{
-	struct qce_cipher_ctx *ctx = crypto_ablkcipher_ctx(ablk);
-	int err;
-
-	err = verify_ablkcipher_des_key(ablk, key);
-	if (err)
-		return err;
-
-	ctx->enc_keylen = keylen;
-	memcpy(ctx->enc_key, key, keylen);
-	return 0;
-}
-
-static int qce_des3_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
-			   unsigned int keylen)
-{
-	struct qce_cipher_ctx *ctx = crypto_ablkcipher_ctx(ablk);
-	int err;
-
-	err = verify_ablkcipher_des3_key(ablk, key);
-	if (err)
-		return err;
-
-	ctx->enc_keylen = keylen;
-	memcpy(ctx->enc_key, key, keylen);
-	return 0;
-}
-
-static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt)
-{
-	struct crypto_tfm *tfm =
-			crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
-	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct qce_cipher_reqctx *rctx = ablkcipher_request_ctx(req);
-	struct qce_alg_template *tmpl = to_cipher_tmpl(tfm);
-	int ret;
-
-	rctx->flags = tmpl->alg_flags;
-	rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
-
-	if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 &&
-	    ctx->enc_keylen != AES_KEYSIZE_256) {
-		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-		skcipher_request_set_callback(subreq, req->base.flags,
-					      NULL, NULL);
-		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
-		ret = encrypt ? crypto_skcipher_encrypt(subreq) :
-				crypto_skcipher_decrypt(subreq);
-		skcipher_request_zero(subreq);
-		return ret;
-	}
-
-	return tmpl->qce->async_req_enqueue(tmpl->qce, &req->base);
-}
-
-static int qce_ablkcipher_encrypt(struct ablkcipher_request *req)
-{
-	return qce_ablkcipher_crypt(req, 1);
-}
-
-static int qce_ablkcipher_decrypt(struct ablkcipher_request *req)
-{
-	return qce_ablkcipher_crypt(req, 0);
-}
-
-static int qce_ablkcipher_init(struct crypto_tfm *tfm)
-{
-	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	memset(ctx, 0, sizeof(*ctx));
-	tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx);
-
-	ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(tfm),
-						   0, CRYPTO_ALG_NEED_FALLBACK);
-	return PTR_ERR_OR_ZERO(ctx->fallback);
-}
-
-static void qce_ablkcipher_exit(struct crypto_tfm *tfm)
-{
-	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	crypto_free_sync_skcipher(ctx->fallback);
-}
-
-struct qce_ablkcipher_def {
-	unsigned long flags;
-	const char *name;
-	const char *drv_name;
-	unsigned int blocksize;
-	unsigned int ivsize;
-	unsigned int min_keysize;
-	unsigned int max_keysize;
-};
-
-static const struct qce_ablkcipher_def ablkcipher_def[] = {
-	{
-		.flags		= QCE_ALG_AES | QCE_MODE_ECB,
-		.name		= "ecb(aes)",
-		.drv_name	= "ecb-aes-qce",
-		.blocksize	= AES_BLOCK_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_AES | QCE_MODE_CBC,
-		.name		= "cbc(aes)",
-		.drv_name	= "cbc-aes-qce",
-		.blocksize	= AES_BLOCK_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_AES | QCE_MODE_CTR,
-		.name		= "ctr(aes)",
-		.drv_name	= "ctr-aes-qce",
-		.blocksize	= AES_BLOCK_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_AES | QCE_MODE_XTS,
-		.name		= "xts(aes)",
-		.drv_name	= "xts-aes-qce",
-		.blocksize	= AES_BLOCK_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_DES | QCE_MODE_ECB,
-		.name		= "ecb(des)",
-		.drv_name	= "ecb-des-qce",
-		.blocksize	= DES_BLOCK_SIZE,
-		.ivsize		= 0,
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_DES | QCE_MODE_CBC,
-		.name		= "cbc(des)",
-		.drv_name	= "cbc-des-qce",
-		.blocksize	= DES_BLOCK_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.min_keysize	= DES_KEY_SIZE,
-		.max_keysize	= DES_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_3DES | QCE_MODE_ECB,
-		.name		= "ecb(des3_ede)",
-		.drv_name	= "ecb-3des-qce",
-		.blocksize	= DES3_EDE_BLOCK_SIZE,
-		.ivsize		= 0,
-		.min_keysize	= DES3_EDE_KEY_SIZE,
-		.max_keysize	= DES3_EDE_KEY_SIZE,
-	},
-	{
-		.flags		= QCE_ALG_3DES | QCE_MODE_CBC,
-		.name		= "cbc(des3_ede)",
-		.drv_name	= "cbc-3des-qce",
-		.blocksize	= DES3_EDE_BLOCK_SIZE,
-		.ivsize		= DES3_EDE_BLOCK_SIZE,
-		.min_keysize	= DES3_EDE_KEY_SIZE,
-		.max_keysize	= DES3_EDE_KEY_SIZE,
-	},
-};
-
-static int qce_ablkcipher_register_one(const struct qce_ablkcipher_def *def,
-				       struct qce_device *qce)
-{
-	struct qce_alg_template *tmpl;
-	struct crypto_alg *alg;
-	int ret;
-
-	tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
-	if (!tmpl)
-		return -ENOMEM;
-
-	alg = &tmpl->alg.crypto;
-
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
-		 def->drv_name);
-
-	alg->cra_blocksize = def->blocksize;
-	alg->cra_ablkcipher.ivsize = def->ivsize;
-	alg->cra_ablkcipher.min_keysize = def->min_keysize;
-	alg->cra_ablkcipher.max_keysize = def->max_keysize;
-	alg->cra_ablkcipher.setkey = IS_3DES(def->flags) ? qce_des3_setkey :
-				     IS_DES(def->flags) ? qce_des_setkey :
-				     qce_ablkcipher_setkey;
-	alg->cra_ablkcipher.encrypt = qce_ablkcipher_encrypt;
-	alg->cra_ablkcipher.decrypt = qce_ablkcipher_decrypt;
-
-	alg->cra_priority = 300;
-	alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
-			 CRYPTO_ALG_NEED_FALLBACK;
-	alg->cra_ctxsize = sizeof(struct qce_cipher_ctx);
-	alg->cra_alignmask = 0;
-	alg->cra_type = &crypto_ablkcipher_type;
-	alg->cra_module = THIS_MODULE;
-	alg->cra_init = qce_ablkcipher_init;
-	alg->cra_exit = qce_ablkcipher_exit;
-
-	INIT_LIST_HEAD(&tmpl->entry);
-	tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_ABLKCIPHER;
-	tmpl->alg_flags = def->flags;
-	tmpl->qce = qce;
-
-	ret = crypto_register_alg(alg);
-	if (ret) {
-		kfree(tmpl);
-		dev_err(qce->dev, "%s registration failed\n", alg->cra_name);
-		return ret;
-	}
-
-	list_add_tail(&tmpl->entry, &ablkcipher_algs);
-	dev_dbg(qce->dev, "%s is registered\n", alg->cra_name);
-	return 0;
-}
-
-static void qce_ablkcipher_unregister(struct qce_device *qce)
-{
-	struct qce_alg_template *tmpl, *n;
-
-	list_for_each_entry_safe(tmpl, n, &ablkcipher_algs, entry) {
-		crypto_unregister_alg(&tmpl->alg.crypto);
-		list_del(&tmpl->entry);
-		kfree(tmpl);
-	}
-}
-
-static int qce_ablkcipher_register(struct qce_device *qce)
-{
-	int ret, i;
-
-	for (i = 0; i < ARRAY_SIZE(ablkcipher_def); i++) {
-		ret = qce_ablkcipher_register_one(&ablkcipher_def[i], qce);
-		if (ret)
-			goto err;
-	}
-
-	return 0;
-err:
-	qce_ablkcipher_unregister(qce);
-	return ret;
-}
-
-const struct qce_algo_ops ablkcipher_ops = {
-	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-	.register_algs = qce_ablkcipher_register,
-	.unregister_algs = qce_ablkcipher_unregister,
-	.async_req_handle = qce_ablkcipher_async_req_handle,
-};
diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h
index 5cab8f0706a8..7770660bc853 100644
--- a/drivers/crypto/qce/cipher.h
+++ b/drivers/crypto/qce/cipher.h
@@ -45,12 +45,12 @@ struct qce_cipher_reqctx {
 	unsigned int cryptlen;
 };
 
-static inline struct qce_alg_template *to_cipher_tmpl(struct crypto_tfm *tfm)
+static inline struct qce_alg_template *to_cipher_tmpl(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	return container_of(alg, struct qce_alg_template, alg.crypto);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	return container_of(alg, struct qce_alg_template, alg.skcipher);
 }
 
-extern const struct qce_algo_ops ablkcipher_ops;
+extern const struct qce_algo_ops skcipher_ops;
 
 #endif /* _CIPHER_H_ */
diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index 3fb510164326..629e7f34dc09 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -45,52 +45,56 @@ qce_clear_array(struct qce_device *qce, u32 offset, unsigned int len)
 		qce_write(qce, offset + i * sizeof(u32), 0);
 }
 
-static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
+static u32 qce_config_reg(struct qce_device *qce, int little)
 {
-	u32 cfg = 0;
+	u32 beats = (qce->burst_size >> 3) - 1;
+	u32 pipe_pair = qce->pipe_pair_id;
+	u32 config;
 
-	if (IS_AES(flags)) {
-		if (aes_key_size == AES_KEYSIZE_128)
-			cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT;
-		else if (aes_key_size == AES_KEYSIZE_256)
-			cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT;
-	}
+	config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK;
+	config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) |
+		  BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT);
+	config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK;
+	config &= ~HIGH_SPD_EN_N_SHIFT;
 
-	if (IS_AES(flags))
-		cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT;
-	else if (IS_DES(flags) || IS_3DES(flags))
-		cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT;
+	if (little)
+		config |= BIT(LITTLE_ENDIAN_MODE_SHIFT);
 
-	if (IS_DES(flags))
-		cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT;
+	return config;
+}
 
-	if (IS_3DES(flags))
-		cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT;
+void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len)
+{
+	__be32 *d = dst;
+	const u8 *s = src;
+	unsigned int n;
 
-	switch (flags & QCE_MODE_MASK) {
-	case QCE_MODE_ECB:
-		cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT;
-		break;
-	case QCE_MODE_CBC:
-		cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT;
-		break;
-	case QCE_MODE_CTR:
-		cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT;
-		break;
-	case QCE_MODE_XTS:
-		cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT;
-		break;
-	case QCE_MODE_CCM:
-		cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT;
-		cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT;
-		break;
-	default:
-		return ~0;
+	n = len / sizeof(u32);
+	for (; n > 0; n--) {
+		*d = cpu_to_be32p((const __u32 *) s);
+		s += sizeof(__u32);
+		d++;
 	}
+}
 
-	return cfg;
+static void qce_setup_config(struct qce_device *qce)
+{
+	u32 config;
+
+	/* get big endianness */
+	config = qce_config_reg(qce, 0);
+
+	/* clear status */
+	qce_write(qce, REG_STATUS, 0);
+	qce_write(qce, REG_CONFIG, config);
+}
+
+static inline void qce_crypto_go(struct qce_device *qce)
+{
+	qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
 }
 
+#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
 {
 	u32 cfg = 0;
@@ -137,88 +141,6 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
 	return cfg;
 }
 
-static u32 qce_config_reg(struct qce_device *qce, int little)
-{
-	u32 beats = (qce->burst_size >> 3) - 1;
-	u32 pipe_pair = qce->pipe_pair_id;
-	u32 config;
-
-	config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK;
-	config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) |
-		  BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT);
-	config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK;
-	config &= ~HIGH_SPD_EN_N_SHIFT;
-
-	if (little)
-		config |= BIT(LITTLE_ENDIAN_MODE_SHIFT);
-
-	return config;
-}
-
-void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len)
-{
-	__be32 *d = dst;
-	const u8 *s = src;
-	unsigned int n;
-
-	n = len / sizeof(u32);
-	for (; n > 0; n--) {
-		*d = cpu_to_be32p((const __u32 *) s);
-		s += sizeof(__u32);
-		d++;
-	}
-}
-
-static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize)
-{
-	u8 swap[QCE_AES_IV_LENGTH];
-	u32 i, j;
-
-	if (ivsize > QCE_AES_IV_LENGTH)
-		return;
-
-	memset(swap, 0, QCE_AES_IV_LENGTH);
-
-	for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1;
-	     i < QCE_AES_IV_LENGTH; i++, j--)
-		swap[i] = src[j];
-
-	qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH);
-}
-
-static void qce_xtskey(struct qce_device *qce, const u8 *enckey,
-		       unsigned int enckeylen, unsigned int cryptlen)
-{
-	u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0};
-	unsigned int xtsklen = enckeylen / (2 * sizeof(u32));
-	unsigned int xtsdusize;
-
-	qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2,
-			       enckeylen / 2);
-	qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen);
-
-	/* xts du size 512B */
-	xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen);
-	qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize);
-}
-
-static void qce_setup_config(struct qce_device *qce)
-{
-	u32 config;
-
-	/* get big endianness */
-	config = qce_config_reg(qce, 0);
-
-	/* clear status */
-	qce_write(qce, REG_STATUS, 0);
-	qce_write(qce, REG_CONFIG, config);
-}
-
-static inline void qce_crypto_go(struct qce_device *qce)
-{
-	qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
-}
-
 static int qce_setup_regs_ahash(struct crypto_async_request *async_req,
 				u32 totallen, u32 offset)
 {
@@ -303,14 +225,95 @@ go_proc:
 
 	return 0;
 }
+#endif
+
+#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
+static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
+{
+	u32 cfg = 0;
+
+	if (IS_AES(flags)) {
+		if (aes_key_size == AES_KEYSIZE_128)
+			cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT;
+		else if (aes_key_size == AES_KEYSIZE_256)
+			cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT;
+	}
+
+	if (IS_AES(flags))
+		cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT;
+	else if (IS_DES(flags) || IS_3DES(flags))
+		cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT;
+
+	if (IS_DES(flags))
+		cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT;
+
+	if (IS_3DES(flags))
+		cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT;
+
+	switch (flags & QCE_MODE_MASK) {
+	case QCE_MODE_ECB:
+		cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT;
+		break;
+	case QCE_MODE_CBC:
+		cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT;
+		break;
+	case QCE_MODE_CTR:
+		cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT;
+		break;
+	case QCE_MODE_XTS:
+		cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT;
+		break;
+	case QCE_MODE_CCM:
+		cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT;
+		cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT;
+		break;
+	default:
+		return ~0;
+	}
+
+	return cfg;
+}
+
+static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize)
+{
+	u8 swap[QCE_AES_IV_LENGTH];
+	u32 i, j;
+
+	if (ivsize > QCE_AES_IV_LENGTH)
+		return;
+
+	memset(swap, 0, QCE_AES_IV_LENGTH);
+
+	for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1;
+	     i < QCE_AES_IV_LENGTH; i++, j--)
+		swap[i] = src[j];
+
+	qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH);
+}
+
+static void qce_xtskey(struct qce_device *qce, const u8 *enckey,
+		       unsigned int enckeylen, unsigned int cryptlen)
+{
+	u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0};
+	unsigned int xtsklen = enckeylen / (2 * sizeof(u32));
+	unsigned int xtsdusize;
+
+	qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2,
+			       enckeylen / 2);
+	qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen);
+
+	/* xts du size 512B */
+	xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen);
+	qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize);
+}
 
-static int qce_setup_regs_ablkcipher(struct crypto_async_request *async_req,
+static int qce_setup_regs_skcipher(struct crypto_async_request *async_req,
 				     u32 totallen, u32 offset)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
-	struct qce_cipher_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = skcipher_request_cast(async_req);
+	struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req);
 	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
-	struct qce_alg_template *tmpl = to_cipher_tmpl(async_req->tfm);
+	struct qce_alg_template *tmpl = to_cipher_tmpl(crypto_skcipher_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
 	__be32 enckey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(__be32)] = {0};
 	__be32 enciv[QCE_MAX_IV_SIZE / sizeof(__be32)] = {0};
@@ -384,15 +387,20 @@ static int qce_setup_regs_ablkcipher(struct crypto_async_request *async_req,
 
 	return 0;
 }
+#endif
 
 int qce_start(struct crypto_async_request *async_req, u32 type, u32 totallen,
 	      u32 offset)
 {
 	switch (type) {
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		return qce_setup_regs_ablkcipher(async_req, totallen, offset);
+#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		return qce_setup_regs_skcipher(async_req, totallen, offset);
+#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 	case CRYPTO_ALG_TYPE_AHASH:
 		return qce_setup_regs_ahash(async_req, totallen, offset);
+#endif
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h
index 47fb523357ac..282d4317470d 100644
--- a/drivers/crypto/qce/common.h
+++ b/drivers/crypto/qce/common.h
@@ -10,6 +10,7 @@
 #include <linux/types.h>
 #include <crypto/aes.h>
 #include <crypto/hash.h>
+#include <crypto/internal/skcipher.h>
 
 /* key size in bytes */
 #define QCE_SHA_HMAC_KEY_SIZE		64
@@ -79,7 +80,7 @@ struct qce_alg_template {
 	unsigned long alg_flags;
 	const u32 *std_iv;
 	union {
-		struct crypto_alg crypto;
+		struct skcipher_alg skcipher;
 		struct ahash_alg ahash;
 	} alg;
 	struct qce_device *qce;
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index 08d4ce3bfddf..cb6d61eb7302 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -22,8 +22,12 @@
 #define QCE_QUEUE_LENGTH	1
 
 static const struct qce_algo_ops *qce_ops[] = {
-	&ablkcipher_ops,
+#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
+	&skcipher_ops,
+#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 	&ahash_ops,
+#endif
 };
 
 static void qce_unregister_algs(struct qce_device *qce)
diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c
index 0984a719144d..7da893dc00e7 100644
--- a/drivers/crypto/qce/dma.c
+++ b/drivers/crypto/qce/dma.c
@@ -12,11 +12,11 @@ int qce_dma_request(struct device *dev, struct qce_dma_data *dma)
 {
 	int ret;
 
-	dma->txchan = dma_request_slave_channel_reason(dev, "tx");
+	dma->txchan = dma_request_chan(dev, "tx");
 	if (IS_ERR(dma->txchan))
 		return PTR_ERR(dma->txchan);
 
-	dma->rxchan = dma_request_slave_channel_reason(dev, "rx");
+	dma->rxchan = dma_request_chan(dev, "rx");
 	if (IS_ERR(dma->rxchan)) {
 		ret = PTR_ERR(dma->rxchan);
 		goto error_rx;
@@ -47,7 +47,8 @@ void qce_dma_release(struct qce_dma_data *dma)
 }
 
 struct scatterlist *
-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl)
+qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl,
+		int max_ents)
 {
 	struct scatterlist *sg = sgt->sgl, *sg_last = NULL;
 
@@ -60,12 +61,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl)
 	if (!sg)
 		return ERR_PTR(-EINVAL);
 
-	while (new_sgl && sg) {
+	while (new_sgl && sg && max_ents) {
 		sg_set_page(sg, sg_page(new_sgl), new_sgl->length,
 			    new_sgl->offset);
 		sg_last = sg;
 		sg = sg_next(sg);
 		new_sgl = sg_next(new_sgl);
+		max_ents--;
 	}
 
 	return sg_last;
diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h
index 1e25a9e0e6f8..ed25a0d9829e 100644
--- a/drivers/crypto/qce/dma.h
+++ b/drivers/crypto/qce/dma.h
@@ -42,6 +42,7 @@ int qce_dma_prep_sgs(struct qce_dma_data *dma, struct scatterlist *sg_in,
 void qce_dma_issue_pending(struct qce_dma_data *dma);
 int qce_dma_terminate_all(struct qce_dma_data *dma);
 struct scatterlist *
-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add);
+qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add,
+		int max_ents);
 
 #endif /* _DMA_H_ */
diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c
index 0853e74583ad..1ab62e7d5f3c 100644
--- a/drivers/crypto/qce/sha.c
+++ b/drivers/crypto/qce/sha.c
@@ -396,8 +396,6 @@ static int qce_ahash_hmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 	ahash_request_set_crypt(req, &sg, ctx->authkey, keylen);
 
 	ret = crypto_wait_req(crypto_ahash_digest(req), &wait);
-	if (ret)
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 
 	kfree(buf);
 err_free_req:
@@ -495,7 +493,7 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def,
 	base = &alg->halg.base;
 	base->cra_blocksize = def->blocksize;
 	base->cra_priority = 300;
-	base->cra_flags = CRYPTO_ALG_ASYNC;
+	base->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
 	base->cra_ctxsize = sizeof(struct qce_sha_ctx);
 	base->cra_alignmask = 0;
 	base->cra_module = THIS_MODULE;
diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
new file mode 100644
index 000000000000..4217b745f124
--- /dev/null
+++ b/drivers/crypto/qce/skcipher.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <crypto/aes.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+
+#include "cipher.h"
+
+static LIST_HEAD(skcipher_algs);
+
+static void qce_skcipher_done(void *data)
+{
+	struct crypto_async_request *async_req = data;
+	struct skcipher_request *req = skcipher_request_cast(async_req);
+	struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct qce_alg_template *tmpl = to_cipher_tmpl(crypto_skcipher_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct qce_result_dump *result_buf = qce->dma.result_buf;
+	enum dma_data_direction dir_src, dir_dst;
+	u32 status;
+	int error;
+	bool diff_dst;
+
+	diff_dst = (req->src != req->dst) ? true : false;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	error = qce_dma_terminate_all(&qce->dma);
+	if (error)
+		dev_dbg(qce->dev, "skcipher dma termination error (%d)\n",
+			error);
+
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+
+	sg_free_table(&rctx->dst_tbl);
+
+	error = qce_check_status(qce, &status);
+	if (error < 0)
+		dev_dbg(qce->dev, "skcipher operation error (%x)\n", status);
+
+	memcpy(rctx->iv, result_buf->encr_cntr_iv, rctx->ivsize);
+	qce->async_req_done(tmpl->qce, error);
+}
+
+static int
+qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
+{
+	struct skcipher_request *req = skcipher_request_cast(async_req);
+	struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct qce_alg_template *tmpl = to_cipher_tmpl(crypto_skcipher_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	enum dma_data_direction dir_src, dir_dst;
+	struct scatterlist *sg;
+	bool diff_dst;
+	gfp_t gfp;
+	int ret;
+
+	rctx->iv = req->iv;
+	rctx->ivsize = crypto_skcipher_ivsize(skcipher);
+	rctx->cryptlen = req->cryptlen;
+
+	diff_dst = (req->src != req->dst) ? true : false;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	rctx->src_nents = sg_nents_for_len(req->src, req->cryptlen);
+	if (diff_dst)
+		rctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen);
+	else
+		rctx->dst_nents = rctx->src_nents;
+	if (rctx->src_nents < 0) {
+		dev_err(qce->dev, "Invalid numbers of src SG.\n");
+		return rctx->src_nents;
+	}
+	if (rctx->dst_nents < 0) {
+		dev_err(qce->dev, "Invalid numbers of dst SG.\n");
+		return -rctx->dst_nents;
+	}
+
+	rctx->dst_nents += 1;
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+						GFP_KERNEL : GFP_ATOMIC;
+
+	ret = sg_alloc_table(&rctx->dst_tbl, rctx->dst_nents, gfp);
+	if (ret)
+		return ret;
+
+	sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
+
+	sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto error_free;
+	}
+
+	sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto error_free;
+	}
+
+	sg_mark_end(sg);
+	rctx->dst_sg = rctx->dst_tbl.sgl;
+
+	ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+	if (ret < 0)
+		goto error_free;
+
+	if (diff_dst) {
+		ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
+		if (ret < 0)
+			goto error_unmap_dst;
+		rctx->src_sg = req->src;
+	} else {
+		rctx->src_sg = rctx->dst_sg;
+	}
+
+	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents,
+			       rctx->dst_sg, rctx->dst_nents,
+			       qce_skcipher_done, async_req);
+	if (ret)
+		goto error_unmap_src;
+
+	qce_dma_issue_pending(&qce->dma);
+
+	ret = qce_start(async_req, tmpl->crypto_alg_type, req->cryptlen, 0);
+	if (ret)
+		goto error_terminate;
+
+	return 0;
+
+error_terminate:
+	qce_dma_terminate_all(&qce->dma);
+error_unmap_src:
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, req->src, rctx->src_nents, dir_src);
+error_unmap_dst:
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+error_free:
+	sg_free_table(&rctx->dst_tbl);
+	return ret;
+}
+
+static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key,
+				 unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ablk);
+	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	unsigned long flags = to_cipher_tmpl(ablk)->alg_flags;
+	int ret;
+
+	if (!key || !keylen)
+		return -EINVAL;
+
+	switch (IS_XTS(flags) ? keylen >> 1 : keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		goto fallback;
+	}
+
+	ctx->enc_keylen = keylen;
+	memcpy(ctx->enc_key, key, keylen);
+	return 0;
+fallback:
+	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+	if (!ret)
+		ctx->enc_keylen = keylen;
+	return ret;
+}
+
+static int qce_des_setkey(struct crypto_skcipher *ablk, const u8 *key,
+			  unsigned int keylen)
+{
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(ablk);
+	int err;
+
+	err = verify_skcipher_des_key(ablk, key);
+	if (err)
+		return err;
+
+	ctx->enc_keylen = keylen;
+	memcpy(ctx->enc_key, key, keylen);
+	return 0;
+}
+
+static int qce_des3_setkey(struct crypto_skcipher *ablk, const u8 *key,
+			   unsigned int keylen)
+{
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(ablk);
+	int err;
+
+	err = verify_skcipher_des3_key(ablk, key);
+	if (err)
+		return err;
+
+	ctx->enc_keylen = keylen;
+	memcpy(ctx->enc_key, key, keylen);
+	return 0;
+}
+
+static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct qce_alg_template *tmpl = to_cipher_tmpl(tfm);
+	int keylen;
+	int ret;
+
+	rctx->flags = tmpl->alg_flags;
+	rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
+	keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen;
+
+	if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 &&
+	    keylen != AES_KEYSIZE_256) {
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
+		skcipher_request_set_callback(subreq, req->base.flags,
+					      NULL, NULL);
+		skcipher_request_set_crypt(subreq, req->src, req->dst,
+					   req->cryptlen, req->iv);
+		ret = encrypt ? crypto_skcipher_encrypt(subreq) :
+				crypto_skcipher_decrypt(subreq);
+		skcipher_request_zero(subreq);
+		return ret;
+	}
+
+	return tmpl->qce->async_req_enqueue(tmpl->qce, &req->base);
+}
+
+static int qce_skcipher_encrypt(struct skcipher_request *req)
+{
+	return qce_skcipher_crypt(req, 1);
+}
+
+static int qce_skcipher_decrypt(struct skcipher_request *req)
+{
+	return qce_skcipher_crypt(req, 0);
+}
+
+static int qce_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct qce_cipher_reqctx));
+	return 0;
+}
+
+static int qce_skcipher_init_fallback(struct crypto_skcipher *tfm)
+{
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	qce_skcipher_init(tfm);
+	ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(&tfm->base),
+						   0, CRYPTO_ALG_NEED_FALLBACK);
+	return PTR_ERR_OR_ZERO(ctx->fallback);
+}
+
+static void qce_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_sync_skcipher(ctx->fallback);
+}
+
+struct qce_skcipher_def {
+	unsigned long flags;
+	const char *name;
+	const char *drv_name;
+	unsigned int blocksize;
+	unsigned int chunksize;
+	unsigned int ivsize;
+	unsigned int min_keysize;
+	unsigned int max_keysize;
+};
+
+static const struct qce_skcipher_def skcipher_def[] = {
+	{
+		.flags		= QCE_ALG_AES | QCE_MODE_ECB,
+		.name		= "ecb(aes)",
+		.drv_name	= "ecb-aes-qce",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_AES | QCE_MODE_CBC,
+		.name		= "cbc(aes)",
+		.drv_name	= "cbc-aes-qce",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_AES | QCE_MODE_CTR,
+		.name		= "ctr(aes)",
+		.drv_name	= "ctr-aes-qce",
+		.blocksize	= 1,
+		.chunksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_AES | QCE_MODE_XTS,
+		.name		= "xts(aes)",
+		.drv_name	= "xts-aes-qce",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.min_keysize	= AES_MIN_KEY_SIZE * 2,
+		.max_keysize	= AES_MAX_KEY_SIZE * 2,
+	},
+	{
+		.flags		= QCE_ALG_DES | QCE_MODE_ECB,
+		.name		= "ecb(des)",
+		.drv_name	= "ecb-des-qce",
+		.blocksize	= DES_BLOCK_SIZE,
+		.ivsize		= 0,
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_DES | QCE_MODE_CBC,
+		.name		= "cbc(des)",
+		.drv_name	= "cbc-des-qce",
+		.blocksize	= DES_BLOCK_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_3DES | QCE_MODE_ECB,
+		.name		= "ecb(des3_ede)",
+		.drv_name	= "ecb-3des-qce",
+		.blocksize	= DES3_EDE_BLOCK_SIZE,
+		.ivsize		= 0,
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+	},
+	{
+		.flags		= QCE_ALG_3DES | QCE_MODE_CBC,
+		.name		= "cbc(des3_ede)",
+		.drv_name	= "cbc-3des-qce",
+		.blocksize	= DES3_EDE_BLOCK_SIZE,
+		.ivsize		= DES3_EDE_BLOCK_SIZE,
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+	},
+};
+
+static int qce_skcipher_register_one(const struct qce_skcipher_def *def,
+				       struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl;
+	struct skcipher_alg *alg;
+	int ret;
+
+	tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+	if (!tmpl)
+		return -ENOMEM;
+
+	alg = &tmpl->alg.skcipher;
+
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+
+	alg->base.cra_blocksize		= def->blocksize;
+	alg->chunksize			= def->chunksize;
+	alg->ivsize			= def->ivsize;
+	alg->min_keysize		= def->min_keysize;
+	alg->max_keysize		= def->max_keysize;
+	alg->setkey			= IS_3DES(def->flags) ? qce_des3_setkey :
+					  IS_DES(def->flags) ? qce_des_setkey :
+					  qce_skcipher_setkey;
+	alg->encrypt			= qce_skcipher_encrypt;
+	alg->decrypt			= qce_skcipher_decrypt;
+
+	alg->base.cra_priority		= 300;
+	alg->base.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY;
+	alg->base.cra_ctxsize		= sizeof(struct qce_cipher_ctx);
+	alg->base.cra_alignmask		= 0;
+	alg->base.cra_module		= THIS_MODULE;
+
+	if (IS_AES(def->flags)) {
+		alg->base.cra_flags    |= CRYPTO_ALG_NEED_FALLBACK;
+		alg->init		= qce_skcipher_init_fallback;
+		alg->exit		= qce_skcipher_exit;
+	} else {
+		alg->init		= qce_skcipher_init;
+	}
+
+	INIT_LIST_HEAD(&tmpl->entry);
+	tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_SKCIPHER;
+	tmpl->alg_flags = def->flags;
+	tmpl->qce = qce;
+
+	ret = crypto_register_skcipher(alg);
+	if (ret) {
+		kfree(tmpl);
+		dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name);
+		return ret;
+	}
+
+	list_add_tail(&tmpl->entry, &skcipher_algs);
+	dev_dbg(qce->dev, "%s is registered\n", alg->base.cra_name);
+	return 0;
+}
+
+static void qce_skcipher_unregister(struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl, *n;
+
+	list_for_each_entry_safe(tmpl, n, &skcipher_algs, entry) {
+		crypto_unregister_skcipher(&tmpl->alg.skcipher);
+		list_del(&tmpl->entry);
+		kfree(tmpl);
+	}
+}
+
+static int qce_skcipher_register(struct qce_device *qce)
+{
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(skcipher_def); i++) {
+		ret = qce_skcipher_register_one(&skcipher_def[i], qce);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	qce_skcipher_unregister(qce);
+	return ret;
+}
+
+const struct qce_algo_ops skcipher_ops = {
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.register_algs = qce_skcipher_register,
+	.unregister_algs = qce_skcipher_unregister,
+	.async_req_handle = qce_skcipher_async_req_handle,
+};
diff --git a/drivers/crypto/rockchip/Makefile b/drivers/crypto/rockchip/Makefile
index 6e23764e6c8a..785277aca71e 100644
--- a/drivers/crypto/rockchip/Makefile
+++ b/drivers/crypto/rockchip/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rk_crypto.o
 rk_crypto-objs := rk3288_crypto.o \
-		  rk3288_crypto_ablkcipher.o \
+		  rk3288_crypto_skcipher.o \
 		  rk3288_crypto_ahash.o
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index e5714ef24bf2..f385587f99af 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -264,8 +264,8 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
 		rk_cipher_algs[i]->dev = crypto_info;
 		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
-			err = crypto_register_alg(
-					&rk_cipher_algs[i]->alg.crypto);
+			err = crypto_register_skcipher(
+					&rk_cipher_algs[i]->alg.skcipher);
 		else
 			err = crypto_register_ahash(
 					&rk_cipher_algs[i]->alg.hash);
@@ -277,7 +277,7 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 err_cipher_algs:
 	for (k = 0; k < i; k++) {
 		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
-			crypto_unregister_alg(&rk_cipher_algs[k]->alg.crypto);
+			crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher);
 		else
 			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
 	}
@@ -290,7 +290,7 @@ static void rk_crypto_unregister(void)
 
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
 		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
-			crypto_unregister_alg(&rk_cipher_algs[i]->alg.crypto);
+			crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher);
 		else
 			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
 	}
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 18e2b3f29336..2b49c677afdb 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -8,6 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
 
 #include <crypto/md5.h>
 #include <crypto/sha.h>
@@ -256,7 +257,7 @@ enum alg_type {
 struct rk_crypto_tmp {
 	struct rk_crypto_info		*dev;
 	union {
-		struct crypto_alg	crypto;
+		struct skcipher_alg	skcipher;
 		struct ahash_alg	hash;
 	} alg;
 	enum alg_type			type;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
deleted file mode 100644
index d0f4b2d18059..000000000000
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ /dev/null
@@ -1,556 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Crypto acceleration support for Rockchip RK3288
- *
- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd
- *
- * Author: Zain Wang <zain.wang@rock-chips.com>
- *
- * Some ideas are from marvell-cesa.c and s5p-sss.c driver.
- */
-#include "rk3288_crypto.h"
-
-#define RK_CRYPTO_DEC			BIT(0)
-
-static void rk_crypto_complete(struct crypto_async_request *base, int err)
-{
-	if (base->complete)
-		base->complete(base, err);
-}
-
-static int rk_handle_req(struct rk_crypto_info *dev,
-			 struct ablkcipher_request *req)
-{
-	if (!IS_ALIGNED(req->nbytes, dev->align_size))
-		return -EINVAL;
-	else
-		return dev->enqueue(dev, &req->base);
-}
-
-static int rk_aes_setkey(struct crypto_ablkcipher *cipher,
-			 const u8 *key, unsigned int keylen)
-{
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
-	    keylen != AES_KEYSIZE_256) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
-	return 0;
-}
-
-static int rk_des_setkey(struct crypto_ablkcipher *cipher,
-			 const u8 *key, unsigned int keylen)
-{
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	int err;
-
-	err = verify_ablkcipher_des_key(cipher, key);
-	if (err)
-		return err;
-
-	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
-	return 0;
-}
-
-static int rk_tdes_setkey(struct crypto_ablkcipher *cipher,
-			  const u8 *key, unsigned int keylen)
-{
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	int err;
-
-	err = verify_ablkcipher_des3_key(cipher, key);
-	if (err)
-		return err;
-
-	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
-	return 0;
-}
-
-static int rk_aes_ecb_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_AES_ECB_MODE;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_aes_ecb_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_aes_cbc_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_AES_CBC_MODE;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_aes_cbc_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des_ecb_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = 0;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des_ecb_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des_cbc_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des_cbc_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des3_ede_ecb_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_SELECT;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des3_ede_ecb_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des3_ede_cbc_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
-	return rk_handle_req(dev, req);
-}
-
-static int rk_des3_ede_cbc_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct rk_crypto_info *dev = ctx->dev;
-
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
-		    RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
-}
-
-static void rk_ablk_hw_init(struct rk_crypto_info *dev)
-{
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 ivsize, block, conf_reg = 0;
-
-	block = crypto_tfm_alg_blocksize(tfm);
-	ivsize = crypto_ablkcipher_ivsize(cipher);
-
-	if (block == DES_BLOCK_SIZE) {
-		ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
-			     RK_CRYPTO_TDES_BYTESWAP_KEY |
-			     RK_CRYPTO_TDES_BYTESWAP_IV;
-		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->info, ivsize);
-		conf_reg = RK_CRYPTO_DESSEL;
-	} else {
-		ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
-			     RK_CRYPTO_AES_KEY_CHANGE |
-			     RK_CRYPTO_AES_BYTESWAP_KEY |
-			     RK_CRYPTO_AES_BYTESWAP_IV;
-		if (ctx->keylen == AES_KEYSIZE_192)
-			ctx->mode |= RK_CRYPTO_AES_192BIT_key;
-		else if (ctx->keylen == AES_KEYSIZE_256)
-			ctx->mode |= RK_CRYPTO_AES_256BIT_key;
-		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->info, ivsize);
-	}
-	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
-		    RK_CRYPTO_BYTESWAP_BRFIFO;
-	CRYPTO_WRITE(dev, RK_CRYPTO_CONF, conf_reg);
-	CRYPTO_WRITE(dev, RK_CRYPTO_INTENA,
-		     RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA);
-}
-
-static void crypto_dma_start(struct rk_crypto_info *dev)
-{
-	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in);
-	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4);
-	CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out);
-	CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START |
-		     _SBF(RK_CRYPTO_BLOCK_START, 16));
-}
-
-static int rk_set_data_start(struct rk_crypto_info *dev)
-{
-	int err;
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 ivsize = crypto_ablkcipher_ivsize(tfm);
-	u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
-		dev->sg_src->offset + dev->sg_src->length - ivsize;
-
-	/* Store the iv that need to be updated in chain mode.
-	 * And update the IV buffer to contain the next IV for decryption mode.
-	 */
-	if (ctx->mode & RK_CRYPTO_DEC) {
-		memcpy(ctx->iv, src_last_blk, ivsize);
-		sg_pcopy_to_buffer(dev->first, dev->src_nents, req->info,
-				   ivsize, dev->total - ivsize);
-	}
-
-	err = dev->load_data(dev, dev->sg_src, dev->sg_dst);
-	if (!err)
-		crypto_dma_start(dev);
-	return err;
-}
-
-static int rk_ablk_start(struct rk_crypto_info *dev)
-{
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-	unsigned long flags;
-	int err = 0;
-
-	dev->left_bytes = req->nbytes;
-	dev->total = req->nbytes;
-	dev->sg_src = req->src;
-	dev->first = req->src;
-	dev->src_nents = sg_nents(req->src);
-	dev->sg_dst = req->dst;
-	dev->dst_nents = sg_nents(req->dst);
-	dev->aligned = 1;
-
-	spin_lock_irqsave(&dev->lock, flags);
-	rk_ablk_hw_init(dev);
-	err = rk_set_data_start(dev);
-	spin_unlock_irqrestore(&dev->lock, flags);
-	return err;
-}
-
-static void rk_iv_copyback(struct rk_crypto_info *dev)
-{
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 ivsize = crypto_ablkcipher_ivsize(tfm);
-
-	/* Update the IV buffer to contain the next IV for encryption mode. */
-	if (!(ctx->mode & RK_CRYPTO_DEC)) {
-		if (dev->aligned) {
-			memcpy(req->info, sg_virt(dev->sg_dst) +
-				dev->sg_dst->length - ivsize, ivsize);
-		} else {
-			memcpy(req->info, dev->addr_vir +
-				dev->count - ivsize, ivsize);
-		}
-	}
-}
-
-static void rk_update_iv(struct rk_crypto_info *dev)
-{
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 ivsize = crypto_ablkcipher_ivsize(tfm);
-	u8 *new_iv = NULL;
-
-	if (ctx->mode & RK_CRYPTO_DEC) {
-		new_iv = ctx->iv;
-	} else {
-		new_iv = page_address(sg_page(dev->sg_dst)) +
-			 dev->sg_dst->offset + dev->sg_dst->length - ivsize;
-	}
-
-	if (ivsize == DES_BLOCK_SIZE)
-		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize);
-	else if (ivsize == AES_BLOCK_SIZE)
-		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize);
-}
-
-/* return:
- *	true	some err was occurred
- *	fault	no err, continue
- */
-static int rk_ablk_rx(struct rk_crypto_info *dev)
-{
-	int err = 0;
-	struct ablkcipher_request *req =
-		ablkcipher_request_cast(dev->async_req);
-
-	dev->unload_data(dev);
-	if (!dev->aligned) {
-		if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents,
-					  dev->addr_vir, dev->count,
-					  dev->total - dev->left_bytes -
-					  dev->count)) {
-			err = -EINVAL;
-			goto out_rx;
-		}
-	}
-	if (dev->left_bytes) {
-		rk_update_iv(dev);
-		if (dev->aligned) {
-			if (sg_is_last(dev->sg_src)) {
-				dev_err(dev->dev, "[%s:%d] Lack of data\n",
-					__func__, __LINE__);
-				err = -ENOMEM;
-				goto out_rx;
-			}
-			dev->sg_src = sg_next(dev->sg_src);
-			dev->sg_dst = sg_next(dev->sg_dst);
-		}
-		err = rk_set_data_start(dev);
-	} else {
-		rk_iv_copyback(dev);
-		/* here show the calculation is over without any err */
-		dev->complete(dev->async_req, 0);
-		tasklet_schedule(&dev->queue_task);
-	}
-out_rx:
-	return err;
-}
-
-static int rk_ablk_cra_init(struct crypto_tfm *tfm)
-{
-	struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct rk_crypto_tmp *algt;
-
-	algt = container_of(alg, struct rk_crypto_tmp, alg.crypto);
-
-	ctx->dev = algt->dev;
-	ctx->dev->align_size = crypto_tfm_alg_alignmask(tfm) + 1;
-	ctx->dev->start = rk_ablk_start;
-	ctx->dev->update = rk_ablk_rx;
-	ctx->dev->complete = rk_crypto_complete;
-	ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
-
-	return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM;
-}
-
-static void rk_ablk_cra_exit(struct crypto_tfm *tfm)
-{
-	struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	free_page((unsigned long)ctx->dev->addr_vir);
-	ctx->dev->disable_clk(ctx->dev);
-}
-
-struct rk_crypto_tmp rk_ecb_aes_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "ecb(aes)",
-		.cra_driver_name	= "ecb-aes-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x0f,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= rk_aes_setkey,
-			.encrypt	= rk_aes_ecb_encrypt,
-			.decrypt	= rk_aes_ecb_decrypt,
-		}
-	}
-};
-
-struct rk_crypto_tmp rk_cbc_aes_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "cbc(aes)",
-		.cra_driver_name	= "cbc-aes-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x0f,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= rk_aes_setkey,
-			.encrypt	= rk_aes_cbc_encrypt,
-			.decrypt	= rk_aes_cbc_decrypt,
-		}
-	}
-};
-
-struct rk_crypto_tmp rk_ecb_des_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "ecb(des)",
-		.cra_driver_name	= "ecb-des-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= DES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x07,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			.setkey		= rk_des_setkey,
-			.encrypt	= rk_des_ecb_encrypt,
-			.decrypt	= rk_des_ecb_decrypt,
-		}
-	}
-};
-
-struct rk_crypto_tmp rk_cbc_des_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "cbc(des)",
-		.cra_driver_name	= "cbc-des-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= DES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x07,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= DES_KEY_SIZE,
-			.max_keysize	= DES_KEY_SIZE,
-			.ivsize		= DES_BLOCK_SIZE,
-			.setkey		= rk_des_setkey,
-			.encrypt	= rk_des_cbc_encrypt,
-			.decrypt	= rk_des_cbc_decrypt,
-		}
-	}
-};
-
-struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "ecb(des3_ede)",
-		.cra_driver_name	= "ecb-des3-ede-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= DES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x07,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES_BLOCK_SIZE,
-			.setkey		= rk_tdes_setkey,
-			.encrypt	= rk_des3_ede_ecb_encrypt,
-			.decrypt	= rk_des3_ede_ecb_decrypt,
-		}
-	}
-};
-
-struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
-	.type = ALG_TYPE_CIPHER,
-	.alg.crypto = {
-		.cra_name		= "cbc(des3_ede)",
-		.cra_driver_name	= "cbc-des3-ede-rk",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC,
-		.cra_blocksize		= DES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct rk_cipher_ctx),
-		.cra_alignmask		= 0x07,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= rk_ablk_cra_init,
-		.cra_exit		= rk_ablk_cra_exit,
-		.cra_u.ablkcipher	= {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES_BLOCK_SIZE,
-			.setkey		= rk_tdes_setkey,
-			.encrypt	= rk_des3_ede_cbc_encrypt,
-			.decrypt	= rk_des3_ede_cbc_decrypt,
-		}
-	}
-};
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
new file mode 100644
index 000000000000..4a75c8e1fa6c
--- /dev/null
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Crypto acceleration support for Rockchip RK3288
+ *
+ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * Author: Zain Wang <zain.wang@rock-chips.com>
+ *
+ * Some ideas are from marvell-cesa.c and s5p-sss.c driver.
+ */
+#include "rk3288_crypto.h"
+
+#define RK_CRYPTO_DEC			BIT(0)
+
+static void rk_crypto_complete(struct crypto_async_request *base, int err)
+{
+	if (base->complete)
+		base->complete(base, err);
+}
+
+static int rk_handle_req(struct rk_crypto_info *dev,
+			 struct skcipher_request *req)
+{
+	if (!IS_ALIGNED(req->cryptlen, dev->align_size))
+		return -EINVAL;
+	else
+		return dev->enqueue(dev, &req->base);
+}
+
+static int rk_aes_setkey(struct crypto_skcipher *cipher,
+			 const u8 *key, unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+	ctx->keylen = keylen;
+	memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
+	return 0;
+}
+
+static int rk_des_setkey(struct crypto_skcipher *cipher,
+			 const u8 *key, unsigned int keylen)
+{
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	int err;
+
+	err = verify_skcipher_des_key(cipher, key);
+	if (err)
+		return err;
+
+	ctx->keylen = keylen;
+	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+	return 0;
+}
+
+static int rk_tdes_setkey(struct crypto_skcipher *cipher,
+			  const u8 *key, unsigned int keylen)
+{
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	int err;
+
+	err = verify_skcipher_des3_key(cipher, key);
+	if (err)
+		return err;
+
+	ctx->keylen = keylen;
+	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+	return 0;
+}
+
+static int rk_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_AES_ECB_MODE;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_AES_CBC_MODE;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des_ecb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = 0;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des_ecb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_SELECT;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
+	return rk_handle_req(dev, req);
+}
+
+static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *dev = ctx->dev;
+
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
+		    RK_CRYPTO_DEC;
+	return rk_handle_req(dev, req);
+}
+
+static void rk_ablk_hw_init(struct rk_crypto_info *dev)
+{
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	u32 ivsize, block, conf_reg = 0;
+
+	block = crypto_tfm_alg_blocksize(tfm);
+	ivsize = crypto_skcipher_ivsize(cipher);
+
+	if (block == DES_BLOCK_SIZE) {
+		ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
+			     RK_CRYPTO_TDES_BYTESWAP_KEY |
+			     RK_CRYPTO_TDES_BYTESWAP_IV;
+		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
+		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
+		conf_reg = RK_CRYPTO_DESSEL;
+	} else {
+		ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
+			     RK_CRYPTO_AES_KEY_CHANGE |
+			     RK_CRYPTO_AES_BYTESWAP_KEY |
+			     RK_CRYPTO_AES_BYTESWAP_IV;
+		if (ctx->keylen == AES_KEYSIZE_192)
+			ctx->mode |= RK_CRYPTO_AES_192BIT_key;
+		else if (ctx->keylen == AES_KEYSIZE_256)
+			ctx->mode |= RK_CRYPTO_AES_256BIT_key;
+		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
+		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
+	}
+	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
+		    RK_CRYPTO_BYTESWAP_BRFIFO;
+	CRYPTO_WRITE(dev, RK_CRYPTO_CONF, conf_reg);
+	CRYPTO_WRITE(dev, RK_CRYPTO_INTENA,
+		     RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA);
+}
+
+static void crypto_dma_start(struct rk_crypto_info *dev)
+{
+	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in);
+	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4);
+	CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out);
+	CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START |
+		     _SBF(RK_CRYPTO_BLOCK_START, 16));
+}
+
+static int rk_set_data_start(struct rk_crypto_info *dev)
+{
+	int err;
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 ivsize = crypto_skcipher_ivsize(tfm);
+	u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
+		dev->sg_src->offset + dev->sg_src->length - ivsize;
+
+	/* Store the iv that need to be updated in chain mode.
+	 * And update the IV buffer to contain the next IV for decryption mode.
+	 */
+	if (ctx->mode & RK_CRYPTO_DEC) {
+		memcpy(ctx->iv, src_last_blk, ivsize);
+		sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv,
+				   ivsize, dev->total - ivsize);
+	}
+
+	err = dev->load_data(dev, dev->sg_src, dev->sg_dst);
+	if (!err)
+		crypto_dma_start(dev);
+	return err;
+}
+
+static int rk_ablk_start(struct rk_crypto_info *dev)
+{
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+	unsigned long flags;
+	int err = 0;
+
+	dev->left_bytes = req->cryptlen;
+	dev->total = req->cryptlen;
+	dev->sg_src = req->src;
+	dev->first = req->src;
+	dev->src_nents = sg_nents(req->src);
+	dev->sg_dst = req->dst;
+	dev->dst_nents = sg_nents(req->dst);
+	dev->aligned = 1;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	rk_ablk_hw_init(dev);
+	err = rk_set_data_start(dev);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return err;
+}
+
+static void rk_iv_copyback(struct rk_crypto_info *dev)
+{
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 ivsize = crypto_skcipher_ivsize(tfm);
+
+	/* Update the IV buffer to contain the next IV for encryption mode. */
+	if (!(ctx->mode & RK_CRYPTO_DEC)) {
+		if (dev->aligned) {
+			memcpy(req->iv, sg_virt(dev->sg_dst) +
+				dev->sg_dst->length - ivsize, ivsize);
+		} else {
+			memcpy(req->iv, dev->addr_vir +
+				dev->count - ivsize, ivsize);
+		}
+	}
+}
+
+static void rk_update_iv(struct rk_crypto_info *dev)
+{
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 ivsize = crypto_skcipher_ivsize(tfm);
+	u8 *new_iv = NULL;
+
+	if (ctx->mode & RK_CRYPTO_DEC) {
+		new_iv = ctx->iv;
+	} else {
+		new_iv = page_address(sg_page(dev->sg_dst)) +
+			 dev->sg_dst->offset + dev->sg_dst->length - ivsize;
+	}
+
+	if (ivsize == DES_BLOCK_SIZE)
+		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize);
+	else if (ivsize == AES_BLOCK_SIZE)
+		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize);
+}
+
+/* return:
+ *	true	some err was occurred
+ *	fault	no err, continue
+ */
+static int rk_ablk_rx(struct rk_crypto_info *dev)
+{
+	int err = 0;
+	struct skcipher_request *req =
+		skcipher_request_cast(dev->async_req);
+
+	dev->unload_data(dev);
+	if (!dev->aligned) {
+		if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents,
+					  dev->addr_vir, dev->count,
+					  dev->total - dev->left_bytes -
+					  dev->count)) {
+			err = -EINVAL;
+			goto out_rx;
+		}
+	}
+	if (dev->left_bytes) {
+		rk_update_iv(dev);
+		if (dev->aligned) {
+			if (sg_is_last(dev->sg_src)) {
+				dev_err(dev->dev, "[%s:%d] Lack of data\n",
+					__func__, __LINE__);
+				err = -ENOMEM;
+				goto out_rx;
+			}
+			dev->sg_src = sg_next(dev->sg_src);
+			dev->sg_dst = sg_next(dev->sg_dst);
+		}
+		err = rk_set_data_start(dev);
+	} else {
+		rk_iv_copyback(dev);
+		/* here show the calculation is over without any err */
+		dev->complete(dev->async_req, 0);
+		tasklet_schedule(&dev->queue_task);
+	}
+out_rx:
+	return err;
+}
+
+static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct rk_crypto_tmp *algt;
+
+	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+
+	ctx->dev = algt->dev;
+	ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1;
+	ctx->dev->start = rk_ablk_start;
+	ctx->dev->update = rk_ablk_rx;
+	ctx->dev->complete = rk_crypto_complete;
+	ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
+
+	return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM;
+}
+
+static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	free_page((unsigned long)ctx->dev->addr_vir);
+	ctx->dev->disable_clk(ctx->dev);
+}
+
+struct rk_crypto_tmp rk_ecb_aes_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x0f,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= rk_aes_setkey,
+		.encrypt		= rk_aes_ecb_encrypt,
+		.decrypt		= rk_aes_ecb_decrypt,
+	}
+};
+
+struct rk_crypto_tmp rk_cbc_aes_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x0f,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= rk_aes_setkey,
+		.encrypt		= rk_aes_cbc_encrypt,
+		.decrypt		= rk_aes_cbc_decrypt,
+	}
+};
+
+struct rk_crypto_tmp rk_ecb_des_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "ecb(des)",
+		.base.cra_driver_name	= "ecb-des-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x07,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.setkey			= rk_des_setkey,
+		.encrypt		= rk_des_ecb_encrypt,
+		.decrypt		= rk_des_ecb_decrypt,
+	}
+};
+
+struct rk_crypto_tmp rk_cbc_des_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "cbc(des)",
+		.base.cra_driver_name	= "cbc-des-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x07,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= rk_des_setkey,
+		.encrypt		= rk_des_cbc_encrypt,
+		.decrypt		= rk_des_cbc_decrypt,
+	}
+};
+
+struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3-ede-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x07,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= rk_tdes_setkey,
+		.encrypt		= rk_des3_ede_ecb_encrypt,
+		.decrypt		= rk_des3_ede_ecb_decrypt,
+	}
+};
+
+struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
+	.type = ALG_TYPE_CIPHER,
+	.alg.skcipher = {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3-ede-rk",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
+		.base.cra_alignmask	= 0x07,
+		.base.cra_module	= THIS_MODULE,
+
+		.init			= rk_ablk_init_tfm,
+		.exit			= rk_ablk_exit_tfm,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= rk_tdes_setkey,
+		.encrypt		= rk_des3_ede_cbc_encrypt,
+		.decrypt		= rk_des3_ede_cbc_decrypt,
+	}
+};
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 010f1bb20dad..d66e20a2f54c 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -303,7 +303,7 @@ struct s5p_aes_dev {
 	void __iomem			*aes_ioaddr;
 	int				irq_fc;
 
-	struct ablkcipher_request	*req;
+	struct skcipher_request		*req;
 	struct s5p_aes_ctx		*ctx;
 	struct scatterlist		*sg_src;
 	struct scatterlist		*sg_dst;
@@ -456,7 +456,7 @@ static void s5p_free_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist **sg)
 	if (!*sg)
 		return;
 
-	len = ALIGN(dev->req->nbytes, AES_BLOCK_SIZE);
+	len = ALIGN(dev->req->cryptlen, AES_BLOCK_SIZE);
 	free_pages((unsigned long)sg_virt(*sg), get_order(len));
 
 	kfree(*sg);
@@ -478,27 +478,27 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg,
 
 static void s5p_sg_done(struct s5p_aes_dev *dev)
 {
-	struct ablkcipher_request *req = dev->req;
-	struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req);
+	struct skcipher_request *req = dev->req;
+	struct s5p_aes_reqctx *reqctx = skcipher_request_ctx(req);
 
 	if (dev->sg_dst_cpy) {
 		dev_dbg(dev->dev,
 			"Copying %d bytes of output data back to original place\n",
-			dev->req->nbytes);
+			dev->req->cryptlen);
 		s5p_sg_copy_buf(sg_virt(dev->sg_dst_cpy), dev->req->dst,
-				dev->req->nbytes, 1);
+				dev->req->cryptlen, 1);
 	}
 	s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
 	s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
 	if (reqctx->mode & FLAGS_AES_CBC)
-		memcpy_fromio(req->info, dev->aes_ioaddr + SSS_REG_AES_IV_DATA(0), AES_BLOCK_SIZE);
+		memcpy_fromio(req->iv, dev->aes_ioaddr + SSS_REG_AES_IV_DATA(0), AES_BLOCK_SIZE);
 
 	else if (reqctx->mode & FLAGS_AES_CTR)
-		memcpy_fromio(req->info, dev->aes_ioaddr + SSS_REG_AES_CNT_DATA(0), AES_BLOCK_SIZE);
+		memcpy_fromio(req->iv, dev->aes_ioaddr + SSS_REG_AES_CNT_DATA(0), AES_BLOCK_SIZE);
 }
 
 /* Calls the completion. Cannot be called with dev->lock hold. */
-static void s5p_aes_complete(struct ablkcipher_request *req, int err)
+static void s5p_aes_complete(struct skcipher_request *req, int err)
 {
 	req->base.complete(&req->base, err);
 }
@@ -523,7 +523,7 @@ static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src,
 	if (!*dst)
 		return -ENOMEM;
 
-	len = ALIGN(dev->req->nbytes, AES_BLOCK_SIZE);
+	len = ALIGN(dev->req->cryptlen, AES_BLOCK_SIZE);
 	pages = (void *)__get_free_pages(GFP_ATOMIC, get_order(len));
 	if (!pages) {
 		kfree(*dst);
@@ -531,7 +531,7 @@ static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src,
 		return -ENOMEM;
 	}
 
-	s5p_sg_copy_buf(pages, src, dev->req->nbytes, 0);
+	s5p_sg_copy_buf(pages, src, dev->req->cryptlen, 0);
 
 	sg_init_table(*dst, 1);
 	sg_set_buf(*dst, pages, len);
@@ -660,7 +660,7 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 {
 	struct platform_device *pdev = dev_id;
 	struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	int err_dma_tx = 0;
 	int err_dma_rx = 0;
 	int err_dma_hx = 0;
@@ -1870,7 +1870,7 @@ static bool s5p_is_sg_aligned(struct scatterlist *sg)
 }
 
 static int s5p_set_indata_start(struct s5p_aes_dev *dev,
-				struct ablkcipher_request *req)
+				struct skcipher_request *req)
 {
 	struct scatterlist *sg;
 	int err;
@@ -1897,7 +1897,7 @@ static int s5p_set_indata_start(struct s5p_aes_dev *dev,
 }
 
 static int s5p_set_outdata_start(struct s5p_aes_dev *dev,
-				 struct ablkcipher_request *req)
+				 struct skcipher_request *req)
 {
 	struct scatterlist *sg;
 	int err;
@@ -1925,7 +1925,7 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev,
 
 static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
 {
-	struct ablkcipher_request *req = dev->req;
+	struct skcipher_request *req = dev->req;
 	u32 aes_control;
 	unsigned long flags;
 	int err;
@@ -1938,12 +1938,12 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
 
 	if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) {
 		aes_control |= SSS_AES_CHAIN_MODE_CBC;
-		iv = req->info;
+		iv = req->iv;
 		ctr = NULL;
 	} else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) {
 		aes_control |= SSS_AES_CHAIN_MODE_CTR;
 		iv = NULL;
-		ctr = req->info;
+		ctr = req->iv;
 	} else {
 		iv = NULL; /* AES_ECB */
 		ctr = NULL;
@@ -2021,21 +2021,21 @@ static void s5p_tasklet_cb(unsigned long data)
 	if (backlog)
 		backlog->complete(backlog, -EINPROGRESS);
 
-	dev->req = ablkcipher_request_cast(async_req);
+	dev->req = skcipher_request_cast(async_req);
 	dev->ctx = crypto_tfm_ctx(dev->req->base.tfm);
-	reqctx   = ablkcipher_request_ctx(dev->req);
+	reqctx   = skcipher_request_ctx(dev->req);
 
 	s5p_aes_crypt_start(dev, reqctx->mode);
 }
 
 static int s5p_aes_handle_req(struct s5p_aes_dev *dev,
-			      struct ablkcipher_request *req)
+			      struct skcipher_request *req)
 {
 	unsigned long flags;
 	int err;
 
 	spin_lock_irqsave(&dev->lock, flags);
-	err = ablkcipher_enqueue_request(&dev->queue, req);
+	err = crypto_enqueue_request(&dev->queue, &req->base);
 	if (dev->busy) {
 		spin_unlock_irqrestore(&dev->lock, flags);
 		return err;
@@ -2049,17 +2049,17 @@ static int s5p_aes_handle_req(struct s5p_aes_dev *dev,
 	return err;
 }
 
-static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int s5p_aes_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req);
-	struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s5p_aes_reqctx *reqctx = skcipher_request_ctx(req);
+	struct s5p_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct s5p_aes_dev *dev = ctx->dev;
 
-	if (!req->nbytes)
+	if (!req->cryptlen)
 		return 0;
 
-	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE) &&
+	if (!IS_ALIGNED(req->cryptlen, AES_BLOCK_SIZE) &&
 			((mode & FLAGS_AES_MODE_MASK) != FLAGS_AES_CTR)) {
 		dev_dbg(dev->dev, "request size is not exact amount of AES blocks\n");
 		return -EINVAL;
@@ -2070,10 +2070,10 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 	return s5p_aes_handle_req(dev, req);
 }
 
-static int s5p_aes_setkey(struct crypto_ablkcipher *cipher,
+static int s5p_aes_setkey(struct crypto_skcipher *cipher,
 			  const u8 *key, unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (keylen != AES_KEYSIZE_128 &&
@@ -2087,106 +2087,97 @@ static int s5p_aes_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
-static int s5p_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int s5p_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return s5p_aes_crypt(req, 0);
 }
 
-static int s5p_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int s5p_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return s5p_aes_crypt(req, FLAGS_AES_DECRYPT);
 }
 
-static int s5p_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int s5p_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return s5p_aes_crypt(req, FLAGS_AES_CBC);
 }
 
-static int s5p_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int s5p_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return s5p_aes_crypt(req, FLAGS_AES_DECRYPT | FLAGS_AES_CBC);
 }
 
-static int s5p_aes_ctr_crypt(struct ablkcipher_request *req)
+static int s5p_aes_ctr_crypt(struct skcipher_request *req)
 {
 	return s5p_aes_crypt(req, FLAGS_AES_CTR);
 }
 
-static int s5p_aes_cra_init(struct crypto_tfm *tfm)
+static int s5p_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s5p_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->dev = s5p_dev;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct s5p_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct s5p_aes_reqctx));
 
 	return 0;
 }
 
-static struct crypto_alg algs[] = {
+static struct skcipher_alg algs[] = {
 	{
-		.cra_name		= "ecb(aes)",
-		.cra_driver_name	= "ecb-aes-s5p",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC |
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-s5p",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct s5p_aes_ctx),
-		.cra_alignmask		= 0x0f,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= s5p_aes_cra_init,
-		.cra_u.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= s5p_aes_setkey,
-			.encrypt	= s5p_aes_ecb_encrypt,
-			.decrypt	= s5p_aes_ecb_decrypt,
-		}
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct s5p_aes_ctx),
+		.base.cra_alignmask	= 0x0f,
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= s5p_aes_setkey,
+		.encrypt		= s5p_aes_ecb_encrypt,
+		.decrypt		= s5p_aes_ecb_decrypt,
+		.init			= s5p_aes_init_tfm,
 	},
 	{
-		.cra_name		= "cbc(aes)",
-		.cra_driver_name	= "cbc-aes-s5p",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC |
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-s5p",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct s5p_aes_ctx),
-		.cra_alignmask		= 0x0f,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= s5p_aes_cra_init,
-		.cra_u.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= s5p_aes_setkey,
-			.encrypt	= s5p_aes_cbc_encrypt,
-			.decrypt	= s5p_aes_cbc_decrypt,
-		}
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct s5p_aes_ctx),
+		.base.cra_alignmask	= 0x0f,
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= s5p_aes_setkey,
+		.encrypt		= s5p_aes_cbc_encrypt,
+		.decrypt		= s5p_aes_cbc_decrypt,
+		.init			= s5p_aes_init_tfm,
 	},
 	{
-		.cra_name		= "ctr(aes)",
-		.cra_driver_name	= "ctr-aes-s5p",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-					  CRYPTO_ALG_ASYNC |
+		.base.cra_name		= "ctr(aes)",
+		.base.cra_driver_name	= "ctr-aes-s5p",
+		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct s5p_aes_ctx),
-		.cra_alignmask		= 0x0f,
-		.cra_type		= &crypto_ablkcipher_type,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= s5p_aes_cra_init,
-		.cra_u.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= s5p_aes_setkey,
-			.encrypt	= s5p_aes_ctr_crypt,
-			.decrypt	= s5p_aes_ctr_crypt,
-		}
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct s5p_aes_ctx),
+		.base.cra_alignmask	= 0x0f,
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= s5p_aes_setkey,
+		.encrypt		= s5p_aes_ctr_crypt,
+		.decrypt		= s5p_aes_ctr_crypt,
+		.init			= s5p_aes_init_tfm,
 	},
 };
 
@@ -2297,7 +2288,7 @@ static int s5p_aes_probe(struct platform_device *pdev)
 	crypto_init_queue(&pdata->queue, CRYPTO_QUEUE_LEN);
 
 	for (i = 0; i < ARRAY_SIZE(algs); i++) {
-		err = crypto_register_alg(&algs[i]);
+		err = crypto_register_skcipher(&algs[i]);
 		if (err)
 			goto err_algs;
 	}
@@ -2334,11 +2325,11 @@ err_hash:
 
 err_algs:
 	if (i < ARRAY_SIZE(algs))
-		dev_err(dev, "can't register '%s': %d\n", algs[i].cra_name,
+		dev_err(dev, "can't register '%s': %d\n", algs[i].base.cra_name,
 			err);
 
 	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&algs[j]);
+		crypto_unregister_skcipher(&algs[j]);
 
 	tasklet_kill(&pdata->tasklet);
 
@@ -2362,7 +2353,7 @@ static int s5p_aes_remove(struct platform_device *pdev)
 		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(algs); i++)
-		crypto_unregister_alg(&algs[i]);
+		crypto_unregister_skcipher(&algs[i]);
 
 	tasklet_kill(&pdata->tasklet);
 	if (pdata->use_hash) {
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 8ac8ec6decd5..466e30bd529c 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -547,7 +547,7 @@ unmap_in:
 	return -EINVAL;
 }
 
-static int sahara_aes_process(struct ablkcipher_request *req)
+static int sahara_aes_process(struct skcipher_request *req)
 {
 	struct sahara_dev *dev = dev_ptr;
 	struct sahara_ctx *ctx;
@@ -558,20 +558,20 @@ static int sahara_aes_process(struct ablkcipher_request *req)
 	/* Request is ready to be dispatched by the device */
 	dev_dbg(dev->device,
 		"dispatch request (nbytes=%d, src=%p, dst=%p)\n",
-		req->nbytes, req->src, req->dst);
+		req->cryptlen, req->src, req->dst);
 
 	/* assign new request to device */
-	dev->total = req->nbytes;
+	dev->total = req->cryptlen;
 	dev->in_sg = req->src;
 	dev->out_sg = req->dst;
 
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx = skcipher_request_ctx(req);
+	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	rctx->mode &= FLAGS_MODE_MASK;
 	dev->flags = (dev->flags & ~FLAGS_MODE_MASK) | rctx->mode;
 
-	if ((dev->flags & FLAGS_CBC) && req->info)
-		memcpy(dev->iv_base, req->info, AES_KEYSIZE_128);
+	if ((dev->flags & FLAGS_CBC) && req->iv)
+		memcpy(dev->iv_base, req->iv, AES_KEYSIZE_128);
 
 	/* assign new context to device */
 	dev->ctx = ctx;
@@ -597,11 +597,10 @@ static int sahara_aes_process(struct ablkcipher_request *req)
 	return 0;
 }
 
-static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int sahara_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			     unsigned int keylen)
 {
-	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	int ret;
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->keylen = keylen;
 
@@ -621,25 +620,19 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
 	crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
 						 CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
-
-	tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(ctx->fallback) &
-			       CRYPTO_TFM_RES_MASK;
-	return ret;
+	return crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 }
 
-static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int sahara_aes_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct sahara_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct sahara_aes_reqctx *rctx = skcipher_request_ctx(req);
 	struct sahara_dev *dev = dev_ptr;
 	int err = 0;
 
 	dev_dbg(dev->device, "nbytes: %d, enc: %d, cbc: %d\n",
-		req->nbytes, !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC));
+		req->cryptlen, !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC));
 
-	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
+	if (!IS_ALIGNED(req->cryptlen, AES_BLOCK_SIZE)) {
 		dev_err(dev->device,
 			"request size is not exact amount of AES blocks\n");
 		return -EINVAL;
@@ -648,7 +641,7 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 	rctx->mode = mode;
 
 	mutex_lock(&dev->queue_mutex);
-	err = ablkcipher_enqueue_request(&dev->queue, req);
+	err = crypto_enqueue_request(&dev->queue, &req->base);
 	mutex_unlock(&dev->queue_mutex);
 
 	wake_up_process(dev->kthread);
@@ -656,10 +649,10 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 	return err;
 }
 
-static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int sahara_aes_ecb_encrypt(struct skcipher_request *req)
 {
-	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
-		crypto_ablkcipher_reqtfm(req));
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(
+		crypto_skcipher_reqtfm(req));
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
@@ -669,7 +662,7 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req)
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 		err = crypto_skcipher_encrypt(subreq);
 		skcipher_request_zero(subreq);
 		return err;
@@ -678,10 +671,10 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req)
 	return sahara_aes_crypt(req, FLAGS_ENCRYPT);
 }
 
-static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int sahara_aes_ecb_decrypt(struct skcipher_request *req)
 {
-	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
-		crypto_ablkcipher_reqtfm(req));
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(
+		crypto_skcipher_reqtfm(req));
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
@@ -691,7 +684,7 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req)
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 		err = crypto_skcipher_decrypt(subreq);
 		skcipher_request_zero(subreq);
 		return err;
@@ -700,10 +693,10 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req)
 	return sahara_aes_crypt(req, 0);
 }
 
-static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int sahara_aes_cbc_encrypt(struct skcipher_request *req)
 {
-	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
-		crypto_ablkcipher_reqtfm(req));
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(
+		crypto_skcipher_reqtfm(req));
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
@@ -713,7 +706,7 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req)
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 		err = crypto_skcipher_encrypt(subreq);
 		skcipher_request_zero(subreq);
 		return err;
@@ -722,10 +715,10 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req)
 	return sahara_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
 }
 
-static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int sahara_aes_cbc_decrypt(struct skcipher_request *req)
 {
-	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
-		crypto_ablkcipher_reqtfm(req));
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(
+		crypto_skcipher_reqtfm(req));
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
@@ -735,7 +728,7 @@ static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req)
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
-					   req->nbytes, req->info);
+					   req->cryptlen, req->iv);
 		err = crypto_skcipher_decrypt(subreq);
 		skcipher_request_zero(subreq);
 		return err;
@@ -744,10 +737,10 @@ static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req)
 	return sahara_aes_crypt(req, FLAGS_CBC);
 }
 
-static int sahara_aes_cra_init(struct crypto_tfm *tfm)
+static int sahara_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	const char *name = crypto_tfm_alg_name(tfm);
-	struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
 					      CRYPTO_ALG_NEED_FALLBACK);
@@ -756,14 +749,14 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm)
 		return PTR_ERR(ctx->fallback);
 	}
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct sahara_aes_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct sahara_aes_reqctx));
 
 	return 0;
 }
 
-static void sahara_aes_cra_exit(struct crypto_tfm *tfm)
+static void sahara_aes_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct sahara_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	crypto_free_sync_skcipher(ctx->fallback);
 }
@@ -1071,8 +1064,8 @@ static int sahara_queue_manage(void *data)
 
 				ret = sahara_sha_process(req);
 			} else {
-				struct ablkcipher_request *req =
-					ablkcipher_request_cast(async_req);
+				struct skcipher_request *req =
+					skcipher_request_cast(async_req);
 
 				ret = sahara_aes_process(req);
 			}
@@ -1189,48 +1182,42 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static struct crypto_alg aes_algs[] = {
+static struct skcipher_alg aes_algs[] = {
 {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "sahara-ecb-aes",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct sahara_ctx),
-	.cra_alignmask		= 0x0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= sahara_aes_cra_init,
-	.cra_exit		= sahara_aes_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE ,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= sahara_aes_setkey,
-		.encrypt	= sahara_aes_ecb_encrypt,
-		.decrypt	= sahara_aes_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "sahara-ecb-aes",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct sahara_ctx),
+	.base.cra_alignmask	= 0x0,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= sahara_aes_init_tfm,
+	.exit			= sahara_aes_exit_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE ,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= sahara_aes_setkey,
+	.encrypt		= sahara_aes_ecb_encrypt,
+	.decrypt		= sahara_aes_ecb_decrypt,
 }, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "sahara-cbc-aes",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct sahara_ctx),
-	.cra_alignmask		= 0x0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= sahara_aes_cra_init,
-	.cra_exit		= sahara_aes_cra_exit,
-	.cra_u.ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE ,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= sahara_aes_setkey,
-		.encrypt	= sahara_aes_cbc_encrypt,
-		.decrypt	= sahara_aes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "sahara-cbc-aes",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct sahara_ctx),
+	.base.cra_alignmask	= 0x0,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= sahara_aes_init_tfm,
+	.exit			= sahara_aes_exit_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE ,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= sahara_aes_setkey,
+	.encrypt		= sahara_aes_cbc_encrypt,
+	.decrypt		= sahara_aes_cbc_decrypt,
 }
 };
 
@@ -1318,7 +1305,7 @@ static int sahara_register_algs(struct sahara_dev *dev)
 	unsigned int i, j, k, l;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
-		err = crypto_register_alg(&aes_algs[i]);
+		err = crypto_register_skcipher(&aes_algs[i]);
 		if (err)
 			goto err_aes_algs;
 	}
@@ -1348,7 +1335,7 @@ err_sha_v3_algs:
 
 err_aes_algs:
 	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&aes_algs[j]);
+		crypto_unregister_skcipher(&aes_algs[j]);
 
 	return err;
 }
@@ -1358,7 +1345,7 @@ static void sahara_unregister_algs(struct sahara_dev *dev)
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
-		crypto_unregister_alg(&aes_algs[i]);
+		crypto_unregister_skcipher(&aes_algs[i]);
 
 	for (i = 0; i < ARRAY_SIZE(sha_v3_algs); i++)
 		crypto_unregister_ahash(&sha_v3_algs[i]);
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 1aba9372cd23..4ef3eb11361c 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -4,7 +4,7 @@ config CRYPTO_DEV_STM32_CRC
 	depends on ARCH_STM32
 	select CRYPTO_HASH
 	help
-          This enables support for the CRC32 hw accelerator which can be found
+	  This enables support for the CRC32 hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
 
 config CRYPTO_DEV_STM32_HASH
@@ -17,7 +17,7 @@ config CRYPTO_DEV_STM32_HASH
 	select CRYPTO_SHA256
 	select CRYPTO_ENGINE
 	help
-          This enables support for the HASH hw accelerator which can be found
+	  This enables support for the HASH hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
 
 config CRYPTO_DEV_STM32_CRYP
@@ -27,5 +27,5 @@ config CRYPTO_DEV_STM32_CRYP
 	select CRYPTO_ENGINE
 	select CRYPTO_LIB_DES
 	help
-          This enables support for the CRYP (AES/DES/TDES) hw accelerator which
+	  This enables support for the CRYP (AES/DES/TDES) hw accelerator which
 	  can be found on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 9e11c3480353..8e92e4ac79f1 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -85,10 +85,8 @@ static int stm32_crc_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct stm32_crc_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != sizeof(u32))
 		return -EINVAL;
-	}
 
 	mctx->key = get_unaligned_le32(key);
 	return 0;
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index ba5ea6434f9c..d347a1d6e351 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -19,6 +19,7 @@
 #include <crypto/engine.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 
 #define DRIVER_NAME             "stm32-cryp"
 
@@ -137,7 +138,7 @@ struct stm32_cryp {
 
 	struct crypto_engine    *engine;
 
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	struct aead_request     *areq;
 
 	size_t                  authsize;
@@ -395,8 +396,8 @@ static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, u32 *iv)
 
 static void stm32_cryp_get_iv(struct stm32_cryp *cryp)
 {
-	struct ablkcipher_request *req = cryp->req;
-	u32 *tmp = req->info;
+	struct skcipher_request *req = cryp->req;
+	u32 *tmp = (void *)req->iv;
 
 	if (!tmp)
 		return;
@@ -616,7 +617,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 	case CR_TDES_CBC:
 	case CR_AES_CBC:
 	case CR_AES_CTR:
-		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->req->info);
+		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->req->iv);
 		break;
 
 	default:
@@ -667,7 +668,7 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err)
 	if (is_gcm(cryp) || is_ccm(cryp))
 		crypto_finalize_aead_request(cryp->engine, cryp->areq, err);
 	else
-		crypto_finalize_ablkcipher_request(cryp->engine, cryp->req,
+		crypto_finalize_skcipher_request(cryp->engine, cryp->req,
 						   err);
 
 	memset(cryp->ctx->key, 0, cryp->ctx->keylen);
@@ -685,11 +686,11 @@ static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq);
 static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
 					 void *areq);
 
-static int stm32_cryp_cra_init(struct crypto_tfm *tfm)
+static int stm32_cryp_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct stm32_cryp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct stm32_cryp_reqctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct stm32_cryp_reqctx));
 
 	ctx->enginectx.op.do_one_request = stm32_cryp_cipher_one_req;
 	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_cipher_req;
@@ -714,11 +715,11 @@ static int stm32_cryp_aes_aead_init(struct crypto_aead *tfm)
 	return 0;
 }
 
-static int stm32_cryp_crypt(struct ablkcipher_request *req, unsigned long mode)
+static int stm32_cryp_crypt(struct skcipher_request *req, unsigned long mode)
 {
-	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct stm32_cryp_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
+	struct stm32_cryp_reqctx *rctx = skcipher_request_ctx(req);
 	struct stm32_cryp *cryp = stm32_cryp_find_dev(ctx);
 
 	if (!cryp)
@@ -726,7 +727,7 @@ static int stm32_cryp_crypt(struct ablkcipher_request *req, unsigned long mode)
 
 	rctx->mode = mode;
 
-	return crypto_transfer_ablkcipher_request_to_engine(cryp->engine, req);
+	return crypto_transfer_skcipher_request_to_engine(cryp->engine, req);
 }
 
 static int stm32_cryp_aead_crypt(struct aead_request *req, unsigned long mode)
@@ -743,10 +744,10 @@ static int stm32_cryp_aead_crypt(struct aead_request *req, unsigned long mode)
 	return crypto_transfer_aead_request_to_engine(cryp->engine, req);
 }
 
-static int stm32_cryp_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int stm32_cryp_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			     unsigned int keylen)
 {
-	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -754,7 +755,7 @@ static int stm32_cryp_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	return 0;
 }
 
-static int stm32_cryp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int stm32_cryp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				 unsigned int keylen)
 {
 	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
@@ -764,17 +765,17 @@ static int stm32_cryp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 		return stm32_cryp_setkey(tfm, key, keylen);
 }
 
-static int stm32_cryp_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int stm32_cryp_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				 unsigned int keylen)
 {
-	return verify_ablkcipher_des_key(tfm, key) ?:
+	return verify_skcipher_des_key(tfm, key) ?:
 	       stm32_cryp_setkey(tfm, key, keylen);
 }
 
-static int stm32_cryp_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int stm32_cryp_tdes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				  unsigned int keylen)
 {
-	return verify_ablkcipher_des3_key(tfm, key) ?:
+	return verify_skcipher_des3_key(tfm, key) ?:
 	       stm32_cryp_setkey(tfm, key, keylen);
 }
 
@@ -818,32 +819,32 @@ static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm,
 	return 0;
 }
 
-static int stm32_cryp_aes_ecb_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_aes_ecb_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB);
 }
 
-static int stm32_cryp_aes_cbc_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_aes_cbc_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC);
 }
 
-static int stm32_cryp_aes_ctr_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_ctr_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_aes_ctr_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_aes_ctr_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR);
 }
@@ -868,47 +869,47 @@ static int stm32_cryp_aes_ccm_decrypt(struct aead_request *req)
 	return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM);
 }
 
-static int stm32_cryp_des_ecb_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_des_ecb_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_des_ecb_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_des_ecb_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB);
 }
 
-static int stm32_cryp_des_cbc_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_des_cbc_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_des_cbc_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_des_cbc_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC);
 }
 
-static int stm32_cryp_tdes_ecb_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_tdes_ecb_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_tdes_ecb_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_tdes_ecb_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB);
 }
 
-static int stm32_cryp_tdes_cbc_encrypt(struct ablkcipher_request *req)
+static int stm32_cryp_tdes_cbc_encrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC | FLG_ENCRYPT);
 }
 
-static int stm32_cryp_tdes_cbc_decrypt(struct ablkcipher_request *req)
+static int stm32_cryp_tdes_cbc_decrypt(struct skcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC);
 }
 
-static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
+static int stm32_cryp_prepare_req(struct skcipher_request *req,
 				  struct aead_request *areq)
 {
 	struct stm32_cryp_ctx *ctx;
@@ -919,7 +920,7 @@ static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
 	if (!req && !areq)
 		return -EINVAL;
 
-	ctx = req ? crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)) :
+	ctx = req ? crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) :
 		    crypto_aead_ctx(crypto_aead_reqtfm(areq));
 
 	cryp = ctx->cryp;
@@ -927,7 +928,7 @@ static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
 	if (!cryp)
 		return -ENODEV;
 
-	rctx = req ? ablkcipher_request_ctx(req) : aead_request_ctx(areq);
+	rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq);
 	rctx->mode &= FLG_MODE_MASK;
 
 	ctx->cryp = cryp;
@@ -939,7 +940,7 @@ static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
 	if (req) {
 		cryp->req = req;
 		cryp->areq = NULL;
-		cryp->total_in = req->nbytes;
+		cryp->total_in = req->cryptlen;
 		cryp->total_out = cryp->total_in;
 	} else {
 		/*
@@ -1016,8 +1017,8 @@ static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
 static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
 					 void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq,
-						      struct ablkcipher_request,
+	struct skcipher_request *req = container_of(areq,
+						      struct skcipher_request,
 						      base);
 
 	return stm32_cryp_prepare_req(req, NULL);
@@ -1025,11 +1026,11 @@ static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
 
 static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq)
 {
-	struct ablkcipher_request *req = container_of(areq,
-						      struct ablkcipher_request,
+	struct skcipher_request *req = container_of(areq,
+						      struct skcipher_request,
 						      base);
-	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
+	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(
+			crypto_skcipher_reqtfm(req));
 	struct stm32_cryp *cryp = ctx->cryp;
 
 	if (!cryp)
@@ -1724,150 +1725,129 @@ static irqreturn_t stm32_cryp_irq(int irq, void *arg)
 	return IRQ_WAKE_THREAD;
 }
 
-static struct crypto_alg crypto_algs[] = {
-{
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "stm32-ecb-aes",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.setkey		= stm32_cryp_aes_setkey,
-		.encrypt	= stm32_cryp_aes_ecb_encrypt,
-		.decrypt	= stm32_cryp_aes_ecb_decrypt,
-	}
+static struct skcipher_alg crypto_algs[] = {
+{
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "stm32-ecb-aes",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= stm32_cryp_aes_setkey,
+	.encrypt		= stm32_cryp_aes_ecb_encrypt,
+	.decrypt		= stm32_cryp_aes_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "stm32-cbc-aes",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_aes_setkey,
-		.encrypt	= stm32_cryp_aes_cbc_encrypt,
-		.decrypt	= stm32_cryp_aes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(aes)",
+	.base.cra_driver_name	= "stm32-cbc-aes",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_aes_setkey,
+	.encrypt		= stm32_cryp_aes_cbc_encrypt,
+	.decrypt		= stm32_cryp_aes_cbc_decrypt,
 },
 {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "stm32-ctr-aes",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_aes_setkey,
-		.encrypt	= stm32_cryp_aes_ctr_encrypt,
-		.decrypt	= stm32_cryp_aes_ctr_decrypt,
-	}
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "stm32-ctr-aes",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_aes_setkey,
+	.encrypt		= stm32_cryp_aes_ctr_encrypt,
+	.decrypt		= stm32_cryp_aes_ctr_decrypt,
 },
 {
-	.cra_name		= "ecb(des)",
-	.cra_driver_name	= "stm32-ecb-des",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= DES_BLOCK_SIZE,
-		.max_keysize	= DES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_des_setkey,
-		.encrypt	= stm32_cryp_des_ecb_encrypt,
-		.decrypt	= stm32_cryp_des_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(des)",
+	.base.cra_driver_name	= "stm32-ecb-des",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= DES_BLOCK_SIZE,
+	.max_keysize		= DES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_des_setkey,
+	.encrypt		= stm32_cryp_des_ecb_encrypt,
+	.decrypt		= stm32_cryp_des_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(des)",
-	.cra_driver_name	= "stm32-cbc-des",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= DES_BLOCK_SIZE,
-		.max_keysize	= DES_BLOCK_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_des_setkey,
-		.encrypt	= stm32_cryp_des_cbc_encrypt,
-		.decrypt	= stm32_cryp_des_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(des)",
+	.base.cra_driver_name	= "stm32-cbc-des",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= DES_BLOCK_SIZE,
+	.max_keysize		= DES_BLOCK_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_des_setkey,
+	.encrypt		= stm32_cryp_des_cbc_encrypt,
+	.decrypt		= stm32_cryp_des_cbc_decrypt,
 },
 {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "stm32-ecb-des3",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= 3 * DES_BLOCK_SIZE,
-		.max_keysize	= 3 * DES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_tdes_setkey,
-		.encrypt	= stm32_cryp_tdes_ecb_encrypt,
-		.decrypt	= stm32_cryp_tdes_ecb_decrypt,
-	}
+	.base.cra_name		= "ecb(des3_ede)",
+	.base.cra_driver_name	= "stm32-ecb-des3",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= 3 * DES_BLOCK_SIZE,
+	.max_keysize		= 3 * DES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_tdes_setkey,
+	.encrypt		= stm32_cryp_tdes_ecb_encrypt,
+	.decrypt		= stm32_cryp_tdes_ecb_decrypt,
 },
 {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "stm32-cbc-des3",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= DES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
-	.cra_alignmask		= 0xf,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= stm32_cryp_cra_init,
-	.cra_ablkcipher = {
-		.min_keysize	= 3 * DES_BLOCK_SIZE,
-		.max_keysize	= 3 * DES_BLOCK_SIZE,
-		.ivsize		= DES_BLOCK_SIZE,
-		.setkey		= stm32_cryp_tdes_setkey,
-		.encrypt	= stm32_cryp_tdes_cbc_encrypt,
-		.decrypt	= stm32_cryp_tdes_cbc_decrypt,
-	}
+	.base.cra_name		= "cbc(des3_ede)",
+	.base.cra_driver_name	= "stm32-cbc-des3",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_ASYNC,
+	.base.cra_blocksize	= DES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+	.base.cra_alignmask	= 0xf,
+	.base.cra_module	= THIS_MODULE,
+
+	.init			= stm32_cryp_init_tfm,
+	.min_keysize		= 3 * DES_BLOCK_SIZE,
+	.max_keysize		= 3 * DES_BLOCK_SIZE,
+	.ivsize			= DES_BLOCK_SIZE,
+	.setkey			= stm32_cryp_tdes_setkey,
+	.encrypt		= stm32_cryp_tdes_cbc_encrypt,
+	.decrypt		= stm32_cryp_tdes_cbc_decrypt,
 },
 };
 
@@ -2010,7 +1990,7 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 		goto err_engine2;
 	}
 
-	ret = crypto_register_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+	ret = crypto_register_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 	if (ret) {
 		dev_err(dev, "Could not register algs\n");
 		goto err_algs;
@@ -2027,7 +2007,7 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 	return 0;
 
 err_aead_algs:
-	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+	crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 err_algs:
 err_engine2:
 	crypto_engine_exit(cryp->engine);
@@ -2059,7 +2039,7 @@ static int stm32_cryp_remove(struct platform_device *pdev)
 		return ret;
 
 	crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
-	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+	crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 
 	crypto_engine_exit(cryp->engine);
 
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index cfc8e0e37bee..167b80eec437 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -518,10 +518,10 @@ static int stm32_hash_dma_init(struct stm32_hash_dev *hdev)
 	dma_conf.dst_maxburst = hdev->dma_maxburst;
 	dma_conf.device_fc = false;
 
-	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "in");
-	if (!hdev->dma_lch) {
+	hdev->dma_lch = dma_request_chan(hdev->dev, "in");
+	if (IS_ERR(hdev->dma_lch)) {
 		dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
-		return -EBUSY;
+		return PTR_ERR(hdev->dma_lch);
 	}
 
 	err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 56e3068c9947..9c6db7f698c4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -35,7 +35,7 @@
 #include <crypto/md5.h>
 #include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
-#include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
@@ -914,7 +914,6 @@ static int aead_setkey(struct crypto_aead *authenc,
 	return 0;
 
 badkey:
-	crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
@@ -929,11 +928,11 @@ static int aead_des3_setkey(struct crypto_aead *authenc,
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
 	if (unlikely(err))
-		goto badkey;
+		goto out;
 
 	err = -EINVAL;
 	if (keys.authkeylen + keys.enckeylen > TALITOS_MAX_KEY_SIZE)
-		goto badkey;
+		goto out;
 
 	err = verify_aead_des3_key(authenc, keys.enckey, keys.enckeylen);
 	if (err)
@@ -954,10 +953,6 @@ static int aead_des3_setkey(struct crypto_aead *authenc,
 out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static void talitos_sg_unmap(struct device *dev,
@@ -1490,10 +1485,10 @@ static int aead_decrypt(struct aead_request *req)
 	return ipsec_esp(edesc, req, false, ipsec_esp_decrypt_swauth_done);
 }
 
-static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+static int skcipher_setkey(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct device *dev = ctx->dev;
 
 	if (ctx->keylen)
@@ -1507,39 +1502,37 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
-static int ablkcipher_des_setkey(struct crypto_ablkcipher *cipher,
+static int skcipher_des_setkey(struct crypto_skcipher *cipher,
 				 const u8 *key, unsigned int keylen)
 {
-	return verify_ablkcipher_des_key(cipher, key) ?:
-	       ablkcipher_setkey(cipher, key, keylen);
+	return verify_skcipher_des_key(cipher, key) ?:
+	       skcipher_setkey(cipher, key, keylen);
 }
 
-static int ablkcipher_des3_setkey(struct crypto_ablkcipher *cipher,
+static int skcipher_des3_setkey(struct crypto_skcipher *cipher,
 				  const u8 *key, unsigned int keylen)
 {
-	return verify_ablkcipher_des3_key(cipher, key) ?:
-	       ablkcipher_setkey(cipher, key, keylen);
+	return verify_skcipher_des3_key(cipher, key) ?:
+	       skcipher_setkey(cipher, key, keylen);
 }
 
-static int ablkcipher_aes_setkey(struct crypto_ablkcipher *cipher,
+static int skcipher_aes_setkey(struct crypto_skcipher *cipher,
 				  const u8 *key, unsigned int keylen)
 {
 	if (keylen == AES_KEYSIZE_128 || keylen == AES_KEYSIZE_192 ||
 	    keylen == AES_KEYSIZE_256)
-		return ablkcipher_setkey(cipher, key, keylen);
-
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return skcipher_setkey(cipher, key, keylen);
 
 	return -EINVAL;
 }
 
 static void common_nonsnoop_unmap(struct device *dev,
 				  struct talitos_edesc *edesc,
-				  struct ablkcipher_request *areq)
+				  struct skcipher_request *areq)
 {
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
 
-	talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0);
+	talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, 0);
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
 
 	if (edesc->dma_len)
@@ -1547,20 +1540,20 @@ static void common_nonsnoop_unmap(struct device *dev,
 				 DMA_BIDIRECTIONAL);
 }
 
-static void ablkcipher_done(struct device *dev,
+static void skcipher_done(struct device *dev,
 			    struct talitos_desc *desc, void *context,
 			    int err)
 {
-	struct ablkcipher_request *areq = context;
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
+	struct skcipher_request *areq = context;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
 	struct talitos_edesc *edesc;
 
 	edesc = container_of(desc, struct talitos_edesc, desc);
 
 	common_nonsnoop_unmap(dev, edesc, areq);
-	memcpy(areq->info, ctx->iv, ivsize);
+	memcpy(areq->iv, ctx->iv, ivsize);
 
 	kfree(edesc);
 
@@ -1568,17 +1561,17 @@ static void ablkcipher_done(struct device *dev,
 }
 
 static int common_nonsnoop(struct talitos_edesc *edesc,
-			   struct ablkcipher_request *areq,
+			   struct skcipher_request *areq,
 			   void (*callback) (struct device *dev,
 					     struct talitos_desc *desc,
 					     void *context, int error))
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct device *dev = ctx->dev;
 	struct talitos_desc *desc = &edesc->desc;
-	unsigned int cryptlen = areq->nbytes;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
+	unsigned int cryptlen = areq->cryptlen;
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
 	int sg_count, ret;
 	bool sync_needed = false;
 	struct talitos_private *priv = dev_get_drvdata(dev);
@@ -1638,65 +1631,65 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 	return ret;
 }
 
-static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
+static struct talitos_edesc *skcipher_edesc_alloc(struct skcipher_request *
 						    areq, bool encrypt)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
 
 	return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
-				   areq->info, 0, areq->nbytes, 0, ivsize, 0,
+				   areq->iv, 0, areq->cryptlen, 0, ivsize, 0,
 				   areq->base.flags, encrypt);
 }
 
-static int ablkcipher_encrypt(struct ablkcipher_request *areq)
+static int skcipher_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct talitos_edesc *edesc;
 	unsigned int blocksize =
-			crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+			crypto_tfm_alg_blocksize(crypto_skcipher_tfm(cipher));
 
-	if (!areq->nbytes)
+	if (!areq->cryptlen)
 		return 0;
 
-	if (areq->nbytes % blocksize)
+	if (areq->cryptlen % blocksize)
 		return -EINVAL;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(areq, true);
+	edesc = skcipher_edesc_alloc(areq, true);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/* set encrypt */
 	edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
 
-	return common_nonsnoop(edesc, areq, ablkcipher_done);
+	return common_nonsnoop(edesc, areq, skcipher_done);
 }
 
-static int ablkcipher_decrypt(struct ablkcipher_request *areq)
+static int skcipher_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct talitos_edesc *edesc;
 	unsigned int blocksize =
-			crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+			crypto_tfm_alg_blocksize(crypto_skcipher_tfm(cipher));
 
-	if (!areq->nbytes)
+	if (!areq->cryptlen)
 		return 0;
 
-	if (areq->nbytes % blocksize)
+	if (areq->cryptlen % blocksize)
 		return -EINVAL;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(areq, false);
+	edesc = skcipher_edesc_alloc(areq, false);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
 
-	return common_nonsnoop(edesc, areq, ablkcipher_done);
+	return common_nonsnoop(edesc, areq, skcipher_done);
 }
 
 static void common_nonsnoop_hash_unmap(struct device *dev,
@@ -1704,6 +1697,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
 				       struct ahash_request *areq)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
 	struct talitos_desc *desc = &edesc->desc;
@@ -1714,6 +1708,9 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
 	if (desc->next_desc &&
 	    desc->ptr[5].ptr != desc2->ptr[5].ptr)
 		unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
+	if (req_ctx->last)
+		memcpy(areq->result, req_ctx->hw_context,
+		       crypto_ahash_digestsize(tfm));
 
 	if (req_ctx->psrc)
 		talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
@@ -1845,7 +1842,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	if (req_ctx->last)
 		map_single_talitos_ptr(dev, &desc->ptr[5],
 				       crypto_ahash_digestsize(tfm),
-				       areq->result, DMA_FROM_DEVICE);
+				       req_ctx->hw_context, DMA_FROM_DEVICE);
 	else
 		map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
 					      req_ctx->hw_context_size,
@@ -2230,10 +2227,8 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 		/* Must get the hash of the long key */
 		ret = keyhash(tfm, key, keylen, hash);
 
-		if (ret) {
-			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		if (ret)
 			return -EINVAL;
-		}
 
 		keysize = digestsize;
 		memcpy(ctx->key, hash, digestsize);
@@ -2253,7 +2248,7 @@ struct talitos_alg_template {
 	u32 type;
 	u32 priority;
 	union {
-		struct crypto_alg crypto;
+		struct skcipher_alg skcipher;
 		struct ahash_alg hash;
 		struct aead_alg aead;
 	} alg;
@@ -2698,123 +2693,102 @@ static struct talitos_alg_template driver_algs[] = {
 				     DESC_HDR_MODE1_MDEU_PAD |
 				     DESC_HDR_MODE1_MDEU_MD5_HMAC,
 	},
-	/* ABLKCIPHER algorithms. */
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "ecb(aes)",
-			.cra_driver_name = "ecb-aes-talitos",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-				.setkey = ablkcipher_aes_setkey,
-			}
+	/* SKCIPHER algorithms. */
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "ecb(aes)",
+			.base.cra_driver_name = "ecb-aes-talitos",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = skcipher_aes_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 				     DESC_HDR_SEL0_AESU,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "cbc(aes)",
-			.cra_driver_name = "cbc-aes-talitos",
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-                                     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-				.ivsize = AES_BLOCK_SIZE,
-				.setkey = ablkcipher_aes_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "cbc(aes)",
+			.base.cra_driver_name = "cbc-aes-talitos",
+			.base.cra_blocksize = AES_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.setkey = skcipher_aes_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 				     DESC_HDR_SEL0_AESU |
 				     DESC_HDR_MODE0_AESU_CBC,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "ctr(aes)",
-			.cra_driver_name = "ctr-aes-talitos",
-			.cra_blocksize = 1,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-				.ivsize = AES_BLOCK_SIZE,
-				.setkey = ablkcipher_aes_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "ctr(aes)",
+			.base.cra_driver_name = "ctr-aes-talitos",
+			.base.cra_blocksize = 1,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.setkey = skcipher_aes_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP |
 				     DESC_HDR_SEL0_AESU |
 				     DESC_HDR_MODE0_AESU_CTR,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "ecb(des)",
-			.cra_driver_name = "ecb-des-talitos",
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = DES_KEY_SIZE,
-				.max_keysize = DES_KEY_SIZE,
-				.setkey = ablkcipher_des_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "ecb(des)",
+			.base.cra_driver_name = "ecb-des-talitos",
+			.base.cra_blocksize = DES_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.setkey = skcipher_des_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 				     DESC_HDR_SEL0_DEU,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "cbc(des)",
-			.cra_driver_name = "cbc-des-talitos",
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = DES_KEY_SIZE,
-				.max_keysize = DES_KEY_SIZE,
-				.ivsize = DES_BLOCK_SIZE,
-				.setkey = ablkcipher_des_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "cbc(des)",
+			.base.cra_driver_name = "cbc-des-talitos",
+			.base.cra_blocksize = DES_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
+			.setkey = skcipher_des_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 				     DESC_HDR_SEL0_DEU |
 				     DESC_HDR_MODE0_DEU_CBC,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "ecb(des3_ede)",
-			.cra_driver_name = "ecb-3des-talitos",
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-				     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = DES3_EDE_KEY_SIZE,
-				.max_keysize = DES3_EDE_KEY_SIZE,
-				.setkey = ablkcipher_des3_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "ecb(des3_ede)",
+			.base.cra_driver_name = "ecb-3des-talitos",
+			.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.setkey = skcipher_des3_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 				     DESC_HDR_SEL0_DEU |
 				     DESC_HDR_MODE0_DEU_3DES,
 	},
-	{	.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.alg.crypto = {
-			.cra_name = "cbc(des3_ede)",
-			.cra_driver_name = "cbc-3des-talitos",
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-                                     CRYPTO_ALG_ASYNC,
-			.cra_ablkcipher = {
-				.min_keysize = DES3_EDE_KEY_SIZE,
-				.max_keysize = DES3_EDE_KEY_SIZE,
-				.ivsize = DES3_EDE_BLOCK_SIZE,
-				.setkey = ablkcipher_des3_setkey,
-			}
+	{	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+		.alg.skcipher = {
+			.base.cra_name = "cbc(des3_ede)",
+			.base.cra_driver_name = "cbc-3des-talitos",
+			.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.base.cra_flags = CRYPTO_ALG_ASYNC,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.setkey = skcipher_des3_setkey,
 		},
 		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
 			             DESC_HDR_SEL0_DEU |
@@ -3032,46 +3006,45 @@ static int talitos_init_common(struct talitos_ctx *ctx,
 	return 0;
 }
 
-static int talitos_cra_init(struct crypto_tfm *tfm)
+static int talitos_cra_init_aead(struct crypto_aead *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct talitos_crypto_alg *talitos_alg;
-	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct talitos_ctx *ctx = crypto_aead_ctx(tfm);
 
-	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
-		talitos_alg = container_of(__crypto_ahash_alg(alg),
-					   struct talitos_crypto_alg,
-					   algt.alg.hash);
-	else
-		talitos_alg = container_of(alg, struct talitos_crypto_alg,
-					   algt.alg.crypto);
+	talitos_alg = container_of(alg, struct talitos_crypto_alg,
+				   algt.alg.aead);
 
 	return talitos_init_common(ctx, talitos_alg);
 }
 
-static int talitos_cra_init_aead(struct crypto_aead *tfm)
+static int talitos_cra_init_skcipher(struct crypto_skcipher *tfm)
 {
-	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct talitos_crypto_alg *talitos_alg;
-	struct talitos_ctx *ctx = crypto_aead_ctx(tfm);
+	struct talitos_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	talitos_alg = container_of(alg, struct talitos_crypto_alg,
-				   algt.alg.aead);
+				   algt.alg.skcipher);
 
 	return talitos_init_common(ctx, talitos_alg);
 }
 
 static int talitos_cra_init_ahash(struct crypto_tfm *tfm)
 {
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct talitos_crypto_alg *talitos_alg;
 	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	talitos_cra_init(tfm);
+	talitos_alg = container_of(__crypto_ahash_alg(alg),
+				   struct talitos_crypto_alg,
+				   algt.alg.hash);
 
 	ctx->keylen = 0;
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct talitos_ahash_req_ctx));
 
-	return 0;
+	return talitos_init_common(ctx, talitos_alg);
 }
 
 static void talitos_cra_exit(struct crypto_tfm *tfm)
@@ -3112,7 +3085,8 @@ static int talitos_remove(struct platform_device *ofdev)
 
 	list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) {
 		switch (t_alg->algt.type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&t_alg->algt.alg.skcipher);
 			break;
 		case CRYPTO_ALG_TYPE_AEAD:
 			crypto_unregister_aead(&t_alg->algt.alg.aead);
@@ -3156,15 +3130,14 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 	t_alg->algt = *template;
 
 	switch (t_alg->algt.type) {
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		alg = &t_alg->algt.alg.crypto;
-		alg->cra_init = talitos_cra_init;
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		alg = &t_alg->algt.alg.skcipher.base;
 		alg->cra_exit = talitos_cra_exit;
-		alg->cra_type = &crypto_ablkcipher_type;
-		alg->cra_ablkcipher.setkey = alg->cra_ablkcipher.setkey ?:
-					     ablkcipher_setkey;
-		alg->cra_ablkcipher.encrypt = ablkcipher_encrypt;
-		alg->cra_ablkcipher.decrypt = ablkcipher_decrypt;
+		t_alg->algt.alg.skcipher.init = talitos_cra_init_skcipher;
+		t_alg->algt.alg.skcipher.setkey =
+			t_alg->algt.alg.skcipher.setkey ?: skcipher_setkey;
+		t_alg->algt.alg.skcipher.encrypt = skcipher_encrypt;
+		t_alg->algt.alg.skcipher.decrypt = skcipher_decrypt;
 		break;
 	case CRYPTO_ALG_TYPE_AEAD:
 		alg = &t_alg->algt.alg.aead.base;
@@ -3461,10 +3434,10 @@ static int talitos_probe(struct platform_device *ofdev)
 			}
 
 			switch (t_alg->algt.type) {
-			case CRYPTO_ALG_TYPE_ABLKCIPHER:
-				err = crypto_register_alg(
-						&t_alg->algt.alg.crypto);
-				alg = &t_alg->algt.alg.crypto;
+			case CRYPTO_ALG_TYPE_SKCIPHER:
+				err = crypto_register_skcipher(
+						&t_alg->algt.alg.skcipher);
+				alg = &t_alg->algt.alg.skcipher.base;
 				break;
 
 			case CRYPTO_ALG_TYPE_AEAD:
diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig
index b1c6f739f77b..f56d65c56ccf 100644
--- a/drivers/crypto/ux500/Kconfig
+++ b/drivers/crypto/ux500/Kconfig
@@ -8,21 +8,21 @@ config CRYPTO_DEV_UX500_CRYP
 	tristate "UX500 crypto driver for CRYP block"
 	depends on CRYPTO_DEV_UX500
 	select CRYPTO_ALGAPI
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_DES
 	help
-        This selects the crypto driver for the UX500_CRYP hardware. It supports
-        AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes.
+	This selects the crypto driver for the UX500_CRYP hardware. It supports
+	AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes.
 
 config CRYPTO_DEV_UX500_HASH
-        tristate "UX500 crypto driver for HASH block"
-        depends on CRYPTO_DEV_UX500
-        select CRYPTO_HASH
+	tristate "UX500 crypto driver for HASH block"
+	depends on CRYPTO_DEV_UX500
+	select CRYPTO_HASH
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
-        help
-          This selects the hash driver for the UX500_HASH hardware.
-          Depends on UX500/STM DMA if running in DMA mode.
+	help
+	  This selects the hash driver for the UX500_HASH hardware.
+	  Depends on UX500/STM DMA if running in DMA mode.
 
 config CRYPTO_DEV_UX500_DEBUG
 	bool "Activate ux500 platform debug-mode for crypto and hash block"
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 1628ae7a1467..800dfc4d16c4 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -30,6 +30,7 @@
 #include <crypto/algapi.h>
 #include <crypto/ctr.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 
 #include <linux/platform_data/crypto-ux500.h>
@@ -828,10 +829,10 @@ static int get_nents(struct scatterlist *sg, int nbytes)
 	return nents;
 }
 
-static int ablk_dma_crypt(struct ablkcipher_request *areq)
+static int ablk_dma_crypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct cryp_device_data *device_data;
 
 	int bytes_written = 0;
@@ -840,8 +841,8 @@ static int ablk_dma_crypt(struct ablkcipher_request *areq)
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
-	ctx->datalen = areq->nbytes;
-	ctx->outlen = areq->nbytes;
+	ctx->datalen = areq->cryptlen;
+	ctx->outlen = areq->cryptlen;
 
 	ret = cryp_get_device_data(ctx, &device_data);
 	if (ret)
@@ -885,11 +886,11 @@ out:
 	return 0;
 }
 
-static int ablk_crypt(struct ablkcipher_request *areq)
+static int ablk_crypt(struct skcipher_request *areq)
 {
-	struct ablkcipher_walk walk;
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct skcipher_walk walk;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct cryp_device_data *device_data;
 	unsigned long src_paddr;
 	unsigned long dst_paddr;
@@ -902,21 +903,20 @@ static int ablk_crypt(struct ablkcipher_request *areq)
 	if (ret)
 		goto out;
 
-	ablkcipher_walk_init(&walk, areq->dst, areq->src, areq->nbytes);
-	ret = ablkcipher_walk_phys(areq, &walk);
+	ret = skcipher_walk_async(&walk, areq);
 
 	if (ret) {
-		pr_err(DEV_DBG_NAME "[%s]: ablkcipher_walk_phys() failed!",
+		pr_err(DEV_DBG_NAME "[%s]: skcipher_walk_async() failed!",
 			__func__);
 		goto out;
 	}
 
 	while ((nbytes = walk.nbytes) > 0) {
 		ctx->iv = walk.iv;
-		src_paddr = (page_to_phys(walk.src.page) + walk.src.offset);
+		src_paddr = (page_to_phys(walk.src.phys.page) + walk.src.phys.offset);
 		ctx->indata = phys_to_virt(src_paddr);
 
-		dst_paddr = (page_to_phys(walk.dst.page) + walk.dst.offset);
+		dst_paddr = (page_to_phys(walk.dst.phys.page) + walk.dst.phys.offset);
 		ctx->outdata = phys_to_virt(dst_paddr);
 
 		ctx->datalen = nbytes - (nbytes % ctx->blocksize);
@@ -926,11 +926,10 @@ static int ablk_crypt(struct ablkcipher_request *areq)
 			goto out;
 
 		nbytes -= ctx->datalen;
-		ret = ablkcipher_walk_done(areq, &walk, nbytes);
+		ret = skcipher_walk_done(&walk, nbytes);
 		if (ret)
 			goto out;
 	}
-	ablkcipher_walk_complete(&walk);
 
 out:
 	/* Release the device */
@@ -948,11 +947,10 @@ out:
 	return ret;
 }
 
-static int aes_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+static int aes_skcipher_setkey(struct crypto_skcipher *cipher,
 				 const u8 *key, unsigned int keylen)
 {
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 *flags = &cipher->base.crt_flags;
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
@@ -971,7 +969,6 @@ static int aes_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 
 	default:
 		pr_err(DEV_DBG_NAME "[%s]: Unknown keylen!", __func__);
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 
@@ -983,15 +980,15 @@ static int aes_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
-static int des_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+static int des_skcipher_setkey(struct crypto_skcipher *cipher,
 				 const u8 *key, unsigned int keylen)
 {
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
-	err = verify_ablkcipher_des_key(cipher, key);
+	err = verify_skcipher_des_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1002,15 +999,15 @@ static int des_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
-static int des3_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+static int des3_skcipher_setkey(struct crypto_skcipher *cipher,
 				  const u8 *key, unsigned int keylen)
 {
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
-	err = verify_ablkcipher_des3_key(cipher, key);
+	err = verify_skcipher_des3_key(cipher, key);
 	if (err)
 		return err;
 
@@ -1021,10 +1018,10 @@ static int des3_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
-static int cryp_blk_encrypt(struct ablkcipher_request *areq)
+static int cryp_blk_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
@@ -1039,10 +1036,10 @@ static int cryp_blk_encrypt(struct ablkcipher_request *areq)
 	return ablk_crypt(areq);
 }
 
-static int cryp_blk_decrypt(struct ablkcipher_request *areq)
+static int cryp_blk_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
@@ -1058,19 +1055,19 @@ static int cryp_blk_decrypt(struct ablkcipher_request *areq)
 
 struct cryp_algo_template {
 	enum cryp_algo_mode algomode;
-	struct crypto_alg crypto;
+	struct skcipher_alg skcipher;
 };
 
-static int cryp_cra_init(struct crypto_tfm *tfm)
+static int cryp_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct cryp_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct cryp_algo_template *cryp_alg = container_of(alg,
 			struct cryp_algo_template,
-			crypto);
+			skcipher);
 
 	ctx->config.algomode = cryp_alg->algomode;
-	ctx->blocksize = crypto_tfm_alg_blocksize(tfm);
+	ctx->blocksize = crypto_skcipher_blocksize(tfm);
 
 	return 0;
 }
@@ -1078,205 +1075,147 @@ static int cryp_cra_init(struct crypto_tfm *tfm)
 static struct cryp_algo_template cryp_algs[] = {
 	{
 		.algomode = CRYP_ALGO_AES_ECB,
-		.crypto = {
-			.cra_name = "aes",
-			.cra_driver_name = "aes-ux500",
-			.cra_priority =	300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = AES_MIN_KEY_SIZE,
-					.max_keysize = AES_MAX_KEY_SIZE,
-					.setkey = aes_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt
-				}
-			}
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_AES_ECB,
-		.crypto = {
-			.cra_name = "ecb(aes)",
-			.cra_driver_name = "ecb-aes-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = AES_MIN_KEY_SIZE,
-					.max_keysize = AES_MAX_KEY_SIZE,
-					.setkey = aes_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "ecb(aes)",
+			.base.cra_driver_name	= "ecb-aes-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= AES_MIN_KEY_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE,
+			.setkey			= aes_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.init			= cryp_init_tfm,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_AES_CBC,
-		.crypto = {
-			.cra_name = "cbc(aes)",
-			.cra_driver_name = "cbc-aes-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = AES_MIN_KEY_SIZE,
-					.max_keysize = AES_MAX_KEY_SIZE,
-					.setkey = aes_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-					.ivsize = AES_BLOCK_SIZE,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "cbc(aes)",
+			.base.cra_driver_name	= "cbc-aes-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= AES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= AES_MIN_KEY_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE,
+			.setkey			= aes_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.init			= cryp_init_tfm,
+			.ivsize			= AES_BLOCK_SIZE,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_AES_CTR,
-		.crypto = {
-			.cra_name = "ctr(aes)",
-			.cra_driver_name = "ctr-aes-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-						CRYPTO_ALG_ASYNC,
-			.cra_blocksize = AES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = AES_MIN_KEY_SIZE,
-					.max_keysize = AES_MAX_KEY_SIZE,
-					.setkey = aes_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-					.ivsize = AES_BLOCK_SIZE,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "ctr(aes)",
+			.base.cra_driver_name	= "ctr-aes-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= 1,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= AES_MIN_KEY_SIZE,
+			.max_keysize		= AES_MAX_KEY_SIZE,
+			.setkey			= aes_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.init			= cryp_init_tfm,
+			.ivsize			= AES_BLOCK_SIZE,
+			.chunksize		= AES_BLOCK_SIZE,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_DES_ECB,
-		.crypto = {
-			.cra_name = "ecb(des)",
-			.cra_driver_name = "ecb-des-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = DES_KEY_SIZE,
-					.max_keysize = DES_KEY_SIZE,
-					.setkey = des_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "ecb(des)",
+			.base.cra_driver_name	= "ecb-des-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= DES_KEY_SIZE,
+			.max_keysize		= DES_KEY_SIZE,
+			.setkey			= des_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.init			= cryp_init_tfm,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_TDES_ECB,
-		.crypto = {
-			.cra_name = "ecb(des3_ede)",
-			.cra_driver_name = "ecb-des3_ede-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = DES3_EDE_KEY_SIZE,
-					.max_keysize = DES3_EDE_KEY_SIZE,
-					.setkey = des3_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "ecb(des3_ede)",
+			.base.cra_driver_name	= "ecb-des3_ede-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= DES3_EDE_KEY_SIZE,
+			.max_keysize		= DES3_EDE_KEY_SIZE,
+			.setkey			= des3_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.init			= cryp_init_tfm,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_DES_CBC,
-		.crypto = {
-			.cra_name = "cbc(des)",
-			.cra_driver_name = "cbc-des-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = DES_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = DES_KEY_SIZE,
-					.max_keysize = DES_KEY_SIZE,
-					.setkey = des_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "cbc(des)",
+			.base.cra_driver_name	= "cbc-des-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= DES_KEY_SIZE,
+			.max_keysize		= DES_KEY_SIZE,
+			.setkey			= des_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.ivsize			= DES_BLOCK_SIZE,
+			.init			= cryp_init_tfm,
 		}
 	},
 	{
 		.algomode = CRYP_ALGO_TDES_CBC,
-		.crypto = {
-			.cra_name = "cbc(des3_ede)",
-			.cra_driver_name = "cbc-des3_ede-ux500",
-			.cra_priority = 300,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
-					CRYPTO_ALG_ASYNC,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_ctxsize = sizeof(struct cryp_ctx),
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = cryp_cra_init,
-			.cra_module = THIS_MODULE,
-			.cra_u = {
-				.ablkcipher = {
-					.min_keysize = DES3_EDE_KEY_SIZE,
-					.max_keysize = DES3_EDE_KEY_SIZE,
-					.setkey = des3_ablkcipher_setkey,
-					.encrypt = cryp_blk_encrypt,
-					.decrypt = cryp_blk_decrypt,
-					.ivsize = DES3_EDE_BLOCK_SIZE,
-				}
-			}
+		.skcipher = {
+			.base.cra_name		= "cbc(des3_ede)",
+			.base.cra_driver_name	= "cbc-des3_ede-ux500",
+			.base.cra_priority	= 300,
+			.base.cra_flags		= CRYPTO_ALG_ASYNC,
+			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
+			.base.cra_alignmask	= 3,
+			.base.cra_module	= THIS_MODULE,
+
+			.min_keysize		= DES3_EDE_KEY_SIZE,
+			.max_keysize		= DES3_EDE_KEY_SIZE,
+			.setkey			= des3_skcipher_setkey,
+			.encrypt		= cryp_blk_encrypt,
+			.decrypt		= cryp_blk_decrypt,
+			.ivsize			= DES3_EDE_BLOCK_SIZE,
+			.init			= cryp_init_tfm,
 		}
 	}
 };
@@ -1293,18 +1232,18 @@ static int cryp_algs_register_all(void)
 	pr_debug("[%s]", __func__);
 
 	for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) {
-		ret = crypto_register_alg(&cryp_algs[i].crypto);
+		ret = crypto_register_skcipher(&cryp_algs[i].skcipher);
 		if (ret) {
 			count = i;
 			pr_err("[%s] alg registration failed",
-					cryp_algs[i].crypto.cra_driver_name);
+					cryp_algs[i].skcipher.base.cra_driver_name);
 			goto unreg;
 		}
 	}
 	return 0;
 unreg:
 	for (i = 0; i < count; i++)
-		crypto_unregister_alg(&cryp_algs[i].crypto);
+		crypto_unregister_skcipher(&cryp_algs[i].skcipher);
 	return ret;
 }
 
@@ -1318,7 +1257,7 @@ static void cryp_algs_unregister_all(void)
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
 	for (i = 0; i < ARRAY_SIZE(cryp_algs); i++)
-		crypto_unregister_alg(&cryp_algs[i].crypto);
+		crypto_unregister_skcipher(&cryp_algs[i].skcipher);
 }
 
 static int ux500_cryp_probe(struct platform_device *pdev)
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index c172a6953477..c24f2db8d5e8 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -140,7 +140,6 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg,
 {
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *channel = NULL;
-	dma_cookie_t cookie;
 
 	if (direction != DMA_TO_DEVICE) {
 		dev_err(ctx->device->dev, "%s: Invalid DMA direction\n",
@@ -176,7 +175,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg,
 	desc->callback = hash_dma_callback;
 	desc->callback_param = ctx;
 
-	cookie = dmaengine_submit(desc);
+	dmaengine_submit(desc);
 	dma_async_issue_pending(channel);
 
 	return 0;
diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig
index 01b625e4e5ad..fb294174e408 100644
--- a/drivers/crypto/virtio/Kconfig
+++ b/drivers/crypto/virtio/Kconfig
@@ -3,7 +3,7 @@ config CRYPTO_DEV_VIRTIO
 	tristate "VirtIO crypto driver"
 	depends on VIRTIO
 	select CRYPTO_AEAD
-	select CRYPTO_BLKCIPHER
+	select CRYPTO_SKCIPHER
 	select CRYPTO_ENGINE
 	default m
 	help
diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
index 42d19205166b..fd045e64972a 100644
--- a/drivers/crypto/virtio/virtio_crypto_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_algs.c
@@ -8,6 +8,7 @@
 
 #include <linux/scatterlist.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <crypto/scatterwalk.h>
 #include <linux/atomic.h>
@@ -16,10 +17,10 @@
 #include "virtio_crypto_common.h"
 
 
-struct virtio_crypto_ablkcipher_ctx {
+struct virtio_crypto_skcipher_ctx {
 	struct crypto_engine_ctx enginectx;
 	struct virtio_crypto *vcrypto;
-	struct crypto_tfm *tfm;
+	struct crypto_skcipher *tfm;
 
 	struct virtio_crypto_sym_session_info enc_sess_info;
 	struct virtio_crypto_sym_session_info dec_sess_info;
@@ -30,8 +31,8 @@ struct virtio_crypto_sym_request {
 
 	/* Cipher or aead */
 	uint32_t type;
-	struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx;
-	struct ablkcipher_request *ablkcipher_req;
+	struct virtio_crypto_skcipher_ctx *skcipher_ctx;
+	struct skcipher_request *skcipher_req;
 	uint8_t *iv;
 	/* Encryption? */
 	bool encrypt;
@@ -41,7 +42,7 @@ struct virtio_crypto_algo {
 	uint32_t algonum;
 	uint32_t service;
 	unsigned int active_devs;
-	struct crypto_alg algo;
+	struct skcipher_alg algo;
 };
 
 /*
@@ -49,9 +50,9 @@ struct virtio_crypto_algo {
  * and crypto algorithms registion.
  */
 static DEFINE_MUTEX(algs_lock);
-static void virtio_crypto_ablkcipher_finalize_req(
+static void virtio_crypto_skcipher_finalize_req(
 	struct virtio_crypto_sym_request *vc_sym_req,
-	struct ablkcipher_request *req,
+	struct skcipher_request *req,
 	int err);
 
 static void virtio_crypto_dataq_sym_callback
@@ -59,7 +60,7 @@ static void virtio_crypto_dataq_sym_callback
 {
 	struct virtio_crypto_sym_request *vc_sym_req =
 		container_of(vc_req, struct virtio_crypto_sym_request, base);
-	struct ablkcipher_request *ablk_req;
+	struct skcipher_request *ablk_req;
 	int error;
 
 	/* Finish the encrypt or decrypt process */
@@ -79,8 +80,8 @@ static void virtio_crypto_dataq_sym_callback
 			error = -EIO;
 			break;
 		}
-		ablk_req = vc_sym_req->ablkcipher_req;
-		virtio_crypto_ablkcipher_finalize_req(vc_sym_req,
+		ablk_req = vc_sym_req->skcipher_req;
+		virtio_crypto_skcipher_finalize_req(vc_sym_req,
 							ablk_req, error);
 	}
 }
@@ -105,15 +106,13 @@ virtio_crypto_alg_validate_key(int key_len, uint32_t *alg)
 		*alg = VIRTIO_CRYPTO_CIPHER_AES_CBC;
 		break;
 	default:
-		pr_err("virtio_crypto: Unsupported key length: %d\n",
-			key_len);
 		return -EINVAL;
 	}
 	return 0;
 }
 
-static int virtio_crypto_alg_ablkcipher_init_session(
-		struct virtio_crypto_ablkcipher_ctx *ctx,
+static int virtio_crypto_alg_skcipher_init_session(
+		struct virtio_crypto_skcipher_ctx *ctx,
 		uint32_t alg, const uint8_t *key,
 		unsigned int keylen,
 		int encrypt)
@@ -202,8 +201,8 @@ static int virtio_crypto_alg_ablkcipher_init_session(
 	return 0;
 }
 
-static int virtio_crypto_alg_ablkcipher_close_session(
-		struct virtio_crypto_ablkcipher_ctx *ctx,
+static int virtio_crypto_alg_skcipher_close_session(
+		struct virtio_crypto_skcipher_ctx *ctx,
 		int encrypt)
 {
 	struct scatterlist outhdr, status_sg, *sgs[2];
@@ -263,8 +262,8 @@ static int virtio_crypto_alg_ablkcipher_close_session(
 	return 0;
 }
 
-static int virtio_crypto_alg_ablkcipher_init_sessions(
-		struct virtio_crypto_ablkcipher_ctx *ctx,
+static int virtio_crypto_alg_skcipher_init_sessions(
+		struct virtio_crypto_skcipher_ctx *ctx,
 		const uint8_t *key, unsigned int keylen)
 {
 	uint32_t alg;
@@ -273,37 +272,33 @@ static int virtio_crypto_alg_ablkcipher_init_sessions(
 
 	if (keylen > vcrypto->max_cipher_key_len) {
 		pr_err("virtio_crypto: the key is too long\n");
-		goto bad_key;
+		return -EINVAL;
 	}
 
 	if (virtio_crypto_alg_validate_key(keylen, &alg))
-		goto bad_key;
+		return -EINVAL;
 
 	/* Create encryption session */
-	ret = virtio_crypto_alg_ablkcipher_init_session(ctx,
+	ret = virtio_crypto_alg_skcipher_init_session(ctx,
 			alg, key, keylen, 1);
 	if (ret)
 		return ret;
 	/* Create decryption session */
-	ret = virtio_crypto_alg_ablkcipher_init_session(ctx,
+	ret = virtio_crypto_alg_skcipher_init_session(ctx,
 			alg, key, keylen, 0);
 	if (ret) {
-		virtio_crypto_alg_ablkcipher_close_session(ctx, 1);
+		virtio_crypto_alg_skcipher_close_session(ctx, 1);
 		return ret;
 	}
 	return 0;
-
-bad_key:
-	crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	return -EINVAL;
 }
 
 /* Note: kernel crypto API realization */
-static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
+static int virtio_crypto_skcipher_setkey(struct crypto_skcipher *tfm,
 					 const uint8_t *key,
 					 unsigned int keylen)
 {
-	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct virtio_crypto_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	uint32_t alg;
 	int ret;
 
@@ -325,11 +320,11 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
 		ctx->vcrypto = vcrypto;
 	} else {
 		/* Rekeying, we should close the created sessions previously */
-		virtio_crypto_alg_ablkcipher_close_session(ctx, 1);
-		virtio_crypto_alg_ablkcipher_close_session(ctx, 0);
+		virtio_crypto_alg_skcipher_close_session(ctx, 1);
+		virtio_crypto_alg_skcipher_close_session(ctx, 0);
 	}
 
-	ret = virtio_crypto_alg_ablkcipher_init_sessions(ctx, key, keylen);
+	ret = virtio_crypto_alg_skcipher_init_sessions(ctx, key, keylen);
 	if (ret) {
 		virtcrypto_dev_put(ctx->vcrypto);
 		ctx->vcrypto = NULL;
@@ -341,14 +336,14 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
 }
 
 static int
-__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
-		struct ablkcipher_request *req,
+__virtio_crypto_skcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
+		struct skcipher_request *req,
 		struct data_queue *data_vq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct virtio_crypto_ablkcipher_ctx *ctx = vc_sym_req->ablkcipher_ctx;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct virtio_crypto_skcipher_ctx *ctx = vc_sym_req->skcipher_ctx;
 	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	struct virtio_crypto_op_data_req *req_data;
 	int src_nents, dst_nents;
@@ -361,7 +356,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
 	int sg_total;
 	uint8_t *iv;
 
-	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	src_nents = sg_nents_for_len(req->src, req->cryptlen);
 	dst_nents = sg_nents(req->dst);
 
 	pr_debug("virtio_crypto: Number of sgs (src_nents: %d, dst_nents: %d)\n",
@@ -398,7 +393,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
 	req_data->u.sym_req.op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER);
 	req_data->u.sym_req.u.cipher.para.iv_len = cpu_to_le32(ivsize);
 	req_data->u.sym_req.u.cipher.para.src_data_len =
-			cpu_to_le32(req->nbytes);
+			cpu_to_le32(req->cryptlen);
 
 	dst_len = virtio_crypto_alg_sg_nents_length(req->dst);
 	if (unlikely(dst_len > U32_MAX)) {
@@ -408,9 +403,9 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
 	}
 
 	pr_debug("virtio_crypto: src_len: %u, dst_len: %llu\n",
-			req->nbytes, dst_len);
+			req->cryptlen, dst_len);
 
-	if (unlikely(req->nbytes + dst_len + ivsize +
+	if (unlikely(req->cryptlen + dst_len + ivsize +
 		sizeof(vc_req->status) > vcrypto->max_size)) {
 		pr_err("virtio_crypto: The length is too big\n");
 		err = -EINVAL;
@@ -436,7 +431,12 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
 		err = -ENOMEM;
 		goto free;
 	}
-	memcpy(iv, req->info, ivsize);
+	memcpy(iv, req->iv, ivsize);
+	if (!vc_sym_req->encrypt)
+		scatterwalk_map_and_copy(req->iv, req->src,
+					 req->cryptlen - AES_BLOCK_SIZE,
+					 AES_BLOCK_SIZE, 0);
+
 	sg_init_one(&iv_sg, iv, ivsize);
 	sgs[num_out++] = &iv_sg;
 	vc_sym_req->iv = iv;
@@ -473,83 +473,93 @@ free:
 	return err;
 }
 
-static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
+static int virtio_crypto_skcipher_encrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
-	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm);
+	struct crypto_skcipher *atfm = crypto_skcipher_reqtfm(req);
+	struct virtio_crypto_skcipher_ctx *ctx = crypto_skcipher_ctx(atfm);
 	struct virtio_crypto_sym_request *vc_sym_req =
-				ablkcipher_request_ctx(req);
+				skcipher_request_ctx(req);
 	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	/* Use the first data virtqueue as default */
 	struct data_queue *data_vq = &vcrypto->data_vq[0];
 
+	if (!req->cryptlen)
+		return 0;
+	if (req->cryptlen % AES_BLOCK_SIZE)
+		return -EINVAL;
+
 	vc_req->dataq = data_vq;
 	vc_req->alg_cb = virtio_crypto_dataq_sym_callback;
-	vc_sym_req->ablkcipher_ctx = ctx;
-	vc_sym_req->ablkcipher_req = req;
+	vc_sym_req->skcipher_ctx = ctx;
+	vc_sym_req->skcipher_req = req;
 	vc_sym_req->encrypt = true;
 
-	return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
+	return crypto_transfer_skcipher_request_to_engine(data_vq->engine, req);
 }
 
-static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
+static int virtio_crypto_skcipher_decrypt(struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
-	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm);
+	struct crypto_skcipher *atfm = crypto_skcipher_reqtfm(req);
+	struct virtio_crypto_skcipher_ctx *ctx = crypto_skcipher_ctx(atfm);
 	struct virtio_crypto_sym_request *vc_sym_req =
-				ablkcipher_request_ctx(req);
+				skcipher_request_ctx(req);
 	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	/* Use the first data virtqueue as default */
 	struct data_queue *data_vq = &vcrypto->data_vq[0];
 
+	if (!req->cryptlen)
+		return 0;
+	if (req->cryptlen % AES_BLOCK_SIZE)
+		return -EINVAL;
+
 	vc_req->dataq = data_vq;
 	vc_req->alg_cb = virtio_crypto_dataq_sym_callback;
-	vc_sym_req->ablkcipher_ctx = ctx;
-	vc_sym_req->ablkcipher_req = req;
+	vc_sym_req->skcipher_ctx = ctx;
+	vc_sym_req->skcipher_req = req;
 	vc_sym_req->encrypt = false;
 
-	return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
+	return crypto_transfer_skcipher_request_to_engine(data_vq->engine, req);
 }
 
-static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
+static int virtio_crypto_skcipher_init(struct crypto_skcipher *tfm)
 {
-	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct virtio_crypto_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct virtio_crypto_sym_request));
 	ctx->tfm = tfm;
 
-	ctx->enginectx.op.do_one_request = virtio_crypto_ablkcipher_crypt_req;
+	ctx->enginectx.op.do_one_request = virtio_crypto_skcipher_crypt_req;
 	ctx->enginectx.op.prepare_request = NULL;
 	ctx->enginectx.op.unprepare_request = NULL;
 	return 0;
 }
 
-static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm)
+static void virtio_crypto_skcipher_exit(struct crypto_skcipher *tfm)
 {
-	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct virtio_crypto_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	if (!ctx->vcrypto)
 		return;
 
-	virtio_crypto_alg_ablkcipher_close_session(ctx, 1);
-	virtio_crypto_alg_ablkcipher_close_session(ctx, 0);
+	virtio_crypto_alg_skcipher_close_session(ctx, 1);
+	virtio_crypto_alg_skcipher_close_session(ctx, 0);
 	virtcrypto_dev_put(ctx->vcrypto);
 	ctx->vcrypto = NULL;
 }
 
-int virtio_crypto_ablkcipher_crypt_req(
+int virtio_crypto_skcipher_crypt_req(
 	struct crypto_engine *engine, void *vreq)
 {
-	struct ablkcipher_request *req = container_of(vreq, struct ablkcipher_request, base);
+	struct skcipher_request *req = container_of(vreq, struct skcipher_request, base);
 	struct virtio_crypto_sym_request *vc_sym_req =
-				ablkcipher_request_ctx(req);
+				skcipher_request_ctx(req);
 	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct data_queue *data_vq = vc_req->dataq;
 	int ret;
 
-	ret = __virtio_crypto_ablkcipher_do_req(vc_sym_req, req, data_vq);
+	ret = __virtio_crypto_skcipher_do_req(vc_sym_req, req, data_vq);
 	if (ret < 0)
 		return ret;
 
@@ -558,12 +568,16 @@ int virtio_crypto_ablkcipher_crypt_req(
 	return 0;
 }
 
-static void virtio_crypto_ablkcipher_finalize_req(
+static void virtio_crypto_skcipher_finalize_req(
 	struct virtio_crypto_sym_request *vc_sym_req,
-	struct ablkcipher_request *req,
+	struct skcipher_request *req,
 	int err)
 {
-	crypto_finalize_ablkcipher_request(vc_sym_req->base.dataq->engine,
+	if (vc_sym_req->encrypt)
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - AES_BLOCK_SIZE,
+					 AES_BLOCK_SIZE, 0);
+	crypto_finalize_skcipher_request(vc_sym_req->base.dataq->engine,
 					   req, err);
 	kzfree(vc_sym_req->iv);
 	virtcrypto_clear_request(&vc_sym_req->base);
@@ -573,27 +587,21 @@ static struct virtio_crypto_algo virtio_crypto_algs[] = { {
 	.algonum = VIRTIO_CRYPTO_CIPHER_AES_CBC,
 	.service = VIRTIO_CRYPTO_SERVICE_CIPHER,
 	.algo = {
-		.cra_name = "cbc(aes)",
-		.cra_driver_name = "virtio_crypto_aes_cbc",
-		.cra_priority = 150,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_ctxsize  = sizeof(struct virtio_crypto_ablkcipher_ctx),
-		.cra_alignmask = 0,
-		.cra_module = THIS_MODULE,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = virtio_crypto_ablkcipher_init,
-		.cra_exit = virtio_crypto_ablkcipher_exit,
-		.cra_u = {
-			.ablkcipher = {
-				.setkey = virtio_crypto_ablkcipher_setkey,
-				.decrypt = virtio_crypto_ablkcipher_decrypt,
-				.encrypt = virtio_crypto_ablkcipher_encrypt,
-				.min_keysize = AES_MIN_KEY_SIZE,
-				.max_keysize = AES_MAX_KEY_SIZE,
-				.ivsize = AES_BLOCK_SIZE,
-			},
-		},
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "virtio_crypto_aes_cbc",
+		.base.cra_priority	= 150,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct virtio_crypto_skcipher_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.init			= virtio_crypto_skcipher_init,
+		.exit			= virtio_crypto_skcipher_exit,
+		.setkey			= virtio_crypto_skcipher_setkey,
+		.decrypt		= virtio_crypto_skcipher_decrypt,
+		.encrypt		= virtio_crypto_skcipher_encrypt,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
 	},
 } };
 
@@ -613,14 +621,14 @@ int virtio_crypto_algs_register(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_algs[i].active_devs == 0) {
-			ret = crypto_register_alg(&virtio_crypto_algs[i].algo);
+			ret = crypto_register_skcipher(&virtio_crypto_algs[i].algo);
 			if (ret)
 				goto unlock;
 		}
 
 		virtio_crypto_algs[i].active_devs++;
 		dev_info(&vcrypto->vdev->dev, "Registered algo %s\n",
-			 virtio_crypto_algs[i].algo.cra_name);
+			 virtio_crypto_algs[i].algo.base.cra_name);
 	}
 
 unlock:
@@ -644,7 +652,7 @@ void virtio_crypto_algs_unregister(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_algs[i].active_devs == 1)
-			crypto_unregister_alg(&virtio_crypto_algs[i].algo);
+			crypto_unregister_skcipher(&virtio_crypto_algs[i].algo);
 
 		virtio_crypto_algs[i].active_devs--;
 	}
diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
index 1c6e00da5a29..a24f85c589e7 100644
--- a/drivers/crypto/virtio/virtio_crypto_common.h
+++ b/drivers/crypto/virtio/virtio_crypto_common.h
@@ -112,7 +112,7 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node,
 					      uint32_t algo);
 int virtcrypto_dev_start(struct virtio_crypto *vcrypto);
 void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);
-int virtio_crypto_ablkcipher_crypt_req(
+int virtio_crypto_skcipher_crypt_req(
 	struct crypto_engine *engine, void *vreq);
 
 void
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
index cab32cfec9c4..709670d2b553 100644
--- a/drivers/crypto/vmx/Makefile
+++ b/drivers/crypto/vmx/Makefile
@@ -3,13 +3,13 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
 vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-TARGET := linux-ppc64le
+override flavour := linux-ppc64le
 else
-TARGET := linux-ppc64
+override flavour := linux-ppc64
 endif
 
 quiet_cmd_perl = PERL $@
-      cmd_perl = $(PERL) $(<) $(TARGET) > $(@)
+      cmd_perl = $(PERL) $(<) $(flavour) > $(@)
 
 targets += aesp8-ppc.S ghashp8-ppc.S
 
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index d59e736882f6..9fee1b1532a4 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -84,6 +84,9 @@ static int p8_aes_xts_crypt(struct skcipher_request *req, int enc)
 	u8 tweak[AES_BLOCK_SIZE];
 	int ret;
 
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
 	if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) {
 		struct skcipher_request *subreq = skcipher_request_ctx(req);
 
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index f33c73e4af41..3b6c06f07326 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -32,19 +32,36 @@ config DEV_DAX_PMEM
 
 	  Say M if unsure
 
+config DEV_DAX_HMEM
+	tristate "HMEM DAX: direct access to 'specific purpose' memory"
+	depends on EFI_SOFT_RESERVE
+	default DEV_DAX
+	help
+	  EFI 2.8 platforms, and others, may advertise 'specific purpose'
+	  memory. For example, a high bandwidth memory pool. The
+	  indication from platform firmware is meant to reserve the
+	  memory from typical usage by default. This driver creates
+	  device-dax instances for these memory ranges, and that also
+	  enables the possibility to assign them to the DEV_DAX_KMEM
+	  driver to override the reservation and add them to kernel
+	  "System RAM" pool.
+
+	  Say M if unsure.
+
 config DEV_DAX_KMEM
 	tristate "KMEM DAX: volatile-use of persistent memory"
 	default DEV_DAX
 	depends on DEV_DAX
 	depends on MEMORY_HOTPLUG # for add_memory() and friends
 	help
-	  Support access to persistent memory as if it were RAM.  This
-	  allows easier use of persistent memory by unmodified
-	  applications.
+	  Support access to persistent, or other performance
+	  differentiated memory as if it were System RAM. This allows
+	  easier use of persistent memory by unmodified applications, or
+	  adds core kernel memory services to heterogeneous memory types
+	  (HMEM) marked "reserved" by platform firmware.
 
 	  To use this feature, a DAX device must be unbound from the
-	  device_dax driver (PMEM DAX) and bound to this kmem driver
-	  on each boot.
+	  device_dax driver and bound to this kmem driver on each boot.
 
 	  Say N if unsure.
 
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 81f7d54dadfb..80065b38b3c4 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -2,9 +2,11 @@
 obj-$(CONFIG_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
+obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
 
 dax-y := super.o
 dax-y += bus.o
 device_dax-y := device.o
+dax_hmem-y := hmem.o
 
 obj-y += pmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 8fafbeab510a..46e46047a1f7 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -227,7 +227,7 @@ static void dax_region_unregister(void *region)
 
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 		struct resource *res, int target_node, unsigned int align,
-		unsigned long pfn_flags)
+		unsigned long long pfn_flags)
 {
 	struct dax_region *dax_region;
 
@@ -309,7 +309,7 @@ static ssize_t resource_show(struct device *dev,
 
 	return sprintf(buf, "%#llx\n", dev_dax_resource(dev_dax));
 }
-static DEVICE_ATTR_RO(resource);
+static DEVICE_ATTR(resource, 0400, resource_show, NULL);
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
@@ -322,6 +322,13 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t numa_node_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_node(dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -329,8 +336,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 
 	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
 		return 0;
-	if (a == &dev_attr_resource.attr)
-		return 0400;
+	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
+		return 0;
 	return a->mode;
 }
 
@@ -339,6 +346,7 @@ static struct attribute *dev_dax_attributes[] = {
 	&dev_attr_size.attr,
 	&dev_attr_target_node.attr,
 	&dev_attr_resource.attr,
+	&dev_attr_numa_node.attr,
 	NULL,
 };
 
@@ -373,6 +381,11 @@ static void dev_dax_release(struct device *dev)
 	kfree(dev_dax);
 }
 
+static const struct device_type dev_dax_type = {
+	.release = dev_dax_release,
+	.groups = dax_attribute_groups,
+};
+
 static void unregister_dev_dax(void *dev)
 {
 	struct dev_dax *dev_dax = to_dev_dax(dev);
@@ -430,8 +443,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 	else
 		dev->class = dax_class;
 	dev->parent = parent;
-	dev->groups = dax_attribute_groups;
-	dev->release = dev_dax_release;
+	dev->type = &dev_dax_type;
 	dev_set_name(dev, "dax%d.%d", dax_region->id, id);
 
 	rc = device_add(dev);
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 8619e3299943..9e4eba67e8b9 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -11,7 +11,7 @@ struct dax_region;
 void dax_region_put(struct dax_region *dax_region);
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 		struct resource *res, int target_node, unsigned int align,
-		unsigned long flags);
+		unsigned long long flags);
 
 enum dev_dax_subsys {
 	DEV_DAX_BUS,
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 6ccca3b890d6..3107ce80e809 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -32,7 +32,7 @@ struct dax_region {
 	struct device *dev;
 	unsigned int align;
 	struct resource res;
-	unsigned long pfn_flags;
+	unsigned long long pfn_flags;
 };
 
 /**
diff --git a/drivers/dax/hmem.c b/drivers/dax/hmem.c
new file mode 100644
index 000000000000..fe7214daf62e
--- /dev/null
+++ b/drivers/dax/hmem.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/platform_device.h>
+#include <linux/memregion.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include "bus.h"
+
+static int dax_hmem_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dev_pagemap pgmap = { };
+	struct dax_region *dax_region;
+	struct memregion_info *mri;
+	struct dev_dax *dev_dax;
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOMEM;
+
+	mri = dev->platform_data;
+	memcpy(&pgmap.res, res, sizeof(*res));
+
+	dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node,
+			PMD_SIZE, PFN_DEV|PFN_MAP);
+	if (!dax_region)
+		return -ENOMEM;
+
+	dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap);
+	if (IS_ERR(dev_dax))
+		return PTR_ERR(dev_dax);
+
+	/* child dev_dax instances now own the lifetime of the dax_region */
+	dax_region_put(dax_region);
+	return 0;
+}
+
+static int dax_hmem_remove(struct platform_device *pdev)
+{
+	/* devm handles teardown */
+	return 0;
+}
+
+static struct platform_driver dax_hmem_driver = {
+	.probe = dax_hmem_probe,
+	.remove = dax_hmem_remove,
+	.driver = {
+		.name = "hmem",
+	},
+};
+
+module_platform_driver(dax_hmem_driver);
+
+MODULE_ALIAS("platform:hmem*");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
index 6eb6dfdf19bf..2bedf8414fff 100644
--- a/drivers/dax/pmem/core.c
+++ b/drivers/dax/pmem/core.c
@@ -25,20 +25,20 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 	ndns = nvdimm_namespace_common_probe(dev);
 	if (IS_ERR(ndns))
 		return ERR_CAST(ndns);
-	nsio = to_nd_namespace_io(&ndns->dev);
 
 	/* parse the 'pfn' info block via ->rw_bytes */
-	rc = devm_nsio_enable(dev, nsio);
+	rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
 	if (rc)
 		return ERR_PTR(rc);
 	rc = nvdimm_setup_pfn(nd_pfn, &pgmap);
 	if (rc)
 		return ERR_PTR(rc);
-	devm_nsio_disable(dev, nsio);
+	devm_namespace_disable(dev, ndns);
 
 	/* reserve the metadata area, device-dax will reserve the data */
 	pfn_sb = nd_pfn->pfn_sb;
 	offset = le64_to_cpu(pfn_sb->dataoff);
+	nsio = to_nd_namespace_io(&ndns->dev);
 	if (!devm_request_mem_region(dev, nsio->res.start, offset,
 				dev_name(&ndns->dev))) {
 		dev_warn(dev, "could not reserve metadata\n");
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index defe1d438710..0b1df12e0f21 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -77,13 +77,12 @@ config DEVFREQ_GOV_PASSIVE
 comment "DEVFREQ Drivers"
 
 config ARM_EXYNOS_BUS_DEVFREQ
-	tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
+	tristate "ARM Exynos Generic Memory Bus DEVFREQ Driver"
 	depends on ARCH_EXYNOS || COMPILE_TEST
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select DEVFREQ_GOV_PASSIVE
 	select DEVFREQ_EVENT_EXYNOS_PPMU
 	select PM_DEVFREQ_EVENT
-	select PM_OPP
 	help
 	  This adds the common DEVFREQ driver for Exynos Memory bus. Exynos
 	  Memory bus has one more group of memory bus (e.g, MIF and INT block).
@@ -92,13 +91,23 @@ config ARM_EXYNOS_BUS_DEVFREQ
 	  and adjusts the operating frequencies and voltages with OPP support.
 	  This does not yet operate with optimal voltages.
 
+config ARM_IMX8M_DDRC_DEVFREQ
+	tristate "i.MX8M DDRC DEVFREQ Driver"
+	depends on (ARCH_MXC && HAVE_ARM_SMCCC) || \
+		(COMPILE_TEST && HAVE_ARM_SMCCC)
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DEVFREQ_GOV_USERSPACE
+	help
+	  This adds the DEVFREQ driver for the i.MX8M DDR Controller. It allows
+	  adjusting DRAM frequency.
+
 config ARM_TEGRA_DEVFREQ
 	tristate "NVIDIA Tegra30/114/124/210 DEVFREQ Driver"
 	depends on ARCH_TEGRA_3x_SOC || ARCH_TEGRA_114_SOC || \
 		ARCH_TEGRA_132_SOC || ARCH_TEGRA_124_SOC || \
 		ARCH_TEGRA_210_SOC || \
 		COMPILE_TEST
-	select PM_OPP
+	depends on COMMON_CLK
 	help
 	  This adds the DEVFREQ driver for the Tegra family of SoCs.
 	  It reads ACTMON counters of memory controllers and adjusts the
@@ -109,7 +118,6 @@ config ARM_TEGRA20_DEVFREQ
 	depends on (TEGRA_MC && TEGRA20_EMC) || COMPILE_TEST
 	depends on COMMON_CLK
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
-	select PM_OPP
 	help
 	  This adds the DEVFREQ driver for the Tegra20 family of SoCs.
 	  It reads Memory Controller counters and adjusts the operating
@@ -117,15 +125,15 @@ config ARM_TEGRA20_DEVFREQ
 
 config ARM_RK3399_DMC_DEVFREQ
 	tristate "ARM RK3399 DMC DEVFREQ Driver"
-	depends on ARCH_ROCKCHIP
+	depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \
+		(COMPILE_TEST && HAVE_ARM_SMCCC)
 	select DEVFREQ_EVENT_ROCKCHIP_DFI
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select PM_DEVFREQ_EVENT
-	select PM_OPP
 	help
-          This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller).
-          It sets the frequency for the memory controller and reads the usage counts
-          from hardware.
+	  This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller).
+	  It sets the frequency for the memory controller and reads the usage counts
+	  from hardware.
 
 source "drivers/devfreq/event/Kconfig"
 
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 338ae8440db6..3eb4d5e6635c 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)	+= governor_passive.o
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
+obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ)	+= imx8m-ddrc.o
 obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ)	+= rk3399_dmc.o
 obj-$(CONFIG_ARM_TEGRA_DEVFREQ)		+= tegra30-devfreq.o
 obj-$(CONFIG_ARM_TEGRA20_DEVFREQ)	+= tegra20-devfreq.o
diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index 3dc5fd6065a3..8c31b0f2e28f 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -346,9 +346,9 @@ EXPORT_SYMBOL_GPL(devfreq_event_add_edev);
 
 /**
  * devfreq_event_remove_edev() - Remove the devfreq-event device registered.
- * @dev		: the devfreq-event device
+ * @edev	: the devfreq-event device
  *
- * Note that this function remove the registered devfreq-event device.
+ * Note that this function removes the registered devfreq-event device.
  */
 int devfreq_event_remove_edev(struct devfreq_event_dev *edev)
 {
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 446490c9d635..cceee8bc3c2f 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/sched.h>
+#include <linux/debugfs.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -24,12 +25,16 @@
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
 #include <linux/of.h>
+#include <linux/pm_qos.h>
 #include "governor.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/devfreq.h>
 
+#define HZ_PER_KHZ	1000
+
 static struct class *devfreq_class;
+static struct dentry *devfreq_debugfs;
 
 /*
  * devfreq core provides delayed work based load monitoring helper
@@ -99,6 +104,54 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
 }
 
 /**
+ * get_freq_range() - Get the current freq range
+ * @devfreq:	the devfreq instance
+ * @min_freq:	the min frequency
+ * @max_freq:	the max frequency
+ *
+ * This takes into consideration all constraints.
+ */
+static void get_freq_range(struct devfreq *devfreq,
+			   unsigned long *min_freq,
+			   unsigned long *max_freq)
+{
+	unsigned long *freq_table = devfreq->profile->freq_table;
+	s32 qos_min_freq, qos_max_freq;
+
+	lockdep_assert_held(&devfreq->lock);
+
+	/*
+	 * Initialize minimum/maximum frequency from freq table.
+	 * The devfreq drivers can initialize this in either ascending or
+	 * descending order and devfreq core supports both.
+	 */
+	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+		*min_freq = freq_table[0];
+		*max_freq = freq_table[devfreq->profile->max_state - 1];
+	} else {
+		*min_freq = freq_table[devfreq->profile->max_state - 1];
+		*max_freq = freq_table[0];
+	}
+
+	/* Apply constraints from PM QoS */
+	qos_min_freq = dev_pm_qos_read_value(devfreq->dev.parent,
+					     DEV_PM_QOS_MIN_FREQUENCY);
+	qos_max_freq = dev_pm_qos_read_value(devfreq->dev.parent,
+					     DEV_PM_QOS_MAX_FREQUENCY);
+	*min_freq = max(*min_freq, (unsigned long)HZ_PER_KHZ * qos_min_freq);
+	if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE)
+		*max_freq = min(*max_freq,
+				(unsigned long)HZ_PER_KHZ * qos_max_freq);
+
+	/* Apply constraints from OPP interface */
+	*min_freq = max(*min_freq, devfreq->scaling_min_freq);
+	*max_freq = min(*max_freq, devfreq->scaling_max_freq);
+
+	if (*min_freq > *max_freq)
+		*min_freq = *max_freq;
+}
+
+/**
  * devfreq_get_freq_level() - Lookup freq_table for the frequency
  * @devfreq:	the devfreq instance
  * @freq:	the target frequency
@@ -158,9 +211,10 @@ static int set_freq_table(struct devfreq *devfreq)
 int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 {
 	int lev, prev_lev, ret = 0;
-	unsigned long cur_time;
+	u64 cur_time;
 
-	cur_time = jiffies;
+	lockdep_assert_held(&devfreq->lock);
+	cur_time = get_jiffies_64();
 
 	/* Immediately exit if previous_freq is not initialized yet. */
 	if (!devfreq->previous_freq)
@@ -172,8 +226,8 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 		goto out;
 	}
 
-	devfreq->time_in_state[prev_lev] +=
-			 cur_time - devfreq->last_stat_updated;
+	devfreq->stats.time_in_state[prev_lev] +=
+			cur_time - devfreq->stats.last_update;
 
 	lev = devfreq_get_freq_level(devfreq, freq);
 	if (lev < 0) {
@@ -182,13 +236,13 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 	}
 
 	if (lev != prev_lev) {
-		devfreq->trans_table[(prev_lev *
-				devfreq->profile->max_state) + lev]++;
-		devfreq->total_trans++;
+		devfreq->stats.trans_table[
+			(prev_lev * devfreq->profile->max_state) + lev]++;
+		devfreq->stats.total_trans++;
 	}
 
 out:
-	devfreq->last_stat_updated = cur_time;
+	devfreq->stats.last_update = cur_time;
 	return ret;
 }
 EXPORT_SYMBOL(devfreq_update_status);
@@ -350,16 +404,7 @@ int update_devfreq(struct devfreq *devfreq)
 	err = devfreq->governor->get_target_freq(devfreq, &freq);
 	if (err)
 		return err;
-
-	/*
-	 * Adjust the frequency with user freq, QoS and available freq.
-	 *
-	 * List from the highest priority
-	 * max_freq
-	 * min_freq
-	 */
-	max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
-	min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
+	get_freq_range(devfreq, &min_freq, &max_freq);
 
 	if (freq < min_freq) {
 		freq = min_freq;
@@ -409,6 +454,9 @@ static void devfreq_monitor(struct work_struct *work)
  */
 void devfreq_monitor_start(struct devfreq *devfreq)
 {
+	if (devfreq->governor->interrupt_driven)
+		return;
+
 	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
 	if (devfreq->profile->polling_ms)
 		queue_delayed_work(devfreq_wq, &devfreq->work,
@@ -426,6 +474,9 @@ EXPORT_SYMBOL(devfreq_monitor_start);
  */
 void devfreq_monitor_stop(struct devfreq *devfreq)
 {
+	if (devfreq->governor->interrupt_driven)
+		return;
+
 	cancel_delayed_work_sync(&devfreq->work);
 }
 EXPORT_SYMBOL(devfreq_monitor_stop);
@@ -453,6 +504,10 @@ void devfreq_monitor_suspend(struct devfreq *devfreq)
 	devfreq_update_status(devfreq, devfreq->previous_freq);
 	devfreq->stop_polling = true;
 	mutex_unlock(&devfreq->lock);
+
+	if (devfreq->governor->interrupt_driven)
+		return;
+
 	cancel_delayed_work_sync(&devfreq->work);
 }
 EXPORT_SYMBOL(devfreq_monitor_suspend);
@@ -473,12 +528,16 @@ void devfreq_monitor_resume(struct devfreq *devfreq)
 	if (!devfreq->stop_polling)
 		goto out;
 
+	if (devfreq->governor->interrupt_driven)
+		goto out_update;
+
 	if (!delayed_work_pending(&devfreq->work) &&
 			devfreq->profile->polling_ms)
 		queue_delayed_work(devfreq_wq, &devfreq->work,
 			msecs_to_jiffies(devfreq->profile->polling_ms));
 
-	devfreq->last_stat_updated = jiffies;
+out_update:
+	devfreq->stats.last_update = get_jiffies_64();
 	devfreq->stop_polling = false;
 
 	if (devfreq->profile->get_cur_freq &&
@@ -509,6 +568,9 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay)
 	if (devfreq->stop_polling)
 		goto out;
 
+	if (devfreq->governor->interrupt_driven)
+		goto out;
+
 	/* if new delay is zero, stop polling */
 	if (!new_delay) {
 		mutex_unlock(&devfreq->lock);
@@ -550,26 +612,69 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 				 void *devp)
 {
 	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
-	int ret;
+	int err = -EINVAL;
 
 	mutex_lock(&devfreq->lock);
 
 	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
-	if (!devfreq->scaling_min_freq) {
-		mutex_unlock(&devfreq->lock);
-		return -EINVAL;
-	}
+	if (!devfreq->scaling_min_freq)
+		goto out;
 
 	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
 	if (!devfreq->scaling_max_freq) {
-		mutex_unlock(&devfreq->lock);
-		return -EINVAL;
+		devfreq->scaling_max_freq = ULONG_MAX;
+		goto out;
 	}
 
-	ret = update_devfreq(devfreq);
+	err = update_devfreq(devfreq);
+
+out:
 	mutex_unlock(&devfreq->lock);
+	if (err)
+		dev_err(devfreq->dev.parent,
+			"failed to update frequency from OPP notifier (%d)\n",
+			err);
 
-	return ret;
+	return NOTIFY_OK;
+}
+
+/**
+ * qos_notifier_call() - Common handler for QoS constraints.
+ * @devfreq:    the devfreq instance.
+ */
+static int qos_notifier_call(struct devfreq *devfreq)
+{
+	int err;
+
+	mutex_lock(&devfreq->lock);
+	err = update_devfreq(devfreq);
+	mutex_unlock(&devfreq->lock);
+	if (err)
+		dev_err(devfreq->dev.parent,
+			"failed to update frequency from PM QoS (%d)\n",
+			err);
+
+	return NOTIFY_OK;
+}
+
+/**
+ * qos_min_notifier_call() - Callback for QoS min_freq changes.
+ * @nb:		Should be devfreq->nb_min
+ */
+static int qos_min_notifier_call(struct notifier_block *nb,
+					 unsigned long val, void *ptr)
+{
+	return qos_notifier_call(container_of(nb, struct devfreq, nb_min));
+}
+
+/**
+ * qos_max_notifier_call() - Callback for QoS max_freq changes.
+ * @nb:		Should be devfreq->nb_max
+ */
+static int qos_max_notifier_call(struct notifier_block *nb,
+					 unsigned long val, void *ptr)
+{
+	return qos_notifier_call(container_of(nb, struct devfreq, nb_max));
 }
 
 /**
@@ -581,16 +686,36 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 static void devfreq_dev_release(struct device *dev)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
+	int err;
 
 	mutex_lock(&devfreq_list_lock);
-	if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) {
-		mutex_unlock(&devfreq_list_lock);
-		dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n");
-		return;
-	}
 	list_del(&devfreq->node);
 	mutex_unlock(&devfreq_list_lock);
 
+	err = dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
+					 DEV_PM_QOS_MAX_FREQUENCY);
+	if (err && err != -ENOENT)
+		dev_warn(dev->parent,
+			"Failed to remove max_freq notifier: %d\n", err);
+	err = dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
+					 DEV_PM_QOS_MIN_FREQUENCY);
+	if (err && err != -ENOENT)
+		dev_warn(dev->parent,
+			"Failed to remove min_freq notifier: %d\n", err);
+
+	if (dev_pm_qos_request_active(&devfreq->user_max_freq_req)) {
+		err = dev_pm_qos_remove_request(&devfreq->user_max_freq_req);
+		if (err)
+			dev_warn(dev->parent,
+				"Failed to remove max_freq request: %d\n", err);
+	}
+	if (dev_pm_qos_request_active(&devfreq->user_min_freq_req)) {
+		err = dev_pm_qos_remove_request(&devfreq->user_min_freq_req);
+		if (err)
+			dev_warn(dev->parent,
+				"Failed to remove min_freq request: %d\n", err);
+	}
+
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
 
@@ -625,7 +750,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq = find_device_devfreq(dev);
 	mutex_unlock(&devfreq_list_lock);
 	if (!IS_ERR(devfreq)) {
-		dev_err(dev, "%s: Unable to create devfreq for the device.\n",
+		dev_err(dev, "%s: devfreq device already exists!\n",
 			__func__);
 		err = -EINVAL;
 		goto err_out;
@@ -642,6 +767,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->dev.parent = dev;
 	devfreq->dev.class = devfreq_class;
 	devfreq->dev.release = devfreq_dev_release;
+	INIT_LIST_HEAD(&devfreq->node);
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
@@ -663,7 +789,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		err = -EINVAL;
 		goto err_dev;
 	}
-	devfreq->min_freq = devfreq->scaling_min_freq;
 
 	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
 	if (!devfreq->scaling_max_freq) {
@@ -671,7 +796,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		err = -EINVAL;
 		goto err_dev;
 	}
-	devfreq->max_freq = devfreq->scaling_max_freq;
 
 	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
 	atomic_set(&devfreq->suspend_count, 0);
@@ -685,33 +809,56 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		goto err_out;
 	}
 
-	devfreq->trans_table = devm_kzalloc(&devfreq->dev,
+	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
 			array3_size(sizeof(unsigned int),
 				    devfreq->profile->max_state,
 				    devfreq->profile->max_state),
 			GFP_KERNEL);
-	if (!devfreq->trans_table) {
+	if (!devfreq->stats.trans_table) {
 		mutex_unlock(&devfreq->lock);
 		err = -ENOMEM;
 		goto err_devfreq;
 	}
 
-	devfreq->time_in_state = devm_kcalloc(&devfreq->dev,
+	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
 			devfreq->profile->max_state,
-			sizeof(unsigned long),
+			sizeof(*devfreq->stats.time_in_state),
 			GFP_KERNEL);
-	if (!devfreq->time_in_state) {
+	if (!devfreq->stats.time_in_state) {
 		mutex_unlock(&devfreq->lock);
 		err = -ENOMEM;
 		goto err_devfreq;
 	}
 
-	devfreq->last_stat_updated = jiffies;
+	devfreq->stats.total_trans = 0;
+	devfreq->stats.last_update = get_jiffies_64();
 
 	srcu_init_notifier_head(&devfreq->transition_notifier_list);
 
 	mutex_unlock(&devfreq->lock);
 
+	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
+				     DEV_PM_QOS_MIN_FREQUENCY, 0);
+	if (err < 0)
+		goto err_devfreq;
+	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
+				     DEV_PM_QOS_MAX_FREQUENCY,
+				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (err < 0)
+		goto err_devfreq;
+
+	devfreq->nb_min.notifier_call = qos_min_notifier_call;
+	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
+				      DEV_PM_QOS_MIN_FREQUENCY);
+	if (err)
+		goto err_devfreq;
+
+	devfreq->nb_max.notifier_call = qos_max_notifier_call;
+	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_max,
+				      DEV_PM_QOS_MAX_FREQUENCY);
+	if (err)
+		goto err_devfreq;
+
 	mutex_lock(&devfreq_list_lock);
 
 	governor = try_then_request_governor(devfreq->governor_name);
@@ -903,7 +1050,9 @@ int devfreq_suspend_device(struct devfreq *devfreq)
 	}
 
 	if (devfreq->suspend_freq) {
+		mutex_lock(&devfreq->lock);
 		ret = devfreq_set_target(devfreq, devfreq->suspend_freq, 0);
+		mutex_unlock(&devfreq->lock);
 		if (ret)
 			return ret;
 	}
@@ -931,7 +1080,9 @@ int devfreq_resume_device(struct devfreq *devfreq)
 		return 0;
 
 	if (devfreq->resume_freq) {
+		mutex_lock(&devfreq->lock);
 		ret = devfreq_set_target(devfreq, devfreq->resume_freq, 0);
+		mutex_unlock(&devfreq->lock);
 		if (ret)
 			return ret;
 	}
@@ -1111,6 +1262,14 @@ err_out:
 }
 EXPORT_SYMBOL(devfreq_remove_governor);
 
+static ssize_t name_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct devfreq *devfreq = to_devfreq(dev);
+	return sprintf(buf, "%s\n", dev_name(devfreq->dev.parent));
+}
+static DEVICE_ATTR_RO(name);
+
 static ssize_t governor_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
@@ -1195,7 +1354,7 @@ static ssize_t available_governors_show(struct device *d,
 	 * The devfreq with immutable governor (e.g., passive) shows
 	 * only own governor.
 	 */
-	if (df->governor->immutable) {
+	if (df->governor && df->governor->immutable) {
 		count = scnprintf(&buf[count], DEVFREQ_NAME_LEN,
 				  "%s ", df->governor_name);
 	/*
@@ -1281,42 +1440,39 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 	unsigned long value;
 	int ret;
 
+	/*
+	 * Protect against theoretical sysfs writes between
+	 * device_add and dev_pm_qos_add_request
+	 */
+	if (!dev_pm_qos_request_active(&df->user_min_freq_req))
+		return -EAGAIN;
+
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-
-	if (value) {
-		if (value > df->max_freq) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-	} else {
-		unsigned long *freq_table = df->profile->freq_table;
-
-		/* Get minimum frequency according to sorting order */
-		if (freq_table[0] < freq_table[df->profile->max_state - 1])
-			value = freq_table[0];
-		else
-			value = freq_table[df->profile->max_state - 1];
-	}
+	/* Round down to kHz for PM QoS */
+	ret = dev_pm_qos_update_request(&df->user_min_freq_req,
+					value / HZ_PER_KHZ);
+	if (ret < 0)
+		return ret;
 
-	df->min_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
-	return ret;
+	return count;
 }
 
 static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
 	struct devfreq *df = to_devfreq(dev);
+	unsigned long min_freq, max_freq;
 
-	return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq));
+	mutex_lock(&df->lock);
+	get_freq_range(df, &min_freq, &max_freq);
+	mutex_unlock(&df->lock);
+
+	return sprintf(buf, "%lu\n", min_freq);
 }
+static DEVICE_ATTR_RW(min_freq);
 
 static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
@@ -1325,42 +1481,50 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 	unsigned long value;
 	int ret;
 
+	/*
+	 * Protect against theoretical sysfs writes between
+	 * device_add and dev_pm_qos_add_request
+	 */
+	if (!dev_pm_qos_request_active(&df->user_max_freq_req))
+		return -EINVAL;
+
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-
-	if (value) {
-		if (value < df->min_freq) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-	} else {
-		unsigned long *freq_table = df->profile->freq_table;
+	/*
+	 * PM QoS frequencies are in kHz so we need to convert. Convert by
+	 * rounding upwards so that the acceptable interval never shrinks.
+	 *
+	 * For example if the user writes "666666666" to sysfs this value will
+	 * be converted to 666667 kHz and back to 666667000 Hz before an OPP
+	 * lookup, this ensures that an OPP of 666666666Hz is still accepted.
+	 *
+	 * A value of zero means "no limit".
+	 */
+	if (value)
+		value = DIV_ROUND_UP(value, HZ_PER_KHZ);
+	else
+		value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
 
-		/* Get maximum frequency according to sorting order */
-		if (freq_table[0] < freq_table[df->profile->max_state - 1])
-			value = freq_table[df->profile->max_state - 1];
-		else
-			value = freq_table[0];
-	}
+	ret = dev_pm_qos_update_request(&df->user_max_freq_req, value);
+	if (ret < 0)
+		return ret;
 
-	df->max_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
-	return ret;
+	return count;
 }
-static DEVICE_ATTR_RW(min_freq);
 
 static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
 	struct devfreq *df = to_devfreq(dev);
+	unsigned long min_freq, max_freq;
 
-	return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq));
+	mutex_lock(&df->lock);
+	get_freq_range(df, &min_freq, &max_freq);
+	mutex_unlock(&df->lock);
+
+	return sprintf(buf, "%lu\n", max_freq);
 }
 static DEVICE_ATTR_RW(max_freq);
 
@@ -1397,12 +1561,17 @@ static ssize_t trans_stat_show(struct device *dev,
 	int i, j;
 	unsigned int max_state = devfreq->profile->max_state;
 
-	if (!devfreq->stop_polling &&
-			devfreq_update_status(devfreq, devfreq->previous_freq))
-		return 0;
 	if (max_state == 0)
 		return sprintf(buf, "Not Supported.\n");
 
+	mutex_lock(&devfreq->lock);
+	if (!devfreq->stop_polling &&
+			devfreq_update_status(devfreq, devfreq->previous_freq)) {
+		mutex_unlock(&devfreq->lock);
+		return 0;
+	}
+	mutex_unlock(&devfreq->lock);
+
 	len = sprintf(buf, "     From  :   To\n");
 	len += sprintf(buf + len, "           :");
 	for (i = 0; i < max_state; i++)
@@ -1422,18 +1591,47 @@ static ssize_t trans_stat_show(struct device *dev,
 				devfreq->profile->freq_table[i]);
 		for (j = 0; j < max_state; j++)
 			len += sprintf(buf + len, "%10u",
-				devfreq->trans_table[(i * max_state) + j]);
-		len += sprintf(buf + len, "%10u\n",
-			jiffies_to_msecs(devfreq->time_in_state[i]));
+				devfreq->stats.trans_table[(i * max_state) + j]);
+
+		len += sprintf(buf + len, "%10llu\n", (u64)
+			jiffies64_to_msecs(devfreq->stats.time_in_state[i]));
 	}
 
 	len += sprintf(buf + len, "Total transition : %u\n",
-					devfreq->total_trans);
+					devfreq->stats.total_trans);
 	return len;
 }
-static DEVICE_ATTR_RO(trans_stat);
+
+static ssize_t trans_stat_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	int err, value;
+
+	if (df->profile->max_state == 0)
+		return count;
+
+	err = kstrtoint(buf, 10, &value);
+	if (err || value != 0)
+		return -EINVAL;
+
+	mutex_lock(&df->lock);
+	memset(df->stats.time_in_state, 0, (df->profile->max_state *
+					sizeof(*df->stats.time_in_state)));
+	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
+					df->profile->max_state,
+					df->profile->max_state));
+	df->stats.total_trans = 0;
+	df->stats.last_update = get_jiffies_64();
+	mutex_unlock(&df->lock);
+
+	return count;
+}
+static DEVICE_ATTR_RW(trans_stat);
 
 static struct attribute *devfreq_attrs[] = {
+	&dev_attr_name.attr,
 	&dev_attr_governor.attr,
 	&dev_attr_available_governors.attr,
 	&dev_attr_cur_freq.attr,
@@ -1447,6 +1645,81 @@ static struct attribute *devfreq_attrs[] = {
 };
 ATTRIBUTE_GROUPS(devfreq);
 
+/**
+ * devfreq_summary_show() - Show the summary of the devfreq devices
+ * @s:		seq_file instance to show the summary of devfreq devices
+ * @data:	not used
+ *
+ * Show the summary of the devfreq devices via 'devfreq_summary' debugfs file.
+ * It helps that user can know the detailed information of the devfreq devices.
+ *
+ * Return 0 always because it shows the information without any data change.
+ */
+static int devfreq_summary_show(struct seq_file *s, void *data)
+{
+	struct devfreq *devfreq;
+	struct devfreq *p_devfreq = NULL;
+	unsigned long cur_freq, min_freq, max_freq;
+	unsigned int polling_ms;
+
+	seq_printf(s, "%-30s %-10s %-10s %-15s %10s %12s %12s %12s\n",
+			"dev_name",
+			"dev",
+			"parent_dev",
+			"governor",
+			"polling_ms",
+			"cur_freq_Hz",
+			"min_freq_Hz",
+			"max_freq_Hz");
+	seq_printf(s, "%30s %10s %10s %15s %10s %12s %12s %12s\n",
+			"------------------------------",
+			"----------",
+			"----------",
+			"---------------",
+			"----------",
+			"------------",
+			"------------",
+			"------------");
+
+	mutex_lock(&devfreq_list_lock);
+
+	list_for_each_entry_reverse(devfreq, &devfreq_list, node) {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+		if (!strncmp(devfreq->governor_name, DEVFREQ_GOV_PASSIVE,
+							DEVFREQ_NAME_LEN)) {
+			struct devfreq_passive_data *data = devfreq->data;
+
+			if (data)
+				p_devfreq = data->parent;
+		} else {
+			p_devfreq = NULL;
+		}
+#endif
+
+		mutex_lock(&devfreq->lock);
+		cur_freq = devfreq->previous_freq,
+		get_freq_range(devfreq, &min_freq, &max_freq);
+		polling_ms = devfreq->profile->polling_ms,
+		mutex_unlock(&devfreq->lock);
+
+		seq_printf(s,
+			"%-30s %-10s %-10s %-15s %10d %12ld %12ld %12ld\n",
+			dev_name(devfreq->dev.parent),
+			dev_name(&devfreq->dev),
+			p_devfreq ? dev_name(&p_devfreq->dev) : "null",
+			devfreq->governor_name,
+			polling_ms,
+			cur_freq,
+			min_freq,
+			max_freq);
+	}
+
+	mutex_unlock(&devfreq_list_lock);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(devfreq_summary);
+
 static int __init devfreq_init(void)
 {
 	devfreq_class = class_create(THIS_MODULE, "devfreq");
@@ -1463,6 +1736,11 @@ static int __init devfreq_init(void)
 	}
 	devfreq_class->dev_groups = devfreq_groups;
 
+	devfreq_debugfs = debugfs_create_dir("devfreq", NULL);
+	debugfs_create_file("devfreq_summary", 0444,
+				devfreq_debugfs, NULL,
+				&devfreq_summary_fops);
+
 	return 0;
 }
 subsys_initcall(devfreq_init);
@@ -1656,7 +1934,7 @@ static void devm_devfreq_notifier_release(struct device *dev, void *res)
 
 /**
  * devm_devfreq_register_notifier()
-	- Resource-managed devfreq_register_notifier()
+ *	- Resource-managed devfreq_register_notifier()
  * @dev:	The devfreq user device. (parent of devfreq)
  * @devfreq:	The devfreq object.
  * @nb:		The notifier block to be unregistered.
@@ -1692,7 +1970,7 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier);
 
 /**
  * devm_devfreq_unregister_notifier()
-	- Resource-managed devfreq_unregister_notifier()
+ *	- Resource-managed devfreq_unregister_notifier()
  * @dev:	The devfreq user device. (parent of devfreq)
  * @devfreq:	The devfreq object.
  * @nb:		The notifier block to be unregistered.
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index cef2cf5347ca..878825372f6f 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -15,7 +15,7 @@ menuconfig PM_DEVFREQ_EVENT
 if PM_DEVFREQ_EVENT
 
 config DEVFREQ_EVENT_EXYNOS_NOCP
-	tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
+	tristate "Exynos NoC (Network On Chip) Probe DEVFREQ event Driver"
 	depends on ARCH_EXYNOS || COMPILE_TEST
 	select PM_OPP
 	select REGMAP_MMIO
@@ -24,7 +24,7 @@ config DEVFREQ_EVENT_EXYNOS_NOCP
 	  (Network on Chip) Probe counters to measure the bandwidth of AXI bus.
 
 config DEVFREQ_EVENT_EXYNOS_PPMU
-	tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
+	tristate "Exynos PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
 	depends on ARCH_EXYNOS || COMPILE_TEST
 	select PM_OPP
 	help
@@ -34,7 +34,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU
 
 config DEVFREQ_EVENT_ROCKCHIP_DFI
 	tristate "ROCKCHIP DFI DEVFREQ event Driver"
-	depends on ARCH_ROCKCHIP
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	help
 	  This add the devfreq-event driver for Rockchip SoC. It provides DFI
 	  (DDR Monitor Module) driver to count ddr load.
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 1c565926db9f..ccc531ee6938 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * exynos-nocp.c - EXYNOS NoC (Network On Chip) Probe support
+ * exynos-nocp.c - Exynos NoC (Network On Chip) Probe support
  *
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  * Author : Chanwoo Choi <cw00.choi@samsung.com>
diff --git a/drivers/devfreq/event/exynos-nocp.h b/drivers/devfreq/event/exynos-nocp.h
index 55cc96284a36..2d6f08cfd0c5 100644
--- a/drivers/devfreq/event/exynos-nocp.h
+++ b/drivers/devfreq/event/exynos-nocp.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * exynos-nocp.h - EXYNOS NoC (Network on Chip) Probe header file
+ * exynos-nocp.h - Exynos NoC (Network on Chip) Probe header file
  *
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  * Author : Chanwoo Choi <cw00.choi@samsung.com>
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 87b42055e6bc..17ed980d9099 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * exynos_ppmu.c - EXYNOS PPMU (Platform Performance Monitoring Unit) support
+ * exynos_ppmu.c - Exynos PPMU (Platform Performance Monitoring Unit) support
  *
  * Copyright (c) 2014-2015 Samsung Electronics Co., Ltd.
  * Author : Chanwoo Choi <cw00.choi@samsung.com>
@@ -101,17 +101,22 @@ static struct __exynos_ppmu_events {
 	PPMU_EVENT(dmc1_1),
 };
 
-static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev)
+static int __exynos_ppmu_find_ppmu_id(const char *edev_name)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(ppmu_events); i++)
-		if (!strcmp(edev->desc->name, ppmu_events[i].name))
+		if (!strcmp(edev_name, ppmu_events[i].name))
 			return ppmu_events[i].id;
 
 	return -EINVAL;
 }
 
+static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev)
+{
+	return __exynos_ppmu_find_ppmu_id(edev->desc->name);
+}
+
 /*
  * The devfreq-event ops structure for PPMU v1.1
  */
@@ -556,13 +561,11 @@ static int of_get_devfreq_events(struct device_node *np,
 			 * use default if not.
 			 */
 			if (info->ppmu_type == EXYNOS_TYPE_PPMU_V2) {
-				struct devfreq_event_dev edev;
 				int id;
 				/* Not all registers take the same value for
 				 * read+write data count.
 				 */
-				edev.desc = &desc[j];
-				id = exynos_ppmu_find_ppmu_id(&edev);
+				id = __exynos_ppmu_find_ppmu_id(desc[j].name);
 
 				switch (id) {
 				case PPMU_PMNCNT0:
@@ -673,7 +676,6 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
 	for (i = 0; i < info->num_events; i++) {
 		edev[i] = devm_devfreq_event_add_edev(&pdev->dev, &desc[i]);
 		if (IS_ERR(edev[i])) {
-			ret = PTR_ERR(edev[i]);
 			dev_err(&pdev->dev,
 				"failed to add devfreq-event device\n");
 			return PTR_ERR(edev[i]);
diff --git a/drivers/devfreq/event/exynos-ppmu.h b/drivers/devfreq/event/exynos-ppmu.h
index 284420047455..97f667d0cbdd 100644
--- a/drivers/devfreq/event/exynos-ppmu.h
+++ b/drivers/devfreq/event/exynos-ppmu.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * exynos_ppmu.h - EXYNOS PPMU header file
+ * exynos_ppmu.h - Exynos PPMU header file
  *
  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
  * Author : Chanwoo Choi <cw00.choi@samsung.com>
diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c
index 5d1042188727..9a88faaf8b27 100644
--- a/drivers/devfreq/event/rockchip-dfi.c
+++ b/drivers/devfreq/event/rockchip-dfi.c
@@ -177,7 +177,6 @@ static int rockchip_dfi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct rockchip_dfi *data;
-	struct resource *res;
 	struct devfreq_event_desc *desc;
 	struct device_node *np = pdev->dev.of_node, *node;
 
@@ -185,8 +184,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	data->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(data->regs))
 		return PTR_ERR(data->regs);
 
@@ -200,6 +198,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev)
 	node = of_parse_phandle(np, "rockchip,pmu", 0);
 	if (node) {
 		data->regmap_pmu = syscon_node_to_regmap(node);
+		of_node_put(node);
 		if (IS_ERR(data->regmap_pmu))
 			return PTR_ERR(data->regmap_pmu);
 	}
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index c832673273a2..8fa8eb541373 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -15,11 +15,10 @@
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
-#include <linux/slab.h>
 
 #define DEFAULT_SATURATION_RATIO	40
 
@@ -127,6 +126,7 @@ static int exynos_bus_get_dev_status(struct device *dev,
 
 	ret = exynos_bus_get_event(bus, &edata);
 	if (ret < 0) {
+		dev_err(dev, "failed to get event from devfreq-event devices\n");
 		stat->total_time = stat->busy_time = 0;
 		goto err;
 	}
@@ -287,52 +287,12 @@ err_clk:
 	return ret;
 }
 
-static int exynos_bus_probe(struct platform_device *pdev)
+static int exynos_bus_profile_init(struct exynos_bus *bus,
+				   struct devfreq_dev_profile *profile)
 {
-	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node, *node;
-	struct devfreq_dev_profile *profile;
+	struct device *dev = bus->dev;
 	struct devfreq_simple_ondemand_data *ondemand_data;
-	struct devfreq_passive_data *passive_data;
-	struct devfreq *parent_devfreq;
-	struct exynos_bus *bus;
-	int ret, max_state;
-	unsigned long min_freq, max_freq;
-	bool passive = false;
-
-	if (!np) {
-		dev_err(dev, "failed to find devicetree node\n");
-		return -EINVAL;
-	}
-
-	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
-	if (!bus)
-		return -ENOMEM;
-	mutex_init(&bus->lock);
-	bus->dev = &pdev->dev;
-	platform_set_drvdata(pdev, bus);
-
-	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
-	if (!profile)
-		return -ENOMEM;
-
-	node = of_parse_phandle(dev->of_node, "devfreq", 0);
-	if (node) {
-		of_node_put(node);
-		passive = true;
-	} else {
-		ret = exynos_bus_parent_parse_of(np, bus);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* Parse the device-tree to get the resource information */
-	ret = exynos_bus_parse_of(np, bus);
-	if (ret < 0)
-		goto err_reg;
-
-	if (passive)
-		goto passive;
+	int ret;
 
 	/* Initialize the struct profile and governor data for parent device */
 	profile->polling_ms = 50;
@@ -341,10 +301,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	profile->exit = exynos_bus_exit;
 
 	ondemand_data = devm_kzalloc(dev, sizeof(*ondemand_data), GFP_KERNEL);
-	if (!ondemand_data) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!ondemand_data)
+		return -ENOMEM;
+
 	ondemand_data->upthreshold = 40;
 	ondemand_data->downdifferential = 5;
 
@@ -354,15 +313,14 @@ static int exynos_bus_probe(struct platform_device *pdev)
 						ondemand_data);
 	if (IS_ERR(bus->devfreq)) {
 		dev_err(dev, "failed to add devfreq device\n");
-		ret = PTR_ERR(bus->devfreq);
-		goto err;
+		return PTR_ERR(bus->devfreq);
 	}
 
 	/* Register opp_notifier to catch the change of OPP  */
 	ret = devm_devfreq_register_opp_notifier(dev, bus->devfreq);
 	if (ret < 0) {
 		dev_err(dev, "failed to register opp notifier\n");
-		goto err;
+		return ret;
 	}
 
 	/*
@@ -372,33 +330,44 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	ret = exynos_bus_enable_edev(bus);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable devfreq-event devices\n");
-		goto err;
+		return ret;
 	}
 
 	ret = exynos_bus_set_event(bus);
 	if (ret < 0) {
 		dev_err(dev, "failed to set event to devfreq-event devices\n");
-		goto err;
+		goto err_edev;
 	}
 
-	goto out;
-passive:
+	return 0;
+
+err_edev:
+	if (exynos_bus_disable_edev(bus))
+		dev_warn(dev, "failed to disable the devfreq-event devices\n");
+
+	return ret;
+}
+
+static int exynos_bus_profile_init_passive(struct exynos_bus *bus,
+					   struct devfreq_dev_profile *profile)
+{
+	struct device *dev = bus->dev;
+	struct devfreq_passive_data *passive_data;
+	struct devfreq *parent_devfreq;
+
 	/* Initialize the struct profile and governor data for passive device */
 	profile->target = exynos_bus_target;
 	profile->exit = exynos_bus_passive_exit;
 
 	/* Get the instance of parent devfreq device */
 	parent_devfreq = devfreq_get_devfreq_by_phandle(dev, 0);
-	if (IS_ERR(parent_devfreq)) {
-		ret = -EPROBE_DEFER;
-		goto err;
-	}
+	if (IS_ERR(parent_devfreq))
+		return -EPROBE_DEFER;
 
 	passive_data = devm_kzalloc(dev, sizeof(*passive_data), GFP_KERNEL);
-	if (!passive_data) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!passive_data)
+		return -ENOMEM;
+
 	passive_data->parent = parent_devfreq;
 
 	/* Add devfreq device for exynos bus with passive governor */
@@ -407,11 +376,61 @@ passive:
 	if (IS_ERR(bus->devfreq)) {
 		dev_err(dev,
 			"failed to add devfreq dev with passive governor\n");
-		ret = PTR_ERR(bus->devfreq);
-		goto err;
+		return PTR_ERR(bus->devfreq);
+	}
+
+	return 0;
+}
+
+static int exynos_bus_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node, *node;
+	struct devfreq_dev_profile *profile;
+	struct exynos_bus *bus;
+	int ret, max_state;
+	unsigned long min_freq, max_freq;
+	bool passive = false;
+
+	if (!np) {
+		dev_err(dev, "failed to find devicetree node\n");
+		return -EINVAL;
 	}
 
-out:
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+	mutex_init(&bus->lock);
+	bus->dev = &pdev->dev;
+	platform_set_drvdata(pdev, bus);
+
+	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
+	if (!profile)
+		return -ENOMEM;
+
+	node = of_parse_phandle(dev->of_node, "devfreq", 0);
+	if (node) {
+		of_node_put(node);
+		passive = true;
+	} else {
+		ret = exynos_bus_parent_parse_of(np, bus);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Parse the device-tree to get the resource information */
+	ret = exynos_bus_parse_of(np, bus);
+	if (ret < 0)
+		goto err_reg;
+
+	if (passive)
+		ret = exynos_bus_profile_init_passive(bus, profile);
+	else
+		ret = exynos_bus_profile_init(bus, profile);
+
+	if (ret < 0)
+		goto err;
+
 	max_state = bus->devfreq->profile->max_state;
 	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
 	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index bbe5ff9fcecf..dc7533ccc3db 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -31,6 +31,8 @@
  * @name:		Governor's name
  * @immutable:		Immutable flag for governor. If the value is 1,
  *			this govenror is never changeable to other governor.
+ * @interrupt_driven:	Devfreq core won't schedule polling work for this
+ *			governor if value is set to 1.
  * @get_target_freq:	Returns desired operating frequency for the device.
  *			Basically, get_target_freq will run
  *			devfreq_dev_profile.get_dev_status() to get the
@@ -49,6 +51,7 @@ struct devfreq_governor {
 
 	const char name[DEVFREQ_NAME_LEN];
 	const unsigned int immutable;
+	const unsigned int interrupt_driven;
 	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
 	int (*event_handler)(struct devfreq *devfreq,
 				unsigned int event, void *data);
diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
new file mode 100644
index 000000000000..bc82d3653bff
--- /dev/null
+++ b/drivers/devfreq/imx8m-ddrc.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 NXP
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/devfreq.h>
+#include <linux/pm_opp.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/arm-smccc.h>
+
+#define IMX_SIP_DDR_DVFS			0xc2000004
+
+/* Query available frequencies. */
+#define IMX_SIP_DDR_DVFS_GET_FREQ_COUNT		0x10
+#define IMX_SIP_DDR_DVFS_GET_FREQ_INFO		0x11
+
+/*
+ * This should be in a 1:1 mapping with devicetree OPPs but
+ * firmware provides additional info.
+ */
+struct imx8m_ddrc_freq {
+	unsigned long rate;
+	unsigned long smcarg;
+	int dram_core_parent_index;
+	int dram_alt_parent_index;
+	int dram_apb_parent_index;
+};
+
+/* Hardware limitation */
+#define IMX8M_DDRC_MAX_FREQ_COUNT 4
+
+/*
+ * i.MX8M DRAM Controller clocks have the following structure (abridged):
+ *
+ * +----------+       |\            +------+
+ * | dram_pll |-------|M| dram_core |      |
+ * +----------+       |U|---------->| D    |
+ *                 /--|X|           |  D   |
+ *   dram_alt_root |  |/            |   R  |
+ *                 |                |    C |
+ *            +---------+           |      |
+ *            |FIX DIV/4|           |      |
+ *            +---------+           |      |
+ *  composite:     |                |      |
+ * +----------+    |                |      |
+ * | dram_alt |----/                |      |
+ * +----------+                     |      |
+ * | dram_apb |-------------------->|      |
+ * +----------+                     +------+
+ *
+ * The dram_pll is used for higher rates and dram_alt is used for lower rates.
+ *
+ * Frequency switching is implemented in TF-A (via SMC call) and can change the
+ * configuration of the clocks, including mux parents. The dram_alt and
+ * dram_apb clocks are "imx composite" and their parent can change too.
+ *
+ * We need to prepare/enable the new mux parents head of switching and update
+ * their information afterwards.
+ */
+struct imx8m_ddrc {
+	struct devfreq_dev_profile profile;
+	struct devfreq *devfreq;
+
+	/* For frequency switching: */
+	struct clk *dram_core;
+	struct clk *dram_pll;
+	struct clk *dram_alt;
+	struct clk *dram_apb;
+
+	int freq_count;
+	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
+};
+
+static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
+						    unsigned long rate)
+{
+	struct imx8m_ddrc_freq *freq;
+	int i;
+
+	/*
+	 * Firmware reports values in MT/s, so we round-down from Hz
+	 * Rounding is extra generous to ensure a match.
+	 */
+	rate = DIV_ROUND_CLOSEST(rate, 250000);
+	for (i = 0; i < priv->freq_count; ++i) {
+		freq = &priv->freq_table[i];
+		if (freq->rate == rate ||
+				freq->rate + 1 == rate ||
+				freq->rate - 1 == rate)
+			return freq;
+	}
+
+	return NULL;
+}
+
+static void imx8m_ddrc_smc_set_freq(int target_freq)
+{
+	struct arm_smccc_res res;
+	u32 online_cpus = 0;
+	int cpu;
+
+	local_irq_disable();
+
+	for_each_online_cpu(cpu)
+		online_cpus |= (1 << (cpu * 8));
+
+	/* change the ddr freqency */
+	arm_smccc_smc(IMX_SIP_DDR_DVFS, target_freq, online_cpus,
+			0, 0, 0, 0, 0, &res);
+
+	local_irq_enable();
+}
+
+static struct clk *clk_get_parent_by_index(struct clk *clk, int index)
+{
+	struct clk_hw *hw;
+
+	hw = clk_hw_get_parent_by_index(__clk_get_hw(clk), index);
+
+	return hw ? hw->clk : NULL;
+}
+
+static int imx8m_ddrc_set_freq(struct device *dev, struct imx8m_ddrc_freq *freq)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+	struct clk *new_dram_core_parent;
+	struct clk *new_dram_alt_parent;
+	struct clk *new_dram_apb_parent;
+	int ret;
+
+	/*
+	 * Fetch new parents
+	 *
+	 * new_dram_alt_parent and new_dram_apb_parent are optional but
+	 * new_dram_core_parent is not.
+	 */
+	new_dram_core_parent = clk_get_parent_by_index(
+			priv->dram_core, freq->dram_core_parent_index - 1);
+	if (!new_dram_core_parent) {
+		dev_err(dev, "failed to fetch new dram_core parent\n");
+		return -EINVAL;
+	}
+	if (freq->dram_alt_parent_index) {
+		new_dram_alt_parent = clk_get_parent_by_index(
+				priv->dram_alt,
+				freq->dram_alt_parent_index - 1);
+		if (!new_dram_alt_parent) {
+			dev_err(dev, "failed to fetch new dram_alt parent\n");
+			return -EINVAL;
+		}
+	} else
+		new_dram_alt_parent = NULL;
+
+	if (freq->dram_apb_parent_index) {
+		new_dram_apb_parent = clk_get_parent_by_index(
+				priv->dram_apb,
+				freq->dram_apb_parent_index - 1);
+		if (!new_dram_apb_parent) {
+			dev_err(dev, "failed to fetch new dram_apb parent\n");
+			return -EINVAL;
+		}
+	} else
+		new_dram_apb_parent = NULL;
+
+	/* increase reference counts and ensure clks are ON before switch */
+	ret = clk_prepare_enable(new_dram_core_parent);
+	if (ret) {
+		dev_err(dev, "failed to enable new dram_core parent: %d\n",
+			ret);
+		goto out;
+	}
+	ret = clk_prepare_enable(new_dram_alt_parent);
+	if (ret) {
+		dev_err(dev, "failed to enable new dram_alt parent: %d\n",
+			ret);
+		goto out_disable_core_parent;
+	}
+	ret = clk_prepare_enable(new_dram_apb_parent);
+	if (ret) {
+		dev_err(dev, "failed to enable new dram_apb parent: %d\n",
+			ret);
+		goto out_disable_alt_parent;
+	}
+
+	imx8m_ddrc_smc_set_freq(freq->smcarg);
+
+	/* update parents in clk tree after switch. */
+	ret = clk_set_parent(priv->dram_core, new_dram_core_parent);
+	if (ret)
+		dev_warn(dev, "failed to set dram_core parent: %d\n", ret);
+	if (new_dram_alt_parent) {
+		ret = clk_set_parent(priv->dram_alt, new_dram_alt_parent);
+		if (ret)
+			dev_warn(dev, "failed to set dram_alt parent: %d\n",
+				 ret);
+	}
+	if (new_dram_apb_parent) {
+		ret = clk_set_parent(priv->dram_apb, new_dram_apb_parent);
+		if (ret)
+			dev_warn(dev, "failed to set dram_apb parent: %d\n",
+				 ret);
+	}
+
+	/*
+	 * Explicitly refresh dram PLL rate.
+	 *
+	 * Even if it's marked with CLK_GET_RATE_NOCACHE the rate will not be
+	 * automatically refreshed when clk_get_rate is called on children.
+	 */
+	clk_get_rate(priv->dram_pll);
+
+	/*
+	 * clk_set_parent transfer the reference count from old parent.
+	 * now we drop extra reference counts used during the switch
+	 */
+	clk_disable_unprepare(new_dram_apb_parent);
+out_disable_alt_parent:
+	clk_disable_unprepare(new_dram_alt_parent);
+out_disable_core_parent:
+	clk_disable_unprepare(new_dram_core_parent);
+out:
+	return ret;
+}
+
+static int imx8m_ddrc_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+	struct imx8m_ddrc_freq *freq_info;
+	struct dev_pm_opp *new_opp;
+	unsigned long old_freq, new_freq;
+	int ret;
+
+	new_opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(new_opp)) {
+		ret = PTR_ERR(new_opp);
+		dev_err(dev, "failed to get recommended opp: %d\n", ret);
+		return ret;
+	}
+	dev_pm_opp_put(new_opp);
+
+	old_freq = clk_get_rate(priv->dram_core);
+	if (*freq == old_freq)
+		return 0;
+
+	freq_info = imx8m_ddrc_find_freq(priv, *freq);
+	if (!freq_info)
+		return -EINVAL;
+
+	/*
+	 * Read back the clk rate to verify switch was correct and so that
+	 * we can report it on all error paths.
+	 */
+	ret = imx8m_ddrc_set_freq(dev, freq_info);
+
+	new_freq = clk_get_rate(priv->dram_core);
+	if (ret)
+		dev_err(dev, "ddrc failed freq switch to %lu from %lu: error %d. now at %lu\n",
+			*freq, old_freq, ret, new_freq);
+	else if (*freq != new_freq)
+		dev_err(dev, "ddrc failed freq update to %lu from %lu, now at %lu\n",
+			*freq, old_freq, new_freq);
+	else
+		dev_dbg(dev, "ddrc freq set to %lu (was %lu)\n",
+			*freq, old_freq);
+
+	return ret;
+}
+
+static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+
+	*freq = clk_get_rate(priv->dram_core);
+
+	return 0;
+}
+
+static int imx8m_ddrc_get_dev_status(struct device *dev,
+				     struct devfreq_dev_status *stat)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+
+	stat->busy_time = 0;
+	stat->total_time = 0;
+	stat->current_frequency = clk_get_rate(priv->dram_core);
+
+	return 0;
+}
+
+static int imx8m_ddrc_init_freq_info(struct device *dev)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+	struct arm_smccc_res res;
+	int index;
+
+	/* An error here means DDR DVFS API not supported by firmware */
+	arm_smccc_smc(IMX_SIP_DDR_DVFS, IMX_SIP_DDR_DVFS_GET_FREQ_COUNT,
+			0, 0, 0, 0, 0, 0, &res);
+	priv->freq_count = res.a0;
+	if (priv->freq_count <= 0 ||
+			priv->freq_count > IMX8M_DDRC_MAX_FREQ_COUNT)
+		return -ENODEV;
+
+	for (index = 0; index < priv->freq_count; ++index) {
+		struct imx8m_ddrc_freq *freq = &priv->freq_table[index];
+
+		arm_smccc_smc(IMX_SIP_DDR_DVFS, IMX_SIP_DDR_DVFS_GET_FREQ_INFO,
+			      index, 0, 0, 0, 0, 0, &res);
+		/* Result should be strictly positive */
+		if ((long)res.a0 <= 0)
+			return -ENODEV;
+
+		freq->rate = res.a0;
+		freq->smcarg = index;
+		freq->dram_core_parent_index = res.a1;
+		freq->dram_alt_parent_index = res.a2;
+		freq->dram_apb_parent_index = res.a3;
+
+		/* dram_core has 2 options: dram_pll or dram_alt_root */
+		if (freq->dram_core_parent_index != 1 &&
+				freq->dram_core_parent_index != 2)
+			return -ENODEV;
+		/* dram_apb and dram_alt have exactly 8 possible parents */
+		if (freq->dram_alt_parent_index > 8 ||
+				freq->dram_apb_parent_index > 8)
+			return -ENODEV;
+		/* dram_core from alt requires explicit dram_alt parent */
+		if (freq->dram_core_parent_index == 2 &&
+				freq->dram_alt_parent_index == 0)
+			return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int imx8m_ddrc_check_opps(struct device *dev)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+	struct imx8m_ddrc_freq *freq_info;
+	struct dev_pm_opp *opp;
+	unsigned long freq;
+	int i, opp_count;
+
+	/* Enumerate DT OPPs and disable those not supported by firmware */
+	opp_count = dev_pm_opp_get_opp_count(dev);
+	if (opp_count < 0)
+		return opp_count;
+	for (i = 0, freq = 0; i < opp_count; ++i, ++freq) {
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		if (IS_ERR(opp)) {
+			dev_err(dev, "Failed enumerating OPPs: %ld\n",
+				PTR_ERR(opp));
+			return PTR_ERR(opp);
+		}
+		dev_pm_opp_put(opp);
+
+		freq_info = imx8m_ddrc_find_freq(priv, freq);
+		if (!freq_info) {
+			dev_info(dev, "Disable unsupported OPP %luHz %luMT/s\n",
+					freq, DIV_ROUND_CLOSEST(freq, 250000));
+			dev_pm_opp_disable(dev, freq);
+		}
+	}
+
+	return 0;
+}
+
+static void imx8m_ddrc_exit(struct device *dev)
+{
+	dev_pm_opp_of_remove_table(dev);
+}
+
+static int imx8m_ddrc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct imx8m_ddrc *priv;
+	const char *gov = DEVFREQ_GOV_USERSPACE;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = imx8m_ddrc_init_freq_info(dev);
+	if (ret) {
+		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
+		return ret;
+	}
+
+	priv->dram_core = devm_clk_get(dev, "core");
+	if (IS_ERR(priv->dram_core)) {
+		ret = PTR_ERR(priv->dram_core);
+		dev_err(dev, "failed to fetch core clock: %d\n", ret);
+		return ret;
+	}
+	priv->dram_pll = devm_clk_get(dev, "pll");
+	if (IS_ERR(priv->dram_pll)) {
+		ret = PTR_ERR(priv->dram_pll);
+		dev_err(dev, "failed to fetch pll clock: %d\n", ret);
+		return ret;
+	}
+	priv->dram_alt = devm_clk_get(dev, "alt");
+	if (IS_ERR(priv->dram_alt)) {
+		ret = PTR_ERR(priv->dram_alt);
+		dev_err(dev, "failed to fetch alt clock: %d\n", ret);
+		return ret;
+	}
+	priv->dram_apb = devm_clk_get(dev, "apb");
+	if (IS_ERR(priv->dram_apb)) {
+		ret = PTR_ERR(priv->dram_apb);
+		dev_err(dev, "failed to fetch apb clock: %d\n", ret);
+		return ret;
+	}
+
+	ret = dev_pm_opp_of_add_table(dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to get OPP table\n");
+		return ret;
+	}
+
+	ret = imx8m_ddrc_check_opps(dev);
+	if (ret < 0)
+		goto err;
+
+	priv->profile.polling_ms = 1000;
+	priv->profile.target = imx8m_ddrc_target;
+	priv->profile.get_dev_status = imx8m_ddrc_get_dev_status;
+	priv->profile.exit = imx8m_ddrc_exit;
+	priv->profile.get_cur_freq = imx8m_ddrc_get_cur_freq;
+	priv->profile.initial_freq = clk_get_rate(priv->dram_core);
+
+	priv->devfreq = devm_devfreq_add_device(dev, &priv->profile,
+						gov, NULL);
+	if (IS_ERR(priv->devfreq)) {
+		ret = PTR_ERR(priv->devfreq);
+		dev_err(dev, "failed to add devfreq device: %d\n", ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	dev_pm_opp_of_remove_table(dev);
+	return ret;
+}
+
+static const struct of_device_id imx8m_ddrc_of_match[] = {
+	{ .compatible = "fsl,imx8m-ddrc", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, imx8m_ddrc_of_match);
+
+static struct platform_driver imx8m_ddrc_platdrv = {
+	.probe		= imx8m_ddrc_probe,
+	.driver = {
+		.name	= "imx8m-ddrc-devfreq",
+		.of_match_table = of_match_ptr(imx8m_ddrc_of_match),
+	},
+};
+module_platform_driver(imx8m_ddrc_platdrv);
+
+MODULE_DESCRIPTION("i.MX8M DDR Controller frequency driver");
+MODULE_AUTHOR("Leonard Crestez <leonard.crestez@nxp.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 2e65d7279d79..24f04f78285b 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -364,7 +364,8 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 			if (res.a0) {
 				dev_err(dev, "Failed to set dram param: %ld\n",
 					res.a0);
-				return -EINVAL;
+				ret = -EINVAL;
+				goto err_edev;
 			}
 		}
 	}
@@ -372,8 +373,11 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 	node = of_parse_phandle(np, "rockchip,pmu", 0);
 	if (node) {
 		data->regmap_pmu = syscon_node_to_regmap(node);
-		if (IS_ERR(data->regmap_pmu))
-			return PTR_ERR(data->regmap_pmu);
+		of_node_put(node);
+		if (IS_ERR(data->regmap_pmu)) {
+			ret = PTR_ERR(data->regmap_pmu);
+			goto err_edev;
+		}
 	}
 
 	regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val);
@@ -391,7 +395,8 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 		data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq;
 		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_edev;
 	};
 
 	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
@@ -425,7 +430,8 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 	 */
 	if (dev_pm_opp_of_add_table(dev)) {
 		dev_err(dev, "Invalid operating-points in device tree.\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_edev;
 	}
 
 	of_property_read_u32(np, "upthreshold",
@@ -465,6 +471,9 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 
 err_free_opp:
 	dev_pm_opp_of_remove_table(&pdev->dev);
+err_edev:
+	devfreq_event_disable_edev(data->edev);
+
 	return ret;
 }
 
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index a6ba75f4106d..0b65f89d74d5 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -11,11 +11,13 @@
 #include <linux/devfreq.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/mod_devicetable.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/reset.h>
+#include <linux/workqueue.h>
 
 #include "governor.h"
 
@@ -33,6 +35,8 @@
 #define ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN		BIT(30)
 #define ACTMON_DEV_CTRL_ENB					BIT(31)
 
+#define ACTMON_DEV_CTRL_STOP					0x00000000
+
 #define ACTMON_DEV_UPPER_WMARK					0x4
 #define ACTMON_DEV_LOWER_WMARK					0x8
 #define ACTMON_DEV_INIT_AVG					0xc
@@ -68,6 +72,8 @@
 
 #define KHZ							1000
 
+#define KHZ_MAX						(ULONG_MAX / KHZ)
+
 /* Assume that the bus is saturated if the utilization is 25% */
 #define BUS_SATURATION_RATIO					25
 
@@ -90,9 +96,10 @@ struct tegra_devfreq_device_config {
 	unsigned int	boost_down_threshold;
 
 	/*
-	 * Threshold of activity (cycles) below which the CPU frequency isn't
-	 * to be taken into account. This is to avoid increasing the EMC
-	 * frequency when the CPU is very busy but not accessing the bus often.
+	 * Threshold of activity (cycles translated to kHz) below which the
+	 * CPU frequency isn't to be taken into account. This is to avoid
+	 * increasing the EMC frequency when the CPU is very busy but not
+	 * accessing the bus often.
 	 */
 	u32		avg_dependency_threshold;
 };
@@ -102,7 +109,7 @@ enum tegra_actmon_device {
 	MCCPU,
 };
 
-static struct tegra_devfreq_device_config actmon_device_configs[] = {
+static const struct tegra_devfreq_device_config actmon_device_configs[] = {
 	{
 		/* MCALL: All memory accesses (including from the CPUs) */
 		.offset = 0x1c0,
@@ -117,10 +124,10 @@ static struct tegra_devfreq_device_config actmon_device_configs[] = {
 		.offset = 0x200,
 		.irq_mask = 1 << 25,
 		.boost_up_coeff = 800,
-		.boost_down_coeff = 90,
+		.boost_down_coeff = 40,
 		.boost_up_threshold = 27,
 		.boost_down_threshold = 10,
-		.avg_dependency_threshold = 50000,
+		.avg_dependency_threshold = 16000, /* 16MHz in kHz units */
 	},
 };
 
@@ -156,11 +163,16 @@ struct tegra_devfreq {
 	struct clk		*emc_clock;
 	unsigned long		max_freq;
 	unsigned long		cur_freq;
-	struct notifier_block	rate_change_nb;
+	struct notifier_block	clk_rate_change_nb;
+
+	struct delayed_work	cpufreq_update_work;
+	struct notifier_block	cpu_rate_change_nb;
 
 	struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)];
 
-	int irq;
+	unsigned int		irq;
+
+	bool			started;
 };
 
 struct tegra_actmon_emc_ratio {
@@ -168,8 +180,8 @@ struct tegra_actmon_emc_ratio {
 	unsigned long emc_freq;
 };
 
-static struct tegra_actmon_emc_ratio actmon_emc_ratios[] = {
-	{ 1400000, ULONG_MAX },
+static const struct tegra_actmon_emc_ratio actmon_emc_ratios[] = {
+	{ 1400000,    KHZ_MAX },
 	{ 1200000,    750000 },
 	{ 1100000,    600000 },
 	{ 1000000,    500000 },
@@ -199,18 +211,26 @@ static void device_writel(struct tegra_devfreq_device *dev, u32 val,
 	writel_relaxed(val, dev->regs + offset);
 }
 
-static unsigned long do_percent(unsigned long val, unsigned int pct)
+static unsigned long do_percent(unsigned long long val, unsigned int pct)
 {
-	return val * pct / 100;
+	val = val * pct;
+	do_div(val, 100);
+
+	/*
+	 * High freq + high boosting percent + large polling interval are
+	 * resulting in integer overflow when watermarks are calculated.
+	 */
+	return min_t(u64, val, U32_MAX);
 }
 
 static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
 					   struct tegra_devfreq_device *dev)
 {
-	u32 avg = dev->avg_count;
 	u32 avg_band_freq = tegra->max_freq * ACTMON_DEFAULT_AVG_BAND / KHZ;
-	u32 band = avg_band_freq * ACTMON_SAMPLING_PERIOD;
+	u32 band = avg_band_freq * tegra->devfreq->profile->polling_ms;
+	u32 avg;
 
+	avg = min(dev->avg_count, U32_MAX - band);
 	device_writel(dev, avg + band, ACTMON_DEV_AVG_UPPER_WMARK);
 
 	avg = max(dev->avg_count, band);
@@ -220,7 +240,7 @@ static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
 static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
 				       struct tegra_devfreq_device *dev)
 {
-	u32 val = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
+	u32 val = tegra->cur_freq * tegra->devfreq->profile->polling_ms;
 
 	device_writel(dev, do_percent(val, dev->config->boost_up_threshold),
 		      ACTMON_DEV_UPPER_WMARK);
@@ -229,12 +249,6 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
 		      ACTMON_DEV_LOWER_WMARK);
 }
 
-static void actmon_write_barrier(struct tegra_devfreq *tegra)
-{
-	/* ensure the update has reached the ACTMON */
-	readl(tegra->regs + ACTMON_GLB_STATUS);
-}
-
 static void actmon_isr_device(struct tegra_devfreq *tegra,
 			      struct tegra_devfreq_device *dev)
 {
@@ -256,10 +270,10 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
 
 		dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
 
-		if (dev->boost_freq >= tegra->max_freq)
+		if (dev->boost_freq >= tegra->max_freq) {
+			dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
 			dev->boost_freq = tegra->max_freq;
-		else
-			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
+		}
 	} else if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_LOWER) {
 		/*
 		 * new_boost = old_boost * down_coef
@@ -270,31 +284,22 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
 
 		dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
 
-		if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1))
-			dev->boost_freq = 0;
-		else
-			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
-	}
-
-	if (dev->config->avg_dependency_threshold) {
-		if (dev->avg_count >= dev->config->avg_dependency_threshold)
-			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
-		else if (dev->boost_freq == 0)
+		if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1)) {
 			dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
+			dev->boost_freq = 0;
+		}
 	}
 
 	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
 
 	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
-
-	actmon_write_barrier(tegra);
 }
 
 static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
 					    unsigned long cpu_freq)
 {
 	unsigned int i;
-	struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
+	const struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
 
 	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
 		if (cpu_freq >= ratio->cpu_freq) {
@@ -308,25 +313,37 @@ static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
 	return 0;
 }
 
+static unsigned long actmon_device_target_freq(struct tegra_devfreq *tegra,
+					       struct tegra_devfreq_device *dev)
+{
+	unsigned int avg_sustain_coef;
+	unsigned long target_freq;
+
+	target_freq = dev->avg_count / tegra->devfreq->profile->polling_ms;
+	avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
+	target_freq = do_percent(target_freq, avg_sustain_coef);
+
+	return target_freq;
+}
+
 static void actmon_update_target(struct tegra_devfreq *tegra,
 				 struct tegra_devfreq_device *dev)
 {
 	unsigned long cpu_freq = 0;
 	unsigned long static_cpu_emc_freq = 0;
-	unsigned int avg_sustain_coef;
 
-	if (dev->config->avg_dependency_threshold) {
-		cpu_freq = cpufreq_get(0);
-		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
-	}
+	dev->target_freq = actmon_device_target_freq(tegra, dev);
 
-	dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD;
-	avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
-	dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef);
-	dev->target_freq += dev->boost_freq;
+	if (dev->config->avg_dependency_threshold &&
+	    dev->config->avg_dependency_threshold <= dev->target_freq) {
+		cpu_freq = cpufreq_quick_get(0);
+		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
 
-	if (dev->avg_count >= dev->config->avg_dependency_threshold)
+		dev->target_freq += dev->boost_freq;
 		dev->target_freq = max(dev->target_freq, static_cpu_emc_freq);
+	} else {
+		dev->target_freq += dev->boost_freq;
+	}
 }
 
 static irqreturn_t actmon_thread_isr(int irq, void *data)
@@ -354,8 +371,8 @@ static irqreturn_t actmon_thread_isr(int irq, void *data)
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
-static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
-				       unsigned long action, void *ptr)
+static int tegra_actmon_clk_notify_cb(struct notifier_block *nb,
+				      unsigned long action, void *ptr)
 {
 	struct clk_notifier_data *data = ptr;
 	struct tegra_devfreq *tegra;
@@ -365,7 +382,7 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
 	if (action != POST_RATE_CHANGE)
 		return NOTIFY_OK;
 
-	tegra = container_of(nb, struct tegra_devfreq, rate_change_nb);
+	tegra = container_of(nb, struct tegra_devfreq, clk_rate_change_nb);
 
 	tegra->cur_freq = data->new_rate / KHZ;
 
@@ -375,7 +392,79 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
 		tegra_devfreq_update_wmark(tegra, dev);
 	}
 
-	actmon_write_barrier(tegra);
+	return NOTIFY_OK;
+}
+
+static void tegra_actmon_delayed_update(struct work_struct *work)
+{
+	struct tegra_devfreq *tegra = container_of(work, struct tegra_devfreq,
+						   cpufreq_update_work.work);
+
+	mutex_lock(&tegra->devfreq->lock);
+	update_devfreq(tegra->devfreq);
+	mutex_unlock(&tegra->devfreq->lock);
+}
+
+static unsigned long
+tegra_actmon_cpufreq_contribution(struct tegra_devfreq *tegra,
+				  unsigned int cpu_freq)
+{
+	struct tegra_devfreq_device *actmon_dev = &tegra->devices[MCCPU];
+	unsigned long static_cpu_emc_freq, dev_freq;
+
+	dev_freq = actmon_device_target_freq(tegra, actmon_dev);
+
+	/* check whether CPU's freq is taken into account at all */
+	if (dev_freq < actmon_dev->config->avg_dependency_threshold)
+		return 0;
+
+	static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
+
+	if (dev_freq >= static_cpu_emc_freq)
+		return 0;
+
+	return static_cpu_emc_freq;
+}
+
+static int tegra_actmon_cpu_notify_cb(struct notifier_block *nb,
+				      unsigned long action, void *ptr)
+{
+	struct cpufreq_freqs *freqs = ptr;
+	struct tegra_devfreq *tegra;
+	unsigned long old, new, delay;
+
+	if (action != CPUFREQ_POSTCHANGE)
+		return NOTIFY_OK;
+
+	tegra = container_of(nb, struct tegra_devfreq, cpu_rate_change_nb);
+
+	/*
+	 * Quickly check whether CPU frequency should be taken into account
+	 * at all, without blocking CPUFreq's core.
+	 */
+	if (mutex_trylock(&tegra->devfreq->lock)) {
+		old = tegra_actmon_cpufreq_contribution(tegra, freqs->old);
+		new = tegra_actmon_cpufreq_contribution(tegra, freqs->new);
+		mutex_unlock(&tegra->devfreq->lock);
+
+		/*
+		 * If CPU's frequency shouldn't be taken into account at
+		 * the moment, then there is no need to update the devfreq's
+		 * state because ISR will re-check CPU's frequency on the
+		 * next interrupt.
+		 */
+		if (old == new)
+			return NOTIFY_OK;
+	}
+
+	/*
+	 * CPUFreq driver should support CPUFREQ_ASYNC_NOTIFICATION in order
+	 * to allow asynchronous notifications. This means we can't block
+	 * here for too long, otherwise CPUFreq's core will complain with a
+	 * warning splat.
+	 */
+	delay = msecs_to_jiffies(ACTMON_SAMPLING_PERIOD);
+	schedule_delayed_work(&tegra->cpufreq_update_work, delay);
 
 	return NOTIFY_OK;
 }
@@ -385,9 +474,12 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
 {
 	u32 val = 0;
 
+	/* reset boosting on governor's restart */
+	dev->boost_freq = 0;
+
 	dev->target_freq = tegra->cur_freq;
 
-	dev->avg_count = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
+	dev->avg_count = tegra->cur_freq * tegra->devfreq->profile->polling_ms;
 	device_writel(dev, dev->avg_count, ACTMON_DEV_INIT_AVG);
 
 	tegra_devfreq_update_avg_wmark(tegra, dev);
@@ -405,45 +497,116 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
 		<< ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_NUM_SHIFT;
 	val |= ACTMON_DEV_CTRL_AVG_ABOVE_WMARK_EN;
 	val |= ACTMON_DEV_CTRL_AVG_BELOW_WMARK_EN;
-	val |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
 	val |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
 	val |= ACTMON_DEV_CTRL_ENB;
 
 	device_writel(dev, val, ACTMON_DEV_CTRL);
 }
 
-static void tegra_actmon_start(struct tegra_devfreq *tegra)
+static void tegra_actmon_stop_devices(struct tegra_devfreq *tegra)
 {
+	struct tegra_devfreq_device *dev = tegra->devices;
 	unsigned int i;
 
-	disable_irq(tegra->irq);
+	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++, dev++) {
+		device_writel(dev, ACTMON_DEV_CTRL_STOP, ACTMON_DEV_CTRL);
+		device_writel(dev, ACTMON_INTR_STATUS_CLEAR,
+			      ACTMON_DEV_INTR_STATUS);
+	}
+}
 
-	actmon_writel(tegra, ACTMON_SAMPLING_PERIOD - 1,
+static int tegra_actmon_resume(struct tegra_devfreq *tegra)
+{
+	unsigned int i;
+	int err;
+
+	if (!tegra->devfreq->profile->polling_ms || !tegra->started)
+		return 0;
+
+	actmon_writel(tegra, tegra->devfreq->profile->polling_ms - 1,
 		      ACTMON_GLB_PERIOD_CTRL);
 
+	/*
+	 * CLK notifications are needed in order to reconfigure the upper
+	 * consecutive watermark in accordance to the actual clock rate
+	 * to avoid unnecessary upper interrupts.
+	 */
+	err = clk_notifier_register(tegra->emc_clock,
+				    &tegra->clk_rate_change_nb);
+	if (err) {
+		dev_err(tegra->devfreq->dev.parent,
+			"Failed to register rate change notifier\n");
+		return err;
+	}
+
+	tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
+
 	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++)
 		tegra_actmon_configure_device(tegra, &tegra->devices[i]);
 
-	actmon_write_barrier(tegra);
+	/*
+	 * We are estimating CPU's memory bandwidth requirement based on
+	 * amount of memory accesses and system's load, judging by CPU's
+	 * frequency. We also don't want to receive events about CPU's
+	 * frequency transaction when governor is stopped, hence notifier
+	 * is registered dynamically.
+	 */
+	err = cpufreq_register_notifier(&tegra->cpu_rate_change_nb,
+					CPUFREQ_TRANSITION_NOTIFIER);
+	if (err) {
+		dev_err(tegra->devfreq->dev.parent,
+			"Failed to register rate change notifier: %d\n", err);
+		goto err_stop;
+	}
 
 	enable_irq(tegra->irq);
+
+	return 0;
+
+err_stop:
+	tegra_actmon_stop_devices(tegra);
+
+	clk_notifier_unregister(tegra->emc_clock, &tegra->clk_rate_change_nb);
+
+	return err;
 }
 
-static void tegra_actmon_stop(struct tegra_devfreq *tegra)
+static int tegra_actmon_start(struct tegra_devfreq *tegra)
 {
-	unsigned int i;
+	int ret = 0;
 
-	disable_irq(tegra->irq);
+	if (!tegra->started) {
+		tegra->started = true;
 
-	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
-		device_writel(&tegra->devices[i], 0x00000000, ACTMON_DEV_CTRL);
-		device_writel(&tegra->devices[i], ACTMON_INTR_STATUS_CLEAR,
-			      ACTMON_DEV_INTR_STATUS);
+		ret = tegra_actmon_resume(tegra);
+		if (ret)
+			tegra->started = false;
 	}
 
-	actmon_write_barrier(tegra);
+	return ret;
+}
 
-	enable_irq(tegra->irq);
+static void tegra_actmon_pause(struct tegra_devfreq *tegra)
+{
+	if (!tegra->devfreq->profile->polling_ms || !tegra->started)
+		return;
+
+	disable_irq(tegra->irq);
+
+	cpufreq_unregister_notifier(&tegra->cpu_rate_change_nb,
+				    CPUFREQ_TRANSITION_NOTIFIER);
+
+	cancel_delayed_work_sync(&tegra->cpufreq_update_work);
+
+	tegra_actmon_stop_devices(tegra);
+
+	clk_notifier_unregister(tegra->emc_clock, &tegra->clk_rate_change_nb);
+}
+
+static void tegra_actmon_stop(struct tegra_devfreq *tegra)
+{
+	tegra_actmon_pause(tegra);
+	tegra->started = false;
 }
 
 static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
@@ -463,7 +626,7 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
 	rate = dev_pm_opp_get_freq(opp);
 	dev_pm_opp_put(opp);
 
-	err = clk_set_min_rate(tegra->emc_clock, rate);
+	err = clk_set_min_rate(tegra->emc_clock, rate * KHZ);
 	if (err)
 		return err;
 
@@ -492,7 +655,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
 	stat->private_data = tegra;
 
 	/* The below are to be used by the other governors */
-	stat->current_frequency = cur_freq * KHZ;
+	stat->current_frequency = cur_freq;
 
 	actmon_dev = &tegra->devices[MCALL];
 
@@ -503,7 +666,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
 	stat->busy_time *= 100 / BUS_SATURATION_RATIO;
 
 	/* Number of cycles in a sampling period */
-	stat->total_time = ACTMON_SAMPLING_PERIOD * cur_freq;
+	stat->total_time = tegra->devfreq->profile->polling_ms * cur_freq;
 
 	stat->busy_time = min(stat->busy_time, stat->total_time);
 
@@ -511,7 +674,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
 }
 
 static struct devfreq_dev_profile tegra_devfreq_profile = {
-	.polling_ms	= 0,
+	.polling_ms	= ACTMON_SAMPLING_PERIOD,
 	.target		= tegra_devfreq_target,
 	.get_dev_status	= tegra_devfreq_get_dev_status,
 };
@@ -542,7 +705,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
 		target_freq = max(target_freq, dev->target_freq);
 	}
 
-	*freq = target_freq * KHZ;
+	*freq = target_freq;
 
 	return 0;
 }
@@ -551,11 +714,19 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
 					unsigned int event, void *data)
 {
 	struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent);
+	unsigned int *new_delay = data;
+	int ret = 0;
+
+	/*
+	 * Couple devfreq-device with the governor early because it is
+	 * needed at the moment of governor's start (used by ISR).
+	 */
+	tegra->devfreq = devfreq;
 
 	switch (event) {
 	case DEVFREQ_GOV_START:
 		devfreq_monitor_start(devfreq);
-		tegra_actmon_start(tegra);
+		ret = tegra_actmon_start(tegra);
 		break;
 
 	case DEVFREQ_GOV_STOP:
@@ -563,6 +734,21 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
 		devfreq_monitor_stop(devfreq);
 		break;
 
+	case DEVFREQ_GOV_INTERVAL:
+		/*
+		 * ACTMON hardware supports up to 256 milliseconds for the
+		 * sampling period.
+		 */
+		if (*new_delay > 256) {
+			ret = -EINVAL;
+			break;
+		}
+
+		tegra_actmon_pause(tegra);
+		devfreq_interval_update(devfreq, new_delay);
+		ret = tegra_actmon_resume(tegra);
+		break;
+
 	case DEVFREQ_GOV_SUSPEND:
 		tegra_actmon_stop(tegra);
 		devfreq_monitor_suspend(devfreq);
@@ -570,11 +756,11 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
 
 	case DEVFREQ_GOV_RESUME:
 		devfreq_monitor_resume(devfreq);
-		tegra_actmon_start(tegra);
+		ret = tegra_actmon_start(tegra);
 		break;
 	}
 
-	return 0;
+	return ret;
 }
 
 static struct devfreq_governor tegra_devfreq_governor = {
@@ -582,14 +768,16 @@ static struct devfreq_governor tegra_devfreq_governor = {
 	.get_target_freq = tegra_governor_get_target,
 	.event_handler = tegra_governor_event_handler,
 	.immutable = true,
+	.interrupt_driven = true,
 };
 
 static int tegra_devfreq_probe(struct platform_device *pdev)
 {
-	struct tegra_devfreq *tegra;
 	struct tegra_devfreq_device *dev;
+	struct tegra_devfreq *tegra;
+	struct devfreq *devfreq;
 	unsigned int i;
-	unsigned long rate;
+	long rate;
 	int err;
 
 	tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
@@ -618,12 +806,22 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 		return PTR_ERR(tegra->emc_clock);
 	}
 
-	tegra->irq = platform_get_irq(pdev, 0);
-	if (tegra->irq < 0) {
-		err = tegra->irq;
+	err = platform_get_irq(pdev, 0);
+	if (err < 0) {
 		dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err);
 		return err;
 	}
+	tegra->irq = err;
+
+	irq_set_status_flags(tegra->irq, IRQ_NOAUTOEN);
+
+	err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL,
+					actmon_thread_isr, IRQF_ONESHOT,
+					"tegra-devfreq", tegra);
+	if (err) {
+		dev_err(&pdev->dev, "Interrupt request failed: %d\n", err);
+		return err;
+	}
 
 	reset_control_assert(tegra->reset);
 
@@ -636,8 +834,13 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 
 	reset_control_deassert(tegra->reset);
 
-	tegra->max_freq = clk_round_rate(tegra->emc_clock, ULONG_MAX) / KHZ;
-	tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
+	rate = clk_round_rate(tegra->emc_clock, ULONG_MAX);
+	if (rate < 0) {
+		dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate);
+		return rate;
+	}
+
+	tegra->max_freq = rate / KHZ;
 
 	for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) {
 		dev = tegra->devices + i;
@@ -648,7 +851,14 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 	for (rate = 0; rate <= tegra->max_freq * KHZ; rate++) {
 		rate = clk_round_rate(tegra->emc_clock, rate);
 
-		err = dev_pm_opp_add(&pdev->dev, rate, 0);
+		if (rate < 0) {
+			dev_err(&pdev->dev,
+				"Failed to round clock rate: %ld\n", rate);
+			err = rate;
+			goto remove_opps;
+		}
+
+		err = dev_pm_opp_add(&pdev->dev, rate / KHZ, 0);
 		if (err) {
 			dev_err(&pdev->dev, "Failed to add OPP: %d\n", err);
 			goto remove_opps;
@@ -657,49 +867,33 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, tegra);
 
-	tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb;
-	err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb);
-	if (err) {
-		dev_err(&pdev->dev,
-			"Failed to register rate change notifier\n");
-		goto remove_opps;
-	}
+	tegra->clk_rate_change_nb.notifier_call = tegra_actmon_clk_notify_cb;
+	tegra->cpu_rate_change_nb.notifier_call = tegra_actmon_cpu_notify_cb;
+
+	INIT_DELAYED_WORK(&tegra->cpufreq_update_work,
+			  tegra_actmon_delayed_update);
 
 	err = devfreq_add_governor(&tegra_devfreq_governor);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to add governor: %d\n", err);
-		goto unreg_notifier;
+		goto remove_opps;
 	}
 
 	tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock);
-	tegra->devfreq = devfreq_add_device(&pdev->dev,
-					    &tegra_devfreq_profile,
-					    "tegra_actmon",
-					    NULL);
-	if (IS_ERR(tegra->devfreq)) {
-		err = PTR_ERR(tegra->devfreq);
-		goto remove_governor;
-	}
+	tegra_devfreq_profile.initial_freq /= KHZ;
 
-	err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL,
-					actmon_thread_isr, IRQF_ONESHOT,
-					"tegra-devfreq", tegra);
-	if (err) {
-		dev_err(&pdev->dev, "Interrupt request failed: %d\n", err);
-		goto remove_devfreq;
+	devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile,
+				     "tegra_actmon", NULL);
+	if (IS_ERR(devfreq)) {
+		err = PTR_ERR(devfreq);
+		goto remove_governor;
 	}
 
 	return 0;
 
-remove_devfreq:
-	devfreq_remove_device(tegra->devfreq);
-
 remove_governor:
 	devfreq_remove_governor(&tegra_devfreq_governor);
 
-unreg_notifier:
-	clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb);
-
 remove_opps:
 	dev_pm_opp_remove_all_dynamic(&pdev->dev);
 
@@ -716,7 +910,6 @@ static int tegra_devfreq_remove(struct platform_device *pdev)
 	devfreq_remove_device(tegra->devfreq);
 	devfreq_remove_governor(&tegra_devfreq_governor);
 
-	clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb);
 	dev_pm_opp_remove_all_dynamic(&pdev->dev);
 
 	reset_control_reset(tegra->reset);
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index a23b6752d11a..0613bb7770f5 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -44,4 +44,15 @@ config DMABUF_SELFTESTS
 	default n
 	depends on DMA_SHARED_BUFFER
 
+menuconfig DMABUF_HEAPS
+	bool "DMA-BUF Userland Memory Heaps"
+	select DMA_SHARED_BUFFER
+	help
+	  Choose this option to enable the DMA-BUF userland memory heaps.
+	  This options creates per heap chardevs in /dev/dma_heap/ which
+	  allows userspace to allocate dma-bufs that can be shared
+	  between drivers.
+
+source "drivers/dma-buf/heaps/Kconfig"
+
 endmenu
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 03479da06422..9c190026bfab 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
 	 dma-resv.o seqno-fence.o
+obj-$(CONFIG_DMABUF_HEAPS)	+= dma-heap.o
+obj-$(CONFIG_DMABUF_HEAPS)	+= heaps/
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)		+= udmabuf.o
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 433d91d710e4..d4097856c86b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -45,10 +45,10 @@ static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
 	size_t ret = 0;
 
 	dmabuf = dentry->d_fsdata;
-	mutex_lock(&dmabuf->lock);
+	dma_resv_lock(dmabuf->resv, NULL);
 	if (dmabuf->name)
 		ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
-	mutex_unlock(&dmabuf->lock);
+	dma_resv_unlock(dmabuf->resv);
 
 	return dynamic_dname(dentry, buffer, buflen, "/%s:%s",
 			     dentry->d_name.name, ret > 0 ? name : "");
@@ -334,7 +334,7 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	mutex_lock(&dmabuf->lock);
+	dma_resv_lock(dmabuf->resv, NULL);
 	if (!list_empty(&dmabuf->attachments)) {
 		ret = -EBUSY;
 		kfree(name);
@@ -344,7 +344,7 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
 	dmabuf->name = name;
 
 out_unlock:
-	mutex_unlock(&dmabuf->lock);
+	dma_resv_unlock(dmabuf->resv);
 	return ret;
 }
 
@@ -403,10 +403,10 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
 	/* Don't count the temporary reference taken inside procfs seq_show */
 	seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1);
 	seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name);
-	mutex_lock(&dmabuf->lock);
+	dma_resv_lock(dmabuf->resv, NULL);
 	if (dmabuf->name)
 		seq_printf(m, "name:\t%s\n", dmabuf->name);
-	mutex_unlock(&dmabuf->lock);
+	dma_resv_unlock(dmabuf->resv);
 }
 
 static const struct file_operations dma_buf_fops = {
@@ -415,9 +415,7 @@ static const struct file_operations dma_buf_fops = {
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,
 	.unlocked_ioctl	= dma_buf_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= dma_buf_ioctl,
-#endif
+	.compat_ioctl	= compat_ptr_ioctl,
 	.show_fdinfo	= dma_buf_show_fdinfo,
 };
 
@@ -525,6 +523,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (WARN_ON(exp_info->ops->cache_sgt_mapping &&
+		    exp_info->ops->dynamic_mapping))
+		return ERR_PTR(-EINVAL);
+
 	if (!try_module_get(exp_info->owner))
 		return ERR_PTR(-ENOENT);
 
@@ -645,10 +647,11 @@ void dma_buf_put(struct dma_buf *dmabuf)
 EXPORT_SYMBOL_GPL(dma_buf_put);
 
 /**
- * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
  * calls attach() of dma_buf_ops to allow device-specific attach functionality
- * @dmabuf:	[in]	buffer to attach device to.
- * @dev:	[in]	device to be attached.
+ * @dmabuf:		[in]	buffer to attach device to.
+ * @dev:		[in]	device to be attached.
+ * @dynamic_mapping:	[in]	calling convention for map/unmap
  *
  * Returns struct dma_buf_attachment pointer for this attachment. Attachments
  * must be cleaned up by calling dma_buf_detach().
@@ -662,8 +665,9 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * accessible to @dev, and cannot be moved to a more suitable place. This is
  * indicated with the error code -EBUSY.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-					  struct device *dev)
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       bool dynamic_mapping)
 {
 	struct dma_buf_attachment *attach;
 	int ret;
@@ -677,25 +681,69 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 
 	attach->dev = dev;
 	attach->dmabuf = dmabuf;
-
-	mutex_lock(&dmabuf->lock);
+	attach->dynamic_mapping = dynamic_mapping;
 
 	if (dmabuf->ops->attach) {
 		ret = dmabuf->ops->attach(dmabuf, attach);
 		if (ret)
 			goto err_attach;
 	}
+	dma_resv_lock(dmabuf->resv, NULL);
 	list_add(&attach->node, &dmabuf->attachments);
+	dma_resv_unlock(dmabuf->resv);
 
-	mutex_unlock(&dmabuf->lock);
+	/* When either the importer or the exporter can't handle dynamic
+	 * mappings we cache the mapping here to avoid issues with the
+	 * reservation object lock.
+	 */
+	if (dma_buf_attachment_is_dynamic(attach) !=
+	    dma_buf_is_dynamic(dmabuf)) {
+		struct sg_table *sgt;
+
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			dma_resv_lock(attach->dmabuf->resv, NULL);
+
+		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
+		if (!sgt)
+			sgt = ERR_PTR(-ENOMEM);
+		if (IS_ERR(sgt)) {
+			ret = PTR_ERR(sgt);
+			goto err_unlock;
+		}
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			dma_resv_unlock(attach->dmabuf->resv);
+		attach->sgt = sgt;
+		attach->dir = DMA_BIDIRECTIONAL;
+	}
 
 	return attach;
 
 err_attach:
 	kfree(attach);
-	mutex_unlock(&dmabuf->lock);
+	return ERR_PTR(ret);
+
+err_unlock:
+	if (dma_buf_is_dynamic(attach->dmabuf))
+		dma_resv_unlock(attach->dmabuf->resv);
+
+	dma_buf_detach(dmabuf, attach);
 	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
+
+/**
+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
+ * mapping.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	return dma_buf_dynamic_attach(dmabuf, dev, false);
+}
 EXPORT_SYMBOL_GPL(dma_buf_attach);
 
 /**
@@ -711,15 +759,22 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 	if (WARN_ON(!dmabuf || !attach))
 		return;
 
-	if (attach->sgt)
+	if (attach->sgt) {
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			dma_resv_lock(attach->dmabuf->resv, NULL);
+
 		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
 
-	mutex_lock(&dmabuf->lock);
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			dma_resv_unlock(attach->dmabuf->resv);
+	}
+
+	dma_resv_lock(dmabuf->resv, NULL);
 	list_del(&attach->node);
+	dma_resv_unlock(dmabuf->resv);
 	if (dmabuf->ops->detach)
 		dmabuf->ops->detach(dmabuf, attach);
 
-	mutex_unlock(&dmabuf->lock);
 	kfree(attach);
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
@@ -749,6 +804,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf))
 		return ERR_PTR(-EINVAL);
 
+	if (dma_buf_attachment_is_dynamic(attach))
+		dma_resv_assert_held(attach->dmabuf->resv);
+
 	if (attach->sgt) {
 		/*
 		 * Two mappings with different directions for the same
@@ -761,6 +819,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 		return attach->sgt;
 	}
 
+	if (dma_buf_is_dynamic(attach->dmabuf))
+		dma_resv_assert_held(attach->dmabuf->resv);
+
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
 	if (!sg_table)
 		sg_table = ERR_PTR(-ENOMEM);
@@ -793,9 +854,15 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
 		return;
 
+	if (dma_buf_attachment_is_dynamic(attach))
+		dma_resv_assert_held(attach->dmabuf->resv);
+
 	if (attach->sgt == sg_table)
 		return;
 
+	if (dma_buf_is_dynamic(attach->dmabuf))
+		dma_resv_assert_held(attach->dmabuf->resv);
+
 	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
@@ -811,29 +878,9 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
  *   with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access()
  *   access.
  *
- *   To support dma_buf objects residing in highmem cpu access is page-based
- *   using an api similar to kmap. Accessing a dma_buf is done in aligned chunks
- *   of PAGE_SIZE size. Before accessing a chunk it needs to be mapped, which
- *   returns a pointer in kernel virtual address space. Afterwards the chunk
- *   needs to be unmapped again. There is no limit on how often a given chunk
- *   can be mapped and unmapped, i.e. the importer does not need to call
- *   begin_cpu_access again before mapping the same chunk again.
- *
- *   Interfaces::
- *      void \*dma_buf_kmap(struct dma_buf \*, unsigned long);
- *      void dma_buf_kunmap(struct dma_buf \*, unsigned long, void \*);
- *
- *   Implementing the functions is optional for exporters and for importers all
- *   the restrictions of using kmap apply.
- *
- *   dma_buf kmap calls outside of the range specified in begin_cpu_access are
- *   undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
- *   the partial chunks at the beginning and end but may return stale or bogus
- *   data outside of the range (in these partial chunks).
- *
- *   For some cases the overhead of kmap can be too high, a vmap interface
- *   is introduced. This interface should be used very carefully, as vmalloc
- *   space is a limited resources on many architectures.
+ *   Since for most kernel internal dma-buf accesses need the entire buffer, a
+ *   vmap interface is introduced. Note that on very old 32-bit architectures
+ *   vmalloc space might be limited and result in vmap calls failing.
  *
  *   Interfaces::
  *      void \*dma_buf_vmap(struct dma_buf \*dmabuf)
@@ -983,43 +1030,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 
-/**
- * dma_buf_kmap - Map a page of the buffer object into kernel address space. The
- * same restrictions as for kmap and friends apply.
- * @dmabuf:	[in]	buffer to map page from.
- * @page_num:	[in]	page in PAGE_SIZE units to map.
- *
- * This call must always succeed, any necessary preparations that might fail
- * need to be done in begin_cpu_access.
- */
-void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
-{
-	WARN_ON(!dmabuf);
-
-	if (!dmabuf->ops->map)
-		return NULL;
-	return dmabuf->ops->map(dmabuf, page_num);
-}
-EXPORT_SYMBOL_GPL(dma_buf_kmap);
-
-/**
- * dma_buf_kunmap - Unmap a page obtained by dma_buf_kmap.
- * @dmabuf:	[in]	buffer to unmap page from.
- * @page_num:	[in]	page in PAGE_SIZE units to unmap.
- * @vaddr:	[in]	kernel space pointer obtained from dma_buf_kmap.
- *
- * This call must always succeed.
- */
-void dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long page_num,
-		    void *vaddr)
-{
-	WARN_ON(!dmabuf);
-
-	if (dmabuf->ops->unmap)
-		dmabuf->ops->unmap(dmabuf, page_num, vaddr);
-}
-EXPORT_SYMBOL_GPL(dma_buf_kunmap);
-
 
 /**
  * dma_buf_mmap - Setup up a userspace mmap with the given vma
@@ -1171,13 +1181,10 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		   "size", "flags", "mode", "count", "ino");
 
 	list_for_each_entry(buf_obj, &db_list.head, list_node) {
-		ret = mutex_lock_interruptible(&buf_obj->lock);
 
-		if (ret) {
-			seq_puts(s,
-				 "\tERROR locking buffer object: skipping\n");
-			continue;
-		}
+		ret = dma_resv_lock_interruptible(buf_obj->resv, NULL);
+		if (ret)
+			goto error_unlock;
 
 		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
 				buf_obj->size,
@@ -1223,19 +1230,23 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
+		dma_resv_unlock(buf_obj->resv);
 
 		seq_printf(s, "Total %d devices attached\n\n",
 				attach_count);
 
 		count++;
 		size += buf_obj->size;
-		mutex_unlock(&buf_obj->lock);
 	}
 
 	seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
 
 	mutex_unlock(&db_list.lock);
 	return 0;
+
+error_unlock:
+	mutex_unlock(&db_list.lock);
+	return ret;
 }
 
 DEFINE_SHOW_ATTRIBUTE(dma_buf_debug);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 2c136aee3e79..052a41e2451c 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -273,6 +273,30 @@ void dma_fence_free(struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_fence_free);
 
+static bool __dma_fence_enable_signaling(struct dma_fence *fence)
+{
+	bool was_set;
+
+	lockdep_assert_held(fence->lock);
+
+	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+				   &fence->flags);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return false;
+
+	if (!was_set && fence->ops->enable_signaling) {
+		trace_dma_fence_enable_signal(fence);
+
+		if (!fence->ops->enable_signaling(fence)) {
+			dma_fence_signal_locked(fence);
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /**
  * dma_fence_enable_sw_signaling - enable signaling on fence
  * @fence: the fence to enable
@@ -285,19 +309,12 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
 {
 	unsigned long flags;
 
-	if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-			      &fence->flags) &&
-	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
-	    fence->ops->enable_signaling) {
-		trace_dma_fence_enable_signal(fence);
-
-		spin_lock_irqsave(fence->lock, flags);
-
-		if (!fence->ops->enable_signaling(fence))
-			dma_fence_signal_locked(fence);
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return;
 
-		spin_unlock_irqrestore(fence->lock, flags);
-	}
+	spin_lock_irqsave(fence->lock, flags);
+	__dma_fence_enable_signaling(fence);
+	spin_unlock_irqrestore(fence->lock, flags);
 }
 EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 
@@ -331,7 +348,6 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 {
 	unsigned long flags;
 	int ret = 0;
-	bool was_set;
 
 	if (WARN_ON(!fence || !func))
 		return -EINVAL;
@@ -343,25 +359,14 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 
 	spin_lock_irqsave(fence->lock, flags);
 
-	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-				   &fence->flags);
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		ret = -ENOENT;
-	else if (!was_set && fence->ops->enable_signaling) {
-		trace_dma_fence_enable_signal(fence);
-
-		if (!fence->ops->enable_signaling(fence)) {
-			dma_fence_signal_locked(fence);
-			ret = -ENOENT;
-		}
-	}
-
-	if (!ret) {
+	if (__dma_fence_enable_signaling(fence)) {
 		cb->func = func;
 		list_add_tail(&cb->node, &fence->cb_list);
-	} else
+	} else {
 		INIT_LIST_HEAD(&cb->node);
+		ret = -ENOENT;
+	}
+
 	spin_unlock_irqrestore(fence->lock, flags);
 
 	return ret;
@@ -461,7 +466,6 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	struct default_wait_cb cb;
 	unsigned long flags;
 	signed long ret = timeout ? timeout : 1;
-	bool was_set;
 
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return ret;
@@ -473,21 +477,9 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 		goto out;
 	}
 
-	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-				   &fence->flags);
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+	if (!__dma_fence_enable_signaling(fence))
 		goto out;
 
-	if (!was_set && fence->ops->enable_signaling) {
-		trace_dma_fence_enable_signal(fence);
-
-		if (!fence->ops->enable_signaling(fence)) {
-			dma_fence_signal_locked(fence);
-			goto out;
-		}
-	}
-
 	if (!timeout) {
 		ret = 0;
 		goto out;
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
new file mode 100644
index 000000000000..afd22c9dbdcf
--- /dev/null
+++ b/drivers/dma-buf/dma-heap.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Framework for userspace DMA-BUF allocations
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Copyright (C) 2019 Linaro Ltd.
+ */
+
+#include <linux/cdev.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/err.h>
+#include <linux/xarray.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/dma-heap.h>
+#include <uapi/linux/dma-heap.h>
+
+#define DEVNAME "dma_heap"
+
+#define NUM_HEAP_MINORS 128
+
+/**
+ * struct dma_heap - represents a dmabuf heap in the system
+ * @name:		used for debugging/device-node name
+ * @ops:		ops struct for this heap
+ * @heap_devt		heap device node
+ * @list		list head connecting to list of heaps
+ * @heap_cdev		heap char device
+ *
+ * Represents a heap of memory from which buffers can be made.
+ */
+struct dma_heap {
+	const char *name;
+	const struct dma_heap_ops *ops;
+	void *priv;
+	dev_t heap_devt;
+	struct list_head list;
+	struct cdev heap_cdev;
+};
+
+static LIST_HEAD(heap_list);
+static DEFINE_MUTEX(heap_list_lock);
+static dev_t dma_heap_devt;
+static struct class *dma_heap_class;
+static DEFINE_XARRAY_ALLOC(dma_heap_minors);
+
+static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
+				 unsigned int fd_flags,
+				 unsigned int heap_flags)
+{
+	/*
+	 * Allocations from all heaps have to begin
+	 * and end on page boundaries.
+	 */
+	len = PAGE_ALIGN(len);
+	if (!len)
+		return -EINVAL;
+
+	return heap->ops->allocate(heap, len, fd_flags, heap_flags);
+}
+
+static int dma_heap_open(struct inode *inode, struct file *file)
+{
+	struct dma_heap *heap;
+
+	heap = xa_load(&dma_heap_minors, iminor(inode));
+	if (!heap) {
+		pr_err("dma_heap: minor %d unknown.\n", iminor(inode));
+		return -ENODEV;
+	}
+
+	/* instance data as context */
+	file->private_data = heap;
+	nonseekable_open(inode, file);
+
+	return 0;
+}
+
+static long dma_heap_ioctl_allocate(struct file *file, void *data)
+{
+	struct dma_heap_allocation_data *heap_allocation = data;
+	struct dma_heap *heap = file->private_data;
+	int fd;
+
+	if (heap_allocation->fd)
+		return -EINVAL;
+
+	if (heap_allocation->fd_flags & ~DMA_HEAP_VALID_FD_FLAGS)
+		return -EINVAL;
+
+	if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS)
+		return -EINVAL;
+
+	fd = dma_heap_buffer_alloc(heap, heap_allocation->len,
+				   heap_allocation->fd_flags,
+				   heap_allocation->heap_flags);
+	if (fd < 0)
+		return fd;
+
+	heap_allocation->fd = fd;
+
+	return 0;
+}
+
+static unsigned int dma_heap_ioctl_cmds[] = {
+	DMA_HEAP_IOCTL_ALLOC,
+};
+
+static long dma_heap_ioctl(struct file *file, unsigned int ucmd,
+			   unsigned long arg)
+{
+	char stack_kdata[128];
+	char *kdata = stack_kdata;
+	unsigned int kcmd;
+	unsigned int in_size, out_size, drv_size, ksize;
+	int nr = _IOC_NR(ucmd);
+	int ret = 0;
+
+	if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds))
+		return -EINVAL;
+
+	/* Get the kernel ioctl cmd that matches */
+	kcmd = dma_heap_ioctl_cmds[nr];
+
+	/* Figure out the delta between user cmd size and kernel cmd size */
+	drv_size = _IOC_SIZE(kcmd);
+	out_size = _IOC_SIZE(ucmd);
+	in_size = out_size;
+	if ((ucmd & kcmd & IOC_IN) == 0)
+		in_size = 0;
+	if ((ucmd & kcmd & IOC_OUT) == 0)
+		out_size = 0;
+	ksize = max(max(in_size, out_size), drv_size);
+
+	/* If necessary, allocate buffer for ioctl argument */
+	if (ksize > sizeof(stack_kdata)) {
+		kdata = kmalloc(ksize, GFP_KERNEL);
+		if (!kdata)
+			return -ENOMEM;
+	}
+
+	if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	/* zero out any difference between the kernel/user structure size */
+	if (ksize > in_size)
+		memset(kdata + in_size, 0, ksize - in_size);
+
+	switch (kcmd) {
+	case DMA_HEAP_IOCTL_ALLOC:
+		ret = dma_heap_ioctl_allocate(file, kdata);
+		break;
+	default:
+		ret = -ENOTTY;
+		goto err;
+	}
+
+	if (copy_to_user((void __user *)arg, kdata, out_size) != 0)
+		ret = -EFAULT;
+err:
+	if (kdata != stack_kdata)
+		kfree(kdata);
+	return ret;
+}
+
+static const struct file_operations dma_heap_fops = {
+	.owner          = THIS_MODULE,
+	.open		= dma_heap_open,
+	.unlocked_ioctl = dma_heap_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= dma_heap_ioctl,
+#endif
+};
+
+/**
+ * dma_heap_get_drvdata() - get per-subdriver data for the heap
+ * @heap: DMA-Heap to retrieve private data for
+ *
+ * Returns:
+ * The per-subdriver data for the heap.
+ */
+void *dma_heap_get_drvdata(struct dma_heap *heap)
+{
+	return heap->priv;
+}
+
+struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info)
+{
+	struct dma_heap *heap, *h, *err_ret;
+	struct device *dev_ret;
+	unsigned int minor;
+	int ret;
+
+	if (!exp_info->name || !strcmp(exp_info->name, "")) {
+		pr_err("dma_heap: Cannot add heap without a name\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!exp_info->ops || !exp_info->ops->allocate) {
+		pr_err("dma_heap: Cannot add heap with invalid ops struct\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* check the name is unique */
+	mutex_lock(&heap_list_lock);
+	list_for_each_entry(h, &heap_list, list) {
+		if (!strcmp(h->name, exp_info->name)) {
+			mutex_unlock(&heap_list_lock);
+			pr_err("dma_heap: Already registered heap named %s\n",
+			       exp_info->name);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+	mutex_unlock(&heap_list_lock);
+
+	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
+	if (!heap)
+		return ERR_PTR(-ENOMEM);
+
+	heap->name = exp_info->name;
+	heap->ops = exp_info->ops;
+	heap->priv = exp_info->priv;
+
+	/* Find unused minor number */
+	ret = xa_alloc(&dma_heap_minors, &minor, heap,
+		       XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL);
+	if (ret < 0) {
+		pr_err("dma_heap: Unable to get minor number for heap\n");
+		err_ret = ERR_PTR(ret);
+		goto err0;
+	}
+
+	/* Create device */
+	heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), minor);
+
+	cdev_init(&heap->heap_cdev, &dma_heap_fops);
+	ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1);
+	if (ret < 0) {
+		pr_err("dma_heap: Unable to add char device\n");
+		err_ret = ERR_PTR(ret);
+		goto err1;
+	}
+
+	dev_ret = device_create(dma_heap_class,
+				NULL,
+				heap->heap_devt,
+				NULL,
+				heap->name);
+	if (IS_ERR(dev_ret)) {
+		pr_err("dma_heap: Unable to create device\n");
+		err_ret = ERR_CAST(dev_ret);
+		goto err2;
+	}
+	/* Add heap to the list */
+	mutex_lock(&heap_list_lock);
+	list_add(&heap->list, &heap_list);
+	mutex_unlock(&heap_list_lock);
+
+	return heap;
+
+err2:
+	cdev_del(&heap->heap_cdev);
+err1:
+	xa_erase(&dma_heap_minors, minor);
+err0:
+	kfree(heap);
+	return err_ret;
+}
+
+static char *dma_heap_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev));
+}
+
+static int dma_heap_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME);
+	if (ret)
+		return ret;
+
+	dma_heap_class = class_create(THIS_MODULE, DEVNAME);
+	if (IS_ERR(dma_heap_class)) {
+		unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS);
+		return PTR_ERR(dma_heap_class);
+	}
+	dma_heap_class->devnode = dma_heap_devnode;
+
+	return 0;
+}
+subsys_initcall(dma_heap_init);
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 709002515550..4264e64788c4 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -34,6 +34,7 @@
 
 #include <linux/dma-resv.h>
 #include <linux/export.h>
+#include <linux/sched/mm.h>
 
 /**
  * DOC: Reservation Object Overview
@@ -95,6 +96,37 @@ static void dma_resv_list_free(struct dma_resv_list *list)
 	kfree_rcu(list, rcu);
 }
 
+#if IS_ENABLED(CONFIG_LOCKDEP)
+static int __init dma_resv_lockdep(void)
+{
+	struct mm_struct *mm = mm_alloc();
+	struct ww_acquire_ctx ctx;
+	struct dma_resv obj;
+	int ret;
+
+	if (!mm)
+		return -ENOMEM;
+
+	dma_resv_init(&obj);
+
+	down_read(&mm->mmap_sem);
+	ww_acquire_init(&ctx, &reservation_ww_class);
+	ret = dma_resv_lock(&obj, &ctx);
+	if (ret == -EDEADLK)
+		dma_resv_lock_slow(&obj, &ctx);
+	fs_reclaim_acquire(GFP_KERNEL);
+	fs_reclaim_release(GFP_KERNEL);
+	ww_mutex_unlock(&obj.lock);
+	ww_acquire_fini(&ctx);
+	up_read(&mm->mmap_sem);
+	
+	mmput(mm);
+
+	return 0;
+}
+subsys_initcall(dma_resv_lockdep);
+#endif
+
 /**
  * dma_resv_init - initialize a reservation object
  * @obj: the reservation object
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
new file mode 100644
index 000000000000..a5eef06c4226
--- /dev/null
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -0,0 +1,14 @@
+config DMABUF_HEAPS_SYSTEM
+	bool "DMA-BUF System Heap"
+	depends on DMABUF_HEAPS
+	help
+	  Choose this option to enable the system dmabuf heap. The system heap
+	  is backed by pages from the buddy allocator. If in doubt, say Y.
+
+config DMABUF_HEAPS_CMA
+	bool "DMA-BUF CMA Heap"
+	depends on DMABUF_HEAPS && DMA_CMA
+	help
+	  Choose this option to enable dma-buf CMA heap. This heap is backed
+	  by the Contiguous Memory Allocator (CMA). If your system has these
+	  regions, you should say Y here.
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
new file mode 100644
index 000000000000..6e54cdec3da0
--- /dev/null
+++ b/drivers/dma-buf/heaps/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y					+= heap-helpers.o
+obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)	+= system_heap.o
+obj-$(CONFIG_DMABUF_HEAPS_CMA)		+= cma_heap.o
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
new file mode 100644
index 000000000000..626cf7fd033a
--- /dev/null
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMABUF CMA heap exporter
+ *
+ * Copyright (C) 2012, 2019 Linaro Ltd.
+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
+ */
+
+#include <linux/cma.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/dma-contiguous.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/sched/signal.h>
+
+#include "heap-helpers.h"
+
+struct cma_heap {
+	struct dma_heap *heap;
+	struct cma *cma;
+};
+
+static void cma_heap_free(struct heap_helper_buffer *buffer)
+{
+	struct cma_heap *cma_heap = dma_heap_get_drvdata(buffer->heap);
+	unsigned long nr_pages = buffer->pagecount;
+	struct page *cma_pages = buffer->priv_virt;
+
+	/* free page list */
+	kfree(buffer->pages);
+	/* release memory */
+	cma_release(cma_heap->cma, cma_pages, nr_pages);
+	kfree(buffer);
+}
+
+/* dmabuf heap CMA operations functions */
+static int cma_heap_allocate(struct dma_heap *heap,
+			     unsigned long len,
+			     unsigned long fd_flags,
+			     unsigned long heap_flags)
+{
+	struct cma_heap *cma_heap = dma_heap_get_drvdata(heap);
+	struct heap_helper_buffer *helper_buffer;
+	struct page *cma_pages;
+	size_t size = PAGE_ALIGN(len);
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long align = get_order(size);
+	struct dma_buf *dmabuf;
+	int ret = -ENOMEM;
+	pgoff_t pg;
+
+	if (align > CONFIG_CMA_ALIGNMENT)
+		align = CONFIG_CMA_ALIGNMENT;
+
+	helper_buffer = kzalloc(sizeof(*helper_buffer), GFP_KERNEL);
+	if (!helper_buffer)
+		return -ENOMEM;
+
+	init_heap_helper_buffer(helper_buffer, cma_heap_free);
+	helper_buffer->heap = heap;
+	helper_buffer->size = len;
+
+	cma_pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
+	if (!cma_pages)
+		goto free_buf;
+
+	if (PageHighMem(cma_pages)) {
+		unsigned long nr_clear_pages = nr_pages;
+		struct page *page = cma_pages;
+
+		while (nr_clear_pages > 0) {
+			void *vaddr = kmap_atomic(page);
+
+			memset(vaddr, 0, PAGE_SIZE);
+			kunmap_atomic(vaddr);
+			/*
+			 * Avoid wasting time zeroing memory if the process
+			 * has been killed by by SIGKILL
+			 */
+			if (fatal_signal_pending(current))
+				goto free_cma;
+
+			page++;
+			nr_clear_pages--;
+		}
+	} else {
+		memset(page_address(cma_pages), 0, size);
+	}
+
+	helper_buffer->pagecount = nr_pages;
+	helper_buffer->pages = kmalloc_array(helper_buffer->pagecount,
+					     sizeof(*helper_buffer->pages),
+					     GFP_KERNEL);
+	if (!helper_buffer->pages) {
+		ret = -ENOMEM;
+		goto free_cma;
+	}
+
+	for (pg = 0; pg < helper_buffer->pagecount; pg++)
+		helper_buffer->pages[pg] = &cma_pages[pg];
+
+	/* create the dmabuf */
+	dmabuf = heap_helper_export_dmabuf(helper_buffer, fd_flags);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto free_pages;
+	}
+
+	helper_buffer->dmabuf = dmabuf;
+	helper_buffer->priv_virt = cma_pages;
+
+	ret = dma_buf_fd(dmabuf, fd_flags);
+	if (ret < 0) {
+		dma_buf_put(dmabuf);
+		/* just return, as put will call release and that will free */
+		return ret;
+	}
+
+	return ret;
+
+free_pages:
+	kfree(helper_buffer->pages);
+free_cma:
+	cma_release(cma_heap->cma, cma_pages, nr_pages);
+free_buf:
+	kfree(helper_buffer);
+	return ret;
+}
+
+static const struct dma_heap_ops cma_heap_ops = {
+	.allocate = cma_heap_allocate,
+};
+
+static int __add_cma_heap(struct cma *cma, void *data)
+{
+	struct cma_heap *cma_heap;
+	struct dma_heap_export_info exp_info;
+
+	cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL);
+	if (!cma_heap)
+		return -ENOMEM;
+	cma_heap->cma = cma;
+
+	exp_info.name = cma_get_name(cma);
+	exp_info.ops = &cma_heap_ops;
+	exp_info.priv = cma_heap;
+
+	cma_heap->heap = dma_heap_add(&exp_info);
+	if (IS_ERR(cma_heap->heap)) {
+		int ret = PTR_ERR(cma_heap->heap);
+
+		kfree(cma_heap);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int add_default_cma_heap(void)
+{
+	struct cma *default_cma = dev_get_cma_area(NULL);
+	int ret = 0;
+
+	if (default_cma)
+		ret = __add_cma_heap(default_cma, NULL);
+
+	return ret;
+}
+module_init(add_default_cma_heap);
+MODULE_DESCRIPTION("DMA-BUF CMA Heap");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma-buf/heaps/heap-helpers.c b/drivers/dma-buf/heaps/heap-helpers.c
new file mode 100644
index 000000000000..9f964ca3f59c
--- /dev/null
+++ b/drivers/dma-buf/heaps/heap-helpers.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <uapi/linux/dma-heap.h>
+
+#include "heap-helpers.h"
+
+void init_heap_helper_buffer(struct heap_helper_buffer *buffer,
+			     void (*free)(struct heap_helper_buffer *))
+{
+	buffer->priv_virt = NULL;
+	mutex_init(&buffer->lock);
+	buffer->vmap_cnt = 0;
+	buffer->vaddr = NULL;
+	buffer->pagecount = 0;
+	buffer->pages = NULL;
+	INIT_LIST_HEAD(&buffer->attachments);
+	buffer->free = free;
+}
+
+struct dma_buf *heap_helper_export_dmabuf(struct heap_helper_buffer *buffer,
+					  int fd_flags)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.ops = &heap_helper_ops;
+	exp_info.size = buffer->size;
+	exp_info.flags = fd_flags;
+	exp_info.priv = buffer;
+
+	return dma_buf_export(&exp_info);
+}
+
+static void *dma_heap_map_kernel(struct heap_helper_buffer *buffer)
+{
+	void *vaddr;
+
+	vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
+	if (!vaddr)
+		return ERR_PTR(-ENOMEM);
+
+	return vaddr;
+}
+
+static void dma_heap_buffer_destroy(struct heap_helper_buffer *buffer)
+{
+	if (buffer->vmap_cnt > 0) {
+		WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
+		vunmap(buffer->vaddr);
+	}
+
+	buffer->free(buffer);
+}
+
+static void *dma_heap_buffer_vmap_get(struct heap_helper_buffer *buffer)
+{
+	void *vaddr;
+
+	if (buffer->vmap_cnt) {
+		buffer->vmap_cnt++;
+		return buffer->vaddr;
+	}
+	vaddr = dma_heap_map_kernel(buffer);
+	if (IS_ERR(vaddr))
+		return vaddr;
+	buffer->vaddr = vaddr;
+	buffer->vmap_cnt++;
+	return vaddr;
+}
+
+static void dma_heap_buffer_vmap_put(struct heap_helper_buffer *buffer)
+{
+	if (!--buffer->vmap_cnt) {
+		vunmap(buffer->vaddr);
+		buffer->vaddr = NULL;
+	}
+}
+
+struct dma_heaps_attachment {
+	struct device *dev;
+	struct sg_table table;
+	struct list_head list;
+};
+
+static int dma_heap_attach(struct dma_buf *dmabuf,
+			   struct dma_buf_attachment *attachment)
+{
+	struct dma_heaps_attachment *a;
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+	int ret;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
+					buffer->pagecount, 0,
+					buffer->pagecount << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		kfree(a);
+		return ret;
+	}
+
+	a->dev = attachment->dev;
+	INIT_LIST_HEAD(&a->list);
+
+	attachment->priv = a;
+
+	mutex_lock(&buffer->lock);
+	list_add(&a->list, &buffer->attachments);
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static void dma_heap_detach(struct dma_buf *dmabuf,
+			    struct dma_buf_attachment *attachment)
+{
+	struct dma_heaps_attachment *a = attachment->priv;
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	list_del(&a->list);
+	mutex_unlock(&buffer->lock);
+
+	sg_free_table(&a->table);
+	kfree(a);
+}
+
+static
+struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+				      enum dma_data_direction direction)
+{
+	struct dma_heaps_attachment *a = attachment->priv;
+	struct sg_table *table;
+
+	table = &a->table;
+
+	if (!dma_map_sg(attachment->dev, table->sgl, table->nents,
+			direction))
+		table = ERR_PTR(-ENOMEM);
+	return table;
+}
+
+static void dma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+				   struct sg_table *table,
+				   enum dma_data_direction direction)
+{
+	dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction);
+}
+
+static vm_fault_t dma_heap_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct heap_helper_buffer *buffer = vma->vm_private_data;
+
+	if (vmf->pgoff > buffer->pagecount)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = buffer->pages[vmf->pgoff];
+	get_page(vmf->page);
+
+	return 0;
+}
+
+static const struct vm_operations_struct dma_heap_vm_ops = {
+	.fault = dma_heap_vm_fault,
+};
+
+static int dma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &dma_heap_vm_ops;
+	vma->vm_private_data = buffer;
+
+	return 0;
+}
+
+static void dma_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+
+	dma_heap_buffer_destroy(buffer);
+}
+
+static int dma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+					     enum dma_data_direction direction)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+	struct dma_heaps_attachment *a;
+	int ret = 0;
+
+	mutex_lock(&buffer->lock);
+
+	if (buffer->vmap_cnt)
+		invalidate_kernel_vmap_range(buffer->vaddr, buffer->size);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		dma_sync_sg_for_cpu(a->dev, a->table.sgl, a->table.nents,
+				    direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return ret;
+}
+
+static int dma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+					   enum dma_data_direction direction)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+	struct dma_heaps_attachment *a;
+
+	mutex_lock(&buffer->lock);
+
+	if (buffer->vmap_cnt)
+		flush_kernel_vmap_range(buffer->vaddr, buffer->size);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		dma_sync_sg_for_device(a->dev, a->table.sgl, a->table.nents,
+				       direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static void *dma_heap_dma_buf_vmap(struct dma_buf *dmabuf)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+	void *vaddr;
+
+	mutex_lock(&buffer->lock);
+	vaddr = dma_heap_buffer_vmap_get(buffer);
+	mutex_unlock(&buffer->lock);
+
+	return vaddr;
+}
+
+static void dma_heap_dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
+{
+	struct heap_helper_buffer *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	dma_heap_buffer_vmap_put(buffer);
+	mutex_unlock(&buffer->lock);
+}
+
+const struct dma_buf_ops heap_helper_ops = {
+	.map_dma_buf = dma_heap_map_dma_buf,
+	.unmap_dma_buf = dma_heap_unmap_dma_buf,
+	.mmap = dma_heap_mmap,
+	.release = dma_heap_dma_buf_release,
+	.attach = dma_heap_attach,
+	.detach = dma_heap_detach,
+	.begin_cpu_access = dma_heap_dma_buf_begin_cpu_access,
+	.end_cpu_access = dma_heap_dma_buf_end_cpu_access,
+	.vmap = dma_heap_dma_buf_vmap,
+	.vunmap = dma_heap_dma_buf_vunmap,
+};
diff --git a/drivers/dma-buf/heaps/heap-helpers.h b/drivers/dma-buf/heaps/heap-helpers.h
new file mode 100644
index 000000000000..805d2df88024
--- /dev/null
+++ b/drivers/dma-buf/heaps/heap-helpers.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DMABUF Heaps helper code
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Copyright (C) 2019 Linaro Ltd.
+ */
+
+#ifndef _HEAP_HELPERS_H
+#define _HEAP_HELPERS_H
+
+#include <linux/dma-heap.h>
+#include <linux/list.h>
+
+/**
+ * struct heap_helper_buffer - helper buffer metadata
+ * @heap:		back pointer to the heap the buffer came from
+ * @dmabuf:		backing dma-buf for this buffer
+ * @size:		size of the buffer
+ * @priv_virt		pointer to heap specific private value
+ * @lock		mutext to protect the data in this structure
+ * @vmap_cnt		count of vmap references on the buffer
+ * @vaddr		vmap'ed virtual address
+ * @pagecount		number of pages in the buffer
+ * @pages		list of page pointers
+ * @attachments		list of device attachments
+ *
+ * @free		heap callback to free the buffer
+ */
+struct heap_helper_buffer {
+	struct dma_heap *heap;
+	struct dma_buf *dmabuf;
+	size_t size;
+
+	void *priv_virt;
+	struct mutex lock;
+	int vmap_cnt;
+	void *vaddr;
+	pgoff_t pagecount;
+	struct page **pages;
+	struct list_head attachments;
+
+	void (*free)(struct heap_helper_buffer *buffer);
+};
+
+void init_heap_helper_buffer(struct heap_helper_buffer *buffer,
+			     void (*free)(struct heap_helper_buffer *));
+
+struct dma_buf *heap_helper_export_dmabuf(struct heap_helper_buffer *buffer,
+					  int fd_flags);
+
+extern const struct dma_buf_ops heap_helper_ops;
+#endif /* _HEAP_HELPERS_H */
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
new file mode 100644
index 000000000000..0bf688e3c023
--- /dev/null
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMABUF System heap exporter
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Copyright (C) 2019 Linaro Ltd.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-heap.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+#include <asm/page.h>
+
+#include "heap-helpers.h"
+
+struct dma_heap *sys_heap;
+
+static void system_heap_free(struct heap_helper_buffer *buffer)
+{
+	pgoff_t pg;
+
+	for (pg = 0; pg < buffer->pagecount; pg++)
+		__free_page(buffer->pages[pg]);
+	kfree(buffer->pages);
+	kfree(buffer);
+}
+
+static int system_heap_allocate(struct dma_heap *heap,
+				unsigned long len,
+				unsigned long fd_flags,
+				unsigned long heap_flags)
+{
+	struct heap_helper_buffer *helper_buffer;
+	struct dma_buf *dmabuf;
+	int ret = -ENOMEM;
+	pgoff_t pg;
+
+	helper_buffer = kzalloc(sizeof(*helper_buffer), GFP_KERNEL);
+	if (!helper_buffer)
+		return -ENOMEM;
+
+	init_heap_helper_buffer(helper_buffer, system_heap_free);
+	helper_buffer->heap = heap;
+	helper_buffer->size = len;
+
+	helper_buffer->pagecount = len / PAGE_SIZE;
+	helper_buffer->pages = kmalloc_array(helper_buffer->pagecount,
+					     sizeof(*helper_buffer->pages),
+					     GFP_KERNEL);
+	if (!helper_buffer->pages) {
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	for (pg = 0; pg < helper_buffer->pagecount; pg++) {
+		/*
+		 * Avoid trying to allocate memory if the process
+		 * has been killed by by SIGKILL
+		 */
+		if (fatal_signal_pending(current))
+			goto err1;
+
+		helper_buffer->pages[pg] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!helper_buffer->pages[pg])
+			goto err1;
+	}
+
+	/* create the dmabuf */
+	dmabuf = heap_helper_export_dmabuf(helper_buffer, fd_flags);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto err1;
+	}
+
+	helper_buffer->dmabuf = dmabuf;
+
+	ret = dma_buf_fd(dmabuf, fd_flags);
+	if (ret < 0) {
+		dma_buf_put(dmabuf);
+		/* just return, as put will call release and that will free */
+		return ret;
+	}
+
+	return ret;
+
+err1:
+	while (pg > 0)
+		__free_page(helper_buffer->pages[--pg]);
+	kfree(helper_buffer->pages);
+err0:
+	kfree(helper_buffer);
+
+	return ret;
+}
+
+static const struct dma_heap_ops system_heap_ops = {
+	.allocate = system_heap_allocate,
+};
+
+static int system_heap_create(void)
+{
+	struct dma_heap_export_info exp_info;
+	int ret = 0;
+
+	exp_info.name = "system";
+	exp_info.ops = &system_heap_ops;
+	exp_info.priv = NULL;
+
+	sys_heap = dma_heap_add(&exp_info);
+	if (IS_ERR(sys_heap))
+		ret = PTR_ERR(sys_heap);
+
+	return ret;
+}
+module_init(system_heap_create);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 6713cfb1995c..348b3a9170fa 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -408,5 +408,5 @@ const struct file_operations sw_sync_debugfs_fops = {
 	.open           = sw_sync_debugfs_open,
 	.release        = sw_sync_debugfs_release,
 	.unlocked_ioctl = sw_sync_ioctl,
-	.compat_ioctl	= sw_sync_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 };
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 25c5c071645b..5a5a1da01a00 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -221,7 +221,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 	a_fences = get_fences(a, &a_num_fences);
 	b_fences = get_fences(b, &b_num_fences);
 	if (a_num_fences > INT_MAX - b_num_fences)
-		return NULL;
+		goto err;
 
 	num_fences = a_num_fences + b_num_fences;
 
@@ -480,5 +480,5 @@ static const struct file_operations sync_file_fops = {
 	.release = sync_file_release,
 	.poll = sync_file_poll,
 	.unlocked_ioctl = sync_file_ioctl,
-	.compat_ioctl = sync_file_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 };
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 9635897458a0..acb26c627d27 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -18,6 +18,8 @@ static const size_t size_limit_mb = 64; /* total dmabuf size, in megabytes  */
 struct udmabuf {
 	pgoff_t pagecount;
 	struct page **pages;
+	struct sg_table *sg;
+	struct miscdevice *device;
 };
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -46,10 +48,10 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
 	return 0;
 }
 
-static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
-				    enum dma_data_direction direction)
+static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
+				     enum dma_data_direction direction)
 {
-	struct udmabuf *ubuf = at->dmabuf->priv;
+	struct udmabuf *ubuf = buf->priv;
 	struct sg_table *sg;
 	int ret;
 
@@ -61,7 +63,7 @@ static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
 					GFP_KERNEL);
 	if (ret < 0)
 		goto err;
-	if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction)) {
+	if (!dma_map_sg(dev, sg->sgl, sg->nents, direction)) {
 		ret = -EINVAL;
 		goto err;
 	}
@@ -73,54 +75,89 @@ err:
 	return ERR_PTR(ret);
 }
 
+static void put_sg_table(struct device *dev, struct sg_table *sg,
+			 enum dma_data_direction direction)
+{
+	dma_unmap_sg(dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+}
+
+static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
+				    enum dma_data_direction direction)
+{
+	return get_sg_table(at->dev, at->dmabuf, direction);
+}
+
 static void unmap_udmabuf(struct dma_buf_attachment *at,
 			  struct sg_table *sg,
 			  enum dma_data_direction direction)
 {
-	dma_unmap_sg(at->dev, sg->sgl, sg->nents, direction);
-	sg_free_table(sg);
-	kfree(sg);
+	return put_sg_table(at->dev, sg, direction);
 }
 
 static void release_udmabuf(struct dma_buf *buf)
 {
 	struct udmabuf *ubuf = buf->priv;
+	struct device *dev = ubuf->device->this_device;
 	pgoff_t pg;
 
+	if (ubuf->sg)
+		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
+
 	for (pg = 0; pg < ubuf->pagecount; pg++)
 		put_page(ubuf->pages[pg]);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 }
 
-static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
+static int begin_cpu_udmabuf(struct dma_buf *buf,
+			     enum dma_data_direction direction)
 {
 	struct udmabuf *ubuf = buf->priv;
-	struct page *page = ubuf->pages[page_num];
+	struct device *dev = ubuf->device->this_device;
+
+	if (!ubuf->sg) {
+		ubuf->sg = get_sg_table(dev, buf, direction);
+		if (IS_ERR(ubuf->sg))
+			return PTR_ERR(ubuf->sg);
+	} else {
+		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
+				    direction);
+	}
 
-	return kmap(page);
+	return 0;
 }
 
-static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
-			   void *vaddr)
+static int end_cpu_udmabuf(struct dma_buf *buf,
+			   enum dma_data_direction direction)
 {
-	kunmap(vaddr);
+	struct udmabuf *ubuf = buf->priv;
+	struct device *dev = ubuf->device->this_device;
+
+	if (!ubuf->sg)
+		return -EINVAL;
+
+	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
+	return 0;
 }
 
 static const struct dma_buf_ops udmabuf_ops = {
-	.map_dma_buf	  = map_udmabuf,
-	.unmap_dma_buf	  = unmap_udmabuf,
-	.release	  = release_udmabuf,
-	.map		  = kmap_udmabuf,
-	.unmap		  = kunmap_udmabuf,
-	.mmap		  = mmap_udmabuf,
+	.cache_sgt_mapping = true,
+	.map_dma_buf	   = map_udmabuf,
+	.unmap_dma_buf	   = unmap_udmabuf,
+	.release	   = release_udmabuf,
+	.mmap		   = mmap_udmabuf,
+	.begin_cpu_access  = begin_cpu_udmabuf,
+	.end_cpu_access    = end_cpu_udmabuf,
 };
 
 #define SEALS_WANTED (F_SEAL_SHRINK)
 #define SEALS_DENIED (F_SEAL_WRITE)
 
-static long udmabuf_create(const struct udmabuf_create_list *head,
-			   const struct udmabuf_create_item *list)
+static long udmabuf_create(struct miscdevice *device,
+			   struct udmabuf_create_list *head,
+			   struct udmabuf_create_item *list)
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	struct file *memfd = NULL;
@@ -187,6 +224,7 @@ static long udmabuf_create(const struct udmabuf_create_list *head,
 	exp_info.priv = ubuf;
 	exp_info.flags = O_RDWR;
 
+	ubuf->device = device;
 	buf = dma_buf_export(&exp_info);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
@@ -224,7 +262,7 @@ static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
 	list.offset = create.offset;
 	list.size   = create.size;
 
-	return udmabuf_create(&head, &list);
+	return udmabuf_create(filp->private_data, &head, &list);
 }
 
 static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
@@ -243,7 +281,7 @@ static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
 	if (IS_ERR(list))
 		return PTR_ERR(list);
 
-	ret = udmabuf_create(&head, list);
+	ret = udmabuf_create(filp->private_data, &head, list);
 	kfree(list);
 	return ret;
 }
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 7af874b69ffb..5142da401db3 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -15,19 +15,19 @@ menuconfig DMADEVICES
 	  be empty in some cases.
 
 config DMADEVICES_DEBUG
-        bool "DMA Engine debugging"
-        depends on DMADEVICES != n
-        help
-          This is an option for use by developers; most people should
-          say N here.  This enables DMA engine core and driver debugging.
+	bool "DMA Engine debugging"
+	depends on DMADEVICES != n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This enables DMA engine core and driver debugging.
 
 config DMADEVICES_VDEBUG
-        bool "DMA Engine verbose debugging"
-        depends on DMADEVICES_DEBUG != n
-        help
-          This is an option for use by developers; most people should
-          say N here.  This enables deeper (more verbose) debugging of
-          the DMA engine core and drivers.
+	bool "DMA Engine verbose debugging"
+	depends on DMADEVICES_DEBUG != n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This enables deeper (more verbose) debugging of
+	  the DMA engine core and drivers.
 
 
 if DMADEVICES
@@ -215,30 +215,38 @@ config FSL_EDMA
 	  This module can be found on Freescale Vybrid and LS-1 SoCs.
 
 config FSL_QDMA
-       tristate "NXP Layerscape qDMA engine support"
-       depends on ARM || ARM64
-       select DMA_ENGINE
-       select DMA_VIRTUAL_CHANNELS
-       select DMA_ENGINE_RAID
-       select ASYNC_TX_ENABLE_CHANNEL_SWITCH
-       help
-         Support the NXP Layerscape qDMA engine with command queue and legacy mode.
-         Channel virtualization is supported through enqueuing of DMA jobs to,
-         or dequeuing DMA jobs from, different work queues.
-         This module can be found on NXP Layerscape SoCs.
+	tristate "NXP Layerscape qDMA engine support"
+	depends on ARM || ARM64
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	 Support the NXP Layerscape qDMA engine with command queue and legacy mode.
+	 Channel virtualization is supported through enqueuing of DMA jobs to,
+	 or dequeuing DMA jobs from, different work queues.
+	 This module can be found on NXP Layerscape SoCs.
 	  The qdma driver only work on  SoCs with a DPAA hardware block.
 
 config FSL_RAID
-        tristate "Freescale RAID engine Support"
-        depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
-        select DMA_ENGINE
-        select DMA_ENGINE_RAID
-        ---help---
-          Enable support for Freescale RAID Engine. RAID Engine is
-          available on some QorIQ SoCs (like P5020/P5040). It has
-          the capability to offload memcpy, xor and pq computation
+	tristate "Freescale RAID engine Support"
+	depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	---help---
+	  Enable support for Freescale RAID Engine. RAID Engine is
+	  available on some QorIQ SoCs (like P5020/P5040). It has
+	  the capability to offload memcpy, xor and pq computation
 	  for raid5/6.
 
+config HISI_DMA
+	tristate "HiSilicon DMA Engine support"
+	depends on ARM64 || (COMPILE_TEST && PCI_MSI)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support HiSilicon Kunpeng DMA engine.
+
 config IMG_MDC_DMA
 	tristate "IMG MDC support"
 	depends on MIPS || COMPILE_TEST
@@ -273,6 +281,19 @@ config INTEL_IDMA64
 	  Enable DMA support for Intel Low Power Subsystem such as found on
 	  Intel Skylake PCH.
 
+config INTEL_IDXD
+	tristate "Intel Data Accelerators support"
+	depends on PCI && X86_64
+	select DMA_ENGINE
+	select SBITMAP
+	help
+	  Enable support for the Intel(R) data accelerators present
+	  in Intel Xeon CPU.
+
+	  Say Y if you have such a platform.
+
+	  If unsure, say N.
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86_64
@@ -342,6 +363,26 @@ config MCF_EDMA
 	  minimal intervention from a host processor.
 	  This module can be found on Freescale ColdFire mcf5441x SoCs.
 
+config MILBEAUT_HDMAC
+	tristate "Milbeaut AHB DMA support"
+	depends on ARCH_MILBEAUT || COMPILE_TEST
+	depends on OF
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Say yes here to support the Socionext Milbeaut
+	  HDMAC device.
+
+config MILBEAUT_XDMAC
+	tristate "Milbeaut AXI DMA support"
+	depends on ARCH_MILBEAUT || COMPILE_TEST
+	depends on OF
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Say yes here to support the Socionext Milbeaut
+	  XDMAC device.
+
 config MMP_PDMA
 	bool "MMP PDMA support"
 	depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST
@@ -477,6 +518,15 @@ config PXA_DMA
 	  16 to 32 channels for peripheral to memory or memory to memory
 	  transfers.
 
+config PLX_DMA
+	tristate "PLX ExpressLane PEX Switch DMA Engine Support"
+	depends on PCI
+	select DMA_ENGINE
+	help
+	  Some PLX ExpressLane PCI Switches support additional DMA engines.
+	  These are exposed via extra functions on the switch's
+	  upstream port. Each function exposes one DMA channel.
+
 config SIRF_DMA
 	tristate "CSR SiRFprimaII/SiRFmarco DMA support"
 	depends on ARCH_SIRF
@@ -635,6 +685,10 @@ config XILINX_DMA
 	  destination address.
 	  AXI DMA engine provides high-bandwidth one dimensional direct
 	  memory access between memory and AXI4-Stream target peripherals.
+	  AXI MCDMA engine provides high-bandwidth direct memory access
+	  between memory and AXI4-Stream target peripherals. It provides
+	  the scatter gather interface with multiple channels independent
+	  configuration support.
 
 config XILINX_ZYNQMP_DMA
 	tristate "Xilinx ZynqMP DMA Engine"
@@ -665,10 +719,14 @@ source "drivers/dma/dw-edma/Kconfig"
 
 source "drivers/dma/hsu/Kconfig"
 
+source "drivers/dma/sf-pdma/Kconfig"
+
 source "drivers/dma/sh/Kconfig"
 
 source "drivers/dma/ti/Kconfig"
 
+source "drivers/dma/fsl-dpaa2-qdma/Kconfig"
+
 # clients
 comment "DMA Clients"
 	depends on DMA_ENGINE
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f5ce8665e944..1d908394fbea 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -35,16 +35,20 @@ obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o
 obj-$(CONFIG_MCF_EDMA) += mcf-edma.o fsl-edma-common.o
 obj-$(CONFIG_FSL_QDMA) += fsl-qdma.o
 obj-$(CONFIG_FSL_RAID) += fsl_raid.o
+obj-$(CONFIG_HISI_DMA) += hisi_dma.o
 obj-$(CONFIG_HSU_DMA) += hsu/
 obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-$(CONFIG_INTEL_IDXD) += idxd/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
+obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o
+obj-$(CONFIG_MILBEAUT_XDMAC) += milbeaut-xdmac.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
@@ -57,9 +61,11 @@ obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
 obj-$(CONFIG_OWL_DMA) += owl-dma.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
+obj-$(CONFIG_PLX_DMA) += plx_dma.o
 obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_RENESAS_DMA) += sh/
+obj-$(CONFIG_SF_PDMA) += sf-pdma/
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_STM32_DMA) += stm32-dma.o
@@ -75,6 +81,7 @@ obj-$(CONFIG_UNIPHIER_MDMAC) += uniphier-mdmac.o
 obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
 obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
+obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
 
 obj-y += mediatek/
 obj-y += qcom/
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
index 832aefbe7af9..539e785039ca 100644
--- a/drivers/dma/altera-msgdma.c
+++ b/drivers/dma/altera-msgdma.c
@@ -772,10 +772,10 @@ static int request_and_map(struct platform_device *pdev, const char *name,
 		return -EBUSY;
 	}
 
-	*ptr = devm_ioremap_nocache(device, region->start,
+	*ptr = devm_ioremap(device, region->start,
 				    resource_size(region));
 	if (*ptr == NULL) {
-		dev_err(device, "ioremap_nocache of %s failed!", name);
+		dev_err(device, "ioremap of %s failed!", name);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b58ac720d9a1..f71c9f77d405 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1957,21 +1957,16 @@ static int atmel_xdmac_resume(struct device *dev)
 
 static int at_xdmac_probe(struct platform_device *pdev)
 {
-	struct resource	*res;
 	struct at_xdmac	*atxdmac;
 	int		irq, size, nr_channels, i, ret;
 	void __iomem	*base;
 	u32		reg;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index e4c593f48575..4768ef26013b 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -797,10 +797,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan)
 
 	/* stop DMA activity */
 	if (c->desc) {
-		if (c->desc->vd.tx.flags & DMA_PREP_INTERRUPT)
-			vchan_terminate_vdesc(&c->desc->vd);
-		else
-			vchan_vdesc_fini(&c->desc->vd);
+		vchan_terminate_vdesc(&c->desc->vd);
 		c->desc = NULL;
 		bcm2835_dma_abort(c);
 	}
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index a0ee404b736e..f1d149e32839 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -830,6 +830,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
 	struct dma_device *dma_dev;
 	struct axi_dmac *dmac;
 	struct resource *res;
+	struct regmap *regmap;
 	int ret;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
@@ -921,10 +922,17 @@ static int axi_dmac_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, dmac);
 
-	devm_regmap_init_mmio(&pdev->dev, dmac->base, &axi_dmac_regmap_config);
+	regmap = devm_regmap_init_mmio(&pdev->dev, dmac->base,
+		 &axi_dmac_regmap_config);
+	if (IS_ERR(regmap)) {
+		ret = PTR_ERR(regmap);
+		goto err_free_irq;
+	}
 
 	return 0;
 
+err_free_irq:
+	free_irq(dmac->irq, dmac);
 err_unregister_of:
 	of_dma_controller_free(pdev->dev.of_node);
 err_unregister_device:
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index cafb1cc065bb..448f663da89c 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -858,13 +858,7 @@ static int jz4780_dma_probe(struct platform_device *pdev)
 	jzdma->soc_data = soc_data;
 	platform_set_drvdata(pdev, jzdma);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "failed to get I/O memory\n");
-		return -EINVAL;
-	}
-
-	jzdma->chn_base = devm_ioremap_resource(dev, res);
+	jzdma->chn_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(jzdma->chn_base))
 		return PTR_ERR(jzdma->chn_base);
 
@@ -987,6 +981,7 @@ static int jz4780_dma_remove(struct platform_device *pdev)
 
 	of_dma_controller_free(pdev->dev.of_node);
 
+	clk_disable_unprepare(jzdma->clk);
 	free_irq(jzdma->irq, jzdma);
 
 	for (i = 0; i < jzdma->soc_data->nb_channels; i++)
@@ -1004,7 +999,8 @@ static const struct jz4780_dma_soc_data jz4740_dma_soc_data = {
 static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
 	.nb_channels = 6,
 	.transfer_ord_max = 5,
-	.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+	.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
+		 JZ_SOC_DATA_BREAK_LINKS,
 };
 
 static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
@@ -1019,11 +1015,25 @@ static const struct jz4780_dma_soc_data jz4780_dma_soc_data = {
 	.flags = JZ_SOC_DATA_ALLOW_LEGACY_DT | JZ_SOC_DATA_PROGRAMMABLE_DMA,
 };
 
+static const struct jz4780_dma_soc_data x1000_dma_soc_data = {
+	.nb_channels = 8,
+	.transfer_ord_max = 7,
+	.flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
+static const struct jz4780_dma_soc_data x1830_dma_soc_data = {
+	.nb_channels = 32,
+	.transfer_ord_max = 7,
+	.flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
 static const struct of_device_id jz4780_dma_dt_match[] = {
 	{ .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
 	{ .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
 	{ .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
 	{ .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
+	{ .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
+	{ .compatible = "ingenic,x1830-dma", .data = &x1830_dma_soc_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 03ac4b96117c..f3ef4edd4de1 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -60,6 +60,8 @@ static long dmaengine_ref_count;
 
 /* --- sysfs implementation --- */
 
+#define DMA_SLAVE_NAME	"slave"
+
 /**
  * dev_to_dma_chan - convert a device pointer to its sysfs container object
  * @dev - device node
@@ -164,11 +166,152 @@ static struct class dma_devclass = {
 
 /* --- client and device registration --- */
 
-#define dma_device_satisfies_mask(device, mask) \
-	__dma_device_satisfies_mask((device), &(mask))
-static int
-__dma_device_satisfies_mask(struct dma_device *device,
-			    const dma_cap_mask_t *want)
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+	struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+	enum dma_transaction_type cap;
+	int err = 0;
+
+	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+	/* 'interrupt', 'private', and 'slave' are channel capabilities,
+	 * but are not associated with an operation so they do not need
+	 * an entry in the channel_table
+	 */
+	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+		if (!channel_table[cap]) {
+			err = -ENOMEM;
+			break;
+		}
+	}
+
+	if (err) {
+		pr_err("dmaengine dma_channel_table_init failure: %d\n", err);
+		for_each_dma_cap_mask(cap, dma_cap_mask_all)
+			free_percpu(channel_table[cap]);
+	}
+
+	return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_chan_is_local - returns true if the channel is in the same numa-node as
+ *	the cpu
+ */
+static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
+{
+	int node = dev_to_node(chan->device->dev);
+	return node == NUMA_NO_NODE ||
+		cpumask_test_cpu(cpu, cpumask_of_node(node));
+}
+
+/**
+ * min_chan - returns the channel with min count and in the same numa-node as
+ *	the cpu
+ * @cap: capability to match
+ * @cpu: cpu index which the channel should be close to
+ *
+ * If some channels are close to the given cpu, the one with the lowest
+ * reference count is returned. Otherwise, cpu is ignored and only the
+ * reference count is taken into account.
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	struct dma_chan *min = NULL;
+	struct dma_chan *localmin = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (!dma_has_cap(cap, device->cap_mask) ||
+		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			if (!chan->client_count)
+				continue;
+			if (!min || chan->table_count < min->table_count)
+				min = chan;
+
+			if (dma_chan_is_local(chan, cpu))
+				if (!localmin ||
+				    chan->table_count < localmin->table_count)
+					localmin = chan;
+		}
+	}
+
+	chan = localmin ? localmin : min;
+
+	if (chan)
+		chan->table_count++;
+
+	return chan;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case,  and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.  Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+	struct dma_chan *chan;
+	struct dma_device *device;
+	int cpu;
+	int cap;
+
+	/* undo the last distribution */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			chan->table_count = 0;
+	}
+
+	/* don't populate the channel_table if no clients are available */
+	if (!dmaengine_ref_count)
+		return;
+
+	/* redistribute available channels */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_online_cpu(cpu) {
+			chan = min_chan(cap, cpu);
+			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+		}
+}
+
+static int dma_device_satisfies_mask(struct dma_device *device,
+				     const dma_cap_mask_t *want)
 {
 	dma_cap_mask_t has;
 
@@ -179,7 +322,7 @@ __dma_device_satisfies_mask(struct dma_device *device,
 
 static struct module *dma_chan_to_owner(struct dma_chan *chan)
 {
-	return chan->device->dev->driver->owner;
+	return chan->device->owner;
 }
 
 /**
@@ -198,6 +341,23 @@ static void balance_ref_count(struct dma_chan *chan)
 	}
 }
 
+static void dma_device_release(struct kref *ref)
+{
+	struct dma_device *device = container_of(ref, struct dma_device, ref);
+
+	list_del_rcu(&device->global_node);
+	dma_channel_rebalance();
+
+	if (device->device_release)
+		device->device_release(device);
+}
+
+static void dma_device_put(struct dma_device *device)
+{
+	lockdep_assert_held(&dma_list_mutex);
+	kref_put(&device->ref, dma_device_release);
+}
+
 /**
  * dma_chan_get - try to grab a dma channel's parent driver module
  * @chan - channel to grab
@@ -218,6 +378,12 @@ static int dma_chan_get(struct dma_chan *chan)
 	if (!try_module_get(owner))
 		return -ENODEV;
 
+	ret = kref_get_unless_zero(&chan->device->ref);
+	if (!ret) {
+		ret = -ENODEV;
+		goto module_put_out;
+	}
+
 	/* allocate upon first client reference */
 	if (chan->device->device_alloc_chan_resources) {
 		ret = chan->device->device_alloc_chan_resources(chan);
@@ -233,6 +399,8 @@ out:
 	return 0;
 
 err_out:
+	dma_device_put(chan->device);
+module_put_out:
 	module_put(owner);
 	return ret;
 }
@@ -250,7 +418,6 @@ static void dma_chan_put(struct dma_chan *chan)
 		return;
 
 	chan->client_count--;
-	module_put(dma_chan_to_owner(chan));
 
 	/* This channel is not in use anymore, free it */
 	if (!chan->client_count && chan->device->device_free_chan_resources) {
@@ -265,6 +432,9 @@ static void dma_chan_put(struct dma_chan *chan)
 		chan->router = NULL;
 		chan->route_data = NULL;
 	}
+
+	dma_device_put(chan->device);
+	module_put(dma_chan_to_owner(chan));
 }
 
 enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
@@ -289,57 +459,6 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 EXPORT_SYMBOL(dma_sync_wait);
 
 /**
- * dma_cap_mask_all - enable iteration over all operation types
- */
-static dma_cap_mask_t dma_cap_mask_all;
-
-/**
- * dma_chan_tbl_ent - tracks channel allocations per core/operation
- * @chan - associated channel for this entry
- */
-struct dma_chan_tbl_ent {
-	struct dma_chan *chan;
-};
-
-/**
- * channel_table - percpu lookup table for memory-to-memory offload providers
- */
-static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
-
-static int __init dma_channel_table_init(void)
-{
-	enum dma_transaction_type cap;
-	int err = 0;
-
-	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
-
-	/* 'interrupt', 'private', and 'slave' are channel capabilities,
-	 * but are not associated with an operation so they do not need
-	 * an entry in the channel_table
-	 */
-	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
-	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
-	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
-
-	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
-		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
-		if (!channel_table[cap]) {
-			err = -ENOMEM;
-			break;
-		}
-	}
-
-	if (err) {
-		pr_err("initialization failure\n");
-		for_each_dma_cap_mask(cap, dma_cap_mask_all)
-			free_percpu(channel_table[cap]);
-	}
-
-	return err;
-}
-arch_initcall(dma_channel_table_init);
-
-/**
  * dma_find_channel - find a channel to carry out the operation
  * @tx_type: transaction type
  */
@@ -369,97 +488,6 @@ void dma_issue_pending_all(void)
 }
 EXPORT_SYMBOL(dma_issue_pending_all);
 
-/**
- * dma_chan_is_local - returns true if the channel is in the same numa-node as the cpu
- */
-static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
-{
-	int node = dev_to_node(chan->device->dev);
-	return node == NUMA_NO_NODE ||
-		cpumask_test_cpu(cpu, cpumask_of_node(node));
-}
-
-/**
- * min_chan - returns the channel with min count and in the same numa-node as the cpu
- * @cap: capability to match
- * @cpu: cpu index which the channel should be close to
- *
- * If some channels are close to the given cpu, the one with the lowest
- * reference count is returned. Otherwise, cpu is ignored and only the
- * reference count is taken into account.
- * Must be called under dma_list_mutex.
- */
-static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
-{
-	struct dma_device *device;
-	struct dma_chan *chan;
-	struct dma_chan *min = NULL;
-	struct dma_chan *localmin = NULL;
-
-	list_for_each_entry(device, &dma_device_list, global_node) {
-		if (!dma_has_cap(cap, device->cap_mask) ||
-		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
-			continue;
-		list_for_each_entry(chan, &device->channels, device_node) {
-			if (!chan->client_count)
-				continue;
-			if (!min || chan->table_count < min->table_count)
-				min = chan;
-
-			if (dma_chan_is_local(chan, cpu))
-				if (!localmin ||
-				    chan->table_count < localmin->table_count)
-					localmin = chan;
-		}
-	}
-
-	chan = localmin ? localmin : min;
-
-	if (chan)
-		chan->table_count++;
-
-	return chan;
-}
-
-/**
- * dma_channel_rebalance - redistribute the available channels
- *
- * Optimize for cpu isolation (each cpu gets a dedicated channel for an
- * operation type) in the SMP case,  and operation isolation (avoid
- * multi-tasking channels) in the non-SMP case.  Must be called under
- * dma_list_mutex.
- */
-static void dma_channel_rebalance(void)
-{
-	struct dma_chan *chan;
-	struct dma_device *device;
-	int cpu;
-	int cap;
-
-	/* undo the last distribution */
-	for_each_dma_cap_mask(cap, dma_cap_mask_all)
-		for_each_possible_cpu(cpu)
-			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
-
-	list_for_each_entry(device, &dma_device_list, global_node) {
-		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
-			continue;
-		list_for_each_entry(chan, &device->channels, device_node)
-			chan->table_count = 0;
-	}
-
-	/* don't populate the channel_table if no clients are available */
-	if (!dmaengine_ref_count)
-		return;
-
-	/* redistribute available channels */
-	for_each_dma_cap_mask(cap, dma_cap_mask_all)
-		for_each_online_cpu(cpu) {
-			chan = min_chan(cap, cpu);
-			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
-		}
-}
-
 int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 {
 	struct dma_device *device;
@@ -502,7 +530,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask,
 {
 	struct dma_chan *chan;
 
-	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
+	if (mask && !dma_device_satisfies_mask(dev, mask)) {
 		dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__);
 		return NULL;
 	}
@@ -704,11 +732,11 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 	if (has_acpi_companion(dev) && !chan)
 		chan = acpi_dma_request_slave_chan_by_name(dev, name);
 
-	if (chan) {
-		/* Valid channel found or requester needs to be deferred */
-		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
-			return chan;
-	}
+	if (PTR_ERR(chan) == -EPROBE_DEFER)
+		return chan;
+
+	if (!IS_ERR_OR_NULL(chan))
+		goto found;
 
 	/* Try to find the channel via the DMA filter map(s) */
 	mutex_lock(&dma_list_mutex);
@@ -728,7 +756,23 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 	}
 	mutex_unlock(&dma_list_mutex);
 
-	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+	if (!IS_ERR_OR_NULL(chan))
+		goto found;
+
+	return ERR_PTR(-EPROBE_DEFER);
+
+found:
+	chan->slave = dev;
+	chan->name = kasprintf(GFP_KERNEL, "dma:%s", name);
+	if (!chan->name)
+		return ERR_PTR(-ENOMEM);
+
+	if (sysfs_create_link(&chan->dev->device.kobj, &dev->kobj,
+			      DMA_SLAVE_NAME))
+		dev_err(dev, "Cannot create DMA %s symlink\n", DMA_SLAVE_NAME);
+	if (sysfs_create_link(&dev->kobj, &chan->dev->device.kobj, chan->name))
+		dev_err(dev, "Cannot create DMA %s symlink\n", chan->name);
+	return chan;
 }
 EXPORT_SYMBOL_GPL(dma_request_chan);
 
@@ -786,6 +830,13 @@ void dma_release_channel(struct dma_chan *chan)
 	/* drop PRIVATE cap enabled by __dma_request_channel() */
 	if (--chan->device->privatecnt == 0)
 		dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+	if (chan->slave) {
+		sysfs_remove_link(&chan->slave->kobj, chan->name);
+		kfree(chan->name);
+		chan->name = NULL;
+		chan->slave = NULL;
+	}
+	sysfs_remove_link(&chan->dev->device.kobj, DMA_SLAVE_NAME);
 	mutex_unlock(&dma_list_mutex);
 }
 EXPORT_SYMBOL_GPL(dma_release_channel);
@@ -834,14 +885,14 @@ EXPORT_SYMBOL(dmaengine_get);
  */
 void dmaengine_put(void)
 {
-	struct dma_device *device;
+	struct dma_device *device, *_d;
 	struct dma_chan *chan;
 
 	mutex_lock(&dma_list_mutex);
 	dmaengine_ref_count--;
 	BUG_ON(dmaengine_ref_count < 0);
 	/* drop channel references */
-	list_for_each_entry(device, &dma_device_list, global_node) {
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
 		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
 			continue;
 		list_for_each_entry(chan, &device->channels, device_node)
@@ -900,15 +951,115 @@ static int get_dma_id(struct dma_device *device)
 	return 0;
 }
 
+static int __dma_async_device_channel_register(struct dma_device *device,
+					       struct dma_chan *chan,
+					       int chan_id)
+{
+	int rc = 0;
+	int chancnt = device->chancnt;
+	atomic_t *idr_ref;
+	struct dma_chan *tchan;
+
+	tchan = list_first_entry_or_null(&device->channels,
+					 struct dma_chan, device_node);
+	if (tchan->dev) {
+		idr_ref = tchan->dev->idr_ref;
+	} else {
+		idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
+		if (!idr_ref)
+			return -ENOMEM;
+		atomic_set(idr_ref, 0);
+	}
+
+	chan->local = alloc_percpu(typeof(*chan->local));
+	if (!chan->local)
+		goto err_out;
+	chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+	if (!chan->dev) {
+		free_percpu(chan->local);
+		chan->local = NULL;
+		goto err_out;
+	}
+
+	/*
+	 * When the chan_id is a negative value, we are dynamically adding
+	 * the channel. Otherwise we are static enumerating.
+	 */
+	chan->chan_id = chan_id < 0 ? chancnt : chan_id;
+	chan->dev->device.class = &dma_devclass;
+	chan->dev->device.parent = device->dev;
+	chan->dev->chan = chan;
+	chan->dev->idr_ref = idr_ref;
+	chan->dev->dev_id = device->dev_id;
+	atomic_inc(idr_ref);
+	dev_set_name(&chan->dev->device, "dma%dchan%d",
+		     device->dev_id, chan->chan_id);
+
+	rc = device_register(&chan->dev->device);
+	if (rc)
+		goto err_out;
+	chan->client_count = 0;
+	device->chancnt = chan->chan_id + 1;
+
+	return 0;
+
+ err_out:
+	free_percpu(chan->local);
+	kfree(chan->dev);
+	if (atomic_dec_return(idr_ref) == 0)
+		kfree(idr_ref);
+	return rc;
+}
+
+int dma_async_device_channel_register(struct dma_device *device,
+				      struct dma_chan *chan)
+{
+	int rc;
+
+	rc = __dma_async_device_channel_register(device, chan, -1);
+	if (rc < 0)
+		return rc;
+
+	dma_channel_rebalance();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_register);
+
+static void __dma_async_device_channel_unregister(struct dma_device *device,
+						  struct dma_chan *chan)
+{
+	WARN_ONCE(!device->device_release && chan->client_count,
+		  "%s called while %d clients hold a reference\n",
+		  __func__, chan->client_count);
+	mutex_lock(&dma_list_mutex);
+	list_del(&chan->device_node);
+	device->chancnt--;
+	chan->dev->chan = NULL;
+	mutex_unlock(&dma_list_mutex);
+	device_unregister(&chan->dev->device);
+	free_percpu(chan->local);
+}
+
+void dma_async_device_channel_unregister(struct dma_device *device,
+					 struct dma_chan *chan)
+{
+	__dma_async_device_channel_unregister(device, chan);
+	dma_channel_rebalance();
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_unregister);
+
 /**
  * dma_async_device_register - registers DMA devices found
  * @device: &dma_device
+ *
+ * After calling this routine the structure should not be freed except in the
+ * device_release() callback which will be called after
+ * dma_async_device_unregister() is called and no further references are taken.
  */
 int dma_async_device_register(struct dma_device *device)
 {
-	int chancnt = 0, rc;
+	int rc, i = 0;
 	struct dma_chan* chan;
-	atomic_t *idr_ref;
 
 	if (!device)
 		return -ENODEV;
@@ -919,6 +1070,8 @@ int dma_async_device_register(struct dma_device *device)
 		return -EIO;
 	}
 
+	device->owner = device->dev->driver->owner;
+
 	if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) {
 		dev_err(device->dev,
 			"Device claims capability %s, but op is not defined\n",
@@ -994,65 +1147,29 @@ int dma_async_device_register(struct dma_device *device)
 		return -EIO;
 	}
 
+	if (!device->device_release)
+		dev_warn(device->dev,
+			 "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
+
+	kref_init(&device->ref);
+
 	/* note: this only matters in the
 	 * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
 	 */
 	if (device_has_all_tx_types(device))
 		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
 
-	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
-	if (!idr_ref)
-		return -ENOMEM;
 	rc = get_dma_id(device);
-	if (rc != 0) {
-		kfree(idr_ref);
+	if (rc != 0)
 		return rc;
-	}
-
-	atomic_set(idr_ref, 0);
 
 	/* represent channels in sysfs. Probably want devs too */
 	list_for_each_entry(chan, &device->channels, device_node) {
-		rc = -ENOMEM;
-		chan->local = alloc_percpu(typeof(*chan->local));
-		if (chan->local == NULL)
-			goto err_out;
-		chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
-		if (chan->dev == NULL) {
-			free_percpu(chan->local);
-			chan->local = NULL;
+		rc = __dma_async_device_channel_register(device, chan, i++);
+		if (rc < 0)
 			goto err_out;
-		}
-
-		chan->chan_id = chancnt++;
-		chan->dev->device.class = &dma_devclass;
-		chan->dev->device.parent = device->dev;
-		chan->dev->chan = chan;
-		chan->dev->idr_ref = idr_ref;
-		chan->dev->dev_id = device->dev_id;
-		atomic_inc(idr_ref);
-		dev_set_name(&chan->dev->device, "dma%dchan%d",
-			     device->dev_id, chan->chan_id);
-
-		rc = device_register(&chan->dev->device);
-		if (rc) {
-			free_percpu(chan->local);
-			chan->local = NULL;
-			kfree(chan->dev);
-			atomic_dec(idr_ref);
-			goto err_out;
-		}
-		chan->client_count = 0;
-	}
-
-	if (!chancnt) {
-		dev_err(device->dev, "%s: device has no channels!\n", __func__);
-		rc = -ENODEV;
-		goto err_out;
 	}
 
-	device->chancnt = chancnt;
-
 	mutex_lock(&dma_list_mutex);
 	/* take references on public channels */
 	if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
@@ -1080,9 +1197,8 @@ int dma_async_device_register(struct dma_device *device)
 
 err_out:
 	/* if we never registered a channel just release the idr */
-	if (atomic_read(idr_ref) == 0) {
+	if (!device->chancnt) {
 		ida_free(&dma_ida, device->dev_id);
-		kfree(idr_ref);
 		return rc;
 	}
 
@@ -1108,23 +1224,20 @@ EXPORT_SYMBOL(dma_async_device_register);
  */
 void dma_async_device_unregister(struct dma_device *device)
 {
-	struct dma_chan *chan;
+	struct dma_chan *chan, *n;
+
+	list_for_each_entry_safe(chan, n, &device->channels, device_node)
+		__dma_async_device_channel_unregister(device, chan);
 
 	mutex_lock(&dma_list_mutex);
-	list_del_rcu(&device->global_node);
+	/*
+	 * setting DMA_PRIVATE ensures the device being torn down will not
+	 * be used in the channel_table
+	 */
+	dma_cap_set(DMA_PRIVATE, device->cap_mask);
 	dma_channel_rebalance();
+	dma_device_put(device);
 	mutex_unlock(&dma_list_mutex);
-
-	list_for_each_entry(chan, &device->channels, device_node) {
-		WARN_ONCE(chan->client_count,
-			  "%s called while %d clients hold a reference\n",
-			  __func__, chan->client_count);
-		mutex_lock(&dma_list_mutex);
-		chan->dev->chan = NULL;
-		mutex_unlock(&dma_list_mutex);
-		device_unregister(&chan->dev->device);
-		free_percpu(chan->local);
-	}
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
@@ -1302,6 +1415,79 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
+static inline int desc_check_and_set_metadata_mode(
+	struct dma_async_tx_descriptor *desc, enum dma_desc_metadata_mode mode)
+{
+	/* Make sure that the metadata mode is not mixed */
+	if (!desc->desc_metadata_mode) {
+		if (dmaengine_is_metadata_mode_supported(desc->chan, mode))
+			desc->desc_metadata_mode = mode;
+		else
+			return -ENOTSUPP;
+	} else if (desc->desc_metadata_mode != mode) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc,
+				   void *data, size_t len)
+{
+	int ret;
+
+	if (!desc)
+		return -EINVAL;
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_CLIENT);
+	if (ret)
+		return ret;
+
+	if (!desc->metadata_ops || !desc->metadata_ops->attach)
+		return -ENOTSUPP;
+
+	return desc->metadata_ops->attach(desc, data, len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_attach_metadata);
+
+void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+				      size_t *payload_len, size_t *max_len)
+{
+	int ret;
+
+	if (!desc)
+		return ERR_PTR(-EINVAL);
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (!desc->metadata_ops || !desc->metadata_ops->get_ptr)
+		return ERR_PTR(-ENOTSUPP);
+
+	return desc->metadata_ops->get_ptr(desc, payload_len, max_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_get_metadata_ptr);
+
+int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc,
+				    size_t payload_len)
+{
+	int ret;
+
+	if (!desc)
+		return -EINVAL;
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+	if (ret)
+		return ret;
+
+	if (!desc->metadata_ops || !desc->metadata_ops->set_len)
+		return -ENOTSUPP;
+
+	return desc->metadata_ops->set_len(desc, payload_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_set_metadata_len);
+
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
  */
@@ -1373,5 +1559,3 @@ static int __init dma_bus_init(void)
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
-
-
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
index 501c0b063f85..e8a320c9e57c 100644
--- a/drivers/dma/dmaengine.h
+++ b/drivers/dma/dmaengine.h
@@ -77,6 +77,7 @@ static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
 		state->last = complete;
 		state->used = used;
 		state->residue = 0;
+		state->in_flight_bytes = 0;
 	}
 	return dma_async_is_complete(cookie, complete, used);
 }
@@ -87,6 +88,13 @@ static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
 		state->residue = residue;
 }
 
+static inline void dma_set_in_flight_bytes(struct dma_tx_state *state,
+					   u32 in_flight_bytes)
+{
+	if (state)
+		state->in_flight_bytes = in_flight_bytes;
+}
+
 struct dmaengine_desc_callback {
 	dma_async_tx_callback callback;
 	dma_async_tx_callback_result callback_result;
@@ -171,4 +179,7 @@ dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb)
 	return (cb->callback) ? true : false;
 }
 
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
+struct dma_chan *dma_get_any_slave_channel(struct dma_device *device);
+
 #endif
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
index a1ce307c502f..14c1ac26f866 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -636,14 +636,10 @@ static int dma_chan_terminate_all(struct dma_chan *dchan)
 
 	vchan_get_all_descriptors(&chan->vc, &head);
 
-	/*
-	 * As vchan_dma_desc_free_list can access to desc_allocated list
-	 * we need to call it in vc.lock context.
-	 */
-	vchan_dma_desc_free_list(&chan->vc, &head);
-
 	spin_unlock_irqrestore(&chan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
 	dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
 
 	return 0;
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index c90c798e5ec3..0585d749d935 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -66,7 +66,7 @@ static int dw_probe(struct platform_device *pdev)
 
 	data->chip = chip;
 
-	chip->clk = devm_clk_get(chip->dev, "hclk");
+	chip->clk = devm_clk_get_optional(chip->dev, "hclk");
 	if (IS_ERR(chip->clk))
 		return PTR_ERR(chip->clk);
 	err = clk_prepare_enable(chip->clk);
diff --git a/drivers/dma/fsl-dpaa2-qdma/Kconfig b/drivers/dma/fsl-dpaa2-qdma/Kconfig
new file mode 100644
index 000000000000..258ed6be934d
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/Kconfig
@@ -0,0 +1,9 @@
+menuconfig FSL_DPAA2_QDMA
+	tristate "NXP DPAA2 QDMA"
+	depends on ARM64
+	depends on FSL_MC_BUS && FSL_MC_DPIO
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  NXP Data Path Acceleration Architecture 2 QDMA driver,
+	  using the NXP MC bus driver.
diff --git a/drivers/dma/fsl-dpaa2-qdma/Makefile b/drivers/dma/fsl-dpaa2-qdma/Makefile
new file mode 100644
index 000000000000..c1d0226f2bd7
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the NXP DPAA2 qDMA controllers
+obj-$(CONFIG_FSL_DPAA2_QDMA) += dpaa2-qdma.o dpdmai.o
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
new file mode 100644
index 000000000000..c70a7965f140
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2019 NXP
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmapool.h>
+#include <linux/of_irq.h>
+#include <linux/iommu.h>
+#include <linux/sys_soc.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+
+#include "../virt-dma.h"
+#include "dpdmai.h"
+#include "dpaa2-qdma.h"
+
+static bool smmu_disable = true;
+
+static struct dpaa2_qdma_chan *to_dpaa2_qdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct dpaa2_qdma_chan, vchan.chan);
+}
+
+static struct dpaa2_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct dpaa2_qdma_comp, vdesc);
+}
+
+static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+	struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma;
+	struct device *dev = &dpaa2_qdma->priv->dpdmai_dev->dev;
+
+	dpaa2_chan->fd_pool = dma_pool_create("fd_pool", dev,
+					      sizeof(struct dpaa2_fd),
+					      sizeof(struct dpaa2_fd), 0);
+	if (!dpaa2_chan->fd_pool)
+		goto err;
+
+	dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev,
+					      sizeof(struct dpaa2_fl_entry),
+					      sizeof(struct dpaa2_fl_entry), 0);
+	if (!dpaa2_chan->fl_pool)
+		goto err_fd;
+
+	dpaa2_chan->sdd_pool =
+		dma_pool_create("sdd_pool", dev,
+				sizeof(struct dpaa2_qdma_sd_d),
+				sizeof(struct dpaa2_qdma_sd_d), 0);
+	if (!dpaa2_chan->sdd_pool)
+		goto err_fl;
+
+	return dpaa2_qdma->desc_allocated++;
+err_fl:
+	dma_pool_destroy(dpaa2_chan->fl_pool);
+err_fd:
+	dma_pool_destroy(dpaa2_chan->fd_pool);
+err:
+	return -ENOMEM;
+}
+
+static void dpaa2_qdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+	struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma;
+	unsigned long flags;
+
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&dpaa2_chan->vchan.lock, flags);
+	vchan_get_all_descriptors(&dpaa2_chan->vchan, &head);
+	spin_unlock_irqrestore(&dpaa2_chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&dpaa2_chan->vchan, &head);
+
+	dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_used);
+	dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_free);
+
+	dma_pool_destroy(dpaa2_chan->fd_pool);
+	dma_pool_destroy(dpaa2_chan->fl_pool);
+	dma_pool_destroy(dpaa2_chan->sdd_pool);
+	dpaa2_qdma->desc_allocated--;
+}
+
+/*
+ * Request a command descriptor for enqueue.
+ */
+static struct dpaa2_qdma_comp *
+dpaa2_qdma_request_desc(struct dpaa2_qdma_chan *dpaa2_chan)
+{
+	struct dpaa2_qdma_priv *qdma_priv = dpaa2_chan->qdma->priv;
+	struct device *dev = &qdma_priv->dpdmai_dev->dev;
+	struct dpaa2_qdma_comp *comp_temp = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dpaa2_chan->queue_lock, flags);
+	if (list_empty(&dpaa2_chan->comp_free)) {
+		spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+		comp_temp = kzalloc(sizeof(*comp_temp), GFP_NOWAIT);
+		if (!comp_temp)
+			goto err;
+		comp_temp->fd_virt_addr =
+			dma_pool_alloc(dpaa2_chan->fd_pool, GFP_NOWAIT,
+				       &comp_temp->fd_bus_addr);
+		if (!comp_temp->fd_virt_addr)
+			goto err_comp;
+
+		comp_temp->fl_virt_addr =
+			dma_pool_alloc(dpaa2_chan->fl_pool, GFP_NOWAIT,
+				       &comp_temp->fl_bus_addr);
+		if (!comp_temp->fl_virt_addr)
+			goto err_fd_virt;
+
+		comp_temp->desc_virt_addr =
+			dma_pool_alloc(dpaa2_chan->sdd_pool, GFP_NOWAIT,
+				       &comp_temp->desc_bus_addr);
+		if (!comp_temp->desc_virt_addr)
+			goto err_fl_virt;
+
+		comp_temp->qchan = dpaa2_chan;
+		return comp_temp;
+	}
+
+	comp_temp = list_first_entry(&dpaa2_chan->comp_free,
+				     struct dpaa2_qdma_comp, list);
+	list_del(&comp_temp->list);
+	spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+
+	comp_temp->qchan = dpaa2_chan;
+
+	return comp_temp;
+
+err_fl_virt:
+		dma_pool_free(dpaa2_chan->fl_pool,
+			      comp_temp->fl_virt_addr,
+			      comp_temp->fl_bus_addr);
+err_fd_virt:
+		dma_pool_free(dpaa2_chan->fd_pool,
+			      comp_temp->fd_virt_addr,
+			      comp_temp->fd_bus_addr);
+err_comp:
+	kfree(comp_temp);
+err:
+	dev_err(dev, "Failed to request descriptor\n");
+	return NULL;
+}
+
+static void
+dpaa2_qdma_populate_fd(u32 format, struct dpaa2_qdma_comp *dpaa2_comp)
+{
+	struct dpaa2_fd *fd;
+
+	fd = dpaa2_comp->fd_virt_addr;
+	memset(fd, 0, sizeof(struct dpaa2_fd));
+
+	/* fd populated */
+	dpaa2_fd_set_addr(fd, dpaa2_comp->fl_bus_addr);
+
+	/*
+	 * Bypass memory translation, Frame list format, short length disable
+	 * we need to disable BMT if fsl-mc use iova addr
+	 */
+	if (smmu_disable)
+		dpaa2_fd_set_bpid(fd, QMAN_FD_BMT_ENABLE);
+	dpaa2_fd_set_format(fd, QMAN_FD_FMT_ENABLE | QMAN_FD_SL_DISABLE);
+
+	dpaa2_fd_set_frc(fd, format | QDMA_SER_CTX);
+}
+
+/* first frame list for descriptor buffer */
+static void
+dpaa2_qdma_populate_first_framel(struct dpaa2_fl_entry *f_list,
+				 struct dpaa2_qdma_comp *dpaa2_comp,
+				 bool wrt_changed)
+{
+	struct dpaa2_qdma_sd_d *sdd;
+
+	sdd = dpaa2_comp->desc_virt_addr;
+	memset(sdd, 0, 2 * (sizeof(*sdd)));
+
+	/* source descriptor CMD */
+	sdd->cmd = cpu_to_le32(QDMA_SD_CMD_RDTTYPE_COHERENT);
+	sdd++;
+
+	/* dest descriptor CMD */
+	if (wrt_changed)
+		sdd->cmd = cpu_to_le32(LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT);
+	else
+		sdd->cmd = cpu_to_le32(QDMA_DD_CMD_WRTTYPE_COHERENT);
+
+	memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+	/* first frame list to source descriptor */
+	dpaa2_fl_set_addr(f_list, dpaa2_comp->desc_bus_addr);
+	dpaa2_fl_set_len(f_list, 0x20);
+	dpaa2_fl_set_format(f_list, QDMA_FL_FMT_SBF | QDMA_FL_SL_LONG);
+
+	/* bypass memory translation */
+	if (smmu_disable)
+		f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+}
+
+/* source and destination frame list */
+static void
+dpaa2_qdma_populate_frames(struct dpaa2_fl_entry *f_list,
+			   dma_addr_t dst, dma_addr_t src,
+			   size_t len, uint8_t fmt)
+{
+	/* source frame list to source buffer */
+	memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+	dpaa2_fl_set_addr(f_list, src);
+	dpaa2_fl_set_len(f_list, len);
+
+	/* single buffer frame or scatter gather frame */
+	dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG));
+
+	/* bypass memory translation */
+	if (smmu_disable)
+		f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+
+	f_list++;
+
+	/* destination frame list to destination buffer */
+	memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+	dpaa2_fl_set_addr(f_list, dst);
+	dpaa2_fl_set_len(f_list, len);
+	dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG));
+	/* single buffer frame or scatter gather frame */
+	dpaa2_fl_set_final(f_list, QDMA_FL_F);
+	/* bypass memory translation */
+	if (smmu_disable)
+		f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+}
+
+static struct dma_async_tx_descriptor
+*dpaa2_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+			dma_addr_t src, size_t len, ulong flags)
+{
+	struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+	struct dpaa2_qdma_engine *dpaa2_qdma;
+	struct dpaa2_qdma_comp *dpaa2_comp;
+	struct dpaa2_fl_entry *f_list;
+	bool wrt_changed;
+
+	dpaa2_qdma = dpaa2_chan->qdma;
+	dpaa2_comp = dpaa2_qdma_request_desc(dpaa2_chan);
+	if (!dpaa2_comp)
+		return NULL;
+
+	wrt_changed = (bool)dpaa2_qdma->qdma_wrtype_fixup;
+
+	/* populate Frame descriptor */
+	dpaa2_qdma_populate_fd(QDMA_FD_LONG_FORMAT, dpaa2_comp);
+
+	f_list = dpaa2_comp->fl_virt_addr;
+
+	/* first frame list for descriptor buffer (logn format) */
+	dpaa2_qdma_populate_first_framel(f_list, dpaa2_comp, wrt_changed);
+
+	f_list++;
+
+	dpaa2_qdma_populate_frames(f_list, dst, src, len, QDMA_FL_FMT_SBF);
+
+	return vchan_tx_prep(&dpaa2_chan->vchan, &dpaa2_comp->vdesc, flags);
+}
+
+static void dpaa2_qdma_issue_pending(struct dma_chan *chan)
+{
+	struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+	struct dpaa2_qdma_comp *dpaa2_comp;
+	struct virt_dma_desc *vdesc;
+	struct dpaa2_fd *fd;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&dpaa2_chan->queue_lock, flags);
+	spin_lock(&dpaa2_chan->vchan.lock);
+	if (vchan_issue_pending(&dpaa2_chan->vchan)) {
+		vdesc = vchan_next_desc(&dpaa2_chan->vchan);
+		if (!vdesc)
+			goto err_enqueue;
+		dpaa2_comp = to_fsl_qdma_comp(vdesc);
+
+		fd = dpaa2_comp->fd_virt_addr;
+
+		list_del(&vdesc->node);
+		list_add_tail(&dpaa2_comp->list, &dpaa2_chan->comp_used);
+
+		err = dpaa2_io_service_enqueue_fq(NULL, dpaa2_chan->fqid, fd);
+		if (err) {
+			list_del(&dpaa2_comp->list);
+			list_add_tail(&dpaa2_comp->list,
+				      &dpaa2_chan->comp_free);
+		}
+	}
+err_enqueue:
+	spin_unlock(&dpaa2_chan->vchan.lock);
+	spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+}
+
+static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv;
+	struct device *dev = &ls_dev->dev;
+	struct dpaa2_qdma_priv *priv;
+	u8 prio_def = DPDMAI_PRIO_NUM;
+	int err = -EINVAL;
+	int i;
+
+	priv = dev_get_drvdata(dev);
+
+	priv->dev = dev;
+	priv->dpqdma_id = ls_dev->obj_desc.id;
+
+	/* Get the handle for the DPDMAI this interface is associate with */
+	err = dpdmai_open(priv->mc_io, 0, priv->dpqdma_id, &ls_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpdmai_open() failed\n");
+		return err;
+	}
+
+	dev_dbg(dev, "Opened dpdmai object successfully\n");
+
+	err = dpdmai_get_attributes(priv->mc_io, 0, ls_dev->mc_handle,
+				    &priv->dpdmai_attr);
+	if (err) {
+		dev_err(dev, "dpdmai_get_attributes() failed\n");
+		goto exit;
+	}
+
+	if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+		dev_err(dev, "DPDMAI major version mismatch\n"
+			     "Found %u.%u, supported version is %u.%u\n",
+				priv->dpdmai_attr.version.major,
+				priv->dpdmai_attr.version.minor,
+				DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR);
+		goto exit;
+	}
+
+	if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+		dev_err(dev, "DPDMAI minor version mismatch\n"
+			     "Found %u.%u, supported version is %u.%u\n",
+				priv->dpdmai_attr.version.major,
+				priv->dpdmai_attr.version.minor,
+				DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR);
+		goto exit;
+	}
+
+	priv->num_pairs = min(priv->dpdmai_attr.num_of_priorities, prio_def);
+	ppriv = kcalloc(priv->num_pairs, sizeof(*ppriv), GFP_KERNEL);
+	if (!ppriv) {
+		err = -ENOMEM;
+		goto exit;
+	}
+	priv->ppriv = ppriv;
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		err = dpdmai_get_rx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+					  i, &priv->rx_queue_attr[i]);
+		if (err) {
+			dev_err(dev, "dpdmai_get_rx_queue() failed\n");
+			goto exit;
+		}
+		ppriv->rsp_fqid = priv->rx_queue_attr[i].fqid;
+
+		err = dpdmai_get_tx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+					  i, &priv->tx_fqid[i]);
+		if (err) {
+			dev_err(dev, "dpdmai_get_tx_queue() failed\n");
+			goto exit;
+		}
+		ppriv->req_fqid = priv->tx_fqid[i];
+		ppriv->prio = i;
+		ppriv->priv = priv;
+		ppriv++;
+	}
+
+	return 0;
+exit:
+	dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle);
+	return err;
+}
+
+static void dpaa2_qdma_fqdan_cb(struct dpaa2_io_notification_ctx *ctx)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv = container_of(ctx,
+			struct dpaa2_qdma_priv_per_prio, nctx);
+	struct dpaa2_qdma_comp *dpaa2_comp, *_comp_tmp;
+	struct dpaa2_qdma_priv *priv = ppriv->priv;
+	u32 n_chans = priv->dpaa2_qdma->n_chans;
+	struct dpaa2_qdma_chan *qchan;
+	const struct dpaa2_fd *fd_eq;
+	const struct dpaa2_fd *fd;
+	struct dpaa2_dq *dq;
+	int is_last = 0;
+	int found;
+	u8 status;
+	int err;
+	int i;
+
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, ppriv->rsp_fqid,
+					       ppriv->store);
+	} while (err);
+
+	while (!is_last) {
+		do {
+			dq = dpaa2_io_store_next(ppriv->store, &is_last);
+		} while (!is_last && !dq);
+		if (!dq) {
+			dev_err(priv->dev, "FQID returned no valid frames!\n");
+			continue;
+		}
+
+		/* obtain FD and process the error */
+		fd = dpaa2_dq_fd(dq);
+
+		status = dpaa2_fd_get_ctrl(fd) & 0xff;
+		if (status)
+			dev_err(priv->dev, "FD error occurred\n");
+		found = 0;
+		for (i = 0; i < n_chans; i++) {
+			qchan = &priv->dpaa2_qdma->chans[i];
+			spin_lock(&qchan->queue_lock);
+			if (list_empty(&qchan->comp_used)) {
+				spin_unlock(&qchan->queue_lock);
+				continue;
+			}
+			list_for_each_entry_safe(dpaa2_comp, _comp_tmp,
+						 &qchan->comp_used, list) {
+				fd_eq = dpaa2_comp->fd_virt_addr;
+
+				if (le64_to_cpu(fd_eq->simple.addr) ==
+				    le64_to_cpu(fd->simple.addr)) {
+					spin_lock(&qchan->vchan.lock);
+					vchan_cookie_complete(&
+							dpaa2_comp->vdesc);
+					spin_unlock(&qchan->vchan.lock);
+					found = 1;
+					break;
+				}
+			}
+			spin_unlock(&qchan->queue_lock);
+			if (found)
+				break;
+		}
+	}
+
+	dpaa2_io_service_rearm(NULL, ctx);
+}
+
+static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv;
+	struct device *dev = priv->dev;
+	int err = -EINVAL;
+	int i, num;
+
+	num = priv->num_pairs;
+	ppriv = priv->ppriv;
+	for (i = 0; i < num; i++) {
+		ppriv->nctx.is_cdan = 0;
+		ppriv->nctx.desired_cpu = DPAA2_IO_ANY_CPU;
+		ppriv->nctx.id = ppriv->rsp_fqid;
+		ppriv->nctx.cb = dpaa2_qdma_fqdan_cb;
+		err = dpaa2_io_service_register(NULL, &ppriv->nctx, dev);
+		if (err) {
+			dev_err(dev, "Notification register failed\n");
+			goto err_service;
+		}
+
+		ppriv->store =
+			dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
+		if (!ppriv->store) {
+			dev_err(dev, "dpaa2_io_store_create() failed\n");
+			goto err_store;
+		}
+
+		ppriv++;
+	}
+	return 0;
+
+err_store:
+	dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+err_service:
+	ppriv--;
+	while (ppriv >= priv->ppriv) {
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+		dpaa2_io_store_destroy(ppriv->store);
+		ppriv--;
+	}
+	return err;
+}
+
+static void dpaa2_dpmai_store_free(struct dpaa2_qdma_priv *priv)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+	int i;
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		dpaa2_io_store_destroy(ppriv->store);
+		ppriv++;
+	}
+}
+
+static void dpaa2_dpdmai_dpio_free(struct dpaa2_qdma_priv *priv)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+	struct device *dev = priv->dev;
+	int i;
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+		ppriv++;
+	}
+}
+
+static int __cold dpaa2_dpdmai_bind(struct dpaa2_qdma_priv *priv)
+{
+	struct dpdmai_rx_queue_cfg rx_queue_cfg;
+	struct dpaa2_qdma_priv_per_prio *ppriv;
+	struct device *dev = priv->dev;
+	struct fsl_mc_device *ls_dev;
+	int i, num;
+	int err;
+
+	ls_dev = to_fsl_mc_device(dev);
+	num = priv->num_pairs;
+	ppriv = priv->ppriv;
+	for (i = 0; i < num; i++) {
+		rx_queue_cfg.options = DPDMAI_QUEUE_OPT_USER_CTX |
+					DPDMAI_QUEUE_OPT_DEST;
+		rx_queue_cfg.user_ctx = ppriv->nctx.qman64;
+		rx_queue_cfg.dest_cfg.dest_type = DPDMAI_DEST_DPIO;
+		rx_queue_cfg.dest_cfg.dest_id = ppriv->nctx.dpio_id;
+		rx_queue_cfg.dest_cfg.priority = ppriv->prio;
+		err = dpdmai_set_rx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+					  rx_queue_cfg.dest_cfg.priority,
+					  &rx_queue_cfg);
+		if (err) {
+			dev_err(dev, "dpdmai_set_rx_queue() failed\n");
+			return err;
+		}
+
+		ppriv++;
+	}
+
+	return 0;
+}
+
+static int __cold dpaa2_dpdmai_dpio_unbind(struct dpaa2_qdma_priv *priv)
+{
+	struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+	struct device *dev = priv->dev;
+	struct fsl_mc_device *ls_dev;
+	int err = 0;
+	int i;
+
+	ls_dev = to_fsl_mc_device(dev);
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		ppriv->nctx.qman64 = 0;
+		ppriv->nctx.dpio_id = 0;
+		ppriv++;
+	}
+
+	err = dpdmai_reset(priv->mc_io, 0, ls_dev->mc_handle);
+	if (err)
+		dev_err(dev, "dpdmai_reset() failed\n");
+
+	return err;
+}
+
+static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan,
+				   struct list_head *head)
+{
+	struct dpaa2_qdma_comp *comp_tmp, *_comp_tmp;
+	unsigned long flags;
+
+	list_for_each_entry_safe(comp_tmp, _comp_tmp,
+				 head, list) {
+		spin_lock_irqsave(&qchan->queue_lock, flags);
+		list_del(&comp_tmp->list);
+		spin_unlock_irqrestore(&qchan->queue_lock, flags);
+		dma_pool_free(qchan->fd_pool,
+			      comp_tmp->fd_virt_addr,
+			      comp_tmp->fd_bus_addr);
+		dma_pool_free(qchan->fl_pool,
+			      comp_tmp->fl_virt_addr,
+			      comp_tmp->fl_bus_addr);
+		dma_pool_free(qchan->sdd_pool,
+			      comp_tmp->desc_virt_addr,
+			      comp_tmp->desc_bus_addr);
+		kfree(comp_tmp);
+	}
+}
+
+static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma)
+{
+	struct dpaa2_qdma_chan *qchan;
+	int num, i;
+
+	num = dpaa2_qdma->n_chans;
+	for (i = 0; i < num; i++) {
+		qchan = &dpaa2_qdma->chans[i];
+		dpaa2_dpdmai_free_comp(qchan, &qchan->comp_used);
+		dpaa2_dpdmai_free_comp(qchan, &qchan->comp_free);
+		dma_pool_destroy(qchan->fd_pool);
+		dma_pool_destroy(qchan->fl_pool);
+		dma_pool_destroy(qchan->sdd_pool);
+	}
+}
+
+static void dpaa2_qdma_free_desc(struct virt_dma_desc *vdesc)
+{
+	struct dpaa2_qdma_comp *dpaa2_comp;
+	struct dpaa2_qdma_chan *qchan;
+	unsigned long flags;
+
+	dpaa2_comp = to_fsl_qdma_comp(vdesc);
+	qchan = dpaa2_comp->qchan;
+	spin_lock_irqsave(&qchan->queue_lock, flags);
+	list_del(&dpaa2_comp->list);
+	list_add_tail(&dpaa2_comp->list, &qchan->comp_free);
+	spin_unlock_irqrestore(&qchan->queue_lock, flags);
+}
+
+static int dpaa2_dpdmai_init_channels(struct dpaa2_qdma_engine *dpaa2_qdma)
+{
+	struct dpaa2_qdma_priv *priv = dpaa2_qdma->priv;
+	struct dpaa2_qdma_chan *dpaa2_chan;
+	int num = priv->num_pairs;
+	int i;
+
+	INIT_LIST_HEAD(&dpaa2_qdma->dma_dev.channels);
+	for (i = 0; i < dpaa2_qdma->n_chans; i++) {
+		dpaa2_chan = &dpaa2_qdma->chans[i];
+		dpaa2_chan->qdma = dpaa2_qdma;
+		dpaa2_chan->fqid = priv->tx_fqid[i % num];
+		dpaa2_chan->vchan.desc_free = dpaa2_qdma_free_desc;
+		vchan_init(&dpaa2_chan->vchan, &dpaa2_qdma->dma_dev);
+		spin_lock_init(&dpaa2_chan->queue_lock);
+		INIT_LIST_HEAD(&dpaa2_chan->comp_used);
+		INIT_LIST_HEAD(&dpaa2_chan->comp_free);
+	}
+	return 0;
+}
+
+static int dpaa2_qdma_probe(struct fsl_mc_device *dpdmai_dev)
+{
+	struct device *dev = &dpdmai_dev->dev;
+	struct dpaa2_qdma_engine *dpaa2_qdma;
+	struct dpaa2_qdma_priv *priv;
+	int err;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	dev_set_drvdata(dev, priv);
+	priv->dpdmai_dev = dpdmai_dev;
+
+	priv->iommu_domain = iommu_get_domain_for_dev(dev);
+	if (priv->iommu_domain)
+		smmu_disable = false;
+
+	/* obtain a MC portal */
+	err = fsl_mc_portal_allocate(dpdmai_dev, 0, &priv->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "MC portal allocation failed\n");
+		goto err_mcportal;
+	}
+
+	/* DPDMAI initialization */
+	err = dpaa2_qdma_setup(dpdmai_dev);
+	if (err) {
+		dev_err(dev, "dpaa2_dpdmai_setup() failed\n");
+		goto err_dpdmai_setup;
+	}
+
+	/* DPIO */
+	err = dpaa2_qdma_dpio_setup(priv);
+	if (err) {
+		dev_err(dev, "dpaa2_dpdmai_dpio_setup() failed\n");
+		goto err_dpio_setup;
+	}
+
+	/* DPDMAI binding to DPIO */
+	err = dpaa2_dpdmai_bind(priv);
+	if (err) {
+		dev_err(dev, "dpaa2_dpdmai_bind() failed\n");
+		goto err_bind;
+	}
+
+	/* DPDMAI enable */
+	err = dpdmai_enable(priv->mc_io, 0, dpdmai_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpdmai_enable() faile\n");
+		goto err_enable;
+	}
+
+	dpaa2_qdma = kzalloc(sizeof(*dpaa2_qdma), GFP_KERNEL);
+	if (!dpaa2_qdma) {
+		err = -ENOMEM;
+		goto err_eng;
+	}
+
+	priv->dpaa2_qdma = dpaa2_qdma;
+	dpaa2_qdma->priv = priv;
+
+	dpaa2_qdma->desc_allocated = 0;
+	dpaa2_qdma->n_chans = NUM_CH;
+
+	dpaa2_dpdmai_init_channels(dpaa2_qdma);
+
+	if (soc_device_match(soc_fixup_tuning))
+		dpaa2_qdma->qdma_wrtype_fixup = true;
+	else
+		dpaa2_qdma->qdma_wrtype_fixup = false;
+
+	dma_cap_set(DMA_PRIVATE, dpaa2_qdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_SLAVE, dpaa2_qdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, dpaa2_qdma->dma_dev.cap_mask);
+
+	dpaa2_qdma->dma_dev.dev = dev;
+	dpaa2_qdma->dma_dev.device_alloc_chan_resources =
+		dpaa2_qdma_alloc_chan_resources;
+	dpaa2_qdma->dma_dev.device_free_chan_resources =
+		dpaa2_qdma_free_chan_resources;
+	dpaa2_qdma->dma_dev.device_tx_status = dma_cookie_status;
+	dpaa2_qdma->dma_dev.device_prep_dma_memcpy = dpaa2_qdma_prep_memcpy;
+	dpaa2_qdma->dma_dev.device_issue_pending = dpaa2_qdma_issue_pending;
+
+	err = dma_async_device_register(&dpaa2_qdma->dma_dev);
+	if (err) {
+		dev_err(dev, "Can't register NXP QDMA engine.\n");
+		goto err_dpaa2_qdma;
+	}
+
+	return 0;
+
+err_dpaa2_qdma:
+	kfree(dpaa2_qdma);
+err_eng:
+	dpdmai_disable(priv->mc_io, 0, dpdmai_dev->mc_handle);
+err_enable:
+	dpaa2_dpdmai_dpio_unbind(priv);
+err_bind:
+	dpaa2_dpmai_store_free(priv);
+	dpaa2_dpdmai_dpio_free(priv);
+err_dpio_setup:
+	kfree(priv->ppriv);
+	dpdmai_close(priv->mc_io, 0, dpdmai_dev->mc_handle);
+err_dpdmai_setup:
+	fsl_mc_portal_free(priv->mc_io);
+err_mcportal:
+	kfree(priv);
+	dev_set_drvdata(dev, NULL);
+	return err;
+}
+
+static int dpaa2_qdma_remove(struct fsl_mc_device *ls_dev)
+{
+	struct dpaa2_qdma_engine *dpaa2_qdma;
+	struct dpaa2_qdma_priv *priv;
+	struct device *dev;
+
+	dev = &ls_dev->dev;
+	priv = dev_get_drvdata(dev);
+	dpaa2_qdma = priv->dpaa2_qdma;
+
+	dpdmai_disable(priv->mc_io, 0, ls_dev->mc_handle);
+	dpaa2_dpdmai_dpio_unbind(priv);
+	dpaa2_dpmai_store_free(priv);
+	dpaa2_dpdmai_dpio_free(priv);
+	dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle);
+	fsl_mc_portal_free(priv->mc_io);
+	dev_set_drvdata(dev, NULL);
+	dpaa2_dpdmai_free_channels(dpaa2_qdma);
+
+	dma_async_device_unregister(&dpaa2_qdma->dma_dev);
+	kfree(priv);
+	kfree(dpaa2_qdma);
+
+	return 0;
+}
+
+static const struct fsl_mc_device_id dpaa2_qdma_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpdmai",
+	},
+	{ .vendor = 0x0 }
+};
+
+static struct fsl_mc_driver dpaa2_qdma_driver = {
+	.driver		= {
+		.name	= "dpaa2-qdma",
+		.owner  = THIS_MODULE,
+	},
+	.probe          = dpaa2_qdma_probe,
+	.remove		= dpaa2_qdma_remove,
+	.match_id_table	= dpaa2_qdma_id_table
+};
+
+static int __init dpaa2_qdma_driver_init(void)
+{
+	return fsl_mc_driver_register(&(dpaa2_qdma_driver));
+}
+late_initcall(dpaa2_qdma_driver_init);
+
+static void __exit fsl_qdma_exit(void)
+{
+	fsl_mc_driver_unregister(&(dpaa2_qdma_driver));
+}
+module_exit(fsl_qdma_exit);
+
+MODULE_ALIAS("platform:fsl-dpaa2-qdma");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NXP Layerscape DPAA2 qDMA engine driver");
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h
new file mode 100644
index 000000000000..7d571849c569
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 NXP */
+
+#ifndef __DPAA2_QDMA_H
+#define __DPAA2_QDMA_H
+
+#define DPAA2_QDMA_STORE_SIZE 16
+#define NUM_CH 8
+
+struct dpaa2_qdma_sd_d {
+	u32 rsv:32;
+	union {
+		struct {
+			u32 ssd:12; /* souce stride distance */
+			u32 sss:12; /* souce stride size */
+			u32 rsv1:8;
+		} sdf;
+		struct {
+			u32 dsd:12; /* Destination stride distance */
+			u32 dss:12; /* Destination stride size */
+			u32 rsv2:8;
+		} ddf;
+	} df;
+	u32 rbpcmd;	/* Route-by-port command */
+	u32 cmd;
+} __attribute__((__packed__));
+
+/* Source descriptor command read transaction type for RBP=0: */
+/* coherent copy of cacheable memory */
+#define QDMA_SD_CMD_RDTTYPE_COHERENT (0xb << 28)
+/* Destination descriptor command write transaction type for RBP=0: */
+/* coherent copy of cacheable memory */
+#define QDMA_DD_CMD_WRTTYPE_COHERENT (0x6 << 28)
+#define LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT (0xb << 28)
+
+#define QMAN_FD_FMT_ENABLE	BIT(0) /* frame list table enable */
+#define QMAN_FD_BMT_ENABLE	BIT(15) /* bypass memory translation */
+#define QMAN_FD_BMT_DISABLE	(0) /* bypass memory translation */
+#define QMAN_FD_SL_DISABLE	(0) /* short lengthe disabled */
+#define QMAN_FD_SL_ENABLE	BIT(14) /* short lengthe enabled */
+
+#define QDMA_FINAL_BIT_DISABLE	(0) /* final bit disable */
+#define QDMA_FINAL_BIT_ENABLE	BIT(31) /* final bit enable */
+
+#define QDMA_FD_SHORT_FORMAT	BIT(11) /* short format */
+#define QDMA_FD_LONG_FORMAT	(0) /* long format */
+#define QDMA_SER_DISABLE	(8) /* no notification */
+#define QDMA_SER_CTX		BIT(8) /* notification by FQD_CTX[fqid] */
+#define QDMA_SER_DEST		(2 << 8) /* notification by destination desc */
+#define QDMA_SER_BOTH		(3 << 8) /* soruce and dest notification */
+#define QDMA_FD_SPF_ENALBE	BIT(30) /* source prefetch enable */
+
+#define QMAN_FD_VA_ENABLE	BIT(14) /* Address used is virtual address */
+#define QMAN_FD_VA_DISABLE	(0)/* Address used is a real address */
+/* Flow Context: 49bit physical address */
+#define QMAN_FD_CBMT_ENABLE	BIT(15)
+#define QMAN_FD_CBMT_DISABLE	(0) /* Flow Context: 64bit virtual address */
+#define QMAN_FD_SC_DISABLE	(0) /* stashing control */
+
+#define QDMA_FL_FMT_SBF		(0x0) /* Single buffer frame */
+#define QDMA_FL_FMT_SGE		(0x2) /* Scatter gather frame */
+#define QDMA_FL_BMT_ENABLE	BIT(15) /* enable bypass memory translation */
+#define QDMA_FL_BMT_DISABLE	(0x0) /* enable bypass memory translation */
+#define QDMA_FL_SL_LONG		(0x0)/* long length */
+#define QDMA_FL_SL_SHORT	(0x1) /* short length */
+#define QDMA_FL_F		(0x1)/* last frame list bit */
+
+/*Description of Frame list table structure*/
+struct dpaa2_qdma_chan {
+	struct dpaa2_qdma_engine	*qdma;
+	struct virt_dma_chan		vchan;
+	struct virt_dma_desc		vdesc;
+	enum dma_status			status;
+	u32				fqid;
+
+	/* spinlock used by dpaa2 qdma driver */
+	spinlock_t			queue_lock;
+	struct dma_pool			*fd_pool;
+	struct dma_pool			*fl_pool;
+	struct dma_pool			*sdd_pool;
+
+	struct list_head		comp_used;
+	struct list_head		comp_free;
+
+};
+
+struct dpaa2_qdma_comp {
+	dma_addr_t		fd_bus_addr;
+	dma_addr_t		fl_bus_addr;
+	dma_addr_t		desc_bus_addr;
+	struct dpaa2_fd		*fd_virt_addr;
+	struct dpaa2_fl_entry	*fl_virt_addr;
+	struct dpaa2_qdma_sd_d	*desc_virt_addr;
+	struct dpaa2_qdma_chan	*qchan;
+	struct virt_dma_desc	vdesc;
+	struct list_head	list;
+};
+
+struct dpaa2_qdma_engine {
+	struct dma_device	dma_dev;
+	u32			n_chans;
+	struct dpaa2_qdma_chan	chans[NUM_CH];
+	int			qdma_wrtype_fixup;
+	int			desc_allocated;
+
+	struct dpaa2_qdma_priv *priv;
+};
+
+/*
+ * dpaa2_qdma_priv - driver private data
+ */
+struct dpaa2_qdma_priv {
+	int dpqdma_id;
+
+	struct iommu_domain	*iommu_domain;
+	struct dpdmai_attr	dpdmai_attr;
+	struct device		*dev;
+	struct fsl_mc_io	*mc_io;
+	struct fsl_mc_device	*dpdmai_dev;
+	u8			num_pairs;
+
+	struct dpaa2_qdma_engine	*dpaa2_qdma;
+	struct dpaa2_qdma_priv_per_prio	*ppriv;
+
+	struct dpdmai_rx_queue_attr rx_queue_attr[DPDMAI_PRIO_NUM];
+	u32 tx_fqid[DPDMAI_PRIO_NUM];
+};
+
+struct dpaa2_qdma_priv_per_prio {
+	int req_fqid;
+	int rsp_fqid;
+	int prio;
+
+	struct dpaa2_io_store *store;
+	struct dpaa2_io_notification_ctx nctx;
+
+	struct dpaa2_qdma_priv *priv;
+};
+
+static struct soc_device_attribute soc_fixup_tuning[] = {
+	{ .family = "QorIQ LX2160A"},
+	{ },
+};
+
+/* FD pool size: one FD + 3 Frame list + 2 source/destination descriptor */
+#define FD_POOL_SIZE (sizeof(struct dpaa2_fd) + \
+		sizeof(struct dpaa2_fl_entry) * 3 + \
+		sizeof(struct dpaa2_qdma_sd_d) * 2)
+
+static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma);
+static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan,
+				   struct list_head *head);
+#endif /* __DPAA2_QDMA_H */
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.c b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c
new file mode 100644
index 000000000000..f8d22115154a
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2019 NXP
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/fsl/mc.h>
+#include "dpdmai.h"
+
+struct dpdmai_rsp_get_attributes {
+	__le32 id;
+	u8 num_of_priorities;
+	u8 pad0[3];
+	__le16 major;
+	__le16 minor;
+};
+
+struct dpdmai_cmd_queue {
+	__le32 dest_id;
+	u8 priority;
+	u8 queue;
+	u8 dest_type;
+	u8 pad;
+	__le64 user_ctx;
+	union {
+		__le32 options;
+		__le32 fqid;
+	};
+};
+
+struct dpdmai_rsp_get_tx_queue {
+	__le64 pad;
+	__le32 fqid;
+};
+
+#define MC_CMD_OP(_cmd, _param, _offset, _width, _type, _arg) \
+	((_cmd).params[_param] |= mc_enc((_offset), (_width), _arg))
+
+/* cmd, param, offset, width, type, arg_name */
+#define DPDMAI_CMD_CREATE(cmd, cfg) \
+do { \
+	MC_CMD_OP(cmd, 0, 8,  8,  u8,  (cfg)->priorities[0]);\
+	MC_CMD_OP(cmd, 0, 16, 8,  u8,  (cfg)->priorities[1]);\
+} while (0)
+
+static inline u64 mc_enc(int lsoffset, int width, u64 val)
+{
+	return (val & MAKE_UMASK64(width)) << lsoffset;
+}
+
+/**
+ * dpdmai_open() - Open a control session for the specified object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpdmai_id:	DPDMAI unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpdmai_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags,
+		int dpdmai_id, u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	__le64 *cmd_dpdmai_id;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_OPEN,
+					  cmd_flags, 0);
+
+	cmd_dpdmai_id = cmd.params;
+	*cmd_dpdmai_id = cpu_to_le32(dpdmai_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_open);
+
+/**
+ * dpdmai_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CLOSE,
+					  cmd_flags, token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_close);
+
+/**
+ * dpdmai_create() - Create the DPDMAI object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @cfg:	Configuration structure
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * Create the DPDMAI object, allocate required resources and
+ * perform required initialization.
+ *
+ * The object can be created either by declaring it in the
+ * DPL file, or by calling this function.
+ *
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent calls to
+ * this specific object. For objects that are created using the
+ * DPL file, call dpdmai_open() function to get an authentication
+ * token first.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags,
+		  const struct dpdmai_cfg *cfg, u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CREATE,
+					  cmd_flags, 0);
+	DPDMAI_CMD_CREATE(cmd, cfg);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
+/**
+ * dpdmai_enable() - Enable the DPDMAI, allow sending and receiving frames.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_ENABLE,
+					  cmd_flags, token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_enable);
+
+/**
+ * dpdmai_disable() - Disable the DPDMAI, stop sending and receiving frames.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DISABLE,
+					  cmd_flags, token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_disable);
+
+/**
+ * dpdmai_reset() - Reset the DPDMAI, returns the object to initial state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_RESET,
+					  cmd_flags, token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_reset);
+
+/**
+ * dpdmai_get_attributes() - Retrieve DPDMAI attributes.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ * @attr:	Returned object's attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			  u16 token, struct dpdmai_attr *attr)
+{
+	struct dpdmai_rsp_get_attributes *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_ATTR,
+					  cmd_flags, token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpdmai_rsp_get_attributes *)cmd.params;
+	attr->id = le32_to_cpu(rsp_params->id);
+	attr->version.major = le16_to_cpu(rsp_params->major);
+	attr->version.minor = le16_to_cpu(rsp_params->minor);
+	attr->num_of_priorities = rsp_params->num_of_priorities;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_attributes);
+
+/**
+ * dpdmai_set_rx_queue() - Set Rx queue configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ * @priority:	Select the queue relative to number of
+ *		priorities configured at DPDMAI creation
+ * @cfg:	Rx queue configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 priority, const struct dpdmai_rx_queue_cfg *cfg)
+{
+	struct dpdmai_cmd_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_RX_QUEUE,
+					  cmd_flags, token);
+
+	cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->priority = cfg->dest_cfg.priority;
+	cmd_params->queue = priority;
+	cmd_params->dest_type = cfg->dest_cfg.dest_type;
+	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+	cmd_params->options = cpu_to_le32(cfg->options);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_set_rx_queue);
+
+/**
+ * dpdmai_get_rx_queue() - Retrieve Rx queue attributes.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ * @priority:	Select the queue relative to number of
+ *				priorities configured at DPDMAI creation
+ * @attr:	Returned Rx queue attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 priority, struct dpdmai_rx_queue_attr *attr)
+{
+	struct dpdmai_cmd_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_RX_QUEUE,
+					  cmd_flags, token);
+
+	cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+	cmd_params->queue = priority;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	attr->dest_cfg.dest_id = le32_to_cpu(cmd_params->dest_id);
+	attr->dest_cfg.priority = cmd_params->priority;
+	attr->dest_cfg.dest_type = cmd_params->dest_type;
+	attr->user_ctx = le64_to_cpu(cmd_params->user_ctx);
+	attr->fqid = le32_to_cpu(cmd_params->fqid);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_rx_queue);
+
+/**
+ * dpdmai_get_tx_queue() - Retrieve Tx queue attributes.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPDMAI object
+ * @priority:	Select the queue relative to number of
+ *			priorities configured at DPDMAI creation
+ * @fqid:	Returned Tx queue
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			u16 token, u8 priority, u32 *fqid)
+{
+	struct dpdmai_rsp_get_tx_queue *rsp_params;
+	struct dpdmai_cmd_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_TX_QUEUE,
+					  cmd_flags, token);
+
+	cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+	cmd_params->queue = priority;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+
+	rsp_params = (struct dpdmai_rsp_get_tx_queue *)cmd.params;
+	*fqid = le32_to_cpu(rsp_params->fqid);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_tx_queue);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.h b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h
new file mode 100644
index 000000000000..6d785093da8e
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 NXP */
+
+#ifndef __FSL_DPDMAI_H
+#define __FSL_DPDMAI_H
+
+/* DPDMAI Version */
+#define DPDMAI_VER_MAJOR	2
+#define DPDMAI_VER_MINOR	2
+
+#define DPDMAI_CMD_BASE_VERSION	0
+#define DPDMAI_CMD_ID_OFFSET	4
+
+#define DPDMAI_CMDID_FORMAT(x)	(((x) << DPDMAI_CMD_ID_OFFSET) | \
+				DPDMAI_CMD_BASE_VERSION)
+
+/* Command IDs */
+#define DPDMAI_CMDID_CLOSE		DPDMAI_CMDID_FORMAT(0x800)
+#define DPDMAI_CMDID_OPEN               DPDMAI_CMDID_FORMAT(0x80E)
+#define DPDMAI_CMDID_CREATE             DPDMAI_CMDID_FORMAT(0x90E)
+
+#define DPDMAI_CMDID_ENABLE             DPDMAI_CMDID_FORMAT(0x002)
+#define DPDMAI_CMDID_DISABLE            DPDMAI_CMDID_FORMAT(0x003)
+#define DPDMAI_CMDID_GET_ATTR           DPDMAI_CMDID_FORMAT(0x004)
+#define DPDMAI_CMDID_RESET              DPDMAI_CMDID_FORMAT(0x005)
+#define DPDMAI_CMDID_IS_ENABLED         DPDMAI_CMDID_FORMAT(0x006)
+
+#define DPDMAI_CMDID_SET_IRQ            DPDMAI_CMDID_FORMAT(0x010)
+#define DPDMAI_CMDID_GET_IRQ            DPDMAI_CMDID_FORMAT(0x011)
+#define DPDMAI_CMDID_SET_IRQ_ENABLE     DPDMAI_CMDID_FORMAT(0x012)
+#define DPDMAI_CMDID_GET_IRQ_ENABLE     DPDMAI_CMDID_FORMAT(0x013)
+#define DPDMAI_CMDID_SET_IRQ_MASK       DPDMAI_CMDID_FORMAT(0x014)
+#define DPDMAI_CMDID_GET_IRQ_MASK       DPDMAI_CMDID_FORMAT(0x015)
+#define DPDMAI_CMDID_GET_IRQ_STATUS     DPDMAI_CMDID_FORMAT(0x016)
+#define DPDMAI_CMDID_CLEAR_IRQ_STATUS	DPDMAI_CMDID_FORMAT(0x017)
+
+#define DPDMAI_CMDID_SET_RX_QUEUE	DPDMAI_CMDID_FORMAT(0x1A0)
+#define DPDMAI_CMDID_GET_RX_QUEUE       DPDMAI_CMDID_FORMAT(0x1A1)
+#define DPDMAI_CMDID_GET_TX_QUEUE       DPDMAI_CMDID_FORMAT(0x1A2)
+
+#define MC_CMD_HDR_TOKEN_O 32  /* Token field offset */
+#define MC_CMD_HDR_TOKEN_S 16  /* Token field size */
+
+#define MAKE_UMASK64(_width) \
+	((u64)((_width) < 64 ? ((u64)1 << (_width)) - 1 : (u64)-1))
+
+/* Data Path DMA Interface API
+ * Contains initialization APIs and runtime control APIs for DPDMAI
+ */
+
+/**
+ * Maximum number of Tx/Rx priorities per DPDMAI object
+ */
+#define DPDMAI_PRIO_NUM		2
+
+/* DPDMAI queue modification options */
+
+/**
+ * Select to modify the user's context associated with the queue
+ */
+#define DPDMAI_QUEUE_OPT_USER_CTX	0x1
+
+/**
+ * Select to modify the queue's destination
+ */
+#define DPDMAI_QUEUE_OPT_DEST		0x2
+
+/**
+ * struct dpdmai_cfg - Structure representing DPDMAI configuration
+ * @priorities: Priorities for the DMA hardware processing; valid priorities are
+ *	configured with values 1-8; the entry following last valid entry
+ *	should be configured with 0
+ */
+struct dpdmai_cfg {
+	u8 priorities[DPDMAI_PRIO_NUM];
+};
+
+/**
+ * struct dpdmai_attr - Structure representing DPDMAI attributes
+ * @id: DPDMAI object ID
+ * @version: DPDMAI version
+ * @num_of_priorities: number of priorities
+ */
+struct dpdmai_attr {
+	int	id;
+	/**
+	 * struct version - DPDMAI version
+	 * @major: DPDMAI major version
+	 * @minor: DPDMAI minor version
+	 */
+	struct {
+		u16 major;
+		u16 minor;
+	} version;
+	u8 num_of_priorities;
+};
+
+/**
+ * enum dpdmai_dest - DPDMAI destination types
+ * @DPDMAI_DEST_NONE: Unassigned destination; The queue is set in parked mode
+ *	and does not generate FQDAN notifications; user is expected to dequeue
+ *	from the queue based on polling or other user-defined method
+ * @DPDMAI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN
+ *	notifications to the specified DPIO; user is expected to dequeue
+ *	from the queue only after notification is received
+ * @DPDMAI_DEST_DPCON: The queue is set in schedule mode and does not generate
+ *	FQDAN notifications, but is connected to the specified DPCON object;
+ *	user is expected to dequeue from the DPCON channel
+ */
+enum dpdmai_dest {
+	DPDMAI_DEST_NONE = 0,
+	DPDMAI_DEST_DPIO = 1,
+	DPDMAI_DEST_DPCON = 2
+};
+
+/**
+ * struct dpdmai_dest_cfg - Structure representing DPDMAI destination parameters
+ * @dest_type: Destination type
+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type
+ * @priority: Priority selection within the DPIO or DPCON channel; valid values
+ *	are 0-1 or 0-7, depending on the number of priorities in that
+ *	channel; not relevant for 'DPDMAI_DEST_NONE' option
+ */
+struct dpdmai_dest_cfg {
+	enum dpdmai_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+/**
+ * struct dpdmai_rx_queue_cfg - DPDMAI RX queue configuration
+ * @options: Flags representing the suggested modifications to the queue;
+ *	Use any combination of 'DPDMAI_QUEUE_OPT_<X>' flags
+ * @user_ctx: User context value provided in the frame descriptor of each
+ *	dequeued frame;
+ *	valid only if 'DPDMAI_QUEUE_OPT_USER_CTX' is contained in 'options'
+ * @dest_cfg: Queue destination parameters;
+ *	valid only if 'DPDMAI_QUEUE_OPT_DEST' is contained in 'options'
+ */
+struct dpdmai_rx_queue_cfg {
+	struct dpdmai_dest_cfg dest_cfg;
+	u32 options;
+	u64 user_ctx;
+
+};
+
+/**
+ * struct dpdmai_rx_queue_attr - Structure representing attributes of Rx queues
+ * @user_ctx:  User context value provided in the frame descriptor of each
+ *	 dequeued frame
+ * @dest_cfg: Queue destination configuration
+ * @fqid: Virtual FQID value to be used for dequeue operations
+ */
+struct dpdmai_rx_queue_attr {
+	struct dpdmai_dest_cfg	dest_cfg;
+	u64 user_ctx;
+	u32 fqid;
+};
+
+int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags,
+		int dpdmai_id, u16 *token);
+int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags,
+		  const struct dpdmai_cfg *cfg, u16 *token);
+int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			  u16 token, struct dpdmai_attr	*attr);
+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 priority, const struct dpdmai_rx_queue_cfg *cfg);
+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 priority, struct dpdmai_rx_queue_attr *attr);
+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			u16 token, u8 priority, u32 *fqid);
+
+#endif /* __FSL_DPDMAI_H */
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index b1a7ca91701a..5697c3622699 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -109,10 +109,15 @@ void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 	void __iomem *muxaddr;
 	unsigned int chans_per_mux, ch_off;
+	int endian_diff[4] = {3, 1, -1, -3};
 	u32 dmamux_nr = fsl_chan->edma->drvdata->dmamuxs;
 
 	chans_per_mux = fsl_chan->edma->n_chans / dmamux_nr;
 	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
+
+	if (fsl_chan->edma->drvdata->mux_swap)
+		ch_off += endian_diff[ch_off % 4];
+
 	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
 	slot = EDMAMUX_CHCFG_SOURCE(slot);
 
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index 5eaa2902ed39..67e422590c9a 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -147,6 +147,7 @@ struct fsl_edma_drvdata {
 	enum edma_version	version;
 	u32			dmamuxs;
 	bool			has_dmaclk;
+	bool			mux_swap;
 	int			(*setup_irq)(struct platform_device *pdev,
 					     struct fsl_edma_engine *fsl_edma);
 };
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index b626c06ac2e0..eff7ebd8cf35 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -233,6 +233,13 @@ static struct fsl_edma_drvdata vf610_data = {
 	.setup_irq = fsl_edma_irq_init,
 };
 
+static struct fsl_edma_drvdata ls1028a_data = {
+	.version = v1,
+	.dmamuxs = DMAMUX_NR,
+	.mux_swap = true,
+	.setup_irq = fsl_edma_irq_init,
+};
+
 static struct fsl_edma_drvdata imx7ulp_data = {
 	.version = v3,
 	.dmamuxs = 1,
@@ -242,6 +249,7 @@ static struct fsl_edma_drvdata imx7ulp_data = {
 
 static const struct of_device_id fsl_edma_dt_ids[] = {
 	{ .compatible = "fsl,vf610-edma", .data = &vf610_data},
+	{ .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data},
 	{ .compatible = "fsl,imx7ulp-edma", .data = &imx7ulp_data},
 	{ /* sentinel */ }
 };
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index 06664fbd2d91..95cc0256b387 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -304,7 +304,7 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
 
 	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
 
-	if (!fsl_queue->comp_pool && !fsl_queue->comp_pool)
+	if (!fsl_queue->comp_pool && !fsl_queue->desc_pool)
 		return;
 
 	list_for_each_entry_safe(comp_temp, _comp_temp,
@@ -1155,6 +1155,9 @@ static int fsl_qdma_probe(struct platform_device *pdev)
 		return ret;
 
 	fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
+	if (fsl_qdma->irq_base < 0)
+		return fsl_qdma->irq_base;
+
 	fsl_qdma->feature = of_property_read_bool(np, "big-endian");
 	INIT_LIST_HEAD(&fsl_qdma->dma_dev.channels);
 
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
new file mode 100644
index 000000000000..ed3619266a48
--- /dev/null
+++ b/drivers/dma/hisi_dma.c
@@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019 HiSilicon Limited. */
+#include <linux/bitfield.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "virt-dma.h"
+
+#define HISI_DMA_SQ_BASE_L		0x0
+#define HISI_DMA_SQ_BASE_H		0x4
+#define HISI_DMA_SQ_DEPTH		0x8
+#define HISI_DMA_SQ_TAIL_PTR		0xc
+#define HISI_DMA_CQ_BASE_L		0x10
+#define HISI_DMA_CQ_BASE_H		0x14
+#define HISI_DMA_CQ_DEPTH		0x18
+#define HISI_DMA_CQ_HEAD_PTR		0x1c
+#define HISI_DMA_CTRL0			0x20
+#define HISI_DMA_CTRL0_QUEUE_EN_S	0
+#define HISI_DMA_CTRL0_QUEUE_PAUSE_S	4
+#define HISI_DMA_CTRL1			0x24
+#define HISI_DMA_CTRL1_QUEUE_RESET_S	0
+#define HISI_DMA_Q_FSM_STS		0x30
+#define HISI_DMA_FSM_STS_MASK		GENMASK(3, 0)
+#define HISI_DMA_INT_STS		0x40
+#define HISI_DMA_INT_STS_MASK		GENMASK(12, 0)
+#define HISI_DMA_INT_MSK		0x44
+#define HISI_DMA_MODE			0x217c
+#define HISI_DMA_OFFSET			0x100
+
+#define HISI_DMA_MSI_NUM		30
+#define HISI_DMA_CHAN_NUM		30
+#define HISI_DMA_Q_DEPTH_VAL		1024
+
+#define PCI_BAR_2			2
+
+enum hisi_dma_mode {
+	EP = 0,
+	RC,
+};
+
+enum hisi_dma_chan_status {
+	DISABLE = -1,
+	IDLE = 0,
+	RUN,
+	CPL,
+	PAUSE,
+	HALT,
+	ABORT,
+	WAIT,
+	BUFFCLR,
+};
+
+struct hisi_dma_sqe {
+	__le32 dw0;
+#define OPCODE_MASK			GENMASK(3, 0)
+#define OPCODE_SMALL_PACKAGE		0x1
+#define OPCODE_M2M			0x4
+#define LOCAL_IRQ_EN			BIT(8)
+#define ATTR_SRC_MASK			GENMASK(14, 12)
+	__le32 dw1;
+	__le32 dw2;
+#define ATTR_DST_MASK			GENMASK(26, 24)
+	__le32 length;
+	__le64 src_addr;
+	__le64 dst_addr;
+};
+
+struct hisi_dma_cqe {
+	__le32 rsv0;
+	__le32 rsv1;
+	__le16 sq_head;
+	__le16 rsv2;
+	__le16 rsv3;
+	__le16 w0;
+#define STATUS_MASK			GENMASK(15, 1)
+#define STATUS_SUCC			0x0
+#define VALID_BIT			BIT(0)
+};
+
+struct hisi_dma_desc {
+	struct virt_dma_desc vd;
+	struct hisi_dma_sqe sqe;
+};
+
+struct hisi_dma_chan {
+	struct virt_dma_chan vc;
+	struct hisi_dma_dev *hdma_dev;
+	struct hisi_dma_sqe *sq;
+	struct hisi_dma_cqe *cq;
+	dma_addr_t sq_dma;
+	dma_addr_t cq_dma;
+	u32 sq_tail;
+	u32 cq_head;
+	u32 qp_num;
+	enum hisi_dma_chan_status status;
+	struct hisi_dma_desc *desc;
+};
+
+struct hisi_dma_dev {
+	struct pci_dev *pdev;
+	void __iomem *base;
+	struct dma_device dma_dev;
+	u32 chan_num;
+	u32 chan_depth;
+	struct hisi_dma_chan chan[];
+};
+
+static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct hisi_dma_chan, vc.chan);
+}
+
+static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct hisi_dma_desc, vd);
+}
+
+static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index,
+				       u32 val)
+{
+	writel_relaxed(val, base + reg + index * HISI_DMA_OFFSET);
+}
+
+static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
+{
+	u32 tmp;
+
+	tmp = readl_relaxed(addr);
+	tmp = val ? tmp | BIT(pos) : tmp & ~BIT(pos);
+	writel_relaxed(tmp, addr);
+}
+
+static void hisi_dma_free_irq_vectors(void *data)
+{
+	pci_free_irq_vectors(data);
+}
+
+static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+			       bool pause)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_PAUSE_S, pause);
+}
+
+static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+				bool enable)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_EN_S, enable);
+}
+
+static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_INT_MSK, qp_index,
+			    HISI_DMA_INT_STS_MASK);
+}
+
+static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	void __iomem *base = hdma_dev->base;
+
+	hisi_dma_chan_write(base, HISI_DMA_INT_STS, qp_index,
+			    HISI_DMA_INT_STS_MASK);
+	hisi_dma_chan_write(base, HISI_DMA_INT_MSK, qp_index, 0);
+}
+
+static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL1 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL1_QUEUE_RESET_S, 1);
+}
+
+static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+}
+
+static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
+{
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	u32 index = chan->qp_num, tmp;
+	int ret;
+
+	hisi_dma_pause_dma(hdma_dev, index, true);
+	hisi_dma_enable_dma(hdma_dev, index, false);
+	hisi_dma_mask_irq(hdma_dev, index);
+
+	ret = readl_relaxed_poll_timeout(hdma_dev->base +
+		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
+		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, 10, 1000);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n");
+		WARN_ON(1);
+	}
+
+	hisi_dma_do_reset(hdma_dev, index);
+	hisi_dma_reset_qp_point(hdma_dev, index);
+	hisi_dma_pause_dma(hdma_dev, index, false);
+	hisi_dma_enable_dma(hdma_dev, index, true);
+	hisi_dma_unmask_irq(hdma_dev, index);
+
+	ret = readl_relaxed_poll_timeout(hdma_dev->base +
+		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
+		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, 10, 1000);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n");
+		WARN_ON(1);
+	}
+}
+
+static void hisi_dma_free_chan_resources(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+
+	hisi_dma_reset_hw_chan(chan);
+	vchan_free_chan_resources(&chan->vc);
+
+	memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth);
+	memset(chan->cq, 0, sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth);
+	chan->sq_tail = 0;
+	chan->cq_head = 0;
+	chan->status = DISABLE;
+}
+
+static void hisi_dma_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(to_hisi_dma_desc(vd));
+}
+
+static struct dma_async_tx_descriptor *
+hisi_dma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dst, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	struct hisi_dma_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sqe.length = cpu_to_le32(len);
+	desc->sqe.src_addr = cpu_to_le64(src);
+	desc->sqe.dst_addr = cpu_to_le64(dst);
+
+	return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+}
+
+static enum dma_status
+hisi_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		   struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(c, cookie, txstate);
+}
+
+static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
+{
+	struct hisi_dma_sqe *sqe = chan->sq + chan->sq_tail;
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	struct hisi_dma_desc *desc;
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd) {
+		dev_err(&hdma_dev->pdev->dev, "no issued task!\n");
+		chan->desc = NULL;
+		return;
+	}
+	list_del(&vd->node);
+	desc = to_hisi_dma_desc(vd);
+	chan->desc = desc;
+
+	memcpy(sqe, &desc->sqe, sizeof(struct hisi_dma_sqe));
+
+	/* update other field in sqe */
+	sqe->dw0 = cpu_to_le32(FIELD_PREP(OPCODE_MASK, OPCODE_M2M));
+	sqe->dw0 |= cpu_to_le32(LOCAL_IRQ_EN);
+
+	/* make sure data has been updated in sqe */
+	wmb();
+
+	/* update sq tail, point to new sqe position */
+	chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth;
+
+	/* update sq_tail to trigger a new task */
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, chan->qp_num,
+			    chan->sq_tail);
+}
+
+static void hisi_dma_issue_pending(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	if (vchan_issue_pending(&chan->vc))
+		hisi_dma_start_transfer(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int hisi_dma_terminate_all(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, true);
+	if (chan->desc) {
+		vchan_terminate_vdesc(&chan->desc->vd);
+		chan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vc, &head);
+	hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, false);
+
+	return 0;
+}
+
+static void hisi_dma_synchronize(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+
+	vchan_synchronize(&chan->vc);
+}
+
+static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev)
+{
+	size_t sq_size = sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth;
+	size_t cq_size = sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth;
+	struct device *dev = &hdma_dev->pdev->dev;
+	struct hisi_dma_chan *chan;
+	int i;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		chan = &hdma_dev->chan[i];
+		chan->sq = dmam_alloc_coherent(dev, sq_size, &chan->sq_dma,
+					       GFP_KERNEL);
+		if (!chan->sq)
+			return -ENOMEM;
+
+		chan->cq = dmam_alloc_coherent(dev, cq_size, &chan->cq_dma,
+					       GFP_KERNEL);
+		if (!chan->cq)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	struct hisi_dma_chan *chan = &hdma_dev->chan[index];
+	u32 hw_depth = hdma_dev->chan_depth - 1;
+	void __iomem *base = hdma_dev->base;
+
+	/* set sq, cq base */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_L, index,
+			    lower_32_bits(chan->sq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_H, index,
+			    upper_32_bits(chan->sq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_L, index,
+			    lower_32_bits(chan->cq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_H, index,
+			    upper_32_bits(chan->cq_dma));
+
+	/* set sq, cq depth */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_DEPTH, index, hw_depth);
+	hisi_dma_chan_write(base, HISI_DMA_CQ_DEPTH, index, hw_depth);
+
+	/* init sq tail and cq head */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+}
+
+static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_init_hw_qp(hdma_dev, qp_index);
+	hisi_dma_unmask_irq(hdma_dev, qp_index);
+	hisi_dma_enable_dma(hdma_dev, qp_index, true);
+}
+
+static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]);
+}
+
+static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev)
+{
+	int i;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		hdma_dev->chan[i].qp_num = i;
+		hdma_dev->chan[i].hdma_dev = hdma_dev;
+		hdma_dev->chan[i].vc.desc_free = hisi_dma_desc_free;
+		vchan_init(&hdma_dev->chan[i].vc, &hdma_dev->dma_dev);
+		hisi_dma_enable_qp(hdma_dev, i);
+	}
+}
+
+static void hisi_dma_disable_qps(struct hisi_dma_dev *hdma_dev)
+{
+	int i;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		hisi_dma_disable_qp(hdma_dev, i);
+		tasklet_kill(&hdma_dev->chan[i].vc.task);
+	}
+}
+
+static irqreturn_t hisi_dma_irq(int irq, void *data)
+{
+	struct hisi_dma_chan *chan = data;
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	struct hisi_dma_desc *desc;
+	struct hisi_dma_cqe *cqe;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	desc = chan->desc;
+	cqe = chan->cq + chan->cq_head;
+	if (desc) {
+		if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
+			chan->cq_head = (chan->cq_head + 1) %
+					hdma_dev->chan_depth;
+			hisi_dma_chan_write(hdma_dev->base,
+					    HISI_DMA_CQ_HEAD_PTR, chan->qp_num,
+					    chan->cq_head);
+			vchan_cookie_complete(&desc->vd);
+		} else {
+			dev_err(&hdma_dev->pdev->dev, "task error!\n");
+		}
+
+		chan->desc = NULL;
+	}
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int hisi_dma_request_qps_irq(struct hisi_dma_dev *hdma_dev)
+{
+	struct pci_dev *pdev = hdma_dev->pdev;
+	int i, ret;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		ret = devm_request_irq(&pdev->dev, pci_irq_vector(pdev, i),
+				       hisi_dma_irq, IRQF_SHARED, "hisi_dma",
+				       &hdma_dev->chan[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* This function enables all hw channels in a device */
+static int hisi_dma_enable_hw_channels(struct hisi_dma_dev *hdma_dev)
+{
+	int ret;
+
+	ret = hisi_dma_alloc_qps_mem(hdma_dev);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "fail to allocate qp memory!\n");
+		return ret;
+	}
+
+	ret = hisi_dma_request_qps_irq(hdma_dev);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "fail to request qp irq!\n");
+		return ret;
+	}
+
+	hisi_dma_enable_qps(hdma_dev);
+
+	return 0;
+}
+
+static void hisi_dma_disable_hw_channels(void *data)
+{
+	hisi_dma_disable_qps(data);
+}
+
+static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev,
+			      enum hisi_dma_mode mode)
+{
+	writel_relaxed(mode == RC ? 1 : 0, hdma_dev->base + HISI_DMA_MODE);
+}
+
+static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct hisi_dma_dev *hdma_dev;
+	struct dma_device *dma_dev;
+	size_t dev_size;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "failed to enable device mem!\n");
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, 1 << PCI_BAR_2, pci_name(pdev));
+	if (ret) {
+		dev_err(dev, "failed to remap I/O region!\n");
+		return ret;
+	}
+
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	dev_size = sizeof(struct hisi_dma_chan) * HISI_DMA_CHAN_NUM +
+		   sizeof(*hdma_dev);
+	hdma_dev = devm_kzalloc(dev, dev_size, GFP_KERNEL);
+	if (!hdma_dev)
+		return -EINVAL;
+
+	hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2];
+	hdma_dev->pdev = pdev;
+	hdma_dev->chan_num = HISI_DMA_CHAN_NUM;
+	hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL;
+
+	pci_set_drvdata(pdev, hdma_dev);
+	pci_set_master(pdev);
+
+	ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM,
+				    PCI_IRQ_MSI);
+	if (ret < 0) {
+		dev_err(dev, "Failed to allocate MSI vectors!\n");
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(dev, hisi_dma_free_irq_vectors, pdev);
+	if (ret)
+		return ret;
+
+	dma_dev = &hdma_dev->dma_dev;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy;
+	dma_dev->device_tx_status = hisi_dma_tx_status;
+	dma_dev->device_issue_pending = hisi_dma_issue_pending;
+	dma_dev->device_terminate_all = hisi_dma_terminate_all;
+	dma_dev->device_synchronize = hisi_dma_synchronize;
+	dma_dev->directions = BIT(DMA_MEM_TO_MEM);
+	dma_dev->dev = dev;
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	hisi_dma_set_mode(hdma_dev, RC);
+
+	ret = hisi_dma_enable_hw_channels(hdma_dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable hw channel!\n");
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(dev, hisi_dma_disable_hw_channels,
+				       hdma_dev);
+	if (ret)
+		return ret;
+
+	ret = dmaenginem_async_device_register(dma_dev);
+	if (ret < 0)
+		dev_err(dev, "failed to register device!\n");
+
+	return ret;
+}
+
+static const struct pci_device_id hisi_dma_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa122) },
+	{ 0, }
+};
+
+static struct pci_driver hisi_dma_pci_driver = {
+	.name		= "hisi_dma",
+	.id_table	= hisi_dma_pci_tbl,
+	.probe		= hisi_dma_probe,
+};
+
+module_pci_driver(hisi_dma_pci_driver);
+
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_AUTHOR("Zhenfa Qiu <qiuzhenfa@hisilicon.com>");
+MODULE_DESCRIPTION("HiSilicon Kunpeng DMA controller driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, hisi_dma_pci_tbl);
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
new file mode 100644
index 000000000000..8978b898d777
--- /dev/null
+++ b/drivers/dma/idxd/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IDXD) += idxd.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
new file mode 100644
index 000000000000..1d7347825b95
--- /dev/null
+++ b/drivers/dma/idxd/cdev.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/sched/task.h>
+#include <linux/intel-svm.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+struct idxd_cdev_context {
+	const char *name;
+	dev_t devt;
+	struct ida minor_ida;
+};
+
+/*
+ * ictx is an array based off of accelerator types. enum idxd_type
+ * is used as index
+ */
+static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
+	{ .name = "dsa" },
+};
+
+struct idxd_user_context {
+	struct idxd_wq *wq;
+	struct task_struct *task;
+	unsigned int flags;
+};
+
+enum idxd_cdev_cleanup {
+	CDEV_NORMAL = 0,
+	CDEV_FAILED,
+};
+
+static void idxd_cdev_dev_release(struct device *dev)
+{
+	dev_dbg(dev, "releasing cdev device\n");
+	kfree(dev);
+}
+
+static struct device_type idxd_cdev_device_type = {
+	.name = "idxd_cdev",
+	.release = idxd_cdev_dev_release,
+};
+
+static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
+{
+	struct cdev *cdev = inode->i_cdev;
+
+	return container_of(cdev, struct idxd_cdev, cdev);
+}
+
+static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
+{
+	return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
+}
+
+static inline struct idxd_wq *inode_wq(struct inode *inode)
+{
+	return idxd_cdev_wq(inode_idxd_cdev(inode));
+}
+
+static int idxd_cdev_open(struct inode *inode, struct file *filp)
+{
+	struct idxd_user_context *ctx;
+	struct idxd_device *idxd;
+	struct idxd_wq *wq;
+	struct device *dev;
+	struct idxd_cdev *idxd_cdev;
+
+	wq = inode_wq(inode);
+	idxd = wq->idxd;
+	dev = &idxd->pdev->dev;
+	idxd_cdev = &wq->idxd_cdev;
+
+	dev_dbg(dev, "%s called\n", __func__);
+
+	if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
+		return -EBUSY;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->wq = wq;
+	filp->private_data = ctx;
+	idxd_wq_get(wq);
+	return 0;
+}
+
+static int idxd_cdev_release(struct inode *node, struct file *filep)
+{
+	struct idxd_user_context *ctx = filep->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+
+	dev_dbg(dev, "%s called\n", __func__);
+	filep->private_data = NULL;
+
+	kfree(ctx);
+	idxd_wq_put(wq);
+	return 0;
+}
+
+static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
+		     const char *func)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+		dev_info_ratelimited(dev,
+				     "%s: %s: mapping too large: %lu\n",
+				     current->comm, func,
+				     vma->vm_end - vma->vm_start);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct idxd_user_context *ctx = filp->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct pci_dev *pdev = idxd->pdev;
+	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
+	unsigned long pfn;
+	int rc;
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	rc = check_vma(wq, vma, __func__);
+
+	vma->vm_flags |= VM_DONTCOPY;
+	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
+				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_private_data = ctx;
+
+	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+			vma->vm_page_prot);
+}
+
+static __poll_t idxd_cdev_poll(struct file *filp,
+			       struct poll_table_struct *wait)
+{
+	struct idxd_user_context *ctx = filp->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	unsigned long flags;
+	__poll_t out = 0;
+
+	poll_wait(filp, &idxd_cdev->err_queue, wait);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	if (idxd->sw_err.valid)
+		out = EPOLLIN | EPOLLRDNORM;
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	return out;
+}
+
+static const struct file_operations idxd_cdev_fops = {
+	.owner = THIS_MODULE,
+	.open = idxd_cdev_open,
+	.release = idxd_cdev_release,
+	.mmap = idxd_cdev_mmap,
+	.poll = idxd_cdev_poll,
+};
+
+int idxd_cdev_get_major(struct idxd_device *idxd)
+{
+	return MAJOR(ictx[idxd->type].devt);
+}
+
+static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct idxd_cdev_context *cdev_ctx;
+	struct device *dev;
+	int minor, rc;
+
+	idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
+	if (!idxd_cdev->dev)
+		return -ENOMEM;
+
+	dev = idxd_cdev->dev;
+	dev->parent = &idxd->pdev->dev;
+	dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
+		     idxd->id, wq->id);
+	dev->bus = idxd_get_bus_type(idxd);
+
+	cdev_ctx = &ictx[wq->idxd->type];
+	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
+	if (minor < 0) {
+		rc = minor;
+		goto ida_err;
+	}
+
+	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
+	dev->type = &idxd_cdev_device_type;
+	rc = device_register(dev);
+	if (rc < 0) {
+		dev_err(&idxd->pdev->dev, "device register failed\n");
+		put_device(dev);
+		goto dev_reg_err;
+	}
+	idxd_cdev->minor = minor;
+
+	return 0;
+
+ dev_reg_err:
+	ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
+ ida_err:
+	kfree(dev);
+	idxd_cdev->dev = NULL;
+	return rc;
+}
+
+static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
+				 enum idxd_cdev_cleanup cdev_state)
+{
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct idxd_cdev_context *cdev_ctx;
+
+	cdev_ctx = &ictx[wq->idxd->type];
+	if (cdev_state == CDEV_NORMAL)
+		cdev_del(&idxd_cdev->cdev);
+	device_unregister(idxd_cdev->dev);
+	/*
+	 * The device_type->release() will be called on the device and free
+	 * the allocated struct device. We can just forget it.
+	 */
+	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+	idxd_cdev->dev = NULL;
+	idxd_cdev->minor = -1;
+}
+
+int idxd_wq_add_cdev(struct idxd_wq *wq)
+{
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct cdev *cdev = &idxd_cdev->cdev;
+	struct device *dev;
+	int rc;
+
+	rc = idxd_wq_cdev_dev_setup(wq);
+	if (rc < 0)
+		return rc;
+
+	dev = idxd_cdev->dev;
+	cdev_init(cdev, &idxd_cdev_fops);
+	cdev_set_parent(cdev, &dev->kobj);
+	rc = cdev_add(cdev, dev->devt, 1);
+	if (rc) {
+		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
+		idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
+		return rc;
+	}
+
+	init_waitqueue_head(&idxd_cdev->err_queue);
+	return 0;
+}
+
+void idxd_wq_del_cdev(struct idxd_wq *wq)
+{
+	idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
+}
+
+int idxd_cdev_register(void)
+{
+	int rc, i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		ida_init(&ictx[i].minor_ida);
+		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
+					 ictx[i].name);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+void idxd_cdev_remove(void)
+{
+	int i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		unregister_chrdev_region(ictx[i].devt, MINORMASK);
+		ida_destroy(&ictx[i].minor_ida);
+	}
+}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
new file mode 100644
index 000000000000..ada69e722f84
--- /dev/null
+++ b/drivers/dma/idxd/device.c
@@ -0,0 +1,693 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "idxd.h"
+#include "registers.h"
+
+static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout);
+static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand);
+
+/* Interrupt control bits */
+int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	union msix_perm perm;
+	u32 offset;
+
+	if (vec_id < 0 || vec_id >= msixcnt)
+		return -EINVAL;
+
+	offset = idxd->msix_perm_offset + vec_id * 8;
+	perm.bits = ioread32(idxd->reg_base + offset);
+	perm.ignore = 1;
+	iowrite32(perm.bits, idxd->reg_base + offset);
+
+	return 0;
+}
+
+void idxd_mask_msix_vectors(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	int i, rc;
+
+	for (i = 0; i < msixcnt; i++) {
+		rc = idxd_mask_msix_vector(idxd, i);
+		if (rc < 0)
+			dev_warn(&pdev->dev,
+				 "Failed disabling msix vec %d\n", i);
+	}
+}
+
+int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	union msix_perm perm;
+	u32 offset;
+
+	if (vec_id < 0 || vec_id >= msixcnt)
+		return -EINVAL;
+
+	offset = idxd->msix_perm_offset + vec_id * 8;
+	perm.bits = ioread32(idxd->reg_base + offset);
+	perm.ignore = 0;
+	iowrite32(perm.bits, idxd->reg_base + offset);
+
+	return 0;
+}
+
+void idxd_unmask_error_interrupts(struct idxd_device *idxd)
+{
+	union genctrl_reg genctrl;
+
+	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+	genctrl.softerr_int_en = 1;
+	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+void idxd_mask_error_interrupts(struct idxd_device *idxd)
+{
+	union genctrl_reg genctrl;
+
+	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+	genctrl.softerr_int_en = 0;
+	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+static void free_hw_descs(struct idxd_wq *wq)
+{
+	int i;
+
+	for (i = 0; i < wq->num_descs; i++)
+		kfree(wq->hw_descs[i]);
+
+	kfree(wq->hw_descs);
+}
+
+static int alloc_hw_descs(struct idxd_wq *wq, int num)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+	int i;
+	int node = dev_to_node(dev);
+
+	wq->hw_descs = kcalloc_node(num, sizeof(struct dsa_hw_desc *),
+				    GFP_KERNEL, node);
+	if (!wq->hw_descs)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		wq->hw_descs[i] = kzalloc_node(sizeof(*wq->hw_descs[i]),
+					       GFP_KERNEL, node);
+		if (!wq->hw_descs[i]) {
+			free_hw_descs(wq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void free_descs(struct idxd_wq *wq)
+{
+	int i;
+
+	for (i = 0; i < wq->num_descs; i++)
+		kfree(wq->descs[i]);
+
+	kfree(wq->descs);
+}
+
+static int alloc_descs(struct idxd_wq *wq, int num)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+	int i;
+	int node = dev_to_node(dev);
+
+	wq->descs = kcalloc_node(num, sizeof(struct idxd_desc *),
+				 GFP_KERNEL, node);
+	if (!wq->descs)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		wq->descs[i] = kzalloc_node(sizeof(*wq->descs[i]),
+					    GFP_KERNEL, node);
+		if (!wq->descs[i]) {
+			free_descs(wq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/* WQ control bits */
+int idxd_wq_alloc_resources(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_group *group = wq->group;
+	struct device *dev = &idxd->pdev->dev;
+	int rc, num_descs, i;
+
+	if (wq->type != IDXD_WQT_KERNEL)
+		return 0;
+
+	num_descs = wq->size +
+		idxd->hw.gen_cap.max_descs_per_engine * group->num_engines;
+	wq->num_descs = num_descs;
+
+	rc = alloc_hw_descs(wq, num_descs);
+	if (rc < 0)
+		return rc;
+
+	wq->compls_size = num_descs * sizeof(struct dsa_completion_record);
+	wq->compls = dma_alloc_coherent(dev, wq->compls_size,
+					&wq->compls_addr, GFP_KERNEL);
+	if (!wq->compls) {
+		rc = -ENOMEM;
+		goto fail_alloc_compls;
+	}
+
+	rc = alloc_descs(wq, num_descs);
+	if (rc < 0)
+		goto fail_alloc_descs;
+
+	rc = sbitmap_init_node(&wq->sbmap, num_descs, -1, GFP_KERNEL,
+			       dev_to_node(dev));
+	if (rc < 0)
+		goto fail_sbitmap_init;
+
+	for (i = 0; i < num_descs; i++) {
+		struct idxd_desc *desc = wq->descs[i];
+
+		desc->hw = wq->hw_descs[i];
+		desc->completion = &wq->compls[i];
+		desc->compl_dma  = wq->compls_addr +
+			sizeof(struct dsa_completion_record) * i;
+		desc->id = i;
+		desc->wq = wq;
+
+		dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
+		desc->txd.tx_submit = idxd_dma_tx_submit;
+	}
+
+	return 0;
+
+ fail_sbitmap_init:
+	free_descs(wq);
+ fail_alloc_descs:
+	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+ fail_alloc_compls:
+	free_hw_descs(wq);
+	return rc;
+}
+
+void idxd_wq_free_resources(struct idxd_wq *wq)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	if (wq->type != IDXD_WQT_KERNEL)
+		return;
+
+	free_hw_descs(wq);
+	free_descs(wq);
+	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+	sbitmap_free(&wq->sbmap);
+}
+
+int idxd_wq_enable(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 status;
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+
+	if (wq->state == IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d already enabled\n", wq->id);
+		return -ENXIO;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_WQ, wq->id);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    status != IDXD_CMDSTS_ERR_WQ_ENABLED) {
+		dev_dbg(dev, "WQ enable failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	wq->state = IDXD_WQ_ENABLED;
+	dev_dbg(dev, "WQ %d enabled\n", wq->id);
+	return 0;
+}
+
+int idxd_wq_disable(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 status, operand;
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	dev_dbg(dev, "Disabling WQ %d\n", wq->id);
+
+	if (wq->state != IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+		return 0;
+	}
+
+	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_WQ, operand);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	if (status != IDXD_CMDSTS_SUCCESS) {
+		dev_dbg(dev, "WQ disable failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	wq->state = IDXD_WQ_DISABLED;
+	dev_dbg(dev, "WQ %d disabled\n", wq->id);
+	return 0;
+}
+
+int idxd_wq_map_portal(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	resource_size_t start;
+
+	start = pci_resource_start(pdev, IDXD_WQ_BAR);
+	start = start + wq->id * IDXD_PORTAL_SIZE;
+
+	wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
+	if (!wq->dportal)
+		return -ENOMEM;
+	dev_dbg(dev, "wq %d portal mapped at %p\n", wq->id, wq->dportal);
+
+	return 0;
+}
+
+void idxd_wq_unmap_portal(struct idxd_wq *wq)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	devm_iounmap(dev, wq->dportal);
+}
+
+/* Device control bits */
+static inline bool idxd_is_enabled(struct idxd_device *idxd)
+{
+	union gensts_reg gensts;
+
+	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+
+	if (gensts.state == IDXD_DEVICE_STATE_ENABLED)
+		return true;
+	return false;
+}
+
+static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout)
+{
+	u32 sts, to = timeout;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	while (sts & IDXD_CMDSTS_ACTIVE && --to) {
+		cpu_relax();
+		sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	}
+
+	if (to == 0 && sts & IDXD_CMDSTS_ACTIVE) {
+		dev_warn(&idxd->pdev->dev, "%s timed out!\n", __func__);
+		*status = 0;
+		return -EBUSY;
+	}
+
+	*status = sts;
+	return 0;
+}
+
+static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand)
+{
+	union idxd_command_reg cmd;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmd = cmd_code;
+	cmd.operand = operand;
+	dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
+		__func__, cmd_code, operand);
+	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+	return 0;
+}
+
+int idxd_device_enable(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	if (idxd_is_enabled(idxd)) {
+		dev_dbg(dev, "Device already enabled\n");
+		return -ENXIO;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	/* If the command is successful or if the device was enabled */
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    status != IDXD_CMDSTS_ERR_DEV_ENABLED) {
+		dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+		return -ENXIO;
+	}
+
+	idxd->state = IDXD_DEV_ENABLED;
+	return 0;
+}
+
+int idxd_device_disable(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	if (!idxd_is_enabled(idxd)) {
+		dev_dbg(dev, "Device is not enabled\n");
+		return 0;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	/* If the command is successful or if the device was disabled */
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) {
+		dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+		rc = -ENXIO;
+		return rc;
+	}
+
+	idxd->state = IDXD_DEV_CONF_READY;
+	return 0;
+}
+
+int __idxd_device_reset(struct idxd_device *idxd)
+{
+	u32 status;
+	int rc;
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_RESET_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+int idxd_device_reset(struct idxd_device *idxd)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = __idxd_device_reset(idxd);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	return rc;
+}
+
+/* Device configuration bits */
+static void idxd_group_config_write(struct idxd_group *group)
+{
+	struct idxd_device *idxd = group->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+	u32 grpcfg_offset;
+
+	dev_dbg(dev, "Writing group %d cfg registers\n", group->id);
+
+	/* setup GRPWQCFG */
+	for (i = 0; i < 4; i++) {
+		grpcfg_offset = idxd->grpcfg_offset +
+			group->id * 64 + i * sizeof(u64);
+		iowrite64(group->grpcfg.wqs[i],
+			  idxd->reg_base + grpcfg_offset);
+		dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+			group->id, i, grpcfg_offset,
+			ioread64(idxd->reg_base + grpcfg_offset));
+	}
+
+	/* setup GRPENGCFG */
+	grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 32;
+	iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+		grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset));
+
+	/* setup GRPFLAGS */
+	grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 40;
+	iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+		group->id, grpcfg_offset,
+		ioread32(idxd->reg_base + grpcfg_offset));
+}
+
+static int idxd_groups_config_write(struct idxd_device *idxd)
+
+{
+	union gencfg_reg reg;
+	int i;
+	struct device *dev = &idxd->pdev->dev;
+
+	/* Setup bandwidth token limit */
+	if (idxd->token_limit) {
+		reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+		reg.token_limit = idxd->token_limit;
+		iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+	}
+
+	dev_dbg(dev, "GENCFG(%#x): %#x\n", IDXD_GENCFG_OFFSET,
+		ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		idxd_group_config_write(group);
+	}
+
+	return 0;
+}
+
+static int idxd_wq_config_write(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 wq_offset;
+	int i;
+
+	if (!wq->group)
+		return 0;
+
+	memset(&wq->wqcfg, 0, sizeof(union wqcfg));
+
+	/* byte 0-3 */
+	wq->wqcfg.wq_size = wq->size;
+
+	if (wq->size == 0) {
+		dev_warn(dev, "Incorrect work queue size: 0\n");
+		return -EINVAL;
+	}
+
+	/* bytes 4-7 */
+	wq->wqcfg.wq_thresh = wq->threshold;
+
+	/* byte 8-11 */
+	wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
+	wq->wqcfg.mode = 1;
+
+	wq->wqcfg.priority = wq->priority;
+
+	/* bytes 12-15 */
+	wq->wqcfg.max_xfer_shift = idxd->hw.gen_cap.max_xfer_shift;
+	wq->wqcfg.max_batch_shift = idxd->hw.gen_cap.max_batch_shift;
+
+	dev_dbg(dev, "WQ %d CFGs\n", wq->id);
+	for (i = 0; i < 8; i++) {
+		wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
+		iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset);
+		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
+			wq->id, i, wq_offset,
+			ioread32(idxd->reg_base + wq_offset));
+	}
+
+	return 0;
+}
+
+static int idxd_wqs_config_write(struct idxd_device *idxd)
+{
+	int i, rc;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		rc = idxd_wq_config_write(wq);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+static void idxd_group_flags_setup(struct idxd_device *idxd)
+{
+	int i;
+
+	/* TC-A 0 and TC-B 1 should be defaults */
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		if (group->tc_a == -1)
+			group->grpcfg.flags.tc_a = 0;
+		else
+			group->grpcfg.flags.tc_a = group->tc_a;
+		if (group->tc_b == -1)
+			group->grpcfg.flags.tc_b = 1;
+		else
+			group->grpcfg.flags.tc_b = group->tc_b;
+		group->grpcfg.flags.use_token_limit = group->use_token_limit;
+		group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
+		if (group->tokens_allowed)
+			group->grpcfg.flags.tokens_allowed =
+				group->tokens_allowed;
+		else
+			group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
+	}
+}
+
+static int idxd_engines_setup(struct idxd_device *idxd)
+{
+	int i, engines = 0;
+	struct idxd_engine *eng;
+	struct idxd_group *group;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		group = &idxd->groups[i];
+		group->grpcfg.engines = 0;
+	}
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		eng = &idxd->engines[i];
+		group = eng->group;
+
+		if (!group)
+			continue;
+
+		group->grpcfg.engines |= BIT(eng->id);
+		engines++;
+	}
+
+	if (!engines)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int idxd_wqs_setup(struct idxd_device *idxd)
+{
+	struct idxd_wq *wq;
+	struct idxd_group *group;
+	int i, j, configured = 0;
+	struct device *dev = &idxd->pdev->dev;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		group = &idxd->groups[i];
+		for (j = 0; j < 4; j++)
+			group->grpcfg.wqs[j] = 0;
+	}
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		wq = &idxd->wqs[i];
+		group = wq->group;
+
+		if (!wq->group)
+			continue;
+		if (!wq->size)
+			continue;
+
+		if (!wq_dedicated(wq)) {
+			dev_warn(dev, "No shared workqueue support.\n");
+			return -EINVAL;
+		}
+
+		group->grpcfg.wqs[wq->id / 64] |= BIT(wq->id % 64);
+		configured++;
+	}
+
+	if (configured == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int idxd_device_config(struct idxd_device *idxd)
+{
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	rc = idxd_wqs_setup(idxd);
+	if (rc < 0)
+		return rc;
+
+	rc = idxd_engines_setup(idxd);
+	if (rc < 0)
+		return rc;
+
+	idxd_group_flags_setup(idxd);
+
+	rc = idxd_wqs_config_write(idxd);
+	if (rc < 0)
+		return rc;
+
+	rc = idxd_groups_config_write(idxd);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
new file mode 100644
index 000000000000..c64c1429d160
--- /dev/null
+++ b/drivers/dma/idxd/dma.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "idxd.h"
+
+static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
+{
+	return container_of(c, struct idxd_wq, dma_chan);
+}
+
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+			   enum idxd_complete_type comp_type)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct dmaengine_result res;
+	int complete = 1;
+
+	if (desc->completion->status == DSA_COMP_SUCCESS)
+		res.result = DMA_TRANS_NOERROR;
+	else if (desc->completion->status)
+		res.result = DMA_TRANS_WRITE_FAILED;
+	else if (comp_type == IDXD_COMPLETE_ABORT)
+		res.result = DMA_TRANS_ABORTED;
+	else
+		complete = 0;
+
+	tx = &desc->txd;
+	if (complete && tx->cookie) {
+		dma_cookie_complete(tx);
+		dma_descriptor_unmap(tx);
+		dmaengine_desc_get_callback_invoke(tx, &res);
+		tx->callback = NULL;
+		tx->callback_result = NULL;
+	}
+}
+
+static void op_flag_setup(unsigned long flags, u32 *desc_flags)
+{
+	*desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+	if (flags & DMA_PREP_INTERRUPT)
+		*desc_flags |= IDXD_OP_FLAG_RCI;
+}
+
+static inline void set_completion_address(struct idxd_desc *desc,
+					  u64 *compl_addr)
+{
+		*compl_addr = desc->compl_dma;
+}
+
+static inline void idxd_prep_desc_common(struct idxd_wq *wq,
+					 struct dsa_hw_desc *hw, char opcode,
+					 u64 addr_f1, u64 addr_f2, u64 len,
+					 u64 compl, u32 flags)
+{
+	struct idxd_device *idxd = wq->idxd;
+
+	hw->flags = flags;
+	hw->opcode = opcode;
+	hw->src_addr = addr_f1;
+	hw->dst_addr = addr_f2;
+	hw->xfer_size = len;
+	hw->priv = !!(wq->type == IDXD_WQT_KERNEL);
+	hw->completion_addr = compl;
+
+	/*
+	 * Descriptor completion vectors are 1-8 for MSIX. We will round
+	 * robin through the 8 vectors.
+	 */
+	wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
+	hw->int_handle =  wq->vec_ptr;
+}
+
+static struct dma_async_tx_descriptor *
+idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+		       dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct idxd_wq *wq = to_idxd_wq(c);
+	u32 desc_flags;
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_desc *desc;
+
+	if (wq->state != IDXD_WQ_ENABLED)
+		return NULL;
+
+	if (len > idxd->max_xfer_bytes)
+		return NULL;
+
+	op_flag_setup(flags, &desc_flags);
+	desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+	if (IS_ERR(desc))
+		return NULL;
+
+	idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMMOVE,
+			      dma_src, dma_dest, len, desc->compl_dma,
+			      desc_flags);
+
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+}
+
+static int idxd_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct idxd_wq *wq = to_idxd_wq(chan);
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	idxd_wq_get(wq);
+	dev_dbg(dev, "%s: client_count: %d\n", __func__,
+		idxd_wq_refcount(wq));
+	return 0;
+}
+
+static void idxd_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct idxd_wq *wq = to_idxd_wq(chan);
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	idxd_wq_put(wq);
+	dev_dbg(dev, "%s: client_count: %d\n", __func__,
+		idxd_wq_refcount(wq));
+}
+
+static enum dma_status idxd_dma_tx_status(struct dma_chan *dma_chan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(dma_chan, cookie, txstate);
+}
+
+/*
+ * issue_pending() does not need to do anything since tx_submit() does the job
+ * already.
+ */
+static void idxd_dma_issue_pending(struct dma_chan *dma_chan)
+{
+}
+
+dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *c = tx->chan;
+	struct idxd_wq *wq = to_idxd_wq(c);
+	dma_cookie_t cookie;
+	int rc;
+	struct idxd_desc *desc = container_of(tx, struct idxd_desc, txd);
+
+	cookie = dma_cookie_assign(tx);
+
+	rc = idxd_submit_desc(wq, desc);
+	if (rc < 0) {
+		idxd_free_desc(wq, desc);
+		return rc;
+	}
+
+	return cookie;
+}
+
+static void idxd_dma_release(struct dma_device *device)
+{
+}
+
+int idxd_register_dma_device(struct idxd_device *idxd)
+{
+	struct dma_device *dma = &idxd->dma_dev;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma->dev = &idxd->pdev->dev;
+
+	dma->device_release = idxd_dma_release;
+
+	if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) {
+		dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+		dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy;
+	}
+
+	dma->device_tx_status = idxd_dma_tx_status;
+	dma->device_issue_pending = idxd_dma_issue_pending;
+	dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = idxd_dma_free_chan_resources;
+
+	return dma_async_device_register(&idxd->dma_dev);
+}
+
+void idxd_unregister_dma_device(struct idxd_device *idxd)
+{
+	dma_async_device_unregister(&idxd->dma_dev);
+}
+
+int idxd_register_dma_channel(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct dma_device *dma = &idxd->dma_dev;
+	struct dma_chan *chan = &wq->dma_chan;
+	int rc;
+
+	memset(&wq->dma_chan, 0, sizeof(struct dma_chan));
+	chan->device = dma;
+	list_add_tail(&chan->device_node, &dma->channels);
+	rc = dma_async_device_channel_register(dma, chan);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+void idxd_unregister_dma_channel(struct idxd_wq *wq)
+{
+	dma_async_device_channel_unregister(&wq->idxd->dma_dev, &wq->dma_chan);
+}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
new file mode 100644
index 000000000000..b8f8a363b4a7
--- /dev/null
+++ b/drivers/dma/idxd/idxd.h
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_H_
+#define _IDXD_H_
+
+#include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include "registers.h"
+
+#define IDXD_DRIVER_VERSION	"1.00"
+
+extern struct kmem_cache *idxd_desc_pool;
+
+#define IDXD_REG_TIMEOUT	50
+#define IDXD_DRAIN_TIMEOUT	5000
+
+enum idxd_type {
+	IDXD_TYPE_UNKNOWN = -1,
+	IDXD_TYPE_DSA = 0,
+	IDXD_TYPE_MAX
+};
+
+#define IDXD_NAME_SIZE		128
+
+struct idxd_device_driver {
+	struct device_driver drv;
+};
+
+struct idxd_irq_entry {
+	struct idxd_device *idxd;
+	int id;
+	struct llist_head pending_llist;
+	struct list_head work_list;
+};
+
+struct idxd_group {
+	struct device conf_dev;
+	struct idxd_device *idxd;
+	struct grpcfg grpcfg;
+	int id;
+	int num_engines;
+	int num_wqs;
+	bool use_token_limit;
+	u8 tokens_allowed;
+	u8 tokens_reserved;
+	int tc_a;
+	int tc_b;
+};
+
+#define IDXD_MAX_PRIORITY	0xf
+
+enum idxd_wq_state {
+	IDXD_WQ_DISABLED = 0,
+	IDXD_WQ_ENABLED,
+};
+
+enum idxd_wq_flag {
+	WQ_FLAG_DEDICATED = 0,
+};
+
+enum idxd_wq_type {
+	IDXD_WQT_NONE = 0,
+	IDXD_WQT_KERNEL,
+	IDXD_WQT_USER,
+};
+
+struct idxd_cdev {
+	struct cdev cdev;
+	struct device *dev;
+	int minor;
+	struct wait_queue_head err_queue;
+};
+
+#define IDXD_ALLOCATED_BATCH_SIZE	128U
+#define WQ_NAME_SIZE   1024
+#define WQ_TYPE_SIZE   10
+
+enum idxd_op_type {
+	IDXD_OP_BLOCK = 0,
+	IDXD_OP_NONBLOCK = 1,
+};
+
+enum idxd_complete_type {
+	IDXD_COMPLETE_NORMAL = 0,
+	IDXD_COMPLETE_ABORT,
+};
+
+struct idxd_wq {
+	void __iomem *dportal;
+	struct device conf_dev;
+	struct idxd_cdev idxd_cdev;
+	struct idxd_device *idxd;
+	int id;
+	enum idxd_wq_type type;
+	struct idxd_group *group;
+	int client_count;
+	struct mutex wq_lock;	/* mutex for workqueue */
+	u32 size;
+	u32 threshold;
+	u32 priority;
+	enum idxd_wq_state state;
+	unsigned long flags;
+	union wqcfg wqcfg;
+	atomic_t dq_count;	/* dedicated queue flow control */
+	u32 vec_ptr;		/* interrupt steering */
+	struct dsa_hw_desc **hw_descs;
+	int num_descs;
+	struct dsa_completion_record *compls;
+	dma_addr_t compls_addr;
+	int compls_size;
+	struct idxd_desc **descs;
+	struct sbitmap sbmap;
+	struct dma_chan dma_chan;
+	struct percpu_rw_semaphore submit_lock;
+	wait_queue_head_t submit_waitq;
+	char name[WQ_NAME_SIZE + 1];
+};
+
+struct idxd_engine {
+	struct device conf_dev;
+	int id;
+	struct idxd_group *group;
+	struct idxd_device *idxd;
+};
+
+/* shadow registers */
+struct idxd_hw {
+	u32 version;
+	union gen_cap_reg gen_cap;
+	union wq_cap_reg wq_cap;
+	union group_cap_reg group_cap;
+	union engine_cap_reg engine_cap;
+	struct opcap opcap;
+};
+
+enum idxd_device_state {
+	IDXD_DEV_HALTED = -1,
+	IDXD_DEV_DISABLED = 0,
+	IDXD_DEV_CONF_READY,
+	IDXD_DEV_ENABLED,
+};
+
+enum idxd_device_flag {
+	IDXD_FLAG_CONFIGURABLE = 0,
+};
+
+struct idxd_device {
+	enum idxd_type type;
+	struct device conf_dev;
+	struct list_head list;
+	struct idxd_hw hw;
+	enum idxd_device_state state;
+	unsigned long flags;
+	int id;
+	int major;
+
+	struct pci_dev *pdev;
+	void __iomem *reg_base;
+
+	spinlock_t dev_lock;	/* spinlock for device */
+	struct idxd_group *groups;
+	struct idxd_wq *wqs;
+	struct idxd_engine *engines;
+
+	int num_groups;
+
+	u32 msix_perm_offset;
+	u32 wqcfg_offset;
+	u32 grpcfg_offset;
+	u32 perfmon_offset;
+
+	u64 max_xfer_bytes;
+	u32 max_batch_size;
+	int max_groups;
+	int max_engines;
+	int max_tokens;
+	int max_wqs;
+	int max_wq_size;
+	int token_limit;
+	int nr_tokens;		/* non-reserved tokens */
+
+	union sw_err_reg sw_err;
+
+	struct msix_entry *msix_entries;
+	int num_wq_irqs;
+	struct idxd_irq_entry *irq_entries;
+
+	struct dma_device dma_dev;
+};
+
+/* IDXD software descriptor */
+struct idxd_desc {
+	struct dsa_hw_desc *hw;
+	dma_addr_t desc_dma;
+	struct dsa_completion_record *completion;
+	dma_addr_t compl_dma;
+	struct dma_async_tx_descriptor txd;
+	struct llist_node llnode;
+	struct list_head list;
+	int id;
+	struct idxd_wq *wq;
+};
+
+#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
+#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
+
+extern struct bus_type dsa_bus_type;
+
+static inline bool wq_dedicated(struct idxd_wq *wq)
+{
+	return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
+}
+
+enum idxd_portal_prot {
+	IDXD_PORTAL_UNLIMITED = 0,
+	IDXD_PORTAL_LIMITED,
+};
+
+static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
+{
+	return prot * 0x1000;
+}
+
+static inline int idxd_get_wq_portal_full_offset(int wq_id,
+						 enum idxd_portal_prot prot)
+{
+	return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
+}
+
+static inline void idxd_set_type(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+
+	if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
+		idxd->type = IDXD_TYPE_DSA;
+	else
+		idxd->type = IDXD_TYPE_UNKNOWN;
+}
+
+static inline void idxd_wq_get(struct idxd_wq *wq)
+{
+	wq->client_count++;
+}
+
+static inline void idxd_wq_put(struct idxd_wq *wq)
+{
+	wq->client_count--;
+}
+
+static inline int idxd_wq_refcount(struct idxd_wq *wq)
+{
+	return wq->client_count;
+};
+
+const char *idxd_get_dev_name(struct idxd_device *idxd);
+int idxd_register_bus_type(void);
+void idxd_unregister_bus_type(void);
+int idxd_setup_sysfs(struct idxd_device *idxd);
+void idxd_cleanup_sysfs(struct idxd_device *idxd);
+int idxd_register_driver(void);
+void idxd_unregister_driver(void);
+struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);
+
+/* device interrupt control */
+irqreturn_t idxd_irq_handler(int vec, void *data);
+irqreturn_t idxd_misc_thread(int vec, void *data);
+irqreturn_t idxd_wq_thread(int irq, void *data);
+void idxd_mask_error_interrupts(struct idxd_device *idxd);
+void idxd_unmask_error_interrupts(struct idxd_device *idxd);
+void idxd_mask_msix_vectors(struct idxd_device *idxd);
+int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
+int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
+
+/* device control */
+int idxd_device_enable(struct idxd_device *idxd);
+int idxd_device_disable(struct idxd_device *idxd);
+int idxd_device_reset(struct idxd_device *idxd);
+int __idxd_device_reset(struct idxd_device *idxd);
+void idxd_device_cleanup(struct idxd_device *idxd);
+int idxd_device_config(struct idxd_device *idxd);
+void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+
+/* work queue control */
+int idxd_wq_alloc_resources(struct idxd_wq *wq);
+void idxd_wq_free_resources(struct idxd_wq *wq);
+int idxd_wq_enable(struct idxd_wq *wq);
+int idxd_wq_disable(struct idxd_wq *wq);
+int idxd_wq_map_portal(struct idxd_wq *wq);
+void idxd_wq_unmap_portal(struct idxd_wq *wq);
+
+/* submission */
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+
+/* dmaengine */
+int idxd_register_dma_device(struct idxd_device *idxd);
+void idxd_unregister_dma_device(struct idxd_device *idxd);
+int idxd_register_dma_channel(struct idxd_wq *wq);
+void idxd_unregister_dma_channel(struct idxd_wq *wq);
+void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+			   enum idxd_complete_type comp_type);
+dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+/* cdev */
+int idxd_cdev_register(void);
+void idxd_cdev_remove(void);
+int idxd_cdev_get_major(struct idxd_device *idxd);
+int idxd_wq_add_cdev(struct idxd_wq *wq);
+void idxd_wq_del_cdev(struct idxd_wq *wq);
+
+#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
new file mode 100644
index 000000000000..7778c05deb5d
--- /dev/null
+++ b/drivers/dma/idxd/init.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/aer.h>
+#include <linux/fs.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <uapi/linux/idxd.h>
+#include <linux/dmaengine.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "idxd.h"
+
+MODULE_VERSION(IDXD_DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+
+#define DRV_NAME "idxd"
+
+static struct idr idxd_idrs[IDXD_TYPE_MAX];
+static struct mutex idxd_idr_lock;
+
+static struct pci_device_id idxd_pci_tbl[] = {
+	/* DSA ver 1.0 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
+
+static char *idxd_name[] = {
+	"dsa",
+};
+
+const char *idxd_get_dev_name(struct idxd_device *idxd)
+{
+	return idxd_name[idxd->type];
+}
+
+static int idxd_setup_interrupts(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	struct msix_entry *msix;
+	struct idxd_irq_entry *irq_entry;
+	int i, msixcnt;
+	int rc = 0;
+
+	msixcnt = pci_msix_vec_count(pdev);
+	if (msixcnt < 0) {
+		dev_err(dev, "Not MSI-X interrupt capable.\n");
+		goto err_no_irq;
+	}
+
+	idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) *
+			msixcnt, GFP_KERNEL);
+	if (!idxd->msix_entries) {
+		rc = -ENOMEM;
+		goto err_no_irq;
+	}
+
+	for (i = 0; i < msixcnt; i++)
+		idxd->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt);
+	if (rc) {
+		dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt);
+		goto err_no_irq;
+	}
+	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
+
+	/*
+	 * We implement 1 completion list per MSI-X entry except for
+	 * entry 0, which is for errors and others.
+	 */
+	idxd->irq_entries = devm_kcalloc(dev, msixcnt,
+					 sizeof(struct idxd_irq_entry),
+					 GFP_KERNEL);
+	if (!idxd->irq_entries) {
+		rc = -ENOMEM;
+		goto err_no_irq;
+	}
+
+	for (i = 0; i < msixcnt; i++) {
+		idxd->irq_entries[i].id = i;
+		idxd->irq_entries[i].idxd = idxd;
+	}
+
+	msix = &idxd->msix_entries[0];
+	irq_entry = &idxd->irq_entries[0];
+	rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler,
+				       idxd_misc_thread, 0, "idxd-misc",
+				       irq_entry);
+	if (rc < 0) {
+		dev_err(dev, "Failed to allocate misc interrupt.\n");
+		goto err_no_irq;
+	}
+
+	dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n",
+		msix->vector);
+
+	/* first MSI-X entry is not for wq interrupts */
+	idxd->num_wq_irqs = msixcnt - 1;
+
+	for (i = 1; i < msixcnt; i++) {
+		msix = &idxd->msix_entries[i];
+		irq_entry = &idxd->irq_entries[i];
+
+		init_llist_head(&idxd->irq_entries[i].pending_llist);
+		INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
+		rc = devm_request_threaded_irq(dev, msix->vector,
+					       idxd_irq_handler,
+					       idxd_wq_thread, 0,
+					       "idxd-portal", irq_entry);
+		if (rc < 0) {
+			dev_err(dev, "Failed to allocate irq %d.\n",
+				msix->vector);
+			goto err_no_irq;
+		}
+		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
+			i, msix->vector);
+	}
+
+	idxd_unmask_error_interrupts(idxd);
+
+	return 0;
+
+ err_no_irq:
+	/* Disable error interrupt generation */
+	idxd_mask_error_interrupts(idxd);
+	pci_disable_msix(pdev);
+	dev_err(dev, "No usable interrupts\n");
+	return rc;
+}
+
+static void idxd_wqs_free_lock(struct idxd_device *idxd)
+{
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		percpu_free_rwsem(&wq->submit_lock);
+	}
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+
+	idxd->groups = devm_kcalloc(dev, idxd->max_groups,
+				    sizeof(struct idxd_group), GFP_KERNEL);
+	if (!idxd->groups)
+		return -ENOMEM;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		idxd->groups[i].idxd = idxd;
+		idxd->groups[i].id = i;
+		idxd->groups[i].tc_a = -1;
+		idxd->groups[i].tc_b = -1;
+	}
+
+	idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq),
+				 GFP_KERNEL);
+	if (!idxd->wqs)
+		return -ENOMEM;
+
+	idxd->engines = devm_kcalloc(dev, idxd->max_engines,
+				     sizeof(struct idxd_engine), GFP_KERNEL);
+	if (!idxd->engines)
+		return -ENOMEM;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+		int rc;
+
+		wq->id = i;
+		wq->idxd = idxd;
+		mutex_init(&wq->wq_lock);
+		atomic_set(&wq->dq_count, 0);
+		init_waitqueue_head(&wq->submit_waitq);
+		wq->idxd_cdev.minor = -1;
+		rc = percpu_init_rwsem(&wq->submit_lock);
+		if (rc < 0) {
+			idxd_wqs_free_lock(idxd);
+			return rc;
+		}
+	}
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		idxd->engines[i].idxd = idxd;
+		idxd->engines[i].id = i;
+	}
+
+	return 0;
+}
+
+static void idxd_read_table_offsets(struct idxd_device *idxd)
+{
+	union offsets_reg offsets;
+	struct device *dev = &idxd->pdev->dev;
+
+	offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
+	offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET
+			+ sizeof(u64));
+	idxd->grpcfg_offset = offsets.grpcfg * 0x100;
+	dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
+	idxd->wqcfg_offset = offsets.wqcfg * 0x100;
+	dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n",
+		idxd->wqcfg_offset);
+	idxd->msix_perm_offset = offsets.msix_perm * 0x100;
+	dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n",
+		idxd->msix_perm_offset);
+	idxd->perfmon_offset = offsets.perfmon * 0x100;
+	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
+}
+
+static void idxd_read_caps(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+
+	/* reading generic capabilities */
+	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
+	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
+	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
+	idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
+	dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
+	if (idxd->hw.gen_cap.config_en)
+		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);
+
+	/* reading group capabilities */
+	idxd->hw.group_cap.bits =
+		ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
+	dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
+	idxd->max_groups = idxd->hw.group_cap.num_groups;
+	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
+	idxd->max_tokens = idxd->hw.group_cap.total_tokens;
+	dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
+	idxd->nr_tokens = idxd->max_tokens;
+
+	/* read engine capabilities */
+	idxd->hw.engine_cap.bits =
+		ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
+	dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
+	idxd->max_engines = idxd->hw.engine_cap.num_engines;
+	dev_dbg(dev, "max engines: %u\n", idxd->max_engines);
+
+	/* read workqueue capabilities */
+	idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
+	dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
+	idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
+	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
+	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
+	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
+
+	/* reading operation capabilities */
+	for (i = 0; i < 4; i++) {
+		idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
+				IDXD_OPCAP_OFFSET + i * sizeof(u64));
+		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
+	}
+}
+
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev,
+				      void __iomem * const *iomap)
+{
+	struct device *dev = &pdev->dev;
+	struct idxd_device *idxd;
+
+	idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL);
+	if (!idxd)
+		return NULL;
+
+	idxd->pdev = pdev;
+	idxd->reg_base = iomap[IDXD_MMIO_BAR];
+	spin_lock_init(&idxd->dev_lock);
+
+	return idxd;
+}
+
+static int idxd_probe(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	int rc;
+
+	dev_dbg(dev, "%s entered and resetting device\n", __func__);
+	rc = idxd_device_reset(idxd);
+	if (rc < 0)
+		return rc;
+	dev_dbg(dev, "IDXD reset complete\n");
+
+	idxd_read_caps(idxd);
+	idxd_read_table_offsets(idxd);
+
+	rc = idxd_setup_internals(idxd);
+	if (rc)
+		goto err_setup;
+
+	rc = idxd_setup_interrupts(idxd);
+	if (rc)
+		goto err_setup;
+
+	dev_dbg(dev, "IDXD interrupt setup complete.\n");
+
+	mutex_lock(&idxd_idr_lock);
+	idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL);
+	mutex_unlock(&idxd_idr_lock);
+	if (idxd->id < 0) {
+		rc = -ENOMEM;
+		goto err_idr_fail;
+	}
+
+	idxd->major = idxd_cdev_get_major(idxd);
+
+	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
+	return 0;
+
+ err_idr_fail:
+	idxd_mask_error_interrupts(idxd);
+	idxd_mask_msix_vectors(idxd);
+ err_setup:
+	return rc;
+}
+
+static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	void __iomem * const *iomap;
+	struct device *dev = &pdev->dev;
+	struct idxd_device *idxd;
+	int rc;
+	unsigned int mask;
+
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	dev_dbg(dev, "Mapping BARs\n");
+	mask = (1 << IDXD_MMIO_BAR);
+	rc = pcim_iomap_regions(pdev, mask, DRV_NAME);
+	if (rc)
+		return rc;
+
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	dev_dbg(dev, "Set DMA masks\n");
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	dev_dbg(dev, "Alloc IDXD context\n");
+	idxd = idxd_alloc(pdev, iomap);
+	if (!idxd)
+		return -ENOMEM;
+
+	idxd_set_type(idxd);
+
+	dev_dbg(dev, "Set PCI master\n");
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, idxd);
+
+	idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
+	rc = idxd_probe(idxd);
+	if (rc) {
+		dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
+		return -ENODEV;
+	}
+
+	rc = idxd_setup_sysfs(idxd);
+	if (rc) {
+		dev_err(dev, "IDXD sysfs setup failed\n");
+		return -ENODEV;
+	}
+
+	idxd->state = IDXD_DEV_CONF_READY;
+
+	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
+		 idxd->hw.version);
+
+	return 0;
+}
+
+static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
+{
+	struct idxd_desc *desc, *itr;
+	struct llist_node *head;
+
+	head = llist_del_all(&ie->pending_llist);
+	if (!head)
+		return;
+
+	llist_for_each_entry_safe(desc, itr, head, llnode) {
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
+		idxd_free_desc(desc->wq, desc);
+	}
+}
+
+static void idxd_flush_work_list(struct idxd_irq_entry *ie)
+{
+	struct idxd_desc *desc, *iter;
+
+	list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
+		list_del(&desc->list);
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
+		idxd_free_desc(desc->wq, desc);
+	}
+}
+
+static void idxd_shutdown(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	int rc, i;
+	struct idxd_irq_entry *irq_entry;
+	int msixcnt = pci_msix_vec_count(pdev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = idxd_device_disable(idxd);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	if (rc)
+		dev_err(&pdev->dev, "Disabling device failed\n");
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	idxd_mask_msix_vectors(idxd);
+	idxd_mask_error_interrupts(idxd);
+
+	for (i = 0; i < msixcnt; i++) {
+		irq_entry = &idxd->irq_entries[i];
+		synchronize_irq(idxd->msix_entries[i].vector);
+		if (i == 0)
+			continue;
+		idxd_flush_pending_llist(irq_entry);
+		idxd_flush_work_list(irq_entry);
+	}
+}
+
+static void idxd_remove(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	idxd_cleanup_sysfs(idxd);
+	idxd_shutdown(pdev);
+	idxd_wqs_free_lock(idxd);
+	mutex_lock(&idxd_idr_lock);
+	idr_remove(&idxd_idrs[idxd->type], idxd->id);
+	mutex_unlock(&idxd_idr_lock);
+}
+
+static struct pci_driver idxd_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= idxd_pci_tbl,
+	.probe		= idxd_pci_probe,
+	.remove		= idxd_remove,
+	.shutdown	= idxd_shutdown,
+};
+
+static int __init idxd_init_module(void)
+{
+	int err, i;
+
+	/*
+	 * If the CPU does not support write512, there's no point in
+	 * enumerating the device. We can not utilize it.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) {
+		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
+		return -ENODEV;
+	}
+
+	pr_info("%s: Intel(R) Accelerator Devices Driver %s\n",
+		DRV_NAME, IDXD_DRIVER_VERSION);
+
+	mutex_init(&idxd_idr_lock);
+	for (i = 0; i < IDXD_TYPE_MAX; i++)
+		idr_init(&idxd_idrs[i]);
+
+	err = idxd_register_bus_type();
+	if (err < 0)
+		return err;
+
+	err = idxd_register_driver();
+	if (err < 0)
+		goto err_idxd_driver_register;
+
+	err = idxd_cdev_register();
+	if (err)
+		goto err_cdev_register;
+
+	err = pci_register_driver(&idxd_pci_driver);
+	if (err)
+		goto err_pci_register;
+
+	return 0;
+
+err_pci_register:
+	idxd_cdev_remove();
+err_cdev_register:
+	idxd_unregister_driver();
+err_idxd_driver_register:
+	idxd_unregister_bus_type();
+	return err;
+}
+module_init(idxd_init_module);
+
+static void __exit idxd_exit_module(void)
+{
+	pci_unregister_driver(&idxd_pci_driver);
+	idxd_cdev_remove();
+	idxd_unregister_bus_type();
+}
+module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
new file mode 100644
index 000000000000..d6fcd2e60103
--- /dev/null
+++ b/drivers/dma/idxd/irq.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "idxd.h"
+#include "registers.h"
+
+void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+{
+	int i;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		wq->state = IDXD_WQ_DISABLED;
+	}
+}
+
+static int idxd_restart(struct idxd_device *idxd)
+{
+	int i, rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+
+	rc = __idxd_device_reset(idxd);
+	if (rc < 0)
+		goto out;
+
+	rc = idxd_device_config(idxd);
+	if (rc < 0)
+		goto out;
+
+	rc = idxd_device_enable(idxd);
+	if (rc < 0)
+		goto out;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		if (wq->state == IDXD_WQ_ENABLED) {
+			rc = idxd_wq_enable(wq);
+			if (rc < 0) {
+				dev_warn(&idxd->pdev->dev,
+					 "Unable to re-enable wq %s\n",
+					 dev_name(&wq->conf_dev));
+			}
+		}
+	}
+
+	return 0;
+
+ out:
+	idxd_device_wqs_clear_state(idxd);
+	idxd->state = IDXD_DEV_HALTED;
+	return rc;
+}
+
+irqreturn_t idxd_irq_handler(int vec, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+	struct idxd_device *idxd = irq_entry->idxd;
+
+	idxd_mask_msix_vector(idxd, irq_entry->id);
+	return IRQ_WAKE_THREAD;
+}
+
+irqreturn_t idxd_misc_thread(int vec, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+	struct idxd_device *idxd = irq_entry->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	union gensts_reg gensts;
+	u32 cause, val = 0;
+	int i, rc;
+	bool err = false;
+
+	cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+
+	if (cause & IDXD_INTC_ERR) {
+		spin_lock_bh(&idxd->dev_lock);
+		for (i = 0; i < 4; i++)
+			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
+					IDXD_SWERR_OFFSET + i * sizeof(u64));
+		iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET);
+
+		if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
+			int id = idxd->sw_err.wq_idx;
+			struct idxd_wq *wq = &idxd->wqs[id];
+
+			if (wq->type == IDXD_WQT_USER)
+				wake_up_interruptible(&wq->idxd_cdev.err_queue);
+		} else {
+			int i;
+
+			for (i = 0; i < idxd->max_wqs; i++) {
+				struct idxd_wq *wq = &idxd->wqs[i];
+
+				if (wq->type == IDXD_WQT_USER)
+					wake_up_interruptible(&wq->idxd_cdev.err_queue);
+			}
+		}
+
+		spin_unlock_bh(&idxd->dev_lock);
+		val |= IDXD_INTC_ERR;
+
+		for (i = 0; i < 4; i++)
+			dev_warn(dev, "err[%d]: %#16.16llx\n",
+				 i, idxd->sw_err.bits[i]);
+		err = true;
+	}
+
+	if (cause & IDXD_INTC_CMD) {
+		/* Driver does use command interrupts */
+		val |= IDXD_INTC_CMD;
+	}
+
+	if (cause & IDXD_INTC_OCCUPY) {
+		/* Driver does not utilize occupancy interrupt */
+		val |= IDXD_INTC_OCCUPY;
+	}
+
+	if (cause & IDXD_INTC_PERFMON_OVFL) {
+		/*
+		 * Driver does not utilize perfmon counter overflow interrupt
+		 * yet.
+		 */
+		val |= IDXD_INTC_PERFMON_OVFL;
+	}
+
+	val ^= cause;
+	if (val)
+		dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
+			      val);
+
+	iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+	if (!err)
+		return IRQ_HANDLED;
+
+	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
+		spin_lock_bh(&idxd->dev_lock);
+		if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
+			rc = idxd_restart(idxd);
+			if (rc < 0)
+				dev_err(&idxd->pdev->dev,
+					"idxd restart failed, device halt.");
+		} else {
+			idxd_device_wqs_clear_state(idxd);
+			idxd->state = IDXD_DEV_HALTED;
+			dev_err(&idxd->pdev->dev,
+				"idxd halted, need %s.\n",
+				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
+				"FLR" : "system reset");
+		}
+		spin_unlock_bh(&idxd->dev_lock);
+	}
+
+	idxd_unmask_msix_vector(idxd, irq_entry->id);
+	return IRQ_HANDLED;
+}
+
+static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
+				     int *processed)
+{
+	struct idxd_desc *desc, *t;
+	struct llist_node *head;
+	int queued = 0;
+
+	head = llist_del_all(&irq_entry->pending_llist);
+	if (!head)
+		return 0;
+
+	llist_for_each_entry_safe(desc, t, head, llnode) {
+		if (desc->completion->status) {
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+			idxd_free_desc(desc->wq, desc);
+			(*processed)++;
+		} else {
+			list_add_tail(&desc->list, &irq_entry->work_list);
+			queued++;
+		}
+	}
+
+	return queued;
+}
+
+static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
+				 int *processed)
+{
+	struct list_head *node, *next;
+	int queued = 0;
+
+	if (list_empty(&irq_entry->work_list))
+		return 0;
+
+	list_for_each_safe(node, next, &irq_entry->work_list) {
+		struct idxd_desc *desc =
+			container_of(node, struct idxd_desc, list);
+
+		if (desc->completion->status) {
+			list_del(&desc->list);
+			/* process and callback */
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+			idxd_free_desc(desc->wq, desc);
+			(*processed)++;
+		} else {
+			queued++;
+		}
+	}
+
+	return queued;
+}
+
+irqreturn_t idxd_wq_thread(int irq, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+	int rc, processed = 0, retry = 0;
+
+	/*
+	 * There are two lists we are processing. The pending_llist is where
+	 * submmiter adds all the submitted descriptor after sending it to
+	 * the workqueue. It's a lockless singly linked list. The work_list
+	 * is the common linux double linked list. We are in a scenario of
+	 * multiple producers and a single consumer. The producers are all
+	 * the kernel submitters of descriptors, and the consumer is the
+	 * kernel irq handler thread for the msix vector when using threaded
+	 * irq. To work with the restrictions of llist to remain lockless,
+	 * we are doing the following steps:
+	 * 1. Iterate through the work_list and process any completed
+	 *    descriptor. Delete the completed entries during iteration.
+	 * 2. llist_del_all() from the pending list.
+	 * 3. Iterate through the llist that was deleted from the pending list
+	 *    and process the completed entries.
+	 * 4. If the entry is still waiting on hardware, list_add_tail() to
+	 *    the work_list.
+	 * 5. Repeat until no more descriptors.
+	 */
+	do {
+		rc = irq_process_work_list(irq_entry, &processed);
+		if (rc != 0) {
+			retry++;
+			continue;
+		}
+
+		rc = irq_process_pending_llist(irq_entry, &processed);
+	} while (rc != 0 && retry != 10);
+
+	idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
+
+	if (processed == 0)
+		return IRQ_NONE;
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
new file mode 100644
index 000000000000..a39e7ae6b3d9
--- /dev/null
+++ b/drivers/dma/idxd/registers.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_REGISTERS_H_
+#define _IDXD_REGISTERS_H_
+
+/* PCI Config */
+#define PCI_DEVICE_ID_INTEL_DSA_SPR0	0x0b25
+
+#define IDXD_MMIO_BAR		0
+#define IDXD_WQ_BAR		2
+#define IDXD_PORTAL_SIZE	0x4000
+
+/* MMIO Device BAR0 Registers */
+#define IDXD_VER_OFFSET			0x00
+#define IDXD_VER_MAJOR_MASK		0xf0
+#define IDXD_VER_MINOR_MASK		0x0f
+#define GET_IDXD_VER_MAJOR(x)		(((x) & IDXD_VER_MAJOR_MASK) >> 4)
+#define GET_IDXD_VER_MINOR(x)		((x) & IDXD_VER_MINOR_MASK)
+
+union gen_cap_reg {
+	struct {
+		u64 block_on_fault:1;
+		u64 overlap_copy:1;
+		u64 cache_control_mem:1;
+		u64 cache_control_cache:1;
+		u64 rsvd:3;
+		u64 int_handle_req:1;
+		u64 dest_readback:1;
+		u64 drain_readback:1;
+		u64 rsvd2:6;
+		u64 max_xfer_shift:5;
+		u64 max_batch_shift:4;
+		u64 max_ims_mult:6;
+		u64 config_en:1;
+		u64 max_descs_per_engine:8;
+		u64 rsvd3:24;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_GENCAP_OFFSET		0x10
+
+union wq_cap_reg {
+	struct {
+		u64 total_wq_size:16;
+		u64 num_wqs:8;
+		u64 rsvd:24;
+		u64 shared_mode:1;
+		u64 dedicated_mode:1;
+		u64 rsvd2:1;
+		u64 priority:1;
+		u64 occupancy:1;
+		u64 occupancy_int:1;
+		u64 rsvd3:10;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_WQCAP_OFFSET		0x20
+
+union group_cap_reg {
+	struct {
+		u64 num_groups:8;
+		u64 total_tokens:8;
+		u64 token_en:1;
+		u64 token_limit:1;
+		u64 rsvd:46;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_GRPCAP_OFFSET		0x30
+
+union engine_cap_reg {
+	struct {
+		u64 num_engines:8;
+		u64 rsvd:56;
+	};
+	u64 bits;
+} __packed;
+
+#define IDXD_ENGCAP_OFFSET		0x38
+
+#define IDXD_OPCAP_NOOP			0x0001
+#define IDXD_OPCAP_BATCH			0x0002
+#define IDXD_OPCAP_MEMMOVE		0x0008
+struct opcap {
+	u64 bits[4];
+};
+
+#define IDXD_OPCAP_OFFSET		0x40
+
+#define IDXD_TABLE_OFFSET		0x60
+union offsets_reg {
+	struct {
+		u64 grpcfg:16;
+		u64 wqcfg:16;
+		u64 msix_perm:16;
+		u64 ims:16;
+		u64 perfmon:16;
+		u64 rsvd:48;
+	};
+	u64 bits[2];
+} __packed;
+
+#define IDXD_GENCFG_OFFSET		0x80
+union gencfg_reg {
+	struct {
+		u32 token_limit:8;
+		u32 rsvd:4;
+		u32 user_int_en:1;
+		u32 rsvd2:19;
+	};
+	u32 bits;
+} __packed;
+
+#define IDXD_GENCTRL_OFFSET		0x88
+union genctrl_reg {
+	struct {
+		u32 softerr_int_en:1;
+		u32 rsvd:31;
+	};
+	u32 bits;
+} __packed;
+
+#define IDXD_GENSTATS_OFFSET		0x90
+union gensts_reg {
+	struct {
+		u32 state:2;
+		u32 reset_type:2;
+		u32 rsvd:28;
+	};
+	u32 bits;
+} __packed;
+
+enum idxd_device_status_state {
+	IDXD_DEVICE_STATE_DISABLED = 0,
+	IDXD_DEVICE_STATE_ENABLED,
+	IDXD_DEVICE_STATE_DRAIN,
+	IDXD_DEVICE_STATE_HALT,
+};
+
+enum idxd_device_reset_type {
+	IDXD_DEVICE_RESET_SOFTWARE = 0,
+	IDXD_DEVICE_RESET_FLR,
+	IDXD_DEVICE_RESET_WARM,
+	IDXD_DEVICE_RESET_COLD,
+};
+
+#define IDXD_INTCAUSE_OFFSET		0x98
+#define IDXD_INTC_ERR			0x01
+#define IDXD_INTC_CMD			0x02
+#define IDXD_INTC_OCCUPY			0x04
+#define IDXD_INTC_PERFMON_OVFL		0x08
+
+#define IDXD_CMD_OFFSET			0xa0
+union idxd_command_reg {
+	struct {
+		u32 operand:20;
+		u32 cmd:5;
+		u32 rsvd:6;
+		u32 int_req:1;
+	};
+	u32 bits;
+} __packed;
+
+enum idxd_cmd {
+	IDXD_CMD_ENABLE_DEVICE = 1,
+	IDXD_CMD_DISABLE_DEVICE,
+	IDXD_CMD_DRAIN_ALL,
+	IDXD_CMD_ABORT_ALL,
+	IDXD_CMD_RESET_DEVICE,
+	IDXD_CMD_ENABLE_WQ,
+	IDXD_CMD_DISABLE_WQ,
+	IDXD_CMD_DRAIN_WQ,
+	IDXD_CMD_ABORT_WQ,
+	IDXD_CMD_RESET_WQ,
+	IDXD_CMD_DRAIN_PASID,
+	IDXD_CMD_ABORT_PASID,
+	IDXD_CMD_REQUEST_INT_HANDLE,
+};
+
+#define IDXD_CMDSTS_OFFSET		0xa8
+union cmdsts_reg {
+	struct {
+		u8 err;
+		u16 result;
+		u8 rsvd:7;
+		u8 active:1;
+	};
+	u32 bits;
+} __packed;
+#define IDXD_CMDSTS_ACTIVE		0x80000000
+
+enum idxd_cmdsts_err {
+	IDXD_CMDSTS_SUCCESS = 0,
+	IDXD_CMDSTS_INVAL_CMD,
+	IDXD_CMDSTS_INVAL_WQIDX,
+	IDXD_CMDSTS_HW_ERR,
+	/* enable device errors */
+	IDXD_CMDSTS_ERR_DEV_ENABLED = 0x10,
+	IDXD_CMDSTS_ERR_CONFIG,
+	IDXD_CMDSTS_ERR_BUSMASTER_EN,
+	IDXD_CMDSTS_ERR_PASID_INVAL,
+	IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE,
+	IDXD_CMDSTS_ERR_GRP_CONFIG,
+	IDXD_CMDSTS_ERR_GRP_CONFIG2,
+	IDXD_CMDSTS_ERR_GRP_CONFIG3,
+	IDXD_CMDSTS_ERR_GRP_CONFIG4,
+	/* enable wq errors */
+	IDXD_CMDSTS_ERR_DEV_NOTEN = 0x20,
+	IDXD_CMDSTS_ERR_WQ_ENABLED,
+	IDXD_CMDSTS_ERR_WQ_SIZE,
+	IDXD_CMDSTS_ERR_WQ_PRIOR,
+	IDXD_CMDSTS_ERR_WQ_MODE,
+	IDXD_CMDSTS_ERR_BOF_EN,
+	IDXD_CMDSTS_ERR_PASID_EN,
+	IDXD_CMDSTS_ERR_MAX_BATCH_SIZE,
+	IDXD_CMDSTS_ERR_MAX_XFER_SIZE,
+	/* disable device errors */
+	IDXD_CMDSTS_ERR_DIS_DEV_EN = 0x31,
+	/* disable WQ, drain WQ, abort WQ, reset WQ */
+	IDXD_CMDSTS_ERR_DEV_NOT_EN,
+	/* request interrupt handle */
+	IDXD_CMDSTS_ERR_INVAL_INT_IDX = 0x41,
+	IDXD_CMDSTS_ERR_NO_HANDLE,
+};
+
+#define IDXD_SWERR_OFFSET		0xc0
+#define IDXD_SWERR_VALID		0x00000001
+#define IDXD_SWERR_OVERFLOW		0x00000002
+#define IDXD_SWERR_ACK			(IDXD_SWERR_VALID | IDXD_SWERR_OVERFLOW)
+union sw_err_reg {
+	struct {
+		u64 valid:1;
+		u64 overflow:1;
+		u64 desc_valid:1;
+		u64 wq_idx_valid:1;
+		u64 batch:1;
+		u64 fault_rw:1;
+		u64 priv:1;
+		u64 rsvd:1;
+		u64 error:8;
+		u64 wq_idx:8;
+		u64 rsvd2:8;
+		u64 operation:8;
+		u64 pasid:20;
+		u64 rsvd3:4;
+
+		u64 batch_idx:16;
+		u64 rsvd4:16;
+		u64 invalid_flags:32;
+
+		u64 fault_addr;
+
+		u64 rsvd5;
+	};
+	u64 bits[4];
+} __packed;
+
+union msix_perm {
+	struct {
+		u32 rsvd:2;
+		u32 ignore:1;
+		u32 pasid_en:1;
+		u32 rsvd2:8;
+		u32 pasid:20;
+	};
+	u32 bits;
+} __packed;
+
+union group_flags {
+	struct {
+		u32 tc_a:3;
+		u32 tc_b:3;
+		u32 rsvd:1;
+		u32 use_token_limit:1;
+		u32 tokens_reserved:8;
+		u32 rsvd2:4;
+		u32 tokens_allowed:8;
+		u32 rsvd3:4;
+	};
+	u32 bits;
+} __packed;
+
+struct grpcfg {
+	u64 wqs[4];
+	u64 engines;
+	union group_flags flags;
+} __packed;
+
+union wqcfg {
+	struct {
+		/* bytes 0-3 */
+		u16 wq_size;
+		u16 rsvd;
+
+		/* bytes 4-7 */
+		u16 wq_thresh;
+		u16 rsvd1;
+
+		/* bytes 8-11 */
+		u32 mode:1;	/* shared or dedicated */
+		u32 bof:1;	/* block on fault */
+		u32 rsvd2:2;
+		u32 priority:4;
+		u32 pasid:20;
+		u32 pasid_en:1;
+		u32 priv:1;
+		u32 rsvd3:2;
+
+		/* bytes 12-15 */
+		u32 max_xfer_shift:5;
+		u32 max_batch_shift:4;
+		u32 rsvd4:23;
+
+		/* bytes 16-19 */
+		u16 occupancy_inth;
+		u16 occupancy_table_sel:1;
+		u16 rsvd5:15;
+
+		/* bytes 20-23 */
+		u16 occupancy_limit;
+		u16 occupancy_int_en:1;
+		u16 rsvd6:15;
+
+		/* bytes 24-27 */
+		u16 occupancy;
+		u16 occupancy_int:1;
+		u16 rsvd7:12;
+		u16 mode_support:1;
+		u16 wq_state:2;
+
+		/* bytes 28-31 */
+		u32 rsvd8;
+	};
+	u32 bits[8];
+} __packed;
+#endif
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
new file mode 100644
index 000000000000..45a0c5869a0a
--- /dev/null
+++ b/drivers/dma/idxd/submit.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+{
+	struct idxd_desc *desc;
+	int idx;
+	struct idxd_device *idxd = wq->idxd;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return ERR_PTR(-EIO);
+
+	if (optype == IDXD_OP_BLOCK)
+		percpu_down_read(&wq->submit_lock);
+	else if (!percpu_down_read_trylock(&wq->submit_lock))
+		return ERR_PTR(-EBUSY);
+
+	if (!atomic_add_unless(&wq->dq_count, 1, wq->size)) {
+		int rc;
+
+		if (optype == IDXD_OP_NONBLOCK) {
+			percpu_up_read(&wq->submit_lock);
+			return ERR_PTR(-EAGAIN);
+		}
+
+		percpu_up_read(&wq->submit_lock);
+		percpu_down_write(&wq->submit_lock);
+		rc = wait_event_interruptible(wq->submit_waitq,
+					      atomic_add_unless(&wq->dq_count,
+								1, wq->size) ||
+					       idxd->state != IDXD_DEV_ENABLED);
+		percpu_up_write(&wq->submit_lock);
+		if (rc < 0)
+			return ERR_PTR(-EINTR);
+		if (idxd->state != IDXD_DEV_ENABLED)
+			return ERR_PTR(-EIO);
+	} else {
+		percpu_up_read(&wq->submit_lock);
+	}
+
+	idx = sbitmap_get(&wq->sbmap, 0, false);
+	if (idx < 0) {
+		atomic_dec(&wq->dq_count);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	desc = wq->descs[idx];
+	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
+	memset(desc->completion, 0, sizeof(struct dsa_completion_record));
+	return desc;
+}
+
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+	atomic_dec(&wq->dq_count);
+
+	sbitmap_clear_bit(&wq->sbmap, desc->id);
+	wake_up(&wq->submit_waitq);
+}
+
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+	struct idxd_device *idxd = wq->idxd;
+	int vec = desc->hw->int_handle;
+	void __iomem *portal;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return -EIO;
+
+	portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED);
+	/*
+	 * The wmb() flushes writes to coherent DMA data before possibly
+	 * triggering a DMA read. The wmb() is necessary even on UP because
+	 * the recipient is a device.
+	 */
+	wmb();
+	iosubmit_cmds512(portal, desc->hw, 1);
+
+	/*
+	 * Pending the descriptor to the lockless list for the irq_entry
+	 * that we designated the descriptor to.
+	 */
+	if (desc->hw->flags & IDXD_OP_FLAG_RCI)
+		llist_add(&desc->llnode,
+			  &idxd->irq_entries[vec].pending_llist);
+
+	return 0;
+}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
new file mode 100644
index 000000000000..849c50ab939a
--- /dev/null
+++ b/drivers/dma/idxd/sysfs.c
@@ -0,0 +1,1528 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+static char *idxd_wq_type_names[] = {
+	[IDXD_WQT_NONE]		= "none",
+	[IDXD_WQT_KERNEL]	= "kernel",
+	[IDXD_WQT_USER]		= "user",
+};
+
+static void idxd_conf_device_release(struct device *dev)
+{
+	dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev));
+}
+
+static struct device_type idxd_group_device_type = {
+	.name = "group",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type idxd_wq_device_type = {
+	.name = "wq",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type idxd_engine_device_type = {
+	.name = "engine",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type dsa_device_type = {
+	.name = "dsa",
+	.release = idxd_conf_device_release,
+};
+
+static inline bool is_dsa_dev(struct device *dev)
+{
+	return dev ? dev->type == &dsa_device_type : false;
+}
+
+static inline bool is_idxd_dev(struct device *dev)
+{
+	return is_dsa_dev(dev);
+}
+
+static inline bool is_idxd_wq_dev(struct device *dev)
+{
+	return dev ? dev->type == &idxd_wq_device_type : false;
+}
+
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+	if (wq->type == IDXD_WQT_KERNEL &&
+	    strcmp(wq->name, "dmaengine") == 0)
+		return true;
+	return false;
+}
+
+static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+{
+	return wq->type == IDXD_WQT_USER ? true : false;
+}
+
+static int idxd_config_bus_match(struct device *dev,
+				 struct device_driver *drv)
+{
+	int matched = 0;
+
+	if (is_idxd_dev(dev)) {
+		struct idxd_device *idxd = confdev_to_idxd(dev);
+
+		if (idxd->state != IDXD_DEV_CONF_READY)
+			return 0;
+		matched = 1;
+	} else if (is_idxd_wq_dev(dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+		struct idxd_device *idxd = wq->idxd;
+
+		if (idxd->state < IDXD_DEV_CONF_READY)
+			return 0;
+
+		if (wq->state != IDXD_WQ_DISABLED) {
+			dev_dbg(dev, "%s not disabled\n", dev_name(dev));
+			return 0;
+		}
+		matched = 1;
+	}
+
+	if (matched)
+		dev_dbg(dev, "%s matched\n", dev_name(dev));
+
+	return matched;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+	int rc;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s called\n", __func__);
+
+	if (is_idxd_dev(dev)) {
+		struct idxd_device *idxd = confdev_to_idxd(dev);
+
+		if (idxd->state != IDXD_DEV_CONF_READY) {
+			dev_warn(dev, "Device not ready for config\n");
+			return -EBUSY;
+		}
+
+		if (!try_module_get(THIS_MODULE))
+			return -ENXIO;
+
+		spin_lock_irqsave(&idxd->dev_lock, flags);
+
+		/* Perform IDXD configuration and enabling */
+		rc = idxd_device_config(idxd);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			dev_warn(dev, "Device config failed: %d\n", rc);
+			return rc;
+		}
+
+		/* start device */
+		rc = idxd_device_enable(idxd);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			dev_warn(dev, "Device enable failed: %d\n", rc);
+			return rc;
+		}
+
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		dev_info(dev, "Device %s enabled\n", dev_name(dev));
+
+		rc = idxd_register_dma_device(idxd);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			dev_dbg(dev, "Failed to register dmaengine device\n");
+			return rc;
+		}
+		return 0;
+	} else if (is_idxd_wq_dev(dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+		struct idxd_device *idxd = wq->idxd;
+
+		mutex_lock(&wq->wq_lock);
+
+		if (idxd->state != IDXD_DEV_ENABLED) {
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "Enabling while device not enabled.\n");
+			return -EPERM;
+		}
+
+		if (wq->state != IDXD_WQ_DISABLED) {
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "WQ %d already enabled.\n", wq->id);
+			return -EBUSY;
+		}
+
+		if (!wq->group) {
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "WQ not attached to group.\n");
+			return -EINVAL;
+		}
+
+		if (strlen(wq->name) == 0) {
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "WQ name not set.\n");
+			return -EINVAL;
+		}
+
+		rc = idxd_wq_alloc_resources(wq);
+		if (rc < 0) {
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "WQ resource alloc failed\n");
+			return rc;
+		}
+
+		spin_lock_irqsave(&idxd->dev_lock, flags);
+		rc = idxd_device_config(idxd);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "Writing WQ %d config failed: %d\n",
+				 wq->id, rc);
+			return rc;
+		}
+
+		rc = idxd_wq_enable(wq);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			mutex_unlock(&wq->wq_lock);
+			dev_warn(dev, "WQ %d enabling failed: %d\n",
+				 wq->id, rc);
+			return rc;
+		}
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+		rc = idxd_wq_map_portal(wq);
+		if (rc < 0) {
+			dev_warn(dev, "wq portal mapping failed: %d\n", rc);
+			rc = idxd_wq_disable(wq);
+			if (rc < 0)
+				dev_warn(dev, "IDXD wq disable failed\n");
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			mutex_unlock(&wq->wq_lock);
+			return rc;
+		}
+
+		wq->client_count = 0;
+
+		dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
+
+		if (is_idxd_wq_dmaengine(wq)) {
+			rc = idxd_register_dma_channel(wq);
+			if (rc < 0) {
+				dev_dbg(dev, "DMA channel register failed\n");
+				mutex_unlock(&wq->wq_lock);
+				return rc;
+			}
+		} else if (is_idxd_wq_cdev(wq)) {
+			rc = idxd_wq_add_cdev(wq);
+			if (rc < 0) {
+				dev_dbg(dev, "Cdev creation failed\n");
+				mutex_unlock(&wq->wq_lock);
+				return rc;
+			}
+		}
+
+		mutex_unlock(&wq->wq_lock);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+static void disable_wq(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	unsigned long flags;
+	int rc;
+
+	mutex_lock(&wq->wq_lock);
+	dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev));
+	if (wq->state == IDXD_WQ_DISABLED) {
+		mutex_unlock(&wq->wq_lock);
+		return;
+	}
+
+	if (is_idxd_wq_dmaengine(wq))
+		idxd_unregister_dma_channel(wq);
+	else if (is_idxd_wq_cdev(wq))
+		idxd_wq_del_cdev(wq);
+
+	if (idxd_wq_refcount(wq))
+		dev_warn(dev, "Clients has claim on wq %d: %d\n",
+			 wq->id, idxd_wq_refcount(wq));
+
+	idxd_wq_unmap_portal(wq);
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = idxd_wq_disable(wq);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	idxd_wq_free_resources(wq);
+	wq->client_count = 0;
+	mutex_unlock(&wq->wq_lock);
+
+	if (rc < 0)
+		dev_warn(dev, "Failed to disable %s: %d\n",
+			 dev_name(&wq->conf_dev), rc);
+	else
+		dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
+}
+
+static int idxd_config_bus_remove(struct device *dev)
+{
+	int rc;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev));
+
+	/* disable workqueue here */
+	if (is_idxd_wq_dev(dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+
+		disable_wq(wq);
+	} else if (is_idxd_dev(dev)) {
+		struct idxd_device *idxd = confdev_to_idxd(dev);
+		int i;
+
+		dev_dbg(dev, "%s removing dev %s\n", __func__,
+			dev_name(&idxd->conf_dev));
+		for (i = 0; i < idxd->max_wqs; i++) {
+			struct idxd_wq *wq = &idxd->wqs[i];
+
+			if (wq->state == IDXD_WQ_DISABLED)
+				continue;
+			dev_warn(dev, "Active wq %d on disable %s.\n", i,
+				 dev_name(&idxd->conf_dev));
+			device_release_driver(&wq->conf_dev);
+		}
+
+		idxd_unregister_dma_device(idxd);
+		spin_lock_irqsave(&idxd->dev_lock, flags);
+		rc = idxd_device_disable(idxd);
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		module_put(THIS_MODULE);
+		if (rc < 0)
+			dev_warn(dev, "Device disable failed\n");
+		else
+			dev_info(dev, "Device %s disabled\n", dev_name(dev));
+
+	}
+
+	return 0;
+}
+
+static void idxd_config_bus_shutdown(struct device *dev)
+{
+	dev_dbg(dev, "%s called\n", __func__);
+}
+
+struct bus_type dsa_bus_type = {
+	.name = "dsa",
+	.match = idxd_config_bus_match,
+	.probe = idxd_config_bus_probe,
+	.remove = idxd_config_bus_remove,
+	.shutdown = idxd_config_bus_shutdown,
+};
+
+static struct bus_type *idxd_bus_types[] = {
+	&dsa_bus_type
+};
+
+static struct idxd_device_driver dsa_drv = {
+	.drv = {
+		.name = "dsa",
+		.bus = &dsa_bus_type,
+		.owner = THIS_MODULE,
+		.mod_name = KBUILD_MODNAME,
+	},
+};
+
+static struct idxd_device_driver *idxd_drvs[] = {
+	&dsa_drv
+};
+
+struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
+{
+	return idxd_bus_types[idxd->type];
+}
+
+static struct device_type *idxd_get_device_type(struct idxd_device *idxd)
+{
+	if (idxd->type == IDXD_TYPE_DSA)
+		return &dsa_device_type;
+	else
+		return NULL;
+}
+
+/* IDXD generic driver setup */
+int idxd_register_driver(void)
+{
+	int i, rc;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		rc = driver_register(&idxd_drvs[i]->drv);
+		if (rc < 0)
+			goto drv_fail;
+	}
+
+	return 0;
+
+drv_fail:
+	for (; i > 0; i--)
+		driver_unregister(&idxd_drvs[i]->drv);
+	return rc;
+}
+
+void idxd_unregister_driver(void)
+{
+	int i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++)
+		driver_unregister(&idxd_drvs[i]->drv);
+}
+
+/* IDXD engine attributes */
+static ssize_t engine_group_id_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct idxd_engine *engine =
+		container_of(dev, struct idxd_engine, conf_dev);
+
+	if (engine->group)
+		return sprintf(buf, "%d\n", engine->group->id);
+	else
+		return sprintf(buf, "%d\n", -1);
+}
+
+static ssize_t engine_group_id_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct idxd_engine *engine =
+		container_of(dev, struct idxd_engine, conf_dev);
+	struct idxd_device *idxd = engine->idxd;
+	long id;
+	int rc;
+	struct idxd_group *prevg, *group;
+
+	rc = kstrtol(buf, 10, &id);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (id > idxd->max_groups - 1 || id < -1)
+		return -EINVAL;
+
+	if (id == -1) {
+		if (engine->group) {
+			engine->group->num_engines--;
+			engine->group = NULL;
+		}
+		return count;
+	}
+
+	group = &idxd->groups[id];
+	prevg = engine->group;
+
+	if (prevg)
+		prevg->num_engines--;
+	engine->group = &idxd->groups[id];
+	engine->group->num_engines++;
+
+	return count;
+}
+
+static struct device_attribute dev_attr_engine_group =
+		__ATTR(group_id, 0644, engine_group_id_show,
+		       engine_group_id_store);
+
+static struct attribute *idxd_engine_attributes[] = {
+	&dev_attr_engine_group.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_engine_attribute_group = {
+	.attrs = idxd_engine_attributes,
+};
+
+static const struct attribute_group *idxd_engine_attribute_groups[] = {
+	&idxd_engine_attribute_group,
+	NULL,
+};
+
+/* Group attributes */
+
+static void idxd_set_free_tokens(struct idxd_device *idxd)
+{
+	int i, tokens;
+
+	for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *g = &idxd->groups[i];
+
+		tokens += g->tokens_reserved;
+	}
+
+	idxd->nr_tokens = idxd->max_tokens - tokens;
+}
+
+static ssize_t group_tokens_reserved_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->tokens_reserved);
+}
+
+static ssize_t group_tokens_reserved_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+
+	if (val > idxd->max_tokens)
+		return -EINVAL;
+
+	if (val > idxd->nr_tokens)
+		return -EINVAL;
+
+	group->tokens_reserved = val;
+	idxd_set_free_tokens(idxd);
+	return count;
+}
+
+static struct device_attribute dev_attr_group_tokens_reserved =
+		__ATTR(tokens_reserved, 0644, group_tokens_reserved_show,
+		       group_tokens_reserved_store);
+
+static ssize_t group_tokens_allowed_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->tokens_allowed);
+}
+
+static ssize_t group_tokens_allowed_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+	if (val < 4 * group->num_engines ||
+	    val > group->tokens_reserved + idxd->nr_tokens)
+		return -EINVAL;
+
+	group->tokens_allowed = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_tokens_allowed =
+		__ATTR(tokens_allowed, 0644, group_tokens_allowed_show,
+		       group_tokens_allowed_store);
+
+static ssize_t group_use_token_limit_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->use_token_limit);
+}
+
+static ssize_t group_use_token_limit_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+
+	group->use_token_limit = !!val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_use_token_limit =
+		__ATTR(use_token_limit, 0644, group_use_token_limit_show,
+		       group_use_token_limit_store);
+
+static ssize_t group_engines_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	int i, rc = 0;
+	char *tmp = buf;
+	struct idxd_device *idxd = group->idxd;
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		struct idxd_engine *engine = &idxd->engines[i];
+
+		if (!engine->group)
+			continue;
+
+		if (engine->group->id == group->id)
+			rc += sprintf(tmp + rc, "engine%d.%d ",
+					idxd->id, engine->id);
+	}
+
+	rc--;
+	rc += sprintf(tmp + rc, "\n");
+
+	return rc;
+}
+
+static struct device_attribute dev_attr_group_engines =
+		__ATTR(engines, 0444, group_engines_show, NULL);
+
+static ssize_t group_work_queues_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	int i, rc = 0;
+	char *tmp = buf;
+	struct idxd_device *idxd = group->idxd;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		if (!wq->group)
+			continue;
+
+		if (wq->group->id == group->id)
+			rc += sprintf(tmp + rc, "wq%d.%d ",
+					idxd->id, wq->id);
+	}
+
+	rc--;
+	rc += sprintf(tmp + rc, "\n");
+
+	return rc;
+}
+
+static struct device_attribute dev_attr_group_work_queues =
+		__ATTR(work_queues, 0444, group_work_queues_show, NULL);
+
+static ssize_t group_traffic_class_a_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%d\n", group->tc_a);
+}
+
+static ssize_t group_traffic_class_a_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	long val;
+	int rc;
+
+	rc = kstrtol(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (val < 0 || val > 7)
+		return -EINVAL;
+
+	group->tc_a = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_a =
+		__ATTR(traffic_class_a, 0644, group_traffic_class_a_show,
+		       group_traffic_class_a_store);
+
+static ssize_t group_traffic_class_b_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%d\n", group->tc_b);
+}
+
+static ssize_t group_traffic_class_b_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	long val;
+	int rc;
+
+	rc = kstrtol(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (val < 0 || val > 7)
+		return -EINVAL;
+
+	group->tc_b = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_b =
+		__ATTR(traffic_class_b, 0644, group_traffic_class_b_show,
+		       group_traffic_class_b_store);
+
+static struct attribute *idxd_group_attributes[] = {
+	&dev_attr_group_work_queues.attr,
+	&dev_attr_group_engines.attr,
+	&dev_attr_group_use_token_limit.attr,
+	&dev_attr_group_tokens_allowed.attr,
+	&dev_attr_group_tokens_reserved.attr,
+	&dev_attr_group_traffic_class_a.attr,
+	&dev_attr_group_traffic_class_b.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_group_attribute_group = {
+	.attrs = idxd_group_attributes,
+};
+
+static const struct attribute_group *idxd_group_attribute_groups[] = {
+	&idxd_group_attribute_group,
+	NULL,
+};
+
+/* IDXD work queue attribs */
+static ssize_t wq_clients_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%d\n", wq->client_count);
+}
+
+static struct device_attribute dev_attr_wq_clients =
+		__ATTR(clients, 0444, wq_clients_show, NULL);
+
+static ssize_t wq_state_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	switch (wq->state) {
+	case IDXD_WQ_DISABLED:
+		return sprintf(buf, "disabled\n");
+	case IDXD_WQ_ENABLED:
+		return sprintf(buf, "enabled\n");
+	}
+
+	return sprintf(buf, "unknown\n");
+}
+
+static struct device_attribute dev_attr_wq_state =
+		__ATTR(state, 0444, wq_state_show, NULL);
+
+static ssize_t wq_group_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	if (wq->group)
+		return sprintf(buf, "%u\n", wq->group->id);
+	else
+		return sprintf(buf, "-1\n");
+}
+
+static ssize_t wq_group_id_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+	long id;
+	int rc;
+	struct idxd_group *prevg, *group;
+
+	rc = kstrtol(buf, 10, &id);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (id > idxd->max_groups - 1 || id < -1)
+		return -EINVAL;
+
+	if (id == -1) {
+		if (wq->group) {
+			wq->group->num_wqs--;
+			wq->group = NULL;
+		}
+		return count;
+	}
+
+	group = &idxd->groups[id];
+	prevg = wq->group;
+
+	if (prevg)
+		prevg->num_wqs--;
+	wq->group = group;
+	group->num_wqs++;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_group_id =
+		__ATTR(group_id, 0644, wq_group_id_show, wq_group_id_store);
+
+static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%s\n",
+			wq_dedicated(wq) ? "dedicated" : "shared");
+}
+
+static ssize_t wq_mode_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (sysfs_streq(buf, "dedicated")) {
+		set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+		wq->threshold = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_mode =
+		__ATTR(mode, 0644, wq_mode_show, wq_mode_store);
+
+static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n", wq->size);
+}
+
+static ssize_t wq_size_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	unsigned long size;
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &size);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (size > idxd->max_wq_size)
+		return -EINVAL;
+
+	wq->size = size;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_size =
+		__ATTR(size, 0644, wq_size_show, wq_size_store);
+
+static ssize_t wq_priority_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n", wq->priority);
+}
+
+static ssize_t wq_priority_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	unsigned long prio;
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &prio);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (prio > IDXD_MAX_PRIORITY)
+		return -EINVAL;
+
+	wq->priority = prio;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_priority =
+		__ATTR(priority, 0644, wq_priority_show, wq_priority_store);
+
+static ssize_t wq_type_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	switch (wq->type) {
+	case IDXD_WQT_KERNEL:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_KERNEL]);
+	case IDXD_WQT_USER:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_USER]);
+	case IDXD_WQT_NONE:
+	default:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_NONE]);
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t wq_type_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	enum idxd_wq_type old_type;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	old_type = wq->type;
+	if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
+		wq->type = IDXD_WQT_KERNEL;
+	else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
+		wq->type = IDXD_WQT_USER;
+	else
+		wq->type = IDXD_WQT_NONE;
+
+	/* If we are changing queue type, clear the name */
+	if (wq->type != old_type)
+		memset(wq->name, 0, WQ_NAME_SIZE + 1);
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_type =
+		__ATTR(type, 0644, wq_type_show, wq_type_store);
+
+static ssize_t wq_name_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%s\n", wq->name);
+}
+
+static ssize_t wq_name_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
+		return -EINVAL;
+
+	memset(wq->name, 0, WQ_NAME_SIZE + 1);
+	strncpy(wq->name, buf, WQ_NAME_SIZE);
+	strreplace(wq->name, '\n', '\0');
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_name =
+		__ATTR(name, 0644, wq_name_show, wq_name_store);
+
+static ssize_t wq_cdev_minor_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%d\n", wq->idxd_cdev.minor);
+}
+
+static struct device_attribute dev_attr_wq_cdev_minor =
+		__ATTR(cdev_minor, 0444, wq_cdev_minor_show, NULL);
+
+static struct attribute *idxd_wq_attributes[] = {
+	&dev_attr_wq_clients.attr,
+	&dev_attr_wq_state.attr,
+	&dev_attr_wq_group_id.attr,
+	&dev_attr_wq_mode.attr,
+	&dev_attr_wq_size.attr,
+	&dev_attr_wq_priority.attr,
+	&dev_attr_wq_type.attr,
+	&dev_attr_wq_name.attr,
+	&dev_attr_wq_cdev_minor.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_wq_attribute_group = {
+	.attrs = idxd_wq_attributes,
+};
+
+static const struct attribute_group *idxd_wq_attribute_groups[] = {
+	&idxd_wq_attribute_group,
+	NULL,
+};
+
+/* IDXD device attribs */
+static ssize_t max_work_queues_size_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_wq_size);
+}
+static DEVICE_ATTR_RO(max_work_queues_size);
+
+static ssize_t max_groups_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_groups);
+}
+static DEVICE_ATTR_RO(max_groups);
+
+static ssize_t max_work_queues_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_wqs);
+}
+static DEVICE_ATTR_RO(max_work_queues);
+
+static ssize_t max_engines_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_engines);
+}
+static DEVICE_ATTR_RO(max_engines);
+
+static ssize_t numa_node_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
+static ssize_t max_batch_size_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_batch_size);
+}
+static DEVICE_ATTR_RO(max_batch_size);
+
+static ssize_t max_transfer_size_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%llu\n", idxd->max_xfer_bytes);
+}
+static DEVICE_ATTR_RO(max_transfer_size);
+
+static ssize_t op_cap_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]);
+}
+static DEVICE_ATTR_RO(op_cap);
+
+static ssize_t configurable_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n",
+			test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
+}
+static DEVICE_ATTR_RO(configurable);
+
+static ssize_t clients_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+	unsigned long flags;
+	int count = 0, i;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		count += wq->client_count;
+	}
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	return sprintf(buf, "%d\n", count);
+}
+static DEVICE_ATTR_RO(clients);
+
+static ssize_t state_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	switch (idxd->state) {
+	case IDXD_DEV_DISABLED:
+	case IDXD_DEV_CONF_READY:
+		return sprintf(buf, "disabled\n");
+	case IDXD_DEV_ENABLED:
+		return sprintf(buf, "enabled\n");
+	case IDXD_DEV_HALTED:
+		return sprintf(buf, "halted\n");
+	}
+
+	return sprintf(buf, "unknown\n");
+}
+static DEVICE_ATTR_RO(state);
+
+static ssize_t errors_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+	int i, out = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	for (i = 0; i < 4; i++)
+		out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	out--;
+	out += sprintf(buf + out, "\n");
+	return out;
+}
+static DEVICE_ATTR_RO(errors);
+
+static ssize_t max_tokens_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_tokens);
+}
+static DEVICE_ATTR_RO(max_tokens);
+
+static ssize_t token_limit_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->token_limit);
+}
+
+static ssize_t token_limit_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (!idxd->hw.group_cap.token_limit)
+		return -EPERM;
+
+	if (val > idxd->hw.group_cap.total_tokens)
+		return -EINVAL;
+
+	idxd->token_limit = val;
+	return count;
+}
+static DEVICE_ATTR_RW(token_limit);
+
+static ssize_t cdev_major_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->major);
+}
+static DEVICE_ATTR_RO(cdev_major);
+
+static struct attribute *idxd_device_attributes[] = {
+	&dev_attr_max_groups.attr,
+	&dev_attr_max_work_queues.attr,
+	&dev_attr_max_work_queues_size.attr,
+	&dev_attr_max_engines.attr,
+	&dev_attr_numa_node.attr,
+	&dev_attr_max_batch_size.attr,
+	&dev_attr_max_transfer_size.attr,
+	&dev_attr_op_cap.attr,
+	&dev_attr_configurable.attr,
+	&dev_attr_clients.attr,
+	&dev_attr_state.attr,
+	&dev_attr_errors.attr,
+	&dev_attr_max_tokens.attr,
+	&dev_attr_token_limit.attr,
+	&dev_attr_cdev_major.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_device_attribute_group = {
+	.attrs = idxd_device_attributes,
+};
+
+static const struct attribute_group *idxd_attribute_groups[] = {
+	&idxd_device_attribute_group,
+	NULL,
+};
+
+static int idxd_setup_engine_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		struct idxd_engine *engine = &idxd->engines[i];
+
+		engine->conf_dev.parent = &idxd->conf_dev;
+		dev_set_name(&engine->conf_dev, "engine%d.%d",
+			     idxd->id, engine->id);
+		engine->conf_dev.bus = idxd_get_bus_type(idxd);
+		engine->conf_dev.groups = idxd_engine_attribute_groups;
+		engine->conf_dev.type = &idxd_engine_device_type;
+		dev_dbg(dev, "Engine device register: %s\n",
+			dev_name(&engine->conf_dev));
+		rc = device_register(&engine->conf_dev);
+		if (rc < 0) {
+			put_device(&engine->conf_dev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	while (i--) {
+		struct idxd_engine *engine = &idxd->engines[i];
+
+		device_unregister(&engine->conf_dev);
+	}
+	return rc;
+}
+
+static int idxd_setup_group_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		group->conf_dev.parent = &idxd->conf_dev;
+		dev_set_name(&group->conf_dev, "group%d.%d",
+			     idxd->id, group->id);
+		group->conf_dev.bus = idxd_get_bus_type(idxd);
+		group->conf_dev.groups = idxd_group_attribute_groups;
+		group->conf_dev.type = &idxd_group_device_type;
+		dev_dbg(dev, "Group device register: %s\n",
+			dev_name(&group->conf_dev));
+		rc = device_register(&group->conf_dev);
+		if (rc < 0) {
+			put_device(&group->conf_dev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	while (i--) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		device_unregister(&group->conf_dev);
+	}
+	return rc;
+}
+
+static int idxd_setup_wq_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		wq->conf_dev.parent = &idxd->conf_dev;
+		dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+		wq->conf_dev.bus = idxd_get_bus_type(idxd);
+		wq->conf_dev.groups = idxd_wq_attribute_groups;
+		wq->conf_dev.type = &idxd_wq_device_type;
+		dev_dbg(dev, "WQ device register: %s\n",
+			dev_name(&wq->conf_dev));
+		rc = device_register(&wq->conf_dev);
+		if (rc < 0) {
+			put_device(&wq->conf_dev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	while (i--) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		device_unregister(&wq->conf_dev);
+	}
+	return rc;
+}
+
+static int idxd_setup_device_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+	char devname[IDXD_NAME_SIZE];
+
+	sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id);
+	idxd->conf_dev.parent = dev;
+	dev_set_name(&idxd->conf_dev, "%s", devname);
+	idxd->conf_dev.bus = idxd_get_bus_type(idxd);
+	idxd->conf_dev.groups = idxd_attribute_groups;
+	idxd->conf_dev.type = idxd_get_device_type(idxd);
+
+	dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev));
+	rc = device_register(&idxd->conf_dev);
+	if (rc < 0) {
+		put_device(&idxd->conf_dev);
+		return rc;
+	}
+
+	return 0;
+}
+
+int idxd_setup_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+
+	rc = idxd_setup_device_sysfs(idxd);
+	if (rc < 0) {
+		dev_dbg(dev, "Device sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_wq_sysfs(idxd);
+	if (rc < 0) {
+		/* unregister conf dev */
+		dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_group_sysfs(idxd);
+	if (rc < 0) {
+		/* unregister conf dev */
+		dev_dbg(dev, "Group sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_engine_sysfs(idxd);
+	if (rc < 0) {
+		/* unregister conf dev */
+		dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+void idxd_cleanup_sysfs(struct idxd_device *idxd)
+{
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		device_unregister(&wq->conf_dev);
+	}
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		struct idxd_engine *engine = &idxd->engines[i];
+
+		device_unregister(&engine->conf_dev);
+	}
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		device_unregister(&group->conf_dev);
+	}
+
+	device_unregister(&idxd->conf_dev);
+}
+
+int idxd_register_bus_type(void)
+{
+	int i, rc;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		rc = bus_register(idxd_bus_types[i]);
+		if (rc < 0)
+			goto bus_err;
+	}
+
+	return 0;
+
+bus_err:
+	for (; i > 0; i--)
+		bus_unregister(idxd_bus_types[i]);
+	return rc;
+}
+
+void idxd_unregister_bus_type(void)
+{
+	int i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++)
+		bus_unregister(idxd_bus_types[i]);
+}
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index c27e206a764c..066b21a32232 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -760,12 +760,8 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
 		return;
 	}
 	sdmac->desc = desc = to_sdma_desc(&vd->tx);
-	/*
-	 * Do not delete the node in desc_issued list in cyclic mode, otherwise
-	 * the desc allocated will never be freed in vchan_dma_desc_free_list
-	 */
-	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
-		list_del(&vd->node);
+
+	list_del(&vd->node);
 
 	sdma->channel_control[channel].base_bd_ptr = desc->bd_phys;
 	sdma->channel_control[channel].current_bd_ptr = desc->bd_phys;
@@ -1071,20 +1067,27 @@ static void sdma_channel_terminate_work(struct work_struct *work)
 
 	spin_lock_irqsave(&sdmac->vc.lock, flags);
 	vchan_get_all_descriptors(&sdmac->vc, &head);
-	sdmac->desc = NULL;
 	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 	vchan_dma_desc_free_list(&sdmac->vc, &head);
 	sdmac->context_loaded = false;
 }
 
-static int sdma_disable_channel_async(struct dma_chan *chan)
+static int sdma_terminate_all(struct dma_chan *chan)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
 
 	sdma_disable_channel(chan);
 
-	if (sdmac->desc)
+	if (sdmac->desc) {
+		vchan_terminate_vdesc(&sdmac->desc->vd);
+		sdmac->desc = NULL;
 		schedule_work(&sdmac->terminate_worker);
+	}
+
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 
 	return 0;
 }
@@ -1324,7 +1327,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 
-	sdma_disable_channel_async(chan);
+	sdma_terminate_all(chan);
 
 	sdma_channel_synchronize(chan);
 
@@ -1648,7 +1651,7 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 				      struct dma_tx_state *txstate)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
-	struct sdma_desc *desc;
+	struct sdma_desc *desc = NULL;
 	u32 residue;
 	struct virt_dma_desc *vd;
 	enum dma_status ret;
@@ -1659,19 +1662,23 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 		return ret;
 
 	spin_lock_irqsave(&sdmac->vc.lock, flags);
+
 	vd = vchan_find_desc(&sdmac->vc, cookie);
-	if (vd) {
+	if (vd)
 		desc = to_sdma_desc(&vd->tx);
+	else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie)
+		desc = sdmac->desc;
+
+	if (desc) {
 		if (sdmac->flags & IMX_DMA_SG_LOOP)
 			residue = (desc->num_bd - desc->buf_ptail) *
 				desc->period_len - desc->chn_real_count;
 		else
 			residue = desc->chn_count - desc->chn_real_count;
-	} else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie) {
-		residue = sdmac->desc->chn_count - sdmac->desc->chn_real_count;
 	} else {
 		residue = 0;
 	}
+
 	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 
 	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
@@ -2103,7 +2110,7 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
 	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
 	sdma->dma_device.device_config = sdma_config;
-	sdma->dma_device.device_terminate_all = sdma_disable_channel_async;
+	sdma->dma_device.device_terminate_all = sdma_terminate_all;
 	sdma->dma_device.device_synchronize = sdma_channel_synchronize;
 	sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS;
 	sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1a422a8b43cf..18c011e57592 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -377,10 +377,11 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 
 		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
 						 SZ_2M, &descs->hw, flags);
-		if (!descs->virt && (i > 0)) {
+		if (!descs->virt) {
 			int idx;
 
 			for (idx = 0; idx < i; idx++) {
+				descs = &ioat_chan->descs[idx];
 				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
 						  descs->virt, descs->hw);
 				descs->virt = NULL;
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index a6a6dc432db8..60e9afbb896c 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -556,10 +556,6 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
 	ioat_kobject_del(ioat_dma);
 
 	dma_async_device_unregister(dma);
-
-	dma_pool_destroy(ioat_dma->completion_pool);
-
-	INIT_LIST_HEAD(&dma->channels);
 }
 
 /**
@@ -589,7 +585,7 @@ static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma)
 	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
 
 	for (i = 0; i < dma->chancnt; i++) {
-		ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL);
+		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
 		if (!ioat_chan)
 			break;
 
@@ -624,12 +620,16 @@ static void ioat_free_chan_resources(struct dma_chan *c)
 		return;
 
 	ioat_stop(ioat_chan);
-	ioat_reset_hw(ioat_chan);
 
-	/* Put LTR to idle */
-	if (ioat_dma->version >= IOAT_VER_3_4)
-		writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
-			ioat_chan->reg_base + IOAT_CHAN_LTR_SWSEL_OFFSET);
+	if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) {
+		ioat_reset_hw(ioat_chan);
+
+		/* Put LTR to idle */
+		if (ioat_dma->version >= IOAT_VER_3_4)
+			writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
+			       ioat_chan->reg_base +
+			       IOAT_CHAN_LTR_SWSEL_OFFSET);
+	}
 
 	spin_lock_bh(&ioat_chan->cleanup_lock);
 	spin_lock_bh(&ioat_chan->prep_lock);
@@ -1322,16 +1322,28 @@ static struct pci_driver ioat_pci_driver = {
 	.err_handler	= &ioat_err_handler,
 };
 
+static void release_ioatdma(struct dma_device *device)
+{
+	struct ioatdma_device *d = to_ioatdma_device(device);
+	int i;
+
+	for (i = 0; i < IOAT_MAX_CHANS; i++)
+		kfree(d->idx[i]);
+
+	dma_pool_destroy(d->completion_pool);
+	kfree(d);
+}
+
 static struct ioatdma_device *
 alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
 {
-	struct device *dev = &pdev->dev;
-	struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	struct ioatdma_device *d = kzalloc(sizeof(*d), GFP_KERNEL);
 
 	if (!d)
 		return NULL;
 	d->pdev = pdev;
 	d->reg_base = iobase;
+	d->dma_dev.device_release = release_ioatdma;
 	return d;
 }
 
@@ -1400,6 +1412,8 @@ static void ioat_remove(struct pci_dev *pdev)
 	if (!device)
 		return;
 
+	ioat_shutdown(pdev);
+
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
 		unregister_dca_provider(device->dca, &pdev->dev);
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index a3f942a6a946..db0e274126fb 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -173,7 +173,7 @@ static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
 					&iop_chan->chain, chain_node) {
 					zero_sum_result |=
 					    iop_desc_get_zero_result(grp_iter);
-					    pr_debug("\titer%d result: %d\n",
+					pr_debug("\titer%d result: %d\n",
 					    grp_iter->idx, zero_sum_result);
 					slot_cnt -= slots_per_op;
 					if (slot_cnt == 0)
@@ -1359,9 +1359,11 @@ static int iop_adma_probe(struct platform_device *pdev)
 	iop_adma_device_clear_err_status(iop_chan);
 
 	for (i = 0; i < 3; i++) {
-		irq_handler_t handler[] = { iop_adma_eot_handler,
-					iop_adma_eoc_handler,
-					iop_adma_err_handler };
+		static const irq_handler_t handler[] = {
+			iop_adma_eot_handler,
+			iop_adma_eoc_handler,
+			iop_adma_err_handler
+		};
 		int irq = platform_get_irq(pdev, i);
 		if (irq < 0) {
 			ret = -ENXIO;
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index 4b36c8810517..c5c1aa0dcaed 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -229,9 +229,11 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
 			c = p->vchan;
 			if (c && (tc1 & BIT(i))) {
 				spin_lock_irqsave(&c->vc.lock, flags);
-				vchan_cookie_complete(&p->ds_run->vd);
-				p->ds_done = p->ds_run;
-				p->ds_run = NULL;
+				if (p->ds_run != NULL) {
+					vchan_cookie_complete(&p->ds_run->vd);
+					p->ds_done = p->ds_run;
+					p->ds_run = NULL;
+				}
 				spin_unlock_irqrestore(&c->vc.lock, flags);
 			}
 			if (c && (tc2 & BIT(i))) {
@@ -271,6 +273,10 @@ static int k3_dma_start_txd(struct k3_dma_chan *c)
 	if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
 		return -EAGAIN;
 
+	/* Avoid losing track of  ds_run if a transaction is in flight */
+	if (c->phy->ds_run)
+		return -EAGAIN;
+
 	if (vd) {
 		struct k3_dma_desc_sw *ds =
 			container_of(vd, struct k3_dma_desc_sw, vd);
@@ -835,13 +841,8 @@ static int k3_dma_probe(struct platform_device *op)
 	const struct k3dma_soc_data *soc_data;
 	struct k3_dma_dev *d;
 	const struct of_device_id *of_id;
-	struct resource *iores;
 	int i, ret, irq = 0;
 
-	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
-
 	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
@@ -850,7 +851,7 @@ static int k3_dma_probe(struct platform_device *op)
 	if (!soc_data)
 		return -EINVAL;
 
-	d->base = devm_ioremap_resource(&op->dev, iores);
+	d->base = devm_platform_ioremap_resource(op, 0);
 	if (IS_ERR(d->base))
 		return PTR_ERR(d->base);
 
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c
index 723b11c190b3..6bf838e63be1 100644
--- a/drivers/dma/mediatek/mtk-cqdma.c
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -819,15 +819,7 @@ static int mtk_cqdma_probe(struct platform_device *pdev)
 		INIT_LIST_HEAD(&cqdma->pc[i]->queue);
 		spin_lock_init(&cqdma->pc[i]->lock);
 		refcount_set(&cqdma->pc[i]->refcnt, 0);
-
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		if (!res) {
-			dev_err(&pdev->dev, "No mem resource for %s\n",
-				dev_name(&pdev->dev));
-			return -EINVAL;
-		}
-
-		cqdma->pc[i]->base = devm_ioremap_resource(&pdev->dev, res);
+		cqdma->pc[i]->base = devm_platform_ioremap_resource(pdev, i);
 		if (IS_ERR(cqdma->pc[i]->base))
 			return PTR_ERR(cqdma->pc[i]->base);
 
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
index 1a2028e1c29e..4c58da742143 100644
--- a/drivers/dma/mediatek/mtk-hsdma.c
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -997,7 +997,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev)
 	if (err) {
 		dev_err(&pdev->dev,
 			"request_irq failed with err %d\n", err);
-		goto err_unregister;
+		goto err_free;
 	}
 
 	platform_set_drvdata(pdev, hsdma);
@@ -1006,6 +1006,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_free:
+	of_dma_controller_free(pdev->dev.of_node);
 err_unregister:
 	dma_async_device_unregister(dd);
 
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index f40051d6aecb..29f1223b285a 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -430,9 +430,10 @@ static int mtk_uart_apdma_terminate_all(struct dma_chan *chan)
 
 	spin_lock_irqsave(&c->vc.lock, flags);
 	vchan_get_all_descriptors(&c->vc, &head);
-	vchan_dma_desc_free_list(&c->vc, &head);
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&c->vc, &head);
+
 	return 0;
 }
 
@@ -475,7 +476,6 @@ static int mtk_uart_apdma_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct mtk_uart_apdmadev *mtkd;
 	int bit_mask = 32, rc;
-	struct resource *res;
 	struct mtk_chan *c;
 	unsigned int i;
 
@@ -532,13 +532,7 @@ static int mtk_uart_apdma_probe(struct platform_device *pdev)
 			goto err_no_dma;
 		}
 
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		if (!res) {
-			rc = -ENODEV;
-			goto err_no_dma;
-		}
-
-		c->base = devm_ioremap_resource(&pdev->dev, res);
+		c->base = devm_platform_ioremap_resource(pdev, i);
 		if (IS_ERR(c->base)) {
 			rc = PTR_ERR(c->base);
 			goto err_no_dma;
diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c
new file mode 100644
index 000000000000..8853d442430b
--- /dev/null
+++ b/drivers/dma/milbeaut-hdmac.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2019 Linaro Ltd.
+// Copyright (C) 2019 Socionext Inc.
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+
+#include "virt-dma.h"
+
+#define MLB_HDMAC_DMACR		0x0	/* global */
+#define MLB_HDMAC_DE		BIT(31)
+#define MLB_HDMAC_DS		BIT(30)
+#define MLB_HDMAC_PR		BIT(28)
+#define MLB_HDMAC_DH		GENMASK(27, 24)
+
+#define MLB_HDMAC_CH_STRIDE	0x10
+
+#define MLB_HDMAC_DMACA		0x0	/* channel */
+#define MLB_HDMAC_EB		BIT(31)
+#define MLB_HDMAC_PB		BIT(30)
+#define MLB_HDMAC_ST		BIT(29)
+#define MLB_HDMAC_IS		GENMASK(28, 24)
+#define MLB_HDMAC_BT		GENMASK(23, 20)
+#define MLB_HDMAC_BC		GENMASK(19, 16)
+#define MLB_HDMAC_TC		GENMASK(15, 0)
+#define MLB_HDMAC_DMACB		0x4
+#define MLB_HDMAC_TT		GENMASK(31, 30)
+#define MLB_HDMAC_MS		GENMASK(29, 28)
+#define MLB_HDMAC_TW		GENMASK(27, 26)
+#define MLB_HDMAC_FS		BIT(25)
+#define MLB_HDMAC_FD		BIT(24)
+#define MLB_HDMAC_RC		BIT(23)
+#define MLB_HDMAC_RS		BIT(22)
+#define MLB_HDMAC_RD		BIT(21)
+#define MLB_HDMAC_EI		BIT(20)
+#define MLB_HDMAC_CI		BIT(19)
+#define HDMAC_PAUSE		0x7
+#define MLB_HDMAC_SS		GENMASK(18, 16)
+#define MLB_HDMAC_SP		GENMASK(15, 12)
+#define MLB_HDMAC_DP		GENMASK(11, 8)
+#define MLB_HDMAC_DMACSA	0x8
+#define MLB_HDMAC_DMACDA	0xc
+
+#define MLB_HDMAC_BUSWIDTHS		(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+					BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+					BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+struct milbeaut_hdmac_desc {
+	struct virt_dma_desc vd;
+	struct scatterlist *sgl;
+	unsigned int sg_len;
+	unsigned int sg_cur;
+	enum dma_transfer_direction dir;
+};
+
+struct milbeaut_hdmac_chan {
+	struct virt_dma_chan vc;
+	struct milbeaut_hdmac_device *mdev;
+	struct milbeaut_hdmac_desc *md;
+	void __iomem *reg_ch_base;
+	unsigned int slave_id;
+	struct dma_slave_config	cfg;
+};
+
+struct milbeaut_hdmac_device {
+	struct dma_device ddev;
+	struct clk *clk;
+	void __iomem *reg_base;
+	struct milbeaut_hdmac_chan channels[0];
+};
+
+static struct milbeaut_hdmac_chan *
+to_milbeaut_hdmac_chan(struct virt_dma_chan *vc)
+{
+	return container_of(vc, struct milbeaut_hdmac_chan, vc);
+}
+
+static struct milbeaut_hdmac_desc *
+to_milbeaut_hdmac_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct milbeaut_hdmac_desc, vd);
+}
+
+/* mc->vc.lock must be held by caller */
+static struct milbeaut_hdmac_desc *
+milbeaut_hdmac_next_desc(struct milbeaut_hdmac_chan *mc)
+{
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&mc->vc);
+	if (!vd) {
+		mc->md = NULL;
+		return NULL;
+	}
+
+	list_del(&vd->node);
+
+	mc->md = to_milbeaut_hdmac_desc(vd);
+
+	return mc->md;
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_chan_start(struct milbeaut_hdmac_chan *mc,
+				struct milbeaut_hdmac_desc *md)
+{
+	struct scatterlist *sg;
+	u32 cb, ca, src_addr, dest_addr, len;
+	u32 width, burst;
+
+	sg = &md->sgl[md->sg_cur];
+	len = sg_dma_len(sg);
+
+	cb = MLB_HDMAC_CI | MLB_HDMAC_EI;
+	if (md->dir == DMA_MEM_TO_DEV) {
+		cb |= MLB_HDMAC_FD;
+		width = mc->cfg.dst_addr_width;
+		burst = mc->cfg.dst_maxburst;
+		src_addr = sg_dma_address(sg);
+		dest_addr = mc->cfg.dst_addr;
+	} else {
+		cb |= MLB_HDMAC_FS;
+		width = mc->cfg.src_addr_width;
+		burst = mc->cfg.src_maxburst;
+		src_addr = mc->cfg.src_addr;
+		dest_addr = sg_dma_address(sg);
+	}
+	cb |= FIELD_PREP(MLB_HDMAC_TW, (width >> 1));
+	cb |= FIELD_PREP(MLB_HDMAC_MS, 2);
+
+	writel_relaxed(MLB_HDMAC_DE, mc->mdev->reg_base + MLB_HDMAC_DMACR);
+	writel_relaxed(src_addr, mc->reg_ch_base + MLB_HDMAC_DMACSA);
+	writel_relaxed(dest_addr, mc->reg_ch_base + MLB_HDMAC_DMACDA);
+	writel_relaxed(cb, mc->reg_ch_base + MLB_HDMAC_DMACB);
+
+	ca = FIELD_PREP(MLB_HDMAC_IS, mc->slave_id);
+	if (burst == 16)
+		ca |= FIELD_PREP(MLB_HDMAC_BT, 0xf);
+	else if (burst == 8)
+		ca |= FIELD_PREP(MLB_HDMAC_BT, 0xd);
+	else if (burst == 4)
+		ca |= FIELD_PREP(MLB_HDMAC_BT, 0xb);
+	burst *= width;
+	ca |= FIELD_PREP(MLB_HDMAC_TC, (len / burst - 1));
+	writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA);
+	ca |= MLB_HDMAC_EB;
+	writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA);
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_hdmac_start(struct milbeaut_hdmac_chan *mc)
+{
+	struct milbeaut_hdmac_desc *md;
+
+	md = milbeaut_hdmac_next_desc(mc);
+	if (md)
+		milbeaut_chan_start(mc, md);
+}
+
+static irqreturn_t milbeaut_hdmac_interrupt(int irq, void *dev_id)
+{
+	struct milbeaut_hdmac_chan *mc = dev_id;
+	struct milbeaut_hdmac_desc *md;
+	u32 val;
+
+	spin_lock(&mc->vc.lock);
+
+	/* Ack and Disable irqs */
+	val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACB);
+	val &= ~(FIELD_PREP(MLB_HDMAC_SS, HDMAC_PAUSE));
+	writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB);
+	val &= ~MLB_HDMAC_EI;
+	val &= ~MLB_HDMAC_CI;
+	writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB);
+
+	md = mc->md;
+	if (!md)
+		goto out;
+
+	md->sg_cur++;
+
+	if (md->sg_cur >= md->sg_len) {
+		vchan_cookie_complete(&md->vd);
+		md = milbeaut_hdmac_next_desc(mc);
+		if (!md)
+			goto out;
+	}
+
+	milbeaut_chan_start(mc, md);
+
+out:
+	spin_unlock(&mc->vc.lock);
+	return IRQ_HANDLED;
+}
+
+static void milbeaut_hdmac_free_chan_resources(struct dma_chan *chan)
+{
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static int
+milbeaut_hdmac_chan_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+
+	spin_lock(&mc->vc.lock);
+	mc->cfg = *cfg;
+	spin_unlock(&mc->vc.lock);
+
+	return 0;
+}
+
+static int milbeaut_hdmac_chan_pause(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+	u32 val;
+
+	spin_lock(&mc->vc.lock);
+	val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+	val |= MLB_HDMAC_PB;
+	writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+	spin_unlock(&mc->vc.lock);
+
+	return 0;
+}
+
+static int milbeaut_hdmac_chan_resume(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+	u32 val;
+
+	spin_lock(&mc->vc.lock);
+	val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+	val &= ~MLB_HDMAC_PB;
+	writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+	spin_unlock(&mc->vc.lock);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+milbeaut_hdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			     unsigned int sg_len,
+			     enum dma_transfer_direction direction,
+			     unsigned long flags, void *context)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_desc *md;
+	int i;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	md = kzalloc(sizeof(*md), GFP_NOWAIT);
+	if (!md)
+		return NULL;
+
+	md->sgl = kzalloc(sizeof(*sgl) * sg_len, GFP_NOWAIT);
+	if (!md->sgl) {
+		kfree(md);
+		return NULL;
+	}
+
+	for (i = 0; i < sg_len; i++)
+		md->sgl[i] = sgl[i];
+
+	md->sg_len = sg_len;
+	md->dir = direction;
+
+	return vchan_tx_prep(vc, &md->vd, flags);
+}
+
+static int milbeaut_hdmac_terminate_all(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+	unsigned long flags;
+	u32 val;
+
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vc->lock, flags);
+
+	val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+	val &= ~MLB_HDMAC_EB; /* disable the channel */
+	writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+
+	if (mc->md) {
+		vchan_terminate_vdesc(&mc->md->vd);
+		mc->md = NULL;
+	}
+
+	vchan_get_all_descriptors(vc, &head);
+
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	vchan_dma_desc_free_list(vc, &head);
+
+	return 0;
+}
+
+static void milbeaut_hdmac_synchronize(struct dma_chan *chan)
+{
+	vchan_synchronize(to_virt_chan(chan));
+}
+
+static enum dma_status milbeaut_hdmac_tx_status(struct dma_chan *chan,
+						dma_cookie_t cookie,
+						struct dma_tx_state *txstate)
+{
+	struct virt_dma_chan *vc;
+	struct virt_dma_desc *vd;
+	struct milbeaut_hdmac_chan *mc;
+	struct milbeaut_hdmac_desc *md = NULL;
+	enum dma_status stat;
+	unsigned long flags;
+	int i;
+
+	stat = dma_cookie_status(chan, cookie, txstate);
+	/* Return immediately if we do not need to compute the residue. */
+	if (stat == DMA_COMPLETE || !txstate)
+		return stat;
+
+	vc = to_virt_chan(chan);
+
+	spin_lock_irqsave(&vc->lock, flags);
+
+	mc = to_milbeaut_hdmac_chan(vc);
+
+	/* residue from the on-flight chunk */
+	if (mc->md && mc->md->vd.tx.cookie == cookie) {
+		struct scatterlist *sg;
+		u32 done;
+
+		md = mc->md;
+		sg = &md->sgl[md->sg_cur];
+
+		if (md->dir == DMA_DEV_TO_MEM)
+			done = readl_relaxed(mc->reg_ch_base
+					     + MLB_HDMAC_DMACDA);
+		else
+			done = readl_relaxed(mc->reg_ch_base
+					     + MLB_HDMAC_DMACSA);
+		done -= sg_dma_address(sg);
+
+		txstate->residue = -done;
+	}
+
+	if (!md) {
+		vd = vchan_find_desc(vc, cookie);
+		if (vd)
+			md = to_milbeaut_hdmac_desc(vd);
+	}
+
+	if (md) {
+		/* residue from the queued chunks */
+		for (i = md->sg_cur; i < md->sg_len; i++)
+			txstate->residue += sg_dma_len(&md->sgl[i]);
+	}
+
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	return stat;
+}
+
+static void milbeaut_hdmac_issue_pending(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->lock, flags);
+
+	if (vchan_issue_pending(vc) && !mc->md)
+		milbeaut_hdmac_start(mc);
+
+	spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void milbeaut_hdmac_desc_free(struct virt_dma_desc *vd)
+{
+	struct milbeaut_hdmac_desc *md = to_milbeaut_hdmac_desc(vd);
+
+	kfree(md->sgl);
+	kfree(md);
+}
+
+static struct dma_chan *
+milbeaut_hdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma)
+{
+	struct milbeaut_hdmac_device *mdev = of_dma->of_dma_data;
+	struct milbeaut_hdmac_chan *mc;
+	struct virt_dma_chan *vc;
+	struct dma_chan *chan;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	chan = dma_get_any_slave_channel(&mdev->ddev);
+	if (!chan)
+		return NULL;
+
+	vc = to_virt_chan(chan);
+	mc = to_milbeaut_hdmac_chan(vc);
+	mc->slave_id = dma_spec->args[0];
+
+	return chan;
+}
+
+static int milbeaut_hdmac_chan_init(struct platform_device *pdev,
+				    struct milbeaut_hdmac_device *mdev,
+				    int chan_id)
+{
+	struct device *dev = &pdev->dev;
+	struct milbeaut_hdmac_chan *mc = &mdev->channels[chan_id];
+	char *irq_name;
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, chan_id);
+	if (irq < 0)
+		return irq;
+
+	irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-hdmac-%d",
+				  chan_id);
+	if (!irq_name)
+		return -ENOMEM;
+
+	ret = devm_request_irq(dev, irq, milbeaut_hdmac_interrupt,
+			       IRQF_SHARED, irq_name, mc);
+	if (ret)
+		return ret;
+
+	mc->mdev = mdev;
+	mc->reg_ch_base = mdev->reg_base + MLB_HDMAC_CH_STRIDE * (chan_id + 1);
+	mc->vc.desc_free = milbeaut_hdmac_desc_free;
+	vchan_init(&mc->vc, &mdev->ddev);
+
+	return 0;
+}
+
+static int milbeaut_hdmac_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct milbeaut_hdmac_device *mdev;
+	struct dma_device *ddev;
+	int nr_chans, ret, i;
+
+	nr_chans = platform_irq_count(pdev);
+	if (nr_chans < 0)
+		return nr_chans;
+
+	ret = dma_set_mask(dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans),
+			    GFP_KERNEL);
+	if (!mdev)
+		return -ENOMEM;
+
+	mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mdev->reg_base))
+		return PTR_ERR(mdev->reg_base);
+
+	mdev->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(mdev->clk)) {
+		dev_err(dev, "failed to get clock\n");
+		return PTR_ERR(mdev->clk);
+	}
+
+	ret = clk_prepare_enable(mdev->clk);
+	if (ret)
+		return ret;
+
+	ddev = &mdev->ddev;
+	ddev->dev = dev;
+	dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, ddev->cap_mask);
+	ddev->src_addr_widths = MLB_HDMAC_BUSWIDTHS;
+	ddev->dst_addr_widths = MLB_HDMAC_BUSWIDTHS;
+	ddev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	ddev->device_free_chan_resources = milbeaut_hdmac_free_chan_resources;
+	ddev->device_config = milbeaut_hdmac_chan_config;
+	ddev->device_pause = milbeaut_hdmac_chan_pause;
+	ddev->device_resume = milbeaut_hdmac_chan_resume;
+	ddev->device_prep_slave_sg = milbeaut_hdmac_prep_slave_sg;
+	ddev->device_terminate_all = milbeaut_hdmac_terminate_all;
+	ddev->device_synchronize = milbeaut_hdmac_synchronize;
+	ddev->device_tx_status = milbeaut_hdmac_tx_status;
+	ddev->device_issue_pending = milbeaut_hdmac_issue_pending;
+	INIT_LIST_HEAD(&ddev->channels);
+
+	for (i = 0; i < nr_chans; i++) {
+		ret = milbeaut_hdmac_chan_init(pdev, mdev, i);
+		if (ret)
+			goto disable_clk;
+	}
+
+	ret = dma_async_device_register(ddev);
+	if (ret)
+		goto disable_clk;
+
+	ret = of_dma_controller_register(dev->of_node,
+					 milbeaut_hdmac_xlate, mdev);
+	if (ret)
+		goto unregister_dmac;
+
+	platform_set_drvdata(pdev, mdev);
+
+	return 0;
+
+unregister_dmac:
+	dma_async_device_unregister(ddev);
+disable_clk:
+	clk_disable_unprepare(mdev->clk);
+
+	return ret;
+}
+
+static int milbeaut_hdmac_remove(struct platform_device *pdev)
+{
+	struct milbeaut_hdmac_device *mdev = platform_get_drvdata(pdev);
+	struct dma_chan *chan;
+	int ret;
+
+	/*
+	 * Before reaching here, almost all descriptors have been freed by the
+	 * ->device_free_chan_resources() hook. However, each channel might
+	 * be still holding one descriptor that was on-flight at that moment.
+	 * Terminate it to make sure this hardware is no longer running. Then,
+	 * free the channel resources once again to avoid memory leak.
+	 */
+	list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
+		ret = dmaengine_terminate_sync(chan);
+		if (ret)
+			return ret;
+		milbeaut_hdmac_free_chan_resources(chan);
+	}
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&mdev->ddev);
+	clk_disable_unprepare(mdev->clk);
+
+	return 0;
+}
+
+static const struct of_device_id milbeaut_hdmac_match[] = {
+	{ .compatible = "socionext,milbeaut-m10v-hdmac" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, milbeaut_hdmac_match);
+
+static struct platform_driver milbeaut_hdmac_driver = {
+	.probe = milbeaut_hdmac_probe,
+	.remove = milbeaut_hdmac_remove,
+	.driver = {
+		.name = "milbeaut-m10v-hdmac",
+		.of_match_table = milbeaut_hdmac_match,
+	},
+};
+module_platform_driver(milbeaut_hdmac_driver);
+
+MODULE_DESCRIPTION("Milbeaut HDMAC DmaEngine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c
new file mode 100644
index 000000000000..ab3d2f395378
--- /dev/null
+++ b/drivers/dma/milbeaut-xdmac.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2019 Linaro Ltd.
+// Copyright (C) 2019 Socionext Inc.
+
+#include <linux/bits.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+
+#include "virt-dma.h"
+
+/* global register */
+#define M10V_XDACS 0x00
+
+/* channel local register */
+#define M10V_XDTBC 0x10
+#define M10V_XDSSA 0x14
+#define M10V_XDDSA 0x18
+#define M10V_XDSAC 0x1C
+#define M10V_XDDAC 0x20
+#define M10V_XDDCC 0x24
+#define M10V_XDDES 0x28
+#define M10V_XDDPC 0x2C
+#define M10V_XDDSD 0x30
+
+#define M10V_XDACS_XE BIT(28)
+
+#define M10V_DEFBS	0x3
+#define M10V_DEFBL	0xf
+
+#define M10V_XDSAC_SBS	GENMASK(17, 16)
+#define M10V_XDSAC_SBL	GENMASK(11, 8)
+
+#define M10V_XDDAC_DBS	GENMASK(17, 16)
+#define M10V_XDDAC_DBL	GENMASK(11, 8)
+
+#define M10V_XDDES_CE	BIT(28)
+#define M10V_XDDES_SE	BIT(24)
+#define M10V_XDDES_SA	BIT(15)
+#define M10V_XDDES_TF	GENMASK(23, 20)
+#define M10V_XDDES_EI	BIT(1)
+#define M10V_XDDES_TI	BIT(0)
+
+#define M10V_XDDSD_IS_MASK	GENMASK(3, 0)
+#define M10V_XDDSD_IS_NORMAL	0x8
+
+#define MLB_XDMAC_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct milbeaut_xdmac_desc {
+	struct virt_dma_desc vd;
+	size_t len;
+	dma_addr_t src;
+	dma_addr_t dst;
+};
+
+struct milbeaut_xdmac_chan {
+	struct virt_dma_chan vc;
+	struct milbeaut_xdmac_desc *md;
+	void __iomem *reg_ch_base;
+};
+
+struct milbeaut_xdmac_device {
+	struct dma_device ddev;
+	void __iomem *reg_base;
+	struct milbeaut_xdmac_chan channels[0];
+};
+
+static struct milbeaut_xdmac_chan *
+to_milbeaut_xdmac_chan(struct virt_dma_chan *vc)
+{
+	return container_of(vc, struct milbeaut_xdmac_chan, vc);
+}
+
+static struct milbeaut_xdmac_desc *
+to_milbeaut_xdmac_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct milbeaut_xdmac_desc, vd);
+}
+
+/* mc->vc.lock must be held by caller */
+static struct milbeaut_xdmac_desc *
+milbeaut_xdmac_next_desc(struct milbeaut_xdmac_chan *mc)
+{
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&mc->vc);
+	if (!vd) {
+		mc->md = NULL;
+		return NULL;
+	}
+
+	list_del(&vd->node);
+
+	mc->md = to_milbeaut_xdmac_desc(vd);
+
+	return mc->md;
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_chan_start(struct milbeaut_xdmac_chan *mc,
+				struct milbeaut_xdmac_desc *md)
+{
+	u32 val;
+
+	/* Setup the channel */
+	val = md->len - 1;
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDTBC);
+
+	val = md->src;
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDSSA);
+
+	val = md->dst;
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDDSA);
+
+	val = readl_relaxed(mc->reg_ch_base + M10V_XDSAC);
+	val &= ~(M10V_XDSAC_SBS | M10V_XDSAC_SBL);
+	val |= FIELD_PREP(M10V_XDSAC_SBS, M10V_DEFBS) |
+		FIELD_PREP(M10V_XDSAC_SBL, M10V_DEFBL);
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDSAC);
+
+	val = readl_relaxed(mc->reg_ch_base + M10V_XDDAC);
+	val &= ~(M10V_XDDAC_DBS | M10V_XDDAC_DBL);
+	val |= FIELD_PREP(M10V_XDDAC_DBS, M10V_DEFBS) |
+		FIELD_PREP(M10V_XDDAC_DBL, M10V_DEFBL);
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDDAC);
+
+	/* Start the channel */
+	val = readl_relaxed(mc->reg_ch_base + M10V_XDDES);
+	val &= ~(M10V_XDDES_CE | M10V_XDDES_SE | M10V_XDDES_TF |
+		 M10V_XDDES_EI | M10V_XDDES_TI);
+	val |= FIELD_PREP(M10V_XDDES_CE, 1) | FIELD_PREP(M10V_XDDES_SE, 1) |
+		FIELD_PREP(M10V_XDDES_TF, 1) | FIELD_PREP(M10V_XDDES_EI, 1) |
+		FIELD_PREP(M10V_XDDES_TI, 1);
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDDES);
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_xdmac_start(struct milbeaut_xdmac_chan *mc)
+{
+	struct milbeaut_xdmac_desc *md;
+
+	md = milbeaut_xdmac_next_desc(mc);
+	if (md)
+		milbeaut_chan_start(mc, md);
+}
+
+static irqreturn_t milbeaut_xdmac_interrupt(int irq, void *dev_id)
+{
+	struct milbeaut_xdmac_chan *mc = dev_id;
+	struct milbeaut_xdmac_desc *md;
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&mc->vc.lock, flags);
+
+	/* Ack and Stop */
+	val = FIELD_PREP(M10V_XDDSD_IS_MASK, 0x0);
+	writel_relaxed(val, mc->reg_ch_base + M10V_XDDSD);
+
+	md = mc->md;
+	if (!md)
+		goto out;
+
+	vchan_cookie_complete(&md->vd);
+
+	milbeaut_xdmac_start(mc);
+out:
+	spin_unlock_irqrestore(&mc->vc.lock, flags);
+	return IRQ_HANDLED;
+}
+
+static void milbeaut_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static struct dma_async_tx_descriptor *
+milbeaut_xdmac_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+			   dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_xdmac_desc *md;
+
+	md = kzalloc(sizeof(*md), GFP_NOWAIT);
+	if (!md)
+		return NULL;
+
+	md->len = len;
+	md->src = src;
+	md->dst = dst;
+
+	return vchan_tx_prep(vc, &md->vd, flags);
+}
+
+static int milbeaut_xdmac_terminate_all(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc);
+	unsigned long flags;
+	u32 val;
+
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vc->lock, flags);
+
+	/* Halt the channel */
+	val = readl(mc->reg_ch_base + M10V_XDDES);
+	val &= ~M10V_XDDES_CE;
+	val |= FIELD_PREP(M10V_XDDES_CE, 0);
+	writel(val, mc->reg_ch_base + M10V_XDDES);
+
+	if (mc->md) {
+		vchan_terminate_vdesc(&mc->md->vd);
+		mc->md = NULL;
+	}
+
+	vchan_get_all_descriptors(vc, &head);
+
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	vchan_dma_desc_free_list(vc, &head);
+
+	return 0;
+}
+
+static void milbeaut_xdmac_synchronize(struct dma_chan *chan)
+{
+	vchan_synchronize(to_virt_chan(chan));
+}
+
+static void milbeaut_xdmac_issue_pending(struct dma_chan *chan)
+{
+	struct virt_dma_chan *vc = to_virt_chan(chan);
+	struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->lock, flags);
+
+	if (vchan_issue_pending(vc) && !mc->md)
+		milbeaut_xdmac_start(mc);
+
+	spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void milbeaut_xdmac_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(to_milbeaut_xdmac_desc(vd));
+}
+
+static int milbeaut_xdmac_chan_init(struct platform_device *pdev,
+				    struct milbeaut_xdmac_device *mdev,
+				    int chan_id)
+{
+	struct device *dev = &pdev->dev;
+	struct milbeaut_xdmac_chan *mc = &mdev->channels[chan_id];
+	char *irq_name;
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, chan_id);
+	if (irq < 0)
+		return irq;
+
+	irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-xdmac-%d",
+				  chan_id);
+	if (!irq_name)
+		return -ENOMEM;
+
+	ret = devm_request_irq(dev, irq, milbeaut_xdmac_interrupt,
+			       IRQF_SHARED, irq_name, mc);
+	if (ret)
+		return ret;
+
+	mc->reg_ch_base = mdev->reg_base + chan_id * 0x30;
+
+	mc->vc.desc_free = milbeaut_xdmac_desc_free;
+	vchan_init(&mc->vc, &mdev->ddev);
+
+	return 0;
+}
+
+static void enable_xdmac(struct milbeaut_xdmac_device *mdev)
+{
+	unsigned int val;
+
+	val = readl(mdev->reg_base + M10V_XDACS);
+	val |= M10V_XDACS_XE;
+	writel(val, mdev->reg_base + M10V_XDACS);
+}
+
+static void disable_xdmac(struct milbeaut_xdmac_device *mdev)
+{
+	unsigned int val;
+
+	val = readl(mdev->reg_base + M10V_XDACS);
+	val &= ~M10V_XDACS_XE;
+	writel(val, mdev->reg_base + M10V_XDACS);
+}
+
+static int milbeaut_xdmac_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct milbeaut_xdmac_device *mdev;
+	struct dma_device *ddev;
+	int nr_chans, ret, i;
+
+	nr_chans = platform_irq_count(pdev);
+	if (nr_chans < 0)
+		return nr_chans;
+
+	mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans),
+			    GFP_KERNEL);
+	if (!mdev)
+		return -ENOMEM;
+
+	mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mdev->reg_base))
+		return PTR_ERR(mdev->reg_base);
+
+	ddev = &mdev->ddev;
+	ddev->dev = dev;
+	dma_cap_set(DMA_MEMCPY, ddev->cap_mask);
+	ddev->src_addr_widths = MLB_XDMAC_BUSWIDTHS;
+	ddev->dst_addr_widths = MLB_XDMAC_BUSWIDTHS;
+	ddev->device_free_chan_resources = milbeaut_xdmac_free_chan_resources;
+	ddev->device_prep_dma_memcpy = milbeaut_xdmac_prep_memcpy;
+	ddev->device_terminate_all = milbeaut_xdmac_terminate_all;
+	ddev->device_synchronize = milbeaut_xdmac_synchronize;
+	ddev->device_tx_status = dma_cookie_status;
+	ddev->device_issue_pending = milbeaut_xdmac_issue_pending;
+	INIT_LIST_HEAD(&ddev->channels);
+
+	for (i = 0; i < nr_chans; i++) {
+		ret = milbeaut_xdmac_chan_init(pdev, mdev, i);
+		if (ret)
+			return ret;
+	}
+
+	enable_xdmac(mdev);
+
+	ret = dma_async_device_register(ddev);
+	if (ret)
+		return ret;
+
+	ret = of_dma_controller_register(dev->of_node,
+					 of_dma_simple_xlate, mdev);
+	if (ret)
+		goto unregister_dmac;
+
+	platform_set_drvdata(pdev, mdev);
+
+	return 0;
+
+unregister_dmac:
+	dma_async_device_unregister(ddev);
+	return ret;
+}
+
+static int milbeaut_xdmac_remove(struct platform_device *pdev)
+{
+	struct milbeaut_xdmac_device *mdev = platform_get_drvdata(pdev);
+	struct dma_chan *chan;
+	int ret;
+
+	/*
+	 * Before reaching here, almost all descriptors have been freed by the
+	 * ->device_free_chan_resources() hook. However, each channel might
+	 * be still holding one descriptor that was on-flight at that moment.
+	 * Terminate it to make sure this hardware is no longer running. Then,
+	 * free the channel resources once again to avoid memory leak.
+	 */
+	list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
+		ret = dmaengine_terminate_sync(chan);
+		if (ret)
+			return ret;
+		milbeaut_xdmac_free_chan_resources(chan);
+	}
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&mdev->ddev);
+
+	disable_xdmac(mdev);
+
+	return 0;
+}
+
+static const struct of_device_id milbeaut_xdmac_match[] = {
+	{ .compatible = "socionext,milbeaut-m10v-xdmac" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, milbeaut_xdmac_match);
+
+static struct platform_driver milbeaut_xdmac_driver = {
+	.probe = milbeaut_xdmac_probe,
+	.remove = milbeaut_xdmac_remove,
+	.driver = {
+		.name = "milbeaut-m10v-xdmac",
+		.of_match_table = milbeaut_xdmac_match,
+	},
+};
+module_platform_driver(milbeaut_xdmac_driver);
+
+MODULE_DESCRIPTION("Milbeaut XDMAC DmaEngine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index 7fe494fc50d4..ad06f260e907 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -945,6 +945,8 @@ static int mmp_pdma_remove(struct platform_device *op)
 	struct mmp_pdma_phy *phy;
 	int i, irq = 0, irq_num = 0;
 
+	if (op->dev.of_node)
+		of_dma_controller_free(op->dev.of_node);
 
 	for (i = 0; i < pdev->dma_channels; i++) {
 		if (platform_get_irq(op, i) > 0)
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index e7d1e12bf464..10117f271b12 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -544,6 +544,9 @@ static void mmp_tdma_issue_pending(struct dma_chan *chan)
 
 static int mmp_tdma_remove(struct platform_device *pdev)
 {
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
 	return 0;
 }
 
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index c2d779daa4b5..b2c2b5e8093c 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -15,6 +15,8 @@
 #include <linux/of.h>
 #include <linux/of_dma.h>
 
+#include "dmaengine.h"
+
 static LIST_HEAD(of_dma_list);
 static DEFINE_MUTEX(of_dma_lock);
 
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
index 90bbcef99ef8..c683051257fd 100644
--- a/drivers/dma/owl-dma.c
+++ b/drivers/dma/owl-dma.c
@@ -674,10 +674,11 @@ static int owl_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	vchan_get_all_descriptors(&vchan->vc, &head);
-	vchan_dma_desc_free_list(&vchan->vc, &head);
 
 	spin_unlock_irqrestore(&vchan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
 	return 0;
 }
 
@@ -1045,18 +1046,13 @@ static int owl_dma_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct owl_dma *od;
-	struct resource *res;
 	int ret, i, nr_channels, nr_requests;
 
 	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
 	if (!od)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
-	od->base = devm_ioremap_resource(&pdev->dev, res);
+	od->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(od->base))
 		return PTR_ERR(od->base);
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 6cce9ef61b29..88b884cbb7c1 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2961,12 +2961,7 @@ static int __maybe_unused pl330_suspend(struct device *dev)
 {
 	struct amba_device *pcdev = to_amba_device(dev);
 
-	pm_runtime_disable(dev);
-
-	if (!pm_runtime_status_suspended(dev)) {
-		/* amba did not disable the clock */
-		amba_pclk_disable(pcdev);
-	}
+	pm_runtime_force_suspend(dev);
 	amba_pclk_unprepare(pcdev);
 
 	return 0;
@@ -2981,15 +2976,14 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (!pm_runtime_status_suspended(dev))
-		ret = amba_pclk_enable(pcdev);
-
-	pm_runtime_enable(dev);
+	pm_runtime_force_resume(dev);
 
 	return ret;
 }
 
-static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
+static const struct dev_pm_ops pl330_pm = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(pl330_suspend, pl330_resume)
+};
 
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c
new file mode 100644
index 000000000000..db4c5fd453a9
--- /dev/null
+++ b/drivers/dma/plx_dma.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microsemi Switchtec(tm) PCIe Management Driver
+ * Copyright (c) 2019, Logan Gunthorpe <logang@deltatee.com>
+ * Copyright (c) 2019, GigaIO Networks, Inc
+ */
+
+#include "dmaengine.h"
+
+#include <linux/circ_buf.h>
+#include <linux/dmaengine.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+MODULE_DESCRIPTION("PLX ExpressLane PEX PCI Switch DMA Engine");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Logan Gunthorpe");
+
+#define PLX_REG_DESC_RING_ADDR			0x214
+#define PLX_REG_DESC_RING_ADDR_HI		0x218
+#define PLX_REG_DESC_RING_NEXT_ADDR		0x21C
+#define PLX_REG_DESC_RING_COUNT			0x220
+#define PLX_REG_DESC_RING_LAST_ADDR		0x224
+#define PLX_REG_DESC_RING_LAST_SIZE		0x228
+#define PLX_REG_PREF_LIMIT			0x234
+#define PLX_REG_CTRL				0x238
+#define PLX_REG_CTRL2				0x23A
+#define PLX_REG_INTR_CTRL			0x23C
+#define PLX_REG_INTR_STATUS			0x23E
+
+#define PLX_REG_PREF_LIMIT_PREF_FOUR		8
+
+#define PLX_REG_CTRL_GRACEFUL_PAUSE		BIT(0)
+#define PLX_REG_CTRL_ABORT			BIT(1)
+#define PLX_REG_CTRL_WRITE_BACK_EN		BIT(2)
+#define PLX_REG_CTRL_START			BIT(3)
+#define PLX_REG_CTRL_RING_STOP_MODE		BIT(4)
+#define PLX_REG_CTRL_DESC_MODE_BLOCK		(0 << 5)
+#define PLX_REG_CTRL_DESC_MODE_ON_CHIP		(1 << 5)
+#define PLX_REG_CTRL_DESC_MODE_OFF_CHIP		(2 << 5)
+#define PLX_REG_CTRL_DESC_INVALID		BIT(8)
+#define PLX_REG_CTRL_GRACEFUL_PAUSE_DONE	BIT(9)
+#define PLX_REG_CTRL_ABORT_DONE			BIT(10)
+#define PLX_REG_CTRL_IMM_PAUSE_DONE		BIT(12)
+#define PLX_REG_CTRL_IN_PROGRESS		BIT(30)
+
+#define PLX_REG_CTRL_RESET_VAL	(PLX_REG_CTRL_DESC_INVALID | \
+				 PLX_REG_CTRL_GRACEFUL_PAUSE_DONE | \
+				 PLX_REG_CTRL_ABORT_DONE | \
+				 PLX_REG_CTRL_IMM_PAUSE_DONE)
+
+#define PLX_REG_CTRL_START_VAL	(PLX_REG_CTRL_WRITE_BACK_EN | \
+				 PLX_REG_CTRL_DESC_MODE_OFF_CHIP | \
+				 PLX_REG_CTRL_START | \
+				 PLX_REG_CTRL_RESET_VAL)
+
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_64B		0
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_128B	1
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_256B	2
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_512B	3
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_1KB		4
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_2KB		5
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_4B		7
+
+#define PLX_REG_INTR_CRTL_ERROR_EN		BIT(0)
+#define PLX_REG_INTR_CRTL_INV_DESC_EN		BIT(1)
+#define PLX_REG_INTR_CRTL_ABORT_DONE_EN		BIT(3)
+#define PLX_REG_INTR_CRTL_PAUSE_DONE_EN		BIT(4)
+#define PLX_REG_INTR_CRTL_IMM_PAUSE_DONE_EN	BIT(5)
+
+#define PLX_REG_INTR_STATUS_ERROR		BIT(0)
+#define PLX_REG_INTR_STATUS_INV_DESC		BIT(1)
+#define PLX_REG_INTR_STATUS_DESC_DONE		BIT(2)
+#define PLX_REG_INTR_CRTL_ABORT_DONE		BIT(3)
+
+struct plx_dma_hw_std_desc {
+	__le32 flags_and_size;
+	__le16 dst_addr_hi;
+	__le16 src_addr_hi;
+	__le32 dst_addr_lo;
+	__le32 src_addr_lo;
+};
+
+#define PLX_DESC_SIZE_MASK		0x7ffffff
+#define PLX_DESC_FLAG_VALID		BIT(31)
+#define PLX_DESC_FLAG_INT_WHEN_DONE	BIT(30)
+
+#define PLX_DESC_WB_SUCCESS		BIT(30)
+#define PLX_DESC_WB_RD_FAIL		BIT(29)
+#define PLX_DESC_WB_WR_FAIL		BIT(28)
+
+#define PLX_DMA_RING_COUNT		2048
+
+struct plx_dma_desc {
+	struct dma_async_tx_descriptor txd;
+	struct plx_dma_hw_std_desc *hw;
+	u32 orig_size;
+};
+
+struct plx_dma_dev {
+	struct dma_device dma_dev;
+	struct dma_chan dma_chan;
+	struct pci_dev __rcu *pdev;
+	void __iomem *bar;
+	struct tasklet_struct desc_task;
+
+	spinlock_t ring_lock;
+	bool ring_active;
+	int head;
+	int tail;
+	struct plx_dma_hw_std_desc *hw_ring;
+	dma_addr_t hw_ring_dma;
+	struct plx_dma_desc **desc_ring;
+};
+
+static struct plx_dma_dev *chan_to_plx_dma_dev(struct dma_chan *c)
+{
+	return container_of(c, struct plx_dma_dev, dma_chan);
+}
+
+static struct plx_dma_desc *to_plx_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct plx_dma_desc, txd);
+}
+
+static struct plx_dma_desc *plx_dma_get_desc(struct plx_dma_dev *plxdev, int i)
+{
+	return plxdev->desc_ring[i & (PLX_DMA_RING_COUNT - 1)];
+}
+
+static void plx_dma_process_desc(struct plx_dma_dev *plxdev)
+{
+	struct dmaengine_result res;
+	struct plx_dma_desc *desc;
+	u32 flags;
+
+	spin_lock_bh(&plxdev->ring_lock);
+
+	while (plxdev->tail != plxdev->head) {
+		desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+		flags = le32_to_cpu(READ_ONCE(desc->hw->flags_and_size));
+
+		if (flags & PLX_DESC_FLAG_VALID)
+			break;
+
+		res.residue = desc->orig_size - (flags & PLX_DESC_SIZE_MASK);
+
+		if (flags & PLX_DESC_WB_SUCCESS)
+			res.result = DMA_TRANS_NOERROR;
+		else if (flags & PLX_DESC_WB_WR_FAIL)
+			res.result = DMA_TRANS_WRITE_FAILED;
+		else
+			res.result = DMA_TRANS_READ_FAILED;
+
+		dma_cookie_complete(&desc->txd);
+		dma_descriptor_unmap(&desc->txd);
+		dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+		desc->txd.callback = NULL;
+		desc->txd.callback_result = NULL;
+
+		plxdev->tail++;
+	}
+
+	spin_unlock_bh(&plxdev->ring_lock);
+}
+
+static void plx_dma_abort_desc(struct plx_dma_dev *plxdev)
+{
+	struct dmaengine_result res;
+	struct plx_dma_desc *desc;
+
+	plx_dma_process_desc(plxdev);
+
+	spin_lock_bh(&plxdev->ring_lock);
+
+	while (plxdev->tail != plxdev->head) {
+		desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+		res.residue = desc->orig_size;
+		res.result = DMA_TRANS_ABORTED;
+
+		dma_cookie_complete(&desc->txd);
+		dma_descriptor_unmap(&desc->txd);
+		dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+		desc->txd.callback = NULL;
+		desc->txd.callback_result = NULL;
+
+		plxdev->tail++;
+	}
+
+	spin_unlock_bh(&plxdev->ring_lock);
+}
+
+static void __plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+	u32 val;
+
+	val = readl(plxdev->bar + PLX_REG_CTRL);
+	if (!(val & ~PLX_REG_CTRL_GRACEFUL_PAUSE))
+		return;
+
+	writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+	       plxdev->bar + PLX_REG_CTRL);
+
+	while (!time_after(jiffies, timeout)) {
+		val = readl(plxdev->bar + PLX_REG_CTRL);
+		if (val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE)
+			break;
+
+		cpu_relax();
+	}
+
+	if (!(val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE))
+		dev_err(plxdev->dma_dev.dev,
+			"Timeout waiting for graceful pause!\n");
+
+	writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+	       plxdev->bar + PLX_REG_CTRL);
+
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+}
+
+static void plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	__plx_dma_stop(plxdev);
+
+	rcu_read_unlock();
+}
+
+static void plx_dma_desc_task(unsigned long data)
+{
+	struct plx_dma_dev *plxdev = (void *)data;
+
+	plx_dma_process_desc(plxdev);
+}
+
+static struct dma_async_tx_descriptor *plx_dma_prep_memcpy(struct dma_chan *c,
+		dma_addr_t dma_dst, dma_addr_t dma_src, size_t len,
+		unsigned long flags)
+	__acquires(plxdev->ring_lock)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(c);
+	struct plx_dma_desc *plxdesc;
+
+	spin_lock_bh(&plxdev->ring_lock);
+	if (!plxdev->ring_active)
+		goto err_unlock;
+
+	if (!CIRC_SPACE(plxdev->head, plxdev->tail, PLX_DMA_RING_COUNT))
+		goto err_unlock;
+
+	if (len > PLX_DESC_SIZE_MASK)
+		goto err_unlock;
+
+	plxdesc = plx_dma_get_desc(plxdev, plxdev->head);
+	plxdev->head++;
+
+	plxdesc->hw->dst_addr_lo = cpu_to_le32(lower_32_bits(dma_dst));
+	plxdesc->hw->dst_addr_hi = cpu_to_le16(upper_32_bits(dma_dst));
+	plxdesc->hw->src_addr_lo = cpu_to_le32(lower_32_bits(dma_src));
+	plxdesc->hw->src_addr_hi = cpu_to_le16(upper_32_bits(dma_src));
+
+	plxdesc->orig_size = len;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		len |= PLX_DESC_FLAG_INT_WHEN_DONE;
+
+	plxdesc->hw->flags_and_size = cpu_to_le32(len);
+	plxdesc->txd.flags = flags;
+
+	/* return with the lock held, it will be released in tx_submit */
+
+	return &plxdesc->txd;
+
+err_unlock:
+	/*
+	 * Keep sparse happy by restoring an even lock count on
+	 * this lock.
+	 */
+	__acquire(plxdev->ring_lock);
+
+	spin_unlock_bh(&plxdev->ring_lock);
+	return NULL;
+}
+
+static dma_cookie_t plx_dma_tx_submit(struct dma_async_tx_descriptor *desc)
+	__releases(plxdev->ring_lock)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(desc->chan);
+	struct plx_dma_desc *plxdesc = to_plx_desc(desc);
+	dma_cookie_t cookie;
+
+	cookie = dma_cookie_assign(desc);
+
+	/*
+	 * Ensure the descriptor updates are visible to the dma device
+	 * before setting the valid bit.
+	 */
+	wmb();
+
+	plxdesc->hw->flags_and_size |= cpu_to_le32(PLX_DESC_FLAG_VALID);
+
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	return cookie;
+}
+
+static enum dma_status plx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	plx_dma_process_desc(plxdev);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void plx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/*
+	 * Ensure the valid bits are visible before starting the
+	 * DMA engine.
+	 */
+	wmb();
+
+	writew(PLX_REG_CTRL_START_VAL, plxdev->bar + PLX_REG_CTRL);
+
+	rcu_read_unlock();
+}
+
+static irqreturn_t plx_dma_isr(int irq, void *devid)
+{
+	struct plx_dma_dev *plxdev = devid;
+	u32 status;
+
+	status = readw(plxdev->bar + PLX_REG_INTR_STATUS);
+
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & PLX_REG_INTR_STATUS_DESC_DONE && plxdev->ring_active)
+		tasklet_schedule(&plxdev->desc_task);
+
+	writew(status, plxdev->bar + PLX_REG_INTR_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static int plx_dma_alloc_desc(struct plx_dma_dev *plxdev)
+{
+	struct plx_dma_desc *desc;
+	int i;
+
+	plxdev->desc_ring = kcalloc(PLX_DMA_RING_COUNT,
+				    sizeof(*plxdev->desc_ring), GFP_KERNEL);
+	if (!plxdev->desc_ring)
+		return -ENOMEM;
+
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++) {
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			goto free_and_exit;
+
+		dma_async_tx_descriptor_init(&desc->txd, &plxdev->dma_chan);
+		desc->txd.tx_submit = plx_dma_tx_submit;
+		desc->hw = &plxdev->hw_ring[i];
+
+		plxdev->desc_ring[i] = desc;
+	}
+
+	return 0;
+
+free_and_exit:
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+		kfree(plxdev->desc_ring[i]);
+	kfree(plxdev->desc_ring);
+	return -ENOMEM;
+}
+
+static int plx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+	int rc;
+
+	plxdev->head = plxdev->tail = 0;
+	plxdev->hw_ring = dma_alloc_coherent(plxdev->dma_dev.dev, ring_sz,
+					     &plxdev->hw_ring_dma, GFP_KERNEL);
+	if (!plxdev->hw_ring)
+		return -ENOMEM;
+
+	rc = plx_dma_alloc_desc(plxdev);
+	if (rc)
+		goto out_free_hw_ring;
+
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		rc = -ENODEV;
+		goto out_free_hw_ring;
+	}
+
+	writel(PLX_REG_CTRL_RESET_VAL, plxdev->bar + PLX_REG_CTRL);
+	writel(lower_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_ADDR);
+	writel(upper_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+	writel(lower_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+	writel(PLX_DMA_RING_COUNT, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+	writel(PLX_REG_PREF_LIMIT_PREF_FOUR, plxdev->bar + PLX_REG_PREF_LIMIT);
+
+	plxdev->ring_active = true;
+
+	rcu_read_unlock();
+
+	return PLX_DMA_RING_COUNT;
+
+out_free_hw_ring:
+	dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+			  plxdev->hw_ring_dma);
+	return rc;
+}
+
+static void plx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+	struct pci_dev *pdev;
+	int irq = -1;
+	int i;
+
+	spin_lock_bh(&plxdev->ring_lock);
+	plxdev->ring_active = false;
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	plx_dma_stop(plxdev);
+
+	rcu_read_lock();
+	pdev = rcu_dereference(plxdev->pdev);
+	if (pdev)
+		irq = pci_irq_vector(pdev, 0);
+	rcu_read_unlock();
+
+	if (irq > 0)
+		synchronize_irq(irq);
+
+	tasklet_kill(&plxdev->desc_task);
+
+	plx_dma_abort_desc(plxdev);
+
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+		kfree(plxdev->desc_ring[i]);
+
+	kfree(plxdev->desc_ring);
+	dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+			  plxdev->hw_ring_dma);
+
+}
+
+static void plx_dma_release(struct dma_device *dma_dev)
+{
+	struct plx_dma_dev *plxdev =
+		container_of(dma_dev, struct plx_dma_dev, dma_dev);
+
+	put_device(dma_dev->dev);
+	kfree(plxdev);
+}
+
+static int plx_dma_create(struct pci_dev *pdev)
+{
+	struct plx_dma_dev *plxdev;
+	struct dma_device *dma;
+	struct dma_chan *chan;
+	int rc;
+
+	plxdev = kzalloc(sizeof(*plxdev), GFP_KERNEL);
+	if (!plxdev)
+		return -ENOMEM;
+
+	rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0,
+			 KBUILD_MODNAME, plxdev);
+	if (rc) {
+		kfree(plxdev);
+		return rc;
+	}
+
+	spin_lock_init(&plxdev->ring_lock);
+	tasklet_init(&plxdev->desc_task, plx_dma_desc_task,
+		     (unsigned long)plxdev);
+
+	RCU_INIT_POINTER(plxdev->pdev, pdev);
+	plxdev->bar = pcim_iomap_table(pdev)[0];
+
+	dma = &plxdev->dma_dev;
+	dma->chancnt = 1;
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma->copy_align = DMAENGINE_ALIGN_1_BYTE;
+	dma->dev = get_device(&pdev->dev);
+
+	dma->device_alloc_chan_resources = plx_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = plx_dma_free_chan_resources;
+	dma->device_prep_dma_memcpy = plx_dma_prep_memcpy;
+	dma->device_issue_pending = plx_dma_issue_pending;
+	dma->device_tx_status = plx_dma_tx_status;
+	dma->device_release = plx_dma_release;
+
+	chan = &plxdev->dma_chan;
+	chan->device = dma;
+	dma_cookie_init(chan);
+	list_add_tail(&chan->device_node, &dma->channels);
+
+	rc = dma_async_device_register(dma);
+	if (rc) {
+		pci_err(pdev, "Failed to register dma device: %d\n", rc);
+		free_irq(pci_irq_vector(pdev, 0),  plxdev);
+		kfree(plxdev);
+		return rc;
+	}
+
+	pci_set_drvdata(pdev, plxdev);
+
+	return 0;
+}
+
+static int plx_dma_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *id)
+{
+	int rc;
+
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (rc)
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (rc)
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pcim_iomap_regions(pdev, 1, KBUILD_MODNAME);
+	if (rc)
+		return rc;
+
+	rc = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (rc <= 0)
+		return rc;
+
+	pci_set_master(pdev);
+
+	rc = plx_dma_create(pdev);
+	if (rc)
+		goto err_free_irq_vectors;
+
+	pci_info(pdev, "PLX DMA Channel Registered\n");
+
+	return 0;
+
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+	return rc;
+}
+
+static void plx_dma_remove(struct pci_dev *pdev)
+{
+	struct plx_dma_dev *plxdev = pci_get_drvdata(pdev);
+
+	free_irq(pci_irq_vector(pdev, 0),  plxdev);
+
+	rcu_assign_pointer(plxdev->pdev, NULL);
+	synchronize_rcu();
+
+	spin_lock_bh(&plxdev->ring_lock);
+	plxdev->ring_active = false;
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	__plx_dma_stop(plxdev);
+	plx_dma_abort_desc(plxdev);
+
+	plxdev->bar = NULL;
+	dma_async_device_unregister(&plxdev->dma_dev);
+
+	pci_free_irq_vectors(pdev);
+}
+
+static const struct pci_device_id plx_dma_pci_tbl[] = {
+	{
+		.vendor		= PCI_VENDOR_ID_PLX,
+		.device		= 0x87D0,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= PCI_CLASS_SYSTEM_OTHER << 8,
+		.class_mask	= 0xFFFFFFFF,
+	},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, plx_dma_pci_tbl);
+
+static struct pci_driver plx_dma_pci_driver = {
+	.name           = KBUILD_MODNAME,
+	.id_table       = plx_dma_pci_tbl,
+	.probe          = plx_dma_probe,
+	.remove		= plx_dma_remove,
+};
+module_pci_driver(plx_dma_pci_driver);
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 43da8eeb18ef..8e14c72d03f0 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -519,15 +519,6 @@ static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
 	s3c24xx_dma_start_next_sg(s3cchan, txd);
 }
 
-static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
-				struct s3c24xx_dma_chan *s3cchan)
-{
-	LIST_HEAD(head);
-
-	vchan_get_all_descriptors(&s3cchan->vc, &head);
-	vchan_dma_desc_free_list(&s3cchan->vc, &head);
-}
-
 /*
  * Try to allocate a physical channel.  When successful, assign it to
  * this virtual channel, and initiate the next descriptor.  The
@@ -709,8 +700,9 @@ static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
 {
 	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
 	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	LIST_HEAD(head);
 	unsigned long flags;
-	int ret = 0;
+	int ret;
 
 	spin_lock_irqsave(&s3cchan->vc.lock, flags);
 
@@ -734,7 +726,15 @@ static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	/* Dequeue jobs not yet fired as well */
-	s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+
+	return 0;
+
 unlock:
 	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
 
@@ -1198,7 +1198,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
 
 	/* Basic sanity check */
 	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
-		dev_err(&pdev->dev, "to many dma channels %d, max %d\n",
+		dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
 			pdata->num_phy_channels, MAX_DMA_CHANNELS);
 		return -EINVAL;
 	}
diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig
new file mode 100644
index 000000000000..f8ffa02e279f
--- /dev/null
+++ b/drivers/dma/sf-pdma/Kconfig
@@ -0,0 +1,6 @@
+config SF_PDMA
+	tristate "Sifive PDMA controller driver"
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the SiFive PDMA controller.
diff --git a/drivers/dma/sf-pdma/Makefile b/drivers/dma/sf-pdma/Makefile
new file mode 100644
index 000000000000..764552ab8d0a
--- /dev/null
+++ b/drivers/dma/sf-pdma/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SF_PDMA)   += sf-pdma.o
diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
new file mode 100644
index 000000000000..6d0bec947636
--- /dev/null
+++ b/drivers/dma/sf-pdma/sf-pdma.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SiFive FU540 Platform DMA driver
+ * Copyright (C) 2019 SiFive
+ *
+ * Based partially on:
+ * - drivers/dma/fsl-edma.c
+ * - drivers/dma/dw-edma/
+ * - drivers/dma/pxa-dma.c
+ *
+ * See the following sources for further documentation:
+ * - Chapter 12 "Platform DMA Engine (PDMA)" of
+ *   SiFive FU540-C000 v1.0
+ *   https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+
+#include "sf-pdma.h"
+
+#ifndef readq
+static inline unsigned long long readq(void __iomem *addr)
+{
+	return readl(addr) | (((unsigned long long)readl(addr + 4)) << 32LL);
+}
+#endif
+
+#ifndef writeq
+static inline void writeq(unsigned long long v, void __iomem *addr)
+{
+	writel(lower_32_bits(v), addr);
+	writel(upper_32_bits(v), addr + 4);
+}
+#endif
+
+static inline struct sf_pdma_chan *to_sf_pdma_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct sf_pdma_chan, vchan.chan);
+}
+
+static inline struct sf_pdma_desc *to_sf_pdma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct sf_pdma_desc, vdesc);
+}
+
+static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan)
+{
+	struct sf_pdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	if (chan->desc && !chan->desc->in_use) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return chan->desc;
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->chan = chan;
+
+	return desc;
+}
+
+static void sf_pdma_fill_desc(struct sf_pdma_desc *desc,
+			      u64 dst, u64 src, u64 size)
+{
+	desc->xfer_type = PDMA_FULL_SPEED;
+	desc->xfer_size = size;
+	desc->dst_addr = dst;
+	desc->src_addr = src;
+}
+
+static void sf_pdma_disclaim_chan(struct sf_pdma_chan *chan)
+{
+	struct pdma_regs *regs = &chan->regs;
+
+	writel(PDMA_CLEAR_CTRL, regs->ctrl);
+}
+
+static struct dma_async_tx_descriptor *
+sf_pdma_prep_dma_memcpy(struct dma_chan *dchan,	dma_addr_t dest, dma_addr_t src,
+			size_t len, unsigned long flags)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	struct sf_pdma_desc *desc;
+
+	if (chan && (!len || !dest || !src)) {
+		dev_err(chan->pdma->dma_dev.dev,
+			"Please check dma len, dest, src!\n");
+		return NULL;
+	}
+
+	desc = sf_pdma_alloc_desc(chan);
+	if (!desc)
+		return NULL;
+
+	desc->in_use = true;
+	desc->dirn = DMA_MEM_TO_MEM;
+	desc->async_tx = vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	chan->desc = desc;
+	sf_pdma_fill_desc(desc, dest, src, len);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	return desc->async_tx;
+}
+
+static int sf_pdma_slave_config(struct dma_chan *dchan,
+				struct dma_slave_config *cfg)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+
+	memcpy(&chan->cfg, cfg, sizeof(*cfg));
+
+	return 0;
+}
+
+static int sf_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	struct pdma_regs *regs = &chan->regs;
+
+	dma_cookie_init(dchan);
+	writel(PDMA_CLAIM_MASK, regs->ctrl);
+
+	return 0;
+}
+
+static void sf_pdma_disable_request(struct sf_pdma_chan *chan)
+{
+	struct pdma_regs *regs = &chan->regs;
+
+	writel(readl(regs->ctrl) & ~PDMA_RUN_MASK, regs->ctrl);
+}
+
+static void sf_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	sf_pdma_disable_request(chan);
+	kfree(chan->desc);
+	chan->desc = NULL;
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	sf_pdma_disclaim_chan(chan);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+}
+
+static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan,
+				   dma_cookie_t cookie)
+{
+	struct virt_dma_desc *vd = NULL;
+	struct pdma_regs *regs = &chan->regs;
+	unsigned long flags;
+	u64 residue = 0;
+	struct sf_pdma_desc *desc;
+	struct dma_async_tx_descriptor *tx;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	tx = &chan->desc->vdesc.tx;
+	if (cookie == tx->chan->completed_cookie)
+		goto out;
+
+	if (cookie == tx->cookie) {
+		residue = readq(regs->residue);
+	} else {
+		vd = vchan_find_desc(&chan->vchan, cookie);
+		if (!vd)
+			goto out;
+
+		desc = to_sf_pdma_desc(vd);
+		residue = desc->xfer_size;
+	}
+
+out:
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	return residue;
+}
+
+static enum dma_status
+sf_pdma_tx_status(struct dma_chan *dchan,
+		  dma_cookie_t cookie,
+		  struct dma_tx_state *txstate)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	enum dma_status status;
+
+	status = dma_cookie_status(dchan, cookie, txstate);
+
+	if (txstate && status != DMA_ERROR)
+		dma_set_residue(txstate, sf_pdma_desc_residue(chan, cookie));
+
+	return status;
+}
+
+static int sf_pdma_terminate_all(struct dma_chan *dchan)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	sf_pdma_disable_request(chan);
+	kfree(chan->desc);
+	chan->desc = NULL;
+	chan->xfer_err = false;
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+
+	return 0;
+}
+
+static void sf_pdma_enable_request(struct sf_pdma_chan *chan)
+{
+	struct pdma_regs *regs = &chan->regs;
+	u32 v;
+
+	v = PDMA_CLAIM_MASK |
+		PDMA_ENABLE_DONE_INT_MASK |
+		PDMA_ENABLE_ERR_INT_MASK |
+		PDMA_RUN_MASK;
+
+	writel(v, regs->ctrl);
+}
+
+static void sf_pdma_xfer_desc(struct sf_pdma_chan *chan)
+{
+	struct sf_pdma_desc *desc = chan->desc;
+	struct pdma_regs *regs = &chan->regs;
+
+	if (!desc) {
+		dev_err(chan->pdma->dma_dev.dev, "NULL desc.\n");
+		return;
+	}
+
+	writel(desc->xfer_type, regs->xfer_type);
+	writeq(desc->xfer_size, regs->xfer_size);
+	writeq(desc->dst_addr, regs->dst_addr);
+	writeq(desc->src_addr, regs->src_addr);
+
+	chan->desc = desc;
+	chan->status = DMA_IN_PROGRESS;
+	sf_pdma_enable_request(chan);
+}
+
+static void sf_pdma_issue_pending(struct dma_chan *dchan)
+{
+	struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	if (vchan_issue_pending(&chan->vchan) && chan->desc)
+		sf_pdma_xfer_desc(chan);
+
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static void sf_pdma_free_desc(struct virt_dma_desc *vdesc)
+{
+	struct sf_pdma_desc *desc;
+
+	desc = to_sf_pdma_desc(vdesc);
+	desc->in_use = false;
+}
+
+static void sf_pdma_donebh_tasklet(unsigned long arg)
+{
+	struct sf_pdma_chan *chan = (struct sf_pdma_chan *)arg;
+	struct sf_pdma_desc *desc = chan->desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->xfer_err) {
+		chan->retries = MAX_RETRY;
+		chan->status = DMA_COMPLETE;
+		chan->xfer_err = false;
+	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	dmaengine_desc_get_callback_invoke(desc->async_tx, NULL);
+}
+
+static void sf_pdma_errbh_tasklet(unsigned long arg)
+{
+	struct sf_pdma_chan *chan = (struct sf_pdma_chan *)arg;
+	struct sf_pdma_desc *desc = chan->desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->retries <= 0) {
+		/* fail to recover */
+		spin_unlock_irqrestore(&chan->lock, flags);
+		dmaengine_desc_get_callback_invoke(desc->async_tx, NULL);
+	} else {
+		/* retry */
+		chan->retries--;
+		chan->xfer_err = true;
+		chan->status = DMA_ERROR;
+
+		sf_pdma_enable_request(chan);
+		spin_unlock_irqrestore(&chan->lock, flags);
+	}
+}
+
+static irqreturn_t sf_pdma_done_isr(int irq, void *dev_id)
+{
+	struct sf_pdma_chan *chan = dev_id;
+	struct pdma_regs *regs = &chan->regs;
+	unsigned long flags;
+	u64 residue;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	writel((readl(regs->ctrl)) & ~PDMA_DONE_STATUS_MASK, regs->ctrl);
+	residue = readq(regs->residue);
+
+	if (!residue) {
+		list_del(&chan->desc->vdesc.node);
+		vchan_cookie_complete(&chan->desc->vdesc);
+	} else {
+		/* submit next trascatioin if possible */
+		struct sf_pdma_desc *desc = chan->desc;
+
+		desc->src_addr += desc->xfer_size - residue;
+		desc->dst_addr += desc->xfer_size - residue;
+		desc->xfer_size = residue;
+
+		sf_pdma_xfer_desc(chan);
+	}
+
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	tasklet_hi_schedule(&chan->done_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t sf_pdma_err_isr(int irq, void *dev_id)
+{
+	struct sf_pdma_chan *chan = dev_id;
+	struct pdma_regs *regs = &chan->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	writel((readl(regs->ctrl)) & ~PDMA_ERR_STATUS_MASK, regs->ctrl);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	tasklet_schedule(&chan->err_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * sf_pdma_irq_init() - Init PDMA IRQ Handlers
+ * @pdev: pointer of platform_device
+ * @pdma: pointer of PDMA engine. Caller should check NULL
+ *
+ * Initialize DONE and ERROR interrupt handler for 4 channels. Caller should
+ * make sure the pointer passed in are non-NULL. This function should be called
+ * only one time during the device probe.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 0		- OK to init all IRQ handlers
+ * * -EINVAL	- Fail to request IRQ
+ */
+static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
+{
+	int irq, r, i;
+	struct sf_pdma_chan *chan;
+
+	for (i = 0; i < pdma->n_chans; i++) {
+		chan = &pdma->chans[i];
+
+		irq = platform_get_irq(pdev, i * 2);
+		if (irq < 0) {
+			dev_err(&pdev->dev, "ch(%d) Can't get done irq.\n", i);
+			return -EINVAL;
+		}
+
+		r = devm_request_irq(&pdev->dev, irq, sf_pdma_done_isr, 0,
+				     dev_name(&pdev->dev), (void *)chan);
+		if (r) {
+			dev_err(&pdev->dev, "Fail to attach done ISR: %d\n", r);
+			return -EINVAL;
+		}
+
+		chan->txirq = irq;
+
+		irq = platform_get_irq(pdev, (i * 2) + 1);
+		if (irq < 0) {
+			dev_err(&pdev->dev, "ch(%d) Can't get err irq.\n", i);
+			return -EINVAL;
+		}
+
+		r = devm_request_irq(&pdev->dev, irq, sf_pdma_err_isr, 0,
+				     dev_name(&pdev->dev), (void *)chan);
+		if (r) {
+			dev_err(&pdev->dev, "Fail to attach err ISR: %d\n", r);
+			return -EINVAL;
+		}
+
+		chan->errirq = irq;
+	}
+
+	return 0;
+}
+
+/**
+ * sf_pdma_setup_chans() - Init settings of each channel
+ * @pdma: pointer of PDMA engine. Caller should check NULL
+ *
+ * Initialize all data structure and register base. Caller should make sure
+ * the pointer passed in are non-NULL. This function should be called only
+ * one time during the device probe.
+ *
+ * Context: Any context.
+ *
+ * Return: none
+ */
+static void sf_pdma_setup_chans(struct sf_pdma *pdma)
+{
+	int i;
+	struct sf_pdma_chan *chan;
+
+	INIT_LIST_HEAD(&pdma->dma_dev.channels);
+
+	for (i = 0; i < pdma->n_chans; i++) {
+		chan = &pdma->chans[i];
+
+		chan->regs.ctrl =
+			SF_PDMA_REG_BASE(i) + PDMA_CTRL;
+		chan->regs.xfer_type =
+			SF_PDMA_REG_BASE(i) + PDMA_XFER_TYPE;
+		chan->regs.xfer_size =
+			SF_PDMA_REG_BASE(i) + PDMA_XFER_SIZE;
+		chan->regs.dst_addr =
+			SF_PDMA_REG_BASE(i) + PDMA_DST_ADDR;
+		chan->regs.src_addr =
+			SF_PDMA_REG_BASE(i) + PDMA_SRC_ADDR;
+		chan->regs.act_type =
+			SF_PDMA_REG_BASE(i) + PDMA_ACT_TYPE;
+		chan->regs.residue =
+			SF_PDMA_REG_BASE(i) + PDMA_REMAINING_BYTE;
+		chan->regs.cur_dst_addr =
+			SF_PDMA_REG_BASE(i) + PDMA_CUR_DST_ADDR;
+		chan->regs.cur_src_addr =
+			SF_PDMA_REG_BASE(i) + PDMA_CUR_SRC_ADDR;
+
+		chan->pdma = pdma;
+		chan->pm_state = RUNNING;
+		chan->slave_id = i;
+		chan->xfer_err = false;
+		spin_lock_init(&chan->lock);
+
+		chan->vchan.desc_free = sf_pdma_free_desc;
+		vchan_init(&chan->vchan, &pdma->dma_dev);
+
+		writel(PDMA_CLEAR_CTRL, chan->regs.ctrl);
+
+		tasklet_init(&chan->done_tasklet,
+			     sf_pdma_donebh_tasklet, (unsigned long)chan);
+		tasklet_init(&chan->err_tasklet,
+			     sf_pdma_errbh_tasklet, (unsigned long)chan);
+	}
+}
+
+static int sf_pdma_probe(struct platform_device *pdev)
+{
+	struct sf_pdma *pdma;
+	struct sf_pdma_chan *chan;
+	struct resource *res;
+	int len, chans;
+	int ret;
+	const enum dma_slave_buswidth widths =
+		DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES |
+		DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES |
+		DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES |
+		DMA_SLAVE_BUSWIDTH_64_BYTES;
+
+	chans = PDMA_NR_CH;
+	len = sizeof(*pdma) + sizeof(*chan) * chans;
+	pdma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!pdma)
+		return -ENOMEM;
+
+	pdma->n_chans = chans;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pdma->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pdma->membase))
+		goto ERR_MEMBASE;
+
+	ret = sf_pdma_irq_init(pdev, pdma);
+	if (ret)
+		goto ERR_INITIRQ;
+
+	sf_pdma_setup_chans(pdma);
+
+	pdma->dma_dev.dev = &pdev->dev;
+
+	/* Setup capability */
+	dma_cap_set(DMA_MEMCPY, pdma->dma_dev.cap_mask);
+	pdma->dma_dev.copy_align = 2;
+	pdma->dma_dev.src_addr_widths = widths;
+	pdma->dma_dev.dst_addr_widths = widths;
+	pdma->dma_dev.directions = BIT(DMA_MEM_TO_MEM);
+	pdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	pdma->dma_dev.descriptor_reuse = true;
+
+	/* Setup DMA APIs */
+	pdma->dma_dev.device_alloc_chan_resources =
+		sf_pdma_alloc_chan_resources;
+	pdma->dma_dev.device_free_chan_resources =
+		sf_pdma_free_chan_resources;
+	pdma->dma_dev.device_tx_status = sf_pdma_tx_status;
+	pdma->dma_dev.device_prep_dma_memcpy = sf_pdma_prep_dma_memcpy;
+	pdma->dma_dev.device_config = sf_pdma_slave_config;
+	pdma->dma_dev.device_terminate_all = sf_pdma_terminate_all;
+	pdma->dma_dev.device_issue_pending = sf_pdma_issue_pending;
+
+	platform_set_drvdata(pdev, pdma);
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret)
+		dev_warn(&pdev->dev,
+			 "Failed to set DMA mask. Fall back to default.\n");
+
+	ret = dma_async_device_register(&pdma->dma_dev);
+	if (ret)
+		goto ERR_REG_DMADEVICE;
+
+	return 0;
+
+ERR_MEMBASE:
+	devm_kfree(&pdev->dev, pdma);
+	return PTR_ERR(pdma->membase);
+
+ERR_INITIRQ:
+	devm_kfree(&pdev->dev, pdma);
+	return ret;
+
+ERR_REG_DMADEVICE:
+	devm_kfree(&pdev->dev, pdma);
+	dev_err(&pdev->dev,
+		"Can't register SiFive Platform DMA. (%d)\n", ret);
+	return ret;
+}
+
+static int sf_pdma_remove(struct platform_device *pdev)
+{
+	struct sf_pdma *pdma = platform_get_drvdata(pdev);
+	struct sf_pdma_chan *ch;
+	int i;
+
+	for (i = 0; i < PDMA_NR_CH; i++) {
+		ch = &pdma->chans[i];
+
+		devm_free_irq(&pdev->dev, ch->txirq, ch);
+		devm_free_irq(&pdev->dev, ch->errirq, ch);
+		list_del(&ch->vchan.chan.device_node);
+		tasklet_kill(&ch->vchan.task);
+		tasklet_kill(&ch->done_tasklet);
+		tasklet_kill(&ch->err_tasklet);
+	}
+
+	dma_async_device_unregister(&pdma->dma_dev);
+
+	return 0;
+}
+
+static const struct of_device_id sf_pdma_dt_ids[] = {
+	{ .compatible = "sifive,fu540-c000-pdma" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, sf_pdma_dt_ids);
+
+static struct platform_driver sf_pdma_driver = {
+	.probe		= sf_pdma_probe,
+	.remove		= sf_pdma_remove,
+	.driver		= {
+		.name	= "sf-pdma",
+		.of_match_table = of_match_ptr(sf_pdma_dt_ids),
+	},
+};
+
+static int __init sf_pdma_init(void)
+{
+	return platform_driver_register(&sf_pdma_driver);
+}
+
+static void __exit sf_pdma_exit(void)
+{
+	platform_driver_unregister(&sf_pdma_driver);
+}
+
+/* do early init */
+subsys_initcall(sf_pdma_init);
+module_exit(sf_pdma_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SiFive Platform DMA driver");
+MODULE_AUTHOR("Green Wan <green.wan@sifive.com>");
diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h
new file mode 100644
index 000000000000..0c20167b097d
--- /dev/null
+++ b/drivers/dma/sf-pdma/sf-pdma.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SiFive FU540 Platform DMA driver
+ * Copyright (C) 2019 SiFive
+ *
+ * Based partially on:
+ * - drivers/dma/fsl-edma.c
+ * - drivers/dma/dw-edma/
+ * - drivers/dma/pxa-dma.c
+ *
+ * See the following sources for further documentation:
+ * - Chapter 12 "Platform DMA Engine (PDMA)" of
+ *   SiFive FU540-C000 v1.0
+ *   https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+ */
+#ifndef _SF_PDMA_H
+#define _SF_PDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+#define PDMA_NR_CH					4
+
+#if (PDMA_NR_CH != 4)
+#error "Please define PDMA_NR_CH to 4"
+#endif
+
+#define PDMA_BASE_ADDR					0x3000000
+#define PDMA_CHAN_OFFSET				0x1000
+
+/* Register Offset */
+#define PDMA_CTRL					0x000
+#define PDMA_XFER_TYPE					0x004
+#define PDMA_XFER_SIZE					0x008
+#define PDMA_DST_ADDR					0x010
+#define PDMA_SRC_ADDR					0x018
+#define PDMA_ACT_TYPE					0x104 /* Read-only */
+#define PDMA_REMAINING_BYTE				0x108 /* Read-only */
+#define PDMA_CUR_DST_ADDR				0x110 /* Read-only*/
+#define PDMA_CUR_SRC_ADDR				0x118 /* Read-only*/
+
+/* CTRL */
+#define PDMA_CLEAR_CTRL					0x0
+#define PDMA_CLAIM_MASK					GENMASK(0, 0)
+#define PDMA_RUN_MASK					GENMASK(1, 1)
+#define PDMA_ENABLE_DONE_INT_MASK			GENMASK(14, 14)
+#define PDMA_ENABLE_ERR_INT_MASK			GENMASK(15, 15)
+#define PDMA_DONE_STATUS_MASK				GENMASK(30, 30)
+#define PDMA_ERR_STATUS_MASK				GENMASK(31, 31)
+
+/* Transfer Type */
+#define PDMA_FULL_SPEED					0xFF000008
+
+/* Error Recovery */
+#define MAX_RETRY					1
+
+#define SF_PDMA_REG_BASE(ch)	(pdma->membase + (PDMA_CHAN_OFFSET * (ch)))
+
+struct pdma_regs {
+	/* read-write regs */
+	void __iomem *ctrl;		/* 4 bytes */
+
+	void __iomem *xfer_type;	/* 4 bytes */
+	void __iomem *xfer_size;	/* 8 bytes */
+	void __iomem *dst_addr;		/* 8 bytes */
+	void __iomem *src_addr;		/* 8 bytes */
+
+	/* read-only */
+	void __iomem *act_type;		/* 4 bytes */
+	void __iomem *residue;		/* 8 bytes */
+	void __iomem *cur_dst_addr;	/* 8 bytes */
+	void __iomem *cur_src_addr;	/* 8 bytes */
+};
+
+struct sf_pdma_desc {
+	u32				xfer_type;
+	u64				xfer_size;
+	u64				dst_addr;
+	u64				src_addr;
+	struct virt_dma_desc		vdesc;
+	struct sf_pdma_chan		*chan;
+	bool				in_use;
+	enum dma_transfer_direction	dirn;
+	struct dma_async_tx_descriptor *async_tx;
+};
+
+enum sf_pdma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
+
+struct sf_pdma_chan {
+	struct virt_dma_chan		vchan;
+	enum dma_status			status;
+	enum sf_pdma_pm_state		pm_state;
+	u32				slave_id;
+	struct sf_pdma			*pdma;
+	struct sf_pdma_desc		*desc;
+	struct dma_slave_config		cfg;
+	u32				attr;
+	dma_addr_t			dma_dev_addr;
+	u32				dma_dev_size;
+	struct tasklet_struct		done_tasklet;
+	struct tasklet_struct		err_tasklet;
+	struct pdma_regs		regs;
+	spinlock_t			lock; /* protect chan data */
+	bool				xfer_err;
+	int				txirq;
+	int				errirq;
+	int				retries;
+};
+
+struct sf_pdma {
+	struct dma_device       dma_dev;
+	void __iomem            *membase;
+	void __iomem            *mappedbase;
+	u32			n_chans;
+	struct sf_pdma_chan	chans[PDMA_NR_CH];
+};
+
+#endif /* _SF_PDMA_H */
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 3993ab65c62c..f06016d38a05 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -203,19 +203,27 @@ struct rcar_dmac {
 
 	unsigned int n_channels;
 	struct rcar_dmac_chan *channels;
-	unsigned int channels_mask;
+	u32 channels_mask;
 
 	DECLARE_BITMAP(modules, 256);
 };
 
 #define to_rcar_dmac(d)		container_of(d, struct rcar_dmac, engine)
 
+/*
+ * struct rcar_dmac_of_data - This driver's OF data
+ * @chan_offset_base: DMAC channels base offset
+ * @chan_offset_stride: DMAC channels offset stride
+ */
+struct rcar_dmac_of_data {
+	u32 chan_offset_base;
+	u32 chan_offset_stride;
+};
+
 /* -----------------------------------------------------------------------------
  * Registers
  */
 
-#define RCAR_DMAC_CHAN_OFFSET(i)	(0x8000 + 0x80 * (i))
-
 #define RCAR_DMAISTA			0x0020
 #define RCAR_DMASEC			0x0030
 #define RCAR_DMAOR			0x0060
@@ -1726,6 +1734,7 @@ static const struct dev_pm_ops rcar_dmac_pm = {
 
 static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
 				struct rcar_dmac_chan *rchan,
+				const struct rcar_dmac_of_data *data,
 				unsigned int index)
 {
 	struct platform_device *pdev = to_platform_device(dmac->dev);
@@ -1735,7 +1744,8 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
 	int ret;
 
 	rchan->index = index;
-	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
+	rchan->iomem = dmac->iomem + data->chan_offset_base +
+		       data->chan_offset_stride * index;
 	rchan->mid_rid = -EINVAL;
 
 	spin_lock_init(&rchan->lock);
@@ -1800,7 +1810,15 @@ static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac)
 		return -EINVAL;
 	}
 
+	/*
+	 * If the driver is unable to read dma-channel-mask property,
+	 * the driver assumes that it can use all channels.
+	 */
 	dmac->channels_mask = GENMASK(dmac->n_channels - 1, 0);
+	of_property_read_u32(np, "dma-channel-mask", &dmac->channels_mask);
+
+	/* If the property has out-of-channel mask, this driver clears it */
+	dmac->channels_mask &= GENMASK(dmac->n_channels - 1, 0);
 
 	return 0;
 }
@@ -1813,10 +1831,14 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 		DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES;
 	struct dma_device *engine;
 	struct rcar_dmac *dmac;
-	struct resource *mem;
+	const struct rcar_dmac_of_data *data;
 	unsigned int i;
 	int ret;
 
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data)
+		return -EINVAL;
+
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
 	if (!dmac)
 		return -ENOMEM;
@@ -1848,8 +1870,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	/* Request resources. */
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+	dmac->iomem = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(dmac->iomem))
 		return PTR_ERR(dmac->iomem);
 
@@ -1901,7 +1922,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 		if (!(dmac->channels_mask & BIT(i)))
 			continue;
 
-		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i);
+		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], data, i);
 		if (ret < 0)
 			goto error;
 	}
@@ -1948,8 +1969,16 @@ static void rcar_dmac_shutdown(struct platform_device *pdev)
 	rcar_dmac_stop_all_chan(dmac);
 }
 
+static const struct rcar_dmac_of_data rcar_dmac_data = {
+	.chan_offset_base = 0x8000,
+	.chan_offset_stride = 0x80,
+};
+
 static const struct of_device_id rcar_dmac_of_ids[] = {
-	{ .compatible = "renesas,rcar-dmac", },
+	{
+		.compatible = "renesas,rcar-dmac",
+		.data = &rcar_dmac_data,
+	},
 	{ /* Sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids);
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index 8546ad034720..9a31a315dbef 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -99,6 +99,7 @@
 /* DMA_CHN_WARP_* register definition */
 #define SPRD_DMA_HIGH_ADDR_MASK		GENMASK(31, 28)
 #define SPRD_DMA_LOW_ADDR_MASK		GENMASK(31, 0)
+#define SPRD_DMA_WRAP_ADDR_MASK		GENMASK(27, 0)
 #define SPRD_DMA_HIGH_ADDR_OFFSET	4
 
 /* SPRD_DMA_CHN_INTC register definition */
@@ -118,6 +119,8 @@
 #define SPRD_DMA_SWT_MODE_OFFSET	26
 #define SPRD_DMA_REQ_MODE_OFFSET	24
 #define SPRD_DMA_REQ_MODE_MASK		GENMASK(1, 0)
+#define SPRD_DMA_WRAP_SEL_DEST		BIT(23)
+#define SPRD_DMA_WRAP_EN		BIT(22)
 #define SPRD_DMA_FIX_SEL_OFFSET		21
 #define SPRD_DMA_FIX_EN_OFFSET		20
 #define SPRD_DMA_LLIST_END		BIT(19)
@@ -804,6 +807,8 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
 	temp |= req_mode << SPRD_DMA_REQ_MODE_OFFSET;
 	temp |= fix_mode << SPRD_DMA_FIX_SEL_OFFSET;
 	temp |= fix_en << SPRD_DMA_FIX_EN_OFFSET;
+	temp |= schan->linklist.wrap_addr ?
+		SPRD_DMA_WRAP_EN | SPRD_DMA_WRAP_SEL_DEST : 0;
 	temp |= slave_cfg->src_maxburst & SPRD_DMA_FRG_LEN_MASK;
 	hw->frg_len = temp;
 
@@ -831,6 +836,12 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
 		hw->llist_ptr = lower_32_bits(llist_ptr);
 		hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) &
 			SPRD_DMA_LLIST_HIGH_MASK;
+
+		if (schan->linklist.wrap_addr) {
+			hw->wrap_ptr |= schan->linklist.wrap_addr &
+				SPRD_DMA_WRAP_ADDR_MASK;
+			hw->wrap_to |= dst & SPRD_DMA_WRAP_ADDR_MASK;
+		}
 	} else {
 		hw->llist_ptr = 0;
 		hw->src_blk_step = 0;
@@ -939,9 +950,11 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 		schan->linklist.phy_addr = ll_cfg->phy_addr;
 		schan->linklist.virt_addr = ll_cfg->virt_addr;
+		schan->linklist.wrap_addr = ll_cfg->wrap_addr;
 	} else {
 		schan->linklist.phy_addr = 0;
 		schan->linklist.virt_addr = 0;
+		schan->linklist.wrap_addr = 0;
 	}
 
 	/*
@@ -1080,7 +1093,6 @@ static int sprd_dma_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct sprd_dma_dev *sdev;
 	struct sprd_dma_chn *dma_chn;
-	struct resource *res;
 	u32 chn_count;
 	int ret, i;
 
@@ -1126,8 +1138,7 @@ static int sprd_dma_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "no interrupts for the dma controller\n");
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sdev->glb_base = devm_ioremap_resource(&pdev->dev, res);
+	sdev->glb_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sdev->glb_base))
 		return PTR_ERR(sdev->glb_base);
 
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index e397a50058c8..bbc2bda3b902 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -669,43 +669,41 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
 	dma_addr_t src, dest;
 	u32 endpoints;
 	int nr_periods, offset, plength, i;
+	u8 ram_type, io_mode, linear_mode;
 
 	if (!is_slave_direction(dir)) {
 		dev_err(chan2dev(chan), "Invalid DMA direction\n");
 		return NULL;
 	}
 
-	if (vchan->is_dedicated) {
-		/*
-		 * As we are using this just for audio data, we need to use
-		 * normal DMA. There is nothing stopping us from supporting
-		 * dedicated DMA here as well, so if a client comes up and
-		 * requires it, it will be simple to implement it.
-		 */
-		dev_err(chan2dev(chan),
-			"Cyclic transfers are only supported on Normal DMA\n");
-		return NULL;
-	}
-
 	contract = generate_dma_contract();
 	if (!contract)
 		return NULL;
 
 	contract->is_cyclic = 1;
 
-	/* Figure out the endpoints and the address we need */
+	if (vchan->is_dedicated) {
+		io_mode = SUN4I_DDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM;
+	} else {
+		io_mode = SUN4I_NDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM;
+	}
+
 	if (dir == DMA_MEM_TO_DEV) {
 		src = buf;
 		dest = sconfig->dst_addr;
-		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) |
-			    SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
-			    SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO);
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type);
 	} else {
 		src = sconfig->src_addr;
 		dest = buf;
-		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
-			    SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) |
-			    SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM);
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode);
 	}
 
 	/*
@@ -747,8 +745,13 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
 			dest = buf + offset;
 
 		/* Make the promise */
-		promise = generate_ndma_promise(chan, src, dest,
-						plength, sconfig, dir);
+		if (vchan->is_dedicated)
+			promise = generate_ddma_promise(chan, src, dest,
+							plength, sconfig);
+		else
+			promise = generate_ndma_promise(chan, src, dest,
+							plength, sconfig, dir);
+
 		if (!promise) {
 			/* TODO: should we free everything? */
 			return NULL;
@@ -885,12 +888,13 @@ static int sun4i_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	spin_lock_irqsave(&vchan->vc.lock, flags);
-	vchan_dma_desc_free_list(&vchan->vc, &head);
 	/* Clear these so the vchan is usable again */
 	vchan->processing = NULL;
 	vchan->pchan = NULL;
 	spin_unlock_irqrestore(&vchan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
 	return 0;
 }
 
diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index d507c24fbf31..f76e06651f80 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -34,5 +34,29 @@ config DMA_OMAP
 	  Enable support for the TI sDMA (System DMA or DMA4) controller. This
 	  DMA engine is found on OMAP and DRA7xx parts.
 
+config TI_K3_UDMA
+	bool "Texas Instruments UDMA support"
+	depends on ARCH_K3 || COMPILE_TEST
+	depends on TI_SCI_PROTOCOL
+	depends on TI_SCI_INTA_IRQCHIP
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select TI_K3_RINGACC
+	select TI_K3_PSIL
+        help
+	  Enable support for the TI UDMA (Unified DMA) controller. This
+	  DMA engine is used in AM65x and j721e.
+
+config TI_K3_UDMA_GLUE_LAYER
+	bool "Texas Instruments UDMA Glue layer for non DMAengine users"
+	depends on ARCH_K3 || COMPILE_TEST
+	depends on TI_K3_UDMA
+	help
+	  Say y here to support the K3 NAVSS DMA glue interface
+	  If unsure, say N.
+
+config TI_K3_PSIL
+	bool
+
 config TI_DMA_CROSSBAR
 	bool
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index 113e59ec9c32..9a29a107e374 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -2,4 +2,7 @@
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o
+obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o
+obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o k3-psil-am654.o k3-psil-j721e.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index ba7c4f07fcd6..03a7f647f7b2 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -260,6 +260,13 @@ struct edma_cc {
 	 */
 	unsigned long *slot_inuse;
 
+	/*
+	 * For tracking reserved channels used by DSP.
+	 * If the bit is cleared, the channel is allocated to be used by DSP
+	 * and Linux must not touch it.
+	 */
+	unsigned long *channels_mask;
+
 	struct dma_device		dma_slave;
 	struct dma_device		*dma_memcpy;
 	struct edma_chan		*slave_chans;
@@ -716,6 +723,12 @@ static int edma_alloc_channel(struct edma_chan *echan,
 	struct edma_cc *ecc = echan->ecc;
 	int channel = EDMA_CHAN_SLOT(echan->ch_num);
 
+	if (!test_bit(echan->ch_num, ecc->channels_mask)) {
+		dev_err(ecc->dev, "Channel%d is reserved, can not be used!\n",
+			echan->ch_num);
+		return -EINVAL;
+	}
+
 	/* ensure access through shadow region 0 */
 	edma_or_array2(ecc, EDMA_DRAE, 0, EDMA_REG_ARRAY_INDEX(channel),
 		       EDMA_CHANNEL_BIT(channel));
@@ -2249,10 +2262,8 @@ static int edma_probe(struct platform_device *pdev)
 {
 	struct edma_soc_info	*info = pdev->dev.platform_data;
 	s8			(*queue_priority_mapping)[2];
-	int			i, off;
-	const s16		(*rsv_slots)[2];
-	const s16		(*xbar_chans)[2];
-	int			irq;
+	const s16		(*reserved)[2];
+	int			i, irq;
 	char			*irq_name;
 	struct resource		*mem;
 	struct device_node	*node = pdev->dev.of_node;
@@ -2278,13 +2289,6 @@ static int edma_probe(struct platform_device *pdev)
 	if (!info)
 		return -ENODEV;
 
-	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
-		dev_err(dev, "pm_runtime_get_sync() failed\n");
-		return ret;
-	}
-
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -2315,31 +2319,54 @@ static int edma_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ecc);
 
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync() failed\n");
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
 	/* Get eDMA3 configuration from IP */
 	ret = edma_setup_from_hw(dev, info, ecc);
 	if (ret)
-		return ret;
+		goto err_disable_pm;
 
 	/* Allocate memory based on the information we got from the IP */
 	ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels,
 					sizeof(*ecc->slave_chans), GFP_KERNEL);
-	if (!ecc->slave_chans)
-		return -ENOMEM;
 
 	ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots),
 				       sizeof(unsigned long), GFP_KERNEL);
-	if (!ecc->slot_inuse)
-		return -ENOMEM;
+
+	ecc->channels_mask = devm_kcalloc(dev,
+					   BITS_TO_LONGS(ecc->num_channels),
+					   sizeof(unsigned long), GFP_KERNEL);
+	if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask) {
+		ret = -ENOMEM;
+		goto err_disable_pm;
+	}
+
+	/* Mark all channels available initially */
+	bitmap_fill(ecc->channels_mask, ecc->num_channels);
 
 	ecc->default_queue = info->default_queue;
 
 	if (info->rsv) {
 		/* Set the reserved slots in inuse list */
-		rsv_slots = info->rsv->rsv_slots;
-		if (rsv_slots) {
-			for (i = 0; rsv_slots[i][0] != -1; i++)
-				bitmap_set(ecc->slot_inuse, rsv_slots[i][0],
-					   rsv_slots[i][1]);
+		reserved = info->rsv->rsv_slots;
+		if (reserved) {
+			for (i = 0; reserved[i][0] != -1; i++)
+				bitmap_set(ecc->slot_inuse, reserved[i][0],
+					   reserved[i][1]);
+		}
+
+		/* Clear channels not usable for Linux */
+		reserved = info->rsv->rsv_chans;
+		if (reserved) {
+			for (i = 0; reserved[i][0] != -1; i++)
+				bitmap_clear(ecc->channels_mask, reserved[i][0],
+					     reserved[i][1]);
 		}
 	}
 
@@ -2349,14 +2376,6 @@ static int edma_probe(struct platform_device *pdev)
 			edma_write_slot(ecc, i, &dummy_paramset);
 	}
 
-	/* Clear the xbar mapped channels in unused list */
-	xbar_chans = info->xbar_chans;
-	if (xbar_chans) {
-		for (i = 0; xbar_chans[i][1] != -1; i++) {
-			off = xbar_chans[i][1];
-		}
-	}
-
 	irq = platform_get_irq_byname(pdev, "edma3_ccint");
 	if (irq < 0 && node)
 		irq = irq_of_parse_and_map(node, 0);
@@ -2368,7 +2387,7 @@ static int edma_probe(struct platform_device *pdev)
 				       ecc);
 		if (ret) {
 			dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
-			return ret;
+			goto err_disable_pm;
 		}
 		ecc->ccint = irq;
 	}
@@ -2384,7 +2403,7 @@ static int edma_probe(struct platform_device *pdev)
 				       ecc);
 		if (ret) {
 			dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
-			return ret;
+			goto err_disable_pm;
 		}
 		ecc->ccerrint = irq;
 	}
@@ -2392,19 +2411,23 @@ static int edma_probe(struct platform_device *pdev)
 	ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
 	if (ecc->dummy_slot < 0) {
 		dev_err(dev, "Can't allocate PaRAM dummy slot\n");
-		return ecc->dummy_slot;
+		ret = ecc->dummy_slot;
+		goto err_disable_pm;
 	}
 
 	queue_priority_mapping = info->queue_priority_mapping;
 
 	if (!ecc->legacy_mode) {
 		int lowest_priority = 0;
+		unsigned int array_max;
 		struct of_phandle_args tc_args;
 
 		ecc->tc_list = devm_kcalloc(dev, ecc->num_tc,
 					    sizeof(*ecc->tc_list), GFP_KERNEL);
-		if (!ecc->tc_list)
-			return -ENOMEM;
+		if (!ecc->tc_list) {
+			ret = -ENOMEM;
+			goto err_reg1;
+		}
 
 		for (i = 0;; i++) {
 			ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs",
@@ -2420,6 +2443,18 @@ static int edma_probe(struct platform_device *pdev)
 				info->default_queue = i;
 			}
 		}
+
+		/* See if we have optional dma-channel-mask array */
+		array_max = DIV_ROUND_UP(ecc->num_channels, BITS_PER_TYPE(u32));
+		ret = of_property_read_variable_u32_array(node,
+						"dma-channel-mask",
+						(u32 *)ecc->channels_mask,
+						1, array_max);
+		if (ret > 0 && ret != array_max)
+			dev_warn(dev, "dma-channel-mask is not complete.\n");
+		else if (ret == -EOVERFLOW || ret == -ENODATA)
+			dev_warn(dev,
+				 "dma-channel-mask is out of range or empty\n");
 	}
 
 	/* Event queue priority mapping */
@@ -2437,6 +2472,10 @@ static int edma_probe(struct platform_device *pdev)
 	edma_dma_init(ecc, legacy_mode);
 
 	for (i = 0; i < ecc->num_channels; i++) {
+		/* Do not touch reserved channels */
+		if (!test_bit(i, ecc->channels_mask))
+			continue;
+
 		/* Assign all channels to the default queue */
 		edma_assign_channel_eventq(&ecc->slave_chans[i],
 					   info->default_queue);
@@ -2473,6 +2512,9 @@ static int edma_probe(struct platform_device *pdev)
 
 err_reg1:
 	edma_free_slot(ecc, ecc->dummy_slot);
+err_disable_pm:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
 	return ret;
 }
 
@@ -2503,6 +2545,8 @@ static int edma_remove(struct platform_device *pdev)
 	if (ecc->dma_memcpy)
 		dma_async_device_unregister(ecc->dma_memcpy);
 	edma_free_slot(ecc, ecc->dummy_slot);
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
 
 	return 0;
 }
diff --git a/drivers/dma/ti/k3-psil-am654.c b/drivers/dma/ti/k3-psil-am654.c
new file mode 100644
index 000000000000..a896a15908cf
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am654.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+		},					\
+	}
+
+#define PSIL_PDMA_XY_PKT(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pkt_mode = 1,			\
+		},					\
+	}
+
+#define PSIL_ETHERNET(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 16,			\
+		},					\
+	}
+
+#define PSIL_SA2UL(x, tx)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 64,			\
+			.notdpkt = tx,			\
+		},					\
+	}
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep am654_src_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0x4000, 0),
+	PSIL_SA2UL(0x4001, 0),
+	PSIL_SA2UL(0x4002, 0),
+	PSIL_SA2UL(0x4003, 0),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0x4100),
+	PSIL_ETHERNET(0x4101),
+	PSIL_ETHERNET(0x4102),
+	PSIL_ETHERNET(0x4103),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0x4200),
+	PSIL_ETHERNET(0x4201),
+	PSIL_ETHERNET(0x4202),
+	PSIL_ETHERNET(0x4203),
+	/* PRU_ICSSG2 */
+	PSIL_ETHERNET(0x4300),
+	PSIL_ETHERNET(0x4301),
+	PSIL_ETHERNET(0x4302),
+	PSIL_ETHERNET(0x4303),
+	/* PDMA0 - McASPs */
+	PSIL_PDMA_XY_TR(0x4400),
+	PSIL_PDMA_XY_TR(0x4401),
+	PSIL_PDMA_XY_TR(0x4402),
+	/* PDMA1 - SPI0-4 */
+	PSIL_PDMA_XY_PKT(0x4500),
+	PSIL_PDMA_XY_PKT(0x4501),
+	PSIL_PDMA_XY_PKT(0x4502),
+	PSIL_PDMA_XY_PKT(0x4503),
+	PSIL_PDMA_XY_PKT(0x4504),
+	PSIL_PDMA_XY_PKT(0x4505),
+	PSIL_PDMA_XY_PKT(0x4506),
+	PSIL_PDMA_XY_PKT(0x4507),
+	PSIL_PDMA_XY_PKT(0x4508),
+	PSIL_PDMA_XY_PKT(0x4509),
+	PSIL_PDMA_XY_PKT(0x450a),
+	PSIL_PDMA_XY_PKT(0x450b),
+	PSIL_PDMA_XY_PKT(0x450c),
+	PSIL_PDMA_XY_PKT(0x450d),
+	PSIL_PDMA_XY_PKT(0x450e),
+	PSIL_PDMA_XY_PKT(0x450f),
+	PSIL_PDMA_XY_PKT(0x4510),
+	PSIL_PDMA_XY_PKT(0x4511),
+	PSIL_PDMA_XY_PKT(0x4512),
+	PSIL_PDMA_XY_PKT(0x4513),
+	/* PDMA1 - USART0-2 */
+	PSIL_PDMA_XY_PKT(0x4514),
+	PSIL_PDMA_XY_PKT(0x4515),
+	PSIL_PDMA_XY_PKT(0x4516),
+	/* CPSW0 */
+	PSIL_ETHERNET(0x7000),
+	/* MCU_PDMA0 - ADCs */
+	PSIL_PDMA_XY_TR(0x7100),
+	PSIL_PDMA_XY_TR(0x7101),
+	PSIL_PDMA_XY_TR(0x7102),
+	PSIL_PDMA_XY_TR(0x7103),
+	/* MCU_PDMA1 - MCU_SPI0-2 */
+	PSIL_PDMA_XY_PKT(0x7200),
+	PSIL_PDMA_XY_PKT(0x7201),
+	PSIL_PDMA_XY_PKT(0x7202),
+	PSIL_PDMA_XY_PKT(0x7203),
+	PSIL_PDMA_XY_PKT(0x7204),
+	PSIL_PDMA_XY_PKT(0x7205),
+	PSIL_PDMA_XY_PKT(0x7206),
+	PSIL_PDMA_XY_PKT(0x7207),
+	PSIL_PDMA_XY_PKT(0x7208),
+	PSIL_PDMA_XY_PKT(0x7209),
+	PSIL_PDMA_XY_PKT(0x720a),
+	PSIL_PDMA_XY_PKT(0x720b),
+	/* MCU_PDMA1 - MCU_USART0 */
+	PSIL_PDMA_XY_PKT(0x7212),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep am654_dst_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0xc000, 1),
+	PSIL_SA2UL(0xc001, 1),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0xc100),
+	PSIL_ETHERNET(0xc101),
+	PSIL_ETHERNET(0xc102),
+	PSIL_ETHERNET(0xc103),
+	PSIL_ETHERNET(0xc104),
+	PSIL_ETHERNET(0xc105),
+	PSIL_ETHERNET(0xc106),
+	PSIL_ETHERNET(0xc107),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0xc200),
+	PSIL_ETHERNET(0xc201),
+	PSIL_ETHERNET(0xc202),
+	PSIL_ETHERNET(0xc203),
+	PSIL_ETHERNET(0xc204),
+	PSIL_ETHERNET(0xc205),
+	PSIL_ETHERNET(0xc206),
+	PSIL_ETHERNET(0xc207),
+	/* PRU_ICSSG2 */
+	PSIL_ETHERNET(0xc300),
+	PSIL_ETHERNET(0xc301),
+	PSIL_ETHERNET(0xc302),
+	PSIL_ETHERNET(0xc303),
+	PSIL_ETHERNET(0xc304),
+	PSIL_ETHERNET(0xc305),
+	PSIL_ETHERNET(0xc306),
+	PSIL_ETHERNET(0xc307),
+	/* CPSW0 */
+	PSIL_ETHERNET(0xf000),
+	PSIL_ETHERNET(0xf001),
+	PSIL_ETHERNET(0xf002),
+	PSIL_ETHERNET(0xf003),
+	PSIL_ETHERNET(0xf004),
+	PSIL_ETHERNET(0xf005),
+	PSIL_ETHERNET(0xf006),
+	PSIL_ETHERNET(0xf007),
+};
+
+struct psil_ep_map am654_ep_map = {
+	.name = "am654",
+	.src = am654_src_ep_map,
+	.src_count = ARRAY_SIZE(am654_src_ep_map),
+	.dst = am654_dst_ep_map,
+	.dst_count = ARRAY_SIZE(am654_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
new file mode 100644
index 000000000000..e3cfd5f66842
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+		},					\
+	}
+
+#define PSIL_PDMA_XY_PKT(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pkt_mode = 1,			\
+		},					\
+	}
+
+#define PSIL_PDMA_MCASP(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pdma_acc32 = 1,		\
+			.pdma_burst = 1,		\
+		},					\
+	}
+
+#define PSIL_ETHERNET(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 16,			\
+		},					\
+	}
+
+#define PSIL_SA2UL(x, tx)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 64,			\
+			.notdpkt = tx,			\
+		},					\
+	}
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep j721e_src_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0x4000, 0),
+	PSIL_SA2UL(0x4001, 0),
+	PSIL_SA2UL(0x4002, 0),
+	PSIL_SA2UL(0x4003, 0),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0x4100),
+	PSIL_ETHERNET(0x4101),
+	PSIL_ETHERNET(0x4102),
+	PSIL_ETHERNET(0x4103),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0x4200),
+	PSIL_ETHERNET(0x4201),
+	PSIL_ETHERNET(0x4202),
+	PSIL_ETHERNET(0x4203),
+	/* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */
+	PSIL_PDMA_MCASP(0x4400),
+	PSIL_PDMA_MCASP(0x4401),
+	PSIL_PDMA_MCASP(0x4402),
+	/* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */
+	PSIL_PDMA_MCASP(0x4500),
+	PSIL_PDMA_MCASP(0x4501),
+	PSIL_PDMA_MCASP(0x4502),
+	PSIL_PDMA_MCASP(0x4503),
+	PSIL_PDMA_MCASP(0x4504),
+	PSIL_PDMA_MCASP(0x4505),
+	PSIL_PDMA_MCASP(0x4506),
+	PSIL_PDMA_MCASP(0x4507),
+	PSIL_PDMA_MCASP(0x4508),
+	/* PDMA8 (PDMA_MISC_G0) - SPI0-1 */
+	PSIL_PDMA_XY_PKT(0x4600),
+	PSIL_PDMA_XY_PKT(0x4601),
+	PSIL_PDMA_XY_PKT(0x4602),
+	PSIL_PDMA_XY_PKT(0x4603),
+	PSIL_PDMA_XY_PKT(0x4604),
+	PSIL_PDMA_XY_PKT(0x4605),
+	PSIL_PDMA_XY_PKT(0x4606),
+	PSIL_PDMA_XY_PKT(0x4607),
+	/* PDMA9 (PDMA_MISC_G1) - SPI2-3 */
+	PSIL_PDMA_XY_PKT(0x460c),
+	PSIL_PDMA_XY_PKT(0x460d),
+	PSIL_PDMA_XY_PKT(0x460e),
+	PSIL_PDMA_XY_PKT(0x460f),
+	PSIL_PDMA_XY_PKT(0x4610),
+	PSIL_PDMA_XY_PKT(0x4611),
+	PSIL_PDMA_XY_PKT(0x4612),
+	PSIL_PDMA_XY_PKT(0x4613),
+	/* PDMA10 (PDMA_MISC_G2) - SPI4-5 */
+	PSIL_PDMA_XY_PKT(0x4618),
+	PSIL_PDMA_XY_PKT(0x4619),
+	PSIL_PDMA_XY_PKT(0x461a),
+	PSIL_PDMA_XY_PKT(0x461b),
+	PSIL_PDMA_XY_PKT(0x461c),
+	PSIL_PDMA_XY_PKT(0x461d),
+	PSIL_PDMA_XY_PKT(0x461e),
+	PSIL_PDMA_XY_PKT(0x461f),
+	/* PDMA11 (PDMA_MISC_G3) */
+	PSIL_PDMA_XY_PKT(0x4624),
+	PSIL_PDMA_XY_PKT(0x4625),
+	PSIL_PDMA_XY_PKT(0x4626),
+	PSIL_PDMA_XY_PKT(0x4627),
+	PSIL_PDMA_XY_PKT(0x4628),
+	PSIL_PDMA_XY_PKT(0x4629),
+	PSIL_PDMA_XY_PKT(0x4630),
+	PSIL_PDMA_XY_PKT(0x463a),
+	/* PDMA13 (PDMA_USART_G0) - UART0-1 */
+	PSIL_PDMA_XY_PKT(0x4700),
+	PSIL_PDMA_XY_PKT(0x4701),
+	/* PDMA14 (PDMA_USART_G1) - UART2-3 */
+	PSIL_PDMA_XY_PKT(0x4702),
+	PSIL_PDMA_XY_PKT(0x4703),
+	/* PDMA15 (PDMA_USART_G2) - UART4-9 */
+	PSIL_PDMA_XY_PKT(0x4704),
+	PSIL_PDMA_XY_PKT(0x4705),
+	PSIL_PDMA_XY_PKT(0x4706),
+	PSIL_PDMA_XY_PKT(0x4707),
+	PSIL_PDMA_XY_PKT(0x4708),
+	PSIL_PDMA_XY_PKT(0x4709),
+	/* CPSW9 */
+	PSIL_ETHERNET(0x4a00),
+	/* CPSW0 */
+	PSIL_ETHERNET(0x7000),
+	/* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+	PSIL_PDMA_XY_PKT(0x7100),
+	PSIL_PDMA_XY_PKT(0x7101),
+	PSIL_PDMA_XY_PKT(0x7102),
+	PSIL_PDMA_XY_PKT(0x7103),
+	/* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+	PSIL_PDMA_XY_PKT(0x7200),
+	PSIL_PDMA_XY_PKT(0x7201),
+	PSIL_PDMA_XY_PKT(0x7202),
+	PSIL_PDMA_XY_PKT(0x7203),
+	PSIL_PDMA_XY_PKT(0x7204),
+	PSIL_PDMA_XY_PKT(0x7205),
+	PSIL_PDMA_XY_PKT(0x7206),
+	PSIL_PDMA_XY_PKT(0x7207),
+	/* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+	PSIL_PDMA_XY_PKT(0x7300),
+	/* MCU_PDMA_ADC - ADC0-1 */
+	PSIL_PDMA_XY_TR(0x7400),
+	PSIL_PDMA_XY_TR(0x7401),
+	PSIL_PDMA_XY_TR(0x7402),
+	PSIL_PDMA_XY_TR(0x7403),
+	/* SA2UL */
+	PSIL_SA2UL(0x7500, 0),
+	PSIL_SA2UL(0x7501, 0),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep j721e_dst_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0xc000, 1),
+	PSIL_SA2UL(0xc001, 1),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0xc100),
+	PSIL_ETHERNET(0xc101),
+	PSIL_ETHERNET(0xc102),
+	PSIL_ETHERNET(0xc103),
+	PSIL_ETHERNET(0xc104),
+	PSIL_ETHERNET(0xc105),
+	PSIL_ETHERNET(0xc106),
+	PSIL_ETHERNET(0xc107),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0xc200),
+	PSIL_ETHERNET(0xc201),
+	PSIL_ETHERNET(0xc202),
+	PSIL_ETHERNET(0xc203),
+	PSIL_ETHERNET(0xc204),
+	PSIL_ETHERNET(0xc205),
+	PSIL_ETHERNET(0xc206),
+	PSIL_ETHERNET(0xc207),
+	/* CPSW9 */
+	PSIL_ETHERNET(0xca00),
+	PSIL_ETHERNET(0xca01),
+	PSIL_ETHERNET(0xca02),
+	PSIL_ETHERNET(0xca03),
+	PSIL_ETHERNET(0xca04),
+	PSIL_ETHERNET(0xca05),
+	PSIL_ETHERNET(0xca06),
+	PSIL_ETHERNET(0xca07),
+	/* CPSW0 */
+	PSIL_ETHERNET(0xf000),
+	PSIL_ETHERNET(0xf001),
+	PSIL_ETHERNET(0xf002),
+	PSIL_ETHERNET(0xf003),
+	PSIL_ETHERNET(0xf004),
+	PSIL_ETHERNET(0xf005),
+	PSIL_ETHERNET(0xf006),
+	PSIL_ETHERNET(0xf007),
+	/* SA2UL */
+	PSIL_SA2UL(0xf500, 1),
+};
+
+struct psil_ep_map j721e_ep_map = {
+	.name = "j721e",
+	.src = j721e_src_ep_map,
+	.src_count = ARRAY_SIZE(j721e_src_ep_map),
+	.dst = j721e_dst_ep_map,
+	.dst_count = ARRAY_SIZE(j721e_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
new file mode 100644
index 000000000000..a1f389ca371e
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_PSIL_PRIV_H_
+#define K3_PSIL_PRIV_H_
+
+#include <linux/dma/k3-psil.h>
+
+struct psil_ep {
+	u32 thread_id;
+	struct psil_endpoint_config ep_config;
+};
+
+/**
+ * struct psil_ep_map - PSI-L thread ID configuration maps
+ * @name:	Name of the map, set it to the name of the SoC
+ * @src:	Array of source PSI-L thread configurations
+ * @src_count:	Number of entries in the src array
+ * @dst:	Array of destination PSI-L thread configurations
+ * @dst_count:	Number of entries in the dst array
+ *
+ * In case of symmetric configuration for a matching src/dst thread (for example
+ * 0x4400 and 0xc400) only the src configuration can be present. If no dst
+ * configuration found the code will look for (dst_thread_id & ~0x8000) to find
+ * the symmetric match.
+ */
+struct psil_ep_map {
+	char *name;
+	struct psil_ep	*src;
+	int src_count;
+	struct psil_ep	*dst;
+	int dst_count;
+};
+
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id);
+
+/* SoC PSI-L endpoint maps */
+extern struct psil_ep_map am654_ep_map;
+extern struct psil_ep_map j721e_ep_map;
+
+#endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
new file mode 100644
index 000000000000..d7b965049ccb
--- /dev/null
+++ b/drivers/dma/ti/k3-psil.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include "k3-psil-priv.h"
+
+static DEFINE_MUTEX(ep_map_mutex);
+static struct psil_ep_map *soc_ep_map;
+
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id)
+{
+	int i;
+
+	mutex_lock(&ep_map_mutex);
+	if (!soc_ep_map) {
+		if (of_machine_is_compatible("ti,am654")) {
+			soc_ep_map = &am654_ep_map;
+		} else if (of_machine_is_compatible("ti,j721e")) {
+			soc_ep_map = &j721e_ep_map;
+		} else {
+			pr_err("PSIL: No compatible machine found for map\n");
+			return ERR_PTR(-ENOTSUPP);
+		}
+		pr_debug("%s: Using map for %s\n", __func__, soc_ep_map->name);
+	}
+	mutex_unlock(&ep_map_mutex);
+
+	if (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET && soc_ep_map->dst) {
+		/* check in destination thread map */
+		for (i = 0; i < soc_ep_map->dst_count; i++) {
+			if (soc_ep_map->dst[i].thread_id == thread_id)
+				return &soc_ep_map->dst[i].ep_config;
+		}
+	}
+
+	thread_id &= ~K3_PSIL_DST_THREAD_ID_OFFSET;
+	if (soc_ep_map->src) {
+		for (i = 0; i < soc_ep_map->src_count; i++) {
+			if (soc_ep_map->src[i].thread_id == thread_id)
+				return &soc_ep_map->src[i].ep_config;
+		}
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(psil_get_ep_config);
+
+int psil_set_new_ep_config(struct device *dev, const char *name,
+			   struct psil_endpoint_config *ep_config)
+{
+	struct psil_endpoint_config *dst_ep_config;
+	struct of_phandle_args dma_spec;
+	u32 thread_id;
+	int index;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	index = of_property_match_string(dev->of_node, "dma-names", name);
+	if (index < 0)
+		return index;
+
+	if (of_parse_phandle_with_args(dev->of_node, "dmas", "#dma-cells",
+				       index, &dma_spec))
+		return -ENOENT;
+
+	thread_id = dma_spec.args[0];
+
+	dst_ep_config = psil_get_ep_config(thread_id);
+	if (IS_ERR(dst_ep_config)) {
+		pr_err("PSIL: thread ID 0x%04x not defined in map\n",
+		       thread_id);
+		of_node_put(dma_spec.np);
+		return PTR_ERR(dst_ep_config);
+	}
+
+	memcpy(dst_ep_config, ep_config, sizeof(*dst_ep_config));
+
+	of_node_put(dma_spec.np);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(psil_set_new_ep_config);
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
new file mode 100644
index 000000000000..c1511298ece2
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -0,0 +1,1198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 NAVSS DMA glue interface
+ *
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/dma/ti-cppi5.h>
+#include <linux/dma/k3-udma-glue.h>
+
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+struct k3_udma_glue_common {
+	struct device *dev;
+	struct udma_dev *udmax;
+	const struct udma_tisci_rm *tisci_rm;
+	struct k3_ringacc *ringacc;
+	u32 src_thread;
+	u32 dst_thread;
+
+	u32  hdesc_size;
+	bool epib;
+	u32  psdata_size;
+	u32  swdata_size;
+};
+
+struct k3_udma_glue_tx_channel {
+	struct k3_udma_glue_common common;
+
+	struct udma_tchan *udma_tchanx;
+	int udma_tchan_id;
+
+	struct k3_ring *ringtx;
+	struct k3_ring *ringtxcq;
+
+	bool psil_paired;
+
+	int virq;
+
+	atomic_t free_pkts;
+	bool tx_pause_on_err;
+	bool tx_filt_einfo;
+	bool tx_filt_pswords;
+	bool tx_supr_tdpkt;
+};
+
+struct k3_udma_glue_rx_flow {
+	struct udma_rflow *udma_rflow;
+	int udma_rflow_id;
+	struct k3_ring *ringrx;
+	struct k3_ring *ringrxfdq;
+
+	int virq;
+};
+
+struct k3_udma_glue_rx_channel {
+	struct k3_udma_glue_common common;
+
+	struct udma_rchan *udma_rchanx;
+	int udma_rchan_id;
+	bool remote;
+
+	bool psil_paired;
+
+	u32  swdata_size;
+	int  flow_id_base;
+
+	struct k3_udma_glue_rx_flow *flows;
+	u32 flow_num;
+	u32 flows_ready;
+};
+
+#define K3_UDMAX_TDOWN_TIMEOUT_US 1000
+
+static int of_k3_udma_glue_parse(struct device_node *udmax_np,
+				 struct k3_udma_glue_common *common)
+{
+	common->ringacc = of_k3_ringacc_get_by_phandle(udmax_np,
+						       "ti,ringacc");
+	if (IS_ERR(common->ringacc))
+		return PTR_ERR(common->ringacc);
+
+	common->udmax = of_xudma_dev_get(udmax_np, NULL);
+	if (IS_ERR(common->udmax))
+		return PTR_ERR(common->udmax);
+
+	common->tisci_rm = xudma_dev_get_tisci_rm(common->udmax);
+
+	return 0;
+}
+
+static int of_k3_udma_glue_parse_chn(struct device_node *chn_np,
+		const char *name, struct k3_udma_glue_common *common,
+		bool tx_chn)
+{
+	struct psil_endpoint_config *ep_config;
+	struct of_phandle_args dma_spec;
+	u32 thread_id;
+	int ret = 0;
+	int index;
+
+	if (unlikely(!name))
+		return -EINVAL;
+
+	index = of_property_match_string(chn_np, "dma-names", name);
+	if (index < 0)
+		return index;
+
+	if (of_parse_phandle_with_args(chn_np, "dmas", "#dma-cells", index,
+				       &dma_spec))
+		return -ENOENT;
+
+	thread_id = dma_spec.args[0];
+
+	if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+		ret = -EINVAL;
+		goto out_put_spec;
+	}
+
+	if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+		ret = -EINVAL;
+		goto out_put_spec;
+	}
+
+	/* get psil endpoint config */
+	ep_config = psil_get_ep_config(thread_id);
+	if (IS_ERR(ep_config)) {
+		dev_err(common->dev,
+			"No configuration for psi-l thread 0x%04x\n",
+			thread_id);
+		ret = PTR_ERR(ep_config);
+		goto out_put_spec;
+	}
+
+	common->epib = ep_config->needs_epib;
+	common->psdata_size = ep_config->psd_size;
+
+	if (tx_chn)
+		common->dst_thread = thread_id;
+	else
+		common->src_thread = thread_id;
+
+	ret = of_k3_udma_glue_parse(dma_spec.np, common);
+
+out_put_spec:
+	of_node_put(dma_spec.np);
+	return ret;
+};
+
+static void k3_udma_glue_dump_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	struct device *dev = tx_chn->common.dev;
+
+	dev_dbg(dev, "dump_tx_chn:\n"
+		"udma_tchan_id: %d\n"
+		"src_thread: %08x\n"
+		"dst_thread: %08x\n",
+		tx_chn->udma_tchan_id,
+		tx_chn->common.src_thread,
+		tx_chn->common.dst_thread);
+}
+
+static void k3_udma_glue_dump_tx_rt_chn(struct k3_udma_glue_tx_channel *chn,
+					char *mark)
+{
+	struct device *dev = chn->common.dev;
+
+	dev_dbg(dev, "=== dump ===> %s\n", mark);
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_CTL_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		xudma_tchanrt_read(chn->udma_tchanx,
+				   UDMA_TCHAN_RT_PEER_RT_EN_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_PCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_BCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_BCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_SBCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_SBCNT_REG));
+}
+
+static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	const struct udma_tisci_rm *tisci_rm = tx_chn->common.tisci_rm;
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg req;
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.index = tx_chn->udma_tchan_id;
+	if (tx_chn->tx_pause_on_err)
+		req.tx_pause_on_err = 1;
+	if (tx_chn->tx_filt_einfo)
+		req.tx_filt_einfo = 1;
+	if (tx_chn->tx_filt_pswords)
+		req.tx_filt_pswords = 1;
+	req.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+	if (tx_chn->tx_supr_tdpkt)
+		req.tx_supr_tdpkt = 1;
+	req.tx_fetch_size = tx_chn->common.hdesc_size >> 2;
+	req.txcq_qnum = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+
+	return tisci_rm->tisci_udmap_ops->tx_ch_cfg(tisci_rm->tisci, &req);
+}
+
+struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
+		const char *name, struct k3_udma_glue_tx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_tx_channel *tx_chn;
+	int ret;
+
+	tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
+	if (!tx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	tx_chn->common.dev = dev;
+	tx_chn->common.swdata_size = cfg->swdata_size;
+	tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
+	tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
+	tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
+	tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&tx_chn->common, true);
+	if (ret)
+		goto err;
+
+	tx_chn->common.hdesc_size = cppi5_hdesc_calc_size(tx_chn->common.epib,
+						tx_chn->common.psdata_size,
+						tx_chn->common.swdata_size);
+
+	/* request and cfg UDMAP TX channel */
+	tx_chn->udma_tchanx = xudma_tchan_get(tx_chn->common.udmax, -1);
+	if (IS_ERR(tx_chn->udma_tchanx)) {
+		ret = PTR_ERR(tx_chn->udma_tchanx);
+		dev_err(dev, "UDMAX tchanx get err %d\n", ret);
+		goto err;
+	}
+	tx_chn->udma_tchan_id = xudma_tchan_get_id(tx_chn->udma_tchanx);
+
+	atomic_set(&tx_chn->free_pkts, cfg->txcq_cfg.size);
+
+	/* request and cfg rings */
+	tx_chn->ringtx = k3_ringacc_request_ring(tx_chn->common.ringacc,
+						 tx_chn->udma_tchan_id, 0);
+	if (!tx_chn->ringtx) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get TX ring %u\n",
+			tx_chn->udma_tchan_id);
+		goto err;
+	}
+
+	tx_chn->ringtxcq = k3_ringacc_request_ring(tx_chn->common.ringacc,
+						   -1, 0);
+	if (!tx_chn->ringtxcq) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get TXCQ ring\n");
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(tx_chn->ringtx, &cfg->tx_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringtx %d\n", ret);
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(tx_chn->ringtxcq, &cfg->txcq_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringtx %d\n", ret);
+		goto err;
+	}
+
+	/* request and cfg psi-l */
+	tx_chn->common.src_thread =
+			xudma_dev_get_psil_base(tx_chn->common.udmax) +
+			tx_chn->udma_tchan_id;
+
+	ret = k3_udma_glue_cfg_tx_chn(tx_chn);
+	if (ret) {
+		dev_err(dev, "Failed to cfg tchan %d\n", ret);
+		goto err;
+	}
+
+	ret = xudma_navss_psil_pair(tx_chn->common.udmax,
+				    tx_chn->common.src_thread,
+				    tx_chn->common.dst_thread);
+	if (ret) {
+		dev_err(dev, "PSI-L request err %d\n", ret);
+		goto err;
+	}
+
+	tx_chn->psil_paired = true;
+
+	/* reset TX RT registers */
+	k3_udma_glue_disable_tx_chn(tx_chn);
+
+	k3_udma_glue_dump_tx_chn(tx_chn);
+
+	return tx_chn;
+
+err:
+	k3_udma_glue_release_tx_chn(tx_chn);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn);
+
+void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	if (tx_chn->psil_paired) {
+		xudma_navss_psil_unpair(tx_chn->common.udmax,
+					tx_chn->common.src_thread,
+					tx_chn->common.dst_thread);
+		tx_chn->psil_paired = false;
+	}
+
+	if (!IS_ERR_OR_NULL(tx_chn->udma_tchanx))
+		xudma_tchan_put(tx_chn->common.udmax,
+				tx_chn->udma_tchanx);
+
+	if (tx_chn->ringtxcq)
+		k3_ringacc_ring_free(tx_chn->ringtxcq);
+
+	if (tx_chn->ringtx)
+		k3_ringacc_ring_free(tx_chn->ringtx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_tx_chn);
+
+int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			     struct cppi5_host_desc_t *desc_tx,
+			     dma_addr_t desc_dma)
+{
+	u32 ringtxcq_id;
+
+	if (!atomic_add_unless(&tx_chn->free_pkts, -1, 0))
+		return -ENOMEM;
+
+	ringtxcq_id = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+	cppi5_desc_set_retpolicy(&desc_tx->hdr, 0, ringtxcq_id);
+
+	return k3_ringacc_ring_push(tx_chn->ringtx, &desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_tx_chn);
+
+int k3_udma_glue_pop_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			    dma_addr_t *desc_dma)
+{
+	int ret;
+
+	ret = k3_ringacc_ring_pop(tx_chn->ringtxcq, desc_dma);
+	if (!ret)
+		atomic_inc(&tx_chn->free_pkts);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_tx_chn);
+
+int k3_udma_glue_enable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	u32 txrt_ctl;
+
+	txrt_ctl = UDMA_PEER_RT_EN_ENABLE;
+	xudma_tchanrt_write(tx_chn->udma_tchanx,
+			    UDMA_TCHAN_RT_PEER_RT_EN_REG,
+			    txrt_ctl);
+
+	txrt_ctl = xudma_tchanrt_read(tx_chn->udma_tchanx,
+				      UDMA_TCHAN_RT_CTL_REG);
+	txrt_ctl |= UDMA_CHAN_RT_CTL_EN;
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG,
+			    txrt_ctl);
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_tx_chn);
+
+void k3_udma_glue_disable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1");
+
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG, 0);
+
+	xudma_tchanrt_write(tx_chn->udma_tchanx,
+			    UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn);
+
+void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			       bool sync)
+{
+	int i = 0;
+	u32 val;
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1");
+
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG,
+			    UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN);
+
+	val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG);
+
+	while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
+		val = xudma_tchanrt_read(tx_chn->udma_tchanx,
+					 UDMA_TCHAN_RT_CTL_REG);
+		udelay(1);
+		if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
+			dev_err(tx_chn->common.dev, "TX tdown timeout\n");
+			break;
+		}
+		i++;
+	}
+
+	val = xudma_tchanrt_read(tx_chn->udma_tchanx,
+				 UDMA_TCHAN_RT_PEER_RT_EN_REG);
+	if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
+		dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n");
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_tx_chn);
+
+void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			       void *data,
+			       void (*cleanup)(void *data, dma_addr_t desc_dma))
+{
+	dma_addr_t desc_dma;
+	int occ_tx, i, ret;
+
+	/* reset TXCQ as it is not input for udma - expected to be empty */
+	if (tx_chn->ringtxcq)
+		k3_ringacc_ring_reset(tx_chn->ringtxcq);
+
+	/*
+	 * TXQ reset need to be special way as it is input for udma and its
+	 * state cached by udma, so:
+	 * 1) save TXQ occ
+	 * 2) clean up TXQ and call callback .cleanup() for each desc
+	 * 3) reset TXQ in a special way
+	 */
+	occ_tx = k3_ringacc_ring_get_occ(tx_chn->ringtx);
+	dev_dbg(tx_chn->common.dev, "TX reset occ_tx %u\n", occ_tx);
+
+	for (i = 0; i < occ_tx; i++) {
+		ret = k3_ringacc_ring_pop(tx_chn->ringtx, &desc_dma);
+		if (ret) {
+			dev_err(tx_chn->common.dev, "TX reset pop %d\n", ret);
+			break;
+		}
+		cleanup(data, desc_dma);
+	}
+
+	k3_ringacc_ring_reset_dma(tx_chn->ringtx, occ_tx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_tx_chn);
+
+u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	return tx_chn->common.hdesc_size;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_hdesc_size);
+
+u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	return k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_txcq_id);
+
+int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	tx_chn->virq = k3_ringacc_get_ring_irq_num(tx_chn->ringtxcq);
+
+	return tx_chn->virq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_irq);
+
+static int k3_udma_glue_cfg_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg req;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+			   TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
+			   TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+			   TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |
+			   TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID;
+
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.index = rx_chn->udma_rchan_id;
+	req.rx_fetch_size = rx_chn->common.hdesc_size >> 2;
+	/*
+	 * TODO: we can't support rxcq_qnum/RCHAN[a]_RCQ cfg with current sysfw
+	 * and udmax impl, so just configure it to invalid value.
+	 * req.rxcq_qnum = k3_ringacc_get_ring_id(rx_chn->flows[0].ringrx);
+	 */
+	req.rxcq_qnum = 0xFFFF;
+	if (rx_chn->flow_num && rx_chn->flow_id_base != rx_chn->udma_rchan_id) {
+		/* Default flow + extra ones */
+		req.flowid_start = rx_chn->flow_id_base;
+		req.flowid_cnt = rx_chn->flow_num;
+	}
+	req.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_ch_cfg(tisci_rm->tisci, &req);
+	if (ret)
+		dev_err(rx_chn->common.dev, "rchan%d cfg failed %d\n",
+			rx_chn->udma_rchan_id, ret);
+
+	return ret;
+}
+
+static void k3_udma_glue_release_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+					 u32 flow_num)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+	if (IS_ERR_OR_NULL(flow->udma_rflow))
+		return;
+
+	if (flow->ringrxfdq)
+		k3_ringacc_ring_free(flow->ringrxfdq);
+
+	if (flow->ringrx)
+		k3_ringacc_ring_free(flow->ringrx);
+
+	xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+	flow->udma_rflow = NULL;
+	rx_chn->flows_ready--;
+}
+
+static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+				    u32 flow_idx,
+				    struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int rx_ring_id;
+	int rx_ringfdq_id;
+	int ret = 0;
+
+	flow->udma_rflow = xudma_rflow_get(rx_chn->common.udmax,
+					   flow->udma_rflow_id);
+	if (IS_ERR(flow->udma_rflow)) {
+		ret = PTR_ERR(flow->udma_rflow);
+		dev_err(dev, "UDMAX rflow get err %d\n", ret);
+		goto err;
+	}
+
+	if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
+		xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+		return -ENODEV;
+	}
+
+	/* request and cfg rings */
+	flow->ringrx = k3_ringacc_request_ring(rx_chn->common.ringacc,
+					       flow_cfg->ring_rxq_id, 0);
+	if (!flow->ringrx) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get RX ring\n");
+		goto err;
+	}
+
+	flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc,
+						  flow_cfg->ring_rxfdq0_id, 0);
+	if (!flow->ringrxfdq) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get RXFDQ ring\n");
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringrx %d\n", ret);
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
+		goto err;
+	}
+
+	if (rx_chn->remote) {
+		rx_ring_id = TI_SCI_RESOURCE_NULL;
+		rx_ringfdq_id = TI_SCI_RESOURCE_NULL;
+	} else {
+		rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+		rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+	}
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	if (rx_chn->common.epib)
+		req.rx_einfo_present = 1;
+	if (rx_chn->common.psdata_size)
+		req.rx_psinfo_present = 1;
+	if (flow_cfg->rx_error_handling)
+		req.rx_error_handling = 1;
+	req.rx_desc_type = 0;
+	req.rx_dest_qnum = rx_ring_id;
+	req.rx_src_tag_hi_sel = 0;
+	req.rx_src_tag_lo_sel = flow_cfg->src_tag_lo_sel;
+	req.rx_dest_tag_hi_sel = 0;
+	req.rx_dest_tag_lo_sel = 0;
+	req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+	req.rx_fdq1_qnum = rx_ringfdq_id;
+	req.rx_fdq2_qnum = rx_ringfdq_id;
+	req.rx_fdq3_qnum = rx_ringfdq_id;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
+			ret);
+		goto err;
+	}
+
+	rx_chn->flows_ready++;
+	dev_dbg(dev, "flow%d config done. ready:%d\n",
+		flow->udma_rflow_id, rx_chn->flows_ready);
+
+	return 0;
+err:
+	k3_udma_glue_release_rx_flow(rx_chn, flow_idx);
+	return ret;
+}
+
+static void k3_udma_glue_dump_rx_chn(struct k3_udma_glue_rx_channel *chn)
+{
+	struct device *dev = chn->common.dev;
+
+	dev_dbg(dev, "dump_rx_chn:\n"
+		"udma_rchan_id: %d\n"
+		"src_thread: %08x\n"
+		"dst_thread: %08x\n"
+		"epib: %d\n"
+		"hdesc_size: %u\n"
+		"psdata_size: %u\n"
+		"swdata_size: %u\n"
+		"flow_id_base: %d\n"
+		"flow_num: %d\n",
+		chn->udma_rchan_id,
+		chn->common.src_thread,
+		chn->common.dst_thread,
+		chn->common.epib,
+		chn->common.hdesc_size,
+		chn->common.psdata_size,
+		chn->common.swdata_size,
+		chn->flow_id_base,
+		chn->flow_num);
+}
+
+static void k3_udma_glue_dump_rx_rt_chn(struct k3_udma_glue_rx_channel *chn,
+					char *mark)
+{
+	struct device *dev = chn->common.dev;
+
+	dev_dbg(dev, "=== dump ===> %s\n", mark);
+
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_CTL_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		xudma_rchanrt_read(chn->udma_rchanx,
+				   UDMA_RCHAN_RT_PEER_RT_EN_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_PCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_BCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_BCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_SBCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_SBCNT_REG));
+}
+
+static int
+k3_udma_glue_allocate_rx_flows(struct k3_udma_glue_rx_channel *rx_chn,
+			       struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	int ret;
+
+	/* default rflow */
+	if (cfg->flow_id_use_rxchan_id)
+		return 0;
+
+	/* not a GP rflows */
+	if (rx_chn->flow_id_base != -1 &&
+	    !xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+		return 0;
+
+	/* Allocate range of GP rflows */
+	ret = xudma_alloc_gp_rflow_range(rx_chn->common.udmax,
+					 rx_chn->flow_id_base,
+					 rx_chn->flow_num);
+	if (ret < 0) {
+		dev_err(rx_chn->common.dev, "UDMAX reserve_rflow %d cnt:%d err: %d\n",
+			rx_chn->flow_id_base, rx_chn->flow_num, ret);
+		return ret;
+	}
+	rx_chn->flow_id_base = ret;
+
+	return 0;
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name,
+				 struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_rx_channel *rx_chn;
+	int ret, i;
+
+	if (cfg->flow_id_num <= 0)
+		return ERR_PTR(-EINVAL);
+
+	if (cfg->flow_id_num != 1 &&
+	    (cfg->def_flow_cfg || cfg->flow_id_use_rxchan_id))
+		return ERR_PTR(-EINVAL);
+
+	rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+	if (!rx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	rx_chn->common.dev = dev;
+	rx_chn->common.swdata_size = cfg->swdata_size;
+	rx_chn->remote = false;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&rx_chn->common, false);
+	if (ret)
+		goto err;
+
+	rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+						rx_chn->common.psdata_size,
+						rx_chn->common.swdata_size);
+
+	/* request and cfg UDMAP RX channel */
+	rx_chn->udma_rchanx = xudma_rchan_get(rx_chn->common.udmax, -1);
+	if (IS_ERR(rx_chn->udma_rchanx)) {
+		ret = PTR_ERR(rx_chn->udma_rchanx);
+		dev_err(dev, "UDMAX rchanx get err %d\n", ret);
+		goto err;
+	}
+	rx_chn->udma_rchan_id = xudma_rchan_get_id(rx_chn->udma_rchanx);
+
+	rx_chn->flow_num = cfg->flow_id_num;
+	rx_chn->flow_id_base = cfg->flow_id_base;
+
+	/* Use RX channel id as flow id: target dev can't generate flow_id */
+	if (cfg->flow_id_use_rxchan_id)
+		rx_chn->flow_id_base = rx_chn->udma_rchan_id;
+
+	rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+				     sizeof(*rx_chn->flows), GFP_KERNEL);
+	if (!rx_chn->flows) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+	/* request and cfg psi-l */
+	rx_chn->common.dst_thread =
+			xudma_dev_get_psil_base(rx_chn->common.udmax) +
+			rx_chn->udma_rchan_id;
+
+	ret = k3_udma_glue_cfg_rx_chn(rx_chn);
+	if (ret) {
+		dev_err(dev, "Failed to cfg rchan %d\n", ret);
+		goto err;
+	}
+
+	/* init default RX flow only if flow_num = 1 */
+	if (cfg->def_flow_cfg) {
+		ret = k3_udma_glue_cfg_rx_flow(rx_chn, 0, cfg->def_flow_cfg);
+		if (ret)
+			goto err;
+	}
+
+	ret = xudma_navss_psil_pair(rx_chn->common.udmax,
+				    rx_chn->common.src_thread,
+				    rx_chn->common.dst_thread);
+	if (ret) {
+		dev_err(dev, "PSI-L request err %d\n", ret);
+		goto err;
+	}
+
+	rx_chn->psil_paired = true;
+
+	/* reset RX RT registers */
+	k3_udma_glue_disable_rx_chn(rx_chn);
+
+	k3_udma_glue_dump_rx_chn(rx_chn);
+
+	return rx_chn;
+
+err:
+	k3_udma_glue_release_rx_chn(rx_chn);
+	return ERR_PTR(ret);
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
+				   struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_rx_channel *rx_chn;
+	int ret, i;
+
+	if (cfg->flow_id_num <= 0 ||
+	    cfg->flow_id_use_rxchan_id ||
+	    cfg->def_flow_cfg ||
+	    cfg->flow_id_base < 0)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Remote RX channel is under control of Remote CPU core, so
+	 * Linux can only request and manipulate by dedicated RX flows
+	 */
+
+	rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+	if (!rx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	rx_chn->common.dev = dev;
+	rx_chn->common.swdata_size = cfg->swdata_size;
+	rx_chn->remote = true;
+	rx_chn->udma_rchan_id = -1;
+	rx_chn->flow_num = cfg->flow_id_num;
+	rx_chn->flow_id_base = cfg->flow_id_base;
+	rx_chn->psil_paired = false;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&rx_chn->common, false);
+	if (ret)
+		goto err;
+
+	rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+						rx_chn->common.psdata_size,
+						rx_chn->common.swdata_size);
+
+	rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+				     sizeof(*rx_chn->flows), GFP_KERNEL);
+	if (!rx_chn->flows) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+	k3_udma_glue_dump_rx_chn(rx_chn);
+
+	return rx_chn;
+
+err:
+	k3_udma_glue_release_rx_chn(rx_chn);
+	return ERR_PTR(ret);
+}
+
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn(struct device *dev, const char *name,
+			    struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	if (cfg->remote)
+		return k3_udma_glue_request_remote_rx_chn(dev, name, cfg);
+	else
+		return k3_udma_glue_request_rx_chn_priv(dev, name, cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_rx_chn);
+
+void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(rx_chn->common.udmax))
+		return;
+
+	if (rx_chn->psil_paired) {
+		xudma_navss_psil_unpair(rx_chn->common.udmax,
+					rx_chn->common.src_thread,
+					rx_chn->common.dst_thread);
+		rx_chn->psil_paired = false;
+	}
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		k3_udma_glue_release_rx_flow(rx_chn, i);
+
+	if (xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+		xudma_free_gp_rflow_range(rx_chn->common.udmax,
+					  rx_chn->flow_id_base,
+					  rx_chn->flow_num);
+
+	if (!IS_ERR_OR_NULL(rx_chn->udma_rchanx))
+		xudma_rchan_put(rx_chn->common.udmax,
+				rx_chn->udma_rchanx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_rx_chn);
+
+int k3_udma_glue_rx_flow_init(struct k3_udma_glue_rx_channel *rx_chn,
+			      u32 flow_idx,
+			      struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+	if (flow_idx >= rx_chn->flow_num)
+		return -EINVAL;
+
+	return k3_udma_glue_cfg_rx_flow(rx_chn, flow_idx, flow_cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_init);
+
+u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn,
+				    u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow;
+
+	if (flow_idx >= rx_chn->flow_num)
+		return -EINVAL;
+
+	flow = &rx_chn->flows[flow_idx];
+
+	return k3_ringacc_get_ring_id(flow->ringrxfdq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_get_fdq_id);
+
+u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	return rx_chn->flow_id_base;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_flow_id_base);
+
+int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn,
+				u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int rx_ring_id;
+	int rx_ringfdq_id;
+	int ret = 0;
+
+	if (!rx_chn->remote)
+		return -EINVAL;
+
+	rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+	rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	req.rx_dest_qnum = rx_ring_id;
+	req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+	req.rx_fdq1_qnum = rx_ringfdq_id;
+	req.rx_fdq2_qnum = rx_ringfdq_id;
+	req.rx_fdq3_qnum = rx_ringfdq_id;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d enable failed: %d\n", flow->udma_rflow_id,
+			ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_enable);
+
+int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn,
+				 u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int ret = 0;
+
+	if (!rx_chn->remote)
+		return -EINVAL;
+
+	memset(&req, 0, sizeof(req));
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	req.rx_dest_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq0_sz0_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq1_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq2_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq3_qnum = TI_SCI_RESOURCE_NULL;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d disable failed: %d\n", flow->udma_rflow_id,
+			ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_disable);
+
+int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	u32 rxrt_ctl;
+
+	if (rx_chn->remote)
+		return -EINVAL;
+
+	if (rx_chn->flows_ready < rx_chn->flow_num)
+		return -EINVAL;
+
+	rxrt_ctl = xudma_rchanrt_read(rx_chn->udma_rchanx,
+				      UDMA_RCHAN_RT_CTL_REG);
+	rxrt_ctl |= UDMA_CHAN_RT_CTL_EN;
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG,
+			    rxrt_ctl);
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx,
+			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    UDMA_PEER_RT_EN_ENABLE);
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_rx_chn);
+
+void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1");
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx,
+			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    0);
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG, 0);
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_rx_chn);
+
+void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			       bool sync)
+{
+	int i = 0;
+	u32 val;
+
+	if (rx_chn->remote)
+		return;
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1");
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN);
+
+	val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG);
+
+	while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
+		val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+					 UDMA_RCHAN_RT_CTL_REG);
+		udelay(1);
+		if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
+			dev_err(rx_chn->common.dev, "RX tdown timeout\n");
+			break;
+		}
+		i++;
+	}
+
+	val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+				 UDMA_RCHAN_RT_PEER_RT_EN_REG);
+	if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
+		dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n");
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_rx_chn);
+
+void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+		u32 flow_num, void *data,
+		void (*cleanup)(void *data, dma_addr_t desc_dma), bool skip_fdq)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+	struct device *dev = rx_chn->common.dev;
+	dma_addr_t desc_dma;
+	int occ_rx, i, ret;
+
+	/* reset RXCQ as it is not input for udma - expected to be empty */
+	occ_rx = k3_ringacc_ring_get_occ(flow->ringrx);
+	dev_dbg(dev, "RX reset flow %u occ_rx %u\n", flow_num, occ_rx);
+	if (flow->ringrx)
+		k3_ringacc_ring_reset(flow->ringrx);
+
+	/* Skip RX FDQ in case one FDQ is used for the set of flows */
+	if (skip_fdq)
+		return;
+
+	/*
+	 * RX FDQ reset need to be special way as it is input for udma and its
+	 * state cached by udma, so:
+	 * 1) save RX FDQ occ
+	 * 2) clean up RX FDQ and call callback .cleanup() for each desc
+	 * 3) reset RX FDQ in a special way
+	 */
+	occ_rx = k3_ringacc_ring_get_occ(flow->ringrxfdq);
+	dev_dbg(dev, "RX reset flow %u occ_rx_fdq %u\n", flow_num, occ_rx);
+
+	for (i = 0; i < occ_rx; i++) {
+		ret = k3_ringacc_ring_pop(flow->ringrxfdq, &desc_dma);
+		if (ret) {
+			dev_err(dev, "RX reset pop %d\n", ret);
+			break;
+		}
+		cleanup(data, desc_dma);
+	}
+
+	k3_ringacc_ring_reset_dma(flow->ringrxfdq, occ_rx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_rx_chn);
+
+int k3_udma_glue_push_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			     u32 flow_num, struct cppi5_host_desc_t *desc_rx,
+			     dma_addr_t desc_dma)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+	return k3_ringacc_ring_push(flow->ringrxfdq, &desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_rx_chn);
+
+int k3_udma_glue_pop_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			    u32 flow_num, dma_addr_t *desc_dma)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+	return k3_ringacc_ring_pop(flow->ringrx, desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_rx_chn);
+
+int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn,
+			    u32 flow_num)
+{
+	struct k3_udma_glue_rx_flow *flow;
+
+	flow = &rx_chn->flows[flow_num];
+
+	flow->virq = k3_ringacc_get_ring_irq_num(flow->ringrx);
+
+	return flow->virq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_irq);
diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
new file mode 100644
index 000000000000..0b8f3dd6b146
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-private.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	return navss_psil_pair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_pair);
+
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	return navss_psil_unpair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_unpair);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property)
+{
+	struct device_node *udma_node = np;
+	struct platform_device *pdev;
+	struct udma_dev *ud;
+
+	if (property) {
+		udma_node = of_parse_phandle(np, property, 0);
+		if (!udma_node) {
+			pr_err("UDMA node is not found\n");
+			return ERR_PTR(-ENODEV);
+		}
+	}
+
+	pdev = of_find_device_by_node(udma_node);
+	if (!pdev) {
+		pr_debug("UDMA device not found\n");
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	if (np != udma_node)
+		of_node_put(udma_node);
+
+	ud = platform_get_drvdata(pdev);
+	if (!ud) {
+		pr_debug("UDMA has not been probed\n");
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	return ud;
+}
+EXPORT_SYMBOL(of_xudma_dev_get);
+
+u32 xudma_dev_get_psil_base(struct udma_dev *ud)
+{
+	return ud->psil_base;
+}
+EXPORT_SYMBOL(xudma_dev_get_psil_base);
+
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud)
+{
+	return &ud->tisci_rm;
+}
+EXPORT_SYMBOL(xudma_dev_get_tisci_rm);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	return __udma_alloc_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_alloc_gp_rflow_range);
+
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	return __udma_free_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_free_gp_rflow_range);
+
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id)
+{
+	return !test_bit(id, ud->rflow_gp_map);
+}
+EXPORT_SYMBOL(xudma_rflow_is_gp);
+
+#define XUDMA_GET_PUT_RESOURCE(res)					\
+struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id)	\
+{									\
+	return __udma_reserve_##res(ud, false, id);			\
+}									\
+EXPORT_SYMBOL(xudma_##res##_get);					\
+									\
+void xudma_##res##_put(struct udma_dev *ud, struct udma_##res *p)	\
+{									\
+	clear_bit(p->id, ud->res##_map);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##_put)
+XUDMA_GET_PUT_RESOURCE(tchan);
+XUDMA_GET_PUT_RESOURCE(rchan);
+
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id)
+{
+	return __udma_get_rflow(ud, id);
+}
+EXPORT_SYMBOL(xudma_rflow_get);
+
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p)
+{
+	__udma_put_rflow(ud, p);
+}
+EXPORT_SYMBOL(xudma_rflow_put);
+
+#define XUDMA_GET_RESOURCE_ID(res)					\
+int xudma_##res##_get_id(struct udma_##res *p)				\
+{									\
+	return p->id;							\
+}									\
+EXPORT_SYMBOL(xudma_##res##_get_id)
+XUDMA_GET_RESOURCE_ID(tchan);
+XUDMA_GET_RESOURCE_ID(rchan);
+XUDMA_GET_RESOURCE_ID(rflow);
+
+/* Exported register access functions */
+#define XUDMA_RT_IO_FUNCTIONS(res)					\
+u32 xudma_##res##rt_read(struct udma_##res *p, int reg)			\
+{									\
+	return udma_##res##rt_read(p, reg);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##rt_read);					\
+									\
+void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val)	\
+{									\
+	udma_##res##rt_write(p, reg, val);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##rt_write)
+XUDMA_RT_IO_FUNCTIONS(tchan);
+XUDMA_RT_IO_FUNCTIONS(rchan);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
new file mode 100644
index 000000000000..ea79c2df28e0
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.c
@@ -0,0 +1,3432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/soc/ti/ti_sci_protocol.h>
+#include <linux/soc/ti/ti_sci_inta_msi.h>
+#include <linux/dma/ti-cppi5.h>
+
+#include "../virt-dma.h"
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+struct udma_static_tr {
+	u8 elsize; /* RPSTR0 */
+	u16 elcnt; /* RPSTR0 */
+	u16 bstcnt; /* RPSTR1 */
+};
+
+#define K3_UDMA_MAX_RFLOWS		1024
+#define K3_UDMA_DEFAULT_RING_SIZE	16
+
+/* How SRC/DST tag should be updated by UDMA in the descriptor's Word 3 */
+#define UDMA_RFLOW_SRCTAG_NONE		0
+#define UDMA_RFLOW_SRCTAG_CFG_TAG	1
+#define UDMA_RFLOW_SRCTAG_FLOW_ID	2
+#define UDMA_RFLOW_SRCTAG_SRC_TAG	4
+
+#define UDMA_RFLOW_DSTTAG_NONE		0
+#define UDMA_RFLOW_DSTTAG_CFG_TAG	1
+#define UDMA_RFLOW_DSTTAG_FLOW_ID	2
+#define UDMA_RFLOW_DSTTAG_DST_TAG_LO	4
+#define UDMA_RFLOW_DSTTAG_DST_TAG_HI	5
+
+struct udma_chan;
+
+enum udma_mmr {
+	MMR_GCFG = 0,
+	MMR_RCHANRT,
+	MMR_TCHANRT,
+	MMR_LAST,
+};
+
+static const char * const mmr_names[] = { "gcfg", "rchanrt", "tchanrt" };
+
+struct udma_tchan {
+	void __iomem *reg_rt;
+
+	int id;
+	struct k3_ring *t_ring; /* Transmit ring */
+	struct k3_ring *tc_ring; /* Transmit Completion ring */
+};
+
+struct udma_rflow {
+	int id;
+	struct k3_ring *fd_ring; /* Free Descriptor ring */
+	struct k3_ring *r_ring; /* Receive ring */
+};
+
+struct udma_rchan {
+	void __iomem *reg_rt;
+
+	int id;
+};
+
+#define UDMA_FLAG_PDMA_ACC32		BIT(0)
+#define UDMA_FLAG_PDMA_BURST		BIT(1)
+
+struct udma_match_data {
+	u32 psil_base;
+	bool enable_memcpy_support;
+	u32 flags;
+	u32 statictr_z_mask;
+	u32 rchan_oes_offset;
+
+	u8 tpl_levels;
+	u32 level_start_idx[];
+};
+
+struct udma_dev {
+	struct dma_device ddev;
+	struct device *dev;
+	void __iomem *mmrs[MMR_LAST];
+	const struct udma_match_data *match_data;
+
+	size_t desc_align; /* alignment to use for descriptors */
+
+	struct udma_tisci_rm tisci_rm;
+
+	struct k3_ringacc *ringacc;
+
+	struct work_struct purge_work;
+	struct list_head desc_to_purge;
+	spinlock_t lock;
+
+	int tchan_cnt;
+	int echan_cnt;
+	int rchan_cnt;
+	int rflow_cnt;
+	unsigned long *tchan_map;
+	unsigned long *rchan_map;
+	unsigned long *rflow_gp_map;
+	unsigned long *rflow_gp_map_allocated;
+	unsigned long *rflow_in_use;
+
+	struct udma_tchan *tchans;
+	struct udma_rchan *rchans;
+	struct udma_rflow *rflows;
+
+	struct udma_chan *channels;
+	u32 psil_base;
+};
+
+struct udma_hwdesc {
+	size_t cppi5_desc_size;
+	void *cppi5_desc_vaddr;
+	dma_addr_t cppi5_desc_paddr;
+
+	/* TR descriptor internal pointers */
+	void *tr_req_base;
+	struct cppi5_tr_resp_t *tr_resp_base;
+};
+
+struct udma_desc {
+	struct virt_dma_desc vd;
+
+	bool terminated;
+
+	enum dma_transfer_direction dir;
+
+	struct udma_static_tr static_tr;
+	u32 residue;
+
+	unsigned int sglen;
+	unsigned int desc_idx; /* Only used for cyclic in packet mode */
+	unsigned int tr_idx;
+
+	u32 metadata_size;
+	void *metadata; /* pointer to provided metadata buffer (EPIP, PSdata) */
+
+	unsigned int hwdesc_count;
+	struct udma_hwdesc hwdesc[0];
+};
+
+enum udma_chan_state {
+	UDMA_CHAN_IS_IDLE = 0, /* not active, no teardown is in progress */
+	UDMA_CHAN_IS_ACTIVE, /* Normal operation */
+	UDMA_CHAN_IS_TERMINATING, /* channel is being terminated */
+};
+
+struct udma_tx_drain {
+	struct delayed_work work;
+	unsigned long jiffie;
+	u32 residue;
+};
+
+struct udma_chan_config {
+	bool pkt_mode; /* TR or packet */
+	bool needs_epib; /* EPIB is needed for the communication or not */
+	u32 psd_size; /* size of Protocol Specific Data */
+	u32 metadata_size; /* (needs_epib ? 16:0) + psd_size */
+	u32 hdesc_size; /* Size of a packet descriptor in packet mode */
+	bool notdpkt; /* Suppress sending TDC packet */
+	int remote_thread_id;
+	u32 src_thread;
+	u32 dst_thread;
+	enum psil_endpoint_type ep_type;
+	bool enable_acc32;
+	bool enable_burst;
+	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
+
+	enum dma_transfer_direction dir;
+};
+
+struct udma_chan {
+	struct virt_dma_chan vc;
+	struct dma_slave_config	cfg;
+	struct udma_dev *ud;
+	struct udma_desc *desc;
+	struct udma_desc *terminated_desc;
+	struct udma_static_tr static_tr;
+	char *name;
+
+	struct udma_tchan *tchan;
+	struct udma_rchan *rchan;
+	struct udma_rflow *rflow;
+
+	bool psil_paired;
+
+	int irq_num_ring;
+	int irq_num_udma;
+
+	bool cyclic;
+	bool paused;
+
+	enum udma_chan_state state;
+	struct completion teardown_completed;
+
+	struct udma_tx_drain tx_drain;
+
+	u32 bcnt; /* number of bytes completed since the start of the channel */
+	u32 in_ring_cnt; /* number of descriptors in flight */
+
+	/* Channel configuration parameters */
+	struct udma_chan_config config;
+
+	/* dmapool for packet mode descriptors */
+	bool use_dma_pool;
+	struct dma_pool *hdesc_pool;
+
+	u32 id;
+};
+
+static inline struct udma_dev *to_udma_dev(struct dma_device *d)
+{
+	return container_of(d, struct udma_dev, ddev);
+}
+
+static inline struct udma_chan *to_udma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct udma_chan, vc.chan);
+}
+
+static inline struct udma_desc *to_udma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct udma_desc, vd.tx);
+}
+
+/* Generic register access functions */
+static inline u32 udma_read(void __iomem *base, int reg)
+{
+	return readl(base + reg);
+}
+
+static inline void udma_write(void __iomem *base, int reg, u32 val)
+{
+	writel(val, base + reg);
+}
+
+static inline void udma_update_bits(void __iomem *base, int reg,
+				    u32 mask, u32 val)
+{
+	u32 tmp, orig;
+
+	orig = readl(base + reg);
+	tmp = orig & ~mask;
+	tmp |= (val & mask);
+
+	if (tmp != orig)
+		writel(tmp, base + reg);
+}
+
+/* TCHANRT */
+static inline u32 udma_tchanrt_read(struct udma_tchan *tchan, int reg)
+{
+	if (!tchan)
+		return 0;
+	return udma_read(tchan->reg_rt, reg);
+}
+
+static inline void udma_tchanrt_write(struct udma_tchan *tchan, int reg,
+				      u32 val)
+{
+	if (!tchan)
+		return;
+	udma_write(tchan->reg_rt, reg, val);
+}
+
+static inline void udma_tchanrt_update_bits(struct udma_tchan *tchan, int reg,
+					    u32 mask, u32 val)
+{
+	if (!tchan)
+		return;
+	udma_update_bits(tchan->reg_rt, reg, mask, val);
+}
+
+/* RCHANRT */
+static inline u32 udma_rchanrt_read(struct udma_rchan *rchan, int reg)
+{
+	if (!rchan)
+		return 0;
+	return udma_read(rchan->reg_rt, reg);
+}
+
+static inline void udma_rchanrt_write(struct udma_rchan *rchan, int reg,
+				      u32 val)
+{
+	if (!rchan)
+		return;
+	udma_write(rchan->reg_rt, reg, val);
+}
+
+static inline void udma_rchanrt_update_bits(struct udma_rchan *rchan, int reg,
+					    u32 mask, u32 val)
+{
+	if (!rchan)
+		return;
+	udma_update_bits(rchan->reg_rt, reg, mask, val);
+}
+
+static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+
+	dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+	return tisci_rm->tisci_psil_ops->pair(tisci_rm->tisci,
+					      tisci_rm->tisci_navss_dev_id,
+					      src_thread, dst_thread);
+}
+
+static int navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+			     u32 dst_thread)
+{
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+
+	dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+	return tisci_rm->tisci_psil_ops->unpair(tisci_rm->tisci,
+						tisci_rm->tisci_navss_dev_id,
+						src_thread, dst_thread);
+}
+
+static void udma_reset_uchan(struct udma_chan *uc)
+{
+	memset(&uc->config, 0, sizeof(uc->config));
+	uc->config.remote_thread_id = -1;
+	uc->state = UDMA_CHAN_IS_IDLE;
+}
+
+static void udma_dump_chan_stdata(struct udma_chan *uc)
+{
+	struct device *dev = uc->ud->dev;
+	u32 offset;
+	int i;
+
+	if (uc->config.dir == DMA_MEM_TO_DEV || uc->config.dir == DMA_MEM_TO_MEM) {
+		dev_dbg(dev, "TCHAN State data:\n");
+		for (i = 0; i < 32; i++) {
+			offset = UDMA_TCHAN_RT_STDATA_REG + i * 4;
+			dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", i,
+				udma_tchanrt_read(uc->tchan, offset));
+		}
+	}
+
+	if (uc->config.dir == DMA_DEV_TO_MEM || uc->config.dir == DMA_MEM_TO_MEM) {
+		dev_dbg(dev, "RCHAN State data:\n");
+		for (i = 0; i < 32; i++) {
+			offset = UDMA_RCHAN_RT_STDATA_REG + i * 4;
+			dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", i,
+				udma_rchanrt_read(uc->rchan, offset));
+		}
+	}
+}
+
+static inline dma_addr_t udma_curr_cppi5_desc_paddr(struct udma_desc *d,
+						    int idx)
+{
+	return d->hwdesc[idx].cppi5_desc_paddr;
+}
+
+static inline void *udma_curr_cppi5_desc_vaddr(struct udma_desc *d, int idx)
+{
+	return d->hwdesc[idx].cppi5_desc_vaddr;
+}
+
+static struct udma_desc *udma_udma_desc_from_paddr(struct udma_chan *uc,
+						   dma_addr_t paddr)
+{
+	struct udma_desc *d = uc->terminated_desc;
+
+	if (d) {
+		dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+								   d->desc_idx);
+
+		if (desc_paddr != paddr)
+			d = NULL;
+	}
+
+	if (!d) {
+		d = uc->desc;
+		if (d) {
+			dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+								d->desc_idx);
+
+			if (desc_paddr != paddr)
+				d = NULL;
+		}
+	}
+
+	return d;
+}
+
+static void udma_free_hwdesc(struct udma_chan *uc, struct udma_desc *d)
+{
+	if (uc->use_dma_pool) {
+		int i;
+
+		for (i = 0; i < d->hwdesc_count; i++) {
+			if (!d->hwdesc[i].cppi5_desc_vaddr)
+				continue;
+
+			dma_pool_free(uc->hdesc_pool,
+				      d->hwdesc[i].cppi5_desc_vaddr,
+				      d->hwdesc[i].cppi5_desc_paddr);
+
+			d->hwdesc[i].cppi5_desc_vaddr = NULL;
+		}
+	} else if (d->hwdesc[0].cppi5_desc_vaddr) {
+		struct udma_dev *ud = uc->ud;
+
+		dma_free_coherent(ud->dev, d->hwdesc[0].cppi5_desc_size,
+				  d->hwdesc[0].cppi5_desc_vaddr,
+				  d->hwdesc[0].cppi5_desc_paddr);
+
+		d->hwdesc[0].cppi5_desc_vaddr = NULL;
+	}
+}
+
+static void udma_purge_desc_work(struct work_struct *work)
+{
+	struct udma_dev *ud = container_of(work, typeof(*ud), purge_work);
+	struct virt_dma_desc *vd, *_vd;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&ud->lock, flags);
+	list_splice_tail_init(&ud->desc_to_purge, &head);
+	spin_unlock_irqrestore(&ud->lock, flags);
+
+	list_for_each_entry_safe(vd, _vd, &head, node) {
+		struct udma_chan *uc = to_udma_chan(vd->tx.chan);
+		struct udma_desc *d = to_udma_desc(&vd->tx);
+
+		udma_free_hwdesc(uc, d);
+		list_del(&vd->node);
+		kfree(d);
+	}
+
+	/* If more to purge, schedule the work again */
+	if (!list_empty(&ud->desc_to_purge))
+		schedule_work(&ud->purge_work);
+}
+
+static void udma_desc_free(struct virt_dma_desc *vd)
+{
+	struct udma_dev *ud = to_udma_dev(vd->tx.chan->device);
+	struct udma_chan *uc = to_udma_chan(vd->tx.chan);
+	struct udma_desc *d = to_udma_desc(&vd->tx);
+	unsigned long flags;
+
+	if (uc->terminated_desc == d)
+		uc->terminated_desc = NULL;
+
+	if (uc->use_dma_pool) {
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return;
+	}
+
+	spin_lock_irqsave(&ud->lock, flags);
+	list_add_tail(&vd->node, &ud->desc_to_purge);
+	spin_unlock_irqrestore(&ud->lock, flags);
+
+	schedule_work(&ud->purge_work);
+}
+
+static bool udma_is_chan_running(struct udma_chan *uc)
+{
+	u32 trt_ctl = 0;
+	u32 rrt_ctl = 0;
+
+	if (uc->tchan)
+		trt_ctl = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+	if (uc->rchan)
+		rrt_ctl = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_CTL_REG);
+
+	if (trt_ctl & UDMA_CHAN_RT_CTL_EN || rrt_ctl & UDMA_CHAN_RT_CTL_EN)
+		return true;
+
+	return false;
+}
+
+static bool udma_is_chan_paused(struct udma_chan *uc)
+{
+	u32 val, pause_mask;
+
+	switch (uc->desc->dir) {
+	case DMA_DEV_TO_MEM:
+		val = udma_rchanrt_read(uc->rchan,
+					UDMA_RCHAN_RT_PEER_RT_EN_REG);
+		pause_mask = UDMA_PEER_RT_EN_PAUSE;
+		break;
+	case DMA_MEM_TO_DEV:
+		val = udma_tchanrt_read(uc->tchan,
+					UDMA_TCHAN_RT_PEER_RT_EN_REG);
+		pause_mask = UDMA_PEER_RT_EN_PAUSE;
+		break;
+	case DMA_MEM_TO_MEM:
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+		pause_mask = UDMA_CHAN_RT_CTL_PAUSE;
+		break;
+	default:
+		return false;
+	}
+
+	if (val & pause_mask)
+		return true;
+
+	return false;
+}
+
+static void udma_sync_for_device(struct udma_chan *uc, int idx)
+{
+	struct udma_desc *d = uc->desc;
+
+	if (uc->cyclic && uc->config.pkt_mode) {
+		dma_sync_single_for_device(uc->ud->dev,
+					   d->hwdesc[idx].cppi5_desc_paddr,
+					   d->hwdesc[idx].cppi5_desc_size,
+					   DMA_TO_DEVICE);
+	} else {
+		int i;
+
+		for (i = 0; i < d->hwdesc_count; i++) {
+			if (!d->hwdesc[i].cppi5_desc_vaddr)
+				continue;
+
+			dma_sync_single_for_device(uc->ud->dev,
+						d->hwdesc[i].cppi5_desc_paddr,
+						d->hwdesc[i].cppi5_desc_size,
+						DMA_TO_DEVICE);
+		}
+	}
+}
+
+static int udma_push_to_ring(struct udma_chan *uc, int idx)
+{
+	struct udma_desc *d = uc->desc;
+
+	struct k3_ring *ring = NULL;
+	int ret = -EINVAL;
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		ring = uc->rflow->fd_ring;
+		break;
+	case DMA_MEM_TO_DEV:
+	case DMA_MEM_TO_MEM:
+		ring = uc->tchan->t_ring;
+		break;
+	default:
+		break;
+	}
+
+	if (ring) {
+		dma_addr_t desc_addr = udma_curr_cppi5_desc_paddr(d, idx);
+
+		wmb(); /* Ensure that writes are not moved over this point */
+		udma_sync_for_device(uc, idx);
+		ret = k3_ringacc_ring_push(ring, &desc_addr);
+		uc->in_ring_cnt++;
+	}
+
+	return ret;
+}
+
+static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
+{
+	struct k3_ring *ring = NULL;
+	int ret = -ENOENT;
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		ring = uc->rflow->r_ring;
+		break;
+	case DMA_MEM_TO_DEV:
+	case DMA_MEM_TO_MEM:
+		ring = uc->tchan->tc_ring;
+		break;
+	default:
+		break;
+	}
+
+	if (ring && k3_ringacc_ring_get_occ(ring)) {
+		struct udma_desc *d = NULL;
+
+		ret = k3_ringacc_ring_pop(ring, addr);
+		if (ret)
+			return ret;
+
+		/* Teardown completion */
+		if (cppi5_desc_is_tdcm(*addr))
+			return ret;
+
+		d = udma_udma_desc_from_paddr(uc, *addr);
+
+		if (d)
+			dma_sync_single_for_cpu(uc->ud->dev, *addr,
+						d->hwdesc[0].cppi5_desc_size,
+						DMA_FROM_DEVICE);
+		rmb(); /* Ensure that reads are not moved before this point */
+
+		if (!ret)
+			uc->in_ring_cnt--;
+	}
+
+	return ret;
+}
+
+static void udma_reset_rings(struct udma_chan *uc)
+{
+	struct k3_ring *ring1 = NULL;
+	struct k3_ring *ring2 = NULL;
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		if (uc->rchan) {
+			ring1 = uc->rflow->fd_ring;
+			ring2 = uc->rflow->r_ring;
+		}
+		break;
+	case DMA_MEM_TO_DEV:
+	case DMA_MEM_TO_MEM:
+		if (uc->tchan) {
+			ring1 = uc->tchan->t_ring;
+			ring2 = uc->tchan->tc_ring;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (ring1)
+		k3_ringacc_ring_reset_dma(ring1,
+					  k3_ringacc_ring_get_occ(ring1));
+	if (ring2)
+		k3_ringacc_ring_reset(ring2);
+
+	/* make sure we are not leaking memory by stalled descriptor */
+	if (uc->terminated_desc) {
+		udma_desc_free(&uc->terminated_desc->vd);
+		uc->terminated_desc = NULL;
+	}
+
+	uc->in_ring_cnt = 0;
+}
+
+static void udma_reset_counters(struct udma_chan *uc)
+{
+	u32 val;
+
+	if (uc->tchan) {
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_BCNT_REG, val);
+
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG, val);
+
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PCNT_REG);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PCNT_REG, val);
+
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG, val);
+	}
+
+	if (uc->rchan) {
+		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_BCNT_REG);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_BCNT_REG, val);
+
+		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG, val);
+
+		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PCNT_REG);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PCNT_REG, val);
+
+		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG, val);
+	}
+
+	uc->bcnt = 0;
+}
+
+static int udma_reset_chan(struct udma_chan *uc, bool hard)
+{
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, 0);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
+		break;
+	case DMA_MEM_TO_DEV:
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Reset all counters */
+	udma_reset_counters(uc);
+
+	/* Hard reset: re-initialize the channel to reset */
+	if (hard) {
+		struct udma_chan_config ucc_backup;
+		int ret;
+
+		memcpy(&ucc_backup, &uc->config, sizeof(uc->config));
+		uc->ud->ddev.device_free_chan_resources(&uc->vc.chan);
+
+		/* restore the channel configuration */
+		memcpy(&uc->config, &ucc_backup, sizeof(uc->config));
+		ret = uc->ud->ddev.device_alloc_chan_resources(&uc->vc.chan);
+		if (ret)
+			return ret;
+
+		/*
+		 * Setting forced teardown after forced reset helps recovering
+		 * the rchan.
+		 */
+		if (uc->config.dir == DMA_DEV_TO_MEM)
+			udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+					   UDMA_CHAN_RT_CTL_EN |
+					   UDMA_CHAN_RT_CTL_TDOWN |
+					   UDMA_CHAN_RT_CTL_FTDOWN);
+	}
+	uc->state = UDMA_CHAN_IS_IDLE;
+
+	return 0;
+}
+
+static void udma_start_desc(struct udma_chan *uc)
+{
+	struct udma_chan_config *ucc = &uc->config;
+
+	if (ucc->pkt_mode && (uc->cyclic || ucc->dir == DMA_DEV_TO_MEM)) {
+		int i;
+
+		/* Push all descriptors to ring for packet mode cyclic or RX */
+		for (i = 0; i < uc->desc->sglen; i++)
+			udma_push_to_ring(uc, i);
+	} else {
+		udma_push_to_ring(uc, 0);
+	}
+}
+
+static bool udma_chan_needs_reconfiguration(struct udma_chan *uc)
+{
+	/* Only PDMAs have staticTR */
+	if (uc->config.ep_type == PSIL_EP_NATIVE)
+		return false;
+
+	/* Check if the staticTR configuration has changed for TX */
+	if (memcmp(&uc->static_tr, &uc->desc->static_tr, sizeof(uc->static_tr)))
+		return true;
+
+	return false;
+}
+
+static int udma_start(struct udma_chan *uc)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&uc->vc);
+
+	if (!vd) {
+		uc->desc = NULL;
+		return -ENOENT;
+	}
+
+	list_del(&vd->node);
+
+	uc->desc = to_udma_desc(&vd->tx);
+
+	/* Channel is already running and does not need reconfiguration */
+	if (udma_is_chan_running(uc) && !udma_chan_needs_reconfiguration(uc)) {
+		udma_start_desc(uc);
+		goto out;
+	}
+
+	/* Make sure that we clear the teardown bit, if it is set */
+	udma_reset_chan(uc, false);
+
+	/* Push descriptors before we start the channel */
+	udma_start_desc(uc);
+
+	switch (uc->desc->dir) {
+	case DMA_DEV_TO_MEM:
+		/* Config remote TR */
+		if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+			u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+				  PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+			const struct udma_match_data *match_data =
+							uc->ud->match_data;
+
+			if (uc->config.enable_acc32)
+				val |= PDMA_STATIC_TR_XY_ACC32;
+			if (uc->config.enable_burst)
+				val |= PDMA_STATIC_TR_XY_BURST;
+
+			udma_rchanrt_write(uc->rchan,
+				UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+
+			udma_rchanrt_write(uc->rchan,
+				UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG,
+				PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt,
+						 match_data->statictr_z_mask));
+
+			/* save the current staticTR configuration */
+			memcpy(&uc->static_tr, &uc->desc->static_tr,
+			       sizeof(uc->static_tr));
+		}
+
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		/* Enable remote */
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE);
+
+		break;
+	case DMA_MEM_TO_DEV:
+		/* Config remote TR */
+		if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+			u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+				  PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+
+			if (uc->config.enable_acc32)
+				val |= PDMA_STATIC_TR_XY_ACC32;
+			if (uc->config.enable_burst)
+				val |= PDMA_STATIC_TR_XY_BURST;
+
+			udma_tchanrt_write(uc->tchan,
+				UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+
+			/* save the current staticTR configuration */
+			memcpy(&uc->static_tr, &uc->desc->static_tr,
+			       sizeof(uc->static_tr));
+		}
+
+		/* Enable remote */
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE);
+
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	uc->state = UDMA_CHAN_IS_ACTIVE;
+out:
+
+	return 0;
+}
+
+static int udma_stop(struct udma_chan *uc)
+{
+	enum udma_chan_state old_state = uc->state;
+
+	uc->state = UDMA_CHAN_IS_TERMINATING;
+	reinit_completion(&uc->teardown_completed);
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE |
+				   UDMA_PEER_RT_EN_TEARDOWN);
+		break;
+	case DMA_MEM_TO_DEV:
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE |
+				   UDMA_PEER_RT_EN_FLUSH);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN |
+				   UDMA_CHAN_RT_CTL_TDOWN);
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN |
+				   UDMA_CHAN_RT_CTL_TDOWN);
+		break;
+	default:
+		uc->state = old_state;
+		complete_all(&uc->teardown_completed);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void udma_cyclic_packet_elapsed(struct udma_chan *uc)
+{
+	struct udma_desc *d = uc->desc;
+	struct cppi5_host_desc_t *h_desc;
+
+	h_desc = d->hwdesc[d->desc_idx].cppi5_desc_vaddr;
+	cppi5_hdesc_reset_to_original(h_desc);
+	udma_push_to_ring(uc, d->desc_idx);
+	d->desc_idx = (d->desc_idx + 1) % d->sglen;
+}
+
+static inline void udma_fetch_epib(struct udma_chan *uc, struct udma_desc *d)
+{
+	struct cppi5_host_desc_t *h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+	memcpy(d->metadata, h_desc->epib, d->metadata_size);
+}
+
+static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
+{
+	u32 peer_bcnt, bcnt;
+
+	/* Only TX towards PDMA is affected */
+	if (uc->config.ep_type == PSIL_EP_NATIVE ||
+	    uc->config.dir != DMA_MEM_TO_DEV)
+		return true;
+
+	peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
+	bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
+
+	if (peer_bcnt < bcnt) {
+		uc->tx_drain.residue = bcnt - peer_bcnt;
+		uc->tx_drain.jiffie = jiffies;
+		return false;
+	}
+
+	return true;
+}
+
+static void udma_check_tx_completion(struct work_struct *work)
+{
+	struct udma_chan *uc = container_of(work, typeof(*uc),
+					    tx_drain.work.work);
+	bool desc_done = true;
+	u32 residue_diff;
+	unsigned long jiffie_diff, delay;
+
+	if (uc->desc) {
+		residue_diff = uc->tx_drain.residue;
+		jiffie_diff = uc->tx_drain.jiffie;
+		desc_done = udma_is_desc_really_done(uc, uc->desc);
+	}
+
+	if (!desc_done) {
+		jiffie_diff = uc->tx_drain.jiffie - jiffie_diff;
+		residue_diff -= uc->tx_drain.residue;
+		if (residue_diff) {
+			/* Try to guess when we should check next time */
+			residue_diff /= jiffie_diff;
+			delay = uc->tx_drain.residue / residue_diff / 3;
+			if (jiffies_to_msecs(delay) < 5)
+				delay = 0;
+		} else {
+			/* No progress, check again in 1 second  */
+			delay = HZ;
+		}
+
+		schedule_delayed_work(&uc->tx_drain.work, delay);
+	} else if (uc->desc) {
+		struct udma_desc *d = uc->desc;
+
+		uc->bcnt += d->residue;
+		udma_start(uc);
+		vchan_cookie_complete(&d->vd);
+	}
+}
+
+static irqreturn_t udma_ring_irq_handler(int irq, void *data)
+{
+	struct udma_chan *uc = data;
+	struct udma_desc *d;
+	unsigned long flags;
+	dma_addr_t paddr = 0;
+
+	if (udma_pop_from_ring(uc, &paddr) || !paddr)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	/* Teardown completion message */
+	if (cppi5_desc_is_tdcm(paddr)) {
+		/* Compensate our internal pop/push counter */
+		uc->in_ring_cnt++;
+
+		complete_all(&uc->teardown_completed);
+
+		if (uc->terminated_desc) {
+			udma_desc_free(&uc->terminated_desc->vd);
+			uc->terminated_desc = NULL;
+		}
+
+		if (!uc->desc)
+			udma_start(uc);
+
+		goto out;
+	}
+
+	d = udma_udma_desc_from_paddr(uc, paddr);
+
+	if (d) {
+		dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+								   d->desc_idx);
+		if (desc_paddr != paddr) {
+			dev_err(uc->ud->dev, "not matching descriptors!\n");
+			goto out;
+		}
+
+		if (uc->cyclic) {
+			/* push the descriptor back to the ring */
+			if (d == uc->desc) {
+				udma_cyclic_packet_elapsed(uc);
+				vchan_cyclic_callback(&d->vd);
+			}
+		} else {
+			bool desc_done = false;
+
+			if (d == uc->desc) {
+				desc_done = udma_is_desc_really_done(uc, d);
+
+				if (desc_done) {
+					uc->bcnt += d->residue;
+					udma_start(uc);
+				} else {
+					schedule_delayed_work(&uc->tx_drain.work,
+							      0);
+				}
+			}
+
+			if (desc_done)
+				vchan_cookie_complete(&d->vd);
+		}
+	}
+out:
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t udma_udma_irq_handler(int irq, void *data)
+{
+	struct udma_chan *uc = data;
+	struct udma_desc *d;
+	unsigned long flags;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+	d = uc->desc;
+	if (d) {
+		d->tr_idx = (d->tr_idx + 1) % d->sglen;
+
+		if (uc->cyclic) {
+			vchan_cyclic_callback(&d->vd);
+		} else {
+			/* TODO: figure out the real amount of data */
+			uc->bcnt += d->residue;
+			udma_start(uc);
+			vchan_cookie_complete(&d->vd);
+		}
+	}
+
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * __udma_alloc_gp_rflow_range - alloc range of GP RX flows
+ * @ud: UDMA device
+ * @from: Start the search from this flow id number
+ * @cnt: Number of consecutive flow ids to allocate
+ *
+ * Allocate range of RX flow ids for future use, those flows can be requested
+ * only using explicit flow id number. if @from is set to -1 it will try to find
+ * first free range. if @from is positive value it will force allocation only
+ * of the specified range of flows.
+ *
+ * Returns -ENOMEM if can't find free range.
+ * -EEXIST if requested range is busy.
+ * -EINVAL if wrong input values passed.
+ * Returns flow id on success.
+ */
+static int __udma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	int start, tmp_from;
+	DECLARE_BITMAP(tmp, K3_UDMA_MAX_RFLOWS);
+
+	tmp_from = from;
+	if (tmp_from < 0)
+		tmp_from = ud->rchan_cnt;
+	/* default flows can't be allocated and accessible only by id */
+	if (tmp_from < ud->rchan_cnt)
+		return -EINVAL;
+
+	if (tmp_from + cnt > ud->rflow_cnt)
+		return -EINVAL;
+
+	bitmap_or(tmp, ud->rflow_gp_map, ud->rflow_gp_map_allocated,
+		  ud->rflow_cnt);
+
+	start = bitmap_find_next_zero_area(tmp,
+					   ud->rflow_cnt,
+					   tmp_from, cnt, 0);
+	if (start >= ud->rflow_cnt)
+		return -ENOMEM;
+
+	if (from >= 0 && start != from)
+		return -EEXIST;
+
+	bitmap_set(ud->rflow_gp_map_allocated, start, cnt);
+	return start;
+}
+
+static int __udma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	if (from < ud->rchan_cnt)
+		return -EINVAL;
+	if (from + cnt > ud->rflow_cnt)
+		return -EINVAL;
+
+	bitmap_clear(ud->rflow_gp_map_allocated, from, cnt);
+	return 0;
+}
+
+static struct udma_rflow *__udma_get_rflow(struct udma_dev *ud, int id)
+{
+	/*
+	 * Attempt to request rflow by ID can be made for any rflow
+	 * if not in use with assumption that caller knows what's doing.
+	 * TI-SCI FW will perform additional permission check ant way, it's
+	 * safe
+	 */
+
+	if (id < 0 || id >= ud->rflow_cnt)
+		return ERR_PTR(-ENOENT);
+
+	if (test_bit(id, ud->rflow_in_use))
+		return ERR_PTR(-ENOENT);
+
+	/* GP rflow has to be allocated first */
+	if (!test_bit(id, ud->rflow_gp_map) &&
+	    !test_bit(id, ud->rflow_gp_map_allocated))
+		return ERR_PTR(-EINVAL);
+
+	dev_dbg(ud->dev, "get rflow%d\n", id);
+	set_bit(id, ud->rflow_in_use);
+	return &ud->rflows[id];
+}
+
+static void __udma_put_rflow(struct udma_dev *ud, struct udma_rflow *rflow)
+{
+	if (!test_bit(rflow->id, ud->rflow_in_use)) {
+		dev_err(ud->dev, "attempt to put unused rflow%d\n", rflow->id);
+		return;
+	}
+
+	dev_dbg(ud->dev, "put rflow%d\n", rflow->id);
+	clear_bit(rflow->id, ud->rflow_in_use);
+}
+
+#define UDMA_RESERVE_RESOURCE(res)					\
+static struct udma_##res *__udma_reserve_##res(struct udma_dev *ud,	\
+					       enum udma_tp_level tpl,	\
+					       int id)			\
+{									\
+	if (id >= 0) {							\
+		if (test_bit(id, ud->res##_map)) {			\
+			dev_err(ud->dev, "res##%d is in use\n", id);	\
+			return ERR_PTR(-ENOENT);			\
+		}							\
+	} else {							\
+		int start;						\
+									\
+		if (tpl >= ud->match_data->tpl_levels)			\
+			tpl = ud->match_data->tpl_levels - 1;		\
+									\
+		start = ud->match_data->level_start_idx[tpl];		\
+									\
+		id = find_next_zero_bit(ud->res##_map, ud->res##_cnt,	\
+					start);				\
+		if (id == ud->res##_cnt) {				\
+			return ERR_PTR(-ENOENT);			\
+		}							\
+	}								\
+									\
+	set_bit(id, ud->res##_map);					\
+	return &ud->res##s[id];						\
+}
+
+UDMA_RESERVE_RESOURCE(tchan);
+UDMA_RESERVE_RESOURCE(rchan);
+
+static int udma_get_tchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (uc->tchan) {
+		dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n",
+			uc->id, uc->tchan->id);
+		return 0;
+	}
+
+	uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl, -1);
+	if (IS_ERR(uc->tchan))
+		return PTR_ERR(uc->tchan);
+
+	return 0;
+}
+
+static int udma_get_rchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (uc->rchan) {
+		dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n",
+			uc->id, uc->rchan->id);
+		return 0;
+	}
+
+	uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl, -1);
+	if (IS_ERR(uc->rchan))
+		return PTR_ERR(uc->rchan);
+
+	return 0;
+}
+
+static int udma_get_chan_pair(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	const struct udma_match_data *match_data = ud->match_data;
+	int chan_id, end;
+
+	if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) {
+		dev_info(ud->dev, "chan%d: already have %d pair allocated\n",
+			 uc->id, uc->tchan->id);
+		return 0;
+	}
+
+	if (uc->tchan) {
+		dev_err(ud->dev, "chan%d: already have tchan%d allocated\n",
+			uc->id, uc->tchan->id);
+		return -EBUSY;
+	} else if (uc->rchan) {
+		dev_err(ud->dev, "chan%d: already have rchan%d allocated\n",
+			uc->id, uc->rchan->id);
+		return -EBUSY;
+	}
+
+	/* Can be optimized, but let's have it like this for now */
+	end = min(ud->tchan_cnt, ud->rchan_cnt);
+	/* Try to use the highest TPL channel pair for MEM_TO_MEM channels */
+	chan_id = match_data->level_start_idx[match_data->tpl_levels - 1];
+	for (; chan_id < end; chan_id++) {
+		if (!test_bit(chan_id, ud->tchan_map) &&
+		    !test_bit(chan_id, ud->rchan_map))
+			break;
+	}
+
+	if (chan_id == end)
+		return -ENOENT;
+
+	set_bit(chan_id, ud->tchan_map);
+	set_bit(chan_id, ud->rchan_map);
+	uc->tchan = &ud->tchans[chan_id];
+	uc->rchan = &ud->rchans[chan_id];
+
+	return 0;
+}
+
+static int udma_get_rflow(struct udma_chan *uc, int flow_id)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (!uc->rchan) {
+		dev_err(ud->dev, "chan%d: does not have rchan??\n", uc->id);
+		return -EINVAL;
+	}
+
+	if (uc->rflow) {
+		dev_dbg(ud->dev, "chan%d: already have rflow%d allocated\n",
+			uc->id, uc->rflow->id);
+		return 0;
+	}
+
+	uc->rflow = __udma_get_rflow(ud, flow_id);
+	if (IS_ERR(uc->rflow))
+		return PTR_ERR(uc->rflow);
+
+	return 0;
+}
+
+static void udma_put_rchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (uc->rchan) {
+		dev_dbg(ud->dev, "chan%d: put rchan%d\n", uc->id,
+			uc->rchan->id);
+		clear_bit(uc->rchan->id, ud->rchan_map);
+		uc->rchan = NULL;
+	}
+}
+
+static void udma_put_tchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (uc->tchan) {
+		dev_dbg(ud->dev, "chan%d: put tchan%d\n", uc->id,
+			uc->tchan->id);
+		clear_bit(uc->tchan->id, ud->tchan_map);
+		uc->tchan = NULL;
+	}
+}
+
+static void udma_put_rflow(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+
+	if (uc->rflow) {
+		dev_dbg(ud->dev, "chan%d: put rflow%d\n", uc->id,
+			uc->rflow->id);
+		__udma_put_rflow(ud, uc->rflow);
+		uc->rflow = NULL;
+	}
+}
+
+static void udma_free_tx_resources(struct udma_chan *uc)
+{
+	if (!uc->tchan)
+		return;
+
+	k3_ringacc_ring_free(uc->tchan->t_ring);
+	k3_ringacc_ring_free(uc->tchan->tc_ring);
+	uc->tchan->t_ring = NULL;
+	uc->tchan->tc_ring = NULL;
+
+	udma_put_tchan(uc);
+}
+
+static int udma_alloc_tx_resources(struct udma_chan *uc)
+{
+	struct k3_ring_cfg ring_cfg;
+	struct udma_dev *ud = uc->ud;
+	int ret;
+
+	ret = udma_get_tchan(uc);
+	if (ret)
+		return ret;
+
+	uc->tchan->t_ring = k3_ringacc_request_ring(ud->ringacc,
+						    uc->tchan->id, 0);
+	if (!uc->tchan->t_ring) {
+		ret = -EBUSY;
+		goto err_tx_ring;
+	}
+
+	uc->tchan->tc_ring = k3_ringacc_request_ring(ud->ringacc, -1, 0);
+	if (!uc->tchan->tc_ring) {
+		ret = -EBUSY;
+		goto err_txc_ring;
+	}
+
+	memset(&ring_cfg, 0, sizeof(ring_cfg));
+	ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+	ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+	ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+
+	ret = k3_ringacc_ring_cfg(uc->tchan->t_ring, &ring_cfg);
+	ret |= k3_ringacc_ring_cfg(uc->tchan->tc_ring, &ring_cfg);
+
+	if (ret)
+		goto err_ringcfg;
+
+	return 0;
+
+err_ringcfg:
+	k3_ringacc_ring_free(uc->tchan->tc_ring);
+	uc->tchan->tc_ring = NULL;
+err_txc_ring:
+	k3_ringacc_ring_free(uc->tchan->t_ring);
+	uc->tchan->t_ring = NULL;
+err_tx_ring:
+	udma_put_tchan(uc);
+
+	return ret;
+}
+
+static void udma_free_rx_resources(struct udma_chan *uc)
+{
+	if (!uc->rchan)
+		return;
+
+	if (uc->rflow) {
+		struct udma_rflow *rflow = uc->rflow;
+
+		k3_ringacc_ring_free(rflow->fd_ring);
+		k3_ringacc_ring_free(rflow->r_ring);
+		rflow->fd_ring = NULL;
+		rflow->r_ring = NULL;
+
+		udma_put_rflow(uc);
+	}
+
+	udma_put_rchan(uc);
+}
+
+static int udma_alloc_rx_resources(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct k3_ring_cfg ring_cfg;
+	struct udma_rflow *rflow;
+	int fd_ring_id;
+	int ret;
+
+	ret = udma_get_rchan(uc);
+	if (ret)
+		return ret;
+
+	/* For MEM_TO_MEM we don't need rflow or rings */
+	if (uc->config.dir == DMA_MEM_TO_MEM)
+		return 0;
+
+	ret = udma_get_rflow(uc, uc->rchan->id);
+	if (ret) {
+		ret = -EBUSY;
+		goto err_rflow;
+	}
+
+	rflow = uc->rflow;
+	fd_ring_id = ud->tchan_cnt + ud->echan_cnt + uc->rchan->id;
+	rflow->fd_ring = k3_ringacc_request_ring(ud->ringacc, fd_ring_id, 0);
+	if (!rflow->fd_ring) {
+		ret = -EBUSY;
+		goto err_rx_ring;
+	}
+
+	rflow->r_ring = k3_ringacc_request_ring(ud->ringacc, -1, 0);
+	if (!rflow->r_ring) {
+		ret = -EBUSY;
+		goto err_rxc_ring;
+	}
+
+	memset(&ring_cfg, 0, sizeof(ring_cfg));
+
+	if (uc->config.pkt_mode)
+		ring_cfg.size = SG_MAX_SEGMENTS;
+	else
+		ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+
+	ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+	ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+
+	ret = k3_ringacc_ring_cfg(rflow->fd_ring, &ring_cfg);
+	ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+	ret |= k3_ringacc_ring_cfg(rflow->r_ring, &ring_cfg);
+
+	if (ret)
+		goto err_ringcfg;
+
+	return 0;
+
+err_ringcfg:
+	k3_ringacc_ring_free(rflow->r_ring);
+	rflow->r_ring = NULL;
+err_rxc_ring:
+	k3_ringacc_ring_free(rflow->fd_ring);
+	rflow->fd_ring = NULL;
+err_rx_ring:
+	udma_put_rflow(uc);
+err_rflow:
+	udma_put_rchan(uc);
+
+	return ret;
+}
+
+#define TISCI_TCHAN_VALID_PARAMS (				\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID)
+
+#define TISCI_RCHAN_VALID_PARAMS (				\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID)
+
+static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+	struct udma_tchan *tchan = uc->tchan;
+	struct udma_rchan *rchan = uc->rchan;
+	int ret = 0;
+
+	/* Non synchronized - mem to mem type of transfer */
+	int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+
+	req_tx.valid_params = TISCI_TCHAN_VALID_PARAMS;
+	req_tx.nav_id = tisci_rm->tisci_dev_id;
+	req_tx.index = tchan->id;
+	req_tx.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
+	req_tx.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
+	req_tx.txcq_qnum = tc_ring;
+
+	ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+	if (ret) {
+		dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret);
+		return ret;
+	}
+
+	req_rx.valid_params = TISCI_RCHAN_VALID_PARAMS;
+	req_rx.nav_id = tisci_rm->tisci_dev_id;
+	req_rx.index = rchan->id;
+	req_rx.rx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
+	req_rx.rxcq_qnum = tc_ring;
+	req_rx.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
+
+	ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+	if (ret)
+		dev_err(ud->dev, "rchan%d alloc failed %d\n", rchan->id, ret);
+
+	return ret;
+}
+
+static int udma_tisci_tx_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+	struct udma_tchan *tchan = uc->tchan;
+	int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+	u32 mode, fetch_size;
+	int ret = 0;
+
+	if (uc->config.pkt_mode) {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+		fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib,
+						   uc->config.psd_size, 0);
+	} else {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+		fetch_size = sizeof(struct cppi5_desc_hdr_t);
+	}
+
+	req_tx.valid_params = TISCI_TCHAN_VALID_PARAMS;
+	req_tx.nav_id = tisci_rm->tisci_dev_id;
+	req_tx.index = tchan->id;
+	req_tx.tx_chan_type = mode;
+	req_tx.tx_supr_tdpkt = uc->config.notdpkt;
+	req_tx.tx_fetch_size = fetch_size >> 2;
+	req_tx.txcq_qnum = tc_ring;
+
+	ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+	if (ret)
+		dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret);
+
+	return ret;
+}
+
+static int udma_tisci_rx_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+	struct udma_rchan *rchan = uc->rchan;
+	int fd_ring = k3_ringacc_get_ring_id(uc->rflow->fd_ring);
+	int rx_ring = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+	struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
+	u32 mode, fetch_size;
+	int ret = 0;
+
+	if (uc->config.pkt_mode) {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+		fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib,
+						   uc->config.psd_size, 0);
+	} else {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+		fetch_size = sizeof(struct cppi5_desc_hdr_t);
+	}
+
+	req_rx.valid_params = TISCI_RCHAN_VALID_PARAMS;
+	req_rx.nav_id = tisci_rm->tisci_dev_id;
+	req_rx.index = rchan->id;
+	req_rx.rx_fetch_size =  fetch_size >> 2;
+	req_rx.rxcq_qnum = rx_ring;
+	req_rx.rx_chan_type = mode;
+
+	ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+	if (ret) {
+		dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret);
+		return ret;
+	}
+
+	flow_req.valid_params =
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+
+	flow_req.nav_id = tisci_rm->tisci_dev_id;
+	flow_req.flow_index = rchan->id;
+
+	if (uc->config.needs_epib)
+		flow_req.rx_einfo_present = 1;
+	else
+		flow_req.rx_einfo_present = 0;
+	if (uc->config.psd_size)
+		flow_req.rx_psinfo_present = 1;
+	else
+		flow_req.rx_psinfo_present = 0;
+	flow_req.rx_error_handling = 1;
+	flow_req.rx_dest_qnum = rx_ring;
+	flow_req.rx_src_tag_hi_sel = UDMA_RFLOW_SRCTAG_NONE;
+	flow_req.rx_src_tag_lo_sel = UDMA_RFLOW_SRCTAG_SRC_TAG;
+	flow_req.rx_dest_tag_hi_sel = UDMA_RFLOW_DSTTAG_DST_TAG_HI;
+	flow_req.rx_dest_tag_lo_sel = UDMA_RFLOW_DSTTAG_DST_TAG_LO;
+	flow_req.rx_fdq0_sz0_qnum = fd_ring;
+	flow_req.rx_fdq1_qnum = fd_ring;
+	flow_req.rx_fdq2_qnum = fd_ring;
+	flow_req.rx_fdq3_qnum = fd_ring;
+
+	ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req);
+
+	if (ret)
+		dev_err(ud->dev, "flow%d config failed: %d\n", rchan->id, ret);
+
+	return 0;
+}
+
+static int udma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_dev *ud = to_udma_dev(chan->device);
+	const struct udma_match_data *match_data = ud->match_data;
+	struct k3_ring *irq_ring;
+	u32 irq_udma_idx;
+	int ret;
+
+	if (uc->config.pkt_mode || uc->config.dir == DMA_MEM_TO_MEM) {
+		uc->use_dma_pool = true;
+		/* in case of MEM_TO_MEM we have maximum of two TRs */
+		if (uc->config.dir == DMA_MEM_TO_MEM) {
+			uc->config.hdesc_size = cppi5_trdesc_calc_size(
+					sizeof(struct cppi5_tr_type15_t), 2);
+			uc->config.pkt_mode = false;
+		}
+	}
+
+	if (uc->use_dma_pool) {
+		uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev,
+						 uc->config.hdesc_size,
+						 ud->desc_align,
+						 0);
+		if (!uc->hdesc_pool) {
+			dev_err(ud->ddev.dev,
+				"Descriptor pool allocation failed\n");
+			uc->use_dma_pool = false;
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * Make sure that the completion is in a known state:
+	 * No teardown, the channel is idle
+	 */
+	reinit_completion(&uc->teardown_completed);
+	complete_all(&uc->teardown_completed);
+	uc->state = UDMA_CHAN_IS_IDLE;
+
+	switch (uc->config.dir) {
+	case DMA_MEM_TO_MEM:
+		/* Non synchronized - mem to mem type of transfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__,
+			uc->id);
+
+		ret = udma_get_chan_pair(uc);
+		if (ret)
+			return ret;
+
+		ret = udma_alloc_tx_resources(uc);
+		if (ret)
+			return ret;
+
+		ret = udma_alloc_rx_resources(uc);
+		if (ret) {
+			udma_free_tx_resources(uc);
+			return ret;
+		}
+
+		uc->config.src_thread = ud->psil_base + uc->tchan->id;
+		uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+					K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->tchan->tc_ring;
+		irq_udma_idx = uc->tchan->id;
+
+		ret = udma_tisci_m2m_channel_config(uc);
+		break;
+	case DMA_MEM_TO_DEV:
+		/* Slave transfer synchronized - mem to dev (TX) trasnfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__,
+			uc->id);
+
+		ret = udma_alloc_tx_resources(uc);
+		if (ret) {
+			uc->config.remote_thread_id = -1;
+			return ret;
+		}
+
+		uc->config.src_thread = ud->psil_base + uc->tchan->id;
+		uc->config.dst_thread = uc->config.remote_thread_id;
+		uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->tchan->tc_ring;
+		irq_udma_idx = uc->tchan->id;
+
+		ret = udma_tisci_tx_channel_config(uc);
+		break;
+	case DMA_DEV_TO_MEM:
+		/* Slave transfer synchronized - dev to mem (RX) trasnfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__,
+			uc->id);
+
+		ret = udma_alloc_rx_resources(uc);
+		if (ret) {
+			uc->config.remote_thread_id = -1;
+			return ret;
+		}
+
+		uc->config.src_thread = uc->config.remote_thread_id;
+		uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+					K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->rflow->r_ring;
+		irq_udma_idx = match_data->rchan_oes_offset + uc->rchan->id;
+
+		ret = udma_tisci_rx_channel_config(uc);
+		break;
+	default:
+		/* Can not happen */
+		dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n",
+			__func__, uc->id, uc->config.dir);
+		return -EINVAL;
+	}
+
+	/* check if the channel configuration was successful */
+	if (ret)
+		goto err_res_free;
+
+	if (udma_is_chan_running(uc)) {
+		dev_warn(ud->dev, "chan%d: is running!\n", uc->id);
+		udma_stop(uc);
+		if (udma_is_chan_running(uc)) {
+			dev_err(ud->dev, "chan%d: won't stop!\n", uc->id);
+			goto err_res_free;
+		}
+	}
+
+	/* PSI-L pairing */
+	ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread);
+	if (ret) {
+		dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n",
+			uc->config.src_thread, uc->config.dst_thread);
+		goto err_res_free;
+	}
+
+	uc->psil_paired = true;
+
+	uc->irq_num_ring = k3_ringacc_get_ring_irq_num(irq_ring);
+	if (uc->irq_num_ring <= 0) {
+		dev_err(ud->dev, "Failed to get ring irq (index: %u)\n",
+			k3_ringacc_get_ring_id(irq_ring));
+		ret = -EINVAL;
+		goto err_psi_free;
+	}
+
+	ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler,
+			  IRQF_TRIGGER_HIGH, uc->name, uc);
+	if (ret) {
+		dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id);
+		goto err_irq_free;
+	}
+
+	/* Event from UDMA (TR events) only needed for slave TR mode channels */
+	if (is_slave_direction(uc->config.dir) && !uc->config.pkt_mode) {
+		uc->irq_num_udma = ti_sci_inta_msi_get_virq(ud->dev,
+							    irq_udma_idx);
+		if (uc->irq_num_udma <= 0) {
+			dev_err(ud->dev, "Failed to get udma irq (index: %u)\n",
+				irq_udma_idx);
+			free_irq(uc->irq_num_ring, uc);
+			ret = -EINVAL;
+			goto err_irq_free;
+		}
+
+		ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0,
+				  uc->name, uc);
+		if (ret) {
+			dev_err(ud->dev, "chan%d: UDMA irq request failed\n",
+				uc->id);
+			free_irq(uc->irq_num_ring, uc);
+			goto err_irq_free;
+		}
+	} else {
+		uc->irq_num_udma = 0;
+	}
+
+	udma_reset_rings(uc);
+
+	INIT_DELAYED_WORK_ONSTACK(&uc->tx_drain.work,
+				  udma_check_tx_completion);
+	return 0;
+
+err_irq_free:
+	uc->irq_num_ring = 0;
+	uc->irq_num_udma = 0;
+err_psi_free:
+	navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread);
+	uc->psil_paired = false;
+err_res_free:
+	udma_free_tx_resources(uc);
+	udma_free_rx_resources(uc);
+
+	udma_reset_uchan(uc);
+
+	if (uc->use_dma_pool) {
+		dma_pool_destroy(uc->hdesc_pool);
+		uc->use_dma_pool = false;
+	}
+
+	return ret;
+}
+
+static int udma_slave_config(struct dma_chan *chan,
+			     struct dma_slave_config *cfg)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+
+	memcpy(&uc->cfg, cfg, sizeof(uc->cfg));
+
+	return 0;
+}
+
+static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc,
+					    size_t tr_size, int tr_count,
+					    enum dma_transfer_direction dir)
+{
+	struct udma_hwdesc *hwdesc;
+	struct cppi5_desc_hdr_t *tr_desc;
+	struct udma_desc *d;
+	u32 reload_count = 0;
+	u32 ring_id;
+
+	switch (tr_size) {
+	case 16:
+	case 32:
+	case 64:
+	case 128:
+		break;
+	default:
+		dev_err(uc->ud->dev, "Unsupported TR size of %zu\n", tr_size);
+		return NULL;
+	}
+
+	/* We have only one descriptor containing multiple TRs */
+	d = kzalloc(sizeof(*d) + sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	d->sglen = tr_count;
+
+	d->hwdesc_count = 1;
+	hwdesc = &d->hwdesc[0];
+
+	/* Allocate memory for DMA ring descriptor */
+	if (uc->use_dma_pool) {
+		hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+		hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						GFP_NOWAIT,
+						&hwdesc->cppi5_desc_paddr);
+	} else {
+		hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size,
+								 tr_count);
+		hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
+						uc->ud->desc_align);
+		hwdesc->cppi5_desc_vaddr = dma_alloc_coherent(uc->ud->dev,
+						hwdesc->cppi5_desc_size,
+						&hwdesc->cppi5_desc_paddr,
+						GFP_NOWAIT);
+	}
+
+	if (!hwdesc->cppi5_desc_vaddr) {
+		kfree(d);
+		return NULL;
+	}
+
+	/* Start of the TR req records */
+	hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
+	/* Start address of the TR response array */
+	hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size * tr_count;
+
+	tr_desc = hwdesc->cppi5_desc_vaddr;
+
+	if (uc->cyclic)
+		reload_count = CPPI5_INFO0_TRDESC_RLDCNT_INFINITE;
+
+	if (dir == DMA_DEV_TO_MEM)
+		ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	else
+		ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+	cppi5_trdesc_init(tr_desc, tr_count, tr_size, 0, reload_count);
+	cppi5_desc_set_pktids(tr_desc, uc->id,
+			      CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+	cppi5_desc_set_retpolicy(tr_desc, 0, ring_id);
+
+	return d;
+}
+
+static struct udma_desc *
+udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
+		      unsigned int sglen, enum dma_transfer_direction dir,
+		      unsigned long tx_flags, void *context)
+{
+	enum dma_slave_buswidth dev_width;
+	struct scatterlist *sgent;
+	struct udma_desc *d;
+	size_t tr_size;
+	struct cppi5_tr_type1_t *tr_req = NULL;
+	unsigned int i;
+	u32 burst;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!burst)
+		burst = 1;
+
+	/* Now allocate and setup the descriptor. */
+	tr_size = sizeof(struct cppi5_tr_type1_t);
+	d = udma_alloc_tr_desc(uc, tr_size, sglen, dir);
+	if (!d)
+		return NULL;
+
+	d->sglen = sglen;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+	for_each_sg(sgl, sgent, sglen, i) {
+		d->residue += sg_dma_len(sgent);
+
+		cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+		cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+		tr_req[i].addr = sg_dma_address(sgent);
+		tr_req[i].icnt0 = burst * dev_width;
+		tr_req[i].dim1 = burst * dev_width;
+		tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0;
+	}
+
+	cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP);
+
+	return d;
+}
+
+static int udma_configure_statictr(struct udma_chan *uc, struct udma_desc *d,
+				   enum dma_slave_buswidth dev_width,
+				   u16 elcnt)
+{
+	if (uc->config.ep_type != PSIL_EP_PDMA_XY)
+		return 0;
+
+	/* Bus width translates to the element size (ES) */
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		d->static_tr.elsize = 0;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		d->static_tr.elsize = 1;
+		break;
+	case DMA_SLAVE_BUSWIDTH_3_BYTES:
+		d->static_tr.elsize = 2;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		d->static_tr.elsize = 3;
+		break;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		d->static_tr.elsize = 4;
+		break;
+	default: /* not reached */
+		return -EINVAL;
+	}
+
+	d->static_tr.elcnt = elcnt;
+
+	/*
+	 * PDMA must to close the packet when the channel is in packet mode.
+	 * For TR mode when the channel is not cyclic we also need PDMA to close
+	 * the packet otherwise the transfer will stall because PDMA holds on
+	 * the data it has received from the peripheral.
+	 */
+	if (uc->config.pkt_mode || !uc->cyclic) {
+		unsigned int div = dev_width * elcnt;
+
+		if (uc->cyclic)
+			d->static_tr.bstcnt = d->residue / d->sglen / div;
+		else
+			d->static_tr.bstcnt = d->residue / div;
+
+		if (uc->config.dir == DMA_DEV_TO_MEM &&
+		    d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask)
+			return -EINVAL;
+	} else {
+		d->static_tr.bstcnt = 0;
+	}
+
+	return 0;
+}
+
+static struct udma_desc *
+udma_prep_slave_sg_pkt(struct udma_chan *uc, struct scatterlist *sgl,
+		       unsigned int sglen, enum dma_transfer_direction dir,
+		       unsigned long tx_flags, void *context)
+{
+	struct scatterlist *sgent;
+	struct cppi5_host_desc_t *h_desc = NULL;
+	struct udma_desc *d;
+	u32 ring_id;
+	unsigned int i;
+
+	d = kzalloc(sizeof(*d) + sglen * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	d->sglen = sglen;
+	d->hwdesc_count = sglen;
+
+	if (dir == DMA_DEV_TO_MEM)
+		ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	else
+		ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+	for_each_sg(sgl, sgent, sglen, i) {
+		struct udma_hwdesc *hwdesc = &d->hwdesc[i];
+		dma_addr_t sg_addr = sg_dma_address(sgent);
+		struct cppi5_host_desc_t *desc;
+		size_t sg_len = sg_dma_len(sgent);
+
+		hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						GFP_NOWAIT,
+						&hwdesc->cppi5_desc_paddr);
+		if (!hwdesc->cppi5_desc_vaddr) {
+			dev_err(uc->ud->dev,
+				"descriptor%d allocation failed\n", i);
+
+			udma_free_hwdesc(uc, d);
+			kfree(d);
+			return NULL;
+		}
+
+		d->residue += sg_len;
+		hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+		desc = hwdesc->cppi5_desc_vaddr;
+
+		if (i == 0) {
+			cppi5_hdesc_init(desc, 0, 0);
+			/* Flow and Packed ID */
+			cppi5_desc_set_pktids(&desc->hdr, uc->id,
+					      CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+			cppi5_desc_set_retpolicy(&desc->hdr, 0, ring_id);
+		} else {
+			cppi5_hdesc_reset_hbdesc(desc);
+			cppi5_desc_set_retpolicy(&desc->hdr, 0, 0xffff);
+		}
+
+		/* attach the sg buffer to the descriptor */
+		cppi5_hdesc_attach_buf(desc, sg_addr, sg_len, sg_addr, sg_len);
+
+		/* Attach link as host buffer descriptor */
+		if (h_desc)
+			cppi5_hdesc_link_hbdesc(h_desc,
+						hwdesc->cppi5_desc_paddr);
+
+		if (dir == DMA_MEM_TO_DEV)
+			h_desc = desc;
+	}
+
+	if (d->residue >= SZ_4M) {
+		dev_err(uc->ud->dev,
+			"%s: Transfer size %u is over the supported 4M range\n",
+			__func__, d->residue);
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return NULL;
+	}
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+	cppi5_hdesc_set_pktlen(h_desc, d->residue);
+
+	return d;
+}
+
+static int udma_attach_metadata(struct dma_async_tx_descriptor *desc,
+				void *data, size_t len)
+{
+	struct udma_desc *d = to_udma_desc(desc);
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct cppi5_host_desc_t *h_desc;
+	u32 psd_size = len;
+	u32 flags = 0;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return -ENOTSUPP;
+
+	if (!data || len > uc->config.metadata_size)
+		return -EINVAL;
+
+	if (uc->config.needs_epib && len < CPPI5_INFO0_HDESC_EPIB_SIZE)
+		return -EINVAL;
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+	if (d->dir == DMA_MEM_TO_DEV)
+		memcpy(h_desc->epib, data, len);
+
+	if (uc->config.needs_epib)
+		psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+
+	d->metadata = data;
+	d->metadata_size = len;
+	if (uc->config.needs_epib)
+		flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+
+	cppi5_hdesc_update_flags(h_desc, flags);
+	cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+	return 0;
+}
+
+static void *udma_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+				   size_t *payload_len, size_t *max_len)
+{
+	struct udma_desc *d = to_udma_desc(desc);
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct cppi5_host_desc_t *h_desc;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return ERR_PTR(-ENOTSUPP);
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+	*max_len = uc->config.metadata_size;
+
+	*payload_len = cppi5_hdesc_epib_present(&h_desc->hdr) ?
+		       CPPI5_INFO0_HDESC_EPIB_SIZE : 0;
+	*payload_len += cppi5_hdesc_get_psdata_size(h_desc);
+
+	return h_desc->epib;
+}
+
+static int udma_set_metadata_len(struct dma_async_tx_descriptor *desc,
+				 size_t payload_len)
+{
+	struct udma_desc *d = to_udma_desc(desc);
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct cppi5_host_desc_t *h_desc;
+	u32 psd_size = payload_len;
+	u32 flags = 0;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return -ENOTSUPP;
+
+	if (payload_len > uc->config.metadata_size)
+		return -EINVAL;
+
+	if (uc->config.needs_epib && payload_len < CPPI5_INFO0_HDESC_EPIB_SIZE)
+		return -EINVAL;
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+	if (uc->config.needs_epib) {
+		psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+		flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+	}
+
+	cppi5_hdesc_update_flags(h_desc, flags);
+	cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+	return 0;
+}
+
+static struct dma_descriptor_metadata_ops metadata_ops = {
+	.attach = udma_attach_metadata,
+	.get_ptr = udma_get_metadata_ptr,
+	.set_len = udma_set_metadata_len,
+};
+
+static struct dma_async_tx_descriptor *
+udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		   unsigned int sglen, enum dma_transfer_direction dir,
+		   unsigned long tx_flags, void *context)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	u32 burst;
+
+	if (dir != uc->config.dir) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(dir));
+		return NULL;
+	}
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!burst)
+		burst = 1;
+
+	if (uc->config.pkt_mode)
+		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
+					   context);
+	else
+		d = udma_prep_slave_sg_tr(uc, sgl, sglen, dir, tx_flags,
+					  context);
+
+	if (!d)
+		return NULL;
+
+	d->dir = dir;
+	d->desc_idx = 0;
+	d->tr_idx = 0;
+
+	/* static TR for remote PDMA */
+	if (udma_configure_statictr(uc, d, dev_width, burst)) {
+		dev_err(uc->ud->dev,
+			"%s: StaticTR Z is limited to maximum 4095 (%u)\n",
+			__func__, d->static_tr.bstcnt);
+
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return NULL;
+	}
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+static struct udma_desc *
+udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
+			size_t buf_len, size_t period_len,
+			enum dma_transfer_direction dir, unsigned long flags)
+{
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	size_t tr_size;
+	struct cppi5_tr_type1_t *tr_req;
+	unsigned int i;
+	unsigned int periods = buf_len / period_len;
+	u32 burst;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!burst)
+		burst = 1;
+
+	/* Now allocate and setup the descriptor. */
+	tr_size = sizeof(struct cppi5_tr_type1_t);
+	d = udma_alloc_tr_desc(uc, tr_size, periods, dir);
+	if (!d)
+		return NULL;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+	for (i = 0; i < periods; i++) {
+		cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+		tr_req[i].addr = buf_addr + period_len * i;
+		tr_req[i].icnt0 = dev_width;
+		tr_req[i].icnt1 = period_len / dev_width;
+		tr_req[i].dim1 = dev_width;
+
+		if (!(flags & DMA_PREP_INTERRUPT))
+			cppi5_tr_csf_set(&tr_req[i].flags,
+					 CPPI5_TR_CSF_SUPR_EVT);
+	}
+
+	return d;
+}
+
+static struct udma_desc *
+udma_prep_dma_cyclic_pkt(struct udma_chan *uc, dma_addr_t buf_addr,
+			 size_t buf_len, size_t period_len,
+			 enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct udma_desc *d;
+	u32 ring_id;
+	int i;
+	int periods = buf_len / period_len;
+
+	if (periods > (K3_UDMA_DEFAULT_RING_SIZE - 1))
+		return NULL;
+
+	if (period_len >= SZ_4M)
+		return NULL;
+
+	d = kzalloc(sizeof(*d) + periods * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	d->hwdesc_count = periods;
+
+	/* TODO: re-check this... */
+	if (dir == DMA_DEV_TO_MEM)
+		ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	else
+		ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+	for (i = 0; i < periods; i++) {
+		struct udma_hwdesc *hwdesc = &d->hwdesc[i];
+		dma_addr_t period_addr = buf_addr + (period_len * i);
+		struct cppi5_host_desc_t *h_desc;
+
+		hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						GFP_NOWAIT,
+						&hwdesc->cppi5_desc_paddr);
+		if (!hwdesc->cppi5_desc_vaddr) {
+			dev_err(uc->ud->dev,
+				"descriptor%d allocation failed\n", i);
+
+			udma_free_hwdesc(uc, d);
+			kfree(d);
+			return NULL;
+		}
+
+		hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+		h_desc = hwdesc->cppi5_desc_vaddr;
+
+		cppi5_hdesc_init(h_desc, 0, 0);
+		cppi5_hdesc_set_pktlen(h_desc, period_len);
+
+		/* Flow and Packed ID */
+		cppi5_desc_set_pktids(&h_desc->hdr, uc->id,
+				      CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+		cppi5_desc_set_retpolicy(&h_desc->hdr, 0, ring_id);
+
+		/* attach each period to a new descriptor */
+		cppi5_hdesc_attach_buf(h_desc,
+				       period_addr, period_len,
+				       period_addr, period_len);
+	}
+
+	return d;
+}
+
+static struct dma_async_tx_descriptor *
+udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		     size_t period_len, enum dma_transfer_direction dir,
+		     unsigned long flags)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	u32 burst;
+
+	if (dir != uc->config.dir) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(dir));
+		return NULL;
+	}
+
+	uc->cyclic = true;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!burst)
+		burst = 1;
+
+	if (uc->config.pkt_mode)
+		d = udma_prep_dma_cyclic_pkt(uc, buf_addr, buf_len, period_len,
+					     dir, flags);
+	else
+		d = udma_prep_dma_cyclic_tr(uc, buf_addr, buf_len, period_len,
+					    dir, flags);
+
+	if (!d)
+		return NULL;
+
+	d->sglen = buf_len / period_len;
+
+	d->dir = dir;
+	d->residue = buf_len;
+
+	/* static TR for remote PDMA */
+	if (udma_configure_statictr(uc, d, dev_width, burst)) {
+		dev_err(uc->ud->dev,
+			"%s: StaticTR Z is limited to maximum 4095 (%u)\n",
+			__func__, d->static_tr.bstcnt);
+
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return NULL;
+	}
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		     size_t len, unsigned long tx_flags)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_desc *d;
+	struct cppi5_tr_type15_t *tr_req;
+	int num_tr;
+	size_t tr_size = sizeof(struct cppi5_tr_type15_t);
+	u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+
+	if (uc->config.dir != DMA_MEM_TO_MEM) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(DMA_MEM_TO_MEM));
+		return NULL;
+	}
+
+	if (len < SZ_64K) {
+		num_tr = 1;
+		tr0_cnt0 = len;
+		tr0_cnt1 = 1;
+	} else {
+		unsigned long align_to = __ffs(src | dest);
+
+		if (align_to > 3)
+			align_to = 3;
+		/*
+		 * Keep simple: tr0: SZ_64K-alignment blocks,
+		 *		tr1: the remaining
+		 */
+		num_tr = 2;
+		tr0_cnt0 = (SZ_64K - BIT(align_to));
+		if (len / tr0_cnt0 >= SZ_64K) {
+			dev_err(uc->ud->dev, "size %zu is not supported\n",
+				len);
+			return NULL;
+		}
+
+		tr0_cnt1 = len / tr0_cnt0;
+		tr1_cnt0 = len % tr0_cnt0;
+	}
+
+	d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
+	if (!d)
+		return NULL;
+
+	d->dir = DMA_MEM_TO_MEM;
+	d->desc_idx = 0;
+	d->tr_idx = 0;
+	d->residue = len;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+
+	cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true,
+		      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+	cppi5_tr_csf_set(&tr_req[0].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+	tr_req[0].addr = src;
+	tr_req[0].icnt0 = tr0_cnt0;
+	tr_req[0].icnt1 = tr0_cnt1;
+	tr_req[0].icnt2 = 1;
+	tr_req[0].icnt3 = 1;
+	tr_req[0].dim1 = tr0_cnt0;
+
+	tr_req[0].daddr = dest;
+	tr_req[0].dicnt0 = tr0_cnt0;
+	tr_req[0].dicnt1 = tr0_cnt1;
+	tr_req[0].dicnt2 = 1;
+	tr_req[0].dicnt3 = 1;
+	tr_req[0].ddim1 = tr0_cnt0;
+
+	if (num_tr == 2) {
+		cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+		cppi5_tr_csf_set(&tr_req[1].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+		tr_req[1].addr = src + tr0_cnt1 * tr0_cnt0;
+		tr_req[1].icnt0 = tr1_cnt0;
+		tr_req[1].icnt1 = 1;
+		tr_req[1].icnt2 = 1;
+		tr_req[1].icnt3 = 1;
+
+		tr_req[1].daddr = dest + tr0_cnt1 * tr0_cnt0;
+		tr_req[1].dicnt0 = tr1_cnt0;
+		tr_req[1].dicnt1 = 1;
+		tr_req[1].dicnt2 = 1;
+		tr_req[1].dicnt3 = 1;
+	}
+
+	cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, CPPI5_TR_CSF_EOP);
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+static void udma_issue_pending(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	/* If we have something pending and no active descriptor, then */
+	if (vchan_issue_pending(&uc->vc) && !uc->desc) {
+		/*
+		 * start a descriptor if the channel is NOT [marked as
+		 * terminating _and_ it is still running (teardown has not
+		 * completed yet)].
+		 */
+		if (!(uc->state == UDMA_CHAN_IS_TERMINATING &&
+		      udma_is_chan_running(uc)))
+			udma_start(uc);
+	}
+
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+}
+
+static enum dma_status udma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
+		ret = DMA_PAUSED;
+
+	if (ret == DMA_COMPLETE || !txstate)
+		goto out;
+
+	if (uc->desc && uc->desc->vd.tx.cookie == cookie) {
+		u32 peer_bcnt = 0;
+		u32 bcnt = 0;
+		u32 residue = uc->desc->residue;
+		u32 delay = 0;
+
+		if (uc->desc->dir == DMA_MEM_TO_DEV) {
+			bcnt = udma_tchanrt_read(uc->tchan,
+						 UDMA_TCHAN_RT_SBCNT_REG);
+
+			if (uc->config.ep_type != PSIL_EP_NATIVE) {
+				peer_bcnt = udma_tchanrt_read(uc->tchan,
+						UDMA_TCHAN_RT_PEER_BCNT_REG);
+
+				if (bcnt > peer_bcnt)
+					delay = bcnt - peer_bcnt;
+			}
+		} else if (uc->desc->dir == DMA_DEV_TO_MEM) {
+			bcnt = udma_rchanrt_read(uc->rchan,
+						 UDMA_RCHAN_RT_BCNT_REG);
+
+			if (uc->config.ep_type != PSIL_EP_NATIVE) {
+				peer_bcnt = udma_rchanrt_read(uc->rchan,
+						UDMA_RCHAN_RT_PEER_BCNT_REG);
+
+				if (peer_bcnt > bcnt)
+					delay = peer_bcnt - bcnt;
+			}
+		} else {
+			bcnt = udma_tchanrt_read(uc->tchan,
+						 UDMA_TCHAN_RT_BCNT_REG);
+		}
+
+		bcnt -= uc->bcnt;
+		if (bcnt && !(bcnt % uc->desc->residue))
+			residue = 0;
+		else
+			residue -= bcnt % uc->desc->residue;
+
+		if (!residue && (uc->config.dir == DMA_DEV_TO_MEM || !delay)) {
+			ret = DMA_COMPLETE;
+			delay = 0;
+		}
+
+		dma_set_residue(txstate, residue);
+		dma_set_in_flight_bytes(txstate, delay);
+
+	} else {
+		ret = DMA_COMPLETE;
+	}
+
+out:
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+	return ret;
+}
+
+static int udma_pause(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+
+	if (!uc->desc)
+		return -EINVAL;
+
+	/* pause the channel */
+	switch (uc->desc->dir) {
+	case DMA_DEV_TO_MEM:
+		udma_rchanrt_update_bits(uc->rchan,
+					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE,
+					 UDMA_PEER_RT_EN_PAUSE);
+		break;
+	case DMA_MEM_TO_DEV:
+		udma_tchanrt_update_bits(uc->tchan,
+					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE,
+					 UDMA_PEER_RT_EN_PAUSE);
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+					 UDMA_CHAN_RT_CTL_PAUSE,
+					 UDMA_CHAN_RT_CTL_PAUSE);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int udma_resume(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+
+	if (!uc->desc)
+		return -EINVAL;
+
+	/* resume the channel */
+	switch (uc->desc->dir) {
+	case DMA_DEV_TO_MEM:
+		udma_rchanrt_update_bits(uc->rchan,
+					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE, 0);
+
+		break;
+	case DMA_MEM_TO_DEV:
+		udma_tchanrt_update_bits(uc->tchan,
+					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE, 0);
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+					 UDMA_CHAN_RT_CTL_PAUSE, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int udma_terminate_all(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	if (udma_is_chan_running(uc))
+		udma_stop(uc);
+
+	if (uc->desc) {
+		uc->terminated_desc = uc->desc;
+		uc->desc = NULL;
+		uc->terminated_desc->terminated = true;
+		cancel_delayed_work(&uc->tx_drain.work);
+	}
+
+	uc->paused = false;
+
+	vchan_get_all_descriptors(&uc->vc, &head);
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+	vchan_dma_desc_free_list(&uc->vc, &head);
+
+	return 0;
+}
+
+static void udma_synchronize(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	unsigned long timeout = msecs_to_jiffies(1000);
+
+	vchan_synchronize(&uc->vc);
+
+	if (uc->state == UDMA_CHAN_IS_TERMINATING) {
+		timeout = wait_for_completion_timeout(&uc->teardown_completed,
+						      timeout);
+		if (!timeout) {
+			dev_warn(uc->ud->dev, "chan%d teardown timeout!\n",
+				 uc->id);
+			udma_dump_chan_stdata(uc);
+			udma_reset_chan(uc, true);
+		}
+	}
+
+	udma_reset_chan(uc, false);
+	if (udma_is_chan_running(uc))
+		dev_warn(uc->ud->dev, "chan%d refused to stop!\n", uc->id);
+
+	cancel_delayed_work_sync(&uc->tx_drain.work);
+	udma_reset_rings(uc);
+}
+
+static void udma_desc_pre_callback(struct virt_dma_chan *vc,
+				   struct virt_dma_desc *vd,
+				   struct dmaengine_result *result)
+{
+	struct udma_chan *uc = to_udma_chan(&vc->chan);
+	struct udma_desc *d;
+
+	if (!vd)
+		return;
+
+	d = to_udma_desc(&vd->tx);
+
+	if (d->metadata_size)
+		udma_fetch_epib(uc, d);
+
+	/* Provide residue information for the client */
+	if (result) {
+		void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx);
+
+		if (cppi5_desc_get_type(desc_vaddr) ==
+		    CPPI5_INFO0_DESC_TYPE_VAL_HOST) {
+			result->residue = d->residue -
+					  cppi5_hdesc_get_pktlen(desc_vaddr);
+			if (result->residue)
+				result->result = DMA_TRANS_ABORTED;
+			else
+				result->result = DMA_TRANS_NOERROR;
+		} else {
+			result->residue = 0;
+			result->result = DMA_TRANS_NOERROR;
+		}
+	}
+}
+
+/*
+ * This tasklet handles the completion of a DMA descriptor by
+ * calling its callback and freeing it.
+ */
+static void udma_vchan_complete(unsigned long arg)
+{
+	struct virt_dma_chan *vc = (struct virt_dma_chan *)arg;
+	struct virt_dma_desc *vd, *_vd;
+	struct dmaengine_desc_callback cb;
+	LIST_HEAD(head);
+
+	spin_lock_irq(&vc->lock);
+	list_splice_tail_init(&vc->desc_completed, &head);
+	vd = vc->cyclic;
+	if (vd) {
+		vc->cyclic = NULL;
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+	} else {
+		memset(&cb, 0, sizeof(cb));
+	}
+	spin_unlock_irq(&vc->lock);
+
+	udma_desc_pre_callback(vc, vd, NULL);
+	dmaengine_desc_callback_invoke(&cb, NULL);
+
+	list_for_each_entry_safe(vd, _vd, &head, node) {
+		struct dmaengine_result result;
+
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+
+		list_del(&vd->node);
+
+		udma_desc_pre_callback(vc, vd, &result);
+		dmaengine_desc_callback_invoke(&cb, &result);
+
+		vchan_vdesc_fini(vd);
+	}
+}
+
+static void udma_free_chan_resources(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_dev *ud = to_udma_dev(chan->device);
+
+	udma_terminate_all(chan);
+	if (uc->terminated_desc) {
+		udma_reset_chan(uc, false);
+		udma_reset_rings(uc);
+	}
+
+	cancel_delayed_work_sync(&uc->tx_drain.work);
+	destroy_delayed_work_on_stack(&uc->tx_drain.work);
+
+	if (uc->irq_num_ring > 0) {
+		free_irq(uc->irq_num_ring, uc);
+
+		uc->irq_num_ring = 0;
+	}
+	if (uc->irq_num_udma > 0) {
+		free_irq(uc->irq_num_udma, uc);
+
+		uc->irq_num_udma = 0;
+	}
+
+	/* Release PSI-L pairing */
+	if (uc->psil_paired) {
+		navss_psil_unpair(ud, uc->config.src_thread,
+				  uc->config.dst_thread);
+		uc->psil_paired = false;
+	}
+
+	vchan_free_chan_resources(&uc->vc);
+	tasklet_kill(&uc->vc.task);
+
+	udma_free_tx_resources(uc);
+	udma_free_rx_resources(uc);
+	udma_reset_uchan(uc);
+
+	if (uc->use_dma_pool) {
+		dma_pool_destroy(uc->hdesc_pool);
+		uc->use_dma_pool = false;
+	}
+}
+
+static struct platform_driver udma_driver;
+
+static bool udma_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct udma_chan_config *ucc;
+	struct psil_endpoint_config *ep_config;
+	struct udma_chan *uc;
+	struct udma_dev *ud;
+	u32 *args;
+
+	if (chan->device->dev->driver != &udma_driver.driver)
+		return false;
+
+	uc = to_udma_chan(chan);
+	ucc = &uc->config;
+	ud = uc->ud;
+	args = param;
+
+	ucc->remote_thread_id = args[0];
+
+	if (ucc->remote_thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)
+		ucc->dir = DMA_MEM_TO_DEV;
+	else
+		ucc->dir = DMA_DEV_TO_MEM;
+
+	ep_config = psil_get_ep_config(ucc->remote_thread_id);
+	if (IS_ERR(ep_config)) {
+		dev_err(ud->dev, "No configuration for psi-l thread 0x%04x\n",
+			ucc->remote_thread_id);
+		ucc->dir = DMA_MEM_TO_MEM;
+		ucc->remote_thread_id = -1;
+		return false;
+	}
+
+	ucc->pkt_mode = ep_config->pkt_mode;
+	ucc->channel_tpl = ep_config->channel_tpl;
+	ucc->notdpkt = ep_config->notdpkt;
+	ucc->ep_type = ep_config->ep_type;
+
+	if (ucc->ep_type != PSIL_EP_NATIVE) {
+		const struct udma_match_data *match_data = ud->match_data;
+
+		if (match_data->flags & UDMA_FLAG_PDMA_ACC32)
+			ucc->enable_acc32 = ep_config->pdma_acc32;
+		if (match_data->flags & UDMA_FLAG_PDMA_BURST)
+			ucc->enable_burst = ep_config->pdma_burst;
+	}
+
+	ucc->needs_epib = ep_config->needs_epib;
+	ucc->psd_size = ep_config->psd_size;
+	ucc->metadata_size =
+			(ucc->needs_epib ? CPPI5_INFO0_HDESC_EPIB_SIZE : 0) +
+			ucc->psd_size;
+
+	if (ucc->pkt_mode)
+		ucc->hdesc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+				 ucc->metadata_size, ud->desc_align);
+
+	dev_dbg(ud->dev, "chan%d: Remote thread: 0x%04x (%s)\n", uc->id,
+		ucc->remote_thread_id, dmaengine_get_direction_text(ucc->dir));
+
+	return true;
+}
+
+static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct udma_dev *ud = ofdma->of_dma_data;
+	dma_cap_mask_t mask = ud->ddev.cap_mask;
+	struct dma_chan *chan;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	chan = __dma_request_channel(&mask, udma_dma_filter_fn,
+				     &dma_spec->args[0], ofdma->of_node);
+	if (!chan) {
+		dev_err(ud->dev, "get channel fail in %s.\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return chan;
+}
+
+static struct udma_match_data am654_main_data = {
+	.psil_base = 0x1000,
+	.enable_memcpy_support = true,
+	.statictr_z_mask = GENMASK(11, 0),
+	.rchan_oes_offset = 0x2000,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 8, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+static struct udma_match_data am654_mcu_data = {
+	.psil_base = 0x6000,
+	.enable_memcpy_support = true, /* TEST: DMA domains */
+	.statictr_z_mask = GENMASK(11, 0),
+	.rchan_oes_offset = 0x2000,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 2, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+static struct udma_match_data j721e_main_data = {
+	.psil_base = 0x1000,
+	.enable_memcpy_support = true,
+	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
+	.statictr_z_mask = GENMASK(23, 0),
+	.rchan_oes_offset = 0x400,
+	.tpl_levels = 3,
+	.level_start_idx = {
+		[0] = 16, /* Normal channels */
+		[1] = 4, /* High Throughput channels */
+		[2] = 0, /* Ultra High Throughput channels */
+	},
+};
+
+static struct udma_match_data j721e_mcu_data = {
+	.psil_base = 0x6000,
+	.enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
+	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
+	.statictr_z_mask = GENMASK(23, 0),
+	.rchan_oes_offset = 0x400,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 2, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+static const struct of_device_id udma_of_match[] = {
+	{
+		.compatible = "ti,am654-navss-main-udmap",
+		.data = &am654_main_data,
+	},
+	{
+		.compatible = "ti,am654-navss-mcu-udmap",
+		.data = &am654_mcu_data,
+	}, {
+		.compatible = "ti,j721e-navss-main-udmap",
+		.data = &j721e_main_data,
+	}, {
+		.compatible = "ti,j721e-navss-mcu-udmap",
+		.data = &j721e_mcu_data,
+	},
+	{ /* Sentinel */ },
+};
+
+static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud)
+{
+	struct resource *res;
+	int i;
+
+	for (i = 0; i < MMR_LAST; i++) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   mmr_names[i]);
+		ud->mmrs[i] = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(ud->mmrs[i]))
+			return PTR_ERR(ud->mmrs[i]);
+	}
+
+	return 0;
+}
+
+static int udma_setup_resources(struct udma_dev *ud)
+{
+	struct device *dev = ud->dev;
+	int ch_count, ret, i, j;
+	u32 cap2, cap3;
+	struct ti_sci_resource_desc *rm_desc;
+	struct ti_sci_resource *rm_res, irq_res;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	static const char * const range_names[] = { "ti,sci-rm-range-tchan",
+						    "ti,sci-rm-range-rchan",
+						    "ti,sci-rm-range-rflow" };
+
+	cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28);
+	cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+
+	ud->rflow_cnt = cap3 & 0x3fff;
+	ud->tchan_cnt = cap2 & 0x1ff;
+	ud->echan_cnt = (cap2 >> 9) & 0x1ff;
+	ud->rchan_cnt = (cap2 >> 18) & 0x1ff;
+	ch_count  = ud->tchan_cnt + ud->rchan_cnt;
+
+	ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
+					   sizeof(unsigned long), GFP_KERNEL);
+	ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
+				  GFP_KERNEL);
+	ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt),
+					   sizeof(unsigned long), GFP_KERNEL);
+	ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans),
+				  GFP_KERNEL);
+	ud->rflow_gp_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rflow_cnt),
+					      sizeof(unsigned long),
+					      GFP_KERNEL);
+	ud->rflow_gp_map_allocated = devm_kcalloc(dev,
+						  BITS_TO_LONGS(ud->rflow_cnt),
+						  sizeof(unsigned long),
+						  GFP_KERNEL);
+	ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt),
+					sizeof(unsigned long),
+					GFP_KERNEL);
+	ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows),
+				  GFP_KERNEL);
+
+	if (!ud->tchan_map || !ud->rchan_map || !ud->rflow_gp_map ||
+	    !ud->rflow_gp_map_allocated || !ud->tchans || !ud->rchans ||
+	    !ud->rflows || !ud->rflow_in_use)
+		return -ENOMEM;
+
+	/*
+	 * RX flows with the same Ids as RX channels are reserved to be used
+	 * as default flows if remote HW can't generate flow_ids. Those
+	 * RX flows can be requested only explicitly by id.
+	 */
+	bitmap_set(ud->rflow_gp_map_allocated, 0, ud->rchan_cnt);
+
+	/* by default no GP rflows are assigned to Linux */
+	bitmap_set(ud->rflow_gp_map, 0, ud->rflow_cnt);
+
+	/* Get resource ranges from tisci */
+	for (i = 0; i < RM_RANGE_LAST; i++)
+		tisci_rm->rm_ranges[i] =
+			devm_ti_sci_get_of_resource(tisci_rm->tisci, dev,
+						    tisci_rm->tisci_dev_id,
+						    (char *)range_names[i]);
+
+	/* tchan ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+	if (IS_ERR(rm_res)) {
+		bitmap_zero(ud->tchan_map, ud->tchan_cnt);
+	} else {
+		bitmap_fill(ud->tchan_map, ud->tchan_cnt);
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->tchan_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: tchan: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+	}
+	irq_res.sets = rm_res->sets;
+
+	/* rchan and matching default flow ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+	if (IS_ERR(rm_res)) {
+		bitmap_zero(ud->rchan_map, ud->rchan_cnt);
+	} else {
+		bitmap_fill(ud->rchan_map, ud->rchan_cnt);
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->rchan_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: rchan: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+	}
+
+	irq_res.sets += rm_res->sets;
+	irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL);
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+	for (i = 0; i < rm_res->sets; i++) {
+		irq_res.desc[i].start = rm_res->desc[i].start;
+		irq_res.desc[i].num = rm_res->desc[i].num;
+	}
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+	for (j = 0; j < rm_res->sets; j++, i++) {
+		irq_res.desc[i].start = rm_res->desc[j].start +
+					ud->match_data->rchan_oes_offset;
+		irq_res.desc[i].num = rm_res->desc[j].num;
+	}
+	ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res);
+	kfree(irq_res.desc);
+	if (ret) {
+		dev_err(ud->dev, "Failed to allocate MSI interrupts\n");
+		return ret;
+	}
+
+	/* GP rflow ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW];
+	if (IS_ERR(rm_res)) {
+		/* all gp flows are assigned exclusively to Linux */
+		bitmap_clear(ud->rflow_gp_map, ud->rchan_cnt,
+			     ud->rflow_cnt - ud->rchan_cnt);
+	} else {
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->rflow_gp_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: rflow: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+	}
+
+	ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt);
+	ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt);
+	if (!ch_count)
+		return -ENODEV;
+
+	ud->channels = devm_kcalloc(dev, ch_count, sizeof(*ud->channels),
+				    GFP_KERNEL);
+	if (!ud->channels)
+		return -ENOMEM;
+
+	dev_info(dev, "Channels: %d (tchan: %u, rchan: %u, gp-rflow: %u)\n",
+		 ch_count,
+		 ud->tchan_cnt - bitmap_weight(ud->tchan_map, ud->tchan_cnt),
+		 ud->rchan_cnt - bitmap_weight(ud->rchan_map, ud->rchan_cnt),
+		 ud->rflow_cnt - bitmap_weight(ud->rflow_gp_map,
+					       ud->rflow_cnt));
+
+	return ch_count;
+}
+
+#define TI_UDMAC_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+static int udma_probe(struct platform_device *pdev)
+{
+	struct device_node *navss_node = pdev->dev.parent->of_node;
+	struct device *dev = &pdev->dev;
+	struct udma_dev *ud;
+	const struct of_device_id *match;
+	int i, ret;
+	int ch_count;
+
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret)
+		dev_err(dev, "failed to set dma mask stuff\n");
+
+	ud = devm_kzalloc(dev, sizeof(*ud), GFP_KERNEL);
+	if (!ud)
+		return -ENOMEM;
+
+	ret = udma_get_mmrs(pdev, ud);
+	if (ret)
+		return ret;
+
+	ud->tisci_rm.tisci = ti_sci_get_by_phandle(dev->of_node, "ti,sci");
+	if (IS_ERR(ud->tisci_rm.tisci))
+		return PTR_ERR(ud->tisci_rm.tisci);
+
+	ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id",
+				   &ud->tisci_rm.tisci_dev_id);
+	if (ret) {
+		dev_err(dev, "ti,sci-dev-id read failure %d\n", ret);
+		return ret;
+	}
+	pdev->id = ud->tisci_rm.tisci_dev_id;
+
+	ret = of_property_read_u32(navss_node, "ti,sci-dev-id",
+				   &ud->tisci_rm.tisci_navss_dev_id);
+	if (ret) {
+		dev_err(dev, "NAVSS ti,sci-dev-id read failure %d\n", ret);
+		return ret;
+	}
+
+	ud->tisci_rm.tisci_udmap_ops = &ud->tisci_rm.tisci->ops.rm_udmap_ops;
+	ud->tisci_rm.tisci_psil_ops = &ud->tisci_rm.tisci->ops.rm_psil_ops;
+
+	ud->ringacc = of_k3_ringacc_get_by_phandle(dev->of_node, "ti,ringacc");
+	if (IS_ERR(ud->ringacc))
+		return PTR_ERR(ud->ringacc);
+
+	dev->msi_domain = of_msi_get_domain(dev, dev->of_node,
+					    DOMAIN_BUS_TI_SCI_INTA_MSI);
+	if (!dev->msi_domain) {
+		dev_err(dev, "Failed to get MSI domain\n");
+		return -EPROBE_DEFER;
+	}
+
+	match = of_match_node(udma_of_match, dev->of_node);
+	if (!match) {
+		dev_err(dev, "No compatible match found\n");
+		return -ENODEV;
+	}
+	ud->match_data = match->data;
+
+	dma_cap_set(DMA_SLAVE, ud->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, ud->ddev.cap_mask);
+
+	ud->ddev.device_alloc_chan_resources = udma_alloc_chan_resources;
+	ud->ddev.device_config = udma_slave_config;
+	ud->ddev.device_prep_slave_sg = udma_prep_slave_sg;
+	ud->ddev.device_prep_dma_cyclic = udma_prep_dma_cyclic;
+	ud->ddev.device_issue_pending = udma_issue_pending;
+	ud->ddev.device_tx_status = udma_tx_status;
+	ud->ddev.device_pause = udma_pause;
+	ud->ddev.device_resume = udma_resume;
+	ud->ddev.device_terminate_all = udma_terminate_all;
+	ud->ddev.device_synchronize = udma_synchronize;
+
+	ud->ddev.device_free_chan_resources = udma_free_chan_resources;
+	ud->ddev.src_addr_widths = TI_UDMAC_BUSWIDTHS;
+	ud->ddev.dst_addr_widths = TI_UDMAC_BUSWIDTHS;
+	ud->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	ud->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	ud->ddev.copy_align = DMAENGINE_ALIGN_8_BYTES;
+	ud->ddev.desc_metadata_modes = DESC_METADATA_CLIENT |
+				       DESC_METADATA_ENGINE;
+	if (ud->match_data->enable_memcpy_support) {
+		dma_cap_set(DMA_MEMCPY, ud->ddev.cap_mask);
+		ud->ddev.device_prep_dma_memcpy = udma_prep_dma_memcpy;
+		ud->ddev.directions |= BIT(DMA_MEM_TO_MEM);
+	}
+
+	ud->ddev.dev = dev;
+	ud->dev = dev;
+	ud->psil_base = ud->match_data->psil_base;
+
+	INIT_LIST_HEAD(&ud->ddev.channels);
+	INIT_LIST_HEAD(&ud->desc_to_purge);
+
+	ch_count = udma_setup_resources(ud);
+	if (ch_count <= 0)
+		return ch_count;
+
+	spin_lock_init(&ud->lock);
+	INIT_WORK(&ud->purge_work, udma_purge_desc_work);
+
+	ud->desc_align = 64;
+	if (ud->desc_align < dma_get_cache_alignment())
+		ud->desc_align = dma_get_cache_alignment();
+
+	for (i = 0; i < ud->tchan_cnt; i++) {
+		struct udma_tchan *tchan = &ud->tchans[i];
+
+		tchan->id = i;
+		tchan->reg_rt = ud->mmrs[MMR_TCHANRT] + i * 0x1000;
+	}
+
+	for (i = 0; i < ud->rchan_cnt; i++) {
+		struct udma_rchan *rchan = &ud->rchans[i];
+
+		rchan->id = i;
+		rchan->reg_rt = ud->mmrs[MMR_RCHANRT] + i * 0x1000;
+	}
+
+	for (i = 0; i < ud->rflow_cnt; i++) {
+		struct udma_rflow *rflow = &ud->rflows[i];
+
+		rflow->id = i;
+	}
+
+	for (i = 0; i < ch_count; i++) {
+		struct udma_chan *uc = &ud->channels[i];
+
+		uc->ud = ud;
+		uc->vc.desc_free = udma_desc_free;
+		uc->id = i;
+		uc->tchan = NULL;
+		uc->rchan = NULL;
+		uc->config.remote_thread_id = -1;
+		uc->config.dir = DMA_MEM_TO_MEM;
+		uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d",
+					  dev_name(dev), i);
+
+		vchan_init(&uc->vc, &ud->ddev);
+		/* Use custom vchan completion handling */
+		tasklet_init(&uc->vc.task, udma_vchan_complete,
+			     (unsigned long)&uc->vc);
+		init_completion(&uc->teardown_completed);
+	}
+
+	ret = dma_async_device_register(&ud->ddev);
+	if (ret) {
+		dev_err(dev, "failed to register slave DMA engine: %d\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ud);
+
+	ret = of_dma_controller_register(dev->of_node, udma_of_xlate, ud);
+	if (ret) {
+		dev_err(dev, "failed to register of_dma controller\n");
+		dma_async_device_unregister(&ud->ddev);
+	}
+
+	return ret;
+}
+
+static struct platform_driver udma_driver = {
+	.driver = {
+		.name	= "ti-udma",
+		.of_match_table = udma_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= udma_probe,
+};
+builtin_platform_driver(udma_driver);
+
+/* Private interfaces to UDMA */
+#include "k3-udma-private.c"
diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h
new file mode 100644
index 000000000000..128d8744a435
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_UDMA_H_
+#define K3_UDMA_H_
+
+#include <linux/soc/ti/ti_sci_protocol.h>
+
+/* Global registers */
+#define UDMA_REV_REG			0x0
+#define UDMA_PERF_CTL_REG		0x4
+#define UDMA_EMU_CTL_REG		0x8
+#define UDMA_PSIL_TO_REG		0x10
+#define UDMA_UTC_CTL_REG		0x1c
+#define UDMA_CAP_REG(i)			(0x20 + ((i) * 4))
+#define UDMA_RX_FLOW_ID_FW_OES_REG	0x80
+#define UDMA_RX_FLOW_ID_FW_STATUS_REG	0x88
+
+/* TX chan RT regs */
+#define UDMA_TCHAN_RT_CTL_REG		0x0
+#define UDMA_TCHAN_RT_SWTRIG_REG	0x8
+#define UDMA_TCHAN_RT_STDATA_REG	0x80
+
+#define UDMA_TCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
+#define UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG	\
+	UDMA_TCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
+#define UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG	\
+	UDMA_TCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
+#define UDMA_TCHAN_RT_PEER_BCNT_REG		\
+	UDMA_TCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
+#define UDMA_TCHAN_RT_PEER_RT_EN_REG		\
+	UDMA_TCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
+
+#define UDMA_TCHAN_RT_PCNT_REG		0x400
+#define UDMA_TCHAN_RT_BCNT_REG		0x408
+#define UDMA_TCHAN_RT_SBCNT_REG		0x410
+
+/* RX chan RT regs */
+#define UDMA_RCHAN_RT_CTL_REG		0x0
+#define UDMA_RCHAN_RT_SWTRIG_REG	0x8
+#define UDMA_RCHAN_RT_STDATA_REG	0x80
+
+#define UDMA_RCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
+#define UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG	\
+	UDMA_RCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
+#define UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG	\
+	UDMA_RCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
+#define UDMA_RCHAN_RT_PEER_BCNT_REG		\
+	UDMA_RCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
+#define UDMA_RCHAN_RT_PEER_RT_EN_REG		\
+	UDMA_RCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
+
+#define UDMA_RCHAN_RT_PCNT_REG		0x400
+#define UDMA_RCHAN_RT_BCNT_REG		0x408
+#define UDMA_RCHAN_RT_SBCNT_REG		0x410
+
+/* UDMA_TCHAN_RT_CTL_REG/UDMA_RCHAN_RT_CTL_REG */
+#define UDMA_CHAN_RT_CTL_EN		BIT(31)
+#define UDMA_CHAN_RT_CTL_TDOWN		BIT(30)
+#define UDMA_CHAN_RT_CTL_PAUSE		BIT(29)
+#define UDMA_CHAN_RT_CTL_FTDOWN		BIT(28)
+#define UDMA_CHAN_RT_CTL_ERROR		BIT(0)
+
+/* UDMA_TCHAN_RT_PEER_RT_EN_REG/UDMA_RCHAN_RT_PEER_RT_EN_REG (PSI-L: 0x408) */
+#define UDMA_PEER_RT_EN_ENABLE		BIT(31)
+#define UDMA_PEER_RT_EN_TEARDOWN	BIT(30)
+#define UDMA_PEER_RT_EN_PAUSE		BIT(29)
+#define UDMA_PEER_RT_EN_FLUSH		BIT(28)
+#define UDMA_PEER_RT_EN_IDLE		BIT(1)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG
+ */
+#define PDMA_STATIC_TR_X_MASK		GENMASK(26, 24)
+#define PDMA_STATIC_TR_X_SHIFT		(24)
+#define PDMA_STATIC_TR_Y_MASK		GENMASK(11, 0)
+#define PDMA_STATIC_TR_Y_SHIFT		(0)
+
+#define PDMA_STATIC_TR_Y(x)	\
+	(((x) << PDMA_STATIC_TR_Y_SHIFT) & PDMA_STATIC_TR_Y_MASK)
+#define PDMA_STATIC_TR_X(x)	\
+	(((x) << PDMA_STATIC_TR_X_SHIFT) & PDMA_STATIC_TR_X_MASK)
+
+#define PDMA_STATIC_TR_XY_ACC32		BIT(30)
+#define PDMA_STATIC_TR_XY_BURST		BIT(31)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG
+ */
+#define PDMA_STATIC_TR_Z(x, mask)	((x) & (mask))
+
+struct udma_dev;
+struct udma_tchan;
+struct udma_rchan;
+struct udma_rflow;
+
+enum udma_rm_range {
+	RM_RANGE_TCHAN = 0,
+	RM_RANGE_RCHAN,
+	RM_RANGE_RFLOW,
+	RM_RANGE_LAST,
+};
+
+struct udma_tisci_rm {
+	const struct ti_sci_handle *tisci;
+	const struct ti_sci_rm_udmap_ops *tisci_udmap_ops;
+	u32  tisci_dev_id;
+
+	/* tisci information for PSI-L thread pairing/unpairing */
+	const struct ti_sci_rm_psil_ops *tisci_psil_ops;
+	u32  tisci_navss_dev_id;
+
+	struct ti_sci_resource *rm_ranges[RM_RANGE_LAST];
+};
+
+/* Direct access to UDMA low lever resources for the glue layer */
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread);
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+			    u32 dst_thread);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property);
+void xudma_dev_put(struct udma_dev *ud);
+u32 xudma_dev_get_psil_base(struct udma_dev *ud);
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+
+struct udma_tchan *xudma_tchan_get(struct udma_dev *ud, int id);
+struct udma_rchan *xudma_rchan_get(struct udma_dev *ud, int id);
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id);
+
+void xudma_tchan_put(struct udma_dev *ud, struct udma_tchan *p);
+void xudma_rchan_put(struct udma_dev *ud, struct udma_rchan *p);
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p);
+
+int xudma_tchan_get_id(struct udma_tchan *p);
+int xudma_rchan_get_id(struct udma_rchan *p);
+int xudma_rflow_get_id(struct udma_rflow *p);
+
+u32 xudma_tchanrt_read(struct udma_tchan *tchan, int reg);
+void xudma_tchanrt_write(struct udma_tchan *tchan, int reg, u32 val);
+u32 xudma_rchanrt_read(struct udma_rchan *rchan, int reg);
+void xudma_rchanrt_write(struct udma_rchan *rchan, int reg, u32 val);
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id);
+
+#endif /* K3_UDMA_H_ */
diff --git a/drivers/dma/uniphier-mdmac.c b/drivers/dma/uniphier-mdmac.c
index fde54687856b..21b8f1131d55 100644
--- a/drivers/dma/uniphier-mdmac.c
+++ b/drivers/dma/uniphier-mdmac.c
@@ -382,7 +382,6 @@ static int uniphier_mdmac_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct uniphier_mdmac_device *mdev;
 	struct dma_device *ddev;
-	struct resource *res;
 	int nr_chans, ret, i;
 
 	nr_chans = platform_irq_count(pdev);
@@ -398,8 +397,7 @@ static int uniphier_mdmac_probe(struct platform_device *pdev)
 	if (!mdev)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mdev->reg_base = devm_ioremap_resource(dev, res);
+	mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mdev->reg_base))
 		return PTR_ERR(mdev->reg_base);
 
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index ec4adf4260a0..23e33a85f033 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -104,9 +104,8 @@ static void vchan_complete(unsigned long arg)
 		dmaengine_desc_get_callback(&vd->tx, &cb);
 
 		list_del(&vd->node);
-		vchan_vdesc_fini(vd);
-
 		dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
+		vchan_vdesc_fini(vd);
 	}
 }
 
@@ -115,13 +114,8 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 	struct virt_dma_desc *vd, *_vd;
 
 	list_for_each_entry_safe(vd, _vd, head, node) {
-		if (dmaengine_desc_test_reuse(&vd->tx)) {
-			list_move_tail(&vd->node, &vc->desc_allocated);
-		} else {
-			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
-			list_del(&vd->node);
-			vc->desc_free(vd);
-		}
+		list_del(&vd->node);
+		vchan_vdesc_fini(vd);
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -135,6 +129,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
+	INIT_LIST_HEAD(&vc->desc_terminated);
 
 	tasklet_init(&vc->task, vchan_complete, (unsigned long)vc);
 
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index ab158bac03a7..e9f5250fbe4d 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -31,9 +31,9 @@ struct virt_dma_chan {
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
+	struct list_head desc_terminated;
 
 	struct virt_dma_desc *cyclic;
-	struct virt_dma_desc *vd_terminated;
 };
 
 static inline struct virt_dma_chan *to_virt_chan(struct dma_chan *chan)
@@ -113,10 +113,15 @@ static inline void vchan_vdesc_fini(struct virt_dma_desc *vd)
 {
 	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
 
-	if (dmaengine_desc_test_reuse(&vd->tx))
+	if (dmaengine_desc_test_reuse(&vd->tx)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&vc->lock, flags);
 		list_add(&vd->node, &vc->desc_allocated);
-	else
+		spin_unlock_irqrestore(&vc->lock, flags);
+	} else {
 		vc->desc_free(vd);
+	}
 }
 
 /**
@@ -141,11 +146,8 @@ static inline void vchan_terminate_vdesc(struct virt_dma_desc *vd)
 {
 	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
 
-	/* free up stuck descriptor */
-	if (vc->vd_terminated)
-		vchan_vdesc_fini(vc->vd_terminated);
+	list_add_tail(&vd->node, &vc->desc_terminated);
 
-	vc->vd_terminated = vd;
 	if (vc->cyclic == vd)
 		vc->cyclic = NULL;
 }
@@ -179,6 +181,7 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
+	list_splice_tail_init(&vc->desc_terminated, head);
 }
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
@@ -207,16 +210,18 @@ static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
  */
 static inline void vchan_synchronize(struct virt_dma_chan *vc)
 {
+	LIST_HEAD(head);
 	unsigned long flags;
 
 	tasklet_kill(&vc->task);
 
 	spin_lock_irqsave(&vc->lock, flags);
-	if (vc->vd_terminated) {
-		vchan_vdesc_fini(vc->vd_terminated);
-		vc->vd_terminated = NULL;
-	}
+
+	list_splice_tail_init(&vc->desc_terminated, &head);
+
 	spin_unlock_irqrestore(&vc->lock, flags);
+
+	vchan_dma_desc_free_list(vc, &head);
 }
 
 #endif
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 5d56f1e4d332..a9c5d5cc9f2b 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -25,6 +25,12 @@
  * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory
  * Access (DMA) between a memory-mapped source address and a memory-mapped
  * destination address.
+ *
+ * The AXI Multichannel Direct Memory Access (AXI MCDMA) core is a soft
+ * Xilinx IP that provides high-bandwidth direct memory access between
+ * memory and AXI4-Stream target peripherals. It provides scatter gather
+ * (SG) interface with multiple channels independent configuration support.
+ *
  */
 
 #include <linux/bitops.h>
@@ -173,18 +179,6 @@
 #define XILINX_DMA_NUM_DESCS		255
 #define XILINX_DMA_NUM_APP_WORDS	5
 
-/* Multi-Channel DMA Descriptor offsets*/
-#define XILINX_DMA_MCRX_CDESC(x)	(0x40 + (x-1) * 0x20)
-#define XILINX_DMA_MCRX_TDESC(x)	(0x48 + (x-1) * 0x20)
-
-/* Multi-Channel DMA Masks/Shifts */
-#define XILINX_DMA_BD_HSIZE_MASK	GENMASK(15, 0)
-#define XILINX_DMA_BD_STRIDE_MASK	GENMASK(15, 0)
-#define XILINX_DMA_BD_VSIZE_MASK	GENMASK(31, 19)
-#define XILINX_DMA_BD_TDEST_MASK	GENMASK(4, 0)
-#define XILINX_DMA_BD_STRIDE_SHIFT	0
-#define XILINX_DMA_BD_VSIZE_SHIFT	19
-
 /* AXI CDMA Specific Registers/Offsets */
 #define XILINX_CDMA_REG_SRCADDR		0x18
 #define XILINX_CDMA_REG_DSTADDR		0x20
@@ -194,6 +188,31 @@
 
 #define xilinx_prep_dma_addr_t(addr)	\
 	((dma_addr_t)((u64)addr##_##msb << 32 | (addr)))
+
+/* AXI MCDMA Specific Registers/Offsets */
+#define XILINX_MCDMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_MCDMA_S2MM_CTRL_OFFSET		0x0500
+#define XILINX_MCDMA_CHEN_OFFSET		0x0008
+#define XILINX_MCDMA_CH_ERR_OFFSET		0x0010
+#define XILINX_MCDMA_RXINT_SER_OFFSET		0x0020
+#define XILINX_MCDMA_TXINT_SER_OFFSET		0x0028
+#define XILINX_MCDMA_CHAN_CR_OFFSET(x)		(0x40 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_SR_OFFSET(x)		(0x44 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_CDESC_OFFSET(x)	(0x48 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_TDESC_OFFSET(x)	(0x50 + (x) * 0x40)
+
+/* AXI MCDMA Specific Masks/Shifts */
+#define XILINX_MCDMA_COALESCE_SHIFT		16
+#define XILINX_MCDMA_COALESCE_MAX		24
+#define XILINX_MCDMA_IRQ_ALL_MASK		GENMASK(7, 5)
+#define XILINX_MCDMA_COALESCE_MASK		GENMASK(23, 16)
+#define XILINX_MCDMA_CR_RUNSTOP_MASK		BIT(0)
+#define XILINX_MCDMA_IRQ_IOC_MASK		BIT(5)
+#define XILINX_MCDMA_IRQ_DELAY_MASK		BIT(6)
+#define XILINX_MCDMA_IRQ_ERR_MASK		BIT(7)
+#define XILINX_MCDMA_BD_EOP			BIT(30)
+#define XILINX_MCDMA_BD_SOP			BIT(31)
+
 /**
  * struct xilinx_vdma_desc_hw - Hardware Descriptor
  * @next_desc: Next Descriptor Pointer @0x00
@@ -221,8 +240,8 @@ struct xilinx_vdma_desc_hw {
  * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
  * @buf_addr: Buffer address @0x08
  * @buf_addr_msb: MSB of Buffer address @0x0C
- * @mcdma_control: Control field for mcdma @0x10
- * @vsize_stride: Vsize and Stride field for mcdma @0x14
+ * @reserved1: Reserved @0x10
+ * @reserved2: Reserved @0x14
  * @control: Control field @0x18
  * @status: Status field @0x1C
  * @app: APP Fields @0x20 - 0x30
@@ -232,14 +251,38 @@ struct xilinx_axidma_desc_hw {
 	u32 next_desc_msb;
 	u32 buf_addr;
 	u32 buf_addr_msb;
-	u32 mcdma_control;
-	u32 vsize_stride;
+	u32 reserved1;
+	u32 reserved2;
 	u32 control;
 	u32 status;
 	u32 app[XILINX_DMA_NUM_APP_WORDS];
 } __aligned(64);
 
 /**
+ * struct xilinx_aximcdma_desc_hw - Hardware Descriptor for AXI MCDMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @rsvd: Reserved field @0x10
+ * @control: Control Information field @0x14
+ * @status: Status field @0x18
+ * @sideband_status: Status of sideband signals @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_aximcdma_desc_hw {
+	u32 next_desc;
+	u32 next_desc_msb;
+	u32 buf_addr;
+	u32 buf_addr_msb;
+	u32 rsvd;
+	u32 control;
+	u32 status;
+	u32 sideband_status;
+	u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
  * struct xilinx_cdma_desc_hw - Hardware Descriptor
  * @next_desc: Next Descriptor Pointer @0x00
  * @next_desc_msb: Next Descriptor Pointer MSB @0x04
@@ -286,6 +329,18 @@ struct xilinx_axidma_tx_segment {
 } __aligned(64);
 
 /**
+ * struct xilinx_aximcdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_aximcdma_tx_segment {
+	struct xilinx_aximcdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
  * struct xilinx_cdma_tx_segment - Descriptor segment
  * @hw: Hardware descriptor
  * @node: Node in the descriptor segments list
@@ -303,12 +358,16 @@ struct xilinx_cdma_tx_segment {
  * @segments: TX segments list
  * @node: Node in the channel descriptors list
  * @cyclic: Check for cyclic transfers.
+ * @err: Whether the descriptor has an error.
+ * @residue: Residue of the completed descriptor
  */
 struct xilinx_dma_tx_descriptor {
 	struct dma_async_tx_descriptor async_tx;
 	struct list_head segments;
 	struct list_head node;
 	bool cyclic;
+	bool err;
+	u32 residue;
 };
 
 /**
@@ -339,8 +398,8 @@ struct xilinx_dma_tx_descriptor {
  * @desc_pendingcount: Descriptor pending count
  * @ext_addr: Indicates 64 bit addressing is supported by dma channel
  * @desc_submitcount: Descriptor h/w submitted count
- * @residue: Residue for AXI DMA
  * @seg_v: Statically allocated segments base
+ * @seg_mv: Statically allocated segments base for MCDMA
  * @seg_p: Physical allocated segments base
  * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
  * @cyclic_seg_p: Physical allocated segments base for cyclic dma
@@ -376,8 +435,8 @@ struct xilinx_dma_chan {
 	u32 desc_pendingcount;
 	bool ext_addr;
 	u32 desc_submitcount;
-	u32 residue;
 	struct xilinx_axidma_tx_segment *seg_v;
+	struct xilinx_aximcdma_tx_segment *seg_mv;
 	dma_addr_t seg_p;
 	struct xilinx_axidma_tx_segment *cyclic_seg_v;
 	dma_addr_t cyclic_seg_p;
@@ -393,12 +452,14 @@ struct xilinx_dma_chan {
  * @XDMA_TYPE_AXIDMA: Axi dma ip.
  * @XDMA_TYPE_CDMA: Axi cdma ip.
  * @XDMA_TYPE_VDMA: Axi vdma ip.
+ * @XDMA_TYPE_AXIMCDMA: Axi MCDMA ip.
  *
  */
 enum xdma_ip_type {
 	XDMA_TYPE_AXIDMA = 0,
 	XDMA_TYPE_CDMA,
 	XDMA_TYPE_VDMA,
+	XDMA_TYPE_AXIMCDMA
 };
 
 struct xilinx_dma_config {
@@ -406,6 +467,7 @@ struct xilinx_dma_config {
 	int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk,
 			struct clk **tx_clk, struct clk **txs_clk,
 			struct clk **rx_clk, struct clk **rxs_clk);
+	irqreturn_t (*irq_handler)(int irq, void *data);
 };
 
 /**
@@ -414,7 +476,6 @@ struct xilinx_dma_config {
  * @dev: Device Structure
  * @common: DMA device structure
  * @chan: Driver specific DMA channel
- * @mcdma: Specifies whether Multi-Channel is present or not
  * @flush_on_fsync: Flush on frame sync
  * @ext_addr: Indicates 64 bit addressing is supported by dma device
  * @pdev: Platform device structure pointer
@@ -427,13 +488,13 @@ struct xilinx_dma_config {
  * @nr_channels: Number of channels DMA device supports
  * @chan_id: DMA channel identifier
  * @max_buffer_len: Max buffer length
+ * @s2mm_index: S2MM channel index
  */
 struct xilinx_dma_device {
 	void __iomem *regs;
 	struct device *dev;
 	struct dma_device common;
 	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
-	bool mcdma;
 	u32 flush_on_fsync;
 	bool ext_addr;
 	struct platform_device  *pdev;
@@ -446,6 +507,7 @@ struct xilinx_dma_device {
 	u32 nr_channels;
 	u32 chan_id;
 	u32 max_buffer_len;
+	u32 s2mm_index;
 };
 
 /* Macros */
@@ -546,6 +608,18 @@ static inline void xilinx_axidma_buf(struct xilinx_dma_chan *chan,
 	}
 }
 
+static inline void xilinx_aximcdma_buf(struct xilinx_dma_chan *chan,
+				       struct xilinx_aximcdma_desc_hw *hw,
+				       dma_addr_t buf_addr, size_t sg_used)
+{
+	if (chan->ext_addr) {
+		hw->buf_addr = lower_32_bits(buf_addr + sg_used);
+		hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used);
+	} else {
+		hw->buf_addr = buf_addr + sg_used;
+	}
+}
+
 /* -----------------------------------------------------------------------------
  * Descriptors and segments alloc and free
  */
@@ -613,6 +687,33 @@ xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan)
 	}
 	spin_unlock_irqrestore(&chan->lock, flags);
 
+	if (!segment)
+		dev_dbg(chan->dev, "Could not find free tx segment\n");
+
+	return segment;
+}
+
+/**
+ * xilinx_aximcdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_aximcdma_tx_segment *
+xilinx_aximcdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_aximcdma_tx_segment *segment = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (!list_empty(&chan->free_seg_list)) {
+		segment = list_first_entry(&chan->free_seg_list,
+					   struct xilinx_aximcdma_tx_segment,
+					   node);
+		list_del(&segment->node);
+	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
 	return segment;
 }
 
@@ -627,6 +728,17 @@ static void xilinx_dma_clean_hw_desc(struct xilinx_axidma_desc_hw *hw)
 	hw->next_desc_msb = next_desc_msb;
 }
 
+static void xilinx_mcdma_clean_hw_desc(struct xilinx_aximcdma_desc_hw *hw)
+{
+	u32 next_desc = hw->next_desc;
+	u32 next_desc_msb = hw->next_desc_msb;
+
+	memset(hw, 0, sizeof(struct xilinx_aximcdma_desc_hw));
+
+	hw->next_desc = next_desc;
+	hw->next_desc_msb = next_desc_msb;
+}
+
 /**
  * xilinx_dma_free_tx_segment - Free transaction segment
  * @chan: Driver specific DMA channel
@@ -641,6 +753,20 @@ static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan,
 }
 
 /**
+ * xilinx_mcdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_mcdma_free_tx_segment(struct xilinx_dma_chan *chan,
+					 struct xilinx_aximcdma_tx_segment *
+					 segment)
+{
+	xilinx_mcdma_clean_hw_desc(&segment->hw);
+
+	list_add_tail(&segment->node, &chan->free_seg_list);
+}
+
+/**
  * xilinx_cdma_free_tx_segment - Free transaction segment
  * @chan: Driver specific DMA channel
  * @segment: DMA transaction segment
@@ -694,6 +820,7 @@ xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan,
 	struct xilinx_vdma_tx_segment *segment, *next;
 	struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next;
 	struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next;
+	struct xilinx_aximcdma_tx_segment *aximcdma_segment, *aximcdma_next;
 
 	if (!desc)
 		return;
@@ -709,12 +836,18 @@ xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan,
 			list_del(&cdma_segment->node);
 			xilinx_cdma_free_tx_segment(chan, cdma_segment);
 		}
-	} else {
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		list_for_each_entry_safe(axidma_segment, axidma_next,
 					 &desc->segments, node) {
 			list_del(&axidma_segment->node);
 			xilinx_dma_free_tx_segment(chan, axidma_segment);
 		}
+	} else {
+		list_for_each_entry_safe(aximcdma_segment, aximcdma_next,
+					 &desc->segments, node) {
+			list_del(&aximcdma_segment->node);
+			xilinx_mcdma_free_tx_segment(chan, aximcdma_segment);
+		}
 	}
 
 	kfree(desc);
@@ -783,10 +916,61 @@ static void xilinx_dma_free_chan_resources(struct dma_chan *dchan)
 				  chan->cyclic_seg_v, chan->cyclic_seg_p);
 	}
 
-	if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) {
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		spin_lock_irqsave(&chan->lock, flags);
+		INIT_LIST_HEAD(&chan->free_seg_list);
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		/* Free memory that is allocated for BD */
+		dma_free_coherent(chan->dev, sizeof(*chan->seg_mv) *
+				  XILINX_DMA_NUM_DESCS, chan->seg_mv,
+				  chan->seg_p);
+	}
+
+	if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA &&
+	    chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA) {
 		dma_pool_destroy(chan->desc_pool);
 		chan->desc_pool = NULL;
 	}
+
+}
+
+/**
+ * xilinx_dma_get_residue - Compute residue for a given descriptor
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ *
+ * Return: The number of residue bytes for the descriptor.
+ */
+static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
+				  struct xilinx_dma_tx_descriptor *desc)
+{
+	struct xilinx_cdma_tx_segment *cdma_seg;
+	struct xilinx_axidma_tx_segment *axidma_seg;
+	struct xilinx_cdma_desc_hw *cdma_hw;
+	struct xilinx_axidma_desc_hw *axidma_hw;
+	struct list_head *entry;
+	u32 residue = 0;
+
+	list_for_each(entry, &desc->segments) {
+		if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+			cdma_seg = list_entry(entry,
+					      struct xilinx_cdma_tx_segment,
+					      node);
+			cdma_hw = &cdma_seg->hw;
+			residue += (cdma_hw->control - cdma_hw->status) &
+				   chan->xdev->max_buffer_len;
+		} else {
+			axidma_seg = list_entry(entry,
+						struct xilinx_axidma_tx_segment,
+						node);
+			axidma_hw = &axidma_seg->hw;
+			residue += (axidma_hw->control - axidma_hw->status) &
+				   chan->xdev->max_buffer_len;
+		}
+	}
+
+	return residue;
 }
 
 /**
@@ -823,7 +1007,7 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
 	spin_lock_irqsave(&chan->lock, flags);
 
 	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
-		struct dmaengine_desc_callback cb;
+		struct dmaengine_result result;
 
 		if (desc->cyclic) {
 			xilinx_dma_chan_handle_cyclic(chan, desc, &flags);
@@ -833,14 +1017,22 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
 		/* Remove from the list of running transactions */
 		list_del(&desc->node);
 
-		/* Run the link descriptor callback function */
-		dmaengine_desc_get_callback(&desc->async_tx, &cb);
-		if (dmaengine_desc_callback_valid(&cb)) {
-			spin_unlock_irqrestore(&chan->lock, flags);
-			dmaengine_desc_callback_invoke(&cb, NULL);
-			spin_lock_irqsave(&chan->lock, flags);
+		if (unlikely(desc->err)) {
+			if (chan->direction == DMA_DEV_TO_MEM)
+				result.result = DMA_TRANS_READ_FAILED;
+			else
+				result.result = DMA_TRANS_WRITE_FAILED;
+		} else {
+			result.result = DMA_TRANS_NOERROR;
 		}
 
+		result.residue = desc->residue;
+
+		/* Run the link descriptor callback function */
+		spin_unlock_irqrestore(&chan->lock, flags);
+		dmaengine_desc_get_callback_invoke(&desc->async_tx, &result);
+		spin_lock_irqsave(&chan->lock, flags);
+
 		/* Run any dependencies, then free the descriptor */
 		dma_run_dependencies(&desc->async_tx);
 		xilinx_dma_free_tx_descriptor(chan, desc);
@@ -922,6 +1114,30 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
 			list_add_tail(&chan->seg_v[i].node,
 				      &chan->free_seg_list);
 		}
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		/* Allocate the buffer descriptors. */
+		chan->seg_mv = dma_alloc_coherent(chan->dev,
+						  sizeof(*chan->seg_mv) *
+						  XILINX_DMA_NUM_DESCS,
+						  &chan->seg_p, GFP_KERNEL);
+		if (!chan->seg_mv) {
+			dev_err(chan->dev,
+				"unable to allocate channel %d descriptors\n",
+				chan->id);
+			return -ENOMEM;
+		}
+		for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) {
+			chan->seg_mv[i].hw.next_desc =
+			lower_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_mv[i].hw.next_desc_msb =
+			upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_mv[i].phys = chan->seg_p +
+				sizeof(*chan->seg_v) * i;
+			list_add_tail(&chan->seg_mv[i].node,
+				      &chan->free_seg_list);
+		}
 	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool",
 				   chan->dev,
@@ -937,7 +1153,8 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
 	}
 
 	if (!chan->desc_pool &&
-	    (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA)) {
+	    ((chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) &&
+		chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA)) {
 		dev_err(chan->dev,
 			"unable to allocate channel %d descriptor pool\n",
 			chan->id);
@@ -1003,8 +1220,6 @@ static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_axidma_tx_segment *segment;
-	struct xilinx_axidma_desc_hw *hw;
 	enum dma_status ret;
 	unsigned long flags;
 	u32 residue = 0;
@@ -1013,23 +1228,20 @@ static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
 	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
-	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
-		spin_lock_irqsave(&chan->lock, flags);
+	spin_lock_irqsave(&chan->lock, flags);
 
-		desc = list_last_entry(&chan->active_list,
-				       struct xilinx_dma_tx_descriptor, node);
-		if (chan->has_sg) {
-			list_for_each_entry(segment, &desc->segments, node) {
-				hw = &segment->hw;
-				residue += (hw->control - hw->status) &
-					   chan->xdev->max_buffer_len;
-			}
-		}
-		spin_unlock_irqrestore(&chan->lock, flags);
+	desc = list_last_entry(&chan->active_list,
+			       struct xilinx_dma_tx_descriptor, node);
+	/*
+	 * VDMA and simple mode do not support residue reporting, so the
+	 * residue field will always be 0.
+	 */
+	if (chan->has_sg && chan->xdev->dma_config->dmatype != XDMA_TYPE_VDMA)
+		residue = xilinx_dma_get_residue(chan, desc);
 
-		chan->residue = residue;
-		dma_set_residue(txstate, chan->residue);
-	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	dma_set_residue(txstate, residue);
 
 	return ret;
 }
@@ -1301,53 +1513,23 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
 		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
 	}
 
-	if (chan->has_sg && !chan->xdev->mcdma)
+	if (chan->has_sg)
 		xilinx_write(chan, XILINX_DMA_REG_CURDESC,
 			     head_desc->async_tx.phys);
 
-	if (chan->has_sg && chan->xdev->mcdma) {
-		if (chan->direction == DMA_MEM_TO_DEV) {
-			dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
-				       head_desc->async_tx.phys);
-		} else {
-			if (!chan->tdest) {
-				dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
-				       head_desc->async_tx.phys);
-			} else {
-				dma_ctrl_write(chan,
-					XILINX_DMA_MCRX_CDESC(chan->tdest),
-				       head_desc->async_tx.phys);
-			}
-		}
-	}
-
 	xilinx_dma_start(chan);
 
 	if (chan->err)
 		return;
 
 	/* Start the transfer */
-	if (chan->has_sg && !chan->xdev->mcdma) {
+	if (chan->has_sg) {
 		if (chan->cyclic)
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     chan->cyclic_seg_v->phys);
 		else
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     tail_segment->phys);
-	} else if (chan->has_sg && chan->xdev->mcdma) {
-		if (chan->direction == DMA_MEM_TO_DEV) {
-			dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
-			       tail_segment->phys);
-		} else {
-			if (!chan->tdest) {
-				dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
-					       tail_segment->phys);
-			} else {
-				dma_ctrl_write(chan,
-					XILINX_DMA_MCRX_TDESC(chan->tdest),
-					tail_segment->phys);
-			}
-		}
 	} else {
 		struct xilinx_axidma_tx_segment *segment;
 		struct xilinx_axidma_desc_hw *hw;
@@ -1371,6 +1553,76 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
 }
 
 /**
+ * xilinx_mcdma_start_transfer - Starts MCDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_axidma_tx_segment *tail_segment;
+	u32 reg;
+
+	/*
+	 * lock has been held by calling functions, so we don't need it
+	 * to take it here again.
+	 */
+
+	if (chan->err)
+		return;
+
+	if (!chan->idle)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_axidma_tx_segment, node);
+
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+
+	if (chan->desc_pendingcount <= XILINX_MCDMA_COALESCE_MAX) {
+		reg &= ~XILINX_MCDMA_COALESCE_MASK;
+		reg |= chan->desc_pendingcount <<
+			XILINX_MCDMA_COALESCE_SHIFT;
+	}
+
+	reg |= XILINX_MCDMA_IRQ_ALL_MASK;
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+	/* Program current descriptor */
+	xilinx_write(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET(chan->tdest),
+		     head_desc->async_tx.phys);
+
+	/* Program channel enable register */
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHEN_OFFSET);
+	reg |= BIT(chan->tdest);
+	dma_ctrl_write(chan, XILINX_MCDMA_CHEN_OFFSET, reg);
+
+	/* Start the fetch of BDs for the channel */
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+	reg |= XILINX_MCDMA_CR_RUNSTOP_MASK;
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+	xilinx_dma_start(chan);
+
+	if (chan->err)
+		return;
+
+	/* Start the transfer */
+	xilinx_write(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET(chan->tdest),
+		     tail_segment->phys);
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	chan->desc_pendingcount = 0;
+	chan->idle = false;
+}
+
+/**
  * xilinx_dma_issue_pending - Issue pending transactions
  * @dchan: DMA channel
  */
@@ -1399,6 +1651,13 @@ static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
 		return;
 
 	list_for_each_entry_safe(desc, next, &chan->active_list, node) {
+		if (chan->has_sg && chan->xdev->dma_config->dmatype !=
+		    XDMA_TYPE_VDMA)
+			desc->residue = xilinx_dma_get_residue(chan, desc);
+		else
+			desc->residue = 0;
+		desc->err = chan->err;
+
 		list_del(&desc->node);
 		if (!desc->cyclic)
 			dma_cookie_complete(&desc->async_tx);
@@ -1433,6 +1692,7 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
 
 	chan->err = false;
 	chan->idle = true;
+	chan->desc_pendingcount = 0;
 	chan->desc_submitcount = 0;
 
 	return err;
@@ -1461,6 +1721,74 @@ static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan)
 }
 
 /**
+ * xilinx_mcdma_irq_handler - MCDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx MCDMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_mcdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_dma_chan *chan = data;
+	u32 status, ser_offset, chan_sermask, chan_offset = 0, chan_id;
+
+	if (chan->direction == DMA_DEV_TO_MEM)
+		ser_offset = XILINX_MCDMA_RXINT_SER_OFFSET;
+	else
+		ser_offset = XILINX_MCDMA_TXINT_SER_OFFSET;
+
+	/* Read the channel id raising the interrupt*/
+	chan_sermask = dma_ctrl_read(chan, ser_offset);
+	chan_id = ffs(chan_sermask);
+
+	if (!chan_id)
+		return IRQ_NONE;
+
+	if (chan->direction == DMA_DEV_TO_MEM)
+		chan_offset = chan->xdev->s2mm_index;
+
+	chan_offset = chan_offset + (chan_id - 1);
+	chan = chan->xdev->chan[chan_offset];
+	/* Read the status and ack the interrupts. */
+	status = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest));
+	if (!(status & XILINX_MCDMA_IRQ_ALL_MASK))
+		return IRQ_NONE;
+
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest),
+		       status & XILINX_MCDMA_IRQ_ALL_MASK);
+
+	if (status & XILINX_MCDMA_IRQ_ERR_MASK) {
+		dev_err(chan->dev, "Channel %p has errors %x cdr %x tdr %x\n",
+			chan,
+			dma_ctrl_read(chan, XILINX_MCDMA_CH_ERR_OFFSET),
+			dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET
+				      (chan->tdest)),
+			dma_ctrl_read(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET
+				      (chan->tdest)));
+		chan->err = true;
+	}
+
+	if (status & XILINX_MCDMA_IRQ_DELAY_MASK) {
+		/*
+		 * Device takes too long to do the transfer when user requires
+		 * responsiveness.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_MCDMA_IRQ_IOC_MASK) {
+		spin_lock(&chan->lock);
+		xilinx_dma_complete_descriptor(chan);
+		chan->idle = true;
+		chan->start_transfer(chan);
+		spin_unlock(&chan->lock);
+	}
+
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
  * xilinx_dma_irq_handler - DMA Interrupt handler
  * @irq: IRQ number
  * @data: Pointer to the Xilinx DMA channel structure
@@ -1967,31 +2295,32 @@ error:
 }
 
 /**
- * xilinx_dma_prep_interleaved - prepare a descriptor for a
- *	DMA_SLAVE transaction
+ * xilinx_mcdma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
  * @dchan: DMA channel
- * @xt: Interleaved template pointer
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
  * @flags: transfer ack flags
+ * @context: APP words of the descriptor
  *
  * Return: Async transaction descriptor on success and NULL on failure
  */
 static struct dma_async_tx_descriptor *
-xilinx_dma_prep_interleaved(struct dma_chan *dchan,
-				 struct dma_interleaved_template *xt,
-				 unsigned long flags)
+xilinx_mcdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+			   unsigned int sg_len,
+			   enum dma_transfer_direction direction,
+			   unsigned long flags, void *context)
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_axidma_tx_segment *segment;
-	struct xilinx_axidma_desc_hw *hw;
-
-	if (!is_slave_direction(xt->dir))
-		return NULL;
-
-	if (!xt->numf || !xt->sgl[0].size)
-		return NULL;
+	struct xilinx_aximcdma_tx_segment *segment = NULL;
+	u32 *app_w = (u32 *)context;
+	struct scatterlist *sg;
+	size_t copy;
+	size_t sg_used;
+	unsigned int i;
 
-	if (xt->frame_size != 1)
+	if (!is_slave_direction(direction))
 		return NULL;
 
 	/* Allocate a transaction descriptor. */
@@ -1999,54 +2328,67 @@ xilinx_dma_prep_interleaved(struct dma_chan *dchan,
 	if (!desc)
 		return NULL;
 
-	chan->direction = xt->dir;
 	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
 	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
 
-	/* Get a free segment */
-	segment = xilinx_axidma_alloc_tx_segment(chan);
-	if (!segment)
-		goto error;
+	/* Build transactions using information in the scatter gather list */
+	for_each_sg(sgl, sg, sg_len, i) {
+		sg_used = 0;
 
-	hw = &segment->hw;
+		/* Loop until the entire scatterlist entry is used */
+		while (sg_used < sg_dma_len(sg)) {
+			struct xilinx_aximcdma_desc_hw *hw;
 
-	/* Fill in the descriptor */
-	if (xt->dir != DMA_MEM_TO_DEV)
-		hw->buf_addr = xt->dst_start;
-	else
-		hw->buf_addr = xt->src_start;
+			/* Get a free segment */
+			segment = xilinx_aximcdma_alloc_tx_segment(chan);
+			if (!segment)
+				goto error;
 
-	hw->mcdma_control = chan->tdest & XILINX_DMA_BD_TDEST_MASK;
-	hw->vsize_stride = (xt->numf << XILINX_DMA_BD_VSIZE_SHIFT) &
-			    XILINX_DMA_BD_VSIZE_MASK;
-	hw->vsize_stride |= (xt->sgl[0].icg + xt->sgl[0].size) &
-			    XILINX_DMA_BD_STRIDE_MASK;
-	hw->control = xt->sgl[0].size & XILINX_DMA_BD_HSIZE_MASK;
+			/*
+			 * Calculate the maximum number of bytes to transfer,
+			 * making sure it is less than the hw limit
+			 */
+			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+				     chan->xdev->max_buffer_len);
+			hw = &segment->hw;
 
-	/*
-	 * Insert the segment into the descriptor segments
-	 * list.
-	 */
-	list_add_tail(&segment->node, &desc->segments);
+			/* Fill in the descriptor */
+			xilinx_aximcdma_buf(chan, hw, sg_dma_address(sg),
+					    sg_used);
+			hw->control = copy;
 
+			if (chan->direction == DMA_MEM_TO_DEV && app_w) {
+				memcpy(hw->app, app_w, sizeof(u32) *
+				       XILINX_DMA_NUM_APP_WORDS);
+			}
+
+			sg_used += copy;
+			/*
+			 * Insert the segment into the descriptor segments
+			 * list.
+			 */
+			list_add_tail(&segment->node, &desc->segments);
+		}
+	}
 
 	segment = list_first_entry(&desc->segments,
-				   struct xilinx_axidma_tx_segment, node);
+				   struct xilinx_aximcdma_tx_segment, node);
 	desc->async_tx.phys = segment->phys;
 
 	/* For the last DMA_MEM_TO_DEV transfer, set EOP */
-	if (xt->dir == DMA_MEM_TO_DEV) {
-		segment->hw.control |= XILINX_DMA_BD_SOP;
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_MCDMA_BD_SOP;
 		segment = list_last_entry(&desc->segments,
-					  struct xilinx_axidma_tx_segment,
+					  struct xilinx_aximcdma_tx_segment,
 					  node);
-		segment->hw.control |= XILINX_DMA_BD_EOP;
+		segment->hw.control |= XILINX_MCDMA_BD_EOP;
 	}
 
 	return &desc->async_tx;
 
 error:
 	xilinx_dma_free_tx_descriptor(chan, desc);
+
 	return NULL;
 }
 
@@ -2194,7 +2536,9 @@ static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n",
+				err);
 		return err;
 	}
 
@@ -2259,14 +2603,18 @@ static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get axi_clk (%d)\n",
+				err);
 		return err;
 	}
 
 	*dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
 	if (IS_ERR(*dev_clk)) {
 		err = PTR_ERR(*dev_clk);
-		dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get dev_clk (%d)\n",
+				err);
 		return err;
 	}
 
@@ -2299,7 +2647,9 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n",
+				err);
 		return err;
 	}
 
@@ -2321,7 +2671,8 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n",
+			err);
 		return err;
 	}
 
@@ -2454,6 +2805,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 					   "xlnx,axi-dma-s2mm-channel")) {
 		chan->direction = DMA_DEV_TO_MEM;
 		chan->id = chan_id;
+		xdev->s2mm_index = xdev->nr_channels;
 		chan->tdest = chan_id - xdev->nr_channels;
 		chan->has_vflip = of_property_read_bool(node,
 					"xlnx,enable-vert-flip");
@@ -2463,7 +2815,11 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
 		}
 
-		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+			chan->ctrl_offset = XILINX_MCDMA_S2MM_CTRL_OFFSET;
+		else
+			chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
 			chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
 			chan->config.park = 1;
@@ -2478,9 +2834,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 	}
 
 	/* Request the interrupt */
-	chan->irq = irq_of_parse_and_map(node, 0);
-	err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED,
-			  "xilinx-dma-controller", chan);
+	chan->irq = irq_of_parse_and_map(node, chan->tdest);
+	err = request_irq(chan->irq, xdev->dma_config->irq_handler,
+			  IRQF_SHARED, "xilinx-dma-controller", chan);
 	if (err) {
 		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
 		return err;
@@ -2489,6 +2845,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		chan->start_transfer = xilinx_dma_start_transfer;
 		chan->stop_transfer = xilinx_dma_stop_transfer;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		chan->start_transfer = xilinx_mcdma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
 	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->start_transfer = xilinx_cdma_start_transfer;
 		chan->stop_transfer = xilinx_cdma_stop_transfer;
@@ -2545,7 +2904,7 @@ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
 	int ret, i, nr_channels = 1;
 
 	ret = of_property_read_u32(node, "dma-channels", &nr_channels);
-	if ((ret < 0) && xdev->mcdma)
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)
 		dev_warn(xdev->dev, "missing dma-channels property\n");
 
 	for (i = 0; i < nr_channels; i++)
@@ -2578,22 +2937,31 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
 static const struct xilinx_dma_config axidma_config = {
 	.dmatype = XDMA_TYPE_AXIDMA,
 	.clk_init = axidma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
 };
 
+static const struct xilinx_dma_config aximcdma_config = {
+	.dmatype = XDMA_TYPE_AXIMCDMA,
+	.clk_init = axidma_clk_init,
+	.irq_handler = xilinx_mcdma_irq_handler,
+};
 static const struct xilinx_dma_config axicdma_config = {
 	.dmatype = XDMA_TYPE_CDMA,
 	.clk_init = axicdma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
 };
 
 static const struct xilinx_dma_config axivdma_config = {
 	.dmatype = XDMA_TYPE_VDMA,
 	.clk_init = axivdma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
 };
 
 static const struct of_device_id xilinx_dma_of_ids[] = {
 	{ .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config },
 	{ .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config },
 	{ .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config },
+	{ .compatible = "xlnx,axi-mcdma-1.00.a", .data = &aximcdma_config },
 	{}
 };
 MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids);
@@ -2612,7 +2980,6 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 	struct device_node *node = pdev->dev.of_node;
 	struct xilinx_dma_device *xdev;
 	struct device_node *child, *np = pdev->dev.of_node;
-	struct resource *io;
 	u32 num_frames, addr_width, len_width;
 	int i, err;
 
@@ -2638,16 +3005,15 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 		return err;
 
 	/* Request and map I/O memory */
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	xdev->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xdev->regs))
 		return PTR_ERR(xdev->regs);
 
 	/* Retrieve the DMA engine properties from the device tree */
 	xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0);
 
-	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
-		xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma");
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA ||
+	    xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
 		if (!of_property_read_u32(node, "xlnx,sg-length-width",
 					  &len_width)) {
 			if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN ||
@@ -2712,14 +3078,17 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
 		xdev->common.device_prep_dma_cyclic =
 					  xilinx_dma_prep_dma_cyclic;
-		xdev->common.device_prep_interleaved_dma =
-					xilinx_dma_prep_interleaved;
-		/* Residue calculation is supported by only AXI DMA */
+		/* Residue calculation is supported by only AXI DMA and CDMA */
 		xdev->common.residue_granularity =
 					  DMA_RESIDUE_GRANULARITY_SEGMENT;
 	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
 		xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+		/* Residue calculation is supported by only AXI DMA and CDMA */
+		xdev->common.residue_granularity =
+					  DMA_RESIDUE_GRANULARITY_SEGMENT;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		xdev->common.device_prep_slave_sg = xilinx_mcdma_prep_slave_sg;
 	} else {
 		xdev->common.device_prep_interleaved_dma =
 				xilinx_vdma_dma_prep_interleaved;
@@ -2755,6 +3124,8 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 		dev_info(&pdev->dev, "Xilinx AXI DMA Engine Driver Probed!!\n");
 	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
 		dev_info(&pdev->dev, "Xilinx AXI CDMA Engine Driver Probed!!\n");
+	else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+		dev_info(&pdev->dev, "Xilinx AXI MCDMA Engine Driver Probed!!\n");
 	else
 		dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
 
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index 9c845c07b107..d47749a35863 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -123,10 +123,12 @@
 /* Max transfer size per descriptor */
 #define ZYNQMP_DMA_MAX_TRANS_LEN	0x40000000
 
+/* Max burst lengths */
+#define ZYNQMP_DMA_MAX_DST_BURST_LEN    32768U
+#define ZYNQMP_DMA_MAX_SRC_BURST_LEN    32768U
+
 /* Reset values for data attributes */
 #define ZYNQMP_DMA_AXCACHE_VAL		0xF
-#define ZYNQMP_DMA_ARLEN_RST_VAL	0xF
-#define ZYNQMP_DMA_AWLEN_RST_VAL	0xF
 
 #define ZYNQMP_DMA_SRC_ISSUE_RST_VAL	0x1F
 
@@ -534,17 +536,19 @@ static void zynqmp_dma_handle_ovfl_int(struct zynqmp_dma_chan *chan, u32 status)
 
 static void zynqmp_dma_config(struct zynqmp_dma_chan *chan)
 {
-	u32 val;
+	u32 val, burst_val;
 
 	val = readl(chan->regs + ZYNQMP_DMA_CTRL0);
 	val |= ZYNQMP_DMA_POINT_TYPE_SG;
 	writel(val, chan->regs + ZYNQMP_DMA_CTRL0);
 
 	val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+	burst_val = __ilog2_u32(chan->src_burst_len);
 	val = (val & ~ZYNQMP_DMA_ARLEN) |
-		(chan->src_burst_len << ZYNQMP_DMA_ARLEN_OFST);
+		((burst_val << ZYNQMP_DMA_ARLEN_OFST) & ZYNQMP_DMA_ARLEN);
+	burst_val = __ilog2_u32(chan->dst_burst_len);
 	val = (val & ~ZYNQMP_DMA_AWLEN) |
-		(chan->dst_burst_len << ZYNQMP_DMA_AWLEN_OFST);
+		((burst_val << ZYNQMP_DMA_AWLEN_OFST) & ZYNQMP_DMA_AWLEN);
 	writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
 }
 
@@ -560,8 +564,10 @@ static int zynqmp_dma_device_config(struct dma_chan *dchan,
 {
 	struct zynqmp_dma_chan *chan = to_chan(dchan);
 
-	chan->src_burst_len = config->src_maxburst;
-	chan->dst_burst_len = config->dst_maxburst;
+	chan->src_burst_len = clamp(config->src_maxburst, 1U,
+		ZYNQMP_DMA_MAX_SRC_BURST_LEN);
+	chan->dst_burst_len = clamp(config->dst_maxburst, 1U,
+		ZYNQMP_DMA_MAX_DST_BURST_LEN);
 
 	return 0;
 }
@@ -887,8 +893,8 @@ static int zynqmp_dma_chan_probe(struct zynqmp_dma_device *zdev,
 		return PTR_ERR(chan->regs);
 
 	chan->bus_width = ZYNQMP_DMA_BUS_WIDTH_64;
-	chan->dst_burst_len = ZYNQMP_DMA_AWLEN_RST_VAL;
-	chan->src_burst_len = ZYNQMP_DMA_ARLEN_RST_VAL;
+	chan->dst_burst_len = ZYNQMP_DMA_MAX_DST_BURST_LEN;
+	chan->src_burst_len = ZYNQMP_DMA_MAX_SRC_BURST_LEN;
 	err = of_property_read_u32(node, "xlnx,bus-width", &chan->bus_width);
 	if (err < 0) {
 		dev_err(&pdev->dev, "missing xlnx,bus-width property\n");
diff --git a/drivers/dma/zx_dma.c b/drivers/dma/zx_dma.c
index 9f4436f7c914..5fe2e8b9a7b8 100644
--- a/drivers/dma/zx_dma.c
+++ b/drivers/dma/zx_dma.c
@@ -754,18 +754,13 @@ static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 static int zx_dma_probe(struct platform_device *op)
 {
 	struct zx_dma_dev *d;
-	struct resource *iores;
 	int i, ret = 0;
 
-	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
-
 	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
 
-	d->base = devm_ioremap_resource(&op->dev, iores);
+	d->base = devm_platform_ioremap_resource(op, 0);
 	if (IS_ERR(d->base))
 		return PTR_ERR(d->base);
 
@@ -894,7 +889,6 @@ static int zx_dma_remove(struct platform_device *op)
 		list_del(&c->vc.chan.device_node);
 	}
 	clk_disable_unprepare(d->clk);
-	dmam_pool_destroy(d->pool);
 
 	return 0;
 }
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 417dad635526..b3c99bb5fe77 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -462,7 +462,7 @@ config EDAC_ALTERA_SDMMC
 
 config EDAC_SIFIVE
 	bool "Sifive platform EDAC driver"
-	depends on EDAC=y && RISCV
+	depends on EDAC=y && SIFIVE_L2
 	help
 	  Support for error detection and correction on the SiFive SoCs.
 
@@ -491,8 +491,7 @@ config EDAC_TI
 	tristate "Texas Instruments DDR3 ECC Controller"
 	depends on ARCH_KEYSTONE || SOC_DRA7XX
 	help
-	  Support for error detection and correction on the
-          TI SoCs.
+	  Support for error detection and correction on the TI SoCs.
 
 config EDAC_QCOM
 	tristate "QCOM EDAC Controller"
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index fbda4b876afd..e91cf1147a4e 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -14,6 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/kernel.h>
+#include <linux/mfd/altera-sysmgr.h>
 #include <linux/mfd/syscon.h>
 #include <linux/notifier.h>
 #include <linux/of_address.h>
@@ -275,7 +276,6 @@ release:
 	return ret;
 }
 
-static int socfpga_is_a10(void);
 static int altr_sdram_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *id;
@@ -399,7 +399,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
 		goto err;
 
 	/* Only the Arria10 has separate IRQs */
-	if (socfpga_is_a10()) {
+	if (of_machine_is_compatible("altr,socfpga-arria10")) {
 		/* Arria10 specific initialization */
 		res = a10_init(mc_vbase);
 		if (res < 0)
@@ -502,68 +502,6 @@ module_platform_driver(altr_sdram_edac_driver);
 
 #endif	/* CONFIG_EDAC_ALTERA_SDRAM */
 
-/**************** Stratix 10 EDAC Memory Controller Functions ************/
-
-/**
- * s10_protected_reg_write
- * Write to a protected SMC register.
- * @context: Not used.
- * @reg: Address of register
- * @value: Value to write
- * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
- *	   INTEL_SIP_SMC_REG_ERROR on error
- *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
- */
-static int s10_protected_reg_write(void *context, unsigned int reg,
-				   unsigned int val)
-{
-	struct arm_smccc_res result;
-	unsigned long offset = (unsigned long)context;
-
-	arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0,
-		      0, 0, 0, &result);
-
-	return (int)result.a0;
-}
-
-/**
- * s10_protected_reg_read
- * Read the status of a protected SMC register
- * @context: Not used.
- * @reg: Address of register
- * @value: Value read.
- * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
- *	   INTEL_SIP_SMC_REG_ERROR on error
- *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
- */
-static int s10_protected_reg_read(void *context, unsigned int reg,
-				  unsigned int *val)
-{
-	struct arm_smccc_res result;
-	unsigned long offset = (unsigned long)context;
-
-	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0,
-		      0, 0, 0, &result);
-
-	*val = (unsigned int)result.a1;
-
-	return (int)result.a0;
-}
-
-static const struct regmap_config s10_sdram_regmap_cfg = {
-	.name = "s10_ddr",
-	.reg_bits = 32,
-	.reg_stride = 4,
-	.val_bits = 32,
-	.max_register = 0xffd12228,
-	.reg_read = s10_protected_reg_read,
-	.reg_write = s10_protected_reg_write,
-	.use_single_read = true,
-	.use_single_write = true,
-};
-
-/************** </Stratix10 EDAC Memory Controller Functions> ***********/
-
 /************************* EDAC Parent Probe *************************/
 
 static const struct of_device_id altr_edac_device_of_match[];
@@ -1008,16 +946,6 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port)
 	return ret;
 }
 
-static int socfpga_is_a10(void)
-{
-	return of_machine_is_compatible("altr,socfpga-arria10");
-}
-
-static int socfpga_is_s10(void)
-{
-	return of_machine_is_compatible("altr,socfpga-stratix10");
-}
-
 static __init int __maybe_unused
 altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
 			u32 ecc_ctrl_en_mask, bool dual_port)
@@ -1033,34 +961,10 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
 	/* Get the ECC Manager - parent of the device EDACs */
 	np_eccmgr = of_get_parent(np);
 
-	if (socfpga_is_a10()) {
-		ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr,
-							      "altr,sysmgr-syscon");
-	} else {
-		struct device_node *sysmgr_np;
-		struct resource res;
-		uintptr_t base;
-
-		sysmgr_np = of_parse_phandle(np_eccmgr,
-					     "altr,sysmgr-syscon", 0);
-		if (!sysmgr_np) {
-			edac_printk(KERN_ERR, EDAC_DEVICE,
-				    "Unable to find altr,sysmgr-syscon\n");
-			return -ENODEV;
-		}
-
-		if (of_address_to_resource(sysmgr_np, 0, &res)) {
-			of_node_put(sysmgr_np);
-			return -ENOMEM;
-		}
+	ecc_mgr_map =
+		altr_sysmgr_regmap_lookup_by_phandle(np_eccmgr,
+						     "altr,sysmgr-syscon");
 
-		/* Need physical address for SMCC call */
-		base = res.start;
-
-		ecc_mgr_map = regmap_init(NULL, NULL, (void *)base,
-					  &s10_sdram_regmap_cfg);
-		of_node_put(sysmgr_np);
-	}
 	of_node_put(np_eccmgr);
 	if (IS_ERR(ecc_mgr_map)) {
 		edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -1125,9 +1029,6 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
 	int irq;
 	struct device_node *child, *np;
 
-	if (!socfpga_is_a10() && !socfpga_is_s10())
-		return -ENODEV;
-
 	np = of_find_compatible_node(NULL, NULL,
 				     "altr,socfpga-a10-ecc-manager");
 	if (!np) {
@@ -2178,33 +2079,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, edac);
 	INIT_LIST_HEAD(&edac->a10_ecc_devices);
 
-	if (socfpga_is_a10()) {
-		edac->ecc_mgr_map =
-			syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-							"altr,sysmgr-syscon");
-	} else {
-		struct device_node *sysmgr_np;
-		struct resource res;
-		uintptr_t base;
-
-		sysmgr_np = of_parse_phandle(pdev->dev.of_node,
-					     "altr,sysmgr-syscon", 0);
-		if (!sysmgr_np) {
-			edac_printk(KERN_ERR, EDAC_DEVICE,
-				    "Unable to find altr,sysmgr-syscon\n");
-			return -ENODEV;
-		}
-
-		if (of_address_to_resource(sysmgr_np, 0, &res))
-			return -ENOMEM;
-
-		/* Need physical address for SMCC call */
-		base = res.start;
-
-		edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL,
-						     (void *)base,
-						     &s10_sdram_regmap_cfg);
-	}
+	edac->ecc_mgr_map =
+		altr_sysmgr_regmap_lookup_by_phandle(pdev->dev.of_node,
+						     "altr,sysmgr-syscon");
 
 	if (IS_ERR(edac->ecc_mgr_map)) {
 		edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -2270,18 +2147,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
 		if (!of_device_is_available(child))
 			continue;
 
-		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || 
-		    of_device_is_compatible(child, "altr,socfpga-a10-ocram-ecc") ||
-		    of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") ||
-		    of_device_is_compatible(child, "altr,socfpga-nand-ecc") ||
-		    of_device_is_compatible(child, "altr,socfpga-dma-ecc") ||
-		    of_device_is_compatible(child, "altr,socfpga-usb-ecc") ||
-		    of_device_is_compatible(child, "altr,socfpga-qspi-ecc") ||
-#ifdef CONFIG_EDAC_ALTERA_SDRAM
-		    of_device_is_compatible(child, "altr,sdram-edac-s10") ||
-#endif
-		    of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc"))
-
+		if (of_match_node(altr_edac_a10_device_of_match, child))
 			altr_edac_a10_device_add(edac, child);
 
 #ifdef CONFIG_EDAC_ALTERA_SDRAM
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index c1d4536ae466..9fbad908a854 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -16,12 +16,11 @@ module_param(ecc_enable_override, int, 0644);
 
 static struct msr __percpu *msrs;
 
+static struct amd64_family_type *fam_type;
+
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
-/* Number of Unified Memory Controllers */
-static u8 num_umcs;
-
 /*
  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
  * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
@@ -215,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
 
 	scrubval = scrubrates[i].scrubval;
 
-	if (pvt->fam == 0x17 || pvt->fam == 0x18) {
+	if (pvt->umc) {
 		__f17h_set_scrubval(pvt, scrubval);
 	} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
 		f15h_select_dct(pvt, 0);
@@ -257,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
 	int i, retval = -EINVAL;
 	u32 scrubval = 0;
 
-	switch (pvt->fam) {
-	case 0x15:
-		/* Erratum #505 */
-		if (pvt->model < 0x10)
-			f15h_select_dct(pvt, 0);
-
-		if (pvt->model == 0x60)
-			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
-		break;
-
-	case 0x17:
-	case 0x18:
+	if (pvt->umc) {
 		amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
 		if (scrubval & BIT(0)) {
 			amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -277,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
 		} else {
 			scrubval = 0;
 		}
-		break;
+	} else if (pvt->fam == 0x15) {
+		/* Erratum #505 */
+		if (pvt->model < 0x10)
+			f15h_select_dct(pvt, 0);
 
-	default:
+		if (pvt->model == 0x60)
+			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+	} else {
 		amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
-		break;
 	}
 
 	scrubval = scrubval & 0x001F;
@@ -454,7 +446,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
 
 #define for_each_umc(i) \
-	for (i = 0; i < num_umcs; i++)
+	for (i = 0; i < fam_type->max_mcs; i++)
 
 /*
  * @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -1056,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt)
 {
 	u32 dram_ctrl, dcsm;
 
+	if (pvt->umc) {
+		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
+			pvt->dram_type = MEM_LRDDR4;
+		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
+			pvt->dram_type = MEM_RDDR4;
+		else
+			pvt->dram_type = MEM_DDR4;
+		return;
+	}
+
 	switch (pvt->fam) {
 	case 0xf:
 		if (pvt->ext_model >= K8_REV_F)
@@ -1101,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt)
 	case 0x16:
 		goto ddr3;
 
-	case 0x17:
-	case 0x18:
-		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
-			pvt->dram_type = MEM_LRDDR4;
-		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
-			pvt->dram_type = MEM_RDDR4;
-		else
-			pvt->dram_type = MEM_DDR4;
-		return;
-
 	default:
 		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
 		pvt->dram_type = MEM_EMPTY;
@@ -2224,6 +2216,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "K8",
 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
 		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= k8_early_channel_count,
 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
@@ -2234,6 +2227,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F10h",
 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
 		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2244,6 +2238,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2254,6 +2249,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2264,6 +2260,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h_M60h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2274,6 +2271,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F16h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2284,6 +2282,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F16h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2294,6 +2293,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F17h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2303,6 +2303,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F17h_M10h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2312,6 +2313,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F17h_M30h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+		.max_mcs = 8,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2321,6 +2323,17 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F17h_M70h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
+		.max_mcs = 2,
+		.ops = {
+			.early_channel_count	= f17_early_channel_count,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
+		}
+	},
+	[F19_CPUS] = {
+		.ctl_name = "F19h",
+		.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
+		.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
+		.max_mcs = 8,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2838,8 +2851,6 @@ skip:
 	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
 
 	determine_ecc_sym_sz(pvt);
-
-	dump_misc_regs(pvt);
 }
 
 /*
@@ -2936,6 +2947,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
 			dimm->mtype = pvt->dram_type;
 			dimm->edac_mode = edac_mode;
 			dimm->dtype = dev_type;
+			dimm->grain = 64;
 		}
 	}
 
@@ -3012,6 +3024,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 			dimm = csrow->channels[j]->dimm;
 			dimm->mtype = pvt->dram_type;
 			dimm->edac_mode = edac_mode;
+			dimm->grain = 64;
 		}
 	}
 
@@ -3178,43 +3191,27 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 		amd64_warn("Error restoring NB MCGCTL settings!\n");
 }
 
-/*
- * EDAC requires that the BIOS have ECC enabled before
- * taking over the processing of ECC errors. A command line
- * option allows to force-enable hardware ECC later in
- * enable_ecc_error_reporting().
- */
-static const char *ecc_msg =
-	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
-	" Either enable ECC checking or force module loading by setting "
-	"'ecc_enable_override'.\n"
-	" (Note that use of the override may cause unknown side effects.)\n";
-
-static bool ecc_enabled(struct pci_dev *F3, u16 nid)
+static bool ecc_enabled(struct amd64_pvt *pvt)
 {
+	u16 nid = pvt->mc_node_id;
 	bool nb_mce_en = false;
 	u8 ecc_en = 0, i;
 	u32 value;
 
 	if (boot_cpu_data.x86 >= 0x17) {
 		u8 umc_en_mask = 0, ecc_en_mask = 0;
+		struct amd64_umc *umc;
 
 		for_each_umc(i) {
-			u32 base = get_umc_base(i);
+			umc = &pvt->umc[i];
 
 			/* Only check enabled UMCs. */
-			if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
-				continue;
-
-			if (!(value & UMC_SDP_INIT))
+			if (!(umc->sdp_ctrl & UMC_SDP_INIT))
 				continue;
 
 			umc_en_mask |= BIT(i);
 
-			if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
-				continue;
-
-			if (value & UMC_ECC_ENABLED)
+			if (umc->umc_cap_hi & UMC_ECC_ENABLED)
 				ecc_en_mask |= BIT(i);
 		}
 
@@ -3227,7 +3224,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 		/* Assume UMC MCA banks are enabled. */
 		nb_mce_en = true;
 	} else {
-		amd64_read_pci_cfg(F3, NBCFG, &value);
+		amd64_read_pci_cfg(pvt->F3, NBCFG, &value);
 
 		ecc_en = !!(value & NBCFG_ECC_ENABLE);
 
@@ -3240,11 +3237,10 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 	amd64_info("Node %d: DRAM ECC %s.\n",
 		   nid, (ecc_en ? "enabled" : "disabled"));
 
-	if (!ecc_en || !nb_mce_en) {
-		amd64_info("%s", ecc_msg);
+	if (!ecc_en || !nb_mce_en)
 		return false;
-	}
-	return true;
+	else
+		return true;
 }
 
 static inline void
@@ -3278,8 +3274,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
 	}
 }
 
-static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
-				 struct amd64_family_type *fam)
+static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
 
@@ -3298,7 +3293,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
 
 	mci->edac_cap		= determine_edac_cap(pvt);
 	mci->mod_name		= EDAC_MOD_STR;
-	mci->ctl_name		= fam->ctl_name;
+	mci->ctl_name		= fam_type->ctl_name;
 	mci->dev_name		= pci_name(pvt->F3);
 	mci->ctl_page_to_phys	= NULL;
 
@@ -3312,8 +3307,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
  */
 static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 {
-	struct amd64_family_type *fam_type = NULL;
-
 	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
 	pvt->stepping	= boot_cpu_data.x86_stepping;
 	pvt->model	= boot_cpu_data.x86_model;
@@ -3378,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 			family_types[F17_CPUS].ctl_name = "F18h";
 		break;
 
+	case 0x19:
+		fam_type	= &family_types[F19_CPUS];
+		pvt->ops	= &family_types[F19_CPUS].ops;
+		family_types[F19_CPUS].ctl_name = "F19h";
+		break;
+
 	default:
 		amd64_err("Unsupported family!\n");
 		return NULL;
@@ -3401,51 +3400,15 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
 	NULL
 };
 
-/* Set the number of Unified Memory Controllers in the system. */
-static void compute_num_umcs(void)
+static int hw_info_get(struct amd64_pvt *pvt)
 {
-	u8 model = boot_cpu_data.x86_model;
-
-	if (boot_cpu_data.x86 < 0x17)
-		return;
-
-	if (model >= 0x30 && model <= 0x3f)
-		num_umcs = 8;
-	else
-		num_umcs = 2;
-
-	edac_dbg(1, "Number of UMCs: %x", num_umcs);
-}
-
-static int init_one_instance(unsigned int nid)
-{
-	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
-	struct amd64_family_type *fam_type = NULL;
-	struct mem_ctl_info *mci = NULL;
-	struct edac_mc_layer layers[2];
-	struct amd64_pvt *pvt = NULL;
 	u16 pci_id1, pci_id2;
-	int err = 0, ret;
-
-	ret = -ENOMEM;
-	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
-	if (!pvt)
-		goto err_ret;
-
-	pvt->mc_node_id	= nid;
-	pvt->F3 = F3;
-
-	ret = -EINVAL;
-	fam_type = per_family_init(pvt);
-	if (!fam_type)
-		goto err_free;
+	int ret = -EINVAL;
 
 	if (pvt->fam >= 0x17) {
-		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
-		if (!pvt->umc) {
-			ret = -ENOMEM;
-			goto err_free;
-		}
+		pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+		if (!pvt->umc)
+			return -ENOMEM;
 
 		pci_id1 = fam_type->f0_id;
 		pci_id2 = fam_type->f6_id;
@@ -3454,21 +3417,37 @@ static int init_one_instance(unsigned int nid)
 		pci_id2 = fam_type->f2_id;
 	}
 
-	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
-	if (err)
-		goto err_post_init;
+	ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
+	if (ret)
+		return ret;
 
 	read_mc_regs(pvt);
 
+	return 0;
+}
+
+static void hw_info_put(struct amd64_pvt *pvt)
+{
+	if (pvt->F0 || pvt->F1)
+		free_mc_sibling_devs(pvt);
+
+	kfree(pvt->umc);
+}
+
+static int init_one_instance(struct amd64_pvt *pvt)
+{
+	struct mem_ctl_info *mci = NULL;
+	struct edac_mc_layer layers[2];
+	int ret = -EINVAL;
+
 	/*
 	 * We need to determine how many memory channels there are. Then use
 	 * that information for calculating the size of the dynamic instance
 	 * tables in the 'mci' structure.
 	 */
-	ret = -EINVAL;
 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
 	if (pvt->channel_count < 0)
-		goto err_siblings;
+		return ret;
 
 	ret = -ENOMEM;
 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
@@ -3480,24 +3459,18 @@ static int init_one_instance(unsigned int nid)
 	 * Always allocate two channels since we can have setups with DIMMs on
 	 * only one channel. Also, this simplifies handling later for the price
 	 * of a couple of KBs tops.
-	 *
-	 * On Fam17h+, the number of controllers may be greater than two. So set
-	 * the size equal to the maximum number of UMCs.
 	 */
-	if (pvt->fam >= 0x17)
-		layers[1].size = num_umcs;
-	else
-		layers[1].size = 2;
+	layers[1].size = fam_type->max_mcs;
 	layers[1].is_virt_csrow = false;
 
-	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
+	mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
 	if (!mci)
-		goto err_siblings;
+		return ret;
 
 	mci->pvt_info = pvt;
 	mci->pdev = &pvt->F3->dev;
 
-	setup_mci_misc_attrs(mci, fam_type);
+	setup_mci_misc_attrs(mci);
 
 	if (init_csrows(mci))
 		mci->edac_cap = EDAC_FLAG_NONE;
@@ -3505,31 +3478,30 @@ static int init_one_instance(unsigned int nid)
 	ret = -ENODEV;
 	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
 		edac_dbg(1, "failed edac_mc_add_mc()\n");
-		goto err_add_mc;
+		edac_mc_free(mci);
+		return ret;
 	}
 
 	return 0;
+}
 
-err_add_mc:
-	edac_mc_free(mci);
-
-err_siblings:
-	free_mc_sibling_devs(pvt);
-
-err_post_init:
-	if (pvt->fam >= 0x17)
-		kfree(pvt->umc);
+static bool instance_has_memory(struct amd64_pvt *pvt)
+{
+	bool cs_enabled = false;
+	int cs = 0, dct = 0;
 
-err_free:
-	kfree(pvt);
+	for (dct = 0; dct < fam_type->max_mcs; dct++) {
+		for_each_chip_select(cs, dct, pvt)
+			cs_enabled |= csrow_enabled(cs, dct, pvt);
+	}
 
-err_ret:
-	return ret;
+	return cs_enabled;
 }
 
 static int probe_one_instance(unsigned int nid)
 {
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	struct amd64_pvt *pvt = NULL;
 	struct ecc_settings *s;
 	int ret;
 
@@ -3540,8 +3512,29 @@ static int probe_one_instance(unsigned int nid)
 
 	ecc_stngs[nid] = s;
 
-	if (!ecc_enabled(F3, nid)) {
-		ret = 0;
+	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+	if (!pvt)
+		goto err_settings;
+
+	pvt->mc_node_id	= nid;
+	pvt->F3 = F3;
+
+	fam_type = per_family_init(pvt);
+	if (!fam_type)
+		goto err_enable;
+
+	ret = hw_info_get(pvt);
+	if (ret < 0)
+		goto err_enable;
+
+	ret = 0;
+	if (!instance_has_memory(pvt)) {
+		amd64_info("Node %d: No DIMMs detected.\n", nid);
+		goto err_enable;
+	}
+
+	if (!ecc_enabled(pvt)) {
+		ret = -ENODEV;
 
 		if (!ecc_enable_override)
 			goto err_enable;
@@ -3556,7 +3549,7 @@ static int probe_one_instance(unsigned int nid)
 			goto err_enable;
 	}
 
-	ret = init_one_instance(nid);
+	ret = init_one_instance(pvt);
 	if (ret < 0) {
 		amd64_err("Error probing instance: %d\n", nid);
 
@@ -3566,9 +3559,15 @@ static int probe_one_instance(unsigned int nid)
 		goto err_enable;
 	}
 
+	dump_misc_regs(pvt);
+
 	return ret;
 
 err_enable:
+	hw_info_put(pvt);
+	kfree(pvt);
+
+err_settings:
 	kfree(s);
 	ecc_stngs[nid] = NULL;
 
@@ -3583,9 +3582,6 @@ static void remove_one_instance(unsigned int nid)
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
 
-	mci = find_mci_by_dev(&F3->dev);
-	WARN_ON(!mci);
-
 	/* Remove from EDAC CORE tracking list */
 	mci = edac_mc_del_mc(&F3->dev);
 	if (!mci)
@@ -3595,14 +3591,13 @@ static void remove_one_instance(unsigned int nid)
 
 	restore_ecc_error_reporting(s, nid, F3);
 
-	free_mc_sibling_devs(pvt);
-
 	kfree(ecc_stngs[nid]);
 	ecc_stngs[nid] = NULL;
 
 	/* Free the EDAC CORE resources */
 	mci->pvt_info = NULL;
 
+	hw_info_put(pvt);
 	kfree(pvt);
 	edac_mc_free(mci);
 }
@@ -3637,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
 	{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
+	{ X86_VENDOR_AMD, 0x19, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
@@ -3668,8 +3664,6 @@ static int __init amd64_edac_init(void)
 	if (!msrs)
 		goto err_free;
 
-	compute_num_umcs();
-
 	for (i = 0; i < amd_nb_num(); i++) {
 		err = probe_one_instance(i);
 		if (err) {
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8c3cda81e619..abbf3c274d74 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -122,6 +122,8 @@
 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
+#define PCI_DEVICE_ID_AMD_19H_DF_F0	0x1650
+#define PCI_DEVICE_ID_AMD_19H_DF_F6	0x1656
 
 /*
  * Function 1 - Address Map
@@ -292,6 +294,7 @@ enum amd_families {
 	F17_M10H_CPUS,
 	F17_M30H_CPUS,
 	F17_M70H_CPUS,
+	F19_CPUS,
 	NUM_FAMILIES,
 };
 
@@ -479,6 +482,8 @@ struct low_ops {
 struct amd64_family_type {
 	const char *ctl_name;
 	u16 f0_id, f1_id, f2_id, f6_id;
+	/* Maximum number of memory controllers per die/node. */
+	u8 max_mcs;
 	struct low_ops ops;
 };
 
diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index 5634437bb39d..b194658b8b5c 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -243,7 +243,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 	if (!np) {
 		dev_err(mci->pdev, "dt: missing /memory node\n");
 		return -ENODEV;
-	};
+	}
 
 	rc = of_address_to_resource(np, 0, &r);
 
@@ -252,7 +252,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 	if (rc) {
 		dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n");
 		return rc;
-	};
+	}
 
 	dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
 		r.start, resource_size(&r), PAGE_SHIFT);
@@ -281,16 +281,11 @@ static int aspeed_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct edac_mc_layer layers[2];
 	struct mem_ctl_info *mci;
-	struct resource *res;
 	void __iomem *regs;
 	u32 reg04;
 	int rc;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENOENT;
-
-	regs = devm_ioremap_resource(dev, res);
+	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 65cf2b9355c4..8c4d947fb848 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -555,12 +555,16 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
 	return edac_dev->panic_on_ue;
 }
 
-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
-			int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg)
 {
 	struct edac_device_instance *instance;
 	struct edac_device_block *block = NULL;
 
+	if (!count)
+		return;
+
 	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
 		edac_device_printk(edac_dev, KERN_ERR,
 				"INTERNAL ERROR: 'instance' out of range "
@@ -582,27 +586,31 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
 
 	if (instance->nr_blocks > 0) {
 		block = instance->blocks + block_nr;
-		block->counters.ce_count++;
+		block->counters.ce_count += count;
 	}
 
 	/* Propagate the count up the 'totals' tree */
-	instance->counters.ce_count++;
-	edac_dev->counters.ce_count++;
+	instance->counters.ce_count += count;
+	edac_dev->counters.ce_count += count;
 
 	if (edac_device_get_log_ce(edac_dev))
 		edac_device_printk(edac_dev, KERN_WARNING,
-				"CE: %s instance: %s block: %s '%s'\n",
-				edac_dev->ctl_name, instance->name,
-				block ? block->name : "N/A", msg);
+				   "CE: %s instance: %s block: %s count: %d '%s'\n",
+				   edac_dev->ctl_name, instance->name,
+				   block ? block->name : "N/A", count, msg);
 }
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
 
-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
-			int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg)
 {
 	struct edac_device_instance *instance;
 	struct edac_device_block *block = NULL;
 
+	if (!count)
+		return;
+
 	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
 		edac_device_printk(edac_dev, KERN_ERR,
 				"INTERNAL ERROR: 'instance' out of range "
@@ -624,22 +632,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
 
 	if (instance->nr_blocks > 0) {
 		block = instance->blocks + block_nr;
-		block->counters.ue_count++;
+		block->counters.ue_count += count;
 	}
 
 	/* Propagate the count up the 'totals' tree */
-	instance->counters.ue_count++;
-	edac_dev->counters.ue_count++;
+	instance->counters.ue_count += count;
+	edac_dev->counters.ue_count += count;
 
 	if (edac_device_get_log_ue(edac_dev))
 		edac_device_printk(edac_dev, KERN_EMERG,
-				"UE: %s instance: %s block: %s '%s'\n",
-				edac_dev->ctl_name, instance->name,
-				block ? block->name : "N/A", msg);
+				   "UE: %s instance: %s block: %s count: %d '%s'\n",
+				   edac_dev->ctl_name, instance->name,
+				   block ? block->name : "N/A", count, msg);
 
 	if (edac_device_get_panic_on_ue(edac_dev))
-		panic("EDAC %s: UE instance: %s block %s '%s'\n",
-			edac_dev->ctl_name, instance->name,
-			block ? block->name : "N/A", msg);
+		panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+		      edac_dev->ctl_name, instance->name,
+		      block ? block->name : "N/A", count, msg);
 }
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
+EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..c4c0e0bdce14 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
 extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
 
 /**
- * edac_device_handle_ue():
- *	perform a common output and handling of an 'edac_dev' UE event
+ * Log correctable errors.
  *
  * @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
+/**
+ * Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
  * @msg: message to be printed
  */
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
-				int inst_nr, int block_nr, const char *msg);
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
 /**
- * edac_device_handle_ce():
- *	perform a common output and handling of an 'edac_dev' CE event
+ * edac_device_handle_ce(): Log a single correctable error
  *
  * @edac_dev: pointer to struct &edac_device_ctl_info
  * @inst_nr: number of the instance where the CE error happened
  * @block_nr: number of the block where the CE error happened
  * @msg: message to be printed
  */
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
-				int inst_nr, int block_nr, const char *msg);
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
 
 /**
  * edac_device_alloc_index: Allocate a unique device index number
@@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
  */
 extern int edac_device_alloc_index(void);
 extern const char *edac_layer_name[];
-
 #endif
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index e6fd079783bd..7243b88f81d8 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -145,15 +145,18 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
 }
 
-static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
+static void edac_mc_dump_dimm(struct dimm_info *dimm)
 {
 	char location[80];
 
+	if (!dimm->nr_pages)
+		return;
+
 	edac_dimm_info_location(dimm, location, sizeof(location));
 
 	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
 		 dimm->mci->csbased ? "rank" : "dimm",
-		 number, location, dimm->csrow, dimm->cschannel);
+		 dimm->idx, location, dimm->csrow, dimm->cschannel);
 	edac_dbg(4, "  dimm = %p\n", dimm);
 	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
@@ -314,25 +317,28 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	struct dimm_info *dimm;
 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 	unsigned int pos[EDAC_MAX_LAYERS];
-	unsigned int size, tot_dimms = 1, count = 1;
+	unsigned int idx, size, tot_dimms = 1, count = 1;
 	unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 	void *pvt, *p, *ptr = NULL;
-	int i, j, row, chn, n, len, off;
+	int i, j, row, chn, n, len;
 	bool per_rank = false;
 
-	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
+	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
+		return NULL;
+
 	/*
 	 * Calculate the total amount of dimms and csrows/cschannels while
 	 * in the old API emulation mode
 	 */
-	for (i = 0; i < n_layers; i++) {
-		tot_dimms *= layers[i].size;
-		if (layers[i].is_virt_csrow)
-			tot_csrows *= layers[i].size;
+	for (idx = 0; idx < n_layers; idx++) {
+		tot_dimms *= layers[idx].size;
+
+		if (layers[idx].is_virt_csrow)
+			tot_csrows *= layers[idx].size;
 		else
-			tot_channels *= layers[i].size;
+			tot_channels *= layers[idx].size;
 
-		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
+		if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
 			per_rank = true;
 	}
 
@@ -425,19 +431,15 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	memset(&pos, 0, sizeof(pos));
 	row = 0;
 	chn = 0;
-	for (i = 0; i < tot_dimms; i++) {
+	for (idx = 0; idx < tot_dimms; idx++) {
 		chan = mci->csrows[row]->channels[chn];
-		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
-		if (off < 0 || off >= tot_dimms) {
-			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
-			goto error;
-		}
 
 		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
 		if (!dimm)
 			goto error;
-		mci->dimms[off] = dimm;
+		mci->dimms[idx] = dimm;
 		dimm->mci = mci;
+		dimm->idx = idx;
 
 		/*
 		 * Copy DIMM location and initialize it.
@@ -714,6 +716,7 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
 		edac_mc_dump_mci(mci);
 
 	if (edac_debug_level >= 4) {
+		struct dimm_info *dimm;
 		int i;
 
 		for (i = 0; i < mci->nr_csrows; i++) {
@@ -730,9 +733,9 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
 				if (csrow->channels[j]->dimm->nr_pages)
 					edac_mc_dump_channel(csrow->channels[j]);
 		}
-		for (i = 0; i < mci->tot_dimms; i++)
-			if (mci->dimms[i]->nr_pages)
-				edac_mc_dump_dimm(mci->dimms[i], i);
+
+		mci_for_each_dimm(mci, dimm)
+			edac_mc_dump_dimm(dimm);
 	}
 #endif
 	mutex_lock(&mem_ctls_mutex);
@@ -1055,6 +1058,21 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
 {
 	char detail[80];
 	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+	u8 grain_bits;
+
+	/* Sanity-check driver-supplied grain value. */
+	if (WARN_ON_ONCE(!e->grain))
+		e->grain = 1;
+
+	grain_bits = fls_long(e->grain - 1);
+
+	/* Report the error via the trace interface */
+	if (IS_ENABLED(CONFIG_RAS))
+		trace_mc_event(type, e->msg, e->label, e->error_count,
+			       mci->mc_idx, e->top_layer, e->mid_layer,
+			       e->low_layer,
+			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
+			       grain_bits, e->syndrome, e->other_detail);
 
 	/* Memory type dependent details about the error */
 	if (type == HW_EVENT_ERR_CORRECTED) {
@@ -1090,11 +1108,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *msg,
 			  const char *other_detail)
 {
+	struct dimm_info *dimm;
 	char *p;
 	int row = -1, chan = -1;
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
 	int i, n_labels = 0;
-	u8 grain_bits;
 	struct edac_raw_error_desc *e = &mci->error_desc;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
@@ -1150,9 +1168,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	p = e->label;
 	*p = '\0';
 
-	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm = mci->dimms[i];
-
+	mci_for_each_dimm(mci, dimm) {
 		if (top_layer >= 0 && top_layer != dimm->location[0])
 			continue;
 		if (mid_layer >= 0 && mid_layer != dimm->location[1])
@@ -1170,37 +1186,37 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		 * channel/memory controller/...  may be affected.
 		 * Also, don't show errors for empty DIMM slots.
 		 */
-		if (e->enable_per_layer_report && dimm->nr_pages) {
-			if (n_labels >= EDAC_MAX_LABELS) {
-				e->enable_per_layer_report = false;
-				break;
-			}
-			n_labels++;
-			if (p != e->label) {
-				strcpy(p, OTHER_LABEL);
-				p += strlen(OTHER_LABEL);
-			}
-			strcpy(p, dimm->label);
-			p += strlen(p);
-			*p = '\0';
+		if (!e->enable_per_layer_report || !dimm->nr_pages)
+			continue;
 
-			/*
-			 * get csrow/channel of the DIMM, in order to allow
-			 * incrementing the compat API counters
-			 */
-			edac_dbg(4, "%s csrows map: (%d,%d)\n",
-				 mci->csbased ? "rank" : "dimm",
-				 dimm->csrow, dimm->cschannel);
-			if (row == -1)
-				row = dimm->csrow;
-			else if (row >= 0 && row != dimm->csrow)
-				row = -2;
-
-			if (chan == -1)
-				chan = dimm->cschannel;
-			else if (chan >= 0 && chan != dimm->cschannel)
-				chan = -2;
+		if (n_labels >= EDAC_MAX_LABELS) {
+			e->enable_per_layer_report = false;
+			break;
+		}
+		n_labels++;
+		if (p != e->label) {
+			strcpy(p, OTHER_LABEL);
+			p += strlen(OTHER_LABEL);
 		}
+		strcpy(p, dimm->label);
+		p += strlen(p);
+
+		/*
+		 * get csrow/channel of the DIMM, in order to allow
+		 * incrementing the compat API counters
+		 */
+		edac_dbg(4, "%s csrows map: (%d,%d)\n",
+			mci->csbased ? "rank" : "dimm",
+			dimm->csrow, dimm->cschannel);
+		if (row == -1)
+			row = dimm->csrow;
+		else if (row >= 0 && row != dimm->csrow)
+			row = -2;
+
+		if (chan == -1)
+			chan = dimm->cschannel;
+		else if (chan >= 0 && chan != dimm->cschannel)
+			chan = -2;
 	}
 
 	if (!e->enable_per_layer_report) {
@@ -1234,20 +1250,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	if (p > e->location)
 		*(p - 1) = '\0';
 
-	/* Sanity-check driver-supplied grain value. */
-	if (WARN_ON_ONCE(!e->grain))
-		e->grain = 1;
-
-	grain_bits = fls_long(e->grain - 1);
-
-	/* Report the error via the trace interface */
-	if (IS_ENABLED(CONFIG_RAS))
-		trace_mc_event(type, e->msg, e->label, e->error_count,
-			       mci->mc_idx, e->top_layer, e->mid_layer,
-			       e->low_layer,
-			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
-			       grain_bits, e->syndrome, e->other_detail);
-
 	edac_raw_mc_handle_error(type, mci, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 32d016f1ecd1..0367554e7437 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -557,14 +557,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
 {
 	struct dimm_info *dimm = to_dimm(dev);
 	u32 count;
-	int off;
-
-	off = EDAC_DIMM_OFF(dimm->mci->layers,
-			    dimm->mci->n_layers,
-			    dimm->location[0],
-			    dimm->location[1],
-			    dimm->location[2]);
-	count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][off];
+
+	count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
 	return sprintf(data, "%u\n", count);
 }
 
@@ -574,14 +568,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
 {
 	struct dimm_info *dimm = to_dimm(dev);
 	u32 count;
-	int off;
-
-	off = EDAC_DIMM_OFF(dimm->mci->layers,
-			    dimm->mci->n_layers,
-			    dimm->location[0],
-			    dimm->location[1],
-			    dimm->location[2]);
-	count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][off];
+
+	count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
 	return sprintf(data, "%u\n", count);
 }
 
@@ -633,8 +621,7 @@ static const struct device_type dimm_attr_type = {
 
 /* Create a DIMM object under specifed memory controller device */
 static int edac_create_dimm_object(struct mem_ctl_info *mci,
-				   struct dimm_info *dimm,
-				   int index)
+				   struct dimm_info *dimm)
 {
 	int err;
 	dimm->mci = mci;
@@ -644,9 +631,9 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
 
 	dimm->dev.parent = &mci->dev;
 	if (mci->csbased)
-		dev_set_name(&dimm->dev, "rank%d", index);
+		dev_set_name(&dimm->dev, "rank%d", dimm->idx);
 	else
-		dev_set_name(&dimm->dev, "dimm%d", index);
+		dev_set_name(&dimm->dev, "dimm%d", dimm->idx);
 	dev_set_drvdata(&dimm->dev, dimm);
 	pm_runtime_forbid(&mci->dev);
 
@@ -928,7 +915,8 @@ static const struct device_type mci_attr_type = {
 int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
 				 const struct attribute_group **groups)
 {
-	int i, err;
+	struct dimm_info *dimm;
+	int err;
 
 	/* get the /sys/devices/system/edac subsys reference */
 	mci->dev.type = &mci_attr_type;
@@ -952,13 +940,12 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
 	/*
 	 * Create the dimm/rank devices
 	 */
-	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm = mci->dimms[i];
+	mci_for_each_dimm(mci, dimm) {
 		/* Only expose populated DIMMs */
 		if (!dimm->nr_pages)
 			continue;
 
-		err = edac_create_dimm_object(mci, dimm, i);
+		err = edac_create_dimm_object(mci, dimm);
 		if (err)
 			goto fail_unregister_dimm;
 	}
@@ -973,12 +960,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
 	return 0;
 
 fail_unregister_dimm:
-	for (i--; i >= 0; i--) {
-		struct dimm_info *dimm = mci->dimms[i];
-		if (!dimm->nr_pages)
-			continue;
-
-		device_unregister(&dimm->dev);
+	mci_for_each_dimm(mci, dimm) {
+		if (device_is_registered(&dimm->dev))
+			device_unregister(&dimm->dev);
 	}
 	device_unregister(&mci->dev);
 
@@ -990,7 +974,7 @@ fail_unregister_dimm:
  */
 void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
 {
-	int i;
+	struct dimm_info *dimm;
 
 	edac_dbg(0, "\n");
 
@@ -1001,8 +985,7 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
 	edac_delete_csrow_objects(mci);
 #endif
 
-	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm = mci->dimms[i];
+	mci_for_each_dimm(mci, dimm) {
 		if (dimm->nr_pages == 0)
 			continue;
 		edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 0bb62857ffb2..b99080d8a10c 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -21,14 +21,22 @@ struct ghes_edac_pvt {
 	struct mem_ctl_info *mci;
 
 	/* Buffers for the error handling routine */
-	char detail_location[240];
-	char other_detail[160];
+	char other_detail[400];
 	char msg[80];
 };
 
-static atomic_t ghes_init = ATOMIC_INIT(0);
+static refcount_t ghes_refcount = REFCOUNT_INIT(0);
+
+/*
+ * Access to ghes_pvt must be protected by ghes_lock. The spinlock
+ * also provides the necessary (implicit) memory barrier for the SMP
+ * case to make the pointer visible on another CPU.
+ */
 static struct ghes_edac_pvt *ghes_pvt;
 
+/* GHES registration mutex */
+static DEFINE_MUTEX(ghes_reg_mutex);
+
 /*
  * Sync with other, potentially concurrent callers of
  * ghes_edac_report_mem_error(). We don't know what the
@@ -79,15 +87,15 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
 		(*num_dimm)++;
 }
 
-static int get_dimm_smbios_index(u16 handle)
+static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle)
 {
-	struct mem_ctl_info *mci = ghes_pvt->mci;
-	int i;
+	struct dimm_info *dimm;
 
-	for (i = 0; i < mci->tot_dimms; i++) {
-		if (mci->dimms[i]->smbios_handle == handle)
-			return i;
+	mci_for_each_dimm(mci, dimm) {
+		if (dimm->smbios_handle == handle)
+			return dimm->idx;
 	}
+
 	return -1;
 }
 
@@ -98,9 +106,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 
 	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
 		struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
-		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-						       mci->n_layers,
-						       dimm_fill->count, 0, 0);
+		struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0);
 		u16 rdr_mask = BIT(7) | BIT(13);
 
 		if (entry->size == 0xffff) {
@@ -198,13 +204,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	enum hw_event_mc_err_type type;
 	struct edac_raw_error_desc *e;
 	struct mem_ctl_info *mci;
-	struct ghes_edac_pvt *pvt = ghes_pvt;
+	struct ghes_edac_pvt *pvt;
 	unsigned long flags;
 	char *p;
-	u8 grain_bits;
-
-	if (!pvt)
-		return;
 
 	/*
 	 * We can do the locking below because GHES defers error processing
@@ -216,12 +218,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
 	spin_lock_irqsave(&ghes_lock, flags);
 
+	pvt = ghes_pvt;
+	if (!pvt)
+		goto unlock;
+
 	mci = pvt->mci;
 	e = &mci->error_desc;
 
 	/* Cleans the error report buffer */
 	memset(e, 0, sizeof (*e));
 	e->error_count = 1;
+	e->grain = 1;
 	strcpy(e->label, "unknown label");
 	e->msg = pvt->msg;
 	e->other_detail = pvt->other_detail;
@@ -311,13 +318,13 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
 	/* Error address */
 	if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
-		e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
-		e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
+		e->page_frame_number = PHYS_PFN(mem_err->physical_addr);
+		e->offset_in_page = offset_in_page(mem_err->physical_addr);
 	}
 
 	/* Error grain */
 	if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
-		e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
+		e->grain = ~mem_err->physical_addr_mask + 1;
 
 	/* Memory error location, mapped on e->location */
 	p = e->location;
@@ -348,7 +355,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
 				     mem_err->mem_dev_handle);
 
-		index = get_dimm_smbios_index(mem_err->mem_dev_handle);
+		index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
 		if (index >= 0) {
 			e->top_layer = index;
 			e->enable_per_layer_report = true;
@@ -360,6 +367,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
 	/* All other fields are mapped on e->other_detail */
 	p = pvt->other_detail;
+	p += snprintf(p, sizeof(pvt->other_detail),
+		"APEI location: %s ", e->location);
 	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
 		u64 status = mem_err->error_status;
 
@@ -433,16 +442,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	if (p > pvt->other_detail)
 		*(p - 1) = '\0';
 
-	/* Generate the trace event */
-	grain_bits = fls_long(e->grain);
-	snprintf(pvt->detail_location, sizeof(pvt->detail_location),
-		 "APEI location: %s %s", e->location, e->other_detail);
-	trace_mc_event(type, e->msg, e->label, e->error_count,
-		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
-		       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
-		       grain_bits, e->syndrome, pvt->detail_location);
-
 	edac_raw_mc_handle_error(type, mci, e);
+
+unlock:
 	spin_unlock_irqrestore(&ghes_lock, flags);
 }
 
@@ -457,10 +459,12 @@ static struct acpi_platform_list plat_list[] = {
 int ghes_edac_register(struct ghes *ghes, struct device *dev)
 {
 	bool fake = false;
-	int rc, num_dimm = 0;
+	int rc = 0, num_dimm = 0;
 	struct mem_ctl_info *mci;
+	struct ghes_edac_pvt *pvt;
 	struct edac_mc_layer layers[1];
 	struct ghes_edac_dimm_fill dimm_fill;
+	unsigned long flags;
 	int idx = -1;
 
 	if (IS_ENABLED(CONFIG_X86)) {
@@ -472,11 +476,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		idx = 0;
 	}
 
+	/* finish another registration/unregistration instance first */
+	mutex_lock(&ghes_reg_mutex);
+
 	/*
 	 * We have only one logical memory controller to which all DIMMs belong.
 	 */
-	if (atomic_inc_return(&ghes_init) > 1)
-		return 0;
+	if (refcount_inc_not_zero(&ghes_refcount))
+		goto unlock;
 
 	/* Get the number of DIMMs */
 	dmi_walk(ghes_edac_count_dimms, &num_dimm);
@@ -494,12 +501,13 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
 	if (!mci) {
 		pr_info("Can't allocate memory for EDAC data\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto unlock;
 	}
 
-	ghes_pvt	= mci->pvt_info;
-	ghes_pvt->ghes	= ghes;
-	ghes_pvt->mci	= mci;
+	pvt		= mci->pvt_info;
+	pvt->ghes	= ghes;
+	pvt->mci	= mci;
 
 	mci->pdev = dev;
 	mci->mtype_cap = MEM_FLAG_EMPTY;
@@ -527,8 +535,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		dimm_fill.mci = mci;
 		dmi_walk(ghes_edac_dmidecode, &dimm_fill);
 	} else {
-		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-						       mci->n_layers, 0, 0, 0);
+		struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
 
 		dimm->nr_pages = 1;
 		dimm->grain = 128;
@@ -541,23 +548,48 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	if (rc < 0) {
 		pr_info("Can't register at EDAC core\n");
 		edac_mc_free(mci);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto unlock;
 	}
-	return 0;
+
+	spin_lock_irqsave(&ghes_lock, flags);
+	ghes_pvt = pvt;
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	/* only set on success */
+	refcount_set(&ghes_refcount, 1);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
+
+	return rc;
 }
 
 void ghes_edac_unregister(struct ghes *ghes)
 {
 	struct mem_ctl_info *mci;
+	unsigned long flags;
 
-	if (!ghes_pvt)
-		return;
+	mutex_lock(&ghes_reg_mutex);
 
-	if (atomic_dec_return(&ghes_init))
-		return;
+	if (!refcount_dec_and_test(&ghes_refcount))
+		goto unlock;
 
-	mci = ghes_pvt->mci;
+	/*
+	 * Wait for the irq handler being finished.
+	 */
+	spin_lock_irqsave(&ghes_lock, flags);
+	mci = ghes_pvt ? ghes_pvt->mci : NULL;
 	ghes_pvt = NULL;
-	edac_mc_del_mc(mci->pdev);
-	edac_mc_free(mci);
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	if (!mci)
+		goto unlock;
+
+	mci = edac_mc_del_mc(mci->pdev);
+	if (mci)
+		edac_mc_free(mci);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
 }
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index c370d5457e6b..059eccf0582b 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -154,8 +154,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
 
 		ndimms = 0;
 		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
 			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index f564a4a8a4ae..5c1eea96230c 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -324,7 +324,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 
 	pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar);
 	mchbar &= I3000_MCHBAR_MASK;
-	window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE);
+	window = ioremap(mchbar, I3000_MMR_WINDOW_SIZE);
 	if (!window) {
 		printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n",
 			mchbar);
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 299b441647cd..a8988db6d423 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -280,7 +280,7 @@ static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, I3200_MMR_WINDOW_SIZE);
 	if (!window)
 		printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
 			(unsigned long long)u.mchbar);
@@ -392,8 +392,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 		unsigned long nr_pages;
 
 		for (j = 0; j < nr_channels; j++) {
-			struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-							       mci->n_layers, i, j, 0);
+			struct dimm_info *dimm = edac_get_dimm(mci, i, j, 0);
 
 			nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
 			if (nr_pages == 0)
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 078a7351bf05..1a6f69c859ab 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1275,9 +1275,8 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 			if (!MTR_DIMMS_PRESENT(mtr))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				       channel / MAX_BRANCHES,
-				       channel % MAX_BRANCHES, slot);
+			dimm = edac_get_dimm(mci, channel / MAX_BRANCHES,
+					     channel % MAX_BRANCHES, slot);
 
 			csrow_megs = pvt->dimm_info[slot][channel].megabytes;
 			dimm->grain = 8;
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 251f2b692785..191aa7c19ded 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -259,11 +259,6 @@ static inline u32 i5100_nrecmemb_ras(u32 a)
 	return a & ((1 << 16) - 1);
 }
 
-static inline u32 i5100_redmemb_ecc_locator(u32 a)
-{
-	return a & ((1 << 18) - 1);
-}
-
 static inline u32 i5100_recmema_merr(u32 a)
 {
 	return i5100_nrecmema_merr(a);
@@ -486,7 +481,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
 	u32 dw;
 	u32 dw2;
 	unsigned syndrome = 0;
-	unsigned ecc_loc = 0;
 	unsigned merr;
 	unsigned bank;
 	unsigned rank;
@@ -499,7 +493,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
 		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
 		syndrome = dw2;
 		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
-		ecc_loc = i5100_redmemb_ecc_locator(dw2);
 	}
 
 	if (i5100_validlog_recmemvalid(dw)) {
@@ -713,7 +706,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
 {
 	struct i5100_priv *priv = mci->pvt_info;
 	u16 w;
-	unsigned long et;
 
 	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
 	if (i5100_spddata_busy(w))
@@ -724,7 +716,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
 						   0, 0));
 
 	/* wait up to 100ms */
-	et = jiffies + HZ / 10;
 	udelay(100);
 	while (1) {
 		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
@@ -848,21 +839,17 @@ static void i5100_init_interleaving(struct pci_dev *pdev,
 
 static void i5100_init_csrows(struct mem_ctl_info *mci)
 {
-	int i;
 	struct i5100_priv *priv = mci->pvt_info;
+	struct dimm_info *dimm;
 
-	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm;
-		const unsigned long npages = i5100_npages(mci, i);
-		const unsigned int chan = i5100_csrow_to_chan(mci, i);
-		const unsigned int rank = i5100_csrow_to_rank(mci, i);
+	mci_for_each_dimm(mci, dimm) {
+		const unsigned long npages = i5100_npages(mci, dimm->idx);
+		const unsigned int chan = i5100_csrow_to_chan(mci, dimm->idx);
+		const unsigned int rank = i5100_csrow_to_rank(mci, dimm->idx);
 
 		if (!npages)
 			continue;
 
-		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-			       chan, rank, 0);
-
 		dimm->nr_pages = npages;
 		dimm->grain = 32;
 		dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 6f8bcdb9256a..f131c05ade9f 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -548,8 +548,8 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
 	ras = nrec_ras(info);
 	cas = nrec_cas(info);
 
-	edac_dbg(0, "\t\tDIMM= %d  Channels= %d,%d  (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
-		 rank, channel, channel + 1, branch >> 1, bank,
+	edac_dbg(0, "\t\t%s DIMM= %d  Channels= %d,%d  (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
+		 type, rank, channel, channel + 1, branch >> 1, bank,
 		 buf_id, rdwr_str(rdwr), ras, cas);
 
 	/* Only 1 bit will be on */
@@ -1054,8 +1054,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
 	u32 actual_tolm;
 	u16 limit;
 	int slot_row;
-	int maxch;
-	int maxdimmperch;
 	int way0, way1;
 
 	pvt = mci->pvt_info;
@@ -1065,9 +1063,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
 	pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
 			&pvt->u.ambase_top);
 
-	maxdimmperch = pvt->maxdimmperch;
-	maxch = pvt->maxch;
-
 	edac_dbg(2, "AMBASE= 0x%lx  MAXCH= %d  MAX-DIMM-Per-CH= %d\n",
 		 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
 
@@ -1170,17 +1165,13 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
 {
 	struct i5400_pvt *pvt;
 	struct dimm_info *dimm;
-	int ndimms, channel_count;
-	int max_dimms;
+	int ndimms;
 	int mtr;
 	int size_mb;
 	int  channel, slot;
 
 	pvt = mci->pvt_info;
 
-	channel_count = pvt->maxch;
-	max_dimms = pvt->maxdimmperch;
-
 	ndimms = 0;
 
 	/*
@@ -1196,8 +1187,7 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
 			if (!MTR_DIMMS_PRESENT(mtr))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				       channel / 2, channel % 2, slot);
+			dimm = edac_get_dimm(mci, channel / 2, channel % 2, slot);
 
 			size_mb =  pvt->dimm_info[slot][channel].megabytes;
 
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 7bf910d54d11..2e9bbe56cde9 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -580,7 +580,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
  * @ch: Channel number within the branch (0 or 1)
  * @branch: Branch number (0 or 1)
  * @dinfo: Pointer to DIMM info where dimm size is stored
- * @p_csrow: Pointer to the struct csrow_info that corresponds to that element
+ * @dimm: Pointer to the struct dimm_info that corresponds to that element
  */
 static int decode_mtr(struct i7300_pvt *pvt,
 		      int slot, int ch, int branch,
@@ -794,8 +794,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 			for (ch = 0; ch < max_channel; ch++) {
 				int channel = to_channel(ch, branch);
 
-				dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					       mci->n_layers, branch, ch, slot);
+				dimm = edac_get_dimm(mci, branch, ch, slot);
 
 				dinfo = &pvt->dimm_info[slot][channel];
 
@@ -817,7 +816,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 
 /**
  * decode_mir() - Decodes Memory Interleave Register (MIR) info
- * @int mir_no: number of the MIR register to decode
+ * @mir_no: number of the MIR register to decode
  * @mir: array with the MIR data cached on the driver
  */
 static void decode_mir(int mir_no, u16 mir[MAX_MIR])
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index a71cca6eeb33..b3135b208f9a 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -585,8 +585,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			if (!DIMM_PRESENT(dimm_dod[j]))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				       i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 7c6a2d4d2360..6be99e0d850d 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -485,7 +485,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
 		goto fail0;
 	}
 	mchbar &= 0xffffc000;	/* bits 31:14 used for 16K window */
-	mch_window = ioremap_nocache(mchbar, 0x1000);
+	mch_window = ioremap(mchbar, 0x1000);
 	if (!mch_window) {
 		edac_dbg(3, "error ioremapping MCHBAR!\n");
 		goto fail0;
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index d26300f9cb07..d68346a8e141 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -357,7 +357,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, IE31200_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE);
 	if (!window)
 		ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
 			       (unsigned long long)u.mchbar);
@@ -490,9 +490,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 
 			if (dimm_info[j][i].dual_rank) {
 				nr_pages = nr_pages / 2;
-				dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-						     mci->n_layers, (i * 2) + 1,
-						     j, 0);
+				dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0);
 				dimm->nr_pages = nr_pages;
 				edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 				dimm->grain = 8; /* just a guess */
@@ -503,8 +501,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 				dimm->dtype = DEV_UNKNOWN;
 				dimm->edac_mode = EDAC_UNKNOWN;
 			}
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i * 2, j, 0);
+			dimm = edac_get_dimm(mci, i * 2, j, 0);
 			dimm->nr_pages = nr_pages;
 			edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 			dimm->grain = 8; /* same guess */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea622c6f3a39..ea980c556f2e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -6,7 +6,7 @@
 
 #include "mce_amd.h"
 
-static struct amd_decoder_ops *fam_ops;
+static struct amd_decoder_ops fam_ops;
 
 static u8 xec_mask	 = 0xf;
 
@@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = {
 	"L2 Fill Data error",
 };
 
+static const char * const smca_ls2_mce_desc[] = {
+	"An ECC error was detected on a data cache read by a probe or victimization",
+	"An ECC error or L2 poison was detected on a data cache read by a load",
+	"An ECC error was detected on a data cache read-modify-write by a store",
+	"An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
+	"An ECC error or poison bit mismatch was detected on a tag read by a load",
+	"An ECC error or poison bit mismatch was detected on a tag read by a store",
+	"An ECC error was detected on an EMEM read by a load",
+	"An ECC error was detected on an EMEM read-modify-write by a store",
+	"A parity error was detected in an L1 TLB entry by any access",
+	"A parity error was detected in an L2 TLB entry by any access",
+	"A parity error was detected in a PWC entry by any access",
+	"A parity error was detected in an STQ entry by any access",
+	"A parity error was detected in an LDQ entry by any access",
+	"A parity error was detected in a MAB entry by any access",
+	"A parity error was detected in an SCB entry state field by any access",
+	"A parity error was detected in an SCB entry address field by any access",
+	"A parity error was detected in an SCB entry data field by any access",
+	"A parity error was detected in a WCB entry by any access",
+	"A poisoned line was detected in an SCB entry by any access",
+	"A SystemReadDataError error was reported on read data returned from L2 for a load",
+	"A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
+	"A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
+	"A hardware assertion error was reported",
+	"A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
+};
+
 static const char * const smca_if_mce_desc[] = {
 	"Op Cache Microtag Probe Port Parity Error",
 	"IC Microtag or Full Tag Multi-hit Error",
@@ -378,6 +405,7 @@ struct smca_mce_desc {
 
 static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_LS]	= { smca_ls_mce_desc,	ARRAY_SIZE(smca_ls_mce_desc)	},
+	[SMCA_LS_V2]	= { smca_ls2_mce_desc,	ARRAY_SIZE(smca_ls2_mce_desc)	},
 	[SMCA_IF]	= { smca_if_mce_desc,	ARRAY_SIZE(smca_if_mce_desc)	},
 	[SMCA_L2_CACHE]	= { smca_l2_mce_desc,	ARRAY_SIZE(smca_l2_mce_desc)	},
 	[SMCA_DE]	= { smca_de_mce_desc,	ARRAY_SIZE(smca_de_mce_desc)	},
@@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m)
 					    : (xec ? "multimatch" : "parity")));
 			return;
 		}
-	} else if (fam_ops->mc0_mce(ec, xec))
+	} else if (fam_ops.mc0_mce(ec, xec))
 		;
 	else
 		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
@@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m)
 			pr_cont("Hardware Assert.\n");
 		else
 			goto wrong_mc1_mce;
-	} else if (fam_ops->mc1_mce(ec, xec))
+	} else if (fam_ops.mc1_mce(ec, xec))
 		;
 	else
 		goto wrong_mc1_mce;
@@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m)
 
 	pr_emerg(HW_ERR "MC2 Error: ");
 
-	if (!fam_ops->mc2_mce(ec, xec))
+	if (!fam_ops.mc2_mce(ec, xec))
 		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
 }
 
@@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 	if (m->tsc)
 		pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
 
-	if (!fam_ops)
+	/* Doesn't matter which member to test. */
+	if (!fam_ops.mc0_mce)
 		goto err_code;
 
 	switch (m->bank) {
@@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void)
 	    c->x86_vendor != X86_VENDOR_HYGON)
 		return -ENODEV;
 
-	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
-	if (!fam_ops)
-		return -ENOMEM;
+	if (boot_cpu_has(X86_FEATURE_SMCA)) {
+		xec_mask = 0x3f;
+		goto out;
+	}
 
 	switch (c->x86) {
 	case 0xf:
-		fam_ops->mc0_mce = k8_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = k8_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x10:
-		fam_ops->mc0_mce = f10h_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = f10h_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x11:
-		fam_ops->mc0_mce = k8_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = k8_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x12:
-		fam_ops->mc0_mce = f12h_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = f12h_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x14:
-		fam_ops->mc0_mce = cat_mc0_mce;
-		fam_ops->mc1_mce = cat_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = cat_mc0_mce;
+		fam_ops.mc1_mce = cat_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x15:
 		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
 
-		fam_ops->mc0_mce = f15h_mc0_mce;
-		fam_ops->mc1_mce = f15h_mc1_mce;
-		fam_ops->mc2_mce = f15h_mc2_mce;
+		fam_ops.mc0_mce = f15h_mc0_mce;
+		fam_ops.mc1_mce = f15h_mc1_mce;
+		fam_ops.mc2_mce = f15h_mc2_mce;
 		break;
 
 	case 0x16:
 		xec_mask = 0x1f;
-		fam_ops->mc0_mce = cat_mc0_mce;
-		fam_ops->mc1_mce = cat_mc1_mce;
-		fam_ops->mc2_mce = f16h_mc2_mce;
+		fam_ops.mc0_mce = cat_mc0_mce;
+		fam_ops.mc1_mce = cat_mc1_mce;
+		fam_ops.mc2_mce = f16h_mc2_mce;
 		break;
 
 	case 0x17:
 	case 0x18:
-		xec_mask = 0x3f;
-		if (!boot_cpu_has(X86_FEATURE_SMCA)) {
-			printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
-			goto err_out;
-		}
-		break;
+		pr_warn("Decoding supported only on Scalable MCA processors.\n");
+		return -EINVAL;
 
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
-		goto err_out;
+		return -EINVAL;
 	}
 
+out:
 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
 
 	mce_register_decode_chain(&amd_mce_dec_nb);
 
 	return 0;
-
-err_out:
-	kfree(fam_ops);
-	fam_ops = NULL;
-	return -EINVAL;
 }
 early_initcall(mce_amd_init);
 
@@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init);
 static void __exit mce_amd_exit(void)
 {
 	mce_unregister_decode_chain(&amd_mce_dec_nb);
-	kfree(fam_ops);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index b1193be1ef1d..933f7722b893 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1231,7 +1231,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci)
 		if (!(chan_mask & BIT(i)))
 			continue;
 
-		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, 0, 0);
+		dimm = edac_get_dimm(mci, i, 0, 0);
 		if (!dimm) {
 			edac_dbg(0, "No allocated DIMM for channel %d\n", i);
 			continue;
@@ -1311,7 +1311,7 @@ static void dnv_get_dimm_config(struct mem_ctl_info *mci)
 			if (!ranks_of_dimm[j])
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			if (!dimm) {
 				edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j);
 				continue;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index f743502ca9b7..4957e8ee1879 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -254,18 +254,20 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
  * FIXME: Implement the error count reads directly
  */
 
-static const u32 correrrcnt[] = {
-	0x104, 0x108, 0x10c, 0x110,
-};
-
 #define RANK_ODD_OV(reg)		GET_BITFIELD(reg, 31, 31)
 #define RANK_ODD_ERR_CNT(reg)		GET_BITFIELD(reg, 16, 30)
 #define RANK_EVEN_OV(reg)		GET_BITFIELD(reg, 15, 15)
 #define RANK_EVEN_ERR_CNT(reg)		GET_BITFIELD(reg,  0, 14)
 
+#if 0 /* Currently unused*/
+static const u32 correrrcnt[] = {
+	0x104, 0x108, 0x10c, 0x110,
+};
+
 static const u32 correrrthrsld[] = {
 	0x11c, 0x120, 0x124, 0x128,
 };
+#endif
 
 #define RANK_ODD_ERR_THRSLD(reg)	GET_BITFIELD(reg, 16, 30)
 #define RANK_EVEN_ERR_THRSLD(reg)	GET_BITFIELD(reg,  0, 14)
@@ -1340,7 +1342,7 @@ static void knl_show_mc_route(u32 reg, char *s)
  */
 static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 {
-	u64 sad_base, sad_size, sad_limit = 0;
+	u64 sad_base, sad_limit = 0;
 	u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace;
 	int sad_rule = 0;
 	int tad_rule = 0;
@@ -1427,7 +1429,6 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 		edram_only = KNL_EDRAM_ONLY(dram_rule);
 
 		sad_limit = pvt->info.sad_limit(dram_rule)+1;
-		sad_size = sad_limit - sad_base;
 
 		pci_read_config_dword(pvt->pci_sad0,
 			pvt->info.interleave_list[sad_rule], &interleave_reg);
@@ -1620,7 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 		}
 
 		for (j = 0; j < max_dimms_per_channel; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			if (pvt->info.type == KNIGHTS_LANDING) {
 				pci_read_config_dword(pvt->knl.pci_channel[i],
 					knl_mtr_reg, &mtr);
@@ -2952,7 +2953,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	struct mem_ctl_info *new_mci;
 	struct sbridge_pvt *pvt = mci->pvt_info;
 	enum hw_event_mc_err_type tp_event;
-	char *type, *optype, msg[256];
+	char *optype, msg[256];
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -2981,14 +2982,11 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	if (uncorrected_error) {
 		core_err_cnt = 1;
 		if (ripv) {
-			type = "FATAL";
 			tp_event = HW_EVENT_ERR_FATAL;
 		} else {
-			type = "NON_FATAL";
 			tp_event = HW_EVENT_ERR_UNCORRECTED;
 		}
 	} else {
-		type = "CORRECTED";
 		tp_event = HW_EVENT_ERR_CORRECTED;
 	}
 
@@ -3200,7 +3198,6 @@ static struct notifier_block sbridge_mce_dec = {
 static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
 {
 	struct mem_ctl_info *mci = sbridge_dev->mci;
-	struct sbridge_pvt *pvt;
 
 	if (unlikely(!mci || !mci->pvt_info)) {
 		edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
@@ -3209,8 +3206,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
 		return;
 	}
 
-	pvt = mci->pvt_info;
-
 	edac_dbg(0, "MC: mci = %p, dev = %p\n",
 		 mci, &sbridge_dev->pdev[0]->dev);
 
diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c
index 413cdb4a591d..3a3dcb14ed99 100644
--- a/drivers/edac/sifive_edac.c
+++ b/drivers/edac/sifive_edac.c
@@ -10,7 +10,7 @@
 #include <linux/edac.h>
 #include <linux/platform_device.h>
 #include "edac_module.h"
-#include <asm/sifive_l2_cache.h>
+#include <soc/sifive/sifive_l2_cache.h>
 
 #define DRVNAME "sifive_edac"
 
@@ -54,8 +54,8 @@ static int ecc_register(struct platform_device *pdev)
 	p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc",
 					    1, 1, NULL, 0,
 					    edac_device_alloc_index());
-	if (IS_ERR(p->dci))
-		return PTR_ERR(p->dci);
+	if (!p->dci)
+		return -ENOMEM;
 
 	p->dci->dev = &pdev->dev;
 	p->dci->mod_name = "Sifive ECC Manager";
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 0fcf3785e8f3..83545b4facb7 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -46,7 +46,8 @@ static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
 }
 
 enum munittype {
-	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD,
+	ERRCHAN0, ERRCHAN1, ERRCHAN2,
 };
 
 struct munit {
@@ -68,6 +69,9 @@ static const struct munit skx_all_munits[] = {
 	{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
 	{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
 	{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+	{ 0x2043, { PCI_DEVFN(10, 3), PCI_DEVFN(12, 3) }, 2, 2, ERRCHAN0 },
+	{ 0x2047, { PCI_DEVFN(10, 7), PCI_DEVFN(12, 7) }, 2, 2, ERRCHAN1 },
+	{ 0x204b, { PCI_DEVFN(11, 3), PCI_DEVFN(13, 3) }, 2, 2, ERRCHAN2 },
 	{ 0x208e, { }, 1, 0, SAD },
 	{ }
 };
@@ -104,10 +108,18 @@ static int get_all_munits(const struct munit *m)
 		}
 
 		switch (m->mtype) {
-		case CHAN0: case CHAN1: case CHAN2:
+		case CHAN0:
+		case CHAN1:
+		case CHAN2:
 			pci_dev_get(pdev);
 			d->imc[i].chan[m->mtype].cdev = pdev;
 			break;
+		case ERRCHAN0:
+		case ERRCHAN1:
+		case ERRCHAN2:
+			pci_dev_get(pdev);
+			d->imc[i].chan[m->mtype - ERRCHAN0].edev = pdev;
+			break;
 		case SAD_ALL:
 			pci_dev_get(pdev);
 			d->sad_all = pdev;
@@ -177,8 +189,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
 		pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
 		for (j = 0; j < SKX_NUM_DIMMS; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			pci_read_config_dword(imc->chan[i].cdev,
 					      0x80 + 4 * j, &mtr);
 			if (IS_DIMM_PRESENT(mtr)) {
@@ -216,6 +227,39 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 #define SKX_ILV_REMOTE(tgt)	(((tgt) & 8) == 0)
 #define SKX_ILV_TARGET(tgt)	((tgt) & 7)
 
+static void skx_show_retry_rd_err_log(struct decoded_addr *res,
+				      char *msg, int len)
+{
+	u32 log0, log1, log2, log3, log4;
+	u32 corr0, corr1, corr2, corr3;
+	struct pci_dev *edev;
+	int n;
+
+	edev = res->dev->imc[res->imc].chan[res->channel].edev;
+
+	pci_read_config_dword(edev, 0x154, &log0);
+	pci_read_config_dword(edev, 0x148, &log1);
+	pci_read_config_dword(edev, 0x150, &log2);
+	pci_read_config_dword(edev, 0x15c, &log3);
+	pci_read_config_dword(edev, 0x114, &log4);
+
+	n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x]",
+		     log0, log1, log2, log3, log4);
+
+	pci_read_config_dword(edev, 0x104, &corr0);
+	pci_read_config_dword(edev, 0x108, &corr1);
+	pci_read_config_dword(edev, 0x10c, &corr2);
+	pci_read_config_dword(edev, 0x110, &corr3);
+
+	if (len - n > 0)
+		snprintf(msg + n, len - n,
+			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
+			 corr0 & 0xffff, corr0 >> 16,
+			 corr1 & 0xffff, corr1 >> 16,
+			 corr2 & 0xffff, corr2 >> 16,
+			 corr3 & 0xffff, corr3 >> 16);
+}
+
 static bool skx_sad_decode(struct decoded_addr *res)
 {
 	struct skx_dev *d = list_first_entry(skx_edac_list, typeof(*d), list);
@@ -659,7 +703,7 @@ static int __init skx_init(void)
 		}
 	}
 
-	skx_set_decode(skx_decode);
+	skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
 
 	if (nvdimm_count && skx_adxl_get() == -ENODEV)
 		skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index d8ff63d91b86..99bbaf629b8d 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -37,6 +37,7 @@ static char *adxl_msg;
 
 static char skx_msg[MSG_SIZE];
 static skx_decode_f skx_decode;
+static skx_show_retry_log_f skx_show_retry_rd_err_log;
 static u64 skx_tolm, skx_tohm;
 static LIST_HEAD(dev_edac_list);
 
@@ -100,6 +101,7 @@ void __exit skx_adxl_put(void)
 
 static bool skx_adxl_decode(struct decoded_addr *res)
 {
+	struct skx_dev *d;
 	int i, len = 0;
 
 	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
@@ -118,6 +120,24 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
 	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
 
+	if (res->imc > NUM_IMC - 1) {
+		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
+		return false;
+	}
+
+	list_for_each_entry(d, &dev_edac_list, list) {
+		if (d->imc[0].src_id == res->socket) {
+			res->dev = d;
+			break;
+		}
+	}
+
+	if (!res->dev) {
+		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
+			   res->socket, res->imc);
+		return false;
+	}
+
 	for (i = 0; i < adxl_component_count; i++) {
 		if (adxl_values[i] == ~0x0ull)
 			continue;
@@ -131,9 +151,10 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	return true;
 }
 
-void skx_set_decode(skx_decode_f decode)
+void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 {
 	skx_decode = decode;
+	skx_show_retry_rd_err_log = show_retry_log;
 }
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
@@ -235,7 +256,7 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
 
 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
 	if (!pdev) {
-		skx_printk(KERN_ERR, "Can't get tolm/tohm\n");
+		edac_dbg(2, "Can't get tolm/tohm\n");
 		return -ENODEV;
 	}
 
@@ -452,34 +473,17 @@ static void skx_unregister_mci(struct skx_imc *imc)
 	edac_mc_free(mci);
 }
 
-static struct mem_ctl_info *get_mci(int src_id, int lmc)
-{
-	struct skx_dev *d;
-
-	if (lmc > NUM_IMC - 1) {
-		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
-		return NULL;
-	}
-
-	list_for_each_entry(d, &dev_edac_list, list) {
-		if (d->imc[0].src_id == src_id)
-			return d->imc[lmc].mci;
-	}
-
-	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
-	return NULL;
-}
-
 static void skx_mce_output_error(struct mem_ctl_info *mci,
 				 const struct mce *m,
 				 struct decoded_addr *res)
 {
 	enum hw_event_mc_err_type tp_event;
-	char *type, *optype;
+	char *optype;
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
 	bool recoverable;
+	int len;
 	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
 	u32 mscod = GET_BITFIELD(m->status, 16, 31);
 	u32 errcode = GET_BITFIELD(m->status, 0, 15);
@@ -490,14 +494,11 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	if (uncorrected_error) {
 		core_err_cnt = 1;
 		if (ripv) {
-			type = "FATAL";
 			tp_event = HW_EVENT_ERR_FATAL;
 		} else {
-			type = "NON_FATAL";
 			tp_event = HW_EVENT_ERR_UNCORRECTED;
 		}
 	} else {
-		type = "CORRECTED";
 		tp_event = HW_EVENT_ERR_CORRECTED;
 	}
 
@@ -539,12 +540,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 		}
 	}
 	if (adxl_component_count) {
-		snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
+		len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
 			 overflow ? " OVERFLOW" : "",
 			 (uncorrected_error && recoverable) ? " recoverable" : "",
 			 mscod, errcode, adxl_msg);
 	} else {
-		snprintf(skx_msg, MSG_SIZE,
+		len = snprintf(skx_msg, MSG_SIZE,
 			 "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
 			 overflow ? " OVERFLOW" : "",
 			 (uncorrected_error && recoverable) ? " recoverable" : "",
@@ -553,6 +554,9 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			 res->bank_group, res->bank_address, res->row, res->column);
 	}
 
+	if (skx_show_retry_rd_err_log)
+		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);
+
 	edac_dbg(0, "%s\n", skx_msg);
 
 	/* Call the helper to output message */
@@ -583,15 +587,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (adxl_component_count) {
 		if (!skx_adxl_decode(&res))
 			return NOTIFY_DONE;
-
-		mci = get_mci(res.socket, res.imc);
-	} else {
-		if (!skx_decode || !skx_decode(&res))
-			return NOTIFY_DONE;
-
-		mci = res.dev->imc[res.imc].mci;
+	} else if (!skx_decode || !skx_decode(&res)) {
+		return NOTIFY_DONE;
 	}
 
+	mci = res.dev->imc[res.imc].mci;
+
 	if (!mci)
 		return NOTIFY_DONE;
 
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 08cc971a50ea..60d1ea669afd 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -64,6 +64,7 @@ struct skx_dev {
 		u8 src_id, node_id;
 		struct skx_channel {
 			struct pci_dev	*cdev;
+			struct pci_dev	*edev;
 			struct skx_dimm {
 				u8 close_pg;
 				u8 bank_xor_enable;
@@ -113,10 +114,11 @@ struct decoded_addr {
 
 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
 typedef bool (*skx_decode_f)(struct decoded_addr *res);
+typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
 
 int __init skx_adxl_get(void);
 void __exit skx_adxl_put(void);
-void skx_set_decode(skx_decode_f decode);
+void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
 int skx_get_node_id(struct skx_dev *d, u8 *id);
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 6ac26d1b929f..8be3e89a510e 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -135,7 +135,7 @@ static void ti_edac_setup_dimm(struct mem_ctl_info *mci, u32 type)
 	u32 val;
 	u32 memsize;
 
-	dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, 0, 0, 0);
+	dimm = edac_get_dimm(mci, 0, 0, 0);
 
 	val = ti_edac_readl(edac, EMIF_SDRAM_CONFIG);
 
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index cc779f3f9e2d..a65e2f78a402 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -266,7 +266,7 @@ static void __iomem *x38_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, X38_MMR_WINDOW_SIZE);
 	if (!window)
 		printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n",
 			(unsigned long long)u.mchbar);
diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index e970134c95fa..7401733db08b 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -77,8 +77,6 @@ struct arizona_extcon_info {
 	const struct arizona_micd_range *micd_ranges;
 	int num_micd_ranges;
 
-	int micd_timeout;
-
 	bool micd_reva;
 	bool micd_clamp;
 
@@ -310,9 +308,13 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 	}
 
 	if (info->micd_reva) {
-		regmap_write(arizona->regmap, 0x80, 0x3);
-		regmap_write(arizona->regmap, 0x294, 0);
-		regmap_write(arizona->regmap, 0x80, 0x0);
+		const struct reg_sequence reva[] = {
+			{ 0x80,  0x3 },
+			{ 0x294, 0x0 },
+			{ 0x80,  0x0 },
+		};
+
+		regmap_multi_reg_write(arizona->regmap, reva, ARRAY_SIZE(reva));
 	}
 
 	if (info->detecting && arizona->pdata.micd_software_compare)
@@ -361,9 +363,13 @@ static void arizona_stop_mic(struct arizona_extcon_info *info)
 	snd_soc_dapm_sync(dapm);
 
 	if (info->micd_reva) {
-		regmap_write(arizona->regmap, 0x80, 0x3);
-		regmap_write(arizona->regmap, 0x294, 2);
-		regmap_write(arizona->regmap, 0x80, 0x0);
+		const struct reg_sequence reva[] = {
+			{ 0x80,  0x3 },
+			{ 0x294, 0x2 },
+			{ 0x80,  0x0 },
+		};
+
+		regmap_multi_reg_write(arizona->regmap, reva, ARRAY_SIZE(reva));
 	}
 
 	ret = regulator_allow_bypass(info->micvdd, true);
@@ -527,67 +533,65 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading,
 	struct arizona *arizona = info->arizona;
 	int id_gpio = arizona->pdata.hpdet_id_gpio;
 
+	if (!arizona->pdata.hpdet_acc_id)
+		return 0;
+
 	/*
 	 * If we're using HPDET for accessory identification we need
 	 * to take multiple measurements, step through them in sequence.
 	 */
-	if (arizona->pdata.hpdet_acc_id) {
-		info->hpdet_res[info->num_hpdet_res++] = *reading;
+	info->hpdet_res[info->num_hpdet_res++] = *reading;
 
-		/* Only check the mic directly if we didn't already ID it */
-		if (id_gpio && info->num_hpdet_res == 1) {
-			dev_dbg(arizona->dev, "Measuring mic\n");
+	/* Only check the mic directly if we didn't already ID it */
+	if (id_gpio && info->num_hpdet_res == 1) {
+		dev_dbg(arizona->dev, "Measuring mic\n");
 
-			regmap_update_bits(arizona->regmap,
-					   ARIZONA_ACCESSORY_DETECT_MODE_1,
-					   ARIZONA_ACCDET_MODE_MASK |
-					   ARIZONA_ACCDET_SRC,
-					   ARIZONA_ACCDET_MODE_HPR |
-					   info->micd_modes[0].src);
+		regmap_update_bits(arizona->regmap,
+				   ARIZONA_ACCESSORY_DETECT_MODE_1,
+				   ARIZONA_ACCDET_MODE_MASK |
+				   ARIZONA_ACCDET_SRC,
+				   ARIZONA_ACCDET_MODE_HPR |
+				   info->micd_modes[0].src);
 
-			gpio_set_value_cansleep(id_gpio, 1);
+		gpio_set_value_cansleep(id_gpio, 1);
 
-			regmap_update_bits(arizona->regmap,
-					   ARIZONA_HEADPHONE_DETECT_1,
-					   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
-			return -EAGAIN;
-		}
-
-		/* OK, got both.  Now, compare... */
-		dev_dbg(arizona->dev, "HPDET measured %d %d\n",
-			info->hpdet_res[0], info->hpdet_res[1]);
+		regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+				   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
+		return -EAGAIN;
+	}
 
-		/* Take the headphone impedance for the main report */
-		*reading = info->hpdet_res[0];
+	/* OK, got both.  Now, compare... */
+	dev_dbg(arizona->dev, "HPDET measured %d %d\n",
+		info->hpdet_res[0], info->hpdet_res[1]);
 
-		/* Sometimes we get false readings due to slow insert */
-		if (*reading >= ARIZONA_HPDET_MAX && !info->hpdet_retried) {
-			dev_dbg(arizona->dev, "Retrying high impedance\n");
-			info->num_hpdet_res = 0;
-			info->hpdet_retried = true;
-			arizona_start_hpdet_acc_id(info);
-			pm_runtime_put(info->dev);
-			return -EAGAIN;
-		}
+	/* Take the headphone impedance for the main report */
+	*reading = info->hpdet_res[0];
 
-		/*
-		 * If we measure the mic as high impedance
-		 */
-		if (!id_gpio || info->hpdet_res[1] > 50) {
-			dev_dbg(arizona->dev, "Detected mic\n");
-			*mic = true;
-			info->detecting = true;
-		} else {
-			dev_dbg(arizona->dev, "Detected headphone\n");
-		}
+	/* Sometimes we get false readings due to slow insert */
+	if (*reading >= ARIZONA_HPDET_MAX && !info->hpdet_retried) {
+		dev_dbg(arizona->dev, "Retrying high impedance\n");
+		info->num_hpdet_res = 0;
+		info->hpdet_retried = true;
+		arizona_start_hpdet_acc_id(info);
+		pm_runtime_put(info->dev);
+		return -EAGAIN;
+	}
 
-		/* Make sure everything is reset back to the real polarity */
-		regmap_update_bits(arizona->regmap,
-				   ARIZONA_ACCESSORY_DETECT_MODE_1,
-				   ARIZONA_ACCDET_SRC,
-				   info->micd_modes[0].src);
+	/*
+	 * If we measure the mic as high impedance
+	 */
+	if (!id_gpio || info->hpdet_res[1] > 50) {
+		dev_dbg(arizona->dev, "Detected mic\n");
+		*mic = true;
+		info->detecting = true;
+	} else {
+		dev_dbg(arizona->dev, "Detected headphone\n");
 	}
 
+	/* Make sure everything is reset back to the real polarity */
+	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_SRC, info->micd_modes[0].src);
+
 	return 0;
 }
 
@@ -662,11 +666,6 @@ done:
 	if (id_gpio)
 		gpio_set_value_cansleep(id_gpio, 0);
 
-	/* Revert back to MICDET mode */
-	regmap_update_bits(arizona->regmap,
-			   ARIZONA_ACCESSORY_DETECT_MODE_1,
-			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
-
 	/* If we have a mic then reenable MICDET */
 	if (mic || info->mic)
 		arizona_start_mic(info);
@@ -699,8 +698,7 @@ static void arizona_identify_headphone(struct arizona_extcon_info *info)
 
 	info->hpdet_active = true;
 
-	if (info->mic)
-		arizona_stop_mic(info);
+	arizona_stop_mic(info);
 
 	arizona_extcon_hp_clamp(info, true);
 
@@ -724,8 +722,8 @@ static void arizona_identify_headphone(struct arizona_extcon_info *info)
 	return;
 
 err:
-	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
-			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
+	arizona_extcon_hp_clamp(info, false);
+	pm_runtime_put_autosuspend(info->dev);
 
 	/* Just report headphone */
 	ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true);
@@ -781,9 +779,6 @@ static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info)
 	return;
 
 err:
-	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
-			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
-
 	/* Just report headphone */
 	ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true);
 	if (ret != 0)
@@ -806,75 +801,58 @@ static void arizona_micd_timeout_work(struct work_struct *work)
 
 	arizona_identify_headphone(info);
 
-	arizona_stop_mic(info);
-
 	mutex_unlock(&info->lock);
 }
 
-static void arizona_micd_detect(struct work_struct *work)
+static int arizona_micd_adc_read(struct arizona_extcon_info *info)
 {
-	struct arizona_extcon_info *info = container_of(work,
-						struct arizona_extcon_info,
-						micd_detect_work.work);
 	struct arizona *arizona = info->arizona;
-	unsigned int val = 0, lvl;
-	int ret, i, key;
-
-	cancel_delayed_work_sync(&info->micd_timeout_work);
+	unsigned int val;
+	int ret;
 
-	mutex_lock(&info->lock);
+	/* Must disable MICD before we read the ADCVAL */
+	regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
+			   ARIZONA_MICD_ENA, 0);
 
-	/* If the cable was removed while measuring ignore the result */
-	ret = extcon_get_state(info->edev, EXTCON_MECHANICAL);
-	if (ret < 0) {
-		dev_err(arizona->dev, "Failed to check cable state: %d\n",
-				ret);
-		mutex_unlock(&info->lock);
-		return;
-	} else if (!ret) {
-		dev_dbg(arizona->dev, "Ignoring MICDET for removed cable\n");
-		mutex_unlock(&info->lock);
-		return;
+	ret = regmap_read(arizona->regmap, ARIZONA_MIC_DETECT_4, &val);
+	if (ret != 0) {
+		dev_err(arizona->dev,
+			"Failed to read MICDET_ADCVAL: %d\n", ret);
+		return ret;
 	}
 
-	if (info->detecting && arizona->pdata.micd_software_compare) {
-		/* Must disable MICD before we read the ADCVAL */
-		regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
-				   ARIZONA_MICD_ENA, 0);
-		ret = regmap_read(arizona->regmap, ARIZONA_MIC_DETECT_4, &val);
-		if (ret != 0) {
-			dev_err(arizona->dev,
-				"Failed to read MICDET_ADCVAL: %d\n",
-				ret);
-			mutex_unlock(&info->lock);
-			return;
-		}
+	dev_dbg(arizona->dev, "MICDET_ADCVAL: %x\n", val);
 
-		dev_dbg(arizona->dev, "MICDET_ADCVAL: %x\n", val);
+	val &= ARIZONA_MICDET_ADCVAL_MASK;
+	if (val < ARRAY_SIZE(arizona_micd_levels))
+		val = arizona_micd_levels[val];
+	else
+		val = INT_MAX;
+
+	if (val <= QUICK_HEADPHONE_MAX_OHM)
+		val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_0;
+	else if (val <= MICROPHONE_MIN_OHM)
+		val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_1;
+	else if (val <= MICROPHONE_MAX_OHM)
+		val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_8;
+	else
+		val = ARIZONA_MICD_LVL_8;
 
-		val &= ARIZONA_MICDET_ADCVAL_MASK;
-		if (val < ARRAY_SIZE(arizona_micd_levels))
-			val = arizona_micd_levels[val];
-		else
-			val = INT_MAX;
-
-		if (val <= QUICK_HEADPHONE_MAX_OHM)
-			val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_0;
-		else if (val <= MICROPHONE_MIN_OHM)
-			val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_1;
-		else if (val <= MICROPHONE_MAX_OHM)
-			val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_8;
-		else
-			val = ARIZONA_MICD_LVL_8;
-	}
+	return val;
+}
+
+static int arizona_micd_read(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	unsigned int val = 0;
+	int ret, i;
 
 	for (i = 0; i < 10 && !(val & MICD_LVL_0_TO_8); i++) {
 		ret = regmap_read(arizona->regmap, ARIZONA_MIC_DETECT_3, &val);
 		if (ret != 0) {
 			dev_err(arizona->dev,
 				"Failed to read MICDET: %d\n", ret);
-			mutex_unlock(&info->lock);
-			return;
+			return ret;
 		}
 
 		dev_dbg(arizona->dev, "MICDET: %x\n", val);
@@ -882,29 +860,44 @@ static void arizona_micd_detect(struct work_struct *work)
 		if (!(val & ARIZONA_MICD_VALID)) {
 			dev_warn(arizona->dev,
 				 "Microphone detection state invalid\n");
-			mutex_unlock(&info->lock);
-			return;
+			return -EINVAL;
 		}
 	}
 
 	if (i == 10 && !(val & MICD_LVL_0_TO_8)) {
 		dev_err(arizona->dev, "Failed to get valid MICDET value\n");
-		mutex_unlock(&info->lock);
-		return;
+		return -EINVAL;
 	}
 
+	return val;
+}
+
+static int arizona_micdet_reading(void *priv)
+{
+	struct arizona_extcon_info *info = priv;
+	struct arizona *arizona = info->arizona;
+	int ret, val;
+
+	if (info->detecting && arizona->pdata.micd_software_compare)
+		ret = arizona_micd_adc_read(info);
+	else
+		ret = arizona_micd_read(info);
+	if (ret < 0)
+		return ret;
+
+	val = ret;
+
 	/* Due to jack detect this should never happen */
 	if (!(val & ARIZONA_MICD_STS)) {
 		dev_warn(arizona->dev, "Detected open circuit\n");
 		info->mic = false;
-		arizona_stop_mic(info);
 		info->detecting = false;
 		arizona_identify_headphone(info);
-		goto handled;
+		return 0;
 	}
 
 	/* If we got a high impedence we should have a headset, report it. */
-	if (info->detecting && (val & ARIZONA_MICD_LVL_8)) {
+	if (val & ARIZONA_MICD_LVL_8) {
 		info->mic = true;
 		info->detecting = false;
 
@@ -923,7 +916,7 @@ static void arizona_micd_detect(struct work_struct *work)
 				ret);
 		}
 
-		goto handled;
+		return 0;
 	}
 
 	/* If we detected a lower impedence during initial startup
@@ -932,15 +925,13 @@ static void arizona_micd_detect(struct work_struct *work)
 	 * plain headphones.  If both polarities report a low
 	 * impedence then give up and report headphones.
 	 */
-	if (info->detecting && (val & MICD_LVL_1_TO_7)) {
+	if (val & MICD_LVL_1_TO_7) {
 		if (info->jack_flips >= info->micd_num_modes * 10) {
 			dev_dbg(arizona->dev, "Detected HP/line\n");
 
 			info->detecting = false;
 
 			arizona_identify_headphone(info);
-
-			arizona_stop_mic(info);
 		} else {
 			info->micd_mode++;
 			if (info->micd_mode == info->micd_num_modes)
@@ -948,13 +939,45 @@ static void arizona_micd_detect(struct work_struct *work)
 			arizona_extcon_set_mode(info, info->micd_mode);
 
 			info->jack_flips++;
+
+			if (arizona->pdata.micd_software_compare)
+				regmap_update_bits(arizona->regmap,
+						   ARIZONA_MIC_DETECT_1,
+						   ARIZONA_MICD_ENA,
+						   ARIZONA_MICD_ENA);
+
+			queue_delayed_work(system_power_efficient_wq,
+					   &info->micd_timeout_work,
+					   msecs_to_jiffies(arizona->pdata.micd_timeout));
 		}
 
-		goto handled;
+		return 0;
 	}
 
 	/*
 	 * If we're still detecting and we detect a short then we've
+	 * got a headphone.
+	 */
+	dev_dbg(arizona->dev, "Headphone detected\n");
+	info->detecting = false;
+
+	arizona_identify_headphone(info);
+
+	return 0;
+}
+
+static int arizona_button_reading(void *priv)
+{
+	struct arizona_extcon_info *info = priv;
+	struct arizona *arizona = info->arizona;
+	int val, key, lvl, i;
+
+	val = arizona_micd_read(info);
+	if (val < 0)
+		return val;
+
+	/*
+	 * If we're still detecting and we detect a short then we've
 	 * got a headphone.  Otherwise it's a button press.
 	 */
 	if (val & MICD_LVL_0_TO_7) {
@@ -968,20 +991,13 @@ static void arizona_micd_detect(struct work_struct *work)
 				input_report_key(info->input,
 						 info->micd_ranges[i].key, 0);
 
-			WARN_ON(!lvl);
-			WARN_ON(ffs(lvl) - 1 >= info->num_micd_ranges);
 			if (lvl && ffs(lvl) - 1 < info->num_micd_ranges) {
 				key = info->micd_ranges[ffs(lvl) - 1].key;
 				input_report_key(info->input, key, 1);
 				input_sync(info->input);
+			} else {
+				dev_err(arizona->dev, "Button out of range\n");
 			}
-
-		} else if (info->detecting) {
-			dev_dbg(arizona->dev, "Headphone detected\n");
-			info->detecting = false;
-			arizona_stop_mic(info);
-
-			arizona_identify_headphone(info);
 		} else {
 			dev_warn(arizona->dev, "Button with no mic: %x\n",
 				 val);
@@ -995,19 +1011,39 @@ static void arizona_micd_detect(struct work_struct *work)
 		arizona_extcon_pulse_micbias(info);
 	}
 
-handled:
-	if (info->detecting) {
-		if (arizona->pdata.micd_software_compare)
-			regmap_update_bits(arizona->regmap,
-					   ARIZONA_MIC_DETECT_1,
-					   ARIZONA_MICD_ENA,
-					   ARIZONA_MICD_ENA);
+	return 0;
+}
 
-		queue_delayed_work(system_power_efficient_wq,
-				   &info->micd_timeout_work,
-				   msecs_to_jiffies(info->micd_timeout));
+static void arizona_micd_detect(struct work_struct *work)
+{
+	struct arizona_extcon_info *info = container_of(work,
+						struct arizona_extcon_info,
+						micd_detect_work.work);
+	struct arizona *arizona = info->arizona;
+	int ret;
+
+	cancel_delayed_work_sync(&info->micd_timeout_work);
+
+	mutex_lock(&info->lock);
+
+	/* If the cable was removed while measuring ignore the result */
+	ret = extcon_get_state(info->edev, EXTCON_MECHANICAL);
+	if (ret < 0) {
+		dev_err(arizona->dev, "Failed to check cable state: %d\n",
+				ret);
+		mutex_unlock(&info->lock);
+		return;
+	} else if (!ret) {
+		dev_dbg(arizona->dev, "Ignoring MICDET for removed cable\n");
+		mutex_unlock(&info->lock);
+		return;
 	}
 
+	if (info->detecting)
+		arizona_micdet_reading(info);
+	else
+		arizona_button_reading(info);
+
 	pm_runtime_mark_last_busy(info->dev);
 	mutex_unlock(&info->lock);
 }
@@ -1125,7 +1161,7 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 					   msecs_to_jiffies(HPDET_DEBOUNCE));
 
 		if (cancelled_mic) {
-			int micd_timeout = info->micd_timeout;
+			int micd_timeout = arizona->pdata.micd_timeout;
 
 			queue_delayed_work(system_power_efficient_wq,
 					   &info->micd_timeout_work,
@@ -1145,11 +1181,11 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 			dev_err(arizona->dev, "Mechanical report failed: %d\n",
 				ret);
 
-		if (!arizona->pdata.hpdet_acc_id) {
-			info->detecting = true;
-			info->mic = false;
-			info->jack_flips = 0;
+		info->detecting = true;
+		info->mic = false;
+		info->jack_flips = 0;
 
+		if (!arizona->pdata.hpdet_acc_id) {
 			arizona_start_mic(info);
 		} else {
 			queue_delayed_work(system_power_efficient_wq,
@@ -1202,11 +1238,6 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 				   ARIZONA_MICD_CLAMP_DB | ARIZONA_JD1_DB);
 	}
 
-	if (arizona->pdata.micd_timeout)
-		info->micd_timeout = arizona->pdata.micd_timeout;
-	else
-		info->micd_timeout = DEFAULT_MICD_TIMEOUT;
-
 out:
 	/* Clear trig_sts to make sure DCVDD is not forced up */
 	regmap_write(arizona->regmap, ARIZONA_AOD_WKUP_AND_TRIG,
@@ -1435,6 +1466,9 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	info->input->name = "Headset";
 	info->input->phys = "arizona/extcon";
 
+	if (!pdata->micd_timeout)
+		pdata->micd_timeout = DEFAULT_MICD_TIMEOUT;
+
 	if (pdata->num_micd_configs) {
 		info->micd_modes = pdata->micd_configs;
 		info->micd_num_modes = pdata->num_micd_configs;
diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c
index 415afaf479e7..a7f216191493 100644
--- a/drivers/extcon/extcon-axp288.c
+++ b/drivers/extcon/extcon-axp288.c
@@ -322,6 +322,25 @@ static void axp288_put_role_sw(void *data)
 	usb_role_switch_put(info->role_sw);
 }
 
+static int axp288_extcon_find_role_sw(struct axp288_extcon_info *info)
+{
+	const struct software_node *swnode;
+	struct fwnode_handle *fwnode;
+
+	if (!x86_match_cpu(cherry_trail_cpu_ids))
+		return 0;
+
+	swnode = software_node_find_by_name(NULL, "intel-xhci-usb-sw");
+	if (!swnode)
+		return -EPROBE_DEFER;
+
+	fwnode = software_node_fwnode(swnode);
+	info->role_sw = usb_role_switch_find_by_fwnode(fwnode);
+	fwnode_handle_put(fwnode);
+
+	return info->role_sw ? 0 : -EPROBE_DEFER;
+}
+
 static int axp288_extcon_probe(struct platform_device *pdev)
 {
 	struct axp288_extcon_info *info;
@@ -343,9 +362,10 @@ static int axp288_extcon_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, info);
 
-	info->role_sw = usb_role_switch_get(dev);
-	if (IS_ERR(info->role_sw))
-		return PTR_ERR(info->role_sw);
+	ret = axp288_extcon_find_role_sw(info);
+	if (ret)
+		return ret;
+
 	if (info->role_sw) {
 		ret = devm_add_action_or_reset(dev, axp288_put_role_sw, info);
 		if (ret)
@@ -440,26 +460,14 @@ static struct platform_driver axp288_extcon_driver = {
 	},
 };
 
-static struct device_connection axp288_extcon_role_sw_conn = {
-	.endpoint[0] = "axp288_extcon",
-	.endpoint[1] = "intel_xhci_usb_sw-role-switch",
-	.id = "usb-role-switch",
-};
-
 static int __init axp288_extcon_init(void)
 {
-	if (x86_match_cpu(cherry_trail_cpu_ids))
-		device_connection_add(&axp288_extcon_role_sw_conn);
-
 	return platform_driver_register(&axp288_extcon_driver);
 }
 module_init(axp288_extcon_init);
 
 static void __exit axp288_extcon_exit(void)
 {
-	if (x86_match_cpu(cherry_trail_cpu_ids))
-		device_connection_remove(&axp288_extcon_role_sw_conn);
-
 	platform_driver_unregister(&axp288_extcon_driver);
 }
 module_exit(axp288_extcon_exit);
diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c
index 9d32150e68db..771f6f4cf92e 100644
--- a/drivers/extcon/extcon-intel-cht-wc.c
+++ b/drivers/extcon/extcon-intel-cht-wc.c
@@ -338,6 +338,7 @@ static int cht_wc_extcon_probe(struct platform_device *pdev)
 	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
 	struct cht_wc_extcon_data *ext;
 	unsigned long mask = ~(CHT_WC_PWRSRC_VBUS | CHT_WC_PWRSRC_USBID_MASK);
+	int pwrsrc_sts, id;
 	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
@@ -387,8 +388,19 @@ static int cht_wc_extcon_probe(struct platform_device *pdev)
 		goto disable_sw_control;
 	}
 
-	/* Route D+ and D- to PMIC for initial charger detection */
-	cht_wc_extcon_set_phymux(ext, MUX_SEL_PMIC);
+	ret = regmap_read(ext->regmap, CHT_WC_PWRSRC_STS, &pwrsrc_sts);
+	if (ret) {
+		dev_err(ext->dev, "Error reading pwrsrc status: %d\n", ret);
+		goto disable_sw_control;
+	}
+
+	/*
+	 * If no USB host or device connected, route D+ and D- to PMIC for
+	 * initial charger detection
+	 */
+	id = cht_wc_extcon_get_id(ext, pwrsrc_sts);
+	if (id != INTEL_USB_ID_GND)
+		cht_wc_extcon_set_phymux(ext, MUX_SEL_PMIC);
 
 	/* Get initial state */
 	cht_wc_extcon_pwrsrc_event(ext);
diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c
index dc43847ad2b0..106d4da647bd 100644
--- a/drivers/extcon/extcon-sm5502.c
+++ b/drivers/extcon/extcon-sm5502.c
@@ -65,6 +65,10 @@ struct sm5502_muic_info {
 /* Default value of SM5502 register to bring up MUIC device. */
 static struct reg_data sm5502_reg_data[] = {
 	{
+		.reg = SM5502_REG_RESET,
+		.val = SM5502_REG_RESET_MASK,
+		.invert = true,
+	}, {
 		.reg = SM5502_REG_CONTROL,
 		.val = SM5502_REG_CONTROL_MASK_INT_MASK,
 		.invert = false,
@@ -245,7 +249,7 @@ static int sm5502_muic_set_path(struct sm5502_muic_info *info,
 		dev_err(info->dev, "Unknown DM_CON/DP_CON switch type (%d)\n",
 				con_sw);
 		return -EINVAL;
-	};
+	}
 
 	switch (vbus_sw) {
 	case VBUSIN_SWITCH_OPEN:
@@ -264,7 +268,7 @@ static int sm5502_muic_set_path(struct sm5502_muic_info *info,
 	default:
 		dev_err(info->dev, "Unknown VBUS switch type (%d)\n", vbus_sw);
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -272,7 +276,7 @@ static int sm5502_muic_set_path(struct sm5502_muic_info *info,
 /* Return cable type of attached or detached accessories */
 static unsigned int sm5502_muic_get_cable_type(struct sm5502_muic_info *info)
 {
-	unsigned int cable_type = -1, adc, dev_type1;
+	unsigned int cable_type, adc, dev_type1;
 	int ret;
 
 	/* Read ADC value according to external cable or button */
@@ -353,13 +357,13 @@ static unsigned int sm5502_muic_get_cable_type(struct sm5502_muic_info *info)
 				"cannot identify the cable type: adc(0x%x)\n",
 				adc);
 			return -EINVAL;
-		};
+		}
 		break;
 	default:
 		dev_err(info->dev,
 			"failed to identify the cable type: adc(0x%x)\n", adc);
 		return -EINVAL;
-	};
+	}
 
 	return cable_type;
 }
@@ -401,7 +405,7 @@ static int sm5502_muic_cable_handler(struct sm5502_muic_info *info,
 		dev_dbg(info->dev,
 			"cannot handle this cable_type (0x%x)\n", cable_type);
 		return 0;
-	};
+	}
 
 	/* Change internal hardware path(DM_CON/DP_CON, VBUSIN) */
 	ret = sm5502_muic_set_path(info, con_sw, vbus_sw, attached);
diff --git a/drivers/extcon/extcon-sm5502.h b/drivers/extcon/extcon-sm5502.h
index 9dbb634d213b..ce1f1ec310c4 100644
--- a/drivers/extcon/extcon-sm5502.h
+++ b/drivers/extcon/extcon-sm5502.h
@@ -237,6 +237,8 @@ enum sm5502_reg {
 #define DM_DP_SWITCH_UART			((DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
 						| (DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
 
+#define SM5502_REG_RESET_MASK			(0x1)
+
 /* SM5502 Interrupts */
 enum sm5502_irq {
 	/* INT1 */
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 1da7ba18d399..6e291d8f3a27 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1646,14 +1646,6 @@ static long fw_device_op_ioctl(struct file *file,
 	return dispatch_ioctl(file->private_data, cmd, (void __user *)arg);
 }
 
-#ifdef CONFIG_COMPAT
-static long fw_device_op_compat_ioctl(struct file *file,
-				      unsigned int cmd, unsigned long arg)
-{
-	return dispatch_ioctl(file->private_data, cmd, compat_ptr(arg));
-}
-#endif
-
 static int fw_device_op_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct client *client = file->private_data;
@@ -1694,7 +1686,8 @@ static int fw_device_op_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret < 0)
 		goto fail;
 
-	ret = fw_iso_buffer_map_vma(&client->buffer, vma);
+	ret = vm_map_pages_zero(vma, client->buffer.pages,
+				client->buffer.page_count);
 	if (ret < 0)
 		goto fail;
 
@@ -1795,7 +1788,5 @@ const struct file_operations fw_device_ops = {
 	.mmap		= fw_device_op_mmap,
 	.release	= fw_device_op_release,
 	.poll		= fw_device_op_poll,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= fw_device_op_compat_ioctl,
-#endif
+	.compat_ioctl	= compat_ptr_ioctl,
 };
diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c
index df8a56a979b9..185b0b78b3d6 100644
--- a/drivers/firewire/core-iso.c
+++ b/drivers/firewire/core-iso.c
@@ -91,13 +91,6 @@ int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_iso_buffer_init);
 
-int fw_iso_buffer_map_vma(struct fw_iso_buffer *buffer,
-			  struct vm_area_struct *vma)
-{
-	return vm_map_pages_zero(vma, buffer->pages,
-					buffer->page_count);
-}
-
 void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer,
 			   struct fw_card *card)
 {
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 0f0bed3a4bbb..4b0e4ee655a1 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -158,8 +158,6 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event);
 int fw_iso_buffer_alloc(struct fw_iso_buffer *buffer, int page_count);
 int fw_iso_buffer_map_dma(struct fw_iso_buffer *buffer, struct fw_card *card,
 			  enum dma_data_direction direction);
-int fw_iso_buffer_map_vma(struct fw_iso_buffer *buffer,
-			  struct vm_area_struct *vma);
 
 
 /* -topology */
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index b132ab9ad607..715e491dfbc3 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -250,7 +250,11 @@ static int fwnet_header_cache(const struct neighbour *neigh,
 	h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h)));
 	h->h_proto = type;
 	memcpy(h->h_dest, neigh->ha, net->addr_len);
-	hh->hh_len = FWNET_HLEN;
+
+	/* Pairs with the READ_ONCE() in neigh_resolve_output(),
+	 * neigh_hh_output() and neigh_update_hhs().
+	 */
+	smp_store_release(&hh->hh_len, FWNET_HLEN);
 
 	return 0;
 }
diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c
index 0cc746673677..6ca2f5ab6c57 100644
--- a/drivers/firewire/nosy.c
+++ b/drivers/firewire/nosy.c
@@ -551,7 +551,7 @@ add_card(struct pci_dev *dev, const struct pci_device_id *unused)
 	INIT_LIST_HEAD(&lynx->client_list);
 	kref_init(&lynx->kref);
 
-	lynx->registers = ioremap_nocache(pci_resource_start(dev, 0),
+	lynx->registers = ioremap(pci_resource_start(dev, 0),
 					  PCILYNX_MAX_REGISTER);
 	if (lynx->registers == NULL) {
 		dev_err(&dev->dev, "Failed to map registers\n");
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 522f3addb5bd..33269316f111 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1752,7 +1752,7 @@ static u32 update_bus_time(struct fw_ohci *ohci)
 
 	if (unlikely(!ohci->bus_time_running)) {
 		reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_cycle64Seconds);
-		ohci->bus_time = (lower_32_bits(get_seconds()) & ~0x7f) |
+		ohci->bus_time = (lower_32_bits(ktime_get_seconds()) & ~0x7f) |
 		                 (cycle_time_seconds & 0x40);
 		ohci->bus_time_running = true;
 	}
diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
index 92f843eaf1e0..7a30952b463d 100644
--- a/drivers/firmware/arm_scmi/bus.c
+++ b/drivers/firmware/arm_scmi/bus.c
@@ -135,8 +135,10 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol)
 		return NULL;
 
 	id = ida_simple_get(&scmi_bus_id, 1, 0, GFP_KERNEL);
-	if (id < 0)
-		goto free_mem;
+	if (id < 0) {
+		kfree(scmi_dev);
+		return NULL;
+	}
 
 	scmi_dev->id = id;
 	scmi_dev->protocol_id = protocol;
@@ -154,8 +156,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol)
 put_dev:
 	put_device(&scmi_dev->dev);
 	ida_simple_remove(&scmi_bus_id, id);
-free_mem:
-	kfree(scmi_dev);
 	return NULL;
 }
 
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 4a8012e3cb8c..601af4edad5e 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -323,7 +323,7 @@ static void scmi_perf_fc_ring_db(struct scmi_fc_db_info *db)
 
 		if (db->mask)
 			val = ioread64_hi_lo(db->addr) & db->mask;
-		iowrite64_hi_lo(db->set, db->addr);
+		iowrite64_hi_lo(db->set | val, db->addr);
 	}
 #endif
 }
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 9cd70d1a5622..a479023fa036 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev)
 	if (np) {
 		if (of_property_read_string(np, "method", &method)) {
 			pr_warn("missing \"method\" property\n");
-			return CONDUIT_INVALID;
+			return SMCCC_CONDUIT_NONE;
 		}
 
 		if (!strcmp("hvc", method)) {
 			sdei_firmware_call = &sdei_smccc_hvc;
-			return CONDUIT_HVC;
+			return SMCCC_CONDUIT_HVC;
 		} else if (!strcmp("smc", method)) {
 			sdei_firmware_call = &sdei_smccc_smc;
-			return CONDUIT_SMC;
+			return SMCCC_CONDUIT_SMC;
 		}
 
 		pr_warn("invalid \"method\" property: %s\n", method);
 	} else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
 		if (acpi_psci_use_hvc()) {
 			sdei_firmware_call = &sdei_smccc_hvc;
-			return CONDUIT_HVC;
+			return SMCCC_CONDUIT_HVC;
 		} else {
 			sdei_firmware_call = &sdei_smccc_smc;
-			return CONDUIT_SMC;
+			return SMCCC_CONDUIT_SMC;
 		}
 	}
 
-	return CONDUIT_INVALID;
+	return SMCCC_CONDUIT_NONE;
 }
 
 static int sdei_probe(struct platform_device *pdev)
diff --git a/drivers/firmware/broadcom/Kconfig b/drivers/firmware/broadcom/Kconfig
index d03ed8e43ad7..8e3d355a637a 100644
--- a/drivers/firmware/broadcom/Kconfig
+++ b/drivers/firmware/broadcom/Kconfig
@@ -22,3 +22,11 @@ config BCM47XX_SPROM
 	  In case of SoC devices SPROM content is stored on a flash used by
 	  bootloader firmware CFE. This driver provides method to ssb and bcma
 	  drivers to read SPROM on SoC.
+
+config TEE_BNXT_FW
+	tristate "Broadcom BNXT firmware manager"
+	depends on (ARCH_BCM_IPROC && OPTEE) || (COMPILE_TEST && TEE)
+	default ARCH_BCM_IPROC
+	help
+	  This module help to manage firmware on Broadcom BNXT device. The module
+	  registers on tee bus and invoke calls to manage firmware on BNXT device.
diff --git a/drivers/firmware/broadcom/Makefile b/drivers/firmware/broadcom/Makefile
index 72c7fdc20c77..17c5061c47a7 100644
--- a/drivers/firmware/broadcom/Makefile
+++ b/drivers/firmware/broadcom/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BCM47XX_NVRAM)		+= bcm47xx_nvram.o
 obj-$(CONFIG_BCM47XX_SPROM)		+= bcm47xx_sprom.o
+obj-$(CONFIG_TEE_BNXT_FW)		+= tee_bnxt_fw.o
diff --git a/drivers/firmware/broadcom/bcm47xx_nvram.c b/drivers/firmware/broadcom/bcm47xx_nvram.c
index da04fdae62a1..835ece9c00f1 100644
--- a/drivers/firmware/broadcom/bcm47xx_nvram.c
+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c
@@ -120,7 +120,7 @@ int bcm47xx_nvram_init_from_mem(u32 base, u32 lim)
 	void __iomem *iobase;
 	int err;
 
-	iobase = ioremap_nocache(base, lim);
+	iobase = ioremap(base, lim);
 	if (!iobase)
 		return -ENOMEM;
 
diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c
new file mode 100644
index 000000000000..ed10da5313e8
--- /dev/null
+++ b/drivers/firmware/broadcom/tee_bnxt_fw.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Broadcom.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/uuid.h>
+
+#include <linux/firmware/broadcom/tee_bnxt_fw.h>
+
+#define MAX_SHM_MEM_SZ	SZ_4M
+
+#define MAX_TEE_PARAM_ARRY_MEMB		4
+
+enum ta_cmd {
+	/*
+	 * TA_CMD_BNXT_FASTBOOT - boot bnxt device by copying f/w into sram
+	 *
+	 *	param[0] unused
+	 *	param[1] unused
+	 *	param[2] unused
+	 *	param[3] unused
+	 *
+	 * Result:
+	 *	TEE_SUCCESS - Invoke command success
+	 *	TEE_ERROR_ITEM_NOT_FOUND - Corrupt f/w image found on memory
+	 */
+	TA_CMD_BNXT_FASTBOOT = 0,
+
+	/*
+	 * TA_CMD_BNXT_COPY_COREDUMP - copy the core dump into shm
+	 *
+	 *	param[0] (inout memref) - Coredump buffer memory reference
+	 *	param[1] (in value) - value.a: offset, data to be copied from
+	 *			      value.b: size of data to be copied
+	 *	param[2] unused
+	 *	param[3] unused
+	 *
+	 * Result:
+	 *	TEE_SUCCESS - Invoke command success
+	 *	TEE_ERROR_BAD_PARAMETERS - Incorrect input param
+	 *	TEE_ERROR_ITEM_NOT_FOUND - Corrupt core dump
+	 */
+	TA_CMD_BNXT_COPY_COREDUMP = 3,
+};
+
+/**
+ * struct tee_bnxt_fw_private - OP-TEE bnxt private data
+ * @dev:		OP-TEE based bnxt device.
+ * @ctx:		OP-TEE context handler.
+ * @session_id:		TA session identifier.
+ */
+struct tee_bnxt_fw_private {
+	struct device *dev;
+	struct tee_context *ctx;
+	u32 session_id;
+	struct tee_shm *fw_shm_pool;
+};
+
+static struct tee_bnxt_fw_private pvt_data;
+
+static void prepare_args(int cmd,
+			 struct tee_ioctl_invoke_arg *arg,
+			 struct tee_param *param)
+{
+	memset(arg, 0, sizeof(*arg));
+	memset(param, 0, MAX_TEE_PARAM_ARRY_MEMB * sizeof(*param));
+
+	arg->func = cmd;
+	arg->session = pvt_data.session_id;
+	arg->num_params = MAX_TEE_PARAM_ARRY_MEMB;
+
+	/* Fill invoke cmd params */
+	switch (cmd) {
+	case TA_CMD_BNXT_COPY_COREDUMP:
+		param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT;
+		param[0].u.memref.shm = pvt_data.fw_shm_pool;
+		param[0].u.memref.size = MAX_SHM_MEM_SZ;
+		param[0].u.memref.shm_offs = 0;
+		param[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+		break;
+	case TA_CMD_BNXT_FASTBOOT:
+	default:
+		/* Nothing to do */
+		break;
+	}
+}
+
+/**
+ * tee_bnxt_fw_load() - Load the bnxt firmware
+ *		    Uses an OP-TEE call to start a secure
+ *		    boot process.
+ * Returns 0 on success, negative errno otherwise.
+ */
+int tee_bnxt_fw_load(void)
+{
+	int ret = 0;
+	struct tee_ioctl_invoke_arg arg;
+	struct tee_param param[MAX_TEE_PARAM_ARRY_MEMB];
+
+	if (!pvt_data.ctx)
+		return -ENODEV;
+
+	prepare_args(TA_CMD_BNXT_FASTBOOT, &arg, param);
+
+	ret = tee_client_invoke_func(pvt_data.ctx, &arg, param);
+	if (ret < 0 || arg.ret != 0) {
+		dev_err(pvt_data.dev,
+			"TA_CMD_BNXT_FASTBOOT invoke failed TEE err: %x, ret:%x\n",
+			arg.ret, ret);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tee_bnxt_fw_load);
+
+/**
+ * tee_bnxt_copy_coredump() - Copy coredump from the allocated memory
+ *			    Uses an OP-TEE call to copy coredump
+ * @buf:	destination buffer where core dump is copied into
+ * @offset:	offset from the base address of core dump area
+ * @size:	size of the dump
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int tee_bnxt_copy_coredump(void *buf, u32 offset, u32 size)
+{
+	struct tee_ioctl_invoke_arg arg;
+	struct tee_param param[MAX_TEE_PARAM_ARRY_MEMB];
+	void *core_data;
+	u32 rbytes = size;
+	u32 nbytes = 0;
+	int ret = 0;
+
+	if (!pvt_data.ctx)
+		return -ENODEV;
+
+	prepare_args(TA_CMD_BNXT_COPY_COREDUMP, &arg, param);
+
+	while (rbytes)  {
+		nbytes = rbytes;
+
+		nbytes = min_t(u32, rbytes, param[0].u.memref.size);
+
+		/* Fill additional invoke cmd params */
+		param[1].u.value.a = offset;
+		param[1].u.value.b = nbytes;
+
+		ret = tee_client_invoke_func(pvt_data.ctx, &arg, param);
+		if (ret < 0 || arg.ret != 0) {
+			dev_err(pvt_data.dev,
+				"TA_CMD_BNXT_COPY_COREDUMP invoke failed TEE err: %x, ret:%x\n",
+				arg.ret, ret);
+			return -EINVAL;
+		}
+
+		core_data = tee_shm_get_va(pvt_data.fw_shm_pool, 0);
+		if (IS_ERR(core_data)) {
+			dev_err(pvt_data.dev, "tee_shm_get_va failed\n");
+			return PTR_ERR(core_data);
+		}
+
+		memcpy(buf, core_data, nbytes);
+
+		rbytes -= nbytes;
+		buf += nbytes;
+		offset += nbytes;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tee_bnxt_copy_coredump);
+
+static int optee_ctx_match(struct tee_ioctl_version_data *ver, const void *data)
+{
+	return (ver->impl_id == TEE_IMPL_ID_OPTEE);
+}
+
+static int tee_bnxt_fw_probe(struct device *dev)
+{
+	struct tee_client_device *bnxt_device = to_tee_client_device(dev);
+	int ret, err = -ENODEV;
+	struct tee_ioctl_open_session_arg sess_arg;
+	struct tee_shm *fw_shm_pool;
+
+	memset(&sess_arg, 0, sizeof(sess_arg));
+
+	/* Open context with TEE driver */
+	pvt_data.ctx = tee_client_open_context(NULL, optee_ctx_match, NULL,
+					       NULL);
+	if (IS_ERR(pvt_data.ctx))
+		return -ENODEV;
+
+	/* Open session with Bnxt load Trusted App */
+	memcpy(sess_arg.uuid, bnxt_device->id.uuid.b, TEE_IOCTL_UUID_LEN);
+	sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
+	sess_arg.num_params = 0;
+
+	ret = tee_client_open_session(pvt_data.ctx, &sess_arg, NULL);
+	if (ret < 0 || sess_arg.ret != 0) {
+		dev_err(dev, "tee_client_open_session failed, err: %x\n",
+			sess_arg.ret);
+		err = -EINVAL;
+		goto out_ctx;
+	}
+	pvt_data.session_id = sess_arg.session;
+
+	pvt_data.dev = dev;
+
+	fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ,
+				    TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(fw_shm_pool)) {
+		dev_err(pvt_data.dev, "tee_shm_alloc failed\n");
+		err = PTR_ERR(fw_shm_pool);
+		goto out_sess;
+	}
+
+	pvt_data.fw_shm_pool = fw_shm_pool;
+
+	return 0;
+
+out_sess:
+	tee_client_close_session(pvt_data.ctx, pvt_data.session_id);
+out_ctx:
+	tee_client_close_context(pvt_data.ctx);
+
+	return err;
+}
+
+static int tee_bnxt_fw_remove(struct device *dev)
+{
+	tee_shm_free(pvt_data.fw_shm_pool);
+	tee_client_close_session(pvt_data.ctx, pvt_data.session_id);
+	tee_client_close_context(pvt_data.ctx);
+	pvt_data.ctx = NULL;
+
+	return 0;
+}
+
+static const struct tee_client_device_id tee_bnxt_fw_id_table[] = {
+	{UUID_INIT(0x6272636D, 0x2019, 0x0716,
+		    0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 0x48, 0x49)},
+	{}
+};
+
+MODULE_DEVICE_TABLE(tee, tee_bnxt_fw_id_table);
+
+static struct tee_client_driver tee_bnxt_fw_driver = {
+	.id_table	= tee_bnxt_fw_id_table,
+	.driver		= {
+		.name		= KBUILD_MODNAME,
+		.bus		= &tee_bus_type,
+		.probe		= tee_bnxt_fw_probe,
+		.remove		= tee_bnxt_fw_remove,
+	},
+};
+
+static int __init tee_bnxt_fw_mod_init(void)
+{
+	return driver_register(&tee_bnxt_fw_driver.driver);
+}
+
+static void __exit tee_bnxt_fw_mod_exit(void)
+{
+	driver_unregister(&tee_bnxt_fw_driver.driver);
+}
+
+module_init(tee_bnxt_fw_mod_init);
+module_exit(tee_bnxt_fw_mod_exit);
+
+MODULE_AUTHOR("Vikas Gupta <vikas.gupta@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom bnxt firmware manager");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 1e21fc3e9851..2045566d622f 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -35,6 +35,7 @@ static struct dmi_memdev_info {
 	const char *bank;
 	u64 size;		/* bytes */
 	u16 handle;
+	u8 type;		/* DDR2, DDR3, DDR4 etc */
 } *dmi_memdev;
 static int dmi_memdev_nr;
 
@@ -391,7 +392,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
 	u64 bytes;
 	u16 size;
 
-	if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x12)
+	if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x13)
 		return;
 	if (nr >= dmi_memdev_nr) {
 		pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
@@ -400,6 +401,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
 	dmi_memdev[nr].handle = get_unaligned(&dm->handle);
 	dmi_memdev[nr].device = dmi_string(dm, d[0x10]);
 	dmi_memdev[nr].bank = dmi_string(dm, d[0x11]);
+	dmi_memdev[nr].type = d[0x12];
 
 	size = get_unaligned((u16 *)&d[0xC]);
 	if (size == 0)
@@ -1128,3 +1130,40 @@ u64 dmi_memdev_size(u16 handle)
 	return ~0ull;
 }
 EXPORT_SYMBOL_GPL(dmi_memdev_size);
+
+/**
+ * dmi_memdev_type - get the memory type
+ * @handle: DMI structure handle
+ *
+ * Return the DMI memory type of the module in the slot associated with the
+ * given DMI handle, or 0x0 if no such DMI handle exists.
+ */
+u8 dmi_memdev_type(u16 handle)
+{
+	int n;
+
+	if (dmi_memdev) {
+		for (n = 0; n < dmi_memdev_nr; n++) {
+			if (handle == dmi_memdev[n].handle)
+				return dmi_memdev[n].type;
+		}
+	}
+	return 0x0;	/* Not a valid value */
+}
+EXPORT_SYMBOL_GPL(dmi_memdev_type);
+
+/**
+ *	dmi_memdev_handle - get the DMI handle of a memory slot
+ *	@slot: slot number
+ *
+ *	Return the DMI handle associated with a given memory slot, or %0xFFFF
+ *      if there is no such slot.
+ */
+u16 dmi_memdev_handle(int slot)
+{
+	if (dmi_memdev && slot >= 0 && slot < dmi_memdev_nr)
+		return dmi_memdev[slot].handle;
+
+	return 0xffff;	/* Not a valid value */
+}
+EXPORT_SYMBOL_GPL(dmi_memdev_handle);
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index b248870a9806..ecc83e2f032c 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -75,6 +75,27 @@ config EFI_MAX_FAKE_MEM
 	  Ranges can be set up to this value using comma-separated list.
 	  The default value is 8.
 
+config EFI_SOFT_RESERVE
+	bool "Reserve EFI Specific Purpose Memory"
+	depends on EFI && EFI_STUB && ACPI_HMAT
+	default ACPI_HMAT
+	help
+	  On systems that have mixed performance classes of memory EFI
+	  may indicate specific purpose memory with an attribute (See
+	  EFI_MEMORY_SP in UEFI 2.8). A memory range tagged with this
+	  attribute may have unique performance characteristics compared
+	  to the system's general purpose "System RAM" pool. On the
+	  expectation that such memory has application specific usage,
+	  and its base EFI memory type is "conventional" answer Y to
+	  arrange for the kernel to reserve it as a "Soft Reserved"
+	  resource, and set aside for direct-access (device-dax) by
+	  default. The memory range can later be optionally assigned to
+	  the page allocator by system administrator policy via the
+	  device-dax kmem facility. Say N to have the kernel treat this
+	  memory as "System RAM" by default.
+
+	  If unsure, say Y.
+
 config EFI_PARAMS_FROM_FDT
 	bool
 	help
@@ -194,6 +215,28 @@ config EFI_RCI2_TABLE
 
 	  Say Y here for Dell EMC PowerEdge systems.
 
+config EFI_DISABLE_PCI_DMA
+       bool "Clear Busmaster bit on PCI bridges during ExitBootServices()"
+       help
+	  Disable the busmaster bit in the control register on all PCI bridges
+	  while calling ExitBootServices() and passing control to the runtime
+	  kernel. System firmware may configure the IOMMU to prevent malicious
+	  PCI devices from being able to attack the OS via DMA. However, since
+	  firmware can't guarantee that the OS is IOMMU-aware, it will tear
+	  down IOMMU configuration when ExitBootServices() is called. This
+	  leaves a window between where a hostile device could still cause
+	  damage before Linux configures the IOMMU again.
+
+	  If you say Y here, the EFI stub will clear the busmaster bit on all
+	  PCI bridges before ExitBootServices() is called. This will prevent
+	  any malicious PCI devices from being able to perform DMA until the
+	  kernel reenables busmastering after configuring the IOMMU.
+
+	  This option will cause failures with some poorly behaved hardware
+	  and should not be enabled without testing. The kernel commandline
+	  options "efi=disable_early_pci_dma" or "efi=no_disable_early_pci_dma"
+	  may be used to override this option.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 4ac2de4dfa72..554d795270d9 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -20,13 +20,16 @@ obj-$(CONFIG_UEFI_CPER)			+= cper.o
 obj-$(CONFIG_EFI_RUNTIME_MAP)		+= runtime-map.o
 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
 obj-$(CONFIG_EFI_STUB)			+= libstub/
-obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
+obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_map.o
 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 obj-$(CONFIG_EFI_TEST)			+= test/
 obj-$(CONFIG_EFI_DEV_PATH_PARSER)	+= dev-path-parser.o
 obj-$(CONFIG_APPLE_PROPERTIES)		+= apple-properties.o
 obj-$(CONFIG_EFI_RCI2_TABLE)		+= rci2-table.o
 
+fake_map-y				+= fake_mem.o
+fake_map-$(CONFIG_X86)			+= x86_fake_mem.o
+
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
 obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 0e206c9e0d7a..5ccf39986a14 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -53,7 +53,8 @@ static void __init unmarshal_key_value_pairs(struct dev_header *dev_header,
 
 	for (i = 0; i < dev_header->prop_count; i++) {
 		int remaining = dev_header->len - (ptr - (void *)dev_header);
-		u32 key_len, val_len;
+		u32 key_len, val_len, entry_len;
+		const u8 *entry_data;
 		char *key;
 
 		if (sizeof(key_len) > remaining)
@@ -85,17 +86,14 @@ static void __init unmarshal_key_value_pairs(struct dev_header *dev_header,
 		ucs2_as_utf8(key, ptr + sizeof(key_len),
 			     key_len - sizeof(key_len));
 
-		entry[i].name = key;
-		entry[i].length = val_len - sizeof(val_len);
-		entry[i].is_array = !!entry[i].length;
-		entry[i].type = DEV_PROP_U8;
-		entry[i].pointer.u8_data = ptr + key_len + sizeof(val_len);
-
+		entry_data = ptr + key_len + sizeof(val_len);
+		entry_len = val_len - sizeof(val_len);
+		entry[i] = PROPERTY_ENTRY_U8_ARRAY_LEN(key, entry_data,
+						       entry_len);
 		if (dump_properties) {
-			dev_info(dev, "property: %s\n", entry[i].name);
+			dev_info(dev, "property: %s\n", key);
 			print_hex_dump(KERN_INFO, pr_fmt(), DUMP_PREFIX_OFFSET,
-				16, 1, entry[i].pointer.u8_data,
-				entry[i].length, true);
+				16, 1, entry_data, entry_len, true);
 		}
 
 		ptr += key_len + val_len;
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 311cd349a862..d99f5b0c8a09 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -10,10 +10,12 @@
 #define pr_fmt(fmt)	"efi: " fmt
 
 #include <linux/efi.h>
+#include <linux/fwnode.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/mm_types.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/platform_device.h>
 #include <linux/screen_info.h>
@@ -164,6 +166,15 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
 	case EFI_CONVENTIONAL_MEMORY:
 	case EFI_PERSISTENT_MEMORY:
 		/*
+		 * Special purpose memory is 'soft reserved', which means it
+		 * is set aside initially, but can be hotplugged back in or
+		 * be assigned to the dax driver after boot.
+		 */
+		if (efi_soft_reserve_enabled() &&
+		    (md->attribute & EFI_MEMORY_SP))
+			return false;
+
+		/*
 		 * According to the spec, these regions are no longer reserved
 		 * after calling ExitBootServices(). However, we can only use
 		 * them as System RAM if they can be mapped writeback cacheable.
@@ -267,15 +278,112 @@ void __init efi_init(void)
 		efi_memmap_unmap();
 }
 
+static bool efifb_overlaps_pci_range(const struct of_pci_range *range)
+{
+	u64 fb_base = screen_info.lfb_base;
+
+	if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		fb_base |= (u64)(unsigned long)screen_info.ext_lfb_base << 32;
+
+	return fb_base >= range->cpu_addr &&
+	       fb_base < (range->cpu_addr + range->size);
+}
+
+static struct device_node *find_pci_overlap_node(void)
+{
+	struct device_node *np;
+
+	for_each_node_by_type(np, "pci") {
+		struct of_pci_range_parser parser;
+		struct of_pci_range range;
+		int err;
+
+		err = of_pci_range_parser_init(&parser, np);
+		if (err) {
+			pr_warn("of_pci_range_parser_init() failed: %d\n", err);
+			continue;
+		}
+
+		for_each_of_pci_range(&parser, &range)
+			if (efifb_overlaps_pci_range(&range))
+				return np;
+	}
+	return NULL;
+}
+
+/*
+ * If the efifb framebuffer is backed by a PCI graphics controller, we have
+ * to ensure that this relation is expressed using a device link when
+ * running in DT mode, or the probe order may be reversed, resulting in a
+ * resource reservation conflict on the memory window that the efifb
+ * framebuffer steals from the PCIe host bridge.
+ */
+static int efifb_add_links(const struct fwnode_handle *fwnode,
+			   struct device *dev)
+{
+	struct device_node *sup_np;
+	struct device *sup_dev;
+
+	sup_np = find_pci_overlap_node();
+
+	/*
+	 * If there's no PCI graphics controller backing the efifb, we are
+	 * done here.
+	 */
+	if (!sup_np)
+		return 0;
+
+	sup_dev = get_dev_from_fwnode(&sup_np->fwnode);
+	of_node_put(sup_np);
+
+	/*
+	 * Return -ENODEV if the PCI graphics controller device hasn't been
+	 * registered yet.  This ensures that efifb isn't allowed to probe
+	 * and this function is retried again when new devices are
+	 * registered.
+	 */
+	if (!sup_dev)
+		return -ENODEV;
+
+	/*
+	 * If this fails, retrying this function at a later point won't
+	 * change anything. So, don't return an error after this.
+	 */
+	if (!device_link_add(dev, sup_dev, 0))
+		dev_warn(dev, "device_link_add() failed\n");
+
+	put_device(sup_dev);
+
+	return 0;
+}
+
+static const struct fwnode_operations efifb_fwnode_ops = {
+	.add_links = efifb_add_links,
+};
+
+static struct fwnode_handle efifb_fwnode = {
+	.ops = &efifb_fwnode_ops,
+};
+
 static int __init register_gop_device(void)
 {
-	void *pd;
+	struct platform_device *pd;
+	int err;
 
 	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
 		return 0;
 
-	pd = platform_device_register_data(NULL, "efi-framebuffer", 0,
-					   &screen_info, sizeof(screen_info));
-	return PTR_ERR_OR_ZERO(pd);
+	pd = platform_device_alloc("efi-framebuffer", 0);
+	if (!pd)
+		return -ENOMEM;
+
+	if (IS_ENABLED(CONFIG_PCI))
+		pd->dev.fwnode = &efifb_fwnode;
+
+	err = platform_device_add_data(pd, &screen_info, sizeof(screen_info));
+	if (err)
+		return err;
+
+	return platform_device_add(pd);
 }
 subsys_initcall(register_gop_device);
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index e2ac5fa5531b..899b803842bb 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -121,6 +121,30 @@ static int __init arm_enable_runtime_services(void)
 		return 0;
 	}
 
+	if (efi_soft_reserve_enabled()) {
+		efi_memory_desc_t *md;
+
+		for_each_efi_memory_desc(md) {
+			int md_size = md->num_pages << EFI_PAGE_SHIFT;
+			struct resource *res;
+
+			if (!(md->attribute & EFI_MEMORY_SP))
+				continue;
+
+			res = kzalloc(sizeof(*res), GFP_KERNEL);
+			if (WARN_ON(!res))
+				break;
+
+			res->start	= md->phys_addr;
+			res->end	= md->phys_addr + md_size - 1;
+			res->name	= "Soft Reserved";
+			res->flags	= IORESOURCE_MEM;
+			res->desc	= IORES_DESC_SOFT_RESERVED;
+
+			insert_resource(&iomem_resource, res);
+		}
+	}
+
 	if (efi_runtime_disabled()) {
 		pr_info("EFI runtime services will be disabled.\n");
 		return 0;
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index b1395133389e..d3067cbd5114 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/highmem.h>
+#include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/efi.h>
diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
index c9a0efca17b0..5d4f84781aa0 100644
--- a/drivers/firmware/efi/earlycon.c
+++ b/drivers/firmware/efi/earlycon.c
@@ -13,18 +13,58 @@
 
 #include <asm/early_ioremap.h>
 
+static const struct console *earlycon_console __initdata;
 static const struct font_desc *font;
 static u32 efi_x, efi_y;
 static u64 fb_base;
-static pgprot_t fb_prot;
+static bool fb_wb;
+static void *efi_fb;
+
+/*
+ * EFI earlycon needs to use early_memremap() to map the framebuffer.
+ * But early_memremap() is not usable for 'earlycon=efifb keep_bootcon',
+ * memremap() should be used instead. memremap() will be available after
+ * paging_init() which is earlier than initcall callbacks. Thus adding this
+ * early initcall function early_efi_map_fb() to map the whole EFI framebuffer.
+ */
+static int __init efi_earlycon_remap_fb(void)
+{
+	/* bail if there is no bootconsole or it has been disabled already */
+	if (!earlycon_console || !(earlycon_console->flags & CON_ENABLED))
+		return 0;
+
+	efi_fb = memremap(fb_base, screen_info.lfb_size,
+			  fb_wb ? MEMREMAP_WB : MEMREMAP_WC);
+
+	return efi_fb ? 0 : -ENOMEM;
+}
+early_initcall(efi_earlycon_remap_fb);
+
+static int __init efi_earlycon_unmap_fb(void)
+{
+	/* unmap the bootconsole fb unless keep_bootcon has left it enabled */
+	if (efi_fb && !(earlycon_console->flags & CON_ENABLED))
+		memunmap(efi_fb);
+	return 0;
+}
+late_initcall(efi_earlycon_unmap_fb);
 
 static __ref void *efi_earlycon_map(unsigned long start, unsigned long len)
 {
+	pgprot_t fb_prot;
+
+	if (efi_fb)
+		return efi_fb + start;
+
+	fb_prot = fb_wb ? PAGE_KERNEL : pgprot_writecombine(PAGE_KERNEL);
 	return early_memremap_prot(fb_base + start, len, pgprot_val(fb_prot));
 }
 
 static __ref void efi_earlycon_unmap(void *addr, unsigned long len)
 {
+	if (efi_fb)
+		return;
+
 	early_memunmap(addr, len);
 }
 
@@ -176,10 +216,7 @@ static int __init efi_earlycon_setup(struct earlycon_device *device,
 	if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
 		fb_base |= (u64)screen_info.ext_lfb_base << 32;
 
-	if (opt && !strcmp(opt, "ram"))
-		fb_prot = PAGE_KERNEL;
-	else
-		fb_prot = pgprot_writecombine(PAGE_KERNEL);
+	fb_wb = opt && !strcmp(opt, "ram");
 
 	si = &screen_info;
 	xres = si->lfb_width;
@@ -201,6 +238,7 @@ static int __init efi_earlycon_setup(struct earlycon_device *device,
 		efi_earlycon_scroll_up();
 
 	device->con->write = efi_earlycon_write;
+	earlycon_console = device->con;
 	return 0;
 }
 EARLYCON_DECLARE(efifb, efi_earlycon_setup);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index e98bbf8e56d9..621220ab3d0e 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -81,6 +81,11 @@ bool efi_runtime_disabled(void)
 	return disable_runtime;
 }
 
+bool __pure __efi_soft_reserve_enabled(void)
+{
+	return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE);
+}
+
 static int __init parse_efi_cmdline(char *str)
 {
 	if (!str) {
@@ -94,6 +99,9 @@ static int __init parse_efi_cmdline(char *str)
 	if (parse_option_str(str, "noruntime"))
 		disable_runtime = true;
 
+	if (parse_option_str(str, "nosoftreserve"))
+		set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
+
 	return 0;
 }
 early_param("efi", parse_efi_cmdline);
@@ -296,7 +304,7 @@ static __init int efivar_ssdt_load(void)
 			goto free_data;
 		}
 
-		ret = acpi_load_table(data);
+		ret = acpi_load_table(data, NULL);
 		if (ret) {
 			pr_err("failed to load table: %d\n", ret);
 			goto free_data;
@@ -673,7 +681,7 @@ device_initcall(efi_load_efivars);
 		{ name },				   \
 		{ prop },				   \
 		offsetof(struct efi_fdt_params, field),    \
-		FIELD_SIZEOF(struct efi_fdt_params, field) \
+		sizeof_field(struct efi_fdt_params, field) \
 	}
 
 struct params {
@@ -842,15 +850,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
 	if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 		     EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 		     EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
-		     EFI_MEMORY_NV |
+		     EFI_MEMORY_NV | EFI_MEMORY_SP |
 		     EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
 		snprintf(pos, size, "|attr=0x%016llx]",
 			 (unsigned long long)attr);
 	else
 		snprintf(pos, size,
-			 "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+			 "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 			 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 			 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+			 attr & EFI_MEMORY_SP      ? "SP"  : "",
 			 attr & EFI_MEMORY_NV      ? "NV"  : "",
 			 attr & EFI_MEMORY_XP      ? "XP"  : "",
 			 attr & EFI_MEMORY_RP      ? "RP"  : "",
@@ -899,7 +908,7 @@ u64 efi_mem_attributes(unsigned long phys_addr)
  *
  * Search in the EFI memory map for the region covering @phys_addr.
  * Returns the EFI memory type if the region was found in the memory
- * map, EFI_RESERVED_TYPE (zero) otherwise.
+ * map, -EINVAL otherwise.
  */
 int efi_mem_type(unsigned long phys_addr)
 {
@@ -970,6 +979,24 @@ static int __init efi_memreserve_map_root(void)
 	return 0;
 }
 
+static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
+{
+	struct resource *res, *parent;
+
+	res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+	if (!res)
+		return -ENOMEM;
+
+	res->name	= "reserved";
+	res->flags	= IORESOURCE_MEM;
+	res->start	= addr;
+	res->end	= addr + size - 1;
+
+	/* we expect a conflict with a 'System RAM' region */
+	parent = request_resource_conflict(&iomem_resource, res);
+	return parent ? request_resource(parent, res) : 0;
+}
+
 int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 {
 	struct linux_efi_memreserve *rsv;
@@ -994,7 +1021,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 			rsv->entry[index].size = size;
 
 			memunmap(rsv);
-			return 0;
+			return efi_mem_reserve_iomem(addr, size);
 		}
 		memunmap(rsv);
 	}
@@ -1004,6 +1031,12 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 	if (!rsv)
 		return -ENOMEM;
 
+	rc = efi_mem_reserve_iomem(__pa(rsv), SZ_4K);
+	if (rc) {
+		free_page((unsigned long)rsv);
+		return rc;
+	}
+
 	/*
 	 * The memremap() call above assumes that a linux_efi_memreserve entry
 	 * never crosses a page boundary, so let's ensure that this remains true
@@ -1020,7 +1053,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 	efi_memreserve_root->next = __pa(rsv);
 	spin_unlock(&efi_mem_reserve_persistent_lock);
 
-	return 0;
+	return efi_mem_reserve_iomem(addr, size);
 }
 
 static int __init efi_memreserve_root_init(void)
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index d6dd5f503fa2..2762e0662bf4 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -246,6 +246,9 @@ void __init efi_esrt_init(void)
 	int rc;
 	phys_addr_t end;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	pr_debug("esrt-init: loading.\n");
 	if (!esrt_table_exists())
 		return;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 9501edc0fcfb..6e0f34a38171 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -17,12 +17,10 @@
 #include <linux/memblock.h>
 #include <linux/types.h>
 #include <linux/sort.h>
-#include <asm/efi.h>
+#include "fake_mem.h"
 
-#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
-
-static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM];
-static int nr_fake_mem;
+struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
+int nr_fake_mem;
 
 static int __init cmp_fake_mem(const void *x1, const void *x2)
 {
@@ -36,46 +34,45 @@ static int __init cmp_fake_mem(const void *x1, const void *x2)
 	return 0;
 }
 
-void __init efi_fake_memmap(void)
+static void __init efi_fake_range(struct efi_mem_range *efi_range)
 {
+	struct efi_memory_map_data data = { 0 };
 	int new_nr_map = efi.memmap.nr_map;
 	efi_memory_desc_t *md;
-	phys_addr_t new_memmap_phy;
 	void *new_memmap;
-	int i;
-
-	if (!nr_fake_mem)
-		return;
 
 	/* count up the number of EFI memory descriptor */
-	for (i = 0; i < nr_fake_mem; i++) {
-		for_each_efi_memory_desc(md) {
-			struct range *r = &fake_mems[i].range;
-
-			new_nr_map += efi_memmap_split_count(md, r);
-		}
-	}
+	for_each_efi_memory_desc(md)
+		new_nr_map += efi_memmap_split_count(md, &efi_range->range);
 
 	/* allocate memory for new EFI memmap */
-	new_memmap_phy = efi_memmap_alloc(new_nr_map);
-	if (!new_memmap_phy)
+	if (efi_memmap_alloc(new_nr_map, &data) != 0)
 		return;
 
 	/* create new EFI memmap */
-	new_memmap = early_memremap(new_memmap_phy,
-				    efi.memmap.desc_size * new_nr_map);
+	new_memmap = early_memremap(data.phys_map, data.size);
 	if (!new_memmap) {
-		memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map);
+		__efi_memmap_free(data.phys_map, data.size, data.flags);
 		return;
 	}
 
-	for (i = 0; i < nr_fake_mem; i++)
-		efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]);
+	efi_memmap_insert(&efi.memmap, new_memmap, efi_range);
 
 	/* swap into new EFI memmap */
-	early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map);
+	early_memunmap(new_memmap, data.size);
 
-	efi_memmap_install(new_memmap_phy, new_nr_map);
+	efi_memmap_install(&data);
+}
+
+void __init efi_fake_memmap(void)
+{
+	int i;
+
+	if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
+		return;
+
+	for (i = 0; i < nr_fake_mem; i++)
+		efi_fake_range(&efi_fake_mems[i]);
 
 	/* print new EFI memmap */
 	efi_print_memmap();
@@ -104,22 +101,22 @@ static int __init setup_fake_mem(char *p)
 		if (nr_fake_mem >= EFI_MAX_FAKEMEM)
 			break;
 
-		fake_mems[nr_fake_mem].range.start = start;
-		fake_mems[nr_fake_mem].range.end = start + mem_size - 1;
-		fake_mems[nr_fake_mem].attribute = attribute;
+		efi_fake_mems[nr_fake_mem].range.start = start;
+		efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1;
+		efi_fake_mems[nr_fake_mem].attribute = attribute;
 		nr_fake_mem++;
 
 		if (*p == ',')
 			p++;
 	}
 
-	sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
+	sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
 	     cmp_fake_mem, NULL);
 
 	for (i = 0; i < nr_fake_mem; i++)
 		pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]",
-			fake_mems[i].attribute, fake_mems[i].range.start,
-			fake_mems[i].range.end);
+			efi_fake_mems[i].attribute, efi_fake_mems[i].range.start,
+			efi_fake_mems[i].range.end);
 
 	return *p == '\0' ? 0 : -EINVAL;
 }
diff --git a/drivers/firmware/efi/fake_mem.h b/drivers/firmware/efi/fake_mem.h
new file mode 100644
index 000000000000..d52791af4b18
--- /dev/null
+++ b/drivers/firmware/efi/fake_mem.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EFI_FAKE_MEM_H__
+#define __EFI_FAKE_MEM_H__
+#include <asm/efi.h>
+
+#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
+
+extern struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM];
+extern int nr_fake_mem;
+#endif /* __EFI_FAKE_MEM_H__ */
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index ee0661ddb25b..98a81576213d 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -38,7 +38,8 @@ OBJECT_FILES_NON_STANDARD	:= y
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT			:= n
 
-lib-y				:= efi-stub-helper.o gop.o secureboot.o tpm.o
+lib-y				:= efi-stub-helper.o gop.o secureboot.o tpm.o \
+				   random.o pci.o
 
 # include the stub's generic dependencies from lib/ when building for ARM/arm64
 arm-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c
@@ -47,7 +48,7 @@ arm-deps-$(CONFIG_ARM64) += sort.c
 $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o random.o \
+lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o \
 				   $(patsubst %.c,lib-%.o,$(arm-deps-y))
 
 lib-$(CONFIG_ARM)		+= arm32-stub.o
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index c382a48c6678..7bbef4a67350 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -37,16 +37,14 @@
 
 static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
 
-void efi_char16_printk(efi_system_table_t *sys_table_arg,
-			      efi_char16_t *str)
-{
-	struct efi_simple_text_output_protocol *out;
+static efi_system_table_t *__efistub_global sys_table;
 
-	out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out;
-	out->output_string(out, str);
+__pure efi_system_table_t *efi_system_table(void)
+{
+	return sys_table;
 }
 
-static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg)
+static struct screen_info *setup_graphics(void)
 {
 	efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
 	efi_status_t status;
@@ -55,27 +53,27 @@ static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg)
 	struct screen_info *si = NULL;
 
 	size = 0;
-	status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL,
-				&gop_proto, NULL, &size, gop_handle);
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+			     &gop_proto, NULL, &size, gop_handle);
 	if (status == EFI_BUFFER_TOO_SMALL) {
-		si = alloc_screen_info(sys_table_arg);
+		si = alloc_screen_info();
 		if (!si)
 			return NULL;
-		efi_setup_gop(sys_table_arg, si, &gop_proto, size);
+		efi_setup_gop(si, &gop_proto, size);
 	}
 	return si;
 }
 
-void install_memreserve_table(efi_system_table_t *sys_table_arg)
+void install_memreserve_table(void)
 {
 	struct linux_efi_memreserve *rsv;
 	efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID;
 	efi_status_t status;
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv),
-				(void **)&rsv);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv),
+			     (void **)&rsv);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table_arg, "Failed to allocate memreserve entry!\n");
+		pr_efi_err("Failed to allocate memreserve entry!\n");
 		return;
 	}
 
@@ -83,11 +81,10 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg)
 	rsv->size = 0;
 	atomic_set(&rsv->count, 0);
 
-	status = efi_call_early(install_configuration_table,
-				&memreserve_table_guid,
-				rsv);
+	status = efi_bs_call(install_configuration_table,
+			     &memreserve_table_guid, rsv);
 	if (status != EFI_SUCCESS)
-		pr_efi_err(sys_table_arg, "Failed to install memreserve config table!\n");
+		pr_efi_err("Failed to install memreserve config table!\n");
 }
 
 
@@ -97,8 +94,7 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg)
  * must be reserved. On failure it is required to free all
  * all allocations it has made.
  */
-efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
-				 unsigned long *image_addr,
+efi_status_t handle_kernel_image(unsigned long *image_addr,
 				 unsigned long *image_size,
 				 unsigned long *reserve_addr,
 				 unsigned long *reserve_size,
@@ -110,7 +106,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
  * for both archictectures, with the arch-specific code provided in the
  * handle_kernel_image() function.
  */
-unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
+unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg,
 			       unsigned long *image_addr)
 {
 	efi_loaded_image_t *image;
@@ -131,11 +127,13 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	enum efi_secureboot_mode secure_boot;
 	struct screen_info *si;
 
+	sys_table = sys_table_arg;
+
 	/* Check if we were booted by the EFI firmware */
 	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 		goto fail;
 
-	status = check_platform_features(sys_table);
+	status = check_platform_features();
 	if (status != EFI_SUCCESS)
 		goto fail;
 
@@ -147,13 +145,13 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	status = sys_table->boottime->handle_protocol(handle,
 					&loaded_image_proto, (void *)&image);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Failed to get loaded image protocol\n");
+		pr_efi_err("Failed to get loaded image protocol\n");
 		goto fail;
 	}
 
-	dram_base = get_dram_base(sys_table);
+	dram_base = get_dram_base();
 	if (dram_base == EFI_ERROR) {
-		pr_efi_err(sys_table, "Failed to find DRAM base\n");
+		pr_efi_err("Failed to find DRAM base\n");
 		goto fail;
 	}
 
@@ -162,9 +160,9 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	 * protocol. We are going to copy the command line into the
 	 * device tree, so this can be allocated anywhere.
 	 */
-	cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size);
+	cmdline_ptr = efi_convert_cmdline(image, &cmdline_size);
 	if (!cmdline_ptr) {
-		pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n");
+		pr_efi_err("getting command line via LOADED_IMAGE_PROTOCOL\n");
 		goto fail;
 	}
 
@@ -176,23 +174,25 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0)
 		efi_parse_options(cmdline_ptr);
 
-	pr_efi(sys_table, "Booting Linux Kernel...\n");
+	pr_efi("Booting Linux Kernel...\n");
 
-	si = setup_graphics(sys_table);
+	si = setup_graphics();
 
-	status = handle_kernel_image(sys_table, image_addr, &image_size,
+	status = handle_kernel_image(image_addr, &image_size,
 				     &reserve_addr,
 				     &reserve_size,
 				     dram_base, image);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Failed to relocate kernel\n");
+		pr_efi_err("Failed to relocate kernel\n");
 		goto fail_free_cmdline;
 	}
 
+	efi_retrieve_tpm2_eventlog();
+
 	/* Ask the firmware to clear memory on unclean shutdown */
-	efi_enable_reset_attack_mitigation(sys_table);
+	efi_enable_reset_attack_mitigation();
 
-	secure_boot = efi_get_secureboot(sys_table);
+	secure_boot = efi_get_secureboot();
 
 	/*
 	 * Unauthenticated device tree data is a security hazard, so ignore
@@ -202,39 +202,38 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) ||
 	     secure_boot != efi_secureboot_mode_disabled) {
 		if (strstr(cmdline_ptr, "dtb="))
-			pr_efi(sys_table, "Ignoring DTB from command line.\n");
+			pr_efi("Ignoring DTB from command line.\n");
 	} else {
-		status = handle_cmdline_files(sys_table, image, cmdline_ptr,
-					      "dtb=",
+		status = handle_cmdline_files(image, cmdline_ptr, "dtb=",
 					      ~0UL, &fdt_addr, &fdt_size);
 
 		if (status != EFI_SUCCESS) {
-			pr_efi_err(sys_table, "Failed to load device tree!\n");
+			pr_efi_err("Failed to load device tree!\n");
 			goto fail_free_image;
 		}
 	}
 
 	if (fdt_addr) {
-		pr_efi(sys_table, "Using DTB from command line\n");
+		pr_efi("Using DTB from command line\n");
 	} else {
 		/* Look for a device tree configuration table entry. */
-		fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size);
+		fdt_addr = (uintptr_t)get_fdt(&fdt_size);
 		if (fdt_addr)
-			pr_efi(sys_table, "Using DTB from configuration table\n");
+			pr_efi("Using DTB from configuration table\n");
 	}
 
 	if (!fdt_addr)
-		pr_efi(sys_table, "Generating empty DTB\n");
+		pr_efi("Generating empty DTB\n");
 
-	status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=",
+	status = handle_cmdline_files(image, cmdline_ptr, "initrd=",
 				      efi_get_max_initrd_addr(dram_base,
 							      *image_addr),
 				      (unsigned long *)&initrd_addr,
 				      (unsigned long *)&initrd_size);
 	if (status != EFI_SUCCESS)
-		pr_efi_err(sys_table, "Failed initrd from command line!\n");
+		pr_efi_err("Failed initrd from command line!\n");
 
-	efi_random_get_seed(sys_table);
+	efi_random_get_seed();
 
 	/* hibernation expects the runtime regions to stay in the same place */
 	if (!IS_ENABLED(CONFIG_HIBERNATION) && !nokaslr()) {
@@ -249,18 +248,17 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 					    EFI_RT_VIRTUAL_SIZE;
 		u32 rnd;
 
-		status = efi_get_random_bytes(sys_table, sizeof(rnd),
-					      (u8 *)&rnd);
+		status = efi_get_random_bytes(sizeof(rnd), (u8 *)&rnd);
 		if (status == EFI_SUCCESS) {
 			virtmap_base = EFI_RT_VIRTUAL_BASE +
 				       (((headroom >> 21) * rnd) >> (32 - 21));
 		}
 	}
 
-	install_memreserve_table(sys_table);
+	install_memreserve_table();
 
 	new_fdt_addr = fdt_addr;
-	status = allocate_new_fdt_and_exit_boot(sys_table, handle,
+	status = allocate_new_fdt_and_exit_boot(handle,
 				&new_fdt_addr, efi_get_max_fdt_addr(dram_base),
 				initrd_addr, initrd_size, cmdline_ptr,
 				fdt_addr, fdt_size);
@@ -273,17 +271,17 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	if (status == EFI_SUCCESS)
 		return new_fdt_addr;
 
-	pr_efi_err(sys_table, "Failed to update FDT and exit boot services\n");
+	pr_efi_err("Failed to update FDT and exit boot services\n");
 
-	efi_free(sys_table, initrd_size, initrd_addr);
-	efi_free(sys_table, fdt_size, fdt_addr);
+	efi_free(initrd_size, initrd_addr);
+	efi_free(fdt_size, fdt_addr);
 
 fail_free_image:
-	efi_free(sys_table, image_size, *image_addr);
-	efi_free(sys_table, reserve_size, reserve_addr);
+	efi_free(image_size, *image_addr);
+	efi_free(reserve_size, reserve_addr);
 fail_free_cmdline:
-	free_screen_info(sys_table, si);
-	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
+	free_screen_info(si);
+	efi_free(cmdline_size, (unsigned long)cmdline_ptr);
 fail:
 	return EFI_ERROR;
 }
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 41213bf5fcf5..7b2a6382b647 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -7,7 +7,7 @@
 
 #include "efistub.h"
 
-efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
+efi_status_t check_platform_features(void)
 {
 	int block;
 
@@ -18,7 +18,7 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
 	/* LPAE kernels need compatible hardware */
 	block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
 	if (block < 5) {
-		pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n");
+		pr_efi_err("This LPAE kernel is not supported by your CPU\n");
 		return EFI_UNSUPPORTED;
 	}
 	return EFI_SUCCESS;
@@ -26,7 +26,7 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
 
 static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID;
 
-struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
+struct screen_info *alloc_screen_info(void)
 {
 	struct screen_info *si;
 	efi_status_t status;
@@ -37,32 +37,31 @@ struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
 	 * its contents while we hand over to the kernel proper from the
 	 * decompressor.
 	 */
-	status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
-				sizeof(*si), (void **)&si);
+	status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+			     sizeof(*si), (void **)&si);
 
 	if (status != EFI_SUCCESS)
 		return NULL;
 
-	status = efi_call_early(install_configuration_table,
-				&screen_info_guid, si);
+	status = efi_bs_call(install_configuration_table,
+			     &screen_info_guid, si);
 	if (status == EFI_SUCCESS)
 		return si;
 
-	efi_call_early(free_pool, si);
+	efi_bs_call(free_pool, si);
 	return NULL;
 }
 
-void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si)
+void free_screen_info(struct screen_info *si)
 {
 	if (!si)
 		return;
 
-	efi_call_early(install_configuration_table, &screen_info_guid, NULL);
-	efi_call_early(free_pool, si);
+	efi_bs_call(install_configuration_table, &screen_info_guid, NULL);
+	efi_bs_call(free_pool, si);
 }
 
-static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
-					unsigned long dram_base,
+static efi_status_t reserve_kernel_base(unsigned long dram_base,
 					unsigned long *reserve_addr,
 					unsigned long *reserve_size)
 {
@@ -92,8 +91,8 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
 	 */
 	alloc_addr = dram_base + MAX_UNCOMP_KERNEL_SIZE;
 	nr_pages = MAX_UNCOMP_KERNEL_SIZE / EFI_PAGE_SIZE;
-	status = efi_call_early(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
-				EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr);
+	status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
+			     EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr);
 	if (status == EFI_SUCCESS) {
 		if (alloc_addr == dram_base) {
 			*reserve_addr = alloc_addr;
@@ -119,10 +118,9 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
 	 * released to the OS after ExitBootServices(), the decompressor can
 	 * safely overwrite them.
 	 */
-	status = efi_get_memory_map(sys_table_arg, &map);
+	status = efi_get_memory_map(&map);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table_arg,
-			   "reserve_kernel_base(): Unable to retrieve memory map.\n");
+		pr_efi_err("reserve_kernel_base(): Unable to retrieve memory map.\n");
 		return status;
 	}
 
@@ -146,6 +144,11 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
 			continue;
 
 		case EFI_CONVENTIONAL_MEMORY:
+			/* Skip soft reserved conventional memory */
+			if (efi_soft_reserve_enabled() &&
+			    (desc->attribute & EFI_MEMORY_SP))
+				continue;
+
 			/*
 			 * Reserve the intersection between this entry and the
 			 * region.
@@ -153,14 +156,13 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
 			start = max(start, (u64)dram_base);
 			end = min(end, (u64)dram_base + MAX_UNCOMP_KERNEL_SIZE);
 
-			status = efi_call_early(allocate_pages,
-						EFI_ALLOCATE_ADDRESS,
-						EFI_LOADER_DATA,
-						(end - start) / EFI_PAGE_SIZE,
-						&start);
+			status = efi_bs_call(allocate_pages,
+					     EFI_ALLOCATE_ADDRESS,
+					     EFI_LOADER_DATA,
+					     (end - start) / EFI_PAGE_SIZE,
+					     &start);
 			if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg,
-					"reserve_kernel_base(): alloc failed.\n");
+				pr_efi_err("reserve_kernel_base(): alloc failed.\n");
 				goto out;
 			}
 			break;
@@ -183,12 +185,11 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
 
 	status = EFI_SUCCESS;
 out:
-	efi_call_early(free_pool, memory_map);
+	efi_bs_call(free_pool, memory_map);
 	return status;
 }
 
-efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
-				 unsigned long *image_addr,
+efi_status_t handle_kernel_image(unsigned long *image_addr,
 				 unsigned long *image_size,
 				 unsigned long *reserve_addr,
 				 unsigned long *reserve_size,
@@ -216,10 +217,9 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 	 */
 	kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE;
 
-	status = reserve_kernel_base(sys_table, kernel_base, reserve_addr,
-				     reserve_size);
+	status = reserve_kernel_base(kernel_base, reserve_addr, reserve_size);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n");
+		pr_efi_err("Unable to allocate memory for uncompressed kernel.\n");
 		return status;
 	}
 
@@ -228,12 +228,11 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 	 * memory window.
 	 */
 	*image_size = image->image_size;
-	status = efi_relocate_kernel(sys_table, image_addr, *image_size,
-				     *image_size,
+	status = efi_relocate_kernel(image_addr, *image_size, *image_size,
 				     kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Failed to relocate kernel.\n");
-		efi_free(sys_table, *reserve_size, *reserve_addr);
+		pr_efi_err("Failed to relocate kernel.\n");
+		efi_free(*reserve_size, *reserve_addr);
 		*reserve_size = 0;
 		return status;
 	}
@@ -244,10 +243,10 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 	 * address at which the zImage is loaded.
 	 */
 	if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) {
-		pr_efi_err(sys_table, "Failed to relocate kernel, no low memory available.\n");
-		efi_free(sys_table, *reserve_size, *reserve_addr);
+		pr_efi_err("Failed to relocate kernel, no low memory available.\n");
+		efi_free(*reserve_size, *reserve_addr);
 		*reserve_size = 0;
-		efi_free(sys_table, *image_size, *image_addr);
+		efi_free(*image_size, *image_addr);
 		*image_size = 0;
 		return EFI_LOAD_ERROR;
 	}
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 1550d244e996..2915b44132e6 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -21,7 +21,7 @@
 
 #include "efistub.h"
 
-efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
+efi_status_t check_platform_features(void)
 {
 	u64 tg;
 
@@ -32,16 +32,15 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
 	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
 	if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) {
 		if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
-			pr_efi_err(sys_table_arg, "This 64 KB granular kernel is not supported by your CPU\n");
+			pr_efi_err("This 64 KB granular kernel is not supported by your CPU\n");
 		else
-			pr_efi_err(sys_table_arg, "This 16 KB granular kernel is not supported by your CPU\n");
+			pr_efi_err("This 16 KB granular kernel is not supported by your CPU\n");
 		return EFI_UNSUPPORTED;
 	}
 	return EFI_SUCCESS;
 }
 
-efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
-				 unsigned long *image_addr,
+efi_status_t handle_kernel_image(unsigned long *image_addr,
 				 unsigned long *image_size,
 				 unsigned long *reserve_addr,
 				 unsigned long *reserve_size,
@@ -56,17 +55,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
 		if (!nokaslr()) {
-			status = efi_get_random_bytes(sys_table_arg,
-						      sizeof(phys_seed),
+			status = efi_get_random_bytes(sizeof(phys_seed),
 						      (u8 *)&phys_seed);
 			if (status == EFI_NOT_FOUND) {
-				pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+				pr_efi("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
 			} else if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n");
+				pr_efi_err("efi_get_random_bytes() failed\n");
 				return status;
 			}
 		} else {
-			pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n");
+			pr_efi("KASLR disabled on kernel command line\n");
 		}
 	}
 
@@ -108,7 +106,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
 		 * locate the kernel at a randomized offset in physical memory.
 		 */
 		*reserve_size = kernel_memsize + offset;
-		status = efi_random_alloc(sys_table_arg, *reserve_size,
+		status = efi_random_alloc(*reserve_size,
 					  MIN_KIMG_ALIGN, reserve_addr,
 					  (u32)phys_seed);
 
@@ -131,19 +129,19 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
 		*image_addr = *reserve_addr = preferred_offset;
 		*reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
 
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA,
-					*reserve_size / EFI_PAGE_SIZE,
-					(efi_physical_addr_t *)reserve_addr);
+		status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+				     EFI_LOADER_DATA,
+				     *reserve_size / EFI_PAGE_SIZE,
+				     (efi_physical_addr_t *)reserve_addr);
 	}
 
 	if (status != EFI_SUCCESS) {
 		*reserve_size = kernel_memsize + TEXT_OFFSET;
-		status = efi_low_alloc(sys_table_arg, *reserve_size,
+		status = efi_low_alloc(*reserve_size,
 				       MIN_KIMG_ALIGN, reserve_addr);
 
 		if (status != EFI_SUCCESS) {
-			pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
+			pr_efi_err("Failed to relocate kernel\n");
 			*reserve_size = 0;
 			return status;
 		}
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 35dbc2791c97..74ddfb496140 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -27,23 +27,30 @@
  */
 #define EFI_READ_CHUNK_SIZE	(1024 * 1024)
 
-static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
+static unsigned long efi_chunk_size = EFI_READ_CHUNK_SIZE;
 
-static int __section(.data) __nokaslr;
-static int __section(.data) __quiet;
-static int __section(.data) __novamap;
+static bool __efistub_global efi_nokaslr;
+static bool __efistub_global efi_quiet;
+static bool __efistub_global efi_novamap;
+static bool __efistub_global efi_nosoftreserve;
+static bool __efistub_global efi_disable_pci_dma =
+					IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
 
-int __pure nokaslr(void)
+bool __pure nokaslr(void)
 {
-	return __nokaslr;
+	return efi_nokaslr;
 }
-int __pure is_quiet(void)
+bool __pure is_quiet(void)
 {
-	return __quiet;
+	return efi_quiet;
 }
-int __pure novamap(void)
+bool __pure novamap(void)
 {
-	return __novamap;
+	return efi_novamap;
+}
+bool __pure __efi_soft_reserve_enabled(void)
+{
+	return !efi_nosoftreserve;
 }
 
 #define EFI_MMAP_NR_SLACK_SLOTS	8
@@ -53,7 +60,7 @@ struct file_info {
 	u64 size;
 };
 
-void efi_printk(efi_system_table_t *sys_table_arg, char *str)
+void efi_printk(char *str)
 {
 	char *s8;
 
@@ -63,10 +70,10 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str)
 		ch[0] = *s8;
 		if (*s8 == '\n') {
 			efi_char16_t nl[2] = { '\r', 0 };
-			efi_char16_printk(sys_table_arg, nl);
+			efi_char16_printk(nl);
 		}
 
-		efi_char16_printk(sys_table_arg, ch);
+		efi_char16_printk(ch);
 	}
 }
 
@@ -79,8 +86,7 @@ static inline bool mmap_has_headroom(unsigned long buff_size,
 	return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS;
 }
 
-efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
-				struct efi_boot_memmap *map)
+efi_status_t efi_get_memory_map(struct efi_boot_memmap *map)
 {
 	efi_memory_desc_t *m = NULL;
 	efi_status_t status;
@@ -91,19 +97,19 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
 	*map->map_size =	*map->desc_size * 32;
 	*map->buff_size =	*map->map_size;
 again:
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				*map->map_size, (void **)&m);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+			     *map->map_size, (void **)&m);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
 	*map->desc_size = 0;
 	key = 0;
-	status = efi_call_early(get_memory_map, map->map_size, m,
-				&key, map->desc_size, &desc_version);
+	status = efi_bs_call(get_memory_map, map->map_size, m,
+			     &key, map->desc_size, &desc_version);
 	if (status == EFI_BUFFER_TOO_SMALL ||
 	    !mmap_has_headroom(*map->buff_size, *map->map_size,
 			       *map->desc_size)) {
-		efi_call_early(free_pool, m);
+		efi_bs_call(free_pool, m);
 		/*
 		 * Make sure there is some entries of headroom so that the
 		 * buffer can be reused for a new map after allocations are
@@ -117,7 +123,7 @@ again:
 	}
 
 	if (status != EFI_SUCCESS)
-		efi_call_early(free_pool, m);
+		efi_bs_call(free_pool, m);
 
 	if (map->key_ptr && status == EFI_SUCCESS)
 		*map->key_ptr = key;
@@ -130,7 +136,7 @@ fail:
 }
 
 
-unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
+unsigned long get_dram_base(void)
 {
 	efi_status_t status;
 	unsigned long map_size, buff_size;
@@ -146,7 +152,7 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 	boot_map.key_ptr =	NULL;
 	boot_map.buff_size =	&buff_size;
 
-	status = efi_get_memory_map(sys_table_arg, &boot_map);
+	status = efi_get_memory_map(&boot_map);
 	if (status != EFI_SUCCESS)
 		return membase;
 
@@ -159,7 +165,7 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 		}
 	}
 
-	efi_call_early(free_pool, map.map);
+	efi_bs_call(free_pool, map.map);
 
 	return membase;
 }
@@ -167,8 +173,7 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 /*
  * Allocate at the highest possible address that is not above 'max'.
  */
-efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
-			    unsigned long size, unsigned long align,
+efi_status_t efi_high_alloc(unsigned long size, unsigned long align,
 			    unsigned long *addr, unsigned long max)
 {
 	unsigned long map_size, desc_size, buff_size;
@@ -186,7 +191,7 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
 	boot_map.key_ptr =	NULL;
 	boot_map.buff_size =	&buff_size;
 
-	status = efi_get_memory_map(sys_table_arg, &boot_map);
+	status = efi_get_memory_map(&boot_map);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
@@ -211,6 +216,10 @@ again:
 		if (desc->type != EFI_CONVENTIONAL_MEMORY)
 			continue;
 
+		if (efi_soft_reserve_enabled() &&
+		    (desc->attribute & EFI_MEMORY_SP))
+			continue;
+
 		if (desc->num_pages < nr_pages)
 			continue;
 
@@ -242,9 +251,8 @@ again:
 	if (!max_addr)
 		status = EFI_NOT_FOUND;
 	else {
-		status = efi_call_early(allocate_pages,
-					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
-					nr_pages, &max_addr);
+		status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+				     EFI_LOADER_DATA, nr_pages, &max_addr);
 		if (status != EFI_SUCCESS) {
 			max = max_addr;
 			max_addr = 0;
@@ -254,7 +262,7 @@ again:
 		*addr = max_addr;
 	}
 
-	efi_call_early(free_pool, map);
+	efi_bs_call(free_pool, map);
 fail:
 	return status;
 }
@@ -262,8 +270,7 @@ fail:
 /*
  * Allocate at the lowest possible address that is not below 'min'.
  */
-efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
-				 unsigned long size, unsigned long align,
+efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
 				 unsigned long *addr, unsigned long min)
 {
 	unsigned long map_size, desc_size, buff_size;
@@ -280,7 +287,7 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
 	boot_map.key_ptr =	NULL;
 	boot_map.buff_size =	&buff_size;
 
-	status = efi_get_memory_map(sys_table_arg, &boot_map);
+	status = efi_get_memory_map(&boot_map);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
@@ -305,6 +312,10 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
 		if (desc->type != EFI_CONVENTIONAL_MEMORY)
 			continue;
 
+		if (efi_soft_reserve_enabled() &&
+		    (desc->attribute & EFI_MEMORY_SP))
+			continue;
+
 		if (desc->num_pages < nr_pages)
 			continue;
 
@@ -318,9 +329,8 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
 		if ((start + size) > end)
 			continue;
 
-		status = efi_call_early(allocate_pages,
-					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
-					nr_pages, &start);
+		status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+				     EFI_LOADER_DATA, nr_pages, &start);
 		if (status == EFI_SUCCESS) {
 			*addr = start;
 			break;
@@ -330,13 +340,12 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
 	if (i == map_size / desc_size)
 		status = EFI_NOT_FOUND;
 
-	efi_call_early(free_pool, map);
+	efi_bs_call(free_pool, map);
 fail:
 	return status;
 }
 
-void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
-	      unsigned long addr)
+void efi_free(unsigned long size, unsigned long addr)
 {
 	unsigned long nr_pages;
 
@@ -344,12 +353,11 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
 		return;
 
 	nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
-	efi_call_early(free_pages, addr, nr_pages);
+	efi_bs_call(free_pages, addr, nr_pages);
 }
 
-static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh,
-				  efi_char16_t *filename_16, void **handle,
-				  u64 *file_sz)
+static efi_status_t efi_file_size(void *__fh, efi_char16_t *filename_16,
+				  void **handle, u64 *file_sz)
 {
 	efi_file_handle_t *h, *fh = __fh;
 	efi_file_info_t *info;
@@ -357,81 +365,75 @@ static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh,
 	efi_guid_t info_guid = EFI_FILE_INFO_ID;
 	unsigned long info_sz;
 
-	status = efi_call_proto(efi_file_handle, open, fh, &h, filename_16,
-				EFI_FILE_MODE_READ, (u64)0);
+	status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, 0);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg, "Failed to open file: ");
-		efi_char16_printk(sys_table_arg, filename_16);
-		efi_printk(sys_table_arg, "\n");
+		efi_printk("Failed to open file: ");
+		efi_char16_printk(filename_16);
+		efi_printk("\n");
 		return status;
 	}
 
 	*handle = h;
 
 	info_sz = 0;
-	status = efi_call_proto(efi_file_handle, get_info, h, &info_guid,
-				&info_sz, NULL);
+	status = h->get_info(h, &info_guid, &info_sz, NULL);
 	if (status != EFI_BUFFER_TOO_SMALL) {
-		efi_printk(sys_table_arg, "Failed to get file info size\n");
+		efi_printk("Failed to get file info size\n");
 		return status;
 	}
 
 grow:
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				info_sz, (void **)&info);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, info_sz,
+			     (void **)&info);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg, "Failed to alloc mem for file info\n");
+		efi_printk("Failed to alloc mem for file info\n");
 		return status;
 	}
 
-	status = efi_call_proto(efi_file_handle, get_info, h, &info_guid,
-				&info_sz, info);
+	status = h->get_info(h, &info_guid, &info_sz, info);
 	if (status == EFI_BUFFER_TOO_SMALL) {
-		efi_call_early(free_pool, info);
+		efi_bs_call(free_pool, info);
 		goto grow;
 	}
 
 	*file_sz = info->file_size;
-	efi_call_early(free_pool, info);
+	efi_bs_call(free_pool, info);
 
 	if (status != EFI_SUCCESS)
-		efi_printk(sys_table_arg, "Failed to get initrd info\n");
+		efi_printk("Failed to get initrd info\n");
 
 	return status;
 }
 
-static efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr)
+static efi_status_t efi_file_read(efi_file_handle_t *handle,
+				  unsigned long *size, void *addr)
 {
-	return efi_call_proto(efi_file_handle, read, handle, size, addr);
+	return handle->read(handle, size, addr);
 }
 
-static efi_status_t efi_file_close(void *handle)
+static efi_status_t efi_file_close(efi_file_handle_t *handle)
 {
-	return efi_call_proto(efi_file_handle, close, handle);
+	return handle->close(handle);
 }
 
-static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
-				    efi_loaded_image_t *image,
+static efi_status_t efi_open_volume(efi_loaded_image_t *image,
 				    efi_file_handle_t **__fh)
 {
 	efi_file_io_interface_t *io;
 	efi_file_handle_t *fh;
 	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
 	efi_status_t status;
-	void *handle = (void *)(unsigned long)efi_table_attr(efi_loaded_image,
-							     device_handle,
-							     image);
+	efi_handle_t handle = image->device_handle;
 
-	status = efi_call_early(handle_protocol, handle,
-				&fs_proto, (void **)&io);
+	status = efi_bs_call(handle_protocol, handle, &fs_proto, (void **)&io);
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
+		efi_printk("Failed to handle fs_proto\n");
 		return status;
 	}
 
-	status = efi_call_proto(efi_file_io_interface, open_volume, io, &fh);
+	status = io->open_volume(io, &fh);
 	if (status != EFI_SUCCESS)
-		efi_printk(sys_table_arg, "Failed to open volume\n");
+		efi_printk("Failed to open volume\n");
 	else
 		*__fh = fh;
 
@@ -452,11 +454,11 @@ efi_status_t efi_parse_options(char const *cmdline)
 
 	str = strstr(cmdline, "nokaslr");
 	if (str == cmdline || (str && str > cmdline && *(str - 1) == ' '))
-		__nokaslr = 1;
+		efi_nokaslr = true;
 
 	str = strstr(cmdline, "quiet");
 	if (str == cmdline || (str && str > cmdline && *(str - 1) == ' '))
-		__quiet = 1;
+		efi_quiet = true;
 
 	/*
 	 * If no EFI parameters were specified on the cmdline we've got
@@ -476,12 +478,28 @@ efi_status_t efi_parse_options(char const *cmdline)
 	while (*str && *str != ' ') {
 		if (!strncmp(str, "nochunk", 7)) {
 			str += strlen("nochunk");
-			__chunk_size = -1UL;
+			efi_chunk_size = -1UL;
 		}
 
 		if (!strncmp(str, "novamap", 7)) {
 			str += strlen("novamap");
-			__novamap = 1;
+			efi_novamap = true;
+		}
+
+		if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) &&
+		    !strncmp(str, "nosoftreserve", 7)) {
+			str += strlen("nosoftreserve");
+			efi_nosoftreserve = true;
+		}
+
+		if (!strncmp(str, "disable_early_pci_dma", 21)) {
+			str += strlen("disable_early_pci_dma");
+			efi_disable_pci_dma = true;
+		}
+
+		if (!strncmp(str, "no_disable_early_pci_dma", 24)) {
+			str += strlen("no_disable_early_pci_dma");
+			efi_disable_pci_dma = false;
 		}
 
 		/* Group words together, delimited by "," */
@@ -501,8 +519,7 @@ efi_status_t efi_parse_options(char const *cmdline)
  * We only support loading a file from the same filesystem as
  * the kernel image.
  */
-efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
-				  efi_loaded_image_t *image,
+efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
 				  char *cmd_line, char *option_string,
 				  unsigned long max_addr,
 				  unsigned long *load_addr,
@@ -551,10 +568,10 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 	if (!nr_files)
 		return EFI_SUCCESS;
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				nr_files * sizeof(*files), (void **)&files);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+			     nr_files * sizeof(*files), (void **)&files);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table_arg, "Failed to alloc mem for file handle list\n");
+		pr_efi_err("Failed to alloc mem for file handle list\n");
 		goto fail;
 	}
 
@@ -593,13 +610,13 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 		/* Only open the volume once. */
 		if (!i) {
-			status = efi_open_volume(sys_table_arg, image, &fh);
+			status = efi_open_volume(image, &fh);
 			if (status != EFI_SUCCESS)
 				goto free_files;
 		}
 
-		status = efi_file_size(sys_table_arg, fh, filename_16,
-				       (void **)&file->handle, &file->size);
+		status = efi_file_size(fh, filename_16, (void **)&file->handle,
+				       &file->size);
 		if (status != EFI_SUCCESS)
 			goto close_handles;
 
@@ -614,16 +631,16 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 		 * so allocate enough memory for all the files.  This is used
 		 * for loading multiple files.
 		 */
-		status = efi_high_alloc(sys_table_arg, file_size_total, 0x1000,
-				    &file_addr, max_addr);
+		status = efi_high_alloc(file_size_total, 0x1000, &file_addr,
+					max_addr);
 		if (status != EFI_SUCCESS) {
-			pr_efi_err(sys_table_arg, "Failed to alloc highmem for files\n");
+			pr_efi_err("Failed to alloc highmem for files\n");
 			goto close_handles;
 		}
 
 		/* We've run out of free low memory. */
 		if (file_addr > max_addr) {
-			pr_efi_err(sys_table_arg, "We've run out of free low memory\n");
+			pr_efi_err("We've run out of free low memory\n");
 			status = EFI_INVALID_PARAMETER;
 			goto free_file_total;
 		}
@@ -636,8 +653,8 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 			while (size) {
 				unsigned long chunksize;
 
-				if (IS_ENABLED(CONFIG_X86) && size > __chunk_size)
-					chunksize = __chunk_size;
+				if (IS_ENABLED(CONFIG_X86) && size > efi_chunk_size)
+					chunksize = efi_chunk_size;
 				else
 					chunksize = size;
 
@@ -645,7 +662,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 						       &chunksize,
 						       (void *)addr);
 				if (status != EFI_SUCCESS) {
-					pr_efi_err(sys_table_arg, "Failed to read file\n");
+					pr_efi_err("Failed to read file\n");
 					goto free_file_total;
 				}
 				addr += chunksize;
@@ -657,7 +674,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 	}
 
-	efi_call_early(free_pool, files);
+	efi_bs_call(free_pool, files);
 
 	*load_addr = file_addr;
 	*load_size = file_size_total;
@@ -665,13 +682,13 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 	return status;
 
 free_file_total:
-	efi_free(sys_table_arg, file_size_total, file_addr);
+	efi_free(file_size_total, file_addr);
 
 close_handles:
 	for (k = j; k < i; k++)
 		efi_file_close(files[k].handle);
 free_files:
-	efi_call_early(free_pool, files);
+	efi_bs_call(free_pool, files);
 fail:
 	*load_addr = 0;
 	*load_size = 0;
@@ -688,8 +705,7 @@ fail:
  * address is not available the lowest available address will
  * be used.
  */
-efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
-				 unsigned long *image_addr,
+efi_status_t efi_relocate_kernel(unsigned long *image_addr,
 				 unsigned long image_size,
 				 unsigned long alloc_size,
 				 unsigned long preferred_addr,
@@ -718,20 +734,19 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
 	 * as possible while respecting the required alignment.
 	 */
 	nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
-	status = efi_call_early(allocate_pages,
-				EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
-				nr_pages, &efi_addr);
+	status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+			     EFI_LOADER_DATA, nr_pages, &efi_addr);
 	new_addr = efi_addr;
 	/*
 	 * If preferred address allocation failed allocate as low as
 	 * possible.
 	 */
 	if (status != EFI_SUCCESS) {
-		status = efi_low_alloc_above(sys_table_arg, alloc_size,
-					     alignment, &new_addr, min_addr);
+		status = efi_low_alloc_above(alloc_size, alignment, &new_addr,
+					     min_addr);
 	}
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n");
+		pr_efi_err("Failed to allocate usable memory for kernel.\n");
 		return status;
 	}
 
@@ -805,8 +820,7 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
  * Size of memory allocated return in *cmd_line_len.
  * Returns NULL on error.
  */
-char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
-			  efi_loaded_image_t *image,
+char *efi_convert_cmdline(efi_loaded_image_t *image,
 			  int *cmd_line_len)
 {
 	const u16 *s2;
@@ -835,8 +849,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
 
 	options_bytes++;	/* NUL termination */
 
-	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
-				&cmdline_addr, MAX_CMDLINE_ADDRESS);
+	status = efi_high_alloc(options_bytes, 0, &cmdline_addr,
+				MAX_CMDLINE_ADDRESS);
 	if (status != EFI_SUCCESS)
 		return NULL;
 
@@ -858,24 +872,26 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
  * specific structure may be passed to the function via priv.  The client
  * function may be called multiple times.
  */
-efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
-				    void *handle,
+efi_status_t efi_exit_boot_services(void *handle,
 				    struct efi_boot_memmap *map,
 				    void *priv,
 				    efi_exit_boot_map_processing priv_func)
 {
 	efi_status_t status;
 
-	status = efi_get_memory_map(sys_table_arg, map);
+	status = efi_get_memory_map(map);
 
 	if (status != EFI_SUCCESS)
 		goto fail;
 
-	status = priv_func(sys_table_arg, map, priv);
+	status = priv_func(map, priv);
 	if (status != EFI_SUCCESS)
 		goto free_map;
 
-	status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+	if (efi_disable_pci_dma)
+		efi_pci_disable_bridge_busmaster();
+
+	status = efi_bs_call(exit_boot_services, handle, *map->key_ptr);
 
 	if (status == EFI_INVALID_PARAMETER) {
 		/*
@@ -892,23 +908,23 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
 		 * to get_memory_map() is expected to succeed here.
 		 */
 		*map->map_size = *map->buff_size;
-		status = efi_call_early(get_memory_map,
-					map->map_size,
-					*map->map,
-					map->key_ptr,
-					map->desc_size,
-					map->desc_ver);
+		status = efi_bs_call(get_memory_map,
+				     map->map_size,
+				     *map->map,
+				     map->key_ptr,
+				     map->desc_size,
+				     map->desc_ver);
 
 		/* exit_boot_services() was called, thus cannot free */
 		if (status != EFI_SUCCESS)
 			goto fail;
 
-		status = priv_func(sys_table_arg, map, priv);
+		status = priv_func(map, priv);
 		/* exit_boot_services() was called, thus cannot free */
 		if (status != EFI_SUCCESS)
 			goto fail;
 
-		status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+		status = efi_bs_call(exit_boot_services, handle, *map->key_ptr);
 	}
 
 	/* exit_boot_services() was called, thus cannot free */
@@ -918,38 +934,31 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
 	return EFI_SUCCESS;
 
 free_map:
-	efi_call_early(free_pool, *map->map);
+	efi_bs_call(free_pool, *map->map);
 fail:
 	return status;
 }
 
-#define GET_EFI_CONFIG_TABLE(bits)					\
-static void *get_efi_config_table##bits(efi_system_table_t *_sys_table,	\
-					efi_guid_t guid)		\
-{									\
-	efi_system_table_##bits##_t *sys_table;				\
-	efi_config_table_##bits##_t *tables;				\
-	int i;								\
-									\
-	sys_table = (typeof(sys_table))_sys_table;			\
-	tables = (typeof(tables))(unsigned long)sys_table->tables;	\
-									\
-	for (i = 0; i < sys_table->nr_tables; i++) {			\
-		if (efi_guidcmp(tables[i].guid, guid) != 0)		\
-			continue;					\
-									\
-		return (void *)(unsigned long)tables[i].table;		\
-	}								\
-									\
-	return NULL;							\
+void *get_efi_config_table(efi_guid_t guid)
+{
+	unsigned long tables = efi_table_attr(efi_system_table(), tables);
+	int nr_tables = efi_table_attr(efi_system_table(), nr_tables);
+	int i;
+
+	for (i = 0; i < nr_tables; i++) {
+		efi_config_table_t *t = (void *)tables;
+
+		if (efi_guidcmp(t->guid, guid) == 0)
+			return efi_table_attr(t, table);
+
+		tables += efi_is_native() ? sizeof(efi_config_table_t)
+					  : sizeof(efi_config_table_32_t);
+	}
+	return NULL;
 }
-GET_EFI_CONFIG_TABLE(32)
-GET_EFI_CONFIG_TABLE(64)
 
-void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid)
+void efi_char16_printk(efi_char16_t *str)
 {
-	if (efi_is_64bit())
-		return get_efi_config_table64(sys_table, guid);
-	else
-		return get_efi_config_table32(sys_table, guid);
+	efi_call_proto(efi_table_attr(efi_system_table(), con_out),
+		       output_string, str);
 }
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 7f1556fd867d..c244b165005e 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -25,22 +25,30 @@
 #define EFI_ALLOC_ALIGN		EFI_PAGE_SIZE
 #endif
 
-extern int __pure nokaslr(void);
-extern int __pure is_quiet(void);
-extern int __pure novamap(void);
+#ifdef CONFIG_ARM
+#define __efistub_global	__section(.data)
+#else
+#define __efistub_global
+#endif
+
+extern bool __pure nokaslr(void);
+extern bool __pure is_quiet(void);
+extern bool __pure novamap(void);
+
+extern __pure efi_system_table_t  *efi_system_table(void);
 
-#define pr_efi(sys_table, msg)		do {				\
-	if (!is_quiet()) efi_printk(sys_table, "EFI stub: "msg);	\
+#define pr_efi(msg)		do {			\
+	if (!is_quiet()) efi_printk("EFI stub: "msg);	\
 } while (0)
 
-#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg)
+#define pr_efi_err(msg) efi_printk("EFI stub: ERROR: "msg)
 
-void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
+void efi_char16_printk(efi_char16_t *);
+void efi_char16_printk(efi_char16_t *);
 
-unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
+unsigned long get_dram_base(void);
 
-efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
-					    void *handle,
+efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
 					    unsigned long *new_fdt_addr,
 					    unsigned long max_addr,
 					    u64 initrd_addr, u64 initrd_size,
@@ -48,24 +56,20 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 					    unsigned long fdt_addr,
 					    unsigned long fdt_size);
 
-void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size);
+void *get_fdt(unsigned long *fdt_size);
 
 void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 		     unsigned long desc_size, efi_memory_desc_t *runtime_map,
 		     int *count);
 
-efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
-				  unsigned long size, u8 *out);
+efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
 
-efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
-			      unsigned long size, unsigned long align,
+efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
 			      unsigned long *addr, unsigned long random_seed);
 
-efi_status_t check_platform_features(efi_system_table_t *sys_table_arg);
+efi_status_t check_platform_features(void);
 
-efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg);
-
-void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid);
+void *get_efi_config_table(efi_guid_t guid);
 
 /* Helper macros for the usual case of using simple C variables: */
 #ifndef fdt_setprop_inplace_var
@@ -78,4 +82,12 @@ void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid);
 	fdt_setprop((fdt), (node_offset), (name), &(var), sizeof(var))
 #endif
 
+#define get_efi_var(name, vendor, ...)				\
+	efi_rt_call(get_variable, (efi_char16_t *)(name),	\
+		    (efi_guid_t *)(vendor), __VA_ARGS__)
+
+#define set_efi_var(name, vendor, ...)				\
+	efi_rt_call(set_variable, (efi_char16_t *)(name),	\
+		    (efi_guid_t *)(vendor), __VA_ARGS__)
+
 #endif
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 0bf0190917e0..0a91e5232127 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,7 +16,7 @@
 #define EFI_DT_ADDR_CELLS_DEFAULT 2
 #define EFI_DT_SIZE_CELLS_DEFAULT 2
 
-static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
+static void fdt_update_cell_size(void *fdt)
 {
 	int offset;
 
@@ -27,8 +27,7 @@ static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
 	fdt_setprop_u32(fdt, offset, "#size-cells",    EFI_DT_SIZE_CELLS_DEFAULT);
 }
 
-static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
-			       unsigned long orig_fdt_size,
+static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
 			       void *fdt, int new_fdt_size, char *cmdline_ptr,
 			       u64 initrd_addr, u64 initrd_size)
 {
@@ -40,7 +39,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	/* Do some checks on provided FDT, if it exists: */
 	if (orig_fdt) {
 		if (fdt_check_header(orig_fdt)) {
-			pr_efi_err(sys_table, "Device Tree header not valid!\n");
+			pr_efi_err("Device Tree header not valid!\n");
 			return EFI_LOAD_ERROR;
 		}
 		/*
@@ -48,7 +47,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 		 * configuration table:
 		 */
 		if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) {
-			pr_efi_err(sys_table, "Truncated device tree! foo!\n");
+			pr_efi_err("Truncated device tree! foo!\n");
 			return EFI_LOAD_ERROR;
 		}
 	}
@@ -62,7 +61,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 			 * Any failure from the following function is
 			 * non-critical:
 			 */
-			fdt_update_cell_size(sys_table, fdt);
+			fdt_update_cell_size(fdt);
 		}
 	}
 
@@ -111,7 +110,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 
 	/* Add FDT entries for EFI runtime services in chosen node. */
 	node = fdt_subnode_offset(fdt, 0, "chosen");
-	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)sys_table);
+	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)efi_system_table());
 
 	status = fdt_setprop_var(fdt, node, "linux,uefi-system-table", fdt_val64);
 	if (status)
@@ -140,7 +139,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
 		efi_status_t efi_status;
 
-		efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64),
+		efi_status = efi_get_random_bytes(sizeof(fdt_val64),
 						  (u8 *)&fdt_val64);
 		if (efi_status == EFI_SUCCESS) {
 			status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64);
@@ -210,8 +209,7 @@ struct exit_boot_struct {
 	void			*new_fdt_addr;
 };
 
-static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
-				   struct efi_boot_memmap *map,
+static efi_status_t exit_boot_func(struct efi_boot_memmap *map,
 				   void *priv)
 {
 	struct exit_boot_struct *p = priv;
@@ -244,8 +242,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
  * with the final memory map in it.
  */
 
-efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
-					    void *handle,
+efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
 					    unsigned long *new_fdt_addr,
 					    unsigned long max_addr,
 					    u64 initrd_addr, u64 initrd_size,
@@ -275,19 +272,19 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 	 * subsequent allocations adding entries, since they could not affect
 	 * the number of EFI_MEMORY_RUNTIME regions.
 	 */
-	status = efi_get_memory_map(sys_table, &map);
+	status = efi_get_memory_map(&map);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n");
+		pr_efi_err("Unable to retrieve UEFI memory map.\n");
 		return status;
 	}
 
-	pr_efi(sys_table, "Exiting boot services and installing virtual address map...\n");
+	pr_efi("Exiting boot services and installing virtual address map...\n");
 
 	map.map = &memory_map;
-	status = efi_high_alloc(sys_table, MAX_FDT_SIZE, EFI_FDT_ALIGN,
+	status = efi_high_alloc(MAX_FDT_SIZE, EFI_FDT_ALIGN,
 				new_fdt_addr, max_addr);
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n");
+		pr_efi_err("Unable to allocate memory for new device tree.\n");
 		goto fail;
 	}
 
@@ -295,16 +292,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 	 * Now that we have done our final memory allocation (and free)
 	 * we can get the memory map key needed for exit_boot_services().
 	 */
-	status = efi_get_memory_map(sys_table, &map);
+	status = efi_get_memory_map(&map);
 	if (status != EFI_SUCCESS)
 		goto fail_free_new_fdt;
 
-	status = update_fdt(sys_table, (void *)fdt_addr, fdt_size,
+	status = update_fdt((void *)fdt_addr, fdt_size,
 			    (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr,
 			    initrd_addr, initrd_size);
 
 	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Unable to construct new device tree.\n");
+		pr_efi_err("Unable to construct new device tree.\n");
 		goto fail_free_new_fdt;
 	}
 
@@ -313,7 +310,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 	priv.runtime_entry_count	= &runtime_entry_count;
 	priv.new_fdt_addr		= (void *)*new_fdt_addr;
 
-	status = efi_exit_boot_services(sys_table, handle, &map, &priv, exit_boot_func);
+	status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func);
 
 	if (status == EFI_SUCCESS) {
 		efi_set_virtual_address_map_t *svam;
@@ -322,7 +319,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 			return EFI_SUCCESS;
 
 		/* Install the new virtual address map */
-		svam = sys_table->runtime->set_virtual_address_map;
+		svam = efi_system_table()->runtime->set_virtual_address_map;
 		status = svam(runtime_entry_count * desc_size, desc_size,
 			      desc_ver, runtime_map);
 
@@ -350,28 +347,28 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 		return EFI_SUCCESS;
 	}
 
-	pr_efi_err(sys_table, "Exit boot services failed.\n");
+	pr_efi_err("Exit boot services failed.\n");
 
 fail_free_new_fdt:
-	efi_free(sys_table, MAX_FDT_SIZE, *new_fdt_addr);
+	efi_free(MAX_FDT_SIZE, *new_fdt_addr);
 
 fail:
-	sys_table->boottime->free_pool(runtime_map);
+	efi_system_table()->boottime->free_pool(runtime_map);
 
 	return EFI_LOAD_ERROR;
 }
 
-void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size)
+void *get_fdt(unsigned long *fdt_size)
 {
 	void *fdt;
 
-	fdt = get_efi_config_table(sys_table, DEVICE_TREE_GUID);
+	fdt = get_efi_config_table(DEVICE_TREE_GUID);
 
 	if (!fdt)
 		return NULL;
 
 	if (fdt_check_header(fdt) != 0) {
-		pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+		pr_efi_err("Invalid header detected on UEFI supplied FDT, ignoring ...\n");
 		return NULL;
 	}
 	*fdt_size = fdt_totalsize(fdt);
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
index 0101ca4c13b1..55e6b3f286fe 100644
--- a/drivers/firmware/efi/libstub/gop.c
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -10,6 +10,8 @@
 #include <asm/efi.h>
 #include <asm/setup.h>
 
+#include "efistub.h"
+
 static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 {
 	u8 first, len;
@@ -35,7 +37,7 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 
 static void
 setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
-		 struct efi_pixel_bitmask pixel_info, int pixel_format)
+		 efi_pixel_bitmask_t pixel_info, int pixel_format)
 {
 	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
 		si->lfb_depth = 32;
@@ -83,189 +85,44 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
 	}
 }
 
-static efi_status_t
-__gop_query32(efi_system_table_t *sys_table_arg,
-	      struct efi_graphics_output_protocol_32 *gop32,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_32 *mode;
-	efi_graphics_output_protocol_query_mode query_mode;
-	efi_status_t status;
-	unsigned long m;
-
-	m = gop32->mode;
-	mode = (struct efi_graphics_output_protocol_mode_32 *)m;
-	query_mode = (void *)(unsigned long)gop32->query_mode;
-
-	status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size,
-				  info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
-            efi_guid_t *proto, unsigned long size, void **gop_handle)
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+			      unsigned long size, void **handles)
 {
-	struct efi_graphics_output_protocol_32 *gop32, *first_gop;
-	unsigned long nr_gops;
+	efi_graphics_output_protocol_t *gop, *first_gop;
 	u16 width, height;
 	u32 pixels_per_scan_line;
 	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
+	efi_physical_addr_t fb_base;
+	efi_pixel_bitmask_t pixel_info;
 	int pixel_format;
-	efi_status_t status = EFI_NOT_FOUND;
-	u32 *handles = (u32 *)(unsigned long)gop_handle;
-	int i;
-
-	first_gop = NULL;
-	gop32 = NULL;
-
-	nr_gops = size / sizeof(u32);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
-		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
-		bool conout_found = false;
-		void *dummy = NULL;
-		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
-		u64 current_fb_base;
-
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop32);
-		if (status != EFI_SUCCESS)
-			continue;
-
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
-		if (status == EFI_SUCCESS)
-			conout_found = true;
-
-		status = __gop_query32(sys_table_arg, gop32, &info, &size,
-				       &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
-		    info->pixel_format != PIXEL_BLT_ONLY) {
-			/*
-			 * Systems that use the UEFI Console Splitter may
-			 * provide multiple GOP devices, not all of which are
-			 * backed by real hardware. The workaround is to search
-			 * for a GOP implementing the ConOut protocol, and if
-			 * one isn't found, to just fall back to the first GOP.
-			 */
-			width = info->horizontal_resolution;
-			height = info->vertical_resolution;
-			pixel_format = info->pixel_format;
-			pixel_info = info->pixel_information;
-			pixels_per_scan_line = info->pixels_per_scan_line;
-			fb_base = current_fb_base;
-
-			/*
-			 * Once we've found a GOP supporting ConOut,
-			 * don't bother looking any further.
-			 */
-			first_gop = gop32;
-			if (conout_found)
-				break;
-		}
-	}
-
-	/* Did we find any GOPs? */
-	if (!first_gop)
-		goto out;
-
-	/* EFI framebuffer */
-	si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
-	si->lfb_width = width;
-	si->lfb_height = height;
-	si->lfb_base = fb_base;
-
-	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
-	if (ext_lfb_base) {
-		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
-		si->ext_lfb_base = ext_lfb_base;
-	}
-
-	si->pages = 1;
-
-	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
-	si->lfb_size = si->lfb_linelength * si->lfb_height;
-
-	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
-}
-
-static efi_status_t
-__gop_query64(efi_system_table_t *sys_table_arg,
-	      struct efi_graphics_output_protocol_64 *gop64,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_64 *mode;
-	efi_graphics_output_protocol_query_mode query_mode;
 	efi_status_t status;
-	unsigned long m;
-
-	m = gop64->mode;
-	mode = (struct efi_graphics_output_protocol_mode_64 *)m;
-	query_mode = (void *)(unsigned long)gop64->query_mode;
-
-	status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size,
-				  info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
-	    efi_guid_t *proto, unsigned long size, void **gop_handle)
-{
-	struct efi_graphics_output_protocol_64 *gop64, *first_gop;
-	unsigned long nr_gops;
-	u16 width, height;
-	u32 pixels_per_scan_line;
-	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
-	int pixel_format;
-	efi_status_t status = EFI_NOT_FOUND;
-	u64 *handles = (u64 *)(unsigned long)gop_handle;
+	efi_handle_t h;
 	int i;
 
 	first_gop = NULL;
-	gop64 = NULL;
+	gop = NULL;
 
-	nr_gops = size / sizeof(u64);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
+	for_each_efi_handle(h, handles, size, i) {
+		efi_graphics_output_protocol_mode_t *mode;
+		efi_graphics_output_mode_info_t *info = NULL;
 		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
 		bool conout_found = false;
 		void *dummy = NULL;
-		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
-		u64 current_fb_base;
+		efi_physical_addr_t current_fb_base;
 
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop64);
+		status = efi_bs_call(handle_protocol, h, proto, (void **)&gop);
 		if (status != EFI_SUCCESS)
 			continue;
 
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
+		status = efi_bs_call(handle_protocol, h, &conout_proto, &dummy);
 		if (status == EFI_SUCCESS)
 			conout_found = true;
 
-		status = __gop_query64(sys_table_arg, gop64, &info, &size,
-				       &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+		mode = efi_table_attr(gop, mode);
+		info = efi_table_attr(mode, info);
+		current_fb_base = efi_table_attr(mode, frame_buffer_base);
+
+		if ((!first_gop || conout_found) &&
 		    info->pixel_format != PIXEL_BLT_ONLY) {
 			/*
 			 * Systems that use the UEFI Console Splitter may
@@ -285,7 +142,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
 			 * Once we've found a GOP supporting ConOut,
 			 * don't bother looking any further.
 			 */
-			first_gop = gop64;
+			first_gop = gop;
 			if (conout_found)
 				break;
 		}
@@ -293,7 +150,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
 
 	/* Did we find any GOPs? */
 	if (!first_gop)
-		goto out;
+		return EFI_NOT_FOUND;
 
 	/* EFI framebuffer */
 	si->orig_video_isVGA = VIDEO_TYPE_EFI;
@@ -315,40 +172,32 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
 	si->lfb_size = si->lfb_linelength * si->lfb_height;
 
 	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
+
+	return EFI_SUCCESS;
 }
 
 /*
  * See if we have Graphics Output Protocol
  */
-efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
-			   struct screen_info *si, efi_guid_t *proto,
+efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto,
 			   unsigned long size)
 {
 	efi_status_t status;
 	void **gop_handle = NULL;
 
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				size, (void **)&gop_handle);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+			     (void **)&gop_handle);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				proto, NULL, &size, gop_handle);
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, proto, NULL,
+			     &size, gop_handle);
 	if (status != EFI_SUCCESS)
 		goto free_handle;
 
-	if (efi_is_64bit()) {
-		status = setup_gop64(sys_table_arg, si, proto, size,
-				     gop_handle);
-	} else {
-		status = setup_gop32(sys_table_arg, si, proto, size,
-				     gop_handle);
-	}
+	status = setup_gop(si, proto, size, gop_handle);
 
 free_handle:
-	efi_call_early(free_pool, gop_handle);
+	efi_bs_call(free_pool, gop_handle);
 	return status;
 }
diff --git a/drivers/firmware/efi/libstub/pci.c b/drivers/firmware/efi/libstub/pci.c
new file mode 100644
index 000000000000..b025e59b94df
--- /dev/null
+++ b/drivers/firmware/efi/libstub/pci.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI-related functions used by the EFI stub on multiple
+ * architectures.
+ *
+ * Copyright 2019 Google, LLC
+ */
+
+#include <linux/efi.h>
+#include <linux/pci.h>
+
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+void efi_pci_disable_bridge_busmaster(void)
+{
+	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
+	unsigned long pci_handle_size = 0;
+	efi_handle_t *pci_handle = NULL;
+	efi_handle_t handle;
+	efi_status_t status;
+	u16 class, command;
+	int i;
+
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+			     NULL, &pci_handle_size, NULL);
+
+	if (status != EFI_BUFFER_TOO_SMALL) {
+		if (status != EFI_SUCCESS && status != EFI_NOT_FOUND)
+			pr_efi_err("Failed to locate PCI I/O handles'\n");
+		return;
+	}
+
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, pci_handle_size,
+			     (void **)&pci_handle);
+	if (status != EFI_SUCCESS) {
+		pr_efi_err("Failed to allocate memory for 'pci_handle'\n");
+		return;
+	}
+
+	status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+			     NULL, &pci_handle_size, pci_handle);
+	if (status != EFI_SUCCESS) {
+		pr_efi_err("Failed to locate PCI I/O handles'\n");
+		goto free_handle;
+	}
+
+	for_each_efi_handle(handle, pci_handle, pci_handle_size, i) {
+		efi_pci_io_protocol_t *pci;
+		unsigned long segment_nr, bus_nr, device_nr, func_nr;
+
+		status = efi_bs_call(handle_protocol, handle, &pci_proto,
+				     (void **)&pci);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		/*
+		 * Disregard devices living on bus 0 - these are not behind a
+		 * bridge so no point in disconnecting them from their drivers.
+		 */
+		status = efi_call_proto(pci, get_location, &segment_nr, &bus_nr,
+					&device_nr, &func_nr);
+		if (status != EFI_SUCCESS || bus_nr == 0)
+			continue;
+
+		/*
+		 * Don't disconnect VGA controllers so we don't risk losing
+		 * access to the framebuffer. Drivers for true PCIe graphics
+		 * controllers that are behind a PCIe root port do not use
+		 * DMA to implement the GOP framebuffer anyway [although they
+		 * may use it in their implentation of Gop->Blt()], and so
+		 * disabling DMA in the PCI bridge should not interfere with
+		 * normal operation of the device.
+		 */
+		status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+					PCI_CLASS_DEVICE, 1, &class);
+		if (status != EFI_SUCCESS || class == PCI_CLASS_DISPLAY_VGA)
+			continue;
+
+		/* Disconnect this handle from all its drivers */
+		efi_bs_call(disconnect_controller, handle, NULL, NULL);
+	}
+
+	for_each_efi_handle(handle, pci_handle, pci_handle_size, i) {
+		efi_pci_io_protocol_t *pci;
+
+		status = efi_bs_call(handle_protocol, handle, &pci_proto,
+				     (void **)&pci);
+		if (status != EFI_SUCCESS || !pci)
+			continue;
+
+		status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+					PCI_CLASS_DEVICE, 1, &class);
+
+		if (status != EFI_SUCCESS || class != PCI_CLASS_BRIDGE_PCI)
+			continue;
+
+		/* Disable busmastering */
+		status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+					PCI_COMMAND, 1, &command);
+		if (status != EFI_SUCCESS || !(command & PCI_COMMAND_MASTER))
+			continue;
+
+		command &= ~PCI_COMMAND_MASTER;
+		status = efi_call_proto(pci, pci.write, EfiPciIoWidthUint16,
+					PCI_COMMAND, 1, &command);
+		if (status != EFI_SUCCESS)
+			pr_efi_err("Failed to disable PCI busmastering\n");
+	}
+
+free_handle:
+	efi_bs_call(free_pool, pci_handle);
+}
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index b4b1d1dcb5fd..316ce9ff0193 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -9,26 +9,34 @@
 
 #include "efistub.h"
 
-struct efi_rng_protocol {
-	efi_status_t (*get_info)(struct efi_rng_protocol *,
-				 unsigned long *, efi_guid_t *);
-	efi_status_t (*get_rng)(struct efi_rng_protocol *,
-				efi_guid_t *, unsigned long, u8 *out);
+typedef union efi_rng_protocol efi_rng_protocol_t;
+
+union efi_rng_protocol {
+	struct {
+		efi_status_t (__efiapi *get_info)(efi_rng_protocol_t *,
+						  unsigned long *,
+						  efi_guid_t *);
+		efi_status_t (__efiapi *get_rng)(efi_rng_protocol_t *,
+						 efi_guid_t *, unsigned long,
+						 u8 *out);
+	};
+	struct {
+		u32 get_info;
+		u32 get_rng;
+	} mixed_mode;
 };
 
-efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
-				  unsigned long size, u8 *out)
+efi_status_t efi_get_random_bytes(unsigned long size, u8 *out)
 {
 	efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
 	efi_status_t status;
-	struct efi_rng_protocol *rng;
+	efi_rng_protocol_t *rng = NULL;
 
-	status = efi_call_early(locate_protocol, &rng_proto, NULL,
-				(void **)&rng);
+	status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	return rng->get_rng(rng, NULL, size, out);
+	return efi_call_proto(rng, get_rng, NULL, size, out);
 }
 
 /*
@@ -46,6 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
 	if (md->type != EFI_CONVENTIONAL_MEMORY)
 		return 0;
 
+	if (efi_soft_reserve_enabled() &&
+	    (md->attribute & EFI_MEMORY_SP))
+		return 0;
+
 	region_end = min((u64)ULONG_MAX, md->phys_addr + md->num_pages*EFI_PAGE_SIZE - 1);
 
 	first_slot = round_up(md->phys_addr, align);
@@ -65,8 +77,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
  */
 #define MD_NUM_SLOTS(md)	((md)->virt_addr)
 
-efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
-			      unsigned long size,
+efi_status_t efi_random_alloc(unsigned long size,
 			      unsigned long align,
 			      unsigned long *addr,
 			      unsigned long random_seed)
@@ -85,7 +96,7 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
 	map.key_ptr =	NULL;
 	map.buff_size =	&buff_size;
 
-	status = efi_get_memory_map(sys_table_arg, &map);
+	status = efi_get_memory_map(&map);
 	if (status != EFI_SUCCESS)
 		return status;
 
@@ -129,60 +140,59 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
 		target = round_up(md->phys_addr, align) + target_slot * align;
 		pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
 
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA, pages, &target);
+		status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+				     EFI_LOADER_DATA, pages, &target);
 		if (status == EFI_SUCCESS)
 			*addr = target;
 		break;
 	}
 
-	efi_call_early(free_pool, memory_map);
+	efi_bs_call(free_pool, memory_map);
 
 	return status;
 }
 
-efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg)
+efi_status_t efi_random_get_seed(void)
 {
 	efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
 	efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW;
 	efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID;
-	struct efi_rng_protocol *rng;
-	struct linux_efi_random_seed *seed;
+	efi_rng_protocol_t *rng = NULL;
+	struct linux_efi_random_seed *seed = NULL;
 	efi_status_t status;
 
-	status = efi_call_early(locate_protocol, &rng_proto, NULL,
-				(void **)&rng);
+	status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
-				sizeof(*seed) + EFI_RANDOM_SEED_SIZE,
-				(void **)&seed);
+	status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+			     sizeof(*seed) + EFI_RANDOM_SEED_SIZE,
+			     (void **)&seed);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	status = rng->get_rng(rng, &rng_algo_raw, EFI_RANDOM_SEED_SIZE,
-			      seed->bits);
+	status = efi_call_proto(rng, get_rng, &rng_algo_raw,
+				 EFI_RANDOM_SEED_SIZE, seed->bits);
+
 	if (status == EFI_UNSUPPORTED)
 		/*
 		 * Use whatever algorithm we have available if the raw algorithm
 		 * is not implemented.
 		 */
-		status = rng->get_rng(rng, NULL, EFI_RANDOM_SEED_SIZE,
-				      seed->bits);
+		status = efi_call_proto(rng, get_rng, NULL,
+					EFI_RANDOM_SEED_SIZE, seed->bits);
 
 	if (status != EFI_SUCCESS)
 		goto err_freepool;
 
 	seed->size = EFI_RANDOM_SEED_SIZE;
-	status = efi_call_early(install_configuration_table, &rng_table_guid,
-				seed);
+	status = efi_bs_call(install_configuration_table, &rng_table_guid, seed);
 	if (status != EFI_SUCCESS)
 		goto err_freepool;
 
 	return EFI_SUCCESS;
 
 err_freepool:
-	efi_call_early(free_pool, seed);
+	efi_bs_call(free_pool, seed);
 	return status;
 }
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index edba5e7a3743..a765378ad18c 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -21,18 +21,13 @@ static const efi_char16_t efi_SetupMode_name[] = L"SetupMode";
 static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
 static const efi_char16_t shim_MokSBState_name[] = L"MokSBState";
 
-#define get_efi_var(name, vendor, ...) \
-	efi_call_runtime(get_variable, \
-			 (efi_char16_t *)(name), (efi_guid_t *)(vendor), \
-			 __VA_ARGS__);
-
 /*
  * Determine whether we're in secure boot mode.
  *
  * Please keep the logic in sync with
  * arch/x86/xen/efi.c:xen_efi_get_secureboot().
  */
-enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
+enum efi_secureboot_mode efi_get_secureboot(void)
 {
 	u32 attr;
 	u8 secboot, setupmode, moksbstate;
@@ -72,10 +67,10 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
 		return efi_secureboot_mode_disabled;
 
 secure_boot_enabled:
-	pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n");
+	pr_efi("UEFI Secure Boot is enabled.\n");
 	return efi_secureboot_mode_enabled;
 
 out_efi_err:
-	pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
+	pr_efi_err("Could not determine UEFI Secure Boot status.\n");
 	return efi_secureboot_mode_unknown;
 }
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index eb9af83e4d59..1d59e103a2e3 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -20,23 +20,13 @@ static const efi_char16_t efi_MemoryOverWriteRequest_name[] =
 #define MEMORY_ONLY_RESET_CONTROL_GUID \
 	EFI_GUID(0xe20939be, 0x32d4, 0x41be, 0xa1, 0x50, 0x89, 0x7f, 0x85, 0xd4, 0x98, 0x29)
 
-#define get_efi_var(name, vendor, ...) \
-	efi_call_runtime(get_variable, \
-			 (efi_char16_t *)(name), (efi_guid_t *)(vendor), \
-			 __VA_ARGS__)
-
-#define set_efi_var(name, vendor, ...) \
-	efi_call_runtime(set_variable, \
-			 (efi_char16_t *)(name), (efi_guid_t *)(vendor), \
-			 __VA_ARGS__)
-
 /*
  * Enable reboot attack mitigation. This requests that the firmware clear the
  * RAM on next reboot before proceeding with boot, ensuring that any secrets
  * are cleared. If userland has ensured that all secrets have been removed
  * from RAM before reboot it can simply reset this variable.
  */
-void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg)
+void efi_enable_reset_attack_mitigation(void)
 {
 	u8 val = 1;
 	efi_guid_t var_guid = MEMORY_ONLY_RESET_CONTROL_GUID;
@@ -57,7 +47,7 @@ void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg)
 
 #endif
 
-void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
+void efi_retrieve_tpm2_eventlog(void)
 {
 	efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
 	efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
@@ -69,23 +59,22 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
 	size_t log_size, last_entry_size;
 	efi_bool_t truncated;
 	int version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
-	void *tcg2_protocol = NULL;
+	efi_tcg2_protocol_t *tcg2_protocol = NULL;
 	int final_events_size = 0;
 
-	status = efi_call_early(locate_protocol, &tcg2_guid, NULL,
-				&tcg2_protocol);
+	status = efi_bs_call(locate_protocol, &tcg2_guid, NULL,
+			     (void **)&tcg2_protocol);
 	if (status != EFI_SUCCESS)
 		return;
 
-	status = efi_call_proto(efi_tcg2_protocol, get_event_log,
-				tcg2_protocol, version, &log_location,
-				&log_last_entry, &truncated);
+	status = efi_call_proto(tcg2_protocol, get_event_log, version,
+				&log_location, &log_last_entry, &truncated);
 
 	if (status != EFI_SUCCESS || !log_location) {
 		version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
-		status = efi_call_proto(efi_tcg2_protocol, get_event_log,
-					tcg2_protocol, version, &log_location,
-					&log_last_entry, &truncated);
+		status = efi_call_proto(tcg2_protocol, get_event_log, version,
+					&log_location, &log_last_entry,
+					&truncated);
 		if (status != EFI_SUCCESS || !log_location)
 			return;
 
@@ -126,13 +115,11 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
 	}
 
 	/* Allocate space for the logs and copy them. */
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				sizeof(*log_tbl) + log_size,
-				(void **) &log_tbl);
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+			     sizeof(*log_tbl) + log_size, (void **)&log_tbl);
 
 	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg,
-			   "Unable to allocate memory for event log\n");
+		efi_printk("Unable to allocate memory for event log\n");
 		return;
 	}
 
@@ -140,8 +127,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
 	 * Figure out whether any events have already been logged to the
 	 * final events structure, and if so how much space they take up
 	 */
-	final_events_table = get_efi_config_table(sys_table_arg,
-						LINUX_EFI_TPM_FINAL_LOG_GUID);
+	final_events_table = get_efi_config_table(LINUX_EFI_TPM_FINAL_LOG_GUID);
 	if (final_events_table && final_events_table->nr_events) {
 		struct tcg_pcr_event2_head *header;
 		int offset;
@@ -169,12 +155,12 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
 	log_tbl->version = version;
 	memcpy(log_tbl->log, (void *) first_entry_addr, log_size);
 
-	status = efi_call_early(install_configuration_table,
-				&linux_eventlog_guid, log_tbl);
+	status = efi_bs_call(install_configuration_table,
+			     &linux_eventlog_guid, log_tbl);
 	if (status != EFI_SUCCESS)
 		goto err_free;
 	return;
 
 err_free:
-	efi_call_early(free_pool, log_tbl);
+	efi_bs_call(free_pool, log_tbl);
 }
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index 38b686c67b17..2ff1883dc788 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -29,9 +29,32 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
 	return PFN_PHYS(page_to_pfn(p));
 }
 
+void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
+{
+	if (flags & EFI_MEMMAP_MEMBLOCK) {
+		if (slab_is_available())
+			memblock_free_late(phys, size);
+		else
+			memblock_free(phys, size);
+	} else if (flags & EFI_MEMMAP_SLAB) {
+		struct page *p = pfn_to_page(PHYS_PFN(phys));
+		unsigned int order = get_order(size);
+
+		free_pages((unsigned long) page_address(p), order);
+	}
+}
+
+static void __init efi_memmap_free(void)
+{
+	__efi_memmap_free(efi.memmap.phys_map,
+			efi.memmap.desc_size * efi.memmap.nr_map,
+			efi.memmap.flags);
+}
+
 /**
  * efi_memmap_alloc - Allocate memory for the EFI memory map
  * @num_entries: Number of entries in the allocated map.
+ * @data: efi memmap installation parameters
  *
  * Depending on whether mm_init() has already been invoked or not,
  * either memblock or "normal" page allocation is used.
@@ -39,34 +62,47 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
  * Returns the physical address of the allocated memory map on
  * success, zero on failure.
  */
-phys_addr_t __init efi_memmap_alloc(unsigned int num_entries)
+int __init efi_memmap_alloc(unsigned int num_entries,
+		struct efi_memory_map_data *data)
 {
-	unsigned long size = num_entries * efi.memmap.desc_size;
-
-	if (slab_is_available())
-		return __efi_memmap_alloc_late(size);
+	/* Expect allocation parameters are zero initialized */
+	WARN_ON(data->phys_map || data->size);
+
+	data->size = num_entries * efi.memmap.desc_size;
+	data->desc_version = efi.memmap.desc_version;
+	data->desc_size = efi.memmap.desc_size;
+	data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK);
+	data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE;
+
+	if (slab_is_available()) {
+		data->flags |= EFI_MEMMAP_SLAB;
+		data->phys_map = __efi_memmap_alloc_late(data->size);
+	} else {
+		data->flags |= EFI_MEMMAP_MEMBLOCK;
+		data->phys_map = __efi_memmap_alloc_early(data->size);
+	}
 
-	return __efi_memmap_alloc_early(size);
+	if (!data->phys_map)
+		return -ENOMEM;
+	return 0;
 }
 
 /**
  * __efi_memmap_init - Common code for mapping the EFI memory map
  * @data: EFI memory map data
- * @late: Use early or late mapping function?
  *
  * This function takes care of figuring out which function to use to
  * map the EFI memory map in efi.memmap based on how far into the boot
  * we are.
  *
- * During bootup @late should be %false since we only have access to
- * the early_memremap*() functions as the vmalloc space isn't setup.
- * Once the kernel is fully booted we can fallback to the more robust
- * memremap*() API.
+ * During bootup EFI_MEMMAP_LATE in data->flags should be clear since we
+ * only have access to the early_memremap*() functions as the vmalloc
+ * space isn't setup.  Once the kernel is fully booted we can fallback
+ * to the more robust memremap*() API.
  *
  * Returns zero on success, a negative error code on failure.
  */
-static int __init
-__efi_memmap_init(struct efi_memory_map_data *data, bool late)
+static int __init __efi_memmap_init(struct efi_memory_map_data *data)
 {
 	struct efi_memory_map map;
 	phys_addr_t phys_map;
@@ -76,7 +112,7 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late)
 
 	phys_map = data->phys_map;
 
-	if (late)
+	if (data->flags & EFI_MEMMAP_LATE)
 		map.map = memremap(phys_map, data->size, MEMREMAP_WB);
 	else
 		map.map = early_memremap(phys_map, data->size);
@@ -86,13 +122,16 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late)
 		return -ENOMEM;
 	}
 
+	/* NOP if data->flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB) == 0 */
+	efi_memmap_free();
+
 	map.phys_map = data->phys_map;
 	map.nr_map = data->size / data->desc_size;
 	map.map_end = map.map + data->size;
 
 	map.desc_version = data->desc_version;
 	map.desc_size = data->desc_size;
-	map.late = late;
+	map.flags = data->flags;
 
 	set_bit(EFI_MEMMAP, &efi.flags);
 
@@ -111,9 +150,10 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late)
 int __init efi_memmap_init_early(struct efi_memory_map_data *data)
 {
 	/* Cannot go backwards */
-	WARN_ON(efi.memmap.late);
+	WARN_ON(efi.memmap.flags & EFI_MEMMAP_LATE);
 
-	return __efi_memmap_init(data, false);
+	data->flags = 0;
+	return __efi_memmap_init(data);
 }
 
 void __init efi_memmap_unmap(void)
@@ -121,7 +161,7 @@ void __init efi_memmap_unmap(void)
 	if (!efi_enabled(EFI_MEMMAP))
 		return;
 
-	if (!efi.memmap.late) {
+	if (!(efi.memmap.flags & EFI_MEMMAP_LATE)) {
 		unsigned long size;
 
 		size = efi.memmap.desc_size * efi.memmap.nr_map;
@@ -162,13 +202,14 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size)
 	struct efi_memory_map_data data = {
 		.phys_map = addr,
 		.size = size,
+		.flags = EFI_MEMMAP_LATE,
 	};
 
 	/* Did we forget to unmap the early EFI memmap? */
 	WARN_ON(efi.memmap.map);
 
 	/* Were we already called? */
-	WARN_ON(efi.memmap.late);
+	WARN_ON(efi.memmap.flags & EFI_MEMMAP_LATE);
 
 	/*
 	 * It makes no sense to allow callers to register different
@@ -178,13 +219,12 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size)
 	data.desc_version = efi.memmap.desc_version;
 	data.desc_size = efi.memmap.desc_size;
 
-	return __efi_memmap_init(&data, true);
+	return __efi_memmap_init(&data);
 }
 
 /**
  * efi_memmap_install - Install a new EFI memory map in efi.memmap
- * @addr: Physical address of the memory map
- * @nr_map: Number of entries in the memory map
+ * @ctx: map allocation parameters (address, size, flags)
  *
  * Unlike efi_memmap_init_*(), this function does not allow the caller
  * to switch from early to late mappings. It simply uses the existing
@@ -192,18 +232,11 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size)
  *
  * Returns zero on success, a negative error code on failure.
  */
-int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map)
+int __init efi_memmap_install(struct efi_memory_map_data *data)
 {
-	struct efi_memory_map_data data;
-
 	efi_memmap_unmap();
 
-	data.phys_map = addr;
-	data.size = efi.memmap.desc_size * nr_map;
-	data.desc_version = efi.memmap.desc_version;
-	data.desc_size = efi.memmap.desc_size;
-
-	return __efi_memmap_init(&data, efi.memmap.late);
+	return __efi_memmap_init(data);
 }
 
 /**
diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
index 76b0c354a027..de1a9a1f9f14 100644
--- a/drivers/firmware/efi/rci2-table.c
+++ b/drivers/firmware/efi/rci2-table.c
@@ -81,6 +81,9 @@ static int __init efi_rci2_sysfs_init(void)
 	struct kobject *tables_kobj;
 	int ret = -ENOMEM;
 
+	if (rci2_table_phys == EFI_INVALID_TABLE_ADDR)
+		return 0;
+
 	rci2_base = memremap(rci2_table_phys,
 			     sizeof(struct rci2_table_global_hdr),
 			     MEMREMAP_WB);
diff --git a/drivers/firmware/efi/x86_fake_mem.c b/drivers/firmware/efi/x86_fake_mem.c
new file mode 100644
index 000000000000..e5d6d5a1b240
--- /dev/null
+++ b/drivers/firmware/efi/x86_fake_mem.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights reserved. */
+#include <linux/efi.h>
+#include <asm/e820/api.h>
+#include "fake_mem.h"
+
+void __init efi_fake_memmap_early(void)
+{
+	int i;
+
+	/*
+	 * The late efi_fake_mem() call can handle all requests if
+	 * EFI_MEMORY_SP support is disabled.
+	 */
+	if (!efi_soft_reserve_enabled())
+		return;
+
+	if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem)
+		return;
+
+	/*
+	 * Given that efi_fake_memmap() needs to perform memblock
+	 * allocations it needs to run after e820__memblock_setup().
+	 * However, if efi_fake_mem specifies EFI_MEMORY_SP for a given
+	 * address range that potentially needs to mark the memory as
+	 * reserved prior to e820__memblock_setup(). Update e820
+	 * directly if EFI_MEMORY_SP is specified for an
+	 * EFI_CONVENTIONAL_MEMORY descriptor.
+	 */
+	for (i = 0; i < nr_fake_mem; i++) {
+		struct efi_mem_range *mem = &efi_fake_mems[i];
+		efi_memory_desc_t *md;
+		u64 m_start, m_end;
+
+		if ((mem->attribute & EFI_MEMORY_SP) == 0)
+			continue;
+
+		m_start = mem->range.start;
+		m_end = mem->range.end;
+		for_each_efi_memory_desc(md) {
+			u64 start, end;
+
+			if (md->type != EFI_CONVENTIONAL_MEMORY)
+				continue;
+
+			start = md->phys_addr;
+			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+			if (m_start <= end && m_end >= start)
+				/* fake range overlaps descriptor */;
+			else
+				continue;
+
+			/*
+			 * Trim the boundary of the e820 update to the
+			 * descriptor in case the fake range overlaps
+			 * !EFI_CONVENTIONAL_MEMORY
+			 */
+			start = max(start, m_start);
+			end = min(end, m_end);
+
+			if (end <= start)
+				continue;
+			e820__range_update(start, end - start + 1, E820_TYPE_RAM,
+					E820_TYPE_SOFT_RESERVED);
+			e820__update_table(e820_table);
+		}
+	}
+}
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index 8d132e4f008a..0205987a4fd4 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -163,8 +163,15 @@ static int coreboot_table_probe(struct platform_device *pdev)
 	return ret;
 }
 
+static int __cb_dev_unregister(struct device *dev, void *dummy)
+{
+	device_unregister(dev);
+	return 0;
+}
+
 static int coreboot_table_remove(struct platform_device *pdev)
 {
+	bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister);
 	bus_unregister(&coreboot_bus_type);
 	return 0;
 }
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index edaa4e5d84ad..5b2011ebbe26 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -76,6 +76,7 @@
 #define GSMI_CMD_LOG_S0IX_RESUME	0x0b
 #define GSMI_CMD_CLEAR_CONFIG		0x20
 #define GSMI_CMD_HANDSHAKE_TYPE		0xC1
+#define GSMI_CMD_RESERVED		0xff
 
 /* Magic entry type for kernel events */
 #define GSMI_LOG_ENTRY_TYPE_KERNEL     0xDEAD
@@ -746,6 +747,7 @@ MODULE_DEVICE_TABLE(dmi, gsmi_dmi_table);
 static __init int gsmi_system_valid(void)
 {
 	u32 hash;
+	u16 cmd, result;
 
 	if (!dmi_check_system(gsmi_dmi_table))
 		return -ENODEV;
@@ -780,6 +782,23 @@ static __init int gsmi_system_valid(void)
 		return -ENODEV;
 	}
 
+	/* Test the smihandler with a bogus command. If it leaves the
+	 * calling argument in %ax untouched, there is no handler for
+	 * GSMI commands.
+	 */
+	cmd = GSMI_CALLBACK | GSMI_CMD_RESERVED << 8;
+	asm volatile (
+		"outb %%al, %%dx\n\t"
+		: "=a" (result)
+		: "0" (cmd),
+		  "d" (acpi_gbl_FADT.smi_command)
+		: "memory", "cc"
+		);
+	if (cmd == result) {
+		pr_info("gsmi: no gsmi handler in firmware\n");
+		return -ENODEV;
+	}
+
 	/* Found */
 	return 0;
 }
@@ -1016,6 +1035,9 @@ out_err:
 	dma_pool_destroy(gsmi_dev.dma_pool);
 	platform_device_unregister(gsmi_dev.pdev);
 	pr_info("gsmi: failed to load: %d\n", ret);
+#ifdef CONFIG_PM
+	platform_driver_unregister(&gsmi_driver_info);
+#endif
 	return ret;
 }
 
@@ -1037,6 +1059,9 @@ static void __exit gsmi_exit(void)
 	gsmi_buf_free(gsmi_dev.name_buf);
 	dma_pool_destroy(gsmi_dev.dma_pool);
 	platform_device_unregister(gsmi_dev.pdev);
+#ifdef CONFIG_PM
+	platform_driver_unregister(&gsmi_driver_info);
+#endif
 }
 
 module_init(gsmi_init);
diff --git a/drivers/firmware/imx/imx-dsp.c b/drivers/firmware/imx/imx-dsp.c
index a43d2db5cbdb..4265e9dbed84 100644
--- a/drivers/firmware/imx/imx-dsp.c
+++ b/drivers/firmware/imx/imx-dsp.c
@@ -114,7 +114,7 @@ static int imx_dsp_probe(struct platform_device *pdev)
 
 	dev_info(dev, "NXP i.MX DSP IPC initialized\n");
 
-	return devm_of_platform_populate(dev);
+	return 0;
 out:
 	kfree(chan_name);
 	for (j = 0; j < i; j++) {
diff --git a/drivers/firmware/imx/imx-scu-irq.c b/drivers/firmware/imx/imx-scu-irq.c
index 687121f8c4d5..db655e87cdc8 100644
--- a/drivers/firmware/imx/imx-scu-irq.c
+++ b/drivers/firmware/imx/imx-scu-irq.c
@@ -8,6 +8,7 @@
 
 #include <dt-bindings/firmware/imx/rsrc.h>
 #include <linux/firmware/imx/ipc.h>
+#include <linux/firmware/imx/sci.h>
 #include <linux/mailbox_client.h>
 
 #define IMX_SC_IRQ_FUNC_ENABLE	1
diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c
index 04a24a863d6e..03b43b7a6d1d 100644
--- a/drivers/firmware/imx/imx-scu.c
+++ b/drivers/firmware/imx/imx-scu.c
@@ -107,6 +107,12 @@ static void imx_scu_rx_callback(struct mbox_client *c, void *msg)
 	struct imx_sc_rpc_msg *hdr;
 	u32 *data = msg;
 
+	if (!sc_ipc->msg) {
+		dev_warn(sc_ipc->dev, "unexpected rx idx %d 0x%08x, ignore!\n",
+				sc_chan->idx, *data);
+		return;
+	}
+
 	if (sc_chan->idx == 0) {
 		hdr = msg;
 		sc_ipc->rx_size = hdr->size;
@@ -156,6 +162,7 @@ static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg)
  */
 int imx_scu_call_rpc(struct imx_sc_ipc *sc_ipc, void *msg, bool have_resp)
 {
+	uint8_t saved_svc, saved_func;
 	struct imx_sc_rpc_msg *hdr;
 	int ret;
 
@@ -165,7 +172,11 @@ int imx_scu_call_rpc(struct imx_sc_ipc *sc_ipc, void *msg, bool have_resp)
 	mutex_lock(&sc_ipc->lock);
 	reinit_completion(&sc_ipc->done);
 
-	sc_ipc->msg = msg;
+	if (have_resp) {
+		sc_ipc->msg = msg;
+		saved_svc = ((struct imx_sc_rpc_msg *)msg)->svc;
+		saved_func = ((struct imx_sc_rpc_msg *)msg)->func;
+	}
 	sc_ipc->count = 0;
 	ret = imx_scu_ipc_write(sc_ipc, msg);
 	if (ret < 0) {
@@ -184,9 +195,20 @@ int imx_scu_call_rpc(struct imx_sc_ipc *sc_ipc, void *msg, bool have_resp)
 		/* response status is stored in hdr->func field */
 		hdr = msg;
 		ret = hdr->func;
+		/*
+		 * Some special SCU firmware APIs do NOT have return value
+		 * in hdr->func, but they do have response data, those special
+		 * APIs are defined as void function in SCU firmware, so they
+		 * should be treated as return success always.
+		 */
+		if ((saved_svc == IMX_SC_RPC_SVC_MISC) &&
+			(saved_func == IMX_SC_MISC_FUNC_UNIQUE_ID ||
+			 saved_func == IMX_SC_MISC_FUNC_GET_BUTTON_STATUS))
+			ret = 0;
 	}
 
 out:
+	sc_ipc->msg = NULL;
 	mutex_unlock(&sc_ipc->lock);
 
 	dev_dbg(sc_ipc->dev, "RPC SVC done\n");
diff --git a/drivers/firmware/meson/meson_sm.c b/drivers/firmware/meson/meson_sm.c
index 8d908a8e0d20..1d5b4d74f96d 100644
--- a/drivers/firmware/meson/meson_sm.c
+++ b/drivers/firmware/meson/meson_sm.c
@@ -35,7 +35,7 @@ struct meson_sm_chip {
 	struct meson_sm_cmd cmd[];
 };
 
-struct meson_sm_chip gxbb_chip = {
+static const struct meson_sm_chip gxbb_chip = {
 	.shmem_size		= SZ_4K,
 	.cmd_shmem_in_base	= 0x82000020,
 	.cmd_shmem_out_base	= 0x82000021,
@@ -54,8 +54,6 @@ struct meson_sm_firmware {
 	void __iomem *sm_shmem_out_base;
 };
 
-static struct meson_sm_firmware fw;
-
 static u32 meson_sm_get_cmd(const struct meson_sm_chip *chip,
 			    unsigned int cmd_index)
 {
@@ -90,6 +88,7 @@ static void __iomem *meson_sm_map_shmem(u32 cmd_shmem, unsigned int size)
 /**
  * meson_sm_call - generic SMC32 call to the secure-monitor
  *
+ * @fw:		Pointer to secure-monitor firmware
  * @cmd_index:	Index of the SMC32 function ID
  * @ret:	Returned value
  * @arg0:	SMC32 Argument 0
@@ -100,15 +99,15 @@ static void __iomem *meson_sm_map_shmem(u32 cmd_shmem, unsigned int size)
  *
  * Return:	0 on success, a negative value on error
  */
-int meson_sm_call(unsigned int cmd_index, u32 *ret, u32 arg0,
-		  u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index,
+		  u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4)
 {
 	u32 cmd, lret;
 
-	if (!fw.chip)
+	if (!fw->chip)
 		return -ENOENT;
 
-	cmd = meson_sm_get_cmd(fw.chip, cmd_index);
+	cmd = meson_sm_get_cmd(fw->chip, cmd_index);
 	if (!cmd)
 		return -EINVAL;
 
@@ -124,6 +123,7 @@ EXPORT_SYMBOL(meson_sm_call);
 /**
  * meson_sm_call_read - retrieve data from secure-monitor
  *
+ * @fw:		Pointer to secure-monitor firmware
  * @buffer:	Buffer to store the retrieved data
  * @bsize:	Size of the buffer
  * @cmd_index:	Index of the SMC32 function ID
@@ -137,22 +137,23 @@ EXPORT_SYMBOL(meson_sm_call);
  *		When 0 is returned there is no guarantee about the amount of
  *		data read and bsize bytes are copied in buffer.
  */
-int meson_sm_call_read(void *buffer, unsigned int bsize, unsigned int cmd_index,
-		       u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+int meson_sm_call_read(struct meson_sm_firmware *fw, void *buffer,
+		       unsigned int bsize, unsigned int cmd_index, u32 arg0,
+		       u32 arg1, u32 arg2, u32 arg3, u32 arg4)
 {
 	u32 size;
 	int ret;
 
-	if (!fw.chip)
+	if (!fw->chip)
 		return -ENOENT;
 
-	if (!fw.chip->cmd_shmem_out_base)
+	if (!fw->chip->cmd_shmem_out_base)
 		return -EINVAL;
 
-	if (bsize > fw.chip->shmem_size)
+	if (bsize > fw->chip->shmem_size)
 		return -EINVAL;
 
-	if (meson_sm_call(cmd_index, &size, arg0, arg1, arg2, arg3, arg4) < 0)
+	if (meson_sm_call(fw, cmd_index, &size, arg0, arg1, arg2, arg3, arg4) < 0)
 		return -EINVAL;
 
 	if (size > bsize)
@@ -164,7 +165,7 @@ int meson_sm_call_read(void *buffer, unsigned int bsize, unsigned int cmd_index,
 		size = bsize;
 
 	if (buffer)
-		memcpy(buffer, fw.sm_shmem_out_base, size);
+		memcpy(buffer, fw->sm_shmem_out_base, size);
 
 	return ret;
 }
@@ -173,6 +174,7 @@ EXPORT_SYMBOL(meson_sm_call_read);
 /**
  * meson_sm_call_write - send data to secure-monitor
  *
+ * @fw:		Pointer to secure-monitor firmware
  * @buffer:	Buffer containing data to send
  * @size:	Size of the data to send
  * @cmd_index:	Index of the SMC32 function ID
@@ -184,23 +186,24 @@ EXPORT_SYMBOL(meson_sm_call_read);
  *
  * Return:	size of sent data on success, a negative value on error
  */
-int meson_sm_call_write(void *buffer, unsigned int size, unsigned int cmd_index,
-			u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer,
+			unsigned int size, unsigned int cmd_index, u32 arg0,
+			u32 arg1, u32 arg2, u32 arg3, u32 arg4)
 {
 	u32 written;
 
-	if (!fw.chip)
+	if (!fw->chip)
 		return -ENOENT;
 
-	if (size > fw.chip->shmem_size)
+	if (size > fw->chip->shmem_size)
 		return -EINVAL;
 
-	if (!fw.chip->cmd_shmem_in_base)
+	if (!fw->chip->cmd_shmem_in_base)
 		return -EINVAL;
 
-	memcpy(fw.sm_shmem_in_base, buffer, size);
+	memcpy(fw->sm_shmem_in_base, buffer, size);
 
-	if (meson_sm_call(cmd_index, &written, arg0, arg1, arg2, arg3, arg4) < 0)
+	if (meson_sm_call(fw, cmd_index, &written, arg0, arg1, arg2, arg3, arg4) < 0)
 		return -EINVAL;
 
 	if (!written)
@@ -210,6 +213,24 @@ int meson_sm_call_write(void *buffer, unsigned int size, unsigned int cmd_index,
 }
 EXPORT_SYMBOL(meson_sm_call_write);
 
+/**
+ * meson_sm_get - get pointer to meson_sm_firmware structure.
+ *
+ * @sm_node:		Pointer to the secure-monitor Device Tree node.
+ *
+ * Return:		NULL is the secure-monitor device is not ready.
+ */
+struct meson_sm_firmware *meson_sm_get(struct device_node *sm_node)
+{
+	struct platform_device *pdev = of_find_device_by_node(sm_node);
+
+	if (!pdev)
+		return NULL;
+
+	return platform_get_drvdata(pdev);
+}
+EXPORT_SYMBOL_GPL(meson_sm_get);
+
 #define SM_CHIP_ID_LENGTH	119
 #define SM_CHIP_ID_OFFSET	4
 #define SM_CHIP_ID_SIZE		12
@@ -217,33 +238,25 @@ EXPORT_SYMBOL(meson_sm_call_write);
 static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
+	struct platform_device *pdev = to_platform_device(dev);
+	struct meson_sm_firmware *fw;
 	uint8_t *id_buf;
 	int ret;
 
+	fw = platform_get_drvdata(pdev);
+
 	id_buf = kmalloc(SM_CHIP_ID_LENGTH, GFP_KERNEL);
 	if (!id_buf)
 		return -ENOMEM;
 
-	ret = meson_sm_call_read(id_buf, SM_CHIP_ID_LENGTH, SM_GET_CHIP_ID,
+	ret = meson_sm_call_read(fw, id_buf, SM_CHIP_ID_LENGTH, SM_GET_CHIP_ID,
 				 0, 0, 0, 0, 0);
 	if (ret < 0) {
 		kfree(id_buf);
 		return ret;
 	}
 
-	ret = sprintf(buf, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-			id_buf[SM_CHIP_ID_OFFSET + 0],
-			id_buf[SM_CHIP_ID_OFFSET + 1],
-			id_buf[SM_CHIP_ID_OFFSET + 2],
-			id_buf[SM_CHIP_ID_OFFSET + 3],
-			id_buf[SM_CHIP_ID_OFFSET + 4],
-			id_buf[SM_CHIP_ID_OFFSET + 5],
-			id_buf[SM_CHIP_ID_OFFSET + 6],
-			id_buf[SM_CHIP_ID_OFFSET + 7],
-			id_buf[SM_CHIP_ID_OFFSET + 8],
-			id_buf[SM_CHIP_ID_OFFSET + 9],
-			id_buf[SM_CHIP_ID_OFFSET + 10],
-			id_buf[SM_CHIP_ID_OFFSET + 11]);
+	ret = sprintf(buf, "%12phN\n", &id_buf[SM_CHIP_ID_OFFSET]);
 
 	kfree(id_buf);
 
@@ -268,25 +281,34 @@ static const struct of_device_id meson_sm_ids[] = {
 
 static int __init meson_sm_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	const struct meson_sm_chip *chip;
+	struct meson_sm_firmware *fw;
+
+	fw = devm_kzalloc(dev, sizeof(*fw), GFP_KERNEL);
+	if (!fw)
+		return -ENOMEM;
 
-	chip = of_match_device(meson_sm_ids, &pdev->dev)->data;
+	chip = of_match_device(meson_sm_ids, dev)->data;
 
 	if (chip->cmd_shmem_in_base) {
-		fw.sm_shmem_in_base = meson_sm_map_shmem(chip->cmd_shmem_in_base,
-							 chip->shmem_size);
-		if (WARN_ON(!fw.sm_shmem_in_base))
+		fw->sm_shmem_in_base = meson_sm_map_shmem(chip->cmd_shmem_in_base,
+							  chip->shmem_size);
+		if (WARN_ON(!fw->sm_shmem_in_base))
 			goto out;
 	}
 
 	if (chip->cmd_shmem_out_base) {
-		fw.sm_shmem_out_base = meson_sm_map_shmem(chip->cmd_shmem_out_base,
-							  chip->shmem_size);
-		if (WARN_ON(!fw.sm_shmem_out_base))
+		fw->sm_shmem_out_base = meson_sm_map_shmem(chip->cmd_shmem_out_base,
+							   chip->shmem_size);
+		if (WARN_ON(!fw->sm_shmem_out_base))
 			goto out_in_base;
 	}
 
-	fw.chip = chip;
+	fw->chip = chip;
+
+	platform_set_drvdata(pdev, fw);
+
 	pr_info("secure-monitor enabled\n");
 
 	if (sysfs_create_group(&pdev->dev.kobj, &meson_sm_sysfs_attr_group))
@@ -295,7 +317,7 @@ static int __init meson_sm_probe(struct platform_device *pdev)
 	return 0;
 
 out_in_base:
-	iounmap(fw.sm_shmem_in_base);
+	iounmap(fw->sm_shmem_in_base);
 out:
 	return -EINVAL;
 }
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 84f4ff351c62..b3b6c15e7b36 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -53,10 +53,18 @@ bool psci_tos_resident_on(int cpu)
 }
 
 struct psci_operations psci_ops = {
-	.conduit = PSCI_CONDUIT_NONE,
+	.conduit = SMCCC_CONDUIT_NONE,
 	.smccc_version = SMCCC_VERSION_1_0,
 };
 
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
+{
+	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+		return SMCCC_CONDUIT_NONE;
+
+	return psci_ops.conduit;
+}
+
 typedef unsigned long (psci_fn)(unsigned long, unsigned long,
 				unsigned long, unsigned long);
 static psci_fn *invoke_psci_fn;
@@ -212,13 +220,13 @@ static unsigned long psci_migrate_info_up_cpu(void)
 			      0, 0, 0);
 }
 
-static void set_conduit(enum psci_conduit conduit)
+static void set_conduit(enum arm_smccc_conduit conduit)
 {
 	switch (conduit) {
-	case PSCI_CONDUIT_HVC:
+	case SMCCC_CONDUIT_HVC:
 		invoke_psci_fn = __invoke_psci_fn_hvc;
 		break;
-	case PSCI_CONDUIT_SMC:
+	case SMCCC_CONDUIT_SMC:
 		invoke_psci_fn = __invoke_psci_fn_smc;
 		break;
 	default:
@@ -240,9 +248,9 @@ static int get_set_conduit_method(struct device_node *np)
 	}
 
 	if (!strcmp("hvc", method)) {
-		set_conduit(PSCI_CONDUIT_HVC);
+		set_conduit(SMCCC_CONDUIT_HVC);
 	} else if (!strcmp("smc", method)) {
-		set_conduit(PSCI_CONDUIT_SMC);
+		set_conduit(SMCCC_CONDUIT_SMC);
 	} else {
 		pr_warn("invalid \"method\" property: %s\n", method);
 		return -EINVAL;
@@ -583,9 +591,9 @@ int __init psci_acpi_init(void)
 	pr_info("probing for conduit method from ACPI.\n");
 
 	if (acpi_psci_use_hvc())
-		set_conduit(PSCI_CONDUIT_HVC);
+		set_conduit(SMCCC_CONDUIT_HVC);
 	else
-		set_conduit(PSCI_CONDUIT_SMC);
+		set_conduit(SMCCC_CONDUIT_SMC);
 
 	return psci_probe();
 }
diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
index 215061c581e1..48e2ef794ea3 100644
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -442,6 +442,41 @@ int __qcom_scm_hdcp_req(struct device *dev, struct qcom_scm_hdcp_req *req,
 		req, req_cnt * sizeof(*req), resp, sizeof(*resp));
 }
 
+int __qcom_scm_ocmem_lock(struct device *dev, u32 id, u32 offset, u32 size,
+			  u32 mode)
+{
+	struct ocmem_tz_lock {
+		__le32 id;
+		__le32 offset;
+		__le32 size;
+		__le32 mode;
+	} request;
+
+	request.id = cpu_to_le32(id);
+	request.offset = cpu_to_le32(offset);
+	request.size = cpu_to_le32(size);
+	request.mode = cpu_to_le32(mode);
+
+	return qcom_scm_call(dev, QCOM_SCM_OCMEM_SVC, QCOM_SCM_OCMEM_LOCK_CMD,
+			     &request, sizeof(request), NULL, 0);
+}
+
+int __qcom_scm_ocmem_unlock(struct device *dev, u32 id, u32 offset, u32 size)
+{
+	struct ocmem_tz_unlock {
+		__le32 id;
+		__le32 offset;
+		__le32 size;
+	} request;
+
+	request.id = cpu_to_le32(id);
+	request.offset = cpu_to_le32(offset);
+	request.size = cpu_to_le32(size);
+
+	return qcom_scm_call(dev, QCOM_SCM_OCMEM_SVC, QCOM_SCM_OCMEM_UNLOCK_CMD,
+			     &request, sizeof(request), NULL, 0);
+}
+
 void __qcom_scm_init(void)
 {
 }
@@ -582,7 +617,22 @@ int __qcom_scm_assign_mem(struct device *dev, phys_addr_t mem_region,
 int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id,
 			       u32 spare)
 {
-	return -ENODEV;
+	struct msm_scm_sec_cfg {
+		__le32 id;
+		__le32 ctx_bank_num;
+	} cfg;
+	int ret, scm_ret = 0;
+
+	cfg.id = cpu_to_le32(device_id);
+	cfg.ctx_bank_num = cpu_to_le32(spare);
+
+	ret = qcom_scm_call(dev, QCOM_SCM_SVC_MP, QCOM_SCM_RESTORE_SEC_CFG,
+			    &cfg, sizeof(cfg), &scm_ret, sizeof(scm_ret));
+
+	if (ret || scm_ret)
+		return ret ? ret : -EINVAL;
+
+	return 0;
 }
 
 int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare,
@@ -614,3 +664,8 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val)
 	return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 				     addr, val);
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable)
+{
+	return -ENODEV;
+}
diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 91d5ad7cf58b..3c5850350974 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -62,32 +62,72 @@ static DEFINE_MUTEX(qcom_scm_lock);
 #define FIRST_EXT_ARG_IDX 3
 #define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1)
 
-/**
- * qcom_scm_call() - Invoke a syscall in the secure world
- * @dev:	device
- * @svc_id:	service identifier
- * @cmd_id:	command identifier
- * @desc:	Descriptor structure containing arguments and return values
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- * This should *only* be called in pre-emptible context.
-*/
-static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
-			 const struct qcom_scm_desc *desc,
-			 struct arm_smccc_res *res)
+static void __qcom_scm_call_do(const struct qcom_scm_desc *desc,
+			       struct arm_smccc_res *res, u32 fn_id,
+			       u64 x5, u32 type)
+{
+	u64 cmd;
+	struct arm_smccc_quirk quirk = { .id = ARM_SMCCC_QUIRK_QCOM_A6 };
+
+	cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention,
+				 ARM_SMCCC_OWNER_SIP, fn_id);
+
+	quirk.state.a6 = 0;
+
+	do {
+		arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+				    desc->args[1], desc->args[2], x5,
+				    quirk.state.a6, 0, res, &quirk);
+
+		if (res->a0 == QCOM_SCM_INTERRUPTED)
+			cmd = res->a0;
+
+	} while (res->a0 == QCOM_SCM_INTERRUPTED);
+}
+
+static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
+			     struct arm_smccc_res *res, u32 fn_id,
+			     u64 x5, bool atomic)
+{
+	int retry_count = 0;
+
+	if (atomic) {
+		__qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
+		return;
+	}
+
+	do {
+		mutex_lock(&qcom_scm_lock);
+
+		__qcom_scm_call_do(desc, res, fn_id, x5,
+				   ARM_SMCCC_STD_CALL);
+
+		mutex_unlock(&qcom_scm_lock);
+
+		if (res->a0 == QCOM_SCM_V2_EBUSY) {
+			if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
+				break;
+			msleep(QCOM_SCM_EBUSY_WAIT_MS);
+		}
+	}  while (res->a0 == QCOM_SCM_V2_EBUSY);
+}
+
+static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+			    const struct qcom_scm_desc *desc,
+			    struct arm_smccc_res *res, bool atomic)
 {
 	int arglen = desc->arginfo & 0xf;
-	int retry_count = 0, i;
+	int i;
 	u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id);
-	u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX];
+	u64 x5 = desc->args[FIRST_EXT_ARG_IDX];
 	dma_addr_t args_phys = 0;
 	void *args_virt = NULL;
 	size_t alloc_len;
-	struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+	gfp_t flag = atomic ? GFP_ATOMIC : GFP_KERNEL;
 
 	if (unlikely(arglen > N_REGISTER_ARGS)) {
 		alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
-		args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL);
+		args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag);
 
 		if (!args_virt)
 			return -ENOMEM;
@@ -117,46 +157,56 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
 		x5 = args_phys;
 	}
 
-	do {
-		mutex_lock(&qcom_scm_lock);
-
-		cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL,
-					 qcom_smccc_convention,
-					 ARM_SMCCC_OWNER_SIP, fn_id);
-
-		quirk.state.a6 = 0;
-
-		do {
-			arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
-				      desc->args[1], desc->args[2], x5,
-				      quirk.state.a6, 0, res, &quirk);
-
-			if (res->a0 == QCOM_SCM_INTERRUPTED)
-				cmd = res->a0;
-
-		} while (res->a0 == QCOM_SCM_INTERRUPTED);
-
-		mutex_unlock(&qcom_scm_lock);
-
-		if (res->a0 == QCOM_SCM_V2_EBUSY) {
-			if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
-				break;
-			msleep(QCOM_SCM_EBUSY_WAIT_MS);
-		}
-	}  while (res->a0 == QCOM_SCM_V2_EBUSY);
+	qcom_scm_call_do(desc, res, fn_id, x5, atomic);
 
 	if (args_virt) {
 		dma_unmap_single(dev, args_phys, alloc_len, DMA_TO_DEVICE);
 		kfree(args_virt);
 	}
 
-	if (res->a0 < 0)
+	if ((long)res->a0 < 0)
 		return qcom_scm_remap_error(res->a0);
 
 	return 0;
 }
 
 /**
+ * qcom_scm_call() - Invoke a syscall in the secure world
+ * @dev:	device
+ * @svc_id:	service identifier
+ * @cmd_id:	command identifier
+ * @desc:	Descriptor structure containing arguments and return values
+ *
+ * Sends a command to the SCM and waits for the command to finish processing.
+ * This should *only* be called in pre-emptible context.
+ */
+static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+			 const struct qcom_scm_desc *desc,
+			 struct arm_smccc_res *res)
+{
+	might_sleep();
+	return ___qcom_scm_call(dev, svc_id, cmd_id, desc, res, false);
+}
+
+/**
+ * qcom_scm_call_atomic() - atomic variation of qcom_scm_call()
+ * @dev:	device
+ * @svc_id:	service identifier
+ * @cmd_id:	command identifier
+ * @desc:	Descriptor structure containing arguments and return values
+ * @res:	Structure containing results from SMC/HVC call
+ *
+ * Sends a command to the SCM and waits for the command to finish processing.
+ * This can be called in atomic context.
+ */
+static int qcom_scm_call_atomic(struct device *dev, u32 svc_id, u32 cmd_id,
+				const struct qcom_scm_desc *desc,
+				struct arm_smccc_res *res)
+{
+	return ___qcom_scm_call(dev, svc_id, cmd_id, desc, res, true);
+}
+
+/**
  * qcom_scm_set_cold_boot_addr() - Set the cold boot address for cpus
  * @entry: Entry point function for the cpus
  * @cpus: The cpumask of cpus that will use the entry point
@@ -241,6 +291,18 @@ int __qcom_scm_hdcp_req(struct device *dev, struct qcom_scm_hdcp_req *req,
 	return ret;
 }
 
+int __qcom_scm_ocmem_lock(struct device *dev, uint32_t id, uint32_t offset,
+			  uint32_t size, uint32_t mode)
+{
+	return -ENOTSUPP;
+}
+
+int __qcom_scm_ocmem_unlock(struct device *dev, uint32_t id, uint32_t offset,
+			    uint32_t size)
+{
+	return -ENOTSUPP;
+}
+
 void __qcom_scm_init(void)
 {
 	u64 cmd;
@@ -502,3 +564,16 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val)
 	return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 			     &desc, &res);
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en)
+{
+	struct qcom_scm_desc desc = {0};
+	struct arm_smccc_res res;
+
+	desc.args[0] = QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL;
+	desc.args[1] = en;
+	desc.arginfo = QCOM_SCM_ARGS(2);
+
+	return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM,
+				    QCOM_SCM_CONFIG_ERRATA1, &desc, &res);
+}
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 4802ab170fe5..1ba0df4b97ab 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -192,6 +192,46 @@ bool qcom_scm_pas_supported(u32 peripheral)
 EXPORT_SYMBOL(qcom_scm_pas_supported);
 
 /**
+ * qcom_scm_ocmem_lock_available() - is OCMEM lock/unlock interface available
+ */
+bool qcom_scm_ocmem_lock_available(void)
+{
+	return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_OCMEM_SVC,
+					    QCOM_SCM_OCMEM_LOCK_CMD);
+}
+EXPORT_SYMBOL(qcom_scm_ocmem_lock_available);
+
+/**
+ * qcom_scm_ocmem_lock() - call OCMEM lock interface to assign an OCMEM
+ * region to the specified initiator
+ *
+ * @id:     tz initiator id
+ * @offset: OCMEM offset
+ * @size:   OCMEM size
+ * @mode:   access mode (WIDE/NARROW)
+ */
+int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size,
+			u32 mode)
+{
+	return __qcom_scm_ocmem_lock(__scm->dev, id, offset, size, mode);
+}
+EXPORT_SYMBOL(qcom_scm_ocmem_lock);
+
+/**
+ * qcom_scm_ocmem_unlock() - call OCMEM unlock interface to release an OCMEM
+ * region from the specified initiator
+ *
+ * @id:     tz initiator id
+ * @offset: OCMEM offset
+ * @size:   OCMEM size
+ */
+int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size)
+{
+	return __qcom_scm_ocmem_unlock(__scm->dev, id, offset, size);
+}
+EXPORT_SYMBOL(qcom_scm_ocmem_unlock);
+
+/**
  * qcom_scm_pas_init_image() - Initialize peripheral authentication service
  *			       state machine for a given peripheral, using the
  *			       metadata
@@ -327,6 +367,19 @@ static const struct reset_control_ops qcom_scm_pas_reset_ops = {
 	.deassert = qcom_scm_pas_reset_deassert,
 };
 
+/**
+ * qcom_scm_restore_sec_cfg_available() - Check if secure environment
+ * supports restore security config interface.
+ *
+ * Return true if restore-cfg interface is supported, false if not.
+ */
+bool qcom_scm_restore_sec_cfg_available(void)
+{
+	return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_MP,
+					    QCOM_SCM_RESTORE_SEC_CFG);
+}
+EXPORT_SYMBOL(qcom_scm_restore_sec_cfg_available);
+
 int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare)
 {
 	return __qcom_scm_restore_sec_cfg(__scm->dev, device_id, spare);
@@ -345,6 +398,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
 }
 EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init);
 
+int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
+{
+	return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en);
+}
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
+
 int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
 {
 	return __qcom_scm_io_readl(__scm->dev, addr, val);
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 99506bd873c0..81dcf5f1138e 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -42,6 +42,15 @@ extern int __qcom_scm_hdcp_req(struct device *dev,
 
 extern void __qcom_scm_init(void);
 
+#define QCOM_SCM_OCMEM_SVC			0xf
+#define QCOM_SCM_OCMEM_LOCK_CMD		0x1
+#define QCOM_SCM_OCMEM_UNLOCK_CMD		0x2
+
+extern int __qcom_scm_ocmem_lock(struct device *dev, u32 id, u32 offset,
+				 u32 size, u32 mode);
+extern int __qcom_scm_ocmem_unlock(struct device *dev, u32 id, u32 offset,
+				   u32 size);
+
 #define QCOM_SCM_SVC_PIL		0x2
 #define QCOM_SCM_PAS_INIT_IMAGE_CMD	0x1
 #define QCOM_SCM_PAS_MEM_SETUP_CMD	0x2
@@ -91,10 +100,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id,
 				      u32 spare);
 #define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE	3
 #define QCOM_SCM_IOMMU_SECURE_PTBL_INIT	4
+#define QCOM_SCM_SVC_SMMU_PROGRAM	0x15
+#define QCOM_SCM_CONFIG_ERRATA1		0x3
+#define QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL	0x2
 extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare,
 					     size_t *size);
 extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr,
 					     u32 size, u32 spare);
+extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev,
+						bool enable);
 #define QCOM_MEM_PROT_ASSIGN_ID	0x16
 extern int  __qcom_scm_assign_mem(struct device *dev,
 				  phys_addr_t mem_region, size_t mem_sz,
diff --git a/drivers/firmware/stratix10-rsu.c b/drivers/firmware/stratix10-rsu.c
index bb008c019920..f8533338b018 100644
--- a/drivers/firmware/stratix10-rsu.c
+++ b/drivers/firmware/stratix10-rsu.c
@@ -20,7 +20,6 @@
 #define RSU_VERSION_MASK		GENMASK_ULL(63, 32)
 #define RSU_ERROR_LOCATION_MASK		GENMASK_ULL(31, 0)
 #define RSU_ERROR_DETAIL_MASK		GENMASK_ULL(63, 32)
-#define RSU_FW_VERSION_MASK		GENMASK_ULL(15, 0)
 
 #define RSU_TIMEOUT	(msecs_to_jiffies(SVC_RSU_REQUEST_TIMEOUT_MS))
 
@@ -109,9 +108,12 @@ static void rsu_command_callback(struct stratix10_svc_client *client,
 {
 	struct stratix10_rsu_priv *priv = client->priv;
 
-	if (data->status != BIT(SVC_STATUS_RSU_OK))
-		dev_err(client->dev, "RSU returned status is %i\n",
-			data->status);
+	if (data->status == BIT(SVC_STATUS_RSU_NO_SUPPORT))
+		dev_warn(client->dev, "Secure FW doesn't support notify\n");
+	else if (data->status == BIT(SVC_STATUS_RSU_ERROR))
+		dev_err(client->dev, "Failure, returned status is %lu\n",
+			BIT(data->status));
+
 	complete(&priv->completion);
 }
 
@@ -133,9 +135,11 @@ static void rsu_retry_callback(struct stratix10_svc_client *client,
 
 	if (data->status == BIT(SVC_STATUS_RSU_OK))
 		priv->retry_counter = *counter;
+	else if (data->status == BIT(SVC_STATUS_RSU_NO_SUPPORT))
+		dev_warn(client->dev, "Secure FW doesn't support retry\n");
 	else
-		dev_err(client->dev, "Failed to get retry counter %i\n",
-			data->status);
+		dev_err(client->dev, "Failed to get retry counter %lu\n",
+			BIT(data->status));
 
 	complete(&priv->completion);
 }
@@ -333,15 +337,10 @@ static ssize_t notify_store(struct device *dev,
 		return ret;
 	}
 
-	/* only 19.3 or late version FW supports retry counter feature */
-	if (FIELD_GET(RSU_FW_VERSION_MASK, priv->status.version)) {
-		ret = rsu_send_msg(priv, COMMAND_RSU_RETRY,
-				   0, rsu_retry_callback);
-		if (ret) {
-			dev_err(dev,
-				"Error, getting RSU retry %i\n", ret);
-			return ret;
-		}
+	ret = rsu_send_msg(priv, COMMAND_RSU_RETRY, 0, rsu_retry_callback);
+	if (ret) {
+		dev_err(dev, "Error, getting RSU retry %i\n", ret);
+		return ret;
 	}
 
 	return count;
@@ -413,15 +412,10 @@ static int stratix10_rsu_probe(struct platform_device *pdev)
 		stratix10_svc_free_channel(priv->chan);
 	}
 
-	/* only 19.3 or late version FW supports retry counter feature */
-	if (FIELD_GET(RSU_FW_VERSION_MASK, priv->status.version)) {
-		ret = rsu_send_msg(priv, COMMAND_RSU_RETRY, 0,
-				   rsu_retry_callback);
-		if (ret) {
-			dev_err(dev,
-				"Error, getting RSU retry %i\n", ret);
-			stratix10_svc_free_channel(priv->chan);
-		}
+	ret = rsu_send_msg(priv, COMMAND_RSU_RETRY, 0, rsu_retry_callback);
+	if (ret) {
+		dev_err(dev, "Error, getting RSU retry %i\n", ret);
+		stratix10_svc_free_channel(priv->chan);
 	}
 
 	return ret;
diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
index b485321189e1..7ffb42b0775e 100644
--- a/drivers/firmware/stratix10-svc.c
+++ b/drivers/firmware/stratix10-svc.c
@@ -268,7 +268,7 @@ static void svc_thread_cmd_config_status(struct stratix10_svc_controller *ctrl,
 		 */
 		msleep(1000);
 		count_in_sec--;
-	};
+	}
 
 	if (res.a0 == INTEL_SIP_SMC_STATUS_OK && count_in_sec)
 		cb_data->status = BIT(SVC_STATUS_RECONFIG_COMPLETED);
@@ -493,10 +493,26 @@ static int svc_normal_to_secure_thread(void *data)
 			pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata);
 			break;
 		default:
-			pr_warn("it shouldn't happen\n");
+			pr_warn("Secure firmware doesn't support...\n");
+
+			/*
+			 * be compatible with older version firmware which
+			 * doesn't support RSU notify or retry
+			 */
+			if ((pdata->command == COMMAND_RSU_RETRY) ||
+				(pdata->command == COMMAND_RSU_NOTIFY)) {
+				cbdata->status =
+					BIT(SVC_STATUS_RSU_NO_SUPPORT);
+				cbdata->kaddr1 = NULL;
+				cbdata->kaddr2 = NULL;
+				cbdata->kaddr3 = NULL;
+				pdata->chan->scl->receive_cb(
+					pdata->chan->scl, cbdata);
+			}
 			break;
+
 		}
-	};
+	}
 
 	kfree(cbdata);
 	kfree(pdata);
diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index 19c56133234b..6741fcda0c37 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -804,7 +804,7 @@ static int __maybe_unused tegra_bpmp_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops tegra_bpmp_pm_ops = {
-	.resume_early = tegra_bpmp_resume,
+	.resume_noirq = tegra_bpmp_resume,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) || \
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index fd3d83745208..75bdfaa08380 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -711,8 +711,11 @@ static int zynqmp_firmware_probe(struct platform_device *pdev)
 	int ret;
 
 	np = of_find_compatible_node(NULL, NULL, "xlnx,zynqmp");
-	if (!np)
-		return 0;
+	if (!np) {
+		np = of_find_compatible_node(NULL, NULL, "xlnx,versal");
+		if (!np)
+			return 0;
+	}
 	of_node_put(np);
 
 	ret = get_set_conduit_method(dev->of_node);
@@ -770,6 +773,7 @@ static int zynqmp_firmware_remove(struct platform_device *pdev)
 
 static const struct of_device_id zynqmp_firmware_of_match[] = {
 	{.compatible = "xlnx,zynqmp-firmware"},
+	{.compatible = "xlnx,versal-firmware"},
 	{},
 };
 MODULE_DEVICE_TABLE(of, zynqmp_firmware_of_match);
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index 73c779e920ed..72380e1d31c7 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -156,7 +156,7 @@ config FPGA_DFL
 
 config FPGA_DFL_FME
 	tristate "FPGA DFL FME Driver"
-	depends on FPGA_DFL
+	depends on FPGA_DFL && HWMON
 	help
 	  The FPGA Management Engine (FME) is a feature device implemented
 	  under Device Feature List (DFL) framework. Select this option to
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index e4a34dc7947f..65437b6a6842 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -813,10 +813,8 @@ static int afu_dev_init(struct platform_device *pdev)
 static int afu_dev_destroy(struct platform_device *pdev)
 {
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct dfl_afu *afu;
 
 	mutex_lock(&pdata->lock);
-	afu = dfl_fpga_pdata_get_private(pdata);
 	afu_mmio_region_destroy(pdata);
 	afu_dma_region_destroy(pdata);
 	dfl_fpga_pdata_set_private(pdata, NULL);
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 4d78e182878f..1d4690c99268 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -14,6 +14,8 @@
  *   Henry Mitchel <henry.mitchel@intel.com>
  */
 
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
@@ -181,6 +183,381 @@ static const struct dfl_feature_ops fme_hdr_ops = {
 	.ioctl = fme_hdr_ioctl,
 };
 
+#define FME_THERM_THRESHOLD	0x8
+#define TEMP_THRESHOLD1		GENMASK_ULL(6, 0)
+#define TEMP_THRESHOLD1_EN	BIT_ULL(7)
+#define TEMP_THRESHOLD2		GENMASK_ULL(14, 8)
+#define TEMP_THRESHOLD2_EN	BIT_ULL(15)
+#define TRIP_THRESHOLD		GENMASK_ULL(30, 24)
+#define TEMP_THRESHOLD1_STATUS	BIT_ULL(32)		/* threshold1 reached */
+#define TEMP_THRESHOLD2_STATUS	BIT_ULL(33)		/* threshold2 reached */
+/* threshold1 policy: 0 - AP2 (90% throttle) / 1 - AP1 (50% throttle) */
+#define TEMP_THRESHOLD1_POLICY	BIT_ULL(44)
+
+#define FME_THERM_RDSENSOR_FMT1	0x10
+#define FPGA_TEMPERATURE	GENMASK_ULL(6, 0)
+
+#define FME_THERM_CAP		0x20
+#define THERM_NO_THROTTLE	BIT_ULL(0)
+
+#define MD_PRE_DEG
+
+static bool fme_thermal_throttle_support(void __iomem *base)
+{
+	u64 v = readq(base + FME_THERM_CAP);
+
+	return FIELD_GET(THERM_NO_THROTTLE, v) ? false : true;
+}
+
+static umode_t thermal_hwmon_attrs_visible(const void *drvdata,
+					   enum hwmon_sensor_types type,
+					   u32 attr, int channel)
+{
+	const struct dfl_feature *feature = drvdata;
+
+	/* temperature is always supported, and check hardware cap for others */
+	if (attr == hwmon_temp_input)
+		return 0444;
+
+	return fme_thermal_throttle_support(feature->ioaddr) ? 0444 : 0;
+}
+
+static int thermal_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			      u32 attr, int channel, long *val)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u64 v;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		v = readq(feature->ioaddr + FME_THERM_RDSENSOR_FMT1);
+		*val = (long)(FIELD_GET(FPGA_TEMPERATURE, v) * 1000);
+		break;
+	case hwmon_temp_max:
+		v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+		*val = (long)(FIELD_GET(TEMP_THRESHOLD1, v) * 1000);
+		break;
+	case hwmon_temp_crit:
+		v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+		*val = (long)(FIELD_GET(TEMP_THRESHOLD2, v) * 1000);
+		break;
+	case hwmon_temp_emergency:
+		v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+		*val = (long)(FIELD_GET(TRIP_THRESHOLD, v) * 1000);
+		break;
+	case hwmon_temp_max_alarm:
+		v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+		*val = (long)FIELD_GET(TEMP_THRESHOLD1_STATUS, v);
+		break;
+	case hwmon_temp_crit_alarm:
+		v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+		*val = (long)FIELD_GET(TEMP_THRESHOLD2_STATUS, v);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct hwmon_ops thermal_hwmon_ops = {
+	.is_visible = thermal_hwmon_attrs_visible,
+	.read = thermal_hwmon_read,
+};
+
+static const struct hwmon_channel_info *thermal_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_EMERGENCY |
+				 HWMON_T_MAX   | HWMON_T_MAX_ALARM |
+				 HWMON_T_CRIT  | HWMON_T_CRIT_ALARM),
+	NULL
+};
+
+static const struct hwmon_chip_info thermal_hwmon_chip_info = {
+	.ops = &thermal_hwmon_ops,
+	.info = thermal_hwmon_info,
+};
+
+static ssize_t temp1_max_policy_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u64 v;
+
+	v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+
+	return sprintf(buf, "%u\n",
+		       (unsigned int)FIELD_GET(TEMP_THRESHOLD1_POLICY, v));
+}
+
+static DEVICE_ATTR_RO(temp1_max_policy);
+
+static struct attribute *thermal_extra_attrs[] = {
+	&dev_attr_temp1_max_policy.attr,
+	NULL,
+};
+
+static umode_t thermal_extra_attrs_visible(struct kobject *kobj,
+					   struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+
+	return fme_thermal_throttle_support(feature->ioaddr) ? attr->mode : 0;
+}
+
+static const struct attribute_group thermal_extra_group = {
+	.attrs		= thermal_extra_attrs,
+	.is_visible	= thermal_extra_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(thermal_extra);
+
+static int fme_thermal_mgmt_init(struct platform_device *pdev,
+				 struct dfl_feature *feature)
+{
+	struct device *hwmon;
+
+	/*
+	 * create hwmon to allow userspace monitoring temperature and other
+	 * threshold information.
+	 *
+	 * temp1_input      -> FPGA device temperature
+	 * temp1_max        -> hardware threshold 1 -> 50% or 90% throttling
+	 * temp1_crit       -> hardware threshold 2 -> 100% throttling
+	 * temp1_emergency  -> hardware trip_threshold to shutdown FPGA
+	 * temp1_max_alarm  -> hardware threshold 1 alarm
+	 * temp1_crit_alarm -> hardware threshold 2 alarm
+	 *
+	 * create device specific sysfs interfaces, e.g. read temp1_max_policy
+	 * to understand the actual hardware throttling action (50% vs 90%).
+	 *
+	 * If hardware doesn't support automatic throttling per thresholds,
+	 * then all above sysfs interfaces are not visible except temp1_input
+	 * for temperature.
+	 */
+	hwmon = devm_hwmon_device_register_with_info(&pdev->dev,
+						     "dfl_fme_thermal", feature,
+						     &thermal_hwmon_chip_info,
+						     thermal_extra_groups);
+	if (IS_ERR(hwmon)) {
+		dev_err(&pdev->dev, "Fail to register thermal hwmon\n");
+		return PTR_ERR(hwmon);
+	}
+
+	return 0;
+}
+
+static const struct dfl_feature_id fme_thermal_mgmt_id_table[] = {
+	{.id = FME_FEATURE_ID_THERMAL_MGMT,},
+	{0,}
+};
+
+static const struct dfl_feature_ops fme_thermal_mgmt_ops = {
+	.init = fme_thermal_mgmt_init,
+};
+
+#define FME_PWR_STATUS		0x8
+#define FME_LATENCY_TOLERANCE	BIT_ULL(18)
+#define PWR_CONSUMED		GENMASK_ULL(17, 0)
+
+#define FME_PWR_THRESHOLD	0x10
+#define PWR_THRESHOLD1		GENMASK_ULL(6, 0)	/* in Watts */
+#define PWR_THRESHOLD2		GENMASK_ULL(14, 8)	/* in Watts */
+#define PWR_THRESHOLD_MAX	0x7f			/* in Watts */
+#define PWR_THRESHOLD1_STATUS	BIT_ULL(16)
+#define PWR_THRESHOLD2_STATUS	BIT_ULL(17)
+
+#define FME_PWR_XEON_LIMIT	0x18
+#define XEON_PWR_LIMIT		GENMASK_ULL(14, 0)	/* in 0.1 Watts */
+#define XEON_PWR_EN		BIT_ULL(15)
+#define FME_PWR_FPGA_LIMIT	0x20
+#define FPGA_PWR_LIMIT		GENMASK_ULL(14, 0)	/* in 0.1 Watts */
+#define FPGA_PWR_EN		BIT_ULL(15)
+
+static int power_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			    u32 attr, int channel, long *val)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u64 v;
+
+	switch (attr) {
+	case hwmon_power_input:
+		v = readq(feature->ioaddr + FME_PWR_STATUS);
+		*val = (long)(FIELD_GET(PWR_CONSUMED, v) * 1000000);
+		break;
+	case hwmon_power_max:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		*val = (long)(FIELD_GET(PWR_THRESHOLD1, v) * 1000000);
+		break;
+	case hwmon_power_crit:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		*val = (long)(FIELD_GET(PWR_THRESHOLD2, v) * 1000000);
+		break;
+	case hwmon_power_max_alarm:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		*val = (long)FIELD_GET(PWR_THRESHOLD1_STATUS, v);
+		break;
+	case hwmon_power_crit_alarm:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		*val = (long)FIELD_GET(PWR_THRESHOLD2_STATUS, v);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int power_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+			     u32 attr, int channel, long val)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	int ret = 0;
+	u64 v;
+
+	val = clamp_val(val / 1000000, 0, PWR_THRESHOLD_MAX);
+
+	mutex_lock(&pdata->lock);
+
+	switch (attr) {
+	case hwmon_power_max:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		v &= ~PWR_THRESHOLD1;
+		v |= FIELD_PREP(PWR_THRESHOLD1, val);
+		writeq(v, feature->ioaddr + FME_PWR_THRESHOLD);
+		break;
+	case hwmon_power_crit:
+		v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+		v &= ~PWR_THRESHOLD2;
+		v |= FIELD_PREP(PWR_THRESHOLD2, val);
+		writeq(v, feature->ioaddr + FME_PWR_THRESHOLD);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	mutex_unlock(&pdata->lock);
+
+	return ret;
+}
+
+static umode_t power_hwmon_attrs_visible(const void *drvdata,
+					 enum hwmon_sensor_types type,
+					 u32 attr, int channel)
+{
+	switch (attr) {
+	case hwmon_power_input:
+	case hwmon_power_max_alarm:
+	case hwmon_power_crit_alarm:
+		return 0444;
+	case hwmon_power_max:
+	case hwmon_power_crit:
+		return 0644;
+	}
+
+	return 0;
+}
+
+static const struct hwmon_ops power_hwmon_ops = {
+	.is_visible = power_hwmon_attrs_visible,
+	.read = power_hwmon_read,
+	.write = power_hwmon_write,
+};
+
+static const struct hwmon_channel_info *power_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(power, HWMON_P_INPUT |
+				  HWMON_P_MAX   | HWMON_P_MAX_ALARM |
+				  HWMON_P_CRIT  | HWMON_P_CRIT_ALARM),
+	NULL
+};
+
+static const struct hwmon_chip_info power_hwmon_chip_info = {
+	.ops = &power_hwmon_ops,
+	.info = power_hwmon_info,
+};
+
+static ssize_t power1_xeon_limit_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u16 xeon_limit = 0;
+	u64 v;
+
+	v = readq(feature->ioaddr + FME_PWR_XEON_LIMIT);
+
+	if (FIELD_GET(XEON_PWR_EN, v))
+		xeon_limit = FIELD_GET(XEON_PWR_LIMIT, v);
+
+	return sprintf(buf, "%u\n", xeon_limit * 100000);
+}
+
+static ssize_t power1_fpga_limit_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u16 fpga_limit = 0;
+	u64 v;
+
+	v = readq(feature->ioaddr + FME_PWR_FPGA_LIMIT);
+
+	if (FIELD_GET(FPGA_PWR_EN, v))
+		fpga_limit = FIELD_GET(FPGA_PWR_LIMIT, v);
+
+	return sprintf(buf, "%u\n", fpga_limit * 100000);
+}
+
+static ssize_t power1_ltr_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct dfl_feature *feature = dev_get_drvdata(dev);
+	u64 v;
+
+	v = readq(feature->ioaddr + FME_PWR_STATUS);
+
+	return sprintf(buf, "%u\n",
+		       (unsigned int)FIELD_GET(FME_LATENCY_TOLERANCE, v));
+}
+
+static DEVICE_ATTR_RO(power1_xeon_limit);
+static DEVICE_ATTR_RO(power1_fpga_limit);
+static DEVICE_ATTR_RO(power1_ltr);
+
+static struct attribute *power_extra_attrs[] = {
+	&dev_attr_power1_xeon_limit.attr,
+	&dev_attr_power1_fpga_limit.attr,
+	&dev_attr_power1_ltr.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(power_extra);
+
+static int fme_power_mgmt_init(struct platform_device *pdev,
+			       struct dfl_feature *feature)
+{
+	struct device *hwmon;
+
+	hwmon = devm_hwmon_device_register_with_info(&pdev->dev,
+						     "dfl_fme_power", feature,
+						     &power_hwmon_chip_info,
+						     power_extra_groups);
+	if (IS_ERR(hwmon)) {
+		dev_err(&pdev->dev, "Fail to register power hwmon\n");
+		return PTR_ERR(hwmon);
+	}
+
+	return 0;
+}
+
+static const struct dfl_feature_id fme_power_mgmt_id_table[] = {
+	{.id = FME_FEATURE_ID_POWER_MGMT,},
+	{0,}
+};
+
+static const struct dfl_feature_ops fme_power_mgmt_ops = {
+	.init = fme_power_mgmt_init,
+};
+
 static struct dfl_feature_driver fme_feature_drvs[] = {
 	{
 		.id_table = fme_hdr_id_table,
@@ -195,6 +572,14 @@ static struct dfl_feature_driver fme_feature_drvs[] = {
 		.ops = &fme_global_err_ops,
 	},
 	{
+		.id_table = fme_thermal_mgmt_id_table,
+		.ops = &fme_thermal_mgmt_ops,
+	},
+	{
+		.id_table = fme_power_mgmt_id_table,
+		.ops = &fme_power_mgmt_ops,
+	},
+	{
 		.ops = NULL,
 	},
 };
@@ -290,10 +675,8 @@ static int fme_dev_init(struct platform_device *pdev)
 static void fme_dev_destroy(struct platform_device *pdev)
 {
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct dfl_fme *fme;
 
 	mutex_lock(&pdata->lock);
-	fme = dfl_fpga_pdata_get_private(pdata);
 	dfl_fpga_pdata_set_private(pdata, NULL);
 	mutex_unlock(&pdata->lock);
 }
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index 9a17fe98c1b0..2888ff000e4d 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -119,10 +119,8 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->io_base = devm_ioremap_resource(kdev, res);
-	if (IS_ERR(priv->io_base)) {
-		dev_err(kdev, "unable to remap registers\n");
+	if (IS_ERR(priv->io_base))
 		return PTR_ERR(priv->io_base);
-	}
 
 	mgr = devm_fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
 				   &ts73xx_fpga_ops, priv);
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index af9b387c56d3..7d69af230567 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -101,7 +101,8 @@ static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
 
 	priv->clk = devm_clk_get(&pdev->dev, "aclk");
 	if (IS_ERR(priv->clk)) {
-		dev_err(&pdev->dev, "input clock not found\n");
+		if (PTR_ERR(priv->clk) != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "input clock not found\n");
 		return PTR_ERR(priv->clk);
 	}
 
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 31ef38e38537..ee7765049607 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -578,10 +578,8 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	init_completion(&priv->dma_done);
 
 	priv->irq = platform_get_irq(pdev, 0);
-	if (priv->irq < 0) {
-		dev_err(dev, "No IRQ available\n");
+	if (priv->irq < 0)
 		return priv->irq;
-	}
 
 	priv->clk = devm_clk_get(dev, "ref_clk");
 	if (IS_ERR(priv->clk)) {
diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index c612db7a914a..92ce6d85802c 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -53,6 +53,14 @@ config FSI_MASTER_AST_CF
 	lines driven by the internal ColdFire coprocessor. This requires
 	the corresponding machine specific ColdFire firmware to be available.
 
+config FSI_MASTER_ASPEED
+	tristate "FSI ASPEED master"
+	help
+	 This option enables a FSI master that is present behind an OPB bridge
+	 in the AST2600.
+
+	 Enable it for your BMC kernel in an OpenPower or IBM Power system.
+
 config FSI_SCOM
 	tristate "SCOM FSI client device driver"
 	---help---
diff --git a/drivers/fsi/Makefile b/drivers/fsi/Makefile
index e4a2ff043c32..da218a1ad8e1 100644
--- a/drivers/fsi/Makefile
+++ b/drivers/fsi/Makefile
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_FSI) += fsi-core.o
 obj-$(CONFIG_FSI_MASTER_HUB) += fsi-master-hub.o
+obj-$(CONFIG_FSI_MASTER_ASPEED) += fsi-master-aspeed.o
 obj-$(CONFIG_FSI_MASTER_GPIO) += fsi-master-gpio.o
 obj-$(CONFIG_FSI_MASTER_AST_CF) += fsi-master-ast-cf.o
 obj-$(CONFIG_FSI_SCOM) += fsi-scom.o
diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index 1f76740f33b6..8244da8a7241 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -544,6 +544,31 @@ static int fsi_slave_scan(struct fsi_slave *slave)
 	return 0;
 }
 
+static unsigned long aligned_access_size(size_t offset, size_t count)
+{
+	unsigned long offset_unit, count_unit;
+
+	/* Criteria:
+	 *
+	 * 1. Access size must be less than or equal to the maximum access
+	 *    width or the highest power-of-two factor of offset
+	 * 2. Access size must be less than or equal to the amount specified by
+	 *    count
+	 *
+	 * The access width is optimal if we can calculate 1 to be strictly
+	 * equal while still satisfying 2.
+	 */
+
+	/* Find 1 by the bottom bit of offset (with a 4 byte access cap) */
+	offset_unit = BIT(__builtin_ctzl(offset | 4));
+
+	/* Find 2 by the top bit of count */
+	count_unit = BIT(8 * sizeof(unsigned long) - 1 - __builtin_clzl(count));
+
+	/* Constrain the maximum access width to the minimum of both criteria */
+	return BIT(__builtin_ctzl(offset_unit | count_unit));
+}
+
 static ssize_t fsi_slave_sysfs_raw_read(struct file *file,
 		struct kobject *kobj, struct bin_attribute *attr, char *buf,
 		loff_t off, size_t count)
@@ -559,8 +584,7 @@ static ssize_t fsi_slave_sysfs_raw_read(struct file *file,
 		return -EINVAL;
 
 	for (total_len = 0; total_len < count; total_len += read_len) {
-		read_len = min_t(size_t, count, 4);
-		read_len -= off & 0x3;
+		read_len = aligned_access_size(off, count - total_len);
 
 		rc = fsi_slave_read(slave, off, buf + total_len, read_len);
 		if (rc)
@@ -587,8 +611,7 @@ static ssize_t fsi_slave_sysfs_raw_write(struct file *file,
 		return -EINVAL;
 
 	for (total_len = 0; total_len < count; total_len += write_len) {
-		write_len = min_t(size_t, count, 4);
-		write_len -= off & 0x3;
+		write_len = aligned_access_size(off, count - total_len);
 
 		rc = fsi_slave_write(slave, off, buf + total_len, write_len);
 		if (rc)
@@ -1241,6 +1264,19 @@ static ssize_t master_break_store(struct device *dev,
 
 static DEVICE_ATTR(break, 0200, NULL, master_break_store);
 
+static struct attribute *master_attrs[] = {
+	&dev_attr_break.attr,
+	&dev_attr_rescan.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(master);
+
+static struct class fsi_master_class = {
+	.name = "fsi-master",
+	.dev_groups = master_groups,
+};
+
 int fsi_master_register(struct fsi_master *master)
 {
 	int rc;
@@ -1249,6 +1285,7 @@ int fsi_master_register(struct fsi_master *master)
 	mutex_init(&master->scan_lock);
 	master->idx = ida_simple_get(&master_ida, 0, INT_MAX, GFP_KERNEL);
 	dev_set_name(&master->dev, "fsi%d", master->idx);
+	master->dev.class = &fsi_master_class;
 
 	rc = device_register(&master->dev);
 	if (rc) {
@@ -1256,20 +1293,6 @@ int fsi_master_register(struct fsi_master *master)
 		return rc;
 	}
 
-	rc = device_create_file(&master->dev, &dev_attr_rescan);
-	if (rc) {
-		device_del(&master->dev);
-		ida_simple_remove(&master_ida, master->idx);
-		return rc;
-	}
-
-	rc = device_create_file(&master->dev, &dev_attr_break);
-	if (rc) {
-		device_del(&master->dev);
-		ida_simple_remove(&master_ida, master->idx);
-		return rc;
-	}
-
 	np = dev_of_node(&master->dev);
 	if (!of_property_read_bool(np, "no-scan-on-init")) {
 		mutex_lock(&master->scan_lock);
@@ -1350,8 +1373,15 @@ static int __init fsi_init(void)
 	rc = bus_register(&fsi_bus_type);
 	if (rc)
 		goto fail_bus;
+
+	rc = class_register(&fsi_master_class);
+	if (rc)
+		goto fail_class;
+
 	return 0;
 
+ fail_class:
+	bus_unregister(&fsi_bus_type);
  fail_bus:
 	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
 	return rc;
@@ -1360,6 +1390,7 @@ postcore_initcall(fsi_init);
 
 static void fsi_exit(void)
 {
+	class_unregister(&fsi_master_class);
 	bus_unregister(&fsi_bus_type);
 	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
 	ida_destroy(&fsi_minor_ida);
diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c
new file mode 100644
index 000000000000..f49742b310c2
--- /dev/null
+++ b/drivers/fsi/fsi-master-aspeed.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (C) IBM Corporation 2018
+// FSI master driver for AST2600
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/fsi.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/iopoll.h>
+
+#include "fsi-master.h"
+
+struct fsi_master_aspeed {
+	struct fsi_master	master;
+	struct device		*dev;
+	void __iomem		*base;
+	struct clk		*clk;
+};
+
+#define to_fsi_master_aspeed(m) \
+	container_of(m, struct fsi_master_aspeed, master)
+
+/* Control register (size 0x400) */
+static const u32 ctrl_base = 0x80000000;
+
+static const u32 fsi_base = 0xa0000000;
+
+#define OPB_FSI_VER	0x00
+#define OPB_TRIGGER	0x04
+#define OPB_CTRL_BASE	0x08
+#define OPB_FSI_BASE	0x0c
+#define OPB_CLK_SYNC	0x3c
+#define OPB_IRQ_CLEAR	0x40
+#define OPB_IRQ_MASK	0x44
+#define OPB_IRQ_STATUS	0x48
+
+#define OPB0_SELECT	0x10
+#define OPB0_RW		0x14
+#define OPB0_XFER_SIZE	0x18
+#define OPB0_FSI_ADDR	0x1c
+#define OPB0_FSI_DATA_W	0x20
+#define OPB0_STATUS	0x80
+#define OPB0_FSI_DATA_R	0x84
+
+#define OPB0_WRITE_ORDER1	0x4c
+#define OPB0_WRITE_ORDER2	0x50
+#define OPB1_WRITE_ORDER1	0x54
+#define OPB1_WRITE_ORDER2	0x58
+#define OPB0_READ_ORDER1	0x5c
+#define OPB1_READ_ORDER2	0x60
+
+#define OPB_RETRY_COUNTER	0x64
+
+/* OPBn_STATUS */
+#define STATUS_HALFWORD_ACK	BIT(0)
+#define STATUS_FULLWORD_ACK	BIT(1)
+#define STATUS_ERR_ACK		BIT(2)
+#define STATUS_RETRY		BIT(3)
+#define STATUS_TIMEOUT		BIT(4)
+
+/* OPB_IRQ_MASK */
+#define OPB1_XFER_ACK_EN BIT(17)
+#define OPB0_XFER_ACK_EN BIT(16)
+
+/* OPB_RW */
+#define CMD_READ	BIT(0)
+#define CMD_WRITE	0
+
+/* OPBx_XFER_SIZE */
+#define XFER_FULLWORD	(BIT(1) | BIT(0))
+#define XFER_HALFWORD	(BIT(0))
+#define XFER_BYTE	(0)
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/fsi_master_aspeed.h>
+
+#define FSI_LINK_ENABLE_SETUP_TIME	10	/* in mS */
+
+#define DEFAULT_DIVISOR			14
+#define OPB_POLL_TIMEOUT		10000
+
+static int __opb_write(struct fsi_master_aspeed *aspeed, u32 addr,
+		       u32 val, u32 transfer_size)
+{
+	void __iomem *base = aspeed->base;
+	u32 reg, status;
+	int ret;
+
+	writel(CMD_WRITE, base + OPB0_RW);
+	writel(transfer_size, base + OPB0_XFER_SIZE);
+	writel(addr, base + OPB0_FSI_ADDR);
+	writel(val, base + OPB0_FSI_DATA_W);
+	writel(0x1, base + OPB_IRQ_CLEAR);
+	writel(0x1, base + OPB_TRIGGER);
+
+	ret = readl_poll_timeout(base + OPB_IRQ_STATUS, reg,
+				(reg & OPB0_XFER_ACK_EN) != 0,
+				0, OPB_POLL_TIMEOUT);
+
+	status = readl(base + OPB0_STATUS);
+
+	trace_fsi_master_aspeed_opb_write(addr, val, transfer_size, status, reg);
+
+	/* Return error when poll timed out */
+	if (ret)
+		return ret;
+
+	/* Command failed, master will reset */
+	if (status & STATUS_ERR_ACK)
+		return -EIO;
+
+	return 0;
+}
+
+static int opb_writeb(struct fsi_master_aspeed *aspeed, u32 addr, u8 val)
+{
+	return __opb_write(aspeed, addr, val, XFER_BYTE);
+}
+
+static int opb_writew(struct fsi_master_aspeed *aspeed, u32 addr, __be16 val)
+{
+	return __opb_write(aspeed, addr, (__force u16)val, XFER_HALFWORD);
+}
+
+static int opb_writel(struct fsi_master_aspeed *aspeed, u32 addr, __be32 val)
+{
+	return __opb_write(aspeed, addr, (__force u32)val, XFER_FULLWORD);
+}
+
+static int __opb_read(struct fsi_master_aspeed *aspeed, uint32_t addr,
+		      u32 transfer_size, void *out)
+{
+	void __iomem *base = aspeed->base;
+	u32 result, reg;
+	int status, ret;
+
+	writel(CMD_READ, base + OPB0_RW);
+	writel(transfer_size, base + OPB0_XFER_SIZE);
+	writel(addr, base + OPB0_FSI_ADDR);
+	writel(0x1, base + OPB_IRQ_CLEAR);
+	writel(0x1, base + OPB_TRIGGER);
+
+	ret = readl_poll_timeout(base + OPB_IRQ_STATUS, reg,
+			   (reg & OPB0_XFER_ACK_EN) != 0,
+			   0, OPB_POLL_TIMEOUT);
+
+	status = readl(base + OPB0_STATUS);
+
+	result = readl(base + OPB0_FSI_DATA_R);
+
+	trace_fsi_master_aspeed_opb_read(addr, transfer_size, result,
+			readl(base + OPB0_STATUS),
+			reg);
+
+	/* Return error when poll timed out */
+	if (ret)
+		return ret;
+
+	/* Command failed, master will reset */
+	if (status & STATUS_ERR_ACK)
+		return -EIO;
+
+	if (out) {
+		switch (transfer_size) {
+		case XFER_BYTE:
+			*(u8 *)out = result;
+			break;
+		case XFER_HALFWORD:
+			*(u16 *)out = result;
+			break;
+		case XFER_FULLWORD:
+			*(u32 *)out = result;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+	}
+
+	return 0;
+}
+
+static int opb_readl(struct fsi_master_aspeed *aspeed, uint32_t addr, __be32 *out)
+{
+	return __opb_read(aspeed, addr, XFER_FULLWORD, out);
+}
+
+static int opb_readw(struct fsi_master_aspeed *aspeed, uint32_t addr, __be16 *out)
+{
+	return __opb_read(aspeed, addr, XFER_HALFWORD, (void *)out);
+}
+
+static int opb_readb(struct fsi_master_aspeed *aspeed, uint32_t addr, u8 *out)
+{
+	return __opb_read(aspeed, addr, XFER_BYTE, (void *)out);
+}
+
+static int check_errors(struct fsi_master_aspeed *aspeed, int err)
+{
+	int ret;
+
+	if (trace_fsi_master_aspeed_opb_error_enabled()) {
+		__be32 mresp0, mstap0, mesrb0;
+
+		opb_readl(aspeed, ctrl_base + FSI_MRESP0, &mresp0);
+		opb_readl(aspeed, ctrl_base + FSI_MSTAP0, &mstap0);
+		opb_readl(aspeed, ctrl_base + FSI_MESRB0, &mesrb0);
+
+		trace_fsi_master_aspeed_opb_error(
+				be32_to_cpu(mresp0),
+				be32_to_cpu(mstap0),
+				be32_to_cpu(mesrb0));
+	}
+
+	if (err == -EIO) {
+		/* Check MAEB (0x70) ? */
+
+		/* Then clear errors in master */
+		ret = opb_writel(aspeed, ctrl_base + FSI_MRESP0,
+				cpu_to_be32(FSI_MRESP_RST_ALL_MASTER));
+		if (ret) {
+			/* TODO: log? return different code? */
+			return ret;
+		}
+		/* TODO: confirm that 0x70 was okay */
+	}
+
+	/* This will pass through timeout errors */
+	return err;
+}
+
+static int aspeed_master_read(struct fsi_master *master, int link,
+			uint8_t id, uint32_t addr, void *val, size_t size)
+{
+	struct fsi_master_aspeed *aspeed = to_fsi_master_aspeed(master);
+	int ret;
+
+	if (id != 0)
+		return -EINVAL;
+
+	addr += link * FSI_HUB_LINK_SIZE;
+
+	switch (size) {
+	case 1:
+		ret = opb_readb(aspeed, fsi_base + addr, val);
+		break;
+	case 2:
+		ret = opb_readw(aspeed, fsi_base + addr, val);
+		break;
+	case 4:
+		ret = opb_readl(aspeed, fsi_base + addr, val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = check_errors(aspeed, ret);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int aspeed_master_write(struct fsi_master *master, int link,
+			uint8_t id, uint32_t addr, const void *val, size_t size)
+{
+	struct fsi_master_aspeed *aspeed = to_fsi_master_aspeed(master);
+	int ret;
+
+	if (id != 0)
+		return -EINVAL;
+
+	addr += link * FSI_HUB_LINK_SIZE;
+
+	switch (size) {
+	case 1:
+		ret = opb_writeb(aspeed, fsi_base + addr, *(u8 *)val);
+		break;
+	case 2:
+		ret = opb_writew(aspeed, fsi_base + addr, *(__be16 *)val);
+		break;
+	case 4:
+		ret = opb_writel(aspeed, fsi_base + addr, *(__be32 *)val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = check_errors(aspeed, ret);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int aspeed_master_link_enable(struct fsi_master *master, int link)
+{
+	struct fsi_master_aspeed *aspeed = to_fsi_master_aspeed(master);
+	int idx, bit, ret;
+	__be32 reg, result;
+
+	idx = link / 32;
+	bit = link % 32;
+
+	reg = cpu_to_be32(0x80000000 >> bit);
+
+	ret = opb_writel(aspeed, ctrl_base + FSI_MSENP0 + (4 * idx), reg);
+	if (ret)
+		return ret;
+
+	mdelay(FSI_LINK_ENABLE_SETUP_TIME);
+
+	ret = opb_readl(aspeed, ctrl_base + FSI_MENP0 + (4 * idx), &result);
+	if (ret)
+		return ret;
+
+	if (result != reg) {
+		dev_err(aspeed->dev, "%s failed: %08x\n", __func__, result);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int aspeed_master_term(struct fsi_master *master, int link, uint8_t id)
+{
+	uint32_t addr;
+	__be32 cmd;
+
+	addr = 0x4;
+	cmd = cpu_to_be32(0xecc00000);
+
+	return aspeed_master_write(master, link, id, addr, &cmd, 4);
+}
+
+static int aspeed_master_break(struct fsi_master *master, int link)
+{
+	uint32_t addr;
+	__be32 cmd;
+
+	addr = 0x0;
+	cmd = cpu_to_be32(0xc0de0000);
+
+	return aspeed_master_write(master, link, 0, addr, &cmd, 4);
+}
+
+static void aspeed_master_release(struct device *dev)
+{
+	struct fsi_master_aspeed *aspeed =
+		to_fsi_master_aspeed(dev_to_fsi_master(dev));
+
+	kfree(aspeed);
+}
+
+/* mmode encoders */
+static inline u32 fsi_mmode_crs0(u32 x)
+{
+	return (x & FSI_MMODE_CRS0MASK) << FSI_MMODE_CRS0SHFT;
+}
+
+static inline u32 fsi_mmode_crs1(u32 x)
+{
+	return (x & FSI_MMODE_CRS1MASK) << FSI_MMODE_CRS1SHFT;
+}
+
+static int aspeed_master_init(struct fsi_master_aspeed *aspeed)
+{
+	__be32 reg;
+
+	reg = cpu_to_be32(FSI_MRESP_RST_ALL_MASTER | FSI_MRESP_RST_ALL_LINK
+			| FSI_MRESP_RST_MCR | FSI_MRESP_RST_PYE);
+	opb_writel(aspeed, ctrl_base + FSI_MRESP0, reg);
+
+	/* Initialize the MFSI (hub master) engine */
+	reg = cpu_to_be32(FSI_MRESP_RST_ALL_MASTER | FSI_MRESP_RST_ALL_LINK
+			| FSI_MRESP_RST_MCR | FSI_MRESP_RST_PYE);
+	opb_writel(aspeed, ctrl_base + FSI_MRESP0, reg);
+
+	reg = cpu_to_be32(FSI_MECTRL_EOAE | FSI_MECTRL_P8_AUTO_TERM);
+	opb_writel(aspeed, ctrl_base + FSI_MECTRL, reg);
+
+	reg = cpu_to_be32(FSI_MMODE_ECRC | FSI_MMODE_EPC | FSI_MMODE_RELA
+			| fsi_mmode_crs0(DEFAULT_DIVISOR)
+			| fsi_mmode_crs1(DEFAULT_DIVISOR)
+			| FSI_MMODE_P8_TO_LSB);
+	opb_writel(aspeed, ctrl_base + FSI_MMODE, reg);
+
+	reg = cpu_to_be32(0xffff0000);
+	opb_writel(aspeed, ctrl_base + FSI_MDLYR, reg);
+
+	reg = cpu_to_be32(~0);
+	opb_writel(aspeed, ctrl_base + FSI_MSENP0, reg);
+
+	/* Leave enabled long enough for master logic to set up */
+	mdelay(FSI_LINK_ENABLE_SETUP_TIME);
+
+	opb_writel(aspeed, ctrl_base + FSI_MCENP0, reg);
+
+	opb_readl(aspeed, ctrl_base + FSI_MAEB, NULL);
+
+	reg = cpu_to_be32(FSI_MRESP_RST_ALL_MASTER | FSI_MRESP_RST_ALL_LINK);
+	opb_writel(aspeed, ctrl_base + FSI_MRESP0, reg);
+
+	opb_readl(aspeed, ctrl_base + FSI_MLEVP0, NULL);
+
+	/* Reset the master bridge */
+	reg = cpu_to_be32(FSI_MRESB_RST_GEN);
+	opb_writel(aspeed, ctrl_base + FSI_MRESB0, reg);
+
+	reg = cpu_to_be32(FSI_MRESB_RST_ERR);
+	opb_writel(aspeed, ctrl_base + FSI_MRESB0, reg);
+
+	return 0;
+}
+
+static int fsi_master_aspeed_probe(struct platform_device *pdev)
+{
+	struct fsi_master_aspeed *aspeed;
+	struct resource *res;
+	int rc, links, reg;
+	__be32 raw;
+
+	aspeed = devm_kzalloc(&pdev->dev, sizeof(*aspeed), GFP_KERNEL);
+	if (!aspeed)
+		return -ENOMEM;
+
+	aspeed->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	aspeed->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(aspeed->base))
+		return PTR_ERR(aspeed->base);
+
+	aspeed->clk = devm_clk_get(aspeed->dev, NULL);
+	if (IS_ERR(aspeed->clk)) {
+		dev_err(aspeed->dev, "couldn't get clock\n");
+		return PTR_ERR(aspeed->clk);
+	}
+	rc = clk_prepare_enable(aspeed->clk);
+	if (rc) {
+		dev_err(aspeed->dev, "couldn't enable clock\n");
+		return rc;
+	}
+
+	writel(0x1, aspeed->base + OPB_CLK_SYNC);
+	writel(OPB1_XFER_ACK_EN | OPB0_XFER_ACK_EN,
+			aspeed->base + OPB_IRQ_MASK);
+
+	/* TODO: determine an appropriate value */
+	writel(0x10, aspeed->base + OPB_RETRY_COUNTER);
+
+	writel(ctrl_base, aspeed->base + OPB_CTRL_BASE);
+	writel(fsi_base, aspeed->base + OPB_FSI_BASE);
+
+	/* Set read data order */
+	writel(0x00030b1b, aspeed->base + OPB0_READ_ORDER1);
+
+	/* Set write data order */
+	writel(0x0011101b, aspeed->base + OPB0_WRITE_ORDER1);
+	writel(0x0c330f3f, aspeed->base + OPB0_WRITE_ORDER2);
+
+	/*
+	 * Select OPB0 for all operations.
+	 * Will need to be reworked when enabling DMA or anything that uses
+	 * OPB1.
+	 */
+	writel(0x1, aspeed->base + OPB0_SELECT);
+
+	rc = opb_readl(aspeed, ctrl_base + FSI_MVER, &raw);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to read hub version\n");
+		return rc;
+	}
+
+	reg = be32_to_cpu(raw);
+	links = (reg >> 8) & 0xff;
+	dev_info(&pdev->dev, "hub version %08x (%d links)\n", reg, links);
+
+	aspeed->master.dev.parent = &pdev->dev;
+	aspeed->master.dev.release = aspeed_master_release;
+	aspeed->master.dev.of_node = of_node_get(dev_of_node(&pdev->dev));
+
+	aspeed->master.n_links = links;
+	aspeed->master.read = aspeed_master_read;
+	aspeed->master.write = aspeed_master_write;
+	aspeed->master.send_break = aspeed_master_break;
+	aspeed->master.term = aspeed_master_term;
+	aspeed->master.link_enable = aspeed_master_link_enable;
+
+	dev_set_drvdata(&pdev->dev, aspeed);
+
+	aspeed_master_init(aspeed);
+
+	rc = fsi_master_register(&aspeed->master);
+	if (rc)
+		goto err_release;
+
+	/* At this point, fsi_master_register performs the device_initialize(),
+	 * and holds the sole reference on master.dev. This means the device
+	 * will be freed (via ->release) during any subsequent call to
+	 * fsi_master_unregister.  We add our own reference to it here, so we
+	 * can perform cleanup (in _remove()) without it being freed before
+	 * we're ready.
+	 */
+	get_device(&aspeed->master.dev);
+	return 0;
+
+err_release:
+	clk_disable_unprepare(aspeed->clk);
+	return rc;
+}
+
+static int fsi_master_aspeed_remove(struct platform_device *pdev)
+{
+	struct fsi_master_aspeed *aspeed = platform_get_drvdata(pdev);
+
+	fsi_master_unregister(&aspeed->master);
+	clk_disable_unprepare(aspeed->clk);
+
+	return 0;
+}
+
+static const struct of_device_id fsi_master_aspeed_match[] = {
+	{ .compatible = "aspeed,ast2600-fsi-master" },
+	{ },
+};
+
+static struct platform_driver fsi_master_aspeed_driver = {
+	.driver = {
+		.name		= "fsi-master-aspeed",
+		.of_match_table	= fsi_master_aspeed_match,
+	},
+	.probe	= fsi_master_aspeed_probe,
+	.remove = fsi_master_aspeed_remove,
+};
+
+module_platform_driver(fsi_master_aspeed_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/fsi/fsi-master-hub.c b/drivers/fsi/fsi-master-hub.c
index f158b1a88286..def35cf92571 100644
--- a/drivers/fsi/fsi-master-hub.c
+++ b/drivers/fsi/fsi-master-hub.c
@@ -13,53 +13,7 @@
 
 #include "fsi-master.h"
 
-/* Control Registers */
-#define FSI_MMODE		0x0		/* R/W: mode */
-#define FSI_MDLYR		0x4		/* R/W: delay */
-#define FSI_MCRSP		0x8		/* R/W: clock rate */
-#define FSI_MENP0		0x10		/* R/W: enable */
-#define FSI_MLEVP0		0x18		/* R: plug detect */
-#define FSI_MSENP0		0x18		/* S: Set enable */
-#define FSI_MCENP0		0x20		/* C: Clear enable */
-#define FSI_MAEB		0x70		/* R: Error address */
-#define FSI_MVER		0x74		/* R: master version/type */
-#define FSI_MRESP0		0xd0		/* W: Port reset */
-#define FSI_MESRB0		0x1d0		/* R: Master error status */
-#define FSI_MRESB0		0x1d0		/* W: Reset bridge */
-#define FSI_MECTRL		0x2e0		/* W: Error control */
-
-/* MMODE: Mode control */
-#define FSI_MMODE_EIP		0x80000000	/* Enable interrupt polling */
-#define FSI_MMODE_ECRC		0x40000000	/* Enable error recovery */
-#define FSI_MMODE_EPC		0x10000000	/* Enable parity checking */
-#define FSI_MMODE_P8_TO_LSB	0x00000010	/* Timeout value LSB */
-						/*   MSB=1, LSB=0 is 0.8 ms */
-						/*   MSB=0, LSB=1 is 0.9 ms */
-#define FSI_MMODE_CRS0SHFT	18		/* Clk rate selection 0 shift */
-#define FSI_MMODE_CRS0MASK	0x3ff		/* Clk rate selection 0 mask */
-#define FSI_MMODE_CRS1SHFT	8		/* Clk rate selection 1 shift */
-#define FSI_MMODE_CRS1MASK	0x3ff		/* Clk rate selection 1 mask */
-
-/* MRESB: Reset brindge */
-#define FSI_MRESB_RST_GEN	0x80000000	/* General reset */
-#define FSI_MRESB_RST_ERR	0x40000000	/* Error Reset */
-
-/* MRESB: Reset port */
-#define FSI_MRESP_RST_ALL_MASTER 0x20000000	/* Reset all FSI masters */
-#define FSI_MRESP_RST_ALL_LINK	0x10000000	/* Reset all FSI port contr. */
-#define FSI_MRESP_RST_MCR	0x08000000	/* Reset FSI master reg. */
-#define FSI_MRESP_RST_PYE	0x04000000	/* Reset FSI parity error */
-#define FSI_MRESP_RST_ALL	0xfc000000	/* Reset any error */
-
-/* MECTRL: Error control */
-#define FSI_MECTRL_EOAE		0x8000		/* Enable machine check when */
-						/* master 0 in error */
-#define FSI_MECTRL_P8_AUTO_TERM	0x4000		/* Auto terminate */
-
 #define FSI_ENGID_HUB_MASTER		0x1c
-#define FSI_HUB_LINK_OFFSET		0x80000
-#define FSI_HUB_LINK_SIZE		0x80000
-#define FSI_HUB_MASTER_MAX_LINKS	8
 
 #define FSI_LINK_ENABLE_SETUP_TIME	10	/* in mS */
 
diff --git a/drivers/fsi/fsi-master.h b/drivers/fsi/fsi-master.h
index c7174237e864..6e8d4d4d5149 100644
--- a/drivers/fsi/fsi-master.h
+++ b/drivers/fsi/fsi-master.h
@@ -12,6 +12,71 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 
+/*
+ * Master registers
+ *
+ * These are used by hardware masters, such as the one in the FSP2, AST2600 and
+ * the hub master in POWER processors.
+ */
+
+/* Control Registers */
+#define FSI_MMODE		0x0		/* R/W: mode */
+#define FSI_MDLYR		0x4		/* R/W: delay */
+#define FSI_MCRSP		0x8		/* R/W: clock rate */
+#define FSI_MENP0		0x10		/* R/W: enable */
+#define FSI_MLEVP0		0x18		/* R: plug detect */
+#define FSI_MSENP0		0x18		/* S: Set enable */
+#define FSI_MCENP0		0x20		/* C: Clear enable */
+#define FSI_MAEB		0x70		/* R: Error address */
+#define FSI_MVER		0x74		/* R: master version/type */
+#define FSI_MSTAP0		0xd0		/* R: Port status */
+#define FSI_MRESP0		0xd0		/* W: Port reset */
+#define FSI_MESRB0		0x1d0		/* R: Master error status */
+#define FSI_MRESB0		0x1d0		/* W: Reset bridge */
+#define FSI_MSCSB0		0x1d4		/* R: Master sub command stack */
+#define FSI_MATRB0		0x1d8		/* R: Master address trace */
+#define FSI_MDTRB0		0x1dc		/* R: Master data trace */
+#define FSI_MECTRL		0x2e0		/* W: Error control */
+
+/* MMODE: Mode control */
+#define FSI_MMODE_EIP		0x80000000	/* Enable interrupt polling */
+#define FSI_MMODE_ECRC		0x40000000	/* Enable error recovery */
+#define FSI_MMODE_RELA		0x20000000	/* Enable relative address commands */
+#define FSI_MMODE_EPC		0x10000000	/* Enable parity checking */
+#define FSI_MMODE_P8_TO_LSB	0x00000010	/* Timeout value LSB */
+						/*   MSB=1, LSB=0 is 0.8 ms */
+						/*   MSB=0, LSB=1 is 0.9 ms */
+#define FSI_MMODE_CRS0SHFT	18		/* Clk rate selection 0 shift */
+#define FSI_MMODE_CRS0MASK	0x3ff		/* Clk rate selection 0 mask */
+#define FSI_MMODE_CRS1SHFT	8		/* Clk rate selection 1 shift */
+#define FSI_MMODE_CRS1MASK	0x3ff		/* Clk rate selection 1 mask */
+
+/* MRESB: Reset brindge */
+#define FSI_MRESB_RST_GEN	0x80000000	/* General reset */
+#define FSI_MRESB_RST_ERR	0x40000000	/* Error Reset */
+
+/* MRESP: Reset port */
+#define FSI_MRESP_RST_ALL_MASTER 0x20000000	/* Reset all FSI masters */
+#define FSI_MRESP_RST_ALL_LINK	0x10000000	/* Reset all FSI port contr. */
+#define FSI_MRESP_RST_MCR	0x08000000	/* Reset FSI master reg. */
+#define FSI_MRESP_RST_PYE	0x04000000	/* Reset FSI parity error */
+#define FSI_MRESP_RST_ALL	0xfc000000	/* Reset any error */
+
+/* MECTRL: Error control */
+#define FSI_MECTRL_EOAE		0x8000		/* Enable machine check when */
+						/* master 0 in error */
+#define FSI_MECTRL_P8_AUTO_TERM	0x4000		/* Auto terminate */
+
+#define FSI_HUB_LINK_OFFSET		0x80000
+#define FSI_HUB_LINK_SIZE		0x80000
+#define FSI_HUB_MASTER_MAX_LINKS	8
+
+/*
+ * Protocol definitions
+ *
+ * These are used by low level masters that bit-bang out the protocol
+ */
+
 /* Various protocol delays */
 #define	FSI_ECHO_DELAY_CLOCKS	16	/* Number clocks for echo delay */
 #define	FSI_SEND_DELAY_CLOCKS	16	/* Number clocks for send delay */
@@ -47,6 +112,12 @@
 /* fsi-master definition and flags */
 #define FSI_MASTER_FLAG_SWCLOCK		0x1
 
+/*
+ * Structures and function prototypes
+ *
+ * These are common to all masters
+ */
+
 struct fsi_master {
 	struct device	dev;
 	int		idx;
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 38e096e6925f..b585f2ef6dd9 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -120,6 +120,14 @@ config GPIO_ASPEED
 	help
 	  Say Y here to support Aspeed AST2400 and AST2500 GPIO controllers.
 
+config GPIO_ASPEED_SGPIO
+	bool "Aspeed SGPIO support"
+	depends on (ARCH_ASPEED || COMPILE_TEST) && OF_GPIO
+	select GPIO_GENERIC
+	select GPIOLIB_IRQCHIP
+	help
+	  Say Y here to support Aspeed AST2500 SGPIO functionality.
+
 config GPIO_ATH79
 	tristate "Atheros AR71XX/AR724X/AR913X GPIO support"
 	default y if ATH79
@@ -147,6 +155,15 @@ config GPIO_BCM_KONA
 	help
 	  Turn on GPIO support for Broadcom "Kona" chips.
 
+config GPIO_BCM_XGS_IPROC
+	tristate "BRCM XGS iProc GPIO support"
+	depends on OF_GPIO && (ARCH_BCM_IPROC || COMPILE_TEST)
+	select GPIO_GENERIC
+	select GPIOLIB_IRQCHIP
+	default ARCH_BCM_IPROC
+	help
+	  Say yes here to enable GPIO support for Broadcom XGS iProc SoCs.
+
 config GPIO_BRCMSTB
 	tristate "BRCMSTB GPIO support"
 	default y if (ARCH_BRCMSTB || BMIPS_GENERIC)
@@ -295,10 +312,16 @@ config GPIO_IXP4XX
 	  IXP4xx series of chips.
 
 	  If unsure, say N.
+config GPIO_LOGICVC
+	tristate "Xylon LogiCVC GPIO support"
+	depends on MFD_SYSCON && OF
+	help
+	  Say yes here to support GPIO functionality of the Xylon LogiCVC
+	  programmable logic block.
 
 config GPIO_LOONGSON
 	bool "Loongson-2/3 GPIO support"
-	depends on CPU_LOONGSON2 || CPU_LOONGSON3
+	depends on CPU_LOONGSON2EF || CPU_LOONGSON64
 	help
 	  driver for GPIO functionality on Loongson-2F/3A/3B processors.
 
@@ -318,14 +341,6 @@ config GPIO_LPC32XX
 	  Select this option to enable GPIO driver for
 	  NXP LPC32XX devices.
 
-config GPIO_LYNXPOINT
-	tristate "Intel Lynxpoint GPIO support"
-	depends on ACPI && X86
-	select GPIOLIB_IRQCHIP
-	help
-	  driver for GPIO functionality on Intel Lynxpoint PCH chipset
-	  Requires ACPI device enumeration code to set up a platform device.
-
 config GPIO_MB86S7X
 	tristate "GPIO support for Fujitsu MB86S7x Platforms"
 	help
@@ -435,6 +450,15 @@ config GPIO_RCAR
 	help
 	  Say yes here to support GPIO on Renesas R-Car SoCs.
 
+config GPIO_RDA
+	bool "RDA Micro GPIO controller support"
+	depends on ARCH_RDA || COMPILE_TEST
+	depends on OF_GPIO
+	select GPIO_GENERIC
+	select GPIOLIB_IRQCHIP
+	help
+	  Say Y here to support RDA Micro GPIO controller.
+
 config GPIO_REG
 	bool
 	help
@@ -453,6 +477,15 @@ config GPIO_SAMA5D2_PIOBU
 	  The difference from regular GPIOs is that they
 	  maintain their value during backup/self-refresh.
 
+config GPIO_SIFIVE
+	bool "SiFive GPIO support"
+	depends on OF_GPIO && IRQ_DOMAIN_HIERARCHY
+	select GPIO_GENERIC
+	select GPIOLIB_IRQCHIP
+	select REGMAP_MMIO
+	help
+	  Say yes here to support the GPIO device on SiFive SoCs.
+
 config GPIO_SIOX
 	tristate "SIOX GPIO support"
 	depends on SIOX
@@ -527,10 +560,11 @@ config GPIO_TEGRA
 
 config GPIO_TEGRA186
 	tristate "NVIDIA Tegra186 GPIO support"
-	default ARCH_TEGRA_186_SOC
-	depends on ARCH_TEGRA_186_SOC || COMPILE_TEST
+	default ARCH_TEGRA_186_SOC || ARCH_TEGRA_194_SOC
+	depends on ARCH_TEGRA_186_SOC || ARCH_TEGRA_194_SOC || COMPILE_TEST
 	depends on OF_GPIO
 	select GPIOLIB_IRQCHIP
+	select IRQ_DOMAIN_HIERARCHY
 	help
 	  Say yes here to support GPIO pins on NVIDIA Tegra186 SoCs.
 
@@ -586,6 +620,13 @@ config GPIO_VX855
 	  additional drivers must be enabled in order to use the
 	  functionality of the device.
 
+config GPIO_WCD934X
+	tristate "Qualcomm Technologies Inc WCD9340/WCD9341 gpio controller driver"
+	depends on MFD_WCD934X && OF_GPIO
+	help
+         This driver is to supprot GPIO block found on the Qualcomm Technologies
+	 Inc WCD9340/WCD9341 Audio Codec.
+
 config GPIO_XGENE
 	bool "APM X-Gene GPIO controller support"
 	depends on ARM64 && OF_GPIO
@@ -1121,6 +1162,7 @@ config GPIO_MADERA
 config GPIO_MAX77620
 	tristate "GPIO support for PMIC MAX77620 and MAX20024"
 	depends on MFD_MAX77620
+	select GPIOLIB_IRQCHIP
 	help
 	  GPIO driver for MAX77620 and MAX20024 PMIC from Maxim Semiconductor.
 	  MAX77620 PMIC has 8 pins that can be configured as GPIOs. The
@@ -1320,7 +1362,7 @@ config GPIO_BT8XX
 	  The card needs to be physically altered for using it as a
 	  GPIO card. For more information on how to build a GPIO card
 	  from a BT8xx TV card, see the documentation file at
-	  Documentation/driver-api/bt8xxgpio.rst
+	  Documentation/driver-api/gpio/bt8xxgpio.rst
 
 	  If unsure, say N.
 
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index d2fd19c15bae..76ff2a5feaba 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -32,8 +32,10 @@ obj-$(CONFIG_GPIO_AMD_FCH)		+= gpio-amd-fch.o
 obj-$(CONFIG_GPIO_AMDPT)		+= gpio-amdpt.o
 obj-$(CONFIG_GPIO_ARIZONA)		+= gpio-arizona.o
 obj-$(CONFIG_GPIO_ASPEED)		+= gpio-aspeed.o
+obj-$(CONFIG_GPIO_ASPEED_SGPIO)		+= gpio-aspeed-sgpio.o
 obj-$(CONFIG_GPIO_ATH79)		+= gpio-ath79.o
 obj-$(CONFIG_GPIO_BCM_KONA)		+= gpio-bcm-kona.o
+obj-$(CONFIG_GPIO_BCM_XGS_IPROC)	+= gpio-xgs-iproc.o
 obj-$(CONFIG_GPIO_BD70528)		+= gpio-bd70528.o
 obj-$(CONFIG_GPIO_BD9571MWV)		+= gpio-bd9571mwv.o
 obj-$(CONFIG_GPIO_BRCMSTB)		+= gpio-brcmstb.o
@@ -67,6 +69,7 @@ obj-$(CONFIG_GPIO_IT87)			+= gpio-it87.o
 obj-$(CONFIG_GPIO_IXP4XX)		+= gpio-ixp4xx.o
 obj-$(CONFIG_GPIO_JANZ_TTL)		+= gpio-janz-ttl.o
 obj-$(CONFIG_GPIO_KEMPLD)		+= gpio-kempld.o
+obj-$(CONFIG_GPIO_LOGICVC)		+= gpio-logicvc.o
 obj-$(CONFIG_GPIO_LOONGSON1)		+= gpio-loongson1.o
 obj-$(CONFIG_GPIO_LOONGSON)		+= gpio-loongson.o
 obj-$(CONFIG_GPIO_LP3943)		+= gpio-lp3943.o
@@ -74,7 +77,6 @@ obj-$(CONFIG_GPIO_LP873X)		+= gpio-lp873x.o
 obj-$(CONFIG_GPIO_LP87565)		+= gpio-lp87565.o
 obj-$(CONFIG_GPIO_LPC18XX)		+= gpio-lpc18xx.o
 obj-$(CONFIG_GPIO_LPC32XX)		+= gpio-lpc32xx.o
-obj-$(CONFIG_GPIO_LYNXPOINT)		+= gpio-lynxpoint.o
 obj-$(CONFIG_GPIO_MADERA)		+= gpio-madera.o
 obj-$(CONFIG_GPIO_MAX3191X)		+= gpio-max3191x.o
 obj-$(CONFIG_GPIO_MAX7300)		+= gpio-max7300.o
@@ -115,12 +117,14 @@ obj-$(CONFIG_GPIO_PXA)			+= gpio-pxa.o
 obj-$(CONFIG_GPIO_RASPBERRYPI_EXP)	+= gpio-raspberrypi-exp.o
 obj-$(CONFIG_GPIO_RC5T583)		+= gpio-rc5t583.o
 obj-$(CONFIG_GPIO_RCAR)			+= gpio-rcar.o
+obj-$(CONFIG_GPIO_RDA)			+= gpio-rda.o
 obj-$(CONFIG_GPIO_RDC321X)		+= gpio-rdc321x.o
 obj-$(CONFIG_GPIO_REG)			+= gpio-reg.o
 obj-$(CONFIG_ARCH_SA1100)		+= gpio-sa1100.o
 obj-$(CONFIG_GPIO_SAMA5D2_PIOBU)	+= gpio-sama5d2-piobu.o
 obj-$(CONFIG_GPIO_SCH311X)		+= gpio-sch311x.o
 obj-$(CONFIG_GPIO_SCH)			+= gpio-sch.o
+obj-$(CONFIG_GPIO_SIFIVE)		+= gpio-sifive.o
 obj-$(CONFIG_GPIO_SIOX)			+= gpio-siox.o
 obj-$(CONFIG_GPIO_SODAVILLE)		+= gpio-sodaville.o
 obj-$(CONFIG_GPIO_SPEAR_SPICS)		+= gpio-spear-spics.o
@@ -154,6 +158,7 @@ obj-$(CONFIG_GPIO_VF610)		+= gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)		+= gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)		+= gpio-vr41xx.o
 obj-$(CONFIG_GPIO_VX855)		+= gpio-vx855.o
+obj-$(CONFIG_GPIO_WCD934X)		+= gpio-wcd934x.o
 obj-$(CONFIG_GPIO_WHISKEY_COVE)		+= gpio-wcove.o
 obj-$(CONFIG_GPIO_WINBOND)		+= gpio-winbond.o
 obj-$(CONFIG_GPIO_WM831X)		+= gpio-wm831x.o
diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index 9c048f10c9ad..3a44e6ae52bd 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -10,6 +10,28 @@ approach. This means that GPIO consumers, drivers and machine descriptions
 ideally have no use or idea of the global GPIO numberspace that has/was
 used in the inception of the GPIO subsystem.
 
+The numberspace issue is the same as to why irq is moving away from irq
+numbers to IRQ descriptors.
+
+The underlying motivation for this is that the GPIO numberspace has become
+unmanageable: machine board files tend to become full of macros trying to
+establish the numberspace at compile-time, making it hard to add any numbers
+in the middle (such as if you missed a pin on a chip) without the numberspace
+breaking.
+
+Machine descriptions such as device tree or ACPI does not have a concept of the
+Linux GPIO number as those descriptions are external to the Linux kernel
+and treat GPIO lines as abstract entities.
+
+The runtime-assigned GPIO numberspace (what you get if you assign the GPIO
+base as -1 in struct gpio_chip) has also became unpredictable due to factors
+such as probe ordering and the introduction of -EPROBE_DEFER making probe
+ordering of independent GPIO chips essentially unpredictable, as their base
+number will be assigned on a first come first serve basis.
+
+The best way to get out of the problem is to make the global GPIO numbers
+unimportant by simply not using them. GPIO descriptors deal with this.
+
 Work items:
 
 - Convert all GPIO device drivers to only #include <linux/gpio/driver.h>
@@ -33,7 +55,7 @@ This header and helpers appeared at one point when there was no proper
 driver infrastructure for doing simpler MMIO GPIO devices and there was
 no core support for parsing device tree GPIOs from the core library with
 the [devm_]gpiod_get() calls we have today that will implicitly go into
-the device tree back-end.
+the device tree back-end. It is legacy and should not be used in new code.
 
 Work items:
 
@@ -59,6 +81,15 @@ Work items:
   uses <linux/gpio/consumer.h> or <linux/gpio/driver.h> instead.
 
 
+Get rid of <linux/gpio.h>
+
+This legacy header is a one stop shop for anything GPIO is closely tied
+to the global GPIO numberspace. The endgame of the above refactorings will
+be the removal of <linux/gpio.h> and from that point only the specialized
+headers under <linux/gpio/*.h> will be used. This requires all the above to
+be completed and is expected to take a long time.
+
+
 Collect drivers
 
 Collect GPIO drivers from arch/* and other places that should be placed
@@ -80,6 +111,10 @@ Work items:
 - Look over and identify any remaining easily converted drivers and
   dry-code conversions to MMIO GPIO for maintainers to test
 
+- Expand the MMIO GPIO or write a new library for regmap-based I/O
+  helpers for GPIO drivers on regmap that simply use offsets
+  0..n in some register to drive GPIO lines
+
 - Expand the MMIO GPIO or write a new library for port-mapped I/O
   helpers (x86 inb()/outb()) and convert port-mapped I/O drivers to use
   this with dry-coding and sending to maintainers to test
@@ -105,7 +140,7 @@ try to cover any generic kind of irqchip cascaded from a GPIO.
 
   int irq; /* from platform etc */
   struct my_gpio *g;
-  struct gpio_irq_chip *girq
+  struct gpio_irq_chip *girq;
 
   /* Set up the irqchip dynamically */
   g->irq.name = "my_gpio_irq";
@@ -133,9 +168,14 @@ try to cover any generic kind of irqchip cascaded from a GPIO.
 - Look over and identify any remaining easily converted drivers and
   dry-code conversions to gpiolib irqchip for maintainers to test
 
-- Support generic hierarchical GPIO interrupts: these are for the
-  non-cascading case where there is one IRQ per GPIO line, there is
-  currently no common infrastructure for this.
+- Drop gpiochip_set_chained_irqchip() when all the chained irqchips
+  have been converted to the above infrastructure.
+
+- Add more infrastructure to make it possible to also pass a threaded
+  irqchip in struct gpio_irq_chip.
+
+- Drop gpiochip_irqchip_add_nested() when all the chained irqchips
+  have been converted to the above infrastructure.
 
 
 Increase integration with pin control
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index a44fa8af5b0d..1f7d9bbec0fc 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -59,7 +59,10 @@ static int dio48e_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 	const unsigned port = offset / 8;
 	const unsigned mask = BIT(offset % 8);
 
-	return !!(dio48egpio->io_state[port] & mask);
+	if (dio48egpio->io_state[port] & mask)
+		return  GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
@@ -175,46 +178,25 @@ static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset)
 	return !!(port_state & mask);
 }
 
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
 static int dio48e_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
-	size_t i;
-	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = dio48egpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(dio48egpio->base + ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -244,37 +226,27 @@ static void dio48e_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int out_port;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = dio48egpio->base + ports[index];
 
-		port = i / gpio_reg_size;
-		out_port = (port > 2) ? port + 1 : port;
-		bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		raw_spin_lock_irqsave(&dio48egpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		dio48egpio->out_state[port] &= ~mask[BIT_WORD(i)];
-		dio48egpio->out_state[port] |= bitmask;
-		outb(dio48egpio->out_state[port], dio48egpio->base + out_port);
+		dio48egpio->out_state[index] &= ~gpio_mask;
+		dio48egpio->out_state[index] |= bitmask;
+		outb(dio48egpio->out_state[index], port_addr);
 
 		raw_spin_unlock_irqrestore(&dio48egpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index ff53887bdaa8..d350ac0de06b 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -53,7 +53,7 @@ struct idi_48_gpio {
 
 static int idi_48_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 {
-	return 1;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int idi_48_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
@@ -65,7 +65,7 @@ static int idi_48_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
 	struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip);
 	unsigned i;
-	const unsigned register_offset[6] = { 0, 1, 2, 4, 5, 6 };
+	static const unsigned int register_offset[6] = { 0, 1, 2, 4, 5, 6 };
 	unsigned base_offset;
 	unsigned mask;
 
@@ -85,42 +85,20 @@ static int idi_48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip);
-	size_t i;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = idi48gpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(idi48gpio->base + ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 8d2f51cd9d91..5752d9dab148 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -51,9 +51,9 @@ struct idio_16_gpio {
 static int idio_16_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 {
 	if (offset > 15)
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int idio_16_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e81307f9754e..05637d585152 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -6,6 +6,7 @@
  *  Copyright (C) 2010 Miguel Gaio <miguel.gaio@efixo.com>
  */
 
+#include <linux/bitops.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
@@ -72,20 +73,18 @@ static void gen_74x164_set_multiple(struct gpio_chip *gc, unsigned long *mask,
 				    unsigned long *bits)
 {
 	struct gen_74x164_chip *chip = gpiochip_get_data(gc);
-	unsigned int i, idx, shift;
-	u8 bank, bankmask;
+	unsigned long offset;
+	unsigned long bankmask;
+	size_t bank;
+	unsigned long bitmask;
 
 	mutex_lock(&chip->lock);
-	for (i = 0, bank = chip->registers - 1; i < chip->registers;
-	     i++, bank--) {
-		idx = i / sizeof(*mask);
-		shift = i % sizeof(*mask) * BITS_PER_BYTE;
-		bankmask = mask[idx] >> shift;
-		if (!bankmask)
-			continue;
+	for_each_set_clump8(offset, bankmask, mask, chip->registers * 8) {
+		bank = chip->registers - 1 - offset / 8;
+		bitmask = bitmap_get_value8(bits, offset) & bankmask;
 
 		chip->buffer[bank] &= ~bankmask;
-		chip->buffer[bank] |= bankmask & (bits[idx] >> shift);
+		chip->buffer[bank] |= bitmask;
 	}
 	__gen_74x164_write_config(chip);
 	mutex_unlock(&chip->lock);
diff --git a/drivers/gpio/gpio-74xx-mmio.c b/drivers/gpio/gpio-74xx-mmio.c
index 83a2286d93f6..173e06758e6c 100644
--- a/drivers/gpio/gpio-74xx-mmio.c
+++ b/drivers/gpio/gpio-74xx-mmio.c
@@ -77,7 +77,10 @@ static int mmio_74xx_get_direction(struct gpio_chip *gc, unsigned offset)
 {
 	struct mmio_74xx_gpio_priv *priv = gpiochip_get_data(gc);
 
-	return !(priv->flags & MMIO_74XX_DIR_OUT);
+	if (priv->flags & MMIO_74XX_DIR_OUT)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return  GPIO_LINE_DIRECTION_IN;
 }
 
 static int mmio_74xx_dir_in(struct gpio_chip *gc, unsigned int gpio)
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 9f2e6b04c361..cc4ba71e4fe3 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -266,7 +266,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
 	altera_gc->mmchip.gc.owner		= THIS_MODULE;
 	altera_gc->mmchip.gc.parent		= &pdev->dev;
 
-	altera_gc->mapped_irq = platform_get_irq(pdev, 0);
+	altera_gc->mapped_irq = platform_get_irq_optional(pdev, 0);
 
 	if (altera_gc->mapped_irq < 0)
 		goto skip_irq;
diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c
index 181df1581df5..4e44ba4d7423 100644
--- a/drivers/gpio/gpio-amd-fch.c
+++ b/drivers/gpio/gpio-amd-fch.c
@@ -92,7 +92,7 @@ static int amd_fch_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio)
 	ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_DIRECTION);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	return ret;
+	return ret ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT;
 }
 
 static void amd_fch_gpio_set(struct gpio_chip *gc,
diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
new file mode 100644
index 000000000000..d16645c1d8d9
--- /dev/null
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2019 American Megatrends International LLC.
+ *
+ * Author: Karthikeyan Mani <karthikeyanm@amiindia.co.in>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/gpio/driver.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#define MAX_NR_SGPIO			80
+
+#define ASPEED_SGPIO_CTRL		0x54
+
+#define ASPEED_SGPIO_PINS_MASK		GENMASK(9, 6)
+#define ASPEED_SGPIO_CLK_DIV_MASK	GENMASK(31, 16)
+#define ASPEED_SGPIO_ENABLE		BIT(0)
+
+struct aspeed_sgpio {
+	struct gpio_chip chip;
+	struct clk *pclk;
+	spinlock_t lock;
+	void __iomem *base;
+	uint32_t dir_in[3];
+	int irq;
+};
+
+struct aspeed_sgpio_bank {
+	uint16_t    val_regs;
+	uint16_t    rdata_reg;
+	uint16_t    irq_regs;
+	const char  names[4][3];
+};
+
+/*
+ * Note: The "value" register returns the input value when the GPIO is
+ *	 configured as an input.
+ *
+ *	 The "rdata" register returns the output value when the GPIO is
+ *	 configured as an output.
+ */
+static const struct aspeed_sgpio_bank aspeed_sgpio_banks[] = {
+	{
+		.val_regs = 0x0000,
+		.rdata_reg = 0x0070,
+		.irq_regs = 0x0004,
+		.names = { "A", "B", "C", "D" },
+	},
+	{
+		.val_regs = 0x001C,
+		.rdata_reg = 0x0074,
+		.irq_regs = 0x0020,
+		.names = { "E", "F", "G", "H" },
+	},
+	{
+		.val_regs = 0x0038,
+		.rdata_reg = 0x0078,
+		.irq_regs = 0x003C,
+		.names = { "I", "J" },
+	},
+};
+
+enum aspeed_sgpio_reg {
+	reg_val,
+	reg_rdata,
+	reg_irq_enable,
+	reg_irq_type0,
+	reg_irq_type1,
+	reg_irq_type2,
+	reg_irq_status,
+};
+
+#define GPIO_VAL_VALUE      0x00
+#define GPIO_IRQ_ENABLE     0x00
+#define GPIO_IRQ_TYPE0      0x04
+#define GPIO_IRQ_TYPE1      0x08
+#define GPIO_IRQ_TYPE2      0x0C
+#define GPIO_IRQ_STATUS     0x10
+
+static void __iomem *bank_reg(struct aspeed_sgpio *gpio,
+				     const struct aspeed_sgpio_bank *bank,
+				     const enum aspeed_sgpio_reg reg)
+{
+	switch (reg) {
+	case reg_val:
+		return gpio->base + bank->val_regs + GPIO_VAL_VALUE;
+	case reg_rdata:
+		return gpio->base + bank->rdata_reg;
+	case reg_irq_enable:
+		return gpio->base + bank->irq_regs + GPIO_IRQ_ENABLE;
+	case reg_irq_type0:
+		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE0;
+	case reg_irq_type1:
+		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE1;
+	case reg_irq_type2:
+		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE2;
+	case reg_irq_status:
+		return gpio->base + bank->irq_regs + GPIO_IRQ_STATUS;
+	default:
+		/* acturally if code runs to here, it's an error case */
+		BUG();
+	}
+}
+
+#define GPIO_BANK(x)    ((x) >> 5)
+#define GPIO_OFFSET(x)  ((x) & 0x1f)
+#define GPIO_BIT(x)     BIT(GPIO_OFFSET(x))
+
+static const struct aspeed_sgpio_bank *to_bank(unsigned int offset)
+{
+	unsigned int bank = GPIO_BANK(offset);
+
+	WARN_ON(bank >= ARRAY_SIZE(aspeed_sgpio_banks));
+	return &aspeed_sgpio_banks[bank];
+}
+
+static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	const struct aspeed_sgpio_bank *bank = to_bank(offset);
+	unsigned long flags;
+	enum aspeed_sgpio_reg reg;
+	bool is_input;
+	int rc = 0;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	is_input = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
+	reg = is_input ? reg_val : reg_rdata;
+	rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset));
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	return rc;
+}
+
+static void sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	const struct aspeed_sgpio_bank *bank = to_bank(offset);
+	void __iomem *addr;
+	u32 reg = 0;
+
+	addr = bank_reg(gpio, bank, reg_val);
+	reg = ioread32(addr);
+
+	if (val)
+		reg |= GPIO_BIT(offset);
+	else
+		reg &= ~GPIO_BIT(offset);
+
+	iowrite32(reg, addr);
+}
+
+static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	sgpio_set_value(gc, offset, val);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+}
+
+static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+	gpio->dir_in[GPIO_BANK(offset)] |= GPIO_BIT(offset);
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	return 0;
+}
+
+static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	gpio->dir_in[GPIO_BANK(offset)] &= ~GPIO_BIT(offset);
+	sgpio_set_value(gc, offset, val);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	return 0;
+}
+
+static int aspeed_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+	int dir_status;
+	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+	dir_status = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	return dir_status;
+
+}
+
+static void irqd_to_aspeed_sgpio_data(struct irq_data *d,
+					struct aspeed_sgpio **gpio,
+					const struct aspeed_sgpio_bank **bank,
+					u32 *bit, int *offset)
+{
+	struct aspeed_sgpio *internal;
+
+	*offset = irqd_to_hwirq(d);
+	internal = irq_data_get_irq_chip_data(d);
+	WARN_ON(!internal);
+
+	*gpio = internal;
+	*bank = to_bank(*offset);
+	*bit = GPIO_BIT(*offset);
+}
+
+static void aspeed_sgpio_irq_ack(struct irq_data *d)
+{
+	const struct aspeed_sgpio_bank *bank;
+	struct aspeed_sgpio *gpio;
+	unsigned long flags;
+	void __iomem *status_addr;
+	int offset;
+	u32 bit;
+
+	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
+
+	status_addr = bank_reg(gpio, bank, reg_irq_status);
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	iowrite32(bit, status_addr);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+}
+
+static void aspeed_sgpio_irq_set_mask(struct irq_data *d, bool set)
+{
+	const struct aspeed_sgpio_bank *bank;
+	struct aspeed_sgpio *gpio;
+	unsigned long flags;
+	u32 reg, bit;
+	void __iomem *addr;
+	int offset;
+
+	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
+	addr = bank_reg(gpio, bank, reg_irq_enable);
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	reg = ioread32(addr);
+	if (set)
+		reg |= bit;
+	else
+		reg &= ~bit;
+
+	iowrite32(reg, addr);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+}
+
+static void aspeed_sgpio_irq_mask(struct irq_data *d)
+{
+	aspeed_sgpio_irq_set_mask(d, false);
+}
+
+static void aspeed_sgpio_irq_unmask(struct irq_data *d)
+{
+	aspeed_sgpio_irq_set_mask(d, true);
+}
+
+static int aspeed_sgpio_set_type(struct irq_data *d, unsigned int type)
+{
+	u32 type0 = 0;
+	u32 type1 = 0;
+	u32 type2 = 0;
+	u32 bit, reg;
+	const struct aspeed_sgpio_bank *bank;
+	irq_flow_handler_t handler;
+	struct aspeed_sgpio *gpio;
+	unsigned long flags;
+	void __iomem *addr;
+	int offset;
+
+	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
+
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_BOTH:
+		type2 |= bit;
+		/* fall through */
+	case IRQ_TYPE_EDGE_RISING:
+		type0 |= bit;
+		/* fall through */
+	case IRQ_TYPE_EDGE_FALLING:
+		handler = handle_edge_irq;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		type0 |= bit;
+		/* fall through */
+	case IRQ_TYPE_LEVEL_LOW:
+		type1 |= bit;
+		handler = handle_level_irq;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	addr = bank_reg(gpio, bank, reg_irq_type0);
+	reg = ioread32(addr);
+	reg = (reg & ~bit) | type0;
+	iowrite32(reg, addr);
+
+	addr = bank_reg(gpio, bank, reg_irq_type1);
+	reg = ioread32(addr);
+	reg = (reg & ~bit) | type1;
+	iowrite32(reg, addr);
+
+	addr = bank_reg(gpio, bank, reg_irq_type2);
+	reg = ioread32(addr);
+	reg = (reg & ~bit) | type2;
+	iowrite32(reg, addr);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	irq_set_handler_locked(d, handler);
+
+	return 0;
+}
+
+static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
+{
+	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
+	struct irq_chip *ic = irq_desc_get_chip(desc);
+	struct aspeed_sgpio *data = gpiochip_get_data(gc);
+	unsigned int i, p, girq;
+	unsigned long reg;
+
+	chained_irq_enter(ic, desc);
+
+	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
+		const struct aspeed_sgpio_bank *bank = &aspeed_sgpio_banks[i];
+
+		reg = ioread32(bank_reg(data, bank, reg_irq_status));
+
+		for_each_set_bit(p, &reg, 32) {
+			girq = irq_find_mapping(gc->irq.domain, i * 32 + p);
+			generic_handle_irq(girq);
+		}
+
+	}
+
+	chained_irq_exit(ic, desc);
+}
+
+static struct irq_chip aspeed_sgpio_irqchip = {
+	.name       = "aspeed-sgpio",
+	.irq_ack    = aspeed_sgpio_irq_ack,
+	.irq_mask   = aspeed_sgpio_irq_mask,
+	.irq_unmask = aspeed_sgpio_irq_unmask,
+	.irq_set_type   = aspeed_sgpio_set_type,
+};
+
+static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
+				   struct platform_device *pdev)
+{
+	int rc, i;
+	const struct aspeed_sgpio_bank *bank;
+	struct gpio_irq_chip *irq;
+
+	rc = platform_get_irq(pdev, 0);
+	if (rc < 0)
+		return rc;
+
+	gpio->irq = rc;
+
+	/* Disable IRQ and clear Interrupt status registers for all SGPIO Pins. */
+	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
+		bank =  &aspeed_sgpio_banks[i];
+		/* disable irq enable bits */
+		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_enable));
+		/* clear status bits */
+		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_status));
+	}
+
+	irq = &gpio->chip.irq;
+	irq->chip = &aspeed_sgpio_irqchip;
+	irq->handler = handle_bad_irq;
+	irq->default_type = IRQ_TYPE_NONE;
+	irq->parent_handler = aspeed_sgpio_irq_handler;
+	irq->parent_handler_data = gpio;
+	irq->parents = &gpio->irq;
+	irq->num_parents = 1;
+
+	/* set IRQ settings and Enable Interrupt */
+	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
+		bank = &aspeed_sgpio_banks[i];
+		/* set falling or level-low irq */
+		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type0));
+		/* trigger type is edge */
+		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type1));
+		/* dual edge trigger mode. */
+		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_type2));
+		/* enable irq */
+		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_enable));
+	}
+
+	return 0;
+}
+
+static const struct of_device_id aspeed_sgpio_of_table[] = {
+	{ .compatible = "aspeed,ast2400-sgpio" },
+	{ .compatible = "aspeed,ast2500-sgpio" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, aspeed_sgpio_of_table);
+
+static int __init aspeed_sgpio_probe(struct platform_device *pdev)
+{
+	struct aspeed_sgpio *gpio;
+	u32 nr_gpios, sgpio_freq, sgpio_clk_div;
+	int rc;
+	unsigned long apb_freq;
+
+	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
+	if (!gpio)
+		return -ENOMEM;
+
+	gpio->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(gpio->base))
+		return PTR_ERR(gpio->base);
+
+	rc = of_property_read_u32(pdev->dev.of_node, "ngpios", &nr_gpios);
+	if (rc < 0) {
+		dev_err(&pdev->dev, "Could not read ngpios property\n");
+		return -EINVAL;
+	} else if (nr_gpios > MAX_NR_SGPIO) {
+		dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n",
+			MAX_NR_SGPIO, nr_gpios);
+		return -EINVAL;
+	}
+
+	rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq);
+	if (rc < 0) {
+		dev_err(&pdev->dev, "Could not read bus-frequency property\n");
+		return -EINVAL;
+	}
+
+	gpio->pclk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(gpio->pclk)) {
+		dev_err(&pdev->dev, "devm_clk_get failed\n");
+		return PTR_ERR(gpio->pclk);
+	}
+
+	apb_freq = clk_get_rate(gpio->pclk);
+
+	/*
+	 * From the datasheet,
+	 *	SGPIO period = 1/PCLK * 2 * (GPIO254[31:16] + 1)
+	 *	period = 2 * (GPIO254[31:16] + 1) / PCLK
+	 *	frequency = 1 / (2 * (GPIO254[31:16] + 1) / PCLK)
+	 *	frequency = PCLK / (2 * (GPIO254[31:16] + 1))
+	 *	frequency * 2 * (GPIO254[31:16] + 1) = PCLK
+	 *	GPIO254[31:16] = PCLK / (frequency * 2) - 1
+	 */
+	if (sgpio_freq == 0)
+		return -EINVAL;
+
+	sgpio_clk_div = (apb_freq / (sgpio_freq * 2)) - 1;
+
+	if (sgpio_clk_div > (1 << 16) - 1)
+		return -EINVAL;
+
+	iowrite32(FIELD_PREP(ASPEED_SGPIO_CLK_DIV_MASK, sgpio_clk_div) |
+		  FIELD_PREP(ASPEED_SGPIO_PINS_MASK, (nr_gpios / 8)) |
+		  ASPEED_SGPIO_ENABLE,
+		  gpio->base + ASPEED_SGPIO_CTRL);
+
+	spin_lock_init(&gpio->lock);
+
+	gpio->chip.parent = &pdev->dev;
+	gpio->chip.ngpio = nr_gpios;
+	gpio->chip.direction_input = aspeed_sgpio_dir_in;
+	gpio->chip.direction_output = aspeed_sgpio_dir_out;
+	gpio->chip.get_direction = aspeed_sgpio_get_direction;
+	gpio->chip.request = NULL;
+	gpio->chip.free = NULL;
+	gpio->chip.get = aspeed_sgpio_get;
+	gpio->chip.set = aspeed_sgpio_set;
+	gpio->chip.set_config = NULL;
+	gpio->chip.label = dev_name(&pdev->dev);
+	gpio->chip.base = -1;
+
+	/* set all SGPIO pins as input (1). */
+	memset(gpio->dir_in, 0xff, sizeof(gpio->dir_in));
+
+	aspeed_sgpio_setup_irqs(gpio, pdev);
+
+	rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+static struct platform_driver aspeed_sgpio_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.of_match_table = aspeed_sgpio_of_table,
+	},
+};
+
+module_platform_driver_probe(aspeed_sgpio_driver, aspeed_sgpio_probe);
+MODULE_DESCRIPTION("Aspeed Serial GPIO Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 09e53c5f3b0a..879db23d8454 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -487,10 +487,10 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 	u32 val;
 
 	if (!have_input(gpio, offset))
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
 	if (!have_output(gpio, offset))
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
@@ -498,8 +498,7 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
-	return !val;
-
+	return val ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
 }
 
 static inline int irqd_to_aspeed_gpio_data(struct irq_data *d,
@@ -979,7 +978,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
 }
 
 /**
- * aspeed_gpio_copro_set_ops - Sets the callbacks used for handhsaking with
+ * aspeed_gpio_copro_set_ops - Sets the callbacks used for handshaking with
  *                             the coprocessor for shared GPIO banks
  * @ops: The callbacks
  * @data: Pointer passed back to the callbacks
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index f1a5ea9b3de2..53fae02c40ad 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -226,7 +226,6 @@ static int ath79_gpio_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	struct ath79_gpio_ctrl *ctrl;
 	struct gpio_irq_chip *girq;
-	struct resource *res;
 	u32 ath79_gpio_count;
 	bool oe_inverted;
 	int err;
@@ -256,12 +255,9 @@ static int ath79_gpio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-	ctrl->base = devm_ioremap_nocache(dev, res->start, resource_size(res));
-	if (!ctrl->base)
-		return -ENOMEM;
+	ctrl->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ctrl->base))
+		return PTR_ERR(ctrl->base);
 
 	raw_spin_lock_init(&ctrl->lock);
 	err = bgpio_init(&ctrl->gc, dev, 4,
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 9fa6d3a967d2..baee8c3f06ad 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -19,7 +19,6 @@
 #include <linux/io.h>
 #include <linux/gpio/driver.h>
 #include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/init.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip/chained_irq.h>
@@ -127,7 +126,7 @@ static int bcm_kona_gpio_get_dir(struct gpio_chip *chip, unsigned gpio)
 	u32 val;
 
 	val = readl(reg_base + GPIO_CONTROL(gpio)) & GPIO_GPCTR0_IOTR_MASK;
-	return !!val;
+	return val ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT;
 }
 
 static void bcm_kona_gpio_set(struct gpio_chip *chip, unsigned gpio, int value)
@@ -144,7 +143,7 @@ static void bcm_kona_gpio_set(struct gpio_chip *chip, unsigned gpio, int value)
 	raw_spin_lock_irqsave(&kona_gpio->lock, flags);
 
 	/* this function only applies to output pin */
-	if (bcm_kona_gpio_get_dir(chip, gpio) == 1)
+	if (bcm_kona_gpio_get_dir(chip, gpio) == GPIO_LINE_DIRECTION_IN)
 		goto out;
 
 	reg_offset = value ? GPIO_OUT_SET(bank_id) : GPIO_OUT_CLEAR(bank_id);
@@ -170,7 +169,7 @@ static int bcm_kona_gpio_get(struct gpio_chip *chip, unsigned gpio)
 	reg_base = kona_gpio->reg_base;
 	raw_spin_lock_irqsave(&kona_gpio->lock, flags);
 
-	if (bcm_kona_gpio_get_dir(chip, gpio) == 1)
+	if (bcm_kona_gpio_get_dir(chip, gpio) == GPIO_LINE_DIRECTION_IN)
 		reg_offset = GPIO_IN_STATUS(bank_id);
 	else
 		reg_offset = GPIO_OUT_STATUS(bank_id);
@@ -586,11 +585,18 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
 
 	kona_gpio->gpio_chip = template_chip;
 	chip = &kona_gpio->gpio_chip;
-	kona_gpio->num_bank = of_irq_count(dev->of_node);
-	if (kona_gpio->num_bank == 0) {
+	ret = platform_irq_count(pdev);
+	if (!ret) {
 		dev_err(dev, "Couldn't determine # GPIO banks\n");
 		return -ENOENT;
+	} else if (ret < 0) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Couldn't determine GPIO banks: (%pe)\n",
+				ERR_PTR(ret));
+		return ret;
 	}
+	kona_gpio->num_bank = ret;
+
 	if (kona_gpio->num_bank > GPIO_MAX_BANK_NUM) {
 		dev_err(dev, "Too many GPIO banks configured (max=%d)\n",
 			GPIO_MAX_BANK_NUM);
diff --git a/drivers/gpio/gpio-bd70528.c b/drivers/gpio/gpio-bd70528.c
index 4ba4d4a67881..45b3da8da336 100644
--- a/drivers/gpio/gpio-bd70528.c
+++ b/drivers/gpio/gpio-bd70528.c
@@ -54,8 +54,10 @@ static int bd70528_get_direction(struct gpio_chip *chip, unsigned int offset)
 		dev_err(bdgpio->chip.dev, "Could not read gpio direction\n");
 		return ret;
 	}
+	if (val & BD70528_GPIO_OUT_EN_MASK)
+		return GPIO_LINE_DIRECTION_OUT;
 
-	return !(val & BD70528_GPIO_OUT_EN_MASK);
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int bd70528_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
@@ -166,9 +168,9 @@ static int bd70528_gpio_get(struct gpio_chip *chip, unsigned int offset)
 	 * locking would make no sense.
 	 */
 	ret = bd70528_get_direction(chip, offset);
-	if (ret == 0)
+	if (ret == GPIO_LINE_DIRECTION_OUT)
 		ret = bd70528_gpio_get_o(bdgpio, offset);
-	else if (ret == 1)
+	else if (ret == GPIO_LINE_DIRECTION_IN)
 		ret = bd70528_gpio_get_i(bdgpio, offset);
 	else
 		dev_err(bdgpio->chip.dev, "failed to read GPIO direction\n");
@@ -230,3 +232,4 @@ module_platform_driver(bd70528_gpio);
 MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
 MODULE_DESCRIPTION("BD70528 voltage regulator driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bd70528-gpio");
diff --git a/drivers/gpio/gpio-bd9571mwv.c b/drivers/gpio/gpio-bd9571mwv.c
index 5224a946e8ab..c0abc9c6851b 100644
--- a/drivers/gpio/gpio-bd9571mwv.c
+++ b/drivers/gpio/gpio-bd9571mwv.c
@@ -37,8 +37,10 @@ static int bd9571mwv_gpio_get_direction(struct gpio_chip *chip,
 	ret = regmap_read(gpio->bd->regmap, BD9571MWV_GPIO_DIR, &val);
 	if (ret < 0)
 		return ret;
+	if (val & BIT(offset))
+		return GPIO_LINE_DIRECTION_IN;
 
-	return val & BIT(offset);
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int bd9571mwv_gpio_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-creg-snps.c b/drivers/gpio/gpio-creg-snps.c
index ff19a8ad5663..1d0827e79703 100644
--- a/drivers/gpio/gpio-creg-snps.c
+++ b/drivers/gpio/gpio-creg-snps.c
@@ -64,11 +64,11 @@ static int creg_gpio_validate_pg(struct device *dev, struct creg_gpio *hcg,
 	if (layout->bit_per_gpio[i] < 1 || layout->bit_per_gpio[i] > 8)
 		return -EINVAL;
 
-	/* Check that on valiue fits it's placeholder */
+	/* Check that on value fits its placeholder */
 	if (GENMASK(31, layout->bit_per_gpio[i]) & layout->on[i])
 		return -EINVAL;
 
-	/* Check that off valiue fits it's placeholder */
+	/* Check that off value fits its placeholder */
 	if (GENMASK(31, layout->bit_per_gpio[i]) & layout->off[i])
 		return -EINVAL;
 
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index 8a33c2fc174d..26b40c8b8a12 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -200,9 +200,9 @@ static int dln2_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 	struct dln2_gpio *dln2 = gpiochip_get_data(chip);
 
 	if (test_bit(offset, dln2->output_enabled))
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
-	return 1;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int dln2_gpio_get(struct gpio_chip *chip, unsigned int offset)
@@ -214,7 +214,7 @@ static int dln2_gpio_get(struct gpio_chip *chip, unsigned int offset)
 	if (dir < 0)
 		return dir;
 
-	if (dir == 1)
+	if (dir == GPIO_LINE_DIRECTION_IN)
 		return dln2_gpio_pin_get_in_val(dln2, offset);
 
 	return dln2_gpio_pin_get_out_val(dln2, offset);
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index 620f25b7efb4..17a243c528ad 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -269,13 +269,12 @@ static void em_gio_irq_domain_remove(void *data)
 static int em_gio_probe(struct platform_device *pdev)
 {
 	struct em_gio_priv *p;
-	struct resource *io[2], *irq[2];
 	struct gpio_chip *gpio_chip;
 	struct irq_chip *irq_chip;
 	struct device *dev = &pdev->dev;
 	const char *name = dev_name(dev);
 	unsigned int ngpios;
-	int ret;
+	int irq[2], ret;
 
 	p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
 	if (!p)
@@ -285,25 +284,21 @@ static int em_gio_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, p);
 	spin_lock_init(&p->sense_lock);
 
-	io[0] = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	io[1] = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+	irq[0] = platform_get_irq(pdev, 0);
+	if (irq[0] < 0)
+		return irq[0];
 
-	if (!io[0] || !io[1] || !irq[0] || !irq[1]) {
-		dev_err(dev, "missing IRQ or IOMEM\n");
-		return -EINVAL;
-	}
+	irq[1] = platform_get_irq(pdev, 1);
+	if (irq[1] < 0)
+		return irq[1];
 
-	p->base0 = devm_ioremap_nocache(dev, io[0]->start,
-					resource_size(io[0]));
-	if (!p->base0)
-		return -ENOMEM;
+	p->base0 = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(p->base0))
+		return PTR_ERR(p->base0);
 
-	p->base1 = devm_ioremap_nocache(dev, io[1]->start,
-				   resource_size(io[1]));
-	if (!p->base1)
-		return -ENOMEM;
+	p->base1 = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(p->base1))
+		return PTR_ERR(p->base1);
 
 	if (of_property_read_u32(dev->of_node, "ngpios", &ngpios)) {
 		dev_err(dev, "Missing ngpios OF property\n");
@@ -326,7 +321,7 @@ static int em_gio_probe(struct platform_device *pdev)
 	gpio_chip->ngpio = ngpios;
 
 	irq_chip = &p->irq_chip;
-	irq_chip->name = name;
+	irq_chip->name = "gpio-em";
 	irq_chip->irq_mask = em_gio_irq_disable;
 	irq_chip->irq_unmask = em_gio_irq_enable;
 	irq_chip->irq_set_type = em_gio_irq_set_type;
@@ -346,14 +341,12 @@ static int em_gio_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (devm_request_irq(dev, irq[0]->start,
-			     em_gio_irq_handler, 0, name, p)) {
+	if (devm_request_irq(dev, irq[0], em_gio_irq_handler, 0, name, p)) {
 		dev_err(dev, "failed to request low IRQ\n");
 		return -ENOENT;
 	}
 
-	if (devm_request_irq(dev, irq[1]->start,
-			     em_gio_irq_handler, 0, name, p)) {
+	if (devm_request_irq(dev, irq[1], em_gio_irq_handler, 0, name, p)) {
 		dev_err(dev, "failed to request high IRQ\n");
 		return -ENOENT;
 	}
diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c
index fae327d5b06e..da1ef0b1c291 100644
--- a/drivers/gpio/gpio-exar.c
+++ b/drivers/gpio/gpio-exar.c
@@ -77,7 +77,10 @@ static int exar_get_direction(struct gpio_chip *chip, unsigned int offset)
 		EXAR_OFFSET_MPIOSEL_HI : EXAR_OFFSET_MPIOSEL_LO;
 	unsigned int bit  = (offset + exar_gpio->first_pin) % 8;
 
-	return !!(exar_get(chip, addr) & BIT(bit));
+	if (exar_get(chip, addr) & BIT(bit))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int exar_get_value(struct gpio_chip *chip, unsigned int offset)
diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c
index fdc639f856f1..cadd02993539 100644
--- a/drivers/gpio/gpio-f7188x.c
+++ b/drivers/gpio/gpio-f7188x.c
@@ -250,7 +250,10 @@ static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 
 	superio_exit(sio->addr);
 
-	return !(dir & 1 << offset);
+	if (dir & 1 << offset)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int f7188x_gpio_direction_in(struct gpio_chip *chip, unsigned offset)
diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index 78a1db24e931..b89b8c5ff1f5 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -52,7 +52,10 @@ static int gpiomm_gpio_get_direction(struct gpio_chip *chip,
 	const unsigned int port = offset / 8;
 	const unsigned int mask = BIT(offset % 8);
 
-	return !!(gpiommgpio->io_state[port] & mask);
+	if (gpiommgpio->io_state[port] & mask)
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int gpiomm_gpio_direction_input(struct gpio_chip *chip,
@@ -164,46 +167,25 @@ static int gpiomm_gpio_get(struct gpio_chip *chip, unsigned int offset)
 	return !!(port_state & mask);
 }
 
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
 static int gpiomm_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
-	size_t i;
-	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = gpiommgpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(gpiommgpio->base + ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -234,37 +216,27 @@ static void gpiomm_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int out_port;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = gpiommgpio->base + ports[index];
 
-		port = i / gpio_reg_size;
-		out_port = (port > 2) ? port + 1 : port;
-		bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		spin_lock_irqsave(&gpiommgpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		gpiommgpio->out_state[port] &= ~mask[BIT_WORD(i)];
-		gpiommgpio->out_state[port] |= bitmask;
-		outb(gpiommgpio->out_state[port], gpiommgpio->base + out_port);
+		gpiommgpio->out_state[index] &= ~gpio_mask;
+		gpiommgpio->out_state[index] |= bitmask;
+		outb(gpiommgpio->out_state[index], port_addr);
 
 		spin_unlock_irqrestore(&gpiommgpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
index 08234e64993a..f954359c9544 100644
--- a/drivers/gpio/gpio-grgpio.c
+++ b/drivers/gpio/gpio-grgpio.c
@@ -253,17 +253,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
 	lirq->irq = irq;
 	uirq = &priv->uirqs[lirq->index];
 	if (uirq->refcnt == 0) {
+		spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
 		ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
 				  dev_name(priv->dev), priv);
 		if (ret) {
 			dev_err(priv->dev,
 				"Could not request underlying irq %d\n",
 				uirq->uirq);
-
-			spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
-
 			return ret;
 		}
+		spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
 	}
 	uirq->refcnt++;
 
@@ -309,8 +308,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
 	if (index >= 0) {
 		uirq = &priv->uirqs[lirq->index];
 		uirq->refcnt--;
-		if (uirq->refcnt == 0)
+		if (uirq->refcnt == 0) {
+			spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
 			free_irq(uirq->uirq, priv);
+			return;
+		}
 	}
 
 	spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
@@ -433,12 +435,9 @@ static int grgpio_probe(struct platform_device *ofdev)
 static int grgpio_remove(struct platform_device *ofdev)
 {
 	struct grgpio_priv *priv = platform_get_drvdata(ofdev);
-	unsigned long flags;
 	int i;
 	int ret = 0;
 
-	spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
-
 	if (priv->domain) {
 		for (i = 0; i < GRGPIO_MAX_NGPIO; i++) {
 			if (priv->uirqs[i].refcnt != 0) {
@@ -454,8 +453,6 @@ static int grgpio_remove(struct platform_device *ofdev)
 		irq_domain_remove(priv->domain);
 
 out:
-	spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
-
 	return ret;
 }
 
diff --git a/drivers/gpio/gpio-htc-egpio.c b/drivers/gpio/gpio-htc-egpio.c
index 6eb56f7ab9c9..a40bd56673fe 100644
--- a/drivers/gpio/gpio-htc-egpio.c
+++ b/drivers/gpio/gpio-htc-egpio.c
@@ -220,7 +220,10 @@ static int egpio_get_direction(struct gpio_chip *chip, unsigned offset)
 
 	egpio = gpiochip_get_data(chip);
 
-	return !test_bit(offset, &egpio->is_out);
+	if (test_bit(offset, &egpio->is_out))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static void egpio_write_cache(struct egpio_info *ei)
@@ -265,7 +268,6 @@ static int __init egpio_probe(struct platform_device *pdev)
 	struct gpio_chip  *chip;
 	unsigned int      irq, irq_end;
 	int               i;
-	int               ret;
 
 	/* Initialize ei data structure. */
 	ei = devm_kzalloc(&pdev->dev, sizeof(*ei), GFP_KERNEL);
@@ -275,28 +277,24 @@ static int __init egpio_probe(struct platform_device *pdev)
 	spin_lock_init(&ei->lock);
 
 	/* Find chained irq */
-	ret = -EINVAL;
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (res)
 		ei->chained_irq = res->start;
 
 	/* Map egpio chip into virtual address space. */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		goto fail;
-	ei->base_addr = devm_ioremap_nocache(&pdev->dev, res->start,
-					     resource_size(res));
-	if (!ei->base_addr)
-		goto fail;
-	pr_debug("EGPIO phys=%08x virt=%p\n", (u32)res->start, ei->base_addr);
+	ei->base_addr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ei->base_addr))
+		return PTR_ERR(ei->base_addr);
 
 	if ((pdata->bus_width != 16) && (pdata->bus_width != 32))
-		goto fail;
+		return -EINVAL;
+
 	ei->bus_shift = fls(pdata->bus_width - 1) - 3;
 	pr_debug("bus_shift = %d\n", ei->bus_shift);
 
 	if ((pdata->reg_width != 8) && (pdata->reg_width != 16))
-		goto fail;
+		return -EINVAL;
+
 	ei->reg_shift = fls(pdata->reg_width - 1);
 	pr_debug("reg_shift = %d\n", ei->reg_shift);
 
@@ -308,10 +306,9 @@ static int __init egpio_probe(struct platform_device *pdev)
 	ei->chip = devm_kcalloc(&pdev->dev,
 				ei->nchips, sizeof(struct egpio_chip),
 				GFP_KERNEL);
-	if (!ei->chip) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!ei->chip)
+		return -ENOMEM;
+
 	for (i = 0; i < ei->nchips; i++) {
 		ei->chip[i].reg_start = pdata->chip[i].reg_start;
 		ei->chip[i].cached_values = pdata->chip[i].initial_values;
@@ -321,10 +318,9 @@ static int __init egpio_probe(struct platform_device *pdev)
 		chip->label = devm_kasprintf(&pdev->dev, GFP_KERNEL,
 					     "htc-egpio-%d",
 					     i);
-		if (!chip->label) {
-			ret = -ENOMEM;
-			goto fail;
-		}
+		if (!chip->label)
+			return -ENOMEM;
+
 		chip->parent          = &pdev->dev;
 		chip->owner           = THIS_MODULE;
 		chip->get             = egpio_get;
@@ -366,10 +362,6 @@ static int __init egpio_probe(struct platform_device *pdev)
 	}
 
 	return 0;
-
-fail:
-	printk(KERN_ERR "EGPIO failed to setup\n");
-	return ret;
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c
index 90bf7742f9b0..2f086d0aa1f4 100644
--- a/drivers/gpio/gpio-ich.c
+++ b/drivers/gpio/gpio-ich.c
@@ -159,7 +159,10 @@ static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr)
 
 static int ichx_gpio_get_direction(struct gpio_chip *gpio, unsigned nr)
 {
-	return ichx_read_bit(GPIO_IO_SEL, nr);
+	if (ichx_read_bit(GPIO_IO_SEL, nr))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c
index ef51638f3f75..4ea15f08e0f4 100644
--- a/drivers/gpio/gpio-kempld.c
+++ b/drivers/gpio/gpio-kempld.c
@@ -104,7 +104,10 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
 	struct kempld_device_data *pld = gpio->pld;
 
-	return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
+	if (kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int kempld_gpio_pincount(struct kempld_device_data *pld)
diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c
new file mode 100644
index 000000000000..015632cf159f
--- /dev/null
+++ b/drivers/gpio/gpio-logicvc.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Bootlin
+ * Author: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ */
+
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+
+#define LOGICVC_CTRL_REG		0x40
+#define LOGICVC_CTRL_GPIO_SHIFT		11
+#define LOGICVC_CTRL_GPIO_BITS		5
+
+#define LOGICVC_POWER_CTRL_REG		0x78
+#define LOGICVC_POWER_CTRL_GPIO_SHIFT	0
+#define LOGICVC_POWER_CTRL_GPIO_BITS	4
+
+struct logicvc_gpio {
+	struct gpio_chip chip;
+	struct regmap *regmap;
+};
+
+static void logicvc_gpio_offset(struct logicvc_gpio *logicvc, unsigned offset,
+				unsigned int *reg, unsigned int *bit)
+{
+	if (offset >= LOGICVC_CTRL_GPIO_BITS) {
+		*reg = LOGICVC_POWER_CTRL_REG;
+
+		/* To the (virtual) power ctrl offset. */
+		offset -= LOGICVC_CTRL_GPIO_BITS;
+		/* To the actual bit offset in reg. */
+		offset += LOGICVC_POWER_CTRL_GPIO_SHIFT;
+	} else {
+		*reg = LOGICVC_CTRL_REG;
+
+		/* To the actual bit offset in reg. */
+		offset += LOGICVC_CTRL_GPIO_SHIFT;
+	}
+
+	*bit = BIT(offset);
+}
+
+static int logicvc_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct logicvc_gpio *logicvc = gpiochip_get_data(chip);
+	unsigned int reg, bit, value;
+	int ret;
+
+	logicvc_gpio_offset(logicvc, offset, &reg, &bit);
+
+	ret = regmap_read(logicvc->regmap, reg, &value);
+	if (ret)
+		return ret;
+
+	return !!(value & bit);
+}
+
+static void logicvc_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct logicvc_gpio *logicvc = gpiochip_get_data(chip);
+	unsigned int reg, bit;
+
+	logicvc_gpio_offset(logicvc, offset, &reg, &bit);
+
+	regmap_update_bits(logicvc->regmap, reg, bit, value ? bit : 0);
+}
+
+static int logicvc_gpio_direction_output(struct gpio_chip *chip,
+					 unsigned offset, int value)
+{
+	/* Pins are always configured as output, so just set the value. */
+	logicvc_gpio_set(chip, offset, value);
+
+	return 0;
+}
+
+static struct regmap_config logicvc_gpio_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.name		= "logicvc-gpio",
+};
+
+static int logicvc_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *of_node = dev->of_node;
+	struct logicvc_gpio *logicvc;
+	int ret;
+
+	logicvc = devm_kzalloc(dev, sizeof(*logicvc), GFP_KERNEL);
+	if (!logicvc)
+		return -ENOMEM;
+
+	/* Try to get regmap from parent first. */
+	logicvc->regmap = syscon_node_to_regmap(of_node->parent);
+
+	/* Grab our own regmap if that fails. */
+	if (IS_ERR(logicvc->regmap)) {
+		struct resource res;
+		void __iomem *base;
+
+		ret = of_address_to_resource(of_node, 0, &res);
+		if (ret) {
+			dev_err(dev, "Failed to get resource from address\n");
+			return ret;
+		}
+
+		base = devm_ioremap_resource(dev, &res);
+		if (IS_ERR(base)) {
+			dev_err(dev, "Failed to map I/O base\n");
+			return PTR_ERR(base);
+		}
+
+		logicvc_gpio_regmap_config.max_register = resource_size(&res) -
+			logicvc_gpio_regmap_config.reg_stride;
+
+		logicvc->regmap =
+			devm_regmap_init_mmio(dev, base,
+					      &logicvc_gpio_regmap_config);
+		if (IS_ERR(logicvc->regmap)) {
+			dev_err(dev, "Failed to create regmap for I/O\n");
+			return PTR_ERR(logicvc->regmap);
+		}
+	}
+
+	logicvc->chip.parent = dev;
+	logicvc->chip.owner = THIS_MODULE;
+	logicvc->chip.label = dev_name(dev);
+	logicvc->chip.base = -1;
+	logicvc->chip.ngpio = LOGICVC_CTRL_GPIO_BITS +
+			      LOGICVC_POWER_CTRL_GPIO_BITS;
+	logicvc->chip.get = logicvc_gpio_get;
+	logicvc->chip.set = logicvc_gpio_set;
+	logicvc->chip.direction_output = logicvc_gpio_direction_output;
+
+	platform_set_drvdata(pdev, logicvc);
+
+	return devm_gpiochip_add_data(dev, &logicvc->chip, logicvc);
+}
+
+static const struct of_device_id logicivc_gpio_of_table[] = {
+	{
+		.compatible	= "xylon,logicvc-3.02.a-gpio",
+	},
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, logicivc_gpio_of_table);
+
+static struct platform_driver logicvc_gpio_driver = {
+	.driver	= {
+		.name		= "gpio-logicvc",
+		.of_match_table	= logicivc_gpio_of_table,
+	},
+	.probe	= logicvc_gpio_probe,
+};
+
+module_platform_driver(logicvc_gpio_driver);
+
+MODULE_AUTHOR("Paul Kocialkowski <paul.kocialkowski@bootlin.com>");
+MODULE_DESCRIPTION("Xylon LogiCVC GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c
index 00943170ce36..a42145873cc9 100644
--- a/drivers/gpio/gpio-loongson.c
+++ b/drivers/gpio/gpio-loongson.c
@@ -22,7 +22,7 @@
 #define STLS2F_N_GPIO		4
 #define STLS3A_N_GPIO		16
 
-#ifdef CONFIG_CPU_LOONGSON3
+#ifdef CONFIG_CPU_LOONGSON64
 #define LOONGSON_N_GPIO	STLS3A_N_GPIO
 #else
 #define LOONGSON_N_GPIO	STLS2F_N_GPIO
diff --git a/drivers/gpio/gpio-lp873x.c b/drivers/gpio/gpio-lp873x.c
index 801995dd9b26..70fad87ff2db 100644
--- a/drivers/gpio/gpio-lp873x.c
+++ b/drivers/gpio/gpio-lp873x.c
@@ -33,7 +33,7 @@ static int lp873x_gpio_get_direction(struct gpio_chip *chip,
 				     unsigned int offset)
 {
 	/* This device is output only */
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int lp873x_gpio_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-lp87565.c b/drivers/gpio/gpio-lp87565.c
index a121c8f10610..e1244520cf7d 100644
--- a/drivers/gpio/gpio-lp87565.c
+++ b/drivers/gpio/gpio-lp87565.c
@@ -57,7 +57,10 @@ static int lp87565_gpio_get_direction(struct gpio_chip *chip,
 	if (ret < 0)
 		return ret;
 
-	return !(val & BIT(offset));
+	if (val & BIT(offset))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int lp87565_gpio_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c
deleted file mode 100644
index e9e47c0d5be7..000000000000
--- a/drivers/gpio/gpio-lynxpoint.c
+++ /dev/null
@@ -1,465 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPIO controller driver for Intel Lynxpoint PCH chipset>
- * Copyright (c) 2012, Intel Corporation.
- *
- * Author: Mathias Nyman <mathias.nyman@linux.intel.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/bitops.h>
-#include <linux/gpio/driver.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-/* LynxPoint chipset has support for 94 gpio pins */
-
-#define LP_NUM_GPIO	94
-
-/* Bitmapped register offsets */
-#define LP_ACPI_OWNED	0x00 /* Bitmap, set by bios, 0: pin reserved for ACPI */
-#define LP_GC		0x7C /* set APIC IRQ to IRQ14 or IRQ15 for all pins */
-#define LP_INT_STAT	0x80
-#define LP_INT_ENABLE	0x90
-
-/* Each pin has two 32 bit config registers, starting at 0x100 */
-#define LP_CONFIG1	0x100
-#define LP_CONFIG2	0x104
-
-/* LP_CONFIG1 reg bits */
-#define OUT_LVL_BIT	BIT(31)
-#define IN_LVL_BIT	BIT(30)
-#define TRIG_SEL_BIT	BIT(4) /* 0: Edge, 1: Level */
-#define INT_INV_BIT	BIT(3) /* Invert interrupt triggering */
-#define DIR_BIT		BIT(2) /* 0: Output, 1: Input */
-#define USE_SEL_BIT	BIT(0) /* 0: Native, 1: GPIO */
-
-/* LP_CONFIG2 reg bits */
-#define GPINDIS_BIT	BIT(2) /* disable input sensing */
-#define GPIWP_BIT	(BIT(0) | BIT(1)) /* weak pull options */
-
-struct lp_gpio {
-	struct gpio_chip	chip;
-	struct platform_device	*pdev;
-	spinlock_t		lock;
-	unsigned long		reg_base;
-};
-
-/*
- * Lynxpoint gpios are controlled through both bitmapped registers and
- * per gpio specific registers. The bitmapped registers are in chunks of
- * 3 x 32bit registers to cover all 94 gpios
- *
- * per gpio specific registers consist of two 32bit registers per gpio
- * (LP_CONFIG1 and LP_CONFIG2), with 94 gpios there's a total of
- * 188 config registers.
- *
- * A simplified view of the register layout look like this:
- *
- * LP_ACPI_OWNED[31:0] gpio ownerships for gpios 0-31  (bitmapped registers)
- * LP_ACPI_OWNED[63:32] gpio ownerships for gpios 32-63
- * LP_ACPI_OWNED[94:64] gpio ownerships for gpios 63-94
- * ...
- * LP_INT_ENABLE[31:0] ...
- * LP_INT_ENABLE[63:31] ...
- * LP_INT_ENABLE[94:64] ...
- * LP0_CONFIG1 (gpio 0) config1 reg for gpio 0 (per gpio registers)
- * LP0_CONFIG2 (gpio 0) config2 reg for gpio 0
- * LP1_CONFIG1 (gpio 1) config1 reg for gpio 1
- * LP1_CONFIG2 (gpio 1) config2 reg for gpio 1
- * LP2_CONFIG1 (gpio 2) ...
- * LP2_CONFIG2 (gpio 2) ...
- * ...
- * LP94_CONFIG1 (gpio 94) ...
- * LP94_CONFIG2 (gpio 94) ...
- */
-
-static unsigned long lp_gpio_reg(struct gpio_chip *chip, unsigned offset,
-				 int reg)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	int reg_offset;
-
-	if (reg == LP_CONFIG1 || reg == LP_CONFIG2)
-		/* per gpio specific config registers */
-		reg_offset = offset * 8;
-	else
-		/* bitmapped registers */
-		reg_offset = (offset / 32) * 4;
-
-	return lg->reg_base + reg + reg_offset;
-}
-
-static int lp_gpio_request(struct gpio_chip *chip, unsigned offset)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long reg = lp_gpio_reg(chip, offset, LP_CONFIG1);
-	unsigned long conf2 = lp_gpio_reg(chip, offset, LP_CONFIG2);
-	unsigned long acpi_use = lp_gpio_reg(chip, offset, LP_ACPI_OWNED);
-
-	pm_runtime_get(&lg->pdev->dev); /* should we put if failed */
-
-	/* Fail if BIOS reserved pin for ACPI use */
-	if (!(inl(acpi_use) & BIT(offset % 32))) {
-		dev_err(&lg->pdev->dev, "gpio %d reserved for ACPI\n", offset);
-		return -EBUSY;
-	}
-	/* Fail if pin is in alternate function mode (not GPIO mode) */
-	if (!(inl(reg) & USE_SEL_BIT))
-		return -ENODEV;
-
-	/* enable input sensing */
-	outl(inl(conf2) & ~GPINDIS_BIT, conf2);
-
-
-	return 0;
-}
-
-static void lp_gpio_free(struct gpio_chip *chip, unsigned offset)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long conf2 = lp_gpio_reg(chip, offset, LP_CONFIG2);
-
-	/* disable input sensing */
-	outl(inl(conf2) | GPINDIS_BIT, conf2);
-
-	pm_runtime_put(&lg->pdev->dev);
-}
-
-static int lp_irq_type(struct irq_data *d, unsigned type)
-{
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
-	struct lp_gpio *lg = gpiochip_get_data(gc);
-	u32 hwirq = irqd_to_hwirq(d);
-	unsigned long flags;
-	u32 value;
-	unsigned long reg = lp_gpio_reg(&lg->chip, hwirq, LP_CONFIG1);
-
-	if (hwirq >= lg->chip.ngpio)
-		return -EINVAL;
-
-	spin_lock_irqsave(&lg->lock, flags);
-	value = inl(reg);
-
-	/* set both TRIG_SEL and INV bits to 0 for rising edge */
-	if (type & IRQ_TYPE_EDGE_RISING)
-		value &= ~(TRIG_SEL_BIT | INT_INV_BIT);
-
-	/* TRIG_SEL bit 0, INV bit 1 for falling edge */
-	if (type & IRQ_TYPE_EDGE_FALLING)
-		value = (value | INT_INV_BIT) & ~TRIG_SEL_BIT;
-
-	/* TRIG_SEL bit 1, INV bit 0 for level low */
-	if (type & IRQ_TYPE_LEVEL_LOW)
-		value = (value | TRIG_SEL_BIT) & ~INT_INV_BIT;
-
-	/* TRIG_SEL bit 1, INV bit 1 for level high */
-	if (type & IRQ_TYPE_LEVEL_HIGH)
-		value |= TRIG_SEL_BIT | INT_INV_BIT;
-
-	outl(value, reg);
-	spin_unlock_irqrestore(&lg->lock, flags);
-
-	return 0;
-}
-
-static int lp_gpio_get(struct gpio_chip *chip, unsigned offset)
-{
-	unsigned long reg = lp_gpio_reg(chip, offset, LP_CONFIG1);
-	return !!(inl(reg) & IN_LVL_BIT);
-}
-
-static void lp_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long reg = lp_gpio_reg(chip, offset, LP_CONFIG1);
-	unsigned long flags;
-
-	spin_lock_irqsave(&lg->lock, flags);
-
-	if (value)
-		outl(inl(reg) | OUT_LVL_BIT, reg);
-	else
-		outl(inl(reg) & ~OUT_LVL_BIT, reg);
-
-	spin_unlock_irqrestore(&lg->lock, flags);
-}
-
-static int lp_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long reg = lp_gpio_reg(chip, offset, LP_CONFIG1);
-	unsigned long flags;
-
-	spin_lock_irqsave(&lg->lock, flags);
-	outl(inl(reg) | DIR_BIT, reg);
-	spin_unlock_irqrestore(&lg->lock, flags);
-
-	return 0;
-}
-
-static int lp_gpio_direction_output(struct gpio_chip *chip,
-				      unsigned offset, int value)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long reg = lp_gpio_reg(chip, offset, LP_CONFIG1);
-	unsigned long flags;
-
-	lp_gpio_set(chip, offset, value);
-
-	spin_lock_irqsave(&lg->lock, flags);
-	outl(inl(reg) & ~DIR_BIT, reg);
-	spin_unlock_irqrestore(&lg->lock, flags);
-
-	return 0;
-}
-
-static void lp_gpio_irq_handler(struct irq_desc *desc)
-{
-	struct irq_data *data = irq_desc_get_irq_data(desc);
-	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
-	struct lp_gpio *lg = gpiochip_get_data(gc);
-	struct irq_chip *chip = irq_data_get_irq_chip(data);
-	unsigned long reg, ena, pending;
-	u32 base, pin;
-
-	/* check from GPIO controller which pin triggered the interrupt */
-	for (base = 0; base < lg->chip.ngpio; base += 32) {
-		reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT);
-		ena = lp_gpio_reg(&lg->chip, base, LP_INT_ENABLE);
-
-		/* Only interrupts that are enabled */
-		pending = inl(reg) & inl(ena);
-
-		for_each_set_bit(pin, &pending, 32) {
-			unsigned irq;
-
-			/* Clear before handling so we don't lose an edge */
-			outl(BIT(pin), reg);
-
-			irq = irq_find_mapping(lg->chip.irq.domain, base + pin);
-			generic_handle_irq(irq);
-		}
-	}
-	chip->irq_eoi(data);
-}
-
-static void lp_irq_unmask(struct irq_data *d)
-{
-}
-
-static void lp_irq_mask(struct irq_data *d)
-{
-}
-
-static void lp_irq_enable(struct irq_data *d)
-{
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
-	struct lp_gpio *lg = gpiochip_get_data(gc);
-	u32 hwirq = irqd_to_hwirq(d);
-	unsigned long reg = lp_gpio_reg(&lg->chip, hwirq, LP_INT_ENABLE);
-	unsigned long flags;
-
-	spin_lock_irqsave(&lg->lock, flags);
-	outl(inl(reg) | BIT(hwirq % 32), reg);
-	spin_unlock_irqrestore(&lg->lock, flags);
-}
-
-static void lp_irq_disable(struct irq_data *d)
-{
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
-	struct lp_gpio *lg = gpiochip_get_data(gc);
-	u32 hwirq = irqd_to_hwirq(d);
-	unsigned long reg = lp_gpio_reg(&lg->chip, hwirq, LP_INT_ENABLE);
-	unsigned long flags;
-
-	spin_lock_irqsave(&lg->lock, flags);
-	outl(inl(reg) & ~BIT(hwirq % 32), reg);
-	spin_unlock_irqrestore(&lg->lock, flags);
-}
-
-static struct irq_chip lp_irqchip = {
-	.name = "LP-GPIO",
-	.irq_mask = lp_irq_mask,
-	.irq_unmask = lp_irq_unmask,
-	.irq_enable = lp_irq_enable,
-	.irq_disable = lp_irq_disable,
-	.irq_set_type = lp_irq_type,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int lp_gpio_irq_init_hw(struct gpio_chip *chip)
-{
-	struct lp_gpio *lg = gpiochip_get_data(chip);
-	unsigned long reg;
-	unsigned base;
-
-	for (base = 0; base < lg->chip.ngpio; base += 32) {
-		/* disable gpio pin interrupts */
-		reg = lp_gpio_reg(&lg->chip, base, LP_INT_ENABLE);
-		outl(0, reg);
-		/* Clear interrupt status register */
-		reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT);
-		outl(0xffffffff, reg);
-	}
-
-	return 0;
-}
-
-static int lp_gpio_probe(struct platform_device *pdev)
-{
-	struct lp_gpio *lg;
-	struct gpio_chip *gc;
-	struct resource *io_rc, *irq_rc;
-	struct device *dev = &pdev->dev;
-	unsigned long reg_len;
-	int ret = -ENODEV;
-
-	lg = devm_kzalloc(dev, sizeof(struct lp_gpio), GFP_KERNEL);
-	if (!lg)
-		return -ENOMEM;
-
-	lg->pdev = pdev;
-	platform_set_drvdata(pdev, lg);
-
-	io_rc = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	irq_rc = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-
-	if (!io_rc) {
-		dev_err(dev, "missing IO resources\n");
-		return -EINVAL;
-	}
-
-	lg->reg_base = io_rc->start;
-	reg_len = resource_size(io_rc);
-
-	if (!devm_request_region(dev, lg->reg_base, reg_len, "lp-gpio")) {
-		dev_err(dev, "failed requesting IO region 0x%x\n",
-			(unsigned int)lg->reg_base);
-		return -EBUSY;
-	}
-
-	spin_lock_init(&lg->lock);
-
-	gc = &lg->chip;
-	gc->label = dev_name(dev);
-	gc->owner = THIS_MODULE;
-	gc->request = lp_gpio_request;
-	gc->free = lp_gpio_free;
-	gc->direction_input = lp_gpio_direction_input;
-	gc->direction_output = lp_gpio_direction_output;
-	gc->get = lp_gpio_get;
-	gc->set = lp_gpio_set;
-	gc->base = -1;
-	gc->ngpio = LP_NUM_GPIO;
-	gc->can_sleep = false;
-	gc->parent = dev;
-
-	/* set up interrupts  */
-	if (irq_rc && irq_rc->start) {
-		struct gpio_irq_chip *girq;
-
-		girq = &gc->irq;
-		girq->chip = &lp_irqchip;
-		girq->init_hw = lp_gpio_irq_init_hw;
-		girq->parent_handler = lp_gpio_irq_handler;
-		girq->num_parents = 1;
-		girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
-					     sizeof(*girq->parents),
-					     GFP_KERNEL);
-		if (!girq->parents)
-			return -ENOMEM;
-		girq->parents[0] = (unsigned)irq_rc->start;
-		girq->default_type = IRQ_TYPE_NONE;
-		girq->handler = handle_bad_irq;
-	}
-
-	ret = devm_gpiochip_add_data(dev, gc, lg);
-	if (ret) {
-		dev_err(dev, "failed adding lp-gpio chip\n");
-		return ret;
-	}
-
-	pm_runtime_enable(dev);
-
-	return 0;
-}
-
-static int lp_gpio_runtime_suspend(struct device *dev)
-{
-	return 0;
-}
-
-static int lp_gpio_runtime_resume(struct device *dev)
-{
-	return 0;
-}
-
-static int lp_gpio_resume(struct device *dev)
-{
-	struct lp_gpio *lg = dev_get_drvdata(dev);
-	unsigned long reg;
-	int i;
-
-	/* on some hardware suspend clears input sensing, re-enable it here */
-	for (i = 0; i < lg->chip.ngpio; i++) {
-		if (gpiochip_is_requested(&lg->chip, i) != NULL) {
-			reg = lp_gpio_reg(&lg->chip, i, LP_CONFIG2);
-			outl(inl(reg) & ~GPINDIS_BIT, reg);
-		}
-	}
-	return 0;
-}
-
-static const struct dev_pm_ops lp_gpio_pm_ops = {
-	.runtime_suspend = lp_gpio_runtime_suspend,
-	.runtime_resume = lp_gpio_runtime_resume,
-	.resume = lp_gpio_resume,
-};
-
-static const struct acpi_device_id lynxpoint_gpio_acpi_match[] = {
-	{ "INT33C7", 0 },
-	{ "INT3437", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(acpi, lynxpoint_gpio_acpi_match);
-
-static int lp_gpio_remove(struct platform_device *pdev)
-{
-	pm_runtime_disable(&pdev->dev);
-	return 0;
-}
-
-static struct platform_driver lp_gpio_driver = {
-	.probe          = lp_gpio_probe,
-	.remove         = lp_gpio_remove,
-	.driver         = {
-		.name   = "lp_gpio",
-		.pm	= &lp_gpio_pm_ops,
-		.acpi_match_table = ACPI_PTR(lynxpoint_gpio_acpi_match),
-	},
-};
-
-static int __init lp_gpio_init(void)
-{
-	return platform_driver_register(&lp_gpio_driver);
-}
-
-static void __exit lp_gpio_exit(void)
-{
-	platform_driver_unregister(&lp_gpio_driver);
-}
-
-subsys_initcall(lp_gpio_init);
-module_exit(lp_gpio_exit);
-
-MODULE_AUTHOR("Mathias Nyman (Intel)");
-MODULE_DESCRIPTION("GPIO interface for Intel Lynxpoint");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:lp_gpio");
diff --git a/drivers/gpio/gpio-madera.c b/drivers/gpio/gpio-madera.c
index 7086f8b5388f..8f38303fcbc4 100644
--- a/drivers/gpio/gpio-madera.c
+++ b/drivers/gpio/gpio-madera.c
@@ -34,7 +34,10 @@ static int madera_gpio_get_direction(struct gpio_chip *chip,
 	if (ret < 0)
 		return ret;
 
-	return !!(val & MADERA_GP1_DIR_MASK);
+	if (val & MADERA_GP1_DIR_MASK)
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int madera_gpio_direction_in(struct gpio_chip *chip, unsigned int offset)
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 4b4b2ceb82fc..310d1a248cae 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/bitmap.h>
+#include <linux/bitops.h>
 #include <linux/crc8.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/driver.h>
@@ -94,7 +95,7 @@ DECLARE_CRC8_TABLE(max3191x_crc8);
 
 static int max3191x_get_direction(struct gpio_chip *gpio, unsigned int offset)
 {
-	return 1; /* always in */
+	return GPIO_LINE_DIRECTION_IN; /* always in */
 }
 
 static int max3191x_direction_input(struct gpio_chip *gpio, unsigned int offset)
@@ -232,16 +233,20 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
 				 unsigned long *bits)
 {
 	struct max3191x_chip *max3191x = gpiochip_get_data(gpio);
-	int ret, bit = 0, wordlen = max3191x_wordlen(max3191x);
+	const unsigned int wordlen = max3191x_wordlen(max3191x);
+	int ret;
+	unsigned long bit;
+	unsigned long gpio_mask;
+	unsigned long in;
 
 	mutex_lock(&max3191x->lock);
 	ret = max3191x_readout_locked(max3191x);
 	if (ret)
 		goto out_unlock;
 
-	while ((bit = find_next_bit(mask, gpio->ngpio, bit)) != gpio->ngpio) {
+	bitmap_zero(bits, gpio->ngpio);
+	for_each_set_clump8(bit, gpio_mask, mask, gpio->ngpio) {
 		unsigned int chipnum = bit / MAX3191X_NGPIO;
-		unsigned long in, shift, index;
 
 		if (max3191x_chip_is_faulting(max3191x, chipnum)) {
 			ret = -EIO;
@@ -249,12 +254,8 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
 		}
 
 		in = ((u8 *)max3191x->xfer.rx_buf)[chipnum * wordlen];
-		shift = round_down(bit % BITS_PER_LONG, MAX3191X_NGPIO);
-		index = bit / BITS_PER_LONG;
-		bits[index] &= ~(mask[index] & (0xff << shift));
-		bits[index] |= mask[index] & (in << shift); /* copy bits */
-
-		bit = (chipnum + 1) * MAX3191X_NGPIO; /* go to next chip */
+		in &= gpio_mask;
+		bitmap_set_value8(bits, in, bit);
 	}
 
 out_unlock:
diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c
index 642c6321c22a..313bd02dd893 100644
--- a/drivers/gpio/gpio-max77620.c
+++ b/drivers/gpio/gpio-max77620.c
@@ -18,109 +18,115 @@ struct max77620_gpio {
 	struct gpio_chip	gpio_chip;
 	struct regmap		*rmap;
 	struct device		*dev;
+	struct mutex		buslock; /* irq_bus_lock */
+	unsigned int		irq_type[8];
+	bool			irq_enabled[8];
 };
 
-static const struct regmap_irq max77620_gpio_irqs[] = {
-	[0] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE0,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 0,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[1] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE1,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 1,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[2] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE2,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 2,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[3] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE3,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 3,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[4] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE4,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 4,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[5] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE5,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 5,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[6] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE6,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 6,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-	[7] = {
-		.reg_offset = 0,
-		.mask = MAX77620_IRQ_LVL2_GPIO_EDGE7,
-		.type = {
-			.type_rising_val = MAX77620_CNFG_GPIO_INT_RISING,
-			.type_falling_val = MAX77620_CNFG_GPIO_INT_FALLING,
-			.type_reg_mask = MAX77620_CNFG_GPIO_INT_MASK,
-			.type_reg_offset = 7,
-			.types_supported = IRQ_TYPE_EDGE_BOTH,
-		},
-	},
-};
+static irqreturn_t max77620_gpio_irqhandler(int irq, void *data)
+{
+	struct max77620_gpio *gpio = data;
+	unsigned int value, offset;
+	unsigned long pending;
+	int err;
+
+	err = regmap_read(gpio->rmap, MAX77620_REG_IRQ_LVL2_GPIO, &value);
+	if (err < 0) {
+		dev_err(gpio->dev, "REG_IRQ_LVL2_GPIO read failed: %d\n", err);
+		return IRQ_NONE;
+	}
+
+	pending = value;
+
+	for_each_set_bit(offset, &pending, 8) {
+		unsigned int virq;
+
+		virq = irq_find_mapping(gpio->gpio_chip.irq.domain, offset);
+		handle_nested_irq(virq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void max77620_gpio_irq_mask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct max77620_gpio *gpio = gpiochip_get_data(chip);
+
+	gpio->irq_enabled[data->hwirq] = false;
+}
 
-static const struct regmap_irq_chip max77620_gpio_irq_chip = {
-	.name = "max77620-gpio",
-	.irqs = max77620_gpio_irqs,
-	.num_irqs = ARRAY_SIZE(max77620_gpio_irqs),
-	.num_regs = 1,
-	.num_type_reg = 8,
-	.irq_reg_stride = 1,
-	.type_reg_stride = 1,
-	.status_base = MAX77620_REG_IRQ_LVL2_GPIO,
-	.type_base = MAX77620_REG_GPIO0,
+static void max77620_gpio_irq_unmask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct max77620_gpio *gpio = gpiochip_get_data(chip);
+
+	gpio->irq_enabled[data->hwirq] = true;
+}
+
+static int max77620_gpio_set_irq_type(struct irq_data *data, unsigned int type)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct max77620_gpio *gpio = gpiochip_get_data(chip);
+	unsigned int irq_type;
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		irq_type = MAX77620_CNFG_GPIO_INT_RISING;
+		break;
+
+	case IRQ_TYPE_EDGE_FALLING:
+		irq_type = MAX77620_CNFG_GPIO_INT_FALLING;
+		break;
+
+	case IRQ_TYPE_EDGE_BOTH:
+		irq_type = MAX77620_CNFG_GPIO_INT_RISING |
+			   MAX77620_CNFG_GPIO_INT_FALLING;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	gpio->irq_type[data->hwirq] = irq_type;
+
+	return 0;
+}
+
+static void max77620_gpio_bus_lock(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct max77620_gpio *gpio = gpiochip_get_data(chip);
+
+	mutex_lock(&gpio->buslock);
+}
+
+static void max77620_gpio_bus_sync_unlock(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct max77620_gpio *gpio = gpiochip_get_data(chip);
+	unsigned int value, offset = data->hwirq;
+	int err;
+
+	value = gpio->irq_enabled[offset] ? gpio->irq_type[offset] : 0;
+
+	err = regmap_update_bits(gpio->rmap, GPIO_REG_ADDR(offset),
+				 MAX77620_CNFG_GPIO_INT_MASK, value);
+	if (err < 0)
+		dev_err(chip->parent, "failed to update interrupt mask: %d\n",
+			err);
+
+	mutex_unlock(&gpio->buslock);
+}
+
+static struct irq_chip max77620_gpio_irqchip = {
+	.name		= "max77620-gpio",
+	.irq_mask	= max77620_gpio_irq_mask,
+	.irq_unmask	= max77620_gpio_irq_unmask,
+	.irq_set_type	= max77620_gpio_set_irq_type,
+	.irq_bus_lock	= max77620_gpio_bus_lock,
+	.irq_bus_sync_unlock = max77620_gpio_bus_sync_unlock,
+	.flags		= IRQCHIP_MASK_ON_SUSPEND,
 };
 
 static int max77620_gpio_dir_input(struct gpio_chip *gc, unsigned int offset)
@@ -254,14 +260,6 @@ static int max77620_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 	return -ENOTSUPP;
 }
 
-static int max77620_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
-{
-	struct max77620_gpio *mgpio = gpiochip_get_data(gc);
-	struct max77620_chip *chip = dev_get_drvdata(mgpio->dev->parent);
-
-	return regmap_irq_get_virq(chip->gpio_irq_data, offset);
-}
-
 static int max77620_gpio_probe(struct platform_device *pdev)
 {
 	struct max77620_chip *chip =  dev_get_drvdata(pdev->dev.parent);
@@ -287,7 +285,6 @@ static int max77620_gpio_probe(struct platform_device *pdev)
 	mgpio->gpio_chip.direction_output = max77620_gpio_dir_output;
 	mgpio->gpio_chip.set = max77620_gpio_set;
 	mgpio->gpio_chip.set_config = max77620_gpio_set_config;
-	mgpio->gpio_chip.to_irq = max77620_gpio_to_irq;
 	mgpio->gpio_chip.ngpio = MAX77620_GPIO_NR;
 	mgpio->gpio_chip.can_sleep = 1;
 	mgpio->gpio_chip.base = -1;
@@ -303,15 +300,21 @@ static int max77620_gpio_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = devm_regmap_add_irq_chip(&pdev->dev, chip->rmap, gpio_irq,
-				       IRQF_ONESHOT, -1,
-				       &max77620_gpio_irq_chip,
-				       &chip->gpio_irq_data);
+	mutex_init(&mgpio->buslock);
+
+	gpiochip_irqchip_add_nested(&mgpio->gpio_chip, &max77620_gpio_irqchip,
+				    0, handle_edge_irq, IRQ_TYPE_NONE);
+
+	ret = request_threaded_irq(gpio_irq, NULL, max77620_gpio_irqhandler,
+				   IRQF_ONESHOT, "max77620-gpio", mgpio);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to add gpio irq_chip %d\n", ret);
+		dev_err(&pdev->dev, "failed to request IRQ: %d\n", ret);
 		return ret;
 	}
 
+	gpiochip_set_nested_irqchip(&mgpio->gpio_chip, &max77620_gpio_irqchip,
+				    gpio_irq);
+
 	return 0;
 }
 
diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c
index 70fdb42a8e88..1e21c661d79d 100644
--- a/drivers/gpio/gpio-menz127.c
+++ b/drivers/gpio/gpio-menz127.c
@@ -211,3 +211,4 @@ MODULE_AUTHOR("Andreas Werner <andreas.werner@men.de>");
 MODULE_DESCRIPTION("MEN 16z127 GPIO Controller");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("mcb:16z127");
+MODULE_IMPORT_NS(MCB);
diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c
index 3302125e5265..48918a016cd8 100644
--- a/drivers/gpio/gpio-merrifield.c
+++ b/drivers/gpio/gpio-merrifield.c
@@ -162,7 +162,10 @@ static int mrfld_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
 {
 	void __iomem *gpdr = gpio_reg(chip, offset, GPDR);
 
-	return !(readl(gpdr) & BIT(offset % 32));
+	if (readl(gpdr) & BIT(offset % 32))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int mrfld_gpio_set_debounce(struct gpio_chip *chip, unsigned int offset,
@@ -362,8 +365,9 @@ static void mrfld_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(irqchip, desc);
 }
 
-static void mrfld_irq_init_hw(struct mrfld_gpio *priv)
+static int mrfld_irq_init_hw(struct gpio_chip *chip)
 {
+	struct mrfld_gpio *priv = gpiochip_get_data(chip);
 	void __iomem *reg;
 	unsigned int base;
 
@@ -375,6 +379,8 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv)
 		reg = gpio_reg(&priv->chip, base, GFER);
 		writel(0, reg);
 	}
+
+	return 0;
 }
 
 static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv)
@@ -393,14 +399,36 @@ static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv)
 	return name;
 }
 
-static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int mrfld_gpio_add_pin_ranges(struct gpio_chip *chip)
 {
+	struct mrfld_gpio *priv = gpiochip_get_data(chip);
 	const struct mrfld_gpio_pinrange *range;
 	const char *pinctrl_dev_name;
+	unsigned int i;
+	int retval;
+
+	pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(priv);
+	for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) {
+		range = &mrfld_gpio_ranges[i];
+		retval = gpiochip_add_pin_range(&priv->chip, pinctrl_dev_name,
+						range->gpio_base,
+						range->pin_base,
+						range->npins);
+		if (retval) {
+			dev_err(priv->dev, "failed to add GPIO pin range\n");
+			return retval;
+		}
+	}
+
+	return 0;
+}
+
+static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct gpio_irq_chip *girq;
 	struct mrfld_gpio *priv;
 	u32 gpio_base, irq_base;
 	void __iomem *base;
-	unsigned int i;
 	int retval;
 
 	retval = pcim_enable_device(pdev);
@@ -441,42 +469,31 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	priv->chip.base = gpio_base;
 	priv->chip.ngpio = MRFLD_NGPIO;
 	priv->chip.can_sleep = false;
+	priv->chip.add_pin_ranges = mrfld_gpio_add_pin_ranges;
 
 	raw_spin_lock_init(&priv->lock);
 
-	pci_set_drvdata(pdev, priv);
+	girq = &priv->chip.irq;
+	girq->chip = &mrfld_irqchip;
+	girq->init_hw = mrfld_irq_init_hw;
+	girq->parent_handler = mrfld_irq_handler;
+	girq->num_parents = 1;
+	girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
+				     sizeof(*girq->parents), GFP_KERNEL);
+	if (!girq->parents)
+		return -ENOMEM;
+	girq->parents[0] = pdev->irq;
+	girq->first = irq_base;
+	girq->default_type = IRQ_TYPE_NONE;
+	girq->handler = handle_bad_irq;
+
 	retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
 	if (retval) {
 		dev_err(&pdev->dev, "gpiochip_add error %d\n", retval);
 		return retval;
 	}
 
-	pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(priv);
-	for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) {
-		range = &mrfld_gpio_ranges[i];
-		retval = gpiochip_add_pin_range(&priv->chip,
-						pinctrl_dev_name,
-						range->gpio_base,
-						range->pin_base,
-						range->npins);
-		if (retval) {
-			dev_err(&pdev->dev, "failed to add GPIO pin range\n");
-			return retval;
-		}
-	}
-
-	retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base,
-				      handle_bad_irq, IRQ_TYPE_NONE);
-	if (retval) {
-		dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n");
-		return retval;
-	}
-
-	mrfld_irq_init_hw(priv);
-
-	gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq,
-				     mrfld_irq_handler);
-
+	pci_set_drvdata(pdev, priv);
 	return 0;
 }
 
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 6f904c874678..f729e3e9e983 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -370,15 +370,23 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 static int bgpio_get_dir(struct gpio_chip *gc, unsigned int gpio)
 {
 	/* Return 0 if output, 1 if input */
-	if (gc->bgpio_dir_unreadable)
-		return !(gc->bgpio_dir & bgpio_line2mask(gc, gpio));
-	if (gc->reg_dir_out)
-		return !(gc->read_reg(gc->reg_dir_out) & bgpio_line2mask(gc, gpio));
+	if (gc->bgpio_dir_unreadable) {
+		if (gc->bgpio_dir & bgpio_line2mask(gc, gpio))
+			return GPIO_LINE_DIRECTION_OUT;
+		return GPIO_LINE_DIRECTION_IN;
+	}
+
+	if (gc->reg_dir_out) {
+		if (gc->read_reg(gc->reg_dir_out) & bgpio_line2mask(gc, gpio))
+			return GPIO_LINE_DIRECTION_OUT;
+		return GPIO_LINE_DIRECTION_IN;
+	}
+
 	if (gc->reg_dir_in)
-		return !!(gc->read_reg(gc->reg_dir_in) & bgpio_line2mask(gc, gpio));
+		if (!(gc->read_reg(gc->reg_dir_in) & bgpio_line2mask(gc, gpio)))
+			return GPIO_LINE_DIRECTION_OUT;
 
-	/* This should not happen */
-	return 1;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 213aedc97dc2..7d343bea784a 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * GPIO Testing Device Driver
  *
@@ -7,18 +7,18 @@
  * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
  */
 
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/gpio/driver.h>
+#include <linux/debugfs.h>
 #include <linux/gpio/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irq_sim.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include "gpiolib.h"
 
@@ -34,14 +34,9 @@
 
 #define gpio_mockup_err(...)	pr_err(GPIO_MOCKUP_NAME ": " __VA_ARGS__)
 
-enum {
-	GPIO_MOCKUP_DIR_IN = 0,
-	GPIO_MOCKUP_DIR_OUT = 1,
-};
-
 /*
  * struct gpio_pin_status - structure describing a GPIO status
- * @dir:       Configures direction of gpio as "in" or "out", 0=in, 1=out
+ * @dir:       Configures direction of gpio as "in" or "out"
  * @value:     Configures status of the gpio as 0(low) or 1(high)
  */
 struct gpio_mockup_line_status {
@@ -146,13 +141,68 @@ static void gpio_mockup_set_multiple(struct gpio_chip *gc,
 	mutex_unlock(&chip->lock);
 }
 
+static int gpio_mockup_apply_pull(struct gpio_mockup_chip *chip,
+				  unsigned int offset, int value)
+{
+	struct gpio_desc *desc;
+	struct gpio_chip *gc;
+	struct irq_sim *sim;
+	int curr, irq, irq_type;
+
+	gc = &chip->gc;
+	desc = &gc->gpiodev->descs[offset];
+	sim = &chip->irqsim;
+
+	mutex_lock(&chip->lock);
+
+	if (test_bit(FLAG_REQUESTED, &desc->flags) &&
+	    !test_bit(FLAG_IS_OUT, &desc->flags)) {
+		curr = __gpio_mockup_get(chip, offset);
+		if (curr == value)
+			goto out;
+
+		irq = irq_sim_irqnum(sim, offset);
+		irq_type = irq_get_trigger_type(irq);
+
+		if ((value == 1 && (irq_type & IRQ_TYPE_EDGE_RISING)) ||
+		    (value == 0 && (irq_type & IRQ_TYPE_EDGE_FALLING)))
+			irq_sim_fire(sim, offset);
+	}
+
+	/* Change the value unless we're actively driving the line. */
+	if (!test_bit(FLAG_REQUESTED, &desc->flags) ||
+		!test_bit(FLAG_IS_OUT, &desc->flags))
+		__gpio_mockup_set(chip, offset, value);
+
+out:
+	chip->lines[offset].pull = value;
+	mutex_unlock(&chip->lock);
+	return 0;
+}
+
+static int gpio_mockup_set_config(struct gpio_chip *gc,
+				  unsigned int offset, unsigned long config)
+{
+	struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
+
+	switch (pinconf_to_config_param(config)) {
+	case PIN_CONFIG_BIAS_PULL_UP:
+		return gpio_mockup_apply_pull(chip, offset, 1);
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		return gpio_mockup_apply_pull(chip, offset, 0);
+	default:
+		break;
+	}
+	return -ENOTSUPP;
+}
+
 static int gpio_mockup_dirout(struct gpio_chip *gc,
 			      unsigned int offset, int value)
 {
 	struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
 
 	mutex_lock(&chip->lock);
-	chip->lines[offset].dir = GPIO_MOCKUP_DIR_OUT;
+	chip->lines[offset].dir = GPIO_LINE_DIRECTION_OUT;
 	__gpio_mockup_set(chip, offset, value);
 	mutex_unlock(&chip->lock);
 
@@ -164,7 +214,7 @@ static int gpio_mockup_dirin(struct gpio_chip *gc, unsigned int offset)
 	struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
 
 	mutex_lock(&chip->lock);
-	chip->lines[offset].dir = GPIO_MOCKUP_DIR_IN;
+	chip->lines[offset].dir = GPIO_LINE_DIRECTION_IN;
 	mutex_unlock(&chip->lock);
 
 	return 0;
@@ -176,7 +226,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, unsigned int offset)
 	int direction;
 
 	mutex_lock(&chip->lock);
-	direction = !chip->lines[offset].dir;
+	direction = chip->lines[offset].dir;
 	mutex_unlock(&chip->lock);
 
 	return direction;
@@ -226,12 +276,8 @@ static ssize_t gpio_mockup_debugfs_write(struct file *file,
 					 size_t size, loff_t *ppos)
 {
 	struct gpio_mockup_dbgfs_private *priv;
-	int rv, val, curr, irq, irq_type;
-	struct gpio_mockup_chip *chip;
+	int rv, val;
 	struct seq_file *sfile;
-	struct gpio_desc *desc;
-	struct gpio_chip *gc;
-	struct irq_sim *sim;
 
 	if (*ppos != 0)
 		return -EINVAL;
@@ -244,35 +290,9 @@ static ssize_t gpio_mockup_debugfs_write(struct file *file,
 
 	sfile = file->private_data;
 	priv = sfile->private;
-	chip = priv->chip;
-	gc = &chip->gc;
-	desc = &gc->gpiodev->descs[priv->offset];
-	sim = &chip->irqsim;
-
-	mutex_lock(&chip->lock);
-
-	if (test_bit(FLAG_REQUESTED, &desc->flags) &&
-	    !test_bit(FLAG_IS_OUT, &desc->flags)) {
-		curr = __gpio_mockup_get(chip, priv->offset);
-		if (curr == val)
-			goto out;
-
-		irq = irq_sim_irqnum(sim, priv->offset);
-		irq_type = irq_get_trigger_type(irq);
-
-		if ((val == 1 && (irq_type & IRQ_TYPE_EDGE_RISING)) ||
-		    (val == 0 && (irq_type & IRQ_TYPE_EDGE_FALLING)))
-			irq_sim_fire(sim, priv->offset);
-	}
-
-	/* Change the value unless we're actively driving the line. */
-	if (!test_bit(FLAG_REQUESTED, &desc->flags) ||
-	    !test_bit(FLAG_IS_OUT, &desc->flags))
-		__gpio_mockup_set(chip, priv->offset, val);
-
-out:
-	chip->lines[priv->offset].pull = val;
-	mutex_unlock(&chip->lock);
+	rv = gpio_mockup_apply_pull(priv->chip, priv->offset, val);
+	if (rv)
+		return rv;
 
 	return size;
 }
@@ -375,7 +395,7 @@ static int gpio_mockup_probe(struct platform_device *pdev)
 	struct gpio_chip *gc;
 	struct device *dev;
 	const char *name;
-	int rv, base;
+	int rv, base, i;
 	u16 ngpio;
 
 	dev = &pdev->dev;
@@ -418,6 +438,7 @@ static int gpio_mockup_probe(struct platform_device *pdev)
 	gc->direction_output = gpio_mockup_dirout;
 	gc->direction_input = gpio_mockup_dirin;
 	gc->get_direction = gpio_mockup_get_direction;
+	gc->set_config = gpio_mockup_set_config;
 	gc->to_irq = gpio_mockup_to_irq;
 	gc->free = gpio_mockup_free;
 
@@ -426,6 +447,9 @@ static int gpio_mockup_probe(struct platform_device *pdev)
 	if (!chip->lines)
 		return -ENOMEM;
 
+	for (i = 0; i < gc->ngpio; i++)
+		chip->lines[i].dir = GPIO_LINE_DIRECTION_IN;
+
 	if (device_property_read_bool(dev, "named-gpio-lines")) {
 		rv = gpio_mockup_name_lines(dev, chip);
 		if (rv)
diff --git a/drivers/gpio/gpio-moxtet.c b/drivers/gpio/gpio-moxtet.c
index 3fd729994a38..8299909318f4 100644
--- a/drivers/gpio/gpio-moxtet.c
+++ b/drivers/gpio/gpio-moxtet.c
@@ -78,9 +78,9 @@ static int moxtet_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 
 	/* All lines are hard wired to be either input or output, not both. */
 	if (chip->desc->in_mask & BIT(offset))
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 	else if (chip->desc->out_mask & BIT(offset))
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 	else
 		return -EINVAL;
 }
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 16a47de29c94..604dfec353a1 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -22,6 +22,7 @@
 #include <linux/irq.h>
 #include <linux/gpio/driver.h>
 #include <linux/bitops.h>
+#include <linux/interrupt.h>
 
 #define MPC8XXX_GPIO_PINS	32
 
@@ -127,20 +128,19 @@ static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 		return -ENXIO;
 }
 
-static void mpc8xxx_gpio_irq_cascade(struct irq_desc *desc)
+static irqreturn_t mpc8xxx_gpio_irq_cascade(int irq, void *data)
 {
-	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_desc_get_handler_data(desc);
-	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct mpc8xxx_gpio_chip *mpc8xxx_gc = data;
 	struct gpio_chip *gc = &mpc8xxx_gc->gc;
-	unsigned int mask;
+	unsigned long mask;
+	int i;
 
 	mask = gc->read_reg(mpc8xxx_gc->regs + GPIO_IER)
 		& gc->read_reg(mpc8xxx_gc->regs + GPIO_IMR);
-	if (mask)
-		generic_handle_irq(irq_linear_revmap(mpc8xxx_gc->irq,
-						     32 - ffs(mask)));
-	if (chip->irq_eoi)
-		chip->irq_eoi(&desc->irq_data);
+	for_each_set_bit(i, &mask, 32)
+		generic_handle_irq(irq_linear_revmap(mpc8xxx_gc->irq, 31 - i));
+
+	return IRQ_HANDLED;
 }
 
 static void mpc8xxx_irq_unmask(struct irq_data *d)
@@ -296,6 +296,7 @@ static const struct mpc8xxx_gpio_devtype mpc512x_gpio_devtype = {
 
 static const struct mpc8xxx_gpio_devtype ls1028a_gpio_devtype = {
 	.gpio_dir_in_init = ls1028a_gpio_dir_in_init,
+	.irq_set_type = mpc8xxx_irq_set_type,
 };
 
 static const struct mpc8xxx_gpio_devtype mpc5125_gpio_devtype = {
@@ -346,6 +347,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	gc = &mpc8xxx_gc->gc;
+	gc->parent = &pdev->dev;
 
 	if (of_property_read_bool(np, "little-endian")) {
 		ret = bgpio_init(gc, &pdev->dev, 4,
@@ -377,7 +379,8 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 	 * It's assumed that only a single type of gpio controller is available
 	 * on the current machine, so overwriting global data is fine.
 	 */
-	mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type;
+	if (devtype->irq_set_type)
+		mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type;
 
 	if (devtype->gpio_dir_out)
 		gc->direction_output = devtype->gpio_dir_out;
@@ -386,6 +389,9 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 
 	gc->to_irq = mpc8xxx_gpio_to_irq;
 
+	if (of_device_is_compatible(np, "fsl,qoriq-gpio"))
+		gc->write_reg(mpc8xxx_gc->regs + GPIO_IBE, 0xffffffff);
+
 	ret = gpiochip_add_data(gc, mpc8xxx_gc);
 	if (ret) {
 		pr_err("%pOF: GPIO chip registration failed with status %d\n",
@@ -409,8 +415,16 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 	if (devtype->gpio_dir_in_init)
 		devtype->gpio_dir_in_init(gc);
 
-	irq_set_chained_handler_and_data(mpc8xxx_gc->irqn,
-					 mpc8xxx_gpio_irq_cascade, mpc8xxx_gc);
+	ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn,
+			       mpc8xxx_gpio_irq_cascade,
+			       IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade",
+			       mpc8xxx_gc);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: failed to devm_request_irq(%d), ret = %d\n",
+			np->full_name, mpc8xxx_gc->irqn, ret);
+		goto err;
+	}
+
 	return 0;
 err:
 	iounmap(mpc8xxx_gc->regs);
diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index d1d785f983a7..b992321bb852 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -253,8 +253,7 @@ mediatek_gpio_bank_probe(struct device *dev,
 
 		/*
 		 * Directly request the irq here instead of passing
-		 * a flow-handler to gpiochip_set_chained_irqchip,
-		 * because the irq is shared.
+		 * a flow-handler because the irq is shared.
 		 */
 		ret = devm_request_irq(dev, mtk->gpio_irq,
 				       mediatek_gpio_irq_handler, IRQF_SHARED,
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 6c0687694341..d2b999c7987f 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -46,7 +46,6 @@
 #include <linux/irqdomain.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
@@ -384,7 +383,10 @@ static int mvebu_gpio_get_direction(struct gpio_chip *chip, unsigned int pin)
 
 	regmap_read(mvchip->regs, GPIO_IO_CONF_OFF + mvchip->offset, &u);
 
-	return !!(u & BIT(pin));
+	if (u & BIT(pin))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int mvebu_gpio_to_irq(struct gpio_chip *chip, unsigned int pin)
@@ -429,6 +431,7 @@ static void mvebu_gpio_edge_irq_unmask(struct irq_data *d)
 	u32 mask = d->mask;
 
 	irq_gc_lock(gc);
+	mvebu_gpio_write_edge_cause(mvchip, ~mask);
 	ct->mask_cache_priv |= mask;
 	mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv);
 	irq_gc_unlock(gc);
@@ -773,23 +776,12 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 {
 	struct device *dev = &pdev->dev;
 	struct mvebu_pwm *mvpwm;
-	struct resource *res;
 	u32 set;
 
 	if (!of_device_is_compatible(mvchip->chip.of_node,
 				     "marvell,armada-370-gpio"))
 		return 0;
 
-	/*
-	 * There are only two sets of PWM configuration registers for
-	 * all the GPIO lines on those SoCs which this driver reserves
-	 * for the first two GPIO chips. So if the resource is missing
-	 * we can't treat it as an error.
-	 */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pwm");
-	if (!res)
-		return 0;
-
 	if (IS_ERR(mvchip->clk))
 		return PTR_ERR(mvchip->clk);
 
@@ -812,7 +804,13 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	mvchip->mvpwm = mvpwm;
 	mvpwm->mvchip = mvchip;
 
-	mvpwm->membase = devm_ioremap_resource(dev, res);
+	/*
+	 * There are only two sets of PWM configuration registers for
+	 * all the GPIO lines on those SoCs which this driver reserves
+	 * for the first two GPIO chips. So if the resource is missing
+	 * we can't treat it as an error.
+	 */
+	mvpwm->membase = devm_platform_ioremap_resource_byname(pdev, "pwm");
 	if (IS_ERR(mvpwm->membase))
 		return PTR_ERR(mvpwm->membase);
 
@@ -1104,7 +1102,11 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
 		soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION;
 
 	/* Some gpio controllers do not provide irq support */
-	have_irqs = of_irq_count(np) != 0;
+	err = platform_irq_count(pdev);
+	if (err < 0)
+		return err;
+
+	have_irqs = err != 0;
 
 	mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip),
 			      GFP_KERNEL);
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 7907a8755866..c77d474185f3 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -411,6 +411,7 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct mxc_gpio_port *port;
+	int irq_count;
 	int irq_base;
 	int err;
 
@@ -426,9 +427,15 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(port->base))
 		return PTR_ERR(port->base);
 
-	port->irq_high = platform_get_irq(pdev, 1);
-	if (port->irq_high < 0)
-		port->irq_high = 0;
+	irq_count = platform_irq_count(pdev);
+	if (irq_count < 0)
+		return irq_count;
+
+	if (irq_count > 1) {
+		port->irq_high = platform_get_irq(pdev, 1);
+		if (port->irq_high < 0)
+			port->irq_high = 0;
+	}
 
 	port->irq = platform_get_irq(pdev, 0);
 	if (port->irq < 0)
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 5e5437a2c607..c4a314c68555 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -248,7 +248,10 @@ static int mxs_gpio_get_direction(struct gpio_chip *gc, unsigned offset)
 	u32 dir;
 
 	dir = readl(port->base + PINCTRL_DOE(port));
-	return !(dir & mask);
+	if (dir & mask)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static const struct platform_device_id mxs_gpio_ids[] = {
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index d0f27084a942..3bd8adaeed9e 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -805,8 +805,10 @@ static int omap_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 {
 	struct gpio_bank *bank = gpiochip_get_data(chip);
 
-	return !!(readl_relaxed(bank->base + bank->regs->direction) &
-		  BIT(offset));
+	if (readl_relaxed(bank->base + bank->regs->direction) & BIT(offset))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int omap_gpio_input(struct gpio_chip *chip, unsigned offset)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index de5d1383f28d..5638b4e5355f 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/bits.h>
+#include <linux/bitmap.h>
 #include <linux/gpio/driver.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
@@ -115,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids);
 
 #define MAX_BANK 5
 #define BANK_SZ 8
+#define MAX_LINE	(MAX_BANK * BANK_SZ)
 
 #define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ)
 
@@ -146,10 +147,10 @@ struct pca953x_chip {
 
 #ifdef CONFIG_GPIO_PCA953X_IRQ
 	struct mutex irq_lock;
-	u8 irq_mask[MAX_BANK];
-	u8 irq_stat[MAX_BANK];
-	u8 irq_trig_raise[MAX_BANK];
-	u8 irq_trig_fall[MAX_BANK];
+	DECLARE_BITMAP(irq_mask, MAX_LINE);
+	DECLARE_BITMAP(irq_stat, MAX_LINE);
+	DECLARE_BITMAP(irq_trig_raise, MAX_LINE);
+	DECLARE_BITMAP(irq_trig_fall, MAX_LINE);
 	struct irq_chip irq_chip;
 #endif
 	atomic_t wakeup_path;
@@ -333,12 +334,16 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off,
 	return regaddr;
 }
 
-static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
 {
 	u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true);
-	int ret;
+	u8 value[MAX_BANK];
+	int i, ret;
+
+	for (i = 0; i < NBANK(chip); i++)
+		value[i] = bitmap_get_value8(val, i * BANK_SZ);
 
-	ret = regmap_bulk_write(chip->regmap, regaddr, val, NBANK(chip));
+	ret = regmap_bulk_write(chip->regmap, regaddr, value, NBANK(chip));
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed writing register\n");
 		return ret;
@@ -347,17 +352,21 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
 	return 0;
 }
 
-static int pca953x_read_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
 {
 	u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true);
-	int ret;
+	u8 value[MAX_BANK];
+	int i, ret;
 
-	ret = regmap_bulk_read(chip->regmap, regaddr, val, NBANK(chip));
+	ret = regmap_bulk_read(chip->regmap, regaddr, value, NBANK(chip));
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed reading register\n");
 		return ret;
 	}
 
+	for (i = 0; i < NBANK(chip); i++)
+		bitmap_set_value8(val, value[i], i * BANK_SZ);
+
 	return 0;
 }
 
@@ -412,7 +421,9 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
 	ret = regmap_read(chip->regmap, inreg, &reg_val);
 	mutex_unlock(&chip->i2c_lock);
 	if (ret < 0) {
-		/* NOTE:  diagnostic already emitted; that's all we should
+		/*
+		 * NOTE:
+		 * diagnostic already emitted; that's all we should
 		 * do unless gpio_*_value_cansleep() calls become different
 		 * from their nonsleeping siblings (and report faults).
 		 */
@@ -449,16 +460,17 @@ static int pca953x_gpio_get_direction(struct gpio_chip *gc, unsigned off)
 	if (ret < 0)
 		return ret;
 
-	return !!(reg_val & bit);
+	if (reg_val & bit)
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 				      unsigned long *mask, unsigned long *bits)
 {
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	unsigned int bank_mask, bank_val;
-	int bank;
-	u8 reg_val[MAX_BANK];
+	DECLARE_BITMAP(reg_val, MAX_LINE);
 	int ret;
 
 	mutex_lock(&chip->i2c_lock);
@@ -466,16 +478,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 	if (ret)
 		goto exit;
 
-	for (bank = 0; bank < NBANK(chip); bank++) {
-		bank_mask = mask[bank / sizeof(*mask)] >>
-			   ((bank % sizeof(*mask)) * 8);
-		if (bank_mask) {
-			bank_val = bits[bank / sizeof(*bits)] >>
-				  ((bank % sizeof(*bits)) * 8);
-			bank_val &= bank_mask;
-			reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val;
-		}
-	}
+	bitmap_replace(reg_val, reg_val, bits, mask, gc->ngpio);
 
 	pca953x_write_regs(chip, chip->regs->output, reg_val);
 exit:
@@ -565,16 +568,18 @@ static void pca953x_irq_mask(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
-	chip->irq_mask[d->hwirq / BANK_SZ] &= ~BIT(d->hwirq % BANK_SZ);
+	clear_bit(hwirq, chip->irq_mask);
 }
 
 static void pca953x_irq_unmask(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
-	chip->irq_mask[d->hwirq / BANK_SZ] |= BIT(d->hwirq % BANK_SZ);
+	set_bit(hwirq, chip->irq_mask);
 }
 
 static int pca953x_irq_set_wake(struct irq_data *d, unsigned int on)
@@ -602,10 +607,9 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	u8 new_irqs;
-	int level, i;
-	u8 invert_irq_mask[MAX_BANK];
-	u8 reg_direction[MAX_BANK];
+	DECLARE_BITMAP(irq_mask, MAX_LINE);
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	int level;
 
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
 
@@ -613,25 +617,18 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
 		/* Enable latch on interrupt-enabled inputs */
 		pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
 
-		for (i = 0; i < NBANK(chip); i++)
-			invert_irq_mask[i] = ~chip->irq_mask[i];
+		bitmap_complement(irq_mask, chip->irq_mask, gc->ngpio);
 
 		/* Unmask enabled interrupts */
-		pca953x_write_regs(chip, PCAL953X_INT_MASK, invert_irq_mask);
+		pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask);
 	}
 
+	bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio);
+	bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio);
+
 	/* Look for any newly setup interrupt */
-	for (i = 0; i < NBANK(chip); i++) {
-		new_irqs = chip->irq_trig_fall[i] | chip->irq_trig_raise[i];
-		new_irqs &= reg_direction[i];
-
-		while (new_irqs) {
-			level = __ffs(new_irqs);
-			pca953x_gpio_direction_input(&chip->gpio_chip,
-							level + (BANK_SZ * i));
-			new_irqs &= ~(1 << level);
-		}
-	}
+	for_each_set_bit(level, irq_mask, gc->ngpio)
+		pca953x_gpio_direction_input(&chip->gpio_chip, level);
 
 	mutex_unlock(&chip->irq_lock);
 }
@@ -640,8 +637,7 @@ static int pca953x_irq_set_type(struct irq_data *d, unsigned int type)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	int bank_nb = d->hwirq / BANK_SZ;
-	u8 mask = BIT(d->hwirq % BANK_SZ);
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
 	if (!(type & IRQ_TYPE_EDGE_BOTH)) {
 		dev_err(&chip->client->dev, "irq %d: unsupported type %d\n",
@@ -649,15 +645,8 @@ static int pca953x_irq_set_type(struct irq_data *d, unsigned int type)
 		return -EINVAL;
 	}
 
-	if (type & IRQ_TYPE_EDGE_FALLING)
-		chip->irq_trig_fall[bank_nb] |= mask;
-	else
-		chip->irq_trig_fall[bank_nb] &= ~mask;
-
-	if (type & IRQ_TYPE_EDGE_RISING)
-		chip->irq_trig_raise[bank_nb] |= mask;
-	else
-		chip->irq_trig_raise[bank_nb] &= ~mask;
+	assign_bit(hwirq, chip->irq_trig_fall, type & IRQ_TYPE_EDGE_FALLING);
+	assign_bit(hwirq, chip->irq_trig_raise, type & IRQ_TYPE_EDGE_RISING);
 
 	return 0;
 }
@@ -666,21 +655,21 @@ static void pca953x_irq_shutdown(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	u8 mask = BIT(d->hwirq % BANK_SZ);
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
-	chip->irq_trig_raise[d->hwirq / BANK_SZ] &= ~mask;
-	chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask;
+	clear_bit(hwirq, chip->irq_trig_raise);
+	clear_bit(hwirq, chip->irq_trig_fall);
 }
 
-static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
+static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending)
 {
-	u8 cur_stat[MAX_BANK];
-	u8 old_stat[MAX_BANK];
-	bool pending_seen = false;
-	bool trigger_seen = false;
-	u8 trigger[MAX_BANK];
-	u8 reg_direction[MAX_BANK];
-	int ret, i;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	DECLARE_BITMAP(old_stat, MAX_LINE);
+	DECLARE_BITMAP(cur_stat, MAX_LINE);
+	DECLARE_BITMAP(new_stat, MAX_LINE);
+	DECLARE_BITMAP(trigger, MAX_LINE);
+	int ret;
 
 	if (chip->driver_data & PCA_PCAL) {
 		/* Read the current interrupt status from the device */
@@ -689,20 +678,16 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
 			return false;
 
 		/* Check latched inputs and clear interrupt status */
-		ret = pca953x_read_regs(chip, PCA953X_INPUT, cur_stat);
+		ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
 		if (ret)
 			return false;
 
-		for (i = 0; i < NBANK(chip); i++) {
-			/* Apply filter for rising/falling edge selection */
-			pending[i] = (~cur_stat[i] & chip->irq_trig_fall[i]) |
-				(cur_stat[i] & chip->irq_trig_raise[i]);
-			pending[i] &= trigger[i];
-			if (pending[i])
-				pending_seen = true;
-		}
+		/* Apply filter for rising/falling edge selection */
+		bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
 
-		return pending_seen;
+		bitmap_and(pending, new_stat, trigger, gc->ngpio);
+
+		return !bitmap_empty(pending, gc->ngpio);
 	}
 
 	ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
@@ -711,64 +696,49 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
 
 	/* Remove output pins from the equation */
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
-	for (i = 0; i < NBANK(chip); i++)
-		cur_stat[i] &= reg_direction[i];
 
-	memcpy(old_stat, chip->irq_stat, NBANK(chip));
+	bitmap_copy(old_stat, chip->irq_stat, gc->ngpio);
 
-	for (i = 0; i < NBANK(chip); i++) {
-		trigger[i] = (cur_stat[i] ^ old_stat[i]) & chip->irq_mask[i];
-		if (trigger[i])
-			trigger_seen = true;
-	}
+	bitmap_and(new_stat, cur_stat, reg_direction, gc->ngpio);
+	bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
+	bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
 
-	if (!trigger_seen)
+	if (bitmap_empty(trigger, gc->ngpio))
 		return false;
 
-	memcpy(chip->irq_stat, cur_stat, NBANK(chip));
+	bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
 
-	for (i = 0; i < NBANK(chip); i++) {
-		pending[i] = (old_stat[i] & chip->irq_trig_fall[i]) |
-			(cur_stat[i] & chip->irq_trig_raise[i]);
-		pending[i] &= trigger[i];
-		if (pending[i])
-			pending_seen = true;
-	}
+	bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
+	bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
+	bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
+	bitmap_and(pending, new_stat, trigger, gc->ngpio);
 
-	return pending_seen;
+	return !bitmap_empty(pending, gc->ngpio);
 }
 
 static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 {
 	struct pca953x_chip *chip = devid;
-	u8 pending[MAX_BANK];
-	u8 level;
-	unsigned nhandled = 0;
-	int i;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	DECLARE_BITMAP(pending, MAX_LINE);
+	int level;
 
 	if (!pca953x_irq_pending(chip, pending))
 		return IRQ_NONE;
 
-	for (i = 0; i < NBANK(chip); i++) {
-		while (pending[i]) {
-			level = __ffs(pending[i]);
-			handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain,
-							level + (BANK_SZ * i)));
-			pending[i] &= ~(1 << level);
-			nhandled++;
-		}
-	}
+	for_each_set_bit(level, pending, gc->ngpio)
+		handle_nested_irq(irq_find_mapping(gc->irq.domain, level));
 
-	return (nhandled > 0) ? IRQ_HANDLED : IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
-static int pca953x_irq_setup(struct pca953x_chip *chip,
-			     int irq_base)
+static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base)
 {
 	struct i2c_client *client = chip->client;
 	struct irq_chip *irq_chip = &chip->irq_chip;
-	u8 reg_direction[MAX_BANK];
-	int ret, i;
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	DECLARE_BITMAP(irq_stat, MAX_LINE);
+	int ret;
 
 	if (!client->irq)
 		return 0;
@@ -779,7 +749,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	if (!(chip->driver_data & PCA_INT))
 		return 0;
 
-	ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat);
+	ret = pca953x_read_regs(chip, chip->regs->input, irq_stat);
 	if (ret)
 		return ret;
 
@@ -789,14 +759,12 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	 * this purpose.
 	 */
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
-	for (i = 0; i < NBANK(chip); i++)
-		chip->irq_stat[i] &= reg_direction[i];
+	bitmap_and(chip->irq_stat, irq_stat, reg_direction, chip->gpio_chip.ngpio);
 	mutex_init(&chip->irq_lock);
 
 	ret = devm_request_threaded_irq(&client->dev, client->irq,
 					NULL, pca953x_irq_handler,
-					IRQF_TRIGGER_LOW | IRQF_ONESHOT |
-					IRQF_SHARED,
+					IRQF_ONESHOT | IRQF_SHARED,
 					dev_name(&client->dev), chip);
 	if (ret) {
 		dev_err(&client->dev, "failed to request irq %d\n",
@@ -813,9 +781,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	irq_chip->irq_set_type = pca953x_irq_set_type;
 	irq_chip->irq_shutdown = pca953x_irq_shutdown;
 
-	ret =  gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
-					   irq_base, handle_simple_irq,
-					   IRQ_TYPE_NONE);
+	ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
+					  irq_base, handle_simple_irq,
+					  IRQ_TYPE_NONE);
 	if (ret) {
 		dev_err(&client->dev,
 			"could not connect irqchip to gpiochip\n");
@@ -842,8 +810,8 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 
 static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
 {
+	DECLARE_BITMAP(val, MAX_LINE);
 	int ret;
-	u8 val[MAX_BANK];
 
 	ret = regcache_sync_region(chip->regmap, chip->regs->output,
 				   chip->regs->output + NBANK(chip));
@@ -857,9 +825,9 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
 
 	/* set platform specific polarity inversion */
 	if (invert)
-		memset(val, 0xFF, NBANK(chip));
+		bitmap_fill(val, MAX_LINE);
 	else
-		memset(val, 0, NBANK(chip));
+		bitmap_zero(val, MAX_LINE);
 
 	ret = pca953x_write_regs(chip, chip->regs->invert, val);
 out:
@@ -868,8 +836,8 @@ out:
 
 static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
 {
+	DECLARE_BITMAP(val, MAX_LINE);
 	int ret;
-	u8 val[MAX_BANK];
 
 	ret = device_pca95xx_init(chip, invert);
 	if (ret)
@@ -886,10 +854,8 @@ out:
 	return ret;
 }
 
-static const struct of_device_id pca953x_dt_ids[];
-
 static int pca953x_probe(struct i2c_client *client,
-				   const struct i2c_device_id *i2c_id)
+			 const struct i2c_device_id *i2c_id)
 {
 	struct pca953x_platform_data *pdata;
 	struct pca953x_chip *chip;
@@ -898,8 +864,7 @@ static int pca953x_probe(struct i2c_client *client,
 	u32 invert = 0;
 	struct regulator *reg;
 
-	chip = devm_kzalloc(&client->dev,
-			sizeof(struct pca953x_chip), GFP_KERNEL);
+	chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
 	if (chip == NULL)
 		return -ENOMEM;
 
@@ -1013,7 +978,7 @@ static int pca953x_probe(struct i2c_client *client,
 
 	if (pdata && pdata->setup) {
 		ret = pdata->setup(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
+				   chip->gpio_chip.ngpio, pdata->context);
 		if (ret < 0)
 			dev_warn(&client->dev, "setup failed, %d\n", ret);
 	}
@@ -1033,7 +998,7 @@ static int pca953x_remove(struct i2c_client *client)
 
 	if (pdata && pdata->teardown) {
 		ret = pdata->teardown(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
+				      chip->gpio_chip.ngpio, pdata->context);
 		if (ret < 0)
 			dev_err(&client->dev, "teardown failed, %d\n", ret);
 	} else {
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index 5aa136a6a3e0..638d6656ce73 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -61,9 +61,9 @@ static int idio_16_gpio_get_direction(struct gpio_chip *chip,
 	unsigned int offset)
 {
 	if (offset > 15)
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int idio_16_gpio_direction_input(struct gpio_chip *chip,
@@ -100,45 +100,23 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	size_t i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
-	unsigned long port_state;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	void __iomem *ports[] = {
 		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
 		&idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
 	};
+	void __iomem *port_addr;
+	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = ports[offset / 8];
+		port_state = ioread8(port_addr) & gpio_mask;
 
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = ioread8(ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -178,30 +156,31 @@ static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	void __iomem *ports[] = {
+		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+	};
+	size_t index;
+	void __iomem *port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
-	unsigned int out_state;
+	unsigned long out_state;
 
-	raw_spin_lock_irqsave(&idio16gpio->lock, flags);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = ports[index];
 
-	/* process output lines 0-7 */
-	if (*mask & 0xFF) {
-		out_state = ioread8(&idio16gpio->reg->out0_7) & ~*mask;
-		out_state |= *mask & *bits;
-		iowrite8(out_state, &idio16gpio->reg->out0_7);
-	}
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
-	/* shift to next output line word */
-	*mask >>= 8;
+		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
 
-	/* process output lines 8-15 */
-	if (*mask & 0xFF) {
-		*bits >>= 8;
-		out_state = ioread8(&idio16gpio->reg->out8_15) & ~*mask;
-		out_state |= *mask & *bits;
-		iowrite8(out_state, &idio16gpio->reg->out8_15);
-	}
+		out_state = ioread8(port_addr) & ~gpio_mask;
+		out_state |= bitmask;
+		iowrite8(out_state, port_addr);
 
-	raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+	}
 }
 
 static void idio_16_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 52f1647a46fd..1d475794a50f 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -104,15 +104,18 @@ static int idio_24_gpio_get_direction(struct gpio_chip *chip,
 
 	/* FET Outputs */
 	if (offset < 24)
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
 	/* Isolated Inputs */
 	if (offset < 48)
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 
 	/* TTL/CMOS I/O */
 	/* OUT MODE = 1 when TTL/CMOS Output Mode is set */
-	return !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask);
+	if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int idio_24_gpio_direction_input(struct gpio_chip *chip,
@@ -198,52 +201,34 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	size_t i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
-	unsigned long port_state;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	void __iomem *ports[] = {
 		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
 		&idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
 		&idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
 	};
+	size_t index;
+	unsigned long port_state;
 	const unsigned long out_mode_mask = BIT(1);
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
 
 		/* read bits from current gpio port (port 6 is TTL GPIO) */
-		if (i < 6)
-			port_state = ioread8(ports[i]);
+		if (index < 6)
+			port_state = ioread8(ports[index]);
 		else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
 			port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
 		else
 			port_state = ioread8(&idio24gpio->reg->ttl_in0_7);
 
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		port_state &= gpio_mask;
+
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -294,59 +279,48 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	size_t i;
-	unsigned long bits_offset;
+	unsigned long offset;
 	unsigned long gpio_mask;
-	const unsigned int gpio_reg_size = 8;
-	const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
-	unsigned long flags;
-	unsigned int out_state;
 	void __iomem *ports[] = {
 		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
 		&idio24gpio->reg->out16_23
 	};
+	size_t index;
+	unsigned long bitmask;
+	unsigned long flags;
+	unsigned long out_state;
 	const unsigned long out_mode_mask = BIT(1);
-	const unsigned int ttl_offset = 48;
-	const size_t ttl_i = BIT_WORD(ttl_offset);
-	const unsigned int word_offset = ttl_offset % BITS_PER_LONG;
-	const unsigned long ttl_mask = (mask[ttl_i] >> word_offset) & port_mask;
-	const unsigned long ttl_bits = (bits[ttl_i] >> word_offset) & ttl_mask;
-
-	/* set bits are processed a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* check if any set bits for current port */
-		gpio_mask = (*mask >> bits_offset) & port_mask;
-		if (!gpio_mask) {
-			/* no set bits for this port so move on to next port */
-			continue;
-		}
 
-		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
 
-		/* process output lines */
-		out_state = ioread8(ports[i]) & ~gpio_mask;
-		out_state |= (*bits >> bits_offset) & gpio_mask;
-		iowrite8(out_state, ports[i]);
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
-		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-	}
+		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
 
-	/* check if setting TTL lines and if they are in output mode */
-	if (!ttl_mask || !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask))
-		return;
+		/* read bits from current gpio port (port 6 is TTL GPIO) */
+		if (index < 6) {
+			out_state = ioread8(ports[index]);
+		} else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) {
+			out_state = ioread8(&idio24gpio->reg->ttl_out0_7);
+		} else {
+			/* skip TTL GPIO if set for input */
+			raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+			continue;
+		}
 
-	/* handle TTL output */
-	raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+		/* set requested bit states */
+		out_state &= ~gpio_mask;
+		out_state |= bitmask;
 
-	/* process output lines */
-	out_state = ioread8(&idio24gpio->reg->ttl_out0_7) & ~ttl_mask;
-	out_state |= ttl_bits;
-	iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
+		/* write bits for current gpio port (port 6 is TTL GPIO) */
+		if (index < 6)
+			iowrite8(out_state, ports[index]);
+		else
+			iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
 
-	raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+	}
 }
 
 static void idio_24_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index f809a5a8e9eb..6698feabaced 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -65,7 +65,7 @@ static int pisosr_gpio_get_direction(struct gpio_chip *chip,
 				     unsigned offset)
 {
 	/* This device always input */
-	return 1;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int pisosr_gpio_direction_input(struct gpio_chip *chip,
@@ -96,16 +96,16 @@ static int pisosr_gpio_get_multiple(struct gpio_chip *chip,
 				    unsigned long *mask, unsigned long *bits)
 {
 	struct pisosr_gpio *gpio = gpiochip_get_data(chip);
-	unsigned int nbytes = DIV_ROUND_UP(chip->ngpio, 8);
-	unsigned int i, j;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned long buffer_state;
 
 	pisosr_gpio_refresh(gpio);
 
 	bitmap_zero(bits, chip->ngpio);
-	for (i = 0; i < nbytes; i++) {
-		j = i / sizeof(unsigned long);
-		bits[j] |= ((unsigned long) gpio->buffer[i])
-			   << (8 * (i % sizeof(unsigned long)));
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		buffer_state = gpio->buffer[offset / 8] & gpio_mask;
+		bitmap_set_value8(bits, buffer_state, offset);
 	}
 
 	return 0;
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 722ce5cf861e..5df7782e348f 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -63,7 +63,10 @@ static int pl061_get_direction(struct gpio_chip *gc, unsigned offset)
 {
 	struct pl061 *pl061 = gpiochip_get_data(gc);
 
-	return !(readb(pl061->base + GPIODIR) & BIT(offset));
+	if (readb(pl061->base + GPIODIR) & BIT(offset))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int pl061_direction_input(struct gpio_chip *gc, unsigned offset)
diff --git a/drivers/gpio/gpio-raspberrypi-exp.c b/drivers/gpio/gpio-raspberrypi-exp.c
index b77ea16ffa03..bb100e0124e6 100644
--- a/drivers/gpio/gpio-raspberrypi-exp.c
+++ b/drivers/gpio/gpio-raspberrypi-exp.c
@@ -147,7 +147,10 @@ static int rpi_exp_gpio_get_direction(struct gpio_chip *gc, unsigned int off)
 			get.gpio);
 		return ret ? ret : -EIO;
 	}
-	return !get.direction;
+	if (get.direction)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int rpi_exp_gpio_get(struct gpio_chip *gc, unsigned int off)
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index 187984d26f47..f800b250971c 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -279,7 +279,10 @@ static int gpio_rcar_get_direction(struct gpio_chip *chip, unsigned int offset)
 {
 	struct gpio_rcar_priv *p = gpiochip_get_data(chip);
 
-	return !(gpio_rcar_read(p, INOUTSEL) & BIT(offset));
+	if (gpio_rcar_read(p, INOUTSEL) & BIT(offset))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset)
@@ -483,7 +486,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 	gpio_chip->ngpio = npins;
 
 	irq_chip = &p->irq_chip;
-	irq_chip->name = name;
+	irq_chip->name = "gpio-rcar";
 	irq_chip->parent_device = dev;
 	irq_chip->irq_mask = gpio_rcar_irq_disable;
 	irq_chip->irq_unmask = gpio_rcar_irq_enable;
diff --git a/drivers/gpio/gpio-rda.c b/drivers/gpio/gpio-rda.c
new file mode 100644
index 000000000000..28dcbb58b76b
--- /dev/null
+++ b/drivers/gpio/gpio-rda.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RDA Micro GPIO driver
+ *
+ * Copyright (C) 2012 RDA Micro Inc.
+ * Copyright (C) 2019 Manivannan Sadhasivam
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio/driver.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#define RDA_GPIO_OEN_VAL		0x00
+#define RDA_GPIO_OEN_SET_OUT		0x04
+#define RDA_GPIO_OEN_SET_IN		0x08
+#define RDA_GPIO_VAL			0x0c
+#define RDA_GPIO_SET			0x10
+#define RDA_GPIO_CLR			0x14
+#define RDA_GPIO_INT_CTRL_SET		0x18
+#define RDA_GPIO_INT_CTRL_CLR		0x1c
+#define RDA_GPIO_INT_CLR		0x20
+#define RDA_GPIO_INT_STATUS		0x24
+
+#define RDA_GPIO_IRQ_RISE_SHIFT		0
+#define RDA_GPIO_IRQ_FALL_SHIFT		8
+#define RDA_GPIO_DEBOUCE_SHIFT		16
+#define RDA_GPIO_LEVEL_SHIFT		24
+
+#define RDA_GPIO_IRQ_MASK		0xff
+
+/* Each bank consists of 32 GPIOs */
+#define RDA_GPIO_BANK_NR	32
+
+struct rda_gpio {
+	struct gpio_chip chip;
+	void __iomem *base;
+	spinlock_t lock;
+	struct irq_chip irq_chip;
+	int irq;
+};
+
+static inline void rda_gpio_update(struct gpio_chip *chip, unsigned int offset,
+				   u16 reg, int val)
+{
+	struct rda_gpio *rda_gpio = gpiochip_get_data(chip);
+	void __iomem *base = rda_gpio->base;
+	unsigned long flags;
+	u32 tmp;
+
+	spin_lock_irqsave(&rda_gpio->lock, flags);
+	tmp = readl_relaxed(base + reg);
+
+	if (val)
+		tmp |= BIT(offset);
+	else
+		tmp &= ~BIT(offset);
+
+	writel_relaxed(tmp, base + reg);
+	spin_unlock_irqrestore(&rda_gpio->lock, flags);
+}
+
+static void rda_gpio_irq_mask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct rda_gpio *rda_gpio = gpiochip_get_data(chip);
+	void __iomem *base = rda_gpio->base;
+	u32 offset = irqd_to_hwirq(data);
+	u32 value;
+
+	value = BIT(offset) << RDA_GPIO_IRQ_RISE_SHIFT;
+	value |= BIT(offset) << RDA_GPIO_IRQ_FALL_SHIFT;
+
+	writel_relaxed(value, base + RDA_GPIO_INT_CTRL_CLR);
+}
+
+static void rda_gpio_irq_ack(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	u32 offset = irqd_to_hwirq(data);
+
+	rda_gpio_update(chip, offset, RDA_GPIO_INT_CLR, 1);
+}
+
+static int rda_gpio_set_irq(struct gpio_chip *chip, u32 offset,
+			    unsigned int flow_type)
+{
+	struct rda_gpio *rda_gpio = gpiochip_get_data(chip);
+	void __iomem *base = rda_gpio->base;
+	u32 value;
+
+	switch (flow_type) {
+	case IRQ_TYPE_EDGE_RISING:
+		/* Set rising edge trigger */
+		value = BIT(offset) << RDA_GPIO_IRQ_RISE_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_SET);
+
+		/* Switch to edge trigger interrupt */
+		value = BIT(offset) << RDA_GPIO_LEVEL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_CLR);
+		break;
+
+	case IRQ_TYPE_EDGE_FALLING:
+		/* Set falling edge trigger */
+		value = BIT(offset) << RDA_GPIO_IRQ_FALL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_SET);
+
+		/* Switch to edge trigger interrupt */
+		value = BIT(offset) << RDA_GPIO_LEVEL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_CLR);
+		break;
+
+	case IRQ_TYPE_EDGE_BOTH:
+		/* Set both edge trigger */
+		value = BIT(offset) << RDA_GPIO_IRQ_RISE_SHIFT;
+		value |= BIT(offset) << RDA_GPIO_IRQ_FALL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_SET);
+
+		/* Switch to edge trigger interrupt */
+		value = BIT(offset) << RDA_GPIO_LEVEL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_CLR);
+		break;
+
+	case IRQ_TYPE_LEVEL_HIGH:
+		/* Set high level trigger */
+		value = BIT(offset) << RDA_GPIO_IRQ_RISE_SHIFT;
+
+		/* Switch to level trigger interrupt */
+		value |= BIT(offset) << RDA_GPIO_LEVEL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_SET);
+		break;
+
+	case IRQ_TYPE_LEVEL_LOW:
+		/* Set low level trigger */
+		value = BIT(offset) << RDA_GPIO_IRQ_FALL_SHIFT;
+
+		/* Switch to level trigger interrupt */
+		value |= BIT(offset) << RDA_GPIO_LEVEL_SHIFT;
+		writel_relaxed(value, base + RDA_GPIO_INT_CTRL_SET);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void rda_gpio_irq_unmask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	u32 offset = irqd_to_hwirq(data);
+	u32 trigger = irqd_get_trigger_type(data);
+
+	rda_gpio_set_irq(chip, offset, trigger);
+}
+
+static int rda_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	u32 offset = irqd_to_hwirq(data);
+	int ret;
+
+	ret = rda_gpio_set_irq(chip, offset, flow_type);
+	if (ret)
+		return ret;
+
+	if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+		irq_set_handler_locked(data, handle_level_irq);
+	else if (flow_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
+		irq_set_handler_locked(data, handle_edge_irq);
+
+	return 0;
+}
+
+static void rda_gpio_irq_handler(struct irq_desc *desc)
+{
+	struct gpio_chip *chip = irq_desc_get_handler_data(desc);
+	struct irq_chip *ic = irq_desc_get_chip(desc);
+	struct rda_gpio *rda_gpio = gpiochip_get_data(chip);
+	unsigned long status;
+	u32 n, girq;
+
+	chained_irq_enter(ic, desc);
+
+	status = readl_relaxed(rda_gpio->base + RDA_GPIO_INT_STATUS);
+	/* Only lower 8 bits are capable of generating interrupts */
+	status &= RDA_GPIO_IRQ_MASK;
+
+	for_each_set_bit(n, &status, RDA_GPIO_BANK_NR) {
+		girq = irq_find_mapping(chip->irq.domain, n);
+		generic_handle_irq(girq);
+	}
+
+	chained_irq_exit(ic, desc);
+}
+
+static int rda_gpio_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	struct gpio_irq_chip *girq;
+	struct rda_gpio *rda_gpio;
+	u32 ngpios;
+	int ret;
+
+	rda_gpio = devm_kzalloc(dev, sizeof(*rda_gpio), GFP_KERNEL);
+	if (!rda_gpio)
+		return -ENOMEM;
+
+	ret = device_property_read_u32(dev, "ngpios", &ngpios);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Not all ports have interrupt capability. For instance, on
+	 * RDA8810PL, GPIOC doesn't support interrupt. So we must handle
+	 * those also.
+	 */
+	rda_gpio->irq = platform_get_irq(pdev, 0);
+
+	rda_gpio->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(rda_gpio->base))
+		return PTR_ERR(rda_gpio->base);
+
+	spin_lock_init(&rda_gpio->lock);
+
+	ret = bgpio_init(&rda_gpio->chip, dev, 4,
+			 rda_gpio->base + RDA_GPIO_VAL,
+			 rda_gpio->base + RDA_GPIO_SET,
+			 rda_gpio->base + RDA_GPIO_CLR,
+			 rda_gpio->base + RDA_GPIO_OEN_SET_OUT,
+			 rda_gpio->base + RDA_GPIO_OEN_SET_IN,
+			 BGPIOF_READ_OUTPUT_REG_SET);
+	if (ret) {
+		dev_err(dev, "bgpio_init failed\n");
+		return ret;
+	}
+
+	rda_gpio->chip.label = dev_name(dev);
+	rda_gpio->chip.ngpio = ngpios;
+	rda_gpio->chip.base = -1;
+	rda_gpio->chip.parent = dev;
+	rda_gpio->chip.of_node = np;
+
+	if (rda_gpio->irq >= 0) {
+		rda_gpio->irq_chip.name = "rda-gpio",
+		rda_gpio->irq_chip.irq_ack = rda_gpio_irq_ack,
+		rda_gpio->irq_chip.irq_mask = rda_gpio_irq_mask,
+		rda_gpio->irq_chip.irq_unmask = rda_gpio_irq_unmask,
+		rda_gpio->irq_chip.irq_set_type = rda_gpio_irq_set_type,
+		rda_gpio->irq_chip.flags = IRQCHIP_SKIP_SET_WAKE,
+
+		girq = &rda_gpio->chip.irq;
+		girq->chip = &rda_gpio->irq_chip;
+		girq->handler = handle_bad_irq;
+		girq->default_type = IRQ_TYPE_NONE;
+		girq->parent_handler = rda_gpio_irq_handler;
+		girq->parent_handler_data = rda_gpio;
+		girq->num_parents = 1;
+		girq->parents = devm_kcalloc(dev, 1,
+					     sizeof(*girq->parents),
+					     GFP_KERNEL);
+		if (!girq->parents)
+			return -ENOMEM;
+		girq->parents[0] = rda_gpio->irq;
+	}
+
+	platform_set_drvdata(pdev, rda_gpio);
+
+	return devm_gpiochip_add_data(dev, &rda_gpio->chip, rda_gpio);
+}
+
+static const struct of_device_id rda_gpio_of_match[] = {
+	{ .compatible = "rda,8810pl-gpio", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rda_gpio_of_match);
+
+static struct platform_driver rda_gpio_driver = {
+	.probe = rda_gpio_probe,
+	.driver = {
+		.name = "rda-gpio",
+		.of_match_table	= rda_gpio_of_match,
+	},
+};
+
+module_platform_driver_probe(rda_gpio_driver, rda_gpio_probe);
+
+MODULE_DESCRIPTION("RDA Micro GPIO driver");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c
index fdc7a9d5b382..d35169bde25a 100644
--- a/drivers/gpio/gpio-reg.c
+++ b/drivers/gpio/gpio-reg.c
@@ -26,7 +26,8 @@ static int gpio_reg_get_direction(struct gpio_chip *gc, unsigned offset)
 {
 	struct gpio_reg *r = to_gpio_reg(gc);
 
-	return r->direction & BIT(offset) ? 1 : 0;
+	return r->direction & BIT(offset) ? GPIO_LINE_DIRECTION_IN :
+					    GPIO_LINE_DIRECTION_OUT;
 }
 
 static int gpio_reg_direction_output(struct gpio_chip *gc, unsigned offset,
diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c
index 46b7cf23fb0f..edff5e81489f 100644
--- a/drivers/gpio/gpio-sa1100.c
+++ b/drivers/gpio/gpio-sa1100.c
@@ -53,7 +53,10 @@ static int sa1100_get_direction(struct gpio_chip *chip, unsigned offset)
 {
 	void __iomem *gpdr = sa1100_gpio_chip(chip)->membase + R_GPDR;
 
-	return !(readl_relaxed(gpdr) & BIT(offset));
+	if (readl_relaxed(gpdr) & BIT(offset))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int sa1100_direction_input(struct gpio_chip *chip, unsigned offset)
diff --git a/drivers/gpio/gpio-sama5d2-piobu.c b/drivers/gpio/gpio-sama5d2-piobu.c
index 7d718557092e..4d47b2c41186 100644
--- a/drivers/gpio/gpio-sama5d2-piobu.c
+++ b/drivers/gpio/gpio-sama5d2-piobu.c
@@ -119,7 +119,8 @@ static int sama5d2_piobu_get_direction(struct gpio_chip *chip,
 	if (ret < 0)
 		return ret;
 
-	return (ret == PIOBU_IN) ? 1 : 0;
+	return (ret == PIOBU_IN) ? GPIO_LINE_DIRECTION_IN :
+				   GPIO_LINE_DIRECTION_OUT;
 }
 
 /**
@@ -154,9 +155,9 @@ static int sama5d2_piobu_get(struct gpio_chip *chip, unsigned int pin)
 	/* if pin is input, read value from PDS else read from SOD */
 	int ret = sama5d2_piobu_get_direction(chip, pin);
 
-	if (ret == 1)
+	if (ret == GPIO_LINE_DIRECTION_IN)
 		ret = sama5d2_piobu_read_value(chip, pin, PIOBU_PDS);
-	else if (!ret)
+	else if (ret == GPIO_LINE_DIRECTION_OUT)
 		ret = sama5d2_piobu_read_value(chip, pin, PIOBU_SOD);
 
 	if (ret < 0)
@@ -243,7 +244,6 @@ static struct platform_driver sama5d2_piobu_driver = {
 
 module_platform_driver(sama5d2_piobu_driver);
 
-MODULE_VERSION("1.0");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("SAMA5D2 PIOBU controller driver");
 MODULE_AUTHOR("Andrei Stefanescu <andrei.stefanescu@microchip.com>");
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index fb143f28c386..c65f35b68202 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -127,7 +127,10 @@ static int sch_gpio_get_direction(struct gpio_chip *gc, unsigned gpio_num)
 {
 	struct sch_gpio *sch = gpiochip_get_data(gc);
 
-	return sch_gpio_reg_get(sch, gpio_num, GIO);
+	if (sch_gpio_reg_get(sch, gpio_num, GIO))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static const struct gpio_chip sch_gpio_chip = {
diff --git a/drivers/gpio/gpio-sch311x.c b/drivers/gpio/gpio-sch311x.c
index 8ecf336c9af4..da01e1cad7cb 100644
--- a/drivers/gpio/gpio-sch311x.c
+++ b/drivers/gpio/gpio-sch311x.c
@@ -228,7 +228,10 @@ static int sch311x_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 	data = inb(block->runtime_reg + block->config_regs[offset]);
 	spin_unlock(&block->lock);
 
-	return !!(data & SCH311X_GPIO_CONF_DIR);
+	if (data & SCH311X_GPIO_CONF_DIR)
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int sch311x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
new file mode 100644
index 000000000000..147a1bd04515
--- /dev/null
+++ b/drivers/gpio/gpio-sifive.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/of_irq.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/regmap.h>
+
+#define SIFIVE_GPIO_INPUT_VAL	0x00
+#define SIFIVE_GPIO_INPUT_EN	0x04
+#define SIFIVE_GPIO_OUTPUT_EN	0x08
+#define SIFIVE_GPIO_OUTPUT_VAL	0x0C
+#define SIFIVE_GPIO_RISE_IE	0x18
+#define SIFIVE_GPIO_RISE_IP	0x1C
+#define SIFIVE_GPIO_FALL_IE	0x20
+#define SIFIVE_GPIO_FALL_IP	0x24
+#define SIFIVE_GPIO_HIGH_IE	0x28
+#define SIFIVE_GPIO_HIGH_IP	0x2C
+#define SIFIVE_GPIO_LOW_IE	0x30
+#define SIFIVE_GPIO_LOW_IP	0x34
+#define SIFIVE_GPIO_OUTPUT_XOR	0x40
+
+#define SIFIVE_GPIO_MAX		32
+#define SIFIVE_GPIO_IRQ_OFFSET	7
+
+struct sifive_gpio {
+	void __iomem		*base;
+	struct gpio_chip	gc;
+	struct regmap		*regs;
+	u32			irq_state;
+	unsigned int		trigger[SIFIVE_GPIO_MAX];
+	unsigned int		irq_parent[SIFIVE_GPIO_MAX];
+};
+
+static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset)
+{
+	unsigned long flags;
+	unsigned int trigger;
+
+	spin_lock_irqsave(&chip->gc.bgpio_lock, flags);
+	trigger = (chip->irq_state & BIT(offset)) ? chip->trigger[offset] : 0;
+	regmap_update_bits(chip->regs, SIFIVE_GPIO_RISE_IE, BIT(offset),
+			   (trigger & IRQ_TYPE_EDGE_RISING) ? BIT(offset) : 0);
+	regmap_update_bits(chip->regs, SIFIVE_GPIO_FALL_IE, BIT(offset),
+			   (trigger & IRQ_TYPE_EDGE_FALLING) ? BIT(offset) : 0);
+	regmap_update_bits(chip->regs, SIFIVE_GPIO_HIGH_IE, BIT(offset),
+			   (trigger & IRQ_TYPE_LEVEL_HIGH) ? BIT(offset) : 0);
+	regmap_update_bits(chip->regs, SIFIVE_GPIO_LOW_IE, BIT(offset),
+			   (trigger & IRQ_TYPE_LEVEL_LOW) ? BIT(offset) : 0);
+	spin_unlock_irqrestore(&chip->gc.bgpio_lock, flags);
+}
+
+static int sifive_gpio_irq_set_type(struct irq_data *d, unsigned int trigger)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct sifive_gpio *chip = gpiochip_get_data(gc);
+	int offset = irqd_to_hwirq(d);
+
+	if (offset < 0 || offset >= gc->ngpio)
+		return -EINVAL;
+
+	chip->trigger[offset] = trigger;
+	sifive_gpio_set_ie(chip, offset);
+	return 0;
+}
+
+static void sifive_gpio_irq_enable(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct sifive_gpio *chip = gpiochip_get_data(gc);
+	int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
+	u32 bit = BIT(offset);
+	unsigned long flags;
+
+	irq_chip_enable_parent(d);
+
+	/* Switch to input */
+	gc->direction_input(gc, offset);
+
+	spin_lock_irqsave(&gc->bgpio_lock, flags);
+	/* Clear any sticky pending interrupts */
+	regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit);
+	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+	/* Enable interrupts */
+	assign_bit(offset, (unsigned long *)&chip->irq_state, 1);
+	sifive_gpio_set_ie(chip, offset);
+}
+
+static void sifive_gpio_irq_disable(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct sifive_gpio *chip = gpiochip_get_data(gc);
+	int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
+
+	assign_bit(offset, (unsigned long *)&chip->irq_state, 0);
+	sifive_gpio_set_ie(chip, offset);
+	irq_chip_disable_parent(d);
+}
+
+static void sifive_gpio_irq_eoi(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct sifive_gpio *chip = gpiochip_get_data(gc);
+	int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
+	u32 bit = BIT(offset);
+	unsigned long flags;
+
+	spin_lock_irqsave(&gc->bgpio_lock, flags);
+	/* Clear all pending interrupts */
+	regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit);
+	regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit);
+	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+	irq_chip_eoi_parent(d);
+}
+
+static struct irq_chip sifive_gpio_irqchip = {
+	.name		= "sifive-gpio",
+	.irq_set_type	= sifive_gpio_irq_set_type,
+	.irq_mask	= irq_chip_mask_parent,
+	.irq_unmask	= irq_chip_unmask_parent,
+	.irq_enable	= sifive_gpio_irq_enable,
+	.irq_disable	= sifive_gpio_irq_disable,
+	.irq_eoi	= sifive_gpio_irq_eoi,
+};
+
+static int sifive_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
+					     unsigned int child,
+					     unsigned int child_type,
+					     unsigned int *parent,
+					     unsigned int *parent_type)
+{
+	*parent_type = IRQ_TYPE_NONE;
+	*parent = child + SIFIVE_GPIO_IRQ_OFFSET;
+	return 0;
+}
+
+static const struct regmap_config sifive_gpio_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+	.disable_locking = true,
+};
+
+static int sifive_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *irq_parent;
+	struct irq_domain *parent;
+	struct gpio_irq_chip *girq;
+	struct sifive_gpio *chip;
+	int ret, ngpio;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(chip->base)) {
+		dev_err(dev, "failed to allocate device memory\n");
+		return PTR_ERR(chip->base);
+	}
+
+	chip->regs = devm_regmap_init_mmio(dev, chip->base,
+					   &sifive_gpio_regmap_config);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	ngpio = of_irq_count(node);
+	if (ngpio >= SIFIVE_GPIO_MAX) {
+		dev_err(dev, "Too many GPIO interrupts (max=%d)\n",
+			SIFIVE_GPIO_MAX);
+		return -ENXIO;
+	}
+
+	irq_parent = of_irq_find_parent(node);
+	if (!irq_parent) {
+		dev_err(dev, "no IRQ parent node\n");
+		return -ENODEV;
+	}
+	parent = irq_find_host(irq_parent);
+	if (!parent) {
+		dev_err(dev, "no IRQ parent domain\n");
+		return -ENODEV;
+	}
+
+	ret = bgpio_init(&chip->gc, dev, 4,
+			 chip->base + SIFIVE_GPIO_INPUT_VAL,
+			 chip->base + SIFIVE_GPIO_OUTPUT_VAL,
+			 NULL,
+			 chip->base + SIFIVE_GPIO_OUTPUT_EN,
+			 chip->base + SIFIVE_GPIO_INPUT_EN,
+			 0);
+	if (ret) {
+		dev_err(dev, "unable to init generic GPIO\n");
+		return ret;
+	}
+
+	/* Disable all GPIO interrupts before enabling parent interrupts */
+	regmap_write(chip->regs, SIFIVE_GPIO_RISE_IE, 0);
+	regmap_write(chip->regs, SIFIVE_GPIO_FALL_IE, 0);
+	regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IE, 0);
+	regmap_write(chip->regs, SIFIVE_GPIO_LOW_IE, 0);
+	chip->irq_state = 0;
+
+	chip->gc.base = -1;
+	chip->gc.ngpio = ngpio;
+	chip->gc.label = dev_name(dev);
+	chip->gc.parent = dev;
+	chip->gc.owner = THIS_MODULE;
+	girq = &chip->gc.irq;
+	girq->chip = &sifive_gpio_irqchip;
+	girq->fwnode = of_node_to_fwnode(node);
+	girq->parent_domain = parent;
+	girq->child_to_parent_hwirq = sifive_gpio_child_to_parent_hwirq;
+	girq->handler = handle_bad_irq;
+	girq->default_type = IRQ_TYPE_NONE;
+
+	platform_set_drvdata(pdev, chip);
+	return gpiochip_add_data(&chip->gc, chip);
+}
+
+static const struct of_device_id sifive_gpio_match[] = {
+	{ .compatible = "sifive,gpio0" },
+	{ .compatible = "sifive,fu540-c000-gpio" },
+	{ },
+};
+
+static struct platform_driver sifive_gpio_driver = {
+	.probe		= sifive_gpio_probe,
+	.driver = {
+		.name	= "sifive_gpio",
+		.of_match_table = of_match_ptr(sifive_gpio_match),
+	},
+};
+builtin_platform_driver(sifive_gpio_driver)
diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c
index 006a7e6a75f2..311f66757b92 100644
--- a/drivers/gpio/gpio-siox.c
+++ b/drivers/gpio/gpio-siox.c
@@ -203,9 +203,9 @@ static int gpio_siox_direction_output(struct gpio_chip *chip,
 static int gpio_siox_get_direction(struct gpio_chip *chip, unsigned int offset)
 {
 	if (offset < 12)
-		return 1; /* input */
+		return GPIO_LINE_DIRECTION_IN;
 	else
-		return 0; /* output */
+		return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int gpio_siox_probe(struct siox_device *sdevice)
diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index 994d542daf53..542706a852e6 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c
@@ -84,7 +84,10 @@ static int stmpe_gpio_get_direction(struct gpio_chip *chip,
 	if (ret < 0)
 		return ret;
 
-	return !(ret & mask);
+	if (ret & mask)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int stmpe_gpio_direction_output(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c
index 5e375186f90e..866201cf5f65 100644
--- a/drivers/gpio/gpio-tb10x.c
+++ b/drivers/gpio/gpio-tb10x.c
@@ -243,4 +243,3 @@ static struct platform_driver tb10x_gpio_driver = {
 module_platform_driver(tb10x_gpio_driver);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("tb10x gpio.");
-MODULE_VERSION("0.0.1");
diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index 75b1135b383a..6be0684cfa49 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -97,7 +97,10 @@ static int tc3589x_gpio_get_direction(struct gpio_chip *chip,
 	if (ret < 0)
 		return ret;
 
-	return !(ret & BIT(pos));
+	if (ret & BIT(pos))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int tc3589x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 8a01d3694b28..acb99eff9939 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -96,12 +96,12 @@ struct tegra_gpio_info {
 static inline void tegra_gpio_writel(struct tegra_gpio_info *tgi,
 				     u32 val, u32 reg)
 {
-	__raw_writel(val, tgi->regs + reg);
+	writel_relaxed(val, tgi->regs + reg);
 }
 
 static inline u32 tegra_gpio_readl(struct tegra_gpio_info *tgi, u32 reg)
 {
-	return __raw_readl(tgi->regs + reg);
+	return readl_relaxed(tgi->regs + reg);
 }
 
 static unsigned int tegra_gpio_compose(unsigned int bank, unsigned int port,
@@ -215,7 +215,10 @@ static int tegra_gpio_get_direction(struct gpio_chip *chip,
 
 	oe = tegra_gpio_readl(tgi, GPIO_OE(tgi, offset));
 
-	return !(oe & pin_mask);
+	if (oe & pin_mask)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int tegra_gpio_set_debounce(struct gpio_chip *chip, unsigned int offset,
@@ -413,11 +416,8 @@ static void tegra_gpio_irq_handler(struct irq_desc *desc)
 static int tegra_gpio_resume(struct device *dev)
 {
 	struct tegra_gpio_info *tgi = dev_get_drvdata(dev);
-	unsigned long flags;
 	unsigned int b, p;
 
-	local_irq_save(flags);
-
 	for (b = 0; b < tgi->bank_count; b++) {
 		struct tegra_gpio_bank *bank = &tgi->bank_info[b];
 
@@ -445,17 +445,14 @@ static int tegra_gpio_resume(struct device *dev)
 		}
 	}
 
-	local_irq_restore(flags);
 	return 0;
 }
 
 static int tegra_gpio_suspend(struct device *dev)
 {
 	struct tegra_gpio_info *tgi = dev_get_drvdata(dev);
-	unsigned long flags;
 	unsigned int b, p;
 
-	local_irq_save(flags);
 	for (b = 0; b < tgi->bank_count; b++) {
 		struct tegra_gpio_bank *bank = &tgi->bank_info[b];
 
@@ -485,7 +482,7 @@ static int tegra_gpio_suspend(struct device *dev)
 					  GPIO_INT_ENB(tgi, gpio));
 		}
 	}
-	local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -494,6 +491,11 @@ static int tegra_gpio_irq_set_wake(struct irq_data *d, unsigned int enable)
 	struct tegra_gpio_bank *bank = irq_data_get_irq_chip_data(d);
 	unsigned int gpio = d->hwirq;
 	u32 port, bit, mask;
+	int err;
+
+	err = irq_set_irq_wake(bank->irq, enable);
+	if (err)
+		return err;
 
 	port = GPIO_PORT(gpio);
 	bit = GPIO_BIT(gpio);
@@ -504,7 +506,7 @@ static int tegra_gpio_irq_set_wake(struct irq_data *d, unsigned int enable)
 	else
 		bank->wake_enb[port] &= ~mask;
 
-	return irq_set_irq_wake(bank->irq, enable);
+	return 0;
 }
 #endif
 
@@ -554,7 +556,7 @@ static inline void tegra_gpio_debuginit(struct tegra_gpio_info *tgi)
 #endif
 
 static const struct dev_pm_ops tegra_gpio_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
 };
 
 static int tegra_gpio_probe(struct platform_device *pdev)
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index a9058fda187e..de241263d4be 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -15,6 +15,14 @@
 #include <dt-bindings/gpio/tegra186-gpio.h>
 #include <dt-bindings/gpio/tegra194-gpio.h>
 
+/* security registers */
+#define TEGRA186_GPIO_CTL_SCR 0x0c
+#define  TEGRA186_GPIO_CTL_SCR_SEC_WEN BIT(28)
+#define  TEGRA186_GPIO_CTL_SCR_SEC_REN BIT(27)
+
+#define TEGRA186_GPIO_INT_ROUTE_MAPPING(p, x) (0x14 + (p) * 0x20 + (x) * 4)
+
+/* control registers */
 #define TEGRA186_GPIO_ENABLE_CONFIG 0x00
 #define  TEGRA186_GPIO_ENABLE_CONFIG_ENABLE BIT(0)
 #define  TEGRA186_GPIO_ENABLE_CONFIG_OUT BIT(1)
@@ -24,6 +32,7 @@
 #define  TEGRA186_GPIO_ENABLE_CONFIG_TRIGGER_TYPE_DOUBLE_EDGE (0x3 << 2)
 #define  TEGRA186_GPIO_ENABLE_CONFIG_TRIGGER_TYPE_MASK (0x3 << 2)
 #define  TEGRA186_GPIO_ENABLE_CONFIG_TRIGGER_LEVEL BIT(4)
+#define  TEGRA186_GPIO_ENABLE_CONFIG_DEBOUNCE BIT(5)
 #define  TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT BIT(6)
 
 #define TEGRA186_GPIO_DEBOUNCE_CONTROL 0x04
@@ -44,15 +53,16 @@
 
 struct tegra_gpio_port {
 	const char *name;
-	unsigned int offset;
+	unsigned int bank;
+	unsigned int port;
 	unsigned int pins;
-	unsigned int irq;
 };
 
 struct tegra_gpio_soc {
 	const struct tegra_gpio_port *ports;
 	unsigned int num_ports;
 	const char *name;
+	unsigned int instance;
 };
 
 struct tegra_gpio {
@@ -63,6 +73,7 @@ struct tegra_gpio {
 
 	const struct tegra_gpio_soc *soc;
 
+	void __iomem *secure;
 	void __iomem *base;
 };
 
@@ -89,12 +100,15 @@ static void __iomem *tegra186_gpio_get_base(struct tegra_gpio *gpio,
 					    unsigned int pin)
 {
 	const struct tegra_gpio_port *port;
+	unsigned int offset;
 
 	port = tegra186_gpio_get_port(gpio, &pin);
 	if (!port)
 		return NULL;
 
-	return gpio->base + port->offset + pin * 0x20;
+	offset = port->bank * 0x1000 + port->port * 0x200;
+
+	return gpio->base + offset + pin * 0x20;
 }
 
 static int tegra186_gpio_get_direction(struct gpio_chip *chip,
@@ -110,9 +124,9 @@ static int tegra186_gpio_get_direction(struct gpio_chip *chip,
 
 	value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
 	if (value & TEGRA186_GPIO_ENABLE_CONFIG_OUT)
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
-	return 1;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int tegra186_gpio_direction_input(struct gpio_chip *chip,
@@ -204,6 +218,42 @@ static void tegra186_gpio_set(struct gpio_chip *chip, unsigned int offset,
 	writel(value, base + TEGRA186_GPIO_OUTPUT_VALUE);
 }
 
+static int tegra186_gpio_set_config(struct gpio_chip *chip,
+				    unsigned int offset,
+				    unsigned long config)
+{
+	struct tegra_gpio *gpio = gpiochip_get_data(chip);
+	u32 debounce, value;
+	void __iomem *base;
+
+	base = tegra186_gpio_get_base(gpio, offset);
+	if (base == NULL)
+		return -ENXIO;
+
+	if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+		return -ENOTSUPP;
+
+	debounce = pinconf_to_config_argument(config);
+
+	/*
+	 * The Tegra186 GPIO controller supports a maximum of 255 ms debounce
+	 * time.
+	 */
+	if (debounce > 255000)
+		return -EINVAL;
+
+	debounce = DIV_ROUND_UP(debounce, USEC_PER_MSEC);
+
+	value = TEGRA186_GPIO_DEBOUNCE_CONTROL_THRESHOLD(debounce);
+	writel(value, base + TEGRA186_GPIO_DEBOUNCE_CONTROL);
+
+	value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
+	value |= TEGRA186_GPIO_ENABLE_CONFIG_DEBOUNCE;
+	writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG);
+
+	return 0;
+}
+
 static int tegra186_gpio_of_xlate(struct gpio_chip *chip,
 				  const struct of_phandle_args *spec,
 				  u32 *flags)
@@ -327,7 +377,7 @@ static int tegra186_irq_set_type(struct irq_data *data, unsigned int type)
 	else
 		irq_set_handler_locked(data, handle_edge_irq);
 
-	return 0;
+	return irq_chip_set_type_parent(data, type);
 }
 
 static void tegra186_gpio_irq(struct irq_desc *desc)
@@ -342,12 +392,14 @@ static void tegra186_gpio_irq(struct irq_desc *desc)
 
 	for (i = 0; i < gpio->soc->num_ports; i++) {
 		const struct tegra_gpio_port *port = &gpio->soc->ports[i];
-		void __iomem *base = gpio->base + port->offset;
 		unsigned int pin, irq;
 		unsigned long value;
+		void __iomem *base;
+
+		base = gpio->base + port->bank * 0x1000 + port->port * 0x200;
 
-		/* skip ports that are not associated with this controller */
-		if (parent != gpio->irq[port->irq])
+		/* skip ports that are not associated with this bank */
+		if (parent != gpio->irq[port->bank])
 			goto skip;
 
 		value = readl(base + TEGRA186_GPIO_INTERRUPT_STATUS(1));
@@ -367,47 +419,126 @@ skip:
 	chained_irq_exit(chip, desc);
 }
 
-static int tegra186_gpio_irq_domain_xlate(struct irq_domain *domain,
-					  struct device_node *np,
-					  const u32 *spec, unsigned int size,
-					  unsigned long *hwirq,
-					  unsigned int *type)
+static int tegra186_gpio_irq_domain_translate(struct irq_domain *domain,
+					      struct irq_fwspec *fwspec,
+					      unsigned long *hwirq,
+					      unsigned int *type)
 {
 	struct tegra_gpio *gpio = gpiochip_get_data(domain->host_data);
 	unsigned int port, pin, i, offset = 0;
 
-	if (size < 2)
+	if (WARN_ON(gpio->gpio.of_gpio_n_cells < 2))
 		return -EINVAL;
 
-	port = spec[0] / 8;
-	pin = spec[0] % 8;
+	if (WARN_ON(fwspec->param_count < gpio->gpio.of_gpio_n_cells))
+		return -EINVAL;
 
-	if (port >= gpio->soc->num_ports) {
-		dev_err(gpio->gpio.parent, "invalid port number: %u\n", port);
+	port = fwspec->param[0] / 8;
+	pin = fwspec->param[0] % 8;
+
+	if (port >= gpio->soc->num_ports)
 		return -EINVAL;
-	}
 
 	for (i = 0; i < port; i++)
 		offset += gpio->soc->ports[i].pins;
 
-	*type = spec[1] & IRQ_TYPE_SENSE_MASK;
+	*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
 	*hwirq = offset + pin;
 
 	return 0;
 }
 
-static const struct irq_domain_ops tegra186_gpio_irq_domain_ops = {
-	.map = gpiochip_irq_map,
-	.unmap = gpiochip_irq_unmap,
-	.xlate = tegra186_gpio_irq_domain_xlate,
+static void *tegra186_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+						 unsigned int parent_hwirq,
+						 unsigned int parent_type)
+{
+	struct tegra_gpio *gpio = gpiochip_get_data(chip);
+	struct irq_fwspec *fwspec;
+
+	fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
+	if (!fwspec)
+		return NULL;
+
+	fwspec->fwnode = chip->irq.parent_domain->fwnode;
+	fwspec->param_count = 3;
+	fwspec->param[0] = gpio->soc->instance;
+	fwspec->param[1] = parent_hwirq;
+	fwspec->param[2] = parent_type;
+
+	return fwspec;
+}
+
+static int tegra186_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
+					       unsigned int hwirq,
+					       unsigned int type,
+					       unsigned int *parent_hwirq,
+					       unsigned int *parent_type)
+{
+	*parent_hwirq = chip->irq.child_offset_to_irq(chip, hwirq);
+	*parent_type = type;
+
+	return 0;
+}
+
+static unsigned int tegra186_gpio_child_offset_to_irq(struct gpio_chip *chip,
+						      unsigned int offset)
+{
+	struct tegra_gpio *gpio = gpiochip_get_data(chip);
+	unsigned int i;
+
+	for (i = 0; i < gpio->soc->num_ports; i++) {
+		if (offset < gpio->soc->ports[i].pins)
+			break;
+
+		offset -= gpio->soc->ports[i].pins;
+	}
+
+	return offset + i * 8;
+}
+
+static const struct of_device_id tegra186_pmc_of_match[] = {
+	{ .compatible = "nvidia,tegra186-pmc" },
+	{ .compatible = "nvidia,tegra194-pmc" },
+	{ /* sentinel */ }
 };
 
+static void tegra186_gpio_init_route_mapping(struct tegra_gpio *gpio)
+{
+	unsigned int i, j;
+	u32 value;
+
+	for (i = 0; i < gpio->soc->num_ports; i++) {
+		const struct tegra_gpio_port *port = &gpio->soc->ports[i];
+		unsigned int offset, p = port->port;
+		void __iomem *base;
+
+		base = gpio->secure + port->bank * 0x1000 + 0x800;
+
+		value = readl(base + TEGRA186_GPIO_CTL_SCR);
+
+		/*
+		 * For controllers that haven't been locked down yet, make
+		 * sure to program the default interrupt route mapping.
+		 */
+		if ((value & TEGRA186_GPIO_CTL_SCR_SEC_REN) == 0 &&
+		    (value & TEGRA186_GPIO_CTL_SCR_SEC_WEN) == 0) {
+			for (j = 0; j < 8; j++) {
+				offset = TEGRA186_GPIO_INT_ROUTE_MAPPING(p, j);
+
+				value = readl(base + offset);
+				value = BIT(port->pins) - 1;
+				writel(value, base + offset);
+			}
+		}
+	}
+}
+
 static int tegra186_gpio_probe(struct platform_device *pdev)
 {
 	unsigned int i, j, offset;
 	struct gpio_irq_chip *irq;
 	struct tegra_gpio *gpio;
-	struct resource *res;
+	struct device_node *np;
 	char **names;
 	int err;
 
@@ -417,8 +548,11 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 
 	gpio->soc = of_device_get_match_data(&pdev->dev);
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gpio");
-	gpio->base = devm_ioremap_resource(&pdev->dev, res);
+	gpio->secure = devm_platform_ioremap_resource_byname(pdev, "security");
+	if (IS_ERR(gpio->secure))
+		return PTR_ERR(gpio->secure);
+
+	gpio->base = devm_platform_ioremap_resource_byname(pdev, "gpio");
 	if (IS_ERR(gpio->base))
 		return PTR_ERR(gpio->base);
 
@@ -449,6 +583,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	gpio->gpio.direction_output = tegra186_gpio_direction_output;
 	gpio->gpio.get = tegra186_gpio_get,
 	gpio->gpio.set = tegra186_gpio_set;
+	gpio->gpio.set_config = tegra186_gpio_set_config;
 
 	gpio->gpio.base = -1;
 
@@ -487,10 +622,15 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	gpio->intc.irq_mask = tegra186_irq_mask;
 	gpio->intc.irq_unmask = tegra186_irq_unmask;
 	gpio->intc.irq_set_type = tegra186_irq_set_type;
+	gpio->intc.irq_set_wake = irq_chip_set_wake_parent;
 
 	irq = &gpio->gpio.irq;
 	irq->chip = &gpio->intc;
-	irq->domain_ops = &tegra186_gpio_irq_domain_ops;
+	irq->fwnode = of_node_to_fwnode(pdev->dev.of_node);
+	irq->child_to_parent_hwirq = tegra186_gpio_child_to_parent_hwirq;
+	irq->populate_parent_alloc_arg = tegra186_gpio_populate_parent_fwspec;
+	irq->child_offset_to_irq = tegra186_gpio_child_offset_to_irq;
+	irq->child_irq_domain_ops.translate = tegra186_gpio_irq_domain_translate;
 	irq->handler = handle_simple_irq;
 	irq->default_type = IRQ_TYPE_NONE;
 	irq->parent_handler = tegra186_gpio_irq;
@@ -498,6 +638,17 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	irq->num_parents = gpio->num_irq;
 	irq->parents = gpio->irq;
 
+	np = of_find_matching_node(NULL, tegra186_pmc_of_match);
+	if (np) {
+		irq->parent_domain = irq_find_host(np);
+		of_node_put(np);
+
+		if (!irq->parent_domain)
+			return -EPROBE_DEFER;
+	}
+
+	tegra186_gpio_init_route_mapping(gpio);
+
 	irq->map = devm_kcalloc(&pdev->dev, gpio->gpio.ngpio,
 				sizeof(*irq->map), GFP_KERNEL);
 	if (!irq->map)
@@ -507,7 +658,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 		const struct tegra_gpio_port *port = &gpio->soc->ports[i];
 
 		for (j = 0; j < port->pins; j++)
-			irq->map[offset + j] = irq->parents[port->irq];
+			irq->map[offset + j] = irq->parents[port->bank];
 
 		offset += port->pins;
 	}
@@ -526,136 +677,140 @@ static int tegra186_gpio_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#define TEGRA186_MAIN_GPIO_PORT(port, base, count, controller)	\
-	[TEGRA186_MAIN_GPIO_PORT_##port] = {			\
-		.name = #port,					\
-		.offset = base,					\
-		.pins = count,					\
-		.irq = controller,				\
+#define TEGRA186_MAIN_GPIO_PORT(_name, _bank, _port, _pins)	\
+	[TEGRA186_MAIN_GPIO_PORT_##_name] = {			\
+		.name = #_name,					\
+		.bank = _bank,					\
+		.port = _port,					\
+		.pins = _pins,					\
 	}
 
 static const struct tegra_gpio_port tegra186_main_ports[] = {
-	TEGRA186_MAIN_GPIO_PORT( A, 0x2000, 7, 2),
-	TEGRA186_MAIN_GPIO_PORT( B, 0x3000, 7, 3),
-	TEGRA186_MAIN_GPIO_PORT( C, 0x3200, 7, 3),
-	TEGRA186_MAIN_GPIO_PORT( D, 0x3400, 6, 3),
-	TEGRA186_MAIN_GPIO_PORT( E, 0x2200, 8, 2),
-	TEGRA186_MAIN_GPIO_PORT( F, 0x2400, 6, 2),
-	TEGRA186_MAIN_GPIO_PORT( G, 0x4200, 6, 4),
-	TEGRA186_MAIN_GPIO_PORT( H, 0x1000, 7, 1),
-	TEGRA186_MAIN_GPIO_PORT( I, 0x0800, 8, 0),
-	TEGRA186_MAIN_GPIO_PORT( J, 0x5000, 8, 5),
-	TEGRA186_MAIN_GPIO_PORT( K, 0x5200, 1, 5),
-	TEGRA186_MAIN_GPIO_PORT( L, 0x1200, 8, 1),
-	TEGRA186_MAIN_GPIO_PORT( M, 0x5600, 6, 5),
-	TEGRA186_MAIN_GPIO_PORT( N, 0x0000, 7, 0),
-	TEGRA186_MAIN_GPIO_PORT( O, 0x0200, 4, 0),
-	TEGRA186_MAIN_GPIO_PORT( P, 0x4000, 7, 4),
-	TEGRA186_MAIN_GPIO_PORT( Q, 0x0400, 6, 0),
-	TEGRA186_MAIN_GPIO_PORT( R, 0x0a00, 6, 0),
-	TEGRA186_MAIN_GPIO_PORT( T, 0x0600, 4, 0),
-	TEGRA186_MAIN_GPIO_PORT( X, 0x1400, 8, 1),
-	TEGRA186_MAIN_GPIO_PORT( Y, 0x1600, 7, 1),
-	TEGRA186_MAIN_GPIO_PORT(BB, 0x2600, 2, 2),
-	TEGRA186_MAIN_GPIO_PORT(CC, 0x5400, 4, 5),
+	TEGRA186_MAIN_GPIO_PORT( A, 2, 0, 7),
+	TEGRA186_MAIN_GPIO_PORT( B, 3, 0, 7),
+	TEGRA186_MAIN_GPIO_PORT( C, 3, 1, 7),
+	TEGRA186_MAIN_GPIO_PORT( D, 3, 2, 6),
+	TEGRA186_MAIN_GPIO_PORT( E, 2, 1, 8),
+	TEGRA186_MAIN_GPIO_PORT( F, 2, 2, 6),
+	TEGRA186_MAIN_GPIO_PORT( G, 4, 1, 6),
+	TEGRA186_MAIN_GPIO_PORT( H, 1, 0, 7),
+	TEGRA186_MAIN_GPIO_PORT( I, 0, 4, 8),
+	TEGRA186_MAIN_GPIO_PORT( J, 5, 0, 8),
+	TEGRA186_MAIN_GPIO_PORT( K, 5, 1, 1),
+	TEGRA186_MAIN_GPIO_PORT( L, 1, 1, 8),
+	TEGRA186_MAIN_GPIO_PORT( M, 5, 3, 6),
+	TEGRA186_MAIN_GPIO_PORT( N, 0, 0, 7),
+	TEGRA186_MAIN_GPIO_PORT( O, 0, 1, 4),
+	TEGRA186_MAIN_GPIO_PORT( P, 4, 0, 7),
+	TEGRA186_MAIN_GPIO_PORT( Q, 0, 2, 6),
+	TEGRA186_MAIN_GPIO_PORT( R, 0, 5, 6),
+	TEGRA186_MAIN_GPIO_PORT( T, 0, 3, 4),
+	TEGRA186_MAIN_GPIO_PORT( X, 1, 2, 8),
+	TEGRA186_MAIN_GPIO_PORT( Y, 1, 3, 7),
+	TEGRA186_MAIN_GPIO_PORT(BB, 2, 3, 2),
+	TEGRA186_MAIN_GPIO_PORT(CC, 5, 2, 4),
 };
 
 static const struct tegra_gpio_soc tegra186_main_soc = {
 	.num_ports = ARRAY_SIZE(tegra186_main_ports),
 	.ports = tegra186_main_ports,
 	.name = "tegra186-gpio",
+	.instance = 0,
 };
 
-#define TEGRA186_AON_GPIO_PORT(port, base, count, controller)	\
-	[TEGRA186_AON_GPIO_PORT_##port] = {			\
-		.name = #port,					\
-		.offset = base,					\
-		.pins = count,					\
-		.irq = controller,				\
+#define TEGRA186_AON_GPIO_PORT(_name, _bank, _port, _pins)	\
+	[TEGRA186_AON_GPIO_PORT_##_name] = {			\
+		.name = #_name,					\
+		.bank = _bank,					\
+		.port = _port,					\
+		.pins = _pins,					\
 	}
 
 static const struct tegra_gpio_port tegra186_aon_ports[] = {
-	TEGRA186_AON_GPIO_PORT( S, 0x0200, 5, 0),
-	TEGRA186_AON_GPIO_PORT( U, 0x0400, 6, 0),
-	TEGRA186_AON_GPIO_PORT( V, 0x0800, 8, 0),
-	TEGRA186_AON_GPIO_PORT( W, 0x0a00, 8, 0),
-	TEGRA186_AON_GPIO_PORT( Z, 0x0e00, 4, 0),
-	TEGRA186_AON_GPIO_PORT(AA, 0x0c00, 8, 0),
-	TEGRA186_AON_GPIO_PORT(EE, 0x0600, 3, 0),
-	TEGRA186_AON_GPIO_PORT(FF, 0x0000, 5, 0),
+	TEGRA186_AON_GPIO_PORT( S, 0, 1, 5),
+	TEGRA186_AON_GPIO_PORT( U, 0, 2, 6),
+	TEGRA186_AON_GPIO_PORT( V, 0, 4, 8),
+	TEGRA186_AON_GPIO_PORT( W, 0, 5, 8),
+	TEGRA186_AON_GPIO_PORT( Z, 0, 7, 4),
+	TEGRA186_AON_GPIO_PORT(AA, 0, 6, 8),
+	TEGRA186_AON_GPIO_PORT(EE, 0, 3, 3),
+	TEGRA186_AON_GPIO_PORT(FF, 0, 0, 5),
 };
 
 static const struct tegra_gpio_soc tegra186_aon_soc = {
 	.num_ports = ARRAY_SIZE(tegra186_aon_ports),
 	.ports = tegra186_aon_ports,
 	.name = "tegra186-gpio-aon",
+	.instance = 1,
 };
 
-#define TEGRA194_MAIN_GPIO_PORT(port, base, count, controller)	\
-	[TEGRA194_MAIN_GPIO_PORT_##port] = {			\
-		.name = #port,					\
-		.offset = base,					\
-		.pins = count,					\
-		.irq = controller,				\
+#define TEGRA194_MAIN_GPIO_PORT(_name, _bank, _port, _pins)	\
+	[TEGRA194_MAIN_GPIO_PORT_##_name] = {			\
+		.name = #_name,					\
+		.bank = _bank,					\
+		.port = _port,					\
+		.pins = _pins,					\
 	}
 
 static const struct tegra_gpio_port tegra194_main_ports[] = {
-	TEGRA194_MAIN_GPIO_PORT( A, 0x1400, 8, 1),
-	TEGRA194_MAIN_GPIO_PORT( B, 0x4e00, 2, 4),
-	TEGRA194_MAIN_GPIO_PORT( C, 0x4600, 8, 4),
-	TEGRA194_MAIN_GPIO_PORT( D, 0x4800, 4, 4),
-	TEGRA194_MAIN_GPIO_PORT( E, 0x4a00, 8, 4),
-	TEGRA194_MAIN_GPIO_PORT( F, 0x4c00, 6, 4),
-	TEGRA194_MAIN_GPIO_PORT( G, 0x4000, 8, 4),
-	TEGRA194_MAIN_GPIO_PORT( H, 0x4200, 8, 4),
-	TEGRA194_MAIN_GPIO_PORT( I, 0x4400, 5, 4),
-	TEGRA194_MAIN_GPIO_PORT( J, 0x5200, 6, 5),
-	TEGRA194_MAIN_GPIO_PORT( K, 0x3000, 8, 3),
-	TEGRA194_MAIN_GPIO_PORT( L, 0x3200, 4, 3),
-	TEGRA194_MAIN_GPIO_PORT( M, 0x2600, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT( N, 0x2800, 3, 2),
-	TEGRA194_MAIN_GPIO_PORT( O, 0x5000, 6, 5),
-	TEGRA194_MAIN_GPIO_PORT( P, 0x2a00, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT( Q, 0x2c00, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT( R, 0x2e00, 6, 2),
-	TEGRA194_MAIN_GPIO_PORT( S, 0x3600, 8, 3),
-	TEGRA194_MAIN_GPIO_PORT( T, 0x3800, 8, 3),
-	TEGRA194_MAIN_GPIO_PORT( U, 0x3a00, 1, 3),
-	TEGRA194_MAIN_GPIO_PORT( V, 0x1000, 8, 1),
-	TEGRA194_MAIN_GPIO_PORT( W, 0x1200, 2, 1),
-	TEGRA194_MAIN_GPIO_PORT( X, 0x2000, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT( Y, 0x2200, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT( Z, 0x2400, 8, 2),
-	TEGRA194_MAIN_GPIO_PORT(FF, 0x3400, 2, 3),
-	TEGRA194_MAIN_GPIO_PORT(GG, 0x0000, 2, 0)
+	TEGRA194_MAIN_GPIO_PORT( A, 1, 2, 8),
+	TEGRA194_MAIN_GPIO_PORT( B, 4, 7, 2),
+	TEGRA194_MAIN_GPIO_PORT( C, 4, 3, 8),
+	TEGRA194_MAIN_GPIO_PORT( D, 4, 4, 4),
+	TEGRA194_MAIN_GPIO_PORT( E, 4, 5, 8),
+	TEGRA194_MAIN_GPIO_PORT( F, 4, 6, 6),
+	TEGRA194_MAIN_GPIO_PORT( G, 4, 0, 8),
+	TEGRA194_MAIN_GPIO_PORT( H, 4, 1, 8),
+	TEGRA194_MAIN_GPIO_PORT( I, 4, 2, 5),
+	TEGRA194_MAIN_GPIO_PORT( J, 5, 1, 6),
+	TEGRA194_MAIN_GPIO_PORT( K, 3, 0, 8),
+	TEGRA194_MAIN_GPIO_PORT( L, 3, 1, 4),
+	TEGRA194_MAIN_GPIO_PORT( M, 2, 3, 8),
+	TEGRA194_MAIN_GPIO_PORT( N, 2, 4, 3),
+	TEGRA194_MAIN_GPIO_PORT( O, 5, 0, 6),
+	TEGRA194_MAIN_GPIO_PORT( P, 2, 5, 8),
+	TEGRA194_MAIN_GPIO_PORT( Q, 2, 6, 8),
+	TEGRA194_MAIN_GPIO_PORT( R, 2, 7, 6),
+	TEGRA194_MAIN_GPIO_PORT( S, 3, 3, 8),
+	TEGRA194_MAIN_GPIO_PORT( T, 3, 4, 8),
+	TEGRA194_MAIN_GPIO_PORT( U, 3, 5, 1),
+	TEGRA194_MAIN_GPIO_PORT( V, 1, 0, 8),
+	TEGRA194_MAIN_GPIO_PORT( W, 1, 1, 2),
+	TEGRA194_MAIN_GPIO_PORT( X, 2, 0, 8),
+	TEGRA194_MAIN_GPIO_PORT( Y, 2, 1, 8),
+	TEGRA194_MAIN_GPIO_PORT( Z, 2, 2, 8),
+	TEGRA194_MAIN_GPIO_PORT(FF, 3, 2, 2),
+	TEGRA194_MAIN_GPIO_PORT(GG, 0, 0, 2)
 };
 
 static const struct tegra_gpio_soc tegra194_main_soc = {
 	.num_ports = ARRAY_SIZE(tegra194_main_ports),
 	.ports = tegra194_main_ports,
 	.name = "tegra194-gpio",
+	.instance = 0,
 };
 
-#define TEGRA194_AON_GPIO_PORT(port, base, count, controller)	\
-	[TEGRA194_AON_GPIO_PORT_##port] = {			\
-		.name = #port,					\
-		.offset = base,					\
-		.pins = count,					\
-		.irq = controller,				\
+#define TEGRA194_AON_GPIO_PORT(_name, _bank, _port, _pins)	\
+	[TEGRA194_AON_GPIO_PORT_##_name] = {			\
+		.name = #_name,					\
+		.bank = _bank,					\
+		.port = _port,					\
+		.pins = _pins,					\
 	}
 
 static const struct tegra_gpio_port tegra194_aon_ports[] = {
-	TEGRA194_AON_GPIO_PORT(AA, 0x0600, 8, 0),
-	TEGRA194_AON_GPIO_PORT(BB, 0x0800, 4, 0),
-	TEGRA194_AON_GPIO_PORT(CC, 0x0200, 8, 0),
-	TEGRA194_AON_GPIO_PORT(DD, 0x0400, 3, 0),
-	TEGRA194_AON_GPIO_PORT(EE, 0x0000, 7, 0)
+	TEGRA194_AON_GPIO_PORT(AA, 0, 3, 8),
+	TEGRA194_AON_GPIO_PORT(BB, 0, 4, 4),
+	TEGRA194_AON_GPIO_PORT(CC, 0, 1, 8),
+	TEGRA194_AON_GPIO_PORT(DD, 0, 2, 3),
+	TEGRA194_AON_GPIO_PORT(EE, 0, 0, 7)
 };
 
 static const struct tegra_gpio_soc tegra194_aon_soc = {
 	.num_ports = ARRAY_SIZE(tegra194_aon_ports),
 	.ports = tegra194_aon_ports,
 	.name = "tegra194-gpio-aon",
+	.instance = 1,
 };
 
 static const struct of_device_id tegra186_gpio_of_match[] = {
diff --git a/drivers/gpio/gpio-thunderx.c b/drivers/gpio/gpio-thunderx.c
index ddad5c7ea617..9f66deab46ea 100644
--- a/drivers/gpio/gpio-thunderx.c
+++ b/drivers/gpio/gpio-thunderx.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
+#include <asm-generic/msi.h>
 
 
 #define GPIO_RX_DAT	0x0
@@ -169,7 +170,10 @@ static int thunderx_gpio_get_direction(struct gpio_chip *chip, unsigned int line
 
 	bit_cfg = readq(txgpio->register_base + bit_cfg_reg(line));
 
-	return !(bit_cfg & GPIO_BIT_CFG_TX_OE);
+	if (bit_cfg & GPIO_BIT_CFG_TX_OE)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int thunderx_gpio_set_config(struct gpio_chip *chip,
@@ -392,12 +396,32 @@ static int thunderx_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
 					       unsigned int *parent_type)
 {
 	struct thunderx_gpio *txgpio = gpiochip_get_data(gc);
-
-	*parent = txgpio->base_msi + (2 * child);
+	struct irq_data *irqd;
+	unsigned int irq;
+
+	irq = txgpio->msix_entries[child].vector;
+	irqd = irq_domain_get_irq_data(gc->irq.parent_domain, irq);
+	if (!irqd)
+		return -EINVAL;
+	*parent = irqd_to_hwirq(irqd);
 	*parent_type = IRQ_TYPE_LEVEL_HIGH;
 	return 0;
 }
 
+static void *thunderx_gpio_populate_parent_alloc_info(struct gpio_chip *chip,
+						      unsigned int parent_hwirq,
+						      unsigned int parent_type)
+{
+	msi_alloc_info_t *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return NULL;
+
+	info->hwirq = parent_hwirq;
+	return info;
+}
+
 static int thunderx_gpio_probe(struct pci_dev *pdev,
 			       const struct pci_device_id *id)
 {
@@ -512,6 +536,7 @@ static int thunderx_gpio_probe(struct pci_dev *pdev,
 	girq->parent_domain =
 		irq_get_irq_data(txgpio->msix_entries[0].vector)->domain;
 	girq->child_to_parent_hwirq = thunderx_gpio_child_to_parent_hwirq;
+	girq->populate_parent_alloc_arg = thunderx_gpio_populate_parent_alloc_info;
 	girq->handler = handle_bad_irq;
 	girq->default_type = IRQ_TYPE_NONE;
 
@@ -521,9 +546,15 @@ static int thunderx_gpio_probe(struct pci_dev *pdev,
 
 	/* Push on irq_data and the domain for each line. */
 	for (i = 0; i < ngpio; i++) {
-		err = irq_domain_push_irq(chip->irq.domain,
+		struct irq_fwspec fwspec;
+
+		fwspec.fwnode = of_node_to_fwnode(dev->of_node);
+		fwspec.param_count = 2;
+		fwspec.param[0] = i;
+		fwspec.param[1] = IRQ_TYPE_NONE;
+		err = irq_domain_push_irq(girq->domain,
 					  txgpio->msix_entries[i].vector,
-					  chip);
+					  &fwspec);
 		if (err < 0)
 			dev_err(dev, "irq_domain_push_irq: %d\n", err);
 	}
diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c
index c8b34d787eed..99d5a84a9129 100644
--- a/drivers/gpio/gpio-tpic2810.c
+++ b/drivers/gpio/gpio-tpic2810.c
@@ -39,7 +39,7 @@ static int tpic2810_get_direction(struct gpio_chip *chip,
 				  unsigned offset)
 {
 	/* This device always output */
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int tpic2810_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-tps65086.c b/drivers/gpio/gpio-tps65086.c
index 2eea98ff4ea3..1e9d8262d0ff 100644
--- a/drivers/gpio/gpio-tps65086.c
+++ b/drivers/gpio/gpio-tps65086.c
@@ -21,7 +21,7 @@ static int tps65086_gpio_get_direction(struct gpio_chip *chip,
 				       unsigned offset)
 {
 	/* This device is output only */
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int tps65086_gpio_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-tps65912.c b/drivers/gpio/gpio-tps65912.c
index 3ad68bd78282..510d9ed9fd2a 100644
--- a/drivers/gpio/gpio-tps65912.c
+++ b/drivers/gpio/gpio-tps65912.c
@@ -32,9 +32,9 @@ static int tps65912_gpio_get_direction(struct gpio_chip *gc,
 		return ret;
 
 	if (val & GPIO_CFG_MASK)
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 	else
-		return 1;
+		return GPIO_LINE_DIRECTION_IN;
 }
 
 static int tps65912_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c
index aff6e504c666..f7f5f770e0fb 100644
--- a/drivers/gpio/gpio-tps68470.c
+++ b/drivers/gpio/gpio-tps68470.c
@@ -47,7 +47,6 @@ static int tps68470_gpio_get(struct gpio_chip *gc, unsigned int offset)
 	return !!(val & BIT(offset));
 }
 
-/* Return 0 if output, 1 if input */
 static int tps68470_gpio_get_direction(struct gpio_chip *gc,
 				       unsigned int offset)
 {
@@ -57,7 +56,7 @@ static int tps68470_gpio_get_direction(struct gpio_chip *gc,
 
 	/* rest are always outputs */
 	if (offset >= TPS68470_N_REGULAR_GPIO)
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
 	ret = regmap_read(regmap, TPS68470_GPIO_CTL_REG_A(offset), &val);
 	if (ret) {
@@ -67,7 +66,8 @@ static int tps68470_gpio_get_direction(struct gpio_chip *gc,
 	}
 
 	val &= TPS68470_GPIO_MODE_MASK;
-	return val >= TPS68470_GPIO_MODE_OUT_CMOS ? 0 : 1;
+	return val >= TPS68470_GPIO_MODE_OUT_CMOS ? GPIO_LINE_DIRECTION_OUT :
+						    GPIO_LINE_DIRECTION_IN;
 }
 
 static void tps68470_gpio_set(struct gpio_chip *gc, unsigned int offset,
diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index a3109bcaa0ac..5022e0ad0fae 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -101,7 +101,10 @@ static int tqmx86_gpio_direction_output(struct gpio_chip *chip,
 static int tqmx86_gpio_get_direction(struct gpio_chip *chip,
 				     unsigned int offset)
 {
-	return !!(TQMX86_DIR_INPUT_MASK & BIT(offset));
+	if (TQMX86_DIR_INPUT_MASK & BIT(offset))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static void tqmx86_gpio_irq_mask(struct irq_data *data)
diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
index 1da8d0586329..d885032cf814 100644
--- a/drivers/gpio/gpio-ts4900.c
+++ b/drivers/gpio/gpio-ts4900.c
@@ -44,7 +44,10 @@ static int ts4900_gpio_get_direction(struct gpio_chip *chip,
 
 	regmap_read(priv->regmap, offset, &reg);
 
-	return !(reg & TS4900_GPIO_OE);
+	if (reg & TS4900_GPIO_OE)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int ts4900_gpio_direction_input(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-twl4030.c b/drivers/gpio/gpio-twl4030.c
index fbfb648d3502..de249726230e 100644
--- a/drivers/gpio/gpio-twl4030.c
+++ b/drivers/gpio/gpio-twl4030.c
@@ -165,10 +165,10 @@ static int twl4030_get_gpio_direction(int gpio)
 	if (ret < 0)
 		return ret;
 
-	/* 1 = output, but gpiolib semantics are inverse so invert */
-	ret = !(ret & d_msk);
+	if (ret & d_msk)
+		return GPIO_LINE_DIRECTION_OUT;
 
-	return ret;
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int twl4030_set_gpio_dataout(int gpio, int enable)
@@ -380,10 +380,10 @@ static int twl_get_direction(struct gpio_chip *chip, unsigned offset)
 {
 	struct gpio_twl4030_priv *priv = gpiochip_get_data(chip);
 	/*
-	 * Default 0 = output
+	 * Default GPIO_LINE_DIRECTION_OUT
 	 * LED GPIOs >= TWL4030_GPIO_MAX are always output
 	 */
-	int ret = 0;
+	int ret = GPIO_LINE_DIRECTION_OUT;
 
 	mutex_lock(&priv->mutex);
 	if (offset < TWL4030_GPIO_MAX) {
diff --git a/drivers/gpio/gpio-twl6040.c b/drivers/gpio/gpio-twl6040.c
index c845b2ff1f43..648fb418d775 100644
--- a/drivers/gpio/gpio-twl6040.c
+++ b/drivers/gpio/gpio-twl6040.c
@@ -34,8 +34,7 @@ static int twl6040gpo_get(struct gpio_chip *chip, unsigned offset)
 
 static int twl6040gpo_get_direction(struct gpio_chip *chip, unsigned offset)
 {
-	/* This means "out" */
-	return 0;
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int twl6040gpo_direction_out(struct gpio_chip *chip, unsigned offset,
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index 93cdcc41e9fb..7ec97499b7f7 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -15,9 +15,6 @@
 #include <linux/spinlock.h>
 #include <dt-bindings/gpio/uniphier-gpio.h>
 
-#define UNIPHIER_GPIO_BANK_MASK		\
-				GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
-
 #define UNIPHIER_GPIO_IRQ_MAX_NUM	24
 
 #define UNIPHIER_GPIO_PORT_DATA		0x0	/* data */
@@ -113,7 +110,10 @@ static int uniphier_gpio_offset_read(struct gpio_chip *chip,
 static int uniphier_gpio_get_direction(struct gpio_chip *chip,
 				       unsigned int offset)
 {
-	return uniphier_gpio_offset_read(chip, offset, UNIPHIER_GPIO_PORT_DIR);
+	if (uniphier_gpio_offset_read(chip, offset, UNIPHIER_GPIO_PORT_DIR))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int uniphier_gpio_direction_input(struct gpio_chip *chip,
@@ -147,15 +147,11 @@ static void uniphier_gpio_set(struct gpio_chip *chip,
 static void uniphier_gpio_set_multiple(struct gpio_chip *chip,
 				       unsigned long *mask, unsigned long *bits)
 {
-	unsigned int bank, shift, bank_mask, bank_bits;
-	int i;
+	unsigned long i, bank, bank_mask, bank_bits;
 
-	for (i = 0; i < chip->ngpio; i += UNIPHIER_GPIO_LINES_PER_BANK) {
+	for_each_set_clump8(i, bank_mask, mask, chip->ngpio) {
 		bank = i / UNIPHIER_GPIO_LINES_PER_BANK;
-		shift = i % BITS_PER_LONG;
-		bank_mask = (mask[BIT_WORD(i)] >> shift) &
-						UNIPHIER_GPIO_BANK_MASK;
-		bank_bits = bits[BIT_WORD(i)] >> shift;
+		bank_bits = bitmap_get_value8(bits, i);
 
 		uniphier_gpio_bank_write(chip, bank, UNIPHIER_GPIO_PORT_DATA,
 					 bank_mask, bank_bits);
diff --git a/drivers/gpio/gpio-vx855.c b/drivers/gpio/gpio-vx855.c
index 4ff146ca32fe..3bf397b8dfbc 100644
--- a/drivers/gpio/gpio-vx855.c
+++ b/drivers/gpio/gpio-vx855.c
@@ -71,7 +71,7 @@ static inline u_int32_t gpio_o_bit(int i)
 		return 1 << (i + 13);
 }
 
-/* Mapping betwee numeric GPIO ID and the actual GPIO hardware numbering:
+/* Mapping between numeric GPIO ID and the actual GPIO hardware numbering:
  * 0..13	GPI 0..13
  * 14..26	GPO 0..12
  * 27..41	GPIO 0..14
diff --git a/drivers/gpio/gpio-wcd934x.c b/drivers/gpio/gpio-wcd934x.c
new file mode 100644
index 000000000000..74913f2e5697
--- /dev/null
+++ b/drivers/gpio/gpio-wcd934x.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019, Linaro Limited
+
+#include <linux/module.h>
+#include <linux/gpio/driver.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/of_device.h>
+
+#define WCD_PIN_MASK(p) BIT(p - 1)
+#define WCD_REG_DIR_CTL_OFFSET 0x42
+#define WCD_REG_VAL_CTL_OFFSET 0x43
+#define WCD934X_NPINS		5
+
+struct wcd_gpio_data {
+	struct regmap *map;
+	struct gpio_chip chip;
+};
+
+static int wcd_gpio_get_direction(struct gpio_chip *chip, unsigned int pin)
+{
+	struct wcd_gpio_data *data = gpiochip_get_data(chip);
+	unsigned int value;
+	int ret;
+
+	ret = regmap_read(data->map, WCD_REG_DIR_CTL_OFFSET, &value);
+	if (ret < 0)
+		return ret;
+
+	if (value & WCD_PIN_MASK(pin))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
+}
+
+static int wcd_gpio_direction_input(struct gpio_chip *chip, unsigned int pin)
+{
+	struct wcd_gpio_data *data = gpiochip_get_data(chip);
+
+	return regmap_update_bits(data->map, WCD_REG_DIR_CTL_OFFSET,
+				  WCD_PIN_MASK(pin), 0);
+}
+
+static int wcd_gpio_direction_output(struct gpio_chip *chip, unsigned int pin,
+				     int val)
+{
+	struct wcd_gpio_data *data = gpiochip_get_data(chip);
+
+	regmap_update_bits(data->map, WCD_REG_DIR_CTL_OFFSET,
+			   WCD_PIN_MASK(pin), WCD_PIN_MASK(pin));
+
+	return regmap_update_bits(data->map, WCD_REG_VAL_CTL_OFFSET,
+				  WCD_PIN_MASK(pin),
+				  val ? WCD_PIN_MASK(pin) : 0);
+}
+
+static int wcd_gpio_get(struct gpio_chip *chip, unsigned int pin)
+{
+	struct wcd_gpio_data *data = gpiochip_get_data(chip);
+	int value;
+
+	regmap_read(data->map, WCD_REG_VAL_CTL_OFFSET, &value);
+
+	return !!(value && WCD_PIN_MASK(pin));
+}
+
+static void wcd_gpio_set(struct gpio_chip *chip, unsigned int pin, int val)
+{
+	wcd_gpio_direction_output(chip, pin, val);
+}
+
+static int wcd_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct wcd_gpio_data *data;
+	struct gpio_chip *chip;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->map = dev_get_regmap(dev->parent, NULL);
+	if (!data->map) {
+		dev_err(dev, "%s: failed to get regmap\n", __func__);
+		return  -EINVAL;
+	}
+
+	chip = &data->chip;
+	chip->direction_input  = wcd_gpio_direction_input;
+	chip->direction_output = wcd_gpio_direction_output;
+	chip->get_direction = wcd_gpio_get_direction;
+	chip->get = wcd_gpio_get;
+	chip->set = wcd_gpio_set;
+	chip->parent = dev;
+	chip->base = -1;
+	chip->ngpio = WCD934X_NPINS;
+	chip->label = dev_name(dev);
+	chip->of_gpio_n_cells = 2;
+	chip->can_sleep = false;
+
+	return devm_gpiochip_add_data(dev, chip, data);
+}
+
+static const struct of_device_id wcd_gpio_of_match[] = {
+	{ .compatible = "qcom,wcd9340-gpio" },
+	{ .compatible = "qcom,wcd9341-gpio" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, wcd_gpio_of_match);
+
+static struct platform_driver wcd_gpio_driver = {
+	.driver = {
+		   .name = "wcd934x-gpio",
+		   .of_match_table = wcd_gpio_of_match,
+	},
+	.probe = wcd_gpio_probe,
+};
+
+module_platform_driver(wcd_gpio_driver);
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc WCD GPIO control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index 444fe9e7f04a..8b481b3c1ebe 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -170,13 +170,16 @@ static int wcove_gpio_get_direction(struct gpio_chip *chip, unsigned int gpio)
 	int ret, reg = to_reg(gpio, CTRL_OUT);
 
 	if (reg < 0)
-		return 0;
+		return GPIO_LINE_DIRECTION_OUT;
 
 	ret = regmap_read(wg->regmap, reg, &val);
 	if (ret)
 		return ret;
 
-	return !(val & CTLO_DIR_OUT);
+	if (val & CTLO_DIR_OUT)
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 static int wcove_gpio_get(struct gpio_chip *chip, unsigned int gpio)
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index e0ef66b6a237..cb510df2b014 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -56,7 +56,10 @@ static int ws16c48_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 	const unsigned port = offset / 8;
 	const unsigned mask = BIT(offset % 8);
 
-	return !!(ws16c48gpio->io_state[port] & mask);
+	if (ws16c48gpio->io_state[port] & mask)
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int ws16c48_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
@@ -126,42 +129,19 @@ static int ws16c48_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned int gpio_reg_size = 8;
-	size_t i;
-	const size_t num_ports = chip->ngpio / gpio_reg_size;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < num_ports; i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		port_addr = ws16c48gpio->base + offset / 8;
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* read bits from current gpio port */
-		port_state = inb(ws16c48gpio->base + i);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -195,39 +175,29 @@ static void ws16c48_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int iomask;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
-
-		port = i / gpio_reg_size;
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		index = offset / 8;
+		port_addr = ws16c48gpio->base + index;
 
 		/* mask out GPIO configured for input */
-		iomask = mask[BIT_WORD(i)] & ~ws16c48gpio->io_state[port];
-		bitmask = iomask & bits[BIT_WORD(i)];
+		gpio_mask &= ~ws16c48gpio->io_state[index];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		ws16c48gpio->out_state[port] &= ~iomask;
-		ws16c48gpio->out_state[port] |= bitmask;
-		outb(ws16c48gpio->out_state[port], ws16c48gpio->base + port);
+		ws16c48gpio->out_state[index] &= ~gpio_mask;
+		ws16c48gpio->out_state[index] |= bitmask;
+		outb(ws16c48gpio->out_state[index], port_addr);
 
 		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c
index 2918363884de..532b0df8a1f2 100644
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c
@@ -80,7 +80,10 @@ static int xgene_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 	bank_offset = GPIO_SET_DR_OFFSET + GPIO_BANK_OFFSET(offset);
 	bit_offset = GPIO_BIT_OFFSET(offset);
 
-	return !!(ioread32(chip->base + bank_offset) & BIT(bit_offset));
+	if (ioread32(chip->base + bank_offset) & BIT(bit_offset))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int xgene_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
@@ -155,28 +158,16 @@ static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
 
 static int xgene_gpio_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct xgene_gpio *gpio;
 	int err = 0;
 
 	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
-	if (!gpio) {
-		err = -ENOMEM;
-		goto err;
-	}
+	if (!gpio)
+		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	gpio->base = devm_ioremap_nocache(&pdev->dev, res->start,
-							resource_size(res));
-	if (!gpio->base) {
-		err = -ENOMEM;
-		goto err;
-	}
+	gpio->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(gpio->base))
+		return PTR_ERR(gpio->base);
 
 	gpio->chip.ngpio = XGENE_MAX_GPIOS;
 
@@ -196,14 +187,11 @@ static int xgene_gpio_probe(struct platform_device *pdev)
 	if (err) {
 		dev_err(&pdev->dev,
 			"failed to register gpiochip.\n");
-		goto err;
+		return err;
 	}
 
 	dev_info(&pdev->dev, "X-Gene GPIO driver registered.\n");
 	return 0;
-err:
-	dev_err(&pdev->dev, "X-Gene GPIO driver registration failed.\n");
-	return err;
 }
 
 static const struct of_device_id xgene_gpio_of_match[] = {
diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c
new file mode 100644
index 000000000000..ad5489a65d54
--- /dev/null
+++ b/drivers/gpio/gpio-xgs-iproc.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Broadcom
+ */
+
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#define IPROC_CCA_INT_F_GPIOINT		BIT(0)
+#define IPROC_CCA_INT_STS		0x20
+#define IPROC_CCA_INT_MASK		0x24
+
+#define IPROC_GPIO_CCA_DIN		0x0
+#define IPROC_GPIO_CCA_DOUT		0x4
+#define IPROC_GPIO_CCA_OUT_EN		0x8
+#define IPROC_GPIO_CCA_INT_LEVEL	0x10
+#define IPROC_GPIO_CCA_INT_LEVEL_MASK	0x14
+#define IPROC_GPIO_CCA_INT_EVENT	0x18
+#define IPROC_GPIO_CCA_INT_EVENT_MASK	0x1C
+#define IPROC_GPIO_CCA_INT_EDGE		0x24
+
+struct iproc_gpio_chip {
+	struct irq_chip irqchip;
+	struct gpio_chip gc;
+	spinlock_t lock;
+	struct device *dev;
+	void __iomem *base;
+	void __iomem *intr;
+};
+
+static inline struct iproc_gpio_chip *
+to_iproc_gpio(struct gpio_chip *gc)
+{
+	return container_of(gc, struct iproc_gpio_chip, gc);
+}
+
+static void iproc_gpio_irq_ack(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct iproc_gpio_chip *chip = to_iproc_gpio(gc);
+	int pin = d->hwirq;
+	unsigned long flags;
+	u32 irq = d->irq;
+	u32 irq_type, event_status = 0;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	irq_type = irq_get_trigger_type(irq);
+	if (irq_type & IRQ_TYPE_EDGE_BOTH) {
+		event_status |= BIT(pin);
+		writel_relaxed(event_status,
+			       chip->base + IPROC_GPIO_CCA_INT_EVENT);
+	}
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static void iproc_gpio_irq_unmask(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct iproc_gpio_chip *chip = to_iproc_gpio(gc);
+	int pin = d->hwirq;
+	unsigned long flags;
+	u32 irq = d->irq;
+	u32 int_mask, irq_type, event_mask;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	irq_type = irq_get_trigger_type(irq);
+	event_mask = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EVENT_MASK);
+	int_mask = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL_MASK);
+
+	if (irq_type & IRQ_TYPE_EDGE_BOTH) {
+		event_mask |= 1 << pin;
+		writel_relaxed(event_mask,
+			       chip->base + IPROC_GPIO_CCA_INT_EVENT_MASK);
+	} else {
+		int_mask |= 1 << pin;
+		writel_relaxed(int_mask,
+			       chip->base + IPROC_GPIO_CCA_INT_LEVEL_MASK);
+	}
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static void iproc_gpio_irq_mask(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct iproc_gpio_chip *chip = to_iproc_gpio(gc);
+	int pin = d->hwirq;
+	unsigned long flags;
+	u32 irq = d->irq;
+	u32 irq_type, int_mask, event_mask;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	irq_type = irq_get_trigger_type(irq);
+	event_mask = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EVENT_MASK);
+	int_mask = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL_MASK);
+
+	if (irq_type & IRQ_TYPE_EDGE_BOTH) {
+		event_mask &= ~BIT(pin);
+		writel_relaxed(event_mask,
+			       chip->base + IPROC_GPIO_CCA_INT_EVENT_MASK);
+	} else {
+		int_mask &= ~BIT(pin);
+		writel_relaxed(int_mask,
+			       chip->base + IPROC_GPIO_CCA_INT_LEVEL_MASK);
+	}
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int iproc_gpio_irq_set_type(struct irq_data *d, u32 type)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct iproc_gpio_chip *chip = to_iproc_gpio(gc);
+	int pin = d->hwirq;
+	unsigned long flags;
+	u32 irq = d->irq;
+	u32 event_pol, int_pol;
+	int ret = 0;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		event_pol = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EDGE);
+		event_pol &= ~BIT(pin);
+		writel_relaxed(event_pol, chip->base + IPROC_GPIO_CCA_INT_EDGE);
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		event_pol = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EDGE);
+		event_pol |= BIT(pin);
+		writel_relaxed(event_pol, chip->base + IPROC_GPIO_CCA_INT_EDGE);
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		int_pol = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL);
+		int_pol &= ~BIT(pin);
+		writel_relaxed(int_pol, chip->base + IPROC_GPIO_CCA_INT_LEVEL);
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		int_pol = readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL);
+		int_pol |= BIT(pin);
+		writel_relaxed(int_pol, chip->base + IPROC_GPIO_CCA_INT_LEVEL);
+		break;
+	default:
+		/* should not come here */
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		irq_set_handler_locked(irq_get_irq_data(irq), handle_level_irq);
+	else if (type & IRQ_TYPE_EDGE_BOTH)
+		irq_set_handler_locked(irq_get_irq_data(irq), handle_edge_irq);
+
+out_unlock:
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return ret;
+}
+
+static irqreturn_t iproc_gpio_irq_handler(int irq, void *data)
+{
+	struct gpio_chip *gc = (struct gpio_chip *)data;
+	struct iproc_gpio_chip *chip = to_iproc_gpio(gc);
+	int bit;
+	unsigned long int_bits = 0;
+	u32 int_status;
+
+	/* go through the entire GPIOs and handle all interrupts */
+	int_status = readl_relaxed(chip->intr + IPROC_CCA_INT_STS);
+	if (int_status & IPROC_CCA_INT_F_GPIOINT) {
+		u32 event, level;
+
+		/* Get level and edge interrupts */
+		event =
+		    readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EVENT_MASK);
+		event &= readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_EVENT);
+		level = readl_relaxed(chip->base + IPROC_GPIO_CCA_DIN);
+		level ^= readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL);
+		level &=
+		    readl_relaxed(chip->base + IPROC_GPIO_CCA_INT_LEVEL_MASK);
+		int_bits = level | event;
+
+		for_each_set_bit(bit, &int_bits, gc->ngpio)
+			generic_handle_irq(irq_linear_revmap(gc->irq.domain, bit));
+	}
+
+	return int_bits ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int iproc_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *dn = pdev->dev.of_node;
+	struct iproc_gpio_chip *chip;
+	u32 num_gpios;
+	int irq, ret;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->dev = dev;
+	platform_set_drvdata(pdev, chip);
+	spin_lock_init(&chip->lock);
+
+	chip->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(chip->base))
+		return PTR_ERR(chip->base);
+
+	ret = bgpio_init(&chip->gc, dev, 4,
+			 chip->base + IPROC_GPIO_CCA_DIN,
+			 chip->base + IPROC_GPIO_CCA_DOUT,
+			 NULL,
+			 chip->base + IPROC_GPIO_CCA_OUT_EN,
+			 NULL,
+			 0);
+	if (ret) {
+		dev_err(dev, "unable to init GPIO chip\n");
+		return ret;
+	}
+
+	chip->gc.label = dev_name(dev);
+	if (of_property_read_u32(dn, "ngpios", &num_gpios))
+		chip->gc.ngpio = num_gpios;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq > 0) {
+		struct gpio_irq_chip *girq;
+		struct irq_chip *irqc;
+		u32 val;
+
+		irqc = &chip->irqchip;
+		irqc->name = dev_name(dev);
+		irqc->irq_ack = iproc_gpio_irq_ack;
+		irqc->irq_mask = iproc_gpio_irq_mask;
+		irqc->irq_unmask = iproc_gpio_irq_unmask;
+		irqc->irq_set_type = iproc_gpio_irq_set_type;
+
+		chip->intr = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(chip->intr))
+			return PTR_ERR(chip->intr);
+
+		/* Enable GPIO interrupts for CCA GPIO */
+		val = readl_relaxed(chip->intr + IPROC_CCA_INT_MASK);
+		val |= IPROC_CCA_INT_F_GPIOINT;
+		writel_relaxed(val, chip->intr + IPROC_CCA_INT_MASK);
+
+		/*
+		 * Directly request the irq here instead of passing
+		 * a flow-handler because the irq is shared.
+		 */
+		ret = devm_request_irq(dev, irq, iproc_gpio_irq_handler,
+				       IRQF_SHARED, chip->gc.label, &chip->gc);
+		if (ret) {
+			dev_err(dev, "Fail to request IRQ%d: %d\n", irq, ret);
+			return ret;
+		}
+
+		girq = &chip->gc.irq;
+		girq->chip = irqc;
+		/* This will let us handle the parent IRQ in the driver */
+		girq->parent_handler = NULL;
+		girq->num_parents = 0;
+		girq->parents = NULL;
+		girq->default_type = IRQ_TYPE_NONE;
+		girq->handler = handle_simple_irq;
+	}
+
+	ret = devm_gpiochip_add_data(dev, &chip->gc, chip);
+	if (ret) {
+		dev_err(dev, "unable to add GPIO chip\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int iproc_gpio_remove(struct platform_device *pdev)
+{
+	struct iproc_gpio_chip *chip;
+
+	chip = platform_get_drvdata(pdev);
+	if (!chip)
+		return -ENODEV;
+
+	if (chip->intr) {
+		u32 val;
+
+		val = readl_relaxed(chip->intr + IPROC_CCA_INT_MASK);
+		val &= ~IPROC_CCA_INT_F_GPIOINT;
+		writel_relaxed(val, chip->intr + IPROC_CCA_INT_MASK);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id bcm_iproc_gpio_of_match[] = {
+	{ .compatible = "brcm,iproc-gpio-cca" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, bcm_iproc_gpio_of_match);
+
+static struct platform_driver bcm_iproc_gpio_driver = {
+	.driver = {
+		.name = "iproc-xgs-gpio",
+		.of_match_table = bcm_iproc_gpio_of_match,
+	},
+	.probe = iproc_gpio_probe,
+	.remove = iproc_gpio_remove,
+};
+
+module_platform_driver(bcm_iproc_gpio_driver);
+
+MODULE_DESCRIPTION("XGS IPROC GPIO driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-xra1403.c b/drivers/gpio/gpio-xra1403.c
index 05f1998c11a4..31b5072b2df0 100644
--- a/drivers/gpio/gpio-xra1403.c
+++ b/drivers/gpio/gpio-xra1403.c
@@ -83,7 +83,10 @@ static int xra1403_get_direction(struct gpio_chip *chip, unsigned int offset)
 	if (ret)
 		return ret;
 
-	return !!(val & BIT(offset % 8));
+	if (val & BIT(offset % 8))
+		return GPIO_LINE_DIRECTION_IN;
+
+	return GPIO_LINE_DIRECTION_OUT;
 }
 
 static int xra1403_get(struct gpio_chip *chip, unsigned int offset)
diff --git a/drivers/gpio/gpio-xtensa.c b/drivers/gpio/gpio-xtensa.c
index 43d3fa5f511a..c8af34a6368f 100644
--- a/drivers/gpio/gpio-xtensa.c
+++ b/drivers/gpio/gpio-xtensa.c
@@ -44,15 +44,14 @@ static inline unsigned long enable_cp(unsigned long *cpenable)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	RSR_CPENABLE(*cpenable);
-	WSR_CPENABLE(*cpenable | BIT(XCHAL_CP_ID_XTIOP));
-
+	*cpenable = xtensa_get_sr(cpenable);
+	xtensa_set_sr(*cpenable | BIT(XCHAL_CP_ID_XTIOP), cpenable);
 	return flags;
 }
 
 static inline void disable_cp(unsigned long flags, unsigned long cpenable)
 {
-	WSR_CPENABLE(cpenable);
+	xtensa_set_sr(cpenable, cpenable);
 	local_irq_restore(flags);
 }
 
@@ -72,7 +71,7 @@ static inline void disable_cp(unsigned long flags, unsigned long cpenable)
 
 static int xtensa_impwire_get_direction(struct gpio_chip *gc, unsigned offset)
 {
-	return 1; /* input only */
+	return GPIO_LINE_DIRECTION_IN; /* input only */
 }
 
 static int xtensa_impwire_get_value(struct gpio_chip *gc, unsigned offset)
@@ -95,7 +94,7 @@ static void xtensa_impwire_set_value(struct gpio_chip *gc, unsigned offset,
 
 static int xtensa_expstate_get_direction(struct gpio_chip *gc, unsigned offset)
 {
-	return 0; /* output only */
+	return GPIO_LINE_DIRECTION_OUT; /* output only */
 }
 
 static int xtensa_expstate_get_value(struct gpio_chip *gc, unsigned offset)
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index cd475ff4bcad..05ba16fffdad 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -360,7 +360,7 @@ static int zynq_gpio_dir_out(struct gpio_chip *chip, unsigned int pin,
  *
  * This function returns the direction of the specified GPIO.
  *
- * Return: 0 for output, 1 for input
+ * Return: GPIO_LINE_DIRECTION_OUT or GPIO_LINE_DIRECTION_IN
  */
 static int zynq_gpio_get_direction(struct gpio_chip *chip, unsigned int pin)
 {
@@ -372,7 +372,10 @@ static int zynq_gpio_get_direction(struct gpio_chip *chip, unsigned int pin)
 
 	reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num));
 
-	return !(reg & BIT(bank_pin_num));
+	if (reg & BIT(bank_pin_num))
+		return GPIO_LINE_DIRECTION_OUT;
+
+	return GPIO_LINE_DIRECTION_IN;
 }
 
 /**
@@ -681,6 +684,8 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio)
 	unsigned int bank_num;
 
 	for (bank_num = 0; bank_num < gpio->p_data->max_bank; bank_num++) {
+		writel_relaxed(ZYNQ_GPIO_IXR_DISABLE_ALL, gpio->base_addr +
+				ZYNQ_GPIO_INTDIS_OFFSET(bank_num));
 		writel_relaxed(gpio->context.datalsw[bank_num],
 			       gpio->base_addr +
 			       ZYNQ_GPIO_DATA_LSW_OFFSET(bank_num));
@@ -690,9 +695,6 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio)
 		writel_relaxed(gpio->context.dirm[bank_num],
 			       gpio->base_addr +
 			       ZYNQ_GPIO_DIRM_OFFSET(bank_num));
-		writel_relaxed(gpio->context.int_en[bank_num],
-			       gpio->base_addr +
-			       ZYNQ_GPIO_INTEN_OFFSET(bank_num));
 		writel_relaxed(gpio->context.int_type[bank_num],
 			       gpio->base_addr +
 			       ZYNQ_GPIO_INTTYPE_OFFSET(bank_num));
@@ -702,6 +704,9 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio)
 		writel_relaxed(gpio->context.int_any[bank_num],
 			       gpio->base_addr +
 			       ZYNQ_GPIO_INTANY_OFFSET(bank_num));
+		writel_relaxed(~(gpio->context.int_en[bank_num]),
+			       gpio->base_addr +
+			       ZYNQ_GPIO_INTEN_OFFSET(bank_num));
 	}
 }
 
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 59ccfd24627d..31fee5e918b7 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -21,11 +21,19 @@
 #include "gpiolib.h"
 #include "gpiolib-acpi.h"
 
+#define QUIRK_NO_EDGE_EVENTS_ON_BOOT		0x01l
+#define QUIRK_NO_WAKEUP				0x02l
+
 static int run_edge_events_on_boot = -1;
 module_param(run_edge_events_on_boot, int, 0444);
 MODULE_PARM_DESC(run_edge_events_on_boot,
 		 "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");
 
+static int honor_wakeup = -1;
+module_param(honor_wakeup, int, 0444);
+MODULE_PARM_DESC(honor_wakeup,
+		 "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto");
+
 /**
  * struct acpi_gpio_event - ACPI GPIO event handler data
  *
@@ -194,6 +202,7 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio)
 		acpi_gpiochip_request_irq(acpi_gpio, event);
 }
 
+/* Always returns AE_OK so that we keep looping over the resources */
 static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
 					     void *context)
 {
@@ -230,19 +239,25 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
 	desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event",
 					 GPIO_ACTIVE_HIGH, GPIOD_IN);
 	if (IS_ERR(desc)) {
-		dev_err(chip->parent, "Failed to request GPIO\n");
-		return AE_ERROR;
+		dev_err(chip->parent,
+			"Failed to request GPIO for pin 0x%04X, err %ld\n",
+			pin, PTR_ERR(desc));
+		return AE_OK;
 	}
 
 	ret = gpiochip_lock_as_irq(chip, pin);
 	if (ret) {
-		dev_err(chip->parent, "Failed to lock GPIO as interrupt\n");
+		dev_err(chip->parent,
+			"Failed to lock GPIO pin 0x%04X as interrupt, err %d\n",
+			pin, ret);
 		goto fail_free_desc;
 	}
 
 	irq = gpiod_to_irq(desc);
 	if (irq < 0) {
-		dev_err(chip->parent, "Failed to translate GPIO to IRQ\n");
+		dev_err(chip->parent,
+			"Failed to translate GPIO pin 0x%04X to IRQ, err %d\n",
+			pin, irq);
 		goto fail_unlock_irq;
 	}
 
@@ -274,7 +289,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
 	event->handle = evt_handle;
 	event->handler = handler;
 	event->irq = irq;
-	event->irq_is_wake = agpio->wake_capable == ACPI_WAKE_CAPABLE;
+	event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE;
 	event->pin = pin;
 	event->desc = desc;
 
@@ -287,7 +302,7 @@ fail_unlock_irq:
 fail_free_desc:
 	gpiochip_free_own_desc(desc);
 
-	return AE_ERROR;
+	return AE_OK;
 }
 
 /**
@@ -1302,7 +1317,7 @@ static int acpi_gpio_handle_deferred_request_irqs(void)
 /* We must use _sync so that this runs after the first deferred_probe run */
 late_initcall_sync(acpi_gpio_handle_deferred_request_irqs);
 
-static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = {
+static const struct dmi_system_id gpiolib_acpi_quirks[] = {
 	{
 		/*
 		 * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for
@@ -1312,7 +1327,8 @@ static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
-		}
+		},
+		.driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
 	},
 	{
 		/*
@@ -1324,20 +1340,52 @@ static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
-		}
+		},
+		.driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
+	},
+	{
+		/*
+		 * Various HP X2 10 Cherry Trail models use an external
+		 * embedded-controller connected via I2C + an ACPI GPIO
+		 * event handler. The embedded controller generates various
+		 * spurious wakeup events when suspended. So disable wakeup
+		 * for its handler (it uses the only ACPI GPIO event handler).
+		 * This breaks wakeup when opening the lid, the user needs
+		 * to press the power-button to wakeup the system. The
+		 * alternative is suspend simply not working, which is worse.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
+		},
+		.driver_data = (void *)QUIRK_NO_WAKEUP,
 	},
 	{} /* Terminating entry */
 };
 
 static int acpi_gpio_setup_params(void)
 {
+	const struct dmi_system_id *id;
+	long quirks = 0;
+
+	id = dmi_first_match(gpiolib_acpi_quirks);
+	if (id)
+		quirks = (long)id->driver_data;
+
 	if (run_edge_events_on_boot < 0) {
-		if (dmi_check_system(run_edge_events_on_boot_blacklist))
+		if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT)
 			run_edge_events_on_boot = 0;
 		else
 			run_edge_events_on_boot = 1;
 	}
 
+	if (honor_wakeup < 0) {
+		if (quirks & QUIRK_NO_WAKEUP)
+			honor_wakeup = 0;
+		else
+			honor_wakeup = 1;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c
index 98e3c20d9730..4421be09b960 100644
--- a/drivers/gpio/gpiolib-devres.c
+++ b/drivers/gpio/gpiolib-devres.c
@@ -185,12 +185,11 @@ struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_gpiod_get_from_of_node);
 
 /**
- * devm_fwnode_get_index_gpiod_from_child - get a GPIO descriptor from a
- *					    device's child node
+ * devm_fwnode_gpiod_get_index - get a GPIO descriptor from a given node
  * @dev:	GPIO consumer
+ * @fwnode:	firmware node containing GPIO reference
  * @con_id:	function within the GPIO consumer
  * @index:	index of the GPIO to obtain in the consumer
- * @child:	firmware node (child of @dev)
  * @flags:	GPIO initialization flags
  * @label:	label to attach to the requested GPIO
  *
@@ -200,35 +199,21 @@ EXPORT_SYMBOL_GPL(devm_gpiod_get_from_of_node);
  * On successful request the GPIO pin is configured in accordance with
  * provided @flags.
  */
-struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
-						const char *con_id, int index,
-						struct fwnode_handle *child,
-						enum gpiod_flags flags,
-						const char *label)
+struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
+					      struct fwnode_handle *fwnode,
+					      const char *con_id, int index,
+					      enum gpiod_flags flags,
+					      const char *label)
 {
-	char prop_name[32]; /* 32 is max size of property name */
 	struct gpio_desc **dr;
 	struct gpio_desc *desc;
-	unsigned int i;
 
 	dr = devres_alloc(devm_gpiod_release, sizeof(struct gpio_desc *),
 			  GFP_KERNEL);
 	if (!dr)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
-		if (con_id)
-			snprintf(prop_name, sizeof(prop_name), "%s-%s",
-					    con_id, gpio_suffixes[i]);
-		else
-			snprintf(prop_name, sizeof(prop_name), "%s",
-					    gpio_suffixes[i]);
-
-		desc = fwnode_get_named_gpiod(child, prop_name, index, flags,
-					      label);
-		if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
-			break;
-	}
+	desc = fwnode_gpiod_get_index(fwnode, con_id, index, flags, label);
 	if (IS_ERR(desc)) {
 		devres_free(dr);
 		return desc;
@@ -239,7 +224,7 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 
 	return desc;
 }
-EXPORT_SYMBOL_GPL(devm_fwnode_get_index_gpiod_from_child);
+EXPORT_SYMBOL_GPL(devm_fwnode_gpiod_get_index);
 
 /**
  * devm_gpiod_get_index_optional - Resource-managed gpiod_get_index_optional()
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 80ea49f570f4..1b3f217a35e2 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -23,11 +23,34 @@
 #include "gpiolib.h"
 #include "gpiolib-of.h"
 
+/**
+ * of_gpio_spi_cs_get_count() - special GPIO counting for SPI
+ * Some elder GPIO controllers need special quirks. Currently we handle
+ * the Freescale GPIO controller with bindings that doesn't use the
+ * established "cs-gpios" for chip selects but instead rely on
+ * "gpios" for the chip select lines. If we detect this, we redirect
+ * the counting of "cs-gpios" to count "gpios" transparent to the
+ * driver.
+ */
+static int of_gpio_spi_cs_get_count(struct device *dev, const char *con_id)
+{
+	struct device_node *np = dev->of_node;
+
+	if (!IS_ENABLED(CONFIG_SPI_MASTER))
+		return 0;
+	if (!con_id || strcmp(con_id, "cs"))
+		return 0;
+	if (!of_device_is_compatible(np, "fsl,spi") &&
+	    !of_device_is_compatible(np, "aeroflexgaisler,spictrl"))
+		return 0;
+	return of_gpio_named_count(np, "gpios");
+}
+
 /*
  * This is used by external users of of_gpio_count() from <linux/of_gpio.h>
  *
  * FIXME: get rid of those external users by converting them to GPIO
- * descriptors and let them all use gpiod_get_count()
+ * descriptors and let them all use gpiod_count()
  */
 int of_gpio_get_count(struct device *dev, const char *con_id)
 {
@@ -35,6 +58,10 @@ int of_gpio_get_count(struct device *dev, const char *con_id)
 	char propname[32];
 	unsigned int i;
 
+	ret = of_gpio_spi_cs_get_count(dev, con_id);
+	if (ret > 0)
+		return ret;
+
 	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
 		if (con_id)
 			snprintf(propname, sizeof(propname), "%s-%s",
@@ -84,8 +111,9 @@ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip,
 /**
  * of_gpio_need_valid_mask() - figure out if the OF GPIO driver needs
  * to set the .valid_mask
- * @dev: the device for the GPIO provider
- * @return: true if the valid mask needs to be set
+ * @gc: the target gpio_chip
+ *
+ * Return: true if the valid mask needs to be set
  */
 bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
 {
@@ -104,27 +132,6 @@ static void of_gpio_flags_quirks(struct device_node *np,
 				 int index)
 {
 	/*
-	 * Handle MMC "cd-inverted" and "wp-inverted" semantics.
-	 */
-	if (IS_ENABLED(CONFIG_MMC)) {
-		/*
-		 * Active low is the default according to the
-		 * SDHCI specification and the device tree
-		 * bindings. However the code in the current
-		 * kernel was written such that the phandle
-		 * flags were always respected, and "cd-inverted"
-		 * would invert the flag from the device phandle.
-		 */
-		if (!strcmp(propname, "cd-gpios")) {
-			if (of_property_read_bool(np, "cd-inverted"))
-				*flags ^= OF_GPIO_ACTIVE_LOW;
-		}
-		if (!strcmp(propname, "wp-gpios")) {
-			if (of_property_read_bool(np, "wp-inverted"))
-				*flags ^= OF_GPIO_ACTIVE_LOW;
-		}
-	}
-	/*
 	 * Some GPIO fixed regulator quirks.
 	 * Note that active low is the default.
 	 */
@@ -134,18 +141,20 @@ static void of_gpio_flags_quirks(struct device_node *np,
 	     (!(strcmp(propname, "enable-gpio") &&
 		strcmp(propname, "enable-gpios")) &&
 	      of_device_is_compatible(np, "regulator-gpio")))) {
+		bool active_low = !of_property_read_bool(np,
+							 "enable-active-high");
 		/*
 		 * The regulator GPIO handles are specified such that the
 		 * presence or absence of "enable-active-high" solely controls
 		 * the polarity of the GPIO line. Any phandle flags must
 		 * be actively ignored.
 		 */
-		if (*flags & OF_GPIO_ACTIVE_LOW) {
+		if ((*flags & OF_GPIO_ACTIVE_LOW) && !active_low) {
 			pr_warn("%s GPIO handle specifies active low - ignored\n",
 				of_node_full_name(np));
 			*flags &= ~OF_GPIO_ACTIVE_LOW;
 		}
-		if (!of_property_read_bool(np, "enable-active-high"))
+		if (active_low)
 			*flags |= OF_GPIO_ACTIVE_LOW;
 	}
 	/*
@@ -882,16 +891,13 @@ int of_gpiochip_add(struct gpio_chip *chip)
 	of_node_get(chip->of_node);
 
 	ret = of_gpiochip_scan_gpios(chip);
-	if (ret) {
+	if (ret)
 		of_node_put(chip->of_node);
-		gpiochip_remove_pin_ranges(chip);
-	}
 
 	return ret;
 }
 
 void of_gpiochip_remove(struct gpio_chip *chip)
 {
-	gpiochip_remove_pin_ranges(chip);
 	of_node_put(chip->of_node);
 }
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index fbf6b1a0a4fa..23e3d335cd54 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -762,10 +762,9 @@ int gpiochip_sysfs_register(struct gpio_device *gdev)
 		parent = &gdev->dev;
 
 	/* use chip->base for the ID; it's already known to be unique */
-	dev = device_create_with_groups(&gpio_class, parent,
-					MKDEV(0, 0),
-					chip, gpiochip_groups,
-					"gpiochip%d", chip->base);
+	dev = device_create_with_groups(&gpio_class, parent, MKDEV(0, 0), chip,
+					gpiochip_groups, GPIOCHIP_NAME "%d",
+					chip->base);
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 104ed299d5ea..99ac27a72e28 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -140,7 +140,7 @@ EXPORT_SYMBOL_GPL(gpio_to_desc);
  * in the given chip for the specified hardware number.
  */
 struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip,
-				    u16 hwnum)
+				    unsigned int hwnum)
 {
 	struct gpio_device *gdev = chip->gpiodev;
 
@@ -220,19 +220,27 @@ int gpiod_get_direction(struct gpio_desc *desc)
 	chip = gpiod_to_chip(desc);
 	offset = gpio_chip_hwgpio(desc);
 
+	/*
+	 * Open drain emulation using input mode may incorrectly report
+	 * input here, fix that up.
+	 */
+	if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) &&
+	    test_bit(FLAG_IS_OUT, &desc->flags))
+		return 0;
+
 	if (!chip->get_direction)
 		return -ENOTSUPP;
 
 	ret = chip->get_direction(chip, offset);
-	if (ret > 0) {
-		/* GPIOF_DIR_IN, or other positive */
+	if (ret < 0)
+		return ret;
+
+	/* GPIOF_DIR_IN or other positive, otherwise GPIOF_DIR_OUT */
+	if (ret > 0)
 		ret = 1;
-		clear_bit(FLAG_IS_OUT, &desc->flags);
-	}
-	if (ret == 0) {
-		/* GPIOF_DIR_OUT */
-		set_bit(FLAG_IS_OUT, &desc->flags);
-	}
+
+	assign_bit(FLAG_IS_OUT, &desc->flags, !ret);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(gpiod_get_direction);
@@ -390,6 +398,14 @@ static void gpiochip_free_valid_mask(struct gpio_chip *gpiochip)
 	gpiochip->valid_mask = NULL;
 }
 
+static int gpiochip_add_pin_ranges(struct gpio_chip *gc)
+{
+	if (gc->add_pin_ranges)
+		return gc->add_pin_ranges(gc);
+
+	return 0;
+}
+
 bool gpiochip_line_is_valid(const struct gpio_chip *gpiochip,
 				unsigned int offset)
 {
@@ -422,9 +438,118 @@ struct linehandle_state {
 	(GPIOHANDLE_REQUEST_INPUT | \
 	GPIOHANDLE_REQUEST_OUTPUT | \
 	GPIOHANDLE_REQUEST_ACTIVE_LOW | \
+	GPIOHANDLE_REQUEST_BIAS_PULL_UP | \
+	GPIOHANDLE_REQUEST_BIAS_PULL_DOWN | \
+	GPIOHANDLE_REQUEST_BIAS_DISABLE | \
 	GPIOHANDLE_REQUEST_OPEN_DRAIN | \
 	GPIOHANDLE_REQUEST_OPEN_SOURCE)
 
+static int linehandle_validate_flags(u32 flags)
+{
+	/* Return an error if an unknown flag is set */
+	if (flags & ~GPIOHANDLE_REQUEST_VALID_FLAGS)
+		return -EINVAL;
+
+	/*
+	 * Do not allow both INPUT & OUTPUT flags to be set as they are
+	 * contradictory.
+	 */
+	if ((flags & GPIOHANDLE_REQUEST_INPUT) &&
+	    (flags & GPIOHANDLE_REQUEST_OUTPUT))
+		return -EINVAL;
+
+	/*
+	 * Do not allow OPEN_SOURCE & OPEN_DRAIN flags in a single request. If
+	 * the hardware actually supports enabling both at the same time the
+	 * electrical result would be disastrous.
+	 */
+	if ((flags & GPIOHANDLE_REQUEST_OPEN_DRAIN) &&
+	    (flags & GPIOHANDLE_REQUEST_OPEN_SOURCE))
+		return -EINVAL;
+
+	/* OPEN_DRAIN and OPEN_SOURCE flags only make sense for output mode. */
+	if (!(flags & GPIOHANDLE_REQUEST_OUTPUT) &&
+	    ((flags & GPIOHANDLE_REQUEST_OPEN_DRAIN) ||
+	     (flags & GPIOHANDLE_REQUEST_OPEN_SOURCE)))
+		return -EINVAL;
+
+	/* Bias flags only allowed for input or output mode. */
+	if (!((flags & GPIOHANDLE_REQUEST_INPUT) ||
+	      (flags & GPIOHANDLE_REQUEST_OUTPUT)) &&
+	    ((flags & GPIOHANDLE_REQUEST_BIAS_DISABLE) ||
+	     (flags & GPIOHANDLE_REQUEST_BIAS_PULL_UP) ||
+	     (flags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN)))
+		return -EINVAL;
+
+	/* Only one bias flag can be set. */
+	if (((flags & GPIOHANDLE_REQUEST_BIAS_DISABLE) &&
+	     (flags & (GPIOHANDLE_REQUEST_BIAS_PULL_DOWN |
+			GPIOHANDLE_REQUEST_BIAS_PULL_UP))) ||
+	    ((flags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN) &&
+	     (flags & GPIOHANDLE_REQUEST_BIAS_PULL_UP)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static long linehandle_set_config(struct linehandle_state *lh,
+				  void __user *ip)
+{
+	struct gpiohandle_config gcnf;
+	struct gpio_desc *desc;
+	int i, ret;
+	u32 lflags;
+	unsigned long *flagsp;
+
+	if (copy_from_user(&gcnf, ip, sizeof(gcnf)))
+		return -EFAULT;
+
+	lflags = gcnf.flags;
+	ret = linehandle_validate_flags(lflags);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < lh->numdescs; i++) {
+		desc = lh->descs[i];
+		flagsp = &desc->flags;
+
+		assign_bit(FLAG_ACTIVE_LOW, flagsp,
+			lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW);
+
+		assign_bit(FLAG_OPEN_DRAIN, flagsp,
+			lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN);
+
+		assign_bit(FLAG_OPEN_SOURCE, flagsp,
+			lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE);
+
+		assign_bit(FLAG_PULL_UP, flagsp,
+			lflags & GPIOHANDLE_REQUEST_BIAS_PULL_UP);
+
+		assign_bit(FLAG_PULL_DOWN, flagsp,
+			lflags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN);
+
+		assign_bit(FLAG_BIAS_DISABLE, flagsp,
+			lflags & GPIOHANDLE_REQUEST_BIAS_DISABLE);
+
+		/*
+		 * Lines have to be requested explicitly for input
+		 * or output, else the line will be treated "as is".
+		 */
+		if (lflags & GPIOHANDLE_REQUEST_OUTPUT) {
+			int val = !!gcnf.default_values[i];
+
+			ret = gpiod_direction_output(desc, val);
+			if (ret)
+				return ret;
+		} else if (lflags & GPIOHANDLE_REQUEST_INPUT) {
+			ret = gpiod_direction_input(desc);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
 static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -475,6 +600,8 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 					      lh->descs,
 					      NULL,
 					      vals);
+	} else if (cmd == GPIOHANDLE_SET_CONFIG_IOCTL) {
+		return linehandle_set_config(lh, ip);
 	}
 	return -EINVAL;
 }
@@ -526,32 +653,9 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 
 	lflags = handlereq.flags;
 
-	/* Return an error if an unknown flag is set */
-	if (lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS)
-		return -EINVAL;
-
-	/*
-	 * Do not allow both INPUT & OUTPUT flags to be set as they are
-	 * contradictory.
-	 */
-	if ((lflags & GPIOHANDLE_REQUEST_INPUT) &&
-	    (lflags & GPIOHANDLE_REQUEST_OUTPUT))
-		return -EINVAL;
-
-	/*
-	 * Do not allow OPEN_SOURCE & OPEN_DRAIN flags in a single request. If
-	 * the hardware actually supports enabling both at the same time the
-	 * electrical result would be disastrous.
-	 */
-	if ((lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) &&
-	    (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE))
-		return -EINVAL;
-
-	/* OPEN_DRAIN and OPEN_SOURCE flags only make sense for output mode. */
-	if (!(lflags & GPIOHANDLE_REQUEST_OUTPUT) &&
-	    ((lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) ||
-	     (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE)))
-		return -EINVAL;
+	ret = linehandle_validate_flags(lflags);
+	if (ret)
+		return ret;
 
 	lh = kzalloc(sizeof(*lh), GFP_KERNEL);
 	if (!lh)
@@ -573,14 +677,13 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 	/* Request each GPIO */
 	for (i = 0; i < handlereq.lines; i++) {
 		u32 offset = handlereq.lineoffsets[i];
-		struct gpio_desc *desc;
+		struct gpio_desc *desc = gpiochip_get_desc(gdev->chip, offset);
 
-		if (offset >= gdev->ngpio) {
-			ret = -EINVAL;
+		if (IS_ERR(desc)) {
+			ret = PTR_ERR(desc);
 			goto out_free_descs;
 		}
 
-		desc = &gdev->descs[offset];
 		ret = gpiod_request(desc, lh->label);
 		if (ret)
 			goto out_free_descs;
@@ -593,6 +696,12 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 			set_bit(FLAG_OPEN_DRAIN, &desc->flags);
 		if (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE)
 			set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+		if (lflags & GPIOHANDLE_REQUEST_BIAS_DISABLE)
+			set_bit(FLAG_BIAS_DISABLE, &desc->flags);
+		if (lflags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN)
+			set_bit(FLAG_PULL_DOWN, &desc->flags);
+		if (lflags & GPIOHANDLE_REQUEST_BIAS_PULL_UP)
+			set_bit(FLAG_PULL_UP, &desc->flags);
 
 		ret = gpiod_set_transitory(desc, false);
 		if (ret < 0)
@@ -895,6 +1004,33 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 	if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
 		return -EFAULT;
 
+	offset = eventreq.lineoffset;
+	lflags = eventreq.handleflags;
+	eflags = eventreq.eventflags;
+
+	desc = gpiochip_get_desc(gdev->chip, offset);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	/* Return an error if a unknown flag is set */
+	if ((lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) ||
+	    (eflags & ~GPIOEVENT_REQUEST_VALID_FLAGS))
+		return -EINVAL;
+
+	/* This is just wrong: we don't look for events on output lines */
+	if ((lflags & GPIOHANDLE_REQUEST_OUTPUT) ||
+	    (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) ||
+	    (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE))
+		return -EINVAL;
+
+	/* Only one bias flag can be set. */
+	if (((lflags & GPIOHANDLE_REQUEST_BIAS_DISABLE) &&
+	     (lflags & (GPIOHANDLE_REQUEST_BIAS_PULL_DOWN |
+			GPIOHANDLE_REQUEST_BIAS_PULL_UP))) ||
+	    ((lflags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN) &&
+	     (lflags & GPIOHANDLE_REQUEST_BIAS_PULL_UP)))
+		return -EINVAL;
+
 	le = kzalloc(sizeof(*le), GFP_KERNEL);
 	if (!le)
 		return -ENOMEM;
@@ -912,31 +1048,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 		}
 	}
 
-	offset = eventreq.lineoffset;
-	lflags = eventreq.handleflags;
-	eflags = eventreq.eventflags;
-
-	if (offset >= gdev->ngpio) {
-		ret = -EINVAL;
-		goto out_free_label;
-	}
-
-	/* Return an error if a unknown flag is set */
-	if ((lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) ||
-	    (eflags & ~GPIOEVENT_REQUEST_VALID_FLAGS)) {
-		ret = -EINVAL;
-		goto out_free_label;
-	}
-
-	/* This is just wrong: we don't look for events on output lines */
-	if ((lflags & GPIOHANDLE_REQUEST_OUTPUT) ||
-	    (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) ||
-	    (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE)) {
-		ret = -EINVAL;
-		goto out_free_label;
-	}
-
-	desc = &gdev->descs[offset];
 	ret = gpiod_request(desc, le->label);
 	if (ret)
 		goto out_free_label;
@@ -945,6 +1056,12 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 
 	if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW)
 		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+	if (lflags & GPIOHANDLE_REQUEST_BIAS_DISABLE)
+		set_bit(FLAG_BIAS_DISABLE, &desc->flags);
+	if (lflags & GPIOHANDLE_REQUEST_BIAS_PULL_DOWN)
+		set_bit(FLAG_PULL_DOWN, &desc->flags);
+	if (lflags & GPIOHANDLE_REQUEST_BIAS_PULL_UP)
+		set_bit(FLAG_PULL_UP, &desc->flags);
 
 	ret = gpiod_direction_input(desc);
 	if (ret)
@@ -1057,10 +1174,11 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 		if (copy_from_user(&lineinfo, ip, sizeof(lineinfo)))
 			return -EFAULT;
-		if (lineinfo.line_offset >= gdev->ngpio)
-			return -EINVAL;
 
-		desc = &gdev->descs[lineinfo.line_offset];
+		desc = gpiochip_get_desc(chip, lineinfo.line_offset);
+		if (IS_ERR(desc))
+			return PTR_ERR(desc);
+
 		if (desc->name) {
 			strncpy(lineinfo.name, desc->name,
 				sizeof(lineinfo.name));
@@ -1098,6 +1216,12 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
 			lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE |
 					   GPIOLINE_FLAG_IS_OUT);
+		if (test_bit(FLAG_BIAS_DISABLE, &desc->flags))
+			lineinfo.flags |= GPIOLINE_FLAG_BIAS_DISABLE;
+		if (test_bit(FLAG_PULL_DOWN, &desc->flags))
+			lineinfo.flags |= GPIOLINE_FLAG_BIAS_PULL_DOWN;
+		if (test_bit(FLAG_PULL_UP, &desc->flags))
+			lineinfo.flags |= GPIOLINE_FLAG_BIAS_PULL_UP;
 
 		if (copy_to_user(ip, &lineinfo, sizeof(lineinfo)))
 			return -EFAULT;
@@ -1294,7 +1418,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 		ret = gdev->id;
 		goto err_free_gdev;
 	}
-	dev_set_name(&gdev->dev, "gpiochip%d", gdev->id);
+	dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id);
 	device_initialize(&gdev->dev);
 	dev_set_drvdata(&gdev->dev, gdev);
 	if (chip->parent && chip->parent->driver)
@@ -1319,7 +1443,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 
 	if (chip->ngpio > FASTPATH_NGPIO)
 		chip_warn(chip, "line cnt %u is greater than fast path cnt %u\n",
-		chip->ngpio, FASTPATH_NGPIO);
+			  chip->ngpio, FASTPATH_NGPIO);
 
 	gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL);
 	if (!gdev->label) {
@@ -1362,11 +1486,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 		goto err_free_label;
 	}
 
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
 	for (i = 0; i < chip->ngpio; i++)
 		gdev->descs[i].gdev = gdev;
 
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
 #ifdef CONFIG_PINCTRL
 	INIT_LIST_HEAD(&gdev->pin_ranges);
 #endif
@@ -1391,27 +1515,27 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 		struct gpio_desc *desc = &gdev->descs[i];
 
 		if (chip->get_direction && gpiochip_line_is_valid(chip, i)) {
-			if (!chip->get_direction(chip, i))
-				set_bit(FLAG_IS_OUT, &desc->flags);
-			else
-				clear_bit(FLAG_IS_OUT, &desc->flags);
+			assign_bit(FLAG_IS_OUT,
+				   &desc->flags, !chip->get_direction(chip, i));
 		} else {
-			if (!chip->direction_input)
-				set_bit(FLAG_IS_OUT, &desc->flags);
-			else
-				clear_bit(FLAG_IS_OUT, &desc->flags);
+			assign_bit(FLAG_IS_OUT,
+				   &desc->flags, !chip->direction_input);
 		}
 	}
 
+	ret = gpiochip_add_pin_ranges(chip);
+	if (ret)
+		goto err_remove_of_chip;
+
 	acpi_gpiochip_add(chip);
 
 	machine_gpiochip_add(chip);
 
-	ret = gpiochip_irqchip_init_hw(chip);
+	ret = gpiochip_irqchip_init_valid_mask(chip);
 	if (ret)
 		goto err_remove_acpi_chip;
 
-	ret = gpiochip_irqchip_init_valid_mask(chip);
+	ret = gpiochip_irqchip_init_hw(chip);
 	if (ret)
 		goto err_remove_acpi_chip;
 
@@ -1444,6 +1568,7 @@ err_remove_of_chip:
 	gpiochip_free_hogs(chip);
 	of_gpiochip_remove(chip);
 err_free_gpiochip_mask:
+	gpiochip_remove_pin_ranges(chip);
 	gpiochip_free_valid_mask(chip);
 err_remove_from_list:
 	spin_lock_irqsave(&gpio_lock, flags);
@@ -1499,8 +1624,8 @@ void gpiochip_remove(struct gpio_chip *chip)
 	gdev->chip = NULL;
 	gpiochip_irqchip_remove(chip);
 	acpi_gpiochip_remove(chip);
-	gpiochip_remove_pin_ranges(chip);
 	of_gpiochip_remove(chip);
+	gpiochip_remove_pin_ranges(chip);
 	gpiochip_free_valid_mask(chip);
 	/*
 	 * We accept no more calls into the driver from this point, so
@@ -1539,7 +1664,7 @@ static void devm_gpio_chip_release(struct device *dev, void *res)
 }
 
 /**
- * devm_gpiochip_add_data() - Resource manager gpiochip_add_data()
+ * devm_gpiochip_add_data() - Resource managed gpiochip_add_data()
  * @dev: pointer to the device that gpio_chip belongs to.
  * @chip: the chip to register, with chip->base initialized
  * @data: driver-private data associated with this chip
@@ -1675,7 +1800,7 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_irq_valid);
  * gpiochip_set_cascaded_irqchip() - connects a cascaded irqchip to a gpiochip
  * @gc: the gpiochip to set the irqchip chain to
  * @parent_irq: the irq number corresponding to the parent IRQ for this
- * chained irqchip
+ * cascaded irqchip
  * @parent_handler: the parent interrupt handler for the accumulated IRQ
  * coming out of the gpiochip. If the interrupt is nested rather than
  * cascaded, pass NULL in this handler argument
@@ -1718,29 +1843,6 @@ static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gc,
 }
 
 /**
- * gpiochip_set_chained_irqchip() - connects a chained irqchip to a gpiochip
- * @gpiochip: the gpiochip to set the irqchip chain to
- * @irqchip: the irqchip to chain to the gpiochip
- * @parent_irq: the irq number corresponding to the parent IRQ for this
- * chained irqchip
- * @parent_handler: the parent interrupt handler for the accumulated IRQ
- * coming out of the gpiochip.
- */
-void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
-				  struct irq_chip *irqchip,
-				  unsigned int parent_irq,
-				  irq_flow_handler_t parent_handler)
-{
-	if (gpiochip->irq.threaded) {
-		chip_err(gpiochip, "tried to chain a threaded gpiochip\n");
-		return;
-	}
-
-	gpiochip_set_cascaded_irqchip(gpiochip, parent_irq, parent_handler);
-}
-EXPORT_SYMBOL_GPL(gpiochip_set_chained_irqchip);
-
-/**
  * gpiochip_set_nested_irqchip() - connects a nested irqchip to a gpiochip
  * @gpiochip: the gpiochip to set the irqchip nested handler to
  * @irqchip: the irqchip to nest to the gpiochip
@@ -1865,7 +1967,7 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
 	irq_hw_number_t hwirq;
 	unsigned int type = IRQ_TYPE_NONE;
 	struct irq_fwspec *fwspec = data;
-	struct irq_fwspec parent_fwspec;
+	void *parent_arg;
 	unsigned int parent_hwirq;
 	unsigned int parent_type;
 	struct gpio_irq_chip *girq = &gc->irq;
@@ -1881,7 +1983,7 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
 	if (ret)
 		return ret;
 
-	chip_info(gc, "allocate IRQ %d, hwirq %lu\n", irq,  hwirq);
+	chip_dbg(gc, "allocate IRQ %d, hwirq %lu\n", irq,  hwirq);
 
 	ret = girq->child_to_parent_hwirq(gc, hwirq, type,
 					  &parent_hwirq, &parent_type);
@@ -1889,7 +1991,7 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
 		chip_err(gc, "can't look up hwirq %lu\n", hwirq);
 		return ret;
 	}
-	chip_info(gc, "found parent hwirq %u\n", parent_hwirq);
+	chip_dbg(gc, "found parent hwirq %u\n", parent_hwirq);
 
 	/*
 	 * We set handle_bad_irq because the .set_type() should
@@ -1904,23 +2006,27 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
 			    NULL, NULL);
 	irq_set_probe(irq);
 
-	/*
-	 * Create a IRQ fwspec to send up to the parent irqdomain:
-	 * specify the hwirq we address on the parent and tie it
-	 * all together up the chain.
-	 */
-	parent_fwspec.fwnode = d->parent->fwnode;
 	/* This parent only handles asserted level IRQs */
-	girq->populate_parent_fwspec(gc, &parent_fwspec, parent_hwirq,
-				     parent_type);
-	chip_info(gc, "alloc_irqs_parent for %d parent hwirq %d\n",
+	parent_arg = girq->populate_parent_alloc_arg(gc, parent_hwirq, parent_type);
+	if (!parent_arg)
+		return -ENOMEM;
+
+	chip_dbg(gc, "alloc_irqs_parent for %d parent hwirq %d\n",
 		  irq, parent_hwirq);
-	ret = irq_domain_alloc_irqs_parent(d, irq, 1, &parent_fwspec);
+	irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key);
+	ret = irq_domain_alloc_irqs_parent(d, irq, 1, parent_arg);
+	/*
+	 * If the parent irqdomain is msi, the interrupts have already
+	 * been allocated, so the EEXIST is good.
+	 */
+	if (irq_domain_is_msi(d->parent) && (ret == -EEXIST))
+		ret = 0;
 	if (ret)
 		chip_err(gc,
 			 "failed to allocate parent hwirq %d for hwirq %lu\n",
 			 parent_hwirq, hwirq);
 
+	kfree(parent_arg);
 	return ret;
 }
 
@@ -1957,8 +2063,8 @@ static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
 	if (!gc->irq.child_offset_to_irq)
 		gc->irq.child_offset_to_irq = gpiochip_child_offset_to_irq_noop;
 
-	if (!gc->irq.populate_parent_fwspec)
-		gc->irq.populate_parent_fwspec =
+	if (!gc->irq.populate_parent_alloc_arg)
+		gc->irq.populate_parent_alloc_arg =
 			gpiochip_populate_parent_fwspec_twocell;
 
 	gpiochip_hierarchy_setup_domain_ops(&gc->irq.child_irq_domain_ops);
@@ -1984,27 +2090,43 @@ static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
 	return !!gc->irq.parent_domain;
 }
 
-void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
-					     struct irq_fwspec *fwspec,
+void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
 					     unsigned int parent_hwirq,
 					     unsigned int parent_type)
 {
+	struct irq_fwspec *fwspec;
+
+	fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
+	if (!fwspec)
+		return NULL;
+
+	fwspec->fwnode = chip->irq.parent_domain->fwnode;
 	fwspec->param_count = 2;
 	fwspec->param[0] = parent_hwirq;
 	fwspec->param[1] = parent_type;
+
+	return fwspec;
 }
 EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_twocell);
 
-void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
-					      struct irq_fwspec *fwspec,
+void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
 					      unsigned int parent_hwirq,
 					      unsigned int parent_type)
 {
+	struct irq_fwspec *fwspec;
+
+	fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
+	if (!fwspec)
+		return NULL;
+
+	fwspec->fwnode = chip->irq.parent_domain->fwnode;
 	fwspec->param_count = 4;
 	fwspec->param[0] = 0;
 	fwspec->param[1] = parent_hwirq;
 	fwspec->param[2] = 0;
 	fwspec->param[3] = parent_type;
+
+	return fwspec;
 }
 EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell);
 
@@ -2790,6 +2912,9 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
 		clear_bit(FLAG_REQUESTED, &desc->flags);
 		clear_bit(FLAG_OPEN_DRAIN, &desc->flags);
 		clear_bit(FLAG_OPEN_SOURCE, &desc->flags);
+		clear_bit(FLAG_PULL_UP, &desc->flags);
+		clear_bit(FLAG_PULL_DOWN, &desc->flags);
+		clear_bit(FLAG_BIAS_DISABLE, &desc->flags);
 		clear_bit(FLAG_IS_HOGGED, &desc->flags);
 		ret = true;
 	}
@@ -2857,7 +2982,8 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested);
  * A pointer to the GPIO descriptor, or an ERR_PTR()-encoded negative error
  * code on failure.
  */
-struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
+struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip,
+					    unsigned int hwnum,
 					    const char *label,
 					    enum gpio_lookup_flags lflags,
 					    enum gpiod_flags dflags)
@@ -2909,24 +3035,33 @@ EXPORT_SYMBOL_GPL(gpiochip_free_own_desc);
  * rely on gpio_request() having been called beforehand.
  */
 
-static int gpio_set_config(struct gpio_chip *gc, unsigned offset,
+static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 			   enum pin_config_param mode)
 {
-	unsigned long config;
-	unsigned arg;
+	if (!gc->set_config)
+		return -ENOTSUPP;
+
+	return gc->set_config(gc, offset, mode);
+}
 
-	switch (mode) {
-	case PIN_CONFIG_BIAS_PULL_DOWN:
-	case PIN_CONFIG_BIAS_PULL_UP:
-		arg = 1;
-		break;
+static int gpio_set_bias(struct gpio_chip *chip, struct gpio_desc *desc)
+{
+	int bias = 0;
+	int ret = 0;
 
-	default:
-		arg = 0;
-	}
+	if (test_bit(FLAG_BIAS_DISABLE, &desc->flags))
+		bias = PIN_CONFIG_BIAS_DISABLE;
+	else if (test_bit(FLAG_PULL_UP, &desc->flags))
+		bias = PIN_CONFIG_BIAS_PULL_UP;
+	else if (test_bit(FLAG_PULL_DOWN, &desc->flags))
+		bias = PIN_CONFIG_BIAS_PULL_DOWN;
 
-	config = PIN_CONF_PACKED(mode, arg);
-	return gc->set_config ? gc->set_config(gc, offset, config) : -ENOTSUPP;
+	if (bias) {
+		ret = gpio_set_config(chip, gpio_chip_hwgpio(desc), bias);
+		if (ret != -ENOTSUPP)
+			return ret;
+	}
+	return 0;
 }
 
 /**
@@ -2973,15 +3108,10 @@ int gpiod_direction_input(struct gpio_desc *desc)
 			   __func__);
 		return -EIO;
 	}
-	if (ret == 0)
+	if (ret == 0) {
 		clear_bit(FLAG_IS_OUT, &desc->flags);
-
-	if (test_bit(FLAG_PULL_UP, &desc->flags))
-		gpio_set_config(chip, gpio_chip_hwgpio(desc),
-				PIN_CONFIG_BIAS_PULL_UP);
-	else if (test_bit(FLAG_PULL_DOWN, &desc->flags))
-		gpio_set_config(chip, gpio_chip_hwgpio(desc),
-				PIN_CONFIG_BIAS_PULL_DOWN);
+		ret = gpio_set_bias(chip, desc);
+	}
 
 	trace_gpio_direction(desc_to_gpio(desc), 1, ret);
 
@@ -3111,6 +3241,9 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
 	}
 
 set_output_value:
+	ret = gpio_set_bias(gc, desc);
+	if (ret)
+		return ret;
 	return gpiod_direction_output_raw_commit(desc, value);
 
 set_output_flag:
@@ -3142,15 +3275,9 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
 
 	VALIDATE_DESC(desc);
 	chip = desc->gdev->chip;
-	if (!chip->set || !chip->set_config) {
-		gpiod_dbg(desc,
-			  "%s: missing set() or set_config() operations\n",
-			  __func__);
-		return -ENOTSUPP;
-	}
 
 	config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce);
-	return chip->set_config(chip, gpio_chip_hwgpio(desc), config);
+	return gpio_set_config(chip, gpio_chip_hwgpio(desc), config);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_debounce);
 
@@ -3174,10 +3301,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
 	 * Handle FLAG_TRANSITORY first, enabling queries to gpiolib for
 	 * persistence state.
 	 */
-	if (transitory)
-		set_bit(FLAG_TRANSITORY, &desc->flags);
-	else
-		clear_bit(FLAG_TRANSITORY, &desc->flags);
+	assign_bit(FLAG_TRANSITORY, &desc->flags, transitory);
 
 	/* If the driver supports it, set the persistence state now */
 	chip = desc->gdev->chip;
@@ -3187,7 +3311,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
 	packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
 					  !transitory);
 	gpio = gpio_chip_hwgpio(desc);
-	rc = chip->set_config(chip, gpio, packed);
+	rc = gpio_set_config(chip, gpio, packed);
 	if (rc == -ENOTSUPP) {
 		dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
 				gpio);
@@ -3211,6 +3335,17 @@ int gpiod_is_active_low(const struct gpio_desc *desc)
 }
 EXPORT_SYMBOL_GPL(gpiod_is_active_low);
 
+/**
+ * gpiod_toggle_active_low - toggle whether a GPIO is active-low or not
+ * @desc: the gpio descriptor to change
+ */
+void gpiod_toggle_active_low(struct gpio_desc *desc)
+{
+	VALIDATE_DESC_VOID(desc);
+	change_bit(FLAG_ACTIVE_LOW, &desc->flags);
+}
+EXPORT_SYMBOL_GPL(gpiod_toggle_active_low);
+
 /* I/O calls are only valid after configuration completed; the relevant
  * "is this a valid GPIO" error checks should already have been done.
  *
@@ -3633,10 +3768,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 				gpio_set_open_source_value_commit(desc, value);
 			} else {
 				__set_bit(hwgpio, mask);
-				if (value)
-					__set_bit(hwgpio, bits);
-				else
-					__clear_bit(hwgpio, bits);
+				__assign_bit(hwgpio, bits, value);
 				count++;
 			}
 			i++;
@@ -4320,8 +4452,9 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
 
 		if (chip->ngpio <= p->chip_hwnum) {
 			dev_err(dev,
-				"requested GPIO %d is out of range [0..%d] for chip %s\n",
-				idx, chip->ngpio, chip->label);
+				"requested GPIO %u (%u) is out of range [0..%u] for chip %s\n",
+				idx, p->chip_hwnum, chip->ngpio - 1,
+				chip->label);
 			return ERR_PTR(-EINVAL);
 		}
 
@@ -4356,6 +4489,54 @@ static int platform_gpio_count(struct device *dev, const char *con_id)
 }
 
 /**
+ * fwnode_gpiod_get_index - obtain a GPIO from firmware node
+ * @fwnode:	handle of the firmware node
+ * @con_id:	function within the GPIO consumer
+ * @index:	index of the GPIO to obtain for the consumer
+ * @flags:	GPIO initialization flags
+ * @label:	label to attach to the requested GPIO
+ *
+ * This function can be used for drivers that get their configuration
+ * from opaque firmware.
+ *
+ * The function properly finds the corresponding GPIO using whatever is the
+ * underlying firmware interface and then makes sure that the GPIO
+ * descriptor is requested before it is returned to the caller.
+ *
+ * Returns:
+ * On successful request the GPIO pin is configured in accordance with
+ * provided @flags.
+ *
+ * In case of error an ERR_PTR() is returned.
+ */
+struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode,
+					 const char *con_id, int index,
+					 enum gpiod_flags flags,
+					 const char *label)
+{
+	struct gpio_desc *desc;
+	char prop_name[32]; /* 32 is max size of property name */
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
+		if (con_id)
+			snprintf(prop_name, sizeof(prop_name), "%s-%s",
+					    con_id, gpio_suffixes[i]);
+		else
+			snprintf(prop_name, sizeof(prop_name), "%s",
+					    gpio_suffixes[i]);
+
+		desc = fwnode_get_named_gpiod(fwnode, prop_name, index, flags,
+					      label);
+		if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
+			break;
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index);
+
+/**
  * gpiod_count - return the number of GPIOs associated with a device / function
  *		or -ENOENT if no GPIO has been assigned to the requested function
  * @dev:	GPIO consumer, can be NULL for system-global GPIOs
@@ -4694,9 +4875,9 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
 
 	pr_info("GPIO line %d (%s) hogged as %s%s\n",
 		desc_to_gpio(desc), name,
-		(dflags&GPIOD_FLAGS_BIT_DIR_OUT) ? "output" : "input",
-		(dflags&GPIOD_FLAGS_BIT_DIR_OUT) ?
-		  (dflags&GPIOD_FLAGS_BIT_DIR_VAL) ? "/high" : "/low":"");
+		(dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? "output" : "input",
+		(dflags & GPIOD_FLAGS_BIT_DIR_OUT) ?
+		  (dflags & GPIOD_FLAGS_BIT_DIR_VAL) ? "/high" : "/low" : "");
 
 	return 0;
 }
@@ -4904,7 +5085,7 @@ static int __init gpiolib_dev_init(void)
 		return ret;
 	}
 
-	ret = alloc_chrdev_region(&gpio_devt, 0, GPIO_DEV_MAX, "gpiochip");
+	ret = alloc_chrdev_region(&gpio_devt, 0, GPIO_DEV_MAX, GPIOCHIP_NAME);
 	if (ret < 0) {
 		pr_err("gpiolib: failed to allocate char dev region\n");
 		bus_unregister(&gpio_bus_type);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index b8b10a409c7b..3e0aab2945d8 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/cdev.h>
 
+#define GPIOCHIP_NAME	"gpiochip"
+
 /**
  * struct gpio_device - internal state container for GPIO devices
  * @id: numerical ID number for the GPIO chip
@@ -78,7 +80,8 @@ struct gpio_array {
 	unsigned long		invert_mask[];
 };
 
-struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum);
+struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip,
+				    unsigned int hwnum);
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
 				  struct gpio_desc **desc_array,
@@ -110,6 +113,7 @@ struct gpio_desc {
 #define FLAG_TRANSITORY 12	/* GPIO may lose value in sleep or reset */
 #define FLAG_PULL_UP    13	/* GPIO has pull up enabled */
 #define FLAG_PULL_DOWN  14	/* GPIO has pull down enabled */
+#define FLAG_BIAS_DISABLE    15	/* GPIO has pull disabled */
 
 	/* Connection label */
 	const char		*label;
diff --git a/drivers/gpio/sgpio-aspeed.c b/drivers/gpio/sgpio-aspeed.c
deleted file mode 100644
index 7e99860ca447..000000000000
--- a/drivers/gpio/sgpio-aspeed.c
+++ /dev/null
@@ -1,533 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2019 American Megatrends International LLC.
- *
- * Author: Karthikeyan Mani <karthikeyanm@amiindia.co.in>
- */
-
-#include <linux/bitfield.h>
-#include <linux/clk.h>
-#include <linux/gpio/driver.h>
-#include <linux/hashtable.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-
-#define MAX_NR_SGPIO			80
-
-#define ASPEED_SGPIO_CTRL		0x54
-
-#define ASPEED_SGPIO_PINS_MASK		GENMASK(9, 6)
-#define ASPEED_SGPIO_CLK_DIV_MASK	GENMASK(31, 16)
-#define ASPEED_SGPIO_ENABLE		BIT(0)
-
-struct aspeed_sgpio {
-	struct gpio_chip chip;
-	struct clk *pclk;
-	spinlock_t lock;
-	void __iomem *base;
-	uint32_t dir_in[3];
-	int irq;
-};
-
-struct aspeed_sgpio_bank {
-	uint16_t    val_regs;
-	uint16_t    rdata_reg;
-	uint16_t    irq_regs;
-	const char  names[4][3];
-};
-
-/*
- * Note: The "value" register returns the input value when the GPIO is
- *	 configured as an input.
- *
- *	 The "rdata" register returns the output value when the GPIO is
- *	 configured as an output.
- */
-static const struct aspeed_sgpio_bank aspeed_sgpio_banks[] = {
-	{
-		.val_regs = 0x0000,
-		.rdata_reg = 0x0070,
-		.irq_regs = 0x0004,
-		.names = { "A", "B", "C", "D" },
-	},
-	{
-		.val_regs = 0x001C,
-		.rdata_reg = 0x0074,
-		.irq_regs = 0x0020,
-		.names = { "E", "F", "G", "H" },
-	},
-	{
-		.val_regs = 0x0038,
-		.rdata_reg = 0x0078,
-		.irq_regs = 0x003C,
-		.names = { "I", "J" },
-	},
-};
-
-enum aspeed_sgpio_reg {
-	reg_val,
-	reg_rdata,
-	reg_irq_enable,
-	reg_irq_type0,
-	reg_irq_type1,
-	reg_irq_type2,
-	reg_irq_status,
-};
-
-#define GPIO_VAL_VALUE      0x00
-#define GPIO_IRQ_ENABLE     0x00
-#define GPIO_IRQ_TYPE0      0x04
-#define GPIO_IRQ_TYPE1      0x08
-#define GPIO_IRQ_TYPE2      0x0C
-#define GPIO_IRQ_STATUS     0x10
-
-static void __iomem *bank_reg(struct aspeed_sgpio *gpio,
-				     const struct aspeed_sgpio_bank *bank,
-				     const enum aspeed_sgpio_reg reg)
-{
-	switch (reg) {
-	case reg_val:
-		return gpio->base + bank->val_regs + GPIO_VAL_VALUE;
-	case reg_rdata:
-		return gpio->base + bank->rdata_reg;
-	case reg_irq_enable:
-		return gpio->base + bank->irq_regs + GPIO_IRQ_ENABLE;
-	case reg_irq_type0:
-		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE0;
-	case reg_irq_type1:
-		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE1;
-	case reg_irq_type2:
-		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE2;
-	case reg_irq_status:
-		return gpio->base + bank->irq_regs + GPIO_IRQ_STATUS;
-	default:
-		/* acturally if code runs to here, it's an error case */
-		BUG_ON(1);
-	}
-}
-
-#define GPIO_BANK(x)    ((x) >> 5)
-#define GPIO_OFFSET(x)  ((x) & 0x1f)
-#define GPIO_BIT(x)     BIT(GPIO_OFFSET(x))
-
-static const struct aspeed_sgpio_bank *to_bank(unsigned int offset)
-{
-	unsigned int bank = GPIO_BANK(offset);
-
-	WARN_ON(bank >= ARRAY_SIZE(aspeed_sgpio_banks));
-	return &aspeed_sgpio_banks[bank];
-}
-
-static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
-{
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	const struct aspeed_sgpio_bank *bank = to_bank(offset);
-	unsigned long flags;
-	enum aspeed_sgpio_reg reg;
-	bool is_input;
-	int rc = 0;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	is_input = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
-	reg = is_input ? reg_val : reg_rdata;
-	rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset));
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return rc;
-}
-
-static void sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val)
-{
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	const struct aspeed_sgpio_bank *bank = to_bank(offset);
-	void __iomem *addr;
-	u32 reg = 0;
-
-	addr = bank_reg(gpio, bank, reg_val);
-	reg = ioread32(addr);
-
-	if (val)
-		reg |= GPIO_BIT(offset);
-	else
-		reg &= ~GPIO_BIT(offset);
-
-	iowrite32(reg, addr);
-}
-
-static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val)
-{
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	sgpio_set_value(gc, offset, val);
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-}
-
-static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset)
-{
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-	gpio->dir_in[GPIO_BANK(offset)] |= GPIO_BIT(offset);
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return 0;
-}
-
-static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val)
-{
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	gpio->dir_in[GPIO_BANK(offset)] &= ~GPIO_BIT(offset);
-	sgpio_set_value(gc, offset, val);
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return 0;
-}
-
-static int aspeed_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset)
-{
-	int dir_status;
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-	dir_status = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return dir_status;
-
-}
-
-static void irqd_to_aspeed_sgpio_data(struct irq_data *d,
-					struct aspeed_sgpio **gpio,
-					const struct aspeed_sgpio_bank **bank,
-					u32 *bit, int *offset)
-{
-	struct aspeed_sgpio *internal;
-
-	*offset = irqd_to_hwirq(d);
-	internal = irq_data_get_irq_chip_data(d);
-	WARN_ON(!internal);
-
-	*gpio = internal;
-	*bank = to_bank(*offset);
-	*bit = GPIO_BIT(*offset);
-}
-
-static void aspeed_sgpio_irq_ack(struct irq_data *d)
-{
-	const struct aspeed_sgpio_bank *bank;
-	struct aspeed_sgpio *gpio;
-	unsigned long flags;
-	void __iomem *status_addr;
-	int offset;
-	u32 bit;
-
-	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
-
-	status_addr = bank_reg(gpio, bank, reg_irq_status);
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	iowrite32(bit, status_addr);
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-}
-
-static void aspeed_sgpio_irq_set_mask(struct irq_data *d, bool set)
-{
-	const struct aspeed_sgpio_bank *bank;
-	struct aspeed_sgpio *gpio;
-	unsigned long flags;
-	u32 reg, bit;
-	void __iomem *addr;
-	int offset;
-
-	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
-	addr = bank_reg(gpio, bank, reg_irq_enable);
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	reg = ioread32(addr);
-	if (set)
-		reg |= bit;
-	else
-		reg &= ~bit;
-
-	iowrite32(reg, addr);
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-}
-
-static void aspeed_sgpio_irq_mask(struct irq_data *d)
-{
-	aspeed_sgpio_irq_set_mask(d, false);
-}
-
-static void aspeed_sgpio_irq_unmask(struct irq_data *d)
-{
-	aspeed_sgpio_irq_set_mask(d, true);
-}
-
-static int aspeed_sgpio_set_type(struct irq_data *d, unsigned int type)
-{
-	u32 type0 = 0;
-	u32 type1 = 0;
-	u32 type2 = 0;
-	u32 bit, reg;
-	const struct aspeed_sgpio_bank *bank;
-	irq_flow_handler_t handler;
-	struct aspeed_sgpio *gpio;
-	unsigned long flags;
-	void __iomem *addr;
-	int offset;
-
-	irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
-
-	switch (type & IRQ_TYPE_SENSE_MASK) {
-	case IRQ_TYPE_EDGE_BOTH:
-		type2 |= bit;
-		/* fall through */
-	case IRQ_TYPE_EDGE_RISING:
-		type0 |= bit;
-		/* fall through */
-	case IRQ_TYPE_EDGE_FALLING:
-		handler = handle_edge_irq;
-		break;
-	case IRQ_TYPE_LEVEL_HIGH:
-		type0 |= bit;
-		/* fall through */
-	case IRQ_TYPE_LEVEL_LOW:
-		type1 |= bit;
-		handler = handle_level_irq;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	spin_lock_irqsave(&gpio->lock, flags);
-
-	addr = bank_reg(gpio, bank, reg_irq_type0);
-	reg = ioread32(addr);
-	reg = (reg & ~bit) | type0;
-	iowrite32(reg, addr);
-
-	addr = bank_reg(gpio, bank, reg_irq_type1);
-	reg = ioread32(addr);
-	reg = (reg & ~bit) | type1;
-	iowrite32(reg, addr);
-
-	addr = bank_reg(gpio, bank, reg_irq_type2);
-	reg = ioread32(addr);
-	reg = (reg & ~bit) | type2;
-	iowrite32(reg, addr);
-
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	irq_set_handler_locked(d, handler);
-
-	return 0;
-}
-
-static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
-{
-	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
-	struct irq_chip *ic = irq_desc_get_chip(desc);
-	struct aspeed_sgpio *data = gpiochip_get_data(gc);
-	unsigned int i, p, girq;
-	unsigned long reg;
-
-	chained_irq_enter(ic, desc);
-
-	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
-		const struct aspeed_sgpio_bank *bank = &aspeed_sgpio_banks[i];
-
-		reg = ioread32(bank_reg(data, bank, reg_irq_status));
-
-		for_each_set_bit(p, &reg, 32) {
-			girq = irq_find_mapping(gc->irq.domain, i * 32 + p);
-			generic_handle_irq(girq);
-		}
-
-	}
-
-	chained_irq_exit(ic, desc);
-}
-
-static struct irq_chip aspeed_sgpio_irqchip = {
-	.name       = "aspeed-sgpio",
-	.irq_ack    = aspeed_sgpio_irq_ack,
-	.irq_mask   = aspeed_sgpio_irq_mask,
-	.irq_unmask = aspeed_sgpio_irq_unmask,
-	.irq_set_type   = aspeed_sgpio_set_type,
-};
-
-static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
-				   struct platform_device *pdev)
-{
-	int rc, i;
-	const struct aspeed_sgpio_bank *bank;
-	struct gpio_irq_chip *irq;
-
-	rc = platform_get_irq(pdev, 0);
-	if (rc < 0)
-		return rc;
-
-	gpio->irq = rc;
-
-	/* Disable IRQ and clear Interrupt status registers for all SPGIO Pins. */
-	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
-		bank =  &aspeed_sgpio_banks[i];
-		/* disable irq enable bits */
-		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_enable));
-		/* clear status bits */
-		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_status));
-	}
-
-	irq = &gpio->chip.irq;
-	irq->chip = &aspeed_sgpio_irqchip;
-	irq->handler = handle_bad_irq;
-	irq->default_type = IRQ_TYPE_NONE;
-	irq->parent_handler = aspeed_sgpio_irq_handler;
-	irq->parent_handler_data = gpio;
-	irq->parents = &gpio->irq;
-	irq->num_parents = 1;
-
-	/* set IRQ settings and Enable Interrupt */
-	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
-		bank = &aspeed_sgpio_banks[i];
-		/* set falling or level-low irq */
-		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type0));
-		/* trigger type is edge */
-		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type1));
-		/* dual edge trigger mode. */
-		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_type2));
-		/* enable irq */
-		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_enable));
-	}
-
-	return 0;
-}
-
-static const struct of_device_id aspeed_sgpio_of_table[] = {
-	{ .compatible = "aspeed,ast2400-sgpio" },
-	{ .compatible = "aspeed,ast2500-sgpio" },
-	{}
-};
-
-MODULE_DEVICE_TABLE(of, aspeed_sgpio_of_table);
-
-static int __init aspeed_sgpio_probe(struct platform_device *pdev)
-{
-	struct aspeed_sgpio *gpio;
-	u32 nr_gpios, sgpio_freq, sgpio_clk_div;
-	int rc;
-	unsigned long apb_freq;
-
-	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
-	if (!gpio)
-		return -ENOMEM;
-
-	gpio->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(gpio->base))
-		return PTR_ERR(gpio->base);
-
-	rc = of_property_read_u32(pdev->dev.of_node, "ngpios", &nr_gpios);
-	if (rc < 0) {
-		dev_err(&pdev->dev, "Could not read ngpios property\n");
-		return -EINVAL;
-	} else if (nr_gpios > MAX_NR_SGPIO) {
-		dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n",
-			MAX_NR_SGPIO, nr_gpios);
-		return -EINVAL;
-	}
-
-	rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq);
-	if (rc < 0) {
-		dev_err(&pdev->dev, "Could not read bus-frequency property\n");
-		return -EINVAL;
-	}
-
-	gpio->pclk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(gpio->pclk)) {
-		dev_err(&pdev->dev, "devm_clk_get failed\n");
-		return PTR_ERR(gpio->pclk);
-	}
-
-	apb_freq = clk_get_rate(gpio->pclk);
-
-	/*
-	 * From the datasheet,
-	 *	SGPIO period = 1/PCLK * 2 * (GPIO254[31:16] + 1)
-	 *	period = 2 * (GPIO254[31:16] + 1) / PCLK
-	 *	frequency = 1 / (2 * (GPIO254[31:16] + 1) / PCLK)
-	 *	frequency = PCLK / (2 * (GPIO254[31:16] + 1))
-	 *	frequency * 2 * (GPIO254[31:16] + 1) = PCLK
-	 *	GPIO254[31:16] = PCLK / (frequency * 2) - 1
-	 */
-	if (sgpio_freq == 0)
-		return -EINVAL;
-
-	sgpio_clk_div = (apb_freq / (sgpio_freq * 2)) - 1;
-
-	if (sgpio_clk_div > (1 << 16) - 1)
-		return -EINVAL;
-
-	iowrite32(FIELD_PREP(ASPEED_SGPIO_CLK_DIV_MASK, sgpio_clk_div) |
-		  FIELD_PREP(ASPEED_SGPIO_PINS_MASK, (nr_gpios / 8)) |
-		  ASPEED_SGPIO_ENABLE,
-		  gpio->base + ASPEED_SGPIO_CTRL);
-
-	spin_lock_init(&gpio->lock);
-
-	gpio->chip.parent = &pdev->dev;
-	gpio->chip.ngpio = nr_gpios;
-	gpio->chip.direction_input = aspeed_sgpio_dir_in;
-	gpio->chip.direction_output = aspeed_sgpio_dir_out;
-	gpio->chip.get_direction = aspeed_sgpio_get_direction;
-	gpio->chip.request = NULL;
-	gpio->chip.free = NULL;
-	gpio->chip.get = aspeed_sgpio_get;
-	gpio->chip.set = aspeed_sgpio_set;
-	gpio->chip.set_config = NULL;
-	gpio->chip.label = dev_name(&pdev->dev);
-	gpio->chip.base = -1;
-
-	/* set all SGPIO pins as input (1). */
-	memset(gpio->dir_in, 0xff, sizeof(gpio->dir_in));
-
-	aspeed_sgpio_setup_irqs(gpio, pdev);
-
-	rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio);
-	if (rc < 0)
-		return rc;
-
-	return 0;
-}
-
-static struct platform_driver aspeed_sgpio_driver = {
-	.driver = {
-		.name = KBUILD_MODNAME,
-		.of_match_table = aspeed_sgpio_of_table,
-	},
-};
-
-module_platform_driver_probe(aspeed_sgpio_driver, aspeed_sgpio_probe);
-MODULE_DESCRIPTION("Aspeed Serial GPIO Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index e67c194c2aca..d0aa6cff2e02 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -54,6 +54,9 @@ config DRM_DEBUG_MM
 
 	  If in doubt, say "N".
 
+config DRM_EXPORT_FOR_TESTS
+	bool
+
 config DRM_DEBUG_SELFTEST
 	tristate "kselftests for DRM"
 	depends on DRM
@@ -61,6 +64,7 @@ config DRM_DEBUG_SELFTEST
 	select PRIME_NUMBERS
 	select DRM_LIB_RANDOM
 	select DRM_KMS_HELPER
+	select DRM_EXPORT_FOR_TESTS if m
 	default n
 	help
 	  This option provides kernel modules that can be used to run
@@ -93,6 +97,21 @@ config DRM_KMS_FB_HELPER
 	help
 	  FBDEV helpers for KMS drivers.
 
+config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
+        bool "Enable refcount backtrace history in the DP MST helpers"
+	depends on STACKTRACE_SUPPORT
+        select STACKDEPOT
+        depends on DRM_KMS_HELPER
+        depends on DEBUG_KERNEL
+        depends on EXPERT
+        help
+          Enables debug tracing for topology refs in DRM's DP MST helpers. A
+          history of each topology reference/dereference will be printed to the
+          kernel log once a port or branch device's topology refcount reaches 0.
+
+          This has the potential to use a lot of memory and print some very
+          large kernel messages. If in doubt, say "N".
+
 config DRM_FBDEV_EMULATION
 	bool "Enable legacy fbdev support for your modesetting driver"
 	depends on DRM
@@ -149,6 +168,7 @@ config DRM_LOAD_EDID_FIRMWARE
 
 config DRM_DP_CEC
 	bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
+	depends on DRM
 	select CEC_CORE
 	help
 	  Choose this option if you want to enable HDMI CEC support for
@@ -165,13 +185,26 @@ config DRM_TTM
 	  GPU memory types. Will be enabled automatically if a device driver
 	  uses it.
 
+config DRM_TTM_DMA_PAGE_POOL
+	bool
+	depends on DRM_TTM && (SWIOTLB || INTEL_IOMMU)
+	default y
+	help
+	  Choose this if you need the TTM dma page pool
+
 config DRM_VRAM_HELPER
 	tristate
 	depends on DRM
-	select DRM_TTM
 	help
 	  Helpers for VRAM memory management
 
+config DRM_TTM_HELPER
+	tristate
+	depends on DRM
+	select DRM_TTM
+	help
+	  Helpers for ttm-based gem objects
+
 config DRM_GEM_CMA_HELPER
 	bool
 	depends on DRM
@@ -226,9 +259,9 @@ config DRM_AMDGPU
 	tristate "AMD GPU"
 	depends on DRM && PCI && MMU
 	select FW_LOADER
-        select DRM_KMS_HELPER
+	select DRM_KMS_HELPER
 	select DRM_SCHED
-        select DRM_TTM
+	select DRM_TTM
 	select POWER_SUPPLY
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
@@ -257,6 +290,7 @@ config DRM_VKMS
 	tristate "Virtual KMS (EXPERIMENTAL)"
 	depends on DRM
 	select DRM_KMS_HELPER
+	select CRC32
 	default n
 	help
 	  Virtual Kernel Mode-Setting (VKMS) is used for testing or for
@@ -265,9 +299,6 @@ config DRM_VKMS
 
 	  If M is selected the module will be called vkms.
 
-config DRM_ATI_PCIGART
-        bool
-
 source "drivers/gpu/drm/exynos/Kconfig"
 
 source "drivers/gpu/drm/rockchip/Kconfig"
@@ -364,7 +395,6 @@ menuconfig DRM_LEGACY
 	bool "Enable legacy drivers (DANGEROUS)"
 	depends on DRM && MMU
 	select DRM_VM
-	select DRM_ATI_PCIGART if PCI
 	help
 	  Enable legacy DRI1 drivers. Those drivers expose unsafe and dangerous
 	  APIs to user-space, which can be used to circumvent access
@@ -397,7 +427,7 @@ config DRM_R128
 
 config DRM_I810
 	tristate "Intel I810"
-	# !PREEMPT because of missing ioctl locking
+	# !PREEMPTION because of missing ioctl locking
 	depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN)
 	help
 	  Choose this option if you have an Intel I810 graphics card.  If M is
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 82ff826b33cc..6493088a0fdd 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -5,7 +5,7 @@
 
 drm-y       :=	drm_auth.o drm_cache.o \
 		drm_file.o drm_gem.o drm_ioctl.o drm_irq.o \
-		drm_memory.o drm_drv.o drm_pci.o \
+		drm_memory.o drm_drv.o \
 		drm_sysfs.o drm_hashtab.o drm_mm.o \
 		drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
 		drm_encoder_slave.o \
@@ -25,18 +25,20 @@ drm-$(CONFIG_DRM_VM) += drm_vm.o
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
 drm-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_gem_shmem_helper.o
-drm-$(CONFIG_DRM_ATI_PCIGART) += ati_pcigart.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
 drm-$(CONFIG_OF) += drm_of.o
 drm-$(CONFIG_AGP) += drm_agpsupport.o
+drm-$(CONFIG_PCI) += drm_pci.o
 drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
 drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 
 drm_vram_helper-y := drm_gem_vram_helper.o \
-		     drm_vram_helper_common.o \
-		     drm_vram_mm_helper.o
+		     drm_vram_helper_common.o
 obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
 
+drm_ttm_helper-y := drm_gem_ttm_helper.o
+obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o
+
 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_dsc.o drm_probe_helper.o \
 		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
 		drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
index d968c2471412..13340f353ea8 100644
--- a/drivers/gpu/drm/amd/acp/Kconfig
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -1,12 +1,12 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
 menu "ACP (Audio CoProcessor) Configuration"
 
 config DRM_AMD_ACP
-       bool "Enable AMD Audio CoProcessor IP support"
-       depends on DRM_AMDGPU
-       select MFD_CORE
-       select PM_GENERIC_DOMAINS if PM
-       help
+	bool "Enable AMD Audio CoProcessor IP support"
+	depends on DRM_AMDGPU
+	select MFD_CORE
+	select PM_GENERIC_DOMAINS if PM
+	help
 	Choose this option to enable ACP IP support for AMD SOCs.
 	This adds the ACP (Audio CoProcessor) IP driver and wires
 	it up into the amdgpu driver.  The ACP block provides the DMA
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 2e98c016cb47..9375e7f12420 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
 config DRM_AMDGPU_SI
 	bool "Enable amdgpu support for SI parts"
 	depends on DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 00962a659009..7ae3b22c5628 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,8 +53,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
 	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
-	amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
-	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o
+	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
+	amdgpu_umc.o smu_v11_0_i2c.o
 
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
 
@@ -67,7 +68,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
 	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
-	arct_reg_init.o navi12_reg_init.o
+	arct_reg_init.o navi12_reg_init.o mxgpu_nv.o
 
 # add DF block
 amdgpu-y += \
@@ -83,7 +84,7 @@ amdgpu-y += \
 
 # add UMC block
 amdgpu-y += \
-	umc_v6_1.o
+	umc_v6_1.o umc_v6_0.o
 
 # add IH block
 amdgpu-y += \
@@ -146,12 +147,16 @@ amdgpu-y += \
 	vce_v3_0.o \
 	vce_v4_0.o
 
-# add VCN block
+# add VCN and JPEG block
 amdgpu-y += \
 	amdgpu_vcn.o \
 	vcn_v1_0.o \
 	vcn_v2_0.o \
-	vcn_v2_5.o
+	vcn_v2_5.o \
+	amdgpu_jpeg.o \
+	jpeg_v1_0.o \
+	jpeg_v2_0.o \
+	jpeg_v2_5.o
 
 # add ATHUB block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bd37df5dd6d0..b1bb10625cd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -69,10 +69,12 @@
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_jpeg.h"
 #include "amdgpu_mn.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_sdma.h"
+#include "amdgpu_nbio.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_csa.h"
@@ -88,6 +90,7 @@
 #include "amdgpu_mes.h"
 #include "amdgpu_umc.h"
 #include "amdgpu_mmhub.h"
+#include "amdgpu_df.h"
 
 #define MAX_GPU_INSTANCE		16
 
@@ -106,6 +109,8 @@ struct amdgpu_mgpu_info
 	uint32_t			num_apu;
 };
 
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH	256
+
 /*
  * Modules parameters.
  */
@@ -122,6 +127,7 @@ extern int amdgpu_disp_priority;
 extern int amdgpu_hw_i2c;
 extern int amdgpu_pcie_gen2;
 extern int amdgpu_msi;
+extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
 extern int amdgpu_dpm;
 extern int amdgpu_fw_load_type;
 extern int amdgpu_aspm;
@@ -135,6 +141,7 @@ extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
 extern int amdgpu_vm_update_mode;
+extern int amdgpu_exp_hw_support;
 extern int amdgpu_dc;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
@@ -146,11 +153,7 @@ extern uint amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern uint amdgpu_pp_feature_mask;
-extern int amdgpu_ngg;
-extern int amdgpu_prim_buf_per_se;
-extern int amdgpu_pos_buf_per_se;
-extern int amdgpu_cntl_sb_buf_per_se;
-extern int amdgpu_param_buf_per_se;
+extern uint amdgpu_force_long_training;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
@@ -167,6 +170,12 @@ extern int amdgpu_mcbp;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
 extern int amdgpu_noretry;
+extern int amdgpu_force_asic_type;
+#ifdef CONFIG_HSA_AMD
+extern int sched_policy;
+#else
+static const int sched_policy = KFD_SCHED_POLICY_HWS;
+#endif
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -283,6 +292,9 @@ struct amdgpu_ip_block_version {
 	const struct amd_ip_funcs *funcs;
 };
 
+#define HW_REV(_Major, _Minor, _Rev) \
+	((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
+
 struct amdgpu_ip_block {
 	struct amdgpu_ip_block_status status;
 	const struct amdgpu_ip_block_version *version;
@@ -425,7 +437,6 @@ struct amdgpu_fpriv {
 };
 
 int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
@@ -477,7 +488,6 @@ struct amdgpu_cs_parser {
 	uint64_t			bytes_moved_vis_threshold;
 	uint64_t			bytes_moved;
 	uint64_t			bytes_moved_vis;
-	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
@@ -580,6 +590,8 @@ struct amdgpu_asic_funcs {
 	bool (*need_reset_on_init)(struct amdgpu_device *adev);
 	/* PCIe replay counter */
 	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
+	/* device supports BACO */
+	bool (*supports_baco)(struct amdgpu_device *adev);
 };
 
 /*
@@ -624,6 +636,10 @@ struct amdgpu_fw_vram_usage {
 	u64 size;
 	struct amdgpu_bo *reserved_bo;
 	void *va;
+
+	/* GDDR6 training support flag.
+	*/
+	bool mem_train_support;
 };
 
 /*
@@ -644,91 +660,11 @@ typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
-
-/*
- * amdgpu nbio functions
- *
- */
-struct nbio_hdp_flush_reg {
-	u32 ref_and_mask_cp0;
-	u32 ref_and_mask_cp1;
-	u32 ref_and_mask_cp2;
-	u32 ref_and_mask_cp3;
-	u32 ref_and_mask_cp4;
-	u32 ref_and_mask_cp5;
-	u32 ref_and_mask_cp6;
-	u32 ref_and_mask_cp7;
-	u32 ref_and_mask_cp8;
-	u32 ref_and_mask_cp9;
-	u32 ref_and_mask_sdma0;
-	u32 ref_and_mask_sdma1;
-	u32 ref_and_mask_sdma2;
-	u32 ref_and_mask_sdma3;
-	u32 ref_and_mask_sdma4;
-	u32 ref_and_mask_sdma5;
-	u32 ref_and_mask_sdma6;
-	u32 ref_and_mask_sdma7;
-};
-
 struct amdgpu_mmio_remap {
 	u32 reg_offset;
 	resource_size_t bus_addr;
 };
 
-struct amdgpu_nbio_funcs {
-	const struct nbio_hdp_flush_reg *hdp_flush_reg;
-	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
-	u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
-	u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
-	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
-	u32 (*get_rev_id)(struct amdgpu_device *adev);
-	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
-	void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
-	u32 (*get_memsize)(struct amdgpu_device *adev);
-	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
-			bool use_doorbell, int doorbell_index, int doorbell_size);
-	void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
-				   int doorbell_index, int instance);
-	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
-					 bool enable);
-	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
-						  bool enable);
-	void (*ih_doorbell_range)(struct amdgpu_device *adev,
-				  bool use_doorbell, int doorbell_index);
-	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
-						 bool enable);
-	void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
-						bool enable);
-	void (*get_clockgating_state)(struct amdgpu_device *adev,
-				      u32 *flags);
-	void (*ih_control)(struct amdgpu_device *adev);
-	void (*init_registers)(struct amdgpu_device *adev);
-	void (*detect_hw_virt)(struct amdgpu_device *adev);
-	void (*remap_hdp_registers)(struct amdgpu_device *adev);
-};
-
-struct amdgpu_df_funcs {
-	void (*sw_init)(struct amdgpu_device *adev);
-	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
-				      bool enable);
-	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
-	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
-	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
-						 bool enable);
-	void (*get_clockgating_state)(struct amdgpu_device *adev,
-				      u32 *flags);
-	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
-					    bool enable);
-	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
-					 int is_enable);
-	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
-					 int is_disable);
-	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
-					 uint64_t *count);
-	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
-	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
-			 uint32_t ficadl_val, uint32_t ficadh_val);
-};
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
 	GC_HWIP = 1,
@@ -748,6 +684,7 @@ enum amd_hw_ip_block_type {
 	MP1_HWIP,
 	UVD_HWIP,
 	VCN_HWIP = UVD_HWIP,
+	JPEG_HWIP = VCN_HWIP,
 	VCE_HWIP,
 	DF_HWIP,
 	DCE_HWIP,
@@ -813,6 +750,7 @@ struct amdgpu_device {
 	uint8_t				*bios;
 	uint32_t			bios_size;
 	struct amdgpu_bo		*stolen_vga_memory;
+	struct amdgpu_bo		*discovery_memory;
 	uint32_t			bios_scratch_reg_offset;
 	uint32_t			bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
 
@@ -921,6 +859,12 @@ struct amdgpu_device {
 	u32				cg_flags;
 	u32				pg_flags;
 
+	/* nbio */
+	struct amdgpu_nbio		nbio;
+
+	/* mmhub */
+	struct amdgpu_mmhub		mmhub;
+
 	/* gfx */
 	struct amdgpu_gfx		gfx;
 
@@ -936,6 +880,9 @@ struct amdgpu_device {
 	/* vcn */
 	struct amdgpu_vcn		vcn;
 
+	/* jpeg */
+	struct amdgpu_jpeg		jpeg;
+
 	/* firmwares */
 	struct amdgpu_firmware		firmware;
 
@@ -961,6 +908,9 @@ struct amdgpu_device {
 	bool                            enable_mes;
 	struct amdgpu_mes               mes;
 
+	/* df */
+	struct amdgpu_df                df;
+
 	struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
 	int				num_ip_blocks;
 	struct mutex	mn_lock;
@@ -974,10 +924,6 @@ struct amdgpu_device {
 	/* soc15 register offset based on ip, instance and  segment */
 	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
 
-	const struct amdgpu_nbio_funcs	*nbio_funcs;
-	const struct amdgpu_df_funcs	*df_funcs;
-	const struct amdgpu_mmhub_funcs	*mmhub_funcs;
-
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     delayed_init_work;
 
@@ -1006,11 +952,11 @@ struct amdgpu_device {
 	struct mutex  lock_reset;
 	struct amdgpu_doorbell_index doorbell_index;
 
+	struct mutex			notifier_lock;
+
 	int asic_reset_res;
 	struct work_struct		xgmi_reset_work;
 
-	bool                            in_baco_reset;
-
 	long				gfx_timeout;
 	long				sdma_timeout;
 	long				video_timeout;
@@ -1018,6 +964,14 @@ struct amdgpu_device {
 
 	uint64_t			unique_id;
 	uint64_t	df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+	/* device pstate */
+	int				pstate;
+	/* enable runtime pm on the device */
+	bool                            runpm;
+
+	bool                            pm_sysfs_en;
+	bool                            ucode_sysfs_en;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1032,6 +986,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 void amdgpu_device_fini(struct amdgpu_device *adev);
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+			       uint32_t *buf, size_t size, bool write);
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
@@ -1151,6 +1107,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
 #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
+
 #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
 
 /* Common functions */
@@ -1167,9 +1125,12 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
 					     const u32 array_size);
 
-bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_supports_boco(struct drm_device *dev);
+bool amdgpu_device_supports_baco(struct drm_device *dev);
 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
 				      struct amdgpu_device *peer_adev);
+int amdgpu_device_baco_enter(struct drm_device *dev);
+int amdgpu_device_baco_exit(struct drm_device *dev);
 
 /* atpx handler */
 #if defined(CONFIG_VGA_SWITCHEROO)
@@ -1207,8 +1168,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv);
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 07eb29885372..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -63,45 +63,10 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
-	const struct kfd2kgd_calls *kfd2kgd;
-
-	switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_CIK
-	case CHIP_KAVERI:
-	case CHIP_HAWAII:
-		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
-		break;
-#endif
-	case CHIP_CARRIZO:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-	case CHIP_VEGAM:
-		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
-		break;
-	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_VEGA20:
-	case CHIP_RAVEN:
-		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
-		break;
-	case CHIP_ARCTURUS:
-		kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
-		break;
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-		kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
-		break;
-	default:
-		dev_info(adev->dev, "kfd not supported on this ASIC\n");
-		return;
-	}
+	bool vf = amdgpu_sriov_vf(adev);
 
 	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
-				      adev->pdev, kfd2kgd);
+				      adev->pdev, adev->asic_type, vf);
 
 	if (adev->kfd.dev)
 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
@@ -165,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				  adev->gfx.mec.queue_bitmap,
 				  KGD_MAX_QUEUES);
 
-		/* remove the KIQ bit as well */
-		if (adev->gfx.kiq.ring.sched.ready)
-			clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
-							  adev->gfx.kiq.ring.me - 1,
-							  adev->gfx.kiq.ring.pipe,
-							  adev->gfx.kiq.ring.queue),
-				  gpu_resources.queue_bitmap);
-
 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 		 * nbits is not compile time constant
 		 */
@@ -202,7 +159,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 					adev->doorbell_index.last_non_cp;
 		}
 
-		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
+		kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources);
 	}
 }
 
@@ -656,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (is_support_sw_smu(adev))
-		smu_switch_power_profile(&adev->smu,
-					 PP_SMC_POWER_PROFILE_COMPUTE,
-					 !idle);
-	else if (adev->powerplay.pp_funcs &&
-		 adev->powerplay.pp_funcs->switch_power_profile)
-		amdgpu_dpm_switch_power_profile(adev,
-						PP_SMC_POWER_PROFILE_COMPUTE,
-						!idle);
+	amdgpu_dpm_switch_power_profile(adev,
+					PP_SMC_POWER_PROFILE_COMPUTE,
+					!idle);
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
@@ -677,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	if (adev->family == AMDGPU_FAMILY_AI) {
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+	}
+
+	return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint32_t flush_type = 0;
+	bool all_hub = false;
+
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;
+
+	if (adev->family == AMDGPU_FAMILY_AI)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -709,38 +692,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
 	return 0;
 }
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
-	return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
-	return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
-	return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
-{
-	return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
-{
-	return NULL;
-}
-
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
-			      const struct kfd2kgd_calls *f2g)
+			      unsigned int asic_type, bool vf)
 {
 	return NULL;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index e519df3fd2b6..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -57,7 +57,7 @@ struct kgd_mem {
 	unsigned int mapped_to_gpu_memory;
 	uint64_t va;
 
-	uint32_t mapping_flags;
+	uint32_t alloc_flags;
 
 	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
@@ -136,12 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
@@ -179,10 +175,17 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
 uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
 uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
+/* Read user wptr from a specified user address space with page fault
+ * disabled. The memory must be pinned and mapped to the hardware when
+ * this is called in hqd_load functions, so it should never fault in
+ * the first place. This resolves a circular lock dependency involving
+ * four locks, including the DQM lock and mmap_sem.
+ */
 #define read_user_wptr(mmptr, wptr, dst)				\
 	({								\
 		bool valid = false;					\
 		if ((mmptr) && (wptr)) {				\
+			pagefault_disable();				\
 			if ((mmptr) == current->mm) {			\
 				valid = !get_user((dst), (wptr));	\
 			} else if (current->mm == NULL) {		\
@@ -190,6 +193,7 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
 				valid = !get_user((dst), (wptr));	\
 				unuse_mm(mmptr);			\
 			}						\
+			pagefault_enable();				\
 		}							\
 		valid;							\
 	})
@@ -240,8 +244,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
 int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
-			      const struct kfd2kgd_calls *f2g);
+			      unsigned int asic_type, bool vf);
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index c79aaebeeaf0..4bcc175a149d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -19,10 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
@@ -50,6 +46,8 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
 
 #define HQD_N_REGS 56
 #define DUMP_REG(addr) do {				\
@@ -69,62 +67,60 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
 	return (struct v9_sdma_mqd *)mqd;
 }
 
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
 				unsigned int engine_id,
 				unsigned int queue_id)
 {
-	uint32_t base[8] = {
-		SOC15_REG_OFFSET(SDMA0, 0,
-				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA1, 0,
-				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA2, 0,
-				 mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA3, 0,
-				 mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA4, 0,
-				 mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA5, 0,
-				 mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA6, 0,
-				 mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA7, 0,
-				 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
-	};
-	uint32_t retval;
-
-	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-					       mmSDMA0_RLC0_RB_CNTL);
-
-	pr_debug("sdma base address: 0x%x\n", retval);
-
-	return retval;
-}
+	uint32_t sdma_engine_reg_base = 0;
+	uint32_t sdma_rlc_reg_offset;
 
-static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
-		u32 instance, u32 offset)
-{
-	switch (instance) {
+	switch (engine_id) {
+	default:
+		dev_warn(adev->dev,
+			 "Invalid sdma engine id (%d), using engine id 0\n",
+			 engine_id);
+		/* fall through */
 	case 0:
-		return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+		break;
 	case 1:
-		return (adev->reg_offset[SDMA1_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+		break;
 	case 2:
-		return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+		break;
 	case 3:
-		return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+		break;
 	case 4:
-		return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+		break;
 	case 5:
-		return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+		break;
 	case 6:
-		return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+		break;
 	case 7:
-		return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
-	default:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
 		break;
 	}
-	return 0;
+
+	sdma_rlc_reg_offset = sdma_engine_reg_base
+		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+			queue_id, sdma_rlc_reg_offset);
+
+	return sdma_rlc_reg_offset;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
@@ -132,70 +128,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+	uint32_t sdma_rlc_reg_offset;
 	unsigned long end_jiffies;
 	uint32_t data;
 	uint64_t data64;
 	uint64_t __user *wptr64 = (uint64_t __user *)wptr;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
-	sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev,
-			m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
 		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
 	end_jiffies = msecs_to_jiffies(2000) + jiffies;
 	while (true) {
-		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
-	data = RREG32(sdmax_gfx_context_cntl);
-	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-			     RESUME_CTX, 0);
-	WREG32(sdmax_gfx_context_cntl, data);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
 	       m->sdmax_rlcx_doorbell_offset);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 			     ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+				m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
 				m->sdmax_rlcx_rb_rptr_hi);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
 	if (read_user_wptr(mm, wptr64, data64)) {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       lower_32_bits(data64));
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       upper_32_bits(data64));
 	} else {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       m->sdmax_rlcx_rb_rptr);
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       m->sdmax_rlcx_rb_rptr_hi);
 	}
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
 			m->sdmax_rlcx_rb_base_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 			m->sdmax_rlcx_rb_rptr_addr_lo);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdmax_rlcx_rb_rptr_addr_hi);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
 			     RB_ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
 
 	return 0;
 }
@@ -205,7 +198,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 			     uint32_t (**dump)[2], uint32_t *n_regs)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+			engine_id, queue_id);
 	uint32_t i = 0, reg;
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
@@ -215,15 +209,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 		return -ENOMEM;
 
 	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
 	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
 	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 
 	WARN_ON_ONCE(i != HQD_N_REGS);
 	*n_regs = i;
@@ -235,14 +229,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t sdma_rlc_rb_cntl;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 
 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 		return true;
@@ -255,44 +249,63 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t temp;
 	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
 
 	while (true) {
-		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
-	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
 	m->sdmax_rlcx_rb_rptr_hi =
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
 
 	return 0;
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
+static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint64_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID %u\n",
+		       vmid);
+		return;
+	}
+
+	mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+const struct kfd2kgd_calls arcturus_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
 	.init_interrupts = kgd_gfx_v9_init_interrupts,
 	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
 	.hqd_dump = kgd_gfx_v9_hqd_dump,
 	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -304,20 +317,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
 	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
 	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
-	.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
+	.get_atc_vmid_pasid_mapping_info =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
-	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
+	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
-
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
-{
-	return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index d10f483f5e27..a7b17c8deb00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -19,18 +19,9 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
-#include <linux/firmware.h>
 #include <linux/mmu_context.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
-#include "soc15_hw_ip.h"
 #include "gc/gc_10_1_0_offset.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "navi10_enum.h"
@@ -42,6 +33,7 @@
 #include "v10_structs.h"
 #include "nv.h"
 #include "nvd.h"
+#include "gfxhub_v2_0.h"
 
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
@@ -50,63 +42,6 @@ enum hqd_dequeue_request_type {
 	SAVE_WAVES
 };
 
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t sh_mem_config,
-		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-		uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-		unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr,
-			uint32_t wptr_shift, uint32_t wptr_mask,
-			struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
-			uint32_t pipe_id, uint32_t queue_id,
-			uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-			     uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-			     uint32_t engine_id, uint32_t queue_id,
-			     uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-		uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-				enum kfd_preempt_type reset_type,
-				unsigned int utimeout, uint32_t pipe_id,
-				uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-				unsigned int utimeout);
-#if 0
-static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
-#endif
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					uint32_t cntl_val,
-					uint32_t addr_hi,
-					uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
-					uint32_t gfx_index_val,
-					uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
  */
@@ -139,37 +74,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
 	return 0;
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
-	.program_sh_mem_settings = kgd_program_sh_mem_settings,
-	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_interrupts = kgd_init_interrupts,
-	.hqd_load = kgd_hqd_load,
-	.hqd_sdma_load = kgd_hqd_sdma_load,
-	.hqd_dump = kgd_hqd_dump,
-	.hqd_sdma_dump = kgd_hqd_sdma_dump,
-	.hqd_is_occupied = kgd_hqd_is_occupied,
-	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-	.hqd_destroy = kgd_hqd_destroy,
-	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-	.address_watch_disable = kgd_address_watch_disable,
-	.address_watch_execute = kgd_address_watch_execute,
-	.wave_control_execute = kgd_wave_control_execute,
-	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			get_atc_vmid_pasid_mapping_valid,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.get_tile_config = amdgpu_amdkfd_get_tile_config,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
-{
-	return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 {
 	return (struct amdgpu_device *)kgd;
@@ -203,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
 			       uint32_t pipe_id, uint32_t queue_id)
 {
-	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-			    queue_id) & 31;
+	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			queue_id;
 
-	return ((uint32_t)1) << bit;
+	return 1ull << bit;
 }
 
 static void release_queue(struct kgd_dev *kgd)
@@ -250,11 +154,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 			ATC_VMID0_PASID_MAPPING__VALID_MASK;
 
 	pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
-	/*
-	 * need to do this twice, once for gfx and once for mmhub
-	 * for ATC add 16 to VMID for mmhub, for IH different registers.
-	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
-	 */
 
 	pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
 	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
@@ -306,11 +205,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
 				unsigned int engine_id,
 				unsigned int queue_id)
 {
-	uint32_t base[2] = {
+	uint32_t sdma_engine_reg_base[2] = {
 		SOC15_REG_OFFSET(SDMA0, 0,
 				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
 		/* On gfx10, mmSDMA1_xxx registers are defined NOT based
@@ -322,12 +221,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
 		SOC15_REG_OFFSET(SDMA1, 0,
 				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
 	};
-	uint32_t retval;
 
-	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-					       mmSDMA0_RLC0_RB_CNTL);
+	uint32_t retval = sdma_engine_reg_base[engine_id]
+		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
-	pr_debug("sdma base address: 0x%x\n", retval);
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+			queue_id, retval);
 
 	return retval;
 }
@@ -369,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	/* HIQ is set during driver init period with vmid set to 0*/
-	if (m->cp_hqd_vmid == 0) {
-		uint32_t value, mec, pipe;
-
-		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-			mec, pipe, queue_id);
-		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-	}
-
 	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
 	mqd_hqd = &m->cp_mqd_base_addr_lo;
 	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -433,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		       lower_32_bits((uint64_t)wptr));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 		       upper_32_bits((uint64_t)wptr));
-		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-		       get_queue_mask(adev, pipe_id, queue_id));
+		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 	}
 
 	/* Start the EOP fetcher */
@@ -451,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
+static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	struct v10_compute_mqd *m;
+	uint32_t mec, pipe;
+	int r;
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+		 mec, pipe, queue_id);
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_ring_alloc(kiq_ring, 7);
+	if (r) {
+		pr_err("Failed to alloc KIQ (%d).\n", r);
+		goto out_unlock;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+			  PACKET3_MAP_QUEUES_PIPE(pipe) |
+			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+	amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+	release_queue(kgd);
+
+	return r;
+}
+
 static int kgd_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs)
@@ -488,72 +426,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v10_sdma_mqd *m;
-	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+	uint32_t sdma_rlc_reg_offset;
 	unsigned long end_jiffies;
 	uint32_t data;
 	uint64_t data64;
 	uint64_t __user *wptr64 = (uint64_t __user *)wptr;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
-	pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
-	sdmax_gfx_context_cntl = m->sdma_engine_id ?
-		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
-		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
 		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
 	end_jiffies = msecs_to_jiffies(2000) + jiffies;
 	while (true) {
-		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
-	data = RREG32(sdmax_gfx_context_cntl);
-	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-			     RESUME_CTX, 0);
-	WREG32(sdmax_gfx_context_cntl, data);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
 	       m->sdmax_rlcx_doorbell_offset);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 			     ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+				m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
 				m->sdmax_rlcx_rb_rptr_hi);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
 	if (read_user_wptr(mm, wptr64, data64)) {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       lower_32_bits(data64));
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       upper_32_bits(data64));
 	} else {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       m->sdmax_rlcx_rb_rptr);
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       m->sdmax_rlcx_rb_rptr_hi);
 	}
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
 			m->sdmax_rlcx_rb_base_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 			m->sdmax_rlcx_rb_rptr_addr_lo);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdmax_rlcx_rb_rptr_addr_hi);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
 			     RB_ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
 
 	return 0;
 }
@@ -563,28 +496,26 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 			     uint32_t (**dump)[2], uint32_t *n_regs)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+			engine_id, queue_id);
 	uint32_t i = 0, reg;
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
-	pr_debug("sdma base addr %x\n", sdma_base_addr);
-
 	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
 	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
 	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
 	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 
 	WARN_ON_ONCE(i != HQD_N_REGS);
 	*n_regs = i;
@@ -618,14 +549,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v10_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t sdma_rlc_rb_cntl;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 
 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 		return true;
@@ -746,122 +677,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v10_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t temp;
 	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
 
 	while (true) {
-		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
-	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
 	m->sdmax_rlcx_rb_rptr_hi =
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
 
 	return 0;
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
-			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
-				== pasid) {
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
-				break;
-			}
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	return 0;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -914,7 +775,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint64_t base = page_table_base | AMDGPU_PTE_VALID;
 
 	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 		pr_err("trying to set page table base for wrong VMID %u\n",
@@ -922,18 +782,30 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		return;
 	}
 
-	/* TODO: take advantage of per-process address space size. For
-	 * now, all processes share the same address space size, like
-	 * on GFX8 and older.
-	 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
-			lower_32_bits(adev->vm_manager.max_pfn - 1));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
-			upper_32_bits(adev->vm_manager.max_pfn - 1));
-
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+	/* SDMA is on gfxhub as well for Navi1* series */
+	gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
+
+const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hiq_mqd_load = kgd_hiq_mqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_info =
+			get_atc_vmid_pasid_mapping_info,
+	.get_tile_config = amdgpu_amdkfd_get_tile_config,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5f459bf5f622..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
 #include <linux/mmu_context.h>
 
 #include "amdgpu.h"
@@ -86,65 +84,6 @@ union TCP_WATCH_CNTL_BITS {
 	float f32All;
 };
 
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t sh_mem_config,	uint32_t sh_mem_ape1_base,
-		uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
-
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-					unsigned int vmid);
-
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr,
-			uint32_t wptr_shift, uint32_t wptr_mask,
-			struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
-			uint32_t pipe_id, uint32_t queue_id,
-			uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-			     uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-			     uint32_t engine_id, uint32_t queue_id,
-			     uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-				uint32_t pipe_id, uint32_t queue_id);
-
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-				enum kfd_preempt_type reset_type,
-				unsigned int utimeout, uint32_t pipe_id,
-				uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-				unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					uint32_t cntl_val,
-					uint32_t addr_hi,
-					uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
-					uint32_t gfx_index_val,
-					uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-							uint8_t vmid);
-
-static void set_scratch_backing_va(struct kgd_dev *kgd,
-					uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
-
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
  */
@@ -170,37 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd,
 	return 0;
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
-	.program_sh_mem_settings = kgd_program_sh_mem_settings,
-	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_interrupts = kgd_init_interrupts,
-	.hqd_load = kgd_hqd_load,
-	.hqd_sdma_load = kgd_hqd_sdma_load,
-	.hqd_dump = kgd_hqd_dump,
-	.hqd_sdma_dump = kgd_hqd_sdma_dump,
-	.hqd_is_occupied = kgd_hqd_is_occupied,
-	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-	.hqd_destroy = kgd_hqd_destroy,
-	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-	.address_watch_disable = kgd_address_watch_disable,
-	.address_watch_execute = kgd_address_watch_execute,
-	.wave_control_execute = kgd_wave_control_execute,
-	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
-	.set_scratch_backing_va = set_scratch_backing_va,
-	.get_tile_config = get_tile_config,
-	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
-	return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 {
 	return (struct amdgpu_device *)kgd;
@@ -303,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
 {
 	uint32_t retval;
 
 	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
 
-	pr_debug("sdma base address: 0x%x\n", retval);
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+			m->sdma_engine_id, m->sdma_queue_id, retval);
 
 	return retval;
 }
@@ -413,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
 	unsigned long end_jiffies;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t data;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
 		m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
 	end_jiffies = msecs_to_jiffies(2000) + jiffies;
 	while (true) {
-		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
-	if (m->sdma_engine_id) {
-		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
-		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
-				RESUME_CTX, 0);
-		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
-	} else {
-		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
-		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-				RESUME_CTX, 0);
-		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
-	}
 
 	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
 			     ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+				m->sdma_rlc_rb_rptr);
 
 	if (read_user_wptr(mm, wptr, data))
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
 	else
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       m->sdma_rlc_rb_rptr);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
 				m->sdma_rlc_virtual_addr);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
 			m->sdma_rlc_rb_base_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 			m->sdma_rlc_rb_rptr_addr_lo);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdma_rlc_rb_rptr_addr_hi);
 
 	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
 			     RB_ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
 
 	return 0;
 }
@@ -524,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t sdma_rlc_rb_cntl;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
 
-	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 
 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 		return true;
@@ -645,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t temp;
 	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
 
-	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
 
 	while (true) {
-		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
-	m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
 
 	return 0;
 }
@@ -758,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -801,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid\n");
-		return 0;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
  /**
   * read_vmid_from_vmfault_reg - read vmid from register
   *
@@ -855,3 +711,26 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
 
 	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
 }
+
+const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6d2f61449606..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -20,9 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
 #include <linux/mmu_context.h>
 
 #include "amdgpu.h"
@@ -44,62 +41,6 @@ enum hqd_dequeue_request_type {
 	RESET_WAVES
 };
 
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t sh_mem_config,
-		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-		uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-		unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr,
-			uint32_t wptr_shift, uint32_t wptr_mask,
-			struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
-			uint32_t pipe_id, uint32_t queue_id,
-			uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-			     uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-			     uint32_t engine_id, uint32_t queue_id,
-			     uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-		uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-				enum kfd_preempt_type reset_type,
-				unsigned int utimeout, uint32_t pipe_id,
-				uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-				unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					uint32_t cntl_val,
-					uint32_t addr_hi,
-					uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
-					uint32_t gfx_index_val,
-					uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-					unsigned int watch_point_id,
-					unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static void set_scratch_backing_va(struct kgd_dev *kgd,
-					uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
  */
@@ -125,38 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd,
 	return 0;
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
-	.program_sh_mem_settings = kgd_program_sh_mem_settings,
-	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_interrupts = kgd_init_interrupts,
-	.hqd_load = kgd_hqd_load,
-	.hqd_sdma_load = kgd_hqd_sdma_load,
-	.hqd_dump = kgd_hqd_dump,
-	.hqd_sdma_dump = kgd_hqd_sdma_dump,
-	.hqd_is_occupied = kgd_hqd_is_occupied,
-	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-	.hqd_destroy = kgd_hqd_destroy,
-	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-	.address_watch_disable = kgd_address_watch_disable,
-	.address_watch_execute = kgd_address_watch_execute,
-	.wave_control_execute = kgd_wave_control_execute,
-	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			get_atc_vmid_pasid_mapping_valid,
-	.set_scratch_backing_va = set_scratch_backing_va,
-	.get_tile_config = get_tile_config,
-	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
-	return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 {
 	return (struct amdgpu_device *)kgd;
@@ -260,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
 {
 	uint32_t retval;
 
 	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
-	pr_debug("sdma base address: 0x%x\n", retval);
+
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+			m->sdma_engine_id, m->sdma_queue_id, retval);
 
 	return retval;
 }
@@ -398,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct vi_sdma_mqd *m;
 	unsigned long end_jiffies;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t data;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
 		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
 	end_jiffies = msecs_to_jiffies(2000) + jiffies;
 	while (true) {
-		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
-	if (m->sdma_engine_id) {
-		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
-		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
-				RESUME_CTX, 0);
-		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
-	} else {
-		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
-		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-				RESUME_CTX, 0);
-		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
-	}
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 			     ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+				m->sdmax_rlcx_rb_rptr);
 
 	if (read_user_wptr(mm, wptr, data))
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
 	else
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       m->sdmax_rlcx_rb_rptr);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
 				m->sdmax_rlcx_virtual_addr);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
 			m->sdmax_rlcx_rb_base_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 			m->sdmax_rlcx_rb_rptr_addr_lo);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdmax_rlcx_rb_rptr_addr_hi);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
 			     RB_ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
 
 	return 0;
 }
@@ -517,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct vi_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t sdma_rlc_rb_cntl;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
 
-	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 
 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 		return true;
@@ -641,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct vi_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t temp;
 	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(m);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
 
-	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
 
 	while (true) {
-		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
-	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
 
 	return 0;
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -760,41 +657,25 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 			lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return -EINVAL;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
+const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_info =
+			get_atc_vmid_pasid_mapping_info,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e262f2ac07a3..8562afe5b761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -19,17 +19,10 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
 #include <linux/mmu_context.h>
 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "soc15_hw_ip.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "vega10_enum.h"
@@ -47,11 +40,7 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
-#include "gmc_v9_0.h"
-
 
-#define V9_PIPE_PER_MEC		(4)
-#define V9_QUEUES_PER_PIPE_MEC	(8)
 
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
@@ -114,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
 			       uint32_t pipe_id, uint32_t queue_id)
 {
-	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-			    queue_id) & 31;
+	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			queue_id;
 
-	return ((uint32_t)1) << bit;
+	return 1ull << bit;
 }
 
 static void release_queue(struct kgd_dev *kgd)
@@ -226,22 +215,21 @@ int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
 				unsigned int engine_id,
 				unsigned int queue_id)
 {
-	uint32_t base[2] = {
+	uint32_t sdma_engine_reg_base[2] = {
 		SOC15_REG_OFFSET(SDMA0, 0,
 				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
 		SOC15_REG_OFFSET(SDMA1, 0,
 				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
 	};
-	uint32_t retval;
+	uint32_t retval = sdma_engine_reg_base[engine_id]
+		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
-	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-					       mmSDMA0_RLC0_RB_CNTL);
-
-	pr_debug("sdma base address: 0x%x\n", retval);
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+			queue_id, retval);
 
 	return retval;
 }
@@ -270,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	/* HIQ is set during driver init period with vmid set to 0*/
-	if (m->cp_hqd_vmid == 0) {
-		uint32_t value, mec, pipe;
-
-		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-			mec, pipe, queue_id);
-		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-	}
-
 	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
 	mqd_hqd = &m->cp_mqd_base_addr_lo;
 	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -335,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 		       upper_32_bits((uintptr_t)wptr));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-		       get_queue_mask(adev, pipe_id, queue_id));
+		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 	}
 
 	/* Start the EOP fetcher */
@@ -351,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	struct v9_mqd *m;
+	uint32_t mec, pipe;
+	int r;
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+		 mec, pipe, queue_id);
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_ring_alloc(kiq_ring, 7);
+	if (r) {
+		pr_err("Failed to alloc KIQ (%d).\n", r);
+		goto out_unlock;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+			  PACKET3_MAP_QUEUES_PIPE(pipe) |
+			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+	amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+	release_queue(kgd);
+
+	return r;
+}
+
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs)
@@ -388,71 +414,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+	uint32_t sdma_rlc_reg_offset;
 	unsigned long end_jiffies;
 	uint32_t data;
 	uint64_t data64;
 	uint64_t __user *wptr64 = (uint64_t __user *)wptr;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
-	sdmax_gfx_context_cntl = m->sdma_engine_id ?
-		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
-		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
 		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
 	end_jiffies = msecs_to_jiffies(2000) + jiffies;
 	while (true) {
-		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
-	data = RREG32(sdmax_gfx_context_cntl);
-	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-			     RESUME_CTX, 0);
-	WREG32(sdmax_gfx_context_cntl, data);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
 	       m->sdmax_rlcx_doorbell_offset);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 			     ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+				m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
 				m->sdmax_rlcx_rb_rptr_hi);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
 	if (read_user_wptr(mm, wptr64, data64)) {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       lower_32_bits(data64));
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       upper_32_bits(data64));
 	} else {
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
 		       m->sdmax_rlcx_rb_rptr);
-		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
 		       m->sdmax_rlcx_rb_rptr_hi);
 	}
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
 			m->sdmax_rlcx_rb_base_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 			m->sdmax_rlcx_rb_rptr_addr_lo);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdmax_rlcx_rb_rptr_addr_hi);
 
 	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
 			     RB_ENABLE, 1);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
 
 	return 0;
 }
@@ -462,7 +484,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 			     uint32_t (**dump)[2], uint32_t *n_regs)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+			engine_id, queue_id);
 	uint32_t i = 0, reg;
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
@@ -472,15 +495,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 		return -ENOMEM;
 
 	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
 	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
 	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
-		DUMP_REG(sdma_base_addr + reg);
+		DUMP_REG(sdma_rlc_reg_offset + reg);
 
 	WARN_ON_ONCE(i != HQD_N_REGS);
 	*n_regs = i;
@@ -514,14 +537,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t sdma_rlc_rb_cntl;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 
 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 		return true;
@@ -584,151 +607,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct v9_sdma_mqd *m;
-	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_reg_offset;
 	uint32_t temp;
 	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
-	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
 					    m->sdma_queue_id);
 
-	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
 	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
 
 	while (true) {
-		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (time_after(jiffies, end_jiffies))
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
 			return -ETIME;
+		}
 		usleep_range(500, 1000);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
-	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
 	m->sdmax_rlcx_rb_rptr_hi =
-		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
 
 	return 0;
 }
 
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
-			uint32_t flush_type)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid, i;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	uint32_t flush_type = 0;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
-	if (ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
-			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
-				== pasid) {
-				for (i = 0; i < adev->num_vmhubs; i++)
-					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-								i, flush_type);
-				break;
-			}
-		}
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int i;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	/* Use legacy mode tlb invalidation.
-	 *
-	 * Currently on Raven the code below is broken for anything but
-	 * legacy mode due to a MMHUB power gating problem. A workaround
-	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
-	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
-	 * bit.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate both GC and
-	 * MMHUB.
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	for (i = 0; i < adev->num_vmhubs; i++)
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
-	return 0;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
@@ -777,17 +701,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 	return 0;
 }
 
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
-					uint64_t va, uint32_t vmid)
-{
-	/* No longer needed on GFXv9. The scratch base address is
-	 * passed to the shader by the CP. It's the user mode driver's
-	 * responsibility.
-	 */
-}
-
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+			uint32_t vmid, uint64_t page_table_base)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
@@ -797,25 +712,17 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi
 		return;
 	}
 
-	/* TODO: take advantage of per-process address space size. For
-	 * now, all processes share the same address space size, like
-	 * on GFX8 and older.
-	 */
-	if (adev->asic_type == CHIP_ARCTURUS) {
-		/* Two MMHUBs */
-		mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
-		mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
-	} else
-		mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 
 	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
+const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
 	.init_interrupts = kgd_gfx_v9_init_interrupts,
 	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
 	.hqd_dump = kgd_gfx_v9_hqd_dump,
 	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -827,19 +734,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
 	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
 	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
-	.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
+	.get_atc_vmid_pasid_mapping_info =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
-	return (struct kfd2kgd_calls *)&kfd2kgd;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 26d8879bff9d..63d3e6683dfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off);
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs);
@@ -55,15 +58,7 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
 
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint64_t page_table_base);
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
-					uint64_t va, uint32_t vmid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
 		struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d021ecc8d59..b2487f4f271b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include <linux/dma-buf.h>
 #include <linux/list.h>
 #include <linux/pagemap.h>
@@ -33,11 +30,6 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_dma_buf.h"
 
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug.
- */
-#define VI_BO_SIZE_ALIGN (0x8000)
-
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
 
@@ -93,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 }
 
 /* Set memory usage limits. Current, limits are
- *  System (TTM + userptr) memory - 3/4th System RAM
+ *  System (TTM + userptr) memory - 15/16th System RAM
  *  TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -106,18 +98,31 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
 	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
 	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
 		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 domain, bool sg)
 {
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
 	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
-	uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -349,11 +354,44 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	int ret;
 
-	ret = amdgpu_vm_update_directories(adev, vm);
+	ret = amdgpu_vm_update_pdes(adev, vm, false);
 	if (ret)
 		return ret;
 
-	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+	return amdgpu_sync_fence(sync, vm->last_update, false);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+	bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
+	uint32_t mapping_flags;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE;
+	if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+	if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+	switch (adev->asic_type) {
+	case CHIP_ARCTURUS:
+		if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+			if (bo_adev == adev)
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+			else
+				mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		} else {
+			mapping_flags |= coherent ?
+				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+		}
+		break;
+	default:
+		mapping_flags |= coherent ?
+			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+	}
+
+	return amdgpu_gem_va_map_flags(adev, mapping_flags);
 }
 
 /* add_bo_to_vm - Add a BO to a VM
@@ -404,8 +442,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	}
 
 	bo_va_entry->va = va;
-	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
-							 mem->mapping_flags);
+	bo_va_entry->pte_flags = get_pte_flags(adev, mem);
 	bo_va_entry->kgd_dev = (void *)adev;
 	list_add(&bo_va_entry->bo_list, list_bo_va);
 
@@ -481,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
-			   uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	struct amdgpu_bo *bo = mem->bo;
@@ -586,7 +622,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates, true);
+				     false, &ctx->duplicates);
 	if (!ret)
 		ctx->reserved = true;
 	else {
@@ -659,7 +695,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates, true);
+				     false, &ctx->duplicates);
 	if (!ret)
 		ctx->reserved = true;
 	else
@@ -714,7 +750,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 
 	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
 
 	return 0;
 }
@@ -733,7 +769,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 		return ret;
 	}
 
-	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -1079,10 +1115,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
-	int byte_align;
 	u32 domain, alloc_domain;
 	u64 alloc_flags;
-	uint32_t mapping_flags;
 	int ret;
 
 	/*
@@ -1135,25 +1169,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	if ((*mem)->aql_queue)
 		size = size >> 1;
 
-	/* Workaround for TLB bug on older VI chips */
-	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
-			adev->asic_type != CHIP_FIJI &&
-			adev->asic_type != CHIP_POLARIS10 &&
-			adev->asic_type != CHIP_POLARIS11 &&
-			adev->asic_type != CHIP_POLARIS12 &&
-			adev->asic_type != CHIP_VEGAM) ?
-			VI_BO_SIZE_ALIGN : 1;
-
-	mapping_flags = AMDGPU_VM_PAGE_READABLE;
-	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
-		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
-	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
-		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
-	if (flags & ALLOC_MEM_FLAGS_COHERENT)
-		mapping_flags |= AMDGPU_VM_MTYPE_UC;
-	else
-		mapping_flags |= AMDGPU_VM_MTYPE_NC;
-	(*mem)->mapping_flags = mapping_flags;
+	(*mem)->alloc_flags = flags;
 
 	amdgpu_sync_create(&(*mem)->sync);
 
@@ -1168,7 +1184,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
-	bp.byte_align = byte_align;
+	bp.byte_align = 1;
 	bp.domain = alloc_domain;
 	bp.flags = alloc_flags;
 	bp.type = bo_type;
@@ -1195,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
 	if (user_addr) {
-		ret = init_user_pages(*mem, current->mm, user_addr);
+		ret = init_user_pages(*mem, user_addr);
 		if (ret)
 			goto allocate_init_user_pages_failed;
 	}
@@ -1626,9 +1642,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 
 	INIT_LIST_HEAD(&(*mem)->bo_va_list);
 	mutex_init(&(*mem)->lock);
-	(*mem)->mapping_flags =
-		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+	(*mem)->alloc_flags =
+		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		 ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) |
+		ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE;
 
 	(*mem)->bo = amdgpu_bo_ref(bo);
 	(*mem)->va = va;
@@ -1657,10 +1674,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
 				struct mm_struct *mm)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
-	int invalid, evicted_bos;
+	int evicted_bos;
 	int r = 0;
 
-	invalid = atomic_inc_return(&mem->invalid);
+	atomic_inc(&mem->invalid);
 	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
 	if (evicted_bos == 1) {
 		/* First eviction, stop the queues */
@@ -1739,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			return ret;
 		}
 
+		/*
+		 * FIXME: Cannot ignore the return code, must hold
+		 * notifier_lock
+		 */
 		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 		/* Mark the BO as valid unless it was invalidated
@@ -1797,8 +1818,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	}
 
 	/* Reserve all BOs and page tables for validation */
-	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
-				     true);
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
 	WARN(!list_empty(&duplicates), "Duplicates should be empty");
 	if (ret)
 		goto out_free;
@@ -1996,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-				     false, &duplicate_save, true);
+				     false, &duplicate_save);
 	if (ret) {
 		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
 		goto ttm_reserve_fail;
@@ -2028,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
 			goto validate_map_fail;
 		}
-		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
+		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
 		if (ret) {
 			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
 			goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1c9d40f97a9b..fdd52d86a4d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
 		path_size += le16_to_cpu(path->usSize);
 
 		if (device_support & le16_to_cpu(path->usDeviceTag)) {
-			uint8_t con_obj_id, con_obj_num, con_obj_type;
-
-			con_obj_id =
+			uint8_t con_obj_id =
 			    (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
 			    >> OBJECT_ID_SHIFT;
-			con_obj_num =
-			    (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
-			    >> ENUM_ID_SHIFT;
-			con_obj_type =
-			    (le16_to_cpu(path->usConnObjectId) &
-			     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
 
 			/* Skip TV/CV support */
 			if ((le16_to_cpu(path->usDeviceTag) ==
@@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
 			router.ddc_valid = false;
 			router.cd_valid = false;
 			for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
-				uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
-
-				grph_obj_id =
-				    (le16_to_cpu(path->usGraphicObjIds[j]) &
-				     OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
-				grph_obj_num =
-				    (le16_to_cpu(path->usGraphicObjIds[j]) &
-				     ENUM_ID_MASK) >> ENUM_ID_SHIFT;
-				grph_obj_type =
+				uint8_t grph_obj_type =
 				    (le16_to_cpu(path->usGraphicObjIds[j]) &
 				     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
 
@@ -2038,6 +2022,11 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
 	if (adev->is_atom_fw) {
 		amdgpu_atomfirmware_scratch_regs_init(adev);
 		amdgpu_atomfirmware_allocate_fb_scratch(adev);
+		ret = amdgpu_atomfirmware_get_mem_train_info(adev);
+		if (ret) {
+			DRM_ERROR("Failed to get mem train fb location.\n");
+			return ret;
+		}
 	} else {
 		amdgpu_atombios_scratch_regs_init(adev);
 		amdgpu_atombios_allocate_fb_scratch(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index daf687428cdb..58f9d8c3a17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -27,6 +27,7 @@
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
 #include "atombios.h"
+#include "soc15_hw_ip.h"
 
 bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 {
@@ -120,65 +121,14 @@ union vram_info {
 	struct atom_vram_info_header_v2_3 v23;
 	struct atom_vram_info_header_v2_4 v24;
 };
-/*
- * Return vram width from integrated system info table, if available,
- * or 0 if not.
- */
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
-{
-	struct amdgpu_mode_info *mode_info = &adev->mode_info;
-	int index;
-	u16 data_offset, size;
-	union igp_info *igp_info;
-	union vram_info *vram_info;
-	u32 mem_channel_number;
-	u32 mem_channel_width;
-	u8 frev, crev;
 
-	if (adev->flags & AMD_IS_APU)
-		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-						    integratedsysteminfo);
-	else
-		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-						    vram_info);
-
-	/* get any igp specific overrides */
-	if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
-				   &frev, &crev, &data_offset)) {
-		if (adev->flags & AMD_IS_APU) {
-			igp_info = (union igp_info *)
-				(mode_info->atom_context->bios + data_offset);
-			switch (crev) {
-			case 11:
-				mem_channel_number = igp_info->v11.umachannelnumber;
-				/* channel width is 64 */
-				return mem_channel_number * 64;
-			default:
-				return 0;
-			}
-		} else {
-			vram_info = (union vram_info *)
-				(mode_info->atom_context->bios + data_offset);
-			switch (crev) {
-			case 3:
-				mem_channel_number = vram_info->v23.vram_module[0].channel_num;
-				mem_channel_width = vram_info->v23.vram_module[0].channel_width;
-				return mem_channel_number * (1 << mem_channel_width);
-			case 4:
-				mem_channel_number = vram_info->v24.vram_module[0].channel_num;
-				mem_channel_width = vram_info->v24.vram_module[0].channel_width;
-				return mem_channel_number * (1 << mem_channel_width);
-			default:
-				return 0;
-			}
-		}
-	}
-
-	return 0;
-}
+union vram_module {
+	struct atom_vram_module_v9 v9;
+	struct atom_vram_module_v10 v10;
+};
 
-static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
-					       int atom_mem_type)
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+					      int atom_mem_type)
 {
 	int vram_type;
 
@@ -219,19 +169,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
 
 	return vram_type;
 }
-/*
- * Return vram type from either integrated system info table
- * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
- */
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+
+
+int
+amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+				  int *vram_width, int *vram_type,
+				  int *vram_vendor)
 {
 	struct amdgpu_mode_info *mode_info = &adev->mode_info;
-	int index;
+	int index, i = 0;
 	u16 data_offset, size;
 	union igp_info *igp_info;
 	union vram_info *vram_info;
+	union vram_module *vram_module;
 	u8 frev, crev;
 	u8 mem_type;
+	u8 mem_vendor;
+	u32 mem_channel_number;
+	u32 mem_channel_width;
+	u32 module_id;
 
 	if (adev->flags & AMD_IS_APU)
 		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
@@ -239,6 +195,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
 	else
 		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
 						    vram_info);
+
 	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
 					  index, &size,
 					  &frev, &crev, &data_offset)) {
@@ -247,25 +204,67 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
 				(mode_info->atom_context->bios + data_offset);
 			switch (crev) {
 			case 11:
+				mem_channel_number = igp_info->v11.umachannelnumber;
+				/* channel width is 64 */
+				if (vram_width)
+					*vram_width = mem_channel_number * 64;
 				mem_type = igp_info->v11.memorytype;
-				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+				if (vram_type)
+					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+				break;
 			default:
-				return 0;
+				return -EINVAL;
 			}
 		} else {
 			vram_info = (union vram_info *)
 				(mode_info->atom_context->bios + data_offset);
+			module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
 			switch (crev) {
 			case 3:
-				mem_type = vram_info->v23.vram_module[0].memory_type;
-				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+				if (module_id > vram_info->v23.vram_module_num)
+					module_id = 0;
+				vram_module = (union vram_module *)vram_info->v23.vram_module;
+				while (i < module_id) {
+					vram_module = (union vram_module *)
+						((u8 *)vram_module + vram_module->v9.vram_module_size);
+					i++;
+				}
+				mem_type = vram_module->v9.memory_type;
+				if (vram_type)
+					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+				mem_channel_number = vram_module->v9.channel_num;
+				mem_channel_width = vram_module->v9.channel_width;
+				if (vram_width)
+					*vram_width = mem_channel_number * (1 << mem_channel_width);
+				mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+				if (vram_vendor)
+					*vram_vendor = mem_vendor;
+				break;
 			case 4:
-				mem_type = vram_info->v24.vram_module[0].memory_type;
-				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+				if (module_id > vram_info->v24.vram_module_num)
+					module_id = 0;
+				vram_module = (union vram_module *)vram_info->v24.vram_module;
+				while (i < module_id) {
+					vram_module = (union vram_module *)
+						((u8 *)vram_module + vram_module->v10.vram_module_size);
+					i++;
+				}
+				mem_type = vram_module->v10.memory_type;
+				if (vram_type)
+					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+				mem_channel_number = vram_module->v10.channel_num;
+				mem_channel_width = vram_module->v10.channel_width;
+				if (vram_width)
+					*vram_width = mem_channel_number * (1 << mem_channel_width);
+				mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+				if (vram_vendor)
+					*vram_vendor = mem_vendor;
+				break;
 			default:
-				return 0;
+				return -EINVAL;
 			}
 		}
+
 	}
 
 	return 0;
@@ -464,3 +463,108 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
 	}
 	return -EINVAL;
 }
+
+/*
+ * Check if VBIOS supports GDDR6 training data save/restore
+ */
+static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
+{
+	uint16_t data_offset;
+	int index;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    firmwareinfo);
+	if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+					  NULL, NULL, &data_offset)) {
+		struct atom_firmware_info_v3_1 *firmware_info =
+			(struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+							   data_offset);
+
+		DRM_DEBUG("atom firmware capability:0x%08x.\n",
+			  le32_to_cpu(firmware_info->firmware_capability));
+
+		if (le32_to_cpu(firmware_info->firmware_capability) &
+		    ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING)
+			return true;
+	}
+
+	return false;
+}
+
+static int gddr6_mem_train_support(struct amdgpu_device *adev)
+{
+	int ret;
+	uint32_t major, minor, revision, hw_v;
+
+	if (gddr6_mem_train_vbios_support(adev)) {
+		amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision);
+		hw_v = HW_REV(major, minor, revision);
+		/*
+		 * treat 0 revision as a special case since register for MP0 and MMHUB is missing
+		 * for some Navi10 A0, preventing driver from discovering the hwip information since
+		 * none of the functions will be initialized, it should not cause any problems
+		 */
+		switch (hw_v) {
+		case HW_REV(11, 0, 0):
+		case HW_REV(11, 0, 5):
+			ret = 1;
+			break;
+		default:
+			DRM_ERROR("memory training vbios supports but psp hw(%08x)"
+				  " doesn't support!\n", hw_v);
+			ret = -1;
+			break;
+		}
+	} else {
+		ret = 0;
+		hw_v = -1;
+	}
+
+
+	DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret);
+	return ret;
+}
+
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev)
+{
+	struct atom_context *ctx = adev->mode_info.atom_context;
+	int index;
+	uint8_t frev, crev;
+	uint16_t data_offset, size;
+	int ret;
+
+	adev->fw_vram_usage.mem_train_support = false;
+
+	if (adev->asic_type != CHIP_NAVI10 &&
+	    adev->asic_type != CHIP_NAVI14)
+		return 0;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = gddr6_mem_train_support(adev);
+	if (ret == -1)
+		return -EINVAL;
+	else if (ret == 0)
+		return 0;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    vram_usagebyfirmware);
+	ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev,
+					    &data_offset);
+	if (ret == 0) {
+		DRM_ERROR("parse data header failed.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x,"
+		  " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset);
+	/* only support 2.1+ */
+	if (((uint16_t)frev << 8 | crev) < 0x0201) {
+		DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev);
+		return -EINVAL;
+	}
+
+	adev->fw_vram_usage.mem_train_support = true;
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 5ec6f92f353c..434fe2fa0089 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -29,8 +29,9 @@
 bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+	int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
 bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 649e68c4479b..d1495e1c9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 {
 	unsigned long start_jiffies;
 	unsigned long end_jiffies;
-	struct dma_fence *fence = NULL;
+	struct dma_fence *fence;
 	int i, r;
 
 	start_jiffies = jiffies;
@@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 		if (r)
 			goto exit_do_move;
 		r = dma_fence_wait(fence, false);
+		dma_fence_put(fence);
 		if (r)
 			goto exit_do_move;
-		dma_fence_put(fence);
 	}
 	end_jiffies = jiffies;
 	r = jiffies_to_msecs(end_jiffies - start_jiffies);
 
 exit_do_move:
-	if (fence)
-		dma_fence_put(fence);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index ece55c8fa673..a62cbc8199de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -217,11 +217,10 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
 	struct drm_encoder *encoder;
 	const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
 	bool connected;
-	int i;
 
 	best_encoder = connector_funcs->best_encoder(connector);
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		if ((encoder == best_encoder) && (status == connector_status_connected))
 			connected = true;
 		else
@@ -236,9 +235,8 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
 			       int encoder_type)
 {
 	struct drm_encoder *encoder;
-	int i;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		if (encoder->encoder_type == encoder_type)
 			return encoder;
 	}
@@ -347,10 +345,9 @@ static struct drm_encoder *
 amdgpu_connector_best_single_encoder(struct drm_connector *connector)
 {
 	struct drm_encoder *encoder;
-	int i;
 
 	/* pick the first one */
-	drm_connector_for_each_possible_encoder(connector, encoder, i)
+	drm_connector_for_each_possible_encoder(connector, encoder)
 		return encoder;
 
 	return NULL;
@@ -1022,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			 */
 			if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
 				struct drm_connector *list_connector;
+				struct drm_connector_list_iter iter;
 				struct amdgpu_connector *list_amdgpu_connector;
-				list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
+
+				drm_connector_list_iter_begin(dev, &iter);
+				drm_for_each_connector_iter(list_connector,
+							    &iter) {
 					if (connector == list_connector)
 						continue;
 					list_amdgpu_connector = to_amdgpu_connector(list_connector);
@@ -1040,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 						}
 					}
 				}
+				drm_connector_list_iter_end(&iter);
 			}
 		}
 	}
@@ -1065,9 +1067,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 	/* find analog encoder */
 	if (amdgpu_connector->dac_load_detect) {
 		struct drm_encoder *encoder;
-		int i;
 
-		drm_connector_for_each_possible_encoder(connector, encoder, i) {
+		drm_connector_for_each_possible_encoder(connector, encoder) {
 			if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
 			    encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
 				continue;
@@ -1117,9 +1118,8 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 	struct drm_encoder *encoder;
-	int i;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		if (amdgpu_connector->use_digital == true) {
 			if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
 				return encoder;
@@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 
 	/* then check use digitial */
 	/* pick the first one */
-	drm_connector_for_each_possible_encoder(connector, encoder, i)
+	drm_connector_for_each_possible_encoder(connector, encoder)
 		return encoder;
 
 	return NULL;
@@ -1271,9 +1271,8 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
 {
 	struct drm_encoder *encoder;
 	struct amdgpu_encoder *amdgpu_encoder;
-	int i;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		amdgpu_encoder = to_amdgpu_encoder(encoder);
 
 		switch (amdgpu_encoder->encoder_id) {
@@ -1292,10 +1291,9 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
 {
 	struct drm_encoder *encoder;
 	struct amdgpu_encoder *amdgpu_encoder;
-	int i;
 	bool found = false;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		amdgpu_encoder = to_amdgpu_encoder(encoder);
 		if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
 			found = true;
@@ -1501,6 +1499,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector;
 	struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
 	struct drm_encoder *encoder;
@@ -1515,10 +1514,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 		return;
 
 	/* see if we already added it */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		amdgpu_connector = to_amdgpu_connector(connector);
 		if (amdgpu_connector->connector_id == connector_id) {
 			amdgpu_connector->devices |= supported_device;
+			drm_connector_list_iter_end(&iter);
 			return;
 		}
 		if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
@@ -1533,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 			}
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	/* check if it's a dp bridge */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82823d9a8ba8..a52a084158b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -35,6 +35,7 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
@@ -449,75 +450,12 @@ retry:
 	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (bo == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-				!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-				amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
 	struct amdgpu_cs_parser *p = param;
 	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
 	if (r)
 		return r;
 
@@ -554,9 +492,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 						     lobj->user_pages);
 		}
 
-		if (p->evictable == lobj)
-			p->evictable = NULL;
-
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
@@ -603,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		e->tv.num_shared = 2;
 
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -646,7 +579,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-				   &duplicates, false);
+				   &duplicates);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
@@ -657,9 +590,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
 	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
-				       struct amdgpu_bo_list_entry,
-				       tv.head);
 
 	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
 				      amdgpu_cs_validate, p);
@@ -865,29 +795,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync,
-			      fpriv->prt_va->last_pt_update, false);
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
 
 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
 		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			return r;
 
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
 
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
 		/* ignore duplicates */
 		bo = ttm_to_amdgpu_bo(e->tv.bo);
 		if (!bo)
@@ -901,8 +825,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -911,11 +834,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_update_directories(adev, vm);
+	r = amdgpu_vm_update_pdes(adev, vm, false);
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
 	if (r)
 		return r;
 
@@ -986,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
+		/* Return if there is no run queue associated with this entity.
+		 * Possibly because of disabled HW IP*/
+		if (entity->rq == NULL)
+			return -EINVAL;
+
 		parser->entity = entity;
 
 		ring = to_amdgpu_ring(entity->rq->sched);
@@ -1057,7 +985,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 			dma_fence_put(old);
 		}
 
-		r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+		r = amdgpu_sync_fence(&p->job->sync, fence, true);
 		dma_fence_put(fence);
 		if (r)
 			return r;
@@ -1079,7 +1007,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = amdgpu_sync_fence(&p->job->sync, fence, true);
 	dma_fence_put(fence);
 
 	return r;
@@ -1287,11 +1215,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock.
-	 * p->mn is hold until amdgpu_cs_submit is finished and fence is added
-	 * to BOs.
+	/* No memory allocation is allowed while holding the notifier lock.
+	 * The lock is held until amdgpu_cs_submit is finished and fence is
+	 * added to BOs.
 	 */
-	amdgpu_mn_lock(p->mn);
+	mutex_lock(&p->adev->notifier_lock);
 
 	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
 	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1306,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		goto error_abort;
 	}
 
-	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
 	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
@@ -1334,13 +1261,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
 	return 0;
 
 error_abort:
 	drm_sched_job_cleanup(&job->base);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
 	amdgpu_job_free(job);
@@ -1355,6 +1282,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	bool reserved_buffers = false;
 	int i, r;
 
+	if (amdgpu_ras_intr_triggered())
+		return -EHWPOISON;
+
 	if (!adev->accel_working)
 		return -EBUSY;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 35a8d3c96fc9..08047bc4d588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -80,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	list_add(&csa_tv.head, &list);
 	amdgpu_vm_get_pd_bo(vm, &list, &pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
 	if (r) {
 		DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 2cdaf3b2a721..64e2babbc36e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct amdgpu_ctx *ctx)
 {
 	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j, k;
+	unsigned i, j;
 	int r;
 
 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -121,72 +121,57 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-		unsigned num_rings = 0;
-		unsigned num_rqs = 0;
+		struct drm_gpu_scheduler **scheds;
+		struct drm_gpu_scheduler *sched;
+		unsigned num_scheds = 0;
 
 		switch (i) {
 		case AMDGPU_HW_IP_GFX:
-			rings[0] = &adev->gfx.gfx_ring[0];
-			num_rings = 1;
+			sched = &adev->gfx.gfx_ring[0].sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-				rings[j] = &adev->gfx.compute_ring[j];
-			num_rings = adev->gfx.num_compute_rings;
+			scheds = adev->gfx.compute_sched;
+			num_scheds = adev->gfx.num_compute_sched;
 			break;
 		case AMDGPU_HW_IP_DMA:
-			for (j = 0; j < adev->sdma.num_instances; ++j)
-				rings[j] = &adev->sdma.instance[j].ring;
-			num_rings = adev->sdma.num_instances;
+			scheds = adev->sdma.sdma_sched;
+			num_scheds = adev->sdma.num_sdma_sched;
 			break;
 		case AMDGPU_HW_IP_UVD:
-			rings[0] = &adev->uvd.inst[0].ring;
-			num_rings = 1;
+			sched = &adev->uvd.inst[0].ring.sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_VCE:
-			rings[0] = &adev->vce.ring[0];
-			num_rings = 1;
+			sched = &adev->vce.ring[0].sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_UVD_ENC:
-			rings[0] = &adev->uvd.inst[0].ring_enc[0];
-			num_rings = 1;
+			sched = &adev->uvd.inst[0].ring_enc[0].sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-			}
+			scheds = adev->vcn.vcn_dec_sched;
+			num_scheds =  adev->vcn.num_vcn_dec_sched;
 			break;
 		case AMDGPU_HW_IP_VCN_ENC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-			}
+			scheds = adev->vcn.vcn_enc_sched;
+			num_scheds =  adev->vcn.num_vcn_enc_sched;
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
-			}
+			scheds = adev->jpeg.jpeg_sched;
+			num_scheds =  adev->jpeg.num_jpeg_sched;
 			break;
 		}
 
-		for (j = 0; j < num_rings; ++j) {
-			if (!rings[j]->adev)
-				continue;
-
-			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
-		}
-
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
 			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rqs, &ctx->guilty);
+						  priority, scheds,
+						  num_scheds, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
@@ -604,11 +589,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++) {
-			mutex_lock(&ctx->adev->lock_reset);
+		for (i = 0; i < num_entities; i++)
 			drm_sched_entity_fini(&ctx->entities[0][i].entity);
-			mutex_unlock(&ctx->adev->lock_reset);
-		}
 	}
 }
 
@@ -630,3 +612,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
+{
+	int i, j;
+
+	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+		adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
+		adev->gfx.num_gfx_sched++;
+	}
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
+		adev->gfx.num_compute_sched++;
+	}
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
+		adev->sdma.num_sdma_sched++;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
+			&adev->vcn.inst[i].ring_dec.sched;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+			adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
+				&adev->vcn.inst[i].ring_enc[j].sched;
+	}
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+		adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
+			&adev->jpeg.inst[i].ring_dec.sched;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index da808633732b..4ad90a44dc3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,4 +87,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
+
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 5652cc72ed3a..f24ed9a1a3e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -26,6 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/pci.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drm_debugfs.h>
 
@@ -129,7 +130,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 			sh_bank = 0xFFFFFFFF;
 		if (instance_bank == 0x3FF)
 			instance_bank = 0xFFFFFFFF;
-		use_bank = 1;
+		use_bank = true;
 	} else if (*pos & (1ULL << 61)) {
 
 		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
@@ -137,17 +138,24 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
 		vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
 
-		use_ring = 1;
+		use_ring = true;
 	} else {
-		use_bank = use_ring = 0;
+		use_bank = use_ring = false;
 	}
 
 	*pos &= (1UL << 22) - 1;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 		mutex_lock(&adev->grbm_idx_mutex);
 		amdgpu_gfx_select_se_sh(adev, se_bank,
 					sh_bank, instance_bank);
@@ -193,6 +201,9 @@ end:
 	if (pm_pg_lock)
 		mutex_unlock(&adev->pm.mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_PCIE(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_PCIE(*pos >> 2, value);
 
@@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_DIDT(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_DIDT(*pos >> 2, value);
 
@@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_SMC(*pos);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_SMC(*pos, value);
 
@@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	idx = *pos >> 2;
 
 	valuesize = sizeof(values);
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
 	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (!x)
 		return -EINVAL;
 
@@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	if (!data)
 		return -ENOMEM;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	while (size) {
 		uint32_t value;
 
@@ -859,6 +953,13 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 	int r = 0, i;
 
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return r;
+
+	/* Avoid accidently unparking the sched thread during GPU reset */
+	mutex_lock(&adev->lock_reset);
+
 	/* hold on the scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -884,6 +985,11 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 		kthread_unpark(ring->sched.thread);
 	}
 
+	mutex_unlock(&adev->lock_reset);
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
@@ -902,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return r;
 
 	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
@@ -912,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return r;
 
 	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
@@ -1036,6 +1160,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	if (!fences)
 		return -ENOMEM;
 
+	/* Avoid accidently unparking the sched thread during GPU reset */
+	mutex_lock(&adev->lock_reset);
+
 	/* stop the scheduler */
 	kthread_park(ring->sched.thread);
 
@@ -1075,10 +1202,11 @@ failure:
 	/* restart the scheduler */
 	kthread_unpark(ring->sched.thread);
 
+	mutex_unlock(&adev->lock_reset);
+
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 
-	if (fences)
-		kfree(fences);
+	kfree(fences);
 
 	return 0;
 }
@@ -1090,8 +1218,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 {
 	adev->debugfs_preempt =
 		debugfs_create_file("amdgpu_preempt_ib", 0600,
-				    adev->ddev->primary->debugfs_root,
-				    (void *)adev, &fops_ib_preempt);
+				    adev->ddev->primary->debugfs_root, adev,
+				    &fops_ib_preempt);
 	if (!(adev->debugfs_preempt)) {
 		DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
 		return -EIO;
@@ -1103,8 +1231,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 
 void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
 {
-	if (adev->debugfs_preempt)
-		debugfs_remove(adev->debugfs_preempt);
+	debugfs_remove(adev->debugfs_preempt);
 }
 
 #else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7a6c837c0a85..53d882000101 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,6 +65,9 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_pmu.h"
 
+#include <linux/suspend.h>
+#include <drm/task_barrier.h>
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -78,7 +81,7 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS		2000
 
-static const char *amdgpu_asic_name[] = {
+const char *amdgpu_asic_name[] = {
 	"TAHITI",
 	"PITCAIRN",
 	"VERDE",
@@ -135,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /**
- * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
+ * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
  *
  * @dev: drm_device pointer
  *
  * Returns true if the device is a dGPU with HG/PX power control,
  * otherwise return false.
  */
-bool amdgpu_device_is_px(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
 
@@ -151,6 +154,51 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 	return false;
 }
 
+/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+
+	return amdgpu_asic_supports_baco(adev);
+}
+
+/**
+ * VRAM access helper functions.
+ *
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+			       uint32_t *buf, size_t size, bool write)
+{
+	uint64_t last;
+	unsigned long flags;
+
+	last = size - 4;
+	for (last += pos; pos <= last; pos += 4) {
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+		if (write)
+			WREG32_NO_KIQ(mmMM_DATA, *buf++);
+		else
+			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+	}
+}
+
 /*
  * MMIO register access helper functions.
  */
@@ -984,8 +1032,6 @@ def_value:
  */
 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
-	int ret = 0;
-
 	if (amdgpu_sched_jobs < 4) {
 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 			 amdgpu_sched_jobs);
@@ -1023,15 +1069,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	amdgpu_device_check_block_size(adev);
 
-	ret = amdgpu_device_get_job_timeout_settings(adev);
-	if (ret) {
-		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-		return ret;
-	}
-
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -1046,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
+	int r;
 
-	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
+	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
 		return;
 
 	if (state == VGA_SWITCHEROO_ON) {
@@ -1055,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 		/* don't suspend or resume card normally */
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-		amdgpu_device_resume(dev, true, true);
+		pci_set_power_state(dev->pdev, PCI_D0);
+		pci_restore_state(dev->pdev);
+		r = pci_enable_device(dev->pdev);
+		if (r)
+			DRM_WARN("pci_enable_device failed (%d)\n", r);
+		amdgpu_device_resume(dev, true);
 
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 		drm_kms_helper_poll_enable(dev);
@@ -1063,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 		pr_info("amdgpu: switched off\n");
 		drm_kms_helper_poll_disable(dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		amdgpu_device_suspend(dev, true, true);
+		amdgpu_device_suspend(dev, true);
+		pci_save_state(dev->pdev);
+		/* Shut down the device */
+		pci_disable_device(dev->pdev);
+		pci_set_power_state(dev->pdev, PCI_D3cold);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
@@ -1469,6 +1519,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
+		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+			goto parse_soc_bounding_box;
+
 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
@@ -1496,14 +1549,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 			adev->gfx.config.num_packer_per_sc =
 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
 		}
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+
+parse_soc_bounding_box:
+		/*
+		 * soc bounding box info is not integrated in disocovery table,
+		 * we always need to parse it from gpu info firmware.
+		 */
 		if (hdr->version_minor == 2) {
 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
 		}
-#endif
 		break;
 	}
 	default:
@@ -1613,6 +1670,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
+	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+		amdgpu_discovery_get_gfx_info(adev);
+
 	amdgpu_amdkfd_device_probe(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
@@ -1622,7 +1682,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	}
 
 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
-	if (amdgpu_sriov_vf(adev))
+	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1749,7 +1809,8 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 		}
 	}
 
-	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
 
 	return r;
 }
@@ -1816,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		}
 	}
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_init_data_exchange(adev);
+
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1839,16 +1903,26 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (r)
 		goto init_failed;
 
+	/*
+	 * retired pages will be loaded from eeprom and reserved here,
+	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
+	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
+	 * for I2C communication which only true at this point.
+	 * recovery_init may fail, but it can free all resources allocated by
+	 * itself and its failure should not stop amdgpu init process.
+	 *
+	 * Note: theoretically, this should be called before all vram allocations
+	 * to protect retired page from abusing
+	 */
+	amdgpu_ras_recovery_init(adev);
+
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
 init_failed:
-	if (amdgpu_sriov_vf(adev)) {
-		if (!r)
-			amdgpu_virt_init_data_exchange(adev);
+	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_release_full_gpu(adev, true);
-	}
 
 	return r;
 }
@@ -1887,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
  *
  * @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
  *
  * The list of all the hardware IPs that make up the asic is walked and the
  * set_clockgating_state callbacks are run.
@@ -1911,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1941,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
 			/* enable powergating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
@@ -2006,6 +2083,7 @@ out:
  */
 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 {
+	struct amdgpu_gpu_instance *gpu_instance;
 	int i = 0, r;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2031,8 +2109,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	if (r)
 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
 
-	/* set to low pstate by default */
-	amdgpu_xgmi_set_pstate(adev, 0);
+
+	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+		mutex_lock(&mgpu_info.mutex);
+
+		/*
+		 * Reset device p-state to low as this was booted with high.
+		 *
+		 * This should be performed only after all devices from the same
+		 * hive get initialized.
+		 *
+		 * However, it's unknown how many device in the hive in advance.
+		 * As this is counted one by one during devices initializations.
+		 *
+		 * So, we wait for all XGMI interlinked devices initialized.
+		 * This may bring some delays as those devices may come from
+		 * different hives. But that should be OK.
+		 */
+		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+			for (i = 0; i < mgpu_info.num_gpu; i++) {
+				gpu_instance = &(mgpu_info.gpu_ins[i]);
+				if (gpu_instance->adev->flags & AMD_IS_APU)
+					continue;
+
+				r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+				if (r) {
+					DRM_ERROR("pstate setting failed (%d).\n", r);
+					break;
+				}
+			}
+		}
+
+		mutex_unlock(&mgpu_info.mutex);
+	}
 
 	return 0;
 }
@@ -2220,6 +2329,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 		/* displays are handled in phase1 */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
 			continue;
+		/* PSP lost connection when err_event_athub occurs */
+		if (amdgpu_ras_intr_triggered() &&
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+			adev->ip_blocks[i].status.hw = false;
+			continue;
+		}
 		/* XXX handle errors */
 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
 		/* XXX handle errors */
@@ -2230,18 +2345,11 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 		/* handle putting the SMC in the appropriate state */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
-			if (is_support_sw_smu(adev)) {
-				/* todo */
-			} else if (adev->powerplay.pp_funcs &&
-					   adev->powerplay.pp_funcs->set_mp1_state) {
-				r = adev->powerplay.pp_funcs->set_mp1_state(
-					adev->powerplay.pp_handle,
-					adev->mp1_state);
-				if (r) {
-					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
-						  adev->mp1_state, r);
-					return r;
-				}
+			r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
+			if (r) {
+				DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
+					  adev->mp1_state, r);
+				return r;
 			}
 		}
 
@@ -2324,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 		AMD_IP_BLOCK_TYPE_GFX,
 		AMD_IP_BLOCK_TYPE_SDMA,
 		AMD_IP_BLOCK_TYPE_UVD,
-		AMD_IP_BLOCK_TYPE_VCE
+		AMD_IP_BLOCK_TYPE_VCE,
+		AMD_IP_BLOCK_TYPE_VCN
 	};
 
 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -2339,7 +2448,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 				block->status.hw)
 				continue;
 
-			r = block->version->funcs->hw_init(adev);
+			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+				r = block->version->funcs->resume(adev);
+			else
+				r = block->version->funcs->hw_init(adev);
+
 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 			if (r)
 				return r;
@@ -2511,20 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case CHIP_RENOIR:
 #endif
 		return amdgpu_dc != 0;
 #endif
 	default:
+		if (amdgpu_dc > 0)
+			DRM_INFO("Display Core has been requested via kernel parameter "
+					 "but isn't supported by ASIC, ignoring\n");
 		return false;
 	}
 }
@@ -2549,13 +2661,110 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+
+	/* It's a bug to not have a hive within this function */
+	if (WARN_ON(!hive))
+		return;
+
+	/*
+	 * Use task barrier to synchronize all xgmi reset works across the
+	 * hive. task_barrier_enter and task_barrier_exit will block
+	 * until all the threads running the xgmi reset works reach
+	 * those points. task_barrier_full will do both blocks.
+	 */
+	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+		task_barrier_enter(&hive->tb);
+		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+		if (adev->asic_reset_res)
+			goto fail;
+
+		task_barrier_exit(&hive->tb);
+		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
 
-	adev->asic_reset_res =  amdgpu_asic_reset(adev);
+		if (adev->asic_reset_res)
+			goto fail;
+	} else {
+
+		task_barrier_full(&hive->tb);
+		adev->asic_reset_res =  amdgpu_asic_reset(adev);
+	}
+
+fail:
 	if (adev->asic_reset_res)
 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 			 adev->asic_reset_res, adev->ddev->unique);
 }
 
+static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+	char *input = amdgpu_lockup_timeout;
+	char *timeout_setting = NULL;
+	int index = 0;
+	long timeout;
+	int ret = 0;
+
+	/*
+	 * By default timeout for non compute jobs is 10000.
+	 * And there is no timeout enforced on compute jobs.
+	 * In SR-IOV or passthrough mode, timeout for compute
+	 * jobs are 10000 by default.
+	 */
+	adev->gfx_timeout = msecs_to_jiffies(10000);
+	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+		adev->compute_timeout = adev->gfx_timeout;
+	else
+		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+		while ((timeout_setting = strsep(&input, ",")) &&
+				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+			ret = kstrtol(timeout_setting, 0, &timeout);
+			if (ret)
+				return ret;
+
+			if (timeout == 0) {
+				index++;
+				continue;
+			} else if (timeout < 0) {
+				timeout = MAX_SCHEDULE_TIMEOUT;
+			} else {
+				timeout = msecs_to_jiffies(timeout);
+			}
+
+			switch (index++) {
+			case 0:
+				adev->gfx_timeout = timeout;
+				break;
+			case 1:
+				adev->compute_timeout = timeout;
+				break;
+			case 2:
+				adev->sdma_timeout = timeout;
+				break;
+			case 3:
+				adev->video_timeout = timeout;
+				break;
+			default:
+				break;
+			}
+		}
+		/*
+		 * There is only one value specified and
+		 * it should apply to all non-compute jobs.
+		 */
+		if (index == 1) {
+			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+				adev->compute_timeout = adev->gfx_timeout;
+		}
+	}
+
+	return ret;
+}
 
 /**
  * amdgpu_device_init - initialize the driver
@@ -2575,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		       uint32_t flags)
 {
 	int r, i;
-	bool runtime = false;
+	bool boco = false;
 	u32 max_MBps;
 
 	adev->shutdown = false;
@@ -2583,7 +2792,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->ddev = ddev;
 	adev->pdev = pdev;
 	adev->flags = flags;
-	adev->asic_type = flags & AMD_ASIC_MASK;
+
+	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+		adev->asic_type = amdgpu_force_asic_type;
+	else
+		adev->asic_type = flags & AMD_ASIC_MASK;
+
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
 	if (amdgpu_emu_mode == 1)
 		adev->usec_timeout *= 2;
@@ -2593,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_num_rqs = 0;
+	adev->vm_manager.vm_pte_num_scheds = 0;
 	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2633,8 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
 	mutex_init(&adev->lock_reset);
-	mutex_init(&adev->virt.dpm_mutex);
 	mutex_init(&adev->psp.mutex);
+	mutex_init(&adev->notifier_lock);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
@@ -2726,6 +2940,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
+	r = amdgpu_device_get_job_timeout_settings(adev);
+	if (r) {
+		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+		return r;
+	}
+
 	/* doorbell bar mapping and doorbell index init*/
 	amdgpu_device_doorbell_init(adev);
 
@@ -2734,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	 * ignore it */
 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
 
-	if (amdgpu_device_is_px(ddev))
-		runtime = true;
-	if (!pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_device_supports_boco(ddev))
+		boco = true;
+	if (amdgpu_has_atpx() &&
+	    (amdgpu_is_atpx_hybrid() ||
+	     amdgpu_has_atpx_dgpu_power_cntl()) &&
+	    !pci_is_thunderbolt_attached(adev->pdev))
 		vga_switcheroo_register_client(adev->pdev,
-					       &amdgpu_switcheroo_ops, runtime);
-	if (runtime)
+					       &amdgpu_switcheroo_ops, boco);
+	if (boco)
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 	if (amdgpu_emu_mode == 1) {
@@ -2826,11 +3049,17 @@ fence_driver_init:
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
-		if (amdgpu_virt_request_full_gpu(adev, false))
-			amdgpu_virt_release_full_gpu(adev, false);
 		goto failed;
 	}
 
+	DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+			adev->gfx.config.max_shader_engines,
+			adev->gfx.config.max_sh_per_se,
+			adev->gfx.config.max_cu_per_sh,
+			adev->gfx.cu_info.number);
+
+	amdgpu_ctx_init_sched(adev);
+
 	adev->accel_working = true;
 
 	amdgpu_vm_check_compute_bug(adev);
@@ -2845,16 +3074,19 @@ fence_driver_init:
 
 	amdgpu_fbdev_init(adev);
 
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
-		amdgpu_pm_virt_sysfs_init(adev);
-
 	r = amdgpu_pm_sysfs_init(adev);
-	if (r)
+	if (r) {
+		adev->pm_sysfs_en = false;
 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+	} else
+		adev->pm_sysfs_en = true;
 
 	r = amdgpu_ucode_sysfs_init(adev);
-	if (r)
+	if (r) {
+		adev->ucode_sysfs_en = false;
 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+	} else
+		adev->ucode_sysfs_en = true;
 
 	r = amdgpu_debugfs_gem_init(adev);
 	if (r)
@@ -2923,7 +3155,7 @@ fence_driver_init:
 
 failed:
 	amdgpu_vf_error_trans_all(adev);
-	if (runtime)
+	if (boco)
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 	return r;
@@ -2942,7 +3174,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	int r;
 
 	DRM_INFO("amdgpu: finishing device.\n");
+	flush_delayed_work(&adev->delayed_init_work);
 	adev->shutdown = true;
+
 	/* disable all interrupts */
 	amdgpu_irq_disable_all(adev);
 	if (adev->mode_info.mode_config_initialized){
@@ -2952,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 			drm_atomic_helper_shutdown(adev->ddev);
 	}
 	amdgpu_fence_driver_fini(adev);
-	amdgpu_pm_sysfs_fini(adev);
+	if (adev->pm_sysfs_en)
+		amdgpu_pm_sysfs_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
@@ -2960,7 +3195,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 		adev->firmware.gpu_info_fw = NULL;
 	}
 	adev->accel_working = false;
-	cancel_delayed_work_sync(&adev->delayed_init_work);
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
@@ -2970,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 	kfree(adev->bios);
 	adev->bios = NULL;
-	if (!pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_has_atpx() &&
+	    (amdgpu_is_atpx_hybrid() ||
+	     amdgpu_has_atpx_dgpu_power_cntl()) &&
+	    !pci_is_thunderbolt_attached(adev->pdev))
 		vga_switcheroo_unregister_client(adev->pdev);
-	if (adev->flags & AMD_IS_PX)
+	if (amdgpu_device_supports_boco(adev->ddev))
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
@@ -2981,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
-		amdgpu_pm_virt_sysfs_fini(adev);
 
 	amdgpu_debugfs_regs_cleanup(adev);
 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
-	amdgpu_ucode_sysfs_fini(adev);
+	if (adev->ucode_sysfs_en)
+		amdgpu_ucode_sysfs_fini(adev);
 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
 		amdgpu_pmu_fini(adev);
 	amdgpu_debugfs_preempt_cleanup(adev);
@@ -3009,11 +3245,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
  * Returns 0 for success or an error on failure.
  * Called at driver suspend.
  */
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
 	struct amdgpu_device *adev;
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	int r;
 
 	if (dev == NULL || dev->dev_private == NULL) {
@@ -3036,9 +3273,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	if (!amdgpu_device_has_dc_support(adev)) {
 		/* turn off display hw */
 		drm_modeset_lock_all(dev);
-		list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-		}
+		drm_connector_list_iter_begin(dev, &iter);
+		drm_for_each_connector_iter(connector, &iter)
+			drm_helper_connector_dpms(connector,
+						  DRM_MODE_DPMS_OFF);
+		drm_connector_list_iter_end(&iter);
 		drm_modeset_unlock_all(dev);
 			/* unpin the front buffers and cursors */
 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -3089,17 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	 */
 	amdgpu_bo_evict_vram(adev);
 
-	pci_save_state(dev->pdev);
-	if (suspend) {
-		/* Shut down the device */
-		pci_disable_device(dev->pdev);
-		pci_set_power_state(dev->pdev, PCI_D3hot);
-	} else {
-		r = amdgpu_asic_reset(adev);
-		if (r)
-			DRM_ERROR("amdgpu asic reset failed\n");
-	}
-
 	return 0;
 }
 
@@ -3114,9 +3342,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
  * Returns 0 for success or an error on failure.
  * Called at driver resume.
  */
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 {
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_crtc *crtc;
 	int r = 0;
@@ -3124,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
-	if (resume) {
-		pci_set_power_state(dev->pdev, PCI_D0);
-		pci_restore_state(dev->pdev);
-		r = pci_enable_device(dev->pdev);
-		if (r)
-			return r;
-	}
-
 	/* post card */
 	if (amdgpu_device_need_post(adev)) {
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -3187,9 +3408,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 
 			/* turn on display hw */
 			drm_modeset_lock_all(dev);
-			list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-			}
+
+			drm_connector_list_iter_begin(dev, &iter);
+			drm_for_each_connector_iter(connector, &iter)
+				drm_helper_connector_dpms(connector,
+							  DRM_MODE_DPMS_ON);
+			drm_connector_list_iter_end(&iter);
+
 			drm_modeset_unlock_all(dev);
 		}
 		amdgpu_fbdev_set_suspend(adev, 0);
@@ -3466,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	amdgpu_amdkfd_pre_reset(adev);
-
 	/* Resume IP prior to SMC */
 	r = amdgpu_device_ip_reinit_early_sriov(adev);
 	if (r)
 		goto error;
 
+	amdgpu_virt_init_data_exchange(adev);
 	/* we need recover gart prior to run SMC/CP/SDMA resume */
 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
 
@@ -3490,7 +3714,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	amdgpu_amdkfd_post_reset(adev);
 
 error:
-	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		amdgpu_inc_vram_lost(adev);
@@ -3536,6 +3759,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
 		case CHIP_RAVEN:
+		case CHIP_ARCTURUS:
 			break;
 		default:
 			goto disabled;
@@ -3612,7 +3836,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 			/* For XGMI run all resets in parallel to speed up the process */
 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
-				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
 					r = -EALREADY;
 			} else
 				r = amdgpu_asic_reset(tmp_adev);
@@ -3624,7 +3848,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 			}
 		}
 
-		/* For XGMI wait for all PSP resets to complete before proceed */
+		/* For XGMI wait for all resets to complete before proceed */
 		if (!r) {
 			list_for_each_entry(tmp_adev, device_list_handle,
 					    gmc.xgmi.head) {
@@ -3635,14 +3859,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 						break;
 				}
 			}
-
-			list_for_each_entry(tmp_adev, device_list_handle,
-					gmc.xgmi.head) {
-				amdgpu_ras_reserve_bad_pages(tmp_adev);
-			}
 		}
 	}
 
+	if (!r && amdgpu_ras_intr_triggered())
+		amdgpu_ras_intr_cleared();
 
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		if (need_full_reset) {
@@ -3731,7 +3952,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 		mutex_lock(&adev->lock_reset);
 
 	atomic_inc(&adev->gpu_reset_counter);
-	adev->in_gpu_reset = 1;
+	adev->in_gpu_reset = true;
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_MODE1:
 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -3743,25 +3964,18 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 		adev->mp1_state = PP_MP1_STATE_NONE;
 		break;
 	}
-	/* Block kfd: SRIOV would do it separately */
-	if (!amdgpu_sriov_vf(adev))
-                amdgpu_amdkfd_pre_reset(adev);
 
 	return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
-	/*unlock kfd: SRIOV would do it separately */
-	if (!amdgpu_sriov_vf(adev))
-                amdgpu_amdkfd_post_reset(adev);
 	amdgpu_vf_error_trans_all(adev);
 	adev->mp1_state = PP_MP1_STATE_NONE;
-	adev->in_gpu_reset = 0;
+	adev->in_gpu_reset = false;
 	mutex_unlock(&adev->lock_reset);
 }
 
-
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -3781,11 +3995,28 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_hive_info *hive = NULL;
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
+	bool in_ras_intr = amdgpu_ras_intr_triggered();
+	bool use_baco =
+		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+		true : false;
+
+	/*
+	 * Flush RAM to disk so that after reboot
+	 * the user can read log and see why the system rebooted.
+	 */
+	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+
+		DRM_WARN("Emergency reboot.");
+
+		ksys_sync_helper();
+		emergency_restart();
+	}
 
 	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
-	dev_info(adev->dev, "GPU reset begin!\n");
+	dev_info(adev->dev, "GPU %s begin!\n",
+		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
 
 	cancel_delayed_work_sync(&adev->delayed_init_work);
 
@@ -3812,9 +4043,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		return 0;
 	}
 
+	/* Block kfd: SRIOV would do it separately */
+	if (!amdgpu_sriov_vf(adev))
+                amdgpu_amdkfd_pre_reset(adev);
+
 	/* Build list of devices to reset */
 	if  (adev->gmc.xgmi.num_physical_nodes > 1) {
 		if (!hive) {
+			/*unlock kfd: SRIOV would do it separately */
+			if (!amdgpu_sriov_vf(adev))
+		                amdgpu_amdkfd_post_reset(adev);
 			amdgpu_device_unlock_adev(adev);
 			return -ENODEV;
 		}
@@ -3830,17 +4068,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		device_list_handle = &device_list;
 	}
 
-	/*
-	 * Mark these ASICs to be reseted as untracked first
-	 * And add them back after reset completed
-	 */
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
-		amdgpu_unregister_gpu_instance(tmp_adev);
-
 	/* block all schedulers and reset given job's ring */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		if (tmp_adev != adev) {
+			amdgpu_device_lock_adev(tmp_adev, false);
+			if (!amdgpu_sriov_vf(tmp_adev))
+			                amdgpu_amdkfd_pre_reset(tmp_adev);
+		}
+
+		/*
+		 * Mark these ASICs to be reseted as untracked first
+		 * And add them back after reset completed
+		 */
+		amdgpu_unregister_gpu_instance(tmp_adev);
+
 		/* disable ras on ALL IPs */
-		if (amdgpu_device_ip_need_full_reset(tmp_adev))
+		if (!(in_ras_intr && !use_baco) &&
+		      amdgpu_device_ip_need_full_reset(tmp_adev))
 			amdgpu_ras_suspend(tmp_adev);
 
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -3850,10 +4094,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 				continue;
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+			if (in_ras_intr && !use_baco)
+				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
 	}
 
 
+	if (in_ras_intr && !use_baco)
+		goto skip_sched_resume;
+
 	/*
 	 * Must check guilty signal here since after this point all old
 	 * HW fences are force signaled.
@@ -3864,9 +4114,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	    dma_fence_is_signaled(job->base.s_fence->parent))
 		job_signaled = true;
 
-	if (!amdgpu_device_ip_need_full_reset(adev))
-		device_list_handle = &device_list;
-
 	if (job_signaled) {
 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
 		goto skip_hw_reset;
@@ -3888,7 +4135,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		if (tmp_adev == adev)
 			continue;
 
-		amdgpu_device_lock_adev(tmp_adev, false);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
 						 &need_full_reset);
@@ -3916,6 +4162,7 @@ skip_hw_reset:
 
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
@@ -3937,12 +4184,18 @@ skip_hw_reset:
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
-			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
+			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
 		} else {
-			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
+			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
 		}
+	}
 
+skip_sched_resume:
+	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		/*unlock kfd: SRIOV would do it separately */
+		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+	                amdgpu_amdkfd_post_reset(tmp_adev);
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
@@ -4090,3 +4343,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	}
 }
 
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (!amdgpu_device_supports_baco(adev->ddev))
+		return -ENOTSUPP;
+
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+	return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int ret = 0;
+
+	if (!amdgpu_device_supports_baco(adev->ddev))
+		return -ENOTSUPP;
+
+	ret = amdgpu_dpm_baco_exit(adev);
+	if (ret)
+		return ret;
+
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000000..61a26c15c8dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {
+	bool hash_64k;
+	bool hash_2m;
+	bool hash_1g;
+};
+
+struct amdgpu_df_funcs {
+	void (*sw_init)(struct amdgpu_device *adev);
+	void (*sw_fini)(struct amdgpu_device *adev);
+	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+				      bool enable);
+	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+					    bool enable);
+	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+					 int is_enable);
+	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+					 int is_disable);
+	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+					 uint64_t *count);
+	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+			 uint32_t ficadl_val, uint32_t ficadh_val);
+};
+
+struct amdgpu_df {
+	struct amdgpu_df_hash_status	hash_status;
+	const struct amdgpu_df_funcs	*funcs;
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 1481899f86c1..f95092741c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -134,20 +134,10 @@ static int hw_id_map[MAX_HWIP] = {
 
 static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
 {
-	uint32_t *p = (uint32_t *)binary;
 	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
-	uint64_t pos = vram_size - BINARY_MAX_SIZE;
-	unsigned long flags;
-
-	while (pos < vram_size) {
-		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
-		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
-		*p++ = RREG32_NO_KIQ(mmMM_DATA);
-		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-		pos += 4;
-	}
+	uint64_t pos = vram_size - DISCOVERY_TMR_SIZE;
 
+	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false);
 	return 0;
 }
 
@@ -179,7 +169,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
 	uint16_t checksum;
 	int r;
 
-	adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL);
+	adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
 	if (!adev->discovery)
 		return -ENOMEM;
 
@@ -333,7 +323,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 }
 
 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
-				    int *major, int *minor)
+				    int *major, int *minor, int *revision)
 {
 	struct binary_header *bhdr;
 	struct ip_discovery_header *ihdr;
@@ -369,6 +359,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
 					*major = ip->major;
 				if (minor)
 					*minor = ip->minor;
+				if (revision)
+					*revision = ip->revision;
 				return 0;
 			}
 			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 85b8c4d4d576..ba78e15d9b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,11 +24,13 @@
 #ifndef __AMDGPU_DISCOVERY__
 #define __AMDGPU_DISCOVERY__
 
+#define DISCOVERY_TMR_SIZE  (64 << 10)
+
 int amdgpu_discovery_init(struct amdgpu_device *adev);
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
-                                    int *major, int *minor);
+                                    int *major, int *minor, int *revision);
 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
 
 #endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 82efc1e22e61..6d520a3eec40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -370,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
 	struct amdgpu_connector *amdgpu_connector;
 	struct drm_encoder *encoder;
 	struct amdgpu_encoder *amdgpu_encoder;
+	struct drm_connector_list_iter iter;
 	uint32_t devices;
 	int i = 0;
 
+	drm_connector_list_iter_begin(dev, &iter);
 	DRM_INFO("AMDGPU Display Connectors\n");
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_for_each_connector_iter(connector, &iter) {
 		amdgpu_connector = to_amdgpu_connector(connector);
 		DRM_INFO("Connector %d:\n", i);
 		DRM_INFO("  %s\n", connector->name);
@@ -438,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
 		}
 		i++;
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -510,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 	 * will not allow USWC mappings.
 	 * Also, don't allow GTT domain if the BO doens't have USWC falg set.
 	 */
-	if (adev->asic_type >= CHIP_CARRIZO &&
-	    adev->asic_type < CHIP_RAVEN &&
-	    (adev->flags & AMD_IS_APU) &&
-	    (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
 	    amdgpu_bo_support_uswc(bo_flags) &&
-	    amdgpu_device_asic_has_dc_support(adev->asic_type))
-		domain |= AMDGPU_GEM_DOMAIN_GTT;
+	    amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+		switch (adev->asic_type) {
+		case CHIP_CARRIZO:
+		case CHIP_STONEY:
+			domain |= AMDGPU_GEM_DOMAIN_GTT;
+			break;
+		case CHIP_RAVEN:
+			/* enable S/G on PCO and RV2 */
+			if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+				domain |= AMDGPU_GEM_DOMAIN_GTT;
+			break;
+		default:
+			break;
+		}
+	}
 #endif
 
 	return domain;
@@ -687,7 +700,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct amdgpu_encoder *amdgpu_encoder;
 	struct drm_connector *connector;
-	struct amdgpu_connector *amdgpu_connector;
 	u32 src_v = 1, dst_v = 1;
 	u32 src_h = 1, dst_h = 1;
 
@@ -699,7 +711,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
 			continue;
 		amdgpu_encoder = to_amdgpu_encoder(encoder);
 		connector = amdgpu_get_connector_for_encoder(encoder);
-		amdgpu_connector = to_amdgpu_connector(connector);
 
 		/* set scaling */
 		if (amdgpu_encoder->rmx_type == RMX_OFF)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 61f108ec2b5c..a59cd47aa6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -34,27 +34,12 @@
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-fence-array.h>
 
 /**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	int npages = bo->tbo.num_pages;
-
-	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
-/**
  * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
  * @obj: GEM BO
  *
@@ -179,92 +164,126 @@ err_fences_put:
 }
 
 /**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+				 struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	int r;
+
+	if (attach->dev->driver == adev->dev->driver)
+		return 0;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (unlikely(r != 0))
+		return r;
+
+	/*
+	 * We only create shared fences for internal use, but importers
+	 * of the dmabuf rely on exclusive fences for implicitly
+	 * tracking write hazards. As any of the current fences may
+	 * correspond to a write, we need to convert all existing
+	 * fences on the reservation object into a single exclusive
+	 * fence.
+	 */
+	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
+	if (r)
+		return r;
+
+	bo->prime_shared_count++;
+	amdgpu_bo_unreserve(bo);
+	return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+		bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @dir: DMA direction
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
  * For now, simply pins it to the GTT domain, where it should be accessible by
  * all DMA devices.
  *
  * Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
  */
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
-				     struct dma_buf_attachment *attach)
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+					   enum dma_data_direction dir)
 {
+	struct dma_buf *dma_buf = attach->dmabuf;
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct sg_table *sgt;
 	long r;
 
-	r = drm_gem_map_attach(dma_buf, attach);
-	if (r)
-		return r;
-
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto error_detach;
-
-
-	if (attach->dev->driver != adev->dev->driver) {
-		/*
-		 * We only create shared fences for internal use, but importers
-		 * of the dmabuf rely on exclusive fences for implicitly
-		 * tracking write hazards. As any of the current fences may
-		 * correspond to a write, we need to convert all existing
-		 * fences on the reservation object into a single exclusive
-		 * fence.
-		 */
-		r = __dma_resv_make_exclusive(bo->tbo.base.resv);
-		if (r)
-			goto error_unreserve;
-	}
-
-	/* pin buffer into GTT */
 	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
 	if (r)
-		goto error_unreserve;
+		return ERR_PTR(r);
 
-	if (attach->dev->driver != adev->dev->driver)
-		bo->prime_shared_count++;
+	sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+	if (IS_ERR(sgt))
+		return sgt;
 
-error_unreserve:
-	amdgpu_bo_unreserve(bo);
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC))
+		goto error_free;
 
-error_detach:
-	if (r)
-		drm_gem_map_detach(dma_buf, attach);
-	return r;
+	return sgt;
+
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
- * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
  *
  * This is called when a shared DMA buffer no longer needs to be accessible by
  * another device. For now, simply unpins the buffer from GTT.
  */
-static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
-				      struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
+				 struct sg_table *sgt,
+				 enum dma_data_direction dir)
 {
-	struct drm_gem_object *obj = dma_buf->priv;
+	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	int ret = 0;
-
-	ret = amdgpu_bo_reserve(bo, true);
-	if (unlikely(ret != 0))
-		goto error;
 
+	dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+	sg_free_table(sgt);
+	kfree(sgt);
 	amdgpu_bo_unpin(bo);
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-	amdgpu_bo_unreserve(bo);
-
-error:
-	drm_gem_map_detach(dma_buf, attach);
 }
 
 /**
@@ -308,10 +327,11 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 }
 
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
-	.attach = amdgpu_dma_buf_map_attach,
-	.detach = amdgpu_dma_buf_map_detach,
-	.map_dma_buf = drm_gem_map_dma_buf,
-	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.dynamic_mapping = true,
+	.attach = amdgpu_dma_buf_attach,
+	.detach = amdgpu_dma_buf_detach,
+	.map_dma_buf = amdgpu_dma_buf_map,
+	.unmap_dma_buf = amdgpu_dma_buf_unmap,
 	.release = drm_gem_dmabuf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
@@ -321,7 +341,6 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
 
 /**
  * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
- * @dev: DRM device
  * @gobj: GEM BO
  * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
  *
@@ -341,40 +360,35 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
 		return ERR_PTR(-EPERM);
 
 	buf = drm_gem_prime_export(gobj, flags);
-	if (!IS_ERR(buf)) {
-		buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;
+	if (!IS_ERR(buf))
 		buf->ops = &amdgpu_dmabuf_ops;
-	}
 
 	return buf;
 }
 
 /**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
  * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
  *
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
  *
  * Returns:
  * A new GEM BO of the given DRM device, representing the memory
  * described by the given DMA-buf attachment and scatter/gather table.
  */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 {
-	struct dma_resv *resv = attach->dmabuf->resv;
+	struct dma_resv *resv = dma_buf->resv;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
 	int ret;
 
 	memset(&bp, 0, sizeof(bp));
-	bp.size = attach->dmabuf->size;
+	bp.size = dma_buf->size;
 	bp.byte_align = PAGE_SIZE;
 	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
 	bp.flags = 0;
@@ -385,11 +399,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		goto error;
 
-	bo->tbo.sg = sg;
-	bo->tbo.ttm->sg = sg;
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
 		bo->prime_shared_count = 1;
 
 	dma_resv_unlock(resv);
@@ -405,15 +417,15 @@ error:
  * @dev: DRM device
  * @dma_buf: Shared DMA buffer
  *
- * The main work is done by the &drm_gem_prime_import helper, which in turn
- * uses &amdgpu_gem_prime_import_sg_table.
+ * Import a dma_buf into a the driver and potentially create a new GEM object.
  *
  * Returns:
  * GEM BO representing the shared DMA buffer for the given device.
  */
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf)
+					       struct dma_buf *dma_buf)
 {
+	struct dma_buf_attachment *attach;
 	struct drm_gem_object *obj;
 
 	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -428,5 +440,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 		}
 	}
 
-	return drm_gem_prime_import(dev, dma_buf);
+	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true);
+	if (IS_ERR(attach)) {
+		drm_gem_object_put(obj);
+		return ERR_CAST(attach);
+	}
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index 5012e6ab58f1..ec447a7b6b28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,11 +25,6 @@
 
 #include <drm/drm_gem.h>
 
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
 struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
 					int flags);
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 5803fcbae22f..a2e8c3dfb4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -911,7 +911,8 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
 	if (is_support_sw_smu(adev)) {
 		ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
 					     low ? &clk_freq : NULL,
-					     !low ? &clk_freq : NULL);
+					     !low ? &clk_freq : NULL,
+					     true);
 		if (ret)
 			return 0;
 		return clk_freq * 100;
@@ -928,7 +929,8 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
 	if (is_support_sw_smu(adev)) {
 		ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
 					     low ? &clk_freq : NULL,
-					     !low ? &clk_freq : NULL);
+					     !low ? &clk_freq : NULL,
+					     true);
 		if (ret)
 			return 0;
 		return clk_freq * 100;
@@ -944,20 +946,63 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
 	bool swsmu = is_support_sw_smu(adev);
 
 	switch (block_type) {
-	case AMD_IP_BLOCK_TYPE_GFX:
 	case AMD_IP_BLOCK_TYPE_UVD:
-	case AMD_IP_BLOCK_TYPE_VCN:
 	case AMD_IP_BLOCK_TYPE_VCE:
+		if (swsmu) {
+			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+		} else if (adev->powerplay.pp_funcs &&
+			   adev->powerplay.pp_funcs->set_powergating_by_smu) {
+			/*
+			 * TODO: need a better lock mechanism
+			 *
+			 * Here adev->pm.mutex lock protection is enforced on
+			 * UVD and VCE cases only. Since for other cases, there
+			 * may be already lock protection in amdgpu_pm.c.
+			 * This is a quick fix for the deadlock issue below.
+			 *     NFO: task ocltst:2028 blocked for more than 120 seconds.
+			 *     Tainted: G           OE     5.0.0-37-generic #40~18.04.1-Ubuntu
+			 *     echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+			 *     cltst          D    0  2028   2026 0x00000000
+			 *     all Trace:
+			 *     __schedule+0x2c0/0x870
+			 *     schedule+0x2c/0x70
+			 *     schedule_preempt_disabled+0xe/0x10
+			 *     __mutex_lock.isra.9+0x26d/0x4e0
+			 *     __mutex_lock_slowpath+0x13/0x20
+			 *     ? __mutex_lock_slowpath+0x13/0x20
+			 *     mutex_lock+0x2f/0x40
+			 *     amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu]
+			 *     gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu]
+			 *     gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu]
+			 *     amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu]
+			 *     pp_dpm_force_performance_level+0xe7/0x100 [amdgpu]
+			 *     amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu]
+			 */
+			mutex_lock(&adev->pm.mutex);
+			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+				(adev)->powerplay.pp_handle, block_type, gate));
+			mutex_unlock(&adev->pm.mutex);
+		}
+		break;
+	case AMD_IP_BLOCK_TYPE_GFX:
+	case AMD_IP_BLOCK_TYPE_VCN:
 	case AMD_IP_BLOCK_TYPE_SDMA:
 		if (swsmu)
 			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
-		else
+		else if (adev->powerplay.pp_funcs &&
+			 adev->powerplay.pp_funcs->set_powergating_by_smu)
 			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
 				(adev)->powerplay.pp_handle, block_type, gate));
 		break;
+	case AMD_IP_BLOCK_TYPE_JPEG:
+		if (swsmu)
+			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+		break;
 	case AMD_IP_BLOCK_TYPE_GMC:
 	case AMD_IP_BLOCK_TYPE_ACP:
-		ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+		if (adev->powerplay.pp_funcs &&
+		    adev->powerplay.pp_funcs->set_powergating_by_smu)
+			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
 				(adev)->powerplay.pp_handle, block_type, gate));
 		break;
 	default:
@@ -966,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
 
 	return ret;
 }
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	void *pp_handle = adev->powerplay.pp_handle;
+	struct smu_context *smu = &adev->smu;
+	int ret = 0;
+
+	if (is_support_sw_smu(adev)) {
+		ret = smu_baco_enter(smu);
+	} else {
+		if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+			return -ENOENT;
+
+		/* enter BACO state */
+		ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+	}
+
+	return ret;
+}
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	void *pp_handle = adev->powerplay.pp_handle;
+	struct smu_context *smu = &adev->smu;
+	int ret = 0;
+
+	if (is_support_sw_smu(adev)) {
+		ret = smu_baco_exit(smu);
+	} else {
+		if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+			return -ENOENT;
+
+		/* exit BACO state */
+		ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+	}
+
+	return ret;
+}
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+			     enum pp_mp1_state mp1_state)
+{
+	int ret = 0;
+
+	if (is_support_sw_smu(adev)) {
+		ret = smu_set_mp1_state(&adev->smu, mp1_state);
+	} else if (adev->powerplay.pp_funcs &&
+		   adev->powerplay.pp_funcs->set_mp1_state) {
+		ret = adev->powerplay.pp_funcs->set_mp1_state(
+				adev->powerplay.pp_handle,
+				mp1_state);
+	}
+
+	return ret;
+}
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	void *pp_handle = adev->powerplay.pp_handle;
+	struct smu_context *smu = &adev->smu;
+	bool baco_cap;
+
+	if (is_support_sw_smu(adev)) {
+		return smu_baco_is_support(smu);
+	} else {
+		if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
+			return false;
+
+		if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap))
+			return false;
+
+		return baco_cap ? true : false;
+	}
+}
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	void *pp_handle = adev->powerplay.pp_handle;
+	struct smu_context *smu = &adev->smu;
+
+	if (is_support_sw_smu(adev)) {
+		return smu_mode2_reset(smu);
+	} else {
+		if (!pp_funcs || !pp_funcs->asic_reset_mode_2)
+			return -ENOENT;
+
+		return pp_funcs->asic_reset_mode_2(pp_handle);
+	}
+}
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	void *pp_handle = adev->powerplay.pp_handle;
+	struct smu_context *smu = &adev->smu;
+	int ret = 0;
+
+	dev_info(adev->dev, "GPU BACO reset\n");
+
+	if (is_support_sw_smu(adev)) {
+		ret = smu_baco_enter(smu);
+		if (ret)
+			return ret;
+
+		ret = smu_baco_exit(smu);
+		if (ret)
+			return ret;
+	} else {
+		if (!pp_funcs
+		    || !pp_funcs->set_asic_baco_state)
+			return -ENOENT;
+
+		/* enter BACO state */
+		ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+		if (ret)
+			return ret;
+
+		/* exit BACO state */
+		ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+				    enum PP_SMC_POWER_PROFILE type,
+				    bool en)
+{
+	int ret = 0;
+
+	if (is_support_sw_smu(adev))
+		ret = smu_switch_power_profile(&adev->smu, type, en);
+	else if (adev->powerplay.pp_funcs &&
+		 adev->powerplay.pp_funcs->switch_power_profile)
+		ret = adev->powerplay.pp_funcs->switch_power_profile(
+			adev->powerplay.pp_handle, type, en);
+
+	return ret;
+}
+
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+			       uint32_t pstate)
+{
+	int ret = 0;
+
+	if (is_support_sw_smu_xgmi(adev))
+		ret = smu_set_xgmi_pstate(&adev->smu, pstate);
+	else if (adev->powerplay.pp_funcs &&
+		 adev->powerplay.pp_funcs->set_xgmi_pstate)
+		ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
+								pstate);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 1c5c0fd76dbf..902ca6c00cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -298,12 +298,6 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_get_current_power_state(adev) \
 		((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
 
-#define amdgpu_smu_get_current_power_state(adev) \
-		((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu)))
-
-#define amdgpu_smu_set_power_state(adev) \
-		((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu)))
-
 #define amdgpu_dpm_get_pp_num_states(adev, data) \
 		((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data))
 
@@ -347,10 +341,6 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->reset_power_profile_state(\
 			(adev)->powerplay.pp_handle, request))
 
-#define amdgpu_dpm_switch_power_profile(adev, type, en) \
-		((adev)->powerplay.pp_funcs->switch_power_profile(\
-			(adev)->powerplay.pp_handle, type, en))
-
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
 			(adev)->powerplay.pp_handle, msg_id))
@@ -523,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
 
 extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);
 
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+			       uint32_t pstate);
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+				    enum PP_SMC_POWER_PROFILE type,
+				    bool en);
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+			     enum pp_mp1_state mp1_state);
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b19157b19fa0..94e2fd758e01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -43,6 +43,8 @@
 
 #include "amdgpu_amdkfd.h"
 
+#include "amdgpu_ras.h"
+
 /*
  * KMS wrapper.
  * - 3.0.0 - initial driver
@@ -82,13 +84,12 @@
  * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
  * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
  * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
+ * - 3.36.0 - Allow reading more status registers on si/cik
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	35
+#define KMS_DRIVER_MINOR	36
 #define KMS_DRIVER_PATCHLEVEL	0
 
-#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH	256
-
 int amdgpu_vram_limit = 0;
 int amdgpu_vis_vram_limit = 0;
 int amdgpu_gart_size = -1; /* auto */
@@ -101,7 +102,7 @@ int amdgpu_disp_priority = 0;
 int amdgpu_hw_i2c = 0;
 int amdgpu_pcie_gen2 = -1;
 int amdgpu_msi = -1;
-char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
 int amdgpu_dpm = -1;
 int amdgpu_fw_load_type = -1;
 int amdgpu_aspm = -1;
@@ -128,11 +129,7 @@ char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 /* OverDrive(bit 14) disabled by default*/
 uint amdgpu_pp_feature_mask = 0xffffbfff;
-int amdgpu_ngg = 0;
-int amdgpu_prim_buf_per_se = 0;
-int amdgpu_pos_buf_per_se = 0;
-int amdgpu_cntl_sb_buf_per_se = 0;
-int amdgpu_param_buf_per_se = 0;
+uint amdgpu_force_long_training = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
@@ -145,13 +142,14 @@ int amdgpu_async_gfx_ring = 1;
 int amdgpu_mcbp = 0;
 int amdgpu_discovery = -1;
 int amdgpu_mes = 0;
-int amdgpu_noretry = 1;
+int amdgpu_noretry;
+int amdgpu_force_asic_type = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
 };
 int amdgpu_ras_enable = -1;
-uint amdgpu_ras_mask = 0xfffffffb;
+uint amdgpu_ras_mask = 0xffffffff;
 
 /**
  * DOC: vramlimit (int)
@@ -244,16 +242,21 @@ module_param_named(msi, amdgpu_msi, int, 0444);
  *
  * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
  * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to default timeout.
- *  - With one value specified, the setting will apply to all non-compute jobs.
- *  - With multiple values specified, the first one will be for GFX. The second one is for Compute.
- *    And the third and fourth ones are for SDMA and Video.
+ * to the default timeout.
+ *
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX.
+ *   The second one is for Compute. The third and fourth ones are
+ *   for SDMA and Video.
+ *
  * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
  * jobs is 10000. And there is no timeout enforced on compute jobs.
  */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs."
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+		"for passthrough or sriov, 10000 for all jobs."
 		" 0: keep default value. negative: infinity timeout), "
-		"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+		"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+		"for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
 module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
 
 /**
@@ -392,6 +395,14 @@ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
 module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
 
 /**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training in resume.
+ * The default is zero, indicates short training in resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
+/**
  * DOC: pcie_gen_cap (uint)
  * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
  * The default is 0 (automatic for each asic).
@@ -449,42 +460,6 @@ MODULE_PARM_DESC(virtual_display,
 module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
 
 /**
- * DOC: ngg (int)
- * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
- */
-MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
-module_param_named(ngg, amdgpu_ngg, int, 0444);
-
-/**
- * DOC: prim_buf_per_se (int)
- * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
-module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
-
-/**
- * DOC: pos_buf_per_se (int)
- * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
-module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
-
-/**
- * DOC: cntl_sb_buf_per_se (int)
- * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
-module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
-
-/**
- * DOC: param_buf_per_se (int)
- * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
- * The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
-module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
-
-/**
  * DOC: job_hang_limit (int)
  * Set how much time allow a job hang and not drop it. The default is 0.
  */
@@ -613,9 +588,19 @@ MODULE_PARM_DESC(mes,
 module_param_named(mes, amdgpu_mes, int, 0444);
 
 MODULE_PARM_DESC(noretry,
-	"Disable retry faults (0 = retry enabled, 1 = retry disabled (default))");
+	"Disable retry faults (0 = retry enabled (default), 1 = retry disabled)");
 module_param_named(noretry, amdgpu_noretry, int, 0644);
 
+/**
+ * DOC: force_asic_type (int)
+ * A non negative value used to specify the asic type for all supported GPUs.
+ */
+MODULE_PARM_DESC(force_asic_type,
+	"A non negative value used to specify the asic type for all supported GPUs");
+module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+
+
+
 #ifdef CONFIG_HSA_AMD
 /**
  * DOC: sched_policy (int)
@@ -1019,10 +1004,11 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
 
 	/* Renoir */
-	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
 
 	/* Navi12 */
 	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
 
 	{0, 0, 0}
 };
@@ -1085,7 +1071,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 #endif
 
 	/* Get rid of things like offb */
-	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
+	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
 	if (ret)
 		return ret;
 
@@ -1128,7 +1114,10 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
-	DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
+#ifdef MODULE
+	if (THIS_MODULE->state != MODULE_STATE_GOING)
+#endif
+		DRM_ERROR("Hotplug removal is not supported\n");
 	drm_dev_unplug(dev);
 	drm_dev_put(dev);
 	pci_disable_device(pdev);
@@ -1141,6 +1130,9 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct amdgpu_device *adev = dev->dev_private;
 
+	if (amdgpu_ras_intr_triggered())
+		return;
+
 	/* if we are running in a VM, make sure the device
 	 * torn down properly on reboot/shutdown.
 	 * unfortunately we can't detect certain
@@ -1155,7 +1147,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
-	return amdgpu_device_suspend(drm_dev, true, true);
+	return amdgpu_device_suspend(drm_dev, true);
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
@@ -1163,66 +1155,92 @@ static int amdgpu_pmops_resume(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
 	/* GPU comes up enabled by the bios on resume */
-	if (amdgpu_device_is_px(drm_dev)) {
+	if (amdgpu_device_supports_boco(drm_dev) ||
+	    amdgpu_device_supports_baco(drm_dev)) {
 		pm_runtime_disable(dev);
 		pm_runtime_set_active(dev);
 		pm_runtime_enable(dev);
 	}
 
-	return amdgpu_device_resume(drm_dev, true, true);
+	return amdgpu_device_resume(drm_dev, true);
 }
 
 static int amdgpu_pmops_freeze(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_dev->dev_private;
+	int r;
 
-	return amdgpu_device_suspend(drm_dev, false, true);
+	r = amdgpu_device_suspend(drm_dev, true);
+	if (r)
+		return r;
+	return amdgpu_asic_reset(adev);
 }
 
 static int amdgpu_pmops_thaw(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
-	return amdgpu_device_resume(drm_dev, false, true);
+	return amdgpu_device_resume(drm_dev, true);
 }
 
 static int amdgpu_pmops_poweroff(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
-	return amdgpu_device_suspend(drm_dev, true, true);
+	return amdgpu_device_suspend(drm_dev, true);
 }
 
 static int amdgpu_pmops_restore(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
-	return amdgpu_device_resume(drm_dev, false, true);
+	return amdgpu_device_resume(drm_dev, true);
 }
 
 static int amdgpu_pmops_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	int ret;
+	struct amdgpu_device *adev = drm_dev->dev_private;
+	int ret, i;
 
-	if (!amdgpu_device_is_px(drm_dev)) {
+	if (!adev->runpm) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
 
-	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+	/* wait for all rings to drain before suspending */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (ring && ring->sched.ready) {
+			ret = amdgpu_fence_wait_empty(ring);
+			if (ret)
+				return -EBUSY;
+		}
+	}
+
+	if (amdgpu_device_supports_boco(drm_dev))
+		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 	drm_kms_helper_poll_disable(drm_dev);
 
-	ret = amdgpu_device_suspend(drm_dev, false, false);
-	pci_save_state(pdev);
-	pci_disable_device(pdev);
-	pci_ignore_hotplug(pdev);
-	if (amdgpu_is_atpx_hybrid())
-		pci_set_power_state(pdev, PCI_D3cold);
-	else if (!amdgpu_has_atpx_dgpu_power_cntl())
-		pci_set_power_state(pdev, PCI_D3hot);
-	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+	ret = amdgpu_device_suspend(drm_dev, false);
+	if (amdgpu_device_supports_boco(drm_dev)) {
+		/* Only need to handle PCI state in the driver for ATPX
+		 * PCI core handles it for _PR3.
+		 */
+		if (amdgpu_is_atpx_hybrid()) {
+			pci_ignore_hotplug(pdev);
+		} else {
+			pci_save_state(pdev);
+			pci_disable_device(pdev);
+			pci_ignore_hotplug(pdev);
+			pci_set_power_state(pdev, PCI_D3cold);
+		}
+		drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+	} else if (amdgpu_device_supports_baco(drm_dev)) {
+		amdgpu_device_baco_enter(drm_dev);
+	}
 
 	return 0;
 }
@@ -1231,34 +1249,45 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	struct amdgpu_device *adev = drm_dev->dev_private;
 	int ret;
 
-	if (!amdgpu_device_is_px(drm_dev))
+	if (!adev->runpm)
 		return -EINVAL;
 
-	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+	if (amdgpu_device_supports_boco(drm_dev)) {
+		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	if (amdgpu_is_atpx_hybrid() ||
-	    !amdgpu_has_atpx_dgpu_power_cntl())
-		pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	ret = pci_enable_device(pdev);
-	if (ret)
-		return ret;
-	pci_set_master(pdev);
-
-	ret = amdgpu_device_resume(drm_dev, false, false);
+		/* Only need to handle PCI state in the driver for ATPX
+		 * PCI core handles it for _PR3.
+		 */
+		if (amdgpu_is_atpx_hybrid()) {
+			pci_set_master(pdev);
+		} else {
+			pci_set_power_state(pdev, PCI_D0);
+			pci_restore_state(pdev);
+			ret = pci_enable_device(pdev);
+			if (ret)
+				return ret;
+			pci_set_master(pdev);
+		}
+	} else if (amdgpu_device_supports_baco(drm_dev)) {
+		amdgpu_device_baco_exit(drm_dev);
+	}
+	ret = amdgpu_device_resume(drm_dev, false);
 	drm_kms_helper_poll_enable(drm_dev);
-	drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+	if (amdgpu_device_supports_boco(drm_dev))
+		drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	return 0;
 }
 
 static int amdgpu_pmops_runtime_idle(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_dev->dev_private;
 	struct drm_crtc *crtc;
 
-	if (!amdgpu_device_is_px(drm_dev)) {
+	if (!adev->runpm) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
@@ -1348,66 +1377,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
 	return 0;
 }
 
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
-{
-	char *input = amdgpu_lockup_timeout;
-	char *timeout_setting = NULL;
-	int index = 0;
-	long timeout;
-	int ret = 0;
-
-	/*
-	 * By default timeout for non compute jobs is 10000.
-	 * And there is no timeout enforced on compute jobs.
-	 */
-	adev->gfx_timeout = msecs_to_jiffies(10000);
-	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
-	adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
-
-	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
-		while ((timeout_setting = strsep(&input, ",")) &&
-				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
-			ret = kstrtol(timeout_setting, 0, &timeout);
-			if (ret)
-				return ret;
-
-			if (timeout == 0) {
-				index++;
-				continue;
-			} else if (timeout < 0) {
-				timeout = MAX_SCHEDULE_TIMEOUT;
-			} else {
-				timeout = msecs_to_jiffies(timeout);
-			}
-
-			switch (index++) {
-			case 0:
-				adev->gfx_timeout = timeout;
-				break;
-			case 1:
-				adev->compute_timeout = timeout;
-				break;
-			case 2:
-				adev->sdma_timeout = timeout;
-				break;
-			case 3:
-				adev->video_timeout = timeout;
-				break;
-			default:
-				break;
-			}
-		}
-		/*
-		 * There is only one value specified and
-		 * it should apply to all non-compute jobs.
-		 */
-		if (index == 1)
-			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
-	}
-
-	return ret;
-}
-
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 bool in_vblank_irq, int *vpos, int *hpos,
@@ -1422,7 +1391,8 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP | DRIVER_ATOMIC |
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
 	.postclose = amdgpu_driver_postclose_kms,
@@ -1446,8 +1416,6 @@ static struct drm_driver kms_driver = {
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_import = amdgpu_gem_prime_import,
-	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
-	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
 	.gem_prime_mmap = amdgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 571a6dfb473e..61fcf247a638 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector;
 	struct drm_encoder *encoder;
 	struct amdgpu_encoder *amdgpu_encoder;
 
+	drm_connector_list_iter_begin(dev, &iter);
 	/* walk the list and link encoders to connectors */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_for_each_connector_iter(connector, &iter) {
 		amdgpu_connector = to_amdgpu_connector(connector);
 		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 			amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
 			}
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
@@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 			amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
@@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
 				  amdgpu_connector->devices, encoder->encoder_type);
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 struct drm_connector *
@@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct drm_connector *connector;
+	struct drm_connector *connector, *found = NULL;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		amdgpu_connector = to_amdgpu_connector(connector);
-		if (amdgpu_encoder->active_device & amdgpu_connector->devices)
-			return connector;
+		if (amdgpu_encoder->active_device & amdgpu_connector->devices) {
+			found = connector;
+			break;
+		}
 	}
-	return NULL;
+	drm_connector_list_iter_end(&iter);
+	return found;
 }
 
 struct drm_connector *
@@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct drm_connector *connector;
+	struct drm_connector *connector, *found = NULL;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		amdgpu_connector = to_amdgpu_connector(connector);
-		if (amdgpu_encoder->devices & amdgpu_connector->devices)
-			return connector;
+		if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+			found = connector;
+			break;
+		}
 	}
-	return NULL;
+	drm_connector_list_iter_end(&iter);
+	return found;
 }
 
 struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 143753d237e7..2672dc64a310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user)
 	return 0;
 }
 
-static struct fb_ops amdgpufb_ops = {
+static const struct fb_ops amdgpufb_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open = amdgpufb_open,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 23085b352cf2..3c01252b1e0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -34,6 +34,7 @@
 #include <linux/kref.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drm_debugfs.h>
 
@@ -154,7 +155,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 		       seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
-
+	pm_runtime_get_noresume(adev->ddev->dev);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 		struct dma_fence *old;
@@ -234,6 +235,7 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 bool amdgpu_fence_process(struct amdgpu_ring *ring)
 {
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	struct amdgpu_device *adev = ring->adev;
 	uint32_t seq, last_seq;
 	int r;
 
@@ -274,6 +276,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 			BUG();
 
 		dma_fence_put(fence);
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 	} while (last_seq != seq);
 
 	return true;
@@ -462,18 +466,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 			timeout = adev->gfx_timeout;
 			break;
 		case AMDGPU_RING_TYPE_COMPUTE:
-			/*
-			 * For non-sriov case, no timeout enforce
-			 * on compute ring by default. Unless user
-			 * specifies a timeout for compute ring.
-			 *
-			 * For sriov case, always use the timeout
-			 * as gfx ring
-			 */
-			if (!amdgpu_sriov_vf(ring->adev))
-				timeout = adev->compute_timeout;
-			else
-				timeout = adev->gfx_timeout;
+			timeout = adev->compute_timeout;
 			break;
 		case AMDGPU_RING_TYPE_SDMA:
 			timeout = adev->sdma_timeout;
@@ -748,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return 0;
 
 	seq_printf(m, "gpu recover\n");
 	amdgpu_device_gpu_recover(adev, NULL);
 
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5e8bdded265f..e01e681d2a60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -71,7 +71,7 @@
  */
 static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 {
-	struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+	struct page *dummy_page = ttm_bo_glob.dummy_read_page;
 
 	if (adev->dummy_page_addr)
 		return 0;
@@ -302,6 +302,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
  * @pages: number of pages to bind
  * @pagelist: pages to bind
  * @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
  *
  * Binds the requested pages to the gart page table
  * (all asics).
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8ceb44925947..4277125a79ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -527,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 			goto error;
 	}
 
-	r = amdgpu_vm_update_directories(adev, vm);
+	r = amdgpu_vm_update_pdes(adev, vm, false);
 
 error:
 	if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }
 
+/**
+ * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: GEM UAPI flags
+ *
+ * Returns the GEM UAPI flags mapped into hardware for the ASIC.
+ */
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
+{
+	uint64_t pte_flag = 0;
+
+	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+		pte_flag |= AMDGPU_PTE_EXECUTABLE;
+	if (flags & AMDGPU_VM_PAGE_READABLE)
+		pte_flag |= AMDGPU_PTE_READABLE;
+	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+		pte_flag |= AMDGPU_PTE_WRITEABLE;
+	if (flags & AMDGPU_VM_PAGE_PRT)
+		pte_flag |= AMDGPU_PTE_PRT;
+
+	if (adev->gmc.gmc_funcs->map_mtype)
+		pte_flag |= amdgpu_gmc_map_mtype(adev,
+						 flags & AMDGPU_VM_MTYPE_MASK);
+
+	return pte_flag;
+}
+
 int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
@@ -613,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_unref;
 
@@ -631,7 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
-		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 				     args->offset_in_bo, args->map_size,
 				     va_flags);
@@ -646,7 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 						args->map_size);
 		break;
 	case AMDGPU_VA_OP_REPLACE:
-		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 					     args->offset_in_bo, args->map_size,
 					     va_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 0b66d2e6b5d5..e0f025dd1b14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -67,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp);
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
 int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f9bef3154b99..b88b8b82bb64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
+#include "amdgpu_ras.h"
 
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
@@ -231,12 +232,10 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, pipe, me;
+	int i, queue, me;
 
 	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
 		queue = i % adev->gfx.me.num_queue_per_pipe;
-		pipe = (i / adev->gfx.me.num_queue_per_pipe)
-			% adev->gfx.me.num_pipe_per_me;
 		me = (i / adev->gfx.me.num_queue_per_pipe)
 		      / adev->gfx.me.num_pipe_per_me;
 
@@ -320,8 +319,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 	return r;
 }
 
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
-			      struct amdgpu_irq_src *irq)
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
 	amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
@@ -456,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
 	}
 
 	ring = &adev->gfx.kiq.ring;
-	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)
-		kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
 	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
 	amdgpu_bo_free_kernel(&ring->mqd_obj,
 			      &ring->mqd_gpu_addr,
@@ -547,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 		return;
 
-	if (!is_support_sw_smu(adev) &&
-	    (!adev->powerplay.pp_funcs ||
-	     !adev->powerplay.pp_funcs->set_powergating_by_smu))
-		return;
-
-
 	mutex_lock(&adev->gfx.gfx_off_mutex);
 
 	if (!enable)
@@ -569,3 +559,102 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
 	mutex_unlock(&adev->gfx.gfx_off_mutex);
 }
+
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "gfx_err_count",
+		.debugfs_name = "gfx_err_inject",
+	};
+	struct ras_ih_if ih_info = {
+		.cb = amdgpu_gfx_process_ras_data_cb,
+	};
+
+	if (!adev->gfx.ras_if) {
+		adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->gfx.ras_if)
+			return -ENOMEM;
+		adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
+		adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->gfx.ras_if->sub_block_index = 0;
+		strcpy(adev->gfx.ras_if->name, "gfx");
+	}
+	fs_info.head = ih_info.head = *adev->gfx.ras_if;
+
+	r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
+				 &fs_info, &ih_info);
+	if (r)
+		goto free;
+
+	if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+		if (r)
+			goto late_fini;
+	} else {
+		/* free gfx ras_if if ras is not supported */
+		r = 0;
+		goto free;
+	}
+
+	return 0;
+late_fini:
+	amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
+free:
+	kfree(adev->gfx.ras_if);
+	adev->gfx.ras_if = NULL;
+	return r;
+}
+
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
+			adev->gfx.ras_if) {
+		struct ras_common_if *ras_if = adev->gfx.ras_if;
+		struct ras_ih_if ih_info = {
+			.head = *ras_if,
+			.cb = amdgpu_gfx_process_ras_data_cb,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
+
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+		void *err_data,
+		struct amdgpu_iv_entry *entry)
+{
+	/* TODO ue will trigger an interrupt.
+	 *
+	 * When “Full RAS” is enabled, the per-IP interrupt sources should
+	 * be disabled and the driver should only look for the aggregated
+	 * interrupt via sync flood
+	 */
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+		if (adev->gfx.funcs->query_ras_error_count)
+			adev->gfx.funcs->query_ras_error_count(adev, err_data);
+		amdgpu_ras_reset_gpu(adev);
+	}
+	return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+				  struct amdgpu_irq_src *source,
+				  struct amdgpu_iv_entry *entry)
+{
+	struct ras_common_if *ras_if = adev->gfx.ras_if;
+	struct ras_dispatch_if ih_data = {
+		.entry = entry,
+	};
+
+	if (!ras_if)
+		return 0;
+
+	ih_data.head = *ras_if;
+
+	DRM_ERROR("CP ECC ERROR IRQ\n");
+	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6d19183b478b..af4bd279f42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
 					struct amdgpu_ring *ring,
 					u64 addr,
 					u64 seq);
+	void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub);
 	/* Packet sizes */
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
 	int query_status_size;
+	int invalidate_tlbs_size;
 };
 
 struct amdgpu_kiq {
@@ -201,28 +205,6 @@ struct amdgpu_gfx_funcs {
 	int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
 };
 
-struct amdgpu_ngg_buf {
-	struct amdgpu_bo	*bo;
-	uint64_t		gpu_addr;
-	uint32_t		size;
-	uint32_t		bo_size;
-};
-
-enum {
-	NGG_PRIM = 0,
-	NGG_POS,
-	NGG_CNTL,
-	NGG_PARAM,
-	NGG_BUF_MAX
-};
-
-struct amdgpu_ngg {
-	struct amdgpu_ngg_buf	buf[NGG_BUF_MAX];
-	uint32_t		gds_reserve_addr;
-	uint32_t		gds_reserve_size;
-	bool			init;
-};
-
 struct sq_work {
 	struct work_struct	work;
 	unsigned ih_data;
@@ -247,7 +229,7 @@ struct amdgpu_me {
 	uint32_t			num_me;
 	uint32_t			num_pipe_per_me;
 	uint32_t			num_queue_per_pipe;
-	void				*mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
+	void				*mqd_backup[AMDGPU_MAX_GFX_RINGS];
 
 	/* These are the resources for which amdgpu takes ownership */
 	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
@@ -291,8 +273,12 @@ struct amdgpu_gfx {
 	bool				me_fw_write_wait;
 	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
+	struct drm_gpu_scheduler	*gfx_sched[AMDGPU_MAX_GFX_RINGS];
+	uint32_t			num_gfx_sched;
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+	struct drm_gpu_scheduler	*compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
+	uint32_t			num_compute_sched;
 	unsigned			num_compute_rings;
 	struct amdgpu_irq_src		eop_irq;
 	struct amdgpu_irq_src		priv_reg_irq;
@@ -312,9 +298,6 @@ struct amdgpu_gfx {
 	uint32_t                        grbm_soft_reset;
 	uint32_t                        srbm_soft_reset;
 
-	/* NGG */
-	struct amdgpu_ngg		ngg;
-
 	/* gfx off */
 	bool                            gfx_off_state; /* true: enabled, false: disabled */
 	struct mutex                    gfx_off_mutex;
@@ -356,8 +339,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 			     struct amdgpu_ring *ring,
 			     struct amdgpu_irq_src *irq);
 
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
-			      struct amdgpu_irq_src *irq);
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
 
 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
@@ -385,5 +367,12 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
 				    int pipe, int queue);
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
-
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+		void *err_data,
+		struct amdgpu_iv_entry *entry);
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+				  struct amdgpu_irq_src *source,
+				  struct amdgpu_iv_entry *entry);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5790db61fa2c..5884ab590486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -27,6 +27,8 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
 
 /**
  * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
@@ -221,7 +223,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 	u64 size_af, size_bf;
 
 	if (amdgpu_sriov_vf(adev)) {
-		mc->agp_start = 0xffffffff;
+		mc->agp_start = 0xffffffffffff;
 		mc->agp_end = 0x0;
 		mc->agp_size = 0;
 
@@ -305,3 +307,69 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 	gmc->fault_hash[hash].idx = gmc->last_fault++;
 	return false;
 }
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
+		r = adev->umc.funcs->ras_late_init(adev);
+		if (r)
+			return r;
+	}
+
+	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
+		r = adev->mmhub.funcs->ras_late_init(adev);
+		if (r)
+			return r;
+	}
+
+	return amdgpu_xgmi_ras_late_init(adev);
+}
+
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
+{
+	amdgpu_umc_ras_fini(adev);
+	amdgpu_mmhub_ras_fini(adev);
+	amdgpu_xgmi_ras_fini(adev);
+}
+
+	/*
+	 * The latest engine allocation on gfx9/10 is:
+	 * Engine 2, 3: firmware
+	 * Engine 0, 1, 4~16: amdgpu ring,
+	 *                    subject to change when ring number changes
+	 * Engine 17: Gart flushes
+	 */
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
+
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
+		GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+	unsigned i;
+	unsigned vmhub, inv_eng;
+
+	for (i = 0; i < adev->num_rings; ++i) {
+		ring = adev->rings[i];
+		vmhub = ring->funcs->vmhub;
+
+		inv_eng = ffs(vm_inv_engs[vmhub]);
+		if (!inv_eng) {
+			dev_err(adev->dev, "no VM inv eng for ring %s\n",
+				ring->name);
+			return -EINVAL;
+		}
+
+		ring->vm_inv_eng = inv_eng - 1;
+		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+
+		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
+			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b6e1d98ef01e..86267baca07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -60,6 +60,11 @@
  */
 #define AMDGPU_GMC_FAULT_TIMEOUT	5000ULL
 
+/*
+ * Default stolen memory size, 1024 * 768 * 4
+ */
+#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE	0x300000ULL
+
 struct firmware;
 
 /*
@@ -77,6 +82,7 @@ struct amdgpu_gmc_fault {
 struct amdgpu_vmhub {
 	uint32_t	ctx0_ptb_addr_lo32;
 	uint32_t	ctx0_ptb_addr_hi32;
+	uint32_t	vm_inv_eng0_sem;
 	uint32_t	vm_inv_eng0_req;
 	uint32_t	vm_inv_eng0_ack;
 	uint32_t	vm_context0_cntl;
@@ -91,6 +97,9 @@ struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
+	/* flush the vm tlb via pasid */
+	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+					uint32_t flush_type, bool all_hub);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -99,12 +108,15 @@ struct amdgpu_gmc_funcs {
 				   unsigned pasid);
 	/* enable/disable PRT support */
 	void (*set_prt)(struct amdgpu_device *adev, bool enable);
-	/* set pte flags based per asic */
-	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
-				     uint32_t flags);
+	/* map mtype to hardware flags */
+	uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
 	/* get the pde for a given mc addr */
 	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
 			   u64 *dst, u64 *flags);
+	/* get the pte flags to use for a BO VA mapping */
+	void (*get_vm_pte)(struct amdgpu_device *adev,
+			   struct amdgpu_bo_va_mapping *mapping,
+			   uint64_t *flags);
 };
 
 struct amdgpu_xgmi {
@@ -120,21 +132,52 @@ struct amdgpu_xgmi {
 	/* gpu list in the same hive */
 	struct list_head head;
 	bool supported;
+	struct ras_common_if *ras_if;
 };
 
 struct amdgpu_gmc {
+	/* FB's physical address in MMIO space (for CPU to
+	 * map FB). This is different compared to the agp/
+	 * gart/vram_start/end field as the later is from
+	 * GPU's view and aper_base is from CPU's view.
+	 */
 	resource_size_t		aper_size;
 	resource_size_t		aper_base;
 	/* for some chips with <= 32MB we need to lie
 	 * about vram size near mc fb location */
 	u64			mc_vram_size;
 	u64			visible_vram_size;
+	/* AGP aperture start and end in MC address space
+	 * Driver find a hole in the MC address space
+	 * to place AGP by setting MC_VM_AGP_BOT/TOP registers
+	 * Under VMID0, logical address == MC address. AGP
+	 * aperture maps to physical bus or IOVA addressed.
+	 * AGP aperture is used to simulate FB in ZFB case.
+	 * AGP aperture is also used for page table in system
+	 * memory (mainly for APU).
+	 *
+	 */
 	u64			agp_size;
 	u64			agp_start;
 	u64			agp_end;
+	/* GART aperture start and end in MC address space
+	 * Driver find a hole in the MC address space
+	 * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
+	 * registers
+	 * Under VMID0, logical address inside GART aperture will
+	 * be translated through gpuvm gart page table to access
+	 * paged system memory
+	 */
 	u64			gart_size;
 	u64			gart_start;
 	u64			gart_end;
+	/* Frame buffer aperture of this GPU device. Different from
+	 * fb_start (see below), this only covers the local GPU device.
+	 * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
+	 * and calculate vram_start of this local device by adding an
+	 * offset inside the XGMI hive.
+	 * Under VMID0, logical address == MC address
+	 */
 	u64			vram_start;
 	u64			vram_end;
 	/* FB region , it's same as local vram region in single GPU, in XGMI
@@ -153,6 +196,7 @@ struct amdgpu_gmc {
 	uint32_t                fw_version;
 	struct amdgpu_irq_src	vm_fault;
 	uint32_t		vram_type;
+	uint8_t			vram_vendor;
 	uint32_t                srbm_soft_reset;
 	bool			prt_warning;
 	uint64_t		stolen_size;
@@ -177,15 +221,17 @@ struct amdgpu_gmc {
 
 	struct amdgpu_xgmi xgmi;
 	struct amdgpu_irq_src	ecc_irq;
-	struct ras_common_if    *umc_ras_if;
-	struct ras_common_if    *mmhub_ras_if;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+	((adev), (pasid), (type), (allhub)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
+#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
 
 /**
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -230,5 +276,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
 			     struct amdgpu_gmc *mc);
 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 			      uint16_t pasid, uint64_t timestamp);
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 53734da1c2df..3a67f6c046d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
 	int r;
 
 	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
-		return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
+		return amdgpu_sync_fence(sync, ring->vmid_wait, false);
 
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
 			return -ENOMEM;
 		}
 
-		r = amdgpu_sync_fence(adev, sync, &array->base, false);
+		r = amdgpu_sync_fence(sync, &array->base, false);
 		dma_fence_put(ring->vmid_wait);
 		ring->vmid_wait = &array->base;
 		return r;
@@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	    !dma_fence_is_later(updates, (*id)->flushed_updates))
 	    updates = NULL;
 
-	if ((*id)->owner != vm->entity.fence_context ||
+	if ((*id)->owner != vm->direct.fence_context ||
 	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
 	    updates || !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
 		if (tmp) {
 			*id = NULL;
-			r = amdgpu_sync_fence(adev, sync, tmp, false);
+			r = amdgpu_sync_fence(sync, tmp, false);
 			return r;
 		}
 		needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	/* Good we can use this VMID. Remember this submission as
 	* user of the VMID.
 	*/
-	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+	r = amdgpu_sync_fence(&(*id)->active, fence, false);
 	if (r)
 		return r;
 
@@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		struct dma_fence *flushed;
 
 		/* Check all the prerequisites to using this VMID */
-		if ((*id)->owner != vm->entity.fence_context)
+		if ((*id)->owner != vm->direct.fence_context)
 			continue;
 
 		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
@@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		/* Good, we can use this VMID. Remember this submission as
 		 * user of the VMID.
 		 */
-		r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+		r = amdgpu_sync_fence(&(*id)->active, fence, false);
 		if (r)
 			return r;
 
@@ -435,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			id = idle;
 
 			/* Remember this submission as user of the VMID */
-			r = amdgpu_sync_fence(ring->adev, &id->active,
-					      fence, false);
+			r = amdgpu_sync_fence(&id->active, fence, false);
 			if (r)
 				goto error;
 
@@ -449,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 
 	id->pd_gpu_addr = job->vm_pd_addr;
-	id->owner = vm->entity.fence_context;
+	id->owner = vm->direct.fence_context;
 
 	if (job->vm_needs_flush) {
 		dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 6d8f05511aba..111a301ce878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -66,7 +66,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 		if (ih->ring == NULL)
 			return -ENOMEM;
 
-		memset((void *)ih->ring, 0, ih->ring_size + 8);
 		ih->gpu_addr = dma_addr;
 		ih->wptr_addr = dma_addr + ih->ring_size;
 		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 2a3f5ec298db..5ed4227f304b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -55,6 +55,7 @@
 #include "amdgpu_connectors.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
 
 #include <linux/pm_runtime.h>
 
@@ -87,10 +88,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
 	struct drm_device *dev = adev->ddev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 
 	mutex_lock(&mode_config->mutex);
-	list_for_each_entry(connector, &mode_config->connector_list, head)
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter)
 		amdgpu_connector_hotplug(connector);
+	drm_connector_list_iter_end(&iter);
 	mutex_unlock(&mode_config->mutex);
 	/* Just fire off a uevent and let userspace tell us what to do */
 	drm_helper_hpd_irq_event(dev);
@@ -153,6 +157,22 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
 	ret = amdgpu_ih_process(adev, &adev->irq.ih);
 	if (ret == IRQ_HANDLED)
 		pm_runtime_mark_last_busy(dev->dev);
+
+	/* For the hardware that cannot enable bif ring for both ras_controller_irq
+         * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+	 * register to check whether the interrupt is triggered or not, and properly
+	 * ack the interrupt if it is there
+	 */
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
+		if (adev->nbio.funcs &&
+		    adev->nbio.funcs->handle_ras_controller_intr_no_bifring)
+			adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev);
+
+		if (adev->nbio.funcs &&
+		    adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring)
+			adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
+	}
+
 	return ret;
 }
 
@@ -228,10 +248,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	adev->irq.msi_enabled = false;
 
 	if (amdgpu_msi_ok(adev)) {
-		int ret = pci_enable_msi(adev->pdev);
-		if (!ret) {
+		int nvec = pci_msix_vec_count(adev->pdev);
+		unsigned int flags;
+
+		if (nvec <= 0) {
+			flags = PCI_IRQ_MSI;
+		} else {
+			flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
+		}
+		/* we only need one vector */
+		nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+		if (nvec > 0) {
 			adev->irq.msi_enabled = true;
-			dev_dbg(adev->dev, "amdgpu: using MSI.\n");
+			dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n");
 		}
 	}
 
@@ -254,7 +283,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
 
 	adev->irq.installed = true;
-	r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
+	/* Use vector 0 for MSI-X */
+	r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0));
 	if (r) {
 		adev->irq.installed = false;
 		if (!amdgpu_device_has_dc_support(adev))
@@ -284,7 +314,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
 		drm_irq_uninstall(adev->ddev);
 		adev->irq.installed = false;
 		if (adev->irq.msi_enabled)
-			pci_disable_msi(adev->pdev);
+			pci_free_irq_vectors(adev->pdev);
 		if (!amdgpu_device_has_dc_support(adev))
 			flush_work(&adev->hotplug_work);
 	}
@@ -369,7 +399,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
  * amdgpu_irq_dispatch - dispatch IRQ to IP blocks
  *
  * @adev: amdgpu device pointer
- * @entry: interrupt vector pointer
+ * @ih: interrupt ring instance
  *
  * Dispatches IRQ to IP blocks.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 96b2a31ccfed..d42be880a236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	if (r)
 		return r;
 
-	job->owner = owner;
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	priority = job->base.s_priority;
@@ -193,8 +192,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
 	fence = amdgpu_sync_get_fence(&job->sync, &explicit);
 	if (fence && explicit) {
 		if (drm_sched_dependency_optimized(fence, s_entity)) {
-			r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
-					      fence, false);
+			r = amdgpu_sync_fence(&job->sched_sync, fence, false);
 			if (r)
 				DRM_ERROR("Error adding fence (%d)\n", r);
 		}
@@ -248,6 +246,44 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 	return fence;
 }
 
+#define to_drm_sched_job(sched_job)		\
+		container_of((sched_job), struct drm_sched_job, queue_node)
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *s_job;
+	struct drm_sched_entity *s_entity = NULL;
+	int i;
+
+	/* Signal all jobs not yet scheduled */
+	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+		struct drm_sched_rq *rq = &sched->sched_rq[i];
+
+		if (!rq)
+			continue;
+
+		spin_lock(&rq->lock);
+		list_for_each_entry(s_entity, &rq->entities, list) {
+			while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+				struct drm_sched_fence *s_fence = s_job->s_fence;
+
+				dma_fence_signal(&s_fence->scheduled);
+				dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+				dma_fence_signal(&s_fence->finished);
+			}
+		}
+		spin_unlock(&rq->lock);
+	}
+
+	/* Signal all jobs already scheduled to HW */
+	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+		struct drm_sched_fence *s_fence = s_job->s_fence;
+
+		dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+		dma_fence_signal(&s_fence->finished);
+	}
+}
+
 const struct drm_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 51e62504c279..3f7b8433d179 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -49,7 +49,6 @@ struct amdgpu_job {
 	uint32_t		preamble_status;
 	uint32_t                preemption_status;
 	uint32_t		num_ibs;
-	void			*owner;
 	bool                    vm_needs_flush;
 	uint64_t		vm_pd_addr;
 	unsigned		vmid;
@@ -76,4 +75,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 		      void *owner, struct dma_fence **f);
 int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 			     struct dma_fence **fence);
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
new file mode 100644
index 000000000000..5727f00afc8e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#define JPEG_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
+{
+	INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+
+	return 0;
+}
+
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
+{
+	int i;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+	}
+
+	return 0;
+}
+
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev)
+{
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	return 0;
+}
+
+int amdgpu_jpeg_resume(struct amdgpu_device *adev)
+{
+	return 0;
+}
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, jpeg.idle_work.work);
+	unsigned int fences = 0;
+	unsigned int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+	}
+
+	if (fences == 0)
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+						       AMD_PG_STATE_GATE);
+	else
+		schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	if (set_clocks)
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+						       AMD_PG_STATE_UNGATE);
+}
+
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+	r = amdgpu_ring_alloc(ring, 3);
+	if (r)
+		return r;
+
+	amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+	return r;
+}
+
+static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+		struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct dma_fence *f = NULL;
+	const unsigned ib_size_dw = 16;
+	int i, r;
+
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	if (r)
+		return r;
+
+	ib = &job->ibs[0];
+
+	ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+	ib->ptr[1] = 0xDEADBEEF;
+	for (i = 2; i < 16; i += 2) {
+		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+		ib->ptr[i+1] = 0;
+	}
+	ib->length_dw = 16;
+
+	r = amdgpu_job_submit_direct(job, ring, &f);
+	if (r)
+		goto err;
+
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return 0;
+
+err:
+	amdgpu_job_free(job);
+	return r;
+}
+
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t tmp = 0;
+	unsigned i;
+	struct dma_fence *fence = NULL;
+	long r = 0;
+
+	r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence);
+	if (r)
+		goto error;
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		r = -ETIMEDOUT;
+		goto error;
+	} else if (r < 0) {
+		goto error;
+	} else {
+		r = 0;
+	}
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+	dma_fence_put(fence);
+error:
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
new file mode 100644
index 000000000000..bd9ef9cc86de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_JPEG_H__
+#define __AMDGPU_JPEG_H__
+
+#define AMDGPU_MAX_JPEG_INSTANCES	2
+
+#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
+#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+
+struct amdgpu_jpeg_reg{
+	unsigned jpeg_pitch;
+};
+
+struct amdgpu_jpeg_inst {
+	struct amdgpu_ring ring_dec;
+	struct amdgpu_irq_src irq;
+	struct amdgpu_jpeg_reg external;
+};
+
+struct amdgpu_jpeg {
+	uint8_t	num_jpeg_inst;
+	struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+	struct amdgpu_jpeg_reg internal;
+	struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES];
+	uint32_t num_jpeg_sched;
+	unsigned harvest_config;
+	struct delayed_work idle_work;
+	enum amd_powergating_state cur_state;
+};
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev);
+int amdgpu_jpeg_resume(struct amdgpu_device *adev);
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index a73206784cba..60591dbc2097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_request_full_gpu(adev, false);
 
-	if (amdgpu_device_is_px(dev)) {
+	if (adev->runpm) {
 		pm_runtime_get_sync(dev->dev);
 		pm_runtime_forbid(dev->dev);
 	}
@@ -150,8 +150,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	}
 	dev->dev_private = (void *)adev;
 
-	if ((amdgpu_runtime_pm != 0) &&
-	    amdgpu_has_atpx() &&
+	if (amdgpu_has_atpx() &&
 	    (amdgpu_is_atpx_hybrid() ||
 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
 	    ((flags & AMD_IS_APU) == 0) &&
@@ -170,6 +169,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 		goto out;
 	}
 
+	if (amdgpu_device_supports_boco(dev) &&
+	    (amdgpu_runtime_pm != 0)) /* enable runpm by default */
+		adev->runpm = true;
+	else if (amdgpu_device_supports_baco(dev) &&
+		 (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */
+		adev->runpm = true;
+
 	/* Call ACPI methods: require modeset init
 	 * but failure is not fatal
 	 */
@@ -180,7 +186,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 				"Error during ACPI methods call\n");
 	}
 
-	if (amdgpu_device_is_px(dev)) {
+	if (adev->runpm) {
 		dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP);
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
@@ -193,7 +199,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 out:
 	if (r) {
 		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
-		if (adev->rmmio && amdgpu_device_is_px(dev))
+		if (adev->rmmio && adev->runpm)
 			pm_runtime_put_noidle(dev->dev);
 		amdgpu_driver_unload_kms(dev);
 	}
@@ -293,6 +299,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->dm.dmcu_fw_version;
 		fw_info->feature = 0;
 		break;
+	case AMDGPU_INFO_FW_DMCUB:
+		fw_info->ver = adev->dm.dmcub_fw_version;
+		fw_info->feature = 0;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -396,12 +406,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		ib_size_alignment = 1;
 		break;
 	case AMDGPU_HW_IP_VCN_JPEG:
-		type = AMD_IP_BLOCK_TYPE_VCN;
-		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
+		type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+			AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+			if (adev->jpeg.harvest_config & (1 << i))
 				continue;
 
-			if (adev->vcn.inst[i].ring_jpeg.sched.ready)
+			if (adev->jpeg.inst[i].ring_dec.sched.ready)
 				++num_rings;
 		}
 		ib_start_alignment = 16;
@@ -517,9 +529,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
 		case AMDGPU_HW_IP_VCN_ENC:
-		case AMDGPU_HW_IP_VCN_JPEG:
 			type = AMD_IP_BLOCK_TYPE_VCN;
 			break;
+		case AMDGPU_HW_IP_VCN_JPEG:
+			type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+				AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -583,9 +598,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 
 		vram_gtt.vram_size = adev->gmc.real_vram_size -
-			atomic64_read(&adev->vram_pin_size);
-		vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
-			atomic64_read(&adev->visible_pin_size);
+			atomic64_read(&adev->vram_pin_size) -
+			AMDGPU_VM_RESERVED_VRAM;
+		vram_gtt.vram_cpu_accessible_size =
+			min(adev->gmc.visible_vram_size -
+			    atomic64_read(&adev->visible_pin_size),
+			    vram_gtt.vram_size);
 		vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
 		vram_gtt.gtt_size *= PAGE_SIZE;
 		vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
@@ -598,15 +616,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		memset(&mem, 0, sizeof(mem));
 		mem.vram.total_heap_size = adev->gmc.real_vram_size;
 		mem.vram.usable_heap_size = adev->gmc.real_vram_size -
-			atomic64_read(&adev->vram_pin_size);
+			atomic64_read(&adev->vram_pin_size) -
+			AMDGPU_VM_RESERVED_VRAM;
 		mem.vram.heap_usage =
 			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
 		mem.cpu_accessible_vram.total_heap_size =
 			adev->gmc.visible_vram_size;
-		mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
-			atomic64_read(&adev->visible_pin_size);
+		mem.cpu_accessible_vram.usable_heap_size =
+			min(adev->gmc.visible_vram_size -
+			    atomic64_read(&adev->visible_pin_size),
+			    mem.vram.usable_heap_size);
 		mem.cpu_accessible_vram.heap_usage =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.cpu_accessible_vram.max_allocation =
@@ -682,10 +703,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		if (adev->pm.dpm_enabled) {
 			dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
 			dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
-		} else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
-			   adev->virt.ops->get_pp_clk) {
-			dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10;
-			dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10;
 		} else {
 			dev_info.max_engine_clock = adev->clock.default_sclk * 10;
 			dev_info.max_memory_clock = adev->clock.default_mclk * 10;
@@ -732,17 +749,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.vce_harvest_config = adev->vce.harvest_config;
 		dev_info.gc_double_offchip_lds_buf =
 			adev->gfx.config.double_offchip_lds_buf;
-
-		if (amdgpu_ngg) {
-			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr;
-			dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size;
-			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr;
-			dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size;
-			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr;
-			dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size;
-			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr;
-			dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size;
-		}
 		dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
 		dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
 		dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
@@ -971,6 +977,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	/* Ensure IB tests are run on ring */
 	flush_delayed_work(&adev->delayed_init_work);
 
+
+	if (amdgpu_ras_intr_triggered()) {
+		DRM_ERROR("RAS Intr triggered, device disabled!!");
+		return -EHWPOISON;
+	}
+
 	file_priv->driver_priv = NULL;
 
 	r = pm_runtime_get_sync(dev->dev);
@@ -1393,6 +1405,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 
+	/* DMCUB */
+	query_fw.fw_type = AMDGPU_INFO_FW_DMCUB;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 
 	seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
new file mode 100644
index 000000000000..676c48c02d77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_ih_if ih_info = {
+		.cb = NULL,
+	};
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "mmhub_err_count",
+		.debugfs_name = "mmhub_err_inject",
+	};
+
+	if (!adev->mmhub.ras_if) {
+		adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->mmhub.ras_if)
+			return -ENOMEM;
+		adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
+		adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->mmhub.ras_if->sub_block_index = 0;
+		strcpy(adev->mmhub.ras_if->name, "mmhub");
+	}
+	ih_info.head = fs_info.head = *adev->mmhub.ras_if;
+	r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
+				 &fs_info, &ih_info);
+	if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
+		kfree(adev->mmhub.ras_if);
+		adev->mmhub.ras_if = NULL;
+	}
+
+	return r;
+}
+
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
+			adev->mmhub.ras_if) {
+		struct ras_common_if *ras_if = adev->mmhub.ras_if;
+		struct ras_ih_if ih_info = {
+			.cb = NULL,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 2d75ecfa199b..1cd78940cf82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -23,9 +23,17 @@
 
 struct amdgpu_mmhub_funcs {
 	void (*ras_init)(struct amdgpu_device *adev);
+	int (*ras_late_init)(struct amdgpu_device *adev);
 	void (*query_ras_error_count)(struct amdgpu_device *adev,
 					void *ras_error_status);
 };
 
+struct amdgpu_mmhub {
+	struct ras_common_if *ras_if;
+	const struct amdgpu_mmhub_funcs *funcs;
+};
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 31d4deb5d294..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,438 +51,107 @@
 #include "amdgpu_amdkfd.h"
 
 /**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
  *
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
-	struct interval_tree_node	it;
-	struct list_head		bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
-	struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
-	struct amdgpu_device *adev = amn->adev;
-	struct amdgpu_mn_node *node, *next_node;
-	struct amdgpu_bo *bo, *next_bo;
-
-	mutex_lock(&adev->mn_lock);
-	down_write(&amn->lock);
-	hash_del(&amn->node);
-	rbtree_postorder_for_each_entry_safe(node, next_node,
-					     &amn->objects.rb_root, it.rb) {
-		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
-			bo->mn = NULL;
-			list_del_init(&bo->mn_list);
-		}
-		kfree(node);
-	}
-	up_write(&amn->lock);
-	mutex_unlock(&adev->mn_lock);
-
-	hmm_mirror_unregister(&amn->mirror);
-	kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
-	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
-	INIT_WORK(&amn->work, amdgpu_mn_destroy);
-	schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
-	if (mn)
-		down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
-	if (mn)
-		up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
-	if (blockable)
-		down_read(&amn->lock);
-	else if (!down_read_trylock(&amn->lock))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
-	up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
-				      unsigned long start,
-				      unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+				     const struct mmu_notifier_range *range,
+				     unsigned long cur_seq)
 {
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	long r;
 
-	list_for_each_entry(bo, &node->bos, mn_list) {
-
-		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
-			continue;
-
-		r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
-			true, false, MAX_SCHEDULE_TIMEOUT);
-		if (r <= 0)
-			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-	}
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
-			      const struct mmu_notifier_range *update)
-{
-	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-	unsigned long start = update->start;
-	unsigned long end = update->end;
-	bool blockable = mmu_notifier_range_blockable(update);
-	struct interval_tree_node *it;
+	if (!mmu_notifier_range_blockable(range))
+		return false;
 
-	/* notification is exclusive, but interval is inclusive */
-	end -= 1;
+	mutex_lock(&adev->notifier_lock);
 
-	/* TODO we should be able to split locking for interval tree and
-	 * amdgpu_mn_invalidate_node
-	 */
-	if (amdgpu_mn_read_lock(amn, blockable))
-		return -EAGAIN;
+	mmu_interval_set_seq(mni, cur_seq);
 
-	it = interval_tree_iter_first(&amn->objects, start, end);
-	while (it) {
-		struct amdgpu_mn_node *node;
-
-		if (!blockable) {
-			amdgpu_mn_read_unlock(amn);
-			return -EAGAIN;
-		}
-
-		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, start, end);
-
-		amdgpu_mn_invalidate_node(node, start, end);
-	}
-
-	amdgpu_mn_read_unlock(amn);
-
-	return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
-			      const struct mmu_notifier_range *update)
-{
-	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-	unsigned long start = update->start;
-	unsigned long end = update->end;
-	bool blockable = mmu_notifier_range_blockable(update);
-	struct interval_tree_node *it;
-
-	/* notification is exclusive, but interval is inclusive */
-	end -= 1;
-
-	if (amdgpu_mn_read_lock(amn, blockable))
-		return -EAGAIN;
-
-	it = interval_tree_iter_first(&amn->objects, start, end);
-	while (it) {
-		struct amdgpu_mn_node *node;
-		struct amdgpu_bo *bo;
-
-		if (!blockable) {
-			amdgpu_mn_read_unlock(amn);
-			return -EAGAIN;
-		}
-
-		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, start, end);
-
-		list_for_each_entry(bo, &node->bos, mn_list) {
-			struct kgd_mem *mem = bo->kfd_bo;
-
-			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
-		}
-	}
-
-	amdgpu_mn_read_unlock(amn);
-
-	return 0;
+	r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+				      MAX_SCHEDULE_TIMEOUT);
+	mutex_unlock(&adev->notifier_lock);
+	if (r <= 0)
+		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+	return true;
 }
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
-	[AMDGPU_MN_TYPE_GFX] = {
-		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
-		.release = amdgpu_hmm_mirror_release
-	},
-	[AMDGPU_MN_TYPE_HSA] = {
-		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
-		.release = amdgpu_hmm_mirror_release
-	},
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+	.invalidate = amdgpu_mn_invalidate_gfx,
 };
 
 /**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
  *
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
  *
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-				enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+				     const struct mmu_notifier_range *range,
+				     unsigned long cur_seq)
 {
-	struct mm_struct *mm = current->mm;
-	struct amdgpu_mn *amn;
-	unsigned long key = AMDGPU_MN_KEY(mm, type);
-	int r;
-
-	mutex_lock(&adev->mn_lock);
-	if (down_write_killable(&mm->mmap_sem)) {
-		mutex_unlock(&adev->mn_lock);
-		return ERR_PTR(-EINTR);
-	}
-
-	hash_for_each_possible(adev->mn_hash, amn, node, key)
-		if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
-			goto release_locks;
-
-	amn = kzalloc(sizeof(*amn), GFP_KERNEL);
-	if (!amn) {
-		amn = ERR_PTR(-ENOMEM);
-		goto release_locks;
-	}
-
-	amn->adev = adev;
-	amn->mm = mm;
-	init_rwsem(&amn->lock);
-	amn->type = type;
-	amn->objects = RB_ROOT_CACHED;
-
-	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
-	r = hmm_mirror_register(&amn->mirror, mm);
-	if (r)
-		goto free_amn;
+	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-	hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+	if (!mmu_notifier_range_blockable(range))
+		return false;
 
-release_locks:
-	up_write(&mm->mmap_sem);
-	mutex_unlock(&adev->mn_lock);
+	mutex_lock(&adev->notifier_lock);
 
-	return amn;
+	mmu_interval_set_seq(mni, cur_seq);
 
-free_amn:
-	up_write(&mm->mmap_sem);
-	mutex_unlock(&adev->mn_lock);
-	kfree(amn);
+	amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+	mutex_unlock(&adev->notifier_lock);
 
-	return ERR_PTR(r);
+	return true;
 }
 
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+	.invalidate = amdgpu_mn_invalidate_hsa,
+};
+
 /**
  * amdgpu_mn_register - register a BO for notifier updates
  *
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
-	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	enum amdgpu_mn_type type =
-		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
-	struct amdgpu_mn *amn;
-	struct amdgpu_mn_node *node = NULL, *new_node;
-	struct list_head bos;
-	struct interval_tree_node *it;
-
-	amn = amdgpu_mn_get(adev, type);
-	if (IS_ERR(amn))
-		return PTR_ERR(amn);
-
-	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
-	if (!new_node)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&bos);
-
-	down_write(&amn->lock);
-
-	while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
-		kfree(node);
-		node = container_of(it, struct amdgpu_mn_node, it);
-		interval_tree_remove(&node->it, &amn->objects);
-		addr = min(it->start, addr);
-		end = max(it->last, end);
-		list_splice(&node->bos, &bos);
-	}
-
-	if (!node)
-		node = new_node;
-	else
-		kfree(new_node);
-
-	bo->mn = amn;
-
-	node->it.start = addr;
-	node->it.last = end;
-	INIT_LIST_HEAD(&node->bos);
-	list_splice(&bos, &node->bos);
-	list_add(&bo->mn_list, &node->bos);
-
-	interval_tree_insert(&node->it, &amn->objects);
-
-	up_write(&amn->lock);
-
-	return 0;
+	if (bo->kfd_bo)
+		return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+						    addr, amdgpu_bo_size(bo),
+						    &amdgpu_mn_hsa_ops);
+	return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+					    amdgpu_bo_size(bo),
+					    &amdgpu_mn_gfx_ops);
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	struct amdgpu_mn *amn;
-	struct list_head *head;
-
-	mutex_lock(&adev->mn_lock);
-
-	amn = bo->mn;
-	if (amn == NULL) {
-		mutex_unlock(&adev->mn_lock);
+	if (!bo->notifier.mm)
 		return;
-	}
-
-	down_write(&amn->lock);
-
-	/* save the next list entry for later */
-	head = bo->mn_list.next;
-
-	bo->mn = NULL;
-	list_del_init(&bo->mn_list);
-
-	if (list_empty(head)) {
-		struct amdgpu_mn_node *node;
-
-		node = container_of(head, struct amdgpu_mn_node, bos);
-		interval_tree_remove(&node->it, &amn->objects);
-		kfree(node);
-	}
-
-	up_write(&amn->lock);
-	mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
-		(1 << 0), /* HMM_PFN_VALID */
-		(1 << 1), /* HMM_PFN_WRITE */
-		0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
-		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
-		0, /* HMM_PFN_NONE */
-		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
-	if (range) {
-		range->flags = hmm_range_flags;
-		range->values = hmm_range_values;
-		range->pfn_shift = PAGE_SHIFT;
-	}
+	mmu_interval_notifier_remove(&bo->notifier);
+	bo->notifier.mm = NULL;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
 #include <linux/workqueue.h>
 #include <linux/interval_tree.h>
 
-enum amdgpu_mn_type {
-	AMDGPU_MN_TYPE_GFX,
-	AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
-	/* constant after initialisation */
-	struct amdgpu_device	*adev;
-	struct mm_struct	*mm;
-	enum amdgpu_mn_type	type;
-
-	/* only used on destruction */
-	struct work_struct	work;
-
-	/* protected by adev->mn_lock */
-	struct hlist_node	node;
-
-	/* objects protected by lock */
-	struct rw_semaphore	lock;
-	struct rb_root_cached	objects;
-
-#ifdef CONFIG_HMM_MIRROR
-	/* HMM mirror */
-	struct hmm_mirror	mirror;
-#endif
-};
-
 #if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-					      enum amdgpu_mn_type type)
-{
-	return NULL;
-}
 static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
new file mode 100644
index 000000000000..7d5c3a9de9ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2019  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_ih_if ih_info = {
+		.cb = NULL,
+	};
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "pcie_bif_err_count",
+		.debugfs_name = "pcie_bif_err_inject",
+	};
+
+	if (!adev->nbio.ras_if) {
+		adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->nbio.ras_if)
+			return -ENOMEM;
+		adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+		adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->nbio.ras_if->sub_block_index = 0;
+		strcpy(adev->nbio.ras_if->name, "pcie_bif");
+	}
+	ih_info.head = fs_info.head = *adev->nbio.ras_if;
+	r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
+				 &fs_info, &ih_info);
+	if (r)
+		goto free;
+
+	if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+		r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
+		if (r)
+			goto late_fini;
+		r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+		if (r)
+			goto late_fini;
+	} else {
+		r = 0;
+		goto free;
+	}
+
+	return 0;
+late_fini:
+	amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info);
+free:
+	kfree(adev->nbio.ras_if);
+	adev->nbio.ras_if = NULL;
+	return r;
+}
+
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
+			adev->nbio.ras_if) {
+		struct ras_common_if *ras_if = adev->nbio.ras_if;
+		struct ras_ih_if ih_info = {
+			.cb = NULL,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
new file mode 100644
index 000000000000..919bd566ba3c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_NBIO_H__
+#define __AMDGPU_NBIO_H__
+
+/*
+ * amdgpu nbio functions
+ */
+struct nbio_hdp_flush_reg {
+	u32 ref_and_mask_cp0;
+	u32 ref_and_mask_cp1;
+	u32 ref_and_mask_cp2;
+	u32 ref_and_mask_cp3;
+	u32 ref_and_mask_cp4;
+	u32 ref_and_mask_cp5;
+	u32 ref_and_mask_cp6;
+	u32 ref_and_mask_cp7;
+	u32 ref_and_mask_cp8;
+	u32 ref_and_mask_cp9;
+	u32 ref_and_mask_sdma0;
+	u32 ref_and_mask_sdma1;
+	u32 ref_and_mask_sdma2;
+	u32 ref_and_mask_sdma3;
+	u32 ref_and_mask_sdma4;
+	u32 ref_and_mask_sdma5;
+	u32 ref_and_mask_sdma6;
+	u32 ref_and_mask_sdma7;
+};
+
+struct amdgpu_nbio_funcs {
+	const struct nbio_hdp_flush_reg *hdp_flush_reg;
+	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+	u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+	u32 (*get_rev_id)(struct amdgpu_device *adev);
+	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+	void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+	u32 (*get_memsize)(struct amdgpu_device *adev);
+	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+			bool use_doorbell, int doorbell_index, int doorbell_size);
+	void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+				   int doorbell_index, int instance);
+	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+					 bool enable);
+	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+						  bool enable);
+	void (*ih_doorbell_range)(struct amdgpu_device *adev,
+				  bool use_doorbell, int doorbell_index);
+	void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+					  bool enable);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+						bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*ih_control)(struct amdgpu_device *adev);
+	void (*init_registers)(struct amdgpu_device *adev);
+	void (*detect_hw_virt)(struct amdgpu_device *adev);
+	void (*remap_hdp_registers)(struct amdgpu_device *adev);
+	void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+	void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+	int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+	int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+	void (*query_ras_error_count)(struct amdgpu_device *adev,
+					void *ras_error_status);
+	int (*ras_late_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio {
+	const struct nbio_hdp_flush_reg *hdp_flush_reg;
+	struct amdgpu_irq_src ras_controller_irq;
+	struct amdgpu_irq_src ras_err_event_athub_irq;
+	struct ras_common_if *ras_if;
+	const struct amdgpu_nbio_funcs *funcs;
+};
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7289e1b4fb60..e3f16b49e970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -343,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @domain: where to place it
+ * @bo_ptr:  used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in the address space of the domain.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+			       uint64_t offset, uint64_t size, uint32_t domain,
+			       struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	unsigned int i;
+	int r;
+
+	offset &= PAGE_MASK;
+	size = ALIGN(size, PAGE_SIZE);
+
+	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
+				      NULL, cpu_addr);
+	if (r)
+		return r;
+
+	/*
+	 * Remove the original mem node and create a new one at the request
+	 * position.
+	 */
+	if (cpu_addr)
+		amdgpu_bo_kunmap(*bo_ptr);
+
+	ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem);
+
+	for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+		(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+		(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+	}
+	r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+			     &(*bo_ptr)->tbo.mem, &ctx);
+	if (r)
+		goto error;
+
+	if (cpu_addr) {
+		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+		if (r)
+			goto error;
+	}
+
+	amdgpu_bo_unreserve(*bo_ptr);
+	return 0;
+
+error:
+	amdgpu_bo_unreserve(*bo_ptr);
+	amdgpu_bo_unref(bo_ptr);
+	return r;
+}
+
+/**
  * amdgpu_bo_free_kernel - free BO for kernel use
  *
  * @bo: amdgpu BO to free
@@ -451,7 +515,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 {
 	struct ttm_operation_ctx ctx = {
 		.interruptible = (bp->type != ttm_bo_type_kernel),
-		.no_wait_gpu = false,
+		.no_wait_gpu = bp->no_wait_gpu,
 		.resv = bp->resv,
 		.flags = bp->type != ttm_bo_type_kernel ?
 			TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
@@ -1059,7 +1123,10 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
 			     struct vm_area_struct *vma)
 {
-	return ttm_fbdev_mmap(vma, &bo->tbo);
+	if (vma->vm_pgoff != 0)
+		return -EACCES;
+
+	return ttm_bo_mmap_obj(vma, &bo->tbo);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 658f4c9779b7..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
 
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
 
 #define AMDGPU_BO_INVALID_OFFSET	LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS	3
@@ -41,6 +44,7 @@ struct amdgpu_bo_param {
 	u32				preferred_domain;
 	u64				flags;
 	enum ttm_bo_type		type;
+	bool				no_wait_gpu;
 	struct dma_resv	*resv;
 };
 
@@ -100,10 +104,12 @@ struct amdgpu_bo {
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	struct amdgpu_mn		*mn;
 
-	union {
-		struct list_head	mn_list;
-		struct list_head	shadow_list;
-	};
+
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_interval_notifier	notifier;
+#endif
+
+	struct list_head		shadow_list;
 
 	struct kgd_mem                  *kfd_bo;
 };
@@ -237,6 +243,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 			    unsigned long size, int align,
 			    u32 domain, struct amdgpu_bo **bo_ptr,
 			    u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+			       uint64_t offset, uint64_t size, uint32_t domain,
+			       struct amdgpu_bo **bo_ptr, void **cpu_addr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 			   void **cpu_addr);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 03930313c263..b03b1eb7ba04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -37,6 +37,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/nospec.h>
+#include <linux/pm_runtime.h>
 #include "hwmgr.h"
 #define WIDTH_4K 3840
 
@@ -158,10 +159,18 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type pm;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		if (adev->smu.ppt_funcs->get_current_power_state)
-			pm = amdgpu_smu_get_current_power_state(adev);
+			pm = smu_get_current_power_state(&adev->smu);
 		else
 			pm = adev->pm.dpm.user_state;
 	} else if (adev->powerplay.pp_funcs->get_current_power_state) {
@@ -170,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
 		pm = adev->pm.dpm.user_state;
 	}
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			(pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
 			(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
@@ -183,6 +195,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type  state;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
 
 	if (strncmp("battery", buf, strlen("battery")) == 0)
 		state = POWER_STATE_TYPE_BATTERY;
@@ -190,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		state = POWER_STATE_TYPE_BALANCED;
 	else if (strncmp("performance", buf, strlen("performance")) == 0)
 		state = POWER_STATE_TYPE_PERFORMANCE;
-	else {
-		count = -EINVAL;
-		goto fail;
-	}
+	else
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		mutex_lock(&adev->pm.mutex);
@@ -206,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		adev->pm.dpm.user_state = state;
 		mutex_unlock(&adev->pm.mutex);
 
-		/* Can't set dpm state when the card is off */
-		if (!(adev->flags & AMD_IS_PX) ||
-		    (ddev->switch_power_state == DRM_SWITCH_POWER_ON))
-			amdgpu_pm_compute_clocks(adev);
+		amdgpu_pm_compute_clocks(adev);
 	}
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -282,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_dpm_forced_level level = 0xff;
+	int ret;
 
-	if (amdgpu_sriov_vf(adev))
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
-	if ((adev->flags & AMD_IS_PX) &&
-	    (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return snprintf(buf, PAGE_SIZE, "off\n");
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
 		level = smu_get_performance_level(&adev->smu);
@@ -297,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 	else
 		level = adev->pm.dpm.forced_level;
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
 			(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
@@ -320,9 +341,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	enum amd_dpm_forced_level current_level = 0xff;
 	int ret = 0;
 
-	/* Can't force performance level when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return -EINVAL;
 
 	if (strncmp("low", buf, strlen("low")) == 0) {
@@ -344,30 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	} else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) {
 		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
 	}  else {
-		count = -EINVAL;
-		goto fail;
+		return -EINVAL;
 	}
 
-	/* handle sriov case here */
-	if (amdgpu_sriov_vf(adev)) {
-		if (amdgim_is_hwperf(adev) &&
-		    adev->virt.ops->force_dpm_level) {
-			mutex_lock(&adev->pm.mutex);
-			adev->virt.ops->force_dpm_level(adev, level);
-			mutex_unlock(&adev->pm.mutex);
-			return count;
-		} else {
-			return -EINVAL;
-		}
-	}
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
 		current_level = smu_get_performance_level(&adev->smu);
 	else if (adev->powerplay.pp_funcs->get_performance_level)
 		current_level = amdgpu_dpm_get_performance_level(adev);
 
-	if (current_level == level)
+	if (current_level == level) {
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return count;
+	}
 
 	/* profile_exit setting is valid only when current mode is in profile mode */
 	if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
@@ -376,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	    AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
 	    (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
 		pr_err("Currently not in any profile mode!\n");
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return -EINVAL;
 	}
 
 	if (is_support_sw_smu(adev)) {
 		ret = smu_force_performance_level(&adev->smu, level);
-		if (ret)
-			count = -EINVAL;
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
+		}
 	} else if (adev->powerplay.pp_funcs->force_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
-			count = -EINVAL;
 			mutex_unlock(&adev->pm.mutex);
-			goto fail;
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
 		}
 		ret = amdgpu_dpm_force_performance_level(adev, level);
-		if (ret)
-			count = -EINVAL;
-		else
+		if (ret) {
+			mutex_unlock(&adev->pm.mutex);
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
+		} else {
 			adev->pm.dpm.forced_level = level;
+		}
 		mutex_unlock(&adev->pm.mutex);
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
-fail:
 	return count;
 }
 
@@ -411,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 	struct pp_states_info data;
 	int i, buf_len, ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_get_power_num_states(&adev->smu, &data);
 		if (ret)
@@ -418,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 	} else if (adev->powerplay.pp_funcs->get_pp_num_states)
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
 	for (i = 0; i < data.nums; i++)
 		buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
@@ -440,6 +470,13 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 	enum amd_pm_state_type pm = 0;
 	int i = 0, ret = 0;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		pm = smu_get_current_power_state(smu);
 		ret = smu_get_power_num_states(smu, &data);
@@ -451,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 	}
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	for (i = 0; i < data.nums; i++) {
 		if (pm == data.states[i])
 			break;
@@ -469,6 +509,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
 	if (adev->pp_force_state_enabled)
 		return amdgpu_get_pp_cur_state(dev, attr, buf);
 	else
@@ -486,6 +529,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	unsigned long idx;
 	int ret;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
+
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
 	else if (is_support_sw_smu(adev))
@@ -495,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 		struct pp_states_info data;
 
 		ret = kstrtoul(buf, 0, &idx);
-		if (ret || idx >= ARRAY_SIZE(data.states)) {
-			count = -EINVAL;
-			goto fail;
-		}
+		if (ret || idx >= ARRAY_SIZE(data.states))
+			return -EINVAL;
+
 		idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
 
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 		state = data.states[idx];
+
+		ret = pm_runtime_get_sync(ddev->dev);
+		if (ret < 0)
+			return ret;
+
 		/* only set user selected power states */
 		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
 		    state != POWER_STATE_TYPE_DEFAULT) {
@@ -510,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 					AMD_PP_TASK_ENABLE_USER_STATE, &state);
 			adev->pp_force_state_enabled = true;
 		}
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 	}
-fail:
+
 	return count;
 }
 
@@ -533,17 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	char *table = NULL;
-	int size;
+	int size, ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		if (size < 0)
 			return size;
-	}
-	else if (adev->powerplay.pp_funcs->get_pp_table)
+	} else if (adev->powerplay.pp_funcs->get_pp_table) {
 		size = amdgpu_dpm_get_pp_table(adev, &table);
-	else
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
+		if (size < 0)
+			return size;
+	} else {
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return 0;
+	}
 
 	if (size >= PAGE_SIZE)
 		size = PAGE_SIZE - 1;
@@ -562,13 +629,26 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret = 0;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return ret;
+		}
 	} else if (adev->powerplay.pp_funcs->set_pp_table)
 		amdgpu_dpm_set_pp_table(adev, buf, count);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -654,6 +734,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 	const char delimiter[3] = {' ', '\n', '\0'};
 	uint32_t type;
 
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
 	if (count > 127)
 		return -EINVAL;
 
@@ -689,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 			tmp_str++;
 	}
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_od_edit_dpm_table(&adev->smu, type,
 					    parameter, parameter_size);
 
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	} else {
 		if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
 			ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
 						parameter, parameter_size);
-			if (ret)
+			if (ret) {
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return -EINVAL;
+			}
 		}
 
 		if (type == PP_OD_COMMIT_DPM_TABLE) {
@@ -708,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 				amdgpu_dpm_dispatch_task(adev,
 						AMD_PP_TASK_READJUST_POWER_STATE,
 						NULL);
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return count;
 			} else {
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return -EINVAL;
 			}
 		}
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	return count;
 }
@@ -724,24 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	uint32_t size = 0;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size);
-		return size;
 	} else if (adev->powerplay.pp_funcs->print_clock_levels) {
 		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
-		return size;
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
+	return size;
 }
 
 /**
@@ -770,21 +878,36 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
 	uint64_t featuremask;
 	int ret;
 
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
 	ret = kstrtou64(buf, 0, &featuremask);
 	if (ret)
 		return -EINVAL;
 
 	pr_debug("featuremask = 0x%llx\n", featuremask);
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
 		ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	return count;
 }
@@ -795,18 +918,31 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
-	if (is_support_sw_smu(adev)) {
-		return smu_sys_get_pp_feature_mask(&adev->smu, buf);
-	} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
-		return amdgpu_dpm_get_ppfeature_status(adev, buf);
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
+	if (is_support_sw_smu(adev))
+		size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
+	else if (adev->powerplay.pp_funcs->get_ppfeature_status)
+		size = amdgpu_dpm_get_ppfeature_status(adev, buf);
+	else
+		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	return snprintf(buf, PAGE_SIZE, "\n");
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 /**
- * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk
- * pp_dpm_pcie
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie
  *
  * The amdgpu driver provides a sysfs API for adjusting what power levels
  * are enabled for a given power state.  The files pp_dpm_sclk, pp_dpm_mclk,
@@ -822,9 +958,15 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
  *
  * To manually adjust these states, first select manual using
  * power_dpm_force_performance_level.
- * Secondly,Enter a new value for each level by inputing a string that
+ * Secondly, enter a new value for each level by inputing a string that
  * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
- * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ * E.g.,
+ *
+ * .. code-block:: bash
+ *
+ *	echo "4 5 6" > pp_dpm_sclk
+ *
+ * will enable sclk levels 4, 5, and 6.
  *
  * NOTE: change to the dcefclk max dpm level is not supported now
  */
@@ -835,17 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
-	    adev->virt.ops->get_pp_clk)
-		return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 /*
@@ -894,18 +1046,25 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_sriov_vf(adev))
-		return 0;
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (ret)
 		return -EINVAL;
 
@@ -918,17 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
 
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
-	    adev->virt.ops->get_pp_clk)
-		return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
@@ -938,21 +1107,28 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	int ret;
 	uint32_t mask = 0;
+	int ret;
 
-	if (amdgpu_sriov_vf(adev))
-		return 0;
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+			return -EINVAL;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (ret)
 		return -EINVAL;
 
@@ -965,13 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
@@ -984,14 +1174,26 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
+
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1005,13 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
@@ -1024,14 +1240,26 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
+
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1045,13 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
@@ -1064,14 +1306,26 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1085,13 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
@@ -1104,14 +1372,26 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
+
 	ret = amdgpu_read_mask(buf, count, &mask);
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
+		ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1126,12 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint32_t value = 0;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
 	else if (adev->powerplay.pp_funcs->get_sclk_od)
 		value = amdgpu_dpm_get_sclk_od(adev);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", value);
 }
 
@@ -1145,12 +1436,17 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 	int ret;
 	long int value;
 
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
 	ret = kstrtol(buf, 0, &value);
 
-	if (ret) {
-		count = -EINVAL;
-		goto fail;
-	}
+	if (ret)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@@ -1166,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		}
 	}
 
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -1177,12 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint32_t value = 0;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
 	else if (adev->powerplay.pp_funcs->get_mclk_od)
 		value = amdgpu_dpm_get_mclk_od(adev);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", value);
 }
 
@@ -1196,12 +1505,17 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 	int ret;
 	long int value;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	ret = kstrtol(buf, 0, &value);
 
-	if (ret) {
-		count = -EINVAL;
-		goto fail;
-	}
+	if (ret)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@@ -1217,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		}
 	}
 
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -1247,13 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev))
-		return smu_get_power_profile_mode(&adev->smu, buf);
+		size = smu_get_power_profile_mode(&adev->smu, buf);
 	else if (adev->powerplay.pp_funcs->get_power_profile_mode)
-		return amdgpu_dpm_get_power_profile_mode(adev, buf);
+		size = amdgpu_dpm_get_power_profile_mode(adev, buf);
+	else
+		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	return snprintf(buf, PAGE_SIZE, "\n");
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 
@@ -1278,7 +1608,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 	tmp[1] = '\0';
 	ret = kstrtol(tmp, 0, &profile_mode);
 	if (ret)
-		goto fail;
+		return -EINVAL;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return -EINVAL;
 
 	if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
 		if (count < 2 || count > 127)
@@ -1290,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 		while (tmp_str[0]) {
 			sub_str = strsep(&tmp_str, delimiter);
 			ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
-			if (ret) {
-				count = -EINVAL;
-				goto fail;
-			}
+			if (ret)
+				return -EINVAL;
 			parameter_size++;
 			while (isspace(*tmp_str))
 				tmp_str++;
 		}
 	}
 	parameter[parameter_size] = profile_mode;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size);
+		ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
 	else if (adev->powerplay.pp_funcs->set_power_profile_mode)
 		ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (!ret)
 		return count;
-fail:
+
 	return -EINVAL;
 }
 
@@ -1326,10 +1666,20 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int r, value, size = sizeof(value);
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	r = pm_runtime_get_sync(ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
 				   (void *)&value, &size);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1352,10 +1702,20 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int r, value, size = sizeof(value);
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	r = pm_runtime_get_sync(ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
 				   (void *)&value, &size);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1381,8 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint64_t count0, count1;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE,	"%llu %llu %i\n",
 			count0, count1, pcie_get_mps(adev->pdev));
 }
@@ -1404,6 +1776,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
 	if (adev->unique_id)
 		return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
 
@@ -1467,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	int channel = to_sensor_dev_attr(attr)->index;
 	int r, temp = 0, size = sizeof(temp);
 
-	/* Can't get temperature when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	if (channel >= PP_TEMP_MAX)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	switch (channel) {
 	case PP_TEMP_JUNCTION:
 		/* get current junction temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
 		break;
 	case PP_TEMP_EDGE:
 		/* get current edge temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
 		break;
 	case PP_TEMP_MEM:
 		/* get current memory temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
+		break;
+	default:
+		r = -EINVAL;
 		break;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (r)
+		return r;
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
@@ -1598,15 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	u32 pwm_mode = 0;
+	int ret;
+
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
-		if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return sprintf(buf, "%i\n", pwm_mode);
 }
 
@@ -1616,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 					    size_t count)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	int err;
+	int err, ret;
 	int value;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	err = kstrtoint(buf, 10, &value);
 	if (err)
 		return err;
 
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, value);
 	} else {
-		if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		amdgpu_dpm_set_fan_control_mode(adev, value);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return count;
 }
 
@@ -1663,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
 	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
 		pr_info("manual fan speed control should be enabled first\n");
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return -EINVAL;
 	}
 
 	err = kstrtou32(buf, 10, &value);
-	if (err)
+	if (err) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return err;
+	}
 
 	value = (value * 100) / 255;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_set_fan_speed_percent(&adev->smu, value);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->set_fan_speed_percent) {
+	else if (adev->powerplay.pp_funcs->set_fan_speed_percent)
 		err = amdgpu_dpm_set_fan_speed_percent(adev, value);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return count;
 }
@@ -1703,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_percent(&adev->smu, &speed);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
 		err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	speed = (speed * 255) / 100;
 
@@ -1731,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &speed);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return sprintf(buf, "%i\n", speed);
 }
@@ -1758,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
 	u32 size = sizeof(min_rpm);
 	int r;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
 				   (void *)&min_rpm, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1775,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
 	u32 size = sizeof(max_rpm);
 	int r;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
 				   (void *)&max_rpm, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1791,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 	int err;
 	u32 rpm = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return sprintf(buf, "%i\n", rpm);
 }
@@ -1818,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return -ENODATA;
-
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	}
 
 	err = kstrtou32(buf, 10, &value);
-	if (err)
+	if (err) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return err;
+	}
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_set_fan_speed_rpm(&adev->smu, value);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
 		err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return count;
 }
@@ -1854,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	u32 pwm_mode = 0;
+	int ret;
+
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
-		if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
 }
 
@@ -1876,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 	int value;
 	u32 pwm_mode;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
-
 	err = kstrtoint(buf, 10, &value);
 	if (err)
 		return err;
@@ -1893,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 	else
 		return -EINVAL;
 
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, pwm_mode);
 	} else {
-		if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 		amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return count;
 }
 
@@ -1909,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
 					char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 vddgfx;
 	int r, size = sizeof(vddgfx);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
 				   (void *)&vddgfx, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1939,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 				       char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 vddnb;
 	int r, size = sizeof(vddnb);
 
@@ -1947,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 	if  (!(adev->flags & AMD_IS_APU))
 		return -EINVAL;
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
 				   (void *)&vddnb, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1973,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 					   char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 query = 0;
 	int r, size = sizeof(u32);
 	unsigned uw;
 
-	/* Can't get power when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
 				   (void *)&query, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2008,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	uint32_t limit = 0;
+	ssize_t size;
+	int r;
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	if (is_support_sw_smu(adev)) {
-		smu_get_power_limit(&adev->smu, &limit, true);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		smu_get_power_limit(&adev->smu, &limit, true, true);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
 		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
@@ -2026,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	uint32_t limit = 0;
+	ssize_t size;
+	int r;
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	if (is_support_sw_smu(adev)) {
-		smu_get_power_limit(&adev->smu, &limit, false);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		smu_get_power_limit(&adev->smu, &limit, false,  true);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
 		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	return size;
 }
 
 
@@ -2048,19 +2524,29 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 	int err;
 	u32 value;
 
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
 	err = kstrtou32(buf, 10, &value);
 	if (err)
 		return err;
 
 	value = value / 1000000; /* convert to Watt */
 
-	if (is_support_sw_smu(adev)) {
+
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
+	if (is_support_sw_smu(adev))
 		err = smu_set_power_limit(&adev->smu, value);
-	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+	else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
 		err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
-	} else {
+	else
 		err = -EINVAL;
-	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
 
 	if (err)
 		return err;
@@ -2073,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	uint32_t sclk;
 	int r, size = sizeof(sclk);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
 				   (void *)&sclk, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2103,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	uint32_t mclk;
 	int r, size = sizeof(mclk);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
 				   (void *)&mclk, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2196,9 +2686,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  * - fan1_input: fan speed in RPM
  *
- * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM)
+ * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM)
  *
- * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable
+ * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable
  *
  * hwmon interfaces for GPU clocks:
  *
@@ -2294,6 +2784,23 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;
 
+	/* under multi-vf mode, the hwmon attributes are all not supported */
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	/* there is no fan under pp one vf mode */
+	if (amdgpu_sriov_is_pp_one_vf(adev) &&
+	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
+		return 0;
+
 	/* Skip fan attributes if fan is not present */
 	if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
 	    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
@@ -2661,17 +3168,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 {
 	int ret = 0;
-	if (is_support_sw_smu(adev)) {
-	    ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable);
-	    if (ret)
-		DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n",
-			  enable ? "true" : "false", ret);
-	} else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
-		/* enable/disable UVD */
-		mutex_lock(&adev->pm.mutex);
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
-		mutex_unlock(&adev->pm.mutex);
-	}
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
+			  enable ? "enable" : "disable", ret);
+
 	/* enable/disable Low Memory PState for UVD (4k videos) */
 	if (adev->asic_type == CHIP_STONEY &&
 		adev->uvd.decode_image_width >= WIDTH_4K) {
@@ -2688,17 +3190,11 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 {
 	int ret = 0;
-	if (is_support_sw_smu(adev)) {
-	    ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable);
-	    if (ret)
-		DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n",
-			  enable ? "true" : "false", ret);
-	} else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
-		/* enable/disable VCE */
-		mutex_lock(&adev->pm.mutex);
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
-		mutex_unlock(&adev->pm.mutex);
-	}
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
+			  enable ? "enable" : "disable", ret);
 }
 
 void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
@@ -2713,42 +3209,14 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
 
 }
 
-int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev)
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
 {
 	int ret = 0;
 
-	if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
-		return ret;
-
-	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
-	if (ret) {
-		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
-		return ret;
-	}
-
-	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
-	if (ret) {
-		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
-		return ret;
-	}
-
-	ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
-	if (ret) {
-		DRM_ERROR("failed to create device file for dpm state\n");
-		return ret;
-	}
-
-	return ret;
-}
-
-void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev)
-{
-	if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
-		return;
-
-	device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
-	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
-	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n",
+			  enable ? "enable" : "disable", ret);
 }
 
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
@@ -2825,6 +3293,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
 		return ret;
 	}
+
+	/* Arcturus does not support standalone mclk/socclk/fclk level setting */
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
+		dev_attr_pp_dpm_mclk.store = NULL;
+
+		dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
+		dev_attr_pp_dpm_socclk.store = NULL;
+
+		dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
+		dev_attr_pp_dpm_fclk.store = NULL;
+	}
+
 	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
 	if (ret) {
 		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
@@ -3008,7 +3489,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 		struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm;
 		smu_handle_task(&adev->smu,
 				smu_dpm->dpm_level,
-				AMD_PP_TASK_DISPLAY_CONFIG_CHANGE);
+				AMD_PP_TASK_DISPLAY_CONFIG_CHANGE,
+				true);
 	} else {
 		if (adev->powerplay.pp_funcs->dispatch_tasks) {
 			if (!amdgpu_device_has_dc_support(adev)) {
@@ -3144,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct drm_device *ddev = adev->ddev;
 	u32 flags = 0;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return r;
 
 	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 	seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
@@ -3154,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 
 	if (!adev->pm.dpm_enabled) {
 		seq_printf(m, "dpm not enabled\n");
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_runtime_put_autosuspend(dev->dev);
 		return 0;
 	}
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
-		seq_printf(m, "PX asic powered off\n");
-	} else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
+
+	if (!is_support_sw_smu(adev) &&
+	    adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
 			adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
 		else
 			seq_printf(m, "Debugfs support not implemented for this asic\n");
 		mutex_unlock(&adev->pm.mutex);
+		r = 0;
 	} else {
-		return amdgpu_debugfs_pm_info_pp(m, adev);
+		r = amdgpu_debugfs_pm_info_pp(m, adev);
 	}
 
-	return 0;
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return r;
 }
 
 static const struct drm_info_list amdgpu_pm_info_list[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
index ef31448ee8d8..3da1da277805 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
@@ -41,5 +41,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev);
 void amdgpu_dpm_thermal_work_handler(struct work_struct *work);
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 0e6dba9f60f0..07914e34bc25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
 		if (!(flags & PERF_EF_RELOAD))
-			pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+			pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
 
-		pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0);
+		pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
 		break;
 	default:
 		break;
@@ -101,13 +101,13 @@ static void amdgpu_perf_read(struct perf_event *event)
 
 		switch (pe->pmu_perf_type) {
 		case PERF_TYPE_AMDGPU_DF:
-			pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf,
+			pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
 							  &count);
 			break;
 		default:
 			count = 0;
 			break;
-		};
+		}
 	} while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
 
 	local64_add(count - prev, &event->count);
@@ -126,11 +126,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0);
+		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
 		break;
 	default:
 		break;
-	};
+	}
 
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
@@ -156,11 +156,11 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+		retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
 		break;
 	default:
 		return 0;
-	};
+	}
 
 	if (retval)
 		return retval;
@@ -184,11 +184,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1);
+		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
 		break;
 	default:
 		break;
-	};
+	}
 
 	perf_event_update_userpage(event);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a46090071034..3a1570dafe34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -34,6 +34,8 @@
 #include "psp_v11_0.h"
 #include "psp_v12_0.h"
 
+#include "amdgpu_ras.h"
+
 static void psp_set_funcs(struct amdgpu_device *adev);
 
 static int psp_early_init(void *handle)
@@ -88,6 +90,17 @@ static int psp_sw_init(void *handle)
 		return ret;
 	}
 
+	ret = psp_mem_training_init(psp);
+	if (ret) {
+		DRM_ERROR("Failed to initialize memory training!\n");
+		return ret;
+	}
+	ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
+	if (ret) {
+		DRM_ERROR("Failed to process memory training!\n");
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -95,6 +108,7 @@ static int psp_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	psp_mem_training_fini(&adev->psp);
 	release_firmware(adev->psp.sos_fw);
 	adev->psp.sos_fw = NULL;
 	release_firmware(adev->psp.asd_fw);
@@ -144,17 +158,26 @@ psp_cmd_submit_buf(struct psp_context *psp,
 	memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
 
 	index = atomic_inc_return(&psp->fence_value);
-	ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
+	ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
 	if (ret) {
 		atomic_dec(&psp->fence_value);
 		mutex_unlock(&psp->mutex);
 		return ret;
 	}
 
+	amdgpu_asic_invalidate_hdp(psp->adev, NULL);
 	while (*((unsigned int *)psp->fence_buf) != index) {
 		if (--timeout == 0)
 			break;
+		/*
+		 * Shouldn't wait for timeout when err_event_athub occurs,
+		 * because gpu reset thread triggered and lock resource should
+		 * be released for psp resume sequence.
+		 */
+		if (amdgpu_ras_intr_triggered())
+			break;
 		msleep(1);
+		amdgpu_asic_invalidate_hdp(psp->adev, NULL);
 	}
 
 	/* In some cases, psp response status is not 0 even there is no
@@ -168,8 +191,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
 		if (ucode)
 			DRM_WARN("failed to load ucode id (%d) ",
 				  ucode->ucode_id);
-		DRM_WARN("psp command failed and response status is (0x%X)\n",
-			  psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);
+		DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n",
+			 psp->cmd_buf_mem->cmd_id,
+			 psp->cmd_buf_mem->resp.status);
 		if (!timeout) {
 			mutex_unlock(&psp->mutex);
 			return -EINVAL;
@@ -253,7 +277,8 @@ static int psp_tmr_init(struct psp_context *psp)
 
 	/* For ASICs support RLC autoload, psp will parse the toc
 	 * and calculate the total size of TMR needed */
-	if (psp->toc_start_addr &&
+	if (!amdgpu_sriov_vf(psp->adev) &&
+	    psp->toc_start_addr &&
 	    psp->toc_bin_size &&
 	    psp->fw_pri_buf) {
 		ret = psp_load_toc(psp, &tmr_size);
@@ -287,47 +312,23 @@ static int psp_tmr_load(struct psp_context *psp)
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr);
-	if (ret)
-		goto failed;
 
 	kfree(cmd);
 
-	return 0;
-
-failed:
-	kfree(cmd);
 	return ret;
 }
 
-static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-				 uint64_t asd_mc, uint64_t asd_mc_shared,
-				 uint32_t size, uint32_t shared_size)
+static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+				uint64_t asd_mc, uint32_t size)
 {
 	cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;
 	cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);
 	cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
 	cmd->cmd.cmd_load_ta.app_len = size;
 
-	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared);
-	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared);
-	cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
-static int psp_asd_init(struct psp_context *psp)
-{
-	int ret;
-
-	/*
-	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
-	 * physical) for shared ASD <-> Driver
-	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE,
-				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
-				      &psp->asd_shared_bo,
-				      &psp->asd_shared_mc_addr,
-				      &psp->asd_shared_buf);
-
-	return ret;
+	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0;
+	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0;
+	cmd->cmd.cmd_load_ta.cmd_buf_len = 0;
 }
 
 static int psp_asd_load(struct psp_context *psp)
@@ -349,11 +350,49 @@ static int psp_asd_load(struct psp_context *psp)
 	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 	memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
 
-	psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr,
-			     psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
+	psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
+				  psp->asd_ucode_size);
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd,
+				 psp->fence_buf_mc_addr);
+	if (!ret) {
+		psp->asd_context.asd_initialized = true;
+		psp->asd_context.session_id = cmd->resp.session_id;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+				       uint32_t session_id)
+{
+	cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
+	cmd->cmd.cmd_unload_ta.session_id = session_id;
+}
+
+static int psp_asd_unload(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->asd_context.asd_initialized)
+		return 0;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr);
+	if (!ret)
+		psp->asd_context.asd_initialized = false;
 
 	kfree(cmd);
 
@@ -388,18 +427,20 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
 	return ret;
 }
 
-static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-					  uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
-					  uint32_t xgmi_ta_size, uint32_t shared_size)
+static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+				     uint64_t ta_bin_mc,
+				     uint32_t ta_bin_size,
+				     uint64_t ta_shared_mc,
+				     uint32_t ta_shared_size)
 {
-        cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
-        cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
-        cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
-        cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
-
-        cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
-        cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
-        cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
+	cmd->cmd_id 				= GFX_CMD_ID_LOAD_TA;
+	cmd->cmd.cmd_load_ta.app_phy_addr_lo 	= lower_32_bits(ta_bin_mc);
+	cmd->cmd.cmd_load_ta.app_phy_addr_hi 	= upper_32_bits(ta_bin_mc);
+	cmd->cmd.cmd_load_ta.app_len 		= ta_bin_size;
+
+	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc);
+	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc);
+	cmd->cmd.cmd_load_ta.cmd_buf_len 	 = ta_shared_size;
 }
 
 static int psp_xgmi_init_shared_buf(struct psp_context *psp)
@@ -419,6 +460,36 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp)
 	return ret;
 }
 
+static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+				       uint32_t ta_cmd_id,
+				       uint32_t session_id)
+{
+	cmd->cmd_id 				= GFX_CMD_ID_INVOKE_CMD;
+	cmd->cmd.cmd_invoke_cmd.session_id 	= session_id;
+	cmd->cmd.cmd_invoke_cmd.ta_cmd_id 	= ta_cmd_id;
+}
+
+int psp_ta_invoke(struct psp_context *psp,
+		  uint32_t ta_cmd_id,
+		  uint32_t session_id)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id);
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd,
+				 psp->fence_buf_mc_addr);
+
+	kfree(cmd);
+
+	return ret;
+}
+
 static int psp_xgmi_load(struct psp_context *psp)
 {
 	int ret;
@@ -427,8 +498,6 @@ static int psp_xgmi_load(struct psp_context *psp)
 	/*
 	 * TODO: bypass the loading in sriov for now
 	 */
-	if (amdgpu_sriov_vf(psp->adev))
-		return 0;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
@@ -437,9 +506,11 @@ static int psp_xgmi_load(struct psp_context *psp)
 	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 	memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
 
-	psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
-				      psp->xgmi_context.xgmi_shared_mc_addr,
-				      psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
+	psp_prep_ta_load_cmd_buf(cmd,
+				 psp->fw_pri_mc_addr,
+				 psp->ta_xgmi_ucode_size,
+				 psp->xgmi_context.xgmi_shared_mc_addr,
+				 PSP_XGMI_SHARED_MEM_SIZE);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr);
@@ -454,29 +525,25 @@ static int psp_xgmi_load(struct psp_context *psp)
 	return ret;
 }
 
-static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-					    uint32_t xgmi_session_id)
-{
-	cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
-	cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
-}
-
 static int psp_xgmi_unload(struct psp_context *psp)
 {
 	int ret;
 	struct psp_gfx_cmd_resp *cmd;
+	struct amdgpu_device *adev = psp->adev;
+
+	/* XGMI TA unload currently is not supported on Arcturus */
+	if (adev->asic_type == CHIP_ARCTURUS)
+		return 0;
 
 	/*
 	 * TODO: bypass the unloading in sriov for now
 	 */
-	if (amdgpu_sriov_vf(psp->adev))
-		return 0;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
-	psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
+	psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr);
@@ -486,40 +553,9 @@ static int psp_xgmi_unload(struct psp_context *psp)
 	return ret;
 }
 
-static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-					    uint32_t ta_cmd_id,
-					    uint32_t xgmi_session_id)
-{
-	cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
-	cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
-	cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
-	/* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
 int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 {
-	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-
-	/*
-	 * TODO: bypass the loading in sriov for now
-	*/
-	if (amdgpu_sriov_vf(psp->adev))
-		return 0;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
-
-	psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
-					psp->xgmi_context.session_id);
-
-	ret = psp_cmd_submit_buf(psp, NULL, cmd,
-				 psp->fence_buf_mc_addr);
-
-	kfree(cmd);
-
-        return ret;
+	return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id);
 }
 
 static int psp_xgmi_terminate(struct psp_context *psp)
@@ -548,7 +584,9 @@ static int psp_xgmi_initialize(struct psp_context *psp)
 	struct ta_xgmi_shared_memory *xgmi_cmd;
 	int ret;
 
-	if (!psp->adev->psp.ta_fw)
+	if (!psp->adev->psp.ta_fw ||
+	    !psp->adev->psp.ta_xgmi_ucode_size ||
+	    !psp->adev->psp.ta_xgmi_start_addr)
 		return -ENOENT;
 
 	if (!psp->xgmi_context.initialized) {
@@ -573,20 +611,6 @@ static int psp_xgmi_initialize(struct psp_context *psp)
 }
 
 // ras begin
-static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-		uint64_t ras_ta_mc, uint64_t ras_mc_shared,
-		uint32_t ras_ta_size, uint32_t shared_size)
-{
-	cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
-	cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc);
-	cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc);
-	cmd->cmd.cmd_load_ta.app_len = ras_ta_size;
-
-	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared);
-	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared);
-	cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
 static int psp_ras_init_shared_buf(struct psp_context *psp)
 {
 	int ret;
@@ -622,15 +646,17 @@ static int psp_ras_load(struct psp_context *psp)
 	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 	memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
 
-	psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
-			psp->ras.ras_shared_mc_addr,
-			psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE);
+	psp_prep_ta_load_cmd_buf(cmd,
+				 psp->fw_pri_mc_addr,
+				 psp->ta_ras_ucode_size,
+				 psp->ras.ras_shared_mc_addr,
+				 PSP_RAS_SHARED_MEM_SIZE);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 			psp->fence_buf_mc_addr);
 
 	if (!ret) {
-		psp->ras.ras_initialized = 1;
+		psp->ras.ras_initialized = true;
 		psp->ras.session_id = cmd->resp.session_id;
 	}
 
@@ -639,13 +665,6 @@ static int psp_ras_load(struct psp_context *psp)
 	return ret;
 }
 
-static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-						uint32_t ras_session_id)
-{
-	cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
-	cmd->cmd.cmd_unload_ta.session_id = ras_session_id;
-}
-
 static int psp_ras_unload(struct psp_context *psp)
 {
 	int ret;
@@ -661,7 +680,7 @@ static int psp_ras_unload(struct psp_context *psp)
 	if (!cmd)
 		return -ENOMEM;
 
-	psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id);
+	psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 			psp->fence_buf_mc_addr);
@@ -671,40 +690,15 @@ static int psp_ras_unload(struct psp_context *psp)
 	return ret;
 }
 
-static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-		uint32_t ta_cmd_id,
-		uint32_t ras_session_id)
-{
-	cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
-	cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id;
-	cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
-	/* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 {
-	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-
 	/*
 	 * TODO: bypass the loading in sriov for now
 	 */
 	if (amdgpu_sriov_vf(psp->adev))
 		return 0;
 
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
-
-	psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id,
-			psp->ras.session_id);
-
-	ret = psp_cmd_submit_buf(psp, NULL, cmd,
-			psp->fence_buf_mc_addr);
-
-	kfree(cmd);
-
-	return ret;
+	return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
 }
 
 int psp_ras_enable_features(struct psp_context *psp,
@@ -737,6 +731,12 @@ static int psp_ras_terminate(struct psp_context *psp)
 {
 	int ret;
 
+	/*
+	 * TODO: bypass the terminate in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
 	if (!psp->ras.ras_initialized)
 		return 0;
 
@@ -744,7 +744,7 @@ static int psp_ras_terminate(struct psp_context *psp)
 	if (ret)
 		return ret;
 
-	psp->ras.ras_initialized = 0;
+	psp->ras.ras_initialized = false;
 
 	/* free ras shared memory */
 	amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo,
@@ -758,6 +758,18 @@ static int psp_ras_initialize(struct psp_context *psp)
 {
 	int ret;
 
+	/*
+	 * TODO: bypass the initialize in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->adev->psp.ta_ras_ucode_size ||
+	    !psp->adev->psp.ta_ras_start_addr) {
+		dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n");
+		return 0;
+	}
+
 	if (!psp->ras.ras_initialized) {
 		ret = psp_ras_init_shared_buf(psp);
 		if (ret)
@@ -772,6 +784,274 @@ static int psp_ras_initialize(struct psp_context *psp)
 }
 // ras end
 
+// HDCP start
+static int psp_hdcp_init_shared_buf(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
+	 * physical) for hdcp ta <-> Driver
+	 */
+	ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->hdcp_context.hdcp_shared_bo,
+				      &psp->hdcp_context.hdcp_shared_mc_addr,
+				      &psp->hdcp_context.hdcp_shared_buf);
+
+	return ret;
+}
+
+static int psp_hdcp_load(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	/*
+	 * TODO: bypass the loading in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr,
+	       psp->ta_hdcp_ucode_size);
+
+	psp_prep_ta_load_cmd_buf(cmd,
+				 psp->fw_pri_mc_addr,
+				 psp->ta_hdcp_ucode_size,
+				 psp->hdcp_context.hdcp_shared_mc_addr,
+				 PSP_HDCP_SHARED_MEM_SIZE);
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+	if (!ret) {
+		psp->hdcp_context.hdcp_initialized = true;
+		psp->hdcp_context.session_id = cmd->resp.session_id;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+static int psp_hdcp_initialize(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * TODO: bypass the initialize in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->adev->psp.ta_hdcp_ucode_size ||
+	    !psp->adev->psp.ta_hdcp_start_addr) {
+		dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n");
+		return 0;
+	}
+
+	if (!psp->hdcp_context.hdcp_initialized) {
+		ret = psp_hdcp_init_shared_buf(psp);
+		if (ret)
+			return ret;
+	}
+
+	ret = psp_hdcp_load(psp);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int psp_hdcp_unload(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	/*
+	 * TODO: bypass the unloading in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id);
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+	kfree(cmd);
+
+	return ret;
+}
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+	/*
+	 * TODO: bypass the loading in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id);
+}
+
+static int psp_hdcp_terminate(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * TODO: bypass the terminate in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->hdcp_context.hdcp_initialized)
+		return 0;
+
+	ret = psp_hdcp_unload(psp);
+	if (ret)
+		return ret;
+
+	psp->hdcp_context.hdcp_initialized = false;
+
+	/* free hdcp shared memory */
+	amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo,
+			      &psp->hdcp_context.hdcp_shared_mc_addr,
+			      &psp->hdcp_context.hdcp_shared_buf);
+
+	return 0;
+}
+// HDCP end
+
+// DTM start
+static int psp_dtm_init_shared_buf(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
+	 * physical) for dtm ta <-> Driver
+	 */
+	ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->dtm_context.dtm_shared_bo,
+				      &psp->dtm_context.dtm_shared_mc_addr,
+				      &psp->dtm_context.dtm_shared_buf);
+
+	return ret;
+}
+
+static int psp_dtm_load(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	/*
+	 * TODO: bypass the loading in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size);
+
+	psp_prep_ta_load_cmd_buf(cmd,
+				 psp->fw_pri_mc_addr,
+				 psp->ta_dtm_ucode_size,
+				 psp->dtm_context.dtm_shared_mc_addr,
+				 PSP_DTM_SHARED_MEM_SIZE);
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+	if (!ret) {
+		psp->dtm_context.dtm_initialized = true;
+		psp->dtm_context.session_id = cmd->resp.session_id;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static int psp_dtm_initialize(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * TODO: bypass the initialize in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->adev->psp.ta_dtm_ucode_size ||
+	    !psp->adev->psp.ta_dtm_start_addr) {
+		dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n");
+		return 0;
+	}
+
+	if (!psp->dtm_context.dtm_initialized) {
+		ret = psp_dtm_init_shared_buf(psp);
+		if (ret)
+			return ret;
+	}
+
+	ret = psp_dtm_load(psp);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+	/*
+	 * TODO: bypass the loading in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id);
+}
+
+static int psp_dtm_terminate(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * TODO: bypass the terminate in sriov for now
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	if (!psp->dtm_context.dtm_initialized)
+		return 0;
+
+	ret = psp_hdcp_unload(psp);
+	if (ret)
+		return ret;
+
+	psp->dtm_context.dtm_initialized = false;
+
+	/* free hdcp shared memory */
+	amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo,
+			      &psp->dtm_context.dtm_shared_mc_addr,
+			      &psp->dtm_context.dtm_shared_buf);
+
+	return 0;
+}
+// DTM end
+
 static int psp_hw_start(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
@@ -818,35 +1098,6 @@ static int psp_hw_start(struct psp_context *psp)
 		return ret;
 	}
 
-	ret = psp_asd_init(psp);
-	if (ret) {
-		DRM_ERROR("PSP asd init failed!\n");
-		return ret;
-	}
-
-	ret = psp_asd_load(psp);
-	if (ret) {
-		DRM_ERROR("PSP load asd failed!\n");
-		return ret;
-	}
-
-	if (adev->gmc.xgmi.num_physical_nodes > 1) {
-		ret = psp_xgmi_initialize(psp);
-		/* Warning the XGMI seesion initialize failure
-		 * Instead of stop driver initialization
-		 */
-		if (ret)
-			dev_err(psp->adev->dev,
-				"XGMI: Failed to initialize XGMI session\n");
-	}
-
-	if (psp->adev->psp.ta_fw) {
-		ret = psp_ras_initialize(psp);
-		if (ret)
-			dev_err(psp->adev->dev,
-					"RAS: Failed to initialize RAS\n");
-	}
-
 	return 0;
 }
 
@@ -926,6 +1177,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
 	case AMDGPU_UCODE_ID_VCN:
 		*type = GFX_FW_TYPE_VCN;
 		break;
+	case AMDGPU_UCODE_ID_VCN1:
+		*type = GFX_FW_TYPE_VCN1;
+		break;
 	case AMDGPU_UCODE_ID_DMCU_ERAM:
 		*type = GFX_FW_TYPE_DMCU_ERAM;
 		break;
@@ -938,6 +1192,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
 	case AMDGPU_UCODE_ID_VCN1_RAM:
 		*type = GFX_FW_TYPE_VCN1_RAM;
 		break;
+	case AMDGPU_UCODE_ID_DMCUB:
+		*type = GFX_FW_TYPE_DMUB;
+		break;
 	case AMDGPU_UCODE_ID_MAXIMUM:
 	default:
 		return -EINVAL;
@@ -1064,7 +1321,11 @@ out:
 		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
 		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
 		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
-		    || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+                    || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
+	            || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+	            || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+	            || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+	            || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
 			/*skip ucode loading in SRIOV VF */
 			continue;
 
@@ -1073,10 +1334,6 @@ out:
 		     ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
 			/* skip mec JT when autoload is enabled */
 			continue;
-		/* Renoir only needs to load mec jump table one time */
-		if (adev->asic_type == CHIP_RENOIR &&
-		    ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)
-			continue;
 
 		psp_print_fw_hdr(psp, ucode);
 
@@ -1085,7 +1342,8 @@ out:
 			return ret;
 
 		/* Start rlc autoload after psp recieved all the gfx firmware */
-		if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+		if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+		    AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
 			ret = psp_rlc_autoload(psp);
 			if (ret) {
 				DRM_ERROR("Failed to start rlc autoload\n");
@@ -1116,16 +1374,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (!psp->cmd)
 		return -ENOMEM;
 
-	/* this fw pri bo is not used under SRIOV */
-	if (!amdgpu_sriov_vf(psp->adev)) {
-		ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-					      AMDGPU_GEM_DOMAIN_GTT,
-					      &psp->fw_pri_bo,
-					      &psp->fw_pri_mc_addr,
-					      &psp->fw_pri_buf);
-		if (ret)
-			goto failed;
-	}
+	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+					AMDGPU_GEM_DOMAIN_GTT,
+					&psp->fw_pri_bo,
+					&psp->fw_pri_mc_addr,
+					&psp->fw_pri_buf);
+	if (ret)
+		goto failed;
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
 					AMDGPU_GEM_DOMAIN_VRAM,
@@ -1159,6 +1414,39 @@ skip_memalloc:
 	if (ret)
 		goto failed;
 
+	ret = psp_asd_load(psp);
+	if (ret) {
+		DRM_ERROR("PSP load asd failed!\n");
+		return ret;
+	}
+
+	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+		ret = psp_xgmi_initialize(psp);
+		/* Warning the XGMI seesion initialize failure
+		 * Instead of stop driver initialization
+		 */
+		if (ret)
+			dev_err(psp->adev->dev,
+				"XGMI: Failed to initialize XGMI session\n");
+	}
+
+	if (psp->adev->psp.ta_fw) {
+		ret = psp_ras_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+					"RAS: Failed to initialize RAS\n");
+
+		ret = psp_hdcp_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+				"HDCP: Failed to initialize HDCP\n");
+
+		ret = psp_dtm_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+				"DTM: Failed to initialize DTM\n");
+	}
+
 	return 0;
 
 failed:
@@ -1210,8 +1498,13 @@ static int psp_hw_fini(void *handle)
 	    psp->xgmi_context.initialized == 1)
                 psp_xgmi_terminate(psp);
 
-	if (psp->adev->psp.ta_fw)
+	if (psp->adev->psp.ta_fw) {
 		psp_ras_terminate(psp);
+		psp_dtm_terminate(psp);
+		psp_hdcp_terminate(psp);
+	}
+
+	psp_asd_unload(psp);
 
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
@@ -1221,8 +1514,6 @@ static int psp_hw_fini(void *handle)
 			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
 			      &psp->fence_buf_mc_addr, &psp->fence_buf);
-	amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr,
-			      &psp->asd_shared_buf);
 	amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
 			      (void **)&psp->cmd_buf_mem);
 
@@ -1253,6 +1544,16 @@ static int psp_suspend(void *handle)
 			DRM_ERROR("Failed to terminate ras ta\n");
 			return ret;
 		}
+		ret = psp_hdcp_terminate(psp);
+		if (ret) {
+			DRM_ERROR("Failed to terminate hdcp ta\n");
+			return ret;
+		}
+		ret = psp_dtm_terminate(psp);
+		if (ret) {
+			DRM_ERROR("Failed to terminate dtm ta\n");
+			return ret;
+		}
 	}
 
 	ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
@@ -1272,6 +1573,12 @@ static int psp_resume(void *handle)
 
 	DRM_INFO("PSP is resuming...\n");
 
+	ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+	if (ret) {
+		DRM_ERROR("Failed to process memory training!\n");
+		return ret;
+	}
+
 	mutex_lock(&adev->firmware.mutex);
 
 	ret = psp_hw_start(psp);
@@ -1282,6 +1589,39 @@ static int psp_resume(void *handle)
 	if (ret)
 		goto failed;
 
+	ret = psp_asd_load(psp);
+	if (ret) {
+		DRM_ERROR("PSP load asd failed!\n");
+		goto failed;
+	}
+
+	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+		ret = psp_xgmi_initialize(psp);
+		/* Warning the XGMI seesion initialize failure
+		 * Instead of stop driver initialization
+		 */
+		if (ret)
+			dev_err(psp->adev->dev,
+				"XGMI: Failed to initialize XGMI session\n");
+	}
+
+	if (psp->adev->psp.ta_fw) {
+		ret = psp_ras_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+					"RAS: Failed to initialize RAS\n");
+
+		ret = psp_hdcp_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+				"HDCP: Failed to initialize HDCP\n");
+
+		ret = psp_dtm_initialize(psp);
+		if (ret)
+			dev_err(psp->adev->dev,
+				"DTM: Failed to initialize DTM\n");
+	}
+
 	mutex_unlock(&adev->firmware.mutex);
 
 	return 0;
@@ -1311,9 +1651,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
 	int ret;
 	struct psp_gfx_cmd_resp *cmd;
 
-	if (amdgpu_sriov_vf(psp->adev))
-		return 0;
-
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
@@ -1339,6 +1676,56 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
 	return psp_execute_np_fw_load(&adev->psp, &ucode);
 }
 
+int psp_ring_cmd_submit(struct psp_context *psp,
+			uint64_t cmd_buf_mc_addr,
+			uint64_t fence_mc_addr,
+			int index)
+{
+	unsigned int psp_write_ptr_reg = 0;
+	struct psp_gfx_rb_frame *write_frame;
+	struct psp_ring *ring = &psp->km_ring;
+	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t ring_size_dw = ring->ring_size / 4;
+	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+	/* KM (GPCOM) prepare write pointer */
+	psp_write_ptr_reg = psp_ring_get_wptr(psp);
+
+	/* Update KM RB frame pointer to new frame */
+	/* write_frame ptr increments by size of rb_frame in bytes */
+	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+	if ((psp_write_ptr_reg % ring_size_dw) == 0)
+		write_frame = ring_buffer_start;
+	else
+		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+	/* Check invalid write_frame ptr address */
+	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+			  ring_buffer_start, ring_buffer_end, write_frame);
+		DRM_ERROR("write_frame is pointing to address out of bounds\n");
+		return -EINVAL;
+	}
+
+	/* Initialize KM RB frame */
+	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+	/* Update KM RB frame */
+	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+	write_frame->fence_value = index;
+	amdgpu_asic_flush_hdp(adev, NULL);
+
+	/* Update the write Pointer in DWORDs */
+	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+	psp_ring_set_wptr(psp, psp_write_ptr_reg);
+	return 0;
+}
+
 static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
 					enum AMDGPU_UCODE_ID ucode_type)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index bc0947f6bc8a..3265487b859f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -32,11 +32,13 @@
 
 #define PSP_FENCE_BUFFER_SIZE	0x1000
 #define PSP_CMD_BUFFER_SIZE	0x1000
-#define PSP_ASD_SHARED_MEM_SIZE 0x4000
 #define PSP_XGMI_SHARED_MEM_SIZE 0x4000
 #define PSP_RAS_SHARED_MEM_SIZE 0x4000
 #define PSP_1_MEG		0x100000
 #define PSP_TMR_SIZE	0x400000
+#define PSP_HDCP_SHARED_MEM_SIZE	0x4000
+#define PSP_DTM_SHARED_MEM_SIZE	0x4000
+#define PSP_SHARED_MEM_SIZE		0x4000
 
 struct psp_context;
 struct psp_xgmi_node_info;
@@ -46,6 +48,8 @@ enum psp_bootloader_cmd {
 	PSP_BL__LOAD_SYSDRV		= 0x10000,
 	PSP_BL__LOAD_SOSDRV		= 0x20000,
 	PSP_BL__LOAD_KEY_DATABASE	= 0x80000,
+	PSP_BL__DRAM_LONG_TRAIN		= 0x100000,
+	PSP_BL__DRAM_SHORT_TRAIN	= 0x200000,
 };
 
 enum psp_ring_type
@@ -89,9 +93,6 @@ struct psp_funcs
 			    enum psp_ring_type ring_type);
 	int (*ring_destroy)(struct psp_context *psp,
 			    enum psp_ring_type ring_type);
-	int (*cmd_submit)(struct psp_context *psp,
-			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-			  int index);
 	bool (*compare_sram_data)(struct psp_context *psp,
 				  struct amdgpu_firmware_info *ucode,
 				  enum AMDGPU_UCODE_ID ucode_type);
@@ -108,6 +109,11 @@ struct psp_funcs
 			struct ta_ras_trigger_error_input *info);
 	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 	int (*rlc_autoload_start)(struct psp_context *psp);
+	int (*mem_training_init)(struct psp_context *psp);
+	void (*mem_training_fini)(struct psp_context *psp);
+	int (*mem_training)(struct psp_context *psp, uint32_t ops);
+	uint32_t (*ring_get_wptr)(struct psp_context *psp);
+	void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
 };
 
 #define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
@@ -123,6 +129,11 @@ struct psp_xgmi_topology_info {
 	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
 };
 
+struct psp_asd_context {
+	bool			asd_initialized;
+	uint32_t		session_id;
+};
+
 struct psp_xgmi_context {
 	uint8_t				initialized;
 	uint32_t			session_id;
@@ -142,6 +153,64 @@ struct psp_ras_context {
 	struct amdgpu_ras	*ras;
 };
 
+struct psp_hdcp_context {
+	bool			hdcp_initialized;
+	uint32_t		session_id;
+	struct amdgpu_bo	*hdcp_shared_bo;
+	uint64_t		hdcp_shared_mc_addr;
+	void			*hdcp_shared_buf;
+};
+
+struct psp_dtm_context {
+	bool			dtm_initialized;
+	uint32_t		session_id;
+	struct amdgpu_bo	*dtm_shared_bo;
+	uint64_t		dtm_shared_mc_addr;
+	void			*dtm_shared_buf;
+};
+
+#define MEM_TRAIN_SYSTEM_SIGNATURE		0x54534942
+#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES	0x1000
+#define GDDR6_MEM_TRAINING_OFFSET		0x8000
+
+enum psp_memory_training_init_flag {
+	PSP_MEM_TRAIN_NOT_SUPPORT	= 0x0,
+	PSP_MEM_TRAIN_SUPPORT		= 0x1,
+	PSP_MEM_TRAIN_INIT_FAILED	= 0x2,
+	PSP_MEM_TRAIN_RESERVE_SUCCESS	= 0x4,
+	PSP_MEM_TRAIN_INIT_SUCCESS	= 0x8,
+};
+
+enum psp_memory_training_ops {
+	PSP_MEM_TRAIN_SEND_LONG_MSG	= 0x1,
+	PSP_MEM_TRAIN_SAVE		= 0x2,
+	PSP_MEM_TRAIN_RESTORE		= 0x4,
+	PSP_MEM_TRAIN_SEND_SHORT_MSG	= 0x8,
+	PSP_MEM_TRAIN_COLD_BOOT		= PSP_MEM_TRAIN_SEND_LONG_MSG,
+	PSP_MEM_TRAIN_RESUME		= PSP_MEM_TRAIN_SEND_SHORT_MSG,
+};
+
+struct psp_memory_training_context {
+	/*training data size*/
+	u64 train_data_size;
+	/*
+	 * sys_cache
+	 * cpu virtual address
+	 * system memory buffer that used to store the training data.
+	 */
+	void *sys_cache;
+
+	/*vram offset of the p2c training data*/
+	u64 p2c_train_data_offset;
+
+	/*vram offset of the c2p training data*/
+	u64 c2p_train_data_offset;
+	struct amdgpu_bo *c2p_bo;
+
+	enum psp_memory_training_init_flag init;
+	u32 training_cnt;
+};
+
 struct psp_context
 {
 	struct amdgpu_device            *adev;
@@ -172,15 +241,12 @@ struct psp_context
 	struct amdgpu_bo		*tmr_bo;
 	uint64_t			tmr_mc_addr;
 
-	/* asd firmware and buffer */
+	/* asd firmware */
 	const struct firmware		*asd_fw;
 	uint32_t			asd_fw_version;
 	uint32_t			asd_feature_version;
 	uint32_t			asd_ucode_size;
 	uint8_t				*asd_start_addr;
-	struct amdgpu_bo		*asd_shared_bo;
-	uint64_t			asd_shared_mc_addr;
-	void				*asd_shared_buf;
 
 	/* fence buffer */
 	struct amdgpu_bo		*fence_buf_bo;
@@ -206,9 +272,22 @@ struct psp_context
 	uint32_t			ta_ras_ucode_version;
 	uint32_t			ta_ras_ucode_size;
 	uint8_t				*ta_ras_start_addr;
+
+	uint32_t			ta_hdcp_ucode_version;
+	uint32_t			ta_hdcp_ucode_size;
+	uint8_t				*ta_hdcp_start_addr;
+
+	uint32_t			ta_dtm_ucode_version;
+	uint32_t			ta_dtm_ucode_size;
+	uint8_t				*ta_dtm_start_addr;
+
+	struct psp_asd_context		asd_context;
 	struct psp_xgmi_context		xgmi_context;
 	struct psp_ras_context		ras;
+	struct psp_hdcp_context 	hdcp_context;
+	struct psp_dtm_context		dtm_context;
 	struct mutex			mutex;
+	struct psp_memory_training_context mem_train_ctx;
 };
 
 struct amdgpu_psp_funcs {
@@ -221,8 +300,6 @@ struct amdgpu_psp_funcs {
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
 #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
 #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
-#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \
-		(psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
 		(psp)->funcs->compare_sram_data((psp), (ucode), (type))
 #define psp_init_microcode(psp) \
@@ -251,6 +328,12 @@ struct amdgpu_psp_funcs {
 		(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
 #define psp_rlc_autoload(psp) \
 		((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0)
+#define psp_mem_training_init(psp) \
+	((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
+#define psp_mem_training_fini(psp) \
+	((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0)
+#define psp_mem_training(psp, ops) \
+	((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
 
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
@@ -261,6 +344,9 @@ struct amdgpu_psp_funcs {
 	((psp)->funcs->ras_cure_posion ? \
 	(psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL)
 
+#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
+#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
+
 extern const struct amd_ip_funcs psp_ip_funcs;
 
 extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -279,10 +365,16 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
 int psp_ras_enable_features(struct psp_context *psp,
 		union ta_ras_cmd_input *info, bool enable);
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
 
 int psp_rlc_autoload_start(struct psp_context *psp);
 
 extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
 int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
 		uint32_t value);
+int psp_ring_cmd_submit(struct psp_context *psp,
+			uint64_t cmd_buf_mc_addr,
+			uint64_t fence_mc_addr,
+			int index);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 016ea274b955..766be7f18282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -25,10 +25,13 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_atomfirmware.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 
 const char *ras_error_string[] = {
 	"none",
@@ -65,11 +68,16 @@ const char *ras_block_string[] = {
 /* inject address is 52 bits */
 #define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)
 
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
-		uint64_t offset, uint64_t size,
-		struct amdgpu_bo **bo_ptr);
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
-		struct amdgpu_bo **bo_ptr);
+enum amdgpu_ras_retire_page_reservation {
+	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+	AMDGPU_RAS_RETIRE_PAGE_PENDING,
+	AMDGPU_RAS_RETIRE_PAGE_FAULT,
+};
+
+atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+				uint64_t addr);
 
 static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
@@ -189,6 +197,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 
 	return 0;
 }
+
 /**
  * DOC: AMDGPU RAS debugfs control interface
  *
@@ -208,31 +217,44 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
  * As their names indicate, inject operation will write the
  * value to the address.
  *
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
  * It has three kinds of operations.
- *  0: disable RAS on the block. Take ::head as its data.
- *  1: enable RAS on the block. Take ::head as its data.
- *  2: inject errors on the block. Take ::inject as its data.
+ *
+ * - 0: disable RAS on the block. Take ::head as its data.
+ * - 1: enable RAS on the block. Take ::head as its data.
+ * - 2: inject errors on the block. Take ::inject as its data.
  *
  * How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
  *
- * bash:
- * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl
- *	op: disable, enable, inject
- *		disable: only block is needed
- *		enable: block and error are needed
- *		inject: error, address, value are needed
- *	block: umc, smda, gfx, .........
- *		see ras_block_string[] for details
- *	error: ue, ce
- *		ue: multi_uncorrectable
- *		ce: single_correctable
- *	sub_block: sub block index, pass 0 if there is no sub block
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
+ *
+ * .. code-block:: bash
+ *
+ *	echo op block [error [sub_block address value]] > .../ras/ras_ctrl
+ *
+ * Parameters:
+ *
+ * op: disable, enable, inject
+ *	disable: only block is needed
+ *	enable: block and error are needed
+ *	inject: error, address, value are needed
+ * block: umc, sdma, gfx, .........
+ *	see ras_block_string[] for details
+ * error: ue, ce
+ *	ue: multi_uncorrectable
+ *	ce: single_correctable
+ * sub_block:
+ *	sub block index, pass 0 if there is no sub block
+ *
+ * here are some examples for bash commands:
+ *
+ * .. code-block:: bash
  *
- * here are some examples for bash commands,
  *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
  *	echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
  *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
@@ -245,8 +267,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
  * For inject, please check corresponding err count at
  * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
  *
- * NOTE: operation is only allowed on blocks which are supported.
- * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * .. note::
+ *	Operations are only allowed on blocks which are supported.
+ *	Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ *	to see which blocks support RAS on a particular asic.
+ *
  */
 static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
 		size_t size, loff_t *pos)
@@ -276,13 +301,21 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 			break;
 		}
 
+		/* umc ce/ue error injection for a bad page is not allowed */
+		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
+			DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
+					data.inject.address);
+			break;
+		}
+
 		/* data.inject.address is offset instead of absolute gpu address */
 		ret = amdgpu_ras_error_inject(adev, &data.inject);
 		break;
 	default:
 		ret = -EINVAL;
 		break;
-	};
+	}
 
 	if (ret)
 		return -EINVAL;
@@ -290,6 +323,33 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 	return size;
 }
 
+/**
+ * DOC: AMDGPU RAS debugfs EEPROM table reset interface
+ *
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages which experiences ECC errors in vram.  This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ *	echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset EEPROM table to 0 entries.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
+		size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+	int ret;
+
+	ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
+
+	return ret == 1 ? size : -EIO;
+}
+
 static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
 	.owner = THIS_MODULE,
 	.read = NULL,
@@ -297,6 +357,34 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
+	.owner = THIS_MODULE,
+	.read = NULL,
+	.write = amdgpu_ras_debugfs_eeprom_write,
+	.llseek = default_llseek
+};
+
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs the multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ *	ue: 0
+ *	ce: 1
+ *
+ */
 static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -354,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
 }
 
 /* return an obj equal to head, or the first when head is NULL */
-static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 		struct ras_common_if *head)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -475,15 +563,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
 		return 0;
 
-	ret = psp_ras_enable_features(&adev->psp, &info, enable);
-	if (ret) {
-		DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
-				enable ? "enable":"disable",
-				ras_block_str(head->block),
-				ret);
-		if (ret == TA_RAS_STATUS__RESET_NEEDED)
-			return -EAGAIN;
-		return -EINVAL;
+	if (!amdgpu_ras_intr_triggered()) {
+		ret = psp_ras_enable_features(&adev->psp, &info, enable);
+		if (ret) {
+			DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
+					enable ? "enable":"disable",
+					ras_block_str(head->block),
+					ret);
+			if (ret == TA_RAS_STATUS__RESET_NEEDED)
+				return -EAGAIN;
+			return -EINVAL;
+		}
 	}
 
 	/* setup the obj */
@@ -596,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 {
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
 	struct ras_err_data err_data = {0, 0, 0, NULL};
+	int i;
 
 	if (!obj)
 		return -EINVAL;
@@ -610,13 +701,24 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 		if (adev->umc.funcs->query_ras_error_address)
 			adev->umc.funcs->query_ras_error_address(adev, &err_data);
 		break;
+	case AMDGPU_RAS_BLOCK__SDMA:
+		if (adev->sdma.funcs->query_ras_error_count) {
+			for (i = 0; i < adev->sdma.num_instances; i++)
+				adev->sdma.funcs->query_ras_error_count(adev, i,
+									&err_data);
+		}
+		break;
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.funcs->query_ras_error_count)
 			adev->gfx.funcs->query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
-		if (adev->mmhub_funcs->query_ras_error_count)
-			adev->mmhub_funcs->query_ras_error_count(adev, &err_data);
+		if (adev->mmhub.funcs->query_ras_error_count)
+			adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
+		break;
+	case AMDGPU_RAS_BLOCK__PCIE_BIF:
+		if (adev->nbio.funcs->query_ras_error_count)
+			adev->nbio.funcs->query_ras_error_count(adev, &err_data);
 		break;
 	default:
 		break;
@@ -628,12 +730,14 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 	info->ue_count = obj->err_data.ue_count;
 	info->ce_count = obj->err_data.ce_count;
 
-	if (err_data.ce_count)
+	if (err_data.ce_count) {
 		dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
 			 obj->err_data.ce_count, ras_block_str(info->head.block));
-	if (err_data.ue_count)
+	}
+	if (err_data.ue_count) {
 		dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
 			 obj->err_data.ue_count, ras_block_str(info->head.block));
+	}
 
 	return 0;
 }
@@ -664,6 +768,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__UMC:
 	case AMDGPU_RAS_BLOCK__MMHUB:
+	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+	case AMDGPU_RAS_BLOCK__PCIE_BIF:
 		ret = psp_ras_trigger_error(&adev->psp, &block_info);
 		break;
 	default:
@@ -723,18 +829,18 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
 {
 	switch (flags) {
-	case 0:
+	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
 		return "R";
-	case 1:
+	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
 		return "P";
-	case 2:
+	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
 	default:
 		return "F";
 	};
 }
 
-/*
- * DOC: ras sysfs gpu_vram_bad_pages interface
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
  *
  * It allows user to read the bad pages of vram on the gpu through
  * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
@@ -746,14 +852,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
  *
  * gpu pfn and gpu page size are printed in hex format.
  * flags can be one of below character,
+ *
  * R: reserved, this gpu page is reserved and not able to use.
+ *
  * P: pending for reserve, this gpu page is marked as bad, will be reserved
- *    in next window of page_reserve.
+ * in next window of page_reserve.
+ *
  * F: unable to reserve. this gpu page can't be reserved due to some reasons.
  *
- * examples:
- * 0x00000001 : 0x00001000 : R
- * 0x00000002 : 0x00001000 : P
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ *	0x00000001 : 0x00001000 : R
+ *	0x00000002 : 0x00001000 : P
+ *
  */
 
 static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
@@ -927,6 +1040,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 }
 /* sysfs end */
 
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover.  However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ *	echo true > .../ras/auto_reboot
+ *
+ */
 /* debugfs begin */
 static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 {
@@ -934,8 +1065,21 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 	struct drm_minor *minor = adev->ddev->primary;
 
 	con->dir = debugfs_create_dir("ras", minor->debugfs_root);
-	con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
-				       adev, &amdgpu_ras_debugfs_ctrl_ops);
+	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
+				adev, &amdgpu_ras_debugfs_ctrl_ops);
+	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
+				adev, &amdgpu_ras_debugfs_eeprom_ops);
+
+	/*
+	 * After one uncorrectable error happens, usually GPU recovery will
+	 * be scheduled. But due to the known problem in GPU recovery failing
+	 * to bring GPU back, below interface provides one direct way to
+	 * user to reboot system automatically in such case within
+	 * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
+	 * will never be called.
+	 */
+	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
+				&con->reboot);
 }
 
 void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
@@ -980,10 +1124,8 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 		amdgpu_ras_debugfs_remove(adev, &obj->head);
 	}
 
-	debugfs_remove(con->ent);
-	debugfs_remove(con->dir);
+	debugfs_remove_recursive(con->dir);
 	con->dir = NULL;
-	con->ent = NULL;
 }
 /* debugfs end */
 
@@ -1177,6 +1319,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 	data = con->eh_data;
 	if (!data || data->count == 0) {
 		*bps = NULL;
+		ret = -EINVAL;
 		goto out;
 	}
 
@@ -1188,15 +1331,15 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 
 	for (; i < data->count; i++) {
 		(*bps)[i] = (struct ras_badpage){
-			.bp = data->bps[i].bp,
+			.bp = data->bps[i].retired_page,
 			.size = AMDGPU_GPU_PAGE_SIZE,
-			.flags = 0,
+			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
 		};
 
 		if (data->last_reserved <= i)
-			(*bps)[i].flags = 1;
-		else if (data->bps[i].bo == NULL)
-			(*bps)[i].flags = 2;
+			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+		else if (data->bps_bo[i] == NULL)
+			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
 	}
 
 	*count = data->count;
@@ -1210,109 +1353,51 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 	struct amdgpu_ras *ras =
 		container_of(work, struct amdgpu_ras, recovery_work);
 
-	amdgpu_device_gpu_recover(ras->adev, 0);
+	if (amdgpu_device_should_recover_gpu(ras->adev))
+		amdgpu_device_gpu_recover(ras->adev, 0);
 	atomic_set(&ras->in_recovery, 0);
 }
 
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
-		struct amdgpu_bo **bo_ptr)
-{
-	/* no need to free it actually. */
-	amdgpu_bo_free_kernel(bo_ptr, NULL, NULL);
-	return 0;
-}
-
-/* reserve vram with size@offset */
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
-		uint64_t offset, uint64_t size,
-		struct amdgpu_bo **bo_ptr)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	struct amdgpu_bo_param bp;
-	int r = 0;
-	int i;
-	struct amdgpu_bo *bo;
-
-	if (bo_ptr)
-		*bo_ptr = NULL;
-	memset(&bp, 0, sizeof(bp));
-	bp.size = size;
-	bp.byte_align = PAGE_SIZE;
-	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-		AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-	bp.type = ttm_bo_type_kernel;
-	bp.resv = NULL;
-
-	r = amdgpu_bo_create(adev, &bp, &bo);
-	if (r)
-		return -EINVAL;
-
-	r = amdgpu_bo_reserve(bo, false);
-	if (r)
-		goto error_reserve;
-
-	offset = ALIGN(offset, PAGE_SIZE);
-	for (i = 0; i < bo->placement.num_placement; ++i) {
-		bo->placements[i].fpfn = offset >> PAGE_SHIFT;
-		bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
-	}
-
-	ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
-	r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx);
-	if (r)
-		goto error_pin;
-
-	r = amdgpu_bo_pin_restricted(bo,
-			AMDGPU_GEM_DOMAIN_VRAM,
-			offset,
-			offset + size);
-	if (r)
-		goto error_pin;
-
-	if (bo_ptr)
-		*bo_ptr = bo;
-
-	amdgpu_bo_unreserve(bo);
-	return r;
-
-error_pin:
-	amdgpu_bo_unreserve(bo);
-error_reserve:
-	amdgpu_bo_unref(&bo);
-	return r;
-}
-
 /* alloc/realloc bps array */
 static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
 		struct ras_err_handler_data *data, int pages)
 {
 	unsigned int old_space = data->count + data->space_left;
 	unsigned int new_space = old_space + pages;
-	unsigned int align_space = ALIGN(new_space, 1024);
-	void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-
-	if (!tmp)
+	unsigned int align_space = ALIGN(new_space, 512);
+	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+	struct amdgpu_bo **bps_bo =
+			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
+
+	if (!bps || !bps_bo) {
+		kfree(bps);
+		kfree(bps_bo);
 		return -ENOMEM;
+	}
 
 	if (data->bps) {
-		memcpy(tmp, data->bps,
+		memcpy(bps, data->bps,
 				data->count * sizeof(*data->bps));
 		kfree(data->bps);
 	}
+	if (data->bps_bo) {
+		memcpy(bps_bo, data->bps_bo,
+				data->count * sizeof(*data->bps_bo));
+		kfree(data->bps_bo);
+	}
 
-	data->bps = tmp;
+	data->bps = bps;
+	data->bps_bo = bps_bo;
 	data->space_left += align_space - old_space;
 	return 0;
 }
 
 /* it deal with vram only. */
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
-		unsigned long *bps, int pages)
+		struct eeprom_table_record *bps, int pages)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data;
-	int i = pages;
 	int ret = 0;
 
 	if (!con || !con->eh_data || !bps || pages <= 0)
@@ -1329,24 +1414,120 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 			goto out;
 		}
 
-	while (i--)
-		data->bps[data->count++].bp = bps[i];
-
+	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+	data->count += pages;
 	data->space_left -= pages;
+
 out:
 	mutex_unlock(&con->recovery_lock);
 
 	return ret;
 }
 
+/*
+ * write error record array to eeprom, the function should be
+ * protected by recovery_lock
+ */
+static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	struct amdgpu_ras_eeprom_control *control;
+	int save_count;
+
+	if (!con || !con->eh_data)
+		return 0;
+
+	control = &con->eeprom_control;
+	data = con->eh_data;
+	save_count = data->count - control->num_recs;
+	/* only new entries are saved */
+	if (save_count > 0)
+		if (amdgpu_ras_eeprom_process_recods(control,
+							&data->bps[control->num_recs],
+							true,
+							save_count)) {
+			DRM_ERROR("Failed to save EEPROM table data!");
+			return -EIO;
+		}
+
+	return 0;
+}
+
+/*
+ * read error record array in eeprom and reserve enough space for
+ * storing new bad pages
+ */
+static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras_eeprom_control *control =
+					&adev->psp.ras.ras->eeprom_control;
+	struct eeprom_table_record *bps = NULL;
+	int ret = 0;
+
+	/* no bad page record, skip eeprom access */
+	if (!control->num_recs)
+		return ret;
+
+	bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
+	if (!bps)
+		return -ENOMEM;
+
+	if (amdgpu_ras_eeprom_process_recods(control, bps, false,
+		control->num_recs)) {
+		DRM_ERROR("Failed to load EEPROM table records!");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
+
+out:
+	kfree(bps);
+	return ret;
+}
+
+/*
+ * check if an address belongs to bad page
+ *
+ * Note: this check is only for umc block
+ */
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+				uint64_t addr)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	int i;
+	bool ret = false;
+
+	if (!con || !con->eh_data)
+		return ret;
+
+	mutex_lock(&con->recovery_lock);
+	data = con->eh_data;
+	if (!data)
+		goto out;
+
+	addr >>= AMDGPU_GPU_PAGE_SHIFT;
+	for (i = 0; i < data->count; i++)
+		if (addr == data->bps[i].retired_page) {
+			ret = true;
+			goto out;
+		}
+
+out:
+	mutex_unlock(&con->recovery_lock);
+	return ret;
+}
+
 /* called in gpu recovery/init */
 int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data;
 	uint64_t bp;
-	struct amdgpu_bo *bo;
-	int i;
+	struct amdgpu_bo *bo = NULL;
+	int i, ret = 0;
 
 	if (!con || !con->eh_data)
 		return 0;
@@ -1357,18 +1538,29 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
 		goto out;
 	/* reserve vram at driver post stage. */
 	for (i = data->last_reserved; i < data->count; i++) {
-		bp = data->bps[i].bp;
+		bp = data->bps[i].retired_page;
 
-		if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT,
-					PAGE_SIZE, &bo))
-			DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp);
+		/* There are two cases of reserve error should be ignored:
+		 * 1) a ras bad page has been allocated (used by someone);
+		 * 2) a ras bad page has been reserved (duplicate error injection
+		 *    for one page);
+		 */
+		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+					       AMDGPU_GPU_PAGE_SIZE,
+					       AMDGPU_GEM_DOMAIN_VRAM,
+					       &bo, NULL))
+			DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp);
 
-		data->bps[i].bo = bo;
+		data->bps_bo[i] = bo;
 		data->last_reserved = i + 1;
+		bo = NULL;
 	}
+
+	/* continue to save bad pages to eeprom even reesrve_vram fails */
+	ret = amdgpu_ras_save_bad_pages(adev);
 out:
 	mutex_unlock(&con->recovery_lock);
-	return 0;
+	return ret;
 }
 
 /* called when driver unload */
@@ -1388,11 +1580,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
 		goto out;
 
 	for (i = data->last_reserved - 1; i >= 0; i--) {
-		bo = data->bps[i].bo;
+		bo = data->bps_bo[i];
 
-		amdgpu_ras_release_vram(adev, &bo);
+		amdgpu_bo_free_kernel(&bo, NULL, NULL);
 
-		data->bps[i].bo = bo;
+		data->bps_bo[i] = bo;
 		data->last_reserved = i;
 	}
 out:
@@ -1400,41 +1592,54 @@ out:
 	return 0;
 }
 
-static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
-{
-	/* TODO
-	 * write the array to eeprom when SMU disabled.
-	 */
-	return 0;
-}
-
-static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
-{
-	/* TODO
-	 * read the array to eeprom when SMU disabled.
-	 */
-	return 0;
-}
-
-static int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data **data = &con->eh_data;
+	struct ras_err_handler_data **data;
+	int ret;
 
-	*data = kmalloc(sizeof(**data),
-			GFP_KERNEL|__GFP_ZERO);
-	if (!*data)
-		return -ENOMEM;
+	if (con)
+		data = &con->eh_data;
+	else
+		return 0;
+
+	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+	if (!*data) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	mutex_init(&con->recovery_lock);
 	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
 	atomic_set(&con->in_recovery, 0);
 	con->adev = adev;
 
-	amdgpu_ras_load_bad_pages(adev);
-	amdgpu_ras_reserve_bad_pages(adev);
+	ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
+	if (ret)
+		goto free;
+
+	if (con->eeprom_control.num_recs) {
+		ret = amdgpu_ras_load_bad_pages(adev);
+		if (ret)
+			goto free;
+		ret = amdgpu_ras_reserve_bad_pages(adev);
+		if (ret)
+			goto release;
+	}
 
 	return 0;
+
+release:
+	amdgpu_ras_release_bad_pages(adev);
+free:
+	kfree((*data)->bps);
+	kfree((*data)->bps_bo);
+	kfree(*data);
+	con->eh_data = NULL;
+out:
+	DRM_WARN("Failed to initialize ras recovery!\n");
+
+	return ret;
 }
 
 static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
@@ -1442,13 +1647,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data = con->eh_data;
 
+	/* recovery_init failed to init it, fini is useless */
+	if (!data)
+		return 0;
+
 	cancel_work_sync(&con->recovery_work);
-	amdgpu_ras_save_bad_pages(adev);
 	amdgpu_ras_release_bad_pages(adev);
 
 	mutex_lock(&con->recovery_lock);
 	con->eh_data = NULL;
 	kfree(data->bps);
+	kfree(data->bps_bo);
 	kfree(data);
 	mutex_unlock(&con->recovery_lock);
 
@@ -1485,7 +1694,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 	*supported = 0;
 
 	if (amdgpu_sriov_vf(adev) ||
-			adev->asic_type != CHIP_VEGA20)
+	    (adev->asic_type != CHIP_VEGA20 &&
+	     adev->asic_type != CHIP_ARCTURUS))
 		return;
 
 	if (adev->is_atom_fw &&
@@ -1500,6 +1710,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 int amdgpu_ras_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int r;
 
 	if (con)
 		return 0;
@@ -1527,31 +1738,106 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	/* Might need get this flag from vbios. */
 	con->flags = RAS_DEFAULT_FLAGS;
 
-	if (amdgpu_ras_recovery_init(adev))
-		goto recovery_out;
+	if (adev->nbio.funcs->init_ras_controller_interrupt) {
+		r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+		if (r)
+			return r;
+	}
+
+	if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
+		r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+		if (r)
+			return r;
+	}
 
 	amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
 
 	if (amdgpu_ras_fs_init(adev))
 		goto fs_out;
 
-	/* ras init for each ras block */
-	if (adev->umc.funcs->ras_init)
-		adev->umc.funcs->ras_init(adev);
-
 	DRM_INFO("RAS INFO: ras initialized successfully, "
 			"hardware ability[%x] ras_mask[%x]\n",
 			con->hw_supported, con->supported);
 	return 0;
 fs_out:
-	amdgpu_ras_recovery_fini(adev);
-recovery_out:
 	amdgpu_ras_set_context(adev, NULL);
 	kfree(con);
 
 	return -EINVAL;
 }
 
+/* helper function to handle common stuff in ip late init phase */
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+			 struct ras_common_if *ras_block,
+			 struct ras_fs_if *fs_info,
+			 struct ras_ih_if *ih_info)
+{
+	int r;
+
+	/* disable RAS feature per IP block if it is not supported */
+	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+		return 0;
+	}
+
+	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
+	if (r) {
+		if (r == -EAGAIN) {
+			/* request gpu reset. will run again */
+			amdgpu_ras_request_reset_on_boot(adev,
+					ras_block->block);
+			return 0;
+		} else if (adev->in_suspend || adev->in_gpu_reset) {
+			/* in resume phase, if fail to enable ras,
+			 * clean up all ras fs nodes, and disable ras */
+			goto cleanup;
+		} else
+			return r;
+	}
+
+	/* in resume phase, no need to create ras fs node */
+	if (adev->in_suspend || adev->in_gpu_reset)
+		return 0;
+
+	if (ih_info->cb) {
+		r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+		if (r)
+			goto interrupt;
+	}
+
+	amdgpu_ras_debugfs_create(adev, fs_info);
+
+	r = amdgpu_ras_sysfs_create(adev, fs_info);
+	if (r)
+		goto sysfs;
+
+	return 0;
+cleanup:
+	amdgpu_ras_sysfs_remove(adev, ras_block);
+sysfs:
+	amdgpu_ras_debugfs_remove(adev, ras_block);
+	if (ih_info->cb)
+		amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+interrupt:
+	amdgpu_ras_feature_enable(adev, ras_block, 0);
+	return r;
+}
+
+/* helper function to remove ras fs node and interrupt handler */
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+			  struct ras_common_if *ras_block,
+			  struct ras_ih_if *ih_info)
+{
+	if (!ras_block || !ih_info)
+		return;
+
+	amdgpu_ras_sysfs_remove(adev, ras_block);
+	amdgpu_ras_debugfs_remove(adev, ras_block);
+	if (ih_info->cb)
+                amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+	amdgpu_ras_feature_enable(adev, ras_block, 0);
+}
+
 /* do some init work after IP late init as dependence.
  * and it runs in resume/gpu reset/booting up cases.
  */
@@ -1594,7 +1880,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
 		 * See feature_enable_on_boot
 		 */
 		amdgpu_ras_disable_all_features(adev, 1);
-		amdgpu_ras_reset_gpu(adev, 0);
+		amdgpu_ras_reset_gpu(adev);
 	}
 }
 
@@ -1645,3 +1931,18 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
+	uint32_t hw_supported, supported;
+
+	amdgpu_ras_check_supported(adev, &hw_supported, &supported);
+	if (!hw_supported)
+		return;
+
+	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+		DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
+
+		amdgpu_ras_reset_gpu(adev);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6c76bb2a6843..a5fe29a9373e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -317,8 +317,6 @@ struct amdgpu_ras {
 	struct list_head head;
 	/* debugfs */
 	struct dentry *dir;
-	/* debugfs ctrl */
-	struct dentry *ent;
 	/* sysfs */
 	struct device_attribute features_attr;
 	struct bin_attribute badpages_attr;
@@ -334,7 +332,7 @@ struct amdgpu_ras {
 	struct mutex recovery_lock;
 
 	uint32_t flags;
-
+	bool reboot;
 	struct amdgpu_ras_eeprom_control eeprom_control;
 };
 
@@ -347,15 +345,14 @@ struct ras_err_data {
 	unsigned long ue_count;
 	unsigned long ce_count;
 	unsigned long err_addr_cnt;
-	uint64_t *err_addr;
+	struct eeprom_table_record *err_addr;
 };
 
 struct ras_err_handler_data {
-	/* point to bad pages array */
-	struct {
-		unsigned long bp;
-		struct amdgpu_bo *bo;
-	} *bps;
+	/* point to bad page records array */
+	struct eeprom_table_record *bps;
+	/* point to reserved bo array */
+	struct amdgpu_bo **bps_bo;
 	/* the count of entries */
 	int count;
 	/* the space can place new entries */
@@ -365,7 +362,7 @@ struct ras_err_handler_data {
 };
 
 typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
+		void *err_data,
 		struct amdgpu_iv_entry *entry);
 
 struct ras_ih_data {
@@ -481,6 +478,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	return ras && (ras->supported & (1 << block));
 }
 
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
 int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
 		unsigned int block);
 
@@ -492,15 +490,20 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 
 /* error handling functions */
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
-		unsigned long *bps, int pages);
+		struct eeprom_table_record *bps, int pages);
 
 int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
-		bool is_baco)
+static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	/* save bad page to eeprom before gpu reset,
+	 * i2c may be unstable in gpu reset
+	 */
+	if (in_task())
+		amdgpu_ras_reserve_bad_pages(adev);
+
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
 	return 0;
@@ -566,6 +569,13 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 int amdgpu_ras_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+			 struct ras_common_if *ras_block,
+			 struct ras_fs_if *fs_info,
+			 struct ras_ih_if *ih_info);
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+			  struct ras_common_if *ras_block,
+			  struct ras_ih_if *ih_info);
 
 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 		struct ras_common_if *head, bool enable);
@@ -599,4 +609,22 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
 
 int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
 		struct ras_dispatch_if *info);
+
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+		struct ras_common_if *head);
+
+extern atomic_t amdgpu_ras_in_intr;
+
+static inline bool amdgpu_ras_intr_triggered(void)
+{
+	return !!atomic_read(&amdgpu_ras_in_intr);
+}
+
+static inline void amdgpu_ras_intr_cleared(void)
+{
+	atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 8a32b5c93778..2a8e04895595 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -27,7 +27,8 @@
 #include <linux/bits.h>
 #include "smu_v11_0_i2c.h"
 
-#define EEPROM_I2C_TARGET_ADDR 0xA0
+#define EEPROM_I2C_TARGET_ADDR_ARCTURUS  0xA8
+#define EEPROM_I2C_TARGET_ADDR_VEGA20    0xA0
 
 /*
  * The 2 macros bellow represent the actual size in bytes that
@@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
 {
 	int ret = 0;
 	struct i2c_msg msg = {
-			.addr	= EEPROM_I2C_TARGET_ADDR,
+			.addr	= 0,
 			.flags	= 0,
 			.len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 			.buf	= buff,
@@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
 	*(uint16_t *)buff = EEPROM_HDR_START;
 	__encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
 
+	msg.addr = control->i2c_address;
+
 	ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
 	if (ret < 1)
 		DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
@@ -100,7 +103,101 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
 	return ret;
 }
 
-static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control);
+
+
+static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+{
+	int i;
+	uint32_t tbl_sum = 0;
+
+	/* Header checksum, skip checksum field in the calculation */
+	for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
+		tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+
+	return tbl_sum;
+}
+
+static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records,
+				      int num)
+{
+	int i, j;
+	uint32_t tbl_sum = 0;
+
+	/* Records checksum */
+	for (i = 0; i < num; i++) {
+		struct eeprom_table_record *record = &records[i];
+
+		for (j = 0; j < sizeof(*record); j++) {
+			tbl_sum += *(((unsigned char *)record) + j);
+		}
+	}
+
+	return tbl_sum;
+}
+
+static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
+				  struct eeprom_table_record *records, int num)
+{
+	return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+}
+
+/* Checksum = 256 -((sum of all table entries) mod 256) */
+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+				  struct eeprom_table_record *records, int num,
+				  uint32_t old_hdr_byte_sum)
+{
+	/*
+	 * This will update the table sum with new records.
+	 *
+	 * TODO: What happens when the EEPROM table is to be wrapped around
+	 * and old records from start will get overridden.
+	 */
+
+	/* need to recalculate updated header byte sum */
+	control->tbl_byte_sum -= old_hdr_byte_sum;
+	control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+
+	control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+}
+
+/* table sum mod 256 + checksum must equals 256 */
+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+			    struct eeprom_table_record *records, int num)
+{
+	control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+
+	if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
+		DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
+		return false;
+	}
+
+	return true;
+}
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+	unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+	int ret = 0;
+
+	mutex_lock(&control->tbl_mutex);
+
+	hdr->header = EEPROM_TABLE_HDR_VAL;
+	hdr->version = EEPROM_TABLE_VER;
+	hdr->first_rec_offset = EEPROM_RECORD_START;
+	hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+	control->tbl_byte_sum = 0;
+	__update_tbl_checksum(control, NULL, 0, 0);
+	control->next_addr = EEPROM_RECORD_START;
+
+	ret = __update_table_header(control, buff);
+
+	mutex_unlock(&control->tbl_mutex);
+
+	return ret;
+
+}
 
 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 {
@@ -109,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 	unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
 	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 	struct i2c_msg msg = {
-			.addr	= EEPROM_I2C_TARGET_ADDR,
+			.addr	= 0,
 			.flags	= I2C_M_RD,
 			.len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 			.buf	= buff,
@@ -119,9 +216,15 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
+		control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20;
 		ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
 		break;
 
+	case CHIP_ARCTURUS:
+		control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
+		ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
+		break;
+
 	default:
 		return 0;
 	}
@@ -131,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 		return ret;
 	}
 
+	msg.addr = control->i2c_address;
+
 	/* Read/Create table header from EEPROM address 0 */
 	ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
 	if (ret < 1) {
@@ -143,25 +248,18 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 	if (hdr->header == EEPROM_TABLE_HDR_VAL) {
 		control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
 				    EEPROM_TABLE_RECORD_SIZE;
+		control->tbl_byte_sum = __calc_hdr_byte_sum(control);
+		control->next_addr = EEPROM_RECORD_START;
+
 		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
 				 control->num_recs);
 
 	} else {
 		DRM_INFO("Creating new EEPROM table");
 
-		hdr->header = EEPROM_TABLE_HDR_VAL;
-		hdr->version = EEPROM_TABLE_VER;
-		hdr->first_rec_offset = EEPROM_RECORD_START;
-		hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
-
-		adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
-				__calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
-		ret = __update_table_header(control, buff);
+		ret = amdgpu_ras_eeprom_reset_table(control);
 	}
 
-	/* Start inserting records from here */
-	adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
-
 	return ret == 1 ? 0 : -EIO;
 }
 
@@ -173,6 +271,9 @@ void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
 	case CHIP_VEGA20:
 		smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
 		break;
+	case CHIP_ARCTURUS:
+		smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor);
+		break;
 
 	default:
 		return;
@@ -226,8 +327,8 @@ static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *co
 	record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
 	i += 6;
 
-	buff[i++] = record->mem_channel;
-	buff[i++] = record->mcumc_id;
+	record->mem_channel = buff[i++];
+	record->mcumc_id = buff[i++];
 
 	memcpy(&tmp, buff + i,  6);
 	record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
@@ -266,87 +367,18 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
 	return curr_address;
 }
 
-
-static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
-{
-	int i;
-	uint32_t tbl_sum = 0;
-
-	/* Header checksum, skip checksum field in the calculation */
-	for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
-		tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
-
-	return tbl_sum;
-}
-
-static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records,
-				      int num)
-{
-	int i, j;
-	uint32_t tbl_sum = 0;
-
-	/* Records checksum */
-	for (i = 0; i < num; i++) {
-		struct eeprom_table_record *record = &records[i];
-
-		for (j = 0; j < sizeof(*record); j++) {
-			tbl_sum += *(((unsigned char *)record) + j);
-		}
-	}
-
-	return tbl_sum;
-}
-
-static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
-				  struct eeprom_table_record *records, int num)
-{
-	return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
-}
-
-/* Checksum = 256 -((sum of all table entries) mod 256) */
-static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
-				  struct eeprom_table_record *records, int num,
-				  uint32_t old_hdr_byte_sum)
-{
-	/*
-	 * This will update the table sum with new records.
-	 *
-	 * TODO: What happens when the EEPROM table is to be wrapped around
-	 * and old records from start will get overridden.
-	 */
-
-	/* need to recalculate updated header byte sum */
-	control->tbl_byte_sum -= old_hdr_byte_sum;
-	control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
-
-	control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
-}
-
-/* table sum mod 256 + checksum must equals 256 */
-static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
-			    struct eeprom_table_record *records, int num)
-{
-	control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
-
-	if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
-		DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
-		return false;
-	}
-
-	return true;
-}
-
 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 					    struct eeprom_table_record *records,
 					    bool write,
 					    int num)
 {
 	int i, ret = 0;
-	struct i2c_msg *msgs;
-	unsigned char *buffs;
+	struct i2c_msg *msgs, *msg;
+	unsigned char *buffs, *buff;
+	struct eeprom_table_record *record;
 	struct amdgpu_device *adev = to_amdgpu_device(control);
 
-	if (adev->asic_type != CHIP_VEGA20)
+	if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS)
 		return 0;
 
 	buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
@@ -373,9 +405,9 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 	 * 256b
 	 */
 	for (i = 0; i < num; i++) {
-		unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
-		struct eeprom_table_record *record = &records[i];
-		struct i2c_msg *msg = &msgs[i];
+		buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+		record = &records[i];
+		msg = &msgs[i];
 
 		control->next_addr = __correct_eeprom_dest_address(control->next_addr);
 
@@ -383,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 		 * Update bits 16,17 of EEPROM address in I2C address by setting them
 		 * to bits 1,2 of Device address byte
 		 */
-		msg->addr = EEPROM_I2C_TARGET_ADDR |
-			       ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+		msg->addr = control->i2c_address |
+			        ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
 		msg->flags	= write ? 0 : I2C_M_RD;
 		msg->len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
 		msg->buf	= buff;
@@ -415,8 +447,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 
 	if (!write) {
 		for (i = 0; i < num; i++) {
-			unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
-			struct eeprom_table_record *record = &records[i];
+			buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+			record = &records[i];
 
 			__decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 41f3fcb9a29b..ca78f812d436 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control {
 	struct mutex tbl_mutex;
 	bool bus_locked;
 	uint32_t tbl_byte_sum;
+	uint16_t i2c_address; // 8-bit represented address
 };
 
 /*
@@ -79,6 +80,7 @@ struct eeprom_table_record {
 
 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
 void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
 
 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 					    struct eeprom_table_record *records,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6cc3c5..6373bfb47d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
  */
 int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
 {
-	volatile u32 *dst_ptr;
 	u32 dws;
 	int r;
 
 	/* allocate clear state block */
 	adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
-	r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &adev->gfx.rlc.clear_state_obj,
 				      &adev->gfx.rlc.clear_state_gpu_addr,
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
 		return r;
 	}
 
-	/* set up the cs buffer */
-	dst_ptr = adev->gfx.rlc.cs_ptr;
-	adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
-	amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-	amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 5c13c503e61f..a2ee30b16212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -23,6 +23,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
 
 #define AMDGPU_CSA_SDMA_SIZE 64
 /* SDMA CSA reside in the 3rd page of CSA */
@@ -83,3 +84,101 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
 
 	return csa_mc_addr;
 }
+
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+			      void *ras_ih_info)
+{
+	int r, i;
+	struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "sdma_err_count",
+		.debugfs_name = "sdma_err_inject",
+	};
+
+	if (!ih_info)
+		return -EINVAL;
+
+	if (!adev->sdma.ras_if) {
+		adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->sdma.ras_if)
+			return -ENOMEM;
+		adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
+		adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->sdma.ras_if->sub_block_index = 0;
+		strcpy(adev->sdma.ras_if->name, "sdma");
+	}
+	fs_info.head = ih_info->head = *adev->sdma.ras_if;
+
+	r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
+				 &fs_info, ih_info);
+	if (r)
+		goto free;
+
+	if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
+				AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+			if (r)
+				goto late_fini;
+		}
+	} else {
+		r = 0;
+		goto free;
+	}
+
+        return 0;
+
+late_fini:
+	amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
+free:
+	kfree(adev->sdma.ras_if);
+	adev->sdma.ras_if = NULL;
+	return r;
+}
+
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
+			adev->sdma.ras_if) {
+		struct ras_common_if *ras_if = adev->sdma.ras_if;
+		struct ras_ih_if ih_info = {
+			.head = *ras_if,
+			/* the cb member will not be used by
+			 * amdgpu_ras_interrupt_remove_handler, init it only
+			 * to cheat the check in ras_late_fini
+			 */
+			.cb = amdgpu_sdma_process_ras_data_cb,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
+
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+		void *err_data,
+		struct amdgpu_iv_entry *entry)
+{
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+	amdgpu_ras_reset_gpu(adev);
+
+	return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	struct ras_common_if *ras_if = adev->sdma.ras_if;
+	struct ras_dispatch_if ih_data = {
+		.entry = entry,
+	};
+
+	if (!ras_if)
+		return 0;
+
+	ih_data.head = *ras_if;
+
+	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index a9ae0d8a0589..485335267d78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,8 +50,18 @@ struct amdgpu_sdma_instance {
 	bool			burst_nop;
 };
 
+struct amdgpu_sdma_ras_funcs {
+	int (*ras_late_init)(struct amdgpu_device *adev,
+			void *ras_ih_info);
+	void (*ras_fini)(struct amdgpu_device *adev);
+	int (*query_ras_error_count)(struct amdgpu_device *adev,
+			uint32_t instance, void *ras_error_status);
+};
+
 struct amdgpu_sdma {
 	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+	struct drm_gpu_scheduler    *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
+	uint32_t		    num_sdma_sched;
 	struct amdgpu_irq_src	trap_irq;
 	struct amdgpu_irq_src	illegal_inst_irq;
 	struct amdgpu_irq_src	ecc_irq;
@@ -59,6 +69,7 @@ struct amdgpu_sdma {
 	uint32_t                    srbm_soft_reset;
 	bool			has_page_queue;
 	struct ras_common_if	*ras_if;
+	const struct amdgpu_sdma_ras_funcs	*funcs;
 };
 
 /*
@@ -104,4 +115,13 @@ struct amdgpu_sdma_instance *
 amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
 int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
 uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+			      void *ras_ih_info);
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+		void *err_data,
+		struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 95e5e93edd18..a09b6b9c27d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
  */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
+				  bool explicit)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
  * amdgpu_sync_fence - remember to sync to this fence
  *
  * @sync: sync object to add fence to
- * @fence: fence to sync to
+ * @f: fence to sync to
+ * @explicit: if this is an explicit dependency
  *
+ * Add the fence to the sync object.
  */
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct dma_fence *f, bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      bool explicit)
 {
 	struct amdgpu_sync_entry *e;
 
 	if (!f)
 		return 0;
-	if (amdgpu_sync_same_dev(adev, f) &&
-	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
-		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
 	if (amdgpu_sync_add_later(sync, f, explicit))
 		return 0;
@@ -180,6 +180,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 }
 
 /**
+ * amdgpu_sync_vm_fence - remember to sync to this VM fence
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fence to
+ * @fence: the VM fence to add
+ *
+ * Add the fence to the sync object and remember it as VM update.
+ */
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
+	return amdgpu_sync_fence(sync, fence, false);
+}
+
+/**
  * amdgpu_sync_resv - sync to a reservation object
  *
  * @sync: sync object to add fences from reservation object to
@@ -204,7 +222,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 
 	/* always sync to the exclusive fence */
 	f = dma_resv_get_excl(resv);
-	r = amdgpu_sync_fence(adev, sync, f, false);
+	r = amdgpu_sync_fence(sync, f, false);
 
 	flist = dma_resv_get_list(resv);
 	if (!flist || r)
@@ -222,13 +240,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 			continue;
 
 		if (amdgpu_sync_same_dev(adev, f)) {
-			/* VM updates are only interesting
-			 * for other VM updates and moves.
+			/* VM updates only sync with moves but not with user
+			 * command submissions or KFD evictions fences
 			 */
-			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
-			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
-			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
-			     (fence_owner == AMDGPU_FENCE_OWNER_VM)))
+			if (owner == AMDGPU_FENCE_OWNER_VM &&
+			    fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
 				continue;
 
 			/* Ignore fence from the same owner and explicit one as
@@ -239,7 +255,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 				continue;
 		}
 
-		r = amdgpu_sync_fence(adev, sync, f, false);
+		r = amdgpu_sync_fence(sync, f, false);
 		if (r)
 			break;
 	}
@@ -340,7 +356,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 	hash_for_each_safe(source->fences, i, tmp, e, node) {
 		f = e->fence;
 		if (!dma_fence_is_signaled(f)) {
-			r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+			r = amdgpu_sync_fence(clone, f, e->explicit);
 			if (r)
 				return r;
 		} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index b5f1778a2319..d62c2b81d92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -40,8 +40,9 @@ struct amdgpu_sync {
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct dma_fence *f, bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      bool explicit);
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct dma_resv *resv,
@@ -49,7 +50,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     bool explicit_sync);
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
+					bool *explicit);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b66d29d5ffa2..b158230af8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		}
 
 		dma_fence_put(fence);
+		fence = NULL;
 
 		r = amdgpu_bo_kmap(vram_obj, &vram_map);
 		if (r) {
@@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		}
 
 		dma_fence_put(fence);
+		fence = NULL;
 
 		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 77674a7b9616..63e734a125fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			     __field(unsigned int, context)
 			     __field(unsigned int, seqno)
 			     __field(struct dma_fence *, fence)
-			     __field(char *, ring_name)
+			     __string(ring, to_amdgpu_ring(job->base.sched)->name)
 			     __field(u32, num_ibs)
 			     ),
 
@@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
 			   __entry->context = job->base.s_fence->finished.context;
 			   __entry->seqno = job->base.s_fence->finished.seqno;
-			   __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+			   __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
 		      __entry->sched_job_id, __get_str(timeline), __entry->context,
-		      __entry->seqno, __entry->ring_name, __entry->num_ibs)
+		      __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
 TRACE_EVENT(amdgpu_sched_run_job,
@@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			     __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
 			     __field(unsigned int, context)
 			     __field(unsigned int, seqno)
-			     __field(char *, ring_name)
+			     __string(ring, to_amdgpu_ring(job->base.sched)->name)
 			     __field(u32, num_ibs)
 			     ),
 
@@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
 			   __entry->context = job->base.s_fence->finished.context;
 			   __entry->seqno = job->base.s_fence->finished.seqno;
-			   __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+			   __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
 		      __entry->sched_job_id, __get_str(timeline), __entry->context,
-		      __entry->seqno, __entry->ring_name, __entry->num_ibs)
+		      __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
 
@@ -323,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
 
 TRACE_EVENT(amdgpu_vm_set_ptes,
 	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
-		     uint32_t incr, uint64_t flags),
-	    TP_ARGS(pe, addr, count, incr, flags),
+		     uint32_t incr, uint64_t flags, bool direct),
+	    TP_ARGS(pe, addr, count, incr, flags, direct),
 	    TP_STRUCT__entry(
 			     __field(u64, pe)
 			     __field(u64, addr)
 			     __field(u32, count)
 			     __field(u32, incr)
 			     __field(u64, flags)
+			     __field(bool, direct)
 			     ),
 
 	    TP_fast_assign(
@@ -339,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
 			   __entry->count = count;
 			   __entry->incr = incr;
 			   __entry->flags = flags;
+			   __entry->direct = direct;
 			   ),
-	    TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u",
-		      __entry->pe, __entry->addr, __entry->incr,
-		      __entry->flags, __entry->count)
+	    TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
+		      "direct=%d", __entry->pe, __entry->addr, __entry->incr,
+		      __entry->flags, __entry->count, __entry->direct)
 );
 
 TRACE_EVENT(amdgpu_vm_copy_ptes,
-	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count),
-	    TP_ARGS(pe, src, count),
+	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
+	    TP_ARGS(pe, src, count, direct),
 	    TP_STRUCT__entry(
 			     __field(u64, pe)
 			     __field(u64, src)
 			     __field(u32, count)
+			     __field(bool, direct)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->pe = pe;
 			   __entry->src = src;
 			   __entry->count = count;
+			   __entry->direct = direct;
 			   ),
-	    TP_printk("pe=%010Lx, src=%010Lx, count=%u",
-		      __entry->pe, __entry->src, __entry->count)
+	    TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
+		      __entry->pe, __entry->src, __entry->count,
+		      __entry->direct)
 );
 
 TRACE_EVENT(amdgpu_vm_flush,
@@ -468,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
 	    TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
 	    TP_ARGS(sched_job, fence),
 	    TP_STRUCT__entry(
-			     __field(const char *,name)
+			     __string(ring, sched_job->base.sched->name)
 			     __field(uint64_t, id)
 			     __field(struct dma_fence *, fence)
 			     __field(uint64_t, ctx)
@@ -476,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->name = sched_job->base.sched->name;
+			   __assign_str(ring, sched_job->base.sched->name)
 			   __entry->id = sched_job->base.id;
 			   __entry->fence = fence;
 			   __entry->ctx = fence->context;
 			   __entry->seqno = fence->seqno;
 			   ),
 	    TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
-		      __entry->name, __entry->id,
+		      __get_str(ring), __entry->id,
 		      __entry->fence, __entry->ctx,
 		      __entry->seqno)
 );
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dff41d0a85fe..dee446278417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,10 +35,13 @@
 #include <linux/hmm.h>
 #include <linux/pagemap.h>
 #include <linux/sched/task.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swiotlb.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
 
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
@@ -54,6 +57,7 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
 #include "bif/bif_4_1_d.h"
 
 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
@@ -484,15 +488,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
 {
-	struct amdgpu_device *adev;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct ttm_mem_reg tmp_mem;
 	struct ttm_place placements;
 	struct ttm_placement placement;
 	int r;
 
-	adev = amdgpu_ttm_adev(bo->bdev);
-
 	/* create space/pages for new_mem in GTT space */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
@@ -543,15 +544,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
 {
-	struct amdgpu_device *adev;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct ttm_mem_reg tmp_mem;
 	struct ttm_placement placement;
 	struct ttm_place placements;
 	int r;
 
-	adev = amdgpu_ttm_adev(bo->bdev);
-
 	/* make space in GTT for old_mem buffer */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
@@ -763,6 +761,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
  */
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
+	struct drm_gem_object	*gobj;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
@@ -772,6 +771,20 @@ struct amdgpu_ttm_tt {
 #endif
 };
 
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+	(1 << 0), /* HMM_PFN_VALID */
+	(1 << 1), /* HMM_PFN_WRITE */
+	0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+	0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+	0, /* HMM_PFN_NONE */
+	0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
 /**
  * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
  * memory and start HMM tracking CPU page table update
@@ -779,85 +792,89 @@ struct amdgpu_ttm_tt {
  * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
  * once afterwards to stop HMM tracking
  */
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT	16
-
 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 {
-	struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
 	struct ttm_tt *ttm = bo->tbo.ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct mm_struct *mm = gtt->usertask->mm;
 	unsigned long start = gtt->userptr;
 	struct vm_area_struct *vma;
 	struct hmm_range *range;
+	unsigned long timeout;
+	struct mm_struct *mm;
 	unsigned long i;
-	uint64_t *pfns;
 	int r = 0;
 
-	if (!mm) /* Happens during process shutdown */
-		return -ESRCH;
-
-	if (unlikely(!mirror)) {
-		DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
-		r = -EFAULT;
-		goto out;
+	mm = bo->notifier.mm;
+	if (unlikely(!mm)) {
+		DRM_DEBUG_DRIVER("BO is not registered?\n");
+		return -EFAULT;
 	}
 
-	vma = find_vma(mm, start);
-	if (unlikely(!vma || start < vma->vm_start)) {
-		r = -EFAULT;
-		goto out;
-	}
-	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-		vma->vm_file)) {
-		r = -EPERM;
-		goto out;
-	}
+	/* Another get_user_pages is running at the same time?? */
+	if (WARN_ON(gtt->range))
+		return -EFAULT;
+
+	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+		return -ESRCH;
 
 	range = kzalloc(sizeof(*range), GFP_KERNEL);
 	if (unlikely(!range)) {
 		r = -ENOMEM;
 		goto out;
 	}
+	range->notifier = &bo->notifier;
+	range->flags = hmm_range_flags;
+	range->values = hmm_range_values;
+	range->pfn_shift = PAGE_SHIFT;
+	range->start = bo->notifier.interval_tree.start;
+	range->end = bo->notifier.interval_tree.last + 1;
+	range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+	if (!amdgpu_ttm_tt_is_readonly(ttm))
+		range->default_flags |= range->flags[HMM_PFN_WRITE];
 
-	pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
-	if (unlikely(!pfns)) {
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+				     GFP_KERNEL);
+	if (unlikely(!range->pfns)) {
 		r = -ENOMEM;
 		goto out_free_ranges;
 	}
 
-	amdgpu_hmm_init_range(range);
-	range->default_flags = range->flags[HMM_PFN_VALID];
-	range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
-				0 : range->flags[HMM_PFN_WRITE];
-	range->pfn_flags_mask = 0;
-	range->pfns = pfns;
-	range->start = start;
-	range->end = start + ttm->num_pages * PAGE_SIZE;
-
-	hmm_range_register(range, mirror);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, start);
+	if (unlikely(!vma || start < vma->vm_start)) {
+		r = -EFAULT;
+		goto out_unlock;
+	}
+	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+		vma->vm_file)) {
+		r = -EPERM;
+		goto out_unlock;
+	}
+	up_read(&mm->mmap_sem);
+	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 
-	/*
-	 * Just wait for range to be valid, safe to ignore return value as we
-	 * will use the return value of hmm_range_fault() below under the
-	 * mmap_sem to ascertain the validity of the range.
-	 */
-	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+retry:
+	range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
 
 	down_read(&mm->mmap_sem);
 	r = hmm_range_fault(range, 0);
 	up_read(&mm->mmap_sem);
-
-	if (unlikely(r < 0))
+	if (unlikely(r <= 0)) {
+		/*
+		 * FIXME: This timeout should encompass the retry from
+		 * mmu_interval_read_retry() as well.
+		 */
+		if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+			goto retry;
 		goto out_free_pfns;
+	}
 
 	for (i = 0; i < ttm->num_pages; i++) {
-		pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+		/* FIXME: The pages cannot be touched outside the notifier_lock */
+		pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
 		if (unlikely(!pages[i])) {
 			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
-			       i, pfns[i]);
+			       i, range->pfns[i]);
 			r = -ENOMEM;
 
 			goto out_free_pfns;
@@ -865,15 +882,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	}
 
 	gtt->range = range;
+	mmput(mm);
 
 	return 0;
 
+out_unlock:
+	up_read(&mm->mmap_sem);
 out_free_pfns:
-	hmm_range_unregister(range);
-	kvfree(pfns);
+	kvfree(range->pfns);
 out_free_ranges:
 	kfree(range);
 out:
+	mmput(mm);
 	return r;
 }
 
@@ -898,15 +918,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 		"No user pages to check\n");
 
 	if (gtt->range) {
-		r = hmm_range_valid(gtt->range);
-		hmm_range_unregister(gtt->range);
-
+		/*
+		 * FIXME: Must always hold notifier_lock for this, and must
+		 * not ignore the return code.
+		 */
+		r = mmu_interval_read_retry(gtt->range->notifier,
+					 gtt->range->notifier_seq);
 		kvfree(gtt->range->pfns);
 		kfree(gtt->range);
 		gtt->range = NULL;
 	}
 
-	return r;
+	return !r;
 }
 #endif
 
@@ -987,10 +1010,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	sg_free_table(ttm->sg);
 
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	if (gtt->range &&
-	    ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
-						      gtt->range->pfns[0]))
-		WARN_ONCE(1, "Missing get_user_page_done\n");
+	if (gtt->range) {
+		unsigned long i;
+
+		for (i = 0; i < ttm->num_pages; i++) {
+			if (ttm->pages[i] !=
+				hmm_device_entry_to_page(gtt->range,
+					      gtt->range->pfns[i]))
+				break;
+		}
+
+		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+	}
 #endif
 }
 
@@ -1217,16 +1248,14 @@ static struct ttm_backend_func amdgpu_backend_func = {
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
-	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt;
 
-	adev = amdgpu_ttm_adev(bo->bdev);
-
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	if (gtt == NULL) {
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
+	gtt->gobj = &bo->base;
 
 	/* allocate space for the uninitialized page entries */
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1247,7 +1276,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
 	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 	if (gtt && gtt->userptr) {
@@ -1260,7 +1288,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 		return 0;
 	}
 
-	if (slave && ttm->sg) {
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+		if (!ttm->sg) {
+			struct dma_buf_attachment *attach;
+			struct sg_table *sgt;
+
+			attach = gtt->gobj->import_attach;
+			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+			if (IS_ERR(sgt))
+				return PTR_ERR(sgt);
+
+			ttm->sg = sgt;
+		}
+
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address,
 						 ttm->num_pages);
@@ -1287,9 +1327,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
  */
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+	struct amdgpu_device *adev;
 
 	if (gtt && gtt->userptr) {
 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1298,7 +1337,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 	}
 
-	if (slave)
+	if (ttm->sg && gtt->gobj->import_attach) {
+		struct dma_buf_attachment *attach;
+
+		attach = gtt->gobj->import_attach;
+		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+		ttm->sg = NULL;
+		return;
+	}
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
 		return;
 
 	adev = amdgpu_ttm_adev(ttm->bdev);
@@ -1475,11 +1523,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	struct dma_fence *f;
 	int i;
 
-	/* Don't evict VM page tables while they are busy, otherwise we can't
-	 * cleanly handle page faults.
-	 */
 	if (bo->type == ttm_bo_type_kernel &&
-	    !dma_resv_test_signaled_rcu(bo->base.resv, true))
+	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
 		return false;
 
 	/* If bo is a KFD BO, check if the bo belongs to the current process.
@@ -1634,81 +1679,96 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
  */
 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 {
-	struct ttm_operation_ctx ctx = { false, false };
-	struct amdgpu_bo_param bp;
-	int r = 0;
-	int i;
-	u64 vram_size = adev->gmc.visible_vram_size;
-	u64 offset = adev->fw_vram_usage.start_offset;
-	u64 size = adev->fw_vram_usage.size;
-	struct amdgpu_bo *bo;
-
-	memset(&bp, 0, sizeof(bp));
-	bp.size = adev->fw_vram_usage.size;
-	bp.byte_align = PAGE_SIZE;
-	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	bp.type = ttm_bo_type_kernel;
-	bp.resv = NULL;
+	uint64_t vram_size = adev->gmc.visible_vram_size;
+
 	adev->fw_vram_usage.va = NULL;
 	adev->fw_vram_usage.reserved_bo = NULL;
 
-	if (adev->fw_vram_usage.size > 0 &&
-		adev->fw_vram_usage.size <= vram_size) {
+	if (adev->fw_vram_usage.size == 0 ||
+	    adev->fw_vram_usage.size > vram_size)
+		return 0;
 
-		r = amdgpu_bo_create(adev, &bp,
-				     &adev->fw_vram_usage.reserved_bo);
-		if (r)
-			goto error_create;
+	return amdgpu_bo_create_kernel_at(adev,
+					  adev->fw_vram_usage.start_offset,
+					  adev->fw_vram_usage.size,
+					  AMDGPU_GEM_DOMAIN_VRAM,
+					  &adev->fw_vram_usage.reserved_bo,
+					  &adev->fw_vram_usage.va);
+}
 
-		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
-		if (r)
-			goto error_reserve;
+/*
+ * Memoy training reservation functions
+ */
 
-		/* remove the original mem node and create a new one at the
-		 * request position
-		 */
-		bo = adev->fw_vram_usage.reserved_bo;
-		offset = ALIGN(offset, PAGE_SIZE);
-		for (i = 0; i < bo->placement.num_placement; ++i) {
-			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
-			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
-		}
+/**
+ * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free memory training reserved vram if it has been reserved.
+ */
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
 
-		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
-		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
-				     &bo->tbo.mem, &ctx);
-		if (r)
-			goto error_pin;
+	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
+	ctx->c2p_bo = NULL;
 
-		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
-			AMDGPU_GEM_DOMAIN_VRAM,
-			adev->fw_vram_usage.start_offset,
-			(adev->fw_vram_usage.start_offset +
-			adev->fw_vram_usage.size));
-		if (r)
-			goto error_pin;
-		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
-			&adev->fw_vram_usage.va);
-		if (r)
-			goto error_kmap;
+	return 0;
+}
+
+static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
+{
+       if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
+               vram_size -= SZ_1M;
 
-		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+       return ALIGN(vram_size, SZ_1M);
+}
+
+/**
+ * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from memory training.
+ */
+static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
+{
+	int ret;
+	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+	memset(ctx, 0, sizeof(*ctx));
+	if (!adev->fw_vram_usage.mem_train_support) {
+		DRM_DEBUG("memory training does not support!\n");
+		return 0;
 	}
-	return r;
 
-error_kmap:
-	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
-error_pin:
-	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-error_reserve:
-	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
-error_create:
-	adev->fw_vram_usage.va = NULL;
-	adev->fw_vram_usage.reserved_bo = NULL;
-	return r;
+	ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
+	ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+	ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+		  ctx->train_data_size,
+		  ctx->p2c_train_data_offset,
+		  ctx->c2p_train_data_offset);
+
+	ret = amdgpu_bo_create_kernel_at(adev,
+					 ctx->c2p_train_data_offset,
+					 ctx->train_data_size,
+					 AMDGPU_GEM_DOMAIN_VRAM,
+					 &ctx->c2p_bo,
+					 NULL);
+	if (ret) {
+		DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+		amdgpu_ttm_training_reserve_vram_fini(adev);
+		return ret;
+	}
+
+	ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+	return 0;
 }
+
 /**
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -1731,6 +1791,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	r = ttm_bo_device_init(&adev->mman.bdev,
 			       &amdgpu_bo_driver,
 			       adev->ddev->anon_inode->i_mapping,
+			       adev->ddev->vma_offset_manager,
 			       dma_addressing_limited(adev->dev));
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
@@ -1771,6 +1832,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 
+	/*
+	 *The reserved vram for memory training must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_ttm_training_reserve_vram_init(adev);
+	if (r)
+		return r;
+
 	/* allocate memory as required for VGA
 	 * This is used for VGA emulation and pre-OS scanout buffers to
 	 * avoid display artifacts while transitioning between pre-OS
@@ -1781,6 +1850,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 				    NULL, &stolen_vga_buf);
 	if (r)
 		return r;
+
+	/*
+	 * reserve one TMR (64K) memory at the top of VRAM which holds
+	 * IP Discovery data and is protected by PSP.
+	 */
+	r = amdgpu_bo_create_kernel_at(adev,
+				       adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
+				       DISCOVERY_TMR_SIZE,
+				       AMDGPU_GEM_DOMAIN_VRAM,
+				       &adev->discovery_memory,
+				       NULL);
+	if (r)
+		return r;
+
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
@@ -1856,7 +1939,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 		return;
 
 	amdgpu_ttm_debugfs_fini(adev);
+	amdgpu_ttm_training_reserve_vram_fini(adev);
+	/* return the IP Discovery TMR memory back to VRAM */
+	amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+
 	if (adev->mman.aper_base_kaddr)
 		iounmap(adev->mman.aper_base_kaddr);
 	adev->mman.aper_base_kaddr = NULL;
@@ -1892,11 +1979,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 
 	if (enable) {
 		struct amdgpu_ring *ring;
-		struct drm_sched_rq *rq;
+		struct drm_gpu_scheduler *sched;
 
 		ring = adev->mman.buffer_funcs_ring;
-		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-		r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+		sched = &ring->sched;
+		r = drm_sched_entity_init(&adev->mman.entity,
+				          DRM_SCHED_PRIORITY_KERNEL, &sched,
+					  1, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
 				  r);
@@ -1952,10 +2041,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
 		AMDGPU_GPU_PAGE_SIZE;
 
-	num_dw = adev->mman.buffer_funcs->copy_num_dw;
-	while (num_dw & 0x7)
-		num_dw++;
-
+	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = num_pages * 8;
 
 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
@@ -2015,11 +2101,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 
 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
-	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
-
-	/* for IB padding */
-	while (num_dw & 0x7)
-		num_dw++;
+	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
 
 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 3a6115ad0196..9ef312428231 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -360,6 +360,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 	case CHIP_RAVEN:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
+	case CHIP_ARCTURUS:
 	case CHIP_RENOIR:
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
@@ -368,8 +369,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
 			return AMDGPU_FW_LOAD_PSP;
-	case CHIP_ARCTURUS:
-		return AMDGPU_FW_LOAD_DIRECT;
 
 	default:
 		DRM_ERROR("Unknown firmware load type\n");
@@ -448,6 +447,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	const struct common_firmware_header *header = NULL;
 	const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
 	const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
+	const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
 
 	if (NULL == ucode->fw)
 		return 0;
@@ -461,6 +461,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	header = (const struct common_firmware_header *)ucode->fw->data;
 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
 	dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
+	dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
 	    (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -471,7 +472,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
 		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
-		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
+		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV &&
+		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) {
 		ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
 
 		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -507,6 +509,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 					      le32_to_cpu(header->ucode_array_offset_bytes) +
 					      le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
 		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) {
+		ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+		memcpy(ucode->kaddr,
+		       (void *)((uint8_t *)ucode->fw->data +
+				le32_to_cpu(header->ucode_array_offset_bytes)),
+		       ucode->ucode_size);
 	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
 		ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
 		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index b34f00d42049..b0e656409c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -108,6 +108,12 @@ struct ta_firmware_header_v1_0 {
 	uint32_t ta_ras_ucode_version;
 	uint32_t ta_ras_offset_bytes;
 	uint32_t ta_ras_size_bytes;
+	uint32_t ta_hdcp_ucode_version;
+	uint32_t ta_hdcp_offset_bytes;
+	uint32_t ta_hdcp_size_bytes;
+	uint32_t ta_dtm_ucode_version;
+	uint32_t ta_dtm_offset_bytes;
+	uint32_t ta_dtm_size_bytes;
 };
 
 /* version_major=1, version_minor=0 */
@@ -245,6 +251,13 @@ struct dmcu_firmware_header_v1_0 {
 	uint32_t intv_size_bytes;  /* size of interrupt vectors, in bytes */
 };
 
+/* version_major=1, version_minor=0 */
+struct dmcub_firmware_header_v1_0 {
+	struct common_firmware_header header;
+	uint32_t inst_const_bytes; /* size of instruction region, in bytes */
+	uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
+};
+
 /* header is fixed size */
 union amdgpu_firmware_header {
 	struct common_firmware_header common;
@@ -262,6 +275,7 @@ union amdgpu_firmware_header {
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
 	struct gpu_info_firmware_header_v1_0 gpu_info;
 	struct dmcu_firmware_header_v1_0 dmcu;
+	struct dmcub_firmware_header_v1_0 dmcub;
 	uint8_t raw[0x100];
 };
 
@@ -286,10 +300,10 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_CP_MEC2_JT,
 	AMDGPU_UCODE_ID_CP_MES,
 	AMDGPU_UCODE_ID_CP_MES_DATA,
-	AMDGPU_UCODE_ID_RLC_G,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+	AMDGPU_UCODE_ID_RLC_G,
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,
@@ -301,6 +315,7 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_DMCU_INTV,
 	AMDGPU_UCODE_ID_VCN0_RAM,
 	AMDGPU_UCODE_ID_VCN1_RAM,
+	AMDGPU_UCODE_ID_DMCUB,
 	AMDGPU_UCODE_ID_MAXIMUM,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
new file mode 100644
index 000000000000..f4d40855147b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras.h"
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "umc_err_count",
+		.debugfs_name = "umc_err_inject",
+	};
+	struct ras_ih_if ih_info = {
+		.cb = amdgpu_umc_process_ras_data_cb,
+	};
+
+	if (!adev->umc.ras_if) {
+		adev->umc.ras_if =
+			kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->umc.ras_if)
+			return -ENOMEM;
+		adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
+		adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->umc.ras_if->sub_block_index = 0;
+		strcpy(adev->umc.ras_if->name, "umc");
+	}
+	ih_info.head = fs_info.head = *adev->umc.ras_if;
+
+	r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
+				 &fs_info, &ih_info);
+	if (r)
+		goto free;
+
+	if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
+		r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+		if (r)
+			goto late_fini;
+	} else {
+		r = 0;
+		goto free;
+	}
+
+	/* ras init of specific umc version */
+	if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
+		adev->umc.funcs->err_cnt_init(adev);
+
+	return 0;
+
+late_fini:
+	amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
+free:
+	kfree(adev->umc.ras_if);
+	adev->umc.ras_if = NULL;
+	return r;
+}
+
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
+			adev->umc.ras_if) {
+		struct ras_common_if *ras_if = adev->umc.ras_if;
+		struct ras_ih_if ih_info = {
+			.head = *ras_if,
+			.cb = amdgpu_umc_process_ras_data_cb,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+		void *ras_error_status,
+		struct amdgpu_iv_entry *entry)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+	if (adev->umc.funcs &&
+	    adev->umc.funcs->query_ras_error_count)
+	    adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
+
+	if (adev->umc.funcs &&
+	    adev->umc.funcs->query_ras_error_address &&
+	    adev->umc.max_ras_err_cnt_per_query) {
+		err_data->err_addr =
+			kcalloc(adev->umc.max_ras_err_cnt_per_query,
+				sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+		/* still call query_ras_error_address to clear error status
+		 * even NOMEM error is encountered
+		 */
+		if(!err_data->err_addr)
+			DRM_WARN("Failed to alloc memory for umc error address record!\n");
+
+		/* umc query_ras_error_address is also responsible for clearing
+		 * error status
+		 */
+		adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
+	}
+
+	/* only uncorrectable error needs gpu reset */
+	if (err_data->ue_count) {
+		if (err_data->err_addr_cnt &&
+		    amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+						err_data->err_addr_cnt))
+			DRM_WARN("Failed to add ras bad page!\n");
+
+		amdgpu_ras_reset_gpu(adev);
+	}
+
+	kfree(err_data->err_addr);
+	return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+		struct amdgpu_irq_src *source,
+		struct amdgpu_iv_entry *entry)
+{
+	struct ras_common_if *ras_if = adev->umc.ras_if;
+	struct ras_dispatch_if ih_data = {
+		.entry = entry,
+	};
+
+	if (!ras_if)
+		return 0;
+
+	ih_data.head = *ras_if;
+
+	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 975afa04df09..a615a1eb750b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -21,47 +21,14 @@
 #ifndef __AMDGPU_UMC_H__
 #define __AMDGPU_UMC_H__
 
-/* implement 64 bits REG operations via 32 bits interface */
-#define RREG64_UMC(reg)	(RREG32(reg) | \
-				((uint64_t)RREG32((reg) + 1) << 32))
-#define WREG64_UMC(reg, v)	\
-	do {	\
-		WREG32((reg), lower_32_bits(v));	\
-		WREG32((reg) + 1, upper_32_bits(v));	\
-	} while (0)
-
-/*
- * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data,
- *				uint32_t umc_reg_offset, uint32_t channel_index)
- */
-#define amdgpu_umc_for_each_channel(func)	\
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;	\
-	uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index;	\
-	for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) {	\
-		/* enable the index mode to query eror count per channel */	\
-		adev->umc.funcs->enable_umc_index_mode(adev, umc_inst);	\
-		for (channel_inst = 0;	\
-			channel_inst < adev->umc.channel_inst_num;	\
-			channel_inst++) {	\
-			/* calc the register offset according to channel instance */	\
-			umc_reg_offset = adev->umc.channel_offs * channel_inst;	\
-			/* get channel index of interleaved memory */	\
-			channel_index = adev->umc.channel_idx_tbl[	\
-				umc_inst * adev->umc.channel_inst_num + channel_inst];	\
-			(func)(adev, err_data, umc_reg_offset, channel_index);	\
-		}	\
-	}	\
-	adev->umc.funcs->disable_umc_index_mode(adev);
-
 struct amdgpu_umc_funcs {
-	void (*ras_init)(struct amdgpu_device *adev);
+	void (*err_cnt_init)(struct amdgpu_device *adev);
+	int (*ras_late_init)(struct amdgpu_device *adev);
 	void (*query_ras_error_count)(struct amdgpu_device *adev,
 					void *ras_error_status);
 	void (*query_ras_error_address)(struct amdgpu_device *adev,
 					void *ras_error_status);
-	void (*enable_umc_index_mode)(struct amdgpu_device *adev,
-					uint32_t umc_instance);
-	void (*disable_umc_index_mode)(struct amdgpu_device *adev);
+	void (*init_registers)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_umc {
@@ -75,8 +42,17 @@ struct amdgpu_umc {
 	uint32_t channel_offs;
 	/* channel index table of interleaved memory */
 	const uint32_t *channel_idx_tbl;
+	struct ras_common_if *ras_if;
 
 	const struct amdgpu_umc_funcs *funcs;
 };
 
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+		void *ras_error_status,
+		struct amdgpu_iv_entry *entry);
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+		struct amdgpu_irq_src *source,
+		struct amdgpu_iv_entry *entry);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2c364b8695f..a92f3b18e657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -39,6 +39,8 @@
 #include "cikd.h"
 #include "uvd/uvd_4_2_d.h"
 
+#include "amdgpu_ras.h"
+
 /* 1 second timeout */
 #define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
@@ -297,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
 	int i, j;
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
 	drm_sched_entity_destroy(&adev->uvd.entity);
 
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
@@ -327,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int r;
 
 	ring = &adev->uvd.inst[0].ring;
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+	sched = &ring->sched;
+	r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up UVD kernel entity.\n");
 		return r;
@@ -346,6 +350,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 	unsigned size;
 	void *ptr;
 	int i, j;
+	bool in_ras_intr = amdgpu_ras_intr_triggered();
 
 	cancel_delayed_work_sync(&adev->uvd.idle_work);
 
@@ -372,8 +377,16 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 		if (!adev->uvd.inst[j].saved_bo)
 			return -ENOMEM;
 
-		memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+		/* re-write 0 since err_event_athub will corrupt VCPU buffer */
+		if (in_ras_intr)
+			memset(adev->uvd.inst[j].saved_bo, 0, size);
+		else
+			memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
 	}
+
+	if (in_ras_intr)
+		DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 65044b1b3d4c..ceb0dbf685f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -80,6 +80,11 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
 MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+				     struct amdgpu_bo *bo,
+				     struct dma_fence **fence);
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+				      bool direct, struct dma_fence **fence);
 
 /**
  * amdgpu_vce_init - allocate memory, load vce firmware
@@ -211,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	if (adev->vce.vcpu_bo == NULL)
 		return 0;
 
+	cancel_delayed_work_sync(&adev->vce.idle_work);
 	drm_sched_entity_destroy(&adev->vce.entity);
 
 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
@@ -234,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 int amdgpu_vce_entity_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int r;
 
 	ring = &adev->vce.ring[0];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+	sched = &ring->sched;
+	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
@@ -428,9 +435,9 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
  *
  * Open up a stream for HW test
  */
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct amdgpu_bo *bo,
-			      struct dma_fence **fence)
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+				     struct amdgpu_bo *bo,
+				     struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
@@ -508,8 +515,8 @@ err:
  *
  * Close up a stream for HW test or if userspace failed to do so
  */
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct dma_fence **fence)
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+				      bool direct, struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index e802f7d9db0a..d6d83a3ec803 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -58,11 +58,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
 int amdgpu_vce_entity_init(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct amdgpu_bo *bo,
-			      struct dma_fence **fence);
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct dma_fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 3199e4a5ff12..f96464e2c157 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -28,19 +28,10 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
-#include <drm/drm.h>
-
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_vcn.h"
 #include "soc15d.h"
-#include "soc15_common.h"
-
-#include "vcn/vcn_1_0_offset.h"
-#include "vcn/vcn_1_0_sh_mask.h"
-
-/* 1 second timeout */
-#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
 /* Firmware Names */
 #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
@@ -84,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 		break;
 	case CHIP_ARCTURUS:
 		fw_name = FIRMWARE_ARCTURUS;
+		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+			adev->vcn.indirect_sram = true;
 		break;
 	case CHIP_RENOIR:
 		fw_name = FIRMWARE_RENOIR;
@@ -174,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
 			return r;
 		}
-	}
 
-	if (adev->vcn.indirect_sram) {
-		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
-			    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
-			    &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
-		if (r) {
-			dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
-			return r;
+		if (adev->vcn.indirect_sram) {
+			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
+					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
+			if (r) {
+				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+				return r;
+			}
 		}
 	}
 
@@ -193,15 +187,16 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 {
 	int i, j;
 
-	if (adev->vcn.indirect_sram) {
-		amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
-				      &adev->vcn.dpg_sram_gpu_addr,
-				      (void **)&adev->vcn.dpg_sram_cpu_addr);
-	}
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
 
 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
 		if (adev->vcn.harvest_config & (1 << j))
 			continue;
+		if (adev->vcn.indirect_sram) {
+			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
+						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
+						  (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+		}
 		kvfree(adev->vcn.inst[j].saved_bo);
 
 		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
@@ -212,8 +207,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 
 		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
 			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
-
-		amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg);
 	}
 
 	release_firmware(adev->vcn.fw);
@@ -294,6 +287,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
 		if (adev->vcn.harvest_config & (1 << j))
 			continue;
+
 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
 		}
@@ -306,26 +300,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 			else
 				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-			if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg))
-				new_state.jpeg = VCN_DPG_STATE__PAUSE;
-			else
-				new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
-			adev->vcn.pause_dpg_mode(adev, &new_state);
+			adev->vcn.pause_dpg_mode(adev, j, &new_state);
 		}
 
-		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg);
 		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
 		fences += fence[j];
 	}
 
 	if (fences == 0) {
 		amdgpu_gfx_off_ctrl(adev, true);
-		if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
-			amdgpu_dpm_enable_uvd(adev, false);
-		else
-			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-							       AMD_PG_STATE_GATE);
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+		       AMD_PG_STATE_GATE);
 	} else {
 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 	}
@@ -338,11 +323,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 	if (set_clocks) {
 		amdgpu_gfx_off_ctrl(adev, false);
-		if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
-			amdgpu_dpm_enable_uvd(adev, true);
-		else
-			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-							       AMD_PG_STATE_UNGATE);
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+		       AMD_PG_STATE_UNGATE);
 	}
 
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
@@ -358,17 +340,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 		else
 			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-		if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))
-			new_state.jpeg = VCN_DPG_STATE__PAUSE;
-		else
-			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
-		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
-			new_state.jpeg = VCN_DPG_STATE__PAUSE;
 
-		adev->vcn.pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
 	}
 }
 
@@ -518,9 +493,14 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 
 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *fence;
 	long r;
 
+	/* temporarily disable ib test for sriov */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
 	if (r)
 		goto error;
@@ -676,10 +656,15 @@ err:
 
 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *fence = NULL;
 	struct amdgpu_bo *bo = NULL;
 	long r;
 
+	/* temporarily disable ib test for sriov */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &bo, NULL, NULL);
@@ -706,108 +691,3 @@ error:
 	amdgpu_bo_unref(&bo);
 	return r;
 }
-
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
-	r = amdgpu_ring_alloc(ring, 3);
-	if (r)
-		return r;
-
-	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0));
-	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_commit(ring);
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		udelay(1);
-	}
-
-	if (i >= adev->usec_timeout)
-		r = -ETIMEDOUT;
-
-	return r;
-}
-
-static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
-		struct dma_fence **fence)
-{
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
-	struct dma_fence *f = NULL;
-	const unsigned ib_size_dw = 16;
-	int i, r;
-
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
-	if (r)
-		return r;
-
-	ib = &job->ibs[0];
-
-	ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
-	ib->ptr[1] = 0xDEADBEEF;
-	for (i = 2; i < 16; i += 2) {
-		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
-		ib->ptr[i+1] = 0;
-	}
-	ib->length_dw = 16;
-
-	r = amdgpu_job_submit_direct(job, ring, &f);
-	if (r)
-		goto err;
-
-	if (fence)
-		*fence = dma_fence_get(f);
-	dma_fence_put(f);
-
-	return 0;
-
-err:
-	amdgpu_job_free(job);
-	return r;
-}
-
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
-{
-	struct amdgpu_device *adev = ring->adev;
-	uint32_t tmp = 0;
-	unsigned i;
-	struct dma_fence *fence = NULL;
-	long r = 0;
-
-	r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
-	if (r)
-		goto error;
-
-	r = dma_fence_wait_timeout(fence, false, timeout);
-	if (r == 0) {
-		r = -ETIMEDOUT;
-		goto error;
-	} else if (r < 0) {
-		goto error;
-	} else {
-		r = 0;
-	}
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		udelay(1);
-	}
-
-	if (i >= adev->usec_timeout)
-		r = -ETIMEDOUT;
-
-	dma_fence_put(fence);
-error:
-	return r;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index dface275c81a..c4984c5fb2db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -31,6 +31,7 @@
 #define AMDGPU_VCN_MAX_ENC_RINGS	3
 
 #define AMDGPU_MAX_VCN_INSTANCES	2
+#define AMDGPU_MAX_VCN_ENC_RINGS  AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
 
 #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
 #define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -56,6 +57,14 @@
 #define VCN_VID_IP_ADDRESS_2_0		0x0
 #define VCN_AON_IP_ADDRESS_2_0		0x30000
 
+#define mmUVD_RBC_XX_IB_REG_CHECK 					0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 				1
+#define mmUVD_REG_XX_MASK 						0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 					1
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
 #define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) 				\
 	({	WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); 			\
 		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, 				\
@@ -100,27 +109,27 @@
 		internal_reg_offset >>= 2;							\
 	})
 
-#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) 						\
-	({ 											\
-		WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, 					\
-			(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | 				\
-			mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | 				\
-			offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); 			\
-		RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); 					\
+#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) 					\
+	({											\
+		WREG32_SOC15(VCN, inst, mmUVD_DPG_LMA_CTL, 					\
+			(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |				\
+			mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |				\
+			offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));			\
+		RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA);				\
 	})
 
-#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect)				\
-	do { 											\
-		if (!indirect) { 								\
-			WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); 			\
-			WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, 				\
-				(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | 			\
-				 mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | 			\
-				 offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); 		\
-		} else { 									\
-			*adev->vcn.dpg_sram_curr_addr++ = offset; 				\
-			*adev->vcn.dpg_sram_curr_addr++ = value; 				\
-		} 										\
+#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect)			\
+	do {											\
+		if (!indirect) {								\
+			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value);			\
+			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, 				\
+				(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |			\
+				 mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |			\
+				 offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));		\
+		} else {									\
+			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset;		\
+			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value;			\
+		}										\
 	} while (0)
 
 enum engine_status_constants {
@@ -158,7 +167,6 @@ struct amdgpu_vcn_reg{
 	unsigned	ib_size;
 	unsigned	gp_scratch8;
 	unsigned	scratch9;
-	unsigned	jpeg_pitch;
 };
 
 struct amdgpu_vcn_inst {
@@ -168,9 +176,12 @@ struct amdgpu_vcn_inst {
 	void			*saved_bo;
 	struct amdgpu_ring	ring_dec;
 	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
-	struct amdgpu_ring	ring_jpeg;
 	struct amdgpu_irq_src	irq;
 	struct amdgpu_vcn_reg	external;
+	struct amdgpu_bo	*dpg_sram_bo;
+	void			*dpg_sram_cpu_addr;
+	uint64_t		dpg_sram_gpu_addr;
+	uint32_t		*dpg_sram_curr_addr;
 };
 
 struct amdgpu_vcn {
@@ -182,18 +193,18 @@ struct amdgpu_vcn {
 	struct dpg_pause_state pause_state;
 
 	bool			indirect_sram;
-	struct amdgpu_bo	*dpg_sram_bo;
-	void			*dpg_sram_cpu_addr;
-	uint64_t		dpg_sram_gpu_addr;
-	uint32_t		*dpg_sram_curr_addr;
 
 	uint8_t	num_vcn_inst;
-	struct amdgpu_vcn_inst	inst[AMDGPU_MAX_VCN_INSTANCES];
-	struct amdgpu_vcn_reg	internal;
+	struct amdgpu_vcn_inst	 inst[AMDGPU_MAX_VCN_INSTANCES];
+	struct amdgpu_vcn_reg	 internal;
+	struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS];
+	struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
+	uint32_t		 num_vcn_enc_sched;
+	uint32_t		 num_vcn_dec_sched;
 
 	unsigned	harvest_config;
 	int (*pause_dpg_mode)(struct amdgpu_device *adev,
-		struct dpg_pause_state *new_state);
+		int inst_idx, struct dpg_pause_state *new_state);
 };
 
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
@@ -209,7 +220,4 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
 
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e32ae906d797..103033f96f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -379,54 +379,3 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		}
 	}
 }
-
-static uint32_t parse_clk(char *buf, bool min)
-{
-        char *ptr = buf;
-        uint32_t clk = 0;
-
-        do {
-                ptr = strchr(ptr, ':');
-                if (!ptr)
-                        break;
-                ptr+=2;
-		if (kstrtou32(ptr, 10, &clk))
-			return 0;
-        } while (!min);
-
-        return clk * 100;
-}
-
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest)
-{
-	char *buf = NULL;
-	uint32_t clk = 0;
-
-	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
-	clk = parse_clk(buf, lowest);
-
-	kfree(buf);
-
-	return clk;
-}
-
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
-{
-	char *buf = NULL;
-	uint32_t clk = 0;
-
-	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
-	clk = parse_clk(buf, lowest);
-
-	kfree(buf);
-
-	return clk;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b0b2bdc750df..4d1ac7612967 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -57,8 +57,6 @@ struct amdgpu_virt_ops {
 	int (*reset_gpu)(struct amdgpu_device *adev);
 	int (*wait_reset)(struct amdgpu_device *adev);
 	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
-	int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
-	int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
 };
 
 /*
@@ -85,8 +83,8 @@ enum AMDGIM_FEATURE_FLAG {
 	AMDGIM_FEATURE_GIM_LOAD_UCODES   = 0x2,
 	/* VRAM LOST by GIM */
 	AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
-	/* HW PERF SIM in GIM */
-	AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3),
+	/* PP ONE VF MODE in GIM */
+	AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
 };
 
 struct amd_sriov_msg_pf2vf_info_header {
@@ -257,8 +255,6 @@ struct amdgpu_virt {
 	struct amdgpu_vf_error_buffer   vf_errors;
 	struct amdgpu_virt_fw_reserve	fw_reserve;
 	uint32_t gim_feature;
-	/* protect DPM events to GIM */
-	struct mutex                    dpm_mutex;
 	uint32_t reg_access_mode;
 };
 
@@ -286,8 +282,8 @@ static inline bool is_virtual_machine(void)
 #endif
 }
 
-#define amdgim_is_hwperf(adev) \
-	((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION)
+#define amdgpu_sriov_is_pp_one_vf(adev) \
+	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -306,6 +302,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
 					unsigned int key,
 					unsigned int chksum);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5251352f5922..d16231d6a790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -83,6 +83,32 @@ struct amdgpu_prt_cb {
 };
 
 /**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_nofs_save();
+		return 1;
+	}
+	return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_nofs_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
+
+/**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
  * @adev: amdgpu_device pointer
@@ -130,7 +156,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 
 	if (level == adev->vm_manager.root_level)
 		/* For the root directory */
-		return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
+		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+			>> shift;
 	else if (level != AMDGPU_VM_PTB)
 		/* Everything in between */
 		return 512;
@@ -341,7 +368,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
 	return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
 }
 
-/**
+/*
  * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
  */
 struct amdgpu_vm_pt_cursor {
@@ -482,6 +509,7 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device structure
  * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
  * @cursor: state to initialize
  *
  * Starts a deep first traversal of the PD/PT tree.
@@ -535,7 +563,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
 		amdgpu_vm_pt_ancestor(cursor);
 }
 
-/**
+/*
  * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
  */
 #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
@@ -560,12 +588,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 {
 	entry->priority = 0;
 	entry->tv.bo = &vm->root.base.bo->tbo;
-	/* One for the VM updates, one for TTM and one for the CS job */
-	entry->tv.num_shared = 3;
+	/* One for TTM and one for the CS job */
+	entry->tv.num_shared = 2;
 	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
+/**
+ * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
+ *
+ * @bo: BO which was removed from the LRU
+ *
+ * Make sure the bulk_moveable flag is updated when a BO is removed from the
+ * LRU.
+ */
 void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_bo *abo;
@@ -600,19 +636,18 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm)
 {
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
 	struct amdgpu_vm_bo_base *bo_base;
 
 	if (vm->bulk_moveable) {
-		spin_lock(&glob->lru_lock);
+		spin_lock(&ttm_bo_glob.lru_lock);
 		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
-		spin_unlock(&glob->lru_lock);
+		spin_unlock(&ttm_bo_glob.lru_lock);
 		return;
 	}
 
 	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&ttm_bo_glob.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
@@ -624,7 +659,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
 						&vm->lru_bulk_move);
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&ttm_bo_glob.lru_lock);
 
 	vm->bulk_moveable = true;
 }
@@ -647,7 +682,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      void *param)
 {
 	struct amdgpu_vm_bo_base *bo_base, *tmp;
-	int r = 0;
+	int r;
 
 	vm->bulk_moveable &= list_empty(&vm->evicted);
 
@@ -656,7 +691,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		r = validate(param, bo);
 		if (r)
-			break;
+			return r;
 
 		if (bo->tbo.type != ttm_bo_type_kernel) {
 			amdgpu_vm_bo_moved(bo_base);
@@ -669,7 +704,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	return r;
+	amdgpu_vm_eviction_lock(vm);
+	vm->evicting = false;
+	amdgpu_vm_eviction_unlock(vm);
+
+	return 0;
 }
 
 /**
@@ -693,6 +732,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
+ * @direct: use a direct update
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -701,7 +741,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_vm *vm,
-			      struct amdgpu_bo *bo)
+			      struct amdgpu_bo *bo,
+			      bool direct)
 {
 	struct ttm_operation_ctx ctx = { true, false };
 	unsigned level = adev->vm_manager.root_level;
@@ -760,6 +801,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 
 	r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_KFD, NULL);
 	if (r)
@@ -813,10 +855,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @vm: requesting vm
+ * @level: the page table level
+ * @direct: use a direct update
  * @bp: resulting BO allocation parameters
  */
 static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			       int level, struct amdgpu_bo_param *bp)
+			       int level, bool direct,
+			       struct amdgpu_bo_param *bp)
 {
 	memset(bp, 0, sizeof(*bp));
 
@@ -831,6 +876,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
 		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
 	bp->type = ttm_bo_type_kernel;
+	bp->no_wait_gpu = direct;
 	if (vm->root.base.bo)
 		bp->resv = vm->root.base.bo->tbo.base.resv;
 }
@@ -841,6 +887,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * @adev: amdgpu_device pointer
  * @vm: VM to allocate page tables for
  * @cursor: Which page table to allocate
+ * @direct: use a direct update
  *
  * Make sure a specific page table or directory is allocated.
  *
@@ -850,7 +897,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
-			       struct amdgpu_vm_pt_cursor *cursor)
+			       struct amdgpu_vm_pt_cursor *cursor,
+			       bool direct)
 {
 	struct amdgpu_vm_pt *entry = cursor->entry;
 	struct amdgpu_bo_param bp;
@@ -871,7 +919,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	if (entry->base.bo)
 		return 0;
 
-	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
+	amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
 
 	r = amdgpu_bo_create(adev, &bp, &pt);
 	if (r)
@@ -883,7 +931,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
 	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-	r = amdgpu_vm_clear_bo(adev, vm, pt);
+	r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
 	if (r)
 		goto error_free_pt;
 
@@ -1020,7 +1068,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
  * Returns:
  * 0 on success, errno otherwise.
  */
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+		    bool need_pipe_sync)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
@@ -1034,10 +1083,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 	bool vm_flush_needed = job->vm_needs_flush;
-	bool pasid_mapping_needed = id->pasid != job->pasid ||
-		!id->pasid_mapping ||
-		!dma_fence_is_signaled(id->pasid_mapping);
 	struct dma_fence *fence = NULL;
+	bool pasid_mapping_needed = false;
 	unsigned patch_offset = 0;
 	int r;
 
@@ -1047,6 +1094,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		pasid_mapping_needed = true;
 	}
 
+	mutex_lock(&id_mgr->lock);
+	if (id->pasid != job->pasid || !id->pasid_mapping ||
+	    !dma_fence_is_signaled(id->pasid_mapping))
+		pasid_mapping_needed = true;
+	mutex_unlock(&id_mgr->lock);
+
 	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
 	vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
 			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
@@ -1086,9 +1139,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	}
 
 	if (pasid_mapping_needed) {
+		mutex_lock(&id_mgr->lock);
 		id->pasid = job->pasid;
 		dma_fence_put(id->pasid_mapping);
 		id->pasid_mapping = dma_fence_get(fence);
+		mutex_unlock(&id_mgr->lock);
 	}
 	dma_fence_put(fence);
 
@@ -1172,10 +1227,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 	return result;
 }
 
-/*
+/**
  * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @param: parameters for the update
+ * @params: parameters for the update
  * @vm: requested vm
  * @entry: entry to update
  *
@@ -1199,7 +1254,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
 	return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
 }
 
-/*
+/**
  * amdgpu_vm_invalidate_pds - mark all PDs as invalid
  *
  * @adev: amdgpu_device pointer
@@ -1218,19 +1273,20 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
 			amdgpu_vm_bo_relocated(&entry->base);
 }
 
-/*
- * amdgpu_vm_update_directories - make sure that all directories are valid
+/**
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @direct: submit directly to the paging queue
  *
  * Makes sure all directories are up to date.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm)
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm, bool direct)
 {
 	struct amdgpu_vm_update_params params;
 	int r;
@@ -1241,6 +1297,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 
 	r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_VM, NULL);
 	if (r)
@@ -1268,7 +1325,7 @@ error:
 	return r;
 }
 
-/**
+/*
  * amdgpu_vm_update_flags - figure out flags for PTE updates
  *
  * Make sure to set the right flags for the PTEs at the desired level.
@@ -1391,7 +1448,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		uint64_t incr, entry_end, pe_start;
 		struct amdgpu_bo *pt;
 
-		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+		/* make sure that the page tables covering the address range are
+		 * actually allocated
+		 */
+		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
+					params->direct);
 		if (r)
 			return r;
 
@@ -1463,7 +1524,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		} while (frag_start < entry_end);
 
 		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
-			/* Free all child entries */
+			/* Free all child entries.
+			 * Update the tables with the flags and addresses and free up subsequent
+			 * tables in the case of huge pages or freed up areas.
+			 * This is the maximum you can free, because all other page tables are not
+			 * completely covered by the range and so potentially still in use.
+			 */
 			while (cursor.pfn < frag_start) {
 				amdgpu_vm_free_pts(adev, params->vm, &cursor);
 				amdgpu_vm_pt_next(adev, &cursor);
@@ -1482,13 +1548,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
- * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
+ * @direct: direct submission in a page fault
+ * @exclusive: fence we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
  * @flags: flags for the entries
  * @addr: addr to set the area to
+ * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
  * Fill in the page table entries between @start and @last.
@@ -1497,11 +1564,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct amdgpu_vm *vm, bool direct,
 				       struct dma_fence *exclusive,
-				       dma_addr_t *pages_addr,
-				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t addr,
+				       dma_addr_t *pages_addr,
 				       struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
@@ -1511,21 +1578,32 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 	params.pages_addr = pages_addr;
 
 	/* sync to everything except eviction fences on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_KFD;
 
+	amdgpu_vm_eviction_lock(vm);
+	if (vm->evicting) {
+		r = -EBUSY;
+		goto error_unlock;
+	}
+
 	r = vm->update_funcs->prepare(&params, owner, exclusive);
 	if (r)
-		return r;
+		goto error_unlock;
 
 	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
 	if (r)
-		return r;
+		goto error_unlock;
 
-	return vm->update_funcs->commit(&params, fence);
+	r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+	amdgpu_vm_eviction_unlock(vm);
+	return r;
 }
 
 /**
@@ -1569,27 +1647,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
 		flags &= ~AMDGPU_PTE_WRITEABLE;
 
-	flags &= ~AMDGPU_PTE_EXECUTABLE;
-	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
-
-	if (adev->asic_type >= CHIP_NAVI10) {
-		flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
-		flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
-	} else {
-		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-		flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK);
-	}
-
-	if ((mapping->flags & AMDGPU_PTE_PRT) &&
-	    (adev->asic_type >= CHIP_VEGA10)) {
-		flags |= AMDGPU_PTE_PRT;
-		if (adev->asic_type >= CHIP_NAVI10) {
-			flags |= AMDGPU_PTE_SNOOPED;
-			flags |= AMDGPU_PTE_LOG;
-			flags |= AMDGPU_PTE_SYSTEM;
-		}
-		flags &= ~AMDGPU_PTE_VALID;
-	}
+	/* Apply ASIC specific mapping flags */
+	amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
 
 	trace_amdgpu_vm_bo_update(mapping);
 
@@ -1633,7 +1692,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				dma_addr = pages_addr;
 			} else {
 				addr = pages_addr[pfn];
-				max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+				max_entries = count *
+					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 			}
 
 		} else if (flags & AMDGPU_PTE_VALID) {
@@ -1642,9 +1702,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive,
 						start, last, flags, addr,
-						fence);
+						dma_addr, fence);
 		if (r)
 			return r;
 
@@ -1672,8 +1732,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
-			struct amdgpu_bo_va *bo_va,
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 			bool clear)
 {
 	struct amdgpu_bo *bo = bo_va->base.bo;
@@ -1700,7 +1759,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
 			pages_addr = ttm->dma_address;
 		}
-		exclusive = dma_resv_get_excl(bo->tbo.base.resv);
+		exclusive = bo->tbo.moving;
 	}
 
 	if (bo) {
@@ -1731,12 +1790,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			return r;
 	}
 
-	if (vm->use_cpu_for_update) {
-		/* Flush HDP */
-		mb();
-		amdgpu_asic_flush_hdp(adev, NULL);
-	}
-
 	/* If the BO is not in its preferred location add it back to
 	 * the evicted list so that it gets validated again on the
 	 * next command submission.
@@ -1744,7 +1797,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
 		uint32_t mem_type = bo->tbo.mem.mem_type;
 
-		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+		if (!(bo->preferred_domains &
+		      amdgpu_mem_type_to_domain(mem_type)))
 			amdgpu_vm_bo_evicted(&bo_va->base);
 		else
 			amdgpu_vm_bo_idle(&bo_va->base);
@@ -1938,9 +1992,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
 						mapping->start, mapping->last,
-						init_pte_value, 0, &f);
+						init_pte_value, 0, NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2486,6 +2540,41 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_vm_evictable - check if we can evict a VM
+ *
+ * @bo: A page table of the VM.
+ *
+ * Check if it is possible to evict a VM.
+ */
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
+{
+	struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
+
+	/* Page tables of a destroyed VM can go away immediately */
+	if (!bo_base || !bo_base->vm)
+		return true;
+
+	/* Don't evict VM page tables while they are busy */
+	if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
+		return false;
+
+	/* Try to block ongoing updates */
+	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
+		return false;
+
+	/* Don't evict VM page tables while they are updated */
+	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
+	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+		amdgpu_vm_eviction_unlock(bo_base->vm);
+		return false;
+	}
+
+	bo_base->vm->evicting = true;
+	amdgpu_vm_eviction_unlock(bo_base->vm);
+	return true;
+}
+
+/**
  * amdgpu_vm_bo_invalidate - mark the bo as invalid
  *
  * @adev: amdgpu_device pointer
@@ -2647,8 +2736,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
  */
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 {
-	return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
-						   true, true, timeout);
+	timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
+					    true, true, timeout);
+	if (timeout <= 0)
+		return timeout;
+
+	timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout);
+	if (timeout <= 0)
+		return timeout;
+
+	return dma_fence_wait_timeout(vm->last_delayed, true, timeout);
 }
 
 /**
@@ -2682,13 +2779,22 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	spin_lock_init(&vm->invalidated_lock);
 	INIT_LIST_HEAD(&vm->freed);
 
-	/* create scheduler entity for page table updates */
-	r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
-				  adev->vm_manager.vm_pte_num_rqs, NULL);
+
+	/* create scheduler entities for page table updates */
+	r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+				  adev->vm_manager.vm_pte_scheds,
+				  adev->vm_manager.vm_pte_num_scheds, NULL);
 	if (r)
 		return r;
 
+	r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+				  adev->vm_manager.vm_pte_scheds,
+				  adev->vm_manager.vm_pte_num_scheds, NULL);
+	if (r)
+		goto error_free_direct;
+
 	vm->pte_support_ats = false;
+	vm->is_compute_context = false;
 
 	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2702,7 +2808,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
-	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+	WARN_ONCE((vm->use_cpu_for_update &&
+		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
 		  "CPU update of VM recommended only for large BAR system\n");
 
 	if (vm->use_cpu_for_update)
@@ -2710,13 +2817,18 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	vm->last_update = NULL;
+	vm->last_direct = dma_fence_get_stub();
+	vm->last_delayed = dma_fence_get_stub();
+
+	mutex_init(&vm->eviction_lock);
+	vm->evicting = false;
 
-	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
+	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
 	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
 		bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
 	r = amdgpu_bo_create(adev, &bp, &root);
 	if (r)
-		goto error_free_sched_entity;
+		goto error_free_delayed;
 
 	r = amdgpu_bo_reserve(root, true);
 	if (r)
@@ -2728,7 +2840,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 
-	r = amdgpu_vm_clear_bo(adev, vm, root);
+	r = amdgpu_vm_clear_bo(adev, vm, root, false);
 	if (r)
 		goto error_unreserve;
 
@@ -2759,8 +2871,13 @@ error_free_root:
 	amdgpu_bo_unref(&vm->root.base.bo);
 	vm->root.base.bo = NULL;
 
-error_free_sched_entity:
-	drm_sched_entity_destroy(&vm->entity);
+error_free_delayed:
+	dma_fence_put(vm->last_direct);
+	dma_fence_put(vm->last_delayed);
+	drm_sched_entity_destroy(&vm->delayed);
+
+error_free_direct:
+	drm_sched_entity_destroy(&vm->direct);
 
 	return r;
 }
@@ -2801,6 +2918,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @pasid: pasid to use
  *
  * This only works on GFX VMs that don't have any BOs added and no
  * page tables allocated yet.
@@ -2816,7 +2934,8 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
  * Returns:
  * 0 for success, -errno for errors.
  */
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   unsigned int pasid)
 {
 	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 	int r;
@@ -2848,7 +2967,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	 */
 	if (pte_support_ats != vm->pte_support_ats) {
 		vm->pte_support_ats = pte_support_ats;
-		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
 		if (r)
 			goto free_idr;
 	}
@@ -2858,7 +2977,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
-	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+	WARN_ONCE((vm->use_cpu_for_update &&
+		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
 		  "CPU update of VM recommended only for large BAR system\n");
 
 	if (vm->use_cpu_for_update)
@@ -2867,6 +2987,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	dma_fence_put(vm->last_update);
 	vm->last_update = NULL;
+	vm->is_compute_context = true;
 
 	if (vm->pasid) {
 		unsigned long flags;
@@ -2921,6 +3042,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 	vm->pasid = 0;
+	vm->is_compute_context = false;
 }
 
 /**
@@ -2937,31 +3059,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
 	struct amdgpu_bo *root;
-	int i, r;
+	int i;
 
 	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
 
+	root = amdgpu_bo_ref(vm->root.base.bo);
+	amdgpu_bo_reserve(root, true);
 	if (vm->pasid) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
 		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+		vm->pasid = 0;
 	}
 
-	drm_sched_entity_destroy(&vm->entity);
+	dma_fence_wait(vm->last_direct, false);
+	dma_fence_put(vm->last_direct);
+	dma_fence_wait(vm->last_delayed, false);
+	dma_fence_put(vm->last_delayed);
 
-	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
-		dev_err(adev->dev, "still active bo inside vm\n");
-	}
-	rbtree_postorder_for_each_entry_safe(mapping, tmp,
-					     &vm->va.rb_root, rb) {
-		/* Don't remove the mapping here, we don't want to trigger a
-		 * rebalance and the tree is about to be destroyed anyway.
-		 */
-		list_del(&mapping->list);
-		kfree(mapping);
-	}
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
 			amdgpu_vm_prt_fini(adev, vm);
@@ -2972,16 +3089,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 	}
 
-	root = amdgpu_bo_ref(vm->root.base.bo);
-	r = amdgpu_bo_reserve(root, true);
-	if (r) {
-		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
-	} else {
-		amdgpu_vm_free_pts(adev, vm, NULL);
-		amdgpu_bo_unreserve(root);
-	}
+	amdgpu_vm_free_pts(adev, vm, NULL);
+	amdgpu_bo_unreserve(root);
 	amdgpu_bo_unref(&root);
 	WARN_ON(vm->root.base.bo);
+
+	drm_sched_entity_destroy(&vm->direct);
+	drm_sched_entity_destroy(&vm->delayed);
+
+	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
+		dev_err(adev->dev, "still active bo inside vm\n");
+	}
+	rbtree_postorder_for_each_entry_safe(mapping, tmp,
+					     &vm->va.rb_root, rb) {
+		/* Don't remove the mapping here, we don't want to trigger a
+		 * rebalance and the tree is about to be destroyed anyway.
+		 */
+		list_del(&mapping->list);
+		kfree(mapping);
+	}
+
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vmid_free_reserved(adev, vm, i);
@@ -3065,8 +3192,9 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	switch (args->in.op) {
 	case AMDGPU_VM_OP_RESERVE_VMID:
-		/* current, we only have requirement to reserve vmid from gfxhub */
-		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+		/* We only have requirement to reserve vmid from gfxhub */
+		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
+					       AMDGPU_GFXHUB_0);
 		if (r)
 			return r;
 		break;
@@ -3109,13 +3237,97 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
  */
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 {
-	if (!vm->task_info.pid) {
-		vm->task_info.pid = current->pid;
-		get_task_comm(vm->task_info.task_name, current);
+	if (vm->task_info.pid)
+		return;
 
-		if (current->group_leader->mm == current->mm) {
-			vm->task_info.tgid = current->group_leader->pid;
-			get_task_comm(vm->task_info.process_name, current->group_leader);
-		}
+	vm->task_info.pid = current->pid;
+	get_task_comm(vm->task_info.task_name, current);
+
+	if (current->group_leader->mm != current->mm)
+		return;
+
+	vm->task_info.tgid = current->group_leader->pid;
+	get_task_comm(vm->task_info.process_name, current->group_leader);
+}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+			    uint64_t addr)
+{
+	struct amdgpu_bo *root;
+	uint64_t value, flags;
+	struct amdgpu_vm *vm;
+	long r;
+
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	if (vm)
+		root = amdgpu_bo_ref(vm->root.base.bo);
+	else
+		root = NULL;
+	spin_unlock(&adev->vm_manager.pasid_lock);
+
+	if (!root)
+		return false;
+
+	r = amdgpu_bo_reserve(root, true);
+	if (r)
+		goto error_unref;
+
+	/* Double check that the VM still exists */
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	if (vm && vm->root.base.bo != root)
+		vm = NULL;
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (!vm)
+		goto error_unlock;
+
+	addr /= AMDGPU_GPU_PAGE_SIZE;
+	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+		AMDGPU_PTE_SYSTEM;
+
+	if (vm->is_compute_context) {
+		/* Intentionally setting invalid PTE flag
+		 * combination to force a no-retry-fault
+		 */
+		flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+			AMDGPU_PTE_TF;
+		value = 0;
+
+	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+		/* Redirect the access to the dummy page */
+		value = adev->dummy_page_addr;
+		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+			AMDGPU_PTE_WRITEABLE;
+
+	} else {
+		/* Let the hw retry silently on the PTE */
+		value = 0;
 	}
+
+	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+					flags, value, NULL, NULL);
+	if (r)
+		goto error_unlock;
+
+	r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+	amdgpu_bo_unreserve(root);
+	if (r < 0)
+		DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+	amdgpu_bo_unref(&root);
+
+	return false;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2eda3a8c330d..b4640ab38c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -99,6 +100,9 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
+/* Reserve 4MB VRAM for page tables */
+#define AMDGPU_VM_RESERVED_VRAM		(4ULL << 20)
+
 /* max number of VMHUB */
 #define AMDGPU_MAX_VMHUBS			3
 #define AMDGPU_GFXHUB_0				0
@@ -199,6 +203,11 @@ struct amdgpu_vm_update_params {
 	struct amdgpu_vm *vm;
 
 	/**
+	 * @direct: if changes should be made directly
+	 */
+	bool direct;
+
+	/**
 	 * @pages_addr:
 	 *
 	 * DMA addresses to use for mapping
@@ -231,6 +240,13 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
+	/* Lock to prevent eviction while we are updating page tables
+	 * use vm_eviction_lock/unlock(vm)
+	 */
+	struct mutex		eviction_lock;
+	bool			evicting;
+	unsigned int		saved_flags;
+
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
@@ -254,8 +270,13 @@ struct amdgpu_vm {
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_update;
 
-	/* Scheduler entity for page table updates */
-	struct drm_sched_entity	entity;
+	/* Scheduler entities for page table updates */
+	struct drm_sched_entity	direct;
+	struct drm_sched_entity	delayed;
+
+	/* Last submission to the scheduler entities */
+	struct dma_fence	*last_direct;
+	struct dma_fence	*last_delayed;
 
 	unsigned int		pasid;
 	/* dedicated to vm */
@@ -289,6 +310,8 @@ struct amdgpu_vm {
 	struct ttm_lru_bulk_move lru_bulk_move;
 	/* mark whether can do the bulk move */
 	bool			bulk_moveable;
+	/* Flag to indicate if VM is used for compute */
+	bool			is_compute_context;
 };
 
 struct amdgpu_vm_manager {
@@ -308,8 +331,8 @@ struct amdgpu_vm_manager {
 	u64					vram_base_offset;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs	*vm_pte_funcs;
-	struct drm_sched_rq			*vm_pte_rqs[AMDGPU_MAX_RINGS];
-	unsigned				vm_pte_num_rqs;
+	struct drm_gpu_scheduler		*vm_pte_scheds[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_scheds;
 	struct amdgpu_ring			*page_fault;
 
 	/* partial resident texture handling */
@@ -357,8 +380,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm, bool direct);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
@@ -367,6 +390,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
@@ -404,6 +428,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
 			     struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+			    uint64_t addr);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 5222d165abfc..73fec7a0ced5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -49,13 +49,6 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
 {
 	int r;
 
-	/* Wait for PT BOs to be idle. PTs share the same resv. object
-	 * as the root PD BO
-	 */
-	r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
-	if (unlikely(r))
-		return r;
-
 	/* Wait for any BO move to be completed */
 	if (exclusive) {
 		r = dma_fence_wait(exclusive, true);
@@ -63,7 +56,14 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
 			return r;
 	}
 
-	return 0;
+	/* Don't wait for submissions during page fault */
+	if (p->direct)
+		return 0;
+
+	/* Wait for PT BOs to be idle. PTs share the same resv. object
+	 * as the root PD BO
+	 */
+	return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
 }
 
 /**
@@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
 
 	pe += (unsigned long)amdgpu_bo_kptr(bo);
 
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
 
 	for (i = 0; i < count; i++) {
 		value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 61fc584cbb1a..19b7f80758f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -68,17 +68,19 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
-	if (r)
-		return r;
+	p->num_dw_left = ndw;
 
-	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
-			     owner, false);
+	/* Wait for moves to be completed */
+	r = amdgpu_sync_fence(&p->job->sync, exclusive, false);
 	if (r)
 		return r;
 
-	p->num_dw_left = ndw;
-	return 0;
+	/* Don't wait for any submissions during page fault handling */
+	if (p->direct)
+		return 0;
+
+	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
+				owner, false);
 }
 
 /**
@@ -93,24 +95,30 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 				 struct dma_fence **fence)
 {
-	struct amdgpu_bo *root = p->vm->root.base.bo;
 	struct amdgpu_ib *ib = p->job->ibs;
+	struct drm_sched_entity *entity;
+	struct dma_fence *f, *tmp;
 	struct amdgpu_ring *ring;
-	struct dma_fence *f;
 	int r;
 
-	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
+	entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
 
 	WARN_ON(ib->length_dw == 0);
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > p->num_dw_left);
-	r = amdgpu_job_submit(p->job, &p->vm->entity,
-			      AMDGPU_FENCE_OWNER_VM, &f);
+	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
 		goto error;
 
-	amdgpu_bo_fence(root, f, true);
-	if (fence)
+	tmp = dma_fence_get(f);
+	if (p->direct)
+		swap(p->vm->last_direct, tmp);
+	else
+		swap(p->vm->last_delayed, tmp);
+	dma_fence_put(tmp);
+
+	if (fence && !p->direct)
 		swap(*fence, f);
 	dma_fence_put(f);
 	return 0;
@@ -120,7 +128,6 @@ error:
 	return r;
 }
 
-
 /**
  * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
  *
@@ -141,7 +148,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
 	src += p->num_dw_left * 4;
 
 	pe += amdgpu_bo_gpu_offset(bo);
-	trace_amdgpu_vm_copy_ptes(pe, src, count);
+	trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
 
 	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
 }
@@ -168,7 +175,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
 	struct amdgpu_ib *ib = p->job->ibs;
 
 	pe += amdgpu_bo_gpu_offset(bo);
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
 	if (count < 3) {
 		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
 				    count, incr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 3a9d8c15fe9f..82a3299e53c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -23,6 +23,9 @@
  */
 
 #include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
 
 struct amdgpu_vram_mgr {
 	struct drm_mm mm;
@@ -101,6 +104,39 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
 		amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]));
 }
 
+static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+
+	switch (adev->gmc.vram_vendor) {
+	case SAMSUNG:
+		return snprintf(buf, PAGE_SIZE, "samsung\n");
+	case INFINEON:
+		return snprintf(buf, PAGE_SIZE, "infineon\n");
+	case ELPIDA:
+		return snprintf(buf, PAGE_SIZE, "elpida\n");
+	case ETRON:
+		return snprintf(buf, PAGE_SIZE, "etron\n");
+	case NANYA:
+		return snprintf(buf, PAGE_SIZE, "nanya\n");
+	case HYNIX:
+		return snprintf(buf, PAGE_SIZE, "hynix\n");
+	case MOSEL:
+		return snprintf(buf, PAGE_SIZE, "mosel\n");
+	case WINBOND:
+		return snprintf(buf, PAGE_SIZE, "winbond\n");
+	case ESMT:
+		return snprintf(buf, PAGE_SIZE, "esmt\n");
+	case MICRON:
+		return snprintf(buf, PAGE_SIZE, "micron\n");
+	default:
+		return snprintf(buf, PAGE_SIZE, "unknown\n");
+	}
+}
+
 static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
 		   amdgpu_mem_info_vram_total_show, NULL);
 static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
@@ -109,6 +145,8 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
 		   amdgpu_mem_info_vram_used_show, NULL);
 static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
 		   amdgpu_mem_info_vis_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
+		   amdgpu_mem_info_vram_vendor, NULL);
 
 /**
  * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
@@ -154,6 +192,11 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
 		DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n");
 		return ret;
 	}
+	ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor);
+	if (ret) {
+		DRM_ERROR("Failed to create device file mem_info_vram_vendor\n");
+		return ret;
+	}
 
 	return 0;
 }
@@ -180,6 +223,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
 	device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
 	device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
 	device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
+	device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor);
 	return 0;
 }
 
@@ -275,7 +319,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	struct drm_mm_node *nodes;
 	enum drm_mm_insert_mode mode;
 	unsigned long lpfn, num_nodes, pages_per_node, pages_left;
-	uint64_t vis_usage = 0, mem_bytes;
+	uint64_t vis_usage = 0, mem_bytes, max_bytes;
 	unsigned i;
 	int r;
 
@@ -283,9 +327,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!lpfn)
 		lpfn = man->size;
 
+	max_bytes = adev->gmc.mc_vram_size;
+	if (tbo->type != ttm_bo_type_kernel)
+		max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+
 	/* bail out quickly if there's likely not enough VRAM for this BO */
 	mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
-	if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) {
+	if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
 		atomic64_sub(mem_bytes, &mgr->usage);
 		mem->mm_node = NULL;
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 65aae75f80fd..a97af422575a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
 #include "df/df_3_6_offset.h"
 
 static DEFINE_MUTEX(xgmi_mutex);
@@ -145,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 	ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
 	ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
 
-	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in);
+	fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
 	if (fica_out != 0x1f)
 		pr_err("xGMI error counters not enabled!\n");
 
-	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in);
+	fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
 
 	if ((fica_out & 0xffff) == 2)
 		error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
 
-	adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+	adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
 }
@@ -260,6 +261,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
 	INIT_LIST_HEAD(&tmp->device_list);
 	mutex_init(&tmp->hive_lock);
 	mutex_init(&tmp->reset_lock);
+	task_barrier_init(&tmp->tb);
 
 	if (lock)
 		mutex_lock(&tmp->hive_lock);
@@ -273,22 +275,49 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 {
 	int ret = 0;
 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+	struct amdgpu_device *tmp_adev;
+	bool update_hive_pstate = true;
+	bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
 
 	if (!hive)
 		return 0;
 
-	if (hive->pstate == pstate)
-		return 0;
+	mutex_lock(&hive->hive_lock);
+
+	if (hive->pstate == pstate) {
+		adev->pstate = is_high_pstate ? pstate : adev->pstate;
+		goto out;
+	}
 
 	dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
 
-	if (is_support_sw_smu_xgmi(adev))
-		ret = smu_set_xgmi_pstate(&adev->smu, pstate);
-	if (ret)
+	ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
+	if (ret) {
 		dev_err(adev->dev,
 			"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
 			adev->gmc.xgmi.node_id,
 			adev->gmc.xgmi.hive_id, ret);
+		goto out;
+	}
+
+	/* Update device pstate */
+	adev->pstate = pstate;
+
+	/*
+	 * Update the hive pstate only all devices of the hive
+	 * are in the same pstate
+	 */
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		if (tmp_adev->pstate != adev->pstate) {
+			update_hive_pstate = false;
+			break;
+		}
+	}
+	if (update_hive_pstate || is_high_pstate)
+		hive->pstate = pstate;
+
+out:
+	mutex_unlock(&hive->hive_lock);
 
 	return ret;
 }
@@ -363,6 +392,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 		goto exit;
 	}
 
+	/* Set default device pstate */
+	adev->pstate = -1;
+
 	top_info = &adev->psp.xgmi_context.top_info;
 
 	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
@@ -371,6 +403,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 	top_info->num_nodes = count;
 	hive->number_devices = count;
 
+	task_barrier_add_task(&hive->tb);
+
 	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
 			/* update node list for other device in the hive */
@@ -433,7 +467,57 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 		mutex_destroy(&hive->hive_lock);
 		mutex_destroy(&hive->reset_lock);
 	} else {
+		task_barrier_rem_task(&hive->tb);
 		amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
 		mutex_unlock(&hive->hive_lock);
 	}
 }
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_ih_if ih_info = {
+		.cb = NULL,
+	};
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "xgmi_wafl_err_count",
+		.debugfs_name = "xgmi_wafl_err_inject",
+	};
+
+	if (!adev->gmc.xgmi.supported ||
+	    adev->gmc.xgmi.num_physical_nodes == 0)
+		return 0;
+
+	if (!adev->gmc.xgmi.ras_if) {
+		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->gmc.xgmi.ras_if)
+			return -ENOMEM;
+		adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+		adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->gmc.xgmi.ras_if->sub_block_index = 0;
+		strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+	}
+	ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+	r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+				 &fs_info, &ih_info);
+	if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+		kfree(adev->gmc.xgmi.ras_if);
+		adev->gmc.xgmi.ras_if = NULL;
+	}
+
+	return r;
+}
+
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
+			adev->gmc.xgmi.ras_if) {
+		struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
+		struct ras_ih_if ih_info = {
+			.cb = NULL,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index fbcee31788c4..74011fbc2251 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -22,6 +22,7 @@
 #ifndef __AMDGPU_XGMI_H__
 #define __AMDGPU_XGMI_H__
 
+#include <drm/task_barrier.h>
 #include "amdgpu_psp.h"
 
 struct amdgpu_hive_info {
@@ -33,6 +34,7 @@ struct amdgpu_hive_info {
 	struct device_attribute dev_attr;
 	struct amdgpu_device *adev;
 	int pstate; /*0 -- low , 1 -- high , -1 unknown*/
+	struct task_barrier tb;
 };
 
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
@@ -42,6 +44,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
 		struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
 
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
index 4853899b1824..fda99c958c3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -24,7 +24,6 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "arct_ip_offset.h"
 
 int arct_reg_base_init(struct amdgpu_device *adev)
@@ -52,6 +51,8 @@ int arct_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
 		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+		adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+		adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 6858cde9fc5d..ea702a64f807 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -361,7 +361,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
 			       struct drm_connector *connector)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-	struct amdgpu_connector_atom_dig *dig_connector;
 	int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
 	u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector);
 	u8 tmp;
@@ -369,8 +368,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
 	if (!amdgpu_connector->con_priv)
 		return panel_mode;
 
-	dig_connector = amdgpu_connector->con_priv;
-
 	if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
 		/* DP bridge chips */
 		if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
@@ -713,7 +710,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
 	struct drm_device *dev = encoder->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct amdgpu_encoder_atom_dig *dig;
 	struct amdgpu_connector *amdgpu_connector;
 	struct amdgpu_connector_atom_dig *dig_connector;
 	struct amdgpu_atombios_dp_link_train_info dp_info;
@@ -721,7 +717,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
 
 	if (!amdgpu_encoder->enc_priv)
 		return;
-	dig = amdgpu_encoder->enc_priv;
 
 	amdgpu_connector = to_amdgpu_connector(connector);
 	if (!amdgpu_connector->con_priv)
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 980c363b1a0a..b4cc7c55fa16 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -76,11 +76,6 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
 		}
 		args.lpI2CDataOut = cpu_to_le16(out);
 	} else {
-		if (num > ATOM_MAX_HW_I2C_READ) {
-			DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num);
-			r = -EINVAL;
-			goto done;
-		}
 		args.ucRegIndex = 0;
 		args.lpI2CDataOut = 0;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index b81bb414fcb3..006f21ef7ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -966,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 	{mmGRBM_STATUS},
+	{mmGRBM_STATUS2},
+	{mmGRBM_STATUS_SE0},
+	{mmGRBM_STATUS_SE1},
+	{mmGRBM_STATUS_SE2},
+	{mmGRBM_STATUS_SE3},
+	{mmSRBM_STATUS},
+	{mmSRBM_STATUS2},
+	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+	{mmCP_STAT},
+	{mmCP_STALLED_STAT1},
+	{mmCP_STALLED_STAT2},
+	{mmCP_STALLED_STAT3},
+	{mmCP_CPF_BUSY_STAT},
+	{mmCP_CPF_STALLED_STAT1},
+	{mmCP_CPF_STATUS},
+	{mmCP_CPC_BUSY_STAT},
+	{mmCP_CPC_STALLED_STAT1},
+	{mmCP_CPC_STATUS},
 	{mmGB_ADDR_CONFIG},
 	{mmMC_ARB_RAMCFG},
 	{mmGB_TILE_MODE0},
@@ -1270,15 +1289,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 }
 
 /**
- * cik_asic_reset - soft reset GPU
+ * cik_asic_pci_config_reset - soft reset GPU
  *
  * @adev: amdgpu_device pointer
  *
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
  * Returns 0 for success.
  */
-static int cik_asic_reset(struct amdgpu_device *adev)
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
 {
 	int r;
 
@@ -1291,10 +1310,62 @@ static int cik_asic_reset(struct amdgpu_device *adev)
 	return r;
 }
 
+static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+		return amdgpu_dpm_is_baco_supported(adev);
+	default:
+		return false;
+	}
+}
+
 static enum amd_reset_method
 cik_asic_reset_method(struct amdgpu_device *adev)
 {
-	return AMD_RESET_METHOD_LEGACY;
+	bool baco_reset;
+
+	switch (adev->asic_type) {
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+		/* disable baco reset until it works */
+		/* smu7_asic_get_baco_capability(adev, &baco_reset); */
+		baco_reset = false;
+		break;
+	default:
+		baco_reset = false;
+		break;
+	}
+
+	if (baco_reset)
+		return AMD_RESET_METHOD_BACO;
+	else
+		return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+		if (!adev->in_suspend)
+			amdgpu_inc_vram_lost(adev);
+		r = amdgpu_dpm_baco_reset(adev);
+	} else {
+		r = cik_asic_pci_config_reset(adev);
+	}
+
+	return r;
 }
 
 static u32 cik_get_config_memsize(struct amdgpu_device *adev)
@@ -1384,7 +1455,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
 static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 {
 	struct pci_dev *root = adev->pdev->bus->self;
-	int bridge_pos, gpu_pos;
 	u32 speed_cntl, current_data_rate;
 	int i;
 	u16 tmp16;
@@ -1419,12 +1489,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
 	}
 
-	bridge_pos = pci_pcie_cap(root);
-	if (!bridge_pos)
-		return;
-
-	gpu_pos = pci_pcie_cap(adev->pdev);
-	if (!gpu_pos)
+	if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
 		return;
 
 	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1434,14 +1499,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 			u16 bridge_cfg2, gpu_cfg2;
 			u32 max_lw, current_lw, tmp;
 
-			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
-			pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+						  &bridge_cfg);
+			pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+						  &gpu_cfg);
 
 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
-			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
 
 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
-			pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+			pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+						   tmp16);
 
 			tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
 			max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1465,15 +1533,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 
 			for (i = 0; i < 10; i++) {
 				/* check status */
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_DEVSTA,
+							  &tmp16);
 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
 					break;
 
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+							  &bridge_cfg);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL,
+							  &gpu_cfg);
 
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+							  &bridge_cfg2);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL2,
+							  &gpu_cfg2);
 
 				tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
 				tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1486,26 +1562,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 				msleep(100);
 
 				/* linkctl */
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+							  &tmp16);
 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
-				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+							   tmp16);
 
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL,
+							  &tmp16);
 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
-				pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+				pcie_capability_write_word(adev->pdev,
+							   PCI_EXP_LNKCTL,
+							   tmp16);
 
 				/* linkctl2 */
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
-				tmp16 &= ~((1 << 4) | (7 << 9));
-				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
-				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
-				tmp16 &= ~((1 << 4) | (7 << 9));
-				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
-				pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+							  &tmp16);
+				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN);
+				tmp16 |= (bridge_cfg2 &
+					  (PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN));
+				pcie_capability_write_word(root,
+							   PCI_EXP_LNKCTL2,
+							   tmp16);
+
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL2,
+							  &tmp16);
+				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN);
+				tmp16 |= (gpu_cfg2 &
+					  (PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN));
+				pcie_capability_write_word(adev->pdev,
+							   PCI_EXP_LNKCTL2,
+							   tmp16);
 
 				tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
 				tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1520,15 +1615,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 	speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
 	WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
 
-	pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
-	tmp16 &= ~0xf;
+	pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
 	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
-		tmp16 |= 3; /* gen3 */
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
 	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
-		tmp16 |= 2; /* gen2 */
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
 	else
-		tmp16 |= 1; /* gen1 */
-	pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+	pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
 
 	speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
 	speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1842,6 +1938,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_pcie_usage = &cik_get_pcie_usage,
 	.need_reset_on_init = &cik_need_reset_on_init,
 	.get_pcie_replay_count = &cik_get_pcie_replay_count,
+	.supports_baco = &cik_asic_supports_baco,
 };
 
 static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index 54c625a2e570..f91ab4c246b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -31,4 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
 int cik_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..580d3f93d670 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 	u32 extra_bits = vmid & 0xf;
 
 	/* IB packet must end on a 8 DW boundary */
-	cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
@@ -1372,16 +1372,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
-	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
-			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
-	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version cik_sdma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 645550e7caf5..40d2ac723dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -330,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -368,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
 		amdgpu_irq_get(adev, &adev->hpd_irq,
 			       amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -382,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -397,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
 		amdgpu_irq_put(adev, &adev->hpd_irq,
 			       amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1219,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder)
 static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 tmp;
 	int interlace = 0;
@@ -1230,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1261,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
 
 static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 tmp;
 	u8 *sadb = NULL;
@@ -1273,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1313,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
 
 static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	struct cea_sad *sads;
 	int i, sad_count;
@@ -1339,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1352,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
-	if (sad_count <= 0) {
+	if (sad_count < 0)
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+	if (sad_count <= 0)
 		return;
-	}
 	BUG_ON(!sads);
 
 	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index d9f470632b2c..898ef72d423c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -348,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -385,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
 		dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
 		amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -399,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -413,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
 
 		amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1245,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
 static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 tmp;
 	int interlace = 0;
@@ -1256,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1287,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
 
 static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 tmp;
 	u8 *sadb = NULL;
@@ -1299,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1339,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
 
 static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	struct cea_sad *sads;
 	int i, sad_count;
@@ -1365,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1378,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
-	if (sad_count <= 0) {
+	if (sad_count < 0)
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+	if (sad_count <= 0)
 		return;
-	}
 	BUG_ON(!sads);
 
 	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 3eb2e7429269..db15a112becc 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -281,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -309,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
 		dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
 		amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
-
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -324,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -338,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
 
 		amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1124,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
 static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	int interlace = 0;
 	u32 tmp;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1164,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
 
 static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u8 *sadb = NULL;
 	int sad_count;
 	u32 tmp;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1221,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 
 static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	struct cea_sad *sads;
 	int i, sad_count;
@@ -1244,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
 		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
 	};
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1257,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
-	if (sad_count <= 0) {
+	if (sad_count < 0)
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+	if (sad_count <= 0)
 		return;
-	}
 
 	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
 		u32 tmp = 0;
@@ -1632,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
 	int bpc = 8;
@@ -1639,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
 	if (!dig || !dig->afmt)
 		return;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index a16c5e9e610e..f06c9022c1fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -275,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -303,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
 		dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
 		amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -317,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	u32 tmp;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
 		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -331,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
 
 		amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1157,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder)
 static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 tmp = 0, offset;
 
@@ -1169,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
 
 	offset = dig->afmt->pin->offset;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1214,10 +1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
 
 static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	u32 offset, tmp;
 	u8 *sadb = NULL;
@@ -1228,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 
 	offset = dig->afmt->pin->offset;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1263,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 
 static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
 	u32 offset;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct amdgpu_connector *amdgpu_connector = NULL;
 	struct cea_sad *sads;
 	int i, sad_count;
@@ -1292,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
 
 	offset = dig->afmt->pin->offset;
 
-	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			amdgpu_connector = to_amdgpu_connector(connector);
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 
 	if (!amdgpu_connector) {
 		DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1305,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
-	if (sad_count <= 0) {
+	if (sad_count < 0)
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+	if (sad_count <= 0)
 		return;
-	}
 	BUG_ON(!sads);
 
 	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index c9608ae8643b..e4f94863332c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -260,15 +260,14 @@ static struct drm_encoder *
 dce_virtual_encoder(struct drm_connector *connector)
 {
 	struct drm_encoder *encoder;
-	int i;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
 			return encoder;
 	}
 
 	/* pick the first one */
-	drm_connector_for_each_possible_encoder(connector, encoder, i)
+	drm_connector_for_each_possible_encoder(connector, encoder)
 		return encoder;
 
 	return NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 844c03868248..d6aca1c08068 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -31,6 +31,13 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
 
 static void df_v1_7_sw_init(struct amdgpu_device *adev)
 {
+	adev->df.hash_status.hash_64k = false;
+	adev->df.hash_status.hash_2m = false;
+	adev->df.hash_status.hash_1g = false;
+}
+
+static void df_v1_7_sw_fini(struct amdgpu_device *adev)
+{
 }
 
 static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -62,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
 {
 	int fb_channel_number;
 
-	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
 
 	return df_v1_7_channel_number[fb_channel_number];
 }
@@ -73,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 	u32 tmp;
 
 	/* Put DF on broadcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, true);
+	adev->df.funcs->enable_broadcast_mode(adev, true);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
 		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
@@ -88,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 	}
 
 	/* Exit boradcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, false);
+	adev->df.funcs->enable_broadcast_mode(adev, false);
 }
 
 static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
@@ -111,6 +118,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
 
 const struct amdgpu_df_funcs df_v1_7_funcs = {
 	.sw_init = df_v1_7_sw_init,
+	.sw_fini = df_v1_7_sw_fini,
 	.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
 	.get_fb_channel_number = df_v1_7_get_fb_channel_number,
 	.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 5850c8e34caa..f51326598a8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -99,8 +99,8 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
 	unsigned long flags, address, data;
 	uint32_t ficadl_val, ficadh_val;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
@@ -122,8 +122,8 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
@@ -150,8 +150,8 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, lo_addr);
@@ -172,8 +172,8 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, lo_addr);
@@ -183,6 +183,61 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 }
 
+/* same as perfmon_wreg but return status on write value check */
+static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
+					  uint32_t lo_addr, uint32_t lo_val,
+					  uint32_t hi_addr, uint32_t  hi_val)
+{
+	unsigned long flags, address, data;
+	uint32_t lo_val_rb, hi_val_rb;
+
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+	WREG32(address, lo_addr);
+	WREG32(data, lo_val);
+	WREG32(address, hi_addr);
+	WREG32(data, hi_val);
+
+	WREG32(address, lo_addr);
+	lo_val_rb = RREG32(data);
+	WREG32(address, hi_addr);
+	hi_val_rb = RREG32(data);
+	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+	if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
+		return -EBUSY;
+
+	return 0;
+}
+
+
+/*
+ * retry arming counters every 100 usecs within 1 millisecond interval.
+ * if retry fails after time out, return error.
+ */
+#define ARM_RETRY_USEC_TIMEOUT	1000
+#define ARM_RETRY_USEC_INTERVAL	100
+static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
+					  uint32_t lo_addr, uint32_t lo_val,
+					  uint32_t hi_addr, uint32_t  hi_val)
+{
+	int countdown = ARM_RETRY_USEC_TIMEOUT;
+
+	while (countdown) {
+
+		if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
+						     hi_addr, hi_val))
+			break;
+
+		countdown -= ARM_RETRY_USEC_INTERVAL;
+		udelay(ARM_RETRY_USEC_INTERVAL);
+	}
+
+	return countdown > 0 ? 0 : -ETIME;
+}
+
 /* get the number of df counters available */
 static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
 		struct device_attribute *attr,
@@ -207,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
 /* device attr for available perfmon counters */
 static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
 
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	adev->df.hash_status.hash_64k = false;
+	adev->df.hash_status.hash_2m = false;
+	adev->df.hash_status.hash_1g = false;
+
+	if (adev->asic_type != CHIP_ARCTURUS)
+		return;
+
+	/* encoding for hash-enabled on Arcturus */
+	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
+						DF_CS_UMC_AON0_DfGlobalCtrl,
+						GlbHashIntlvCtl64K);
+		adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
+						DF_CS_UMC_AON0_DfGlobalCtrl,
+						GlbHashIntlvCtl2M);
+		adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
+						DF_CS_UMC_AON0_DfGlobalCtrl,
+						GlbHashIntlvCtl1G);
+	}
+}
+
 /* init perfmons */
 static void df_v3_6_sw_init(struct amdgpu_device *adev)
 {
@@ -218,6 +299,15 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
 
 	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
 		adev->df_perfmon_config_assign_mask[i] = 0;
+
+	df_v3_6_query_hashes(adev);
+}
+
+static void df_v3_6_sw_fini(struct amdgpu_device *adev)
+{
+
+	device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+
 }
 
 static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -249,7 +339,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
 {
 	int fb_channel_number;
 
-	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
 	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
 		fb_channel_number = 0;
 
@@ -261,23 +351,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 {
 	u32 tmp;
 
-	/* Put DF on broadcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, true);
-
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
-		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
-		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
-		tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
-		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
-	} else {
-		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
-		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
-		tmp |= DF_V3_6_MGCG_DISABLE;
-		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
-	}
+	if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+		/* Put DF on broadcast mode */
+		adev->df.funcs->enable_broadcast_mode(adev, true);
+
+		if (enable) {
+			tmp = RREG32_SOC15(DF, 0,
+					mmDF_PIE_AON0_DfGlobalClkGater);
+			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+			tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+			WREG32_SOC15(DF, 0,
+					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+		} else {
+			tmp = RREG32_SOC15(DF, 0,
+					mmDF_PIE_AON0_DfGlobalClkGater);
+			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+			tmp |= DF_V3_6_MGCG_DISABLE;
+			WREG32_SOC15(DF, 0,
+					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+		}
 
-	/* Exit broadcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, false);
+		/* Exit broadcast mode */
+		adev->df.funcs->enable_broadcast_mode(adev, false);
+	}
 }
 
 static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
@@ -321,20 +417,20 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
 	switch (target_cntr) {
 
 	case 0:
-		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
-		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
 		break;
 	case 1:
-		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
-		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
 		break;
 	case 2:
-		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
-		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
 		break;
 	case 3:
-		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
-		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
 		break;
 
 	}
@@ -409,6 +505,44 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
 	return -ENOSPC;
 }
 
+#define DEFERRED_ARM_MASK	(1 << 31)
+static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
+				    uint64_t config, bool is_deferred)
+{
+	int target_cntr;
+
+	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+	if (target_cntr < 0)
+		return -EINVAL;
+
+	if (is_deferred)
+		adev->df_perfmon_config_assign_mask[target_cntr] |=
+							DEFERRED_ARM_MASK;
+	else
+		adev->df_perfmon_config_assign_mask[target_cntr] &=
+							~DEFERRED_ARM_MASK;
+
+	return 0;
+}
+
+static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
+				    uint64_t config)
+{
+	int target_cntr;
+
+	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+	/*
+	 * we never get target_cntr < 0 since this funciton is only called in
+	 * pmc_count for now but we should check anyways.
+	 */
+	return (target_cntr >= 0 &&
+			(adev->df_perfmon_config_assign_mask[target_cntr]
+			& DEFERRED_ARM_MASK));
+
+}
+
 /* release performance counter */
 static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
 				     uint64_t config)
@@ -438,29 +572,33 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
 			     int is_enable)
 {
 	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
-	int ret = 0;
+	int err = 0, ret = 0;
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
+		if (is_enable)
+			return df_v3_6_pmc_add_cntr(adev, config);
 
 		df_v3_6_reset_perfmon_cntr(adev, config);
 
-		if (is_enable) {
-			ret = df_v3_6_pmc_add_cntr(adev, config);
-		} else {
-			ret = df_v3_6_pmc_get_ctrl_settings(adev,
+		ret = df_v3_6_pmc_get_ctrl_settings(adev,
 					config,
 					&lo_base_addr,
 					&hi_base_addr,
 					&lo_val,
 					&hi_val);
 
-			if (ret)
-				return ret;
+		if (ret)
+			return ret;
+
+		err = df_v3_6_perfmon_arm_with_retry(adev,
+						     lo_base_addr,
+						     lo_val,
+						     hi_base_addr,
+						     hi_val);
 
-			df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
-					hi_base_addr, hi_val);
-		}
+		if (err)
+			ret = df_v3_6_pmc_set_deferred(adev, config, true);
 
 		break;
 	default:
@@ -488,7 +626,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
 		if (ret)
 			return ret;
 
-		df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+		df_v3_6_reset_perfmon_cntr(adev, config);
 
 		if (is_disable)
 			df_v3_6_pmc_release_cntr(adev, config);
@@ -505,18 +643,29 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
 				  uint64_t config,
 				  uint64_t *count)
 {
-	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+	uint32_t lo_base_addr, hi_base_addr, lo_val = 0, hi_val = 0;
 	*count = 0;
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
-
 		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
 				      &hi_base_addr);
 
 		if ((lo_base_addr == 0) || (hi_base_addr == 0))
 			return;
 
+		/* rearm the counter or throw away count value on failure */
+		if (df_v3_6_pmc_is_deferred(adev, config)) {
+			int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
+							lo_base_addr, lo_val,
+							hi_base_addr, hi_val);
+
+			if (rearm_err)
+				return;
+
+			df_v3_6_pmc_set_deferred(adev, config, false);
+		}
+
 		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
 				hi_base_addr, &hi_val);
 
@@ -529,7 +678,6 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
 			 config, lo_base_addr, hi_base_addr, lo_val, hi_val);
 
 		break;
-
 	default:
 		break;
 	}
@@ -537,6 +685,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
 
 const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.sw_init = df_v3_6_sw_init,
+	.sw_fini = df_v3_6_sw_fini,
 	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
 	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
 	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 53090eae0082..874f641de281 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
@@ -50,9 +51,6 @@
  * Navi10 has two graphic rings to share each graphic pipe.
  * 1. Primary ring
  * 2. Async ring
- *
- * In bring-up phase, it just used primary ring so set gfx ring number as 1 at
- * first.
  */
 #define GFX10_NUM_GFX_RINGS	2
 #define GFX10_MEC_HPD_SIZE	2048
@@ -117,17 +115,20 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
@@ -162,16 +163,19 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
 };
 
 static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
@@ -342,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx10_kiq_set_resources,
 	.kiq_map_queues = gfx10_kiq_map_queues,
 	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
 	.kiq_query_status = gfx10_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
@@ -468,18 +486,10 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
 		else
 			udelay(1);
 	}
-	if (i < adev->usec_timeout) {
-		if (amdgpu_emu_mode == 1)
-			DRM_INFO("ring test on %d succeeded in %d msecs\n",
-				 ring->idx, i);
-		else
-			DRM_INFO("ring test on %d succeeded in %d usecs\n",
-				 ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
-			  ring->idx, scratch, tmp);
-		r = -EINVAL;
-	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	amdgpu_gfx_scratch_free(adev, scratch);
 
 	return r;
@@ -529,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	}
 
 	tmp = RREG32(scratch);
-	if (tmp == 0xDEADBEEF) {
-		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
+	else
 		r = -EINVAL;
-	}
 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
@@ -585,8 +591,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
 	}
 
 	if (adev->gfx.cp_fw_write_wait == false)
-		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
-			      GRBM requires 1-cycle delay in cp firmware\n");
+		DRM_WARN_ONCE("CP firmware version too old, please update!");
 }
 
 
@@ -611,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 }
 
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+	bool ret = false;
+
+	switch (adev->pdev->revision) {
+	case 0xc2:
+	case 0xc3:
+		ret = true;
+		break;
+	default:
+		ret = false;
+		break;
+	}
+
+	return ret ;
+}
+
 static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
-		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+		if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 		break;
 	default:
 		break;
@@ -690,59 +713,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
-	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-	if (err)
-		goto out;
-	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
-	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
-	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-	if (version_major == 2 && version_minor == 1)
-		adev->gfx.rlc.is_rlc_v2_1 = true;
-
-	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
-	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
-	adev->gfx.rlc.save_and_restore_offset =
+	if (!amdgpu_sriov_vf(adev)) {
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+		if (err)
+			goto out;
+		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		if (version_major == 2 && version_minor == 1)
+			adev->gfx.rlc.is_rlc_v2_1 = true;
+
+		adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+		adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+		adev->gfx.rlc.save_and_restore_offset =
 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
-	adev->gfx.rlc.clear_state_descriptor_offset =
+		adev->gfx.rlc.clear_state_descriptor_offset =
 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
-	adev->gfx.rlc.avail_scratch_ram_locations =
+		adev->gfx.rlc.avail_scratch_ram_locations =
 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
-	adev->gfx.rlc.reg_restore_list_size =
+		adev->gfx.rlc.reg_restore_list_size =
 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
-	adev->gfx.rlc.reg_list_format_start =
+		adev->gfx.rlc.reg_list_format_start =
 			le32_to_cpu(rlc_hdr->reg_list_format_start);
-	adev->gfx.rlc.reg_list_format_separate_start =
+		adev->gfx.rlc.reg_list_format_separate_start =
 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
-	adev->gfx.rlc.starting_offsets_start =
+		adev->gfx.rlc.starting_offsets_start =
 			le32_to_cpu(rlc_hdr->starting_offsets_start);
-	adev->gfx.rlc.reg_list_format_size_bytes =
+		adev->gfx.rlc.reg_list_format_size_bytes =
 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
-	adev->gfx.rlc.reg_list_size_bytes =
+		adev->gfx.rlc.reg_list_size_bytes =
 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
-	adev->gfx.rlc.register_list_format =
+		adev->gfx.rlc.register_list_format =
 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
-				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
-	if (!adev->gfx.rlc.register_list_format) {
-		err = -ENOMEM;
-		goto out;
-	}
+					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+		if (!adev->gfx.rlc.register_list_format) {
+			err = -ENOMEM;
+			goto out;
+		}
 
-	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
-			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
-	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
-		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
+		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+							   le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+		for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+			adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
 
-	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+		adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
 
-	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
-			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
-	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
-		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+							   le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+		for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+			adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 
-	if (adev->gfx.rlc.is_rlc_v2_1)
-		gfx_v10_0_init_rlc_ext_microcode(adev);
+		if (adev->gfx.rlc.is_rlc_v2_1)
+			gfx_v10_0_init_rlc_ext_microcode(adev);
+	}
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
@@ -797,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 		info->fw = adev->gfx.rlc_fw;
-		header = (const struct common_firmware_header *)info->fw->data;
-		adev->firmware.fw_size +=
-			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
+		if (info->fw) {
+			header = (const struct common_firmware_header *)info->fw->data;
+			adev->firmware.fw_size +=
+				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+		}
 		if (adev->gfx.rlc.is_rlc_v2_1 &&
 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
@@ -993,39 +1019,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-	int r;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-	if (unlikely(r != 0))
-		return r;
-
-	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-			AMDGPU_GEM_DOMAIN_VRAM);
-	if (!r)
-		adev->gfx.rlc.clear_state_gpu_addr =
-			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-	return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-	int r;
-
-	if (!adev->gfx.rlc.clear_state_obj)
-		return;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-	if (likely(r == 0)) {
-		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-	}
-}
-
 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1469,7 +1462,7 @@ static int gfx_v10_0_sw_fini(void *handle)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 	amdgpu_gfx_mqd_sw_fini(adev);
-	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
 	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v10_0_pfp_fini(adev);
@@ -1785,27 +1778,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }
 
-static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
 {
+	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
 	/* csib */
 	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
 		     adev->gfx.rlc.clear_state_gpu_addr >> 32);
 	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
 		     adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
 	WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
-}
-
-static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
-{
-	int i;
-
-	gfx_v10_0_init_csb(adev);
 
-	for (i = 0; i < adev->num_vmhubs; i++)
-		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-
-	/* TODO: init power gating */
-	return;
+	return 0;
 }
 
 void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
@@ -1900,18 +1884,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (amdgpu_sriov_vf(adev))
-		return 0;
-
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
 		r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
 		if (r)
 			return r;
-		gfx_v10_0_init_pg(adev);
 
-		/* enable RLC SRM */
-		gfx_v10_0_rlc_enable_srm(adev);
+		gfx_v10_0_init_csb(adev);
 
+		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+			gfx_v10_0_rlc_enable_srm(adev);
 	} else {
 		adev->gfx.rlc.funcs->stop(adev);
 
@@ -1933,7 +1915,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
 				return r;
 		}
 
-		gfx_v10_0_init_pg(adev);
+		gfx_v10_0_init_csb(adev);
+
 		adev->gfx.rlc.funcs->start(adev);
 
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -1983,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
 		rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
 
 		rlc_toc++;
-	};
+	}
 
 	return 0;
 }
@@ -2400,7 +2383,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 {
 	int i;
 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
@@ -2413,7 +2396,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
 	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
-	udelay(50);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+	return 0;
 }
 
 static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
@@ -2470,7 +2463,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_emu_mode == 1)
-		adev->nbio_funcs->hdp_flush(adev, NULL);
+		adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2540,7 +2533,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_emu_mode == 1)
-		adev->nbio_funcs->hdp_flush(adev, NULL);
+		adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -2609,7 +2602,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_emu_mode == 1)
-		adev->nbio_funcs->hdp_flush(adev, NULL);
+		adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2784,7 +2777,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
 	/* Init gfx ring 0 for pipe 0 */
 	mutex_lock(&adev->srbm_mutex);
 	gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
-	mutex_unlock(&adev->srbm_mutex);
+
 	/* Set ring buffer size */
 	ring = &adev->gfx.gfx_ring[0];
 	rb_bufsz = order_base_2(ring->ring_size / 8);
@@ -2822,11 +2815,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
 
 	gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+	mutex_unlock(&adev->srbm_mutex);
 
 	/* Init gfx ring 1 for pipe 1 */
 	mutex_lock(&adev->srbm_mutex);
 	gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
-	mutex_unlock(&adev->srbm_mutex);
 	ring = &adev->gfx.gfx_ring[1];
 	rb_bufsz = order_base_2(ring->ring_size / 8);
 	tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
@@ -2856,6 +2849,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
 
 	gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+	mutex_unlock(&adev->srbm_mutex);
 
 	/* Switch to pipe 0 */
 	mutex_lock(&adev->srbm_mutex);
@@ -2930,7 +2924,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_emu_mode == 1)
-		adev->nbio_funcs->hdp_flush(adev, NULL);
+		adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -3114,6 +3108,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
 
 	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
@@ -3125,14 +3120,15 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
 #endif
 		nv_grbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
-		if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
-			memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
+		if (adev->gfx.me.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	} else if (adev->in_gpu_reset) {
 		/* reset mqd with the backup copy */
-		if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
-			memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
+		if (adev->gfx.me.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
 		/* reset the ring */
 		ring->wptr = 0;
+		adev->wb.wb[ring->wptr_offs] = 0;
 		amdgpu_ring_clear_ring(ring);
 #ifdef BRING_UP_DEBUG
 		mutex_lock(&adev->srbm_mutex);
@@ -3341,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* map_queues packet doesn't need activate the queue,
+	 * so only kiq need set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
@@ -3613,23 +3612,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
-		DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
-			 i, ring->me, ring->pipe, ring->queue);
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->sched.ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			return r;
-		}
 	}
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-		ring->sched.ready = true;
-		DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
-			 i, ring->me, ring->pipe, ring->queue);
-		r = amdgpu_ring_test_ring(ring);
+		r = amdgpu_ring_test_helper(ring);
 		if (r)
-			ring->sched.ready = false;
+			return r;
 	}
 
 	return 0;
@@ -3732,10 +3724,6 @@ static int gfx_v10_0_hw_init(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = gfx_v10_0_csb_vram_pin(adev);
-	if (r)
-		return r;
-
 	if (!amdgpu_emu_mode)
 		gfx_v10_0_init_golden_registers(adev);
 
@@ -3818,12 +3806,11 @@ static int gfx_v10_0_hw_fini(void *handle)
 	if (amdgpu_gfx_disable_kcq(adev))
 		DRM_ERROR("KCQ disable failed\n");
 	if (amdgpu_sriov_vf(adev)) {
-		pr_debug("For SRIOV client, shouldn't do anything.\n");
+		gfx_v10_0_cp_gfx_enable(adev, false);
 		return 0;
 	}
 	gfx_v10_0_cp_enable(adev, false);
 	gfx_v10_0_enable_gui_idle_interrupt(adev, false);
-	gfx_v10_0_csb_vram_unpin(adev);
 
 	return 0;
 }
@@ -4384,7 +4371,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask, reg_mem_engine;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
 
 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 		switch (ring->me) {
@@ -4404,8 +4391,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	}
 
 	gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
-			       adev->nbio_funcs->get_hdp_flush_req_offset(adev),
-			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
 			       ref_and_mask, ref_and_mask, 0x20);
 }
 
@@ -5331,15 +5318,12 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
 
 static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
 {
-	/* init asic gds info */
-	switch (adev->asic_type) {
-	case CHIP_NAVI10:
-	default:
-		adev->gds.gds_size = 0x10000;
-		adev->gds.gds_compute_max_wave_id = 0x4ff;
-		break;
-	}
+	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+			    adev->gfx.config.max_sh_per_se *
+			    adev->gfx.config.max_shader_engines;
 
+	adev->gds.gds_size = 0x10000;
+	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
 	adev->gds.gws_size = 64;
 	adev->gds.oa_size = 16;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 7f0a63628c43..31f44d05e606 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1576,7 +1576,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev)
 static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config = 0;
-	u32 mc_shared_chmap, mc_arb_ramcfg;
+	u32 mc_arb_ramcfg;
 	u32 sx_debug_1;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
@@ -1678,7 +1678,6 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
 
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
-	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 791ba398f007..8f20a5dd44fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4258,7 +4258,7 @@ static int gfx_v7_0_late_init(void *handle)
 static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config;
-	u32 mc_shared_chmap, mc_arb_ramcfg;
+	u32 mc_arb_ramcfg;
 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
 	u32 tmp;
 
@@ -4335,7 +4335,6 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 		break;
 	}
 
-	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
@@ -4554,6 +4553,8 @@ static int gfx_v7_0_hw_init(void *handle)
 
 	gfx_v7_0_constants_init(adev);
 
+	/* init CSB */
+	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 	/* init rlc */
 	r = adev->gfx.rlc.funcs->resume(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 87dd55e9d72b..46f0533ba43f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-	int r;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-	if (unlikely(r != 0))
-		return r;
-
-	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-			AMDGPU_GEM_DOMAIN_VRAM);
-	if (!r)
-		adev->gfx.rlc.clear_state_gpu_addr =
-			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-	return r;
-}
-
-static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-	int r;
-
-	if (!adev->gfx.rlc.clear_state_obj)
-		return;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-	if (likely(r == 0)) {
-		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-	}
-}
-
 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1710,7 +1677,7 @@ fail:
 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config;
-	u32 mc_shared_chmap, mc_arb_ramcfg;
+	u32 mc_arb_ramcfg;
 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
 	u32 tmp;
 	int ret;
@@ -1850,7 +1817,6 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		break;
 	}
 
-	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
@@ -2103,7 +2069,7 @@ static int gfx_v8_0_sw_fini(void *handle)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 	amdgpu_gfx_mqd_sw_fini(adev);
-	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
 	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v8_0_mec_fini(adev);
@@ -3917,6 +3883,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 
 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
 {
+	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 	/* csib */
 	WREG32(mmRLC_CSIB_ADDR_HI,
 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -4591,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* map_queues packet doesn't need activate the queue,
+	 * so only kiq need set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
@@ -4837,10 +4807,6 @@ static int gfx_v8_0_hw_init(void *handle)
 	gfx_v8_0_init_golden_registers(adev);
 	gfx_v8_0_constants_init(adev);
 
-	r = gfx_v8_0_csb_vram_pin(adev);
-	if (r)
-		return r;
-
 	r = adev->gfx.rlc.funcs->resume(adev);
 	if (r)
 		return r;
@@ -4958,8 +4924,6 @@ static int gfx_v8_0_hw_fini(void *handle)
 		pr_err("rlc is busy, skip halt rlc\n");
 	amdgpu_gfx_rlc_exit_safe_mode(adev);
 
-	gfx_v8_0_csb_vram_unpin(adev);
-
 	return 0;
 }
 
@@ -6184,7 +6148,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
-	/* EVENT_WRITE_EOP - flush caches, send int */
+	/* Workaround for cache flush problems. First send a dummy EOP
+	 * event down the pipe with seq one below.
+	 */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+				 EOP_TC_ACTION_EN |
+				 EOP_TC_WB_ACTION_EN |
+				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+				 EVENT_INDEX(5)));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+				DATA_SEL(1) | INT_SEL(0));
+	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+	/* Then send the real EOP event down the pipe:
+	 * EVENT_WRITE_EOP - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 				 EOP_TC_ACTION_EN |
@@ -6926,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
-		8 +  /* FENCE for VM_FLUSH */
+		12 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
 		       the first COND_EXEC jump to the place just
@@ -6938,7 +6918,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 		31 + /*	DE_META */
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
-		8 + 8 + /* FENCE x2 */
+		12 + 12 + /* FENCE x2 */
 		2, /* SWITCH_BUFFER */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 97cf0b536873..46ab46757b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -517,9 +517,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -582,9 +582,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
@@ -676,9 +676,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
@@ -691,6 +691,8 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
 };
 
 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -737,6 +739,134 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 				     void *inject_if);
 
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+				uint64_t queue_mask)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring,
+		PACKET3_SET_RESOURCES_VMID_MASK(0) |
+		/* vmid_mask:0* queue_type:0 (KIQ) */
+		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+	amdgpu_ring_write(kiq_ring,
+			lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring,
+			upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				 struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+			 /*queue_type: normal compute queue */
+			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+			 /* alloc format: all_on_one_pipe */
+			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+			 /* num_queues: must be 1 */
+			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   enum amdgpu_unmap_queues_action action,
+				   u64 gpu_addr, u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	if (action == PREEMPT_QUEUES_NO_UNMAP) {
+		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, seq);
+	} else {
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+	}
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   u64 addr,
+				   u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+			  PACKET3_QUERY_STATUS_COMMAND(2));
+	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
+	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
+	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+	.set_resources_size = 8,
+	.map_queues_size = 7,
+	.unmap_queues_size = 6,
+	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
@@ -977,8 +1107,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
 	    (adev->gfx.mec_feature_version < 46) ||
 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
 	    (adev->gfx.pfp_feature_version < 46))
-		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
-			      GRBM requires 1-cycle delay in cp firmware\n");
+		DRM_WARN_ONCE("CP firmware version too old, please update!");
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
@@ -1038,17 +1167,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
 	case CHIP_VEGA20:
 		break;
 	case CHIP_RAVEN:
-		/* Disable GFXOFF on original raven.  There are combinations
-		 * of sbios and platforms that are not stable.
-		 */
-		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
-			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
-			 &&((adev->gfx.rlc_fw_version != 106 &&
-			     adev->gfx.rlc_fw_version < 531) ||
-			    (adev->gfx.rlc_fw_version == 53815) ||
-			    (adev->gfx.rlc_feature_version < 1) ||
-			    !adev->gfx.rlc.is_rlc_v2_1))
+		if (!(adev->rev_id >= 0x8 ||
+		      adev->pdev->device == 0x15d8) &&
+		    (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
+		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
@@ -1342,7 +1464,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
 
 			/* TODO: Determine if MEC2 JT FW loading can be removed
 				 for all GFX V9 asic and above */
-			if (adev->asic_type != CHIP_ARCTURUS) {
+			if (adev->asic_type != CHIP_ARCTURUS &&
+			    adev->asic_type != CHIP_RENOIR) {
 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
 				info->fw = adev->gfx.mec2_fw;
@@ -1680,39 +1803,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-	int r;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-	if (unlikely(r != 0))
-		return r;
-
-	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-			AMDGPU_GEM_DOMAIN_VRAM);
-	if (!r)
-		adev->gfx.rlc.clear_state_gpu_addr =
-			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-	return r;
-}
-
-static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-	int r;
-
-	if (!adev->gfx.rlc.clear_state_obj)
-		return;
-
-	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-	if (likely(r == 0)) {
-		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-	}
-}
-
 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1974,190 +2064,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
-				   struct amdgpu_ngg_buf *ngg_buf,
-				   int size_se,
-				   int default_size_se)
-{
-	int r;
-
-	if (size_se < 0) {
-		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
-		return -EINVAL;
-	}
-	size_se = size_se ? size_se : default_size_se;
-
-	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
-	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
-				    &ngg_buf->bo,
-				    &ngg_buf->gpu_addr,
-				    NULL);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
-		return r;
-	}
-	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
-
-	return r;
-}
-
-static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
-{
-	int i;
-
-	for (i = 0; i < NGG_BUF_MAX; i++)
-		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
-				      &adev->gfx.ngg.buf[i].gpu_addr,
-				      NULL);
-
-	memset(&adev->gfx.ngg.buf[0], 0,
-			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
-
-	adev->gfx.ngg.init = false;
-
-	return 0;
-}
-
-static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
-{
-	int r;
-
-	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
-		return 0;
-
-	/* GDS reserve memory: 64 bytes alignment */
-	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
-	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
-	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
-	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
-
-	/* Primitive Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
-				    amdgpu_prim_buf_per_se,
-				    64 * 1024);
-	if (r) {
-		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
-		goto err;
-	}
-
-	/* Position Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
-				    amdgpu_pos_buf_per_se,
-				    256 * 1024);
-	if (r) {
-		dev_err(adev->dev, "Failed to create Position Buffer\n");
-		goto err;
-	}
-
-	/* Control Sideband */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
-				    amdgpu_cntl_sb_buf_per_se,
-				    256);
-	if (r) {
-		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
-		goto err;
-	}
-
-	/* Parameter Cache, not created by default */
-	if (amdgpu_param_buf_per_se <= 0)
-		goto out;
-
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
-				    amdgpu_param_buf_per_se,
-				    512 * 1024);
-	if (r) {
-		dev_err(adev->dev, "Failed to create Parameter Cache\n");
-		goto err;
-	}
-
-out:
-	adev->gfx.ngg.init = true;
-	return 0;
-err:
-	gfx_v9_0_ngg_fini(adev);
-	return r;
-}
-
-static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
-	int r;
-	u32 data, base;
-
-	if (!amdgpu_ngg)
-		return 0;
-
-	/* Program buffer size */
-	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
-			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
-	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
-			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
-	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
-
-	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
-			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
-	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
-			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
-	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
-
-	/* Program buffer base address */
-	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
-	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
-	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
-
-	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
-	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
-	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
-
-	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
-	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
-	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
-
-	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
-	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
-	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
-
-	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
-	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
-	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
-
-	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
-	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
-	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
-
-	/* Clear GDS reserved memory */
-	r = amdgpu_ring_alloc(ring, 17);
-	if (r) {
-		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
-			  ring->name, r);
-		return r;
-	}
-
-	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
-			           (adev->gds.gds_size +
-				    adev->gfx.ngg.gds_reserve_size));
-
-	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
-	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
-				PACKET3_DMA_DATA_DST_SEL(1) |
-				PACKET3_DMA_DATA_SRC_SEL(2)));
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
-				adev->gfx.ngg.gds_reserve_size);
-
-	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
-
-	amdgpu_ring_commit(ring);
-
-	return 0;
-}
-
 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
 				      int mec, int pipe, int queue)
 {
@@ -2325,10 +2231,6 @@ static int gfx_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	r = gfx_v9_0_ngg_init(adev);
-	if (r)
-		return r;
-
 	return 0;
 }
 
@@ -2338,19 +2240,7 @@ static int gfx_v9_0_sw_fini(void *handle)
 	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
-			adev->gfx.ras_if) {
-		struct ras_common_if *ras_if = adev->gfx.ras_if;
-		struct ras_ih_if ih_info = {
-			.head = *ras_if,
-		};
-
-		amdgpu_ras_debugfs_remove(adev, ras_if);
-		amdgpu_ras_sysfs_remove(adev, ras_if);
-		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
-		amdgpu_ras_feature_enable(adev, ras_if, 0);
-		kfree(ras_if);
-	}
+	amdgpu_gfx_ras_fini(adev);
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -2358,11 +2248,10 @@ static int gfx_v9_0_sw_fini(void *handle)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 	amdgpu_gfx_mqd_sw_fini(adev);
-	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
 	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v9_0_mec_fini(adev);
-	gfx_v9_0_ngg_fini(adev);
 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
@@ -2601,6 +2490,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 
 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
 {
+	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 	/* csib */
 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -2930,7 +2820,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
 	 * And it's needed by gfxoff feature.
 	 */
 	if (adev->gfx.rlc.is_rlc_v2_1) {
-		gfx_v9_1_init_rlc_save_restore_list(adev);
+		if (adev->asic_type == CHIP_VEGA12 ||
+		    (adev->asic_type == CHIP_RAVEN &&
+		     adev->rev_id >= 8))
+			gfx_v9_1_init_rlc_save_restore_list(adev);
 		gfx_v9_0_enable_save_restore_machine(adev);
 	}
 
@@ -3341,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint64_t queue_mask = 0;
-	int r, i;
-
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		/* This situation may be hit in the future if a new HW
-		 * generation exposes more than 64 queues. If so, the
-		 * definition of queue_mask needs updating */
-		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
-			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
-			break;
-		}
-
-		queue_mask |= (1ull << i);
-	}
-
-	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		return r;
-	}
-
-	/* set resources */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
-	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
-				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
-				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
-				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
-				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
-				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
-				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
-				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
-		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
-	}
-
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ enable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3545,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* map_queues packet doesn't need activate the queue,
+	 * so only kiq need set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
@@ -3815,7 +3643,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
 			goto done;
 	}
 
-	r = gfx_v9_0_kiq_kcq_enable(adev);
+	r = amdgpu_gfx_enable_kcq(adev);
 done:
 	return r;
 }
@@ -3872,6 +3700,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	if (adev->asic_type != CHIP_ARCTURUS)
+		return;
+
+	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
+				adev->df.hash_status.hash_64k);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
+				adev->df.hash_status.hash_2m);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
+				adev->df.hash_status.hash_1g);
+	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
+}
+
 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
 {
 	if (adev->asic_type != CHIP_ARCTURUS)
@@ -3889,9 +3734,7 @@ static int gfx_v9_0_hw_init(void *handle)
 
 	gfx_v9_0_constants_init(adev);
 
-	r = gfx_v9_0_csb_vram_pin(adev);
-	if (r)
-		return r;
+	gfx_v9_0_init_tcp_config(adev);
 
 	r = adev->gfx.rlc.funcs->resume(adev);
 	if (r)
@@ -3901,42 +3744,6 @@ static int gfx_v9_0_hw_init(void *handle)
 	if (r)
 		return r;
 
-	if (adev->asic_type != CHIP_ARCTURUS) {
-		r = gfx_v9_0_ngg_en(adev);
-		if (r)
-			return r;
-	}
-
-	return r;
-}
-
-static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
-{
-	int r, i;
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-
-	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
-	if (r)
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
-						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
-		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-	}
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ disable failed\n");
-
 	return r;
 }
 
@@ -3948,8 +3755,10 @@ static int gfx_v9_0_hw_fini(void *handle)
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
-	/* disable KCQ to avoid CPC touch memory not valid anymore */
-	gfx_v9_0_kcq_disable(adev);
+	/* DF freeze and kcq disable will fail */
+	if (!amdgpu_ras_intr_triggered())
+		/* disable KCQ to avoid CPC touch memory not valid anymore */
+		amdgpu_gfx_disable_kcq(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
 		gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3978,8 +3787,6 @@ static int gfx_v9_0_hw_fini(void *handle)
 	gfx_v9_0_cp_enable(adev, false);
 	adev->gfx.rlc.funcs->stop(adev);
 
-	gfx_v9_0_csb_vram_unpin(adev);
-
 	return 0;
 }
 
@@ -4085,9 +3892,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 	uint64_t clock;
 
 	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
-		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+		uint32_t tmp, lsb, msb, i = 0;
+		do {
+			if (i != 0)
+				udelay(1);
+			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+			i++;
+		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+	} else {
+		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+	}
 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 	return clock;
 }
@@ -4155,30 +3975,58 @@ static const u32 sgpr_init_compute_shader[] =
 	0xbe800080, 0xbf810000,
 };
 
+/* When below register arrays changed, please update gpr_reg_size,
+  and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
+  to cover all gfx9 ASICs */
 static const struct soc15_reg_entry vgpr_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const struct soc15_reg_entry sgpr1_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
 };
 
-static const struct soc15_reg_entry sgpr_init_regs[] = {
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+static const struct soc15_reg_entry sgpr2_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
 };
 
 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
@@ -4202,6 +4050,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
@@ -4214,6 +4063,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
+   { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
 };
 
 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
@@ -4221,6 +4071,10 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
 	int i, r;
 
+	/* only support when RAS is enabled */
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+		return 0;
+
 	r = amdgpu_ring_alloc(ring, 7);
 	if (r) {
 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
@@ -4268,6 +4122,12 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	unsigned total_size, vgpr_offset, sgpr_offset;
 	u64 gpu_addr;
 
+	int compute_dim_x = adev->gfx.config.max_shader_engines *
+						adev->gfx.config.max_cu_per_sh *
+						adev->gfx.config.max_sh_per_se;
+	int sgpr_work_group_size = 5;
+	int gpr_reg_size = compute_dim_x / 16 + 6;
+
 	/* only support when RAS is enabled */
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
 		return 0;
@@ -4277,9 +4137,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 		return 0;
 
 	total_size =
-		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
+	total_size +=
+		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
 	total_size +=
-		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
 	total_size = ALIGN(total_size, 256);
 	vgpr_offset = total_size;
 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
@@ -4306,7 +4168,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* VGPR */
 	/* write the register state for the compute dispatch */
-	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+	for (i = 0; i < gpr_reg_size; i++) {
 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
 								- PACKET3_SET_SH_REG_START;
@@ -4322,7 +4184,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* write dispatch packet */
 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
-	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
+	ib.ptr[ib.length_dw++] = 1; /* y */
+	ib.ptr[ib.length_dw++] = 1; /* z */
+	ib.ptr[ib.length_dw++] =
+		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* write CS partial flush packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+	/* SGPR1 */
+	/* write the register state for the compute dispatch */
+	for (i = 0; i < gpr_reg_size; i++) {
+		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
+								- PACKET3_SET_SH_REG_START;
+		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
+	}
+	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+							- PACKET3_SET_SH_REG_START;
+	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+	/* write dispatch packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
 	ib.ptr[ib.length_dw++] = 1; /* y */
 	ib.ptr[ib.length_dw++] = 1; /* z */
 	ib.ptr[ib.length_dw++] =
@@ -4332,13 +4222,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
 
-	/* SGPR */
+	/* SGPR2 */
 	/* write the register state for the compute dispatch */
-	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+	for (i = 0; i < gpr_reg_size; i++) {
 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
-		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
 								- PACKET3_SET_SH_REG_START;
-		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
 	}
 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
@@ -4350,7 +4240,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* write dispatch packet */
 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
-	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
 	ib.ptr[ib.length_dw++] = 1; /* y */
 	ib.ptr[ib.length_dw++] = 1; /* z */
 	ib.ptr[ib.length_dw++] =
@@ -4403,6 +4293,7 @@ static int gfx_v9_0_early_init(void *handle)
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
@@ -4411,34 +4302,11 @@ static int gfx_v9_0_early_init(void *handle)
 	return 0;
 }
 
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
-		struct amdgpu_iv_entry *entry);
-
 static int gfx_v9_0_ecc_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ras_common_if **ras_if = &adev->gfx.ras_if;
-	struct ras_ih_if ih_info = {
-		.cb = gfx_v9_0_process_ras_data_cb,
-	};
-	struct ras_fs_if fs_info = {
-		.sysfs_name = "gfx_err_count",
-		.debugfs_name = "gfx_err_inject",
-	};
-	struct ras_common_if ras_block = {
-		.block = AMDGPU_RAS_BLOCK__GFX,
-		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-		.sub_block_index = 0,
-		.name = "gfx",
-	};
 	int r;
 
-	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
-		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
-		return 0;
-	}
-
 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
 	if (r)
 		return r;
@@ -4448,72 +4316,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 	if (r)
 		return r;
 
-	/* handle resume path. */
-	if (*ras_if) {
-		/* resend ras TA enable cmd during resume.
-		 * prepare to handle failure.
-		 */
-		ih_info.head = **ras_if;
-		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-		if (r) {
-			if (r == -EAGAIN) {
-				/* request a gpu reset. will run again. */
-				amdgpu_ras_request_reset_on_boot(adev,
-						AMDGPU_RAS_BLOCK__GFX);
-				return 0;
-			}
-			/* fail to enable ras, cleanup all. */
-			goto irq;
-		}
-		/* enable successfully. continue. */
-		goto resume;
-	}
-
-	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
-	if (!*ras_if)
-		return -ENOMEM;
-
-	**ras_if = ras_block;
-
-	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r) {
-		if (r == -EAGAIN) {
-			amdgpu_ras_request_reset_on_boot(adev,
-					AMDGPU_RAS_BLOCK__GFX);
-			r = 0;
-		}
-		goto feature;
-	}
-
-	ih_info.head = **ras_if;
-	fs_info.head = **ras_if;
-
-	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
+	r = amdgpu_gfx_ras_late_init(adev);
 	if (r)
-		goto interrupt;
-
-	amdgpu_ras_debugfs_create(adev, &fs_info);
-
-	r = amdgpu_ras_sysfs_create(adev, &fs_info);
-	if (r)
-		goto sysfs;
-resume:
-	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
-	if (r)
-		goto irq;
+		return r;
 
 	return 0;
-irq:
-	amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
-	amdgpu_ras_debugfs_remove(adev, *ras_if);
-	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
-	amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
-	kfree(*ras_if);
-	*ras_if = NULL;
-	return r;
 }
 
 static int gfx_v9_0_late_init(void *handle)
@@ -4578,16 +4385,14 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
 {
 	amdgpu_gfx_rlc_enter_safe_mode(adev);
 
-	if (is_support_sw_smu(adev) && !enable)
-		smu_set_gfx_cgpg(&adev->smu, enable);
-
 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
 	} else {
 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
-		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
 	}
 
 	amdgpu_gfx_rlc_exit_safe_mode(adev);
@@ -4856,8 +4661,6 @@ static int gfx_v9_0_set_powergating_state(void *handle,
 			gfx_v9_0_enable_cp_power_gating(adev, false);
 
 		/* update gfx cgpg state */
-		if (is_support_sw_smu(adev) && enable)
-			smu_set_gfx_cgpg(&adev->smu, enable);
 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
 
 		/* update mgcg state */
@@ -4988,7 +4791,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask, reg_mem_engine;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
 
 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 		switch (ring->me) {
@@ -5008,8 +4811,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	}
 
 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
-			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
-			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
 			      ref_and_mask, ref_and_mask, 0x20);
 }
 
@@ -5741,313 +5544,446 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
-		struct amdgpu_iv_entry *entry)
-{
-	/* TODO ue will trigger an interrupt. */
-	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-	if (adev->gfx.funcs->query_ras_error_count)
-		adev->gfx.funcs->query_ras_error_count(adev, err_data);
-	amdgpu_ras_reset_gpu(adev, 0);
-	return AMDGPU_RAS_SUCCESS;
-}
-
-static const struct {
-	const char *name;
-	uint32_t ip;
-	uint32_t inst;
-	uint32_t seg;
-	uint32_t reg_offset;
-	uint32_t per_se_instance;
-	int32_t num_instance;
-	uint32_t sec_count_mask;
-	uint32_t ded_count_mask;
-} gfx_ras_edc_regs[] = {
-	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
-	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
-	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
-	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
-	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
-	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
-	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
-	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
-	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
-	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
-	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
-	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
-	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
-	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
-	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
-	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
-	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
-	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
-	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
-	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
-	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
-	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
-	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
-	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
-	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
-	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
-	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
-	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
-	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
+
+static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
+	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
+	},
+	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
+	},
+	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
+	  0, 0
+	},
+	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
+	  0, 0
+	},
+	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
+	},
+	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
+	  0, 0
+	},
+	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
+	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
+	},
+	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
+	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
+	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
+	},
+	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
+	  0, 0
+	},
+	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
+	  0, 0
+	},
+	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
+	  0, 0
+	},
+	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
+	},
+	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
+	  0, 0
+	},
 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
-	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
+	},
 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
+	},
 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
+	  0, 0
+	},
 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
+	},
 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
+	},
 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
+	},
 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
-	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
-	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
-	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
-	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
-	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
-	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
-	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
-	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
-	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
-	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
-	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
-	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
-	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
-	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
-	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
-	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
-	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
-	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
-	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
-	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
-	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
-	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
-	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
-	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
-	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
-	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
-	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
-	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
-	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
-	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
-	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
-	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
-	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
-	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
+	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
+	},
+	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
+	  0, 0
+	},
+	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
+	},
+	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
+	},
+	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
+	},
+	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
+	},
+	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
+	},
+	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
+	},
+	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
+	  0, 0
+	},
 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
-	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
-	  0 },
-	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
-	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
+	  0, 0
+	},
 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
-	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
-	  0 },
-	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
-	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
-	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
-	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
-	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
-	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
-	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
-	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
-	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
-	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
-	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
-	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
-	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
-	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
-	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
-	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
-	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
-	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
-	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
-	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
-	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
-	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
-	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
-	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
-	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
+	  0, 0
+	},
+	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
+	  0, 0
+	},
+	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
+	  0, 0
+	},
+	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
+	},
+	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
+	},
+	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+	  0, 0
+	},
+	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
+	  0, 0
+	},
+	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
+	},
+	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
+	},
+	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
+	},
+	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
+	},
+	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
+	},
+	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
+	},
+	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
+	},
+	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
+	},
+	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
+	},
+	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
+	},
+	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
+	},
 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
-	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
-	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
+	},
+	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
+	},
 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
-	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
-	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
+	},
+	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
+	},
 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
-	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
-	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
-	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
-	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
-	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
-	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
-	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
-	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
-	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
-	  0 },
-	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
-	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
-	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
-	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
-	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
-	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
-	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
-	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
-	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
-	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
-	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
-	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
-	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
-	  0 },
-	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
-	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
-	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
-	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
-	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
-	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
-	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
-	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
-	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
-	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
-	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
-	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
-	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
-	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
-	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
-	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
-	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
-	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
-	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
-	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
-	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
-	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
-	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
-	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
-	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
+	},
+	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
+	},
+	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
+	},
+	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
+	},
+	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
+	},
+	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
+	},
+	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
+	},
+	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
+	},
+	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
+	},
+	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
+	},
+	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
+	},
+	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
+	  0, 0
+	},
+	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
+	  0, 0
+	},
+	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
+	},
+	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
+	},
+	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
+	},
+	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
+	},
+	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
+	},
+	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
+	},
+	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
+	},
+	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
+	},
+	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+	  0, 0
+	},
+	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+	  0, 0
+	},
+	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+	  0, 0
+	},
+	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+	  0, 0
+	},
+	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+	  0, 0
+	}
 };
 
 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
@@ -6096,14 +6032,217 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 	return ret;
 }
 
+static const char *vml2_mems[] = {
+	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
+	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
+	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
+	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
+	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
+	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
+	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
+	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
+};
+
+static const char *vml2_walker_mems[] = {
+	"UTC_VML2_CACHE_PDE0_MEM0",
+	"UTC_VML2_CACHE_PDE0_MEM1",
+	"UTC_VML2_CACHE_PDE1_MEM0",
+	"UTC_VML2_CACHE_PDE1_MEM1",
+	"UTC_VML2_CACHE_PDE2_MEM0",
+	"UTC_VML2_CACHE_PDE2_MEM1",
+	"UTC_VML2_RDIF_LOG_FIFO",
+};
+
+static const char *atc_l2_cache_2m_mems[] = {
+	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char *atc_l2_cache_4k_mems[] = {
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
+					 struct ras_err_data *err_data)
+{
+	uint32_t i, data;
+	uint32_t sec_count, ded_count;
+
+	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+	for (i = 0; i < 16; i++) {
+		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+
+		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
+		if (sec_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+				 vml2_mems[i], sec_count);
+			err_data->ce_count += sec_count;
+		}
+
+		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
+		if (ded_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+				 vml2_mems[i], ded_count);
+			err_data->ue_count += ded_count;
+		}
+	}
+
+	for (i = 0; i < 7; i++) {
+		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+
+		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+						SEC_COUNT);
+		if (sec_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+				 vml2_walker_mems[i], sec_count);
+			err_data->ce_count += sec_count;
+		}
+
+		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+						DED_COUNT);
+		if (ded_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+				 vml2_walker_mems[i], ded_count);
+			err_data->ue_count += ded_count;
+		}
+	}
+
+	for (i = 0; i < 4; i++) {
+		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+
+		sec_count = (data & 0x00006000L) >> 0xd;
+		if (sec_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+				 atc_l2_cache_2m_mems[i], sec_count);
+			err_data->ce_count += sec_count;
+		}
+	}
+
+	for (i = 0; i < 32; i++) {
+		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+
+		sec_count = (data & 0x00006000L) >> 0xd;
+		if (sec_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+				 atc_l2_cache_4k_mems[i], sec_count);
+			err_data->ce_count += sec_count;
+		}
+
+		ded_count = (data & 0x00018000L) >> 0xf;
+		if (ded_count) {
+			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+				 atc_l2_cache_4k_mems[i], ded_count);
+			err_data->ue_count += ded_count;
+		}
+	}
+
+	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+
+	return 0;
+}
+
+static int __get_ras_error_count(const struct soc15_reg_entry *reg,
+	uint32_t se_id, uint32_t inst_id, uint32_t value,
+	uint32_t *sec_count, uint32_t *ded_count)
+{
+	uint32_t i;
+	uint32_t sec_cnt, ded_cnt;
+
+	for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
+		if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
+			gc_ras_fields_vg20[i].seg != reg->seg ||
+			gc_ras_fields_vg20[i].inst != reg->inst)
+			continue;
+
+		sec_cnt = (value &
+				gc_ras_fields_vg20[i].sec_count_mask) >>
+				gc_ras_fields_vg20[i].sec_count_shift;
+		if (sec_cnt) {
+			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+				gc_ras_fields_vg20[i].name,
+				se_id, inst_id,
+				sec_cnt);
+			*sec_count += sec_cnt;
+		}
+
+		ded_cnt = (value &
+				gc_ras_fields_vg20[i].ded_count_mask) >>
+				gc_ras_fields_vg20[i].ded_count_shift;
+		if (ded_cnt) {
+			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+				gc_ras_fields_vg20[i].name,
+				se_id, inst_id,
+				ded_cnt);
+			*ded_count += ded_cnt;
+		}
+	}
+
+	return 0;
+}
+
 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-	uint32_t sec_count, ded_count;
-	uint32_t i;
+	uint32_t sec_count = 0, ded_count = 0;
+	uint32_t i, j, k;
 	uint32_t reg_value;
-	uint32_t se_id, instance_id;
 
 	if (adev->asic_type != CHIP_VEGA20)
 		return -EINVAL;
@@ -6112,71 +6251,29 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	err_data->ce_count = 0;
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
-		for (instance_id = 0; instance_id < 256; instance_id++) {
-			for (i = 0;
-			     i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
-			     i++) {
-				if (se_id != 0 &&
-				    !gfx_ras_edc_regs[i].per_se_instance)
-					continue;
-				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
-					continue;
 
-				gfx_v9_0_select_se_sh(adev, se_id, 0,
-						      instance_id);
-
-				reg_value = RREG32(
-					adev->reg_offset[gfx_ras_edc_regs[i].ip]
-							[gfx_ras_edc_regs[i].inst]
-							[gfx_ras_edc_regs[i].seg] +
-					gfx_ras_edc_regs[i].reg_offset);
-				sec_count = reg_value &
-					    gfx_ras_edc_regs[i].sec_count_mask;
-				ded_count = reg_value &
-					    gfx_ras_edc_regs[i].ded_count_mask;
-				if (sec_count) {
-					DRM_INFO(
-						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
-						se_id, instance_id,
-						gfx_ras_edc_regs[i].name,
-						sec_count);
-					err_data->ce_count++;
-				}
-
-				if (ded_count) {
-					DRM_INFO(
-						"Instance[%d][%d]: SubBlock %s, DED %d\n",
-						se_id, instance_id,
-						gfx_ras_edc_regs[i].name,
-						ded_count);
-					err_data->ue_count++;
-				}
+	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
+		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
+			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
+				gfx_v9_0_select_se_sh(adev, j, 0, k);
+				reg_value =
+					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+				if (reg_value)
+					__get_ras_error_count(&sec_ded_counter_registers[i],
+							j, k, reg_value,
+							&sec_count, &ded_count);
 			}
 		}
 	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
 
-	return 0;
-}
+	err_data->ce_count += sec_count;
+	err_data->ue_count += ded_count;
 
-static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
-				  struct amdgpu_irq_src *source,
-				  struct amdgpu_iv_entry *entry)
-{
-	struct ras_common_if *ras_if = adev->gfx.ras_if;
-	struct ras_dispatch_if ih_data = {
-		.entry = entry,
-	};
-
-	if (!ras_if)
-		return 0;
+	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
-	ih_data.head = *ras_if;
+	gfx_v9_0_query_utc_edc_status(adev, err_data);
 
-	DRM_ERROR("CP ECC ERROR IRQ\n");
-	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
 	return 0;
 }
 
@@ -6343,7 +6440,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
 
 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
 	.set = gfx_v9_0_set_cp_ecc_error_state,
-	.process = gfx_v9_0_cp_ecc_error_irq,
+	.process = amdgpu_gfx_cp_ecc_error_irq,
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 6ce37ce77d14..1a2f18b908fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -75,40 +75,45 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
 	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
 
-	/* Program the system aperture low logical page number. */
-	WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
-	if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
-		/*
-		 * Raven2 has a HW issue that it is unable to use the vram which
-		 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
-		 * workaround that increase system aperture high address (add 1)
-		 * to get rid of the VM fault and hardware hang.
-		 */
-		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max((adev->gmc.fb_end >> 18) + 0x1,
-				 adev->gmc.agp_end >> 18));
-	else
-		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
-	/* Set default page address. */
-	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
-		+ adev->vm_manager.vram_base_offset;
-	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
-		     (u32)(value >> 12));
-	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
-		     (u32)(value >> 44));
-
-	/* Program "protection fault". */
-	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
-		     (u32)(adev->dummy_page_addr >> 12));
-	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
-		     (u32)((u64)adev->dummy_page_addr >> 44));
-
-	WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
-		       ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+	if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+		/* Program the system aperture low logical page number. */
+		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+		if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+			/*
+			* Raven2 has a HW issue that it is unable to use the
+			* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+			* So here is the workaround that increase system
+			* aperture high address (add 1) to get rid of the VM
+			* fault and hardware hang.
+			*/
+			WREG32_SOC15_RLC(GC, 0,
+					 mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+					 max((adev->gmc.fb_end >> 18) + 0x1,
+					     adev->gmc.agp_end >> 18));
+		else
+			WREG32_SOC15_RLC(
+				GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+				max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+		/* Set default page address. */
+		value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+			adev->vm_manager.vram_base_offset;
+		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+			     (u32)(value >> 12));
+		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+			     (u32)(value >> 44));
+
+		/* Program "protection fault". */
+		WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+			     (u32)(adev->dummy_page_addr >> 12));
+		WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+			     (u32)((u64)adev->dummy_page_addr >> 44));
+
+		WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+			       ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+	}
 }
 
 static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
@@ -178,6 +183,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
 }
 
@@ -262,7 +269,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
 
 int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev)) {
+	if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
 		/*
 		 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
 		 * VF copy registers so vbios post doesn't program them, for
@@ -278,10 +285,12 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 	gfxhub_v1_0_init_gart_aperture_regs(adev);
 	gfxhub_v1_0_init_system_aperture_regs(adev);
 	gfxhub_v1_0_init_tlb_regs(adev);
-	gfxhub_v1_0_init_cache_regs(adev);
+	if (!amdgpu_sriov_vf(adev))
+		gfxhub_v1_0_init_cache_regs(adev);
 
 	gfxhub_v1_0_enable_system_domain(adev);
-	gfxhub_v1_0_disable_identity_aperture(adev);
+	if (!amdgpu_sriov_vf(adev))
+		gfxhub_v1_0_disable_identity_aperture(adev);
 	gfxhub_v1_0_setup_vmid_config(adev);
 	gfxhub_v1_0_program_invalidation(adev);
 
@@ -365,6 +374,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
 	hub->ctx0_ptb_addr_hi32 =
 		SOC15_REG_OFFSET(GC, 0,
 				 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	hub->vm_inv_eng0_sem =
+		SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
 	hub->vm_inv_eng0_req =
 		SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
 	hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 5e9ab8eb214a..c0ab71df0d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
 	u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
 	u32 max_region =
 		REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+	u32 max_num_physical_nodes   = 0;
+	u32 max_physical_node_id     = 0;
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		max_num_physical_nodes   = 4;
+		max_physical_node_id     = 3;
+		break;
+	case CHIP_ARCTURUS:
+		max_num_physical_nodes   = 8;
+		max_physical_node_id     = 7;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/* PF_MAX_REGION=0 means xgmi is disabled */
 	if (max_region) {
 		adev->gmc.xgmi.num_physical_nodes = max_region + 1;
-		if (adev->gmc.xgmi.num_physical_nodes > 4)
+		if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
 			return -EINVAL;
 
 		adev->gmc.xgmi.physical_node_id =
 			REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
-		if (adev->gmc.xgmi.physical_node_id > 3)
+		if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
 			return -EINVAL;
 		adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
 			RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index db10640a3b2f..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
 	return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
 }
 
-static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base)
 {
-	uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+	/* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
+	int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+			- mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
 
+	WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+				offset * vmid, lower_32_bits(page_table_base));
 
-	WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-		     lower_32_bits(value));
-
-	WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-		     upper_32_bits(value));
+	WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+				offset * vmid, upper_32_bits(page_table_base));
 }
 
 static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
-	gfxhub_v2_0_init_gart_pt_regs(adev);
+	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+	gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
 
 	WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 		     (u32)(adev->gmc.gart_start >> 12));
@@ -175,6 +179,8 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
 	WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
 }
 
@@ -350,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
 	hub->ctx0_ptb_addr_hi32 =
 		SOC15_REG_OFFSET(GC, 0,
 				 mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	hub->vm_inv_eng0_sem =
+		SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
 	hub->vm_inv_eng0_req =
 		SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
 	hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
index 06807940748b..392b8cd94fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
 					  bool value);
 void gfxhub_v2_0_init(struct amdgpu_device *adev);
 u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev);
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5c7d5f73f54f..bbede09983e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
 #include "navi10_enum.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 
 #include "nbio_v2_3.h"
@@ -219,6 +222,34 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
 	return req;
 }
 
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+				       uint32_t vmhub)
+{
+	return ((vmhub == AMDGPU_MMHUB_0 ||
+		 vmhub == AMDGPU_MMHUB_1) &&
+		(!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -229,12 +260,35 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
 static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 				   unsigned int vmhub, uint32_t flush_type)
 {
+	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
 	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
 	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
 	unsigned int i;
 
+	spin_lock(&adev->gmc.invalidate_lock);
+	/*
+	 * It may lose gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid entering power gated state
+	 * to WA the Issue
+	 */
+
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore) {
+		for (i = 0; i < adev->usec_timeout; i++) {
+			/* a read return value of 1 means semaphore acuqire */
+			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+			if (tmp & 0x1)
+				break;
+			udelay(1);
+		}
+
+		if (i >= adev->usec_timeout)
+			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+	}
+
 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
 	/*
@@ -254,6 +308,16 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 		udelay(1);
 	}
 
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/*
+		 * add semaphore release after invalidation,
+		 * write with 0 means semaphore release
+		 */
+		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+	spin_unlock(&adev->gmc.invalidate_lock);
+
 	if (i < adev->usec_timeout)
 		return;
 
@@ -278,7 +342,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	int r;
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	mutex_lock(&adev->mman.gtt_window_lock);
 
@@ -292,7 +356,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 	if (!adev->mman.buffer_funcs_enabled ||
 	    !adev->ib_pool_ready ||
-	    adev->in_gpu_reset) {
+	    adev->in_gpu_reset ||
+	    ring->sched.ready == false) {
 		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
 		mutex_unlock(&adev->mman.gtt_window_lock);
 		return;
@@ -331,13 +396,84 @@ error_alloc:
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret	&& queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v10_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v10_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
+	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
 	unsigned eng = ring->vm_inv_eng;
 
+	/*
+	 * It may lose gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid entering power gated state
+	 * to WA the Issue
+	 */
+
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/* a read return value of 1 means semaphore acuqire */
+		amdgpu_ring_emit_reg_wait(ring,
+					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 			      lower_32_bits(pd_addr));
 
@@ -348,6 +484,14 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    hub->vm_inv_eng0_ack + eng,
 					    req, 1 << vmid);
 
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/*
+		 * add semaphore release after invalidation,
+		 * write with 0 means semaphore release
+		 */
+		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
 	return pd_addr;
 }
 
@@ -396,43 +540,23 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
  * 1 system
  * 0 valid
  */
-static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
-					   uint32_t flags)
-{
-	uint64_t pte_flag = 0;
 
-	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
-		pte_flag |= AMDGPU_PTE_EXECUTABLE;
-	if (flags & AMDGPU_VM_PAGE_READABLE)
-		pte_flag |= AMDGPU_PTE_READABLE;
-	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-		pte_flag |= AMDGPU_PTE_WRITEABLE;
-
-	switch (flags & AMDGPU_VM_MTYPE_MASK) {
+static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+{
+	switch (flags) {
 	case AMDGPU_VM_MTYPE_DEFAULT:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
 	case AMDGPU_VM_MTYPE_NC:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
 	case AMDGPU_VM_MTYPE_WC:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
 	case AMDGPU_VM_MTYPE_CC:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
 	case AMDGPU_VM_MTYPE_UC:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
 	default:
-		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
 	}
-
-	if (flags & AMDGPU_VM_PAGE_PRT)
-		pte_flag |= AMDGPU_PTE_PRT;
-
-	return pte_flag;
 }
 
 static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -459,12 +583,33 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
 	}
 }
 
+static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
+				 struct amdgpu_bo_va_mapping *mapping,
+				 uint64_t *flags)
+{
+	*flags &= ~AMDGPU_PTE_EXECUTABLE;
+	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+
+	if (mapping->flags & AMDGPU_PTE_PRT) {
+		*flags |= AMDGPU_PTE_PRT;
+		*flags |= AMDGPU_PTE_SNOOPED;
+		*flags |= AMDGPU_PTE_LOG;
+		*flags |= AMDGPU_PTE_SYSTEM;
+		*flags &= ~AMDGPU_PTE_VALID;
+	}
+}
+
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
-	.get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
-	.get_vm_pde = gmc_v10_0_get_vm_pde
+	.map_mtype = gmc_v10_0_map_mtype,
+	.get_vm_pde = gmc_v10_0_get_vm_pde,
+	.get_vm_pte = gmc_v10_0_get_vm_pte
 };
 
 static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -493,22 +638,18 @@ static int gmc_v10_0_early_init(void *handle)
 static int gmc_v10_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
-	unsigned i;
-
-	for(i = 0; i < adev->num_rings; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		unsigned vmhub = ring->funcs->vmhub;
+	int r;
 
-		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
-		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
-			 ring->idx, ring->name, ring->vm_inv_eng,
-			 ring->funcs->vmhub);
-	}
+	/*
+	 * Can't free the stolen VGA memory when it might be used for memory
+	 * training again.
+	 */
+	if (!adev->fw_vram_usage.mem_train_support)
+		amdgpu_bo_late_init(adev);
 
-	/* Engine 17 is used for GART flushes */
-	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
-		BUG_ON(vm_inv_eng[i] > 17);
+	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+	if (r)
+		return r;
 
 	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 }
@@ -518,8 +659,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = 0;
 
-	if (!amdgpu_sriov_vf(adev))
-		base = gfxhub_v2_0_get_fb_location(adev);
+	base = gfxhub_v2_0_get_fb_location(adev);
 
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc);
@@ -539,24 +679,13 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
  */
 static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
 {
-	int chansize, numchan;
-
-	if (!amdgpu_emu_mode)
-		adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
-	else {
-		/* hard code vram_width for emulation */
-		chansize = 128;
-		numchan = 1;
-		adev->gmc.vram_width = numchan * chansize;
-	}
-
 	/* Could aper size report 0 ? */
 	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 	/* size in MB on si */
 	adev->gmc.mc_vram_size =
-		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
 	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
 
@@ -635,7 +764,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
 
 static int gmc_v10_0_sw_init(void *handle)
 {
-	int r;
+	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	gfxhub_v2_0_init(adev);
@@ -643,7 +772,15 @@ static int gmc_v10_0_sw_init(void *handle)
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
-	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+	r = amdgpu_atomfirmware_get_vram_info(adev,
+		&vram_width, &vram_type, &vram_vendor);
+	if (!amdgpu_emu_mode)
+		adev->gmc.vram_width = vram_width;
+	else
+		adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+
+	adev->gmc.vram_type = vram_type;
+	adev->gmc.vram_vendor = vram_vendor;
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
@@ -664,6 +801,10 @@ static int gmc_v10_0_sw_init(void *handle)
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
 			      VMC_1_0__SRCID__VM_FAULT,
 			      &adev->gmc.vm_fault);
+
+	if (r)
+		return r;
+
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
 			      UTCL2_1_0__SRCID__FAULT,
 			      &adev->gmc.vm_fault);
@@ -676,15 +817,6 @@ static int gmc_v10_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	/*
-	 * Reserve 8M stolen memory for navi10 like vega10
-	 * TODO: will check if it's really needed on asic.
-	 */
-	if (amdgpu_emu_mode == 1)
-		adev->gmc.stolen_size = 0;
-	else
-		adev->gmc.stolen_size = 9 * 1024 *1024;
-
 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
 	if (r) {
 		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
@@ -697,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle)
 
 	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);
 
+	/*
+	 * In dual GPUs scenario, stolen_size is assigned to zero on the
+	 * secondary GPU, since there is no pre-OS console using that memory.
+	 * Then the bottom region of VRAM was allocated as GTT, unfortunately a
+	 * small region of bottom VRAM was encroached by UMC firmware during
+	 * GDDR6 BIST training, this cause page fault.
+	 * The page fault can be fixed by forcing stolen_size to 3MB, then the
+	 * bottom region of VRAM was allocated as stolen memory, GTT corruption
+	 * avoid.
+	 */
+	adev->gmc.stolen_size = max(adev->gmc.stolen_size,
+				    AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
@@ -736,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
 static int gmc_v10_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	void *stolen_vga_buf;
+
+	/*
+	 * Free the stolen memory if it wasn't already freed in late_init
+	 * because of memory training.
+	 */
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
 
 	amdgpu_vm_manager_fini(adev);
 	gmc_v10_0_gart_fini(adev);
@@ -793,7 +945,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 
 	/* Flush HDP after it is initialized */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
 		false : true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 9fb1765e92d1..b205039350b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -386,27 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	return pd_addr;
 }
 
-static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
-					  uint32_t flags)
-{
-	uint64_t pte_flag = 0;
-
-	if (flags & AMDGPU_VM_PAGE_READABLE)
-		pte_flag |= AMDGPU_PTE_READABLE;
-	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-		pte_flag |= AMDGPU_PTE_WRITEABLE;
-	if (flags & AMDGPU_VM_PAGE_PRT)
-		pte_flag |= AMDGPU_PTE_PRT;
-
-	return pte_flag;
-}
-
 static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				uint64_t *addr, uint64_t *flags)
 {
 	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
+static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
+				struct amdgpu_bo_va_mapping *mapping,
+				uint64_t *flags)
+{
+	*flags &= ~AMDGPU_PTE_EXECUTABLE;
+	*flags &= ~AMDGPU_PTE_PRT;
+}
+
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 					      bool value)
 {
@@ -1153,7 +1146,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
 	.emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
 	.set_prt = gmc_v6_0_set_prt,
 	.get_vm_pde = gmc_v6_0_get_vm_pde,
-	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
+	.get_vm_pte = gmc_v6_0_get_vm_pte,
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0c3d9bc3a641..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -463,27 +495,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
 	amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
 }
 
-static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
-					  uint32_t flags)
-{
-	uint64_t pte_flag = 0;
-
-	if (flags & AMDGPU_VM_PAGE_READABLE)
-		pte_flag |= AMDGPU_PTE_READABLE;
-	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-		pte_flag |= AMDGPU_PTE_WRITEABLE;
-	if (flags & AMDGPU_VM_PAGE_PRT)
-		pte_flag |= AMDGPU_PTE_PRT;
-
-	return pte_flag;
-}
-
 static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				uint64_t *addr, uint64_t *flags)
 {
 	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
+static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
+				struct amdgpu_bo_va_mapping *mapping,
+				uint64_t *flags)
+{
+	*flags &= ~AMDGPU_PTE_EXECUTABLE;
+	*flags &= ~AMDGPU_PTE_PRT;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1340,11 +1365,12 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
 	.set_prt = gmc_v7_0_set_prt,
-	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
-	.get_vm_pde = gmc_v7_0_get_vm_pde
+	.get_vm_pde = gmc_v7_0_get_vm_pde,
+	.get_vm_pte = gmc_v7_0_get_vm_pte
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ea764dd9245d..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid;
+	unsigned int tmp;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -686,29 +719,21 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
  * 0 valid
  */
 
-static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
-					  uint32_t flags)
-{
-	uint64_t pte_flag = 0;
-
-	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
-		pte_flag |= AMDGPU_PTE_EXECUTABLE;
-	if (flags & AMDGPU_VM_PAGE_READABLE)
-		pte_flag |= AMDGPU_PTE_READABLE;
-	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-		pte_flag |= AMDGPU_PTE_WRITEABLE;
-	if (flags & AMDGPU_VM_PAGE_PRT)
-		pte_flag |= AMDGPU_PTE_PRT;
-
-	return pte_flag;
-}
-
 static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				uint64_t *addr, uint64_t *flags)
 {
 	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
+static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
+				struct amdgpu_bo_va_mapping *mapping,
+				uint64_t *flags)
+{
+	*flags &= ~AMDGPU_PTE_EXECUTABLE;
+	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+	*flags &= ~AMDGPU_PTE_PRT;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1708,11 +1733,12 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
 	.set_prt = gmc_v8_0_set_prt,
-	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
-	.get_vm_pde = gmc_v8_0_get_vm_pde
+	.get_vm_pde = gmc_v8_0_get_vm_pde,
+	.get_vm_pte = gmc_v8_0_get_vm_pte
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f91337030dc0..40a496804356 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
@@ -51,10 +53,12 @@
 #include "gfxhub_v1_1.h"
 #include "mmhub_v9_4.h"
 #include "umc_v6_1.h"
+#include "umc_v6_0.h"
 
 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
 
 #include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
 
 /* add these here since we already include dce12 headers and these are for DCN */
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
@@ -205,6 +209,11 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
 {
 	u32 bits, i, tmp, reg;
 
+	/* Devices newer then VEGA10/12 shall have these programming
+	     sequences performed by PSP BL */
+	if (adev->asic_type >= CHIP_VEGA20)
+		return 0;
+
 	bits = 0x7f;
 
 	switch (state) {
@@ -243,44 +252,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
-		struct amdgpu_iv_entry *entry)
-{
-	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-	if (adev->umc.funcs->query_ras_error_count)
-		adev->umc.funcs->query_ras_error_count(adev, err_data);
-	/* umc query_ras_error_address is also responsible for clearing
-	 * error status
-	 */
-	if (adev->umc.funcs->query_ras_error_address)
-		adev->umc.funcs->query_ras_error_address(adev, err_data);
-
-	/* only uncorrectable error needs gpu reset */
-	if (err_data->ue_count)
-		amdgpu_ras_reset_gpu(adev, 0);
-
-	return AMDGPU_RAS_SUCCESS;
-}
-
-static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
-		struct amdgpu_irq_src *source,
-		struct amdgpu_iv_entry *entry)
-{
-	struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
-	struct ras_dispatch_if ih_data = {
-		.entry = entry,
-	};
-
-	if (!ras_if)
-		return 0;
-
-	ih_data.head = *ras_if;
-
-	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
-	return 0;
-}
-
 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *src,
 					unsigned type,
@@ -355,6 +326,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	}
 
 	/* If it's the first fault for this address, process it normally */
+	if (retry_fault && !in_interrupt() &&
+	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+		return 1; /* This also prevents sending it to KFD */
+
 	if (!amdgpu_sriov_vf(adev)) {
 		/*
 		 * Issue a dummy read to wait for the status register to
@@ -417,7 +392,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
 
 static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
 	.set = gmc_v9_0_ecc_interrupt_state,
-	.process = gmc_v9_0_process_ecc_irq,
+	.process = amdgpu_umc_process_ecc_irq,
 };
 
 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
@@ -425,8 +400,10 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->gmc.vm_fault.num_types = 1;
 	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
 
-	adev->gmc.ecc_irq.num_types = 1;
-	adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+	if (!amdgpu_sriov_vf(adev)) {
+		adev->gmc.ecc_irq.num_types = 1;
+		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+	}
 }
 
 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
@@ -448,6 +425,36 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
 	return req;
 }
 
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+				       uint32_t vmhub)
+{
+	return ((vmhub == AMDGPU_MMHUB_0 ||
+		 vmhub == AMDGPU_MMHUB_1) &&
+		(!amdgpu_sriov_vf(adev)) &&
+		(!(adev->asic_type == CHIP_RAVEN &&
+		   adev->rev_id < 0x8 &&
+		   adev->pdev->device == 0x15d8)));
+}
+
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t value;
+
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -467,6 +474,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
 static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
+	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
 	const unsigned eng = 17;
 	u32 j, tmp;
 	struct amdgpu_vmhub *hub;
@@ -491,6 +499,28 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	}
 
 	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * It may lose gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid entering power gated state
+	 * to WA the Issue
+	 */
+
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore) {
+		for (j = 0; j < adev->usec_timeout; j++) {
+			/* a read return value of 1 means semaphore acuqire */
+			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+			if (tmp & 0x1)
+				break;
+			udelay(1);
+		}
+
+		if (j >= adev->usec_timeout)
+			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+	}
+
 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
 	/*
@@ -506,21 +536,106 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			break;
 		udelay(1);
 	}
+
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/*
+		 * add semaphore release after invalidation,
+		 * write with 0 means semaphore release
+		 */
+		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
 	spin_unlock(&adev->gmc.invalidate_lock);
+
 	if (j < adev->usec_timeout)
 		return;
 
 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	if (ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v9_0_flush_gpu_tlb(adev, vmid,
+							i, 0);
+			} else {
+				gmc_v9_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, 0);
+			}
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
+	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
 	unsigned eng = ring->vm_inv_eng;
 
+	/*
+	 * It may lose gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid entering power gated state
+	 * to WA the Issue
+	 */
+
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/* a read return value of 1 means semaphore acuqire */
+		amdgpu_ring_emit_reg_wait(ring,
+					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 			      lower_32_bits(pd_addr));
 
@@ -531,6 +646,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    hub->vm_inv_eng0_ack + eng,
 					    req, 1 << vmid);
 
+	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+	if (use_semaphore)
+		/*
+		 * add semaphore release after invalidation,
+		 * write with 0 means semaphore release
+		 */
+		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
 	return pd_addr;
 }
 
@@ -584,44 +707,25 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
  * 0 valid
  */
 
-static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
-						uint32_t flags)
+static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
 
 {
-	uint64_t pte_flag = 0;
-
-	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
-		pte_flag |= AMDGPU_PTE_EXECUTABLE;
-	if (flags & AMDGPU_VM_PAGE_READABLE)
-		pte_flag |= AMDGPU_PTE_READABLE;
-	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-		pte_flag |= AMDGPU_PTE_WRITEABLE;
-
-	switch (flags & AMDGPU_VM_MTYPE_MASK) {
+	switch (flags) {
 	case AMDGPU_VM_MTYPE_DEFAULT:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
 	case AMDGPU_VM_MTYPE_NC:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
 	case AMDGPU_VM_MTYPE_WC:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
+	case AMDGPU_VM_MTYPE_RW:
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
 	case AMDGPU_VM_MTYPE_CC:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
 	case AMDGPU_VM_MTYPE_UC:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
 	default:
-		pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-		break;
+		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
 	}
-
-	if (flags & AMDGPU_VM_PAGE_PRT)
-		pte_flag |= AMDGPU_PTE_PRT;
-
-	return pte_flag;
 }
 
 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -648,12 +752,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 	}
 }
 
+static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
+				struct amdgpu_bo_va_mapping *mapping,
+				uint64_t *flags)
+{
+	*flags &= ~AMDGPU_PTE_EXECUTABLE;
+	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+
+	if (mapping->flags & AMDGPU_PTE_PRT) {
+		*flags |= AMDGPU_PTE_PRT;
+		*flags &= ~AMDGPU_PTE_VALID;
+	}
+
+	if (adev->asic_type == CHIP_ARCTURUS &&
+	    !(*flags & AMDGPU_PTE_SYSTEM) &&
+	    mapping->bo_va->is_xgmi)
+		*flags |= AMDGPU_PTE_SNOOPED;
+}
+
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
-	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-	.get_vm_pde = gmc_v9_0_get_vm_pde
+	.map_mtype = gmc_v9_0_map_mtype,
+	.get_vm_pde = gmc_v9_0_get_vm_pde,
+	.get_vm_pte = gmc_v9_0_get_vm_pte
 };
 
 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -664,11 +791,22 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+		adev->umc.funcs = &umc_v6_0_funcs;
+		break;
 	case CHIP_VEGA20:
 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
-		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET;
+		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+		adev->umc.funcs = &umc_v6_1_funcs;
+		break;
+	case CHIP_ARCTURUS:
+		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
 		adev->umc.funcs = &umc_v6_1_funcs;
 		break;
@@ -681,7 +819,10 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
-		adev->mmhub_funcs = &mmhub_v1_0_funcs;
+		adev->mmhub.funcs = &mmhub_v1_0_funcs;
+		break;
+	case CHIP_ARCTURUS:
+		adev->mmhub.funcs = &mmhub_v9_4_funcs;
 		break;
 	default:
 		break;
@@ -732,175 +873,15 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
 	}
 }
 
-static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring;
-	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
-		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
-		GFXHUB_FREE_VM_INV_ENGS_BITMAP};
-	unsigned i;
-	unsigned vmhub, inv_eng;
-
-	for (i = 0; i < adev->num_rings; ++i) {
-		ring = adev->rings[i];
-		vmhub = ring->funcs->vmhub;
-
-		inv_eng = ffs(vm_inv_engs[vmhub]);
-		if (!inv_eng) {
-			dev_err(adev->dev, "no VM inv eng for ring %s\n",
-				ring->name);
-			return -EINVAL;
-		}
-
-		ring->vm_inv_eng = inv_eng - 1;
-		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
-
-		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
-			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
-	}
-
-	return 0;
-}
-
-static int gmc_v9_0_ecc_ras_block_late_init(void *handle,
-			struct ras_fs_if *fs_info, struct ras_common_if *ras_block)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ras_common_if **ras_if = NULL;
-	struct ras_ih_if ih_info = {
-		.cb = gmc_v9_0_process_ras_data_cb,
-	};
-	int r;
-
-	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
-		ras_if = &adev->gmc.umc_ras_if;
-	else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB)
-		ras_if = &adev->gmc.mmhub_ras_if;
-	else
-		BUG();
-
-	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
-		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
-		return 0;
-	}
-
-	/* handle resume path. */
-	if (*ras_if) {
-		/* resend ras TA enable cmd during resume.
-		 * prepare to handle failure.
-		 */
-		ih_info.head = **ras_if;
-		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-		if (r) {
-			if (r == -EAGAIN) {
-				/* request a gpu reset. will run again. */
-				amdgpu_ras_request_reset_on_boot(adev,
-						ras_block->block);
-				return 0;
-			}
-			/* fail to enable ras, cleanup all. */
-			goto irq;
-		}
-		/* enable successfully. continue. */
-		goto resume;
-	}
-
-	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
-	if (!*ras_if)
-		return -ENOMEM;
-
-	**ras_if = *ras_block;
-
-	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r) {
-		if (r == -EAGAIN) {
-			amdgpu_ras_request_reset_on_boot(adev,
-					ras_block->block);
-			r = 0;
-		}
-		goto feature;
-	}
-
-	ih_info.head = **ras_if;
-	fs_info->head = **ras_if;
-
-	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
-		r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
-		if (r)
-			goto interrupt;
-	}
-
-	amdgpu_ras_debugfs_create(adev, fs_info);
-
-	r = amdgpu_ras_sysfs_create(adev, fs_info);
-	if (r)
-		goto sysfs;
-resume:
-	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
-		r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
-		if (r)
-			goto irq;
-	}
-
-	return 0;
-irq:
-	amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
-	amdgpu_ras_debugfs_remove(adev, *ras_if);
-	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
-		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
-	amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
-	kfree(*ras_if);
-	*ras_if = NULL;
-	return r;
-}
-
-static int gmc_v9_0_ecc_late_init(void *handle)
-{
-	int r;
-
-	struct ras_fs_if umc_fs_info = {
-		.sysfs_name = "umc_err_count",
-		.debugfs_name = "umc_err_inject",
-	};
-	struct ras_common_if umc_ras_block = {
-		.block = AMDGPU_RAS_BLOCK__UMC,
-		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-		.sub_block_index = 0,
-		.name = "umc",
-	};
-	struct ras_fs_if mmhub_fs_info = {
-		.sysfs_name = "mmhub_err_count",
-		.debugfs_name = "mmhub_err_inject",
-	};
-	struct ras_common_if mmhub_ras_block = {
-		.block = AMDGPU_RAS_BLOCK__MMHUB,
-		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-		.sub_block_index = 0,
-		.name = "mmhub",
-	};
-
-	r = gmc_v9_0_ecc_ras_block_late_init(handle,
-			&umc_fs_info, &umc_ras_block);
-	if (r)
-		return r;
-
-	r = gmc_v9_0_ecc_ras_block_late_init(handle,
-			&mmhub_fs_info, &mmhub_ras_block);
-	return r;
-}
-
 static int gmc_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	bool r;
+	int r;
 
 	if (!gmc_v9_0_keep_stolen_memory(adev))
 		amdgpu_bo_late_init(adev);
 
-	r = gmc_v9_0_allocate_vm_inv_eng(adev);
+	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
 	if (r)
 		return r;
 	/* Check if ecc is available */
@@ -908,11 +889,12 @@ static int gmc_v9_0_late_init(void *handle)
 		switch (adev->asic_type) {
 		case CHIP_VEGA10:
 		case CHIP_VEGA20:
+		case CHIP_ARCTURUS:
 			r = amdgpu_atomfirmware_mem_ecc_supported(adev);
 			if (!r) {
 				DRM_INFO("ECC is not present.\n");
-				if (adev->df_funcs->enable_ecc_force_par_wr_rmw)
-					adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
+				if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+					adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
 			} else {
 				DRM_INFO("ECC is active.\n");
 			}
@@ -929,7 +911,7 @@ static int gmc_v9_0_late_init(void *handle)
 		}
 	}
 
-	r = gmc_v9_0_ecc_late_init(handle);
+	r = amdgpu_gmc_ras_late_init(adev);
 	if (r)
 		return r;
 
@@ -970,33 +952,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
  */
 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 {
-	int chansize, numchan;
 	int r;
 
-	if (amdgpu_sriov_vf(adev)) {
-		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
-		 * and DF related registers is not readable, seems hardcord is the
-		 * only way to set the correct vram_width
-		 */
-		adev->gmc.vram_width = 2048;
-	} else if (amdgpu_emu_mode != 1) {
-		adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
-	}
-
-	if (!adev->gmc.vram_width) {
-		/* hbm memory channel size */
-		if (adev->flags & AMD_IS_APU)
-			chansize = 64;
-		else
-			chansize = 128;
-
-		numchan = adev->df_funcs->get_hbm_channel_number(adev);
-		adev->gmc.vram_width = numchan * chansize;
-	}
-
 	/* size in MB on si */
 	adev->gmc.mc_vram_size =
-		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
 	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
 
 	if (!(adev->flags & AMD_IS_APU)) {
@@ -1108,7 +1068,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 
 static int gmc_v9_0_sw_init(void *handle)
 {
-	int r;
+	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	gfxhub_v1_0_init(adev);
@@ -1119,7 +1079,32 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
-	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+	r = amdgpu_atomfirmware_get_vram_info(adev,
+		&vram_width, &vram_type, &vram_vendor);
+	if (amdgpu_sriov_vf(adev))
+		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+		 * and DF related registers is not readable, seems hardcord is the
+		 * only way to set the correct vram_width
+		 */
+		adev->gmc.vram_width = 2048;
+	else if (amdgpu_emu_mode != 1)
+		adev->gmc.vram_width = vram_width;
+
+	if (!adev->gmc.vram_width) {
+		int chansize, numchan;
+
+		/* hbm memory channel size */
+		if (adev->flags & AMD_IS_APU)
+			chansize = 64;
+		else
+			chansize = 128;
+
+		numchan = adev->df.funcs->get_hbm_channel_number(adev);
+		adev->gmc.vram_width = numchan * chansize;
+	}
+
+	adev->gmc.vram_type = vram_type;
+	adev->gmc.vram_vendor = vram_vendor;
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 		adev->num_vmhubs = 2;
@@ -1180,11 +1165,13 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	/* interrupt sent to DF. */
-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
-			&adev->gmc.ecc_irq);
-	if (r)
-		return r;
+	if (!amdgpu_sriov_vf(adev)) {
+		/* interrupt sent to DF. */
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+				      &adev->gmc.ecc_irq);
+		if (r)
+			return r;
+	}
 
 	/* Set the internal MC address mask
 	 * This is the max address of the GPU's
@@ -1240,33 +1227,7 @@ static int gmc_v9_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	void *stolen_vga_buf;
 
-	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
-			adev->gmc.umc_ras_if) {
-		struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
-		struct ras_ih_if ih_info = {
-			.head = *ras_if,
-		};
-
-		/* remove fs first */
-		amdgpu_ras_debugfs_remove(adev, ras_if);
-		amdgpu_ras_sysfs_remove(adev, ras_if);
-		/* remove the IH */
-		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-		amdgpu_ras_feature_enable(adev, ras_if, 0);
-		kfree(ras_if);
-	}
-
-	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
-			adev->gmc.mmhub_ras_if) {
-		struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if;
-
-		/* remove fs and disable ras feature */
-		amdgpu_ras_debugfs_remove(adev, ras_if);
-		amdgpu_ras_sysfs_remove(adev, ras_if);
-		amdgpu_ras_feature_enable(adev, ras_if, 0);
-		kfree(ras_if);
-	}
-
+	amdgpu_gmc_ras_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_vm_manager_fini(adev);
 
@@ -1316,13 +1277,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
  */
 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 {
-	int r, i;
-	bool value;
-	u32 tmp;
-
-	amdgpu_device_program_register_sequence(adev,
-						golden_settings_vega10_hdp,
-						ARRAY_SIZE(golden_settings_vega10_hdp));
+	int r;
 
 	if (adev->gart.bo == NULL) {
 		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -1332,15 +1287,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	switch (adev->asic_type) {
-	case CHIP_RAVEN:
-		/* TODO for renoir */
-		mmhub_v1_0_update_power_gating(adev, true);
-		break;
-	default:
-		break;
-	}
-
 	r = gfxhub_v1_0_gart_enable(adev);
 	if (r)
 		return r;
@@ -1352,6 +1298,49 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(adev->gmc.gart_size >> 20),
+		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+	adev->gart.ready = true;
+	return 0;
+}
+
+static int gmc_v9_0_hw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool value;
+	int r, i;
+	u32 tmp;
+
+	/* The sequence of these two function calls matters.*/
+	gmc_v9_0_init_golden_registers(adev);
+
+	if (adev->mode_info.num_crtc) {
+		if (adev->asic_type != CHIP_ARCTURUS) {
+			/* Lockout access through VGA aperture*/
+			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+
+			/* disable VGA render */
+			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+		}
+	}
+
+	amdgpu_device_program_register_sequence(adev,
+						golden_settings_vega10_hdp,
+						ARRAY_SIZE(golden_settings_vega10_hdp));
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		/* TODO for renoir */
+		mmhub_v1_0_update_power_gating(adev, true);
+		break;
+	case CHIP_ARCTURUS:
+		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
+		break;
+	default:
+		break;
+	}
+
 	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
 
 	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
@@ -1361,44 +1350,25 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
 
 	/* After HDP is initialized, flush HDP.*/
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	adev->nbio.funcs->hdp_flush(adev, NULL);
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		value = false;
 	else
 		value = true;
 
-	gfxhub_v1_0_set_fault_enable_default(adev, value);
-	if (adev->asic_type == CHIP_ARCTURUS)
-		mmhub_v9_4_set_fault_enable_default(adev, value);
-	else
-		mmhub_v1_0_set_fault_enable_default(adev, value);
-
+	if (!amdgpu_sriov_vf(adev)) {
+		gfxhub_v1_0_set_fault_enable_default(adev, value);
+		if (adev->asic_type == CHIP_ARCTURUS)
+			mmhub_v9_4_set_fault_enable_default(adev, value);
+		else
+			mmhub_v1_0_set_fault_enable_default(adev, value);
+	}
 	for (i = 0; i < adev->num_vmhubs; ++i)
 		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
 
-	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->gmc.gart_size >> 20),
-		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
-	adev->gart.ready = true;
-	return 0;
-}
-
-static int gmc_v9_0_hw_init(void *handle)
-{
-	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	/* The sequence of these two function calls matters.*/
-	gmc_v9_0_init_golden_registers(adev);
-
-	if (adev->mode_info.num_crtc) {
-		/* Lockout access through VGA aperture*/
-		WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
-
-		/* disable VGA render */
-		WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
-	}
+	if (adev->umc.funcs && adev->umc.funcs->init_registers)
+		adev->umc.funcs->init_registers(adev);
 
 	r = gmc_v9_0_gart_enable(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index 971c0840358f..e0585e8c6c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,24 +24,6 @@
 #ifndef __GMC_V9_0_H__
 #define __GMC_V9_0_H__
 
-	/*
-	 * The latest engine allocation on gfx9 is:
-	 * Engine 2, 3: firmware
-	 * Engine 0, 1, 4~16: amdgpu ring,
-	 *                    subject to change when ring number changes
-	 * Engine 17: Gart flushes
-	 */
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
-
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
-
-/* amdgpu_amdkfd*.c */
-void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-				uint64_t value);
-void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-				uint64_t value);
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
-				uint32_t vmid, uint64_t value);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
new file mode 100644
index 000000000000..0debfd9f428c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v1_0.h"
+
+#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+	struct amdgpu_device *adev = ring->adev;
+	ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+		ring->ring[(*ptr)++] = 0;
+		ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+	} else {
+		ring->ring[(*ptr)++] = reg_offset;
+		ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+	}
+	ring->ring[(*ptr)++] = val;
+}
+
+static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	uint32_t reg, reg_offset, val, mask, i;
+
+	// 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+	reg_offset = (reg << 2);
+	val = lower_32_bits(ring->gpu_addr);
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+	// 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+	reg_offset = (reg << 2);
+	val = upper_32_bits(ring->gpu_addr);
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+	// 3rd to 5th: issue MEM_READ commands
+	for (i = 0; i <= 2; i++) {
+		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+		ring->ring[ptr++] = 0;
+	}
+
+	// 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+	reg_offset = (reg << 2);
+	val = 0x13;
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+	// 7th: program mmUVD_JRBC_RB_REF_DATA
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
+	reg_offset = (reg << 2);
+	val = 0x1;
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+	// 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+	reg_offset = (reg << 2);
+	val = 0x1;
+	mask = 0x1;
+
+	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+	ring->ring[ptr++] = 0x01400200;
+	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+	ring->ring[ptr++] = val;
+	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+		ring->ring[ptr++] = 0;
+		ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+	} else {
+		ring->ring[ptr++] = reg_offset;
+		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+	}
+	ring->ring[ptr++] = mask;
+
+	//9th to 21st: insert no-op
+	for (i = 0; i <= 12; i++) {
+		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+		ring->ring[ptr++] = 0;
+	}
+
+	//22nd: reset mmUVD_JRBC_RB_RPTR
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+	reg_offset = (reg << 2);
+	val = 0;
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+	//23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+	reg_offset = (reg << 2);
+	val = 0x12;
+	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x68e04);
+
+	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x68e04);
+
+	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				     unsigned flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, seq);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, seq);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x8);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, seq);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+	amdgpu_ring_write(ring, 0xffffffff);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x3fbc);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x1);
+
+	/* emit trap */
+	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					uint32_t flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, ib->length_dw);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+	amdgpu_ring_write(ring,
+		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x2);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+	amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
+					    uint32_t reg, uint32_t val,
+					    uint32_t mask)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t reg_offset = (reg << 2);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, val);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring,
+			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+	} else {
+		amdgpu_ring_write(ring, reg_offset);
+		amdgpu_ring_write(ring,
+			PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+	}
+	amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
+		unsigned vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t data0, data1, mask;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for register write */
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
+					uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t reg_offset = (reg << 2);
+
+	amdgpu_ring_write(ring,
+		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+			((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring,
+			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+	} else {
+		amdgpu_ring_write(ring, reg_offset);
+		amdgpu_ring_write(ring,
+			PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+	}
+	amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
+static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0;
+}
+
+static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG decode TRAP\n");
+
+	switch (entry->src_id) {
+	case 126:
+		amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v1_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+int jpeg_v1_0_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->jpeg.num_jpeg_inst = 1;
+
+	jpeg_v1_0_set_dec_ring_funcs(adev);
+	jpeg_v1_0_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+int jpeg_v1_0_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int r;
+
+	/* JPEG TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
+	if (r)
+		return r;
+
+	ring = &adev->jpeg.inst->ring_dec;
+	sprintf(ring->name, "jpeg_dec");
+	r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+	if (r)
+		return r;
+
+	adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+		SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+	return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG free up sw allocation
+ */
+void jpeg_v1_0_sw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+}
+
+/**
+ * jpeg_v1_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
+{
+	struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+	if (mode == 0) {
+		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+				UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+	}
+
+	/* initialize wptr */
+	ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+	/* copy patch commands to the jpeg ring */
+	jpeg_v1_0_decode_ring_set_patch_ring(ring,
+		(ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+}
+
+static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_JPEG,
+	.align_mask = 0xf,
+	.nop = PACKET0(0x81ff, 0),
+	.support_64bit_ptrs = false,
+	.no_user_fence = true,
+	.vmhub = AMDGPU_MMHUB_0,
+	.extra_dw = 64,
+	.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
+	.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
+	.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+	.emit_frame_size =
+		6 + 6 + /* hdp invalidate / flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
+		26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
+		6,
+	.emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
+	.emit_ib = jpeg_v1_0_decode_ring_emit_ib,
+	.emit_fence = jpeg_v1_0_decode_ring_emit_fence,
+	.emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
+	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
+	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
+	.insert_nop = jpeg_v1_0_decode_ring_nop,
+	.insert_start = jpeg_v1_0_decode_ring_insert_start,
+	.insert_end = jpeg_v1_0_decode_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = vcn_v1_0_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
+	.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
+	DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
+	.set = jpeg_v1_0_set_interrupt_state,
+	.process = jpeg_v1_0_process_interrupt,
+};
+
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
new file mode 100644
index 000000000000..bbf33a6a3972
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V1_0_H__
+#define __JPEG_V1_0_H__
+
+int jpeg_v1_0_early_init(void *handle);
+int jpeg_v1_0_sw_init(void *handle);
+void jpeg_v1_0_sw_fini(void *handle);
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+
+#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
new file mode 100644
index 000000000000..a78292d84854
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 			0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET				0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET				0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET				0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET				0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET				0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 		0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET				0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET			0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET				0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET				0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET				0x401f
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR				0x18000
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_0_set_powergating_state(void *handle,
+				enum amd_powergating_state state);
+
+/**
+ * jpeg_v2_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_0_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->jpeg.num_jpeg_inst = 1;
+
+	jpeg_v2_0_set_dec_ring_funcs(adev);
+	jpeg_v2_0_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_0_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int r;
+
+	/* JPEG TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	ring = &adev->jpeg.inst->ring_dec;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+	sprintf(ring->name, "jpeg_dec");
+	r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+	if (r)
+		return r;
+
+	adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+	adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_0_sw_fini(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_jpeg_suspend(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_fini(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v2_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_0_hw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+	int r;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+		(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	r = amdgpu_ring_test_helper(ring);
+	if (!r)
+		DRM_INFO("JPEG decode initialized successfully.\n");
+
+	return r;
+}
+
+/**
+ * jpeg_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_0_hw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+	      RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+		jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+	ring->sched.ready = false;
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_0_suspend(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	r = jpeg_v2_0_hw_fini(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_suspend(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v2_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_0_resume(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	r = jpeg_v2_0_hw_init(adev);
+
+	return r;
+}
+
+static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t data;
+	int r = 0;
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+		data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+		WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+		SOC15_WAIT_ON_RREG(JPEG, 0,
+			mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+		if (r) {
+			DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+			return r;
+		}
+	}
+
+	/* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
+	data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
+	WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+	return 0;
+}
+
+static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)
+{
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+		uint32_t data;
+		int r = 0;
+
+		data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS));
+		data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+		data |=  0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
+		WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+		data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+		WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+		SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+			(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+		if (r) {
+			DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JPEG_ENC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+		|JPEG_CGC_GATE__JPEG2_DEC_MASK
+		|JPEG_CGC_GATE__JPEG_ENC_MASK
+		|JPEG_CGC_GATE__JMCIF_MASK
+		|JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_0_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+	int r;
+
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_jpeg(adev, true);
+
+	/* disable power gating */
+	r = jpeg_v2_0_disable_power_gating(adev);
+	if (r)
+		return r;
+
+	/* JPEG disable CGC */
+	jpeg_v2_0_disable_clock_gating(adev);
+
+	WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
+	/* enable JMI channel */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+		~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+	/* enable System Interrupt for JRBC */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+		JPEG_SYS_INT_EN__DJRBC_MASK,
+		~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+	WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+	WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+		lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+		upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+	ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_0_stop(struct amdgpu_device *adev)
+{
+	int r;
+
+	/* reset JMI */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+		UVD_JMI_CNTL__SOFT_RESET_MASK,
+		~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+	/* enable JPEG CGC */
+	jpeg_v2_0_enable_clock_gating(adev);
+
+	/* enable power gating */
+	r = jpeg_v2_0_enable_power_gating(adev);
+	if (r)
+		return r;
+
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_jpeg(adev, false);
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell)
+		return adev->wb.wb[ring->wptr_offs];
+	else
+		return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell) {
+		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+	} else {
+		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+	}
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x68e04);
+
+	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x68e04);
+
+	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				unsigned flags)
+{
+	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, seq);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, seq);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x8);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x3fbc);
+
+	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x1);
+
+	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+				struct amdgpu_job *job,
+				struct amdgpu_ib *ib,
+				uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, ib->length_dw);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+	amdgpu_ring_write(ring,	PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x2);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
+		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+	amdgpu_ring_write(ring, 0x2);
+}
+
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				uint32_t val, uint32_t mask)
+{
+	uint32_t reg_offset = (reg << 2);
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, val);
+
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring,
+			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+	} else {
+		amdgpu_ring_write(ring, reg_offset);
+		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+			0, 0, PACKETJ_TYPE3));
+	}
+	amdgpu_ring_write(ring, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				unsigned vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t data0, data1, mask;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for register write */
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+	uint32_t reg_offset = (reg << 2);
+
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring,
+			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+	} else {
+		amdgpu_ring_write(ring, reg_offset);
+		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+			0, 0, PACKETJ_TYPE0));
+	}
+	amdgpu_ring_write(ring, val);
+}
+
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
+static bool jpeg_v2_0_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+		UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+		UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v2_0_wait_for_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int ret = 0;
+
+	SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+		UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+
+	return ret;
+}
+
+static int jpeg_v2_0_set_clockgating_state(void *handle,
+					  enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+	if (enable) {
+		if (jpeg_v2_0_is_idle(handle))
+			return -EBUSY;
+		jpeg_v2_0_enable_clock_gating(adev);
+	} else {
+		jpeg_v2_0_disable_clock_gating(adev);
+	}
+
+	return 0;
+}
+
+static int jpeg_v2_0_set_powergating_state(void *handle,
+					enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int ret;
+
+	if (state == adev->jpeg.cur_state)
+		return 0;
+
+	if (state == AMD_PG_STATE_GATE)
+		ret = jpeg_v2_0_stop(adev);
+	else
+		ret = jpeg_v2_0_start(adev);
+
+	if (!ret)
+		adev->jpeg.cur_state = state;
+
+	return ret;
+}
+
+static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0;
+}
+
+static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	switch (entry->src_id) {
+	case VCN_2_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
+	.name = "jpeg_v2_0",
+	.early_init = jpeg_v2_0_early_init,
+	.late_init = NULL,
+	.sw_init = jpeg_v2_0_sw_init,
+	.sw_fini = jpeg_v2_0_sw_fini,
+	.hw_init = jpeg_v2_0_hw_init,
+	.hw_fini = jpeg_v2_0_hw_fini,
+	.suspend = jpeg_v2_0_suspend,
+	.resume = jpeg_v2_0_resume,
+	.is_idle = jpeg_v2_0_is_idle,
+	.wait_for_idle = jpeg_v2_0_wait_for_idle,
+	.check_soft_reset = NULL,
+	.pre_soft_reset = NULL,
+	.soft_reset = NULL,
+	.post_soft_reset = NULL,
+	.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
+	.set_powergating_state = jpeg_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_JPEG,
+	.align_mask = 0xf,
+	.vmhub = AMDGPU_MMHUB_0,
+	.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
+	.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
+	.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+	.emit_frame_size =
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+		18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+		8 + 16,
+	.emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+	.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+	.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+	.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
+	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
+	.insert_nop = jpeg_v2_0_dec_ring_nop,
+	.insert_start = jpeg_v2_0_dec_ring_insert_start,
+	.insert_end = jpeg_v2_0_dec_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_jpeg_ring_begin_use,
+	.end_use = amdgpu_jpeg_ring_end_use,
+	.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+	.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
+	DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
+	.set = jpeg_v2_0_set_interrupt_state,
+	.process = jpeg_v2_0_process_interrupt,
+};
+
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->jpeg.inst->irq.num_types = 1;
+	adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
+{
+		.type = AMD_IP_BLOCK_TYPE_JPEG,
+		.major = 2,
+		.minor = 0,
+		.rev = 0,
+		.funcs = &jpeg_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
new file mode 100644
index 000000000000..15a344ed340f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_0_H__
+#define __JPEG_V2_0_H__
+
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				unsigned flags);
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+				struct amdgpu_ib *ib, uint32_t flags);
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				uint32_t val, uint32_t mask);
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				unsigned vmid, uint64_t pd_addr);
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+
+extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
+
+#endif /* __JPEG_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
new file mode 100644
index 000000000000..2c58939e6ad0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -0,0 +1,641 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET			0x401f
+
+#define JPEG25_MAX_HW_INSTANCES_ARCTURUS			2
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_5_set_powergating_state(void *handle,
+				enum amd_powergating_state state);
+
+static int amdgpu_ih_clientid_jpeg[] = {
+	SOC15_IH_CLIENTID_VCN,
+	SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * jpeg_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_5_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		u32 harvest;
+		int i;
+
+		adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+			harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
+			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+				adev->jpeg.harvest_config |= 1 << i;
+		}
+
+		if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 |
+						 AMDGPU_JPEG_HARVEST_JPEG1))
+			return -ENOENT;
+	} else
+		adev->jpeg.num_jpeg_inst = 1;
+
+	jpeg_v2_5_set_dec_ring_funcs(adev);
+	jpeg_v2_5_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_5_sw_init(void *handle)
+{
+	struct amdgpu_ring *ring;
+	int i, r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		/* JPEG TRAP */
+		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+				VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ring = &adev->jpeg.inst[i].ring_dec;
+		ring->use_doorbell = true;
+		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+		sprintf(ring->name, "jpeg_dec_%d", i);
+		r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0);
+		if (r)
+			return r;
+
+		adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+		adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_5_sw_fini(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_jpeg_suspend(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_fini(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v2_5_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_5_hw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int i, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ring = &adev->jpeg.inst[i].ring_dec;
+		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
+			return r;
+	}
+
+	DRM_INFO("JPEG decode initialized successfully.\n");
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_5_hw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ring = &adev->jpeg.inst[i].ring_dec;
+		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+		      RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
+			jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+		ring->sched.ready = false;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_5_suspend(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	r = jpeg_v2_5_hw_fini(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_suspend(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v2_5_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_5_resume(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	r = jpeg_v2_5_hw_init(adev);
+
+	return r;
+}
+
+static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JPEG_ENC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+
+	data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+	data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+		| JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+		| JPEG_CGC_CTRL__JMCIF_MODE_MASK
+		| JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+	WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+		|JPEG_CGC_GATE__JPEG2_DEC_MASK
+		|JPEG_CGC_GATE__JPEG_ENC_MASK
+		|JPEG_CGC_GATE__JMCIF_MASK
+		|JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_5_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ring = &adev->jpeg.inst[i].ring_dec;
+		/* disable anti hang mechanism */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+		/* JPEG disable CGC */
+		jpeg_v2_5_disable_clock_gating(adev, i);
+
+		/* MJPEG global tiling registers */
+		WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+			adev->gfx.config.gb_addr_config);
+		WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+			adev->gfx.config.gb_addr_config);
+
+		/* enable JMI channel */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+			~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+		/* enable System Interrupt for JRBC */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+			JPEG_SYS_INT_EN__DJRBC_MASK,
+			~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+		WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+		WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+		WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+			lower_32_bits(ring->gpu_addr));
+		WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+			upper_32_bits(ring->gpu_addr));
+		WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+		WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+		WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+		WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+		ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_5_stop(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		/* reset JMI */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+			UVD_JMI_CNTL__SOFT_RESET_MASK,
+			~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+		jpeg_v2_5_enable_clock_gating(adev, i);
+
+		/* enable anti hang mechanism */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+			UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell)
+		return adev->wb.wb[ring->wptr_offs];
+	else
+		return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell) {
+		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+	} else {
+		WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+	}
+}
+
+static bool jpeg_v2_5_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, ret = 1;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) &
+			UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+			UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+	}
+
+	return ret;
+}
+
+static int jpeg_v2_5_wait_for_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, ret = 0;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
+			UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+			UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int jpeg_v2_5_set_clockgating_state(void *handle,
+					  enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		if (enable) {
+			if (jpeg_v2_5_is_idle(handle))
+				return -EBUSY;
+			jpeg_v2_5_enable_clock_gating(adev, i);
+		} else {
+			jpeg_v2_5_disable_clock_gating(adev, i);
+		}
+	}
+
+	return 0;
+}
+
+static int jpeg_v2_5_set_powergating_state(void *handle,
+					  enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int ret;
+
+	if(state == adev->jpeg.cur_state)
+		return 0;
+
+	if (state == AMD_PG_STATE_GATE)
+		ret = jpeg_v2_5_stop(adev);
+	else
+		ret = jpeg_v2_5_start(adev);
+
+	if(!ret)
+		adev->jpeg.cur_state = state;
+
+	return ret;
+}
+
+static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0;
+}
+
+static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	switch (entry->client_id) {
+	case SOC15_IH_CLIENTID_VCN:
+		ip_instance = 0;
+		break;
+	case SOC15_IH_CLIENTID_VCN1:
+		ip_instance = 1;
+		break;
+	default:
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	switch (entry->src_id) {
+	case VCN_2_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
+	.name = "jpeg_v2_5",
+	.early_init = jpeg_v2_5_early_init,
+	.late_init = NULL,
+	.sw_init = jpeg_v2_5_sw_init,
+	.sw_fini = jpeg_v2_5_sw_fini,
+	.hw_init = jpeg_v2_5_hw_init,
+	.hw_fini = jpeg_v2_5_hw_fini,
+	.suspend = jpeg_v2_5_suspend,
+	.resume = jpeg_v2_5_resume,
+	.is_idle = jpeg_v2_5_is_idle,
+	.wait_for_idle = jpeg_v2_5_wait_for_idle,
+	.check_soft_reset = NULL,
+	.pre_soft_reset = NULL,
+	.soft_reset = NULL,
+	.post_soft_reset = NULL,
+	.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+	.set_powergating_state = jpeg_v2_5_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_JPEG,
+	.align_mask = 0xf,
+	.vmhub = AMDGPU_MMHUB_1,
+	.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+	.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+	.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+	.emit_frame_size =
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+		18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+		8 + 16,
+	.emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+	.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+	.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+	.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
+	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
+	.insert_nop = jpeg_v2_0_dec_ring_nop,
+	.insert_start = jpeg_v2_0_dec_ring_insert_start,
+	.insert_end = jpeg_v2_0_dec_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_jpeg_ring_begin_use,
+	.end_use = amdgpu_jpeg_ring_end_use,
+	.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+	.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+		adev->jpeg.inst[i].ring_dec.me = i;
+		DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+	}
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
+	.set = jpeg_v2_5_set_interrupt_state,
+	.process = jpeg_v2_5_process_interrupt,
+};
+
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		adev->jpeg.inst[i].irq.num_types = 1;
+		adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+	}
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
+{
+		.type = AMD_IP_BLOCK_TYPE_JPEG,
+		.major = 2,
+		.minor = 5,
+		.rev = 0,
+		.funcs = &jpeg_v2_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
new file mode 100644
index 000000000000..2b4087c02620
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_5_H__
+#define __JPEG_V2_5_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
+
+#endif /* __JPEG_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 04cd4b6f95d4..adfd8a6171eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -27,17 +27,13 @@
 #include "mmhub/mmhub_1_0_offset.h"
 #include "mmhub/mmhub_1_0_sh_mask.h"
 #include "mmhub/mmhub_1_0_default.h"
-#include "mmhub/mmhub_9_4_0_offset.h"
 #include "vega10_enum.h"
-
+#include "soc15.h"
 #include "soc15_common.h"
 
 #define mmDAGB0_CNTL_MISC2_RV 0x008f
 #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
 
-#define EA_EDC_CNT_MASK 0x3
-#define EA_EDC_CNT_SHIFT 0x2
-
 u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
 {
 	u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
@@ -206,6 +202,8 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
 }
 
@@ -418,6 +416,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
 	hub->ctx0_ptb_addr_hi32 =
 		SOC15_REG_OFFSET(MMHUB, 0,
 				 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	hub->vm_inv_eng0_sem =
+		SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
 	hub->vm_inv_eng0_req =
 		SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
 	hub->vm_inv_eng0_ack =
@@ -560,61 +560,194 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_MC_LS;
 }
 
+static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = {
+	{ "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	}
+};
+
+static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = {
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0},
+};
+
+static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
+	uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+	uint32_t i;
+	uint32_t sec_cnt, ded_cnt;
+
+	for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) {
+		if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset)
+			continue;
+
+		sec_cnt = (value &
+				mmhub_v1_0_ras_fields[i].sec_count_mask) >>
+				mmhub_v1_0_ras_fields[i].sec_count_shift;
+		if (sec_cnt) {
+			DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+				mmhub_v1_0_ras_fields[i].name,
+				sec_cnt);
+			*sec_count += sec_cnt;
+		}
+
+		ded_cnt = (value &
+				mmhub_v1_0_ras_fields[i].ded_count_mask) >>
+				mmhub_v1_0_ras_fields[i].ded_count_shift;
+		if (ded_cnt) {
+			DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+				mmhub_v1_0_ras_fields[i].name,
+				ded_cnt);
+			*ded_count += ded_cnt;
+		}
+	}
+
+	return 0;
+}
+
 static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	int i;
-	uint32_t ea0_edc_cnt, ea0_edc_cnt2;
-	uint32_t ea1_edc_cnt, ea1_edc_cnt2;
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	/* EDC CNT will be cleared automatically after read */
-	ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20);
-	ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20);
-	ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20);
-	ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20);
-
-	/* error count of each error type is recorded by 2 bits,
-	 * ce and ue count in EDC_CNT
-	 */
-	for (i = 0; i < 5; i++) {
-		err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
-		err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
-		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
-		err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
-		err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
-		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
-	}
-	/* successive ue count in EDC_CNT */
-	for (i = 0; i < 5; i++) {
-		err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
-		err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
-		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
+	uint32_t sec_count = 0, ded_count = 0;
+	uint32_t i;
+	uint32_t reg_value;
+
+	err_data->ue_count = 0;
+	err_data->ce_count = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) {
+		reg_value =
+			RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+		if (reg_value)
+			mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i],
+				reg_value, &sec_count, &ded_count);
 	}
 
-	/* ce and ue count in EDC_CNT2 */
-	for (i = 0; i < 3; i++) {
-		err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
-		err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
-		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-		err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
-		err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
-		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-	}
-	/* successive ue count in EDC_CNT2 */
-	for (i = 0; i < 6; i++) {
-		err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
-		err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
-		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
-	}
+	err_data->ce_count += sec_count;
+	err_data->ue_count += ded_count;
 }
 
 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
+	.ras_late_init = amdgpu_mmhub_ras_late_init,
 	.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index b39bea6f54e9..a7cb185d639a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -31,20 +31,25 @@
 
 #include "soc15_common.h"
 
-static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base)
 {
-	uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+	/* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */
+	int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+			- mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
 
-	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-		     lower_32_bits(value));
+	WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			offset * vmid, lower_32_bits(page_table_base));
 
-	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-		     upper_32_bits(value));
+	WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			offset * vmid, upper_32_bits(page_table_base));
 }
 
 static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
-	mmhub_v2_0_init_gart_pt_regs(adev);
+	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+	mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
 
 	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 		     (u32)(adev->gmc.gart_start >> 12));
@@ -161,6 +166,8 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
 	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
 }
 
@@ -341,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
 	hub->ctx0_ptb_addr_hi32 =
 		SOC15_REG_OFFSET(MMHUB, 0,
 				 mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	hub->vm_inv_eng0_sem =
+		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
 	hub->vm_inv_eng0_req =
 		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
 	hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
index db16f3ece218..3ea4344f0315 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev);
 int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
 			       enum amd_clockgating_state state);
 void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 9ed178fa241c..5c42387c9274 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -21,6 +21,7 @@
  *
  */
 #include "amdgpu.h"
+#include "amdgpu_ras.h"
 #include "mmhub_v9_4.h"
 
 #include "mmhub/mmhub_9_4_1_offset.h"
@@ -29,7 +30,7 @@
 #include "athub/athub_1_0_offset.h"
 #include "athub/athub_1_0_sh_mask.h"
 #include "vega10_enum.h"
-
+#include "soc15.h"
 #include "soc15_common.h"
 
 #define MMHUB_NUM_INSTANCES			2
@@ -53,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
 	return base;
 }
 
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
 				uint32_t vmid, uint64_t value)
 {
 	/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
@@ -79,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
 {
 	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
 
-	mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
+	mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
 
 	WREG32_SOC15_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -100,6 +101,16 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
 			    (u32)(adev->gmc.gart_end >> 44));
 }
 
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base)
+{
+	int i;
+
+	for (i = 0; i < MMHUB_NUM_INSTANCES; i++)
+		mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid,
+				page_table_base);
+}
+
 static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
 					         int hubid)
 {
@@ -117,45 +128,53 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			    adev->gmc.agp_start >> 24);
 
-	/* Program the system aperture low logical page number. */
-	WREG32_SOC15_OFFSET(MMHUB, 0,
-			    mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
-			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
-			    min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-	WREG32_SOC15_OFFSET(MMHUB, 0,
-			    mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
-			    max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+	if (!amdgpu_sriov_vf(adev)) {
+		/* Program the system aperture low logical page number. */
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+			max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
-	/* Set default page address. */
-	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
-		adev->vm_manager.vram_base_offset;
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+		/* Set default page address. */
+		value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+			adev->vm_manager.vram_base_offset;
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0,
 			mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			(u32)(value >> 12));
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0,
 			mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
 			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			(u32)(value >> 44));
 
-	/* Program "protection fault". */
-	WREG32_SOC15_OFFSET(MMHUB, 0,
-			    mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
-			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
-			    (u32)(adev->dummy_page_addr >> 12));
-	WREG32_SOC15_OFFSET(MMHUB, 0,
-			    mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
-			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
-			    (u32)((u64)adev->dummy_page_addr >> 44));
+		/* Program "protection fault". */
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0,
+			mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+			(u32)(adev->dummy_page_addr >> 12));
+		WREG32_SOC15_OFFSET(
+			MMHUB, 0,
+			mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+			hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+			(u32)((u64)adev->dummy_page_addr >> 44));
 
-	tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
-				  mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
-				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
-	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
-			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
-	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
-			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+		tmp = RREG32_SOC15_OFFSET(
+			MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+			hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+				    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+				    tmp);
+	}
 }
 
 static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
@@ -249,6 +268,8 @@ static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev,
 				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
 	tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
 	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
 }
@@ -311,7 +332,8 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
 				    adev->vm_manager.block_size - 9);
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
-				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+				    !amdgpu_noretry);
 		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
 				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
 				    tmp);
@@ -354,30 +376,16 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
 	int i;
 
 	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
-		if (amdgpu_sriov_vf(adev)) {
-			/*
-			 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase
-			 * they are VF copy registers so vbios post doesn't
-			 * program them, for SRIOV driver need to program them
-			 */
-			WREG32_SOC15_OFFSET(MMHUB, 0,
-				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE,
-				     i * MMHUB_INSTANCE_REGISTER_OFFSET,
-				     adev->gmc.vram_start >> 24);
-			WREG32_SOC15_OFFSET(MMHUB, 0,
-				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP,
-				     i * MMHUB_INSTANCE_REGISTER_OFFSET,
-				     adev->gmc.vram_end >> 24);
-		}
-
 		/* GART Enable. */
 		mmhub_v9_4_init_gart_aperture_regs(adev, i);
 		mmhub_v9_4_init_system_aperture_regs(adev, i);
 		mmhub_v9_4_init_tlb_regs(adev, i);
-		mmhub_v9_4_init_cache_regs(adev, i);
+		if (!amdgpu_sriov_vf(adev))
+			mmhub_v9_4_init_cache_regs(adev, i);
 
 		mmhub_v9_4_enable_system_domain(adev, i);
-		mmhub_v9_4_disable_identity_aperture(adev, i);
+		if (!amdgpu_sriov_vf(adev))
+			mmhub_v9_4_disable_identity_aperture(adev, i);
 		mmhub_v9_4_setup_vmid_config(adev, i);
 		mmhub_v9_4_program_invalidation(adev, i);
 	}
@@ -502,6 +510,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
 			SOC15_REG_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
 			    i * MMHUB_INSTANCE_REGISTER_OFFSET;
+		hub[i]->vm_inv_eng0_sem =
+			SOC15_REG_OFFSET(MMHUB, 0,
+					 mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+					 i * MMHUB_INSTANCE_REGISTER_OFFSET;
 		hub[i]->vm_inv_eng0_req =
 			SOC15_REG_OFFSET(MMHUB, 0,
 					 mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
@@ -649,3 +661,253 @@ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
 	if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
 		*flags |= AMD_CG_SUPPORT_MC_LS;
 }
+
+static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = {
+	{ "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+	SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+	0, 0,
+	},
+	{ "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+	},
+	{ "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+	},
+	{ "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+	0, 0,
+	SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+	}
+};
+
+static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = {
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0},
+   { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0},
+};
+
+static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg,
+	uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+	uint32_t i;
+	uint32_t sec_cnt, ded_cnt;
+
+	for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
+		if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+			continue;
+
+		sec_cnt = (value &
+				mmhub_v9_4_ras_fields[i].sec_count_mask) >>
+				mmhub_v9_4_ras_fields[i].sec_count_shift;
+		if (sec_cnt) {
+			DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+				mmhub_v9_4_ras_fields[i].name,
+				sec_cnt);
+			*sec_count += sec_cnt;
+		}
+
+		ded_cnt = (value &
+				mmhub_v9_4_ras_fields[i].ded_count_mask) >>
+				mmhub_v9_4_ras_fields[i].ded_count_shift;
+		if (ded_cnt) {
+			DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+				mmhub_v9_4_ras_fields[i].name,
+				ded_cnt);
+			*ded_count += ded_cnt;
+		}
+	}
+
+	return 0;
+}
+
+static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+					   void *ras_error_status)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	uint32_t sec_count = 0, ded_count = 0;
+	uint32_t i;
+	uint32_t reg_value;
+
+	err_data->ue_count = 0;
+	err_data->ce_count = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) {
+		reg_value =
+			RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+		if (reg_value)
+			mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i],
+				reg_value, &sec_count, &ded_count);
+	}
+
+	err_data->ce_count += sec_count;
+	err_data->ue_count += ded_count;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
+	.ras_late_init = amdgpu_mmhub_ras_late_init,
+	.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index d435cfcec1a8..1b979773776c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -23,6 +23,8 @@
 #ifndef __MMHUB_V9_4_H__
 #define __MMHUB_V9_4_H__
 
+extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+
 u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev);
 int mmhub_v9_4_gart_enable(struct amdgpu_device *adev);
 void mmhub_v9_4_gart_disable(struct amdgpu_device *adev);
@@ -32,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev);
 int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
 			       enum amd_clockgating_state state);
 void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+				uint64_t page_table_base);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 8af0bddf85e4..20958639b601 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -47,6 +47,18 @@ struct mmsch_v1_0_init_header {
 	uint32_t uvd_table_size;
 };
 
+struct mmsch_vf_eng_init_header {
+	uint32_t init_status;
+	uint32_t table_offset;
+	uint32_t table_size;
+};
+
+struct mmsch_v1_1_init_header {
+	uint32_t version;
+	uint32_t total_size;
+	struct mmsch_vf_eng_init_header eng[2];
+};
+
 struct mmsch_v1_0_cmd_direct_reg_header {
 	uint32_t reg_offset   : 28;
 	uint32_t command_type : 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index cc5bf595f9b1..5fd67e1cc2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -158,82 +158,6 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
 	xgpu_ai_mailbox_set_valid(adev, false);
 }
 
-static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf)
-{
-        int r = 0;
-        u32 req, val, size;
-
-        if (!amdgim_is_hwperf(adev) || buf == NULL)
-                return -EBADRQC;
-
-        switch(type) {
-        case PP_SCLK:
-                req = IDH_IRQ_GET_PP_SCLK;
-                break;
-        case PP_MCLK:
-                req = IDH_IRQ_GET_PP_MCLK;
-                break;
-        default:
-                return -EBADRQC;
-        }
-
-        mutex_lock(&adev->virt.dpm_mutex);
-
-        xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
-
-        r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
-        if (!r && adev->fw_vram_usage.va != NULL) {
-                val = RREG32_NO_KIQ(
-                        SOC15_REG_OFFSET(NBIO, 0,
-                                         mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1));
-                size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) +
-                                val), PAGE_SIZE);
-
-                if (size < PAGE_SIZE)
-                        strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val));
-                else
-                        size = 0;
-
-                r = size;
-                goto out;
-        }
-
-        r = xgpu_ai_poll_msg(adev, IDH_FAIL);
-        if(r)
-                pr_info("%s DPM request failed",
-                        (type == PP_SCLK)? "SCLK" : "MCLK");
-
-out:
-        mutex_unlock(&adev->virt.dpm_mutex);
-        return r;
-}
-
-static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level)
-{
-        int r = 0;
-        u32 req = IDH_IRQ_FORCE_DPM_LEVEL;
-
-        if (!amdgim_is_hwperf(adev))
-                return -EBADRQC;
-
-        mutex_lock(&adev->virt.dpm_mutex);
-        xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0);
-
-        r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
-        if (!r)
-                goto out;
-
-        r = xgpu_ai_poll_msg(adev, IDH_FAIL);
-        if (!r)
-                pr_info("DPM request failed");
-        else
-                pr_info("Mailbox is broken");
-
-out:
-        mutex_unlock(&adev->virt.dpm_mutex);
-        return r;
-}
-
 static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 					enum idh_request req)
 {
@@ -326,7 +250,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	 */
 	locked = mutex_trylock(&adev->lock_reset);
 	if (locked)
-		adev->in_gpu_reset = 1;
+		adev->in_gpu_reset = true;
 
 	do {
 		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -338,7 +262,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 
 flr_done:
 	if (locked) {
-		adev->in_gpu_reset = 0;
+		adev->in_gpu_reset = false;
 		mutex_unlock(&adev->lock_reset);
 	}
 
@@ -455,6 +379,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.reset_gpu = xgpu_ai_request_reset,
 	.wait_reset = NULL,
 	.trans_msg = xgpu_ai_mailbox_trans_msg,
-	.get_pp_clk = xgpu_ai_get_pp_clk,
-	.force_dpm_level = xgpu_ai_force_dpm_level,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 077e91a33d62..37dbe0f2142f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -35,10 +35,6 @@ enum idh_request {
 	IDH_REL_GPU_FINI_ACCESS,
 	IDH_REQ_GPU_RESET_ACCESS,
 
-	IDH_IRQ_FORCE_DPM_LEVEL = 10,
-	IDH_IRQ_GET_PP_SCLK,
-	IDH_IRQ_GET_PP_MCLK,
-
 	IDH_LOG_VF_ERROR       = 200,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
new file mode 100644
index 000000000000..237fa5e16b7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "soc15.h"
+#include "navi10_ih.h"
+#include "soc15_common.h"
+#include "mxgpu_nv.h"
+#include "mxgpu_ai.h"
+
+static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
+{
+	WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
+}
+
+static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+	WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
+
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+	return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+				mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+}
+
+
+static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
+				   enum idh_event event)
+{
+	u32 reg;
+
+	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+					     mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+	if (reg != event)
+		return -ENOENT;
+
+	xgpu_nv_mailbox_send_ack(adev);
+
+	return 0;
+}
+
+static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
+{
+	return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
+static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
+{
+	int timeout  = NV_MAILBOX_POLL_ACK_TIMEDOUT;
+	u8 reg;
+
+	do {
+		reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+		if (reg & 2)
+			return 0;
+
+		mdelay(5);
+		timeout -= 5;
+	} while (timeout > 1);
+
+	pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+
+	return -ETIME;
+}
+
+static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+	int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+
+	do {
+		r = xgpu_nv_mailbox_rcv_msg(adev, event);
+		if (!r)
+			return 0;
+
+		msleep(10);
+		timeout -= 10;
+	} while (timeout > 1);
+
+	pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+	return -ETIME;
+}
+
+static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
+	      enum idh_request req, u32 data1, u32 data2, u32 data3)
+{
+	u32 reg;
+	int r;
+	uint8_t trn;
+
+	/* IMPORTANT:
+	 * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK
+	 * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK
+	 * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack()
+	 * will return immediatly
+	 */
+	do {
+		xgpu_nv_mailbox_set_valid(adev, false);
+		trn = xgpu_nv_peek_ack(adev);
+		if (trn) {
+			pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+			msleep(1);
+		}
+	} while (trn);
+
+	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+					     mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
+	reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
+			    MSGBUF_DATA, req);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
+		      reg);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
+				data1);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
+				data2);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
+				data3);
+
+	xgpu_nv_mailbox_set_valid(adev, true);
+
+	/* start to poll ack */
+	r = xgpu_nv_poll_ack(adev);
+	if (r)
+		pr_err("Doesn't get ack from pf, continue\n");
+
+	xgpu_nv_mailbox_set_valid(adev, false);
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+					enum idh_request req)
+{
+	int r;
+
+	xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+	/* start to check msg if request is idh_req_gpu_init_access */
+	if (req == IDH_REQ_GPU_INIT_ACCESS ||
+		req == IDH_REQ_GPU_FINI_ACCESS ||
+		req == IDH_REQ_GPU_RESET_ACCESS) {
+		r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+		if (r) {
+			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+			return r;
+		}
+		/* Retrieve checksum from mailbox2 */
+		if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
+			adev->virt.fw_reserve.checksum_key =
+				RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+					mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
+		}
+	}
+
+	return 0;
+}
+
+static int xgpu_nv_request_reset(struct amdgpu_device *adev)
+{
+	return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+}
+
+static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
+					   bool init)
+{
+	enum idh_request req;
+
+	req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+	return xgpu_nv_send_access_requests(adev, req);
+}
+
+static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
+					   bool init)
+{
+	enum idh_request req;
+	int r = 0;
+
+	req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+	r = xgpu_nv_send_access_requests(adev, req);
+
+	return r;
+}
+
+static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("get ack intr and do nothing.\n");
+	return 0;
+}
+
+static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+	tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
+				(state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+	return 0;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+	int locked;
+
+	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+	 * otherwise the mailbox msg will be ruined/reseted by
+	 * the VF FLR.
+	 *
+	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
+	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
+	 * which means host side had finished this VF's FLR.
+	 */
+	locked = mutex_trylock(&adev->lock_reset);
+	if (locked)
+		adev->in_gpu_reset = true;
+
+	do {
+		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+			goto flr_done;
+
+		msleep(10);
+		timeout -= 10;
+	} while (timeout > 1);
+
+flr_done:
+	if (locked) {
+		adev->in_gpu_reset = false;
+		mutex_unlock(&adev->lock_reset);
+	}
+
+	/* Trigger recovery for world switch failure if no TDR */
+	if (amdgpu_device_should_recover_gpu(adev)
+		&& (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
+		amdgpu_device_gpu_recover(adev, NULL);
+}
+
+static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *src,
+				       unsigned type,
+				       enum amdgpu_interrupt_state state)
+{
+	u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+	tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN,
+			    (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+	return 0;
+}
+
+static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
+				   struct amdgpu_irq_src *source,
+				   struct amdgpu_iv_entry *entry)
+{
+	enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+
+	switch (event) {
+	case IDH_FLR_NOTIFICATION:
+		if (amdgpu_sriov_runtime(adev))
+			schedule_work(&adev->virt.flr_work);
+		break;
+		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+		 * it byfar since that polling thread will handle it,
+		 * other msg like flr complete is not handled here.
+		 */
+	case IDH_CLR_MSG_BUF:
+	case IDH_FLR_NOTIFICATION_CMPL:
+	case IDH_READY_TO_ACCESS_GPU:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = {
+	.set = xgpu_nv_set_mailbox_ack_irq,
+	.process = xgpu_nv_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = {
+	.set = xgpu_nv_set_mailbox_rcv_irq,
+	.process = xgpu_nv_mailbox_rcv_irq,
+};
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->virt.ack_irq.num_types = 1;
+	adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs;
+	adev->virt.rcv_irq.num_types = 1;
+	adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+	if (r)
+		return r;
+
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+	if (r) {
+		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+		return r;
+	}
+
+	return 0;
+}
+
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+	if (r)
+		return r;
+	r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+	if (r) {
+		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+		return r;
+	}
+
+	INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+
+	return 0;
+}
+
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
+{
+	amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
+	.req_full_gpu	= xgpu_nv_request_full_gpu_access,
+	.rel_full_gpu	= xgpu_nv_release_full_gpu_access,
+	.reset_gpu = xgpu_nv_request_reset,
+	.wait_reset = NULL,
+	.trans_msg = xgpu_nv_mailbox_trans_msg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
new file mode 100644
index 000000000000..99b15f6865cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MXGPU_NV_H__
+#define __MXGPU_NV_H__
+
+#define NV_MAILBOX_POLL_ACK_TIMEDOUT	500
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT	12000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT	500
+
+extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev);
+
+#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4)
+#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 9fe08408db58..f737ce459c28 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -110,14 +110,13 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl
 static int navi10_ih_irq_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ih_ring *ih = &adev->irq.ih;
-	int ret = 0;
 	u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken;
 	u32 tmp;
 
 	/* disable irqs */
 	navi10_ih_disable_interrupts(adev);
 
-	adev->nbio_funcs->ih_control(adev);
+	adev->nbio.funcs->ih_control(adev);
 
 	/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8);
@@ -162,7 +161,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
 	}
 	WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
 
-	adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell,
+	adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell,
 					    ih->doorbell_index);
 
 	tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
@@ -179,7 +178,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
 	/* enable interrupts */
 	navi10_ih_enable_interrupts(adev);
 
-	return ret;
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
index a56c93620e78..88efaecf9f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi10_ip_offset.h"
 
 int navi10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
index cadc7603ca41..a786d159e5e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi12_ip_offset.h"
 
 int navi12_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
index 3b5f0f65e096..4ea1e8fbb601 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi14_ip_offset.h"
 
 int navi14_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index c05d78d4efc6..f3a3fe746222 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -27,11 +27,21 @@
 #include "nbio/nbio_2_3_default.h"
 #include "nbio/nbio_2_3_offset.h"
 #include "nbio/nbio_2_3_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define smnPCIE_CONFIG_CNTL	0x11180044
 #define smnCPM_CONTROL		0x11180460
 #define smnPCIE_CNTL2		0x11180070
 
+
+static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
 				struct amdgpu_ring *ring)
 {
 	if (!ring || !ring->funcs->emit_wreg)
-		WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 	else
-		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-			NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
@@ -311,7 +320,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
-	.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg,
 	.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
 	.get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset,
@@ -331,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
 	.ih_control = nbio_v2_3_ih_control,
 	.init_registers = nbio_v2_3_init_registers,
 	.detect_hw_virt = nbio_v2_3_detect_hw_virt,
+	.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
index 5ae52085f6b7..a43b60acf7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -26,6 +26,7 @@
 
 #include "soc15_common.h"
 
+extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
 extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 6590143c3f75..635d9e1fc0a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
 	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
 }
 
-static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
 	.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
 	.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
 	.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -277,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
-	.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg,
 	.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
 	.get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
index 0743a6f016f3..6dc743b73218 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -26,6 +26,7 @@
 
 #include "soc15_common.h"
 
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
 extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 74eecb768a82..d6cbf26074bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -292,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
-	.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg,
 	.get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
 	.get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
index 508d549c5029..e7aefb252550 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -26,6 +26,7 @@
 
 #include "soc15_common.h"
 
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
 extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 910fffced43b..65eb378fa035 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -23,10 +23,12 @@
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "nbio_v7_4.h"
+#include "amdgpu_ras.h"
 
 #include "nbio/nbio_7_4_offset.h"
 #include "nbio/nbio_7_4_sh_mask.h"
 #include "nbio/nbio_7_4_0_smn.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c
@@ -50,6 +52,9 @@
 #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK          0x00000FFCL
 #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK            0x001F0000L
 
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+					void *ras_error_status);
+
 static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
 {
 	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -266,7 +271,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
 	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
 }
 
-static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
 	.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
 	.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
 	.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -306,17 +311,224 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
 
 static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
 {
-	uint32_t def, data;
 
-	def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
-	data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
+}
 
-	if (def != data)
-		WREG32_PCIE(smnPCIE_CI_CNTL, data);
+static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+	uint32_t bif_doorbell_intr_cntl;
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+
+	bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+	if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+		BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+		/* driver has to clear the interrupt status when bif ring is disabled */
+		bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+						BIF_DOORBELL_INT_CNTL,
+						RAS_CNTLR_INTERRUPT_CLEAR, 1);
+		WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+		/*
+		 * clear error status after ras_controller_intr according to
+		 * hw team and count ue number for query
+		 */
+		nbio_v7_4_query_ras_error_count(adev, &obj->err_data);
+
+		DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
+
+		/* ras_controller_int is dedicated for nbif ras error,
+		 * not the global interrupt for sync flood
+		 */
+		amdgpu_ras_reset_gpu(adev);
+	}
+}
+
+static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+	uint32_t bif_doorbell_intr_cntl;
+
+	bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+	if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+		BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+		/* driver has to clear the interrupt status when bif ring is disabled */
+		bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+						BIF_DOORBELL_INT_CNTL,
+						RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+		WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+		amdgpu_ras_global_ras_isr(adev);
+	}
+}
+
+
+static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
+						  struct amdgpu_irq_src *src,
+						  unsigned type,
+						  enum amdgpu_interrupt_state state)
+{
+	/* The ras_controller_irq enablement should be done in psp bl when it
+	 * tries to enable ras feature. Driver only need to set the correct interrupt
+	 * vector for bare-metal and sriov use case respectively
+	 */
+	uint32_t bif_intr_cntl;
+
+	bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+	if (state == AMDGPU_IRQ_STATE_ENABLE) {
+		/* set interrupt vector select bit to 0 to select
+		 * vetcor 1 for bare metal case */
+		bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+					      BIF_INTR_CNTL,
+					      RAS_INTR_VEC_SEL, 0);
+		WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+	}
+
+	return 0;
+}
+
+static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev,
+						struct amdgpu_irq_src *source,
+						struct amdgpu_iv_entry *entry)
+{
+	/* By design, the ih cookie for ras_controller_irq should be written
+	 * to BIFring instead of general iv ring. However, due to known bif ring
+	 * hw bug, it has to be disabled. There is no chance the process function
+	 * will be involked. Just left it as a dummy one.
+	 */
+	return 0;
+}
+
+static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+						       struct amdgpu_irq_src *src,
+						       unsigned type,
+						       enum amdgpu_interrupt_state state)
+{
+	/* The ras_controller_irq enablement should be done in psp bl when it
+	 * tries to enable ras feature. Driver only need to set the correct interrupt
+	 * vector for bare-metal and sriov use case respectively
+	 */
+	uint32_t bif_intr_cntl;
+
+	bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+	if (state == AMDGPU_IRQ_STATE_ENABLE) {
+		/* set interrupt vector select bit to 0 to select
+		 * vetcor 1 for bare metal case */
+		bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+					      BIF_INTR_CNTL,
+					      RAS_INTR_VEC_SEL, 0);
+		WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+	}
+
+	return 0;
+}
+
+static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev,
+						 struct amdgpu_irq_src *source,
+						 struct amdgpu_iv_entry *entry)
+{
+	/* By design, the ih cookie for err_event_athub_irq should be written
+	 * to BIFring instead of general iv ring. However, due to known bif ring
+	 * hw bug, it has to be disabled. There is no chance the process function
+	 * will be involked. Just left it as a dummy one.
+	 */
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = {
+	.set = nbio_v7_4_set_ras_controller_irq_state,
+	.process = nbio_v7_4_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = {
+	.set = nbio_v7_4_set_ras_err_event_athub_irq_state,
+	.process = nbio_v7_4_process_err_event_athub_irq,
+};
+
+static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+	int r;
+
+	/* init the irq funcs */
+	adev->nbio.ras_controller_irq.funcs =
+		&nbio_v7_4_ras_controller_irq_funcs;
+	adev->nbio.ras_controller_irq.num_types = 1;
+
+	/* register ras controller interrupt */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+			      NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+			      &adev->nbio.ras_controller_irq);
+
+	return r;
+}
+
+static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+	int r;
+
+	/* init the irq funcs */
+	adev->nbio.ras_err_event_athub_irq.funcs =
+		&nbio_v7_4_ras_err_event_athub_irq_funcs;
+	adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+	/* register ras err event athub interrupt */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+			      NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+			      &adev->nbio.ras_err_event_athub_irq);
+
+	return r;
+}
+
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2	0x13a20030
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+					void *ras_error_status)
+{
+	uint32_t global_sts, central_sts, int_eoi, parity_sts;
+	uint32_t corr, fatal, non_fatal;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+	corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr);
+	fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
+	non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
+				ParityErrNonFatal);
+	parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
+
+	if (corr)
+		err_data->ce_count++;
+	if (fatal)
+		err_data->ue_count++;
+
+	if (corr || fatal || non_fatal) {
+		central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS);
+		/* clear error status register */
+		WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+
+		if (fatal)
+			/* clear parity fatal error indication field */
+			WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+				    parity_sts);
+
+		if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
+				BIFL_RasContller_Intr_Recv)) {
+			/* clear interrupt status register */
+			WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts);
+			int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI);
+			int_eoi = REG_SET_FIELD(int_eoi,
+					IOHC_INTERRUPT_EOI, SMI_EOI, 1);
+			WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi);
+		}
+	}
+}
+
+static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
+						bool enable)
+{
+	WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+		       DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
-	.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg,
 	.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
 	.get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
@@ -330,6 +542,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
 	.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
 	.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
 	.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+	.enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt,
 	.update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
 	.update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
 	.get_clockgating_state = nbio_v7_4_get_clockgating_state,
@@ -337,4 +550,10 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
 	.init_registers = nbio_v7_4_init_registers,
 	.detect_hw_virt = nbio_v7_4_detect_hw_virt,
 	.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
+	.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
+	.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+	.init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+	.init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
+	.query_ras_error_count = nbio_v7_4_query_ras_error_count,
+	.ras_late_init = amdgpu_nbio_ras_late_init,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index c442865bac4f..b1ac82872752 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -26,6 +26,7 @@
 
 #include "soc15_common.h"
 
+extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
 extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index de9b995b65b1..2e0f8933410e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -40,19 +40,23 @@
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "hdp/hdp_5_0_0_offset.h"
 #include "hdp/hdp_5_0_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
 
 #include "soc15.h"
 #include "soc15_common.h"
 #include "gmc_v10_0.h"
 #include "gfxhub_v2_0.h"
 #include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
 #include "nv.h"
 #include "navi10_ih.h"
 #include "gfx_v10_0.h"
 #include "sdma_v5_0.h"
 #include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
 #include "dce_virtual.h"
 #include "mes_v10_1.h"
+#include "mxgpu_nv.h"
 
 static const struct amd_ip_funcs nv_common_ip_funcs;
 
@@ -63,8 +67,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	unsigned long flags, address, data;
 	u32 r;
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -78,8 +82,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -119,7 +123,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 static u32 nv_get_config_memsize(struct amdgpu_device *adev)
 {
-	return adev->nbio_funcs->get_memsize(adev);
+	return adev->nbio.funcs->get_memsize(adev);
 }
 
 static u32 nv_get_xclk(struct amdgpu_device *adev)
@@ -154,8 +158,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev)
 static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
 				  u8 *bios, u32 length_bytes)
 {
-	/* TODO: will implement it when SMU header is available */
-	return false;
+	u32 *dw_ptr;
+	u32 i, length_dw;
+
+	if (bios == NULL)
+		return false;
+	if (length_bytes == 0)
+		return false;
+	/* APU vbios image is part of sbios image */
+	if (adev->flags & AMD_IS_APU)
+		return false;
+
+	dw_ptr = (u32 *)bios;
+	length_dw = ALIGN(length_bytes, 4) / 4;
+
+	/* set rom index to 0 */
+	WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+	/* read out the rom data */
+	for (i = 0; i < length_dw; i++)
+		dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+
+	return true;
 }
 
 static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
@@ -176,6 +199,7 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
 	{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -279,7 +303,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		u32 memsize = adev->nbio_funcs->get_memsize(adev);
+		u32 memsize = adev->nbio.funcs->get_memsize(adev);
 
 		if (memsize != 0xffffffff)
 			break;
@@ -291,12 +315,22 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
 	return ret;
 }
 
+static bool nv_asic_supports_baco(struct amdgpu_device *adev)
+{
+	struct smu_context *smu = &adev->smu;
+
+	if (smu_baco_is_support(smu))
+		return true;
+	else
+		return false;
+}
+
 static enum amd_reset_method
 nv_asic_reset_method(struct amdgpu_device *adev)
 {
 	struct smu_context *smu = &adev->smu;
 
-	if (smu_baco_is_support(smu))
+	if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
 		return AMD_RESET_METHOD_BACO;
 	else
 		return AMD_RESET_METHOD_MODE1;
@@ -319,7 +353,12 @@ static int nv_asic_reset(struct amdgpu_device *adev)
 	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
 		if (!adev->in_suspend)
 			amdgpu_inc_vram_lost(adev);
-		ret = smu_baco_reset(smu);
+		ret = smu_baco_enter(smu);
+		if (ret)
+			return ret;
+		ret = smu_baco_exit(smu);
+		if (ret)
+			return ret;
 	} else {
 		if (!adev->in_suspend)
 			amdgpu_inc_vram_lost(adev);
@@ -368,8 +407,8 @@ static void nv_program_aspm(struct amdgpu_device *adev)
 static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
 					bool enable)
 {
-	adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
-	adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+	adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+	adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
 }
 
 static const struct amdgpu_ip_block_version nv_common_ip_block =
@@ -423,9 +462,13 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	adev->nbio_funcs = &nbio_v2_3_funcs;
+	adev->nbio.funcs = &nbio_v2_3_funcs;
+	adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
 
-	adev->nbio_funcs->detect_hw_virt(adev);
+	adev->nbio.funcs->detect_hw_virt(adev);
+
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.ops = &xgpu_nv_virt_ops;
 
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
@@ -435,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-		    is_support_sw_smu(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -446,9 +489,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
-		    is_support_sw_smu(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		if (adev->enable_mes)
 			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
 		break;
@@ -458,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-		    is_support_sw_smu(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -469,9 +513,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
-		    is_support_sw_smu(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		break;
 	default:
 		return -EINVAL;
@@ -482,12 +527,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 
 static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
 {
-	return adev->nbio_funcs->get_rev_id(adev);
+	return adev->nbio.funcs->get_rev_id(adev);
 }
 
 static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
 {
-	adev->nbio_funcs->hdp_flush(adev, ring);
+	adev->nbio.funcs->hdp_flush(adev, ring);
 }
 
 static void nv_invalidate_hdp(struct amdgpu_device *adev,
@@ -532,6 +577,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
 	return false;
 }
 
+static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+
+	/* TODO
+	 * dummy implement for pcie_replay_count sysfs interface
+	 * */
+
+	return 0;
+}
+
 static void nv_init_doorbell_index(struct amdgpu_device *adev)
 {
 	adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -579,12 +634,17 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
 	.need_full_reset = &nv_need_full_reset,
 	.get_pcie_usage = &nv_get_pcie_usage,
 	.need_reset_on_init = &nv_need_reset_on_init,
+	.get_pcie_replay_count = &nv_get_pcie_replay_count,
+	.supports_baco = &nv_asic_supports_baco,
 };
 
 static int nv_common_early_init(void *handle)
 {
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
 	adev->smc_rreg = NULL;
 	adev->smc_wreg = NULL;
 	adev->pcie_rreg = &nv_pcie_rreg;
@@ -615,10 +675,12 @@ static int nv_common_early_init(void *handle)
 			AMD_CG_SUPPORT_ATHUB_MGCG |
 			AMD_CG_SUPPORT_ATHUB_LS |
 			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG |
 			AMD_CG_SUPPORT_BIF_MGCG |
 			AMD_CG_SUPPORT_BIF_LS;
 		adev->pg_flags = AMD_PG_SUPPORT_VCN |
 			AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_ATHUB;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
@@ -635,9 +697,11 @@ static int nv_common_early_init(void *handle)
 			AMD_CG_SUPPORT_ATHUB_MGCG |
 			AMD_CG_SUPPORT_ATHUB_LS |
 			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG |
 			AMD_CG_SUPPORT_BIF_MGCG |
 			AMD_CG_SUPPORT_BIF_LS;
 		adev->pg_flags = AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_VCN_DPG;
 		adev->external_rev_id = adev->rev_id + 20;
 		break;
@@ -656,10 +720,18 @@ static int nv_common_early_init(void *handle)
 			AMD_CG_SUPPORT_MC_LS |
 			AMD_CG_SUPPORT_ATHUB_MGCG |
 			AMD_CG_SUPPORT_ATHUB_LS |
-			AMD_CG_SUPPORT_VCN_MGCG;
+			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG;
 		adev->pg_flags = AMD_PG_SUPPORT_VCN |
 			AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_ATHUB;
+		/* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+		 * as a consequence, the rev_id and external_rev_id are wrong.
+		 * workaround it by hardcoding rev_id to 0 (default value).
+		 */
+		if (amdgpu_sriov_vf(adev))
+			adev->rev_id = 0;
 		adev->external_rev_id = adev->rev_id + 0xa;
 		break;
 	default:
@@ -667,16 +739,31 @@ static int nv_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_nv_mailbox_set_irq_funcs(adev);
+	}
+
 	return 0;
 }
 
 static int nv_common_late_init(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_get_irq(adev);
+
 	return 0;
 }
 
 static int nv_common_sw_init(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_add_irq_id(adev);
+
 	return 0;
 }
 
@@ -694,7 +781,13 @@ static int nv_common_hw_init(void *handle)
 	/* enable aspm */
 	nv_program_aspm(adev);
 	/* setup nbio registers */
-	adev->nbio_funcs->init_registers(adev);
+	adev->nbio.funcs->init_registers(adev);
+	/* remap HDP registers to a hole in mmio space,
+	 * for the purpose of expose those registers
+	 * to process space
+	 */
+	if (adev->nbio.funcs->remap_hdp_registers)
+		adev->nbio.funcs->remap_hdp_registers(adev);
 	/* enable the doorbell aperture */
 	nv_enable_doorbell_aperture(adev, true);
 
@@ -856,9 +949,9 @@ static int nv_common_set_clockgating_state(void *handle,
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
-		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
+		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
+		adev->nbio.funcs->update_medium_grain_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		nv_update_hdp_mem_power_gating(adev,
 				   state == AMD_CG_STATE_GATE ? true : false);
@@ -886,7 +979,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	adev->nbio_funcs->get_clockgating_state(adev, flags);
+	adev->nbio.funcs->get_clockgating_state(adev, flags);
 
 	/* AMD_CG_SUPPORT_HDP_MGCG */
 	tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 74a9fe8e0cfb..36b65797434e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -242,6 +242,7 @@ enum psp_gfx_fw_type {
 	GFX_FW_TYPE_SDMA5                           = 55,   /* SDMA5                    MI      */
 	GFX_FW_TYPE_SDMA6                           = 56,   /* SDMA6                    MI      */
 	GFX_FW_TYPE_SDMA7                           = 57,   /* SDMA7                    MI      */
+	GFX_FW_TYPE_VCN1                            = 58,   /* VCN1                     MI      */
 	GFX_FW_TYPE_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 5d95e614369a..7539104175e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -40,6 +40,9 @@
 MODULE_FIRMWARE("amdgpu/raven_asd.bin");
 MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
 MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
 
 static int psp_v10_0_init_microcode(struct psp_context *psp)
 {
@@ -48,7 +51,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
 	char fw_name[30];
 	int err = 0;
 	const struct psp_firmware_header_v1_0 *hdr;
-
+	const struct ta_firmware_header_v1_0 *ta_hdr;
 	DRM_DEBUG("\n");
 
 	switch (adev->asic_type) {
@@ -79,7 +82,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
 	adev->psp.asd_start_addr = (uint8_t *)hdr +
 				le32_to_cpu(hdr->header.ucode_array_offset_bytes);
 
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+	err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+	if (err) {
+		release_firmware(adev->psp.ta_fw);
+		adev->psp.ta_fw = NULL;
+		dev_info(adev->dev,
+			 "psp v10.0: Failed to load firmware \"%s\"\n",
+			 fw_name);
+	} else {
+		err = amdgpu_ucode_validate(adev->psp.ta_fw);
+		if (err)
+			goto out2;
+
+		ta_hdr = (const struct ta_firmware_header_v1_0 *)
+				 adev->psp.ta_fw->data;
+		adev->psp.ta_hdcp_ucode_version =
+			le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+		adev->psp.ta_hdcp_ucode_size =
+			le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+		adev->psp.ta_hdcp_start_addr =
+			(uint8_t *)ta_hdr +
+			le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+		adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+		adev->psp.ta_dtm_ucode_version =
+			le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+		adev->psp.ta_dtm_ucode_size =
+			le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+		adev->psp.ta_dtm_start_addr =
+			(uint8_t *)adev->psp.ta_hdcp_start_addr +
+			le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+	}
+
 	return 0;
+
+out2:
+	release_firmware(adev->psp.ta_fw);
+	adev->psp.ta_fw = NULL;
 out:
 	if (err) {
 		dev_err(adev->dev,
@@ -189,53 +230,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,
 	return ret;
 }
 
-static int psp_v10_0_cmd_submit(struct psp_context *psp,
-				uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-				int index)
-{
-	unsigned int psp_write_ptr_reg = 0;
-	struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
-	struct psp_ring *ring = &psp->km_ring;
-	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
-	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
-		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
-	struct amdgpu_device *adev = psp->adev;
-	uint32_t ring_size_dw = ring->ring_size / 4;
-	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
-	/* KM (GPCOM) prepare write pointer */
-	psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
-	/* Update KM RB frame pointer to new frame */
-	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring_buffer_start;
-	else
-		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
-	/* Check invalid write_frame ptr address */
-	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
-		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
-			  ring_buffer_start, ring_buffer_end, write_frame);
-		DRM_ERROR("write_frame is pointing to address out of bounds\n");
-		return -EINVAL;
-	}
-
-	/* Initialize KM RB frame */
-	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
-	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
-	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
-	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
-	write_frame->fence_value = index;
-
-	/* Update the write Pointer in DWORDs */
-	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
-	return 0;
-}
-
 static int
 psp_v10_0_sram_map(struct amdgpu_device *adev,
 		   unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -365,15 +359,30 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp)
 	return -EINVAL;
 }
 
+static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+}
+
+static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
 static const struct psp_funcs psp_v10_0_funcs = {
 	.init_microcode = psp_v10_0_init_microcode,
 	.ring_init = psp_v10_0_ring_init,
 	.ring_create = psp_v10_0_ring_create,
 	.ring_stop = psp_v10_0_ring_stop,
 	.ring_destroy = psp_v10_0_ring_destroy,
-	.cmd_submit = psp_v10_0_cmd_submit,
 	.compare_sram_data = psp_v10_0_compare_sram_data,
 	.mode1_reset = psp_v10_0_mode1_reset,
+	.ring_get_wptr = psp_v10_0_ring_get_wptr,
+	.ring_set_wptr = psp_v10_0_ring_set_wptr,
 };
 
 void psp_v10_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 10166104b8a3..685dd9754c67 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -43,12 +43,16 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
 MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi10_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
 
 /* address block */
 #define smnMP1_FIRMWARE_FLAGS		0x3010024
@@ -57,6 +61,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
 #define mmRLC_GPM_UCODE_DATA_NV10	0x5b62
 #define mmSDMA0_UCODE_ADDR_NV10		0x5880
 #define mmSDMA0_UCODE_DATA_NV10		0x5881
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US	3000000
 
 static int psp_v11_0_init_microcode(struct psp_context *psp)
 {
@@ -155,6 +161,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
+	case CHIP_ARCTURUS:
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
 		err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
 		if (err) {
@@ -182,7 +189,31 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
-	case CHIP_ARCTURUS:
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+		err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+		if (err) {
+			release_firmware(adev->psp.ta_fw);
+			adev->psp.ta_fw = NULL;
+			dev_info(adev->dev,
+				 "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+		} else {
+			err = amdgpu_ucode_validate(adev->psp.ta_fw);
+			if (err)
+				goto out2;
+
+			ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
+			adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+			adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+			adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr +
+				le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+			adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+			adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+			adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+			adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr +
+				le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+		}
 		break;
 	default:
 		BUG();
@@ -205,26 +236,55 @@ out:
 	return err;
 }
 
+int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	int ret;
+	int retry_loop;
+
+	for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+		/* Wait for bootloader to signify that is
+		    ready having bit 31 of C2PMSG_35 set to 1 */
+		ret = psp_wait_for(psp,
+				   SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+				   0x80000000,
+				   0x80000000,
+				   false);
+
+		if (ret == 0)
+			return 0;
+	}
+
+	return ret;
+}
+
+static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t sol_reg;
+
+	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+	return sol_reg != 0x0;
+}
+
 static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
 {
 	int ret;
 	uint32_t psp_gfxdrv_command_reg = 0;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t sol_reg;
 
 	/* Check tOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
 	 */
-	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-	if (sol_reg) {
+	if (psp_v11_0_is_sos_alive(psp)) {
 		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
 		dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
 		return 0;
 	}
 
-	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			   0x80000000, 0x80000000, false);
+	ret = psp_v11_0_wait_for_bootloader(psp);
 	if (ret)
 		return ret;
 
@@ -233,16 +293,14 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
 	/* Copy PSP KDB binary to memory */
 	memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size);
 
-	/* Provide the sys driver to bootloader */
+	/* Provide the PSP KDB to bootloader */
 	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
 	       (uint32_t)(psp->fw_pri_mc_addr >> 20));
 	psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
 	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
 	       psp_gfxdrv_command_reg);
 
-	/* Wait for bootloader to signify that is ready having  bit 31 of C2PMSG_35 set to 1*/
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			   0x80000000, 0x80000000, false);
+	ret = psp_v11_0_wait_for_bootloader(psp);
 
 	return ret;
 }
@@ -252,21 +310,17 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
 	int ret;
 	uint32_t psp_gfxdrv_command_reg = 0;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
 	 */
-	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-	if (sol_reg) {
+	if (psp_v11_0_is_sos_alive(psp)) {
 		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
 		dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
 		return 0;
 	}
 
-	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			   0x80000000, 0x80000000, false);
+	ret = psp_v11_0_wait_for_bootloader(psp);
 	if (ret)
 		return ret;
 
@@ -285,8 +339,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
 	/* there might be handshake issue with hardware which needs delay */
 	mdelay(20);
 
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			   0x80000000, 0x80000000, false);
+	ret = psp_v11_0_wait_for_bootloader(psp);
 
 	return ret;
 }
@@ -296,18 +349,14 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
 	int ret;
 	unsigned int psp_gfxdrv_command_reg = 0;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
 	 */
-	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-	if (sol_reg)
+	if (psp_v11_0_is_sos_alive(psp))
 		return 0;
 
-	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			   0x80000000, 0x80000000, false);
+	ret = psp_v11_0_wait_for_bootloader(psp);
 	if (ret)
 		return ret;
 
@@ -398,6 +447,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp)
 	return false;
 }
 
+static int psp_v11_0_ring_stop(struct psp_context *psp,
+			      enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	struct amdgpu_device *adev = psp->adev;
+
+	/* Write the ring destroy command*/
+	if (psp_v11_0_support_vmr_ring(psp))
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+				     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+	else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+				     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) */
+	if (psp_v11_0_support_vmr_ring(psp))
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+				   0x80000000, 0x80000000, false);
+	else
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+				   0x80000000, 0x80000000, false);
+
+	return ret;
+}
+
 static int psp_v11_0_ring_create(struct psp_context *psp,
 				enum psp_ring_type ring_type)
 {
@@ -407,6 +484,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
 	struct amdgpu_device *adev = psp->adev;
 
 	if (psp_v11_0_support_vmr_ring(psp)) {
+		ret = psp_v11_0_ring_stop(psp, ring_type);
+		if (ret) {
+			DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
+			return ret;
+		}
+
 		/* Write low address of the ring to C2PMSG_102 */
 		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -426,6 +509,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
 				   0x80000000, 0x8000FFFF, false);
 
 	} else {
+		/* Wait for sOS ready for ring creation */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+				   0x80000000, 0x80000000, false);
+		if (ret) {
+			DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
+			return ret;
+		}
+
 		/* Write low address of the ring to C2PMSG_69 */
 		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
@@ -451,33 +542,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
 	return ret;
 }
 
-static int psp_v11_0_ring_stop(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = psp->adev;
-
-	/* Write the ring destroy command*/
-	if (psp_v11_0_support_vmr_ring(psp))
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
-				     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
-	else
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
-				     GFX_CTRL_CMD_ID_DESTROY_RINGS);
-
-	/* there might be handshake issue with hardware which needs delay */
-	mdelay(20);
-
-	/* Wait for response flag (bit 31) */
-	if (psp_v11_0_support_vmr_ring(psp))
-		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
-				   0x80000000, 0x80000000, false);
-	else
-		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-				   0x80000000, 0x80000000, false);
-
-	return ret;
-}
 
 static int psp_v11_0_ring_destroy(struct psp_context *psp,
 				 enum psp_ring_type ring_type)
@@ -497,62 +561,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp,
 	return ret;
 }
 
-static int psp_v11_0_cmd_submit(struct psp_context *psp,
-			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-			       int index)
-{
-	unsigned int psp_write_ptr_reg = 0;
-	struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
-	struct psp_ring *ring = &psp->km_ring;
-	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
-	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
-		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
-	struct amdgpu_device *adev = psp->adev;
-	uint32_t ring_size_dw = ring->ring_size / 4;
-	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
-	/* KM (GPCOM) prepare write pointer */
-	if (psp_v11_0_support_vmr_ring(psp))
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
-	else
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
-	/* Update KM RB frame pointer to new frame */
-	/* write_frame ptr increments by size of rb_frame in bytes */
-	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
-	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring_buffer_start;
-	else
-		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
-	/* Check invalid write_frame ptr address */
-	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
-		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
-			  ring_buffer_start, ring_buffer_end, write_frame);
-		DRM_ERROR("write_frame is pointing to address out of bounds\n");
-		return -EINVAL;
-	}
-
-	/* Initialize KM RB frame */
-	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
-	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
-	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
-	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
-	write_frame->fence_value = index;
-
-	/* Update the write Pointer in DWORDs */
-	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	if (psp_v11_0_support_vmr_ring(psp)) {
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
-	} else
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
-	return 0;
-}
-
 static int
 psp_v11_0_sram_map(struct amdgpu_device *adev,
 		  unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -889,6 +897,186 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
 	return psp_rlc_autoload_start(psp);
 }
 
+static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+	int ret;
+	int i;
+	uint32_t data_32;
+	int max_wait;
+	struct amdgpu_device *adev = psp->adev;
+
+	data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
+
+	max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+	for (i = 0; i < max_wait; i++) {
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+				   0x80000000, 0x80000000, false);
+		if (ret == 0)
+			break;
+	}
+	if (i < max_wait)
+		ret = 0;
+	else
+		ret = -ETIME;
+
+	DRM_DEBUG("training %s %s, cost %d @ %d ms\n",
+		  (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+		  (ret == 0) ? "succeed" : "failed",
+		  i, adev->usec_timeout/1000);
+	return ret;
+}
+
+static void psp_v11_0_memory_training_fini(struct psp_context *psp)
+{
+	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+	kfree(ctx->sys_cache);
+	ctx->sys_cache = NULL;
+}
+
+static int psp_v11_0_memory_training_init(struct psp_context *psp)
+{
+	int ret;
+	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+	if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+		DRM_DEBUG("memory training is not supported!\n");
+		return 0;
+	}
+
+	ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+	if (ctx->sys_cache == NULL) {
+		DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+		ret = -ENOMEM;
+		goto Err_out;
+	}
+
+	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+		  ctx->train_data_size,
+		  ctx->p2c_train_data_offset,
+		  ctx->c2p_train_data_offset);
+	ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+	return 0;
+
+Err_out:
+	psp_v11_0_memory_training_fini(psp);
+	return ret;
+}
+
+/*
+ * save and restore proces
+ */
+static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+	int ret;
+	uint32_t p2c_header[4];
+	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+	uint32_t *pcache = (uint32_t*)ctx->sys_cache;
+
+	if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+		DRM_DEBUG("Memory training is not supported.\n");
+		return 0;
+	} else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+		DRM_ERROR("Memory training initialization failure.\n");
+		return -EINVAL;
+	}
+
+	if (psp_v11_0_is_sos_alive(psp)) {
+		DRM_DEBUG("SOS is alive, skip memory training.\n");
+		return 0;
+	}
+
+	amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+	DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+		  pcache[0], pcache[1], pcache[2], pcache[3],
+		  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		DRM_DEBUG("Short training depends on restore.\n");
+		ops |= PSP_MEM_TRAIN_RESTORE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+	    pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+		DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+	    !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+	      pcache[3] == p2c_header[3])) {
+		DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_SAVE) &&
+	    p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+		DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n");
+		ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+	}
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	DRM_DEBUG("Memory training ops:%x.\n", ops);
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+		if (ret) {
+			DRM_ERROR("Send long training msg failed.\n");
+			return ret;
+		}
+	}
+
+	if (ops & PSP_MEM_TRAIN_SAVE) {
+		amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+	}
+
+	if (ops & PSP_MEM_TRAIN_RESTORE) {
+		amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+	}
+
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+							 PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+		if (ret) {
+			DRM_ERROR("send training msg failed.\n");
+			return ret;
+		}
+	}
+	ctx->training_cnt++;
+	return 0;
+}
+
+static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
+{
+	uint32_t data;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v11_0_support_vmr_ring(psp))
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+	else
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+	return data;
+}
+
+static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v11_0_support_vmr_ring(psp)) {
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
 static const struct psp_funcs psp_v11_0_funcs = {
 	.init_microcode = psp_v11_0_init_microcode,
 	.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
@@ -898,7 +1086,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
 	.ring_create = psp_v11_0_ring_create,
 	.ring_stop = psp_v11_0_ring_stop,
 	.ring_destroy = psp_v11_0_ring_destroy,
-	.cmd_submit = psp_v11_0_cmd_submit,
 	.compare_sram_data = psp_v11_0_compare_sram_data,
 	.mode1_reset = psp_v11_0_mode1_reset,
 	.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
@@ -909,6 +1096,11 @@ static const struct psp_funcs psp_v11_0_funcs = {
 	.ras_trigger_error = psp_v11_0_ras_trigger_error,
 	.ras_cure_posion = psp_v11_0_ras_cure_posion,
 	.rlc_autoload_start = psp_v11_0_rlc_autoload_start,
+	.mem_training_init = psp_v11_0_memory_training_init,
+	.mem_training_fini = psp_v11_0_memory_training_fini,
+	.mem_training = psp_v11_0_memory_training,
+	.ring_get_wptr = psp_v11_0_ring_get_wptr,
+	.ring_set_wptr = psp_v11_0_ring_set_wptr,
 };
 
 void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index c72e43f8e0be..58d8b6d732e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -334,62 +334,6 @@ static int psp_v12_0_ring_destroy(struct psp_context *psp,
 	return ret;
 }
 
-static int psp_v12_0_cmd_submit(struct psp_context *psp,
-			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-			       int index)
-{
-	unsigned int psp_write_ptr_reg = 0;
-	struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
-	struct psp_ring *ring = &psp->km_ring;
-	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
-	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
-		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
-	struct amdgpu_device *adev = psp->adev;
-	uint32_t ring_size_dw = ring->ring_size / 4;
-	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
-	/* KM (GPCOM) prepare write pointer */
-	if (psp_v12_0_support_vmr_ring(psp))
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
-	else
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
-	/* Update KM RB frame pointer to new frame */
-	/* write_frame ptr increments by size of rb_frame in bytes */
-	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
-	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring_buffer_start;
-	else
-		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
-	/* Check invalid write_frame ptr address */
-	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
-		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
-			  ring_buffer_start, ring_buffer_end, write_frame);
-		DRM_ERROR("write_frame is pointing to address out of bounds\n");
-		return -EINVAL;
-	}
-
-	/* Initialize KM RB frame */
-	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
-	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
-	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
-	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
-	write_frame->fence_value = index;
-
-	/* Update the write Pointer in DWORDs */
-	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	if (psp_v12_0_support_vmr_ring(psp)) {
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
-	} else
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
-	return 0;
-}
-
 static int
 psp_v12_0_sram_map(struct amdgpu_device *adev,
 		  unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -546,6 +490,30 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
 	return 0;
 }
 
+static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp)
+{
+	uint32_t data;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v12_0_support_vmr_ring(psp))
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+	else
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+	return data;
+}
+
+static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v12_0_support_vmr_ring(psp)) {
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
 static const struct psp_funcs psp_v12_0_funcs = {
 	.init_microcode = psp_v12_0_init_microcode,
 	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
@@ -554,9 +522,10 @@ static const struct psp_funcs psp_v12_0_funcs = {
 	.ring_create = psp_v12_0_ring_create,
 	.ring_stop = psp_v12_0_ring_stop,
 	.ring_destroy = psp_v12_0_ring_destroy,
-	.cmd_submit = psp_v12_0_cmd_submit,
 	.compare_sram_data = psp_v12_0_compare_sram_data,
 	.mode1_reset = psp_v12_0_mode1_reset,
+	.ring_get_wptr = psp_v12_0_ring_get_wptr,
+	.ring_set_wptr = psp_v12_0_ring_set_wptr,
 };
 
 void psp_v12_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index d2c727f6a8bd..735c43c7daab 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -179,7 +179,7 @@ static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver)
 	 * Double check if the latest four legacy versions.
 	 * If yes, it is still the right version.
 	 */
-	for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) {
+	for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) {
 		if (sos_old_versions[i] == adev->psp.sos_fw_version)
 			return true;
 	}
@@ -410,64 +410,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp,
 	return ret;
 }
 
-static int psp_v3_1_cmd_submit(struct psp_context *psp,
-			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-			       int index)
-{
-	unsigned int psp_write_ptr_reg = 0;
-	struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
-	struct psp_ring *ring = &psp->km_ring;
-	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
-	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
-		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
-	struct amdgpu_device *adev = psp->adev;
-	uint32_t ring_size_dw = ring->ring_size / 4;
-	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
-	/* KM (GPCOM) prepare write pointer */
-	if (psp_v3_1_support_vmr_ring(psp))
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
-	else
-		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
-	/* Update KM RB frame pointer to new frame */
-	/* write_frame ptr increments by size of rb_frame in bytes */
-	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
-	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring_buffer_start;
-	else
-		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
-	/* Check invalid write_frame ptr address */
-	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
-		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
-			  ring_buffer_start, ring_buffer_end, write_frame);
-		DRM_ERROR("write_frame is pointing to address out of bounds\n");
-		return -EINVAL;
-	}
-
-	/* Initialize KM RB frame */
-	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
-	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
-	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
-	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
-	write_frame->fence_value = index;
-
-	/* Update the write Pointer in DWORDs */
-	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	if (psp_v3_1_support_vmr_ring(psp)) {
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
-		/* send interrupt to PSP for SRIOV ring write pointer update */
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
-					GFX_CTRL_CMD_ID_CONSUME_CMD);
-	} else
-		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
-	return 0;
-}
-
 static int
 psp_v3_1_sram_map(struct amdgpu_device *adev,
 		  unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -641,6 +583,31 @@ static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
 	return false;
 }
 
+static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
+{
+	uint32_t data;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v3_1_support_vmr_ring(psp))
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+	else
+		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+	return data;
+}
+
+static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (psp_v3_1_support_vmr_ring(psp)) {
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+		/* send interrupt to PSP for SRIOV ring write pointer update */
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+			GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
 static const struct psp_funcs psp_v3_1_funcs = {
 	.init_microcode = psp_v3_1_init_microcode,
 	.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -649,11 +616,12 @@ static const struct psp_funcs psp_v3_1_funcs = {
 	.ring_create = psp_v3_1_ring_create,
 	.ring_stop = psp_v3_1_ring_stop,
 	.ring_destroy = psp_v3_1_ring_destroy,
-	.cmd_submit = psp_v3_1_cmd_submit,
 	.compare_sram_data = psp_v3_1_compare_sram_data,
 	.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
 	.mode1_reset = psp_v3_1_mode1_reset,
 	.support_vmr_ring = psp_v3_1_support_vmr_ring,
+	.ring_get_wptr = psp_v3_1_ring_get_wptr,
+	.ring_set_wptr = psp_v3_1_ring_set_wptr,
 };
 
 void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..7d509a40076f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
@@ -1260,16 +1260,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 
 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
-	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
-			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
-	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..b6109a99fc43 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
@@ -1698,16 +1698,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 
 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
-	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
-			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_scheds[i] =
+			 &adev->sdma.instance[i].ring.sched;
 	}
-	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4554e72c8378..27c7001be1ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 
 static const struct soc15_reg_golden golden_settings_sdma_4[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -254,7 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
-	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
+	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+	0, 0,
+	},
+	{ "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+	0, 0,
+	},
 };
 
 static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
@@ -698,7 +798,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -747,13 +847,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask = 0;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
 
 	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
 
 	sdma_v4_0_wait_reg_mem(ring, 0, 1,
-			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
-			       adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
 			       ref_and_mask, ref_and_mask, 10);
 }
 
@@ -1579,7 +1679,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
@@ -1686,107 +1786,33 @@ static int sdma_v4_0_early_init(void *handle)
 	sdma_v4_0_set_buffer_funcs(adev);
 	sdma_v4_0_set_vm_pte_funcs(adev);
 	sdma_v4_0_set_irq_funcs(adev);
+	sdma_v4_0_set_ras_funcs(adev);
 
 	return 0;
 }
 
 static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
+		void *err_data,
 		struct amdgpu_iv_entry *entry);
 
 static int sdma_v4_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ras_common_if **ras_if = &adev->sdma.ras_if;
 	struct ras_ih_if ih_info = {
 		.cb = sdma_v4_0_process_ras_data_cb,
 	};
-	struct ras_fs_if fs_info = {
-		.sysfs_name = "sdma_err_count",
-		.debugfs_name = "sdma_err_inject",
-	};
-	struct ras_common_if ras_block = {
-		.block = AMDGPU_RAS_BLOCK__SDMA,
-		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-		.sub_block_index = 0,
-		.name = "sdma",
-	};
-	int r, i;
-
-	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
-		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
-		return 0;
-	}
-
-	/* handle resume path. */
-	if (*ras_if) {
-		/* resend ras TA enable cmd during resume.
-		 * prepare to handle failure.
-		 */
-		ih_info.head = **ras_if;
-		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-		if (r) {
-			if (r == -EAGAIN) {
-				/* request a gpu reset. will run again. */
-				amdgpu_ras_request_reset_on_boot(adev,
-						AMDGPU_RAS_BLOCK__SDMA);
-				return 0;
-			}
-			/* fail to enable ras, cleanup all. */
-			goto irq;
-		}
-		/* enable successfully. continue. */
-		goto resume;
-	}
-
-	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
-	if (!*ras_if)
-		return -ENOMEM;
-
-	**ras_if = ras_block;
-
-	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r) {
-		if (r == -EAGAIN) {
-			amdgpu_ras_request_reset_on_boot(adev,
-					AMDGPU_RAS_BLOCK__SDMA);
-			r = 0;
-		}
-		goto feature;
-	}
-
-	ih_info.head = **ras_if;
-	fs_info.head = **ras_if;
-
-	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
-	if (r)
-		goto interrupt;
-
-	amdgpu_ras_debugfs_create(adev, &fs_info);
+	int i;
 
-	r = amdgpu_ras_sysfs_create(adev, &fs_info);
-	if (r)
-		goto sysfs;
-resume:
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
-				   AMDGPU_SDMA_IRQ_INSTANCE0 + i);
-		if (r)
-			goto irq;
+	/* read back edc counter registers to clear the counters */
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
 	}
 
-	return 0;
-irq:
-	amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
-	amdgpu_ras_debugfs_remove(adev, *ras_if);
-	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
-	amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
-	kfree(*ras_if);
-	*ras_if = NULL;
-	return r;
+	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
+		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+	else
+		return 0;
 }
 
 static int sdma_v4_0_sw_init(void *handle)
@@ -1858,21 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
-	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
-			adev->sdma.ras_if) {
-		struct ras_common_if *ras_if = adev->sdma.ras_if;
-		struct ras_ih_if ih_info = {
-			.head = *ras_if,
-		};
-
-		/*remove fs first*/
-		amdgpu_ras_debugfs_remove(adev, ras_if);
-		amdgpu_ras_sysfs_remove(adev, ras_if);
-		/*remove the IH*/
-		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-		amdgpu_ras_feature_enable(adev, ras_if, 0);
-		kfree(ras_if);
-	}
+	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
+		adev->sdma.funcs->ras_fini(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -1892,7 +1905,7 @@ static int sdma_v4_0_hw_init(void *handle)
 
 	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
 			adev->powerplay.pp_funcs->set_powergating_by_smu) ||
-			adev->asic_type == CHIP_RENOIR)
+			(adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
 	if (!amdgpu_sriov_vf(adev))
@@ -2025,52 +2038,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
 }
 
 static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
-		struct ras_err_data *err_data,
+		void *err_data,
 		struct amdgpu_iv_entry *entry)
 {
-	uint32_t err_source;
 	int instance;
 
+	/* When “Full RAS” is enabled, the per-IP interrupt sources should
+	 * be disabled and the driver should only look for the aggregated
+	 * interrupt via sync flood
+	 */
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+		goto out;
+
 	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
 	if (instance < 0)
-		return 0;
-
-	switch (entry->src_id) {
-	case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
-		err_source = 0;
-		break;
-	case SDMA0_4_0__SRCID__SDMA_ECC:
-		err_source = 1;
-		break;
-	default:
-		return 0;
-	}
-
-	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+		goto out;
 
-	amdgpu_ras_reset_gpu(adev, 0);
+	amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
 
+out:
 	return AMDGPU_RAS_SUCCESS;
 }
 
-static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
-				      struct amdgpu_irq_src *source,
-				      struct amdgpu_iv_entry *entry)
-{
-	struct ras_common_if *ras_if = adev->sdma.ras_if;
-	struct ras_dispatch_if ih_data = {
-		.entry = entry,
-	};
-
-	if (!ras_if)
-		return 0;
-
-	ih_data.head = *ras_if;
-
-	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
-	return 0;
-}
-
 static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
 					      struct amdgpu_irq_src *source,
 					      struct amdgpu_iv_entry *entry)
@@ -2418,7 +2407,7 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
 
 static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
 	.set = sdma_v4_0_set_ecc_irq_state,
-	.process = sdma_v4_0_process_ecc_irq,
+	.process = amdgpu_sdma_process_ecc_irq,
 };
 
 
@@ -2532,10 +2521,73 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 			sched = &adev->sdma.instance[i].page.sched;
 		else
 			sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
-			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_scheds[i] = sched;
+	}
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+					uint32_t instance,
+					uint32_t *sec_count)
+{
+	uint32_t i;
+	uint32_t sec_cnt;
+
+	/* double bits error (multiple bits) error detection is not supported */
+	for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+		/* the SDMA_EDC_COUNTER register in each sdma instance
+		 * shares the same sed shift_mask
+		 * */
+		sec_cnt = (value &
+			sdma_v4_0_ras_fields[i].sec_count_mask) >>
+			sdma_v4_0_ras_fields[i].sec_count_shift;
+		if (sec_cnt) {
+			DRM_INFO("Detected %s in SDMA%d, SED %d\n",
+				sdma_v4_0_ras_fields[i].name,
+				instance, sec_cnt);
+			*sec_count += sec_cnt;
+		}
+	}
+}
+
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+			uint32_t instance, void *ras_error_status)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	uint32_t sec_count = 0;
+	uint32_t reg_value = 0;
+
+	reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+	/* double bit error is not supported */
+	if (reg_value)
+		sdma_v4_0_get_ras_error_count(reg_value,
+				instance, &sec_count);
+	/* err_data->ce_count should be initialized to 0
+	 * before calling into this function */
+	err_data->ce_count += sec_count;
+	/* double bit error is not supported
+	 * set ue count to 0 */
+	err_data->ue_count = 0;
+
+	return 0;
+};
+
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
+	.ras_late_init = amdgpu_sdma_ras_late_init,
+	.ras_fini = amdgpu_sdma_ras_fini,
+	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
+};
+
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+	case CHIP_ARCTURUS:
+		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+		break;
+	default:
+		break;
 	}
-	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 8493bfbbc148..4c6bf1f8a528 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
 
-	/* IB packet must end on a 8 DW boundary */
-	sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	/* An IB packet must end on a 8 DW boundary--the next dword
+	 * must be on a 8-dword boundary. Our IB packet below is 6
+	 * dwords long, thus add x number of NOPs, such that, in
+	 * modular arithmetic,
+	 * wptr + 6 + x = 8k, k >= 0, which in C is,
+	 * (wptr + 6 + x) % 8 = 0.
+	 * The expression below, is a solution of x.
+	 */
+	sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -406,7 +413,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask = 0;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
 
 	if (ring->me == 0)
 		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
@@ -416,8 +423,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
 	amdgpu_ring_write(ring, ref_and_mask); /* reference */
 	amdgpu_ring_write(ring, ref_and_mask); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
@@ -683,7 +690,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
 		WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
 
-		adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
 						      ring->doorbell_index, 20);
 
 		if (amdgpu_sriov_vf(adev))
@@ -907,16 +914,9 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 			udelay(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		if (amdgpu_emu_mode == 1)
-			DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i);
-		else
-			DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	amdgpu_device_wb_free(adev, index);
 
 	return r;
@@ -981,13 +981,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
@@ -1086,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
 }
 
 /**
- * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
- *
+ * sdma_v5_0_ring_pad_ib - pad the IB
  * @ib: indirect buffer to fill with padding
  *
+ * Pad the IB with NOPs to a boundary multiple of 8.
  */
 static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
@@ -1097,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 0x7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
@@ -1721,17 +1718,15 @@ static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
 
 static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
-	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
 		for (i = 0; i < adev->sdma.num_instances; i++) {
-			sched = &adev->sdma.instance[i].ring.sched;
-			adev->vm_manager.vm_pte_rqs[i] =
-				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+			adev->vm_manager.vm_pte_scheds[i] =
+				&adev->sdma.instance[i].ring.sched;
 		}
-		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+		adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 493af42152f2..4d415bfdb42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -975,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
 	{GRBM_STATUS},
+	{mmGRBM_STATUS2},
+	{mmGRBM_STATUS_SE0},
+	{mmGRBM_STATUS_SE1},
+	{mmSRBM_STATUS},
+	{mmSRBM_STATUS2},
+	{DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+	{DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+	{mmCP_STAT},
+	{mmCP_STALLED_STAT1},
+	{mmCP_STALLED_STAT2},
+	{mmCP_STALLED_STAT3},
 	{GB_ADDR_CONFIG},
 	{MC_ARB_RAMCFG},
 	{GB_TILE_MODE0},
@@ -1186,6 +1197,11 @@ static int si_asic_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
+static bool si_asic_supports_baco(struct amdgpu_device *adev)
+{
+	return false;
+}
+
 static enum amd_reset_method
 si_asic_reset_method(struct amdgpu_device *adev)
 {
@@ -1414,6 +1430,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
 	.get_pcie_usage = &si_get_pcie_usage,
 	.need_reset_on_init = &si_need_reset_on_init,
 	.get_pcie_replay_count = &si_get_pcie_replay_count,
+	.supports_baco = &si_asic_supports_baco,
 };
 
 static uint32_t si_get_rev_id(struct amdgpu_device *adev)
@@ -1633,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
 static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 {
 	struct pci_dev *root = adev->pdev->bus->self;
-	int bridge_pos, gpu_pos;
 	u32 speed_cntl, current_data_rate;
 	int i;
 	u16 tmp16;
@@ -1668,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
 	}
 
-	bridge_pos = pci_pcie_cap(root);
-	if (!bridge_pos)
-		return;
-
-	gpu_pos = pci_pcie_cap(adev->pdev);
-	if (!gpu_pos)
+	if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
 		return;
 
 	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1682,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 			u16 bridge_cfg2, gpu_cfg2;
 			u32 max_lw, current_lw, tmp;
 
-			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
-			pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+						  &bridge_cfg);
+			pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+						  &gpu_cfg);
 
 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
-			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
 
 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
-			pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+			pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+						   tmp16);
 
 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1706,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 			}
 
 			for (i = 0; i < 10; i++) {
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_DEVSTA,
+							  &tmp16);
 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
 					break;
 
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+							  &bridge_cfg);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL,
+							  &gpu_cfg);
 
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+							  &bridge_cfg2);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL2,
+							  &gpu_cfg2);
 
 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
 				tmp |= LC_SET_QUIESCE;
@@ -1726,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 
 				mdelay(100);
 
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+							  &tmp16);
 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
-				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+							   tmp16);
 
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL,
+							  &tmp16);
 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
-				pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
-				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
-				tmp16 &= ~((1 << 4) | (7 << 9));
-				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
-				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
-				pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
-				tmp16 &= ~((1 << 4) | (7 << 9));
-				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
-				pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+				pcie_capability_write_word(adev->pdev,
+							   PCI_EXP_LNKCTL,
+							   tmp16);
+
+				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+							  &tmp16);
+				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN);
+				tmp16 |= (bridge_cfg2 &
+					  (PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN));
+				pcie_capability_write_word(root,
+							   PCI_EXP_LNKCTL2,
+							   tmp16);
+
+				pcie_capability_read_word(adev->pdev,
+							  PCI_EXP_LNKCTL2,
+							  &tmp16);
+				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN);
+				tmp16 |= (gpu_cfg2 &
+					  (PCI_EXP_LNKCTL2_ENTER_COMP |
+					   PCI_EXP_LNKCTL2_TX_MARGIN));
+				pcie_capability_write_word(adev->pdev,
+							   PCI_EXP_LNKCTL2,
+							   tmp16);
 
 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
 				tmp &= ~LC_SET_QUIESCE;
@@ -1757,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-	pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
-	tmp16 &= ~0xf;
+	pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
 	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
-		tmp16 |= 3;
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
 	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
-		tmp16 |= 2;
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
 	else
-		tmp16 |= 1;
-	pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+	pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
 
 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index bdda8b4e03f0..9aac9f9c50bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -834,16 +834,14 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
 
 static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
-	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
-			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
-	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version si_dma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 57bb5f9e08b2..88ae27a5a03d 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
 
 	si_ih_disable_interrupts(adev);
-	WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8);
+	/* set dummy read address to dummy page address */
+	WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index c44723c267c9..c902f26cf50d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -234,7 +234,7 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
 	DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
 		 (uint16_t)address, numbytes);
 
-	if (drm_debug & DRM_UT_DRIVER) {
+	if (drm_debug_enabled(DRM_UT_DRIVER)) {
 		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
 			       16, 1, data, numbytes, false);
 	}
@@ -388,7 +388,7 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
 	DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
 		  (uint16_t)address, bytes_received);
 
-	if (drm_debug & DRM_UT_DRIVER) {
+	if (drm_debug_enabled(DRM_UT_DRIVER)) {
 		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
 			       16, 1, data, bytes_received, false);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 4ccfcdf8f16a..317803f6a561 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -58,13 +58,18 @@
 #include "mmhub_v1_0.h"
 #include "df_v1_7.h"
 #include "df_v3_6.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
 #include "vega10_ih.h"
 #include "sdma_v4_0.h"
 #include "uvd_v7_0.h"
 #include "vce_v4_0.h"
 #include "vcn_v1_0.h"
 #include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
 #include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
 #include "amdgpu_smu.h"
@@ -91,8 +96,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	unsigned long flags, address, data;
 	u32 r;
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -106,8 +111,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -121,8 +126,8 @@ static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
 {
 	unsigned long flags, address, data;
 	u64 r;
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	/* read low 32 bit */
@@ -142,8 +147,8 @@ static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
 {
 	unsigned long flags, address, data;
 
-	address = adev->nbio_funcs->get_pcie_index_offset(adev);
-	data = adev->nbio_funcs->get_pcie_data_offset(adev);
+	address = adev->nbio.funcs->get_pcie_index_offset(adev);
+	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	/* write low 32 bit */
@@ -262,7 +267,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
 {
-	return adev->nbio_funcs->get_memsize(adev);
+	return adev->nbio.funcs->get_memsize(adev);
 }
 
 static u32 soc15_get_xclk(struct amdgpu_device *adev)
@@ -336,6 +341,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
 	{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -461,7 +467,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		u32 memsize = adev->nbio_funcs->get_memsize(adev);
+		u32 memsize = adev->nbio.funcs->get_memsize(adev);
 
 		if (memsize != 0xffffffff)
 			break;
@@ -473,78 +479,53 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
 	return ret;
 }
 
-static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
-		*cap = false;
-		return -ENOENT;
-	}
-
-	return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-}
-
 static int soc15_asic_baco_reset(struct amdgpu_device *adev)
 {
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-		return -ENOENT;
-
-	/* enter BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 1))
-		return -EIO;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
-	/* exit BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 0))
-		return -EIO;
+	/* avoid NBIF got stuck when do RAS recovery in BACO reset */
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
-	dev_info(adev->dev, "GPU BACO reset\n");
+	ret = amdgpu_dpm_baco_reset(adev);
+	if (ret)
+		return ret;
 
-	adev->in_baco_reset = 1;
+	/* re-enable doorbell interrupt after BACO exit */
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
 	return 0;
 }
 
-static int soc15_mode2_reset(struct amdgpu_device *adev)
-{
-	if (!adev->powerplay.pp_funcs ||
-	    !adev->powerplay.pp_funcs->asic_reset_mode_2)
-		return -ENOENT;
-
-	return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle);
-}
-
 static enum amd_reset_method
 soc15_asic_reset_method(struct amdgpu_device *adev)
 {
-	bool baco_reset;
+	bool baco_reset = false;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 		return AMD_RESET_METHOD_MODE2;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
-		soc15_asic_get_baco_capability(adev, &baco_reset);
+	case CHIP_ARCTURUS:
+		baco_reset = amdgpu_dpm_is_baco_supported(adev);
 		break;
 	case CHIP_VEGA20:
 		if (adev->psp.sos_fw_version >= 0x80067)
-			soc15_asic_get_baco_capability(adev, &baco_reset);
-		else
-			baco_reset = false;
-		if (baco_reset) {
-			struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
-			struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+			baco_reset = amdgpu_dpm_is_baco_supported(adev);
 
-			if (hive || (ras && ras->supported))
-				baco_reset = false;
-		}
+		/*
+		 * 1. PMFW version > 0x284300: all cases use baco
+		 * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+		 */
+		if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+			baco_reset = false;
 		break;
 	default:
-		baco_reset = false;
 		break;
 	}
 
@@ -562,7 +543,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 				amdgpu_inc_vram_lost(adev);
 			return soc15_asic_baco_reset(adev);
 		case AMD_RESET_METHOD_MODE2:
-			return soc15_mode2_reset(adev);
+			return amdgpu_dpm_mode2_reset(adev);
 		default:
 			if (!adev->in_suspend)
 				amdgpu_inc_vram_lost(adev);
@@ -570,6 +551,22 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 	}
 }
 
+static bool soc15_supports_baco(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_VEGA12:
+	case CHIP_ARCTURUS:
+		return amdgpu_dpm_is_baco_supported(adev);
+	case CHIP_VEGA20:
+		if (adev->psp.sos_fw_version >= 0x80067)
+			return amdgpu_dpm_is_baco_supported(adev);
+		return false;
+	default:
+		return false;
+	}
+}
+
 /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
 			u32 cntl_reg, u32 status_reg)
 {
@@ -626,8 +623,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
 static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
 					   bool enable)
 {
-	adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
-	adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+	adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+	adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
 }
 
 static const struct amdgpu_ip_block_version vega10_common_ip_block =
@@ -641,7 +638,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
 
 static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
 {
-	return adev->nbio_funcs->get_rev_id(adev);
+	return adev->nbio.funcs->get_rev_id(adev);
 }
 
 int soc15_set_ip_blocks(struct amdgpu_device *adev)
@@ -667,21 +664,25 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 	if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
 		adev->gmc.xgmi.supported = true;
 
-	if (adev->flags & AMD_IS_APU)
-		adev->nbio_funcs = &nbio_v7_0_funcs;
-	else if (adev->asic_type == CHIP_VEGA20 ||
-		adev->asic_type == CHIP_ARCTURUS)
-		adev->nbio_funcs = &nbio_v7_4_funcs;
-	else
-		adev->nbio_funcs = &nbio_v6_1_funcs;
+	if (adev->flags & AMD_IS_APU) {
+		adev->nbio.funcs = &nbio_v7_0_funcs;
+		adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+	} else if (adev->asic_type == CHIP_VEGA20 ||
+		   adev->asic_type == CHIP_ARCTURUS) {
+		adev->nbio.funcs = &nbio_v7_4_funcs;
+		adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+	} else {
+		adev->nbio.funcs = &nbio_v6_1_funcs;
+		adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+	}
 
 	if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
-		adev->df_funcs = &df_v3_6_funcs;
+		adev->df.funcs = &df_v3_6_funcs;
 	else
-		adev->df_funcs = &df_v1_7_funcs;
+		adev->df.funcs = &df_v1_7_funcs;
 
 	adev->rev_id = soc15_get_rev_id(adev);
-	adev->nbio_funcs->detect_hw_virt(adev);
+	adev->nbio.funcs->detect_hw_virt(adev);
 
 	if (amdgpu_sriov_vf(adev))
 		adev->virt.ops = &xgpu_ai_virt_ops;
@@ -713,11 +714,11 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		}
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
-		if (!amdgpu_sriov_vf(adev)) {
-			if (is_support_sw_smu(adev))
+		if (is_support_sw_smu(adev)) {
+			if (!amdgpu_sriov_vf(adev))
 				amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-			else
-				amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+		} else {
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		}
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -750,13 +751,31 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 	case CHIP_ARCTURUS:
 		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
-		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+
+		if (amdgpu_sriov_vf(adev)) {
+			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+		} else {
+			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+		}
+
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-		amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+
+		if (amdgpu_sriov_vf(adev)) {
+			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+				amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+		} else {
+			amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+		}
+		if (!amdgpu_sriov_vf(adev))
+			amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
 		break;
 	case CHIP_RENOIR:
 		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
@@ -764,8 +783,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
 		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
 			amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
-		if (is_support_sw_smu(adev))
-			amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -775,6 +793,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
                         amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		break;
 	default:
 		return -EINVAL;
@@ -785,7 +804,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 
 static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
 {
-	adev->nbio_funcs->hdp_flush(adev, ring);
+	adev->nbio.funcs->hdp_flush(adev, ring);
 }
 
 static void soc15_invalidate_hdp(struct amdgpu_device *adev,
@@ -953,6 +972,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
 	.get_pcie_usage = &soc15_get_pcie_usage,
 	.need_reset_on_init = &soc15_need_reset_on_init,
 	.get_pcie_replay_count = &soc15_get_pcie_replay_count,
+	.supports_baco = &soc15_supports_baco,
 };
 
 static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -961,6 +981,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
 	.read_bios_from_rom = &soc15_read_bios_from_rom,
 	.read_register = &soc15_read_register,
 	.reset = &soc15_asic_reset,
+	.reset_method = &soc15_asic_reset_method,
 	.set_vga_state = &soc15_vga_set_state,
 	.get_xclk = &soc15_get_xclk,
 	.set_uvd_clocks = &soc15_set_uvd_clocks,
@@ -973,7 +994,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
 	.get_pcie_usage = &vega20_get_pcie_usage,
 	.need_reset_on_init = &soc15_need_reset_on_init,
 	.get_pcie_replay_count = &soc15_get_pcie_replay_count,
-	.reset_method = &soc15_asic_reset_method
+	.supports_baco = &soc15_supports_baco,
 };
 
 static int soc15_common_early_init(void *handle)
@@ -1157,7 +1178,10 @@ static int soc15_common_early_init(void *handle)
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS |
 			AMD_CG_SUPPORT_MC_MGCG |
-			AMD_CG_SUPPORT_MC_LS;
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_IH_CG |
+			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x32;
 		break;
@@ -1178,12 +1202,14 @@ static int soc15_common_early_init(void *handle)
 				 AMD_CG_SUPPORT_HDP_LS |
 				 AMD_CG_SUPPORT_ROM_MGCG |
 				 AMD_CG_SUPPORT_VCN_MGCG |
+				 AMD_CG_SUPPORT_JPEG_MGCG |
 				 AMD_CG_SUPPORT_IH_CG |
 				 AMD_CG_SUPPORT_ATHUB_LS |
 				 AMD_CG_SUPPORT_ATHUB_MGCG |
 				 AMD_CG_SUPPORT_DF_MGCG;
 		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
 				 AMD_PG_SUPPORT_VCN |
+				 AMD_PG_SUPPORT_JPEG |
 				 AMD_PG_SUPPORT_VCN_DPG;
 		adev->external_rev_id = adev->rev_id + 0x91;
 		break;
@@ -1203,11 +1229,15 @@ static int soc15_common_early_init(void *handle)
 static int soc15_common_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r = 0;
 
 	if (amdgpu_sriov_vf(adev))
 		xgpu_ai_mailbox_get_irq(adev);
 
-	return 0;
+	if (adev->nbio.funcs->ras_late_init)
+		r = adev->nbio.funcs->ras_late_init(adev);
+
+	return r;
 }
 
 static int soc15_common_sw_init(void *handle)
@@ -1217,13 +1247,17 @@ static int soc15_common_sw_init(void *handle)
 	if (amdgpu_sriov_vf(adev))
 		xgpu_ai_mailbox_add_irq_id(adev);
 
-	adev->df_funcs->sw_init(adev);
+	adev->df.funcs->sw_init(adev);
 
 	return 0;
 }
 
 static int soc15_common_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	amdgpu_nbio_ras_fini(adev);
+	adev->df.funcs->sw_fini(adev);
 	return 0;
 }
 
@@ -1236,12 +1270,12 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
 	if (!amdgpu_sriov_vf(adev)) {
 		for (i = 0; i < adev->sdma.num_instances; i++) {
 			ring = &adev->sdma.instance[i].ring;
-			adev->nbio_funcs->sdma_doorbell_range(adev, i,
+			adev->nbio.funcs->sdma_doorbell_range(adev, i,
 				ring->use_doorbell, ring->doorbell_index,
 				adev->doorbell_index.sdma_doorbell_range);
 		}
 
-		adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
 						adev->irq.ih.doorbell_index);
 	}
 }
@@ -1255,13 +1289,13 @@ static int soc15_common_hw_init(void *handle)
 	/* enable aspm */
 	soc15_program_aspm(adev);
 	/* setup nbio registers */
-	adev->nbio_funcs->init_registers(adev);
+	adev->nbio.funcs->init_registers(adev);
 	/* remap HDP registers to a hole in mmio space,
 	 * for the purpose of expose those registers
 	 * to process space
 	 */
-	if (adev->nbio_funcs->remap_hdp_registers)
-		adev->nbio_funcs->remap_hdp_registers(adev);
+	if (adev->nbio.funcs->remap_hdp_registers)
+		adev->nbio.funcs->remap_hdp_registers(adev);
 
 	/* enable the doorbell aperture */
 	soc15_enable_doorbell_aperture(adev, true);
@@ -1284,6 +1318,14 @@ static int soc15_common_hw_fini(void *handle)
 	if (amdgpu_sriov_vf(adev))
 		xgpu_ai_mailbox_put_irq(adev);
 
+	if (adev->nbio.ras_if &&
+	    amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+		if (adev->nbio.funcs->init_ras_controller_interrupt)
+			amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+		if (adev->nbio.funcs->init_ras_err_event_athub_interrupt)
+			amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+	}
+
 	return 0;
 }
 
@@ -1424,9 +1466,9 @@ static int soc15_common_set_clockgating_state(void *handle,
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
-		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
+		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
+		adev->nbio.funcs->update_medium_grain_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_hdp_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -1436,14 +1478,14 @@ static int soc15_common_set_clockgating_state(void *handle,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_rom_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		adev->df_funcs->update_medium_grain_clock_gating(adev,
+		adev->df.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		break;
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
-		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
+		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
+		adev->nbio.funcs->update_medium_grain_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_hdp_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -1472,7 +1514,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	adev->nbio_funcs->get_clockgating_state(adev, flags);
+	adev->nbio.funcs->get_clockgating_state(adev, flags);
 
 	/* AMD_CG_SUPPORT_HDP_LS */
 	data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
@@ -1494,7 +1536,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
 	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
 		*flags |= AMD_CG_SUPPORT_ROM_MGCG;
 
-	adev->df_funcs->get_clockgating_state(adev, flags);
+	adev->df.funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a3dde0c31f57..d0fb7a67c1a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -28,8 +28,8 @@
 #include "nbio_v7_0.h"
 #include "nbio_v7_4.h"
 
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG		6
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	3
 
 extern const struct amd_ip_funcs soc15_common_ip_funcs;
 
@@ -60,6 +60,18 @@ struct soc15_allowed_register_entry {
 	bool grbm_indexed;
 };
 
+struct soc15_ras_field_entry {
+	const char *name;
+	uint32_t hwip;
+	uint32_t inst;
+	uint32_t seg;
+	uint32_t reg_offset;
+	uint32_t sec_count_mask;
+	uint32_t sec_count_shift;
+	uint32_t ded_count_mask;
+	uint32_t ded_count_shift;
+};
+
 #define SOC15_REG_ENTRY(ip, inst, reg)	ip##_HWIP, inst, reg##_BASE_IDX, reg
 
 #define SOC15_REG_ENTRY_OFFSET(entry)	(adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
@@ -67,6 +79,8 @@ struct soc15_allowed_register_entry {
 #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
 	{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
 
+#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT
+
 void soc15_grbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
 int soc15_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 839f186e1182..19e870c79896 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -52,6 +52,7 @@
 		uint32_t old_ = 0;	\
 		uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
 		uint32_t loop = adev->usec_timeout;		\
+		ret = 0;					\
 		while ((tmp_ & (mask)) != (expected_value)) {	\
 			if (old_ != tmp_) {			\
 				loop = adev->usec_timeout;	\
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
new file mode 100644
index 000000000000..0d6b50528d76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_0.h"
+#include "amdgpu.h"
+
+static void umc_v6_0_init_registers(struct amdgpu_device *adev)
+{
+	unsigned i,j;
+
+	for (i = 0; i < 4; i++)
+		for (j = 0; j < 4; j++)
+			WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002);
+}
+
+const struct amdgpu_umc_funcs umc_v6_0_funcs = {
+	.init_registers = umc_v6_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
new file mode 100644
index 000000000000..109f1a57a46e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_0_H__
+#define __UMC_V6_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+extern const struct amdgpu_umc_funcs umc_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 8502e736f721..793bf70e64b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -28,19 +28,24 @@
 #include "rsmu/rsmu_0_0_2_sh_mask.h"
 #include "umc/umc_6_1_1_offset.h"
 #include "umc/umc_6_1_1_sh_mask.h"
+#include "umc/umc_6_1_2_offset.h"
 
-#define smnMCA_UMC0_MCUMC_ADDRT0	0x50f10
+#define UMC_6_INST_DIST			0x40000
 
 /*
  * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
  * is the index of 8KB block
  */
-#define ADDR_OF_8KB_BLOCK(addr)		(((addr) & ~0xffULL) << 5)
+#define ADDR_OF_8KB_BLOCK(addr)			(((addr) & ~0xffULL) << 5)
 /* channel index is the index of 256B block */
 #define ADDR_OF_256B_BLOCK(channel_index)	((channel_index) << 8)
 /* offset in 256B block */
 #define OFFSET_IN_256B_BLOCK(addr)		((addr) & 0xffULL)
 
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
 const uint32_t
 	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
 		{2, 18, 11, 27},	{4, 20, 13, 29},
@@ -49,30 +54,35 @@ const uint32_t
 		{9, 25, 0, 16},		{15, 31, 6, 22}
 };
 
-static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
-					   uint32_t umc_instance)
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
+{
+	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+			RSMU_UMC_INDEX_MODE_EN, 1);
+}
+
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+			RSMU_UMC_INDEX_MODE_EN, 0);
+}
+
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
 {
 	uint32_t rsmu_umc_index;
 
 	rsmu_umc_index = RREG32_SOC15(RSMU, 0,
 			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
-	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
-			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
-			RSMU_UMC_INDEX_MODE_EN, 1);
-	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
-			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
-			RSMU_UMC_INDEX_INSTANCE, umc_instance);
-	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
+
+	return REG_GET_FIELD(rsmu_umc_index,
 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
-			RSMU_UMC_INDEX_WREN, 1 << umc_instance);
-	WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
-				rsmu_umc_index);
+			RSMU_UMC_INDEX_MODE_EN);
 }
 
-static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
+					    uint32_t umc_inst,
+					    uint32_t ch_inst)
 {
-	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
-			RSMU_UMC_INDEX_MODE_EN, 0);
+	return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
 }
 
 static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
@@ -84,39 +94,50 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
 	uint64_t mc_umc_status;
 	uint32_t mc_umc_status_addr;
 
-	ecc_err_cnt_sel_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-	ecc_err_cnt_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
-	mc_umc_status_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	}
 
 	/* select the lower chip and check the error count */
-	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
 					EccErrCntCsSel, 0);
-	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
-	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
 	*error_count +=
 		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
 		 UMC_V6_1_CE_CNT_INIT);
 	/* clear the lower chip err count */
-	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 
 	/* select the higher chip and check the err counter */
 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
 					EccErrCntCsSel, 1);
-	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
-	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
 	*error_count +=
 		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
 		 UMC_V6_1_CE_CNT_INIT);
 	/* clear the higher chip err count */
-	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 
 	/* check for SRAM correctable error
 	  MCUMC_STATUS is a 64 bit register */
-	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
@@ -130,11 +151,18 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
 	uint64_t mc_umc_status;
 	uint32_t mc_umc_status_addr;
 
-	mc_umc_status_addr =
-                SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	}
 
 	/* check the MCUMC_STATUS */
-	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
 	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -144,112 +172,202 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
 		*error_count += 1;
 }
 
-static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
-					   struct ras_err_data *err_data, uint32_t umc_reg_offset,
-					   uint32_t channel_index)
-{
-	umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
-						   &(err_data->ce_count));
-	umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
-						  &(err_data->ue_count));
-}
-
 static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
+	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+	uint32_t umc_inst        = 0;
+	uint32_t ch_inst         = 0;
+	uint32_t umc_reg_offset  = 0;
+
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_reg_offset = get_umc_6_reg_offset(adev,
+						      umc_inst,
+						      ch_inst);
+
+		umc_v6_1_query_correctable_error_count(adev,
+						       umc_reg_offset,
+						       &(err_data->ce_count));
+		umc_v6_1_querry_uncorrectable_error_count(adev,
+							  umc_reg_offset,
+							  &(err_data->ue_count));
+	}
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
 static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 					 struct ras_err_data *err_data,
-					 uint32_t umc_reg_offset, uint32_t channel_index)
+					 uint32_t umc_reg_offset,
+					 uint32_t ch_inst,
+					 uint32_t umc_inst)
 {
 	uint32_t lsb, mc_umc_status_addr;
-	uint64_t mc_umc_status, err_addr;
-
-	mc_umc_status_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
+	struct eeprom_table_record *err_rec;
+	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+	}
 
 	/* skip error address process if -ENOMEM */
 	if (!err_data->err_addr) {
 		/* clear umc status */
-		WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
 		return;
 	}
 
-	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+	err_rec = &err_data->err_addr[err_data->err_addr_cnt];
+	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
 
 	/* calculate error address if ue/ce error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-		err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
 
+		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
 		/* the lowest lsb bits should be ignored */
 		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 		err_addr &= ~((0x1ULL << lsb) - 1);
 
 		/* translate umc channel address to soc pa, 3 parts are included */
-		err_data->err_addr[err_data->err_addr_cnt] =
-						ADDR_OF_8KB_BLOCK(err_addr) |
-						ADDR_OF_256B_BLOCK(channel_index) |
-						OFFSET_IN_256B_BLOCK(err_addr);
-
-		err_data->err_addr_cnt++;
+		retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
+				ADDR_OF_256B_BLOCK(channel_index) |
+				OFFSET_IN_256B_BLOCK(err_addr);
+
+		/* we only save ue error information currently, ce is skipped */
+		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
+				== 1) {
+			err_rec->address = err_addr;
+			/* page frame address is saved */
+			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+			err_rec->ts = (uint64_t)ktime_get_real_seconds();
+			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+			err_rec->cu = 0;
+			err_rec->mem_channel = channel_index;
+			err_rec->mcumc_id = umc_inst;
+
+			err_data->err_addr_cnt++;
+		}
 	}
 
 	/* clear umc status */
-	WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
 }
 
 static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
+	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+	uint32_t umc_inst        = 0;
+	uint32_t ch_inst         = 0;
+	uint32_t umc_reg_offset  = 0;
+
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_reg_offset = get_umc_6_reg_offset(adev,
+						      umc_inst,
+						      ch_inst);
+
+		umc_v6_1_query_error_address(adev,
+					     err_data,
+					     umc_reg_offset,
+					     ch_inst,
+					     umc_inst);
+	}
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
-static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev,
-					 struct ras_err_data *err_data,
-					 uint32_t umc_reg_offset, uint32_t channel_index)
+static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
+					      uint32_t umc_reg_offset)
 {
 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
 	uint32_t ecc_err_cnt_addr;
 
-	ecc_err_cnt_sel_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-	ecc_err_cnt_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+	}
 
 	/* select the lower chip and check the error count */
-	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
 					EccErrCntCsSel, 0);
 	/* set ce error interrupt type to APIC based interrupt */
 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
 					EccErrInt, 0x1);
-	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
 	/* set error count to initial value */
-	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 
 	/* select the higher chip and check the err counter */
 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
 					EccErrCntCsSel, 1);
-	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
-	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 }
 
-static void umc_v6_1_ras_init(struct amdgpu_device *adev)
+static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
 {
-	void *ras_error_status = NULL;
+	uint32_t umc_inst        = 0;
+	uint32_t ch_inst         = 0;
+	uint32_t umc_reg_offset  = 0;
+
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_reg_offset = get_umc_6_reg_offset(adev,
+						      umc_inst,
+						      ch_inst);
+
+		umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
+	}
 
-	amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel);
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
 const struct amdgpu_umc_funcs umc_v6_1_funcs = {
-	.ras_init = umc_v6_1_ras_init,
+	.err_cnt_init = umc_v6_1_err_cnt_init,
+	.ras_late_init = amdgpu_umc_ras_late_init,
 	.query_ras_error_count = umc_v6_1_query_ras_error_count,
 	.query_ras_error_address = umc_v6_1_query_ras_error_address,
-	.enable_umc_index_mode = umc_v6_1_enable_umc_index_mode,
-	.disable_umc_index_mode = umc_v6_1_disable_umc_index_mode,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index dab9cbd292c5..0ce1d323cfdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -35,7 +35,8 @@
 /* total channel instances in one umc block */
 #define UMC_V6_1_TOTAL_CHANNEL_NUM	(UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
 /* UMC regiser per channel offset */
-#define UMC_V6_1_PER_CHANNEL_OFFSET		0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20	0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT	0x400
 
 /* EccErrCnt max value */
 #define UMC_V6_1_CE_CNT_MAX		0xffff
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 93b3500e522b..e654938f6cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
 #include "soc15.h"
 #include "soc15d.h"
 #include "soc15_common.h"
@@ -36,21 +37,22 @@
 #include "mmhub/mmhub_9_1_sh_mask.h"
 
 #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+#include "jpeg_v1_0.h"
 
-#define mmUVD_RBC_XX_IB_REG_CHECK				0x05ab
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX	1
-#define mmUVD_REG_XX_MASK							0x05ac
-#define mmUVD_REG_XX_MASK_BASE_IDX				1
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0		0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX	1
+#define mmUVD_REG_XX_MASK_1_0			0x05ac
+#define mmUVD_REG_XX_MASK_1_0_BASE_IDX		1
 
 static int vcn_v1_0_stop(struct amdgpu_device *adev);
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
 static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
 static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state);
+				int inst_idx, struct dpg_pause_state *new_state);
+
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -68,9 +70,10 @@ static int vcn_v1_0_early_init(void *handle)
 
 	vcn_v1_0_set_dec_ring_funcs(adev);
 	vcn_v1_0_set_enc_ring_funcs(adev);
-	vcn_v1_0_set_jpeg_ring_funcs(adev);
 	vcn_v1_0_set_irq_funcs(adev);
 
+	jpeg_v1_0_early_init(handle);
+
 	return 0;
 }
 
@@ -101,15 +104,13 @@ static int vcn_v1_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* VCN JPEG TRAP */
-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq);
-	if (r)
-		return r;
-
 	r = amdgpu_vcn_sw_init(adev);
 	if (r)
 		return r;
 
+	/* Override the work func */
+	adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 		const struct common_firmware_header *hdr;
 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
@@ -149,17 +150,11 @@ static int vcn_v1_0_sw_init(void *handle)
 			return r;
 	}
 
-	ring = &adev->vcn.inst->ring_jpeg;
-	sprintf(ring->name, "vcn_jpeg");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
-	if (r)
-		return r;
-
 	adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
-	adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch =
-		SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
 
-	return 0;
+	r = jpeg_v1_0_sw_init(handle);
+
+	return r;
 }
 
 /**
@@ -178,6 +173,8 @@ static int vcn_v1_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
+	jpeg_v1_0_sw_fini(handle);
+
 	r = amdgpu_vcn_sw_fini(adev);
 
 	return r;
@@ -202,13 +199,12 @@ static int vcn_v1_0_hw_init(void *handle)
 
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		ring = &adev->vcn.inst->ring_enc[i];
-		ring->sched.ready = true;
 		r = amdgpu_ring_test_helper(ring);
 		if (r)
 			goto done;
 	}
 
-	ring = &adev->vcn.inst->ring_jpeg;
+	ring = &adev->jpeg.inst->ring_dec;
 	r = amdgpu_ring_test_helper(ring);
 	if (r)
 		goto done;
@@ -839,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
 
 	vcn_v1_0_mc_resume_spg_mode(adev);
 
-	WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10);
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK,
-		RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3);
+	WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
+		RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3);
 
 	/* enable VCPU clock */
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
@@ -948,22 +944,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
 	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
 
-	ring = &adev->vcn.inst->ring_jpeg;
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
-			UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
-	/* initialize wptr */
-	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
-	/* copy patch commands to the jpeg ring */
-	vcn_v1_0_jpeg_ring_set_patch_ring(ring,
-		(ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+	jpeg_v1_0_start(adev, 0);
 
 	return 0;
 }
@@ -1107,13 +1088,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
 			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
 
-	/* initialize JPEG wptr */
-	ring = &adev->vcn.inst->ring_jpeg;
-	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
-	/* copy patch commands to the jpeg ring */
-	vcn_v1_0_jpeg_ring_set_patch_ring(ring,
-		(ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+	jpeg_v1_0_start(adev, 1);
 
 	return 0;
 }
@@ -1224,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
 }
 
 static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state)
+				int inst_idx, struct dpg_pause_state *new_state)
 {
 	int ret_code;
 	uint32_t reg_data = 0;
@@ -1317,7 +1292,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
 							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
 
 				/* Restore */
-				ring = &adev->vcn.inst->ring_jpeg;
+				ring = &adev->jpeg.inst->ring_dec;
 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
 							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1717,389 +1692,6 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, val);
 }
 
-
-/**
- * vcn_v1_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x68e04);
-
-	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x68e04);
-
-	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
-				     unsigned flags)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, seq);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, seq);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x8);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
-	amdgpu_ring_write(ring, 0);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x01400200);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, seq);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
-	amdgpu_ring_write(ring, 0xffffffff);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x3fbc);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(0, 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x1);
-
-	/* emit trap */
-	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
-	amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_job *job,
-					struct amdgpu_ib *ib,
-					uint32_t flags)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, ib->length_dw);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
-	amdgpu_ring_write(ring,
-		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
-	amdgpu_ring_write(ring, 0);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x01400200);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x2);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
-	amdgpu_ring_write(ring, 0x2);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
-					    uint32_t reg, uint32_t val,
-					    uint32_t mask)
-{
-	struct amdgpu_device *adev = ring->adev;
-	uint32_t reg_offset = (reg << 2);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x01400200);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, val);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
-	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
-		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring,
-			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
-	} else {
-		amdgpu_ring_write(ring, reg_offset);
-		amdgpu_ring_write(ring,
-			PACKETJ(0, 0, 0, PACKETJ_TYPE3));
-	}
-	amdgpu_ring_write(ring, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
-		unsigned vmid, uint64_t pd_addr)
-{
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t data0, data1, mask;
-
-	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
-	/* wait for register write */
-	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
-	data1 = lower_32_bits(pd_addr);
-	mask = 0xffffffff;
-	vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
-					uint32_t reg, uint32_t val)
-{
-	struct amdgpu_device *adev = ring->adev;
-	uint32_t reg_offset = (reg << 2);
-
-	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
-	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
-			((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring,
-			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
-	} else {
-		amdgpu_ring_write(ring, reg_offset);
-		amdgpu_ring_write(ring,
-			PACKETJ(0, 0, 0, PACKETJ_TYPE0));
-	}
-	amdgpu_ring_write(ring, val);
-}
-
-static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
-	int i;
-
-	WARN_ON(ring->wptr % 2 || count % 2);
-
-	for (i = 0; i < count / 2; i++) {
-		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
-		amdgpu_ring_write(ring, 0);
-	}
-}
-
-static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
-{
-	struct amdgpu_device *adev = ring->adev;
-	ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
-	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
-		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
-		ring->ring[(*ptr)++] = 0;
-		ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
-	} else {
-		ring->ring[(*ptr)++] = reg_offset;
-		ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
-	}
-	ring->ring[(*ptr)++] = val;
-}
-
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	uint32_t reg, reg_offset, val, mask, i;
-
-	// 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
-	reg_offset = (reg << 2);
-	val = lower_32_bits(ring->gpu_addr);
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
-	// 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
-	reg_offset = (reg << 2);
-	val = upper_32_bits(ring->gpu_addr);
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
-	// 3rd to 5th: issue MEM_READ commands
-	for (i = 0; i <= 2; i++) {
-		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
-		ring->ring[ptr++] = 0;
-	}
-
-	// 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
-	reg_offset = (reg << 2);
-	val = 0x13;
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
-	// 7th: program mmUVD_JRBC_RB_REF_DATA
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
-	reg_offset = (reg << 2);
-	val = 0x1;
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
-	// 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
-	reg_offset = (reg << 2);
-	val = 0x1;
-	mask = 0x1;
-
-	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
-	ring->ring[ptr++] = 0x01400200;
-	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
-	ring->ring[ptr++] = val;
-	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
-	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
-		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
-		ring->ring[ptr++] = 0;
-		ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
-	} else {
-		ring->ring[ptr++] = reg_offset;
-		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
-	}
-	ring->ring[ptr++] = mask;
-
-	//9th to 21st: insert no-op
-	for (i = 0; i <= 12; i++) {
-		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
-		ring->ring[ptr++] = 0;
-	}
-
-	//22nd: reset mmUVD_JRBC_RB_RPTR
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
-	reg_offset = (reg << 2);
-	val = 0;
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
-	//23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
-	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
-	reg_offset = (reg << 2);
-	val = 0x12;
-	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-}
-
 static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned type,
@@ -2124,9 +1716,6 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
 	case 120:
 		amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
 		break;
-	case 126:
-		amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
-		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -2175,6 +1764,86 @@ static int vcn_v1_0_set_powergating_state(void *handle,
 	return ret;
 }
 
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vcn.idle_work.work);
+	unsigned int fences = 0, i;
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+		fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+		struct dpg_pause_state new_state;
+
+		if (fences)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+		if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+		adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+	}
+
+	fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+	fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
+
+	if (fences == 0) {
+		amdgpu_gfx_off_ctrl(adev, true);
+		if (adev->pm.dpm_enabled)
+			amdgpu_dpm_enable_uvd(adev, false);
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+			       AMD_PG_STATE_GATE);
+	} else {
+		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+	}
+}
+
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+	if (set_clocks) {
+		amdgpu_gfx_off_ctrl(adev, false);
+		if (adev->pm.dpm_enabled)
+			amdgpu_dpm_enable_uvd(adev, true);
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+			       AMD_PG_STATE_UNGATE);
+	}
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+		struct dpg_pause_state new_state;
+		unsigned int fences = 0, i;
+
+		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+			fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+		if (fences)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+		if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
+
+		adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+	}
+}
+
 static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
 	.name = "vcn_v1_0",
 	.early_init = vcn_v1_0_early_init,
@@ -2221,7 +1890,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.insert_start = vcn_v1_0_dec_ring_insert_start,
 	.insert_end = vcn_v1_0_dec_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
+	.begin_use = vcn_v1_0_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
@@ -2253,48 +1922,13 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_end = vcn_v1_0_enc_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
+	.begin_use = vcn_v1_0_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
-static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
-	.type = AMDGPU_RING_TYPE_VCN_JPEG,
-	.align_mask = 0xf,
-	.nop = PACKET0(0x81ff, 0),
-	.support_64bit_ptrs = false,
-	.no_user_fence = true,
-	.vmhub = AMDGPU_MMHUB_0,
-	.extra_dw = 64,
-	.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
-	.get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
-	.set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
-	.emit_frame_size =
-		6 + 6 + /* hdp invalidate / flush */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
-		8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */
-		26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */
-		6,
-	.emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */
-	.emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
-	.emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
-	.emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_jpeg_ring_test_ring,
-	.test_ib = amdgpu_vcn_jpeg_ring_test_ib,
-	.insert_nop = vcn_v1_0_jpeg_ring_nop,
-	.insert_start = vcn_v1_0_jpeg_ring_insert_start,
-	.insert_end = vcn_v1_0_jpeg_ring_insert_end,
-	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg,
-	.emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait,
-	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 {
 	adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
@@ -2311,12 +1945,6 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
 	DRM_INFO("VCN encode is enabled in VM mode\n");
 }
 
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
-	adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
-	DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
 static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
 	.set = vcn_v1_0_set_interrupt_state,
 	.process = vcn_v1_0_process_interrupt,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
index 2a497a7a4840..f67d7391fc21 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -24,6 +24,8 @@
 #ifndef __VCN_V1_0_H__
 #define __VCN_V1_0_H__
 
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+
 extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 36ad0c0e8efb..f4db8af6536b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -47,39 +47,13 @@
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x5a7
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x1e2
 
-#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 			0x1bfff
-#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET				0x4029
-#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET				0x402a
-#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET				0x402b
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ea
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40eb
-#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET				0x40cf
-#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET				0x40d1
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 		0x40e8
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x40e9
-#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET				0x4082
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ec
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40ed
-#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET			0x4085
-#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET				0x4084
-#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET				0x4089
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET				0x401f
-
-#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR				0x18000
-
-#define mmUVD_RBC_XX_IB_REG_CHECK 					0x026b
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 				1
-#define mmUVD_REG_XX_MASK 						0x026c
-#define mmUVD_REG_XX_MASK_BASE_IDX 					1
-
 static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v2_0_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
 static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state);
+				int inst_idx, struct dpg_pause_state *new_state);
 
 /**
  * vcn_v2_0_early_init - set function pointers
@@ -97,7 +71,6 @@ static int vcn_v2_0_early_init(void *handle)
 
 	vcn_v2_0_set_dec_ring_funcs(adev);
 	vcn_v2_0_set_enc_ring_funcs(adev);
-	vcn_v2_0_set_jpeg_ring_funcs(adev);
 	vcn_v2_0_set_irq_funcs(adev);
 
 	return 0;
@@ -132,12 +105,6 @@ static int vcn_v2_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* VCN JPEG TRAP */
-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
-			      VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq);
-	if (r)
-		return r;
-
 	r = amdgpu_vcn_sw_init(adev);
 	if (r)
 		return r;
@@ -194,19 +161,8 @@ static int vcn_v2_0_sw_init(void *handle)
 			return r;
 	}
 
-	ring = &adev->vcn.inst->ring_jpeg;
-	ring->use_doorbell = true;
-	ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
-	sprintf(ring->name, "vcn_jpeg");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
-	if (r)
-		return r;
-
 	adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
 
-	adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
-	adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
-
 	return 0;
 }
 
@@ -244,32 +200,18 @@ static int vcn_v2_0_hw_init(void *handle)
 	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
 	int i, r;
 
-	adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
 					     ring->doorbell_index, 0);
 
-	ring->sched.ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->sched.ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		ring = &adev->vcn.inst->ring_enc[i];
-		ring->sched.ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->sched.ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			goto done;
-		}
-	}
-
-	ring = &adev->vcn.inst->ring_jpeg;
-	ring->sched.ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->sched.ready = false;
-		goto done;
 	}
 
 done:
@@ -305,9 +247,6 @@ static int vcn_v2_0_hw_fini(void *handle)
 		ring->sched.ready = false;
 	}
 
-	ring = &adev->vcn.inst->ring_jpeg;
-	ring->sched.ready = false;
-
 	return 0;
 }
 
@@ -402,7 +341,6 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
 
 	WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
-	WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 }
 
 static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
@@ -413,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
 	/* cache window 0: fw */
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 		if (!indirect) {
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 		} else {
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 		}
 		offset = 0;
 	} else {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 			lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 			upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
 		offset = size;
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
 	}
 
 	if (!indirect)
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
 	else
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
 
 	/* cache window 1: stack */
 	if (!indirect) {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
 			lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
 			upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 	} else {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 	}
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
 
 	/* cache window 2: context */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
 		lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
 		upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
 
 	/* non-cache window */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
 
 	/* VCN global tiling registers */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
 }
 
@@ -640,146 +578,23 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
 		 UVD_CGC_CTRL__WCB_MODE_MASK |
 		 UVD_CGC_CTRL__VCPU_MODE_MASK |
 		 UVD_CGC_CTRL__SCPU_MODE_MASK);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
 
 	/* turn off clock gating */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
 
 	/* turn on SUVD clock gating */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
 
 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
 }
 
 /**
- * jpeg_v2_0_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_0_start(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg;
-	uint32_t tmp;
-	int r = 0;
-
-	/* disable power gating */
-	tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
-	SOC15_WAIT_ON_RREG(VCN, 0,
-		mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
-		UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
-	if (r) {
-		DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
-		return r;
-	}
-
-	/* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
-	tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
-	/* JPEG disable CGC */
-	tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
-	tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
-	tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
-	tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
-	WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
-	tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
-	tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
-		| JPEG_CGC_GATE__JPEG2_DEC_MASK
-		| JPEG_CGC_GATE__JPEG_ENC_MASK
-		| JPEG_CGC_GATE__JMCIF_MASK
-		| JPEG_CGC_GATE__JRBBM_MASK);
-	WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
-	/* enable JMI channel */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0,
-		~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-	/* enable System Interrupt for JRBC */
-	WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN),
-		JPEG_SYS_INT_EN__DJRBC_MASK,
-		~JPEG_SYS_INT_EN__DJRBC_MASK);
-
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
-		lower_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
-		upper_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
-	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
-	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
-	return 0;
-}
-
-/**
- * jpeg_v2_0_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_0_stop(struct amdgpu_device *adev)
-{
-	uint32_t tmp;
-	int r = 0;
-
-	/* reset JMI */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL),
-		UVD_JMI_CNTL__SOFT_RESET_MASK,
-		~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-	/* enable JPEG CGC */
-	tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
-	tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
-	tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
-	tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
-	WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
-
-	tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
-	tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
-		|JPEG_CGC_GATE__JPEG2_DEC_MASK
-		|JPEG_CGC_GATE__JPEG_ENC_MASK
-		|JPEG_CGC_GATE__JMCIF_MASK
-		|JPEG_CGC_GATE__JRBBM_MASK);
-	WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
-	/* enable power gating */
-	tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS));
-	tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
-	tmp |=  0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
-	tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
-	SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
-		(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
-		UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
-	if (r) {
-		DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
-		return r;
-	}
-
-	return r;
-}
-
-/**
  * vcn_v2_0_enable_clock_gating - enable VCN clock gating
  *
  * @adev: amdgpu_device pointer
@@ -939,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 	WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
 
 	if (indirect)
-		adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr;
+		adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr;
 
 	/* enable clock gating */
 	vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
@@ -948,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
 	tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
 
 	/* disable master interupt */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
 
 	/* setup mmUVD_LMI_CTRL */
@@ -964,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
 		0x00100000L);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_CNTL),
 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUXA0),
 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUXB0),
 		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUX),
 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -993,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 
 	vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
 
 	/* release VCPU reset to boot */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
 
 	/* enable LMI MC and UMC channels */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_CTRL2),
 		0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
 
 	/* enable master interrupt */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MASTINT_EN),
 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
 
 	if (indirect)
-		psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr,
-				    (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr -
-					       (uintptr_t)adev->vcn.dpg_sram_cpu_addr));
+		psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
+				    (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
+					       (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
 
 	/* force RBC into idle state */
 	rb_bufsz = order_base_2(ring->ring_size);
@@ -1061,12 +876,8 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled)
 		amdgpu_dpm_enable_uvd(adev, true);
 
-	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
-		r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
-		if (r)
-			return r;
-		goto jpeg;
-	}
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
 
 	vcn_v2_0_disable_static_power_gating(adev);
 
@@ -1218,10 +1029,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
 	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
 
-jpeg:
-	r = jpeg_v2_0_start(adev);
-
-	return r;
+	return 0;
 }
 
 static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
@@ -1240,9 +1048,6 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
 	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
 
-	tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
-
 	tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
 	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
 
@@ -1261,10 +1066,6 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
 	uint32_t tmp;
 	int r;
 
-	r = jpeg_v2_0_stop(adev);
-	if (r)
-		return r;
-
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
 		r = vcn_v2_0_stop_dpg_mode(adev);
 		if (r)
@@ -1329,7 +1130,7 @@ power_off:
 }
 
 static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state)
+				int inst_idx, struct dpg_pause_state *new_state)
 {
 	struct amdgpu_ring *ring;
 	uint32_t reg_data = 0;
@@ -1790,272 +1591,6 @@ void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_
 	amdgpu_ring_write(ring, val);
 }
 
-/**
- * vcn_v2_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell)
-		return adev->wb.wb[ring->wptr_offs];
-	else
-		return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
-		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
-	} else {
-		WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-	}
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x68e04);
-
-	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x68e04);
-
-	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
-				unsigned flags)
-{
-	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, seq);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, seq);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x8);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
-		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
-	amdgpu_ring_write(ring, 0);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x3fbc);
-
-	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x1);
-
-	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
-	amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
-				struct amdgpu_job *job,
-				struct amdgpu_ib *ib,
-				uint32_t flags)
-{
-	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, ib->length_dw);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
-	amdgpu_ring_write(ring,	PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
-	amdgpu_ring_write(ring, 0);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x01400200);
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x2);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
-		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
-	amdgpu_ring_write(ring, 0x2);
-}
-
-void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
-				uint32_t val, uint32_t mask)
-{
-	uint32_t reg_offset = (reg << 2);
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, 0x01400200);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	amdgpu_ring_write(ring, val);
-
-	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring,
-			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
-	} else {
-		amdgpu_ring_write(ring, reg_offset);
-		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
-			0, 0, PACKETJ_TYPE3));
-	}
-	amdgpu_ring_write(ring, mask);
-}
-
-void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
-				unsigned vmid, uint64_t pd_addr)
-{
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t data0, data1, mask;
-
-	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
-	/* wait for register write */
-	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
-	data1 = lower_32_bits(pd_addr);
-	mask = 0xffffffff;
-	vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
-{
-	uint32_t reg_offset = (reg << 2);
-
-	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-		0, 0, PACKETJ_TYPE0));
-	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring,
-			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
-	} else {
-		amdgpu_ring_write(ring, reg_offset);
-		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
-			0, 0, PACKETJ_TYPE0));
-	}
-	amdgpu_ring_write(ring, val);
-}
-
-void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
-	int i;
-
-	WARN_ON(ring->wptr % 2 || count % 2);
-
-	for (i = 0; i < count / 2; i++) {
-		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
-		amdgpu_ring_write(ring, 0);
-	}
-}
-
 static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned type,
@@ -2080,9 +1615,6 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
 	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
 		amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
 		break;
-	case VCN_2_0__SRCID__JPEG_DECODE:
-		amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
-		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -2228,36 +1760,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
-static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
-	.type = AMDGPU_RING_TYPE_VCN_JPEG,
-	.align_mask = 0xf,
-	.vmhub = AMDGPU_MMHUB_0,
-	.get_rptr = vcn_v2_0_jpeg_ring_get_rptr,
-	.get_wptr = vcn_v2_0_jpeg_ring_get_wptr,
-	.set_wptr = vcn_v2_0_jpeg_ring_set_wptr,
-	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
-		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
-		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
-		8 + 16,
-	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
-	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
-	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
-	.emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_jpeg_ring_test_ring,
-	.test_ib = amdgpu_vcn_jpeg_ring_test_ib,
-	.insert_nop = vcn_v2_0_jpeg_ring_nop,
-	.insert_start = vcn_v2_0_jpeg_ring_insert_start,
-	.insert_end = vcn_v2_0_jpeg_ring_insert_end,
-	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
-	.emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
-	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
 static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 {
 	adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
@@ -2274,12 +1776,6 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
 	DRM_INFO("VCN encode is enabled in VM mode\n");
 }
 
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
-	adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
-	DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
 static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
 	.set = vcn_v2_0_set_interrupt_state,
 	.process = vcn_v2_0_process_interrupt,
@@ -2287,7 +1783,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
 
 static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+	adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
 	adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
index 8467292f32e5..ef749b02ded9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
@@ -49,19 +49,6 @@ extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 				unsigned int vmid, uint64_t pd_addr);
 extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 
-extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring);
-extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring);
-extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
-				unsigned flags);
-extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
-				struct amdgpu_ib *ib, uint32_t flags);
-extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
-				uint32_t val, uint32_t mask);
-extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
-				unsigned vmid, uint64_t pd_addr);
-extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
-extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count);
-
 extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;
 
 #endif /* __VCN_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 395c2259f979..c8b63d57a541 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -25,9 +25,11 @@
 
 #include "amdgpu.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
 #include "soc15.h"
 #include "soc15d.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v1_0.h"
 
 #include "vcn/vcn_2_5_offset.h"
 #include "vcn/vcn_2_5_sh_mask.h"
@@ -46,16 +48,16 @@
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x3b5
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
 
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET			0x401f
-
-#define VCN25_MAX_HW_INSTANCES_ARCTURUS				2
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS			2
 
 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v2_5_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+				int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
 
 static int amdgpu_ih_clientid_vcns[] = {
 	SOC15_IH_CLIENTID_VCN,
@@ -90,11 +92,16 @@ static int vcn_v2_5_early_init(void *handle)
 	} else
 		adev->vcn.num_vcn_inst = 1;
 
-	adev->vcn.num_enc_rings = 2;
+	if (amdgpu_sriov_vf(adev)) {
+		adev->vcn.num_vcn_inst = 2;
+		adev->vcn.harvest_config = 0;
+		adev->vcn.num_enc_rings = 1;
+	} else {
+		adev->vcn.num_enc_rings = 2;
+	}
 
 	vcn_v2_5_set_dec_ring_funcs(adev);
 	vcn_v2_5_set_enc_ring_funcs(adev);
-	vcn_v2_5_set_jpeg_ring_funcs(adev);
 	vcn_v2_5_set_irq_funcs(adev);
 
 	return 0;
@@ -129,12 +136,6 @@ static int vcn_v2_5_sw_init(void *handle)
 			if (r)
 				return r;
 		}
-
-		/* VCN JPEG TRAP */
-		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
-				VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq);
-		if (r)
-			return r;
 	}
 
 	r = amdgpu_vcn_sw_init(adev);
@@ -183,12 +184,11 @@ static int vcn_v2_5_sw_init(void *handle)
 		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
 		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
 
-		adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
-		adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH);
-
 		ring = &adev->vcn.inst[j].ring_dec;
 		ring->use_doorbell = true;
-		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j;
+
+		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
 		sprintf(ring->name, "vcn_dec_%d", j);
 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
 		if (r)
@@ -197,22 +197,26 @@ static int vcn_v2_5_sw_init(void *handle)
 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 			ring = &adev->vcn.inst[j].ring_enc[i];
 			ring->use_doorbell = true;
-			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j;
+
+			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+
 			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
 			r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
 			if (r)
 				return r;
 		}
+	}
 
-		ring = &adev->vcn.inst[j].ring_jpeg;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j;
-		sprintf(ring->name, "vcn_jpeg_%d", j);
-		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
 		if (r)
 			return r;
 	}
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
 	return 0;
 }
 
@@ -228,6 +232,9 @@ static int vcn_v2_5_sw_fini(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
 	r = amdgpu_vcn_suspend(adev);
 	if (r)
 		return r;
@@ -248,43 +255,44 @@ static int vcn_v2_5_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring;
-	int i, j, r;
+	int i, j, r = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		r = vcn_v2_5_sriov_start(adev);
 
 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
 		if (adev->vcn.harvest_config & (1 << j))
 			continue;
-		ring = &adev->vcn.inst[j].ring_dec;
 
-		adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-						     ring->doorbell_index, j);
+		if (amdgpu_sriov_vf(adev)) {
+			adev->vcn.inst[j].ring_enc[0].sched.ready = true;
+			adev->vcn.inst[j].ring_enc[1].sched.ready = false;
+			adev->vcn.inst[j].ring_enc[2].sched.ready = false;
+			adev->vcn.inst[j].ring_dec.sched.ready = true;
+		} else {
 
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->sched.ready = false;
-			goto done;
-		}
+			ring = &adev->vcn.inst[j].ring_dec;
 
-		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-			ring = &adev->vcn.inst[j].ring_enc[i];
-			ring->sched.ready = false;
-			continue;
-			r = amdgpu_ring_test_ring(ring);
-			if (r) {
-				ring->sched.ready = false;
+			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+						     ring->doorbell_index, j);
+
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
 				goto done;
-			}
-		}
 
-		ring = &adev->vcn.inst[j].ring_jpeg;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->sched.ready = false;
-			goto done;
+			for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+				ring = &adev->vcn.inst[j].ring_enc[i];
+				r = amdgpu_ring_test_helper(ring);
+				if (r)
+					goto done;
+			}
 		}
 	}
+
 done:
 	if (!r)
-		DRM_INFO("VCN decode and encode initialized successfully.\n");
+		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
 
 	return r;
 }
@@ -300,25 +308,24 @@ static int vcn_v2_5_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring;
-	int i;
+	int i, j;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 		ring = &adev->vcn.inst[i].ring_dec;
 
-		if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
+		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
 			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
 		ring->sched.ready = false;
 
-		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-			ring = &adev->vcn.inst[i].ring_enc[i];
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+			ring = &adev->vcn.inst[i].ring_enc[j];
 			ring->sched.ready = false;
 		}
-
-		ring = &adev->vcn.inst[i].ring_jpeg;
-		ring->sched.ready = false;
 	}
 
 	return 0;
@@ -385,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
 		/* cache window 0: fw */
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
 			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
 			offset = 0;
 		} else {
@@ -419,11 +426,103 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
 	}
 }
 
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+	uint32_t offset;
+
+	/* cache window 0: fw */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		if (!indirect) {
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+		} else {
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+		}
+		offset = 0;
+	} else {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+		offset = size;
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
+			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+	}
+
+	if (!indirect)
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+	else
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+	/* cache window 1: stack */
+	if (!indirect) {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+	} else {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+	}
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+	/* cache window 2: context */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+	/* non-cache window */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+	/* VCN global tiling registers */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
 /**
  * vcn_v2_5_disable_clock_gating - disable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Disable clock gating for VCN block
  */
@@ -538,11 +637,58 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+		uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+	uint32_t reg_data = 0;
+
+	/* enable sw clock gating control */
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+		 UVD_CGC_CTRL__SYS_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_MODE_MASK |
+		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
+		 UVD_CGC_CTRL__REGS_MODE_MASK |
+		 UVD_CGC_CTRL__RBC_MODE_MASK |
+		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+		 UVD_CGC_CTRL__IDCT_MODE_MASK |
+		 UVD_CGC_CTRL__MPRD_MODE_MASK |
+		 UVD_CGC_CTRL__MPC_MODE_MASK |
+		 UVD_CGC_CTRL__LBSI_MODE_MASK |
+		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
+		 UVD_CGC_CTRL__WCB_MODE_MASK |
+		 UVD_CGC_CTRL__VCPU_MODE_MASK |
+		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+	/* turn off clock gating */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+	/* turn on SUVD clock gating */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
 /**
  * vcn_v2_5_enable_clock_gating - enable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Enable clock gating for VCN block
  */
@@ -601,111 +747,134 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * jpeg_v2_5_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
 {
 	struct amdgpu_ring *ring;
-	uint32_t tmp;
-	int i;
-
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		ring = &adev->vcn.inst[i].ring_jpeg;
-		/* disable anti hang mechanism */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0,
-			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
-		/* JPEG disable CGC */
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
-		tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
-		tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
-		tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
-		tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
-			| JPEG_CGC_GATE__JPEG2_DEC_MASK
-			| JPEG_CGC_GATE__JMCIF_MASK
-			| JPEG_CGC_GATE__JRBBM_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
-		tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
-			| JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
-			| JPEG_CGC_CTRL__JMCIF_MODE_MASK
-			| JPEG_CGC_CTRL__JRBBM_MODE_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
-		/* MJPEG global tiling registers */
-		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-
-		/* enable JMI channel */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0,
-			~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-		/* enable System Interrupt for JRBC */
-		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN),
-			JPEG_SYS_INT_EN__DJRBC_MASK,
-			~JPEG_SYS_INT_EN__DJRBC_MASK);
-
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
-			lower_32_bits(ring->gpu_addr));
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
-			upper_32_bits(ring->gpu_addr));
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
-		ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR);
-	}
-
-	return 0;
-}
-
-/**
- * jpeg_v2_5_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_5_stop(struct amdgpu_device *adev)
-{
-	uint32_t tmp;
-	int i;
+	uint32_t rb_bufsz, tmp;
 
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		/* reset JMI */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL),
-			UVD_JMI_CNTL__SOFT_RESET_MASK,
-			~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
-		tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
-			|JPEG_CGC_GATE__JPEG2_DEC_MASK
-			|JPEG_CGC_GATE__JMCIF_MASK
-			|JPEG_CGC_GATE__JRBBM_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
-		/* enable anti hang mechanism */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS),
-			UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
-			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-	}
+	/* disable register anti-hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+	/* enable dynamic power gating mode */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+	WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+	if (indirect)
+		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+	/* enable clock gating */
+	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+	/* enable VCPU clock */
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* disable master interupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+	/* setup mmUVD_LMI_CTRL */
+	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__CRC_RESET_MASK |
+		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		0x00100000L);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_CNTL),
+		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUXA0),
+		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUXB0),
+		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUX),
+		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+	/* enable LMI MC and UMC channels */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+	/* unblock VCPU register access */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* enable master interrupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MASTINT_EN),
+		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+	if (indirect)
+		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+				    (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+					       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+	ring = &adev->vcn.inst[inst_idx].ring_dec;
+	/* force RBC into idle state */
+	rb_bufsz = order_base_2(ring->ring_size);
+	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+	/* set the write pointer delay */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+		(upper_32_bits(ring->gpu_addr) >> 2));
+
+	/* programm the RB_BASE for ring buffer */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+		lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+		upper_32_bits(ring->gpu_addr));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+	WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+	ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+		lower_32_bits(ring->wptr));
 
 	return 0;
 }
@@ -716,9 +885,15 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 	uint32_t rb_bufsz, tmp;
 	int i, j, k, r;
 
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_uvd(adev, true);
+
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+			return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+
 		/* disable register anti-hang mechanism */
 		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -880,23 +1055,251 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
 	}
-	r = jpeg_v2_5_start(adev);
 
-	return r;
+	return 0;
 }
 
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+				struct amdgpu_mm_table *table)
 {
+	uint32_t data = 0, loop = 0, size = 0;
+	uint64_t addr = table->gpu_addr;
+	struct mmsch_v1_1_init_header *header = NULL;;
+
+	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+	size = header->total_size;
+
+	/*
+	 * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+	 *  memory descriptor location
+	 */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+	/* 2, update vmid of descriptor */
+	data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	/* use domain0 for MM scheduler */
+	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+	/* 4, set resp to zero */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+	/*
+	 * 5, kick off the initialization and wait until
+	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+	 */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+	data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+	loop = 10;
+	while ((data & 0x10000002) != 0x10000002) {
+		udelay(100);
+		data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+		loop--;
+		if (!loop)
+			break;
+	}
+
+	if (!loop) {
+		dev_err(adev->dev,
+			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+			data);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint32_t offset, size, tmp, i, rb_bufsz;
+	uint32_t table_size = 0;
+	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+	struct mmsch_v1_0_cmd_end end = { { 0 } };
+	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	header->version = MMSCH_VERSION;
+	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+	init_table += header->total_size;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		header->eng[i].table_offset = header->total_size;
+		header->eng[i].init_status = 0;
+		header->eng[i].table_size = 0;
+
+		table_size = 0;
+
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+		/* mc resume*/
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+			offset = 0;
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+		} else {
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				lower_32_bits(adev->vcn.inst[i].gpu_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				upper_32_bits(adev->vcn.inst[i].gpu_addr));
+			offset = size;
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		}
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+			size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+			0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+			AMDGPU_VCN_STACK_SIZE);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+				AMDGPU_VCN_STACK_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+				AMDGPU_VCN_STACK_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+			0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+			AMDGPU_VCN_CONTEXT_SIZE);
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->wptr = 0;
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+			lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+			upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+			ring->ring_size / 4);
+
+		ring = &adev->vcn.inst[i].ring_dec;
+		ring->wptr = 0;
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+			lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+			upper_32_bits(ring->gpu_addr));
+
+		/* force RBC into idle state */
+		rb_bufsz = order_base_2(ring->ring_size);
+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+		/* add end packet */
+		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+		/* refine header */
+		header->eng[i].table_size = table_size;
+		header->total_size += table_size;
+	}
+
+	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	int ret_code = 0;
 	uint32_t tmp;
-	int i, r;
 
-	r = jpeg_v2_5_stop(adev);
-	if (r)
-		return r;
+	/* Wait for power status to be 1 */
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* wait for read ptr to be equal to write ptr */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* disable dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	return 0;
+}
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
+	int i, r = 0;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+			r = vcn_v2_5_stop_dpg_mode(adev, i);
+			goto power_off;
+		}
+
 		/* wait for vcn idle */
 		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
 		if (r)
@@ -946,6 +1349,71 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
 	}
 
+power_off:
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_uvd(adev, false);
+
+	return 0;
+}
+
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+				int inst_idx, struct dpg_pause_state *new_state)
+{
+	struct amdgpu_ring *ring;
+	uint32_t reg_data = 0;
+	int ret_code;
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+		DRM_DEBUG("dpg pause state changed %d -> %d",
+			adev->vcn.pause_state.fw_based,	new_state->fw_based);
+		reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+			SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* pause DPG */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+				/* wait for ACK */
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+					   RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+					   0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.fw_based = new_state->fw_based;
+	}
+
 	return 0;
 }
 
@@ -991,6 +1459,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+			lower_32_bits(ring->wptr) | 0x80000000);
+
 	if (ring->use_doorbell) {
 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1128,86 +1600,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
-/**
- * vcn_v2_5_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell)
-		return adev->wb.wb[ring->wptr_offs];
-	else
-		return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
-		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
-	} else {
-		WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-	}
-}
-
-static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
-	.type = AMDGPU_RING_TYPE_VCN_JPEG,
-	.align_mask = 0xf,
-	.vmhub = AMDGPU_MMHUB_1,
-	.get_rptr = vcn_v2_5_jpeg_ring_get_rptr,
-	.get_wptr = vcn_v2_5_jpeg_ring_get_wptr,
-	.set_wptr = vcn_v2_5_jpeg_ring_set_wptr,
-	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
-		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
-		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
-		8 + 16,
-	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
-	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
-	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
-	.emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_jpeg_ring_test_ring,
-	.test_ib = amdgpu_vcn_jpeg_ring_test_ib,
-	.insert_nop = vcn_v2_0_jpeg_ring_nop,
-	.insert_start = vcn_v2_0_jpeg_ring_insert_start,
-	.insert_end = vcn_v2_0_jpeg_ring_insert_end,
-	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
-	.emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
-	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
 {
 	int i;
@@ -1236,19 +1628,6 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
 	}
 }
 
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
-	int i;
-
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
-		adev->vcn.inst[i].ring_jpeg.me = i;
-		DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
-	}
-}
-
 static bool vcn_v2_5_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1286,6 +1665,9 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	if (enable) {
 		if (vcn_v2_5_is_idle(handle))
 			return -EBUSY;
@@ -1303,6 +1685,9 @@ static int vcn_v2_5_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	if(state == adev->vcn.cur_state)
 		return 0;
 
@@ -1355,9 +1740,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
 	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
 		break;
-	case VCN_2_0__SRCID__JPEG_DECODE:
-		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
-		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -1379,7 +1761,7 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
-		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
+		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
 		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 9eae3536ddad..d9e331084ea0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -226,7 +226,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	/* disable irqs */
 	vega10_ih_disable_interrupts(adev);
 
-	adev->nbio_funcs->ih_control(adev);
+	adev->nbio.funcs->ih_control(adev);
 
 	ih = &adev->irq.ih;
 	/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -234,16 +234,9 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);
 
 	ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
-	ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
 	ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
-	if (adev->irq.ih.use_bus_addr) {
-		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
-	} else {
-		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1);
-	}
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
 				   !!adev->irq.msi_enabled);
-
 	if (amdgpu_sriov_vf(adev)) {
 		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
 			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -253,10 +246,19 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
 	}
 
-	if ((adev->asic_type == CHIP_ARCTURUS
-		&& adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
-		|| adev->asic_type == CHIP_RENOIR)
+	if ((adev->asic_type == CHIP_ARCTURUS &&
+	     adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+	    adev->asic_type == CHIP_RENOIR) {
+		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+		if (adev->irq.ih.use_bus_addr) {
+			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+						   MC_SPACE_GPA_ENABLE, 1);
+		} else {
+			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+						   MC_SPACE_FBPA_ENABLE, 1);
+		}
 		WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+	}
 
 	/* set the writeback address whether it's enabled or not */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -675,10 +677,49 @@ static int vega10_ih_soft_reset(void *handle)
 	return 0;
 }
 
+static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
+					       bool enable)
+{
+	uint32_t data, def, field_val;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+		def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+		field_val = enable ? 0 : 1;
+		/**
+		 * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE
+		 * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field.
+		 */
+		if (adev->asic_type > CHIP_VEGA10) {
+			data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val);
+			data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val);
+		}
+
+		data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+		data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+		data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+		data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     DYN_CLK_SOFT_OVERRIDE, field_val);
+		data = REG_SET_FIELD(data, IH_CLK_CTRL,
+				     REG_CLK_SOFT_OVERRIDE, field_val);
+		if (def != data)
+			WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+	}
+}
+
 static int vega10_ih_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	vega10_ih_update_clockgating_state(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
 	return 0;
+
 }
 
 static int vega10_ih_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index bd0580334f83..6b52a539d51b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,6 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "vega10_ip_offset.h"
 
 int vega10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 587e33f5dcce..556f854e3551 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -24,7 +24,6 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "vega20_ip_offset.h"
 
 int vega20_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5f8c8786cac5..78b35901643b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -690,15 +690,15 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 }
 
 /**
- * vi_asic_reset - soft reset GPU
+ * vi_asic_pci_config_reset - soft reset GPU
  *
  * @adev: amdgpu_device pointer
  *
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
  * Returns 0 for success.
  */
-static int vi_asic_reset(struct amdgpu_device *adev)
+static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
 {
 	int r;
 
@@ -711,10 +711,68 @@ static int vi_asic_reset(struct amdgpu_device *adev)
 	return r;
 }
 
+static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_TOPAZ:
+		return amdgpu_dpm_is_baco_supported(adev);
+	default:
+		return false;
+	}
+}
+
 static enum amd_reset_method
 vi_asic_reset_method(struct amdgpu_device *adev)
 {
-	return AMD_RESET_METHOD_LEGACY;
+	bool baco_reset;
+
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_TOPAZ:
+		baco_reset = amdgpu_dpm_is_baco_supported(adev);
+		break;
+	default:
+		baco_reset = false;
+		break;
+	}
+
+	if (baco_reset)
+		return AMD_RESET_METHOD_BACO;
+	else
+		return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * vi_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+		if (!adev->in_suspend)
+			amdgpu_inc_vram_lost(adev);
+		r = amdgpu_dpm_baco_reset(adev);
+	} else {
+		r = vi_asic_pci_config_reset(adev);
+	}
+
+	return r;
 }
 
 static u32 vi_get_config_memsize(struct amdgpu_device *adev)
@@ -1042,6 +1100,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_pcie_usage = &vi_get_pcie_usage,
 	.need_reset_on_init = &vi_need_reset_on_init,
 	.get_pcie_replay_count = &vi_get_pcie_replay_count,
+	.supports_baco = &vi_asic_supports_baco,
 };
 
 #define CZ_REV_BRISTOL(rev)	 \
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 8de0772f986c..defb4aaf929a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,4 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev,
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index a1a35d4d594b..b3672d10ea54 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -1,11 +1,11 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
 #
 # Heterogenous system architecture configuration
 #
 
 config HSA_AMD
 	bool "HSA kernel driver for AMD GPU devices"
-	depends on DRM_AMDGPU && (X86_64 || ARM64)
+	depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
 	imply AMD_IOMMU_V2 if X86_64
 	select MMU_NOTIFIER
 	help
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..61474627a32c 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,11 +38,9 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
 		$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
 		$(AMDKFD_PATH)/kfd_packet_manager.o \
+		$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+		$(AMDKFD_PATH)/kfd_packet_manager_v9.o \
 		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 177d1e5329a5..9f59ba93cfe0 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
 	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
-	unsigned int vmid, pasid;
+	unsigned int vmid;
+	uint16_t pasid;
+	bool ret;
 
 	/* This workaround is due to HW/FW limitation on Hawaii that
 	 * VMID and PASID are not written into ih_ring_entry
@@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 		*tmp_ihre = *ihre;
 
 		vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
-		pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
+		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
 
 		tmp_ihre->ring_id &= 0x000000ff;
 		tmp_ihre->ring_id |= vmid << 8;
 		tmp_ihre->ring_id |= pasid << 16;
 
-		return (pasid != 0) &&
+		return ret && (pasid != 0) &&
 			vmid >= dev->vm_info.first_vmid_kfd &&
 			vmid <= dev->vm_info.last_vmid_kfd;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 901fe3590165..d3400da6ab64 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -905,7 +905,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x7a5d0000, 0x807c817c,
 	0x807aff7a, 0x00000080,
 	0xbf0a717c, 0xbf85fff8,
-	0xbf820141, 0xbef4037e,
+	0xbf820142, 0xbef4037e,
 	0x8775ff7f, 0x0000ffff,
 	0x8875ff75, 0x00040000,
 	0xbef60380, 0xbef703ff,
@@ -967,7 +967,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x725d0000, 0xe0304080,
 	0x725d0100, 0xe0304100,
 	0x725d0200, 0xe0304180,
-	0x725d0300, 0xbf820031,
+	0x725d0300, 0xbf820032,
 	0xbef603ff, 0x01000000,
 	0xbef20378, 0x8078ff78,
 	0x00000400, 0xbefc0384,
@@ -992,83 +992,84 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x725d0000, 0xe0304100,
 	0x725d0100, 0xe0304200,
 	0x725d0200, 0xe0304300,
-	0x725d0300, 0xb9782a05,
-	0x80788178, 0x907c9973,
-	0x877c817c, 0xbf06817c,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb9721e06, 0x8f728a72,
-	0x80787278, 0x8078ff78,
-	0x00000200, 0x80f8ff78,
-	0x00000050, 0xbef603ff,
-	0x01000000, 0xbefc03ff,
-	0x0000006c, 0x80f89078,
-	0xf429003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc847c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0x80f8a078,
-	0xf42d003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc887c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0x80f8c078,
-	0xf431003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc907c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0xbe883108,
-	0xbe8a310a, 0xbe8c310c,
-	0xbe8e310e, 0xbf06807c,
-	0xbf84fff0, 0xb9782a05,
-	0x80788178, 0x907c9973,
-	0x877c817c, 0xbf06817c,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb9721e06, 0x8f728a72,
-	0x80787278, 0x8078ff78,
-	0x00000200, 0xbef603ff,
-	0x01000000, 0xf4211bfa,
+	0x725d0300, 0xbf8c3f70,
+	0xb9782a05, 0x80788178,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb9721e06,
+	0x8f728a72, 0x80787278,
+	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
+	0xbef603ff, 0x01000000,
+	0xbefc03ff, 0x0000006c,
+	0x80f89078, 0xf429003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc847c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0x80f8a078, 0xf42d003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc887c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0x80f8c078, 0xf431003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc907c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0xbe883108, 0xbe8a310a,
+	0xbe8c310c, 0xbe8e310e,
+	0xbf06807c, 0xbf84fff0,
+	0xb9782a05, 0x80788178,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb9721e06,
+	0x8f728a72, 0x80787278,
+	0x8078ff78, 0x00000200,
+	0xbef603ff, 0x01000000,
+	0xf4211bfa, 0xf0000000,
+	0x80788478, 0xf4211b3a,
 	0xf0000000, 0x80788478,
-	0xf4211b3a, 0xf0000000,
-	0x80788478, 0xf4211b7a,
+	0xf4211b7a, 0xf0000000,
+	0x80788478, 0xf4211eba,
 	0xf0000000, 0x80788478,
-	0xf4211eba, 0xf0000000,
-	0x80788478, 0xf4211efa,
+	0xf4211efa, 0xf0000000,
+	0x80788478, 0xf4211c3a,
 	0xf0000000, 0x80788478,
-	0xf4211c3a, 0xf0000000,
-	0x80788478, 0xf4211c7a,
+	0xf4211c7a, 0xf0000000,
+	0x80788478, 0xf4211e7a,
 	0xf0000000, 0x80788478,
-	0xf4211e7a, 0xf0000000,
-	0x80788478, 0xf4211cfa,
+	0xf4211cfa, 0xf0000000,
+	0x80788478, 0xf4211bba,
 	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef814,
 	0xf4211bba, 0xf0000000,
 	0x80788478, 0xbf8cc07f,
-	0xb9eef814, 0xf4211bba,
-	0xf0000000, 0x80788478,
-	0xbf8cc07f, 0xb9eef815,
-	0xbef2036d, 0x876dff72,
-	0x0000ffff, 0xbefc036f,
-	0xbefe037a, 0xbeff037b,
-	0x876f71ff, 0x000003ff,
-	0xb9ef4803, 0xb9f9f816,
-	0x876f71ff, 0xfffff800,
-	0x906f8b6f, 0xb9efa2c3,
-	0xb9f3f801, 0x876fff72,
-	0xfc000000, 0x906f9a6f,
-	0x8f6f906f, 0xbef30380,
+	0xb9eef815, 0xbef2036d,
+	0x876dff72, 0x0000ffff,
+	0xbefc036f, 0xbefe037a,
+	0xbeff037b, 0x876f71ff,
+	0x000003ff, 0xb9ef4803,
+	0xb9f9f816, 0x876f71ff,
+	0xfffff800, 0x906f8b6f,
+	0xb9efa2c3, 0xb9f3f801,
+	0x876fff72, 0xfc000000,
+	0x906f9a6f, 0x8f6f906f,
+	0xbef30380, 0x88736f73,
+	0x876fff72, 0x02000000,
+	0x906f996f, 0x8f6f8f6f,
 	0x88736f73, 0x876fff72,
-	0x02000000, 0x906f996f,
-	0x8f6f8f6f, 0x88736f73,
-	0x876fff72, 0x01000000,
-	0x906f986f, 0x8f6f996f,
-	0x88736f73, 0x876fff70,
-	0x00800000, 0x906f976f,
-	0xb9f3f807, 0x87fe7e7e,
-	0x87ea6a6a, 0xb9f0f802,
-	0xbf8a0000, 0xbe80226c,
-	0xbf810000, 0xbf9f0000,
+	0x01000000, 0x906f986f,
+	0x8f6f996f, 0x88736f73,
+	0x876fff70, 0x00800000,
+	0x906f976f, 0xb9f3f807,
+	0x87fe7e7e, 0x87ea6a6a,
+	0xb9f0f802, 0xbf8a0000,
+	0xbe80226c, 0xbf810000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
 };
 static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0xbf820001, 0xbf8202c4,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index cdaa523ce6be..4433bda2ce25 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -758,6 +758,7 @@ L_RESTORE_V0:
 	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
 	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
 	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+	s_waitcnt	vmcnt(0)
 
 	/* restore SGPRs */
 	//will be 2+8+16*6
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1d3cd5c50d5f..3f0300e53727 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -42,6 +42,7 @@
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
+static int kfd_release(struct inode *, struct file *);
 static int kfd_mmap(struct file *, struct vm_area_struct *);
 
 static const char kfd_dev_name[] = "kfd";
@@ -49,8 +50,9 @@ static const char kfd_dev_name[] = "kfd";
 static const struct file_operations kfd_fops = {
 	.owner = THIS_MODULE,
 	.unlocked_ioctl = kfd_ioctl,
-	.compat_ioctl = kfd_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.open = kfd_open,
+	.release = kfd_release,
 	.mmap = kfd_mmap,
 };
 
@@ -124,8 +126,13 @@ static int kfd_open(struct inode *inode, struct file *filep)
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
-	if (kfd_is_locked())
+	if (kfd_is_locked()) {
+		kfd_unref_process(process);
 		return -EAGAIN;
+	}
+
+	/* filep now owns the reference returned by kfd_create_process */
+	filep->private_data = process;
 
 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 		process->pasid, process->is_32bit_user_mode);
@@ -133,6 +140,16 @@ static int kfd_open(struct inode *inode, struct file *filep)
 	return 0;
 }
 
+static int kfd_release(struct inode *inode, struct file *filep)
+{
+	struct kfd_process *process = filep->private_data;
+
+	if (process)
+		kfd_unref_process(process);
+
+	return 0;
+}
+
 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 					void *data)
 {
@@ -258,6 +275,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	unsigned int queue_id;
 	struct kfd_process_device *pdd;
 	struct queue_properties q_properties;
+	uint32_t doorbell_offset_in_process = 0;
 
 	memset(&q_properties, 0, sizeof(struct queue_properties));
 
@@ -282,11 +300,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		goto err_bind_process;
 	}
 
-	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
+	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
 			p->pasid,
 			dev->id);
 
-	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
+	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+			&doorbell_offset_in_process);
 	if (err != 0)
 		goto err_create_queue;
 
@@ -296,14 +315,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	/* Return gpu_id as doorbell offset for mmap usage */
 	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-	args->doorbell_offset <<= PAGE_SHIFT;
 	if (KFD_IS_SOC15(dev->device_info->asic_family))
-		/* On SOC15 ASICs, doorbell allocation must be
-		 * per-device, and independent from the per-process
-		 * queue_id. Return the doorbell offset within the
-		 * doorbell aperture to user mode.
+		/* On SOC15 ASICs, include the doorbell offset within the
+		 * process doorbell frame, which is 2 pages.
 		 */
-		args->doorbell_offset |= q_properties.doorbell_off;
+		args->doorbell_offset |= doorbell_offset_in_process;
 
 	mutex_unlock(&p->mutex);
 
@@ -332,7 +348,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 	int retval;
 	struct kfd_ioctl_destroy_queue_args *args = data;
 
-	pr_debug("Destroying queue id %d for pasid %d\n",
+	pr_debug("Destroying queue id %d for pasid 0x%x\n",
 				args->queue_id,
 				p->pasid);
 
@@ -378,7 +394,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	properties.queue_percent = args->queue_percentage;
 	properties.priority = args->queue_priority;
 
-	pr_debug("Updating queue id %d for pasid %d\n",
+	pr_debug("Updating queue id %d for pasid 0x%x\n",
 			args->queue_id, p->pasid);
 
 	mutex_lock(&p->mutex);
@@ -855,7 +871,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	struct kfd_process_device_apertures *pAperture;
 	struct kfd_process_device *pdd;
 
-	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
 	args->num_of_nodes = 0;
 
@@ -913,7 +929,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 	uint32_t nodes = 0;
 	int ret;
 
-	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
 	if (args->num_of_nodes == 0) {
 		/* Return number of nodes, so that user space can alloacate
@@ -1128,7 +1144,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
 	mutex_unlock(&p->mutex);
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-	    pdd->qpd.vmid != 0)
+	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
 		dev->kfd2kgd->set_scratch_backing_va(
 			dev->kgd, args->va_addr, pdd->qpd.vmid);
 
@@ -1312,10 +1328,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	/* MMIO is mapped through kfd device
 	 * Generate a kfd mmap offset
 	 */
-	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
-		args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
-		args->mmap_offset <<= PAGE_SHIFT;
-	}
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+		args->mmap_offset = KFD_MMAP_TYPE_MMIO
+					| KFD_MMAP_GPU_ID(args->gpu_id);
 
 	return 0;
 
@@ -1801,11 +1816,16 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	} else
 		goto err_i1;
 
-	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
+	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
 
-	process = kfd_get_process(current);
-	if (IS_ERR(process)) {
-		dev_dbg(kfd_device, "no process\n");
+	/* Get the process struct from the filep. Only the process
+	 * that opened /dev/kfd can use the file descriptor. Child
+	 * processes need to create their own KFD device context.
+	 */
+	process = filep->private_data;
+	if (process->lead_thread != current->group_leader) {
+		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
+		retcode = -EBADF;
 		goto err_i1;
 	}
 
@@ -1856,7 +1876,8 @@ err_i1:
 		kfree(kdata);
 
 	if (retcode)
-		dev_dbg(kfd_device, "ret = %d\n", retcode);
+		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
+				nr, arg, retcode);
 
 	return retcode;
 }
@@ -1877,7 +1898,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	pr_debug("Process %d mapping mmio page\n"
+	pr_debug("pasid 0x%x mapping mmio page\n"
 		 "     target user address == 0x%08llX\n"
 		 "     physical address    == 0x%08llX\n"
 		 "     vm_flags            == 0x%04lX\n"
@@ -1898,20 +1919,19 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
 	struct kfd_dev *dev = NULL;
-	unsigned long vm_pgoff;
+	unsigned long mmap_offset;
 	unsigned int gpu_id;
 
 	process = kfd_get_process(current);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
-	vm_pgoff = vma->vm_pgoff;
-	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
 	if (gpu_id)
 		dev = kfd_device_by_id(gpu_id);
 
-	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
 	case KFD_MMAP_TYPE_DOORBELL:
 		if (!dev)
 			return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 66387caf966e..de9f68d5c312 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -138,6 +138,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
 /* TODO - check & update Vega10 cache details */
 #define vega10_cache_info carrizo_cache_info
 #define raven_cache_info carrizo_cache_info
+#define renoir_cache_info carrizo_cache_info
 /* TODO - check & update Navi10 cache details */
 #define navi10_cache_info carrizo_cache_info
 
@@ -670,7 +671,13 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 		pcache_info = raven_cache_info;
 		num_of_cache_types = ARRAY_SIZE(raven_cache_info);
 		break;
+	case CHIP_RENOIR:
+		pcache_info = renoir_cache_info;
+		num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+		break;
 	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
 		pcache_info = navi10_cache_info;
 		num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
 		break;
@@ -703,7 +710,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 						pcache_info,
 						cu_info,
 						mem_available,
-						cu_info->cu_bitmap[i][j],
+						cu_info->cu_bitmap[i % 4][j + i / 4],
 						ct,
 						cu_processor_id,
 						k);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index a3441b0e385b..27bcc5b472f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	 * The receive packet buff will be sitting on the Indirect Buffer
 	 * and in the PQ we put the IB packet + sync packet(s).
 	 */
-	status = kq->ops.acquire_packet_buffer(kq,
+	status = kq_acquire_packet_buffer(kq,
 				pq_packets_size_in_bytes / sizeof(uint32_t),
 				&ib_packet_buff);
 	if (status) {
-		pr_err("acquire_packet_buffer failed\n");
+		pr_err("kq_acquire_packet_buffer failed\n");
 		return status;
 	}
 
@@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
 	if (status) {
 		pr_err("Failed to allocate GART memory\n");
-		kq->ops.rollback_packet(kq);
+		kq_rollback_packet(kq);
 		return status;
 	}
 
@@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
 	rm_packet->data_lo = QUEUESTATE__ACTIVE;
 
-	kq->ops.submit_packet(kq);
+	kq_submit_packet(kq);
 
 	/* Wait till CP writes sync code: */
 	status = amdkfd_fence_wait_timeout(
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 	properties.type = KFD_QUEUE_TYPE_DIQ;
 
 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-				&properties, &qid);
+				&properties, &qid, NULL);
 
 	if (status) {
 		pr_err("Failed to create DIQ\n");
@@ -761,6 +761,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 {
 	int status = 0;
 	unsigned int vmid;
+	uint16_t queried_pasid;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct kfd_process_device *pdd;
@@ -782,19 +783,18 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 	 */
 
 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
-		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
-				(dev->kgd, vmid)) {
-			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
-					(dev->kgd, vmid) == p->pasid) {
-				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
-						vmid, p->pasid);
-				break;
-			}
+		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+				(dev->kgd, vmid, &queried_pasid);
+
+		if (status && queried_pasid == p->pasid) {
+			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+					vmid, p->pasid);
+			break;
 		}
 	}
 
 	if (vmid > last_vmid_to_scan) {
-		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
+		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
 		return -EFAULT;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 9d4af961c5d1..9bfa50633654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
 long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
 	if (pmgr->pasid != 0) {
-		pr_debug("H/W debugger is already active using pasid %d\n",
+		pr_debug("H/W debugger is already active using pasid 0x%x\n",
 				pmgr->pasid);
 		return -EBUSY;
 	}
@@ -117,7 +117,7 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != p->pasid) {
-		pr_debug("H/W debugger is not registered by calling pasid %d\n",
+		pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
 				p->pasid);
 		return -EINVAL;
 	}
@@ -134,7 +134,7 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != wac_info->process->pasid) {
-		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+		pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
 				wac_info->process->pasid);
 		return -EINVAL;
 	}
@@ -147,7 +147,7 @@ long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != adw_info->process->pasid) {
-		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+		pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
 				adw_info->process->pasid);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 15c523027285..511712c2e382 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -93,7 +93,7 @@ void kfd_debugfs_init(void)
 			    kfd_debugfs_hqds_by_device, &kfd_debugfs_fops);
 	debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
 			    kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
-	debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root,
+	debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
 			    NULL, &kfd_debugfs_hang_hws_fops);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0dc1084b5e82..2a9e40131735 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -39,6 +39,41 @@
  */
 static atomic_t kfd_locked = ATOMIC_INIT(0);
 
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
+#endif
+extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
+extern const struct kfd2kgd_calls arcturus_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
+
+static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
+#ifdef KFD_SUPPORT_IOMMU_V2
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	[CHIP_KAVERI] = &gfx_v7_kfd2kgd,
+#endif
+	[CHIP_CARRIZO] = &gfx_v8_kfd2kgd,
+	[CHIP_RAVEN] = &gfx_v9_kfd2kgd,
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	[CHIP_HAWAII] = &gfx_v7_kfd2kgd,
+#endif
+	[CHIP_TONGA] = &gfx_v8_kfd2kgd,
+	[CHIP_FIJI] = &gfx_v8_kfd2kgd,
+	[CHIP_POLARIS10] = &gfx_v8_kfd2kgd,
+	[CHIP_POLARIS11] = &gfx_v8_kfd2kgd,
+	[CHIP_POLARIS12] = &gfx_v8_kfd2kgd,
+	[CHIP_VEGAM] = &gfx_v8_kfd2kgd,
+	[CHIP_VEGA10] = &gfx_v9_kfd2kgd,
+	[CHIP_VEGA12] = &gfx_v9_kfd2kgd,
+	[CHIP_VEGA20] = &gfx_v9_kfd2kgd,
+	[CHIP_RENOIR] = &gfx_v9_kfd2kgd,
+	[CHIP_ARCTURUS] = &arcturus_kfd2kgd,
+	[CHIP_NAVI10] = &gfx_v10_kfd2kgd,
+	[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
+	[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
+};
+
 #ifdef KFD_SUPPORT_IOMMU_V2
 static const struct kfd_device_info kaveri_device_info = {
 	.asic_family = CHIP_KAVERI,
@@ -351,6 +386,24 @@ static const struct kfd_device_info arcturus_device_info = {
 	.num_sdma_queues_per_engine = 8,
 };
 
+static const struct kfd_device_info renoir_device_info = {
+	.asic_family = CHIP_RENOIR,
+	.asic_name = "renoir",
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = false,
+	.num_sdma_engines = 1,
+	.num_xgmi_sdma_engines = 0,
+	.num_sdma_queues_per_engine = 2,
+};
+
 static const struct kfd_device_info navi10_device_info = {
 	.asic_family = CHIP_NAVI10,
 	.asic_name = "navi10",
@@ -369,133 +422,64 @@ static const struct kfd_device_info navi10_device_info = {
 	.num_sdma_queues_per_engine = 8,
 };
 
-struct kfd_deviceid {
-	unsigned short did;
-	const struct kfd_device_info *device_info;
+static const struct kfd_device_info navi12_device_info = {
+	.asic_family = CHIP_NAVI12,
+	.asic_name = "navi12",
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.needs_iommu_device = false,
+	.supports_cwsr = true,
+	.needs_pci_atomics = false,
+	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
+	.num_sdma_queues_per_engine = 8,
 };
 
-static const struct kfd_deviceid supported_devices[] = {
+static const struct kfd_device_info navi14_device_info = {
+	.asic_family = CHIP_NAVI14,
+	.asic_name = "navi14",
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.needs_iommu_device = false,
+	.supports_cwsr = true,
+	.needs_pci_atomics = false,
+	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
+	.num_sdma_queues_per_engine = 8,
+};
+
+/* For each entry, [0] is regular and [1] is virtualisation device. */
+static const struct kfd_device_info *kfd_supported_devices[][2] = {
 #ifdef KFD_SUPPORT_IOMMU_V2
-	{ 0x1304, &kaveri_device_info },	/* Kaveri */
-	{ 0x1305, &kaveri_device_info },	/* Kaveri */
-	{ 0x1306, &kaveri_device_info },	/* Kaveri */
-	{ 0x1307, &kaveri_device_info },	/* Kaveri */
-	{ 0x1309, &kaveri_device_info },	/* Kaveri */
-	{ 0x130A, &kaveri_device_info },	/* Kaveri */
-	{ 0x130B, &kaveri_device_info },	/* Kaveri */
-	{ 0x130C, &kaveri_device_info },	/* Kaveri */
-	{ 0x130D, &kaveri_device_info },	/* Kaveri */
-	{ 0x130E, &kaveri_device_info },	/* Kaveri */
-	{ 0x130F, &kaveri_device_info },	/* Kaveri */
-	{ 0x1310, &kaveri_device_info },	/* Kaveri */
-	{ 0x1311, &kaveri_device_info },	/* Kaveri */
-	{ 0x1312, &kaveri_device_info },	/* Kaveri */
-	{ 0x1313, &kaveri_device_info },	/* Kaveri */
-	{ 0x1315, &kaveri_device_info },	/* Kaveri */
-	{ 0x1316, &kaveri_device_info },	/* Kaveri */
-	{ 0x1317, &kaveri_device_info },	/* Kaveri */
-	{ 0x1318, &kaveri_device_info },	/* Kaveri */
-	{ 0x131B, &kaveri_device_info },	/* Kaveri */
-	{ 0x131C, &kaveri_device_info },	/* Kaveri */
-	{ 0x131D, &kaveri_device_info },	/* Kaveri */
-	{ 0x9870, &carrizo_device_info },	/* Carrizo */
-	{ 0x9874, &carrizo_device_info },	/* Carrizo */
-	{ 0x9875, &carrizo_device_info },	/* Carrizo */
-	{ 0x9876, &carrizo_device_info },	/* Carrizo */
-	{ 0x9877, &carrizo_device_info },	/* Carrizo */
-	{ 0x15DD, &raven_device_info },		/* Raven */
-	{ 0x15D8, &raven_device_info },		/* Raven */
+	[CHIP_KAVERI] = {&kaveri_device_info, NULL},
+	[CHIP_CARRIZO] = {&carrizo_device_info, NULL},
+	[CHIP_RAVEN] = {&raven_device_info, NULL},
 #endif
-	{ 0x67A0, &hawaii_device_info },	/* Hawaii */
-	{ 0x67A1, &hawaii_device_info },	/* Hawaii */
-	{ 0x67A2, &hawaii_device_info },	/* Hawaii */
-	{ 0x67A8, &hawaii_device_info },	/* Hawaii */
-	{ 0x67A9, &hawaii_device_info },	/* Hawaii */
-	{ 0x67AA, &hawaii_device_info },	/* Hawaii */
-	{ 0x67B0, &hawaii_device_info },	/* Hawaii */
-	{ 0x67B1, &hawaii_device_info },	/* Hawaii */
-	{ 0x67B8, &hawaii_device_info },	/* Hawaii */
-	{ 0x67B9, &hawaii_device_info },	/* Hawaii */
-	{ 0x67BA, &hawaii_device_info },	/* Hawaii */
-	{ 0x67BE, &hawaii_device_info },	/* Hawaii */
-	{ 0x6920, &tonga_device_info },		/* Tonga */
-	{ 0x6921, &tonga_device_info },		/* Tonga */
-	{ 0x6928, &tonga_device_info },		/* Tonga */
-	{ 0x6929, &tonga_device_info },		/* Tonga */
-	{ 0x692B, &tonga_device_info },		/* Tonga */
-	{ 0x6938, &tonga_device_info },		/* Tonga */
-	{ 0x6939, &tonga_device_info },		/* Tonga */
-	{ 0x7300, &fiji_device_info },		/* Fiji */
-	{ 0x730F, &fiji_vf_device_info },	/* Fiji vf*/
-	{ 0x67C0, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C1, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C2, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C4, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C7, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C8, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67C9, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67CA, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67CC, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67CF, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67D0, &polaris10_vf_device_info },	/* Polaris10 vf*/
-	{ 0x67DF, &polaris10_device_info },	/* Polaris10 */
-	{ 0x6FDF, &polaris10_device_info },	/* Polaris10 */
-	{ 0x67E0, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67E1, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67E3, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67E7, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67E8, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67E9, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67EB, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67EF, &polaris11_device_info },	/* Polaris11 */
-	{ 0x67FF, &polaris11_device_info },	/* Polaris11 */
-	{ 0x6980, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6981, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6985, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6986, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6987, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6995, &polaris12_device_info },	/* Polaris12 */
-	{ 0x6997, &polaris12_device_info },	/* Polaris12 */
-	{ 0x699F, &polaris12_device_info },	/* Polaris12 */
-	{ 0x694C, &vegam_device_info },		/* VegaM */
-	{ 0x694E, &vegam_device_info },		/* VegaM */
-	{ 0x694F, &vegam_device_info },		/* VegaM */
-	{ 0x6860, &vega10_device_info },	/* Vega10 */
-	{ 0x6861, &vega10_device_info },	/* Vega10 */
-	{ 0x6862, &vega10_device_info },	/* Vega10 */
-	{ 0x6863, &vega10_device_info },	/* Vega10 */
-	{ 0x6864, &vega10_device_info },	/* Vega10 */
-	{ 0x6867, &vega10_device_info },	/* Vega10 */
-	{ 0x6868, &vega10_device_info },	/* Vega10 */
-	{ 0x6869, &vega10_device_info },	/* Vega10 */
-	{ 0x686A, &vega10_device_info },	/* Vega10 */
-	{ 0x686B, &vega10_device_info },	/* Vega10 */
-	{ 0x686C, &vega10_vf_device_info },	/* Vega10  vf*/
-	{ 0x686D, &vega10_device_info },	/* Vega10 */
-	{ 0x686E, &vega10_device_info },	/* Vega10 */
-	{ 0x686F, &vega10_device_info },	/* Vega10 */
-	{ 0x687F, &vega10_device_info },	/* Vega10 */
-	{ 0x69A0, &vega12_device_info },	/* Vega12 */
-	{ 0x69A1, &vega12_device_info },	/* Vega12 */
-	{ 0x69A2, &vega12_device_info },	/* Vega12 */
-	{ 0x69A3, &vega12_device_info },	/* Vega12 */
-	{ 0x69AF, &vega12_device_info },	/* Vega12 */
-	{ 0x66a0, &vega20_device_info },	/* Vega20 */
-	{ 0x66a1, &vega20_device_info },	/* Vega20 */
-	{ 0x66a2, &vega20_device_info },	/* Vega20 */
-	{ 0x66a3, &vega20_device_info },	/* Vega20 */
-	{ 0x66a4, &vega20_device_info },	/* Vega20 */
-	{ 0x66a7, &vega20_device_info },	/* Vega20 */
-	{ 0x66af, &vega20_device_info },	/* Vega20 */
-	{ 0x738C, &arcturus_device_info },	/* Arcturus */
-	{ 0x7388, &arcturus_device_info },	/* Arcturus */
-	{ 0x738E, &arcturus_device_info },	/* Arcturus */
-	{ 0x7390, &arcturus_device_info },	/* Arcturus vf */
-	{ 0x7310, &navi10_device_info },	/* Navi10 */
-	{ 0x7312, &navi10_device_info },	/* Navi10 */
-	{ 0x7318, &navi10_device_info },	/* Navi10 */
-	{ 0x731a, &navi10_device_info },	/* Navi10 */
-	{ 0x731f, &navi10_device_info },	/* Navi10 */
+	[CHIP_HAWAII] = {&hawaii_device_info, NULL},
+	[CHIP_TONGA] = {&tonga_device_info, NULL},
+	[CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
+	[CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info},
+	[CHIP_POLARIS11] = {&polaris11_device_info, NULL},
+	[CHIP_POLARIS12] = {&polaris12_device_info, NULL},
+	[CHIP_VEGAM] = {&vegam_device_info, NULL},
+	[CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info},
+	[CHIP_VEGA12] = {&vega12_device_info, NULL},
+	[CHIP_VEGA20] = {&vega20_device_info, NULL},
+	[CHIP_RENOIR] = {&renoir_device_info, NULL},
+	[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
+	[CHIP_NAVI10] = {&navi10_device_info, NULL},
+	[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
+	[CHIP_NAVI14] = {&navi14_device_info, NULL},
 };
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -504,32 +488,25 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
 static int kfd_resume(struct kfd_dev *kfd);
 
-static const struct kfd_device_info *lookup_device_info(unsigned short did)
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+	struct pci_dev *pdev, unsigned int asic_type, bool vf)
 {
-	size_t i;
+	struct kfd_dev *kfd;
+	const struct kfd_device_info *device_info;
+	const struct kfd2kgd_calls *f2g;
 
-	for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
-		if (supported_devices[i].did == did) {
-			WARN_ON(!supported_devices[i].device_info);
-			return supported_devices[i].device_info;
-		}
+	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
+		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
+		dev_err(kfd_device, "asic_type %d out of range\n", asic_type);
+		return NULL; /* asic_type out of range */
 	}
 
-	dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
-		 did);
+	device_info = kfd_supported_devices[asic_type][vf];
+	f2g = kfd2kgd_funcs[asic_type];
 
-	return NULL;
-}
-
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
-	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
-{
-	struct kfd_dev *kfd;
-	const struct kfd_device_info *device_info =
-					lookup_device_info(pdev->device);
-
-	if (!device_info) {
-		dev_err(kfd_device, "kgd2kfd_probe failed\n");
+	if (!device_info || !f2g) {
+		dev_err(kfd_device, "%s %s not supported in kfd\n",
+			amdgpu_asic_name[asic_type], vf ? "VF" : "");
 		return NULL;
 	}
 
@@ -593,10 +570,12 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	unsigned int size;
 
+	kfd->ddev = ddev;
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_MEC1);
 	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@@ -749,10 +728,10 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 {
 	if (!kfd->init_complete)
 		return 0;
-	kgd2kfd_suspend(kfd);
 
-	/* hold dqm->lock to prevent further execution*/
-	dqm_lock(kfd->dqm);
+	kfd->dqm->ops.pre_reset(kfd->dqm);
+
+	kgd2kfd_suspend(kfd);
 
 	kfd_signal_reset_event(kfd);
 	return 0;
@@ -766,17 +745,15 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 
 int kgd2kfd_post_reset(struct kfd_dev *kfd)
 {
-	int ret, count;
+	int ret;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	dqm_unlock(kfd->dqm);
-
 	ret = kfd_resume(kfd);
 	if (ret)
 		return ret;
-	count = atomic_dec_return(&kfd_locked);
+	atomic_dec(&kfd_locked);
 
 	atomic_set(&kfd->sram_ecc_flag, 0);
 
@@ -848,6 +825,21 @@ dqm_start_error:
 	return err;
 }
 
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+				  struct work_struct *work)
+{
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
+	queue_work_on(new_cpu, wq, work);
+}
+
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -870,7 +862,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 				   patched_ihre, &is_patched)
 	    && enqueue_ih_ring_entry(kfd,
 				     is_patched ? patched_ihre : ih_ring_entry))
-		queue_work(kfd->ih_wq, &kfd->interrupt_work);
+		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d985e31fcc1e..2870553a2ce0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 	}
 
 	q->properties.doorbell_off =
-		kfd_doorbell_id_to_offset(dev, q->process,
+		kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
 					  q->doorbell_id);
 
 	return 0;
@@ -195,20 +195,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
-	int bit, allocated_vmid;
+	int allocated_vmid = -1, i;
 
-	if (dqm->vmid_bitmap == 0)
-		return -ENOMEM;
+	for (i = dqm->dev->vm_info.first_vmid_kfd;
+			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+		if (!dqm->vmid_pasid[i]) {
+			allocated_vmid = i;
+			break;
+		}
+	}
+
+	if (allocated_vmid < 0) {
+		pr_err("no more vmid to allocate\n");
+		return -ENOSPC;
+	}
+
+	pr_debug("vmid allocated: %d\n", allocated_vmid);
 
-	bit = ffs(dqm->vmid_bitmap) - 1;
-	dqm->vmid_bitmap &= ~(1 << bit);
+	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+
+	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 
-	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-	pr_debug("vmid allocation %d\n", allocated_vmid);
 	qpd->vmid = allocated_vmid;
 	q->properties.vmid = allocated_vmid;
 
-	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
 	program_sh_mem_settings(dqm, qpd);
 
 	/* qpd->page_table_base is set earlier when register_process()
@@ -220,8 +230,9 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	/* invalidate the VM context after pasid and vmid mapping is set up */
 	kfd_flush_tlb(qpd_to_pdd(qpd));
 
-	dqm->dev->kfd2kgd->set_scratch_backing_va(
-		dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
+		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+				qpd->sh_hidden_private_base, qpd->vmid);
 
 	return 0;
 }
@@ -248,8 +259,6 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
-	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
 	/* On GFX v7, CP doesn't flush TC at dequeue */
 	if (q->device->device_info->asic_family == CHIP_HAWAII)
 		if (flush_texture_cache_nocpsch(q->device, qpd))
@@ -259,8 +268,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+	dqm->vmid_pasid[qpd->vmid] = 0;
 
-	dqm->vmid_bitmap |= (1 << bit);
 	qpd->vmid = 0;
 	q->properties.vmid = 0;
 }
@@ -331,6 +340,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (q->properties.is_active) {
+		if (!dqm->sched_running) {
+			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
+			goto add_queue_to_list;
+		}
 
 		if (WARN(q->process->mm != current->mm,
 					"should only run in user thread"))
@@ -342,6 +355,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 			goto out_free_mqd;
 	}
 
+add_queue_to_list:
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
 	if (q->properties.is_active)
@@ -449,6 +463,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 
 	deallocate_doorbell(qpd, q);
 
+	if (!dqm->sched_running) {
+		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
+		return 0;
+	}
+
 	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 				KFD_UNMAP_LATENCY_MS,
@@ -524,6 +543,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+
+		if (!dqm->sched_running) {
+			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
+			goto out_unlock;
+		}
+
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -579,7 +604,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 		goto out;
 
 	pdd = qpd_to_pdd(qpd);
-	pr_info_ratelimited("Evicting PASID %u queues\n",
+	pr_info_ratelimited("Evicting PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Mark all queues as evicted. Deactivate all active queues on
@@ -593,6 +618,11 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 				q->properties.type)];
 		q->properties.is_active = false;
+		dqm->queue_count--;
+
+		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
+			continue;
+
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -601,7 +631,6 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 			 * maintain a consistent eviction state
 			 */
 			ret = retval;
-		dqm->queue_count--;
 	}
 
 out:
@@ -621,7 +650,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 		goto out;
 
 	pdd = qpd_to_pdd(qpd);
-	pr_info_ratelimited("Evicting PASID %u queues\n",
+	pr_info_ratelimited("Evicting PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Mark all queues as evicted. Deactivate all active queues on
@@ -667,7 +696,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		goto out;
 	}
 
-	pr_info_ratelimited("Restoring PASID %u queues\n",
+	pr_info_ratelimited("Restoring PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Update PD Base in QPD */
@@ -702,6 +731,11 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 				q->properties.type)];
 		q->properties.is_active = true;
+		dqm->queue_count++;
+
+		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
+			continue;
+
 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
 				       q->queue, &q->properties, mm);
 		if (retval && !ret)
@@ -709,7 +743,6 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 			 * maintain a consistent eviction state
 			 */
 			ret = retval;
-		dqm->queue_count++;
 	}
 	qpd->evicted = 0;
 out:
@@ -739,7 +772,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 		goto out;
 	}
 
-	pr_info_ratelimited("Restoring PASID %u queues\n",
+	pr_info_ratelimited("Restoring PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Update PD Base in QPD */
@@ -879,7 +912,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 				dqm->allocated_queues[pipe] |= 1 << queue;
 	}
 
-	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
+
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
@@ -896,21 +930,36 @@ static void uninitialize(struct device_queue_manager *dqm)
 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
 		kfree(dqm->mqd_mgrs[i]);
 	mutex_destroy(&dqm->lock_hidden);
-	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
 }
 
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
+	pr_info("SW scheduler is used");
 	init_interrupts(dqm);
-	return pm_init(&dqm->packets, dqm);
+	
+	if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+		return pm_init(&dqm->packets, dqm);
+	dqm->sched_running = true;
+
+	return 0;
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
-	pm_uninit(&dqm->packets);
+	if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+		pm_uninit(&dqm->packets, false);
+	dqm->sched_running = false;
+
 	return 0;
 }
 
+static void pre_reset(struct device_queue_manager *dqm)
+{
+	dqm_lock(dqm);
+	dqm->is_resetting = true;
+	dqm_unlock(dqm);
+}
+
 static int allocate_sdma_queue(struct device_queue_manager *dqm,
 				struct queue *q)
 {
@@ -1058,25 +1107,32 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	dqm_lock(dqm);
 	/* clear hang status when driver try to start the hw scheduler */
 	dqm->is_hws_hang = false;
+	dqm->is_resetting = false;
+	dqm->sched_running = true;
 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 	dqm_unlock(dqm);
 
 	return 0;
 fail_allocate_vidmem:
 fail_set_sched_resources:
-	pm_uninit(&dqm->packets);
+	pm_uninit(&dqm->packets, false);
 fail_packet_manager_init:
 	return retval;
 }
 
 static int stop_cpsch(struct device_queue_manager *dqm)
 {
+	bool hanging;
+
 	dqm_lock(dqm);
-	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+	if (!dqm->is_hws_hang)
+		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+	hanging = dqm->is_hws_hang || dqm->is_resetting;
+	dqm->sched_running = false;
 	dqm_unlock(dqm);
 
 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
-	pm_uninit(&dqm->packets);
+	pm_uninit(&dqm->packets, hanging);
 
 	return 0;
 }
@@ -1259,9 +1315,10 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;
 
+	if (!dqm->sched_running)
+		return 0;
 	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
 		return 0;
-
 	if (dqm->active_runlist)
 		return 0;
 
@@ -1283,6 +1340,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 {
 	int retval = 0;
 
+	if (!dqm->sched_running)
+		return 0;
 	if (dqm->is_hws_hang)
 		return -EIO;
 	if (!dqm->active_runlist)
@@ -1305,8 +1364,17 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	/* should be timed out */
 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				queue_preemption_timeout_ms);
-	if (retval)
+	if (retval) {
+		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+		dqm->is_hws_hang = true;
+		/* It's possible we're detecting a HWS hang in the
+		 * middle of a GPU reset. No need to schedule another
+		 * reset in this case.
+		 */
+		if (!dqm->is_resetting)
+			schedule_work(&dqm->hw_exception_work);
 		return retval;
+	}
 
 	pm_release_ib(&dqm->packets);
 	dqm->active_runlist = false;
@@ -1324,12 +1392,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
 	if (dqm->is_hws_hang)
 		return -EIO;
 	retval = unmap_queues_cpsch(dqm, filter, filter_param);
-	if (retval) {
-		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
-		dqm->is_hws_hang = true;
-		schedule_work(&dqm->hw_exception_work);
+	if (retval)
 		return retval;
-	}
 
 	return map_queues_cpsch(dqm);
 }
@@ -1548,7 +1612,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
 		goto dqm_unlock;
 	}
 
-	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
 
 	if (!mqd_mgr->get_wave_state) {
 		r = -EINVAL;
@@ -1676,7 +1740,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
 	struct kfd_dev *dev = dqm->dev;
 	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
-		dev->device_info->num_sdma_engines *
+		(dev->device_info->num_sdma_engines +
+		dev->device_info->num_xgmi_sdma_engines) *
 		dev->device_info->num_sdma_queues_per_engine +
 		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
 
@@ -1722,6 +1787,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.initialize = initialize_cpsch;
 		dqm->ops.start = start_cpsch;
 		dqm->ops.stop = stop_cpsch;
+		dqm->ops.pre_reset = pre_reset;
 		dqm->ops.destroy_queue = destroy_queue_cpsch;
 		dqm->ops.update_queue = update_queue;
 		dqm->ops.register_process = register_process;
@@ -1740,6 +1806,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		/* initialize dqm for no cp scheduling */
 		dqm->ops.start = start_nocpsch;
 		dqm->ops.stop = stop_nocpsch;
+		dqm->ops.pre_reset = pre_reset;
 		dqm->ops.create_queue = create_queue_nocpsch;
 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
 		dqm->ops.update_queue = update_queue;
@@ -1786,10 +1853,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 	case CHIP_ARCTURUS:
 		device_queue_manager_init_v9(&dqm->asic_ops);
 		break;
 	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
 		device_queue_manager_init_v10_navi10(&dqm->asic_ops);
 		break;
 	default:
@@ -1883,6 +1953,12 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
 	int pipe, queue;
 	int r = 0;
 
+	if (!dqm->sched_running) {
+		seq_printf(m, " Device is stopped\n");
+
+		return 0;
+	}
+
 	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
 					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
 					&dump, &n_regs);
@@ -1917,7 +1993,8 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
 		}
 	}
 
-	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
+	for (pipe = 0; pipe < get_num_sdma_engines(dqm) +
+			get_num_xgmi_sdma_engines(dqm); pipe++) {
 		for (queue = 0;
 		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
 		     queue++) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 90db2c9275f6..871d3b628d2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -32,6 +32,8 @@
 #include "kfd_mqd_manager.h"
 
 
+#define VMID_NUM 16
+
 struct device_process_node {
 	struct qcm_process_device *qpd;
 	struct list_head list;
@@ -102,6 +104,7 @@ struct device_queue_manager_ops {
 	int	(*initialize)(struct device_queue_manager *dqm);
 	int	(*start)(struct device_queue_manager *dqm);
 	int	(*stop)(struct device_queue_manager *dqm);
+	void	(*pre_reset)(struct device_queue_manager *dqm);
 	void	(*uninitialize)(struct device_queue_manager *dqm);
 	int	(*create_kernel_queue)(struct device_queue_manager *dqm,
 					struct kernel_queue *kq,
@@ -185,9 +188,9 @@ struct device_queue_manager {
 	unsigned int		*allocated_queues;
 	uint64_t		sdma_bitmap;
 	uint64_t		xgmi_sdma_bitmap;
-	unsigned int		vmid_bitmap;
+	/* the pasid mapping for each kfd vmid */
+	uint16_t		vmid_pasid[VMID_NUM];
 	uint64_t		pipelines_addr;
-	struct kfd_mem_obj	*pipeline_mem;
 	uint64_t		fence_gpu_addr;
 	unsigned int		*fence_addr;
 	struct kfd_mem_obj	*fence_mem;
@@ -196,8 +199,10 @@ struct device_queue_manager {
 
 	/* hw exception  */
 	bool			is_hws_hang;
+	bool			is_resetting;
 	struct work_struct	hw_exception_work;
 	struct kfd_mem_obj	hiq_sdma_mqd;
+	bool			sched_running;
 };
 
 void device_queue_manager_init_cik(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebe79bf00145..8e0c00b9555e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
 	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
 				doorbell_start_offset;
 
-	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
+	kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
 
 	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
 					   kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
 	pr_debug("doorbell base           == 0x%08lX\n",
 			(uintptr_t)kfd->doorbell_base);
 
-	pr_debug("doorbell_id_offset      == 0x%08lX\n",
-			kfd->doorbell_id_offset);
+	pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
+			kfd->doorbell_base_dw_offset);
 
 	pr_debug("doorbell_process_limit  == 0x%08lX\n",
 			doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 	 * Calculating the kernel doorbell offset using the first
 	 * doorbell page.
 	 */
-	*doorbell_off = kfd->doorbell_id_offset + inx;
+	*doorbell_off = kfd->doorbell_base_dw_offset + inx;
 
 	pr_debug("Get kernel queue doorbell\n"
 			"     doorbell offset   == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
 	}
 }
 
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 					struct kfd_process *process,
 					unsigned int doorbell_id)
 {
 	/*
-	 * doorbell_id_offset accounts for doorbells taken by KGD.
+	 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
 	 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 	 * the process's doorbells. The offset returned is in dword
 	 * units regardless of the ASIC-dependent doorbell size.
 	 */
-	return kfd->doorbell_id_offset +
+	return kfd->doorbell_base_dw_offset +
 		process->doorbell_index
 		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
 		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index d674d4b3340f..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		ret = create_signal_event(devkfd, p, ev);
 		if (!ret) {
 			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
-			*event_page_offset <<= PAGE_SHIFT;
 			*event_slot_index = ev->event_id;
 		}
 		break;
@@ -852,8 +851,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 
 	if (type == KFD_EVENT_TYPE_MEMORY) {
 		dev_warn(kfd_device,
-			"Sending SIGSEGV to HSA Process with PID %d ",
-				p->lead_thread->pid);
+			"Sending SIGSEGV to process %d (pasid 0x%x)",
+				p->lead_thread->pid, p->pasid);
 		send_sig(SIGSEGV, p->lead_thread, 0);
 	}
 
@@ -861,13 +860,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 	if (send_signal) {
 		if (send_sigterm) {
 			dev_warn(kfd_device,
-				"Sending SIGTERM to HSA Process with PID %d ",
-					p->lead_thread->pid);
+				"Sending SIGTERM to process %d (pasid 0x%x)",
+					p->lead_thread->pid, p->pasid);
 			send_sig(SIGTERM, p->lead_thread, 0);
 		} else {
 			dev_err(kfd_device,
-				"HSA Process (PID %d) got unhandled exception",
-				p->lead_thread->pid);
+				"Process %d (pasid 0x%x) got unhandled exception",
+				p->lead_thread->pid, p->pasid);
 		}
 	}
 }
@@ -936,7 +935,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	/* Workaround on Raven to not kill the process when memory is freed
 	 * before IOMMU is able to finish processing all the excessive PPRs
 	 */
-	if (dev->device_info->asic_family != CHIP_RAVEN) {
+	if (dev->device_info->asic_family != CHIP_RAVEN &&
+	    dev->device_info->asic_family != CHIP_RENOIR) {
 		mutex_lock(&p->event_mutex);
 
 		/* Lookup events by type and signal them */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 9dc4bff8085e..bb77b8890e77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -369,8 +369,13 @@ int kfd_init_apertures(struct kfd_process *process)
 
 	/*Iterating over all devices*/
 	while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
-		if (!dev) {
-			id++; /* Skip non GPU devices */
+		if (!dev || kfd_devcgroup_check_permission(dev)) {
+			/* Skip non GPU devices and devices to which the
+			 * current process have no access to. Access can be
+			 * limited by placing the process in a specific
+			 * cgroup hierarchy
+			 */
+			id++;
 			continue;
 		}
 
@@ -405,8 +410,11 @@ int kfd_init_apertures(struct kfd_process *process)
 			case CHIP_VEGA12:
 			case CHIP_VEGA20:
 			case CHIP_RAVEN:
+			case CHIP_RENOIR:
 			case CHIP_ARCTURUS:
 			case CHIP_NAVI10:
+			case CHIP_NAVI12:
+			case CHIP_NAVI14:
 				kfd_init_apertures_v9(pdd, id);
 				break;
 			default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 3ef67d2e0d9f..e05d75ecda21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -54,8 +54,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 		memcpy(patched_ihre, ih_ring_entry,
 				dev->device_info->ih_ring_entry_size);
 
-		pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
-				dev->kgd, vmid);
+		pasid = dev->dqm->vmid_pasid[vmid];
 
 		/* Patch the pasid field */
 		patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index c56ac47cd318..bc47f6a44456 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -62,6 +62,11 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
 	}
 
 	kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
+	if (unlikely(!kfd->ih_wq)) {
+		kfifo_free(&kfd->ih_fifo);
+		dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n");
+		return -ENOMEM;
+	}
 	spin_lock_init(&kfd->interrupt_lock);
 
 	INIT_WORK(&kfd->interrupt_work, interrupt_wq);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 5f35df23fb18..8d871514671e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -62,9 +62,6 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
 	struct amd_iommu_device_info iommu_info;
 	unsigned int pasid_limit;
 	int err;
-	struct kfd_topology_device *top_dev;
-
-	top_dev = kfd_topology_device_by_id(kfd->id);
 
 	if (!kfd->device_info->needs_iommu_device)
 		return 0;
@@ -160,7 +157,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
 	if (!p)
 		return;
 
-	pr_debug("Unbinding process %d from IOMMU\n", pasid);
+	pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
 
 	mutex_lock(kfd_get_dbgmgr_mutex());
 
@@ -194,7 +191,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 	struct kfd_dev *dev;
 
 	dev_warn_ratelimited(kfd_device,
-			"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
+			"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
 			PCI_BUS_NUM(pdev->devfn),
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn),
@@ -235,7 +232,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
 		err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
 				p->lead_thread);
 		if (err < 0) {
-			pr_err("Unexpected pasid %d binding failure\n",
+			pr_err("Unexpected pasid 0x%x binding failure\n",
 					p->pasid);
 			mutex_unlock(&p->mutex);
 			break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 8b4564f71a7a..bae706462f96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -34,7 +34,10 @@
 
 #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
 
-static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+/* Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ */
+static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 		enum kfd_queue_type type, unsigned int queue_size)
 {
 	struct queue_properties prop;
@@ -87,9 +90,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	kq->pq_kernel_addr = kq->pq->cpu_ptr;
 	kq->pq_gpu_addr = kq->pq->gpu_addr;
 
-	retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
-	if (!retval)
-		goto err_eop_allocate_vidmem;
+	/* For CIK family asics, kq->eop_mem is not needed */
+	if (dev->device_info->asic_family > CHIP_MULLINS) {
+		retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+		if (retval != 0)
+			goto err_eop_allocate_vidmem;
+
+		kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+		kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+		memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+	}
 
 	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
 					&kq->rptr_mem);
@@ -183,9 +194,10 @@ err_get_kernel_doorbell:
 
 }
 
-static void uninitialize(struct kernel_queue *kq)
+/* Uninitialize a kernel queue and free all its memory usages. */
+static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
 {
-	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
 		kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
 					kq->queue->mqd,
 					KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
@@ -200,14 +212,19 @@ static void uninitialize(struct kernel_queue *kq)
 
 	kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
 	kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
-	kq->ops_asic_specific.uninitialize(kq);
+
+	/* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
+	 * is able to handle NULL properly.
+	 */
+	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+
 	kfd_gtt_sa_free(kq->dev, kq->pq);
 	kfd_release_kernel_doorbell(kq->dev,
 					kq->queue->properties.doorbell_ptr);
 	uninit_queue(kq->queue);
 }
 
-static int acquire_packet_buffer(struct kernel_queue *kq,
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
 		size_t packet_size_in_dwords, unsigned int **buffer_ptr)
 {
 	size_t available_size;
@@ -268,7 +285,7 @@ err_no_space:
 	return -ENOMEM;
 }
 
-static void submit_packet(struct kernel_queue *kq)
+void kq_submit_packet(struct kernel_queue *kq)
 {
 #ifdef DEBUG
 	int i;
@@ -280,11 +297,18 @@ static void submit_packet(struct kernel_queue *kq)
 	}
 	pr_debug("\n");
 #endif
-
-	kq->ops_asic_specific.submit_packet(kq);
+	if (kq->dev->device_info->doorbell_size == 8) {
+		*kq->wptr64_kernel = kq->pending_wptr64;
+		write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+					kq->pending_wptr64);
+	} else {
+		*kq->wptr_kernel = kq->pending_wptr;
+		write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+					kq->pending_wptr);
+	}
 }
 
-static void rollback_packet(struct kernel_queue *kq)
+void kq_rollback_packet(struct kernel_queue *kq)
 {
 	if (kq->dev->device_info->doorbell_size == 8) {
 		kq->pending_wptr64 = *kq->wptr64_kernel;
@@ -304,57 +328,18 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	if (!kq)
 		return NULL;
 
-	kq->ops.initialize = initialize;
-	kq->ops.uninitialize = uninitialize;
-	kq->ops.acquire_packet_buffer = acquire_packet_buffer;
-	kq->ops.submit_packet = submit_packet;
-	kq->ops.rollback_packet = rollback_packet;
-
-	switch (dev->device_info->asic_family) {
-	case CHIP_CARRIZO:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-	case CHIP_VEGAM:
-		kernel_queue_init_vi(&kq->ops_asic_specific);
-		break;
-
-	case CHIP_KAVERI:
-	case CHIP_HAWAII:
-		kernel_queue_init_cik(&kq->ops_asic_specific);
-		break;
-
-	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_VEGA20:
-	case CHIP_RAVEN:
-	case CHIP_ARCTURUS:
-		kernel_queue_init_v9(&kq->ops_asic_specific);
-		break;
-	case CHIP_NAVI10:
-		kernel_queue_init_v10(&kq->ops_asic_specific);
-		break;
-	default:
-		WARN(1, "Unexpected ASIC family %u",
-		     dev->device_info->asic_family);
-		goto out_free;
-	}
-
-	if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+	if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
 		return kq;
 
 	pr_err("Failed to init kernel queue\n");
 
-out_free:
 	kfree(kq);
 	return NULL;
 }
 
-void kernel_queue_uninit(struct kernel_queue *kq)
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
 {
-	kq->ops.uninitialize(kq);
+	kq_uninitialize(kq, hanging);
 	kfree(kq);
 }
 
@@ -374,7 +359,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
 		return;
 	}
 
-	retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
+	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
 	if (unlikely(retval != 0)) {
 		pr_err("  Failed to acquire packet buffer\n");
 		pr_err("Kernel queue test failed\n");
@@ -382,7 +367,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
 	}
 	for (i = 0; i < 5; i++)
 		buffer[i] = kq->nop_packet;
-	kq->ops.submit_packet(kq);
+	kq_submit_packet(kq);
 
 	pr_err("Ending kernel queue test\n");
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..f4cfe9f1871c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -29,45 +29,28 @@
 #include "kfd_priv.h"
 
 /**
- * struct kernel_queue_ops
- *
- * @initialize: Initialize a kernel queue, including allocations of GART memory
- * needed for the queue.
- *
- * @uninitialize: Uninitialize a kernel queue and free all its memory usages.
- *
- * @acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
  * queue ring buffer where the calling function can write its packet. It is
  * Guaranteed that there is enough space for that packet. It also updates the
  * pending write pointer to that location so subsequent calls to
  * acquire_packet_buffer will get a correct write pointer
  *
- * @submit_packet: Update the write pointer and doorbell of a kernel queue.
- *
- * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
- * queue are equal, which means the CP has read all the submitted packets.
+ * kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
  *
- * @rollback_packet: This routine is called if we failed to build an acquired
+ * kq_rollback_packet: This routine is called if we failed to build an acquired
  * packet for some reason. It just overwrites the pending wptr with the current
  * one
  *
  */
-struct kernel_queue_ops {
-	bool	(*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-	void	(*uninitialize)(struct kernel_queue *kq);
-	int	(*acquire_packet_buffer)(struct kernel_queue *kq,
-					size_t packet_size_in_dwords,
-					unsigned int **buffer_ptr);
 
-	void	(*submit_packet)(struct kernel_queue *kq);
-	void	(*rollback_packet)(struct kernel_queue *kq);
-};
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
+				size_t packet_size_in_dwords,
+				unsigned int **buffer_ptr);
+void kq_submit_packet(struct kernel_queue *kq);
+void kq_rollback_packet(struct kernel_queue *kq);
 
-struct kernel_queue {
-	struct kernel_queue_ops ops;
-	struct kernel_queue_ops ops_asic_specific;
 
+struct kernel_queue {
 	/* data */
 	struct kfd_dev		*dev;
 	struct mqd_manager	*mqd_mgr;
@@ -99,9 +82,4 @@ struct kernel_queue {
 	struct list_head	list;
 };
 
-void kernel_queue_init_cik(struct kernel_queue_ops *ops);
-void kernel_queue_init_vi(struct kernel_queue_ops *ops);
-void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
-
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
deleted file mode 100644
index 19e54acb4125..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_cik(struct kernel_queue *kq);
-static void submit_packet_cik(struct kernel_queue *kq);
-
-void kernel_queue_init_cik(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_cik;
-	ops->uninitialize = uninitialize_cik;
-	ops->submit_packet = submit_packet_cik;
-}
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	return true;
-}
-
-static void uninitialize_cik(struct kernel_queue *kq)
-{
-}
-
-static void submit_packet_cik(struct kernel_queue *kq)
-{
-	*kq->wptr_kernel = kq->pending_wptr;
-	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index aed32ab7102e..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v10;
-	ops->uninitialize = uninitialize_v10;
-	ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval != 0)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_v10(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v10(struct kernel_queue *kq)
-{
-	*kq->wptr64_kernel = kq->pending_wptr64;
-	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr64);
-}
-
-static int pm_map_process_v10(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-	uint64_t vm_page_table_base_addr = qpd->page_table_base;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields14.gds_size = qpd->gds_size;
-	packet->bitfields14.num_gws = qpd->num_gws;
-	packet->bitfields14.num_oac = qpd->num_oac;
-	packet->bitfields14.sdma_enable = 1;
-
-	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	if (qpd->tba_addr) {
-		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
-			upper_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
-	}
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	packet->vm_context_page_table_base_addr_lo32 =
-			lower_32_bits(vm_page_table_base_addr);
-	packet->vm_context_page_table_base_addr_hi32 =
-			upper_32_bits(vm_page_table_base_addr);
-
-	return 0;
-}
-
-static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
-					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		WARN(1, "queue type %d\n", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
-static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-			enum kfd_queue_type type,
-			enum kfd_unmap_queues_filter filter,
-			uint32_t filter_param, bool reset,
-			unsigned int sdma_engine)
-{
-	struct pm4_mes_unmap_queues *packet;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		break;
-	default:
-		WARN(1, "queue type %d\n", type);
-		break;
-	}
-
-	if (reset)
-		packet->bitfields2.action =
-			action__mes_unmap_queues__reset_queues;
-	else
-		packet->bitfields2.action =
-			action__mes_unmap_queues__preempt_queues;
-
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d\n", filter);
-		break;
-	}
-
-	return 0;
-
-}
-
-static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t fence_address,	uint32_t fence_value)
-{
-	struct pm4_mes_query_status *packet;
-
-	packet = (struct pm4_mes_query_status *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-
-	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
-
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	return 0;
-}
-
-
-static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	WARN_ON(!buffer);
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
-}
-
-const struct packet_manager_funcs kfd_v10_pm_funcs = {
-	.map_process			= pm_map_process_v10,
-	.runlist			= pm_runlist_v10,
-	.set_resources			= pm_set_resources_vi,
-	.map_queues			= pm_map_queues_v10,
-	.unmap_queues			= pm_unmap_queues_v10,
-	.query_status			= pm_query_status_v10,
-	.release_mem			= pm_release_mem_v10,
-	.map_process_size		= sizeof(struct pm4_mes_map_process),
-	.runlist_size			= sizeof(struct pm4_mes_runlist),
-	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
-	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
-	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
-	.query_status_size		= sizeof(struct pm4_mes_query_status),
-	.release_mem_size		= sizeof(struct pm4_mec_release_mem)
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
deleted file mode 100644
index 9a4bafb2e175..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v9(struct kernel_queue *kq);
-static void submit_packet_v9(struct kernel_queue *kq);
-
-void kernel_queue_init_v9(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v9;
-	ops->uninitialize = uninitialize_v9;
-	ops->submit_packet = submit_packet_v9;
-}
-
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_v9(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v9(struct kernel_queue *kq)
-{
-	*kq->wptr64_kernel = kq->pending_wptr64;
-	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr64);
-}
-
-static int pm_map_process_v9(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-	uint64_t vm_page_table_base_addr = qpd->page_table_base;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
-	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-	packet->bitfields14.num_gws = qpd->num_gws;
-	packet->bitfields14.num_oac = qpd->num_oac;
-	packet->bitfields14.sdma_enable = 1;
-	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	packet->vm_context_page_table_base_addr_lo32 =
-			lower_32_bits(vm_page_table_base_addr);
-	packet->vm_context_page_table_base_addr_hi32 =
-			upper_32_bits(vm_page_table_base_addr);
-
-	return 0;
-}
-
-static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.chained_runlist_idle_disable = chain ? 1 : 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
-				struct scheduling_resources *res)
-{
-	struct pm4_mes_set_resources *packet;
-
-	packet = (struct pm4_mes_set_resources *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
-
-	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
-					sizeof(struct pm4_mes_set_resources));
-
-	packet->bitfields2.queue_type =
-			queue_type__mes_set_resources__hsa_interface_queue_hiq;
-	packet->bitfields2.vmid_mask = res->vmid_mask;
-	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
-	packet->bitfields7.oac_mask = res->oac_mask;
-	packet->bitfields8.gds_heap_base = res->gds_heap_base;
-	packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
-	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
-	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
-	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
-	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
-	return 0;
-}
-
-static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
-					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
-	packet->bitfields2.extended_engine_sel =
-		extended_engine_sel__mes_map_queues__legacy_engine_sel;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		use_static = false; /* no static queues under SDMA */
-		if (q->properties.sdma_engine_id < 2)
-			packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		else {
-			packet->bitfields2.extended_engine_sel =
-				extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
-			packet->bitfields2.engine_sel = q->properties.sdma_engine_id;
-		}
-		break;
-	default:
-		WARN(1, "queue type %d", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
-static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
-			enum kfd_queue_type type,
-			enum kfd_unmap_queues_filter filter,
-			uint32_t filter_param, bool reset,
-			unsigned int sdma_engine)
-{
-	struct pm4_mes_unmap_queues *packet;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.extended_engine_sel =
-			extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		if (sdma_engine < 2) {
-			packet->bitfields2.extended_engine_sel =
-				extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
-			packet->bitfields2.engine_sel =
-				engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		} else {
-			packet->bitfields2.extended_engine_sel =
-				extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
-			packet->bitfields2.engine_sel = sdma_engine;
-		}
-		break;
-	default:
-		WARN(1, "queue type %d", type);
-		return -EINVAL;
-	}
-
-	if (reset)
-		packet->bitfields2.action =
-			action__mes_unmap_queues__reset_queues;
-	else
-		packet->bitfields2.action =
-			action__mes_unmap_queues__preempt_queues;
-
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d", filter);
-		return -EINVAL;
-	}
-
-	return 0;
-
-}
-
-static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t fence_address,	uint32_t fence_value)
-{
-	struct pm4_mes_query_status *packet;
-
-	packet = (struct pm4_mes_query_status *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-
-	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
-
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	return 0;
-}
-
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return 0;
-}
-
-const struct packet_manager_funcs kfd_v9_pm_funcs = {
-	.map_process		= pm_map_process_v9,
-	.runlist		= pm_runlist_v9,
-	.set_resources		= pm_set_resources_v9,
-	.map_queues		= pm_map_queues_v9,
-	.unmap_queues		= pm_unmap_queues_v9,
-	.query_status		= pm_query_status_v9,
-	.release_mem		= pm_release_mem_v9,
-	.map_process_size	= sizeof(struct pm4_mes_map_process),
-	.runlist_size		= sizeof(struct pm4_mes_runlist),
-	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
-	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
-	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
-	.query_status_size	= sizeof(struct pm4_mes_query_status),
-	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
-};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
deleted file mode 100644
index 2adaf40027eb..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_vi.h"
-#include "kfd_pm4_opcodes.h"
-
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_vi(struct kernel_queue *kq);
-static void submit_packet_vi(struct kernel_queue *kq);
-
-void kernel_queue_init_vi(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_vi;
-	ops->uninitialize = uninitialize_vi;
-	ops->submit_packet = submit_packet_vi;
-}
-
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval != 0)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_vi(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_vi(struct kernel_queue *kq)
-{
-	*kq->wptr_kernel = kq->pending_wptr;
-	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr);
-}
-
-unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
-{
-	union PM4_MES_TYPE_3_HEADER header;
-
-	header.u32All = 0;
-	header.opcode = opcode;
-	header.count = packet_size / 4 - 2;
-	header.type = PM4_TYPE_3;
-
-	return header.u32All;
-}
-
-static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
-				struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields3.page_table_base = qpd->page_table_base;
-	packet->bitfields10.gds_size = qpd->gds_size;
-	packet->bitfields10.num_gws = qpd->num_gws;
-	packet->bitfields10.num_oac = qpd->num_oac;
-	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
-	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
-
-	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	return 0;
-}
-
-static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	if (WARN_ON(!ib))
-		return -EFAULT;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
-				struct scheduling_resources *res)
-{
-	struct pm4_mes_set_resources *packet;
-
-	packet = (struct pm4_mes_set_resources *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
-
-	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
-					sizeof(struct pm4_mes_set_resources));
-
-	packet->bitfields2.queue_type =
-			queue_type__mes_set_resources__hsa_interface_queue_hiq;
-	packet->bitfields2.vmid_mask = res->vmid_mask;
-	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
-	packet->bitfields7.oac_mask = res->oac_mask;
-	packet->bitfields8.gds_heap_base = res->gds_heap_base;
-	packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
-	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
-	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
-	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
-	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
-	return 0;
-}
-
-static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
-					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		WARN(1, "queue type %d", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
-static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
-			enum kfd_queue_type type,
-			enum kfd_unmap_queues_filter filter,
-			uint32_t filter_param, bool reset,
-			unsigned int sdma_engine)
-{
-	struct pm4_mes_unmap_queues *packet;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		break;
-	default:
-		WARN(1, "queue type %d", type);
-		return -EINVAL;
-	}
-
-	if (reset)
-		packet->bitfields2.action =
-			action__mes_unmap_queues__reset_queues;
-	else
-		packet->bitfields2.action =
-			action__mes_unmap_queues__preempt_queues;
-
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d", filter);
-		return -EINVAL;
-	}
-
-	return 0;
-
-}
-
-static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t fence_address,	uint32_t fence_value)
-{
-	struct pm4_mes_query_status *packet;
-
-	packet = (struct pm4_mes_query_status *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
-
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	return 0;
-}
-
-static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(*packet));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-						 sizeof(*packet));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
-	packet->bitfields2.atc = 0;
-
-	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel___release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return 0;
-}
-
-const struct packet_manager_funcs kfd_vi_pm_funcs = {
-	.map_process		= pm_map_process_vi,
-	.runlist		= pm_runlist_vi,
-	.set_resources		= pm_set_resources_vi,
-	.map_queues		= pm_map_queues_vi,
-	.unmap_queues		= pm_unmap_queues_vi,
-	.query_status		= pm_query_status_vi,
-	.release_mem		= pm_release_mem_vi,
-	.map_process_size	= sizeof(struct pm4_mes_map_process),
-	.runlist_size		= sizeof(struct pm4_mes_runlist),
-	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
-	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
-	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
-	.query_status_size	= sizeof(struct pm4_mes_query_status),
-	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
-};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 986ff52d5750..f4b7f7e6c40e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -82,7 +82,7 @@ static void kfd_exit(void)
 	kfd_chardev_exit();
 }
 
-int kgd2kfd_init()
+int kgd2kfd_init(void)
 {
 	return kfd_init();
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 28876aceb14b..19f0fe547c57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
-	case KFD_MQD_TYPE_COMPUTE:
 		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd;
 		mqd->free_mqd = free_mqd;
@@ -401,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 #endif
 		break;
 	case KFD_MQD_TYPE_DIQ:
-		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd;
 		mqd->load_mqd = load_mqd;
@@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
 	mqd = mqd_manager_init_cik(type, dev);
 	if (!mqd)
 		return NULL;
-	if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+	if (type == KFD_MQD_TYPE_CP)
 		mqd->update_mqd = update_mqd_hawaii;
 	return mqd;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 9cd3eb2d90bd..d1d68a51bfb8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -66,38 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 		m->compute_static_thread_mgmt_se3);
 }
 
+static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
 		struct queue_properties *q)
 {
-	int retval;
-	struct kfd_mem_obj *mqd_mem_obj = NULL;
-
-	/* From V9,  for CWSR, the control stack is located on the next page
-	 * boundary after the mqd, we will use the gtt allocation function
-	 * instead of sub-allocation function.
-	 */
-	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
-		if (!mqd_mem_obj)
-			return NULL;
-		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
-			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-				ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE),
-			&(mqd_mem_obj->gtt_mem),
-			&(mqd_mem_obj->gpu_addr),
-			(void *)&(mqd_mem_obj->cpu_ptr), true);
-	} else {
-		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
-				&mqd_mem_obj);
-	}
+	struct kfd_mem_obj *mqd_mem_obj;
 
-	if (retval) {
-		kfree(mqd_mem_obj);
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+			&mqd_mem_obj))
 		return NULL;
-	}
 
 	return mqd_mem_obj;
-
 }
 
 static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -131,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
 			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
 			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
 
-	m->cp_hqd_pipe_priority = 1;
-	m->cp_hqd_queue_priority = 15;
-
 	if (q->format == KFD_QUEUE_FORMAT_AQL) {
 		m->cp_hqd_aql_control =
 			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
@@ -172,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 	return r;
 }
 
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+					      queue_id, p->doorbell_off);
+}
+
 static void update_mqd(struct mqd_manager *mm, void *mqd,
 		      struct queue_properties *q)
 {
@@ -230,11 +219,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
 		m->cp_hqd_ctx_save_control = 0;
 
 	update_cu_mask(mm, mqd, q);
+	set_priority(m, q);
 
-	q->is_active = (q->queue_size > 0 &&
-			q->queue_address != 0 &&
-			q->queue_percent > 0 &&
-			!q->is_evicted);
+	q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
 static int destroy_mqd(struct mqd_manager *mm, void *mqd,
@@ -250,14 +237,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 static void free_mqd(struct mqd_manager *mm, void *mqd,
 			struct kfd_mem_obj *mqd_mem_obj)
 {
-	struct kfd_dev *kfd = mm->dev;
-
-	if (mqd_mem_obj->gtt_mem) {
-		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
-		kfree(mqd_mem_obj);
-	} else {
-		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-	}
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }
 
 static bool is_occupied(struct mqd_manager *mm, void *mqd,
@@ -276,18 +256,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 {
 	struct v10_compute_mqd *m;
 
-	/* Control stack is located one page after MQD. */
-	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
-
 	m = get_mqd(mqd);
 
+	/* Control stack is written backwards, while workgroup context data
+	 * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+	 * Current position is at m->cp_hqd_cntl_stack_offset and
+	 * m->cp_hqd_wg_state_offset, respectively.
+	 */
 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
 		m->cp_hqd_cntl_stack_offset;
 	*save_area_used_size = m->cp_hqd_wg_state_offset -
 		m->cp_hqd_cntl_stack_size;
 
-	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
-		return -EFAULT;
+	/* Control stack is not copied to user mode for GFXv10 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
 
 	return 0;
 }
@@ -306,18 +290,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
 			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
 }
 
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-			struct queue_properties *q)
-{
-	struct v10_compute_mqd *m;
-
-	update_mqd(mm, mqd, q);
-
-	/* TODO: what's the point? update_mqd already does this. */
-	m = get_mqd(mqd);
-	m->cp_hqd_vmid = q->vmid;
-}
-
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)
@@ -369,11 +341,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	m->sdma_queue_id = q->sdma_queue_id;
 	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
 
-
-	q->is_active = (q->queue_size > 0 &&
-			q->queue_address != 0 &&
-			q->queue_percent > 0 &&
-			!q->is_evicted);
+	q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
 /*
@@ -421,7 +389,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
 		return NULL;
 
-	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
 	if (!mqd)
 		return NULL;
 
@@ -429,7 +397,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
-	case KFD_MQD_TYPE_COMPUTE:
 		pr_debug("%s@%i\n", __func__, __LINE__);
 		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd;
@@ -450,8 +417,8 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 		mqd->allocate_mqd = allocate_hiq_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd_hiq_sdma;
-		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd_hiq;
+		mqd->load_mqd = hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->mqd_size = sizeof(struct v10_compute_mqd);
@@ -461,11 +428,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 		pr_debug("%s@%i\n", __func__, __LINE__);
 		break;
 	case KFD_MQD_TYPE_DIQ:
-		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd;
 		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd_hiq;
+		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->mqd_size = sizeof(struct v10_compute_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index d3380c5bdbde..436b7f518979 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
 	 * instead of sub-allocation function.
 	 */
 	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
 		if (!mqd_mem_obj)
 			return NULL;
 		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
@@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 					  wptr_shift, 0, mms);
 }
 
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+					      queue_id, p->doorbell_off);
+}
+
 static void update_mqd(struct mqd_manager *mm, void *mqd,
 		      struct queue_properties *q)
 {
@@ -302,7 +310,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 
 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
 		m->cp_hqd_cntl_stack_offset;
-	*save_area_used_size = m->cp_hqd_wg_state_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
 
 	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
 		return -EFAULT;
@@ -324,18 +333,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
 			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
 }
 
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-			struct queue_properties *q)
-{
-	struct v9_mqd *m;
-
-	update_mqd(mm, mqd, q);
-
-	/* TODO: what's the point? update_mqd already does this. */
-	m = get_mqd(mqd);
-	m->cp_hqd_vmid = q->vmid;
-}
-
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)
@@ -443,7 +440,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
-	case KFD_MQD_TYPE_COMPUTE:
 		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd;
 		mqd->free_mqd = free_mqd;
@@ -461,8 +457,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		mqd->allocate_mqd = allocate_hiq_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd_hiq_sdma;
-		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd_hiq;
+		mqd->load_mqd = hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->mqd_size = sizeof(struct v9_mqd);
@@ -471,11 +467,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 #endif
 		break;
 	case KFD_MQD_TYPE_DIQ:
-		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd;
 		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd_hiq;
+		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->mqd_size = sizeof(struct v9_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 7d144f56f421..a5e8ff1e5945 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
 static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 			struct queue_properties *q)
 {
-	struct vi_mqd *m;
 	__update_mqd(mm, mqd, q, MTYPE_UC, 0);
-
-	m = get_mqd(mqd);
-	m->cp_hqd_vmid = q->vmid;
 }
 
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
@@ -425,7 +421,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
-	case KFD_MQD_TYPE_COMPUTE:
 		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd;
 		mqd->free_mqd = free_mqd;
@@ -453,7 +448,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 #endif
 		break;
 	case KFD_MQD_TYPE_DIQ:
-		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->allocate_mqd = allocate_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd;
 		mqd->load_mqd = load_mqd;
@@ -494,7 +489,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
 	mqd = mqd_manager_init_vi(type, dev);
 	if (!mqd)
 		return NULL;
-	if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+	if (type == KFD_MQD_TYPE_CP)
 		mqd->update_mqd = update_mqd_tonga;
 	return mqd;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 2c8624c5b42c..dc406e6dee23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -239,11 +239,12 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 	case CHIP_ARCTURUS:
-		pm->pmf = &kfd_v9_pm_funcs;
-		break;
 	case CHIP_NAVI10:
-		pm->pmf = &kfd_v10_pm_funcs;
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		pm->pmf = &kfd_v9_pm_funcs;
 		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
@@ -263,10 +264,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 	return 0;
 }
 
-void pm_uninit(struct packet_manager *pm)
+void pm_uninit(struct packet_manager *pm, bool hanging)
 {
 	mutex_destroy(&pm->lock);
-	kernel_queue_uninit(pm->priv_queue);
+	kernel_queue_uninit(pm->priv_queue, hanging);
 }
 
 int pm_send_set_resources(struct packet_manager *pm,
@@ -277,7 +278,7 @@ int pm_send_set_resources(struct packet_manager *pm,
 
 	size = pm->pmf->set_resources_size;
 	mutex_lock(&pm->lock);
-	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+	kq_acquire_packet_buffer(pm->priv_queue,
 					size / sizeof(uint32_t),
 					(unsigned int **)&buffer);
 	if (!buffer) {
@@ -288,9 +289,9 @@ int pm_send_set_resources(struct packet_manager *pm,
 
 	retval = pm->pmf->set_resources(pm, buffer, res);
 	if (!retval)
-		pm->priv_queue->ops.submit_packet(pm->priv_queue);
+		kq_submit_packet(pm->priv_queue);
 	else
-		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+		kq_rollback_packet(pm->priv_queue);
 
 out:
 	mutex_unlock(&pm->lock);
@@ -315,7 +316,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
 	mutex_lock(&pm->lock);
 
-	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+	retval = kq_acquire_packet_buffer(pm->priv_queue,
 					packet_size_dwords, &rl_buffer);
 	if (retval)
 		goto fail_acquire_packet_buffer;
@@ -325,14 +326,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	if (retval)
 		goto fail_create_runlist;
 
-	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	kq_submit_packet(pm->priv_queue);
 
 	mutex_unlock(&pm->lock);
 
 	return retval;
 
 fail_create_runlist:
-	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+	kq_rollback_packet(pm->priv_queue);
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
@@ -351,7 +352,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 
 	size = pm->pmf->query_status_size;
 	mutex_lock(&pm->lock);
-	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+	kq_acquire_packet_buffer(pm->priv_queue,
 			size / sizeof(uint32_t), (unsigned int **)&buffer);
 	if (!buffer) {
 		pr_err("Failed to allocate buffer on kernel queue\n");
@@ -361,9 +362,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 
 	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
 	if (!retval)
-		pm->priv_queue->ops.submit_packet(pm->priv_queue);
+		kq_submit_packet(pm->priv_queue);
 	else
-		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+		kq_rollback_packet(pm->priv_queue);
 
 out:
 	mutex_unlock(&pm->lock);
@@ -380,7 +381,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 	size = pm->pmf->unmap_queues_size;
 	mutex_lock(&pm->lock);
-	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+	kq_acquire_packet_buffer(pm->priv_queue,
 			size / sizeof(uint32_t), (unsigned int **)&buffer);
 	if (!buffer) {
 		pr_err("Failed to allocate buffer on kernel queue\n");
@@ -391,9 +392,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 	retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
 				       reset, sdma_engine);
 	if (!retval)
-		pm->priv_queue->ops.submit_packet(pm->priv_queue);
+		kq_submit_packet(pm->priv_queue);
 	else
-		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+		kq_rollback_packet(pm->priv_queue);
 
 out:
 	mutex_unlock(&pm->lock);
@@ -438,7 +439,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
 
 	size = pm->pmf->query_status_size;
 	mutex_lock(&pm->lock);
-	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+	kq_acquire_packet_buffer(pm->priv_queue,
 			size / sizeof(uint32_t), (unsigned int **)&buffer);
 	if (!buffer) {
 		pr_err("Failed to allocate buffer on kernel queue\n");
@@ -446,7 +447,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
 		goto out;
 	}
 	memset(buffer, 0x55, size);
-	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	kq_submit_packet(pm->priv_queue);
 
 	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
 		buffer[0], buffer[1], buffer[2], buffer[3],
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
new file mode 100644
index 000000000000..2de01009f1b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_opcodes.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+
+static int pm_map_process_v9(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
+	uint64_t vm_page_table_base_addr = qpd->page_table_base;
+
+	packet = (struct pm4_mes_map_process *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 1;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+	packet->bitfields14.num_gws = qpd->num_gws;
+	packet->bitfields14.num_oac = qpd->num_oac;
+	packet->bitfields14.sdma_enable = 1;
+	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
+		 * not defined, so setting it won't do any harm.
+		 */
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
+				| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
+
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	packet->vm_context_page_table_base_addr_lo32 =
+			lower_32_bits(vm_page_table_base_addr);
+	packet->vm_context_page_table_base_addr_hi32 =
+			upper_32_bits(vm_page_table_base_addr);
+
+	return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
+
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.chained_runlist_idle_disable = chain ? 1 : 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+	packet->bitfields2.extended_engine_sel =
+		extended_engine_sel__mes_map_queues__legacy_engine_sel;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		use_static = false; /* no static queues under SDMA */
+		if (q->properties.sdma_engine_id < 2)
+			packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		else {
+			packet->bitfields2.extended_engine_sel =
+				extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
+			packet->bitfields2.engine_sel = q->properties.sdma_engine_id;
+		}
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_queue_type type,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+	switch (type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.extended_engine_sel =
+			extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		if (sdma_engine < 2) {
+			packet->bitfields2.extended_engine_sel =
+				extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+			packet->bitfields2.engine_sel =
+				engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+		} else {
+			packet->bitfields2.extended_engine_sel =
+				extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
+			packet->bitfields2.engine_sel = sdma_engine;
+		}
+		break;
+	default:
+		WARN(1, "queue type %d", type);
+		return -EINVAL;
+	}
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+		packet->bitfields2.num_queues = 1;
+		packet->bitfields3b.doorbell_offset0 = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint32_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+	.map_process		= pm_map_process_v9,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_v9,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= NULL,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= 0,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
new file mode 100644
index 000000000000..bed4d0ccb6b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
+
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	union PM4_MES_TYPE_3_HEADER header;
+
+	header.u32All = 0;
+	header.opcode = opcode;
+	header.count = packet_size / 4 - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32All;
+}
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
+
+	packet = (struct pm4_mes_map_process *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 1;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields3.page_table_base = qpd->page_table_base;
+	packet->bitfields10.gds_size = qpd->gds_size;
+	packet->bitfields10.num_gws = qpd->num_gws;
+	packet->bitfields10.num_oac = qpd->num_oac;
+	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
+
+	if (WARN_ON(!ib))
+		return -EFAULT;
+
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		use_static = false; /* no static queues under SDMA */
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_queue_type type,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+	switch (type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+		break;
+	default:
+		WARN(1, "queue type %d", type);
+		return -EINVAL;
+	}
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+		packet->bitfields2.num_queues = 1;
+		packet->bitfields3b.doorbell_offset0 = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint32_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+	.map_process		= pm_map_process_vi,
+	.runlist		= pm_runlist_vi,
+	.set_resources		= pm_set_resources_vi,
+	.map_queues		= pm_map_queues_vi,
+	.unmap_queues		= pm_unmap_queues_vi,
+	.query_status		= pm_query_status_vi,
+	.release_mem		= pm_release_mem_vi,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c89326125d71..6af1b5881f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -36,6 +36,10 @@
 #include <linux/seq_file.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
+#include <linux/device_cgroup.h>
+#include <drm/drm_file.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_device.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
@@ -55,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT	62
 #define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT 46
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
 				<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
 				& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
 				>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK	(0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
@@ -179,10 +180,6 @@ enum cache_policy {
 	cache_policy_noncoherent
 };
 
-#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11)
-#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \
-			   (chip) <= CHIP_NAVI10) || \
-			   (chip) == CHIP_HAWAII)
 #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
 
 struct kfd_event_interrupt_class {
@@ -230,6 +227,7 @@ struct kfd_dev {
 
 	const struct kfd_device_info *device_info;
 	struct pci_dev *pdev;
+	struct drm_device *ddev;
 
 	unsigned int id;		/* topology stub index */
 
@@ -237,9 +235,10 @@ struct kfd_dev {
 					 * KFD. It is aligned for mapping
 					 * into user mode
 					 */
-	size_t doorbell_id_offset;	/* Doorbell offset (from KFD doorbell
-					 * to HW doorbell, GFX reserved some
-					 * at the start)
+	size_t doorbell_base_dw_offset;	/* Offset from the start of the PCI
+					 * doorbell BAR to the first KFD
+					 * doorbell in dwords. GFX reserves
+					 * the segment before this offset.
 					 */
 	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
 					   * page used by kernel queue
@@ -509,8 +508,7 @@ struct queue {
  * Please read the kfd_mqd_manager.h description.
  */
 enum KFD_MQD_TYPE {
-	KFD_MQD_TYPE_COMPUTE = 0,	/* for no cp scheduling */
-	KFD_MQD_TYPE_HIQ,		/* for hiq */
+	KFD_MQD_TYPE_HIQ = 0,		/* for hiq */
 	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
 	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
 	KFD_MQD_TYPE_DIQ,		/* for diq */
@@ -687,7 +685,7 @@ struct kfd_process {
 	/* We want to receive a notification when the mm_struct is destroyed */
 	struct mmu_notifier mmu_notifier;
 
-	unsigned int pasid;
+	uint16_t pasid;
 	unsigned int doorbell_index;
 
 	/*
@@ -817,7 +815,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
 u32 read_kernel_doorbell(u32 __iomem *db);
 void write_kernel_doorbell(void __iomem *db, u32 value);
 void write_kernel_doorbell64(void __iomem *db, u64 value);
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 					struct kfd_process *process,
 					unsigned int doorbell_id);
 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
@@ -885,7 +883,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 					enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq);
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
 int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
 
 /* Process Queue Manager */
@@ -903,7 +901,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int *qid);
+			    unsigned int *qid,
+			    uint32_t *p_doorbell_offset_in_process);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
@@ -971,10 +970,9 @@ struct packet_manager_funcs {
 
 extern const struct packet_manager_funcs kfd_vi_pm_funcs;
 extern const struct packet_manager_funcs kfd_v9_pm_funcs;
-extern const struct packet_manager_funcs kfd_v10_pm_funcs;
 
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm);
+void pm_uninit(struct packet_manager *pm, bool hanging);
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res);
 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
@@ -990,9 +988,6 @@ void pm_release_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
-			struct scheduling_resources *res);
-
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
@@ -1040,6 +1035,21 @@ bool kfd_is_locked(void);
 void kfd_inc_compute_active(struct kfd_dev *dev);
 void kfd_dec_compute_active(struct kfd_dev *dev);
 
+/* Cgroup Support */
+/* Check with device cgroup if @kfd device is accessible */
+static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
+{
+#if defined(CONFIG_CGROUP_DEVICE)
+	struct drm_device *ddev = kfd->ddev;
+
+	return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major,
+					  ddev->render->index,
+					  DEVCG_ACC_WRITE | DEVCG_ACC_READ);
+#else
+	return 0;
+#endif
+}
+
 /* Debugfs */
 #if defined(CONFIG_DEBUG_FS)
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 40e3fc0c6942..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
 
 struct mm_struct;
 
@@ -324,6 +325,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
 					(int)process->lead_thread->pid);
 	}
 out:
+	if (!IS_ERR(process))
+		kref_get(&process->ref);
 	mutex_unlock(&kfd_processes_mutex);
 
 	return process;
@@ -416,7 +419,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
 				 per_device_list) {
-		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
 				pdd->dev->id, p->pasid);
 
 		if (pdd->drm_file) {
@@ -560,8 +563,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
 			continue;
 
-		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
-			<< PAGE_SHIFT;
+		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
 		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
 			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
 			MAP_SHARED, offset);
@@ -687,6 +689,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
 			struct kfd_dev *dev)
 {
 	unsigned int i;
+	int range_start = dev->shared_resources.non_cp_doorbells_start;
+	int range_end = dev->shared_resources.non_cp_doorbells_end;
 
 	if (!KFD_IS_SOC15(dev->device_info->asic_family))
 		return 0;
@@ -698,14 +702,16 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
 		return -ENOMEM;
 
 	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
 	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
-		if (i >= dev->shared_resources.non_cp_doorbells_start
-			&& i <= dev->shared_resources.non_cp_doorbells_end) {
+		if (i >= range_start && i <= range_end) {
 			set_bit(i, qpd->doorbell_bitmap);
 			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
 				qpd->doorbell_bitmap);
-			pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
-				i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
 		}
 	}
 
@@ -1020,7 +1026,7 @@ static void evict_process_worker(struct work_struct *work)
 	 */
 	flush_delayed_work(&p->restore_work);
 
-	pr_debug("Started evicting pasid %d\n", p->pasid);
+	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p);
 	if (!ret) {
 		dma_fence_signal(p->ef);
@@ -1029,9 +1035,9 @@ static void evict_process_worker(struct work_struct *work)
 		queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
 
-		pr_debug("Finished evicting pasid %d\n", p->pasid);
+		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
-		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
+		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 }
 
 static void restore_process_worker(struct work_struct *work)
@@ -1046,7 +1052,7 @@ static void restore_process_worker(struct work_struct *work)
 	 * lifetime of this thread, kfd_process p will be valid
 	 */
 	p = container_of(dwork, struct kfd_process, restore_work);
-	pr_debug("Started restoring pasid %d\n", p->pasid);
+	pr_debug("Started restoring pasid 0x%x\n", p->pasid);
 
 	/* Setting last_restore_timestamp before successful restoration.
 	 * Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1062,7 +1068,7 @@ static void restore_process_worker(struct work_struct *work)
 	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
 						     &p->ef);
 	if (ret) {
-		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
+		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
@@ -1072,9 +1078,9 @@ static void restore_process_worker(struct work_struct *work)
 
 	ret = kfd_process_restore_queues(p);
 	if (!ret)
-		pr_debug("Finished restoring pasid %d\n", p->pasid);
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
 	else
-		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1088,7 +1094,7 @@ void kfd_suspend_all_processes(void)
 		cancel_delayed_work_sync(&p->restore_work);
 
 		if (kfd_process_evict_queues(p))
-			pr_err("Failed to suspend process %d\n", p->pasid);
+			pr_err("Failed to suspend process 0x%x\n", p->pasid);
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
 		p->ef = NULL;
@@ -1147,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
 		 * only happens when the first queue is created.
 		 */
 		if (pdd->qpd.vmid)
-			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+							pdd->qpd.vmid);
 	} else {
-		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+						pdd->process->pasid);
 	}
 }
 
@@ -1171,7 +1178,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
 	int idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		seq_printf(m, "Process %d PASID %d:\n",
+		seq_printf(m, "Process %d PASID 0x%x:\n",
 			   p->lead_thread->tgid, p->pasid);
 
 		mutex_lock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7e6c3ee82f5b..31fcd1b51f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -53,7 +53,7 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	pr_debug("The new slot id %lu\n", found);
 
 	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
-		pr_info("Cannot open more queues for process with pasid %d\n",
+		pr_info("Cannot open more queues for process with pasid 0x%x\n",
 				pqm->process->pasid);
 		return -ENOMEM;
 	}
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
 	pqm->queue_slot_bitmap = NULL;
 }
 
-static int create_cp_queue(struct process_queue_manager *pqm,
+static int init_user_queue(struct process_queue_manager *pqm,
 				struct kfd_dev *dev, struct queue **q,
 				struct queue_properties *q_properties,
 				struct file *f, unsigned int qid)
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int *qid)
+			    unsigned int *qid,
+			    uint32_t *p_doorbell_offset_in_process)
 {
 	int retval;
 	struct kfd_process_device *pdd;
@@ -250,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -271,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -298,17 +299,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	}
 
 	if (retval != 0) {
-		pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+		pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
 			pqm->process->pasid, type, retval);
 		goto err_create_queue;
 	}
 
-	if (q)
+	if (q && p_doorbell_offset_in_process)
 		/* Return the doorbell offset within the doorbell page
 		 * to the caller so it can be passed up to user mode
 		 * (in bytes).
+		 * There are always 1024 doorbells per process, so in case
+		 * of 8-byte doorbells, there are two doorbell pages per
+		 * process.
 		 */
-		properties->doorbell_off =
+		*p_doorbell_offset_in_process =
 			(q->properties.doorbell_off * sizeof(uint32_t)) &
 			(kfd_doorbell_process_slice(dev) - 1);
 
@@ -370,14 +374,14 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 		/* destroy kernel queue (DIQ) */
 		dqm = pqn->kq->dev->dqm;
 		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
-		kernel_queue_uninit(pqn->kq);
+		kernel_queue_uninit(pqn->kq, false);
 	}
 
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
 		if (retval) {
-			pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
 				pqm->process->pasid,
 				pqn->q->properties.queue_id, retval);
 			if (retval != -ETIME)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 7551761f2aa9..203c823d65f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -269,6 +269,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
 	buffer[0] = 0;
 
 	iolink = container_of(attr, struct kfd_iolink_properties, attr);
+	if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
+		return -EPERM;
 	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
 	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
 	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
@@ -305,6 +307,8 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
 	buffer[0] = 0;
 
 	mem = container_of(attr, struct kfd_mem_properties, attr);
+	if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
+		return -EPERM;
 	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
 	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
 	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
@@ -334,6 +338,8 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 	buffer[0] = 0;
 
 	cache = container_of(attr, struct kfd_cache_properties, attr);
+	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+		return -EPERM;
 	sysfs_show_32bit_prop(buffer, "processor_id_low",
 			cache->processor_id_low);
 	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
@@ -414,6 +420,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 	if (strcmp(attr->name, "gpu_id") == 0) {
 		dev = container_of(attr, struct kfd_topology_device,
 				attr_gpuid);
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
 		return sysfs_show_32bit_val(buffer, dev->gpu_id);
 	}
 
@@ -421,11 +429,15 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 		dev = container_of(attr, struct kfd_topology_device,
 				attr_name);
 
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
 		return sysfs_show_str_val(buffer, dev->node_props.name);
 	}
 
 	dev = container_of(attr, struct kfd_topology_device,
 			attr_props);
+	if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+		return -EPERM;
 	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
 			dev->node_props.cpu_cores_count);
 	sysfs_show_32bit_prop(buffer, "simd_count",
@@ -474,6 +486,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.num_sdma_engines);
 	sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
 			dev->node_props.num_sdma_xgmi_engines);
+	sysfs_show_32bit_prop(buffer, "num_sdma_queues_per_engine",
+			dev->node_props.num_sdma_queues_per_engine);
+	sysfs_show_32bit_prop(buffer, "num_cp_queues",
+			dev->node_props.num_cp_queues);
 
 	if (dev->gpu) {
 		log_max_watch_addr =
@@ -1098,6 +1114,9 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 {
 	struct kfd_topology_device *dev;
 	struct kfd_topology_device *out_dev = NULL;
+	struct kfd_mem_properties *mem;
+	struct kfd_cache_properties *cache;
+	struct kfd_iolink_properties *iolink;
 
 	down_write(&topology_lock);
 	list_for_each_entry(dev, &topology_device_list, list) {
@@ -1111,6 +1130,13 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
 			dev->gpu = gpu;
 			out_dev = dev;
+
+			list_for_each_entry(mem, &dev->mem_props, list)
+				mem->gpu = dev->gpu;
+			list_for_each_entry(cache, &dev->cache_props, list)
+				cache->gpu = dev->gpu;
+			list_for_each_entry(iolink, &dev->io_link_props, list)
+				iolink->gpu = dev->gpu;
 			break;
 		}
 	}
@@ -1287,9 +1313,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
 	dev->node_props.num_sdma_xgmi_engines =
 				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_sdma_queues_per_engine =
+				gpu->device_info->num_sdma_queues_per_engine;
 	dev->node_props.num_gws = (hws_gws_support &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+	dev->node_props.num_cp_queues = get_queues_num(dev->gpu->dqm);
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
@@ -1317,8 +1346,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 	case CHIP_ARCTURUS:
 	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index d4718d58d0f2..74e9b1682af8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -81,6 +81,8 @@ struct kfd_node_properties {
 	int32_t  drm_render_minor;
 	uint32_t num_sdma_engines;
 	uint32_t num_sdma_xgmi_engines;
+	uint32_t num_sdma_queues_per_engine;
+	uint32_t num_cp_queues;
 	char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };
 
@@ -102,6 +104,7 @@ struct kfd_mem_properties {
 	uint32_t		flags;
 	uint32_t		width;
 	uint32_t		mem_clk_max;
+	struct kfd_dev		*gpu;
 	struct kobject		*kobj;
 	struct attribute	attr;
 };
@@ -123,6 +126,7 @@ struct kfd_cache_properties {
 	uint32_t		cache_latency;
 	uint32_t		cache_type;
 	uint8_t			sibling_map[CRAT_SIBLINGMAP_SIZE];
+	struct kfd_dev		*gpu;
 	struct kobject		*kobj;
 	struct attribute	attr;
 };
@@ -141,6 +145,7 @@ struct kfd_iolink_properties {
 	uint32_t		max_bandwidth;
 	uint32_t		rec_transfer_size;
 	uint32_t		flags;
+	struct kfd_dev		*gpu;
 	struct kobject		*kobj;
 	struct attribute	attr;
 };
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 71991a28a775..87858bc57e64 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
 menu "Display Engine Configuration"
 	depends on DRM && DRM_AMDGPU
 
@@ -6,43 +6,24 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+	select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
 	  Raven ASICs.
 
-config DRM_AMD_DC_DCN1_0
+config DRM_AMD_DC_DCN
 	def_bool n
 	help
-	  RV family support for display engine
+	  Raven, Navi and Renoir family support for display engine
 
-config DRM_AMD_DC_DCN2_0
-	bool "DCN 2.0 family"
-	default y
-	depends on DRM_AMD_DC && X86
-	depends on DRM_AMD_DC_DCN1_0
-	help
-	    Choose this option if you want to have
-	    Navi support for display engine
-
-config DRM_AMD_DC_DCN2_1
-        bool "DCN 2.1 family"
-        depends on DRM_AMD_DC && X86
-        depends on DRM_AMD_DC_DCN2_0
-        help
-            Choose this option if you want to have
-            Renoir support for display engine
-
-config DRM_AMD_DC_DSC_SUPPORT
-	bool "DSC support"
-	default y
-	depends on DRM_AMD_DC && X86
-	depends on DRM_AMD_DC_DCN1_0
-	depends on DRM_AMD_DC_DCN2_0
+config DRM_AMD_DC_HDCP
+	bool "Enable HDCP support in DC"
+	depends on DRM_AMD_DC
 	help
-	    Choose this option if you want to have
-	    Dynamic Stream Compression support
+	 Choose this option
+	 if you want to support
+	 HDCP authentication
 
 config DEBUG_KERNEL_DC
 	bool "Enable kgdb break in DC"
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 496cee000f10..2633de77de5e 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -34,11 +34,20 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/info_packet
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/power
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dmub/inc
+
+ifdef CONFIG_DRM_AMD_DC_HDCP
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/hdcp
+endif
 
 #TODO: remove when Timing Sync feature is complete
 subdir-ccflags-y += -DBUILD_FEATURE_TIMING_SYNC=0
 
-DAL_LIBS = amdgpu_dm dc	modules/freesync modules/color modules/info_packet modules/power
+DAL_LIBS = amdgpu_dm dc	modules/freesync modules/color modules/info_packet modules/power dmub/src
+
+ifdef CONFIG_DRM_AMD_DC_HDCP
+DAL_LIBS += modules/hdcp
+endif
 
 AMD_DAL = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/,$(DAL_LIBS)))
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 94911871eb9b..9a3b7bf8ab0b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -31,6 +31,10 @@ ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o
 endif
 
+ifdef CONFIG_DRM_AMD_DC_HDCP
+AMDGPUDM += amdgpu_dm_hdcp.o
+endif
+
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDGPUDM += amdgpu_dm_crc.o amdgpu_dm_debugfs.o
 endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4139f129eafb..9402374d2466 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -30,6 +30,10 @@
 #include "dc.h"
 #include "dc/inc/core_types.h"
 #include "dal_asic_id.h"
+#include "dmub/inc/dmub_srv.h"
+#include "dc/inc/hw/dmcu.h"
+#include "dc/inc/hw/abm.h"
+#include "dc/dc_dmub_srv.h"
 
 #include "vid.h"
 #include "amdgpu.h"
@@ -37,6 +41,10 @@
 #include "amdgpu_ucode.h"
 #include "atom.h"
 #include "amdgpu_dm.h"
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+#include "amdgpu_dm_hdcp.h"
+#include <drm/drm_hdcp.h>
+#endif
 #include "amdgpu_pm.h"
 
 #include "amd_shared.h"
@@ -67,8 +75,9 @@
 #include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
 #include <drm/drm_audio_component.h>
+#include <drm/drm_hdcp.h>
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
 
 #include "dcn/dcn_1_0_offset.h"
@@ -83,9 +92,18 @@
 #include "modules/power/power_helpers.h"
 #include "modules/inc/mod_info_packet.h"
 
+#define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB);
+
 #define FIRMWARE_RAVEN_DMCU		"amdgpu/raven_dmcu.bin"
 MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
 
+/* Number of bytes in PSP header for firmware. */
+#define PSP_HEADER_BYTES 0x100
+
+/* Number of bytes in PSP footer for firmware. */
+#define PSP_FOOTER_BYTES 0x100
+
 /**
  * DOC: overview
  *
@@ -143,6 +161,12 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 static void handle_cursor_update(struct drm_plane *plane,
 				 struct drm_plane_state *old_plane_state);
 
+static void amdgpu_dm_set_psr_caps(struct dc_link *link);
+static bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
+static bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
+
+
 /*
  * dm_vblank_get_counter
  *
@@ -263,6 +287,13 @@ static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state)
 	       dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
 }
 
+/**
+ * dm_pflip_high_irq() - Handle pageflip interrupt
+ * @interrupt_params: ignored
+ *
+ * Handles the pageflip interrupt by notifying all interested parties
+ * that the pageflip has been completed.
+ */
 static void dm_pflip_high_irq(void *interrupt_params)
 {
 	struct amdgpu_crtc *amdgpu_crtc;
@@ -407,6 +438,13 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 	}
 }
 
+/**
+ * dm_crtc_high_irq() - Handles CRTC interrupt
+ * @interrupt_params: ignored
+ *
+ * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
+ * event handler.
+ */
 static void dm_crtc_high_irq(void *interrupt_params)
 {
 	struct common_irq_params *irq_params = interrupt_params;
@@ -454,6 +492,70 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	}
 }
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+/**
+ * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
+ * @interrupt params - interrupt parameters
+ *
+ * Notify DRM's vblank event handler at VSTARTUP
+ *
+ * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
+ * * We are close enough to VUPDATE - the point of no return for hw
+ * * We are in the fixed portion of variable front porch when vrr is enabled
+ * * We are before VUPDATE, where double-buffered vrr registers are swapped
+ *
+ * It is therefore the correct place to signal vblank, send user flip events,
+ * and update VRR.
+ */
+static void dm_dcn_crtc_high_irq(void *interrupt_params)
+{
+	struct common_irq_params *irq_params = interrupt_params;
+	struct amdgpu_device *adev = irq_params->adev;
+	struct amdgpu_crtc *acrtc;
+	struct dm_crtc_state *acrtc_state;
+	unsigned long flags;
+
+	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
+
+	if (!acrtc)
+		return;
+
+	acrtc_state = to_dm_crtc_state(acrtc->base.state);
+
+	DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
+				amdgpu_dm_vrr_active(acrtc_state));
+
+	amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
+	drm_crtc_handle_vblank(&acrtc->base);
+
+	spin_lock_irqsave(&adev->ddev->event_lock, flags);
+
+	if (acrtc_state->vrr_params.supported &&
+	    acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
+		mod_freesync_handle_v_update(
+		adev->dm.freesync_module,
+		acrtc_state->stream,
+		&acrtc_state->vrr_params);
+
+		dc_stream_adjust_vmin_vmax(
+			adev->dm.dc,
+			acrtc_state->stream,
+			&acrtc_state->vrr_params.adjust);
+	}
+
+	if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED) {
+		if (acrtc->event) {
+			drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
+			acrtc->event = NULL;
+			drm_crtc_vblank_put(&acrtc->base);
+		}
+		acrtc->pflip_status = AMDGPU_FLIP_NONE;
+	}
+
+	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
+}
+#endif
+
 static int dm_set_clockgating_state(void *handle,
 		  enum amd_clockgating_state state)
 {
@@ -643,14 +745,135 @@ void amdgpu_dm_audio_eld_notify(struct amdgpu_device *adev, int pin)
 	}
 }
 
+static int dm_dmub_hw_init(struct amdgpu_device *adev)
+{
+	const struct dmcub_firmware_header_v1_0 *hdr;
+	struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
+	struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info;
+	const struct firmware *dmub_fw = adev->dm.dmub_fw;
+	struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu;
+	struct abm *abm = adev->dm.dc->res_pool->abm;
+	struct dmub_srv_hw_params hw_params;
+	enum dmub_status status;
+	const unsigned char *fw_inst_const, *fw_bss_data;
+	uint32_t i, fw_inst_const_size, fw_bss_data_size;
+	bool has_hw_support;
+
+	if (!dmub_srv)
+		/* DMUB isn't supported on the ASIC. */
+		return 0;
+
+	if (!fb_info) {
+		DRM_ERROR("No framebuffer info for DMUB service.\n");
+		return -EINVAL;
+	}
+
+	if (!dmub_fw) {
+		/* Firmware required for DMUB support. */
+		DRM_ERROR("No firmware provided for DMUB.\n");
+		return -EINVAL;
+	}
+
+	status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support);
+	if (status != DMUB_STATUS_OK) {
+		DRM_ERROR("Error checking HW support for DMUB: %d\n", status);
+		return -EINVAL;
+	}
+
+	if (!has_hw_support) {
+		DRM_INFO("DMUB unsupported on ASIC\n");
+		return 0;
+	}
+
+	hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data;
+
+	fw_inst_const = dmub_fw->data +
+			le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+			PSP_HEADER_BYTES;
+
+	fw_bss_data = dmub_fw->data +
+		      le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+		      le32_to_cpu(hdr->inst_const_bytes);
+
+	/* Copy firmware and bios info into FB memory. */
+	fw_inst_const_size = le32_to_cpu(hdr->inst_const_bytes) -
+			     PSP_HEADER_BYTES - PSP_FOOTER_BYTES;
+
+	fw_bss_data_size = le32_to_cpu(hdr->bss_data_bytes);
+
+	memcpy(fb_info->fb[DMUB_WINDOW_0_INST_CONST].cpu_addr, fw_inst_const,
+	       fw_inst_const_size);
+	memcpy(fb_info->fb[DMUB_WINDOW_2_BSS_DATA].cpu_addr, fw_bss_data,
+	       fw_bss_data_size);
+	memcpy(fb_info->fb[DMUB_WINDOW_3_VBIOS].cpu_addr, adev->bios,
+	       adev->bios_size);
+
+	/* Reset regions that need to be reset. */
+	memset(fb_info->fb[DMUB_WINDOW_4_MAILBOX].cpu_addr, 0,
+	fb_info->fb[DMUB_WINDOW_4_MAILBOX].size);
+
+	memset(fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].cpu_addr, 0,
+	       fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size);
+
+	memset(fb_info->fb[DMUB_WINDOW_6_FW_STATE].cpu_addr, 0,
+	       fb_info->fb[DMUB_WINDOW_6_FW_STATE].size);
+
+	/* Initialize hardware. */
+	memset(&hw_params, 0, sizeof(hw_params));
+	hw_params.fb_base = adev->gmc.fb_start;
+	hw_params.fb_offset = adev->gmc.aper_base;
+
+	if (dmcu)
+		hw_params.psp_version = dmcu->psp_version;
+
+	for (i = 0; i < fb_info->num_fb; ++i)
+		hw_params.fb[i] = &fb_info->fb[i];
+
+	status = dmub_srv_hw_init(dmub_srv, &hw_params);
+	if (status != DMUB_STATUS_OK) {
+		DRM_ERROR("Error initializing DMUB HW: %d\n", status);
+		return -EINVAL;
+	}
+
+	/* Wait for firmware load to finish. */
+	status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
+	if (status != DMUB_STATUS_OK)
+		DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
+
+	/* Init DMCU and ABM if available. */
+	if (dmcu && abm) {
+		dmcu->funcs->dmcu_init(dmcu);
+		abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
+	}
+
+	adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv);
+	if (!adev->dm.dc->ctx->dmub_srv) {
+		DRM_ERROR("Couldn't allocate DC DMUB server!\n");
+		return -ENOMEM;
+	}
+
+	DRM_INFO("DMUB hardware initialized: version=0x%08X\n",
+		 adev->dm.dmcub_fw_version);
+
+	return 0;
+}
+
 static int amdgpu_dm_init(struct amdgpu_device *adev)
 {
 	struct dc_init_data init_data;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	struct dc_callback_init init_params;
+#endif
+	int r;
+
 	adev->dm.ddev = adev->ddev;
 	adev->dm.adev = adev;
 
 	/* Zero all the fields */
 	memset(&init_data, 0, sizeof(init_data));
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	memset(&init_params, 0, sizeof(init_params));
+#endif
 
 	mutex_init(&adev->dm.dc_lock);
 	mutex_init(&adev->dm.audio_lock);
@@ -683,13 +906,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
 	init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
 
-	/*
-	 * TODO debug why this doesn't work on Raven
-	 */
-	if (adev->flags & AMD_IS_APU &&
-	    adev->asic_type >= CHIP_CARRIZO &&
-	    adev->asic_type < CHIP_RAVEN)
+	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 		init_data.flags.gpu_vm_support = true;
+		break;
+	default:
+		break;
+	}
 
 	if (amdgpu_dc_feature_mask & DC_FBC_MASK)
 		init_data.flags.fbc_support = true;
@@ -697,11 +923,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	if (amdgpu_dc_feature_mask & DC_MULTI_MON_PP_MCLK_SWITCH_MASK)
 		init_data.flags.multi_mon_pp_mclk_switch = true;
 
+	if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK)
+		init_data.flags.disable_fractional_pwm = true;
+
 	init_data.flags.power_down_display_on_boot = true;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	init_data.soc_bounding_box = adev->dm.soc_bounding_box;
-#endif
 
 	/* Display Core create. */
 	adev->dm.dc = dc_create(&init_data);
@@ -713,6 +940,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 		goto error;
 	}
 
+	dc_hardware_init(adev->dm.dc);
+
+	r = dm_dmub_hw_init(adev);
+	if (r) {
+		DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+		goto error;
+	}
+
 	adev->dm.freesync_module = mod_freesync_create(adev->dm.dc);
 	if (!adev->dm.freesync_module) {
 		DRM_ERROR(
@@ -723,6 +958,18 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
 	amdgpu_dm_init_color_mod();
 
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	if (adev->asic_type >= CHIP_RAVEN) {
+		adev->dm.hdcp_workqueue = hdcp_create_workqueue(&adev->psp, &init_params.cp_psp, adev->dm.dc);
+
+		if (!adev->dm.hdcp_workqueue)
+			DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n");
+		else
+			DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue);
+
+		dc_init_callbacks(adev->dm.dc, &init_params);
+	}
+#endif
 	if (amdgpu_dm_initialize_drm_device(adev)) {
 		DRM_ERROR(
 		"amdgpu: failed to initialize sw for display support.\n");
@@ -764,6 +1011,25 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 
 	amdgpu_dm_destroy_drm_device(&adev->dm);
 
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	if (adev->dm.hdcp_workqueue) {
+		hdcp_destroy(adev->dm.hdcp_workqueue);
+		adev->dm.hdcp_workqueue = NULL;
+	}
+
+	if (adev->dm.dc)
+		dc_deinit_callbacks(adev->dm.dc);
+#endif
+	if (adev->dm.dc->ctx->dmub_srv) {
+		dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
+		adev->dm.dc->ctx->dmub_srv = NULL;
+	}
+
+	if (adev->dm.dmub_bo)
+		amdgpu_bo_free_kernel(&adev->dm.dmub_bo,
+				      &adev->dm.dmub_bo_gpu_addr,
+				      &adev->dm.dmub_bo_cpu_addr);
+
 	/* DC Destroy TODO: Replace destroy DAL */
 	if (adev->dm.dc)
 		dc_destroy(&adev->dm.dc);
@@ -874,9 +1140,160 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
 	return 0;
 }
 
+static uint32_t amdgpu_dm_dmub_reg_read(void *ctx, uint32_t address)
+{
+	struct amdgpu_device *adev = ctx;
+
+	return dm_read_reg(adev->dm.dc->ctx, address);
+}
+
+static void amdgpu_dm_dmub_reg_write(void *ctx, uint32_t address,
+				     uint32_t value)
+{
+	struct amdgpu_device *adev = ctx;
+
+	return dm_write_reg(adev->dm.dc->ctx, address, value);
+}
+
+static int dm_dmub_sw_init(struct amdgpu_device *adev)
+{
+	struct dmub_srv_create_params create_params;
+	struct dmub_srv_region_params region_params;
+	struct dmub_srv_region_info region_info;
+	struct dmub_srv_fb_params fb_params;
+	struct dmub_srv_fb_info *fb_info;
+	struct dmub_srv *dmub_srv;
+	const struct dmcub_firmware_header_v1_0 *hdr;
+	const char *fw_name_dmub;
+	enum dmub_asic dmub_asic;
+	enum dmub_status status;
+	int r;
+
+	switch (adev->asic_type) {
+	case CHIP_RENOIR:
+		dmub_asic = DMUB_ASIC_DCN21;
+		fw_name_dmub = FIRMWARE_RENOIR_DMUB;
+		break;
+
+	default:
+		/* ASIC doesn't support DMUB. */
+		return 0;
+	}
+
+	r = request_firmware_direct(&adev->dm.dmub_fw, fw_name_dmub, adev->dev);
+	if (r) {
+		DRM_ERROR("DMUB firmware loading failed: %d\n", r);
+		return 0;
+	}
+
+	r = amdgpu_ucode_validate(adev->dm.dmub_fw);
+	if (r) {
+		DRM_ERROR("Couldn't validate DMUB firmware: %d\n", r);
+		return 0;
+	}
+
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+		DRM_WARN("Only PSP firmware loading is supported for DMUB\n");
+		return 0;
+	}
+
+	hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
+	adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].ucode_id =
+		AMDGPU_UCODE_ID_DMCUB;
+	adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].fw = adev->dm.dmub_fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE);
+
+	adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
+
+	DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n",
+		 adev->dm.dmcub_fw_version);
+
+	adev->dm.dmub_srv = kzalloc(sizeof(*adev->dm.dmub_srv), GFP_KERNEL);
+	dmub_srv = adev->dm.dmub_srv;
+
+	if (!dmub_srv) {
+		DRM_ERROR("Failed to allocate DMUB service!\n");
+		return -ENOMEM;
+	}
+
+	memset(&create_params, 0, sizeof(create_params));
+	create_params.user_ctx = adev;
+	create_params.funcs.reg_read = amdgpu_dm_dmub_reg_read;
+	create_params.funcs.reg_write = amdgpu_dm_dmub_reg_write;
+	create_params.asic = dmub_asic;
+
+	/* Create the DMUB service. */
+	status = dmub_srv_create(dmub_srv, &create_params);
+	if (status != DMUB_STATUS_OK) {
+		DRM_ERROR("Error creating DMUB service: %d\n", status);
+		return -EINVAL;
+	}
+
+	/* Calculate the size of all the regions for the DMUB service. */
+	memset(&region_params, 0, sizeof(region_params));
+
+	region_params.inst_const_size = le32_to_cpu(hdr->inst_const_bytes) -
+					PSP_HEADER_BYTES - PSP_FOOTER_BYTES;
+	region_params.bss_data_size = le32_to_cpu(hdr->bss_data_bytes);
+	region_params.vbios_size = adev->bios_size;
+	region_params.fw_bss_data =
+		adev->dm.dmub_fw->data +
+		le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+		le32_to_cpu(hdr->inst_const_bytes);
+
+	status = dmub_srv_calc_region_info(dmub_srv, &region_params,
+					   &region_info);
+
+	if (status != DMUB_STATUS_OK) {
+		DRM_ERROR("Error calculating DMUB region info: %d\n", status);
+		return -EINVAL;
+	}
+
+	/*
+	 * Allocate a framebuffer based on the total size of all the regions.
+	 * TODO: Move this into GART.
+	 */
+	r = amdgpu_bo_create_kernel(adev, region_info.fb_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM, &adev->dm.dmub_bo,
+				    &adev->dm.dmub_bo_gpu_addr,
+				    &adev->dm.dmub_bo_cpu_addr);
+	if (r)
+		return r;
+
+	/* Rebase the regions on the framebuffer address. */
+	memset(&fb_params, 0, sizeof(fb_params));
+	fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
+	fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
+	fb_params.region_info = &region_info;
+
+	adev->dm.dmub_fb_info =
+		kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
+	fb_info = adev->dm.dmub_fb_info;
+
+	if (!fb_info) {
+		DRM_ERROR(
+			"Failed to allocate framebuffer info for DMUB service!\n");
+		return -ENOMEM;
+	}
+
+	status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+	if (status != DMUB_STATUS_OK) {
+		DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int dm_sw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	r = dm_dmub_sw_init(adev);
+	if (r)
+		return r;
 
 	return load_dmcu_fw(adev);
 }
@@ -885,6 +1302,19 @@ static int dm_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	kfree(adev->dm.dmub_fb_info);
+	adev->dm.dmub_fb_info = NULL;
+
+	if (adev->dm.dmub_srv) {
+		dmub_srv_destroy(adev->dm.dmub_srv);
+		adev->dm.dmub_srv = NULL;
+	}
+
+	if (adev->dm.dmub_fw) {
+		release_firmware(adev->dm.dmub_fw);
+		adev->dm.dmub_fw = NULL;
+	}
+
 	if(adev->dm.fw_dmcu) {
 		release_firmware(adev->dm.fw_dmcu);
 		adev->dm.fw_dmcu = NULL;
@@ -897,27 +1327,29 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	int ret = 0;
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type == dc_connection_mst_branch &&
 		    aconnector->mst_mgr.aux) {
 			DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n",
-					aconnector, aconnector->base.base.id);
+					 aconnector,
+					 aconnector->base.base.id);
 
 			ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
 			if (ret < 0) {
 				DRM_ERROR("DM_MST: Failed to start MST\n");
-				((struct dc_link *)aconnector->dc_link)->type = dc_connection_single;
-				return ret;
-				}
+				aconnector->dc_link->type =
+					dc_connection_single;
+				break;
 			}
+		}
 	}
+	drm_connector_list_iter_end(&iter);
 
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	return ret;
 }
 
@@ -940,6 +1372,11 @@ static int dm_late_init(void *handle)
 	params.backlight_lut_array_size = 16;
 	params.backlight_lut_array = linear_lut;
 
+	/* Min backlight level after ABM reduction,  Don't allow below 1%
+	 * 0xFFFF x 0.01 = 0x28F
+	 */
+	params.min_abm_backlight = 0x28F;
+
 	/* todo will enable for navi10 */
 	if (adev->asic_type <= CHIP_RAVEN) {
 		ret = dmcu_load_iram(dmcu, params);
@@ -955,14 +1392,13 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct drm_dp_mst_topology_mgr *mgr;
 	int ret;
 	bool need_hotplug = false;
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-
-	list_for_each_entry(connector, &dev->mode_config.connector_list,
-			    head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type != dc_connection_mst_branch ||
 		    aconnector->mst_port)
@@ -973,15 +1409,14 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 		if (suspend) {
 			drm_dp_mst_topology_mgr_suspend(mgr);
 		} else {
-			ret = drm_dp_mst_topology_mgr_resume(mgr);
+			ret = drm_dp_mst_topology_mgr_resume(mgr, true);
 			if (ret < 0) {
 				drm_dp_mst_topology_mgr_set_mst(mgr, false);
 				need_hotplug = true;
 			}
 		}
 	}
-
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	drm_connector_list_iter_end(&iter);
 
 	if (need_hotplug)
 		drm_kms_helper_hotplug_event(dev);
@@ -989,7 +1424,7 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 
 /**
  * dm_hw_init() - Initialize DC device
- * @handle: The base driver device containing the amdpgu_dm device.
+ * @handle: The base driver device containing the amdgpu_dm device.
  *
  * Initialize the &struct amdgpu_display_manager device. This involves calling
  * the initializers of each DM component, then populating the struct with them.
@@ -1019,7 +1454,7 @@ static int dm_hw_init(void *handle)
 
 /**
  * dm_hw_fini() - Teardown DC device
- * @handle: The base driver device containing the amdpgu_dm device.
+ * @handle: The base driver device containing the amdgpu_dm device.
  *
  * Teardown components within &struct amdgpu_display_manager that require
  * cleanup. This involves cleaning up the DRM device, DC, and any modules that
@@ -1163,6 +1598,7 @@ static int dm_resume(void *handle)
 	struct amdgpu_display_manager *dm = &adev->dm;
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *new_crtc_state;
 	struct dm_crtc_state *dm_new_crtc_state;
@@ -1171,7 +1607,7 @@ static int dm_resume(void *handle)
 	struct dm_plane_state *dm_new_plane_state;
 	struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
 	enum dc_connection_type new_connection_type = dc_connection_none;
-	int i;
+	int i, r;
 
 	/* Recreate dc_state - DC invalidates it when setting power state to S3. */
 	dc_release_state(dm_state->context);
@@ -1179,23 +1615,29 @@ static int dm_resume(void *handle)
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
 	dc_resource_state_construct(dm->dc, dm_state->context);
 
+	/* Before powering on DC we need to re-initialize DMUB. */
+	r = dm_dmub_hw_init(adev);
+	if (r)
+		DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+
 	/* power on hardware */
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 	/* program HPD filter */
 	dc_resume(dm->dc);
 
-	/* On resume we need to  rewrite the MSTM control bits to enamble MST*/
-	s3_handle_mst(ddev, false);
-
 	/*
 	 * early enable HPD Rx IRQ, should be done before set mode as short
 	 * pulse interrupts are used for MST
 	 */
 	amdgpu_dm_irq_resume_early(adev);
 
+	/* On resume we need to rewrite the MSTM control bits to enable MST*/
+	s3_handle_mst(ddev, false);
+
 	/* Do detection*/
-	list_for_each_entry(connector, &ddev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(ddev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		/*
@@ -1223,6 +1665,7 @@ static int dm_resume(void *handle)
 		amdgpu_dm_update_connector_after_detect(aconnector);
 		mutex_unlock(&aconnector->hpd_lock);
 	}
+	drm_connector_list_iter_end(&iter);
 
 	/* Force mode set in atomic commit */
 	for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i)
@@ -1438,6 +1881,11 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
 		dc_sink_release(aconnector->dc_sink);
 		aconnector->dc_sink = NULL;
 		aconnector->edid = NULL;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+		/* Set CP to DESIRED if it was ENABLED, so we can re-enable it again on hotplug */
+		if (connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+			connector->state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+#endif
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
@@ -1452,6 +1900,9 @@ static void handle_hpd_irq(void *param)
 	struct drm_connector *connector = &aconnector->base;
 	struct drm_device *dev = connector->dev;
 	enum dc_connection_type new_connection_type = dc_connection_none;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	struct amdgpu_device *adev = dev->dev_private;
+#endif
 
 	/*
 	 * In case of failure or MST no need to update connector status or notify the OS
@@ -1459,6 +1910,10 @@ static void handle_hpd_irq(void *param)
 	 */
 	mutex_lock(&aconnector->hpd_lock);
 
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	if (adev->asic_type >= CHIP_RAVEN)
+		hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index);
+#endif
 	if (aconnector->fake_enable)
 		aconnector->fake_enable = false;
 
@@ -1577,6 +2032,12 @@ static void handle_hpd_rx_irq(void *param)
 	struct dc_link *dc_link = aconnector->dc_link;
 	bool is_mst_root_connector = aconnector->mst_mgr.mst_state;
 	enum dc_connection_type new_connection_type = dc_connection_none;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	union hpd_irq_data hpd_irq_data;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
+#endif
 
 	/*
 	 * TODO:Temporary add mutex to protect hpd interrupt not have a gpio
@@ -1586,7 +2047,12 @@ static void handle_hpd_rx_irq(void *param)
 	if (dc_link->type != dc_connection_mst_branch)
 		mutex_lock(&aconnector->hpd_lock);
 
+
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	if (dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL) &&
+#else
 	if (dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL) &&
+#endif
 			!is_mst_root_connector) {
 		/* Downstream Port status changed. */
 		if (!dc_link_detect_sink(dc_link, &new_connection_type))
@@ -1621,6 +2087,10 @@ static void handle_hpd_rx_irq(void *param)
 			drm_kms_helper_hotplug_event(dev);
 		}
 	}
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ)
+		hdcp_handle_cpirq(adev->dm.hdcp_workqueue,  aconnector->base.index);
+#endif
 	if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
 	    (dc_link->type == dc_connection_mst_branch))
 		dm_handle_hpd_rx_irq(aconnector);
@@ -1775,7 +2245,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
 	return 0;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 /* Register IRQ sources and initialize IRQ callbacks */
 static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
 {
@@ -1821,35 +2291,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
 		c_irq_params->irq_src = int_params.irq_source;
 
 		amdgpu_dm_irq_register_interrupt(adev, &int_params,
-				dm_crtc_high_irq, c_irq_params);
-	}
-
-	/* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
-	 * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
-	 * to trigger at end of each vblank, regardless of state of the lock,
-	 * matching DCE behaviour.
-	 */
-	for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
-	     i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
-	     i++) {
-		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
-
-		if (r) {
-			DRM_ERROR("Failed to add vupdate irq id!\n");
-			return r;
-		}
-
-		int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
-		int_params.irq_source =
-			dc_interrupt_to_irq_source(dc, i, 0);
-
-		c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
-
-		c_irq_params->adev = adev;
-		c_irq_params->irq_src = int_params.irq_source;
-
-		amdgpu_dm_irq_register_interrupt(adev, &int_params,
-				dm_vupdate_high_irq, c_irq_params);
+				dm_dcn_crtc_high_irq, c_irq_params);
 	}
 
 	/* Use GRPH_PFLIP interrupt */
@@ -2334,6 +2776,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		} else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
 			amdgpu_dm_update_connector_after_detect(aconnector);
 			register_backlight_device(dm, link);
+			if (amdgpu_dc_feature_mask & DC_PSR_MASK)
+				amdgpu_dm_set_psr_caps(link);
 		}
 
 
@@ -2362,16 +2806,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			goto fail;
 		}
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case CHIP_NAVI12:
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case CHIP_RENOIR:
-#endif
 		if (dcn10_register_irq_handlers(dm->adev)) {
 			DRM_ERROR("DM: Failed to initialize IRQ\n");
 			goto fail;
@@ -2517,14 +2957,13 @@ static int dm_early_init(void *handle)
 		adev->mode_info.num_hpd = 6;
 		adev->mode_info.num_dig = 6;
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
 		adev->mode_info.num_crtc = 4;
 		adev->mode_info.num_hpd = 4;
 		adev->mode_info.num_dig = 4;
 		break;
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case CHIP_NAVI10:
 	case CHIP_NAVI12:
 		adev->mode_info.num_crtc = 6;
@@ -2536,14 +2975,11 @@ static int dm_early_init(void *handle)
 		adev->mode_info.num_hpd = 5;
 		adev->mode_info.num_dig = 5;
 		break;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case CHIP_RENOIR:
 		adev->mode_info.num_crtc = 4;
 		adev->mode_info.num_hpd = 4;
 		adev->mode_info.num_dig = 4;
 		break;
-#endif
 	default:
 		DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
 		return -EINVAL;
@@ -2836,14 +3272,10 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
 	if (adev->asic_type == CHIP_VEGA10 ||
 	    adev->asic_type == CHIP_VEGA12 ||
 	    adev->asic_type == CHIP_VEGA20 ||
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	    adev->asic_type == CHIP_NAVI10 ||
 	    adev->asic_type == CHIP_NAVI14 ||
 	    adev->asic_type == CHIP_NAVI12 ||
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	    adev->asic_type == CHIP_RENOIR ||
-#endif
 	    adev->asic_type == CHIP_RAVEN) {
 		/* Fill GFX9 params */
 		tiling_info->gfx9.num_pipes =
@@ -3161,12 +3593,26 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
 
 static enum dc_color_depth
 convert_color_depth_from_display_info(const struct drm_connector *connector,
-				      const struct drm_connector_state *state)
+				      const struct drm_connector_state *state,
+				      bool is_y420)
 {
-	uint8_t bpc = (uint8_t)connector->display_info.bpc;
+	uint8_t bpc;
+
+	if (is_y420) {
+		bpc = 8;
 
-	/* Assume 8 bpc by default if no bpc is specified. */
-	bpc = bpc ? bpc : 8;
+		/* Cap display bpc based on HDMI 2.0 HF-VSDB */
+		if (connector->display_info.hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_48)
+			bpc = 16;
+		else if (connector->display_info.hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_36)
+			bpc = 12;
+		else if (connector->display_info.hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_30)
+			bpc = 10;
+	} else {
+		bpc = (uint8_t)connector->display_info.bpc;
+		/* Assume 8 bpc by default if no bpc is specified. */
+		bpc = bpc ? bpc : 8;
+	}
 
 	if (!state)
 		state = connector->state;
@@ -3261,27 +3707,21 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)
 	return color_space;
 }
 
-static void reduce_mode_colour_depth(struct dc_crtc_timing *timing_out)
-{
-	if (timing_out->display_color_depth <= COLOR_DEPTH_888)
-		return;
-
-	timing_out->display_color_depth--;
-}
-
-static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_out,
-						const struct drm_display_info *info)
+static bool adjust_colour_depth_from_display_info(
+	struct dc_crtc_timing *timing_out,
+	const struct drm_display_info *info)
 {
+	enum dc_color_depth depth = timing_out->display_color_depth;
 	int normalized_clk;
-	if (timing_out->display_color_depth <= COLOR_DEPTH_888)
-		return;
 	do {
 		normalized_clk = timing_out->pix_clk_100hz / 10;
 		/* YCbCr 4:2:0 requires additional adjustment of 1/2 */
 		if (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420)
 			normalized_clk /= 2;
 		/* Adjusting pix clock following on HDMI spec based on colour depth */
-		switch (timing_out->display_color_depth) {
+		switch (depth) {
+		case COLOR_DEPTH_888:
+			break;
 		case COLOR_DEPTH_101010:
 			normalized_clk = (normalized_clk * 30) / 24;
 			break;
@@ -3292,14 +3732,15 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_
 			normalized_clk = (normalized_clk * 48) / 24;
 			break;
 		default:
-			return;
+			/* The above depths are the only ones valid for HDMI. */
+			return false;
 		}
-		if (normalized_clk <= info->max_tmds_clock)
-			return;
-		reduce_mode_colour_depth(timing_out);
-
-	} while (timing_out->display_color_depth > COLOR_DEPTH_888);
-
+		if (normalized_clk <= info->max_tmds_clock) {
+			timing_out->display_color_depth = depth;
+			return true;
+		}
+	} while (--depth > COLOR_DEPTH_666);
+	return false;
 }
 
 static void fill_stream_properties_from_drm_display_mode(
@@ -3311,8 +3752,12 @@ static void fill_stream_properties_from_drm_display_mode(
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
 	const struct drm_display_info *info = &connector->display_info;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct hdmi_vendor_infoframe hv_frame;
+	struct hdmi_avi_infoframe avi_frame;
 
-	memset(timing_out, 0, sizeof(struct dc_crtc_timing));
+	memset(&hv_frame, 0, sizeof(hv_frame));
+	memset(&avi_frame, 0, sizeof(avi_frame));
 
 	timing_out->h_border_left = 0;
 	timing_out->h_border_right = 0;
@@ -3322,6 +3767,9 @@ static void fill_stream_properties_from_drm_display_mode(
 	if (drm_mode_is_420_only(info, mode_in)
 			&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
+	else if (drm_mode_is_420_also(info, mode_in)
+			&& aconnector->force_yuv420_output)
+		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444)
 			&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444;
@@ -3330,7 +3778,8 @@ static void fill_stream_properties_from_drm_display_mode(
 
 	timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;
 	timing_out->display_color_depth = convert_color_depth_from_display_info(
-		connector, connector_state);
+		connector, connector_state,
+		(timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420));
 	timing_out->scan_type = SCANNING_TYPE_NODATA;
 	timing_out->hdmi_vic = 0;
 
@@ -3346,6 +3795,13 @@ static void fill_stream_properties_from_drm_display_mode(
 			timing_out->flags.VSYNC_POSITIVE_POLARITY = 1;
 	}
 
+	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+		drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in);
+		timing_out->vic = avi_frame.video_code;
+		drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in);
+		timing_out->hdmi_vic = hv_frame.vic;
+	}
+
 	timing_out->h_addressable = mode_in->crtc_hdisplay;
 	timing_out->h_total = mode_in->crtc_htotal;
 	timing_out->h_sync_width =
@@ -3365,8 +3821,14 @@ static void fill_stream_properties_from_drm_display_mode(
 
 	stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
 	stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
-		adjust_colour_depth_from_display_info(timing_out, info);
+	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+		if (!adjust_colour_depth_from_display_info(timing_out, info) &&
+		    drm_mode_is_420_also(info, mode_in) &&
+		    timing_out->pixel_encoding != PIXEL_ENCODING_YCBCR420) {
+			timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
+			adjust_colour_depth_from_display_info(timing_out, info);
+		}
+	}
 }
 
 static void fill_audio_info(struct audio_info *audio_info,
@@ -3535,10 +3997,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
 	int mode_refresh;
 	int preferred_refresh = 0;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	struct dsc_dec_dpcd_caps dsc_caps;
-	uint32_t link_bandwidth_kbps;
 #endif
+	uint32_t link_bandwidth_kbps;
 
 	struct dc_sink *sink = NULL;
 	if (aconnector == NULL) {
@@ -3566,6 +4028,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	stream->dm_stream_context = aconnector;
 
+	stream->timing.flags.LTE_340MCSC_SCRAMBLE =
+		drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+
 	list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
 		/* Search for preferred mode */
 		if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
@@ -3610,25 +4075,29 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		fill_stream_properties_from_drm_display_mode(stream,
 			&mode, &aconnector->base, con_state, old_stream);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	stream->timing.flags.DSC = 0;
 
 	if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
-		dc_dsc_parse_dsc_dpcd(aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+		dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
+				      aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
 				      aconnector->dc_link->dpcd_caps.dsc_caps.dsc_ext_caps.raw,
 				      &dsc_caps);
+#endif
 		link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
 							     dc_link_get_link_cap(aconnector->dc_link));
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 		if (dsc_caps.is_dsc_supported)
-			if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc,
+			if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
 						  &dsc_caps,
+						  aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override,
 						  link_bandwidth_kbps,
 						  &stream->timing,
 						  &stream->timing.dsc_cfg))
 				stream->timing.flags.DSC = 1;
-	}
 #endif
+	}
 
 	update_stream_scaling_settings(&mode, dm_state, stream);
 
@@ -3639,6 +4108,20 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	update_stream_signal(stream, sink);
 
+	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
+		mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket, false, false);
+	if (stream->link->psr_feature_enabled)	{
+		struct dc  *core_dc = stream->link->ctx->dc;
+
+		if (dc_is_dmcu_initialized(core_dc)) {
+			struct dmcu *dmcu = core_dc->res_pool->dmcu;
+
+			stream->psr_version = dmcu->dmcu_version.psr_version;
+			mod_build_vsc_infopacket(stream,
+					&stream->vsc_infopacket,
+					&stream->use_vsc_sdp_for_colorimetry);
+		}
+	}
 finish:
 	dc_sink_release(sink);
 
@@ -3727,6 +4210,10 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	int rc;
 
+	/* Do not set vupdate for DCN hardware */
+	if (adev->family > AMDGPU_FAMILY_AI)
+		return 0;
+
 	irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
 
 	rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@@ -3970,7 +4457,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->underscan_hborder = 0;
 		state->underscan_vborder = 0;
 		state->base.max_requested_bpc = 8;
-
+		state->vcpi_slots = 0;
+		state->pbn = 0;
 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
 			state->abm_level = amdgpu_dm_abm_level;
 
@@ -3998,7 +4486,8 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
 	new_state->underscan_enable = state->underscan_enable;
 	new_state->underscan_hborder = state->underscan_hborder;
 	new_state->underscan_vborder = state->underscan_vborder;
-
+	new_state->vcpi_slots = state->vcpi_slots;
+	new_state->pbn = state->pbn;
 	return &new_state->base;
 }
 
@@ -4114,8 +4603,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
 		result = MODE_OK;
 	else
 		DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
-			      mode->vdisplay,
 			      mode->hdisplay,
+			      mode->vdisplay,
 			      mode->clock,
 			      dc_result);
 
@@ -4395,10 +4884,69 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder)
 
 }
 
+static int convert_dc_color_depth_into_bpc (enum dc_color_depth display_color_depth)
+{
+	switch (display_color_depth) {
+		case COLOR_DEPTH_666:
+			return 6;
+		case COLOR_DEPTH_888:
+			return 8;
+		case COLOR_DEPTH_101010:
+			return 10;
+		case COLOR_DEPTH_121212:
+			return 12;
+		case COLOR_DEPTH_141414:
+			return 14;
+		case COLOR_DEPTH_161616:
+			return 16;
+		default:
+			break;
+		}
+	return 0;
+}
+
 static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 					  struct drm_crtc_state *crtc_state,
 					  struct drm_connector_state *conn_state)
 {
+	struct drm_atomic_state *state = crtc_state->state;
+	struct drm_connector *connector = conn_state->connector;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dm_connector_state *dm_new_connector_state = to_dm_connector_state(conn_state);
+	const struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
+	struct drm_dp_mst_port *mst_port;
+	enum dc_color_depth color_depth;
+	int clock, bpp = 0;
+	bool is_y420 = false;
+
+	if (!aconnector->port || !aconnector->dc_sink)
+		return 0;
+
+	mst_port = aconnector->port;
+	mst_mgr = &aconnector->mst_port->mst_mgr;
+
+	if (!crtc_state->connectors_changed && !crtc_state->mode_changed)
+		return 0;
+
+	if (!state->duplicated) {
+		is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) &&
+				aconnector->force_yuv420_output;
+		color_depth = convert_color_depth_from_display_info(connector, conn_state,
+								    is_y420);
+		bpp = convert_dc_color_depth_into_bpc(color_depth) * 3;
+		clock = adjusted_mode->clock;
+		dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false);
+	}
+	dm_new_connector_state->vcpi_slots = drm_dp_atomic_find_vcpi_slots(state,
+									   mst_mgr,
+									   mst_port,
+									   dm_new_connector_state->pbn,
+									   0);
+	if (dm_new_connector_state->vcpi_slots < 0) {
+		DRM_DEBUG_ATOMIC("failed finding vcpi slots: %d\n", (int)dm_new_connector_state->vcpi_slots);
+		return dm_new_connector_state->vcpi_slots;
+	}
 	return 0;
 }
 
@@ -4407,6 +4955,71 @@ const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs = {
 	.atomic_check = dm_encoder_helper_atomic_check
 };
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
+					    struct dc_state *dc_state)
+{
+	struct dc_stream_state *stream = NULL;
+	struct drm_connector *connector;
+	struct drm_connector_state *new_con_state, *old_con_state;
+	struct amdgpu_dm_connector *aconnector;
+	struct dm_connector_state *dm_conn_state;
+	int i, j, clock, bpp;
+	int vcpi, pbn_div, pbn = 0;
+
+	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
+
+		aconnector = to_amdgpu_dm_connector(connector);
+
+		if (!aconnector->port)
+			continue;
+
+		if (!new_con_state || !new_con_state->crtc)
+			continue;
+
+		dm_conn_state = to_dm_connector_state(new_con_state);
+
+		for (j = 0; j < dc_state->stream_count; j++) {
+			stream = dc_state->streams[j];
+			if (!stream)
+				continue;
+
+			if ((struct amdgpu_dm_connector*)stream->dm_stream_context == aconnector)
+				break;
+
+			stream = NULL;
+		}
+
+		if (!stream)
+			continue;
+
+		if (stream->timing.flags.DSC != 1) {
+			drm_dp_mst_atomic_enable_dsc(state,
+						     aconnector->port,
+						     dm_conn_state->pbn,
+						     0,
+						     false);
+			continue;
+		}
+
+		pbn_div = dm_mst_get_pbn_divider(stream->link);
+		bpp = stream->timing.dsc_cfg.bits_per_pixel;
+		clock = stream->timing.pix_clk_100hz / 10;
+		pbn = drm_dp_calc_pbn_mode(clock, bpp, true);
+		vcpi = drm_dp_mst_atomic_enable_dsc(state,
+						    aconnector->port,
+						    pbn, pbn_div,
+						    true);
+		if (vcpi < 0)
+			return vcpi;
+
+		dm_conn_state->pbn = pbn;
+		dm_conn_state->vcpi_slots = vcpi;
+	}
+	return 0;
+}
+#endif
+
 static void dm_drm_plane_reset(struct drm_plane *plane)
 {
 	struct dm_plane_state *amdgpu_state = NULL;
@@ -4494,7 +5107,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	tv.num_shared = 1;
 	list_add(&tv.head, &list);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL, true);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
 	if (r) {
 		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
 		return r;
@@ -4837,7 +5450,13 @@ static int to_drm_connector_type(enum signal_type st)
 
 static struct drm_encoder *amdgpu_dm_connector_to_encoder(struct drm_connector *connector)
 {
-	return drm_encoder_find(connector->dev, NULL, connector->encoder_ids[0]);
+	struct drm_encoder *encoder;
+
+	/* There is only one encoder per connector */
+	drm_connector_for_each_possible_encoder(connector, encoder)
+		return encoder;
+
+	return NULL;
 }
 
 static void amdgpu_dm_get_native_mode(struct drm_connector *connector)
@@ -5063,9 +5682,9 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 
 	drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
 
-	/* This defaults to the max in the range, but we want 8bpc. */
-	aconnector->base.state->max_bpc = 8;
-	aconnector->base.state->max_requested_bpc = 8;
+	/* This defaults to the max in the range, but we want 8bpc for non-edp. */
+	aconnector->base.state->max_bpc = (connector_type == DRM_MODE_CONNECTOR_eDP) ? 16 : 8;
+	aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc;
 
 	if (connector_type == DRM_MODE_CONNECTOR_eDP &&
 	    dc_is_dmcu_initialized(adev->dm.dc)) {
@@ -5082,6 +5701,10 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 
 		drm_connector_attach_vrr_capable_property(
 			&aconnector->base);
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+		if (adev->asic_type >= CHIP_RAVEN)
+			drm_connector_attach_content_protection_property(&aconnector->base, true);
+#endif
 	}
 }
 
@@ -5189,11 +5812,12 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 
 	connector_type = to_drm_connector_type(link->connector_signal);
 
-	res = drm_connector_init(
+	res = drm_connector_init_with_ddc(
 			dm->ddev,
 			&aconnector->base,
 			&amdgpu_dm_connector_funcs,
-			connector_type);
+			connector_type,
+			&i2c->base);
 
 	if (res) {
 		DRM_ERROR("connector_init failed\n");
@@ -5324,6 +5948,48 @@ is_scaling_state_different(const struct dm_connector_state *dm_state,
 	return false;
 }
 
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+static bool is_content_protection_different(struct drm_connector_state *state,
+					    const struct drm_connector_state *old_state,
+					    const struct drm_connector *connector, struct hdcp_workqueue *hdcp_w)
+{
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
+	if (old_state->hdcp_content_type != state->hdcp_content_type &&
+	    state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) {
+		state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+		return true;
+	}
+
+	/* CP is being re enabled, ignore this */
+	if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
+	    state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
+		state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED;
+		return false;
+	}
+
+	/* S3 resume case, since old state will always be 0 (UNDESIRED) and the restored state will be ENABLED */
+	if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED &&
+	    state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+		state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+
+	/* Check if something is connected/enabled, otherwise we start hdcp but nothing is connected/enabled
+	 * hot-plug, headless s3, dpms
+	 */
+	if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED && connector->dpms == DRM_MODE_DPMS_ON &&
+	    aconnector->dc_sink != NULL)
+		return true;
+
+	if (old_state->content_protection == state->content_protection)
+		return false;
+
+	if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED)
+		return true;
+
+	return false;
+}
+
+#endif
 static void remove_stream(struct amdgpu_device *adev,
 			  struct amdgpu_crtc *acrtc,
 			  struct dc_stream_state *stream)
@@ -5665,6 +6331,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 	uint32_t target_vblank, last_flip_vblank;
 	bool vrr_active = amdgpu_dm_vrr_active(acrtc_state);
 	bool pflip_present = false;
+	bool swizzle = true;
 	struct {
 		struct dc_surface_update surface_updates[MAX_SURFACES];
 		struct dc_plane_info plane_infos[MAX_SURFACES];
@@ -5710,6 +6377,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 
 		dc_plane = dm_new_plane_state->dc_state;
 
+		if (dc_plane && !dc_plane->tiling_info.gfx9.swizzle)
+			swizzle = false;
+
 		bundle->surface_updates[planes_count].surface = dc_plane;
 		if (new_pcrtc_state->color_mgmt_changed) {
 			bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
@@ -5864,6 +6534,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 	/* Update the planes if changed or disable if we don't have any. */
 	if ((planes_count || acrtc_state->active_planes == 0) &&
 		acrtc_state->stream) {
+		bundle->stream_update.stream = acrtc_state->stream;
 		if (new_pcrtc_state->mode_changed) {
 			bundle->stream_update.src = acrtc_state->stream->src;
 			bundle->stream_update.dst = acrtc_state->stream->dst;
@@ -5899,14 +6570,29 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				&acrtc_state->vrr_params.adjust);
 			spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
 		}
-
 		mutex_lock(&dm->dc_lock);
+		if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
+				acrtc_state->stream->link->psr_allow_active)
+			amdgpu_dm_psr_disable(acrtc_state->stream);
+
 		dc_commit_updates_for_stream(dm->dc,
 						     bundle->surface_updates,
 						     planes_count,
 						     acrtc_state->stream,
 						     &bundle->stream_update,
 						     dc_state);
+
+		if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
+						acrtc_state->stream->psr_version &&
+						!acrtc_state->stream->link->psr_feature_enabled)
+			amdgpu_dm_link_setup_psr(acrtc_state->stream);
+		else if ((acrtc_state->update_type == UPDATE_TYPE_FAST) &&
+						acrtc_state->stream->link->psr_feature_enabled &&
+						!acrtc_state->stream->link->psr_allow_active &&
+						swizzle) {
+			amdgpu_dm_psr_enable(acrtc_state->stream);
+		}
+
 		mutex_unlock(&dm->dc_lock);
 	}
 
@@ -6215,10 +6901,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			crtc->hwmode = new_crtc_state->mode;
 		} else if (modereset_required(new_crtc_state)) {
 			DRM_DEBUG_DRIVER("Atomic commit: RESET. crtc id %d:[%p]\n", acrtc->crtc_id, acrtc);
-
 			/* i.e. reset mode */
-			if (dm_old_crtc_state->stream)
+			if (dm_old_crtc_state->stream) {
+				if (dm_old_crtc_state->stream->link->psr_allow_active)
+					amdgpu_dm_psr_disable(dm_old_crtc_state->stream);
+
 				remove_stream(adev, acrtc, dm_old_crtc_state->stream);
+			}
 		}
 	} /* for_each_crtc_in_state() */
 
@@ -6248,6 +6937,34 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 				acrtc->otg_inst = status->primary_otg_inst;
 		}
 	}
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
+		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+		struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
+		new_crtc_state = NULL;
+
+		if (acrtc)
+			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+		if (dm_new_crtc_state && dm_new_crtc_state->stream == NULL &&
+		    connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) {
+			hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index);
+			new_con_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+			continue;
+		}
+
+		if (is_content_protection_different(new_con_state, old_con_state, connector, adev->dm.hdcp_workqueue))
+			hdcp_update_display(
+				adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
+				new_con_state->hdcp_content_type,
+				new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED ? true
+													 : false);
+	}
+#endif
 
 	/* Handle connector state changes */
 	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
@@ -6287,9 +7004,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!scaling_changed && !abm_changed && !hdr_changed)
 			continue;
 
+		stream_update.stream = dm_new_crtc_state->stream;
 		if (scaling_changed) {
 			update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode,
-					dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream);
+					dm_new_con_state, dm_new_crtc_state->stream);
 
 			stream_update.src = dm_new_crtc_state->stream->src;
 			stream_update.dst = dm_new_crtc_state->stream->dst;
@@ -7034,7 +7752,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
 	int i, j, num_plane, ret = 0;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct dm_plane_state *new_dm_plane_state, *old_dm_plane_state;
-	struct drm_crtc *new_plane_crtc, *old_plane_crtc;
+	struct drm_crtc *new_plane_crtc;
 	struct drm_plane *plane;
 
 	struct drm_crtc *crtc;
@@ -7080,7 +7798,6 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
 			uint64_t tiling_flags;
 
 			new_plane_crtc = new_plane_state->crtc;
-			old_plane_crtc = old_plane_state->crtc;
 			new_dm_plane_state = to_dm_plane_state(new_plane_state);
 			old_dm_plane_state = to_dm_plane_state(old_plane_state);
 
@@ -7158,7 +7875,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
 
 		status = dc_stream_get_status_from_state(old_dm_state->context,
 							 new_dm_crtc_state->stream);
-
+		stream_update.stream = new_dm_crtc_state->stream;
 		/*
 		 * TODO: DC modifies the surface during this call so we need
 		 * to lock here - find a way to do this without locking.
@@ -7181,6 +7898,29 @@ cleanup:
 	return ret;
 }
 
+static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc)
+{
+	struct drm_connector *connector;
+	struct drm_connector_state *conn_state;
+	struct amdgpu_dm_connector *aconnector = NULL;
+	int i;
+	for_each_new_connector_in_state(state, connector, conn_state, i) {
+		if (conn_state->crtc != crtc)
+			continue;
+
+		aconnector = to_amdgpu_dm_connector(connector);
+		if (!aconnector->port || !aconnector->mst_port)
+			aconnector = NULL;
+		else
+			break;
+	}
+
+	if (!aconnector)
+		return 0;
+
+	return drm_dp_mst_add_affected_dsc_crtcs(state, &aconnector->mst_port->mst_mgr);
+}
+
 /**
  * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
  * @dev: The DRM device
@@ -7233,6 +7973,16 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
+	if (adev->asic_type >= CHIP_NAVI10) {
+		for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+			if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
+				ret = add_affected_mst_dsc_crtcs(state, crtc);
+				if (ret)
+					goto fail;
+			}
+		}
+	}
+
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state) &&
 		    !new_crtc_state->color_mgmt_changed &&
@@ -7404,6 +8154,15 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 		if (ret)
 			goto fail;
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+		if (!compute_mst_dsc_configs_for_state(state, dm_state->context))
+			goto fail;
+
+		ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context);
+		if (ret)
+			goto fail;
+#endif
+
 		if (dc_validate_global_state(dc, dm_state->context, false) != DC_OK) {
 			ret = -EINVAL;
 			goto fail;
@@ -7432,6 +8191,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				dc_retain_state(old_dm_state->context);
 		}
 	}
+	/* Perform validation of MST topology in the state*/
+	ret = drm_dp_mst_atomic_check(state);
+	if (ret)
+		goto fail;
 
 	/* Store the overall update type for use later in atomic check. */
 	for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
@@ -7569,3 +8332,112 @@ update:
 						       freesync_capable);
 }
 
+static void amdgpu_dm_set_psr_caps(struct dc_link *link)
+{
+	uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE];
+
+	if (!(link->connector_signal & SIGNAL_TYPE_EDP))
+		return;
+	if (link->type == dc_connection_none)
+		return;
+	if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT,
+					dpcd_data, sizeof(dpcd_data))) {
+		link->psr_feature_enabled = dpcd_data[0] ? true:false;
+		DRM_INFO("PSR support:%d\n", link->psr_feature_enabled);
+	}
+}
+
+/*
+ * amdgpu_dm_link_setup_psr() - configure psr link
+ * @stream: stream state
+ *
+ * Return: true if success
+ */
+static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
+{
+	struct dc_link *link = NULL;
+	struct psr_config psr_config = {0};
+	struct psr_context psr_context = {0};
+	struct dc *dc = NULL;
+	bool ret = false;
+
+	if (stream == NULL)
+		return false;
+
+	link = stream->link;
+	dc = link->ctx->dc;
+
+	psr_config.psr_version = dc->res_pool->dmcu->dmcu_version.psr_version;
+
+	if (psr_config.psr_version > 0) {
+		psr_config.psr_exit_link_training_required = 0x1;
+		psr_config.psr_frame_capture_indication_req = 0;
+		psr_config.psr_rfb_setup_time = 0x37;
+		psr_config.psr_sdp_transmit_line_num_deadline = 0x20;
+		psr_config.allow_smu_optimizations = 0x0;
+
+		ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);
+
+	}
+	DRM_DEBUG_DRIVER("PSR link: %d\n",	link->psr_feature_enabled);
+
+	return ret;
+}
+
+/*
+ * amdgpu_dm_psr_enable() - enable psr f/w
+ * @stream: stream state
+ *
+ * Return: true if success
+ */
+bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+{
+	struct dc_link *link = stream->link;
+	unsigned int vsync_rate_hz = 0;
+	struct dc_static_screen_params params = {0};
+	/* Calculate number of static frames before generating interrupt to
+	 * enter PSR.
+	 */
+	unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
+	// Init fail safe of 2 frames static
+	unsigned int num_frames_static = 2;
+
+	DRM_DEBUG_DRIVER("Enabling psr...\n");
+
+	vsync_rate_hz = div64_u64(div64_u64((
+			stream->timing.pix_clk_100hz * 100),
+			stream->timing.v_total),
+			stream->timing.h_total);
+
+	/* Round up
+	 * Calculate number of frames such that at least 30 ms of time has
+	 * passed.
+	 */
+	if (vsync_rate_hz != 0)
+		num_frames_static = (30000 / frame_time_microsec) + 1;
+
+	params.triggers.cursor_update = true;
+	params.triggers.overlay_update = true;
+	params.triggers.surface_update = true;
+	params.num_frames = num_frames_static;
+
+	dc_stream_set_static_screen_params(link->ctx->dc,
+					   &stream, 1,
+					   &params);
+
+	return dc_link_set_psr_allow_active(link, true, false);
+}
+
+/*
+ * amdgpu_dm_psr_disable() - disable psr f/w
+ * @stream:  stream state
+ *
+ * Return: true if success
+ */
+static bool amdgpu_dm_psr_disable(struct dc_stream_state *stream)
+{
+
+	DRM_DEBUG_DRIVER("Disabling psr...\n");
+
+	return dc_link_set_psr_allow_active(stream->link, false, true);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c8c525a2b505..7ea9acb0358d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -57,6 +57,8 @@ struct amdgpu_device;
 struct drm_device;
 struct amdgpu_dm_irq_handler_data;
 struct dc;
+struct amdgpu_bo;
+struct dmub_srv;
 
 struct common_irq_params {
 	struct amdgpu_device *adev;
@@ -108,6 +110,12 @@ struct amdgpu_dm_backlight_caps {
  * @display_indexes_num: Max number of display streams supported
  * @irq_handler_list_table_lock: Synchronizes access to IRQ tables
  * @backlight_dev: Backlight control device
+ * @backlight_link: Link on which to control backlight
+ * @backlight_caps: Capabilities of the backlight device
+ * @freesync_module: Module handling freesync calculations
+ * @fw_dmcu: Reference to DMCU firmware
+ * @dmcu_fw_version: Version of the DMCU firmware
+ * @soc_bounding_box: SOC bounding box values provided by gpu_info FW
  * @cached_state: Caches device atomic state for suspend/resume
  * @compressor: Frame buffer compression buffer. See &struct dm_comressor_info
  */
@@ -116,6 +124,57 @@ struct amdgpu_display_manager {
 	struct dc *dc;
 
 	/**
+	 * @dmub_srv:
+	 *
+	 * DMUB service, used for controlling the DMUB on hardware
+	 * that supports it. The pointer to the dmub_srv will be
+	 * NULL on hardware that does not support it.
+	 */
+	struct dmub_srv *dmub_srv;
+
+	/**
+	 * @dmub_fb_info:
+	 *
+	 * Framebuffer regions for the DMUB.
+	 */
+	struct dmub_srv_fb_info *dmub_fb_info;
+
+	/**
+	 * @dmub_fw:
+	 *
+	 * DMUB firmware, required on hardware that has DMUB support.
+	 */
+	const struct firmware *dmub_fw;
+
+	/**
+	 * @dmub_bo:
+	 *
+	 * Buffer object for the DMUB.
+	 */
+	struct amdgpu_bo *dmub_bo;
+
+	/**
+	 * @dmub_bo_gpu_addr:
+	 *
+	 * GPU virtual address for the DMUB buffer object.
+	 */
+	u64 dmub_bo_gpu_addr;
+
+	/**
+	 * @dmub_bo_cpu_addr:
+	 *
+	 * CPU address for the DMUB buffer object.
+	 */
+	void *dmub_bo_cpu_addr;
+
+	/**
+	 * @dmcub_fw_version:
+	 *
+	 * DMCUB firmware version.
+	 */
+	uint32_t dmcub_fw_version;
+
+	/**
 	 * @cgs_device:
 	 *
 	 * The Common Graphics Services device. It provides an interface for
@@ -128,7 +187,7 @@ struct amdgpu_display_manager {
 	u16 display_indexes_num;
 
 	/**
-	 * @atomic_obj
+	 * @atomic_obj:
 	 *
 	 * In combination with &dm_atomic_state it helps manage
 	 * global atomic state that doesn't map cleanly into existing
@@ -225,6 +284,9 @@ struct amdgpu_display_manager {
 	struct amdgpu_dm_backlight_caps backlight_caps;
 
 	struct mod_freesync *freesync_module;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	struct hdcp_workqueue *hdcp_workqueue;
+#endif
 
 	struct drm_atomic_state *cached_state;
 
@@ -232,13 +294,13 @@ struct amdgpu_display_manager {
 
 	const struct firmware *fw_dmcu;
 	uint32_t dmcu_fw_version;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	/**
+	 * @soc_bounding_box:
+	 *
 	 * gpu_info FW provided soc bounding box struct or 0 if not
 	 * available in FW
 	 */
 	const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
-#endif
 };
 
 struct amdgpu_dm_connector {
@@ -268,6 +330,7 @@ struct amdgpu_dm_connector {
 	struct drm_dp_mst_port *port;
 	struct amdgpu_dm_connector *mst_port;
 	struct amdgpu_encoder *mst_encoder;
+	struct drm_dp_aux *dsc_aux;
 
 	/* TODO see if we can merge with ddc_bus or make a dm_connector */
 	struct amdgpu_i2c_adapter *i2c;
@@ -287,6 +350,7 @@ struct amdgpu_dm_connector {
 	uint32_t debugfs_dpcd_address;
 	uint32_t debugfs_dpcd_size;
 #endif
+	bool force_yuv420_output;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
@@ -347,6 +411,8 @@ struct dm_connector_state {
 	bool underscan_enable;
 	bool freesync_capable;
 	uint8_t abm_level;
+	int vcpi_slots;
+	uint64_t pbn;
 };
 
 #define to_dm_connector_state(x)\
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index b43bb7f90e4e..2233d293a707 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -210,6 +210,8 @@ static int __set_legacy_tf(struct dc_transfer_func *func,
 	res = mod_color_calculate_regamma_params(func, gamma, true, has_rom,
 						 NULL);
 
+	dc_gamma_release(&gamma);
+
 	return res ? 0 : -ENOMEM;
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index a549c7c717dd..eaad9099bc0b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -122,11 +122,16 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 	}
 
 	/* Configure dithering */
-	if (!dm_need_crc_dither(source))
+	if (!dm_need_crc_dither(source)) {
 		dc_stream_set_dither_option(stream_state, DITHER_OPTION_TRUN8);
-	else
+		dc_stream_set_dyn_expansion(stream_state->ctx->dc, stream_state,
+					    DYN_EXPANSION_DISABLE);
+	} else {
 		dc_stream_set_dither_option(stream_state,
 					    DITHER_OPTION_DEFAULT);
+		dc_stream_set_dyn_expansion(stream_state->ctx->dc, stream_state,
+					    DYN_EXPANSION_AUTO);
+	}
 
 unlock:
 	mutex_unlock(&adev->dm.dc_lock);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index f3dfb2887ae0..f81d3439ee8c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -657,6 +657,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
 	dc_link_set_test_pattern(
 		link,
 		test_pattern,
+		DP_TEST_PATTERN_COLOR_SPACE_RGB,
 		&link_training_settings,
 		custom_pattern,
 		10);
@@ -942,6 +943,52 @@ static const struct {
 		{"aux_dpcd_data", &dp_dpcd_data_debugfs_fops}
 };
 
+/*
+ * Force YUV420 output if available from the given mode
+ */
+static int force_yuv420_output_set(void *data, u64 val)
+{
+	struct amdgpu_dm_connector *connector = data;
+
+	connector->force_yuv420_output = (bool)val;
+
+	return 0;
+}
+
+/*
+ * Check if YUV420 is forced when available from the given mode
+ */
+static int force_yuv420_output_get(void *data, u64 *val)
+{
+	struct amdgpu_dm_connector *connector = data;
+
+	*val = connector->force_yuv420_output;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(force_yuv420_output_fops, force_yuv420_output_get,
+			 force_yuv420_output_set, "%llu\n");
+
+/*
+ *  Read PSR state
+ */
+static int psr_get(void *data, u64 *val)
+{
+	struct amdgpu_dm_connector *connector = data;
+	struct dc_link *link = connector->dc_link;
+	uint32_t psr_state = 0;
+
+	dc_link_get_psr_state(link, &psr_state);
+
+	*val = psr_state;
+
+	return 0;
+}
+
+
+DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
+
 void connector_debugfs_init(struct amdgpu_dm_connector *connector)
 {
 	int i;
@@ -955,6 +1002,12 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
 					    dp_debugfs_entries[i].fops);
 		}
 	}
+	if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
+		debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
+
+	debugfs_create_file_unsafe("force_yuv420_output", 0644, dir, connector,
+				   &force_yuv420_output_fops);
+
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
new file mode 100644
index 000000000000..ae329335dfcc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "amdgpu_dm_hdcp.h"
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "dm_helpers.h"
+#include <drm/drm_hdcp.h>
+
+static bool
+lp_write_i2c(void *handle, uint32_t address, const uint8_t *data, uint32_t size)
+{
+
+	struct dc_link *link = handle;
+	struct i2c_payload i2c_payloads[] = {{true, address, size, (void *)data} };
+	struct i2c_command cmd = {i2c_payloads, 1, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz};
+
+	return dm_helpers_submit_i2c(link->ctx, link, &cmd);
+}
+
+static bool
+lp_read_i2c(void *handle, uint32_t address, uint8_t offset, uint8_t *data, uint32_t size)
+{
+	struct dc_link *link = handle;
+
+	struct i2c_payload i2c_payloads[] = {{true, address, 1, &offset}, {false, address, size, data} };
+	struct i2c_command cmd = {i2c_payloads, 2, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz};
+
+	return dm_helpers_submit_i2c(link->ctx, link, &cmd);
+}
+
+static bool
+lp_write_dpcd(void *handle, uint32_t address, const uint8_t *data, uint32_t size)
+{
+	struct dc_link *link = handle;
+
+	return dm_helpers_dp_write_dpcd(link->ctx, link, address, data, size);
+}
+
+static bool
+lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size)
+{
+	struct dc_link *link = handle;
+
+	return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size);
+}
+
+static void process_output(struct hdcp_workqueue *hdcp_work)
+{
+	struct mod_hdcp_output output = hdcp_work->output;
+
+	if (output.callback_stop)
+		cancel_delayed_work(&hdcp_work->callback_dwork);
+
+	if (output.callback_needed)
+		schedule_delayed_work(&hdcp_work->callback_dwork,
+				      msecs_to_jiffies(output.callback_delay));
+
+	if (output.watchdog_timer_stop)
+		cancel_delayed_work(&hdcp_work->watchdog_timer_dwork);
+
+	if (output.watchdog_timer_needed)
+		schedule_delayed_work(&hdcp_work->watchdog_timer_dwork,
+				      msecs_to_jiffies(output.watchdog_timer_delay));
+
+	schedule_delayed_work(&hdcp_work->property_validate_dwork, msecs_to_jiffies(0));
+}
+
+void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
+			 unsigned int link_index,
+			 struct amdgpu_dm_connector *aconnector,
+			 uint8_t content_type,
+			 bool enable_encryption)
+{
+	struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
+	struct mod_hdcp_display *display = &hdcp_work[link_index].display;
+	struct mod_hdcp_link *link = &hdcp_work[link_index].link;
+	struct mod_hdcp_display_query query;
+
+	mutex_lock(&hdcp_w->mutex);
+	hdcp_w->aconnector = aconnector;
+
+	query.display = NULL;
+	mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query);
+
+	if (query.display != NULL) {
+		memcpy(display, query.display, sizeof(struct mod_hdcp_display));
+		mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output);
+
+		hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
+
+		if (enable_encryption) {
+			display->adjust.disable = 0;
+			if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0)
+				hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
+			else if (content_type == DRM_MODE_HDCP_CONTENT_TYPE1)
+				hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_1;
+
+			schedule_delayed_work(&hdcp_w->property_validate_dwork,
+					      msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS));
+		} else {
+			display->adjust.disable = 1;
+			hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+			cancel_delayed_work(&hdcp_w->property_validate_dwork);
+		}
+
+		display->state = MOD_HDCP_DISPLAY_ACTIVE;
+	}
+
+	mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output);
+
+	process_output(hdcp_w);
+	mutex_unlock(&hdcp_w->mutex);
+}
+
+void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
+{
+	struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
+
+	mutex_lock(&hdcp_w->mutex);
+
+	mod_hdcp_reset_connection(&hdcp_w->hdcp,  &hdcp_w->output);
+
+	cancel_delayed_work(&hdcp_w->property_validate_dwork);
+	hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+
+	process_output(hdcp_w);
+
+	mutex_unlock(&hdcp_w->mutex);
+}
+
+void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
+{
+	struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
+
+	schedule_work(&hdcp_w->cpirq_work);
+}
+
+
+
+
+static void event_callback(struct work_struct *work)
+{
+	struct hdcp_workqueue *hdcp_work;
+
+	hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue,
+				      callback_dwork);
+
+	mutex_lock(&hdcp_work->mutex);
+
+	cancel_delayed_work(&hdcp_work->watchdog_timer_dwork);
+
+	mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CALLBACK,
+			       &hdcp_work->output);
+
+	process_output(hdcp_work);
+
+	mutex_unlock(&hdcp_work->mutex);
+
+
+}
+static void event_property_update(struct work_struct *work)
+{
+
+	struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue, property_update_work);
+	struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector;
+	struct drm_device *dev = hdcp_work->aconnector->base.dev;
+	long ret;
+
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	mutex_lock(&hdcp_work->mutex);
+
+
+	if (aconnector->base.state->commit) {
+		ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ);
+
+		if (ret == 0) {
+			DRM_ERROR("HDCP state unknown! Setting it to DESIRED");
+			hdcp_work->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+		}
+	}
+
+	if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) {
+		if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 &&
+		    hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON)
+			drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
+		else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 &&
+			 hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON)
+			drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
+	} else {
+		drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED);
+	}
+
+
+	mutex_unlock(&hdcp_work->mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+}
+
+static void event_property_validate(struct work_struct *work)
+{
+	struct hdcp_workqueue *hdcp_work =
+		container_of(to_delayed_work(work), struct hdcp_workqueue, property_validate_dwork);
+	struct mod_hdcp_display_query query;
+	struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector;
+
+	if (!aconnector)
+		return;
+
+	mutex_lock(&hdcp_work->mutex);
+
+	query.encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+	mod_hdcp_query_display(&hdcp_work->hdcp, aconnector->base.index, &query);
+
+	if (query.encryption_status != hdcp_work->encryption_status) {
+		hdcp_work->encryption_status = query.encryption_status;
+		schedule_work(&hdcp_work->property_update_work);
+	}
+
+	mutex_unlock(&hdcp_work->mutex);
+}
+
+static void event_watchdog_timer(struct work_struct *work)
+{
+	struct hdcp_workqueue *hdcp_work;
+
+	hdcp_work = container_of(to_delayed_work(work),
+				      struct hdcp_workqueue,
+				      watchdog_timer_dwork);
+
+	mutex_lock(&hdcp_work->mutex);
+
+	mod_hdcp_process_event(&hdcp_work->hdcp,
+			       MOD_HDCP_EVENT_WATCHDOG_TIMEOUT,
+			       &hdcp_work->output);
+
+	process_output(hdcp_work);
+
+	mutex_unlock(&hdcp_work->mutex);
+
+}
+
+static void event_cpirq(struct work_struct *work)
+{
+	struct hdcp_workqueue *hdcp_work;
+
+	hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work);
+
+	mutex_lock(&hdcp_work->mutex);
+
+	mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output);
+
+	process_output(hdcp_work);
+
+	mutex_unlock(&hdcp_work->mutex);
+
+}
+
+
+void hdcp_destroy(struct hdcp_workqueue *hdcp_work)
+{
+	int i = 0;
+
+	for (i = 0; i < hdcp_work->max_link; i++) {
+		cancel_delayed_work_sync(&hdcp_work[i].callback_dwork);
+		cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork);
+	}
+
+	kfree(hdcp_work);
+
+}
+
+static void update_config(void *handle, struct cp_psp_stream_config *config)
+{
+	struct hdcp_workqueue *hdcp_work = handle;
+	struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx;
+	int link_index = aconnector->dc_link->link_index;
+	struct mod_hdcp_display *display = &hdcp_work[link_index].display;
+	struct mod_hdcp_link *link = &hdcp_work[link_index].link;
+
+	memset(display, 0, sizeof(*display));
+	memset(link, 0, sizeof(*link));
+
+	display->index = aconnector->base.index;
+	display->state = MOD_HDCP_DISPLAY_ACTIVE;
+
+	if (aconnector->dc_sink != NULL)
+		link->mode = mod_hdcp_signal_type_to_operation_mode(aconnector->dc_sink->sink_signal);
+
+	display->controller = CONTROLLER_ID_D0 + config->otg_inst;
+	display->dig_fe = config->stream_enc_inst;
+	link->dig_be = config->link_enc_inst;
+	link->ddc_line = aconnector->dc_link->ddc_hw_inst + 1;
+	link->dp.rev = aconnector->dc_link->dpcd_caps.dpcd_rev.raw;
+	display->adjust.disable = 1;
+	link->adjust.auth_delay = 2;
+
+	hdcp_update_display(hdcp_work, link_index, aconnector, DRM_MODE_HDCP_CONTENT_TYPE0, false);
+}
+
+struct hdcp_workqueue *hdcp_create_workqueue(void *psp_context, struct cp_psp *cp_psp, struct dc *dc)
+{
+
+	int max_caps = dc->caps.max_links;
+	struct hdcp_workqueue *hdcp_work = kzalloc(max_caps*sizeof(*hdcp_work), GFP_KERNEL);
+	int i = 0;
+
+	if (hdcp_work == NULL)
+		goto fail_alloc_context;
+
+	hdcp_work->max_link = max_caps;
+
+	for (i = 0; i < max_caps; i++) {
+
+		mutex_init(&hdcp_work[i].mutex);
+
+		INIT_WORK(&hdcp_work[i].cpirq_work, event_cpirq);
+		INIT_WORK(&hdcp_work[i].property_update_work, event_property_update);
+		INIT_DELAYED_WORK(&hdcp_work[i].callback_dwork, event_callback);
+		INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer);
+		INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate);
+
+		hdcp_work[i].hdcp.config.psp.handle =  psp_context;
+		hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i);
+		hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c;
+		hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c;
+		hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd;
+		hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd;
+	}
+
+	cp_psp->funcs.update_stream_config = update_config;
+	cp_psp->handle = hdcp_work;
+
+	return hdcp_work;
+
+fail_alloc_context:
+	kfree(hdcp_work);
+
+	return NULL;
+
+
+
+}
+
+
+
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
new file mode 100644
index 000000000000..6abde86bce4a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef AMDGPU_DM_AMDGPU_DM_HDCP_H_
+#define AMDGPU_DM_AMDGPU_DM_HDCP_H_
+
+#include "mod_hdcp.h"
+#include "hdcp.h"
+#include "dc.h"
+#include "dm_cp_psp.h"
+
+struct mod_hdcp;
+struct mod_hdcp_link;
+struct mod_hdcp_display;
+struct cp_psp;
+
+struct hdcp_workqueue {
+	struct work_struct cpirq_work;
+	struct work_struct property_update_work;
+	struct delayed_work callback_dwork;
+	struct delayed_work watchdog_timer_dwork;
+	struct delayed_work property_validate_dwork;
+	struct amdgpu_dm_connector *aconnector;
+	struct mutex mutex;
+
+	struct mod_hdcp hdcp;
+	struct mod_hdcp_output output;
+	struct mod_hdcp_display display;
+	struct mod_hdcp_link link;
+
+	enum mod_hdcp_encryption_status encryption_status;
+	uint8_t max_link;
+};
+
+void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
+			 unsigned int link_index,
+			 struct amdgpu_dm_connector *aconnector,
+			 uint8_t content_type,
+			 bool enable_encryption);
+
+void hdcp_reset_display(struct hdcp_workqueue *work, unsigned int link_index);
+void hdcp_handle_cpirq(struct hdcp_workqueue *work, unsigned int link_index);
+void hdcp_destroy(struct hdcp_workqueue *work);
+
+struct hdcp_workqueue *hdcp_create_workqueue(void *psp_context, struct cp_psp *cp_psp, struct dc *dc);
+
+#endif /* AMDGPU_DM_AMDGPU_DM_HDCP_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ee1dc75f5ddc..069b7a6f5597 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -37,6 +37,7 @@
 #include "dc.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_dm_irq.h"
+#include "amdgpu_dm_mst_types.h"
 
 #include "dm_helpers.h"
 
@@ -97,11 +98,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
 			(struct edid *) edid->raw_edid);
 
 	sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
-	if (sad_count <= 0) {
-		DRM_INFO("SADs count is: %d, don't need to read it\n",
-				sad_count);
+	if (sad_count <= 0)
 		return result;
-	}
 
 	edid_caps->audio_mode_count = sad_count < DC_MAX_AUDIO_DESC_COUNT ? sad_count : DC_MAX_AUDIO_DESC_COUNT;
 	for (i = 0; i < edid_caps->audio_mode_count; ++i) {
@@ -183,19 +181,22 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
 		bool enable)
 {
 	struct amdgpu_dm_connector *aconnector;
+	struct dm_connector_state *dm_conn_state;
 	struct drm_dp_mst_topology_mgr *mst_mgr;
 	struct drm_dp_mst_port *mst_port;
-	int slots = 0;
 	bool ret;
-	int clock;
-	int bpp = 0;
-	int pbn = 0;
 
 	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+	/* Accessing the connector state is required for vcpi_slots allocation
+	 * and directly relies on behaviour in commit check
+	 * that blocks before commit guaranteeing that the state
+	 * is not gonna be swapped while still in use in commit tail */
 
 	if (!aconnector || !aconnector->mst_port)
 		return false;
 
+	dm_conn_state = to_dm_connector_state(aconnector->base.state);
+
 	mst_mgr = &aconnector->mst_port->mst_mgr;
 
 	if (!mst_mgr->mst_state)
@@ -204,42 +205,10 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
 	mst_port = aconnector->port;
 
 	if (enable) {
-		clock = stream->timing.pix_clk_100hz / 10;
-
-		switch (stream->timing.display_color_depth) {
-
-		case COLOR_DEPTH_666:
-			bpp = 6;
-			break;
-		case COLOR_DEPTH_888:
-			bpp = 8;
-			break;
-		case COLOR_DEPTH_101010:
-			bpp = 10;
-			break;
-		case COLOR_DEPTH_121212:
-			bpp = 12;
-			break;
-		case COLOR_DEPTH_141414:
-			bpp = 14;
-			break;
-		case COLOR_DEPTH_161616:
-			bpp = 16;
-			break;
-		default:
-			ASSERT(bpp != 0);
-			break;
-		}
-
-		bpp = bpp * 3;
-
-		/* TODO need to know link rate */
-
-		pbn = drm_dp_calc_pbn_mode(clock, bpp);
-
-		slots = drm_dp_find_vcpi_slots(mst_mgr, pbn);
-		ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, pbn, slots);
 
+		ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port,
+					       dm_conn_state->pbn,
+					       dm_conn_state->vcpi_slots);
 		if (!ret)
 			return false;
 
@@ -282,7 +251,7 @@ void dm_helpers_dp_mst_clear_payload_allocation_table(
  * Polls for ACT (allocation change trigger) handled and sends
  * ALLOCATE_PAYLOAD message.
  */
-bool dm_helpers_dp_mst_poll_for_allocation_change_trigger(
+enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 		struct dc_context *ctx,
 		const struct dc_stream_state *stream)
 {
@@ -293,19 +262,19 @@ bool dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
 	if (!aconnector || !aconnector->mst_port)
-		return false;
+		return ACT_FAILED;
 
 	mst_mgr = &aconnector->mst_port->mst_mgr;
 
 	if (!mst_mgr->mst_state)
-		return false;
+		return ACT_FAILED;
 
 	ret = drm_dp_check_act_status(mst_mgr);
 
 	if (ret)
-		return false;
+		return ACT_FAILED;
 
-	return true;
+	return ACT_SUCCESS;
 }
 
 bool dm_helpers_dp_mst_send_payload_allocation(
@@ -541,7 +510,6 @@ bool dm_helpers_submit_i2c(
 
 	return result;
 }
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 bool dm_helpers_dp_write_dsc_enable(
 		struct dc_context *ctx,
 		const struct dc_stream_state *stream,
@@ -549,10 +517,25 @@ bool dm_helpers_dp_write_dsc_enable(
 )
 {
 	uint8_t enable_dsc = enable ? 1 : 0;
+	struct amdgpu_dm_connector *aconnector;
+
+	if (!stream)
+		return false;
+
+	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+		if (!aconnector->dsc_aux)
+			return false;
+
+		return (drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1) >= 0);
+	}
+
+	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
+		return dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
 
-	return dm_helpers_dp_write_dpcd(ctx, stream->sink->link, DP_DSC_ENABLE, &enable_dsc, 1);
+	return false;
 }
-#endif
 
 bool dm_helpers_is_dp_sink_present(struct dc_link *link)
 {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index fa5d503d379c..cbcf504f73a5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -111,17 +111,12 @@ static void init_handler_common_data(struct amdgpu_dm_irq_handler_data *hcd,
  */
 static void dm_irq_work_func(struct work_struct *work)
 {
-	struct list_head *entry;
 	struct irq_list_head *irq_list_head =
 		container_of(work, struct irq_list_head, work);
 	struct list_head *handler_list = &irq_list_head->head;
 	struct amdgpu_dm_irq_handler_data *handler_data;
 
-	list_for_each(entry, handler_list) {
-		handler_data = list_entry(entry,
-					  struct amdgpu_dm_irq_handler_data,
-					  list);
-
+	list_for_each_entry(handler_data, handler_list, list) {
 		DRM_DEBUG_KMS("DM_IRQ: work_func: for dal_src=%d\n",
 				handler_data->irq_source);
 
@@ -528,19 +523,13 @@ static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev,
 					 enum dc_irq_source irq_source)
 {
 	struct amdgpu_dm_irq_handler_data *handler_data;
-	struct list_head *entry;
 	unsigned long irq_table_flags;
 
 	DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
 
-	list_for_each(
-		entry,
-		&adev->dm.irq_handler_list_high_tab[irq_source]) {
-
-		handler_data = list_entry(entry,
-					  struct amdgpu_dm_irq_handler_data,
-					  list);
-
+	list_for_each_entry(handler_data,
+			    &adev->dm.irq_handler_list_high_tab[irq_source],
+			    list) {
 		/* Call a subcomponent which registered for immediate
 		 * interrupt notification */
 		handler_data->handler(handler_data->handler_arg);
@@ -732,8 +721,10 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_dm_connector *amdgpu_dm_connector =
 				to_amdgpu_dm_connector(connector);
 
@@ -751,6 +742,7 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
 					true);
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 }
 
 /**
@@ -765,8 +757,10 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
 		struct amdgpu_dm_connector *amdgpu_dm_connector =
 				to_amdgpu_dm_connector(connector);
 		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
@@ -779,4 +773,5 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
 					false);
 		}
 	}
+	drm_connector_list_iter_end(&iter);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 16218a202b59..96b391e4b3e7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -25,6 +25,7 @@
 
 #include <linux/version.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_dp_mst_helper.h>
 #include "dm_services.h"
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
@@ -36,6 +37,14 @@
 #include "dc_link_ddc.h"
 
 #include "i2caux_interface.h"
+#if defined(CONFIG_DEBUG_FS)
+#include "amdgpu_dm_debugfs.h"
+#endif
+
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+#include "dc/dcn20/dcn20_resource.h"
+#endif
 
 /* #define TRACE_DPCD */
 
@@ -113,6 +122,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
 			result = -EIO;
 			break;
 		case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY:
+		case AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE:
 			result = -EBUSY;
 			break;
 		case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT:
@@ -123,31 +133,14 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
 	return result;
 }
 
-static enum drm_connector_status
-dm_dp_mst_detect(struct drm_connector *connector, bool force)
-{
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-	struct amdgpu_dm_connector *master = aconnector->mst_port;
-
-	enum drm_connector_status status =
-		drm_dp_mst_detect_port(
-			connector,
-			&master->mst_mgr,
-			aconnector->port);
-
-	return status;
-}
-
 static void
 dm_dp_mst_connector_destroy(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
 	struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder;
 
-	if (amdgpu_dm_connector->edid) {
-		kfree(amdgpu_dm_connector->edid);
-		amdgpu_dm_connector->edid = NULL;
-	}
+	kfree(amdgpu_dm_connector->edid);
+	amdgpu_dm_connector->edid = NULL;
 
 	drm_encoder_cleanup(&amdgpu_encoder->base);
 	kfree(amdgpu_encoder);
@@ -163,6 +156,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
 		to_amdgpu_dm_connector(connector);
 	struct drm_dp_mst_port *port = amdgpu_dm_connector->port;
 
+#if defined(CONFIG_DEBUG_FS)
+	connector_debugfs_init(amdgpu_dm_connector);
+	amdgpu_dm_connector->debugfs_dpcd_address = 0;
+	amdgpu_dm_connector->debugfs_dpcd_size = 0;
+#endif
+
 	return drm_dp_mst_connector_late_register(connector, port);
 }
 
@@ -177,7 +176,6 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
 }
 
 static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
-	.detect = dm_dp_mst_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = dm_dp_mst_connector_destroy,
 	.reset = amdgpu_dm_connector_funcs_reset,
@@ -189,6 +187,30 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
 	.early_unregister = amdgpu_dm_mst_connector_early_unregister,
 };
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector)
+{
+	struct dc_sink *dc_sink = aconnector->dc_sink;
+	struct drm_dp_mst_port *port = aconnector->port;
+	u8 dsc_caps[16] = { 0 };
+
+	aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port);
+
+	if (!aconnector->dsc_aux)
+		return false;
+
+	if (drm_dp_dpcd_read(aconnector->dsc_aux, DP_DSC_SUPPORT, dsc_caps, 16) < 0)
+		return false;
+
+	if (!dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
+				   dsc_caps, NULL,
+				   &dc_sink->sink_dsc_caps.dsc_dec_caps))
+		return false;
+
+	return true;
+}
+#endif
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -231,10 +253,16 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
 		/* dc_link_add_remote_sink returns a new reference */
 		aconnector->dc_sink = dc_sink;
 
-		if (aconnector->dc_sink)
+		if (aconnector->dc_sink) {
 			amdgpu_dm_update_freesync_caps(
 					connector, aconnector->edid);
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+			if (!validate_dsc_caps_on_connector(aconnector))
+				memset(&aconnector->dc_sink->sink_dsc_caps,
+				       0, sizeof(aconnector->dc_sink->sink_dsc_caps));
+#endif
+		}
 	}
 
 	drm_connector_update_edid_property(
@@ -245,17 +273,61 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static struct drm_encoder *dm_mst_best_encoder(struct drm_connector *connector)
+static struct drm_encoder *
+dm_mst_atomic_best_encoder(struct drm_connector *connector,
+			   struct drm_connector_state *connector_state)
 {
-	struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+	return &to_amdgpu_dm_connector(connector)->mst_encoder->base;
+}
+
+static int
+dm_dp_mst_detect(struct drm_connector *connector,
+		 struct drm_modeset_acquire_ctx *ctx, bool force)
+{
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *master = aconnector->mst_port;
+
+	return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
+				      aconnector->port);
+}
+
+static int dm_dp_mst_atomic_check(struct drm_connector *connector,
+				struct drm_atomic_state *state)
+{
+	struct drm_connector_state *new_conn_state =
+			drm_atomic_get_new_connector_state(state, connector);
+	struct drm_connector_state *old_conn_state =
+			drm_atomic_get_old_connector_state(state, connector);
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
+	struct drm_dp_mst_port *mst_port;
+
+	mst_port = aconnector->port;
+	mst_mgr = &aconnector->mst_port->mst_mgr;
+
+	if (!old_conn_state->crtc)
+		return 0;
+
+	if (new_conn_state->crtc) {
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
+		if (!new_crtc_state ||
+		    !drm_atomic_crtc_needs_modeset(new_crtc_state) ||
+		    new_crtc_state->enable)
+			return 0;
+		}
 
-	return &amdgpu_dm_connector->mst_encoder->base;
+	return drm_dp_atomic_release_vcpi_slots(state,
+						mst_mgr,
+						mst_port);
 }
 
 static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs = {
 	.get_modes = dm_dp_mst_get_modes,
 	.mode_valid = amdgpu_dm_connector_mode_valid,
-	.best_encoder = dm_mst_best_encoder,
+	.atomic_best_encoder = dm_mst_atomic_best_encoder,
+	.detect_ctx = dm_dp_mst_detect,
+	.atomic_check = dm_dp_mst_atomic_check,
 };
 
 static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder)
@@ -416,7 +488,11 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 
 	drm_dp_aux_register(&aconnector->dm_dp_aux.aux);
 	drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux,
-				      aconnector->base.name, dm->adev->dev);
+				      &aconnector->base);
+
+	if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
+		return;
+
 	aconnector->mst_mgr.cbs = &dm_mst_cbs;
 	drm_dp_mst_topology_mgr_init(
 		&aconnector->mst_mgr,
@@ -427,3 +503,384 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 		aconnector->connector_id);
 }
 
+int dm_mst_get_pbn_divider(struct dc_link *link)
+{
+	if (!link)
+		return 0;
+
+	return dc_link_bandwidth_kbps(link,
+			dc_link_get_link_cap(link)) / (8 * 1000 * 54);
+}
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+
+struct dsc_mst_fairness_params {
+	struct dc_crtc_timing *timing;
+	struct dc_sink *sink;
+	struct dc_dsc_bw_range bw_range;
+	bool compression_possible;
+	struct drm_dp_mst_port *port;
+};
+
+struct dsc_mst_fairness_vars {
+	int pbn;
+	bool dsc_enabled;
+	int bpp_x16;
+};
+
+static int kbps_to_peak_pbn(int kbps)
+{
+	u64 peak_kbps = kbps;
+
+	peak_kbps *= 1006;
+	peak_kbps = div_u64(peak_kbps, 1000);
+	return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
+}
+
+static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params,
+		struct dsc_mst_fairness_vars *vars,
+		int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		memset(&params[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg));
+		if (vars[i].dsc_enabled && dc_dsc_compute_config(
+					params[i].sink->ctx->dc->res_pool->dscs[0],
+					&params[i].sink->sink_dsc_caps.dsc_dec_caps,
+					params[i].sink->ctx->dc->debug.dsc_min_slice_height_override,
+					0,
+					params[i].timing,
+					&params[i].timing->dsc_cfg)) {
+			params[i].timing->flags.DSC = 1;
+			params[i].timing->dsc_cfg.bits_per_pixel = vars[i].bpp_x16;
+		} else {
+			params[i].timing->flags.DSC = 0;
+		}
+	}
+}
+
+static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn)
+{
+	struct dc_dsc_config dsc_config;
+	u64 kbps;
+
+	kbps = div_u64((u64)pbn * 994 * 8 * 54, 64);
+	dc_dsc_compute_config(
+			param.sink->ctx->dc->res_pool->dscs[0],
+			&param.sink->sink_dsc_caps.dsc_dec_caps,
+			param.sink->ctx->dc->debug.dsc_min_slice_height_override,
+			(int) kbps, param.timing, &dsc_config);
+
+	return dsc_config.bits_per_pixel;
+}
+
+static void increase_dsc_bpp(struct drm_atomic_state *state,
+			     struct dc_link *dc_link,
+			     struct dsc_mst_fairness_params *params,
+			     struct dsc_mst_fairness_vars *vars,
+			     int count)
+{
+	int i;
+	bool bpp_increased[MAX_PIPES];
+	int initial_slack[MAX_PIPES];
+	int min_initial_slack;
+	int next_index;
+	int remaining_to_increase = 0;
+	int pbn_per_timeslot;
+	int link_timeslots_used;
+	int fair_pbn_alloc;
+
+	for (i = 0; i < count; i++) {
+		if (vars[i].dsc_enabled) {
+			initial_slack[i] = kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i].pbn;
+			bpp_increased[i] = false;
+			remaining_to_increase += 1;
+		} else {
+			initial_slack[i] = 0;
+			bpp_increased[i] = true;
+		}
+	}
+
+	pbn_per_timeslot = dc_link_bandwidth_kbps(dc_link,
+			dc_link_get_link_cap(dc_link)) / (8 * 1000 * 54);
+
+	while (remaining_to_increase) {
+		next_index = -1;
+		min_initial_slack = -1;
+		for (i = 0; i < count; i++) {
+			if (!bpp_increased[i]) {
+				if (min_initial_slack == -1 || min_initial_slack > initial_slack[i]) {
+					min_initial_slack = initial_slack[i];
+					next_index = i;
+				}
+			}
+		}
+
+		if (next_index == -1)
+			break;
+
+		link_timeslots_used = 0;
+
+		for (i = 0; i < count; i++)
+			link_timeslots_used += DIV_ROUND_UP(vars[i].pbn, pbn_per_timeslot);
+
+		fair_pbn_alloc = (63 - link_timeslots_used) / remaining_to_increase * pbn_per_timeslot;
+
+		if (initial_slack[next_index] > fair_pbn_alloc) {
+			vars[next_index].pbn += fair_pbn_alloc;
+			if (drm_dp_atomic_find_vcpi_slots(state,
+							  params[next_index].port->mgr,
+							  params[next_index].port,
+							  vars[next_index].pbn,\
+							  dm_mst_get_pbn_divider(dc_link)) < 0)
+				return;
+			if (!drm_dp_mst_atomic_check(state)) {
+				vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
+			} else {
+				vars[next_index].pbn -= fair_pbn_alloc;
+				if (drm_dp_atomic_find_vcpi_slots(state,
+								  params[next_index].port->mgr,
+								  params[next_index].port,
+								  vars[next_index].pbn,
+								  dm_mst_get_pbn_divider(dc_link)) < 0)
+					return;
+			}
+		} else {
+			vars[next_index].pbn += initial_slack[next_index];
+			if (drm_dp_atomic_find_vcpi_slots(state,
+							  params[next_index].port->mgr,
+							  params[next_index].port,
+							  vars[next_index].pbn,
+							  dm_mst_get_pbn_divider(dc_link)) < 0)
+				return;
+			if (!drm_dp_mst_atomic_check(state)) {
+				vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16;
+			} else {
+				vars[next_index].pbn -= initial_slack[next_index];
+				if (drm_dp_atomic_find_vcpi_slots(state,
+								  params[next_index].port->mgr,
+								  params[next_index].port,
+								  vars[next_index].pbn,
+								  dm_mst_get_pbn_divider(dc_link)) < 0)
+					return;
+			}
+		}
+
+		bpp_increased[next_index] = true;
+		remaining_to_increase--;
+	}
+}
+
+static void try_disable_dsc(struct drm_atomic_state *state,
+			    struct dc_link *dc_link,
+			    struct dsc_mst_fairness_params *params,
+			    struct dsc_mst_fairness_vars *vars,
+			    int count)
+{
+	int i;
+	bool tried[MAX_PIPES];
+	int kbps_increase[MAX_PIPES];
+	int max_kbps_increase;
+	int next_index;
+	int remaining_to_try = 0;
+
+	for (i = 0; i < count; i++) {
+		if (vars[i].dsc_enabled && vars[i].bpp_x16 == params[i].bw_range.max_target_bpp_x16) {
+			kbps_increase[i] = params[i].bw_range.stream_kbps - params[i].bw_range.max_kbps;
+			tried[i] = false;
+			remaining_to_try += 1;
+		} else {
+			kbps_increase[i] = 0;
+			tried[i] = true;
+		}
+	}
+
+	while (remaining_to_try) {
+		next_index = -1;
+		max_kbps_increase = -1;
+		for (i = 0; i < count; i++) {
+			if (!tried[i]) {
+				if (max_kbps_increase == -1 || max_kbps_increase < kbps_increase[i]) {
+					max_kbps_increase = kbps_increase[i];
+					next_index = i;
+				}
+			}
+		}
+
+		if (next_index == -1)
+			break;
+
+		vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps);
+		if (drm_dp_atomic_find_vcpi_slots(state,
+						  params[next_index].port->mgr,
+						  params[next_index].port,
+						  vars[next_index].pbn,
+						  0) < 0)
+			return;
+
+		if (!drm_dp_mst_atomic_check(state)) {
+			vars[next_index].dsc_enabled = false;
+			vars[next_index].bpp_x16 = 0;
+		} else {
+			vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps);
+			if (drm_dp_atomic_find_vcpi_slots(state,
+							  params[next_index].port->mgr,
+							  params[next_index].port,
+							  vars[next_index].pbn,
+							  dm_mst_get_pbn_divider(dc_link)) < 0)
+				return;
+		}
+
+		tried[next_index] = true;
+		remaining_to_try--;
+	}
+}
+
+static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+					     struct dc_state *dc_state,
+					     struct dc_link *dc_link)
+{
+	int i;
+	struct dc_stream_state *stream;
+	struct dsc_mst_fairness_params params[MAX_PIPES];
+	struct dsc_mst_fairness_vars vars[MAX_PIPES];
+	struct amdgpu_dm_connector *aconnector;
+	int count = 0;
+
+	memset(params, 0, sizeof(params));
+
+	/* Set up params */
+	for (i = 0; i < dc_state->stream_count; i++) {
+		struct dc_dsc_policy dsc_policy = {0};
+
+		stream = dc_state->streams[i];
+
+		if (stream->link != dc_link)
+			continue;
+
+		stream->timing.flags.DSC = 0;
+
+		params[count].timing = &stream->timing;
+		params[count].sink = stream->sink;
+		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+		params[count].port = aconnector->port;
+		params[count].compression_possible = stream->sink->sink_dsc_caps.dsc_dec_caps.is_dsc_supported;
+		dc_dsc_get_policy_for_timing(params[count].timing, &dsc_policy);
+		if (!dc_dsc_compute_bandwidth_range(
+				stream->sink->ctx->dc->res_pool->dscs[0],
+				stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
+				dsc_policy.min_target_bpp,
+				dsc_policy.max_target_bpp,
+				&stream->sink->sink_dsc_caps.dsc_dec_caps,
+				&stream->timing, &params[count].bw_range))
+			params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing);
+
+		count++;
+	}
+	/* Try no compression */
+	for (i = 0; i < count; i++) {
+		vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+		vars[i].dsc_enabled = false;
+		vars[i].bpp_x16 = 0;
+		if (drm_dp_atomic_find_vcpi_slots(state,
+						 params[i].port->mgr,
+						 params[i].port,
+						 vars[i].pbn,
+						 0) < 0)
+			return false;
+	}
+	if (!drm_dp_mst_atomic_check(state)) {
+		set_dsc_configs_from_fairness_vars(params, vars, count);
+		return true;
+	}
+
+	/* Try max compression */
+	for (i = 0; i < count; i++) {
+		if (params[i].compression_possible) {
+			vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
+			vars[i].dsc_enabled = true;
+			vars[i].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
+			if (drm_dp_atomic_find_vcpi_slots(state,
+							  params[i].port->mgr,
+							  params[i].port,
+							  vars[i].pbn,
+							  dm_mst_get_pbn_divider(dc_link)) < 0)
+				return false;
+		} else {
+			vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+			vars[i].dsc_enabled = false;
+			vars[i].bpp_x16 = 0;
+			if (drm_dp_atomic_find_vcpi_slots(state,
+							  params[i].port->mgr,
+							  params[i].port,
+							  vars[i].pbn,
+							  0) < 0)
+				return false;
+		}
+	}
+	if (drm_dp_mst_atomic_check(state))
+		return false;
+
+	/* Optimize degree of compression */
+	increase_dsc_bpp(state, dc_link, params, vars, count);
+
+	try_disable_dsc(state, dc_link, params, vars, count);
+
+	set_dsc_configs_from_fairness_vars(params, vars, count);
+
+	return true;
+}
+
+bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+				       struct dc_state *dc_state)
+{
+	int i, j;
+	struct dc_stream_state *stream;
+	bool computed_streams[MAX_PIPES];
+	struct amdgpu_dm_connector *aconnector;
+
+	for (i = 0; i < dc_state->stream_count; i++)
+		computed_streams[i] = false;
+
+	for (i = 0; i < dc_state->stream_count; i++) {
+		stream = dc_state->streams[i];
+
+		if (stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST)
+			continue;
+
+		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+		if (!aconnector || !aconnector->dc_sink)
+			continue;
+
+		if (!aconnector->dc_sink->sink_dsc_caps.dsc_dec_caps.is_dsc_supported)
+			continue;
+
+		if (computed_streams[i])
+			continue;
+
+		mutex_lock(&aconnector->mst_mgr.lock);
+		if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link)) {
+			mutex_unlock(&aconnector->mst_mgr.lock);
+			return false;
+		}
+		mutex_unlock(&aconnector->mst_mgr.lock);
+
+		for (j = 0; j < dc_state->stream_count; j++) {
+			if (dc_state->streams[j]->link == stream->link)
+				computed_streams[j] = true;
+		}
+	}
+
+	for (i = 0; i < dc_state->stream_count; i++) {
+		stream = dc_state->streams[i];
+
+		if (stream->timing.flags.DSC == 1)
+			dcn20_add_dsc_to_stream_resource(stream->ctx->dc, dc_state, stream);
+	}
+
+	return true;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 2da851b40042..d6813ce67bbd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -29,7 +29,14 @@
 struct amdgpu_display_manager;
 struct amdgpu_dm_connector;
 
+int dm_mst_get_pbn_divider(struct dc_link *link);
+
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 				       struct amdgpu_dm_connector *aconnector);
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+				       struct dc_state *dc_state);
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index f4cfa0caeba8..a2e1a73f66b8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -342,10 +342,11 @@ bool dm_pp_get_clock_levels_by_type(
 	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
 		if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
 			dc_to_pp_clock_type(clk_type), &pp_clks)) {
-		/* Error in pplib. Provide default values. */
+			/* Error in pplib. Provide default values. */
+			get_default_clock_levels(clk_type, dc_clks);
 			return true;
 		}
-	} else if (adev->smu.funcs && adev->smu.funcs->get_clock_by_type) {
+	} else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) {
 		if (smu_get_clock_by_type(&adev->smu,
 					  dc_to_pp_clock_type(clk_type),
 					  &pp_clks)) {
@@ -365,7 +366,7 @@ bool dm_pp_get_clock_levels_by_type(
 			validation_clks.memory_max_clock = 80000;
 			validation_clks.level = 0;
 		}
-	} else if (adev->smu.funcs && adev->smu.funcs->get_max_high_clocks) {
+	} else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_max_high_clocks) {
 		if (smu_get_max_high_clocks(&adev->smu, &validation_clks)) {
 			DRM_INFO("DM_PPLIB: Warning: using default validation clocks!\n");
 			validation_clks.engine_max_clock = 72000;
@@ -506,8 +507,8 @@ bool dm_pp_apply_clock_for_voltage_request(
 		ret = adev->powerplay.pp_funcs->display_clock_voltage_request(
 			adev->powerplay.pp_handle,
 			&pp_clock_request);
-	else if (adev->smu.funcs &&
-		 adev->smu.funcs->display_clock_voltage_request)
+	else if (adev->smu.ppt_funcs &&
+		 adev->smu.ppt_funcs->display_clock_voltage_request)
 		ret = smu_display_clock_voltage_request(&adev->smu,
 							&pp_clock_request);
 	if (ret)
@@ -527,7 +528,7 @@ bool dm_pp_get_static_clocks(
 		ret = adev->powerplay.pp_funcs->get_current_clocks(
 			adev->powerplay.pp_handle,
 			&pp_clk_info);
-	else if (adev->smu.funcs)
+	else if (adev->smu.ppt_funcs)
 		ret = smu_get_current_clocks(&adev->smu, &pp_clk_info);
 	if (ret)
 		return false;
@@ -589,10 +590,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
 	if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
 		pp_funcs->set_watermarks_for_clocks_ranges(pp_handle,
 							   &wm_with_clock_ranges);
-	else if (adev->smu.funcs &&
-		 adev->smu.funcs->set_watermarks_for_clock_ranges)
+	else
 		smu_set_watermarks_for_clock_ranges(&adev->smu,
-						    &wm_with_clock_ranges);
+				&wm_with_clock_ranges);
 }
 
 void pp_rv_set_pme_wa_enable(struct pp_smu *pp)
@@ -604,7 +604,7 @@ void pp_rv_set_pme_wa_enable(struct pp_smu *pp)
 
 	if (pp_funcs && pp_funcs->notify_smu_enable_pwe)
 		pp_funcs->notify_smu_enable_pwe(pp_handle);
-	else if (adev->smu.funcs)
+	else if (adev->smu.ppt_funcs)
 		smu_notify_smu_enable_pwe(&adev->smu);
 }
 
@@ -665,7 +665,6 @@ enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,
 {
 	const struct dc_context *ctx = pp->dm;
 	struct amdgpu_device *adev = ctx->driver_context;
-	struct smu_context *smu = &adev->smu;
 	struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges;
 	struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks =
 			wm_with_clock_ranges.wm_dmif_clocks_ranges;
@@ -708,15 +707,7 @@ enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,
 			ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000;
 	}
 
-	if (!smu->funcs)
-		return PP_SMU_RESULT_UNSUPPORTED;
-
-	/* 0: successful or smu.funcs->set_watermarks_for_clock_ranges = NULL;
-	 * 1: fail
-	 */
-	if (smu_set_watermarks_for_clock_ranges(&adev->smu,
-			&wm_with_clock_ranges))
-		return PP_SMU_RESULT_UNSUPPORTED;
+	smu_set_watermarks_for_clock_ranges(&adev->smu,	&wm_with_clock_ranges);
 
 	return PP_SMU_RESULT_OK;
 }
@@ -727,10 +718,10 @@ enum pp_smu_status pp_nv_set_pme_wa_enable(struct pp_smu *pp)
 	struct amdgpu_device *adev = ctx->driver_context;
 	struct smu_context *smu = &adev->smu;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	/* 0: successful or smu.funcs->set_azalia_d3_pme = NULL;  1: fail */
+	/* 0: successful or smu.ppt_funcs->set_azalia_d3_pme = NULL;  1: fail */
 	if (smu_set_azalia_d3_pme(smu))
 		return PP_SMU_RESULT_FAIL;
 
@@ -743,10 +734,10 @@ enum pp_smu_status pp_nv_set_display_count(struct pp_smu *pp, int count)
 	struct amdgpu_device *adev = ctx->driver_context;
 	struct smu_context *smu = &adev->smu;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	/* 0: successful or smu.funcs->set_display_count = NULL;  1: fail */
+	/* 0: successful or smu.ppt_funcs->set_display_count = NULL;  1: fail */
 	if (smu_set_display_count(smu, count))
 		return PP_SMU_RESULT_FAIL;
 
@@ -759,10 +750,10 @@ enum pp_smu_status pp_nv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int mhz)
 	struct amdgpu_device *adev = ctx->driver_context;
 	struct smu_context *smu = &adev->smu;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	/* 0: successful or smu.funcs->set_deep_sleep_dcefclk = NULL;1: fail */
+	/* 0: successful or smu.ppt_funcs->set_deep_sleep_dcefclk = NULL;1: fail */
 	if (smu_set_deep_sleep_dcefclk(smu, mhz))
 		return PP_SMU_RESULT_FAIL;
 
@@ -777,13 +768,13 @@ enum pp_smu_status pp_nv_set_hard_min_dcefclk_by_freq(
 	struct smu_context *smu = &adev->smu;
 	struct pp_display_clock_request clock_req;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
 	clock_req.clock_type = amd_pp_dcef_clock;
 	clock_req.clock_freq_in_khz = mhz * 1000;
 
-	/* 0: successful or smu.funcs->display_clock_voltage_request = NULL
+	/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
 	 * 1: fail
 	 */
 	if (smu_display_clock_voltage_request(smu, &clock_req))
@@ -799,13 +790,13 @@ enum pp_smu_status pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz)
 	struct smu_context *smu = &adev->smu;
 	struct pp_display_clock_request clock_req;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
 	clock_req.clock_type = amd_pp_mem_clock;
 	clock_req.clock_freq_in_khz = mhz * 1000;
 
-	/* 0: successful or smu.funcs->display_clock_voltage_request = NULL
+	/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
 	 * 1: fail
 	 */
 	if (smu_display_clock_voltage_request(smu, &clock_req))
@@ -835,7 +826,7 @@ enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
 	struct smu_context *smu = &adev->smu;
 	struct pp_display_clock_request clock_req;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
 	switch (clock_id) {
@@ -853,7 +844,7 @@ enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
 	}
 	clock_req.clock_freq_in_khz = mhz * 1000;
 
-	/* 0: successful or smu.funcs->display_clock_voltage_request = NULL
+	/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
 	 * 1: fail
 	 */
 	if (smu_display_clock_voltage_request(smu, &clock_req))
@@ -869,13 +860,13 @@ enum pp_smu_status pp_nv_get_maximum_sustainable_clocks(
 	struct amdgpu_device *adev = ctx->driver_context;
 	struct smu_context *smu = &adev->smu;
 
-	if (!smu->funcs)
+	if (!smu->ppt_funcs)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	if (!smu->funcs->get_max_sustainable_clocks_by_dc)
+	if (!smu->ppt_funcs->get_max_sustainable_clocks_by_dc)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	if (!smu->funcs->get_max_sustainable_clocks_by_dc(smu, max_clocks))
+	if (!smu_get_max_sustainable_clocks_by_dc(smu, max_clocks))
 		return PP_SMU_RESULT_OK;
 
 	return PP_SMU_RESULT_FAIL;
@@ -894,13 +885,95 @@ enum pp_smu_status pp_nv_get_uclk_dpm_states(struct pp_smu *pp,
 	if (!smu->ppt_funcs->get_uclk_dpm_states)
 		return PP_SMU_RESULT_UNSUPPORTED;
 
-	if (!smu->ppt_funcs->get_uclk_dpm_states(smu,
+	if (!smu_get_uclk_dpm_states(smu,
 			clock_values_in_khz, num_states))
 		return PP_SMU_RESULT_OK;
 
 	return PP_SMU_RESULT_FAIL;
 }
 
+enum pp_smu_status pp_rn_get_dpm_clock_table(
+		struct pp_smu *pp, struct dpm_clocks *clock_table)
+{
+	const struct dc_context *ctx = pp->dm;
+	struct amdgpu_device *adev = ctx->driver_context;
+	struct smu_context *smu = &adev->smu;
+
+	if (!smu->ppt_funcs)
+		return PP_SMU_RESULT_UNSUPPORTED;
+
+	if (!smu->ppt_funcs->get_dpm_clock_table)
+		return PP_SMU_RESULT_UNSUPPORTED;
+
+	if (!smu_get_dpm_clock_table(smu, clock_table))
+		return PP_SMU_RESULT_OK;
+
+	return PP_SMU_RESULT_FAIL;
+}
+
+enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp,
+		struct pp_smu_wm_range_sets *ranges)
+{
+	const struct dc_context *ctx = pp->dm;
+	struct amdgpu_device *adev = ctx->driver_context;
+	struct smu_context *smu = &adev->smu;
+	struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges;
+	struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks =
+			wm_with_clock_ranges.wm_dmif_clocks_ranges;
+	struct dm_pp_clock_range_for_mcif_wm_set_soc15 *wm_soc_clocks =
+			wm_with_clock_ranges.wm_mcif_clocks_ranges;
+	int32_t i;
+
+	if (!smu->ppt_funcs)
+		return PP_SMU_RESULT_UNSUPPORTED;
+
+	wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
+	wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
+
+	for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
+		if (ranges->reader_wm_sets[i].wm_inst > 3)
+			wm_dce_clocks[i].wm_set_id = WM_SET_A;
+		else
+			wm_dce_clocks[i].wm_set_id =
+					ranges->reader_wm_sets[i].wm_inst;
+
+		wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz =
+			ranges->reader_wm_sets[i].min_drain_clk_mhz;
+
+		wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz =
+			ranges->reader_wm_sets[i].max_drain_clk_mhz;
+
+		wm_dce_clocks[i].wm_min_mem_clk_in_khz =
+			ranges->reader_wm_sets[i].min_fill_clk_mhz;
+
+		wm_dce_clocks[i].wm_max_mem_clk_in_khz =
+			ranges->reader_wm_sets[i].max_fill_clk_mhz;
+	}
+
+	for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) {
+		if (ranges->writer_wm_sets[i].wm_inst > 3)
+			wm_soc_clocks[i].wm_set_id = WM_SET_A;
+		else
+			wm_soc_clocks[i].wm_set_id =
+					ranges->writer_wm_sets[i].wm_inst;
+		wm_soc_clocks[i].wm_min_socclk_clk_in_khz =
+				ranges->writer_wm_sets[i].min_fill_clk_mhz;
+
+		wm_soc_clocks[i].wm_max_socclk_clk_in_khz =
+			ranges->writer_wm_sets[i].max_fill_clk_mhz;
+
+		wm_soc_clocks[i].wm_min_mem_clk_in_khz =
+			ranges->writer_wm_sets[i].min_drain_clk_mhz;
+
+		wm_soc_clocks[i].wm_max_mem_clk_in_khz =
+			ranges->writer_wm_sets[i].max_drain_clk_mhz;
+	}
+
+	smu_set_watermarks_for_clock_ranges(&adev->smu, &wm_with_clock_ranges);
+
+	return PP_SMU_RESULT_OK;
+}
+
 void dm_pp_get_funcs(
 		struct dc_context *ctx,
 		struct pp_smu_funcs *funcs)
@@ -921,7 +994,6 @@ void dm_pp_get_funcs(
 		funcs->rv_funcs.set_hard_min_fclk_by_freq =
 				pp_rv_set_hard_min_fclk_by_freq;
 		break;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	case DCN_VERSION_2_0:
 		funcs->ctx.ver = PP_SMU_VER_NV;
 		funcs->nv_funcs.pp_smu.dm = ctx;
@@ -944,7 +1016,13 @@ void dm_pp_get_funcs(
 		funcs->nv_funcs.get_uclk_dpm_states = pp_nv_get_uclk_dpm_states;
 		funcs->nv_funcs.set_pstate_handshake_support = pp_nv_set_pstate_handshake_support;
 		break;
-#endif
+
+	case DCN_VERSION_2_1:
+		funcs->ctx.ver = PP_SMU_VER_RN;
+		funcs->rn_funcs.pp_smu.dm = ctx;
+		funcs->rn_funcs.set_wm_ranges = pp_rn_set_wm_ranges;
+		funcs->rn_funcs.get_dpm_clock_table = pp_rn_get_dpm_clock_table;
+		break;
 	default:
 		DRM_ERROR("smu version is not supported !\n");
 		break;
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 627982cb15d2..6e3dddc73246 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -25,19 +25,10 @@
 
 DC_LIBS = basics bios calcs clk_mgr dce gpio irq virtual
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 DC_LIBS += dcn20
-endif
-
-
-ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 DC_LIBS += dsc
-endif
-
-ifdef CONFIG_DRM_AMD_DC_DCN1_0
 DC_LIBS += dcn10 dml
-endif
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
 DC_LIBS += dcn21
 endif
 
@@ -48,6 +39,10 @@ DC_LIBS += dce110
 DC_LIBS += dce100
 DC_LIBS += dce80
 
+ifdef CONFIG_DRM_AMD_DC_HDCP
+DC_LIBS += hdcp
+endif
+
 AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS)))
 
 include $(AMD_DC)
@@ -55,7 +50,7 @@ include $(AMD_DC)
 DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
 dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 DISPLAY_CORE += dc_vm_helper.o
 endif
 
@@ -66,5 +61,6 @@ AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o)
 AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE)
 AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE)
 
-
-
+DC_DMUB += dc_dmub_srv.o
+AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
+AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB)
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index a50a76471107..7ad0cad0f4ef 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -25,7 +25,7 @@
 # subcomponents.
 
 BASICS = conversion.o fixpt31_32.o \
-	log_helpers.o vector.o
+	log_helpers.o vector.o dc_common.o
 
 AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
 
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
new file mode 100644
index 000000000000..b2fc4f8e6482
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "dc_common.h"
+#include "basics/conversion.h"
+
+bool is_rgb_cspace(enum dc_color_space output_color_space)
+{
+	switch (output_color_space) {
+	case COLOR_SPACE_SRGB:
+	case COLOR_SPACE_SRGB_LIMITED:
+	case COLOR_SPACE_2020_RGB_FULLRANGE:
+	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+	case COLOR_SPACE_ADOBERGB:
+		return true;
+	case COLOR_SPACE_YCBCR601:
+	case COLOR_SPACE_YCBCR709:
+	case COLOR_SPACE_YCBCR601_LIMITED:
+	case COLOR_SPACE_YCBCR709_LIMITED:
+	case COLOR_SPACE_2020_YCBCR:
+		return false;
+	default:
+		/* Add a case to switch */
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+}
+
+bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
+{
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
+		return true;
+	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
+		return true;
+	return false;
+}
+
+bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
+{
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
+		return true;
+	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
+		return true;
+	return false;
+}
+
+bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
+{
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
+		return true;
+	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
+		return true;
+	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
+		return true;
+	return false;
+}
+
+void build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
+		const struct dc_plane_state *plane_state)
+{
+	if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN
+			&& plane_state->format != SURFACE_PIXEL_FORMAT_INVALID
+			&& plane_state->input_csc_color_matrix.enable_adjustment
+			&& plane_state->coeff_reduction_factor.value != 0) {
+		bias_and_scale->scale_blue = fixed_point_to_int_frac(
+			dc_fixpt_mul(plane_state->coeff_reduction_factor,
+					dc_fixpt_from_fraction(256, 255)),
+				2,
+				13);
+		bias_and_scale->scale_red = bias_and_scale->scale_blue;
+		bias_and_scale->scale_green = bias_and_scale->scale_blue;
+	} else {
+		bias_and_scale->scale_blue = 0x2000;
+		bias_and_scale->scale_red = 0x2000;
+		bias_and_scale->scale_green = 0x2000;
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dc_common.h b/drivers/gpu/drm/amd/display/dc/basics/dc_common.h
new file mode 100644
index 000000000000..7c0cbf47e8ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/basics/dc_common.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DAL_DC_COMMON_H__
+#define __DAL_DC_COMMON_H__
+
+#include "core_types.h"
+
+bool is_rgb_cspace(enum dc_color_space output_color_space);
+
+bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
+
+bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
+
+bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
+
+void build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
+		const struct dc_plane_state *plane_state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 221e0f56389f..008d4d11339d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -111,7 +111,7 @@ struct dc_bios *bios_parser_create(
 	return NULL;
 }
 
-static void destruct(struct bios_parser *bp)
+static void bios_parser_destruct(struct bios_parser *bp)
 {
 	kfree(bp->base.bios_local_image);
 	kfree(bp->base.integrated_info);
@@ -126,7 +126,7 @@ static void bios_parser_destroy(struct dc_bios **dcb)
 		return;
 	}
 
-	destruct(bp);
+	bios_parser_destruct(bp);
 
 	kfree(bp);
 	*dcb = NULL;
@@ -2189,7 +2189,7 @@ static uint32_t get_support_mask_for_device_id(struct device_id device_id)
 		break;
 	default:
 		break;
-	};
+	}
 
 	/* Unidentified device ID, return empty support mask. */
 	return 0;
@@ -2543,7 +2543,6 @@ static enum bp_result construct_integrated_info(
 
 	/* Sort voltage table from low to high*/
 	if (result == BP_RESULT_OK) {
-		struct clock_voltage_caps temp = {0, 0};
 		uint32_t i;
 		uint32_t j;
 
@@ -2553,10 +2552,8 @@ static enum bp_result construct_integrated_info(
 						info->disp_clk_voltage[j].max_supported_clk <
 						info->disp_clk_voltage[j-1].max_supported_clk) {
 					/* swap j and j - 1*/
-					temp = info->disp_clk_voltage[j-1];
-					info->disp_clk_voltage[j-1] =
-							info->disp_clk_voltage[j];
-					info->disp_clk_voltage[j] = temp;
+					swap(info->disp_clk_voltage[j - 1],
+					     info->disp_clk_voltage[j]);
 				}
 			}
 		}
@@ -2742,7 +2739,6 @@ static enum bp_result bios_get_board_layout_info(
 	struct board_layout_info *board_layout_info)
 {
 	unsigned int i;
-	struct bios_parser *bp;
 	enum bp_result record_result;
 
 	const unsigned int slot_index_to_vbios_id[MAX_BOARD_SLOTS] = {
@@ -2751,7 +2747,6 @@ static enum bp_result bios_get_board_layout_info(
 		0, 0
 	};
 
-	bp = BP_FROM_DCB(dcb);
 	if (board_layout_info == NULL) {
 		DC_LOG_DETECTION_EDID_PARSER("Invalid board_layout_info\n");
 		return BP_RESULT_BADINPUT;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index dff65c0fe82f..2f1c9584ac32 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -111,7 +111,7 @@ static struct atom_encoder_caps_record *get_encoder_cap_record(
 
 #define DATA_TABLES(table) (bp->master_data_tbl->listOfdatatables.table)
 
-static void destruct(struct bios_parser *bp)
+static void bios_parser2_destruct(struct bios_parser *bp)
 {
 	kfree(bp->base.bios_local_image);
 	kfree(bp->base.integrated_info);
@@ -126,7 +126,7 @@ static void firmware_parser_destroy(struct dc_bios **dcb)
 		return;
 	}
 
-	destruct(bp);
+	bios_parser2_destruct(bp);
 
 	kfree(bp);
 	*dcb = NULL;
@@ -294,11 +294,21 @@ static enum bp_result bios_parser_get_i2c_info(struct dc_bios *dcb,
 	struct atom_display_object_path_v2 *object;
 	struct atom_common_record_header *header;
 	struct atom_i2c_record *record;
+	struct atom_i2c_record dummy_record = {0};
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
 
 	if (!info)
 		return BP_RESULT_BADINPUT;
 
+	if (id.type == OBJECT_TYPE_GENERIC) {
+		dummy_record.i2c_id = id.id;
+
+		if (get_gpio_i2c_info(bp, &dummy_record, info) == BP_RESULT_OK)
+			return BP_RESULT_OK;
+		else
+			return BP_RESULT_NORECORD;
+	}
+
 	object = get_bios_object(bp, id);
 
 	if (!object)
@@ -341,6 +351,7 @@ static enum bp_result get_gpio_i2c_info(
 	struct atom_gpio_pin_lut_v2_1 *header;
 	uint32_t count = 0;
 	unsigned int table_index = 0;
+	bool find_valid = false;
 
 	if (!info)
 		return BP_RESULT_BADINPUT;
@@ -368,33 +379,28 @@ static enum bp_result get_gpio_i2c_info(
 			- sizeof(struct atom_common_table_header))
 				/ sizeof(struct atom_gpio_pin_assignment);
 
-	table_index = record->i2c_id  & I2C_HW_LANE_MUX;
-
-	if (count < table_index) {
-		bool find_valid = false;
-
-		for (table_index = 0; table_index < count; table_index++) {
-			if (((record->i2c_id & I2C_HW_CAP) == (
-			header->gpio_pin[table_index].gpio_id &
-							I2C_HW_CAP)) &&
-			((record->i2c_id & I2C_HW_ENGINE_ID_MASK)  ==
-			(header->gpio_pin[table_index].gpio_id &
-						I2C_HW_ENGINE_ID_MASK)) &&
-			((record->i2c_id & I2C_HW_LANE_MUX) ==
-			(header->gpio_pin[table_index].gpio_id &
-							I2C_HW_LANE_MUX))) {
-				/* still valid */
-				find_valid = true;
-				break;
-			}
+	for (table_index = 0; table_index < count; table_index++) {
+		if (((record->i2c_id & I2C_HW_CAP) == (
+		header->gpio_pin[table_index].gpio_id &
+						I2C_HW_CAP)) &&
+		((record->i2c_id & I2C_HW_ENGINE_ID_MASK)  ==
+		(header->gpio_pin[table_index].gpio_id &
+					I2C_HW_ENGINE_ID_MASK)) &&
+		((record->i2c_id & I2C_HW_LANE_MUX) ==
+		(header->gpio_pin[table_index].gpio_id &
+						I2C_HW_LANE_MUX))) {
+			/* still valid */
+			find_valid = true;
+			break;
 		}
-		/* If we don't find the entry that we are looking for then
-		 *  we will return BP_Result_BadBiosTable.
-		 */
-		if (find_valid == false)
-			return BP_RESULT_BADBIOSTABLE;
 	}
 
+	/* If we don't find the entry that we are looking for then
+	 *  we will return BP_Result_BadBiosTable.
+	 */
+	if (find_valid == false)
+		return BP_RESULT_BADBIOSTABLE;
+
 	/* get the GPIO_I2C_INFO */
 	info->i2c_hw_assist = (record->i2c_id & I2C_HW_CAP) ? true : false;
 	info->i2c_line = record->i2c_id & I2C_HW_LANE_MUX;
@@ -828,6 +834,7 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
 		case 1:
 			return get_ss_info_v4_1(bp, signal, index, ss_info);
 		case 2:
+		case 3:
 			return get_ss_info_v4_2(bp, signal, index, ss_info);
 		default:
 			break;
@@ -986,7 +993,7 @@ static uint32_t get_support_mask_for_device_id(struct device_id device_id)
 		break;
 	default:
 		break;
-	};
+	}
 
 	/* Unidentified device ID, return empty support mask. */
 	return 0;
@@ -1205,6 +1212,8 @@ static enum bp_result get_firmware_info_v3_1(
 				bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10;
 	}
 
+	info->oem_i2c_present = false;
+
 	return BP_RESULT_OK;
 }
 
@@ -1283,6 +1292,13 @@ static enum bp_result get_firmware_info_v3_2(
 					bp->cmd_tbl.get_smu_clock_info(bp, SMU11_SYSPLL3_0_ID) * 10;
 	}
 
+	if (firmware_info->board_i2c_feature_id == 0x2) {
+		info->oem_i2c_present = true;
+		info->oem_i2c_obj_id = firmware_info->board_i2c_feature_gpio_id;
+	} else {
+		info->oem_i2c_present = false;
+	}
+
 	return BP_RESULT_OK;
 }
 
@@ -1402,10 +1418,8 @@ static enum bp_result get_integrated_info_v11(
 	info->ma_channel_number = info_v11->umachannelnumber;
 	info->lvds_ss_percentage =
 	le16_to_cpu(info_v11->lvds_ss_percentage);
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	info->dp_ss_control =
 	le16_to_cpu(info_v11->reserved1);
-#endif
 	info->lvds_sspread_rate_in_10hz =
 	le16_to_cpu(info_v11->lvds_ss_rate_10hz);
 	info->hdmi_ss_percentage =
@@ -1613,8 +1627,6 @@ static enum bp_result construct_integrated_info(
 
 	struct atom_common_table_header *header;
 	struct atom_data_revision revision;
-
-	struct clock_voltage_caps temp = {0, 0};
 	uint32_t i;
 	uint32_t j;
 
@@ -1627,6 +1639,7 @@ static enum bp_result construct_integrated_info(
 		/* Don't need to check major revision as they are all 1 */
 		switch (revision.minor) {
 		case 11:
+		case 12:
 			result = get_integrated_info_v11(bp, info);
 			break;
 		default:
@@ -1644,10 +1657,8 @@ static enum bp_result construct_integrated_info(
 				info->disp_clk_voltage[j-1].max_supported_clk
 				) {
 				/* swap j and j - 1*/
-				temp = info->disp_clk_voltage[j-1];
-				info->disp_clk_voltage[j-1] =
-					info->disp_clk_voltage[j];
-				info->disp_clk_voltage[j] = temp;
+				swap(info->disp_clk_voltage[j - 1],
+				     info->disp_clk_voltage[j]);
 			}
 		}
 	}
@@ -1829,7 +1840,6 @@ static enum bp_result bios_get_board_layout_info(
 	struct board_layout_info *board_layout_info)
 {
 	unsigned int i;
-	struct bios_parser *bp;
 	enum bp_result record_result;
 
 	const unsigned int slot_index_to_vbios_id[MAX_BOARD_SLOTS] = {
@@ -1838,7 +1848,6 @@ static enum bp_result bios_get_board_layout_info(
 		0, 0
 	};
 
-	bp = BP_FROM_DCB(dcb);
 	if (board_layout_info == NULL) {
 		DC_LOG_DETECTION_EDID_PARSER("Invalid board_layout_info\n");
 		return BP_RESULT_BADINPUT;
@@ -1918,7 +1927,7 @@ static const struct dc_vbios_funcs vbios_funcs = {
 	.get_board_layout_info = bios_get_board_layout_info,
 };
 
-static bool bios_parser_construct(
+static bool bios_parser2_construct(
 	struct bios_parser *bp,
 	struct bp_init_data *init,
 	enum dce_version dce_version)
@@ -2011,7 +2020,7 @@ struct dc_bios *firmware_parser_create(
 	if (!bp)
 		return NULL;
 
-	if (bios_parser_construct(bp, init, dce_version))
+	if (bios_parser2_construct(bp, init, dce_version))
 		return &bp->base;
 
 	kfree(bp);
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index bb2e8105e6ab..2cb7a4288cb7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -37,6 +37,8 @@
 #include "bios_parser_types_internal2.h"
 #include "amdgpu.h"
 
+#include "dc_dmub_srv.h"
+#include "dc.h"
 
 #define DC_LOGGER \
 	bp->base.ctx->logger
@@ -103,6 +105,21 @@ static void init_dig_encoder_control(struct bios_parser *bp)
 	}
 }
 
+static void encoder_control_dmcub(
+		struct dc_dmub_srv *dmcub,
+		struct dig_encoder_stream_setup_parameters_v1_5 *dig)
+{
+	struct dmub_rb_cmd_digx_encoder_control encoder_control = { 0 };
+
+	encoder_control.header.type = DMUB_CMD__VBIOS;
+	encoder_control.header.sub_type = DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL;
+	encoder_control.encoder_control.dig.stream_param = *dig;
+
+	dc_dmub_srv_cmd_queue(dmcub, &encoder_control.header);
+	dc_dmub_srv_cmd_execute(dmcub);
+	dc_dmub_srv_wait_idle(dmcub);
+}
+
 static enum bp_result encoder_control_digx_v1_5(
 	struct bios_parser *bp,
 	struct bp_encoder_control *cntl)
@@ -155,6 +172,12 @@ static enum bp_result encoder_control_digx_v1_5(
 			break;
 		}
 
+	if (bp->base.ctx->dc->ctx->dmub_srv &&
+	    bp->base.ctx->dc->debug.dmub_command_table) {
+		encoder_control_dmcub(bp->base.ctx->dmub_srv, &params);
+		return BP_RESULT_OK;
+	}
+
 	if (EXEC_BIOS_CMD_TABLE(digxencodercontrol, params))
 		result = BP_RESULT_OK;
 
@@ -191,6 +214,22 @@ static void init_transmitter_control(struct bios_parser *bp)
 	}
 }
 
+static void transmitter_control_dmcub(
+		struct dc_dmub_srv *dmcub,
+		struct dig_transmitter_control_parameters_v1_6 *dig)
+{
+	struct dmub_rb_cmd_dig1_transmitter_control transmitter_control;
+
+	transmitter_control.header.type = DMUB_CMD__VBIOS;
+	transmitter_control.header.sub_type =
+		DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL;
+	transmitter_control.transmitter_control.dig = *dig;
+
+	dc_dmub_srv_cmd_queue(dmcub, &transmitter_control.header);
+	dc_dmub_srv_cmd_execute(dmcub);
+	dc_dmub_srv_wait_idle(dmcub);
+}
+
 static enum bp_result transmitter_control_v1_6(
 	struct bios_parser *bp,
 	struct bp_transmitter_control *cntl)
@@ -222,6 +261,11 @@ static enum bp_result transmitter_control_v1_6(
 		__func__, ps.param.symclk_10khz);
 	}
 
+	if (bp->base.ctx->dc->ctx->dmub_srv &&
+	    bp->base.ctx->dc->debug.dmub_command_table) {
+		transmitter_control_dmcub(bp->base.ctx->dmub_srv, &ps.param);
+		return BP_RESULT_OK;
+	}
 
 /*color_depth not used any more, driver has deep color factor in the Phyclk*/
 	if (EXEC_BIOS_CMD_TABLE(dig1transmittercontrol, ps))
@@ -255,7 +299,20 @@ static void init_set_pixel_clock(struct bios_parser *bp)
 	}
 }
 
+static void set_pixel_clock_dmcub(
+		struct dc_dmub_srv *dmcub,
+		struct set_pixel_clock_parameter_v1_7 *clk)
+{
+	struct dmub_rb_cmd_set_pixel_clock pixel_clock = { 0 };
+
+	pixel_clock.header.type = DMUB_CMD__VBIOS;
+	pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK;
+	pixel_clock.pixel_clock.clk = *clk;
 
+	dc_dmub_srv_cmd_queue(dmcub, &pixel_clock.header);
+	dc_dmub_srv_cmd_execute(dmcub);
+	dc_dmub_srv_wait_idle(dmcub);
+}
 
 static enum bp_result set_pixel_clock_v7(
 	struct bios_parser *bp,
@@ -331,6 +388,12 @@ static enum bp_result set_pixel_clock_v7(
 		if (bp_params->signal_type == SIGNAL_TYPE_DVI_DUAL_LINK)
 			clk.miscinfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN;
 
+		if (bp->base.ctx->dc->ctx->dmub_srv &&
+		    bp->base.ctx->dc->debug.dmub_command_table) {
+			set_pixel_clock_dmcub(bp->base.ctx->dmub_srv, &clk);
+			return BP_RESULT_OK;
+		}
+
 		if (EXEC_BIOS_CMD_TABLE(setpixelclock, clk))
 			result = BP_RESULT_OK;
 	}
@@ -585,6 +648,21 @@ static void init_enable_disp_power_gating(
 	}
 }
 
+static void enable_disp_power_gating_dmcub(
+	struct dc_dmub_srv *dmcub,
+	struct enable_disp_power_gating_parameters_v2_1 *pwr)
+{
+	struct dmub_rb_cmd_enable_disp_power_gating power_gating;
+
+	power_gating.header.type = DMUB_CMD__VBIOS;
+	power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
+	power_gating.power_gating.pwr = *pwr;
+
+	dc_dmub_srv_cmd_queue(dmcub, &power_gating.header);
+	dc_dmub_srv_cmd_execute(dmcub);
+	dc_dmub_srv_wait_idle(dmcub);
+}
+
 static enum bp_result enable_disp_power_gating_v2_1(
 	struct bios_parser *bp,
 	enum controller_id crtc_id,
@@ -604,6 +682,13 @@ static enum bp_result enable_disp_power_gating_v2_1(
 	ps.param.enable =
 		bp->cmd_helper->disp_power_gating_action_to_atom(action);
 
+	if (bp->base.ctx->dc->ctx->dmub_srv &&
+	    bp->base.ctx->dc->debug.dmub_command_table) {
+		enable_disp_power_gating_dmcub(bp->base.ctx->dmub_srv,
+					       &ps.param);
+		return BP_RESULT_OK;
+	}
+
 	if (EXEC_BIOS_CMD_TABLE(enabledisppowergating, ps.param))
 		result = BP_RESULT_OK;
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index db153ddf0fee..7388c987c595 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -55,23 +55,19 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
 	case DCE_VERSION_11_22:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case DCN_VERSION_1_0:
 	case DCN_VERSION_1_01:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
 #endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case DCN_VERSION_2_0:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case DCN_VERSION_2_1:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
-#endif
 	case DCE_VERSION_12_0:
 	case DCE_VERSION_12_1:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 26c6d735cdc7..4674aca8f206 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -1,5 +1,6 @@
 #
 # Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2019 Raptor Engineering, LLC
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -24,7 +25,13 @@
 # It calculates Bandwidth and Watermarks values for HW programming
 #
 
+ifdef CONFIG_X86
 calcs_ccflags := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+calcs_ccflags := -mhard-float -maltivec
+endif
 
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
@@ -32,6 +39,7 @@ IS_OLD_GCC = 1
 endif
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -40,6 +48,7 @@ calcs_ccflags += -mpreferred-stack-boundary=4
 else
 calcs_ccflags += -msse2
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_ccflags)
@@ -47,7 +56,7 @@ CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_ccflags) -Wno-tautologi
 
 BW_CALCS = dce_calcs.o bw_fixed.o custom_float.o
 
-ifdef CONFIG_DRM_AMD_DC_DCN1_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 BW_CALCS += dcn_calcs.o dcn_calc_math.o dcn_calc_auto.o
 endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index a1d49256fab7..5d081c42e81b 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -154,14 +154,14 @@ static void calculate_bandwidth(
 
 
 
-	if (data->d0_underlay_mode == bw_def_none) { d0_underlay_enable = 0; }
-	else {
-		d0_underlay_enable = 1;
-	}
-	if (data->d1_underlay_mode == bw_def_none) { d1_underlay_enable = 0; }
-	else {
-		d1_underlay_enable = 1;
-	}
+	if (data->d0_underlay_mode == bw_def_none)
+		d0_underlay_enable = false;
+	else
+		d0_underlay_enable = true;
+	if (data->d1_underlay_mode == bw_def_none)
+		d1_underlay_enable = false;
+	else
+		d1_underlay_enable = true;
 	data->number_of_underlay_surfaces = d0_underlay_enable + d1_underlay_enable;
 	switch (data->underlay_surface_type) {
 	case bw_def_420:
@@ -286,8 +286,8 @@ static void calculate_bandwidth(
 	data->cursor_width_pixels[2] = bw_int_to_fixed(0);
 	data->cursor_width_pixels[3] = bw_int_to_fixed(0);
 	/* graphics surface parameters from spreadsheet*/
-	fbc_enabled = 0;
-	lpt_enabled = 0;
+	fbc_enabled = false;
+	lpt_enabled = false;
 	for (i = 4; i <= maximum_number_of_surfaces - 3; i++) {
 		if (i < data->number_of_displays + 4) {
 			if (i == 4 && data->d0_underlay_mode == bw_def_underlay_only) {
@@ -338,9 +338,9 @@ static void calculate_bandwidth(
 			data->access_one_channel_only[i] = 0;
 		}
 		if (data->fbc_en[i] == 1) {
-			fbc_enabled = 1;
+			fbc_enabled = true;
 			if (data->lpt_en[i] == 1) {
-				lpt_enabled = 1;
+				lpt_enabled = true;
 			}
 		}
 		data->cursor_width_pixels[i] = bw_int_to_fixed(vbios->cursor_width);
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 9b2cb57bf2ba..a27d84ca15a5 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2019 Raptor Engineering, LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -53,13 +54,9 @@
  * remain as-is as it provides us with a guarantee from HW that it is correct.
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 /* Defaults from spreadsheet rev#247.
  * RV2 delta: dram_clock_change_latency, max_num_dpp
  */
-#else
-/* Defaults from spreadsheet rev#247 */
-#endif
 const struct dcn_soc_bounding_box dcn10_soc_defaults = {
 		/* latencies */
 		.sr_exit_time = 17, /*us*/
@@ -626,7 +623,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
 {
 	bool updated = false;
 
-	kernel_fpu_begin();
+	DC_FP_START();
 	if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns
 			&& dc->debug.sr_exit_time_ns) {
 		updated = true;
@@ -662,7 +659,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
 		dc->dcn_soc->dram_clock_change_latency =
 				dc->debug.dram_clock_change_latency_ns / 1000.0;
 	}
-	kernel_fpu_end();
+	DC_FP_END();
 
 	return updated;
 }
@@ -708,8 +705,8 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
 
 unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev)
 {
-	/* for dali, the highest voltage level we want is 0 */
-	if (ASICREV_IS_DALI(hw_internal_rev))
+	/* for dali & pollock, the highest voltage level we want is 0 */
+	if (ASICREV_IS_POLLOCK(hw_internal_rev) || ASICREV_IS_DALI(hw_internal_rev))
 		return 0;
 
 	/* we are ok with all levels */
@@ -742,7 +739,7 @@ bool dcn_validate_bandwidth(
 		dcn_bw_sync_calcs_and_dml(dc);
 
 	memset(v, 0, sizeof(*v));
-	kernel_fpu_begin();
+	DC_FP_START();
 
 	v->sr_exit_time = dc->dcn_soc->sr_exit_time;
 	v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time;
@@ -1275,7 +1272,7 @@ bool dcn_validate_bandwidth(
 	bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9;
 	bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit;
 
-	kernel_fpu_end();
+	DC_FP_END();
 
 	PERFORMANCE_TRACE_END();
 	BW_VAL_TRACE_FINISH();
@@ -1443,7 +1440,7 @@ void dcn_bw_update_from_pplib(struct dc *dc)
 	res = dm_pp_get_clock_levels_by_type_with_voltage(
 			ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks);
 
-	kernel_fpu_begin();
+	DC_FP_START();
 
 	if (res)
 		res = verify_clock_values(&fclks);
@@ -1463,12 +1460,12 @@ void dcn_bw_update_from_pplib(struct dc *dc)
 	} else
 		BREAK_TO_DEBUGGER();
 
-	kernel_fpu_end();
+	DC_FP_END();
 
 	res = dm_pp_get_clock_levels_by_type_with_voltage(
 			ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks);
 
-	kernel_fpu_begin();
+	DC_FP_START();
 
 	if (res)
 		res = verify_clock_values(&dcfclks);
@@ -1481,7 +1478,7 @@ void dcn_bw_update_from_pplib(struct dc *dc)
 	} else
 		BREAK_TO_DEBUGGER();
 
-	kernel_fpu_end();
+	DC_FP_END();
 }
 
 void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
@@ -1496,11 +1493,11 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
 	if (!pp || !pp->set_wm_ranges)
 		return;
 
-	kernel_fpu_begin();
+	DC_FP_START();
 	min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32;
 	min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
 	socclk_khz = dc->dcn_soc->socclk * 1000;
-	kernel_fpu_end();
+	DC_FP_END();
 
 	/* Now notify PPLib/SMU about which Watermarks sets they should select
 	 * depending on DPM state they are in. And update BW MGR GFX Engine and
@@ -1551,7 +1548,7 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
 
 void dcn_bw_sync_calcs_and_dml(struct dc *dc)
 {
-	kernel_fpu_begin();
+	DC_FP_START();
 	DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
 			"sr_enter_plus_exit_time: %f ns\n"
 			"urgent_latency: %f ns\n"
@@ -1740,5 +1737,5 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
 	dc->dml.ip.bug_forcing_LC_req_same_size_fixed =
 		dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes;
 	dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency;
-	kernel_fpu_end();
+	DC_FP_END();
 }
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index b864869cc7e3..3cd283195091 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -63,7 +63,7 @@ CLK_MGR_DCE120 = dce120_clk_mgr.o
 AMD_DAL_CLK_MGR_DCE120 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce120/,$(CLK_MGR_DCE120))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE120)
-ifdef CONFIG_DRM_AMD_DC_DCN1_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 ###############################################################################
 # DCN10
 ###############################################################################
@@ -72,9 +72,7 @@ CLK_MGR_DCN10 = rv1_clk_mgr.o rv1_clk_mgr_vbios_smu.o rv2_clk_mgr.o
 AMD_DAL_CLK_MGR_DCN10 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn10/,$(CLK_MGR_DCN10))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN10)
-endif
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
 ###############################################################################
 # DCN20
 ###############################################################################
@@ -83,9 +81,7 @@ CLK_MGR_DCN20 = dcn20_clk_mgr.o
 AMD_DAL_CLK_MGR_DCN20 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn20/,$(CLK_MGR_DCN20))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN20)
-endif
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
 ###############################################################################
 # DCN21
 ###############################################################################
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index c43797bea413..a78e5c74c79c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -37,9 +37,7 @@
 #include "dcn10/rv1_clk_mgr.h"
 #include "dcn10/rv2_clk_mgr.h"
 #include "dcn20/dcn20_clk_mgr.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #include "dcn21/rn_clk_mgr.h"
-#endif
 
 
 int clk_mgr_helper_get_active_display_cnt(
@@ -65,6 +63,31 @@ int clk_mgr_helper_get_active_display_cnt(
 	return display_count;
 }
 
+void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr)
+{
+	struct dc_link *edp_link = get_edp_link(dc);
+
+	if (dc->hwss.exit_optimized_pwr_state)
+		dc->hwss.exit_optimized_pwr_state(dc, dc->current_state);
+
+	if (edp_link) {
+		clk_mgr->psr_allow_active_cache = edp_link->psr_allow_active;
+		dc_link_set_psr_allow_active(edp_link, false, false);
+	}
+
+}
+
+void clk_mgr_optimize_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr)
+{
+	struct dc_link *edp_link = get_edp_link(dc);
+
+	if (edp_link)
+		dc_link_set_psr_allow_active(edp_link, clk_mgr->psr_allow_active_cache, false);
+
+	if (dc->hwss.optimize_pwr_state)
+		dc->hwss.optimize_pwr_state(dc, dc->current_state);
+
+}
 
 struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg)
 {
@@ -109,14 +132,19 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 			dce120_clk_mgr_construct(ctx, clk_mgr);
 		break;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case FAMILY_RV:
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
+		if (ASICREV_IS_DALI(asic_id.hw_internal_rev) ||
+				ASICREV_IS_POLLOCK(asic_id.hw_internal_rev)) {
+			/* TEMP: this check has to come before ASICREV_IS_RENOIR */
+			/* which also incorrectly returns true for Dali/Pollock*/
+			rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu);
+			break;
+		}
 		if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) {
 			rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
 			break;
 		}
-#endif	/* DCN2_1 */
 		if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) {
 			rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu);
 			break;
@@ -127,13 +155,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 			break;
 		}
 		break;
-#endif	/* Family RV */
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case FAMILY_NV:
 		dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
 		break;
-#endif /* Family NV */
+#endif	/* Family RV and NV*/
 
 	default:
 		ASSERT(0); /* Unknown Asic */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
index c5c8c4901eed..26db1c5d4e4d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -147,7 +147,7 @@ int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
 
 	/* Calculate the current DFS clock, in kHz.*/
 	dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		* clk_mgr->dentist_vco_freq_khz) / target_div;
+		* clk_mgr->base.dentist_vco_freq_khz) / target_div;
 
 	return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz);
 }
@@ -239,7 +239,7 @@ int dce_set_clock(
 	/* Make sure requested clock isn't lower than minimum threshold*/
 	if (requested_clk_khz > 0)
 		requested_clk_khz = max(requested_clk_khz,
-				clk_mgr_dce->dentist_vco_freq_khz / 64);
+				clk_mgr_dce->base.dentist_vco_freq_khz / 64);
 
 	/* Prepare to program display clock*/
 	pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10;
@@ -276,11 +276,11 @@ static void dce_clock_read_integrated_info(struct clk_mgr_internal *clk_mgr_dce)
 	int i;
 
 	if (bp->integrated_info)
-		clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
-	if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
-		clk_mgr_dce->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq;
-		if (clk_mgr_dce->dentist_vco_freq_khz == 0)
-			clk_mgr_dce->dentist_vco_freq_khz = 3600000;
+		clk_mgr_dce->base.dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
+	if (clk_mgr_dce->base.dentist_vco_freq_khz == 0) {
+		clk_mgr_dce->base.dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq;
+		if (clk_mgr_dce->base.dentist_vco_freq_khz == 0)
+			clk_mgr_dce->base.dentist_vco_freq_khz = 3600000;
 	}
 
 	/*update the maximum display clock for each power state*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
index 7c746ef1e32e..d031bd3d3072 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
@@ -72,8 +72,8 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz)
 	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
 	struct bp_set_dce_clock_parameters dce_clk_params;
 	struct dc_bios *bp = clk_mgr_base->ctx->dc_bios;
-	struct dc *core_dc = clk_mgr_base->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 	int actual_clock = requested_clk_khz;
 	/* Prepare to program display clock*/
 	memset(&dce_clk_params, 0, sizeof(dce_clk_params));
@@ -81,7 +81,7 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz)
 	/* Make sure requested clock isn't lower than minimum threshold*/
 	if (requested_clk_khz > 0)
 		requested_clk_khz = max(requested_clk_khz,
-				clk_mgr_dce->dentist_vco_freq_khz / 62);
+				clk_mgr_dce->base.dentist_vco_freq_khz / 62);
 
 	dce_clk_params.target_clock_frequency = requested_clk_khz;
 	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
@@ -110,7 +110,7 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz)
 
 	bp->funcs->set_dce_clock(bp, &dce_clk_params);
 
-	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
 			if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
 				dmcu->funcs->set_psr_wait_loop(dmcu,
@@ -126,8 +126,8 @@ int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz)
 {
 	struct bp_set_dce_clock_parameters dce_clk_params;
 	struct dc_bios *bp = clk_mgr->base.ctx->dc_bios;
-	struct dc *core_dc = clk_mgr->base.ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc *dc = clk_mgr->base.ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 	int actual_clock = requested_clk_khz;
 	/* Prepare to program display clock*/
 	memset(&dce_clk_params, 0, sizeof(dce_clk_params));
@@ -135,7 +135,7 @@ int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz)
 	/* Make sure requested clock isn't lower than minimum threshold*/
 	if (requested_clk_khz > 0)
 		requested_clk_khz = max(requested_clk_khz,
-				clk_mgr->dentist_vco_freq_khz / 62);
+				clk_mgr->base.dentist_vco_freq_khz / 62);
 
 	dce_clk_params.target_clock_frequency = requested_clk_khz;
 	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
@@ -152,7 +152,7 @@ int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz)
 		clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
 
 
-	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
 			if (clk_mgr->dfs_bypass_disp_clk != actual_clock)
 				dmcu->funcs->set_psr_wait_loop(dmcu,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
index 47f529ce280a..3fab9296918a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
@@ -139,6 +139,9 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base,
 
 	ASSERT(clk_mgr->pp_smu);
 
+	if (dc->work_arounds.skip_clock_update)
+		return;
+
 	pp_smu = &clk_mgr->pp_smu->rv_funcs;
 
 	display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
@@ -266,11 +269,11 @@ void rv1_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_
 	clk_mgr->base.dprefclk_khz = 600000;
 
 	if (bp->integrated_info)
-		clk_mgr->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
-	if (bp->fw_info_valid && clk_mgr->dentist_vco_freq_khz == 0) {
-		clk_mgr->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq;
-		if (clk_mgr->dentist_vco_freq_khz == 0)
-			clk_mgr->dentist_vco_freq_khz = 3600000;
+		clk_mgr->base.dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
+	if (bp->fw_info_valid && clk_mgr->base.dentist_vco_freq_khz == 0) {
+		clk_mgr->base.dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq;
+		if (clk_mgr->base.dentist_vco_freq_khz == 0)
+			clk_mgr->base.dentist_vco_freq_khz = 3600000;
 	}
 
 	if (!debug->disable_dfs_bypass && bp->integrated_info)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
index 1897e91c8ccb..97b7f32294fd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -88,8 +88,8 @@ int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned
 int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
 {
 	int actual_dispclk_set_mhz = -1;
-	struct dc *core_dc = clk_mgr->base.ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc *dc = clk_mgr->base.ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	/*  Unit of SMU msg parameter is Mhz */
 	actual_dispclk_set_mhz = rv1_vbios_smu_send_msg_with_param(
@@ -100,7 +100,7 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di
 	/* Actual dispclk set is returned in the parameter register */
 	actual_dispclk_set_mhz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
 
-	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
 			if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
 				dmcu->funcs->set_psr_wait_loop(dmcu,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 3e8ac303bd52..495f01e9f2ca 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -27,6 +27,7 @@
 #include "clk_mgr_internal.h"
 
 #include "dce100/dce_clk_mgr.h"
+#include "dcn20_clk_mgr.h"
 #include "reg_helper.h"
 #include "core_types.h"
 #include "dm_helpers.h"
@@ -100,88 +101,47 @@ uint32_t dentist_get_did_from_divider(int divider)
 }
 
 void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
-		struct dc_state *context)
+		struct dc_state *context, bool safe_to_lower)
 {
 	int i;
 
+	clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz;
 	for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
-		int dpp_inst, dppclk_khz;
+		int dpp_inst, dppclk_khz, prev_dppclk_khz;
 
-		if (!context->res_ctx.pipe_ctx[i].plane_state)
-			continue;
-
-		dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst;
+		/* Loop index will match dpp->inst if resource exists,
+		 * and we want to avoid dependency on dpp object
+		 */
+		dpp_inst = i;
 		dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
-		clk_mgr->dccg->funcs->update_dpp_dto(
-				clk_mgr->dccg, dpp_inst, dppclk_khz, false);
-	}
-}
-
-static void update_global_dpp_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz)
-{
-	int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-			* clk_mgr->dentist_vco_freq_khz / khz;
 
-	uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
+		prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
 
-	REG_UPDATE(DENTIST_DISPCLK_CNTL,
-			DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
-	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
+		if (safe_to_lower || prev_dppclk_khz < dppclk_khz) {
+			clk_mgr->dccg->funcs->update_dpp_dto(
+							clk_mgr->dccg, dpp_inst, dppclk_khz);
+		}
+	}
 }
 
-static void update_display_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz)
+void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr)
 {
+	int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
 	int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-			* clk_mgr->dentist_vco_freq_khz / khz;
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
 
+	uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
 	uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
 
 	REG_UPDATE(DENTIST_DISPCLK_CNTL,
 			DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
+//	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100);
+	REG_UPDATE(DENTIST_DISPCLK_CNTL,
+			DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
+	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
 }
 
-static void request_voltage_and_program_disp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz)
-{
-	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-	struct dc *dc = clk_mgr_base->ctx->dc;
-	struct pp_smu_funcs_nv *pp_smu = NULL;
-	bool going_up = clk_mgr->base.clks.dispclk_khz < khz;
-
-	if (dc->res_pool->pp_smu)
-		pp_smu = &dc->res_pool->pp_smu->nv_funcs;
-
-	clk_mgr->base.clks.dispclk_khz = khz;
-
-	if (going_up && pp_smu && pp_smu->set_voltage_by_freq)
-		pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000);
-
-	update_display_clk(clk_mgr, khz);
-
-	if (!going_up && pp_smu && pp_smu->set_voltage_by_freq)
-		pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000);
-}
-
-static void request_voltage_and_program_global_dpp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz)
-{
-	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-	struct dc *dc = clk_mgr_base->ctx->dc;
-	struct pp_smu_funcs_nv *pp_smu = NULL;
-	bool going_up = clk_mgr->base.clks.dppclk_khz < khz;
-
-	if (dc->res_pool->pp_smu)
-		pp_smu = &dc->res_pool->pp_smu->nv_funcs;
-
-	clk_mgr->base.clks.dppclk_khz = khz;
-	clk_mgr->dccg->ref_dppclk = khz;
-
-	if (going_up && pp_smu && pp_smu->set_voltage_by_freq)
-		pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000);
-
-	update_global_dpp_clk(clk_mgr, khz);
-
-	if (!going_up && pp_smu && pp_smu->set_voltage_by_freq)
-		pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000);
-}
 
 void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
@@ -192,11 +152,12 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 	struct dc *dc = clk_mgr_base->ctx->dc;
 	struct pp_smu_funcs_nv *pp_smu = NULL;
 	int display_count;
+	bool update_dppclk = false;
 	bool update_dispclk = false;
 	bool enter_display_off = false;
+	bool dpp_clock_lowered = false;
 	struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
 	bool force_reset = false;
-	int i;
 
 	if (dc->work_arounds.skip_clock_update)
 		return;
@@ -205,6 +166,9 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 		dc->debug.force_clock_mode & 0x1) {
 		//this is from resume or boot up, if forced_clock cfg option used, we bypass program dispclk and DPPCLK, but need set them for S3.
 		force_reset = true;
+
+		dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
+
 		//force_clock_mode 0x1:  force reset the clock even it is the same clock as long as it is in Passive level.
 	}
 	display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
@@ -251,12 +215,10 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 
 	if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) {
 		clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
-
 		clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support;
 		if (pp_smu && pp_smu->set_pstate_handshake_support)
 			pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support);
 	}
-	clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
 
 	if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) {
 		clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz;
@@ -264,50 +226,40 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 			pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dramclk_khz / 1000);
 	}
 
-	if (dc->config.forced_clocks == false) {
-		// First update display clock
-		if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz))
-			request_voltage_and_program_disp_clk(clk_mgr_base, new_clocks->dispclk_khz);
+	if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+		if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+			dpp_clock_lowered = true;
+		clk_mgr->base.clks.dppclk_khz = new_clocks->dppclk_khz;
 
-		// Updating DPP clock requires some more logic
-		if (!safe_to_lower) {
-			// For pre-programming, we need to make sure any DPP clock that will go up has to go up
-
-			// First raise the global reference if needed
-			if (new_clocks->dppclk_khz > clk_mgr_base->clks.dppclk_khz)
-				request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz);
+		if (pp_smu && pp_smu->set_voltage_by_freq)
+			pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000);
 
-			// Then raise any dividers that need raising
-			for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
-				int dpp_inst, dppclk_khz;
+		update_dppclk = true;
+	}
 
-				if (!context->res_ctx.pipe_ctx[i].plane_state)
-					continue;
+	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+		if (pp_smu && pp_smu->set_voltage_by_freq)
+			pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000);
 
-				dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst;
-				dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
+		update_dispclk = true;
+	}
 
-				clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, true);
-			}
+	if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
+		if (dpp_clock_lowered) {
+			// if clock is being lowered, increase DTO before lowering refclk
+			dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+			dcn20_update_clocks_update_dentist(clk_mgr);
 		} else {
-			// For post-programming, we can lower ref clk if needed, and unconditionally set all the DTOs
-
-			if (new_clocks->dppclk_khz < clk_mgr_base->clks.dppclk_khz)
-				request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz);
-
-			for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
-				int dpp_inst, dppclk_khz;
-
-				if (!context->res_ctx.pipe_ctx[i].plane_state)
-					continue;
-
-				dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst;
-				dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
-
-				clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, false);
-			}
+			// if clock is being raised, increase refclk before lowering DTO
+			if (update_dppclk || update_dispclk)
+				dcn20_update_clocks_update_dentist(clk_mgr);
+			// always update dtos unless clock is lowered and not safe to lower
+			if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
+				dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
 		}
 	}
+
 	if (update_dispclk &&
 			dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
 		/*update dmcu for wait_loop count*/
@@ -320,6 +272,8 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr,
 		struct dc_state *context,
 		bool safe_to_lower)
 {
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	/* Min fclk = 1.2GHz since all the extra scemi logic seems to run off of it */
 	int fclk_adj = new_clocks->fclk_khz > 1200000 ? new_clocks->fclk_khz : 1200000;
@@ -357,14 +311,18 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr,
 		clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
 	}
 
-	/* Both fclk and dppclk ref are run on the same scemi clock so we
-	 * need to keep the same value for both
+	/* Both fclk and ref_dppclk run on the same scemi clock.
+	 * So take the higher value since the DPP DTO is typically programmed
+	 * such that max dppclk is 1:1 with ref_dppclk.
 	 */
 	if (clk_mgr->clks.fclk_khz > clk_mgr->clks.dppclk_khz)
 		clk_mgr->clks.dppclk_khz = clk_mgr->clks.fclk_khz;
 	if (clk_mgr->clks.dppclk_khz > clk_mgr->clks.fclk_khz)
 		clk_mgr->clks.fclk_khz = clk_mgr->clks.dppclk_khz;
 
+	// Both fclk and ref_dppclk run on the same scemi clock.
+	clk_mgr_int->dccg->ref_dppclk = clk_mgr->clks.fclk_khz;
+
 	dm_set_dcn_clocks(clk_mgr->ctx, &clk_mgr->clks);
 }
 
@@ -389,6 +347,32 @@ void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
 	}
 }
 
+
+void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t dispclk_wdivider;
+	uint32_t dppclk_wdivider;
+	int disp_divider;
+	int dpp_divider;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &dispclk_wdivider);
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, &dppclk_wdivider);
+
+	disp_divider = dentist_get_divider_from_did(dispclk_wdivider);
+	dpp_divider = dentist_get_divider_from_did(dispclk_wdivider);
+
+	if (disp_divider && dpp_divider) {
+		/* Calculate the current DFS clock, in kHz.*/
+		clk_mgr_base->clks.dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / disp_divider;
+
+		clk_mgr_base->clks.dppclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+				* clk_mgr->base.dentist_vco_freq_khz) / dpp_divider;
+	}
+
+}
+
 void dcn2_get_clock(struct clk_mgr *clk_mgr,
 		struct dc_state *context,
 			enum dc_clock_type clock_type,
@@ -409,12 +393,36 @@ void dcn2_get_clock(struct clk_mgr *clk_mgr,
 	}
 }
 
+static bool dcn2_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	if (a->dispclk_khz != b->dispclk_khz)
+		return false;
+	else if (a->dppclk_khz != b->dppclk_khz)
+		return false;
+	else if (a->dcfclk_khz != b->dcfclk_khz)
+		return false;
+	else if (a->socclk_khz != b->socclk_khz)
+		return false;
+	else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
+		return false;
+	else if (a->phyclk_khz != b->phyclk_khz)
+		return false;
+	else if (a->dramclk_khz != b->dramclk_khz)
+		return false;
+	else if (a->p_state_change_support != b->p_state_change_support)
+		return false;
+
+	return true;
+}
+
 static struct clk_mgr_funcs dcn2_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.update_clocks = dcn2_update_clocks,
 	.init_clocks = dcn2_init_clocks,
 	.enable_pme_wa = dcn2_enable_pme_wa,
 	.get_clock = dcn2_get_clock,
+	.are_clock_states_equal = dcn2_are_clock_states_equal,
 };
 
 
@@ -442,7 +450,7 @@ void dcn20_clk_mgr_construct(
 
 	if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
 		dcn2_funcs.update_clocks = dcn2_update_clocks_fpga;
-		clk_mgr->dentist_vco_freq_khz = 3850000;
+		clk_mgr->base.dentist_vco_freq_khz = 3850000;
 
 	} else {
 		/* DFS Slice 2 should be used for DPREFCLK */
@@ -466,15 +474,15 @@ void dcn20_clk_mgr_construct(
 		pll_req = dc_fixpt_mul_int(pll_req, 100000);
 
 		/* integer part is now VCO frequency in kHz */
-		clk_mgr->dentist_vco_freq_khz = dc_fixpt_floor(pll_req);
+		clk_mgr->base.dentist_vco_freq_khz = dc_fixpt_floor(pll_req);
 
 		/* in case we don't get a value from the register, use default */
-		if (clk_mgr->dentist_vco_freq_khz == 0)
-			clk_mgr->dentist_vco_freq_khz = 3850000;
+		if (clk_mgr->base.dentist_vco_freq_khz == 0)
+			clk_mgr->base.dentist_vco_freq_khz = 3850000;
 
 		/* Calculate the DPREFCLK in kHz.*/
 		clk_mgr->base.dprefclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-			* clk_mgr->dentist_vco_freq_khz) / target_div;
+			* clk_mgr->base.dentist_vco_freq_khz) / target_div;
 	}
 	//Integrated_info table does not exist on dGPU projects so should not be referenced
 	//anywhere in code for dGPUs.
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h
index ac31a9787305..0b9c045b0c8e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h
@@ -34,7 +34,7 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr,
 			struct dc_state *context,
 			bool safe_to_lower);
 void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
-		struct dc_state *context);
+		struct dc_state *context, bool safe_to_lower);
 
 void dcn2_init_clocks(struct clk_mgr *clk_mgr);
 
@@ -50,4 +50,9 @@ void dcn2_get_clock(struct clk_mgr *clk_mgr,
 			enum dc_clock_type clock_type,
 			struct dc_clock_config *clock_cfg);
 
+void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr);
+
+void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base);
+
+
 #endif //__DCN20_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 787f94d815f4..7ae4c06232dd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -52,6 +52,47 @@
 #define REG(reg_name) \
 	(CLK_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
 
+
+/* TODO: evaluate how to lower or disable all dcn clocks in screen off case */
+int rn_get_active_display_cnt_wa(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	int i, display_count;
+	bool tmds_present = false;
+
+	display_count = 0;
+	for (i = 0; i < context->stream_count; i++) {
+		const struct dc_stream_state *stream = context->streams[i];
+
+		if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+				stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+				stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+			tmds_present = true;
+	}
+
+	for (i = 0; i < dc->link_count; i++) {
+		const struct dc_link *link = dc->links[i];
+
+		/*
+		 * Only notify active stream or virtual stream.
+		 * Need to notify virtual stream to work around
+		 * headless case. HPD does not fire when system is in
+		 * S0i2.
+		 */
+		/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
+		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL ||
+				link->link_enc->funcs->is_dig_enabled(link->link_enc))
+			display_count++;
+	}
+
+	/* WA for hang on HDMI after display off back back on*/
+	if (display_count == 0 && tmds_present)
+		display_count = 1;
+
+	return display_count;
+}
+
 void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower)
@@ -62,17 +103,36 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 	int display_count;
 	bool update_dppclk = false;
 	bool update_dispclk = false;
-	bool enter_display_off = false;
 	bool dpp_clock_lowered = false;
-	struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
 
-	display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
+	struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
 
-	if (display_count == 0)
-		enter_display_off = true;
+	if (dc->work_arounds.skip_clock_update)
+		return;
 
-	if (enter_display_off == safe_to_lower) {
-		rn_vbios_smu_set_display_count(clk_mgr, display_count);
+	/*
+	 * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+	 * also if safe to lower is false, we just go in the higher state
+	 */
+	if (safe_to_lower) {
+		/* check that we're not already in lower */
+		if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+
+			display_count = rn_get_active_display_cnt_wa(dc, context);
+			/* if we can go lower, go lower */
+			if (display_count == 0) {
+				rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
+				/* update power state */
+				clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+			}
+		}
+	} else {
+		/* check that we're not already in D0 */
+		if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+			rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_MISSION_MODE);
+			/* update power state */
+			clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+		}
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) {
@@ -106,15 +166,16 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (dpp_clock_lowered) {
-		// if clock is being lowered, increase DTO before lowering refclk
-		dcn20_update_clocks_update_dpp_dto(clk_mgr, context);
+		// increase per DPP DTO before lowering global dppclk
+		dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
 		rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
 	} else {
-		// if clock is being raised, increase refclk before lowering DTO
+		// increase global DPPCLK before lowering per DPP DTO
 		if (update_dppclk || update_dispclk)
 			rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
-		if (update_dppclk)
-			dcn20_update_clocks_update_dpp_dto(clk_mgr, context);
+		// always update dtos unless clock is lowered and not safe to lower
+		if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
+			dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
 	}
 
 	if (update_dispclk &&
@@ -319,7 +380,7 @@ void rn_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s)
 
 	rn_dump_clk_registers(&sb, clk_mgr_base, &log_info);
 
-	s->dprefclk_khz = sb.dprefclk;
+	s->dprefclk_khz = sb.dprefclk * 1000;
 }
 
 void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base)
@@ -329,81 +390,14 @@ void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base)
 	rn_vbios_smu_enable_pme_wa(clk_mgr);
 }
 
-static struct clk_mgr_funcs dcn21_funcs = {
-	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
-	.update_clocks = rn_update_clocks,
-	.init_clocks = dcn2_init_clocks,
-	.enable_pme_wa = rn_enable_pme_wa,
-	/* .dump_clk_registers = rn_dump_clk_registers */
-};
-
-struct clk_bw_params rn_bw_params = {
-	.vram_type = Ddr4MemType,
-	.num_channels = 1,
-	.clk_table = {
-		.entries = {
-			{
-				.voltage = 0,
-				.dcfclk_mhz = 400,
-				.fclk_mhz = 400,
-				.memclk_mhz = 800,
-				.socclk_mhz = 0,
-			},
-			{
-				.voltage = 0,
-				.dcfclk_mhz = 483,
-				.fclk_mhz = 800,
-				.memclk_mhz = 1600,
-				.socclk_mhz = 0,
-			},
-			{
-				.voltage = 0,
-				.dcfclk_mhz = 602,
-				.fclk_mhz = 1067,
-				.memclk_mhz = 1067,
-				.socclk_mhz = 0,
-			},
-			{
-				.voltage = 0,
-				.dcfclk_mhz = 738,
-				.fclk_mhz = 1333,
-				.memclk_mhz = 1600,
-				.socclk_mhz = 0,
-			},
-		},
-
-		.num_entries = 4,
-	},
-
-	.wm_table = {
-		.entries = {
-			{
-				.wm_inst = WM_A,
-				.wm_type = WM_TYPE_PSTATE_CHG,
-				.pstate_latency_us = 23.84,
-				.valid = true,
-			},
-			{
-				.wm_inst = WM_B,
-				.wm_type = WM_TYPE_PSTATE_CHG,
-				.pstate_latency_us = 23.84,
-				.valid = true,
-			},
-			{
-				.wm_inst = WM_C,
-				.wm_type = WM_TYPE_PSTATE_CHG,
-				.pstate_latency_us = 23.84,
-				.valid = true,
-			},
-			{
-				.wm_inst = WM_D,
-				.wm_type = WM_TYPE_PSTATE_CHG,
-				.pstate_latency_us = 23.84,
-				.valid = true,
-			},
-		},
-	}
-};
+void rn_init_clocks(struct clk_mgr *clk_mgr)
+{
+	memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+	// Assumption is that boot state always supports pstate
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+}
 
 void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_range_sets *ranges)
 {
@@ -417,7 +411,7 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra
 			continue;
 
 		ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst;
-		ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;;
+		ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;
 		/* We will not select WM based on dcfclk, so leave it as unconstrained */
 		ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
 		ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
@@ -462,31 +456,215 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra
 
 }
 
-void clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct hw_asic_id *asic_id)
+static void rn_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct dc_debug_options *debug = &clk_mgr_base->ctx->dc->debug;
+	struct pp_smu_wm_range_sets ranges = {0};
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct pp_smu_funcs *pp_smu = clk_mgr->pp_smu;
+
+	if (!debug->disable_pplib_wm_range) {
+		build_watermark_ranges(clk_mgr_base->bw_params, &ranges);
+
+		/* Notify PP Lib/SMU which Watermarks to use for which clock ranges */
+		if (pp_smu && pp_smu->rn_funcs.set_wm_ranges)
+			pp_smu->rn_funcs.set_wm_ranges(&pp_smu->rn_funcs.pp_smu, &ranges);
+	}
+
+}
+
+static bool rn_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	if (a->dispclk_khz != b->dispclk_khz)
+		return false;
+	else if (a->dppclk_khz != b->dppclk_khz)
+		return false;
+	else if (a->dcfclk_khz != b->dcfclk_khz)
+		return false;
+	else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
+		return false;
+
+	return true;
+}
+
+
+static struct clk_mgr_funcs dcn21_funcs = {
+	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+	.update_clocks = rn_update_clocks,
+	.init_clocks = rn_init_clocks,
+	.enable_pme_wa = rn_enable_pme_wa,
+	.are_clock_states_equal = rn_are_clock_states_equal,
+	.notify_wm_ranges = rn_notify_wm_ranges
+};
+
+struct clk_bw_params rn_bw_params = {
+	.vram_type = Ddr4MemType,
+	.num_channels = 1,
+	.clk_table = {
+		.entries = {
+			{
+				.voltage = 0,
+				.dcfclk_mhz = 400,
+				.fclk_mhz = 400,
+				.memclk_mhz = 800,
+				.socclk_mhz = 0,
+			},
+			{
+				.voltage = 0,
+				.dcfclk_mhz = 483,
+				.fclk_mhz = 800,
+				.memclk_mhz = 1600,
+				.socclk_mhz = 0,
+			},
+			{
+				.voltage = 0,
+				.dcfclk_mhz = 602,
+				.fclk_mhz = 1067,
+				.memclk_mhz = 1067,
+				.socclk_mhz = 0,
+			},
+			{
+				.voltage = 0,
+				.dcfclk_mhz = 738,
+				.fclk_mhz = 1333,
+				.memclk_mhz = 1600,
+				.socclk_mhz = 0,
+			},
+		},
+
+		.num_entries = 4,
+	},
+
+};
+
+struct wm_table ddr4_wm_table = {
+	.entries = {
+		{
+			.wm_inst = WM_A,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 6.09,
+			.sr_enter_plus_exit_time_us = 7.14,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_B,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 10.12,
+			.sr_enter_plus_exit_time_us = 11.48,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_C,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 10.12,
+			.sr_enter_plus_exit_time_us = 11.48,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_D,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 10.12,
+			.sr_enter_plus_exit_time_us = 11.48,
+			.valid = true,
+		},
+	}
+};
+
+struct wm_table lpddr4_wm_table = {
+	.entries = {
+		{
+			.wm_inst = WM_A,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 5.32,
+			.sr_enter_plus_exit_time_us = 6.38,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_B,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 9.82,
+			.sr_enter_plus_exit_time_us = 11.196,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_C,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 9.89,
+			.sr_enter_plus_exit_time_us = 11.24,
+			.valid = true,
+		},
+		{
+			.wm_inst = WM_D,
+			.wm_type = WM_TYPE_PSTATE_CHG,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 9.748,
+			.sr_enter_plus_exit_time_us = 11.102,
+			.valid = true,
+		},
+	}
+};
+
+
+static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
 {
 	int i;
 
+	for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) {
+		if (clock_table->DcfClocks[i].Vol == voltage)
+			return clock_table->DcfClocks[i].Freq;
+	}
+
+	ASSERT(0);
+	return 0;
+}
+
+static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
+{
+	int i, j = 0;
+
+	j = -1;
+
 	ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
 
-	for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) {
-		if (clock_table->FClocks[i].Freq == 0)
+	/* Find lowest DPM, FCLK is filled in reverse order*/
+
+	for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) {
+		if (clock_table->FClocks[i].Freq != 0) {
+			j = i;
 			break;
+		}
+	}
+
+	if (j == -1) {
+		/* clock table is all 0s, just use our own hardcode */
+		ASSERT(0);
+		return;
+	}
+
+	bw_params->clk_table.num_entries = j + 1;
 
-		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i].Freq;
-		bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[i].Freq;
-		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[i].Freq;
-		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i].Freq;
-		bw_params->clk_table.entries[i].voltage = clock_table->FClocks[i].Vol;
+	for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
+		bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq;
+		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq;
+		bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol;
+		bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
 	}
-	bw_params->clk_table.num_entries = i;
 
-	bw_params->vram_type = asic_id->vram_type;
-	bw_params->num_channels = asic_id->vram_width / DDR4_DRAM_WIDTH;
+	bw_params->vram_type = bios_info->memory_type;
+	bw_params->num_channels = bios_info->ma_channel_number;
 
 	for (i = 0; i < WM_SET_COUNT; i++) {
 		bw_params->wm_table.entries[i].wm_inst = i;
 
-		if (clock_table->FClocks[i].Freq == 0) {
+		if (i >= bw_params->clk_table.num_entries) {
 			bw_params->wm_table.entries[i].valid = false;
 			continue;
 		}
@@ -515,7 +693,6 @@ void rn_clk_mgr_construct(
 {
 	struct dc_debug_options *debug = &ctx->dc->debug;
 	struct dpm_clocks clock_table = { 0 };
-	struct clk_state_registers_and_bypass s = { 0 };
 
 	clk_mgr->base.ctx = ctx;
 	clk_mgr->base.funcs = &dcn21_funcs;
@@ -534,57 +711,42 @@ void rn_clk_mgr_construct(
 
 	if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
 		dcn21_funcs.update_clocks = dcn2_update_clocks_fpga;
-		clk_mgr->dentist_vco_freq_khz = 3600000;
-		clk_mgr->base.dprefclk_khz = 600000;
+		clk_mgr->base.dentist_vco_freq_khz = 3600000;
 	} else {
 		struct clk_log_info log_info = {0};
 
 		/* TODO: Check we get what we expect during bringup */
-		clk_mgr->dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr);
+		clk_mgr->base.dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr);
 
 		/* in case we don't get a value from the register, use default */
-		if (clk_mgr->dentist_vco_freq_khz == 0)
-			clk_mgr->dentist_vco_freq_khz = 3600000;
-
-		rn_dump_clk_registers(&s, &clk_mgr->base, &log_info);
-		clk_mgr->base.dprefclk_khz = s.dprefclk;
+		if (clk_mgr->base.dentist_vco_freq_khz == 0)
+			clk_mgr->base.dentist_vco_freq_khz = 3600000;
 
-		if (clk_mgr->base.dprefclk_khz != 600000) {
-			clk_mgr->base.dprefclk_khz = 600000;
-			ASSERT(1); //TODO: Renoir follow up.
+		if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) {
+			rn_bw_params.wm_table = lpddr4_wm_table;
+		} else {
+			rn_bw_params.wm_table = ddr4_wm_table;
 		}
-
-		/* in case we don't get a value from the register, use default */
-		if (clk_mgr->base.dprefclk_khz == 0)
-			clk_mgr->base.dprefclk_khz = 600000;
+		/* Saved clocks configured at boot for debug purposes */
+		rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
 	}
 
+	clk_mgr->base.dprefclk_khz = 600000;
 	dce_clock_read_ss_info(clk_mgr);
 
+
 	clk_mgr->base.bw_params = &rn_bw_params;
 
-	if (pp_smu) {
+	if (pp_smu && pp_smu->rn_funcs.get_dpm_clock_table) {
 		pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table);
-		clk_mgr_helper_populate_bw_params(clk_mgr->base.bw_params, &clock_table, &ctx->asic_id);
+		if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+			rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
+		}
 	}
 
-	/*
-	 * Notify SMU which set of WM should be selected for different ranges of fclk
-	 * On Renoir there is a maximumum of 4 DF pstates supported, could be less
-	 * depending on DDR speed and fused maximum fclk.
-	 */
-	if (!debug->disable_pplib_wm_range) {
-		struct pp_smu_wm_range_sets ranges = {0};
-
-		build_watermark_ranges(clk_mgr->base.bw_params, &ranges);
-
-		/* Notify PP Lib/SMU which Watermarks to use for which clock ranges */
-		if (pp_smu && pp_smu->rn_funcs.set_wm_ranges)
-			pp_smu->rn_funcs.set_wm_ranges(&pp_smu->rn_funcs.pp_smu, &ranges);
+	if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && clk_mgr->smu_ver >= 0x00371500) {
+		/* enable powerfeatures when displaycount goes to 0 */
+		rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(clk_mgr, !debug->disable_48mhz_pwrdwn);
 	}
-
-	/* enable powerfeatures when displaycount goes to 0 */
-	if (!debug->disable_48mhz_pwrdwn)
-		rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(clk_mgr);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
index aadec06fde10..e4322fa5475b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
@@ -26,11 +26,13 @@
 #ifndef __RN_CLK_MGR_H__
 #define __RN_CLK_MGR_H__
 
+#include "clk_mgr.h"
+#include "dm_pp_smu.h"
+
 struct rn_clk_registers {
 	uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */
 };
 
-
 void rn_clk_mgr_construct(struct dc_context *ctx,
 		struct clk_mgr_internal *clk_mgr,
 		struct pp_smu_funcs *pp_smu,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
index 50984c1811bb..6878aedf1d3e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
@@ -33,7 +33,7 @@
 #include "mp/mp_12_0_0_sh_mask.h"
 
 #define REG(reg_name) \
-	(MP1_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
+	(MP0_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
 
 #define FN(reg_name, field) \
 	FD(reg_name##__##field)
@@ -82,20 +82,16 @@ int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
 int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
 {
 	int actual_dispclk_set_mhz = -1;
-	struct dc *core_dc = clk_mgr->base.ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
-	uint32_t clk = requested_dispclk_khz / 1000;
-
-	if (clk <= 100)
-		clk = 101;
+	struct dc *dc = clk_mgr->base.ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	/*  Unit of SMU msg parameter is Mhz */
 	actual_dispclk_set_mhz = rn_vbios_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_SetDispclkFreq,
-			clk);
+			requested_dispclk_khz / 1000);
 
-	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
 			if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
 				dmcu->funcs->set_psr_wait_loop(dmcu,
@@ -124,7 +120,7 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque
 {
 	int actual_dcfclk_set_mhz = -1;
 
-	if (clk_mgr->smu_ver < 0xFFFFFFFF)
+	if (clk_mgr->smu_ver < 0x370c00)
 		return actual_dcfclk_set_mhz;
 
 	actual_dcfclk_set_mhz = rn_vbios_smu_send_msg_with_param(
@@ -139,7 +135,7 @@ int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int
 {
 	int actual_min_ds_dcfclk_mhz = -1;
 
-	if (clk_mgr->smu_ver < 0xFFFFFFFF)
+	if (clk_mgr->smu_ver < 0x370c00)
 		return actual_min_ds_dcfclk_mhz;
 
 	actual_min_ds_dcfclk_mhz = rn_vbios_smu_send_msg_with_param(
@@ -162,33 +158,35 @@ int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_
 {
 	int actual_dppclk_set_mhz = -1;
 
-	uint32_t clk = requested_dpp_khz / 1000;
-
-	if (clk <= 100)
-		clk = 101;
-
 	actual_dppclk_set_mhz = rn_vbios_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_SetDppclkFreq,
-			clk);
+			requested_dpp_khz / 1000);
 
 	return actual_dppclk_set_mhz * 1000;
 }
 
-void rn_vbios_smu_set_display_count(struct clk_mgr_internal *clk_mgr, int display_count)
+void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, enum dcn_pwr_state state)
 {
+	int disp_count;
+
+	if (state == DCN_PWR_STATE_LOW_POWER)
+		disp_count = 0;
+	else
+		disp_count = 1;
+
 	rn_vbios_smu_send_msg_with_param(
-			clk_mgr,
-			VBIOSSMC_MSG_SetDisplayCount,
-			display_count);
+		clk_mgr,
+		VBIOSSMC_MSG_SetDisplayCount,
+		disp_count);
 }
 
-void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr)
+void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
 {
 	rn_vbios_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown,
-			0);
+			enable);
 }
 
 void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
index da3a49487c6d..ccc01879c9d4 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
@@ -33,8 +33,8 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque
 int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
 void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
 int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
-void rn_vbios_smu_set_display_count(struct clk_mgr_internal *clk_mgr, int display_count);
-void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr);
+void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count);
+void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
 void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
 
 #endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 4b8819c27fcd..6c797fac189d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -58,21 +58,21 @@
 #include "hubp.h"
 
 #include "dc_link_dp.h"
+#include "dc_dmub_srv.h"
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "dsc.h"
-#endif
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 #include "vm_helper.h"
-#endif
 
 #include "dce/dce_i2c.h"
 
+#define CTX \
+	dc->ctx
+
 #define DC_LOGGER \
 	dc->ctx->logger
 
-const static char DC_BUILD_ID[] = "production-build";
+static const char DC_BUILD_ID[] = "production-build";
 
 /**
  * DOC: Overview
@@ -194,7 +194,7 @@ static bool create_links(
 				}
 			}
 
-			if (!should_destory_link) {
+			if (dc->config.force_enum_edp || !should_destory_link) {
 				dc->links[dc->link_count] = link;
 				link->dc = dc;
 				++dc->link_count;
@@ -287,7 +287,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe->stream == stream && pipe->stream_res.tg) {
-			pipe->stream->adjust = *adjust;
 			dc->hwss.set_drr(&pipe,
 					1,
 					adjust->v_total_min,
@@ -411,6 +410,27 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
 	return false;
 }
 
+void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream,
+		enum dc_dynamic_expansion option)
+{
+	/* OPP FMT dyn expansion updates*/
+	int i = 0;
+	struct pipe_ctx *pipe_ctx;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (dc->current_state->res_ctx.pipe_ctx[i].stream
+				== stream) {
+			pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+			pipe_ctx->stream_res.opp->dyn_expansion = option;
+			pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
+					pipe_ctx->stream_res.opp,
+					COLOR_SPACE_YCBCR601,
+					stream->timing.display_color_depth,
+					stream->signal);
+		}
+	}
+}
+
 void dc_stream_set_dither_option(struct dc_stream_state *stream,
 		enum dc_dither_option option)
 {
@@ -490,10 +510,10 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream)
 	return ret;
 }
 
-void dc_stream_set_static_screen_events(struct dc *dc,
+void dc_stream_set_static_screen_params(struct dc *dc,
 		struct dc_stream_state **streams,
 		int num_streams,
-		const struct dc_static_screen_events *events)
+		const struct dc_static_screen_params *params)
 {
 	int i = 0;
 	int j = 0;
@@ -512,10 +532,10 @@ void dc_stream_set_static_screen_events(struct dc *dc,
 		}
 	}
 
-	dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events);
+	dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, params);
 }
 
-static void destruct(struct dc *dc)
+static void dc_destruct(struct dc *dc)
 {
 	if (dc->current_state) {
 		dc_release_state(dc->current_state);
@@ -548,7 +568,7 @@ static void destruct(struct dc *dc)
 	kfree(dc->bw_dceip);
 	dc->bw_dceip = NULL;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	kfree(dc->dcn_soc);
 	dc->dcn_soc = NULL;
 
@@ -556,28 +576,58 @@ static void destruct(struct dc *dc)
 	dc->dcn_ip = NULL;
 
 #endif
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	kfree(dc->vm_helper);
 	dc->vm_helper = NULL;
 
-#endif
 }
 
-static bool construct(struct dc *dc,
+static bool dc_construct_ctx(struct dc *dc,
+		const struct dc_init_data *init_params)
+{
+	struct dc_context *dc_ctx;
+	enum dce_version dc_version = DCE_VERSION_UNKNOWN;
+
+	dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL);
+	if (!dc_ctx)
+		return false;
+
+	dc_ctx->cgs_device = init_params->cgs_device;
+	dc_ctx->driver_context = init_params->driver;
+	dc_ctx->dc = dc;
+	dc_ctx->asic_id = init_params->asic_id;
+	dc_ctx->dc_sink_id_count = 0;
+	dc_ctx->dc_stream_id_count = 0;
+	dc_ctx->dce_environment = init_params->dce_environment;
+
+	/* Create logger */
+
+	dc_version = resource_parse_asic_id(init_params->asic_id);
+	dc_ctx->dce_version = dc_version;
+
+	dc_ctx->perf_trace = dc_perf_trace_create();
+	if (!dc_ctx->perf_trace) {
+		ASSERT_CRITICAL(false);
+		return false;
+	}
+
+	dc->ctx = dc_ctx;
+
+	return true;
+}
+
+static bool dc_construct(struct dc *dc,
 		const struct dc_init_data *init_params)
 {
 	struct dc_context *dc_ctx;
 	struct bw_calcs_dceip *dc_dceip;
 	struct bw_calcs_vbios *dc_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	struct dcn_soc_bounding_box *dcn_soc;
 	struct dcn_ip_params *dcn_ip;
 #endif
 
-	enum dce_version dc_version = DCE_VERSION_UNKNOWN;
 	dc->config = init_params->flags;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	// Allocate memory for the vm_helper
 	dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL);
 	if (!dc->vm_helper) {
@@ -585,7 +635,6 @@ static bool construct(struct dc *dc,
 		goto fail;
 	}
 
-#endif
 	memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides));
 
 	dc_dceip = kzalloc(sizeof(*dc_dceip), GFP_KERNEL);
@@ -603,7 +652,7 @@ static bool construct(struct dc *dc,
 	}
 
 	dc->bw_vbios = dc_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	dcn_soc = kzalloc(sizeof(*dcn_soc), GFP_KERNEL);
 	if (!dcn_soc) {
 		dm_error("%s: failed to create dcn_soc\n", __func__);
@@ -619,31 +668,15 @@ static bool construct(struct dc *dc,
 	}
 
 	dc->dcn_ip = dcn_ip;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	dc->soc_bounding_box = init_params->soc_bounding_box;
 #endif
-#endif
 
-	dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL);
-	if (!dc_ctx) {
+	if (!dc_construct_ctx(dc, init_params)) {
 		dm_error("%s: failed to create ctx\n", __func__);
 		goto fail;
 	}
 
-	dc_ctx->cgs_device = init_params->cgs_device;
-	dc_ctx->driver_context = init_params->driver;
-	dc_ctx->dc = dc;
-	dc_ctx->asic_id = init_params->asic_id;
-	dc_ctx->dc_sink_id_count = 0;
-	dc_ctx->dc_stream_id_count = 0;
-	dc->ctx = dc_ctx;
-
-	/* Create logger */
-
-	dc_ctx->dce_environment = init_params->dce_environment;
-
-	dc_version = resource_parse_asic_id(init_params->asic_id);
-	dc_ctx->dce_version = dc_version;
+        dc_ctx = dc->ctx;
 
 	/* Resource should construct all asic specific resources.
 	 * This should be the only place where we need to parse the asic id
@@ -658,7 +691,7 @@ static bool construct(struct dc *dc,
 		bp_init_data.bios = init_params->asic_id.atombios_base_address;
 
 		dc_ctx->dc_bios = dal_bios_parser_create(
-				&bp_init_data, dc_version);
+				&bp_init_data, dc_ctx->dce_version);
 
 		if (!dc_ctx->dc_bios) {
 			ASSERT_CRITICAL(false);
@@ -666,17 +699,13 @@ static bool construct(struct dc *dc,
 		}
 
 		dc_ctx->created_bios = true;
-		}
-
-	dc_ctx->perf_trace = dc_perf_trace_create();
-	if (!dc_ctx->perf_trace) {
-		ASSERT_CRITICAL(false);
-		goto fail;
 	}
 
+
+
 	/* Create GPIO service */
 	dc_ctx->gpio_service = dal_gpio_service_create(
-			dc_version,
+			dc_ctx->dce_version,
 			dc_ctx->dce_environment,
 			dc_ctx);
 
@@ -685,7 +714,7 @@ static bool construct(struct dc *dc,
 		goto fail;
 	}
 
-	dc->res_pool = dc_create_resource_pool(dc, init_params, dc_version);
+	dc->res_pool = dc_create_resource_pool(dc, init_params, dc_ctx->dce_version);
 	if (!dc->res_pool)
 		goto fail;
 
@@ -693,10 +722,8 @@ static bool construct(struct dc *dc,
 	if (!dc->clk_mgr)
 		goto fail;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 	if (dc->res_pool->funcs->update_bw_bounding_box)
 		dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
-#endif
 
 	/* Creation of current_state must occur after dc->dml
 	 * is initialized in dc_create_resource_pool because
@@ -718,12 +745,9 @@ static bool construct(struct dc *dc,
 	return true;
 
 fail:
-
-	destruct(dc);
 	return false;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 static bool disable_all_writeback_pipes_for_stream(
 		const struct dc *dc,
 		struct dc_stream_state *stream,
@@ -736,7 +760,6 @@ static bool disable_all_writeback_pipes_for_stream(
 
 	return true;
 }
-#endif
 
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
@@ -762,11 +785,12 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 		}
 		if (should_disable && old_stream) {
 			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
-#endif
-			dc->hwss.apply_ctx_for_surface(dc, old_stream, 0, dangling_context);
+			if (dc->hwss.apply_ctx_for_surface)
+				dc->hwss.apply_ctx_for_surface(dc, old_stream, 0, dangling_context);
 		}
+		if (dc->hwss.program_front_end_for_ctx)
+			dc->hwss.program_front_end_for_ctx(dc, dangling_context);
 	}
 
 	current_ctx = dc->current_state;
@@ -774,6 +798,33 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 	dc_release_state(current_ctx);
 }
 
+static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
+{
+	int i;
+	int count = 0;
+	struct pipe_ctx *pipe;
+	PERF_TRACE();
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!pipe->plane_state)
+			continue;
+
+		/* Timeout 100 ms */
+		while (count < 100000) {
+			/* Must set to false to start with, due to OR in update function */
+			pipe->plane_state->status.is_flip_pending = false;
+			dc->hwss.update_pending_status(pipe);
+			if (!pipe->plane_state->status.is_flip_pending)
+				break;
+			udelay(1);
+			count++;
+		}
+		ASSERT(!pipe->plane_state->status.is_flip_pending);
+	}
+	PERF_TRACE();
+}
+
 /*******************************************************************************
  * Public functions
  ******************************************************************************/
@@ -786,29 +837,38 @@ struct dc *dc_create(const struct dc_init_data *init_params)
 	if (NULL == dc)
 		goto alloc_fail;
 
-	if (false == construct(dc, init_params))
-		goto construct_fail;
+	if (init_params->dce_environment == DCE_ENV_VIRTUAL_HW) {
+		if (false == dc_construct_ctx(dc, init_params)) {
+			dc_destruct(dc);
+			goto construct_fail;
+		}
+	} else {
+		if (false == dc_construct(dc, init_params)) {
+			dc_destruct(dc);
+			goto construct_fail;
+		}
+
+		full_pipe_count = dc->res_pool->pipe_count;
+		if (dc->res_pool->underlay_pipe_index != NO_UNDERLAY_PIPE)
+			full_pipe_count--;
+		dc->caps.max_streams = min(
+				full_pipe_count,
+				dc->res_pool->stream_enc_count);
 
-	/*TODO: separate HW and SW initialization*/
-	dc->hwss.init_hw(dc);
+		dc->optimize_seamless_boot_streams = 0;
+		dc->caps.max_links = dc->link_count;
+		dc->caps.max_audios = dc->res_pool->audio_count;
+		dc->caps.linear_pitch_alignment = 64;
 
-	full_pipe_count = dc->res_pool->pipe_count;
-	if (dc->res_pool->underlay_pipe_index != NO_UNDERLAY_PIPE)
-		full_pipe_count--;
-	dc->caps.max_streams = min(
-			full_pipe_count,
-			dc->res_pool->stream_enc_count);
+		dc->caps.max_dp_protocol_version = DP_VERSION_1_4;
 
-	dc->caps.max_links = dc->link_count;
-	dc->caps.max_audios = dc->res_pool->audio_count;
-	dc->caps.linear_pitch_alignment = 64;
+		if (dc->res_pool->dmcu != NULL)
+			dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version;
+	}
 
 	/* Populate versioning information */
 	dc->versions.dc_ver = DC_VER;
 
-	if (dc->res_pool->dmcu != NULL)
-		dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version;
-
 	dc->build_id = DC_BUILD_ID;
 
 	DC_LOG_DC("Display Core initialized\n");
@@ -824,14 +884,30 @@ alloc_fail:
 	return NULL;
 }
 
+void dc_hardware_init(struct dc *dc)
+{
+	if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW)
+		dc->hwss.init_hw(dc);
+}
+
 void dc_init_callbacks(struct dc *dc,
 		const struct dc_callback_init *init_params)
 {
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	dc->ctx->cp_psp = init_params->cp_psp;
+#endif
+}
+
+void dc_deinit_callbacks(struct dc *dc)
+{
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	memset(&dc->ctx->cp_psp, 0, sizeof(dc->ctx->cp_psp));
+#endif
 }
 
 void dc_destroy(struct dc **dc)
 {
-	destruct(*dc);
+	dc_destruct(*dc);
 	kfree(*dc);
 	*dc = NULL;
 }
@@ -905,15 +981,11 @@ static void program_timing_sync(
 
 		/* set first pipe with plane as master */
 		for (j = 0; j < group_size; j++) {
-			struct pipe_ctx *temp;
-
 			if (pipe_set[j]->plane_state) {
 				if (j == 0)
 					break;
 
-				temp = pipe_set[0];
-				pipe_set[0] = pipe_set[j];
-				pipe_set[j] = temp;
+				swap(pipe_set[0], pipe_set[j]);
 				break;
 			}
 		}
@@ -970,40 +1042,87 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc,
 				struct dc_crtc_timing *crtc_timing)
 {
 	struct timing_generator *tg;
+	struct stream_encoder *se = NULL;
+
+	struct dc_crtc_timing hw_crtc_timing = {0};
+
 	struct dc_link *link = sink->link;
-	unsigned int enc_inst, tg_inst;
+	unsigned int i, enc_inst, tg_inst = 0;
+
+	// Seamless port only support single DP and EDP so far
+	if (sink->sink_signal != SIGNAL_TYPE_DISPLAY_PORT &&
+		sink->sink_signal != SIGNAL_TYPE_EDP)
+		return false;
 
 	/* Check for enabled DIG to identify enabled display */
 	if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
 		return false;
 
-	/* Check for which front end is used by this encoder.
-	 * Note the inst is 1 indexed, where 0 is undefined.
-	 * Note that DIG_FE can source from different OTG but our
-	 * current implementation always map 1-to-1, so this code makes
-	 * the same assumption and doesn't check OTG source.
-	 */
 	enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
 
-	/* Instance should be within the range of the pool */
-	if (enc_inst >= dc->res_pool->pipe_count)
+	if (enc_inst == ENGINE_ID_UNKNOWN)
 		return false;
 
-	if (enc_inst >= dc->res_pool->stream_enc_count)
-		return false;
+	for (i = 0; i < dc->res_pool->stream_enc_count; i++) {
+		if (dc->res_pool->stream_enc[i]->id == enc_inst) {
+
+			se = dc->res_pool->stream_enc[i];
 
-	tg_inst = dc->res_pool->stream_enc[enc_inst]->funcs->dig_source_otg(
-		dc->res_pool->stream_enc[enc_inst]);
+			tg_inst = dc->res_pool->stream_enc[i]->funcs->dig_source_otg(
+				dc->res_pool->stream_enc[i]);
+			break;
+		}
+	}
+
+	// tg_inst not found
+	if (i == dc->res_pool->stream_enc_count)
+		return false;
 
 	if (tg_inst >= dc->res_pool->timing_generator_count)
 		return false;
 
 	tg = dc->res_pool->timing_generators[tg_inst];
 
-	if (!tg->funcs->is_matching_timing)
+	if (!tg->funcs->get_hw_timing)
+		return false;
+
+	if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing))
+		return false;
+
+	if (crtc_timing->h_total != hw_crtc_timing.h_total)
+		return false;
+
+	if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left)
 		return false;
 
-	if (!tg->funcs->is_matching_timing(tg, crtc_timing))
+	if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable)
+		return false;
+
+	if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right)
+		return false;
+
+	if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch)
+		return false;
+
+	if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width)
+		return false;
+
+	if (crtc_timing->v_total != hw_crtc_timing.v_total)
+		return false;
+
+	if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top)
+		return false;
+
+	if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable)
+		return false;
+
+	if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom)
+		return false;
+
+	if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch)
+		return false;
+
+	if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width)
 		return false;
 
 	if (dc_is_dp_signal(link->connector_signal)) {
@@ -1016,6 +1135,20 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc,
 		if (crtc_timing->pix_clk_100hz != pix_clk_100hz)
 			return false;
 
+		if (!se->funcs->dp_get_pixel_format)
+			return false;
+
+		if (!se->funcs->dp_get_pixel_format(
+			se,
+			&hw_crtc_timing.pixel_encoding,
+			&hw_crtc_timing.display_color_depth))
+			return false;
+
+		if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth)
+			return false;
+
+		if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding)
+			return false;
 	}
 
 	return true;
@@ -1068,24 +1201,25 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 
 	for (i = 0; i < context->stream_count; i++) {
 		if (context->streams[i]->apply_seamless_boot_optimization)
-			dc->optimize_seamless_boot = true;
+			dc->optimize_seamless_boot_streams++;
 	}
 
-	if (!dc->optimize_seamless_boot)
+	if (dc->optimize_seamless_boot_streams == 0)
 		dc->hwss.prepare_bandwidth(dc, context);
 
 	/* re-program planes for existing stream, in case we need to
 	 * free up plane resource for later use
 	 */
-	for (i = 0; i < context->stream_count; i++) {
-		if (context->streams[i]->mode_changed)
-			continue;
+	if (dc->hwss.apply_ctx_for_surface)
+		for (i = 0; i < context->stream_count; i++) {
+			if (context->streams[i]->mode_changed)
+				continue;
 
-		dc->hwss.apply_ctx_for_surface(
-			dc, context->streams[i],
-			context->stream_status[i].plane_count,
-			context); /* use new pipe config in new context */
-	}
+			dc->hwss.apply_ctx_for_surface(
+				dc, context->streams[i],
+				context->stream_status[i].plane_count,
+				context); /* use new pipe config in new context */
+		}
 
 	/* Program hardware */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1104,16 +1238,19 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	}
 
 	/* Program all planes within new context*/
+	if (dc->hwss.program_front_end_for_ctx)
+		dc->hwss.program_front_end_for_ctx(dc, context);
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_link *link = context->streams[i]->link;
 
 		if (!context->streams[i]->mode_changed)
 			continue;
 
-		dc->hwss.apply_ctx_for_surface(
-				dc, context->streams[i],
-				context->stream_status[i].plane_count,
-				context);
+		if (dc->hwss.apply_ctx_for_surface)
+			dc->hwss.apply_ctx_for_surface(
+					dc, context->streams[i],
+					context->stream_status[i].plane_count,
+					context);
 
 		/*
 		 * enable stereo
@@ -1140,15 +1277,16 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 
 	dc_enable_stereo(dc, context, dc_streams, context->stream_count);
 
-	if (!dc->optimize_seamless_boot)
+	if (dc->optimize_seamless_boot_streams == 0) {
+		/* Must wait for no flips to be pending before doing optimize bw */
+		wait_for_no_pipes_pending(dc, context);
 		/* pplib is notified if disp_num changed */
 		dc->hwss.optimize_bandwidth(dc, context);
+	}
 
 	for (i = 0; i < context->stream_count; i++)
 		context->streams[i]->mode_changed = false;
 
-	memset(&context->commit_hints, 0, sizeof(context->commit_hints));
-
 	dc_release_state(dc->current_state);
 
 	dc->current_state = context;
@@ -1180,12 +1318,18 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context)
 	return (result == DC_OK);
 }
 
+bool dc_is_hw_initialized(struct dc *dc)
+{
+	struct dc_bios *dcb = dc->ctx->dc_bios;
+	return dcb->funcs->is_accelerated_mode(dcb);
+}
+
 bool dc_post_update_surfaces_to_stream(struct dc *dc)
 {
 	int i;
 	struct dc_state *context = dc->current_state;
 
-	if (!dc->optimized_required || dc->optimize_seamless_boot)
+	if (!dc->optimized_required || dc->optimize_seamless_boot_streams > 0)
 		return true;
 
 	post_surface_trace(dc);
@@ -1214,7 +1358,7 @@ struct dc_state *dc_create_state(struct dc *dc)
 	 * initialize and obtain IP and SOC the base DML instance from DC is
 	 * initially copied into every context
 	 */
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
 #endif
 
@@ -1387,11 +1531,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
 		elevate_update_type(&update_type, UPDATE_TYPE_MED);
 	}
 
-	if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) {
-		update_flags->bits.sdr_white_level = 1;
-		elevate_update_type(&update_type, UPDATE_TYPE_MED);
-	}
-
 	if (u->plane_info->dcc.enable != u->surface->dcc.enable
 			|| u->plane_info->dcc.independent_64b_blks != u->surface->dcc.independent_64b_blks
 			|| u->plane_info->dcc.meta_pitch != u->surface->dcc.meta_pitch) {
@@ -1409,7 +1548,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
 	}
 
 	if (u->plane_info->plane_size.surface_pitch != u->surface->plane_size.surface_pitch
-			|| u->plane_info->plane_size.surface_pitch != u->surface->plane_size.surface_pitch
 			|| u->plane_info->plane_size.chroma_pitch != u->surface->plane_size.chroma_pitch) {
 		update_flags->bits.plane_size_change = 1;
 		elevate_update_type(&update_type, UPDATE_TYPE_MED);
@@ -1448,7 +1586,10 @@ static enum surface_update_type get_scaling_info_update_type(
 	if (u->scaling_info->clip_rect.width != u->surface->clip_rect.width
 			|| u->scaling_info->clip_rect.height != u->surface->clip_rect.height
 			|| u->scaling_info->dst_rect.width != u->surface->dst_rect.width
-			|| u->scaling_info->dst_rect.height != u->surface->dst_rect.height) {
+			|| u->scaling_info->dst_rect.height != u->surface->dst_rect.height
+			|| u->scaling_info->scaling_quality.integer_scaling !=
+				u->surface->scaling_quality.integer_scaling
+			) {
 		update_flags->bits.scaling_change = 1;
 
 		if ((u->scaling_info->dst_rect.width < u->surface->dst_rect.width
@@ -1464,7 +1605,7 @@ static enum surface_update_type get_scaling_info_update_type(
 
 		update_flags->bits.scaling_change = 1;
 		if (u->scaling_info->src_rect.width > u->surface->src_rect.width
-				&& u->scaling_info->src_rect.height > u->surface->src_rect.height)
+				|| u->scaling_info->src_rect.height > u->surface->src_rect.height)
 			/* Making src rect bigger requires a bandwidth change */
 			update_flags->bits.clock_change = 1;
 	}
@@ -1478,11 +1619,11 @@ static enum surface_update_type get_scaling_info_update_type(
 		update_flags->bits.position_change = 1;
 
 	if (update_flags->bits.clock_change
-			|| update_flags->bits.bandwidth_change)
+			|| update_flags->bits.bandwidth_change
+			|| update_flags->bits.scaling_change)
 		return UPDATE_TYPE_FULL;
 
-	if (update_flags->bits.scaling_change
-			|| update_flags->bits.position_change)
+	if (update_flags->bits.position_change)
 		return UPDATE_TYPE_MED;
 
 	return UPDATE_TYPE_FAST;
@@ -1496,20 +1637,15 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
 	enum surface_update_type overall_type = UPDATE_TYPE_FAST;
 	union surface_update_flags *update_flags = &u->surface->update_flags;
 
-	update_flags->raw = 0; // Reset all flags
-
 	if (u->flip_addr)
 		update_flags->bits.addr_update = 1;
 
-	if (!is_surface_in_context(context, u->surface)) {
-		update_flags->bits.new_plane = 1;
+	if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) {
+		update_flags->raw = 0xFFFFFFFF;
 		return UPDATE_TYPE_FULL;
 	}
 
-	if (u->surface->force_full_update) {
-		update_flags->bits.full_update = 1;
-		return UPDATE_TYPE_FULL;
-	}
+	update_flags->raw = 0; // Reset all flags
 
 	type = get_plane_info_update_type(u);
 	elevate_update_type(&overall_type, type);
@@ -1541,6 +1677,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
 			update_flags->bits.gamma_change = 1;
 	}
 
+	if (u->hdr_mult.value)
+		if (u->hdr_mult.value != u->surface->hdr_mult.value) {
+			update_flags->bits.hdr_mult = 1;
+			elevate_update_type(&overall_type, UPDATE_TYPE_MED);
+		}
+
 	if (update_flags->bits.in_transfer_func_change) {
 		type = UPDATE_TYPE_MED;
 		elevate_update_type(&overall_type, type);
@@ -1567,40 +1709,45 @@ static enum surface_update_type check_update_surfaces_for_stream(
 	enum surface_update_type overall_type = UPDATE_TYPE_FAST;
 
 	if (stream_status == NULL || stream_status->plane_count != surface_count)
-		return UPDATE_TYPE_FULL;
+		overall_type = UPDATE_TYPE_FULL;
 
 	/* some stream updates require passive update */
 	if (stream_update) {
-		if ((stream_update->src.height != 0) &&
-				(stream_update->src.width != 0))
-			return UPDATE_TYPE_FULL;
+		union stream_update_flags *su_flags = &stream_update->stream->update_flags;
 
-		if ((stream_update->dst.height != 0) &&
-				(stream_update->dst.width != 0))
-			return UPDATE_TYPE_FULL;
+		if ((stream_update->src.height != 0 && stream_update->src.width != 0) ||
+			(stream_update->dst.height != 0 && stream_update->dst.width != 0) ||
+			stream_update->integer_scaling_update)
+			su_flags->bits.scaling = 1;
 
 		if (stream_update->out_transfer_func)
-			return UPDATE_TYPE_FULL;
+			su_flags->bits.out_tf = 1;
 
 		if (stream_update->abm_level)
-			return UPDATE_TYPE_FULL;
+			su_flags->bits.abm_level = 1;
 
 		if (stream_update->dpms_off)
-			return UPDATE_TYPE_FULL;
+			su_flags->bits.dpms_off = 1;
+
+		if (stream_update->gamut_remap)
+			su_flags->bits.gamut_remap = 1;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		if (stream_update->wb_update)
-			return UPDATE_TYPE_FULL;
-#endif
+			su_flags->bits.wb_update = 1;
+		if (su_flags->raw != 0)
+			overall_type = UPDATE_TYPE_FULL;
+
+		if (stream_update->output_csc_transform || stream_update->output_color_space)
+			su_flags->bits.out_csc = 1;
+
+		if (stream_update->dsc_config)
+			overall_type = UPDATE_TYPE_FULL;
 	}
 
 	for (i = 0 ; i < surface_count; i++) {
 		enum surface_update_type type =
 				det_surface_update(dc, &updates[i]);
 
-		if (type == UPDATE_TYPE_FULL)
-			return type;
-
 		elevate_update_type(&overall_type, type);
 	}
 
@@ -1622,16 +1769,29 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
 	int i;
 	enum surface_update_type type;
 
+	if (stream_update)
+		stream_update->stream->update_flags.raw = 0;
 	for (i = 0; i < surface_count; i++)
 		updates[i].surface->update_flags.raw = 0;
 
 	type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
-	if (type == UPDATE_TYPE_FULL)
+	if (type == UPDATE_TYPE_FULL) {
+		if (stream_update)
+			stream_update->stream->update_flags.raw = 0xFFFFFFFF;
 		for (i = 0; i < surface_count; i++)
 			updates[i].surface->update_flags.raw = 0xFFFFFFFF;
+	}
 
-	if (type == UPDATE_TYPE_FAST && memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0)
-		dc->optimized_required = true;
+	if (type == UPDATE_TYPE_FAST) {
+		// If there's an available clock comparator, we use that.
+		if (dc->clk_mgr->funcs->are_clock_states_equal) {
+			if (!dc->clk_mgr->funcs->are_clock_states_equal(&dc->clk_mgr->clks, &dc->current_state->bw_ctx.bw.dcn.clk))
+				dc->optimized_required = true;
+		// Else we fallback to mem compare.
+		} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
+			dc->optimized_required = true;
+		}
+	}
 
 	return type;
 }
@@ -1707,8 +1867,6 @@ static void copy_surface_update_to_plane(
 				srf_update->plane_info->global_alpha_value;
 		surface->dcc =
 				srf_update->plane_info->dcc;
-		surface->sdr_white_level =
-				srf_update->plane_info->sdr_white_level;
 		surface->layer_index =
 				srf_update->plane_info->layer_index;
 	}
@@ -1741,7 +1899,6 @@ static void copy_surface_update_to_plane(
 			sizeof(struct dc_transfer_func_distributed_points));
 	}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	if (srf_update->func_shaper &&
 			(surface->in_shaper_func !=
 			srf_update->func_shaper))
@@ -1754,13 +1911,16 @@ static void copy_surface_update_to_plane(
 		memcpy(surface->lut3d_func, srf_update->lut3d_func,
 		sizeof(*surface->lut3d_func));
 
+	if (srf_update->hdr_mult.value)
+		surface->hdr_mult =
+				srf_update->hdr_mult;
+
 	if (srf_update->blend_tf &&
 			(surface->blend_tf !=
 			srf_update->blend_tf))
 		memcpy(surface->blend_tf, srf_update->blend_tf,
 		sizeof(*surface->blend_tf));
 
-#endif
 	if (srf_update->input_csc_color_matrix)
 		surface->input_csc_color_matrix =
 			*srf_update->input_csc_color_matrix;
@@ -1773,8 +1933,10 @@ static void copy_surface_update_to_plane(
 static void copy_stream_update_to_stream(struct dc *dc,
 					 struct dc_state *context,
 					 struct dc_stream_state *stream,
-					 const struct dc_stream_update *update)
+					 struct dc_stream_update *update)
 {
+	struct dc_context *dc_ctx = dc->ctx;
+
 	if (update == NULL || stream == NULL)
 		return;
 
@@ -1835,7 +1997,6 @@ static void copy_stream_update_to_stream(struct dc *dc,
 
 	if (update->dither_option)
 		stream->dither_option = *update->dither_option;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* update current stream with writeback info */
 	if (update->wb_update) {
 		int i;
@@ -1846,23 +2007,32 @@ static void copy_stream_update_to_stream(struct dc *dc,
 			stream->writeback_info[i] =
 				update->wb_update->writeback_info[i];
 	}
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 	if (update->dsc_config) {
 		struct dc_dsc_config old_dsc_cfg = stream->timing.dsc_cfg;
 		uint32_t old_dsc_enabled = stream->timing.flags.DSC;
 		uint32_t enable_dsc = (update->dsc_config->num_slices_h != 0 &&
 				       update->dsc_config->num_slices_v != 0);
 
-		stream->timing.dsc_cfg = *update->dsc_config;
-		stream->timing.flags.DSC = enable_dsc;
-		if (!dc->res_pool->funcs->validate_bandwidth(dc, context,
-							     true)) {
-			stream->timing.dsc_cfg = old_dsc_cfg;
-			stream->timing.flags.DSC = old_dsc_enabled;
+		/* Use temporarry context for validating new DSC config */
+		struct dc_state *dsc_validate_context = dc_create_state(dc);
+
+		if (dsc_validate_context) {
+			dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
+
+			stream->timing.dsc_cfg = *update->dsc_config;
+			stream->timing.flags.DSC = enable_dsc;
+			if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
+				stream->timing.dsc_cfg = old_dsc_cfg;
+				stream->timing.flags.DSC = old_dsc_enabled;
+				update->dsc_config = NULL;
+			}
+
+			dc_release_state(dsc_validate_context);
+		} else {
+			DC_ERROR("Failed to allocate new validate context for DSC change\n");
+			update->dsc_config = NULL;
 		}
 	}
-#endif
 }
 
 static void commit_planes_do_stream_update(struct dc *dc,
@@ -1872,6 +2042,7 @@ static void commit_planes_do_stream_update(struct dc *dc,
 		struct dc_state *context)
 {
 	int j;
+	bool should_program_abm;
 
 	// Stream updates
 	for (j = 0; j < dc->res_pool->pipe_count; j++) {
@@ -1881,11 +2052,11 @@ static void commit_planes_do_stream_update(struct dc *dc,
 
 			if (stream_update->periodic_interrupt0 &&
 					dc->hwss.setup_periodic_interrupt)
-				dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE0);
+				dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE0);
 
 			if (stream_update->periodic_interrupt1 &&
 					dc->hwss.setup_periodic_interrupt)
-				dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE1);
+				dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE1);
 
 			if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) ||
 					stream_update->vrr_infopacket ||
@@ -1895,6 +2066,12 @@ static void commit_planes_do_stream_update(struct dc *dc,
 				dc->hwss.update_info_frame(pipe_ctx);
 			}
 
+			if (stream_update->hdr_static_metadata &&
+					stream->use_dynamic_meta &&
+					dc->hwss.set_dmdata_attributes &&
+					pipe_ctx->stream->dmdata_address.quad_part != 0)
+				dc->hwss.set_dmdata_attributes(pipe_ctx);
+
 			if (stream_update->gamut_remap)
 				dc_stream_set_gamut_remap(dc, stream);
 
@@ -1902,31 +2079,25 @@ static void commit_planes_do_stream_update(struct dc *dc,
 				dc_stream_program_csc_matrix(dc, stream);
 
 			if (stream_update->dither_option) {
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 				struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
-#endif
 				resource_build_bit_depth_reduction_params(pipe_ctx->stream,
 									&pipe_ctx->stream->bit_depth_params);
 				pipe_ctx->stream_res.opp->funcs->opp_program_fmt(pipe_ctx->stream_res.opp,
 						&stream->bit_depth_params,
 						&stream->clamping);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 				while (odm_pipe) {
 					odm_pipe->stream_res.opp->funcs->opp_program_fmt(odm_pipe->stream_res.opp,
 							&stream->bit_depth_params,
 							&stream->clamping);
 					odm_pipe = odm_pipe->next_odm_pipe;
 				}
-#endif
 			}
 
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 			if (stream_update->dsc_config && dc->hwss.pipe_control_lock_global) {
 				dc->hwss.pipe_control_lock_global(dc, pipe_ctx, true);
 				dp_update_dsc_config(pipe_ctx);
 				dc->hwss.pipe_control_lock_global(dc, pipe_ctx, false);
 			}
-#endif
 			/* Full fe update*/
 			if (update_type == UPDATE_TYPE_FAST)
 				continue;
@@ -1942,7 +2113,7 @@ static void commit_planes_do_stream_update(struct dc *dc,
 
 					dc->hwss.optimize_bandwidth(dc, dc->current_state);
 				} else {
-					if (!dc->optimize_seamless_boot)
+					if (dc->optimize_seamless_boot_streams == 0)
 						dc->hwss.prepare_bandwidth(dc, dc->current_state);
 
 					core_link_enable_stream(dc->current_state, pipe_ctx);
@@ -1952,14 +2123,21 @@ static void commit_planes_do_stream_update(struct dc *dc,
 			}
 
 			if (stream_update->abm_level && pipe_ctx->stream_res.abm) {
-				if (pipe_ctx->stream_res.tg->funcs->is_blanked) {
-					// if otg funcs defined check if blanked before programming
-					if (!pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg))
+				should_program_abm = true;
+
+				// if otg funcs defined check if blanked before programming
+				if (pipe_ctx->stream_res.tg->funcs->is_blanked)
+					if (pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg))
+						should_program_abm = false;
+
+				if (should_program_abm) {
+					if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) {
+						pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm);
+					} else {
 						pipe_ctx->stream_res.abm->funcs->set_abm_level(
 							pipe_ctx->stream_res.abm, stream->abm_level);
-				} else
-					pipe_ctx->stream_res.abm->funcs->set_abm_level(
-						pipe_ctx->stream_res.abm, stream->abm_level);
+					}
+				}
 			}
 		}
 	}
@@ -1976,7 +2154,7 @@ static void commit_planes_for_stream(struct dc *dc,
 	int i, j;
 	struct pipe_ctx *top_pipe_to_program = NULL;
 
-	if (dc->optimize_seamless_boot && surface_count > 0) {
+	if (dc->optimize_seamless_boot_streams > 0 && surface_count > 0) {
 		/* Optimize seamless boot flag keeps clocks and watermarks high until
 		 * first flip. After first flip, optimization is required to lower
 		 * bandwidth. Important to note that it is expected UEFI will
@@ -1985,12 +2163,14 @@ static void commit_planes_for_stream(struct dc *dc,
 		 */
 		if (stream->apply_seamless_boot_optimization) {
 			stream->apply_seamless_boot_optimization = false;
-			dc->optimize_seamless_boot = false;
-			dc->optimized_required = true;
+			dc->optimize_seamless_boot_streams--;
+
+			if (dc->optimize_seamless_boot_streams == 0)
+				dc->optimized_required = true;
 		}
 	}
 
-	if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) {
+	if (update_type == UPDATE_TYPE_FULL && dc->optimize_seamless_boot_streams == 0) {
 		dc->hwss.prepare_bandwidth(dc, context);
 		context_clock_trace(dc, context);
 	}
@@ -2004,11 +2184,14 @@ static void commit_planes_for_stream(struct dc *dc,
 		 * In case of turning off screen, no need to program front end a second time.
 		 * just return after program blank.
 		 */
-		dc->hwss.apply_ctx_for_surface(dc, stream, 0, context);
+		if (dc->hwss.apply_ctx_for_surface)
+			dc->hwss.apply_ctx_for_surface(dc, stream, 0, context);
+		if (dc->hwss.program_front_end_for_ctx)
+			dc->hwss.program_front_end_for_ctx(dc, context);
+
 		return;
 	}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
 		for (i = 0; i < surface_count; i++) {
 			struct dc_plane_state *plane_state = srf_updates[i].surface;
@@ -2030,7 +2213,6 @@ static void commit_planes_for_stream(struct dc *dc,
 			}
 		}
 	}
-#endif
 
 	// Update Type FULL, Surface updates
 	for (j = 0; j < dc->res_pool->pipe_count; j++) {
@@ -2051,7 +2233,6 @@ static void commit_planes_for_stream(struct dc *dc,
 			if (update_type == UPDATE_TYPE_FAST)
 				continue;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 			ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
 
 			if (dc->hwss.program_triplebuffer != NULL &&
@@ -2060,14 +2241,32 @@ static void commit_planes_for_stream(struct dc *dc,
 				dc->hwss.program_triplebuffer(
 					dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
 			}
-#endif
 			stream_status =
 				stream_get_status(context, pipe_ctx->stream);
 
-			dc->hwss.apply_ctx_for_surface(
+			if (dc->hwss.apply_ctx_for_surface)
+				dc->hwss.apply_ctx_for_surface(
 					dc, pipe_ctx->stream, stream_status->plane_count, context);
 		}
 	}
+	if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) {
+		dc->hwss.program_front_end_for_ctx(dc, context);
+#ifdef CONFIG_DRM_AMD_DC_DCN
+		if (dc->debug.validate_dml_output) {
+			for (i = 0; i < dc->res_pool->pipe_count; i++) {
+				struct pipe_ctx cur_pipe = context->res_ctx.pipe_ctx[i];
+				if (cur_pipe.stream == NULL)
+					continue;
+
+				cur_pipe.plane_res.hubp->funcs->validate_dml_output(
+						cur_pipe.plane_res.hubp, dc->ctx,
+						&context->res_ctx.pipe_ctx[i].rq_regs,
+						&context->res_ctx.pipe_ctx[i].dlg_regs,
+						&context->res_ctx.pipe_ctx[i].ttu_regs);
+			}
+		}
+#endif
+	}
 
 	// Update Type FAST, Surface updates
 	if (update_type == UPDATE_TYPE_FAST) {
@@ -2077,7 +2276,6 @@ static void commit_planes_for_stream(struct dc *dc,
 		 */
 		dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		if (dc->hwss.set_flip_control_gsl)
 			for (i = 0; i < surface_count; i++) {
 				struct dc_plane_state *plane_state = srf_updates[i].surface;
@@ -2096,7 +2294,6 @@ static void commit_planes_for_stream(struct dc *dc,
 							plane_state->flip_immediate);
 				}
 			}
-#endif
 		/* Perform requested Updates */
 		for (i = 0; i < surface_count; i++) {
 			struct dc_plane_state *plane_state = srf_updates[i].surface;
@@ -2109,7 +2306,6 @@ static void commit_planes_for_stream(struct dc *dc,
 
 				if (pipe_ctx->plane_state != plane_state)
 					continue;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 				/*program triple buffer after lock based on flip type*/
 				if (dc->hwss.program_triplebuffer != NULL &&
 					!dc->debug.disable_tri_buf) {
@@ -2117,7 +2313,6 @@ static void commit_planes_for_stream(struct dc *dc,
 					dc->hwss.program_triplebuffer(
 						dc, pipe_ctx, plane_state->triplebuffer_flips);
 				}
-#endif
 				if (srf_updates[i].flip_addr)
 					dc->hwss.update_plane_addr(dc, pipe_ctx);
 			}
@@ -2278,14 +2473,15 @@ void dc_set_power_state(
 	case DC_ACPI_CM_POWER_STATE_D0:
 		dc_resource_state_construct(dc, dc->current_state);
 
+		if (dc->ctx->dmub_srv)
+			dc_dmub_srv_wait_phy_init(dc->ctx->dmub_srv);
+
 		dc->hwss.init_hw(dc);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 		if (dc->hwss.init_sys_ctx != NULL &&
 			dc->vm_pa_config.valid) {
 			dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config);
 		}
-#endif
 
 		break;
 	default:
@@ -2365,6 +2561,17 @@ bool dc_submit_i2c(
 		cmd);
 }
 
+bool dc_submit_i2c_oem(
+		struct dc *dc,
+		struct i2c_command *cmd)
+{
+	struct ddc_service *ddc = dc->res_pool->oem_device;
+	return dce_i2c_submit_command(
+		dc->res_pool,
+		ddc->ddc_pin,
+		cmd);
+}
+
 static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct dc_sink *sink)
 {
 	if (dc_link->sink_count >= MAX_SINKS_PER_LINK) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index b9227d5de3a3..502ed3c7959d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -33,7 +33,6 @@
 
 #include "core_status.h"
 #include "core_types.h"
-#include "hw_sequencer.h"
 
 #include "resource.h"
 
@@ -310,14 +309,13 @@ void context_timing_trace(
 		struct resource_context *res_ctx)
 {
 	int i;
-	struct dc  *core_dc = dc;
 	int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0};
 	struct crtc_position position;
-	unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index;
+	unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
 	DC_LOGGER_INIT(dc->ctx->logger);
 
 
-	for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
 		/* get_position() returns CRTC vertical/horizontal counter
 		 * hence not applicable for underlay pipe
@@ -329,7 +327,7 @@ void context_timing_trace(
 		h_pos[i] = position.horizontal_count;
 		v_pos[i] = position.vertical_count;
 	}
-	for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
 
 		if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx)
@@ -347,7 +345,7 @@ void context_clock_trace(
 		struct dc *dc,
 		struct dc_state *context)
 {
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	DC_LOGGER_INIT(dc->ctx->logger);
 	CLOCK_TRACE("Current: dispclk_khz:%d  max_dppclk_khz:%d  dcfclk_khz:%d\n"
 			"dcfclk_deep_sleep_khz:%d  fclk_khz:%d  socclk_khz:%d\n",
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index ca20b150afcc..260c0b62d37d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -45,6 +45,7 @@
 #include "dpcd_defs.h"
 #include "dmcu.h"
 #include "hw/clk_mgr.h"
+#include "../dce/dmub_psr.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -74,12 +75,11 @@ enum {
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-static void destruct(struct dc_link *link)
+static void dc_link_destruct(struct dc_link *link)
 {
 	int i;
 
 	if (link->hpd_gpio != NULL) {
-		dal_gpio_close(link->hpd_gpio);
 		dal_gpio_destroy_irq(&link->hpd_gpio);
 		link->hpd_gpio = NULL;
 	}
@@ -373,7 +373,7 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
 
 	if (GPIO_RESULT_OK != dal_ddc_open(
 		ddc, GPIO_MODE_INPUT, GPIO_DDC_CONFIG_TYPE_MODE_I2C)) {
-		dal_gpio_destroy_ddc(&ddc);
+		dal_ddc_close(ddc);
 
 		return present;
 	}
@@ -520,7 +520,7 @@ static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *lin
 }
 
 
-static void read_edp_current_link_settings_on_detect(struct dc_link *link)
+static void read_current_link_settings_on_detect(struct dc_link *link)
 {
 	union lane_count_set lane_count_set = { {0} };
 	uint8_t link_bw_set;
@@ -555,17 +555,23 @@ static void read_edp_current_link_settings_on_detect(struct dc_link *link)
 			&link_bw_set, sizeof(link_bw_set));
 
 	if (link_bw_set == 0) {
-		/* If standard link rates are not being used,
-		 * Read DPCD 00115h to find the link rate set used
-		 */
-		core_link_read_dpcd(link, DP_LINK_RATE_SET,
-				&link_rate_set, sizeof(link_rate_set));
-
-		if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
-			link->cur_link_settings.link_rate =
-				link->dpcd_caps.edp_supported_link_rates[link_rate_set];
-			link->cur_link_settings.link_rate_set = link_rate_set;
-			link->cur_link_settings.use_link_rate_set = true;
+		if (link->connector_signal == SIGNAL_TYPE_EDP) {
+			/* If standard link rates are not being used,
+			 * Read DPCD 00115h to find the edp link rate set used
+			 */
+			core_link_read_dpcd(link, DP_LINK_RATE_SET,
+					&link_rate_set, sizeof(link_rate_set));
+
+			// edp_supported_link_rates_count = 0 for DP
+			if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
+				link->cur_link_settings.link_rate =
+						link->dpcd_caps.edp_supported_link_rates[link_rate_set];
+				link->cur_link_settings.link_rate_set = link_rate_set;
+				link->cur_link_settings.use_link_rate_set = true;
+			}
+		} else {
+			// Link Rate not found. Seamless boot may not work.
+			ASSERT(false);
 		}
 	} else {
 		link->cur_link_settings.link_rate = link_bw_set;
@@ -680,7 +686,7 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
 	return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0);
 }
 
-bool wait_for_alt_mode(struct dc_link *link)
+static bool wait_for_alt_mode(struct dc_link *link)
 {
 
 	/**
@@ -738,7 +744,8 @@ bool wait_for_alt_mode(struct dc_link *link)
  * This does not create remote sinks but will trigger DM
  * to start MST detection if a branch is detected.
  */
-bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
+static bool dc_link_detect_helper(struct dc_link *link,
+				  enum dc_detect_reason reason)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
 	struct display_sink_capability sink_caps = { 0 };
@@ -753,6 +760,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 	struct dpcd_caps prev_dpcd_caps;
 	bool same_dpcd = true;
 	enum dc_connection_type new_connection_type = dc_connection_none;
+	bool perform_dp_seamless_boot = false;
+
 	DC_LOGGER_INIT(link->ctx->logger);
 
 	if (dc_is_virtual_signal(link->connector_signal))
@@ -809,15 +818,15 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 		}
 
 		case SIGNAL_TYPE_EDP: {
-			read_edp_current_link_settings_on_detect(link);
 			detect_edp_sink_caps(link);
-			sink_caps.transaction_type =
-				DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
+			read_current_link_settings_on_detect(link);
+			sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
 			sink_caps.signal = SIGNAL_TYPE_EDP;
 			break;
 		}
 
 		case SIGNAL_TYPE_DISPLAY_PORT: {
+
 			/* wa HPD high coming too early*/
 			if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
 
@@ -865,12 +874,24 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 				 * empty which leads to allocate_mst_payload() has "0"
 				 * pbn_per_slot value leading to exception on dc_fixpt_div()
 				 */
-				link->verified_link_cap = link->reported_link_cap;
+				dp_verify_mst_link_cap(link);
+
 				if (prev_sink != NULL)
 					dc_sink_release(prev_sink);
 				return false;
 			}
 
+			// For seamless boot, to skip verify link cap, we read UEFI settings and set them as verified.
+			if (reason == DETECT_REASON_BOOT &&
+					dc_ctx->dc->config.power_down_display_on_boot == false &&
+					link->link_status.link_active == true)
+				perform_dp_seamless_boot = true;
+
+			if (perform_dp_seamless_boot) {
+				read_current_link_settings_on_detect(link);
+				link->verified_link_cap = link->reported_link_cap;
+			}
+
 			break;
 		}
 
@@ -955,10 +976,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 			 *  two link trainings
 			 */
 
-			/* deal with non-mst cases */
-			dp_verify_link_cap_with_retries(link,
-					&link->reported_link_cap,
-					LINK_TRAINING_MAX_VERIFY_RETRY);
+			// verify link cap for SST non-seamless boot
+			if (!perform_dp_seamless_boot)
+				dp_verify_link_cap_with_retries(link,
+						&link->reported_link_cap,
+						LINK_TRAINING_MAX_VERIFY_RETRY);
 		} else {
 			// If edid is the same, then discard new sink and revert back to original sink
 			if (same_edid) {
@@ -1047,6 +1069,23 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 		dc_sink_release(prev_sink);
 
 	return true;
+
+}
+
+bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
+{
+	const struct dc *dc = link->dc;
+	bool ret;
+
+	/* get out of low power state */
+	clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
+
+	ret = dc_link_detect_helper(link, reason);
+
+	/* Go back to power optimized state */
+	clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
+
+	return ret;
 }
 
 bool dc_link_get_hpd_state(struct dc_link *dc_link)
@@ -1206,7 +1245,7 @@ static enum transmitter translate_encoder_to_transmitter(
 	}
 }
 
-static bool construct(
+static bool dc_link_construct(
 	struct dc_link *link,
 	const struct link_init_data *init_params)
 {
@@ -1408,7 +1447,7 @@ struct dc_link *link_create(const struct link_init_data *init_params)
 	if (NULL == link)
 		goto alloc_fail;
 
-	if (false == construct(link, init_params))
+	if (false == dc_link_construct(link, init_params))
 		goto construct_fail;
 
 	return link;
@@ -1422,7 +1461,7 @@ alloc_fail:
 
 void link_destroy(struct dc_link **link)
 {
-	destruct(*link);
+	dc_link_destruct(*link);
 	kfree(*link);
 	*link = NULL;
 }
@@ -1457,10 +1496,7 @@ static enum dc_status enable_link_dp(
 	bool skip_video_pattern;
 	struct dc_link *link = stream->link;
 	struct dc_link_settings link_settings = {0};
-	enum dp_panel_mode panel_mode;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool fec_enable;
-#endif
 	int i;
 	bool apply_seamless_boot_optimization = false;
 
@@ -1476,15 +1512,6 @@ static enum dc_status enable_link_dp(
 	decide_link_settings(stream, &link_settings);
 
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) {
-		/* If link settings are different than current and link already enabled
-		 * then need to disable before programming to new rate.
-		 */
-		if (link->link_status.link_active &&
-			(link->cur_link_settings.lane_count != link_settings.lane_count ||
-			 link->cur_link_settings.link_rate != link_settings.link_rate)) {
-			dp_disable_link_phy(link, pipe_ctx->stream->signal);
-		}
-
 		/*in case it is not on*/
 		link->dc->hwss.edp_power_control(link, true);
 		link->dc->hwss.edp_wait_for_hpd_ready(link, true);
@@ -1492,53 +1519,32 @@ static enum dc_status enable_link_dp(
 
 	pipe_ctx->stream_res.pix_clk_params.requested_sym_clk =
 			link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
-	if (!apply_seamless_boot_optimization)
+	if (state->clk_mgr && !apply_seamless_boot_optimization)
 		state->clk_mgr->funcs->update_clocks(state->clk_mgr, state, false);
 
-	dp_enable_link_phy(
-		link,
-		pipe_ctx->stream->signal,
-		pipe_ctx->clock_source->id,
-		&link_settings);
-
-	if (stream->sink_patches.dppowerup_delay > 0) {
-		int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
-
-		msleep(delay_dp_power_up_in_ms);
-	}
-
-	panel_mode = dp_get_panel_mode(link);
-	dp_set_panel_mode(link, panel_mode);
-
 	skip_video_pattern = true;
 
 	if (link_settings.link_rate == LINK_RATE_LOW)
 			skip_video_pattern = false;
 
-	if (link->aux_access_disabled) {
-		dc_link_dp_perform_link_training_skip_aux(link, &link_settings);
-
-		link->cur_link_settings = link_settings;
-		status = DC_OK;
-	} else if (perform_link_training_with_retries(
-			link,
+	if (perform_link_training_with_retries(
 			&link_settings,
 			skip_video_pattern,
-			LINK_TRAINING_ATTEMPTS)) {
+			LINK_TRAINING_ATTEMPTS,
+			pipe_ctx,
+			pipe_ctx->stream->signal)) {
 		link->cur_link_settings = link_settings;
 		status = DC_OK;
 	}
 	else
 		status = DC_FAIL_DP_LINK_TRAINING;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (link->preferred_training_settings.fec_enable != NULL)
 		fec_enable = *link->preferred_training_settings.fec_enable;
 	else
 		fec_enable = true;
 
 	dp_set_fec_enable(link, fec_enable);
-#endif
 	return status;
 }
 
@@ -2025,6 +2031,45 @@ static void write_i2c_redriver_setting(
 		ASSERT(i2c_success);
 }
 
+static void disable_link(struct dc_link *link, enum signal_type signal)
+{
+	/*
+	 * TODO: implement call for dp_set_hw_test_pattern
+	 * it is needed for compliance testing
+	 */
+
+	/* Here we need to specify that encoder output settings
+	 * need to be calculated as for the set mode,
+	 * it will lead to querying dynamic link capabilities
+	 * which should be done before enable output
+	 */
+
+	if (dc_is_dp_signal(signal)) {
+		/* SST DP, eDP */
+		if (dc_is_dp_sst_signal(signal))
+			dp_disable_link_phy(link, signal);
+		else
+			dp_disable_link_phy_mst(link, signal);
+
+		if (dc_is_dp_sst_signal(signal) ||
+				link->mst_stream_alloc_table.stream_count == 0) {
+			dp_set_fec_enable(link, false);
+			dp_set_fec_ready(link, false);
+		}
+	} else {
+		if (signal != SIGNAL_TYPE_VIRTUAL)
+			link->link_enc->funcs->disable_output(link->link_enc, signal);
+	}
+
+	if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+		/* MST disable link only when no stream use the link */
+		if (link->mst_stream_alloc_table.stream_count <= 0)
+			link->link_status.link_active = false;
+	} else {
+		link->link_status.link_active = false;
+	}
+}
+
 static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -2109,6 +2154,19 @@ static enum dc_status enable_link(
 		struct pipe_ctx *pipe_ctx)
 {
 	enum dc_status status = DC_ERROR_UNEXPECTED;
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc_link *link = stream->link;
+
+	/* There's some scenarios where driver is unloaded with display
+	 * still enabled. When driver is reloaded, it may cause a display
+	 * to not light up if there is a mismatch between old and new
+	 * link settings. Need to call disable first before enabling at
+	 * new link settings.
+	 */
+	if (link->link_status.link_active) {
+		disable_link(link, pipe_ctx->stream->signal);
+	}
+
 	switch (pipe_ctx->stream->signal) {
 	case SIGNAL_TYPE_DISPLAY_PORT:
 		status = enable_link_dp(state, pipe_ctx);
@@ -2143,44 +2201,6 @@ static enum dc_status enable_link(
 	return status;
 }
 
-static void disable_link(struct dc_link *link, enum signal_type signal)
-{
-	/*
-	 * TODO: implement call for dp_set_hw_test_pattern
-	 * it is needed for compliance testing
-	 */
-
-	/* here we need to specify that encoder output settings
-	 * need to be calculated as for the set mode,
-	 * it will lead to querying dynamic link capabilities
-	 * which should be done before enable output */
-
-	if (dc_is_dp_signal(signal)) {
-		/* SST DP, eDP */
-		if (dc_is_dp_sst_signal(signal))
-			dp_disable_link_phy(link, signal);
-		else
-			dp_disable_link_phy_mst(link, signal);
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-
-		if (dc_is_dp_sst_signal(signal) ||
-				link->mst_stream_alloc_table.stream_count == 0) {
-			dp_set_fec_enable(link, false);
-			dp_set_fec_ready(link, false);
-		}
-#endif
-	} else
-		link->link_enc->funcs->disable_output(link->link_enc, signal);
-
-	if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
-		/* MST disable link only when no stream use the link */
-		if (link->mst_stream_alloc_table.stream_count <= 0)
-			link->link_status.link_active = false;
-	} else {
-		link->link_status.link_active = false;
-	}
-}
-
 static uint32_t get_timing_pixel_clock_100hz(const struct dc_crtc_timing *timing)
 {
 
@@ -2217,7 +2237,7 @@ static bool dp_active_dongle_validate_timing(
 		break;
 	}
 
-	if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER ||
+	if (dpcd_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER ||
 		dongle_caps->extendedCapValid == false)
 		return true;
 
@@ -2317,9 +2337,9 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 		uint32_t backlight_pwm_u16_16,
 		uint32_t frame_ramp)
 {
-	struct dc  *core_dc = link->ctx->dc;
-	struct abm *abm = core_dc->res_pool->abm;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc  *dc = link->ctx->dc;
+	struct abm *abm = dc->res_pool->abm;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 	unsigned int controller_id = 0;
 	bool use_smooth_brightness = true;
 	int i;
@@ -2337,22 +2357,22 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 
 	if (dc_is_embedded_signal(link->connector_signal)) {
 		for (i = 0; i < MAX_PIPES; i++) {
-			if (core_dc->current_state->res_ctx.pipe_ctx[i].stream) {
-				if (core_dc->current_state->res_ctx.
+			if (dc->current_state->res_ctx.pipe_ctx[i].stream) {
+				if (dc->current_state->res_ctx.
 						pipe_ctx[i].stream->link
 						== link) {
 					/* DMCU -1 for all controller id values,
 					 * therefore +1 here
 					 */
 					controller_id =
-						core_dc->current_state->
+						dc->current_state->
 						res_ctx.pipe_ctx[i].stream_res.tg->inst +
 						1;
 
 					/* Disable brightness ramping when the display is blanked
 					 * as it can hang the DMCU
 					 */
-					if (core_dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL)
+					if (dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL)
 						frame_ramp = 0;
 				}
 			}
@@ -2370,8 +2390,8 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 
 bool dc_link_set_abm_disable(const struct dc_link *link)
 {
-	struct dc  *core_dc = link->ctx->dc;
-	struct abm *abm = core_dc->res_pool->abm;
+	struct dc  *dc = link->ctx->dc;
+	struct abm *abm = dc->res_pool->abm;
 
 	if ((abm == NULL) || (abm->funcs->set_backlight_level_pwm == NULL))
 		return false;
@@ -2381,17 +2401,215 @@ bool dc_link_set_abm_disable(const struct dc_link *link)
 	return true;
 }
 
-bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait)
+bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool wait)
 {
-	struct dc  *core_dc = link->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc  *dc = link->ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
+	struct dmub_psr *psr = dc->res_pool->psr;
+
+	if ((psr != NULL) && link->psr_feature_enabled)
+		psr->funcs->set_psr_enable(psr, allow_active);
+	else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled)
+		dmcu->funcs->set_psr_enable(dmcu, allow_active, wait);
 
-	if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_enabled)
-		dmcu->funcs->set_psr_enable(dmcu, enable, wait);
+	link->psr_allow_active = allow_active;
 
 	return true;
 }
 
+bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state)
+{
+	struct dc  *dc = link->ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
+	struct dmub_psr *psr = dc->res_pool->psr;
+
+	if (psr != NULL && link->psr_feature_enabled)
+		psr->funcs->get_psr_state(psr_state);
+	else if (dmcu != NULL && link->psr_feature_enabled)
+		dmcu->funcs->get_psr_state(dmcu, psr_state);
+
+	return true;
+}
+
+static inline enum physical_phy_id
+transmitter_to_phy_id(enum transmitter transmitter_value)
+{
+	switch (transmitter_value) {
+	case TRANSMITTER_UNIPHY_A:
+		return PHYLD_0;
+	case TRANSMITTER_UNIPHY_B:
+		return PHYLD_1;
+	case TRANSMITTER_UNIPHY_C:
+		return PHYLD_2;
+	case TRANSMITTER_UNIPHY_D:
+		return PHYLD_3;
+	case TRANSMITTER_UNIPHY_E:
+		return PHYLD_4;
+	case TRANSMITTER_UNIPHY_F:
+		return PHYLD_5;
+	case TRANSMITTER_NUTMEG_CRT:
+		return PHYLD_6;
+	case TRANSMITTER_TRAVIS_CRT:
+		return PHYLD_7;
+	case TRANSMITTER_TRAVIS_LCD:
+		return PHYLD_8;
+	case TRANSMITTER_UNIPHY_G:
+		return PHYLD_9;
+	case TRANSMITTER_COUNT:
+		return PHYLD_COUNT;
+	case TRANSMITTER_UNKNOWN:
+		return PHYLD_UNKNOWN;
+	default:
+		WARN_ONCE(1, "Unknown transmitter value %d\n",
+			  transmitter_value);
+		return PHYLD_UNKNOWN;
+	}
+}
+
+bool dc_link_setup_psr(struct dc_link *link,
+		const struct dc_stream_state *stream, struct psr_config *psr_config,
+		struct psr_context *psr_context)
+{
+	struct dc *dc;
+	struct dmcu *dmcu;
+	struct dmub_psr *psr;
+	int i;
+	/* updateSinkPsrDpcdConfig*/
+	union dpcd_psr_configuration psr_configuration;
+
+	psr_context->controllerId = CONTROLLER_ID_UNDEFINED;
+
+	if (!link)
+		return false;
+
+	dc = link->ctx->dc;
+	dmcu = dc->res_pool->dmcu;
+	psr = dc->res_pool->psr;
+
+	if (!dmcu && !psr)
+		return false;
+
+
+	memset(&psr_configuration, 0, sizeof(psr_configuration));
+
+	psr_configuration.bits.ENABLE                    = 1;
+	psr_configuration.bits.CRC_VERIFICATION          = 1;
+	psr_configuration.bits.FRAME_CAPTURE_INDICATION  =
+			psr_config->psr_frame_capture_indication_req;
+
+	/* Check for PSR v2*/
+	if (psr_config->psr_version == 0x2) {
+		/* For PSR v2 selective update.
+		 * Indicates whether sink should start capturing
+		 * immediately following active scan line,
+		 * or starting with the 2nd active scan line.
+		 */
+		psr_configuration.bits.LINE_CAPTURE_INDICATION = 0;
+		/*For PSR v2, determines whether Sink should generate
+		 * IRQ_HPD when CRC mismatch is detected.
+		 */
+		psr_configuration.bits.IRQ_HPD_WITH_CRC_ERROR    = 1;
+	}
+
+	dm_helpers_dp_write_dpcd(
+		link->ctx,
+		link,
+		368,
+		&psr_configuration.raw,
+		sizeof(psr_configuration.raw));
+
+	psr_context->channel = link->ddc->ddc_pin->hw_info.ddc_channel;
+	psr_context->transmitterId = link->link_enc->transmitter;
+	psr_context->engineId = link->link_enc->preferred_engine;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (dc->current_state->res_ctx.pipe_ctx[i].stream
+				== stream) {
+			/* dmcu -1 for all controller id values,
+			 * therefore +1 here
+			 */
+			psr_context->controllerId =
+				dc->current_state->res_ctx.
+				pipe_ctx[i].stream_res.tg->inst + 1;
+			break;
+		}
+	}
+
+	/* Hardcoded for now.  Can be Pcie or Uniphy (or Unknown)*/
+	psr_context->phyType = PHY_TYPE_UNIPHY;
+	/*PhyId is associated with the transmitter id*/
+	psr_context->smuPhyId =
+		transmitter_to_phy_id(link->link_enc->transmitter);
+
+	psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
+	psr_context->vsync_rate_hz = div64_u64(div64_u64((stream->
+					timing.pix_clk_100hz * 100),
+					stream->timing.v_total),
+					stream->timing.h_total);
+
+	psr_context->psrSupportedDisplayConfig = true;
+	psr_context->psrExitLinkTrainingRequired =
+		psr_config->psr_exit_link_training_required;
+	psr_context->sdpTransmitLineNumDeadline =
+		psr_config->psr_sdp_transmit_line_num_deadline;
+	psr_context->psrFrameCaptureIndicationReq =
+		psr_config->psr_frame_capture_indication_req;
+
+	psr_context->skipPsrWaitForPllLock = 0; /* only = 1 in KV */
+
+	psr_context->numberOfControllers =
+			link->dc->res_pool->timing_generator_count;
+
+	psr_context->rfb_update_auto_en = true;
+
+	/* 2 frames before enter PSR. */
+	psr_context->timehyst_frames = 2;
+	/* half a frame
+	 * (units in 100 lines, i.e. a value of 1 represents 100 lines)
+	 */
+	psr_context->hyst_lines = stream->timing.v_total / 2 / 100;
+	psr_context->aux_repeats = 10;
+
+	psr_context->psr_level.u32all = 0;
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+	/*skip power down the single pipe since it blocks the cstate*/
+	if (ASICREV_IS_RAVEN(link->ctx->asic_id.hw_internal_rev))
+		psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
+#endif
+
+	/* SMU will perform additional powerdown sequence.
+	 * For unsupported ASICs, set psr_level flag to skip PSR
+	 *  static screen notification to SMU.
+	 *  (Always set for DAL2, did not check ASIC)
+	 */
+	psr_context->allow_smu_optimizations = psr_config->allow_smu_optimizations;
+
+	/* Complete PSR entry before aborting to prevent intermittent
+	 * freezes on certain eDPs
+	 */
+	psr_context->psr_level.bits.DISABLE_PSR_ENTRY_ABORT = 1;
+
+	/* Controls additional delay after remote frame capture before
+	 * continuing power down, default = 0
+	 */
+	psr_context->frame_delay = 0;
+
+	if (psr)
+		link->psr_feature_enabled = psr->funcs->setup_psr(psr, link, psr_context);
+	else
+		link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
+
+	/* psr_enabled == 0 indicates setup_psr did not succeed, but this
+	 * should not happen since firmware should be running at this point
+	 */
+	if (link->psr_feature_enabled == 0)
+		ASSERT(0);
+
+	return true;
+
+}
+
 const struct dc_link_status *dc_link_get_status(const struct dc_link *link)
 {
 	return &link->link_status;
@@ -2415,28 +2633,13 @@ static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream)
 	return dc_fixpt_div_int(mbytes_per_sec, 54);
 }
 
-static int get_color_depth(enum dc_color_depth color_depth)
-{
-	switch (color_depth) {
-	case COLOR_DEPTH_666: return 6;
-	case COLOR_DEPTH_888: return 8;
-	case COLOR_DEPTH_101010: return 10;
-	case COLOR_DEPTH_121212: return 12;
-	case COLOR_DEPTH_141414: return 14;
-	case COLOR_DEPTH_161616: return 16;
-	default: return 0;
-	}
-}
-
 static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx)
 {
-	uint32_t bpc;
 	uint64_t kbps;
 	struct fixed31_32 peak_kbps;
 	uint32_t numerator;
 	uint32_t denominator;
 
-	bpc = get_color_depth(pipe_ctx->stream_res.pix_clk_params.color_depth);
 	kbps = dc_bandwidth_in_kbps_from_timing(&pipe_ctx->stream->timing);
 
 	/*
@@ -2510,7 +2713,7 @@ static void update_mst_stream_alloc_table(
 /* convert link_mst_stream_alloc_table to dm dp_mst_stream_alloc_table
  * because stream_encoder is not exposed to dm
  */
-static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
+enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;
@@ -2521,6 +2724,7 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	struct fixed31_32 pbn;
 	struct fixed31_32 pbn_per_slot;
 	uint8_t i;
+	enum act_return_status ret;
 	DC_LOGGER_INIT(link->ctx->logger);
 
 	/* enable_link_dp_mst already check link->enabled_stream_count
@@ -2568,14 +2772,16 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 		&link->mst_stream_alloc_table);
 
 	/* send down message */
-	dm_helpers_dp_mst_poll_for_allocation_change_trigger(
+	ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 			stream->ctx,
 			stream);
 
-	dm_helpers_dp_mst_send_payload_allocation(
-			stream->ctx,
-			stream,
-			true);
+	if (ret != ACT_LINK_LOST) {
+		dm_helpers_dp_mst_send_payload_allocation(
+				stream->ctx,
+				stream,
+				true);
+	}
 
 	/* slot X.Y for only current stream */
 	pbn_per_slot = get_pbn_per_slot(stream);
@@ -2668,15 +2874,70 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	return DC_OK;
 }
 
+enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link)
+{
+	int i;
+	struct pipe_ctx *pipe_ctx;
+
+	// Clear all of MST payload then reallocate
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link &&
+				pipe_ctx->stream->dpms_off == false &&
+				pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			deallocate_mst_payload(pipe_ctx);
+		}
+	}
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link &&
+				pipe_ctx->stream->dpms_off == false &&
+				pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			/* enable/disable PHY will clear connection between BE and FE
+			 * need to restore it.
+			 */
+			link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+									pipe_ctx->stream_res.stream_enc->id, true);
+			dc_link_allocate_mst_payload(pipe_ctx);
+		}
+	}
+
+	return DC_OK;
+}
+
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
+{
+	struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
+	if (cp_psp && cp_psp->funcs.update_stream_config) {
+		struct cp_psp_stream_config config;
+
+		memset(&config, 0, sizeof(config));
+
+		config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
+		config.stream_enc_inst = (uint8_t) pipe_ctx->stream_res.stream_enc->id;
+		config.link_enc_inst = pipe_ctx->stream->link->link_enc_hw_inst;
+		config.dpms_off = dpms_off;
+		config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context;
+		cp_psp->funcs.update_stream_config(cp_psp->handle, &config);
+	}
+}
+#endif
+
 void core_link_enable_stream(
 		struct dc_state *state,
 		struct pipe_ctx *pipe_ctx)
 {
-	struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	enum dc_status status;
 	DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
 
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) &&
+			dc_is_virtual_signal(pipe_ctx->stream->signal))
+		return;
+
 	if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) {
 		stream->link->link_enc->funcs->setup(
 			stream->link->link_enc,
@@ -2692,6 +2953,7 @@ void core_link_enable_stream(
 			pipe_ctx->stream_res.stream_enc,
 			&stream->timing,
 			stream->output_color_space,
+			stream->use_vsc_sdp_for_colorimetry,
 			stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP);
 
 	if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
@@ -2715,18 +2977,21 @@ void core_link_enable_stream(
 			pipe_ctx->stream_res.stream_enc,
 			&stream->timing);
 
-	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		bool apply_edp_fast_boot_optimization =
 			pipe_ctx->stream->apply_edp_fast_boot_optimization;
 
 		pipe_ctx->stream->apply_edp_fast_boot_optimization = false;
 
 		resource_build_info_frame(pipe_ctx);
-		core_dc->hwss.update_info_frame(pipe_ctx);
+		dc->hwss.update_info_frame(pipe_ctx);
 
 		/* Do not touch link on seamless boot optimization. */
 		if (pipe_ctx->stream->apply_seamless_boot_optimization) {
 			pipe_ctx->stream->dpms_off = false;
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+			update_psp_stream_config(pipe_ctx, false);
+#endif
 			return;
 		}
 
@@ -2734,6 +2999,9 @@ void core_link_enable_stream(
 		if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
 					apply_edp_fast_boot_optimization) {
 			pipe_ctx->stream->dpms_off = false;
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+			update_psp_stream_config(pipe_ctx, false);
+#endif
 			return;
 		}
 
@@ -2759,7 +3027,7 @@ void core_link_enable_stream(
 			}
 		}
 
-		core_dc->hwss.enable_audio_stream(pipe_ctx);
+		dc->hwss.enable_audio_stream(pipe_ctx);
 
 		/* turn off otg test pattern if enable */
 		if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
@@ -2767,50 +3035,54 @@ void core_link_enable_stream(
 					CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
 					COLOR_DEPTH_UNDEFINED);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		if (pipe_ctx->stream->timing.flags.DSC) {
 			if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
 					dc_is_virtual_signal(pipe_ctx->stream->signal))
 				dp_set_dsc_enable(pipe_ctx, true);
 		}
-#endif
-		core_dc->hwss.enable_stream(pipe_ctx);
+		dc->hwss.enable_stream(pipe_ctx);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		/* Set DPS PPS SDP (AKA "info frames") */
 		if (pipe_ctx->stream->timing.flags.DSC) {
 			if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
 					dc_is_virtual_signal(pipe_ctx->stream->signal))
 				dp_set_dsc_pps_sdp(pipe_ctx, true);
 		}
-#endif
 
 		if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
-			allocate_mst_payload(pipe_ctx);
+			dc_link_allocate_mst_payload(pipe_ctx);
 
-		core_dc->hwss.unblank_stream(pipe_ctx,
+		dc->hwss.unblank_stream(pipe_ctx,
 			&pipe_ctx->stream->link->cur_link_settings);
 
 		if (dc_is_dp_signal(pipe_ctx->stream->signal))
 			enable_stream_features(pipe_ctx);
-	}
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-	else { // if (IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment))
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+		update_psp_stream_config(pipe_ctx, false);
+#endif
+	} else { // if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
 		if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
 				dc_is_virtual_signal(pipe_ctx->stream->signal))
 			dp_set_dsc_enable(pipe_ctx, true);
 
 	}
-#endif
 }
 
 void core_link_disable_stream(struct pipe_ctx *pipe_ctx)
 {
-	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc  *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->sink->link;
 
-	core_dc->hwss.blank_stream(pipe_ctx);
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) &&
+			dc_is_virtual_signal(pipe_ctx->stream->signal))
+		return;
+
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+	update_psp_stream_config(pipe_ctx, true);
+#endif
+
+	dc->hwss.blank_stream(pipe_ctx);
 
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
@@ -2839,25 +3111,23 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx)
 			write_i2c_redriver_setting(pipe_ctx, false);
 		}
 	}
-	core_dc->hwss.disable_stream(pipe_ctx);
+	dc->hwss.disable_stream(pipe_ctx);
 
 	disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (pipe_ctx->stream->timing.flags.DSC) {
 		if (dc_is_dp_signal(pipe_ctx->stream->signal))
 			dp_set_dsc_enable(pipe_ctx, false);
 	}
-#endif
 }
 
 void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
 {
-	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc  *dc = pipe_ctx->stream->ctx->dc;
 
 	if (!dc_is_hdmi_signal(pipe_ctx->stream->signal))
 		return;
 
-	core_dc->hwss.set_avmute(pipe_ctx, enable);
+	dc->hwss.set_avmute(pipe_ctx, enable);
 }
 
 /**
@@ -2915,13 +3185,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
 	uint32_t bits_per_channel = 0;
 	uint32_t kbps;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (timing->flags.DSC) {
 		kbps = (timing->pix_clk_100hz * timing->dsc_cfg.bits_per_pixel);
 		kbps = kbps / 160 + ((kbps % 160) ? 1 : 0);
 		return kbps;
 	}
-#endif
 
 	switch (timing->display_color_depth) {
 	case COLOR_DEPTH_666:
@@ -3074,6 +3342,7 @@ void dc_link_disable_hpd(const struct dc_link *link)
 
 void dc_link_set_test_pattern(struct dc_link *link,
 			      enum dp_test_pattern test_pattern,
+			      enum dp_test_pattern_color_space test_pattern_color_space,
 			      const struct link_training_settings *p_link_settings,
 			      const unsigned char *p_custom_pattern,
 			      unsigned int cust_pattern_size)
@@ -3082,6 +3351,7 @@ void dc_link_set_test_pattern(struct dc_link *link,
 		dc_link_dp_set_test_pattern(
 			link,
 			test_pattern,
+			test_pattern_color_space,
 			p_link_settings,
 			p_custom_pattern,
 			cust_pattern_size);
@@ -3097,7 +3367,6 @@ uint32_t dc_link_bandwidth_kbps(
 	link_bw_kbps *= 8;   /* 8 bits per byte*/
 	link_bw_kbps *= link_setting->lane_count;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
 		/* Account for FEC overhead.
 		 * We have to do it based on caps,
@@ -3122,7 +3391,6 @@ uint32_t dc_link_bandwidth_kbps(
 		link_bw_kbps = mul_u64_u32_shr(BIT_ULL(32) * 970LL / 1000,
 					       link_bw_kbps, 32);
 	}
-#endif
 
 	return link_bw_kbps;
 
@@ -3136,3 +3404,10 @@ const struct dc_link_settings *dc_link_get_link_cap(
 		return &link->preferred_link_setting;
 	return &link->verified_link_cap;
 }
+
+void dc_link_overwrite_extended_receiver_cap(
+		struct dc_link *link)
+{
+	dp_overwrite_extended_receiver_cap(link);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 51991bf26a93..a49c10d5df26 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -187,7 +187,7 @@ void dal_ddc_i2c_payloads_add(
 
 }
 
-static void construct(
+static void ddc_service_construct(
 	struct ddc_service *ddc_service,
 	struct ddc_service_init_data *init_data)
 {
@@ -206,7 +206,10 @@ static void construct(
 		ddc_service->ddc_pin = NULL;
 	} else {
 		hw_info.ddc_channel = i2c_info.i2c_line;
-		hw_info.hw_supported = i2c_info.i2c_hw_assist;
+		if (ddc_service->link != NULL)
+			hw_info.hw_supported = i2c_info.i2c_hw_assist;
+		else
+			hw_info.hw_supported = false;
 
 		ddc_service->ddc_pin = dal_gpio_create_ddc(
 			gpio_service,
@@ -236,11 +239,11 @@ struct ddc_service *dal_ddc_service_create(
 	if (!ddc_service)
 		return NULL;
 
-	construct(ddc_service, init_data);
+	ddc_service_construct(ddc_service, init_data);
 	return ddc_service;
 }
 
-static void destruct(struct ddc_service *ddc)
+static void ddc_service_destruct(struct ddc_service *ddc)
 {
 	if (ddc->ddc_pin)
 		dal_gpio_destroy_ddc(&ddc->ddc_pin);
@@ -252,7 +255,7 @@ void dal_ddc_service_destroy(struct ddc_service **ddc)
 		BREAK_TO_DEBUGGER();
 		return;
 	}
-	destruct(*ddc);
+	ddc_service_destruct(*ddc);
 	kfree(*ddc);
 	*ddc = NULL;
 }
@@ -508,7 +511,7 @@ bool dal_ddc_service_query_ddc_data(
 	uint8_t *read_buf,
 	uint32_t read_size)
 {
-	bool ret;
+	bool ret = false;
 	uint32_t payload_size =
 		dal_ddc_service_is_in_aux_transaction_mode(ddc) ?
 			DEFAULT_AUX_MAX_DATA_SIZE : EDID_SEGMENT_SIZE;
@@ -527,34 +530,32 @@ bool dal_ddc_service_query_ddc_data(
 	/*TODO: len of payload data for i2c and aux is uint8!!!!,
 	 *  but we want to read 256 over i2c!!!!*/
 	if (dal_ddc_service_is_in_aux_transaction_mode(ddc)) {
-		struct aux_payload write_payload = {
-			.i2c_over_aux = true,
-			.write = true,
-			.mot = true,
-			.address = address,
-			.length = write_size,
-			.data = write_buf,
-			.reply = NULL,
-			.defer_delay = get_defer_delay(ddc),
-		};
-
-		struct aux_payload read_payload = {
-			.i2c_over_aux = true,
-			.write = false,
-			.mot = false,
-			.address = address,
-			.length = read_size,
-			.data = read_buf,
-			.reply = NULL,
-			.defer_delay = get_defer_delay(ddc),
-		};
-
-		ret = dc_link_aux_transfer_with_retries(ddc, &write_payload);
+		struct aux_payload payload;
+		bool read_available = true;
+
+		payload.i2c_over_aux = true;
+		payload.address = address;
+		payload.reply = NULL;
+		payload.defer_delay = get_defer_delay(ddc);
+
+		if (write_size != 0) {
+			payload.write = true;
+			payload.mot = false;
+			payload.length = write_size;
+			payload.data = write_buf;
+
+			ret = dal_ddc_submit_aux_command(ddc, &payload);
+			read_available = ret;
+		}
 
-		if (!ret)
-			return false;
+		if (read_size != 0 && read_available) {
+			payload.write = false;
+			payload.mot = false;
+			payload.length = read_size;
+			payload.data = read_buf;
 
-		ret = dc_link_aux_transfer_with_retries(ddc, &read_payload);
+			ret = dal_ddc_submit_aux_command(ddc, &payload);
+		}
 	} else {
 		struct i2c_payloads *payloads =
 			dal_ddc_i2c_payloads_create(ddc->ctx, payloads_num);
@@ -585,6 +586,41 @@ bool dal_ddc_service_query_ddc_data(
 	return ret;
 }
 
+bool dal_ddc_submit_aux_command(struct ddc_service *ddc,
+		struct aux_payload *payload)
+{
+	uint32_t retrieved = 0;
+	bool ret = false;
+
+	if (!ddc)
+		return false;
+
+	if (!payload)
+		return false;
+
+	do {
+		struct aux_payload current_payload;
+		bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) >
+			payload->length ? true : false;
+
+		current_payload.address = payload->address;
+		current_payload.data = &payload->data[retrieved];
+		current_payload.defer_delay = payload->defer_delay;
+		current_payload.i2c_over_aux = payload->i2c_over_aux;
+		current_payload.length = is_end_of_payload ?
+			payload->length - retrieved : DEFAULT_AUX_MAX_DATA_SIZE;
+		current_payload.mot = !is_end_of_payload;
+		current_payload.reply = payload->reply;
+		current_payload.write = payload->write;
+
+		ret = dc_link_aux_transfer_with_retries(ddc, &current_payload);
+
+		retrieved += current_payload.length;
+	} while (retrieved < payload->length && ret == true);
+
+	return ret;
+}
+
 /* dc_link_aux_transfer_raw() - Attempt to transfer
  * the given aux payload.  This function does not perform
  * retries or handle error states.  The reply is returned
@@ -613,6 +649,19 @@ bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc,
 	return dce_aux_transfer_with_retries(ddc, payload);
 }
 
+
+uint32_t dc_link_aux_configure_timeout(struct ddc_service *ddc,
+		uint32_t timeout)
+{
+	uint32_t prev_timeout = 0;
+	struct ddc *ddc_pin = ddc->ddc_pin;
+
+	if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout)
+		prev_timeout =
+				ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
+	return prev_timeout;
+}
+
 /*test only function*/
 void dal_ddc_service_set_ddc_pin(
 	struct ddc_service *ddc_service,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index f5742719b5d9..6ab298c65247 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -4,12 +4,8 @@
 #include "dc_link_dp.h"
 #include "dm_helpers.h"
 #include "opp.h"
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "dsc.h"
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "resource.h"
-#endif
 
 #include "inc/core_types.h"
 #include "link_hwss.h"
@@ -21,6 +17,9 @@
 #define DC_LOGGER \
 	link->ctx->logger
 
+
+#define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE   0x50
+
 /* maximum pre emphasis level allowed for each voltage swing level*/
 static const enum dc_pre_emphasis voltage_swing_to_pre_emphasis[] = {
 		PRE_EMPHASIS_LEVEL3,
@@ -221,19 +220,31 @@ static enum dpcd_training_patterns
 	return dpcd_tr_pattern;
 }
 
+static inline bool is_repeater(struct dc_link *link, uint32_t offset)
+{
+	return (!link->is_lttpr_mode_transparent && offset != 0);
+}
+
 static void dpcd_set_lt_pattern_and_lane_settings(
 	struct dc_link *link,
 	const struct link_training_settings *lt_settings,
-	enum dc_dp_training_pattern pattern)
+	enum dc_dp_training_pattern pattern,
+	uint32_t offset)
 {
 	union dpcd_training_lane dpcd_lane[LANE_COUNT_DP_MAX] = { { {0} } };
-	const uint32_t dpcd_base_lt_offset =
-	DP_TRAINING_PATTERN_SET;
+
+	uint32_t dpcd_base_lt_offset;
+
 	uint8_t dpcd_lt_buffer[5] = {0};
 	union dpcd_training_pattern dpcd_pattern = { {0} };
 	uint32_t lane;
 	uint32_t size_in_bytes;
 	bool edp_workaround = false; /* TODO link_prop.INTERNAL */
+	dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET;
+
+	if (is_repeater(link, offset))
+		dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
+			((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
 
 	/*****************************************************************
 	* DpcdAddress_TrainingPatternSet
@@ -241,14 +252,21 @@ static void dpcd_set_lt_pattern_and_lane_settings(
 	dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
 		dc_dp_training_pattern_to_dpcd_training_pattern(link, pattern);
 
-	dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - dpcd_base_lt_offset]
+	dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
 		= dpcd_pattern.raw;
 
-	DC_LOG_HW_LINK_TRAINING("%s\n %x pattern = %x\n",
-		__func__,
-		DP_TRAINING_PATTERN_SET,
-		dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
-
+	if (is_repeater(link, offset)) {
+		DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
+			__func__,
+			offset,
+			dpcd_base_lt_offset,
+			dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
+	} else {
+		DC_LOG_HW_LINK_TRAINING("%s\n 0x%X pattern = %x\n",
+			__func__,
+			dpcd_base_lt_offset,
+			dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
+	}
 	/*****************************************************************
 	* DpcdAddress_Lane0Set -> DpcdAddress_Lane3Set
 	*****************************************************************/
@@ -268,24 +286,35 @@ static void dpcd_set_lt_pattern_and_lane_settings(
 		PRE_EMPHASIS_MAX_LEVEL ? 1 : 0);
 	}
 
-	/* concatinate everything into one buffer*/
+	/* concatenate everything into one buffer*/
 
 	size_in_bytes = lt_settings->link_settings.lane_count * sizeof(dpcd_lane[0]);
 
 	 // 0x00103 - 0x00102
 	memmove(
-		&dpcd_lt_buffer[DP_TRAINING_LANE0_SET - dpcd_base_lt_offset],
+		&dpcd_lt_buffer[DP_TRAINING_LANE0_SET - DP_TRAINING_PATTERN_SET],
 		dpcd_lane,
 		size_in_bytes);
 
-	DC_LOG_HW_LINK_TRAINING("%s:\n %x VS set = %x  PE set = %x max VS Reached = %x  max PE Reached = %x\n",
-		__func__,
-		DP_TRAINING_LANE0_SET,
-		dpcd_lane[0].bits.VOLTAGE_SWING_SET,
-		dpcd_lane[0].bits.PRE_EMPHASIS_SET,
-		dpcd_lane[0].bits.MAX_SWING_REACHED,
-		dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
-
+	if (is_repeater(link, offset)) {
+		DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
+				" 0x%X VS set = %x PE set = %x max VS Reached = %x  max PE Reached = %x\n",
+			__func__,
+			offset,
+			dpcd_base_lt_offset,
+			dpcd_lane[0].bits.VOLTAGE_SWING_SET,
+			dpcd_lane[0].bits.PRE_EMPHASIS_SET,
+			dpcd_lane[0].bits.MAX_SWING_REACHED,
+			dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
+	} else {
+		DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X VS set = %x  PE set = %x max VS Reached = %x  max PE Reached = %x\n",
+			__func__,
+			dpcd_base_lt_offset,
+			dpcd_lane[0].bits.VOLTAGE_SWING_SET,
+			dpcd_lane[0].bits.PRE_EMPHASIS_SET,
+			dpcd_lane[0].bits.MAX_SWING_REACHED,
+			dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
+	}
 	if (edp_workaround) {
 		/* for eDP write in 2 parts because the 5-byte burst is
 		* causing issues on some eDP panels (EPR#366724)
@@ -495,8 +524,12 @@ static void get_lane_status_and_drive_settings(
 	const struct link_training_settings *link_training_setting,
 	union lane_status *ln_status,
 	union lane_align_status_updated *ln_status_updated,
-	struct link_training_settings *req_settings)
+	struct link_training_settings *req_settings,
+	uint32_t offset)
 {
+	unsigned int lane01_status_address = DP_LANE0_1_STATUS;
+	uint8_t lane_adjust_offset = 4;
+	unsigned int lane01_adjust_address;
 	uint8_t dpcd_buf[6] = {0};
 	union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
 	struct link_training_settings request_settings = { {0} };
@@ -504,9 +537,16 @@ static void get_lane_status_and_drive_settings(
 
 	memset(req_settings, '\0', sizeof(struct link_training_settings));
 
+	if (is_repeater(link, offset)) {
+		lane01_status_address =
+				DP_LANE0_1_STATUS_PHY_REPEATER1 +
+				((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
+		lane_adjust_offset = 3;
+	}
+
 	core_link_read_dpcd(
 		link,
-		DP_LANE0_1_STATUS,
+		lane01_status_address,
 		(uint8_t *)(dpcd_buf),
 		sizeof(dpcd_buf));
 
@@ -517,22 +557,47 @@ static void get_lane_status_and_drive_settings(
 		ln_status[lane].raw =
 			get_nibble_at_index(&dpcd_buf[0], lane);
 		dpcd_lane_adjust[lane].raw =
-			get_nibble_at_index(&dpcd_buf[4], lane);
+			get_nibble_at_index(&dpcd_buf[lane_adjust_offset], lane);
 	}
 
 	ln_status_updated->raw = dpcd_buf[2];
 
-	DC_LOG_HW_LINK_TRAINING("%s:\n%x Lane01Status = %x\n %x Lane23Status = %x\n ",
-		__func__,
-		DP_LANE0_1_STATUS, dpcd_buf[0],
-		DP_LANE2_3_STATUS, dpcd_buf[1]);
-
-	DC_LOG_HW_LINK_TRAINING("%s:\n %x Lane01AdjustRequest = %x\n %x Lane23AdjustRequest = %x\n",
-		__func__,
-		DP_ADJUST_REQUEST_LANE0_1,
-		dpcd_buf[4],
-		DP_ADJUST_REQUEST_LANE2_3,
-		dpcd_buf[5]);
+	if (is_repeater(link, offset)) {
+		DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
+				" 0x%X Lane01Status = %x\n 0x%X Lane23Status = %x\n ",
+			__func__,
+			offset,
+			lane01_status_address, dpcd_buf[0],
+			lane01_status_address + 1, dpcd_buf[1]);
+	} else {
+		DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X Lane01Status = %x\n 0x%X Lane23Status = %x\n ",
+			__func__,
+			lane01_status_address, dpcd_buf[0],
+			lane01_status_address + 1, dpcd_buf[1]);
+	}
+	lane01_adjust_address = DP_ADJUST_REQUEST_LANE0_1;
+
+	if (is_repeater(link, offset))
+		lane01_adjust_address = DP_ADJUST_REQUEST_LANE0_1_PHY_REPEATER1 +
+				((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
+
+	if (is_repeater(link, offset)) {
+		DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
+				" 0x%X Lane01AdjustRequest = %x\n 0x%X Lane23AdjustRequest = %x\n",
+					__func__,
+					offset,
+					lane01_adjust_address,
+					dpcd_buf[lane_adjust_offset],
+					lane01_adjust_address + 1,
+					dpcd_buf[lane_adjust_offset + 1]);
+	} else {
+		DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X Lane01AdjustRequest = %x\n 0x%X Lane23AdjustRequest = %x\n",
+			__func__,
+			lane01_adjust_address,
+			dpcd_buf[lane_adjust_offset],
+			lane01_adjust_address + 1,
+			dpcd_buf[lane_adjust_offset + 1]);
+	}
 
 	/*copy to req_settings*/
 	request_settings.link_settings.lane_count =
@@ -571,10 +636,18 @@ static void get_lane_status_and_drive_settings(
 
 static void dpcd_set_lane_settings(
 	struct dc_link *link,
-	const struct link_training_settings *link_training_setting)
+	const struct link_training_settings *link_training_setting,
+	uint32_t offset)
 {
 	union dpcd_training_lane dpcd_lane[LANE_COUNT_DP_MAX] = {{{0}}};
 	uint32_t lane;
+	unsigned int lane0_set_address;
+
+	lane0_set_address = DP_TRAINING_LANE0_SET;
+
+	if (is_repeater(link, offset))
+		lane0_set_address = DP_TRAINING_LANE0_SET_PHY_REPEATER1 +
+		((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
 
 	for (lane = 0; lane <
 		(uint32_t)(link_training_setting->
@@ -597,7 +670,7 @@ static void dpcd_set_lane_settings(
 	}
 
 	core_link_write_dpcd(link,
-		DP_TRAINING_LANE0_SET,
+		lane0_set_address,
 		(uint8_t *)(dpcd_lane),
 		link_training_setting->link_settings.lane_count);
 
@@ -620,14 +693,26 @@ static void dpcd_set_lane_settings(
 	}
 	*/
 
-	DC_LOG_HW_LINK_TRAINING("%s\n %x VS set = %x  PE set = %x max VS Reached = %x  max PE Reached = %x\n",
-		__func__,
-		DP_TRAINING_LANE0_SET,
-		dpcd_lane[0].bits.VOLTAGE_SWING_SET,
-		dpcd_lane[0].bits.PRE_EMPHASIS_SET,
-		dpcd_lane[0].bits.MAX_SWING_REACHED,
-		dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
+	if (is_repeater(link, offset)) {
+		DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n"
+				" 0x%X VS set = %x  PE set = %x max VS Reached = %x  max PE Reached = %x\n",
+			__func__,
+			offset,
+			lane0_set_address,
+			dpcd_lane[0].bits.VOLTAGE_SWING_SET,
+			dpcd_lane[0].bits.PRE_EMPHASIS_SET,
+			dpcd_lane[0].bits.MAX_SWING_REACHED,
+			dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
 
+	} else {
+		DC_LOG_HW_LINK_TRAINING("%s\n 0x%X VS set = %x  PE set = %x max VS Reached = %x  max PE Reached = %x\n",
+			__func__,
+			lane0_set_address,
+			dpcd_lane[0].bits.VOLTAGE_SWING_SET,
+			dpcd_lane[0].bits.PRE_EMPHASIS_SET,
+			dpcd_lane[0].bits.MAX_SWING_REACHED,
+			dpcd_lane[0].bits.MAX_PRE_EMPHASIS_REACHED);
+	}
 	link->cur_lane_setting = link_training_setting->lane_settings[0];
 
 }
@@ -647,17 +732,6 @@ static bool is_max_vs_reached(
 
 }
 
-void dc_link_dp_set_drive_settings(
-	struct dc_link *link,
-	struct link_training_settings *lt_settings)
-{
-	/* program ASIC PHY settings*/
-	dp_set_hw_lane_settings(link, lt_settings);
-
-	/* Notify DP sink the PHY settings from source */
-	dpcd_set_lane_settings(link, lt_settings);
-}
-
 static bool perform_post_lt_adj_req_sequence(
 	struct dc_link *link,
 	struct link_training_settings *lt_settings)
@@ -690,7 +764,8 @@ static bool perform_post_lt_adj_req_sequence(
 			lt_settings,
 			dpcd_lane_status,
 			&dpcd_lane_status_updated,
-			&req_settings);
+			&req_settings,
+			DPRX);
 
 			if (dpcd_lane_status_updated.bits.
 					POST_LT_ADJ_REQ_IN_PROGRESS == 0)
@@ -747,6 +822,31 @@ static bool perform_post_lt_adj_req_sequence(
 
 }
 
+/* Only used for channel equalization */
+static uint32_t translate_training_aux_read_interval(uint32_t dpcd_aux_read_interval)
+{
+	unsigned int aux_rd_interval_us = 400;
+
+	switch (dpcd_aux_read_interval) {
+	case 0x01:
+		aux_rd_interval_us = 400;
+		break;
+	case 0x02:
+		aux_rd_interval_us = 4000;
+		break;
+	case 0x03:
+		aux_rd_interval_us = 8000;
+		break;
+	case 0x04:
+		aux_rd_interval_us = 16000;
+		break;
+	default:
+		break;
+	}
+
+	return aux_rd_interval_us;
+}
+
 static enum link_training_result get_cr_failure(enum dc_lane_count ln_count,
 					union lane_status *dpcd_lane_status)
 {
@@ -765,37 +865,55 @@ static enum link_training_result get_cr_failure(enum dc_lane_count ln_count,
 
 static enum link_training_result perform_channel_equalization_sequence(
 	struct dc_link *link,
-	struct link_training_settings *lt_settings)
+	struct link_training_settings *lt_settings,
+	uint32_t offset)
 {
 	struct link_training_settings req_settings;
 	enum dc_dp_training_pattern tr_pattern;
 	uint32_t retries_ch_eq;
+	uint32_t wait_time_microsec;
 	enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
 	union lane_align_status_updated dpcd_lane_status_updated = { {0} };
 	union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } };
 
+	/* Note: also check that TPS4 is a supported feature*/
+
 	tr_pattern = lt_settings->pattern_for_eq;
 
-	dp_set_hw_training_pattern(link, tr_pattern);
+	if (is_repeater(link, offset))
+		tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
+
+	dp_set_hw_training_pattern(link, tr_pattern, offset);
 
 	for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT;
 		retries_ch_eq++) {
 
-		dp_set_hw_lane_settings(link, lt_settings);
+		dp_set_hw_lane_settings(link, lt_settings, offset);
 
 		/* 2. update DPCD*/
 		if (!retries_ch_eq)
 			/* EPR #361076 - write as a 5-byte burst,
-			 * but only for the 1-st iteration*/
+			 * but only for the 1-st iteration
+			 */
+
 			dpcd_set_lt_pattern_and_lane_settings(
 				link,
 				lt_settings,
-				tr_pattern);
+				tr_pattern, offset);
 		else
-			dpcd_set_lane_settings(link, lt_settings);
+			dpcd_set_lane_settings(link, lt_settings, offset);
 
 		/* 3. wait for receiver to lock-on*/
-		wait_for_training_aux_rd_interval(link, lt_settings->eq_pattern_time);
+		wait_time_microsec = lt_settings->eq_pattern_time;
+
+		if (is_repeater(link, offset))
+			wait_time_microsec =
+					translate_training_aux_read_interval(
+						link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
+
+		wait_for_training_aux_rd_interval(
+				link,
+				wait_time_microsec);
 
 		/* 4. Read lane status and requested
 		 * drive settings as set by the sink*/
@@ -805,7 +923,8 @@ static enum link_training_result perform_channel_equalization_sequence(
 			lt_settings,
 			dpcd_lane_status,
 			&dpcd_lane_status_updated,
-			&req_settings);
+			&req_settings,
+			offset);
 
 		/* 5. check CR done*/
 		if (!is_cr_done(lane_count, dpcd_lane_status))
@@ -824,13 +943,16 @@ static enum link_training_result perform_channel_equalization_sequence(
 	return LINK_TRAINING_EQ_FAIL_EQ;
 
 }
+#define TRAINING_AUX_RD_INTERVAL 100 //us
 
 static enum link_training_result perform_clock_recovery_sequence(
 	struct dc_link *link,
-	struct link_training_settings *lt_settings)
+	struct link_training_settings *lt_settings,
+	uint32_t offset)
 {
 	uint32_t retries_cr;
 	uint32_t retry_count;
+	uint32_t wait_time_microsec;
 	struct link_training_settings req_settings;
 	enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
 	enum dc_dp_training_pattern tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_1;
@@ -840,7 +962,7 @@ static enum link_training_result perform_clock_recovery_sequence(
 	retries_cr = 0;
 	retry_count = 0;
 
-	dp_set_hw_training_pattern(link, tr_pattern);
+	dp_set_hw_training_pattern(link, tr_pattern, offset);
 
 	/* najeeb - The synaptics MST hub can put the LT in
 	* infinite loop by switching the VS
@@ -857,7 +979,8 @@ static enum link_training_result perform_clock_recovery_sequence(
 		/* 1. call HWSS to set lane settings*/
 		dp_set_hw_lane_settings(
 				link,
-				lt_settings);
+				lt_settings,
+				offset);
 
 		/* 2. update DPCD of the receiver*/
 		if (!retries_cr)
@@ -866,16 +989,23 @@ static enum link_training_result perform_clock_recovery_sequence(
 			dpcd_set_lt_pattern_and_lane_settings(
 					link,
 					lt_settings,
-					tr_pattern);
+					tr_pattern,
+					offset);
 		else
 			dpcd_set_lane_settings(
 					link,
-					lt_settings);
+					lt_settings,
+					offset);
 
 		/* 3. wait receiver to lock-on*/
+		wait_time_microsec = lt_settings->cr_pattern_time;
+
+		if (!link->is_lttpr_mode_transparent)
+			wait_time_microsec = TRAINING_AUX_RD_INTERVAL;
+
 		wait_for_training_aux_rd_interval(
 				link,
-				lt_settings->cr_pattern_time);
+				wait_time_microsec);
 
 		/* 4. Read lane status and requested drive
 		* settings as set by the sink
@@ -885,7 +1015,8 @@ static enum link_training_result perform_clock_recovery_sequence(
 				lt_settings,
 				dpcd_lane_status,
 				&dpcd_lane_status_updated,
-				&req_settings);
+				&req_settings,
+				offset);
 
 		/* 5. check CR done*/
 		if (is_cr_done(lane_count, dpcd_lane_status))
@@ -1054,6 +1185,102 @@ static void initialize_training_settings(
 		lt_settings->enhanced_framing = 1;
 }
 
+static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
+{
+	switch (lttpr_repeater_count) {
+	case 0x80: // 1 lttpr repeater
+		return 1;
+	case 0x40: // 2 lttpr repeaters
+		return 2;
+	case 0x20: // 3 lttpr repeaters
+		return 3;
+	case 0x10: // 4 lttpr repeaters
+		return 4;
+	case 0x08: // 5 lttpr repeaters
+		return 5;
+	case 0x04: // 6 lttpr repeaters
+		return 6;
+	case 0x02: // 7 lttpr repeaters
+		return 7;
+	case 0x01: // 8 lttpr repeaters
+		return 8;
+	default:
+		break;
+	}
+	return 0; // invalid value
+}
+
+static void configure_lttpr_mode(struct dc_link *link)
+{
+	/* aux timeout is already set to extended */
+	/* RESET/SET lttpr mode to enable non transparent mode */
+	uint8_t repeater_cnt;
+	uint32_t aux_interval_address;
+	uint8_t repeater_id;
+	enum dc_status result = DC_ERROR_UNEXPECTED;
+	uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT;
+
+	DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__);
+	result = core_link_write_dpcd(link,
+			DP_PHY_REPEATER_MODE,
+			(uint8_t *)&repeater_mode,
+			sizeof(repeater_mode));
+
+	if (result == DC_OK) {
+		link->dpcd_caps.lttpr_caps.mode = repeater_mode;
+	}
+
+	if (!link->is_lttpr_mode_transparent) {
+
+		DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__);
+
+		repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT;
+		result = core_link_write_dpcd(link,
+				DP_PHY_REPEATER_MODE,
+				(uint8_t *)&repeater_mode,
+				sizeof(repeater_mode));
+
+		if (result == DC_OK) {
+			link->dpcd_caps.lttpr_caps.mode = repeater_mode;
+		}
+
+		repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+		for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
+			aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
+						((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
+			core_link_read_dpcd(
+				link,
+				aux_interval_address,
+				(uint8_t *)&link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1],
+				sizeof(link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1]));
+			link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1] &= 0x7F;
+		}
+	}
+}
+
+static void repeater_training_done(struct dc_link *link, uint32_t offset)
+{
+	union dpcd_training_pattern dpcd_pattern = { {0} };
+
+	const uint32_t dpcd_base_lt_offset =
+			DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
+				((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
+	/* Set training not in progress*/
+	dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
+
+	core_link_write_dpcd(
+		link,
+		dpcd_base_lt_offset,
+		&dpcd_pattern.raw,
+		1);
+
+	DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Id: %d 0x%X pattern = %x\n",
+		__func__,
+		offset,
+		dpcd_base_lt_offset,
+		dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
+}
+
 static void print_status_message(
 	struct dc_link *link,
 	const struct link_training_settings *lt_settings,
@@ -1133,6 +1360,17 @@ static void print_status_message(
 				lt_spread);
 }
 
+void dc_link_dp_set_drive_settings(
+	struct dc_link *link,
+	struct link_training_settings *lt_settings)
+{
+	/* program ASIC PHY settings*/
+	dp_set_hw_lane_settings(link, lt_settings, DPRX);
+
+	/* Notify DP sink the PHY settings from source */
+	dpcd_set_lane_settings(link, lt_settings, DPRX);
+}
+
 bool dc_link_dp_perform_link_training_skip_aux(
 	struct dc_link *link,
 	const struct dc_link_settings *link_setting)
@@ -1149,10 +1387,10 @@ bool dc_link_dp_perform_link_training_skip_aux(
 	/* 1. Perform_clock_recovery_sequence. */
 
 	/* transmit training pattern for clock recovery */
-	dp_set_hw_training_pattern(link, pattern_for_cr);
+	dp_set_hw_training_pattern(link, pattern_for_cr, DPRX);
 
 	/* call HWSS to set lane settings*/
-	dp_set_hw_lane_settings(link, &lt_settings);
+	dp_set_hw_lane_settings(link, &lt_settings, DPRX);
 
 	/* wait receiver to lock-on*/
 	wait_for_training_aux_rd_interval(link, lt_settings.cr_pattern_time);
@@ -1160,10 +1398,10 @@ bool dc_link_dp_perform_link_training_skip_aux(
 	/* 2. Perform_channel_equalization_sequence. */
 
 	/* transmit training pattern for channel equalization. */
-	dp_set_hw_training_pattern(link, lt_settings.pattern_for_eq);
+	dp_set_hw_training_pattern(link, lt_settings.pattern_for_eq, DPRX);
 
 	/* call HWSS to set lane settings*/
-	dp_set_hw_lane_settings(link, &lt_settings);
+	dp_set_hw_lane_settings(link, &lt_settings, DPRX);
 
 	/* wait receiver to lock-on. */
 	wait_for_training_aux_rd_interval(link, lt_settings.eq_pattern_time);
@@ -1185,9 +1423,10 @@ enum link_training_result dc_link_dp_perform_link_training(
 {
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	struct link_training_settings lt_settings;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+
 	bool fec_enable;
-#endif
+	uint8_t repeater_cnt;
+	uint8_t repeater_id;
 
 	initialize_training_settings(
 			link,
@@ -1198,23 +1437,47 @@ enum link_training_result dc_link_dp_perform_link_training(
 	/* 1. set link rate, lane count and spread. */
 	dpcd_set_link_settings(link, &lt_settings);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (link->preferred_training_settings.fec_enable != NULL)
 		fec_enable = *link->preferred_training_settings.fec_enable;
 	else
 		fec_enable = true;
 
 	dp_set_fec_ready(link, fec_enable);
-#endif
 
+	if (!link->is_lttpr_mode_transparent) {
+		/* Configure lttpr mode */
+		configure_lttpr_mode(link);
+
+		/* 2. perform link training (set link training done
+		 *  to false is done as well)
+		 */
+		repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
+		for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
+				repeater_id--) {
+			status = perform_clock_recovery_sequence(link, &lt_settings, repeater_id);
+
+			if (status != LINK_TRAINING_SUCCESS)
+				break;
+
+			status = perform_channel_equalization_sequence(link,
+					&lt_settings,
+					repeater_id);
+
+			if (status != LINK_TRAINING_SUCCESS)
+				break;
+
+			repeater_training_done(link, repeater_id);
+		}
+	}
 
-	/* 2. perform link training (set link training done
-	 *  to false is done as well)
-	 */
-	status = perform_clock_recovery_sequence(link, &lt_settings);
+	if (status == LINK_TRAINING_SUCCESS) {
+		status = perform_clock_recovery_sequence(link, &lt_settings, DPRX);
 	if (status == LINK_TRAINING_SUCCESS) {
 		status = perform_channel_equalization_sequence(link,
-				&lt_settings);
+					&lt_settings,
+					DPRX);
+		}
 	}
 
 	if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) {
@@ -1233,23 +1496,58 @@ enum link_training_result dc_link_dp_perform_link_training(
 }
 
 bool perform_link_training_with_retries(
-	struct dc_link *link,
 	const struct dc_link_settings *link_setting,
 	bool skip_video_pattern,
-	int attempts)
+	int attempts,
+	struct pipe_ctx *pipe_ctx,
+	enum signal_type signal)
 {
 	uint8_t j;
 	uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc_link *link = stream->link;
+	enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
 
 	for (j = 0; j < attempts; ++j) {
 
-		if (dc_link_dp_perform_link_training(
+		dp_enable_link_phy(
+			link,
+			signal,
+			pipe_ctx->clock_source->id,
+			link_setting);
+
+		if (stream->sink_patches.dppowerup_delay > 0) {
+			int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
+
+			msleep(delay_dp_power_up_in_ms);
+		}
+
+		dp_set_panel_mode(link, panel_mode);
+
+		/* We need to do this before the link training to ensure the idle pattern in SST
+		 * mode will be sent right after the link training
+		 */
+		link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+								pipe_ctx->stream_res.stream_enc->id, true);
+
+		if (link->aux_access_disabled) {
+			dc_link_dp_perform_link_training_skip_aux(link, link_setting);
+			return true;
+		} else if (dc_link_dp_perform_link_training(
 				link,
 				link_setting,
 				skip_video_pattern) == LINK_TRAINING_SUCCESS)
 			return true;
 
+		/* latest link training still fail, skip delay and keep PHY on
+		 */
+		if (j == (attempts - 1))
+			break;
+
+		dp_disable_link_phy(link, signal);
+
 		msleep(delay_between_attempts);
+
 		delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
 	}
 
@@ -1321,9 +1619,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt(
 	enum link_training_result lt_status = LINK_TRAINING_SUCCESS;
 	enum dp_panel_mode panel_mode = DP_PANEL_MODE_DEFAULT;
 	enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool fec_enable = false;
-#endif
 
 	initialize_training_settings(
 		link,
@@ -1343,11 +1639,9 @@ enum link_training_result dc_link_dp_sync_lt_attempt(
 	dp_enable_link_phy(link, link->connector_signal,
 		dp_cs_id, link_settings);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	/* Set FEC enable */
 	fec_enable = lt_overrides->fec_enable && *lt_overrides->fec_enable;
 	dp_set_fec_ready(link, fec_enable);
-#endif
 
 	if (lt_overrides->alternate_scrambler_reset) {
 		if (*lt_overrides->alternate_scrambler_reset)
@@ -1367,10 +1661,11 @@ enum link_training_result dc_link_dp_sync_lt_attempt(
 	/* 2. perform link training (set link training done
 	 *  to false is done as well)
 	 */
-	lt_status = perform_clock_recovery_sequence(link, &lt_settings);
+	lt_status = perform_clock_recovery_sequence(link, &lt_settings, DPRX);
 	if (lt_status == LINK_TRAINING_SUCCESS) {
 		lt_status = perform_channel_equalization_sequence(link,
-						&lt_settings);
+						&lt_settings,
+						DPRX);
 	}
 
 	/* 3. Sync LT must skip TRAINING_PATTERN_SET:0 (video pattern)*/
@@ -1387,9 +1682,7 @@ bool dc_link_dp_sync_lt_end(struct dc_link *link, bool link_down)
 	 */
 	if (link_down == true) {
 		dp_disable_link_phy(link, link->connector_signal);
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		dp_set_fec_ready(link, false);
-#endif
 	}
 
 	link->sync_lt_in_progress = false;
@@ -1409,6 +1702,9 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link)
 	if (link->link_enc->features.flags.bits.IS_HBR3_CAPABLE)
 		max_link_cap.link_rate = LINK_RATE_HIGH3;
 
+	if (link->link_enc->funcs->get_max_link_cap)
+		link->link_enc->funcs->get_max_link_cap(link->link_enc, &max_link_cap);
+
 	/* Lower link settings based on sink's link cap */
 	if (link->reported_link_cap.lane_count < max_link_cap.lane_count)
 		max_link_cap.lane_count =
@@ -1420,6 +1716,22 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link)
 			max_link_cap.link_spread)
 		max_link_cap.link_spread =
 				link->reported_link_cap.link_spread;
+	/*
+	 * account for lttpr repeaters cap
+	 * notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3).
+	 */
+	if (!link->is_lttpr_mode_transparent) {
+		if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count)
+			max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count;
+
+		if (link->dpcd_caps.lttpr_caps.max_link_rate < max_link_cap.link_rate)
+			max_link_cap.link_rate = link->dpcd_caps.lttpr_caps.max_link_rate;
+
+		DC_LOG_HW_LINK_TRAINING("%s\n Training with LTTPR,  max_lane count %d max_link rate %d \n",
+						__func__,
+						max_link_cap.lane_count,
+						max_link_cap.link_rate);
+	}
 	return max_link_cap;
 }
 
@@ -1565,6 +1877,13 @@ bool dp_verify_link_cap(
 
 	max_link_cap = get_max_link_cap(link);
 
+	/* Grant extended timeout request */
+	if (!link->is_lttpr_mode_transparent && link->dpcd_caps.lttpr_caps.max_ext_timeout > 0) {
+		uint8_t grant = link->dpcd_caps.lttpr_caps.max_ext_timeout & 0x80;
+
+		core_link_write_dpcd(link, DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT, &grant, sizeof(grant));
+	}
+
 	/* TODO implement override and monitor patch later */
 
 	/* try to train the link from high to low to
@@ -1573,6 +1892,16 @@ bool dp_verify_link_cap(
 	/* disable PHY done possible by BIOS, will be done by driver itself */
 	dp_disable_link_phy(link, link->connector_signal);
 
+	/* Temporary Renoir-specific workaround for SWDEV-215184;
+	 * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle,
+	 * so add extra cycle of enabling and disabling the PHY before first link training.
+	 */
+	if (link->link_enc->features.flags.bits.DP_IS_USB_C &&
+			link->dc->debug.usbc_combo_phy_reset_wa) {
+		dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur);
+		dp_disable_link_phy(link, link->connector_signal);
+	}
+
 	dp_cs_id = get_clock_source_id(link);
 
 	/* link training starts with the maximum common settings
@@ -1653,11 +1982,14 @@ bool dp_verify_link_cap_with_retries(
 
 	for (i = 0; i < attempts; i++) {
 		int fail_count = 0;
-		enum dc_connection_type type;
+		enum dc_connection_type type = dc_connection_none;
 
 		memset(&link->verified_link_cap, 0,
 				sizeof(struct dc_link_settings));
-		if (!dc_link_detect_sink(link, &type)) {
+		if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) {
+			link->verified_link_cap.lane_count = LANE_COUNT_ONE;
+			link->verified_link_cap.link_rate = LINK_RATE_LOW;
+			link->verified_link_cap.link_spread = LINK_SPREAD_DISABLED;
 			break;
 		} else if (dp_verify_link_cap(link,
 				&link->reported_link_cap,
@@ -1670,6 +2002,19 @@ bool dp_verify_link_cap_with_retries(
 	return success;
 }
 
+bool dp_verify_mst_link_cap(
+	struct dc_link *link)
+{
+	struct dc_link_settings max_link_cap = {0};
+
+	max_link_cap = get_max_link_cap(link);
+	link->verified_link_cap = get_common_supported_link_settings(
+		link->reported_link_cap,
+		max_link_cap);
+
+	return true;
+}
+
 static struct dc_link_settings get_common_supported_link_settings(
 		struct dc_link_settings link_setting_a,
 		struct dc_link_settings link_setting_b)
@@ -2057,11 +2402,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link)
 	return false;
 }
 
-static bool handle_hpd_irq_psr_sink(const struct dc_link *link)
+static bool handle_hpd_irq_psr_sink(struct dc_link *link)
 {
 	union dpcd_psr_configuration psr_configuration;
 
-	if (!link->psr_enabled)
+	if (!link->psr_feature_enabled)
 		return false;
 
 	dm_helpers_dp_read_dpcd(
@@ -2100,8 +2445,8 @@ static bool handle_hpd_irq_psr_sink(const struct dc_link *link)
 				sizeof(psr_error_status.raw));
 
 			/* PSR error, disable and re-enable PSR */
-			dc_link_set_psr_enable(link, false, true);
-			dc_link_set_psr_enable(link, true, true);
+			dc_link_set_psr_allow_active(link, false, true);
+			dc_link_set_psr_allow_active(link, true, true);
 
 			return true;
 		} else if (psr_sink_psr_status.bits.SINK_SELF_REFRESH_STATUS ==
@@ -2261,6 +2606,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link)
 	dc_link_dp_set_test_pattern(
 		link,
 		test_pattern,
+		DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED,
 		&link_training_settings,
 		test_80_bit_pattern,
 		(DP_TEST_80BIT_CUSTOM_PATTERN_79_72 -
@@ -2272,6 +2618,8 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
 	union link_test_pattern dpcd_test_pattern;
 	union test_misc dpcd_test_params;
 	enum dp_test_pattern test_pattern;
+	enum dp_test_pattern_color_space test_pattern_color_space =
+			DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED;
 
 	memset(&dpcd_test_pattern, 0, sizeof(dpcd_test_pattern));
 	memset(&dpcd_test_params, 0, sizeof(dpcd_test_params));
@@ -2306,14 +2654,105 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
 	break;
 	}
 
+	test_pattern_color_space = dpcd_test_params.bits.YCBCR_COEFS ?
+			DP_TEST_PATTERN_COLOR_SPACE_YCBCR709 :
+			DP_TEST_PATTERN_COLOR_SPACE_YCBCR601;
+
 	dc_link_dp_set_test_pattern(
 			link,
 			test_pattern,
+			test_pattern_color_space,
 			NULL,
 			NULL,
 			0);
 }
 
+static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video)
+{
+	union audio_test_mode            dpcd_test_mode = {0};
+	struct audio_test_pattern_type   dpcd_pattern_type = {0};
+	union audio_test_pattern_period  dpcd_pattern_period[AUDIO_CHANNELS_COUNT] = {0};
+	enum dp_test_pattern test_pattern = DP_TEST_PATTERN_AUDIO_OPERATOR_DEFINED;
+
+	struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
+	struct pipe_ctx *pipe_ctx = &pipes[0];
+	unsigned int channel_count;
+	unsigned int channel = 0;
+	unsigned int modes = 0;
+	unsigned int sampling_rate_in_hz = 0;
+
+	// get audio test mode and test pattern parameters
+	core_link_read_dpcd(
+		link,
+		DP_TEST_AUDIO_MODE,
+		&dpcd_test_mode.raw,
+		sizeof(dpcd_test_mode));
+
+	core_link_read_dpcd(
+		link,
+		DP_TEST_AUDIO_PATTERN_TYPE,
+		&dpcd_pattern_type.value,
+		sizeof(dpcd_pattern_type));
+
+	channel_count = dpcd_test_mode.bits.channel_count + 1;
+
+	// read pattern periods for requested channels when sawTooth pattern is requested
+	if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH ||
+			dpcd_pattern_type.value == AUDIO_TEST_PATTERN_OPERATOR_DEFINED) {
+
+		test_pattern = (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH) ?
+				DP_TEST_PATTERN_AUDIO_SAWTOOTH : DP_TEST_PATTERN_AUDIO_OPERATOR_DEFINED;
+		// read period for each channel
+		for (channel = 0; channel < channel_count; channel++) {
+			core_link_read_dpcd(
+							link,
+							DP_TEST_AUDIO_PERIOD_CH1 + channel,
+							&dpcd_pattern_period[channel].raw,
+							sizeof(dpcd_pattern_period[channel]));
+		}
+	}
+
+	// translate sampling rate
+	switch (dpcd_test_mode.bits.sampling_rate) {
+	case AUDIO_SAMPLING_RATE_32KHZ:
+		sampling_rate_in_hz = 32000;
+		break;
+	case AUDIO_SAMPLING_RATE_44_1KHZ:
+		sampling_rate_in_hz = 44100;
+		break;
+	case AUDIO_SAMPLING_RATE_48KHZ:
+		sampling_rate_in_hz = 48000;
+		break;
+	case AUDIO_SAMPLING_RATE_88_2KHZ:
+		sampling_rate_in_hz = 88200;
+		break;
+	case AUDIO_SAMPLING_RATE_96KHZ:
+		sampling_rate_in_hz = 96000;
+		break;
+	case AUDIO_SAMPLING_RATE_176_4KHZ:
+		sampling_rate_in_hz = 176400;
+		break;
+	case AUDIO_SAMPLING_RATE_192KHZ:
+		sampling_rate_in_hz = 192000;
+		break;
+	default:
+		sampling_rate_in_hz = 0;
+		break;
+	}
+
+	link->audio_test_data.flags.test_requested = 1;
+	link->audio_test_data.flags.disable_video = disable_video;
+	link->audio_test_data.sampling_rate = sampling_rate_in_hz;
+	link->audio_test_data.channel_count = channel_count;
+	link->audio_test_data.pattern_type = test_pattern;
+
+	if (test_pattern == DP_TEST_PATTERN_AUDIO_SAWTOOTH) {
+		for (modes = 0; modes < pipe_ctx->stream->audio_info.mode_count; modes++) {
+			link->audio_test_data.pattern_period[modes] = dpcd_pattern_period[modes].bits.pattern_period;
+		}
+	}
+}
+
 static void handle_automated_test(struct dc_link *link)
 {
 	union test_request test_request;
@@ -2343,6 +2782,12 @@ static void handle_automated_test(struct dc_link *link)
 		dp_test_send_link_test_pattern(link);
 		test_response.bits.ACK = 1;
 	}
+
+	if (test_request.bits.AUDIO_TEST_PATTERN) {
+		dp_test_get_audio_test_data(link, test_request.bits.TEST_AUDIO_DISABLED_VIDEO);
+		test_response.bits.ACK = 1;
+	}
+
 	if (test_request.bits.PHY_TEST_PATTERN) {
 		dp_test_send_phy_test_pattern(link);
 		test_response.bits.ACK = 1;
@@ -2362,8 +2807,10 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
 	union hpd_irq_data hpd_irq_dpcd_data = { { { {0} } } };
 	union device_service_irq device_service_clear = { { 0 } };
 	enum dc_status result;
-
 	bool status = false;
+	struct pipe_ctx *pipe_ctx;
+	struct dc_link_settings previous_link_settings;
+	int i;
 
 	if (out_link_loss)
 		*out_link_loss = false;
@@ -2426,19 +2873,36 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
 	/* For now we only handle 'Downstream port status' case.
 	 * If we got sink count changed it means
 	 * Downstream port status changed,
-	 * then DM should call DC to do the detection. */
-	if (hpd_rx_irq_check_link_loss_status(
-		link,
-		&hpd_irq_dpcd_data)) {
+	 * then DM should call DC to do the detection.
+	 * NOTE: Do not handle link loss on eDP since it is internal link*/
+	if ((link->connector_signal != SIGNAL_TYPE_EDP) &&
+		hpd_rx_irq_check_link_loss_status(
+			link,
+			&hpd_irq_dpcd_data)) {
 		/* Connectivity log: link loss */
 		CONN_DATA_LINK_LOSS(link,
 					hpd_irq_dpcd_data.raw,
 					sizeof(hpd_irq_dpcd_data),
 					"Status: ");
 
-		perform_link_training_with_retries(link,
-			&link->cur_link_settings,
-			true, LINK_TRAINING_ATTEMPTS);
+		for (i = 0; i < MAX_PIPES; i++) {
+			pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+			if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
+				break;
+		}
+
+		if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
+			return false;
+
+		previous_link_settings = link->cur_link_settings;
+
+		perform_link_training_with_retries(&previous_link_settings,
+			true, LINK_TRAINING_ATTEMPTS,
+			pipe_ctx,
+			pipe_ctx->stream->signal);
+
+		if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+			dc_link_reallocate_mst_payload(link);
 
 		status = false;
 		if (out_link_loss)
@@ -2545,6 +3009,7 @@ static void get_active_converter_info(
 	uint8_t data, struct dc_link *link)
 {
 	union dp_downstream_port_present ds_port = { .byte = data };
+	memset(&link->dpcd_caps.dongle_caps, 0, sizeof(link->dpcd_caps.dongle_caps));
 
 	/* decode converter info*/
 	if (!ds_port.fields.PORT_PRESENT) {
@@ -2666,7 +3131,6 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data,
 		int length)
 {
 	int retry = 0;
-	union dp_downstream_port_present ds_port = { 0 };
 
 	if (!link->dpcd_caps.dpcd_rev.raw) {
 		do {
@@ -2679,9 +3143,6 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data,
 		} while (retry++ < 4 && !link->dpcd_caps.dpcd_rev.raw);
 	}
 
-	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT -
-				 DP_DPCD_REV];
-
 	if (link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER) {
 		switch (link->dpcd_caps.branch_dev_id) {
 		/* 0010FA active dongles (DP-VGA, DP-DLDVI converters) power down
@@ -2691,6 +3152,7 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data,
 		 * keep receiver powered all the time.*/
 		case DP_BRANCH_DEVICE_ID_0010FA:
 		case DP_BRANCH_DEVICE_ID_0080E1:
+		case DP_BRANCH_DEVICE_ID_00E04C:
 			link->wa_flags.dp_keep_receiver_powered = true;
 			break;
 
@@ -2705,7 +3167,11 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data,
 
 static bool retrieve_link_cap(struct dc_link *link)
 {
-	uint8_t dpcd_data[DP_ADAPTER_CAP - DP_DPCD_REV + 1];
+	/* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16,
+	 * which means size 16 will be good for both of those DPCD register block reads
+	 */
+	uint8_t dpcd_data[16];
+	uint8_t lttpr_dpcd_data[6];
 
 	/*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST.
 	 */
@@ -2721,7 +3187,19 @@ static bool retrieve_link_cap(struct dc_link *link)
 	int i;
 	struct dp_sink_hw_fw_revision dp_hw_fw_revision;
 
+	/* Set default timeout to 3.2ms and read LTTPR capabilities */
+	bool ext_timeout_support = link->dc->caps.extended_aux_timeout_support &&
+			!link->dc->config.disable_extended_timeout_support;
+
+	link->is_lttpr_mode_transparent = true;
+
+	if (ext_timeout_support) {
+		dc_link_aux_configure_timeout(link->ddc,
+					LINK_AUX_DEFAULT_EXTENDED_TIMEOUT_PERIOD);
+	}
+
 	memset(dpcd_data, '\0', sizeof(dpcd_data));
+	memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));
 	memset(&down_strm_port_count,
 		'\0', sizeof(union down_stream_port_count));
 	memset(&edp_config_cap, '\0',
@@ -2753,6 +3231,52 @@ static bool retrieve_link_cap(struct dc_link *link)
 		return false;
 	}
 
+	if (ext_timeout_support) {
+
+		status = core_link_read_dpcd(
+				link,
+				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
+				lttpr_dpcd_data,
+				sizeof(lttpr_dpcd_data));
+
+		link->dpcd_caps.lttpr_caps.revision.raw =
+				lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_link_rate =
+				lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
+				lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_lane_count =
+				lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.mode =
+				lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_ext_timeout =
+				lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		if (link->dpcd_caps.lttpr_caps.phy_repeater_cnt > 0 &&
+				link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
+				link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
+				link->dpcd_caps.lttpr_caps.revision.raw >= 0x14) {
+			link->is_lttpr_mode_transparent = false;
+		} else {
+			/*No lttpr reset timeout to its default value*/
+			link->is_lttpr_mode_transparent = true;
+			dc_link_aux_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
+		}
+
+		CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
+	}
+
 	{
 		union training_aux_rd_interval aux_rd_interval;
 
@@ -2760,7 +3284,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 			dpcd_data[DP_TRAINING_AUX_RD_INTERVAL];
 
 		link->dpcd_caps.ext_receiver_cap_field_present =
-				aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? true:false;
+				aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1;
 
 		if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) {
 			uint8_t ext_cap_data[16];
@@ -2891,7 +3415,6 @@ static bool retrieve_link_cap(struct dc_link *link)
 		dp_hw_fw_revision.ieee_fw_rev,
 		sizeof(dp_hw_fw_revision.ieee_fw_rev));
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	memset(&link->dpcd_caps.dsc_caps, '\0',
 			sizeof(link->dpcd_caps.dsc_caps));
 	memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap));
@@ -2913,7 +3436,6 @@ static bool retrieve_link_cap(struct dc_link *link)
 				link->dpcd_caps.dsc_caps.dsc_ext_caps.raw,
 				sizeof(link->dpcd_caps.dsc_caps.dsc_ext_caps.raw));
 	}
-#endif
 
 	/* Connectivity log: detection */
 	CONN_DATA_DETECT(link, dpcd_data, sizeof(dpcd_data), "Rx Caps: ");
@@ -2921,6 +3443,68 @@ static bool retrieve_link_cap(struct dc_link *link)
 	return true;
 }
 
+bool dp_overwrite_extended_receiver_cap(struct dc_link *link)
+{
+	uint8_t dpcd_data[16];
+	uint32_t read_dpcd_retry_cnt = 3;
+	enum dc_status status = DC_ERROR_UNEXPECTED;
+	union dp_downstream_port_present ds_port = { 0 };
+	union down_stream_port_count down_strm_port_count;
+	union edp_configuration_cap edp_config_cap;
+
+	int i;
+
+	for (i = 0; i < read_dpcd_retry_cnt; i++) {
+		status = core_link_read_dpcd(
+				link,
+				DP_DPCD_REV,
+				dpcd_data,
+				sizeof(dpcd_data));
+		if (status == DC_OK)
+			break;
+	}
+
+	link->dpcd_caps.dpcd_rev.raw =
+		dpcd_data[DP_DPCD_REV - DP_DPCD_REV];
+
+	if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0)
+		return false;
+
+	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT -
+			DP_DPCD_REV];
+
+	get_active_converter_info(ds_port.byte, link);
+
+	down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT -
+			DP_DPCD_REV];
+
+	link->dpcd_caps.allow_invalid_MSA_timing_param =
+		down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM;
+
+	link->dpcd_caps.max_ln_count.raw = dpcd_data[
+		DP_MAX_LANE_COUNT - DP_DPCD_REV];
+
+	link->dpcd_caps.max_down_spread.raw = dpcd_data[
+		DP_MAX_DOWNSPREAD - DP_DPCD_REV];
+
+	link->reported_link_cap.lane_count =
+		link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT;
+	link->reported_link_cap.link_rate = dpcd_data[
+		DP_MAX_LINK_RATE - DP_DPCD_REV];
+	link->reported_link_cap.link_spread =
+		link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ?
+		LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
+
+	edp_config_cap.raw = dpcd_data[
+		DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV];
+	link->dpcd_caps.panel_mode_edp =
+		edp_config_cap.bits.ALT_SCRAMBLER_RESET;
+	link->dpcd_caps.dpcd_display_control_capable =
+		edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE;
+
+	return true;
+}
+
 bool detect_dp_sink_caps(struct dc_link *link)
 {
 	return retrieve_link_cap(link);
@@ -3035,21 +3619,20 @@ static bool is_dp_phy_pattern(enum dp_test_pattern test_pattern)
 
 static void set_crtc_test_pattern(struct dc_link *link,
 				struct pipe_ctx *pipe_ctx,
-				enum dp_test_pattern test_pattern)
+				enum dp_test_pattern test_pattern,
+				enum dp_test_pattern_color_space test_pattern_color_space)
 {
 	enum controller_dp_test_pattern controller_test_pattern;
 	enum dc_color_depth color_depth = pipe_ctx->
 		stream->timing.display_color_depth;
 	struct bit_depth_reduction_params params;
 	struct output_pixel_processor *opp = pipe_ctx->stream_res.opp;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	int width = pipe_ctx->stream->timing.h_addressable +
 		pipe_ctx->stream->timing.h_border_left +
 		pipe_ctx->stream->timing.h_border_right;
 	int height = pipe_ctx->stream->timing.v_addressable +
 		pipe_ctx->stream->timing.v_border_bottom +
 		pipe_ctx->stream->timing.v_border_top;
-#endif
 
 	memset(&params, 0, sizeof(params));
 
@@ -3093,10 +3676,29 @@ static void set_crtc_test_pattern(struct dc_link *link,
 		if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
 			pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
 				controller_test_pattern, color_depth);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		else if (opp->funcs->opp_set_disp_pattern_generator) {
 			struct pipe_ctx *odm_pipe;
+			enum controller_dp_color_space controller_color_space;
 			int opp_cnt = 1;
+			uint8_t count = 0;
+
+			switch (test_pattern_color_space) {
+			case DP_TEST_PATTERN_COLOR_SPACE_RGB:
+				controller_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
+				break;
+			case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
+				controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR601;
+				break;
+			case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
+				controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR709;
+				break;
+			case DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED:
+			default:
+				controller_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
+				DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__);
+				ASSERT(0);
+				break;
+			}
 
 			for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
 				opp_cnt++;
@@ -3109,6 +3711,7 @@ static void set_crtc_test_pattern(struct dc_link *link,
 				odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
 				odm_opp->funcs->opp_set_disp_pattern_generator(odm_opp,
 					controller_test_pattern,
+					controller_color_space,
 					color_depth,
 					NULL,
 					width,
@@ -3116,12 +3719,18 @@ static void set_crtc_test_pattern(struct dc_link *link,
 			}
 			opp->funcs->opp_set_disp_pattern_generator(opp,
 				controller_test_pattern,
+				controller_color_space,
 				color_depth,
 				NULL,
 				width,
 				height);
+			/* wait for dpg to blank pixel data with test pattern */
+			for (count = 0; count < 1000; count++)
+				if (opp->funcs->dpg_is_blanked(opp))
+					break;
+				else
+					udelay(100);
 		}
-#endif
 	}
 	break;
 	case DP_TEST_PATTERN_VIDEO_MODE:
@@ -3134,7 +3743,6 @@ static void set_crtc_test_pattern(struct dc_link *link,
 			pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
 				CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
 				color_depth);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		else if (opp->funcs->opp_set_disp_pattern_generator) {
 			struct pipe_ctx *odm_pipe;
 			int opp_cnt = 1;
@@ -3149,6 +3757,7 @@ static void set_crtc_test_pattern(struct dc_link *link,
 				odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
 				odm_opp->funcs->opp_set_disp_pattern_generator(odm_opp,
 					CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
+					CONTROLLER_DP_COLOR_SPACE_UDEFINED,
 					color_depth,
 					NULL,
 					width,
@@ -3156,12 +3765,12 @@ static void set_crtc_test_pattern(struct dc_link *link,
 			}
 			opp->funcs->opp_set_disp_pattern_generator(opp,
 				CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
+				CONTROLLER_DP_COLOR_SPACE_UDEFINED,
 				color_depth,
 				NULL,
 				width,
 				height);
 		}
-#endif
 	}
 	break;
 
@@ -3173,6 +3782,7 @@ static void set_crtc_test_pattern(struct dc_link *link,
 bool dc_link_dp_set_test_pattern(
 	struct dc_link *link,
 	enum dp_test_pattern test_pattern,
+	enum dp_test_pattern_color_space test_pattern_color_space,
 	const struct link_training_settings *p_link_settings,
 	const unsigned char *p_custom_pattern,
 	unsigned int cust_pattern_size)
@@ -3201,7 +3811,7 @@ bool dc_link_dp_set_test_pattern(
 	if (link->test_pattern_enabled && test_pattern ==
 			DP_TEST_PATTERN_VIDEO_MODE) {
 		/* Set CRTC Test Pattern */
-		set_crtc_test_pattern(link, pipe_ctx, test_pattern);
+		set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space);
 		dp_set_hw_test_pattern(link, test_pattern,
 				(uint8_t *)p_custom_pattern,
 				(uint32_t)cust_pattern_size);
@@ -3224,8 +3834,8 @@ bool dc_link_dp_set_test_pattern(
 	if (is_dp_phy_pattern(test_pattern)) {
 		/* Set DPCD Lane Settings before running test pattern */
 		if (p_link_settings != NULL) {
-			dp_set_hw_lane_settings(link, p_link_settings);
-			dpcd_set_lane_settings(link, p_link_settings);
+			dp_set_hw_lane_settings(link, p_link_settings, DPRX);
+			dpcd_set_lane_settings(link, p_link_settings, DPRX);
 		}
 
 		/* Blank stream if running test pattern */
@@ -3316,7 +3926,7 @@ bool dc_link_dp_set_test_pattern(
 		}
 	} else {
 	/* CRTC Patterns */
-		set_crtc_test_pattern(link, pipe_ctx, test_pattern);
+		set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space);
 		/* Set Test Pattern state */
 		link->test_pattern_enabled = true;
 	}
@@ -3436,7 +4046,6 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link)
 	return DP_PANEL_MODE_DEFAULT;
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 void dp_set_fec_ready(struct dc_link *link, bool ready)
 {
 	/* FEC has to be "set ready" before the link training.
@@ -3490,7 +4099,14 @@ void dp_set_fec_enable(struct dc_link *link, bool enable)
 	if (link_enc->funcs->fec_set_enable &&
 			link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
 		if (link->fec_state == dc_link_fec_ready && enable) {
-			msleep(1);
+			/* Accord to DP spec, FEC enable sequence can first
+			 * be transmitted anytime after 1000 LL codes have
+			 * been transmitted on the link after link training
+			 * completion. Using 1 lane RBR should have the maximum
+			 * time for transmitting 1000 LL codes which is 6.173 us.
+			 * So use 7 microseconds delay instead.
+			 */
+			udelay(7);
 			link_enc->funcs->fec_set_enable(link_enc, true);
 			link->fec_state = dc_link_fec_enabled;
 		} else if (link->fec_state == dc_link_fec_enabled && !enable) {
@@ -3499,5 +4115,4 @@ void dp_set_fec_enable(struct dc_link *link, bool enable)
 		}
 	}
 }
-#endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 79438c4f1e20..ddb855045767 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -12,12 +12,38 @@
 #include "dc_link_ddc.h"
 #include "dm_helpers.h"
 #include "dpcd_defs.h"
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "dsc.h"
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "resource.h"
-#endif
+
+static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
+{
+	switch (lttpr_repeater_count) {
+	case 0x80: // 1 lttpr repeater
+		return 1;
+	case 0x40: // 2 lttpr repeaters
+		return 2;
+	case 0x20: // 3 lttpr repeaters
+		return 3;
+	case 0x10: // 4 lttpr repeaters
+		return 4;
+	case 0x08: // 5 lttpr repeaters
+		return 5;
+	case 0x04: // 6 lttpr repeaters
+		return 6;
+	case 0x02: // 7 lttpr repeaters
+		return 7;
+	case 0x01: // 8 lttpr repeaters
+		return 8;
+	default:
+		break;
+	}
+	return 0; // invalid value
+}
+
+static inline bool is_immediate_downstream(struct dc_link *link, uint32_t offset)
+{
+	return (convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == offset);
+}
 
 enum dc_status core_link_read_dpcd(
 	struct dc_link *link,
@@ -69,8 +95,8 @@ void dp_enable_link_phy(
 	const struct dc_link_settings *link_settings)
 {
 	struct link_encoder *link_enc = link->link_enc;
-	struct dc  *core_dc = link->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc  *dc = link->ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	struct pipe_ctx *pipes =
 			link->dc->current_state->res_ctx.pipe_ctx;
@@ -147,15 +173,20 @@ bool edp_receiver_ready_T9(struct dc_link *link)
 }
 bool edp_receiver_ready_T7(struct dc_link *link)
 {
-	unsigned int tries = 0;
 	unsigned char sinkstatus = 0;
 	unsigned char edpRev = 0;
 	enum dc_status result = DC_OK;
 
+	/* use absolute time stamp to constrain max T7*/
+	unsigned long long enter_timestamp = 0;
+	unsigned long long finish_timestamp = 0;
+	unsigned long long time_taken_in_ns = 0;
+
 	result = core_link_read_dpcd(link, DP_EDP_DPCD_REV, &edpRev, sizeof(edpRev));
 	if (result == DC_OK && edpRev < DP_EDP_12)
 		return true;
 	/* start from eDP version 1.2, SINK_STAUS indicate the sink is ready.*/
+	enter_timestamp = dm_get_timestamp(link->ctx);
 	do {
 		sinkstatus = 0;
 		result = core_link_read_dpcd(link, DP_SINK_STATUS, &sinkstatus, sizeof(sinkstatus));
@@ -163,8 +194,10 @@ bool edp_receiver_ready_T7(struct dc_link *link)
 			break;
 		if (result != DC_OK)
 			break;
-		udelay(25); //MAx T7 is 50ms
-	} while (++tries < 300);
+		udelay(25);
+		finish_timestamp = dm_get_timestamp(link->ctx);
+		time_taken_in_ns = dm_get_elapse_time_in_ns(link->ctx, finish_timestamp, enter_timestamp);
+	} while (time_taken_in_ns < 50 * 1000000); //MAx T7 is 50ms
 
 	if (link->local_sink->edid_caps.panel_patch.extra_t7_ms > 0)
 		udelay(link->local_sink->edid_caps.panel_patch.extra_t7_ms * 1000);
@@ -174,8 +207,8 @@ bool edp_receiver_ready_T7(struct dc_link *link)
 
 void dp_disable_link_phy(struct dc_link *link, enum signal_type signal)
 {
-	struct dc  *core_dc = link->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	struct dc  *dc = link->ctx->dc;
+	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	if (!link->wa_flags.dp_keep_receiver_powered)
 		dp_receiver_power_ctrl(link, false);
@@ -212,7 +245,8 @@ void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal)
 
 bool dp_set_hw_training_pattern(
 	struct dc_link *link,
-	enum dc_dp_training_pattern pattern)
+	enum dc_dp_training_pattern pattern,
+	uint32_t offset)
 {
 	enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
 
@@ -240,10 +274,14 @@ bool dp_set_hw_training_pattern(
 
 void dp_set_hw_lane_settings(
 	struct dc_link *link,
-	const struct link_training_settings *link_settings)
+	const struct link_training_settings *link_settings,
+	uint32_t offset)
 {
 	struct link_encoder *encoder = link->link_enc;
 
+	if (!link->is_lttpr_mode_transparent && !is_immediate_downstream(link, offset))
+		return;
+
 	/* call Encoder to set lane settings */
 	encoder->funcs->dp_set_lane_settings(encoder, link_settings);
 }
@@ -277,7 +315,8 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 		if (pipes[i].stream != NULL &&
 			!pipes[i].top_pipe && !pipes[i].prev_odm_pipe &&
 			pipes[i].stream->link != NULL &&
-			pipes[i].stream_res.stream_enc != NULL) {
+			pipes[i].stream_res.stream_enc != NULL &&
+			pipes[i].stream->link == link) {
 			udelay(100);
 
 			pipes[i].stream_res.stream_enc->funcs->dp_blank(
@@ -301,20 +340,12 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 			memset(&link->cur_link_settings, 0,
 				sizeof(link->cur_link_settings));
 
-			link->link_enc->funcs->enable_dp_output(
-						link->link_enc,
-						link_setting,
-						pipes[i].clock_source->id);
-			link->cur_link_settings = *link_setting;
-
-			dp_receiver_power_ctrl(link, true);
-
 			perform_link_training_with_retries(
-					link,
 					link_setting,
 					skip_video_pattern,
-					LINK_TRAINING_ATTEMPTS);
-
+					LINK_TRAINING_ATTEMPTS,
+					&pipes[i],
+					SIGNAL_TYPE_DISPLAY_PORT);
 
 			link->dc->hwss.enable_stream(&pipes[i]);
 
@@ -338,7 +369,6 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 	}
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #define DC_LOGGER \
 	dsc->ctx->logger
 static void dsc_optc_config_log(struct display_stream_compressor *dsc,
@@ -364,14 +394,14 @@ static void dsc_optc_config_log(struct display_stream_compressor *dsc,
 
 static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
 {
-	struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	bool result = false;
 
-	if (IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment))
+	if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
 		result = true;
 	else
-		result = dm_helpers_dp_write_dsc_enable(core_dc->ctx, stream, enable);
+		result = dm_helpers_dp_write_dsc_enable(dc->ctx, stream, enable);
 	return result;
 }
 
@@ -381,7 +411,7 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
 void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
-	struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
@@ -417,7 +447,7 @@ void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 		optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
 
 		/* Enable DSC in encoder */
-		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 			DC_LOG_DSC("Setting stream encoder DSC config for engine %d:", (int)pipe_ctx->stream_res.stream_enc->id);
 			dsc_optc_config_log(dsc, &dsc_optc_cfg);
 			pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(pipe_ctx->stream_res.stream_enc,
@@ -442,7 +472,7 @@ void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 				OPTC_DSC_DISABLED, 0, 0);
 
 		/* disable DSC in stream encoder */
-		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 			pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(
 					pipe_ctx->stream_res.stream_enc,
 					OPTC_DSC_DISABLED, 0, 0);
@@ -485,7 +515,7 @@ out:
 bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable)
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
-	struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 
 	if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
@@ -495,6 +525,9 @@ bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable)
 		struct dsc_config dsc_cfg;
 		uint8_t dsc_packed_pps[128];
 
+		memset(&dsc_cfg, 0, sizeof(dsc_cfg));
+		memset(dsc_packed_pps, 0, 128);
+
 		/* Enable DSC hw block */
 		dsc_cfg.pic_width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
 		dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
@@ -504,7 +537,7 @@ bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable)
 
 		DC_LOG_DSC(" ");
 		dsc->funcs->dsc_get_packed_pps(dsc, &dsc_cfg, &dsc_packed_pps[0]);
-		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 			DC_LOG_DSC("Setting stream encoder DSC PPS SDP for engine %d\n", (int)pipe_ctx->stream_res.stream_enc->id);
 			pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
 									pipe_ctx->stream_res.stream_enc,
@@ -513,7 +546,7 @@ bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable)
 		}
 	} else {
 		/* disable DSC PPS in stream encoder */
-		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 			pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
 						pipe_ctx->stream_res.stream_enc, false, NULL);
 		}
@@ -536,5 +569,4 @@ bool dp_update_dsc_config(struct pipe_ctx *pipe_ctx)
 	dp_set_dsc_pps_sdp(pipe_ctx, true);
 	return true;
 }
-#endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f25ac17f47fa..a0eb9e533a61 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -46,15 +46,11 @@
 #include "dce100/dce100_resource.h"
 #include "dce110/dce110_resource.h"
 #include "dce112/dce112_resource.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "dcn10/dcn10_resource.h"
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dcn20/dcn20_resource.h"
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #include "dcn21/dcn21_resource.h"
-#endif
 #include "dce120/dce120_resource.h"
 
 #define DC_LOGGER_INIT(logger)
@@ -99,23 +95,19 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
 		else
 			dc_version = DCE_VERSION_12_0;
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case FAMILY_RV:
 		dc_version = DCN_VERSION_1_0;
 		if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))
 			dc_version = DCN_VERSION_1_01;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 		if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev))
 			dc_version = DCN_VERSION_2_1;
-#endif
 		break;
 #endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case FAMILY_NV:
 		dc_version = DCN_VERSION_2_0;
 		break;
-#endif
 	default:
 		dc_version = DCE_VERSION_UNKNOWN;
 		break;
@@ -162,20 +154,16 @@ struct resource_pool *dc_create_resource_pool(struct dc  *dc,
 				init_data->num_virtual_links, dc);
 		break;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case DCN_VERSION_1_0:
 	case DCN_VERSION_1_01:
 		res_pool = dcn10_create_resource_pool(init_data, dc);
 		break;
-#endif
 
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case DCN_VERSION_2_0:
 		res_pool = dcn20_create_resource_pool(init_data, dc);
 		break;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case DCN_VERSION_2_1:
 		res_pool = dcn21_create_resource_pool(init_data, dc);
 		break;
@@ -951,19 +939,52 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx)
 	data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c);
 
 }
-static bool are_rect_integer_multiples(struct rect src, struct rect dest)
+
+/*
+ * When handling 270 rotation in mixed SLS mode, we have
+ * stream->timing.h_border_left that is non zero.  If we are doing
+ * pipe-splitting, this h_border_left value gets added to recout.x and when it
+ * calls calculate_inits_and_adj_vp() and
+ * adjust_vp_and_init_for_seamless_clip(), it can cause viewport.height for a
+ * pipe to be incorrect.
+ *
+ * To fix this, instead of using stream->timing.h_border_left, we can use
+ * stream->dst.x to represent the border instead.  So we will set h_border_left
+ * to 0 and shift the appropriate amount in stream->dst.x.  We will then
+ * perform all calculations in resource_build_scaling_params() based on this
+ * and then restore the h_border_left and stream->dst.x to their original
+ * values.
+ *
+ * shift_border_left_to_dst() will shift the amount of h_border_left to
+ * stream->dst.x and set h_border_left to 0.  restore_border_left_from_dst()
+ * will restore h_border_left and stream->dst.x back to their original values
+ * We also need to make sure pipe_ctx->plane_res.scl_data.h_active uses the
+ * original h_border_left value in its calculation.
+ */
+int shift_border_left_to_dst(struct pipe_ctx *pipe_ctx)
 {
-	if (dest.width  >= src.width  && dest.width  % src.width  == 0 &&
-		dest.height >= src.height && dest.height % src.height == 0)
-		return true;
+	int store_h_border_left = pipe_ctx->stream->timing.h_border_left;
 
-	return false;
+	if (store_h_border_left) {
+		pipe_ctx->stream->timing.h_border_left = 0;
+		pipe_ctx->stream->dst.x += store_h_border_left;
+	}
+	return store_h_border_left;
 }
+
+void restore_border_left_from_dst(struct pipe_ctx *pipe_ctx,
+                                  int store_h_border_left)
+{
+	pipe_ctx->stream->dst.x -= store_h_border_left;
+	pipe_ctx->stream->timing.h_border_left = store_h_border_left;
+}
+
 bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 {
 	const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
 	bool res = false;
+	int store_h_border_left = shift_border_left_to_dst(pipe_ctx);
 	DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
 	/* Important: scaling ratio calculation requires pixel format,
 	 * lb depth calculation requires recout and taps require scaling ratios.
@@ -976,8 +997,14 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 
 	calculate_viewport(pipe_ctx);
 
-	if (pipe_ctx->plane_res.scl_data.viewport.height < 16 || pipe_ctx->plane_res.scl_data.viewport.width < 16)
+	if (pipe_ctx->plane_res.scl_data.viewport.height < 16 ||
+		pipe_ctx->plane_res.scl_data.viewport.width < 16) {
+		if (store_h_border_left) {
+			restore_border_left_from_dst(pipe_ctx,
+				store_h_border_left);
+		}
 		return false;
+	}
 
 	calculate_recout(pipe_ctx);
 
@@ -990,8 +1017,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_res.scl_data.recout.x += timing->h_border_left;
 	pipe_ctx->plane_res.scl_data.recout.y += timing->v_border_top;
 
-	pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right;
-	pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
+	pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable +
+		store_h_border_left + timing->h_border_right;
+	pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable +
+		timing->v_border_top + timing->v_border_bottom;
 
 	/* Taps calculations */
 	if (pipe_ctx->plane_res.xfm != NULL)
@@ -1002,13 +1031,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 		res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps(
 				pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
 
-	if (res &&
-	    plane_state->scaling_quality.integer_scaling &&
-	    are_rect_integer_multiples(pipe_ctx->plane_res.scl_data.viewport,
-				       pipe_ctx->plane_res.scl_data.recout)) {
-		pipe_ctx->plane_res.scl_data.taps.v_taps = 1;
-		pipe_ctx->plane_res.scl_data.taps.h_taps = 1;
-	}
 
 	if (!res) {
 		/* Try 24 bpp linebuffer */
@@ -1045,6 +1067,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 				plane_state->dst_rect.x,
 				plane_state->dst_rect.y);
 
+	if (store_h_border_left)
+		restore_border_left_from_dst(pipe_ctx, store_h_border_left);
+
 	return res;
 }
 
@@ -1190,7 +1215,7 @@ static struct pipe_ctx *acquire_free_pipe_for_head(
 	return pool->funcs->acquire_idle_pipe_for_layer(context, pool, head_pipe->stream);
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 static int acquire_first_split_pipe(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
@@ -1271,7 +1296,7 @@ bool dc_add_plane_to_context(
 
 		free_pipe = acquire_free_pipe_for_head(context, pool, head_pipe);
 
-	#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+	#if defined(CONFIG_DRM_AMD_DC_DCN)
 		if (!free_pipe) {
 			int pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
 			if (pipe_idx >= 0)
@@ -1635,7 +1660,8 @@ static int acquire_first_free_pipe(
 static struct audio *find_first_free_audio(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
-		enum engine_id id)
+		enum engine_id id,
+		enum dce_version dc_version)
 {
 	int i, available_audio_count;
 
@@ -1854,31 +1880,31 @@ static int acquire_resource_from_hw_enabled_state(
 		struct dc_stream_state *stream)
 {
 	struct dc_link *link = stream->link;
-	unsigned int inst, tg_inst;
+	unsigned int i, inst, tg_inst = 0;
 
 	/* Check for enabled DIG to identify enabled display */
 	if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
 		return -1;
 
-	/* Check for which front end is used by this encoder.
-	 * Note the inst is 1 indexed, where 0 is undefined.
-	 * Note that DIG_FE can source from different OTG but our
-	 * current implementation always map 1-to-1, so this code makes
-	 * the same assumption and doesn't check OTG source.
-	 */
 	inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
 
-	/* Instance should be within the range of the pool */
-	if (inst >= pool->pipe_count)
+	if (inst == ENGINE_ID_UNKNOWN)
 		return -1;
 
-	if (inst >= pool->stream_enc_count)
-		return -1;
+	for (i = 0; i < pool->stream_enc_count; i++) {
+		if (pool->stream_enc[i]->id == inst) {
+			tg_inst = pool->stream_enc[i]->funcs->dig_source_otg(
+				pool->stream_enc[i]);
+			break;
+		}
+	}
 
-	tg_inst = pool->stream_enc[inst]->funcs->dig_source_otg(pool->stream_enc[inst]);
+	// tg_inst not found
+	if (i == pool->stream_enc_count)
+		return -1;
 
 	if (tg_inst >= pool->timing_generator_count)
-		return false;
+		return -1;
 
 	if (!res_ctx->pipe_ctx[tg_inst].stream) {
 		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[tg_inst];
@@ -1891,8 +1917,26 @@ static int acquire_resource_from_hw_enabled_state(
 		pipe_ctx->plane_res.dpp = pool->dpps[tg_inst];
 		pipe_ctx->stream_res.opp = pool->opps[tg_inst];
 
-		if (pool->dpps[tg_inst])
+		if (pool->dpps[tg_inst]) {
 			pipe_ctx->plane_res.mpcc_inst = pool->dpps[tg_inst]->inst;
+
+			// Read DPP->MPCC->OPP Pipe from HW State
+			if (pool->mpc->funcs->read_mpcc_state) {
+				struct mpcc_state s = {0};
+
+				pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s);
+
+				if (s.dpp_id < MAX_MPCC)
+					pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = s.dpp_id;
+
+				if (s.bot_mpcc_id < MAX_MPCC)
+					pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot =
+							&pool->mpc->mpcc_array[s.bot_mpcc_id];
+
+				if (s.opp_id < MAX_OPP)
+					pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id;
+			}
+		}
 		pipe_ctx->pipe_idx = tg_inst;
 
 		pipe_ctx->stream = stream;
@@ -1944,7 +1988,7 @@ enum dc_status resource_map_pool_resources(
 		/* acquire new resources */
 		pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	if (pipe_idx < 0)
 		pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
 #endif
@@ -1971,7 +2015,7 @@ enum dc_status resource_map_pool_resources(
 	    dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
 	    stream->audio_info.mode_count && stream->audio_info.flags.all) {
 		pipe_ctx->stream_res.audio = find_first_free_audio(
-		&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id);
+		&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version);
 
 		/*
 		 * Audio assigned in order first come first get.
@@ -2022,6 +2066,13 @@ void dc_resource_state_construct(
 	dst_ctx->clk_mgr = dc->clk_mgr;
 }
 
+
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
+{
+	return dc->res_pool->res_cap->num_dsc > 0;
+}
+
+
 /**
  * dc_validate_global_state() - Determine if HW can support a given state
  * Checks HW resource availability and bandwidth requirement.
@@ -2278,7 +2329,7 @@ static void set_avi_info_frame(
 		if (color_space == COLOR_SPACE_SRGB ||
 			color_space == COLOR_SPACE_2020_RGB_FULLRANGE) {
 			hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_FULL_RANGE;
-			hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE;
+			hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
 		} else if (color_space == COLOR_SPACE_SRGB_LIMITED ||
 					color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) {
 			hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_LIMITED_RANGE;
@@ -2744,9 +2795,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 
 enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream)
 {
-	struct dc  *core_dc = dc;
 	struct dc_link *link = stream->link;
-	struct timing_generator *tg = core_dc->res_pool->timing_generators[0];
+	struct timing_generator *tg = dc->res_pool->timing_generators[0];
 	enum dc_status res = DC_OK;
 
 	calculate_phy_pix_clks(stream);
@@ -2809,3 +2859,48 @@ unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format)
 		return -1;
 	}
 }
+static unsigned int get_max_audio_sample_rate(struct audio_mode *modes)
+{
+	if (modes) {
+		if (modes->sample_rates.rate.RATE_192)
+			return 192000;
+		if (modes->sample_rates.rate.RATE_176_4)
+			return 176400;
+		if (modes->sample_rates.rate.RATE_96)
+			return 96000;
+		if (modes->sample_rates.rate.RATE_88_2)
+			return 88200;
+		if (modes->sample_rates.rate.RATE_48)
+			return 48000;
+		if (modes->sample_rates.rate.RATE_44_1)
+			return 44100;
+		if (modes->sample_rates.rate.RATE_32)
+			return 32000;
+	}
+	/*original logic when no audio info*/
+	return 441000;
+}
+
+void get_audio_check(struct audio_info *aud_modes,
+	struct audio_check *audio_chk)
+{
+	unsigned int i;
+	unsigned int max_sample_rate = 0;
+
+	if (aud_modes) {
+		audio_chk->audio_packet_type = 0x2;/*audio sample packet AP = .25 for layout0, 1 for layout1*/
+
+		audio_chk->max_audiosample_rate = 0;
+		for (i = 0; i < aud_modes->mode_count; i++) {
+			max_sample_rate = get_max_audio_sample_rate(&aud_modes->modes[i]);
+			if (audio_chk->max_audiosample_rate < max_sample_rate)
+				audio_chk->max_audiosample_rate = max_sample_rate;
+			/*dts takes the same as type 2: AP = 0.25*/
+		}
+		/*check which one take more bandwidth*/
+		if (audio_chk->max_audiosample_rate > 192000)
+			audio_chk->audio_packet_type = 0x9;/*AP =1*/
+		audio_chk->acat = 0;/*not support*/
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
index 5cbfdf1c4b11..a249a0e5edd0 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
@@ -33,7 +33,7 @@
  * Private functions
  ******************************************************************************/
 
-static void destruct(struct dc_sink *sink)
+static void dc_sink_destruct(struct dc_sink *sink)
 {
 	if (sink->dc_container_id) {
 		kfree(sink->dc_container_id);
@@ -41,7 +41,7 @@ static void destruct(struct dc_sink *sink)
 	}
 }
 
-static bool construct(struct dc_sink *sink, const struct dc_sink_init_data *init_params)
+static bool dc_sink_construct(struct dc_sink *sink, const struct dc_sink_init_data *init_params)
 {
 
 	struct dc_link *link = init_params->link;
@@ -75,7 +75,7 @@ void dc_sink_retain(struct dc_sink *sink)
 static void dc_sink_free(struct kref *kref)
 {
 	struct dc_sink *sink = container_of(kref, struct dc_sink, refcount);
-	destruct(sink);
+	dc_sink_destruct(sink);
 	kfree(sink);
 }
 
@@ -91,7 +91,7 @@ struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params)
 	if (NULL == sink)
 		goto alloc_fail;
 
-	if (false == construct(sink, init_params))
+	if (false == dc_sink_construct(sink, init_params))
 		goto construct_fail;
 
 	kref_init(&sink->refcount);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index bf1d7bb90e0f..6ddbb00ed37a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -27,14 +27,12 @@
 #include <linux/slab.h>
 
 #include "dm_services.h"
+#include "basics/dc_common.h"
 #include "dc.h"
 #include "core_types.h"
 #include "resource.h"
 #include "ipp.h"
 #include "timing_generator.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
-#include "dcn10/dcn10_hw_sequencer.h"
-#endif
 
 #define DC_LOGGER dc->ctx->logger
 
@@ -58,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
 	}
 }
 
-static void construct(struct dc_stream_state *stream,
+static void dc_stream_construct(struct dc_stream_state *stream,
 	struct dc_sink *dc_sink_data)
 {
 	uint32_t i = 0;
@@ -108,7 +106,6 @@ static void construct(struct dc_stream_state *stream,
 	/* EDID CAP translation for HDMI 2.0 */
 	stream->timing.flags.LTE_340MCSC_SCRAMBLE = dc_sink_data->edid_caps.lte_340mcsc_scramble;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	memset(&stream->timing.dsc_cfg, 0, sizeof(stream->timing.dsc_cfg));
 	stream->timing.dsc_cfg.num_slices_h = 0;
 	stream->timing.dsc_cfg.num_slices_v = 0;
@@ -117,7 +114,6 @@ static void construct(struct dc_stream_state *stream,
 	stream->timing.dsc_cfg.linebuf_depth = 9;
 	stream->timing.dsc_cfg.version_minor = 2;
 	stream->timing.dsc_cfg.ycbcr422_simple = 0;
-#endif
 
 	update_stream_signal(stream, dc_sink_data);
 
@@ -129,7 +125,7 @@ static void construct(struct dc_stream_state *stream,
 	stream->ctx->dc_stream_id_count++;
 }
 
-static void destruct(struct dc_stream_state *stream)
+static void dc_stream_destruct(struct dc_stream_state *stream)
 {
 	dc_sink_release(stream->sink);
 	if (stream->out_transfer_func != NULL) {
@@ -147,7 +143,7 @@ static void dc_stream_free(struct kref *kref)
 {
 	struct dc_stream_state *stream = container_of(kref, struct dc_stream_state, refcount);
 
-	destruct(stream);
+	dc_stream_destruct(stream);
 	kfree(stream);
 }
 
@@ -170,7 +166,7 @@ struct dc_stream_state *dc_create_stream_for_sink(
 	if (stream == NULL)
 		return NULL;
 
-	construct(stream, sink);
+	dc_stream_construct(stream, sink);
 
 	kref_init(&stream->refcount);
 
@@ -237,7 +233,7 @@ struct dc_stream_status *dc_stream_get_status(
 
 static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc)
 {
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	unsigned int vupdate_line;
 	unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos;
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -246,7 +242,7 @@ static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc)
 	if (stream->ctx->asic_id.chip_family == FAMILY_RV &&
 			ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) {
 
-		vupdate_line = get_vupdate_offset_from_vsync(pipe_ctx);
+		vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
 		if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
 			return;
 
@@ -272,7 +268,7 @@ bool dc_stream_set_cursor_attributes(
 	const struct dc_cursor_attributes *attributes)
 {
 	int i;
-	struct dc  *core_dc;
+	struct dc  *dc;
 	struct resource_context *res_ctx;
 	struct pipe_ctx *pipe_to_program = NULL;
 
@@ -290,8 +286,8 @@ bool dc_stream_set_cursor_attributes(
 		return false;
 	}
 
-	core_dc = stream->ctx->dc;
-	res_ctx = &core_dc->current_state->res_ctx;
+	dc = stream->ctx->dc;
+	res_ctx = &dc->current_state->res_ctx;
 	stream->cursor_attributes = *attributes;
 
 	for (i = 0; i < MAX_PIPES; i++) {
@@ -303,17 +299,17 @@ bool dc_stream_set_cursor_attributes(
 		if (!pipe_to_program) {
 			pipe_to_program = pipe_ctx;
 
-			delay_cursor_until_vupdate(pipe_ctx, core_dc);
-			core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+			delay_cursor_until_vupdate(pipe_ctx, dc);
+			dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
 		}
 
-		core_dc->hwss.set_cursor_attribute(pipe_ctx);
-		if (core_dc->hwss.set_cursor_sdr_white_level)
-			core_dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
+		dc->hwss.set_cursor_attribute(pipe_ctx);
+		if (dc->hwss.set_cursor_sdr_white_level)
+			dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
 	}
 
 	if (pipe_to_program)
-		core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+		dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
 
 	return true;
 }
@@ -323,7 +319,7 @@ bool dc_stream_set_cursor_position(
 	const struct dc_cursor_position *position)
 {
 	int i;
-	struct dc  *core_dc;
+	struct dc  *dc;
 	struct resource_context *res_ctx;
 	struct pipe_ctx *pipe_to_program = NULL;
 
@@ -337,8 +333,8 @@ bool dc_stream_set_cursor_position(
 		return false;
 	}
 
-	core_dc = stream->ctx->dc;
-	res_ctx = &core_dc->current_state->res_ctx;
+	dc = stream->ctx->dc;
+	res_ctx = &dc->current_state->res_ctx;
 	stream->cursor_position = *position;
 
 	for (i = 0; i < MAX_PIPES; i++) {
@@ -354,20 +350,19 @@ bool dc_stream_set_cursor_position(
 		if (!pipe_to_program) {
 			pipe_to_program = pipe_ctx;
 
-			delay_cursor_until_vupdate(pipe_ctx, core_dc);
-			core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+			delay_cursor_until_vupdate(pipe_ctx, dc);
+			dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
 		}
 
-		core_dc->hwss.set_cursor_position(pipe_ctx);
+		dc->hwss.set_cursor_position(pipe_ctx);
 	}
 
 	if (pipe_to_program)
-		core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+		dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
 
 	return true;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info)
@@ -411,25 +406,30 @@ bool dc_stream_add_writeback(struct dc *dc,
 		stream->writeback_info[stream->num_wb_info++] = *wb_info;
 	}
 
-	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
-		dm_error("DC: update_bandwidth failed!\n");
-		return false;
-	}
-
-	/* enable writeback */
 	if (dc->hwss.enable_writeback) {
 		struct dc_stream_status *stream_status = dc_stream_get_status(stream);
 		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+		dwb->otg_inst = stream_status->primary_otg_inst;
+	}
+	if (IS_DIAG_DC(dc->ctx->dce_environment)) {
+		if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+			dm_error("DC: update_bandwidth failed!\n");
+			return false;
+		}
 
-		if (dwb->funcs->is_enabled(dwb)) {
-			/* writeback pipe already enabled, only need to update */
-			dc->hwss.update_writeback(dc, stream_status, wb_info);
-		} else {
-			/* Enable writeback pipe from scratch*/
-			dc->hwss.enable_writeback(dc, stream_status, wb_info);
+		/* enable writeback */
+		if (dc->hwss.enable_writeback) {
+			struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+			if (dwb->funcs->is_enabled(dwb)) {
+				/* writeback pipe already enabled, only need to update */
+				dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+			} else {
+				/* Enable writeback pipe from scratch*/
+				dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
+			}
 		}
 	}
-
 	return true;
 }
 
@@ -468,26 +468,35 @@ bool dc_stream_remove_writeback(struct dc *dc,
 	}
 	stream->num_wb_info = j;
 
-	/* recalculate and apply DML parameters */
-	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
-		dm_error("DC: update_bandwidth failed!\n");
-		return false;
-	}
-
-	/* disable writeback */
-	if (dc->hwss.disable_writeback)
-		dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+	if (IS_DIAG_DC(dc->ctx->dce_environment)) {
+		/* recalculate and apply DML parameters */
+		if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+			dm_error("DC: update_bandwidth failed!\n");
+			return false;
+		}
 
+		/* disable writeback */
+		if (dc->hwss.disable_writeback)
+			dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+	}
 	return true;
 }
-#endif
 
+bool dc_stream_warmup_writeback(struct dc *dc,
+		int num_dwb,
+		struct dc_writeback_info *wb_info)
+{
+	if (dc->hwss.mmhubbub_warmup)
+		return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info);
+	else
+		return false;
+}
 uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
 {
 	uint8_t i;
-	struct dc  *core_dc = stream->ctx->dc;
+	struct dc  *dc = stream->ctx->dc;
 	struct resource_context *res_ctx =
-		&core_dc->current_state->res_ctx;
+		&dc->current_state->res_ctx;
 
 	for (i = 0; i < MAX_PIPES; i++) {
 		struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
@@ -544,9 +553,9 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
 {
 	uint8_t i;
 	bool ret = false;
-	struct dc  *core_dc = stream->ctx->dc;
+	struct dc  *dc = stream->ctx->dc;
 	struct resource_context *res_ctx =
-		&core_dc->current_state->res_ctx;
+		&dc->current_state->res_ctx;
 
 	for (i = 0; i < MAX_PIPES; i++) {
 		struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
@@ -567,10 +576,8 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
 	return ret;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream)
 {
-	bool status = true;
 	struct pipe_ctx *pipe = NULL;
 	int i;
 
@@ -586,8 +593,7 @@ bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream)
 	if (i == MAX_PIPES)
 		return true;
 
-	status = dc->hwss.dmdata_status_done(pipe);
-	return status;
+	return dc->hwss.dmdata_status_done(pipe);
 }
 
 bool dc_stream_set_dynamic_metadata(struct dc *dc,
@@ -630,7 +636,6 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
 
 	return true;
 }
-#endif
 
 void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index b9d6a5bd8522..ea1229a3e2b2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -37,7 +37,7 @@
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
 {
 	plane_state->ctx = ctx;
 
@@ -50,7 +50,6 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state
 		plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
 		plane_state->in_transfer_func->ctx = ctx;
 	}
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	plane_state->in_shaper_func = dc_create_transfer_func();
 	if (plane_state->in_shaper_func != NULL) {
 		plane_state->in_shaper_func->type = TF_TYPE_BYPASS;
@@ -67,10 +66,9 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state
 		plane_state->blend_tf->ctx = ctx;
 	}
 
-#endif
 }
 
-static void destruct(struct dc_plane_state *plane_state)
+static void dc_plane_destruct(struct dc_plane_state *plane_state)
 {
 	if (plane_state->gamma_correction != NULL) {
 		dc_gamma_release(&plane_state->gamma_correction);
@@ -80,7 +78,6 @@ static void destruct(struct dc_plane_state *plane_state)
 				plane_state->in_transfer_func);
 		plane_state->in_transfer_func = NULL;
 	}
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	if (plane_state->in_shaper_func != NULL) {
 		dc_transfer_func_release(
 				plane_state->in_shaper_func);
@@ -97,7 +94,6 @@ static void destruct(struct dc_plane_state *plane_state)
 		plane_state->blend_tf = NULL;
 	}
 
-#endif
 }
 
 /*******************************************************************************
@@ -112,16 +108,14 @@ void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
 
 struct dc_plane_state *dc_create_plane_state(struct dc *dc)
 {
-	struct dc *core_dc = dc;
-
 	struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
-						      GFP_KERNEL);
+							GFP_KERNEL);
 
 	if (NULL == plane_state)
 		return NULL;
 
 	kref_init(&plane_state->refcount);
-	construct(core_dc->ctx, plane_state);
+	dc_plane_construct(dc->ctx, plane_state);
 
 	return plane_state;
 }
@@ -141,7 +135,7 @@ const struct dc_plane_status *dc_plane_get_status(
 		const struct dc_plane_state *plane_state)
 {
 	const struct dc_plane_status *plane_status;
-	struct dc  *core_dc;
+	struct dc  *dc;
 	int i;
 
 	if (!plane_state ||
@@ -152,15 +146,15 @@ const struct dc_plane_status *dc_plane_get_status(
 	}
 
 	plane_status = &plane_state->status;
-	core_dc = plane_state->ctx->dc;
+	dc = plane_state->ctx->dc;
 
-	if (core_dc->current_state == NULL)
+	if (dc->current_state == NULL)
 		return NULL;
 
 	/* Find the current plane state and set its pending bit to false */
-	for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx =
-				&core_dc->current_state->res_ctx.pipe_ctx[i];
+				&dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe_ctx->plane_state != plane_state)
 			continue;
@@ -170,14 +164,14 @@ const struct dc_plane_status *dc_plane_get_status(
 		break;
 	}
 
-	for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx =
-				&core_dc->current_state->res_ctx.pipe_ctx[i];
+				&dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe_ctx->plane_state != plane_state)
 			continue;
 
-		core_dc->hwss.update_pending_status(pipe_ctx);
+		dc->hwss.update_pending_status(pipe_ctx);
 	}
 
 	return plane_status;
@@ -191,7 +185,7 @@ void dc_plane_state_retain(struct dc_plane_state *plane_state)
 static void dc_plane_state_free(struct kref *kref)
 {
 	struct dc_plane_state *plane_state = container_of(kref, struct dc_plane_state, refcount);
-	destruct(plane_state);
+	dc_plane_destruct(plane_state);
 	kvfree(plane_state);
 }
 
@@ -262,7 +256,6 @@ alloc_fail:
 	return NULL;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 static void dc_3dlut_func_free(struct kref *kref)
 {
 	struct dc_3dlut *lut = container_of(kref, struct dc_3dlut, refcount);
@@ -296,6 +289,5 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut)
 {
 	kref_get(&lut->refcount);
 }
-#endif
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index a82352a87808..3fa85a54360f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.48"
+#define DC_VER "3.2.68"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -54,6 +54,10 @@ struct dc_versions {
 	struct dmcu_version dmcu_version;
 };
 
+enum dp_protocol_version {
+	DP_VERSION_1_4,
+};
+
 enum dc_plane_type {
 	DC_PLANE_TYPE_INVALID,
 	DC_PLANE_TYPE_DCE_RGB,
@@ -111,19 +115,18 @@ struct dc_caps {
 	bool force_dp_tps4_for_cp2520;
 	bool disable_dp_clk_share;
 	bool psp_setup_panel_mode;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+	bool extended_aux_timeout_support;
+	bool dmcub_support;
 	bool hw_3d_lut;
-#endif
+	enum dp_protocol_version max_dp_protocol_version;
 	struct dc_plane_cap planes[MAX_PLANES];
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dc_bug_wa {
 	bool no_connect_phy_config;
 	bool dedcn20_305_wa;
 	bool skip_clock_update;
 };
-#endif
 
 struct dc_dcc_surface_param {
 	struct dc_size surface_size;
@@ -154,11 +157,14 @@ struct dc_surface_dcc_cap {
 	bool const_color_support;
 };
 
-struct dc_static_screen_events {
-	bool force_trigger;
-	bool cursor_update;
-	bool surface_update;
-	bool overlay_update;
+struct dc_static_screen_params {
+	struct {
+		bool force_trigger;
+		bool cursor_update;
+		bool surface_update;
+		bool overlay_update;
+	} triggers;
+	unsigned int num_frames;
 };
 
 
@@ -219,7 +225,9 @@ struct dc_config {
 	bool allow_seamless_boot_optimization;
 	bool power_down_display_on_boot;
 	bool edp_not_connected;
+	bool force_enum_edp;
 	bool forced_clocks;
+	bool disable_extended_timeout_support; // Used to disable extended timeout and lttpr feature as well
 	bool multi_mon_pp_mclk_switch;
 };
 
@@ -227,6 +235,7 @@ enum visual_confirm {
 	VISUAL_CONFIRM_DISABLE = 0,
 	VISUAL_CONFIRM_SURFACE = 1,
 	VISUAL_CONFIRM_HDR = 2,
+	VISUAL_CONFIRM_MPCTREE = 4,
 };
 
 enum dcc_option {
@@ -245,6 +254,19 @@ enum wm_report_mode {
 	WM_REPORT_DEFAULT = 0,
 	WM_REPORT_OVERRIDE = 1,
 };
+enum dtm_pstate{
+	dtm_level_p0 = 0,/*highest voltage*/
+	dtm_level_p1,
+	dtm_level_p2,
+	dtm_level_p3,
+	dtm_level_p4,/*when active_display_count = 0*/
+};
+
+enum dcn_pwr_state {
+	DCN_PWR_STATE_UNKNOWN = -1,
+	DCN_PWR_STATE_MISSION_MODE = 0,
+	DCN_PWR_STATE_LOW_POWER = 3,
+};
 
 /*
  * For any clocks that may differ per pipe
@@ -252,11 +274,7 @@ enum wm_report_mode {
  */
 struct dc_clocks {
 	int dispclk_khz;
-	int max_supported_dppclk_khz;
-	int max_supported_dispclk_khz;
 	int dppclk_khz;
-	int bw_dppclk_khz; /*a copy of dppclk_khz*/
-	int bw_dispclk_khz;
 	int dcfclk_khz;
 	int socclk_khz;
 	int dcfclk_deep_sleep_khz;
@@ -264,12 +282,17 @@ struct dc_clocks {
 	int phyclk_khz;
 	int dramclk_khz;
 	bool p_state_change_support;
-
+	enum dcn_pwr_state pwr_state;
 	/*
 	 * Elements below are not compared for the purposes of
 	 * optimization required
 	 */
 	bool prev_p_state_change_support;
+	enum dtm_pstate dtm_level;
+	int max_supported_dppclk_khz;
+	int max_supported_dispclk_khz;
+	int bw_dppclk_khz; /*a copy of dppclk_khz*/
+	int bw_dispclk_khz;
 };
 
 struct dc_bw_validation_profile {
@@ -345,9 +368,10 @@ struct dc_debug_options {
 	bool disable_dfs_bypass;
 	bool disable_dpp_power_gate;
 	bool disable_hubp_power_gate;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool disable_dsc_power_gate;
-#endif
+	int dsc_min_slice_height_override;
+	int dsc_bpp_increment_div;
+	bool native422_support;
 	bool disable_pplib_wm_range;
 	enum wm_report_mode pplib_wm_report_mode;
 	unsigned int min_disp_clk_khz;
@@ -382,22 +406,25 @@ struct dc_debug_options {
 	unsigned int force_odm_combine; //bit vector based on otg inst
 	unsigned int force_fclk_khz;
 	bool disable_tri_buf;
+	bool dmub_offload_enabled;
+	bool dmcub_emulation;
+	bool dmub_command_table; /* for testing only */
 	struct dc_bw_validation_profile bw_val_profile;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool disable_fec;
-#endif
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 	bool disable_48mhz_pwrdwn;
-#endif
 	/* This forces a hard min on the DCFCLK requested to SMU/PP
 	 * watermarks are not affected.
 	 */
 	unsigned int force_min_dcfclk_mhz;
 	bool disable_timing_sync;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	bool cm_in_bypass;
-#endif
 	int force_clock_mode;/*every mode change.*/
+
+	bool nv12_iflip_vm_wa;
+	bool disable_dram_clock_change_vactive_support;
+	bool validate_dml_output;
+	bool enable_dmcub_surface_flip;
+	bool usbc_combo_phy_reset_wa;
 };
 
 struct dc_debug_data {
@@ -406,7 +433,6 @@ struct dc_debug_data {
 	uint32_t auxErrorCount;
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 struct dc_phy_addr_space_config {
 	struct {
 		uint64_t start_addr;
@@ -436,7 +462,6 @@ struct dc_virtual_addr_space_config {
 	uint32_t	page_table_block_size_in_bytes;
 	uint8_t		page_table_depth; // 1 = 1 level, 2 = 2 level, etc.  0 = invalid
 };
-#endif
 
 struct dc_bounding_box_overrides {
 	int sr_exit_time_ns;
@@ -462,13 +487,9 @@ struct dc {
 	struct dc_config config;
 	struct dc_debug_options debug;
 	struct dc_bounding_box_overrides bb_overrides;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dc_bug_wa work_arounds;
-#endif
 	struct dc_context *ctx;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	struct dc_phy_addr_space_config vm_pa_config;
-#endif
 
 	uint8_t link_count;
 	struct dc_link *links[MAX_PIPES * 2];
@@ -484,7 +505,7 @@ struct dc {
 	/* Inputs into BW and WM calculations. */
 	struct bw_calcs_dceip *bw_dceip;
 	struct bw_calcs_vbios *bw_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	struct dcn_soc_bounding_box *dcn_soc;
 	struct dcn_ip_params *dcn_ip;
 	struct display_mode_lib dml;
@@ -498,7 +519,7 @@ struct dc {
 	bool optimized_required;
 
 	/* Require to maintain clocks and bandwidth for UEFI enabled HW */
-	bool optimize_seamless_boot;
+	int optimize_seamless_boot_streams;
 
 	/* FBC compressor */
 	struct compressor *fbc_compressor;
@@ -506,10 +527,8 @@ struct dc {
 	struct dc_debug_data debug_data;
 
 	const char *build_id;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	struct vm_helper *vm_helper;
 	const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
-#endif
 };
 
 enum frame_buffer_mode {
@@ -541,30 +560,36 @@ struct dc_init_data {
 	struct dc_bios *vbios_override;
 	enum dce_environment dce_environment;
 
+	struct dmub_offload_funcs *dmub_if;
+	struct dc_reg_helper_state *dmub_offload;
+
 	struct dc_config flags;
 	uint32_t log_mask;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	/**
 	 * gpu_info FW provided soc bounding box struct or 0 if not
 	 * available in FW
 	 */
 	const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
-#endif
 };
 
 struct dc_callback_init {
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	struct cp_psp cp_psp;
+#else
 	uint8_t reserved;
+#endif
 };
 
 struct dc *dc_create(const struct dc_init_data *init_params);
+void dc_hardware_init(struct dc *dc);
+
 int dc_get_vmid_use_vector(struct dc *dc);
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 void dc_setup_vm_context(struct dc *dc, struct dc_virtual_addr_space_config *va_config, int vmid);
 /* Returns the number of vmids supported */
 int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_config);
-#endif
 void dc_init_callbacks(struct dc *dc,
 		const struct dc_callback_init *init_params);
+void dc_deinit_callbacks(struct dc *dc);
 void dc_destroy(struct dc **dc);
 
 /*******************************************************************************
@@ -637,7 +662,6 @@ struct dc_transfer_func {
 	};
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 union dc_3dlut_state {
 	struct {
@@ -656,12 +680,11 @@ union dc_3dlut_state {
 struct dc_3dlut {
 	struct kref refcount;
 	struct tetrahedral_params lut_3d;
-	uint32_t hdr_multiplier;
+	struct fixed31_32 hdr_multiplier;
 	bool initialized; /*remove after diag fix*/
 	union dc_3dlut_state state;
 	struct dc_context *ctx;
 };
-#endif
 /*
  * This structure is filled in by dc_surface_get_status and contains
  * the last requested address and the currently active address so the called
@@ -684,7 +707,7 @@ union surface_update_flags {
 		uint32_t horizontal_mirror_change:1;
 		uint32_t per_pixel_alpha_change:1;
 		uint32_t global_alpha_change:1;
-		uint32_t sdr_white_level:1;
+		uint32_t hdr_mult:1;
 		uint32_t rotation_change:1;
 		uint32_t swizzle_change:1;
 		uint32_t scaling_change:1;
@@ -712,9 +735,7 @@ union surface_update_flags {
 struct dc_plane_state {
 	struct dc_plane_address address;
 	struct dc_plane_flip_time time;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	bool triplebuffer_flips;
-#endif
 	struct scaling_taps scaling_quality;
 	struct rect src_rect;
 	struct rect dst_rect;
@@ -730,18 +751,16 @@ struct dc_plane_state {
 	struct dc_bias_and_scale *bias_and_scale;
 	struct dc_csc_transform input_csc_color_matrix;
 	struct fixed31_32 coeff_reduction_factor;
-	uint32_t sdr_white_level;
+	struct fixed31_32 hdr_mult;
 
 	// TODO: No longer used, remove
 	struct dc_hdr_static_metadata hdr_static_ctx;
 
 	enum dc_color_space color_space;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dc_3dlut *lut3d_func;
 	struct dc_transfer_func *in_shaper_func;
 	struct dc_transfer_func *blend_tf;
-#endif
 
 	enum surface_pixel_format format;
 	enum dc_rotation_angle rotation;
@@ -777,7 +796,6 @@ struct dc_plane_info {
 	enum dc_rotation_angle rotation;
 	enum plane_stereo_format stereo_format;
 	enum dc_color_space color_space;
-	unsigned int sdr_white_level;
 	bool horizontal_mirror;
 	bool visible;
 	bool per_pixel_alpha;
@@ -801,7 +819,7 @@ struct dc_surface_update {
 	const struct dc_flip_addrs *flip_addr;
 	const struct dc_plane_info *plane_info;
 	const struct dc_scaling_info *scaling_info;
-
+	struct fixed31_32 hdr_mult;
 	/* following updates require alloc/sleep/spin that is not isr safe,
 	 * null means no updates
 	 */
@@ -810,11 +828,9 @@ struct dc_surface_update {
 
 	const struct dc_csc_transform *input_csc_color_matrix;
 	const struct fixed31_32 *coeff_reduction_factor;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	const struct dc_transfer_func *func_shaper;
 	const struct dc_3dlut *lut3d_func;
 	const struct dc_transfer_func *blend_tf;
-#endif
 };
 
 /*
@@ -835,11 +851,9 @@ void dc_transfer_func_retain(struct dc_transfer_func *dc_tf);
 void dc_transfer_func_release(struct dc_transfer_func *dc_tf);
 struct dc_transfer_func *dc_create_transfer_func(void);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dc_3dlut *dc_create_3dlut_func(void);
 void dc_3dlut_func_release(struct dc_3dlut *lut);
 void dc_3dlut_func_retain(struct dc_3dlut *lut);
-#endif
 /*
  * This structure holds a surface address.  There could be multiple addresses
  * in cases such as Stereo 3D, Planar YUV, etc.  Other per-flip attributes such
@@ -901,6 +915,8 @@ void dc_resource_state_copy_construct_current(
 
 void dc_resource_state_destruct(struct dc_state *context);
 
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+
 /*
  * TODO update to make it about validation sets
  * Set up streams and links associated to drive sinks
@@ -956,10 +972,10 @@ struct dpcd_caps {
 	bool panel_mode_edp;
 	bool dpcd_display_control_capable;
 	bool ext_receiver_cap_field_present;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	union dpcd_fec_capability fec_cap;
 	struct dpcd_dsc_capabilities dsc_caps;
-#endif
+	struct dc_lttpr_caps lttpr_caps;
+
 };
 
 #include "dc_link.h"
@@ -980,14 +996,12 @@ struct dc_container_id {
 };
 
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 struct dc_sink_dsc_caps {
 	// 'true' if these are virtual DPCD's DSC caps (immediately upstream of sink in MST topology),
 	// 'false' if they are sink's DSC caps
 	bool is_virtual_dpcd_dsc;
 	struct dsc_dec_dpcd_caps dsc_dec_caps;
 };
-#endif
 
 /*
  * The sink structure contains EDID and other display device properties
@@ -1002,9 +1016,7 @@ struct dc_sink {
 	struct stereo_3d_features features_3d[TIMING_3D_FORMAT_MAX];
 	bool converter_disable_audio;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct dc_sink_dsc_caps sink_dsc_caps;
-#endif
 
 	/* private to DC core */
 	struct dc_link *link;
@@ -1062,13 +1074,12 @@ unsigned int dc_get_current_backlight_pwm(struct dc *dc);
 unsigned int dc_get_target_backlight_pwm(struct dc *dc);
 
 bool dc_is_dmcu_initialized(struct dc *dc);
+bool dc_is_hw_initialized(struct dc *dc);
 
 enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping);
 void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg);
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 /*******************************************************************************
  * DSC Interfaces
  ******************************************************************************/
 #include "dc_dsc.h"
-#endif
 #endif /* DC_INTERFACE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
index 4ef97f65e55d..4f8f576d5fcf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
@@ -49,7 +49,8 @@ enum aux_channel_operation_result {
 	AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN,
 	AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY,
 	AUX_CHANNEL_OPERATION_FAILED_TIMEOUT,
-	AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON
+	AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON,
+	AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
new file mode 100644
index 000000000000..59c298a6484f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc.h"
+#include "dc_dmub_srv.h"
+#include "../dmub/inc/dmub_srv.h"
+
+static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc,
+				  struct dmub_srv *dmub)
+{
+	dc_srv->dmub = dmub;
+	dc_srv->ctx = dc->ctx;
+}
+
+struct dc_dmub_srv *dc_dmub_srv_create(struct dc *dc, struct dmub_srv *dmub)
+{
+	struct dc_dmub_srv *dc_srv =
+		kzalloc(sizeof(struct dc_dmub_srv), GFP_KERNEL);
+
+	if (dc_srv == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dc_dmub_srv_construct(dc_srv, dc, dmub);
+
+	return dc_srv;
+}
+
+void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv)
+{
+	if (*dmub_srv) {
+		kfree(*dmub_srv);
+		*dmub_srv = NULL;
+	}
+}
+
+void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
+			   struct dmub_cmd_header *cmd)
+{
+	struct dmub_srv *dmub = dc_dmub_srv->dmub;
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	enum dmub_status status;
+
+	status = dmub_srv_cmd_queue(dmub, cmd);
+	if (status == DMUB_STATUS_OK)
+		return;
+
+	if (status != DMUB_STATUS_QUEUE_FULL)
+		goto error;
+
+	/* Execute and wait for queue to become empty again. */
+	dc_dmub_srv_cmd_execute(dc_dmub_srv);
+	dc_dmub_srv_wait_idle(dc_dmub_srv);
+
+	/* Requeue the command. */
+	status = dmub_srv_cmd_queue(dmub, cmd);
+	if (status == DMUB_STATUS_OK)
+		return;
+
+error:
+	DC_ERROR("Error queuing DMUB command: status=%d\n", status);
+}
+
+void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv)
+{
+	struct dmub_srv *dmub = dc_dmub_srv->dmub;
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	enum dmub_status status;
+
+	status = dmub_srv_cmd_execute(dmub);
+	if (status != DMUB_STATUS_OK)
+		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+}
+
+void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv)
+{
+	struct dmub_srv *dmub = dc_dmub_srv->dmub;
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	enum dmub_status status;
+
+	status = dmub_srv_wait_for_idle(dmub, 100000);
+	if (status != DMUB_STATUS_OK)
+		DC_ERROR("Error waiting for DMUB idle: status=%d\n", status);
+}
+
+void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv)
+{
+	struct dmub_srv *dmub = dc_dmub_srv->dmub;
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	enum dmub_status status;
+
+	for (;;) {
+		/* Wait up to a second for PHY init. */
+		status = dmub_srv_wait_for_phy_init(dmub, 1000000);
+		if (status == DMUB_STATUS_OK)
+			/* Initialization OK */
+			break;
+
+		DC_ERROR("DMCUB PHY init failed: status=%d\n", status);
+		ASSERT(0);
+
+		if (status != DMUB_STATUS_TIMEOUT)
+			/*
+			 * Server likely initialized or we don't have
+			 * DMCUB HW support - this won't end.
+			 */
+			break;
+
+		/* Continue spinning so we don't hang the ASIC. */
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
new file mode 100644
index 000000000000..754b6077539c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_DC_SRV_H_
+#define _DMUB_DC_SRV_H_
+
+#include "os_types.h"
+#include "../dmub/inc/dmub_cmd.h"
+
+struct dmub_srv;
+struct dmub_cmd_header;
+
+struct dc_reg_helper_state {
+	bool gather_in_progress;
+	uint32_t same_addr_count;
+	bool should_burst_write;
+	union dmub_rb_cmd cmd_data;
+	unsigned int reg_seq_count;
+};
+
+struct dc_dmub_srv {
+	struct dmub_srv *dmub;
+	struct dc_reg_helper_state reg_helper_offload;
+
+	struct dc_context *ctx;
+	void *dm;
+};
+
+void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
+			   struct dmub_cmd_header *cmd);
+
+void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv);
+
+void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
+
+void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv);
+
+#endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index ef79a686e4c2..dfe4472c9e40 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -129,9 +129,7 @@ struct dc_link_training_overrides {
 	bool *alternate_scrambler_reset;
 	bool *enhanced_framing;
 	bool *mst_enable;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool *fec_enable;
-#endif
 };
 
 union dpcd_rev {
@@ -471,13 +469,13 @@ union training_aux_rd_interval {
 /* Automated test structures */
 union test_request {
 	struct {
-	uint8_t LINK_TRAINING         :1;
-	uint8_t LINK_TEST_PATTRN      :1;
-	uint8_t EDID_READ             :1;
-	uint8_t PHY_TEST_PATTERN      :1;
-	uint8_t AUDIO_TEST_PATTERN    :1;
-	uint8_t RESERVED              :1;
-	uint8_t TEST_STEREO_3D        :1;
+	uint8_t LINK_TRAINING                :1;
+	uint8_t LINK_TEST_PATTRN             :1;
+	uint8_t EDID_READ                    :1;
+	uint8_t PHY_TEST_PATTERN             :1;
+	uint8_t RESERVED                     :1;
+	uint8_t AUDIO_TEST_PATTERN           :1;
+	uint8_t TEST_AUDIO_DISABLED_VIDEO    :1;
 	} bits;
 	uint8_t raw;
 };
@@ -524,19 +522,52 @@ union link_test_pattern {
 
 union test_misc {
 	struct dpcd_test_misc_bits {
-		unsigned char SYNC_CLOCK :1;
+		unsigned char SYNC_CLOCK  :1;
 		/* dpcd_test_color_format */
-		unsigned char CLR_FORMAT :2;
+		unsigned char CLR_FORMAT  :2;
 		/* dpcd_test_dyn_range */
-		unsigned char DYN_RANGE  :1;
-		unsigned char YCBCR      :1;
+		unsigned char DYN_RANGE   :1;
+		unsigned char YCBCR_COEFS :1;
 		/* dpcd_test_bit_depth */
-		unsigned char BPC        :3;
+		unsigned char BPC         :3;
 	} bits;
 	unsigned char raw;
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+union audio_test_mode {
+	struct {
+		unsigned char sampling_rate   :4;
+		unsigned char channel_count   :4;
+	} bits;
+	unsigned char raw;
+};
+
+union audio_test_pattern_period {
+	struct {
+		unsigned char pattern_period   :4;
+		unsigned char reserved         :4;
+	} bits;
+	unsigned char raw;
+};
+
+struct audio_test_pattern_type {
+	unsigned char value;
+};
+
+struct dp_audio_test_data_flags {
+	uint8_t test_requested  :1;
+	uint8_t disable_video   :1;
+};
+
+struct dp_audio_test_data {
+
+	struct dp_audio_test_data_flags flags;
+	uint8_t sampling_rate;
+	uint8_t channel_count;
+	uint8_t pattern_type;
+	uint8_t pattern_period[8];
+};
+
 /* FEC capability DPCD register field bits-*/
 union dpcd_fec_capability {
 	struct {
@@ -661,6 +692,5 @@ struct dpcd_dsc_capabilities {
 	union dpcd_dsc_ext_capabilities dsc_ext_caps;
 };
 
-#endif /* CONFIG_DRM_AMD_DC_DSC_SUPPORT */
 
 #endif /* DC_DP_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
index 6e42209f0e20..3800340a5b4f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
@@ -1,4 +1,3 @@
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #ifndef DC_DSC_H_
 #define DC_DSC_H_
 /*
@@ -30,6 +29,7 @@
 #define DP_DSC_BRANCH_OVERALL_THROUGHPUT_0  0x0a0   /* DP 1.4a SCR */
 #define DP_DSC_BRANCH_OVERALL_THROUGHPUT_1  0x0a1
 #define DP_DSC_BRANCH_MAX_LINE_WIDTH        0x0a2
+#include "dc_types.h"
 
 struct dc_dsc_bw_range {
 	uint32_t min_kbps; /* Bandwidth if min_target_bpp_x16 is used */
@@ -39,24 +39,45 @@ struct dc_dsc_bw_range {
 	uint32_t stream_kbps; /* Uncompressed stream bandwidth */
 };
 
+struct display_stream_compressor {
+	const struct dsc_funcs *funcs;
+	struct dc_context *ctx;
+	int inst;
+};
+
+struct dc_dsc_policy {
+	bool use_min_slices_h;
+	int max_slices_h; // Maximum available if 0
+	int min_slice_height; // Must not be less than 8
+	uint32_t max_target_bpp;
+	uint32_t min_target_bpp;
+};
 
-bool dc_dsc_parse_dsc_dpcd(const uint8_t *dpcd_dsc_basic_data,
+bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
+		const uint8_t *dpcd_dsc_basic_data,
 		const uint8_t *dpcd_dsc_ext_data,
 		struct dsc_dec_dpcd_caps *dsc_sink_caps);
 
 bool dc_dsc_compute_bandwidth_range(
-		const struct dc *dc,
-		const uint32_t min_kbps,
-		const uint32_t max_kbps,
+		const struct display_stream_compressor *dsc,
+		const uint32_t dsc_min_slice_height_override,
+		const uint32_t min_bpp,
+		const uint32_t max_bpp,
 		const struct dsc_dec_dpcd_caps *dsc_sink_caps,
 		const struct dc_crtc_timing *timing,
 		struct dc_dsc_bw_range *range);
 
 bool dc_dsc_compute_config(
-		const struct dc *dc,
+		const struct display_stream_compressor *dsc,
 		const struct dsc_dec_dpcd_caps *dsc_sink_caps,
+		const uint32_t dsc_min_slice_height_override,
 		uint32_t target_bandwidth_kbps,
 		const struct dc_crtc_timing *timing,
 		struct dc_dsc_config *dsc_cfg);
-#endif
+
+void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
+		struct dc_dsc_policy *policy);
+
+void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 30b2f9edd42f..737048d8a96c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -32,6 +32,74 @@
 #include "dm_services.h"
 #include <stdarg.h>
 
+#include "dc.h"
+#include "dc_dmub_srv.h"
+
+static inline void submit_dmub_read_modify_write(
+	struct dc_reg_helper_state *offload,
+	const struct dc_context *ctx)
+{
+	struct dmub_rb_cmd_read_modify_write *cmd_buf = &offload->cmd_data.read_modify_write;
+	bool gather = false;
+
+	offload->should_burst_write =
+			(offload->same_addr_count == (DMUB_READ_MODIFY_WRITE_SEQ__MAX - 1));
+	cmd_buf->header.payload_bytes =
+			sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
+
+	gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
+
+	dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
+
+	memset(cmd_buf, 0, sizeof(*cmd_buf));
+
+	offload->reg_seq_count = 0;
+	offload->same_addr_count = 0;
+}
+
+static inline void submit_dmub_burst_write(
+	struct dc_reg_helper_state *offload,
+	const struct dc_context *ctx)
+{
+	struct dmub_rb_cmd_burst_write *cmd_buf = &offload->cmd_data.burst_write;
+	bool gather = false;
+
+	cmd_buf->header.payload_bytes =
+			sizeof(uint32_t) * offload->reg_seq_count;
+
+	gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
+
+	dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
+
+	memset(cmd_buf, 0, sizeof(*cmd_buf));
+
+	offload->reg_seq_count = 0;
+}
+
+static inline void submit_dmub_reg_wait(
+		struct dc_reg_helper_state *offload,
+		const struct dc_context *ctx)
+{
+	struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
+	bool gather = false;
+
+	gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
+
+	dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+
+	memset(cmd_buf, 0, sizeof(*cmd_buf));
+	offload->reg_seq_count = 0;
+
+	ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
+}
+
 struct dc_reg_value_masks {
 	uint32_t value;
 	uint32_t mask;
@@ -77,6 +145,100 @@ static void set_reg_field_values(struct dc_reg_value_masks *field_value_mask,
 	}
 }
 
+static void dmub_flush_buffer_execute(
+		struct dc_reg_helper_state *offload,
+		const struct dc_context *ctx)
+{
+	submit_dmub_read_modify_write(offload, ctx);
+	dc_dmub_srv_cmd_execute(ctx->dmub_srv);
+}
+
+static void dmub_flush_burst_write_buffer_execute(
+		struct dc_reg_helper_state *offload,
+		const struct dc_context *ctx)
+{
+	submit_dmub_burst_write(offload, ctx);
+	dc_dmub_srv_cmd_execute(ctx->dmub_srv);
+}
+
+static bool dmub_reg_value_burst_set_pack(const struct dc_context *ctx, uint32_t addr,
+		uint32_t reg_val)
+{
+	struct dc_reg_helper_state *offload = &ctx->dmub_srv->reg_helper_offload;
+	struct dmub_rb_cmd_burst_write *cmd_buf = &offload->cmd_data.burst_write;
+
+	/* flush command if buffer is full */
+	if (offload->reg_seq_count == DMUB_BURST_WRITE_VALUES__MAX)
+		dmub_flush_burst_write_buffer_execute(offload, ctx);
+
+	if (offload->cmd_data.cmd_common.header.type == DMUB_CMD__REG_SEQ_BURST_WRITE &&
+			addr != cmd_buf->addr) {
+		dmub_flush_burst_write_buffer_execute(offload, ctx);
+		return false;
+	}
+
+	cmd_buf->header.type = DMUB_CMD__REG_SEQ_BURST_WRITE;
+	cmd_buf->header.sub_type = 0;
+	cmd_buf->addr = addr;
+	cmd_buf->write_values[offload->reg_seq_count] = reg_val;
+	offload->reg_seq_count++;
+
+	return true;
+}
+
+static uint32_t dmub_reg_value_pack(const struct dc_context *ctx, uint32_t addr,
+		struct dc_reg_value_masks *field_value_mask)
+{
+	struct dc_reg_helper_state *offload = &ctx->dmub_srv->reg_helper_offload;
+	struct dmub_rb_cmd_read_modify_write *cmd_buf = &offload->cmd_data.read_modify_write;
+	struct dmub_cmd_read_modify_write_sequence *seq;
+
+	/* flush command if buffer is full */
+	if (offload->cmd_data.cmd_common.header.type != DMUB_CMD__REG_SEQ_BURST_WRITE &&
+			offload->reg_seq_count == DMUB_READ_MODIFY_WRITE_SEQ__MAX)
+		dmub_flush_buffer_execute(offload, ctx);
+
+	if (offload->should_burst_write) {
+		if (dmub_reg_value_burst_set_pack(ctx, addr, field_value_mask->value))
+			return field_value_mask->value;
+		else
+			offload->should_burst_write = false;
+	}
+
+	/* pack commands */
+	cmd_buf->header.type = DMUB_CMD__REG_SEQ_READ_MODIFY_WRITE;
+	cmd_buf->header.sub_type = 0;
+	seq = &cmd_buf->seq[offload->reg_seq_count];
+
+	if (offload->reg_seq_count) {
+		if (cmd_buf->seq[offload->reg_seq_count - 1].addr == addr)
+			offload->same_addr_count++;
+		else
+			offload->same_addr_count = 0;
+	}
+
+	seq->addr = addr;
+	seq->modify_mask = field_value_mask->mask;
+	seq->modify_value = field_value_mask->value;
+	offload->reg_seq_count++;
+
+	return field_value_mask->value;
+}
+
+static void dmub_reg_wait_done_pack(const struct dc_context *ctx, uint32_t addr,
+		uint32_t mask, uint32_t shift, uint32_t condition_value, uint32_t time_out_us)
+{
+	struct dc_reg_helper_state *offload = &ctx->dmub_srv->reg_helper_offload;
+	struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
+
+	cmd_buf->header.type = DMUB_CMD__REG_REG_WAIT;
+	cmd_buf->header.sub_type = 0;
+	cmd_buf->reg_wait.addr = addr;
+	cmd_buf->reg_wait.condition_field_value = mask & (condition_value << shift);
+	cmd_buf->reg_wait.mask = mask;
+	cmd_buf->reg_wait.time_out_us = time_out_us;
+}
+
 uint32_t generic_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr, int n,
 		uint8_t shift1, uint32_t mask1, uint32_t field_value1,
@@ -93,6 +255,11 @@ uint32_t generic_reg_update_ex(const struct dc_context *ctx,
 
 	va_end(ap);
 
+	if (ctx->dmub_srv &&
+	    ctx->dmub_srv->reg_helper_offload.gather_in_progress)
+		return dmub_reg_value_pack(ctx, addr, &field_value_mask);
+		/* todo: return void so we can decouple code running in driver from register states */
+
 	/* mmio write directly */
 	reg_val = dm_read_reg(ctx, addr);
 	reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
@@ -118,6 +285,13 @@ uint32_t generic_reg_set_ex(const struct dc_context *ctx,
 
 	/* mmio write directly */
 	reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
+
+	if (ctx->dmub_srv &&
+	    ctx->dmub_srv->reg_helper_offload.gather_in_progress) {
+		return dmub_reg_value_burst_set_pack(ctx, addr, reg_val);
+		/* todo: return void so we can decouple code running in driver from register states */
+	}
+
 	dm_write_reg(ctx, addr, reg_val);
 	return reg_val;
 }
@@ -134,6 +308,14 @@ uint32_t dm_read_reg_func(
 		return 0;
 	}
 #endif
+
+	if (ctx->dmub_srv &&
+	    ctx->dmub_srv->reg_helper_offload.gather_in_progress &&
+	    !ctx->dmub_srv->reg_helper_offload.should_burst_write) {
+		ASSERT(false);
+		return 0;
+	}
+
 	value = cgs_read_register(ctx->cgs_device, address);
 	trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
 
@@ -299,7 +481,19 @@ void generic_reg_wait(const struct dc_context *ctx,
 	uint32_t reg_val;
 	int i;
 
-	/* something is terribly wrong if time out is > 200ms. (5Hz) */
+	if (ctx->dmub_srv &&
+	    ctx->dmub_srv->reg_helper_offload.gather_in_progress) {
+		dmub_reg_wait_done_pack(ctx, addr, mask, shift, condition_value,
+				delay_between_poll_us * time_out_num_tries);
+		return;
+	}
+
+	/*
+	 * Something is terribly wrong if time out is > 3000ms.
+	 * 3000ms is the maximum time needed for SMU to pass values back.
+	 * This value comes from experiments.
+	 *
+	 */
 	ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000);
 
 	for (i = 0; i <= time_out_num_tries; i++) {
@@ -346,12 +540,48 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx,
 {
 	uint32_t value = 0;
 
+	// when reg read, there should not be any offload.
+	if (ctx->dmub_srv &&
+	    ctx->dmub_srv->reg_helper_offload.gather_in_progress) {
+		ASSERT(false);
+	}
+
 	dm_write_reg(ctx, addr_index, index);
 	value = dm_read_reg(ctx, addr_data);
 
 	return value;
 }
 
+uint32_t generic_indirect_reg_get(const struct dc_context *ctx,
+		uint32_t addr_index, uint32_t addr_data,
+		uint32_t index, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		...)
+{
+	uint32_t shift, mask, *field_value;
+	uint32_t value = 0;
+	int i = 1;
+
+	va_list ap;
+
+	va_start(ap, field_value1);
+
+	value = generic_read_indirect_reg(ctx, addr_index, addr_data, index);
+	*field_value1 = get_reg_field_value_ex(value, mask1, shift1);
+
+	while (i < n) {
+		shift = va_arg(ap, uint32_t);
+		mask = va_arg(ap, uint32_t);
+		field_value = va_arg(ap, uint32_t *);
+
+		*field_value = get_reg_field_value_ex(value, mask, shift);
+		i++;
+	}
+
+	va_end(ap);
+
+	return value;
+}
 
 uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
@@ -382,3 +612,68 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
 
 	return reg_val;
 }
+
+void reg_sequence_start_gather(const struct dc_context *ctx)
+{
+	/* if reg sequence is supported and enabled, set flag to
+	 * indicate we want to have REG_SET, REG_UPDATE macro build
+	 * reg sequence command buffer rather than MMIO directly.
+	 */
+
+	if (ctx->dmub_srv && ctx->dc->debug.dmub_offload_enabled) {
+		struct dc_reg_helper_state *offload =
+			&ctx->dmub_srv->reg_helper_offload;
+
+		/* caller sequence mismatch.  need to debug caller.  offload will not work!!! */
+		ASSERT(!offload->gather_in_progress);
+
+		offload->gather_in_progress = true;
+	}
+}
+
+void reg_sequence_start_execute(const struct dc_context *ctx)
+{
+	struct dc_reg_helper_state *offload;
+
+	if (!ctx->dmub_srv)
+		return;
+
+	offload = &ctx->dmub_srv->reg_helper_offload;
+
+	if (offload && offload->gather_in_progress) {
+		offload->gather_in_progress = false;
+		offload->should_burst_write = false;
+		switch (offload->cmd_data.cmd_common.header.type) {
+		case DMUB_CMD__REG_SEQ_READ_MODIFY_WRITE:
+			submit_dmub_read_modify_write(offload, ctx);
+			break;
+		case DMUB_CMD__REG_REG_WAIT:
+			submit_dmub_reg_wait(offload, ctx);
+			break;
+		case DMUB_CMD__REG_SEQ_BURST_WRITE:
+			submit_dmub_burst_write(offload, ctx);
+			break;
+		default:
+			return;
+		}
+
+		dc_dmub_srv_cmd_execute(ctx->dmub_srv);
+	}
+}
+
+void reg_sequence_wait_done(const struct dc_context *ctx)
+{
+	/* callback to DM to poll for last submission done*/
+	struct dc_reg_helper_state *offload;
+
+	if (!ctx->dmub_srv)
+		return;
+
+	offload = &ctx->dmub_srv->reg_helper_offload;
+
+	if (offload &&
+	    ctx->dc->debug.dmub_offload_enabled &&
+	    !ctx->dc->debug.dmcub_emulation) {
+		dc_dmub_srv_wait_idle(ctx->dmub_srv);
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 0b8700a8a94a..25c50bcab9e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -124,20 +124,6 @@ struct plane_size {
 	int chroma_pitch;
 	struct rect surface_size;
 	struct rect chroma_size;
-
-	union {
-		struct {
-			struct rect surface_size;
-			int surface_pitch;
-		} grph;
-
-		struct {
-			struct rect luma_size;
-			int luma_pitch;
-			struct rect chroma_size;
-			int chroma_pitch;
-		} video;
-	};
 };
 
 struct dc_plane_dcc_param {
@@ -148,21 +134,6 @@ struct dc_plane_dcc_param {
 
 	int meta_pitch_c;
 	bool independent_64b_blks_c;
-
-	union {
-		struct {
-			int meta_pitch;
-			bool independent_64b_blks;
-		} grph;
-
-		struct {
-			int meta_pitch_l;
-			bool independent_64b_blks_l;
-
-			int meta_pitch_c;
-			bool independent_64b_blks_c;
-		} video;
-	};
 };
 
 /*Displayable pixel format in fb*/
@@ -194,12 +165,10 @@ enum surface_pixel_format {
 	/*swaped & float*/
 	SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F,
 	/*grow graphics here if necessary */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX,
 	SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX,
 	SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT,
 	SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT,
-#endif
 	SURFACE_PIXEL_FORMAT_VIDEO_BEGIN,
 	SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr =
 		SURFACE_PIXEL_FORMAT_VIDEO_BEGIN,
@@ -207,10 +176,8 @@ enum surface_pixel_format {
 	SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr,
 	SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb,
 		SURFACE_PIXEL_FORMAT_SUBSAMPLE_END,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010,
 	SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102,
-#endif
 	SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888,
 	SURFACE_PIXEL_FORMAT_INVALID
 
@@ -249,12 +216,10 @@ enum tile_split_values {
 	DC_ROTATED_MICRO_TILING = 0x3,
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 enum tripleBuffer_enable {
 	DC_TRIPLEBUFFER_DISABLE = 0x0,
 	DC_TRIPLEBUFFER_ENABLE = 0x1,
 };
-#endif
 
 /* TODO: These values come from hardware spec. We need to readdress this
  * if they ever change.
@@ -454,13 +419,11 @@ struct dc_csc_transform {
 	bool enable_adjustment;
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 struct dc_rgb_fixed {
 	struct fixed31_32 red;
 	struct fixed31_32 green;
 	struct fixed31_32 blue;
 };
-#endif
 
 struct dc_gamma {
 	struct kref refcount;
@@ -495,10 +458,8 @@ enum dc_cursor_color_format {
 	CURSOR_MODE_COLOR_1BIT_AND,
 	CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA,
 	CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED,
 	CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED
-#endif
 };
 
 /*
@@ -605,6 +566,11 @@ enum dc_quantization_range {
 	QUANTIZATION_RANGE_LIMITED
 };
 
+enum dc_dynamic_expansion {
+	DYN_EXPANSION_AUTO,
+	DYN_EXPANSION_DISABLE
+};
+
 /* XFM */
 
 /* used in  struct dc_plane_state */
@@ -646,10 +612,8 @@ enum dc_color_depth {
 	COLOR_DEPTH_121212,
 	COLOR_DEPTH_141414,
 	COLOR_DEPTH_161616,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	COLOR_DEPTH_999,
 	COLOR_DEPTH_111111,
-#endif
 	COLOR_DEPTH_COUNT
 };
 
@@ -710,9 +674,7 @@ struct dc_crtc_timing_flags {
 	 * rates less than or equal to 340Mcsc */
 	uint32_t LTE_340MCSC_SCRAMBLE:1;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	uint32_t DSC : 1; /* Use DSC with this timing */
-#endif
 };
 
 enum dc_timing_3d_format {
@@ -737,31 +699,6 @@ enum dc_timing_3d_format {
 	TIMING_3D_FORMAT_MAX,
 };
 
-enum trigger_delay {
-	TRIGGER_DELAY_NEXT_PIXEL = 0,
-	TRIGGER_DELAY_NEXT_LINE,
-};
-
-enum crtc_event {
-	CRTC_EVENT_VSYNC_RISING = 0,
-	CRTC_EVENT_VSYNC_FALLING
-};
-
-struct crtc_trigger_info {
-	bool enabled;
-	struct dc_stream_state *event_source;
-	enum crtc_event event;
-	enum trigger_delay delay;
-};
-
-struct dc_crtc_timing_adjust {
-	uint32_t v_total_min;
-	uint32_t v_total_max;
-	uint32_t v_total_mid;
-	uint32_t v_total_mid_frame_num;
-};
-
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 struct dc_dsc_config {
 	uint32_t num_slices_h; /* Number of DSC slices - horizontal */
 	uint32_t num_slices_v; /* Number of DSC slices - vertical */
@@ -772,7 +709,6 @@ struct dc_dsc_config {
 	bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */
 	int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */
 };
-#endif
 struct dc_crtc_timing {
 	uint32_t h_total;
 	uint32_t h_border_left;
@@ -799,11 +735,34 @@ struct dc_crtc_timing {
 	enum scanning_type scan_type;
 
 	struct dc_crtc_timing_flags flags;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct dc_dsc_config dsc_cfg;
-#endif
 };
 
+enum trigger_delay {
+	TRIGGER_DELAY_NEXT_PIXEL = 0,
+	TRIGGER_DELAY_NEXT_LINE,
+};
+
+enum crtc_event {
+	CRTC_EVENT_VSYNC_RISING = 0,
+	CRTC_EVENT_VSYNC_FALLING
+};
+
+struct crtc_trigger_info {
+	bool enabled;
+	struct dc_stream_state *event_source;
+	enum crtc_event event;
+	enum trigger_delay delay;
+};
+
+struct dc_crtc_timing_adjust {
+	uint32_t v_total_min;
+	uint32_t v_total_max;
+	uint32_t v_total_mid;
+	uint32_t v_total_mid_frame_num;
+};
+
+
 /* Passed on init */
 enum vram_type {
 	VIDEO_MEMORY_TYPE_GDDR5  = 2,
@@ -813,7 +772,6 @@ enum vram_type {
 	VIDEO_MEMORY_TYPE_GDDR6  = 6,
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 enum dwb_cnv_out_bpc {
 	DWB_CNV_OUT_BPC_8BPC  = 0,
 	DWB_CNV_OUT_BPC_10BPC = 1,
@@ -864,7 +822,6 @@ struct mcif_buf_params {
 	unsigned int		swlock;
 };
 
-#endif
 
 #define MAX_TG_COLOR_VALUE 0x3FF
 struct tg_color {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 9ea75db3484e..d25603128394 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -29,13 +29,11 @@
 #include "dc_types.h"
 #include "grph_object_defs.h"
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 enum dc_link_fec_state {
 	dc_link_fec_not_ready,
 	dc_link_fec_ready,
 	dc_link_fec_enabled
 };
-#endif
 struct dc_link_status {
 	bool link_active;
 	struct dpcd_caps *dpcd_caps;
@@ -85,6 +83,7 @@ struct dc_link {
 	bool link_state_valid;
 	bool aux_access_disabled;
 	bool sync_lt_in_progress;
+	bool is_lttpr_mode_transparent;
 
 	/* caps is the same as reported_link_cap. link_traing use
 	 * reported_link_cap. Will clean up.  TODO
@@ -95,6 +94,7 @@ struct dc_link {
 	struct dc_lane_settings cur_lane_setting;
 	struct dc_link_settings preferred_link_setting;
 	struct dc_link_training_overrides preferred_training_settings;
+	struct dp_audio_test_data audio_test_data;
 
 	uint8_t ddc_hw_inst;
 
@@ -126,12 +126,14 @@ struct dc_link {
 	unsigned short chip_caps;
 	unsigned int dpcd_sink_count;
 	enum edp_revision edp_revision;
-	bool psr_enabled;
+	bool psr_feature_enabled;
+	bool psr_allow_active;
 
 	/* MST record stream using this link */
 	struct link_flags {
 		bool dp_keep_receiver_powered;
 		bool dp_skip_DID2;
+		bool dp_skip_reset_segment;
 	} wa_flags;
 	struct link_mst_stream_allocation_table mst_stream_alloc_table;
 
@@ -139,9 +141,7 @@ struct dc_link {
 
 	struct link_trace link_trace;
 	struct gpio *hpd_gpio;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	enum dc_link_fec_state fec_state;
-#endif
 };
 
 const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link);
@@ -158,6 +158,18 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_
 	return dc->links[link_index];
 }
 
+static inline struct dc_link *get_edp_link(const struct dc *dc)
+{
+	int i;
+
+	// report any eDP links, even unconnected DDI's
+	for (i = 0; i < dc->link_count; i++) {
+		if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
+			return dc->links[i];
+	}
+	return NULL;
+}
+
 /* Set backlight level of an embedded panel (eDP, LVDS).
  * backlight_pwm_u16_16 is unsigned 32 bit with 16 bit integer
  * and 16 bit fractional, where 1.0 is max backlight value.
@@ -170,7 +182,7 @@ int dc_link_get_backlight_level(const struct dc_link *dc_link);
 
 bool dc_link_set_abm_disable(const struct dc_link *dc_link);
 
-bool dc_link_set_psr_enable(const struct dc_link *dc_link, bool enable, bool wait);
+bool dc_link_set_psr_allow_active(struct dc_link *dc_link, bool enable, bool wait);
 
 bool dc_link_get_psr_state(const struct dc_link *dc_link, uint32_t *psr_state);
 
@@ -192,6 +204,8 @@ enum dc_detect_reason {
 
 bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
 bool dc_link_get_hpd_state(struct dc_link *dc_link);
+enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx);
+enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link);
 
 /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt).
  * Return:
@@ -245,6 +259,7 @@ void dc_link_dp_disable_hpd(const struct dc_link *link);
 bool dc_link_dp_set_test_pattern(
 	struct dc_link *link,
 	enum dp_test_pattern test_pattern,
+	enum dp_test_pattern_color_space test_pattern_color_space,
 	const struct link_training_settings *p_link_settings,
 	const unsigned char *p_custom_pattern,
 	unsigned int cust_pattern_size);
@@ -276,6 +291,7 @@ void dc_link_enable_hpd(const struct dc_link *link);
 void dc_link_disable_hpd(const struct dc_link *link);
 void dc_link_set_test_pattern(struct dc_link *link,
 			enum dp_test_pattern test_pattern,
+			enum dp_test_pattern_color_space test_pattern_color_space,
 			const struct link_training_settings *p_link_settings,
 			const unsigned char *p_custom_pattern,
 			unsigned int cust_pattern_size);
@@ -286,11 +302,18 @@ uint32_t dc_link_bandwidth_kbps(
 const struct dc_link_settings *dc_link_get_link_cap(
 		const struct dc_link *link);
 
+void dc_link_overwrite_extended_receiver_cap(
+		struct dc_link *link);
+
 bool dc_submit_i2c(
 		struct dc *dc,
 		uint32_t link_index,
 		struct i2c_command *cmd);
 
+bool dc_submit_i2c_oem(
+		struct dc *dc,
+		struct i2c_command *cmd);
+
 uint32_t dc_bandwidth_in_kbps_from_timing(
 	const struct dc_crtc_timing *timing);
 #endif /* DC_LINK_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 0fa1c26bc20d..92096de79dec 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -52,7 +52,6 @@ struct freesync_context {
 	bool dummy;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 enum hubp_dmdata_mode {
 	DMDATA_SW_MODE,
 	DMDATA_HW_MODE
@@ -82,9 +81,7 @@ struct dc_dmdata_attributes {
 	/* An unbounded array of uint32s, represents software dmdata to be loaded */
 	uint32_t *dmdata_sw_data;
 };
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dc_writeback_info {
 	bool wb_enabled;
 	int dwb_pipe_inst;
@@ -96,7 +93,6 @@ struct dc_writeback_update {
 	unsigned int num_wb_info;
 	struct dc_writeback_info writeback_info[MAX_DWB_PIPES];
 };
-#endif
 
 enum vertical_interrupt_ref_point {
 	START_V_UPDATE = 0,
@@ -113,6 +109,19 @@ struct periodic_interrupt_config {
 	int lines_offset;
 };
 
+union stream_update_flags {
+	struct {
+		uint32_t scaling:1;
+		uint32_t out_tf:1;
+		uint32_t out_csc:1;
+		uint32_t abm_level:1;
+		uint32_t dpms_off:1;
+		uint32_t gamut_remap:1;
+		uint32_t wb_update:1;
+	} bits;
+
+	uint32_t raw;
+};
 
 struct dc_stream_state {
 	// sink is deprecated, new code should not reference
@@ -149,6 +158,7 @@ struct dc_stream_state {
 
 	enum view_3d_format view_format;
 
+	bool use_vsc_sdp_for_colorimetry;
 	bool ignore_msa_timing_param;
 	bool converter_disable_audio;
 	uint8_t qs_bit;
@@ -188,11 +198,9 @@ struct dc_stream_state {
 
 	struct crtc_trigger_info triggered_crtc_reset;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* writeback */
 	unsigned int num_wb_info;
 	struct dc_writeback_info writeback_info[MAX_DWB_PIPES];
-#endif
 	/* Computed state bits */
 	bool mode_changed : 1;
 
@@ -211,12 +219,15 @@ struct dc_stream_state {
 	bool apply_seamless_boot_optimization;
 
 	uint32_t stream_id;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	bool is_dsc_enabled;
-#endif
+	union stream_update_flags update_flags;
 };
 
+#define ABM_LEVEL_IMMEDIATE_DISABLE 0xFFFFFFFF
+
 struct dc_stream_update {
+	struct dc_stream_state *stream;
+
 	struct rect src;
 	struct rect dst;
 	struct dc_transfer_func *out_transfer_func;
@@ -231,6 +242,7 @@ struct dc_stream_update {
 	struct dc_info_packet *vsp_infopacket;
 
 	bool *dpms_off;
+	bool integer_scaling_update;
 
 	struct colorspace_transform *gamut_remap;
 	enum dc_color_space *output_color_space;
@@ -238,12 +250,8 @@ struct dc_stream_update {
 
 	struct dc_csc_transform *output_csc_transform;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dc_writeback_update *wb_update;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 	struct dc_dsc_config *dsc_config;
-#endif
 };
 
 bool dc_is_stream_unchanged(
@@ -333,18 +341,23 @@ bool dc_add_all_planes_for_stream(
 		int plane_count,
 		struct dc_state *context);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info);
+
 bool dc_stream_remove_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst);
+
+bool dc_stream_warmup_writeback(struct dc *dc,
+		int num_dwb,
+		struct dc_writeback_info *wb_info);
+
 bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream);
+
 bool dc_stream_set_dynamic_metadata(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_dmdata_attributes *dmdata_attr);
-#endif
 
 enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream);
 
@@ -426,10 +439,13 @@ bool dc_stream_get_crc(struct dc *dc,
 		       uint32_t *g_y,
 		       uint32_t *b_cb);
 
-void dc_stream_set_static_screen_events(struct dc *dc,
+void dc_stream_set_static_screen_params(struct dc *dc,
 					struct dc_stream_state **stream,
 					int num_streams,
-					const struct dc_static_screen_events *events);
+					const struct dc_static_screen_params *params);
+
+void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream,
+		enum dc_dynamic_expansion option);
 
 void dc_stream_set_dither_option(struct dc_stream_state *stream,
 				 enum dc_dither_option option);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index b273735b6a3e..e59532d98cb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -25,6 +25,10 @@
 #ifndef DC_TYPES_H_
 #define DC_TYPES_H_
 
+/* AND EdidUtility only needs a portion
+ * of this file, including the rest only
+ * causes additional issues.
+ */
 #include "os_types.h"
 #include "fixed31_32.h"
 #include "irq_types.h"
@@ -33,12 +37,17 @@
 #include "dal_types.h"
 #include "grph_object_defs.h"
 
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+#include "dm_cp_psp.h"
+#endif
+
 /* forward declarations */
 struct dc_plane_state;
 struct dc_stream_state;
 struct dc_link;
 struct dc_sink;
 struct dal;
+struct dc_dmub_srv;
 
 /********************************
  * Environment definitions
@@ -51,7 +60,12 @@ enum dce_environment {
 	DCE_ENV_FPGA_MAXIMUS,
 	/* Emulation on real HW or on FPGA. Used by Diagnostics, enforces
 	 * requirements of Diagnostics team. */
-	DCE_ENV_DIAG
+	DCE_ENV_DIAG,
+	/*
+	 * Guest VM system, DC HW may exist but is not virtualized and
+	 * should not be used.  SW support for VDI only.
+	 */
+	DCE_ENV_VIRTUAL_HW
 };
 
 /* Note: use these macro definitions instead of direct comparison! */
@@ -100,6 +114,11 @@ struct dc_context {
 	uint32_t dc_sink_id_count;
 	uint32_t dc_stream_id_count;
 	uint64_t fbc_gpu_addr;
+	struct dc_dmub_srv *dmub_srv;
+
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+	struct cp_psp cp_psp;
+#endif
 };
 
 
@@ -107,6 +126,7 @@ struct dc_context {
 #define DC_EDID_BLOCK_SIZE 128
 #define MAX_SURFACE_NUM 4
 #define NUM_PIXEL_FORMATS 10
+#define MAX_REPEATER_CNT 8
 
 #include "dc_ddc_types.h"
 
@@ -159,6 +179,12 @@ enum dc_edid_status {
 	EDID_THE_SAME,
 };
 
+enum act_return_status {
+	ACT_SUCCESS,
+	ACT_LINK_LOST,
+	ACT_FAILED
+};
+
 /* audio capability from EDID*/
 struct dc_cea_audio_mode {
 	uint8_t format_code; /* ucData[0] [6:3]*/
@@ -203,6 +229,7 @@ struct dc_panel_patch {
 	unsigned int extra_t12_ms;
 	unsigned int extra_delay_backlight_off;
 	unsigned int extra_t7_ms;
+	unsigned int manage_secondary_link;
 };
 
 struct dc_edid_caps {
@@ -384,6 +411,30 @@ enum dpcd_downstream_port_max_bpc {
 	DOWN_STREAM_MAX_12BPC,
 	DOWN_STREAM_MAX_16BPC
 };
+
+
+enum link_training_offset {
+	DPRX                = 0,
+	LTTPR_PHY_REPEATER1 = 1,
+	LTTPR_PHY_REPEATER2 = 2,
+	LTTPR_PHY_REPEATER3 = 3,
+	LTTPR_PHY_REPEATER4 = 4,
+	LTTPR_PHY_REPEATER5 = 5,
+	LTTPR_PHY_REPEATER6 = 6,
+	LTTPR_PHY_REPEATER7 = 7,
+	LTTPR_PHY_REPEATER8 = 8
+};
+
+struct dc_lttpr_caps {
+	union dpcd_rev revision;
+	uint8_t mode;
+	uint8_t max_lane_count;
+	uint8_t max_link_rate;
+	uint8_t phy_repeater_cnt;
+	uint8_t max_ext_timeout;
+	uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1];
+};
+
 struct dc_dongle_caps {
 	/* dongle type (DP converter, CV smart dongle) */
 	enum display_dongle_type dongle_type;
@@ -422,7 +473,6 @@ enum display_content_type {
 	DISPLAY_CONTENT_TYPE_GAME = 8
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 /* writeback */
 struct dwb_stereo_params {
 	bool				stereo_enabled;		/* false: normal mode, true: 3D stereo */
@@ -453,7 +503,6 @@ struct dc_dwb_params {
 	enum dwb_subsample_position	subsample_position;
 	struct dc_transfer_func *out_transfer_func;
 };
-#endif
 
 /* audio*/
 
@@ -555,15 +604,17 @@ struct audio_info {
 	/* this field must be last in this struct */
 	struct audio_mode modes[DC_MAX_AUDIO_DESC_COUNT];
 };
-
+struct audio_check {
+	unsigned int audio_packet_type;
+	unsigned int max_audiosample_rate;
+	unsigned int acat;
+};
 enum dc_infoframe_type {
 	DC_HDMI_INFOFRAME_TYPE_VENDOR = 0x81,
 	DC_HDMI_INFOFRAME_TYPE_AVI = 0x82,
 	DC_HDMI_INFOFRAME_TYPE_SPD = 0x83,
 	DC_HDMI_INFOFRAME_TYPE_AUDIO = 0x84,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	DC_DP_INFOFRAME_TYPE_PPS = 0x10,
-#endif
 };
 
 struct dc_info_packet {
@@ -678,7 +729,7 @@ struct psr_context {
 	/* The VSync rate in Hz used to calculate the
 	 * step size for smooth brightness feature
 	 */
-	unsigned int vsyncRateHz;
+	unsigned int vsync_rate_hz;
 	unsigned int skipPsrWaitForPllLock;
 	unsigned int numberOfControllers;
 	/* Unused, for future use. To indicate that first changed frame from
@@ -739,7 +790,6 @@ struct dc_clock_config {
 	uint32_t current_clock_khz;/*current clock in use*/
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 /* DSC DPCD capabilities */
 union dsc_slice_caps1 {
 	struct {
@@ -809,5 +859,5 @@ struct dsc_dec_dpcd_caps {
 	uint32_t branch_overall_throughput_1_mps; /* In MPs */
 	uint32_t branch_max_line_width;
 };
-#endif
+
 #endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 58bd131d5b48..b8a3fc505c9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -77,6 +77,9 @@ static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
 	/* notifyDMCUMsg */
 	REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
 
+	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
+			1, 80000);
+
 	return true;
 }
 
@@ -401,6 +404,10 @@ static bool dce_abm_init_backlight(struct abm *abm)
 	/* Enable the backlight output */
 	REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1);
 
+	/* Disable fractional pwm if configured */
+	REG_UPDATE(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN,
+		   abm->ctx->dc->config.disable_fractional_pwm ? 0 : 1);
+
 	/* Unlock group 2 backlight registers */
 	REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
 			BL_PWM_GRP1_REG_LOCK, 0);
@@ -489,9 +496,6 @@ void dce_abm_destroy(struct abm **abm)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(*abm);
 
-	if (abm_dce->base.dmcu_is_running == true)
-		abm_dce->base.funcs->set_abm_immediate_disable(*abm);
-
 	kfree(abm_dce);
 	*abm = NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index 7ba7e6f722f6..ba0caaffa24b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -67,7 +67,6 @@
 	SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
 	NBIO_SR(BIOS_SCRATCH_2)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define ABM_DCN20_REG_LIST() \
 	ABM_COMMON_REG_LIST_DCE_BASE(), \
 	SR(DC_ABM1_HG_SAMPLE_RATE), \
@@ -81,7 +80,6 @@
 	SR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES), \
 	SR(DC_ABM1_HGLS_REG_READ_PROGRESS), \
 	NBIO_SR(BIOS_SCRATCH_2)
-#endif
 
 #define ABM_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
@@ -163,9 +161,7 @@
 	ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
 			ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define ABM_MASK_SH_LIST_DCN20(mask_sh) ABM_MASK_SH_LIST_DCE110(mask_sh)
-#endif
 
 #define ABM_REG_FIELD_LIST(type) \
 	type ABM1_HG_NUM_OF_BINS_SEL; \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index c3f9f4185ce8..f1a5d2c6aa37 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -42,6 +42,10 @@
 
 #include "reg_helper.h"
 
+#undef FN
+#define FN(reg_name, field_name) \
+	aux110->shift->field_name, aux110->mask->field_name
+
 #define FROM_AUX_ENGINE(ptr) \
 	container_of((ptr), struct aux_engine_dce110, base)
 
@@ -55,6 +59,16 @@ enum {
 	AUX_TIMED_OUT_RETRY_COUNTER = 2,
 	AUX_DEFER_RETRY_COUNTER = 6
 };
+
+#define TIME_OUT_INCREMENT        1016
+#define TIME_OUT_MULTIPLIER_8     8
+#define TIME_OUT_MULTIPLIER_16    16
+#define TIME_OUT_MULTIPLIER_32    32
+#define TIME_OUT_MULTIPLIER_64    64
+#define MAX_TIMEOUT_LENGTH        127
+#define DEFAULT_AUX_ENGINE_MULT   0
+#define DEFAULT_AUX_ENGINE_LENGTH 69
+
 static void release_engine(
 	struct dce_aux *engine)
 {
@@ -198,7 +212,7 @@ static void submit_channel_request(
 	REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1);
 
 	REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0,
-				10, aux110->timeout_period/10);
+				10, aux110->polling_timeout_period/10);
 
 	/* set the delay and the number of bytes to write */
 
@@ -327,7 +341,7 @@ static enum aux_channel_operation_result get_channel_status(
 
 	/* poll to make sure that SW_DONE is asserted */
 	REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1,
-				10, aux110->timeout_period/10);
+				10, aux110->polling_timeout_period/10);
 
 	value = REG_READ(AUX_SW_STATUS);
 	/* in case HPD is LOW, exit AUX transaction */
@@ -414,20 +428,103 @@ void dce110_engine_destroy(struct dce_aux **engine)
 	*engine = NULL;
 
 }
+
+static uint32_t dce_aux_configure_timeout(struct ddc_service *ddc,
+		uint32_t timeout_in_us)
+{
+	uint32_t multiplier = 0;
+	uint32_t length = 0;
+	uint32_t prev_length = 0;
+	uint32_t prev_mult = 0;
+	uint32_t prev_timeout_val = 0;
+	struct ddc *ddc_pin = ddc->ddc_pin;
+	struct dce_aux *aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
+	struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(aux_engine);
+
+	/* 1-Update polling timeout period */
+	aux110->polling_timeout_period = timeout_in_us * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER;
+
+	/* 2-Update aux timeout period length and multiplier */
+	if (timeout_in_us == 0) {
+		multiplier = DEFAULT_AUX_ENGINE_MULT;
+		length = DEFAULT_AUX_ENGINE_LENGTH;
+	} else if (timeout_in_us <= TIME_OUT_INCREMENT) {
+		multiplier = 0;
+		length = timeout_in_us/TIME_OUT_MULTIPLIER_8;
+		if (timeout_in_us % TIME_OUT_MULTIPLIER_8 != 0)
+			length++;
+	} else if (timeout_in_us <= 2 * TIME_OUT_INCREMENT) {
+		multiplier = 1;
+		length = timeout_in_us/TIME_OUT_MULTIPLIER_16;
+		if (timeout_in_us % TIME_OUT_MULTIPLIER_16 != 0)
+			length++;
+	} else if (timeout_in_us <= 4 * TIME_OUT_INCREMENT) {
+		multiplier = 2;
+		length = timeout_in_us/TIME_OUT_MULTIPLIER_32;
+		if (timeout_in_us % TIME_OUT_MULTIPLIER_32 != 0)
+			length++;
+	} else if (timeout_in_us > 4 * TIME_OUT_INCREMENT) {
+		multiplier = 3;
+		length = timeout_in_us/TIME_OUT_MULTIPLIER_64;
+		if (timeout_in_us % TIME_OUT_MULTIPLIER_64 != 0)
+			length++;
+	}
+
+	length = (length < MAX_TIMEOUT_LENGTH) ? length : MAX_TIMEOUT_LENGTH;
+
+	REG_GET_2(AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, &prev_length, AUX_RX_TIMEOUT_LEN_MUL, &prev_mult);
+
+	switch (prev_mult) {
+	case 0:
+		prev_timeout_val = prev_length * TIME_OUT_MULTIPLIER_8;
+		break;
+	case 1:
+		prev_timeout_val = prev_length * TIME_OUT_MULTIPLIER_16;
+		break;
+	case 2:
+		prev_timeout_val = prev_length * TIME_OUT_MULTIPLIER_32;
+		break;
+	case 3:
+		prev_timeout_val = prev_length * TIME_OUT_MULTIPLIER_64;
+		break;
+	default:
+		prev_timeout_val = DEFAULT_AUX_ENGINE_LENGTH * TIME_OUT_MULTIPLIER_8;
+		break;
+	}
+
+	REG_UPDATE_SEQ_2(AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, length, AUX_RX_TIMEOUT_LEN_MUL, multiplier);
+
+	return prev_timeout_val;
+}
+
+static struct dce_aux_funcs aux_functions = {
+	.configure_timeout = NULL,
+	.destroy = NULL,
+};
+
 struct dce_aux *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110,
 		struct dc_context *ctx,
 		uint32_t inst,
 		uint32_t timeout_period,
-		const struct dce110_aux_registers *regs)
+		const struct dce110_aux_registers *regs,
+		const struct dce110_aux_registers_mask *mask,
+		const struct dce110_aux_registers_shift *shift,
+		bool is_ext_aux_timeout_configurable)
 {
 	aux_engine110->base.ddc = NULL;
 	aux_engine110->base.ctx = ctx;
 	aux_engine110->base.delay = 0;
 	aux_engine110->base.max_defer_write_retry = 0;
 	aux_engine110->base.inst = inst;
-	aux_engine110->timeout_period = timeout_period;
+	aux_engine110->polling_timeout_period = timeout_period;
 	aux_engine110->regs = regs;
 
+	aux_engine110->mask = mask;
+	aux_engine110->shift = shift;
+	aux_engine110->base.funcs = &aux_functions;
+	if (is_ext_aux_timeout_configurable)
+		aux_engine110->base.funcs->configure_timeout = &dce_aux_configure_timeout;
+
 	return &aux_engine110->base;
 }
 
@@ -464,8 +561,10 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
 	memset(&aux_rep, 0, sizeof(aux_rep));
 
 	aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
-	if (!acquire(aux_engine, ddc_pin))
+	if (!acquire(aux_engine, ddc_pin)) {
+		*operation_result = AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE;
 		return -1;
+	}
 
 	if (payload->i2c_over_aux)
 		aux_req.type = AUX_TRANSACTION_TYPE_I2C;
@@ -475,7 +574,7 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
 	aux_req.action = i2caux_action_from_payload(payload);
 
 	aux_req.address = payload->address;
-	aux_req.delay = payload->defer_delay * 10;
+	aux_req.delay = 0;
 	aux_req.length = payload->length;
 	aux_req.data = payload->data;
 
@@ -512,6 +611,8 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
 	uint8_t reply;
 	bool payload_reply = true;
 	enum aux_channel_operation_result operation_result;
+	bool retry_on_defer = false;
+
 	int aux_ack_retries = 0,
 		aux_defer_retries = 0,
 		aux_i2c_defer_retries = 0,
@@ -542,10 +643,19 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
 			break;
 
 			case AUX_TRANSACTION_REPLY_AUX_DEFER:
-			case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
 			case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER:
-				if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES)
+				retry_on_defer = true;
+				/* fall through */
+			case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
+				if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) {
 					goto fail;
+				} else {
+					if ((*payload->reply == AUX_TRANSACTION_REPLY_AUX_DEFER) ||
+						(*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER)) {
+						if (payload->defer_delay > 0)
+							msleep(payload->defer_delay);
+					}
+				}
 				break;
 
 			case AUX_TRANSACTION_REPLY_I2C_DEFER:
@@ -569,19 +679,29 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
 			break;
 
 		case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT:
-			if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES)
-				goto fail;
-			else {
-				/*
-				 * DP 1.4, 2.8.2:  AUX Transaction Response/Reply Timeouts
-				 * According to the DP spec there should be 3 retries total
-				 * with a 400us wait inbetween each. Hardware already waits
-				 * for 550us therefore no wait is required here.
-				 */
+			// Check whether a DEFER had occurred before the timeout.
+			// If so, treat timeout as a DEFER.
+			if (retry_on_defer) {
+				if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES)
+					goto fail;
+				else if (payload->defer_delay > 0)
+					msleep(payload->defer_delay);
+			} else {
+				if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES)
+					goto fail;
+				else {
+					/*
+					 * DP 1.4, 2.8.2:  AUX Transaction Response/Reply Timeouts
+					 * According to the DP spec there should be 3 retries total
+					 * with a 400us wait inbetween each. Hardware already waits
+					 * for 550us therefore no wait is required here.
+					 */
+				}
 			}
 			break;
 
 		case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON:
+		case AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE:
 		case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN:
 		default:
 			goto fail;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
index ed7fec8fe253..382465862f29 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
@@ -29,15 +29,15 @@
 #include "i2caux_interface.h"
 #include "inc/hw/aux_engine.h"
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+
 #define AUX_COMMON_REG_LIST0(id)\
 	SRI(AUX_CONTROL, DP_AUX, id), \
 	SRI(AUX_ARB_CONTROL, DP_AUX, id), \
 	SRI(AUX_SW_DATA, DP_AUX, id), \
 	SRI(AUX_SW_CONTROL, DP_AUX, id), \
 	SRI(AUX_INTERRUPT_CONTROL, DP_AUX, id), \
+	SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \
 	SRI(AUX_SW_STATUS, DP_AUX, id)
-#endif
 
 #define AUX_COMMON_REG_LIST(id)\
 	SRI(AUX_CONTROL, DP_AUX, id), \
@@ -55,6 +55,7 @@ struct dce110_aux_registers {
 	uint32_t AUX_SW_DATA;
 	uint32_t AUX_SW_CONTROL;
 	uint32_t AUX_INTERRUPT_CONTROL;
+	uint32_t AUX_DPHY_RX_CONTROL1;
 	uint32_t AUX_SW_STATUS;
 	uint32_t AUXN_IMPCAL;
 	uint32_t AUXP_IMPCAL;
@@ -62,6 +63,156 @@ struct dce110_aux_registers {
 	uint32_t AUX_RESET_MASK;
 };
 
+#define DCE_AUX_REG_FIELD_LIST(type)\
+	type AUX_EN;\
+	type AUX_RESET;\
+	type AUX_RESET_DONE;\
+	type AUX_REG_RW_CNTL_STATUS;\
+	type AUX_SW_USE_AUX_REG_REQ;\
+	type AUX_SW_DONE_USING_AUX_REG;\
+	type AUX_SW_AUTOINCREMENT_DISABLE;\
+	type AUX_SW_DATA_RW;\
+	type AUX_SW_INDEX;\
+	type AUX_SW_GO;\
+	type AUX_SW_DATA;\
+	type AUX_SW_REPLY_BYTE_COUNT;\
+	type AUX_SW_DONE;\
+	type AUX_SW_DONE_ACK;\
+	type AUXN_IMPCAL_ENABLE;\
+	type AUXP_IMPCAL_ENABLE;\
+	type AUXN_IMPCAL_OVERRIDE_ENABLE;\
+	type AUXP_IMPCAL_OVERRIDE_ENABLE;\
+	type AUX_RX_TIMEOUT_LEN;\
+	type AUX_RX_TIMEOUT_LEN_MUL;\
+	type AUXN_CALOUT_ERROR_AK;\
+	type AUXP_CALOUT_ERROR_AK;\
+	type AUX_SW_START_DELAY;\
+	type AUX_SW_WR_BYTES
+
+#define DCE10_AUX_MASK_SH_LIST(mask_sh)\
+	AUX_SF(AUX_CONTROL, AUX_EN, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_DATA, mask_sh),\
+	AUX_SF(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\
+	AUX_SF(AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\
+	AUX_SF(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh)
+
+#define DCE_AUX_MASK_SH_LIST(mask_sh)\
+	AUX_SF(AUX_CONTROL, AUX_EN, mask_sh),\
+	AUX_SF(AUX_CONTROL, AUX_RESET, mask_sh),\
+	AUX_SF(AUX_CONTROL, AUX_RESET_DONE, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\
+	AUX_SF(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\
+	AUX_SF(AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\
+	AUX_SF(AUX_SW_DATA, AUX_SW_DATA, mask_sh),\
+	AUX_SF(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\
+	AUX_SF(AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\
+	AUX_SF(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh)
+
+#define DCE12_AUX_MASK_SH_LIST(mask_sh)\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh)
+
+/* DCN10 MASK */
+#define DCN10_AUX_MASK_SH_LIST(mask_sh)\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\
+	AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\
+	AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh)
+
+/* for all other DCN */
+#define DCN_AUX_MASK_SH_LIST(mask_sh)\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\
+	AUX_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh)
+
+#define AUX_SF(reg_name, field_name, post_fix)\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 enum {	/* This is the timeout as defined in DP 1.2a,
 	 * 2.3.4 "Detailed uPacket TX AUX CH State Description".
 	 */
@@ -97,20 +248,34 @@ struct dce_aux {
 	uint32_t max_defer_write_retry;
 
 	bool acquire_reset;
+	struct dce_aux_funcs *funcs;
 };
 
+struct dce110_aux_registers_mask {
+	DCE_AUX_REG_FIELD_LIST(uint32_t);
+};
+
+struct dce110_aux_registers_shift {
+	DCE_AUX_REG_FIELD_LIST(uint8_t);
+};
+
+
 struct aux_engine_dce110 {
 	struct dce_aux base;
 	const struct dce110_aux_registers *regs;
+	const struct dce110_aux_registers_mask *mask;
+	const struct dce110_aux_registers_shift *shift;
 	struct {
 		uint32_t aux_control;
 		uint32_t aux_arb_control;
 		uint32_t aux_sw_data;
 		uint32_t aux_sw_control;
 		uint32_t aux_interrupt_control;
+		uint32_t aux_dphy_rx_control1;
+		uint32_t aux_dphy_rx_control0;
 		uint32_t aux_sw_status;
 	} addr;
-	uint32_t timeout_period;
+	uint32_t polling_timeout_period;
 };
 
 struct aux_engine_dce110_init_data {
@@ -120,12 +285,15 @@ struct aux_engine_dce110_init_data {
 	const struct dce110_aux_registers *regs;
 };
 
-struct dce_aux *dce110_aux_engine_construct(
-		struct aux_engine_dce110 *aux_engine110,
+struct dce_aux *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110,
 		struct dc_context *ctx,
 		uint32_t inst,
 		uint32_t timeout_period,
-		const struct dce110_aux_registers *regs);
+		const struct dce110_aux_registers *regs,
+
+		const struct dce110_aux_registers_mask *mask,
+		const struct dce110_aux_registers_shift *shift,
+		bool is_ext_aux_timeout_configurable);
 
 void dce110_engine_destroy(struct dce_aux **engine);
 
@@ -139,4 +307,13 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
 
 bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
 		struct aux_payload *cmd);
+
+struct dce_aux_funcs {
+	uint32_t (*configure_timeout)
+		(struct ddc_service *ddc,
+		 uint32_t timeout);
+	void (*destroy)
+		(struct aux_engine **ptr);
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index f787a6b94781..2e992fbc0d71 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -905,7 +905,7 @@ static bool dce112_program_pix_clk(
 	struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
 	struct bp_pixel_clock_parameters bp_pc_params = {0};
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	if (IS_FPGA_MAXIMUS_DC(clock_source->ctx->dce_environment)) {
 		unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
 		unsigned dp_dto_ref_100hz = 7000000;
@@ -1004,7 +1004,6 @@ static bool get_pixel_clk_frequency_100hz(
 	return false;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 /* this table is use to find *1.001 and /1.001 pixel rates from non-precise pixel rate */
 struct pixel_rate_range_table_entry {
@@ -1064,7 +1063,6 @@ static const struct clock_source_funcs dcn20_clk_src_funcs = {
 	.get_pix_clk_dividers = dce112_get_pix_clk_dividers,
 	.get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
 };
-#endif
 
 /*****************************************/
 /* Constructor                           */
@@ -1435,7 +1433,6 @@ bool dce112_clk_src_construct(
 	return true;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 bool dcn20_clk_src_construct(
 	struct dce110_clk_src *clk_src,
 	struct dc_context *ctx,
@@ -1451,4 +1448,3 @@ bool dcn20_clk_src_construct(
 
 	return ret;
 }
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
index 43c1bf60b83c..51bd25079606 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
@@ -55,7 +55,6 @@
 	CS_SF(PHYPLLA_PIXCLK_RESYNC_CNTL, PHYPLLA_DCCG_DEEP_COLOR_CNTL, mask_sh),\
 	CS_SF(PHYPLLA_PIXCLK_RESYNC_CNTL, PHYPLLA_PIXCLK_DOUBLE_RATE_ENABLE, mask_sh)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define CS_COMMON_REG_LIST_DCN2_0(index, pllid) \
 		SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
 		SRII(PHASE, DP_DTO, 0),\
@@ -76,9 +75,7 @@
 		SRII(PIXEL_RATE_CNTL, OTG, 3),\
 		SRII(PIXEL_RATE_CNTL, OTG, 4),\
 		SRII(PIXEL_RATE_CNTL, OTG, 5)
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define CS_COMMON_REG_LIST_DCN2_1(index, pllid) \
 		SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
 		SRII(PHASE, DP_DTO, 0),\
@@ -93,17 +90,14 @@
 		SRII(PIXEL_RATE_CNTL, OTG, 1),\
 		SRII(PIXEL_RATE_CNTL, OTG, 2),\
 		SRII(PIXEL_RATE_CNTL, OTG, 3)
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define CS_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\
 	CS_SF(DP_DTO0_PHASE, DP_DTO0_PHASE, mask_sh),\
 	CS_SF(DP_DTO0_MODULO, DP_DTO0_MODULO, mask_sh),\
 	CS_SF(PHYPLLA_PIXCLK_RESYNC_CNTL, PHYPLLA_DCCG_DEEP_COLOR_CNTL, mask_sh),\
 	CS_SF(OTG0_PIXEL_RATE_CNTL, DP_DTO0_ENABLE, mask_sh)
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 
 #define CS_COMMON_REG_LIST_DCN1_0(index, pllid) \
 		SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
@@ -201,7 +195,6 @@ bool dce112_clk_src_construct(
 	const struct dce110_clk_src_shift *cs_shift,
 	const struct dce110_clk_src_mask *cs_mask);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 bool dcn20_clk_src_construct(
 	struct dce110_clk_src *clk_src,
 	struct dc_context *ctx,
@@ -210,6 +203,5 @@ bool dcn20_clk_src_construct(
 	const struct dce110_clk_src_regs *regs,
 	const struct dce110_clk_src_shift *cs_shift,
 	const struct dce110_clk_src_mask *cs_mask);
-#endif
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 0b86cee4876f..30d953acd016 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -59,6 +59,12 @@
 #define MCP_BL_SET_PWM_FRAC 0x6A  /* Enable or disable Fractional PWM */
 #define MASTER_COMM_CNTL_REG__MASTER_COMM_INTERRUPT_MASK   0x00000001L
 
+// PSP FW version
+#define mmMP0_SMN_C2PMSG_58				0x1607A
+
+//Register access policy version
+#define mmMP0_SMN_C2PMSG_91				0x1609B
+
 static bool dce_dmcu_init(struct dmcu *dmcu)
 {
 	// Do nothing
@@ -318,7 +324,7 @@ static void dce_get_psr_wait_loop(
 	return;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 static void dcn10_get_dmcu_version(struct dmcu *dmcu)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
@@ -373,6 +379,7 @@ static bool dcn10_dmcu_init(struct dmcu *dmcu)
 	const struct dc_config *config = &dmcu->ctx->dc->config;
 	bool status = false;
 
+	PERF_TRACE();
 	/*  Definition of DC_DMCU_SCRATCH
 	 *  0 : firmare not loaded
 	 *  1 : PSP load DMCU FW but not initialized
@@ -429,9 +436,21 @@ static bool dcn10_dmcu_init(struct dmcu *dmcu)
 		break;
 	}
 
+	PERF_TRACE();
 	return status;
 }
 
+static bool dcn21_dmcu_init(struct dmcu *dmcu)
+{
+	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
+	uint32_t dmcub_psp_version = REG_READ(DMCUB_SCRATCH15);
+
+	if (dmcu->auto_load_dmcu && dmcub_psp_version == 0) {
+		return false;
+	}
+
+	return dcn10_dmcu_init(dmcu);
+}
 
 static bool dcn10_dmcu_load_iram(struct dmcu *dmcu,
 		unsigned int start_offset,
@@ -518,9 +537,6 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait)
 	if (dmcu->dmcu_state != DMCU_RUNNING)
 		return;
 
-	dcn10_get_dmcu_psr_state(dmcu, &psr_state);
-	if (psr_state == 0 && !enable)
-		return;
 	/* waitDMCUReadyForCmd */
 	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
 				dmcu_wait_reg_ready_interval,
@@ -727,9 +743,7 @@ static bool dcn10_is_dmcu_initialized(struct dmcu *dmcu)
 	return true;
 }
 
-#endif //(CONFIG_DRM_AMD_DC_DCN1_0)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 static bool dcn20_lock_phy(struct dmcu *dmcu)
 {
@@ -777,7 +791,7 @@ static bool dcn20_unlock_phy(struct dmcu *dmcu)
 	return true;
 }
 
-#endif //(CONFIG_DRM_AMD_DC_DCN2_0)
+#endif //(CONFIG_DRM_AMD_DC_DCN)
 
 static const struct dmcu_funcs dce_funcs = {
 	.dmcu_init = dce_dmcu_init,
@@ -790,7 +804,7 @@ static const struct dmcu_funcs dce_funcs = {
 	.is_dmcu_initialized = dce_is_dmcu_initialized
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 static const struct dmcu_funcs dcn10_funcs = {
 	.dmcu_init = dcn10_dmcu_init,
 	.load_iram = dcn10_dmcu_load_iram,
@@ -801,9 +815,7 @@ static const struct dmcu_funcs dcn10_funcs = {
 	.get_psr_wait_loop = dcn10_get_psr_wait_loop,
 	.is_dmcu_initialized = dcn10_is_dmcu_initialized
 };
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 static const struct dmcu_funcs dcn20_funcs = {
 	.dmcu_init = dcn10_dmcu_init,
 	.load_iram = dcn10_dmcu_load_iram,
@@ -816,6 +828,19 @@ static const struct dmcu_funcs dcn20_funcs = {
 	.lock_phy = dcn20_lock_phy,
 	.unlock_phy = dcn20_unlock_phy
 };
+
+static const struct dmcu_funcs dcn21_funcs = {
+	.dmcu_init = dcn21_dmcu_init,
+	.load_iram = dcn10_dmcu_load_iram,
+	.set_psr_enable = dcn10_dmcu_set_psr_enable,
+	.setup_psr = dcn10_dmcu_setup_psr,
+	.get_psr_state = dcn10_get_dmcu_psr_state,
+	.set_psr_wait_loop = dcn10_psr_wait_loop,
+	.get_psr_wait_loop = dcn10_get_psr_wait_loop,
+	.is_dmcu_initialized = dcn10_is_dmcu_initialized,
+	.lock_phy = dcn20_lock_phy,
+	.unlock_phy = dcn20_unlock_phy
+};
 #endif
 
 static void dce_dmcu_construct(
@@ -836,6 +861,26 @@ static void dce_dmcu_construct(
 	dmcu_dce->dmcu_mask = dmcu_mask;
 }
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+static void dcn21_dmcu_construct(
+		struct dce_dmcu *dmcu_dce,
+		struct dc_context *ctx,
+		const struct dce_dmcu_registers *regs,
+		const struct dce_dmcu_shift *dmcu_shift,
+		const struct dce_dmcu_mask *dmcu_mask)
+{
+	uint32_t psp_version = 0;
+
+	dce_dmcu_construct(dmcu_dce, ctx, regs, dmcu_shift, dmcu_mask);
+
+	if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
+		psp_version = dm_read_reg(ctx, mmMP0_SMN_C2PMSG_58);
+		dmcu_dce->base.auto_load_dmcu = ((psp_version & 0x00FF00FF) > 0x00110029);
+		dmcu_dce->base.psp_version = psp_version;
+	}
+}
+#endif
+
 struct dmcu *dce_dmcu_create(
 	struct dc_context *ctx,
 	const struct dce_dmcu_registers *regs,
@@ -857,7 +902,7 @@ struct dmcu *dce_dmcu_create(
 	return &dmcu_dce->base;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 struct dmcu *dcn10_dmcu_create(
 	struct dc_context *ctx,
 	const struct dce_dmcu_registers *regs,
@@ -878,9 +923,7 @@ struct dmcu *dcn10_dmcu_create(
 
 	return &dmcu_dce->base;
 }
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dmcu *dcn20_dmcu_create(
 	struct dc_context *ctx,
 	const struct dce_dmcu_registers *regs,
@@ -901,15 +944,33 @@ struct dmcu *dcn20_dmcu_create(
 
 	return &dmcu_dce->base;
 }
+
+struct dmcu *dcn21_dmcu_create(
+	struct dc_context *ctx,
+	const struct dce_dmcu_registers *regs,
+	const struct dce_dmcu_shift *dmcu_shift,
+	const struct dce_dmcu_mask *dmcu_mask)
+{
+	struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
+
+	if (dmcu_dce == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dcn21_dmcu_construct(
+		dmcu_dce, ctx, regs, dmcu_shift, dmcu_mask);
+
+	dmcu_dce->base.funcs = &dcn21_funcs;
+
+	return &dmcu_dce->base;
+}
 #endif
 
 void dce_dmcu_destroy(struct dmcu **dmcu)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(*dmcu);
 
-	if (dmcu_dce->base.dmcu_state == DMCU_RUNNING)
-		dmcu_dce->base.funcs->set_psr_enable(*dmcu, false, true);
-
 	kfree(dmcu_dce);
 	*dmcu = NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
index cc8587683b4b..5e044c2d3d6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
@@ -71,6 +71,10 @@
 	DMCU_COMMON_REG_LIST_DCE_BASE(), \
 	SR(DMU_MEM_PWR_CNTL)
 
+#define DMCU_DCN20_REG_LIST()\
+	DMCU_DCN10_REG_LIST(), \
+	SR(DMCUB_SCRATCH15)
+
 #define DMCU_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
 
@@ -175,6 +179,7 @@ struct dce_dmcu_registers {
 	uint32_t DMCU_INTERRUPT_TO_UC_EN_MASK;
 	uint32_t SMU_INTERRUPT_CONTROL;
 	uint32_t DC_DMCU_SCRATCH;
+	uint32_t DMCUB_SCRATCH15;
 };
 
 struct dce_dmcu {
@@ -261,13 +266,17 @@ struct dmcu *dcn10_dmcu_create(
 	const struct dce_dmcu_shift *dmcu_shift,
 	const struct dce_dmcu_mask *dmcu_mask);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dmcu *dcn20_dmcu_create(
 	struct dc_context *ctx,
 	const struct dce_dmcu_registers *regs,
 	const struct dce_dmcu_shift *dmcu_shift,
 	const struct dce_dmcu_mask *dmcu_mask);
-#endif
+
+struct dmcu *dcn21_dmcu_create(
+	struct dc_context *ctx,
+	const struct dce_dmcu_registers *regs,
+	const struct dce_dmcu_shift *dmcu_shift,
+	const struct dce_dmcu_mask *dmcu_mask);
 
 void dce_dmcu_destroy(struct dmcu **dmcu);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
index 0275d6d60da4..e1c5839a80dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
@@ -25,7 +25,7 @@
 
 #include "dce_hwseq.h"
 #include "reg_helper.h"
-#include "hw_sequencer.h"
+#include "hw_sequencer_private.h"
 #include "core_types.h"
 
 #define CTX \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
index ac04d77058f0..c5aa1f48593a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
@@ -25,7 +25,7 @@
 #ifndef __DCE_HWSEQ_H__
 #define __DCE_HWSEQ_H__
 
-#include "hw_sequencer.h"
+#include "dc_types.h"
 
 #define BL_REG_LIST()\
 	SR(LVTMA_PWRSEQ_CNTL), \
@@ -210,7 +210,6 @@
 	SR(DC_IP_REQUEST_CNTL), \
 	BL_REG_LIST()
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define HWSEQ_DCN2_REG_LIST()\
 	HWSEQ_DCN_REG_LIST(), \
 	HSWEQ_DCN_PIXEL_RATE_REG_LIST(OTG, 0), \
@@ -276,9 +275,7 @@
 	SR(D6VGA_CONTROL), \
 	SR(DC_IP_REQUEST_CNTL), \
 	BL_REG_LIST()
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define HWSEQ_DCN21_REG_LIST()\
 	HWSEQ_DCN_REG_LIST(), \
 	HSWEQ_DCN_PIXEL_RATE_REG_LIST(OTG, 0), \
@@ -329,7 +326,6 @@
 	SR(D6VGA_CONTROL), \
 	SR(DC_IP_REQUEST_CNTL), \
 	BL_REG_LIST()
-#endif
 
 struct dce_hwseq_registers {
 
@@ -577,7 +573,6 @@ struct dce_hwseq_registers {
 	HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh),\
 	HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define HWSEQ_DCN2_MASK_SH_LIST(mask_sh)\
 	HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
 	HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
@@ -637,9 +632,7 @@ struct dce_hwseq_registers {
 	HWS_SF(, DOMAIN21_PG_STATUS, DOMAIN21_PGFSM_PWR_STATUS, mask_sh), \
 	HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
 	HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define HWSEQ_DCN21_MASK_SH_LIST(mask_sh)\
 	HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
 	HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
@@ -679,9 +672,9 @@ struct dce_hwseq_registers {
 	HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN17_PGFSM_PWR_STATUS, mask_sh), \
 	HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN18_PGFSM_PWR_STATUS, mask_sh), \
 	HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+	HWSEQ_LVTMA_MASK_SH_LIST(mask_sh), \
 	HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \
 	HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh)
-#endif
 
 #define HWSEQ_REG_FIELD_LIST(type) \
 	type DCFE_CLOCK_ENABLE; \
@@ -799,8 +792,7 @@ struct dce_hwseq_registers {
 	type D2VGA_MODE_ENABLE; \
 	type D3VGA_MODE_ENABLE; \
 	type D4VGA_MODE_ENABLE; \
-	type AZALIA_AUDIO_DTO_MODULE;\
-	type HPO_HDMISTREAMCLK_GATE_DIS;
+	type AZALIA_AUDIO_DTO_MODULE;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
@@ -819,6 +811,10 @@ enum blnd_mode {
 	BLND_MODE_BLENDING,/* Alpha blending - blend 'current' and 'other' */
 };
 
+struct dce_hwseq;
+struct pipe_ctx;
+struct clock_source;
+
 void dce_enable_fe_clock(struct dce_hwseq *hwss,
 		unsigned int inst, bool enable);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
index 35a75398fcb4..dd41736bb5c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
@@ -31,7 +31,7 @@ bool dce_i2c_submit_command(
 	struct i2c_command *cmd)
 {
 	struct dce_i2c_hw *dce_i2c_hw;
-	struct dce_i2c_sw *dce_i2c_sw;
+	struct dce_i2c_sw dce_i2c_sw = {0};
 
 	if (!ddc) {
 		BREAK_TO_DEBUGGER();
@@ -43,18 +43,15 @@ bool dce_i2c_submit_command(
 		return false;
 	}
 
-	/* The software engine is only available on dce8 */
-	dce_i2c_sw = dce_i2c_acquire_i2c_sw_engine(pool, ddc);
-
-	if (!dce_i2c_sw) {
-		dce_i2c_hw = acquire_i2c_hw_engine(pool, ddc);
-
-		if (!dce_i2c_hw)
-			return false;
+	dce_i2c_hw = acquire_i2c_hw_engine(pool, ddc);
 
+	if (dce_i2c_hw)
 		return dce_i2c_submit_command_hw(pool, ddc, cmd, dce_i2c_hw);
-	}
 
-	return dce_i2c_submit_command_sw(pool, ddc, cmd, dce_i2c_sw);
+	dce_i2c_sw.ctx = ddc->ctx;
+	if (dce_i2c_engine_acquire_sw(&dce_i2c_sw, ddc)) {
+		return dce_i2c_submit_command_sw(pool, ddc, cmd, &dce_i2c_sw);
+	}
 
+	return false;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index aad7b52165be..1cd4d8fc361f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -296,9 +296,7 @@ static bool setup_engine(
 	struct dce_i2c_hw *dce_i2c_hw)
 {
 	uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint32_t  reset_length = 0;
-#endif
 	/* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
 	REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
 
@@ -322,14 +320,12 @@ static bool setup_engine(
 		REG_UPDATE_N(SETUP, 2,
 			     FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_TIME_LIMIT), i2c_setup_limit,
 			     FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE), 1);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	} else {
 		reset_length = dce_i2c_hw->send_reset_length;
 		REG_UPDATE_N(SETUP, 3,
 			     FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_TIME_LIMIT), i2c_setup_limit,
 			     FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_SEND_RESET_LENGTH), reset_length,
 			     FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE), 1);
-#endif
 	}
 	/* Program HW priority
 	 * set to High - interrupt software I2C at any time
@@ -705,7 +701,6 @@ void dcn1_i2c_hw_construct(
 	dce_i2c_hw->setup_limit = I2C_SETUP_TIME_LIMIT_DCN;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 void dcn2_i2c_hw_construct(
 	struct dce_i2c_hw *dce_i2c_hw,
 	struct dc_context *ctx,
@@ -724,4 +719,3 @@ void dcn2_i2c_hw_construct(
 	if (ctx->dc->debug.scl_reset_length10)
 		dce_i2c_hw->send_reset_length = I2C_SEND_RESET_LENGTH_10;
 }
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index cb0234e5d597..d4b2037f7d74 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -177,9 +177,7 @@ struct dce_i2c_shift {
 	uint8_t DC_I2C_INDEX;
 	uint8_t DC_I2C_INDEX_WRITE;
 	uint8_t XTAL_REF_DIV;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint8_t DC_I2C_DDC1_SEND_RESET_LENGTH;
-#endif
 	uint8_t DC_I2C_REG_RW_CNTL_STATUS;
 };
 
@@ -220,17 +218,13 @@ struct dce_i2c_mask {
 	uint32_t DC_I2C_INDEX;
 	uint32_t DC_I2C_INDEX_WRITE;
 	uint32_t XTAL_REF_DIV;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint32_t DC_I2C_DDC1_SEND_RESET_LENGTH;
-#endif
 	uint32_t DC_I2C_REG_RW_CNTL_STATUS;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define I2C_COMMON_MASK_SH_LIST_DCN2(mask_sh)\
 	I2C_COMMON_MASK_SH_LIST_DCE110(mask_sh),\
 	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_SEND_RESET_LENGTH, mask_sh)
-#endif
 
 struct dce_i2c_registers {
 	uint32_t SETUP;
@@ -312,7 +306,6 @@ void dcn1_i2c_hw_construct(
 	const struct dce_i2c_shift *shifts,
 	const struct dce_i2c_mask *masks);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 void dcn2_i2c_hw_construct(
 	struct dce_i2c_hw *dce_i2c_hw,
 	struct dc_context *ctx,
@@ -320,7 +313,6 @@ void dcn2_i2c_hw_construct(
 	const struct dce_i2c_registers *regs,
 	const struct dce_i2c_shift *shifts,
 	const struct dce_i2c_mask *masks);
-#endif
 
 bool dce_i2c_submit_command_hw(
 	struct resource_pool *pool,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
index a5a11c251e25..87d8428df6c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
@@ -73,31 +73,6 @@ static void release_engine_dce_sw(
 	dce_i2c_sw->ddc = NULL;
 }
 
-static bool get_hw_supported_ddc_line(
-	struct ddc *ddc,
-	enum gpio_ddc_line *line)
-{
-	enum gpio_ddc_line line_found;
-
-	*line = GPIO_DDC_LINE_UNKNOWN;
-
-	if (!ddc) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	if (!ddc->hw_info.hw_supported)
-		return false;
-
-	line_found = dal_ddc_get_line(ddc);
-
-	if (line_found >= GPIO_DDC_LINE_COUNT)
-		return false;
-
-	*line = line_found;
-
-	return true;
-}
 static bool wait_for_scl_high_sw(
 	struct dc_context *ctx,
 	struct ddc *ddc,
@@ -524,21 +499,3 @@ bool dce_i2c_submit_command_sw(
 
 	return result;
 }
-struct dce_i2c_sw *dce_i2c_acquire_i2c_sw_engine(
-	struct resource_pool *pool,
-	struct ddc *ddc)
-{
-	enum gpio_ddc_line line;
-	struct dce_i2c_sw *engine = NULL;
-
-	if (get_hw_supported_ddc_line(ddc, &line))
-		engine = pool->sw_i2cs[line];
-
-	if (!engine)
-		return NULL;
-
-	if (!dce_i2c_engine_acquire_sw(engine, ddc))
-		return NULL;
-
-	return engine;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.h
index 5bbcdd455614..019fc47bb767 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.h
@@ -49,9 +49,9 @@ bool dce_i2c_submit_command_sw(
 	struct i2c_command *cmd,
 	struct dce_i2c_sw *dce_i2c_sw);
 
-struct dce_i2c_sw *dce_i2c_acquire_i2c_sw_engine(
-	struct resource_pool *pool,
-	struct ddc *ddc);
+bool dce_i2c_engine_acquire_sw(
+	struct dce_i2c_sw *dce_i2c_sw,
+	struct ddc *ddc_handle);
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 31b698bf9cfc..8aa937f496c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -606,11 +606,11 @@ static void dce_mi_allocate_dmif(
 	}
 
 	if (dce_mi->wa.single_head_rdreq_dmif_limit) {
-		uint32_t eanble =  (total_stream_num > 1) ? 0 :
+		uint32_t enable =  (total_stream_num > 1) ? 0 :
 				dce_mi->wa.single_head_rdreq_dmif_limit;
 
 		REG_UPDATE(MC_HUB_RDREQ_DMIF_LIMIT,
-				ENABLE, eanble);
+				ENABLE, enable);
 	}
 }
 
@@ -636,11 +636,11 @@ static void dce_mi_free_dmif(
 			10, 3500);
 
 	if (dce_mi->wa.single_head_rdreq_dmif_limit) {
-		uint32_t eanble =  (total_stream_num > 1) ? 0 :
+		uint32_t enable =  (total_stream_num > 1) ? 0 :
 				dce_mi->wa.single_head_rdreq_dmif_limit;
 
 		REG_UPDATE(MC_HUB_RDREQ_DMIF_LIMIT,
-				ENABLE, eanble);
+				ENABLE, enable);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 6ed922a3c1cd..451574971b96 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -137,7 +137,7 @@ static void dce110_update_generic_info_packet(
 			AFMT_GENERIC0_UPDATE, (packet_index == 0),
 			AFMT_GENERIC2_UPDATE, (packet_index == 2));
 	}
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	if (REG(AFMT_VBI_PACKET_CONTROL1)) {
 		switch (packet_index) {
 		case 0:
@@ -231,7 +231,7 @@ static void dce110_update_hdmi_info_packet(
 				HDMI_GENERIC1_SEND, send,
 				HDMI_GENERIC1_LINE, line);
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case 4:
 		if (REG(HDMI_GENERIC_PACKET_CONTROL2))
 			REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
@@ -275,9 +275,10 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting)
 {
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	uint32_t h_active_start;
 	uint32_t v_active_start;
 	uint32_t misc0 = 0;
@@ -329,7 +330,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 		if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN)
 			REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 		if (enc110->se_mask->DP_VID_N_MUL)
 			REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1);
 #endif
@@ -340,7 +341,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 		break;
 	}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	if (REG(DP_MSA_MISC))
 		misc1 = REG_READ(DP_MSA_MISC);
 #endif
@@ -374,7 +375,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 	/* set dynamic range and YCbCr range */
 
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	switch (hw_crtc_timing.display_color_depth) {
 	case COLOR_DEPTH_666:
 		colorimetry_bpc = 0;
@@ -454,7 +455,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 				DP_DYN_RANGE, dynamic_range_rgb,
 				DP_YCBCR_RANGE, dynamic_range_ycbcr);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 		if (REG(DP_MSA_COLORIMETRY))
 			REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
 
@@ -489,7 +490,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 				hw_crtc_timing.v_front_porch;
 
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 		/* start at begining of left border */
 		if (REG(DP_MSA_TIMING_PARAM2))
 			REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
@@ -786,7 +787,7 @@ static void dce110_stream_encoder_update_hdmi_info_packets(
 		dce110_update_hdmi_info_packet(enc110, 3, &info_frame->hdrsmd);
 	}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	if (enc110->se_mask->HDMI_DB_DISABLE) {
 		/* for bring up, disable dp double  TODO */
 		if (REG(HDMI_DB_CONTROL))
@@ -824,7 +825,7 @@ static void dce110_stream_encoder_stop_hdmi_info_packets(
 		HDMI_GENERIC1_LINE, 0,
 		HDMI_GENERIC1_SEND, 0);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	/* stop generic packets 2 & 3 on HDMI */
 	if (REG(HDMI_GENERIC_PACKET_CONTROL2))
 		REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
new file mode 100644
index 000000000000..225955ec6d39
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dmub_psr.h"
+#include "dc.h"
+#include "dc_dmub_srv.h"
+#include "../../dmub/inc/dmub_srv.h"
+#include "dmub_fw_state.h"
+#include "core_types.h"
+#include "ipp.h"
+
+#define MAX_PIPES 6
+
+/**
+ * Get PSR state from firmware.
+ */
+static void dmub_get_psr_state(uint32_t *psr_state)
+{
+	// Not yet implemented
+	// Trigger GPINT interrupt from firmware
+}
+
+/**
+ * Enable/Disable PSR.
+ */
+static void dmub_set_psr_enable(struct dmub_psr *dmub, bool enable)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+
+	cmd.psr_enable.header.type = DMUB_CMD__PSR;
+
+	if (enable)
+		cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_ENABLE;
+	else
+		cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_DISABLE;
+
+	cmd.psr_enable.header.payload_bytes = 0; // Send header only
+
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+}
+
+/**
+ * Set PSR level.
+ */
+static void dmub_set_psr_level(struct dmub_psr *dmub, uint16_t psr_level)
+{
+	union dmub_rb_cmd cmd;
+	uint32_t psr_state = 0;
+	struct dc_context *dc = dmub->ctx;
+
+	dmub_get_psr_state(&psr_state);
+
+	if (psr_state == 0)
+		return;
+
+	cmd.psr_set_level.header.type = DMUB_CMD__PSR;
+	cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL;
+	cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data);
+	cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
+
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+}
+
+/**
+ * Setup PSR by programming phy registers and sending psr hw context values to firmware.
+ */
+static bool dmub_setup_psr(struct dmub_psr *dmub,
+		struct dc_link *link,
+		struct psr_context *psr_context)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+	struct dmub_cmd_psr_copy_settings_data *copy_settings_data
+		= &cmd.psr_copy_settings.psr_copy_settings_data;
+	struct pipe_ctx *pipe_ctx = NULL;
+	struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		if (res_ctx &&
+				res_ctx->pipe_ctx[i].stream &&
+				res_ctx->pipe_ctx[i].stream->link &&
+				res_ctx->pipe_ctx[i].stream->link == link &&
+				res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) {
+			pipe_ctx = &res_ctx->pipe_ctx[i];
+			break;
+		}
+	}
+
+	if (!pipe_ctx ||
+			!&pipe_ctx->plane_res ||
+			!&pipe_ctx->stream_res)
+		return false;
+
+	// Program DP DPHY fast training registers
+	link->link_enc->funcs->psr_program_dp_dphy_fast_training(link->link_enc,
+			psr_context->psrExitLinkTrainingRequired);
+
+	// Program DP_SEC_CNTL1 register to set transmission GPS0 line num and priority to high
+	link->link_enc->funcs->psr_program_secondary_packet(link->link_enc,
+			psr_context->sdpTransmitLineNumDeadline);
+
+	cmd.psr_copy_settings.header.type = DMUB_CMD__PSR;
+	cmd.psr_copy_settings.header.sub_type = DMUB_CMD__PSR_COPY_SETTINGS;
+	cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data);
+
+	// Hw insts
+	copy_settings_data->dpphy_inst				= psr_context->phyType;
+	copy_settings_data->aux_inst				= psr_context->channel;
+	copy_settings_data->digfe_inst				= psr_context->engineId;
+	copy_settings_data->digbe_inst				= psr_context->transmitterId;
+
+	copy_settings_data->mpcc_inst				= pipe_ctx->plane_res.mpcc_inst;
+
+	if (pipe_ctx->plane_res.hubp)
+		copy_settings_data->hubp_inst			= pipe_ctx->plane_res.hubp->inst;
+	else
+		copy_settings_data->hubp_inst			= 0;
+	if (pipe_ctx->plane_res.dpp)
+		copy_settings_data->dpp_inst			= pipe_ctx->plane_res.dpp->inst;
+	else
+		copy_settings_data->dpp_inst			= 0;
+	if (pipe_ctx->stream_res.opp)
+		copy_settings_data->opp_inst			= pipe_ctx->stream_res.opp->inst;
+	else
+		copy_settings_data->opp_inst			= 0;
+	if (pipe_ctx->stream_res.tg)
+		copy_settings_data->otg_inst			= pipe_ctx->stream_res.tg->inst;
+	else
+		copy_settings_data->otg_inst			= 0;
+
+	// Misc
+	copy_settings_data->psr_level				= psr_context->psr_level.u32all;
+	copy_settings_data->hyst_frames				= psr_context->timehyst_frames;
+	copy_settings_data->hyst_lines				= psr_context->hyst_lines;
+	copy_settings_data->phy_type				= psr_context->phyType;
+	copy_settings_data->aux_repeat				= psr_context->aux_repeats;
+	copy_settings_data->smu_optimizations_en	= psr_context->allow_smu_optimizations;
+	copy_settings_data->skip_wait_for_pll_lock	= psr_context->skipPsrWaitForPllLock;
+	copy_settings_data->frame_delay				= psr_context->frame_delay;
+	copy_settings_data->smu_phy_id				= psr_context->smuPhyId;
+	copy_settings_data->num_of_controllers		= psr_context->numberOfControllers;
+	copy_settings_data->frame_cap_ind			= psr_context->psrFrameCaptureIndicationReq;
+	copy_settings_data->phy_num					= psr_context->frame_delay & 0x7;
+	copy_settings_data->link_rate				= psr_context->frame_delay & 0xF;
+
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+	return true;
+}
+
+static const struct dmub_psr_funcs psr_funcs = {
+	.set_psr_enable			= dmub_set_psr_enable,
+	.setup_psr				= dmub_setup_psr,
+	.get_psr_state			= dmub_get_psr_state,
+	.set_psr_level			= dmub_set_psr_level,
+};
+
+/**
+ * Construct PSR object.
+ */
+static void dmub_psr_construct(struct dmub_psr *psr, struct dc_context *ctx)
+{
+	psr->ctx = ctx;
+	psr->funcs = &psr_funcs;
+}
+
+/**
+ * Allocate and initialize PSR object.
+ */
+struct dmub_psr *dmub_psr_create(struct dc_context *ctx)
+{
+	struct dmub_psr *psr = kzalloc(sizeof(struct dmub_psr), GFP_KERNEL);
+
+	if (psr == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dmub_psr_construct(psr, ctx);
+
+	return psr;
+}
+
+/**
+ * Deallocate PSR object.
+ */
+void dmub_psr_destroy(struct dmub_psr **dmub)
+{
+	kfree(dmub);
+	*dmub = NULL;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
new file mode 100644
index 000000000000..229958de3035
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_PSR_H_
+#define _DMUB_PSR_H_
+
+#include "os_types.h"
+
+struct dmub_psr {
+	struct dc_context *ctx;
+	const struct dmub_psr_funcs *funcs;
+};
+
+struct dmub_psr_funcs {
+	void (*set_psr_enable)(struct dmub_psr *dmub, bool enable);
+	bool (*setup_psr)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context);
+	void (*get_psr_state)(uint32_t *psr_state);
+	void (*set_psr_level)(struct dmub_psr *dmub, uint16_t psr_level);
+};
+
+struct dmub_psr *dmub_psr_create(struct dc_context *ctx);
+void dmub_psr_destroy(struct dmub_psr **dmub);
+
+
+#endif /* _DCE_DMUB_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
index 799d36299c9b..753cb8edd996 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
@@ -26,7 +26,6 @@
 #include "dc.h"
 #include "core_types.h"
 #include "clk_mgr.h"
-#include "hw_sequencer.h"
 #include "dce100_hw_sequencer.h"
 #include "resource.h"
 
@@ -136,7 +135,7 @@ void dce100_hw_sequencer_construct(struct dc *dc)
 {
 	dce110_hw_sequencer_construct(dc);
 
-	dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating;
+	dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
 	dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
 	dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
index a6b80fdaa666..34518da20009 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
@@ -27,6 +27,7 @@
 #define __DC_HWSS_DCE100_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
 struct dc;
 struct dc_state;
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 76d54885374a..8f78bf9abbca 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -399,6 +399,37 @@ static const struct dc_plane_cap plane_cap = {
 #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8
 #endif
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void read_dce_straps(
 	struct dc_context *ctx,
 	struct resource_straps *straps)
@@ -506,6 +537,14 @@ static const struct dce_mem_input_mask mi_masks = {
 		.ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCE10_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCE10_AUX_MASK_SH_LIST(_MASK)
+};
+
 static struct mem_input *dce100_mem_input_create(
 	struct dc_context *ctx,
 	uint32_t inst)
@@ -571,14 +610,18 @@ struct link_encoder *dce100_link_encoder_create(
 {
 	struct dce110_link_encoder *enc110 =
 		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc110)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dce110_link_encoder_construct(enc110,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
 	return &enc110->base;
@@ -611,7 +654,10 @@ struct dce_aux *dce100_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -679,7 +725,7 @@ void dce100_clock_source_destroy(struct clock_source **clk_src)
 	*clk_src = NULL;
 }
 
-static void destruct(struct dce110_resource_pool *pool)
+static void dce100_resource_destruct(struct dce110_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -839,7 +885,7 @@ static void dce100_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
 
-	destruct(dce110_pool);
+	dce100_resource_destruct(dce110_pool);
 	kfree(dce110_pool);
 	*pool = NULL;
 }
@@ -904,7 +950,7 @@ static const struct resource_funcs dce100_res_pool_funcs = {
 	.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
 };
 
-static bool construct(
+static bool dce100_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc  *dc,
 	struct dce110_resource_pool *pool)
@@ -997,6 +1043,8 @@ static bool construct(
 	dc->caps.max_cursor_size = 128;
 	dc->caps.dual_link_dvi = true;
 	dc->caps.disable_dp_clk_share = true;
+	dc->caps.extended_aux_timeout_support = false;
+
 	for (i = 0; i < pool->base.pipe_count; i++) {
 		pool->base.timing_generators[i] =
 			dce100_timing_generator_create(
@@ -1074,7 +1122,7 @@ static bool construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce100_resource_destruct(pool);
 
 	return false;
 }
@@ -1089,7 +1137,7 @@ struct resource_pool *dce100_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(num_virtual_links, dc, pool))
+	if (dce100_resource_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
 	kfree(pool);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 01a924bf477a..5b689273ff44 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -61,6 +61,8 @@
 
 #include "atomfirmware.h"
 
+#define GAMMA_HW_POINTS_NUM 256
+
 /*
  * All values are in milliseconds;
  * For eDP, after power-up/power/down,
@@ -268,7 +270,7 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params,
 }
 
 static bool
-dce110_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
+dce110_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
 			       const struct dc_plane_state *plane_state)
 {
 	struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
@@ -596,7 +598,7 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
 }
 
 static bool
-dce110_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
+dce110_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
 				const struct dc_stream_state *stream)
 {
 	struct transform *xfm = pipe_ctx->plane_res.xfm;
@@ -651,10 +653,9 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
 {
 	enum dc_lane_count lane_count =
 		pipe_ctx->stream->link->cur_link_settings.lane_count;
-
 	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
 	struct dc_link *link = pipe_ctx->stream->link;
-
+	const struct dc *dc = link->dc;
 
 	uint32_t active_total_with_borders;
 	uint32_t early_control = 0;
@@ -667,7 +668,7 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
 	link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
 						    pipe_ctx->stream_res.stream_enc->id, true);
 
-	link->dc->hwss.update_info_frame(pipe_ctx);
+	dc->hwss.update_info_frame(pipe_ctx);
 
 	/* enable early control to avoid corruption on DP monitor*/
 	active_total_with_borders =
@@ -943,27 +944,23 @@ void dce110_edp_backlight_control(
 void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 {
 	/* notify audio driver for audio modes of monitor */
-	struct dc *core_dc;
-	struct pp_smu_funcs *pp_smu = NULL;
+	struct dc *dc;
 	struct clk_mgr *clk_mgr;
 	unsigned int i, num_audio = 1;
 
 	if (!pipe_ctx->stream)
 		return;
 
-	core_dc = pipe_ctx->stream->ctx->dc;
-	clk_mgr = core_dc->clk_mgr;
+	dc = pipe_ctx->stream->ctx->dc;
+	clk_mgr = dc->clk_mgr;
 
 	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
 		return;
 
-	if (core_dc->res_pool->pp_smu)
-		pp_smu = core_dc->res_pool->pp_smu;
-
 	if (pipe_ctx->stream_res.audio) {
 		for (i = 0; i < MAX_PIPES; i++) {
 			/*current_state not updated yet*/
-			if (core_dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL)
+			if (dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL)
 				num_audio++;
 		}
 
@@ -984,7 +981,6 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx)
 {
 	struct dc *dc;
-	struct pp_smu_funcs *pp_smu = NULL;
 	struct clk_mgr *clk_mgr;
 
 	if (!pipe_ctx || !pipe_ctx->stream)
@@ -1001,9 +997,6 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx)
 	if (pipe_ctx->stream_res.audio) {
 		pipe_ctx->stream_res.audio->enabled = false;
 
-		if (dc->res_pool->pp_smu)
-			pp_smu = dc->res_pool->pp_smu;
-
 		if (dc_is_dp_signal(pipe_ctx->stream->signal))
 			pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable(
 					pipe_ctx->stream_res.stream_enc);
@@ -1055,6 +1048,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
 	struct encoder_unblank_param params = { { 0 } };
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;
+	struct dce_hwseq *hws = link->dc->hwseq;
 
 	/* only 3 items below are used by unblank */
 	params.timing = pipe_ctx->stream->timing;
@@ -1064,7 +1058,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
 		pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, &params);
 
 	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
-		link->dc->hwss.edp_backlight_control(link, true);
+		hws->funcs.edp_backlight_control(link, true);
 	}
 }
 
@@ -1072,9 +1066,10 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;
+	struct dce_hwseq *hws = link->dc->hwseq;
 
 	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
-		link->dc->hwss.edp_backlight_control(link, false);
+		hws->funcs.edp_backlight_control(link, false);
 		dc_link_set_abm_disable(link);
 	}
 
@@ -1169,8 +1164,9 @@ static void build_audio_output(
 		}
 	}
 
-	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
-			pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+	if (state->clk_mgr &&
+		(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+			pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
 		audio_output->pll_info.dp_dto_source_clock_in_khz =
 				state->clk_mgr->funcs->get_dp_ref_clk_frequency(
 						state->clk_mgr);
@@ -1230,7 +1226,7 @@ static void program_scaler(const struct dc *dc,
 {
 	struct tg_color color = {0};
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	/* TOFPGA */
 	if (pipe_ctx->plane_res.xfm->funcs->transform_set_pixel_storage_depth == NULL)
 		return;
@@ -1329,12 +1325,11 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct drr_params params = {0};
 	unsigned int event_triggers = 0;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
-#endif
+	struct dce_hwseq *hws = dc->hwseq;
 
-	if (dc->hwss.disable_stream_gating) {
-		dc->hwss.disable_stream_gating(dc, pipe_ctx);
+	if (hws->funcs.disable_stream_gating) {
+		hws->funcs.disable_stream_gating(dc, pipe_ctx);
 	}
 
 	if (pipe_ctx->stream_res.audio != NULL) {
@@ -1364,10 +1359,10 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	/*  */
 	/* Do not touch stream timing on seamless boot optimization. */
 	if (!pipe_ctx->stream->apply_seamless_boot_optimization)
-		dc->hwss.enable_stream_timing(pipe_ctx, context, dc);
+		hws->funcs.enable_stream_timing(pipe_ctx, context, dc);
 
-	if (dc->hwss.setup_vupdate_interrupt)
-		dc->hwss.setup_vupdate_interrupt(pipe_ctx);
+	if (hws->funcs.setup_vupdate_interrupt)
+		hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
 
 	params.vertical_total_min = stream->adjust.v_total_min;
 	params.vertical_total_max = stream->adjust.v_total_max;
@@ -1378,9 +1373,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	// DRR should set trigger event to monitor surface update event
 	if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
 		event_triggers = 0x80;
+	/* Event triggers and num frames initialized for DRR, but can be
+	 * later updated for PSR use. Note DRR trigger events are generated
+	 * regardless of whether num frames met.
+	 */
 	if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
 		pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
-				pipe_ctx->stream_res.tg, event_triggers);
+				pipe_ctx->stream_res.tg, event_triggers, 2);
 
 	if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
 		pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg(
@@ -1397,7 +1396,6 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 		pipe_ctx->stream_res.opp,
 		&stream->bit_depth_params,
 		&stream->clamping);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	while (odm_pipe) {
 		odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion(
 				odm_pipe->stream_res.opp,
@@ -1411,14 +1409,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 				&stream->clamping);
 		odm_pipe = odm_pipe->next_odm_pipe;
 	}
-#endif
 
 	if (!stream->dpms_off)
 		core_link_enable_stream(context, pipe_ctx);
 
 	pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
 
-	pipe_ctx->stream->link->psr_enabled = false;
+	pipe_ctx->stream->link->psr_feature_enabled = false;
 
 	return DC_OK;
 }
@@ -1428,8 +1425,6 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 static void power_down_encoders(struct dc *dc)
 {
 	int i;
-	enum connector_id connector_id;
-	enum signal_type signal = SIGNAL_TYPE_NONE;
 
 	/* do not know BIOS back-front mapping, simply blank all. It will not
 	 * hurt for non-DP
@@ -1440,15 +1435,15 @@ static void power_down_encoders(struct dc *dc)
 	}
 
 	for (i = 0; i < dc->link_count; i++) {
-		connector_id = dal_graphics_object_id_get_connector_id(dc->links[i]->link_id);
-		if ((connector_id == CONNECTOR_ID_DISPLAY_PORT) ||
-			(connector_id == CONNECTOR_ID_EDP)) {
+		enum signal_type signal = dc->links[i]->connector_signal;
 
+		if ((signal == SIGNAL_TYPE_EDP) ||
+			(signal == SIGNAL_TYPE_DISPLAY_PORT))
 			if (!dc->links[i]->wa_flags.dp_keep_receiver_powered)
 				dp_receiver_power_ctrl(dc->links[i], false);
-			if (connector_id == CONNECTOR_ID_EDP)
-				signal = SIGNAL_TYPE_EDP;
-		}
+
+		if (signal != SIGNAL_TYPE_EDP)
+			signal = SIGNAL_TYPE_NONE;
 
 		dc->links[i]->link_enc->funcs->disable_output(
 				dc->links[i]->link_enc, signal);
@@ -1529,18 +1524,6 @@ static struct dc_stream_state *get_edp_stream(struct dc_state *context)
 	return NULL;
 }
 
-static struct dc_link *get_edp_link(struct dc *dc)
-{
-	int i;
-
-	// report any eDP links, even unconnected DDI's
-	for (i = 0; i < dc->link_count; i++) {
-		if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
-			return dc->links[i];
-	}
-	return NULL;
-}
-
 static struct dc_link *get_edp_link_with_sink(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1576,9 +1559,10 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
 	bool can_apply_edp_fast_boot = false;
 	bool can_apply_seamless_boot = false;
 	bool keep_edp_vdd_on = false;
+	struct dce_hwseq *hws = dc->hwseq;
 
-	if (dc->hwss.init_pipes)
-		dc->hwss.init_pipes(dc, context);
+	if (hws->funcs.init_pipes)
+		hws->funcs.init_pipes(dc, context);
 
 	edp_stream = get_edp_stream(context);
 
@@ -1615,7 +1599,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
 	if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
 		if (edp_link_with_sink && !keep_edp_vdd_on) {
 			/*turn off backlight before DP_blank and encoder powered down*/
-			dc->hwss.edp_backlight_control(edp_link_with_sink, false);
+			hws->funcs.edp_backlight_control(edp_link_with_sink, false);
 		}
 		/*resume from S3, no vbios posting, no need to power down again*/
 		power_down_all_hw_blocks(dc);
@@ -1726,6 +1710,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
 	struct drr_params params = {0};
 	// DRR should set trigger event to monitor surface update event
 	unsigned int event_triggers = 0x80;
+	// Note DRR trigger events are generated regardless of whether num frames met.
+	unsigned int num_frames = 2;
 
 	params.vertical_total_max = vmax;
 	params.vertical_total_min = vmin;
@@ -1741,7 +1727,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
 		if (vmax != 0 && vmin != 0)
 			pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
 					pipe_ctx[i]->stream_res.tg,
-					event_triggers);
+					event_triggers, num_frames);
 	}
 }
 
@@ -1758,30 +1744,31 @@ static void get_position(struct pipe_ctx **pipe_ctx,
 }
 
 static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
-		int num_pipes, const struct dc_static_screen_events *events)
+		int num_pipes, const struct dc_static_screen_params *params)
 {
 	unsigned int i;
-	unsigned int value = 0;
+	unsigned int triggers = 0;
 
-	if (events->overlay_update)
-		value |= 0x100;
-	if (events->surface_update)
-		value |= 0x80;
-	if (events->cursor_update)
-		value |= 0x2;
-	if (events->force_trigger)
-		value |= 0x1;
+	if (params->triggers.overlay_update)
+		triggers |= 0x100;
+	if (params->triggers.surface_update)
+		triggers |= 0x80;
+	if (params->triggers.cursor_update)
+		triggers |= 0x2;
+	if (params->triggers.force_trigger)
+		triggers |= 0x1;
 
 	if (num_pipes) {
 		struct dc *dc = pipe_ctx[0]->stream->ctx->dc;
 
 		if (dc->fbc_compressor)
-			value |= 0x84;
+			triggers |= 0x84;
 	}
 
 	for (i = 0; i < num_pipes; i++)
 		pipe_ctx[i]->stream_res.tg->funcs->
-			set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
+			set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+					triggers, params->num_frames);
 }
 
 /*
@@ -1834,7 +1821,7 @@ static bool should_enable_fbc(struct dc *dc,
 		return false;
 
 	/* PSR should not be enabled */
-	if (pipe_ctx->stream->link->psr_enabled)
+	if (pipe_ctx->stream->link->psr_feature_enabled)
 		return false;
 
 	/* Nothing to compress */
@@ -2030,13 +2017,14 @@ enum dc_status dce110_apply_ctx_to_hw(
 		struct dc *dc,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	enum dc_status status;
 	int i;
 
 	/* Reset old context */
 	/* look up the targets that have been removed since last commit */
-	dc->hwss.reset_hw_ctx_wrap(dc, context);
+	hws->funcs.reset_hw_ctx_wrap(dc, context);
 
 	/* Skip applying if no targets */
 	if (context->stream_count <= 0)
@@ -2061,7 +2049,7 @@ enum dc_status dce110_apply_ctx_to_hw(
 			continue;
 		}
 
-		dc->hwss.enable_display_power_gating(
+		hws->funcs.enable_display_power_gating(
 				dc, i, dc->ctx->dc_bios,
 				PIPE_GATING_CONTROL_DISABLE);
 	}
@@ -2370,19 +2358,20 @@ static void init_hw(struct dc *dc)
 	struct transform *xfm;
 	struct abm *abm;
 	struct dmcu *dmcu;
+	struct dce_hwseq *hws = dc->hwseq;
 
 	bp = dc->ctx->dc_bios;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		xfm = dc->res_pool->transforms[i];
 		xfm->funcs->transform_reset(xfm);
 
-		dc->hwss.enable_display_power_gating(
+		hws->funcs.enable_display_power_gating(
 				dc, i, bp,
 				PIPE_GATING_CONTROL_INIT);
-		dc->hwss.enable_display_power_gating(
+		hws->funcs.enable_display_power_gating(
 				dc, i, bp,
 				PIPE_GATING_CONTROL_DISABLE);
-		dc->hwss.enable_display_pipe_clock_gating(
+		hws->funcs.enable_display_pipe_clock_gating(
 			dc->ctx,
 			true);
 	}
@@ -2464,17 +2453,15 @@ static void dce110_program_front_end_for_pipe(
 		struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	struct mem_input *mi = pipe_ctx->plane_res.mi;
-	struct pipe_ctx *old_pipe = NULL;
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct xfm_grph_csc_adjustment adjust;
 	struct out_csc_color_matrix tbl_entry;
 	unsigned int i;
+	struct dce_hwseq *hws = dc->hwseq;
+
 	DC_LOGGER_INIT();
 	memset(&tbl_entry, 0, sizeof(tbl_entry));
 
-	if (dc->current_state)
-		old_pipe = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx];
-
 	memset(&adjust, 0, sizeof(adjust));
 	adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
 
@@ -2530,10 +2517,10 @@ static void dce110_program_front_end_for_pipe(
 	if (pipe_ctx->plane_state->update_flags.bits.full_update ||
 			pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
 			pipe_ctx->plane_state->update_flags.bits.gamma_change)
-		dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state);
+		hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
 
 	if (pipe_ctx->plane_state->update_flags.bits.full_update)
-		dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream);
+		hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
 
 	DC_LOG_SURFACE(
 			"Pipe:%d %p: addr hi:0x%x, "
@@ -2636,6 +2623,7 @@ static void dce110_apply_ctx_for_surface(
 
 static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	int fe_idx = pipe_ctx->plane_res.mi ?
 		pipe_ctx->plane_res.mi->inst : pipe_ctx->pipe_idx;
 
@@ -2643,7 +2631,7 @@ static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	if (dc->current_state->res_ctx.pipe_ctx[fe_idx].stream)
 		return;
 
-	dc->hwss.enable_display_power_gating(
+	hws->funcs.enable_display_power_gating(
 		dc, fe_idx, dc->ctx->dc_bios, PIPE_GATING_CONTROL_ENABLE);
 
 	dc->res_pool->transforms[fe_idx]->funcs->transform_reset(
@@ -2732,14 +2720,10 @@ static const struct hw_sequencer_funcs dce110_funcs = {
 	.program_gamut_remap = program_gamut_remap,
 	.program_output_csc = program_output_csc,
 	.init_hw = init_hw,
-	.init_pipes = init_pipes,
 	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
 	.apply_ctx_for_surface = dce110_apply_ctx_for_surface,
 	.update_plane_addr = update_plane_addr,
 	.update_pending_status = dce110_update_pending_status,
-	.set_input_transfer_func = dce110_set_input_transfer_func,
-	.set_output_transfer_func = dce110_set_output_transfer_func,
-	.power_down = dce110_power_down,
 	.enable_accelerated_mode = dce110_enable_accelerated_mode,
 	.enable_timing_synchronization = dce110_enable_timing_synchronization,
 	.enable_per_frame_crtc_position_reset = dce110_enable_per_frame_crtc_position_reset,
@@ -2750,8 +2734,6 @@ static const struct hw_sequencer_funcs dce110_funcs = {
 	.blank_stream = dce110_blank_stream,
 	.enable_audio_stream = dce110_enable_audio_stream,
 	.disable_audio_stream = dce110_disable_audio_stream,
-	.enable_display_pipe_clock_gating = enable_display_pipe_clock_gating,
-	.enable_display_power_gating = dce110_enable_display_power_gating,
 	.disable_plane = dce110_power_down_fe,
 	.pipe_control_lock = dce_pipe_control_lock,
 	.prepare_bandwidth = dce110_prepare_bandwidth,
@@ -2759,22 +2741,33 @@ static const struct hw_sequencer_funcs dce110_funcs = {
 	.set_drr = set_drr,
 	.get_position = get_position,
 	.set_static_screen_control = set_static_screen_control,
-	.reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
-	.enable_stream_timing = dce110_enable_stream_timing,
-	.disable_stream_gating = NULL,
-	.enable_stream_gating = NULL,
 	.setup_stereo = NULL,
 	.set_avmute = dce110_set_avmute,
 	.wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect,
-	.edp_backlight_control = dce110_edp_backlight_control,
 	.edp_power_control = dce110_edp_power_control,
 	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
 	.set_cursor_position = dce110_set_cursor_position,
 	.set_cursor_attribute = dce110_set_cursor_attribute
 };
 
+static const struct hwseq_private_funcs dce110_private_funcs = {
+	.init_pipes = init_pipes,
+	.update_plane_addr = update_plane_addr,
+	.set_input_transfer_func = dce110_set_input_transfer_func,
+	.set_output_transfer_func = dce110_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_pipe_clock_gating = enable_display_pipe_clock_gating,
+	.enable_display_power_gating = dce110_enable_display_power_gating,
+	.reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
+	.enable_stream_timing = dce110_enable_stream_timing,
+	.disable_stream_gating = NULL,
+	.enable_stream_gating = NULL,
+	.edp_backlight_control = dce110_edp_backlight_control,
+};
+
 void dce110_hw_sequencer_construct(struct dc *dc)
 {
 	dc->hwss = dce110_funcs;
+	dc->hwseq->funcs = dce110_private_funcs;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
index 2f9b7dbdf415..26a9c14a58b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
@@ -27,8 +27,8 @@
 #define __DC_HWSS_DCE110_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
-#define GAMMA_HW_POINTS_NUM 256
 struct dc;
 struct dc_state;
 struct dm_pp_display_configuration;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index 89620adc81d8..bf14e9ab040c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -275,6 +275,14 @@ static const struct dce_stream_encoder_mask se_mask = {
 		SE_COMMON_MASK_SH_LIST_DCE110(_MASK)
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCE_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCE_AUX_MASK_SH_LIST(_MASK)
+};
+
 #define opp_regs(id)\
 [id] = {\
 	OPP_DCE_110_REG_LIST(id),\
@@ -440,6 +448,37 @@ static const struct dc_plane_cap underlay_plane_cap = {
 #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8
 #endif
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void read_dce_straps(
 	struct dc_context *ctx,
 	struct resource_straps *straps)
@@ -617,14 +656,18 @@ static struct link_encoder *dce110_link_encoder_create(
 {
 	struct dce110_link_encoder *enc110 =
 		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc110)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dce110_link_encoder_construct(enc110,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
 	return &enc110->base;
@@ -657,7 +700,10 @@ struct dce_aux *dce110_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -736,7 +782,7 @@ void dce110_clock_source_destroy(struct clock_source **clk_src)
 	*clk_src = NULL;
 }
 
-static void destruct(struct dce110_resource_pool *pool)
+static void dce110_resource_destruct(struct dce110_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -1051,6 +1097,7 @@ static struct pipe_ctx *dce110_acquire_underlay(
 		struct dc_stream_state *stream)
 {
 	struct dc *dc = stream->ctx->dc;
+	struct dce_hwseq *hws = dc->hwseq;
 	struct resource_context *res_ctx = &context->res_ctx;
 	unsigned int underlay_idx = pool->underlay_pipe_index;
 	struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[underlay_idx];
@@ -1071,7 +1118,7 @@ static struct pipe_ctx *dce110_acquire_underlay(
 		struct tg_color black_color = {0};
 		struct dc_bios *dcb = dc->ctx->dc_bios;
 
-		dc->hwss.enable_display_power_gating(
+		hws->funcs.enable_display_power_gating(
 				dc,
 				pipe_ctx->stream_res.tg->inst,
 				dcb, PIPE_GATING_CONTROL_DISABLE);
@@ -1115,7 +1162,7 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
 
-	destruct(dce110_pool);
+	dce110_resource_destruct(dce110_pool);
 	kfree(dce110_pool);
 	*pool = NULL;
 }
@@ -1267,7 +1314,7 @@ const struct resource_caps *dce110_resource_cap(
 		return &carrizo_resource_cap;
 }
 
-static bool construct(
+static bool dce110_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dce110_resource_pool *pool,
@@ -1293,6 +1340,7 @@ static bool construct(
 	dc->caps.i2c_speed_in_khz = 100;
 	dc->caps.max_cursor_size = 128;
 	dc->caps.is_apu = true;
+	dc->caps.extended_aux_timeout_support = false;
 
 	/*************************************************
 	 *  Create resources                             *
@@ -1445,7 +1493,7 @@ static bool construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce110_resource_destruct(pool);
 	return false;
 }
 
@@ -1460,7 +1508,7 @@ struct resource_pool *dce110_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(num_virtual_links, dc, pool, asic_id))
+	if (dce110_resource_construct(num_virtual_links, dc, pool, asic_id))
 		return &pool->base;
 
 	kfree(pool);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 5f7c2c5641c4..1ea7db8eeb98 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -469,22 +469,27 @@ void dce110_timing_generator_set_drr(
 
 void dce110_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
 	uint32_t static_screen_cntl = 0;
 	uint32_t addr = 0;
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	addr = CRTC_REG(mmCRTC_STATIC_SCREEN_CONTROL);
 	static_screen_cntl = dm_read_reg(tg->ctx, addr);
 
 	set_reg_field_value(static_screen_cntl,
-				value,
+				event_triggers,
 				CRTC_STATIC_SCREEN_CONTROL,
 				CRTC_STATIC_SCREEN_EVENT_MASK);
 
 	set_reg_field_value(static_screen_cntl,
-				2,
+				num_frames,
 				CRTC_STATIC_SCREEN_CONTROL,
 				CRTC_STATIC_SCREEN_FRAME_COUNT);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index 768ccf27ada9..d8a5ed7b485d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -231,7 +231,8 @@ void dce110_timing_generator_set_drr(
 
 void dce110_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value);
+	uint32_t event_triggers,
+	uint32_t num_frames);
 
 void dce110_timing_generator_get_crtc_scanoutpos(
 	struct timing_generator *tg,
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
index 1e4a7c13f0ed..19873ee1f78d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
@@ -158,6 +158,6 @@ void dce112_hw_sequencer_construct(struct dc *dc)
 	 * structure
 	 */
 	dce110_hw_sequencer_construct(dc);
-	dc->hwss.enable_display_power_gating = dce112_enable_display_power_gating;
+	dc->hwseq->funcs.enable_display_power_gating = dce112_enable_display_power_gating;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h
index e646f4a37fa2..943f1b2c5b2f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h
@@ -27,6 +27,7 @@
 #define __DC_HWSS_DCE112_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
 struct dc;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index 21a657e79306..700ad8b3e54b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -172,6 +172,14 @@ static const struct dce_abm_mask abm_mask = {
 		ABM_MASK_SH_LIST_DCE110(_MASK)
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCE_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCE_AUX_MASK_SH_LIST(_MASK)
+};
+
 #define ipp_regs(id)\
 [id] = {\
 		IPP_DCE110_REG_LIST_DCE_BASE(id)\
@@ -417,6 +425,37 @@ static const struct dc_plane_cap plane_cap = {
 #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8
 #endif
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void read_dce_straps(
 	struct dc_context *ctx,
 	struct resource_straps *straps)
@@ -575,14 +614,18 @@ struct link_encoder *dce112_link_encoder_create(
 {
 	struct dce110_link_encoder *enc110 =
 		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc110)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dce110_link_encoder_construct(enc110,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
 	return &enc110->base;
@@ -630,7 +673,10 @@ struct dce_aux *dce112_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -698,7 +744,7 @@ void dce112_clock_source_destroy(struct clock_source **clk_src)
 	*clk_src = NULL;
 }
 
-static void destruct(struct dce110_resource_pool *pool)
+static void dce112_resource_destruct(struct dce110_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -967,7 +1013,7 @@ static void dce112_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
 
-	destruct(dce110_pool);
+	dce112_resource_destruct(dce110_pool);
 	kfree(dce110_pool);
 	*pool = NULL;
 }
@@ -1140,7 +1186,7 @@ const struct resource_caps *dce112_resource_cap(
 		return &polaris_10_resource_cap;
 }
 
-static bool construct(
+static bool dce112_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dce110_resource_pool *pool)
@@ -1163,7 +1209,7 @@ static bool construct(
 	dc->caps.i2c_speed_in_khz = 100;
 	dc->caps.max_cursor_size = 128;
 	dc->caps.dual_link_dvi = true;
-
+	dc->caps.extended_aux_timeout_support = false;
 
 	/*************************************************
 	 *  Create resources                             *
@@ -1326,7 +1372,7 @@ static bool construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce112_resource_destruct(pool);
 	return false;
 }
 
@@ -1340,7 +1386,7 @@ struct resource_pool *dce112_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(num_virtual_links, dc, pool))
+	if (dce112_resource_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
 	kfree(pool);
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
index 1ca30928025e..66a13aa39c95 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
@@ -265,7 +265,7 @@ void dce120_hw_sequencer_construct(struct dc *dc)
 	 * structure
 	 */
 	dce110_hw_sequencer_construct(dc);
-	dc->hwss.enable_display_power_gating = dce120_enable_display_power_gating;
+	dc->hwseq->funcs.enable_display_power_gating = dce120_enable_display_power_gating;
 	dc->hwss.update_dchub = dce120_update_dchub;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h
index c51afbd0b012..bc024534732f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h
@@ -27,6 +27,7 @@
 #define __DC_HWSS_DCE120_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
 struct dc;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 7c52f7f9196c..53ab88ef71f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -63,8 +63,8 @@
 #include "soc15_hw_ip.h"
 #include "vega10_ip_offset.h"
 #include "nbio/nbio_6_1_offset.h"
-#include "mmhub/mmhub_9_4_0_offset.h"
-#include "mmhub/mmhub_9_4_0_sh_mask.h"
+#include "mmhub/mmhub_1_0_offset.h"
+#include "mmhub/mmhub_1_0_sh_mask.h"
 #include "reg_helper.h"
 
 #include "dce100/dce100_resource.h"
@@ -293,6 +293,14 @@ static const struct dce_stream_encoder_mask se_mask = {
 		SE_COMMON_MASK_SH_LIST_DCE120(_MASK)
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCE12_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCE12_AUX_MASK_SH_LIST(_MASK)
+};
+
 #define opp_regs(id)\
 [id] = {\
 	OPP_DCE_120_REG_LIST(id),\
@@ -356,6 +364,37 @@ static const struct dce_audio_mask audio_mask = {
 		DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
 };
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 #define clk_src_regs(index, id)\
 [index] = {\
 	CS_COMMON_REG_LIST_DCE_112(id),\
@@ -404,7 +443,10 @@ struct dce_aux *dce120_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -545,7 +587,7 @@ static void dce120_transform_destroy(struct transform **xfm)
 	*xfm = NULL;
 }
 
-static void destruct(struct dce110_resource_pool *pool)
+static void dce120_resource_destruct(struct dce110_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -655,14 +697,18 @@ static struct link_encoder *dce120_link_encoder_create(
 {
 	struct dce110_link_encoder *enc110 =
 		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc110)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dce110_link_encoder_construct(enc110,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
 
@@ -826,7 +872,7 @@ static void dce120_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
 
-	destruct(dce110_pool);
+	dce120_resource_destruct(dce110_pool);
 	kfree(dce110_pool);
 	*pool = NULL;
 }
@@ -978,7 +1024,7 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-static bool construct(
+static bool dce120_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dce110_resource_pool *pool)
@@ -1006,7 +1052,7 @@ static bool construct(
 	dc->caps.max_cursor_size = 128;
 	dc->caps.dual_link_dvi = true;
 	dc->caps.psp_setup_panel_mode = true;
-
+	dc->caps.extended_aux_timeout_support = false;
 	dc->debug = debug_defaults;
 
 	/*************************************************
@@ -1191,7 +1237,7 @@ controller_create_fail:
 clk_src_create_fail:
 res_create_fail:
 
-	destruct(pool);
+	dce120_resource_destruct(pool);
 
 	return false;
 }
@@ -1206,7 +1252,7 @@ struct resource_pool *dce120_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(num_virtual_links, dc, pool))
+	if (dce120_resource_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
 	kfree(pool);
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 098e56962f2a..82bc4e192bbf 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -819,13 +819,18 @@ void dce120_tg_set_colors(struct timing_generator *tg,
 
 static void dce120_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	CRTC_REG_UPDATE_2(CRTC0_CRTC_STATIC_SCREEN_CONTROL,
-			CRTC_STATIC_SCREEN_EVENT_MASK, value,
-			CRTC_STATIC_SCREEN_FRAME_COUNT, 2);
+			CRTC_STATIC_SCREEN_EVENT_MASK, event_triggers,
+			CRTC_STATIC_SCREEN_FRAME_COUNT, num_frames);
 }
 
 void dce120_timing_generator_set_test_pattern(
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
index c4543178ba20..893261c81854 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
@@ -74,7 +74,7 @@ void dce80_hw_sequencer_construct(struct dc *dc)
 {
 	dce110_hw_sequencer_construct(dc);
 
-	dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating;
+	dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
 	dc->hwss.pipe_control_lock = dce_pipe_control_lock;
 	dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
 	dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h
index 7a1b31def66f..e43af832d00c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h
@@ -27,6 +27,7 @@
 #define __DC_HWSS_DCE80_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
 struct dc;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 643ccb0ade00..2ad5c28c6e66 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -288,6 +288,14 @@ static const struct dce_opp_mask opp_mask = {
 	OPP_COMMON_MASK_SH_LIST_DCE_80(_MASK)
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCE10_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCE10_AUX_MASK_SH_LIST(_MASK)
+};
+
 #define aux_engine_regs(id)\
 [id] = {\
 	AUX_COMMON_REG_LIST(id), \
@@ -431,6 +439,37 @@ static const struct dce_abm_mask abm_mask = {
 #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8
 #endif
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void read_dce_straps(
 	struct dc_context *ctx,
 	struct resource_straps *straps)
@@ -491,7 +530,10 @@ struct dce_aux *dce80_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -669,14 +711,18 @@ struct link_encoder *dce80_link_encoder_create(
 {
 	struct dce110_link_encoder *enc110 =
 		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc110)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dce110_link_encoder_construct(enc110,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
 	return &enc110->base;
@@ -727,7 +773,7 @@ static struct input_pixel_processor *dce80_ipp_create(
 	return &ipp->base;
 }
 
-static void destruct(struct dce110_resource_pool *pool)
+static void dce80_resource_destruct(struct dce110_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -855,7 +901,7 @@ static void dce80_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
 
-	destruct(dce110_pool);
+	dce80_resource_destruct(dce110_pool);
 	kfree(dce110_pool);
 	*pool = NULL;
 }
@@ -895,6 +941,7 @@ static bool dce80_construct(
 	dc->caps.i2c_speed_in_khz = 40;
 	dc->caps.max_cursor_size = 128;
 	dc->caps.dual_link_dvi = true;
+	dc->caps.extended_aux_timeout_support = false;
 
 	/*************************************************
 	 *  Create resources                             *
@@ -1046,7 +1093,7 @@ static bool dce80_construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce80_resource_destruct(pool);
 	return false;
 }
 
@@ -1243,7 +1290,7 @@ static bool dce81_construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce80_resource_destruct(pool);
 	return false;
 }
 
@@ -1436,7 +1483,7 @@ static bool dce83_construct(
 	return true;
 
 res_create_fail:
-	destruct(pool);
+	dce80_resource_destruct(pool);
 	return false;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 032f872be89c..62ad1a11bff9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,7 +22,8 @@
 #
 # Makefile for DCN.
 
-DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o dcn10_hw_sequencer_debug.o \
+DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
+		dcn10_hw_sequencer_debug.o \
 		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index d8b2da18db39..0e682b5aa3eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -129,7 +129,7 @@ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp)
 
 #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
 
-static bool dpp_get_optimal_number_of_taps(
+bool dpp1_get_optimal_number_of_taps(
 		struct dpp *dpp,
 		struct scaler_data *scl_data,
 		const struct scaling_taps *in_taps)
@@ -290,12 +290,8 @@ void dpp1_cnv_setup (
 		enum surface_pixel_format format,
 		enum expansion_mode mode,
 		struct dc_csc_transform input_csc_color_matrix,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 		enum dc_color_space input_color_space,
 		struct cnv_alpha_2bit_lut *alpha_2bit_lut)
-#else
-		enum dc_color_space input_color_space)
-#endif
 {
 	uint32_t pixel_format;
 	uint32_t alpha_en;
@@ -521,7 +517,7 @@ static const struct dpp_funcs dcn10_dpp_funcs = {
 		.dpp_read_state = dpp_read_state,
 		.dpp_reset = dpp_reset,
 		.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
-		.dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps,
+		.dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps,
 		.dpp_set_gamut_remap = dpp1_cm_set_gamut_remap,
 		.dpp_set_csc_adjustment = dpp1_cm_set_output_csc_adjustment,
 		.dpp_set_csc_default = dpp1_cm_set_output_csc_default,
@@ -542,11 +538,9 @@ static const struct dpp_funcs dcn10_dpp_funcs = {
 		.set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
 		.dpp_dppclk_control = dpp1_dppclk_control,
 		.dpp_set_hdr_multiplier = dpp1_set_hdr_multiplier,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		.dpp_program_blnd_lut = NULL,
 		.dpp_program_shaper_lut = NULL,
 		.dpp_program_3dlut = NULL
-#endif
 };
 
 static struct dpp_caps dcn10_dpp_cap = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
index e2c613611ac9..2edf566b3a72 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
@@ -1486,12 +1486,8 @@ void dpp1_cnv_setup (
 		enum surface_pixel_format format,
 		enum expansion_mode mode,
 		struct dc_csc_transform input_csc_color_matrix,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 		enum dc_color_space input_color_space,
 		struct cnv_alpha_2bit_lut *alpha_2bit_lut);
-#else
-		enum dc_color_space input_color_space);
-#endif
 
 void dpp1_full_bypass(struct dpp *dpp_base);
 
@@ -1504,6 +1500,11 @@ void dpp1_set_hdr_multiplier(
 		struct dpp *dpp_base,
 		uint32_t multiplier);
 
+bool dpp1_get_optimal_number_of_taps(
+		struct dpp *dpp,
+		struct scaler_data *scl_data,
+		const struct scaling_taps *in_taps);
+
 void dpp1_construct(struct dcn10_dpp *dpp1,
 	struct dc_context *ctx,
 	uint32_t inst,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
index aa0c7a7d13a0..4d3f7d5e1473 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
@@ -88,26 +88,6 @@ enum dscl_mode_sel {
 	DSCL_MODE_DSCL_BYPASS = 6
 };
 
-static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = {
-	{COLOR_SPACE_SRGB,
-		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
-	{COLOR_SPACE_SRGB_LIMITED,
-		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
-	{COLOR_SPACE_YCBCR601,
-		{0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef,
-						0, 0x2000, 0x38b4, 0xe3a6} },
-	{COLOR_SPACE_YCBCR601_LIMITED,
-		{0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108,
-						0, 0x2568, 0x40de, 0xdd3a} },
-	{COLOR_SPACE_YCBCR709,
-		{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
-						0x2000, 0x3b61, 0xe24f} },
-
-	{COLOR_SPACE_YCBCR709_LIMITED,
-		{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
-						0x2568, 0x43ee, 0xdbb2} }
-};
-
 static void program_gamut_remap(
 		struct dcn10_dpp *dpp,
 		const uint16_t *regval,
@@ -352,6 +332,8 @@ void dpp1_cm_program_regamma_lut(struct dpp *dpp_base,
 	uint32_t i;
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
 
+	REG_SEQ_START();
+
 	for (i = 0 ; i < num; i++) {
 		REG_SET(CM_RGAM_LUT_DATA, 0, CM_RGAM_LUT_DATA, rgb[i].red_reg);
 		REG_SET(CM_RGAM_LUT_DATA, 0, CM_RGAM_LUT_DATA, rgb[i].green_reg);
@@ -626,10 +608,16 @@ void dpp1_set_degamma(
 	case IPP_DEGAMMA_MODE_HW_xvYCC:
 		REG_UPDATE(CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, 2);
 			break;
+	case IPP_DEGAMMA_MODE_USER_PWL:
+		REG_UPDATE(CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, 3);
+		break;
 	default:
 		BREAK_TO_DEBUGGER();
 		break;
 	}
+
+	REG_SEQ_SUBMIT();
+	REG_SEQ_WAIT_DONE();
 }
 
 void dpp1_degamma_ram_select(
@@ -731,10 +719,8 @@ void dpp1_full_bypass(struct dpp *dpp_base)
 	/* COLOR_KEYER_CONTROL.COLOR_KEYER_EN = 0 this should be default */
 	if (dpp->tf_mask->CM_BYPASS_EN)
 		REG_SET(CM_CONTROL, 0, CM_BYPASS_EN, 1);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	else
 		REG_SET(CM_CONTROL, 0, CM_BYPASS, 1);
-#endif
 
 	/* Setting degamma bypass for now */
 	REG_SET(CM_DGAM_CONTROL, 0, CM_DGAM_LUT_MODE, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
index d67e0abeee93..fce37c527a0b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
@@ -218,14 +218,12 @@ static void dpp1_dscl_set_lb(
 			INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */
 			LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
 	}
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	else {
 		/* DSCL caps: pixel data processed in float format */
 		REG_SET_2(LB_DATA_FORMAT, 0,
 			INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */
 			LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
 	}
-#endif
 
 	REG_SET_2(LB_MEMORY_CTRL, 0,
 		MEMORY_CONFIG, mem_size_config,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
index 374cc9acda3b..b6391a5ead78 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
@@ -23,7 +23,7 @@
  *
  */
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 
 #include "reg_helper.h"
 #include "resource.h"
@@ -109,9 +109,7 @@ const struct dwbc_funcs dcn10_dwbc_funcs = {
 	.update				= NULL,
 	.set_stereo			= NULL,
 	.set_new_content		= NULL,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	.set_warmup			= NULL,
-#endif
 	.dwb_set_scaler			= NULL,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
index c175edd0bae7..d56ea7c8171e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
@@ -24,7 +24,7 @@
 #ifndef __DC_DWBC_DCN10_H__
 #define __DC_DWBC_DCN10_H__
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 
 /* DCN */
 #define BASE_INNER(seg) \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index a02c10e23e0d..f36a0d8cedfe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -930,6 +930,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.max_compressed_blk_size = 64;
 		output->grph.rgb.independent_64b_blks = true;
 		break;
+	default:
+		ASSERT(false);
+		break;
 	}
 
 	output->capable = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index 69d903d68661..af57751253de 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -121,7 +121,6 @@ struct dcn_hubbub_registers {
 	uint32_t DCN_VM_AGP_BASE;
 	uint32_t DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_MSB;
 	uint32_t DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_A;
 	uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_B;
 	uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_C;
@@ -140,7 +139,6 @@ struct dcn_hubbub_registers {
 	uint32_t DCHVM_CLK_CTRL;
 	uint32_t DCHVM_RIOMMU_CTRL0;
 	uint32_t DCHVM_RIOMMU_STAT0;
-#endif
 };
 
 /* set field name */
@@ -232,7 +230,6 @@ struct dcn_hubbub_registers {
 		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C;\
 		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define HUBBUB_HVM_REG_FIELD_LIST(type) \
 		type DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD;\
 		type DCHUBBUB_ARB_VM_ROW_URGENCY_WATERMARK_A;\
@@ -278,22 +275,17 @@ struct dcn_hubbub_registers {
 		type HOSTVM_POWERSTATUS; \
 		type RIOMMU_ACTIVE; \
 		type HOSTVM_PREFETCH_DONE
-#endif
 
 struct dcn_hubbub_shift {
 	DCN_HUBBUB_REG_FIELD_LIST(uint8_t);
 	HUBBUB_STUTTER_REG_FIELD_LIST(uint8_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	HUBBUB_HVM_REG_FIELD_LIST(uint8_t);
-#endif
 };
 
 struct dcn_hubbub_mask {
 	DCN_HUBBUB_REG_FIELD_LIST(uint32_t);
 	HUBBUB_STUTTER_REG_FIELD_LIST(uint32_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	HUBBUB_HVM_REG_FIELD_LIST(uint32_t);
-#endif
 };
 
 struct dc;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 001db49e4bb2..31b64733d693 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -306,7 +306,6 @@ void hubp1_program_pixel_format(
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 12);
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 112);
@@ -327,7 +326,6 @@ void hubp1_program_pixel_format(
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 119);
 		break;
-#endif
 	default:
 		BREAK_TO_DEBUGGER();
 		break;
@@ -841,6 +839,14 @@ void min_set_viewport(
 	REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0,
 		  PRI_VIEWPORT_X_START_C, viewport_c->x,
 		  PRI_VIEWPORT_Y_START_C, viewport_c->y);
+
+	REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0,
+		  SEC_VIEWPORT_WIDTH_C, viewport_c->width,
+		  SEC_VIEWPORT_HEIGHT_C, viewport_c->height);
+
+	REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0,
+		  SEC_VIEWPORT_X_START_C, viewport_c->x,
+		  SEC_VIEWPORT_Y_START_C, viewport_c->y);
 }
 
 void hubp1_read_state_common(struct hubp *hubp)
@@ -1006,6 +1012,9 @@ void hubp1_read_state_common(struct hubp *hubp)
 			HUBP_TTU_DISABLE, &s->ttu_disable,
 			HUBP_UNDERFLOW_STATUS, &s->underflow_status);
 
+	REG_GET(HUBP_CLK_CNTL,
+			HUBP_CLOCK_ENABLE, &s->clock_en);
+
 	REG_GET(DCN_GLOBAL_TTU_CNTL,
 			MIN_TTU_VBLANK, &s->min_ttu_vblank);
 
@@ -1240,10 +1249,8 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
 	.hubp_get_underflow_status = hubp1_get_underflow_status,
 	.hubp_init = hubp1_init,
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	.dmdata_set_attributes = NULL,
 	.dmdata_load = NULL,
-#endif
 };
 
 /*****************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index cb20d10288c0..780af5b3c16f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -47,6 +47,8 @@
 	SRI(DCSURF_SEC_VIEWPORT_START, HUBP, id), \
 	SRI(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \
 	SRI(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \
+	SRI(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \
+	SRI(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \
 	SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id),\
 	SRI(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id),\
 	SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id),\
@@ -57,8 +59,12 @@
 	SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS, HUBPREQ, id),\
 	SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\
 	SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id),\
+	SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\
+	SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id),\
 	SRI(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\
 	SRI(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, HUBPREQ, id),\
+	SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\
+	SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, HUBPREQ, id),\
 	SRI(DCSURF_SURFACE_INUSE, HUBPREQ, id),\
 	SRI(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id),\
 	SRI(DCSURF_SURFACE_INUSE_C, HUBPREQ, id),\
@@ -150,6 +156,8 @@
 	uint32_t DCSURF_SEC_VIEWPORT_START; \
 	uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C; \
 	uint32_t DCSURF_PRI_VIEWPORT_START_C; \
+	uint32_t DCSURF_SEC_VIEWPORT_DIMENSION_C; \
+	uint32_t DCSURF_SEC_VIEWPORT_START_C; \
 	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; \
 	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; \
 	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; \
@@ -160,8 +168,12 @@
 	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; \
 	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; \
 	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; \
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C; \
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_C; \
 	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; \
 	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; \
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C; \
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_C; \
 	uint32_t DCSURF_SURFACE_INUSE; \
 	uint32_t DCSURF_SURFACE_INUSE_HIGH; \
 	uint32_t DCSURF_SURFACE_INUSE_C; \
@@ -279,6 +291,10 @@
 	HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_HEIGHT_C, mask_sh),\
 	HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_X_START_C, mask_sh),\
 	HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_Y_START_C, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_WIDTH_C, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_HEIGHT_C, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_X_START_C, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_Y_START_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, PRIMARY_SURFACE_ADDRESS_HIGH, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS, PRIMARY_SURFACE_ADDRESS, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, SECONDARY_SURFACE_ADDRESS_HIGH, mask_sh),\
@@ -289,8 +305,12 @@
 	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS, SECONDARY_META_SURFACE_ADDRESS, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, PRIMARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_C, PRIMARY_SURFACE_ADDRESS_C, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, SECONDARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_C, SECONDARY_SURFACE_ADDRESS_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, PRIMARY_META_SURFACE_ADDRESS_HIGH_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, PRIMARY_META_SURFACE_ADDRESS_C, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, SECONDARY_META_SURFACE_ADDRESS_HIGH_C, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, SECONDARY_META_SURFACE_ADDRESS_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE, SURFACE_INUSE_ADDRESS, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH, SURFACE_INUSE_ADDRESS_HIGH, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_C, SURFACE_INUSE_ADDRESS_C, mask_sh),\
@@ -469,6 +489,10 @@
 	type PRI_VIEWPORT_HEIGHT_C; \
 	type PRI_VIEWPORT_X_START_C; \
 	type PRI_VIEWPORT_Y_START_C; \
+	type SEC_VIEWPORT_WIDTH_C; \
+	type SEC_VIEWPORT_HEIGHT_C; \
+	type SEC_VIEWPORT_X_START_C; \
+	type SEC_VIEWPORT_Y_START_C; \
 	type PRIMARY_SURFACE_ADDRESS_HIGH;\
 	type PRIMARY_SURFACE_ADDRESS;\
 	type SECONDARY_SURFACE_ADDRESS_HIGH;\
@@ -479,8 +503,12 @@
 	type SECONDARY_META_SURFACE_ADDRESS;\
 	type PRIMARY_SURFACE_ADDRESS_HIGH_C;\
 	type PRIMARY_SURFACE_ADDRESS_C;\
+	type SECONDARY_SURFACE_ADDRESS_HIGH_C;\
+	type SECONDARY_SURFACE_ADDRESS_C;\
 	type PRIMARY_META_SURFACE_ADDRESS_HIGH_C;\
 	type PRIMARY_META_SURFACE_ADDRESS_C;\
+	type SECONDARY_META_SURFACE_ADDRESS_HIGH_C;\
+	type SECONDARY_META_SURFACE_ADDRESS_C;\
 	type SURFACE_INUSE_ADDRESS;\
 	type SURFACE_INUSE_ADDRESS_HIGH;\
 	type SURFACE_INUSE_ADDRESS_C;\
@@ -642,6 +670,7 @@ struct dcn_hubp_state {
 	uint32_t sw_mode;
 	uint32_t dcc_en;
 	uint32_t blank_en;
+	uint32_t clock_en;
 	uint32_t underflow_status;
 	uint32_t ttu_disable;
 	uint32_t min_ttu_vblank;
@@ -700,13 +729,11 @@ void hubp1_dcc_control(struct hubp *hubp,
 		bool enable,
 		enum hubp_ind_block_size independent_64b_blks);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 bool hubp1_program_surface_flip_and_addr(
 	struct hubp *hubp,
 	const struct dc_plane_address *address,
 	bool flip_immediate);
 
-#endif
 bool hubp1_is_flip_pending(struct hubp *hubp);
 
 void hubp1_cursor_set_attributes(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 60123db7ba02..f2127afb37b2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -25,17 +25,18 @@
 
 #include <linux/delay.h>
 #include "dm_services.h"
+#include "basics/dc_common.h"
 #include "core_types.h"
 #include "resource.h"
 #include "custom_float.h"
 #include "dcn10_hw_sequencer.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10_hw_sequencer_debug.h"
 #include "dce/dce_hwseq.h"
 #include "abm.h"
 #include "dmcu.h"
 #include "dcn10_optc.h"
-#include "dcn10/dcn10_dpp.h"
-#include "dcn10/dcn10_mpc.h"
+#include "dcn10_dpp.h"
+#include "dcn10_mpc.h"
 #include "timing_generator.h"
 #include "opp.h"
 #include "ipp.h"
@@ -49,9 +50,7 @@
 #include "clk_mgr.h"
 
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "dsc.h"
-#endif
 
 #define DC_LOGGER_INIT(logger)
 
@@ -68,6 +67,8 @@
 #define DTN_INFO_MICRO_SEC(ref_cycle) \
 	print_microsec(dc_ctx, log_ctx, ref_cycle)
 
+#define GAMMA_HW_POINTS_NUM 256
+
 void print_microsec(struct dc_context *dc_ctx,
 	struct dc_log_buffer_ctx *log_ctx,
 	uint32_t ref_cycle)
@@ -81,6 +82,33 @@ void print_microsec(struct dc_context *dc_ctx,
 			us_x10 % frac);
 }
 
+static void dcn10_lock_all_pipes(struct dc *dc,
+	struct dc_state *context,
+	bool lock)
+{
+	struct pipe_ctx *pipe_ctx;
+	struct timing_generator *tg;
+	int i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe_ctx = &context->res_ctx.pipe_ctx[i];
+		tg = pipe_ctx->stream_res.tg;
+		/*
+		 * Only lock the top pipe's tg to prevent redundant
+		 * (un)locking. Also skip if pipe is disabled.
+		 */
+		if (pipe_ctx->top_pipe ||
+		    !pipe_ctx->stream || !pipe_ctx->plane_state ||
+		    !tg->funcs->is_tg_enabled(tg))
+			continue;
+
+		if (lock)
+			tg->funcs->lock(tg);
+		else
+			tg->funcs->unlock(tg);
+	}
+}
+
 static void log_mpc_crc(struct dc *dc,
 	struct dc_log_buffer_ctx *log_ctx)
 {
@@ -129,9 +157,8 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
 	struct resource_pool *pool = dc->res_pool;
 	int i;
 
-	DTN_INFO("HUBP:  format  addr_hi  width  height"
-			"  rot  mir  sw_mode  dcc_en  blank_en  ttu_dis  underflow"
-			"   min_ttu_vblank       qos_low_wm      qos_high_wm\n");
+	DTN_INFO(
+		"HUBP:  format  addr_hi  width  height  rot  mir  sw_mode  dcc_en  blank_en  clock_en  ttu_dis  underflow   min_ttu_vblank       qos_low_wm      qos_high_wm\n");
 	for (i = 0; i < pool->pipe_count; i++) {
 		struct hubp *hubp = pool->hubps[i];
 		struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state);
@@ -139,8 +166,7 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
 		hubp->funcs->hubp_read_state(hubp);
 
 		if (!s->blank_en) {
-			DTN_INFO("[%2d]:  %5xh  %6xh  %5d  %6d  %2xh  %2xh  %6xh"
-					"  %6d  %8d  %7d  %8xh",
+			DTN_INFO("[%2d]:  %5xh  %6xh  %5d  %6d  %2xh  %2xh  %6xh  %6d  %8d  %8d  %7d  %8xh",
 					hubp->inst,
 					s->pixel_format,
 					s->inuse_addr_hi,
@@ -151,6 +177,7 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
 					s->sw_mode,
 					s->dcc_en,
 					s->blank_en,
+					s->clock_en,
 					s->ttu_disable,
 					s->underflow_status);
 			DTN_INFO_MICRO_SEC(s->min_ttu_vblank);
@@ -308,21 +335,31 @@ void dcn10_log_hw_state(struct dc *dc,
 	}
 	DTN_INFO("\n");
 
-	DTN_INFO("OTG:  v_bs  v_be  v_ss  v_se  vpol  vmax  vmin  vmax_sel  vmin_sel"
-			"  h_bs  h_be  h_ss  h_se  hpol  htot  vtot  underflow\n");
+	DTN_INFO("OTG:  v_bs  v_be  v_ss  v_se  vpol  vmax  vmin  vmax_sel  vmin_sel  h_bs  h_be  h_ss  h_se  hpol  htot  vtot  underflow blank_en\n");
 
 	for (i = 0; i < pool->timing_generator_count; i++) {
 		struct timing_generator *tg = pool->timing_generators[i];
 		struct dcn_otg_state s = {0};
-
+		/* Read shared OTG state registers for all DCNx */
 		optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
 
+		/*
+		 * For DCN2 and greater, a register on the OPP is used to
+		 * determine if the CRTC is blanked instead of the OTG. So use
+		 * dpg_is_blanked() if exists, otherwise fallback on otg.
+		 *
+		 * TODO: Implement DCN-specific read_otg_state hooks.
+		 */
+		if (pool->opps[i]->funcs->dpg_is_blanked)
+			s.blank_enabled = pool->opps[i]->funcs->dpg_is_blanked(pool->opps[i]);
+		else
+			s.blank_enabled = tg->funcs->is_blanked(tg);
+
 		//only print if OTG master is enabled
 		if ((s.otg_enabled & 1) == 0)
 			continue;
 
-		DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d"
-				" %5d %5d %5d %5d  %9d\n",
+		DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d %5d %5d %5d %5d  %9d %8d\n",
 				tg->inst,
 				s.v_blank_start,
 				s.v_blank_end,
@@ -340,7 +377,8 @@ void dcn10_log_hw_state(struct dc *dc,
 				s.h_sync_a_pol,
 				s.h_total,
 				s.v_total,
-				s.underflow_occurred_status);
+				s.underflow_occurred_status,
+				s.blank_enabled);
 
 		// Clear underflow for debug purposes
 		// We want to keep underflow sticky bit on for the longevity tests outside of test environment.
@@ -350,7 +388,6 @@ void dcn10_log_hw_state(struct dc *dc,
 	}
 	DTN_INFO("\n");
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	DTN_INFO("DSC: CLOCK_EN  SLICE_WIDTH  Bytes_pp\n");
 	for (i = 0; i < pool->res_cap->num_dsc; i++) {
 		struct display_stream_compressor *dsc = pool->dscs[i];
@@ -387,7 +424,7 @@ void dcn10_log_hw_state(struct dc *dc,
 	}
 	DTN_INFO("\n");
 
-	DTN_INFO("L_ENC: DPHY_FEC_EN  DPHY_FEC_READY_SHADOW  DPHY_FEC_ACTIVE_STATUS\n");
+	DTN_INFO("L_ENC: DPHY_FEC_EN  DPHY_FEC_READY_SHADOW  DPHY_FEC_ACTIVE_STATUS  DP_LINK_TRAINING_COMPLETE\n");
 	for (i = 0; i < dc->link_count; i++) {
 		struct link_encoder *lenc = dc->links[i]->link_enc;
 
@@ -395,16 +432,16 @@ void dcn10_log_hw_state(struct dc *dc,
 
 		if (lenc->funcs->read_state) {
 			lenc->funcs->read_state(lenc, &s);
-			DTN_INFO("[%-3d]: %-12d %-22d %-22d\n",
+			DTN_INFO("[%-3d]: %-12d %-22d %-22d %-25d\n",
 				i,
 				s.dphy_fec_en,
 				s.dphy_fec_ready_shadow,
-				s.dphy_fec_active_status);
+				s.dphy_fec_active_status,
+				s.dp_link_training_complete);
 			DTN_INFO("\n");
 		}
 	}
 	DTN_INFO("\n");
-#endif
 
 	DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d  dcfclk_deep_sleep_khz:%d  dispclk_khz:%d\n"
 		"dppclk_khz:%d  max_supported_dppclk_khz:%d  fclk_khz:%d  socclk_khz:%d\n\n",
@@ -438,14 +475,14 @@ bool dcn10_did_underflow_occur(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	return false;
 }
 
-static void dcn10_enable_power_gating_plane(
+void dcn10_enable_power_gating_plane(
 	struct dce_hwseq *hws,
 	bool enable)
 {
-	bool force_on = 1; /* disable power gating */
+	bool force_on = true; /* disable power gating */
 
 	if (enable)
-		force_on = 0;
+		force_on = false;
 
 	/* DCHUBP0/1/2/3 */
 	REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN0_POWER_FORCEON, force_on);
@@ -460,7 +497,7 @@ static void dcn10_enable_power_gating_plane(
 	REG_UPDATE(DOMAIN7_PG_CONFIG, DOMAIN7_POWER_FORCEON, force_on);
 }
 
-static void dcn10_disable_vga(
+void dcn10_disable_vga(
 	struct dce_hwseq *hws)
 {
 	unsigned int in_vga1_mode = 0;
@@ -493,7 +530,7 @@ static void dcn10_disable_vga(
 	REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_RENDER_START, 1);
 }
 
-static void dcn10_dpp_pg_control(
+void dcn10_dpp_pg_control(
 		struct dce_hwseq *hws,
 		unsigned int dpp_inst,
 		bool power_on)
@@ -545,7 +582,7 @@ static void dcn10_dpp_pg_control(
 	}
 }
 
-static void dcn10_hubp_pg_control(
+void dcn10_hubp_pg_control(
 		struct dce_hwseq *hws,
 		unsigned int hubp_inst,
 		bool power_on)
@@ -605,8 +642,8 @@ static void power_on_plane(
 	if (REG(DC_IP_REQUEST_CNTL)) {
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
 				IP_REQUEST_EN, 1);
-		hws->ctx->dc->hwss.dpp_pg_control(hws, plane_id, true);
-		hws->ctx->dc->hwss.hubp_pg_control(hws, plane_id, true);
+		hws->funcs.dpp_pg_control(hws, plane_id, true);
+		hws->funcs.hubp_pg_control(hws, plane_id, true);
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
 				IP_REQUEST_EN, 0);
 		DC_LOG_DEBUG(
@@ -627,7 +664,7 @@ static void undo_DEGVIDCN10_253_wa(struct dc *dc)
 	REG_SET(DC_IP_REQUEST_CNTL, 0,
 			IP_REQUEST_EN, 1);
 
-	dc->hwss.hubp_pg_control(hws, 0, false);
+	hws->funcs.hubp_pg_control(hws, 0, false);
 	REG_SET(DC_IP_REQUEST_CNTL, 0,
 			IP_REQUEST_EN, 0);
 
@@ -656,7 +693,7 @@ static void apply_DEGVIDCN10_253_wa(struct dc *dc)
 	REG_SET(DC_IP_REQUEST_CNTL, 0,
 			IP_REQUEST_EN, 1);
 
-	dc->hwss.hubp_pg_control(hws, 0, true);
+	hws->funcs.hubp_pg_control(hws, 0, true);
 	REG_SET(DC_IP_REQUEST_CNTL, 0,
 			IP_REQUEST_EN, 0);
 
@@ -664,12 +701,16 @@ static void apply_DEGVIDCN10_253_wa(struct dc *dc)
 	hws->wa_state.DEGVIDCN10_253_applied = true;
 }
 
-static void dcn10_bios_golden_init(struct dc *dc)
+void dcn10_bios_golden_init(struct dc *dc)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct dc_bios *bp = dc->ctx->dc_bios;
 	int i;
 	bool allow_self_fresh_force_enable = true;
 
+	if (hws->funcs.s0i3_golden_init_wa && hws->funcs.s0i3_golden_init_wa(dc))
+		return;
+
 	if (dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled)
 		allow_self_fresh_force_enable =
 				dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled(dc->res_pool->hubbub);
@@ -728,7 +769,7 @@ static void false_optc_underflow_wa(
 		tg->funcs->clear_optc_underflow(tg);
 }
 
-static enum dc_status dcn10_enable_stream_timing(
+enum dc_status dcn10_enable_stream_timing(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
 		struct dc *dc)
@@ -819,6 +860,7 @@ static void dcn10_reset_back_end_for_pipe(
 		struct dc_state *context)
 {
 	int i;
+	struct dc_link *link;
 	DC_LOGGER_INIT(dc->ctx->logger);
 	if (pipe_ctx->stream_res.stream_enc == NULL) {
 		pipe_ctx->stream = NULL;
@@ -826,8 +868,14 @@ static void dcn10_reset_back_end_for_pipe(
 	}
 
 	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-		/* DPMS may already disable */
-		if (!pipe_ctx->stream->dpms_off)
+		link = pipe_ctx->stream->link;
+		/* DPMS may already disable or */
+		/* dpms_off status is incorrect due to fastboot
+		 * feature. When system resume from S4 with second
+		 * screen only, the dpms_off would be true but
+		 * VBIOS lit up eDP, so check link status too.
+		 */
+		if (!pipe_ctx->stream->dpms_off || link->link_status.link_active)
 			core_link_disable_stream(pipe_ctx);
 		else if (pipe_ctx->stream_res.audio)
 			dc->hwss.disable_audio_stream(pipe_ctx);
@@ -974,8 +1022,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	int dpp_id = pipe_ctx->plane_res.dpp->inst;
 	struct mpc *mpc = dc->res_pool->mpc;
@@ -1000,10 +1049,10 @@ void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 		hubp->funcs->hubp_disconnect(hubp);
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 }
 
-static void dcn10_plane_atomic_power_down(struct dc *dc,
+void dcn10_plane_atomic_power_down(struct dc *dc,
 		struct dpp *dpp,
 		struct hubp *hubp)
 {
@@ -1013,8 +1062,8 @@ static void dcn10_plane_atomic_power_down(struct dc *dc,
 	if (REG(DC_IP_REQUEST_CNTL)) {
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
 				IP_REQUEST_EN, 1);
-		dc->hwss.dpp_pg_control(hws, dpp->inst, false);
-		dc->hwss.hubp_pg_control(hws, hubp->inst, false);
+		hws->funcs.dpp_pg_control(hws, dpp->inst, false);
+		hws->funcs.hubp_pg_control(hws, hubp->inst, false);
 		dpp->funcs->dpp_reset(dpp);
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
 				IP_REQUEST_EN, 0);
@@ -1026,8 +1075,9 @@ static void dcn10_plane_atomic_power_down(struct dc *dc,
 /* disable HW used by plane.
  * note:  cannot disable until disconnect is complete
  */
-static void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
 	int opp_id = hubp->opp_id;
@@ -1046,7 +1096,7 @@ static void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	hubp->power_gated = true;
 	dc->optimized_required = false; /* We're powering off, no need to optimize */
 
-	dc->hwss.plane_atomic_power_down(dc,
+	hws->funcs.plane_atomic_power_down(dc,
 			pipe_ctx->plane_res.dpp,
 			pipe_ctx->plane_res.hubp);
 
@@ -1058,14 +1108,15 @@ static void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
 
 	if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated)
 		return;
 
-	dc->hwss.plane_atomic_disable(dc, pipe_ctx);
+	hws->funcs.plane_atomic_disable(dc, pipe_ctx);
 
 	apply_DEGVIDCN10_253_wa(dc);
 
@@ -1073,9 +1124,10 @@ static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
 					pipe_ctx->pipe_idx);
 }
 
-static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
+void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 {
 	int i;
+	struct dce_hwseq *hws = dc->hwseq;
 	bool can_apply_seamless_boot = false;
 
 	for (i = 0; i < context->stream_count; i++) {
@@ -1100,8 +1152,8 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		 * command table.
 		 */
 		if (tg->funcs->is_tg_enabled(tg)) {
-			if (dc->hwss.init_blank != NULL) {
-				dc->hwss.init_blank(dc, tg);
+			if (hws->funcs.init_blank != NULL) {
+				hws->funcs.init_blank(dc, tg);
 				tg->funcs->lock(tg);
 			} else {
 				tg->funcs->lock(tg);
@@ -1111,7 +1163,8 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		}
 	}
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+	/* num_opp will be equal to number of mpcc */
+	for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
 		/* Cannot reset the MPC mux if seamless boot */
@@ -1135,8 +1188,14 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		if (can_apply_seamless_boot &&
 			pipe_ctx->stream != NULL &&
 			pipe_ctx->stream_res.tg->funcs->is_tg_enabled(
-				pipe_ctx->stream_res.tg))
+				pipe_ctx->stream_res.tg)) {
+			// Enable double buffering for OTG_BLANK no matter if
+			// seamless boot is enabled or not to suppress global sync
+			// signals when OTG blanked. This is to prevent pipe from
+			// requesting data while in PSR.
+			tg->funcs->tg_init(tg);
 			continue;
+		}
 
 		/* Disable on the current state so the new one isn't cleared. */
 		pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -1158,7 +1217,7 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		dc->hwss.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
@@ -1172,7 +1231,7 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 	}
 }
 
-static void dcn10_init_hw(struct dc *dc)
+void dcn10_init_hw(struct dc *dc)
 {
 	int i;
 	struct abm *abm = dc->res_pool->abm;
@@ -1204,15 +1263,15 @@ static void dcn10_init_hw(struct dc *dc)
 		}
 
 		//Enable ability to power gate / don't force power on permanently
-		dc->hwss.enable_power_gating_plane(hws, true);
+		hws->funcs.enable_power_gating_plane(hws, true);
 
 		return;
 	}
 
 	if (!dcb->funcs->is_accelerated_mode(dcb))
-		dc->hwss.disable_vga(dc->hwseq);
+		hws->funcs.disable_vga(dc->hwseq);
 
-	dc->hwss.bios_golden_init(dc);
+	hws->funcs.bios_golden_init(dc);
 	if (dc->ctx->dc_bios->fw_info_valid) {
 		res_pool->ref_clocks.xtalin_clock_inKhz =
 				dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
@@ -1254,11 +1313,9 @@ static void dcn10_init_hw(struct dc *dc)
 	}
 
 	/* Power gate DSCs */
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	for (i = 0; i < res_pool->res_cap->num_dsc; i++)
-		if (dc->hwss.dsc_pg_control != NULL)
-			dc->hwss.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
-#endif
+		if (hws->funcs.dsc_pg_control != NULL)
+			hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
 
 	/* If taking control over from VBIOS, we may want to optimize our first
 	 * mode set, so we need to skip powering down pipes until we know which
@@ -1267,7 +1324,7 @@ static void dcn10_init_hw(struct dc *dc)
 	 * everything down.
 	 */
 	if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
-		dc->hwss.init_pipes(dc, dc->current_state);
+		hws->funcs.init_pipes(dc, dc->current_state);
 	}
 
 	for (i = 0; i < res_pool->audio_count; i++) {
@@ -1281,7 +1338,7 @@ static void dcn10_init_hw(struct dc *dc)
 		abm->funcs->abm_init(abm);
 	}
 
-	if (dmcu != NULL)
+	if (dmcu != NULL && !dmcu->auto_load_dmcu)
 		dmcu->funcs->dmcu_init(dmcu);
 
 	if (abm != NULL && dmcu != NULL)
@@ -1299,14 +1356,19 @@ static void dcn10_init_hw(struct dc *dc)
 		REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
 	}
 
-	dc->hwss.enable_power_gating_plane(dc->hwseq, true);
+	hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
+	if (dc->clk_mgr->funcs->notify_wm_ranges)
+		dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
+
 }
 
-static void dcn10_reset_hw_ctx_wrap(
+void dcn10_reset_hw_ctx_wrap(
 		struct dc *dc,
 		struct dc_state *context)
 {
 	int i;
+	struct dce_hwseq *hws = dc->hwseq;
 
 	/* Reset Back End*/
 	for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
@@ -1325,8 +1387,8 @@ static void dcn10_reset_hw_ctx_wrap(
 			struct clock_source *old_clk = pipe_ctx_old->clock_source;
 
 			dcn10_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
-			if (dc->hwss.enable_stream_gating)
-				dc->hwss.enable_stream_gating(dc, pipe_ctx);
+			if (hws->funcs.enable_stream_gating)
+				hws->funcs.enable_stream_gating(dc, pipe_ctx);
 			if (old_clk)
 				old_clk->funcs->cs_power_down(old_clk);
 		}
@@ -1359,9 +1421,7 @@ static bool patch_address_for_sbs_tb_stereo(
 	return false;
 }
 
-
-
-static void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	bool addr_patched = false;
 	PHYSICAL_ADDRESS_LOC addr;
@@ -1386,8 +1446,8 @@ static void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_c
 		pipe_ctx->plane_state->address.grph_stereo.left_addr = addr;
 }
 
-static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
-					  const struct dc_plane_state *plane_state)
+bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+			const struct dc_plane_state *plane_state)
 {
 	struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
 	const struct dc_transfer_func *tf = NULL;
@@ -1419,6 +1479,11 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 			dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
 			break;
 		case TRANSFER_FUNCTION_PQ:
+			dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_USER_PWL);
+			cm_helper_translate_curve_to_degamma_hw_format(tf, &dpp_base->degamma_params);
+			dpp_base->funcs->dpp_program_degamma_pwl(dpp_base, &dpp_base->degamma_params);
+			result = true;
+			break;
 		default:
 			result = false;
 			break;
@@ -1452,21 +1517,20 @@ static void log_tf(struct dc_context *ctx,
 	DC_LOG_ALL_TF_CHANNELS("Logging all channels...");
 
 	for (i = 0; i < hw_points_num; i++) {
-		DC_LOG_GAMMA("R\t%d\t%llu\n", i, tf->tf_pts.red[i].value);
-		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu\n", i, tf->tf_pts.green[i].value);
-		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu\n", i, tf->tf_pts.blue[i].value);
+		DC_LOG_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value);
+		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value);
+		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value);
 	}
 
 	for (i = hw_points_num; i < MAX_NUM_HW_POINTS; i++) {
-		DC_LOG_ALL_GAMMA("R\t%d\t%llu\n", i, tf->tf_pts.red[i].value);
-		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu\n", i, tf->tf_pts.green[i].value);
-		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu\n", i, tf->tf_pts.blue[i].value);
+		DC_LOG_ALL_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value);
+		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value);
+		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value);
 	}
 }
 
-static bool
-dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
-			       const struct dc_stream_state *stream)
+bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+				const struct dc_stream_state *stream)
 {
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
 
@@ -1502,11 +1566,13 @@ dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
 	return true;
 }
 
-static void dcn10_pipe_control_lock(
+void dcn10_pipe_control_lock(
 	struct dc *dc,
 	struct pipe_ctx *pipe,
 	bool lock)
 {
+	struct dce_hwseq *hws = dc->hwseq;
+
 	/* use TG master update lock to lock everything on the TG
 	 * therefore only top pipe need to lock
 	 */
@@ -1514,7 +1580,7 @@ static void dcn10_pipe_control_lock(
 		return;
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 
 	if (lock)
 		pipe->stream_res.tg->funcs->lock(pipe->stream_res.tg);
@@ -1522,7 +1588,7 @@ static void dcn10_pipe_control_lock(
 		pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg);
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 }
 
 static bool wait_for_reset_trigger_to_occur(
@@ -1562,7 +1628,7 @@ static bool wait_for_reset_trigger_to_occur(
 	return rc;
 }
 
-static void dcn10_enable_timing_synchronization(
+void dcn10_enable_timing_synchronization(
 	struct dc *dc,
 	int group_index,
 	int group_size,
@@ -1592,7 +1658,7 @@ static void dcn10_enable_timing_synchronization(
 	DC_SYNC_INFO("Sync complete\n");
 }
 
-static void dcn10_enable_per_frame_crtc_position_reset(
+void dcn10_enable_per_frame_crtc_position_reset(
 	struct dc *dc,
 	int group_size,
 	struct pipe_ctx *grouped_pipes[])
@@ -1617,10 +1683,10 @@ static void dcn10_enable_per_frame_crtc_position_reset(
 }
 
 /*static void print_rq_dlg_ttu(
-		struct dc *core_dc,
+		struct dc *dc,
 		struct pipe_ctx *pipe_ctx)
 {
-	DC_LOG_BANDWIDTH_CALCS(core_dc->ctx->logger,
+	DC_LOG_BANDWIDTH_CALCS(dc->ctx->logger,
 			"\n============== DML TTU Output parameters [%d] ==============\n"
 			"qos_level_low_wm: %d, \n"
 			"qos_level_high_wm: %d, \n"
@@ -1650,7 +1716,7 @@ static void dcn10_enable_per_frame_crtc_position_reset(
 			pipe_ctx->ttu_regs.refcyc_per_req_delivery_pre_c
 			);
 
-	DC_LOG_BANDWIDTH_CALCS(core_dc->ctx->logger,
+	DC_LOG_BANDWIDTH_CALCS(dc->ctx->logger,
 			"\n============== DML DLG Output parameters [%d] ==============\n"
 			"refcyc_h_blank_end: %d, \n"
 			"dlg_vblank_end: %d, \n"
@@ -1685,7 +1751,7 @@ static void dcn10_enable_per_frame_crtc_position_reset(
 			pipe_ctx->dlg_regs.refcyc_per_pte_group_nom_l
 			);
 
-	DC_LOG_BANDWIDTH_CALCS(core_dc->ctx->logger,
+	DC_LOG_BANDWIDTH_CALCS(dc->ctx->logger,
 			"\ndst_y_per_meta_row_nom_l: %d, \n"
 			"refcyc_per_meta_chunk_nom_l: %d, \n"
 			"refcyc_per_line_delivery_pre_l: %d, \n"
@@ -1715,7 +1781,7 @@ static void dcn10_enable_per_frame_crtc_position_reset(
 			pipe_ctx->dlg_regs.refcyc_per_line_delivery_c
 			);
 
-	DC_LOG_BANDWIDTH_CALCS(core_dc->ctx->logger,
+	DC_LOG_BANDWIDTH_CALCS(dc->ctx->logger,
 			"\n============== DML RQ Output parameters [%d] ==============\n"
 			"chunk_size: %d \n"
 			"min_chunk_size: %d \n"
@@ -1830,7 +1896,7 @@ static void dcn10_enable_plane(
 	struct dce_hwseq *hws = dc->hwseq;
 
 	if (dc->debug.sanity_checks) {
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 	}
 
 	undo_DEGVIDCN10_253_wa(dc);
@@ -1887,11 +1953,11 @@ static void dcn10_enable_plane(
 		dcn10_program_pte_vm(hws, pipe_ctx->plane_res.hubp);
 
 	if (dc->debug.sanity_checks) {
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 	}
 }
 
-static void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx)
+void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx)
 {
 	int i = 0;
 	struct dpp_grph_csc_adjustment adjust;
@@ -1939,7 +2005,7 @@ static void dcn10_set_csc_adjustment_rgb_mpo_fix(struct pipe_ctx *pipe_ctx, uint
 	matrix[11] = rgb_bias;
 }
 
-static void dcn10_program_output_csc(struct dc *dc,
+void dcn10_program_output_csc(struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum dc_color_space colorspace,
 		uint16_t *matrix,
@@ -1971,57 +2037,6 @@ static void dcn10_program_output_csc(struct dc *dc,
 	}
 }
 
-bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
-{
-	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
-		return true;
-	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
-		return true;
-	return false;
-}
-
-bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
-{
-	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
-		return true;
-	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
-		return true;
-	return false;
-}
-
-bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
-{
-	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
-		return true;
-	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
-		return true;
-	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
-		return true;
-	return false;
-}
-
-bool is_rgb_cspace(enum dc_color_space output_color_space)
-{
-	switch (output_color_space) {
-	case COLOR_SPACE_SRGB:
-	case COLOR_SPACE_SRGB_LIMITED:
-	case COLOR_SPACE_2020_RGB_FULLRANGE:
-	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
-	case COLOR_SPACE_ADOBERGB:
-		return true;
-	case COLOR_SPACE_YCBCR601:
-	case COLOR_SPACE_YCBCR709:
-	case COLOR_SPACE_YCBCR601_LIMITED:
-	case COLOR_SPACE_YCBCR709_LIMITED:
-	case COLOR_SPACE_2020_YCBCR:
-		return false;
-	default:
-		/* Add a case to switch */
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-}
-
 void dcn10_get_surface_visual_confirm_color(
 		const struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
@@ -2095,70 +2110,7 @@ void dcn10_get_hdr_visual_confirm_color(
 	}
 }
 
-static uint16_t fixed_point_to_int_frac(
-	struct fixed31_32 arg,
-	uint8_t integer_bits,
-	uint8_t fractional_bits)
-{
-	int32_t numerator;
-	int32_t divisor = 1 << fractional_bits;
-
-	uint16_t result;
-
-	uint16_t d = (uint16_t)dc_fixpt_floor(
-		dc_fixpt_abs(
-			arg));
-
-	if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
-		numerator = (uint16_t)dc_fixpt_floor(
-			dc_fixpt_mul_int(
-				arg,
-				divisor));
-	else {
-		numerator = dc_fixpt_floor(
-			dc_fixpt_sub(
-				dc_fixpt_from_int(
-					1LL << integer_bits),
-				dc_fixpt_recip(
-					dc_fixpt_from_int(
-						divisor))));
-	}
-
-	if (numerator >= 0)
-		result = (uint16_t)numerator;
-	else
-		result = (uint16_t)(
-		(1 << (integer_bits + fractional_bits + 1)) + numerator);
-
-	if ((result != 0) && dc_fixpt_lt(
-		arg, dc_fixpt_zero))
-		result |= 1 << (integer_bits + fractional_bits);
-
-	return result;
-}
-
-void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
-		const struct dc_plane_state *plane_state)
-{
-	if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN
-			&& plane_state->format != SURFACE_PIXEL_FORMAT_INVALID
-			&& plane_state->input_csc_color_matrix.enable_adjustment
-			&& plane_state->coeff_reduction_factor.value != 0) {
-		bias_and_scale->scale_blue = fixed_point_to_int_frac(
-			dc_fixpt_mul(plane_state->coeff_reduction_factor,
-					dc_fixpt_from_fraction(256, 255)),
-				2,
-				13);
-		bias_and_scale->scale_red = bias_and_scale->scale_blue;
-		bias_and_scale->scale_green = bias_and_scale->scale_blue;
-	} else {
-		bias_and_scale->scale_blue = 0x2000;
-		bias_and_scale->scale_red = 0x2000;
-		bias_and_scale->scale_green = 0x2000;
-	}
-}
-
-static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
+static void dcn10_update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
 {
 	struct dc_bias_and_scale bns_params = {0};
 
@@ -2167,21 +2119,18 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
 			plane_state->format,
 			EXPANSION_MODE_ZERO,
 			plane_state->input_csc_color_matrix,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 			plane_state->color_space,
 			NULL);
-#else
-			plane_state->color_space);
-#endif
 
 	//set scale and bias registers
-	dcn10_build_prescale_params(&bns_params, plane_state);
+	build_prescale_params(&bns_params, plane_state);
 	if (dpp->funcs->dpp_program_bias_and_scale)
 		dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
 }
 
-static void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	struct mpcc_blnd_cfg blnd_cfg = {{0}};
 	bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe;
@@ -2191,10 +2140,10 @@ static void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
 
 	if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR) {
-		dcn10_get_hdr_visual_confirm_color(
+		hws->funcs.get_hdr_visual_confirm_color(
 				pipe_ctx, &blnd_cfg.black_color);
 	} else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE) {
-		dcn10_get_surface_visual_confirm_color(
+		hws->funcs.get_surface_visual_confirm_color(
 				pipe_ctx, &blnd_cfg.black_color);
 	} else {
 		color_space_to_black_color(
@@ -2276,11 +2225,12 @@ static void update_scaler(struct pipe_ctx *pipe_ctx)
 			pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data);
 }
 
-void update_dchubp_dpp(
+static void dcn10_update_dchubp_dpp(
 	struct dc *dc,
 	struct pipe_ctx *pipe_ctx,
 	struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
@@ -2304,8 +2254,7 @@ void update_dchubp_dpp(
 			dc->res_pool->dccg->funcs->update_dpp_dto(
 					dc->res_pool->dccg,
 					dpp->inst,
-					pipe_ctx->plane_res.bw.dppclk_khz,
-					false);
+					pipe_ctx->plane_res.bw.dppclk_khz);
 		else
 			dc->clk_mgr->clks.dppclk_khz = should_divided_by_2 ?
 						dc->clk_mgr->clks.dispclk_khz / 2 :
@@ -2335,12 +2284,12 @@ void update_dchubp_dpp(
 
 	if (plane_state->update_flags.bits.full_update ||
 		plane_state->update_flags.bits.bpp_change)
-		update_dpp(dpp, plane_state);
+		dcn10_update_dpp(dpp, plane_state);
 
 	if (plane_state->update_flags.bits.full_update ||
 		plane_state->update_flags.bits.per_pixel_alpha_change ||
 		plane_state->update_flags.bits.global_alpha_change)
-		dc->hwss.update_mpcc(dc, pipe_ctx);
+		hws->funcs.update_mpcc(dc, pipe_ctx);
 
 	if (plane_state->update_flags.bits.full_update ||
 		plane_state->update_flags.bits.per_pixel_alpha_change ||
@@ -2400,13 +2349,13 @@ void update_dchubp_dpp(
 
 	hubp->power_gated = false;
 
-	dc->hwss.update_plane_addr(dc, pipe_ctx);
+	hws->funcs.update_plane_addr(dc, pipe_ctx);
 
 	if (is_pipe_tree_visible(pipe_ctx))
 		hubp->funcs->set_blank(hubp, false);
 }
 
-static void dcn10_blank_pixel_data(
+void dcn10_blank_pixel_data(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		bool blank)
@@ -2449,10 +2398,9 @@ static void dcn10_blank_pixel_data(
 	}
 }
 
-void set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
+void dcn10_set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
 {
-	struct fixed31_32 multiplier = dc_fixpt_from_fraction(
-			pipe_ctx->plane_state->sdr_white_level, 80);
+	struct fixed31_32 multiplier = pipe_ctx->plane_state->hdr_mult;
 	uint32_t hw_mult = 0x1f000; // 1.0 default multiplier
 	struct custom_float_format fmt;
 
@@ -2460,7 +2408,8 @@ void set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
 	fmt.mantissa_bits = 12;
 	fmt.sign = true;
 
-	if (pipe_ctx->plane_state->sdr_white_level > 80)
+
+	if (!dc_fixpt_eq(multiplier, dc_fixpt_from_int(0))) // check != 0
 		convert_to_custom_float_format(multiplier, &fmt, &hw_mult);
 
 	pipe_ctx->plane_res.dpp->funcs->dpp_set_hdr_multiplier(
@@ -2472,17 +2421,19 @@ void dcn10_program_pipe(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
+
 	if (pipe_ctx->plane_state->update_flags.bits.full_update)
 		dcn10_enable_plane(dc, pipe_ctx, context);
 
-	update_dchubp_dpp(dc, pipe_ctx, context);
+	dcn10_update_dchubp_dpp(dc, pipe_ctx, context);
 
-	set_hdr_multiplier(pipe_ctx);
+	hws->funcs.set_hdr_multiplier(pipe_ctx);
 
 	if (pipe_ctx->plane_state->update_flags.bits.full_update ||
 			pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
 			pipe_ctx->plane_state->update_flags.bits.gamma_change)
-		dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state);
+		hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
 
 	/* dcn10_translate_regamma_to_hw_format takes 750us to finish
 	 * only do gamma programming for full update.
@@ -2491,14 +2442,16 @@ void dcn10_program_pipe(
 	 * doing heavy calculation and programming
 	 */
 	if (pipe_ctx->plane_state->update_flags.bits.full_update)
-		dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream);
+		hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
 }
 
-static void program_all_pipe_in_tree(
+static void dcn10_program_all_pipe_in_tree(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
+
 	if (pipe_ctx->top_pipe == NULL) {
 		bool blank = !is_pipe_tree_visible(pipe_ctx);
 
@@ -2512,18 +2465,20 @@ static void program_all_pipe_in_tree(
 		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
 				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
 
-		dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
+		if (hws->funcs.setup_vupdate_interrupt)
+			hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
 
+		hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
 	}
 
 	if (pipe_ctx->plane_state != NULL)
-		dcn10_program_pipe(dc, pipe_ctx, context);
+		hws->funcs.program_pipe(dc, pipe_ctx, context);
 
 	if (pipe_ctx->bottom_pipe != NULL && pipe_ctx->bottom_pipe != pipe_ctx)
-		program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context);
+		dcn10_program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context);
 }
 
-struct pipe_ctx *find_top_pipe_for_stream(
+static struct pipe_ctx *dcn10_find_top_pipe_for_stream(
 		struct dc *dc,
 		struct dc_state *context,
 		const struct dc_stream_state *stream)
@@ -2547,19 +2502,20 @@ struct pipe_ctx *find_top_pipe_for_stream(
 	return NULL;
 }
 
-static void dcn10_apply_ctx_for_surface(
+void dcn10_apply_ctx_for_surface(
 		struct dc *dc,
 		const struct dc_stream_state *stream,
 		int num_planes,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	int i;
 	struct timing_generator *tg;
 	uint32_t underflow_check_delay_us;
 	bool removed_pipe[4] = { false };
 	bool interdependent_update = false;
 	struct pipe_ctx *top_pipe_to_program =
-			find_top_pipe_for_stream(dc, context, stream);
+			dcn10_find_top_pipe_for_stream(dc, context, stream);
 	DC_LOGGER_INIT(dc->ctx->logger);
 
 	if (!top_pipe_to_program)
@@ -2572,23 +2528,23 @@ static void dcn10_apply_ctx_for_surface(
 
 	underflow_check_delay_us = dc->debug.underflow_assert_delay_us;
 
-	if (underflow_check_delay_us != 0xFFFFFFFF && dc->hwss.did_underflow_occur)
-		ASSERT(dc->hwss.did_underflow_occur(dc, top_pipe_to_program));
+	if (underflow_check_delay_us != 0xFFFFFFFF && hws->funcs.did_underflow_occur)
+		ASSERT(hws->funcs.did_underflow_occur(dc, top_pipe_to_program));
 
 	if (interdependent_update)
-		lock_all_pipes(dc, context, true);
+		dcn10_lock_all_pipes(dc, context, true);
 	else
 		dcn10_pipe_control_lock(dc, top_pipe_to_program, true);
 
 	if (underflow_check_delay_us != 0xFFFFFFFF)
 		udelay(underflow_check_delay_us);
 
-	if (underflow_check_delay_us != 0xFFFFFFFF && dc->hwss.did_underflow_occur)
-		ASSERT(dc->hwss.did_underflow_occur(dc, top_pipe_to_program));
+	if (underflow_check_delay_us != 0xFFFFFFFF && hws->funcs.did_underflow_occur)
+		ASSERT(hws->funcs.did_underflow_occur(dc, top_pipe_to_program));
 
 	if (num_planes == 0) {
 		/* OTG blank before remove all front end */
-		dc->hwss.blank_pixel_data(dc, top_pipe_to_program, true);
+		hws->funcs.blank_pixel_data(dc, top_pipe_to_program, true);
 	}
 
 	/* Disconnect unused mpcc */
@@ -2614,7 +2570,7 @@ static void dcn10_apply_ctx_for_surface(
 		    old_pipe_ctx->plane_state &&
 		    old_pipe_ctx->stream_res.tg == tg) {
 
-			dc->hwss.plane_atomic_disconnect(dc, old_pipe_ctx);
+			hws->funcs.plane_atomic_disconnect(dc, old_pipe_ctx);
 			removed_pipe[i] = true;
 
 			DC_LOG_DC("Reset mpcc for pipe %d\n",
@@ -2623,13 +2579,11 @@ static void dcn10_apply_ctx_for_surface(
 	}
 
 	if (num_planes > 0)
-		program_all_pipe_in_tree(dc, top_pipe_to_program, context);
+		dcn10_program_all_pipe_in_tree(dc, top_pipe_to_program, context);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* Program secondary blending tree and writeback pipes */
-	if ((stream->num_wb_info > 0) && (dc->hwss.program_all_writeback_pipes_in_tree))
-		dc->hwss.program_all_writeback_pipes_in_tree(dc, stream, context);
-#endif
+	if ((stream->num_wb_info > 0) && (hws->funcs.program_all_writeback_pipes_in_tree))
+		hws->funcs.program_all_writeback_pipes_in_tree(dc, stream, context);
 	if (interdependent_update)
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -2645,7 +2599,7 @@ static void dcn10_apply_ctx_for_surface(
 		}
 
 	if (interdependent_update)
-		lock_all_pipes(dc, context, false);
+		dcn10_lock_all_pipes(dc, context, false);
 	else
 		dcn10_pipe_control_lock(dc, top_pipe_to_program, false);
 
@@ -2682,14 +2636,15 @@ static void dcn10_stereo_hw_frame_pack_wa(struct dc *dc, struct dc_state *contex
 	}
 }
 
-static void dcn10_prepare_bandwidth(
+void dcn10_prepare_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 
 	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (context->stream_count == 0)
@@ -2711,17 +2666,18 @@ static void dcn10_prepare_bandwidth(
 		dcn_bw_notify_pplib_of_wm_ranges(dc);
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 }
 
-static void dcn10_optimize_bandwidth(
+void dcn10_optimize_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 
 	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
 		if (context->stream_count == 0)
@@ -2743,10 +2699,10 @@ static void dcn10_optimize_bandwidth(
 		dcn_bw_notify_pplib_of_wm_ranges(dc);
 
 	if (dc->debug.sanity_checks)
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 }
 
-static void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
+void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
 		int num_pipes, unsigned int vmin, unsigned int vmax,
 		unsigned int vmid, unsigned int vmid_frame_number)
 {
@@ -2754,6 +2710,8 @@ static void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
 	struct drr_params params = {0};
 	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
 	unsigned int event_triggers = 0x800;
+	// Note DRR trigger events are generated regardless of whether num frames met.
+	unsigned int num_frames = 2;
 
 	params.vertical_total_max = vmax;
 	params.vertical_total_min = vmin;
@@ -2770,11 +2728,11 @@ static void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
 		if (vmax != 0 && vmin != 0)
 			pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
 					pipe_ctx[i]->stream_res.tg,
-					event_triggers);
+					event_triggers, num_frames);
 	}
 }
 
-static void dcn10_get_position(struct pipe_ctx **pipe_ctx,
+void dcn10_get_position(struct pipe_ctx **pipe_ctx,
 		int num_pipes,
 		struct crtc_position *position)
 {
@@ -2786,22 +2744,23 @@ static void dcn10_get_position(struct pipe_ctx **pipe_ctx,
 		pipe_ctx[i]->stream_res.tg->funcs->get_position(pipe_ctx[i]->stream_res.tg, position);
 }
 
-static void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
-		int num_pipes, const struct dc_static_screen_events *events)
+void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+		int num_pipes, const struct dc_static_screen_params *params)
 {
 	unsigned int i;
-	unsigned int value = 0;
+	unsigned int triggers = 0;
 
-	if (events->surface_update)
-		value |= 0x80;
-	if (events->cursor_update)
-		value |= 0x2;
-	if (events->force_trigger)
-		value |= 0x1;
+	if (params->triggers.surface_update)
+		triggers |= 0x80;
+	if (params->triggers.cursor_update)
+		triggers |= 0x2;
+	if (params->triggers.force_trigger)
+		triggers |= 0x1;
 
 	for (i = 0; i < num_pipes; i++)
 		pipe_ctx[i]->stream_res.tg->funcs->
-			set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
+			set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+					triggers, params->num_frames);
 }
 
 static void dcn10_config_stereo_parameters(
@@ -2841,7 +2800,7 @@ static void dcn10_config_stereo_parameters(
 	return;
 }
 
-static void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc)
+void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc)
 {
 	struct crtc_stereo_flags flags = { 0 };
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -2880,15 +2839,16 @@ static struct hubp *get_hubp_by_inst(struct resource_pool *res_pool, int mpcc_in
 	return NULL;
 }
 
-static void dcn10_wait_for_mpcc_disconnect(
+void dcn10_wait_for_mpcc_disconnect(
 		struct dc *dc,
 		struct resource_pool *res_pool,
 		struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	int mpcc_inst;
 
 	if (dc->debug.sanity_checks) {
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 	}
 
 	if (!pipe_ctx->stream_res.opp)
@@ -2905,12 +2865,12 @@ static void dcn10_wait_for_mpcc_disconnect(
 	}
 
 	if (dc->debug.sanity_checks) {
-		dcn10_verify_allow_pstate_change_high(dc);
+		hws->funcs.verify_allow_pstate_change_high(dc);
 	}
 
 }
 
-static bool dcn10_dummy_display_power_gating(
+bool dcn10_dummy_display_power_gating(
 	struct dc *dc,
 	uint8_t controller_id,
 	struct dc_bios *dcb,
@@ -2919,7 +2879,7 @@ static bool dcn10_dummy_display_power_gating(
 	return true;
 }
 
-static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
+void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct timing_generator *tg = pipe_ctx->stream_res.tg;
@@ -2943,7 +2903,7 @@ static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
 	}
 }
 
-static void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
+void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
 {
 	struct hubbub *hubbub = hws->ctx->dc->res_pool->hubbub;
 
@@ -2951,7 +2911,7 @@ static void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh
 	hubbub->funcs->update_dchub(hubbub, dh_data);
 }
 
-static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
+void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -2965,15 +2925,32 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 		.rotation = pipe_ctx->plane_state->rotation,
 		.mirror = pipe_ctx->plane_state->horizontal_mirror
 	};
-	uint32_t x_plane = pipe_ctx->plane_state->dst_rect.x;
-	uint32_t y_plane = pipe_ctx->plane_state->dst_rect.y;
-	uint32_t x_offset = min(x_plane, pos_cpy.x);
-	uint32_t y_offset = min(y_plane, pos_cpy.y);
+	bool pipe_split_on = (pipe_ctx->top_pipe != NULL) ||
+		(pipe_ctx->bottom_pipe != NULL);
+
+	int x_plane = pipe_ctx->plane_state->dst_rect.x;
+	int y_plane = pipe_ctx->plane_state->dst_rect.y;
+	int x_pos = pos_cpy.x;
+	int y_pos = pos_cpy.y;
+
+	// translate cursor from stream space to plane space
+	x_pos = (x_pos - x_plane) * pipe_ctx->plane_state->src_rect.width /
+			pipe_ctx->plane_state->dst_rect.width;
+	y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height /
+			pipe_ctx->plane_state->dst_rect.height;
 
-	pos_cpy.x -= x_offset;
-	pos_cpy.y -= y_offset;
-	pos_cpy.x_hotspot += (x_plane - x_offset);
-	pos_cpy.y_hotspot += (y_plane - y_offset);
+	if (x_pos < 0) {
+		pos_cpy.x_hotspot -= x_pos;
+		x_pos = 0;
+	}
+
+	if (y_pos < 0) {
+		pos_cpy.y_hotspot -= y_pos;
+		y_pos = 0;
+	}
+
+	pos_cpy.x = (uint32_t)x_pos;
+	pos_cpy.y = (uint32_t)y_pos;
 
 	if (pipe_ctx->plane_state->address.type
 			== PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
@@ -2982,6 +2959,7 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 	// Swap axis and mirror horizontally
 	if (param.rotation == ROTATION_ANGLE_90) {
 		uint32_t temp_x = pos_cpy.x;
+
 		pos_cpy.x = pipe_ctx->plane_res.scl_data.viewport.width -
 				(pos_cpy.y - pipe_ctx->plane_res.scl_data.viewport.x) + pipe_ctx->plane_res.scl_data.viewport.x;
 		pos_cpy.y = temp_x;
@@ -2989,26 +2967,44 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 	// Swap axis and mirror vertically
 	else if (param.rotation == ROTATION_ANGLE_270) {
 		uint32_t temp_y = pos_cpy.y;
-		if (pos_cpy.x >  pipe_ctx->plane_res.scl_data.viewport.height) {
-			pos_cpy.x = pos_cpy.x - pipe_ctx->plane_res.scl_data.viewport.height;
-			pos_cpy.y = pipe_ctx->plane_res.scl_data.viewport.height - pos_cpy.x;
-		} else {
-			pos_cpy.y = 2 * pipe_ctx->plane_res.scl_data.viewport.height - pos_cpy.x;
-		}
+		int viewport_height =
+			pipe_ctx->plane_res.scl_data.viewport.height;
+
+		if (pipe_split_on) {
+			if (pos_cpy.x > viewport_height) {
+				pos_cpy.x = pos_cpy.x - viewport_height;
+				pos_cpy.y = viewport_height - pos_cpy.x;
+			} else {
+				pos_cpy.y = 2 * viewport_height - pos_cpy.x;
+			}
+		} else
+			pos_cpy.y = viewport_height - pos_cpy.x;
 		pos_cpy.x = temp_y;
 	}
 	// Mirror horizontally and vertically
 	else if (param.rotation == ROTATION_ANGLE_180) {
-		if (pos_cpy.x >= pipe_ctx->plane_res.scl_data.viewport.width + pipe_ctx->plane_res.scl_data.viewport.x) {
-			pos_cpy.x = 2 * pipe_ctx->plane_res.scl_data.viewport.width
-					- pos_cpy.x + 2 * pipe_ctx->plane_res.scl_data.viewport.x;
-		} else {
-			uint32_t temp_x = pos_cpy.x;
-			pos_cpy.x = 2 * pipe_ctx->plane_res.scl_data.viewport.x - pos_cpy.x;
-			if (temp_x >= pipe_ctx->plane_res.scl_data.viewport.x + (int)hubp->curs_attr.width
-					|| pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) {
-				pos_cpy.x = temp_x + pipe_ctx->plane_res.scl_data.viewport.width;
+		int viewport_width =
+			pipe_ctx->plane_res.scl_data.viewport.width;
+		int viewport_x =
+			pipe_ctx->plane_res.scl_data.viewport.x;
+
+		if (pipe_split_on) {
+			if (pos_cpy.x >= viewport_width + viewport_x) {
+				pos_cpy.x = 2 * viewport_width
+						- pos_cpy.x + 2 * viewport_x;
+			} else {
+				uint32_t temp_x = pos_cpy.x;
+
+				pos_cpy.x = 2 * viewport_x - pos_cpy.x;
+				if (temp_x >= viewport_x +
+					(int)hubp->curs_attr.width || pos_cpy.x
+					<= (int)hubp->curs_attr.width +
+					pipe_ctx->plane_state->src_rect.x) {
+					pos_cpy.x = temp_x + viewport_width;
+				}
 			}
+		} else {
+			pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x;
 		}
 		pos_cpy.y = pipe_ctx->plane_res.scl_data.viewport.height - pos_cpy.y;
 	}
@@ -3017,7 +3013,7 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 	dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width, hubp->curs_attr.height);
 }
 
-static void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
+void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
 
@@ -3027,7 +3023,7 @@ static void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
 		pipe_ctx->plane_res.dpp, attributes);
 }
 
-static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx)
+void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx)
 {
 	uint32_t sdr_white_level = pipe_ctx->stream->cursor_attributes.sdr_white_level;
 	struct fixed31_32 multiplier;
@@ -3054,12 +3050,12 @@ static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx)
 			pipe_ctx->plane_res.dpp, &opt_attr);
 }
 
-/**
-* apply_front_porch_workaround  TODO FPGA still need?
-*
-* This is a workaround for a bug that has existed since R5xx and has not been
-* fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
-*/
+/*
+ * apply_front_porch_workaround  TODO FPGA still need?
+ *
+ * This is a workaround for a bug that has existed since R5xx and has not been
+ * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
+ */
 static void apply_front_porch_workaround(
 	struct dc_crtc_timing *timing)
 {
@@ -3072,7 +3068,7 @@ static void apply_front_porch_workaround(
 	}
 }
 
-int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
+int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
 {
 	const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
 	struct dc_crtc_timing patched_crtc_timing;
@@ -3101,34 +3097,8 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
 	return vertical_line_start;
 }
 
-void lock_all_pipes(struct dc *dc,
-	struct dc_state *context,
-	bool lock)
-{
-	struct pipe_ctx *pipe_ctx;
-	struct timing_generator *tg;
-	int i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		pipe_ctx = &context->res_ctx.pipe_ctx[i];
-		tg = pipe_ctx->stream_res.tg;
-		/*
-		 * Only lock the top pipe's tg to prevent redundant
-		 * (un)locking. Also skip if pipe is disabled.
-		 */
-		if (pipe_ctx->top_pipe ||
-		    !pipe_ctx->stream || !pipe_ctx->plane_state ||
-		    !tg->funcs->is_tg_enabled(tg))
-			continue;
-
-		if (lock)
-			tg->funcs->lock(tg);
-		else
-			tg->funcs->unlock(tg);
-	}
-}
-
-static void calc_vupdate_position(
+static void dcn10_calc_vupdate_position(
+		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		uint32_t *start_line,
 		uint32_t *end_line)
@@ -3136,7 +3106,7 @@ static void calc_vupdate_position(
 	const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
 	int vline_int_offset_from_vupdate =
 			pipe_ctx->stream->periodic_interrupt0.lines_offset;
-	int vupdate_offset_from_vsync = get_vupdate_offset_from_vsync(pipe_ctx);
+	int vupdate_offset_from_vsync = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
 	int start_position;
 
 	if (vline_int_offset_from_vupdate > 0)
@@ -3157,7 +3127,8 @@ static void calc_vupdate_position(
 		*end_line = 2;
 }
 
-static void cal_vline_position(
+static void dcn10_cal_vline_position(
+		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum vline_select vline,
 		uint32_t *start_line,
@@ -3172,7 +3143,8 @@ static void cal_vline_position(
 
 	switch (ref_point) {
 	case START_V_UPDATE:
-		calc_vupdate_position(
+		dcn10_calc_vupdate_position(
+				dc,
 				pipe_ctx,
 				start_line,
 				end_line);
@@ -3186,7 +3158,8 @@ static void cal_vline_position(
 	}
 }
 
-static void dcn10_setup_periodic_interrupt(
+void dcn10_setup_periodic_interrupt(
+		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum vline_select vline)
 {
@@ -3196,7 +3169,7 @@ static void dcn10_setup_periodic_interrupt(
 		uint32_t start_line = 0;
 		uint32_t end_line = 0;
 
-		cal_vline_position(pipe_ctx, vline, &start_line, &end_line);
+		dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line);
 
 		tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line);
 
@@ -3207,10 +3180,10 @@ static void dcn10_setup_periodic_interrupt(
 	}
 }
 
-static void dcn10_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx)
+void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	struct timing_generator *tg = pipe_ctx->stream_res.tg;
-	int start_line = get_vupdate_offset_from_vsync(pipe_ctx);
+	int start_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
 
 	if (start_line < 0) {
 		ASSERT(0);
@@ -3221,12 +3194,13 @@ static void dcn10_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx)
 		tg->funcs->setup_vertical_interrupt2(tg, start_line);
 }
 
-static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
+void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings)
 {
 	struct encoder_unblank_param params = { { 0 } };
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;
+	struct dce_hwseq *hws = link->dc->hwseq;
 
 	/* only 3 items below are used by unblank */
 	params.timing = pipe_ctx->stream->timing;
@@ -3240,11 +3214,11 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
 	}
 
 	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
-		link->dc->hwss.edp_backlight_control(link, true);
+		hws->funcs.edp_backlight_control(link, true);
 	}
 }
 
-static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
+void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
 				const uint8_t *custom_sdp_message,
 				unsigned int sdp_message_size)
 {
@@ -3255,7 +3229,7 @@ static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
 				sdp_message_size);
 	}
 }
-static enum dc_status dcn10_set_clock(struct dc *dc,
+enum dc_status dcn10_set_clock(struct dc *dc,
 			enum dc_clock_type clock_type,
 			uint32_t clk_khz,
 			uint32_t stepping)
@@ -3295,7 +3269,7 @@ static enum dc_status dcn10_set_clock(struct dc *dc,
 
 }
 
-static void dcn10_get_clock(struct dc *dc,
+void dcn10_get_clock(struct dc *dc,
 			enum dc_clock_type clock_type,
 			struct dc_clock_config *clock_cfg)
 {
@@ -3305,77 +3279,3 @@ static void dcn10_get_clock(struct dc *dc,
 				dc->clk_mgr->funcs->get_clock(dc->clk_mgr, context, clock_type, clock_cfg);
 
 }
-
-static const struct hw_sequencer_funcs dcn10_funcs = {
-	.program_gamut_remap = dcn10_program_gamut_remap,
-	.init_hw = dcn10_init_hw,
-	.init_pipes = dcn10_init_pipes,
-	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
-	.apply_ctx_for_surface = dcn10_apply_ctx_for_surface,
-	.update_plane_addr = dcn10_update_plane_addr,
-	.plane_atomic_disconnect = hwss1_plane_atomic_disconnect,
-	.update_dchub = dcn10_update_dchub,
-	.update_mpcc = dcn10_update_mpcc,
-	.update_pending_status = dcn10_update_pending_status,
-	.set_input_transfer_func = dcn10_set_input_transfer_func,
-	.set_output_transfer_func = dcn10_set_output_transfer_func,
-	.program_output_csc = dcn10_program_output_csc,
-	.power_down = dce110_power_down,
-	.enable_accelerated_mode = dce110_enable_accelerated_mode,
-	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
-	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
-	.update_info_frame = dce110_update_info_frame,
-	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
-	.enable_stream = dce110_enable_stream,
-	.disable_stream = dce110_disable_stream,
-	.unblank_stream = dcn10_unblank_stream,
-	.blank_stream = dce110_blank_stream,
-	.enable_audio_stream = dce110_enable_audio_stream,
-	.disable_audio_stream = dce110_disable_audio_stream,
-	.enable_display_power_gating = dcn10_dummy_display_power_gating,
-	.disable_plane = dcn10_disable_plane,
-	.blank_pixel_data = dcn10_blank_pixel_data,
-	.pipe_control_lock = dcn10_pipe_control_lock,
-	.prepare_bandwidth = dcn10_prepare_bandwidth,
-	.optimize_bandwidth = dcn10_optimize_bandwidth,
-	.reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap,
-	.enable_stream_timing = dcn10_enable_stream_timing,
-	.set_drr = dcn10_set_drr,
-	.get_position = dcn10_get_position,
-	.set_static_screen_control = dcn10_set_static_screen_control,
-	.setup_stereo = dcn10_setup_stereo,
-	.set_avmute = dce110_set_avmute,
-	.log_hw_state = dcn10_log_hw_state,
-	.get_hw_state = dcn10_get_hw_state,
-	.clear_status_bits = dcn10_clear_status_bits,
-	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
-	.edp_backlight_control = dce110_edp_backlight_control,
-	.edp_power_control = dce110_edp_power_control,
-	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
-	.set_cursor_position = dcn10_set_cursor_position,
-	.set_cursor_attribute = dcn10_set_cursor_attribute,
-	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
-	.disable_stream_gating = NULL,
-	.enable_stream_gating = NULL,
-	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
-	.setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt,
-	.set_clock = dcn10_set_clock,
-	.get_clock = dcn10_get_clock,
-	.did_underflow_occur = dcn10_did_underflow_occur,
-	.init_blank = NULL,
-	.disable_vga = dcn10_disable_vga,
-	.bios_golden_init = dcn10_bios_golden_init,
-	.plane_atomic_disable = dcn10_plane_atomic_disable,
-	.plane_atomic_power_down = dcn10_plane_atomic_power_down,
-	.enable_power_gating_plane = dcn10_enable_power_gating_plane,
-	.dpp_pg_control = dcn10_dpp_pg_control,
-	.hubp_pg_control = dcn10_hubp_pg_control,
-	.dsc_pg_control = NULL,
-};
-
-
-void dcn10_hw_sequencer_construct(struct dc *dc)
-{
-	dc->hwss = dcn10_funcs;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index d3616b1948cc..4d20f6586bb5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -27,68 +27,160 @@
 #define __DC_HWSS_DCN10_H__
 
 #include "core_types.h"
+#include "hw_sequencer_private.h"
 
 struct dc;
 
 void dcn10_hw_sequencer_construct(struct dc *dc);
-extern void fill_display_configs(
-	const struct dc_state *context,
-	struct dm_pp_display_configuration *pp_display_cfg);
-
-bool is_rgb_cspace(enum dc_color_space output_color_space);
-
-void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-void dcn10_verify_allow_pstate_change_high(struct dc *dc);
 
+int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
+void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+enum dc_status dcn10_enable_stream_timing(
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context,
+		struct dc *dc);
+void dcn10_optimize_bandwidth(
+		struct dc *dc,
+		struct dc_state *context);
+void dcn10_prepare_bandwidth(
+		struct dc *dc,
+		struct dc_state *context);
+void dcn10_pipe_control_lock(
+	struct dc *dc,
+	struct pipe_ctx *pipe,
+	bool lock);
+void dcn10_blank_pixel_data(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool blank);
+void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
+		struct dc_link_settings *link_settings);
+void dcn10_program_output_csc(struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		enum dc_color_space colorspace,
+		uint16_t *matrix,
+		int opp_id);
+bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+				const struct dc_stream_state *stream);
+bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+			const struct dc_plane_state *plane_state);
+void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_reset_hw_ctx_wrap(
+		struct dc *dc,
+		struct dc_state *context);
+void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_apply_ctx_for_surface(
+		struct dc *dc,
+		const struct dc_stream_state *stream,
+		int num_planes,
+		struct dc_state *context);
+void dcn10_hubp_pg_control(
+		struct dce_hwseq *hws,
+		unsigned int hubp_inst,
+		bool power_on);
+void dcn10_dpp_pg_control(
+		struct dce_hwseq *hws,
+		unsigned int dpp_inst,
+		bool power_on);
+void dcn10_enable_power_gating_plane(
+	struct dce_hwseq *hws,
+	bool enable);
+void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_disable_vga(
+	struct dce_hwseq *hws);
 void dcn10_program_pipe(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context);
-
-void dcn10_get_hw_state(
+void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx);
+void dcn10_init_hw(struct dc *dc);
+void dcn10_init_pipes(struct dc *dc, struct dc_state *context);
+enum dc_status dce110_apply_ctx_to_hw(
+		struct dc *dc,
+		struct dc_state *context);
+void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
+void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
+void dce110_power_down(struct dc *dc);
+void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
+void dcn10_enable_timing_synchronization(
+		struct dc *dc,
+		int group_index,
+		int group_size,
+		struct pipe_ctx *grouped_pipes[]);
+void dcn10_enable_per_frame_crtc_position_reset(
+		struct dc *dc,
+		int group_size,
+		struct pipe_ctx *grouped_pipes[]);
+void dce110_update_info_frame(struct pipe_ctx *pipe_ctx);
+void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
+		const uint8_t *custom_sdp_message,
+		unsigned int sdp_message_size);
+void dce110_blank_stream(struct pipe_ctx *pipe_ctx);
+void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx);
+void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx);
+bool dcn10_dummy_display_power_gating(
 		struct dc *dc,
-		char *pBuf, unsigned int bufSize,
+		uint8_t controller_id,
+		struct dc_bios *dcb,
+		enum pipe_gating_control power_gating);
+void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, unsigned int vmin, unsigned int vmax,
+		unsigned int vmid, unsigned int vmid_frame_number);
+void dcn10_get_position(struct pipe_ctx **pipe_ctx,
+		int num_pipes,
+		struct crtc_position *position);
+void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+		int num_pipes, const struct dc_static_screen_params *params);
+void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc);
+void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
+void dcn10_log_hw_state(struct dc *dc,
+		struct dc_log_buffer_ctx *log_ctx);
+void dcn10_get_hw_state(struct dc *dc,
+		char *pBuf,
+		unsigned int bufSize,
 		unsigned int mask);
-
 void dcn10_clear_status_bits(struct dc *dc, unsigned int mask);
-
-bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
-
-bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
-
-bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx);
-
-void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp);
-
-void set_hdr_multiplier(struct pipe_ctx *pipe_ctx);
-
+void dcn10_wait_for_mpcc_disconnect(
+		struct dc *dc,
+		struct resource_pool *res_pool,
+		struct pipe_ctx *pipe_ctx);
+void dce110_edp_backlight_control(
+		struct dc_link *link,
+		bool enable);
+void dce110_edp_power_control(
+		struct dc_link *link,
+		bool power_up);
+void dce110_edp_wait_for_hpd_ready(
+		struct dc_link *link,
+		bool power_up);
+void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx);
+void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
+void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx);
+void dcn10_setup_periodic_interrupt(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		enum vline_select vline);
+enum dc_status dcn10_set_clock(struct dc *dc,
+		enum dc_clock_type clock_type,
+		uint32_t clk_khz,
+		uint32_t stepping);
+void dcn10_get_clock(struct dc *dc,
+		enum dc_clock_type clock_type,
+		struct dc_clock_config *clock_cfg);
+bool dcn10_did_underflow_occur(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_bios_golden_init(struct dc *dc);
+void dcn10_plane_atomic_power_down(struct dc *dc,
+		struct dpp *dpp,
+		struct hubp *hubp);
 void dcn10_get_surface_visual_confirm_color(
 		const struct pipe_ctx *pipe_ctx,
 		struct tg_color *color);
-
 void dcn10_get_hdr_visual_confirm_color(
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color);
-
-bool dcn10_did_underflow_occur(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-void update_dchubp_dpp(
-	struct dc *dc,
-	struct pipe_ctx *pipe_ctx,
-	struct dc_state *context);
-
-struct pipe_ctx *find_top_pipe_for_stream(
-		struct dc *dc,
-		struct dc_state *context,
-		const struct dc_stream_state *stream);
-
-int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
-
-void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
-		const struct dc_plane_state *plane_state);
-void lock_all_pipes(struct dc *dc,
-	struct dc_state *context,
-	bool lock);
+void dcn10_set_hdr_multiplier(struct pipe_ctx *pipe_ctx);
+void dcn10_verify_allow_pstate_change_high(struct dc *dc);
 
 #endif /* __DC_HWSS_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.h
new file mode 100644
index 000000000000..596f95c22e85
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HWSS_DCN10_DEBUG_H__
+#define __DC_HWSS_DCN10_DEBUG_H__
+
+#include "core_types.h"
+
+struct dc;
+
+void dcn10_clear_status_bits(struct dc *dc, unsigned int mask);
+
+void dcn10_log_hw_state(struct dc *dc,
+		struct dc_log_buffer_ctx *log_ctx);
+
+void dcn10_get_hw_state(struct dc *dc,
+		char *pBuf,
+		unsigned int bufSize,
+		unsigned int mask);
+
+#endif /* __DC_HWSS_DCN10_DEBUG_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
new file mode 100644
index 000000000000..e7e5352ec424
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hw_sequencer_private.h"
+#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10_hw_sequencer.h"
+
+static const struct hw_sequencer_funcs dcn10_funcs = {
+	.program_gamut_remap = dcn10_program_gamut_remap,
+	.init_hw = dcn10_init_hw,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = dcn10_apply_ctx_for_surface,
+	.update_plane_addr = dcn10_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn10_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dce110_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dce110_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn10_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn10_disable_plane,
+	.pipe_control_lock = dcn10_pipe_control_lock,
+	.prepare_bandwidth = dcn10_prepare_bandwidth,
+	.optimize_bandwidth = dcn10_optimize_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn10_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dce110_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+};
+
+static const struct hwseq_private_funcs dcn10_private_funcs = {
+	.init_pipes = dcn10_init_pipes,
+	.update_plane_addr = dcn10_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.program_pipe = dcn10_program_pipe,
+	.update_mpcc = dcn10_update_mpcc,
+	.set_input_transfer_func = dcn10_set_input_transfer_func,
+	.set_output_transfer_func = dcn10_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn10_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn10_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.disable_stream_gating = NULL,
+	.enable_stream_gating = NULL,
+	.setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = NULL,
+	.disable_vga = dcn10_disable_vga,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn10_plane_atomic_disable,
+	.plane_atomic_power_down = dcn10_plane_atomic_power_down,
+	.enable_power_gating_plane = dcn10_enable_power_gating_plane,
+	.dpp_pg_control = dcn10_dpp_pg_control,
+	.hubp_pg_control = dcn10_hubp_pg_control,
+	.dsc_pg_control = NULL,
+	.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
+	.get_hdr_visual_confirm_color = dcn10_get_hdr_visual_confirm_color,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+};
+
+void dcn10_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn10_funcs;
+	dc->hwseq->funcs = dcn10_private_funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
new file mode 100644
index 000000000000..8c6fd7b844a4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN10_INIT_H__
+#define __DC_DCN10_INIT_H__
+
+struct dc;
+
+void dcn10_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN10_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
index 0fb9e440cb9d..f05371c1fc36 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
@@ -53,11 +53,9 @@ static const struct ipp_funcs dcn10_ipp_funcs = {
 	.ipp_destroy			= dcn10_ipp_destroy
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 static const struct ipp_funcs dcn20_ipp_funcs = {
 	.ipp_destroy			= dcn10_ipp_destroy
 };
-#endif
 
 void dcn10_ipp_construct(
 	struct dcn10_ipp *ippn10,
@@ -76,7 +74,6 @@ void dcn10_ipp_construct(
 	ippn10->ipp_mask = ipp_mask;
 }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 void dcn20_ipp_construct(
 	struct dcn10_ipp *ippn10,
 	struct dc_context *ctx,
@@ -93,4 +90,3 @@ void dcn20_ipp_construct(
 	ippn10->ipp_shift = ipp_shift;
 	ippn10->ipp_mask = ipp_mask;
 }
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h
index cfa24459242b..f0e0d07b0311 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h
@@ -49,7 +49,6 @@
 	SRI(CURSOR_HOT_SPOT, CURSOR, id), \
 	SRI(CURSOR_DST_OFFSET, CURSOR, id)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define IPP_REG_LIST_DCN20(id) \
 	IPP_REG_LIST_DCN(id), \
 	SRI(CURSOR_SETTINGS, HUBPREQ, id), \
@@ -60,7 +59,6 @@
 	SRI(CURSOR_POSITION, CURSOR0_, id), \
 	SRI(CURSOR_HOT_SPOT, CURSOR0_, id), \
 	SRI(CURSOR_DST_OFFSET, CURSOR0_, id)
-#endif
 
 #define CURSOR0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY__SHIFT	0x4
 #define CURSOR0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY_MASK		0x00000010L
@@ -105,7 +103,6 @@
 	IPP_SF(CURSOR0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh), \
 	IPP_SF(CNVC_CFG0_FORMAT_CONTROL, OUTPUT_FP, mask_sh)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define IPP_MASK_SH_LIST_DCN20(mask_sh) \
 	IPP_MASK_SH_LIST_DCN(mask_sh), \
 	IPP_SF(HUBPREQ0_CURSOR_SETTINGS, CURSOR0_DST_Y_OFFSET, mask_sh), \
@@ -124,7 +121,6 @@
 	IPP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_X, mask_sh), \
 	IPP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_Y, mask_sh), \
 	IPP_SF(CURSOR0_0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh)
-#endif
 
 #define IPP_DCN10_REG_FIELD_LIST(type) \
 	type CNVC_SURFACE_PIXEL_FORMAT; \
@@ -196,13 +192,11 @@ void dcn10_ipp_construct(struct dcn10_ipp *ippn10,
 	const struct dcn10_ipp_shift *ipp_shift,
 	const struct dcn10_ipp_mask *ipp_mask);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 void dcn20_ipp_construct(struct dcn10_ipp *ippn10,
 	struct dc_context *ctx,
 	int inst,
 	const struct dcn10_ipp_registers *regs,
 	const struct dcn10_ipp_shift *ipp_shift,
 	const struct dcn10_ipp_mask *ipp_mask);
-#endif
 
 #endif /* _DCN10_IPP_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
index 8bf5f0f2301d..eb13589b9a81 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
@@ -72,9 +72,7 @@
 struct dcn10_link_enc_aux_registers {
 	uint32_t AUX_CONTROL;
 	uint32_t AUX_DPHY_RX_CONTROL0;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	uint32_t AUX_DPHY_TX_CONTROL;
-#endif
 };
 
 struct dcn10_link_enc_hpd_registers {
@@ -106,13 +104,46 @@ struct dcn10_link_enc_registers {
 	uint32_t DP_DPHY_HBR2_PATTERN_CONTROL;
 	uint32_t DP_SEC_CNTL1;
 	uint32_t TMDS_CTL_BITS;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* DCCG  */
 	uint32_t CLOCK_ENABLE;
 	/* DIG */
 	uint32_t DIG_LANE_ENABLE;
 	/* UNIPHY */
 	uint32_t CHANNEL_XBAR_CNTL;
+	/* DPCS */
+	uint32_t RDPCSTX_PHY_CNTL3;
+	uint32_t RDPCSTX_PHY_CNTL4;
+	uint32_t RDPCSTX_PHY_CNTL5;
+	uint32_t RDPCSTX_PHY_CNTL6;
+	uint32_t RDPCSTX_PHY_CNTL7;
+	uint32_t RDPCSTX_PHY_CNTL8;
+	uint32_t RDPCSTX_PHY_CNTL9;
+	uint32_t RDPCSTX_PHY_CNTL10;
+	uint32_t RDPCSTX_PHY_CNTL11;
+	uint32_t RDPCSTX_PHY_CNTL12;
+	uint32_t RDPCSTX_PHY_CNTL13;
+	uint32_t RDPCSTX_PHY_CNTL14;
+	uint32_t RDPCSTX_PHY_CNTL15;
+	uint32_t RDPCSTX_CNTL;
+	uint32_t RDPCSTX_CLOCK_CNTL;
+	uint32_t RDPCSTX_PHY_CNTL0;
+	uint32_t RDPCSTX_PHY_CNTL2;
+	uint32_t RDPCSTX_PLL_UPDATE_DATA;
+	uint32_t RDPCS_TX_CR_ADDR;
+	uint32_t RDPCS_TX_CR_DATA;
+	uint32_t DPCSTX_TX_CLOCK_CNTL;
+	uint32_t DPCSTX_TX_CNTL;
+	uint32_t RDPCSTX_INTERRUPT_CONTROL;
+	uint32_t RDPCSTX_PHY_FUSE0;
+	uint32_t RDPCSTX_PHY_FUSE1;
+	uint32_t RDPCSTX_PHY_FUSE2;
+	uint32_t RDPCSTX_PHY_FUSE3;
+	uint32_t RDPCSTX_PHY_RX_LD_VAL;
+	uint32_t DPCSTX_DEBUG_CONFIG;
+	uint32_t RDPCSTX_DEBUG_CONFIG;
+	uint32_t RDPCSTX0_RDPCSTX_SCRATCH;
+	uint32_t RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG;
+	uint32_t DCIO_SOFT_RESET;
 	/* indirect registers */
 	uint32_t RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2;
 	uint32_t RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3;
@@ -122,7 +153,6 @@ struct dcn10_link_enc_registers {
 	uint32_t RAWLANE2_DIG_PCS_XF_RX_OVRD_IN_3;
 	uint32_t RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_2;
 	uint32_t RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_3;
-#endif
 };
 
 #define LE_SF(reg_name, field_name, post_fix)\
@@ -228,7 +258,6 @@ struct dcn10_link_enc_registers {
 	type AUX_LS_READ_EN;\
 	type AUX_RX_RECEIVE_WINDOW
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 #define DCN20_LINK_ENCODER_DPCS_REG_FIELD_LIST(type) \
 		type RDPCS_PHY_DP_TX0_DATA_EN;\
@@ -250,6 +279,10 @@ struct dcn10_link_enc_registers {
 		type RDPCS_EXT_REFCLK_EN;\
 		type RDPCS_TX_FIFO_EN;\
 		type UNIPHY_LINK_ENABLE;\
+		type UNIPHY_CHANNEL0_XBAR_SOURCE;\
+		type UNIPHY_CHANNEL1_XBAR_SOURCE;\
+		type UNIPHY_CHANNEL2_XBAR_SOURCE;\
+		type UNIPHY_CHANNEL3_XBAR_SOURCE;\
 		type UNIPHY_CHANNEL0_INVERT;\
 		type UNIPHY_CHANNEL1_INVERT;\
 		type UNIPHY_CHANNEL2_INVERT;\
@@ -337,16 +370,46 @@ struct dcn10_link_enc_registers {
 		type RDPCS_TX_FIFO_ERROR_MASK;\
 		type RDPCS_DPALT_DISABLE_TOGGLE_MASK;\
 		type RDPCS_DPALT_4LANE_TOGGLE_MASK;\
+		type RDPCS_PHY_DPALT_DP4;\
 		type RDPCS_PHY_DPALT_DISABLE;\
 		type RDPCS_PHY_DPALT_DISABLE_ACK;\
 		type RDPCS_PHY_DP_MPLLB_V2I;\
 		type RDPCS_PHY_DP_MPLLB_FREQ_VCO;\
+		type RDPCS_PHY_DP_MPLLB_CP_INT_GS;\
+		type RDPCS_PHY_RX_VREF_CTRL;\
 		type RDPCS_PHY_DP_MPLLB_CP_INT;\
 		type RDPCS_PHY_DP_MPLLB_CP_PROP;\
 		type RDPCS_PHY_RX_REF_LD_VAL;\
 		type RDPCS_PHY_RX_VCO_LD_VAL;\
 		type DPCSTX_DEBUG_CONFIG; \
-		type RDPCSTX_DEBUG_CONFIG
+		type RDPCSTX_DEBUG_CONFIG; \
+		type RDPCS_PHY_DP_TX0_EQ_MAIN;\
+		type RDPCS_PHY_DP_TX0_EQ_PRE;\
+		type RDPCS_PHY_DP_TX0_EQ_POST;\
+		type RDPCS_PHY_DP_TX1_EQ_MAIN;\
+		type RDPCS_PHY_DP_TX1_EQ_PRE;\
+		type RDPCS_PHY_DP_TX1_EQ_POST;\
+		type RDPCS_PHY_DP_TX2_EQ_MAIN;\
+		type RDPCS_PHY_DP_MPLLB_CP_PROP_GS;\
+		type RDPCS_PHY_DP_TX2_EQ_PRE;\
+		type RDPCS_PHY_DP_TX2_EQ_POST;\
+		type RDPCS_PHY_DP_TX3_EQ_MAIN;\
+		type RDPCS_PHY_DCO_RANGE;\
+		type RDPCS_PHY_DCO_FINETUNE;\
+		type RDPCS_PHY_DP_TX3_EQ_PRE;\
+		type RDPCS_PHY_DP_TX3_EQ_POST;\
+		type RDPCS_PHY_SUP_PRE_HP;\
+		type RDPCS_PHY_DP_TX0_VREGDRV_BYP;\
+		type RDPCS_PHY_DP_TX1_VREGDRV_BYP;\
+		type RDPCS_PHY_DP_TX2_VREGDRV_BYP;\
+		type RDPCS_PHY_DP_TX3_VREGDRV_BYP;\
+		type RDPCS_DMCU_DPALT_DIS_BLOCK_REG;\
+		type UNIPHYA_SOFT_RESET;\
+		type UNIPHYB_SOFT_RESET;\
+		type UNIPHYC_SOFT_RESET;\
+		type UNIPHYD_SOFT_RESET;\
+		type UNIPHYE_SOFT_RESET;\
+		type UNIPHYF_SOFT_RESET
 
 #define DCN20_LINK_ENCODER_REG_FIELD_LIST(type) \
 	type DIG_LANE0EN;\
@@ -375,20 +438,15 @@ struct dcn10_link_enc_registers {
 	type AUX_TX_PRECHARGE_SYMBOLS; \
 	type AUX_MODE_DET_CHECK_DELAY;\
 	type DPCS_DBG_CBUS_DIS
-#endif
 
 struct dcn10_link_enc_shift {
 	DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	DCN20_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
-#endif
 };
 
 struct dcn10_link_enc_mask {
 	DCN_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	DCN20_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
-#endif
 };
 
 struct dcn10_link_encoder {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 8b2f29f6dabd..04f863499cfb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -42,20 +42,27 @@ void mpc1_set_bg_color(struct mpc *mpc,
 		int mpcc_id)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+	struct mpcc *bottommost_mpcc = mpc1_get_mpcc(mpc, mpcc_id);
+	uint32_t bg_r_cr, bg_g_y, bg_b_cb;
+
+	/* find bottommost mpcc. */
+	while (bottommost_mpcc->mpcc_bot) {
+		bottommost_mpcc = bottommost_mpcc->mpcc_bot;
+	}
 
 	/* mpc color is 12 bit.  tg_color is 10 bit */
 	/* todo: might want to use 16 bit to represent color and have each
 	 * hw block translate to correct color depth.
 	 */
-	uint32_t bg_r_cr = bg_color->color_r_cr << 2;
-	uint32_t bg_g_y = bg_color->color_g_y << 2;
-	uint32_t bg_b_cb = bg_color->color_b_cb << 2;
+	bg_r_cr = bg_color->color_r_cr << 2;
+	bg_g_y = bg_color->color_g_y << 2;
+	bg_b_cb = bg_color->color_b_cb << 2;
 
-	REG_SET(MPCC_BG_R_CR[mpcc_id], 0,
+	REG_SET(MPCC_BG_R_CR[bottommost_mpcc->mpcc_id], 0,
 			MPCC_BG_R_CR, bg_r_cr);
-	REG_SET(MPCC_BG_G_Y[mpcc_id], 0,
+	REG_SET(MPCC_BG_G_Y[bottommost_mpcc->mpcc_id], 0,
 			MPCC_BG_G_Y, bg_g_y);
-	REG_SET(MPCC_BG_B_CB[mpcc_id], 0,
+	REG_SET(MPCC_BG_B_CB[bottommost_mpcc->mpcc_id], 0,
 			MPCC_BG_B_CB, bg_b_cb);
 }
 
@@ -457,12 +464,10 @@ static const struct mpc_funcs dcn10_mpc_funcs = {
 	.assert_mpcc_idle_before_connect = mpc1_assert_mpcc_idle_before_connect,
 	.init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw,
 	.update_blending = mpc1_update_blending,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	.set_denorm = NULL,
 	.set_denorm_clamp = NULL,
 	.set_output_csc = NULL,
 	.set_output_gamma = NULL,
-#endif
 };
 
 void dcn10_mpc_construct(struct dcn10_mpc *mpc10,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
index e9ebbbe256b4..d79718fde5a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
@@ -168,7 +168,10 @@ static void opp1_set_pixel_encoding(
 		REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 0);
 		break;
 	case PIXEL_ENCODING_YCBCR422:
-		REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 1);
+		REG_UPDATE_3(FMT_CONTROL,
+				FMT_PIXEL_ENCODING, 1,
+				FMT_SUBSAMPLING_MODE, 2,
+				FMT_CBCR_BIT_REDUCTION_BYPASS, 0);
 		break;
 	case PIXEL_ENCODING_YCBCR420:
 		REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 2);
@@ -237,6 +240,9 @@ void opp1_set_dyn_expansion(
 			FMT_DYNAMIC_EXP_EN, 0,
 			FMT_DYNAMIC_EXP_MODE, 0);
 
+	if (opp->dyn_expansion == DYN_EXPANSION_DISABLE)
+		return;
+
 	/*00 - 10-bit -> 12-bit dynamic expansion*/
 	/*01 - 8-bit  -> 12-bit dynamic expansion*/
 	if (signal == SIGNAL_TYPE_HDMI_TYPE_A ||
@@ -367,11 +373,9 @@ void opp1_program_oppbuf(
 	 */
 	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, oppbuf->pixel_repetition);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* Controls the number of padded pixels at the end of a segment */
 	if (REG(OPPBUF_CONTROL1))
 		REG_UPDATE(OPPBUF_CONTROL1, OPPBUF_NUM_SEGMENT_PADDED_PIXELS, oppbuf->num_segment_padded_pixels);
-#endif
 }
 
 void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable)
@@ -398,9 +402,8 @@ static const struct opp_funcs dcn10_opp_funcs = {
 		.opp_program_bit_depth_reduction = opp1_program_bit_depth_reduction,
 		.opp_program_stereo = opp1_program_stereo,
 		.opp_pipe_clock_control = opp1_pipe_clock_control,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		.opp_set_disp_pattern_generator = NULL,
-#endif
+		.dpg_is_blanked = NULL,
 		.opp_destroy = opp1_destroy
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
index 0f10adea000c..2c0ecfa5a643 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
@@ -116,6 +116,8 @@
 	type FMT_RAND_G_SEED; \
 	type FMT_RAND_B_SEED; \
 	type FMT_PIXEL_ENCODING; \
+	type FMT_SUBSAMPLING_MODE; \
+	type FMT_CBCR_BIT_REDUCTION_BYPASS; \
 	type FMT_CLAMP_DATA_EN; \
 	type FMT_CLAMP_COLOR_FORMAT; \
 	type FMT_DYNAMIC_EXP_EN; \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index e74a07d03fde..a9a43b397db9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -457,11 +457,16 @@ static bool optc1_enable_crtc(struct timing_generator *optc)
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 1);
 
+	REG_SEQ_START();
+
 	/* Enable CRTC */
 	REG_UPDATE_2(OTG_CONTROL,
 			OTG_DISABLE_POINT_CNTL, 3,
 			OTG_MASTER_EN, 1);
 
+	REG_SEQ_SUBMIT();
+	REG_SEQ_WAIT_DONE();
+
 	return true;
 }
 
@@ -784,21 +789,26 @@ void optc1_set_early_control(
 
 void optc1_set_static_screen_control(
 	struct timing_generator *optc,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	/* Bit 8 is no longer applicable in RV for PSR case,
 	 * set bit 8 to 0 if given
 	 */
-	if ((value & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN)
+	if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN)
 			!= 0)
-		value = value &
+		event_triggers = event_triggers &
 		~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN;
 
 	REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0,
-			OTG_STATIC_SCREEN_EVENT_MASK, value,
-			OTG_STATIC_SCREEN_FRAME_COUNT, 2);
+			OTG_STATIC_SCREEN_EVENT_MASK, event_triggers,
+			OTG_STATIC_SCREEN_FRAME_COUNT, num_frames);
 }
 
 void optc1_setup_manual_trigger(struct timing_generator *optc)
@@ -1230,59 +1240,25 @@ bool optc1_is_stereo_left_eye(struct timing_generator *optc)
 	return ret;
 }
 
-bool optc1_is_matching_timing(struct timing_generator *tg,
-		const struct dc_crtc_timing *otg_timing)
+bool optc1_get_hw_timing(struct timing_generator *tg,
+		struct dc_crtc_timing *hw_crtc_timing)
 {
-	struct dc_crtc_timing hw_crtc_timing = {0};
 	struct dcn_otg_state s = {0};
 
-	if (tg == NULL || otg_timing == NULL)
+	if (tg == NULL || hw_crtc_timing == NULL)
 		return false;
 
 	optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
 
-	hw_crtc_timing.h_total = s.h_total + 1;
-	hw_crtc_timing.h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end);
-	hw_crtc_timing.h_front_porch = s.h_total + 1 - s.h_blank_start;
-	hw_crtc_timing.h_sync_width = s.h_sync_a_end - s.h_sync_a_start;
-
-	hw_crtc_timing.v_total = s.v_total + 1;
-	hw_crtc_timing.v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end);
-	hw_crtc_timing.v_front_porch = s.v_total + 1 - s.v_blank_start;
-	hw_crtc_timing.v_sync_width = s.v_sync_a_end - s.v_sync_a_start;
+	hw_crtc_timing->h_total = s.h_total + 1;
+	hw_crtc_timing->h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end);
+	hw_crtc_timing->h_front_porch = s.h_total + 1 - s.h_blank_start;
+	hw_crtc_timing->h_sync_width = s.h_sync_a_end - s.h_sync_a_start;
 
-	if (otg_timing->h_total != hw_crtc_timing.h_total)
-		return false;
-
-	if (otg_timing->h_border_left != hw_crtc_timing.h_border_left)
-		return false;
-
-	if (otg_timing->h_addressable != hw_crtc_timing.h_addressable)
-		return false;
-
-	if (otg_timing->h_border_right != hw_crtc_timing.h_border_right)
-		return false;
-
-	if (otg_timing->h_front_porch != hw_crtc_timing.h_front_porch)
-		return false;
-
-	if (otg_timing->h_sync_width != hw_crtc_timing.h_sync_width)
-		return false;
-
-	if (otg_timing->v_total != hw_crtc_timing.v_total)
-		return false;
-
-	if (otg_timing->v_border_top != hw_crtc_timing.v_border_top)
-		return false;
-
-	if (otg_timing->v_addressable != hw_crtc_timing.v_addressable)
-		return false;
-
-	if (otg_timing->v_border_bottom != hw_crtc_timing.v_border_bottom)
-		return false;
-
-	if (otg_timing->v_sync_width != hw_crtc_timing.v_sync_width)
-		return false;
+	hw_crtc_timing->v_total = s.v_total + 1;
+	hw_crtc_timing->v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end);
+	hw_crtc_timing->v_front_porch = s.v_total + 1 - s.v_blank_start;
+	hw_crtc_timing->v_sync_width = s.v_sync_a_end - s.v_sync_a_start;
 
 	return true;
 }
@@ -1486,7 +1462,6 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
 		.get_frame_count = optc1_get_vblank_counter,
 		.get_scanoutpos = optc1_get_crtc_scanoutpos,
 		.get_otg_active_size = optc1_get_otg_active_size,
-		.is_matching_timing = optc1_is_matching_timing,
 		.set_early_control = optc1_set_early_control,
 		/* used by enable_timing_synchronization. Not need for FPGA */
 		.wait_for_state = optc1_wait_for_state,
@@ -1514,7 +1489,8 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
 		.configure_crc = optc1_configure_crc,
 		.set_vtg_params = optc1_set_vtg_params,
 		.program_manual_trigger = optc1_program_manual_trigger,
-		.setup_manual_trigger = optc1_setup_manual_trigger
+		.setup_manual_trigger = optc1_setup_manual_trigger,
+		.get_hw_timing = optc1_get_hw_timing,
 };
 
 void dcn10_timing_generator_init(struct optc *optc1)
@@ -1531,7 +1507,6 @@ void dcn10_timing_generator_init(struct optc *optc1)
 	optc1->min_v_sync_width = 1;
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 /* "Containter" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this:
  *
  * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as
@@ -1544,15 +1519,12 @@ void dcn10_timing_generator_init(struct optc *optc1)
  *   to it) and has to be treated the same as 4:2:0, i.e. target containter rate has to be halved in this case as well.
  *
  */
-#endif
 bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
 {
 	bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
 			&& !timing->dsc_cfg.ycbcr422_simple);
-#endif
 	return two_pix;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 83575599672e..f277656d5464 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -165,13 +165,11 @@ struct dcn_optc_registers {
 	uint32_t OTG_CRC0_WINDOWB_X_CONTROL;
 	uint32_t OTG_CRC0_WINDOWB_Y_CONTROL;
 	uint32_t GSL_SOURCE_SELECT;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	uint32_t DWB_SOURCE_SELECT;
 	uint32_t OTG_DSC_START_POSITION;
 	uint32_t OPTC_DATA_FORMAT_CONTROL;
 	uint32_t OPTC_BYTES_PER_PIXEL;
 	uint32_t OPTC_WIDTH_CONTROL;
-#endif
 };
 
 #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\
@@ -456,7 +454,6 @@ struct dcn_optc_registers {
 	type MANUAL_FLOW_CONTROL;\
 	type MANUAL_FLOW_CONTROL_SEL;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 
 #define TG_REG_FIELD_LIST(type) \
 	TG_REG_FIELD_LIST_DCN1_0(type)\
@@ -479,12 +476,6 @@ struct dcn_optc_registers {
 	type OPTC_DWB0_SOURCE_SELECT;\
 	type OPTC_DWB1_SOURCE_SELECT;
 
-#else
-
-#define TG_REG_FIELD_LIST(type) \
-	TG_REG_FIELD_LIST_DCN1_0(type)
-
-#endif
 
 
 struct dcn_optc_shift {
@@ -542,14 +533,14 @@ struct dcn_otg_state {
 	uint32_t h_total;
 	uint32_t underflow_occurred_status;
 	uint32_t otg_enabled;
+	uint32_t blank_enabled;
 };
 
 void optc1_read_otg_state(struct optc *optc1,
 		struct dcn_otg_state *s);
 
-bool optc1_is_matching_timing(
-	struct timing_generator *tg,
-	const struct dc_crtc_timing *otg_timing);
+bool optc1_get_hw_timing(struct timing_generator *tg,
+		struct dc_crtc_timing *hw_crtc_timing);
 
 bool optc1_validate_timing(
 	struct timing_generator *optc,
@@ -634,7 +625,8 @@ void optc1_set_drr(
 
 void optc1_set_static_screen_control(
 	struct timing_generator *optc,
-	uint32_t value);
+	uint32_t event_triggers,
+	uint32_t num_frames);
 
 void optc1_program_stereo(struct timing_generator *optc,
 	const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 1599bb971111..3b71898e859e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -28,6 +28,8 @@
 #include "dm_services.h"
 #include "dc.h"
 
+#include "dcn10_init.h"
+
 #include "resource.h"
 #include "include/irq_service_interface.h"
 #include "dcn10_resource.h"
@@ -319,6 +321,14 @@ static const struct dcn10_link_enc_mask le_mask = {
 		LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK)
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCN10_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCN10_AUX_MASK_SH_LIST(_MASK)
+};
+
 #define ipp_regs(id)\
 [id] = {\
 	IPP_REG_LIST_DCN10(id),\
@@ -471,6 +481,28 @@ static const struct dcn_hubbub_mask hubbub_mask = {
 		HUBBUB_MASK_SH_LIST_DCN10(_MASK)
 };
 
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 #define clk_src_regs(index, pllid)\
 [index] = {\
 	CS_COMMON_REG_LIST_DCN1_0(index, pllid),\
@@ -642,7 +674,10 @@ struct dce_aux *dcn10_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -751,14 +786,18 @@ struct link_encoder *dcn10_link_encoder_create(
 {
 	struct dcn10_link_encoder *enc10 =
 		kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc10)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dcn10_link_encoder_construct(enc10,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source],
 				      &le_shift,
@@ -882,7 +921,7 @@ static struct pp_smu_funcs *dcn10_pp_smu_create(struct dc_context *ctx)
 	return pp_smu;
 }
 
-static void destruct(struct dcn10_resource_pool *pool)
+static void dcn10_resource_destruct(struct dcn10_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -1129,7 +1168,7 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool);
 
-	destruct(dcn10_pool);
+	dcn10_resource_destruct(dcn10_pool);
 	kfree(dcn10_pool);
 	*pool = NULL;
 }
@@ -1268,7 +1307,7 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-static bool construct(
+static bool dcn10_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dcn10_resource_pool *pool)
@@ -1308,6 +1347,8 @@ static bool construct(
 	dc->caps.max_slave_planes = 1;
 	dc->caps.is_apu = true;
 	dc->caps.post_blend_color_processing = false;
+	dc->caps.extended_aux_timeout_support = false;
+
 	/* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */
 	dc->caps.force_dp_tps4_for_cp2520 = true;
 
@@ -1553,7 +1594,7 @@ static bool construct(
 
 fail:
 
-	destruct(pool);
+	dcn10_resource_destruct(pool);
 
 	return false;
 }
@@ -1568,7 +1609,7 @@ struct resource_pool *dcn10_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(init_data->num_virtual_links, dc, pool))
+	if (dcn10_resource_construct(init_data->num_virtual_links, dc, pool))
 		return &pool->base;
 
 	kfree(pool);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 9aa258f3550b..376c4264d295 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -247,6 +247,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting)
 {
 	uint32_t h_active_start;
@@ -312,10 +313,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
 	 * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7,
 	 * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care").
 	 */
-	if ((hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) ||
-			(output_color_space == COLOR_SPACE_2020_YCBCR) ||
-			(output_color_space == COLOR_SPACE_2020_RGB_FULLRANGE) ||
-			(output_color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE))
+	if (use_vsc_sdp_for_colorimetry)
 		misc1 = misc1 | 0x40;
 	else
 		misc1 = misc1 & ~0x40;
@@ -1553,6 +1551,66 @@ unsigned int enc1_dig_source_otg(
 	return tg_inst;
 }
 
+bool enc1_stream_encoder_dp_get_pixel_format(
+	struct stream_encoder *enc,
+	enum dc_pixel_encoding *encoding,
+	enum dc_color_depth *depth)
+{
+	uint32_t hw_encoding = 0;
+	uint32_t hw_depth = 0;
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (enc == NULL ||
+		encoding == NULL ||
+		depth == NULL)
+		return false;
+
+	REG_GET_2(DP_PIXEL_FORMAT,
+		DP_PIXEL_ENCODING, &hw_encoding,
+		DP_COMPONENT_DEPTH, &hw_depth);
+
+	switch (hw_depth) {
+	case DP_COMPONENT_PIXEL_DEPTH_6BPC:
+		*depth = COLOR_DEPTH_666;
+		break;
+	case DP_COMPONENT_PIXEL_DEPTH_8BPC:
+		*depth = COLOR_DEPTH_888;
+		break;
+	case DP_COMPONENT_PIXEL_DEPTH_10BPC:
+		*depth = COLOR_DEPTH_101010;
+		break;
+	case DP_COMPONENT_PIXEL_DEPTH_12BPC:
+		*depth = COLOR_DEPTH_121212;
+		break;
+	case DP_COMPONENT_PIXEL_DEPTH_16BPC:
+		*depth = COLOR_DEPTH_161616;
+		break;
+	default:
+		*depth = COLOR_DEPTH_UNDEFINED;
+		break;
+	}
+
+	switch (hw_encoding) {
+	case DP_PIXEL_ENCODING_TYPE_RGB444:
+		*encoding = PIXEL_ENCODING_RGB;
+		break;
+	case DP_PIXEL_ENCODING_TYPE_YCBCR422:
+		*encoding = PIXEL_ENCODING_YCBCR422;
+		break;
+	case DP_PIXEL_ENCODING_TYPE_YCBCR444:
+	case DP_PIXEL_ENCODING_TYPE_Y_ONLY:
+		*encoding = PIXEL_ENCODING_YCBCR444;
+		break;
+	case DP_PIXEL_ENCODING_TYPE_YCBCR420:
+		*encoding = PIXEL_ENCODING_YCBCR420;
+		break;
+	default:
+		*encoding = PIXEL_ENCODING_UNDEFINED;
+		break;
+	}
+	return true;
+}
+
 static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
 	.dp_set_stream_attribute =
 		enc1_stream_encoder_dp_set_stream_attribute,
@@ -1589,6 +1647,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
 	.dig_connect_to_otg  = enc1_dig_connect_to_otg,
 	.hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
 	.dig_source_otg = enc1_dig_source_otg,
+
+	.dp_get_pixel_format  = enc1_stream_encoder_dp_get_pixel_format,
 };
 
 void dcn10_stream_encoder_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index a512cbea00d1..f9b9e221c698 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -163,14 +163,12 @@ struct dcn10_stream_enc_registers {
 	uint32_t DP_MSA_TIMING_PARAM3;
 	uint32_t DP_MSA_TIMING_PARAM4;
 	uint32_t HDMI_DB_CONTROL;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint32_t DP_DSC_CNTL;
 	uint32_t DP_DSC_BYTES_PER_PIXEL;
 	uint32_t DME_CONTROL;
 	uint32_t DP_SEC_METADATA_TRANSMISSION;
 	uint32_t HDMI_METADATA_PACKET_CONTROL;
 	uint32_t DP_SEC_FRAMING4;
-#endif
 	uint32_t DIG_CLOCK_PATTERN;
 };
 
@@ -466,7 +464,6 @@ struct dcn10_stream_enc_registers {
 	type DIG_SOURCE_SELECT;\
 	type DIG_CLOCK_PATTERN
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define SE_REG_FIELD_LIST_DCN2_0(type) \
 	type DP_DSC_MODE;\
 	type DP_DSC_SLICE_WIDTH;\
@@ -485,20 +482,15 @@ struct dcn10_stream_enc_registers {
 	type DOLBY_VISION_EN;\
 	type DP_PIXEL_COMBINE;\
 	type DP_SST_SDP_SPLITTING
-#endif
 
 struct dcn10_stream_encoder_shift {
 	SE_REG_FIELD_LIST_DCN1_0(uint8_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	SE_REG_FIELD_LIST_DCN2_0(uint8_t);
-#endif
 };
 
 struct dcn10_stream_encoder_mask {
 	SE_REG_FIELD_LIST_DCN1_0(uint32_t);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	SE_REG_FIELD_LIST_DCN2_0(uint32_t);
-#endif
 };
 
 struct dcn10_stream_encoder {
@@ -526,6 +518,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting);
 
 void enc1_stream_encoder_hdmi_set_stream_attribute(
@@ -621,4 +614,9 @@ void get_audio_clock_info(
 void enc1_reset_hdmi_stream_attribute(
 	struct stream_encoder *enc);
 
+bool enc1_stream_encoder_dp_get_pixel_format(
+	struct stream_encoder *enc,
+	enum dc_pixel_encoding *encoding,
+	enum dc_color_depth *depth);
+
 #endif /* __DC_STREAM_ENCODER_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 63f3bddba7da..5fcaf78334ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -1,16 +1,21 @@
+# SPDX-License-Identifier: MIT
 #
 # Makefile for DCN.
 
-DCN20 = dcn20_resource.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
+DCN20 = dcn20_resource.o dcn20_init.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
 		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
 
-ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 DCN20 += dcn20_dsc.o
-endif
 
+ifdef CONFIG_X86
 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec
+endif
 
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
@@ -18,6 +23,7 @@ IS_OLD_GCC = 1
 endif
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -26,6 +32,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
 else
 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
 endif
+endif
 
 AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
index 16476ed25536..50bffbfdd394 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
@@ -44,49 +44,26 @@
 #define DC_LOGGER \
 	dccg->ctx->logger
 
-void dccg2_update_dpp_dto(struct dccg *dccg,
-		int dpp_inst,
-		int req_dppclk,
-		bool reduce_divider_only)
+void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
 {
 	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
 
 	if (dccg->ref_dppclk && req_dppclk) {
 		int ref_dppclk = dccg->ref_dppclk;
-		int current_phase, current_modulo;
+		int modulo, phase;
 
-		ASSERT(req_dppclk <= ref_dppclk);
-		/* need to clamp to 8 bits */
-		if (ref_dppclk > 0xff) {
-			int divider = (ref_dppclk + 0xfe) / 0xff;
+		// phase / modulo = dpp pipe clk / dpp global clk
+		modulo = 0xff;   // use FF at the end
+		phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk;
 
-			ref_dppclk /= divider;
-			req_dppclk = (req_dppclk + divider - 1) / divider;
-			if (req_dppclk > ref_dppclk)
-				req_dppclk = ref_dppclk;
-		}
-
-		REG_GET_2(DPPCLK_DTO_PARAM[dpp_inst],
-				DPPCLK0_DTO_PHASE, &current_phase,
-				DPPCLK0_DTO_MODULO, &current_modulo);
-
-		if (reduce_divider_only) {
-			// requested phase/modulo greater than current
-			if (req_dppclk * current_modulo >= current_phase * ref_dppclk) {
-				REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
-						DPPCLK0_DTO_PHASE, req_dppclk,
-						DPPCLK0_DTO_MODULO, ref_dppclk);
-			} else {
-				REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
-						DPPCLK0_DTO_PHASE, current_phase,
-						DPPCLK0_DTO_MODULO, current_modulo);
-			}
-		} else {
-			REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
-					DPPCLK0_DTO_PHASE, req_dppclk,
-					DPPCLK0_DTO_MODULO, ref_dppclk);
+		if (phase > 0xff) {
+			ASSERT(false);
+			phase = 0xff;
 		}
 
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+				DPPCLK0_DTO_PHASE, phase,
+				DPPCLK0_DTO_MODULO, modulo);
 		REG_UPDATE(DPPCLK_DTO_CTRL,
 				DPPCLK_DTO_ENABLE[dpp_inst], 1);
 	} else {
@@ -119,32 +96,6 @@ void dccg2_get_dccg_ref_freq(struct dccg *dccg,
 
 void dccg2_init(struct dccg *dccg)
 {
-	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
-	// Fallthrough intentional to program all available dpp_dto's
-	switch (dccg_dcn->base.ctx->dc->res_pool->pipe_count) {
-	case 6:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[5], 1);
-		/* Fall through */
-	case 5:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[4], 1);
-		/* Fall through */
-	case 4:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[3], 1);
-		/* Fall through */
-	case 3:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[2], 1);
-		/* Fall through */
-	case 2:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[1], 1);
-		/* Fall through */
-	case 1:
-		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[0], 1);
-		break;
-	default:
-		ASSERT(false);
-		break;
-	}
 }
 
 static const struct dccg_funcs dccg2_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
index 74a074a873cd..2205cb0204e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
@@ -97,7 +97,7 @@ struct dcn_dccg {
 	const struct dccg_mask *dccg_mask;
 };
 
-void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk, bool raise_divider_only);
+void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
 
 void dccg2_get_dccg_ref_freq(struct dccg *dccg,
 		unsigned int xtalin_freq_inKhz,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
index 2f5aade1e882..13e057d7ee93 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
@@ -104,7 +104,7 @@ static void dpp2_cnv_setup (
 	uint32_t pixel_format = 0;
 	uint32_t alpha_en = 1;
 	enum dc_color_space color_space = COLOR_SPACE_SRGB;
-	enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS;
+	enum dcn20_input_csc_select select = DCN2_ICSC_SELECT_BYPASS;
 	bool force_disable_cursor = false;
 	struct out_csc_color_matrix tbl_entry;
 	uint32_t is_2bit = 0;
@@ -145,25 +145,25 @@ static void dpp2_cnv_setup (
 		force_disable_cursor = false;
 		pixel_format = 65;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
 		force_disable_cursor = true;
 		pixel_format = 64;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
 		force_disable_cursor = true;
 		pixel_format = 67;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
 		force_disable_cursor = true;
 		pixel_format = 66;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
 		pixel_format = 22;
@@ -177,7 +177,7 @@ static void dpp2_cnv_setup (
 	case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
 		pixel_format = 12;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
 		pixel_format = 112;
@@ -188,13 +188,13 @@ static void dpp2_cnv_setup (
 	case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
 		pixel_format = 114;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		is_2bit = 1;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
 		pixel_format = 115;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		is_2bit = 1;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
@@ -227,13 +227,13 @@ static void dpp2_cnv_setup (
 		tbl_entry.color_space = input_color_space;
 
 		if (color_space >= COLOR_SPACE_YCBCR601)
-			select = INPUT_CSC_SELECT_ICSC;
+			select = DCN2_ICSC_SELECT_ICSC_A;
 		else
-			select = INPUT_CSC_SELECT_BYPASS;
+			select = DCN2_ICSC_SELECT_BYPASS;
 
-		dpp1_program_input_csc(dpp_base, color_space, select, &tbl_entry);
+		dpp2_program_input_csc(dpp_base, color_space, select, &tbl_entry);
 	} else
-	dpp1_program_input_csc(dpp_base, color_space, select, NULL);
+	dpp2_program_input_csc(dpp_base, color_space, select, NULL);
 
 	if (force_disable_cursor) {
 		REG_UPDATE(CURSOR_CONTROL,
@@ -376,13 +376,6 @@ bool dpp2_get_optimal_number_of_taps(
 		struct scaler_data *scl_data,
 		const struct scaling_taps *in_taps)
 {
-	uint32_t pixel_width;
-
-	if (scl_data->viewport.width > scl_data->recout.width)
-		pixel_width = scl_data->recout.width;
-	else
-		pixel_width = scl_data->viewport.width;
-
 	/* Some ASICs does not support  FP16 scaling, so we reject modes require this*/
 	if (scl_data->viewport.width  != scl_data->h_active &&
 		scl_data->viewport.height != scl_data->v_active &&
@@ -464,8 +457,8 @@ static struct dpp_funcs dcn20_dpp_funcs = {
 	.dpp_read_state = dpp20_read_state,
 	.dpp_reset = dpp_reset,
 	.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
-	.dpp_get_optimal_number_of_taps = dpp2_get_optimal_number_of_taps,
-	.dpp_set_gamut_remap = dpp1_cm_set_gamut_remap,
+	.dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps,
+	.dpp_set_gamut_remap = dpp2_cm_set_gamut_remap,
 	.dpp_set_csc_adjustment = NULL,
 	.dpp_set_csc_default = NULL,
 	.dpp_program_regamma_pwl = oppn20_dummy_program_regamma_pwl,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
index 290b2854bd2c..27610251c57f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
@@ -30,16 +30,20 @@
 #define TO_DCN20_DPP(dpp)\
 	container_of(dpp, struct dcn20_dpp, base)
 
-#define TF_REG_LIST_DCN20(id) \
-	TF_REG_LIST_DCN(id), \
+#define TF_REG_LIST_DCN20_COMMON_UPDATED(id) \
 	SRI(CM_BLNDGAM_LUT_WRITE_EN_MASK, CM, id), \
+	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM, id), \
+	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM, id), \
+	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM, id), \
+	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM, id), \
+	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM, id), \
+	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM, id)
+
+#define TF_REG_LIST_DCN20_COMMON(id) \
 	SRI(CM_BLNDGAM_CONTROL, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_START_CNTL_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_START_CNTL_G, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_START_CNTL_R, CM, id), \
-	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM, id), \
-	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM, id), \
-	SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_END_CNTL1_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_END_CNTL2_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMB_END_CNTL1_G, CM, id), \
@@ -66,9 +70,6 @@
 	SRI(CM_BLNDGAM_RAMA_START_CNTL_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMA_START_CNTL_G, CM, id), \
 	SRI(CM_BLNDGAM_RAMA_START_CNTL_R, CM, id), \
-	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM, id), \
-	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM, id), \
-	SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM, id), \
 	SRI(CM_BLNDGAM_RAMA_END_CNTL1_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMA_END_CNTL2_B, CM, id), \
 	SRI(CM_BLNDGAM_RAMA_END_CNTL1_G, CM, id), \
@@ -147,7 +148,22 @@
 	SRI(CM_SHAPER_RAMA_REGION_28_29, CM, id), \
 	SRI(CM_SHAPER_RAMA_REGION_30_31, CM, id), \
 	SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \
-	SRI(CM_SHAPER_LUT_INDEX, CM, id), \
+	SRI(CM_SHAPER_LUT_INDEX, CM, id)
+
+#define TF_REG_LIST_DCN20_COMMON_APPEND(id) \
+	SRI(CM_GAMUT_REMAP_B_C11_C12, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C13_C14, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C21_C22, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C23_C24, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C31_C32, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C33_C34, CM, id),\
+	SRI(CM_ICSC_B_C11_C12, CM, id), \
+	SRI(CM_ICSC_B_C33_C34, CM, id)
+
+#define TF_REG_LIST_DCN20(id) \
+	TF_REG_LIST_DCN(id), \
+	TF_REG_LIST_DCN20_COMMON(id), \
+	TF_REG_LIST_DCN20_COMMON_UPDATED(id), \
 	SRI(CURSOR_CONTROL, CURSOR0_, id), \
 	SRI(ALPHA_2BIT_LUT, CNVC_CFG, id), \
 	SRI(FCNV_FP_BIAS_R, CNVC_CFG, id), \
@@ -166,27 +182,41 @@
 	SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\
 	SRI(DSCL_MEM_PWR_CTRL, DSCL, id)
 
-#define TF_REG_LIST_SH_MASK_DCN20(mask_sh)\
-	TF_REG_LIST_SH_MASK_DCN(mask_sh), \
+
+#define TF_REG_LIST_SH_MASK_DCN20_UPDATED(mask_sh)\
+	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_B, CM_BLNDGAM_RAMB_EXP_REGION_END_B, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_G, CM_BLNDGAM_RAMB_EXP_REGION_END_G, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_R, CM_BLNDGAM_RAMB_EXP_REGION_END_R, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_B, CM_BLNDGAM_RAMA_EXP_REGION_END_B, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_G, CM_BLNDGAM_RAMA_EXP_REGION_END_G, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_R, CM_BLNDGAM_RAMA_EXP_REGION_END_R, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_B, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_G, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_CONTROL, CM_BLNDGAM_LUT_MODE, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_EN_MASK, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_SEL, mask_sh), \
+	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_CONFIG_STATUS, mask_sh), \
+	TF_SF(CM0_CM_SHAPER_CONTROL, CM_SHAPER_LUT_MODE, mask_sh)
+
+
+#define TF_REG_LIST_SH_MASK_DCN20_COMMON(mask_sh)\
+	TF_SF(CM0_CM_3DLUT_MODE, CM_3DLUT_MODE, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_START_B, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_B, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_START_G, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_G, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_START_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_B, CM_BLNDGAM_RAMB_EXP_REGION_END_B, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_B, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_B, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_G, CM_BLNDGAM_RAMB_EXP_REGION_END_G, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_G, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_G, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_R, CM_BLNDGAM_RAMB_EXP_REGION_END_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_R, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_R, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION0_LUT_OFFSET, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION0_NUM_SEGMENTS, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION1_LUT_OFFSET, mask_sh), \
@@ -261,18 +291,9 @@
 	TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_G, CM_BLNDGAM_RAMA_EXP_REGION_START_SEGMENT_G, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_START_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_START_SEGMENT_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_B, CM_BLNDGAM_RAMA_EXP_REGION_END_B, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_B, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_G, CM_BLNDGAM_RAMA_EXP_REGION_END_G, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_G, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_R, CM_BLNDGAM_RAMA_EXP_REGION_END_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_R, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_R, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION0_LUT_OFFSET, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION0_NUM_SEGMENTS, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION1_LUT_OFFSET, mask_sh), \
@@ -341,9 +362,6 @@
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION32_NUM_SEGMENTS, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION33_LUT_OFFSET, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION33_NUM_SEGMENTS, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_EN_MASK, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_SEL, mask_sh), \
-	TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_CONFIG_STATUS, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_LUT_INDEX, CM_BLNDGAM_LUT_INDEX, mask_sh), \
 	TF_SF(CM0_CM_BLNDGAM_LUT_DATA, CM_BLNDGAM_LUT_DATA, mask_sh), \
 	TF_SF(CM0_CM_MEM_PWR_CTRL, BLNDGAM_MEM_PWR_FORCE, mask_sh), \
@@ -356,7 +374,6 @@
 	TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_WRITE_EN_MASK, mask_sh), \
 	TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_RAM_SEL, mask_sh), \
 	TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_30BIT_EN, mask_sh), \
-	TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_CONFIG_STATUS, mask_sh), \
 	TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_READ_SEL, mask_sh), \
 	TF_SF(CM0_CM_SHAPER_CONTROL, CM_SHAPER_LUT_MODE, mask_sh), \
 	TF_SF(CM0_CM_SHAPER_RAMB_START_CNTL_B, CM_SHAPER_RAMB_EXP_REGION_START_B, mask_sh), \
@@ -521,9 +538,14 @@
 	TF_SF(CM0_CM_SHAPER_RAMA_REGION_32_33, CM_SHAPER_RAMA_EXP_REGION33_NUM_SEGMENTS, mask_sh), \
 	TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_LUT_WRITE_EN_MASK, mask_sh), \
 	TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_LUT_WRITE_SEL, mask_sh), \
-	TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_CONFIG_STATUS, mask_sh), \
 	TF_SF(CM0_CM_SHAPER_LUT_INDEX, CM_SHAPER_LUT_INDEX, mask_sh), \
-	TF_SF(CM0_CM_SHAPER_LUT_DATA, CM_SHAPER_LUT_DATA, mask_sh), \
+	TF_SF(CM0_CM_SHAPER_LUT_DATA, CM_SHAPER_LUT_DATA, mask_sh)
+
+
+#define TF_REG_LIST_SH_MASK_DCN20(mask_sh)\
+	TF_REG_LIST_SH_MASK_DCN(mask_sh), \
+	TF_REG_LIST_SH_MASK_DCN20_COMMON(mask_sh), \
+	TF_REG_LIST_SH_MASK_DCN20_UPDATED(mask_sh), \
 	TF_SF(CM0_CM_DGAM_LUT_WRITE_EN_MASK, CM_DGAM_CONFIG_STATUS, mask_sh), \
 	TF_SF(CM0_CM_CONTROL, CM_BYPASS, mask_sh), \
 	TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \
@@ -560,9 +582,29 @@
 	TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\
 	TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh)
 
+/* DPP CM debug status register:
+ *
+ *		Status index including current ICSC, Gamut Remap Mode is 9
+ *			ICSC Mode: [4..3]
+ *			Gamut Remap Mode: [10..9]
+ */
+#define CM_TEST_DEBUG_DATA_STATUS_IDX 9
+
+#define TF_DEBUG_REG_LIST_SH_DCN20 \
+	TF_DEBUG_REG_LIST_SH_DCN10, \
+	.CM_TEST_DEBUG_DATA_ICSC_MODE = 3, \
+	.CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 9
+
+#define TF_DEBUG_REG_LIST_MASK_DCN20 \
+	TF_DEBUG_REG_LIST_MASK_DCN10, \
+	.CM_TEST_DEBUG_DATA_ICSC_MODE = 0x18, \
+	.CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 0x600
+
 #define TF_REG_FIELD_LIST_DCN2_0(type) \
 	TF_REG_FIELD_LIST(type) \
 	type CM_BLNDGAM_LUT_DATA; \
+	type CM_TEST_DEBUG_DATA_ICSC_MODE; \
+	type CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE; \
 	type FORMAT_CNV16; \
 	type CNVC_BYPASS_MSB_ALIGN; \
 	type CLAMP_POSITIVE; \
@@ -593,6 +635,7 @@
 	type OBUF_MEM_PWR_FORCE;\
 	type LUT_MEM_PWR_FORCE
 
+
 struct dcn2_dpp_shift {
 	TF_REG_FIELD_LIST_DCN2_0(uint8_t);
 };
@@ -616,11 +659,22 @@ struct dcn2_dpp_mask {
 	uint32_t COLOR_KEYER_RED; \
 	uint32_t COLOR_KEYER_GREEN; \
 	uint32_t COLOR_KEYER_BLUE; \
-	uint32_t OBUF_MEM_PWR_CTRL;\
+	uint32_t OBUF_MEM_PWR_CTRL; \
 	uint32_t DSCL_MEM_PWR_CTRL
 
+#define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \
+	uint32_t CM_GAMUT_REMAP_B_C11_C12; \
+	uint32_t CM_GAMUT_REMAP_B_C13_C14; \
+	uint32_t CM_GAMUT_REMAP_B_C21_C22; \
+	uint32_t CM_GAMUT_REMAP_B_C23_C24; \
+	uint32_t CM_GAMUT_REMAP_B_C31_C32; \
+	uint32_t CM_GAMUT_REMAP_B_C33_C34; \
+	uint32_t CM_ICSC_B_C11_C12; \
+	uint32_t CM_ICSC_B_C33_C34
+
 struct dcn2_dpp_registers {
 	DPP_DCN2_REG_VARIABLE_LIST;
+	DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND;
 };
 
 struct dcn20_dpp {
@@ -642,6 +696,18 @@ struct dcn20_dpp {
 	struct pwl_params pwl_data;
 };
 
+enum dcn20_input_csc_select {
+	DCN2_ICSC_SELECT_BYPASS = 0,
+	DCN2_ICSC_SELECT_ICSC_A = 1,
+	DCN2_ICSC_SELECT_ICSC_B = 2
+};
+
+enum dcn20_gamut_remap_select {
+	DCN2_GAMUT_REMAP_BYPASS = 0,
+	DCN2_GAMUT_REMAP_COEF_A = 1,
+	DCN2_GAMUT_REMAP_COEF_B = 2
+};
+
 void dpp20_read_state(struct dpp *dpp_base,
 		struct dcn_dpp_state *s);
 
@@ -653,6 +719,16 @@ void dpp2_set_degamma(
 		struct dpp *dpp_base,
 		enum ipp_degamma_mode mode);
 
+void dpp2_cm_set_gamut_remap(
+	struct dpp *dpp_base,
+	const struct dpp_grph_csc_adjustment *adjust);
+
+void dpp2_program_input_csc(
+		struct dpp *dpp_base,
+		enum dc_color_space color_space,
+		enum dcn20_input_csc_select input_select,
+		const struct out_csc_color_matrix *tbl_entry);
+
 bool dpp20_program_blnd_lut(
 	struct dpp *dpp_base, const struct pwl_params *params);
 
@@ -691,11 +767,6 @@ void dpp2_set_hdr_multiplier(
 		struct dpp *dpp_base,
 		uint32_t multiplier);
 
-bool dpp2_get_optimal_number_of_taps(
-		struct dpp *dpp,
-		struct scaler_data *scl_data,
-		const struct scaling_taps *in_taps);
-
 bool dpp2_construct(struct dcn20_dpp *dpp2,
 	struct dc_context *ctx,
 	uint32_t inst,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
index 2d112c316424..8dc3d1f73984 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
@@ -36,6 +36,9 @@
 #define REG(reg)\
 	dpp->tf_regs->reg
 
+#define IND_REG(index) \
+	(index)
+
 #define CTX \
 	dpp->base.ctx
 
@@ -44,9 +47,6 @@
 	dpp->tf_shift->field_name, dpp->tf_mask->field_name
 
 
-
-
-
 static void dpp2_enable_cm_block(
 		struct dpp *dpp_base)
 {
@@ -149,12 +149,164 @@ void dpp2_set_degamma(
 	case IPP_DEGAMMA_MODE_HW_xvYCC:
 		REG_UPDATE(CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, 2);
 			break;
+	case IPP_DEGAMMA_MODE_USER_PWL:
+		REG_UPDATE(CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, 3);
+		break;
 	default:
 		BREAK_TO_DEBUGGER();
 		break;
 	}
 }
 
+static void program_gamut_remap(
+		struct dcn20_dpp *dpp,
+		const uint16_t *regval,
+		enum dcn20_gamut_remap_select select)
+{
+	uint32_t cur_select = 0;
+	struct color_matrices_reg gam_regs;
+
+	if (regval == NULL || select == DCN2_GAMUT_REMAP_BYPASS) {
+		REG_SET(CM_GAMUT_REMAP_CONTROL, 0,
+				CM_GAMUT_REMAP_MODE, 0);
+		return;
+	}
+
+	/* determine which gamut_remap coefficients (A or B) we are using
+	 * currently. select the alternate set to double buffer
+	 * the update so gamut_remap is updated on frame boundary
+	 */
+	IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA,
+					CM_TEST_DEBUG_DATA_STATUS_IDX,
+					CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &cur_select);
+
+	/* value stored in dbg reg will be 1 greater than mode we want */
+	if (cur_select != DCN2_GAMUT_REMAP_COEF_A)
+		select = DCN2_GAMUT_REMAP_COEF_A;
+	else
+		select = DCN2_GAMUT_REMAP_COEF_B;
+
+	gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11;
+	gam_regs.masks.csc_c11  = dpp->tf_mask->CM_GAMUT_REMAP_C11;
+	gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12;
+	gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12;
+
+	if (select == DCN2_GAMUT_REMAP_COEF_A) {
+		gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12);
+		gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34);
+	} else {
+		gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12);
+		gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34);
+	}
+
+	cm_helper_program_color_matrices(
+				dpp->base.ctx,
+				regval,
+				&gam_regs);
+
+	REG_SET(
+			CM_GAMUT_REMAP_CONTROL, 0,
+			CM_GAMUT_REMAP_MODE, select);
+
+}
+
+void dpp2_cm_set_gamut_remap(
+	struct dpp *dpp_base,
+	const struct dpp_grph_csc_adjustment *adjust)
+{
+	struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+	int i = 0;
+
+	if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW)
+		/* Bypass if type is bypass or hw */
+		program_gamut_remap(dpp, NULL, DCN2_GAMUT_REMAP_BYPASS);
+	else {
+		struct fixed31_32 arr_matrix[12];
+		uint16_t arr_reg_val[12];
+
+		for (i = 0; i < 12; i++)
+			arr_matrix[i] = adjust->temperature_matrix[i];
+
+		convert_float_matrix(
+			arr_reg_val, arr_matrix, 12);
+
+		program_gamut_remap(dpp, arr_reg_val, DCN2_GAMUT_REMAP_COEF_A);
+	}
+}
+
+void dpp2_program_input_csc(
+		struct dpp *dpp_base,
+		enum dc_color_space color_space,
+		enum dcn20_input_csc_select input_select,
+		const struct out_csc_color_matrix *tbl_entry)
+{
+	struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+	int i;
+	int arr_size = sizeof(dpp_input_csc_matrix)/sizeof(struct dpp_input_csc_matrix);
+	const uint16_t *regval = NULL;
+	uint32_t cur_select = 0;
+	enum dcn20_input_csc_select select;
+	struct color_matrices_reg icsc_regs;
+
+	if (input_select == DCN2_ICSC_SELECT_BYPASS) {
+		REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0);
+		return;
+	}
+
+	if (tbl_entry == NULL) {
+		for (i = 0; i < arr_size; i++)
+			if (dpp_input_csc_matrix[i].color_space == color_space) {
+				regval = dpp_input_csc_matrix[i].regval;
+				break;
+			}
+
+		if (regval == NULL) {
+			BREAK_TO_DEBUGGER();
+			return;
+		}
+	} else {
+		regval = tbl_entry->regval;
+	}
+
+	/* determine which CSC coefficients (A or B) we are using
+	 * currently.  select the alternate set to double buffer
+	 * the CSC update so CSC is updated on frame boundary
+	 */
+	IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA,
+					CM_TEST_DEBUG_DATA_STATUS_IDX,
+					CM_TEST_DEBUG_DATA_ICSC_MODE, &cur_select);
+
+	if (cur_select != DCN2_ICSC_SELECT_ICSC_A)
+		select = DCN2_ICSC_SELECT_ICSC_A;
+	else
+		select = DCN2_ICSC_SELECT_ICSC_B;
+
+	icsc_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11;
+	icsc_regs.masks.csc_c11  = dpp->tf_mask->CM_ICSC_C11;
+	icsc_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12;
+	icsc_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12;
+
+	if (select == DCN2_ICSC_SELECT_ICSC_A) {
+
+		icsc_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12);
+		icsc_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34);
+
+	} else {
+
+		icsc_regs.csc_c11_c12 = REG(CM_ICSC_B_C11_C12);
+		icsc_regs.csc_c33_c34 = REG(CM_ICSC_B_C33_C34);
+
+	}
+
+	cm_helper_program_color_matrices(
+			dpp->base.ctx,
+			regval,
+			&icsc_regs);
+
+	REG_SET(CM_ICSC_CONTROL, 0,
+				CM_ICSC_MODE, select);
+}
+
 static void dpp20_power_on_blnd_lut(
 	struct dpp *dpp_base,
 	bool power_on)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
index 1b419407af94..6bdfee20b6a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
@@ -23,7 +23,6 @@
  *
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "reg_helper.h"
 #include "dcn20_dsc.h"
 #include "dsc/dscc_types.h"
@@ -118,7 +117,7 @@ static void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock
 
 	dsc_enc_caps->color_formats.bits.RGB = 1;
 	dsc_enc_caps->color_formats.bits.YCBCR_444 = 1;
-	dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 0;
+	dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1;
 	dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0;
 	dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1;
 
@@ -207,6 +206,9 @@ static bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, const str
 	struct dsc_reg_values dsc_reg_vals;
 	struct dsc_optc_config dsc_optc_cfg;
 
+	memset(&dsc_reg_vals, 0, sizeof(dsc_reg_vals));
+	memset(&dsc_optc_cfg, 0, sizeof(dsc_optc_cfg));
+
 	DC_LOG_DSC("Getting packed DSC PPS for DSC Config:");
 	dsc_config_log(dsc, dsc_cfg);
 	DC_LOG_DSC("DSC Picture Parameter Set (PPS):");
@@ -222,9 +224,18 @@ static bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, const str
 static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe)
 {
 	struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
+	int dsc_clock_en;
+	int dsc_fw_config;
+	int enabled_opp_pipe;
+
+	DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe);
 
-	/* TODO Check if DSC alreay in use? */
-	DC_LOG_DSC("enable DSC at opp pipe %d", opp_pipe);
+	REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
+	REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe);
+	if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) {
+		DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already enabled!", dsc->inst, enabled_opp_pipe);
+		ASSERT(0);
+	}
 
 	REG_UPDATE(DSC_TOP_CONTROL,
 		DSC_CLOCK_EN, 1);
@@ -238,8 +249,18 @@ static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe)
 static void dsc2_disable(struct display_stream_compressor *dsc)
 {
 	struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
+	int dsc_clock_en;
+	int dsc_fw_config;
+	int enabled_opp_pipe;
 
-	DC_LOG_DSC("disable DSC");
+	DC_LOG_DSC("disable DSC %d", dsc->inst);
+
+	REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
+	REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe);
+	if (!dsc_clock_en || !dsc_fw_config) {
+		DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already disabled!", dsc->inst, enabled_opp_pipe);
+		ASSERT(0);
+	}
 
 	REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG,
 		DSCRM_DSC_FORWARD_EN, 0);
@@ -715,4 +736,3 @@ static void dsc_write_to_registers(struct display_stream_compressor *dsc, const
 	}
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
index 4e2fb38390a4..9855a7ed0387 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
@@ -21,7 +21,6 @@
  * Authors: AMD
  *
  */
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #ifndef __DCN20_DSC_H__
 #define __DCN20_DSC_H__
 
@@ -572,4 +571,3 @@ void dsc2_construct(struct dcn20_dsc *dsc,
 
 #endif
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
index cd8bc92ce3ba..880954ac0b02 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
@@ -722,7 +722,6 @@ bool dwb_program_horz_scalar(struct dcn20_dwbc *dwbc20,
 		struct scaling_taps num_taps)
 {
 	uint32_t h_ratio_luma = 1;
-	uint32_t h_ratio_chroma = 1;
 	uint32_t h_taps_luma = num_taps.h_taps;
 	uint32_t h_taps_chroma = num_taps.h_taps_c;
 	int32_t h_init_phase_luma = 0;
@@ -747,7 +746,6 @@ bool dwb_program_horz_scalar(struct dcn20_dwbc *dwbc20,
 		h_ratio_luma = -1;
 	else
 		h_ratio_luma = dc_fixpt_u3d19(tmp_h_ratio_luma) << 5;
-	h_ratio_chroma = h_ratio_luma * 2;
 
 	/*Program ratio*/
 	REG_UPDATE(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, h_ratio_luma);
@@ -803,7 +801,6 @@ bool dwb_program_vert_scalar(struct dcn20_dwbc *dwbc20,
 		enum dwb_subsample_position subsample_position)
 {
 	uint32_t v_ratio_luma = 1;
-	uint32_t v_ratio_chroma = 1;
 	uint32_t v_taps_luma = num_taps.v_taps;
 	uint32_t v_taps_chroma = num_taps.v_taps_c;
 	int32_t v_init_phase_luma = 0;
@@ -827,7 +824,6 @@ bool dwb_program_vert_scalar(struct dcn20_dwbc *dwbc20,
 		v_ratio_luma = -1;
 	else
 		v_ratio_luma = dc_fixpt_u3d19(tmp_v_ratio_luma) << 5;
-	v_ratio_chroma = v_ratio_luma * 2;
 
 	/*Program ratio*/
 	REG_UPDATE(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, v_ratio_luma);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
index b83c022e2c6f..9235f7d29454 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
@@ -186,14 +186,13 @@ static void hubbub2_get_blk256_size(unsigned int *blk256_width, unsigned int *bl
 }
 
 static void hubbub2_det_request_size(
+		unsigned int detile_buf_size,
 		unsigned int height,
 		unsigned int width,
 		unsigned int bpe,
 		bool *req128_horz_wc,
 		bool *req128_vert_wc)
 {
-	unsigned int detile_buf_size = 164 * 1024;  /* 164KB for DCN1.0 */
-
 	unsigned int blk256_height = 0;
 	unsigned int blk256_width = 0;
 	unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
@@ -236,7 +235,8 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub,
 			&segment_order_horz, &segment_order_vert))
 		return false;
 
-	hubbub2_det_request_size(input->surface_size.height,  input->surface_size.width,
+	hubbub2_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size,
+			input->surface_size.height,  input->surface_size.width,
 			bpe, &req128_horz_wc, &req128_vert_wc);
 
 	if (!req128_horz_wc && !req128_vert_wc) {
@@ -293,6 +293,9 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.max_compressed_blk_size = 64;
 		output->grph.rgb.independent_64b_blks = true;
 		break;
+	default:
+		ASSERT(false);
+		break;
 	}
 	output->capable = true;
 	output->const_color_support = true;
@@ -588,7 +591,7 @@ static void hubbub2_program_watermarks(
 			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
 	REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 180);
 
-	hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+	hubbub->funcs->allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
 }
 
 static const struct hubbub_funcs hubbub2_funcs = {
@@ -600,7 +603,9 @@ static const struct hubbub_funcs hubbub2_funcs = {
 	.get_dcc_compression_cap = hubbub2_get_dcc_compression_cap,
 	.wm_read_state = hubbub2_wm_read_state,
 	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
-	.program_watermarks = hubbub2_program_watermarks
+	.program_watermarks = hubbub2_program_watermarks,
+	.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
+	.allow_self_refresh_control = hubbub1_allow_self_refresh_control,
 };
 
 void hubbub2_construct(struct dcn20_hubbub *hubbub,
@@ -618,4 +623,5 @@ void hubbub2_construct(struct dcn20_hubbub *hubbub,
 	hubbub->masks = hubbub_mask;
 
 	hubbub->debug_test_index_pstate = 0xB;
+	hubbub->detile_buf_size = 164 * 1024; /* 164KB for DCN2.0 */
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
index 626117d3b4e9..501532dd523a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
@@ -81,6 +81,7 @@ struct dcn20_hubbub {
 	unsigned int debug_test_index_pstate;
 	struct dcn_watermark_set watermarks;
 	struct dcn20_vmid vmid[16];
+	unsigned int detile_buf_size;
 };
 
 void hubbub2_construct(struct dcn20_hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 69e2aae42394..84d7ac5dd206 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -30,6 +30,8 @@
 #include "reg_helper.h"
 #include "basics/conversion.h"
 
+#define DC_LOGGER_INIT(logger)
+
 #define REG(reg)\
 	hubp2->hubp_regs->reg
 
@@ -483,7 +485,6 @@ void hubp2_program_pixel_format(
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 12);
 		break;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 112);
@@ -504,7 +505,6 @@ void hubp2_program_pixel_format(
 		REG_UPDATE(DCSURF_SURFACE_CONFIG,
 				SURFACE_PIXEL_FORMAT, 119);
 		break;
-#endif
 	default:
 		BREAK_TO_DEBUGGER();
 		break;
@@ -1204,6 +1204,9 @@ void hubp2_read_state_common(struct hubp *hubp)
 			HUBP_TTU_DISABLE, &s->ttu_disable,
 			HUBP_UNDERFLOW_STATUS, &s->underflow_status);
 
+	REG_GET(HUBP_CLK_CNTL,
+			HUBP_CLOCK_ENABLE, &s->clock_en);
+
 	REG_GET(DCN_GLOBAL_TTU_CNTL,
 			MIN_TTU_VBLANK, &s->min_ttu_vblank);
 
@@ -1243,6 +1246,314 @@ void hubp2_read_state(struct hubp *hubp)
 
 }
 
+void hubp2_validate_dml_output(struct hubp *hubp,
+		struct dc_context *ctx,
+		struct _vcs_dpi_display_rq_regs_st *dml_rq_regs,
+		struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr,
+		struct _vcs_dpi_display_ttu_regs_st *dml_ttu_attr)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	struct _vcs_dpi_display_rq_regs_st rq_regs = {0};
+	struct _vcs_dpi_display_dlg_regs_st dlg_attr = {0};
+	struct _vcs_dpi_display_ttu_regs_st ttu_attr = {0};
+	DC_LOGGER_INIT(ctx->logger);
+	DC_LOG_DEBUG("DML Validation | Running Validation");
+
+	/* Requestor Regs */
+	REG_GET(HUBPRET_CONTROL,
+		DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs.plane1_base_address);
+	REG_GET_4(DCN_EXPANSION_MODE,
+		DRQ_EXPANSION_MODE, &rq_regs.drq_expansion_mode,
+		PRQ_EXPANSION_MODE, &rq_regs.prq_expansion_mode,
+		MRQ_EXPANSION_MODE, &rq_regs.mrq_expansion_mode,
+		CRQ_EXPANSION_MODE, &rq_regs.crq_expansion_mode);
+	REG_GET_8(DCHUBP_REQ_SIZE_CONFIG,
+		CHUNK_SIZE, &rq_regs.rq_regs_l.chunk_size,
+		MIN_CHUNK_SIZE, &rq_regs.rq_regs_l.min_chunk_size,
+		META_CHUNK_SIZE, &rq_regs.rq_regs_l.meta_chunk_size,
+		MIN_META_CHUNK_SIZE, &rq_regs.rq_regs_l.min_meta_chunk_size,
+		DPTE_GROUP_SIZE, &rq_regs.rq_regs_l.dpte_group_size,
+		MPTE_GROUP_SIZE, &rq_regs.rq_regs_l.mpte_group_size,
+		SWATH_HEIGHT, &rq_regs.rq_regs_l.swath_height,
+		PTE_ROW_HEIGHT_LINEAR, &rq_regs.rq_regs_l.pte_row_height_linear);
+	REG_GET_8(DCHUBP_REQ_SIZE_CONFIG_C,
+		CHUNK_SIZE_C, &rq_regs.rq_regs_c.chunk_size,
+		MIN_CHUNK_SIZE_C, &rq_regs.rq_regs_c.min_chunk_size,
+		META_CHUNK_SIZE_C, &rq_regs.rq_regs_c.meta_chunk_size,
+		MIN_META_CHUNK_SIZE_C, &rq_regs.rq_regs_c.min_meta_chunk_size,
+		DPTE_GROUP_SIZE_C, &rq_regs.rq_regs_c.dpte_group_size,
+		MPTE_GROUP_SIZE_C, &rq_regs.rq_regs_c.mpte_group_size,
+		SWATH_HEIGHT_C, &rq_regs.rq_regs_c.swath_height,
+		PTE_ROW_HEIGHT_LINEAR_C, &rq_regs.rq_regs_c.pte_row_height_linear);
+
+	if (rq_regs.plane1_base_address != dml_rq_regs->plane1_base_address)
+		DC_LOG_DEBUG("DML Validation | HUBPRET_CONTROL:DET_BUF_PLANE1_BASE_ADDRESS - Expected: %u  Actual: %u\n",
+				dml_rq_regs->plane1_base_address, rq_regs.plane1_base_address);
+	if (rq_regs.drq_expansion_mode != dml_rq_regs->drq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:DRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->drq_expansion_mode, rq_regs.drq_expansion_mode);
+	if (rq_regs.prq_expansion_mode != dml_rq_regs->prq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:MRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->prq_expansion_mode, rq_regs.prq_expansion_mode);
+	if (rq_regs.mrq_expansion_mode != dml_rq_regs->mrq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:DET_BUF_PLANE1_BASE_ADDRESS - Expected: %u  Actual: %u\n",
+				dml_rq_regs->mrq_expansion_mode, rq_regs.mrq_expansion_mode);
+	if (rq_regs.crq_expansion_mode != dml_rq_regs->crq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:CRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->crq_expansion_mode, rq_regs.crq_expansion_mode);
+
+	if (rq_regs.rq_regs_l.chunk_size != dml_rq_regs->rq_regs_l.chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.chunk_size, rq_regs.rq_regs_l.chunk_size);
+	if (rq_regs.rq_regs_l.min_chunk_size != dml_rq_regs->rq_regs_l.min_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:MIN_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.min_chunk_size, rq_regs.rq_regs_l.min_chunk_size);
+	if (rq_regs.rq_regs_l.meta_chunk_size != dml_rq_regs->rq_regs_l.meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:META_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.meta_chunk_size, rq_regs.rq_regs_l.meta_chunk_size);
+	if (rq_regs.rq_regs_l.min_meta_chunk_size != dml_rq_regs->rq_regs_l.min_meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:MIN_META_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs.rq_regs_l.min_meta_chunk_size);
+	if (rq_regs.rq_regs_l.dpte_group_size != dml_rq_regs->rq_regs_l.dpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:DPTE_GROUP_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.dpte_group_size, rq_regs.rq_regs_l.dpte_group_size);
+	if (rq_regs.rq_regs_l.mpte_group_size != dml_rq_regs->rq_regs_l.mpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:MPTE_GROUP_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.mpte_group_size, rq_regs.rq_regs_l.mpte_group_size);
+	if (rq_regs.rq_regs_l.swath_height != dml_rq_regs->rq_regs_l.swath_height)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:SWATH_HEIGHT - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.swath_height, rq_regs.rq_regs_l.swath_height);
+	if (rq_regs.rq_regs_l.pte_row_height_linear != dml_rq_regs->rq_regs_l.pte_row_height_linear)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:PTE_ROW_HEIGHT_LINEAR - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.pte_row_height_linear, rq_regs.rq_regs_l.pte_row_height_linear);
+
+	if (rq_regs.rq_regs_c.chunk_size != dml_rq_regs->rq_regs_c.chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.chunk_size, rq_regs.rq_regs_c.chunk_size);
+	if (rq_regs.rq_regs_c.min_chunk_size != dml_rq_regs->rq_regs_c.min_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:MIN_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.min_chunk_size, rq_regs.rq_regs_c.min_chunk_size);
+	if (rq_regs.rq_regs_c.meta_chunk_size != dml_rq_regs->rq_regs_c.meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:META_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.meta_chunk_size, rq_regs.rq_regs_c.meta_chunk_size);
+	if (rq_regs.rq_regs_c.min_meta_chunk_size != dml_rq_regs->rq_regs_c.min_meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:MIN_META_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.min_meta_chunk_size, rq_regs.rq_regs_c.min_meta_chunk_size);
+	if (rq_regs.rq_regs_c.dpte_group_size != dml_rq_regs->rq_regs_c.dpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:DPTE_GROUP_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.dpte_group_size, rq_regs.rq_regs_c.dpte_group_size);
+	if (rq_regs.rq_regs_c.mpte_group_size != dml_rq_regs->rq_regs_c.mpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:MPTE_GROUP_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.mpte_group_size, rq_regs.rq_regs_c.mpte_group_size);
+	if (rq_regs.rq_regs_c.swath_height != dml_rq_regs->rq_regs_c.swath_height)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:SWATH_HEIGHT_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.swath_height, rq_regs.rq_regs_c.swath_height);
+	if (rq_regs.rq_regs_c.pte_row_height_linear != dml_rq_regs->rq_regs_c.pte_row_height_linear)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:PTE_ROW_HEIGHT_LINEAR_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.pte_row_height_linear, rq_regs.rq_regs_c.pte_row_height_linear);
+
+	/* DLG - Per hubp */
+	REG_GET_2(BLANK_OFFSET_0,
+		REFCYC_H_BLANK_END, &dlg_attr.refcyc_h_blank_end,
+		DLG_V_BLANK_END, &dlg_attr.dlg_vblank_end);
+	REG_GET(BLANK_OFFSET_1,
+		MIN_DST_Y_NEXT_START, &dlg_attr.min_dst_y_next_start);
+	REG_GET(DST_DIMENSIONS,
+		REFCYC_PER_HTOTAL, &dlg_attr.refcyc_per_htotal);
+	REG_GET_2(DST_AFTER_SCALER,
+		REFCYC_X_AFTER_SCALER, &dlg_attr.refcyc_x_after_scaler,
+		DST_Y_AFTER_SCALER, &dlg_attr.dst_y_after_scaler);
+	REG_GET(REF_FREQ_TO_PIX_FREQ,
+		REF_FREQ_TO_PIX_FREQ, &dlg_attr.ref_freq_to_pix_freq);
+
+	if (dlg_attr.refcyc_h_blank_end != dml_dlg_attr->refcyc_h_blank_end)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_0:REFCYC_H_BLANK_END - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_h_blank_end, dlg_attr.refcyc_h_blank_end);
+	if (dlg_attr.dlg_vblank_end != dml_dlg_attr->dlg_vblank_end)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_0:DLG_V_BLANK_END - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dlg_vblank_end, dlg_attr.dlg_vblank_end);
+	if (dlg_attr.min_dst_y_next_start != dml_dlg_attr->min_dst_y_next_start)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_1:MIN_DST_Y_NEXT_START - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->min_dst_y_next_start, dlg_attr.min_dst_y_next_start);
+	if (dlg_attr.refcyc_per_htotal != dml_dlg_attr->refcyc_per_htotal)
+		DC_LOG_DEBUG("DML Validation | DST_DIMENSIONS:REFCYC_PER_HTOTAL - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_htotal, dlg_attr.refcyc_per_htotal);
+	if (dlg_attr.refcyc_x_after_scaler != dml_dlg_attr->refcyc_x_after_scaler)
+		DC_LOG_DEBUG("DML Validation | DST_AFTER_SCALER:REFCYC_X_AFTER_SCALER - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_x_after_scaler, dlg_attr.refcyc_x_after_scaler);
+	if (dlg_attr.dst_y_after_scaler != dml_dlg_attr->dst_y_after_scaler)
+		DC_LOG_DEBUG("DML Validation | DST_AFTER_SCALER:DST_Y_AFTER_SCALER - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_after_scaler, dlg_attr.dst_y_after_scaler);
+	if (dlg_attr.ref_freq_to_pix_freq != dml_dlg_attr->ref_freq_to_pix_freq)
+		DC_LOG_DEBUG("DML Validation | REF_FREQ_TO_PIX_FREQ:REF_FREQ_TO_PIX_FREQ - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->ref_freq_to_pix_freq, dlg_attr.ref_freq_to_pix_freq);
+
+	/* DLG - Per luma/chroma */
+	REG_GET(VBLANK_PARAMETERS_1,
+		REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr.refcyc_per_pte_group_vblank_l);
+	if (REG(NOM_PARAMETERS_0))
+		REG_GET(NOM_PARAMETERS_0,
+			DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr.dst_y_per_pte_row_nom_l);
+	if (REG(NOM_PARAMETERS_1))
+		REG_GET(NOM_PARAMETERS_1,
+			REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr.refcyc_per_pte_group_nom_l);
+	REG_GET(NOM_PARAMETERS_4,
+		DST_Y_PER_META_ROW_NOM_L, &dlg_attr.dst_y_per_meta_row_nom_l);
+	REG_GET(NOM_PARAMETERS_5,
+		REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr.refcyc_per_meta_chunk_nom_l);
+	REG_GET_2(PER_LINE_DELIVERY,
+		REFCYC_PER_LINE_DELIVERY_L, &dlg_attr.refcyc_per_line_delivery_l,
+		REFCYC_PER_LINE_DELIVERY_C, &dlg_attr.refcyc_per_line_delivery_c);
+	REG_GET_2(PER_LINE_DELIVERY_PRE,
+		REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr.refcyc_per_line_delivery_pre_l,
+		REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr.refcyc_per_line_delivery_pre_c);
+	REG_GET(VBLANK_PARAMETERS_2,
+		REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr.refcyc_per_pte_group_vblank_c);
+	if (REG(NOM_PARAMETERS_2))
+		REG_GET(NOM_PARAMETERS_2,
+			DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr.dst_y_per_pte_row_nom_c);
+	if (REG(NOM_PARAMETERS_3))
+		REG_GET(NOM_PARAMETERS_3,
+			REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr.refcyc_per_pte_group_nom_c);
+	REG_GET(NOM_PARAMETERS_6,
+		DST_Y_PER_META_ROW_NOM_C, &dlg_attr.dst_y_per_meta_row_nom_c);
+	REG_GET(NOM_PARAMETERS_7,
+		REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr.refcyc_per_meta_chunk_nom_c);
+	REG_GET(VBLANK_PARAMETERS_3,
+			REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr.refcyc_per_meta_chunk_vblank_l);
+	REG_GET(VBLANK_PARAMETERS_4,
+			REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr.refcyc_per_meta_chunk_vblank_c);
+
+	if (dlg_attr.refcyc_per_pte_group_vblank_l != dml_dlg_attr->refcyc_per_pte_group_vblank_l)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_1:REFCYC_PER_PTE_GROUP_VBLANK_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_vblank_l, dlg_attr.refcyc_per_pte_group_vblank_l);
+	if (dlg_attr.dst_y_per_pte_row_nom_l != dml_dlg_attr->dst_y_per_pte_row_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_0:DST_Y_PER_PTE_ROW_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_pte_row_nom_l, dlg_attr.dst_y_per_pte_row_nom_l);
+	if (dlg_attr.refcyc_per_pte_group_nom_l != dml_dlg_attr->refcyc_per_pte_group_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_1:REFCYC_PER_PTE_GROUP_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_nom_l, dlg_attr.refcyc_per_pte_group_nom_l);
+	if (dlg_attr.dst_y_per_meta_row_nom_l != dml_dlg_attr->dst_y_per_meta_row_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_4:DST_Y_PER_META_ROW_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_meta_row_nom_l, dlg_attr.dst_y_per_meta_row_nom_l);
+	if (dlg_attr.refcyc_per_meta_chunk_nom_l != dml_dlg_attr->refcyc_per_meta_chunk_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_5:REFCYC_PER_META_CHUNK_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_nom_l, dlg_attr.refcyc_per_meta_chunk_nom_l);
+	if (dlg_attr.refcyc_per_line_delivery_l != dml_dlg_attr->refcyc_per_line_delivery_l)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY:REFCYC_PER_LINE_DELIVERY_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_l, dlg_attr.refcyc_per_line_delivery_l);
+	if (dlg_attr.refcyc_per_line_delivery_c != dml_dlg_attr->refcyc_per_line_delivery_c)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY:REFCYC_PER_LINE_DELIVERY_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_c, dlg_attr.refcyc_per_line_delivery_c);
+	if (dlg_attr.refcyc_per_pte_group_vblank_c != dml_dlg_attr->refcyc_per_pte_group_vblank_c)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_2:REFCYC_PER_PTE_GROUP_VBLANK_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_vblank_c, dlg_attr.refcyc_per_pte_group_vblank_c);
+	if (dlg_attr.dst_y_per_pte_row_nom_c != dml_dlg_attr->dst_y_per_pte_row_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_2:DST_Y_PER_PTE_ROW_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_pte_row_nom_c, dlg_attr.dst_y_per_pte_row_nom_c);
+	if (dlg_attr.refcyc_per_pte_group_nom_c != dml_dlg_attr->refcyc_per_pte_group_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_3:REFCYC_PER_PTE_GROUP_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_nom_c, dlg_attr.refcyc_per_pte_group_nom_c);
+	if (dlg_attr.dst_y_per_meta_row_nom_c != dml_dlg_attr->dst_y_per_meta_row_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_6:DST_Y_PER_META_ROW_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_meta_row_nom_c, dlg_attr.dst_y_per_meta_row_nom_c);
+	if (dlg_attr.refcyc_per_meta_chunk_nom_c != dml_dlg_attr->refcyc_per_meta_chunk_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_7:REFCYC_PER_META_CHUNK_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_nom_c, dlg_attr.refcyc_per_meta_chunk_nom_c);
+	if (dlg_attr.refcyc_per_line_delivery_pre_l != dml_dlg_attr->refcyc_per_line_delivery_pre_l)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY_PRE:REFCYC_PER_LINE_DELIVERY_PRE_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_pre_l, dlg_attr.refcyc_per_line_delivery_pre_l);
+	if (dlg_attr.refcyc_per_line_delivery_pre_c != dml_dlg_attr->refcyc_per_line_delivery_pre_c)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY_PRE:REFCYC_PER_LINE_DELIVERY_PRE_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_pre_c, dlg_attr.refcyc_per_line_delivery_pre_c);
+	if (dlg_attr.refcyc_per_meta_chunk_vblank_l != dml_dlg_attr->refcyc_per_meta_chunk_vblank_l)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_3:REFCYC_PER_META_CHUNK_VBLANK_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_vblank_l, dlg_attr.refcyc_per_meta_chunk_vblank_l);
+	if (dlg_attr.refcyc_per_meta_chunk_vblank_c != dml_dlg_attr->refcyc_per_meta_chunk_vblank_c)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_4:REFCYC_PER_META_CHUNK_VBLANK_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_vblank_c, dlg_attr.refcyc_per_meta_chunk_vblank_c);
+
+	/* TTU - per hubp */
+	REG_GET_2(DCN_TTU_QOS_WM,
+		QoS_LEVEL_LOW_WM, &ttu_attr.qos_level_low_wm,
+		QoS_LEVEL_HIGH_WM, &ttu_attr.qos_level_high_wm);
+
+	if (ttu_attr.qos_level_low_wm != dml_ttu_attr->qos_level_low_wm)
+		DC_LOG_DEBUG("DML Validation | DCN_TTU_QOS_WM:QoS_LEVEL_LOW_WM - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_low_wm, ttu_attr.qos_level_low_wm);
+	if (ttu_attr.qos_level_high_wm != dml_ttu_attr->qos_level_high_wm)
+		DC_LOG_DEBUG("DML Validation | DCN_TTU_QOS_WM:QoS_LEVEL_HIGH_WM - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_high_wm, ttu_attr.qos_level_high_wm);
+
+	/* TTU - per luma/chroma */
+	/* Assumed surf0 is luma and 1 is chroma */
+	REG_GET_3(DCN_SURF0_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_l,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_l,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_l);
+	REG_GET_3(DCN_SURF1_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_c,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_c,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_c);
+	REG_GET_3(DCN_CUR0_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_cur0,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_cur0,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_cur0);
+	REG_GET(FLIP_PARAMETERS_1,
+		REFCYC_PER_PTE_GROUP_FLIP_L, &dlg_attr.refcyc_per_pte_group_flip_l);
+	REG_GET(DCN_CUR0_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_cur0);
+	REG_GET(DCN_CUR1_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_cur1);
+	REG_GET(DCN_SURF0_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_l);
+	REG_GET(DCN_SURF1_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_c);
+
+	if (ttu_attr.refcyc_per_req_delivery_l != dml_ttu_attr->refcyc_per_req_delivery_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_l, ttu_attr.refcyc_per_req_delivery_l);
+	if (ttu_attr.qos_level_fixed_l != dml_ttu_attr->qos_level_fixed_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_l, ttu_attr.qos_level_fixed_l);
+	if (ttu_attr.qos_ramp_disable_l != dml_ttu_attr->qos_ramp_disable_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_l, ttu_attr.qos_ramp_disable_l);
+	if (ttu_attr.refcyc_per_req_delivery_c != dml_ttu_attr->refcyc_per_req_delivery_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_c, ttu_attr.refcyc_per_req_delivery_c);
+	if (ttu_attr.qos_level_fixed_c != dml_ttu_attr->qos_level_fixed_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_c, ttu_attr.qos_level_fixed_c);
+	if (ttu_attr.qos_ramp_disable_c != dml_ttu_attr->qos_ramp_disable_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_c, ttu_attr.qos_ramp_disable_c);
+	if (ttu_attr.refcyc_per_req_delivery_cur0 != dml_ttu_attr->refcyc_per_req_delivery_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_cur0, ttu_attr.refcyc_per_req_delivery_cur0);
+	if (ttu_attr.qos_level_fixed_cur0 != dml_ttu_attr->qos_level_fixed_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_cur0, ttu_attr.qos_level_fixed_cur0);
+	if (ttu_attr.qos_ramp_disable_cur0 != dml_ttu_attr->qos_ramp_disable_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_cur0, ttu_attr.qos_ramp_disable_cur0);
+	if (dlg_attr.refcyc_per_pte_group_flip_l != dml_dlg_attr->refcyc_per_pte_group_flip_l)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_1:REFCYC_PER_PTE_GROUP_FLIP_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_flip_l, dlg_attr.refcyc_per_pte_group_flip_l);
+	if (ttu_attr.refcyc_per_req_delivery_pre_cur0 != dml_ttu_attr->refcyc_per_req_delivery_pre_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_cur0, ttu_attr.refcyc_per_req_delivery_pre_cur0);
+	if (ttu_attr.refcyc_per_req_delivery_pre_cur1 != dml_ttu_attr->refcyc_per_req_delivery_pre_cur1)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR1_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_cur1, ttu_attr.refcyc_per_req_delivery_pre_cur1);
+	if (ttu_attr.refcyc_per_req_delivery_pre_l != dml_ttu_attr->refcyc_per_req_delivery_pre_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_l, ttu_attr.refcyc_per_req_delivery_pre_l);
+	if (ttu_attr.refcyc_per_req_delivery_pre_c != dml_ttu_attr->refcyc_per_req_delivery_pre_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_c, ttu_attr.refcyc_per_req_delivery_pre_c);
+}
+
 static struct hubp_funcs dcn20_hubp_funcs = {
 	.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
 	.hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
@@ -1266,6 +1577,7 @@ static struct hubp_funcs dcn20_hubp_funcs = {
 	.hubp_clear_underflow = hubp2_clear_underflow,
 	.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
 	.hubp_init = hubp1_init,
+	.validate_dml_output = hubp2_validate_dml_output,
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h
index d5c8615af45e..8c04a3606a54 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h
@@ -148,7 +148,6 @@
 	uint32_t VMID_SETTINGS_0
 
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define DCN21_HUBP_REG_COMMON_VARIABLE_LIST \
 	DCN2_HUBP_REG_COMMON_VARIABLE_LIST; \
 	uint32_t FLIP_PARAMETERS_3;\
@@ -157,7 +156,6 @@
 	uint32_t FLIP_PARAMETERS_6;\
 	uint32_t VBLANK_PARAMETERS_5;\
 	uint32_t VBLANK_PARAMETERS_6
-#endif
 
 #define DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type) \
 	DCN_HUBP_REG_FIELD_BASE_LIST(type); \
@@ -184,7 +182,6 @@
 	type SURFACE_TRIPLE_BUFFER_ENABLE;\
 	type VMID
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 #define DCN21_HUBP_REG_FIELD_VARIABLE_LIST(type) \
 	DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type);\
 	type REFCYC_PER_VM_GROUP_FLIP;\
@@ -194,31 +191,18 @@
 	type REFCYC_PER_PTE_GROUP_FLIP_C; \
 	type REFCYC_PER_META_CHUNK_FLIP_C; \
 	type VM_GROUP_SIZE
-#endif
 
 
 struct dcn_hubp2_registers {
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	DCN21_HUBP_REG_COMMON_VARIABLE_LIST;
-#else
-	DCN2_HUBP_REG_COMMON_VARIABLE_LIST;
-#endif
 };
 
 struct dcn_hubp2_shift {
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	DCN21_HUBP_REG_FIELD_VARIABLE_LIST(uint8_t);
-#else
-	DCN2_HUBP_REG_FIELD_VARIABLE_LIST(uint8_t);
-#endif
 };
 
 struct dcn_hubp2_mask {
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	DCN21_HUBP_REG_FIELD_VARIABLE_LIST(uint32_t);
-#else
-	DCN2_HUBP_REG_FIELD_VARIABLE_LIST(uint32_t);
-#endif
 };
 
 struct dcn20_hubp {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 1212da12c414..cfbbaffa8654 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -25,17 +25,15 @@
 #include <linux/delay.h>
 
 #include "dm_services.h"
+#include "basics/dc_common.h"
 #include "dm_helpers.h"
 #include "core_types.h"
 #include "resource.h"
-#include "dcn20/dcn20_resource.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dcn20_resource.h"
 #include "dcn20_hwseq.h"
 #include "dce/dce_hwseq.h"
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-#include "dcn20/dcn20_dsc.h"
-#endif
+#include "dcn20_dsc.h"
+#include "dcn20_optc.h"
 #include "abm.h"
 #include "clk_mgr.h"
 #include "dmcu.h"
@@ -45,10 +43,9 @@
 #include "ipp.h"
 #include "mpc.h"
 #include "mcif_wb.h"
+#include "dchubbub.h"
 #include "reg_helper.h"
 #include "dcn10/dcn10_cm_common.h"
-#include "dcn10/dcn10_hubbub.h"
-#include "dcn10/dcn10_optc.h"
 #include "dc_link_dp.h"
 #include "vm_helper.h"
 #include "dccg.h"
@@ -64,14 +61,132 @@
 #define FN(reg_name, field_name) \
 	hws->shifts->field_name, hws->masks->field_name
 
-static void dcn20_enable_power_gating_plane(
+static int find_free_gsl_group(const struct dc *dc)
+{
+	if (dc->res_pool->gsl_groups.gsl_0 == 0)
+		return 1;
+	if (dc->res_pool->gsl_groups.gsl_1 == 0)
+		return 2;
+	if (dc->res_pool->gsl_groups.gsl_2 == 0)
+		return 3;
+
+	return 0;
+}
+
+/* NOTE: This is not a generic setup_gsl function (hence the suffix as_lock)
+ * This is only used to lock pipes in pipe splitting case with immediate flip
+ * Ordinary MPC/OTG locks suppress VUPDATE which doesn't help with immediate,
+ * so we get tearing with freesync since we cannot flip multiple pipes
+ * atomically.
+ * We use GSL for this:
+ * - immediate flip: find first available GSL group if not already assigned
+ *                   program gsl with that group, set current OTG as master
+ *                   and always us 0x4 = AND of flip_ready from all pipes
+ * - vsync flip: disable GSL if used
+ *
+ * Groups in stream_res are stored as +1 from HW registers, i.e.
+ * gsl_0 <=> pipe_ctx->stream_res.gsl_group == 1
+ * Using a magic value like -1 would require tracking all inits/resets
+ */
+static void dcn20_setup_gsl_group_as_lock(
+		const struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool enable)
+{
+	struct gsl_params gsl;
+	int group_idx;
+
+	memset(&gsl, 0, sizeof(struct gsl_params));
+
+	if (enable) {
+		/* return if group already assigned since GSL was set up
+		 * for vsync flip, we would unassign so it can't be "left over"
+		 */
+		if (pipe_ctx->stream_res.gsl_group > 0)
+			return;
+
+		group_idx = find_free_gsl_group(dc);
+		ASSERT(group_idx != 0);
+		pipe_ctx->stream_res.gsl_group = group_idx;
+
+		/* set gsl group reg field and mark resource used */
+		switch (group_idx) {
+		case 1:
+			gsl.gsl0_en = 1;
+			dc->res_pool->gsl_groups.gsl_0 = 1;
+			break;
+		case 2:
+			gsl.gsl1_en = 1;
+			dc->res_pool->gsl_groups.gsl_1 = 1;
+			break;
+		case 3:
+			gsl.gsl2_en = 1;
+			dc->res_pool->gsl_groups.gsl_2 = 1;
+			break;
+		default:
+			BREAK_TO_DEBUGGER();
+			return; // invalid case
+		}
+		gsl.gsl_master_en = 1;
+	} else {
+		group_idx = pipe_ctx->stream_res.gsl_group;
+		if (group_idx == 0)
+			return; // if not in use, just return
+
+		pipe_ctx->stream_res.gsl_group = 0;
+
+		/* unset gsl group reg field and mark resource free */
+		switch (group_idx) {
+		case 1:
+			gsl.gsl0_en = 0;
+			dc->res_pool->gsl_groups.gsl_0 = 0;
+			break;
+		case 2:
+			gsl.gsl1_en = 0;
+			dc->res_pool->gsl_groups.gsl_1 = 0;
+			break;
+		case 3:
+			gsl.gsl2_en = 0;
+			dc->res_pool->gsl_groups.gsl_2 = 0;
+			break;
+		default:
+			BREAK_TO_DEBUGGER();
+			return;
+		}
+		gsl.gsl_master_en = 0;
+	}
+
+	/* at this point we want to program whether it's to enable or disable */
+	if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL &&
+		pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) {
+		pipe_ctx->stream_res.tg->funcs->set_gsl(
+			pipe_ctx->stream_res.tg,
+			&gsl);
+
+		pipe_ctx->stream_res.tg->funcs->set_gsl_source_select(
+			pipe_ctx->stream_res.tg, group_idx,	enable ? 4 : 0);
+	} else
+		BREAK_TO_DEBUGGER();
+}
+
+void dcn20_set_flip_control_gsl(
+		struct pipe_ctx *pipe_ctx,
+		bool flip_immediate)
+{
+	if (pipe_ctx && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl)
+		pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl(
+				pipe_ctx->plane_res.hubp, flip_immediate);
+
+}
+
+void dcn20_enable_power_gating_plane(
 	struct dce_hwseq *hws,
 	bool enable)
 {
-	bool force_on = 1; /* disable power gating */
+	bool force_on = true; /* disable power gating */
 
 	if (enable)
-		force_on = 0;
+		force_on = false;
 
 	/* DCHUBP0/1/2/3/4/5 */
 	REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN0_POWER_FORCEON, force_on);
@@ -128,44 +243,6 @@ void dcn20_dccg_init(struct dce_hwseq *hws)
 	/* This value is dependent on the hardware pipeline delay so set once per SOC */
 	REG_WRITE(DISPCLK_FREQ_CHANGE_CNTL, 0x801003c);
 }
-void dcn20_display_init(struct dc *dc)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-
-	/* RBBMIF
-	 * disable RBBMIF timeout detection for all clients
-	 * Ensure RBBMIF does not drop register accesses due to the per-client timeout
-	 */
-	REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF);
-	REG_WRITE(RBBMIF_TIMEOUT_DIS_2, 0xFFFFFFFF);
-
-	/* DCCG */
-	dcn20_dccg_init(hws);
-
-	REG_UPDATE(DC_MEM_GLOBAL_PWR_REQ_CNTL, DC_MEM_GLOBAL_PWR_REQ_DIS, 0);
-
-	/* DCHUB/MMHUBBUB
-	 * set global timer refclk divider
-	 * 100Mhz refclk -> 2
-	 * 27Mhz refclk ->  1
-	 * 48Mhz refclk ->  1
-	 */
-	REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2);
-	REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
-	REG_WRITE(REFCLK_CNTL, 0);
-
-	/* OPTC
-	 * OTG_CONTROL.OTG_DISABLE_POINT_CNTL = 0x3; will be set during optc2_enable_crtc
-	 */
-
-	/* AZ
-	 * default value is 0x64 for 100Mhz ref clock, if the ref clock is 100Mhz, no need to program this regiser,
-	 * if not, it should be programmed according to the ref clock
-	 */
-	REG_UPDATE(AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, 0x64);
-	/* Enable controller clock gating */
-	REG_WRITE(AZALIA_CONTROLLER_CLOCK_GATING, 0x1);
-}
 
 void dcn20_disable_vga(
 	struct dce_hwseq *hws)
@@ -178,15 +255,15 @@ void dcn20_disable_vga(
 	REG_WRITE(D6VGA_CONTROL, 0);
 }
 
-void dcn20_program_tripleBuffer(
+void dcn20_program_triple_buffer(
 	const struct dc *dc,
 	struct pipe_ctx *pipe_ctx,
-	bool enableTripleBuffer)
+	bool enable_triple_buffer)
 {
 	if (pipe_ctx->plane_res.hubp && pipe_ctx->plane_res.hubp->funcs) {
 		pipe_ctx->plane_res.hubp->funcs->hubp_enable_tripleBuffer(
 			pipe_ctx->plane_res.hubp,
-			enableTripleBuffer);
+			enable_triple_buffer);
 	}
 }
 
@@ -195,6 +272,7 @@ void dcn20_init_blank(
 		struct dc *dc,
 		struct timing_generator *tg)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	enum dc_color_space color_space;
 	struct tg_color black_color = {0};
 	struct output_pixel_processor *opp = NULL;
@@ -225,6 +303,7 @@ void dcn20_init_blank(
 	opp->funcs->opp_set_disp_pattern_generator(
 			opp,
 			CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
+			CONTROLLER_DP_COLOR_SPACE_UDEFINED,
 			COLOR_DEPTH_UNDEFINED,
 			&black_color,
 			otg_active_width,
@@ -234,17 +313,17 @@ void dcn20_init_blank(
 		bottom_opp->funcs->opp_set_disp_pattern_generator(
 				bottom_opp,
 				CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
+				CONTROLLER_DP_COLOR_SPACE_UDEFINED,
 				COLOR_DEPTH_UNDEFINED,
 				&black_color,
 				otg_active_width,
 				otg_active_height);
 	}
 
-	dcn20_hwss_wait_for_blank_complete(opp);
+	hws->funcs.wait_for_blank_complete(opp);
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-static void dcn20_dsc_pg_control(
+void dcn20_dsc_pg_control(
 		struct dce_hwseq *hws,
 		unsigned int dsc_inst,
 		bool power_on)
@@ -320,9 +399,8 @@ static void dcn20_dsc_pg_control(
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
 }
-#endif
 
-static void dcn20_dpp_pg_control(
+void dcn20_dpp_pg_control(
 		struct dce_hwseq *hws,
 		unsigned int dpp_inst,
 		bool power_on)
@@ -396,7 +474,7 @@ static void dcn20_dpp_pg_control(
 }
 
 
-static void dcn20_hubp_pg_control(
+void dcn20_hubp_pg_control(
 		struct dce_hwseq *hws,
 		unsigned int hubp_inst,
 		bool power_on)
@@ -473,8 +551,9 @@ static void dcn20_hubp_pg_control(
 /* disable HW used by plane.
  * note:  cannot disable until disconnect is complete
  */
-static void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
 
@@ -495,7 +574,7 @@ static void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	hubp->power_gated = true;
 	dc->optimized_required = false; /* We're powering off, no need to optimize */
 
-	dc->hwss.plane_atomic_power_down(dc,
+	hws->funcs.plane_atomic_power_down(dc,
 			pipe_ctx->plane_res.dpp,
 			pipe_ctx->plane_res.hubp);
 
@@ -526,6 +605,7 @@ enum dc_status dcn20_enable_stream_timing(
 		struct dc_state *context,
 		struct dc *dc)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct drr_params params = {0};
 	unsigned int event_triggers = 0;
@@ -585,7 +665,7 @@ enum dc_status dcn20_enable_stream_timing(
 			pipe_ctx->stream_res.opp,
 			true);
 
-	dc->hwss.blank_pixel_data(dc, pipe_ctx, true);
+	hws->funcs.blank_pixel_data(dc, pipe_ctx, true);
 
 	/* VTG is  within DCHUB command block. DCFCLK is always on */
 	if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) {
@@ -593,7 +673,7 @@ enum dc_status dcn20_enable_stream_timing(
 		return DC_ERROR_UNEXPECTED;
 	}
 
-	dcn20_hwss_wait_for_blank_complete(pipe_ctx->stream_res.opp);
+	hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp);
 
 	params.vertical_total_min = stream->adjust.v_total_min;
 	params.vertical_total_max = stream->adjust.v_total_max;
@@ -606,9 +686,13 @@ enum dc_status dcn20_enable_stream_timing(
 	// DRR should set trigger event to monitor surface update event
 	if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
 		event_triggers = 0x80;
+	/* Event triggers and num frames initialized for DRR, but can be
+	 * later updated for PSR use. Note DRR trigger events are generated
+	 * regardless of whether num frames met.
+	 */
 	if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
 		pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
-				pipe_ctx->stream_res.tg, event_triggers);
+				pipe_ctx->stream_res.tg, event_triggers, 2);
 
 	/* TODO program crtc source select for non-virtual signal*/
 	/* TODO program FMT */
@@ -649,7 +733,7 @@ void dcn20_program_output_csc(struct dc *dc,
 	}
 }
 
-bool dcn20_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
+bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
 				const struct dc_stream_state *stream)
 {
 	int mpcc_id = pipe_ctx->plane_res.hubp->inst;
@@ -688,7 +772,7 @@ bool dcn20_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
 	return true;
 }
 
-static bool dcn20_set_blend_lut(
+bool dcn20_set_blend_lut(
 	struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
 {
 	struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
@@ -710,7 +794,7 @@ static bool dcn20_set_blend_lut(
 	return result;
 }
 
-static bool dcn20_set_shaper_3dlut(
+bool dcn20_set_shaper_3dlut(
 	struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
 {
 	struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
@@ -736,20 +820,14 @@ static bool dcn20_set_shaper_3dlut(
 	else
 		result = dpp_base->funcs->dpp_program_3dlut(dpp_base, NULL);
 
-	if (plane_state->lut3d_func &&
-		plane_state->lut3d_func->state.bits.initialized == 1 &&
-		plane_state->lut3d_func->hdr_multiplier != 0)
-		dpp_base->funcs->dpp_set_hdr_multiplier(dpp_base,
-				plane_state->lut3d_func->hdr_multiplier);
-	else
-		dpp_base->funcs->dpp_set_hdr_multiplier(dpp_base, 0x1f000);
-
 	return result;
 }
 
-bool dcn20_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
-					  const struct dc_plane_state *plane_state)
+bool dcn20_set_input_transfer_func(struct dc *dc,
+				struct pipe_ctx *pipe_ctx,
+				const struct dc_plane_state *plane_state)
 {
+	struct dce_hwseq *hws = dc->hwseq;
 	struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
 	const struct dc_transfer_func *tf = NULL;
 	bool result = true;
@@ -758,8 +836,8 @@ bool dcn20_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 	if (dpp_base == NULL || plane_state == NULL)
 		return false;
 
-	dcn20_set_shaper_3dlut(pipe_ctx, plane_state);
-	dcn20_set_blend_lut(pipe_ctx, plane_state);
+	hws->funcs.set_shaper_3dlut(pipe_ctx, plane_state);
+	hws->funcs.set_blend_lut(pipe_ctx, plane_state);
 
 	if (plane_state->in_transfer_func)
 		tf = plane_state->in_transfer_func;
@@ -804,6 +882,11 @@ bool dcn20_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 					IPP_DEGAMMA_MODE_BYPASS);
 			break;
 		case TRANSFER_FUNCTION_PQ:
+			dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_USER_PWL);
+			cm_helper_translate_curve_to_degamma_hw_format(tf, &dpp_base->degamma_params);
+			dpp_base->funcs->dpp_program_degamma_pwl(dpp_base, &dpp_base->degamma_params);
+			result = true;
+			break;
 		default:
 			result = false;
 			break;
@@ -824,7 +907,7 @@ bool dcn20_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 	return result;
 }
 
-static void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
 {
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
@@ -855,12 +938,16 @@ void dcn20_blank_pixel_data(
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	enum dc_color_space color_space = stream->output_color_space;
 	enum controller_dp_test_pattern test_pattern = CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR;
+	enum controller_dp_color_space test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
 	struct pipe_ctx *odm_pipe;
 	int odm_cnt = 1;
 
 	int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
 	int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
 
+	if (stream->link->test_pattern_enabled)
+		return;
+
 	/* get opp dpg blank color */
 	color_space_to_black_color(dc, color_space, &black_color);
 
@@ -873,8 +960,10 @@ void dcn20_blank_pixel_data(
 		if (stream_res->abm)
 			stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
 
-		if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE)
+		if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) {
 			test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
+			test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
+		}
 	} else {
 		test_pattern = CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
 	}
@@ -882,6 +971,7 @@ void dcn20_blank_pixel_data(
 	stream_res->opp->funcs->opp_set_disp_pattern_generator(
 			stream_res->opp,
 			test_pattern,
+			test_pattern_color_space,
 			stream->timing.display_color_depth,
 			&black_color,
 			width,
@@ -892,6 +982,7 @@ void dcn20_blank_pixel_data(
 				odm_pipe->stream_res.opp,
 				dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE && blank ?
 						CONTROLLER_DP_TEST_PATTERN_COLORRAMP : test_pattern,
+				test_pattern_color_space,
 				stream->timing.display_color_depth,
 				&black_color,
 				width,
@@ -999,72 +1090,6 @@ void dcn20_enable_plane(
 }
 
 
-static void dcn20_program_pipe(
-		struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		struct dc_state *context)
-{
-	pipe_ctx->plane_state->update_flags.bits.full_update =
-			context->commit_hints.full_update_needed ? 1 : pipe_ctx->plane_state->update_flags.bits.full_update;
-
-	if (pipe_ctx->plane_state->update_flags.bits.full_update)
-		dcn20_enable_plane(dc, pipe_ctx, context);
-
-	update_dchubp_dpp(dc, pipe_ctx, context);
-
-	set_hdr_multiplier(pipe_ctx);
-
-	if (pipe_ctx->plane_state->update_flags.bits.full_update ||
-			pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
-			pipe_ctx->plane_state->update_flags.bits.gamma_change)
-		dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state);
-
-	/* dcn10_translate_regamma_to_hw_format takes 750us to finish
-	 * only do gamma programming for full update.
-	 * TODO: This can be further optimized/cleaned up
-	 * Always call this for now since it does memcmp inside before
-	 * doing heavy calculation and programming
-	 */
-	if (pipe_ctx->plane_state->update_flags.bits.full_update)
-		dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream);
-}
-
-static void dcn20_program_all_pipe_in_tree(
-		struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		struct dc_state *context)
-{
-	if (pipe_ctx->top_pipe == NULL && !pipe_ctx->prev_odm_pipe) {
-		bool blank = !is_pipe_tree_visible(pipe_ctx);
-
-		pipe_ctx->stream_res.tg->funcs->program_global_sync(
-				pipe_ctx->stream_res.tg,
-				pipe_ctx->pipe_dlg_param.vready_offset,
-				pipe_ctx->pipe_dlg_param.vstartup_start,
-				pipe_ctx->pipe_dlg_param.vupdate_offset,
-				pipe_ctx->pipe_dlg_param.vupdate_width);
-
-		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
-				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-
-		dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
-
-		if (dc->hwss.update_odm)
-			dc->hwss.update_odm(dc, context, pipe_ctx);
-	}
-
-	if (pipe_ctx->plane_state != NULL)
-		dcn20_program_pipe(dc, pipe_ctx, context);
-
-	if (pipe_ctx->bottom_pipe != NULL) {
-		ASSERT(pipe_ctx->bottom_pipe != pipe_ctx);
-		dcn20_program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context);
-	} else if (pipe_ctx->next_odm_pipe != NULL) {
-		ASSERT(pipe_ctx->next_odm_pipe != pipe_ctx);
-		dcn20_program_all_pipe_in_tree(dc, pipe_ctx->next_odm_pipe, context);
-	}
-}
-
 void dcn20_pipe_control_lock_global(
 		struct dc *dc,
 		struct pipe_ctx *pipe,
@@ -1103,6 +1128,25 @@ void dcn20_pipe_control_lock(
 	if (pipe->plane_state != NULL)
 		flip_immediate = pipe->plane_state->flip_immediate;
 
+	if (flip_immediate && lock) {
+		const int TIMEOUT_FOR_FLIP_PENDING = 100000;
+		int i;
+
+		for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
+			if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp))
+				break;
+			udelay(1);
+		}
+
+		if (pipe->bottom_pipe != NULL) {
+			for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
+				if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp))
+					break;
+				udelay(1);
+			}
+		}
+	}
+
 	/* In flip immediate and pipe splitting case, we need to use GSL
 	 * for synchronization. Only do setup on locking and on flip type change.
 	 */
@@ -1124,114 +1168,469 @@ void dcn20_pipe_control_lock(
 	}
 }
 
-static void dcn20_apply_ctx_for_surface(
-		struct dc *dc,
-		const struct dc_stream_state *stream,
-		int num_planes,
-		struct dc_state *context)
+static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
 {
-	const unsigned int TIMEOUT_FOR_PIPE_ENABLE_MS = 100;
-	int i;
-	struct timing_generator *tg;
-	bool removed_pipe[6] = { false };
-	bool interdependent_update = false;
-	struct pipe_ctx *top_pipe_to_program =
-			find_top_pipe_for_stream(dc, context, stream);
-	struct pipe_ctx *prev_top_pipe_to_program =
-			find_top_pipe_for_stream(dc, dc->current_state, stream);
-	DC_LOGGER_INIT(dc->ctx->logger);
+	new_pipe->update_flags.raw = 0;
 
-	if (!top_pipe_to_program)
+	/* Exit on unchanged, unused pipe */
+	if (!old_pipe->plane_state && !new_pipe->plane_state)
 		return;
+	/* Detect pipe enable/disable */
+	if (!old_pipe->plane_state && new_pipe->plane_state) {
+		new_pipe->update_flags.bits.enable = 1;
+		new_pipe->update_flags.bits.mpcc = 1;
+		new_pipe->update_flags.bits.dppclk = 1;
+		new_pipe->update_flags.bits.hubp_interdependent = 1;
+		new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+		new_pipe->update_flags.bits.gamut_remap = 1;
+		new_pipe->update_flags.bits.scaler = 1;
+		new_pipe->update_flags.bits.viewport = 1;
+		if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
+			new_pipe->update_flags.bits.odm = 1;
+			new_pipe->update_flags.bits.global_sync = 1;
+		}
+		return;
+	}
+	if (old_pipe->plane_state && !new_pipe->plane_state) {
+		new_pipe->update_flags.bits.disable = 1;
+		return;
+	}
 
-	/* Carry over GSL groups in case the context is changing. */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-		struct pipe_ctx *old_pipe_ctx =
-			&dc->current_state->res_ctx.pipe_ctx[i];
-
-		if (pipe_ctx->stream == stream &&
-		    pipe_ctx->stream == old_pipe_ctx->stream)
-			pipe_ctx->stream_res.gsl_group =
-				old_pipe_ctx->stream_res.gsl_group;
+	/* Detect top pipe only changes */
+	if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
+		/* Detect odm changes */
+		if ((old_pipe->next_odm_pipe && new_pipe->next_odm_pipe
+			&& old_pipe->next_odm_pipe->pipe_idx != new_pipe->next_odm_pipe->pipe_idx)
+				|| (!old_pipe->next_odm_pipe && new_pipe->next_odm_pipe)
+				|| (old_pipe->next_odm_pipe && !new_pipe->next_odm_pipe)
+				|| old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+			new_pipe->update_flags.bits.odm = 1;
+
+		/* Detect global sync changes */
+		if (old_pipe->pipe_dlg_param.vready_offset != new_pipe->pipe_dlg_param.vready_offset
+				|| old_pipe->pipe_dlg_param.vstartup_start != new_pipe->pipe_dlg_param.vstartup_start
+				|| old_pipe->pipe_dlg_param.vupdate_offset != new_pipe->pipe_dlg_param.vupdate_offset
+				|| old_pipe->pipe_dlg_param.vupdate_width != new_pipe->pipe_dlg_param.vupdate_width)
+			new_pipe->update_flags.bits.global_sync = 1;
 	}
 
-	tg = top_pipe_to_program->stream_res.tg;
+	/*
+	 * Detect opp / tg change, only set on change, not on enable
+	 * Assume mpcc inst = pipe index, if not this code needs to be updated
+	 * since mpcc is what is affected by these. In fact all of our sequence
+	 * makes this assumption at the moment with how hubp reset is matched to
+	 * same index mpcc reset.
+	 */
+	if (old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+		new_pipe->update_flags.bits.opp_changed = 1;
+	if (old_pipe->stream_res.tg != new_pipe->stream_res.tg)
+		new_pipe->update_flags.bits.tg_changed = 1;
+
+	/* Detect mpcc blending changes, only dpp inst and bot matter here */
+	if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp
+			|| old_pipe->stream_res.opp != new_pipe->stream_res.opp
+			|| (!old_pipe->bottom_pipe && new_pipe->bottom_pipe)
+			|| (old_pipe->bottom_pipe && !new_pipe->bottom_pipe)
+			|| (old_pipe->bottom_pipe && new_pipe->bottom_pipe
+				&& old_pipe->bottom_pipe->plane_res.mpcc_inst
+					!= new_pipe->bottom_pipe->plane_res.mpcc_inst))
+		new_pipe->update_flags.bits.mpcc = 1;
+
+	/* Detect dppclk change */
+	if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz)
+		new_pipe->update_flags.bits.dppclk = 1;
+
+	/* Check for scl update */
+	if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data)))
+			new_pipe->update_flags.bits.scaler = 1;
+	/* Check for vp update */
+	if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect))
+			|| memcmp(&old_pipe->plane_res.scl_data.viewport_c,
+				&new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect)))
+		new_pipe->update_flags.bits.viewport = 1;
+
+	/* Detect dlg/ttu/rq updates */
+	{
+		struct _vcs_dpi_display_dlg_regs_st old_dlg_attr = old_pipe->dlg_regs;
+		struct _vcs_dpi_display_ttu_regs_st old_ttu_attr = old_pipe->ttu_regs;
+		struct _vcs_dpi_display_dlg_regs_st *new_dlg_attr = &new_pipe->dlg_regs;
+		struct _vcs_dpi_display_ttu_regs_st *new_ttu_attr = &new_pipe->ttu_regs;
+
+		/* Detect pipe interdependent updates */
+		if (old_dlg_attr.dst_y_prefetch != new_dlg_attr->dst_y_prefetch ||
+				old_dlg_attr.vratio_prefetch != new_dlg_attr->vratio_prefetch ||
+				old_dlg_attr.vratio_prefetch_c != new_dlg_attr->vratio_prefetch_c ||
+				old_dlg_attr.dst_y_per_vm_vblank != new_dlg_attr->dst_y_per_vm_vblank ||
+				old_dlg_attr.dst_y_per_row_vblank != new_dlg_attr->dst_y_per_row_vblank ||
+				old_dlg_attr.dst_y_per_vm_flip != new_dlg_attr->dst_y_per_vm_flip ||
+				old_dlg_attr.dst_y_per_row_flip != new_dlg_attr->dst_y_per_row_flip ||
+				old_dlg_attr.refcyc_per_meta_chunk_vblank_l != new_dlg_attr->refcyc_per_meta_chunk_vblank_l ||
+				old_dlg_attr.refcyc_per_meta_chunk_vblank_c != new_dlg_attr->refcyc_per_meta_chunk_vblank_c ||
+				old_dlg_attr.refcyc_per_meta_chunk_flip_l != new_dlg_attr->refcyc_per_meta_chunk_flip_l ||
+				old_dlg_attr.refcyc_per_line_delivery_pre_l != new_dlg_attr->refcyc_per_line_delivery_pre_l ||
+				old_dlg_attr.refcyc_per_line_delivery_pre_c != new_dlg_attr->refcyc_per_line_delivery_pre_c ||
+				old_ttu_attr.refcyc_per_req_delivery_pre_l != new_ttu_attr->refcyc_per_req_delivery_pre_l ||
+				old_ttu_attr.refcyc_per_req_delivery_pre_c != new_ttu_attr->refcyc_per_req_delivery_pre_c ||
+				old_ttu_attr.refcyc_per_req_delivery_pre_cur0 != new_ttu_attr->refcyc_per_req_delivery_pre_cur0 ||
+				old_ttu_attr.refcyc_per_req_delivery_pre_cur1 != new_ttu_attr->refcyc_per_req_delivery_pre_cur1 ||
+				old_ttu_attr.min_ttu_vblank != new_ttu_attr->min_ttu_vblank ||
+				old_ttu_attr.qos_level_flip != new_ttu_attr->qos_level_flip) {
+			old_dlg_attr.dst_y_prefetch = new_dlg_attr->dst_y_prefetch;
+			old_dlg_attr.vratio_prefetch = new_dlg_attr->vratio_prefetch;
+			old_dlg_attr.vratio_prefetch_c = new_dlg_attr->vratio_prefetch_c;
+			old_dlg_attr.dst_y_per_vm_vblank = new_dlg_attr->dst_y_per_vm_vblank;
+			old_dlg_attr.dst_y_per_row_vblank = new_dlg_attr->dst_y_per_row_vblank;
+			old_dlg_attr.dst_y_per_vm_flip = new_dlg_attr->dst_y_per_vm_flip;
+			old_dlg_attr.dst_y_per_row_flip = new_dlg_attr->dst_y_per_row_flip;
+			old_dlg_attr.refcyc_per_meta_chunk_vblank_l = new_dlg_attr->refcyc_per_meta_chunk_vblank_l;
+			old_dlg_attr.refcyc_per_meta_chunk_vblank_c = new_dlg_attr->refcyc_per_meta_chunk_vblank_c;
+			old_dlg_attr.refcyc_per_meta_chunk_flip_l = new_dlg_attr->refcyc_per_meta_chunk_flip_l;
+			old_dlg_attr.refcyc_per_line_delivery_pre_l = new_dlg_attr->refcyc_per_line_delivery_pre_l;
+			old_dlg_attr.refcyc_per_line_delivery_pre_c = new_dlg_attr->refcyc_per_line_delivery_pre_c;
+			old_ttu_attr.refcyc_per_req_delivery_pre_l = new_ttu_attr->refcyc_per_req_delivery_pre_l;
+			old_ttu_attr.refcyc_per_req_delivery_pre_c = new_ttu_attr->refcyc_per_req_delivery_pre_c;
+			old_ttu_attr.refcyc_per_req_delivery_pre_cur0 = new_ttu_attr->refcyc_per_req_delivery_pre_cur0;
+			old_ttu_attr.refcyc_per_req_delivery_pre_cur1 = new_ttu_attr->refcyc_per_req_delivery_pre_cur1;
+			old_ttu_attr.min_ttu_vblank = new_ttu_attr->min_ttu_vblank;
+			old_ttu_attr.qos_level_flip = new_ttu_attr->qos_level_flip;
+			new_pipe->update_flags.bits.hubp_interdependent = 1;
+		}
+		/* Detect any other updates to ttu/rq/dlg */
+		if (memcmp(&old_dlg_attr, &new_pipe->dlg_regs, sizeof(old_dlg_attr)) ||
+				memcmp(&old_ttu_attr, &new_pipe->ttu_regs, sizeof(old_ttu_attr)) ||
+				memcmp(&old_pipe->rq_regs, &new_pipe->rq_regs, sizeof(old_pipe->rq_regs)))
+			new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+	}
+}
 
-	interdependent_update = top_pipe_to_program->plane_state &&
-		top_pipe_to_program->plane_state->update_flags.bits.full_update;
+static void dcn20_update_dchubp_dpp(
+	struct dc *dc,
+	struct pipe_ctx *pipe_ctx,
+	struct dc_state *context)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+	struct dpp *dpp = pipe_ctx->plane_res.dpp;
+	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+	bool viewport_changed = false;
 
-	if (interdependent_update)
-		lock_all_pipes(dc, context, true);
-	else
-		dcn20_pipe_control_lock(dc, top_pipe_to_program, true);
+	if (pipe_ctx->update_flags.bits.dppclk)
+		dpp->funcs->dpp_dppclk_control(dpp, false, true);
 
-	if (num_planes == 0) {
-		/* OTG blank before remove all front end */
-		dc->hwss.blank_pixel_data(dc, top_pipe_to_program, true);
+	/* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG
+	 * VTG is within DCHUBBUB which is commond block share by each pipe HUBP.
+	 * VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG
+	 */
+	if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) {
+		hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst);
+
+		hubp->funcs->hubp_setup(
+			hubp,
+			&pipe_ctx->dlg_regs,
+			&pipe_ctx->ttu_regs,
+			&pipe_ctx->rq_regs,
+			&pipe_ctx->pipe_dlg_param);
+	}
+	if (pipe_ctx->update_flags.bits.hubp_interdependent)
+		hubp->funcs->hubp_setup_interdependent(
+			hubp,
+			&pipe_ctx->dlg_regs,
+			&pipe_ctx->ttu_regs);
+
+	if (pipe_ctx->update_flags.bits.enable ||
+			plane_state->update_flags.bits.bpp_change ||
+			plane_state->update_flags.bits.input_csc_change ||
+			plane_state->update_flags.bits.color_space_change ||
+			plane_state->update_flags.bits.coeff_reduction_change) {
+		struct dc_bias_and_scale bns_params = {0};
+
+		// program the input csc
+		dpp->funcs->dpp_setup(dpp,
+				plane_state->format,
+				EXPANSION_MODE_ZERO,
+				plane_state->input_csc_color_matrix,
+				plane_state->color_space,
+				NULL);
+
+		if (dpp->funcs->dpp_program_bias_and_scale) {
+			//TODO :for CNVC set scale and bias registers if necessary
+			build_prescale_params(&bns_params, plane_state);
+			dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
+		}
 	}
 
-	/* Disconnect unused mpcc */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-		struct pipe_ctx *old_pipe_ctx =
-				&dc->current_state->res_ctx.pipe_ctx[i];
-		/*
-		 * Powergate reused pipes that are not powergated
-		 * fairly hacky right now, using opp_id as indicator
-		 * TODO: After move dc_post to dc_update, this will
-		 * be removed.
-		 */
-		if (pipe_ctx->plane_state && !old_pipe_ctx->plane_state) {
-			if (old_pipe_ctx->stream_res.tg == tg &&
-			    old_pipe_ctx->plane_res.hubp &&
-			    old_pipe_ctx->plane_res.hubp->opp_id != OPP_ID_INVALID)
-				dc->hwss.disable_plane(dc, old_pipe_ctx);
+	if (pipe_ctx->update_flags.bits.mpcc
+			|| plane_state->update_flags.bits.global_alpha_change
+			|| plane_state->update_flags.bits.per_pixel_alpha_change) {
+		// MPCC inst is equal to pipe index in practice
+		int mpcc_inst = hubp->inst;
+		int opp_inst;
+		int opp_count = dc->res_pool->pipe_count;
+
+		for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+			if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
+				dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+				dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+				break;
+			}
 		}
+		hws->funcs.update_mpcc(dc, pipe_ctx);
+	}
 
-		if ((!pipe_ctx->plane_state ||
-		     pipe_ctx->stream_res.tg != old_pipe_ctx->stream_res.tg) &&
-		     old_pipe_ctx->plane_state &&
-		     old_pipe_ctx->stream_res.tg == tg) {
+	if (pipe_ctx->update_flags.bits.scaler ||
+			plane_state->update_flags.bits.scaling_change ||
+			plane_state->update_flags.bits.position_change ||
+			plane_state->update_flags.bits.per_pixel_alpha_change ||
+			pipe_ctx->stream->update_flags.bits.scaling) {
+		pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->plane_state->per_pixel_alpha;
+		ASSERT(pipe_ctx->plane_res.scl_data.lb_params.depth == LB_PIXEL_DEPTH_30BPP);
+		/* scaler configuration */
+		pipe_ctx->plane_res.dpp->funcs->dpp_set_scaler(
+				pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data);
+	}
 
-			dc->hwss.plane_atomic_disconnect(dc, old_pipe_ctx);
-			removed_pipe[i] = true;
+	if (pipe_ctx->update_flags.bits.viewport ||
+			(context == dc->current_state && plane_state->update_flags.bits.scaling_change) ||
+			(context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) {
 
-			DC_LOG_DC("Reset mpcc for pipe %d\n",
-					old_pipe_ctx->pipe_idx);
-		}
+		hubp->funcs->mem_program_viewport(
+			hubp,
+			&pipe_ctx->plane_res.scl_data.viewport,
+			&pipe_ctx->plane_res.scl_data.viewport_c);
+		viewport_changed = true;
 	}
 
-	if (num_planes > 0)
-		dcn20_program_all_pipe_in_tree(dc, top_pipe_to_program, context);
+	/* Any updates are handled in dc interface, just need to apply existing for plane enable */
+	if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
+			pipe_ctx->update_flags.bits.scaler || pipe_ctx->update_flags.bits.viewport)
+			&& pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
+		dc->hwss.set_cursor_position(pipe_ctx);
+		dc->hwss.set_cursor_attribute(pipe_ctx);
 
-	/* Program secondary blending tree and writeback pipes */
-	if ((stream->num_wb_info > 0) && (dc->hwss.program_all_writeback_pipes_in_tree))
-		dc->hwss.program_all_writeback_pipes_in_tree(dc, stream, context);
+		if (dc->hwss.set_cursor_sdr_white_level)
+			dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
+	}
 
-	if (interdependent_update)
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+	/* Any updates are handled in dc interface, just need
+	 * to apply existing for plane enable / opp change */
+	if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed
+			|| pipe_ctx->stream->update_flags.bits.gamut_remap
+			|| pipe_ctx->stream->update_flags.bits.out_csc) {
+			/* dpp/cm gamut remap*/
+			dc->hwss.program_gamut_remap(pipe_ctx);
+
+		/*call the dcn2 method which uses mpc csc*/
+		dc->hwss.program_output_csc(dc,
+				pipe_ctx,
+				pipe_ctx->stream->output_color_space,
+				pipe_ctx->stream->csc_color_matrix.matrix,
+				hubp->opp_id);
+	}
+
+	if (pipe_ctx->update_flags.bits.enable ||
+			pipe_ctx->update_flags.bits.opp_changed ||
+			plane_state->update_flags.bits.pixel_format_change ||
+			plane_state->update_flags.bits.horizontal_mirror_change ||
+			plane_state->update_flags.bits.rotation_change ||
+			plane_state->update_flags.bits.swizzle_change ||
+			plane_state->update_flags.bits.dcc_change ||
+			plane_state->update_flags.bits.bpp_change ||
+			plane_state->update_flags.bits.scaling_change ||
+			plane_state->update_flags.bits.plane_size_change) {
+		struct plane_size size = plane_state->plane_size;
+
+		size.surface_size = pipe_ctx->plane_res.scl_data.viewport;
+		hubp->funcs->hubp_program_surface_config(
+			hubp,
+			plane_state->format,
+			&plane_state->tiling_info,
+			&size,
+			plane_state->rotation,
+			&plane_state->dcc,
+			plane_state->horizontal_mirror,
+			0);
+		hubp->power_gated = false;
+	}
+
+	if (hubp->funcs->apply_PLAT_54186_wa && viewport_changed)
+		hubp->funcs->apply_PLAT_54186_wa(hubp, &plane_state->address);
+
+	if (pipe_ctx->update_flags.bits.enable || plane_state->update_flags.bits.addr_update)
+		hws->funcs.update_plane_addr(dc, pipe_ctx);
+
+
+
+	if (pipe_ctx->update_flags.bits.enable)
+		hubp->funcs->set_blank(hubp, false);
+}
+
+
+static void dcn20_program_pipe(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	/* Only need to unblank on top pipe */
+	if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
+			&& !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
+		hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
+
+	if (pipe_ctx->update_flags.bits.global_sync) {
+		pipe_ctx->stream_res.tg->funcs->program_global_sync(
+				pipe_ctx->stream_res.tg,
+				pipe_ctx->pipe_dlg_param.vready_offset,
+				pipe_ctx->pipe_dlg_param.vstartup_start,
+				pipe_ctx->pipe_dlg_param.vupdate_offset,
+				pipe_ctx->pipe_dlg_param.vupdate_width);
+
+		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+
+		if (hws->funcs.setup_vupdate_interrupt)
+			hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+	}
+
+	if (pipe_ctx->update_flags.bits.odm)
+		hws->funcs.update_odm(dc, context, pipe_ctx);
+
+	if (pipe_ctx->update_flags.bits.enable)
+		dcn20_enable_plane(dc, pipe_ctx, context);
+
+	if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw)
+		dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
+
+	if (pipe_ctx->update_flags.bits.enable
+			|| pipe_ctx->plane_state->update_flags.bits.hdr_mult)
+		hws->funcs.set_hdr_multiplier(pipe_ctx);
+
+	if (pipe_ctx->update_flags.bits.enable ||
+			pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+			pipe_ctx->plane_state->update_flags.bits.gamma_change)
+		hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
+
+	/* dcn10_translate_regamma_to_hw_format takes 750us to finish
+	 * only do gamma programming for powering on, internal memcmp to avoid
+	 * updating on slave planes
+	 */
+	if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
+		hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
+
+	/* If the pipe has been enabled or has a different opp, we
+	 * should reprogram the fmt. This deals with cases where
+	 * interation between mpc and odm combine on different streams
+	 * causes a different pipe to be chosen to odm combine with.
+	 */
+	if (pipe_ctx->update_flags.bits.enable
+	    || pipe_ctx->update_flags.bits.opp_changed) {
+
+		pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
+			pipe_ctx->stream_res.opp,
+			COLOR_SPACE_YCBCR601,
+			pipe_ctx->stream->timing.display_color_depth,
+			pipe_ctx->stream->signal);
+
+		pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
+			pipe_ctx->stream_res.opp,
+			&pipe_ctx->stream->bit_depth_params,
+			&pipe_ctx->stream->clamping);
+	}
+}
+
+static bool does_pipe_need_lock(struct pipe_ctx *pipe)
+{
+	if ((pipe->plane_state && pipe->plane_state->update_flags.raw)
+			|| pipe->update_flags.raw)
+		return true;
+	if (pipe->bottom_pipe)
+		return does_pipe_need_lock(pipe->bottom_pipe);
+
+	return false;
+}
+
+void dcn20_program_front_end_for_ctx(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	const unsigned int TIMEOUT_FOR_PIPE_ENABLE_MS = 100;
+	int i;
+	struct dce_hwseq *hws = dc->hwseq;
+	bool pipe_locked[MAX_PIPES] = {false};
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	/* Carry over GSL groups in case the context is changing. */
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (context->res_ctx.pipe_ctx[i].stream == dc->current_state->res_ctx.pipe_ctx[i].stream)
+			context->res_ctx.pipe_ctx[i].stream_res.gsl_group =
+				dc->current_state->res_ctx.pipe_ctx[i].stream_res.gsl_group;
+
+	/* Set pipe update flags and lock pipes */
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
+				&context->res_ctx.pipe_ctx[i]);
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (!context->res_ctx.pipe_ctx[i].top_pipe &&
+				does_pipe_need_lock(&context->res_ctx.pipe_ctx[i])) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-			/* Skip inactive pipes and ones already updated */
-			if (!pipe_ctx->stream || pipe_ctx->stream == stream ||
-			    !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg))
-				continue;
+			if (pipe_ctx->update_flags.bits.tg_changed || pipe_ctx->update_flags.bits.enable)
+				dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
+			if (!pipe_ctx->update_flags.bits.enable)
+				dc->hwss.pipe_control_lock(dc, &dc->current_state->res_ctx.pipe_ctx[i], true);
+			pipe_locked[i] = true;
+		}
 
-			pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent(
-				pipe_ctx->plane_res.hubp,
-				&pipe_ctx->dlg_regs,
-				&pipe_ctx->ttu_regs);
+	/* OTG blank before disabling all front ends */
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+				&& !context->res_ctx.pipe_ctx[i].top_pipe
+				&& !context->res_ctx.pipe_ctx[i].prev_odm_pipe
+				&& context->res_ctx.pipe_ctx[i].stream)
+			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
+
+	/* Disconnect mpcc */
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+				|| context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
+			hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
 		}
 
-	if (interdependent_update)
-		lock_all_pipes(dc, context, false);
-	else
-		dcn20_pipe_control_lock(dc, top_pipe_to_program, false);
+	/*
+	 * Program all updated pipes, order matters for mpcc setup. Start with
+	 * top pipe and program all pipes that follow in order
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && !pipe->top_pipe) {
+			while (pipe) {
+				dcn20_program_pipe(dc, pipe, context);
+				pipe = pipe->bottom_pipe;
+			}
+			/* Program secondary blending tree and writeback pipes */
+			pipe = &context->res_ctx.pipe_ctx[i];
+			if (!pipe->prev_odm_pipe && pipe->stream->num_wb_info > 0
+					&& (pipe->update_flags.raw || pipe->plane_state->update_flags.raw || pipe->stream->update_flags.raw)
+					&& hws->funcs.program_all_writeback_pipes_in_tree)
+				hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
+		}
+	}
 
+	/* Unlock all locked pipes */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
-		if (removed_pipe[i])
-			dcn20_disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+		if (pipe_locked[i]) {
+			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+			if (pipe_ctx->update_flags.bits.tg_changed || pipe_ctx->update_flags.bits.enable)
+				dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
+			if (!pipe_ctx->update_flags.bits.enable)
+				dc->hwss.pipe_control_lock(dc, &dc->current_state->res_ctx.pipe_ctx[i], false);
+		}
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
+			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	/*
 	 * If we are enabling a pipe, we need to wait for pending clear as this is a critical
@@ -1239,15 +1638,22 @@ static void dcn20_apply_ctx_for_surface(
 	 * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which
 	 * is unsupported on DCN.
 	 */
-	i = 0;
-	if (num_planes > 0 && top_pipe_to_program &&
-			(prev_top_pipe_to_program == NULL || prev_top_pipe_to_program->plane_state == NULL)) {
-		while (i < TIMEOUT_FOR_PIPE_ENABLE_MS &&
-				top_pipe_to_program->plane_res.hubp->funcs->hubp_is_flip_pending(top_pipe_to_program->plane_res.hubp)) {
-			i += 1;
-			msleep(1);
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) {
+			struct hubp *hubp = pipe->plane_res.hubp;
+			int j = 0;
+
+			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000
+					&& hubp->funcs->hubp_is_flip_pending(hubp); j++)
+				mdelay(1);
 		}
 	}
+
+	/* WA to apply WM setting*/
+	if (dc->hwseq->wa.DEGVIDCN21)
+		dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
 }
 
 
@@ -1292,6 +1698,7 @@ bool dcn20_update_bandwidth(
 		struct dc_state *context)
 {
 	int i;
+	struct dce_hwseq *hws = dc->hwseq;
 
 	/* recalculate DML parameters */
 	if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false))
@@ -1319,8 +1726,12 @@ bool dcn20_update_bandwidth(
 
 			pipe_ctx->stream_res.tg->funcs->set_vtg_params(
 					pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+
 			if (pipe_ctx->prev_odm_pipe == NULL)
-				dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
+				hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
+
+			if (hws->funcs.setup_vupdate_interrupt)
+				hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
 		}
 
 		pipe_ctx->plane_res.hubp->funcs->hubp_setup(
@@ -1334,10 +1745,10 @@ bool dcn20_update_bandwidth(
 	return true;
 }
 
-static void dcn20_enable_writeback(
+void dcn20_enable_writeback(
 		struct dc *dc,
-		const struct dc_stream_status *stream_status,
-		struct dc_writeback_info *wb_info)
+		struct dc_writeback_info *wb_info,
+		struct dc_state *context)
 {
 	struct dwbc *dwb;
 	struct mcif_wb *mcif_wb;
@@ -1349,12 +1760,11 @@ static void dcn20_enable_writeback(
 	mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst];
 
 	/* set the OPTC source mux */
-	ASSERT(stream_status->primary_otg_inst < MAX_PIPES);
-	optc = dc->res_pool->timing_generators[stream_status->primary_otg_inst];
+	optc = dc->res_pool->timing_generators[dwb->otg_inst];
 	optc->funcs->set_dwb_source(optc, wb_info->dwb_pipe_inst);
 	/* set MCIF_WB buffer and arbitration configuration */
 	mcif_wb->funcs->config_mcif_buf(mcif_wb, &wb_info->mcif_buf_params, wb_info->dwb_params.dest_height);
-	mcif_wb->funcs->config_mcif_arb(mcif_wb, &dc->current_state->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]);
+	mcif_wb->funcs->config_mcif_arb(mcif_wb, &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]);
 	/* Enable MCIF_WB */
 	mcif_wb->funcs->enable_mcif(mcif_wb);
 	/* Enable DWB */
@@ -1377,7 +1787,7 @@ void dcn20_disable_writeback(
 	mcif_wb->funcs->disable_mcif(mcif_wb);
 }
 
-bool dcn20_hwss_wait_for_blank_complete(
+bool dcn20_wait_for_blank_complete(
 		struct output_pixel_processor *opp)
 {
 	int counter;
@@ -1406,9 +1816,8 @@ bool dcn20_dmdata_status_done(struct pipe_ctx *pipe_ctx)
 	return hubp->funcs->dmdata_status_done(hubp);
 }
 
-static void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct dce_hwseq *hws = dc->hwseq;
 
 	if (pipe_ctx->stream_res.dsc) {
@@ -1420,12 +1829,10 @@ static void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx
 			odm_pipe = odm_pipe->next_odm_pipe;
 		}
 	}
-#endif
 }
 
-static void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct dce_hwseq *hws = dc->hwseq;
 
 	if (pipe_ctx->stream_res.dsc) {
@@ -1437,7 +1844,6 @@ static void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
 			odm_pipe = odm_pipe->next_odm_pipe;
 		}
 	}
-#endif
 }
 
 void dcn20_set_dmdata_attributes(struct pipe_ctx *pipe_ctx)
@@ -1460,12 +1866,7 @@ void dcn20_set_dmdata_attributes(struct pipe_ctx *pipe_ctx)
 	hubp->funcs->dmdata_set_attributes(hubp, &attr);
 }
 
-void dcn20_disable_stream(struct pipe_ctx *pipe_ctx)
-{
-	dce110_disable_stream(pipe_ctx);
-}
-
-static void dcn20_init_vm_ctx(
+void dcn20_init_vm_ctx(
 		struct dce_hwseq *hws,
 		struct dc *dc,
 		struct dc_virtual_addr_space_config *va_config,
@@ -1487,7 +1888,7 @@ static void dcn20_init_vm_ctx(
 	dc->res_pool->hubbub->funcs->init_vm_ctx(dc->res_pool->hubbub, &config, vmid);
 }
 
-static int dcn20_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
+int dcn20_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
 {
 	struct dcn_hubbub_phys_addr_config config;
 
@@ -1531,8 +1932,7 @@ static bool patch_address_for_sbs_tb_stereo(
 	return false;
 }
 
-
-static void dcn20_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	bool addr_patched = false;
 	PHYSICAL_ADDRESS_LOC addr;
@@ -1566,6 +1966,7 @@ void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 	struct encoder_unblank_param params = { { 0 } };
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;
+	struct dce_hwseq *hws = link->dc->hwseq;
 	struct pipe_ctx *odm_pipe;
 
 	params.opp_cnt = 1;
@@ -1578,7 +1979,7 @@ void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 	params.link_settings.link_rate = link_settings->link_rate;
 
 	if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-		if (optc1_is_two_pixels_per_containter(&stream->timing) || params.opp_cnt > 1)
+		if (optc2_is_two_pixels_per_containter(&stream->timing) || params.opp_cnt > 1)
 			params.timing.pix_clk_100hz /= 2;
 		pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
 				pipe_ctx->stream_res.stream_enc, params.opp_cnt > 1);
@@ -1586,14 +1987,14 @@ void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 	}
 
 	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
-		link->dc->hwss.edp_backlight_control(link, true);
+		hws->funcs.edp_backlight_control(link, true);
 	}
 }
 
-void dcn20_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx)
+void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	struct timing_generator *tg = pipe_ctx->stream_res.tg;
-	int start_line = get_vupdate_offset_from_vsync(pipe_ctx);
+	int start_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
 
 	if (start_line < 0)
 		start_line = 0;
@@ -1608,6 +2009,7 @@ static void dcn20_reset_back_end_for_pipe(
 		struct dc_state *context)
 {
 	int i;
+	struct dc_link *link;
 	DC_LOGGER_INIT(dc->ctx->logger);
 	if (pipe_ctx->stream_res.stream_enc == NULL) {
 		pipe_ctx->stream = NULL;
@@ -1615,8 +2017,14 @@ static void dcn20_reset_back_end_for_pipe(
 	}
 
 	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-		/* DPMS may already disable */
-		if (!pipe_ctx->stream->dpms_off)
+		link = pipe_ctx->stream->link;
+		/* DPMS may already disable or */
+		/* dpms_off status is incorrect due to fastboot
+		 * feature. When system resume from S4 with second
+		 * screen only, the dpms_off would be true but
+		 * VBIOS lit up eDP, so check link status too.
+		 */
+		if (!pipe_ctx->stream->dpms_off || link->link_status.link_active)
 			core_link_disable_stream(pipe_ctx);
 		else if (pipe_ctx->stream_res.audio)
 			dc->hwss.disable_audio_stream(pipe_ctx);
@@ -1636,11 +2044,9 @@ static void dcn20_reset_back_end_for_pipe(
 			}
 		}
 	}
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	else if (pipe_ctx->stream_res.dsc) {
 		dp_set_dsc_enable(pipe_ctx, false);
 	}
-#endif
 
 	/* by upper caller loop, parent pipe: pipe0, will be reset last.
 	 * back end share by all pipes and will be disable only when disable
@@ -1671,11 +2077,12 @@ static void dcn20_reset_back_end_for_pipe(
 					pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
 }
 
-static void dcn20_reset_hw_ctx_wrap(
+void dcn20_reset_hw_ctx_wrap(
 		struct dc *dc,
 		struct dc_state *context)
 {
 	int i;
+	struct dce_hwseq *hws = dc->hwseq;
 
 	/* Reset Back End*/
 	for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
@@ -1694,16 +2101,39 @@ static void dcn20_reset_hw_ctx_wrap(
 			struct clock_source *old_clk = pipe_ctx_old->clock_source;
 
 			dcn20_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
-			if (dc->hwss.enable_stream_gating)
-				dc->hwss.enable_stream_gating(dc, pipe_ctx);
+			if (hws->funcs.enable_stream_gating)
+				hws->funcs.enable_stream_gating(dc, pipe_ctx);
 			if (old_clk)
 				old_clk->funcs->cs_power_down(old_clk);
 		}
 	}
 }
 
-static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_get_mpctree_visual_confirm_color(
+		struct pipe_ctx *pipe_ctx,
+		struct tg_color *color)
 {
+	const struct tg_color pipe_colors[6] = {
+			{MAX_TG_COLOR_VALUE, 0, 0}, // red
+			{MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, // yellow
+			{0, MAX_TG_COLOR_VALUE, 0}, // blue
+			{MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, // purple
+			{0, 0, MAX_TG_COLOR_VALUE}, // green
+			{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE * 2 / 3, 0}, // orange
+	};
+
+	struct pipe_ctx *top_pipe = pipe_ctx;
+
+	while (top_pipe->top_pipe) {
+		top_pipe = top_pipe->top_pipe;
+	}
+
+	*color = pipe_colors[top_pipe->pipe_idx];
+}
+
+void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	struct mpcc_blnd_cfg blnd_cfg = { {0} };
 	bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha;
@@ -1714,10 +2144,13 @@ static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 
 	// input to MPCC is always RGB, by default leave black_color at 0
 	if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR) {
-		dcn10_get_hdr_visual_confirm_color(
+		hws->funcs.get_hdr_visual_confirm_color(
 				pipe_ctx, &blnd_cfg.black_color);
 	} else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE) {
-		dcn10_get_surface_visual_confirm_color(
+		hws->funcs.get_surface_visual_confirm_color(
+				pipe_ctx, &blnd_cfg.black_color);
+	} else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE) {
+		dcn20_get_mpctree_visual_confirm_color(
 				pipe_ctx, &blnd_cfg.black_color);
 	}
 
@@ -1751,12 +2184,6 @@ static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	 */
 	mpcc_id = hubp->inst;
 
-	/* If there is no full update, don't need to touch MPC tree*/
-	if (!pipe_ctx->plane_state->update_flags.bits.full_update) {
-		mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
-		return;
-	}
-
 	/* check if this MPCC is already being used */
 	new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
 	/* remove MPCC if being used */
@@ -1781,125 +2208,7 @@ static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	hubp->mpcc_id = mpcc_id;
 }
 
-static int find_free_gsl_group(const struct dc *dc)
-{
-	if (dc->res_pool->gsl_groups.gsl_0 == 0)
-		return 1;
-	if (dc->res_pool->gsl_groups.gsl_1 == 0)
-		return 2;
-	if (dc->res_pool->gsl_groups.gsl_2 == 0)
-		return 3;
-
-	return 0;
-}
-
-/* NOTE: This is not a generic setup_gsl function (hence the suffix as_lock)
- * This is only used to lock pipes in pipe splitting case with immediate flip
- * Ordinary MPC/OTG locks suppress VUPDATE which doesn't help with immediate,
- * so we get tearing with freesync since we cannot flip multiple pipes
- * atomically.
- * We use GSL for this:
- * - immediate flip: find first available GSL group if not already assigned
- *                   program gsl with that group, set current OTG as master
- *                   and always us 0x4 = AND of flip_ready from all pipes
- * - vsync flip: disable GSL if used
- *
- * Groups in stream_res are stored as +1 from HW registers, i.e.
- * gsl_0 <=> pipe_ctx->stream_res.gsl_group == 1
- * Using a magic value like -1 would require tracking all inits/resets
- */
-void dcn20_setup_gsl_group_as_lock(
-		const struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		bool enable)
-{
-	struct gsl_params gsl;
-	int group_idx;
-
-	memset(&gsl, 0, sizeof(struct gsl_params));
-
-	if (enable) {
-		/* return if group already assigned since GSL was set up
-		 * for vsync flip, we would unassign so it can't be "left over"
-		 */
-		if (pipe_ctx->stream_res.gsl_group > 0)
-			return;
-
-		group_idx = find_free_gsl_group(dc);
-		ASSERT(group_idx != 0);
-		pipe_ctx->stream_res.gsl_group = group_idx;
-
-		/* set gsl group reg field and mark resource used */
-		switch (group_idx) {
-		case 1:
-			gsl.gsl0_en = 1;
-			dc->res_pool->gsl_groups.gsl_0 = 1;
-			break;
-		case 2:
-			gsl.gsl1_en = 1;
-			dc->res_pool->gsl_groups.gsl_1 = 1;
-			break;
-		case 3:
-			gsl.gsl2_en = 1;
-			dc->res_pool->gsl_groups.gsl_2 = 1;
-			break;
-		default:
-			BREAK_TO_DEBUGGER();
-			return; // invalid case
-		}
-		gsl.gsl_master_en = 1;
-	} else {
-		group_idx = pipe_ctx->stream_res.gsl_group;
-		if (group_idx == 0)
-			return; // if not in use, just return
-
-		pipe_ctx->stream_res.gsl_group = 0;
-
-		/* unset gsl group reg field and mark resource free */
-		switch (group_idx) {
-		case 1:
-			gsl.gsl0_en = 0;
-			dc->res_pool->gsl_groups.gsl_0 = 0;
-			break;
-		case 2:
-			gsl.gsl1_en = 0;
-			dc->res_pool->gsl_groups.gsl_1 = 0;
-			break;
-		case 3:
-			gsl.gsl2_en = 0;
-			dc->res_pool->gsl_groups.gsl_2 = 0;
-			break;
-		default:
-			BREAK_TO_DEBUGGER();
-			return;
-		}
-		gsl.gsl_master_en = 0;
-	}
-
-	/* at this point we want to program whether it's to enable or disable */
-	if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL &&
-		pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) {
-		pipe_ctx->stream_res.tg->funcs->set_gsl(
-			pipe_ctx->stream_res.tg,
-			&gsl);
-
-		pipe_ctx->stream_res.tg->funcs->set_gsl_source_select(
-			pipe_ctx->stream_res.tg, group_idx,	enable ? 4 : 0);
-	} else
-		BREAK_TO_DEBUGGER();
-}
-
-static void dcn20_set_flip_control_gsl(
-		struct pipe_ctx *pipe_ctx,
-		bool flip_immediate)
-{
-	if (pipe_ctx && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl)
-		pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl(
-				pipe_ctx->plane_res.hubp, flip_immediate);
-
-}
-
-static void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
+void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 {
 	enum dc_lane_count lane_count =
 		pipe_ctx->stream->link->cur_link_settings.lane_count;
@@ -1919,8 +2228,10 @@ static void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 	link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
 						    pipe_ctx->stream_res.stream_enc->id, true);
 
-	if (link->dc->hwss.program_dmdata_engine)
-		link->dc->hwss.program_dmdata_engine(pipe_ctx);
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->flip_immediate != 1) {
+		if (link->dc->hwss.program_dmdata_engine)
+			link->dc->hwss.program_dmdata_engine(pipe_ctx);
+	}
 
 	link->dc->hwss.update_info_frame(pipe_ctx);
 
@@ -1945,7 +2256,7 @@ static void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 	}
 }
 
-static void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
+void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state    *stream     = pipe_ctx->stream;
 	struct hubp               *hubp       = pipe_ctx->plane_res.hubp;
@@ -1971,7 +2282,7 @@ static void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
 						hubp->inst, mode);
 }
 
-static void dcn20_fpga_init_hw(struct dc *dc)
+void dcn20_fpga_init_hw(struct dc *dc)
 {
 	int i, j;
 	struct dce_hwseq *hws = dc->hwseq;
@@ -1986,13 +2297,13 @@ static void dcn20_fpga_init_hw(struct dc *dc)
 		res_pool->dccg->funcs->dccg_init(res_pool->dccg);
 
 	//Enable ability to power gate / don't force power on permanently
-	dc->hwss.enable_power_gating_plane(hws, true);
+	hws->funcs.enable_power_gating_plane(hws, true);
 
 	// Specific to FPGA dccg and registers
 	REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF);
 	REG_WRITE(RBBMIF_TIMEOUT_DIS_2, 0xFFFFFFFF);
 
-	dcn20_dccg_init(hws);
+	hws->funcs.dccg_init(hws);
 
 	REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2);
 	REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
@@ -2056,7 +2367,7 @@ static void dcn20_fpga_init_hw(struct dc *dc)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 		/*to do*/
-		hwss1_plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -2085,56 +2396,3 @@ static void dcn20_fpga_init_hw(struct dc *dc)
 		tg->funcs->tg_init(tg);
 	}
 }
-
-void dcn20_hw_sequencer_construct(struct dc *dc)
-{
-	dcn10_hw_sequencer_construct(dc);
-	dc->hwss.unblank_stream = dcn20_unblank_stream;
-	dc->hwss.update_plane_addr = dcn20_update_plane_addr;
-	dc->hwss.enable_stream_timing = dcn20_enable_stream_timing;
-	dc->hwss.program_triplebuffer = dcn20_program_tripleBuffer;
-	dc->hwss.set_input_transfer_func = dcn20_set_input_transfer_func;
-	dc->hwss.set_output_transfer_func = dcn20_set_output_transfer_func;
-	dc->hwss.apply_ctx_for_surface = dcn20_apply_ctx_for_surface;
-	dc->hwss.pipe_control_lock = dcn20_pipe_control_lock;
-	dc->hwss.pipe_control_lock_global = dcn20_pipe_control_lock_global;
-	dc->hwss.optimize_bandwidth = dcn20_optimize_bandwidth;
-	dc->hwss.prepare_bandwidth = dcn20_prepare_bandwidth;
-	dc->hwss.update_bandwidth = dcn20_update_bandwidth;
-	dc->hwss.enable_writeback = dcn20_enable_writeback;
-	dc->hwss.disable_writeback = dcn20_disable_writeback;
-	dc->hwss.program_output_csc = dcn20_program_output_csc;
-	dc->hwss.update_odm = dcn20_update_odm;
-	dc->hwss.blank_pixel_data = dcn20_blank_pixel_data;
-	dc->hwss.dmdata_status_done = dcn20_dmdata_status_done;
-	dc->hwss.program_dmdata_engine = dcn20_program_dmdata_engine;
-	dc->hwss.enable_stream = dcn20_enable_stream;
-	dc->hwss.disable_stream = dcn20_disable_stream;
-	dc->hwss.init_sys_ctx = dcn20_init_sys_ctx;
-	dc->hwss.init_vm_ctx = dcn20_init_vm_ctx;
-	dc->hwss.disable_stream_gating = dcn20_disable_stream_gating;
-	dc->hwss.enable_stream_gating = dcn20_enable_stream_gating;
-	dc->hwss.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt;
-	dc->hwss.reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap;
-	dc->hwss.update_mpcc = dcn20_update_mpcc;
-	dc->hwss.set_flip_control_gsl = dcn20_set_flip_control_gsl;
-	dc->hwss.init_blank = dcn20_init_blank;
-	dc->hwss.disable_plane = dcn20_disable_plane;
-	dc->hwss.plane_atomic_disable = dcn20_plane_atomic_disable;
-	dc->hwss.enable_power_gating_plane = dcn20_enable_power_gating_plane;
-	dc->hwss.dpp_pg_control = dcn20_dpp_pg_control;
-	dc->hwss.hubp_pg_control = dcn20_hubp_pg_control;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-	dc->hwss.dsc_pg_control = dcn20_dsc_pg_control;
-#else
-	dc->hwss.dsc_pg_control = NULL;
-#endif
-	dc->hwss.disable_vga = dcn20_disable_vga;
-
-	if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-		dc->hwss.init_hw = dcn20_fpga_init_hw;
-		dc->hwss.init_pipes = NULL;
-	}
-
-
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h
index 92ab3dd91814..02c9be5ebd47 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h
@@ -26,74 +26,112 @@
 #ifndef __DC_HWSS_DCN20_H__
 #define __DC_HWSS_DCN20_H__
 
-struct dc;
+#include "hw_sequencer_private.h"
 
-void dcn20_hw_sequencer_construct(struct dc *dc);
-
-enum dc_status dcn20_enable_stream_timing(
-		struct pipe_ctx *pipe_ctx,
-		struct dc_state *context,
-		struct dc *dc);
-
-void dcn20_blank_pixel_data(
+bool dcn20_set_blend_lut(
+	struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state);
+bool dcn20_set_shaper_3dlut(
+	struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state);
+void dcn20_program_front_end_for_ctx(
 		struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		bool blank);
-
+		struct dc_state *context);
+void dcn20_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
+bool dcn20_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+			const struct dc_plane_state *plane_state);
+bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
+			const struct dc_stream_state *stream);
 void dcn20_program_output_csc(struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum dc_color_space colorspace,
 		uint16_t *matrix,
 		int opp_id);
-
+void dcn20_enable_stream(struct pipe_ctx *pipe_ctx);
+void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
+		struct dc_link_settings *link_settings);
+void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_blank_pixel_data(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool blank);
+void dcn20_pipe_control_lock(
+	struct dc *dc,
+	struct pipe_ctx *pipe,
+	bool lock);
+void dcn20_pipe_control_lock_global(
+		struct dc *dc,
+		struct pipe_ctx *pipe,
+		bool lock);
 void dcn20_prepare_bandwidth(
 		struct dc *dc,
 		struct dc_state *context);
-
 void dcn20_optimize_bandwidth(
 		struct dc *dc,
 		struct dc_state *context);
-
 bool dcn20_update_bandwidth(
 		struct dc *dc,
 		struct dc_state *context);
-
+void dcn20_reset_hw_ctx_wrap(
+		struct dc *dc,
+		struct dc_state *context);
+enum dc_status dcn20_enable_stream_timing(
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context,
+		struct dc *dc);
+void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_init_blank(
+		struct dc *dc,
+		struct timing_generator *tg);
+void dcn20_disable_vga(
+	struct dce_hwseq *hws);
+void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_enable_power_gating_plane(
+	struct dce_hwseq *hws,
+	bool enable);
+void dcn20_dpp_pg_control(
+		struct dce_hwseq *hws,
+		unsigned int dpp_inst,
+		bool power_on);
+void dcn20_hubp_pg_control(
+		struct dce_hwseq *hws,
+		unsigned int hubp_inst,
+		bool power_on);
+void dcn20_program_triple_buffer(
+	const struct dc *dc,
+	struct pipe_ctx *pipe_ctx,
+	bool enable_triple_buffer);
+void dcn20_enable_writeback(
+		struct dc *dc,
+		struct dc_writeback_info *wb_info,
+		struct dc_state *context);
 void dcn20_disable_writeback(
 		struct dc *dc,
 		unsigned int dwb_pipe_inst);
-
-bool dcn20_hwss_wait_for_blank_complete(
-		struct output_pixel_processor *opp);
-
-bool dcn20_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
-			const struct dc_stream_state *stream);
-
-bool dcn20_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
-			const struct dc_plane_state *plane_state);
-
+void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx);
 bool dcn20_dmdata_status_done(struct pipe_ctx *pipe_ctx);
-
+void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx);
 void dcn20_set_dmdata_attributes(struct pipe_ctx *pipe_ctx);
-
-void dcn20_disable_stream(struct pipe_ctx *pipe_ctx);
-
-void dcn20_program_tripleBuffer(
-		const struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		bool enableTripleBuffer);
-
-void dcn20_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx);
-
-void dcn20_pipe_control_lock_global(
+void dcn20_init_vm_ctx(
+		struct dce_hwseq *hws,
 		struct dc *dc,
-		struct pipe_ctx *pipe,
-		bool lock);
-void dcn20_setup_gsl_group_as_lock(const struct dc *dc,
-				struct pipe_ctx *pipe_ctx,
-				bool enable);
+		struct dc_virtual_addr_space_config *va_config,
+		int vmid);
+void dcn20_set_flip_control_gsl(
+		struct pipe_ctx *pipe_ctx,
+		bool flip_immediate);
+void dcn20_dsc_pg_control(
+		struct dce_hwseq *hws,
+		unsigned int dsc_inst,
+		bool power_on);
+void dcn20_fpga_init_hw(struct dc *dc);
+bool dcn20_wait_for_blank_complete(
+		struct output_pixel_processor *opp);
 void dcn20_dccg_init(struct dce_hwseq *hws);
-void dcn20_init_blank(
-	   struct dc *dc,
-	   struct timing_generator *tg);
-void dcn20_display_init(struct dc *dc);
+int dcn20_init_sys_ctx(struct dce_hwseq *hws,
+		struct dc *dc,
+		struct dc_phy_addr_space_config *pa_config);
+
 #endif /* __DC_HWSS_DCN20_H__ */
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
new file mode 100644
index 000000000000..d51e02fdab4d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10/dcn10_hw_sequencer.h"
+#include "dcn20_hwseq.h"
+
+static const struct hw_sequencer_funcs dcn20_funcs = {
+	.program_gamut_remap = dcn10_program_gamut_remap,
+	.init_hw = dcn10_init_hw,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = NULL,
+	.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn20_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dce110_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dcn20_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn20_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn20_disable_plane,
+	.pipe_control_lock = dcn20_pipe_control_lock,
+	.pipe_control_lock_global = dcn20_pipe_control_lock_global,
+	.prepare_bandwidth = dcn20_prepare_bandwidth,
+	.optimize_bandwidth = dcn20_optimize_bandwidth,
+	.update_bandwidth = dcn20_update_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn10_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dce110_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.program_triplebuffer = dcn20_program_triple_buffer,
+	.enable_writeback = dcn20_enable_writeback,
+	.disable_writeback = dcn20_disable_writeback,
+	.dmdata_status_done = dcn20_dmdata_status_done,
+	.program_dmdata_engine = dcn20_program_dmdata_engine,
+	.set_dmdata_attributes = dcn20_set_dmdata_attributes,
+	.init_sys_ctx = dcn20_init_sys_ctx,
+	.init_vm_ctx = dcn20_init_vm_ctx,
+	.set_flip_control_gsl = dcn20_set_flip_control_gsl,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+};
+
+static const struct hwseq_private_funcs dcn20_private_funcs = {
+	.init_pipes = dcn10_init_pipes,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.update_mpcc = dcn20_update_mpcc,
+	.set_input_transfer_func = dcn20_set_input_transfer_func,
+	.set_output_transfer_func = dcn20_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn20_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn20_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.disable_stream_gating = dcn20_disable_stream_gating,
+	.enable_stream_gating = dcn20_enable_stream_gating,
+	.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = dcn20_init_blank,
+	.disable_vga = dcn20_disable_vga,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn20_plane_atomic_disable,
+	.plane_atomic_power_down = dcn10_plane_atomic_power_down,
+	.enable_power_gating_plane = dcn20_enable_power_gating_plane,
+	.dpp_pg_control = dcn20_dpp_pg_control,
+	.hubp_pg_control = dcn20_hubp_pg_control,
+	.dsc_pg_control = NULL,
+	.update_odm = dcn20_update_odm,
+	.dsc_pg_control = dcn20_dsc_pg_control,
+	.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
+	.get_hdr_visual_confirm_color = dcn10_get_hdr_visual_confirm_color,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+	.wait_for_blank_complete = dcn20_wait_for_blank_complete,
+	.dccg_init = dcn20_dccg_init,
+	.set_blend_lut = dcn20_set_blend_lut,
+	.set_shaper_3dlut = dcn20_set_shaper_3dlut,
+};
+
+void dcn20_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn20_funcs;
+	dc->hwseq->funcs = dcn20_private_funcs;
+
+	if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+		dc->hwss.init_hw = dcn20_fpga_init_hw;
+		dc->hwseq->funcs.init_pipes = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
new file mode 100644
index 000000000000..12277797cd71
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN20_INIT_H__
+#define __DC_DCN20_INIT_H__
+
+struct dc;
+
+void dcn20_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN20_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
index e476f27aa3a9..e4ac73035c84 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
@@ -168,10 +168,8 @@ static struct mpll_cfg dcn2_mpll_cfg[] = {
 void enc2_fec_set_enable(struct link_encoder *enc, bool enable)
 {
 	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	DC_LOG_DSC("%s FEC at link encoder inst %d",
 			enable ? "Enabling" : "Disabling", enc->id.enum_id);
-#endif
 	REG_UPDATE(DP_DPHY_CNTL, DPHY_FEC_EN, enable);
 }
 
@@ -192,7 +190,6 @@ bool enc2_fec_is_active(struct link_encoder *enc)
 	return (active != 0);
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 /* this function reads dsc related register fields to be logged later in dcn10_log_hw_state
  * into a dcn_dsc_state struct.
  */
@@ -203,8 +200,8 @@ void link_enc2_read_state(struct link_encoder *enc, struct link_enc_state *s)
 	REG_GET(DP_DPHY_CNTL, DPHY_FEC_EN, &s->dphy_fec_en);
 	REG_GET(DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, &s->dphy_fec_ready_shadow);
 	REG_GET(DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, &s->dphy_fec_active_status);
+	REG_GET(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, &s->dp_link_training_complete);
 }
-#endif
 
 static bool update_cfg_data(
 		struct dcn10_link_encoder *enc10,
@@ -315,9 +312,7 @@ void enc2_hw_init(struct link_encoder *enc)
 }
 
 static const struct link_encoder_funcs dcn20_link_enc_funcs = {
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	.read_state = link_enc2_read_state,
-#endif
 	.validate_output_with_stream =
 		dcn10_link_encoder_validate_output_with_stream,
 	.hw_init = enc2_hw_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
index 3736b5548a25..8cab8107fd94 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
@@ -33,7 +33,142 @@
 	SRI(AUX_DPHY_TX_CONTROL, DP_AUX, id)
 
 #define UNIPHY_MASK_SH_LIST(mask_sh)\
-	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_LINK_ENABLE, mask_sh)
+	LE_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_LINK_ENABLE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL0_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL1_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL2_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL3_XBAR_SOURCE, mask_sh)
+
+#define DPCS_MASK_SH_LIST(mask_sh)\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_CLK_RDY, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_DATA_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_CLK_RDY, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_DATA_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_CLK_RDY, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_DATA_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_CLK_RDY, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_DATA_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL4, RDPCS_PHY_DP_TX0_TERM_CTRL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL4, RDPCS_PHY_DP_TX1_TERM_CTRL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL4, RDPCS_PHY_DP_TX2_TERM_CTRL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL4, RDPCS_PHY_DP_TX3_TERM_CTRL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL11, RDPCS_PHY_DP_MPLLB_MULTIPLIER, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX0_WIDTH, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX0_RATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX1_WIDTH, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX1_RATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX2_PSTATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX3_PSTATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX2_MPLL_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX3_MPLL_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL7, RDPCS_PHY_DP_MPLLB_FRACN_QUOT, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL7, RDPCS_PHY_DP_MPLLB_FRACN_DEN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL8, RDPCS_PHY_DP_MPLLB_SSC_PEAK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL9, RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL9, RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL10, RDPCS_PHY_DP_MPLLB_FRACN_REM, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL11, RDPCS_PHY_DP_REF_CLK_MPLLB_DIV, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL11, RDPCS_PHY_HDMI_MPLLB_HDMI_DIV, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL12, RDPCS_PHY_DP_MPLLB_SSC_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL12, RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL12, RDPCS_PHY_DP_MPLLB_TX_CLK_DIV, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL12, RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL12, RDPCS_PHY_DP_MPLLB_STATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL13, RDPCS_PHY_DP_MPLLB_DIV_CLK_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL13, RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL14, RDPCS_PHY_DP_MPLLB_FRACN_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL14, RDPCS_PHY_DP_MPLLB_PMIX_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_LANE0_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_LANE1_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_LANE2_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_LANE3_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CNTL, RDPCS_TX_FIFO_RD_START_DELAY, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_EXT_REFCLK_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SRAMCLK_BYPASS, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SRAMCLK_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SRAMCLK_CLOCK_ON, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SYMCLK_DIV2_CLOCK_ON, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SYMCLK_DIV2_GATE_DIS, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_SYMCLK_DIV2_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_DISABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_DISABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_DISABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_DISABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_REQ, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_REQ, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_REQ, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_REQ, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_ACK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_ACK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_ACK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_ACK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX0_RESET, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX1_RESET, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX2_RESET, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL3, RDPCS_PHY_DP_TX3_RESET, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_PHY_RESET, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_PHY_CR_MUX_SEL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_PHY_REF_RANGE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_SRAM_BYPASS, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_SRAM_EXT_LD_DONE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_PHY_HDMIMODE_ENABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_SRAM_INIT_DONE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DP4_POR, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PLL_UPDATE_DATA, RDPCS_PLL_UPDATE_DATA, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL, RDPCS_REG_FIFO_ERROR_MASK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL, RDPCS_TX_FIFO_ERROR_MASK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL, RDPCS_DPALT_DISABLE_TOGGLE_MASK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL, RDPCS_DPALT_4LANE_TOGGLE_MASK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCS_TX_CR_ADDR, RDPCS_TX_CR_ADDR, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCS_TX_CR_DATA, RDPCS_TX_CR_DATA, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_MPLLB_V2I, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_MPLLB_FREQ_VCO, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_MPLLB_CP_INT, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_MPLLB_CP_PROP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DCO_FINETUNE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DCO_RANGE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_POST, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CLOCK_CNTL, DPCS_SYMCLK_CLOCK_ON, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CLOCK_CNTL, DPCS_SYMCLK_GATE_DIS, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CLOCK_CNTL, DPCS_SYMCLK_EN, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_SWAP, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_ORDER_INVERT, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_EN, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_RD_START_DELAY, mask_sh),\
+	LE_SF(DPCSTX0_DPCSTX_DEBUG_CONFIG, DPCS_DBG_CBUS_DIS, mask_sh)
+
+#define DPCS_DCN2_MASK_SH_LIST(mask_sh)\
+	DPCS_MASK_SH_LIST(mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL, RDPCS_PHY_RX_REF_LD_VAL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL, RDPCS_PHY_RX_VCO_LD_VAL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE_ACK, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX0_PSTATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX1_PSTATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX0_MPLL_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_TX1_MPLL_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_REF_CLK_EN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX2_WIDTH, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX2_RATE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX3_WIDTH, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL5, RDPCS_PHY_DP_TX3_RATE, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYA_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYB_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYC_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYD_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYE_SOFT_RESET, mask_sh)
 
 #define LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh)\
 	LINK_ENCODER_MASK_SH_LIST_DCN10(mask_sh),\
@@ -63,6 +198,49 @@
 	SRI(CLOCK_ENABLE, SYMCLK, id), \
 	SRI(CHANNEL_XBAR_CNTL, UNIPHY, id)
 
+#define DPCS_DCN2_CMN_REG_LIST(id) \
+	SRI(DIG_LANE_ENABLE, DIG, id), \
+	SRI(TMDS_CTL_BITS, DIG, id), \
+	SRI(RDPCSTX_PHY_CNTL3, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL4, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL5, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL6, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL7, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL8, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL9, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL10, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL11, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL12, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL13, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL14, RDPCSTX, id), \
+	SRI(RDPCSTX_CNTL, RDPCSTX, id), \
+	SRI(RDPCSTX_CLOCK_CNTL, RDPCSTX, id), \
+	SRI(RDPCSTX_INTERRUPT_CONTROL, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL0, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_CNTL2, RDPCSTX, id), \
+	SRI(RDPCSTX_PLL_UPDATE_DATA, RDPCSTX, id), \
+	SRI(RDPCS_TX_CR_ADDR, RDPCSTX, id), \
+	SRI(RDPCS_TX_CR_DATA, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_FUSE0, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_FUSE1, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+	SRI(DPCSTX_TX_CLOCK_CNTL, DPCSTX, id), \
+	SRI(DPCSTX_TX_CNTL, DPCSTX, id), \
+	SRI(DPCSTX_DEBUG_CONFIG, DPCSTX, id), \
+	SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
+	SR(RDPCSTX0_RDPCSTX_SCRATCH)
+
+
+#define DPCS_DCN2_REG_LIST(id) \
+	DPCS_DCN2_CMN_REG_LIST(id), \
+	SRI(RDPCSTX_PHY_RX_LD_VAL, RDPCSTX, id),\
+	SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
+
+#define LE_DCN2_REG_LIST(id) \
+		LE_DCN10_REG_LIST(id), \
+		SR(DCIO_SOFT_RESET)
+
 struct mpll_cfg {
 	uint32_t mpllb_ana_v2i;
 	uint32_t mpllb_ana_freq_vco;
@@ -91,6 +269,13 @@ struct mpll_cfg {
 	uint32_t ref_range;
 	uint32_t ref_clk;
 	bool hdmimode_enable;
+	bool sup_pre_hp;
+	bool dp_tx0_vergdrv_byp;
+	bool dp_tx1_vergdrv_byp;
+	bool dp_tx2_vergdrv_byp;
+	bool dp_tx3_vergdrv_byp;
+
+
 };
 
 struct dpcssys_phy_seq_cfg {
@@ -151,9 +336,7 @@ void enc2_fec_set_ready(struct link_encoder *enc, bool ready);
 bool enc2_fec_is_active(struct link_encoder *enc);
 void enc2_hw_init(struct link_encoder *enc);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 void link_enc2_read_state(struct link_encoder *enc, struct link_enc_state *s);
-#endif
 
 void dcn20_link_encoder_enable_dp_output(
 	struct link_encoder *enc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 5a188b2bc033..de9c857ab3e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -33,6 +33,9 @@
 #define REG(reg)\
 	mpc20->mpc_regs->reg
 
+#define IND_REG(index) \
+	(index)
+
 #define CTX \
 	mpc20->base.ctx
 
@@ -132,19 +135,33 @@ void mpc2_set_output_csc(
 		const uint16_t *regval,
 		enum mpc_output_csc_mode ocsc_mode)
 {
+	uint32_t cur_mode;
 	struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
 	struct color_matrices_reg ocsc_regs;
 
-	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
-
-	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE)
+	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) {
+		REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 		return;
+	}
 
 	if (regval == NULL) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
 
+	/* determine which CSC coefficients (A or B) we are using
+	 * currently.  select the alternate set to double buffer
+	 * the CSC update so CSC is updated on frame boundary
+	 */
+	IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA,
+						MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX,
+						MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode);
+
+	if (cur_mode != MPC_OUTPUT_CSC_COEF_A)
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+	else
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_B;
+
 	ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A;
 	ocsc_regs.masks.csc_c11  = mpc20->mpc_mask->MPC_OCSC_C11_A;
 	ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A;
@@ -157,10 +174,13 @@ void mpc2_set_output_csc(
 		ocsc_regs.csc_c11_c12 = REG(CSC_C11_C12_B[opp_id]);
 		ocsc_regs.csc_c33_c34 = REG(CSC_C33_C34_B[opp_id]);
 	}
+
 	cm_helper_program_color_matrices(
 			mpc20->base.ctx,
 			regval,
 			&ocsc_regs);
+
+	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 }
 
 void mpc2_set_ocsc_default(
@@ -169,14 +189,16 @@ void mpc2_set_ocsc_default(
 		enum dc_color_space color_space,
 		enum mpc_output_csc_mode ocsc_mode)
 {
+	uint32_t cur_mode;
 	struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
 	uint32_t arr_size;
 	struct color_matrices_reg ocsc_regs;
 	const uint16_t *regval = NULL;
 
-	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
-	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE)
+	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) {
+		REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 		return;
+	}
 
 	regval = find_color_matrix(color_space, &arr_size);
 
@@ -185,6 +207,19 @@ void mpc2_set_ocsc_default(
 		return;
 	}
 
+	/* determine which CSC coefficients (A or B) we are using
+	 * currently.  select the alternate set to double buffer
+	 * the CSC update so CSC is updated on frame boundary
+	 */
+	IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA,
+						MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX,
+						MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode);
+
+	if (cur_mode != MPC_OUTPUT_CSC_COEF_A)
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+	else
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_B;
+
 	ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A;
 	ocsc_regs.masks.csc_c11  = mpc20->mpc_mask->MPC_OCSC_C11_A;
 	ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A;
@@ -203,6 +238,8 @@ void mpc2_set_ocsc_default(
 			mpc20->base.ctx,
 			regval,
 			&ocsc_regs);
+
+	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 }
 
 static void mpc2_ogam_get_reg_field(
@@ -345,6 +382,9 @@ static void mpc20_program_ogam_pwl(
 	uint32_t i;
 	struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
 
+	PERF_TRACE();
+	REG_SEQ_START();
+
 	for (i = 0 ; i < num; i++) {
 		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].red_reg);
 		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].green_reg);
@@ -463,6 +503,11 @@ void mpc2_assert_mpcc_idle_before_connect(struct mpc *mpc, int mpcc_id)
 		ASSERT(!mpc_disabled);
 		ASSERT(!mpc_idle);
 	}
+
+	REG_SEQ_SUBMIT();
+	PERF_TRACE();
+	REG_SEQ_WAIT_DONE();
+	PERF_TRACE();
 }
 
 static void mpc2_init_mpcc(struct mpcc *mpcc, int mpcc_inst)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
index 9f53192da2dc..c78fd5123497 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
@@ -80,6 +80,10 @@
 	SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst),\
 	SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst)
 
+#define MPC_DBG_REG_LIST_DCN2_0() \
+	SR(MPC_OCSC_TEST_DEBUG_DATA),\
+	SR(MPC_OCSC_TEST_DEBUG_INDEX)
+
 #define MPC_REG_VARIABLE_LIST_DCN2_0 \
 	MPC_COMMON_REG_VARIABLE_LIST \
 	uint32_t MPCC_TOP_GAIN[MAX_MPCC]; \
@@ -118,6 +122,8 @@
 	uint32_t MPCC_OGAM_LUT_RAM_CONTROL[MAX_MPCC];\
 	uint32_t MPCC_OGAM_LUT_DATA[MAX_MPCC];\
 	uint32_t MPCC_OGAM_MODE[MAX_MPCC];\
+	uint32_t MPC_OCSC_TEST_DEBUG_DATA;\
+	uint32_t MPC_OCSC_TEST_DEBUG_INDEX;\
 	uint32_t CSC_MODE[MAX_OPP]; \
 	uint32_t CSC_C11_C12_A[MAX_OPP]; \
 	uint32_t CSC_C33_C34_A[MAX_OPP]; \
@@ -134,6 +140,7 @@
 	SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\
 	SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\
 	SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\
+	SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\
 	SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\
 	SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C11_A, mask_sh),\
 	SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C12_A, mask_sh),\
@@ -174,6 +181,19 @@
 	SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\
 	SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh)
 
+/*
+ *	DCN2 MPC_OCSC debug status register:
+ *
+ *		Status index including current OCSC Mode is 1
+ *			OCSC Mode: [1..0]
+ */
+#define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1
+
+#define MPC_DEBUG_REG_LIST_SH_DCN20 \
+	.MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0
+
+#define MPC_DEBUG_REG_LIST_MASK_DCN20 \
+	.MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0x3
 
 #define MPC_REG_FIELD_LIST_DCN2_0(type) \
 	MPC_REG_FIELD_LIST(type)\
@@ -182,6 +202,8 @@
 	type MPCC_TOP_GAIN;\
 	type MPCC_BOT_GAIN_INSIDE;\
 	type MPCC_BOT_GAIN_OUTSIDE;\
+	type MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE;\
+	type MPC_OCSC_TEST_DEBUG_INDEX;\
 	type MPC_OCSC_MODE;\
 	type MPC_OCSC_C11_A;\
 	type MPC_OCSC_C12_A;\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c
index 40164ed015ea..023cc71fad0f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c
@@ -41,6 +41,7 @@
 void opp2_set_disp_pattern_generator(
 		struct output_pixel_processor *opp,
 		enum controller_dp_test_pattern test_pattern,
+		enum controller_dp_color_space color_space,
 		enum dc_color_depth color_depth,
 		const struct tg_color *solid_color,
 		int width,
@@ -100,9 +101,22 @@ void opp2_set_disp_pattern_generator(
 				TEST_PATTERN_DYN_RANGE_CEA :
 				TEST_PATTERN_DYN_RANGE_VESA);
 
+		switch (color_space) {
+		case CONTROLLER_DP_COLOR_SPACE_YCBCR601:
+			mode = TEST_PATTERN_MODE_COLORSQUARES_YCBCR601;
+		break;
+		case CONTROLLER_DP_COLOR_SPACE_YCBCR709:
+			mode = TEST_PATTERN_MODE_COLORSQUARES_YCBCR709;
+		break;
+		case CONTROLLER_DP_COLOR_SPACE_RGB:
+		default:
+			mode = TEST_PATTERN_MODE_COLORSQUARES_RGB;
+		break;
+		}
+
 		REG_UPDATE_6(DPG_CONTROL,
 			DPG_EN, 1,
-			DPG_MODE, TEST_PATTERN_MODE_COLORSQUARES_RGB,
+			DPG_MODE, mode,
 			DPG_DYNAMIC_RANGE, dyn_range,
 			DPG_BIT_DEPTH, bit_depth,
 			DPG_VRES, 6,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h
index abd8de9a78f8..4093bec172c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h
@@ -140,6 +140,7 @@ void dcn20_opp_construct(struct dcn20_opp *oppn20,
 void opp2_set_disp_pattern_generator(
 	struct output_pixel_processor *opp,
 	enum controller_dp_test_pattern test_pattern,
+	enum controller_dp_color_space color_space,
 	enum dc_color_depth color_depth,
 	const struct tg_color *solid_color,
 	int width,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
index 2137e2be2140..d875b0c38fde 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
@@ -59,11 +59,16 @@ bool optc2_enable_crtc(struct timing_generator *optc)
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 1);
 
+	REG_SEQ_START();
+
 	/* Enable CRTC */
 	REG_UPDATE_2(OTG_CONTROL,
 			OTG_DISABLE_POINT_CNTL, 3,
 			OTG_MASTER_EN, 1);
 
+	REG_SEQ_SUBMIT();
+	REG_SEQ_WAIT_DONE();
+
 	return true;
 }
 
@@ -167,7 +172,6 @@ void optc2_set_gsl_source_select(
 	}
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 /* DSC encoder frame start controls: x = h position, line_num = # of lines from vstartup */
 void optc2_set_dsc_encoder_frame_start(struct timing_generator *optc,
 					int x_position,
@@ -201,13 +205,12 @@ void optc2_set_dsc_config(struct timing_generator *optc,
 	REG_UPDATE(OPTC_WIDTH_CONTROL,
 		OPTC_DSC_SLICE_WIDTH, dsc_slice_width);
 }
-#endif
 
-/**
- * PTI i think is already done somewhere else for 2ka
- * (opp?, please double check.
- * OPTC side only has 1 register to set for PTI_ENABLE)
- */
+/*TEMP: Need to figure out inheritance model here.*/
+bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+	return optc1_is_two_pixels_per_containter(timing);
+}
 
 void optc2_set_odm_bypass(struct timing_generator *optc,
 		const struct dc_crtc_timing *dc_crtc_timing)
@@ -221,7 +224,7 @@ void optc2_set_odm_bypass(struct timing_generator *optc,
 			OPTC_SEG1_SRC_SEL, 0xf);
 	REG_WRITE(OTG_H_TIMING_CNTL, 0);
 
-	h_div_2 = optc1_is_two_pixels_per_containter(dc_crtc_timing);
+	h_div_2 = optc2_is_two_pixels_per_containter(dc_crtc_timing);
 	REG_UPDATE(OTG_H_TIMING_CNTL,
 			OTG_H_TIMING_DIV_BY2, h_div_2);
 	REG_SET(OPTC_MEMORY_CONFIG, 0,
@@ -233,12 +236,13 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c
 		struct dc_crtc_timing *timing)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
-	/* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192 */
 	int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right)
 			/ opp_cnt;
-	int memory_mask = mpcc_hactive <= 2560 ? 0x3 : 0xf;
+	uint32_t memory_mask;
 	uint32_t data_fmt = 0;
 
+	ASSERT(opp_cnt == 2);
+
 	/* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic
 	 * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1);
 	 * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start
@@ -246,9 +250,17 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c
 	 *		MASTER_UPDATE_LOCK_DB_X, 160,
 	 *		MASTER_UPDATE_LOCK_DB_Y, 240);
 	 */
+
+	/* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192,
+	 * however, for ODM combine we can simplify by always using 4.
+	 * To make sure there's no overlap, each instance "reserves" 2 memories and
+	 * they are uniquely combined here.
+	 */
+	memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2);
+
 	if (REG(OPTC_MEMORY_CONFIG))
 		REG_SET(OPTC_MEMORY_CONFIG, 0,
-			OPTC_MEM_SEL, memory_mask << (optc->inst * 4));
+			OPTC_MEM_SEL, memory_mask);
 
 	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
 		data_fmt = 1;
@@ -257,7 +269,6 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c
 
 	REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt);
 
-	ASSERT(opp_cnt == 2);
 	REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0,
 			OPTC_NUM_OF_INPUT_SEGMENT, 1,
 			OPTC_SEG0_SRC_SEL, opp_id[0],
@@ -287,6 +298,10 @@ void optc2_get_optc_source(struct timing_generator *optc,
 		*num_of_src_opp = 2;
 	else
 		*num_of_src_opp = 1;
+
+	/* Work around VBIOS not updating OPTC_NUM_OF_INPUT_SEGMENT */
+	if (*src_opp_id_1 == 0xf)
+		*num_of_src_opp = 1;
 }
 
 void optc2_set_dwb_source(struct timing_generator *optc,
@@ -375,14 +390,8 @@ void optc2_setup_manual_trigger(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	REG_SET(OTG_MANUAL_FLOW_CONTROL, 0,
-			MANUAL_FLOW_CONTROL, 1);
-
-	REG_SET(OTG_GLOBAL_CONTROL2, 0,
-			MANUAL_FLOW_CONTROL_SEL, optc->inst);
-
 	REG_SET_8(OTG_TRIGA_CNTL, 0,
-			OTG_TRIGA_SOURCE_SELECT, 22,
+			OTG_TRIGA_SOURCE_SELECT, 21,
 			OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,
 			OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1,
 			OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0,
@@ -444,9 +453,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = {
 		.setup_global_swap_lock = NULL,
 		.get_crc = optc1_get_crc,
 		.configure_crc = optc1_configure_crc,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		.set_dsc_config = optc2_set_dsc_config,
-#endif
 		.set_dwb_source = optc2_set_dwb_source,
 		.set_odm_bypass = optc2_set_odm_bypass,
 		.set_odm_combine = optc2_set_odm_combine,
@@ -456,7 +463,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = {
 		.set_vtg_params = optc1_set_vtg_params,
 		.program_manual_trigger = optc2_program_manual_trigger,
 		.setup_manual_trigger = optc2_setup_manual_trigger,
-		.is_matching_timing = optc1_is_matching_timing
+		.get_hw_timing = optc1_get_hw_timing,
 };
 
 void dcn20_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
index 32a58431fd09..239cc40ae474 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
@@ -86,12 +86,10 @@ void optc2_set_gsl_source_select(struct timing_generator *optc,
 		int group_idx,
 		uint32_t gsl_ready_signal);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 void optc2_set_dsc_config(struct timing_generator *optc,
 					enum optc_dsc_mode dsc_mode,
 					uint32_t dsc_bytes_per_pixel,
 					uint32_t dsc_slice_width);
-#endif
 
 void optc2_set_odm_bypass(struct timing_generator *optc,
 		const struct dc_crtc_timing *dc_crtc_timing);
@@ -108,6 +106,7 @@ void optc2_triplebuffer_lock(struct timing_generator *optc);
 void optc2_triplebuffer_unlock(struct timing_generator *optc);
 void optc2_lock_doublebuffer_disable(struct timing_generator *optc);
 void optc2_lock_doublebuffer_enable(struct timing_generator *optc);
+void optc2_setup_manual_trigger(struct timing_generator *optc);
 void optc2_program_manual_trigger(struct timing_generator *optc);
-
+bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
 #endif /* __DC_OPTC_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 6b2f2f1a1c9c..85f90f3e24cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1,5 +1,6 @@
 /*
 * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2019 Raptor Engineering, LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,6 +29,8 @@
 #include "dm_services.h"
 #include "dc.h"
 
+#include "dcn20_init.h"
+
 #include "resource.h"
 #include "include/irq_service_interface.h"
 #include "dcn20/dcn20_resource.h"
@@ -45,9 +48,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn20_opp.h"
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #include "dcn20_dsc.h"
-#endif
 
 #include "dcn20_link_encoder.h"
 #include "dcn20_stream_encoder.h"
@@ -59,11 +60,14 @@
 #include "dml/display_mode_vba.h"
 #include "dcn20_dccg.h"
 #include "dcn20_vmid.h"
+#include "dc_link_ddc.h"
 
 #include "navi10_ip_offset.h"
 
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
+#include "dpcs/dpcs_2_0_0_offset.h"
+#include "dpcs/dpcs_2_0_0_sh_mask.h"
 
 #include "nbio/nbio_2_3_offset.h"
 
@@ -82,8 +86,6 @@
 
 #include "amdgpu_socbb.h"
 
-/* NV12 SOC BB is currently in FW, mark SW bounding box invalid. */
-#define SOC_BOUNDING_BOX_VALID false
 #define DC_LOGGER_INIT(logger)
 
 struct _vcs_dpi_ip_params_st dcn2_0_ip = {
@@ -94,11 +96,7 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
 	.hostvm_max_page_table_levels = 4,
 	.hostvm_cached_page_table_levels = 0,
 	.pte_group_size_bytes = 2048,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	.num_dsc = 6,
-#else
-	.num_dsc = 0,
-#endif
 	.rob_buffer_size_kbytes = 168,
 	.det_buffer_size_kbytes = 164,
 	.dpte_buffer_size_in_pte_reqs_luma = 84,
@@ -157,6 +155,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
 	.xfc_fill_constant_bytes = 0,
 };
 
+struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = {
+	.odm_capable = 1,
+	.gpuvm_enable = 0,
+	.hostvm_enable = 0,
+	.gpuvm_max_page_table_levels = 4,
+	.hostvm_max_page_table_levels = 4,
+	.hostvm_cached_page_table_levels = 0,
+	.num_dsc = 5,
+	.rob_buffer_size_kbytes = 168,
+	.det_buffer_size_kbytes = 164,
+	.dpte_buffer_size_in_pte_reqs_luma = 84,
+	.dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+	.dpp_output_buffer_pixels = 2560,
+	.opp_output_buffer_lines = 1,
+	.pixel_chunk_size_kbytes = 8,
+	.pte_enable = 1,
+	.max_page_table_levels = 4,
+	.pte_chunk_size_kbytes = 2,
+	.meta_chunk_size_kbytes = 2,
+	.writeback_chunk_size_kbytes = 2,
+	.line_buffer_size_bits = 789504,
+	.is_line_buffer_bpp_fixed = 0,
+	.line_buffer_fixed_bpp = 0,
+	.dcc_supported = true,
+	.max_line_buffer_lines = 12,
+	.writeback_luma_buffer_size_kbytes = 12,
+	.writeback_chroma_buffer_size_kbytes = 8,
+	.writeback_chroma_line_buffer_width_pixels = 4,
+	.writeback_max_hscl_ratio = 1,
+	.writeback_max_vscl_ratio = 1,
+	.writeback_min_hscl_ratio = 1,
+	.writeback_min_vscl_ratio = 1,
+	.writeback_max_hscl_taps = 12,
+	.writeback_max_vscl_taps = 12,
+	.writeback_line_buffer_luma_buffer_size = 0,
+	.writeback_line_buffer_chroma_buffer_size = 14643,
+	.cursor_buffer_size = 8,
+	.cursor_chunk_size = 2,
+	.max_num_otg = 5,
+	.max_num_dpp = 5,
+	.max_num_wb = 1,
+	.max_dchub_pscl_bw_pix_per_clk = 4,
+	.max_pscl_lb_bw_pix_per_clk = 2,
+	.max_lb_vscl_bw_pix_per_clk = 4,
+	.max_vscl_hscl_bw_pix_per_clk = 4,
+	.max_hscl_ratio = 8,
+	.max_vscl_ratio = 8,
+	.hscl_mults = 4,
+	.vscl_mults = 4,
+	.max_hscl_taps = 8,
+	.max_vscl_taps = 8,
+	.dispclk_ramp_margin_percent = 1,
+	.underscan_factor = 1.10,
+	.min_vblank_lines = 32, //
+	.dppclk_delay_subtotal = 77, //
+	.dppclk_delay_scl_lb_only = 16,
+	.dppclk_delay_scl = 50,
+	.dppclk_delay_cnvc_formatter = 8,
+	.dppclk_delay_cnvc_cursor = 6,
+	.dispclk_delay_subtotal = 87, //
+	.dcfclk_cstate_latency = 10, // SRExitTime
+	.max_inter_dcn_tile_repeaters = 8,
+	.xfc_supported = true,
+	.xfc_fill_bw_overhead_percent = 10.0,
+	.xfc_fill_constant_bytes = 0,
+	.ptoi_supported = 0
+};
+
 struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
 	/* Defaults that get patched on driver load from firmware. */
 	.clock_limits = {
@@ -485,6 +551,7 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
 [id] = {\
 	LE_DCN10_REG_LIST(id), \
 	UNIPHY_DCN2_REG_LIST(phyid), \
+	DPCS_DCN2_REG_LIST(id), \
 	SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
 }
 
@@ -498,11 +565,13 @@ static const struct dcn10_link_enc_registers link_enc_regs[] = {
 };
 
 static const struct dcn10_link_enc_shift le_shift = {
-	LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT)
+	LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\
+	DPCS_DCN2_MASK_SH_LIST(__SHIFT)
 };
 
 static const struct dcn10_link_enc_mask le_mask = {
-	LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK)
+	LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\
+	DPCS_DCN2_MASK_SH_LIST(_MASK)
 };
 
 #define ipp_regs(id)\
@@ -569,6 +638,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = {
 #define tf_regs(id)\
 [id] = {\
 	TF_REG_LIST_DCN20(id),\
+	TF_REG_LIST_DCN20_COMMON_APPEND(id),\
 }
 
 static const struct dcn2_dpp_registers tf_regs[] = {
@@ -581,11 +651,13 @@ static const struct dcn2_dpp_registers tf_regs[] = {
 };
 
 static const struct dcn2_dpp_shift tf_shift = {
-		TF_REG_LIST_SH_MASK_DCN20(__SHIFT)
+		TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
+		TF_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn2_dpp_mask tf_mask = {
-		TF_REG_LIST_SH_MASK_DCN20(_MASK)
+		TF_REG_LIST_SH_MASK_DCN20(_MASK),
+		TF_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define dwbc_regs_dcn2(id)\
@@ -635,14 +707,17 @@ static const struct dcn20_mpc_registers mpc_regs = {
 		MPC_OUT_MUX_REG_LIST_DCN2_0(3),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(4),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(5),
+		MPC_DBG_REG_LIST_DCN2_0()
 };
 
 static const struct dcn20_mpc_shift mpc_shift = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
+	MPC_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn20_mpc_mask mpc_mask = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
+	MPC_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define tg_regs(id)\
@@ -732,7 +807,42 @@ static const struct dcn20_vmid_mask vmid_masks = {
 		DCN20_VMID_MASK_SH_LIST(_MASK)
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+static const struct dce110_aux_registers_shift aux_shift = {
+		DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+		DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 #define dsc_regsDCN20(id)\
 [id] = {\
 	DSC_REG_LIST_DCN20(id)\
@@ -754,7 +864,6 @@ static const struct dcn20_dsc_shift dsc_shift = {
 static const struct dcn20_dsc_mask dsc_mask = {
 	DSC_REG_LIST_SH_MASK_DCN20(_MASK)
 };
-#endif
 
 static const struct dccg_registers dccg_regs = {
 		DCCG_REG_LIST_DCN2()
@@ -778,9 +887,7 @@ static const struct resource_caps res_cap_nv10 = {
 		.num_dwb = 1,
 		.num_ddc = 6,
 		.num_vmid = 16,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		.num_dsc = 6,
-#endif
 };
 
 static const struct dc_plane_cap plane_cap = {
@@ -816,6 +923,8 @@ static const struct resource_caps res_cap_nv14 = {
 		.num_pll = 5,
 		.num_dwb = 1,
 		.num_ddc = 5,
+		.num_vmid = 16,
+		.num_dsc = 5,
 };
 
 static const struct dc_debug_options debug_defaults_drv = {
@@ -825,7 +934,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 		.clock_trace = true,
 		.disable_pplib_clock_request = true,
 		.pipe_split_policy = MPC_SPLIT_DYNAMIC,
-		.force_single_disp_pipe_split = true,
+		.force_single_disp_pipe_split = false,
 		.disable_dcc = DCC_ENABLE,
 		.vsr_support = true,
 		.performance_trace = false,
@@ -922,7 +1031,10 @@ struct dce_aux *dcn20_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -1042,14 +1154,18 @@ struct link_encoder *dcn20_link_encoder_create(
 {
 	struct dcn20_link_encoder *enc20 =
 		kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	int link_regs_id;
 
 	if (!enc20)
 		return NULL;
 
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
 	dcn20_link_encoder_construct(enc20,
 				      enc_init_data,
 				      &link_enc_feature,
-				      &link_enc_regs[enc_init_data->transmitter],
+				      &link_enc_regs[link_regs_id],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
 				      &link_enc_hpd_regs[enc_init_data->hpd_source],
 				      &le_shift,
@@ -1159,13 +1275,14 @@ static const struct resource_create_funcs res_create_maximus_funcs = {
 	.create_hwseq = dcn20_hwseq_create,
 };
 
+static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu);
+
 void dcn20_clock_source_destroy(struct clock_source **clk_src)
 {
 	kfree(TO_DCE110_CLK_SRC(*clk_src));
 	*clk_src = NULL;
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 
 struct display_stream_compressor *dcn20_dsc_create(
 	struct dc_context *ctx, uint32_t inst)
@@ -1188,9 +1305,8 @@ void dcn20_dsc_destroy(struct display_stream_compressor **dsc)
 	*dsc = NULL;
 }
 
-#endif
 
-static void destruct(struct dcn20_resource_pool *pool)
+static void dcn20_resource_destruct(struct dcn20_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -1201,12 +1317,10 @@ static void destruct(struct dcn20_resource_pool *pool)
 		}
 	}
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
 		if (pool->base.dscs[i] != NULL)
 			dcn20_dsc_destroy(&pool->base.dscs[i]);
 	}
-#endif
 
 	if (pool->base.mpc != NULL) {
 		kfree(TO_DCN20_MPC(pool->base.mpc));
@@ -1299,6 +1413,8 @@ static void destruct(struct dcn20_resource_pool *pool)
 	if (pool->base.pp_smu != NULL)
 		dcn20_pp_smu_destroy(&pool->base.pp_smu);
 
+	if (pool->base.oem_device != NULL)
+		dal_ddc_service_destroy(&pool->base.oem_device);
 }
 
 struct hubp *dcn20_hubp_create(
@@ -1349,7 +1465,7 @@ static void get_pixel_clock_parameters(
 
 	if (opp_cnt == 4)
 		pixel_clk_params->requested_pix_clk_100hz /= 4;
-	else if (optc1_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2)
+	else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2)
 		pixel_clk_params->requested_pix_clk_100hz /= 2;
 
 	if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
@@ -1415,17 +1531,23 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state
 	return status;
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 
 static void acquire_dsc(struct resource_context *res_ctx,
 			const struct resource_pool *pool,
-			struct display_stream_compressor **dsc)
+			struct display_stream_compressor **dsc,
+			int pipe_idx)
 {
 	int i;
 
 	ASSERT(*dsc == NULL);
 	*dsc = NULL;
 
+	if (pool->res_cap->num_dsc == pool->res_cap->num_opp) {
+		*dsc = pool->dscs[pipe_idx];
+		res_ctx->is_dsc_acquired[pipe_idx] = true;
+		return;
+	}
+
 	/* Find first free DSC */
 	for (i = 0; i < pool->res_cap->num_dsc; i++)
 		if (!res_ctx->is_dsc_acquired[i]) {
@@ -1449,11 +1571,9 @@ static void release_dsc(struct resource_context *res_ctx,
 		}
 }
 
-#endif
 
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-static enum dc_status add_dsc_to_stream_resource(struct dc *dc,
+enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc,
 		struct dc_state *dc_ctx,
 		struct dc_stream_state *dc_stream)
 {
@@ -1468,11 +1588,13 @@ static enum dc_status add_dsc_to_stream_resource(struct dc *dc,
 		if (pipe_ctx->stream != dc_stream)
 			continue;
 
-		acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc);
+		if (pipe_ctx->stream_res.dsc)
+			continue;
+
+		acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc, i);
 
 		/* The number of DSCs can be less than the number of pipes */
 		if (!pipe_ctx->stream_res.dsc) {
-			dm_output_to_console("No DSCs available\n");
 			result = DC_NO_DSC_RESOURCE;
 		}
 
@@ -1504,7 +1626,6 @@ static enum dc_status remove_dsc_from_stream_resource(struct dc *dc,
 	else
 		return DC_OK;
 }
-#endif
 
 
 enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream)
@@ -1516,11 +1637,9 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx,
 	if (result == DC_OK)
 		result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	/* Get a DSC if required and available */
 	if (result == DC_OK && dc_stream->timing.flags.DSC)
-		result = add_dsc_to_stream_resource(dc, new_ctx, dc_stream);
-#endif
+		result = dcn20_add_dsc_to_stream_resource(dc, new_ctx, dc_stream);
 
 	if (result == DC_OK)
 		result = dcn20_build_mapped_resource(dc, new_ctx, dc_stream);
@@ -1533,9 +1652,7 @@ enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_
 {
 	enum dc_status result = DC_OK;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	result = remove_dsc_from_stream_resource(dc, new_ctx, dc_stream);
-#endif
 
 	return result;
 }
@@ -1601,7 +1718,7 @@ static void swizzle_to_dml_params(
 	}
 }
 
-static bool dcn20_split_stream_for_odm(
+bool dcn20_split_stream_for_odm(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
 		struct pipe_ctx *prev_odm_pipe,
@@ -1618,11 +1735,8 @@ static bool dcn20_split_stream_for_odm(
 	next_odm_pipe->plane_res.xfm = pool->transforms[next_odm_pipe->pipe_idx];
 	next_odm_pipe->plane_res.dpp = pool->dpps[next_odm_pipe->pipe_idx];
 	next_odm_pipe->plane_res.mpcc_inst = pool->dpps[next_odm_pipe->pipe_idx]->inst;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	next_odm_pipe->stream_res.dsc = NULL;
-#endif
 	if (prev_odm_pipe->next_odm_pipe && prev_odm_pipe->next_odm_pipe != next_odm_pipe) {
-		ASSERT(!next_odm_pipe->next_odm_pipe);
 		next_odm_pipe->next_odm_pipe = prev_odm_pipe->next_odm_pipe;
 		next_odm_pipe->next_odm_pipe->prev_odm_pipe = next_odm_pipe;
 	}
@@ -1667,19 +1781,17 @@ static bool dcn20_split_stream_for_odm(
 		sd->recout.x = 0;
 	}
 	next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	if (next_odm_pipe->stream->timing.flags.DSC == 1) {
-		acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc);
+		acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
 		ASSERT(next_odm_pipe->stream_res.dsc);
 		if (next_odm_pipe->stream_res.dsc == NULL)
 			return false;
 	}
-#endif
 
 	return true;
 }
 
-static void dcn20_split_stream_for_mpc(
+void dcn20_split_stream_for_mpc(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
 		struct pipe_ctx *primary_pipe,
@@ -1698,9 +1810,7 @@ static void dcn20_split_stream_for_mpc(
 	secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx];
 	secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx];
 	secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	secondary_pipe->stream_res.dsc = NULL;
-#endif
 	if (primary_pipe->bottom_pipe && primary_pipe->bottom_pipe != secondary_pipe) {
 		ASSERT(!secondary_pipe->bottom_pipe);
 		secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe;
@@ -1751,11 +1861,28 @@ void dcn20_populate_dml_writeback_from_context(
 
 }
 
+static int get_num_odm_heads(struct pipe_ctx *pipe)
+{
+	int odm_head_count = 0;
+	struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
+	while (next_pipe) {
+		odm_head_count++;
+		next_pipe = next_pipe->next_odm_pipe;
+	}
+	pipe = pipe->prev_odm_pipe;
+	while (pipe) {
+		odm_head_count++;
+		pipe = pipe->prev_odm_pipe;
+	}
+	return odm_head_count ? odm_head_count + 1 : 0;
+}
+
 int dcn20_populate_dml_pipes_from_context(
-		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
+		struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes)
 {
 	int pipe_cnt, i;
 	bool synchronized_vblank = true;
+	struct resource_context *res_ctx = &context->res_ctx;
 
 	for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) {
 		if (!res_ctx->pipe_ctx[i].stream)
@@ -1765,7 +1892,7 @@ int dcn20_populate_dml_pipes_from_context(
 			pipe_cnt = i;
 			continue;
 		}
-		if (!resource_are_streams_timing_synchronizable(
+		if (dc->debug.disable_timing_sync || !resource_are_streams_timing_synchronizable(
 				res_ctx->pipe_ctx[pipe_cnt].stream,
 				res_ctx->pipe_ctx[i].stream)) {
 			synchronized_vblank = false;
@@ -1775,25 +1902,30 @@ int dcn20_populate_dml_pipes_from_context(
 
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing;
+		unsigned int v_total;
+		unsigned int front_porch;
 		int output_bpc;
 
 		if (!res_ctx->pipe_ctx[i].stream)
 			continue;
+
+		v_total = timing->v_total;
+		front_porch = timing->v_front_porch;
 		/* todo:
 		pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
 		pipes[pipe_cnt].pipe.src.dcc = 0;
 		pipes[pipe_cnt].pipe.src.vm = 0;*/
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+		pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+
 		pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
 		/* todo: rotation?*/
 		pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
-#endif
 		if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) {
 			pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true;
 			/* 1/2 vblank */
 			pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active =
-				(timing->v_total - timing->v_addressable
+				(v_total - timing->v_addressable
 					- timing->v_border_top - timing->v_border_bottom) / 2;
 			/* 36 bytes dp, 32 hdmi */
 			pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes =
@@ -1807,13 +1939,13 @@ int dcn20_populate_dml_pipes_from_context(
 				- timing->h_addressable
 				- timing->h_border_left
 				- timing->h_border_right;
-		pipes[pipe_cnt].pipe.dest.vblank_start = timing->v_total - timing->v_front_porch;
+		pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch;
 		pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start
 				- timing->v_addressable
 				- timing->v_border_top
 				- timing->v_border_bottom;
 		pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
-		pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
+		pipes[pipe_cnt].pipe.dest.vtotal = v_total;
 		pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable;
 		pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable;
 		pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
@@ -1824,8 +1956,13 @@ int dcn20_populate_dml_pipes_from_context(
 		pipes[pipe_cnt].dout.dp_lanes = 4;
 		pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
 		pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
-		pipes[pipe_cnt].pipe.dest.odm_combine = res_ctx->pipe_ctx[i].prev_odm_pipe
-							|| res_ctx->pipe_ctx[i].next_odm_pipe;
+		switch (get_num_odm_heads(&res_ctx->pipe_ctx[i])) {
+		case 2:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
+			break;
+		default:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled;
+		}
 		pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
 		if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state
 				== res_ctx->pipe_ctx[i].plane_state)
@@ -1876,14 +2013,12 @@ int dcn20_populate_dml_pipes_from_context(
 		case COLOR_DEPTH_161616:
 			output_bpc = 16;
 			break;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 		case COLOR_DEPTH_999:
 			output_bpc = 9;
 			break;
 		case COLOR_DEPTH_111111:
 			output_bpc = 11;
 			break;
-#endif
 		default:
 			output_bpc = 8;
 			break;
@@ -1897,7 +2032,7 @@ int dcn20_populate_dml_pipes_from_context(
 			break;
 		case PIXEL_ENCODING_YCBCR420:
 			pipes[pipe_cnt].dout.output_format = dm_420;
-			pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3) / 2;
+			pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2;
 			break;
 		case PIXEL_ENCODING_YCBCR422:
 			if (true) /* todo */
@@ -1911,6 +2046,9 @@ int dcn20_populate_dml_pipes_from_context(
 			pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
 		}
 
+		if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC)
+			pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0;
+
 		/* todo: default max for now, until there is logic reflecting this in dc*/
 		pipes[pipe_cnt].dout.output_bpc = 12;
 		/*
@@ -1933,6 +2071,10 @@ int dcn20_populate_dml_pipes_from_context(
 			pipes[pipe_cnt].pipe.src.viewport_height = timing->v_addressable;
 			if (pipes[pipe_cnt].pipe.src.viewport_height > 1080)
 				pipes[pipe_cnt].pipe.src.viewport_height = 1080;
+			pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
 			pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */
 			pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
 			pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
@@ -1947,8 +2089,8 @@ int dcn20_populate_dml_pipes_from_context(
 			pipes[pipe_cnt].pipe.scale_taps.vtaps = 1;
 			pipes[pipe_cnt].pipe.src.is_hsplit = 0;
 			pipes[pipe_cnt].pipe.dest.odm_combine = 0;
-			pipes[pipe_cnt].pipe.dest.vtotal_min = timing->v_total;
-			pipes[pipe_cnt].pipe.dest.vtotal_max = timing->v_total;
+			pipes[pipe_cnt].pipe.dest.vtotal_min = v_total;
+			pipes[pipe_cnt].pipe.dest.vtotal_max = v_total;
 		} else {
 			struct dc_plane_state *pln = res_ctx->pipe_ctx[i].plane_state;
 			struct scaler_data *scl = &res_ctx->pipe_ctx[i].plane_res.scl_data;
@@ -1966,6 +2108,10 @@ int dcn20_populate_dml_pipes_from_context(
 			pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
 			pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
 			pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
+			pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
 			if (pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
 				pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
 				pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
@@ -2131,8 +2277,7 @@ void dcn20_set_mcif_arb_params(
 	}
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-static bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
+bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
 {
 	int i;
 
@@ -2165,9 +2310,8 @@ static bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
 	}
 	return true;
 }
-#endif
 
-static struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
+struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
 		const struct pipe_ctx *primary_pipe)
@@ -2207,7 +2351,8 @@ static struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
 		 */
 		if (secondary_pipe == NULL) {
 			for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) {
-				if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL) {
+				if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL
+						&& dc->current_state->res_ctx.pipe_ctx[j].prev_odm_pipe == NULL) {
 					preferred_pipe_idx = j;
 
 					if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) {
@@ -2243,29 +2388,11 @@ static struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
 	return secondary_pipe;
 }
 
-bool dcn20_fast_validate_bw(
+void dcn20_merge_pipes_for_validate(
 		struct dc *dc,
-		struct dc_state *context,
-		display_e2e_pipe_params_st *pipes,
-		int *pipe_cnt_out,
-		int *pipe_split_from,
-		int *vlevel_out)
+		struct dc_state *context)
 {
-	bool out = false;
-
-	int pipe_cnt, i, pipe_idx, vlevel, vlevel_unsplit;
-	bool odm_capable = context->bw_ctx.dml.ip.odm_capable;
-	bool force_split = false;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-	bool failed_non_odm_dsc = false;
-#endif
-	int split_threshold = dc->res_pool->pipe_count / 2;
-	bool avoid_split = dc->debug.pipe_split_policy != MPC_SPLIT_DYNAMIC;
-
-
-	ASSERT(pipes);
-	if (!pipes)
-		return false;
+	int i;
 
 	/* merge previously split odm pipes since mode support needs to make the decision */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2285,10 +2412,8 @@ bool dcn20_fast_validate_bw(
 			odm_pipe->bottom_pipe = NULL;
 			odm_pipe->prev_odm_pipe = NULL;
 			odm_pipe->next_odm_pipe = NULL;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 			if (odm_pipe->stream_res.dsc)
 				release_dsc(&context->res_ctx, dc->res_pool, &odm_pipe->stream_res.dsc);
-#endif
 			/* Clear plane_res and stream_res */
 			memset(&odm_pipe->plane_res, 0, sizeof(odm_pipe->plane_res));
 			memset(&odm_pipe->stream_res, 0, sizeof(odm_pipe->stream_res));
@@ -2320,51 +2445,19 @@ bool dcn20_fast_validate_bw(
 		if (pipe->plane_state)
 			resource_build_scaling_params(pipe);
 	}
+}
 
-	if (dc->res_pool->funcs->populate_dml_pipes)
-		pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
-			&context->res_ctx, pipes);
-	else
-		pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
-			&context->res_ctx, pipes);
-
-	*pipe_cnt_out = pipe_cnt;
-
-	if (!pipe_cnt) {
-		out = true;
-		goto validate_out;
-	}
-
-	context->bw_ctx.dml.ip.odm_capable = 0;
-
-	vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
-
-	context->bw_ctx.dml.ip.odm_capable = odm_capable;
-
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-	/* 1 dsc per stream dsc validation */
-	if (vlevel <= context->bw_ctx.dml.soc.num_states)
-		if (!dcn20_validate_dsc(dc, context)) {
-			failed_non_odm_dsc = true;
-			vlevel = context->bw_ctx.dml.soc.num_states + 1;
-		}
-#endif
-
-	if (vlevel > context->bw_ctx.dml.soc.num_states && odm_capable)
-		vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
-
-	if (vlevel > context->bw_ctx.dml.soc.num_states)
-		goto validate_fail;
-
-	if ((context->stream_count > split_threshold && dc->current_state->stream_count <= split_threshold)
-		|| (context->stream_count <= split_threshold && dc->current_state->stream_count > split_threshold))
-		context->commit_hints.full_update_needed = true;
-
-	/*initialize pipe_just_split_from to invalid idx*/
-	for (i = 0; i < MAX_PIPES; i++)
-		pipe_split_from[i] = -1;
+int dcn20_validate_apply_pipe_split_flags(
+		struct dc *dc,
+		struct dc_state *context,
+		int vlevel,
+		bool *split)
+{
+	int i, pipe_idx, vlevel_split;
+	bool force_split = false;
+	bool avoid_split = dc->debug.pipe_split_policy != MPC_SPLIT_DYNAMIC;
 
-	/* Single display only conditionals get set here */
+	/* Single display loop, exits if there is more than one display */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		bool exit_loop = false;
@@ -2391,38 +2484,107 @@ bool dcn20_fast_validate_bw(
 		if (exit_loop)
 			break;
 	}
-
-	if (context->stream_count > split_threshold)
+	/* TODO: fix dc bugs and remove this split threshold thing */
+	if (context->stream_count > dc->res_pool->pipe_count / 2)
 		avoid_split = true;
 
-	vlevel_unsplit = vlevel;
+	/* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */
+	if (avoid_split) {
+		for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+			if (!context->res_ctx.pipe_ctx[i].stream)
+				continue;
+
+			for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++)
+				if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1)
+					break;
+			/* Impossible to not split this pipe */
+			if (vlevel > context->bw_ctx.dml.soc.num_states)
+				vlevel = vlevel_split;
+			pipe_idx++;
+		}
+		context->bw_ctx.dml.vba.maxMpcComb = 0;
+	}
+
+	/* Split loop sets which pipe should be split based on dml outputs and dc flags */
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
 		if (!context->res_ctx.pipe_ctx[i].stream)
 			continue;
-		for (; vlevel_unsplit <= context->bw_ctx.dml.soc.num_states; vlevel_unsplit++)
-			if (context->bw_ctx.dml.vba.NoOfDPP[vlevel_unsplit][0][pipe_idx] == 1)
-				break;
+
+		if (force_split || context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] > 1)
+			split[i] = true;
+		if ((pipe->stream->view_format ==
+				VIEW_3D_FORMAT_SIDE_BY_SIDE ||
+				pipe->stream->view_format ==
+				VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
+				(pipe->stream->timing.timing_3d_format ==
+				TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
+				 pipe->stream->timing.timing_3d_format ==
+				TIMING_3D_FORMAT_SIDE_BY_SIDE))
+			split[i] = true;
+		if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
+			split[i] = true;
+			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = dm_odm_combine_mode_2to1;
+		}
+		context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] =
+			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx];
+		/* Adjust dppclk when split is forced, do not bother with dispclk */
+		if (split[i] && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1)
+			context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2;
 		pipe_idx++;
 	}
 
+	return vlevel;
+}
+
+bool dcn20_fast_validate_bw(
+		struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int *pipe_cnt_out,
+		int *pipe_split_from,
+		int *vlevel_out)
+{
+	bool out = false;
+	bool split[MAX_PIPES] = { false };
+	int pipe_cnt, i, pipe_idx, vlevel;
+
+	ASSERT(pipes);
+	if (!pipes)
+		return false;
+
+	dcn20_merge_pipes_for_validate(dc, context);
+
+	pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes);
+
+	*pipe_cnt_out = pipe_cnt;
+
+	if (!pipe_cnt) {
+		out = true;
+		goto validate_out;
+	}
+
+	vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+	if (vlevel > context->bw_ctx.dml.soc.num_states)
+		goto validate_fail;
+
+	vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split);
+
+	/*initialize pipe_just_split_from to invalid idx*/
+	for (i = 0; i < MAX_PIPES; i++)
+		pipe_split_from[i] = -1;
+
 	for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe;
-		bool need_split = true;
-		bool need_split3d;
 
 		if (!pipe->stream || pipe_split_from[i] >= 0)
 			continue;
 
 		pipe_idx++;
 
-		if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
-			force_split = true;
-			context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = true;
-			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true;
-		}
-		if (force_split && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1)
-			context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2;
 		if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
 			hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
 			ASSERT(hsplit_pipe);
@@ -2440,40 +2602,26 @@ bool dcn20_fast_validate_bw(
 		if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state)
 			continue;
 
-		need_split3d = ((pipe->stream->view_format ==
-				VIEW_3D_FORMAT_SIDE_BY_SIDE ||
-				pipe->stream->view_format ==
-				VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
-				(pipe->stream->timing.timing_3d_format ==
-				TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
-				 pipe->stream->timing.timing_3d_format ==
-				TIMING_3D_FORMAT_SIDE_BY_SIDE));
-
-		if (avoid_split && vlevel_unsplit <= context->bw_ctx.dml.soc.num_states && !force_split && !need_split3d) {
-			need_split = false;
-			vlevel = vlevel_unsplit;
-			context->bw_ctx.dml.vba.maxMpcComb = 0;
-		} else
-			need_split = context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 2;
-
 		/* We do not support mpo + odm at the moment */
 		if (hsplit_pipe && hsplit_pipe->plane_state != pipe->plane_state
 				&& context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx])
 			goto validate_fail;
 
-		if (need_split3d || need_split || force_split) {
+		if (split[i]) {
 			if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) {
 				/* pipe not split previously needs split */
 				hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
-				ASSERT(hsplit_pipe || force_split);
-				if (!hsplit_pipe)
+				ASSERT(hsplit_pipe);
+				if (!hsplit_pipe) {
+					context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] *= 2;
 					continue;
-
+				}
 				if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
 					if (!dcn20_split_stream_for_odm(
 							&context->res_ctx, dc->res_pool,
 							pipe, hsplit_pipe))
 						goto validate_fail;
+					dcn20_build_mapped_resource(dc, context, pipe->stream);
 				} else
 					dcn20_split_stream_for_mpc(
 						&context->res_ctx, dc->res_pool,
@@ -2485,14 +2633,12 @@ bool dcn20_fast_validate_bw(
 			ASSERT(0);
 		}
 	}
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	/* Actual dsc count per stream dsc validation*/
-	if (failed_non_odm_dsc && !dcn20_validate_dsc(dc, context)) {
+	if (!dcn20_validate_dsc(dc, context)) {
 		context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
 				DML_FAIL_DSC_VALIDATION_FAILURE;
 		goto validate_fail;
 	}
-#endif
 
 	*vlevel_out = vlevel;
 
@@ -2506,7 +2652,7 @@ validate_out:
 	return out;
 }
 
-void dcn20_calculate_wm(
+static void dcn20_calculate_wm(
 		struct dc *dc, struct dc_state *context,
 		display_e2e_pipe_params_st *pipes,
 		int *out_pipe_cnt,
@@ -2527,7 +2673,7 @@ void dcn20_calculate_wm(
 					context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
 			if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
 				pipes[pipe_cnt].pipe.dest.odm_combine =
-						context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx];
+						context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx];
 			else
 				pipes[pipe_cnt].pipe.dest.odm_combine = 0;
 			pipe_idx++;
@@ -2536,7 +2682,7 @@ void dcn20_calculate_wm(
 					context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
 			if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
 				pipes[pipe_cnt].pipe.dest.odm_combine =
-						context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_split_from[i]];
+						context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]];
 			else
 				pipes[pipe_cnt].pipe.dest.odm_combine = 0;
 		}
@@ -2556,10 +2702,10 @@ void dcn20_calculate_wm(
 	if (pipe_cnt != pipe_idx) {
 		if (dc->res_pool->funcs->populate_dml_pipes)
 			pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
-				&context->res_ctx, pipes);
+				context, pipes);
 		else
 			pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
-				&context->res_ctx, pipes);
+				context, pipes);
 	}
 
 	*out_pipe_cnt = pipe_cnt;
@@ -2579,6 +2725,9 @@ void dcn20_calculate_wm(
 	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 
 	if (vlevel < 2) {
 		pipes[0].clks_cfg.voltage = 2;
@@ -2590,6 +2739,8 @@ void dcn20_calculate_wm(
 	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 
 	if (vlevel < 3) {
 		pipes[0].clks_cfg.voltage = 3;
@@ -2601,6 +2752,8 @@ void dcn20_calculate_wm(
 	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 
 	pipes[0].clks_cfg.voltage = vlevel;
 	pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
@@ -2610,6 +2763,8 @@ void dcn20_calculate_wm(
 	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 }
 
 void dcn20_calculate_dlg_params(
@@ -2629,7 +2784,7 @@ void dcn20_calculate_dlg_params(
 	context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
 	context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
 	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
-	context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+	context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
 	context->bw_ctx.bw.dcn.clk.p_state_change_support =
 		context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
 							!= dm_dram_clock_change_unsupported;
@@ -2645,8 +2800,8 @@ void dcn20_calculate_dlg_params(
 			continue;
 
 		if (!visited[pipe_idx]) {
-			display_pipe_source_params_st *src = &pipes[pipe_idx_unsplit].pipe.src;
-			display_pipe_dest_params_st *dst = &pipes[pipe_idx_unsplit].pipe.dest;
+			display_pipe_source_params_st *src = &pipes[pipe_idx].pipe.src;
+			display_pipe_dest_params_st *dst = &pipes[pipe_idx].pipe.dest;
 
 			dst->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit];
 			dst->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit];
@@ -2775,11 +2930,19 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 	bool voltage_supported = false;
 	bool full_pstate_supported = false;
 	bool dummy_pstate_supported = false;
-	double p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
+	double p_state_latency_us;
 
-	if (fast_validate)
-		return dcn20_validate_bandwidth_internal(dc, context, true);
+	DC_FP_START();
+	p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
+	context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
+		dc->debug.disable_dram_clock_change_vactive_support;
 
+	if (fast_validate) {
+		voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true);
+
+		DC_FP_END();
+		return voltage_supported;
+	}
 
 	// Best case, we support full UCLK switch latency
 	voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
@@ -2787,7 +2950,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 
 	if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
 		(voltage_supported && full_pstate_supported)) {
-		context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+		context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported;
 		goto restore_dml_state;
 	}
 
@@ -2806,9 +2969,9 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 	ASSERT(false);
 
 restore_dml_state:
-	memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
 	context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
 
+	DC_FP_END();
 	return voltage_supported;
 }
 
@@ -2853,7 +3016,7 @@ static void dcn20_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dcn20_resource_pool *dcn20_pool = TO_DCN20_RES_POOL(*pool);
 
-	destruct(dcn20_pool);
+	dcn20_resource_destruct(dcn20_pool);
 	kfree(dcn20_pool);
 	*pool = NULL;
 }
@@ -2892,6 +3055,7 @@ static struct resource_funcs dcn20_res_pool_funcs = {
 	.populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context,
 	.get_default_swizzle_mode = dcn20_get_default_swizzle_mode,
 	.set_mcif_arb_params = dcn20_set_mcif_arb_params,
+	.populate_dml_pipes = dcn20_populate_dml_pipes_from_context,
 	.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
 };
 
@@ -2900,8 +3064,6 @@ bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
 	int i;
 	uint32_t pipe_count = pool->res_cap->num_dwb;
 
-	ASSERT(pipe_count > 0);
-
 	for (i = 0; i < pipe_count; i++) {
 		struct dcn20_dwbc *dwbc20 = kzalloc(sizeof(struct dcn20_dwbc),
 						    GFP_KERNEL);
@@ -2947,7 +3109,7 @@ bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
 	return true;
 }
 
-struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx)
+static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx)
 {
 	struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL);
 
@@ -2962,7 +3124,7 @@ struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx)
 	return pp_smu;
 }
 
-void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
+static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
 {
 	if (pp_smu && *pp_smu) {
 		kfree(*pp_smu);
@@ -2970,7 +3132,7 @@ void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
 	}
 }
 
-static void cap_soc_clocks(
+void dcn20_cap_soc_clocks(
 		struct _vcs_dpi_soc_bounding_box_st *bb,
 		struct pp_smu_nv_clock_table max_clocks)
 {
@@ -3037,10 +3199,10 @@ static void cap_soc_clocks(
 	}
 }
 
-static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
+void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
 		struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states)
 {
-	struct _vcs_dpi_voltage_scaling_st calculated_states[MAX_CLOCK_LIMIT_STATES] = {0};
+	struct _vcs_dpi_voltage_scaling_st calculated_states[MAX_CLOCK_LIMIT_STATES];
 	int i;
 	int num_calculated_states = 0;
 	int min_dcfclk = 0;
@@ -3048,12 +3210,18 @@ static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_
 	if (num_states == 0)
 		return;
 
+	memset(calculated_states, 0, sizeof(calculated_states));
+
 	if (dc->bb_overrides.min_dcfclk_mhz > 0)
 		min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
-	else
-		// Accounting for SOC/DCF relationship, we can go as high as
-		// 506Mhz in Vmin.  We need to code 507 since SMU will round down to 506.
-		min_dcfclk = 507;
+	else {
+		if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
+			min_dcfclk = 310;
+		else
+			// Accounting for SOC/DCF relationship, we can go as high as
+			// 506Mhz in Vmin.
+			min_dcfclk = 506;
+	}
 
 	for (i = 0; i < num_states; i++) {
 		int min_fclk_required_by_uclk;
@@ -3093,9 +3261,8 @@ static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_
 	bb->clock_limits[num_calculated_states].state = bb->num_states;
 }
 
-static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
 {
-	kernel_fpu_begin();
 	if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
 			&& dc->bb_overrides.sr_exit_time_ns) {
 		bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
@@ -3119,7 +3286,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s
 		bb->dram_clock_change_latency_us =
 				dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
 	}
-	kernel_fpu_end();
 }
 
 static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
@@ -3134,6 +3300,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
 static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params(
 	uint32_t hw_internal_rev)
 {
+	/* NV14 */
+	if (ASICREV_IS_NAVI14_M(hw_internal_rev))
+		return &dcn2_0_nv14_ip;
+
 	/* NV12 and NV10 */
 	return &dcn2_0_ip;
 }
@@ -3157,12 +3327,13 @@ static bool init_soc_bounding_box(struct dc *dc,
 
 	DC_LOGGER_INIT(dc->ctx->logger);
 
-	if (!bb && !SOC_BOUNDING_BOX_VALID) {
+	/* TODO: upstream NV12 bounding box when its launched */
+	if (!bb && ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) {
 		DC_LOG_ERROR("%s: not valid soc bounding box/n", __func__);
 		return false;
 	}
 
-	if (bb && !SOC_BOUNDING_BOX_VALID) {
+	if (bb && ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) {
 		int i;
 
 		dcn2_0_nv12_soc.sr_exit_time_us =
@@ -3292,19 +3463,19 @@ static bool init_soc_bounding_box(struct dc *dc,
 		}
 
 		if (clock_limits_available && uclk_states_available && num_states)
-			update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states);
+			dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states);
 		else if (clock_limits_available)
-			cap_soc_clocks(loaded_bb, max_clocks);
+			dcn20_cap_soc_clocks(loaded_bb, max_clocks);
 	}
 
 	loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator;
 	loaded_ip->max_num_dpp = pool->base.pipe_count;
-	patch_bounding_box(dc, loaded_bb);
+	dcn20_patch_bounding_box(dc, loaded_bb);
 
 	return true;
 }
 
-static bool construct(
+static bool dcn20_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dcn20_resource_pool *pool)
@@ -3312,6 +3483,7 @@ static bool construct(
 	int i;
 	struct dc_context *ctx = dc->ctx;
 	struct irq_service_init_data init_data;
+	struct ddc_service_init_data ddc_init_data;
 	struct _vcs_dpi_soc_bounding_box_st *loaded_bb =
 			get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev);
 	struct _vcs_dpi_ip_params_st *loaded_ip =
@@ -3319,6 +3491,8 @@ static bool construct(
 	enum dml_project dml_project_version =
 			get_dml_project_version(ctx->asic_id.hw_internal_rev);
 
+	DC_FP_START();
+
 	ctx->dc_bios->regs = &bios_regs;
 	pool->base.funcs = &dcn20_res_pool_funcs;
 
@@ -3345,6 +3519,7 @@ static bool construct(
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
 	dc->caps.hw_3d_lut = true;
+	dc->caps.extended_aux_timeout_support = true;
 
 	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) {
 		dc->debug = debug_defaults_drv;
@@ -3570,7 +3745,6 @@ static bool construct(
 		goto create_fail;
 	}
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
 		pool->base.dscs[i] = dcn20_dsc_create(ctx, i);
 		if (pool->base.dscs[i] == NULL) {
@@ -3579,7 +3753,6 @@ static bool construct(
 			goto create_fail;
 		}
 	}
-#endif
 
 	if (!dcn20_dwbc_create(ctx, &pool->base)) {
 		BREAK_TO_DEBUGGER();
@@ -3606,11 +3779,24 @@ static bool construct(
 
 	dc->cap_funcs = cap_funcs;
 
+	if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
+		ddc_init_data.ctx = dc->ctx;
+		ddc_init_data.link = NULL;
+		ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
+		ddc_init_data.id.enum_id = 0;
+		ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
+		pool->base.oem_device = dal_ddc_service_create(&ddc_init_data);
+	} else {
+		pool->base.oem_device = NULL;
+	}
+
+	DC_FP_END();
 	return true;
 
 create_fail:
 
-	destruct(pool);
+	DC_FP_END();
+	dcn20_resource_destruct(pool);
 
 	return false;
 }
@@ -3625,7 +3811,7 @@ struct resource_pool *dcn20_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(init_data->num_virtual_links, dc, pool))
+	if (dcn20_resource_construct(init_data->num_virtual_links, dc, pool))
 		return &pool->base;
 
 	BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
index 44f95aa0d61e..f5893840b79b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
@@ -50,7 +50,7 @@ unsigned int dcn20_calc_max_scaled_time(
 		enum mmhubbub_wbif_mode mode,
 		unsigned int urgent_watermark);
 int dcn20_populate_dml_pipes_from_context(
-		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
+		struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
 struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer(
 		struct dc_state *state,
 		const struct resource_pool *pool,
@@ -95,9 +95,12 @@ struct display_stream_compressor *dcn20_dsc_create(
 	struct dc_context *ctx, uint32_t inst);
 void dcn20_dsc_destroy(struct display_stream_compressor **dsc);
 
-struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx);
-void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu);
-
+void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb);
+void dcn20_cap_soc_clocks(
+		struct _vcs_dpi_soc_bounding_box_st *bb,
+		struct pp_smu_nv_clock_table max_clocks);
+void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
+		struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states);
 struct hubp *dcn20_hubp_create(
 	struct dc_context *ctx,
 	uint32_t inst);
@@ -116,6 +119,29 @@ void dcn20_set_mcif_arb_params(
 		display_e2e_pipe_params_st *pipes,
 		int pipe_cnt);
 bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate);
+void dcn20_merge_pipes_for_validate(
+		struct dc *dc,
+		struct dc_state *context);
+int dcn20_validate_apply_pipe_split_flags(
+		struct dc *dc,
+		struct dc_state *context,
+		int vlevel,
+		bool *split);
+bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx);
+void dcn20_split_stream_for_mpc(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct pipe_ctx *primary_pipe,
+		struct pipe_ctx *secondary_pipe);
+bool dcn20_split_stream_for_odm(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct pipe_ctx *prev_odm_pipe,
+		struct pipe_ctx *next_odm_pipe);
+struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		const struct pipe_ctx *primary_pipe);
 bool dcn20_fast_validate_bw(
 		struct dc *dc,
 		struct dc_state *context,
@@ -131,6 +157,7 @@ void dcn20_calculate_dlg_params(
 
 enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream);
 enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
+enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_get_default_swizzle_mode(struct dc_plane_state *plane_state);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
index 5ab9d6240498..9b70a1e7b962 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
@@ -205,7 +205,6 @@ static void enc2_stream_encoder_stop_hdmi_info_packets(
 		HDMI_GENERIC7_LINE, 0);
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 
 /* Update GSP7 SDP 128 byte long */
 static void enc2_update_gsp7_128_info_packet(
@@ -360,7 +359,6 @@ static void enc2_read_state(struct stream_encoder *enc, struct enc_state *s)
 		REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable);
 	}
 }
-#endif
 
 /* Set Dynamic Metadata-configuration.
  *   enable_dme:         TRUE: enables Dynamic Metadata Enfine, FALSE: disables DME
@@ -440,10 +438,8 @@ static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
 {
 	bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
 			&& !timing->dsc_cfg.ycbcr422_simple);
-#endif
 	return two_pix;
 }
 
@@ -492,15 +488,23 @@ void enc2_stream_encoder_dp_unblank(
 				DP_VID_N_MUL, n_multiply);
 	}
 
-	/* set DIG_START to 0x1 to reset FIFO */
+	/* make sure stream is disabled before resetting steer fifo */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+	REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
 
+	/* set DIG_START to 0x1 to reset FIFO */
 	REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+	udelay(1);
 
 	/* write 0 to take the FIFO out of reset */
 
 	REG_UPDATE(DIG_FE_CNTL, DIG_START, 0);
 
-	/* switch DP encoder to CRTC data */
+	/* switch DP encoder to CRTC data, but reset it the fifo first. It may happen
+	 * that it overflows during mode transition, and sometimes doesn't recover.
+	 */
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+	udelay(10);
 
 	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
 
@@ -533,11 +537,16 @@ void enc2_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting)
 {
 	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
-	enc1_stream_encoder_dp_set_stream_attribute(enc, crtc_timing, output_color_space, enable_sdp_splitting);
+	enc1_stream_encoder_dp_set_stream_attribute(enc,
+			crtc_timing,
+			output_color_space,
+			use_vsc_sdp_for_colorimetry,
+			enable_sdp_splitting);
 
 	REG_UPDATE(DP_SEC_FRAMING4,
 		DP_SST_SDP_SPLITTING, enable_sdp_splitting);
@@ -560,6 +569,8 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
 		enc2_stream_encoder_stop_hdmi_info_packets,
 	.update_dp_info_packets =
 		enc2_stream_encoder_update_dp_info_packets,
+	.send_immediate_sdp_message =
+		enc1_stream_encoder_send_immediate_sdp_message,
 	.stop_dp_info_packets =
 		enc1_stream_encoder_stop_dp_info_packets,
 	.dp_blank =
@@ -578,11 +589,13 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
 	.set_avmute = enc1_stream_encoder_set_avmute,
 	.dig_connect_to_otg  = enc1_dig_connect_to_otg,
 	.dig_source_otg = enc1_dig_source_otg,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+
+	.dp_get_pixel_format =
+		enc1_stream_encoder_dp_get_pixel_format,
+
 	.enc_read_state = enc2_read_state,
 	.dp_set_dsc_config = enc2_dp_set_dsc_config,
 	.dp_set_dsc_pps_info_packet = enc2_dp_set_dsc_pps_info_packet,
-#endif
 	.set_dynamic_metadata = enc2_set_dynamic_metadata,
 	.hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
index 3f94a9f13c4a..d2a805bd4573 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
@@ -98,6 +98,7 @@ void enc2_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting);
 
 void enc2_stream_encoder_dp_unblank(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index ff50ae71fe27..07684d3e375a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -1,9 +1,17 @@
+# SPDX-License-Identifier: MIT
 #
 # Makefile for DCN21.
 
-DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o
+DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
+	 dcn21_hwseq.o dcn21_link_encoder.o
 
+ifdef CONFIG_X86
 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec
+endif
 
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
@@ -11,6 +19,7 @@ IS_OLD_GCC = 1
 endif
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -19,6 +28,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
 else
 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
 endif
+endif
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index d1266741763b..f546260c15b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -22,6 +22,7 @@
  * Authors: AMD
  *
  */
+#include <linux/delay.h>
 #include "dm_services.h"
 #include "dcn20/dcn20_hubbub.h"
 #include "dcn21_hubbub.h"
@@ -51,7 +52,7 @@
 #ifdef NUM_VMID
 #undef NUM_VMID
 #endif
-#define NUM_VMID 1
+#define NUM_VMID 16
 
 static uint32_t convert_and_clamp(
 	uint32_t wm_ns,
@@ -71,56 +72,76 @@ static uint32_t convert_and_clamp(
 void dcn21_dchvm_init(struct hubbub *hubbub)
 {
 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+	uint32_t riommu_active;
+	int i;
 
 	//Init DCHVM block
 	REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
 
 	//Poll until RIOMMU_ACTIVE = 1
-	//TODO: Figure out interval us and retry count
-	REG_WAIT(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, 1, 5, 100);
+	for (i = 0; i < 100; i++) {
+		REG_GET(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, &riommu_active);
 
-	//Reflect the power status of DCHUBBUB
-	REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, 1);
+		if (riommu_active)
+			break;
+		else
+			udelay(5);
+	}
+
+	if (riommu_active) {
+		//Reflect the power status of DCHUBBUB
+		REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, 1);
 
-	//Start rIOMMU prefetching
-	REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, 1);
+		//Start rIOMMU prefetching
+		REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, 1);
 
-	// Enable dynamic clock gating
-	REG_UPDATE_4(DCHVM_CLK_CTRL,
-					HVM_DISPCLK_R_GATE_DIS, 0,
-					HVM_DISPCLK_G_GATE_DIS, 0,
-					HVM_DCFCLK_R_GATE_DIS, 0,
-					HVM_DCFCLK_G_GATE_DIS, 0);
+		// Enable dynamic clock gating
+		REG_UPDATE_4(DCHVM_CLK_CTRL,
+						HVM_DISPCLK_R_GATE_DIS, 0,
+						HVM_DISPCLK_G_GATE_DIS, 0,
+						HVM_DCFCLK_R_GATE_DIS, 0,
+						HVM_DCFCLK_G_GATE_DIS, 0);
 
-	//Poll until HOSTVM_PREFETCH_DONE = 1
-	//TODO: Figure out interval us and retry count
-	REG_WAIT(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, 1, 5, 100);
+		//Poll until HOSTVM_PREFETCH_DONE = 1
+		REG_WAIT(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, 1, 5, 100);
+	}
 }
 
-static int hubbub21_init_dchub(struct hubbub *hubbub,
+int hubbub21_init_dchub(struct hubbub *hubbub,
 		struct dcn_hubbub_phys_addr_config *pa_config)
 {
 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+	struct dcn_vmid_page_table_config phys_config;
 
 	REG_SET(DCN_VM_FB_LOCATION_BASE, 0,
-		FB_BASE, pa_config->system_aperture.fb_base);
+			FB_BASE, pa_config->system_aperture.fb_base >> 24);
 	REG_SET(DCN_VM_FB_LOCATION_TOP, 0,
-			FB_TOP, pa_config->system_aperture.fb_top);
+			FB_TOP, pa_config->system_aperture.fb_top >> 24);
 	REG_SET(DCN_VM_FB_OFFSET, 0,
-			FB_OFFSET, pa_config->system_aperture.fb_offset);
+			FB_OFFSET, pa_config->system_aperture.fb_offset >> 24);
 	REG_SET(DCN_VM_AGP_BOT, 0,
-			AGP_BOT, pa_config->system_aperture.agp_bot);
+			AGP_BOT, pa_config->system_aperture.agp_bot >> 24);
 	REG_SET(DCN_VM_AGP_TOP, 0,
-			AGP_TOP, pa_config->system_aperture.agp_top);
+			AGP_TOP, pa_config->system_aperture.agp_top >> 24);
 	REG_SET(DCN_VM_AGP_BASE, 0,
-			AGP_BASE, pa_config->system_aperture.agp_base);
+			AGP_BASE, pa_config->system_aperture.agp_base >> 24);
+
+	if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) {
+		phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12;
+		phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12;
+		phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr | 1; //Note: hack
+		phys_config.depth = 0;
+		phys_config.block_size = 0;
+		// Init VMID 0 based on PA config
+		dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
+	}
 
 	dcn21_dchvm_init(hubbub);
 
 	return NUM_VMID;
 }
 
-static void hubbub21_program_urgent_watermarks(
+void hubbub21_program_urgent_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
@@ -160,6 +181,13 @@ static void hubbub21_program_urgent_watermarks(
 		REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0,
 				DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->a.frac_urg_bw_nom);
 	}
+	if (safe_to_lower || watermarks->a.urgent_latency_ns > hubbub1->watermarks.a.urgent_latency_ns) {
+		hubbub1->watermarks.a.urgent_latency_ns = watermarks->a.urgent_latency_ns;
+		prog_wm_value = convert_and_clamp(watermarks->a.urgent_latency_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0,
+				DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, prog_wm_value);
+	}
 
 	/* clock state B */
 	if (safe_to_lower || watermarks->b.urgent_ns > hubbub1->watermarks.b.urgent_ns) {
@@ -192,6 +220,14 @@ static void hubbub21_program_urgent_watermarks(
 				DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->a.frac_urg_bw_nom);
 	}
 
+	if (safe_to_lower || watermarks->b.urgent_latency_ns > hubbub1->watermarks.b.urgent_latency_ns) {
+		hubbub1->watermarks.b.urgent_latency_ns = watermarks->b.urgent_latency_ns;
+		prog_wm_value = convert_and_clamp(watermarks->b.urgent_latency_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0,
+				DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, prog_wm_value);
+	}
+
 	/* clock state C */
 	if (safe_to_lower || watermarks->c.urgent_ns > hubbub1->watermarks.c.urgent_ns) {
 		hubbub1->watermarks.c.urgent_ns = watermarks->c.urgent_ns;
@@ -223,6 +259,14 @@ static void hubbub21_program_urgent_watermarks(
 				DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, watermarks->a.frac_urg_bw_nom);
 	}
 
+	if (safe_to_lower || watermarks->c.urgent_latency_ns > hubbub1->watermarks.c.urgent_latency_ns) {
+		hubbub1->watermarks.c.urgent_latency_ns = watermarks->c.urgent_latency_ns;
+		prog_wm_value = convert_and_clamp(watermarks->c.urgent_latency_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, 0,
+				DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, prog_wm_value);
+	}
+
 	/* clock state D */
 	if (safe_to_lower || watermarks->d.urgent_ns > hubbub1->watermarks.d.urgent_ns) {
 		hubbub1->watermarks.d.urgent_ns = watermarks->d.urgent_ns;
@@ -253,9 +297,17 @@ static void hubbub21_program_urgent_watermarks(
 		REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, 0,
 				DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, watermarks->a.frac_urg_bw_nom);
 	}
+
+	if (safe_to_lower || watermarks->d.urgent_latency_ns > hubbub1->watermarks.d.urgent_latency_ns) {
+		hubbub1->watermarks.d.urgent_latency_ns = watermarks->d.urgent_latency_ns;
+		prog_wm_value = convert_and_clamp(watermarks->d.urgent_latency_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, 0,
+				DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, prog_wm_value);
+	}
 }
 
-static void hubbub21_program_stutter_watermarks(
+void hubbub21_program_stutter_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
@@ -389,7 +441,7 @@ static void hubbub21_program_stutter_watermarks(
 	}
 }
 
-static void hubbub21_program_pstate_watermarks(
+void hubbub21_program_pstate_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
@@ -564,17 +616,26 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage);
 }
 
+void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub)
+{
+	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+	uint32_t prog_wm_value;
+
+	prog_wm_value = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
+}
 
 static const struct hubbub_funcs hubbub21_funcs = {
 	.update_dchub = hubbub2_update_dchub,
 	.init_dchub_sys_ctx = hubbub21_init_dchub,
-	.init_vm_ctx = NULL,
+	.init_vm_ctx = hubbub2_init_vm_ctx,
 	.dcc_support_swizzle = hubbub2_dcc_support_swizzle,
 	.dcc_support_pixel_format = hubbub2_dcc_support_pixel_format,
 	.get_dcc_compression_cap = hubbub2_get_dcc_compression_cap,
 	.wm_read_state = hubbub21_wm_read_state,
 	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
 	.program_watermarks = hubbub21_program_watermarks,
+	.apply_DEDCN21_147_wa = hubbub21_apply_DEDCN21_147_wa,
 };
 
 void hubbub21_construct(struct dcn20_hubbub *hubbub,
@@ -592,4 +653,5 @@ void hubbub21_construct(struct dcn20_hubbub *hubbub,
 	hubbub->masks = hubbub_mask;
 
 	hubbub->debug_test_index_pstate = 0xB;
+	hubbub->detile_buf_size = 164 * 1024; /* 164KB for DCN2.0 */
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h
index 6ff3cdb89178..c4840dfb1fa5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h
@@ -36,6 +36,10 @@
 	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B),\
 	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C),\
 	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D),\
 	SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
 	SR(DCHVM_CTRL0), \
 	SR(DCHVM_MEM_CTRL), \
@@ -44,16 +48,9 @@
 	SR(DCHVM_RIOMMU_STAT0)
 
 #define HUBBUB_REG_LIST_DCN21()\
-	HUBBUB_REG_LIST_DCN_COMMON(), \
+	HUBBUB_REG_LIST_DCN20_COMMON(), \
 	HUBBUB_SR_WATERMARK_REG_LIST(), \
-	HUBBUB_HVM_REG_LIST(), \
-	SR(DCHUBBUB_CRC_CTRL), \
-	SR(DCN_VM_FB_LOCATION_BASE),\
-	SR(DCN_VM_FB_LOCATION_TOP),\
-	SR(DCN_VM_FB_OFFSET),\
-	SR(DCN_VM_AGP_BOT),\
-	SR(DCN_VM_AGP_TOP),\
-	SR(DCN_VM_AGP_BASE)
+	HUBBUB_HVM_REG_LIST()
 
 #define HUBBUB_MASK_SH_LIST_HVM(mask_sh) \
 	HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, mask_sh), \
@@ -102,7 +99,7 @@
 	HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, mask_sh)
 
 #define HUBBUB_MASK_SH_LIST_DCN21(mask_sh)\
-	HUBBUB_MASK_SH_LIST_HVM(mask_sh),\
+	HUBBUB_MASK_SH_LIST_HVM(mask_sh), \
 	HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh), \
 	HUBBUB_MASK_SH_LIST_STUTTER(mask_sh), \
 	HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
@@ -114,11 +111,28 @@
 	HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh)
 
 void dcn21_dchvm_init(struct hubbub *hubbub);
+int hubbub21_init_dchub(struct hubbub *hubbub,
+		struct dcn_hubbub_phys_addr_config *pa_config);
 void hubbub21_program_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
 		bool safe_to_lower);
+void hubbub21_program_urgent_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
+void hubbub21_program_stutter_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
+void hubbub21_program_pstate_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
 
 void hubbub21_wm_read_state(struct hubbub *hubbub,
 		struct dcn_hubbub_wm *wm);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index a00af513aa2b..da63fc53cc4a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -22,11 +22,17 @@
  * Authors: AMD
  *
  */
+
+#include "dcn10/dcn10_hubp.h"
 #include "dcn21_hubp.h"
 
 #include "dm_services.h"
 #include "reg_helper.h"
 
+#include "dc_dmub_srv.h"
+
+#define DC_LOGGER_INIT(logger)
+
 #define REG(reg)\
 	hubp21->hubp_regs->reg
 
@@ -162,6 +168,158 @@ static void hubp21_setup(
 
 }
 
+void hubp21_set_viewport(
+	struct hubp *hubp,
+	const struct rect *viewport,
+	const struct rect *viewport_c)
+{
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+
+	REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION, 0,
+		  PRI_VIEWPORT_WIDTH, viewport->width,
+		  PRI_VIEWPORT_HEIGHT, viewport->height);
+
+	REG_SET_2(DCSURF_PRI_VIEWPORT_START, 0,
+		  PRI_VIEWPORT_X_START, viewport->x,
+		  PRI_VIEWPORT_Y_START, viewport->y);
+
+	/*for stereo*/
+	REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION, 0,
+		  SEC_VIEWPORT_WIDTH, viewport->width,
+		  SEC_VIEWPORT_HEIGHT, viewport->height);
+
+	REG_SET_2(DCSURF_SEC_VIEWPORT_START, 0,
+		  SEC_VIEWPORT_X_START, viewport->x,
+		  SEC_VIEWPORT_Y_START, viewport->y);
+
+	/* DC supports NV12 only at the moment */
+	REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION_C, 0,
+		  PRI_VIEWPORT_WIDTH_C, viewport_c->width,
+		  PRI_VIEWPORT_HEIGHT_C, viewport_c->height);
+
+	REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0,
+		  PRI_VIEWPORT_X_START_C, viewport_c->x,
+		  PRI_VIEWPORT_Y_START_C, viewport_c->y);
+
+	REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0,
+		  SEC_VIEWPORT_WIDTH_C, viewport_c->width,
+		  SEC_VIEWPORT_HEIGHT_C, viewport_c->height);
+
+	REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0,
+		  SEC_VIEWPORT_X_START_C, viewport_c->x,
+		  SEC_VIEWPORT_Y_START_C, viewport_c->y);
+}
+
+static void hubp21_apply_PLAT_54186_wa(
+		struct hubp *hubp,
+		const struct dc_plane_address *address)
+{
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct dc_debug_options *debug = &hubp->ctx->dc->debug;
+	unsigned int chroma_bpe = 2;
+	unsigned int luma_addr_high_part = 0;
+	unsigned int row_height = 0;
+	unsigned int chroma_pitch = 0;
+	unsigned int viewport_c_height = 0;
+	unsigned int viewport_c_width = 0;
+	unsigned int patched_viewport_height = 0;
+	unsigned int patched_viewport_width = 0;
+	unsigned int rotation_angle = 0;
+	unsigned int pix_format = 0;
+	unsigned int h_mirror_en = 0;
+	unsigned int tile_blk_size = 64 * 1024; /* 64KB for 64KB SW, 4KB for 4KB SW */
+
+
+	if (!debug->nv12_iflip_vm_wa)
+		return;
+
+	REG_GET(DCHUBP_REQ_SIZE_CONFIG_C,
+		PTE_ROW_HEIGHT_LINEAR_C, &row_height);
+
+	REG_GET_2(DCSURF_PRI_VIEWPORT_DIMENSION_C,
+			PRI_VIEWPORT_WIDTH_C, &viewport_c_width,
+			PRI_VIEWPORT_HEIGHT_C, &viewport_c_height);
+
+	REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C,
+			PRIMARY_SURFACE_ADDRESS_HIGH_C, &luma_addr_high_part);
+
+	REG_GET(DCSURF_SURFACE_PITCH_C,
+			PITCH_C, &chroma_pitch);
+
+	chroma_pitch += 1;
+
+	REG_GET_3(DCSURF_SURFACE_CONFIG,
+			SURFACE_PIXEL_FORMAT, &pix_format,
+			ROTATION_ANGLE, &rotation_angle,
+			H_MIRROR_EN, &h_mirror_en);
+
+	/* reset persistent cached data */
+	hubp21->PLAT_54186_wa_chroma_addr_offset = 0;
+	/* apply wa only for NV12 surface with scatter gather enabled with viewport > 512 along
+	 * the vertical direction*/
+	if (address->type != PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+			address->video_progressive.luma_addr.high_part == 0xf4)
+		return;
+
+	if ((rotation_angle == 0 || rotation_angle == 180)
+			&& viewport_c_height <= 512)
+		return;
+
+	if ((rotation_angle == 90 || rotation_angle == 270)
+				&& viewport_c_width <= 512)
+		return;
+
+	switch (rotation_angle) {
+	case 0: /* 0 degree rotation */
+		row_height = 128;
+		patched_viewport_height = (viewport_c_height / row_height + 1) * row_height + 1;
+		patched_viewport_width = viewport_c_width;
+		hubp21->PLAT_54186_wa_chroma_addr_offset = 0;
+		break;
+	case 2: /* 180 degree rotation */
+		row_height = 128;
+		patched_viewport_height = viewport_c_height + row_height;
+		patched_viewport_width = viewport_c_width;
+		hubp21->PLAT_54186_wa_chroma_addr_offset = 0 - chroma_pitch * row_height * chroma_bpe;
+		break;
+	case 1: /* 90 degree rotation */
+		row_height = 256;
+		if (h_mirror_en) {
+			patched_viewport_height = viewport_c_height;
+			patched_viewport_width = viewport_c_width + row_height;
+			hubp21->PLAT_54186_wa_chroma_addr_offset = 0;
+		} else {
+			patched_viewport_height = viewport_c_height;
+			patched_viewport_width = viewport_c_width + row_height;
+			hubp21->PLAT_54186_wa_chroma_addr_offset = 0 - tile_blk_size;
+		}
+		break;
+	case 3:	/* 270 degree rotation */
+		row_height = 256;
+		if (h_mirror_en) {
+			patched_viewport_height = viewport_c_height;
+			patched_viewport_width = viewport_c_width + row_height;
+			hubp21->PLAT_54186_wa_chroma_addr_offset = 0 - tile_blk_size;
+		} else {
+			patched_viewport_height = viewport_c_height;
+			patched_viewport_width = viewport_c_width + row_height;
+			hubp21->PLAT_54186_wa_chroma_addr_offset = 0;
+		}
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+
+	/* catch cases where viewport keep growing */
+	ASSERT(patched_viewport_height && patched_viewport_height < 5000);
+	ASSERT(patched_viewport_width && patched_viewport_width < 5000);
+
+	REG_UPDATE_2(DCSURF_PRI_VIEWPORT_DIMENSION_C,
+			PRI_VIEWPORT_WIDTH_C, patched_viewport_width,
+			PRI_VIEWPORT_HEIGHT_C, patched_viewport_height);
+}
+
 void hubp21_set_vm_system_aperture_settings(struct hubp *hubp,
 		struct vm_system_aperture_param *apt)
 {
@@ -189,6 +347,562 @@ void hubp21_set_vm_system_aperture_settings(struct hubp *hubp,
 			SYSTEM_ACCESS_MODE, 0x3);
 }
 
+void hubp21_validate_dml_output(struct hubp *hubp,
+		struct dc_context *ctx,
+		struct _vcs_dpi_display_rq_regs_st *dml_rq_regs,
+		struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr,
+		struct _vcs_dpi_display_ttu_regs_st *dml_ttu_attr)
+{
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct _vcs_dpi_display_rq_regs_st rq_regs = {0};
+	struct _vcs_dpi_display_dlg_regs_st dlg_attr = {0};
+	struct _vcs_dpi_display_ttu_regs_st ttu_attr = {0};
+	DC_LOGGER_INIT(ctx->logger);
+	DC_LOG_DEBUG("DML Validation | Running Validation");
+
+	/* Requester - Per hubp */
+	REG_GET(HUBPRET_CONTROL,
+		DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs.plane1_base_address);
+	REG_GET_4(DCN_EXPANSION_MODE,
+		DRQ_EXPANSION_MODE, &rq_regs.drq_expansion_mode,
+		PRQ_EXPANSION_MODE, &rq_regs.prq_expansion_mode,
+		MRQ_EXPANSION_MODE, &rq_regs.mrq_expansion_mode,
+		CRQ_EXPANSION_MODE, &rq_regs.crq_expansion_mode);
+	REG_GET_8(DCHUBP_REQ_SIZE_CONFIG,
+		CHUNK_SIZE, &rq_regs.rq_regs_l.chunk_size,
+		MIN_CHUNK_SIZE, &rq_regs.rq_regs_l.min_chunk_size,
+		META_CHUNK_SIZE, &rq_regs.rq_regs_l.meta_chunk_size,
+		MIN_META_CHUNK_SIZE, &rq_regs.rq_regs_l.min_meta_chunk_size,
+		DPTE_GROUP_SIZE, &rq_regs.rq_regs_l.dpte_group_size,
+		VM_GROUP_SIZE, &rq_regs.rq_regs_l.mpte_group_size,
+		SWATH_HEIGHT, &rq_regs.rq_regs_l.swath_height,
+		PTE_ROW_HEIGHT_LINEAR, &rq_regs.rq_regs_l.pte_row_height_linear);
+	REG_GET_7(DCHUBP_REQ_SIZE_CONFIG_C,
+		CHUNK_SIZE_C, &rq_regs.rq_regs_c.chunk_size,
+		MIN_CHUNK_SIZE_C, &rq_regs.rq_regs_c.min_chunk_size,
+		META_CHUNK_SIZE_C, &rq_regs.rq_regs_c.meta_chunk_size,
+		MIN_META_CHUNK_SIZE_C, &rq_regs.rq_regs_c.min_meta_chunk_size,
+		DPTE_GROUP_SIZE_C, &rq_regs.rq_regs_c.dpte_group_size,
+		SWATH_HEIGHT_C, &rq_regs.rq_regs_c.swath_height,
+		PTE_ROW_HEIGHT_LINEAR_C, &rq_regs.rq_regs_c.pte_row_height_linear);
+
+	if (rq_regs.plane1_base_address != dml_rq_regs->plane1_base_address)
+		DC_LOG_DEBUG("DML Validation | HUBPRET_CONTROL:DET_BUF_PLANE1_BASE_ADDRESS - Expected: %u  Actual: %u\n",
+				dml_rq_regs->plane1_base_address, rq_regs.plane1_base_address);
+	if (rq_regs.drq_expansion_mode != dml_rq_regs->drq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:DRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->drq_expansion_mode, rq_regs.drq_expansion_mode);
+	if (rq_regs.prq_expansion_mode != dml_rq_regs->prq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:MRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->prq_expansion_mode, rq_regs.prq_expansion_mode);
+	if (rq_regs.mrq_expansion_mode != dml_rq_regs->mrq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:DET_BUF_PLANE1_BASE_ADDRESS - Expected: %u  Actual: %u\n",
+				dml_rq_regs->mrq_expansion_mode, rq_regs.mrq_expansion_mode);
+	if (rq_regs.crq_expansion_mode != dml_rq_regs->crq_expansion_mode)
+		DC_LOG_DEBUG("DML Validation | DCN_EXPANSION_MODE:CRQ_EXPANSION_MODE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->crq_expansion_mode, rq_regs.crq_expansion_mode);
+
+	if (rq_regs.rq_regs_l.chunk_size != dml_rq_regs->rq_regs_l.chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.chunk_size, rq_regs.rq_regs_l.chunk_size);
+	if (rq_regs.rq_regs_l.min_chunk_size != dml_rq_regs->rq_regs_l.min_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:MIN_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.min_chunk_size, rq_regs.rq_regs_l.min_chunk_size);
+	if (rq_regs.rq_regs_l.meta_chunk_size != dml_rq_regs->rq_regs_l.meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:META_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.meta_chunk_size, rq_regs.rq_regs_l.meta_chunk_size);
+	if (rq_regs.rq_regs_l.min_meta_chunk_size != dml_rq_regs->rq_regs_l.min_meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:MIN_META_CHUNK_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs.rq_regs_l.min_meta_chunk_size);
+	if (rq_regs.rq_regs_l.dpte_group_size != dml_rq_regs->rq_regs_l.dpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:DPTE_GROUP_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.dpte_group_size, rq_regs.rq_regs_l.dpte_group_size);
+	if (rq_regs.rq_regs_l.mpte_group_size != dml_rq_regs->rq_regs_l.mpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:VM_GROUP_SIZE - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.mpte_group_size, rq_regs.rq_regs_l.mpte_group_size);
+	if (rq_regs.rq_regs_l.swath_height != dml_rq_regs->rq_regs_l.swath_height)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:SWATH_HEIGHT - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.swath_height, rq_regs.rq_regs_l.swath_height);
+	if (rq_regs.rq_regs_l.pte_row_height_linear != dml_rq_regs->rq_regs_l.pte_row_height_linear)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG_C:PTE_ROW_HEIGHT_LINEAR - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_l.pte_row_height_linear, rq_regs.rq_regs_l.pte_row_height_linear);
+
+	if (rq_regs.rq_regs_c.chunk_size != dml_rq_regs->rq_regs_c.chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.chunk_size, rq_regs.rq_regs_c.chunk_size);
+	if (rq_regs.rq_regs_c.min_chunk_size != dml_rq_regs->rq_regs_c.min_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:MIN_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.min_chunk_size, rq_regs.rq_regs_c.min_chunk_size);
+	if (rq_regs.rq_regs_c.meta_chunk_size != dml_rq_regs->rq_regs_c.meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:META_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.meta_chunk_size, rq_regs.rq_regs_c.meta_chunk_size);
+	if (rq_regs.rq_regs_c.min_meta_chunk_size != dml_rq_regs->rq_regs_c.min_meta_chunk_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:MIN_META_CHUNK_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.min_meta_chunk_size, rq_regs.rq_regs_c.min_meta_chunk_size);
+	if (rq_regs.rq_regs_c.dpte_group_size != dml_rq_regs->rq_regs_c.dpte_group_size)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:DPTE_GROUP_SIZE_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.dpte_group_size, rq_regs.rq_regs_c.dpte_group_size);
+	if (rq_regs.rq_regs_c.swath_height != dml_rq_regs->rq_regs_c.swath_height)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:SWATH_HEIGHT_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.swath_height, rq_regs.rq_regs_c.swath_height);
+	if (rq_regs.rq_regs_c.pte_row_height_linear != dml_rq_regs->rq_regs_c.pte_row_height_linear)
+		DC_LOG_DEBUG("DML Validation | DCHUBP_REQ_SIZE_CONFIG:PTE_ROW_HEIGHT_LINEAR_C - Expected: %u  Actual: %u\n",
+				dml_rq_regs->rq_regs_c.pte_row_height_linear, rq_regs.rq_regs_c.pte_row_height_linear);
+
+
+	/* DLG - Per hubp */
+	REG_GET_2(BLANK_OFFSET_0,
+		REFCYC_H_BLANK_END, &dlg_attr.refcyc_h_blank_end,
+		DLG_V_BLANK_END, &dlg_attr.dlg_vblank_end);
+	REG_GET(BLANK_OFFSET_1,
+		MIN_DST_Y_NEXT_START, &dlg_attr.min_dst_y_next_start);
+	REG_GET(DST_DIMENSIONS,
+		REFCYC_PER_HTOTAL, &dlg_attr.refcyc_per_htotal);
+	REG_GET_2(DST_AFTER_SCALER,
+		REFCYC_X_AFTER_SCALER, &dlg_attr.refcyc_x_after_scaler,
+		DST_Y_AFTER_SCALER, &dlg_attr.dst_y_after_scaler);
+	REG_GET(REF_FREQ_TO_PIX_FREQ,
+		REF_FREQ_TO_PIX_FREQ, &dlg_attr.ref_freq_to_pix_freq);
+
+	if (dlg_attr.refcyc_h_blank_end != dml_dlg_attr->refcyc_h_blank_end)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_0:REFCYC_H_BLANK_END - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_h_blank_end, dlg_attr.refcyc_h_blank_end);
+	if (dlg_attr.dlg_vblank_end != dml_dlg_attr->dlg_vblank_end)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_0:DLG_V_BLANK_END - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dlg_vblank_end, dlg_attr.dlg_vblank_end);
+	if (dlg_attr.min_dst_y_next_start != dml_dlg_attr->min_dst_y_next_start)
+		DC_LOG_DEBUG("DML Validation | BLANK_OFFSET_1:MIN_DST_Y_NEXT_START - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->min_dst_y_next_start, dlg_attr.min_dst_y_next_start);
+	if (dlg_attr.refcyc_per_htotal != dml_dlg_attr->refcyc_per_htotal)
+		DC_LOG_DEBUG("DML Validation | DST_DIMENSIONS:REFCYC_PER_HTOTAL - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_htotal, dlg_attr.refcyc_per_htotal);
+	if (dlg_attr.refcyc_x_after_scaler != dml_dlg_attr->refcyc_x_after_scaler)
+		DC_LOG_DEBUG("DML Validation | DST_AFTER_SCALER:REFCYC_X_AFTER_SCALER - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_x_after_scaler, dlg_attr.refcyc_x_after_scaler);
+	if (dlg_attr.dst_y_after_scaler != dml_dlg_attr->dst_y_after_scaler)
+		DC_LOG_DEBUG("DML Validation | DST_AFTER_SCALER:DST_Y_AFTER_SCALER - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_after_scaler, dlg_attr.dst_y_after_scaler);
+	if (dlg_attr.ref_freq_to_pix_freq != dml_dlg_attr->ref_freq_to_pix_freq)
+		DC_LOG_DEBUG("DML Validation | REF_FREQ_TO_PIX_FREQ:REF_FREQ_TO_PIX_FREQ - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->ref_freq_to_pix_freq, dlg_attr.ref_freq_to_pix_freq);
+
+	/* DLG - Per luma/chroma */
+	REG_GET(VBLANK_PARAMETERS_1,
+		REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr.refcyc_per_pte_group_vblank_l);
+	if (REG(NOM_PARAMETERS_0))
+		REG_GET(NOM_PARAMETERS_0,
+			DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr.dst_y_per_pte_row_nom_l);
+	if (REG(NOM_PARAMETERS_1))
+		REG_GET(NOM_PARAMETERS_1,
+			REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr.refcyc_per_pte_group_nom_l);
+	REG_GET(NOM_PARAMETERS_4,
+		DST_Y_PER_META_ROW_NOM_L, &dlg_attr.dst_y_per_meta_row_nom_l);
+	REG_GET(NOM_PARAMETERS_5,
+		REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr.refcyc_per_meta_chunk_nom_l);
+	REG_GET_2(PER_LINE_DELIVERY,
+		REFCYC_PER_LINE_DELIVERY_L, &dlg_attr.refcyc_per_line_delivery_l,
+		REFCYC_PER_LINE_DELIVERY_C, &dlg_attr.refcyc_per_line_delivery_c);
+	REG_GET_2(PER_LINE_DELIVERY_PRE,
+		REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr.refcyc_per_line_delivery_pre_l,
+		REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr.refcyc_per_line_delivery_pre_c);
+	REG_GET(VBLANK_PARAMETERS_2,
+		REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr.refcyc_per_pte_group_vblank_c);
+	if (REG(NOM_PARAMETERS_2))
+		REG_GET(NOM_PARAMETERS_2,
+			DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr.dst_y_per_pte_row_nom_c);
+	if (REG(NOM_PARAMETERS_3))
+		REG_GET(NOM_PARAMETERS_3,
+			REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr.refcyc_per_pte_group_nom_c);
+	REG_GET(NOM_PARAMETERS_6,
+		DST_Y_PER_META_ROW_NOM_C, &dlg_attr.dst_y_per_meta_row_nom_c);
+	REG_GET(NOM_PARAMETERS_7,
+		REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr.refcyc_per_meta_chunk_nom_c);
+	REG_GET(VBLANK_PARAMETERS_3,
+			REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr.refcyc_per_meta_chunk_vblank_l);
+	REG_GET(VBLANK_PARAMETERS_4,
+			REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr.refcyc_per_meta_chunk_vblank_c);
+
+	if (dlg_attr.refcyc_per_pte_group_vblank_l != dml_dlg_attr->refcyc_per_pte_group_vblank_l)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_1:REFCYC_PER_PTE_GROUP_VBLANK_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_vblank_l, dlg_attr.refcyc_per_pte_group_vblank_l);
+	if (dlg_attr.dst_y_per_pte_row_nom_l != dml_dlg_attr->dst_y_per_pte_row_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_0:DST_Y_PER_PTE_ROW_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_pte_row_nom_l, dlg_attr.dst_y_per_pte_row_nom_l);
+	if (dlg_attr.refcyc_per_pte_group_nom_l != dml_dlg_attr->refcyc_per_pte_group_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_1:REFCYC_PER_PTE_GROUP_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_nom_l, dlg_attr.refcyc_per_pte_group_nom_l);
+	if (dlg_attr.dst_y_per_meta_row_nom_l != dml_dlg_attr->dst_y_per_meta_row_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_4:DST_Y_PER_META_ROW_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_meta_row_nom_l, dlg_attr.dst_y_per_meta_row_nom_l);
+	if (dlg_attr.refcyc_per_meta_chunk_nom_l != dml_dlg_attr->refcyc_per_meta_chunk_nom_l)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_5:REFCYC_PER_META_CHUNK_NOM_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_nom_l, dlg_attr.refcyc_per_meta_chunk_nom_l);
+	if (dlg_attr.refcyc_per_line_delivery_l != dml_dlg_attr->refcyc_per_line_delivery_l)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY:REFCYC_PER_LINE_DELIVERY_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_l, dlg_attr.refcyc_per_line_delivery_l);
+	if (dlg_attr.refcyc_per_line_delivery_c != dml_dlg_attr->refcyc_per_line_delivery_c)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY:REFCYC_PER_LINE_DELIVERY_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_c, dlg_attr.refcyc_per_line_delivery_c);
+	if (dlg_attr.refcyc_per_pte_group_vblank_c != dml_dlg_attr->refcyc_per_pte_group_vblank_c)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_2:REFCYC_PER_PTE_GROUP_VBLANK_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_vblank_c, dlg_attr.refcyc_per_pte_group_vblank_c);
+	if (dlg_attr.dst_y_per_pte_row_nom_c != dml_dlg_attr->dst_y_per_pte_row_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_2:DST_Y_PER_PTE_ROW_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_pte_row_nom_c, dlg_attr.dst_y_per_pte_row_nom_c);
+	if (dlg_attr.refcyc_per_pte_group_nom_c != dml_dlg_attr->refcyc_per_pte_group_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_3:REFCYC_PER_PTE_GROUP_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_nom_c, dlg_attr.refcyc_per_pte_group_nom_c);
+	if (dlg_attr.dst_y_per_meta_row_nom_c != dml_dlg_attr->dst_y_per_meta_row_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_6:DST_Y_PER_META_ROW_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->dst_y_per_meta_row_nom_c, dlg_attr.dst_y_per_meta_row_nom_c);
+	if (dlg_attr.refcyc_per_meta_chunk_nom_c != dml_dlg_attr->refcyc_per_meta_chunk_nom_c)
+		DC_LOG_DEBUG("DML Validation | NOM_PARAMETERS_7:REFCYC_PER_META_CHUNK_NOM_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_nom_c, dlg_attr.refcyc_per_meta_chunk_nom_c);
+	if (dlg_attr.refcyc_per_line_delivery_pre_l != dml_dlg_attr->refcyc_per_line_delivery_pre_l)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY_PRE:REFCYC_PER_LINE_DELIVERY_PRE_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_pre_l, dlg_attr.refcyc_per_line_delivery_pre_l);
+	if (dlg_attr.refcyc_per_line_delivery_pre_c != dml_dlg_attr->refcyc_per_line_delivery_pre_c)
+		DC_LOG_DEBUG("DML Validation | PER_LINE_DELIVERY_PRE:REFCYC_PER_LINE_DELIVERY_PRE_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_line_delivery_pre_c, dlg_attr.refcyc_per_line_delivery_pre_c);
+	if (dlg_attr.refcyc_per_meta_chunk_vblank_l != dml_dlg_attr->refcyc_per_meta_chunk_vblank_l)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_3:REFCYC_PER_META_CHUNK_VBLANK_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_vblank_l, dlg_attr.refcyc_per_meta_chunk_vblank_l);
+	if (dlg_attr.refcyc_per_meta_chunk_vblank_c != dml_dlg_attr->refcyc_per_meta_chunk_vblank_c)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_4:REFCYC_PER_META_CHUNK_VBLANK_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_vblank_c, dlg_attr.refcyc_per_meta_chunk_vblank_c);
+
+	/* TTU - per hubp */
+	REG_GET_2(DCN_TTU_QOS_WM,
+		QoS_LEVEL_LOW_WM, &ttu_attr.qos_level_low_wm,
+		QoS_LEVEL_HIGH_WM, &ttu_attr.qos_level_high_wm);
+
+	if (ttu_attr.qos_level_low_wm != dml_ttu_attr->qos_level_low_wm)
+		DC_LOG_DEBUG("DML Validation | DCN_TTU_QOS_WM:QoS_LEVEL_LOW_WM - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_low_wm, ttu_attr.qos_level_low_wm);
+	if (ttu_attr.qos_level_high_wm != dml_ttu_attr->qos_level_high_wm)
+		DC_LOG_DEBUG("DML Validation | DCN_TTU_QOS_WM:QoS_LEVEL_HIGH_WM - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_high_wm, ttu_attr.qos_level_high_wm);
+
+	/* TTU - per luma/chroma */
+	/* Assumed surf0 is luma and 1 is chroma */
+	REG_GET_3(DCN_SURF0_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_l,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_l,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_l);
+	REG_GET_3(DCN_SURF1_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_c,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_c,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_c);
+	REG_GET_3(DCN_CUR0_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr.refcyc_per_req_delivery_cur0,
+		QoS_LEVEL_FIXED, &ttu_attr.qos_level_fixed_cur0,
+		QoS_RAMP_DISABLE, &ttu_attr.qos_ramp_disable_cur0);
+	REG_GET(FLIP_PARAMETERS_1,
+		REFCYC_PER_PTE_GROUP_FLIP_L, &dlg_attr.refcyc_per_pte_group_flip_l);
+	REG_GET(DCN_CUR0_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_cur0);
+	REG_GET(DCN_CUR1_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_cur1);
+	REG_GET(DCN_SURF0_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_l);
+	REG_GET(DCN_SURF1_TTU_CNTL1,
+			REFCYC_PER_REQ_DELIVERY_PRE, &ttu_attr.refcyc_per_req_delivery_pre_c);
+
+	if (ttu_attr.refcyc_per_req_delivery_l != dml_ttu_attr->refcyc_per_req_delivery_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_l, ttu_attr.refcyc_per_req_delivery_l);
+	if (ttu_attr.qos_level_fixed_l != dml_ttu_attr->qos_level_fixed_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_l, ttu_attr.qos_level_fixed_l);
+	if (ttu_attr.qos_ramp_disable_l != dml_ttu_attr->qos_ramp_disable_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_l, ttu_attr.qos_ramp_disable_l);
+	if (ttu_attr.refcyc_per_req_delivery_c != dml_ttu_attr->refcyc_per_req_delivery_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_c, ttu_attr.refcyc_per_req_delivery_c);
+	if (ttu_attr.qos_level_fixed_c != dml_ttu_attr->qos_level_fixed_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_c, ttu_attr.qos_level_fixed_c);
+	if (ttu_attr.qos_ramp_disable_c != dml_ttu_attr->qos_ramp_disable_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_c, ttu_attr.qos_ramp_disable_c);
+	if (ttu_attr.refcyc_per_req_delivery_cur0 != dml_ttu_attr->refcyc_per_req_delivery_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:REFCYC_PER_REQ_DELIVERY - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_cur0, ttu_attr.refcyc_per_req_delivery_cur0);
+	if (ttu_attr.qos_level_fixed_cur0 != dml_ttu_attr->qos_level_fixed_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:QoS_LEVEL_FIXED - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_level_fixed_cur0, ttu_attr.qos_level_fixed_cur0);
+	if (ttu_attr.qos_ramp_disable_cur0 != dml_ttu_attr->qos_ramp_disable_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL0:QoS_RAMP_DISABLE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->qos_ramp_disable_cur0, ttu_attr.qos_ramp_disable_cur0);
+	if (dlg_attr.refcyc_per_pte_group_flip_l != dml_dlg_attr->refcyc_per_pte_group_flip_l)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_1:REFCYC_PER_PTE_GROUP_FLIP_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_flip_l, dlg_attr.refcyc_per_pte_group_flip_l);
+	if (ttu_attr.refcyc_per_req_delivery_pre_cur0 != dml_ttu_attr->refcyc_per_req_delivery_pre_cur0)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR0_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_cur0, ttu_attr.refcyc_per_req_delivery_pre_cur0);
+	if (ttu_attr.refcyc_per_req_delivery_pre_cur1 != dml_ttu_attr->refcyc_per_req_delivery_pre_cur1)
+		DC_LOG_DEBUG("DML Validation | DCN_CUR1_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_cur1, ttu_attr.refcyc_per_req_delivery_pre_cur1);
+	if (ttu_attr.refcyc_per_req_delivery_pre_l != dml_ttu_attr->refcyc_per_req_delivery_pre_l)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF0_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_l, ttu_attr.refcyc_per_req_delivery_pre_l);
+	if (ttu_attr.refcyc_per_req_delivery_pre_c != dml_ttu_attr->refcyc_per_req_delivery_pre_c)
+		DC_LOG_DEBUG("DML Validation | DCN_SURF1_TTU_CNTL1:REFCYC_PER_REQ_DELIVERY_PRE - Expected: %u  Actual: %u\n",
+				dml_ttu_attr->refcyc_per_req_delivery_pre_c, ttu_attr.refcyc_per_req_delivery_pre_c);
+
+	/* Host VM deadline regs */
+	REG_GET(VBLANK_PARAMETERS_5,
+		REFCYC_PER_VM_GROUP_VBLANK, &dlg_attr.refcyc_per_vm_group_vblank);
+	REG_GET(VBLANK_PARAMETERS_6,
+		REFCYC_PER_VM_REQ_VBLANK, &dlg_attr.refcyc_per_vm_req_vblank);
+	REG_GET(FLIP_PARAMETERS_3,
+		REFCYC_PER_VM_GROUP_FLIP, &dlg_attr.refcyc_per_vm_group_flip);
+	REG_GET(FLIP_PARAMETERS_4,
+		REFCYC_PER_VM_REQ_FLIP, &dlg_attr.refcyc_per_vm_req_flip);
+	REG_GET(FLIP_PARAMETERS_5,
+		REFCYC_PER_PTE_GROUP_FLIP_C, &dlg_attr.refcyc_per_pte_group_flip_c);
+	REG_GET(FLIP_PARAMETERS_6,
+		REFCYC_PER_META_CHUNK_FLIP_C, &dlg_attr.refcyc_per_meta_chunk_flip_c);
+	REG_GET(FLIP_PARAMETERS_2,
+		REFCYC_PER_META_CHUNK_FLIP_L, &dlg_attr.refcyc_per_meta_chunk_flip_l);
+
+	if (dlg_attr.refcyc_per_vm_group_vblank != dml_dlg_attr->refcyc_per_vm_group_vblank)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_5:REFCYC_PER_VM_GROUP_VBLANK - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_vm_group_vblank, dlg_attr.refcyc_per_vm_group_vblank);
+	if (dlg_attr.refcyc_per_vm_req_vblank != dml_dlg_attr->refcyc_per_vm_req_vblank)
+		DC_LOG_DEBUG("DML Validation | VBLANK_PARAMETERS_6:REFCYC_PER_VM_REQ_VBLANK - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_vm_req_vblank, dlg_attr.refcyc_per_vm_req_vblank);
+	if (dlg_attr.refcyc_per_vm_group_flip != dml_dlg_attr->refcyc_per_vm_group_flip)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_3:REFCYC_PER_VM_GROUP_FLIP - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_vm_group_flip, dlg_attr.refcyc_per_vm_group_flip);
+	if (dlg_attr.refcyc_per_vm_req_flip != dml_dlg_attr->refcyc_per_vm_req_flip)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_4:REFCYC_PER_VM_REQ_FLIP - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_vm_req_flip, dlg_attr.refcyc_per_vm_req_flip);
+	if (dlg_attr.refcyc_per_pte_group_flip_c != dml_dlg_attr->refcyc_per_pte_group_flip_c)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_5:REFCYC_PER_PTE_GROUP_FLIP_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_pte_group_flip_c, dlg_attr.refcyc_per_pte_group_flip_c);
+	if (dlg_attr.refcyc_per_meta_chunk_flip_c != dml_dlg_attr->refcyc_per_meta_chunk_flip_c)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_6:REFCYC_PER_META_CHUNK_FLIP_C - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_flip_c, dlg_attr.refcyc_per_meta_chunk_flip_c);
+	if (dlg_attr.refcyc_per_meta_chunk_flip_l != dml_dlg_attr->refcyc_per_meta_chunk_flip_l)
+		DC_LOG_DEBUG("DML Validation | FLIP_PARAMETERS_2:REFCYC_PER_META_CHUNK_FLIP_L - Expected: %u  Actual: %u\n",
+				dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l);
+}
+
+static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip_registers *flip_regs)
+{
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+
+	REG_UPDATE_3(DCSURF_FLIP_CONTROL,
+					SURFACE_FLIP_TYPE, flip_regs->immediate,
+					SURFACE_FLIP_MODE_FOR_STEREOSYNC, flip_regs->grph_stereo,
+					SURFACE_FLIP_IN_STEREOSYNC, flip_regs->grph_stereo);
+
+	REG_UPDATE(VMID_SETTINGS_0,
+				VMID, flip_regs->vmid);
+
+	REG_UPDATE_8(DCSURF_SURFACE_CONTROL,
+			PRIMARY_SURFACE_TMZ, flip_regs->tmz_surface,
+			PRIMARY_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			PRIMARY_META_SURFACE_TMZ, flip_regs->tmz_surface,
+			PRIMARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			SECONDARY_SURFACE_TMZ, flip_regs->tmz_surface,
+			SECONDARY_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			SECONDARY_META_SURFACE_TMZ, flip_regs->tmz_surface,
+			SECONDARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface);
+
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
+			PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C);
+
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
+			PRIMARY_META_SURFACE_ADDRESS_C,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_C);
+
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
+			PRIMARY_META_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
+			PRIMARY_META_SURFACE_ADDRESS,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS);
+
+	REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0,
+			SECONDARY_META_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0,
+			SECONDARY_META_SURFACE_ADDRESS,
+			flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS);
+
+
+	REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0,
+			SECONDARY_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0,
+			SECONDARY_SURFACE_ADDRESS,
+			flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS);
+
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
+			PRIMARY_SURFACE_ADDRESS_HIGH_C,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C);
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
+			PRIMARY_SURFACE_ADDRESS_C,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C);
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
+			PRIMARY_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+			PRIMARY_SURFACE_ADDRESS,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS);
+}
+
+void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs)
+{
+	struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv;
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 };
+
+	PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo;
+	PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst;
+	PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate;
+	PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface;
+	PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
+
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_execute(dmcub);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_wait_idle(dmcub);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+}
+
+bool hubp21_program_surface_flip_and_addr(
+		struct hubp *hubp,
+		const struct dc_plane_address *address,
+		bool flip_immediate)
+{
+	struct dc_debug_options *debug = &hubp->ctx->dc->debug;
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct surface_flip_registers flip_regs = { 0 };
+
+	flip_regs.vmid = address->vmid;
+
+	switch (address->type) {
+	case PLN_ADDR_TYPE_GRAPHICS:
+		if (address->grph.addr.quad_part == 0) {
+			BREAK_TO_DEBUGGER();
+			break;
+		}
+
+		if (address->grph.meta_addr.quad_part != 0) {
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph.meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph.meta_addr.high_part;
+		}
+
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph.addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph.addr.high_part;
+		break;
+	case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
+		if (address->video_progressive.luma_addr.quad_part == 0
+				|| address->video_progressive.chroma_addr.quad_part == 0)
+			break;
+
+		if (address->video_progressive.luma_meta_addr.quad_part != 0) {
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->video_progressive.luma_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->video_progressive.luma_meta_addr.high_part;
+
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C =
+					address->video_progressive.chroma_meta_addr.high_part;
+		}
+
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->video_progressive.luma_addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->video_progressive.luma_addr.high_part;
+
+		if (debug->nv12_iflip_vm_wa) {
+			flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset;
+		} else
+			flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_addr.low_part;
+
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C =
+				address->video_progressive.chroma_addr.high_part;
+
+		break;
+	case PLN_ADDR_TYPE_GRPH_STEREO:
+		if (address->grph_stereo.left_addr.quad_part == 0)
+			break;
+		if (address->grph_stereo.right_addr.quad_part == 0)
+			break;
+
+		flip_regs.grph_stereo = true;
+
+		if (address->grph_stereo.right_meta_addr.quad_part != 0) {
+			flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.right_meta_addr.low_part;
+			flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.right_meta_addr.high_part;
+		}
+
+		if (address->grph_stereo.left_meta_addr.quad_part != 0) {
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.left_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.left_meta_addr.high_part;
+		}
+
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph_stereo.left_addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.left_addr.high_part;
+
+		flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS =
+				address->grph_stereo.right_addr.low_part;
+		flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.right_addr.high_part;
+
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+
+	flip_regs.tmz_surface = address->tmz_surface;
+	flip_regs.immediate = flip_immediate;
+
+	if (hubp->ctx->dc->debug.enable_dmcub_surface_flip && address->type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+		dmcub_PLAT_54186_wa(hubp, &flip_regs);
+	else
+		program_surface_flip_and_addr(hubp, &flip_regs);
+
+	hubp->request_address = *address;
+
+	return true;
+}
+
 void hubp21_init(struct hubp *hubp)
 {
 	// DEDCN21-133: Inconsistent row starting line for flip between DPTE and Meta
@@ -201,15 +915,16 @@ void hubp21_init(struct hubp *hubp)
 static struct hubp_funcs dcn21_hubp_funcs = {
 	.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
 	.hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
-	.hubp_program_surface_flip_and_addr = hubp2_program_surface_flip_and_addr,
-	.hubp_program_surface_config = hubp2_program_surface_config,
+	.hubp_program_surface_flip_and_addr = hubp21_program_surface_flip_and_addr,
+	.hubp_program_surface_config = hubp1_program_surface_config,
 	.hubp_is_flip_pending = hubp1_is_flip_pending,
 	.hubp_setup = hubp21_setup,
 	.hubp_setup_interdependent = hubp2_setup_interdependent,
 	.hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings,
 	.set_blank = hubp1_set_blank,
 	.dcc_control = hubp1_dcc_control,
-	.mem_program_viewport = min_set_viewport,
+	.mem_program_viewport = hubp21_set_viewport,
+	.apply_PLAT_54186_wa = hubp21_apply_PLAT_54186_wa,
 	.set_cursor_attributes	= hubp2_cursor_set_attributes,
 	.set_cursor_position	= hubp1_cursor_set_position,
 	.hubp_clk_cntl = hubp1_clk_cntl,
@@ -221,6 +936,7 @@ static struct hubp_funcs dcn21_hubp_funcs = {
 	.hubp_clear_underflow = hubp1_clear_underflow,
 	.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
 	.hubp_init = hubp21_init,
+	.validate_dml_output = hubp21_validate_dml_output,
 };
 
 bool hubp21_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h
index aeda719a2a13..9873b6cbc5ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h
@@ -108,6 +108,7 @@ struct dcn21_hubp {
 	const struct dcn_hubp2_registers *hubp_regs;
 	const struct dcn_hubp2_shift *hubp_shift;
 	const struct dcn_hubp2_mask *hubp_mask;
+	int PLAT_54186_wa_chroma_addr_offset;
 };
 
 bool hubp21_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
new file mode 100644
index 000000000000..081ad8e43d58
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dm_helpers.h"
+#include "core_types.h"
+#include "resource.h"
+#include "dce/dce_hwseq.h"
+#include "dcn21_hwseq.h"
+#include "vmid.h"
+#include "reg_helper.h"
+#include "hw/clk_mgr.h"
+
+
+#define DC_LOGGER_INIT(logger)
+
+#define CTX \
+	hws->ctx
+#define REG(reg)\
+	hws->regs->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+	hws->shifts->field_name, hws->masks->field_name
+
+/* Temporary read settings, future will get values from kmd directly */
+static void mmhub_update_page_table_config(struct dcn_hubbub_phys_addr_config *config,
+		struct dce_hwseq *hws)
+{
+	uint32_t page_table_base_hi;
+	uint32_t page_table_base_lo;
+
+	REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			PAGE_DIRECTORY_ENTRY_HI32, &page_table_base_hi);
+	REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			PAGE_DIRECTORY_ENTRY_LO32, &page_table_base_lo);
+
+	config->gart_config.page_table_base_addr = ((uint64_t)page_table_base_hi << 32) | page_table_base_lo;
+
+}
+
+int dcn21_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
+{
+	struct dcn_hubbub_phys_addr_config config;
+
+	config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
+	config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
+	config.system_aperture.fb_base = pa_config->system_aperture.fb_base;
+	config.system_aperture.agp_top = pa_config->system_aperture.agp_top;
+	config.system_aperture.agp_bot = pa_config->system_aperture.agp_bot;
+	config.system_aperture.agp_base = pa_config->system_aperture.agp_base;
+	config.gart_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr;
+	config.gart_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr;
+	config.gart_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
+
+	mmhub_update_page_table_config(&config, hws);
+
+	return dc->res_pool->hubbub->funcs->init_dchub_sys_ctx(dc->res_pool->hubbub, &config);
+}
+
+// work around for Renoir s0i3, if register is programmed, bypass golden init.
+
+bool dcn21_s0i3_golden_init_wa(struct dc *dc)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	uint32_t value = 0;
+
+	value = REG_READ(MICROSECOND_TIME_BASE_DIV);
+
+	return value != 0x00120464;
+}
+
+void dcn21_exit_optimized_pwr_state(
+		const struct dc *dc,
+		struct dc_state *context)
+{
+	dc->clk_mgr->funcs->update_clocks(
+			dc->clk_mgr,
+			context,
+			false);
+}
+
+void dcn21_optimize_pwr_state(
+		const struct dc *dc,
+		struct dc_state *context)
+{
+	dc->clk_mgr->funcs->update_clocks(
+			dc->clk_mgr,
+			context,
+			true);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h
new file mode 100644
index 000000000000..182736096123
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h
@@ -0,0 +1,47 @@
+/*
+* Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HWSS_DCN21_H__
+#define __DC_HWSS_DCN21_H__
+
+#include "hw_sequencer_private.h"
+
+struct dc;
+
+int dcn21_init_sys_ctx(struct dce_hwseq *hws,
+		struct dc *dc,
+		struct dc_phy_addr_space_config *pa_config);
+
+bool dcn21_s0i3_golden_init_wa(struct dc *dc);
+
+void dcn21_exit_optimized_pwr_state(
+		const struct dc *dc,
+		struct dc_state *context);
+
+void dcn21_optimize_pwr_state(
+		const struct dc *dc,
+		struct dc_state *context);
+
+#endif /* __DC_HWSS_DCN21_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
new file mode 100644
index 000000000000..4861aa5c59ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10/dcn10_hw_sequencer.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21_hwseq.h"
+
+static const struct hw_sequencer_funcs dcn21_funcs = {
+	.program_gamut_remap = dcn10_program_gamut_remap,
+	.init_hw = dcn10_init_hw,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = NULL,
+	.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn20_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dce110_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dcn20_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn20_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn20_disable_plane,
+	.pipe_control_lock = dcn20_pipe_control_lock,
+	.pipe_control_lock_global = dcn20_pipe_control_lock_global,
+	.prepare_bandwidth = dcn20_prepare_bandwidth,
+	.optimize_bandwidth = dcn20_optimize_bandwidth,
+	.update_bandwidth = dcn20_update_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn10_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dce110_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.program_triplebuffer = dcn20_program_triple_buffer,
+	.enable_writeback = dcn20_enable_writeback,
+	.disable_writeback = dcn20_disable_writeback,
+	.dmdata_status_done = dcn20_dmdata_status_done,
+	.program_dmdata_engine = dcn20_program_dmdata_engine,
+	.set_dmdata_attributes = dcn20_set_dmdata_attributes,
+	.init_sys_ctx = dcn21_init_sys_ctx,
+	.init_vm_ctx = dcn20_init_vm_ctx,
+	.set_flip_control_gsl = dcn20_set_flip_control_gsl,
+	.optimize_pwr_state = dcn21_optimize_pwr_state,
+	.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.optimize_pwr_state = dcn21_optimize_pwr_state,
+	.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+};
+
+static const struct hwseq_private_funcs dcn21_private_funcs = {
+	.init_pipes = dcn10_init_pipes,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.update_mpcc = dcn20_update_mpcc,
+	.set_input_transfer_func = dcn20_set_input_transfer_func,
+	.set_output_transfer_func = dcn20_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn20_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn20_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.disable_stream_gating = dcn20_disable_stream_gating,
+	.enable_stream_gating = dcn20_enable_stream_gating,
+	.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = dcn20_init_blank,
+	.disable_vga = dcn20_disable_vga,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn20_plane_atomic_disable,
+	.plane_atomic_power_down = dcn10_plane_atomic_power_down,
+	.enable_power_gating_plane = dcn20_enable_power_gating_plane,
+	.dpp_pg_control = dcn20_dpp_pg_control,
+	.hubp_pg_control = dcn20_hubp_pg_control,
+	.dsc_pg_control = NULL,
+	.update_odm = dcn20_update_odm,
+	.dsc_pg_control = dcn20_dsc_pg_control,
+	.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
+	.get_hdr_visual_confirm_color = dcn10_get_hdr_visual_confirm_color,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+	.s0i3_golden_init_wa = dcn21_s0i3_golden_init_wa,
+	.wait_for_blank_complete = dcn20_wait_for_blank_complete,
+	.dccg_init = dcn20_dccg_init,
+	.set_blend_lut = dcn20_set_blend_lut,
+	.set_shaper_3dlut = dcn20_set_shaper_3dlut,
+};
+
+void dcn21_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn21_funcs;
+	dc->hwseq->funcs = dcn21_private_funcs;
+
+	if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+		dc->hwss.init_hw = dcn20_fpga_init_hw;
+		dc->hwseq->funcs.init_pipes = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
new file mode 100644
index 000000000000..3ed24292648a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN21_INIT_H__
+#define __DC_DCN21_INIT_H__
+
+struct dc;
+
+void dcn21_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN20_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
new file mode 100644
index 000000000000..e45683ac871a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+
+#include <linux/delay.h>
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn21_link_encoder.h"
+#include "stream_encoder.h"
+
+#include "i2caux_interface.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#define CTX \
+	enc10->base.ctx
+#define DC_LOGGER \
+	enc10->base.ctx->logger
+
+#define REG(reg)\
+	(enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	enc10->link_shift->field_name, enc10->link_mask->field_name
+
+#define IND_REG(index) \
+	(enc10->link_regs->index)
+
+static struct mpll_cfg dcn21_mpll_cfg_ref[] = {
+	// RBR
+	{
+		.hdmimode_enable = 0,
+		.ref_range = 1,
+		.ref_clk_mpllb_div = 1,
+		.mpllb_ssc_en = 1,
+		.mpllb_div5_clk_en = 1,
+		.mpllb_multiplier = 238,
+		.mpllb_fracn_en = 0,
+		.mpllb_fracn_quot = 0,
+		.mpllb_fracn_rem = 0,
+		.mpllb_fracn_den = 1,
+		.mpllb_ssc_up_spread = 0,
+		.mpllb_ssc_peak = 44237,
+		.mpllb_ssc_stepsize = 59454,
+		.mpllb_div_clk_en = 0,
+		.mpllb_div_multiplier = 0,
+		.mpllb_hdmi_div = 0,
+		.mpllb_tx_clk_div = 2,
+		.tx_vboost_lvl = 5,
+		.mpllb_pmix_en = 1,
+		.mpllb_word_div2_en = 0,
+		.mpllb_ana_v2i = 2,
+		.mpllb_ana_freq_vco = 2,
+		.mpllb_ana_cp_int = 9,
+		.mpllb_ana_cp_prop = 15,
+		.hdmi_pixel_clk_div = 0,
+	},
+	// HBR
+	{
+		.hdmimode_enable = 0,
+		.ref_range = 1,
+		.ref_clk_mpllb_div = 1,
+		.mpllb_ssc_en = 1,
+		.mpllb_div5_clk_en = 1,
+		.mpllb_multiplier = 192,
+		.mpllb_fracn_en = 1,
+		.mpllb_fracn_quot = 32768,
+		.mpllb_fracn_rem = 0,
+		.mpllb_fracn_den = 1,
+		.mpllb_ssc_up_spread = 0,
+		.mpllb_ssc_peak = 36864,
+		.mpllb_ssc_stepsize = 49545,
+		.mpllb_div_clk_en = 0,
+		.mpllb_div_multiplier = 0,
+		.mpllb_hdmi_div = 0,
+		.mpllb_tx_clk_div = 1,
+		.tx_vboost_lvl = 5,
+		.mpllb_pmix_en = 1,
+		.mpllb_word_div2_en = 0,
+		.mpllb_ana_v2i = 2,
+		.mpllb_ana_freq_vco = 3,
+		.mpllb_ana_cp_int = 9,
+		.mpllb_ana_cp_prop = 15,
+		.hdmi_pixel_clk_div = 0,
+	},
+	//HBR2
+	{
+		.hdmimode_enable = 0,
+		.ref_range = 1,
+		.ref_clk_mpllb_div = 1,
+		.mpllb_ssc_en = 1,
+		.mpllb_div5_clk_en = 1,
+		.mpllb_multiplier = 192,
+		.mpllb_fracn_en = 1,
+		.mpllb_fracn_quot = 32768,
+		.mpllb_fracn_rem = 0,
+		.mpllb_fracn_den = 1,
+		.mpllb_ssc_up_spread = 0,
+		.mpllb_ssc_peak = 36864,
+		.mpllb_ssc_stepsize = 49545,
+		.mpllb_div_clk_en = 0,
+		.mpllb_div_multiplier = 0,
+		.mpllb_hdmi_div = 0,
+		.mpllb_tx_clk_div = 0,
+		.tx_vboost_lvl = 5,
+		.mpllb_pmix_en = 1,
+		.mpllb_word_div2_en = 0,
+		.mpllb_ana_v2i = 2,
+		.mpllb_ana_freq_vco = 3,
+		.mpllb_ana_cp_int = 9,
+		.mpllb_ana_cp_prop = 15,
+		.hdmi_pixel_clk_div = 0,
+	},
+	//HBR3
+	{
+		.hdmimode_enable = 0,
+		.ref_range = 1,
+		.ref_clk_mpllb_div = 1,
+		.mpllb_ssc_en = 1,
+		.mpllb_div5_clk_en = 1,
+		.mpllb_multiplier = 304,
+		.mpllb_fracn_en = 1,
+		.mpllb_fracn_quot = 49152,
+		.mpllb_fracn_rem = 0,
+		.mpllb_fracn_den = 1,
+		.mpllb_ssc_up_spread = 0,
+		.mpllb_ssc_peak = 55296,
+		.mpllb_ssc_stepsize = 74318,
+		.mpllb_div_clk_en = 0,
+		.mpllb_div_multiplier = 0,
+		.mpllb_hdmi_div = 0,
+		.mpllb_tx_clk_div = 0,
+		.tx_vboost_lvl = 5,
+		.mpllb_pmix_en = 1,
+		.mpllb_word_div2_en = 0,
+		.mpllb_ana_v2i = 2,
+		.mpllb_ana_freq_vco = 1,
+		.mpllb_ana_cp_int = 7,
+		.mpllb_ana_cp_prop = 16,
+		.hdmi_pixel_clk_div = 0,
+	},
+};
+
+
+static bool update_cfg_data(
+		struct dcn10_link_encoder *enc10,
+		const struct dc_link_settings *link_settings,
+		struct dpcssys_phy_seq_cfg *cfg)
+{
+	int i;
+
+	cfg->load_sram_fw = false;
+	cfg->use_calibration_setting = true;
+
+	//TODO: need to implement a proper lane mapping for Renoir.
+	for (i = 0; i < 4; i++)
+		cfg->lane_en[i] = true;
+
+	switch (link_settings->link_rate) {
+	case LINK_RATE_LOW:
+		cfg->mpll_cfg = dcn21_mpll_cfg_ref[0];
+		break;
+	case LINK_RATE_HIGH:
+		cfg->mpll_cfg = dcn21_mpll_cfg_ref[1];
+		break;
+	case LINK_RATE_HIGH2:
+		cfg->mpll_cfg = dcn21_mpll_cfg_ref[2];
+		break;
+	case LINK_RATE_HIGH3:
+		cfg->mpll_cfg = dcn21_mpll_cfg_ref[3];
+		break;
+	default:
+		DC_LOG_ERROR("%s: No supported link rate found %X!\n",
+				__func__, link_settings->link_rate);
+		return false;
+	}
+
+	return true;
+}
+
+void dcn21_link_encoder_get_max_link_cap(struct link_encoder *enc,
+	struct dc_link_settings *link_settings)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t value;
+
+	REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &value);
+
+	if (!value && link_settings->lane_count > LANE_COUNT_TWO)
+		link_settings->lane_count = LANE_COUNT_TWO;
+}
+
+bool dcn21_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t value;
+
+	REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &value);
+
+	// if value == 1 alt mode is disabled, otherwise it is enabled
+	return !value;
+}
+
+bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	int value;
+
+	if (enc->features.flags.bits.DP_IS_USB_C) {
+		REG_GET(RDPCSTX_PHY_CNTL6,
+				RDPCS_PHY_DPALT_DISABLE, &value);
+
+		if (value == 1) {
+			ASSERT(0);
+			return false;
+		}
+		REG_UPDATE(RDPCSTX_PHY_CNTL6,
+				RDPCS_PHY_DPALT_DISABLE_ACK, 0);
+
+		udelay(40);
+
+		REG_GET(RDPCSTX_PHY_CNTL6,
+						RDPCS_PHY_DPALT_DISABLE, &value);
+		if (value == 1) {
+			ASSERT(0);
+			REG_UPDATE(RDPCSTX_PHY_CNTL6,
+					RDPCS_PHY_DPALT_DISABLE_ACK, 1);
+			return false;
+		}
+	}
+
+	REG_UPDATE(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_REF_CLK_EN, 1);
+
+	return true;
+}
+
+
+
+static void dcn21_link_encoder_release_phy(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (enc->features.flags.bits.DP_IS_USB_C) {
+		REG_UPDATE(RDPCSTX_PHY_CNTL6,
+				RDPCS_PHY_DPALT_DISABLE_ACK, 1);
+	}
+
+	REG_UPDATE(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DP_REF_CLK_EN, 0);
+
+}
+
+void dcn21_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct dcn21_link_encoder *enc21 = (struct dcn21_link_encoder *) enc10;
+	struct dpcssys_phy_seq_cfg *cfg = &enc21->phy_seq_cfg;
+
+	if (!dcn21_link_encoder_acquire_phy(enc))
+		return;
+
+	if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+		dcn10_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+		return;
+	}
+
+	if (!update_cfg_data(enc10, link_settings, cfg))
+		return;
+
+	enc1_configure_encoder(enc10, link_settings);
+
+	dcn10_link_encoder_setup(enc, SIGNAL_TYPE_DISPLAY_PORT);
+
+}
+
+void dcn21_link_encoder_enable_dp_mst_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	if (!dcn21_link_encoder_acquire_phy(enc))
+		return;
+
+	dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
+}
+
+void dcn21_link_encoder_disable_output(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	dcn10_link_encoder_disable_output(enc, signal);
+
+	if (dc_is_dp_signal(signal))
+		dcn21_link_encoder_release_phy(enc);
+}
+
+
+static const struct link_encoder_funcs dcn21_link_enc_funcs = {
+	.read_state = link_enc2_read_state,
+	.validate_output_with_stream =
+		dcn10_link_encoder_validate_output_with_stream,
+	.hw_init = enc2_hw_init,
+	.setup = dcn10_link_encoder_setup,
+	.enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+	.enable_dp_output = dcn21_link_encoder_enable_dp_output,
+	.enable_dp_mst_output = dcn21_link_encoder_enable_dp_mst_output,
+	.disable_output = dcn21_link_encoder_disable_output,
+	.dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+	.dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+	.update_mst_stream_allocation_table =
+		dcn10_link_encoder_update_mst_stream_allocation_table,
+	.psr_program_dp_dphy_fast_training =
+			dcn10_psr_program_dp_dphy_fast_training,
+	.psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+	.connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+	.enable_hpd = dcn10_link_encoder_enable_hpd,
+	.disable_hpd = dcn10_link_encoder_disable_hpd,
+	.is_dig_enabled = dcn10_is_dig_enabled,
+	.destroy = dcn10_link_encoder_destroy,
+	.fec_set_enable = enc2_fec_set_enable,
+	.fec_set_ready = enc2_fec_set_ready,
+	.fec_is_active = enc2_fec_is_active,
+	.get_dig_frontend = dcn10_get_dig_frontend,
+	.is_in_alt_mode = dcn21_link_encoder_is_in_alt_mode,
+	.get_max_link_cap = dcn21_link_encoder_get_max_link_cap,
+};
+
+void dcn21_link_encoder_construct(
+	struct dcn21_link_encoder *enc21,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask)
+{
+	struct bp_encoder_cap_info bp_cap_info = {0};
+	const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+	enum bp_result result = BP_RESULT_OK;
+	struct dcn10_link_encoder *enc10 = &enc21->enc10;
+
+	enc10->base.funcs = &dcn21_link_enc_funcs;
+	enc10->base.ctx = init_data->ctx;
+	enc10->base.id = init_data->encoder;
+
+	enc10->base.hpd_source = init_data->hpd_source;
+	enc10->base.connector = init_data->connector;
+
+	enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+	enc10->base.features = *enc_features;
+
+	enc10->base.transmitter = init_data->transmitter;
+
+	/* set the flag to indicate whether driver poll the I2C data pin
+	 * while doing the DP sink detect
+	 */
+
+/*	if (dal_adapter_service_is_feature_supported(as,
+		FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+		enc10->base.features.flags.bits.
+			DP_SINK_DETECT_POLL_DATA_PIN = true;*/
+
+	enc10->base.output_signals =
+		SIGNAL_TYPE_DVI_SINGLE_LINK |
+		SIGNAL_TYPE_DVI_DUAL_LINK |
+		SIGNAL_TYPE_LVDS |
+		SIGNAL_TYPE_DISPLAY_PORT |
+		SIGNAL_TYPE_DISPLAY_PORT_MST |
+		SIGNAL_TYPE_EDP |
+		SIGNAL_TYPE_HDMI_TYPE_A;
+
+	/* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE.
+	 * SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY.
+	 * SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer
+	 * DIG is per UNIPHY and used by SST DP, eDP, HDMI, DVI and LVDS.
+	 * Prefer DIG assignment is decided by board design.
+	 * For DCE 8.0, there are only max 6 UNIPHYs, we assume board design
+	 * and VBIOS will filter out 7 UNIPHY for DCE 8.0.
+	 * By this, adding DIGG should not hurt DCE 8.0.
+	 * This will let DCE 8.1 share DCE 8.0 as much as possible
+	 */
+
+	enc10->link_regs = link_regs;
+	enc10->aux_regs = aux_regs;
+	enc10->hpd_regs = hpd_regs;
+	enc10->link_shift = link_shift;
+	enc10->link_mask = link_mask;
+
+	switch (enc10->base.transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		enc10->base.preferred_engine = ENGINE_ID_DIGA;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		enc10->base.preferred_engine = ENGINE_ID_DIGB;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		enc10->base.preferred_engine = ENGINE_ID_DIGC;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		enc10->base.preferred_engine = ENGINE_ID_DIGD;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		enc10->base.preferred_engine = ENGINE_ID_DIGE;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		enc10->base.preferred_engine = ENGINE_ID_DIGF;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		enc10->base.preferred_engine = ENGINE_ID_DIGG;
+	break;
+	default:
+		ASSERT_CRITICAL(false);
+		enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+	}
+
+	/* default to one to mirror Windows behavior */
+	enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+	result = bp_funcs->get_encoder_cap_info(enc10->base.ctx->dc_bios,
+						enc10->base.id, &bp_cap_info);
+
+	/* Override features with DCE-specific values */
+	if (result == BP_RESULT_OK) {
+		enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+				bp_cap_info.DP_HBR2_EN;
+		enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+				bp_cap_info.DP_HBR3_EN;
+		enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+		enc10->base.features.flags.bits.DP_IS_USB_C =
+				bp_cap_info.DP_IS_USB_C;
+	} else {
+		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+				__func__,
+				result);
+	}
+	if (enc10->base.ctx->dc->debug.hdmi20_disable) {
+		enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.h
new file mode 100644
index 000000000000..033d5d76f195
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN21_H__
+#define __DC_LINK_ENCODER__DCN21_H__
+
+#include "dcn20/dcn20_link_encoder.h"
+
+struct dcn21_link_encoder {
+	struct dcn10_link_encoder enc10;
+	struct dpcssys_phy_seq_cfg phy_seq_cfg;
+};
+
+#define DPCS_DCN21_MASK_SH_LIST(mask_sh)\
+	DPCS_DCN2_MASK_SH_LIST(mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_TX_VBOOST_LVL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_MPLLB_CP_PROP_GS, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_RX_VREF_CTRL, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_MPLLB_CP_INT_GS, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCS_DMCU_DPALT_DIS_BLOCK_REG, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL15, RDPCS_PHY_SUP_PRE_HP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL15, RDPCS_PHY_DP_TX0_VREGDRV_BYP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL15, RDPCS_PHY_DP_TX1_VREGDRV_BYP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL15, RDPCS_PHY_DP_TX2_VREGDRV_BYP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL15, RDPCS_PHY_DP_TX3_VREGDRV_BYP, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE0, RDPCS_PHY_DP_TX0_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE1, RDPCS_PHY_DP_TX1_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE2, RDPCS_PHY_DP_TX2_EQ_POST, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_MAIN, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DCO_FINETUNE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DCO_RANGE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_PRE, mask_sh),\
+	LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_POST, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYA_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYB_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYC_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYD_SOFT_RESET, mask_sh),\
+	LE_SF(DCIO_SOFT_RESET, UNIPHYE_SOFT_RESET, mask_sh)
+
+#define DPCS_DCN21_REG_LIST(id) \
+	DPCS_DCN2_REG_LIST(id),\
+	SRI(RDPCSTX_PHY_CNTL15, RDPCSTX, id),\
+	SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
+
+#define LINK_ENCODER_MASK_SH_LIST_DCN21(mask_sh)\
+	LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL0_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL1_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL2_XBAR_SOURCE, mask_sh),\
+	LE_SF(UNIPHYA_CHANNEL_XBAR_CNTL, UNIPHY_CHANNEL3_XBAR_SOURCE, mask_sh), \
+	SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
+	SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+	SR(RDPCSTX0_RDPCSTX_SCRATCH)
+
+void dcn21_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source);
+
+void dcn21_link_encoder_construct(
+	struct dcn21_link_encoder *enc21,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index de182185fe1f..1d741bca2211 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -1,5 +1,6 @@
 /*
 * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2019 Raptor Engineering, LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,6 +29,8 @@
 #include "dm_services.h"
 #include "dc.h"
 
+#include "dcn21_init.h"
+
 #include "resource.h"
 #include "include/irq_service_interface.h"
 #include "dcn20/dcn20_resource.h"
@@ -42,11 +45,11 @@
 #include "irq/dcn21/irq_service_dcn21.h"
 #include "dcn20/dcn20_dpp.h"
 #include "dcn20/dcn20_optc.h"
-#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
 #include "dce110/dce110_hw_sequencer.h"
 #include "dcn20/dcn20_opp.h"
 #include "dcn20/dcn20_dsc.h"
-#include "dcn20/dcn20_link_encoder.h"
+#include "dcn21/dcn21_link_encoder.h"
 #include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
@@ -60,6 +63,8 @@
 
 #include "dcn20/dcn20_dwb.h"
 #include "dcn20/dcn20_mmhubbub.h"
+#include "dpcs/dpcs_2_1_0_offset.h"
+#include "dpcs/dpcs_2_1_0_sh_mask.h"
 
 #include "renoir_ip_offset.h"
 #include "dcn/dcn_2_1_0_offset.h"
@@ -78,22 +83,20 @@
 #include "dcn21_resource.h"
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
+#include "../dce/dmub_psr.h"
 
 #define SOC_BOUNDING_BOX_VALID false
 #define DC_LOGGER_INIT(logger)
 
 
 struct _vcs_dpi_ip_params_st dcn2_1_ip = {
-	.gpuvm_enable = 0,
-	.hostvm_enable = 0,
+	.odm_capable = 1,
+	.gpuvm_enable = 1,
+	.hostvm_enable = 1,
 	.gpuvm_max_page_table_levels = 1,
 	.hostvm_max_page_table_levels = 4,
 	.hostvm_cached_page_table_levels = 2,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	.num_dsc = 3,
-#else
-	.num_dsc = 0,
-#endif
 	.rob_buffer_size_kbytes = 168,
 	.det_buffer_size_kbytes = 164,
 	.dpte_buffer_size_in_pte_reqs_luma = 44,
@@ -205,11 +208,11 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 				.state = 4,
 				.dcfclk_mhz = 810.0,
 				.fabricclk_mhz = 1600.0,
-				.dispclk_mhz = 1015.0,
-				.dppclk_mhz = 1015.0,
-				.phyclk_mhz = 810.0,
+				.dispclk_mhz = 1395.0,
+				.dppclk_mhz = 1285.0,
+				.phyclk_mhz = 1325.0,
 				.socclk_mhz = 953.0,
-				.dscclk_mhz = 318.334,
+				.dscclk_mhz = 489.0,
 				.dram_speed_mts = 4266.0,
 			},
 			/*Extra state, no dispclk ramping*/
@@ -217,18 +220,18 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 				.state = 5,
 				.dcfclk_mhz = 810.0,
 				.fabricclk_mhz = 1600.0,
-				.dispclk_mhz = 1015.0,
-				.dppclk_mhz = 1015.0,
-				.phyclk_mhz = 810.0,
+				.dispclk_mhz = 1395.0,
+				.dppclk_mhz = 1285.0,
+				.phyclk_mhz = 1325.0,
 				.socclk_mhz = 953.0,
-				.dscclk_mhz = 318.334,
+				.dscclk_mhz = 489.0,
 				.dram_speed_mts = 4266.0,
 			},
 
 		},
 
-	.sr_exit_time_us = 9.0,
-	.sr_enter_plus_exit_time_us = 11.0,
+	.sr_exit_time_us = 12.5,
+	.sr_enter_plus_exit_time_us = 17.0,
 	.urgent_latency_us = 4.0,
 	.urgent_latency_pixel_data_only_us = 4.0,
 	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
@@ -258,7 +261,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 	.vmm_page_size_bytes = 4096,
 	.dram_clock_change_latency_us = 23.84,
 	.return_bus_width_bytes = 64,
-	.dispclk_dppclk_vco_speed_mhz = 3550,
+	.dispclk_dppclk_vco_speed_mhz = 3600,
 	.xfc_bus_transport_time_us = 4,
 	.xfc_xbuf_latency_tolerance_us = 4,
 	.use_urgent_burst_bw = 1,
@@ -350,19 +353,29 @@ static const struct bios_registers bios_regs = {
 		NBIO_SR(BIOS_SCRATCH_6)
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DMUB
-static const struct dcn21_dmcub_registers dmcub_regs = {
-		DMCUB_REG_LIST_DCN()
+static const struct dce_dmcu_registers dmcu_regs = {
+		DMCU_DCN20_REG_LIST()
 };
 
-static const struct dcn21_dmcub_shift dmcub_shift = {
-		DMCUB_COMMON_MASK_SH_LIST_BASE(__SHIFT)
+static const struct dce_dmcu_shift dmcu_shift = {
+		DMCU_MASK_SH_LIST_DCN10(__SHIFT)
 };
 
-static const struct dcn21_dmcub_mask dmcub_mask = {
-		DMCUB_COMMON_MASK_SH_LIST_BASE(_MASK)
+static const struct dce_dmcu_mask dmcu_mask = {
+		DMCU_MASK_SH_LIST_DCN10(_MASK)
+};
+
+static const struct dce_abm_registers abm_regs = {
+		ABM_DCN20_REG_LIST()
+};
+
+static const struct dce_abm_shift abm_shift = {
+		ABM_MASK_SH_LIST_DCN20(__SHIFT)
+};
+
+static const struct dce_abm_mask abm_mask = {
+		ABM_MASK_SH_LIST_DCN20(_MASK)
 };
-#endif
 
 #define audio_regs(id)\
 [id] = {\
@@ -453,15 +466,18 @@ static const struct dcn20_mpc_registers mpc_regs = {
 		MPC_OUT_MUX_REG_LIST_DCN2_0(0),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(1),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(2),
-		MPC_OUT_MUX_REG_LIST_DCN2_0(3)
+		MPC_OUT_MUX_REG_LIST_DCN2_0(3),
+		MPC_DBG_REG_LIST_DCN2_0()
 };
 
 static const struct dcn20_mpc_shift mpc_shift = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
+	MPC_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn20_mpc_mask mpc_mask = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
+	MPC_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define hubp_regs(id)\
@@ -529,7 +545,6 @@ static const struct dcn20_vmid_mask vmid_masks = {
 		DCN20_VMID_MASK_SH_LIST(_MASK)
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #define dsc_regsDCN20(id)\
 [id] = {\
 	DSC_REG_LIST_DCN20(id)\
@@ -551,7 +566,6 @@ static const struct dcn20_dsc_shift dsc_shift = {
 static const struct dcn20_dsc_mask dsc_mask = {
 	DSC_REG_LIST_SH_MASK_DCN20(_MASK)
 };
-#endif
 
 #define ipp_regs(id)\
 [id] = {\
@@ -598,6 +612,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = {
 #define tf_regs(id)\
 [id] = {\
 	TF_REG_LIST_DCN20(id),\
+	TF_REG_LIST_DCN20_COMMON_APPEND(id),\
 }
 
 static const struct dcn2_dpp_registers tf_regs[] = {
@@ -608,11 +623,13 @@ static const struct dcn2_dpp_registers tf_regs[] = {
 };
 
 static const struct dcn2_dpp_shift tf_shift = {
-		TF_REG_LIST_SH_MASK_DCN20(__SHIFT)
+		TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
+		TF_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn2_dpp_mask tf_mask = {
-		TF_REG_LIST_SH_MASK_DCN20(_MASK)
+		TF_REG_LIST_SH_MASK_DCN20(_MASK),
+		TF_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define stream_enc_regs(id)\
@@ -628,6 +645,14 @@ static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
 	stream_enc_regs(4),
 };
 
+static const struct dce110_aux_registers_shift aux_shift = {
+	DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+	DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
 static const struct dcn10_stream_encoder_shift se_shift = {
 		SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT)
 };
@@ -636,6 +661,11 @@ static const struct dcn10_stream_encoder_mask se_mask = {
 		SE_COMMON_MASK_SH_LIST_DCN20(_MASK)
 };
 
+static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu);
+
+static int dcn21_populate_dml_pipes_from_context(
+		struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
+
 static struct input_pixel_processor *dcn21_ipp_create(
 	struct dc_context *ctx, uint32_t inst)
 {
@@ -683,7 +713,10 @@ static struct dce_aux *dcn21_aux_engine_create(
 
 	dce110_aux_engine_construct(aux_engine, ctx, inst,
 				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
-				    &aux_engine_regs[inst]);
+				    &aux_engine_regs[inst],
+					&aux_mask,
+					&aux_shift,
+					ctx->dc->caps.extended_aux_timeout_support);
 
 	return &aux_engine->base;
 }
@@ -726,14 +759,13 @@ static const struct resource_caps res_cap_rn = {
 		.num_timing_generator = 4,
 		.num_opp = 4,
 		.num_video_plane = 4,
-		.num_audio = 6, // 6 audio endpoints.  4 audio streams
+		.num_audio = 4, // 4 audio endpoints.  4 audio streams
 		.num_stream_encoder = 5,
 		.num_pll = 5,  // maybe 3 because the last two used for USB-c
 		.num_dwb = 1,
 		.num_ddc = 5,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+		.num_vmid = 1,
 		.num_dsc = 3,
-#endif
 };
 
 #ifdef DIAGS_BUILD
@@ -758,9 +790,7 @@ static const struct resource_caps res_cap_rn_FPGA_2pipe_dsc = {
 		.num_pll = 4,
 		.num_dwb = 1,
 		.num_ddc = 4,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		.num_dsc = 2,
-#endif
 };
 #endif
 
@@ -796,15 +826,17 @@ static const struct dc_debug_options debug_defaults_drv = {
 		.clock_trace = true,
 		.disable_pplib_clock_request = true,
 		.pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
-		.force_single_disp_pipe_split = true,
+		.force_single_disp_pipe_split = false,
 		.disable_dcc = DCC_ENABLE,
 		.vsr_support = true,
 		.performance_trace = false,
-		.max_downscale_src_width = 5120,/*upto 5K*/
+		.max_downscale_src_width = 3840,
 		.disable_pplib_wm_range = false,
 		.scl_reset_length10 = true,
 		.sanity_checks = true,
-		.disable_48mhz_pwrdwn = true,
+		.disable_48mhz_pwrdwn = false,
+		.nv12_iflip_vm_wa = true,
+		.usbc_combo_phy_reset_wa = true
 };
 
 static const struct dc_debug_options debug_defaults_diags = {
@@ -827,7 +859,7 @@ enum dcn20_clk_src_array_id {
 	DCN20_CLK_SRC_TOTAL_DCN21
 };
 
-static void destruct(struct dcn21_resource_pool *pool)
+static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
 {
 	unsigned int i;
 
@@ -838,12 +870,10 @@ static void destruct(struct dcn21_resource_pool *pool)
 		}
 	}
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
 		if (pool->base.dscs[i] != NULL)
 			dcn20_dsc_destroy(&pool->base.dscs[i]);
 	}
-#endif
 
 	if (pool->base.mpc != NULL) {
 		kfree(TO_DCN20_MPC(pool->base.mpc));
@@ -930,16 +960,11 @@ static void destruct(struct dcn21_resource_pool *pool)
 	if (pool->base.dmcu != NULL)
 		dce_dmcu_destroy(&pool->base.dmcu);
 
-#ifdef CONFIG_DRM_AMD_DC_DMUB
-	if (pool->base.dmcub != NULL)
-		dcn21_dmcub_destroy(&pool->base.dmcub);
-#endif
-
 	if (pool->base.dccg != NULL)
 		dcn_dccg_destroy(&pool->base.dccg);
 
 	if (pool->base.pp_smu != NULL)
-		dcn20_pp_smu_destroy(&pool->base.pp_smu);
+		dcn21_pp_smu_destroy(&pool->base.pp_smu);
 }
 
 
@@ -960,20 +985,55 @@ static void calculate_wm_set_for_vlevel(
 	pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
 
 	dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
+	dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
+	dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
 
 	wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
 	wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
 	wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
 	wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
 	wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
 	wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
-#endif
+	wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
 	dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
 
 }
 
+static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+{
+	int i;
+
+	DC_FP_START();
+
+	if (dc->bb_overrides.sr_exit_time_ns) {
+		for (i = 0; i < WM_SET_COUNT; i++) {
+			  dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
+					  dc->bb_overrides.sr_exit_time_ns / 1000.0;
+		}
+	}
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+		for (i = 0; i < WM_SET_COUNT; i++) {
+			  dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us =
+					  dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+		}
+	}
+
+	if (dc->bb_overrides.urgent_latency_ns) {
+		bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+	}
+
+	if (dc->bb_overrides.dram_clock_change_latency_ns) {
+		for (i = 0; i < WM_SET_COUNT; i++) {
+			dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us =
+				dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+		}
+	}
+
+	DC_FP_END();
+}
+
 void dcn21_calculate_wm(
 		struct dc *dc, struct dc_state *context,
 		display_e2e_pipe_params_st *pipes,
@@ -988,6 +1048,8 @@ void dcn21_calculate_wm(
 
 	ASSERT(bw_params);
 
+	patch_bounding_box(dc, &context->bw_ctx.dml.soc);
+
 	for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
 			if (!context->res_ctx.pipe_ctx[i].stream)
 				continue;
@@ -1019,10 +1081,10 @@ void dcn21_calculate_wm(
 	if (pipe_cnt != pipe_idx) {
 		if (dc->res_pool->funcs->populate_dml_pipes)
 			pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
-				&context->res_ctx, pipes);
+				context, pipes);
 		else
-			pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
-				&context->res_ctx, pipes);
+			pipe_cnt = dcn21_populate_dml_pipes_from_context(dc,
+				context, pipes);
 	}
 
 	*out_pipe_cnt = pipe_cnt;
@@ -1112,7 +1174,7 @@ static void dcn21_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dcn21_resource_pool *dcn21_pool = TO_DCN21_RES_POOL(*pool);
 
-	destruct(dcn21_pool);
+	dcn21_resource_destruct(dcn21_pool);
 	kfree(dcn21_pool);
 	*pool = NULL;
 }
@@ -1251,7 +1313,6 @@ static void read_dce_straps(
 
 }
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 
 struct display_stream_compressor *dcn21_dsc_create(
 	struct dc_context *ctx, uint32_t inst)
@@ -1267,7 +1328,6 @@ struct display_stream_compressor *dcn21_dsc_create(
 	dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
 	return &dsc->base;
 }
-#endif
 
 static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
 {
@@ -1278,7 +1338,6 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 	dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
 	dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
 	dcn2_1_soc.num_chans = bw_params->num_channels;
-	dcn2_1_soc.num_states = 0;
 
 	for (i = 0; i < clk_table->num_entries; i++) {
 
@@ -1286,10 +1345,14 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 		dcn2_1_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
 		dcn2_1_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
 		dcn2_1_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
-		/* This is probably wrong, TODO: find correct calculation */
-		dcn2_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 16 / 1000;
-		dcn2_1_soc.num_states++;
+		dcn2_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
 	}
+	dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1];
+	dcn2_1_soc.num_states = i;
+
+	// diags does not retrieve proper values from SMU, do not update DML instance for diags
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment))
+		dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
 }
 
 /* Temporary Place holder until we can get them from fuse */
@@ -1317,32 +1380,42 @@ static struct dpm_clocks dummy_clocks = {
 
 };
 
-enum pp_smu_status dummy_set_wm_ranges(struct pp_smu *pp,
+static enum pp_smu_status dummy_set_wm_ranges(struct pp_smu *pp,
 		struct pp_smu_wm_range_sets *ranges)
 {
 	return PP_SMU_RESULT_OK;
 }
 
-enum pp_smu_status dummy_get_dpm_clock_table(struct pp_smu *pp,
+static enum pp_smu_status dummy_get_dpm_clock_table(struct pp_smu *pp,
 		struct dpm_clocks *clock_table)
 {
 	*clock_table = dummy_clocks;
 	return PP_SMU_RESULT_OK;
 }
 
-struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx)
+static struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx)
 {
 	struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL);
 
-	pp_smu->ctx.ver = PP_SMU_VER_RN;
+	if (!pp_smu)
+		return pp_smu;
 
-	pp_smu->rn_funcs.get_dpm_clock_table = dummy_get_dpm_clock_table;
-	pp_smu->rn_funcs.set_wm_ranges = dummy_set_wm_ranges;
+	if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment) || IS_DIAG_DC(ctx->dce_environment)) {
+		pp_smu->ctx.ver = PP_SMU_VER_RN;
+		pp_smu->rn_funcs.get_dpm_clock_table = dummy_get_dpm_clock_table;
+		pp_smu->rn_funcs.set_wm_ranges = dummy_set_wm_ranges;
+	} else {
+
+		dm_pp_get_funcs(ctx, pp_smu);
+
+		if (pp_smu->ctx.ver != PP_SMU_VER_RN)
+			pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs));
+	}
 
 	return pp_smu;
 }
 
-void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
+static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
 {
 	if (pp_smu && *pp_smu) {
 		kfree(*pp_smu);
@@ -1400,6 +1473,7 @@ static struct dce_hwseq *dcn21_hwseq_create(
 		hws->regs = &hwseq_reg;
 		hws->shifts = &hwseq_shift;
 		hws->masks = &hwseq_mask;
+		hws->wa.DEGVIDCN21 = true;
 	}
 	return hws;
 }
@@ -1418,10 +1492,156 @@ static const struct resource_create_funcs res_create_maximus_funcs = {
 	.create_hwseq = dcn21_hwseq_create,
 };
 
+static const struct encoder_feature_support link_enc_feature = {
+		.max_hdmi_deep_color = COLOR_DEPTH_121212,
+		.max_hdmi_pixel_clock = 600000,
+		.hdmi_ycbcr420_supported = true,
+		.dp_ycbcr420_supported = true,
+		.flags.bits.IS_HBR2_CAPABLE = true,
+		.flags.bits.IS_HBR3_CAPABLE = true,
+		.flags.bits.IS_TPS3_CAPABLE = true,
+		.flags.bits.IS_TPS4_CAPABLE = true
+};
+
+
+#define link_regs(id, phyid)\
+[id] = {\
+	LE_DCN2_REG_LIST(id), \
+	UNIPHY_DCN2_REG_LIST(phyid), \
+	DPCS_DCN21_REG_LIST(id), \
+	SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
+}
+
+static const struct dcn10_link_enc_registers link_enc_regs[] = {
+	link_regs(0, A),
+	link_regs(1, B),
+	link_regs(2, C),
+	link_regs(3, D),
+	link_regs(4, E),
+};
+
+#define aux_regs(id)\
+[id] = {\
+	DCN2_AUX_REG_LIST(id)\
+}
+
+static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
+		aux_regs(0),
+		aux_regs(1),
+		aux_regs(2),
+		aux_regs(3),
+		aux_regs(4)
+};
+
+#define hpd_regs(id)\
+[id] = {\
+	HPD_REG_LIST(id)\
+}
+
+static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
+		hpd_regs(0),
+		hpd_regs(1),
+		hpd_regs(2),
+		hpd_regs(3),
+		hpd_regs(4)
+};
+
+static const struct dcn10_link_enc_shift le_shift = {
+	LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\
+	DPCS_DCN21_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn10_link_enc_mask le_mask = {
+	LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\
+	DPCS_DCN21_MASK_SH_LIST(_MASK)
+};
+
+static int map_transmitter_id_to_phy_instance(
+	enum transmitter transmitter)
+{
+	switch (transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	break;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
+static struct link_encoder *dcn21_link_encoder_create(
+	const struct encoder_init_data *enc_init_data)
+{
+	struct dcn21_link_encoder *enc21 =
+		kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL);
+	int link_regs_id;
+
+	if (!enc21)
+		return NULL;
+
+	link_regs_id =
+		map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
+
+	dcn21_link_encoder_construct(enc21,
+				      enc_init_data,
+				      &link_enc_feature,
+				      &link_enc_regs[link_regs_id],
+				      &link_enc_aux_regs[enc_init_data->channel - 1],
+				      &link_enc_hpd_regs[enc_init_data->hpd_source],
+				      &le_shift,
+				      &le_mask);
+
+	return &enc21->enc10.base;
+}
+#define CTX ctx
+
+#define REG(reg_name) \
+	(DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
+
+static uint32_t read_pipe_fuses(struct dc_context *ctx)
+{
+	uint32_t value = REG_READ(CC_DC_PIPE_DIS);
+	/* RV1 support max 4 pipes */
+	value = value & 0xf;
+	return value;
+}
+
+static int dcn21_populate_dml_pipes_from_context(
+		struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes)
+{
+	uint32_t pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes);
+	int i;
+	struct resource_context *res_ctx = &context->res_ctx;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		pipes[i].pipe.src.hostvm = 1;
+		pipes[i].pipe.src.gpuvm = 1;
+	}
+
+	return pipe_cnt;
+}
+
 static struct resource_funcs dcn21_res_pool_funcs = {
 	.destroy = dcn21_destroy_resource_pool,
-	.link_enc_create = dcn20_link_encoder_create,
+	.link_enc_create = dcn21_link_encoder_create,
 	.validate_bandwidth = dcn21_validate_bandwidth,
+	.populate_dml_pipes = dcn21_populate_dml_pipes_from_context,
 	.add_stream_to_ctx = dcn20_add_stream_to_ctx,
 	.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
 	.acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
@@ -1432,14 +1652,16 @@ static struct resource_funcs dcn21_res_pool_funcs = {
 	.update_bw_bounding_box = update_bw_bounding_box
 };
 
-static bool construct(
+static bool dcn21_resource_construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dcn21_resource_pool *pool)
 {
-	int i;
+	int i, j;
 	struct dc_context *ctx = dc->ctx;
 	struct irq_service_init_data init_data;
+	uint32_t pipe_fuses = read_pipe_fuses(ctx);
+	uint32_t num_pipes;
 
 	ctx->dc_bios->regs = &bios_regs;
 
@@ -1457,7 +1679,9 @@ static bool construct(
 	 *************************************************/
 	pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
 
-	pool->base.pipe_count = 4;
+	/* max pipe num for ASIC before check pipe fuses */
+	pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
+
 	dc->caps.max_downscale_ratio = 200;
 	dc->caps.i2c_speed_in_khz = 100;
 	dc->caps.max_cursor_size = 256;
@@ -1467,6 +1691,8 @@ static bool construct(
 	dc->caps.max_slave_planes = 1;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	dc->caps.extended_aux_timeout_support = true;
+	dc->caps.dmcub_support = true;
 
 	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
 		dc->debug = debug_defaults_drv;
@@ -1516,20 +1742,40 @@ static bool construct(
 		goto create_fail;
 	}
 
-#ifdef CONFIG_DRM_AMD_DC_DMUB
-	pool->base.dmcub = dcn21_dmcub_create(ctx,
-			&dmcub_regs,
-			&dmcub_shift,
-			&dmcub_mask);
-	if (pool->base.dmcub == NULL) {
-		dm_error("DC: failed to create dmcub!\n");
+	pool->base.dmcu = dcn21_dmcu_create(ctx,
+			&dmcu_regs,
+			&dmcu_shift,
+			&dmcu_mask);
+	if (pool->base.dmcu == NULL) {
+		dm_error("DC: failed to create dmcu!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
+	// Leave as NULL to not affect current dmcu psr programming sequence
+	// Will be uncommented when functionality is confirmed to be working
+	pool->base.psr = NULL;
+
+	pool->base.abm = dce_abm_create(ctx,
+			&abm_regs,
+			&abm_shift,
+			&abm_mask);
+	if (pool->base.abm == NULL) {
+		dm_error("DC: failed to create abm!\n");
 		BREAK_TO_DEBUGGER();
 		goto create_fail;
 	}
-#endif
 
 	pool->base.pp_smu = dcn21_pp_smu_create(ctx);
 
+	num_pipes = dcn2_1_ip.max_num_dpp;
+
+	for (i = 0; i < dcn2_1_ip.max_num_dpp; i++)
+		if (pipe_fuses & 1 << i)
+			num_pipes--;
+	dcn2_1_ip.max_num_dpp = num_pipes;
+	dcn2_1_ip.max_num_otg = num_pipes;
+
 	dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
 
 	init_data.ctx = dc->ctx;
@@ -1537,31 +1783,55 @@ static bool construct(
 	if (!pool->base.irqs)
 		goto create_fail;
 
+	j = 0;
 	/* mem input -> ipp -> dpp -> opp -> TG */
 	for (i = 0; i < pool->base.pipe_count; i++) {
-		pool->base.hubps[i] = dcn21_hubp_create(ctx, i);
-		if (pool->base.hubps[i] == NULL) {
+		/* if pipe is disabled, skip instance of HW pipe,
+		 * i.e, skip ASIC register instance
+		 */
+		if ((pipe_fuses & (1 << i)) != 0)
+			continue;
+
+		pool->base.hubps[j] = dcn21_hubp_create(ctx, i);
+		if (pool->base.hubps[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
 				"DC: failed to create memory input!\n");
 			goto create_fail;
 		}
 
-		pool->base.ipps[i] = dcn21_ipp_create(ctx, i);
-		if (pool->base.ipps[i] == NULL) {
+		pool->base.ipps[j] = dcn21_ipp_create(ctx, i);
+		if (pool->base.ipps[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
 				"DC: failed to create input pixel processor!\n");
 			goto create_fail;
 		}
 
-		pool->base.dpps[i] = dcn21_dpp_create(ctx, i);
-		if (pool->base.dpps[i] == NULL) {
+		pool->base.dpps[j] = dcn21_dpp_create(ctx, i);
+		if (pool->base.dpps[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
 				"DC: failed to create dpps!\n");
 			goto create_fail;
 		}
+
+		pool->base.opps[j] = dcn21_opp_create(ctx, i);
+		if (pool->base.opps[j] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC: failed to create output pixel processor!\n");
+			goto create_fail;
+		}
+
+		pool->base.timing_generators[j] = dcn21_timing_generator_create(
+				ctx, i);
+		if (pool->base.timing_generators[j] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error("DC: failed to create tg!\n");
+			goto create_fail;
+		}
+		j++;
 	}
 
 	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
@@ -1582,27 +1852,9 @@ static bool construct(
 		pool->base.sw_i2cs[i] = NULL;
 	}
 
-	for (i = 0; i < pool->base.res_cap->num_opp; i++) {
-		pool->base.opps[i] = dcn21_opp_create(ctx, i);
-		if (pool->base.opps[i] == NULL) {
-			BREAK_TO_DEBUGGER();
-			dm_error(
-				"DC: failed to create output pixel processor!\n");
-			goto create_fail;
-		}
-	}
-
-	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
-		pool->base.timing_generators[i] = dcn21_timing_generator_create(
-				ctx, i);
-		if (pool->base.timing_generators[i] == NULL) {
-			BREAK_TO_DEBUGGER();
-			dm_error("DC: failed to create tg!\n");
-			goto create_fail;
-		}
-	}
-
-	pool->base.timing_generator_count = i;
+	pool->base.timing_generator_count = j;
+	pool->base.pipe_count = j;
+	pool->base.mpcc_count = j;
 
 	pool->base.mpc = dcn21_mpc_create(ctx);
 	if (pool->base.mpc == NULL) {
@@ -1618,7 +1870,6 @@ static bool construct(
 		goto create_fail;
 	}
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
 		pool->base.dscs[i] = dcn21_dsc_create(ctx, i);
 		if (pool->base.dscs[i] == NULL) {
@@ -1627,7 +1878,6 @@ static bool construct(
 			goto create_fail;
 		}
 	}
-#endif
 
 	if (!dcn20_dwbc_create(ctx, &pool->base)) {
 		BREAK_TO_DEBUGGER();
@@ -1645,7 +1895,7 @@ static bool construct(
 			&res_create_funcs : &res_create_maximus_funcs)))
 			goto create_fail;
 
-	dcn20_hw_sequencer_construct(dc);
+	dcn21_hw_sequencer_construct(dc);
 
 	dc->caps.max_planes =  pool->base.pipe_count;
 
@@ -1658,7 +1908,7 @@ static bool construct(
 
 create_fail:
 
-	destruct(pool);
+	dcn21_resource_destruct(pool);
 
 	return false;
 }
@@ -1673,7 +1923,7 @@ struct resource_pool *dcn21_create_resource_pool(
 	if (!pool)
 		return NULL;
 
-	if (construct(init_data->num_virtual_links, dc, pool))
+	if (dcn21_resource_construct(init_data->num_virtual_links, dc, pool))
 		return &pool->base;
 
 	BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
new file mode 100644
index 000000000000..626d22d437f4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DM_CP_PSP_IF__H
+#define DM_CP_PSP_IF__H
+
+struct dc_link;
+
+struct cp_psp_stream_config {
+	uint8_t otg_inst;
+	uint8_t link_enc_inst;
+	uint8_t stream_enc_inst;
+	void *dm_stream_ctx;
+	bool dpms_off;
+};
+
+struct cp_psp_funcs {
+	void (*update_stream_config)(void *handle, struct cp_psp_stream_config *config);
+};
+
+struct cp_psp {
+	void *handle;
+	struct cp_psp_funcs funcs;
+};
+
+
+#endif /* DM_CP_PSP_IF__H */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index b6b4333737f2..8bde1d688f2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -74,7 +74,7 @@ void dm_helpers_dp_mst_clear_payload_allocation_table(
 /*
  * Polls for ACT (allocation change trigger) handled and
  */
-bool dm_helpers_dp_mst_poll_for_allocation_change_trigger(
+enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 		struct dc_context *ctx,
 		const struct dc_stream_state *stream);
 /*
@@ -118,13 +118,11 @@ bool dm_helpers_submit_i2c(
 		const struct dc_link *link,
 		struct i2c_command *cmd);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 bool dm_helpers_dp_write_dsc_enable(
 		struct dc_context *ctx,
 		const struct dc_stream_state *stream,
 		bool enable
 );
-#endif
 bool dm_helpers_is_dp_sink_present(
 		struct dc_link *link);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index c03a441ee638..ae608c329366 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -41,12 +41,8 @@ enum pp_smu_ver {
 	 */
 	PP_SMU_UNSUPPORTED,
 	PP_SMU_VER_RV,
-#ifndef CONFIG_TRIM_DRM_AMD_DC_DCN2_0
 	PP_SMU_VER_NV,
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	PP_SMU_VER_RN,
-#endif
 
 	PP_SMU_VER_MAX
 };
@@ -143,7 +139,6 @@ struct pp_smu_funcs_rv {
 	void (*set_pme_wa_enable)(struct pp_smu *pp);
 };
 
-#ifndef CONFIG_TRIM_DRM_AMD_DC_DCN2_0
 /* Used by pp_smu_funcs_nv.set_voltage_by_freq
  *
  */
@@ -247,12 +242,9 @@ struct pp_smu_funcs_nv {
 	enum pp_smu_status (*set_pstate_handshake_support)(struct pp_smu *pp,
 			BOOLEAN pstate_handshake_supported);
 };
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 
 #define PP_SMU_NUM_SOCCLK_DPM_LEVELS  8
-#define PP_SMU_NUM_DCFCLK_DPM_LEVELS  4
+#define PP_SMU_NUM_DCFCLK_DPM_LEVELS  8
 #define PP_SMU_NUM_FCLK_DPM_LEVELS    4
 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS  4
 
@@ -288,18 +280,13 @@ struct pp_smu_funcs_rn {
 	enum pp_smu_status (*get_dpm_clock_table) (struct pp_smu *pp,
 			struct dpm_clocks *clock_table);
 };
-#endif
 
 struct pp_smu_funcs {
 	struct pp_smu ctx;
 	union {
 		struct pp_smu_funcs_rv rv_funcs;
-#ifndef CONFIG_TRIM_DRM_AMD_DC_DCN2_0
 		struct pp_smu_funcs_nv nv_funcs;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 		struct pp_smu_funcs_rn rn_funcs;
-#endif
 
 	};
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 1a0429744630..968ff1fef486 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -40,6 +40,9 @@
 
 #undef DEPRECATED
 
+struct dmub_srv;
+struct dc_dmub_srv;
+
 irq_handler_idx dm_register_interrupt(
 	struct dc_context *ctx,
 	struct dc_interrupt_params *int_params,
@@ -139,6 +142,13 @@ uint32_t generic_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr, int n,
 		uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...);
 
+struct dc_dmub_srv *dc_dmub_srv_create(struct dc *dc, struct dmub_srv *dmub);
+void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv);
+
+void reg_sequence_start_gather(const struct dc_context *ctx);
+void reg_sequence_start_execute(const struct dc_context *ctx);
+void reg_sequence_wait_done(const struct dc_context *ctx);
+
 #define FD(reg_field)	reg_field ## __SHIFT, \
 						reg_field ## _MASK
 
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
index a3d1be20dd9d..b52ba6ffabe1 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
@@ -220,6 +220,7 @@ struct dm_bl_data_point {
 };
 
 /* Total size of the structure should not exceed 256 bytes */
+#define BL_DATA_POINTS 99
 struct dm_acpi_atif_backlight_caps {
 	uint16_t size; /* Bytes 0-1 (2 bytes) */
 	uint16_t flags; /* Byted 2-3 (2 bytes) */
@@ -229,7 +230,7 @@ struct dm_acpi_atif_backlight_caps {
 	uint8_t  min_input_signal; /* Byte 7 */
 	uint8_t  max_input_signal; /* Byte 8 */
 	uint8_t  num_data_points; /* Byte 9 */
-	struct dm_bl_data_point data_points[99]; /* Bytes 10-207 (198 bytes)*/
+	struct dm_bl_data_point data_points[BL_DATA_POINTS]; /* Bytes 10-207 (198 bytes)*/
 };
 
 enum dm_acpi_display_type {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 8df251626e22..7ee8b8460a9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -1,5 +1,6 @@
 #
 # Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2019 Raptor Engineering, LLC
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -24,7 +25,13 @@
 # It provides the general basic services required by other DAL
 # subcomponents.
 
+ifdef CONFIG_X86
 dml_ccflags := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+dml_ccflags := -mhard-float -maltivec
+endif
 
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
@@ -32,6 +39,7 @@ IS_OLD_GCC = 1
 endif
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -40,17 +48,16 @@ dml_ccflags += -mpreferred-stack-boundary=4
 else
 dml_ccflags += -msse2
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-endif
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
 endif
@@ -61,11 +68,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dml_common_defs.o := $(dml_ccflags)
 DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \
 	dml_common_defs.o
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
 DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
-endif
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
 DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
 endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index 6c6c486b774a..e7a8ac7a1f22 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -937,7 +937,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1335,11 +1335,11 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer
 		else
 			mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2577,7 +2577,8 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer
 			mode_lib->vba.MinActiveDRAMClockChangeMargin
 					+ mode_lib->vba.DRAMClockChangeLatency;
 
-	if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) {
+	if (mode_lib->vba.DRAMClockChangeSupportsVActive &&
+			mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) {
 		mode_lib->vba.DRAMClockChangeWatermark += 25;
 		mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
 	} else {
@@ -2847,12 +2848,12 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3347,7 +3348,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3445,10 +3446,10 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->FabricAndDRAMBandwidthPerState[i] * 1000)
 				* locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
 
-		locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState;
+		locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState;
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3459,7 +3460,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3471,7 +3472,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000);
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3482,7 +3483,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3520,12 +3521,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
 				(mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
-				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3902,7 +3903,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -3991,16 +3992,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4182,8 +4183,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4206,7 +4206,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4248,7 +4248,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4291,7 +4291,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				if (locals->ODMCombineEnablePerState[i][k] == true)
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1)
 					locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k]));
 				else
 					locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
@@ -4344,28 +4344,28 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 				locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma +  dml_min(
 						locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] *
-						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesLuma),
 						locals->SwathHeightYPerState[i][j][k]);
 
 				locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
 						locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
-						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesChroma),
 						locals->SwathHeightCPerState[i][j][k]);
 
 				if (locals->BytePerPixelInDETC[k] == 0) {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 							/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]);
+								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
 				} else {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
 						locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 						/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]),
+						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]),
 							locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) -
 							locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 *
-							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]));
+							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]));
 				}
 			}
 		}
@@ -4405,14 +4405,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k];
 				locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k];
 				locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k];
-				mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max(
-						mode_lib->vba.ProjectedDCFCLKDeepSleep,
+				mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max(
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 						mode_lib->vba.PixelClock[k] / 16.0);
 				if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4422,9 +4422,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4435,9 +4435,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					}
 				} else {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4447,9 +4447,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4459,9 +4459,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.RequiredDPPCLK[i][j][k]);
 					}
 					if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4472,9 +4472,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4510,7 +4510,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						&mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k],
 						&mode_lib->vba.dpte_row_height[k],
 						&mode_lib->vba.meta_row_height[k]);
-				mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4549,7 +4549,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							&mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k],
 							&mode_lib->vba.dpte_row_height_chroma[k],
 							&mode_lib->vba.meta_row_height_chroma[k]);
-					mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k] / 2.0,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4563,14 +4563,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4597,14 +4597,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
 											* mode_lib->vba.MetaChunkSize)
 									* 1024.0
-									/ mode_lib->vba.ReturnBWPerState[i];
+									/ mode_lib->vba.ReturnBWPerState[i][0];
 			if (mode_lib->vba.GPUVMEnable == true) {
 				mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency
 						+ mode_lib->vba.TotalNumberOfActiveDPP[i][j]
 								* mode_lib->vba.PTEGroupSize
-								/ mode_lib->vba.ReturnBWPerState[i];
+								/ mode_lib->vba.ReturnBWPerState[i][0];
 			}
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
@@ -4654,7 +4654,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			}
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
 			}
 
@@ -4699,7 +4699,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									mode_lib->vba.RequiredDPPCLK[i][j][k],
 									mode_lib->vba.RequiredDISPCLK[i][j],
 									mode_lib->vba.PixelClock[k],
-									mode_lib->vba.ProjectedDCFCLKDeepSleep,
+									mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 									mode_lib->vba.DSCDelayPerState[i][k],
 									mode_lib->vba.NoOfDPP[i][j][k],
 									mode_lib->vba.ScalerEnabled[k],
@@ -4717,7 +4717,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 											- mode_lib->vba.VActive[k],
 									mode_lib->vba.HTotal[k],
 									mode_lib->vba.MaxInterDCNTileRepeaters,
-									mode_lib->vba.MaximumVStartup[k],
+									mode_lib->vba.MaximumVStartup[0][0][k],
 									mode_lib->vba.GPUVMMaxPageTableLevels,
 									mode_lib->vba.GPUVMEnable,
 									mode_lib->vba.DynamicMetadataEnable[k],
@@ -4727,15 +4727,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									mode_lib->vba.UrgentLatencyPixelDataOnly,
 									mode_lib->vba.ExtraLatency,
 									mode_lib->vba.TimeCalc,
-									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-									mode_lib->vba.MetaRowBytes[k],
-									mode_lib->vba.DPTEBytesPerRow[k],
-									mode_lib->vba.PrefetchLinesY[k],
+									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+									mode_lib->vba.MetaRowBytes[0][0][k],
+									mode_lib->vba.DPTEBytesPerRow[0][0][k],
+									mode_lib->vba.PrefetchLinesY[0][0][k],
 									mode_lib->vba.SwathWidthYPerState[i][j][k],
 									mode_lib->vba.BytePerPixelInDETY[k],
 									mode_lib->vba.PrefillY[k],
 									mode_lib->vba.MaxNumSwY[k],
-									mode_lib->vba.PrefetchLinesC[k],
+									mode_lib->vba.PrefetchLinesC[0][0][k],
 									mode_lib->vba.BytePerPixelInDETC[k],
 									mode_lib->vba.PrefillC[k],
 									mode_lib->vba.MaxNumSwC[k],
@@ -4766,19 +4766,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->prefetch_vm_bw_valid = true;
 				locals->prefetch_row_bw_valid = true;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0)
+					if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0)
 						locals->prefetch_vm_bw[k] = 0;
 					else if (locals->LinesForMetaPTE[k] > 0)
-						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k]
+						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k]
 							/ (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_vm_bw[k] = 0;
 						locals->prefetch_vm_bw_valid = false;
 					}
-					if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0)
+					if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0)
 						locals->prefetch_row_bw[k] = 0;
 					else if (locals->LinesForMetaAndDPTERow[k] > 0)
-						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k])
+						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k])
 							/ (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_row_bw[k] = 0;
@@ -4797,13 +4797,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 											mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])
 											+ mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) {
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) {
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) {
 					locals->PrefetchSupported[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4828,7 +4828,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			if (mode_lib->vba.PrefetchSupported[i][j] == true
 					&& mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
 				mode_lib->vba.BandwidthAvailableForImmediateFlip =
-						mode_lib->vba.ReturnBWPerState[i];
+						mode_lib->vba.ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip =
 							mode_lib->vba.BandwidthAvailableForImmediateFlip
@@ -4842,9 +4842,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
 							&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
 						mode_lib->vba.ImmediateFlipBytes[k] =
-								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k]
-										+ mode_lib->vba.MetaRowBytes[k]
-										+ mode_lib->vba.DPTEBytesPerRow[k];
+								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k]
+										+ mode_lib->vba.MetaRowBytes[0][0][k]
+										+ mode_lib->vba.DPTEBytesPerRow[0][0][k];
 					}
 				}
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
@@ -4872,9 +4872,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									/ mode_lib->vba.PixelClock[k],
 							mode_lib->vba.VRatio[k],
 							mode_lib->vba.Tno_bw[k],
-							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-							mode_lib->vba.MetaRowBytes[k],
-							mode_lib->vba.DPTEBytesPerRow[k],
+							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+							mode_lib->vba.MetaRowBytes[0][0][k],
+							mode_lib->vba.DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.DCCEnable[k],
 							mode_lib->vba.dpte_row_height[k],
 							mode_lib->vba.meta_row_height[k],
@@ -4899,7 +4899,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> mode_lib->vba.ReturnBWPerState[i]) {
+						> mode_lib->vba.ReturnBWPerState[i][0]) {
 					mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4918,13 +4918,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++)
 		mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k];
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth *
+		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth *
 				mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) *
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100;
-		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i])
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true;
+		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0])
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true;
 		else
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false;
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false;
 	}
 
 	/*PTE Buffer Size Check*/
@@ -5012,7 +5012,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5022,7 +5022,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_DSC_CLK_REQUIRED;
 			} else if (locals->UrgentLatencySupport[i][j] != true) {
 				status = DML_FAIL_URGENT_LATENCY;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5042,7 +5042,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_PITCH_SUPPORT;
 			} else if (locals->PrefetchSupported[i][j] != true) {
 				status = DML_FAIL_PREFETCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->VRatioInPrefetchSupported[i][j] != true) {
 				status = DML_FAIL_V_RATIO_PREFETCH;
@@ -5088,7 +5088,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 0fafd693ffb4..22f3b5a4b3b9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -38,6 +38,7 @@
 
 #define BPP_INVALID 0
 #define BPP_BLENDED_PIPE 0xffffffff
+#define DCN20_MAX_DSC_IMAGE_WIDTH 5184
 
 static double adjust_ReturnBW(
 		struct display_mode_lib *mode_lib,
@@ -996,7 +997,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1394,11 +1395,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
 		else
 			mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2610,7 +2611,12 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
 			mode_lib->vba.MinActiveDRAMClockChangeMargin
 					+ mode_lib->vba.DRAMClockChangeLatency;
 
-	if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
+	if (mode_lib->vba.DRAMClockChangeSupportsVActive &&
+			mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) {
+		mode_lib->vba.DRAMClockChangeWatermark += 25;
+		mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
+	} else if (mode_lib->vba.DummyPStateCheck &&
+			mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
 		mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
 	} else {
 		if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
@@ -2879,12 +2885,12 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3379,7 +3385,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3477,10 +3483,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->FabricAndDRAMBandwidthPerState[i] * 1000)
 				* locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
 
-		locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState;
+		locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState;
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3491,7 +3497,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3503,7 +3509,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000);
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3514,7 +3520,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3552,12 +3558,12 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
 				(mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
-				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3901,6 +3907,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				mode_lib->vba.MaximumSwathWidthInLineBuffer);
 	}
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
+		double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
+			mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
+			mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
+
 		for (j = 0; j < 2; j++) {
 			mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
 				mode_lib->vba.MaxDispclk[i],
@@ -3925,7 +3935,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						&& i == mode_lib->vba.soc.num_states)
 					mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
 							* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
-				if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
+				if (mode_lib->vba.ODMCapability == false ||
+						(locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown
+							&& (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN20_MAX_DSC_IMAGE_WIDTH))) {
 					locals->ODMCombineEnablePerState[i][k] = false;
 					mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
 				} else {
@@ -3934,7 +3946,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -4023,16 +4035,16 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4214,8 +4226,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4238,7 +4249,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4280,7 +4291,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4323,7 +4334,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				if (locals->ODMCombineEnablePerState[i][k] == true)
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1)
 					locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k]));
 				else
 					locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
@@ -4376,28 +4387,28 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 
 				locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma +  dml_min(
 						locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] *
-						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesLuma),
 						locals->SwathHeightYPerState[i][j][k]);
 
 				locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
 						locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
-						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesChroma),
 						locals->SwathHeightCPerState[i][j][k]);
 
 				if (locals->BytePerPixelInDETC[k] == 0) {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 							/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]);
+								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
 				} else {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
 						locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 						/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]),
+						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]),
 							locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) -
 							locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 *
-							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]));
+							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]));
 				}
 			}
 		}
@@ -4442,14 +4453,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k];
 				locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k];
 				locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k];
-				mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max(
-						mode_lib->vba.ProjectedDCFCLKDeepSleep,
+				mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max(
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 						mode_lib->vba.PixelClock[k] / 16.0);
 				if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4459,9 +4470,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4472,9 +4483,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					}
 				} else {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4484,9 +4495,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4496,9 +4507,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.RequiredDPPCLK[i][j][k]);
 					}
 					if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4509,9 +4520,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4547,7 +4558,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						&mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k],
 						&mode_lib->vba.dpte_row_height[k],
 						&mode_lib->vba.meta_row_height[k]);
-				mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4586,7 +4597,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 							&mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k],
 							&mode_lib->vba.dpte_row_height_chroma[k],
 							&mode_lib->vba.meta_row_height_chroma[k]);
-					mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k] / 2.0,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4600,14 +4611,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4634,14 +4645,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
 											* mode_lib->vba.MetaChunkSize)
 									* 1024.0
-									/ mode_lib->vba.ReturnBWPerState[i];
+									/ mode_lib->vba.ReturnBWPerState[i][0];
 			if (mode_lib->vba.GPUVMEnable == true) {
 				mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency
 						+ mode_lib->vba.TotalNumberOfActiveDPP[i][j]
 								* mode_lib->vba.PTEGroupSize
-								/ mode_lib->vba.ReturnBWPerState[i];
+								/ mode_lib->vba.ReturnBWPerState[i][0];
 			}
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
@@ -4691,7 +4702,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 			}
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
 			}
 
@@ -4731,7 +4742,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
 					}
 
-					CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth,
+					CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth,
 						mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k],
 						mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k],
 						mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal,
@@ -4745,14 +4756,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									mode_lib->vba.RequiredDPPCLK[i][j][k],
 									mode_lib->vba.RequiredDISPCLK[i][j],
 									mode_lib->vba.PixelClock[k],
-									mode_lib->vba.ProjectedDCFCLKDeepSleep,
+									mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 									mode_lib->vba.NoOfDPP[i][j][k],
 									mode_lib->vba.NumberOfCursors[k],
 									mode_lib->vba.VTotal[k]
 											- mode_lib->vba.VActive[k],
 									mode_lib->vba.HTotal[k],
 									mode_lib->vba.MaxInterDCNTileRepeaters,
-									mode_lib->vba.MaximumVStartup[k],
+									mode_lib->vba.MaximumVStartup[0][0][k],
 									mode_lib->vba.GPUVMMaxPageTableLevels,
 									mode_lib->vba.GPUVMEnable,
 									mode_lib->vba.DynamicMetadataEnable[k],
@@ -4762,15 +4773,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									mode_lib->vba.UrgentLatencyPixelDataOnly,
 									mode_lib->vba.ExtraLatency,
 									mode_lib->vba.TimeCalc,
-									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-									mode_lib->vba.MetaRowBytes[k],
-									mode_lib->vba.DPTEBytesPerRow[k],
-									mode_lib->vba.PrefetchLinesY[k],
+									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+									mode_lib->vba.MetaRowBytes[0][0][k],
+									mode_lib->vba.DPTEBytesPerRow[0][0][k],
+									mode_lib->vba.PrefetchLinesY[0][0][k],
 									mode_lib->vba.SwathWidthYPerState[i][j][k],
 									mode_lib->vba.BytePerPixelInDETY[k],
 									mode_lib->vba.PrefillY[k],
 									mode_lib->vba.MaxNumSwY[k],
-									mode_lib->vba.PrefetchLinesC[k],
+									mode_lib->vba.PrefetchLinesC[0][0][k],
 									mode_lib->vba.BytePerPixelInDETC[k],
 									mode_lib->vba.PrefillC[k],
 									mode_lib->vba.MaxNumSwC[k],
@@ -4800,19 +4811,19 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->prefetch_vm_bw_valid = true;
 				locals->prefetch_row_bw_valid = true;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0)
+					if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0)
 						locals->prefetch_vm_bw[k] = 0;
 					else if (locals->LinesForMetaPTE[k] > 0)
-						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k]
+						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k]
 							/ (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_vm_bw[k] = 0;
 						locals->prefetch_vm_bw_valid = false;
 					}
-					if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0)
+					if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0)
 						locals->prefetch_row_bw[k] = 0;
 					else if (locals->LinesForMetaAndDPTERow[k] > 0)
-						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k])
+						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k])
 							/ (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_row_bw[k] = 0;
@@ -4831,13 +4842,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 											mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])
 											+ mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) {
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) {
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) {
 					locals->PrefetchSupported[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4862,7 +4873,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 			if (mode_lib->vba.PrefetchSupported[i][j] == true
 					&& mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
 				mode_lib->vba.BandwidthAvailableForImmediateFlip =
-						mode_lib->vba.ReturnBWPerState[i];
+						mode_lib->vba.ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip =
 							mode_lib->vba.BandwidthAvailableForImmediateFlip
@@ -4876,9 +4887,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
 							&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
 						mode_lib->vba.ImmediateFlipBytes[k] =
-								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k]
-										+ mode_lib->vba.MetaRowBytes[k]
-										+ mode_lib->vba.DPTEBytesPerRow[k];
+								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k]
+										+ mode_lib->vba.MetaRowBytes[0][0][k]
+										+ mode_lib->vba.DPTEBytesPerRow[0][0][k];
 					}
 				}
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
@@ -4906,9 +4917,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									/ mode_lib->vba.PixelClock[k],
 							mode_lib->vba.VRatio[k],
 							mode_lib->vba.Tno_bw[k],
-							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-							mode_lib->vba.MetaRowBytes[k],
-							mode_lib->vba.DPTEBytesPerRow[k],
+							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+							mode_lib->vba.MetaRowBytes[0][0][k],
+							mode_lib->vba.DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.DCCEnable[k],
 							mode_lib->vba.dpte_row_height[k],
 							mode_lib->vba.meta_row_height[k],
@@ -4933,7 +4944,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				}
 				mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> mode_lib->vba.ReturnBWPerState[i]) {
+						> mode_lib->vba.ReturnBWPerState[i][0]) {
 					mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4949,13 +4960,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 
 	/*Vertical Active BW support*/
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth *
+		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth *
 				mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) *
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100;
-		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i])
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true;
+		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0])
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true;
 		else
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false;
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false;
 	}
 
 	/*PTE Buffer Size Check*/
@@ -5043,7 +5054,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5053,7 +5064,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_DSC_CLK_REQUIRED;
 			} else if (locals->UrgentLatencySupport[i][j] != true) {
 				status = DML_FAIL_URGENT_LATENCY;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5073,7 +5084,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_PITCH_SUPPORT;
 			} else if (locals->PrefetchSupported[i][j] != true) {
 				status = DML_FAIL_PREFETCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->VRatioInPrefetchSupported[i][j] != true) {
 				status = DML_FAIL_V_RATIO_PREFETCH;
@@ -5119,7 +5130,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 878bf4782ce6..ca807846032f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -107,10 +107,10 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format
 
 static bool is_dual_plane(enum source_format_class source_format)
 {
-	bool ret_val = 0;
+	bool ret_val = false;
 
 	if ((source_format == dm_420_8) || (source_format == dm_420_10))
-		ret_val = 1;
+		ret_val = true;
 
 	return ret_val;
 }
@@ -207,7 +207,7 @@ static void extract_rq_regs(struct display_mode_lib *mode_lib,
 	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
 	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
 
-	// FIXME: take the max between luma, chroma chunk size?
+	// TODO: take the max between luma, chroma chunk size?
 	// okay for now, as we are setting chunk_bytes to 8kb anyways
 	if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb
 		rq_regs->drq_expansion_mode = 0;
@@ -240,8 +240,8 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 	unsigned int swath_bytes_c = 0;
 	unsigned int full_swath_bytes_packed_l = 0;
 	unsigned int full_swath_bytes_packed_c = 0;
-	bool req128_l = 0;
-	bool req128_c = 0;
+	bool req128_l = false;
+	bool req128_c = false;
 	bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
 	bool surf_vert = (pipe_src_param.source_scan == dm_vert);
 	unsigned int log2_swath_height_l = 0;
@@ -264,13 +264,13 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
-			req128_l = 0;
-			req128_c = 0;
+			req128_l = false;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		} else { //128b request (for luma only for yuv420 8bpc)
-			req128_l = 1;
-			req128_c = 0;
+			req128_l = true;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l / 2;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		}
@@ -280,9 +280,9 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 		total_swath_bytes = 2 * full_swath_bytes_packed_l;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes)
-			req128_l = 0;
+			req128_l = false;
 		else
-			req128_l = 1;
+			req128_l = true;
 
 		swath_bytes_l = total_swath_bytes;
 		swath_bytes_c = 0;
@@ -670,14 +670,14 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
 		const display_pipe_source_params_st pipe_src_param,
 		bool is_chroma)
 {
-	bool mode_422 = 0;
+	bool mode_422 = false;
 	unsigned int vp_width = 0;
 	unsigned int vp_height = 0;
 	unsigned int data_pitch = 0;
 	unsigned int meta_pitch = 0;
 	unsigned int ppe = mode_422 ? 2 : 1;
 
-	// FIXME check if ppe apply for both luma and chroma in 422 case
+	// TODO check if ppe apply for both luma and chroma in 422 case
 	if (is_chroma) {
 		vp_width = pipe_src_param.viewport_width_c / ppe;
 		vp_height = pipe_src_param.viewport_height_c;
@@ -929,8 +929,7 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
 	min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal;
 	dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
 
-	disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start
-			+ min_dst_y_ttu_vblank) * dml_pow(2, 2));
+	disp_dlg_regs->min_dst_y_next_start = (unsigned int) ((double) dlg_vblank_start * dml_pow(2, 2));
 	ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18));
 
 	dml_print("DML_DLG: %s: min_dcfclk_mhz                         = %3.2f\n",
@@ -959,7 +958,7 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
 	// Source
 //             dcc_en              = src.dcc;
 	dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
-	mode_422 = 0; // FIXME
+	mode_422 = false; // TODO
 	access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
 //      bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
 //      bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1655,7 +1654,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
 		cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
 				* (double) cur_req_width;
 		cur_req_per_width = cur_width_ub / (double) cur_req_width;
-		hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
+		hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor
 
 		if (vratio_pre_l <= 1.0) {
 			*refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index ed8bf5f723c9..287b7a0ad108 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -107,10 +107,10 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format
 
 static bool is_dual_plane(enum source_format_class source_format)
 {
-	bool ret_val = 0;
+	bool ret_val = false;
 
 	if ((source_format == dm_420_8) || (source_format == dm_420_10))
-		ret_val = 1;
+		ret_val = true;
 
 	return ret_val;
 }
@@ -207,7 +207,7 @@ static void extract_rq_regs(struct display_mode_lib *mode_lib,
 	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
 	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
 
-	// FIXME: take the max between luma, chroma chunk size?
+	// TODO: take the max between luma, chroma chunk size?
 	// okay for now, as we are setting chunk_bytes to 8kb anyways
 	if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb
 		rq_regs->drq_expansion_mode = 0;
@@ -240,8 +240,8 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 	unsigned int swath_bytes_c = 0;
 	unsigned int full_swath_bytes_packed_l = 0;
 	unsigned int full_swath_bytes_packed_c = 0;
-	bool req128_l = 0;
-	bool req128_c = 0;
+	bool req128_l = false;
+	bool req128_c = false;
 	bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
 	bool surf_vert = (pipe_src_param.source_scan == dm_vert);
 	unsigned int log2_swath_height_l = 0;
@@ -264,13 +264,13 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
-			req128_l = 0;
-			req128_c = 0;
+			req128_l = false;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		} else { //128b request (for luma only for yuv420 8bpc)
-			req128_l = 1;
-			req128_c = 0;
+			req128_l = true;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l / 2;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		}
@@ -280,9 +280,9 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
 		total_swath_bytes = 2 * full_swath_bytes_packed_l;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes)
-			req128_l = 0;
+			req128_l = false;
 		else
-			req128_l = 1;
+			req128_l = true;
 
 		swath_bytes_l = total_swath_bytes;
 		swath_bytes_c = 0;
@@ -670,14 +670,14 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
 		const display_pipe_source_params_st pipe_src_param,
 		bool is_chroma)
 {
-	bool mode_422 = 0;
+	bool mode_422 = false;
 	unsigned int vp_width = 0;
 	unsigned int vp_height = 0;
 	unsigned int data_pitch = 0;
 	unsigned int meta_pitch = 0;
 	unsigned int ppe = mode_422 ? 2 : 1;
 
-	// FIXME check if ppe apply for both luma and chroma in 422 case
+	// TODO check if ppe apply for both luma and chroma in 422 case
 	if (is_chroma) {
 		vp_width = pipe_src_param.viewport_width_c / ppe;
 		vp_height = pipe_src_param.viewport_height_c;
@@ -959,7 +959,7 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
 	// Source
 //             dcc_en              = src.dcc;
 	dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
-	mode_422 = 0; // FIXME
+	mode_422 = false; // TODO
 	access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
 //      bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
 //      bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1655,7 +1655,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
 		cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
 				* (double) cur_req_width;
 		cur_req_per_width = cur_width_ub / (double) cur_req_width;
-		hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
+		hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor
 
 		if (vratio_pre_l <= 1.0) {
 			*refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 3b6ed60dcd35..af35b3bea909 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -23,7 +23,6 @@
  *
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 
 #include "../display_mode_lib.h"
 #include "../dml_inline_defs.h"
@@ -65,6 +64,7 @@ typedef struct {
 
 #define BPP_INVALID 0
 #define BPP_BLENDED_PIPE 0xffffffff
+#define DCN21_MAX_DSC_IMAGE_WIDTH 5184
 
 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
@@ -197,7 +197,7 @@ static unsigned int CalculateVMAndRowBytes(
 		unsigned int *meta_row_width,
 		unsigned int *meta_row_height,
 		unsigned int *vm_group_bytes,
-		long         *dpte_group_bytes,
+		unsigned int *dpte_group_bytes,
 		unsigned int *PixelPTEReqWidth,
 		unsigned int *PixelPTEReqHeight,
 		unsigned int *PTERequestSize,
@@ -295,7 +295,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		double UrgentOutOfOrderReturn,
 		double ReturnBW,
 		bool GPUVMEnable,
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int MetaChunkSize,
 		double UrgentLatency,
 		double ExtraLatency,
@@ -309,13 +309,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		int DPPPerPlane[],
 		bool DCCEnable[],
 		double DPPCLK[],
-		unsigned int SwathWidthSingleDPPY[],
+		double SwathWidthSingleDPPY[],
 		unsigned int SwathHeightY[],
 		double ReadBandwidthPlaneLuma[],
 		unsigned int SwathHeightC[],
 		double ReadBandwidthPlaneChroma[],
 		unsigned int LBBitPerPixel[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		double HRatio[],
 		unsigned int vtaps[],
 		unsigned int VTAPsChroma[],
@@ -344,7 +344,7 @@ static void CalculateDCFCLKDeepSleep(
 		double BytePerPixelDETY[],
 		double BytePerPixelDETC[],
 		double VRatio[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		int DPPPerPlane[],
 		double HRatio[],
 		double PixelClock[],
@@ -435,7 +435,7 @@ static void CalculateMetaAndPTETimes(
 		unsigned int           meta_row_height[],
 		unsigned int           meta_req_width[],
 		unsigned int           meta_req_height[],
-		long                   dpte_group_bytes[],
+		int                   dpte_group_bytes[],
 		unsigned int           PTERequestSizeY[],
 		unsigned int           PTERequestSizeC[],
 		unsigned int           PixelPTEReqWidthY[],
@@ -477,7 +477,7 @@ static double CalculateExtraLatency(
 		bool HostVMEnable,
 		int NumberOfActivePlanes,
 		int NumberOfDPP[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
 		int HostVMMaxPageTableLevels,
@@ -1280,7 +1280,7 @@ static unsigned int CalculateVMAndRowBytes(
 		unsigned int *meta_row_width,
 		unsigned int *meta_row_height,
 		unsigned int *vm_group_bytes,
-		long         *dpte_group_bytes,
+		unsigned int *dpte_group_bytes,
 		unsigned int *PixelPTEReqWidth,
 		unsigned int *PixelPTEReqHeight,
 		unsigned int *PTERequestSize,
@@ -1338,7 +1338,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1683,11 +1683,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 		else
 			locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2940,12 +2940,12 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3379,6 +3379,8 @@ static unsigned int TruncToValidBPP(
 				return 30;
 			else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
 				return 24;
+			else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
+				return 18;
 			else
 				return BPP_INVALID;
 		}
@@ -3451,7 +3453,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3540,17 +3542,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->IdealSDPPortBandwidthPerState[i] = dml_min3(
+		locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
 				mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
 				mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
 						* mode_lib->vba.DRAMChannelWidth,
 				mode_lib->vba.FabricClockPerState[i]
 						* mode_lib->vba.FabricDatapathToDCNDataReturn);
 		if (mode_lib->vba.HostVMEnable == false) {
-			locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
+			locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
 					* mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
 		} else {
-			locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
+			locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
 					* mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
 		}
 	}
@@ -3587,12 +3589,12 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
 						mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
 						mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
-					* mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+					* mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3936,6 +3938,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.MaximumSwathWidthInLineBuffer);
 	}
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
+		double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
+			mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
+			mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
+
 		for (j = 0; j < 2; j++) {
 			mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
 				mode_lib->vba.MaxDispclk[i],
@@ -3965,7 +3971,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						&& i == mode_lib->vba.soc.num_states)
 					mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
 							* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
-				if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
+				if (mode_lib->vba.ODMCapability == false ||
+						(locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown
+							&& (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN21_MAX_DSC_IMAGE_WIDTH))) {
 					locals->ODMCombineEnablePerState[i][k] = false;
 					mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
 				} else {
@@ -3974,7 +3982,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -4063,16 +4071,16 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4113,11 +4121,11 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			locals->RequiresDSC[i][k] = 0;
+			locals->RequiresDSC[i][k] = false;
 			locals->RequiresFEC[i][k] = 0;
 			if (mode_lib->vba.BlendingAndTiming[k] == k) {
 				if (mode_lib->vba.Output[k] == dm_hdmi) {
-					locals->RequiresDSC[i][k] = 0;
+					locals->RequiresDSC[i][k] = false;
 					locals->RequiresFEC[i][k] = 0;
 					locals->OutputBppPerState[i][k] = TruncToValidBPP(
 							dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
@@ -4261,8 +4269,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4285,7 +4292,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4327,7 +4334,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4391,7 +4398,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
 				locals->NoOfDPPThisState[k]        = locals->NoOfDPP[i][j][k];
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					locals->SwathWidthYThisState[k] =
 						dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
 				} else {
@@ -4443,7 +4450,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					locals->PSCL_FACTOR,
 					locals->PSCL_FACTOR_CHROMA,
 					locals->RequiredDPPCLKThisState,
-					&mode_lib->vba.ProjectedDCFCLKDeepSleep);
+					&mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
@@ -4488,7 +4495,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							locals->PTERequestSizeC,
 							locals->dpde0_bytes_per_frame_ub_c,
 							locals->meta_pte_bytes_per_frame_ub_c);
-					locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k]/2,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4503,7 +4510,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
@@ -4544,7 +4551,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						locals->PTERequestSizeY,
 						locals->dpde0_bytes_per_frame_ub_l,
 						locals->meta_pte_bytes_per_frame_ub_l);
-				locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4554,10 +4561,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.ViewportYStartY[k],
 						&locals->PrefillY[k],
 						&locals->MaxNumSwY[k]);
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4583,7 +4590,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PixelChunkSizeInKByte,
 					locals->TotalNumberOfDCCActiveDPP[i][j],
 					mode_lib->vba.MetaChunkSize,
-					locals->ReturnBWPerState[i],
+					locals->ReturnBWPerState[i][0],
 					mode_lib->vba.GPUVMEnable,
 					mode_lib->vba.HostVMEnable,
 					mode_lib->vba.NumberOfActivePlanes,
@@ -4594,7 +4601,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.HostVMMaxPageTableLevels,
 					mode_lib->vba.HostVMCachedPageTableLevels);
 
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
 					if (mode_lib->vba.WritebackEnable[k] == true) {
@@ -4636,15 +4643,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					}
 				}
 			}
-			mode_lib->vba.MaxMaxVStartup = 0;
+			mode_lib->vba.MaxMaxVStartup[0][0] = 0;
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
-				mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]);
+				mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
 			}
 
 			mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
-			mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
+			mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
 			do {
 				mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
 				mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
@@ -4685,7 +4692,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
 					myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
 					myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep;
+					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 					myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
 					myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
 					myPipe.SourceScan = mode_lib->vba.SourceScan[k];
@@ -4719,8 +4726,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
 							mode_lib->vba.OutputFormat[k],
 							mode_lib->vba.MaxInterDCNTileRepeaters,
-							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]),
-							locals->MaximumVStartup[k],
+							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
+							locals->MaximumVStartup[0][0][k],
 							mode_lib->vba.GPUVMMaxPageTableLevels,
 							mode_lib->vba.GPUVMEnable,
 							&myHostVM,
@@ -4731,15 +4738,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							mode_lib->vba.UrgentLatency,
 							mode_lib->vba.ExtraLatency,
 							mode_lib->vba.TimeCalc,
-							locals->PDEAndMetaPTEBytesPerFrame[k],
-							locals->MetaRowBytes[k],
-							locals->DPTEBytesPerRow[k],
-							locals->PrefetchLinesY[k],
+							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+							locals->MetaRowBytes[0][0][k],
+							locals->DPTEBytesPerRow[0][0][k],
+							locals->PrefetchLinesY[0][0][k],
 							locals->SwathWidthYThisState[k],
 							locals->BytePerPixelInDETY[k],
 							locals->PrefillY[k],
 							locals->MaxNumSwY[k],
-							locals->PrefetchLinesC[k],
+							locals->PrefetchLinesC[0][0][k],
 							locals->BytePerPixelInDETC[k],
 							locals->PrefillC[k],
 							locals->MaxNumSwC[k],
@@ -4828,14 +4835,14 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						+ locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
 						+ locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
 						|| locals->NotEnoughUrgentLatencyHiding == 1) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
 						|| locals->NotEnoughUrgentLatencyHiding == 1
 						|| locals->NotEnoughUrgentLatencyHidingPre == 1) {
 					locals->PrefetchSupported[i][j] = false;
@@ -4864,17 +4871,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 
 				if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
-					mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
+					mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
 					mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
 				} else {
 					mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
 				}
 			} while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
-					&& (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup
+					&& (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
 						|| mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
 
 			if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
-				mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i];
+				mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
 						- dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
@@ -4887,7 +4894,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
-						+ locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k];
+						+ locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
 				}
 
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4902,9 +4909,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							mode_lib->vba.HostVMMaxPageTableLevels,
 							mode_lib->vba.HostVMCachedPageTableLevels,
 							mode_lib->vba.GPUVMEnable,
-							locals->PDEAndMetaPTEBytesPerFrame[k],
-							locals->MetaRowBytes[k],
-							locals->DPTEBytesPerRow[k],
+							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+							locals->MetaRowBytes[0][0][k],
+							locals->DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.BandwidthAvailableForImmediateFlip,
 							mode_lib->vba.TotImmediateFlipBytes,
 							mode_lib->vba.SourcePixelFormat[k],
@@ -4935,7 +4942,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				locals->ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> locals->ReturnBWPerState[i]) {
+						> locals->ReturnBWPerState[i][0]) {
 					locals->ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4962,7 +4969,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.WritebackInterfaceChromaBufferSize,
 					mode_lib->vba.DCFCLKPerState[i],
 					mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
-					locals->ReturnBWPerState[i],
+					locals->ReturnBWPerState[i][0],
 					mode_lib->vba.GPUVMEnable,
 					locals->dpte_group_bytes,
 					mode_lib->vba.MetaChunkSize,
@@ -4974,7 +4981,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.DRAMClockChangeLatency,
 					mode_lib->vba.SRExitTime,
 					mode_lib->vba.SREnterPlusExitTime,
-					mode_lib->vba.ProjectedDCFCLKDeepSleep,
+					mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 					locals->NoOfDPPThisState,
 					mode_lib->vba.DCCEnable,
 					locals->RequiredDPPCLKThisState,
@@ -5017,8 +5024,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
 		}
 		for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
-			locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(
-				locals->IdealSDPPortBandwidthPerState[i] *
+			locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
+				locals->IdealSDPPortBandwidthPerState[i][0] *
 				mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
 				/ 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
 				mode_lib->vba.NumberOfChannels *
@@ -5026,10 +5033,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
 				/ 100.0);
 
-			if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) {
-				locals->TotalVerticalActiveBandwidthSupport[i] = true;
+			if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
+				locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
 			} else {
-				locals->TotalVerticalActiveBandwidthSupport[i] = false;
+				locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
 			}
 		}
 	}
@@ -5108,7 +5115,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5116,7 +5123,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_NOT_ENOUGH_DSC;
 			} else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) {
 				status = DML_FAIL_DSC_CLK_REQUIRED;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5134,7 +5141,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_CURSOR_SUPPORT;
 			} else if (mode_lib->vba.PitchSupport != true) {
 				status = DML_FAIL_PITCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->PTEBufferSizeNotExceeded[i][j] != true) {
 				status = DML_FAIL_PTE_BUFFER_SIZE;
@@ -5190,13 +5197,13 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
 			mode_lib->vba.ODMCombineEnabled[k] =
 					locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
 		} else {
-			mode_lib->vba.ODMCombineEnabled[k] = 0;
+			mode_lib->vba.ODMCombineEnabled[k] = false;
 		}
 		mode_lib->vba.DSCEnabled[k] =
 				locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
@@ -5219,7 +5226,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		double UrgentOutOfOrderReturn,
 		double ReturnBW,
 		bool GPUVMEnable,
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int MetaChunkSize,
 		double UrgentLatency,
 		double ExtraLatency,
@@ -5233,13 +5240,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		int DPPPerPlane[],
 		bool DCCEnable[],
 		double DPPCLK[],
-		unsigned int SwathWidthSingleDPPY[],
+		double SwathWidthSingleDPPY[],
 		unsigned int SwathHeightY[],
 		double ReadBandwidthPlaneLuma[],
 		unsigned int SwathHeightC[],
 		double ReadBandwidthPlaneChroma[],
 		unsigned int LBBitPerPixel[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		double HRatio[],
 		unsigned int vtaps[],
 		unsigned int VTAPsChroma[],
@@ -5495,7 +5502,7 @@ static void CalculateDCFCLKDeepSleep(
 		double BytePerPixelDETY[],
 		double BytePerPixelDETC[],
 		double VRatio[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		int DPPPerPlane[],
 		double HRatio[],
 		double PixelClock[],
@@ -5823,7 +5830,7 @@ static void CalculateMetaAndPTETimes(
 		unsigned int meta_row_height[],
 		unsigned int meta_req_width[],
 		unsigned int meta_req_height[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int PTERequestSizeY[],
 		unsigned int PTERequestSizeC[],
 		unsigned int PixelPTEReqWidthY[],
@@ -6079,7 +6086,7 @@ static double CalculateExtraLatency(
 		bool HostVMEnable,
 		int NumberOfActivePlanes,
 		int NumberOfDPP[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
 		int HostVMMaxPageTableLevels,
@@ -6117,4 +6124,3 @@ static double CalculateExtraLatency(
 	return CalculateExtraLatency;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index a1f207cbb966..a38baa73d484 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -23,7 +23,6 @@
  *
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 
 #include "../display_mode_lib.h"
 #include "../display_mode_vba.h"
@@ -83,10 +82,10 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format
 
 static bool is_dual_plane(enum source_format_class source_format)
 {
-	bool ret_val = 0;
+	bool ret_val = false;
 
 	if ((source_format == dm_420_8) || (source_format == dm_420_10))
-		ret_val = 1;
+		ret_val = true;
 
 	return ret_val;
 }
@@ -223,8 +222,8 @@ static void handle_det_buf_split(
 	unsigned int swath_bytes_c = 0;
 	unsigned int full_swath_bytes_packed_l = 0;
 	unsigned int full_swath_bytes_packed_c = 0;
-	bool req128_l = 0;
-	bool req128_c = 0;
+	bool req128_l = false;
+	bool req128_c = false;
 	bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
 	bool surf_vert = (pipe_src_param.source_scan == dm_vert);
 	unsigned int log2_swath_height_l = 0;
@@ -249,13 +248,13 @@ static void handle_det_buf_split(
 		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
-			req128_l = 0;
-			req128_c = 0;
+			req128_l = false;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		} else { //128b request (for luma only for yuv420 8bpc)
-			req128_l = 1;
-			req128_c = 0;
+			req128_l = true;
+			req128_c = false;
 			swath_bytes_l = full_swath_bytes_packed_l / 2;
 			swath_bytes_c = full_swath_bytes_packed_c;
 		}
@@ -265,9 +264,9 @@ static void handle_det_buf_split(
 		total_swath_bytes = 2 * full_swath_bytes_packed_l;
 
 		if (total_swath_bytes <= detile_buf_size_in_bytes)
-			req128_l = 0;
+			req128_l = false;
 		else
-			req128_l = 1;
+			req128_l = true;
 
 		swath_bytes_l = total_swath_bytes;
 		swath_bytes_c = 0;
@@ -680,7 +679,7 @@ static void get_surf_rq_param(
 		const display_pipe_params_st pipe_param,
 		bool is_chroma)
 {
-	bool mode_422 = 0;
+	bool mode_422 = false;
 	unsigned int vp_width = 0;
 	unsigned int vp_height = 0;
 	unsigned int data_pitch = 0;
@@ -1011,7 +1010,7 @@ static void dml_rq_dlg_get_dlg_params(
 	// Source
 	//             dcc_en              = src.dcc;
 	dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
-	mode_422 = 0; // FIXME
+	mode_422 = false; // FIXME
 	access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
 						    //      bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
 						    //      bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1523,8 +1522,8 @@ static void dml_rq_dlg_get_dlg_params(
 
 	disp_dlg_regs->refcyc_per_vm_group_vblank   = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
 	disp_dlg_regs->refcyc_per_vm_group_flip     = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
-	disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;;
-	disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;;
+	disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
+	disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
 
 	// Clamp to max for now
 	if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
@@ -1820,4 +1819,3 @@ static void calculate_ttu_cursor(
 	}
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index 1c97083b8d0b..bfc2f39bd1ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -85,7 +85,7 @@ enum dm_swizzle_mode {
 	dm_sw_var_s_x = 29,
 	dm_sw_var_d_x = 30,
 	dm_sw_64kb_r_x,
-	dm_sw_gfx7_2d_thin_lvp,
+	dm_sw_gfx7_2d_thin_l_vp,
 	dm_sw_gfx7_2d_thin_gl,
 };
 enum lb_depth {
@@ -119,6 +119,10 @@ enum mpc_combine_affinity {
 	dm_mpc_never
 };
 
+enum RequestType {
+	REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
+};
+
 enum self_refresh_affinity {
 	dm_try_to_allow_self_refresh_and_mclk_switch,
 	dm_allow_self_refresh_and_mclk_switch,
@@ -135,9 +139,7 @@ enum dm_validation_status {
 	DML_FAIL_DIO_SUPPORT,
 	DML_FAIL_NOT_ENOUGH_DSC,
 	DML_FAIL_DSC_CLK_REQUIRED,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	DML_FAIL_DSC_VALIDATION_FAILURE,
-#endif
 	DML_FAIL_URGENT_LATENCY,
 	DML_FAIL_REORDERING_BUFFER,
 	DML_FAIL_DISPCLK_DPPCLK,
@@ -167,4 +169,16 @@ enum odm_combine_mode {
 	dm_odm_combine_mode_4to1,
 };
 
+enum odm_combine_policy {
+	dm_odm_combine_policy_dal,
+	dm_odm_combine_policy_none,
+	dm_odm_combine_policy_2to1,
+	dm_odm_combine_policy_4to1,
+};
+
+enum immediate_flip_requirement {
+	dm_immediate_flip_not_required,
+	dm_immediate_flip_required,
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 704efefdcba8..2689401a03a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -25,18 +25,13 @@
 
 #include "display_mode_lib.h"
 #include "dc_features.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dcn20/display_mode_vba_20.h"
 #include "dcn20/display_rq_dlg_calc_20.h"
 #include "dcn20/display_mode_vba_20v2.h"
 #include "dcn20/display_rq_dlg_calc_20v2.h"
-#endif
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 #include "dcn21/display_mode_vba_21.h"
 #include "dcn21/display_rq_dlg_calc_21.h"
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 const struct dml_funcs dml20_funcs = {
 	.validate = dml20_ModeSupportAndSystemConfigurationFull,
 	.recalculate = dml20_recalculate,
@@ -50,16 +45,13 @@ const struct dml_funcs dml20v2_funcs = {
 	.rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg,
 	.rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg
 };
-#endif
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 const struct dml_funcs dml21_funcs = {
         .validate = dml21_ModeSupportAndSystemConfigurationFull,
         .recalculate = dml21_recalculate,
         .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg,
         .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg
 };
-#endif
 
 void dml_init_instance(struct display_mode_lib *lib,
 		const struct _vcs_dpi_soc_bounding_box_st *soc_bb,
@@ -70,19 +62,15 @@ void dml_init_instance(struct display_mode_lib *lib,
 	lib->ip = *ip_params;
 	lib->project = project;
 	switch (project) {
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	case DML_PROJECT_NAVI10:
 		lib->funcs = dml20_funcs;
 		break;
 	case DML_PROJECT_NAVI10v2:
 		lib->funcs = dml20v2_funcs;
 		break;
-#endif
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
         case DML_PROJECT_DCN21:
                 lib->funcs = dml21_funcs;
                 break;
-#endif
 
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index d8c59aa356b6..cf2758ca5b02 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -27,20 +27,14 @@
 
 
 #include "dml_common_defs.h"
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 #include "display_mode_vba.h"
-#endif
 
 enum dml_project {
 	DML_PROJECT_UNDEFINED,
 	DML_PROJECT_RAVEN1,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	DML_PROJECT_NAVI10,
 	DML_PROJECT_NAVI10v2,
-#endif
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 	DML_PROJECT_DCN21,
-#endif
 };
 
 struct display_mode_lib;
@@ -70,9 +64,7 @@ struct display_mode_lib {
 	struct _vcs_dpi_ip_params_st ip;
 	struct _vcs_dpi_soc_bounding_box_st soc;
 	enum dml_project project;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	struct vba_vars_st vba;
-#endif
 	struct dal_logger *logger;
 	struct dml_funcs funcs;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index f4c1ef9046bf..658f81e757e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -63,6 +63,7 @@ struct _vcs_dpi_voltage_scaling_st {
 	double dispclk_mhz;
 	double phyclk_mhz;
 	double dppclk_mhz;
+	double dtbclk_mhz;
 };
 
 struct _vcs_dpi_soc_bounding_box_st {
@@ -99,6 +100,7 @@ struct _vcs_dpi_soc_bounding_box_st {
 	unsigned int num_chans;
 	unsigned int vmm_page_size_bytes;
 	unsigned int hostvm_min_page_size_bytes;
+	unsigned int gpuvm_min_page_size_bytes;
 	double dram_clock_change_latency_us;
 	double dummy_pstate_latency_us;
 	double writeback_dram_clock_change_latency_us;
@@ -112,6 +114,7 @@ struct _vcs_dpi_soc_bounding_box_st {
 	bool do_urgent_latency_adjustment;
 	double urgent_latency_adjustment_fabric_clock_component_us;
 	double urgent_latency_adjustment_fabric_clock_reference_mhz;
+	bool disable_dram_clock_change_vactive_support;
 };
 
 struct _vcs_dpi_ip_params_st {
@@ -145,7 +148,6 @@ struct _vcs_dpi_ip_params_st {
 	unsigned int writeback_interface_buffer_size_kbytes;
 	unsigned int writeback_line_buffer_buffer_size;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	unsigned int writeback_10bpc420_supported;
 	double writeback_max_hscl_ratio;
 	double writeback_max_vscl_ratio;
@@ -155,7 +157,6 @@ struct _vcs_dpi_ip_params_st {
 	unsigned int writeback_max_vscl_taps;
 	unsigned int writeback_line_buffer_luma_buffer_size;
 	unsigned int writeback_line_buffer_chroma_buffer_size;
-#endif
 
 	unsigned int max_page_table_levels;
 	unsigned int max_num_dpp;
@@ -214,6 +215,7 @@ struct _vcs_dpi_display_pipe_source_params_st {
 	int source_format;
 	unsigned char dcc;
 	unsigned int dcc_rate;
+	unsigned int dcc_rate_chroma;
 	unsigned char dcc_use_global;
 	unsigned char vm;
 	bool gpuvm;    // gpuvm enabled
@@ -225,6 +227,10 @@ struct _vcs_dpi_display_pipe_source_params_st {
 	int source_scan;
 	int sw_mode;
 	int macro_tile_size;
+	unsigned int surface_width_y;
+	unsigned int surface_height_y;
+	unsigned int surface_width_c;
+	unsigned int surface_height_c;
 	unsigned int viewport_width;
 	unsigned int viewport_height;
 	unsigned int viewport_y_y;
@@ -269,7 +275,7 @@ struct writeback_st {
 
 struct _vcs_dpi_display_output_params_st {
 	int dp_lanes;
-	int output_bpp;
+	double output_bpp;
 	int dsc_enable;
 	int wb_enable;
 	int num_active_wb;
@@ -277,6 +283,7 @@ struct _vcs_dpi_display_output_params_st {
 	int output_type;
 	int output_format;
 	int dsc_slices;
+	int max_audio_sample_rate;
 	struct writeback_st wb;
 };
 
@@ -318,10 +325,11 @@ struct _vcs_dpi_display_pipe_dest_params_st {
 	unsigned int vupdate_width;
 	unsigned int vready_offset;
 	unsigned char interlaced;
+	unsigned char embedded;
 	double pixel_rate_mhz;
 	unsigned char synchronized_vblank_all_planes;
 	unsigned char otg_inst;
-	unsigned char odm_combine;
+	unsigned int odm_combine;
 	unsigned char use_maximum_vstartup;
 	unsigned int vtotal_max;
 	unsigned int vtotal_min;
@@ -400,6 +408,7 @@ struct _vcs_dpi_display_rq_misc_params_st {
 struct _vcs_dpi_display_rq_params_st {
 	unsigned char yuv420;
 	unsigned char yuv420_10bpc;
+	unsigned char rgbe_alpha;
 	display_rq_misc_params_st misc;
 	display_rq_sizing_params_st sizing;
 	display_rq_dlg_params_st dlg;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 65cf4edddaff..b3c96d9b472f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -23,7 +23,6 @@
  *
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 
 #include "display_mode_lib.h"
 #include "display_mode_vba.h"
@@ -222,13 +221,17 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
 	mode_lib->vba.SRExitTime = soc->sr_exit_time_us;
 	mode_lib->vba.SREnterPlusExitTime = soc->sr_enter_plus_exit_time_us;
 	mode_lib->vba.DRAMClockChangeLatency = soc->dram_clock_change_latency_us;
+	mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us;
+	mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support ||
+			mode_lib->vba.DummyPStateCheck;
+
 	mode_lib->vba.Downspreading = soc->downspread_percent;
 	mode_lib->vba.DRAMChannelWidth = soc->dram_channel_width_bytes;   // new!
 	mode_lib->vba.FabricDatapathToDCNDataReturn = soc->fabric_datapath_to_dcn_data_return_bytes; // new!
 	mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading = soc->dcn_downspread_percent;   // new
 	mode_lib->vba.DISPCLKDPPCLKVCOSpeed = soc->dispclk_dppclk_vco_speed_mhz;   // new
 	mode_lib->vba.VMMPageSize = soc->vmm_page_size_bytes;
-	mode_lib->vba.GPUVMMinPageSize = soc->vmm_page_size_bytes / 1024;
+	mode_lib->vba.GPUVMMinPageSize = soc->gpuvm_min_page_size_bytes / 1024;
 	mode_lib->vba.HostVMMinPageSize = soc->hostvm_min_page_size_bytes / 1024;
 	// Set the voltage scaling clocks as the defaults. Most of these will
 	// be set to different values by the test
@@ -261,7 +264,10 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
 		mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts;
 		//mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz;
 		mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz;
+		mode_lib->vba.DTBCLKPerState[i] = soc->clock_limits[i].dtbclk_mhz;
 	}
+	mode_lib->vba.MinVoltageLevel = 0;
+	mode_lib->vba.MaxVoltageLevel = mode_lib->vba.soc.num_states;
 
 	mode_lib->vba.DoUrgentLatencyAdjustment =
 		soc->do_urgent_latency_adjustment;
@@ -303,8 +309,6 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
 
 	mode_lib->vba.WritebackInterfaceBufferSize = ip->writeback_interface_buffer_size_kbytes;
 	mode_lib->vba.WritebackLineBufferSize = ip->writeback_line_buffer_buffer_size;
-	mode_lib->vba.MinVoltageLevel = 0;
-	mode_lib->vba.MaxVoltageLevel = 5;
 
 	mode_lib->vba.WritebackChromaLineBufferWidth =
 			ip->writeback_chroma_line_buffer_width_pixels;
@@ -375,6 +379,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 
 		mode_lib->vba.pipe_plane[j] = mode_lib->vba.NumberOfActivePlanes;
 
+		mode_lib->vba.EmbeddedPanel[mode_lib->vba.NumberOfActivePlanes] = dst->embedded;
 		mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes] = 1;
 		mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] =
 				(enum scan_direction_class) (src->source_scan);
@@ -419,8 +424,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 						ip->dcc_supported : src->dcc && ip->dcc_supported;
 		mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate;
 		/* TODO: Needs to be set based on src->dcc_rate_luma/chroma */
-		mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = 0;
-		mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = 0;
+		mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate;
+		mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate_chroma;
 
 		mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] =
 				(enum source_format_class) (src->source_format);
@@ -434,6 +439,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 				dst->odm_combine;
 		mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] =
 				(enum output_format_class) (dout->output_format);
+		mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] =
+				dout->output_bpp;
 		mode_lib->vba.Output[mode_lib->vba.NumberOfActivePlanes] =
 				(enum output_encoder_class) (dout->output_type);
 
@@ -446,7 +453,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 				dout->dp_lanes;
 		/* TODO: Needs to be set based on dout->audio.audio_sample_rate_khz/sample_layout */
 		mode_lib->vba.AudioSampleRate[mode_lib->vba.NumberOfActivePlanes] =
-			44.1 * 1000;
+			dout->max_audio_sample_rate;
 		mode_lib->vba.AudioSampleLayout[mode_lib->vba.NumberOfActivePlanes] =
 			1;
 		mode_lib->vba.DRAMClockChangeLatencyOverride = 0.0;
@@ -582,6 +589,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 			for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) {
 				display_pipe_source_params_st *src_k = &pipes[k].pipe.src;
 				display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest;
+				display_output_params_st *dout_k = &pipes[j].dout;
 
 				if (src_k->is_hsplit && !visited[k]
 						&& src->hsplit_grp == src_k->hsplit_grp) {
@@ -592,12 +600,18 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 							== dm_horz) {
 						mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=
 								src_k->viewport_width;
+						mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] +=
+								src_k->viewport_width;
 						mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] +=
 								dst_k->recout_width;
 					} else {
 						mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] +=
 								src_k->viewport_height;
+						mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] +=
+								src_k->viewport_height;
 					}
+					mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] +=
+							dout_k->dsc_slices;
 
 					visited[k] = true;
 				}
@@ -803,7 +817,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
 	unsigned int total_pipes = 0;
 
 	mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage;
-	mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
+	if (mode_lib->vba.ReturnBW == 0)
+		mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 
 	fetch_socbb_params(mode_lib);
@@ -853,4 +869,3 @@ double CalculateWriteBackDISPCLK(
 	return CalculateWriteBackDISPCLK;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 91decac50557..e7a44df676ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -23,7 +23,6 @@
  *
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 
 #ifndef __DML2_DISPLAY_MODE_VBA_H__
 #define __DML2_DISPLAY_MODE_VBA_H__
@@ -155,7 +154,10 @@ struct vba_vars_st {
 	double UrgentLatencySupportUsChroma;
 	unsigned int DSCFormatFactor;
 
+	bool DummyPStateCheck;
+	bool DRAMClockChangeSupportsVActive;
 	bool PrefetchModeSupported;
+	bool PrefetchAndImmediateFlipSupported;
 	enum self_refresh_affinity AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; // Mode Support only
 	double XFCRemoteSurfaceFlipDelay;
 	double TInitXFill;
@@ -317,8 +319,7 @@ struct vba_vars_st {
 	unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX];
 	double DCCRate[DC__NUM_DPP__MAX];
 	double AverageDCCCompressionRate;
-	bool ODMCombineEnabled[DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnabled[DC__NUM_DPP__MAX];
 	double OutputBpp[DC__NUM_DPP__MAX];
 	bool DSCEnabled[DC__NUM_DPP__MAX];
 	unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX];
@@ -346,6 +347,7 @@ struct vba_vars_st {
 	unsigned int EffectiveLBLatencyHidingSourceLinesChroma;
 	double BandwidthAvailableForImmediateFlip;
 	unsigned int PrefetchMode[DC__VOLTAGE_STATES + 1][2];
+	unsigned int PrefetchModePerState[DC__VOLTAGE_STATES + 1][2];
 	unsigned int MinPrefetchMode;
 	unsigned int MaxPrefetchMode;
 	bool AnyLinesForVMOrRowTooLarge;
@@ -387,6 +389,7 @@ struct vba_vars_st {
 
 	/* vba mode support */
 	/*inputs*/
+	bool EmbeddedPanel[DC__NUM_DPP__MAX];
 	bool SupportGFX7CompatibleTilingIn32bppAnd64bpp;
 	double MaxHSCLRatio;
 	double MaxVSCLRatio;
@@ -394,6 +397,7 @@ struct vba_vars_st {
 	bool WritebackLumaAndChromaScalingSupported;
 	bool Cursor64BppSupport;
 	double DCFCLKPerState[DC__VOLTAGE_STATES + 1];
+	double DCFCLKState[DC__VOLTAGE_STATES + 1][2];
 	double FabricClockPerState[DC__VOLTAGE_STATES + 1];
 	double SOCCLKPerState[DC__VOLTAGE_STATES + 1];
 	double PHYCLKPerState[DC__VOLTAGE_STATES + 1];
@@ -442,7 +446,7 @@ struct vba_vars_st {
 	double OutputLinkDPLanes[DC__NUM_DPP__MAX];
 	double ForcedOutputLinkBPP[DC__NUM_DPP__MAX]; // Mode Support only
 	double ImmediateFlipBW[DC__NUM_DPP__MAX];
-	double MaxMaxVStartup;
+	double MaxMaxVStartup[DC__VOLTAGE_STATES + 1][2];
 
 	double WritebackLumaVExtra;
 	double WritebackChromaVExtra;
@@ -469,7 +473,7 @@ struct vba_vars_st {
 	double RoundedUpMaxSwathSizeBytesC;
 	double EffectiveDETLBLinesLuma;
 	double EffectiveDETLBLinesChroma;
-	double ProjectedDCFCLKDeepSleep;
+	double ProjectedDCFCLKDeepSleep[DC__VOLTAGE_STATES + 1][2];
 	double PDEAndMetaPTEBytesPerFrameY;
 	double PDEAndMetaPTEBytesPerFrameC;
 	unsigned int MetaRowBytesY;
@@ -487,12 +491,11 @@ struct vba_vars_st {
 	double FractionOfUrgentBandwidthImmediateFlip; // Mode Support debugging output
 
 	/* ms locals */
-	double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1];
+	double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1][2];
 	unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	int NoOfDPPThisState[DC__NUM_DPP__MAX];
-	bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
+	double SwathWidthYThisState[DC__NUM_DPP__MAX];
 	unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX];
 	unsigned int SwathHeightCThisState[DC__NUM_DPP__MAX];
@@ -504,7 +507,7 @@ struct vba_vars_st {
 	double RequiredDPPCLKThisState[DC__NUM_DPP__MAX];
 	bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
-	bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1];
+	bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	bool PrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	bool VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	double RequiredDISPCLK[DC__VOLTAGE_STATES + 1][2];
@@ -513,22 +516,22 @@ struct vba_vars_st {
 	unsigned int TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1][2];
 	unsigned int TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1][2];
 	bool ModeSupport[DC__VOLTAGE_STATES + 1][2];
-	double ReturnBWPerState[DC__VOLTAGE_STATES + 1];
+	double ReturnBWPerState[DC__VOLTAGE_STATES + 1][2];
 	bool DIOSupport[DC__VOLTAGE_STATES + 1];
 	bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1];
 	bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1];
 	bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1];
 	double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1];
-	bool ROBSupport[DC__VOLTAGE_STATES + 1];
+	bool ROBSupport[DC__VOLTAGE_STATES + 1][2];
 	bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2];
-	bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1];
-	double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1];
+	bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1][2];
+	double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1][2];
 	double PrefetchBW[DC__NUM_DPP__MAX];
-	double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX];
-	double MetaRowBytes[DC__NUM_DPP__MAX];
-	double DPTEBytesPerRow[DC__NUM_DPP__MAX];
-	double PrefetchLinesY[DC__NUM_DPP__MAX];
-	double PrefetchLinesC[DC__NUM_DPP__MAX];
+	double PDEAndMetaPTEBytesPerFrame[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double MetaRowBytes[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DPTEBytesPerRow[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double PrefetchLinesY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double PrefetchLinesC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int MaxNumSwY[DC__NUM_DPP__MAX];
 	unsigned int MaxNumSwC[DC__NUM_DPP__MAX];
 	double PrefillY[DC__NUM_DPP__MAX];
@@ -537,7 +540,7 @@ struct vba_vars_st {
 	double LinesForMetaPTE[DC__NUM_DPP__MAX];
 	double LinesForMetaAndDPTERow[DC__NUM_DPP__MAX];
 	double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX];
+	double SwathWidthYSingleDPP[DC__NUM_DPP__MAX];
 	double BytePerPixelInDETY[DC__NUM_DPP__MAX];
 	double BytePerPixelInDETC[DC__NUM_DPP__MAX];
 	bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
@@ -545,7 +548,7 @@ struct vba_vars_st {
 	double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	double OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1];
+	bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1][2];
 	unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX];
 	unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX];
 	unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX];
@@ -560,7 +563,7 @@ struct vba_vars_st {
 	double WriteBandwidth[DC__NUM_DPP__MAX];
 	double PSCL_FACTOR[DC__NUM_DPP__MAX];
 	double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX];
-	double MaximumVStartup[DC__NUM_DPP__MAX];
+	double MaximumVStartup[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
 	unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
 	double AlignedDCCMetaPitch[DC__NUM_DPP__MAX];
@@ -577,7 +580,7 @@ struct vba_vars_st {
 	bool ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1][2];
 	double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	unsigned int vm_group_bytes[DC__NUM_DPP__MAX];
-	long dpte_group_bytes[DC__NUM_DPP__MAX];
+	unsigned int dpte_group_bytes[DC__NUM_DPP__MAX];
 	unsigned int dpte_row_height[DC__NUM_DPP__MAX];
 	unsigned int meta_req_height[DC__NUM_DPP__MAX];
 	unsigned int meta_req_width[DC__NUM_DPP__MAX];
@@ -603,14 +606,14 @@ struct vba_vars_st {
 	double UrgentBurstFactorChroma[DC__NUM_DPP__MAX];
 	double UrgentBurstFactorChromaPre[DC__NUM_DPP__MAX];
 
+
 	bool           MPCCombine[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	double         SwathWidthCSingleDPP[DC__NUM_DPP__MAX];
 	double         MaximumSwathWidthInLineBufferLuma;
 	double         MaximumSwathWidthInLineBufferChroma;
 	double         MaximumSwathWidthLuma[DC__NUM_DPP__MAX];
 	double         MaximumSwathWidthChroma[DC__NUM_DPP__MAX];
-	bool odm_combine_dummy[DC__NUM_DPP__MAX];
-	enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX];
+	enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX];
 	double         dummy1[DC__NUM_DPP__MAX];
 	double         dummy2[DC__NUM_DPP__MAX];
 	double         dummy3[DC__NUM_DPP__MAX];
@@ -620,9 +623,9 @@ struct vba_vars_st {
 	double         dummy7[DC__NUM_DPP__MAX];
 	double         dummy8[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger1ms[DC__NUM_DPP__MAX];
-	unsigned int        dummyinteger2ms[DC__NUM_DPP__MAX];
+	double        dummyinteger2ms[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger3[DC__NUM_DPP__MAX];
-	unsigned int        dummyinteger4;
+	unsigned int        dummyinteger4[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger5;
 	unsigned int        dummyinteger6;
 	unsigned int        dummyinteger7;
@@ -635,7 +638,6 @@ struct vba_vars_st {
 	unsigned int        dummyintegerarr2[DC__NUM_DPP__MAX];
 	unsigned int        dummyintegerarr3[DC__NUM_DPP__MAX];
 	unsigned int        dummyintegerarr4[DC__NUM_DPP__MAX];
-	long                dummylongarr1[DC__NUM_DPP__MAX];
 	bool           dummysinglestring;
 	bool           SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
 	double         PlaneRequiredDISPCLKWithODMCombine2To1;
@@ -643,20 +645,19 @@ struct vba_vars_st {
 	unsigned int   TotalNumberOfSingleDPPPlanes[DC__VOLTAGE_STATES + 1][2];
 	bool           LinkDSCEnable;
 	bool           ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1];
-	bool ODMCombineEnableThisState[DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX];
-	unsigned int   SwathWidthCThisState[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnableThisState[DC__NUM_DPP__MAX];
+	double   SwathWidthCThisState[DC__NUM_DPP__MAX];
 	bool           ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
 	double         AlignedDCCMetaPitchY[DC__NUM_DPP__MAX];
 	double         AlignedDCCMetaPitchC[DC__NUM_DPP__MAX];
 
 	unsigned int NotEnoughUrgentLatencyHiding;
 	unsigned int NotEnoughUrgentLatencyHidingPre;
-	long PTEBufferSizeInRequestsForLuma;
-	long PTEBufferSizeInRequestsForChroma;
+	int PTEBufferSizeInRequestsForLuma;
+	int PTEBufferSizeInRequestsForChroma;
 
 	// Missing from VBA
-	long dpte_group_bytes_chroma;
+	int dpte_group_bytes_chroma;
 	unsigned int vm_group_bytes_chroma;
 	double dst_x_after_scaler;
 	double dst_y_after_scaler;
@@ -681,8 +682,8 @@ struct vba_vars_st {
 	double MinTTUVBlank[DC__NUM_DPP__MAX];
 	double BytePerPixelDETY[DC__NUM_DPP__MAX];
 	double BytePerPixelDETC[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthY[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX];
+	double SwathWidthY[DC__NUM_DPP__MAX];
+	double SwathWidthSingleDPPY[DC__NUM_DPP__MAX];
 	double CursorRequestDeliveryTime[DC__NUM_DPP__MAX];
 	double CursorRequestDeliveryTimePrefetch[DC__NUM_DPP__MAX];
 	double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX];
@@ -758,8 +759,8 @@ struct vba_vars_st {
 	double LinesInDETY[DC__NUM_DPP__MAX];
 	double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
 
-	unsigned int SwathWidthSingleDPPC[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthC[DC__NUM_DPP__MAX];
+	double SwathWidthSingleDPPC[DC__NUM_DPP__MAX];
+	double SwathWidthC[DC__NUM_DPP__MAX];
 	unsigned int BytePerPixelY[DC__NUM_DPP__MAX];
 	unsigned int BytePerPixelC[DC__NUM_DPP__MAX];
 	long dummyinteger1;
@@ -777,6 +778,7 @@ struct vba_vars_st {
 	unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
 	unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
 	double VStartupMargin;
+	bool NotEnoughTimeForDynamicMetadata;
 
 	/* Missing from VBA */
 	unsigned int MaximumMaxVStartupLines;
@@ -812,7 +814,7 @@ struct vba_vars_st {
 	unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX];
 	double HRatioChroma[DC__NUM_DPP__MAX];
 	double VRatioChroma[DC__NUM_DPP__MAX];
-	long WritebackSourceWidth[DC__NUM_DPP__MAX];
+	int WritebackSourceWidth[DC__NUM_DPP__MAX];
 
 	bool ModeIsSupported;
 	bool ODMCombine4To1Supported;
@@ -848,6 +850,58 @@ struct vba_vars_st {
 	unsigned int MaxNumHDMIFRLOutputs;
 	int    AudioSampleRate[DC__NUM_DPP__MAX];
 	int    AudioSampleLayout[DC__NUM_DPP__MAX];
+
+	int PercentMarginOverMinimumRequiredDCFCLK;
+	bool DynamicMetadataSupported[DC__VOLTAGE_STATES + 1][2];
+	enum immediate_flip_requirement ImmediateFlipRequirement;
+	double DETBufferSizeYThisState[DC__NUM_DPP__MAX];
+	double DETBufferSizeCThisState[DC__NUM_DPP__MAX];
+	bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX];
+	bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
+	int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX];
+	int swath_width_chroma_ub_this_state[DC__NUM_DPP__MAX];
+	double UrgLatency[DC__VOLTAGE_STATES + 1];
+	double VActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double VActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NoTimeForPrefetch[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NoTimeForDynamicMetadata[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double dpte_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double meta_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DETBufferSizeYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DETBufferSizeCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES + 1][2];
+	unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathWidthYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathWidthCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double TotalDPTERowBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalMetaRowBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalVActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalVActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2];
+	bool UseMinimumRequiredDCFCLK;
+	double WritebackDelayTime[DC__NUM_DPP__MAX];
+	unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX];
+	unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX];
+	unsigned int dummyinteger15;
+	unsigned int dummyinteger16;
+	unsigned int dummyinteger17;
+	unsigned int dummyinteger18;
+	unsigned int dummyinteger19;
+	unsigned int dummyinteger20;
+	unsigned int dummyinteger21;
+	unsigned int dummyinteger22;
+	unsigned int dummyinteger23;
+	unsigned int dummyinteger24;
+	unsigned int dummyinteger25;
+	unsigned int dummyinteger26;
+	unsigned int dummyinteger27;
+	unsigned int dummyinteger28;
+	unsigned int dummyinteger29;
+	bool dummystring[DC__NUM_DPP__MAX];
+	double BPP;
+	enum odm_combine_policy ODMCombinePolicy;
 };
 
 bool CalculateMinAndMaxPrefetchMode(
@@ -869,4 +923,3 @@ double CalculateWriteBackDISPCLK(
 		unsigned int WritebackChromaLineBufferWidth);
 
 #endif /* _DML2_DISPLAY_MODE_VBA_H_ */
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index ad8571f5a142..4c3e9cc30167 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -243,7 +243,7 @@ void dml1_extract_rq_regs(
 	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
 	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
 
-	/* FIXME: take the max between luma, chroma chunk size?
+	/* TODO: take the max between luma, chroma chunk size?
 	 * okay for now, as we are setting chunk_bytes to 8kb anyways
 	 */
 	if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { /*32kb */
@@ -602,7 +602,7 @@ static void get_surf_rq_param(
 	unsigned int log2_dpte_group_length;
 	unsigned int func_meta_row_height, func_dpte_row_height;
 
-	/* FIXME check if ppe apply for both luma and chroma in 422 case */
+	/* TODO check if ppe apply for both luma and chroma in 422 case */
 	if (is_chroma) {
 		vp_width = pipe_src_param.viewport_width_c / ppe;
 		vp_height = pipe_src_param.viewport_height_c;
@@ -1141,7 +1141,7 @@ void dml1_rq_dlg_get_dlg_params(
 	ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13));
 	disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; /* 15 bits */
 
-	prefetch_xy_calc_in_dcfclk = 24.0; /* FIXME: ip_param */
+	prefetch_xy_calc_in_dcfclk = 24.0; /* TODO: ip_param */
 	min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz;
 	t_calc_us = prefetch_xy_calc_in_dcfclk / min_dcfclk_mhz;
 	min_ttu_vblank = dlg_sys_param.t_urg_wm_us;
@@ -1182,7 +1182,7 @@ void dml1_rq_dlg_get_dlg_params(
 	dcc_en = e2e_pipe_param.pipe.src.dcc;
 	dual_plane = is_dual_plane(
 			(enum source_format_class) e2e_pipe_param.pipe.src.source_format);
-	mode_422 = 0; /* FIXME */
+	mode_422 = 0; /* TODO */
 	access_dir = (e2e_pipe_param.pipe.src.source_scan == dm_vert); /* vp access direction: horizontal or vertical accessed */
 	bytes_per_element_l = get_bytes_per_element(
 			(enum source_format_class) e2e_pipe_param.pipe.src.source_format,
@@ -1837,7 +1837,7 @@ void dml1_rq_dlg_get_dlg_params(
 		cur0_width_ub = dml_ceil((double) cur0_src_width / (double) cur0_req_width, 1)
 				* (double) cur0_req_width;
 		cur0_req_per_width = cur0_width_ub / (double) cur0_req_width;
-		hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* FIXME: oswin to think about what to do for cursor */
+		hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* TODO: oswin to think about what to do for cursor */
 
 		if (vratio_pre_l <= 1.0) {
 			refcyc_per_req_delivery_pre_cur0 = hactive_cur0 * ref_freq_to_pix_freq
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
index b953b02a1512..723af0b2dda0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
@@ -24,7 +24,7 @@
  */
 
 #include "dml_common_defs.h"
-#include "../calcs/dcn_calc_math.h"
+#include "dcn_calc_math.h"
 
 #include "dml_inline_defs.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index eca140da13d8..ded71ea82413 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -27,7 +27,7 @@
 #define __DML_INLINE_DEFS_H__
 
 #include "dml_common_defs.h"
-#include "../calcs/dcn_calc_math.h"
+#include "dcn_calc_math.h"
 #include "dml_logger.h"
 
 static inline double dml_min(double a, double b)
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index 970737217e53..3f66868df171 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,7 +1,14 @@
+# SPDX-License-Identifier: MIT
 #
 # Makefile for the 'dsc' sub-component of DAL.
 
+ifdef CONFIG_X86
 dsc_ccflags := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+dsc_ccflags := -mhard-float -maltivec
+endif
 
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
@@ -9,6 +16,7 @@ IS_OLD_GCC = 1
 endif
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -17,6 +25,7 @@ dsc_ccflags += -mpreferred-stack-boundary=4
 else
 dsc_ccflags += -msse2
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index 5995bcdfed54..8b78fcbfe746 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -22,30 +22,68 @@
  * Author: AMD
  */
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-#include "dc.h"
-#include "core_types.h"
+#include "dc_hw_types.h"
 #include "dsc.h"
 #include <drm/drm_dp_helper.h>
+#include "dc.h"
 
-struct dc_dsc_policy {
-	bool use_min_slices_h;
-	int max_slices_h; // Maximum available if 0
-	int min_sice_height; // Must not be less than 8
-	int max_target_bpp;
-	int min_target_bpp; // Minimum target bits per pixel
-};
+/* This module's internal functions */
 
-const struct dc_dsc_policy dsc_policy = {
-	.use_min_slices_h = true, // DSC Policy: Use minimum number of slices that fits the pixel clock
-	.max_slices_h = 0, // DSC Policy: Use max available slices (in our case 4 for or 8, depending on the mode)
-	.min_sice_height = 108, // DSC Policy: Use slice height recommended by VESA DSC Spreadsheet user guide
-	.max_target_bpp = 16,
-	.min_target_bpp = 8,
-};
+/* default DSC policy target bitrate limit is 16bpp */
+static uint32_t dsc_policy_max_target_bpp_limit = 16;
 
+static uint32_t dc_dsc_bandwidth_in_kbps_from_timing(
+	const struct dc_crtc_timing *timing)
+{
+	uint32_t bits_per_channel = 0;
+	uint32_t kbps;
 
-/* This module's internal functions */
+	if (timing->flags.DSC) {
+		kbps = (timing->pix_clk_100hz * timing->dsc_cfg.bits_per_pixel);
+		kbps = kbps / 160 + ((kbps % 160) ? 1 : 0);
+		return kbps;
+	}
+
+	switch (timing->display_color_depth) {
+	case COLOR_DEPTH_666:
+		bits_per_channel = 6;
+		break;
+	case COLOR_DEPTH_888:
+		bits_per_channel = 8;
+		break;
+	case COLOR_DEPTH_101010:
+		bits_per_channel = 10;
+		break;
+	case COLOR_DEPTH_121212:
+		bits_per_channel = 12;
+		break;
+	case COLOR_DEPTH_141414:
+		bits_per_channel = 14;
+		break;
+	case COLOR_DEPTH_161616:
+		bits_per_channel = 16;
+		break;
+	default:
+		break;
+	}
+
+	ASSERT(bits_per_channel != 0);
+
+	kbps = timing->pix_clk_100hz / 10;
+	kbps *= bits_per_channel;
+
+	if (timing->flags.Y_ONLY != 1) {
+		/*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/
+		kbps *= 3;
+		if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+			kbps /= 2;
+		else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+			kbps = kbps * 2 / 3;
+	}
+
+	return kbps;
+
+}
 
 static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size)
 {
@@ -178,16 +216,18 @@ static bool dsc_bpp_increment_div_from_dpcd(int bpp_increment_dpcd, uint32_t *bp
 }
 
 static void get_dsc_enc_caps(
-	const struct dc *dc,
+	const struct display_stream_compressor *dsc,
 	struct dsc_enc_caps *dsc_enc_caps,
 	int pixel_clock_100Hz)
 {
 	// This is a static HW query, so we can use any DSC
-	struct display_stream_compressor *dsc = dc->res_pool->dscs[0];
 
 	memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps));
-	if (dsc)
+	if (dsc) {
 		dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz);
+		if (dsc->ctx->dc->debug.native422_support)
+			dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1;
+	}
 }
 
 /* Returns 'false' if no intersection was found for at least one capablity.
@@ -290,7 +330,7 @@ static void get_dsc_bandwidth_range(
 		struct dc_dsc_bw_range *range)
 {
 	/* native stream bandwidth */
-	range->stream_kbps = dc_bandwidth_in_kbps_from_timing(timing);
+	range->stream_kbps = dc_dsc_bandwidth_in_kbps_from_timing(timing);
 
 	/* max dsc target bpp */
 	range->max_kbps = dsc_div_by_10_round_up(max_bpp * timing->pix_clk_100hz);
@@ -512,6 +552,7 @@ static bool setup_dsc_config(
 		const struct dsc_enc_caps *dsc_enc_caps,
 		int target_bandwidth_kbps,
 		const struct dc_crtc_timing *timing,
+		int min_slice_height_override,
 		struct dc_dsc_config *dsc_cfg)
 {
 	struct dsc_enc_caps dsc_common_caps;
@@ -526,9 +567,11 @@ static bool setup_dsc_config(
 	bool is_dsc_possible = false;
 	int pic_height;
 	int slice_height;
+	struct dc_dsc_policy policy;
 
 	memset(dsc_cfg, 0, sizeof(struct dc_dsc_config));
 
+	dc_dsc_get_policy_for_timing(timing, &policy);
 	pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right;
 	pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
 
@@ -544,7 +587,12 @@ static bool setup_dsc_config(
 		goto done;
 
 	if (target_bandwidth_kbps > 0) {
-		is_dsc_possible = decide_dsc_target_bpp_x16(&dsc_policy, &dsc_common_caps, target_bandwidth_kbps, timing, &target_bpp);
+		is_dsc_possible = decide_dsc_target_bpp_x16(
+				&policy,
+				&dsc_common_caps,
+				target_bandwidth_kbps,
+				timing,
+				&target_bpp);
 		dsc_cfg->bits_per_pixel = target_bpp;
 	}
 	if (!is_dsc_possible)
@@ -646,20 +694,20 @@ static bool setup_dsc_config(
 	if (!is_dsc_possible)
 		goto done;
 
-	if (dsc_policy.use_min_slices_h) {
+	if (policy.use_min_slices_h) {
 		if (min_slices_h > 0)
 			num_slices_h = min_slices_h;
 		else if (max_slices_h > 0) { // Fall back to max slices if min slices is not working out
-			if (dsc_policy.max_slices_h)
-				num_slices_h = min(dsc_policy.max_slices_h, max_slices_h);
+			if (policy.max_slices_h)
+				num_slices_h = min(policy.max_slices_h, max_slices_h);
 			else
 				num_slices_h = max_slices_h;
 		} else
 			is_dsc_possible = false;
 	} else {
 		if (max_slices_h > 0) {
-			if (dsc_policy.max_slices_h)
-				num_slices_h = min(dsc_policy.max_slices_h, max_slices_h);
+			if (policy.max_slices_h)
+				num_slices_h = min(policy.max_slices_h, max_slices_h);
 			else
 				num_slices_h = max_slices_h;
 		} else if (min_slices_h > 0) // Fall back to min slices if max slices is not possible
@@ -680,7 +728,10 @@ static bool setup_dsc_config(
 
 	// Slice height (i.e. number of slices per column): start with policy and pick the first one that height is divisible by.
 	// For 4:2:0 make sure the slice height is divisible by 2 as well.
-	slice_height = min(dsc_policy.min_sice_height, pic_height);
+	if (min_slice_height_override == 0)
+		slice_height = min(policy.min_slice_height, pic_height);
+	else
+		slice_height = min(min_slice_height_override, pic_height);
 
 	while (slice_height < pic_height && (pic_height % slice_height != 0 ||
 		(timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 && slice_height % 2 != 0)))
@@ -709,7 +760,7 @@ done:
 	return is_dsc_possible;
 }
 
-bool dc_dsc_parse_dsc_dpcd(const uint8_t *dpcd_dsc_basic_data, const uint8_t *dpcd_dsc_ext_data, struct dsc_dec_dpcd_caps *dsc_sink_caps)
+bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, const uint8_t *dpcd_dsc_basic_data, const uint8_t *dpcd_dsc_ext_data, struct dsc_dec_dpcd_caps *dsc_sink_caps)
 {
 	if (!dpcd_dsc_basic_data)
 		return false;
@@ -762,6 +813,23 @@ bool dc_dsc_parse_dsc_dpcd(const uint8_t *dpcd_dsc_basic_data, const uint8_t *dp
 	if (!dsc_bpp_increment_div_from_dpcd(dpcd_dsc_basic_data[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT], &dsc_sink_caps->bpp_increment_div))
 		return false;
 
+	if (dc->debug.dsc_bpp_increment_div) {
+		/* dsc_bpp_increment_div should onl be 1, 2, 4, 8 or 16, but rather than rejecting invalid values,
+		 * we'll accept all and get it into range. This also makes the above check against 0 redundant,
+		 * but that one stresses out the override will be only used if it's not 0.
+		 */
+		if (dc->debug.dsc_bpp_increment_div >= 1)
+			dsc_sink_caps->bpp_increment_div = 1;
+		if (dc->debug.dsc_bpp_increment_div >= 2)
+			dsc_sink_caps->bpp_increment_div = 2;
+		if (dc->debug.dsc_bpp_increment_div >= 4)
+			dsc_sink_caps->bpp_increment_div = 4;
+		if (dc->debug.dsc_bpp_increment_div >= 8)
+			dsc_sink_caps->bpp_increment_div = 8;
+		if (dc->debug.dsc_bpp_increment_div >= 16)
+			dsc_sink_caps->bpp_increment_div = 16;
+	}
+
 	/* Extended caps */
 	if (dpcd_dsc_ext_data == NULL) { // Extended DPCD DSC data can be null, e.g. because it doesn't apply to SST
 		dsc_sink_caps->branch_overall_throughput_0_mps = 0;
@@ -802,7 +870,8 @@ bool dc_dsc_parse_dsc_dpcd(const uint8_t *dpcd_dsc_basic_data, const uint8_t *dp
  * If DSC is not possible, leave '*range' untouched.
  */
 bool dc_dsc_compute_bandwidth_range(
-		const struct dc *dc,
+		const struct display_stream_compressor *dsc,
+		const uint32_t dsc_min_slice_height_override,
 		const uint32_t min_bpp,
 		const uint32_t max_bpp,
 		const struct dsc_dec_dpcd_caps *dsc_sink_caps,
@@ -814,16 +883,14 @@ bool dc_dsc_compute_bandwidth_range(
 	struct dsc_enc_caps dsc_common_caps;
 	struct dc_dsc_config config;
 
-	get_dsc_enc_caps(dc, &dsc_enc_caps, timing->pix_clk_100hz);
+	get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
 
 	is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps,
 			timing->pixel_encoding, &dsc_common_caps);
 
 	if (is_dsc_possible)
-		is_dsc_possible = setup_dsc_config(dsc_sink_caps,
-				&dsc_enc_caps,
-				0,
-				timing, &config);
+		is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing,
+				dsc_min_slice_height_override, &config);
 
 	if (is_dsc_possible)
 		get_dsc_bandwidth_range(min_bpp, max_bpp, &dsc_common_caps, timing, range);
@@ -832,8 +899,9 @@ bool dc_dsc_compute_bandwidth_range(
 }
 
 bool dc_dsc_compute_config(
-		const struct dc *dc,
+		const struct display_stream_compressor *dsc,
 		const struct dsc_dec_dpcd_caps *dsc_sink_caps,
+		const uint32_t dsc_min_slice_height_override,
 		uint32_t target_bandwidth_kbps,
 		const struct dc_crtc_timing *timing,
 		struct dc_dsc_config *dsc_cfg)
@@ -841,11 +909,74 @@ bool dc_dsc_compute_config(
 	bool is_dsc_possible = false;
 	struct dsc_enc_caps dsc_enc_caps;
 
-	get_dsc_enc_caps(dc, &dsc_enc_caps, timing->pix_clk_100hz);
+	get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
 	is_dsc_possible = setup_dsc_config(dsc_sink_caps,
 			&dsc_enc_caps,
 			target_bandwidth_kbps,
-			timing, dsc_cfg);
+			timing, dsc_min_slice_height_override, dsc_cfg);
 	return is_dsc_possible;
 }
-#endif /* CONFIG_DRM_AMD_DC_DSC_SUPPORT */
+
+void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, struct dc_dsc_policy *policy)
+{
+	uint32_t bpc = 0;
+
+	policy->min_target_bpp = 0;
+	policy->max_target_bpp = 0;
+
+	/* DSC Policy: Use minimum number of slices that fits the pixel clock */
+	policy->use_min_slices_h = true;
+
+	/* DSC Policy: Use max available slices
+	 * (in our case 4 for or 8, depending on the mode)
+	 */
+	policy->max_slices_h = 0;
+
+	/* DSC Policy: Use slice height recommended
+	 * by VESA DSC Spreadsheet user guide
+	 */
+	policy->min_slice_height = 108;
+
+	/* DSC Policy: follow DP specs with an internal upper limit to 16 bpp
+	 * for better interoperability
+	 */
+	switch (timing->display_color_depth) {
+	case COLOR_DEPTH_888:
+		bpc = 8;
+		break;
+	case COLOR_DEPTH_101010:
+		bpc = 10;
+		break;
+	case COLOR_DEPTH_121212:
+		bpc = 12;
+		break;
+	default:
+		return;
+	}
+	switch (timing->pixel_encoding) {
+	case PIXEL_ENCODING_RGB:
+	case PIXEL_ENCODING_YCBCR444:
+	case PIXEL_ENCODING_YCBCR422: /* assume no YCbCr422 native support */
+		/* DP specs limits to 8 */
+		policy->min_target_bpp = 8;
+		/* DP specs limits to 3 x bpc */
+		policy->max_target_bpp = 3 * bpc;
+		break;
+	case PIXEL_ENCODING_YCBCR420:
+		/* DP specs limits to 6 */
+		policy->min_target_bpp = 6;
+		/* DP specs limits to 1.5 x bpc assume bpc is an even number */
+		policy->max_target_bpp = bpc * 3 / 2;
+		break;
+	default:
+		return;
+	}
+	/* internal upper limit, default 16 bpp */
+	if (policy->max_target_bpp > dsc_policy_max_target_bpp_limit)
+		policy->max_target_bpp = dsc_policy_max_target_bpp_limit;
+}
+
+void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit)
+{
+	dsc_policy_max_target_bpp_limit = limit;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h b/drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h
index 020ad8f685ea..9f70e87b3ecb 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h
@@ -1,4 +1,3 @@
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 /*
  * Copyright 2017 Advanced Micro Devices, Inc.
@@ -51,4 +50,3 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par
 
 #endif
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h
index f66d006eac5d..e5fac9f4181d 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h
@@ -1,4 +1,3 @@
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 /*
  * Copyright 2017 Advanced Micro Devices, Inc.
@@ -703,4 +702,3 @@ const qp_table   qp_table_422_8bpc_max = {
 	{  16, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} }
 };
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
index ca51e83f8764..03ae15946c6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
@@ -1,4 +1,3 @@
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 
 /*
  * Copyright 2017 Advanced Micro Devices, Inc.
@@ -177,7 +176,6 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
 {
 	float bpp_group;
 	float initial_xmit_delay_factor;
-	int source_bpp;
 	int padding_pixels;
 	int i;
 
@@ -217,8 +215,6 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
 			rc->initial_xmit_delay++;
 	}
 
-	source_bpp = MODE_SELECT(bpc * 3, bpc * 2, bpc * 1.5);
-
 	rc->flatness_min_qp     = ((bpc == BPC_8) ?  (3) : ((bpc == BPC_10) ? (7)  : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 	rc->flatness_max_qp     = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 	rc->flatness_det_thresh = 2 << (bpc - 8);
@@ -255,4 +251,3 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
 	rc->rc_buf_thresh[13] = 8064;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
index f1d6e793bc61..b6b1f09c2009 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
@@ -1,4 +1,3 @@
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 /*
  * Copyright 2017 Advanced Micro Devices, Inc.
@@ -82,4 +81,3 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
 
 #endif
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
index 73172fd0b529..1f6e63b71456 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
@@ -1,4 +1,3 @@
-#if defined(CONFIG_DRM_AMD_DC_DSC_SUPPORT)
 /*
  * Copyright 2012-17 Advanced Micro Devices, Inc.
  *
@@ -144,4 +143,3 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par
 	return ret;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/Makefile b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
index b3062275711e..202baa210cc8 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
@@ -61,26 +61,25 @@ AMD_DISPLAY_FILES += $(AMD_DAL_GPIO_DCE120)
 ###############################################################################
 # DCN 1x
 ###############################################################################
-ifdef CONFIG_DRM_AMD_DC_DCN1_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 GPIO_DCN10 = hw_translate_dcn10.o hw_factory_dcn10.o
 
 AMD_DAL_GPIO_DCN10 = $(addprefix $(AMDDALPATH)/dc/gpio/dcn10/,$(GPIO_DCN10))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_GPIO_DCN10)
-endif
 
 ###############################################################################
 # DCN 2
 ###############################################################################
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
 GPIO_DCN20 = hw_translate_dcn20.o hw_factory_dcn20.o
 
 AMD_DAL_GPIO_DCN20 = $(addprefix $(AMDDALPATH)/dc/gpio/dcn20/,$(GPIO_DCN20))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_GPIO_DCN20)
-endif
 
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
+###############################################################################
+# DCN 21
+###############################################################################
 GPIO_DCN21 = hw_translate_dcn21.o hw_factory_dcn21.o
 
 AMD_DAL_GPIO_DCN21 = $(addprefix $(AMDDALPATH)/dc/gpio/dcn21/,$(GPIO_DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
index 43a440385b43..83f798cb8b21 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dm_services.h"
 #include "include/gpio_types.h"
 #include "../hw_factory.h"
@@ -110,6 +109,12 @@ static const struct ddc_registers ddc_data_regs_dcn[] = {
 	ddc_data_regs_dcn2(4),
 	ddc_data_regs_dcn2(5),
 	ddc_data_regs_dcn2(6),
+	{
+			DDC_GPIO_VGA_REG_LIST(DATA),
+			.ddc_setup = 0,
+			.phy_aux_cntl = 0,
+			.dc_gpio_aux_ctrl_5 = 0
+	}
 };
 
 static const struct ddc_registers ddc_clk_regs_dcn[] = {
@@ -119,6 +124,12 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = {
 	ddc_clk_regs_dcn2(4),
 	ddc_clk_regs_dcn2(5),
 	ddc_clk_regs_dcn2(6),
+	{
+			DDC_GPIO_VGA_REG_LIST(CLK),
+			.ddc_setup = 0,
+			.phy_aux_cntl = 0,
+			.dc_gpio_aux_ctrl_5 = 0
+	}
 };
 
 static const struct ddc_sh_mask ddc_shift[] = {
@@ -246,4 +257,3 @@ void dal_hw_factory_dcn20_init(struct hw_factory *factory)
 	factory->funcs = &funcs;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.h
index 43a4ce7aa3bf..0fd9b315bd7a 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.h
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #ifndef __DAL_HW_FACTORY_DCN20_H__
 #define __DAL_HW_FACTORY_DCN20_H__
 
@@ -30,4 +29,3 @@
 void dal_hw_factory_dcn20_init(struct hw_factory *factory);
 
 #endif /* __DAL_HW_FACTORY_DCN20_H__ */
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
index 915e896e0e91..52ba62b3b5e4 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
@@ -26,7 +26,6 @@
 /*
  * Pre-requisites: headers required by header of this unit
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "hw_translate_dcn20.h"
 
 #include "dm_services.h"
@@ -379,4 +378,3 @@ void dal_hw_translate_dcn20_init(struct hw_translate *tr)
 	tr->funcs = &funcs;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.h
index 01f52c7bed86..5f7a35530e26 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.h
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #ifndef __DAL_HW_TRANSLATE_DCN20_H__
 #define __DAL_HW_TRANSLATE_DCN20_H__
 
@@ -32,4 +31,3 @@ struct hw_translate;
 void dal_hw_translate_dcn20_init(struct hw_translate *tr);
 
 #endif /* __DAL_HW_TRANSLATE_DCN20_H__ */
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c
index 8572678f8d4f..907c5911eb9e 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dm_services.h"
 #include "include/gpio_types.h"
 #include "../hw_factory.h"
@@ -239,4 +238,3 @@ void dal_hw_factory_dcn21_init(struct hw_factory *factory)
 	factory->funcs = &funcs;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.h
index 2443f9e7afbf..4949e0c7fa06 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.h
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #ifndef __DAL_HW_FACTORY_DCN21_H__
 #define __DAL_HW_FACTORY_DCN21_H__
 
@@ -30,4 +29,3 @@
 void dal_hw_factory_dcn21_init(struct hw_factory *factory);
 
 #endif /* __DAL_HW_FACTORY_DCN20_H__ */
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
index fbb58fb8c318..291966efe63d 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
@@ -26,7 +26,6 @@
 /*
  * Pre-requisites: headers required by header of this unit
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "hw_translate_dcn21.h"
 
 #include "dm_services.h"
@@ -382,4 +381,3 @@ void dal_hw_translate_dcn21_init(struct hw_translate *tr)
 	tr->funcs = &funcs;
 }
 
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.h
index 2bfaac24c574..9462b0a65200 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.h
@@ -22,7 +22,6 @@
  * Authors: AMD
  *
  */
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #ifndef __DAL_HW_TRANSLATE_DCN21_H__
 #define __DAL_HW_TRANSLATE_DCN21_H__
 
@@ -32,4 +31,3 @@ struct hw_translate;
 void dal_hw_translate_dcn21_init(struct hw_translate *tr);
 
 #endif /* __DAL_HW_TRANSLATE_DCN21_H__ */
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h b/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
index f91e85b04956..308a543178a5 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
@@ -48,13 +48,11 @@
 	DDC_GPIO_REG_LIST(cd,id),\
 	.ddc_setup = REG(DC_I2C_DDC ## id ## _SETUP)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	#define DDC_REG_LIST_DCN2(cd, id) \
 	DDC_GPIO_REG_LIST(cd, id),\
 	.ddc_setup = REG(DC_I2C_DDC ## id ## _SETUP),\
 	.phy_aux_cntl = REG(PHY_AUX_CNTL), \
 	.dc_gpio_aux_ctrl_5 = REG(DC_GPIO_AUX_CTRL_5)
-#endif
 
 #define DDC_GPIO_VGA_REG_LIST_ENTRY(type,cd)\
 	.type ## _reg =   REG(DC_GPIO_DDCVGA_ ## type),\
@@ -90,13 +88,11 @@
 	DDC_GPIO_I2C_REG_LIST(cd),\
 	.ddc_setup = 0
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define DDC_I2C_REG_LIST_DCN2(cd) \
 	DDC_GPIO_I2C_REG_LIST(cd),\
 	.ddc_setup = 0,\
 	.phy_aux_cntl = REG(PHY_AUX_CNTL), \
 	.dc_gpio_aux_ctrl_5 = REG(DC_GPIO_AUX_CTRL_5)
-#endif
 #define DDC_MASK_SH_LIST_COMMON(mask_sh) \
 		SF_DDC(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE, mask_sh),\
 		SF_DDC(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_EDID_DETECT_ENABLE, mask_sh),\
@@ -110,22 +106,18 @@
 		SF_DDC(DC_GPIO_I2CPAD_MASK, DC_GPIO_SDA_PD_DIS, mask_sh),\
 		SF_DDC(DC_GPIO_I2CPAD_MASK, DC_GPIO_SCL_PD_DIS, mask_sh)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define DDC_MASK_SH_LIST_DCN2(mask_sh, cd) \
 	{DDC_MASK_SH_LIST_COMMON(mask_sh),\
 	0,\
 	0,\
 	(PHY_AUX_CNTL__AUX## cd ##_PAD_RXSEL## mask_sh),\
 	(DC_GPIO_AUX_CTRL_5__DDC_PAD## cd ##_I2CMODE## mask_sh)}
-#endif
 
 struct ddc_registers {
 	struct gpio_registers gpio;
 	uint32_t ddc_setup;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint32_t phy_aux_cntl;
 	uint32_t dc_gpio_aux_ctrl_5;
-#endif
 };
 
 struct ddc_sh_mask {
@@ -140,11 +132,9 @@ struct ddc_sh_mask {
 	/* i2cpad_mask */
 	uint32_t DC_GPIO_SDA_PD_DIS;
 	uint32_t DC_GPIO_SCL_PD_DIS;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	//phy_aux_cntl
 	uint32_t AUX_PAD_RXSEL;
 	uint32_t DDC_PAD_I2CMODE;
-#endif
 };
 
 
@@ -180,7 +170,6 @@ struct ddc_sh_mask {
 {\
 	DDC_I2C_REG_LIST(SCL)\
 }
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define ddc_data_regs_dcn2(id) \
 {\
 	DDC_REG_LIST_DCN2(DATA, id)\
@@ -200,7 +189,6 @@ struct ddc_sh_mask {
 {\
 	DDC_REG_LIST_DCN2(SCL)\
 }
-#endif
 
 
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_GPIO_DDC_REGS_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
index f8f85490e77e..f67c18375bfd 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
@@ -321,8 +321,6 @@ void dal_gpio_destroy(
 		return;
 	}
 
-	dal_gpio_close(*gpio);
-
 	switch ((*gpio)->id) {
 	case GPIO_ID_DDC_DATA:
 		kfree((*gpio)->hw_container.ddc);
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
index d03165e71dc6..92280cc05e2d 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
@@ -169,7 +169,6 @@ void dal_gpio_destroy_generic_mux(
 		return;
 	}
 
-	dal_gpio_close(*mux);
 	dal_gpio_destroy(mux);
 	kfree(*mux);
 
@@ -460,7 +459,6 @@ void dal_gpio_destroy_irq(
 		return;
 	}
 
-	dal_gpio_close(*irq);
 	dal_gpio_destroy(irq);
 	kfree(*irq);
 
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
index 1c12961f6472..1ae153eab31d 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
@@ -48,18 +48,18 @@
 
 struct gpio;
 
-static void destruct(
+static void dal_hw_ddc_destruct(
 	struct hw_ddc *pin)
 {
 	dal_hw_gpio_destruct(&pin->base);
 }
 
-static void destroy(
+static void dal_hw_ddc_destroy(
 	struct hw_gpio_pin **ptr)
 {
 	struct hw_ddc *pin = HW_DDC_FROM_BASE(*ptr);
 
-	destruct(pin);
+	dal_hw_ddc_destruct(pin);
 
 	kfree(pin);
 
@@ -150,7 +150,6 @@ static enum gpio_result set_config(
 					AUX_PAD1_MODE, 0);
 		}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		if (ddc->regs->dc_gpio_aux_ctrl_5 != 0) {
 				REG_UPDATE(dc_gpio_aux_ctrl_5, DDC_PAD_I2CMODE, 1);
 		}
@@ -158,7 +157,6 @@ static enum gpio_result set_config(
 		if (ddc->regs->phy_aux_cntl != 0) {
 				REG_UPDATE(phy_aux_cntl, AUX_PAD_RXSEL, 1);
 		}
-#endif
 		return GPIO_RESULT_OK;
 	case GPIO_DDC_CONFIG_TYPE_MODE_AUX:
 		/* set the AUX pad mode */
@@ -166,12 +164,10 @@ static enum gpio_result set_config(
 			REG_SET(gpio.MASK_reg, regval,
 					AUX_PAD1_MODE, 1);
 		}
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 		if (ddc->regs->dc_gpio_aux_ctrl_5 != 0) {
 			REG_UPDATE(dc_gpio_aux_ctrl_5,
 					DDC_PAD_I2CMODE, 0);
 		}
-#endif
 
 		return GPIO_RESULT_OK;
 	case GPIO_DDC_CONFIG_TYPE_POLL_FOR_CONNECT:
@@ -211,7 +207,7 @@ static enum gpio_result set_config(
 }
 
 static const struct hw_gpio_pin_funcs funcs = {
-	.destroy = destroy,
+	.destroy = dal_hw_ddc_destroy,
 	.open = dal_hw_gpio_open,
 	.get_value = dal_hw_gpio_get_value,
 	.set_value = dal_hw_gpio_set_value,
@@ -220,7 +216,7 @@ static const struct hw_gpio_pin_funcs funcs = {
 	.close = dal_hw_gpio_close,
 };
 
-static void construct(
+static void dal_hw_ddc_construct(
 	struct hw_ddc *ddc,
 	enum gpio_id id,
 	uint32_t en,
@@ -247,7 +243,7 @@ void dal_hw_ddc_init(
 		return;
 	}
 
-	construct(*hw_ddc, id, en, ctx);
+	dal_hw_ddc_construct(*hw_ddc, id, en, ctx);
 }
 
 struct hw_gpio_pin *dal_hw_ddc_get_pin(struct gpio *gpio)
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index fa9f1d055ec8..d2d36d48caaa 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -45,15 +45,11 @@
 #include "dce80/hw_factory_dce80.h"
 #include "dce110/hw_factory_dce110.h"
 #include "dce120/hw_factory_dce120.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "dcn10/hw_factory_dcn10.h"
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dcn20/hw_factory_dcn20.h"
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #include "dcn21/hw_factory_dcn21.h"
-#endif
 
 #include "diagnostics/hw_factory_diag.h"
 
@@ -90,19 +86,15 @@ bool dal_hw_factory_init(
 	case DCE_VERSION_12_1:
 		dal_hw_factory_dce120_init(factory);
 		return true;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case DCN_VERSION_1_0:
 	case DCN_VERSION_1_01:
 		dal_hw_factory_dcn10_init(factory);
 		return true;
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case DCN_VERSION_2_0:
 		dal_hw_factory_dcn20_init(factory);
 		return true;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case DCN_VERSION_2_1:
 		dal_hw_factory_dcn21_init(factory);
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
index 69b899741f6d..f9e847e6555d 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
@@ -46,22 +46,13 @@
 
 struct gpio;
 
-static void dal_hw_generic_construct(
-	struct hw_generic *pin,
-	enum gpio_id id,
-	uint32_t en,
-	struct dc_context *ctx)
-{
-	dal_hw_gpio_construct(&pin->base, id, en, ctx);
-}
-
 static void dal_hw_generic_destruct(
 	struct hw_generic *pin)
 {
 	dal_hw_gpio_destruct(&pin->base);
 }
 
-static void destroy(
+static void dal_hw_generic_destroy(
 	struct hw_gpio_pin **ptr)
 {
 	struct hw_generic *generic = HW_GENERIC_FROM_BASE(*ptr);
@@ -90,7 +81,7 @@ static enum gpio_result set_config(
 }
 
 static const struct hw_gpio_pin_funcs funcs = {
-	.destroy = destroy,
+	.destroy = dal_hw_generic_destroy,
 	.open = dal_hw_gpio_open,
 	.get_value = dal_hw_gpio_get_value,
 	.set_value = dal_hw_gpio_set_value,
@@ -99,14 +90,14 @@ static const struct hw_gpio_pin_funcs funcs = {
 	.close = dal_hw_gpio_close,
 };
 
-static void construct(
-	struct hw_generic *generic,
+static void dal_hw_generic_construct(
+	struct hw_generic *pin,
 	enum gpio_id id,
 	uint32_t en,
 	struct dc_context *ctx)
 {
-	dal_hw_generic_construct(generic, id, en, ctx);
-	generic->base.base.funcs = &funcs;
+	dal_hw_gpio_construct(&pin->base, id, en, ctx);
+	pin->base.base.funcs = &funcs;
 }
 
 void dal_hw_generic_init(
@@ -126,7 +117,7 @@ void dal_hw_generic_init(
 		return;
 	}
 
-	construct(*hw_generic, id, en, ctx);
+	dal_hw_generic_construct(*hw_generic, id, en, ctx);
 }
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
index 00c9bcf660a3..692f29de7797 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
@@ -46,34 +46,18 @@
 
 struct gpio;
 
-static void dal_hw_hpd_construct(
-	struct hw_hpd *pin,
-	enum gpio_id id,
-	uint32_t en,
-	struct dc_context *ctx)
-{
-	dal_hw_gpio_construct(&pin->base, id, en, ctx);
-}
-
 static void dal_hw_hpd_destruct(
 	struct hw_hpd *pin)
 {
 	dal_hw_gpio_destruct(&pin->base);
 }
 
-
-static void destruct(
-	struct hw_hpd *hpd)
-{
-	dal_hw_hpd_destruct(hpd);
-}
-
-static void destroy(
+static void dal_hw_hpd_destroy(
 	struct hw_gpio_pin **ptr)
 {
 	struct hw_hpd *hpd = HW_HPD_FROM_BASE(*ptr);
 
-	destruct(hpd);
+	dal_hw_hpd_destruct(hpd);
 
 	kfree(hpd);
 
@@ -120,7 +104,7 @@ static enum gpio_result set_config(
 }
 
 static const struct hw_gpio_pin_funcs funcs = {
-	.destroy = destroy,
+	.destroy = dal_hw_hpd_destroy,
 	.open = dal_hw_gpio_open,
 	.get_value = get_value,
 	.set_value = dal_hw_gpio_set_value,
@@ -129,14 +113,14 @@ static const struct hw_gpio_pin_funcs funcs = {
 	.close = dal_hw_gpio_close,
 };
 
-static void construct(
-	struct hw_hpd *hpd,
+static void dal_hw_hpd_construct(
+	struct hw_hpd *pin,
 	enum gpio_id id,
 	uint32_t en,
 	struct dc_context *ctx)
 {
-	dal_hw_hpd_construct(hpd, id, en, ctx);
-	hpd->base.base.funcs = &funcs;
+	dal_hw_gpio_construct(&pin->base, id, en, ctx);
+	pin->base.base.funcs = &funcs;
 }
 
 void dal_hw_hpd_init(
@@ -156,7 +140,7 @@ void dal_hw_hpd_init(
 		return;
 	}
 
-	construct(*hw_hpd, id, en, ctx);
+	dal_hw_hpd_construct(*hw_hpd, id, en, ctx);
 }
 
 struct hw_gpio_pin *dal_hw_hpd_get_pin(struct gpio *gpio)
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index f2046f55d6a8..5d396657a1ee 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -43,15 +43,11 @@
 #include "dce80/hw_translate_dce80.h"
 #include "dce110/hw_translate_dce110.h"
 #include "dce120/hw_translate_dce120.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "dcn10/hw_translate_dcn10.h"
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dcn20/hw_translate_dcn20.h"
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #include "dcn21/hw_translate_dcn21.h"
-#endif
 
 #include "diagnostics/hw_translate_diag.h"
 
@@ -85,19 +81,15 @@ bool dal_hw_translate_init(
 	case DCE_VERSION_12_1:
 		dal_hw_translate_dce120_init(translate);
 		return true;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case DCN_VERSION_1_0:
 	case DCN_VERSION_1_01:
 		dal_hw_translate_dcn10_init(translate);
 		return true;
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case DCN_VERSION_2_0:
 		dal_hw_translate_dcn20_init(translate);
 		return true;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case DCN_VERSION_2_1:
 		dal_hw_translate_dcn21_init(translate);
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/Makefile b/drivers/gpu/drm/amd/display/dc/hdcp/Makefile
new file mode 100644
index 000000000000..4170b6eb9ec0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/Makefile
@@ -0,0 +1,28 @@
+# Copyright 2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'hdcp' sub-component of DAL.
+#
+
+HDCP_MSG = hdcp_msg.o
+
+AMD_DAL_HDCP_MSG = $(addprefix $(AMDDALPATH)/dc/hdcp/,$(HDCP_MSG))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HDCP_MSG)
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
new file mode 100644
index 000000000000..6f730b5bfe42
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "dm_services.h"
+#include "dm_helpers.h"
+#include "include/hdcp_types.h"
+#include "include/i2caux_interface.h"
+#include "include/signal_types.h"
+#include "core_types.h"
+#include "dc_link_ddc.h"
+#include "link_hwss.h"
+
+#define DC_LOGGER \
+	link->ctx->logger
+#define HDCP14_KSV_SIZE 5
+#define HDCP14_MAX_KSV_FIFO_SIZE 127*HDCP14_KSV_SIZE
+
+static const bool hdcp_cmd_is_read[] = {
+	[HDCP_MESSAGE_ID_READ_BKSV] = true,
+	[HDCP_MESSAGE_ID_READ_RI_R0] = true,
+	[HDCP_MESSAGE_ID_READ_PJ] = true,
+	[HDCP_MESSAGE_ID_WRITE_AKSV] = false,
+	[HDCP_MESSAGE_ID_WRITE_AINFO] = false,
+	[HDCP_MESSAGE_ID_WRITE_AN] = false,
+	[HDCP_MESSAGE_ID_READ_VH_X] = true,
+	[HDCP_MESSAGE_ID_READ_VH_0] = true,
+	[HDCP_MESSAGE_ID_READ_VH_1] = true,
+	[HDCP_MESSAGE_ID_READ_VH_2] = true,
+	[HDCP_MESSAGE_ID_READ_VH_3] = true,
+	[HDCP_MESSAGE_ID_READ_VH_4] = true,
+	[HDCP_MESSAGE_ID_READ_BCAPS] = true,
+	[HDCP_MESSAGE_ID_READ_BSTATUS] = true,
+	[HDCP_MESSAGE_ID_READ_KSV_FIFO] = true,
+	[HDCP_MESSAGE_ID_READ_BINFO] = true,
+	[HDCP_MESSAGE_ID_HDCP2VERSION] = true,
+	[HDCP_MESSAGE_ID_RX_CAPS] = true,
+	[HDCP_MESSAGE_ID_WRITE_AKE_INIT] = false,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_CERT] = true,
+	[HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM] = false,
+	[HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM] = false,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME] = true,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO] = true,
+	[HDCP_MESSAGE_ID_WRITE_LC_INIT] = false,
+	[HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME] = true,
+	[HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS] = false,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST] = true,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK] = false,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = false,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = true,
+	[HDCP_MESSAGE_ID_READ_RXSTATUS] = true,
+	[HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = false
+};
+
+static const uint8_t hdcp_i2c_offsets[] = {
+	[HDCP_MESSAGE_ID_READ_BKSV] = 0x0,
+	[HDCP_MESSAGE_ID_READ_RI_R0] = 0x8,
+	[HDCP_MESSAGE_ID_READ_PJ] = 0xA,
+	[HDCP_MESSAGE_ID_WRITE_AKSV] = 0x10,
+	[HDCP_MESSAGE_ID_WRITE_AINFO] = 0x15,
+	[HDCP_MESSAGE_ID_WRITE_AN] = 0x18,
+	[HDCP_MESSAGE_ID_READ_VH_X] = 0x20,
+	[HDCP_MESSAGE_ID_READ_VH_0] = 0x20,
+	[HDCP_MESSAGE_ID_READ_VH_1] = 0x24,
+	[HDCP_MESSAGE_ID_READ_VH_2] = 0x28,
+	[HDCP_MESSAGE_ID_READ_VH_3] = 0x2C,
+	[HDCP_MESSAGE_ID_READ_VH_4] = 0x30,
+	[HDCP_MESSAGE_ID_READ_BCAPS] = 0x40,
+	[HDCP_MESSAGE_ID_READ_BSTATUS] = 0x41,
+	[HDCP_MESSAGE_ID_READ_KSV_FIFO] = 0x43,
+	[HDCP_MESSAGE_ID_READ_BINFO] = 0xFF,
+	[HDCP_MESSAGE_ID_HDCP2VERSION] = 0x50,
+	[HDCP_MESSAGE_ID_WRITE_AKE_INIT] = 0x60,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_CERT] = 0x80,
+	[HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM] = 0x60,
+	[HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM] = 0x60,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME] = 0x80,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO] = 0x80,
+	[HDCP_MESSAGE_ID_WRITE_LC_INIT] = 0x60,
+	[HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME] = 0x80,
+	[HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS] = 0x60,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST] = 0x80,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK] = 0x60,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = 0x60,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = 0x80,
+	[HDCP_MESSAGE_ID_READ_RXSTATUS] = 0x70
+};
+
+struct protection_properties {
+	bool supported;
+	bool (*process_transaction)(
+		struct dc_link *link,
+		struct hdcp_protection_message *message_info);
+};
+
+static const struct protection_properties non_supported_protection = {
+	.supported = false
+};
+
+static bool hdmi_14_process_transaction(
+	struct dc_link *link,
+	struct hdcp_protection_message *message_info)
+{
+	uint8_t *buff = NULL;
+	bool result;
+	const uint8_t hdcp_i2c_addr_link_primary = 0x3a; /* 0x74 >> 1*/
+	const uint8_t hdcp_i2c_addr_link_secondary = 0x3b; /* 0x76 >> 1*/
+	struct i2c_command i2c_command;
+	uint8_t offset = hdcp_i2c_offsets[message_info->msg_id];
+	struct i2c_payload i2c_payloads[] = {
+		{ true, 0, 1, &offset },
+		/* actual hdcp payload, will be filled later, zeroed for now*/
+		{ 0 }
+	};
+
+	switch (message_info->link) {
+	case HDCP_LINK_SECONDARY:
+		i2c_payloads[0].address = hdcp_i2c_addr_link_secondary;
+		i2c_payloads[1].address = hdcp_i2c_addr_link_secondary;
+		break;
+	case HDCP_LINK_PRIMARY:
+	default:
+		i2c_payloads[0].address = hdcp_i2c_addr_link_primary;
+		i2c_payloads[1].address = hdcp_i2c_addr_link_primary;
+		break;
+	}
+
+	if (hdcp_cmd_is_read[message_info->msg_id]) {
+		i2c_payloads[1].write = false;
+		i2c_command.number_of_payloads = ARRAY_SIZE(i2c_payloads);
+		i2c_payloads[1].length = message_info->length;
+		i2c_payloads[1].data = message_info->data;
+	} else {
+		i2c_command.number_of_payloads = 1;
+		buff = kzalloc(message_info->length + 1, GFP_KERNEL);
+
+		if (!buff)
+			return false;
+
+		buff[0] = offset;
+		memmove(&buff[1], message_info->data, message_info->length);
+		i2c_payloads[0].length = message_info->length + 1;
+		i2c_payloads[0].data = buff;
+	}
+
+	i2c_command.payloads = i2c_payloads;
+	i2c_command.engine = I2C_COMMAND_ENGINE_HW;//only HW
+	i2c_command.speed = link->ddc->ctx->dc->caps.i2c_speed_in_khz;
+
+	result = dm_helpers_submit_i2c(
+			link->ctx,
+			link,
+			&i2c_command);
+	kfree(buff);
+
+	return result;
+}
+
+static const struct protection_properties hdmi_14_protection = {
+	.supported = true,
+	.process_transaction = hdmi_14_process_transaction
+};
+
+static const uint32_t hdcp_dpcd_addrs[] = {
+	[HDCP_MESSAGE_ID_READ_BKSV] = 0x68000,
+	[HDCP_MESSAGE_ID_READ_RI_R0] = 0x68005,
+	[HDCP_MESSAGE_ID_READ_PJ] = 0xFFFFFFFF,
+	[HDCP_MESSAGE_ID_WRITE_AKSV] = 0x68007,
+	[HDCP_MESSAGE_ID_WRITE_AINFO] = 0x6803B,
+	[HDCP_MESSAGE_ID_WRITE_AN] = 0x6800c,
+	[HDCP_MESSAGE_ID_READ_VH_X] = 0x68014,
+	[HDCP_MESSAGE_ID_READ_VH_0] = 0x68014,
+	[HDCP_MESSAGE_ID_READ_VH_1] = 0x68018,
+	[HDCP_MESSAGE_ID_READ_VH_2] = 0x6801c,
+	[HDCP_MESSAGE_ID_READ_VH_3] = 0x68020,
+	[HDCP_MESSAGE_ID_READ_VH_4] = 0x68024,
+	[HDCP_MESSAGE_ID_READ_BCAPS] = 0x68028,
+	[HDCP_MESSAGE_ID_READ_BSTATUS] = 0x68029,
+	[HDCP_MESSAGE_ID_READ_KSV_FIFO] = 0x6802c,
+	[HDCP_MESSAGE_ID_READ_BINFO] = 0x6802a,
+	[HDCP_MESSAGE_ID_RX_CAPS] = 0x6921d,
+	[HDCP_MESSAGE_ID_WRITE_AKE_INIT] = 0x69000,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_CERT] = 0x6900b,
+	[HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM] = 0x69220,
+	[HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM] = 0x692a0,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME] = 0x692c0,
+	[HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO] = 0x692e0,
+	[HDCP_MESSAGE_ID_WRITE_LC_INIT] = 0x692f0,
+	[HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME] = 0x692f8,
+	[HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS] = 0x69318,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST] = 0x69330,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK] = 0x693e0,
+	[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = 0x693f0,
+	[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = 0x69473,
+	[HDCP_MESSAGE_ID_READ_RXSTATUS] = 0x69493,
+	[HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = 0x69494
+};
+
+static bool dpcd_access_helper(
+	struct dc_link *link,
+	uint32_t length,
+	uint8_t *data,
+	uint32_t dpcd_addr,
+	bool is_read)
+{
+	enum dc_status status;
+	uint32_t cur_length = 0;
+	uint32_t offset = 0;
+	uint32_t ksv_read_size = 0x6803b - 0x6802c;
+
+	/* Read KSV, need repeatedly handle */
+	if (dpcd_addr == 0x6802c) {
+		if (length % HDCP14_KSV_SIZE) {
+			DC_LOG_ERROR("%s: KsvFifo Size(%d) is not a multiple of HDCP14_KSV_SIZE(%d)\n",
+				__func__,
+				length,
+				HDCP14_KSV_SIZE);
+		}
+		if (length > HDCP14_MAX_KSV_FIFO_SIZE) {
+			DC_LOG_ERROR("%s: KsvFifo Size(%d) is greater than HDCP14_MAX_KSV_FIFO_SIZE(%d)\n",
+				__func__,
+				length,
+				HDCP14_MAX_KSV_FIFO_SIZE);
+		}
+
+		DC_LOG_ERROR("%s: Reading %d Ksv(s) from KsvFifo\n",
+			__func__,
+			length / HDCP14_KSV_SIZE);
+
+		while (length > 0) {
+			if (length > ksv_read_size) {
+				status = core_link_read_dpcd(
+					link,
+					dpcd_addr + offset,
+					data + offset,
+					ksv_read_size);
+
+				data += ksv_read_size;
+				length -= ksv_read_size;
+			} else {
+				status = core_link_read_dpcd(
+					link,
+					dpcd_addr + offset,
+					data + offset,
+					length);
+
+				data += length;
+				length = 0;
+			}
+
+			if (status != DC_OK)
+				return false;
+		}
+	} else {
+		while (length > 0) {
+			if (length > DEFAULT_AUX_MAX_DATA_SIZE)
+				cur_length = DEFAULT_AUX_MAX_DATA_SIZE;
+			else
+				cur_length = length;
+
+			if (is_read) {
+				status = core_link_read_dpcd(
+					link,
+					dpcd_addr + offset,
+					data + offset,
+					cur_length);
+			} else {
+				status = core_link_write_dpcd(
+					link,
+					dpcd_addr + offset,
+					data + offset,
+					cur_length);
+			}
+
+			if (status != DC_OK)
+				return false;
+
+			length -= cur_length;
+			offset += cur_length;
+		}
+	}
+	return true;
+}
+
+static bool dp_11_process_transaction(
+	struct dc_link *link,
+	struct hdcp_protection_message *message_info)
+{
+	return dpcd_access_helper(
+		link,
+		message_info->length,
+		message_info->data,
+		hdcp_dpcd_addrs[message_info->msg_id],
+		hdcp_cmd_is_read[message_info->msg_id]);
+}
+
+static const struct protection_properties dp_11_protection = {
+	.supported = true,
+	.process_transaction = dp_11_process_transaction
+};
+
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h
index fd39e2abe2ed..4ead89dd7c41 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h
@@ -43,10 +43,8 @@ enum dc_status {
 	DC_FAIL_BANDWIDTH_VALIDATE = 13, /* BW and Watermark validation */
 	DC_FAIL_SCALING = 14,
 	DC_FAIL_DP_LINK_TRAINING = 15,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	DC_FAIL_DSC_VALIDATE = 16,
 	DC_NO_DSC_RESOURCE = 17,
-#endif
 	DC_FAIL_UNSUPPORTED_1 = 18,
 	DC_FAIL_CLK_EXCEED_MAX = 21,
 	DC_FAIL_CLK_BELOW_MIN = 22, /*THIS IS MIN PER IP*/
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index f189307750ab..f285b76888fb 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -33,13 +33,11 @@
 #include "dc_bios_types.h"
 #include "mem_input.h"
 #include "hubp.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "mpc.h"
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #include "dwb.h"
 #include "mcif_wb.h"
-#endif
 
 #define MAX_CLOCK_SOURCES 7
 
@@ -52,7 +50,9 @@ void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
 #include "clock_source.h"
 #include "audio.h"
 #include "dm_pp_smu.h"
-
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+#include "dm_cp_psp.h"
+#endif
 
 /************ link *****************/
 struct link_init_data {
@@ -87,9 +87,7 @@ void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
 struct resource_pool;
 struct dc_state;
 struct resource_context;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 struct clk_bw_params;
-#endif
 
 struct resource_funcs {
 	void (*destroy)(struct resource_pool **pool);
@@ -103,7 +101,7 @@ struct resource_funcs {
 
 	int (*populate_dml_pipes)(
 		struct dc *dc,
-		struct resource_context *res_ctx,
+		struct dc_state *context,
 		display_e2e_pipe_params_st *pipes);
 
 	enum dc_status (*validate_global)(
@@ -133,7 +131,6 @@ struct resource_funcs {
 			struct resource_context *res_ctx,
 			const struct resource_pool *pool,
 			struct dc_stream_state *stream);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*populate_dml_writeback_from_context)(
 			struct dc *dc,
 			struct resource_context *res_ctx,
@@ -144,12 +141,9 @@ struct resource_funcs {
 			struct dc_state *context,
 			display_e2e_pipe_params_st *pipes,
 			int pipe_cnt);
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	void (*update_bw_bounding_box)(
 			struct dc *dc,
 			struct clk_bw_params *bw_params);
-#endif
 
 };
 
@@ -178,7 +172,6 @@ struct resource_pool {
 	struct dce_i2c_sw *sw_i2cs[MAX_PIPES];
 	bool i2c_hw_buffer_in_use;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dwbc *dwbc[MAX_DWB_PIPES];
 	struct mcif_wb *mcif_wb[MAX_DWB_PIPES];
 	struct {
@@ -186,11 +179,8 @@ struct resource_pool {
 		unsigned int gsl_1:1;
 		unsigned int gsl_2:1;
 	} gsl_groups;
-#endif
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct display_stream_compressor *dscs[MAX_PIPES];
-#endif
 
 	unsigned int pipe_count;
 	unsigned int underlay_pipe_index;
@@ -204,9 +194,7 @@ struct resource_pool {
 	unsigned int timing_generator_count;
 	unsigned int mpcc_count;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	unsigned int writeback_pipe_count;
-#endif
 	/*
 	 * reserved clock source for DP
 	 */
@@ -224,20 +212,22 @@ struct resource_pool {
 
 	struct abm *abm;
 	struct dmcu *dmcu;
+	struct dmub_psr *psr;
 
 	const struct resource_funcs *funcs;
 	const struct resource_caps *res_cap;
+
+	struct ddc_service *oem_device;
 };
 
 struct dcn_fe_bandwidth {
 	int dppclk_khz;
+
 };
 
 struct stream_resource {
 	struct output_pixel_processor *opp;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	struct display_stream_compressor *dsc;
-#endif
 	struct timing_generator *tg;
 	struct stream_encoder *stream_enc;
 	struct audio *audio;
@@ -246,12 +236,10 @@ struct stream_resource {
 	struct encoder_info_frame encoder_info_frame;
 
 	struct abm *abm;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* There are only (num_pipes+1)/2 groups. 0 means unassigned,
 	 * otherwise it's using group number 'gsl_group-1'
 	 */
 	uint8_t gsl_group;
-#endif
 };
 
 struct plane_resource {
@@ -303,17 +291,15 @@ struct pipe_ctx {
 	struct pipe_ctx *next_odm_pipe;
 	struct pipe_ctx *prev_odm_pipe;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	struct _vcs_dpi_display_dlg_regs_st dlg_regs;
 	struct _vcs_dpi_display_ttu_regs_st ttu_regs;
 	struct _vcs_dpi_display_rq_regs_st rq_regs;
 	struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;
 #endif
 	union pipe_update_flags update_flags;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	struct dwbc *dwbc;
 	struct mcif_wb *mcif_wb;
-#endif
 };
 
 struct resource_context {
@@ -322,9 +308,7 @@ struct resource_context {
 	bool is_audio_acquired[MAX_PIPES];
 	uint8_t clock_source_ref_count[MAX_CLOCK_SOURCES];
 	uint8_t dp_clock_source_ref_count;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	bool is_dsc_acquired[MAX_PIPES];
-#endif
 };
 
 struct dce_bw_output {
@@ -344,18 +328,14 @@ struct dce_bw_output {
 	int blackout_recovery_time_us;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dcn_bw_writeback {
 	struct mcif_arb_params mcif_wb_arb[MAX_DWB_PIPES];
 };
-#endif
 
 struct dcn_bw_output {
 	struct dc_clocks clk;
 	struct dcn_watermark_set watermarks;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dcn_bw_writeback bw_writeback;
-#endif
 };
 
 union bw_output {
@@ -389,16 +369,12 @@ struct dc_state {
 
 	/* Note: these are big structures, do *not* put on stack! */
 	struct dm_pp_display_configuration pp_display_cfg;
-#ifdef CONFIG_DRM_AMD_DC_DCN1_0
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	struct dcn_bw_internal_vars dcn_bw_vars;
 #endif
 
 	struct clk_mgr *clk_mgr;
 
-	struct {
-		bool full_update_needed : 1;
-	} commit_hints;
-
 	struct kref refcount;
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
index b1fab251c09b..de2d160114db 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
@@ -95,6 +95,9 @@ bool dal_ddc_service_query_ddc_data(
 		uint8_t *read_buf,
 		uint32_t read_size);
 
+bool dal_ddc_submit_aux_command(struct ddc_service *ddc,
+		struct aux_payload *payload);
+
 int dc_link_aux_transfer_raw(struct ddc_service *ddc,
 		struct aux_payload *payload,
 		enum aux_channel_operation_result *operation_result);
@@ -102,6 +105,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
 bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc,
 		struct aux_payload *payload);
 
+uint32_t dc_link_aux_configure_timeout(struct ddc_service *ddc,
+		uint32_t timeout);
+
 void dal_ddc_service_write_scdc_data(
 		struct ddc_service *ddc_service,
 		uint32_t pix_clk,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index 08a4df2c61a8..8b1f0ce6c2a7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -28,6 +28,8 @@
 
 #define LINK_TRAINING_ATTEMPTS 4
 #define LINK_TRAINING_RETRY_DELAY 50 /* ms */
+#define LINK_AUX_DEFAULT_EXTENDED_TIMEOUT_PERIOD 3200 /*us*/
+#define LINK_AUX_DEFAULT_TIMEOUT_PERIOD 552 /*us*/
 
 struct dc_link;
 struct dc_stream_state;
@@ -43,6 +45,9 @@ bool dp_verify_link_cap_with_retries(
 	struct dc_link_settings *known_limit_link_setting,
 	int attempts);
 
+bool dp_verify_mst_link_cap(
+	struct dc_link *link);
+
 bool dp_validate_mode_timing(
 	struct dc_link *link,
 	const struct dc_crtc_timing *timing);
@@ -52,10 +57,11 @@ void decide_link_settings(
 	struct dc_link_settings *link_setting);
 
 bool perform_link_training_with_retries(
-	struct dc_link *link,
 	const struct dc_link_settings *link_setting,
 	bool skip_video_pattern,
-	int attempts);
+	int attempts,
+	struct pipe_ctx *pipe_ctx,
+	enum signal_type signal);
 
 bool is_mst_supported(struct dc_link *link);
 
@@ -70,13 +76,13 @@ void dp_enable_mst_on_sink(struct dc_link *link, bool enable);
 enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
 void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+bool dp_overwrite_extended_receiver_cap(struct dc_link *link);
+
 void dp_set_fec_ready(struct dc_link *link, bool ready);
 void dp_set_fec_enable(struct dc_link *link, bool enable);
 bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable);
 bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable);
 void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable);
 bool dp_update_dsc_config(struct pipe_ctx *pipe_ctx);
-#endif
 
 #endif /* __DC_LINK_DP_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h
index 45a07eeffbb6..45a07eeffbb6 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h b/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h
index e79cd4e92919..e77b3a76766d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h
@@ -140,6 +140,9 @@ struct write_command_context {
 
 
 struct aux_engine_funcs {
+	bool (*configure_timeout)(
+		struct ddc_service *ddc,
+		uint32_t timeout);
 	void (*destroy)(
 		struct aux_engine **ptr);
 	bool (*acquire_engine)(
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 76f9ad1b23df..ac530c057ddd 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -31,7 +31,6 @@
 #define DCN_MINIMUM_DISPCLK_Khz 100000
 #define DCN_MINIMUM_DPPCLK_Khz 100000
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 /* Constants */
 #define DDR4_DRAM_WIDTH   64
 #define WM_A 0
@@ -39,15 +38,13 @@
 #define WM_C 2
 #define WM_D 3
 #define WM_SET_COUNT 4
-#endif
 
 #define DCN_MINIMUM_DISPCLK_Khz 100000
 #define DCN_MINIMUM_DPPCLK_Khz 100000
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
 /* Will these bw structures be ASIC specific? */
 
-#define MAX_NUM_DPM_LVL		4
+#define MAX_NUM_DPM_LVL		8
 #define WM_SET_COUNT 		4
 
 
@@ -69,6 +66,8 @@ struct wm_range_table_entry {
 	unsigned int wm_inst;
 	unsigned int wm_type;
 	double pstate_latency_us;
+	double sr_exit_time_us;
+	double sr_enter_plus_exit_time_us;
 	bool valid;
 };
 
@@ -152,7 +151,6 @@ struct clk_bw_params {
 	struct clk_limit_table clk_table;
 	struct wm_table wm_table;
 };
-#endif
 /* Public interfaces */
 
 struct clk_states {
@@ -180,16 +178,21 @@ struct clk_mgr_funcs {
 			struct dc_state *context,
 			enum dc_clock_type clock_type,
 			struct dc_clock_config *clock_cfg);
+
+	bool (*are_clock_states_equal) (struct dc_clocks *a,
+			struct dc_clocks *b);
+	void (*notify_wm_ranges)(struct clk_mgr *clk_mgr);
 };
 
 struct clk_mgr {
 	struct dc_context *ctx;
 	struct clk_mgr_funcs *funcs;
 	struct dc_clocks clks;
+	bool psr_allow_active_cache;
 	int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
-#ifdef CONFIG_DRM_AMD_DC_DCN2_1
+	int dentist_vco_freq_khz;
+	struct clk_state_registers_and_bypass boot_snapshot;
 	struct clk_bw_params *bw_params;
-#endif
 };
 
 /* forward declarations */
@@ -199,4 +202,8 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 
 void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr);
 
+void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr);
+
+void clk_mgr_optimize_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr);
+
 #endif /* __DAL_CLK_MGR_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
index 7dd46eb96d67..862952c0286a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
@@ -96,12 +96,10 @@ enum dentist_divider_range {
 	.MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \
 	.MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 #define CLK_REG_LIST_NV10() \
 	SR(DENTIST_DISPCLK_CNTL), \
 	CLK_SRI(CLK3_CLK_PLL_REQ, CLK3, 0), \
 	CLK_SRI(CLK3_CLK2_DFS_CNTL, CLK3, 0)
-#endif
 
 #define CLK_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
@@ -120,7 +118,6 @@ enum dentist_divider_range {
 	CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\
 	CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh),
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 #define CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh) \
 	CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
 	CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, mask_sh),\
@@ -130,7 +127,6 @@ enum dentist_divider_range {
 	CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\
 	CLK_SF(CLK3_0_CLK3_CLK_PLL_REQ, FbMult_int, mask_sh),\
 	CLK_SF(CLK3_0_CLK3_CLK_PLL_REQ, FbMult_frac, mask_sh)
-#endif
 
 #define CLK_REG_FIELD_LIST(type) \
 	type DPREFCLK_SRC_SEL; \
@@ -143,30 +139,24 @@ enum dentist_divider_range {
  ****************** Clock Manager Private Structures ***********************************
  ***************************************************************************************
  */
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 #define CLK20_REG_FIELD_LIST(type) \
 	type DENTIST_DPPCLK_WDIVIDER; \
 	type DENTIST_DPPCLK_CHG_DONE; \
 	type FbMult_int; \
 	type FbMult_frac;
-#endif
 
 #define VBIOS_SMU_REG_FIELD_LIST(type) \
 	type CONTENT;
 
 struct clk_mgr_shift {
 	CLK_REG_FIELD_LIST(uint8_t)
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	CLK20_REG_FIELD_LIST(uint8_t)
-#endif
 	VBIOS_SMU_REG_FIELD_LIST(uint32_t)
 };
 
 struct clk_mgr_mask {
 	CLK_REG_FIELD_LIST(uint32_t)
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	CLK20_REG_FIELD_LIST(uint32_t)
-#endif
 	VBIOS_SMU_REG_FIELD_LIST(uint32_t)
 };
 
@@ -174,16 +164,29 @@ struct clk_mgr_registers {
 	uint32_t DPREFCLK_CNTL;
 	uint32_t DENTIST_DISPCLK_CNTL;
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	uint32_t CLK3_CLK2_DFS_CNTL;
 	uint32_t CLK3_CLK_PLL_REQ;
-#endif
 
 	uint32_t MP1_SMN_C2PMSG_67;
 	uint32_t MP1_SMN_C2PMSG_83;
 	uint32_t MP1_SMN_C2PMSG_91;
 };
 
+enum clock_type {
+	clock_type_dispclk = 1,
+	clock_type_dcfclk,
+	clock_type_socclk,
+	clock_type_pixelclk,
+	clock_type_phyclk,
+	clock_type_dppclk,
+	clock_type_fclk,
+	clock_type_dcfdsclk,
+	clock_type_dscclk,
+	clock_type_uclk,
+	clock_type_dramclk,
+};
+
+
 struct state_dependent_clocks {
 	int display_clk_khz;
 	int pixel_clk_khz;
@@ -210,8 +213,6 @@ struct clk_mgr_internal {
 	struct state_dependent_clocks max_clks_by_state[DM_PP_CLOCKS_MAX_STATES];
 
 	/*TODO: figure out which of the below fields should be here vs in asic specific portion */
-	int dentist_vco_freq_khz;
-
 	/* Cache the status of DFS-bypass feature*/
 	bool dfs_bypass_enabled;
 	/* True if the DFS-bypass feature is enabled and active. */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index d8e744f366e5..05ee5295d2c1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -38,8 +38,7 @@ struct dccg {
 struct dccg_funcs {
 	void (*update_dpp_dto)(struct dccg *dccg,
 			int dpp_inst,
-			int req_dppclk,
-			bool reduce_divider_only);
+			int req_dppclk);
 	void (*get_dccg_ref_freq)(struct dccg *dccg,
 			unsigned int xtalin_freq_inKhz,
 			unsigned int *dccg_ref_freq_inKhz);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index a6297219d7fc..c0dc1d0f5cae 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -52,7 +52,6 @@ struct dcn_hubbub_wm {
 	struct dcn_hubbub_wm_set sets[4];
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 enum dcn_hubbub_page_table_depth {
 	DCN_PAGE_TABLE_DEPTH_1_LEVEL,
 	DCN_PAGE_TABLE_DEPTH_2_LEVEL,
@@ -101,13 +100,11 @@ struct hubbub_addr_config {
 	} default_addrs;
 };
 
-#endif
 struct hubbub_funcs {
 	void (*update_dchub)(
 			struct hubbub *hubbub,
 			struct dchub_init_data *dh_data);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	int (*init_dchub_sys_ctx)(
 			struct hubbub *hubbub,
 			struct dcn_hubbub_phys_addr_config *pa_config);
@@ -116,7 +113,6 @@ struct hubbub_funcs {
 			struct dcn_hubbub_virt_addr_config *va_config,
 			int vmid);
 
-#endif
 	bool (*get_dcc_compression_cap)(struct hubbub *hubbub,
 			const struct dc_dcc_surface_param *input,
 			struct dc_surface_dcc_cap *output);
@@ -147,6 +143,7 @@ struct hubbub_funcs {
 	bool (*is_allow_self_refresh_enabled)(struct hubbub *hubbub);
 	void (*allow_self_refresh_control)(struct hubbub *hubbub, bool allow);
 
+	void (*apply_DEDCN21_147_wa)(struct hubbub *hubbub);
 };
 
 struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
index c68f0ce346c7..5315f1f86b21 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
@@ -52,6 +52,8 @@ struct dmcu {
 	enum dmcu_state dmcu_state;
 	struct dmcu_version dmcu_version;
 	unsigned int cached_wait_loop_number;
+	uint32_t psp_version;
+	bool auto_load_dmcu;
 };
 
 struct dmcu_funcs {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index 474c7194a9f8..45ef390ae052 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -36,14 +36,10 @@ struct dpp {
 	struct dpp_caps *caps;
 	struct pwl_params regamma_params;
 	struct pwl_params degamma_params;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct dpp_cursor_attributes cur_attr;
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct pwl_params shaper_params;
 	bool cm_bypass_mode;
-#endif
 };
 
 struct dpp_input_csc_matrix {
@@ -51,12 +47,31 @@ struct dpp_input_csc_matrix {
 	uint16_t regval[12];
 };
 
+static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = {
+	{COLOR_SPACE_SRGB,
+		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+	{COLOR_SPACE_SRGB_LIMITED,
+		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+	{COLOR_SPACE_YCBCR601,
+		{0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef,
+						0, 0x2000, 0x38b4, 0xe3a6} },
+	{COLOR_SPACE_YCBCR601_LIMITED,
+		{0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108,
+						0, 0x2568, 0x40de, 0xdd3a} },
+	{COLOR_SPACE_YCBCR709,
+		{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
+						0x2000, 0x3b61, 0xe24f} },
+
+	{COLOR_SPACE_YCBCR709_LIMITED,
+		{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
+						0x2568, 0x43ee, 0xdbb2} }
+};
+
 struct dpp_grph_csc_adjustment {
 	struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE];
 	enum graphics_gamut_adjust_type gamut_adjust_type;
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 struct cnv_color_keyer_params {
 	int color_keyer_en;
 	int color_keyer_mode;
@@ -82,7 +97,6 @@ struct cnv_alpha_2bit_lut {
 	int lut2;
 	int lut3;
 };
-#endif
 
 struct dcn_dpp_state {
 	uint32_t is_enabled;
@@ -190,12 +204,8 @@ struct dpp_funcs {
 			enum surface_pixel_format format,
 			enum expansion_mode mode,
 			struct dc_csc_transform input_csc_color_matrix,
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 			enum dc_color_space input_color_space,
 			struct cnv_alpha_2bit_lut *alpha_2bit_lut);
-#else
-			enum dc_color_space input_color_space);
-#endif
 
 	void (*dpp_full_bypass)(struct dpp *dpp_base);
 
@@ -224,7 +234,6 @@ struct dpp_funcs {
 			bool dppclk_div,
 			bool enable);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	bool (*dpp_program_blnd_lut)(
 			struct dpp *dpp,
 			const struct pwl_params *params);
@@ -237,7 +246,6 @@ struct dpp_funcs {
 	void (*dpp_cnv_set_alpha_keyer)(
 			struct dpp *dpp_base,
 			struct cnv_color_keyer_params *color_keyer);
-#endif
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
index 1ddb1c6fa149..c59740084ebc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
@@ -22,13 +22,16 @@
  * Authors: AMD
  *
  */
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #ifndef __DAL_DSC_H__
 #define __DAL_DSC_H__
 
 #include "dc_dsc.h"
 #include "dc_hw_types.h"
-#include "dc_dp_types.h"
+#include "dc_types.h"
+/* do not include any other headers
+ * or else it might break Edid Utility functionality.
+ */
+
 
 /* Input parameters for configuring DSC from the outside of DSC */
 struct dsc_config {
@@ -81,12 +84,6 @@ struct dsc_enc_caps {
 	uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */
 };
 
-struct display_stream_compressor {
-	const struct dsc_funcs *funcs;
-	struct dc_context *ctx;
-	int inst;
-};
-
 struct dsc_funcs {
 	void (*dsc_get_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz);
 	void (*dsc_read_state)(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
@@ -100,4 +97,3 @@ struct dsc_funcs {
 };
 
 #endif
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index ff1a07b35c85..459f95f52486 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -51,20 +51,15 @@ enum dwb_source {
 	dwb_src_otg3,		/* for DCN1.x/DCN2.x */
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 /* DCN1.x, DCN2.x support 2 pipes */
-#else
-/* DCN1.x supports 2 pipes */
-#endif
 enum dwb_pipe {
 	dwb_pipe0 = 0,
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	dwb_pipe1,
 #endif
 	dwb_pipe_max_num,
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 enum dwb_frame_capture_enable {
 	DWB_FRAME_CAPTURE_DISABLE = 0,
 	DWB_FRAME_CAPTURE_ENABLE = 1,
@@ -77,9 +72,7 @@ enum wbscl_coef_filter_type_sel {
 	WBSCL_COEF_CHROMA_HORZ_FILTER = 3
 };
 
-#endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dwb_warmup_params {
 	bool	warmup_en;	/* false: normal mode, true: enable pattern generator */
 	bool	warmup_mode;	/* false: 420, true: 444 */
@@ -88,7 +81,6 @@ struct dwb_warmup_params {
 	int	warmup_width;	/* Pattern width (pixels) */
 	int	warmup_height;	/* Pattern height (lines) */
 };
-#endif
 
 struct dwb_caps {
 	enum dce_version hw_version;	/* DCN engine version. */
@@ -121,7 +113,8 @@ struct dwbc {
 	int wb_src_plane_inst;/*hubp, mpcc, inst*/
 	bool update_privacymask;
 	uint32_t mask_id;
-
+        int otg_inst;
+        bool mvc_cfg;
 };
 
 struct dwbc_funcs {
@@ -150,13 +143,11 @@ struct dwbc_funcs {
 		struct dwbc *dwbc,
 		bool is_new_content);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 	void (*set_warmup)(
 		struct dwbc *dwbc,
 		struct dwb_warmup_params *warmup_params);
 
-#endif
 
 	bool (*get_dwb_status)(
 		struct dwbc *dwbc);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 809b62b51a43..2cb8466e657b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -38,9 +38,7 @@ enum cursor_pitch {
 };
 
 enum cursor_lines_per_chunk {
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	CURSOR_LINE_PER_CHUNK_1 = 0, /* new for DCN2 */
-#endif
 	CURSOR_LINE_PER_CHUNK_2 = 1,
 	CURSOR_LINE_PER_CHUNK_4,
 	CURSOR_LINE_PER_CHUNK_8,
@@ -65,6 +63,26 @@ struct hubp {
 	bool power_gated;
 };
 
+struct surface_flip_registers {
+	uint32_t DCSURF_SURFACE_CONTROL;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS;
+	bool tmz_surface;
+	bool immediate;
+	uint8_t vmid;
+	bool grph_stereo;
+};
+
 struct hubp_funcs {
 	void (*hubp_setup)(
 			struct hubp *hubp,
@@ -86,6 +104,9 @@ struct hubp_funcs {
 			const struct rect *viewport,
 			const struct rect *viewport_c);
 
+	void (*apply_PLAT_54186_wa)(struct hubp *hubp,
+			const struct dc_plane_address *address);
+
 	bool (*hubp_program_surface_flip_and_addr)(
 		struct hubp *hubp,
 		const struct dc_plane_address *address,
@@ -139,7 +160,6 @@ struct hubp_funcs {
 	unsigned int (*hubp_get_underflow_status)(struct hubp *hubp);
 	void (*hubp_init)(struct hubp *hubp);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*dmdata_set_attributes)(
 			struct hubp *hubp,
 			const struct dc_dmdata_attributes *attr);
@@ -159,7 +179,13 @@ struct hubp_funcs {
 	void (*hubp_set_flip_control_surface_gsl)(
 		struct hubp *hubp,
 		bool enable);
-#endif
+
+	void (*validate_dml_output)(
+			struct hubp *hubp,
+			struct dc_context *ctx,
+			struct _vcs_dpi_display_rq_regs_st *dml_rq_regs,
+			struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr,
+			struct _vcs_dpi_display_ttu_regs_st *dml_ttu_attr);
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index f82365e2d03c..75d419081e76 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -36,9 +36,7 @@
 
 #define MAX_AUDIOS 7
 #define MAX_PIPES 6
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define MAX_DWB_PIPES	1
-#endif
 
 struct gamma_curve {
 	uint32_t offset;
@@ -81,7 +79,6 @@ struct pwl_result_data {
 	uint32_t delta_blue_reg;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dc_rgb {
 	uint32_t red;
 	uint32_t green;
@@ -110,7 +107,6 @@ struct tetrahedral_params {
 	bool use_12bits;
 
 };
-#endif
 
 /* arr_curve_points - regamma regions/segments specification
  * arr_points - beginning and end point specified separately (only one on DCE)
@@ -195,13 +191,11 @@ enum opp_regamma {
 	OPP_REGAMMA_USER
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 enum optc_dsc_mode {
 	OPTC_DSC_DISABLED = 0,
 	OPTC_DSC_ENABLED_444 = 1, /* 'RGB 444' or 'Simple YCbCr 4:2:2' (4:2:2 upsampled to 4:4:4) */
 	OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED = 2 /* Native 4:2:2 or 4:2:0 */
 };
-#endif
 
 struct dc_bias_and_scale {
 	uint16_t scale_red;
@@ -224,12 +218,8 @@ enum test_pattern_mode {
 	TEST_PATTERN_MODE_VERTICALBARS,
 	TEST_PATTERN_MODE_HORIZONTALBARS,
 	TEST_PATTERN_MODE_SINGLERAMP_RGB,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	TEST_PATTERN_MODE_DUALRAMP_RGB,
 	TEST_PATTERN_MODE_XR_BIAS_RGB
-#else
-	TEST_PATTERN_MODE_DUALRAMP_RGB
-#endif
 };
 
 enum test_pattern_color_format {
@@ -255,6 +245,13 @@ enum controller_dp_test_pattern {
 	CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR
 };
 
+enum controller_dp_color_space {
+	CONTROLLER_DP_COLOR_SPACE_RGB,
+	CONTROLLER_DP_COLOR_SPACE_YCBCR601,
+	CONTROLLER_DP_COLOR_SPACE_YCBCR709,
+	CONTROLLER_DP_COLOR_SPACE_UDEFINED
+};
+
 enum dc_lut_mode {
 	LUT_BYPASS,
 	LUT_RAM_A,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index abb4e4237fb6..fb748f082c56 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -113,26 +113,21 @@ struct link_encoder {
 	struct encoder_feature_support features;
 	enum transmitter transmitter;
 	enum hpd_source_id hpd_source;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	bool usbc_combo_phy;
-#endif
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 struct link_enc_state {
 
 		uint32_t dphy_fec_en;
 		uint32_t dphy_fec_ready_shadow;
 		uint32_t dphy_fec_active_status;
+		uint32_t dp_link_training_complete;
 
 };
-#endif
 
 struct link_encoder_funcs {
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	void (*read_state)(
 			struct link_encoder *enc, struct link_enc_state *s);
-#endif
 	bool (*validate_output_with_stream)(
 		struct link_encoder *enc, const struct dc_stream_state *stream);
 	void (*hw_init)(struct link_encoder *enc);
@@ -174,7 +169,6 @@ struct link_encoder_funcs {
 	unsigned int (*get_dig_frontend)(struct link_encoder *enc);
 	void (*destroy)(struct link_encoder **enc);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*fec_set_enable)(struct link_encoder *enc,
 		bool enable);
 
@@ -182,8 +176,11 @@ struct link_encoder_funcs {
 		bool ready);
 
 	bool (*fec_is_active)(struct link_encoder *enc);
-#endif
 	bool (*is_in_alt_mode) (struct link_encoder *enc);
+
+	void (*get_max_link_cap)(struct link_encoder *enc,
+		struct dc_link_settings *link_settings);
+
 	enum signal_type (*get_dig_mode)(
 		struct link_encoder *enc);
 };
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
index e8668388581b..2e2310f1901a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
@@ -40,10 +40,9 @@ struct cstate_pstate_watermarks_st {
 struct dcn_watermarks {
 	uint32_t pte_meta_urgent_ns;
 	uint32_t urgent_ns;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	uint32_t frac_urg_bw_nom;
 	uint32_t frac_urg_bw_flip;
-#endif
+	int32_t urgent_latency_ns;
 	struct cstate_pstate_watermarks_st cstate_pstate;
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 58826be81395..094afc4c8173 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -31,9 +31,7 @@
 #define MAX_MPCC 6
 #define MAX_OPP 6
 
-#if   defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define MAX_DWB		1
-#endif
 
 enum mpc_output_csc_mode {
 	MPC_OUTPUT_CSC_DISABLE = 0,
@@ -66,14 +64,12 @@ struct mpcc_blnd_cfg {
 	int global_alpha;
 	bool overlap_only;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	/* MPCC top/bottom gain settings */
 	int bottom_gain_mode;
 	int background_color_bpc;
 	int top_gain;
 	int bottom_inside_gain;
 	int bottom_outside_gain;
-#endif
 };
 
 struct mpcc_sm_cfg {
@@ -90,7 +86,6 @@ struct mpcc_sm_cfg {
 	int force_next_field_polarity;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct mpc_denorm_clamp {
 	int clamp_max_r_cr;
 	int clamp_min_r_cr;
@@ -99,7 +94,6 @@ struct mpc_denorm_clamp {
 	int clamp_max_b_cb;
 	int clamp_min_b_cb;
 };
-#endif
 
 /*
  * MPCC connection and blending configuration for a single MPCC instance.
@@ -126,10 +120,8 @@ struct mpc {
 	struct dc_context *ctx;
 
 	struct mpcc mpcc_array[MAX_MPCC];
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	struct pwl_params blender_params;
 	bool cm_bypass_mode;
-#endif
 };
 
 struct mpcc_state {
@@ -230,7 +222,6 @@ struct mpc_funcs {
 		struct mpc *mpc,
 		struct mpc_tree *tree);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*set_denorm)(struct mpc *mpc,
 			int opp_id,
 			enum dc_color_depth output_depth);
@@ -258,7 +249,6 @@ struct mpc_funcs {
 			struct mpc *mpc,
 			int mpcc_id,
 			bool power_on);
-#endif
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
index 957e9047381a..7575564b2265 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
@@ -208,6 +208,7 @@ struct output_pixel_processor {
 	struct mpc_tree mpc_tree_params;
 	bool mpcc_disconnect_pending[MAX_PIPES];
 	const struct opp_funcs *funcs;
+	uint32_t dyn_expansion;
 };
 
 enum fmt_stereo_action {
@@ -262,9 +263,7 @@ struct oppbuf_params {
 	enum oppbuf_display_segmentation mso_segmentation;
 	uint32_t mso_overlap_pixel_num;
 	uint32_t pixel_repetition;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	uint32_t num_segment_padded_pixels;
-#endif
 };
 
 struct opp_funcs {
@@ -304,10 +303,10 @@ struct opp_funcs {
 			struct output_pixel_processor *opp,
 			bool enable);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*opp_set_disp_pattern_generator)(
 			struct output_pixel_processor *opp,
 			enum controller_dp_test_pattern test_pattern,
+			enum controller_dp_color_space color_space,
 			enum dc_color_depth color_depth,
 			const struct tg_color *solid_color,
 			int width,
@@ -323,7 +322,6 @@ struct opp_funcs {
 	void (*opp_program_left_edge_extra_pixel)(
 			struct output_pixel_processor *opp,
 			bool count);
-#endif
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index fe9b7a10a1c3..351b387ad606 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -65,13 +65,11 @@ struct audio_clock_info {
 	uint32_t cts_48khz;
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 enum dynamic_metadata_mode {
 	dmdata_dp,
 	dmdata_hdmi,
 	dmdata_dolby_vision
 };
-#endif
 
 struct encoder_info_frame {
 	/* auxiliary video information */
@@ -90,9 +88,7 @@ struct encoder_info_frame {
 struct encoder_unblank_param {
 	struct dc_link_settings link_settings;
 	struct dc_crtc_timing timing;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	int opp_cnt;
-#endif
 };
 
 struct encoder_set_dp_phy_pattern_param {
@@ -109,7 +105,6 @@ struct stream_encoder {
 	enum engine_id id;
 };
 
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 struct enc_state {
 	uint32_t dsc_mode;  // DISABLED  0; 1 or 2 indicate enabled state.
 	uint32_t dsc_slice_width;
@@ -119,13 +114,13 @@ struct enc_state {
 	uint32_t sec_gsp_pps_enable;
 	uint32_t sec_stream_enable;
 };
-#endif
 
 struct stream_encoder_funcs {
 	void (*dp_set_stream_attribute)(
 		struct stream_encoder *enc,
 		struct dc_crtc_timing *crtc_timing,
 		enum dc_color_space output_color_space,
+		bool use_vsc_sdp_for_colorimetry,
 		uint32_t enable_sdp_splitting);
 
 	void (*hdmi_set_stream_attribute)(
@@ -214,8 +209,11 @@ struct stream_encoder_funcs {
 	unsigned int (*dig_source_otg)(
 		struct stream_encoder *enc);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+	bool (*dp_get_pixel_format)(
+		struct stream_encoder *enc,
+		enum dc_pixel_encoding *encoding,
+		enum dc_color_depth *depth);
+
 	void (*enc_read_state)(struct stream_encoder *enc, struct enc_state *s);
 
 	void (*dp_set_dsc_config)(
@@ -227,7 +225,6 @@ struct stream_encoder_funcs {
 	void (*dp_set_dsc_pps_info_packet)(struct stream_encoder *enc,
 				bool enable,
 				uint8_t *dsc_packed_pps);
-#endif
 
 	void (*set_dynamic_metadata)(struct stream_encoder *enc,
 			bool enable,
@@ -237,7 +234,6 @@ struct stream_encoder_funcs {
 	void (*dp_set_odm_combine)(
 		struct stream_encoder *enc,
 		bool odm_combine);
-#endif
 };
 
 #endif /* STREAM_ENCODER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 6196cc32356e..e5e7d94026fc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -195,10 +195,8 @@ struct timing_generator_funcs {
 	void (*lock)(struct timing_generator *tg);
 	void (*lock_doublebuffer_disable)(struct timing_generator *tg);
 	void (*lock_doublebuffer_enable)(struct timing_generator *tg);
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void(*triplebuffer_unlock)(struct timing_generator *tg);
 	void(*triplebuffer_lock)(struct timing_generator *tg);
-#endif
 	void (*enable_reset_trigger)(struct timing_generator *tg,
 				     int source_tg_inst);
 	void (*enable_crtc_reset)(struct timing_generator *tg,
@@ -210,7 +208,8 @@ struct timing_generator_funcs {
 					bool enable, const struct dc_crtc_timing *timing);
 	void (*set_drr)(struct timing_generator *tg, const struct drr_params *params);
 	void (*set_static_screen_control)(struct timing_generator *tg,
-							uint32_t value);
+						uint32_t event_triggers,
+						uint32_t num_frames);
 	void (*set_test_pattern)(
 		struct timing_generator *tg,
 		enum controller_dp_test_pattern test_pattern,
@@ -235,7 +234,6 @@ struct timing_generator_funcs {
 	bool (*is_optc_underflow_occurred)(struct timing_generator *tg);
 	void (*clear_optc_underflow)(struct timing_generator *tg);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	void (*set_dwb_source)(struct timing_generator *optc,
 		uint32_t dwb_pipe_inst);
 
@@ -243,7 +241,6 @@ struct timing_generator_funcs {
 			uint32_t *num_of_input_segments,
 			uint32_t *seg0_src_sel,
 			uint32_t *seg1_src_sel);
-#endif
 
 	/**
 	 * Configure CRCs for the given timing generator. Return false if TG is
@@ -261,17 +258,16 @@ struct timing_generator_funcs {
 
 	void (*program_manual_trigger)(struct timing_generator *optc);
 	void (*setup_manual_trigger)(struct timing_generator *optc);
+	bool (*get_hw_timing)(struct timing_generator *optc,
+			struct dc_crtc_timing *hw_crtc_timing);
 
 	void (*set_vtg_params)(struct timing_generator *optc,
 			const struct dc_crtc_timing *dc_crtc_timing);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	void (*set_dsc_config)(struct timing_generator *optc,
 			       enum optc_dsc_mode dsc_mode,
 			       uint32_t dsc_bytes_per_pixel,
 			       uint32_t dsc_slice_width);
-#endif
 	void (*set_odm_bypass)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing);
 	void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt,
 			struct dc_crtc_timing *timing);
@@ -279,7 +275,6 @@ struct timing_generator_funcs {
 	void (*set_gsl_source_select)(struct timing_generator *optc,
 			int group_idx,
 			uint32_t gsl_ready_signal);
-#endif
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 3a938cd414ea..209118f9f193 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -32,310 +32,159 @@
 #include "inc/hw/link_encoder.h"
 #include "core_status.h"
 
-enum pipe_gating_control {
-	PIPE_GATING_CONTROL_DISABLE = 0,
-	PIPE_GATING_CONTROL_ENABLE,
-	PIPE_GATING_CONTROL_INIT
-};
-
 enum vline_select {
 	VLINE0,
 	VLINE1
 };
 
-struct dce_hwseq_wa {
-	bool blnd_crtc_trigger;
-	bool DEGVIDCN10_253;
-	bool false_optc_underflow;
-	bool DEGVIDCN10_254;
-	bool DEGVIDCN21;
-};
-
-struct hwseq_wa_state {
-	bool DEGVIDCN10_253_applied;
-};
-
-struct dce_hwseq {
-	struct dc_context *ctx;
-	const struct dce_hwseq_registers *regs;
-	const struct dce_hwseq_shift *shifts;
-	const struct dce_hwseq_mask *masks;
-	struct dce_hwseq_wa wa;
-	struct hwseq_wa_state wa_state;
-};
-
 struct pipe_ctx;
 struct dc_state;
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 struct dc_stream_status;
 struct dc_writeback_info;
-#endif
 struct dchub_init_data;
-struct dc_static_screen_events;
+struct dc_static_screen_params;
 struct resource_pool;
-struct resource_context;
-struct stream_resource;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 struct dc_phy_addr_space_config;
 struct dc_virtual_addr_space_config;
-#endif
-struct hubp;
 struct dpp;
+struct dce_hwseq;
 
 struct hw_sequencer_funcs {
+	/* Embedded Display Related */
+	void (*edp_power_control)(struct dc_link *link, bool enable);
+	void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up);
 
-	void (*disable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-	void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
+	/* Pipe Programming Related */
 	void (*init_hw)(struct dc *dc);
-
-	void (*init_pipes)(struct dc *dc, struct dc_state *context);
-
-	enum dc_status (*apply_ctx_to_hw)(
-			struct dc *dc, struct dc_state *context);
-
-	void (*reset_hw_ctx_wrap)(
-			struct dc *dc, struct dc_state *context);
-
-	void (*apply_ctx_for_surface)(
-			struct dc *dc,
+	void (*enable_accelerated_mode)(struct dc *dc,
+			struct dc_state *context);
+	enum dc_status (*apply_ctx_to_hw)(struct dc *dc,
+			struct dc_state *context);
+	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*apply_ctx_for_surface)(struct dc *dc,
 			const struct dc_stream_state *stream,
-			int num_planes,
+			int num_planes, struct dc_state *context);
+	void (*program_front_end_for_ctx)(struct dc *dc,
 			struct dc_state *context);
-
-	void (*program_gamut_remap)(
+	void (*update_plane_addr)(const struct dc *dc,
 			struct pipe_ctx *pipe_ctx);
-
-	void (*program_output_csc)(struct dc *dc,
-			struct pipe_ctx *pipe_ctx,
-			enum dc_color_space colorspace,
-			uint16_t *matrix,
-			int opp_id);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
-	void (*program_triplebuffer)(
-		const struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		bool enableTripleBuffer);
-	void (*set_flip_control_gsl)(
-		struct pipe_ctx *pipe_ctx,
-		bool flip_immediate);
-#endif
-
-	void (*update_plane_addr)(
-		const struct dc *dc,
-		struct pipe_ctx *pipe_ctx);
-
-	void (*plane_atomic_disconnect)(
-		struct dc *dc,
-		struct pipe_ctx *pipe_ctx);
-
-	void (*update_dchub)(
-		struct dce_hwseq *hws,
-		struct dchub_init_data *dh_data);
-
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
-	int (*init_sys_ctx)(
-			struct dce_hwseq *hws,
-			struct dc *dc,
-			struct dc_phy_addr_space_config *pa_config);
-	void (*init_vm_ctx)(
-			struct dce_hwseq *hws,
-			struct dc *dc,
-			struct dc_virtual_addr_space_config *va_config,
-			int vmid);
-#endif
-	void (*update_mpcc)(
-		struct dc *dc,
-		struct pipe_ctx *pipe_ctx);
-
-	void (*update_pending_status)(
+	void (*update_dchub)(struct dce_hwseq *hws,
+			struct dchub_init_data *dh_data);
+	void (*wait_for_mpcc_disconnect)(struct dc *dc,
+			struct resource_pool *res_pool,
 			struct pipe_ctx *pipe_ctx);
-
-	bool (*set_input_transfer_func)(
-				struct pipe_ctx *pipe_ctx,
-				const struct dc_plane_state *plane_state);
-
-	bool (*set_output_transfer_func)(
-				struct pipe_ctx *pipe_ctx,
-				const struct dc_stream_state *stream);
-
-	void (*power_down)(struct dc *dc);
-
-	void (*enable_accelerated_mode)(struct dc *dc, struct dc_state *context);
-
-	void (*enable_timing_synchronization)(
-			struct dc *dc,
-			int group_index,
-			int group_size,
-			struct pipe_ctx *grouped_pipes[]);
-
-	void (*enable_per_frame_crtc_position_reset)(
-			struct dc *dc,
-			int group_size,
+	void (*program_triplebuffer)(const struct dc *dc,
+		struct pipe_ctx *pipe_ctx, bool enableTripleBuffer);
+	void (*update_pending_status)(struct pipe_ctx *pipe_ctx);
+
+	/* Pipe Lock Related */
+	void (*pipe_control_lock_global)(struct dc *dc,
+			struct pipe_ctx *pipe, bool lock);
+	void (*pipe_control_lock)(struct dc *dc,
+			struct pipe_ctx *pipe, bool lock);
+	void (*set_flip_control_gsl)(struct pipe_ctx *pipe_ctx,
+			bool flip_immediate);
+
+	/* Timing Related */
+	void (*get_position)(struct pipe_ctx **pipe_ctx, int num_pipes,
+			struct crtc_position *position);
+	int (*get_vupdate_offset_from_vsync)(struct pipe_ctx *pipe_ctx);
+	void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
+			int group_size, struct pipe_ctx *grouped_pipes[]);
+	void (*enable_timing_synchronization)(struct dc *dc,
+			int group_index, int group_size,
 			struct pipe_ctx *grouped_pipes[]);
+	void (*setup_periodic_interrupt)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx,
+			enum vline_select vline);
+	void (*set_drr)(struct pipe_ctx **pipe_ctx, int num_pipes,
+			unsigned int vmin, unsigned int vmax,
+			unsigned int vmid, unsigned int vmid_frame_number);
+	void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
+			int num_pipes,
+			const struct dc_static_screen_params *events);
 
-	void (*enable_display_pipe_clock_gating)(
-					struct dc_context *ctx,
-					bool clock_gating);
-
-	bool (*enable_display_power_gating)(
-					struct dc *dc,
-					uint8_t controller_id,
-					struct dc_bios *dcb,
-					enum pipe_gating_control power_gating);
-
-	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-	void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
-
-	void (*send_immediate_sdp_message)(
-				struct pipe_ctx *pipe_ctx,
-				const uint8_t *custom_sdp_message,
-				unsigned int sdp_message_size);
-
+	/* Stream Related */
 	void (*enable_stream)(struct pipe_ctx *pipe_ctx);
-
 	void (*disable_stream)(struct pipe_ctx *pipe_ctx);
-
+	void (*blank_stream)(struct pipe_ctx *pipe_ctx);
 	void (*unblank_stream)(struct pipe_ctx *pipe_ctx,
 			struct dc_link_settings *link_settings);
 
-	void (*blank_stream)(struct pipe_ctx *pipe_ctx);
-
-	void (*enable_audio_stream)(struct pipe_ctx *pipe_ctx);
-
-	void (*disable_audio_stream)(struct pipe_ctx *pipe_ctx);
-
-	void (*pipe_control_lock)(
-				struct dc *dc,
-				struct pipe_ctx *pipe,
-				bool lock);
+	/* Bandwidth Related */
+	void (*prepare_bandwidth)(struct dc *dc, struct dc_state *context);
+	bool (*update_bandwidth)(struct dc *dc, struct dc_state *context);
+	void (*optimize_bandwidth)(struct dc *dc, struct dc_state *context);
 
-	void (*pipe_control_lock_global)(
-				struct dc *dc,
-				struct pipe_ctx *pipe,
-				bool lock);
-	void (*blank_pixel_data)(
-			struct dc *dc,
+	/* Infopacket Related */
+	void (*set_avmute)(struct pipe_ctx *pipe_ctx, bool enable);
+	void (*send_immediate_sdp_message)(
 			struct pipe_ctx *pipe_ctx,
-			bool blank);
-
-	void (*prepare_bandwidth)(
-			struct dc *dc,
-			struct dc_state *context);
-	void (*optimize_bandwidth)(
-			struct dc *dc,
-			struct dc_state *context);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
-	bool (*update_bandwidth)(
-			struct dc *dc,
-			struct dc_state *context);
+			const uint8_t *custom_sdp_message,
+			unsigned int sdp_message_size);
+	void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
+	void (*set_dmdata_attributes)(struct pipe_ctx *pipe);
 	void (*program_dmdata_engine)(struct pipe_ctx *pipe_ctx);
 	bool (*dmdata_status_done)(struct pipe_ctx *pipe_ctx);
-#endif
-
-	void (*set_drr)(struct pipe_ctx **pipe_ctx, int num_pipes,
-			unsigned int vmin, unsigned int vmax,
-			unsigned int vmid, unsigned int vmid_frame_number);
-
-	void (*get_position)(struct pipe_ctx **pipe_ctx, int num_pipes,
-			struct crtc_position *position);
-
-	void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
-			int num_pipes, const struct dc_static_screen_events *events);
-
-	enum dc_status (*enable_stream_timing)(
-			struct pipe_ctx *pipe_ctx,
-			struct dc_state *context,
-			struct dc *dc);
-
-	void (*setup_stereo)(
-			struct pipe_ctx *pipe_ctx,
-			struct dc *dc);
-
-	void (*set_avmute)(struct pipe_ctx *pipe_ctx, bool enable);
-
-	void (*log_hw_state)(struct dc *dc,
-		struct dc_log_buffer_ctx *log_ctx);
-	void (*get_hw_state)(struct dc *dc, char *pBuf, unsigned int bufSize, unsigned int mask);
-	void (*clear_status_bits)(struct dc *dc, unsigned int mask);
-
-	void (*wait_for_mpcc_disconnect)(struct dc *dc,
-			struct resource_pool *res_pool,
-			struct pipe_ctx *pipe_ctx);
-
-	void (*edp_power_control)(
-			struct dc_link *link,
-			bool enable);
-	void (*edp_backlight_control)(
-			struct dc_link *link,
-			bool enable);
-	void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up);
 
+	/* Cursor Related */
 	void (*set_cursor_position)(struct pipe_ctx *pipe);
 	void (*set_cursor_attribute)(struct pipe_ctx *pipe);
 	void (*set_cursor_sdr_white_level)(struct pipe_ctx *pipe);
 
-	void (*setup_periodic_interrupt)(struct pipe_ctx *pipe_ctx, enum vline_select vline);
-	void (*setup_vupdate_interrupt)(struct pipe_ctx *pipe_ctx);
-	bool (*did_underflow_occur)(struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-	void (*init_blank)(struct dc *dc, struct timing_generator *tg);
-	void (*disable_vga)(struct dce_hwseq *hws);
-	void (*bios_golden_init)(struct dc *dc);
-	void (*plane_atomic_power_down)(struct dc *dc,
-			struct dpp *dpp,
-			struct hubp *hubp);
-
-	void (*plane_atomic_disable)(
-			struct dc *dc, struct pipe_ctx *pipe_ctx);
-
-	void (*enable_power_gating_plane)(
-		struct dce_hwseq *hws,
-		bool enable);
-
-	void (*dpp_pg_control)(
-			struct dce_hwseq *hws,
-			unsigned int dpp_inst,
-			bool power_on);
-
-	void (*hubp_pg_control)(
-			struct dce_hwseq *hws,
-			unsigned int hubp_inst,
-			bool power_on);
-
-	void (*dsc_pg_control)(
-			struct dce_hwseq *hws,
-			unsigned int dsc_inst,
-			bool power_on);
-
+	/* Colour Related */
+	void (*program_gamut_remap)(struct pipe_ctx *pipe_ctx);
+	void (*program_output_csc)(struct dc *dc, struct pipe_ctx *pipe_ctx,
+			enum dc_color_space colorspace,
+			uint16_t *matrix, int opp_id);
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
-	void (*update_odm)(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx);
-	void (*program_all_writeback_pipes_in_tree)(
+	/* VM Related */
+	int (*init_sys_ctx)(struct dce_hwseq *hws,
 			struct dc *dc,
-			const struct dc_stream_state *stream,
-			struct dc_state *context);
+			struct dc_phy_addr_space_config *pa_config);
+	void (*init_vm_ctx)(struct dce_hwseq *hws,
+			struct dc *dc,
+			struct dc_virtual_addr_space_config *va_config,
+			int vmid);
+
+	/* Writeback Related */
 	void (*update_writeback)(struct dc *dc,
-			const struct dc_stream_status *stream_status,
-			struct dc_writeback_info *wb_info);
+			struct dc_writeback_info *wb_info,
+			struct dc_state *context);
 	void (*enable_writeback)(struct dc *dc,
-			const struct dc_stream_status *stream_status,
-			struct dc_writeback_info *wb_info);
+			struct dc_writeback_info *wb_info,
+			struct dc_state *context);
 	void (*disable_writeback)(struct dc *dc,
 			unsigned int dwb_pipe_inst);
-#endif
-	enum dc_status (*set_clock)(struct dc *dc,
-			enum dc_clock_type clock_type,
-			uint32_t clk_khz,
-			uint32_t stepping);
 
-	void (*get_clock)(struct dc *dc,
+	bool (*mmhubbub_warmup)(struct dc *dc,
+			unsigned int num_dwb,
+			struct dc_writeback_info *wb_info);
+
+	/* Clock Related */
+	enum dc_status (*set_clock)(struct dc *dc,
 			enum dc_clock_type clock_type,
+			uint32_t clk_khz, uint32_t stepping);
+	void (*get_clock)(struct dc *dc, enum dc_clock_type clock_type,
 			struct dc_clock_config *clock_cfg);
+	void (*optimize_pwr_state)(const struct dc *dc,
+			struct dc_state *context);
+	void (*exit_optimized_pwr_state)(const struct dc *dc,
+			struct dc_state *context);
+
+	/* Audio Related */
+	void (*enable_audio_stream)(struct pipe_ctx *pipe_ctx);
+	void (*disable_audio_stream)(struct pipe_ctx *pipe_ctx);
+
+	/* Stereo 3D Related */
+	void (*setup_stereo)(struct pipe_ctx *pipe_ctx, struct dc *dc);
+
+	/* HW State Logging Related */
+	void (*log_hw_state)(struct dc *dc, struct dc_log_buffer_ctx *log_ctx);
+	void (*get_hw_state)(struct dc *dc, char *pBuf,
+			unsigned int bufSize, unsigned int mask);
+	void (*clear_status_bits)(struct dc *dc, unsigned int mask);
+
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
new file mode 100644
index 000000000000..ecf566378ccd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HW_SEQUENCER_PRIVATE_H__
+#define __DC_HW_SEQUENCER_PRIVATE_H__
+
+#include "dc_types.h"
+
+enum pipe_gating_control {
+	PIPE_GATING_CONTROL_DISABLE = 0,
+	PIPE_GATING_CONTROL_ENABLE,
+	PIPE_GATING_CONTROL_INIT
+};
+
+struct dce_hwseq_wa {
+	bool blnd_crtc_trigger;
+	bool DEGVIDCN10_253;
+	bool false_optc_underflow;
+	bool DEGVIDCN10_254;
+	bool DEGVIDCN21;
+};
+
+struct hwseq_wa_state {
+	bool DEGVIDCN10_253_applied;
+};
+
+struct pipe_ctx;
+struct dc_state;
+struct dc_stream_status;
+struct dc_writeback_info;
+struct dchub_init_data;
+struct dc_static_screen_params;
+struct resource_pool;
+struct resource_context;
+struct stream_resource;
+struct dc_phy_addr_space_config;
+struct dc_virtual_addr_space_config;
+struct hubp;
+struct dpp;
+struct dce_hwseq;
+struct timing_generator;
+struct tg_color;
+struct output_pixel_processor;
+
+struct hwseq_private_funcs {
+
+	void (*disable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*init_pipes)(struct dc *dc, struct dc_state *context);
+	void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context);
+	void (*update_plane_addr)(const struct dc *dc,
+			struct pipe_ctx *pipe_ctx);
+	void (*plane_atomic_disconnect)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx);
+	void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	bool (*set_input_transfer_func)(struct dc *dc,
+				struct pipe_ctx *pipe_ctx,
+				const struct dc_plane_state *plane_state);
+	bool (*set_output_transfer_func)(struct dc *dc,
+				struct pipe_ctx *pipe_ctx,
+				const struct dc_stream_state *stream);
+	void (*power_down)(struct dc *dc);
+	void (*enable_display_pipe_clock_gating)(struct dc_context *ctx,
+					bool clock_gating);
+	bool (*enable_display_power_gating)(struct dc *dc,
+					uint8_t controller_id,
+					struct dc_bios *dcb,
+					enum pipe_gating_control power_gating);
+	void (*blank_pixel_data)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx,
+			bool blank);
+	enum dc_status (*enable_stream_timing)(
+			struct pipe_ctx *pipe_ctx,
+			struct dc_state *context,
+			struct dc *dc);
+	void (*edp_backlight_control)(struct dc_link *link,
+			bool enable);
+	void (*setup_vupdate_interrupt)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx);
+	bool (*did_underflow_occur)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*init_blank)(struct dc *dc, struct timing_generator *tg);
+	void (*disable_vga)(struct dce_hwseq *hws);
+	void (*bios_golden_init)(struct dc *dc);
+	void (*plane_atomic_power_down)(struct dc *dc,
+			struct dpp *dpp,
+			struct hubp *hubp);
+	void (*plane_atomic_disable)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*enable_power_gating_plane)(struct dce_hwseq *hws,
+		bool enable);
+	void (*dpp_pg_control)(struct dce_hwseq *hws,
+			unsigned int dpp_inst,
+			bool power_on);
+	void (*hubp_pg_control)(struct dce_hwseq *hws,
+			unsigned int hubp_inst,
+			bool power_on);
+	void (*dsc_pg_control)(struct dce_hwseq *hws,
+			unsigned int dsc_inst,
+			bool power_on);
+	void (*update_odm)(struct dc *dc, struct dc_state *context,
+			struct pipe_ctx *pipe_ctx);
+	void (*program_all_writeback_pipes_in_tree)(struct dc *dc,
+			const struct dc_stream_state *stream,
+			struct dc_state *context);
+	bool (*s0i3_golden_init_wa)(struct dc *dc);
+	void (*get_surface_visual_confirm_color)(
+			const struct pipe_ctx *pipe_ctx,
+			struct tg_color *color);
+	void (*get_hdr_visual_confirm_color)(struct pipe_ctx *pipe_ctx,
+			struct tg_color *color);
+	void (*set_hdr_multiplier)(struct pipe_ctx *pipe_ctx);
+	void (*verify_allow_pstate_change_high)(struct dc *dc);
+	void (*program_pipe)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx,
+			struct dc_state *context);
+	bool (*wait_for_blank_complete)(struct output_pixel_processor *opp);
+	void (*dccg_init)(struct dce_hwseq *hws);
+	bool (*set_blend_lut)(struct pipe_ctx *pipe_ctx,
+			const struct dc_plane_state *plane_state);
+	bool (*set_shaper_3dlut)(struct pipe_ctx *pipe_ctx,
+			const struct dc_plane_state *plane_state);
+};
+
+struct dce_hwseq {
+	struct dc_context *ctx;
+	const struct dce_hwseq_registers *regs;
+	const struct dce_hwseq_shift *shifts;
+	const struct dce_hwseq_mask *masks;
+	struct dce_hwseq_wa wa;
+	struct hwseq_wa_state wa_state;
+	struct hwseq_private_funcs funcs;
+
+};
+
+#endif /* __DC_HW_SEQUENCER_PRIVATE_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
index 4eff5d38a2f9..9af7ee5bc8ee 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
@@ -60,11 +60,13 @@ void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal);
 
 bool dp_set_hw_training_pattern(
 	struct dc_link *link,
-	enum dc_dp_training_pattern pattern);
+	enum dc_dp_training_pattern pattern,
+	uint32_t offset);
 
 void dp_set_hw_lane_settings(
 	struct dc_link *link,
-	const struct link_training_settings *link_settings);
+	const struct link_training_settings *link_settings,
+	uint32_t offset);
 
 void dp_set_hw_test_pattern(
 	struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
index 8503d9cc4763..2470405e996b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
@@ -458,7 +458,14 @@ uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
 #define IX_REG_READ(index_reg_name, data_reg_name, index) \
 		generic_read_indirect_reg(CTX, REG(index_reg_name), REG(data_reg_name), IND_REG(index))
 
+#define IX_REG_GET_N(index_reg_name, data_reg_name, index, n, ...) \
+		generic_indirect_reg_get(CTX, REG(index_reg_name), REG(data_reg_name), \
+				IND_REG(index), \
+				n, __VA_ARGS__)
 
+#define IX_REG_GET(index_reg_name, data_reg_name, index, field, val) \
+		IX_REG_GET_N(index_reg_name, data_reg_name, index, 1, \
+				FN(data_reg_name, field), val)
 
 #define IX_REG_UPDATE_N(index_reg_name, data_reg_name, index, n, ...)	\
 		generic_indirect_reg_update_ex(CTX, \
@@ -479,10 +486,35 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
 		uint32_t index);
 
+uint32_t generic_indirect_reg_get(const struct dc_context *ctx,
+		uint32_t addr_index, uint32_t addr_data,
+		uint32_t index, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		...);
+
 uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
 		uint32_t index, uint32_t reg_val, int n,
 		uint8_t shift1, uint32_t mask1, uint32_t field_value1,
 		...);
 
+/* register offload macros
+ *
+ * instead of MMIO to register directly, in some cases we want
+ * to gather register sequence and execute the register sequence
+ * from another thread so we optimize time required for lengthy ops
+ */
+
+/* start gathering register sequence */
+#define REG_SEQ_START() \
+	reg_sequence_start_gather(CTX)
+
+/* start execution of register sequence gathered since REG_SEQ_START */
+#define REG_SEQ_SUBMIT() \
+	reg_sequence_start_execute(CTX)
+
+/* wait for the last REG_SEQ_SUBMIT to finish */
+#define REG_SEQ_WAIT_DONE() \
+	reg_sequence_wait_done(CTX)
+
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_REG_HELPER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index bef224bf803e..5ae8ada154ef 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -46,12 +46,8 @@ struct resource_caps {
 	int num_pll;
 	int num_dwb;
 	int num_ddc;
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 	int num_vmid;
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	int num_dsc;
-#endif
-#endif
 };
 
 struct resource_straps {
@@ -181,4 +177,6 @@ void update_audio_usage(
 
 unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
 
+void get_audio_check(struct audio_info *aud_modes,
+	struct audio_check *aud_chk);
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/Makefile b/drivers/gpu/drm/amd/display/dc/irq/Makefile
index ea75420fc876..0f682ac53bb2 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/irq/Makefile
@@ -60,27 +60,23 @@ AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCE12)
 ###############################################################################
 # DCN 1x
 ###############################################################################
-ifdef CONFIG_DRM_AMD_DC_DCN1_0
+ifdef CONFIG_DRM_AMD_DC_DCN
 IRQ_DCN1 = irq_service_dcn10.o
 
 AMD_DAL_IRQ_DCN1 = $(addprefix $(AMDDALPATH)/dc/irq/dcn10/,$(IRQ_DCN1))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN1)
-endif
 ###############################################################################
 # DCN 20
 ###############################################################################
-ifdef CONFIG_DRM_AMD_DC_DCN2_0
 IRQ_DCN2 = irq_service_dcn20.o
 
 AMD_DAL_IRQ_DCN2 = $(addprefix $(AMDDALPATH)/dc/irq/dcn20/,$(IRQ_DCN2))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN2)
-endif
 ###############################################################################
 # DCN 21
 ###############################################################################
-ifdef CONFIG_DRM_AMD_DC_DCN2_1
 IRQ_DCN21 = irq_service_dcn21.o
 
 AMD_DAL_IRQ_DCN21= $(addprefix $(AMDDALPATH)/dc/irq/dcn21/,$(IRQ_DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
index 1a581c464345..378cc11aa047 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
@@ -204,7 +204,7 @@ bool dce110_vblank_set(struct irq_service *irq_service,
 		       bool enable)
 {
 	struct dc_context *dc_ctx = irq_service->ctx;
-	struct dc *core_dc = irq_service->ctx->dc;
+	struct dc *dc = irq_service->ctx->dc;
 	enum dc_irq_source dal_irq_src =
 			dc_interrupt_to_irq_source(irq_service->ctx->dc,
 						   info->src_id,
@@ -212,7 +212,7 @@ bool dce110_vblank_set(struct irq_service *irq_service,
 	uint8_t pipe_offset = dal_irq_src - IRQ_TYPE_VBLANK;
 
 	struct timing_generator *tg =
-			core_dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
+			dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
 
 	if (enable) {
 		if (!tg || !tg->funcs->arm_vert_intr(tg, 2)) {
@@ -403,7 +403,7 @@ static const struct irq_service_funcs irq_service_funcs_dce110 = {
 		.to_dal_irq_source = to_dal_irq_source_dce110
 };
 
-static void construct(struct irq_service *irq_service,
+static void dce110_irq_construct(struct irq_service *irq_service,
 		      struct irq_service_init_data *init_data)
 {
 	dal_irq_service_construct(irq_service, init_data);
@@ -421,6 +421,6 @@ dal_irq_service_dce110_create(struct irq_service_init_data *init_data)
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dce110_irq_construct(irq_service, init_data);
 	return irq_service;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
index 15380336cb51..2fe4703395f3 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
@@ -273,7 +273,7 @@ static const struct irq_service_funcs irq_service_funcs_dce120 = {
 		.to_dal_irq_source = to_dal_irq_source_dce110
 };
 
-static void construct(
+static void dce120_irq_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data)
 {
@@ -292,6 +292,6 @@ struct irq_service *dal_irq_service_dce120_create(
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dce120_irq_construct(irq_service, init_data);
 	return irq_service;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
index 281fee8ad1e5..17e426b80a00 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
@@ -283,7 +283,7 @@ static const struct irq_service_funcs irq_service_funcs_dce80 = {
 		.to_dal_irq_source = to_dal_irq_source_dce110
 };
 
-static void construct(
+static void dce80_irq_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data)
 {
@@ -302,7 +302,7 @@ struct irq_service *dal_irq_service_dce80_create(
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dce80_irq_construct(irq_service, init_data);
 	return irq_service;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
index cc8e7dedccce..f956b3bde680 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
@@ -355,7 +355,7 @@ static const struct irq_service_funcs irq_service_funcs_dcn10 = {
 		.to_dal_irq_source = to_dal_irq_source_dcn10
 };
 
-static void construct(
+static void dcn10_irq_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data)
 {
@@ -374,6 +374,6 @@ struct irq_service *dal_irq_service_dcn10_create(
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dcn10_irq_construct(irq_service, init_data);
 	return irq_service;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
index 5db29bf582d3..2a1fea501f8c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
@@ -359,7 +359,7 @@ static const struct irq_service_funcs irq_service_funcs_dcn20 = {
 		.to_dal_irq_source = to_dal_irq_source_dcn20
 };
 
-static void construct(
+static void dcn20_irq_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data)
 {
@@ -378,6 +378,6 @@ struct irq_service *dal_irq_service_dcn20_create(
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dcn20_irq_construct(irq_service, init_data);
 	return irq_service;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index cbe7818529bb..1b971265418b 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -350,7 +350,7 @@ static const struct irq_service_funcs irq_service_funcs_dcn21 = {
 		.to_dal_irq_source = to_dal_irq_source_dcn21
 };
 
-static void construct(
+static void dcn21_irq_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data)
 {
@@ -369,6 +369,6 @@ struct irq_service *dal_irq_service_dcn21_create(
 	if (!irq_service)
 		return NULL;
 
-	construct(irq_service, init_data);
+	dcn21_irq_construct(irq_service, init_data);
 	return irq_service;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 0878550a8178..33053b9fe6bd 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -38,7 +38,7 @@
 #include "dce120/irq_service_dce120.h"
 
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "dcn10/irq_service_dcn10.h"
 #endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index 30ec80ac6fc8..c34eba19860a 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012-16 Advanced Micro Devices, Inc.
+ * Copyright 2019 Raptor Engineering, LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -29,6 +30,7 @@
 #include <linux/kgdb.h>
 #include <linux/kref.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 
 #include <asm/byteorder.h>
 
@@ -48,8 +50,39 @@
 
 #define dm_error(fmt, ...) DRM_ERROR(fmt, ##__VA_ARGS__)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+#if defined(CONFIG_X86)
 #include <asm/fpu/api.h>
+#define DC_FP_START() kernel_fpu_begin()
+#define DC_FP_END() kernel_fpu_end()
+#elif defined(CONFIG_PPC64)
+#include <asm/switch_to.h>
+#include <asm/cputable.h>
+#define DC_FP_START() { \
+	if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \
+		preempt_disable(); \
+		enable_kernel_vsx(); \
+	} else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \
+		preempt_disable(); \
+		enable_kernel_altivec(); \
+	} else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \
+		preempt_disable(); \
+		enable_kernel_fp(); \
+	} \
+}
+#define DC_FP_END() { \
+	if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \
+		disable_kernel_vsx(); \
+		preempt_enable(); \
+	} else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \
+		disable_kernel_altivec(); \
+		preempt_enable(); \
+	} else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \
+		disable_kernel_fp(); \
+		preempt_enable(); \
+	} \
+}
+#endif
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
index ff664bdb1482..b8040da94b9d 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
@@ -32,6 +32,7 @@ static void virtual_stream_encoder_dp_set_stream_attribute(
 	struct stream_encoder *enc,
 	struct dc_crtc_timing *crtc_timing,
 	enum dc_color_space output_color_space,
+	bool use_vsc_sdp_for_colorimetry,
 	uint32_t enable_sdp_splitting) {}
 
 static void virtual_stream_encoder_hdmi_set_stream_attribute(
@@ -81,22 +82,14 @@ static void virtual_stream_encoder_reset_hdmi_stream_attribute(
 		struct stream_encoder *enc)
 {}
 
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 static void virtual_enc_dp_set_odm_combine(
 	struct stream_encoder *enc,
 	bool odm_combine)
 {}
-#endif
-#endif
 
 static const struct stream_encoder_funcs virtual_str_enc_funcs = {
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	.dp_set_odm_combine =
 		virtual_enc_dp_set_odm_combine,
-#endif
-#endif
 	.dp_set_stream_attribute =
 		virtual_stream_encoder_dp_set_stream_attribute,
 	.hdmi_set_stream_attribute =
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
new file mode 100644
index 000000000000..cd9532b4f14d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_CMD_H_
+#define _DMUB_CMD_H_
+
+#include "dmub_types.h"
+#include "dmub_cmd_dal.h"
+#include "dmub_cmd_vbios.h"
+#include "atomfirmware.h"
+
+#define DMUB_RB_CMD_SIZE 64
+#define DMUB_RB_MAX_ENTRY 128
+#define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY)
+#define REG_SET_MASK 0xFFFF
+
+
+/*
+ * Command IDs should be treated as stable ABI.
+ * Do not reuse or modify IDs.
+ */
+
+enum dmub_cmd_type {
+	DMUB_CMD__NULL = 0,
+	DMUB_CMD__REG_SEQ_READ_MODIFY_WRITE = 1,
+	DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2,
+	DMUB_CMD__REG_SEQ_BURST_WRITE = 3,
+	DMUB_CMD__REG_REG_WAIT = 4,
+	DMUB_CMD__PLAT_54186_WA = 5,
+	DMUB_CMD__PSR = 64,
+	DMUB_CMD__VBIOS = 128,
+};
+
+#pragma pack(push, 1)
+
+struct dmub_cmd_header {
+	unsigned int type : 8;
+	unsigned int sub_type : 8;
+	unsigned int reserved0 : 8;
+	unsigned int payload_bytes : 6;  /* up to 60 bytes */
+	unsigned int reserved1 : 2;
+};
+
+/*
+ * Read modify write
+ *
+ * 60 payload bytes can hold up to 5 sets of read modify writes,
+ * each take 3 dwords.
+ *
+ * number of sequences = header.payload_bytes / sizeof(struct dmub_cmd_read_modify_write_sequence)
+ *
+ * modify_mask = 0xffff'ffff means all fields are going to be updated.  in this case
+ * command parser will skip the read and we can use modify_mask = 0xffff'ffff as reg write
+ */
+struct dmub_cmd_read_modify_write_sequence {
+	uint32_t addr;
+	uint32_t modify_mask;
+	uint32_t modify_value;
+};
+
+#define DMUB_READ_MODIFY_WRITE_SEQ__MAX		5
+struct dmub_rb_cmd_read_modify_write {
+	struct dmub_cmd_header header;  // type = DMUB_CMD__REG_SEQ_READ_MODIFY_WRITE
+	struct dmub_cmd_read_modify_write_sequence seq[DMUB_READ_MODIFY_WRITE_SEQ__MAX];
+};
+
+/*
+ * Update a register with specified masks and values sequeunce
+ *
+ * 60 payload bytes can hold address + up to 7 sets of mask/value combo, each take 2 dword
+ *
+ * number of field update sequence = (header.payload_bytes - sizeof(addr)) / sizeof(struct read_modify_write_sequence)
+ *
+ *
+ * USE CASE:
+ *   1. auto-increment register where additional read would update pointer and produce wrong result
+ *   2. toggle a bit without read in the middle
+ */
+
+struct dmub_cmd_reg_field_update_sequence {
+	uint32_t modify_mask;  // 0xffff'ffff to skip initial read
+	uint32_t modify_value;
+};
+
+#define DMUB_REG_FIELD_UPDATE_SEQ__MAX		7
+
+struct dmub_rb_cmd_reg_field_update_sequence {
+	struct dmub_cmd_header header;
+	uint32_t addr;
+	struct dmub_cmd_reg_field_update_sequence seq[DMUB_REG_FIELD_UPDATE_SEQ__MAX];
+};
+
+
+/*
+ * Burst write
+ *
+ * support use case such as writing out LUTs.
+ *
+ * 60 payload bytes can hold up to 14 values to write to given address
+ *
+ * number of payload = header.payload_bytes / sizeof(struct read_modify_write_sequence)
+ */
+#define DMUB_BURST_WRITE_VALUES__MAX  14
+struct dmub_rb_cmd_burst_write {
+	struct dmub_cmd_header header;  // type = DMUB_CMD__REG_SEQ_BURST_WRITE
+	uint32_t addr;
+	uint32_t write_values[DMUB_BURST_WRITE_VALUES__MAX];
+};
+
+
+struct dmub_rb_cmd_common {
+	struct dmub_cmd_header header;
+	uint8_t cmd_buffer[DMUB_RB_CMD_SIZE - sizeof(struct dmub_cmd_header)];
+};
+
+struct dmub_cmd_reg_wait_data {
+	uint32_t addr;
+	uint32_t mask;
+	uint32_t condition_field_value;
+	uint32_t time_out_us;
+};
+
+struct dmub_rb_cmd_reg_wait {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_reg_wait_data reg_wait;
+};
+
+#ifndef PHYSICAL_ADDRESS_LOC
+#define PHYSICAL_ADDRESS_LOC union large_integer
+#endif
+
+struct dmub_cmd_PLAT_54186_wa {
+	uint32_t DCSURF_SURFACE_CONTROL;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	struct {
+		uint8_t hubp_inst : 4;
+		uint8_t tmz_surface : 1;
+		uint8_t immediate :1;
+		uint8_t vmid : 4;
+		uint8_t grph_stereo : 1;
+		uint32_t reserved : 21;
+	} flip_params;
+	uint32_t reserved[9];
+};
+
+struct dmub_rb_cmd_PLAT_54186_wa {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_PLAT_54186_wa flip;
+};
+
+struct dmub_cmd_digx_encoder_control_data {
+	union dig_encoder_control_parameters_v1_5 dig;
+};
+
+struct dmub_rb_cmd_digx_encoder_control {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_digx_encoder_control_data encoder_control;
+};
+
+struct dmub_cmd_set_pixel_clock_data {
+	struct set_pixel_clock_parameter_v1_7 clk;
+};
+
+struct dmub_rb_cmd_set_pixel_clock {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_set_pixel_clock_data pixel_clock;
+};
+
+struct dmub_cmd_enable_disp_power_gating_data {
+	struct enable_disp_power_gating_parameters_v2_1 pwr;
+};
+
+struct dmub_rb_cmd_enable_disp_power_gating {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_enable_disp_power_gating_data power_gating;
+};
+
+struct dmub_cmd_dig1_transmitter_control_data {
+	struct dig_transmitter_control_parameters_v1_6 dig;
+};
+
+struct dmub_rb_cmd_dig1_transmitter_control {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_dig1_transmitter_control_data transmitter_control;
+};
+
+struct dmub_rb_cmd_dpphy_init {
+	struct dmub_cmd_header header;
+	uint8_t reserved[60];
+};
+
+struct dmub_cmd_psr_copy_settings_data {
+	uint16_t psr_level;
+	uint8_t hubp_inst;
+	uint8_t dpp_inst;
+	uint8_t mpcc_inst;
+	uint8_t opp_inst;
+	uint8_t otg_inst;
+	uint8_t digfe_inst;
+	uint8_t digbe_inst;
+	uint8_t dpphy_inst;
+	uint8_t aux_inst;
+	uint8_t hyst_frames;
+	uint8_t hyst_lines;
+	uint8_t phy_num;
+	uint8_t phy_type;
+	uint8_t aux_repeat;
+	uint8_t smu_optimizations_en;
+	uint8_t skip_wait_for_pll_lock;
+	uint8_t frame_delay;
+	uint8_t smu_phy_id;
+	uint8_t num_of_controllers;
+	uint8_t link_rate;
+	uint8_t frame_cap_ind;
+};
+
+struct dmub_rb_cmd_psr_copy_settings {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_psr_copy_settings_data psr_copy_settings_data;
+};
+
+struct dmub_cmd_psr_set_level_data {
+	uint16_t psr_level;
+};
+
+struct dmub_rb_cmd_psr_set_level {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_psr_set_level_data psr_set_level_data;
+};
+
+struct dmub_rb_cmd_psr_enable {
+	struct dmub_cmd_header header;
+};
+
+struct dmub_cmd_psr_setup_data {
+	enum psr_version version; // PSR version 1 or 2
+};
+
+struct dmub_rb_cmd_psr_setup {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_psr_setup_data psr_setup_data;
+};
+
+union dmub_rb_cmd {
+	struct dmub_rb_cmd_read_modify_write read_modify_write;
+	struct dmub_rb_cmd_reg_field_update_sequence reg_field_update_seq;
+	struct dmub_rb_cmd_burst_write burst_write;
+	struct dmub_rb_cmd_reg_wait reg_wait;
+	struct dmub_rb_cmd_common cmd_common;
+	struct dmub_rb_cmd_digx_encoder_control digx_encoder_control;
+	struct dmub_rb_cmd_set_pixel_clock set_pixel_clock;
+	struct dmub_rb_cmd_enable_disp_power_gating enable_disp_power_gating;
+	struct dmub_rb_cmd_dpphy_init dpphy_init;
+	struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control;
+	struct dmub_rb_cmd_psr_enable psr_enable;
+	struct dmub_rb_cmd_psr_copy_settings psr_copy_settings;
+	struct dmub_rb_cmd_psr_set_level psr_set_level;
+	struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa;
+	struct dmub_rb_cmd_psr_setup psr_setup;
+};
+
+#pragma pack(pop)
+
+#endif /* _DMUB_CMD_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
new file mode 100644
index 000000000000..7b69eb37f762
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_CMD_DAL_H_
+#define _DMUB_CMD_DAL_H_
+
+/*
+ * Command IDs should be treated as stable ABI.
+ * Do not reuse or modify IDs.
+ */
+
+enum dmub_cmd_psr_type {
+	DMUB_CMD__PSR_SETUP		= 0,
+	DMUB_CMD__PSR_COPY_SETTINGS	= 1,
+	DMUB_CMD__PSR_ENABLE		= 2,
+	DMUB_CMD__PSR_DISABLE		= 3,
+	DMUB_CMD__PSR_SET_LEVEL		= 4,
+};
+
+enum psr_version {
+	PSR_VERSION_1			= 0x10, // PSR Version 1
+	PSR_VERSION_2			= 0x20, // PSR Version 2, includes selective update
+	PSR_VERSION_2_Y_COORD		= 0x21, // PSR Version 2, includes Y-coordinate support for SU
+};
+
+#endif /* _DMUB_CMD_DAL_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_vbios.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_vbios.h
new file mode 100644
index 000000000000..b6deb8e2590f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_vbios.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_CMD_VBIOS_H_
+#define _DMUB_CMD_VBIOS_H_
+
+/*
+ * Command IDs should be treated as stable ABI.
+ * Do not reuse or modify IDs.
+ */
+
+enum dmub_cmd_vbios_type {
+	DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL = 0,
+	DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL = 1,
+	DMUB_CMD__VBIOS_SET_PIXEL_CLOCK = 2,
+	DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING = 3,
+};
+
+#endif /* _DMUB_CMD_VBIOS_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h
new file mode 100644
index 000000000000..242ec257998c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#ifndef _DMUB_META_H_
+#define _DMUB_META_H_
+
+#include "dmub_types.h"
+
+#pragma pack(push, 1)
+
+/* Magic value for identifying dmub_fw_meta_info */
+#define DMUB_FW_META_MAGIC 0x444D5542
+
+/* Offset from the end of the file to the dmub_fw_meta_info */
+#define DMUB_FW_META_OFFSET 0x24
+
+/**
+ * struct dmub_fw_meta_info - metadata associated with fw binary
+ *
+ * NOTE: This should be considered a stable API. Fields should
+ *       not be repurposed or reordered. New fields should be
+ *       added instead to extend the structure.
+ *
+ * @magic_value: magic value identifying DMUB firmware meta info
+ * @fw_region_size: size of the firmware state region
+ * @trace_buffer_size: size of the tracebuffer region
+ */
+struct dmub_fw_meta_info {
+	uint32_t magic_value;
+	uint32_t fw_region_size;
+	uint32_t trace_buffer_size;
+};
+
+/* Ensure that the structure remains 64 bytes. */
+union dmub_fw_meta {
+	struct dmub_fw_meta_info info;
+	uint8_t reserved[64];
+};
+
+#pragma pack(pop)
+
+#endif /* _DMUB_META_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
new file mode 100644
index 000000000000..df875fdd2ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_RB_H_
+#define _DMUB_RB_H_
+
+#include "dmub_types.h"
+#include "dmub_cmd.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct dmub_cmd_header;
+
+struct dmub_rb_init_params {
+	void *ctx;
+	void *base_address;
+	uint32_t capacity;
+};
+
+struct dmub_rb {
+	void *base_address;
+	uint32_t data_count;
+	uint32_t rptr;
+	uint32_t wrpt;
+	uint32_t capacity;
+
+	void *ctx;
+	void *dmub;
+};
+
+
+static inline bool dmub_rb_empty(struct dmub_rb *rb)
+{
+	return (rb->wrpt == rb->rptr);
+}
+
+static inline bool dmub_rb_full(struct dmub_rb *rb)
+{
+	uint32_t data_count;
+
+	if (rb->wrpt >= rb->rptr)
+		data_count = rb->wrpt - rb->rptr;
+	else
+		data_count = rb->capacity - (rb->rptr - rb->wrpt);
+
+	return (data_count == (rb->capacity - DMUB_RB_CMD_SIZE));
+}
+
+static inline bool dmub_rb_push_front(struct dmub_rb *rb,
+				      const struct dmub_cmd_header *cmd)
+{
+	uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t);
+	const uint64_t *src = (const uint64_t *)cmd;
+	int i;
+
+	if (dmub_rb_full(rb))
+		return false;
+
+	// copying data
+	for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+		*dst++ = *src++;
+
+	rb->wrpt += DMUB_RB_CMD_SIZE;
+
+	if (rb->wrpt >= rb->capacity)
+		rb->wrpt %= rb->capacity;
+
+	return true;
+}
+
+static inline bool dmub_rb_front(struct dmub_rb *rb,
+				 struct dmub_cmd_header *cmd)
+{
+	uint8_t *rd_ptr = (uint8_t *)rb->base_address + rb->rptr;
+
+	if (dmub_rb_empty(rb))
+		return false;
+
+	dmub_memcpy(cmd, rd_ptr, DMUB_RB_CMD_SIZE);
+
+	return true;
+}
+
+static inline bool dmub_rb_pop_front(struct dmub_rb *rb)
+{
+	if (dmub_rb_empty(rb))
+		return false;
+
+	rb->rptr += DMUB_RB_CMD_SIZE;
+
+	if (rb->rptr >= rb->capacity)
+		rb->rptr %= rb->capacity;
+
+	return true;
+}
+
+static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
+{
+	uint32_t rptr = rb->rptr;
+	uint32_t wptr = rb->wrpt;
+
+	while (rptr != wptr) {
+		uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t);
+		//uint64_t volatile *p = (uint64_t volatile *)data;
+		uint64_t temp;
+		int i;
+
+		for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+			temp = *data++;
+
+		rptr += DMUB_RB_CMD_SIZE;
+		if (rptr >= rb->capacity)
+			rptr %= rb->capacity;
+	}
+}
+
+static inline void dmub_rb_init(struct dmub_rb *rb,
+				struct dmub_rb_init_params *init_params)
+{
+	rb->base_address = init_params->base_address;
+	rb->capacity = init_params->capacity;
+	rb->rptr = 0;
+	rb->wrpt = 0;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _DMUB_RB_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
new file mode 100644
index 000000000000..8e23a7017588
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_SRV_H_
+#define _DMUB_SRV_H_
+
+/**
+ * DOC: DMUB interface and operation
+ *
+ * DMUB is the interface to the display DMCUB microcontroller on DCN hardware.
+ * It delegates hardware initialization and command submission to the
+ * microcontroller. DMUB is the shortname for DMCUB.
+ *
+ * This interface is not thread-safe. Ensure that all access to the interface
+ * is properly synchronized by the caller.
+ *
+ * Initialization and usage of the DMUB service should be done in the
+ * steps given below:
+ *
+ * 1. dmub_srv_create()
+ * 2. dmub_srv_has_hw_support()
+ * 3. dmub_srv_calc_region_info()
+ * 4. dmub_srv_hw_init()
+ *
+ * The call to dmub_srv_create() is required to use the server.
+ *
+ * The calls to dmub_srv_has_hw_support() and dmub_srv_calc_region_info()
+ * are helpers to query cache window size and allocate framebuffer(s)
+ * for the cache windows.
+ *
+ * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare
+ * for command submission. Commands can be queued via dmub_srv_cmd_queue()
+ * and executed via dmub_srv_cmd_execute().
+ *
+ * If the queue is full the dmub_srv_wait_for_idle() call can be used to
+ * wait until the queue has been cleared.
+ *
+ * Destroying the DMUB service can be done by calling dmub_srv_destroy().
+ * This does not clear DMUB hardware state, only software state.
+ *
+ * The interface is intended to be standalone and should not depend on any
+ * other component within DAL.
+ */
+
+#include "dmub_types.h"
+#include "dmub_cmd.h"
+#include "dmub_rb.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Forward declarations */
+struct dmub_srv;
+struct dmub_cmd_header;
+struct dmub_srv_common_regs;
+
+/* enum dmub_status - return code for dmcub functions */
+enum dmub_status {
+	DMUB_STATUS_OK = 0,
+	DMUB_STATUS_NO_CTX,
+	DMUB_STATUS_QUEUE_FULL,
+	DMUB_STATUS_TIMEOUT,
+	DMUB_STATUS_INVALID,
+};
+
+/* enum dmub_asic - dmub asic identifier */
+enum dmub_asic {
+	DMUB_ASIC_NONE = 0,
+	DMUB_ASIC_DCN20,
+	DMUB_ASIC_DCN21,
+	DMUB_ASIC_MAX,
+};
+
+/* enum dmub_window_id - dmub window identifier */
+enum dmub_window_id {
+	DMUB_WINDOW_0_INST_CONST = 0,
+	DMUB_WINDOW_1_STACK,
+	DMUB_WINDOW_2_BSS_DATA,
+	DMUB_WINDOW_3_VBIOS,
+	DMUB_WINDOW_4_MAILBOX,
+	DMUB_WINDOW_5_TRACEBUFF,
+	DMUB_WINDOW_6_FW_STATE,
+	DMUB_WINDOW_7_RESERVED,
+	DMUB_WINDOW_TOTAL,
+};
+
+/**
+ * struct dmub_region - dmub hw memory region
+ * @base: base address for region, must be 256 byte aligned
+ * @top: top address for region
+ */
+struct dmub_region {
+	uint32_t base;
+	uint32_t top;
+};
+
+/**
+ * struct dmub_window - dmub hw cache window
+ * @off: offset to the fb memory in gpu address space
+ * @r: region in uc address space for cache window
+ */
+struct dmub_window {
+	union dmub_addr offset;
+	struct dmub_region region;
+};
+
+/**
+ * struct dmub_fb - defines a dmub framebuffer memory region
+ * @cpu_addr: cpu virtual address for the region, NULL if invalid
+ * @gpu_addr: gpu virtual address for the region, NULL if invalid
+ * @size: size of the region in bytes, zero if invalid
+ */
+struct dmub_fb {
+	void *cpu_addr;
+	uint64_t gpu_addr;
+	uint32_t size;
+};
+
+/**
+ * struct dmub_srv_region_params - params used for calculating dmub regions
+ * @inst_const_size: size of the fw inst const section
+ * @bss_data_size: size of the fw bss data section
+ * @vbios_size: size of the vbios data
+ * @fw_bss_data: raw firmware bss data section
+ */
+struct dmub_srv_region_params {
+	uint32_t inst_const_size;
+	uint32_t bss_data_size;
+	uint32_t vbios_size;
+	const uint8_t *fw_bss_data;
+};
+
+/**
+ * struct dmub_srv_region_info - output region info from the dmub service
+ * @fb_size: required minimum fb size for all regions, aligned to 4096 bytes
+ * @num_regions: number of regions used by the dmub service
+ * @regions: region info
+ *
+ * The regions are aligned such that they can be all placed within the
+ * same framebuffer but they can also be placed into different framebuffers.
+ *
+ * The size of each region can be calculated by the caller:
+ * size = reg.top - reg.base
+ *
+ * Care must be taken when performing custom allocations to ensure that each
+ * region base address is 256 byte aligned.
+ */
+struct dmub_srv_region_info {
+	uint32_t fb_size;
+	uint8_t num_regions;
+	struct dmub_region regions[DMUB_WINDOW_TOTAL];
+};
+
+/**
+ * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * @region_info: region info calculated by dmub service
+ * @cpu_addr: base cpu address for the framebuffer
+ * @gpu_addr: base gpu virtual address for the framebuffer
+ */
+struct dmub_srv_fb_params {
+	const struct dmub_srv_region_info *region_info;
+	void *cpu_addr;
+	uint64_t gpu_addr;
+};
+
+/**
+ * struct dmub_srv_fb_info - output fb info from the dmub service
+ * @num_fbs: number of required dmub framebuffers
+ * @fbs: fb data for each region
+ *
+ * Output from the dmub service helper that can be used by the
+ * driver to prepare dmub_fb that can be passed into the dmub
+ * hw init service.
+ *
+ * Assumes that all regions are within the same framebuffer
+ * and have been setup according to the region_info generated
+ * by the dmub service.
+ */
+struct dmub_srv_fb_info {
+	uint8_t num_fb;
+	struct dmub_fb fb[DMUB_WINDOW_TOTAL];
+};
+
+/**
+ * struct dmub_srv_base_funcs - Driver specific base callbacks
+ */
+struct dmub_srv_base_funcs {
+	/**
+	 * @reg_read:
+	 *
+	 * Hook for reading a register.
+	 *
+	 * Return: The 32-bit register value from the given address.
+	 */
+	uint32_t (*reg_read)(void *ctx, uint32_t address);
+
+	/**
+	 * @reg_write:
+	 *
+	 * Hook for writing a value to the register specified by address.
+	 */
+	void (*reg_write)(void *ctx, uint32_t address, uint32_t value);
+};
+
+/**
+ * struct dmub_srv_hw_funcs - hardware sequencer funcs for dmub
+ */
+struct dmub_srv_hw_funcs {
+	/* private: internal use only */
+
+	void (*reset)(struct dmub_srv *dmub);
+
+	void (*reset_release)(struct dmub_srv *dmub);
+
+	void (*backdoor_load)(struct dmub_srv *dmub,
+			      const struct dmub_window *cw0,
+			      const struct dmub_window *cw1);
+
+	void (*setup_windows)(struct dmub_srv *dmub,
+			      const struct dmub_window *cw2,
+			      const struct dmub_window *cw3,
+			      const struct dmub_window *cw4,
+			      const struct dmub_window *cw5,
+			      const struct dmub_window *cw6);
+
+	void (*setup_mailbox)(struct dmub_srv *dmub,
+			      const struct dmub_region *inbox1);
+
+	uint32_t (*get_inbox1_rptr)(struct dmub_srv *dmub);
+
+	void (*set_inbox1_wptr)(struct dmub_srv *dmub, uint32_t wptr_offset);
+
+	bool (*is_supported)(struct dmub_srv *dmub);
+
+	bool (*is_hw_init)(struct dmub_srv *dmub);
+
+	bool (*is_phy_init)(struct dmub_srv *dmub);
+
+	bool (*is_auto_load_done)(struct dmub_srv *dmub);
+};
+
+/**
+ * struct dmub_srv_create_params - params for dmub service creation
+ * @base_funcs: driver supplied base routines
+ * @hw_funcs: optional overrides for hw funcs
+ * @user_ctx: context data for callback funcs
+ * @asic: driver supplied asic
+ * @is_virtual: false for hw support only
+ */
+struct dmub_srv_create_params {
+	struct dmub_srv_base_funcs funcs;
+	struct dmub_srv_hw_funcs *hw_funcs;
+	void *user_ctx;
+	enum dmub_asic asic;
+	bool is_virtual;
+};
+
+/*
+ * struct dmub_srv_hw_params - params for dmub hardware initialization
+ * @fb: framebuffer info for each region
+ * @fb_base: base of the framebuffer aperture
+ * @fb_offset: offset of the framebuffer aperture
+ * @psp_version: psp version to pass for DMCU init
+ * @load_inst_const: true if DMUB should load inst const fw
+ */
+struct dmub_srv_hw_params {
+	struct dmub_fb *fb[DMUB_WINDOW_TOTAL];
+	uint64_t fb_base;
+	uint64_t fb_offset;
+	uint32_t psp_version;
+	bool load_inst_const;
+};
+
+/**
+ * struct dmub_srv - software state for dmcub
+ * @asic: dmub asic identifier
+ * @user_ctx: user provided context for the dmub_srv
+ * @is_virtual: false if hardware support only
+ * @fw_state: dmub firmware state pointer
+ */
+struct dmub_srv {
+	enum dmub_asic asic;
+	void *user_ctx;
+	bool is_virtual;
+	volatile const struct dmub_fw_state *fw_state;
+
+	/* private: internal use only */
+	const struct dmub_srv_common_regs *regs;
+
+	struct dmub_srv_base_funcs funcs;
+	struct dmub_srv_hw_funcs hw_funcs;
+	struct dmub_rb inbox1_rb;
+
+	bool sw_init;
+	bool hw_init;
+
+	uint64_t fb_base;
+	uint64_t fb_offset;
+	uint32_t psp_version;
+};
+
+/**
+ * dmub_srv_create() - creates the DMUB service.
+ * @dmub: the dmub service
+ * @params: creation parameters for the service
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_create(struct dmub_srv *dmub,
+				 const struct dmub_srv_create_params *params);
+
+/**
+ * dmub_srv_destroy() - destroys the DMUB service.
+ * @dmub: the dmub service
+ */
+void dmub_srv_destroy(struct dmub_srv *dmub);
+
+/**
+ * dmub_srv_calc_region_info() - retreives region info from the dmub service
+ * @dmub: the dmub service
+ * @params: parameters used to calculate region locations
+ * @info_out: the output region info from dmub
+ *
+ * Calculates the base and top address for all relevant dmub regions
+ * using the parameters given (if any).
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status
+dmub_srv_calc_region_info(struct dmub_srv *dmub,
+			  const struct dmub_srv_region_params *params,
+			  struct dmub_srv_region_info *out);
+
+/**
+ * dmub_srv_calc_region_info() - retreives fb info from the dmub service
+ * @dmub: the dmub service
+ * @params: parameters used to calculate fb locations
+ * @info_out: the output fb info from dmub
+ *
+ * Calculates the base and top address for all relevant dmub regions
+ * using the parameters given (if any).
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
+				       const struct dmub_srv_fb_params *params,
+				       struct dmub_srv_fb_info *out);
+
+/**
+ * dmub_srv_has_hw_support() - returns hw support state for dmcub
+ * @dmub: the dmub service
+ * @is_supported: hw support state
+ *
+ * Queries the hardware for DMCUB support and returns the result.
+ *
+ * Can be called before dmub_srv_hw_init().
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_has_hw_support(struct dmub_srv *dmub,
+					 bool *is_supported);
+
+/**
+ * dmub_srv_is_hw_init() - returns hardware init state
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_is_hw_init(struct dmub_srv *dmub, bool *is_hw_init);
+
+/**
+ * dmub_srv_hw_init() - initializes the underlying DMUB hardware
+ * @dmub: the dmub service
+ * @params: params for hardware initialization
+ *
+ * Resets the DMUB hardware and performs backdoor loading of the
+ * required cache regions based on the input framebuffer regions.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_NO_CTX - dmcub context not initialized
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
+				  const struct dmub_srv_hw_params *params);
+
+/**
+ * dmub_srv_cmd_queue() - queues a command to the DMUB
+ * @dmub: the dmub service
+ * @cmd: the command to queue
+ *
+ * Queues a command to the DMUB service but does not begin execution
+ * immediately.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_QUEUE_FULL - no remaining room in queue
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
+				    const struct dmub_cmd_header *cmd);
+
+/**
+ * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub
+ * @dmub: the dmub service
+ *
+ * Begins execution of queued commands on the dmub.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub);
+
+/**
+ * dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete
+ * @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
+ *
+ * Waits until firmware has been autoloaded by the DMCUB. The maximum
+ * wait time is given in microseconds to prevent spinning forever.
+ *
+ * On ASICs without firmware autoload support this function will return
+ * immediately.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_TIMEOUT - wait for phy init timed out
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
+					     uint32_t timeout_us);
+
+/**
+ * dmub_srv_wait_for_phy_init() - Waits for DMUB PHY init to complete
+ * @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
+ *
+ * Waits until the PHY has been initialized by the DMUB. The maximum
+ * wait time is given in microseconds to prevent spinning forever.
+ *
+ * On ASICs without PHY init support this function will return
+ * immediately.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_TIMEOUT - wait for phy init timed out
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub,
+					    uint32_t timeout_us);
+
+/**
+ * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle
+ * @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
+ *
+ * Waits until the DMUB buffer is empty and all commands have
+ * finished processing. The maximum wait time is given in
+ * microseconds to prevent spinning forever.
+ *
+ * Return:
+ *   DMUB_STATUS_OK - success
+ *   DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out
+ *   DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub,
+					uint32_t timeout_us);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _DMUB_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_trace_buffer.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_trace_buffer.h
new file mode 100644
index 000000000000..6b3ee42db350
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_trace_buffer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#ifndef _DMUB_TRACE_BUFFER_H_
+#define _DMUB_TRACE_BUFFER_H_
+
+#include "dmub_types.h"
+
+#define LOAD_DMCU_FW	1
+#define LOAD_PHY_FW	2
+
+
+enum dmucb_trace_code {
+	DMCUB__UNKNOWN,
+	DMCUB__MAIN_BEGIN,
+	DMCUB__PHY_INIT_BEGIN,
+	DMCUB__PHY_FW_SRAM_LOAD_BEGIN,
+	DMCUB__PHY_FW_SRAM_LOAD_END,
+	DMCUB__PHY_INIT_POLL_DONE,
+	DMCUB__PHY_INIT_END,
+	DMCUB__DMCU_ERAM_LOAD_BEGIN,
+	DMCUB__DMCU_ERAM_LOAD_END,
+	DMCUB__DMCU_ISR_LOAD_BEGIN,
+	DMCUB__DMCU_ISR_LOAD_END,
+	DMCUB__MAIN_IDLE,
+	DMCUB__PERF_TRACE,
+	DMCUB__PG_DONE,
+};
+
+struct dmcub_trace_buf_entry {
+	enum dmucb_trace_code trace_code;
+	uint32_t tick_count;
+	uint32_t param0;
+	uint32_t param1;
+};
+
+#define TRACE_BUF_SIZE (1024) //1 kB
+#define PERF_TRACE_MAX_ENTRY ((TRACE_BUF_SIZE - 8)/sizeof(struct dmcub_trace_buf_entry))
+
+
+struct dmcub_trace_buf {
+	uint32_t entry_count;
+	uint32_t clk_freq;
+	struct dmcub_trace_buf_entry entries[PERF_TRACE_MAX_ENTRY];
+};
+
+
+#endif /* _DMUB_TRACE_BUFFER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
new file mode 100644
index 000000000000..41d524b0db2f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_TYPES_H_
+#define _DMUB_TYPES_H_
+
+/* Basic type definitions. */
+#include <asm/byteorder.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <stdarg.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#ifndef dmub_memcpy
+#define dmub_memcpy(dest, source, bytes) memcpy((dest), (source), (bytes))
+#endif
+
+#ifndef dmub_memset
+#define dmub_memset(dest, val, bytes) memset((dest), (val), (bytes))
+#endif
+
+#ifndef dmub_udelay
+#define dmub_udelay(microseconds) udelay(microseconds)
+#endif
+
+union dmub_addr {
+	struct {
+		uint32_t low_part;
+		uint32_t high_part;
+	} u;
+	uint64_t quad_part;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _DMUB_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/Makefile b/drivers/gpu/drm/amd/display/dmub/src/Makefile
new file mode 100644
index 000000000000..e08dfeea24b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/Makefile
@@ -0,0 +1,27 @@
+#
+# Copyright 2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+DMUB = dmub_srv.o dmub_reg.o dmub_dcn20.o dmub_dcn21.o
+
+AMD_DAL_DMUB = $(addprefix $(AMDDALPATH)/dmub/src/,$(DMUB))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DMUB)
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
new file mode 100644
index 000000000000..cd51c6138894
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../inc/dmub_srv.h"
+#include "dmub_reg.h"
+#include "dmub_dcn20.h"
+
+#include "dcn/dcn_2_0_0_offset.h"
+#include "dcn/dcn_2_0_0_sh_mask.h"
+#include "soc15_hw_ip.h"
+#include "vega10_ip_offset.h"
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG##seg
+#define CTX dmub
+#define REGS dmub->regs
+
+/* Registers. */
+
+const struct dmub_srv_common_regs dmub_srv_dcn20_regs = {
+#define DMUB_SR(reg) REG_OFFSET(reg),
+	{ DMUB_COMMON_REGS() },
+#undef DMUB_SR
+
+#define DMUB_SF(reg, field) FD_MASK(reg, field),
+	{ DMUB_COMMON_FIELDS() },
+#undef DMUB_SF
+
+#define DMUB_SF(reg, field) FD_SHIFT(reg, field),
+	{ DMUB_COMMON_FIELDS() },
+#undef DMUB_SF
+};
+
+/* Shared functions. */
+
+static inline void dmub_dcn20_translate_addr(const union dmub_addr *addr_in,
+					     uint64_t fb_base,
+					     uint64_t fb_offset,
+					     union dmub_addr *addr_out)
+{
+	addr_out->quad_part = addr_in->quad_part - fb_base + fb_offset;
+}
+
+void dmub_dcn20_reset(struct dmub_srv *dmub)
+{
+	REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1);
+	REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+	REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+}
+
+void dmub_dcn20_reset_release(struct dmub_srv *dmub)
+{
+	REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0);
+	REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF);
+	REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1);
+	REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 0);
+}
+
+void dmub_dcn20_backdoor_load(struct dmub_srv *dmub,
+			      const struct dmub_window *cw0,
+			      const struct dmub_window *cw1)
+{
+	union dmub_addr offset;
+	uint64_t fb_base = dmub->fb_base, fb_offset = dmub->fb_offset;
+
+	REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1);
+	REG_UPDATE_2(DMCUB_MEM_CNTL, DMCUB_MEM_READ_SPACE, 0x3,
+		     DMCUB_MEM_WRITE_SPACE, 0x3);
+
+	dmub_dcn20_translate_addr(&cw0->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base);
+	REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top,
+		  DMCUB_REGION3_CW0_ENABLE, 1);
+
+	dmub_dcn20_translate_addr(&cw1->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base);
+	REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top,
+		  DMCUB_REGION3_CW1_ENABLE, 1);
+
+	REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID,
+		     0x20);
+}
+
+void dmub_dcn20_setup_windows(struct dmub_srv *dmub,
+			      const struct dmub_window *cw2,
+			      const struct dmub_window *cw3,
+			      const struct dmub_window *cw4,
+			      const struct dmub_window *cw5,
+			      const struct dmub_window *cw6)
+{
+	union dmub_addr offset;
+	uint64_t fb_base = dmub->fb_base, fb_offset = dmub->fb_offset;
+
+	dmub_dcn20_translate_addr(&cw2->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW2_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW2_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW2_BASE_ADDRESS, cw2->region.base);
+	REG_SET_2(DMCUB_REGION3_CW2_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW2_TOP_ADDRESS, cw2->region.top,
+		  DMCUB_REGION3_CW2_ENABLE, 1);
+
+	dmub_dcn20_translate_addr(&cw3->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW3_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW3_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW3_BASE_ADDRESS, cw3->region.base);
+	REG_SET_2(DMCUB_REGION3_CW3_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW3_TOP_ADDRESS, cw3->region.top,
+		  DMCUB_REGION3_CW3_ENABLE, 1);
+
+	/* TODO: Move this to CW4. */
+	dmub_dcn20_translate_addr(&cw4->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION4_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION4_OFFSET_HIGH, offset.u.high_part);
+	REG_SET_2(DMCUB_REGION4_TOP_ADDRESS, 0, DMCUB_REGION4_TOP_ADDRESS,
+		  cw4->region.top - cw4->region.base - 1, DMCUB_REGION4_ENABLE,
+		  1);
+
+	dmub_dcn20_translate_addr(&cw5->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW5_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW5_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW5_BASE_ADDRESS, cw5->region.base);
+	REG_SET_2(DMCUB_REGION3_CW5_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW5_TOP_ADDRESS, cw5->region.top,
+		  DMCUB_REGION3_CW5_ENABLE, 1);
+
+	dmub_dcn20_translate_addr(&cw6->offset, fb_base, fb_offset, &offset);
+
+	REG_WRITE(DMCUB_REGION3_CW6_OFFSET, offset.u.low_part);
+	REG_WRITE(DMCUB_REGION3_CW6_OFFSET_HIGH, offset.u.high_part);
+	REG_WRITE(DMCUB_REGION3_CW6_BASE_ADDRESS, cw6->region.base);
+	REG_SET_2(DMCUB_REGION3_CW6_TOP_ADDRESS, 0,
+		  DMCUB_REGION3_CW6_TOP_ADDRESS, cw6->region.top,
+		  DMCUB_REGION3_CW6_ENABLE, 1);
+}
+
+void dmub_dcn20_setup_mailbox(struct dmub_srv *dmub,
+			      const struct dmub_region *inbox1)
+{
+	/* TODO: Use CW4 instead of region 4. */
+
+	REG_WRITE(DMCUB_INBOX1_BASE_ADDRESS, 0x80000000);
+	REG_WRITE(DMCUB_INBOX1_SIZE, inbox1->top - inbox1->base);
+	REG_WRITE(DMCUB_INBOX1_RPTR, 0);
+	REG_WRITE(DMCUB_INBOX1_WPTR, 0);
+}
+
+uint32_t dmub_dcn20_get_inbox1_rptr(struct dmub_srv *dmub)
+{
+	return REG_READ(DMCUB_INBOX1_RPTR);
+}
+
+void dmub_dcn20_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset)
+{
+	REG_WRITE(DMCUB_INBOX1_WPTR, wptr_offset);
+}
+
+bool dmub_dcn20_is_hw_init(struct dmub_srv *dmub)
+{
+	return REG_READ(DMCUB_REGION3_CW2_BASE_ADDRESS) != 0;
+}
+
+bool dmub_dcn20_is_supported(struct dmub_srv *dmub)
+{
+	uint32_t supported = 0;
+
+	REG_GET(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE, &supported);
+
+	return supported;
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
new file mode 100644
index 000000000000..53bfd4da69ad
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_DCN20_H_
+#define _DMUB_DCN20_H_
+
+#include "../inc/dmub_types.h"
+
+struct dmub_srv;
+
+/* DCN20 register definitions. */
+
+#define DMUB_COMMON_REGS() \
+	DMUB_SR(DMCUB_CNTL) \
+	DMUB_SR(DMCUB_MEM_CNTL) \
+	DMUB_SR(DMCUB_SEC_CNTL) \
+	DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_INBOX1_SIZE) \
+	DMUB_SR(DMCUB_INBOX1_RPTR) \
+	DMUB_SR(DMCUB_INBOX1_WPTR) \
+	DMUB_SR(DMCUB_REGION3_CW0_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW1_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW2_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW3_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW4_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW5_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW6_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW7_OFFSET) \
+	DMUB_SR(DMCUB_REGION3_CW0_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW1_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW2_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW3_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW4_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW5_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW6_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW7_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION3_CW0_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW1_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW2_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW3_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW4_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW5_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW6_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW7_BASE_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW0_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW1_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW2_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW3_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW4_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW5_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW6_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION3_CW7_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_REGION4_OFFSET) \
+	DMUB_SR(DMCUB_REGION4_OFFSET_HIGH) \
+	DMUB_SR(DMCUB_REGION4_TOP_ADDRESS) \
+	DMUB_SR(DMCUB_SCRATCH0) \
+	DMUB_SR(DMCUB_SCRATCH1) \
+	DMUB_SR(DMCUB_SCRATCH2) \
+	DMUB_SR(DMCUB_SCRATCH3) \
+	DMUB_SR(DMCUB_SCRATCH4) \
+	DMUB_SR(DMCUB_SCRATCH5) \
+	DMUB_SR(DMCUB_SCRATCH6) \
+	DMUB_SR(DMCUB_SCRATCH7) \
+	DMUB_SR(DMCUB_SCRATCH8) \
+	DMUB_SR(DMCUB_SCRATCH9) \
+	DMUB_SR(DMCUB_SCRATCH10) \
+	DMUB_SR(DMCUB_SCRATCH11) \
+	DMUB_SR(DMCUB_SCRATCH12) \
+	DMUB_SR(DMCUB_SCRATCH13) \
+	DMUB_SR(DMCUB_SCRATCH14) \
+	DMUB_SR(DMCUB_SCRATCH15) \
+	DMUB_SR(CC_DC_PIPE_DIS) \
+	DMUB_SR(MMHUBBUB_SOFT_RESET)
+
+#define DMUB_COMMON_FIELDS() \
+	DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
+	DMUB_SF(DMCUB_CNTL, DMCUB_SOFT_RESET) \
+	DMUB_SF(DMCUB_CNTL, DMCUB_TRACEPORT_EN) \
+	DMUB_SF(DMCUB_MEM_CNTL, DMCUB_MEM_READ_SPACE) \
+	DMUB_SF(DMCUB_MEM_CNTL, DMCUB_MEM_WRITE_SPACE) \
+	DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \
+	DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \
+	DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE) \
+	DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \
+	DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \
+	DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
+	DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
+	DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET)
+
+struct dmub_srv_common_reg_offset {
+#define DMUB_SR(reg) uint32_t reg;
+	DMUB_COMMON_REGS()
+#undef DMUB_SR
+};
+
+struct dmub_srv_common_reg_shift {
+#define DMUB_SF(reg, field) uint8_t reg##__##field;
+	DMUB_COMMON_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_common_reg_mask {
+#define DMUB_SF(reg, field) uint32_t reg##__##field;
+	DMUB_COMMON_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_common_regs {
+	const struct dmub_srv_common_reg_offset offset;
+	const struct dmub_srv_common_reg_mask mask;
+	const struct dmub_srv_common_reg_shift shift;
+};
+
+extern const struct dmub_srv_common_regs dmub_srv_dcn20_regs;
+
+/* Hardware functions. */
+
+void dmub_dcn20_init(struct dmub_srv *dmub);
+
+void dmub_dcn20_reset(struct dmub_srv *dmub);
+
+void dmub_dcn20_reset_release(struct dmub_srv *dmub);
+
+void dmub_dcn20_backdoor_load(struct dmub_srv *dmub,
+			      const struct dmub_window *cw0,
+			      const struct dmub_window *cw1);
+
+void dmub_dcn20_setup_windows(struct dmub_srv *dmub,
+			      const struct dmub_window *cw2,
+			      const struct dmub_window *cw3,
+			      const struct dmub_window *cw4,
+			      const struct dmub_window *cw5,
+			      const struct dmub_window *cw6);
+
+void dmub_dcn20_setup_mailbox(struct dmub_srv *dmub,
+			      const struct dmub_region *inbox1);
+
+uint32_t dmub_dcn20_get_inbox1_rptr(struct dmub_srv *dmub);
+
+void dmub_dcn20_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset);
+
+bool dmub_dcn20_is_hw_init(struct dmub_srv *dmub);
+
+bool dmub_dcn20_is_supported(struct dmub_srv *dmub);
+
+#endif /* _DMUB_DCN20_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
new file mode 100644
index 000000000000..5bed9fcd6b5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../inc/dmub_srv.h"
+#include "dmub_reg.h"
+#include "dmub_dcn21.h"
+
+#include "dcn/dcn_2_1_0_offset.h"
+#include "dcn/dcn_2_1_0_sh_mask.h"
+#include "renoir_ip_offset.h"
+
+#define BASE_INNER(seg) DMU_BASE__INST0_SEG##seg
+#define CTX dmub
+#define REGS dmub->regs
+
+/* Registers. */
+
+const struct dmub_srv_common_regs dmub_srv_dcn21_regs = {
+#define DMUB_SR(reg) REG_OFFSET(reg),
+	{ DMUB_COMMON_REGS() },
+#undef DMUB_SR
+
+#define DMUB_SF(reg, field) FD_MASK(reg, field),
+	{ DMUB_COMMON_FIELDS() },
+#undef DMUB_SF
+
+#define DMUB_SF(reg, field) FD_SHIFT(reg, field),
+	{ DMUB_COMMON_FIELDS() },
+#undef DMUB_SF
+};
+
+/* Shared functions. */
+
+bool dmub_dcn21_is_auto_load_done(struct dmub_srv *dmub)
+{
+	return (REG_READ(DMCUB_SCRATCH0) == 3);
+}
+
+bool dmub_dcn21_is_phy_init(struct dmub_srv *dmub)
+{
+	return REG_READ(DMCUB_SCRATCH10) == 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h
new file mode 100644
index 000000000000..2bbea237137b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_DCN21_H_
+#define _DMUB_DCN21_H_
+
+#include "dmub_dcn20.h"
+
+/* Registers. */
+
+extern const struct dmub_srv_common_regs dmub_srv_dcn21_regs;
+
+/* Hardware functions. */
+
+bool dmub_dcn21_is_auto_load_done(struct dmub_srv *dmub);
+
+bool dmub_dcn21_is_phy_init(struct dmub_srv *dmub);
+
+#endif /* _DMUB_DCN21_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
new file mode 100644
index 000000000000..4094eca212f0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dmub_reg.h"
+#include "../inc/dmub_srv.h"
+
+struct dmub_reg_value_masks {
+	uint32_t value;
+	uint32_t mask;
+};
+
+static inline void
+set_reg_field_value_masks(struct dmub_reg_value_masks *field_value_mask,
+			  uint32_t value, uint32_t mask, uint8_t shift)
+{
+	field_value_mask->value =
+		(field_value_mask->value & ~mask) | (mask & (value << shift));
+	field_value_mask->mask = field_value_mask->mask | mask;
+}
+
+static void set_reg_field_values(struct dmub_reg_value_masks *field_value_mask,
+				 uint32_t addr, int n, uint8_t shift1,
+				 uint32_t mask1, uint32_t field_value1,
+				 va_list ap)
+{
+	uint32_t shift, mask, field_value;
+	int i = 1;
+
+	/* gather all bits value/mask getting updated in this register */
+	set_reg_field_value_masks(field_value_mask, field_value1, mask1,
+				  shift1);
+
+	while (i < n) {
+		shift = va_arg(ap, uint32_t);
+		mask = va_arg(ap, uint32_t);
+		field_value = va_arg(ap, uint32_t);
+
+		set_reg_field_value_masks(field_value_mask, field_value, mask,
+					  shift);
+		i++;
+	}
+}
+
+static inline uint32_t get_reg_field_value_ex(uint32_t reg_value, uint32_t mask,
+					      uint8_t shift)
+{
+	return (mask & reg_value) >> shift;
+}
+
+void dmub_reg_update(struct dmub_srv *srv, uint32_t addr, int n, uint8_t shift1,
+		     uint32_t mask1, uint32_t field_value1, ...)
+{
+	struct dmub_reg_value_masks field_value_mask = { 0 };
+	uint32_t reg_val;
+	va_list ap;
+
+	va_start(ap, field_value1);
+	set_reg_field_values(&field_value_mask, addr, n, shift1, mask1,
+			     field_value1, ap);
+	va_end(ap);
+
+	reg_val = srv->funcs.reg_read(srv->user_ctx, addr);
+	reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
+	srv->funcs.reg_write(srv->user_ctx, addr, reg_val);
+}
+
+void dmub_reg_set(struct dmub_srv *srv, uint32_t addr, uint32_t reg_val, int n,
+		  uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...)
+{
+	struct dmub_reg_value_masks field_value_mask = { 0 };
+	va_list ap;
+
+	va_start(ap, field_value1);
+	set_reg_field_values(&field_value_mask, addr, n, shift1, mask1,
+			     field_value1, ap);
+	va_end(ap);
+
+	reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
+	srv->funcs.reg_write(srv->user_ctx, addr, reg_val);
+}
+
+void dmub_reg_get(struct dmub_srv *srv, uint32_t addr, uint8_t shift,
+		  uint32_t mask, uint32_t *field_value)
+{
+	uint32_t reg_val = srv->funcs.reg_read(srv->user_ctx, addr);
+	*field_value = get_reg_field_value_ex(reg_val, mask, shift);
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h
new file mode 100644
index 000000000000..c1f4030929a4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_REG_H_
+#define _DMUB_REG_H_
+
+#include "../inc/dmub_types.h"
+
+struct dmub_srv;
+
+/* Register offset and field lookup. */
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define REG_OFFSET(reg_name) (BASE(mm##reg_name##_BASE_IDX) + mm##reg_name)
+
+#define FD_SHIFT(reg_name, field) reg_name##__##field##__SHIFT
+
+#define FD_MASK(reg_name, field) reg_name##__##field##_MASK
+
+#define REG(reg) (REGS)->offset.reg
+
+#define FD(reg_field) (REGS)->shift.reg_field, (REGS)->mask.reg_field
+
+#define FN(reg_name, field) FD(reg_name##__##field)
+
+/* Register reads and writes. */
+
+#define REG_READ(reg) ((CTX)->funcs.reg_read((CTX)->user_ctx, REG(reg)))
+
+#define REG_WRITE(reg, val) \
+	((CTX)->funcs.reg_write((CTX)->user_ctx, REG(reg), (val)))
+
+/* Register field setting. */
+
+#define REG_SET_N(reg_name, n, initial_val, ...) \
+	dmub_reg_set(CTX, REG(reg_name), initial_val, n, __VA_ARGS__)
+
+#define REG_SET(reg_name, initial_val, field, val) \
+		REG_SET_N(reg_name, 1, initial_val, \
+				FN(reg_name, field), val)
+
+#define REG_SET_2(reg, init_value, f1, v1, f2, v2) \
+		REG_SET_N(reg, 2, init_value, \
+				FN(reg, f1), v1, \
+				FN(reg, f2), v2)
+
+#define REG_SET_3(reg, init_value, f1, v1, f2, v2, f3, v3) \
+		REG_SET_N(reg, 3, init_value, \
+				FN(reg, f1), v1, \
+				FN(reg, f2), v2, \
+				FN(reg, f3), v3)
+
+#define REG_SET_4(reg, init_value, f1, v1, f2, v2, f3, v3, f4, v4) \
+		REG_SET_N(reg, 4, init_value, \
+				FN(reg, f1), v1, \
+				FN(reg, f2), v2, \
+				FN(reg, f3), v3, \
+				FN(reg, f4), v4)
+
+/* Register field updating. */
+
+#define REG_UPDATE_N(reg_name, n, ...)\
+		dmub_reg_update(CTX, REG(reg_name), n, __VA_ARGS__)
+
+#define REG_UPDATE(reg_name, field, val)	\
+		REG_UPDATE_N(reg_name, 1, \
+				FN(reg_name, field), val)
+
+#define REG_UPDATE_2(reg, f1, v1, f2, v2)	\
+		REG_UPDATE_N(reg, 2,\
+				FN(reg, f1), v1,\
+				FN(reg, f2), v2)
+
+#define REG_UPDATE_3(reg, f1, v1, f2, v2, f3, v3) \
+		REG_UPDATE_N(reg, 3, \
+				FN(reg, f1), v1, \
+				FN(reg, f2), v2, \
+				FN(reg, f3), v3)
+
+#define REG_UPDATE_4(reg, f1, v1, f2, v2, f3, v3, f4, v4) \
+		REG_UPDATE_N(reg, 4, \
+				FN(reg, f1), v1, \
+				FN(reg, f2), v2, \
+				FN(reg, f3), v3, \
+				FN(reg, f4), v4)
+
+/* Register field getting. */
+
+#define REG_GET(reg_name, field, val) \
+	dmub_reg_get(CTX, REG(reg_name), FN(reg_name, field), val)
+
+void dmub_reg_set(struct dmub_srv *srv, uint32_t addr, uint32_t reg_val, int n,
+		  uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...);
+
+void dmub_reg_update(struct dmub_srv *srv, uint32_t addr, int n, uint8_t shift1,
+		     uint32_t mask1, uint32_t field_value1, ...);
+
+void dmub_reg_get(struct dmub_srv *srv, uint32_t addr, uint8_t shift,
+		  uint32_t mask, uint32_t *field_value);
+
+#endif /* _DMUB_REG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
new file mode 100644
index 000000000000..dee676335d73
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../inc/dmub_srv.h"
+#include "dmub_dcn20.h"
+#include "dmub_dcn21.h"
+#include "dmub_fw_meta.h"
+#include "os_types.h"
+/*
+ * Note: the DMUB service is standalone. No additional headers should be
+ * added below or above this line unless they reside within the DMUB
+ * folder.
+ */
+
+/* Alignment for framebuffer memory. */
+#define DMUB_FB_ALIGNMENT (1024 * 1024)
+
+/* Stack size. */
+#define DMUB_STACK_SIZE (128 * 1024)
+
+/* Context size. */
+#define DMUB_CONTEXT_SIZE (512 * 1024)
+
+/* Mailbox size */
+#define DMUB_MAILBOX_SIZE (DMUB_RB_SIZE)
+
+/* Default state size if meta is absent. */
+#define DMUB_FW_STATE_SIZE (1024)
+
+/* Default tracebuffer size if meta is absent. */
+#define DMUB_TRACE_BUFFER_SIZE (1024)
+
+/* Number of windows in use. */
+#define DMUB_NUM_WINDOWS (DMUB_WINDOW_6_FW_STATE + 1)
+/* Base addresses. */
+
+#define DMUB_CW0_BASE (0x60000000)
+#define DMUB_CW1_BASE (0x61000000)
+#define DMUB_CW3_BASE (0x63000000)
+#define DMUB_CW5_BASE (0x65000000)
+#define DMUB_CW6_BASE (0x66000000)
+
+static inline uint32_t dmub_align(uint32_t val, uint32_t factor)
+{
+	return (val + factor - 1) / factor * factor;
+}
+
+static void dmub_flush_buffer_mem(const struct dmub_fb *fb)
+{
+	const uint8_t *base = (const uint8_t *)fb->cpu_addr;
+	uint8_t buf[64];
+	uint32_t pos, end;
+
+	/**
+	 * Read 64-byte chunks since we don't want to store a
+	 * large temporary buffer for this purpose.
+	 */
+	end = fb->size / sizeof(buf) * sizeof(buf);
+
+	for (pos = 0; pos < end; pos += sizeof(buf))
+		dmub_memcpy(buf, base + pos, sizeof(buf));
+
+	/* Read anything leftover into the buffer. */
+	if (end < fb->size)
+		dmub_memcpy(buf, base + pos, fb->size - end);
+}
+
+static const struct dmub_fw_meta_info *
+dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size)
+{
+	const union dmub_fw_meta *meta;
+
+	if (fw_bss_data == NULL)
+		return NULL;
+
+	if (fw_bss_data_size < sizeof(union dmub_fw_meta) + DMUB_FW_META_OFFSET)
+		return NULL;
+
+	meta = (const union dmub_fw_meta *)(fw_bss_data + fw_bss_data_size -
+					    DMUB_FW_META_OFFSET -
+					    sizeof(union dmub_fw_meta));
+
+	if (meta->info.magic_value != DMUB_FW_META_MAGIC)
+		return NULL;
+
+	return &meta->info;
+}
+
+static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
+{
+	struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs;
+
+	switch (asic) {
+	case DMUB_ASIC_DCN20:
+	case DMUB_ASIC_DCN21:
+		dmub->regs = &dmub_srv_dcn20_regs;
+
+		funcs->reset = dmub_dcn20_reset;
+		funcs->reset_release = dmub_dcn20_reset_release;
+		funcs->backdoor_load = dmub_dcn20_backdoor_load;
+		funcs->setup_windows = dmub_dcn20_setup_windows;
+		funcs->setup_mailbox = dmub_dcn20_setup_mailbox;
+		funcs->get_inbox1_rptr = dmub_dcn20_get_inbox1_rptr;
+		funcs->set_inbox1_wptr = dmub_dcn20_set_inbox1_wptr;
+		funcs->is_supported = dmub_dcn20_is_supported;
+		funcs->is_hw_init = dmub_dcn20_is_hw_init;
+
+		if (asic == DMUB_ASIC_DCN21) {
+			dmub->regs = &dmub_srv_dcn21_regs;
+
+			funcs->is_auto_load_done = dmub_dcn21_is_auto_load_done;
+			funcs->is_phy_init = dmub_dcn21_is_phy_init;
+		}
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+enum dmub_status dmub_srv_create(struct dmub_srv *dmub,
+				 const struct dmub_srv_create_params *params)
+{
+	enum dmub_status status = DMUB_STATUS_OK;
+
+	dmub_memset(dmub, 0, sizeof(*dmub));
+
+	dmub->funcs = params->funcs;
+	dmub->user_ctx = params->user_ctx;
+	dmub->asic = params->asic;
+	dmub->is_virtual = params->is_virtual;
+
+	/* Setup asic dependent hardware funcs. */
+	if (!dmub_srv_hw_setup(dmub, params->asic)) {
+		status = DMUB_STATUS_INVALID;
+		goto cleanup;
+	}
+
+	/* Override (some) hardware funcs based on user params. */
+	if (params->hw_funcs) {
+		if (params->hw_funcs->get_inbox1_rptr)
+			dmub->hw_funcs.get_inbox1_rptr =
+				params->hw_funcs->get_inbox1_rptr;
+
+		if (params->hw_funcs->set_inbox1_wptr)
+			dmub->hw_funcs.set_inbox1_wptr =
+				params->hw_funcs->set_inbox1_wptr;
+
+		if (params->hw_funcs->is_supported)
+			dmub->hw_funcs.is_supported =
+				params->hw_funcs->is_supported;
+	}
+
+	/* Sanity checks for required hw func pointers. */
+	if (!dmub->hw_funcs.get_inbox1_rptr ||
+	    !dmub->hw_funcs.set_inbox1_wptr) {
+		status = DMUB_STATUS_INVALID;
+		goto cleanup;
+	}
+
+cleanup:
+	if (status == DMUB_STATUS_OK)
+		dmub->sw_init = true;
+	else
+		dmub_srv_destroy(dmub);
+
+	return status;
+}
+
+void dmub_srv_destroy(struct dmub_srv *dmub)
+{
+	dmub_memset(dmub, 0, sizeof(*dmub));
+}
+
+enum dmub_status
+dmub_srv_calc_region_info(struct dmub_srv *dmub,
+			  const struct dmub_srv_region_params *params,
+			  struct dmub_srv_region_info *out)
+{
+	struct dmub_region *inst = &out->regions[DMUB_WINDOW_0_INST_CONST];
+	struct dmub_region *stack = &out->regions[DMUB_WINDOW_1_STACK];
+	struct dmub_region *data = &out->regions[DMUB_WINDOW_2_BSS_DATA];
+	struct dmub_region *bios = &out->regions[DMUB_WINDOW_3_VBIOS];
+	struct dmub_region *mail = &out->regions[DMUB_WINDOW_4_MAILBOX];
+	struct dmub_region *trace_buff = &out->regions[DMUB_WINDOW_5_TRACEBUFF];
+	struct dmub_region *fw_state = &out->regions[DMUB_WINDOW_6_FW_STATE];
+	const struct dmub_fw_meta_info *fw_info;
+	uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
+	uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	memset(out, 0, sizeof(*out));
+
+	out->num_regions = DMUB_NUM_WINDOWS;
+
+	inst->base = 0x0;
+	inst->top = inst->base + params->inst_const_size;
+
+	data->base = dmub_align(inst->top, 256);
+	data->top = data->base + params->bss_data_size;
+
+	/*
+	 * All cache windows below should be aligned to the size
+	 * of the DMCUB cache line, 64 bytes.
+	 */
+
+	stack->base = dmub_align(data->top, 256);
+	stack->top = stack->base + DMUB_STACK_SIZE + DMUB_CONTEXT_SIZE;
+
+	bios->base = dmub_align(stack->top, 256);
+	bios->top = bios->base + params->vbios_size;
+
+	mail->base = dmub_align(bios->top, 256);
+	mail->top = mail->base + DMUB_MAILBOX_SIZE;
+
+	fw_info = dmub_get_fw_meta_info(params->fw_bss_data,
+					params->bss_data_size);
+
+	if (fw_info) {
+		fw_state_size = fw_info->fw_region_size;
+		trace_buffer_size = fw_info->trace_buffer_size;
+	}
+
+	trace_buff->base = dmub_align(mail->top, 256);
+	trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
+
+	fw_state->base = dmub_align(trace_buff->top, 256);
+	fw_state->top = fw_state->base + dmub_align(fw_state_size, 64);
+
+	out->fb_size = dmub_align(fw_state->top, 4096);
+
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
+				       const struct dmub_srv_fb_params *params,
+				       struct dmub_srv_fb_info *out)
+{
+	uint8_t *cpu_base;
+	uint64_t gpu_base;
+	uint32_t i;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	memset(out, 0, sizeof(*out));
+
+	if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
+		return DMUB_STATUS_INVALID;
+
+	cpu_base = (uint8_t *)params->cpu_addr;
+	gpu_base = params->gpu_addr;
+
+	for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
+		const struct dmub_region *reg =
+			&params->region_info->regions[i];
+
+		out->fb[i].cpu_addr = cpu_base + reg->base;
+		out->fb[i].gpu_addr = gpu_base + reg->base;
+		out->fb[i].size = reg->top - reg->base;
+	}
+
+	out->num_fb = DMUB_NUM_WINDOWS;
+
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_has_hw_support(struct dmub_srv *dmub,
+					 bool *is_supported)
+{
+	*is_supported = false;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (dmub->hw_funcs.is_supported)
+		*is_supported = dmub->hw_funcs.is_supported(dmub);
+
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_is_hw_init(struct dmub_srv *dmub, bool *is_hw_init)
+{
+	*is_hw_init = false;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (dmub->hw_funcs.is_hw_init)
+		*is_hw_init = dmub->hw_funcs.is_hw_init(dmub);
+
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
+				  const struct dmub_srv_hw_params *params)
+{
+	struct dmub_fb *inst_fb = params->fb[DMUB_WINDOW_0_INST_CONST];
+	struct dmub_fb *stack_fb = params->fb[DMUB_WINDOW_1_STACK];
+	struct dmub_fb *data_fb = params->fb[DMUB_WINDOW_2_BSS_DATA];
+	struct dmub_fb *bios_fb = params->fb[DMUB_WINDOW_3_VBIOS];
+	struct dmub_fb *mail_fb = params->fb[DMUB_WINDOW_4_MAILBOX];
+	struct dmub_fb *tracebuff_fb = params->fb[DMUB_WINDOW_5_TRACEBUFF];
+	struct dmub_fb *fw_state_fb = params->fb[DMUB_WINDOW_6_FW_STATE];
+
+	struct dmub_rb_init_params rb_params;
+	struct dmub_window cw0, cw1, cw2, cw3, cw4, cw5, cw6;
+	struct dmub_region inbox1;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	dmub->fb_base = params->fb_base;
+	dmub->fb_offset = params->fb_offset;
+	dmub->psp_version = params->psp_version;
+
+	if (inst_fb && data_fb) {
+		cw0.offset.quad_part = inst_fb->gpu_addr;
+		cw0.region.base = DMUB_CW0_BASE;
+		cw0.region.top = cw0.region.base + inst_fb->size - 1;
+
+		cw1.offset.quad_part = stack_fb->gpu_addr;
+		cw1.region.base = DMUB_CW1_BASE;
+		cw1.region.top = cw1.region.base + stack_fb->size - 1;
+
+		/**
+		 * Read back all the instruction memory so we don't hang the
+		 * DMCUB when backdoor loading if the write from x86 hasn't been
+		 * flushed yet. This only occurs in backdoor loading.
+		 */
+		dmub_flush_buffer_mem(inst_fb);
+
+		if (params->load_inst_const && dmub->hw_funcs.backdoor_load)
+			dmub->hw_funcs.backdoor_load(dmub, &cw0, &cw1);
+	}
+
+	if (dmub->hw_funcs.reset)
+		dmub->hw_funcs.reset(dmub);
+
+	if (inst_fb && data_fb && bios_fb && mail_fb && tracebuff_fb &&
+	    fw_state_fb) {
+		cw2.offset.quad_part = data_fb->gpu_addr;
+		cw2.region.base = DMUB_CW0_BASE + inst_fb->size;
+		cw2.region.top = cw2.region.base + data_fb->size;
+
+		cw3.offset.quad_part = bios_fb->gpu_addr;
+		cw3.region.base = DMUB_CW3_BASE;
+		cw3.region.top = cw3.region.base + bios_fb->size;
+
+		cw4.offset.quad_part = mail_fb->gpu_addr;
+		cw4.region.base = cw3.region.top + 1;
+		cw4.region.top = cw4.region.base + mail_fb->size;
+
+		inbox1.base = cw4.region.base;
+		inbox1.top = cw4.region.top;
+
+		cw5.offset.quad_part = tracebuff_fb->gpu_addr;
+		cw5.region.base = DMUB_CW5_BASE;
+		cw5.region.top = cw5.region.base + tracebuff_fb->size;
+
+		cw6.offset.quad_part = fw_state_fb->gpu_addr;
+		cw6.region.base = DMUB_CW6_BASE;
+		cw6.region.top = cw6.region.base + fw_state_fb->size;
+
+		dmub->fw_state = fw_state_fb->cpu_addr;
+
+		if (dmub->hw_funcs.setup_windows)
+			dmub->hw_funcs.setup_windows(dmub, &cw2, &cw3, &cw4,
+						     &cw5, &cw6);
+
+		if (dmub->hw_funcs.setup_mailbox)
+			dmub->hw_funcs.setup_mailbox(dmub, &inbox1);
+	}
+
+	if (mail_fb) {
+		dmub_memset(&rb_params, 0, sizeof(rb_params));
+		rb_params.ctx = dmub;
+		rb_params.base_address = mail_fb->cpu_addr;
+		rb_params.capacity = DMUB_RB_SIZE;
+
+		dmub_rb_init(&dmub->inbox1_rb, &rb_params);
+	}
+
+	if (dmub->hw_funcs.reset_release)
+		dmub->hw_funcs.reset_release(dmub);
+
+	dmub->hw_init = true;
+
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
+				    const struct dmub_cmd_header *cmd)
+{
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
+		return DMUB_STATUS_OK;
+
+	return DMUB_STATUS_QUEUE_FULL;
+}
+
+enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
+{
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	/**
+	 * Read back all the queued commands to ensure that they've
+	 * been flushed to framebuffer memory. Otherwise DMCUB might
+	 * read back stale, fully invalid or partially invalid data.
+	 */
+	dmub_rb_flush_pending(&dmub->inbox1_rb);
+
+	dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);
+	return DMUB_STATUS_OK;
+}
+
+enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
+					     uint32_t timeout_us)
+{
+	uint32_t i;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (!dmub->hw_funcs.is_auto_load_done)
+		return DMUB_STATUS_OK;
+
+	for (i = 0; i <= timeout_us; i += 100) {
+		if (dmub->hw_funcs.is_auto_load_done(dmub))
+			return DMUB_STATUS_OK;
+
+		udelay(100);
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
+
+enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub,
+					    uint32_t timeout_us)
+{
+	uint32_t i = 0;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (!dmub->hw_funcs.is_phy_init)
+		return DMUB_STATUS_OK;
+
+	for (i = 0; i <= timeout_us; i += 10) {
+		if (dmub->hw_funcs.is_phy_init(dmub))
+			return DMUB_STATUS_OK;
+
+		udelay(10);
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
+
+enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub,
+					uint32_t timeout_us)
+{
+	uint32_t i;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	for (i = 0; i <= timeout_us; ++i) {
+		dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+		if (dmub_rb_empty(&dmub->inbox1_rb))
+			return DMUB_STATUS_OK;
+
+		udelay(1);
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 1be6c44fd32f..a2903985b9e8 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -134,23 +134,34 @@
 #define PICASSO_A0 0x41
 /* DCN1_01 */
 #define RAVEN2_A0 0x81
+#define RAVEN2_15D8_REV_94 0x94
+#define RAVEN2_15D8_REV_95 0x95
+#define RAVEN2_15D8_REV_E3 0xE3
+#define RAVEN2_15D8_REV_E4 0xE4
+#define RAVEN2_15D8_REV_E9 0xE9
+#define RAVEN2_15D8_REV_EA 0xEA
+#define RAVEN2_15D8_REV_EB 0xEB
 #define RAVEN1_F0 0xF0
 #define RAVEN_UNKNOWN 0xFF
-
-#define PICASSO_15D8_REV_E3 0xE3
-#define PICASSO_15D8_REV_E4 0xE4
-
+#ifndef ASICREV_IS_RAVEN
 #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
-#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
-#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < PICASSO_15D8_REV_E3))
-#define ASICREV_IS_DALI(eChipRev) ((eChipRev >= PICASSO_15D8_REV_E3) && (eChipRev < RAVEN1_F0))
+#endif
 
+#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
+#ifndef ASICREV_IS_RAVEN2
+#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < RAVEN1_F0))
+#endif
 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
-
+#define ASICREV_IS_DALI(eChipRev) ((eChipRev == RAVEN2_15D8_REV_E3) \
+		|| (eChipRev == RAVEN2_15D8_REV_E4))
+#define ASICREV_IS_POLLOCK(eChipRev) (eChipRev == RAVEN2_15D8_REV_94 \
+		|| eChipRev == RAVEN2_15D8_REV_95 \
+			|| eChipRev == RAVEN2_15D8_REV_E9 \
+				|| eChipRev == RAVEN2_15D8_REV_EA \
+					|| eChipRev == RAVEN2_15D8_REV_EB)
 
 #define FAMILY_RV 142 /* DCN 1*/
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 
 #define FAMILY_NV 143 /* DCN 2*/
 
@@ -164,12 +175,9 @@ enum {
 #define ASICREV_IS_NAVI10_P(eChipRev)        (eChipRev < NV_NAVI12_P_A0)
 #define ASICREV_IS_NAVI12_P(eChipRev)        ((eChipRev >= NV_NAVI12_P_A0) && (eChipRev < NV_NAVI14_M_A0))
 #define ASICREV_IS_NAVI14_M(eChipRev)        ((eChipRev >= NV_NAVI14_M_A0) && (eChipRev < NV_UNKNOWN))
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 #define RENOIR_A0 0x91
 #define DEVICE_ID_RENOIR_1636 0x1636   // Renoir
 #define ASICREV_IS_RENOIR(eChipRev) ((eChipRev >= RENOIR_A0) && (eChipRev < 0xFF))
-#endif
 
 /*
  * ASIC chip ID
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index fcc42372b6cf..0b6859189ca7 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -46,12 +46,8 @@ enum dce_version {
 	DCE_VERSION_MAX,
 	DCN_VERSION_1_0,
 	DCN_VERSION_1_01,
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	DCN_VERSION_2_0,
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	DCN_VERSION_2_1,
-#endif
 	DCN_VERSION_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h
index 18961707db23..9ad49da50a17 100644
--- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h
@@ -31,6 +31,8 @@
 #define DP_BRANCH_DEVICE_ID_0022B9 0x0022B9
 #define DP_BRANCH_DEVICE_ID_00001A 0x00001A
 #define DP_BRANCH_DEVICE_ID_0080E1 0x0080e1
+#define DP_BRANCH_DEVICE_ID_90CC24 0x90CC24
+#define DP_BRANCH_DEVICE_ID_00E04C 0x00E04C
 
 enum ddc_result {
 	DDC_RESULT_UNKNOWN = 0,
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index f312834fef50..d51de94e4bc3 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -178,7 +178,8 @@ struct dc_firmware_info {
 	uint32_t default_engine_clk; /* in KHz */
 	uint32_t dp_phy_ref_clk; /* in KHz - DCE12 only */
 	uint32_t i2c_engine_ref_clk; /* in KHz - DCE12 only */
-
+	bool oem_i2c_present;
+	uint8_t oem_i2c_obj_id;
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/include/hdcp_types.h b/drivers/gpu/drm/amd/display/include/hdcp_types.h
new file mode 100644
index 000000000000..f31e6befc8d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/include/hdcp_types.h
@@ -0,0 +1,96 @@
+/*
+* Copyright 2019 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*
+* Authors: AMD
+*
+*/
+
+#ifndef __DC_HDCP_TYPES_H__
+#define __DC_HDCP_TYPES_H__
+
+enum hdcp_message_id {
+	HDCP_MESSAGE_ID_INVALID = -1,
+
+	/* HDCP 1.4 */
+
+	HDCP_MESSAGE_ID_READ_BKSV = 0,
+	/* HDMI is called Ri', DP is called R0' */
+	HDCP_MESSAGE_ID_READ_RI_R0,
+	HDCP_MESSAGE_ID_READ_PJ,
+	HDCP_MESSAGE_ID_WRITE_AKSV,
+	HDCP_MESSAGE_ID_WRITE_AINFO,
+	HDCP_MESSAGE_ID_WRITE_AN,
+	HDCP_MESSAGE_ID_READ_VH_X,
+	HDCP_MESSAGE_ID_READ_VH_0,
+	HDCP_MESSAGE_ID_READ_VH_1,
+	HDCP_MESSAGE_ID_READ_VH_2,
+	HDCP_MESSAGE_ID_READ_VH_3,
+	HDCP_MESSAGE_ID_READ_VH_4,
+	HDCP_MESSAGE_ID_READ_BCAPS,
+	HDCP_MESSAGE_ID_READ_BSTATUS,
+	HDCP_MESSAGE_ID_READ_KSV_FIFO,
+	HDCP_MESSAGE_ID_READ_BINFO,
+
+	/* HDCP 2.2 */
+
+	HDCP_MESSAGE_ID_HDCP2VERSION,
+	HDCP_MESSAGE_ID_RX_CAPS,
+	HDCP_MESSAGE_ID_WRITE_AKE_INIT,
+	HDCP_MESSAGE_ID_READ_AKE_SEND_CERT,
+	HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM,
+	HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM,
+	HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME,
+	HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO,
+	HDCP_MESSAGE_ID_WRITE_LC_INIT,
+	HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME,
+	HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS,
+	HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST,
+	HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK,
+	HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE,
+	HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY,
+	HDCP_MESSAGE_ID_READ_RXSTATUS,
+	HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
+
+	HDCP_MESSAGE_ID_MAX
+};
+
+enum hdcp_version {
+	HDCP_Unknown = 0,
+	HDCP_VERSION_14,
+	HDCP_VERSION_22,
+};
+
+enum hdcp_link {
+	HDCP_LINK_PRIMARY,
+	HDCP_LINK_SECONDARY
+};
+
+struct hdcp_protection_message {
+	enum hdcp_version version;
+	/* relevant only for DVI */
+	enum hdcp_link link;
+	enum hdcp_message_id msg_id;
+	uint32_t length;
+	uint8_t max_retries;
+	uint8_t *data;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/include/i2caux_interface.h b/drivers/gpu/drm/amd/display/include/i2caux_interface.h
index bb012cb1a9f5..c7fbb9c3ad6b 100644
--- a/drivers/gpu/drm/amd/display/include/i2caux_interface.h
+++ b/drivers/gpu/drm/amd/display/include/i2caux_interface.h
@@ -42,7 +42,7 @@ struct aux_payload {
 	bool write;
 	bool mot;
 	uint32_t address;
-	uint8_t length;
+	uint32_t length;
 	uint8_t *data;
 	/*
 	 * used to return the reply type of the transaction
diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h
index 876b0b3e1a9c..4869d4562e4d 100644
--- a/drivers/gpu/drm/amd/display/include/link_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/link_service_types.h
@@ -123,6 +123,13 @@ enum dp_test_pattern {
 	DP_TEST_PATTERN_UNSUPPORTED
 };
 
+enum dp_test_pattern_color_space {
+	DP_TEST_PATTERN_COLOR_SPACE_RGB,
+	DP_TEST_PATTERN_COLOR_SPACE_YCBCR601,
+	DP_TEST_PATTERN_COLOR_SPACE_YCBCR709,
+	DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED
+};
+
 enum dp_panel_mode {
 	/* not required */
 	DP_PANEL_MODE_DEFAULT,
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index 2b219cdb13ad..89a709267019 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -66,12 +66,8 @@
 #define DC_LOG_GAMMA(...) pr_debug("[GAMMA]:"__VA_ARGS__)
 #define DC_LOG_ALL_GAMMA(...) pr_debug("[GAMMA]:"__VA_ARGS__)
 #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__)
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 #define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN3_0) || defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#endif
 
 struct dal_logger;
 
@@ -116,9 +112,7 @@ enum dc_log_type {
 	LOG_PERF_TRACE,
 	LOG_DISPLAYSTATS,
 	LOG_HDMI_RETIMER_REDRIVER,
-#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 	LOG_DSC,
-#endif
 	LOG_DWB,
 	LOG_GAMMA_DEBUG,
 	LOG_MAX_HW_POINTS,
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 2d8f14b69117..1b278c42809a 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -154,6 +154,7 @@ static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
 
 	struct fixed31_32 l_pow_m1;
 	struct fixed31_32 base, div;
+	struct fixed31_32 base2;
 
 
 	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
@@ -163,13 +164,15 @@ static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
 			dc_fixpt_div(dc_fixpt_one, m2));
 	base = dc_fixpt_sub(l_pow_m1, c1);
 
-	if (dc_fixpt_lt(base, dc_fixpt_zero))
-		base = dc_fixpt_zero;
-
 	div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
 
-	*out_y = dc_fixpt_pow(dc_fixpt_div(base, div),
-			dc_fixpt_div(dc_fixpt_one, m1));
+	base2 = dc_fixpt_div(base, div);
+	//avoid complex numbers
+	if (dc_fixpt_lt(base2, dc_fixpt_zero))
+		base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
+
+
+	*out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
 
 }
 
@@ -361,8 +364,10 @@ static struct fixed31_32 translate_from_linear_space(
 			scratch_2 = dc_fixpt_mul(gamma_of_2,
 					pow_buffer[pow_buffer_ptr%16]);
 
-		pow_buffer[pow_buffer_ptr%16] = scratch_2;
-		pow_buffer_ptr++;
+		if (pow_buffer_ptr != -1) {
+			pow_buffer[pow_buffer_ptr%16] = scratch_2;
+			pow_buffer_ptr++;
+		}
 
 		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
 		scratch_1 = dc_fixpt_sub(scratch_1, args->a2);
@@ -373,7 +378,42 @@ static struct fixed31_32 translate_from_linear_space(
 		return dc_fixpt_mul(args->arg, args->a1);
 }
 
-static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg)
+
+static struct fixed31_32 translate_from_linear_space_long(
+		struct translate_from_linear_space_args *args)
+{
+	const struct fixed31_32 one = dc_fixpt_from_int(1);
+
+	if (dc_fixpt_lt(one, args->arg))
+		return one;
+
+	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
+		return dc_fixpt_sub(
+			args->a2,
+			dc_fixpt_mul(
+				dc_fixpt_add(
+					one,
+					args->a3),
+				dc_fixpt_pow(
+					dc_fixpt_neg(args->arg),
+					dc_fixpt_recip(args->gamma))));
+	else if (dc_fixpt_le(args->a0, args->arg))
+		return dc_fixpt_sub(
+			dc_fixpt_mul(
+				dc_fixpt_add(
+					one,
+					args->a3),
+				dc_fixpt_pow(
+						args->arg,
+					dc_fixpt_recip(args->gamma))),
+					args->a2);
+	else
+		return dc_fixpt_mul(
+			args->arg,
+			args->a1);
+}
+
+static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf)
 {
 	struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10);
 
@@ -384,9 +424,13 @@ static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg)
 	scratch_gamma_args.a3 = dc_fixpt_zero;
 	scratch_gamma_args.gamma = gamma;
 
+	if (use_eetf)
+		return translate_from_linear_space_long(&scratch_gamma_args);
+
 	return translate_from_linear_space(&scratch_gamma_args);
 }
 
+
 static struct fixed31_32 translate_to_linear_space(
 	struct fixed31_32 arg,
 	struct fixed31_32 a0,
@@ -898,7 +942,6 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
 	struct fixed31_32 max_display;
 	struct fixed31_32 min_display;
 	struct fixed31_32 max_content;
-	struct fixed31_32 min_content;
 	struct fixed31_32 clip = dc_fixpt_one;
 	struct fixed31_32 output;
 	bool use_eetf = false;
@@ -912,7 +955,6 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
 	max_display = dc_fixpt_from_int(fs_params->max_display);
 	min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
 	max_content = dc_fixpt_from_int(fs_params->max_content);
-	min_content = dc_fixpt_from_fraction(fs_params->min_content, 10000);
 	sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
 
 	if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
@@ -920,11 +962,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
 	if (fs_params->max_display < 100) // cap at 100 at the top
 		max_display = dc_fixpt_from_int(100);
 
-	if (fs_params->min_content < fs_params->min_display)
-		use_eetf = true;
-	else
-		min_content = min_display;
-
+	// only max used, we don't adjust min luminance
 	if (fs_params->max_content > fs_params->max_display)
 		use_eetf = true;
 	else
@@ -950,7 +988,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
 				if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
 					output = dc_fixpt_zero;
 				else
-					output = calculate_gamma22(scaledX);
+					output = calculate_gamma22(scaledX, use_eetf);
 
 				rgb->r = output;
 				rgb->g = output;
@@ -1965,10 +2003,28 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 	tf_pts->x_point_at_y1_green = 1;
 	tf_pts->x_point_at_y1_blue = 1;
 
-	map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
-			coordinates_x, axis_x, curve,
-			MAX_HW_POINTS, tf_pts,
-			mapUserRamp && ramp && ramp->type == GAMMA_RGB_256);
+	if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
+		/* just copy current rgb_regamma into  tf_pts */
+		struct pwl_float_data_ex *curvePt = curve;
+		int i = 0;
+
+		while (i <= MAX_HW_POINTS) {
+			tf_pts->red[i]   = curvePt->r;
+			tf_pts->green[i] = curvePt->g;
+			tf_pts->blue[i]  = curvePt->b;
+			++curvePt;
+			++i;
+		}
+	} else {
+		//clamps to 0-1
+		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
+				coordinates_x, axis_x, curve,
+				MAX_HW_POINTS, tf_pts,
+				mapUserRamp && ramp && ramp->type == GAMMA_RGB_256);
+	}
+
+
+
 	if (ramp->type == GAMMA_CUSTOM)
 		apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
 
@@ -2173,5 +2229,3 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
 rgb_degamma_alloc_fail:
 	return ret;
 }
-
-
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index ec70c9b12e1a..6e5ecefe7d9d 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -37,8 +37,8 @@
 #define STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME ((1000 / 60) * 65)
 /* Number of elements in the render times cache array */
 #define RENDER_TIMES_MAX_COUNT 10
-/* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */
-#define BTR_EXIT_MARGIN 2000
+/* Threshold to exit/exit BTR (to avoid frequent enter-exits at the lower limit) */
+#define BTR_MAX_MARGIN 2500
 /* Threshold to change BTR multiplier (to avoid frequent changes) */
 #define BTR_DRIFT_MARGIN 2000
 /*Threshold to exit fixed refresh rate*/
@@ -122,7 +122,7 @@ static unsigned int calc_v_total_from_refresh(
 		const struct dc_stream_state *stream,
 		unsigned int refresh_in_uhz)
 {
-	unsigned int v_total = stream->timing.v_total;
+	unsigned int v_total;
 	unsigned int frame_duration_in_ns;
 
 	frame_duration_in_ns =
@@ -234,6 +234,10 @@ static void update_v_total_for_static_ramp(
 			current_duration_in_us) * (stream->timing.pix_clk_100hz / 10)),
 				stream->timing.h_total), 1000);
 
+	/* v_total cannot be less than nominal */
+	if (v_total < stream->timing.v_total)
+		v_total = stream->timing.v_total;
+
 	in_out_vrr->adjust.v_total_min = v_total;
 	in_out_vrr->adjust.v_total_max = v_total;
 }
@@ -250,24 +254,22 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 	unsigned int delta_from_mid_point_in_us_1 = 0xFFFFFFFF;
 	unsigned int delta_from_mid_point_in_us_2 = 0xFFFFFFFF;
 	unsigned int frames_to_insert = 0;
-	unsigned int min_frame_duration_in_ns = 0;
-	unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us;
 	unsigned int delta_from_mid_point_delta_in_us;
-
-	min_frame_duration_in_ns = ((unsigned int) (div64_u64(
-		(1000000000ULL * 1000000),
-		in_out_vrr->max_refresh_in_uhz)));
+	unsigned int max_render_time_in_us =
+			in_out_vrr->max_duration_in_us - in_out_vrr->btr.margin_in_us;
 
 	/* Program BTR */
-	if (last_render_time_in_us + BTR_EXIT_MARGIN < max_render_time_in_us) {
+	if ((last_render_time_in_us + in_out_vrr->btr.margin_in_us / 2) < max_render_time_in_us) {
 		/* Exit Below the Range */
 		if (in_out_vrr->btr.btr_active) {
 			in_out_vrr->btr.frame_counter = 0;
 			in_out_vrr->btr.btr_active = false;
 		}
-	} else if (last_render_time_in_us > max_render_time_in_us) {
+	} else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
 		/* Enter Below the Range */
-		in_out_vrr->btr.btr_active = true;
+		if (!in_out_vrr->btr.btr_active) {
+			in_out_vrr->btr.btr_active = true;
+		}
 	}
 
 	/* BTR set to "not active" so disengage */
@@ -323,7 +325,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 		/* Choose number of frames to insert based on how close it
 		 * can get to the mid point of the variable range.
 		 */
-		if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) {
+		if ((frame_time_in_us / mid_point_frames_ceil) > in_out_vrr->min_duration_in_us &&
+				(delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2 ||
+						mid_point_frames_floor < 2)) {
 			frames_to_insert = mid_point_frames_ceil;
 			delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 -
 					delta_from_mid_point_in_us_1;
@@ -339,7 +343,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 		if (in_out_vrr->btr.frames_to_insert != 0 &&
 				delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) {
 			if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) <
-					in_out_vrr->max_duration_in_us) &&
+					max_render_time_in_us) &&
 				((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) >
 					in_out_vrr->min_duration_in_us))
 				frames_to_insert = in_out_vrr->btr.frames_to_insert;
@@ -743,6 +747,10 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 	nominal_field_rate_in_uhz =
 			mod_freesync_calc_nominal_field_rate(stream);
 
+	/* Rounded to the nearest Hz */
+	nominal_field_rate_in_uhz = 1000000ULL *
+			div_u64(nominal_field_rate_in_uhz + 500000, 1000000);
+
 	min_refresh_in_uhz = in_config->min_refresh_in_uhz;
 	max_refresh_in_uhz = in_config->max_refresh_in_uhz;
 
@@ -788,6 +796,11 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 		refresh_range = in_out_vrr->max_refresh_in_uhz -
 				in_out_vrr->min_refresh_in_uhz;
 
+		in_out_vrr->btr.margin_in_us = in_out_vrr->max_duration_in_us -
+				2 * in_out_vrr->min_duration_in_us;
+		if (in_out_vrr->btr.margin_in_us > BTR_MAX_MARGIN)
+			in_out_vrr->btr.margin_in_us = BTR_MAX_MARGIN;
+
 		in_out_vrr->supported = true;
 	}
 
@@ -803,6 +816,9 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 	in_out_vrr->btr.inserted_duration_in_us = 0;
 	in_out_vrr->btr.frames_to_insert = 0;
 	in_out_vrr->btr.frame_counter = 0;
+	in_out_vrr->fixed.fixed_active = false;
+	in_out_vrr->fixed.target_refresh_in_uhz = 0;
+
 	in_out_vrr->btr.mid_point_in_us =
 				(in_out_vrr->min_duration_in_us +
 				 in_out_vrr->max_duration_in_us) / 2;
@@ -818,6 +834,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 		in_out_vrr->adjust.v_total_max = stream->timing.v_total;
 	} else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE &&
 			refresh_range >= MIN_REFRESH_RANGE_IN_US) {
+
 		in_out_vrr->adjust.v_total_min =
 			calc_v_total_from_refresh(stream,
 				in_out_vrr->max_refresh_in_uhz);
@@ -975,13 +992,9 @@ void mod_freesync_get_settings(struct mod_freesync *mod_freesync,
 		unsigned int *inserted_frames,
 		unsigned int *inserted_duration_in_us)
 {
-	struct core_freesync *core_freesync = NULL;
-
 	if (mod_freesync == NULL)
 		return;
 
-	core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
-
 	if (vrr->supported) {
 		*v_total_min = vrr->adjust.v_total_min;
 		*v_total_max = vrr->adjust.v_total_max;
@@ -996,14 +1009,13 @@ unsigned long long mod_freesync_calc_nominal_field_rate(
 			const struct dc_stream_state *stream)
 {
 	unsigned long long nominal_field_rate_in_uhz = 0;
+	unsigned int total = stream->timing.h_total * stream->timing.v_total;
 
-	/* Calculate nominal field rate for stream */
+	/* Calculate nominal field rate for stream, rounded up to nearest integer */
 	nominal_field_rate_in_uhz = stream->timing.pix_clk_100hz / 10;
 	nominal_field_rate_in_uhz *= 1000ULL * 1000ULL * 1000ULL;
-	nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz,
-						stream->timing.h_total);
-	nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz,
-						stream->timing.v_total);
+
+	nominal_field_rate_in_uhz =	div_u64(nominal_field_rate_in_uhz, total);
 
 	return nominal_field_rate_in_uhz;
 }
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/Makefile b/drivers/gpu/drm/amd/display/modules/hdcp/Makefile
new file mode 100644
index 000000000000..904424da01b5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/Makefile
@@ -0,0 +1,33 @@
+#
+# Copyright 2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the 'hdcp' sub-module of DAL.
+#
+
+HDCP = hdcp_ddc.o hdcp_log.o hdcp_psp.o hdcp.o \
+		hdcp1_execution.o hdcp1_transition.o \
+		hdcp2_execution.o hdcp2_transition.o
+
+AMD_DAL_HDCP = $(addprefix $(AMDDALPATH)/modules/hdcp/,$(HDCP))
+#$(info ************  DAL-HDCP_MAKEFILE ************)
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HDCP)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
new file mode 100644
index 000000000000..8aa528e874c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hdcp.h"
+
+static void push_error_status(struct mod_hdcp *hdcp,
+		enum mod_hdcp_status status)
+{
+	struct mod_hdcp_trace *trace = &hdcp->connection.trace;
+
+	if (trace->error_count < MAX_NUM_OF_ERROR_TRACE) {
+		trace->errors[trace->error_count].status = status;
+		trace->errors[trace->error_count].state_id = hdcp->state.id;
+		trace->error_count++;
+		HDCP_ERROR_TRACE(hdcp, status);
+	}
+
+	if (is_hdcp1(hdcp)) {
+		hdcp->connection.hdcp1_retry_count++;
+	} else if (is_hdcp2(hdcp)) {
+		hdcp->connection.hdcp2_retry_count++;
+	}
+}
+
+static uint8_t is_cp_desired_hdcp1(struct mod_hdcp *hdcp)
+{
+	int i, is_auth_needed = 0;
+
+	/* if all displays on the link don't need authentication,
+	 * hdcp is not desired
+	 */
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+		if (hdcp->connection.displays[i].state != MOD_HDCP_DISPLAY_INACTIVE &&
+				!hdcp->connection.displays[i].adjust.disable) {
+			is_auth_needed = 1;
+			break;
+		}
+	}
+
+	return (hdcp->connection.hdcp1_retry_count < MAX_NUM_OF_ATTEMPTS) &&
+			is_auth_needed &&
+			!hdcp->connection.link.adjust.hdcp1.disable;
+}
+
+static uint8_t is_cp_desired_hdcp2(struct mod_hdcp *hdcp)
+{
+	int i, is_auth_needed = 0;
+
+	/* if all displays on the link don't need authentication,
+	 * hdcp is not desired
+	 */
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+		if (hdcp->connection.displays[i].state != MOD_HDCP_DISPLAY_INACTIVE &&
+				!hdcp->connection.displays[i].adjust.disable) {
+			is_auth_needed = 1;
+			break;
+		}
+	}
+
+	return (hdcp->connection.hdcp2_retry_count < MAX_NUM_OF_ATTEMPTS) &&
+			is_auth_needed &&
+			!hdcp->connection.link.adjust.hdcp2.disable &&
+			!hdcp->connection.is_hdcp2_revoked;
+}
+
+static enum mod_hdcp_status execution(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		union mod_hdcp_transition_input *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (is_in_initialized_state(hdcp)) {
+		if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+			event_ctx->unexpected_event = 1;
+			goto out;
+		}
+		/* initialize transition input */
+		memset(input, 0, sizeof(union mod_hdcp_transition_input));
+	} else if (is_in_cp_not_desired_state(hdcp)) {
+		if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+			event_ctx->unexpected_event = 1;
+			goto out;
+		}
+		/* update topology event if hdcp is not desired */
+		status = mod_hdcp_add_display_topology(hdcp);
+	} else if (is_in_hdcp1_states(hdcp)) {
+		status = mod_hdcp_hdcp1_execution(hdcp, event_ctx, &input->hdcp1);
+	} else if (is_in_hdcp1_dp_states(hdcp)) {
+		status = mod_hdcp_hdcp1_dp_execution(hdcp,
+				event_ctx, &input->hdcp1);
+	} else if (is_in_hdcp2_states(hdcp)) {
+		status = mod_hdcp_hdcp2_execution(hdcp, event_ctx, &input->hdcp2);
+	} else if (is_in_hdcp2_dp_states(hdcp)) {
+		status = mod_hdcp_hdcp2_dp_execution(hdcp,
+				event_ctx, &input->hdcp2);
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status transition(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		union mod_hdcp_transition_input *input,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->unexpected_event)
+		goto out;
+
+	if (is_in_initialized_state(hdcp)) {
+		if (is_dp_hdcp(hdcp))
+			if (is_cp_desired_hdcp2(hdcp)) {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, D2_A0_DETERMINE_RX_HDCP_CAPABLE);
+			} else if (is_cp_desired_hdcp1(hdcp)) {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, D1_A0_DETERMINE_RX_HDCP_CAPABLE);
+			} else {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED);
+			}
+		else if (is_hdmi_dvi_sl_hdcp(hdcp))
+			if (is_cp_desired_hdcp2(hdcp)) {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, H2_A0_KNOWN_HDCP2_CAPABLE_RX);
+			} else if (is_cp_desired_hdcp1(hdcp)) {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, H1_A0_WAIT_FOR_ACTIVE_RX);
+			} else {
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED);
+			}
+		else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED);
+		}
+	} else if (is_in_cp_not_desired_state(hdcp)) {
+		increment_stay_counter(hdcp);
+	} else if (is_in_hdcp1_states(hdcp)) {
+		status = mod_hdcp_hdcp1_transition(hdcp,
+				event_ctx, &input->hdcp1, output);
+	} else if (is_in_hdcp1_dp_states(hdcp)) {
+		status = mod_hdcp_hdcp1_dp_transition(hdcp,
+				event_ctx, &input->hdcp1, output);
+	} else if (is_in_hdcp2_states(hdcp)) {
+		status = mod_hdcp_hdcp2_transition(hdcp,
+				event_ctx, &input->hdcp2, output);
+	} else if (is_in_hdcp2_dp_states(hdcp)) {
+		status = mod_hdcp_hdcp2_dp_transition(hdcp,
+				event_ctx, &input->hdcp2, output);
+	} else {
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status reset_authentication(struct mod_hdcp *hdcp,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (is_hdcp1(hdcp)) {
+		if (hdcp->auth.trans_input.hdcp1.create_session != UNKNOWN) {
+			/* TODO - update psp to unify create session failure
+			 * recovery between hdcp1 and 2.
+			 */
+			mod_hdcp_hdcp1_destroy_session(hdcp);
+
+		}
+		if (hdcp->auth.trans_input.hdcp1.add_topology == PASS) {
+			status = mod_hdcp_remove_display_topology(hdcp);
+			if (status != MOD_HDCP_STATUS_SUCCESS) {
+				output->callback_needed = 0;
+				output->watchdog_timer_needed = 0;
+				goto out;
+			}
+		}
+		HDCP_TOP_RESET_AUTH_TRACE(hdcp);
+		memset(&hdcp->auth, 0, sizeof(struct mod_hdcp_authentication));
+		memset(&hdcp->state, 0, sizeof(struct mod_hdcp_state));
+		set_state_id(hdcp, output, HDCP_INITIALIZED);
+	} else if (is_hdcp2(hdcp)) {
+		if (hdcp->auth.trans_input.hdcp2.create_session == PASS) {
+			status = mod_hdcp_hdcp2_destroy_session(hdcp);
+			if (status != MOD_HDCP_STATUS_SUCCESS) {
+				output->callback_needed = 0;
+				output->watchdog_timer_needed = 0;
+				goto out;
+			}
+		}
+		if (hdcp->auth.trans_input.hdcp2.add_topology == PASS) {
+			status = mod_hdcp_remove_display_topology(hdcp);
+			if (status != MOD_HDCP_STATUS_SUCCESS) {
+				output->callback_needed = 0;
+				output->watchdog_timer_needed = 0;
+				goto out;
+			}
+		}
+		HDCP_TOP_RESET_AUTH_TRACE(hdcp);
+		memset(&hdcp->auth, 0, sizeof(struct mod_hdcp_authentication));
+		memset(&hdcp->state, 0, sizeof(struct mod_hdcp_state));
+		set_state_id(hdcp, output, HDCP_INITIALIZED);
+	} else if (is_in_cp_not_desired_state(hdcp)) {
+		status = mod_hdcp_remove_display_topology(hdcp);
+		if (status != MOD_HDCP_STATUS_SUCCESS) {
+			output->callback_needed = 0;
+			output->watchdog_timer_needed = 0;
+			goto out;
+		}
+		HDCP_TOP_RESET_AUTH_TRACE(hdcp);
+		memset(&hdcp->auth, 0, sizeof(struct mod_hdcp_authentication));
+		memset(&hdcp->state, 0, sizeof(struct mod_hdcp_state));
+		set_state_id(hdcp, output, HDCP_INITIALIZED);
+	}
+
+out:
+	/* stop callback and watchdog requests from previous authentication*/
+	output->watchdog_timer_stop = 1;
+	output->callback_stop = 1;
+	return status;
+}
+
+static enum mod_hdcp_status reset_connection(struct mod_hdcp *hdcp,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	memset(output, 0, sizeof(struct mod_hdcp_output));
+
+	status = reset_authentication(hdcp, output);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	if (current_state(hdcp) != HDCP_UNINITIALIZED) {
+		HDCP_TOP_RESET_CONN_TRACE(hdcp);
+		set_state_id(hdcp, output, HDCP_UNINITIALIZED);
+	}
+	memset(&hdcp->connection, 0, sizeof(hdcp->connection));
+out:
+	return status;
+}
+
+/*
+ * Implementation of functions in mod_hdcp.h
+ */
+size_t mod_hdcp_get_memory_size(void)
+{
+	return sizeof(struct mod_hdcp);
+}
+
+enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp,
+		struct mod_hdcp_config *config)
+{
+	struct mod_hdcp_output output;
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	memset(hdcp, 0, sizeof(struct mod_hdcp));
+	memset(&output, 0, sizeof(output));
+	hdcp->config = *config;
+	HDCP_TOP_INTERFACE_TRACE(hdcp);
+	status = reset_connection(hdcp, &output);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		push_error_status(hdcp, status);
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_teardown(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_output output;
+
+	HDCP_TOP_INTERFACE_TRACE(hdcp);
+	memset(&output, 0,  sizeof(output));
+	status = reset_connection(hdcp, &output);
+	if (status == MOD_HDCP_STATUS_SUCCESS)
+		memset(hdcp, 0, sizeof(struct mod_hdcp));
+	else
+		push_error_status(hdcp, status);
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp,
+		struct mod_hdcp_link *link, struct mod_hdcp_display *display,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_display *display_container = NULL;
+
+	HDCP_TOP_INTERFACE_TRACE_WITH_INDEX(hdcp, display->index);
+	memset(output, 0, sizeof(struct mod_hdcp_output));
+
+	/* skip inactive display */
+	if (display->state != MOD_HDCP_DISPLAY_ACTIVE) {
+		status = MOD_HDCP_STATUS_SUCCESS;
+		goto out;
+	}
+
+	/* check existing display container */
+	if (get_active_display_at_index(hdcp, display->index)) {
+		status = MOD_HDCP_STATUS_SUCCESS;
+		goto out;
+	}
+
+	/* find an empty display container */
+	display_container = get_empty_display_container(hdcp);
+	if (!display_container) {
+		status = MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND;
+		goto out;
+	}
+
+	/* reset existing authentication status */
+	status = reset_authentication(hdcp, output);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	/* add display to connection */
+	hdcp->connection.link = *link;
+	*display_container = *display;
+
+	/* reset retry counters */
+	reset_retry_counts(hdcp);
+
+	/* reset error trace */
+	memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace));
+
+	/* request authentication */
+	if (current_state(hdcp) != HDCP_INITIALIZED)
+		set_state_id(hdcp, output, HDCP_INITIALIZED);
+	callback_in_ms(hdcp->connection.link.adjust.auth_delay * 1000, output);
+out:
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		push_error_status(hdcp, status);
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp,
+		uint8_t index, struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_display *display = NULL;
+
+	HDCP_TOP_INTERFACE_TRACE_WITH_INDEX(hdcp, index);
+	memset(output, 0, sizeof(struct mod_hdcp_output));
+
+	/* find display in connection */
+	display = get_active_display_at_index(hdcp, index);
+	if (!display) {
+		status = MOD_HDCP_STATUS_SUCCESS;
+		goto out;
+	}
+
+	/* stop current authentication */
+	status = reset_authentication(hdcp, output);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	/* remove display */
+	display->state = MOD_HDCP_DISPLAY_INACTIVE;
+
+	/* clear retry counters */
+	reset_retry_counts(hdcp);
+
+	/* reset error trace */
+	memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace));
+
+	/* request authentication for remaining displays*/
+	if (get_active_display_count(hdcp) > 0)
+		callback_in_ms(hdcp->connection.link.adjust.auth_delay * 1000,
+				output);
+out:
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		push_error_status(hdcp, status);
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_query_display(struct mod_hdcp *hdcp,
+		uint8_t index, struct mod_hdcp_display_query *query)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_display *display = NULL;
+
+	/* find display in connection */
+	display = get_active_display_at_index(hdcp, index);
+	if (!display) {
+		status = MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+		goto out;
+	}
+
+	/* populate query */
+	query->link = &hdcp->connection.link;
+	query->display = display;
+	query->trace = &hdcp->connection.trace;
+	query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+
+	if (is_display_encryption_enabled(display)) {
+		if (is_hdcp1(hdcp)) {
+			query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP1_ON;
+		} else if (is_hdcp2(hdcp)) {
+			if (query->link->adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0)
+				query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON;
+			else if (query->link->adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_1)
+				query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON;
+			else
+				query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_ON;
+		}
+	} else {
+		query->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+	}
+
+out:
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_reset_connection(struct mod_hdcp *hdcp,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	HDCP_TOP_INTERFACE_TRACE(hdcp);
+	status = reset_connection(hdcp, output);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		push_error_status(hdcp, status);
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_process_event(struct mod_hdcp *hdcp,
+		enum mod_hdcp_event event, struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status exec_status, trans_status, reset_status, status;
+	struct mod_hdcp_event_context event_ctx;
+
+	HDCP_EVENT_TRACE(hdcp, event);
+	memset(output, 0, sizeof(struct mod_hdcp_output));
+	memset(&event_ctx, 0, sizeof(struct mod_hdcp_event_context));
+	event_ctx.event = event;
+
+	/* execute and transition */
+	exec_status = execution(hdcp, &event_ctx, &hdcp->auth.trans_input);
+	trans_status = transition(
+			hdcp, &event_ctx, &hdcp->auth.trans_input, output);
+	if (trans_status == MOD_HDCP_STATUS_SUCCESS) {
+		status = MOD_HDCP_STATUS_SUCCESS;
+	} else if (exec_status == MOD_HDCP_STATUS_SUCCESS) {
+		status = MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE;
+		push_error_status(hdcp, status);
+	} else {
+		status = exec_status;
+		push_error_status(hdcp, status);
+	}
+
+	/* reset authentication if needed */
+	if (trans_status == MOD_HDCP_STATUS_RESET_NEEDED) {
+		HDCP_FULL_DDC_TRACE(hdcp);
+		reset_status = reset_authentication(hdcp, output);
+		if (reset_status != MOD_HDCP_STATUS_SUCCESS)
+			push_error_status(hdcp, reset_status);
+	}
+	return status;
+}
+
+enum mod_hdcp_operation_mode mod_hdcp_signal_type_to_operation_mode(
+		enum signal_type signal)
+{
+	enum mod_hdcp_operation_mode mode = MOD_HDCP_MODE_OFF;
+
+	switch (signal) {
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		mode = MOD_HDCP_MODE_DEFAULT;
+		break;
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		mode = MOD_HDCP_MODE_DP;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		mode = MOD_HDCP_MODE_DP_MST;
+		break;
+	default:
+		break;
+	}
+
+	return mode;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
new file mode 100644
index 000000000000..f98d3d9ecb6d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
@@ -0,0 +1,587 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef HDCP_H_
+#define HDCP_H_
+
+#include "mod_hdcp.h"
+#include "hdcp_log.h"
+
+#include <drm/drm_hdcp.h>
+#include <drm/drm_dp_helper.h>
+
+enum mod_hdcp_trans_input_result {
+	UNKNOWN = 0,
+	PASS,
+	FAIL
+};
+
+struct mod_hdcp_transition_input_hdcp1 {
+	uint8_t bksv_read;
+	uint8_t bksv_validation;
+	uint8_t add_topology;
+	uint8_t create_session;
+	uint8_t an_write;
+	uint8_t aksv_write;
+	uint8_t ainfo_write;
+	uint8_t bcaps_read;
+	uint8_t r0p_read;
+	uint8_t rx_validation;
+	uint8_t encryption;
+	uint8_t link_maintenance;
+	uint8_t ready_check;
+	uint8_t bstatus_read;
+	uint8_t max_cascade_check;
+	uint8_t max_devs_check;
+	uint8_t device_count_check;
+	uint8_t ksvlist_read;
+	uint8_t vp_read;
+	uint8_t ksvlist_vp_validation;
+
+	uint8_t hdcp_capable_dp;
+	uint8_t binfo_read_dp;
+	uint8_t r0p_available_dp;
+	uint8_t link_integiry_check;
+	uint8_t reauth_request_check;
+	uint8_t stream_encryption_dp;
+};
+
+struct mod_hdcp_transition_input_hdcp2 {
+	uint8_t hdcp2version_read;
+	uint8_t hdcp2_capable_check;
+	uint8_t add_topology;
+	uint8_t create_session;
+	uint8_t ake_init_prepare;
+	uint8_t ake_init_write;
+	uint8_t rxstatus_read;
+	uint8_t ake_cert_available;
+	uint8_t ake_cert_read;
+	uint8_t ake_cert_validation;
+	uint8_t stored_km_write;
+	uint8_t no_stored_km_write;
+	uint8_t h_prime_available;
+	uint8_t h_prime_read;
+	uint8_t pairing_available;
+	uint8_t pairing_info_read;
+	uint8_t h_prime_validation;
+	uint8_t lc_init_prepare;
+	uint8_t lc_init_write;
+	uint8_t l_prime_available_poll;
+	uint8_t l_prime_read;
+	uint8_t l_prime_validation;
+	uint8_t eks_prepare;
+	uint8_t eks_write;
+	uint8_t enable_encryption;
+	uint8_t reauth_request_check;
+	uint8_t rx_id_list_read;
+	uint8_t device_count_check;
+	uint8_t rx_id_list_validation;
+	uint8_t repeater_auth_ack_write;
+	uint8_t prepare_stream_manage;
+	uint8_t stream_manage_write;
+	uint8_t stream_ready_available;
+	uint8_t stream_ready_read;
+	uint8_t stream_ready_validation;
+
+	uint8_t rx_caps_read_dp;
+	uint8_t content_stream_type_write;
+	uint8_t link_integrity_check_dp;
+	uint8_t stream_encryption_dp;
+};
+
+union mod_hdcp_transition_input {
+	struct mod_hdcp_transition_input_hdcp1 hdcp1;
+	struct mod_hdcp_transition_input_hdcp2 hdcp2;
+};
+
+struct mod_hdcp_message_hdcp1 {
+	uint8_t		an[8];
+	uint8_t		aksv[5];
+	uint8_t		ainfo;
+	uint8_t		bksv[5];
+	uint16_t	r0p;
+	uint8_t		bcaps;
+	uint16_t	bstatus;
+	uint8_t		ksvlist[635];
+	uint16_t	ksvlist_size;
+	uint8_t		vp[20];
+
+	uint16_t	binfo_dp;
+};
+
+struct mod_hdcp_message_hdcp2 {
+	uint8_t		hdcp2version_hdmi;
+	uint8_t		rxcaps_dp[3];
+	uint8_t		rxstatus[2];
+
+	uint8_t		ake_init[12];
+	uint8_t		ake_cert[534];
+	uint8_t		ake_no_stored_km[129];
+	uint8_t		ake_stored_km[33];
+	uint8_t		ake_h_prime[33];
+	uint8_t		ake_pairing_info[17];
+	uint8_t		lc_init[9];
+	uint8_t		lc_l_prime[33];
+	uint8_t		ske_eks[25];
+	uint8_t		rx_id_list[177]; // 22 + 5 * 31
+	uint16_t	rx_id_list_size;
+	uint8_t		repeater_auth_ack[17];
+	uint8_t		repeater_auth_stream_manage[68]; // 6 + 2 * 31
+	uint16_t	stream_manage_size;
+	uint8_t		repeater_auth_stream_ready[33];
+	uint8_t		rxstatus_dp;
+	uint8_t		content_stream_type_dp[2];
+};
+
+union mod_hdcp_message {
+	struct mod_hdcp_message_hdcp1 hdcp1;
+	struct mod_hdcp_message_hdcp2 hdcp2;
+};
+
+struct mod_hdcp_auth_counters {
+	uint8_t stream_management_retry_count;
+};
+
+/* contains values per connection */
+struct mod_hdcp_connection {
+	struct mod_hdcp_link link;
+	struct mod_hdcp_display displays[MAX_NUM_OF_DISPLAYS];
+	uint8_t is_repeater;
+	uint8_t is_km_stored;
+	uint8_t is_hdcp2_revoked;
+	struct mod_hdcp_trace trace;
+	uint8_t hdcp1_retry_count;
+	uint8_t hdcp2_retry_count;
+};
+
+/* contains values per authentication cycle */
+struct mod_hdcp_authentication {
+	uint32_t id;
+	union mod_hdcp_message msg;
+	union mod_hdcp_transition_input trans_input;
+	struct mod_hdcp_auth_counters count;
+};
+
+/* contains values per state change */
+struct mod_hdcp_state {
+	uint8_t id;
+	uint32_t stay_count;
+};
+
+/* per event in a state */
+struct mod_hdcp_event_context {
+	enum mod_hdcp_event event;
+	uint8_t rx_id_list_ready;
+	uint8_t unexpected_event;
+};
+
+struct mod_hdcp {
+	/* per link */
+	struct mod_hdcp_config config;
+	/* per connection */
+	struct mod_hdcp_connection connection;
+	/* per authentication attempt */
+	struct mod_hdcp_authentication auth;
+	/* per state in an authentication */
+	struct mod_hdcp_state state;
+	/* reserved memory buffer */
+	uint8_t buf[2025];
+};
+
+enum mod_hdcp_initial_state_id {
+	HDCP_UNINITIALIZED = 0x0,
+	HDCP_INITIAL_STATE_START = HDCP_UNINITIALIZED,
+	HDCP_INITIALIZED,
+	HDCP_CP_NOT_DESIRED,
+	HDCP_INITIAL_STATE_END = HDCP_CP_NOT_DESIRED
+};
+
+enum mod_hdcp_hdcp1_state_id {
+	HDCP1_STATE_START = HDCP_INITIAL_STATE_END,
+	H1_A0_WAIT_FOR_ACTIVE_RX,
+	H1_A1_EXCHANGE_KSVS,
+	H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER,
+	H1_A45_AUTHENTICATED,
+	H1_A8_WAIT_FOR_READY,
+	H1_A9_READ_KSV_LIST,
+	HDCP1_STATE_END = H1_A9_READ_KSV_LIST
+};
+
+enum mod_hdcp_hdcp1_dp_state_id {
+	HDCP1_DP_STATE_START = HDCP1_STATE_END,
+	D1_A0_DETERMINE_RX_HDCP_CAPABLE,
+	D1_A1_EXCHANGE_KSVS,
+	D1_A23_WAIT_FOR_R0_PRIME,
+	D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER,
+	D1_A4_AUTHENTICATED,
+	D1_A6_WAIT_FOR_READY,
+	D1_A7_READ_KSV_LIST,
+	HDCP1_DP_STATE_END = D1_A7_READ_KSV_LIST,
+};
+
+enum mod_hdcp_hdcp2_state_id {
+	HDCP2_STATE_START = HDCP1_DP_STATE_END,
+	H2_A0_KNOWN_HDCP2_CAPABLE_RX,
+	H2_A1_SEND_AKE_INIT,
+	H2_A1_VALIDATE_AKE_CERT,
+	H2_A1_SEND_NO_STORED_KM,
+	H2_A1_READ_H_PRIME,
+	H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME,
+	H2_A1_SEND_STORED_KM,
+	H2_A1_VALIDATE_H_PRIME,
+	H2_A2_LOCALITY_CHECK,
+	H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER,
+	H2_ENABLE_ENCRYPTION,
+	H2_A5_AUTHENTICATED,
+	H2_A6_WAIT_FOR_RX_ID_LIST,
+	H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK,
+	H2_A9_SEND_STREAM_MANAGEMENT,
+	H2_A9_VALIDATE_STREAM_READY,
+	HDCP2_STATE_END = H2_A9_VALIDATE_STREAM_READY,
+};
+
+enum mod_hdcp_hdcp2_dp_state_id {
+	HDCP2_DP_STATE_START = HDCP2_STATE_END,
+	D2_A0_DETERMINE_RX_HDCP_CAPABLE,
+	D2_A1_SEND_AKE_INIT,
+	D2_A1_VALIDATE_AKE_CERT,
+	D2_A1_SEND_NO_STORED_KM,
+	D2_A1_READ_H_PRIME,
+	D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME,
+	D2_A1_SEND_STORED_KM,
+	D2_A1_VALIDATE_H_PRIME,
+	D2_A2_LOCALITY_CHECK,
+	D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER,
+	D2_SEND_CONTENT_STREAM_TYPE,
+	D2_ENABLE_ENCRYPTION,
+	D2_A5_AUTHENTICATED,
+	D2_A6_WAIT_FOR_RX_ID_LIST,
+	D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK,
+	D2_A9_SEND_STREAM_MANAGEMENT,
+	D2_A9_VALIDATE_STREAM_READY,
+	HDCP2_DP_STATE_END = D2_A9_VALIDATE_STREAM_READY,
+	HDCP_STATE_END = HDCP2_DP_STATE_END,
+};
+
+/* hdcp1 executions and transitions */
+typedef enum mod_hdcp_status (*mod_hdcp_action)(struct mod_hdcp *hdcp);
+uint8_t mod_hdcp_execute_and_set(
+		mod_hdcp_action func, uint8_t *flag,
+		enum mod_hdcp_status *status, struct mod_hdcp *hdcp, char *str);
+enum mod_hdcp_status mod_hdcp_hdcp1_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp1 *input);
+enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp1 *input);
+enum mod_hdcp_status mod_hdcp_hdcp1_transition(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp1 *input,
+	struct mod_hdcp_output *output);
+enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp1 *input,
+	struct mod_hdcp_output *output);
+
+/* hdcp2 executions and transitions */
+enum mod_hdcp_status mod_hdcp_hdcp2_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input);
+enum mod_hdcp_status mod_hdcp_hdcp2_dp_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input);
+enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input,
+	struct mod_hdcp_output *output);
+enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input,
+	struct mod_hdcp_output *output);
+
+/* log functions */
+void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size,
+		uint8_t *buf, uint32_t buf_size);
+/* TODO: add adjustment log */
+
+/* psp functions */
+enum mod_hdcp_status mod_hdcp_add_display_topology(
+		struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_remove_display_topology(
+		struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(
+	struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_link_maintenance(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp1_get_link_encryption_status(struct mod_hdcp *hdcp,
+							       enum mod_hdcp_encryption_status *encryption_status);
+enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_enable_encryption(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(
+		struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(
+		struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(
+		struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_hdcp2_get_link_encryption_status(struct mod_hdcp *hdcp,
+							       enum mod_hdcp_encryption_status *encryption_status);
+
+/* ddc functions */
+enum mod_hdcp_status mod_hdcp_read_bksv(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_bcaps(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_bstatus(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_r0p(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_ksvlist(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_vp(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_binfo(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_aksv(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_ainfo(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_an(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_hdcp2version(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_rxcaps(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_rxstatus(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_ake_cert(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_h_prime(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_pairing_info(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_l_prime(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_rx_id_list(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_read_stream_ready(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_ake_init(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_no_stored_km(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_stored_km(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_lc_init(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_eks(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp);
+
+/* hdcp version helpers */
+static inline uint8_t is_dp_hdcp(struct mod_hdcp *hdcp)
+{
+	return (hdcp->connection.link.mode == MOD_HDCP_MODE_DP ||
+			hdcp->connection.link.mode == MOD_HDCP_MODE_DP_MST);
+}
+
+static inline uint8_t is_dp_mst_hdcp(struct mod_hdcp *hdcp)
+{
+	return (hdcp->connection.link.mode == MOD_HDCP_MODE_DP_MST);
+}
+
+static inline uint8_t is_hdmi_dvi_sl_hdcp(struct mod_hdcp *hdcp)
+{
+	return (hdcp->connection.link.mode == MOD_HDCP_MODE_DEFAULT);
+}
+
+/* hdcp state helpers */
+static inline uint8_t current_state(struct mod_hdcp *hdcp)
+{
+	return hdcp->state.id;
+}
+
+static inline void set_state_id(struct mod_hdcp *hdcp,
+		struct mod_hdcp_output *output, uint8_t id)
+{
+	memset(&hdcp->state, 0, sizeof(hdcp->state));
+	hdcp->state.id = id;
+	/* callback timer should be reset per state */
+	output->callback_stop = 1;
+	output->watchdog_timer_stop = 1;
+	HDCP_NEXT_STATE_TRACE(hdcp, id, output);
+}
+
+static inline uint8_t is_in_hdcp1_states(struct mod_hdcp *hdcp)
+{
+	return (current_state(hdcp) > HDCP1_STATE_START &&
+			current_state(hdcp) <= HDCP1_STATE_END);
+}
+
+static inline uint8_t is_in_hdcp1_dp_states(struct mod_hdcp *hdcp)
+{
+	return (current_state(hdcp) > HDCP1_DP_STATE_START &&
+			current_state(hdcp) <= HDCP1_DP_STATE_END);
+}
+
+static inline uint8_t is_in_hdcp2_states(struct mod_hdcp *hdcp)
+{
+	return (current_state(hdcp) > HDCP2_STATE_START &&
+			current_state(hdcp) <= HDCP2_STATE_END);
+}
+
+static inline uint8_t is_in_hdcp2_dp_states(struct mod_hdcp *hdcp)
+{
+	return (current_state(hdcp) > HDCP2_DP_STATE_START &&
+			current_state(hdcp) <= HDCP2_DP_STATE_END);
+}
+
+static inline uint8_t is_hdcp1(struct mod_hdcp *hdcp)
+{
+	return (is_in_hdcp1_states(hdcp) || is_in_hdcp1_dp_states(hdcp));
+}
+
+static inline uint8_t is_hdcp2(struct mod_hdcp *hdcp)
+{
+	return (is_in_hdcp2_states(hdcp) || is_in_hdcp2_dp_states(hdcp));
+}
+
+static inline uint8_t is_in_cp_not_desired_state(struct mod_hdcp *hdcp)
+{
+	return current_state(hdcp) == HDCP_CP_NOT_DESIRED;
+}
+
+static inline uint8_t is_in_initialized_state(struct mod_hdcp *hdcp)
+{
+	return current_state(hdcp) == HDCP_INITIALIZED;
+}
+
+/* transition operation helpers */
+static inline void increment_stay_counter(struct mod_hdcp *hdcp)
+{
+	hdcp->state.stay_count++;
+}
+
+static inline void fail_and_restart_in_ms(uint16_t time,
+		enum mod_hdcp_status *status,
+		struct mod_hdcp_output *output)
+{
+	output->callback_needed = 1;
+	output->callback_delay = time;
+	output->watchdog_timer_needed = 0;
+	output->watchdog_timer_delay = 0;
+	*status = MOD_HDCP_STATUS_RESET_NEEDED;
+}
+
+static inline void callback_in_ms(uint16_t time, struct mod_hdcp_output *output)
+{
+	output->callback_needed = 1;
+	output->callback_delay = time;
+}
+
+static inline void set_watchdog_in_ms(struct mod_hdcp *hdcp, uint16_t time,
+		struct mod_hdcp_output *output)
+{
+	output->watchdog_timer_needed = 1;
+	output->watchdog_timer_delay = time;
+}
+
+/* connection topology helpers */
+static inline uint8_t is_display_active(struct mod_hdcp_display *display)
+{
+	return display->state >= MOD_HDCP_DISPLAY_ACTIVE;
+}
+
+static inline uint8_t is_display_added(struct mod_hdcp_display *display)
+{
+	return display->state >= MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
+}
+
+static inline uint8_t is_display_encryption_enabled(struct mod_hdcp_display *display)
+{
+	return display->state >= MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
+}
+
+static inline uint8_t get_active_display_count(struct mod_hdcp *hdcp)
+{
+	uint8_t added_count = 0;
+	uint8_t i;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+		if (is_display_active(&hdcp->connection.displays[i]))
+			added_count++;
+	return added_count;
+}
+
+static inline uint8_t get_added_display_count(struct mod_hdcp *hdcp)
+{
+	uint8_t added_count = 0;
+	uint8_t i;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+		if (is_display_added(&hdcp->connection.displays[i]))
+			added_count++;
+	return added_count;
+}
+
+static inline struct mod_hdcp_display *get_first_added_display(
+		struct mod_hdcp *hdcp)
+{
+	uint8_t i;
+	struct mod_hdcp_display *display = NULL;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+		if (is_display_added(&hdcp->connection.displays[i])) {
+			display = &hdcp->connection.displays[i];
+			break;
+		}
+	return display;
+}
+
+static inline struct mod_hdcp_display *get_active_display_at_index(
+		struct mod_hdcp *hdcp, uint8_t index)
+{
+	uint8_t i;
+	struct mod_hdcp_display *display = NULL;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+		if (hdcp->connection.displays[i].index == index &&
+				is_display_active(&hdcp->connection.displays[i])) {
+			display = &hdcp->connection.displays[i];
+			break;
+		}
+	return display;
+}
+
+static inline struct mod_hdcp_display *get_empty_display_container(
+		struct mod_hdcp *hdcp)
+{
+	uint8_t i;
+	struct mod_hdcp_display *display = NULL;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+		if (!is_display_active(&hdcp->connection.displays[i])) {
+			display = &hdcp->connection.displays[i];
+			break;
+		}
+	return display;
+}
+
+static inline void reset_retry_counts(struct mod_hdcp *hdcp)
+{
+	hdcp->connection.hdcp1_retry_count = 0;
+	hdcp->connection.hdcp2_retry_count = 0;
+}
+
+#endif /* HDCP_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
new file mode 100644
index 000000000000..04845e43df15
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hdcp.h"
+
+static inline enum mod_hdcp_status validate_bksv(struct mod_hdcp *hdcp)
+{
+	uint64_t n = 0;
+	uint8_t count = 0;
+
+	memcpy(&n, hdcp->auth.msg.hdcp1.bksv, sizeof(uint64_t));
+
+	while (n) {
+		count++;
+		n &= (n - 1);
+	}
+	return (count == 20) ? MOD_HDCP_STATUS_SUCCESS :
+			MOD_HDCP_STATUS_HDCP1_INVALID_BKSV;
+}
+
+static inline enum mod_hdcp_status check_ksv_ready(struct mod_hdcp *hdcp)
+{
+	if (is_dp_hdcp(hdcp))
+		return (hdcp->auth.msg.hdcp1.bstatus & DP_BSTATUS_READY) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY;
+	return (hdcp->auth.msg.hdcp1.bcaps & DRM_HDCP_DDC_BCAPS_KSV_FIFO_READY) ?
+			MOD_HDCP_STATUS_SUCCESS :
+			MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY;
+}
+
+static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp)
+{
+	return (hdcp->auth.msg.hdcp1.bcaps & DP_BCAPS_HDCP_CAPABLE) ?
+			MOD_HDCP_STATUS_SUCCESS :
+			MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE;
+}
+
+static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	if (is_dp_hdcp(hdcp)) {
+		status = (hdcp->auth.msg.hdcp1.bstatus &
+				DP_BSTATUS_R0_PRIME_READY) ?
+			MOD_HDCP_STATUS_SUCCESS :
+			MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING;
+	} else {
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	}
+	return status;
+}
+
+static inline enum mod_hdcp_status check_link_integrity_dp(
+		struct mod_hdcp *hdcp)
+{
+	return (hdcp->auth.msg.hdcp1.bstatus &
+			DP_BSTATUS_LINK_FAILURE) ?
+			MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE :
+			MOD_HDCP_STATUS_SUCCESS;
+}
+
+static inline enum mod_hdcp_status check_no_reauthentication_request_dp(
+		struct mod_hdcp *hdcp)
+{
+	return (hdcp->auth.msg.hdcp1.bstatus & DP_BSTATUS_REAUTH_REQ) ?
+			MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED :
+			MOD_HDCP_STATUS_SUCCESS;
+}
+
+static inline enum mod_hdcp_status check_no_max_cascade(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = DRM_HDCP_MAX_CASCADE_EXCEEDED(hdcp->auth.msg.hdcp1.binfo_dp >> 8)
+				 ? MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE
+				 : MOD_HDCP_STATUS_SUCCESS;
+	else
+		status = DRM_HDCP_MAX_CASCADE_EXCEEDED(hdcp->auth.msg.hdcp1.bstatus >> 8)
+				 ? MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE
+				 : MOD_HDCP_STATUS_SUCCESS;
+	return status;
+}
+
+static inline enum mod_hdcp_status check_no_max_devs(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = DRM_HDCP_MAX_DEVICE_EXCEEDED(hdcp->auth.msg.hdcp1.binfo_dp) ?
+				MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE :
+				MOD_HDCP_STATUS_SUCCESS;
+	else
+		status = DRM_HDCP_MAX_DEVICE_EXCEEDED(hdcp->auth.msg.hdcp1.bstatus) ?
+				MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE :
+				MOD_HDCP_STATUS_SUCCESS;
+	return status;
+}
+
+static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
+{
+	return is_dp_hdcp(hdcp) ?
+			DRM_HDCP_NUM_DOWNSTREAM(hdcp->auth.msg.hdcp1.binfo_dp) :
+			DRM_HDCP_NUM_DOWNSTREAM(hdcp->auth.msg.hdcp1.bstatus);
+}
+
+static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
+{
+	/* device count must be greater than or equal to tracked hdcp displays */
+	return (get_device_count(hdcp) < get_added_display_count(hdcp)) ?
+			MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE :
+			MOD_HDCP_STATUS_SUCCESS;
+}
+
+static enum mod_hdcp_status wait_for_active_rx(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bksv,
+			&input->bksv_read, &status,
+			hdcp, "bksv_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bcaps,
+			&input->bcaps_read, &status,
+			hdcp, "bcaps_read"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status exchange_ksvs(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_add_display_topology,
+			&input->add_topology, &status,
+			hdcp, "add_topology"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_create_session,
+			&input->create_session, &status,
+			hdcp, "create_session"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_an,
+			&input->an_write, &status,
+			hdcp, "an_write"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_aksv,
+			&input->aksv_write, &status,
+			hdcp, "aksv_write"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bksv,
+			&input->bksv_read, &status,
+			hdcp, "bksv_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(validate_bksv,
+			&input->bksv_validation, &status,
+			hdcp, "bksv_validation"))
+		goto out;
+	if (hdcp->auth.msg.hdcp1.ainfo) {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_write_ainfo,
+				&input->ainfo_write, &status,
+				hdcp, "ainfo_write"))
+			goto out;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status computations_validate_rx_test_for_repeater(
+		struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_r0p,
+			&input->r0p_read, &status,
+			hdcp, "r0p_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_validate_rx,
+			&input->rx_validation, &status,
+			hdcp, "rx_validation"))
+		goto out;
+	if (hdcp->connection.is_repeater) {
+		if (!hdcp->connection.link.adjust.hdcp1.postpone_encryption)
+			if (!mod_hdcp_execute_and_set(
+					mod_hdcp_hdcp1_enable_encryption,
+					&input->encryption, &status,
+					hdcp, "encryption"))
+				goto out;
+	} else {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_enable_encryption,
+				&input->encryption, &status,
+				hdcp, "encryption"))
+			goto out;
+		if (is_dp_mst_hdcp(hdcp))
+			if (!mod_hdcp_execute_and_set(
+					mod_hdcp_hdcp1_enable_dp_stream_encryption,
+					&input->stream_encryption_dp, &status,
+					hdcp, "stream_encryption_dp"))
+				goto out;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status authenticated(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_link_maintenance,
+			&input->link_maintenance, &status,
+			hdcp, "link_maintenance"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status wait_for_ready(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (is_dp_hdcp(hdcp)) {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
+				&input->bstatus_read, &status,
+				hdcp, "bstatus_read"))
+			goto out;
+		if (!mod_hdcp_execute_and_set(check_link_integrity_dp,
+				&input->link_integiry_check, &status,
+				hdcp, "link_integiry_check"))
+			goto out;
+		if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp,
+				&input->reauth_request_check, &status,
+				hdcp, "reauth_request_check"))
+			goto out;
+	} else {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_read_bcaps,
+				&input->bcaps_read, &status,
+				hdcp, "bcaps_read"))
+			goto out;
+	}
+	if (!mod_hdcp_execute_and_set(check_ksv_ready,
+			&input->ready_check, &status,
+			hdcp, "ready_check"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status read_ksv_list(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	uint8_t device_count;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (is_dp_hdcp(hdcp)) {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_read_binfo,
+				&input->binfo_read_dp, &status,
+				hdcp, "binfo_read_dp"))
+			goto out;
+	} else {
+		if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
+				&input->bstatus_read, &status,
+				hdcp, "bstatus_read"))
+			goto out;
+	}
+	if (!mod_hdcp_execute_and_set(check_no_max_cascade,
+			&input->max_cascade_check, &status,
+			hdcp, "max_cascade_check"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_no_max_devs,
+			&input->max_devs_check, &status,
+			hdcp, "max_devs_check"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_device_count,
+			&input->device_count_check, &status,
+			hdcp, "device_count_check"))
+		goto out;
+	device_count = get_device_count(hdcp);
+	hdcp->auth.msg.hdcp1.ksvlist_size = device_count*5;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_ksvlist,
+			&input->ksvlist_read, &status,
+			hdcp, "ksvlist_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_vp,
+			&input->vp_read, &status,
+			hdcp, "vp_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_validate_ksvlist_vp,
+			&input->ksvlist_vp_validation, &status,
+			hdcp, "ksvlist_vp_validation"))
+		goto out;
+	if (input->encryption != PASS)
+		if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp1_enable_encryption,
+				&input->encryption, &status,
+				hdcp, "encryption"))
+			goto out;
+	if (is_dp_mst_hdcp(hdcp))
+		if (!mod_hdcp_execute_and_set(
+				mod_hdcp_hdcp1_enable_dp_stream_encryption,
+				&input->stream_encryption_dp, &status,
+				hdcp, "stream_encryption_dp"))
+			goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status determine_rx_hdcp_capable_dp(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bcaps,
+			&input->bcaps_read, &status,
+			hdcp, "bcaps_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_hdcp_capable_dp,
+			&input->hdcp_capable_dp, &status,
+			hdcp, "hdcp_capable_dp"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status wait_for_r0_prime_dp(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
+			&input->bstatus_read, &status,
+			hdcp, "bstatus_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_r0p_available_dp,
+			&input->r0p_available_dp, &status,
+			hdcp, "r0p_available_dp"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
+			&input->bstatus_read, &status,
+			hdcp, "bstatus_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_link_integrity_dp,
+			&input->link_integiry_check, &status,
+			hdcp, "link_integiry_check"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp,
+			&input->reauth_request_check, &status,
+			hdcp, "reauth_request_check"))
+		goto out;
+out:
+	return status;
+}
+
+uint8_t mod_hdcp_execute_and_set(
+		mod_hdcp_action func, uint8_t *flag,
+		enum mod_hdcp_status *status, struct mod_hdcp *hdcp, char *str)
+{
+	*status = func(hdcp);
+	if (*status == MOD_HDCP_STATUS_SUCCESS && *flag != PASS) {
+		HDCP_INPUT_PASS_TRACE(hdcp, str);
+		*flag = PASS;
+	} else if (*status != MOD_HDCP_STATUS_SUCCESS && *flag != FAIL) {
+		HDCP_INPUT_FAIL_TRACE(hdcp, str);
+		*flag = FAIL;
+	}
+	return (*status == MOD_HDCP_STATUS_SUCCESS);
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_execution(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	switch (current_state(hdcp)) {
+	case H1_A0_WAIT_FOR_ACTIVE_RX:
+		status = wait_for_active_rx(hdcp, event_ctx, input);
+		break;
+	case H1_A1_EXCHANGE_KSVS:
+		status = exchange_ksvs(hdcp, event_ctx, input);
+		break;
+	case H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER:
+		status = computations_validate_rx_test_for_repeater(hdcp,
+				event_ctx, input);
+		break;
+	case H1_A45_AUTHENTICATED:
+		status = authenticated(hdcp, event_ctx, input);
+		break;
+	case H1_A8_WAIT_FOR_READY:
+		status = wait_for_ready(hdcp, event_ctx, input);
+		break;
+	case H1_A9_READ_KSV_LIST:
+		status = read_ksv_list(hdcp, event_ctx, input);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		break;
+	}
+
+	return status;
+}
+
+extern enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	switch (current_state(hdcp)) {
+	case D1_A0_DETERMINE_RX_HDCP_CAPABLE:
+		status = determine_rx_hdcp_capable_dp(hdcp, event_ctx, input);
+		break;
+	case D1_A1_EXCHANGE_KSVS:
+		status = exchange_ksvs(hdcp, event_ctx, input);
+		break;
+	case D1_A23_WAIT_FOR_R0_PRIME:
+		status = wait_for_r0_prime_dp(hdcp, event_ctx, input);
+		break;
+	case D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER:
+		status = computations_validate_rx_test_for_repeater(
+				hdcp, event_ctx, input);
+		break;
+	case D1_A4_AUTHENTICATED:
+		status = authenticated_dp(hdcp, event_ctx, input);
+		break;
+	case D1_A6_WAIT_FOR_READY:
+		status = wait_for_ready(hdcp, event_ctx, input);
+		break;
+	case D1_A7_READ_KSV_LIST:
+		status = read_ksv_list(hdcp, event_ctx, input);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		break;
+	}
+
+	return status;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c
new file mode 100644
index 000000000000..21ebc62bb9d9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hdcp.h"
+
+enum mod_hdcp_status mod_hdcp_hdcp1_transition(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_connection *conn = &hdcp->connection;
+	struct mod_hdcp_link_adjustment *adjust = &hdcp->connection.link.adjust;
+
+	switch (current_state(hdcp)) {
+	case H1_A0_WAIT_FOR_ACTIVE_RX:
+		if (input->bksv_read != PASS || input->bcaps_read != PASS) {
+			/* 1A-04: repeatedly attempts on port access failure */
+			callback_in_ms(500, output);
+			increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H1_A1_EXCHANGE_KSVS);
+		break;
+	case H1_A1_EXCHANGE_KSVS:
+		if (input->add_topology != PASS ||
+				input->create_session != PASS) {
+			/* out of sync with psp state */
+			adjust->hdcp1.disable = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->an_write != PASS ||
+				input->aksv_write != PASS ||
+				input->bksv_read != PASS ||
+				input->bksv_validation != PASS ||
+				input->ainfo_write == FAIL) {
+			/* 1A-05: consider invalid bksv a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(300, output);
+		set_state_id(hdcp, output,
+			H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER);
+		break;
+	case H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER:
+		if (input->bcaps_read != PASS ||
+				input->r0p_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->rx_validation != PASS) {
+			/* 1A-06: consider invalid r0' a failure */
+			/* 1A-08: consider bksv listed in SRM a failure */
+			/*
+			 * some slow RX will fail rx validation when it is
+			 * not ready. give it more time to react before retry.
+			 */
+			fail_and_restart_in_ms(1000, &status, output);
+			break;
+		} else if (!conn->is_repeater && input->encryption != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_watchdog_in_ms(hdcp, 5000, output);
+			set_state_id(hdcp, output, H1_A8_WAIT_FOR_READY);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H1_A45_AUTHENTICATED);
+			HDCP_FULL_DDC_TRACE(hdcp);
+		}
+		break;
+	case H1_A45_AUTHENTICATED:
+		if (input->link_maintenance != PASS) {
+			/* 1A-07: consider invalid ri' a failure */
+			/* 1A-07a: consider read ri' not returned a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(500, output);
+		increment_stay_counter(hdcp);
+		break;
+	case H1_A8_WAIT_FOR_READY:
+		if (input->ready_check != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1B-03: fail hdcp on ksv list READY timeout */
+				/* prevent black screen in next attempt */
+				adjust->hdcp1.postpone_encryption = 1;
+				fail_and_restart_in_ms(0, &status, output);
+			} else {
+				/* continue ksv list READY polling*/
+				callback_in_ms(500, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H1_A9_READ_KSV_LIST);
+		break;
+	case H1_A9_READ_KSV_LIST:
+		if (input->bstatus_read != PASS ||
+				input->max_cascade_check != PASS ||
+				input->max_devs_check != PASS ||
+				input->device_count_check != PASS ||
+				input->ksvlist_read != PASS ||
+				input->vp_read != PASS ||
+				input->ksvlist_vp_validation != PASS ||
+				input->encryption != PASS) {
+			/* 1B-06: consider MAX_CASCADE_EXCEEDED a failure */
+			/* 1B-05: consider MAX_DEVS_EXCEEDED a failure */
+			/* 1B-04: consider invalid v' a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H1_A45_AUTHENTICATED);
+		HDCP_FULL_DDC_TRACE(hdcp);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		fail_and_restart_in_ms(0, &status, output);
+		break;
+	}
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp1 *input,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_connection *conn = &hdcp->connection;
+	struct mod_hdcp_link_adjustment *adjust = &hdcp->connection.link.adjust;
+
+	switch (current_state(hdcp)) {
+	case D1_A0_DETERMINE_RX_HDCP_CAPABLE:
+		if (input->bcaps_read != PASS) {
+			/* 1A-04: no authentication on bcaps read failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->hdcp_capable_dp != PASS) {
+			adjust->hdcp1.disable = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D1_A1_EXCHANGE_KSVS);
+		break;
+	case D1_A1_EXCHANGE_KSVS:
+		if (input->add_topology != PASS ||
+				input->create_session != PASS) {
+			/* out of sync with psp state */
+			adjust->hdcp1.disable = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->an_write != PASS ||
+				input->aksv_write != PASS ||
+				input->bksv_read != PASS ||
+				input->bksv_validation != PASS ||
+				input->ainfo_write == FAIL) {
+			/* 1A-05: consider invalid bksv a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 100, output);
+		set_state_id(hdcp, output, D1_A23_WAIT_FOR_R0_PRIME);
+		break;
+	case D1_A23_WAIT_FOR_R0_PRIME:
+		if (input->bstatus_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->r0p_available_dp != PASS) {
+			if (event_ctx->event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
+				fail_and_restart_in_ms(0, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER);
+		break;
+	case D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER:
+		if (input->r0p_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->rx_validation != PASS) {
+			if (hdcp->state.stay_count < 2) {
+				/* allow 2 additional retries */
+				callback_in_ms(0, output);
+				increment_stay_counter(hdcp);
+			} else {
+				/*
+				 * 1A-06: consider invalid r0' a failure
+				 * after 3 attempts.
+				 * 1A-08: consider bksv listed in SRM a failure
+				 */
+				/*
+				 * some slow RX will fail rx validation when it is
+				 * not ready. give it more time to react before retry.
+				 */
+				fail_and_restart_in_ms(1000, &status, output);
+			}
+			break;
+		} else if ((!conn->is_repeater && input->encryption != PASS) ||
+				(!conn->is_repeater && is_dp_mst_hdcp(hdcp) && input->stream_encryption_dp != PASS)) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_repeater) {
+			set_watchdog_in_ms(hdcp, 5000, output);
+			set_state_id(hdcp, output, D1_A6_WAIT_FOR_READY);
+		} else {
+			set_state_id(hdcp, output, D1_A4_AUTHENTICATED);
+			HDCP_FULL_DDC_TRACE(hdcp);
+		}
+		break;
+	case D1_A4_AUTHENTICATED:
+		if (input->link_integiry_check != PASS ||
+				input->reauth_request_check != PASS) {
+			/* 1A-07: restart hdcp on a link integrity failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		break;
+	case D1_A6_WAIT_FOR_READY:
+		if (input->link_integiry_check == FAIL ||
+				input->reauth_request_check == FAIL) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->ready_check != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1B-04: fail hdcp on ksv list READY timeout */
+				/* prevent black screen in next attempt */
+				adjust->hdcp1.postpone_encryption = 1;
+				fail_and_restart_in_ms(0, &status, output);
+			} else {
+				increment_stay_counter(hdcp);
+			}
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D1_A7_READ_KSV_LIST);
+		break;
+	case D1_A7_READ_KSV_LIST:
+		if (input->binfo_read_dp != PASS ||
+				input->max_cascade_check != PASS ||
+				input->max_devs_check != PASS) {
+			/* 1B-06: consider MAX_DEVS_EXCEEDED a failure */
+			/* 1B-07: consider MAX_CASCADE_EXCEEDED a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->device_count_check != PASS) {
+			/*
+			 * some slow dongle doesn't update
+			 * device count as soon as downstream is connected.
+			 * give it more time to react.
+			 */
+			adjust->hdcp1.postpone_encryption = 1;
+			fail_and_restart_in_ms(1000, &status, output);
+			break;
+		} else if (input->ksvlist_read != PASS ||
+				input->vp_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->ksvlist_vp_validation != PASS) {
+			if (hdcp->state.stay_count < 2) {
+				/* allow 2 additional retries */
+				callback_in_ms(0, output);
+				increment_stay_counter(hdcp);
+			} else {
+				/*
+				 * 1B-05: consider invalid v' a failure
+				 * after 3 attempts.
+				 */
+				fail_and_restart_in_ms(0, &status, output);
+			}
+			break;
+		} else if (input->encryption != PASS ||
+				(is_dp_mst_hdcp(hdcp) && input->stream_encryption_dp != PASS)) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_state_id(hdcp, output, D1_A4_AUTHENTICATED);
+		HDCP_FULL_DDC_TRACE(hdcp);
+		break;
+	default:
+		fail_and_restart_in_ms(0, &status, output);
+		break;
+	}
+
+	return status;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
new file mode 100644
index 000000000000..f730b94ac3c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/delay.h>
+
+#include "hdcp.h"
+
+static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp *hdcp)
+{
+	uint8_t is_ready = 0;
+
+	if (is_dp_hdcp(hdcp))
+		is_ready = HDCP_2_2_DP_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus_dp) ? 1 : 0;
+	else
+		is_ready = (HDCP_2_2_HDMI_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus[0]) &&
+				(HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+						hdcp->auth.msg.hdcp2.rxstatus[0])) ? 1 : 0;
+	return is_ready ? MOD_HDCP_STATUS_SUCCESS :
+			MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY;
+}
+
+static inline enum mod_hdcp_status check_hdcp2_capable(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = (hdcp->auth.msg.hdcp2.rxcaps_dp[2] & HDCP_2_2_RX_CAPS_VERSION_VAL) &&
+				HDCP_2_2_DP_HDCP_CAPABLE(hdcp->auth.msg.hdcp2.rxcaps_dp[0]) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE;
+	else
+		status = (hdcp->auth.msg.hdcp2.hdcp2version_hdmi & HDCP_2_2_HDMI_SUPPORT_MASK) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE;
+	return status;
+}
+
+static inline enum mod_hdcp_status check_reauthentication_request(
+		struct mod_hdcp *hdcp)
+{
+	uint8_t ret = 0;
+
+	if (is_dp_hdcp(hdcp))
+		ret = HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(hdcp->auth.msg.hdcp2.rxstatus_dp) ?
+				MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST :
+				MOD_HDCP_STATUS_SUCCESS;
+	else
+		ret = HDCP_2_2_HDMI_RXSTATUS_REAUTH_REQ(hdcp->auth.msg.hdcp2.rxstatus[0]) ?
+				MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST :
+				MOD_HDCP_STATUS_SUCCESS;
+	return ret;
+}
+
+static inline enum mod_hdcp_status check_link_integrity_failure_dp(
+		struct mod_hdcp *hdcp)
+{
+	return HDCP_2_2_DP_RXSTATUS_LINK_FAILED(hdcp->auth.msg.hdcp2.rxstatus_dp) ?
+			MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE :
+			MOD_HDCP_STATUS_SUCCESS;
+}
+
+static enum mod_hdcp_status check_ake_cert_available(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	uint16_t size;
+
+	if (is_dp_hdcp(hdcp)) {
+		status = MOD_HDCP_STATUS_SUCCESS;
+	} else {
+		status = mod_hdcp_read_rxstatus(hdcp);
+		if (status == MOD_HDCP_STATUS_SUCCESS) {
+			size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+			       hdcp->auth.msg.hdcp2.rxstatus[0];
+			status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_cert)) ?
+					MOD_HDCP_STATUS_SUCCESS :
+					MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING;
+		}
+	}
+	return status;
+}
+
+static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	uint8_t size;
+
+	status = mod_hdcp_read_rxstatus(hdcp);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	if (is_dp_hdcp(hdcp)) {
+		status = HDCP_2_2_DP_RXSTATUS_H_PRIME(hdcp->auth.msg.hdcp2.rxstatus_dp) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING;
+	} else {
+		size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+		       hdcp->auth.msg.hdcp2.rxstatus[0];
+		status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	uint8_t size;
+
+	status = mod_hdcp_read_rxstatus(hdcp);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	if (is_dp_hdcp(hdcp)) {
+		status = HDCP_2_2_DP_RXSTATUS_PAIRING(hdcp->auth.msg.hdcp2.rxstatus_dp) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING;
+	} else {
+		size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+		       hdcp->auth.msg.hdcp2.rxstatus[0];
+		status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	uint8_t size;
+	uint16_t max_wait = 20; // units of ms
+	uint16_t num_polls = 5;
+	uint16_t wait_time = max_wait / num_polls;
+
+	if (is_dp_hdcp(hdcp))
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	else
+		for (; num_polls; num_polls--) {
+			msleep(wait_time);
+
+			status = mod_hdcp_read_rxstatus(hdcp);
+			if (status != MOD_HDCP_STATUS_SUCCESS)
+				break;
+
+			size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+			       hdcp->auth.msg.hdcp2.rxstatus[0];
+			status = (size == sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)) ?
+					MOD_HDCP_STATUS_SUCCESS :
+					MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING;
+			if (status == MOD_HDCP_STATUS_SUCCESS)
+				break;
+		}
+	return status;
+}
+
+static enum mod_hdcp_status check_stream_ready_available(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+	uint8_t size;
+
+	if (is_dp_hdcp(hdcp)) {
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	} else {
+		status = mod_hdcp_read_rxstatus(hdcp);
+		if (status != MOD_HDCP_STATUS_SUCCESS)
+			goto out;
+		size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+		       hdcp->auth.msg.hdcp2.rxstatus[0];
+		status = (size == sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)) ?
+				MOD_HDCP_STATUS_SUCCESS :
+				MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING;
+	}
+out:
+	return status;
+}
+
+static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
+{
+	return HDCP_2_2_DEV_COUNT_LO(hdcp->auth.msg.hdcp2.rx_id_list[2]) +
+			(HDCP_2_2_DEV_COUNT_HI(hdcp->auth.msg.hdcp2.rx_id_list[1]) << 4);
+}
+
+static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
+{
+	/* device count must be greater than or equal to tracked hdcp displays */
+	return (get_device_count(hdcp) < get_added_display_count(hdcp)) ?
+			MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE :
+			MOD_HDCP_STATUS_SUCCESS;
+}
+
+static uint8_t process_rxstatus(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input,
+		enum mod_hdcp_status *status)
+{
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_rxstatus,
+			&input->rxstatus_read, status,
+			hdcp, "rxstatus_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_reauthentication_request,
+			&input->reauth_request_check, status,
+			hdcp, "reauth_request_check"))
+		goto out;
+	if (is_dp_hdcp(hdcp)) {
+		if (!mod_hdcp_execute_and_set(check_link_integrity_failure_dp,
+				&input->link_integrity_check_dp, status,
+				hdcp, "link_integrity_check_dp"))
+			goto out;
+	}
+	if (hdcp->connection.is_repeater)
+		if (check_receiver_id_list_ready(hdcp) ==
+				MOD_HDCP_STATUS_SUCCESS) {
+			HDCP_INPUT_PASS_TRACE(hdcp, "rx_id_list_ready");
+			event_ctx->rx_id_list_ready = 1;
+			if (is_dp_hdcp(hdcp))
+				hdcp->auth.msg.hdcp2.rx_id_list_size =
+						sizeof(hdcp->auth.msg.hdcp2.rx_id_list);
+			else
+				hdcp->auth.msg.hdcp2.rx_id_list_size =
+					HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
+					hdcp->auth.msg.hdcp2.rxstatus[0];
+		}
+out:
+	return (*status == MOD_HDCP_STATUS_SUCCESS);
+}
+
+static enum mod_hdcp_status known_hdcp2_capable_rx(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_hdcp2version,
+			&input->hdcp2version_read, &status,
+			hdcp, "hdcp2version_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_hdcp2_capable,
+			&input->hdcp2_capable_check, &status,
+			hdcp, "hdcp2_capable"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status send_ake_init(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (!mod_hdcp_execute_and_set(mod_hdcp_add_display_topology,
+			&input->add_topology, &status,
+			hdcp, "add_topology"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_create_session,
+			&input->create_session, &status,
+			hdcp, "create_session"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_ake_init,
+			&input->ake_init_prepare, &status,
+			hdcp, "ake_init_prepare"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_ake_init,
+			&input->ake_init_write, &status,
+			hdcp, "ake_init_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status validate_ake_cert(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (is_hdmi_dvi_sl_hdcp(hdcp))
+		if (!mod_hdcp_execute_and_set(check_ake_cert_available,
+				&input->ake_cert_available, &status,
+				hdcp, "ake_cert_available"))
+			goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_ake_cert,
+			&input->ake_cert_read, &status,
+			hdcp, "ake_cert_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_ake_cert,
+			&input->ake_cert_validation, &status,
+			hdcp, "ake_cert_validation"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status send_no_stored_km(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_no_stored_km,
+			&input->no_stored_km_write, &status,
+			hdcp, "no_stored_km_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status read_h_prime(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(check_h_prime_available,
+			&input->h_prime_available, &status,
+			hdcp, "h_prime_available"))
+		goto out;
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_h_prime,
+			&input->h_prime_read, &status,
+			hdcp, "h_prime_read"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status read_pairing_info_and_validate_h_prime(
+		struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(check_pairing_info_available,
+			&input->pairing_available, &status,
+			hdcp, "pairing_available"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_pairing_info,
+			&input->pairing_info_read, &status,
+			hdcp, "pairing_info_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_h_prime,
+			&input->h_prime_validation, &status,
+			hdcp, "h_prime_validation"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status send_stored_km(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_stored_km,
+			&input->stored_km_write, &status,
+			hdcp, "stored_km_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status validate_h_prime(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(check_h_prime_available,
+			&input->h_prime_available, &status,
+			hdcp, "h_prime_available"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_h_prime,
+			&input->h_prime_read, &status,
+			hdcp, "h_prime_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_h_prime,
+			&input->h_prime_validation, &status,
+			hdcp, "h_prime_validation"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init,
+			&input->lc_init_prepare, &status,
+			hdcp, "lc_init_prepare"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_lc_init,
+			&input->lc_init_write, &status,
+			 hdcp, "lc_init_write"))
+		goto out;
+	if (is_dp_hdcp(hdcp))
+		msleep(16);
+	else
+		if (!mod_hdcp_execute_and_set(poll_l_prime_available,
+				&input->l_prime_available_poll, &status,
+				hdcp, "l_prime_available_poll"))
+			goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_l_prime,
+			&input->l_prime_read, &status,
+			hdcp, "l_prime_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_l_prime,
+			&input->l_prime_validation, &status,
+			hdcp, "l_prime_validation"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status exchange_ks_and_test_for_repeater(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_eks,
+			&input->eks_prepare, &status,
+			hdcp, "eks_prepare"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_eks,
+			&input->eks_write, &status,
+			hdcp, "eks_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status enable_encryption(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (event_ctx->event == MOD_HDCP_EVENT_CPIRQ) {
+		process_rxstatus(hdcp, event_ctx, input, &status);
+		goto out;
+	}
+
+	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
+		if (!process_rxstatus(hdcp, event_ctx, input, &status))
+			goto out;
+		if (event_ctx->rx_id_list_ready)
+			goto out;
+	}
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_enable_encryption,
+			&input->enable_encryption, &status,
+			hdcp, "enable_encryption"))
+		goto out;
+	if (is_dp_mst_hdcp(hdcp)) {
+		if (!mod_hdcp_execute_and_set(
+				mod_hdcp_hdcp2_enable_dp_stream_encryption,
+				&input->stream_encryption_dp, &status,
+				hdcp, "stream_encryption_dp"))
+			goto out;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status authenticated(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!process_rxstatus(hdcp, event_ctx, input, &status))
+		goto out;
+	if (event_ctx->rx_id_list_ready)
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status wait_for_rx_id_list(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!process_rxstatus(hdcp, event_ctx, input, &status))
+		goto out;
+	if (!event_ctx->rx_id_list_ready) {
+		status = MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY;
+		goto out;
+	}
+out:
+	return status;
+}
+
+static enum mod_hdcp_status verify_rx_id_list_and_send_ack(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (event_ctx->event == MOD_HDCP_EVENT_CPIRQ) {
+		process_rxstatus(hdcp, event_ctx, input, &status);
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_rx_id_list,
+			&input->rx_id_list_read,
+			&status, hdcp, "receiver_id_list_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_device_count,
+			&input->device_count_check,
+			&status, hdcp, "device_count_check"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_rx_id_list,
+			&input->rx_id_list_validation,
+			&status, hdcp, "rx_id_list_validation"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_repeater_auth_ack,
+			&input->repeater_auth_ack_write,
+			&status, hdcp, "repeater_auth_ack_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status send_stream_management(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (event_ctx->event == MOD_HDCP_EVENT_CPIRQ) {
+		process_rxstatus(hdcp, event_ctx, input, &status);
+		goto out;
+	}
+
+	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
+		if (!process_rxstatus(hdcp, event_ctx, input, &status))
+			goto out;
+		if (event_ctx->rx_id_list_ready)
+			goto out;
+	}
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_stream_management,
+			&input->prepare_stream_manage,
+			&status, hdcp, "prepare_stream_manage"))
+		goto out;
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_stream_manage,
+			&input->stream_manage_write,
+			&status, hdcp, "stream_manage_write"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ &&
+			event_ctx->event != MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+	if (event_ctx->event == MOD_HDCP_EVENT_CPIRQ) {
+		process_rxstatus(hdcp, event_ctx, input, &status);
+		goto out;
+	}
+
+	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
+		if (!process_rxstatus(hdcp, event_ctx, input, &status))
+			goto out;
+		if (event_ctx->rx_id_list_ready) {
+			goto out;
+		}
+	}
+	if (is_hdmi_dvi_sl_hdcp(hdcp))
+		if (!mod_hdcp_execute_and_set(check_stream_ready_available,
+				&input->stream_ready_available,
+				&status, hdcp, "stream_ready_available"))
+			goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_stream_ready,
+			&input->stream_ready_read,
+			&status, hdcp, "stream_ready_read"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_stream_ready,
+			&input->stream_ready_validation,
+			&status, hdcp, "stream_ready_validation"))
+		goto out;
+
+out:
+	return status;
+}
+
+static enum mod_hdcp_status determine_rx_hdcp_capable_dp(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!mod_hdcp_execute_and_set(mod_hdcp_read_rxcaps,
+			&input->rx_caps_read_dp,
+			&status, hdcp, "rx_caps_read_dp"))
+		goto out;
+	if (!mod_hdcp_execute_and_set(check_hdcp2_capable,
+			&input->hdcp2_capable_check, &status,
+			hdcp, "hdcp2_capable_check"))
+		goto out;
+out:
+	return status;
+}
+
+static enum mod_hdcp_status send_content_stream_type_dp(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK &&
+			event_ctx->event != MOD_HDCP_EVENT_CPIRQ) {
+		event_ctx->unexpected_event = 1;
+		goto out;
+	}
+
+	if (!process_rxstatus(hdcp, event_ctx, input, &status))
+		goto out;
+	if (!mod_hdcp_execute_and_set(mod_hdcp_write_content_type,
+			&input->content_stream_type_write, &status,
+			hdcp, "content_stream_type_write"))
+		goto out;
+out:
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	switch (current_state(hdcp)) {
+	case H2_A0_KNOWN_HDCP2_CAPABLE_RX:
+		status = known_hdcp2_capable_rx(hdcp, event_ctx, input);
+		break;
+	case H2_A1_SEND_AKE_INIT:
+		status = send_ake_init(hdcp, event_ctx, input);
+		break;
+	case H2_A1_VALIDATE_AKE_CERT:
+		status = validate_ake_cert(hdcp, event_ctx, input);
+		break;
+	case H2_A1_SEND_NO_STORED_KM:
+		status = send_no_stored_km(hdcp, event_ctx, input);
+		break;
+	case H2_A1_READ_H_PRIME:
+		status = read_h_prime(hdcp, event_ctx, input);
+		break;
+	case H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		status = read_pairing_info_and_validate_h_prime(hdcp,
+				event_ctx, input);
+		break;
+	case H2_A1_SEND_STORED_KM:
+		status = send_stored_km(hdcp, event_ctx, input);
+		break;
+	case H2_A1_VALIDATE_H_PRIME:
+		status = validate_h_prime(hdcp, event_ctx, input);
+		break;
+	case H2_A2_LOCALITY_CHECK:
+		status = locality_check(hdcp, event_ctx, input);
+		break;
+	case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		status = exchange_ks_and_test_for_repeater(hdcp, event_ctx, input);
+		break;
+	case H2_ENABLE_ENCRYPTION:
+		status = enable_encryption(hdcp, event_ctx, input);
+		break;
+	case H2_A5_AUTHENTICATED:
+		status = authenticated(hdcp, event_ctx, input);
+		break;
+	case H2_A6_WAIT_FOR_RX_ID_LIST:
+		status = wait_for_rx_id_list(hdcp, event_ctx, input);
+		break;
+	case H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		status = verify_rx_id_list_and_send_ack(hdcp, event_ctx, input);
+		break;
+	case H2_A9_SEND_STREAM_MANAGEMENT:
+		status = send_stream_management(hdcp, event_ctx, input);
+		break;
+	case H2_A9_VALIDATE_STREAM_READY:
+		status = validate_stream_ready(hdcp, event_ctx, input);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		break;
+	}
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_dp_execution(struct mod_hdcp *hdcp,
+	struct mod_hdcp_event_context *event_ctx,
+	struct mod_hdcp_transition_input_hdcp2 *input)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+	switch (current_state(hdcp)) {
+	case D2_A0_DETERMINE_RX_HDCP_CAPABLE:
+		status = determine_rx_hdcp_capable_dp(hdcp, event_ctx, input);
+		break;
+	case D2_A1_SEND_AKE_INIT:
+		status = send_ake_init(hdcp, event_ctx, input);
+		break;
+	case D2_A1_VALIDATE_AKE_CERT:
+		status = validate_ake_cert(hdcp, event_ctx, input);
+		break;
+	case D2_A1_SEND_NO_STORED_KM:
+		status = send_no_stored_km(hdcp, event_ctx, input);
+		break;
+	case D2_A1_READ_H_PRIME:
+		status = read_h_prime(hdcp, event_ctx, input);
+		break;
+	case D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		status = read_pairing_info_and_validate_h_prime(hdcp,
+				event_ctx, input);
+		break;
+	case D2_A1_SEND_STORED_KM:
+		status = send_stored_km(hdcp, event_ctx, input);
+		break;
+	case D2_A1_VALIDATE_H_PRIME:
+		status = validate_h_prime(hdcp, event_ctx, input);
+		break;
+	case D2_A2_LOCALITY_CHECK:
+		status = locality_check(hdcp, event_ctx, input);
+		break;
+	case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		status = exchange_ks_and_test_for_repeater(hdcp,
+				event_ctx, input);
+		break;
+	case D2_SEND_CONTENT_STREAM_TYPE:
+		status = send_content_stream_type_dp(hdcp, event_ctx, input);
+		break;
+	case D2_ENABLE_ENCRYPTION:
+		status = enable_encryption(hdcp, event_ctx, input);
+		break;
+	case D2_A5_AUTHENTICATED:
+		status = authenticated(hdcp, event_ctx, input);
+		break;
+	case D2_A6_WAIT_FOR_RX_ID_LIST:
+		status = wait_for_rx_id_list(hdcp, event_ctx, input);
+		break;
+	case D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		status = verify_rx_id_list_and_send_ack(hdcp, event_ctx, input);
+		break;
+	case D2_A9_SEND_STREAM_MANAGEMENT:
+		status = send_stream_management(hdcp, event_ctx, input);
+		break;
+	case D2_A9_VALIDATE_STREAM_READY:
+		status = validate_stream_ready(hdcp, event_ctx, input);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		break;
+	}
+
+	return status;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
new file mode 100644
index 000000000000..8cae3e3aacd5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hdcp.h"
+
+enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_connection *conn = &hdcp->connection;
+	struct mod_hdcp_link_adjustment *adjust = &hdcp->connection.link.adjust;
+
+	switch (current_state(hdcp)) {
+	case H2_A0_KNOWN_HDCP2_CAPABLE_RX:
+		if (input->hdcp2version_read != PASS ||
+				input->hdcp2_capable_check != PASS) {
+			adjust->hdcp2.disable = 1;
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, HDCP_INITIALIZED);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A1_SEND_AKE_INIT);
+		}
+		break;
+	case H2_A1_SEND_AKE_INIT:
+		if (input->add_topology != PASS ||
+				input->create_session != PASS ||
+				input->ake_init_prepare != PASS) {
+			/* out of sync with psp state */
+			adjust->hdcp2.disable = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->ake_init_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 100, output);
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A1_VALIDATE_AKE_CERT);
+		break;
+	case H2_A1_VALIDATE_AKE_CERT:
+		if (input->ake_cert_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1A-08: consider ake timeout a failure */
+				/* some hdmi receivers are not ready for HDCP
+				 * immediately after video becomes active,
+				 * delay 1s before retry on first HDCP message
+				 * timeout.
+				 */
+				fail_and_restart_in_ms(1000, &status, output);
+			} else {
+				/* continue ake cert polling*/
+				callback_in_ms(10, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		} else if (input->ake_cert_read != PASS ||
+				input->ake_cert_validation != PASS) {
+			/*
+			 * 1A-09: consider invalid ake cert a failure
+			 * 1A-10: consider receiver id listed in SRM a failure
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_km_stored &&
+				!adjust->hdcp2.force_no_stored_km) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A1_SEND_STORED_KM);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A1_SEND_NO_STORED_KM);
+		}
+		break;
+	case H2_A1_SEND_NO_STORED_KM:
+		if (input->no_stored_km_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (adjust->hdcp2.increase_h_prime_timeout)
+			set_watchdog_in_ms(hdcp, 2000, output);
+		else
+			set_watchdog_in_ms(hdcp, 1000, output);
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A1_READ_H_PRIME);
+		break;
+	case H2_A1_READ_H_PRIME:
+		if (input->h_prime_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1A-11-3: consider h' timeout a failure */
+				fail_and_restart_in_ms(1000, &status, output);
+			} else {
+				/* continue h' polling */
+				callback_in_ms(100, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		} else if (input->h_prime_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 200, output);
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME);
+		break;
+	case H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		if (input->pairing_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1A-12: consider pairing info timeout
+				 * a failure
+				 */
+				fail_and_restart_in_ms(0, &status, output);
+			} else {
+				/* continue pairing info polling */
+				callback_in_ms(20, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		} else if (input->pairing_info_read != PASS ||
+				input->h_prime_validation != PASS) {
+			/* 1A-11-1: consider invalid h' a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK);
+		break;
+	case H2_A1_SEND_STORED_KM:
+		if (input->stored_km_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 200, output);
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A1_VALIDATE_H_PRIME);
+		break;
+	case H2_A1_VALIDATE_H_PRIME:
+		if (input->h_prime_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1A-11-2: consider h' timeout a failure */
+				fail_and_restart_in_ms(1000, &status, output);
+			} else {
+				/* continue h' polling */
+				callback_in_ms(20, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		} else if (input->h_prime_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->h_prime_validation != PASS) {
+			/* 1A-11-1: consider invalid h' a failure */
+			adjust->hdcp2.force_no_stored_km = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK);
+		break;
+	case H2_A2_LOCALITY_CHECK:
+		if (hdcp->state.stay_count > 10 ||
+				input->lc_init_prepare != PASS ||
+				input->lc_init_write != PASS ||
+				input->l_prime_available_poll != PASS ||
+				input->l_prime_read != PASS) {
+			/*
+			 * 1A-05: consider disconnection after LC init a failure
+			 * 1A-13-1: consider invalid l' a failure
+			 * 1A-13-2: consider l' timeout a failure
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->l_prime_validation != PASS) {
+			callback_in_ms(0, output);
+			increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER);
+		break;
+	case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		if (input->eks_prepare != PASS ||
+				input->eks_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_repeater) {
+			set_watchdog_in_ms(hdcp, 3000, output);
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A6_WAIT_FOR_RX_ID_LIST);
+		} else {
+			/* some CTS equipment requires a delay GREATER than
+			 * 200 ms, so delay 210 ms instead of 200 ms
+			 */
+			callback_in_ms(210, output);
+			set_state_id(hdcp, output, H2_ENABLE_ENCRYPTION);
+		}
+		break;
+	case H2_ENABLE_ENCRYPTION:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			/*
+			 * 1A-07: restart hdcp on REAUTH_REQ
+			 * 1B-08: restart hdcp on REAUTH_REQ
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->enable_encryption != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A5_AUTHENTICATED);
+		HDCP_FULL_DDC_TRACE(hdcp);
+		break;
+	case H2_A5_AUTHENTICATED:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		}
+		callback_in_ms(500, output);
+		increment_stay_counter(hdcp);
+		break;
+	case H2_A6_WAIT_FOR_RX_ID_LIST:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (!event_ctx->rx_id_list_ready) {
+			if (event_ctx->event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1B-02: consider rx id list timeout a failure */
+				/* some CTS equipment's actual timeout
+				 * measurement is slightly greater than 3000 ms.
+				 * Delay 100 ms to ensure it is fully timeout
+				 * before re-authentication.
+				 */
+				fail_and_restart_in_ms(100, &status, output);
+			} else {
+				callback_in_ms(300, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+		break;
+	case H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->rx_id_list_read != PASS ||
+				input->device_count_check != PASS ||
+				input->rx_id_list_validation != PASS ||
+				input->repeater_auth_ack_write != PASS) {
+			/* 1B-03: consider invalid v' a failure
+			 * 1B-04: consider MAX_DEVS_EXCEEDED a failure
+			 * 1B-05: consider MAX_CASCADE_EXCEEDED a failure
+			 * 1B-06: consider invalid seq_num_V a failure
+			 * 1B-09: consider seq_num_V rollover a failure
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A9_SEND_STREAM_MANAGEMENT);
+		break;
+	case H2_A9_SEND_STREAM_MANAGEMENT:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->prepare_stream_manage != PASS ||
+				input->stream_manage_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 100, output);
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, H2_A9_VALIDATE_STREAM_READY);
+		break;
+	case H2_A9_VALIDATE_STREAM_READY:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->stream_ready_available != PASS) {
+			if (event_ctx->event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
+				/* 1B-10-2: restart content stream management on
+				 * stream ready timeout
+				 */
+				hdcp->auth.count.stream_management_retry_count++;
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, H2_A9_SEND_STREAM_MANAGEMENT);
+			} else {
+				callback_in_ms(10, output);
+				increment_stay_counter(hdcp);
+			}
+			break;
+		} else if (input->stream_ready_read != PASS ||
+				input->stream_ready_validation != PASS) {
+			/*
+			 * 1B-10-1: restart content stream management
+			 * on invalid M'
+			 */
+			if (hdcp->auth.count.stream_management_retry_count > 10) {
+				fail_and_restart_in_ms(0, &status, output);
+			} else {
+				hdcp->auth.count.stream_management_retry_count++;
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, H2_A9_SEND_STREAM_MANAGEMENT);
+			}
+			break;
+		}
+		callback_in_ms(200, output);
+		set_state_id(hdcp, output, H2_ENABLE_ENCRYPTION);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		fail_and_restart_in_ms(0, &status, output);
+		break;
+	}
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp,
+		struct mod_hdcp_event_context *event_ctx,
+		struct mod_hdcp_transition_input_hdcp2 *input,
+		struct mod_hdcp_output *output)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+	struct mod_hdcp_connection *conn = &hdcp->connection;
+	struct mod_hdcp_link_adjustment *adjust = &hdcp->connection.link.adjust;
+
+	switch (current_state(hdcp)) {
+	case D2_A0_DETERMINE_RX_HDCP_CAPABLE:
+		if (input->rx_caps_read_dp != PASS ||
+				input->hdcp2_capable_check != PASS) {
+			adjust->hdcp2.disable = 1;
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, HDCP_INITIALIZED);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A1_SEND_AKE_INIT);
+		}
+		break;
+	case D2_A1_SEND_AKE_INIT:
+		if (input->add_topology != PASS ||
+				input->create_session != PASS ||
+				input->ake_init_prepare != PASS) {
+			/* out of sync with psp state */
+			adjust->hdcp2.disable = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->ake_init_write != PASS) {
+			/* possibly display not ready */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(100, output);
+		set_state_id(hdcp, output, D2_A1_VALIDATE_AKE_CERT);
+		break;
+	case D2_A1_VALIDATE_AKE_CERT:
+		if (input->ake_cert_read != PASS ||
+				input->ake_cert_validation != PASS) {
+			/*
+			 * 1A-08: consider invalid ake cert a failure
+			 * 1A-09: consider receiver id listed in SRM a failure
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_km_stored &&
+				!adjust->hdcp2.force_no_stored_km) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A1_SEND_STORED_KM);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A1_SEND_NO_STORED_KM);
+		}
+		break;
+	case D2_A1_SEND_NO_STORED_KM:
+		if (input->no_stored_km_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (adjust->hdcp2.increase_h_prime_timeout)
+			set_watchdog_in_ms(hdcp, 2000, output);
+		else
+			set_watchdog_in_ms(hdcp, 1000, output);
+		set_state_id(hdcp, output, D2_A1_READ_H_PRIME);
+		break;
+	case D2_A1_READ_H_PRIME:
+		if (input->h_prime_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
+				/* 1A-10-3: consider h' timeout a failure */
+				fail_and_restart_in_ms(1000, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		} else if (input->h_prime_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 200, output);
+		set_state_id(hdcp, output, D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME);
+		break;
+	case D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		if (input->pairing_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
+				/*
+				 * 1A-11: consider pairing info timeout
+				 * a failure
+				 */
+				fail_and_restart_in_ms(0, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		} else if (input->pairing_info_read != PASS ||
+				input->h_prime_validation != PASS) {
+			/* 1A-10-1: consider invalid h' a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK);
+		break;
+	case D2_A1_SEND_STORED_KM:
+		if (input->stored_km_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_watchdog_in_ms(hdcp, 200, output);
+		set_state_id(hdcp, output, D2_A1_VALIDATE_H_PRIME);
+		break;
+	case D2_A1_VALIDATE_H_PRIME:
+		if (input->h_prime_available != PASS) {
+			if (event_ctx->event ==
+					MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
+				/* 1A-10-2: consider h' timeout a failure */
+				fail_and_restart_in_ms(1000, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		} else if (input->h_prime_read != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->h_prime_validation != PASS) {
+			/* 1A-10-1: consider invalid h' a failure */
+			adjust->hdcp2.force_no_stored_km = 1;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK);
+		break;
+	case D2_A2_LOCALITY_CHECK:
+		if (hdcp->state.stay_count > 10 ||
+				input->lc_init_prepare != PASS ||
+				input->lc_init_write != PASS ||
+				input->l_prime_read != PASS) {
+			/* 1A-12: consider invalid l' a failure */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->l_prime_validation != PASS) {
+			callback_in_ms(0, output);
+			increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER);
+		break;
+	case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		if (input->eks_prepare != PASS ||
+				input->eks_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		if (conn->is_repeater) {
+			set_watchdog_in_ms(hdcp, 3000, output);
+			set_state_id(hdcp, output, D2_A6_WAIT_FOR_RX_ID_LIST);
+		} else {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_SEND_CONTENT_STREAM_TYPE);
+		}
+		break;
+	case D2_SEND_CONTENT_STREAM_TYPE:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS ||
+				input->content_stream_type_write != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(210, output);
+		set_state_id(hdcp, output, D2_ENABLE_ENCRYPTION);
+		break;
+	case D2_ENABLE_ENCRYPTION:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS) {
+			/*
+			 * 1A-07: restart hdcp on REAUTH_REQ
+			 * 1B-08: restart hdcp on REAUTH_REQ
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->enable_encryption != PASS ||
+				(is_dp_mst_hdcp(hdcp) && input->stream_encryption_dp != PASS)) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		set_state_id(hdcp, output, D2_A5_AUTHENTICATED);
+		HDCP_FULL_DDC_TRACE(hdcp);
+		break;
+	case D2_A5_AUTHENTICATED:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (input->link_integrity_check_dp != PASS) {
+			if (hdcp->connection.hdcp2_retry_count >= 1)
+				adjust->hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready && conn->is_repeater) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		}
+		increment_stay_counter(hdcp);
+		break;
+	case D2_A6_WAIT_FOR_RX_ID_LIST:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (!event_ctx->rx_id_list_ready) {
+			if (event_ctx->event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
+				/* 1B-02: consider rx id list timeout a failure */
+				fail_and_restart_in_ms(0, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+		break;
+	case D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS ||
+				input->rx_id_list_read != PASS ||
+				input->device_count_check != PASS ||
+				input->rx_id_list_validation != PASS ||
+				input->repeater_auth_ack_write != PASS) {
+			/*
+			 * 1B-03: consider invalid v' a failure
+			 * 1B-04: consider MAX_DEVS_EXCEEDED a failure
+			 * 1B-05: consider MAX_CASCADE_EXCEEDED a failure
+			 * 1B-06: consider invalid seq_num_V a failure
+			 * 1B-09: consider seq_num_V rollover a failure
+			 */
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		}
+		callback_in_ms(0, output);
+		set_state_id(hdcp, output, D2_A9_SEND_STREAM_MANAGEMENT);
+		break;
+	case D2_A9_SEND_STREAM_MANAGEMENT:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->prepare_stream_manage != PASS ||
+				input->stream_manage_write != PASS) {
+			if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK)
+				fail_and_restart_in_ms(0, &status, output);
+			else
+				increment_stay_counter(hdcp);
+			break;
+		}
+		callback_in_ms(100, output);
+		set_state_id(hdcp, output, D2_A9_VALIDATE_STREAM_READY);
+		break;
+	case D2_A9_VALIDATE_STREAM_READY:
+		if (input->rxstatus_read != PASS ||
+				input->reauth_request_check != PASS ||
+				input->link_integrity_check_dp != PASS) {
+			fail_and_restart_in_ms(0, &status, output);
+			break;
+		} else if (event_ctx->rx_id_list_ready) {
+			callback_in_ms(0, output);
+			set_state_id(hdcp, output, D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK);
+			break;
+		} else if (input->stream_ready_read != PASS ||
+				input->stream_ready_validation != PASS) {
+			/*
+			 * 1B-10-1: restart content stream management
+			 * on invalid M'
+			 * 1B-10-2: consider stream ready timeout a failure
+			 */
+			if (hdcp->auth.count.stream_management_retry_count > 10) {
+				fail_and_restart_in_ms(0, &status, output);
+			} else if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK) {
+				hdcp->auth.count.stream_management_retry_count++;
+				callback_in_ms(0, output);
+				set_state_id(hdcp, output, D2_A9_SEND_STREAM_MANAGEMENT);
+			} else {
+				increment_stay_counter(hdcp);
+			}
+			break;
+		}
+		callback_in_ms(200, output);
+		set_state_id(hdcp, output, D2_ENABLE_ENCRYPTION);
+		break;
+	default:
+		status = MOD_HDCP_STATUS_INVALID_STATE;
+		fail_and_restart_in_ms(0, &status, output);
+		break;
+	}
+	return status;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
new file mode 100644
index 000000000000..ff9d54812e62
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "hdcp.h"
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define HDCP_I2C_ADDR 0x3a	/* 0x74 >> 1*/
+#define KSV_READ_SIZE 0xf	/* 0x6803b - 0x6802c */
+#define HDCP_MAX_AUX_TRANSACTION_SIZE 16
+
+enum mod_hdcp_ddc_message_id {
+	MOD_HDCP_MESSAGE_ID_INVALID = -1,
+
+	/* HDCP 1.4 */
+
+	MOD_HDCP_MESSAGE_ID_READ_BKSV = 0,
+	MOD_HDCP_MESSAGE_ID_READ_RI_R0,
+	MOD_HDCP_MESSAGE_ID_WRITE_AKSV,
+	MOD_HDCP_MESSAGE_ID_WRITE_AINFO,
+	MOD_HDCP_MESSAGE_ID_WRITE_AN,
+	MOD_HDCP_MESSAGE_ID_READ_VH_X,
+	MOD_HDCP_MESSAGE_ID_READ_VH_0,
+	MOD_HDCP_MESSAGE_ID_READ_VH_1,
+	MOD_HDCP_MESSAGE_ID_READ_VH_2,
+	MOD_HDCP_MESSAGE_ID_READ_VH_3,
+	MOD_HDCP_MESSAGE_ID_READ_VH_4,
+	MOD_HDCP_MESSAGE_ID_READ_BCAPS,
+	MOD_HDCP_MESSAGE_ID_READ_BSTATUS,
+	MOD_HDCP_MESSAGE_ID_READ_KSV_FIFO,
+	MOD_HDCP_MESSAGE_ID_READ_BINFO,
+
+	/* HDCP 2.2 */
+
+	MOD_HDCP_MESSAGE_ID_HDCP2VERSION,
+	MOD_HDCP_MESSAGE_ID_RX_CAPS,
+	MOD_HDCP_MESSAGE_ID_WRITE_AKE_INIT,
+	MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_CERT,
+	MOD_HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM,
+	MOD_HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM,
+	MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME,
+	MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO,
+	MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT,
+	MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME,
+	MOD_HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS,
+	MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST,
+	MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK,
+	MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE,
+	MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY,
+	MOD_HDCP_MESSAGE_ID_READ_RXSTATUS,
+	MOD_HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
+
+	MOD_HDCP_MESSAGE_ID_MAX
+};
+
+static const uint8_t hdcp_i2c_offsets[] = {
+	[MOD_HDCP_MESSAGE_ID_READ_BKSV] = 0x0,
+	[MOD_HDCP_MESSAGE_ID_READ_RI_R0] = 0x8,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKSV] = 0x10,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AINFO] = 0x15,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AN] = 0x18,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_X] = 0x20,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_0] = 0x20,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_1] = 0x24,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_2] = 0x28,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_3] = 0x2C,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_4] = 0x30,
+	[MOD_HDCP_MESSAGE_ID_READ_BCAPS] = 0x40,
+	[MOD_HDCP_MESSAGE_ID_READ_BSTATUS] = 0x41,
+	[MOD_HDCP_MESSAGE_ID_READ_KSV_FIFO] = 0x43,
+	[MOD_HDCP_MESSAGE_ID_READ_BINFO] = 0xFF,
+	[MOD_HDCP_MESSAGE_ID_HDCP2VERSION] = 0x50,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_INIT] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_CERT] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = 0x60,
+	[MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = 0x80,
+	[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS] = 0x70,
+	[MOD_HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = 0x0
+};
+
+static const uint32_t hdcp_dpcd_addrs[] = {
+	[MOD_HDCP_MESSAGE_ID_READ_BKSV] = 0x68000,
+	[MOD_HDCP_MESSAGE_ID_READ_RI_R0] = 0x68005,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKSV] = 0x68007,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AINFO] = 0x6803B,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AN] = 0x6800c,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_X] = 0x68014,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_0] = 0x68014,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_1] = 0x68018,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_2] = 0x6801c,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_3] = 0x68020,
+	[MOD_HDCP_MESSAGE_ID_READ_VH_4] = 0x68024,
+	[MOD_HDCP_MESSAGE_ID_READ_BCAPS] = 0x68028,
+	[MOD_HDCP_MESSAGE_ID_READ_BSTATUS] = 0x68029,
+	[MOD_HDCP_MESSAGE_ID_READ_KSV_FIFO] = 0x6802c,
+	[MOD_HDCP_MESSAGE_ID_READ_BINFO] = 0x6802a,
+	[MOD_HDCP_MESSAGE_ID_RX_CAPS] = 0x6921d,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_INIT] = 0x69000,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_CERT] = 0x6900b,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM] = 0x69220,
+	[MOD_HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM] = 0x692a0,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME] = 0x692c0,
+	[MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO] = 0x692e0,
+	[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT] = 0x692f0,
+	[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME] = 0x692f8,
+	[MOD_HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS] = 0x69318,
+	[MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST] = 0x69330,
+	[MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK] = 0x693e0,
+	[MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = 0x693f0,
+	[MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = 0x69473,
+	[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS] = 0x69493,
+	[MOD_HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = 0x69494
+};
+
+static enum mod_hdcp_status read(struct mod_hdcp *hdcp,
+		enum mod_hdcp_ddc_message_id msg_id,
+		uint8_t *buf,
+		uint32_t buf_len)
+{
+	bool success = true;
+	uint32_t cur_size = 0;
+	uint32_t data_offset = 0;
+
+	if (is_dp_hdcp(hdcp)) {
+		while (buf_len > 0) {
+			cur_size = MIN(buf_len, HDCP_MAX_AUX_TRANSACTION_SIZE);
+			success = hdcp->config.ddc.funcs.read_dpcd(hdcp->config.ddc.handle,
+					hdcp_dpcd_addrs[msg_id] + data_offset,
+					buf + data_offset,
+					cur_size);
+
+			if (!success)
+				break;
+
+			buf_len -= cur_size;
+			data_offset += cur_size;
+		}
+	} else {
+		success = hdcp->config.ddc.funcs.read_i2c(
+				hdcp->config.ddc.handle,
+				HDCP_I2C_ADDR,
+				hdcp_i2c_offsets[msg_id],
+				buf,
+				(uint32_t)buf_len);
+	}
+
+	return success ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE;
+}
+
+static enum mod_hdcp_status read_repeatedly(struct mod_hdcp *hdcp,
+		enum mod_hdcp_ddc_message_id msg_id,
+		uint8_t *buf,
+		uint32_t buf_len,
+		uint8_t read_size)
+{
+	enum mod_hdcp_status status = MOD_HDCP_STATUS_DDC_FAILURE;
+	uint32_t cur_size = 0;
+	uint32_t data_offset = 0;
+
+	while (buf_len > 0) {
+		cur_size = MIN(buf_len, read_size);
+		status = read(hdcp, msg_id, buf + data_offset, cur_size);
+
+		if (status != MOD_HDCP_STATUS_SUCCESS)
+			break;
+
+		buf_len -= cur_size;
+		data_offset += cur_size;
+	}
+
+	return status;
+}
+
+static enum mod_hdcp_status write(struct mod_hdcp *hdcp,
+		enum mod_hdcp_ddc_message_id msg_id,
+		uint8_t *buf,
+		uint32_t buf_len)
+{
+	bool success = true;
+	uint32_t cur_size = 0;
+	uint32_t data_offset = 0;
+
+	if (is_dp_hdcp(hdcp)) {
+		while (buf_len > 0) {
+			cur_size = MIN(buf_len, HDCP_MAX_AUX_TRANSACTION_SIZE);
+			success = hdcp->config.ddc.funcs.write_dpcd(
+					hdcp->config.ddc.handle,
+					hdcp_dpcd_addrs[msg_id] + data_offset,
+					buf + data_offset,
+					cur_size);
+
+			if (!success)
+				break;
+
+			buf_len -= cur_size;
+			data_offset += cur_size;
+		}
+	} else {
+		hdcp->buf[0] = hdcp_i2c_offsets[msg_id];
+		memmove(&hdcp->buf[1], buf, buf_len);
+		success = hdcp->config.ddc.funcs.write_i2c(
+				hdcp->config.ddc.handle,
+				HDCP_I2C_ADDR,
+				hdcp->buf,
+				(uint32_t)(buf_len+1));
+	}
+
+	return success ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE;
+}
+
+enum mod_hdcp_status mod_hdcp_read_bksv(struct mod_hdcp *hdcp)
+{
+	return read(hdcp, MOD_HDCP_MESSAGE_ID_READ_BKSV,
+			hdcp->auth.msg.hdcp1.bksv,
+			sizeof(hdcp->auth.msg.hdcp1.bksv));
+}
+
+enum mod_hdcp_status mod_hdcp_read_bcaps(struct mod_hdcp *hdcp)
+{
+	return read(hdcp, MOD_HDCP_MESSAGE_ID_READ_BCAPS,
+			&hdcp->auth.msg.hdcp1.bcaps,
+			sizeof(hdcp->auth.msg.hdcp1.bcaps));
+}
+
+enum mod_hdcp_status mod_hdcp_read_bstatus(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_BSTATUS,
+					(uint8_t *)&hdcp->auth.msg.hdcp1.bstatus,
+					1);
+	else
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_BSTATUS,
+				(uint8_t *)&hdcp->auth.msg.hdcp1.bstatus,
+				sizeof(hdcp->auth.msg.hdcp1.bstatus));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_r0p(struct mod_hdcp *hdcp)
+{
+	return read(hdcp, MOD_HDCP_MESSAGE_ID_READ_RI_R0,
+			(uint8_t *)&hdcp->auth.msg.hdcp1.r0p,
+			sizeof(hdcp->auth.msg.hdcp1.r0p));
+}
+
+/* special case, reading repeatedly at the same address, don't use read() */
+enum mod_hdcp_status mod_hdcp_read_ksvlist(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = read_repeatedly(hdcp, MOD_HDCP_MESSAGE_ID_READ_KSV_FIFO,
+				hdcp->auth.msg.hdcp1.ksvlist,
+				hdcp->auth.msg.hdcp1.ksvlist_size,
+				KSV_READ_SIZE);
+	else
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_KSV_FIFO,
+				(uint8_t *)&hdcp->auth.msg.hdcp1.ksvlist,
+				hdcp->auth.msg.hdcp1.ksvlist_size);
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_vp(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_VH_0,
+			&hdcp->auth.msg.hdcp1.vp[0], 4);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_VH_1,
+			&hdcp->auth.msg.hdcp1.vp[4], 4);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_VH_2,
+			&hdcp->auth.msg.hdcp1.vp[8], 4);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_VH_3,
+			&hdcp->auth.msg.hdcp1.vp[12], 4);
+	if (status != MOD_HDCP_STATUS_SUCCESS)
+		goto out;
+
+	status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_VH_4,
+			&hdcp->auth.msg.hdcp1.vp[16], 4);
+out:
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_binfo(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_BINFO,
+				(uint8_t *)&hdcp->auth.msg.hdcp1.binfo_dp,
+				sizeof(hdcp->auth.msg.hdcp1.binfo_dp));
+	else
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_aksv(struct mod_hdcp *hdcp)
+{
+	return write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKSV,
+			hdcp->auth.msg.hdcp1.aksv,
+			sizeof(hdcp->auth.msg.hdcp1.aksv));
+}
+
+enum mod_hdcp_status mod_hdcp_write_ainfo(struct mod_hdcp *hdcp)
+{
+	return write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AINFO,
+			&hdcp->auth.msg.hdcp1.ainfo,
+			sizeof(hdcp->auth.msg.hdcp1.ainfo));
+}
+
+enum mod_hdcp_status mod_hdcp_write_an(struct mod_hdcp *hdcp)
+{
+	return write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AN,
+			hdcp->auth.msg.hdcp1.an,
+			sizeof(hdcp->auth.msg.hdcp1.an));
+}
+
+enum mod_hdcp_status mod_hdcp_read_hdcp2version(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	else
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_HDCP2VERSION,
+				&hdcp->auth.msg.hdcp2.hdcp2version_hdmi,
+				sizeof(hdcp->auth.msg.hdcp2.hdcp2version_hdmi));
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_rxcaps(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (!is_dp_hdcp(hdcp))
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	else
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_RX_CAPS,
+				hdcp->auth.msg.hdcp2.rxcaps_dp,
+				sizeof(hdcp->auth.msg.hdcp2.rxcaps_dp));
+
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_rxstatus(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_RXSTATUS,
+				&hdcp->auth.msg.hdcp2.rxstatus_dp,
+				1);
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_RXSTATUS,
+					(uint8_t *)&hdcp->auth.msg.hdcp2.rxstatus,
+					sizeof(hdcp->auth.msg.hdcp2.rxstatus));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_ake_cert(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.ake_cert[0] = 3;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_CERT,
+				hdcp->auth.msg.hdcp2.ake_cert+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_cert)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_CERT,
+					hdcp->auth.msg.hdcp2.ake_cert,
+					sizeof(hdcp->auth.msg.hdcp2.ake_cert));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_h_prime(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.ake_h_prime[0] = 7;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME,
+				hdcp->auth.msg.hdcp2.ake_h_prime+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_H_PRIME,
+				hdcp->auth.msg.hdcp2.ake_h_prime,
+				sizeof(hdcp->auth.msg.hdcp2.ake_h_prime));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_pairing_info(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.ake_pairing_info[0] = 8;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO,
+				hdcp->auth.msg.hdcp2.ake_pairing_info+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_AKE_SEND_PAIRING_INFO,
+				hdcp->auth.msg.hdcp2.ake_pairing_info,
+				sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_l_prime(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.lc_l_prime[0] = 10;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME,
+				hdcp->auth.msg.hdcp2.lc_l_prime+1,
+				sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME,
+				hdcp->auth.msg.hdcp2.lc_l_prime,
+				sizeof(hdcp->auth.msg.hdcp2.lc_l_prime));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_rx_id_list(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.rx_id_list[0] = 12;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST,
+				hdcp->auth.msg.hdcp2.rx_id_list+1,
+				sizeof(hdcp->auth.msg.hdcp2.rx_id_list)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_SEND_RECEIVERID_LIST,
+				hdcp->auth.msg.hdcp2.rx_id_list,
+				hdcp->auth.msg.hdcp2.rx_id_list_size);
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_read_stream_ready(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp)) {
+		hdcp->auth.msg.hdcp2.repeater_auth_stream_ready[0] = 17;
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY,
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_ready+1,
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)-1);
+
+	} else {
+		status = read(hdcp, MOD_HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY,
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_ready,
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready));
+	}
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_ake_init(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_INIT,
+				hdcp->auth.msg.hdcp2.ake_init+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_init)-1);
+	else
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_INIT,
+					hdcp->auth.msg.hdcp2.ake_init,
+					sizeof(hdcp->auth.msg.hdcp2.ake_init));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_no_stored_km(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM,
+				hdcp->auth.msg.hdcp2.ake_no_stored_km+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)-1);
+	else
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_NO_STORED_KM,
+			hdcp->auth.msg.hdcp2.ake_no_stored_km,
+			sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_stored_km(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM,
+				hdcp->auth.msg.hdcp2.ake_stored_km+1,
+				sizeof(hdcp->auth.msg.hdcp2.ake_stored_km)-1);
+	else
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_AKE_STORED_KM,
+				hdcp->auth.msg.hdcp2.ake_stored_km,
+				sizeof(hdcp->auth.msg.hdcp2.ake_stored_km));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_lc_init(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT,
+				hdcp->auth.msg.hdcp2.lc_init+1,
+				sizeof(hdcp->auth.msg.hdcp2.lc_init)-1);
+	else
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT,
+				hdcp->auth.msg.hdcp2.lc_init,
+				sizeof(hdcp->auth.msg.hdcp2.lc_init));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_eks(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp,
+				MOD_HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS,
+				hdcp->auth.msg.hdcp2.ske_eks+1,
+				sizeof(hdcp->auth.msg.hdcp2.ske_eks)-1);
+	else
+		status = write(hdcp,
+			MOD_HDCP_MESSAGE_ID_WRITE_SKE_SEND_EKS,
+			hdcp->auth.msg.hdcp2.ske_eks,
+			sizeof(hdcp->auth.msg.hdcp2.ske_eks));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK,
+				hdcp->auth.msg.hdcp2.repeater_auth_ack+1,
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack)-1);
+	else
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_SEND_ACK,
+				hdcp->auth.msg.hdcp2.repeater_auth_ack,
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack));
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp,
+				MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE,
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_manage+1,
+				hdcp->auth.msg.hdcp2.stream_manage_size-1);
+	else
+		status = write(hdcp,
+				MOD_HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE,
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_manage,
+				hdcp->auth.msg.hdcp2.stream_manage_size);
+	return status;
+}
+
+enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp)
+{
+	enum mod_hdcp_status status;
+
+	if (is_dp_hdcp(hdcp))
+		status = write(hdcp, MOD_HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
+				hdcp->auth.msg.hdcp2.content_stream_type_dp+1,
+				sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp)-1);
+	else
+		status = MOD_HDCP_STATUS_INVALID_OPERATION;
+	return status;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c
new file mode 100644
index 000000000000..724ebcee9a19
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "hdcp.h"
+
+void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size,
+		uint8_t *buf, uint32_t buf_size)
+{
+	const uint8_t bytes_per_line = 16,
+			byte_size = 3,
+			newline_size = 1,
+			terminator_size = 1;
+	uint32_t line_count = msg_size / bytes_per_line,
+			trailing_bytes = msg_size % bytes_per_line;
+	uint32_t target_size = (byte_size * bytes_per_line + newline_size) * line_count +
+			byte_size * trailing_bytes + newline_size + terminator_size;
+	uint32_t buf_pos = 0;
+	uint32_t i = 0;
+
+	if (buf_size >= target_size) {
+		for (i = 0; i < msg_size; i++) {
+			if (i % bytes_per_line == 0)
+				buf[buf_pos++] = '\n';
+			sprintf(&buf[buf_pos], "%02X ", msg[i]);
+			buf_pos += byte_size;
+		}
+		buf[buf_pos++] = '\0';
+	}
+}
+
+char *mod_hdcp_status_to_str(int32_t status)
+{
+	switch (status) {
+	case MOD_HDCP_STATUS_SUCCESS:
+		return "MOD_HDCP_STATUS_SUCCESS";
+	case MOD_HDCP_STATUS_FAILURE:
+		return "MOD_HDCP_STATUS_FAILURE";
+	case MOD_HDCP_STATUS_RESET_NEEDED:
+		return "MOD_HDCP_STATUS_RESET_NEEDED";
+	case MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND:
+		return "MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND";
+	case MOD_HDCP_STATUS_DISPLAY_NOT_FOUND:
+		return "MOD_HDCP_STATUS_DISPLAY_NOT_FOUND";
+	case MOD_HDCP_STATUS_INVALID_STATE:
+		return "MOD_HDCP_STATUS_INVALID_STATE";
+	case MOD_HDCP_STATUS_NOT_IMPLEMENTED:
+		return "MOD_HDCP_STATUS_NOT_IMPLEMENTED";
+	case MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE:
+		return "MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE";
+	case MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE:
+		return "MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE";
+	case MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE:
+		return "MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE";
+	case MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE:
+		return "MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER:
+		return "MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER";
+	case MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE:
+		return "MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE";
+	case MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING:
+		return "MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING";
+	case MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY:
+		return "MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY";
+	case MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION:
+		return "MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION";
+	case MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED:
+		return "MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED";
+	case MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE";
+	case MOD_HDCP_STATUS_HDCP1_INVALID_BKSV:
+		return "MOD_HDCP_STATUS_HDCP1_INVALID_BKSV";
+	case MOD_HDCP_STATUS_DDC_FAILURE:
+		return "MOD_HDCP_STATUS_DDC_FAILURE";
+	case MOD_HDCP_STATUS_INVALID_OPERATION:
+		return "MOD_HDCP_STATUS_INVALID_OPERATION";
+	case MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE:
+		return "MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE";
+	case MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING:
+		return "MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING";
+	case MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING:
+		return "MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING";
+	case MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING:
+		return "MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED:
+		return "MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING:
+		return "MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED:
+		return "MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED";
+	case MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY:
+		return "MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY";
+	case MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION:
+		return "MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION";
+	case MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING:
+		return "MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING";
+	case MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST:
+		return "MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST";
+	case MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE";
+	case MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE:
+		return "MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE";
+	default:
+		return "MOD_HDCP_STATUS_UNKNOWN";
+	}
+}
+
+char *mod_hdcp_state_id_to_str(int32_t id)
+{
+	switch (id) {
+	case HDCP_UNINITIALIZED:
+		return "HDCP_UNINITIALIZED";
+	case HDCP_INITIALIZED:
+		return "HDCP_INITIALIZED";
+	case HDCP_CP_NOT_DESIRED:
+		return "HDCP_CP_NOT_DESIRED";
+	case H1_A0_WAIT_FOR_ACTIVE_RX:
+		return "H1_A0_WAIT_FOR_ACTIVE_RX";
+	case H1_A1_EXCHANGE_KSVS:
+		return "H1_A1_EXCHANGE_KSVS";
+	case H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER:
+		return "H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER";
+	case H1_A45_AUTHENTICATED:
+		return "H1_A45_AUTHENTICATED";
+	case H1_A8_WAIT_FOR_READY:
+		return "H1_A8_WAIT_FOR_READY";
+	case H1_A9_READ_KSV_LIST:
+		return "H1_A9_READ_KSV_LIST";
+	case D1_A0_DETERMINE_RX_HDCP_CAPABLE:
+		return "D1_A0_DETERMINE_RX_HDCP_CAPABLE";
+	case D1_A1_EXCHANGE_KSVS:
+		return "D1_A1_EXCHANGE_KSVS";
+	case D1_A23_WAIT_FOR_R0_PRIME:
+		return "D1_A23_WAIT_FOR_R0_PRIME";
+	case D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER:
+		return "D1_A2_COMPUTATIONS_A3_VALIDATE_RX_A5_TEST_FOR_REPEATER";
+	case D1_A4_AUTHENTICATED:
+		return "D1_A4_AUTHENTICATED";
+	case D1_A6_WAIT_FOR_READY:
+		return "D1_A6_WAIT_FOR_READY";
+	case D1_A7_READ_KSV_LIST:
+		return "D1_A7_READ_KSV_LIST";
+	case H2_A0_KNOWN_HDCP2_CAPABLE_RX:
+		return "H2_A0_KNOWN_HDCP2_CAPABLE_RX";
+	case H2_A1_SEND_AKE_INIT:
+		return "H2_A1_SEND_AKE_INIT";
+	case H2_A1_VALIDATE_AKE_CERT:
+		return "H2_A1_VALIDATE_AKE_CERT";
+	case H2_A1_SEND_NO_STORED_KM:
+		return "H2_A1_SEND_NO_STORED_KM";
+	case H2_A1_READ_H_PRIME:
+		return "H2_A1_READ_H_PRIME";
+	case H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		return "H2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME";
+	case H2_A1_SEND_STORED_KM:
+		return "H2_A1_SEND_STORED_KM";
+	case H2_A1_VALIDATE_H_PRIME:
+		return "H2_A1_VALIDATE_H_PRIME";
+	case H2_A2_LOCALITY_CHECK:
+		return "H2_A2_LOCALITY_CHECK";
+	case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		return "H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER";
+	case H2_ENABLE_ENCRYPTION:
+		return "H2_ENABLE_ENCRYPTION";
+	case H2_A5_AUTHENTICATED:
+		return "H2_A5_AUTHENTICATED";
+	case H2_A6_WAIT_FOR_RX_ID_LIST:
+		return "H2_A6_WAIT_FOR_RX_ID_LIST";
+	case H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		return "H2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK";
+	case H2_A9_SEND_STREAM_MANAGEMENT:
+		return "H2_A9_SEND_STREAM_MANAGEMENT";
+	case H2_A9_VALIDATE_STREAM_READY:
+		return "H2_A9_VALIDATE_STREAM_READY";
+	case D2_A0_DETERMINE_RX_HDCP_CAPABLE:
+		return "D2_A0_DETERMINE_RX_HDCP_CAPABLE";
+	case D2_A1_SEND_AKE_INIT:
+		return "D2_A1_SEND_AKE_INIT";
+	case D2_A1_VALIDATE_AKE_CERT:
+		return "D2_A1_VALIDATE_AKE_CERT";
+	case D2_A1_SEND_NO_STORED_KM:
+		return "D2_A1_SEND_NO_STORED_KM";
+	case D2_A1_READ_H_PRIME:
+		return "D2_A1_READ_H_PRIME";
+	case D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME:
+		return "D2_A1_READ_PAIRING_INFO_AND_VALIDATE_H_PRIME";
+	case D2_A1_SEND_STORED_KM:
+		return "D2_A1_SEND_STORED_KM";
+	case D2_A1_VALIDATE_H_PRIME:
+		return "D2_A1_VALIDATE_H_PRIME";
+	case D2_A2_LOCALITY_CHECK:
+		return "D2_A2_LOCALITY_CHECK";
+	case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
+		return "D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER";
+	case D2_SEND_CONTENT_STREAM_TYPE:
+		return "D2_SEND_CONTENT_STREAM_TYPE";
+	case D2_ENABLE_ENCRYPTION:
+		return "D2_ENABLE_ENCRYPTION";
+	case D2_A5_AUTHENTICATED:
+		return "D2_A5_AUTHENTICATED";
+	case D2_A6_WAIT_FOR_RX_ID_LIST:
+		return "D2_A6_WAIT_FOR_RX_ID_LIST";
+	case D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK:
+		return "D2_A78_VERIFY_RX_ID_LIST_AND_SEND_ACK";
+	case D2_A9_SEND_STREAM_MANAGEMENT:
+		return "D2_A9_SEND_STREAM_MANAGEMENT";
+	case D2_A9_VALIDATE_STREAM_READY:
+		return "D2_A9_VALIDATE_STREAM_READY";
+	default:
+		return "UNKNOWN_STATE_ID";
+	};
+}
+
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
new file mode 100644
index 000000000000..ff91373ebada
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef MOD_HDCP_LOG_H_
+#define MOD_HDCP_LOG_H_
+
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+#define HDCP_LOG_ERR(hdcp, ...) DRM_WARN(__VA_ARGS__)
+#define HDCP_LOG_VER(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define HDCP_LOG_FSM(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define HDCP_LOG_TOP(hdcp, ...) pr_debug("[HDCP_TOP]:"__VA_ARGS__)
+#define HDCP_LOG_DDC(hdcp, ...) pr_debug("[HDCP_DDC]:"__VA_ARGS__)
+#endif
+
+/* default logs */
+#define HDCP_ERROR_TRACE(hdcp, status) \
+		HDCP_LOG_ERR(hdcp, \
+			"[Link %d] WARNING %s IN STATE %s", \
+			hdcp->config.index, \
+			mod_hdcp_status_to_str(status), \
+			mod_hdcp_state_id_to_str(hdcp->state.id))
+#define HDCP_HDCP1_ENABLED_TRACE(hdcp, displayIndex) \
+		HDCP_LOG_VER(hdcp, \
+			"[Link %d] HDCP 1.4 enabled on display %d", \
+			hdcp->config.index, displayIndex)
+#define HDCP_HDCP2_ENABLED_TRACE(hdcp, displayIndex) \
+		HDCP_LOG_VER(hdcp, \
+			"[Link %d] HDCP 2.2 enabled on display %d", \
+			hdcp->config.index, displayIndex)
+/* state machine logs */
+#define HDCP_REMOVE_DISPLAY_TRACE(hdcp, displayIndex) \
+		HDCP_LOG_FSM(hdcp, \
+			"[Link %d] HDCP_REMOVE_DISPLAY index %d", \
+			hdcp->config.index, displayIndex)
+#define HDCP_INPUT_PASS_TRACE(hdcp, str) \
+		HDCP_LOG_FSM(hdcp, \
+			"[Link %d]\tPASS %s", \
+			hdcp->config.index, str)
+#define HDCP_INPUT_FAIL_TRACE(hdcp, str) \
+		HDCP_LOG_FSM(hdcp, \
+			"[Link %d]\tFAIL %s", \
+			hdcp->config.index, str)
+#define HDCP_NEXT_STATE_TRACE(hdcp, id, output) do { \
+		if (output->watchdog_timer_needed) \
+			HDCP_LOG_FSM(hdcp, \
+				"[Link %d] > %s with %d ms watchdog", \
+				hdcp->config.index, \
+				mod_hdcp_state_id_to_str(id), output->watchdog_timer_delay); \
+		else \
+			HDCP_LOG_FSM(hdcp, \
+				"[Link %d] > %s", hdcp->config.index, \
+				mod_hdcp_state_id_to_str(id)); \
+} while (0)
+#define HDCP_TIMEOUT_TRACE(hdcp) \
+		HDCP_LOG_FSM(hdcp, "[Link %d] --> TIMEOUT", hdcp->config.index)
+#define HDCP_CPIRQ_TRACE(hdcp) \
+		HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index)
+#define HDCP_EVENT_TRACE(hdcp, event) \
+		if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
+			HDCP_TIMEOUT_TRACE(hdcp); \
+		else if (event == MOD_HDCP_EVENT_CPIRQ) \
+			HDCP_CPIRQ_TRACE(hdcp)
+/* TODO: find some way to tell if logging is off to save time */
+#define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \
+		mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
+				sizeof(hdcp->buf)); \
+		HDCP_LOG_DDC(hdcp, "[Link %d] Read %s%s", hdcp->config.index, \
+				msg_name, hdcp->buf); \
+} while (0)
+#define HDCP_DDC_WRITE_TRACE(hdcp, msg_name, msg, msg_size) do { \
+		mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
+				sizeof(hdcp->buf)); \
+		HDCP_LOG_DDC(hdcp, "[Link %d] Write %s%s", \
+				hdcp->config.index, msg_name,\
+				hdcp->buf); \
+} while (0)
+#define HDCP_FULL_DDC_TRACE(hdcp) do { \
+	if (is_hdcp1(hdcp)) { \
+		HDCP_DDC_READ_TRACE(hdcp, "BKSV", hdcp->auth.msg.hdcp1.bksv, \
+				sizeof(hdcp->auth.msg.hdcp1.bksv)); \
+		HDCP_DDC_READ_TRACE(hdcp, "BCAPS", &hdcp->auth.msg.hdcp1.bcaps, \
+				sizeof(hdcp->auth.msg.hdcp1.bcaps)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "AN", hdcp->auth.msg.hdcp1.an, \
+				sizeof(hdcp->auth.msg.hdcp1.an)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "AKSV", hdcp->auth.msg.hdcp1.aksv, \
+				sizeof(hdcp->auth.msg.hdcp1.aksv)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "AINFO", &hdcp->auth.msg.hdcp1.ainfo, \
+				sizeof(hdcp->auth.msg.hdcp1.ainfo)); \
+		HDCP_DDC_READ_TRACE(hdcp, "RI' / R0'", \
+				(uint8_t *)&hdcp->auth.msg.hdcp1.r0p, \
+				sizeof(hdcp->auth.msg.hdcp1.r0p)); \
+		HDCP_DDC_READ_TRACE(hdcp, "BINFO", \
+				(uint8_t *)&hdcp->auth.msg.hdcp1.binfo_dp, \
+				sizeof(hdcp->auth.msg.hdcp1.binfo_dp)); \
+		HDCP_DDC_READ_TRACE(hdcp, "KSVLIST", hdcp->auth.msg.hdcp1.ksvlist, \
+				hdcp->auth.msg.hdcp1.ksvlist_size); \
+		HDCP_DDC_READ_TRACE(hdcp, "V'", hdcp->auth.msg.hdcp1.vp, \
+				sizeof(hdcp->auth.msg.hdcp1.vp)); \
+	} else { \
+		HDCP_DDC_READ_TRACE(hdcp, "HDCP2Version", \
+				&hdcp->auth.msg.hdcp2.hdcp2version_hdmi, \
+				sizeof(hdcp->auth.msg.hdcp2.hdcp2version_hdmi)); \
+		HDCP_DDC_READ_TRACE(hdcp, "Rx Caps", hdcp->auth.msg.hdcp2.rxcaps_dp, \
+				sizeof(hdcp->auth.msg.hdcp2.rxcaps_dp)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "AKE Init", hdcp->auth.msg.hdcp2.ake_init, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_init)); \
+		HDCP_DDC_READ_TRACE(hdcp, "AKE Cert", hdcp->auth.msg.hdcp2.ake_cert, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_cert)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "Stored KM", \
+				hdcp->auth.msg.hdcp2.ake_stored_km, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_stored_km)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "No Stored KM", \
+				hdcp->auth.msg.hdcp2.ake_no_stored_km, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)); \
+		HDCP_DDC_READ_TRACE(hdcp, "H'", hdcp->auth.msg.hdcp2.ake_h_prime, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)); \
+		HDCP_DDC_READ_TRACE(hdcp, "Pairing Info", \
+				hdcp->auth.msg.hdcp2.ake_pairing_info, \
+				sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "LC Init", hdcp->auth.msg.hdcp2.lc_init, \
+				sizeof(hdcp->auth.msg.hdcp2.lc_init)); \
+		HDCP_DDC_READ_TRACE(hdcp, "L'", hdcp->auth.msg.hdcp2.lc_l_prime, \
+				sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "Exchange KS", hdcp->auth.msg.hdcp2.ske_eks, \
+				sizeof(hdcp->auth.msg.hdcp2.ske_eks)); \
+		HDCP_DDC_READ_TRACE(hdcp, "Rx Status", \
+				(uint8_t *)&hdcp->auth.msg.hdcp2.rxstatus, \
+				sizeof(hdcp->auth.msg.hdcp2.rxstatus)); \
+		HDCP_DDC_READ_TRACE(hdcp, "Rx Id List", \
+				hdcp->auth.msg.hdcp2.rx_id_list, \
+				hdcp->auth.msg.hdcp2.rx_id_list_size); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "Rx Id List Ack", \
+				hdcp->auth.msg.hdcp2.repeater_auth_ack, \
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Management", \
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_manage, \
+				hdcp->auth.msg.hdcp2.stream_manage_size); \
+		HDCP_DDC_READ_TRACE(hdcp, "Stream Ready", \
+				hdcp->auth.msg.hdcp2.repeater_auth_stream_ready, \
+				sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)); \
+		HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Type", \
+				hdcp->auth.msg.hdcp2.content_stream_type_dp, \
+				sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp)); \
+	} \
+} while (0)
+#define HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, i) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\tadd display %d", \
+				hdcp->config.index, i)
+#define HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, i) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\tremove display %d", \
+				hdcp->config.index, i)
+#define HDCP_TOP_HDCP1_DESTROY_SESSION_TRACE(hdcp) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\tdestroy hdcp1 session", \
+				hdcp->config.index)
+#define HDCP_TOP_HDCP2_DESTROY_SESSION_TRACE(hdcp) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\tdestroy hdcp2 session", \
+				hdcp->config.index)
+#define HDCP_TOP_RESET_AUTH_TRACE(hdcp) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\treset authentication", hdcp->config.index)
+#define HDCP_TOP_RESET_CONN_TRACE(hdcp) \
+		HDCP_LOG_TOP(hdcp, "[Link %d]\treset connection", hdcp->config.index)
+#define HDCP_TOP_INTERFACE_TRACE(hdcp) do { \
+		HDCP_LOG_TOP(hdcp, "\n"); \
+		HDCP_LOG_TOP(hdcp, "[Link %d] %s", hdcp->config.index, __func__); \
+} while (0)
+#define HDCP_TOP_INTERFACE_TRACE_WITH_INDEX(hdcp, i) do { \
+		HDCP_LOG_TOP(hdcp, "\n"); \
+		HDCP_LOG_TOP(hdcp, "[Link %d] %s display %d", hdcp->config.index, __func__, i); \
+} while (0)
+
+#endif // MOD_HDCP_LOG_H_
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
new file mode 100644
index 000000000000..7911dc157d5a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#define MAX_NUM_DISPLAYS 24
+
+
+#include "hdcp.h"
+
+#include "amdgpu.h"
+#include "hdcp_psp.h"
+
+static void hdcp2_message_init(struct mod_hdcp *hdcp,
+			       struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *in)
+{
+	in->session_handle = hdcp->auth.id;
+	in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE;
+	in->prepare.msg2_id = TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE;
+	in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE;
+	in->process.msg1_desc.msg_size = 0;
+	in->process.msg2_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE;
+	in->process.msg2_desc.msg_size = 0;
+	in->process.msg3_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE;
+	in->process.msg3_desc.msg_size = 0;
+}
+enum mod_hdcp_status mod_hdcp_remove_display_topology(struct mod_hdcp *hdcp)
+{
+
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_dtm_shared_memory *dtm_cmd;
+	struct mod_hdcp_display *display = NULL;
+	uint8_t i;
+
+	dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+		if (is_display_added(&(hdcp->connection.displays[i]))) {
+
+			memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
+
+			display = &hdcp->connection.displays[i];
+
+			dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_UPDATE_V2;
+			dtm_cmd->dtm_in_message.topology_update_v2.display_handle = display->index;
+			dtm_cmd->dtm_in_message.topology_update_v2.is_active = 0;
+			dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE;
+
+			psp_dtm_invoke(psp, dtm_cmd->cmd_id);
+
+			if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS)
+				return MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
+
+			display->state = MOD_HDCP_DISPLAY_ACTIVE;
+			HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, display->index);
+		}
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_add_display_topology(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_dtm_shared_memory *dtm_cmd;
+	struct mod_hdcp_display *display = NULL;
+	struct mod_hdcp_link *link = &hdcp->connection.link;
+	uint8_t i;
+
+	if (!psp->dtm_context.dtm_initialized) {
+		DRM_ERROR("Failed to add display topology, DTM TA is not initialized.");
+		return MOD_HDCP_STATUS_FAILURE;
+	}
+
+	dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+		if (hdcp->connection.displays[i].state == MOD_HDCP_DISPLAY_ACTIVE) {
+			display = &hdcp->connection.displays[i];
+
+			memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
+
+			dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_UPDATE_V2;
+			dtm_cmd->dtm_in_message.topology_update_v2.display_handle = display->index;
+			dtm_cmd->dtm_in_message.topology_update_v2.is_active = 1;
+			dtm_cmd->dtm_in_message.topology_update_v2.controller = display->controller;
+			dtm_cmd->dtm_in_message.topology_update_v2.ddc_line = link->ddc_line;
+			dtm_cmd->dtm_in_message.topology_update_v2.dig_be = link->dig_be;
+			dtm_cmd->dtm_in_message.topology_update_v2.dig_fe = display->dig_fe;
+			dtm_cmd->dtm_in_message.topology_update_v2.dp_mst_vcid = display->vc_id;
+			dtm_cmd->dtm_in_message.topology_update_v2.max_hdcp_supported_version =
+				TA_DTM_HDCP_VERSION_MAX_SUPPORTED__2_2;
+			dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE;
+
+			psp_dtm_invoke(psp, dtm_cmd->cmd_id);
+
+			if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS)
+				return MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
+
+			display->state = MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
+			HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, display->index);
+		}
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp)
+{
+
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct mod_hdcp_display *display = get_first_added_display(hdcp);
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	if (!psp->hdcp_context.hdcp_initialized) {
+		DRM_ERROR("Failed to create hdcp session. HDCP TA is not initialized.");
+		return MOD_HDCP_STATUS_FAILURE;
+	}
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_create_session.display_handle = display->index;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_CREATE_SESSION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	hdcp->auth.id = hdcp_cmd->out_msg.hdcp1_create_session.session_handle;
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE;
+
+	hdcp->auth.msg.hdcp1.ainfo = hdcp_cmd->out_msg.hdcp1_create_session.ainfo_primary;
+	memcpy(hdcp->auth.msg.hdcp1.aksv, hdcp_cmd->out_msg.hdcp1_create_session.aksv_primary,
+		sizeof(hdcp->auth.msg.hdcp1.aksv));
+	memcpy(hdcp->auth.msg.hdcp1.an, hdcp_cmd->out_msg.hdcp1_create_session.an_primary,
+		sizeof(hdcp->auth.msg.hdcp1.an));
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp)
+{
+
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_destroy_session.session_handle = hdcp->auth.id;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_DESTROY_SESSION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE;
+
+	HDCP_TOP_HDCP1_DESTROY_SESSION_TRACE(hdcp);
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_first_part_authentication.session_handle = hdcp->auth.id;
+
+	memcpy(hdcp_cmd->in_msg.hdcp1_first_part_authentication.bksv_primary, hdcp->auth.msg.hdcp1.bksv,
+		TA_HDCP__HDCP1_KSV_SIZE);
+
+	hdcp_cmd->in_msg.hdcp1_first_part_authentication.r0_prime_primary = hdcp->auth.msg.hdcp1.r0p;
+	hdcp_cmd->in_msg.hdcp1_first_part_authentication.bcaps = hdcp->auth.msg.hdcp1.bcaps;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_FIRST_PART_AUTHENTICATION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
+
+	if (hdcp_cmd->out_msg.hdcp1_first_part_authentication.authentication_status ==
+	    TA_HDCP_AUTHENTICATION_STATUS__HDCP1_FIRST_PART_COMPLETE) {
+		/* needs second part of authentication */
+		hdcp->connection.is_repeater = 1;
+	} else if (hdcp_cmd->out_msg.hdcp1_first_part_authentication.authentication_status ==
+		   TA_HDCP_AUTHENTICATION_STATUS__HDCP1_AUTHENTICATED) {
+		hdcp->connection.is_repeater = 0;
+	} else
+		return MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
+
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct mod_hdcp_display *display = get_first_added_display(hdcp);
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_enable_encryption.session_handle = hdcp->auth.id;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_ENABLE_ENCRYPTION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION;
+
+	if (!is_dp_mst_hdcp(hdcp)) {
+		display->state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
+		HDCP_HDCP1_ENABLED_TRACE(hdcp, display->index);
+	}
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_second_part_authentication.session_handle = hdcp->auth.id;
+
+	hdcp_cmd->in_msg.hdcp1_second_part_authentication.ksv_list_size = hdcp->auth.msg.hdcp1.ksvlist_size;
+	memcpy(hdcp_cmd->in_msg.hdcp1_second_part_authentication.ksv_list, hdcp->auth.msg.hdcp1.ksvlist,
+	       hdcp->auth.msg.hdcp1.ksvlist_size);
+
+	memcpy(hdcp_cmd->in_msg.hdcp1_second_part_authentication.v_prime, hdcp->auth.msg.hdcp1.vp,
+	       sizeof(hdcp->auth.msg.hdcp1.vp));
+
+	hdcp_cmd->in_msg.hdcp1_second_part_authentication.bstatus_binfo =
+		is_dp_hdcp(hdcp) ? hdcp->auth.msg.hdcp1.binfo_dp : hdcp->auth.msg.hdcp1.bstatus;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_SECOND_PART_AUTHENTICATION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE;
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp *hdcp)
+{
+
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	int i = 0;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+
+		if (hdcp->connection.displays[i].state != MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED ||
+		    hdcp->connection.displays[i].adjust.disable)
+			continue;
+
+		memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+		hdcp_cmd->in_msg.hdcp1_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
+		hdcp_cmd->in_msg.hdcp1_enable_dp_stream_encryption.display_handle = hdcp->connection.displays[i].index;
+		hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_ENABLE_DP_STREAM_ENCRYPTION;
+
+		psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+		if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+			return MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE;
+
+		hdcp->connection.displays[i].state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
+		HDCP_HDCP1_ENABLED_TRACE(hdcp, hdcp->connection.displays[i].index);
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_link_maintenance(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp1_get_encryption_status.session_handle = hdcp->auth.id;
+
+	hdcp_cmd->out_msg.hdcp1_get_encryption_status.protection_level = 0;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP1_GET_ENCRYPTION_STATUS;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE;
+
+	return (hdcp_cmd->out_msg.hdcp1_get_encryption_status.protection_level == 1)
+		       ? MOD_HDCP_STATUS_SUCCESS
+		       : MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp1_get_link_encryption_status(struct mod_hdcp *hdcp,
+							       enum mod_hdcp_encryption_status *encryption_status)
+{
+	*encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+
+	if (mod_hdcp_hdcp1_link_maintenance(hdcp) != MOD_HDCP_STATUS_SUCCESS)
+		return MOD_HDCP_STATUS_FAILURE;
+
+	*encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP1_ON;
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct mod_hdcp_display *display = get_first_added_display(hdcp);
+
+	if (!psp->hdcp_context.hdcp_initialized) {
+		DRM_ERROR("Failed to create hdcp session, HDCP TA is not initialized");
+		return MOD_HDCP_STATUS_FAILURE;
+	}
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	if (!display)
+		return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
+	hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index;
+
+	if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0)
+		hdcp_cmd->in_msg.hdcp2_create_session_v2.negotiate_content_type =
+			TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__FORCE_TYPE0;
+	else if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_1)
+		hdcp_cmd->in_msg.hdcp2_create_session_v2.negotiate_content_type =
+			TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__FORCE_TYPE1;
+	else if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_MAX)
+		hdcp_cmd->in_msg.hdcp2_create_session_v2.negotiate_content_type =
+			TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__MAX_SUPPORTED;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_CREATE_SESSION_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE;
+
+	hdcp->auth.id = hdcp_cmd->out_msg.hdcp2_create_session_v2.session_handle;
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp2_destroy_session.session_handle = hdcp->auth.id;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_DESTROY_SESSION;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE;
+
+	HDCP_TOP_HDCP2_DESTROY_SESSION_TRACE(hdcp);
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__AKE_INIT;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE;
+
+	memcpy(&hdcp->auth.msg.hdcp2.ake_init[0], &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.ake_init));
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__AKE_SEND_CERT;
+	msg_in->process.msg1_desc.msg_size = TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT;
+
+	memcpy(&msg_in->process.receiver_message[0], hdcp->auth.msg.hdcp2.ake_cert,
+	       sizeof(hdcp->auth.msg.hdcp2.ake_cert));
+
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__AKE_NO_STORED_KM;
+	msg_in->prepare.msg2_id = TA_HDCP_HDCP2_MSG_ID__AKE_STORED_KM;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE;
+
+	memcpy(hdcp->auth.msg.hdcp2.ake_no_stored_km, &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km));
+
+	memcpy(hdcp->auth.msg.hdcp2.ake_stored_km,
+	       &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)],
+	       sizeof(hdcp->auth.msg.hdcp2.ake_stored_km));
+
+	if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
+		hdcp->connection.is_km_stored = msg_out->process.is_km_stored ? 1 : 0;
+		hdcp->connection.is_repeater = msg_out->process.is_repeater ? 1 : 0;
+		return MOD_HDCP_STATUS_SUCCESS;
+	}
+
+	return MOD_HDCP_STATUS_FAILURE;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__AKE_SEND_H_PRIME;
+	msg_in->process.msg1_desc.msg_size = TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_H_PRIME;
+
+	memcpy(&msg_in->process.receiver_message[0], hdcp->auth.msg.hdcp2.ake_h_prime,
+	       sizeof(hdcp->auth.msg.hdcp2.ake_h_prime));
+
+	if (!hdcp->connection.is_km_stored) {
+		msg_in->process.msg2_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__AKE_SEND_PAIRING_INFO;
+		msg_in->process.msg2_desc.msg_size = TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_PAIRING_INFO;
+		memcpy(&msg_in->process.receiver_message[sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)],
+		       hdcp->auth.msg.hdcp2.ake_pairing_info, sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info));
+	}
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
+
+	if (msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
+	else if (!hdcp->connection.is_km_stored &&
+		 msg_out->process.msg2_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE;
+
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__LC_INIT;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE;
+
+	memcpy(hdcp->auth.msg.hdcp2.lc_init, &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.lc_init));
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__LC_SEND_L_PRIME;
+	msg_in->process.msg1_desc.msg_size = TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__LC_SEND_L_PRIME;
+
+	memcpy(&msg_in->process.receiver_message[0], hdcp->auth.msg.hdcp2.lc_l_prime,
+	       sizeof(hdcp->auth.msg.hdcp2.lc_l_prime));
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE;
+
+	if (msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE;
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__SKE_SEND_EKS;
+
+	if (is_dp_hdcp(hdcp))
+		msg_in->prepare.msg2_id = TA_HDCP_HDCP2_MSG_ID__SIGNAL_CONTENT_STREAM_TYPE_DP;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE;
+
+	memcpy(hdcp->auth.msg.hdcp2.ske_eks, &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.ske_eks));
+	msg_out->prepare.msg1_desc.msg_size = sizeof(hdcp->auth.msg.hdcp2.ske_eks);
+
+	if (is_dp_hdcp(hdcp)) {
+		memcpy(hdcp->auth.msg.hdcp2.content_stream_type_dp,
+		       &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ske_eks)],
+		       sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp));
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_enable_encryption(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct mod_hdcp_display *display = get_first_added_display(hdcp);
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	if (!display)
+		return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
+	hdcp_cmd->in_msg.hdcp1_enable_encryption.session_handle = hdcp->auth.id;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_SET_ENCRYPTION;
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE;
+
+	if (!is_dp_mst_hdcp(hdcp)) {
+		display->state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
+		HDCP_HDCP2_ENABLED_TRACE(hdcp, display->index);
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_SEND_RECEIVERID_LIST;
+	msg_in->process.msg1_desc.msg_size = sizeof(hdcp->auth.msg.hdcp2.rx_id_list);
+	memcpy(&msg_in->process.receiver_message[0], hdcp->auth.msg.hdcp2.rx_id_list,
+	       sizeof(hdcp->auth.msg.hdcp2.rx_id_list));
+
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_SEND_ACK;
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE;
+
+	memcpy(hdcp->auth.msg.hdcp2.repeater_auth_ack, &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack));
+
+	if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
+		hdcp->connection.is_km_stored = msg_out->process.is_km_stored ? 1 : 0;
+		hdcp->connection.is_repeater = msg_out->process.is_repeater ? 1 : 0;
+		return MOD_HDCP_STATUS_SUCCESS;
+	}
+
+
+	return MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	uint8_t i;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+
+	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
+		if (hdcp->connection.displays[i].state != MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED ||
+		    hdcp->connection.displays[i].adjust.disable)
+			continue;
+		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->connection.displays[i].index;
+		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
+
+		hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_ENABLE_DP_STREAM_ENCRYPTION;
+		psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+		if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+			break;
+
+		hdcp->connection.displays[i].state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
+		HDCP_HDCP2_ENABLED_TRACE(hdcp, hdcp->connection.displays[i].index);
+	}
+
+	return (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS) ? MOD_HDCP_STATUS_SUCCESS
+								  : MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(struct mod_hdcp *hdcp)
+{
+
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->prepare.msg1_id = TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_STREAM_MANAGE;
+
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE;
+
+	hdcp->auth.msg.hdcp2.stream_manage_size = msg_out->prepare.msg1_desc.msg_size;
+
+	memcpy(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage, &msg_out->prepare.transmitter_message[0],
+	       sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage));
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2;
+	msg_out = &hdcp_cmd->out_msg.hdcp2_prepare_process_authentication_message_v2;
+
+	hdcp2_message_init(hdcp, msg_in);
+
+	msg_in->process.msg1_desc.msg_id = TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_STREAM_READY;
+
+	msg_in->process.msg1_desc.msg_size = sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready);
+
+	memcpy(&msg_in->process.receiver_message[0], hdcp->auth.msg.hdcp2.repeater_auth_stream_ready,
+	       sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready));
+
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	return (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS) &&
+			       (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+		       ? MOD_HDCP_STATUS_SUCCESS
+		       : MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE;
+}
+
+enum mod_hdcp_status mod_hdcp_hdcp2_get_link_encryption_status(struct mod_hdcp *hdcp,
+							       enum mod_hdcp_encryption_status *encryption_status)
+{
+	struct psp_context *psp = hdcp->config.psp.handle;
+	struct ta_hdcp_shared_memory *hdcp_cmd;
+
+	hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+
+	memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
+	hdcp_cmd->in_msg.hdcp2_get_encryption_status.session_handle = hdcp->auth.id;
+	hdcp_cmd->out_msg.hdcp2_get_encryption_status.protection_level = 0;
+	hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_GET_ENCRYPTION_STATUS;
+	*encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+
+	psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
+
+	if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+		return MOD_HDCP_STATUS_FAILURE;
+
+	if (hdcp_cmd->out_msg.hdcp2_get_encryption_status.protection_level == 1) {
+		if (hdcp_cmd->out_msg.hdcp2_get_encryption_status.hdcp2_type == TA_HDCP2_CONTENT_TYPE__TYPE1)
+			*encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON;
+		else
+			*encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON;
+	}
+
+	return MOD_HDCP_STATUS_SUCCESS;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
new file mode 100644
index 000000000000..82a5e997d573
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -0,0 +1,466 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef MODULES_HDCP_HDCP_PSP_H_
+#define MODULES_HDCP_HDCP_PSP_H_
+
+/*
+ * NOTE: These parameters are a one-to-one copy of the
+ * parameters required by PSP
+ */
+enum bgd_security_hdcp_encryption_level {
+	HDCP_ENCRYPTION_LEVEL__INVALID = 0,
+	HDCP_ENCRYPTION_LEVEL__OFF,
+	HDCP_ENCRYPTION_LEVEL__ON
+};
+
+enum bgd_security_hdcp2_content_type {
+	HDCP2_CONTENT_TYPE__INVALID = 0,
+	HDCP2_CONTENT_TYPE__TYPE0,
+	HDCP2_CONTENT_TYPE__TYPE1
+};
+enum ta_dtm_command {
+	TA_DTM_COMMAND__UNUSED_1 = 1,
+	TA_DTM_COMMAND__TOPOLOGY_UPDATE_V2,
+	TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE
+};
+
+/* DTM related enumerations */
+/**********************************************************/
+
+enum ta_dtm_status {
+	TA_DTM_STATUS__SUCCESS = 0x00,
+	TA_DTM_STATUS__GENERIC_FAILURE = 0x01,
+	TA_DTM_STATUS__INVALID_PARAMETER = 0x02,
+	TA_DTM_STATUS__NULL_POINTER = 0x3
+};
+
+/* input/output structures for DTM commands */
+/**********************************************************/
+/**
+ * Input structures
+ */
+enum ta_dtm_hdcp_version_max_supported {
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__NONE = 0,
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__1_x = 10,
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__2_0 = 20,
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__2_1 = 21,
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__2_2 = 22,
+	TA_DTM_HDCP_VERSION_MAX_SUPPORTED__2_3 = 23
+};
+
+struct ta_dtm_topology_update_input_v2 {
+	/* display handle is unique across the driver and is used to identify a display */
+	/* for all security interfaces which reference displays such as HDCP */
+	uint32_t display_handle;
+	uint32_t is_active;
+	uint32_t is_miracast;
+	uint32_t controller;
+	uint32_t ddc_line;
+	uint32_t dig_be;
+	uint32_t dig_fe;
+	uint32_t dp_mst_vcid;
+	uint32_t is_assr;
+	uint32_t max_hdcp_supported_version;
+};
+
+struct ta_dtm_topology_assr_enable {
+	uint32_t display_topology_dig_be_index;
+};
+
+/**
+ * Output structures
+ */
+
+/* No output structures yet */
+
+union ta_dtm_cmd_input {
+	struct ta_dtm_topology_update_input_v2 topology_update_v2;
+	struct ta_dtm_topology_assr_enable topology_assr_enable;
+};
+
+union ta_dtm_cmd_output {
+	uint32_t reserved;
+};
+
+struct ta_dtm_shared_memory {
+	uint32_t cmd_id;
+	uint32_t resp_id;
+	enum ta_dtm_status dtm_status;
+	uint32_t reserved;
+	union ta_dtm_cmd_input dtm_in_message;
+	union ta_dtm_cmd_output dtm_out_message;
+};
+
+int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd,
+		uint64_t fence_mc_addr);
+
+enum ta_hdcp_command {
+	TA_HDCP_COMMAND__INITIALIZE,
+	TA_HDCP_COMMAND__HDCP1_CREATE_SESSION,
+	TA_HDCP_COMMAND__HDCP1_DESTROY_SESSION,
+	TA_HDCP_COMMAND__HDCP1_FIRST_PART_AUTHENTICATION,
+	TA_HDCP_COMMAND__HDCP1_SECOND_PART_AUTHENTICATION,
+	TA_HDCP_COMMAND__HDCP1_ENABLE_ENCRYPTION,
+	TA_HDCP_COMMAND__HDCP1_ENABLE_DP_STREAM_ENCRYPTION,
+	TA_HDCP_COMMAND__HDCP1_GET_ENCRYPTION_STATUS,
+	TA_HDCP_COMMAND__UNUSED_1,
+	TA_HDCP_COMMAND__HDCP2_DESTROY_SESSION,
+	TA_HDCP_COMMAND__UNUSED_2,
+	TA_HDCP_COMMAND__HDCP2_SET_ENCRYPTION,
+	TA_HDCP_COMMAND__HDCP2_GET_ENCRYPTION_STATUS,
+	TA_HDCP_COMMAND__UNUSED_3,
+	TA_HDCP_COMMAND__HDCP2_CREATE_SESSION_V2,
+	TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2,
+	TA_HDCP_COMMAND__HDCP2_ENABLE_DP_STREAM_ENCRYPTION
+};
+
+enum ta_hdcp2_msg_id {
+	TA_HDCP_HDCP2_MSG_ID__NULL_MESSAGE = 1,
+	TA_HDCP_HDCP2_MSG_ID__AKE_INIT = 2,
+	TA_HDCP_HDCP2_MSG_ID__AKE_SEND_CERT = 3,
+	TA_HDCP_HDCP2_MSG_ID__AKE_NO_STORED_KM = 4,
+	TA_HDCP_HDCP2_MSG_ID__AKE_STORED_KM = 5,
+	TA_HDCP_HDCP2_MSG_ID__AKE_SEND_RRX = 6,
+	TA_HDCP_HDCP2_MSG_ID__AKE_SEND_H_PRIME = 7,
+	TA_HDCP_HDCP2_MSG_ID__AKE_SEND_PAIRING_INFO = 8,
+	TA_HDCP_HDCP2_MSG_ID__LC_INIT = 9,
+	TA_HDCP_HDCP2_MSG_ID__LC_SEND_L_PRIME = 10,
+	TA_HDCP_HDCP2_MSG_ID__SKE_SEND_EKS = 11,
+	TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_SEND_RECEIVERID_LIST = 12,
+	TA_HDCP_HDCP2_MSG_ID__RTT_READY = 13,
+	TA_HDCP_HDCP2_MSG_ID__RTT_CHALLENGE = 14,
+	TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_SEND_ACK = 15,
+	TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_STREAM_MANAGE = 16,
+	TA_HDCP_HDCP2_MSG_ID__REPEATERAUTH_STREAM_READY = 17,
+	TA_HDCP_HDCP2_MSG_ID__RECEIVER_AUTH_STATUS = 18,
+	TA_HDCP_HDCP2_MSG_ID__AKE_TRANSMITTER_INFO = 19,
+	TA_HDCP_HDCP2_MSG_ID__AKE_RECEIVER_INFO = 20,
+	TA_HDCP_HDCP2_MSG_ID__SIGNAL_CONTENT_STREAM_TYPE_DP = 129
+};
+
+enum ta_hdcp2_hdcp2_msg_id_max_size {
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__NULL_MESSAGE = 0,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_INIT = 12,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT = 534,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM = 129,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM = 33,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_RRX = 9,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_H_PRIME = 33,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_PAIRING_INFO = 17,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__LC_INIT = 9,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__LC_SEND_L_PRIME = 33,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__SKE_SEND_EKS = 25,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__REPEATERAUTH_SEND_RECEIVERID_LIST = 181,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__RTT_READY = 1,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__RTT_CHALLENGE = 17,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__REPEATERAUTH_SEND_RACK = 17,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__REPEATERAUTH_STREAM_MANAGE = 13,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__REPEATERAUTH_STREAM_READY = 33,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__RECEIVER_AUTH_STATUS = 4,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_TRANSMITTER_INFO = 6,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO = 6,
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__SIGNAL_CONTENT_STREAM_TYPE_DP = 1
+};
+
+/* HDCP related enumerations */
+/**********************************************************/
+#define TA_HDCP__INVALID_SESSION 0xFFFF
+#define TA_HDCP__HDCP1_AN_SIZE 8
+#define TA_HDCP__HDCP1_KSV_SIZE 5
+#define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127
+#define TA_HDCP__HDCP1_V_PRIME_SIZE 20
+#define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE                                                                                 \
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6
+
+// 64 bits boundaries
+#define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE                                                                                 \
+	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4
+
+enum ta_hdcp_status {
+	TA_HDCP_STATUS__SUCCESS = 0x00,
+	TA_HDCP_STATUS__GENERIC_FAILURE = 0x01,
+	TA_HDCP_STATUS__NULL_POINTER = 0x02,
+	TA_HDCP_STATUS__FAILED_ALLOCATING_SESSION = 0x03,
+	TA_HDCP_STATUS__FAILED_SETUP_TX = 0x04,
+	TA_HDCP_STATUS__INVALID_PARAMETER = 0x05,
+	TA_HDCP_STATUS__VHX_ERROR = 0x06,
+	TA_HDCP_STATUS__SESSION_NOT_CLOSED_PROPERLY = 0x07,
+	TA_HDCP_STATUS__SRM_FAILURE = 0x08,
+	TA_HDCP_STATUS__MST_AUTHENTICATED_ALREADY_STARTED = 0x09,
+	TA_HDCP_STATUS__AKE_SEND_CERT_FAILURE = 0x0A,
+	TA_HDCP_STATUS__AKE_NO_STORED_KM_FAILURE = 0x0B,
+	TA_HDCP_STATUS__AKE_SEND_HPRIME_FAILURE = 0x0C,
+	TA_HDCP_STATUS__LC_SEND_LPRIME_FAILURE = 0x0D,
+	TA_HDCP_STATUS__SKE_SEND_EKS_FAILURE = 0x0E,
+	TA_HDCP_STATUS__REPAUTH_SEND_RXIDLIST_FAILURE = 0x0F,
+	TA_HDCP_STATUS__REPAUTH_STREAM_READY_FAILURE = 0x10,
+	TA_HDCP_STATUS__ASD_GENERIC_FAILURE = 0x11,
+	TA_HDCP_STATUS__UNWRAP_SECRET_FAILURE = 0x12,
+	TA_HDCP_STATUS__ENABLE_ENCR_FAILURE = 0x13,
+	TA_HDCP_STATUS__DISABLE_ENCR_FAILURE = 0x14,
+	TA_HDCP_STATUS__NOT_ENOUGH_MEMORY_FAILURE = 0x15,
+	TA_HDCP_STATUS__UNKNOWN_MESSAGE = 0x16,
+	TA_HDCP_STATUS__TOO_MANY_STREAM = 0x17
+};
+
+enum ta_hdcp_authentication_status {
+	TA_HDCP_AUTHENTICATION_STATUS__NOT_STARTED = 0x00,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP1_FIRST_PART_FAILED = 0x01,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP1_FIRST_PART_COMPLETE = 0x02,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP1_SECOND_PART_FAILED = 0x03,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP1_AUTHENTICATED = 0x04,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP22_AUTHENTICATION_PENDING = 0x06,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP22_AUTHENTICATION_FAILED = 0x07,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP22_AUTHENTICATED = 0x08,
+	TA_HDCP_AUTHENTICATION_STATUS__HDCP1_KSV_VALIDATION_FAILED = 0x09
+};
+
+enum ta_hdcp2_msg_authentication_status {
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS = 0,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__KM_NOT_AVAILABLE,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__UNUSED,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID = 100, // everything above does not fail the request
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__NOT_ENOUGH_MEMORY,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__NOT_EXPECTED_MSG,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__SIGNATURE_CERTIFICAT_ERROR,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INCORRECT_HDCP_VERSION,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__UNKNOWN_MESSAGE,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID_HMAC,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID_TOPOLOGY,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID_SEQ_NUM,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID_SIZE,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__INVALID_LENGTH,
+	TA_HDCP2_MSG_AUTHENTICATION_STATUS__REAUTH_REQUEST
+};
+
+enum ta_hdcp_content_type {
+	TA_HDCP2_CONTENT_TYPE__TYPE0 = 1,
+	TA_HDCP2_CONTENT_TYPE__TYPE1,
+};
+
+enum ta_hdcp_content_type_negotiation_type {
+	TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__FORCE_TYPE0 = 1,
+	TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__FORCE_TYPE1,
+	TA_HDCP2_CONTENT_TYPE_NEGOTIATION_TYPE__MAX_SUPPORTED
+};
+
+enum ta_hdcp2_version {
+	TA_HDCP2_VERSION_UNKNOWN = 0,
+	TA_HDCP2_VERSION_2_0 = 20,
+	TA_HDCP2_VERSION_2_1 = 21,
+	TA_HDCP2_VERSION_2_2 = 22
+};
+
+/* input/output structures for HDCP commands */
+/**********************************************************/
+struct ta_hdcp_cmd_hdcp1_create_session_input {
+	uint8_t display_handle;
+};
+
+struct ta_hdcp_cmd_hdcp1_create_session_output {
+	uint32_t session_handle;
+	uint8_t an_primary[TA_HDCP__HDCP1_AN_SIZE];
+	uint8_t aksv_primary[TA_HDCP__HDCP1_KSV_SIZE];
+	uint8_t ainfo_primary;
+	uint8_t an_secondary[TA_HDCP__HDCP1_AN_SIZE];
+	uint8_t aksv_secondary[TA_HDCP__HDCP1_KSV_SIZE];
+	uint8_t ainfo_secondary;
+};
+
+struct ta_hdcp_cmd_hdcp1_destroy_session_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp1_first_part_authentication_input {
+	uint32_t session_handle;
+	uint8_t bksv_primary[TA_HDCP__HDCP1_KSV_SIZE];
+	uint8_t bksv_secondary[TA_HDCP__HDCP1_KSV_SIZE];
+	uint8_t bcaps;
+	uint16_t r0_prime_primary;
+	uint16_t r0_prime_secondary;
+};
+
+struct ta_hdcp_cmd_hdcp1_first_part_authentication_output {
+	enum ta_hdcp_authentication_status authentication_status;
+};
+
+struct ta_hdcp_cmd_hdcp1_second_part_authentication_input {
+	uint32_t session_handle;
+	uint16_t bstatus_binfo;
+	uint8_t ksv_list[TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES][TA_HDCP__HDCP1_KSV_SIZE];
+	uint32_t ksv_list_size;
+	uint8_t pj_prime;
+	uint8_t v_prime[TA_HDCP__HDCP1_V_PRIME_SIZE];
+};
+
+struct ta_hdcp_cmd_hdcp1_second_part_authentication_output {
+	enum ta_hdcp_authentication_status authentication_status;
+};
+
+struct ta_hdcp_cmd_hdcp1_enable_encryption_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp1_enable_dp_stream_encryption_input {
+	uint32_t session_handle;
+	uint32_t display_handle;
+};
+
+struct ta_hdcp_cmd_hdcp1_get_encryption_status_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp1_get_encryption_status_output {
+	uint32_t protection_level;
+};
+
+struct ta_hdcp_cmd_hdcp2_create_session_input_v2 {
+	uint32_t display_handle;
+	enum ta_hdcp_content_type_negotiation_type negotiate_content_type;
+};
+
+struct ta_hdcp_cmd_hdcp2_create_session_output_v2 {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp2_destroy_session_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp2_authentication_message_v2 {
+	enum ta_hdcp2_msg_id msg_id;
+	uint32_t msg_size;
+};
+
+struct ta_hdcp_cmd_hdcp2_process_authentication_message_input_v2 {
+	struct ta_hdcp_cmd_hdcp2_authentication_message_v2 msg1_desc;
+	struct ta_hdcp_cmd_hdcp2_authentication_message_v2 msg2_desc;
+	struct ta_hdcp_cmd_hdcp2_authentication_message_v2 msg3_desc;
+	uint8_t receiver_message[TA_HDCP__HDCP2_RX_BUF_MAX_SIZE];
+};
+
+struct ta_hdcp_cmd_hdcp2_process_authentication_message_output_v2 {
+	uint32_t hdcp_version;
+	uint32_t is_km_stored;
+	uint32_t is_locality_precompute_support;
+	uint32_t is_repeater;
+	enum ta_hdcp2_msg_authentication_status msg1_status;
+	enum ta_hdcp2_msg_authentication_status msg2_status;
+	enum ta_hdcp2_msg_authentication_status msg3_status;
+};
+
+struct ta_hdcp_cmd_hdcp2_prepare_authentication_message_input_v2 {
+	enum ta_hdcp2_msg_id msg1_id;
+	enum ta_hdcp2_msg_id msg2_id;
+};
+
+struct ta_hdcp_cmd_hdcp2_prepare_authentication_message_output_v2 {
+	enum ta_hdcp2_msg_authentication_status msg1_status;
+	enum ta_hdcp2_msg_authentication_status msg2_status;
+	struct ta_hdcp_cmd_hdcp2_authentication_message_v2 msg1_desc;
+	struct ta_hdcp_cmd_hdcp2_authentication_message_v2 msg2_desc;
+	uint8_t transmitter_message[TA_HDCP__HDCP2_TX_BUF_MAX_SIZE];
+};
+
+struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 {
+	uint32_t session_handle;
+	struct ta_hdcp_cmd_hdcp2_process_authentication_message_input_v2 process;
+	struct ta_hdcp_cmd_hdcp2_prepare_authentication_message_input_v2 prepare;
+};
+
+struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 {
+	uint32_t authentication_status;
+	struct ta_hdcp_cmd_hdcp2_process_authentication_message_output_v2 process;
+	struct ta_hdcp_cmd_hdcp2_prepare_authentication_message_output_v2 prepare;
+};
+
+struct ta_hdcp_cmd_hdcp2_set_encryption_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp2_get_encryption_status_input {
+	uint32_t session_handle;
+};
+
+struct ta_hdcp_cmd_hdcp2_get_encryption_status_output {
+	enum ta_hdcp_content_type hdcp2_type;
+	uint32_t protection_level;
+};
+
+struct ta_hdcp_cmd_hdcp2_enable_dp_stream_encryption_input {
+	uint32_t session_handle;
+	uint32_t display_handle;
+};
+
+/**********************************************************/
+/* Common input structure for HDCP callbacks */
+union ta_hdcp_cmd_input {
+	struct ta_hdcp_cmd_hdcp1_create_session_input hdcp1_create_session;
+	struct ta_hdcp_cmd_hdcp1_destroy_session_input hdcp1_destroy_session;
+	struct ta_hdcp_cmd_hdcp1_first_part_authentication_input hdcp1_first_part_authentication;
+	struct ta_hdcp_cmd_hdcp1_second_part_authentication_input hdcp1_second_part_authentication;
+	struct ta_hdcp_cmd_hdcp1_enable_encryption_input hdcp1_enable_encryption;
+	struct ta_hdcp_cmd_hdcp1_enable_dp_stream_encryption_input hdcp1_enable_dp_stream_encryption;
+	struct ta_hdcp_cmd_hdcp1_get_encryption_status_input hdcp1_get_encryption_status;
+	struct ta_hdcp_cmd_hdcp2_destroy_session_input hdcp2_destroy_session;
+	struct ta_hdcp_cmd_hdcp2_set_encryption_input hdcp2_set_encryption;
+	struct ta_hdcp_cmd_hdcp2_get_encryption_status_input hdcp2_get_encryption_status;
+	struct ta_hdcp_cmd_hdcp2_create_session_input_v2 hdcp2_create_session_v2;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2
+		hdcp2_prepare_process_authentication_message_v2;
+	struct ta_hdcp_cmd_hdcp2_enable_dp_stream_encryption_input hdcp2_enable_dp_stream_encryption;
+};
+
+/* Common output structure for HDCP callbacks */
+union ta_hdcp_cmd_output {
+	struct ta_hdcp_cmd_hdcp1_create_session_output hdcp1_create_session;
+	struct ta_hdcp_cmd_hdcp1_first_part_authentication_output hdcp1_first_part_authentication;
+	struct ta_hdcp_cmd_hdcp1_second_part_authentication_output hdcp1_second_part_authentication;
+	struct ta_hdcp_cmd_hdcp1_get_encryption_status_output hdcp1_get_encryption_status;
+	struct ta_hdcp_cmd_hdcp2_get_encryption_status_output hdcp2_get_encryption_status;
+	struct ta_hdcp_cmd_hdcp2_create_session_output_v2 hdcp2_create_session_v2;
+	struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2
+		hdcp2_prepare_process_authentication_message_v2;
+};
+/**********************************************************/
+
+struct ta_hdcp_shared_memory {
+	uint32_t cmd_id;
+	enum ta_hdcp_status hdcp_status;
+	uint32_t reserved;
+	union ta_hdcp_cmd_input in_msg;
+	union ta_hdcp_cmd_output out_msg;
+};
+
+enum psp_status {
+	PSP_STATUS__SUCCESS = 0,
+	PSP_STATUS__ERROR_INVALID_PARAMS,
+	PSP_STATUS__ERROR_GENERIC,
+	PSP_STATUS__ERROR_OUT_OF_MEMORY,
+	PSP_STATUS__ERROR_UNSUPPORTED_FEATURE
+};
+
+#endif /* MODULES_HDCP_HDCP_PSP_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index dc187844d10b..dbe7835aabcf 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -92,6 +92,7 @@ struct mod_vrr_params_btr {
 	uint32_t inserted_duration_in_us;
 	uint32_t frames_to_insert;
 	uint32_t frame_counter;
+	uint32_t margin_in_us;
 };
 
 struct mod_vrr_params_fixed_refresh {
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
new file mode 100644
index 000000000000..f2a0e1a064da
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef MOD_HDCP_H_
+#define MOD_HDCP_H_
+
+#include "os_types.h"
+#include "signal_types.h"
+
+/* Forward Declarations */
+struct mod_hdcp;
+
+#define MAX_NUM_OF_DISPLAYS 6
+#define MAX_NUM_OF_ATTEMPTS 4
+#define MAX_NUM_OF_ERROR_TRACE 10
+
+/* detailed return status */
+enum mod_hdcp_status {
+	MOD_HDCP_STATUS_SUCCESS = 0,
+	MOD_HDCP_STATUS_FAILURE,
+	MOD_HDCP_STATUS_RESET_NEEDED,
+	MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND,
+	MOD_HDCP_STATUS_DISPLAY_NOT_FOUND,
+	MOD_HDCP_STATUS_INVALID_STATE,
+	MOD_HDCP_STATUS_NOT_IMPLEMENTED,
+	MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE,
+	MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE,
+	MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE,
+	MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER,
+	MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE,
+	MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING,
+	MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY,
+	MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION,
+	MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED,
+	MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE,
+	MOD_HDCP_STATUS_HDCP1_INVALID_BKSV,
+	MOD_HDCP_STATUS_DDC_FAILURE, /* TODO: specific errors */
+	MOD_HDCP_STATUS_INVALID_OPERATION,
+	MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE,
+	MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING,
+	MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING,
+	MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED,
+	MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION,
+	MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING,
+	MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST,
+	MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE,
+	MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE,
+};
+
+struct mod_hdcp_displayport {
+	uint8_t rev;
+	uint8_t assr_supported;
+};
+
+struct mod_hdcp_hdmi {
+	uint8_t reserved;
+};
+enum mod_hdcp_operation_mode {
+	MOD_HDCP_MODE_OFF,
+	MOD_HDCP_MODE_DEFAULT,
+	MOD_HDCP_MODE_DP,
+	MOD_HDCP_MODE_DP_MST
+};
+
+enum mod_hdcp_display_state {
+	MOD_HDCP_DISPLAY_INACTIVE = 0,
+	MOD_HDCP_DISPLAY_ACTIVE,
+	MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED,
+	MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED
+};
+
+struct mod_hdcp_ddc {
+	void *handle;
+	struct {
+		bool (*read_i2c)(void *handle,
+				uint32_t address,
+				uint8_t offset,
+				uint8_t *data,
+				uint32_t size);
+		bool (*write_i2c)(void *handle,
+				uint32_t address,
+				const uint8_t *data,
+				uint32_t size);
+		bool (*read_dpcd)(void *handle,
+				uint32_t address,
+				uint8_t *data,
+				uint32_t size);
+		bool (*write_dpcd)(void *handle,
+				uint32_t address,
+				const uint8_t *data,
+				uint32_t size);
+	} funcs;
+};
+
+struct mod_hdcp_psp {
+	void *handle;
+	void *funcs;
+};
+
+struct mod_hdcp_display_adjustment {
+	uint8_t disable			: 1;
+	uint8_t reserved		: 7;
+};
+
+struct mod_hdcp_link_adjustment_hdcp1 {
+	uint8_t disable			: 1;
+	uint8_t postpone_encryption	: 1;
+	uint8_t reserved		: 6;
+};
+
+enum mod_hdcp_force_hdcp_type {
+	MOD_HDCP_FORCE_TYPE_MAX = 0,
+	MOD_HDCP_FORCE_TYPE_0,
+	MOD_HDCP_FORCE_TYPE_1
+};
+
+struct mod_hdcp_link_adjustment_hdcp2 {
+	uint8_t disable			: 1;
+	uint8_t force_type		: 2;
+	uint8_t force_no_stored_km	: 1;
+	uint8_t increase_h_prime_timeout: 1;
+	uint8_t reserved		: 3;
+};
+
+struct mod_hdcp_link_adjustment {
+	uint8_t auth_delay;
+	struct mod_hdcp_link_adjustment_hdcp1 hdcp1;
+	struct mod_hdcp_link_adjustment_hdcp2 hdcp2;
+};
+
+struct mod_hdcp_error {
+	enum mod_hdcp_status status;
+	uint8_t state_id;
+};
+
+struct mod_hdcp_trace {
+	struct mod_hdcp_error errors[MAX_NUM_OF_ERROR_TRACE];
+	uint8_t error_count;
+};
+
+enum mod_hdcp_encryption_status {
+	MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF = 0,
+	MOD_HDCP_ENCRYPTION_STATUS_HDCP1_ON,
+	MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON,
+	MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON,
+	MOD_HDCP_ENCRYPTION_STATUS_HDCP2_ON
+};
+
+/* per link events dm has to notify to hdcp module */
+enum mod_hdcp_event {
+	MOD_HDCP_EVENT_CALLBACK = 0,
+	MOD_HDCP_EVENT_WATCHDOG_TIMEOUT,
+	MOD_HDCP_EVENT_CPIRQ
+};
+
+/* output flags from module requesting timer operations */
+struct mod_hdcp_output {
+	uint8_t callback_needed;
+	uint8_t callback_stop;
+	uint8_t watchdog_timer_needed;
+	uint8_t watchdog_timer_stop;
+	uint16_t callback_delay;
+	uint16_t watchdog_timer_delay;
+};
+
+/* used to represent per display info */
+struct mod_hdcp_display {
+	enum mod_hdcp_display_state state;
+	uint8_t index;
+	uint8_t controller;
+	uint8_t dig_fe;
+	union {
+		uint8_t vc_id;
+	};
+	struct mod_hdcp_display_adjustment adjust;
+};
+
+/* used to represent per link info */
+/* in case a link has multiple displays, they share the same link info */
+struct mod_hdcp_link {
+	enum mod_hdcp_operation_mode mode;
+	uint8_t dig_be;
+	uint8_t ddc_line;
+	union {
+		struct mod_hdcp_displayport dp;
+		struct mod_hdcp_hdmi hdmi;
+	};
+	struct mod_hdcp_link_adjustment adjust;
+};
+
+/* a query structure for a display's hdcp information */
+struct mod_hdcp_display_query {
+	const struct mod_hdcp_display *display;
+	const struct mod_hdcp_link *link;
+	const struct mod_hdcp_trace *trace;
+	enum mod_hdcp_encryption_status encryption_status;
+};
+
+/* contains values per on external display configuration change */
+struct mod_hdcp_config {
+	struct mod_hdcp_psp psp;
+	struct mod_hdcp_ddc ddc;
+	uint8_t index;
+};
+
+struct mod_hdcp;
+
+/* dm allocates memory of mod_hdcp per dc_link on dm init based on memory size*/
+size_t mod_hdcp_get_memory_size(void);
+
+/* called per link on link creation */
+enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp,
+		struct mod_hdcp_config *config);
+
+/* called per link on link destroy */
+enum mod_hdcp_status mod_hdcp_teardown(struct mod_hdcp *hdcp);
+
+/* called per display on cp_desired set to true */
+enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp,
+		struct mod_hdcp_link *link, struct mod_hdcp_display *display,
+		struct mod_hdcp_output *output);
+
+/* called per display on cp_desired set to false */
+enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp,
+		uint8_t index, struct mod_hdcp_output *output);
+
+/* called to query hdcp information on a specific index */
+enum mod_hdcp_status mod_hdcp_query_display(struct mod_hdcp *hdcp,
+		uint8_t index, struct mod_hdcp_display_query *query);
+
+/* called per link on connectivity change */
+enum mod_hdcp_status mod_hdcp_reset_connection(struct mod_hdcp *hdcp,
+		struct mod_hdcp_output *output);
+
+/* called per link on events (i.e. callback, watchdog, CP_IRQ) */
+enum mod_hdcp_status mod_hdcp_process_event(struct mod_hdcp *hdcp,
+		enum mod_hdcp_event event, struct mod_hdcp_output *output);
+
+/* called to convert enum mod_hdcp_status to c string */
+char *mod_hdcp_status_to_str(int32_t status);
+
+/* called to convert state id to c string */
+char *mod_hdcp_state_id_to_str(int32_t id);
+
+/* called to convert signal type to operation mode */
+enum mod_hdcp_operation_mode mod_hdcp_signal_type_to_operation_mode(
+		enum signal_type signal);
+#endif /* MOD_HDCP_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
index d930bdecb117..42cbeffac640 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
@@ -26,6 +26,7 @@
 #ifndef MOD_INFO_PACKET_H_
 #define MOD_INFO_PACKET_H_
 
+#include "dm_services.h"
 #include "mod_shared.h"
 //Forward Declarations
 struct dc_stream_state;
@@ -33,6 +34,10 @@ struct dc_info_packet;
 struct mod_vrr_params;
 
 void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
-		struct dc_info_packet *info_packet);
+		struct dc_info_packet *info_packet,
+		bool *use_vsc_sdp_for_colorimetry);
+
+void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream,
+		struct dc_info_packet *info_packet, int ALLMEnabled, int ALLMValue);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
index b45f7d65e76a..fe2117904329 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
@@ -45,7 +45,6 @@ enum vrr_packet_type {
 	PACKET_TYPE_VTEM
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 union lut3d_control_flags {
 	unsigned int raw;
 	struct {
@@ -104,6 +103,5 @@ struct lut3d_settings {
 	enum lut3d_control_gamut_map map2;
 	enum lut3d_control_rotation_mode rotation2;
 };
-#endif
 
 #endif /* MOD_SHARED_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index d885d642ed7f..6a8a056424b8 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -30,7 +30,22 @@
 #include "mod_freesync.h"
 #include "dc.h"
 
+enum vsc_packet_revision {
+	vsc_packet_undefined = 0,
+	//01h = VSC SDP supports only 3D stereo.
+	vsc_packet_rev1 = 1,
+	//02h = 3D stereo + PSR.
+	vsc_packet_rev2 = 2,
+	//03h = 3D stereo + PSR2.
+	vsc_packet_rev3 = 3,
+	//04h = 3D stereo + PSR/PSR2 + Y-coordinate.
+	vsc_packet_rev4 = 4,
+	//05h = 3D stereo + PSR/PSR2 + Y-coordinate + Pixel Encoding/Colorimetry Format
+	vsc_packet_rev5 = 5,
+};
+
 #define HDMI_INFOFRAME_TYPE_VENDOR 0x81
+#define HF_VSIF_VERSION 1
 
 // VTEM Byte Offset
 #define VTEM_PB0		0
@@ -115,35 +130,41 @@ enum ColorimetryYCCDP {
 };
 
 void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
-		struct dc_info_packet *info_packet)
+		struct dc_info_packet *info_packet,
+		bool *use_vsc_sdp_for_colorimetry)
 {
-	unsigned int vscPacketRevision = 0;
+	unsigned int vsc_packet_revision = vsc_packet_undefined;
 	unsigned int i;
 	unsigned int pixelEncoding = 0;
 	unsigned int colorimetryFormat = 0;
 	bool stereo3dSupport = false;
 
+	/* Initialize first, later if infopacket is valid determine if VSC SDP
+	 * should be used to signal colorimetry format and pixel encoding.
+	 */
+	*use_vsc_sdp_for_colorimetry = false;
+
 	if (stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE && stream->view_format != VIEW_3D_FORMAT_NONE) {
-		vscPacketRevision = 1;
+		vsc_packet_revision = vsc_packet_rev1;
 		stereo3dSupport = true;
 	}
 
 	/*VSC packet set to 2 when DP revision >= 1.2*/
 	if (stream->psr_version != 0)
-		vscPacketRevision = 2;
+		vsc_packet_revision = vsc_packet_rev2;
 
 	/* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */
 	if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
 			stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
-		vscPacketRevision = 5;
+		vsc_packet_revision = vsc_packet_rev5;
 
 	/* VSC packet not needed based on the features
 	 * supported by this DP display
 	 */
-	if (vscPacketRevision == 0)
+	if (vsc_packet_revision == vsc_packet_undefined)
 		return;
 
-	if (vscPacketRevision == 0x2) {
+	if (vsc_packet_revision == vsc_packet_rev2) {
 		/* Secondary-data Packet ID = 0*/
 		info_packet->hb0 = 0x00;
 		/* 07h - Packet Type Value indicating Video
@@ -165,7 +186,7 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 		info_packet->valid = true;
 	}
 
-	if (vscPacketRevision == 0x1) {
+	if (vsc_packet_revision == vsc_packet_rev1) {
 
 		info_packet->hb0 = 0x00;	// Secondary-data Packet ID = 0
 		info_packet->hb1 = 0x07;	// 07h = Packet Type Value indicating Video Stream Configuration packet
@@ -236,7 +257,7 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	 *   the Pixel Encoding/Colorimetry Format and that a Sink device must ignore MISC1, bit 7, and
 	 *   MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become "don't care").)
 	 */
-	if (vscPacketRevision == 0x5) {
+	if (vsc_packet_revision == vsc_packet_rev5) {
 		/* Secondary-data Packet ID = 0 */
 		info_packet->hb0 = 0x00;
 		/* 07h - Packet Type Value indicating Video Stream Configuration packet */
@@ -248,6 +269,13 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 
 		info_packet->valid = true;
 
+		/* If we are using VSC SDP revision 05h, use this to signal for
+		 * colorimetry format and pixel encoding. HW should later be
+		 * programmed to set MSA MISC1 bit 6 to indicate ignore
+		 * colorimetry format and pixel encoding in the MSA.
+		 */
+		*use_vsc_sdp_for_colorimetry = true;
+
 		/* Set VSC SDP fields for pixel encoding and colorimetry format from DP 1.3 specs
 		 * Data Bytes DB 18~16
 		 * Bits 3:0 (Colorimetry Format)        |  Bits 7:4 (Pixel Encoding)
@@ -392,6 +420,102 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 		 */
 		info_packet->sb[18] = 0;
 	}
+}
+
+/**
+ *****************************************************************************
+ *  Function: mod_build_hf_vsif_infopacket
+ *
+ *  @brief
+ *     Prepare HDMI Vendor Specific info frame.
+ *     Follows HDMI Spec to build up Vendor Specific info frame
+ *
+ *  @param [in] stream: contains data we may need to construct VSIF (i.e. timing_3d_format, etc.)
+ *  @param [out] info_packet:   output structure where to store VSIF
+ *****************************************************************************
+ */
+void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream,
+		struct dc_info_packet *info_packet, int ALLMEnabled, int ALLMValue)
+{
+		unsigned int length = 5;
+		bool hdmi_vic_mode = false;
+		uint8_t checksum = 0;
+		uint32_t i = 0;
+		enum dc_timing_3d_format format;
+		bool bALLM = (bool)ALLMEnabled;
+		bool bALLMVal = (bool)ALLMValue;
+
+		info_packet->valid = false;
+		format = stream->timing.timing_3d_format;
+		if (stream->view_format == VIEW_3D_FORMAT_NONE)
+			format = TIMING_3D_FORMAT_NONE;
+
+		if (stream->timing.hdmi_vic != 0
+				&& stream->timing.h_total >= 3840
+				&& stream->timing.v_total >= 2160
+				&& format == TIMING_3D_FORMAT_NONE)
+			hdmi_vic_mode = true;
+
+		if ((format == TIMING_3D_FORMAT_NONE) && !hdmi_vic_mode && !bALLM)
+			return;
+
+		info_packet->sb[1] = 0x03;
+		info_packet->sb[2] = 0x0C;
+		info_packet->sb[3] = 0x00;
+
+		if (bALLM) {
+			info_packet->sb[1] = 0xD8;
+			info_packet->sb[2] = 0x5D;
+			info_packet->sb[3] = 0xC4;
+			info_packet->sb[4] = HF_VSIF_VERSION;
+		}
+
+		if (format != TIMING_3D_FORMAT_NONE)
+			info_packet->sb[4] = (2 << 5);
+
+		else if (hdmi_vic_mode)
+			info_packet->sb[4] = (1 << 5);
+
+		switch (format) {
+		case TIMING_3D_FORMAT_HW_FRAME_PACKING:
+		case TIMING_3D_FORMAT_SW_FRAME_PACKING:
+			info_packet->sb[5] = (0x0 << 4);
+			break;
 
+		case TIMING_3D_FORMAT_SIDE_BY_SIDE:
+		case TIMING_3D_FORMAT_SBS_SW_PACKED:
+			info_packet->sb[5] = (0x8 << 4);
+			length = 6;
+			break;
+
+		case TIMING_3D_FORMAT_TOP_AND_BOTTOM:
+		case TIMING_3D_FORMAT_TB_SW_PACKED:
+			info_packet->sb[5] = (0x6 << 4);
+			break;
+
+		default:
+			break;
+		}
+
+		if (hdmi_vic_mode)
+			info_packet->sb[5] = stream->timing.hdmi_vic;
+
+		info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR;
+		info_packet->hb1 = 0x01;
+		info_packet->hb2 = (uint8_t) (length);
+
+		if (bALLM)
+			info_packet->sb[5] = (info_packet->sb[5] & ~0x02) | (bALLMVal << 1);
+
+		checksum += info_packet->hb0;
+		checksum += info_packet->hb1;
+		checksum += info_packet->hb2;
+
+		for (i = 1; i <= length; i++)
+			checksum += info_packet->sb[i];
+
+		info_packet->sb[0] = (uint8_t) (0x100 - checksum);
+
+		info_packet->valid = true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 05e2be856037..e75a4bb94488 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -80,18 +80,18 @@ struct abm_parameters {
 
 static const struct abm_parameters abm_settings_config0[abm_defines_max_level] = {
 //  min_red  max_red  bright_pos  dark_pos  brightness_gain  contrast  deviation  min_knee  max_knee
-	{0xff,   0xbf,    0x20,       0x00,     0xff,            0x99,     0xb3,      0x40,     0xE0},
-	{0xff,   0x85,    0x20,       0x00,     0xff,            0x90,     0xa8,      0x40,     0xE0},
-	{0xff,   0x40,    0x20,       0x00,     0xff,            0x90,     0x68,      0x40,     0xE0},
-	{0x82,   0x4d,    0x20,       0x00,     0x00,            0x90,     0xb3,      0x70,     0x70},
+	{0xff,   0xbf,    0x20,       0x00,     0xff,            0x99,     0xb3,      0x40,     0xe0},
+	{0xde,   0x85,    0x20,       0x00,     0xff,            0x90,     0xa8,      0x40,     0xdf},
+	{0xb0,   0x50,    0x20,       0x00,     0xc0,            0x88,     0x78,      0x70,     0xa0},
+	{0x82,   0x40,    0x20,       0x00,     0x00,            0xff,     0xb3,      0x70,     0x70},
 };
 
 static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = {
 //  min_red  max_red  bright_pos  dark_pos  brightness_gain  contrast  deviation  min_knee  max_knee
-	{0xf0,   0xd9,    0x20,       0x00,     0x00,            0xa8,     0xb3,      0x70,     0x70},
-	{0xcd,   0xa5,    0x20,       0x00,     0x00,            0xa8,     0xb3,      0x70,     0x70},
-	{0x99,   0x65,    0x20,       0x00,     0x00,            0xa8,     0xb3,      0x70,     0x70},
-	{0x82,   0x4d,    0x20,       0x00,     0x00,            0xa8,     0xb3,      0x70,     0x70},
+	{0xf0,   0xd9,    0x20,       0x00,     0x00,            0xff,     0xb3,      0x70,     0x70},
+	{0xcd,   0xa5,    0x20,       0x00,     0x00,            0xff,     0xb3,      0x70,     0x70},
+	{0x99,   0x65,    0x20,       0x00,     0x00,            0xff,     0xb3,      0x70,     0x70},
+	{0x82,   0x4d,    0x20,       0x00,     0x00,            0xff,     0xb3,      0x70,     0x70},
 };
 
 static const struct abm_parameters * const abm_settings[] = {
@@ -115,7 +115,7 @@ static const struct abm_parameters * const abm_settings[] = {
 /* NOTE: iRAM is 256B in size */
 struct iram_table_v_2 {
 	/* flags                      */
-	uint16_t flags;							/* 0x00 U16  */
+	uint16_t min_abm_backlight;					/* 0x00 U16  */
 
 	/* parameters for ABM2.0 algorithm */
 	uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];		/* 0x02 U0.8 */
@@ -140,10 +140,10 @@ struct iram_table_v_2 {
 
 	/* For reading PSR State directly from IRAM */
 	uint8_t psr_state;						/* 0xf0       */
-	uint8_t dmcu_mcp_interface_version;							/* 0xf1       */
-	uint8_t dmcu_abm_feature_version;							/* 0xf2       */
-	uint8_t dmcu_psr_feature_version;							/* 0xf3       */
-	uint16_t dmcu_version;										/* 0xf4       */
+	uint8_t dmcu_mcp_interface_version;				/* 0xf1       */
+	uint8_t dmcu_abm_feature_version;				/* 0xf2       */
+	uint8_t dmcu_psr_feature_version;				/* 0xf3       */
+	uint16_t dmcu_version;						/* 0xf4       */
 	uint8_t dmcu_state;						/* 0xf6       */
 
 	uint16_t blRampReduction;					/* 0xf7       */
@@ -164,42 +164,43 @@ struct iram_table_v_2_2 {
 	uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];		/* 0x16 U0.8 */
 	uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];	/* 0x2a U2.6 */
 	uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];		/* 0x3e U2.6 */
-	uint8_t hybrid_factor[NUM_AGGR_LEVEL];						/* 0x52 U0.8 */
-	uint8_t contrast_factor[NUM_AGGR_LEVEL];					/* 0x56 U0.8 */
-	uint8_t deviation_gain[NUM_AGGR_LEVEL];						/* 0x5a U0.8 */
-	uint8_t iir_curve[NUM_AMBI_LEVEL];							/* 0x5e U0.8 */
-	uint8_t min_knee[NUM_AGGR_LEVEL];							/* 0x63 U0.8 */
-	uint8_t max_knee[NUM_AGGR_LEVEL];							/* 0x67 U0.8 */
-	uint8_t pad[21];											/* 0x6b U0.8 */
+	uint8_t hybrid_factor[NUM_AGGR_LEVEL];				/* 0x52 U0.8 */
+	uint8_t contrast_factor[NUM_AGGR_LEVEL];			/* 0x56 U0.8 */
+	uint8_t deviation_gain[NUM_AGGR_LEVEL];				/* 0x5a U0.8 */
+	uint8_t iir_curve[NUM_AMBI_LEVEL];				/* 0x5e U0.8 */
+	uint8_t min_knee[NUM_AGGR_LEVEL];				/* 0x63 U0.8 */
+	uint8_t max_knee[NUM_AGGR_LEVEL];				/* 0x67 U0.8 */
+	uint16_t min_abm_backlight;					/* 0x6b U16  */
+	uint8_t pad[19];						/* 0x6d U0.8 */
 
 	/* parameters for crgb conversion */
-	uint16_t crgb_thresh[NUM_POWER_FN_SEGS];					/* 0x80 U3.13 */
-	uint16_t crgb_offset[NUM_POWER_FN_SEGS];					/* 0x90 U1.15 */
-	uint16_t crgb_slope[NUM_POWER_FN_SEGS];						/* 0xa0 U4.12 */
+	uint16_t crgb_thresh[NUM_POWER_FN_SEGS];			/* 0x80 U3.13 */
+	uint16_t crgb_offset[NUM_POWER_FN_SEGS];			/* 0x90 U1.15 */
+	uint16_t crgb_slope[NUM_POWER_FN_SEGS];				/* 0xa0 U4.12 */
 
 	/* parameters for custom curve */
 	/* thresholds for brightness --> backlight */
-	uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS];			/* 0xb0 U16.0 */
+	uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS];		/* 0xb0 U16.0 */
 	/* offsets for brightness --> backlight */
-	uint16_t backlight_offsets[NUM_BL_CURVE_SEGS];				/* 0xd0 U16.0 */
+	uint16_t backlight_offsets[NUM_BL_CURVE_SEGS];			/* 0xd0 U16.0 */
 
 	/* For reading PSR State directly from IRAM */
-	uint8_t psr_state;											/* 0xf0       */
-	uint8_t dmcu_mcp_interface_version;							/* 0xf1       */
-	uint8_t dmcu_abm_feature_version;							/* 0xf2       */
-	uint8_t dmcu_psr_feature_version;							/* 0xf3       */
-	uint16_t dmcu_version;										/* 0xf4       */
-	uint8_t dmcu_state;											/* 0xf6       */
-
-	uint8_t dummy1;												/* 0xf7       */
-	uint8_t dummy2;												/* 0xf8       */
-	uint8_t dummy3;												/* 0xf9       */
-	uint8_t dummy4;												/* 0xfa       */
-	uint8_t dummy5;												/* 0xfb       */
-	uint8_t dummy6;												/* 0xfc       */
-	uint8_t dummy7;												/* 0xfd       */
-	uint8_t dummy8;												/* 0xfe       */
-	uint8_t dummy9;												/* 0xff       */
+	uint8_t psr_state;						/* 0xf0       */
+	uint8_t dmcu_mcp_interface_version;				/* 0xf1       */
+	uint8_t dmcu_abm_feature_version;				/* 0xf2       */
+	uint8_t dmcu_psr_feature_version;				/* 0xf3       */
+	uint16_t dmcu_version;						/* 0xf4       */
+	uint8_t dmcu_state;						/* 0xf6       */
+
+	uint8_t dummy1;							/* 0xf7       */
+	uint8_t dummy2;							/* 0xf8       */
+	uint8_t dummy3;							/* 0xf9       */
+	uint8_t dummy4;							/* 0xfa       */
+	uint8_t dummy5;							/* 0xfb       */
+	uint8_t dummy6;							/* 0xfc       */
+	uint8_t dummy7;							/* 0xfd       */
+	uint8_t dummy8;							/* 0xfe       */
+	uint8_t dummy9;							/* 0xff       */
 };
 #pragma pack(pop)
 
@@ -271,7 +272,8 @@ void fill_iram_v_2(struct iram_table_v_2 *ram_table, struct dmcu_iram_parameters
 {
 	unsigned int set = params.set;
 
-	ram_table->flags = 0x0;
+	ram_table->min_abm_backlight =
+			cpu_to_be16(params.min_abm_backlight);
 	ram_table->deviation_gain = 0xb3;
 
 	ram_table->blRampReduction =
@@ -445,6 +447,9 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame
 
 	ram_table->flags = 0x0;
 
+	ram_table->min_abm_backlight =
+			cpu_to_be16(params.min_abm_backlight);
+
 	ram_table->deviation_gain[0] = 0xb3;
 	ram_table->deviation_gain[1] = 0xa8;
 	ram_table->deviation_gain[2] = 0x98;
@@ -588,6 +593,10 @@ void fill_iram_v_2_3(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame
 	unsigned int set = params.set;
 
 	ram_table->flags = 0x0;
+
+	ram_table->min_abm_backlight =
+			cpu_to_be16(params.min_abm_backlight);
+
 	for (i = 0; i < NUM_AGGR_LEVEL; i++) {
 		ram_table->hybrid_factor[i] = abm_settings[set][i].brightness_gain;
 		ram_table->contrast_factor[i] = abm_settings[set][i].contrast_factor;
@@ -653,7 +662,11 @@ bool dmcu_load_iram(struct dmcu *dmcu,
 
 	memset(&ram_table, 0, sizeof(ram_table));
 
-	if (dmcu->dmcu_version.abm_version == 0x23) {
+	if (dmcu->dmcu_version.abm_version == 0x24) {
+		fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
+		result = dmcu->funcs->load_iram(
+				dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
+	} else if (dmcu->dmcu_version.abm_version == 0x23) {
 		fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
 
 		result = dmcu->funcs->load_iram(
@@ -678,3 +691,4 @@ bool dmcu_load_iram(struct dmcu *dmcu,
 
 	return result;
 }
+
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index da5df00fedce..e54157026330 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -38,6 +38,7 @@ struct dmcu_iram_parameters {
 	unsigned int backlight_lut_array_size;
 	unsigned int backlight_ramping_reduction;
 	unsigned int backlight_ramping_start;
+	unsigned int min_abm_backlight;
 	unsigned int set;
 };
 
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 8889aaceec60..d655a76bedc6 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -53,7 +53,8 @@ enum amd_ip_block_type {
 	AMD_IP_BLOCK_TYPE_VCE,
 	AMD_IP_BLOCK_TYPE_ACP,
 	AMD_IP_BLOCK_TYPE_VCN,
-	AMD_IP_BLOCK_TYPE_MES
+	AMD_IP_BLOCK_TYPE_MES,
+	AMD_IP_BLOCK_TYPE_JPEG
 };
 
 enum amd_clockgating_state {
@@ -99,6 +100,7 @@ enum amd_powergating_state {
 #define AMD_CG_SUPPORT_IH_CG			(1 << 27)
 #define AMD_CG_SUPPORT_ATHUB_LS			(1 << 28)
 #define AMD_CG_SUPPORT_ATHUB_MGCG		(1 << 29)
+#define AMD_CG_SUPPORT_JPEG_MGCG		(1 << 30)
 /* PG flags */
 #define AMD_PG_SUPPORT_GFX_PG			(1 << 0)
 #define AMD_PG_SUPPORT_GFX_SMG			(1 << 1)
@@ -117,6 +119,7 @@ enum amd_powergating_state {
 #define AMD_PG_SUPPORT_VCN			(1 << 14)
 #define AMD_PG_SUPPORT_VCN_DPG			(1 << 15)
 #define AMD_PG_SUPPORT_ATHUB			(1 << 16)
+#define AMD_PG_SUPPORT_JPEG			(1 << 17)
 
 enum PP_FEATURE_MASK {
 	PP_SCLK_DPM_MASK = 0x1,
@@ -143,6 +146,8 @@ enum PP_FEATURE_MASK {
 enum DC_FEATURE_MASK {
 	DC_FBC_MASK = 0x1,
 	DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
+	DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4,
+	DC_PSR_MASK = 0x8,
 };
 
 enum amd_dpm_forced_level;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h
index a761ba07f937..fce965984e76 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h
@@ -27,6 +27,7 @@
 #define mmMM_INDEX                                                              0x0
 #define mmMM_INDEX_HI                                                           0x6
 #define mmMM_DATA                                                               0x1
+#define mmCC_BIF_BX_FUSESTRAP0							0x14D7
 #define mmBUS_CNTL                                                              0x1508
 #define mmCONFIG_CNTL                                                           0x1509
 #define mmCONFIG_MEMSIZE                                                        0x150a
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h
index 8fbfd0261d27..39cc4880beb4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h
@@ -32,6 +32,8 @@
 #define MM_INDEX_HI__MM_OFFSET_HI__SHIFT 0x0
 #define MM_DATA__MM_DATA_MASK 0xffffffff
 #define MM_DATA__MM_DATA__SHIFT 0x0
+#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK    0x2
+#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE__SHIFT  0x1
 #define BUS_CNTL__BIOS_ROM_WRT_EN_MASK 0x1
 #define BUS_CNTL__BIOS_ROM_WRT_EN__SHIFT 0x0
 #define BUS_CNTL__BIOS_ROM_DIS_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
index 809759f7bb81..8d05d6ca1c8d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
@@ -27,6 +27,7 @@
 #define mmMM_INDEX                                                              0x0
 #define mmMM_INDEX_HI                                                           0x6
 #define mmMM_DATA                                                               0x1
+#define mmCC_BIF_BX_FUSESTRAP0							0x14D7
 #define mmCC_BIF_BX_STRAP2							0x152A
 #define mmBIF_MM_INDACCESS_CNTL                                                 0x1500
 #define mmBIF_DOORBELL_APER_EN                                                  0x1501
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h
index adc71b01f793..73435687d049 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h
@@ -32,6 +32,8 @@
 #define MM_INDEX_HI__MM_OFFSET_HI__SHIFT 0x0
 #define MM_DATA__MM_DATA_MASK 0xffffffff
 #define MM_DATA__MM_DATA__SHIFT 0x0
+#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK    0x2
+#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE__SHIFT  0x1
 #define BIF_MM_INDACCESS_CNTL__MM_INDACCESS_DIS_MASK 0x2
 #define BIF_MM_INDACCESS_CNTL__MM_INDACCESS_DIS__SHIFT 0x1
 #define BIF_DOORBELL_APER_EN__BIF_DOORBELL_APER_EN_MASK 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
index cff8f91555d3..e9b2bd84cfed 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
@@ -8134,6 +8134,10 @@
 #define mmMPC_OUT5_CSC_C33_C34_B                                                                       0x1604
 #define mmMPC_OUT5_CSC_C33_C34_B_BASE_IDX                                                              2
 
+#define mmMPC_OCSC_TEST_DEBUG_INDEX                                                                    0x163b
+#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX                                                           2
+#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX                                                            2
+#define mmMPC_OCSC_TEST_DEBUG_DATA                                                                     0x163c
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 // base address: 0x5964
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
index 10c83fecd147..dc8ce7aaa0cf 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
@@ -28263,7 +28263,14 @@
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B__SHIFT                                                         0x10
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C33_B_MASK                                                           0x0000FFFFL
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B_MASK                                                           0xFFFF0000L
-
+//MPC_OCSC_TEST_DEBUG_INDEX
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT                                           0x0
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT                                        0x8
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK                                             0x000000FFL
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK                                          0x00000100L
+//MPC_OCSC_TEST_DEBUG_DATA
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT                                             0x0
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK                                               0xFFFFFFFFL
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 //DC_PERFMON17_PERFCOUNTER_CNTL
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
index be4249adb356..7cd0ee61c030 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
@@ -7103,7 +7103,10 @@
 #define mmMPC_OUT3_CSC_C31_C32_B_BASE_IDX                                                              2
 #define mmMPC_OUT3_CSC_C33_C34_B                                                                       0x15ea
 #define mmMPC_OUT3_CSC_C33_C34_B_BASE_IDX                                                              2
-
+#define mmMPC_OCSC_TEST_DEBUG_INDEX                                                                    0x163b
+#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX                                                           2
+#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX                                                            2
+#define mmMPC_OCSC_TEST_DEBUG_DATA                                                                     0x163c
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 // base address: 0x5964
@@ -9859,6 +9862,8 @@
 #define mmDP0_DP_STEER_FIFO_BASE_IDX                                                                   2
 #define mmDP0_DP_MSA_MISC                                                                              0x210e
 #define mmDP0_DP_MSA_MISC_BASE_IDX                                                                     2
+#define mmDP0_DP_DPHY_INTERNAL_CTRL                                                                    0x210f
+#define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX                                                           2
 #define mmDP0_DP_VID_TIMING                                                                            0x2110
 #define mmDP0_DP_VID_TIMING_BASE_IDX                                                                   2
 #define mmDP0_DP_VID_N                                                                                 0x2111
@@ -10187,6 +10192,8 @@
 #define mmDP1_DP_STEER_FIFO_BASE_IDX                                                                   2
 #define mmDP1_DP_MSA_MISC                                                                              0x220e
 #define mmDP1_DP_MSA_MISC_BASE_IDX                                                                     2
+#define mmDP1_DP_DPHY_INTERNAL_CTRL                                                                    0x220f
+#define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX                                                           2
 #define mmDP1_DP_VID_TIMING                                                                            0x2210
 #define mmDP1_DP_VID_TIMING_BASE_IDX                                                                   2
 #define mmDP1_DP_VID_N                                                                                 0x2211
@@ -10515,6 +10522,8 @@
 #define mmDP2_DP_STEER_FIFO_BASE_IDX                                                                   2
 #define mmDP2_DP_MSA_MISC                                                                              0x230e
 #define mmDP2_DP_MSA_MISC_BASE_IDX                                                                     2
+#define mmDP2_DP_DPHY_INTERNAL_CTRL                                                                    0x230f
+#define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX                                                           2
 #define mmDP2_DP_VID_TIMING                                                                            0x2310
 #define mmDP2_DP_VID_TIMING_BASE_IDX                                                                   2
 #define mmDP2_DP_VID_N                                                                                 0x2311
@@ -10843,6 +10852,8 @@
 #define mmDP3_DP_STEER_FIFO_BASE_IDX                                                                   2
 #define mmDP3_DP_MSA_MISC                                                                              0x240e
 #define mmDP3_DP_MSA_MISC_BASE_IDX                                                                     2
+#define mmDP3_DP_DPHY_INTERNAL_CTRL                                                                    0x240f
+#define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX                                                           2
 #define mmDP3_DP_VID_TIMING                                                                            0x2410
 #define mmDP3_DP_VID_TIMING_BASE_IDX                                                                   2
 #define mmDP3_DP_VID_N                                                                                 0x2411
@@ -11171,6 +11182,8 @@
 #define mmDP4_DP_STEER_FIFO_BASE_IDX                                                                   2
 #define mmDP4_DP_MSA_MISC                                                                              0x250e
 #define mmDP4_DP_MSA_MISC_BASE_IDX                                                                     2
+#define mmDP4_DP_DPHY_INTERNAL_CTRL                                                                    0x250f
+#define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX                                                           2
 #define mmDP4_DP_VID_TIMING                                                                            0x2510
 #define mmDP4_DP_VID_TIMING_BASE_IDX                                                                   2
 #define mmDP4_DP_VID_N                                                                                 0x2511
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
index faa0e76e32b4..2f780aefc722 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
@@ -56634,5 +56634,13 @@
 #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_BYTE_5_MASK                 0x00FF0000L
 #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_VALID_MASK                  0x80000000L
 
+//MPC_OCSC_TEST_DEBUG_INDEX
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT                                           0x0
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT                                        0x8
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK                                             0x000000FFL
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK                                          0x00000100L
+//MPC_OCSC_TEST_DEBUG_DATA
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT                                             0x0
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK                                               0xFFFFFFFFL
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index c2bd25589e84..87c84691b5be 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -27,6 +27,9 @@
 #define mmDF_PIE_AON0_DfGlobalClkGater									0x00fc
 #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX								0
 
+#define mmDF_CS_UMC_AON0_DfGlobalCtrl									0x00fe
+#define mmDF_CS_UMC_AON0_DfGlobalCtrl_BASE_IDX								0
+
 #define mmDF_CS_UMC_AON0_DramBaseAddress0								0x0044
 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX							0
 
@@ -38,6 +41,14 @@
 #define smnPerfMonCtlHi2					0x01d464UL
 #define smnPerfMonCtlLo3					0x01d470UL
 #define smnPerfMonCtlHi3					0x01d474UL
+#define smnPerfMonCtlLo4					0x01d880UL
+#define smnPerfMonCtlHi4					0x01d884UL
+#define smnPerfMonCtlLo5					0x01d888UL
+#define smnPerfMonCtlHi5					0x01d88cUL
+#define smnPerfMonCtlLo6					0x01d890UL
+#define smnPerfMonCtlHi6					0x01d894UL
+#define smnPerfMonCtlLo7					0x01d898UL
+#define smnPerfMonCtlHi7					0x01d89cUL
 
 #define smnPerfMonCtrLo0					0x01d448UL
 #define smnPerfMonCtrHi0					0x01d44cUL
@@ -47,6 +58,14 @@
 #define smnPerfMonCtrHi2					0x01d46cUL
 #define smnPerfMonCtrLo3					0x01d478UL
 #define smnPerfMonCtrHi3					0x01d47cUL
+#define smnPerfMonCtrLo4					0x01d790UL
+#define smnPerfMonCtrHi4					0x01d794UL
+#define smnPerfMonCtrLo5					0x01d798UL
+#define smnPerfMonCtrHi5					0x01d79cUL
+#define smnPerfMonCtrLo6					0x01d7a0UL
+#define smnPerfMonCtrHi6					0x01d7a4UL
+#define smnPerfMonCtrLo7					0x01d7a8UL
+#define smnPerfMonCtrHi7					0x01d7acUL
 
 #define smnDF_PIE_AON_FabricIndirectConfigAccessAddress3	0x1d05cUL
 #define smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3		0x1d098UL
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
index 06fac509e987..65e9f756e86e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -33,6 +33,14 @@
 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT							0x0
 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK							0x0000000FL
 
+/* DF_CS_UMC_AON0_DfGlobalCtrl */
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K__SHIFT						0x14
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M__SHIFT						0x15
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G__SHIFT						0x16
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K_MASK						0x00100000L
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M_MASK						0x00200000L
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G_MASK						0x00400000L
+
 /* DF_CS_AON0_DramBaseAddress0 */
 #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT						0x0
 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT						0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_offset.h
new file mode 100644
index 000000000000..36ae5b7fe88e
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_offset.h
@@ -0,0 +1,647 @@
+/*
+ * Copyright (C) 2019  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _dpcs_2_0_0_OFFSET_HEADER
+#define _dpcs_2_0_0_OFFSET_HEADER
+
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx0_dispdec
+// base address: 0x0
+#define mmDPCSTX0_DPCSTX_TX_CLOCK_CNTL                                                                 0x2928
+#define mmDPCSTX0_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX0_DPCSTX_TX_CNTL                                                                       0x2929
+#define mmDPCSTX0_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX0_DPCSTX_CBUS_CNTL                                                                     0x292a
+#define mmDPCSTX0_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX0_DPCSTX_INTERRUPT_CNTL                                                                0x292b
+#define mmDPCSTX0_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX0_DPCSTX_PLL_UPDATE_ADDR                                                               0x292c
+#define mmDPCSTX0_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX0_DPCSTX_PLL_UPDATE_DATA                                                               0x292d
+#define mmDPCSTX0_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX0_DPCSTX_DEBUG_CONFIG                                                                  0x292e
+#define mmDPCSTX0_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx0_dispdec
+// base address: 0x0
+#define mmRDPCSTX0_RDPCSTX_CNTL                                                                        0x2930
+#define mmRDPCSTX0_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX0_RDPCSTX_CLOCK_CNTL                                                                  0x2931
+#define mmRDPCSTX0_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_INTERRUPT_CONTROL                                                           0x2932
+#define mmRDPCSTX0_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX0_RDPCSTX_PLL_UPDATE_DATA                                                             0x2933
+#define mmRDPCSTX0_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX0_RDPCS_TX_CR_ADDR                                                                    0x2934
+#define mmRDPCSTX0_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX0_RDPCS_TX_CR_DATA                                                                    0x2935
+#define mmRDPCSTX0_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX0_RDPCS_TX_SRAM_CNTL                                                                  0x2936
+#define mmRDPCSTX0_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_MEM_POWER_CTRL                                                              0x2937
+#define mmRDPCSTX0_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX0_RDPCSTX_MEM_POWER_CTRL2                                                             0x2938
+#define mmRDPCSTX0_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX0_RDPCSTX_SCRATCH                                                                     0x2939
+#define mmRDPCSTX0_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x293c
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX0_RDPCSTX_DEBUG_CONFIG                                                                0x293d
+#define mmRDPCSTX0_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL0                                                                   0x2940
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL1                                                                   0x2941
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL2                                                                   0x2942
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL3                                                                   0x2943
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL4                                                                   0x2944
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL5                                                                   0x2945
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL6                                                                   0x2946
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL7                                                                   0x2947
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL8                                                                   0x2948
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL9                                                                   0x2949
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL10                                                                  0x294a
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL11                                                                  0x294b
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL12                                                                  0x294c
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL13                                                                  0x294d
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL14                                                                  0x294e
+#define mmRDPCSTX0_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE0                                                                   0x294f
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE1                                                                   0x2950
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE2                                                                   0x2951
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE3                                                                   0x2952
+#define mmRDPCSTX0_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX0_RDPCSTX_PHY_RX_LD_VAL                                                               0x2953
+#define mmRDPCSTX0_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2954
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2955
+#define mmRDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX0_RDPCSTX_DPALT_CONTROL_REG                                                           0x2956
+#define mmRDPCSTX0_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr0_dispdec
+// base address: 0x0
+#define mmDPCSSYS_CR0_DPCSSYS_CR_ADDR                                                                  0x2934
+#define mmDPCSSYS_CR0_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR0_DPCSSYS_CR_DATA                                                                  0x2935
+#define mmDPCSSYS_CR0_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx1_dispdec
+// base address: 0x360
+#define mmDPCSTX1_DPCSTX_TX_CLOCK_CNTL                                                                 0x2a00
+#define mmDPCSTX1_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX1_DPCSTX_TX_CNTL                                                                       0x2a01
+#define mmDPCSTX1_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX1_DPCSTX_CBUS_CNTL                                                                     0x2a02
+#define mmDPCSTX1_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX1_DPCSTX_INTERRUPT_CNTL                                                                0x2a03
+#define mmDPCSTX1_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX1_DPCSTX_PLL_UPDATE_ADDR                                                               0x2a04
+#define mmDPCSTX1_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX1_DPCSTX_PLL_UPDATE_DATA                                                               0x2a05
+#define mmDPCSTX1_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX1_DPCSTX_DEBUG_CONFIG                                                                  0x2a06
+#define mmDPCSTX1_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx1_dispdec
+// base address: 0x360
+#define mmRDPCSTX1_RDPCSTX_CNTL                                                                        0x2a08
+#define mmRDPCSTX1_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX1_RDPCSTX_CLOCK_CNTL                                                                  0x2a09
+#define mmRDPCSTX1_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_INTERRUPT_CONTROL                                                           0x2a0a
+#define mmRDPCSTX1_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX1_RDPCSTX_PLL_UPDATE_DATA                                                             0x2a0b
+#define mmRDPCSTX1_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX1_RDPCS_TX_CR_ADDR                                                                    0x2a0c
+#define mmRDPCSTX1_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX1_RDPCS_TX_CR_DATA                                                                    0x2a0d
+#define mmRDPCSTX1_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX1_RDPCS_TX_SRAM_CNTL                                                                  0x2a0e
+#define mmRDPCSTX1_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_MEM_POWER_CTRL                                                              0x2a0f
+#define mmRDPCSTX1_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX1_RDPCSTX_MEM_POWER_CTRL2                                                             0x2a10
+#define mmRDPCSTX1_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX1_RDPCSTX_SCRATCH                                                                     0x2a11
+#define mmRDPCSTX1_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x2a14
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX1_RDPCSTX_DEBUG_CONFIG                                                                0x2a15
+#define mmRDPCSTX1_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL0                                                                   0x2a18
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL1                                                                   0x2a19
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL2                                                                   0x2a1a
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL3                                                                   0x2a1b
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL4                                                                   0x2a1c
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL5                                                                   0x2a1d
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL6                                                                   0x2a1e
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL7                                                                   0x2a1f
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL8                                                                   0x2a20
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL9                                                                   0x2a21
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL10                                                                  0x2a22
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL11                                                                  0x2a23
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL12                                                                  0x2a24
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL13                                                                  0x2a25
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL14                                                                  0x2a26
+#define mmRDPCSTX1_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE0                                                                   0x2a27
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE1                                                                   0x2a28
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE2                                                                   0x2a29
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE3                                                                   0x2a2a
+#define mmRDPCSTX1_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX1_RDPCSTX_PHY_RX_LD_VAL                                                               0x2a2b
+#define mmRDPCSTX1_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2a2c
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2a2d
+#define mmRDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX1_RDPCSTX_DPALT_CONTROL_REG                                                           0x2a2e
+#define mmRDPCSTX1_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr1_dispdec
+// base address: 0x360
+#define mmDPCSSYS_CR1_DPCSSYS_CR_ADDR                                                                  0x2a0c
+#define mmDPCSSYS_CR1_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR1_DPCSSYS_CR_DATA                                                                  0x2a0d
+#define mmDPCSSYS_CR1_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx2_dispdec
+// base address: 0x6c0
+#define mmDPCSTX2_DPCSTX_TX_CLOCK_CNTL                                                                 0x2ad8
+#define mmDPCSTX2_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX2_DPCSTX_TX_CNTL                                                                       0x2ad9
+#define mmDPCSTX2_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX2_DPCSTX_CBUS_CNTL                                                                     0x2ada
+#define mmDPCSTX2_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX2_DPCSTX_INTERRUPT_CNTL                                                                0x2adb
+#define mmDPCSTX2_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX2_DPCSTX_PLL_UPDATE_ADDR                                                               0x2adc
+#define mmDPCSTX2_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX2_DPCSTX_PLL_UPDATE_DATA                                                               0x2add
+#define mmDPCSTX2_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX2_DPCSTX_DEBUG_CONFIG                                                                  0x2ade
+#define mmDPCSTX2_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx2_dispdec
+// base address: 0x6c0
+#define mmRDPCSTX2_RDPCSTX_CNTL                                                                        0x2ae0
+#define mmRDPCSTX2_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX2_RDPCSTX_CLOCK_CNTL                                                                  0x2ae1
+#define mmRDPCSTX2_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_INTERRUPT_CONTROL                                                           0x2ae2
+#define mmRDPCSTX2_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX2_RDPCSTX_PLL_UPDATE_DATA                                                             0x2ae3
+#define mmRDPCSTX2_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX2_RDPCS_TX_CR_ADDR                                                                    0x2ae4
+#define mmRDPCSTX2_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX2_RDPCS_TX_CR_DATA                                                                    0x2ae5
+#define mmRDPCSTX2_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX2_RDPCS_TX_SRAM_CNTL                                                                  0x2ae6
+#define mmRDPCSTX2_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_MEM_POWER_CTRL                                                              0x2ae7
+#define mmRDPCSTX2_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX2_RDPCSTX_MEM_POWER_CTRL2                                                             0x2ae8
+#define mmRDPCSTX2_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX2_RDPCSTX_SCRATCH                                                                     0x2ae9
+#define mmRDPCSTX2_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x2aec
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX2_RDPCSTX_DEBUG_CONFIG                                                                0x2aed
+#define mmRDPCSTX2_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL0                                                                   0x2af0
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL1                                                                   0x2af1
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL2                                                                   0x2af2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL3                                                                   0x2af3
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL4                                                                   0x2af4
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL5                                                                   0x2af5
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL6                                                                   0x2af6
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL7                                                                   0x2af7
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL8                                                                   0x2af8
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL9                                                                   0x2af9
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL10                                                                  0x2afa
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL11                                                                  0x2afb
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL12                                                                  0x2afc
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL13                                                                  0x2afd
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL14                                                                  0x2afe
+#define mmRDPCSTX2_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE0                                                                   0x2aff
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE1                                                                   0x2b00
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE2                                                                   0x2b01
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE3                                                                   0x2b02
+#define mmRDPCSTX2_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX2_RDPCSTX_PHY_RX_LD_VAL                                                               0x2b03
+#define mmRDPCSTX2_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2b04
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2b05
+#define mmRDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX2_RDPCSTX_DPALT_CONTROL_REG                                                           0x2b06
+#define mmRDPCSTX2_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr2_dispdec
+// base address: 0x6c0
+#define mmDPCSSYS_CR2_DPCSSYS_CR_ADDR                                                                  0x2ae4
+#define mmDPCSSYS_CR2_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR2_DPCSSYS_CR_DATA                                                                  0x2ae5
+#define mmDPCSSYS_CR2_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx3_dispdec
+// base address: 0xa20
+#define mmDPCSTX3_DPCSTX_TX_CLOCK_CNTL                                                                 0x2bb0
+#define mmDPCSTX3_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX3_DPCSTX_TX_CNTL                                                                       0x2bb1
+#define mmDPCSTX3_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX3_DPCSTX_CBUS_CNTL                                                                     0x2bb2
+#define mmDPCSTX3_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX3_DPCSTX_INTERRUPT_CNTL                                                                0x2bb3
+#define mmDPCSTX3_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX3_DPCSTX_PLL_UPDATE_ADDR                                                               0x2bb4
+#define mmDPCSTX3_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX3_DPCSTX_PLL_UPDATE_DATA                                                               0x2bb5
+#define mmDPCSTX3_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX3_DPCSTX_DEBUG_CONFIG                                                                  0x2bb6
+#define mmDPCSTX3_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx3_dispdec
+// base address: 0xa20
+#define mmRDPCSTX3_RDPCSTX_CNTL                                                                        0x2bb8
+#define mmRDPCSTX3_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX3_RDPCSTX_CLOCK_CNTL                                                                  0x2bb9
+#define mmRDPCSTX3_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_INTERRUPT_CONTROL                                                           0x2bba
+#define mmRDPCSTX3_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX3_RDPCSTX_PLL_UPDATE_DATA                                                             0x2bbb
+#define mmRDPCSTX3_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX3_RDPCS_TX_CR_ADDR                                                                    0x2bbc
+#define mmRDPCSTX3_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX3_RDPCS_TX_CR_DATA                                                                    0x2bbd
+#define mmRDPCSTX3_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX3_RDPCS_TX_SRAM_CNTL                                                                  0x2bbe
+#define mmRDPCSTX3_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_MEM_POWER_CTRL                                                              0x2bbf
+#define mmRDPCSTX3_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX3_RDPCSTX_MEM_POWER_CTRL2                                                             0x2bc0
+#define mmRDPCSTX3_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX3_RDPCSTX_SCRATCH                                                                     0x2bc1
+#define mmRDPCSTX3_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x2bc4
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX3_RDPCSTX_DEBUG_CONFIG                                                                0x2bc5
+#define mmRDPCSTX3_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL0                                                                   0x2bc8
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL1                                                                   0x2bc9
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL2                                                                   0x2bca
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL3                                                                   0x2bcb
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL4                                                                   0x2bcc
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL5                                                                   0x2bcd
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL6                                                                   0x2bce
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL7                                                                   0x2bcf
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL8                                                                   0x2bd0
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL9                                                                   0x2bd1
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL10                                                                  0x2bd2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL11                                                                  0x2bd3
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL12                                                                  0x2bd4
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL13                                                                  0x2bd5
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL14                                                                  0x2bd6
+#define mmRDPCSTX3_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE0                                                                   0x2bd7
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE1                                                                   0x2bd8
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE2                                                                   0x2bd9
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE3                                                                   0x2bda
+#define mmRDPCSTX3_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX3_RDPCSTX_PHY_RX_LD_VAL                                                               0x2bdb
+#define mmRDPCSTX3_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2bdc
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2bdd
+#define mmRDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX3_RDPCSTX_DPALT_CONTROL_REG                                                           0x2bde
+#define mmRDPCSTX3_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr3_dispdec
+// base address: 0xa20
+#define mmDPCSSYS_CR3_DPCSSYS_CR_ADDR                                                                  0x2bbc
+#define mmDPCSSYS_CR3_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR3_DPCSSYS_CR_DATA                                                                  0x2bbd
+#define mmDPCSSYS_CR3_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_dpcsrx_dispdec
+// base address: 0x0
+#define mmDPCSRX_PHY_CNTL                                                                              0x2c76
+#define mmDPCSRX_PHY_CNTL_BASE_IDX                                                                     2
+#define mmDPCSRX_RX_CLOCK_CNTL                                                                         0x2c78
+#define mmDPCSRX_RX_CLOCK_CNTL_BASE_IDX                                                                2
+#define mmDPCSRX_RX_CNTL                                                                               0x2c7a
+#define mmDPCSRX_RX_CNTL_BASE_IDX                                                                      2
+#define mmDPCSRX_CBUS_CNTL                                                                             0x2c7b
+#define mmDPCSRX_CBUS_CNTL_BASE_IDX                                                                    2
+#define mmDPCSRX_REG_ERROR_STATUS                                                                      0x2c7c
+#define mmDPCSRX_REG_ERROR_STATUS_BASE_IDX                                                             2
+#define mmDPCSRX_RX_ERROR_STATUS                                                                       0x2c7d
+#define mmDPCSRX_RX_ERROR_STATUS_BASE_IDX                                                              2
+#define mmDPCSRX_INDEX_MODE_ADDR                                                                       0x2c80
+#define mmDPCSRX_INDEX_MODE_ADDR_BASE_IDX                                                              2
+#define mmDPCSRX_INDEX_MODE_DATA                                                                       0x2c81
+#define mmDPCSRX_INDEX_MODE_DATA_BASE_IDX                                                              2
+#define mmDPCSRX_DEBUG_CONFIG                                                                          0x2c82
+#define mmDPCSRX_DEBUG_CONFIG_BASE_IDX                                                                 2
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx4_dispdec
+// base address: 0xd80
+#define mmDPCSTX4_DPCSTX_TX_CLOCK_CNTL                                                                 0x2c88
+#define mmDPCSTX4_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX4_DPCSTX_TX_CNTL                                                                       0x2c89
+#define mmDPCSTX4_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX4_DPCSTX_CBUS_CNTL                                                                     0x2c8a
+#define mmDPCSTX4_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX4_DPCSTX_INTERRUPT_CNTL                                                                0x2c8b
+#define mmDPCSTX4_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX4_DPCSTX_PLL_UPDATE_ADDR                                                               0x2c8c
+#define mmDPCSTX4_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX4_DPCSTX_PLL_UPDATE_DATA                                                               0x2c8d
+#define mmDPCSTX4_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX4_DPCSTX_DEBUG_CONFIG                                                                  0x2c8e
+#define mmDPCSTX4_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx4_dispdec
+// base address: 0xd80
+#define mmRDPCSTX4_RDPCSTX_CNTL                                                                        0x2c90
+#define mmRDPCSTX4_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX4_RDPCSTX_CLOCK_CNTL                                                                  0x2c91
+#define mmRDPCSTX4_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_INTERRUPT_CONTROL                                                           0x2c92
+#define mmRDPCSTX4_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX4_RDPCSTX_PLL_UPDATE_DATA                                                             0x2c93
+#define mmRDPCSTX4_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX4_RDPCS_TX_CR_ADDR                                                                    0x2c94
+#define mmRDPCSTX4_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX4_RDPCS_TX_CR_DATA                                                                    0x2c95
+#define mmRDPCSTX4_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX4_RDPCS_TX_SRAM_CNTL                                                                  0x2c96
+#define mmRDPCSTX4_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_MEM_POWER_CTRL                                                              0x2c97
+#define mmRDPCSTX4_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX4_RDPCSTX_MEM_POWER_CTRL2                                                             0x2c98
+#define mmRDPCSTX4_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX4_RDPCSTX_SCRATCH                                                                     0x2c99
+#define mmRDPCSTX4_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x2c9c
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX4_RDPCSTX_DEBUG_CONFIG                                                                0x2c9d
+#define mmRDPCSTX4_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL0                                                                   0x2ca0
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL1                                                                   0x2ca1
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL2                                                                   0x2ca2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL3                                                                   0x2ca3
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL4                                                                   0x2ca4
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL5                                                                   0x2ca5
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL6                                                                   0x2ca6
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL7                                                                   0x2ca7
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL8                                                                   0x2ca8
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL9                                                                   0x2ca9
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL10                                                                  0x2caa
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL11                                                                  0x2cab
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL12                                                                  0x2cac
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL13                                                                  0x2cad
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL14                                                                  0x2cae
+#define mmRDPCSTX4_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE0                                                                   0x2caf
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE1                                                                   0x2cb0
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE2                                                                   0x2cb1
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE3                                                                   0x2cb2
+#define mmRDPCSTX4_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX4_RDPCSTX_PHY_RX_LD_VAL                                                               0x2cb3
+#define mmRDPCSTX4_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2cb4
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2cb5
+#define mmRDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX4_RDPCSTX_DPALT_CONTROL_REG                                                           0x2cb6
+#define mmRDPCSTX4_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr4_dispdec
+// base address: 0xd80
+#define mmDPCSSYS_CR4_DPCSSYS_CR_ADDR                                                                  0x2c94
+#define mmDPCSSYS_CR4_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR4_DPCSSYS_CR_DATA                                                                  0x2c95
+#define mmDPCSSYS_CR4_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx5_dispdec
+// base address: 0x10e0
+#define mmDPCSTX5_DPCSTX_TX_CLOCK_CNTL                                                                 0x2d60
+#define mmDPCSTX5_DPCSTX_TX_CLOCK_CNTL_BASE_IDX                                                        2
+#define mmDPCSTX5_DPCSTX_TX_CNTL                                                                       0x2d61
+#define mmDPCSTX5_DPCSTX_TX_CNTL_BASE_IDX                                                              2
+#define mmDPCSTX5_DPCSTX_CBUS_CNTL                                                                     0x2d62
+#define mmDPCSTX5_DPCSTX_CBUS_CNTL_BASE_IDX                                                            2
+#define mmDPCSTX5_DPCSTX_INTERRUPT_CNTL                                                                0x2d63
+#define mmDPCSTX5_DPCSTX_INTERRUPT_CNTL_BASE_IDX                                                       2
+#define mmDPCSTX5_DPCSTX_PLL_UPDATE_ADDR                                                               0x2d64
+#define mmDPCSTX5_DPCSTX_PLL_UPDATE_ADDR_BASE_IDX                                                      2
+#define mmDPCSTX5_DPCSTX_PLL_UPDATE_DATA                                                               0x2d65
+#define mmDPCSTX5_DPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                      2
+#define mmDPCSTX5_DPCSTX_DEBUG_CONFIG                                                                  0x2d66
+#define mmDPCSTX5_DPCSTX_DEBUG_CONFIG_BASE_IDX                                                         2
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx5_dispdec
+// base address: 0x10e0
+#define mmRDPCSTX5_RDPCSTX_CNTL                                                                        0x2d68
+#define mmRDPCSTX5_RDPCSTX_CNTL_BASE_IDX                                                               2
+#define mmRDPCSTX5_RDPCSTX_CLOCK_CNTL                                                                  0x2d69
+#define mmRDPCSTX5_RDPCSTX_CLOCK_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_INTERRUPT_CONTROL                                                           0x2d6a
+#define mmRDPCSTX5_RDPCSTX_INTERRUPT_CONTROL_BASE_IDX                                                  2
+#define mmRDPCSTX5_RDPCSTX_PLL_UPDATE_DATA                                                             0x2d6b
+#define mmRDPCSTX5_RDPCSTX_PLL_UPDATE_DATA_BASE_IDX                                                    2
+#define mmRDPCSTX5_RDPCS_TX_CR_ADDR                                                                    0x2d6c
+#define mmRDPCSTX5_RDPCS_TX_CR_ADDR_BASE_IDX                                                           2
+#define mmRDPCSTX5_RDPCS_TX_CR_DATA                                                                    0x2d6d
+#define mmRDPCSTX5_RDPCS_TX_CR_DATA_BASE_IDX                                                           2
+#define mmRDPCSTX5_RDPCS_TX_SRAM_CNTL                                                                  0x2d6e
+#define mmRDPCSTX5_RDPCS_TX_SRAM_CNTL_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_MEM_POWER_CTRL                                                              0x2d6f
+#define mmRDPCSTX5_RDPCSTX_MEM_POWER_CTRL_BASE_IDX                                                     2
+#define mmRDPCSTX5_RDPCSTX_MEM_POWER_CTRL2                                                             0x2d70
+#define mmRDPCSTX5_RDPCSTX_MEM_POWER_CTRL2_BASE_IDX                                                    2
+#define mmRDPCSTX5_RDPCSTX_SCRATCH                                                                     0x2d71
+#define mmRDPCSTX5_RDPCSTX_SCRATCH_BASE_IDX                                                            2
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG                                                    0x2d74
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG_BASE_IDX                                           2
+#define mmRDPCSTX5_RDPCSTX_DEBUG_CONFIG                                                                0x2d75
+#define mmRDPCSTX5_RDPCSTX_DEBUG_CONFIG_BASE_IDX                                                       2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL0                                                                   0x2d78
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL0_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL1                                                                   0x2d79
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL1_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL2                                                                   0x2d7a
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL2_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL3                                                                   0x2d7b
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL3_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL4                                                                   0x2d7c
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL4_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL5                                                                   0x2d7d
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL5_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL6                                                                   0x2d7e
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL6_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL7                                                                   0x2d7f
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL7_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL8                                                                   0x2d80
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL8_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL9                                                                   0x2d81
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL9_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL10                                                                  0x2d82
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL10_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL11                                                                  0x2d83
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL11_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL12                                                                  0x2d84
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL12_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL13                                                                  0x2d85
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL13_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL14                                                                  0x2d86
+#define mmRDPCSTX5_RDPCSTX_PHY_CNTL14_BASE_IDX                                                         2
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE0                                                                   0x2d87
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE0_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE1                                                                   0x2d88
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE1_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE2                                                                   0x2d89
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE2_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE3                                                                   0x2d8a
+#define mmRDPCSTX5_RDPCSTX_PHY_FUSE3_BASE_IDX                                                          2
+#define mmRDPCSTX5_RDPCSTX_PHY_RX_LD_VAL                                                               0x2d8b
+#define mmRDPCSTX5_RDPCSTX_PHY_RX_LD_VAL_BASE_IDX                                                      2
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3                                                        0x2d8c
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3_BASE_IDX                                               2
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6                                                        0x2d8d
+#define mmRDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6_BASE_IDX                                               2
+#define mmRDPCSTX5_RDPCSTX_DPALT_CONTROL_REG                                                           0x2d8e
+#define mmRDPCSTX5_RDPCSTX_DPALT_CONTROL_REG_BASE_IDX                                                  2
+
+
+// addressBlock: dpcssys_dpcssys_cr5_dispdec
+// base address: 0x10e0
+#define mmDPCSSYS_CR5_DPCSSYS_CR_ADDR                                                                  0x2d6c
+#define mmDPCSSYS_CR5_DPCSSYS_CR_ADDR_BASE_IDX                                                         2
+#define mmDPCSSYS_CR5_DPCSSYS_CR_DATA                                                                  0x2d6d
+#define mmDPCSSYS_CR5_DPCSSYS_CR_DATA_BASE_IDX                                                         2
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_sh_mask.h
new file mode 100644
index 000000000000..25e0569e05c8
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_sh_mask.h
@@ -0,0 +1,3912 @@
+/*
+ * Copyright (C) 2019  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _dpcs_2_0_0_SH_MASK_HEADER
+#define _dpcs_2_0_0_SH_MASK_HEADER
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx0_dispdec
+//DPCSTX0_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX0_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX0_DPCSTX_TX_CNTL
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX0_DPCSTX_CBUS_CNTL
+#define DPCSTX0_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX0_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX0_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX0_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX0_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX0_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX0_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX0_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX0_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX0_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX0_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX0_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX0_DPCSTX_DEBUG_CONFIG
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX0_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx0_dispdec
+//RDPCSTX0_RDPCSTX_CNTL
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX0_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX0_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX0_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX0_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX0_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX0_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX0_RDPCS_TX_CR_ADDR
+#define RDPCSTX0_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX0_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX0_RDPCS_TX_CR_DATA
+#define RDPCSTX0_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX0_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX0_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX0_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX0_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX0_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX0_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX0_RDPCSTX_SCRATCH
+#define RDPCSTX0_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX0_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX0_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX0_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL1
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX0_RDPCSTX_PHY_CNTL2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX0_RDPCSTX_PHY_CNTL3
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL5
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX0_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX0_RDPCSTX_PHY_CNTL9
+#define RDPCSTX0_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX0_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX0_RDPCSTX_PHY_CNTL11
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL12
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX0_RDPCSTX_PHY_CNTL13
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX0_RDPCSTX_PHY_CNTL14
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX0_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX0_RDPCSTX_PHY_FUSE0
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX0_RDPCSTX_PHY_FUSE1
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX0_RDPCSTX_PHY_FUSE2
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX0_RDPCSTX_PHY_FUSE3
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX0_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX0_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX0_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX0_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr0_dispdec
+//DPCSSYS_CR0_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR0_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR0_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR0_DPCSSYS_CR_DATA
+#define DPCSSYS_CR0_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR0_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx1_dispdec
+//DPCSTX1_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX1_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX1_DPCSTX_TX_CNTL
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX1_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX1_DPCSTX_CBUS_CNTL
+#define DPCSTX1_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX1_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX1_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX1_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX1_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX1_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX1_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX1_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX1_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX1_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX1_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX1_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX1_DPCSTX_DEBUG_CONFIG
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX1_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx1_dispdec
+//RDPCSTX1_RDPCSTX_CNTL
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX1_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX1_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX1_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX1_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX1_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX1_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX1_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX1_RDPCS_TX_CR_ADDR
+#define RDPCSTX1_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX1_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX1_RDPCS_TX_CR_DATA
+#define RDPCSTX1_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX1_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX1_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX1_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX1_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX1_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX1_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX1_RDPCSTX_SCRATCH
+#define RDPCSTX1_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX1_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX1_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX1_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL1
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX1_RDPCSTX_PHY_CNTL2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX1_RDPCSTX_PHY_CNTL3
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL5
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX1_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX1_RDPCSTX_PHY_CNTL9
+#define RDPCSTX1_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX1_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX1_RDPCSTX_PHY_CNTL11
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL12
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX1_RDPCSTX_PHY_CNTL13
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX1_RDPCSTX_PHY_CNTL14
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX1_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX1_RDPCSTX_PHY_FUSE0
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX1_RDPCSTX_PHY_FUSE1
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX1_RDPCSTX_PHY_FUSE2
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX1_RDPCSTX_PHY_FUSE3
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX1_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX1_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX1_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX1_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX1_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX1_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX1_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX1_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr1_dispdec
+//DPCSSYS_CR1_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR1_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR1_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR1_DPCSSYS_CR_DATA
+#define DPCSSYS_CR1_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR1_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx2_dispdec
+//DPCSTX2_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX2_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX2_DPCSTX_TX_CNTL
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX2_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX2_DPCSTX_CBUS_CNTL
+#define DPCSTX2_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX2_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX2_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX2_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX2_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX2_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX2_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX2_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX2_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX2_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX2_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX2_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX2_DPCSTX_DEBUG_CONFIG
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX2_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx2_dispdec
+//RDPCSTX2_RDPCSTX_CNTL
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX2_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX2_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX2_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX2_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX2_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX2_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX2_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX2_RDPCS_TX_CR_ADDR
+#define RDPCSTX2_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX2_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX2_RDPCS_TX_CR_DATA
+#define RDPCSTX2_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX2_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX2_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX2_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX2_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX2_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX2_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX2_RDPCSTX_SCRATCH
+#define RDPCSTX2_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX2_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX2_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX2_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL1
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX2_RDPCSTX_PHY_CNTL2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX2_RDPCSTX_PHY_CNTL3
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL5
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX2_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX2_RDPCSTX_PHY_CNTL9
+#define RDPCSTX2_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX2_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX2_RDPCSTX_PHY_CNTL11
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL12
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX2_RDPCSTX_PHY_CNTL13
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX2_RDPCSTX_PHY_CNTL14
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX2_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX2_RDPCSTX_PHY_FUSE0
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX2_RDPCSTX_PHY_FUSE1
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX2_RDPCSTX_PHY_FUSE2
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX2_RDPCSTX_PHY_FUSE3
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX2_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX2_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX2_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX2_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX2_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX2_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX2_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX2_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr2_dispdec
+//DPCSSYS_CR2_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR2_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR2_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR2_DPCSSYS_CR_DATA
+#define DPCSSYS_CR2_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR2_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx3_dispdec
+//DPCSTX3_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX3_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX3_DPCSTX_TX_CNTL
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX3_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX3_DPCSTX_CBUS_CNTL
+#define DPCSTX3_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX3_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX3_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX3_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX3_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX3_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX3_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX3_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX3_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX3_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX3_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX3_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX3_DPCSTX_DEBUG_CONFIG
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX3_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx3_dispdec
+//RDPCSTX3_RDPCSTX_CNTL
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX3_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX3_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX3_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX3_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX3_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX3_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX3_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX3_RDPCS_TX_CR_ADDR
+#define RDPCSTX3_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX3_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX3_RDPCS_TX_CR_DATA
+#define RDPCSTX3_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX3_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX3_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX3_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX3_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX3_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX3_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX3_RDPCSTX_SCRATCH
+#define RDPCSTX3_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX3_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX3_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX3_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL1
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX3_RDPCSTX_PHY_CNTL2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX3_RDPCSTX_PHY_CNTL3
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL5
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX3_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX3_RDPCSTX_PHY_CNTL9
+#define RDPCSTX3_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX3_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX3_RDPCSTX_PHY_CNTL11
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL12
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX3_RDPCSTX_PHY_CNTL13
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX3_RDPCSTX_PHY_CNTL14
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX3_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX3_RDPCSTX_PHY_FUSE0
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX3_RDPCSTX_PHY_FUSE1
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX3_RDPCSTX_PHY_FUSE2
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX3_RDPCSTX_PHY_FUSE3
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX3_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX3_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX3_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX3_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX3_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX3_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX3_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX3_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr3_dispdec
+//DPCSSYS_CR3_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR3_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR3_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR3_DPCSSYS_CR_DATA
+#define DPCSSYS_CR3_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR3_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+
+// addressBlock: dpcssys_dpcs0_dpcsrx_dispdec
+//DPCSRX_PHY_CNTL
+#define DPCSRX_PHY_CNTL__DPCS_PHY_RESET__SHIFT                                                                0x0
+#define DPCSRX_PHY_CNTL__DPCS_PHY_RESET_MASK                                                                  0x00000001L
+//DPCSRX_RX_CLOCK_CNTL
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_GATE_DIS__SHIFT                                                  0x0
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_EN__SHIFT                                                        0x1
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_SEL__SHIFT                                                       0x2
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_CLOCK_ON__SHIFT                                                  0x4
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_GATE_DIS__SHIFT                                                 0x10
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_EN__SHIFT                                                       0x11
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_CLOCK_ON__SHIFT                                                 0x12
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_GATE_DIS__SHIFT                                                 0x14
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_EN__SHIFT                                                       0x15
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_CLOCK_ON__SHIFT                                                 0x16
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_GATE_DIS__SHIFT                                                 0x18
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_EN__SHIFT                                                       0x19
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_CLOCK_ON__SHIFT                                                 0x1a
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_GATE_DIS__SHIFT                                                 0x1c
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_EN__SHIFT                                                       0x1d
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_CLOCK_ON__SHIFT                                                 0x1e
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_GATE_DIS_MASK                                                    0x00000001L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_EN_MASK                                                          0x00000002L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_SEL_MASK                                                         0x0000000CL
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX_CLOCK_ON_MASK                                                    0x00000010L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_GATE_DIS_MASK                                                   0x00010000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_EN_MASK                                                         0x00020000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX0_CLOCK_ON_MASK                                                   0x00040000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_GATE_DIS_MASK                                                   0x00100000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_EN_MASK                                                         0x00200000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX1_CLOCK_ON_MASK                                                   0x00400000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_GATE_DIS_MASK                                                   0x01000000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_EN_MASK                                                         0x02000000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX2_CLOCK_ON_MASK                                                   0x04000000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_GATE_DIS_MASK                                                   0x10000000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_EN_MASK                                                         0x20000000L
+#define DPCSRX_RX_CLOCK_CNTL__DPCS_SYMCLK_RX3_CLOCK_ON_MASK                                                   0x40000000L
+//DPCSRX_RX_CNTL
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE0_EN__SHIFT                                                               0x0
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE1_EN__SHIFT                                                               0x1
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE2_EN__SHIFT                                                               0x2
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE3_EN__SHIFT                                                               0x3
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_EN__SHIFT                                                                0x4
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_START__SHIFT                                                             0x5
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_RD_START_DELAY__SHIFT                                                    0x8
+#define DPCSRX_RX_CNTL__DPCS_RX_SOFT_RESET__SHIFT                                                             0x1f
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE0_EN_MASK                                                                 0x00000001L
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE1_EN_MASK                                                                 0x00000002L
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE2_EN_MASK                                                                 0x00000004L
+#define DPCSRX_RX_CNTL__DPCS_RX_LANE3_EN_MASK                                                                 0x00000008L
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_EN_MASK                                                                  0x00000010L
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_START_MASK                                                               0x00000020L
+#define DPCSRX_RX_CNTL__DPCS_RX_FIFO_RD_START_DELAY_MASK                                                      0x00000F00L
+#define DPCSRX_RX_CNTL__DPCS_RX_SOFT_RESET_MASK                                                               0x80000000L
+//DPCSRX_CBUS_CNTL
+#define DPCSRX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                                       0x0
+#define DPCSRX_CBUS_CNTL__DPCS_PHY_MASTER_REQ_DELAY__SHIFT                                                    0x8
+#define DPCSRX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                         0x1f
+#define DPCSRX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                         0x0000000FL
+#define DPCSRX_CBUS_CNTL__DPCS_PHY_MASTER_REQ_DELAY_MASK                                                      0x0000FF00L
+#define DPCSRX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                           0x80000000L
+//DPCSRX_REG_ERROR_STATUS
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_FIFO_OVERFLOW__SHIFT                                                0x0
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_ERROR_CLR__SHIFT                                                    0x1
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                              0x4
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_FIFO_OVERFLOW_MASK                                                  0x00000001L
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_ERROR_CLR_MASK                                                      0x00000002L
+#define DPCSRX_REG_ERROR_STATUS__DPCS_REG_FIFO_ERROR_MASK_MASK                                                0x00000010L
+//DPCSRX_RX_ERROR_STATUS
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX0_FIFO_ERROR__SHIFT                                                    0x0
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX1_FIFO_ERROR__SHIFT                                                    0x1
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX2_FIFO_ERROR__SHIFT                                                    0x2
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX3_FIFO_ERROR__SHIFT                                                    0x3
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX_ERROR_CLR__SHIFT                                                      0x8
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX_FIFO_ERROR_MASK__SHIFT                                                0xc
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX0_FIFO_ERROR_MASK                                                      0x00000001L
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX1_FIFO_ERROR_MASK                                                      0x00000002L
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX2_FIFO_ERROR_MASK                                                      0x00000004L
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX3_FIFO_ERROR_MASK                                                      0x00000008L
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX_ERROR_CLR_MASK                                                        0x00000100L
+#define DPCSRX_RX_ERROR_STATUS__DPCS_RX_FIFO_ERROR_MASK_MASK                                                  0x00001000L
+//DPCSRX_INDEX_MODE_ADDR
+#define DPCSRX_INDEX_MODE_ADDR__DPCS_INDEX_MODE_ADDR__SHIFT                                                   0x0
+#define DPCSRX_INDEX_MODE_ADDR__DPCS_INDEX_MODE_ADDR_MASK                                                     0x0003FFFFL
+//DPCSRX_INDEX_MODE_DATA
+#define DPCSRX_INDEX_MODE_DATA__DPCS_INDEX_MODE_DATA__SHIFT                                                   0x0
+#define DPCSRX_INDEX_MODE_DATA__DPCS_INDEX_MODE_DATA_MASK                                                     0xFFFFFFFFL
+//DPCSRX_DEBUG_CONFIG
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                               0x0
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                                       0x1
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_RX_SYMCLK_SEL__SHIFT                                                    0x6
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_BLOCK_SEL__SHIFT                                                        0xb
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                         0xe
+#define DPCSRX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                                  0x10
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                                 0x00000001L
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                         0x0000000EL
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_RX_SYMCLK_SEL_MASK                                                      0x000000C0L
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_BLOCK_SEL_MASK                                                          0x00003800L
+#define DPCSRX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                           0x00004000L
+#define DPCSRX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                                    0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx4_dispdec
+//DPCSTX4_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX4_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX4_DPCSTX_TX_CNTL
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX4_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX4_DPCSTX_CBUS_CNTL
+#define DPCSTX4_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX4_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX4_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX4_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX4_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX4_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX4_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX4_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX4_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX4_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX4_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX4_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX4_DPCSTX_DEBUG_CONFIG
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX4_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx4_dispdec
+//RDPCSTX4_RDPCSTX_CNTL
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX4_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX4_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX4_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX4_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX4_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX4_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX4_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX4_RDPCS_TX_CR_ADDR
+#define RDPCSTX4_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX4_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX4_RDPCS_TX_CR_DATA
+#define RDPCSTX4_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX4_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX4_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX4_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX4_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX4_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX4_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX4_RDPCSTX_SCRATCH
+#define RDPCSTX4_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX4_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX4_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX4_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL1
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX4_RDPCSTX_PHY_CNTL2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX4_RDPCSTX_PHY_CNTL3
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL5
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX4_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX4_RDPCSTX_PHY_CNTL9
+#define RDPCSTX4_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX4_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX4_RDPCSTX_PHY_CNTL11
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL12
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX4_RDPCSTX_PHY_CNTL13
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX4_RDPCSTX_PHY_CNTL14
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX4_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX4_RDPCSTX_PHY_FUSE0
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX4_RDPCSTX_PHY_FUSE1
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX4_RDPCSTX_PHY_FUSE2
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX4_RDPCSTX_PHY_FUSE3
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX4_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX4_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX4_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX4_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX4_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX4_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX4_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX4_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr4_dispdec
+//DPCSSYS_CR4_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR4_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR4_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR4_DPCSSYS_CR_DATA
+#define DPCSSYS_CR4_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR4_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+
+// addressBlock: dpcssys_dpcs0_dpcstx5_dispdec
+//DPCSTX5_DPCSTX_TX_CLOCK_CNTL
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS__SHIFT                                             0x0
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN__SHIFT                                                   0x1
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON__SHIFT                                             0x2
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0x3
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_GATE_DIS_MASK                                               0x00000001L
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_EN_MASK                                                     0x00000002L
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_CLOCK_ON_MASK                                               0x00000004L
+#define DPCSTX5_DPCSTX_TX_CLOCK_CNTL__DPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000008L
+//DPCSTX5_DPCSTX_TX_CNTL
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ__SHIFT                                                 0xc
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING__SHIFT                                             0xd
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT                                                      0xe
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT                                              0xf
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_REQ_MASK                                                   0x00001000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_PLL_UPDATE_PENDING_MASK                                               0x00002000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK                                                        0x00004000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK                                                0x00008000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define DPCSTX5_DPCSTX_TX_CNTL__DPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//DPCSTX5_DPCSTX_CBUS_CNTL
+#define DPCSTX5_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY__SHIFT                                               0x0
+#define DPCSTX5_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET__SHIFT                                                 0x1f
+#define DPCSTX5_DPCSTX_CBUS_CNTL__DPCS_CBUS_WR_CMD_DELAY_MASK                                                 0x000000FFL
+#define DPCSTX5_DPCSTX_CBUS_CNTL__DPCS_CBUS_SOFT_RESET_MASK                                                   0x80000000L
+//DPCSTX5_DPCSTX_INTERRUPT_CNTL
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW__SHIFT                                          0x0
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR__SHIFT                                              0x1
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK__SHIFT                                        0x4
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR__SHIFT                                             0x8
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR__SHIFT                                             0x9
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR__SHIFT                                             0xa
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR__SHIFT                                             0xb
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR__SHIFT                                               0xc
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK__SHIFT                                         0x10
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK__SHIFT                                             0x14
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_OVERFLOW_MASK                                            0x00000001L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_ERROR_CLR_MASK                                                0x00000002L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_REG_FIFO_ERROR_MASK_MASK                                          0x00000010L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX0_FIFO_ERROR_MASK                                               0x00000100L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX1_FIFO_ERROR_MASK                                               0x00000200L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX2_FIFO_ERROR_MASK                                               0x00000400L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX3_FIFO_ERROR_MASK                                               0x00000800L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX_ERROR_CLR_MASK                                                 0x00001000L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_TX_FIFO_ERROR_MASK_MASK                                           0x00010000L
+#define DPCSTX5_DPCSTX_INTERRUPT_CNTL__DPCS_INTERRUPT_MASK_MASK                                               0x00100000L
+//DPCSTX5_DPCSTX_PLL_UPDATE_ADDR
+#define DPCSTX5_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR__SHIFT                                           0x0
+#define DPCSTX5_DPCSTX_PLL_UPDATE_ADDR__DPCS_PLL_UPDATE_ADDR_MASK                                             0x0003FFFFL
+//DPCSTX5_DPCSTX_PLL_UPDATE_DATA
+#define DPCSTX5_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA__SHIFT                                           0x0
+#define DPCSTX5_DPCSTX_PLL_UPDATE_DATA__DPCS_PLL_UPDATE_DATA_MASK                                             0xFFFFFFFFL
+//DPCSTX5_DPCSTX_DEBUG_CONFIG
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN__SHIFT                                                       0x0
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL__SHIFT                                               0x1
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL__SHIFT                                            0x4
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL__SHIFT                                       0x8
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS__SHIFT                                                 0xe
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN__SHIFT                                          0x10
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_EN_MASK                                                         0x00000001L
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CFGCLK_SEL_MASK                                                 0x0000000EL
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_SEL_MASK                                              0x00000070L
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_TX_SYMCLK_DIV2_SEL_MASK                                         0x00000700L
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_DBG_CBUS_DIS_MASK                                                   0x00004000L
+#define DPCSTX5_DPCSTX_DEBUG_CONFIG__DPCS_TEST_DEBUG_WRITE_EN_MASK                                            0x00010000L
+
+
+// addressBlock: dpcssys_dpcs0_rdpcstx5_dispdec
+//RDPCSTX5_RDPCSTX_CNTL
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET__SHIFT                                                   0x0
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET__SHIFT                                                   0x4
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN__SHIFT                                                  0xc
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN__SHIFT                                                  0xd
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN__SHIFT                                                  0xe
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN__SHIFT                                                  0xf
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN__SHIFT                                                        0x10
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_START__SHIFT                                                     0x11
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY__SHIFT                                            0x14
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN__SHIFT                                              0x18
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN__SHIFT                                       0x19
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS__SHIFT                                                0x1a
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET__SHIFT                                                     0x1f
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_CBUS_SOFT_RESET_MASK                                                     0x00000001L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_SRAM_SOFT_RESET_MASK                                                     0x00000010L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE0_EN_MASK                                                    0x00001000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE1_EN_MASK                                                    0x00002000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE2_EN_MASK                                                    0x00004000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_LANE3_EN_MASK                                                    0x00008000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_EN_MASK                                                          0x00010000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_START_MASK                                                       0x00020000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_FIFO_RD_START_DELAY_MASK                                              0x00F00000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_CR_REGISTER_BLOCK_EN_MASK                                                0x01000000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_NON_DPALT_REGISTER_BLOCK_EN_MASK                                         0x02000000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_DPALT_BLOCK_STATUS_MASK                                                  0x04000000L
+#define RDPCSTX5_RDPCSTX_CNTL__RDPCS_TX_SOFT_RESET_MASK                                                       0x80000000L
+//RDPCSTX5_RDPCSTX_CLOCK_CNTL
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN__SHIFT                                               0x0
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN__SHIFT                                          0x4
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN__SHIFT                                          0x5
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN__SHIFT                                          0x6
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN__SHIFT                                          0x7
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT                                        0x8
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT                                              0x9
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT                                        0xa
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS__SHIFT                                            0xc
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN__SHIFT                                                  0xd
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON__SHIFT                                            0xe
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS__SHIFT                                              0x10
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_EXT_REFCLK_EN_MASK                                                 0x00000001L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX0_EN_MASK                                            0x00000010L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX1_EN_MASK                                            0x00000020L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX2_EN_MASK                                            0x00000040L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_TX3_EN_MASK                                            0x00000080L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK                                          0x00000100L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK                                                0x00000200L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK                                          0x00000400L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_GATE_DIS_MASK                                              0x00001000L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_EN_MASK                                                    0x00002000L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_CLOCK_ON_MASK                                              0x00004000L
+#define RDPCSTX5_RDPCSTX_CLOCK_CNTL__RDPCS_SRAMCLK_BYPASS_MASK                                                0x00010000L
+//RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW__SHIFT                                    0x0
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE__SHIFT                                 0x1
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE__SHIFT                                   0x2
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR__SHIFT                                       0x4
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR__SHIFT                                       0x5
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR__SHIFT                                       0x6
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR__SHIFT                                       0x7
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR__SHIFT                                        0x8
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR__SHIFT                             0x9
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR__SHIFT                               0xa
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR__SHIFT                                         0xc
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK__SHIFT                                  0x10
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK__SHIFT                            0x11
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK__SHIFT                              0x12
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK__SHIFT                                   0x14
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_OVERFLOW_MASK                                      0x00000001L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK                                   0x00000002L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK                                     0x00000004L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX0_FIFO_ERROR_MASK                                         0x00000010L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX1_FIFO_ERROR_MASK                                         0x00000020L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX2_FIFO_ERROR_MASK                                         0x00000040L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX3_FIFO_ERROR_MASK                                         0x00000080L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_ERROR_CLR_MASK                                          0x00000100L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_CLR_MASK                               0x00000200L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_CLR_MASK                                 0x00000400L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_ERROR_CLR_MASK                                           0x00001000L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_REG_FIFO_ERROR_MASK_MASK                                    0x00010000L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_DISABLE_TOGGLE_MASK_MASK                              0x00020000L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_DPALT_4LANE_TOGGLE_MASK_MASK                                0x00040000L
+#define RDPCSTX5_RDPCSTX_INTERRUPT_CONTROL__RDPCS_TX_FIFO_ERROR_MASK_MASK                                     0x00100000L
+//RDPCSTX5_RDPCSTX_PLL_UPDATE_DATA
+#define RDPCSTX5_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA__SHIFT                                        0x0
+#define RDPCSTX5_RDPCSTX_PLL_UPDATE_DATA__RDPCS_PLL_UPDATE_DATA_MASK                                          0x00000001L
+//RDPCSTX5_RDPCS_TX_CR_ADDR
+#define RDPCSTX5_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                    0x0
+#define RDPCSTX5_RDPCS_TX_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                      0x0000FFFFL
+//RDPCSTX5_RDPCS_TX_CR_DATA
+#define RDPCSTX5_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                    0x0
+#define RDPCSTX5_RDPCS_TX_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                      0x0000FFFFL
+//RDPCSTX5_RDPCS_TX_SRAM_CNTL
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS__SHIFT                                                 0x14
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE__SHIFT                                               0x18
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE__SHIFT                                           0x1c
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_DIS_MASK                                                   0x00100000L
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_FORCE_MASK                                                 0x03000000L
+#define RDPCSTX5_RDPCS_TX_SRAM_CNTL__RDPCS_MEM_PWR_PWR_STATE_MASK                                             0x30000000L
+//RDPCSTX5_RDPCSTX_MEM_POWER_CTRL
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES__SHIFT                                           0x0
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES__SHIFT                                    0xc
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1__SHIFT                                  0x1a
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2__SHIFT                                  0x1b
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1__SHIFT                                   0x1c
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2__SHIFT                                   0x1d
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM__SHIFT                                         0x1e
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_RM_FUSES_MASK                                             0x00000FFFL
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_FUSE_CUSTOM_RM_FUSES_MASK                                      0x03FFF000L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC1_MASK                                    0x04000000L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_PDP_BC2_MASK                                    0x08000000L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC1_MASK                                     0x10000000L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_MEM_POWER_CTRL_HD_BC2_MASK                                     0x20000000L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL__RDPCS_LIVMIN_DIS_SRAM_MASK                                           0x40000000L
+//RDPCSTX5_RDPCSTX_MEM_POWER_CTRL2
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF__SHIFT                                    0x0
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO__SHIFT                                    0x2
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_POFF_MASK                                      0x00000003L
+#define RDPCSTX5_RDPCSTX_MEM_POWER_CTRL2__RDPCS_MEM_POWER_CTRL_FISO_MASK                                      0x00000004L
+//RDPCSTX5_RDPCSTX_SCRATCH
+#define RDPCSTX5_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH__SHIFT                                                      0x0
+#define RDPCSTX5_RDPCSTX_SCRATCH__RDPCSTX_SCRATCH_MASK                                                        0xFFFFFFFFL
+//RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG__SHIFT                      0x0
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS__SHIFT              0x4
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE__SHIFT                      0x8
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_DIS_BLOCK_REG_MASK                        0x00000001L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_FORCE_SYMCLK_DIV2_DIS_MASK                0x00000010L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG__RDPCS_DMCU_DPALT_CONTROL_SPARE_MASK                        0x0000FF00L
+//RDPCSTX5_RDPCSTX_DEBUG_CONFIG
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN__SHIFT                                                    0x0
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT__SHIFT                                        0x4
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP__SHIFT                                        0x7
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK__SHIFT                                          0x8
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE__SHIFT                                       0xf
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX__SHIFT                                          0x10
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT__SHIFT                                              0x18
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_EN_MASK                                                      0x00000001L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_8BIT_MASK                                          0x00000070L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_ASYNC_SWAP_MASK                                          0x00000080L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_SEL_TEST_CLK_MASK                                            0x00001F00L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_EXPIRE_MASK                                         0x00008000L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MAX_MASK                                            0x00FF0000L
+#define RDPCSTX5_RDPCSTX_DEBUG_CONFIG__RDPCS_DBG_CR_COUNT_MASK                                                0xFF000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET__SHIFT                                                    0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET__SHIFT                                            0x1
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N__SHIFT                                          0x2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN__SHIFT                                           0x3
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT__SHIFT                                                  0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE__SHIFT                                          0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE__SHIFT                                                0x9
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL__SHIFT                                            0xe
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ__SHIFT                                                0x11
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK__SHIFT                                                0x12
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL__SHIFT                                              0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL__SHIFT                                               0x15
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN__SHIFT                                            0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT__SHIFT                                        0x19
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE__SHIFT                                               0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE__SHIFT                                             0x1d
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS__SHIFT                                                  0x1f
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RESET_MASK                                                      0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_PHY_RESET_MASK                                              0x00000002L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TCA_APB_RESET_N_MASK                                            0x00000004L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TEST_POWERDOWN_MASK                                             0x00000008L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_DTB_OUT_MASK                                                    0x00000030L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_HDMIMODE_ENABLE_MASK                                            0x00000100L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_RANGE_MASK                                                  0x00003E00L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_TX_VBOOST_LVL_MASK                                              0x0001C000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_REQ_MASK                                                  0x00020000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_RTUNE_ACK_MASK                                                  0x00040000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_PARA_SEL_MASK                                                0x00100000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_CR_MUX_SEL_MASK                                                 0x00200000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_EN_MASK                                              0x01000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_PHY_REF_CLKDET_RESULT_MASK                                          0x02000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_INIT_DONE_MASK                                                 0x10000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_EXT_LD_DONE_MASK                                               0x20000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL0__RDPCS_SRAM_BYPASS_MASK                                                    0x80000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL1
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN__SHIFT                                               0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN__SHIFT                                               0x1
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE__SHIFT                                           0x2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN__SHIFT                                               0x3
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE__SHIFT                                           0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET__SHIFT                                              0x5
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN__SHIFT                                               0x6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE__SHIFT                                           0x7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PG_MODE_EN_MASK                                                 0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_EN_MASK                                                 0x00000002L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PCS_PWR_STABLE_MASK                                             0x00000004L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_EN_MASK                                                 0x00000008L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_PMA_PWR_STABLE_MASK                                             0x00000010L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_DP_PG_RESET_MASK                                                0x00000020L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_EN_MASK                                                 0x00000040L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL1__RDPCS_PHY_ANA_PWR_STABLE_MASK                                             0x00000080L
+//RDPCSTX5_RDPCSTX_PHY_CNTL2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR__SHIFT                                                  0x3
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN__SHIFT                                 0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN__SHIFT                                 0x5
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN__SHIFT                                 0x6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN__SHIFT                                 0x7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN__SHIFT                                 0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN__SHIFT                                 0x9
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN__SHIFT                                 0xa
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN__SHIFT                                 0xb
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP4_POR_MASK                                                    0x00000008L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_RX2TX_PAR_LB_EN_MASK                                   0x00000010L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_RX2TX_PAR_LB_EN_MASK                                   0x00000020L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_RX2TX_PAR_LB_EN_MASK                                   0x00000040L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_RX2TX_PAR_LB_EN_MASK                                   0x00000080L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE0_TX2RX_SER_LB_EN_MASK                                   0x00000100L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE1_TX2RX_SER_LB_EN_MASK                                   0x00000200L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE2_TX2RX_SER_LB_EN_MASK                                   0x00000400L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL2__RDPCS_PHY_DP_LANE3_TX2RX_SER_LB_EN_MASK                                   0x00000800L
+//RDPCSTX5_RDPCSTX_PHY_CNTL3
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET__SHIFT                                             0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE__SHIFT                                           0x1
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY__SHIFT                                           0x2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN__SHIFT                                           0x3
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ__SHIFT                                               0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK__SHIFT                                               0x5
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET__SHIFT                                             0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE__SHIFT                                           0x9
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY__SHIFT                                           0xa
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN__SHIFT                                           0xb
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ__SHIFT                                               0xc
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK__SHIFT                                               0xd
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET__SHIFT                                             0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE__SHIFT                                           0x11
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY__SHIFT                                           0x12
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN__SHIFT                                           0x13
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ__SHIFT                                               0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK__SHIFT                                               0x15
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET__SHIFT                                             0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE__SHIFT                                           0x19
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY__SHIFT                                           0x1a
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN__SHIFT                                           0x1b
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ__SHIFT                                               0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK__SHIFT                                               0x1d
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_MASK                                               0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_MASK                                             0x00000002L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_MASK                                             0x00000004L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_MASK                                             0x00000008L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_MASK                                                 0x00000010L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_MASK                                                 0x00000020L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_MASK                                               0x00000100L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_MASK                                             0x00000200L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_MASK                                             0x00000400L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_MASK                                             0x00000800L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_MASK                                                 0x00001000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_MASK                                                 0x00002000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_MASK                                               0x00010000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_MASK                                             0x00020000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_MASK                                             0x00040000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_MASK                                             0x00080000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_MASK                                                 0x00100000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_MASK                                                 0x00200000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_MASK                                               0x01000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_MASK                                             0x02000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_MASK                                             0x04000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_MASK                                             0x08000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_MASK                                                 0x10000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_MASK                                                 0x20000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL__SHIFT                                         0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT__SHIFT                                            0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC__SHIFT                                    0x6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN__SHIFT                                        0x7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL__SHIFT                                         0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT__SHIFT                                            0xc
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC__SHIFT                                    0xe
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN__SHIFT                                        0xf
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL__SHIFT                                         0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT__SHIFT                                            0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC__SHIFT                                    0x16
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN__SHIFT                                        0x17
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL__SHIFT                                         0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT__SHIFT                                            0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC__SHIFT                                    0x1e
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN__SHIFT                                        0x1f
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_TERM_CTRL_MASK                                           0x00000007L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_INVERT_MASK                                              0x00000010L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_BYPASS_EQ_CALC_MASK                                      0x00000040L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX0_HP_PROT_EN_MASK                                          0x00000080L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_TERM_CTRL_MASK                                           0x00000700L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_INVERT_MASK                                              0x00001000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_BYPASS_EQ_CALC_MASK                                      0x00004000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX1_HP_PROT_EN_MASK                                          0x00008000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_TERM_CTRL_MASK                                           0x00070000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_INVERT_MASK                                              0x00100000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_BYPASS_EQ_CALC_MASK                                      0x00400000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX2_HP_PROT_EN_MASK                                          0x00800000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_TERM_CTRL_MASK                                           0x07000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_INVERT_MASK                                              0x10000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_BYPASS_EQ_CALC_MASK                                      0x40000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL4__RDPCS_PHY_DP_TX3_HP_PROT_EN_MASK                                          0x80000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL5
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD__SHIFT                                               0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE__SHIFT                                              0x1
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH__SHIFT                                             0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ__SHIFT                                         0x6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT__SHIFT                                      0x7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD__SHIFT                                               0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE__SHIFT                                              0x9
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH__SHIFT                                             0xc
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ__SHIFT                                         0xe
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT__SHIFT                                      0xf
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD__SHIFT                                               0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE__SHIFT                                              0x11
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH__SHIFT                                             0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ__SHIFT                                         0x16
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT__SHIFT                                      0x17
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD__SHIFT                                               0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE__SHIFT                                              0x19
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH__SHIFT                                             0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ__SHIFT                                         0x1e
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT__SHIFT                                      0x1f
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_LPD_MASK                                                 0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_RATE_MASK                                                0x0000000EL
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_WIDTH_MASK                                               0x00000030L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_REQ_MASK                                           0x00000040L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX0_DETRX_RESULT_MASK                                        0x00000080L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_LPD_MASK                                                 0x00000100L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_RATE_MASK                                                0x00000E00L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_WIDTH_MASK                                               0x00003000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_REQ_MASK                                           0x00004000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX1_DETRX_RESULT_MASK                                        0x00008000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_LPD_MASK                                                 0x00010000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_RATE_MASK                                                0x000E0000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_WIDTH_MASK                                               0x00300000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_REQ_MASK                                           0x00400000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX2_DETRX_RESULT_MASK                                        0x00800000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_LPD_MASK                                                 0x01000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_RATE_MASK                                                0x0E000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_WIDTH_MASK                                               0x30000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_REQ_MASK                                           0x40000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL5__RDPCS_PHY_DP_TX3_DETRX_RESULT_MASK                                        0x80000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE__SHIFT                                            0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN__SHIFT                                           0x2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE__SHIFT                                            0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN__SHIFT                                           0x6
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE__SHIFT                                            0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN__SHIFT                                           0xa
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE__SHIFT                                            0xc
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN__SHIFT                                           0xe
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4__SHIFT                                                0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE__SHIFT                                            0x11
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK__SHIFT                                        0x12
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN__SHIFT                                            0x13
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ__SHIFT                                           0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_MASK                                              0x00000003L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_MASK                                             0x00000004L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_MASK                                              0x00000030L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_MASK                                             0x00000040L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_MASK                                              0x00000300L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_MASK                                             0x00000400L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_MASK                                              0x00003000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_MASK                                             0x00004000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_MASK                                                  0x00010000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_MASK                                              0x00020000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_MASK                                          0x00040000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_MASK                                              0x00080000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_MASK                                             0x00100000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN__SHIFT                                       0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT__SHIFT                                      0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_DEN_MASK                                         0x0000FFFFL
+#define RDPCSTX5_RDPCSTX_PHY_CNTL7__RDPCS_PHY_DP_MPLLB_FRACN_QUOT_MASK                                        0xFFFF0000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK__SHIFT                                        0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL8__RDPCS_PHY_DP_MPLLB_SSC_PEAK_MASK                                          0x000FFFFFL
+//RDPCSTX5_RDPCSTX_PHY_CNTL9
+#define RDPCSTX5_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE__SHIFT                                    0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD__SHIFT                                   0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_STEPSIZE_MASK                                      0x001FFFFFL
+#define RDPCSTX5_RDPCSTX_PHY_CNTL9__RDPCS_PHY_DP_MPLLB_SSC_UP_SPREAD_MASK                                     0x01000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM__SHIFT                                      0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL10__RDPCS_PHY_DP_MPLLB_FRACN_REM_MASK                                        0x0000FFFFL
+//RDPCSTX5_RDPCSTX_PHY_CNTL11
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER__SHIFT                                     0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV__SHIFT                                     0x10
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV__SHIFT                                    0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV__SHIFT                           0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_MPLLB_MULTIPLIER_MASK                                       0x0000FFF0L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_DIV_MASK                                       0x00070000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_DP_REF_CLK_MPLLB_DIV_MASK                                      0x00700000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL11__RDPCS_PHY_HDMI_MPLLB_HDMI_PIXEL_CLK_DIV_MASK                             0x03000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL12
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN__SHIFT                                    0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN__SHIFT                                   0x2
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV__SHIFT                                     0x4
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE__SHIFT                                          0x7
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN__SHIFT                                         0x8
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_DIV5_CLK_EN_MASK                                      0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_WORD_DIV2_EN_MASK                                     0x00000004L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_TX_CLK_DIV_MASK                                       0x00000070L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_STATE_MASK                                            0x00000080L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL12__RDPCS_PHY_DP_MPLLB_SSC_EN_MASK                                           0x00000100L
+//RDPCSTX5_RDPCSTX_PHY_CNTL13
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER__SHIFT                                 0x14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN__SHIFT                                     0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN__SHIFT                                       0x1d
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE__SHIFT                               0x1e
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_MULTIPLIER_MASK                                   0x0FF00000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_DIV_CLK_EN_MASK                                       0x10000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_FORCE_EN_MASK                                         0x20000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL13__RDPCS_PHY_DP_MPLLB_INIT_CAL_DISABLE_MASK                                 0x40000000L
+//RDPCSTX5_RDPCSTX_PHY_CNTL14
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE__SHIFT                                      0x0
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN__SHIFT                                       0x18
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN__SHIFT                                        0x1c
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_CAL_FORCE_MASK                                        0x00000001L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_FRACN_EN_MASK                                         0x01000000L
+#define RDPCSTX5_RDPCSTX_PHY_CNTL14__RDPCS_PHY_DP_MPLLB_PMIX_EN_MASK                                          0x10000000L
+//RDPCSTX5_RDPCSTX_PHY_FUSE0
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I__SHIFT                                             0x12
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO__SHIFT                                        0x14
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_TX0_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_V2I_MASK                                               0x000C0000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE0__RDPCS_PHY_DP_MPLLB_FREQ_VCO_MASK                                          0x00300000L
+//RDPCSTX5_RDPCSTX_PHY_FUSE1
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT__SHIFT                                          0x12
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP__SHIFT                                         0x19
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_TX1_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_INT_MASK                                            0x01FC0000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE1__RDPCS_PHY_DP_MPLLB_CP_PROP_MASK                                           0xFE000000L
+//RDPCSTX5_RDPCSTX_PHY_FUSE2
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE2__RDPCS_PHY_DP_TX2_EQ_POST_MASK                                             0x0003F000L
+//RDPCSTX5_RDPCSTX_PHY_FUSE3
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN__SHIFT                                           0x0
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE__SHIFT                                            0x6
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST__SHIFT                                           0xc
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE__SHIFT                                             0x12
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE__SHIFT                                                0x18
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_MAIN_MASK                                             0x0000003FL
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_PRE_MASK                                              0x00000FC0L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DP_TX3_EQ_POST_MASK                                             0x0003F000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_FINETUNE_MASK                                               0x00FC0000L
+#define RDPCSTX5_RDPCSTX_PHY_FUSE3__RDPCS_PHY_DCO_RANGE_MASK                                                  0x03000000L
+//RDPCSTX5_RDPCSTX_PHY_RX_LD_VAL
+#define RDPCSTX5_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL__SHIFT                                        0x0
+#define RDPCSTX5_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL__SHIFT                                        0x8
+#define RDPCSTX5_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_REF_LD_VAL_MASK                                          0x0000007FL
+#define RDPCSTX5_RDPCSTX_PHY_RX_LD_VAL__RDPCS_PHY_RX_VCO_LD_VAL_MASK                                          0x001FFF00L
+//RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED__SHIFT                         0x0
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED__SHIFT                       0x1
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED__SHIFT                       0x2
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED__SHIFT                       0x3
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED__SHIFT                           0x4
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED__SHIFT                           0x5
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED__SHIFT                         0x8
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED__SHIFT                       0x9
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED__SHIFT                       0xa
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED__SHIFT                       0xb
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED__SHIFT                           0xc
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED__SHIFT                           0xd
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED__SHIFT                         0x10
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED__SHIFT                       0x11
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED__SHIFT                       0x12
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED__SHIFT                       0x13
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED__SHIFT                           0x14
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED__SHIFT                           0x15
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED__SHIFT                         0x18
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED__SHIFT                       0x19
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED__SHIFT                       0x1a
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED__SHIFT                       0x1b
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED__SHIFT                           0x1c
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED__SHIFT                           0x1d
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_RESET_RESERVED_MASK                           0x00000001L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DISABLE_RESERVED_MASK                         0x00000002L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_CLK_RDY_RESERVED_MASK                         0x00000004L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_DATA_EN_RESERVED_MASK                         0x00000008L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_REQ_RESERVED_MASK                             0x00000010L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX0_ACK_RESERVED_MASK                             0x00000020L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_RESET_RESERVED_MASK                           0x00000100L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DISABLE_RESERVED_MASK                         0x00000200L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_CLK_RDY_RESERVED_MASK                         0x00000400L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_DATA_EN_RESERVED_MASK                         0x00000800L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_REQ_RESERVED_MASK                             0x00001000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX1_ACK_RESERVED_MASK                             0x00002000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_RESET_RESERVED_MASK                           0x00010000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DISABLE_RESERVED_MASK                         0x00020000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_CLK_RDY_RESERVED_MASK                         0x00040000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_DATA_EN_RESERVED_MASK                         0x00080000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_REQ_RESERVED_MASK                             0x00100000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX2_ACK_RESERVED_MASK                             0x00200000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_RESET_RESERVED_MASK                           0x01000000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DISABLE_RESERVED_MASK                         0x02000000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_CLK_RDY_RESERVED_MASK                         0x04000000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_DATA_EN_RESERVED_MASK                         0x08000000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_REQ_RESERVED_MASK                             0x10000000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL3__RDPCS_PHY_DP_TX3_ACK_RESERVED_MASK                             0x20000000L
+//RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED__SHIFT                        0x0
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED__SHIFT                       0x2
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED__SHIFT                        0x4
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED__SHIFT                       0x6
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED__SHIFT                        0x8
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED__SHIFT                       0xa
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED__SHIFT                        0xc
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED__SHIFT                       0xe
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED__SHIFT                            0x10
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED__SHIFT                        0x11
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED__SHIFT                    0x12
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED__SHIFT                        0x13
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED__SHIFT                       0x14
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_PSTATE_RESERVED_MASK                          0x00000003L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX0_MPLL_EN_RESERVED_MASK                         0x00000004L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_PSTATE_RESERVED_MASK                          0x00000030L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX1_MPLL_EN_RESERVED_MASK                         0x00000040L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_PSTATE_RESERVED_MASK                          0x00000300L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX2_MPLL_EN_RESERVED_MASK                         0x00000400L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_PSTATE_RESERVED_MASK                          0x00003000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_TX3_MPLL_EN_RESERVED_MASK                         0x00004000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DP4_RESERVED_MASK                              0x00010000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_RESERVED_MASK                          0x00020000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DPALT_DISABLE_ACK_RESERVED_MASK                      0x00040000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_EN_RESERVED_MASK                          0x00080000L
+#define RDPCSTX5_RDPCSTX_DMCU_DPALT_PHY_CNTL6__RDPCS_PHY_DP_REF_CLK_REQ_RESERVED_MASK                         0x00100000L
+//RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS__SHIFT                                  0x0
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED__SHIFT                                0x4
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE__SHIFT                                  0x8
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_ALLOW_DRIVER_ACCESS_MASK                                    0x00000001L
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DRIVER_ACCESS_BLOCKED_MASK                                  0x00000010L
+#define RDPCSTX5_RDPCSTX_DPALT_CONTROL_REG__RDPCS_DPALT_CONTROL_SPARE_MASK                                    0x0000FF00L
+
+
+// addressBlock: dpcssys_dpcssys_cr5_dispdec
+//DPCSSYS_CR5_DPCSSYS_CR_ADDR
+#define DPCSSYS_CR5_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR__SHIFT                                                  0x0
+#define DPCSSYS_CR5_DPCSSYS_CR_ADDR__RDPCS_TX_CR_ADDR_MASK                                                    0x0000FFFFL
+//DPCSSYS_CR5_DPCSSYS_CR_DATA
+#define DPCSSYS_CR5_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA__SHIFT                                                  0x0
+#define DPCSSYS_CR5_DPCSSYS_CR_DATA__RDPCS_TX_CR_DATA_MASK                                                    0x0000FFFFL
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_offset.h
index 945bb6101a9d..945bb6101a9d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_2_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_offset.h
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_sh_mask.h
index 6e039f2208e1..6e039f2208e1 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_2_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_sh_mask.h
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index ca16d9125fbc..d984c916df80 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -1146,7 +1146,14 @@
 #define mmATC_L2_MEM_POWER_LS_BASE_IDX                                                                 0
 #define mmATC_L2_CGTT_CLK_CTRL                                                                         0x080c
 #define mmATC_L2_CGTT_CLK_CTRL_BASE_IDX                                                                0
-
+#define mmATC_L2_CACHE_4K_EDC_INDEX                                                                    0x080e
+#define mmATC_L2_CACHE_4K_EDC_INDEX_BASE_IDX                                                           0
+#define mmATC_L2_CACHE_2M_EDC_INDEX                                                                    0x080f
+#define mmATC_L2_CACHE_2M_EDC_INDEX_BASE_IDX                                                           0
+#define mmATC_L2_CACHE_4K_EDC_CNT                                                                      0x0810
+#define mmATC_L2_CACHE_4K_EDC_CNT_BASE_IDX                                                             0
+#define mmATC_L2_CACHE_2M_EDC_CNT                                                                      0x0811
+#define mmATC_L2_CACHE_2M_EDC_CNT_BASE_IDX                                                             0
 
 // addressBlock: gc_utcl2_vml2pfdec
 // base address: 0xa100
@@ -1206,7 +1213,14 @@
 #define mmVM_L2_CACHE_PARITY_CNTL_BASE_IDX                                                             0
 #define mmVM_L2_CGTT_CLK_CTRL                                                                          0x085e
 #define mmVM_L2_CGTT_CLK_CTRL_BASE_IDX                                                                 0
-
+#define mmVM_L2_MEM_ECC_INDEX                                                                          0x0860
+#define mmVM_L2_MEM_ECC_INDEX_BASE_IDX                                                                 0
+#define mmVM_L2_WALKER_MEM_ECC_INDEX                                                                   0x0861
+#define mmVM_L2_WALKER_MEM_ECC_INDEX_BASE_IDX                                                          0
+#define mmVM_L2_MEM_ECC_CNT                                                                            0x0862
+#define mmVM_L2_MEM_ECC_CNT_BASE_IDX                                                                   0
+#define mmVM_L2_WALKER_MEM_ECC_CNT                                                                     0x0863
+#define mmVM_L2_WALKER_MEM_ECC_CNT_BASE_IDX                                                            0
 
 // addressBlock: gc_utcl2_vml2vcdec
 // base address: 0xa200
@@ -2210,6 +2224,14 @@
 #define mmCOMPUTE_STATIC_THREAD_MGMT_SE2_BASE_IDX                                                      0
 #define mmCOMPUTE_STATIC_THREAD_MGMT_SE3                                                               0x0e1a
 #define mmCOMPUTE_STATIC_THREAD_MGMT_SE3_BASE_IDX                                                      0
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE4                                                               0x0e25
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE4_BASE_IDX                                                      0
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE5                                                               0x0e26
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE5_BASE_IDX                                                      0
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE6                                                               0x0e27
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE6_BASE_IDX                                                      0
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE7                                                               0x0e28
+#define mmCOMPUTE_STATIC_THREAD_MGMT_SE7_BASE_IDX                                                      0
 #define mmCOMPUTE_RESTART_X                                                                            0x0e1b
 #define mmCOMPUTE_RESTART_X_BASE_IDX                                                                   0
 #define mmCOMPUTE_RESTART_Y                                                                            0x0e1c
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
index 064c4bb1dc62..c9e3f6d849a8 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
@@ -6661,7 +6661,6 @@
 #define ATC_L2_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_MASK                                                        0x00FF0000L
 #define ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK                                                              0xFF000000L
 
-
 // addressBlock: gc_utcl2_vml2pfdec
 //VM_L2_CNTL
 #define VM_L2_CNTL__ENABLE_L2_CACHE__SHIFT                                                                    0x0
@@ -6991,7 +6990,22 @@
 #define VM_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK                                                               0x00008000L
 #define VM_L2_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_MASK                                                         0x00FF0000L
 #define VM_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK                                                               0xFF000000L
-
+//VM_L2_MEM_ECC_INDEX
+#define VM_L2_MEM_ECC_INDEX__INDEX__SHIFT                                                                     0x0
+#define VM_L2_MEM_ECC_INDEX__INDEX_MASK                                                                       0x000000FFL
+//VM_L2_WALKER_MEM_ECC_INDEX
+#define VM_L2_WALKER_MEM_ECC_INDEX__INDEX__SHIFT                                                              0x0
+#define VM_L2_WALKER_MEM_ECC_INDEX__INDEX_MASK                                                                0x000000FFL
+//VM_L2_MEM_ECC_CNT
+#define VM_L2_MEM_ECC_CNT__SEC_COUNT__SHIFT                                                                   0xc
+#define VM_L2_MEM_ECC_CNT__DED_COUNT__SHIFT                                                                   0xe
+#define VM_L2_MEM_ECC_CNT__SEC_COUNT_MASK                                                                     0x00003000L
+#define VM_L2_MEM_ECC_CNT__DED_COUNT_MASK                                                                     0x0000C000L
+//VM_L2_WALKER_MEM_ECC_CNT
+#define VM_L2_WALKER_MEM_ECC_CNT__SEC_COUNT__SHIFT                                                            0xc
+#define VM_L2_WALKER_MEM_ECC_CNT__DED_COUNT__SHIFT                                                            0xe
+#define VM_L2_WALKER_MEM_ECC_CNT__SEC_COUNT_MASK                                                              0x00003000L
+#define VM_L2_WALKER_MEM_ECC_CNT__DED_COUNT_MASK                                                              0x0000C000L
 
 // addressBlock: gc_utcl2_vml2vcdec
 //VM_CONTEXT0_CNTL
@@ -8725,10 +8739,16 @@
 #define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT                                                                     0x4
 #define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT                                                                   0x6
 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT                                                                0x9
+#define TCP_ADDR_CONFIG__ENABLE64KHASH__SHIFT                                                                 0xb
+#define TCP_ADDR_CONFIG__ENABLE2MHASH__SHIFT                                                                  0xc
+#define TCP_ADDR_CONFIG__ENABLE1GHASH__SHIFT                                                                  0xd
 #define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK                                                                   0x0000000FL
 #define TCP_ADDR_CONFIG__NUM_BANKS_MASK                                                                       0x00000030L
 #define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK                                                                     0x000001C0L
 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK                                                                  0x00000200L
+#define TCP_ADDR_CONFIG__ENABLE64KHASH_MASK                                                                   0x00000800L
+#define TCP_ADDR_CONFIG__ENABLE2MHASH_MASK                                                                    0x00001000L
+#define TCP_ADDR_CONFIG__ENABLE1GHASH_MASK                                                                    0x00002000L
 //TCP_CREDIT
 #define TCP_CREDIT__LFIFO_CREDIT__SHIFT                                                                       0x0
 #define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT                                                                    0x10
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h
index 352ffae7a7ca..2c3ce243861a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h
@@ -1964,4 +1964,20 @@
 #define mmATC_L2_PERFCOUNTER_RSLT_CNTL                                                                 0x084a
 #define mmATC_L2_PERFCOUNTER_RSLT_CNTL_BASE_IDX                                                        0
 
+/* MMEA */
+#define mmMMEA0_EDC_CNT_VG20                                                                           0x0206
+#define mmMMEA0_EDC_CNT_VG20_BASE_IDX                                                                  0
+#define mmMMEA0_EDC_CNT2_VG20                                                                          0x0207
+#define mmMMEA0_EDC_CNT2_VG20_BASE_IDX                                                                 0
+#define mmMMEA1_EDC_CNT_VG20                                                                           0x0346
+#define mmMMEA1_EDC_CNT_VG20_BASE_IDX                                                                  0
+#define mmMMEA1_EDC_CNT2_VG20                                                                          0x0347
+#define mmMMEA1_EDC_CNT2_VG20_BASE_IDX                                                                 0
+
+// addressBlock: mmhub_utcl2_vmsharedpfdec
+// base address: 0x6a040
+#define mmMC_VM_XGMI_LFB_CNTL                                                                          0x0823
+#define mmMC_VM_XGMI_LFB_CNTL_BASE_IDX                                                                 0
+#define mmMC_VM_XGMI_LFB_SIZE                                                                          0x0824
+#define mmMC_VM_XGMI_LFB_SIZE_BASE_IDX                                                                 0
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_sh_mask.h
index 34278ef2aa1b..198f5f93ed1a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_sh_mask.h
@@ -10124,4 +10124,126 @@
 #define ATC_L2_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK                                                          0x02000000L
 #define ATC_L2_PERFCOUNTER_RSLT_CNTL__STOP_ALL_ON_SATURATE_MASK                                               0x04000000L
 
+//MMEA0_EDC_CNT
+#define MMEA0_EDC_CNT_VG20__DRAMRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
+#define MMEA0_EDC_CNT_VG20__DRAMRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
+#define MMEA0_EDC_CNT_VG20__DRAMWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
+#define MMEA0_EDC_CNT_VG20__DRAMWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
+#define MMEA0_EDC_CNT_VG20__DRAMWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
+#define MMEA0_EDC_CNT_VG20__DRAMWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
+#define MMEA0_EDC_CNT_VG20__RRET_TAGMEM_SEC_COUNT__SHIFT                                                           0xc
+#define MMEA0_EDC_CNT_VG20__RRET_TAGMEM_DED_COUNT__SHIFT                                                           0xe
+#define MMEA0_EDC_CNT_VG20__WRET_TAGMEM_SEC_COUNT__SHIFT                                                           0x10
+#define MMEA0_EDC_CNT_VG20__WRET_TAGMEM_DED_COUNT__SHIFT                                                           0x12
+#define MMEA0_EDC_CNT_VG20__DRAMRD_PAGEMEM_SED_COUNT__SHIFT                                                        0x14
+#define MMEA0_EDC_CNT_VG20__DRAMWR_PAGEMEM_SED_COUNT__SHIFT                                                        0x16
+#define MMEA0_EDC_CNT_VG20__IORD_CMDMEM_SED_COUNT__SHIFT                                                           0x18
+#define MMEA0_EDC_CNT_VG20__IOWR_CMDMEM_SED_COUNT__SHIFT                                                           0x1a
+#define MMEA0_EDC_CNT_VG20__IOWR_DATAMEM_SED_COUNT__SHIFT                                                          0x1c
+#define MMEA0_EDC_CNT_VG20__DRAMRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
+#define MMEA0_EDC_CNT_VG20__DRAMRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
+#define MMEA0_EDC_CNT_VG20__DRAMWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
+#define MMEA0_EDC_CNT_VG20__DRAMWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
+#define MMEA0_EDC_CNT_VG20__DRAMWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
+#define MMEA0_EDC_CNT_VG20__DRAMWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
+#define MMEA0_EDC_CNT_VG20__RRET_TAGMEM_SEC_COUNT_MASK                                                             0x00003000L
+#define MMEA0_EDC_CNT_VG20__RRET_TAGMEM_DED_COUNT_MASK                                                             0x0000C000L
+#define MMEA0_EDC_CNT_VG20__WRET_TAGMEM_SEC_COUNT_MASK                                                             0x00030000L
+#define MMEA0_EDC_CNT_VG20__WRET_TAGMEM_DED_COUNT_MASK                                                             0x000C0000L
+#define MMEA0_EDC_CNT_VG20__DRAMRD_PAGEMEM_SED_COUNT_MASK                                                          0x00300000L
+#define MMEA0_EDC_CNT_VG20__DRAMWR_PAGEMEM_SED_COUNT_MASK                                                          0x00C00000L
+#define MMEA0_EDC_CNT_VG20__IORD_CMDMEM_SED_COUNT_MASK                                                             0x03000000L
+#define MMEA0_EDC_CNT_VG20__IOWR_CMDMEM_SED_COUNT_MASK                                                             0x0C000000L
+#define MMEA0_EDC_CNT_VG20__IOWR_DATAMEM_SED_COUNT_MASK                                                            0x30000000L
+//MMEA0_EDC_CNT2
+#define MMEA0_EDC_CNT2_VG20__GMIRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
+#define MMEA0_EDC_CNT2_VG20__GMIRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
+#define MMEA0_EDC_CNT2_VG20__GMIWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
+#define MMEA0_EDC_CNT2_VG20__GMIWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
+#define MMEA0_EDC_CNT2_VG20__GMIWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
+#define MMEA0_EDC_CNT2_VG20__GMIWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
+#define MMEA0_EDC_CNT2_VG20__GMIRD_PAGEMEM_SED_COUNT__SHIFT                                                        0xc
+#define MMEA0_EDC_CNT2_VG20__GMIWR_PAGEMEM_SED_COUNT__SHIFT                                                        0xe
+#define MMEA0_EDC_CNT2_VG20__MAM_D0MEM_SED_COUNT__SHIFT                                                            0x10
+#define MMEA0_EDC_CNT2_VG20__MAM_D1MEM_SED_COUNT__SHIFT                                                            0x12
+#define MMEA0_EDC_CNT2_VG20__MAM_D2MEM_SED_COUNT__SHIFT                                                            0x14
+#define MMEA0_EDC_CNT2_VG20__MAM_D3MEM_SED_COUNT__SHIFT                                                            0x16
+#define MMEA0_EDC_CNT2_VG20__GMIRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
+#define MMEA0_EDC_CNT2_VG20__GMIRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
+#define MMEA0_EDC_CNT2_VG20__GMIWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
+#define MMEA0_EDC_CNT2_VG20__GMIWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
+#define MMEA0_EDC_CNT2_VG20__GMIWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
+#define MMEA0_EDC_CNT2_VG20__GMIWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
+#define MMEA0_EDC_CNT2_VG20__GMIRD_PAGEMEM_SED_COUNT_MASK                                                          0x00003000L
+#define MMEA0_EDC_CNT2_VG20__GMIWR_PAGEMEM_SED_COUNT_MASK                                                          0x0000C000L
+#define MMEA0_EDC_CNT2_VG20__MAM_D0MEM_SED_COUNT_MASK                                                              0x00030000L
+#define MMEA0_EDC_CNT2_VG20__MAM_D1MEM_SED_COUNT_MASK                                                              0x000C0000L
+#define MMEA0_EDC_CNT2_VG20__MAM_D2MEM_SED_COUNT_MASK                                                              0x00300000L
+#define MMEA0_EDC_CNT2_VG20__MAM_D3MEM_SED_COUNT_MASK                                                              0x00C00000L
+//MMEA1_EDC_CNT
+#define MMEA1_EDC_CNT_VG20__DRAMRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
+#define MMEA1_EDC_CNT_VG20__DRAMRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
+#define MMEA1_EDC_CNT_VG20__DRAMWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
+#define MMEA1_EDC_CNT_VG20__DRAMWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
+#define MMEA1_EDC_CNT_VG20__DRAMWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
+#define MMEA1_EDC_CNT_VG20__DRAMWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
+#define MMEA1_EDC_CNT_VG20__RRET_TAGMEM_SEC_COUNT__SHIFT                                                           0xc
+#define MMEA1_EDC_CNT_VG20__RRET_TAGMEM_DED_COUNT__SHIFT                                                           0xe
+#define MMEA1_EDC_CNT_VG20__WRET_TAGMEM_SEC_COUNT__SHIFT                                                           0x10
+#define MMEA1_EDC_CNT_VG20__WRET_TAGMEM_DED_COUNT__SHIFT                                                           0x12
+#define MMEA1_EDC_CNT_VG20__DRAMRD_PAGEMEM_SED_COUNT__SHIFT                                                        0x14
+#define MMEA1_EDC_CNT_VG20__DRAMWR_PAGEMEM_SED_COUNT__SHIFT                                                        0x16
+#define MMEA1_EDC_CNT_VG20__IORD_CMDMEM_SED_COUNT__SHIFT                                                           0x18
+#define MMEA1_EDC_CNT_VG20__IOWR_CMDMEM_SED_COUNT__SHIFT                                                           0x1a
+#define MMEA1_EDC_CNT_VG20__IOWR_DATAMEM_SED_COUNT__SHIFT                                                          0x1c
+#define MMEA1_EDC_CNT_VG20__DRAMRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
+#define MMEA1_EDC_CNT_VG20__DRAMRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
+#define MMEA1_EDC_CNT_VG20__DRAMWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
+#define MMEA1_EDC_CNT_VG20__DRAMWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
+#define MMEA1_EDC_CNT_VG20__DRAMWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
+#define MMEA1_EDC_CNT_VG20__DRAMWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
+#define MMEA1_EDC_CNT_VG20__RRET_TAGMEM_SEC_COUNT_MASK                                                             0x00003000L
+#define MMEA1_EDC_CNT_VG20__RRET_TAGMEM_DED_COUNT_MASK                                                             0x0000C000L
+#define MMEA1_EDC_CNT_VG20__WRET_TAGMEM_SEC_COUNT_MASK                                                             0x00030000L
+#define MMEA1_EDC_CNT_VG20__WRET_TAGMEM_DED_COUNT_MASK                                                             0x000C0000L
+#define MMEA1_EDC_CNT_VG20__DRAMRD_PAGEMEM_SED_COUNT_MASK                                                          0x00300000L
+#define MMEA1_EDC_CNT_VG20__DRAMWR_PAGEMEM_SED_COUNT_MASK                                                          0x00C00000L
+#define MMEA1_EDC_CNT_VG20__IORD_CMDMEM_SED_COUNT_MASK                                                             0x03000000L
+#define MMEA1_EDC_CNT_VG20__IOWR_CMDMEM_SED_COUNT_MASK                                                             0x0C000000L
+#define MMEA1_EDC_CNT_VG20__IOWR_DATAMEM_SED_COUNT_MASK                                                            0x30000000L
+//MMEA1_EDC_CNT2
+#define MMEA1_EDC_CNT2_VG20__GMIRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
+#define MMEA1_EDC_CNT2_VG20__GMIRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
+#define MMEA1_EDC_CNT2_VG20__GMIWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
+#define MMEA1_EDC_CNT2_VG20__GMIWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
+#define MMEA1_EDC_CNT2_VG20__GMIWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
+#define MMEA1_EDC_CNT2_VG20__GMIWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
+#define MMEA1_EDC_CNT2_VG20__GMIRD_PAGEMEM_SED_COUNT__SHIFT                                                        0xc
+#define MMEA1_EDC_CNT2_VG20__GMIWR_PAGEMEM_SED_COUNT__SHIFT                                                        0xe
+#define MMEA1_EDC_CNT2_VG20__MAM_D0MEM_SED_COUNT__SHIFT                                                            0x10
+#define MMEA1_EDC_CNT2_VG20__MAM_D1MEM_SED_COUNT__SHIFT                                                            0x12
+#define MMEA1_EDC_CNT2_VG20__MAM_D2MEM_SED_COUNT__SHIFT                                                            0x14
+#define MMEA1_EDC_CNT2_VG20__MAM_D3MEM_SED_COUNT__SHIFT                                                            0x16
+#define MMEA1_EDC_CNT2_VG20__GMIRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
+#define MMEA1_EDC_CNT2_VG20__GMIRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
+#define MMEA1_EDC_CNT2_VG20__GMIWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
+#define MMEA1_EDC_CNT2_VG20__GMIWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
+#define MMEA1_EDC_CNT2_VG20__GMIWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
+#define MMEA1_EDC_CNT2_VG20__GMIWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
+#define MMEA1_EDC_CNT2_VG20__GMIRD_PAGEMEM_SED_COUNT_MASK                                                          0x00003000L
+#define MMEA1_EDC_CNT2_VG20__GMIWR_PAGEMEM_SED_COUNT_MASK                                                          0x0000C000L
+#define MMEA1_EDC_CNT2_VG20__MAM_D0MEM_SED_COUNT_MASK                                                              0x00030000L
+#define MMEA1_EDC_CNT2_VG20__MAM_D1MEM_SED_COUNT_MASK                                                              0x000C0000L
+#define MMEA1_EDC_CNT2_VG20__MAM_D2MEM_SED_COUNT_MASK                                                              0x00300000L
+#define MMEA1_EDC_CNT2_VG20__MAM_D3MEM_SED_COUNT_MASK                                                              0x00C00000L
+
+// addressBlock: mmhub_utcl2_vmsharedpfdec
+//MC_VM_XGMI_LFB_CNTL
+#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION__SHIFT                                                             0x0
+#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION__SHIFT                                                             0x4
+#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION_MASK                                                               0x00000007L
+#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION_MASK                                                               0x00000070L
+//MC_VM_XGMI_LFB_SIZE
+#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT                                                               0x0
+#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK                                                                 0x0000FFFFL
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h
deleted file mode 100644
index f2ae3a58949e..000000000000
--- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2018  Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef _mmhub_9_4_0_OFFSET_HEADER
-#define _mmhub_9_4_0_OFFSET_HEADER
-
-/* MMEA */
-#define mmMMEA0_SDP_ARB_FINAL_VG20                                                                     0x01ee
-#define mmMMEA0_SDP_ARB_FINAL_VG20_BASE_IDX                                                            0
-#define mmMMEA0_EDC_CNT_VG20                                                                           0x0206
-#define mmMMEA0_EDC_CNT_VG20_BASE_IDX                                                                  0
-#define mmMMEA0_EDC_CNT2_VG20                                                                          0x0207
-#define mmMMEA0_EDC_CNT2_VG20_BASE_IDX                                                                 0
-#define mmMMEA0_EDC_MODE_VG20                                                                          0x0210
-#define mmMMEA0_EDC_MODE_VG20_BASE_IDX                                                                 0
-#define mmMMEA0_ERR_STATUS_VG20                                                                        0x0211
-#define mmMMEA0_ERR_STATUS_VG20_BASE_IDX                                                               0
-#define mmMMEA1_SDP_ARB_FINAL_VG20                                                                     0x032e
-#define mmMMEA1_SDP_ARB_FINAL_VG20_BASE_IDX                                                            0
-#define mmMMEA1_EDC_CNT_VG20                                                                           0x0346
-#define mmMMEA1_EDC_CNT_VG20_BASE_IDX                                                                  0
-#define mmMMEA1_EDC_CNT2_VG20                                                                          0x0347
-#define mmMMEA1_EDC_CNT2_VG20_BASE_IDX                                                                 0
-#define mmMMEA1_EDC_MODE_VG20                                                                          0x0350
-#define mmMMEA1_EDC_MODE_VG20_BASE_IDX                                                                 0
-#define mmMMEA1_ERR_STATUS_VG20                                                                        0x0351
-#define mmMMEA1_ERR_STATUS_VG20_BASE_IDX                                                               0
-
-// addressBlock: mmhub_utcl2_vmsharedpfdec
-// base address: 0x6a040
-#define mmMC_VM_XGMI_LFB_CNTL                                                                          0x0823
-#define mmMC_VM_XGMI_LFB_CNTL_BASE_IDX                                                                 0
-#define mmMC_VM_XGMI_LFB_SIZE                                                                          0x0824
-#define mmMC_VM_XGMI_LFB_SIZE_BASE_IDX                                                                 0
-
-#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h
deleted file mode 100644
index c24259ed12a1..000000000000
--- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (C) 2018  Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef _mmhub_9_4_0_SH_MASK_HEADER
-#define _mmhub_9_4_0_SH_MASK_HEADER
-
-//MMEA0_SDP_ARB_FINAL
-#define MMEA0_SDP_ARB_FINAL__DRAM_BURST_LIMIT__SHIFT                                                          0x0
-#define MMEA0_SDP_ARB_FINAL__GMI_BURST_LIMIT__SHIFT                                                           0x5
-#define MMEA0_SDP_ARB_FINAL__IO_BURST_LIMIT__SHIFT                                                            0xa
-#define MMEA0_SDP_ARB_FINAL__BURST_LIMIT_MULTIPLIER__SHIFT                                                    0xf
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC0__SHIFT                                                                0x11
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC1__SHIFT                                                                0x12
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC2__SHIFT                                                                0x13
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC3__SHIFT                                                                0x14
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC4__SHIFT                                                                0x15
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC5__SHIFT                                                                0x16
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC6__SHIFT                                                                0x17
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC7__SHIFT                                                                0x18
-#define MMEA0_SDP_ARB_FINAL__ERREVENT_ON_ERROR__SHIFT                                                         0x19
-#define MMEA0_SDP_ARB_FINAL__HALTREQ_ON_ERROR__SHIFT                                                          0x1a
-#define MMEA0_SDP_ARB_FINAL__DRAM_BURST_LIMIT_MASK                                                            0x0000001FL
-#define MMEA0_SDP_ARB_FINAL__GMI_BURST_LIMIT_MASK                                                             0x000003E0L
-#define MMEA0_SDP_ARB_FINAL__IO_BURST_LIMIT_MASK                                                              0x00007C00L
-#define MMEA0_SDP_ARB_FINAL__BURST_LIMIT_MULTIPLIER_MASK                                                      0x00018000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC0_MASK                                                                  0x00020000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC1_MASK                                                                  0x00040000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC2_MASK                                                                  0x00080000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC3_MASK                                                                  0x00100000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC4_MASK                                                                  0x00200000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC5_MASK                                                                  0x00400000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC6_MASK                                                                  0x00800000L
-#define MMEA0_SDP_ARB_FINAL__RDONLY_VC7_MASK                                                                  0x01000000L
-#define MMEA0_SDP_ARB_FINAL__ERREVENT_ON_ERROR_MASK                                                           0x02000000L
-#define MMEA0_SDP_ARB_FINAL__HALTREQ_ON_ERROR_MASK                                                            0x04000000L
-//MMEA0_EDC_CNT
-#define MMEA0_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
-#define MMEA0_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
-#define MMEA0_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
-#define MMEA0_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
-#define MMEA0_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
-#define MMEA0_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
-#define MMEA0_EDC_CNT__RRET_TAGMEM_SEC_COUNT__SHIFT                                                           0xc
-#define MMEA0_EDC_CNT__RRET_TAGMEM_DED_COUNT__SHIFT                                                           0xe
-#define MMEA0_EDC_CNT__WRET_TAGMEM_SEC_COUNT__SHIFT                                                           0x10
-#define MMEA0_EDC_CNT__WRET_TAGMEM_DED_COUNT__SHIFT                                                           0x12
-#define MMEA0_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT__SHIFT                                                        0x14
-#define MMEA0_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT__SHIFT                                                        0x16
-#define MMEA0_EDC_CNT__IORD_CMDMEM_SED_COUNT__SHIFT                                                           0x18
-#define MMEA0_EDC_CNT__IOWR_CMDMEM_SED_COUNT__SHIFT                                                           0x1a
-#define MMEA0_EDC_CNT__IOWR_DATAMEM_SED_COUNT__SHIFT                                                          0x1c
-#define MMEA0_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
-#define MMEA0_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
-#define MMEA0_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
-#define MMEA0_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
-#define MMEA0_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
-#define MMEA0_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
-#define MMEA0_EDC_CNT__RRET_TAGMEM_SEC_COUNT_MASK                                                             0x00003000L
-#define MMEA0_EDC_CNT__RRET_TAGMEM_DED_COUNT_MASK                                                             0x0000C000L
-#define MMEA0_EDC_CNT__WRET_TAGMEM_SEC_COUNT_MASK                                                             0x00030000L
-#define MMEA0_EDC_CNT__WRET_TAGMEM_DED_COUNT_MASK                                                             0x000C0000L
-#define MMEA0_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT_MASK                                                          0x00300000L
-#define MMEA0_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT_MASK                                                          0x00C00000L
-#define MMEA0_EDC_CNT__IORD_CMDMEM_SED_COUNT_MASK                                                             0x03000000L
-#define MMEA0_EDC_CNT__IOWR_CMDMEM_SED_COUNT_MASK                                                             0x0C000000L
-#define MMEA0_EDC_CNT__IOWR_DATAMEM_SED_COUNT_MASK                                                            0x30000000L
-//MMEA0_EDC_CNT2
-#define MMEA0_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
-#define MMEA0_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
-#define MMEA0_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
-#define MMEA0_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
-#define MMEA0_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
-#define MMEA0_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
-#define MMEA0_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT                                                        0xc
-#define MMEA0_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT                                                        0xe
-#define MMEA0_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT                                                            0x10
-#define MMEA0_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT                                                            0x12
-#define MMEA0_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT                                                            0x14
-#define MMEA0_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT                                                            0x16
-#define MMEA0_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
-#define MMEA0_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
-#define MMEA0_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
-#define MMEA0_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
-#define MMEA0_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
-#define MMEA0_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
-#define MMEA0_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK                                                          0x00003000L
-#define MMEA0_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK                                                          0x0000C000L
-#define MMEA0_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK                                                              0x00030000L
-#define MMEA0_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK                                                              0x000C0000L
-#define MMEA0_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK                                                              0x00300000L
-#define MMEA0_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK                                                              0x00C00000L
-//MMEA0_EDC_MODE
-#define MMEA0_EDC_MODE__COUNT_FED_OUT__SHIFT                                                                  0x10
-#define MMEA0_EDC_MODE__GATE_FUE__SHIFT                                                                       0x11
-#define MMEA0_EDC_MODE__DED_MODE__SHIFT                                                                       0x14
-#define MMEA0_EDC_MODE__PROP_FED__SHIFT                                                                       0x1d
-#define MMEA0_EDC_MODE__BYPASS__SHIFT                                                                         0x1f
-#define MMEA0_EDC_MODE__COUNT_FED_OUT_MASK                                                                    0x00010000L
-#define MMEA0_EDC_MODE__GATE_FUE_MASK                                                                         0x00020000L
-#define MMEA0_EDC_MODE__DED_MODE_MASK                                                                         0x00300000L
-#define MMEA0_EDC_MODE__PROP_FED_MASK                                                                         0x20000000L
-#define MMEA0_EDC_MODE__BYPASS_MASK                                                                           0x80000000L
-//MMEA0_ERR_STATUS
-#define MMEA0_ERR_STATUS__SDP_RDRSP_STATUS__SHIFT                                                             0x0
-#define MMEA0_ERR_STATUS__SDP_WRRSP_STATUS__SHIFT                                                             0x4
-#define MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS__SHIFT                                                         0x8
-#define MMEA0_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR__SHIFT                                                   0xa
-#define MMEA0_ERR_STATUS__CLEAR_ERROR_STATUS__SHIFT                                                           0xb
-#define MMEA0_ERR_STATUS__BUSY_ON_ERROR__SHIFT                                                                0xc
-#define MMEA0_ERR_STATUS__FUE_FLAG__SHIFT                                                                     0xd
-#define MMEA0_ERR_STATUS__SDP_RDRSP_STATUS_MASK                                                               0x0000000FL
-#define MMEA0_ERR_STATUS__SDP_WRRSP_STATUS_MASK                                                               0x000000F0L
-#define MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK                                                           0x00000300L
-#define MMEA0_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR_MASK                                                     0x00000400L
-#define MMEA0_ERR_STATUS__CLEAR_ERROR_STATUS_MASK                                                             0x00000800L
-#define MMEA0_ERR_STATUS__BUSY_ON_ERROR_MASK                                                                  0x00001000L
-#define MMEA0_ERR_STATUS__FUE_FLAG_MASK                                                                       0x00002000L
-//MMEA1_SDP_ARB_FINAL
-#define MMEA1_SDP_ARB_FINAL__DRAM_BURST_LIMIT__SHIFT                                                          0x0
-#define MMEA1_SDP_ARB_FINAL__GMI_BURST_LIMIT__SHIFT                                                           0x5
-#define MMEA1_SDP_ARB_FINAL__IO_BURST_LIMIT__SHIFT                                                            0xa
-#define MMEA1_SDP_ARB_FINAL__BURST_LIMIT_MULTIPLIER__SHIFT                                                    0xf
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC0__SHIFT                                                                0x11
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC1__SHIFT                                                                0x12
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC2__SHIFT                                                                0x13
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC3__SHIFT                                                                0x14
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC4__SHIFT                                                                0x15
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC5__SHIFT                                                                0x16
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC6__SHIFT                                                                0x17
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC7__SHIFT                                                                0x18
-#define MMEA1_SDP_ARB_FINAL__ERREVENT_ON_ERROR__SHIFT                                                         0x19
-#define MMEA1_SDP_ARB_FINAL__HALTREQ_ON_ERROR__SHIFT                                                          0x1a
-#define MMEA1_SDP_ARB_FINAL__DRAM_BURST_LIMIT_MASK                                                            0x0000001FL
-#define MMEA1_SDP_ARB_FINAL__GMI_BURST_LIMIT_MASK                                                             0x000003E0L
-#define MMEA1_SDP_ARB_FINAL__IO_BURST_LIMIT_MASK                                                              0x00007C00L
-#define MMEA1_SDP_ARB_FINAL__BURST_LIMIT_MULTIPLIER_MASK                                                      0x00018000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC0_MASK                                                                  0x00020000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC1_MASK                                                                  0x00040000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC2_MASK                                                                  0x00080000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC3_MASK                                                                  0x00100000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC4_MASK                                                                  0x00200000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC5_MASK                                                                  0x00400000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC6_MASK                                                                  0x00800000L
-#define MMEA1_SDP_ARB_FINAL__RDONLY_VC7_MASK                                                                  0x01000000L
-#define MMEA1_SDP_ARB_FINAL__ERREVENT_ON_ERROR_MASK                                                           0x02000000L
-#define MMEA1_SDP_ARB_FINAL__HALTREQ_ON_ERROR_MASK                                                            0x04000000L
-//MMEA1_EDC_CNT
-#define MMEA1_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
-#define MMEA1_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
-#define MMEA1_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
-#define MMEA1_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
-#define MMEA1_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
-#define MMEA1_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
-#define MMEA1_EDC_CNT__RRET_TAGMEM_SEC_COUNT__SHIFT                                                           0xc
-#define MMEA1_EDC_CNT__RRET_TAGMEM_DED_COUNT__SHIFT                                                           0xe
-#define MMEA1_EDC_CNT__WRET_TAGMEM_SEC_COUNT__SHIFT                                                           0x10
-#define MMEA1_EDC_CNT__WRET_TAGMEM_DED_COUNT__SHIFT                                                           0x12
-#define MMEA1_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT__SHIFT                                                        0x14
-#define MMEA1_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT__SHIFT                                                        0x16
-#define MMEA1_EDC_CNT__IORD_CMDMEM_SED_COUNT__SHIFT                                                           0x18
-#define MMEA1_EDC_CNT__IOWR_CMDMEM_SED_COUNT__SHIFT                                                           0x1a
-#define MMEA1_EDC_CNT__IOWR_DATAMEM_SED_COUNT__SHIFT                                                          0x1c
-#define MMEA1_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
-#define MMEA1_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
-#define MMEA1_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
-#define MMEA1_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
-#define MMEA1_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
-#define MMEA1_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
-#define MMEA1_EDC_CNT__RRET_TAGMEM_SEC_COUNT_MASK                                                             0x00003000L
-#define MMEA1_EDC_CNT__RRET_TAGMEM_DED_COUNT_MASK                                                             0x0000C000L
-#define MMEA1_EDC_CNT__WRET_TAGMEM_SEC_COUNT_MASK                                                             0x00030000L
-#define MMEA1_EDC_CNT__WRET_TAGMEM_DED_COUNT_MASK                                                             0x000C0000L
-#define MMEA1_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT_MASK                                                          0x00300000L
-#define MMEA1_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT_MASK                                                          0x00C00000L
-#define MMEA1_EDC_CNT__IORD_CMDMEM_SED_COUNT_MASK                                                             0x03000000L
-#define MMEA1_EDC_CNT__IOWR_CMDMEM_SED_COUNT_MASK                                                             0x0C000000L
-#define MMEA1_EDC_CNT__IOWR_DATAMEM_SED_COUNT_MASK                                                            0x30000000L
-//MMEA1_EDC_CNT2
-#define MMEA1_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT__SHIFT                                                         0x0
-#define MMEA1_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT__SHIFT                                                         0x2
-#define MMEA1_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT__SHIFT                                                         0x4
-#define MMEA1_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT__SHIFT                                                         0x6
-#define MMEA1_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT__SHIFT                                                        0x8
-#define MMEA1_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT                                                        0xa
-#define MMEA1_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT                                                        0xc
-#define MMEA1_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT                                                        0xe
-#define MMEA1_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT                                                            0x10
-#define MMEA1_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT                                                            0x12
-#define MMEA1_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT                                                            0x14
-#define MMEA1_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT                                                            0x16
-#define MMEA1_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK                                                           0x00000003L
-#define MMEA1_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK                                                           0x0000000CL
-#define MMEA1_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK                                                           0x00000030L
-#define MMEA1_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT_MASK                                                           0x000000C0L
-#define MMEA1_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT_MASK                                                          0x00000300L
-#define MMEA1_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK                                                          0x00000C00L
-#define MMEA1_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK                                                          0x00003000L
-#define MMEA1_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK                                                          0x0000C000L
-#define MMEA1_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK                                                              0x00030000L
-#define MMEA1_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK                                                              0x000C0000L
-#define MMEA1_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK                                                              0x00300000L
-#define MMEA1_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK                                                              0x00C00000L
-//MMEA1_EDC_MODE
-#define MMEA1_EDC_MODE__COUNT_FED_OUT__SHIFT                                                                  0x10
-#define MMEA1_EDC_MODE__GATE_FUE__SHIFT                                                                       0x11
-#define MMEA1_EDC_MODE__DED_MODE__SHIFT                                                                       0x14
-#define MMEA1_EDC_MODE__PROP_FED__SHIFT                                                                       0x1d
-#define MMEA1_EDC_MODE__BYPASS__SHIFT                                                                         0x1f
-#define MMEA1_EDC_MODE__COUNT_FED_OUT_MASK                                                                    0x00010000L
-#define MMEA1_EDC_MODE__GATE_FUE_MASK                                                                         0x00020000L
-#define MMEA1_EDC_MODE__DED_MODE_MASK                                                                         0x00300000L
-#define MMEA1_EDC_MODE__PROP_FED_MASK                                                                         0x20000000L
-#define MMEA1_EDC_MODE__BYPASS_MASK                                                                           0x80000000L
-//MMEA1_ERR_STATUS
-#define MMEA1_ERR_STATUS__SDP_RDRSP_STATUS__SHIFT                                                             0x0
-#define MMEA1_ERR_STATUS__SDP_WRRSP_STATUS__SHIFT                                                             0x4
-#define MMEA1_ERR_STATUS__SDP_RDRSP_DATASTATUS__SHIFT                                                         0x8
-#define MMEA1_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR__SHIFT                                                   0xa
-#define MMEA1_ERR_STATUS__CLEAR_ERROR_STATUS__SHIFT                                                           0xb
-#define MMEA1_ERR_STATUS__BUSY_ON_ERROR__SHIFT                                                                0xc
-#define MMEA1_ERR_STATUS__FUE_FLAG__SHIFT                                                                     0xd
-#define MMEA1_ERR_STATUS__SDP_RDRSP_STATUS_MASK                                                               0x0000000FL
-#define MMEA1_ERR_STATUS__SDP_WRRSP_STATUS_MASK                                                               0x000000F0L
-#define MMEA1_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK                                                           0x00000300L
-#define MMEA1_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR_MASK                                                     0x00000400L
-#define MMEA1_ERR_STATUS__CLEAR_ERROR_STATUS_MASK                                                             0x00000800L
-#define MMEA1_ERR_STATUS__BUSY_ON_ERROR_MASK                                                                  0x00001000L
-#define MMEA1_ERR_STATUS__FUE_FLAG_MASK                                                                       0x00002000L
-
-// addressBlock: mmhub_utcl2_vmsharedpfdec
-//MC_VM_XGMI_LFB_CNTL
-#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION__SHIFT                                                             0x0
-#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION__SHIFT                                                             0x4
-#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION_MASK                                                               0x00000007L
-#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION_MASK                                                               0x00000070L
-//MC_VM_XGMI_LFB_SIZE
-#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT                                                               0x0
-#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK                                                                 0x0000FFFFL
-
-#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
index 4bcacf529852..991128bb9476 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
@@ -22,6 +22,9 @@
 #ifndef _nbio_7_4_0_SMN_HEADER
 #define _nbio_7_4_0_SMN_HEADER
 
+// addressBlock: nbio_nbif0_bif_ras_bif_ras_regblk
+// base address: 0x10100000
+#define smnBIFL_RAS_CENTRAL_STATUS			0x10139040
 
 #define smnNBIF_MGCG_CTRL_LCLK				0x1013a21c
 #define smnCPM_CONTROL					0x11180460
@@ -53,4 +56,13 @@
 #define smnPCIE_RX_NUM_NAK				0x11180038
 #define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c
 
+// addressBlock: nbio_iohub_nb_misc_misc_cfgdec
+// base address: 0x13a10000
+#define smnIOHC_INTERRUPT_EOI				0x13a10120
+
+// addressBlock: nbio_iohub_nb_rascfg_ras_cfgdec
+// base address: 0x13a20000
+#define smnRAS_GLOBAL_STATUS_LO				0x13a20020
+#define smnRAS_GLOBAL_STATUS_HI				0x13a20024
+
 #endif	// _nbio_7_4_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
index 994e796a28d7..ce5830ebe095 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
@@ -2793,8 +2793,8 @@
 #define mmBIF_DOORBELL_INT_CNTL_BASE_IDX                                                               2
 #define mmBIF_FB_EN                                                                                    0x00ff
 #define mmBIF_FB_EN_BASE_IDX                                                                           2
-#define mmBIF_BUSY_DELAY_CNTR                                                                          0x0100
-#define mmBIF_BUSY_DELAY_CNTR_BASE_IDX                                                                 2
+#define mmBIF_INTR_CNTL                                                                                0x0100
+#define mmBIF_INTR_CNTL_BASE_IDX                                                                       2
 #define mmBIF_MST_TRANS_PENDING_VF                                                                     0x0109
 #define mmBIF_MST_TRANS_PENDING_VF_BASE_IDX                                                            2
 #define mmBIF_SLV_TRANS_PENDING_VF                                                                     0x010a
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h
index d467b939c971..07f04b2b5bdd 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h
@@ -20420,9 +20420,9 @@
 #define BIF_FB_EN__FB_WRITE_EN__SHIFT                                                                         0x1
 #define BIF_FB_EN__FB_READ_EN_MASK                                                                            0x00000001L
 #define BIF_FB_EN__FB_WRITE_EN_MASK                                                                           0x00000002L
-//BIF_BUSY_DELAY_CNTR
-#define BIF_BUSY_DELAY_CNTR__DELAY_CNT__SHIFT                                                                 0x0
-#define BIF_BUSY_DELAY_CNTR__DELAY_CNT_MASK                                                                   0x0000003FL
+//BIF_INTR_CNTL
+#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL__SHIFT                                                                0x0
+#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL_MASK                                                                  0x00000001L
 //BIF_MST_TRANS_PENDING_VF
 #define BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING__SHIFT                                                0x0
 #define BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING_MASK                                                  0x7FFFFFFFL
@@ -48436,4 +48436,47 @@
 #define RCC_DEV0_EPF0_VF15_GFXMSIX_PBA__MSIX_PENDING_BITS_1_MASK                                              0x00000002L
 #define RCC_DEV0_EPF0_VF15_GFXMSIX_PBA__MSIX_PENDING_BITS_2_MASK                                              0x00000004L
 
+//IOHC_INTERRUPT_EOI
+#define IOHC_INTERRUPT_EOI__SMI_EOI__SHIFT                                                                    0x0
+#define IOHC_INTERRUPT_EOI__SCI_EOI__SHIFT                                                                    0x1
+#define IOHC_INTERRUPT_EOI__NMI_EOI__SHIFT                                                                    0x2
+#define IOHC_INTERRUPT_EOI__SMI_EOI_MASK                                                                      0x00000001L
+#define IOHC_INTERRUPT_EOI__SCI_EOI_MASK                                                                      0x00000002L
+#define IOHC_INTERRUPT_EOI__NMI_EOI_MASK                                                                      0x00000004L
+
+//RAS_GLOBAL_STATUS_LO
+#define RAS_GLOBAL_STATUS_LO__ParityErrCorr__SHIFT                                                            0x0
+#define RAS_GLOBAL_STATUS_LO__ParityErrNonFatal__SHIFT                                                        0x1
+#define RAS_GLOBAL_STATUS_LO__ParityErrFatal__SHIFT                                                           0x2
+#define RAS_GLOBAL_STATUS_LO__ParityErrSerr__SHIFT                                                            0x3
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_NMI__SHIFT                                                               0x6
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_SCI__SHIFT                                                               0x7
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_SMI__SHIFT                                                               0x8
+#define RAS_GLOBAL_STATUS_LO__SW_SMI__SHIFT                                                                   0x9
+#define RAS_GLOBAL_STATUS_LO__SW_SCI__SHIFT                                                                   0xa
+#define RAS_GLOBAL_STATUS_LO__SW_NMI__SHIFT                                                                   0xb
+#define RAS_GLOBAL_STATUS_LO__APML_NMI__SHIFT                                                                 0xc
+#define RAS_GLOBAL_STATUS_LO__APML_SyncFld__SHIFT                                                             0xd
+#define RAS_GLOBAL_STATUS_LO__PIN_SyncFld_NMI__SHIFT                                                          0xe
+#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_Private__SHIFT                                                     0xf
+#define RAS_GLOBAL_STATUS_LO__ParityErrCorr_MASK                                                              0x00000001L
+#define RAS_GLOBAL_STATUS_LO__ParityErrNonFatal_MASK                                                          0x00000002L
+#define RAS_GLOBAL_STATUS_LO__ParityErrFatal_MASK                                                             0x00000004L
+#define RAS_GLOBAL_STATUS_LO__ParityErrSerr_MASK                                                              0x00000008L
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_NMI_MASK                                                                 0x00000040L
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_SCI_MASK                                                                 0x00000080L
+#define RAS_GLOBAL_STATUS_LO__HPLGWA_SMI_MASK                                                                 0x00000100L
+#define RAS_GLOBAL_STATUS_LO__SW_SMI_MASK                                                                     0x00000200L
+#define RAS_GLOBAL_STATUS_LO__SW_SCI_MASK                                                                     0x00000400L
+#define RAS_GLOBAL_STATUS_LO__SW_NMI_MASK                                                                     0x00000800L
+#define RAS_GLOBAL_STATUS_LO__APML_NMI_MASK                                                                   0x00001000L
+#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_MASK                                                               0x00002000L
+#define RAS_GLOBAL_STATUS_LO__PIN_SyncFld_NMI_MASK                                                            0x00004000L
+#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_Private_MASK                                                       0x00008000L
+//RAS_GLOBAL_STATUS_HI
+#define RAS_GLOBAL_STATUS_HI__PCIE0PortAErr__SHIFT                                                            0x0
+#define RAS_GLOBAL_STATUS_HI__NBIF0PortAErr__SHIFT                                                            0x1
+#define RAS_GLOBAL_STATUS_HI__PCIE0PortAErr_MASK                                                              0x00000001L
+#define RAS_GLOBAL_STATUS_HI__NBIF0PortAErr_MASK                                                              0x00000002L
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h
index dc9895a684fe..096d878eb1de 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h
@@ -588,11 +588,15 @@
 #define IH_STORM_CLIENT_LIST_CNTL__CLIENT30_IS_STORM_CLIENT_MASK                                              0x40000000L
 #define IH_STORM_CLIENT_LIST_CNTL__CLIENT31_IS_STORM_CLIENT_MASK                                              0x80000000L
 //IH_CLK_CTRL
+#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE__SHIFT                                             0x19
+#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE__SHIFT                                             		  0x1a
 #define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE__SHIFT                                                        0x1b
 #define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE__SHIFT                                                    0x1c
 #define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE__SHIFT                                                       0x1d
 #define IH_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE__SHIFT                                                             0x1e
 #define IH_CLK_CTRL__REG_CLK_SOFT_OVERRIDE__SHIFT                                                             0x1f
+#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE_MASK                                              0x02000000L
+#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE_MASK                                             		  0x04000000L
 #define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE_MASK                                                          0x08000000L
 #define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE_MASK                                                      0x10000000L
 #define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE_MASK                                                         0x20000000L
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
index dbc2e723f659..71169daa701a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
@@ -49,6 +49,7 @@
 #define ixCG_SPLL_FUNC_CNTL_5                                                   0xc0500150
 #define ixCG_SPLL_FUNC_CNTL_6                                                   0xc0500154
 #define ixCG_SPLL_FUNC_CNTL_7                                                   0xc0500158
+#define ixCG_SPLL_STATUS                                                        0xC050015C
 #define ixSPLL_CNTL_MODE                                                        0xc0500160
 #define ixCG_SPLL_SPREAD_SPECTRUM                                               0xc0500164
 #define ixCG_SPLL_SPREAD_SPECTRUM_2                                             0xc0500168
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
index 6af9f0217b34..61a9a84e0c3a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
@@ -194,6 +194,8 @@
 #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0
 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
index bd3685166779..351446754c72 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
@@ -49,6 +49,7 @@
 #define ixCG_SPLL_FUNC_CNTL_5                                                   0xc0500150
 #define ixCG_SPLL_FUNC_CNTL_6                                                   0xc0500154
 #define ixCG_SPLL_FUNC_CNTL_7                                                   0xc0500158
+#define ixCG_SPLL_STATUS                                                        0xC050015C
 #define ixSPLL_CNTL_MODE                                                        0xc0500160
 #define ixCG_SPLL_SPREAD_SPECTRUM                                               0xc0500164
 #define ixCG_SPLL_SPREAD_SPECTRUM_2                                             0xc0500168
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
index 627906674fe8..4bfd5f8ba66c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
@@ -194,6 +194,8 @@
 #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0
 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
index f35aba72e640..21da61c398f5 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
@@ -52,6 +52,7 @@
 #define ixCG_SPLL_FUNC_CNTL_5                                                   0xc0500150
 #define ixCG_SPLL_FUNC_CNTL_6                                                   0xc0500154
 #define ixCG_SPLL_FUNC_CNTL_7                                                   0xc0500158
+#define ixCG_SPLL_STATUS                                                        0xC050015C
 #define ixSPLL_CNTL_MODE                                                        0xc0500160
 #define ixCG_SPLL_SPREAD_SPECTRUM                                               0xc0500164
 #define ixCG_SPLL_SPREAD_SPECTRUM_2                                             0xc0500168
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
index 481ee6560aa9..f64fe0fbcb32 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
@@ -220,6 +220,8 @@
 #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff
 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2
+#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1
 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0
 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h
index d3876052562b..687d6843c258 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h
@@ -121,6 +121,98 @@
 #define mmCKSVII2C_IC_COMP_VERSION_BASE_IDX                                                            0
 #define mmCKSVII2C_IC_COMP_TYPE                                                                        0x006d
 #define mmCKSVII2C_IC_COMP_TYPE_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_CON                                                                             0x0080
+#define mmCKSVII2C1_IC_CON_BASE_IDX                                                                    0
+#define mmCKSVII2C1_IC_TAR                                                                             0x0081
+#define mmCKSVII2C1_IC_TAR_BASE_IDX                                                                    0
+#define mmCKSVII2C1_IC_SAR                                                                             0x0082
+#define mmCKSVII2C1_IC_SAR_BASE_IDX                                                                    0
+#define mmCKSVII2C1_IC_HS_MADDR                                                                        0x0083
+#define mmCKSVII2C1_IC_HS_MADDR_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_DATA_CMD                                                                        0x0084
+#define mmCKSVII2C1_IC_DATA_CMD_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_SS_SCL_HCNT                                                                     0x0085
+#define mmCKSVII2C1_IC_SS_SCL_HCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_SS_SCL_LCNT                                                                     0x0086
+#define mmCKSVII2C1_IC_SS_SCL_LCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_FS_SCL_HCNT                                                                     0x0087
+#define mmCKSVII2C1_IC_FS_SCL_HCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_FS_SCL_LCNT                                                                     0x0088
+#define mmCKSVII2C1_IC_FS_SCL_LCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_HS_SCL_HCNT                                                                     0x0089
+#define mmCKSVII2C1_IC_HS_SCL_HCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_HS_SCL_LCNT                                                                     0x008a
+#define mmCKSVII2C1_IC_HS_SCL_LCNT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_INTR_STAT                                                                       0x008b
+#define mmCKSVII2C1_IC_INTR_STAT_BASE_IDX                                                              0
+#define mmCKSVII2C1_IC_INTR_MASK                                                                       0x008c
+#define mmCKSVII2C1_IC_INTR_MASK_BASE_IDX                                                              0
+#define mmCKSVII2C1_IC_RAW_INTR_STAT                                                                   0x008d
+#define mmCKSVII2C1_IC_RAW_INTR_STAT_BASE_IDX                                                          0
+#define mmCKSVII2C1_IC_RX_TL                                                                           0x008e
+#define mmCKSVII2C1_IC_RX_TL_BASE_IDX                                                                  0
+#define mmCKSVII2C1_IC_TX_TL                                                                           0x008f
+#define mmCKSVII2C1_IC_TX_TL_BASE_IDX                                                                  0
+#define mmCKSVII2C1_IC_CLR_INTR                                                                        0x0090
+#define mmCKSVII2C1_IC_CLR_INTR_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_CLR_RX_UNDER                                                                    0x0091
+#define mmCKSVII2C1_IC_CLR_RX_UNDER_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_CLR_RX_OVER                                                                     0x0092
+#define mmCKSVII2C1_IC_CLR_RX_OVER_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_CLR_TX_OVER                                                                     0x0093
+#define mmCKSVII2C1_IC_CLR_TX_OVER_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_CLR_RD_REQ                                                                      0x0094
+#define mmCKSVII2C1_IC_CLR_RD_REQ_BASE_IDX                                                             0
+#define mmCKSVII2C1_IC_CLR_TX_ABRT                                                                     0x0095
+#define mmCKSVII2C1_IC_CLR_TX_ABRT_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_CLR_RX_DONE                                                                     0x0096
+#define mmCKSVII2C1_IC_CLR_RX_DONE_BASE_IDX                                                            0
+#define mmCKSVII2C1_IC_CLR_ACTIVITY                                                                    0x0097
+#define mmCKSVII2C1_IC_CLR_ACTIVITY_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_CLR_STOP_DET                                                                    0x0098
+#define mmCKSVII2C1_IC_CLR_STOP_DET_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_CLR_START_DET                                                                   0x0099
+#define mmCKSVII2C1_IC_CLR_START_DET_BASE_IDX                                                          0
+#define mmCKSVII2C1_IC_CLR_GEN_CALL                                                                    0x009a
+#define mmCKSVII2C1_IC_CLR_GEN_CALL_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_ENABLE                                                                          0x009b
+#define mmCKSVII2C1_IC_ENABLE_BASE_IDX                                                                 0
+#define mmCKSVII2C1_IC_STATUS                                                                          0x009c
+#define mmCKSVII2C1_IC_STATUS_BASE_IDX                                                                 0
+#define mmCKSVII2C1_IC_TXFLR                                                                           0x009d
+#define mmCKSVII2C1_IC_TXFLR_BASE_IDX                                                                  0
+#define mmCKSVII2C1_IC_RXFLR                                                                           0x009e
+#define mmCKSVII2C1_IC_RXFLR_BASE_IDX                                                                  0
+#define mmCKSVII2C1_IC_SDA_HOLD                                                                        0x009f
+#define mmCKSVII2C1_IC_SDA_HOLD_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_TX_ABRT_SOURCE                                                                  0x00a0
+#define mmCKSVII2C1_IC_TX_ABRT_SOURCE_BASE_IDX                                                         0
+#define mmCKSVII2C1_IC_SLV_DATA_NACK_ONLY                                                              0x00a1
+#define mmCKSVII2C1_IC_SLV_DATA_NACK_ONLY_BASE_IDX                                                     0
+#define mmCKSVII2C1_IC_DMA_CR                                                                          0x00a2
+#define mmCKSVII2C1_IC_DMA_CR_BASE_IDX                                                                 0
+#define mmCKSVII2C1_IC_DMA_TDLR                                                                        0x00a3
+#define mmCKSVII2C1_IC_DMA_TDLR_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_DMA_RDLR                                                                        0x00a4
+#define mmCKSVII2C1_IC_DMA_RDLR_BASE_IDX                                                               0
+#define mmCKSVII2C1_IC_SDA_SETUP                                                                       0x00a5
+#define mmCKSVII2C1_IC_SDA_SETUP_BASE_IDX                                                              0
+#define mmCKSVII2C1_IC_ACK_GENERAL_CALL                                                                0x00a6
+#define mmCKSVII2C1_IC_ACK_GENERAL_CALL_BASE_IDX                                                       0
+#define mmCKSVII2C1_IC_ENABLE_STATUS                                                                   0x00a7
+#define mmCKSVII2C1_IC_ENABLE_STATUS_BASE_IDX                                                          0
+#define mmCKSVII2C1_IC_FS_SPKLEN                                                                       0x00a8
+#define mmCKSVII2C1_IC_FS_SPKLEN_BASE_IDX                                                              0
+#define mmCKSVII2C1_IC_HS_SPKLEN                                                                       0x00a9
+#define mmCKSVII2C1_IC_HS_SPKLEN_BASE_IDX                                                              0
+#define mmCKSVII2C1_IC_CLR_RESTART_DET                                                                 0x00aa
+#define mmCKSVII2C1_IC_CLR_RESTART_DET_BASE_IDX                                                        0
+#define mmCKSVII2C1_IC_COMP_PARAM_1                                                                    0x00ab
+#define mmCKSVII2C1_IC_COMP_PARAM_1_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_COMP_VERSION                                                                    0x00ac
+#define mmCKSVII2C1_IC_COMP_VERSION_BASE_IDX                                                           0
+#define mmCKSVII2C1_IC_COMP_TYPE                                                                       0x00ad
+#define mmCKSVII2C1_IC_COMP_TYPE_BASE_IDX                                                              0
 #define mmSMUIO_MP_RESET_INTR                                                                          0x00c1
 #define mmSMUIO_MP_RESET_INTR_BASE_IDX                                                                 0
 #define mmSMUIO_SOC_HALT                                                                               0x00c2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h
index f8afa3518bf2..6905a9618127 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h
@@ -268,6 +268,182 @@
 //CKSVII2C_IC_COMP_TYPE
 #define CKSVII2C_IC_COMP_TYPE__COMP_TYPE__SHIFT                                                               0x0
 #define CKSVII2C_IC_COMP_TYPE__COMP_TYPE_MASK                                                                 0xFFFFFFFFL
+//CKSVII2C1_IC_CON
+#define CKSVII2C1_IC_CON__IC1_MASTER_MODE__SHIFT                                                              0x0
+#define CKSVII2C1_IC_CON__IC1_MAX_SPEED_MODE__SHIFT                                                           0x1
+#define CKSVII2C1_IC_CON__IC1_10BITADDR_SLAVE__SHIFT                                                          0x3
+#define CKSVII2C1_IC_CON__IC1_10BITADDR_MASTER__SHIFT                                                         0x4
+#define CKSVII2C1_IC_CON__IC1_RESTART_EN__SHIFT                                                               0x5
+#define CKSVII2C1_IC_CON__IC1_SLAVE_DISABLE__SHIFT                                                            0x6
+#define CKSVII2C1_IC_CON__STOP1_DET_IFADDRESSED__SHIFT                                                        0x7
+#define CKSVII2C1_IC_CON__TX1_EMPTY_CTRL__SHIFT                                                               0x8
+#define CKSVII2C1_IC_CON__RX1_FIFO_FULL_HLD_CTRL__SHIFT                                                       0x9
+#define CKSVII2C1_IC_CON__IC1_MASTER_MODE_MASK                                                                0x00000001L
+#define CKSVII2C1_IC_CON__IC1_MAX_SPEED_MODE_MASK                                                             0x00000006L
+#define CKSVII2C1_IC_CON__IC1_10BITADDR_SLAVE_MASK                                                            0x00000008L
+#define CKSVII2C1_IC_CON__IC1_10BITADDR_MASTER_MASK                                                           0x00000010L
+#define CKSVII2C1_IC_CON__IC1_RESTART_EN_MASK                                                                 0x00000020L
+#define CKSVII2C1_IC_CON__IC1_SLAVE_DISABLE_MASK                                                              0x00000040L
+#define CKSVII2C1_IC_CON__STOP1_DET_IFADDRESSED_MASK                                                          0x00000080L
+#define CKSVII2C1_IC_CON__TX1_EMPTY_CTRL_MASK                                                                 0x00000100L
+#define CKSVII2C1_IC_CON__RX1_FIFO_FULL_HLD_CTRL_MASK                                                         0x00000200L
+//CKSVII2C1_IC_TAR
+#define CKSVII2C1_IC_TAR__IC1_TAR__SHIFT                                                                      0x0
+#define CKSVII2C1_IC_TAR__GC1_OR_START__SHIFT                                                                 0xa
+#define CKSVII2C1_IC_TAR__SPECIAL1__SHIFT                                                                     0xb
+#define CKSVII2C1_IC_TAR__IC1_10BITADDR_MASTER__SHIFT                                                         0xc
+#define CKSVII2C1_IC_TAR__IC1_TAR_MASK                                                                        0x000003FFL
+#define CKSVII2C1_IC_TAR__GC1_OR_START_MASK                                                                   0x00000400L
+#define CKSVII2C1_IC_TAR__SPECIAL1_MASK                                                                       0x00000800L
+#define CKSVII2C1_IC_TAR__IC1_10BITADDR_MASTER_MASK                                                           0x00001000L
+//CKSVII2C1_IC_SAR
+#define CKSVII2C1_IC_SAR__IC1_SAR__SHIFT                                                                      0x0
+#define CKSVII2C1_IC_SAR__IC1_SAR_MASK                                                                        0x000003FFL
+//CKSVII2C1_IC_HS_MADDR
+#define CKSVII2C1_IC_HS_MADDR__IC1_HS_MADDR__SHIFT                                                            0x0
+#define CKSVII2C1_IC_HS_MADDR__IC1_HS_MADDR_MASK                                                              0x00000007L
+//CKSVII2C1_IC_DATA_CMD
+#define CKSVII2C1_IC_DATA_CMD__DAT1__SHIFT                                                                    0x0
+#define CKSVII2C1_IC_DATA_CMD__CMD1__SHIFT                                                                    0x8
+#define CKSVII2C1_IC_DATA_CMD__STOP1__SHIFT                                                                   0x9
+#define CKSVII2C1_IC_DATA_CMD__RESTART1__SHIFT                                                                0xa
+#define CKSVII2C1_IC_DATA_CMD__DAT1_MASK                                                                      0x000000FFL
+#define CKSVII2C1_IC_DATA_CMD__CMD1_MASK                                                                      0x00000100L
+#define CKSVII2C1_IC_DATA_CMD__STOP1_MASK                                                                     0x00000200L
+#define CKSVII2C1_IC_DATA_CMD__RESTART1_MASK                                                                  0x00000400L
+//CKSVII2C1_IC_SS_SCL_HCNT
+#define CKSVII2C1_IC_SS_SCL_HCNT__IC1_SS_SCL_HCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_SS_SCL_HCNT__IC1_SS_SCL_HCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_SS_SCL_LCNT
+#define CKSVII2C1_IC_SS_SCL_LCNT__IC1_SS_SCL_LCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_SS_SCL_LCNT__IC1_SS_SCL_LCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_FS_SCL_HCNT
+#define CKSVII2C1_IC_FS_SCL_HCNT__IC1_FS_SCL_HCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_FS_SCL_HCNT__IC1_FS_SCL_HCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_FS_SCL_LCNT
+#define CKSVII2C1_IC_FS_SCL_LCNT__IC1_FS_SCL_LCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_FS_SCL_LCNT__IC1_FS_SCL_LCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_HS_SCL_HCNT
+#define CKSVII2C1_IC_HS_SCL_HCNT__IC1_HS_SCL_HCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_HS_SCL_HCNT__IC1_HS_SCL_HCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_HS_SCL_LCNT
+#define CKSVII2C1_IC_HS_SCL_LCNT__IC1_HS_SCL_LCNT__SHIFT                                                      0x0
+#define CKSVII2C1_IC_HS_SCL_LCNT__IC1_HS_SCL_LCNT_MASK                                                        0x0000FFFFL
+//CKSVII2C1_IC_INTR_STAT
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_UNDER__SHIFT                                                            0x0
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_OVER__SHIFT                                                             0x1
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_FULL__SHIFT                                                             0x2
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_OVER__SHIFT                                                             0x3
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_EMPTY__SHIFT                                                            0x4
+#define CKSVII2C1_IC_INTR_STAT__R1_RD_REQ__SHIFT                                                              0x5
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_ABRT__SHIFT                                                             0x6
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_DONE__SHIFT                                                             0x7
+#define CKSVII2C1_IC_INTR_STAT__R1_ACTIVITY__SHIFT                                                            0x8
+#define CKSVII2C1_IC_INTR_STAT__R1_STOP_DET__SHIFT                                                            0x9
+#define CKSVII2C1_IC_INTR_STAT__R1_START_DET__SHIFT                                                           0xa
+#define CKSVII2C1_IC_INTR_STAT__R1_GEN_CALL__SHIFT                                                            0xb
+#define CKSVII2C1_IC_INTR_STAT__R1_RESTART_DET__SHIFT                                                         0xc
+#define CKSVII2C1_IC_INTR_STAT__R1_MST_ON_HOLD__SHIFT                                                         0xd
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_UNDER_MASK                                                              0x00000001L
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_OVER_MASK                                                               0x00000002L
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_FULL_MASK                                                               0x00000004L
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_OVER_MASK                                                               0x00000008L
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_EMPTY_MASK                                                              0x00000010L
+#define CKSVII2C1_IC_INTR_STAT__R1_RD_REQ_MASK                                                                0x00000020L
+#define CKSVII2C1_IC_INTR_STAT__R1_TX_ABRT_MASK                                                               0x00000040L
+#define CKSVII2C1_IC_INTR_STAT__R1_RX_DONE_MASK                                                               0x00000080L
+#define CKSVII2C1_IC_INTR_STAT__R1_ACTIVITY_MASK                                                              0x00000100L
+#define CKSVII2C1_IC_INTR_STAT__R1_STOP_DET_MASK                                                              0x00000200L
+#define CKSVII2C1_IC_INTR_STAT__R1_START_DET_MASK                                                             0x00000400L
+#define CKSVII2C1_IC_INTR_STAT__R1_GEN_CALL_MASK                                                              0x00000800L
+#define CKSVII2C1_IC_INTR_STAT__R1_RESTART_DET_MASK                                                           0x00001000L
+#define CKSVII2C1_IC_INTR_STAT__R1_MST_ON_HOLD_MASK                                                           0x00002000L
+//CKSVII2C1_IC_INTR_MASK
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_UNDER__SHIFT                                                            0x0
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_OVER__SHIFT                                                             0x1
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_FULL__SHIFT                                                             0x2
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_OVER__SHIFT                                                             0x3
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_EMPTY__SHIFT                                                            0x4
+#define CKSVII2C1_IC_INTR_MASK__M1_RD_REQ__SHIFT                                                              0x5
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_ABRT__SHIFT                                                             0x6
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_DONE__SHIFT                                                             0x7
+#define CKSVII2C1_IC_INTR_MASK__M1_ACTIVITY__SHIFT                                                            0x8
+#define CKSVII2C1_IC_INTR_MASK__M1_STOP_DET__SHIFT                                                            0x9
+#define CKSVII2C1_IC_INTR_MASK__M1_START_DET__SHIFT                                                           0xa
+#define CKSVII2C1_IC_INTR_MASK__M1_GEN_CALL__SHIFT                                                            0xb
+#define CKSVII2C1_IC_INTR_MASK__M1_RESTART_DET__SHIFT                                                         0xc
+#define CKSVII2C1_IC_INTR_MASK__M1_MST_ON_HOLD__SHIFT                                                         0xd
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_UNDER_MASK                                                              0x00000001L
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_OVER_MASK                                                               0x00000002L
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_FULL_MASK                                                               0x00000004L
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_OVER_MASK                                                               0x00000008L
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_EMPTY_MASK                                                              0x00000010L
+#define CKSVII2C1_IC_INTR_MASK__M1_RD_REQ_MASK                                                                0x00000020L
+#define CKSVII2C1_IC_INTR_MASK__M1_TX_ABRT_MASK                                                               0x00000040L
+#define CKSVII2C1_IC_INTR_MASK__M1_RX_DONE_MASK                                                               0x00000080L
+#define CKSVII2C1_IC_INTR_MASK__M1_ACTIVITY_MASK                                                              0x00000100L
+#define CKSVII2C1_IC_INTR_MASK__M1_STOP_DET_MASK                                                              0x00000200L
+#define CKSVII2C1_IC_INTR_MASK__M1_START_DET_MASK                                                             0x00000400L
+#define CKSVII2C1_IC_INTR_MASK__M1_GEN_CALL_MASK                                                              0x00000800L
+#define CKSVII2C1_IC_INTR_MASK__M1_RESTART_DET_MASK                                                           0x00001000L
+#define CKSVII2C1_IC_INTR_MASK__M1_MST_ON_HOLD_MASK                                                           0x00002000L
+//CKSVII2C1_IC_RAW_INTR_STAT
+//CKSVII2C1_IC_RX_TL
+//CKSVII2C1_IC_TX_TL
+//CKSVII2C1_IC_CLR_INTR
+//CKSVII2C1_IC_CLR_RX_UNDER
+//CKSVII2C1_IC_CLR_RX_OVER
+//CKSVII2C1_IC_CLR_TX_OVER
+//CKSVII2C1_IC_CLR_RD_REQ
+//CKSVII2C1_IC_CLR_TX_ABRT
+//CKSVII2C1_IC_CLR_RX_DONE
+//CKSVII2C1_IC_CLR_ACTIVITY
+//CKSVII2C1_IC_CLR_STOP_DET
+//CKSVII2C1_IC_CLR_START_DET
+//CKSVII2C1_IC_CLR_GEN_CALL
+//CKSVII2C1_IC_ENABLE
+#define CKSVII2C1_IC_ENABLE__ENABLE1__SHIFT                                                                   0x0
+#define CKSVII2C1_IC_ENABLE__ABORT1__SHIFT                                                                    0x1
+#define CKSVII2C1_IC_ENABLE__ENABLE1_MASK                                                                     0x00000001L
+#define CKSVII2C1_IC_ENABLE__ABORT1_MASK                                                                      0x00000002L
+//CKSVII2C1_IC_STATUS
+#define CKSVII2C1_IC_STATUS__ACTIVITY1__SHIFT                                                                 0x0
+#define CKSVII2C1_IC_STATUS__TFNF1__SHIFT                                                                     0x1
+#define CKSVII2C1_IC_STATUS__TFE1__SHIFT                                                                      0x2
+#define CKSVII2C1_IC_STATUS__RFNE1__SHIFT                                                                     0x3
+#define CKSVII2C1_IC_STATUS__RFF1__SHIFT                                                                      0x4
+#define CKSVII2C1_IC_STATUS__MST1_ACTIVITY__SHIFT                                                             0x5
+#define CKSVII2C1_IC_STATUS__SLV1_ACTIVITY__SHIFT                                                             0x6
+#define CKSVII2C1_IC_STATUS__ACTIVITY1_MASK                                                                   0x00000001L
+#define CKSVII2C1_IC_STATUS__TFNF1_MASK                                                                       0x00000002L
+#define CKSVII2C1_IC_STATUS__TFE1_MASK                                                                        0x00000004L
+#define CKSVII2C1_IC_STATUS__RFNE1_MASK                                                                       0x00000008L
+#define CKSVII2C1_IC_STATUS__RFF1_MASK                                                                        0x00000010L
+#define CKSVII2C1_IC_STATUS__MST1_ACTIVITY_MASK                                                               0x00000020L
+#define CKSVII2C1_IC_STATUS__SLV1_ACTIVITY_MASK                                                               0x00000040L
+//CKSVII2C1_IC_TXFLR
+//CKSVII2C1_IC_RXFLR
+//CKSVII2C1_IC_SDA_HOLD
+#define CKSVII2C1_IC_SDA_HOLD__IC1_SDA_HOLD__SHIFT                                                            0x0
+#define CKSVII2C1_IC_SDA_HOLD__IC1_SDA_HOLD_MASK                                                              0x00FFFFFFL
+//CKSVII2C1_IC_TX_ABRT_SOURCE
+//CKSVII2C1_IC_SLV_DATA_NACK_ONLY
+//CKSVII2C1_IC_DMA_CR
+//CKSVII2C1_IC_DMA_TDLR
+//CKSVII2C1_IC_DMA_RDLR
+//CKSVII2C1_IC_SDA_SETUP
+#define CKSVII2C1_IC_SDA_SETUP__SDA1_SETUP__SHIFT                                                             0x0
+#define CKSVII2C1_IC_SDA_SETUP__SDA1_SETUP_MASK                                                               0x000000FFL
+//CKSVII2C1_IC_ACK_GENERAL_CALL
+#define CKSVII2C1_IC_ACK_GENERAL_CALL__ACK1_GENERAL_CALL__SHIFT                                               0x0
+#define CKSVII2C1_IC_ACK_GENERAL_CALL__ACK1_GENERAL_CALL_MASK                                                 0x00000001L
+//CKSVII2C1_IC_ENABLE_STATUS
+#define CKSVII2C1_IC_ENABLE_STATUS__IC1_EN__SHIFT                                                             0x0
+#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_RX_ABORTED__SHIFT                                                    0x1
+#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_FIFO_FILLED_AND_FLUSHED__SHIFT                                       0x2
+#define CKSVII2C1_IC_ENABLE_STATUS__IC1_EN_MASK                                                               0x00000001L
+#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_RX_ABORTED_MASK                                                      0x00000002L
+#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_FIFO_FILLED_AND_FLUSHED_MASK                                         0x00000004L
 //SMUIO_MP_RESET_INTR
 #define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT                                                       0x0
 #define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK                                                         0x00000001L
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
index 043aa695d63f..0d6b594be775 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
@@ -27,5 +27,7 @@
 #define mmUMCCH0_0_EccErrCnt_BASE_IDX                                                                  0
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0                                                                  0x03c2
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX                                                         0
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0                                                                    0x03c4
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX                                                           0
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h
new file mode 100644
index 000000000000..ce005c674a18
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2019  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_6_1_2_OFFSET_HEADER
+#define _umc_6_1_2_OFFSET_HEADER
+
+#define mmUMCCH0_0_EccErrCntSel_ARCT                                     0x0360
+#define mmUMCCH0_0_EccErrCntSel_ARCT_BASE_IDX                            1
+#define mmUMCCH0_0_EccErrCnt_ARCT                                        0x0361
+#define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX                               1
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT                               0x03c2
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX                      1
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT                                 0x03c4
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT_BASE_IDX                        1
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h
new file mode 100644
index 000000000000..a5a8c993ec3a
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2020  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_6_1_2_SH_MASK_HEADER
+#define _umc_6_1_2_SH_MASK_HEADER
+
+//UMCCH0_0_EccErrCntSel_ARCT
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel__SHIFT                                                          0x0
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt__SHIFT                                                               0xc
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn__SHIFT                                                             0xf
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel_MASK                                                            0x0000000FL
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt_MASK                                                                 0x00003000L
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn_MASK                                                               0x00008000L
+//UMCCH0_0_EccErrCnt_ARCT
+#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt__SHIFT                                                                  0x0
+#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt_MASK                                                                    0x0000FFFFL
+//MCA_UMC_UMC0_MCUMC_STATUST0_ARCT
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode__SHIFT                                                         0x0
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt__SHIFT                                                      0x10
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0__SHIFT                                                           0x16
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId__SHIFT                                                         0x20
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1__SHIFT                                                           0x26
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub__SHIFT                                                             0x28
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2__SHIFT                                                           0x29
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison__SHIFT                                                            0x2b
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred__SHIFT                                                          0x2c
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC__SHIFT                                                              0x2d
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC__SHIFT                                                              0x2e
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3__SHIFT                                                           0x2f
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent__SHIFT                                                       0x34
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV__SHIFT                                                             0x35
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4__SHIFT                                                           0x36
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC__SHIFT                                                               0x37
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal__SHIFT                                                      0x38
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC__SHIFT                                                               0x39
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV__SHIFT                                                             0x3a
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV__SHIFT                                                             0x3b
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En__SHIFT                                                                0x3c
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC__SHIFT                                                                0x3d
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow__SHIFT                                                          0x3e
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val__SHIFT                                                               0x3f
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode_MASK                                                           0x000000000000FFFFL
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt_MASK                                                        0x00000000003F0000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0_MASK                                                             0x00000000FFC00000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId_MASK                                                           0x0000003F00000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1_MASK                                                             0x000000C000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub_MASK                                                               0x0000010000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2_MASK                                                             0x0000060000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison_MASK                                                              0x0000080000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred_MASK                                                            0x0000100000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC_MASK                                                                0x0000200000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC_MASK                                                                0x0000400000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3_MASK                                                             0x000F800000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent_MASK                                                         0x0010000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV_MASK                                                               0x0020000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4_MASK                                                             0x0040000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC_MASK                                                                 0x0080000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal_MASK                                                        0x0100000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC_MASK                                                                 0x0200000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV_MASK                                                               0x0400000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV_MASK                                                               0x0800000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En_MASK                                                                  0x1000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC_MASK                                                                  0x2000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow_MASK                                                            0x4000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val_MASK                                                                 0x8000000000000000L
+//MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr__SHIFT                                                           0x0
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB__SHIFT                                                                 0x38
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved__SHIFT                                                            0x3e
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr_MASK                                                             0x00FFFFFFFFFFFFFFL
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB_MASK                                                                   0x3F00000000000000L
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved_MASK                                                              0xC000000000000000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
index cf2149cc12ee..90350f46a0c4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
@@ -24,6 +24,18 @@
 
 // addressBlock: uvd0_mmsch_dec
 // base address: 0x1e000
+#define mmMMSCH_VF_VMID                                                                                0x000b
+#define mmMMSCH_VF_VMID_BASE_IDX                                                                       0
+#define mmMMSCH_VF_CTX_ADDR_LO                                                                         0x000c
+#define mmMMSCH_VF_CTX_ADDR_LO_BASE_IDX                                                                0
+#define mmMMSCH_VF_CTX_ADDR_HI                                                                         0x000d
+#define mmMMSCH_VF_CTX_ADDR_HI_BASE_IDX                                                                0
+#define mmMMSCH_VF_CTX_SIZE                                                                            0x000e
+#define mmMMSCH_VF_CTX_SIZE_BASE_IDX                                                                   0
+#define mmMMSCH_VF_MAILBOX_HOST                                                                        0x0012
+#define mmMMSCH_VF_MAILBOX_HOST_BASE_IDX                                                               0
+#define mmMMSCH_VF_MAILBOX_RESP                                                                        0x0013
+#define mmMMSCH_VF_MAILBOX_RESP_BASE_IDX                                                               0
 
 
 // addressBlock: uvd0_jpegnpdec
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index e88541d67aa0..70146518174c 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -492,12 +492,13 @@ struct atom_firmware_info_v3_1
 /* Total 32bit cap indication */
 enum atombios_firmware_capability
 {
-  ATOM_FIRMWARE_CAP_FIRMWARE_POSTED = 0x00000001,
-  ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION  = 0x00000002,
-  ATOM_FIRMWARE_CAP_WMI_SUPPORT  = 0x00000040,
-  ATOM_FIRMWARE_CAP_HWEMU_ENABLE  = 0x00000080,
-  ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100,
-  ATOM_FIRMWARE_CAP_SRAM_ECC      = 0x00000200,
+	ATOM_FIRMWARE_CAP_FIRMWARE_POSTED = 0x00000001,
+	ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION  = 0x00000002,
+	ATOM_FIRMWARE_CAP_WMI_SUPPORT  = 0x00000040,
+	ATOM_FIRMWARE_CAP_HWEMU_ENABLE  = 0x00000080,
+	ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100,
+	ATOM_FIRMWARE_CAP_SRAM_ECC      = 0x00000200,
+	ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING  = 0x00000400,
 };
 
 enum atom_cooling_solution_id{
diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h
index 5dcb776548d8..7ec4331e67f2 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -25,7 +25,6 @@
 #define _DISCOVERY_H_
 
 #define PSP_HEADER_SIZE                 256
-#define BINARY_MAX_SIZE                 (64 << 10)
 #define BINARY_SIGNATURE                0x28211407
 #define DISCOVERY_TABLE_SIGNATURE       0x53445049
 
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h b/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h
new file mode 100644
index 000000000000..79af4258f259
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __IRQSRCS_NBIF_7_4_H__
+#define __IRQSRCS_NBIF_7_4_H__
+
+#define NBIF_7_4__SRCID__CHIP_ERR_INT_EVENT            0x5E        // Error generated
+#define NBIF_7_4__SRCID__DOORBELL_INTERRUPT            0x5F        // Interrupt for doorbell event during VDDGFX off
+#define NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT      0x60        // Interrupt for ras_intr_valid from RAS controller
+#define NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT      0x61        // Interrupt for SDP ErrEvent received from ATHUB
+#define NBIF_7_4__SRCID__PF_VF_MSGBUF_VALID            0x87        // Valid message in PF->VF mailbox message buffer (The interrupt is sent on behalf of PF)
+#define NBIF_7_4__SRCID__PF_VF_MSGBUF_ACK              0x88        // Acknowledge message in PF->VF mailbox message buffer (The interrupt is sent on behalf of VF)
+#define NBIF_7_4__SRCID__VF_PF_MSGBUF_VALID            0x89        // Valid message in VF->PF mailbox message buffer (The interrupt is sent on behalf of VF)
+#define NBIF_7_4__SRCID__VF_PF_MSGBUF_ACK              0x8A        // Acknowledge message in VF->PF mailbox message buffer (The interrupt is sent on behalf of PF)
+#define NBIF_7_4__SRCID__CHIP_DPA_INT_EVENT            0xA0        // BIF_CHIP_DPA_INT_EVENT
+#define NBIF_7_4__SRCID__CHIP_SLOT_POWER_CHG_INT_EVENT 0xA1        // BIF_CHIP_SLOT_POWER_CHG_INT_EVENT
+#define NBIF_7_4__SRCID__ATOMIC_UR_OPCODE              0xCE        // BIF receives unsupported atomic opcode from MC
+#define NBIF_7_4__SRCID__ATOMIC_REQESTEREN_LOW         0xCF        // BIF receive atomic request from MC while AtomicOp Requester is not enabled in PCIE config space
+
+#endif // __IRQSRCS_NBIF_7_4_H__
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 98b9533e672b..a607b1034962 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -256,6 +256,10 @@ struct kfd2kgd_calls {
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
 
+	int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off);
+
 	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
 			     uint32_t __user *wptr, struct mm_struct *mm);
 
@@ -291,21 +295,22 @@ struct kfd2kgd_calls {
 	uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
-	bool (*get_atc_vmid_pasid_mapping_valid)(
-					struct kgd_dev *kgd,
-					uint8_t vmid);
-	uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
+	bool (*get_atc_vmid_pasid_mapping_info)(
 					struct kgd_dev *kgd,
-					uint8_t vmid);
+					uint8_t vmid,
+					uint16_t *p_pasid);
 
+	/* No longer needed from GFXv9 onward. The scratch base address is
+	 * passed to the shader by the CP. It's the user mode driver's
+	 * responsibility.
+	 */
 	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
 				uint64_t va, uint32_t vmid);
+
 	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
 
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
-	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 27cf0afaa0b4..a7f92d0b3a90 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -179,6 +179,11 @@ enum pp_mp1_state {
 	PP_MP1_STATE_RESET,
 };
 
+enum pp_df_cstate {
+	DF_CSTATE_DISALLOW = 0,
+	DF_CSTATE_ALLOW,
+};
+
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
@@ -215,6 +220,9 @@ enum pp_mp1_state {
 		((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \
 		(support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT)
 
+#define XGMI_MODE_PSTATE_D3 0
+#define XGMI_MODE_PSTATE_D0 1
+
 struct seq_file;
 enum amd_pp_clock_type;
 struct amd_pp_simple_clock_info;
@@ -312,6 +320,8 @@ struct amd_pm_funcs {
 	int (*get_ppfeature_status)(void *handle, char *buf);
 	int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks);
 	int (*asic_reset_mode_2)(void *handle);
+	int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
+	int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/renoir_ip_offset.h b/drivers/gpu/drm/amd/include/renoir_ip_offset.h
index 094648cac392..07633e22e99a 100644
--- a/drivers/gpu/drm/amd/include/renoir_ip_offset.h
+++ b/drivers/gpu/drm/amd/include/renoir_ip_offset.h
@@ -169,6 +169,11 @@ static const struct IP_BASE NBIF0_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE DCN_BASE   ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0 } } } };
 static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0x0240A000, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
@@ -1361,4 +1366,33 @@ static const struct IP_BASE UVD0_BASE ={ { { { 0x00007800, 0x00007E00, 0x0240300
 #define UVD0_BASE__INST6_SEG3                      0
 #define UVD0_BASE__INST6_SEG4                      0
 
+#define DCN_BASE__INST0_SEG0                      0x00000012
+#define DCN_BASE__INST0_SEG1                      0x000000C0
+#define DCN_BASE__INST0_SEG2                      0x000034C0
+#define DCN_BASE__INST0_SEG3                      0
+#define DCN_BASE__INST0_SEG4                      0
+
+#define DCN_BASE__INST1_SEG0                      0
+#define DCN_BASE__INST1_SEG1                      0
+#define DCN_BASE__INST1_SEG2                      0
+#define DCN_BASE__INST1_SEG3                      0
+#define DCN_BASE__INST1_SEG4                      0
+
+#define DCN_BASE__INST2_SEG0                      0
+#define DCN_BASE__INST2_SEG1                      0
+#define DCN_BASE__INST2_SEG2                      0
+#define DCN_BASE__INST2_SEG3                      0
+#define DCN_BASE__INST2_SEG4                      0
+
+#define DCN_BASE__INST3_SEG0                      0
+#define DCN_BASE__INST3_SEG1                      0
+#define DCN_BASE__INST3_SEG2                      0
+#define DCN_BASE__INST3_SEG3                      0
+#define DCN_BASE__INST3_SEG4                      0
+
+#define DCN_BASE__INST4_SEG0                      0
+#define DCN_BASE__INST4_SEG1                      0
+#define DCN_BASE__INST4_SEG2                      0
+#define DCN_BASE__INST4_SEG3                      0
+#define DCN_BASE__INST4_SEG4                      0
 #endif
diff --git a/drivers/gpu/drm/amd/include/vega10_enum.h b/drivers/gpu/drm/amd/include/vega10_enum.h
index c14ba65a2415..adf1b754666e 100644
--- a/drivers/gpu/drm/amd/include/vega10_enum.h
+++ b/drivers/gpu/drm/amd/include/vega10_enum.h
@@ -1037,6 +1037,7 @@ TCC_CACHE_POLICY_STREAM                  = 0x00000001,
 typedef enum MTYPE {
 MTYPE_NC                                 = 0x00000000,
 MTYPE_WC                                 = 0x00000001,
+MTYPE_RW                                 = 0x00000001,
 MTYPE_CC                                 = 0x00000002,
 MTYPE_UC                                 = 0x00000003,
 } MTYPE;
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index fa8ad7db2b3a..c195575366a3 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -48,7 +48,6 @@ static int amd_powerplay_create(struct amdgpu_device *adev)
 
 	hwmgr->adev = adev;
 	hwmgr->not_vf = !amdgpu_sriov_vf(adev);
-	hwmgr->pm_en = (amdgpu_dpm && hwmgr->not_vf) ? true : false;
 	hwmgr->device = amdgpu_cgs_create_device(adev);
 	mutex_init(&hwmgr->smu_lock);
 	hwmgr->chip_family = adev->family;
@@ -928,9 +927,12 @@ static int pp_dpm_set_mp1_state(void *handle, enum pp_mp1_state mp1_state)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	if (!hwmgr || !hwmgr->pm_en)
+	if (!hwmgr)
 		return -EINVAL;
 
+	if (!hwmgr->pm_en)
+		return 0;
+
 	if (hwmgr->hwmgr_func->set_mp1_state)
 		return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
 
@@ -969,6 +971,14 @@ static int pp_dpm_switch_power_profile(void *handle,
 		workload = hwmgr->workload_setting[index];
 	}
 
+	if (type == PP_SMC_POWER_PROFILE_COMPUTE &&
+		hwmgr->hwmgr_func->disable_power_features_for_compute_performance) {
+			if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) {
+				mutex_unlock(&hwmgr->smu_lock);
+				return -EINVAL;
+			}
+	}
+
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
 		hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0);
 	mutex_unlock(&hwmgr->smu_lock);
@@ -1421,6 +1431,7 @@ static int pp_get_asic_baco_capability(void *handle, bool *cap)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
+	*cap = false;
 	if (!hwmgr)
 		return -EINVAL;
 
@@ -1548,6 +1559,40 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire)
 	return ret;
 }
 
+static int pp_set_df_cstate(void *handle, enum pp_df_cstate state)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr)
+		return -EINVAL;
+
+	if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_df_cstate)
+		return 0;
+
+	mutex_lock(&hwmgr->smu_lock);
+	hwmgr->hwmgr_func->set_df_cstate(hwmgr, state);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return 0;
+}
+
+static int pp_set_xgmi_pstate(void *handle, uint32_t pstate)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr)
+		return -EINVAL;
+
+	if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate)
+		return 0;
+
+	mutex_lock(&hwmgr->smu_lock);
+	hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return 0;
+}
+
 static const struct amd_pm_funcs pp_dpm_funcs = {
 	.load_firmware = pp_dpm_load_fw,
 	.wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
@@ -1606,4 +1651,6 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
 	.set_ppfeature_status = pp_set_ppfeature_status,
 	.asic_reset_mode_2 = pp_asic_reset_mode_2,
 	.smu_i2c_bus_access = pp_smu_i2c_bus_access,
+	.set_df_cstate = pp_set_df_cstate,
+	.set_xgmi_pstate = pp_set_xgmi_pstate,
 };
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 4acf139ea014..99469479e277 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -25,11 +25,16 @@
 #include "pp_debug.h"
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "soc15_common.h"
 #include "smu_v11_0.h"
 #include "smu_v12_0.h"
 #include "atom.h"
 #include "amd_pcie.h"
+#include "vega20_ppt.h"
+#include "arcturus_ppt.h"
+#include "navi10_ppt.h"
+#include "renoir_ppt.h"
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	#type
@@ -67,6 +72,8 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf)
 	uint32_t sort_feature[SMU_FEATURE_COUNT];
 	uint64_t hw_feature_count = 0;
 
+	mutex_lock(&smu->mutex);
+
 	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
 	if (ret)
 		goto failed;
@@ -92,9 +99,57 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf)
 	}
 
 failed:
+	mutex_unlock(&smu->mutex);
+
 	return size;
 }
 
+static int smu_feature_update_enable_state(struct smu_context *smu,
+					   uint64_t feature_mask,
+					   bool enabled)
+{
+	struct smu_feature *feature = &smu->smu_feature;
+	uint32_t feature_low = 0, feature_high = 0;
+	int ret = 0;
+
+	if (!smu->pm_enabled)
+		return ret;
+
+	feature_low = (feature_mask >> 0 ) & 0xffffffff;
+	feature_high = (feature_mask >> 32) & 0xffffffff;
+
+	if (enabled) {
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow,
+						  feature_low);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh,
+						  feature_high);
+		if (ret)
+			return ret;
+	} else {
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow,
+						  feature_low);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh,
+						  feature_high);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&feature->mutex);
+	if (enabled)
+		bitmap_or(feature->enabled, feature->enabled,
+				(unsigned long *)(&feature_mask), SMU_FEATURE_MAX);
+	else
+		bitmap_andnot(feature->enabled, feature->enabled,
+				(unsigned long *)(&feature_mask), SMU_FEATURE_MAX);
+	mutex_unlock(&feature->mutex);
+
+	return ret;
+}
+
 int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask)
 {
 	int ret = 0;
@@ -103,9 +158,11 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask)
 	uint64_t feature_2_disabled = 0;
 	uint64_t feature_enables = 0;
 
+	mutex_lock(&smu->mutex);
+
 	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
 	if (ret)
-		return ret;
+		goto out;
 
 	feature_enables = ((uint64_t)feature_mask[1] << 32 | (uint64_t)feature_mask[0]);
 
@@ -115,14 +172,17 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask)
 	if (feature_2_enabled) {
 		ret = smu_feature_update_enable_state(smu, feature_2_enabled, true);
 		if (ret)
-			return ret;
+			goto out;
 	}
 	if (feature_2_disabled) {
 		ret = smu_feature_update_enable_state(smu, feature_2_disabled, false);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
+out:
+	mutex_unlock(&smu->mutex);
+
 	return ret;
 }
 
@@ -159,8 +219,7 @@ int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t
 int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 			    uint32_t min, uint32_t max)
 {
-	int ret = 0, clk_id = 0;
-	uint32_t param;
+	int ret = 0;
 
 	if (min <= 0 && max <= 0)
 		return -EINVAL;
@@ -168,27 +227,7 @@ int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 	if (!smu_clk_dpm_is_enabled(smu, clk_type))
 		return 0;
 
-	clk_id = smu_clk_get_index(smu, clk_type);
-	if (clk_id < 0)
-		return clk_id;
-
-	if (max > 0) {
-		param = (uint32_t)((clk_id << 16) | (max & 0xffff));
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq,
-						  param);
-		if (ret)
-			return ret;
-	}
-
-	if (min > 0) {
-		param = (uint32_t)((clk_id << 16) | (min & 0xffff));
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq,
-						  param);
-		if (ret)
-			return ret;
-	}
-
-
+	ret = smu_set_soft_freq_limited_range(smu, clk_type, min, max);
 	return ret;
 }
 
@@ -229,7 +268,7 @@ int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 }
 
 int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
-			   uint32_t *min, uint32_t *max)
+			   uint32_t *min, uint32_t *max, bool lock_needed)
 {
 	uint32_t clock_limit;
 	int ret = 0;
@@ -237,6 +276,9 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 	if (!min && !max)
 		return -EINVAL;
 
+	if (lock_needed)
+		mutex_lock(&smu->mutex);
+
 	if (!smu_clk_dpm_is_enabled(smu, clk_type)) {
 		switch (clk_type) {
 		case SMU_MCLK:
@@ -260,14 +302,17 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 			*min = clock_limit / 100;
 		if (max)
 			*max = clock_limit / 100;
-
-		return 0;
+	} else {
+		/*
+		 * Todo: Use each asic(ASIC_ppt funcs) control the callbacks exposed to the
+		 * core driver and then have helpers for stuff that is common(SMU_v11_x | SMU_v12_x funcs).
+		 */
+		ret = smu_get_dpm_ultimate_freq(smu, clk_type, min, max);
 	}
-	/*
-	 * Todo: Use each asic(ASIC_ppt funcs) control the callbacks exposed to the
-	 * core driver and then have helpers for stuff that is common(SMU_v11_x | SMU_v12_x funcs).
-	 */
-	ret = smu_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+	if (lock_needed)
+		mutex_unlock(&smu->mutex);
+
 	return ret;
 }
 
@@ -311,6 +356,35 @@ int smu_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type,
 	return smu_get_dpm_freq_by_index(smu, clk_type, 0xff, value);
 }
 
+int smu_get_dpm_level_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t *min_value, uint32_t *max_value)
+{
+	int ret = 0;
+	uint32_t level_count = 0;
+
+	if (!min_value && !max_value)
+		return -EINVAL;
+
+	if (min_value) {
+		/* by default, level 0 clock value as min value */
+		ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, min_value);
+		if (ret)
+			return ret;
+	}
+
+	if (max_value) {
+		ret = smu_get_dpm_level_count(smu, clk_type, &level_count);
+		if (ret)
+			return ret;
+
+		ret = smu_get_dpm_freq_by_index(smu, clk_type, level_count - 1, max_value);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
 bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type)
 {
 	enum smu_feature_mask feature_id = 0;
@@ -338,7 +412,20 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type)
 	return true;
 }
 
-
+/**
+ * smu_dpm_set_power_gate - power gate/ungate the specific IP block
+ *
+ * @smu:        smu_context pointer
+ * @block_type: the IP block to power gate/ungate
+ * @gate:       to power gate if true, ungate otherwise
+ *
+ * This API uses no smu->mutex lock protection due to:
+ * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce).
+ *    This is guarded to be race condition free by the caller.
+ * 2. Or get called on user setting request of power_dpm_force_performance_level.
+ *    Under this case, the smu->mutex lock protection is already enforced on
+ *    the parent API smu_force_performance_level of the call path.
+ */
 int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
 			   bool gate)
 {
@@ -346,10 +433,10 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
 
 	switch (block_type) {
 	case AMD_IP_BLOCK_TYPE_UVD:
-		ret = smu_dpm_set_uvd_enable(smu, gate);
+		ret = smu_dpm_set_uvd_enable(smu, !gate);
 		break;
 	case AMD_IP_BLOCK_TYPE_VCE:
-		ret = smu_dpm_set_vce_enable(smu, gate);
+		ret = smu_dpm_set_vce_enable(smu, !gate);
 		break;
 	case AMD_IP_BLOCK_TYPE_GFX:
 		ret = smu_gfx_off_control(smu, gate);
@@ -357,6 +444,9 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
 	case AMD_IP_BLOCK_TYPE_SDMA:
 		ret = smu_powergate_sdma(smu, gate);
 		break;
+	case AMD_IP_BLOCK_TYPE_JPEG:
+		ret = smu_dpm_set_jpeg_enable(smu, !gate);
+		break;
 	default:
 		break;
 	}
@@ -364,12 +454,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
 	return ret;
 }
 
-enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu)
-{
-	/* not support power state */
-	return POWER_STATE_TYPE_DEFAULT;
-}
-
 int smu_get_power_num_states(struct smu_context *smu,
 			     struct pp_states_info *state_info)
 {
@@ -435,26 +519,25 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct amdgpu_device *adev = smu->adev;
-	struct smu_table *table = NULL;
-	int ret = 0;
+	struct smu_table *table = &smu_table->driver_table;
 	int table_id = smu_table_get_index(smu, table_index);
+	uint32_t table_size;
+	int ret = 0;
 
-	if (!table_data || table_id >= smu_table->table_count || table_id < 0)
+	if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0)
 		return -EINVAL;
 
-	table = &smu_table->tables[table_index];
+	table_size = smu_table->tables[table_index].size;
 
-	if (drv2smu)
-		memcpy(table->cpu_addr, table_data, table->size);
+	if (drv2smu) {
+		memcpy(table->cpu_addr, table_data, table_size);
+		/*
+		 * Flush hdp cache: to guard the content seen by
+		 * GPU is consitent with CPU.
+		 */
+		amdgpu_asic_flush_hdp(adev, NULL);
+	}
 
-	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetDriverDramAddrHigh,
-					  upper_32_bits(table->mc_address));
-	if (ret)
-		return ret;
-	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetDriverDramAddrLow,
-					  lower_32_bits(table->mc_address));
-	if (ret)
-		return ret;
 	ret = smu_send_smc_msg_with_param(smu, drv2smu ?
 					  SMU_MSG_TransferTableDram2Smu :
 					  SMU_MSG_TransferTableSmu2Dram,
@@ -462,11 +545,10 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int
 	if (ret)
 		return ret;
 
-	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
-
-	if (!drv2smu)
-		memcpy(table_data, table->cpu_addr, table->size);
+	if (!drv2smu) {
+		amdgpu_asic_flush_hdp(adev, NULL);
+		memcpy(table_data, table->cpu_addr, table_size);
+	}
 
 	return ret;
 }
@@ -475,15 +557,18 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
 {
 	if (adev->asic_type == CHIP_VEGA20)
 		return (amdgpu_dpm == 2) ? true : false;
-	else if (adev->asic_type >= CHIP_ARCTURUS)
-		return true;
-	else
+	else if (adev->asic_type >= CHIP_ARCTURUS) {
+		if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
+			return false;
+		else
+			return true;
+	} else
 		return false;
 }
 
 bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
 {
-	if (amdgpu_dpm != 1)
+	if (!is_support_sw_smu(adev))
 		return false;
 
 	if (adev->asic_type == CHIP_VEGA20)
@@ -495,16 +580,23 @@ bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
 int smu_sys_get_pp_table(struct smu_context *smu, void **table)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
+	uint32_t powerplay_table_size;
 
 	if (!smu_table->power_play_table && !smu_table->hardcode_pptable)
 		return -EINVAL;
 
+	mutex_lock(&smu->mutex);
+
 	if (smu_table->hardcode_pptable)
 		*table = smu_table->hardcode_pptable;
 	else
 		*table = smu_table->power_play_table;
 
-	return smu_table->power_play_table_size;
+	powerplay_table_size = smu_table->power_play_table_size;
+
+	mutex_unlock(&smu->mutex);
+
+	return powerplay_table_size;
 }
 
 int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size)
@@ -531,13 +623,18 @@ int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size)
 	memcpy(smu_table->hardcode_pptable, buf, size);
 	smu_table->power_play_table = smu_table->hardcode_pptable;
 	smu_table->power_play_table_size = size;
-	mutex_unlock(&smu->mutex);
+
+	/*
+	 * Special hw_fini action(for Navi1x, the DPMs disablement will be
+	 * skipped) may be needed for custom pptable uploading.
+	 */
+	smu->uploading_custom_pp_table = true;
 
 	ret = smu_reset(smu);
 	if (ret)
 		pr_info("smu reset failed, ret = %d\n", ret);
 
-	return ret;
+	smu->uploading_custom_pp_table = false;
 
 failed:
 	mutex_unlock(&smu->mutex);
@@ -569,50 +666,15 @@ int smu_feature_init_dpm(struct smu_context *smu)
 
 	return ret;
 }
-int smu_feature_update_enable_state(struct smu_context *smu, uint64_t feature_mask, bool enabled)
-{
-	uint32_t feature_low = 0, feature_high = 0;
-	int ret = 0;
-
-	if (!smu->pm_enabled)
-		return ret;
-
-	feature_low = (feature_mask >> 0 ) & 0xffffffff;
-	feature_high = (feature_mask >> 32) & 0xffffffff;
-
-	if (enabled) {
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow,
-						  feature_low);
-		if (ret)
-			return ret;
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh,
-						  feature_high);
-		if (ret)
-			return ret;
-
-	} else {
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow,
-						  feature_low);
-		if (ret)
-			return ret;
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh,
-						  feature_high);
-		if (ret)
-			return ret;
 
-	}
-
-	return ret;
-}
 
 int smu_feature_is_enabled(struct smu_context *smu, enum smu_feature_mask mask)
 {
-	struct amdgpu_device *adev = smu->adev;
 	struct smu_feature *feature = &smu->smu_feature;
 	int feature_id;
 	int ret = 0;
 
-	if (adev->flags & AMD_IS_APU)
+	if (smu->is_apu)
 		return 1;
 
 	feature_id = smu_feature_get_index(smu, mask);
@@ -633,8 +695,6 @@ int smu_feature_set_enabled(struct smu_context *smu, enum smu_feature_mask mask,
 {
 	struct smu_feature *feature = &smu->smu_feature;
 	int feature_id;
-	uint64_t feature_mask = 0;
-	int ret = 0;
 
 	feature_id = smu_feature_get_index(smu, mask);
 	if (feature_id < 0)
@@ -642,22 +702,9 @@ int smu_feature_set_enabled(struct smu_context *smu, enum smu_feature_mask mask,
 
 	WARN_ON(feature_id > feature->feature_num);
 
-	feature_mask = 1ULL << feature_id;
-
-	mutex_lock(&feature->mutex);
-	ret = smu_feature_update_enable_state(smu, feature_mask, enable);
-	if (ret)
-		goto failed;
-
-	if (enable)
-		test_and_set_bit(feature_id, feature->enabled);
-	else
-		test_and_clear_bit(feature_id, feature->enabled);
-
-failed:
-	mutex_unlock(&feature->mutex);
-
-	return ret;
+	return smu_feature_update_enable_state(smu,
+					       1ULL << feature_id,
+					       enable);
 }
 
 int smu_feature_is_supported(struct smu_context *smu, enum smu_feature_mask mask)
@@ -707,20 +754,27 @@ static int smu_set_funcs(struct amdgpu_device *adev)
 {
 	struct smu_context *smu = &adev->smu;
 
+	if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
+		smu->od_enabled = true;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+		vega20_set_ppt_funcs(smu);
+		break;
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
+		navi10_set_ppt_funcs(smu);
+		break;
 	case CHIP_ARCTURUS:
-		if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
-			smu->od_enabled = true;
-		smu_v11_0_set_smu_funcs(smu);
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+		arcturus_set_ppt_funcs(smu);
+		/* OD is not supported on Arcturus */
+		smu->od_enabled =false;
 		break;
 	case CHIP_RENOIR:
-		if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
-			smu->od_enabled = true;
-		smu_v12_0_set_smu_funcs(smu);
+		renoir_set_ppt_funcs(smu);
 		break;
 	default:
 		return -EINVAL;
@@ -736,6 +790,7 @@ static int smu_early_init(void *handle)
 
 	smu->adev = adev;
 	smu->pm_enabled = !!amdgpu_dpm;
+	smu->is_apu = false;
 	mutex_init(&smu->mutex);
 
 	return smu_set_funcs(adev);
@@ -749,11 +804,10 @@ static int smu_late_init(void *handle)
 	if (!smu->pm_enabled)
 		return 0;
 
-	mutex_lock(&smu->mutex);
 	smu_handle_task(&adev->smu,
 			smu->smu_dpm.dpm_level,
-			AMD_PP_TASK_COMPLETE_INIT);
-	mutex_unlock(&smu->mutex);
+			AMD_PP_TASK_COMPLETE_INIT,
+			false);
 
 	return 0;
 }
@@ -844,6 +898,7 @@ static int smu_sw_init(void *handle)
 	smu->smu_baco.platform_support = false;
 
 	mutex_init(&smu->sensor_lock);
+	mutex_init(&smu->metrics_lock);
 
 	smu->watermarks_bitmap = 0;
 	smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
@@ -919,37 +974,56 @@ static int smu_init_fb_allocations(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *tables = smu_table->tables;
-	uint32_t table_count = smu_table->table_count;
-	uint32_t i = 0;
-	int32_t ret = 0;
+	struct smu_table *driver_table = &(smu_table->driver_table);
+	uint32_t max_table_size = 0;
+	int ret, i;
 
-	if (table_count <= 0)
-		return -EINVAL;
-
-	for (i = 0 ; i < table_count; i++) {
-		if (tables[i].size == 0)
-			continue;
+	/* VRAM allocation for tool table */
+	if (tables[SMU_TABLE_PMSTATUSLOG].size) {
 		ret = amdgpu_bo_create_kernel(adev,
-					      tables[i].size,
-					      tables[i].align,
-					      tables[i].domain,
-					      &tables[i].bo,
-					      &tables[i].mc_address,
-					      &tables[i].cpu_addr);
-		if (ret)
-			goto failed;
+					      tables[SMU_TABLE_PMSTATUSLOG].size,
+					      tables[SMU_TABLE_PMSTATUSLOG].align,
+					      tables[SMU_TABLE_PMSTATUSLOG].domain,
+					      &tables[SMU_TABLE_PMSTATUSLOG].bo,
+					      &tables[SMU_TABLE_PMSTATUSLOG].mc_address,
+					      &tables[SMU_TABLE_PMSTATUSLOG].cpu_addr);
+		if (ret) {
+			pr_err("VRAM allocation for tool table failed!\n");
+			return ret;
+		}
 	}
 
-	return 0;
-failed:
-	for (; i > 0; i--) {
+	/* VRAM allocation for driver table */
+	for (i = 0; i < SMU_TABLE_COUNT; i++) {
 		if (tables[i].size == 0)
 			continue;
-		amdgpu_bo_free_kernel(&tables[i].bo,
-				      &tables[i].mc_address,
-				      &tables[i].cpu_addr);
 
+		if (i == SMU_TABLE_PMSTATUSLOG)
+			continue;
+
+		if (max_table_size < tables[i].size)
+			max_table_size = tables[i].size;
+	}
+
+	driver_table->size = max_table_size;
+	driver_table->align = PAGE_SIZE;
+	driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+	ret = amdgpu_bo_create_kernel(adev,
+				      driver_table->size,
+				      driver_table->align,
+				      driver_table->domain,
+				      &driver_table->bo,
+				      &driver_table->mc_address,
+				      &driver_table->cpu_addr);
+	if (ret) {
+		pr_err("VRAM allocation for driver table failed!\n");
+		if (tables[SMU_TABLE_PMSTATUSLOG].mc_address)
+			amdgpu_bo_free_kernel(&tables[SMU_TABLE_PMSTATUSLOG].bo,
+					      &tables[SMU_TABLE_PMSTATUSLOG].mc_address,
+					      &tables[SMU_TABLE_PMSTATUSLOG].cpu_addr);
 	}
+
 	return ret;
 }
 
@@ -957,65 +1031,21 @@ static int smu_fini_fb_allocations(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *tables = smu_table->tables;
-	uint32_t table_count = smu_table->table_count;
-	uint32_t i = 0;
+	struct smu_table *driver_table = &(smu_table->driver_table);
 
-	if (table_count == 0 || tables == NULL)
+	if (!tables)
 		return 0;
 
-	for (i = 0 ; i < table_count; i++) {
-		if (tables[i].size == 0)
-			continue;
-		amdgpu_bo_free_kernel(&tables[i].bo,
-				      &tables[i].mc_address,
-				      &tables[i].cpu_addr);
-	}
+	if (tables[SMU_TABLE_PMSTATUSLOG].mc_address)
+		amdgpu_bo_free_kernel(&tables[SMU_TABLE_PMSTATUSLOG].bo,
+				      &tables[SMU_TABLE_PMSTATUSLOG].mc_address,
+				      &tables[SMU_TABLE_PMSTATUSLOG].cpu_addr);
 
-	return 0;
-}
-
-static int smu_override_pcie_parameters(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg;
-	int ret;
+	amdgpu_bo_free_kernel(&driver_table->bo,
+			      &driver_table->mc_address,
+			      &driver_table->cpu_addr);
 
-	if (adev->flags & AMD_IS_APU)
-		return 0;
-
-	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
-		pcie_gen = 3;
-	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
-		pcie_gen = 2;
-	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
-		pcie_gen = 1;
-	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
-		pcie_gen = 0;
-
-	/* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1
-	 * Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
-	 * Bit 7:0:   PCIE lane width, 1 to 7 corresponds is x1 to x32
-	 */
-	if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
-		pcie_width = 6;
-	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
-		pcie_width = 5;
-	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
-		pcie_width = 4;
-	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
-		pcie_width = 3;
-	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
-		pcie_width = 2;
-	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
-		pcie_width = 1;
-
-	smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width;
-	ret = smu_send_smc_msg_with_param(smu,
-					  SMU_MSG_OverridePcieParameters,
-					  smu_pcie_arg);
-	if (ret)
-		pr_err("[%s] Attempt to override pcie params failed!\n", __func__);
-	return ret;
+	return 0;
 }
 
 static int smu_smc_table_hw_init(struct smu_context *smu,
@@ -1083,33 +1113,32 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 	}
 
 	/* smu_dump_pptable(smu); */
+	if (!amdgpu_sriov_vf(adev)) {
+		ret = smu_set_driver_table_location(smu);
+		if (ret)
+			return ret;
 
-	/*
-	 * Copy pptable bo in the vram to smc with SMU MSGs such as
-	 * SetDriverDramAddr and TransferTableDram2Smu.
-	 */
-	ret = smu_write_pptable(smu);
-	if (ret)
-		return ret;
-
-	/* issue RunAfllBtc msg */
-	ret = smu_run_afll_btc(smu);
-	if (ret)
-		return ret;
-
-	ret = smu_feature_set_allowed_mask(smu);
-	if (ret)
-		return ret;
-
-	ret = smu_system_features_control(smu, true);
-	if (ret)
-		return ret;
+		/*
+		 * Copy pptable bo in the vram to smc with SMU MSGs such as
+		 * SetDriverDramAddr and TransferTableDram2Smu.
+		 */
+		ret = smu_write_pptable(smu);
+		if (ret)
+			return ret;
 
-	if (adev->asic_type != CHIP_ARCTURUS) {
-		ret = smu_override_pcie_parameters(smu);
+		/* issue Run*Btc msg */
+		ret = smu_run_btc(smu);
+		if (ret)
+			return ret;
+		ret = smu_feature_set_allowed_mask(smu);
 		if (ret)
 			return ret;
 
+		ret = smu_system_features_control(smu, true);
+		if (ret)
+			return ret;
+	}
+	if (adev->asic_type != CHIP_ARCTURUS) {
 		ret = smu_notify_display_change(smu);
 		if (ret)
 			return ret;
@@ -1138,6 +1167,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 			return ret;
 	}
 
+	if (adev->asic_type != CHIP_ARCTURUS) {
+		ret = smu_override_pcie_parameters(smu);
+		if (ret)
+			return ret;
+	}
+
 	ret = smu_set_default_od_settings(smu, initialize);
 	if (ret)
 		return ret;
@@ -1147,7 +1182,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 		if (ret)
 			return ret;
 
-		ret = smu_get_power_limit(smu, &smu->default_power_limit, true);
+		ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false);
 		if (ret)
 			return ret;
 	}
@@ -1155,8 +1190,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 	/*
 	 * Set PMSTATUSLOG table bo address with SetToolsDramAddr MSG for tools.
 	 */
-	ret = smu_set_tool_table_location(smu);
-
+	if (!amdgpu_sriov_vf(adev)) {
+		ret = smu_set_tool_table_location(smu);
+	}
 	if (!smu_is_dpm_running(smu))
 		pr_info("dpm has been disabled\n");
 
@@ -1212,10 +1248,9 @@ static int smu_free_memory_pool(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *memory_pool = &smu_table->memory_pool;
-	int ret = 0;
 
 	if (memory_pool->size == SMU_MEMORY_POOL_SIZE_ZERO)
-		return ret;
+		return 0;
 
 	amdgpu_bo_free_kernel(&memory_pool->bo,
 			      &memory_pool->mc_address,
@@ -1223,34 +1258,55 @@ static int smu_free_memory_pool(struct smu_context *smu)
 
 	memset(memory_pool, 0, sizeof(struct smu_table));
 
-	return ret;
+	return 0;
 }
 
-static int smu_hw_init(void *handle)
+static int smu_start_smc_engine(struct smu_context *smu)
 {
-	int ret;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct smu_context *smu = &adev->smu;
+	struct amdgpu_device *adev = smu->adev;
+	int ret = 0;
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
 		if (adev->asic_type < CHIP_NAVI10) {
-			ret = smu_load_microcode(smu);
-			if (ret)
-				return ret;
+			if (smu->ppt_funcs->load_microcode) {
+				ret = smu->ppt_funcs->load_microcode(smu);
+				if (ret)
+					return ret;
+			}
 		}
 	}
 
-	ret = smu_check_fw_status(smu);
+	if (smu->ppt_funcs->check_fw_status) {
+		ret = smu->ppt_funcs->check_fw_status(smu);
+		if (ret)
+			pr_err("SMC is not ready\n");
+	}
+
+	return ret;
+}
+
+static int smu_hw_init(void *handle)
+{
+	int ret;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct smu_context *smu = &adev->smu;
+
+	ret = smu_start_smc_engine(smu);
 	if (ret) {
-		pr_err("SMC firmware status is not correct\n");
+		pr_err("SMU is not ready yet!\n");
 		return ret;
 	}
 
-	if (adev->flags & AMD_IS_APU) {
+	if (smu->is_apu) {
 		smu_powergate_sdma(&adev->smu, false);
 		smu_powergate_vcn(&adev->smu, false);
+		smu_powergate_jpeg(&adev->smu, false);
+		smu_set_gfx_cgpg(&adev->smu, true);
 	}
 
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
 	if (!smu->pm_enabled)
 		return 0;
 
@@ -1291,6 +1347,11 @@ failed:
 	return ret;
 }
 
+static int smu_stop_dpms(struct smu_context *smu)
+{
+	return smu_system_features_control(smu, false);
+}
+
 static int smu_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1298,9 +1359,45 @@ static int smu_hw_fini(void *handle)
 	struct smu_table_context *table_context = &smu->smu_table;
 	int ret = 0;
 
-	if (adev->flags & AMD_IS_APU) {
+	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	if (smu->is_apu) {
 		smu_powergate_sdma(&adev->smu, true);
 		smu_powergate_vcn(&adev->smu, true);
+		smu_powergate_jpeg(&adev->smu, true);
+	}
+
+	if (!smu->pm_enabled)
+		return 0;
+
+	if (!amdgpu_sriov_vf(adev)){
+		ret = smu_stop_thermal_control(smu);
+		if (ret) {
+			pr_warn("Fail to stop thermal control!\n");
+			return ret;
+		}
+
+		/*
+		 * For custom pptable uploading, skip the DPM features
+		 * disable process on Navi1x ASICs.
+		 *   - As the gfx related features are under control of
+		 *     RLC on those ASICs. RLC reinitialization will be
+		 *     needed to reenable them. That will cost much more
+		 *     efforts.
+		 *
+		 *   - SMU firmware can handle the DPM reenablement
+		 *     properly.
+		 */
+		if (!smu->uploading_custom_pp_table ||
+				!((adev->asic_type >= CHIP_NAVI10) &&
+					(adev->asic_type <= CHIP_NAVI12))) {
+			ret = smu_stop_dpms(smu);
+			if (ret) {
+				pr_warn("Fail to stop Dpms!\n");
+				return ret;
+			}
+		}
 	}
 
 	kfree(table_context->driver_pptable);
@@ -1344,13 +1441,19 @@ static int smu_suspend(void *handle)
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
-	bool baco_feature_is_enabled = smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT);
+	bool baco_feature_is_enabled = false;
+
+	if (!smu->pm_enabled)
+		return 0;
+
+	if(!smu->is_apu)
+		baco_feature_is_enabled = smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT);
 
 	ret = smu_system_features_control(smu, false);
 	if (ret)
 		return ret;
 
-	if (adev->in_gpu_reset && baco_feature_is_enabled) {
+	if (baco_feature_is_enabled) {
 		ret = smu_feature_set_enabled(smu, SMU_FEATURE_BACO_BIT, true);
 		if (ret) {
 			pr_warn("set BACO feature enabled failed, return %d\n", ret);
@@ -1363,6 +1466,8 @@ static int smu_suspend(void *handle)
 	if (adev->asic_type >= CHIP_NAVI10 &&
 	    adev->gfx.rlc.funcs->stop)
 		adev->gfx.rlc.funcs->stop(adev);
+	if (smu->is_apu)
+		smu_set_gfx_cgpg(&adev->smu, false);
 
 	return 0;
 }
@@ -1373,9 +1478,19 @@ static int smu_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
 
+	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	if (!smu->pm_enabled)
+		return 0;
+
 	pr_info("SMU is resuming...\n");
 
-	mutex_lock(&smu->mutex);
+	ret = smu_start_smc_engine(smu);
+	if (ret) {
+		pr_err("SMU is not ready yet!\n");
+		goto failed;
+	}
 
 	ret = smu_smc_table_hw_init(smu, false);
 	if (ret)
@@ -1385,13 +1500,16 @@ static int smu_resume(void *handle)
 	if (ret)
 		goto failed;
 
-	mutex_unlock(&smu->mutex);
+	if (smu->is_apu)
+		smu_set_gfx_cgpg(&adev->smu, true);
+
+	smu->disable_uclk_switch = 0;
 
 	pr_info("SMU is resumed successfully!\n");
 
 	return 0;
+
 failed:
-	mutex_unlock(&smu->mutex);
 	return ret;
 }
 
@@ -1409,8 +1527,9 @@ int smu_display_configuration_change(struct smu_context *smu,
 
 	mutex_lock(&smu->mutex);
 
-	smu_set_deep_sleep_dcefclk(smu,
-				   display_config->min_dcef_deep_sleep_set_clk / 100);
+	if (smu->ppt_funcs->set_deep_sleep_dcefclk)
+		smu->ppt_funcs->set_deep_sleep_dcefclk(smu,
+				display_config->min_dcef_deep_sleep_set_clk / 100);
 
 	for (index = 0; index < display_config->num_path_including_non_display; index++) {
 		if (display_config->displays[index].controller_id != 0)
@@ -1529,7 +1648,8 @@ static int smu_enable_umd_pstate(void *handle,
 
 	struct smu_context *smu = (struct smu_context*)(handle);
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
-	if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)
+
+	if (!smu->is_apu && (!smu->pm_enabled || !smu_dpm_ctx->dpm_context))
 		return -EINVAL;
 
 	if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) {
@@ -1562,43 +1682,6 @@ static int smu_enable_umd_pstate(void *handle,
 	return 0;
 }
 
-static int smu_default_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
-{
-	int ret = 0;
-	uint32_t sclk_mask, mclk_mask, soc_mask;
-
-	switch (level) {
-	case AMD_DPM_FORCED_LEVEL_HIGH:
-		ret = smu_force_dpm_limit_value(smu, true);
-		break;
-	case AMD_DPM_FORCED_LEVEL_LOW:
-		ret = smu_force_dpm_limit_value(smu, false);
-		break;
-	case AMD_DPM_FORCED_LEVEL_AUTO:
-	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
-		ret = smu_unforce_dpm_levels(smu);
-		break;
-	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
-	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
-	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
-		ret = smu_get_profiling_clk_mask(smu, level,
-						 &sclk_mask,
-						 &mclk_mask,
-						 &soc_mask);
-		if (ret)
-			return ret;
-		smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
-		smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
-		smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
-		break;
-	case AMD_DPM_FORCED_LEVEL_MANUAL:
-	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
-	default:
-		break;
-	}
-	return ret;
-}
-
 int smu_adjust_power_state_dynamic(struct smu_context *smu,
 				   enum amd_dpm_forced_level level,
 				   bool skip_display_settings)
@@ -1626,7 +1709,7 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 	}
 
 	if (!skip_display_settings) {
-		ret = smu_notify_smc_dispaly_config(smu);
+		ret = smu_notify_smc_display_config(smu);
 		if (ret) {
 			pr_err("Failed to notify smc display config!");
 			return ret;
@@ -1636,11 +1719,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 	if (smu_dpm_ctx->dpm_level != level) {
 		ret = smu_asic_set_performance_level(smu, level);
 		if (ret) {
-			ret = smu_default_set_performance_level(smu, level);
-			if (ret) {
-				pr_err("Failed to set performance level!");
-				return ret;
-			}
+			pr_err("Failed to set performance level!");
+			return ret;
 		}
 
 		/* update the saved copy */
@@ -1653,7 +1733,7 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		workload = smu->workload_setting[index];
 
 		if (smu->power_profile_mode != workload)
-			smu_set_power_profile_mode(smu, &workload, 0);
+			smu_set_power_profile_mode(smu, &workload, 0, false);
 	}
 
 	return ret;
@@ -1661,18 +1741,22 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 
 int smu_handle_task(struct smu_context *smu,
 		    enum amd_dpm_forced_level level,
-		    enum amd_pp_task task_id)
+		    enum amd_pp_task task_id,
+		    bool lock_needed)
 {
 	int ret = 0;
 
+	if (lock_needed)
+		mutex_lock(&smu->mutex);
+
 	switch (task_id) {
 	case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE:
 		ret = smu_pre_display_config_changed(smu);
 		if (ret)
-			return ret;
+			goto out;
 		ret = smu_set_cpu_power_state(smu);
 		if (ret)
-			return ret;
+			goto out;
 		ret = smu_adjust_power_state_dynamic(smu, level, false);
 		break;
 	case AMD_PP_TASK_COMPLETE_INIT:
@@ -1683,6 +1767,10 @@ int smu_handle_task(struct smu_context *smu,
 		break;
 	}
 
+out:
+	if (lock_needed)
+		mutex_unlock(&smu->mutex);
+
 	return ret;
 }
 
@@ -1715,7 +1803,7 @@ int smu_switch_power_profile(struct smu_context *smu,
 	}
 
 	if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
-		smu_set_power_profile_mode(smu, &workload, 0);
+		smu_set_power_profile_mode(smu, &workload, 0, false);
 
 	mutex_unlock(&smu->mutex);
 
@@ -1727,7 +1815,7 @@ enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu)
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
 	enum amd_dpm_forced_level level;
 
-	if (!smu_dpm_ctx->dpm_context)
+	if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
 		return -EINVAL;
 
 	mutex_lock(&(smu->mutex));
@@ -1742,15 +1830,22 @@ int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_lev
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
 	int ret = 0;
 
-	if (!smu_dpm_ctx->dpm_context)
+	if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
 		return -EINVAL;
 
+	mutex_lock(&smu->mutex);
+
 	ret = smu_enable_umd_pstate(smu, &level);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&smu->mutex);
 		return ret;
+	}
 
 	ret = smu_handle_task(smu, level,
-			      AMD_PP_TASK_READJUST_POWER_STATE);
+			      AMD_PP_TASK_READJUST_POWER_STATE,
+			      false);
+
+	mutex_unlock(&smu->mutex);
 
 	return ret;
 }
@@ -1766,6 +1861,142 @@ int smu_set_display_count(struct smu_context *smu, uint32_t count)
 	return ret;
 }
 
+int smu_force_clk_levels(struct smu_context *smu,
+			 enum smu_clk_type clk_type,
+			 uint32_t mask,
+			 bool lock_needed)
+{
+	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+	int ret = 0;
+
+	if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
+		pr_debug("force clock level is for dpm manual mode only.\n");
+		return -EINVAL;
+	}
+
+	if (lock_needed)
+		mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs && smu->ppt_funcs->force_clk_levels)
+		ret = smu->ppt_funcs->force_clk_levels(smu, clk_type, mask);
+
+	if (lock_needed)
+		mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_mp1_state(struct smu_context *smu,
+		      enum pp_mp1_state mp1_state)
+{
+	uint16_t msg;
+	int ret;
+
+	/*
+	 * The SMC is not fully ready. That may be
+	 * expected as the IP may be masked.
+	 * So, just return without error.
+	 */
+	if (!smu->pm_enabled)
+		return 0;
+
+	mutex_lock(&smu->mutex);
+
+	switch (mp1_state) {
+	case PP_MP1_STATE_SHUTDOWN:
+		msg = SMU_MSG_PrepareMp1ForShutdown;
+		break;
+	case PP_MP1_STATE_UNLOAD:
+		msg = SMU_MSG_PrepareMp1ForUnload;
+		break;
+	case PP_MP1_STATE_RESET:
+		msg = SMU_MSG_PrepareMp1ForReset;
+		break;
+	case PP_MP1_STATE_NONE:
+	default:
+		mutex_unlock(&smu->mutex);
+		return 0;
+	}
+
+	/* some asics may not support those messages */
+	if (smu_msg_get_index(smu, msg) < 0) {
+		mutex_unlock(&smu->mutex);
+		return 0;
+	}
+
+	ret = smu_send_smc_msg(smu, msg);
+	if (ret)
+		pr_err("[PrepareMp1] Failed!\n");
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_df_cstate(struct smu_context *smu,
+		      enum pp_df_cstate state)
+{
+	int ret = 0;
+
+	/*
+	 * The SMC is not fully ready. That may be
+	 * expected as the IP may be masked.
+	 * So, just return without error.
+	 */
+	if (!smu->pm_enabled)
+		return 0;
+
+	if (!smu->ppt_funcs || !smu->ppt_funcs->set_df_cstate)
+		return 0;
+
+	mutex_lock(&smu->mutex);
+
+	ret = smu->ppt_funcs->set_df_cstate(smu, state);
+	if (ret)
+		pr_err("[SetDfCstate] failed!\n");
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_write_watermarks_table(struct smu_context *smu)
+{
+	void *watermarks_table = smu->smu_table.watermarks_table;
+
+	if (!watermarks_table)
+		return -EINVAL;
+
+	return smu_update_table(smu,
+				SMU_TABLE_WATERMARKS,
+				0,
+				watermarks_table,
+				true);
+}
+
+int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
+		struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges)
+{
+	void *table = smu->smu_table.watermarks_table;
+
+	if (!table)
+		return -EINVAL;
+
+	mutex_lock(&smu->mutex);
+
+	if (!smu->disable_watermark &&
+			smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
+			smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
+		smu_set_watermarks_table(smu, table, clock_ranges);
+		smu->watermarks_bitmap |= WATERMARKS_EXIST;
+		smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
+	}
+
+	mutex_unlock(&smu->mutex);
+
+	return 0;
+}
+
 const struct amd_ip_funcs smu_ip_funcs = {
 	.name = "smu",
 	.early_init = smu_early_init,
@@ -1802,3 +2033,578 @@ const struct amdgpu_ip_block_version smu_v12_0_ip_block =
 	.rev = 0,
 	.funcs = &smu_ip_funcs,
 };
+
+int smu_load_microcode(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->load_microcode)
+		ret = smu->ppt_funcs->load_microcode(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_check_fw_status(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->check_fw_status)
+		ret = smu->ppt_funcs->check_fw_status(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_gfx_cgpg)
+		ret = smu->ppt_funcs->set_gfx_cgpg(smu, enabled);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_fan_speed_rpm)
+		ret = smu->ppt_funcs->set_fan_speed_rpm(smu, speed);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_power_limit(struct smu_context *smu,
+			uint32_t *limit,
+			bool def,
+			bool lock_needed)
+{
+	int ret = 0;
+
+	if (lock_needed)
+		mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_power_limit)
+		ret = smu->ppt_funcs->get_power_limit(smu, limit, def);
+
+	if (lock_needed)
+		mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_power_limit(struct smu_context *smu, uint32_t limit)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_power_limit)
+		ret = smu->ppt_funcs->set_power_limit(smu, limit);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->print_clk_levels)
+		ret = smu->ppt_funcs->print_clk_levels(smu, clk_type, buf);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_od_percentage)
+		ret = smu->ppt_funcs->get_od_percentage(smu, type);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_od_percentage(struct smu_context *smu, enum smu_clk_type type, uint32_t value)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_od_percentage)
+		ret = smu->ppt_funcs->set_od_percentage(smu, type, value);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_od_edit_dpm_table(struct smu_context *smu,
+			  enum PP_OD_DPM_TABLE_COMMAND type,
+			  long *input, uint32_t size)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->od_edit_dpm_table)
+		ret = smu->ppt_funcs->od_edit_dpm_table(smu, type, input, size);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_read_sensor(struct smu_context *smu,
+		    enum amd_pp_sensors sensor,
+		    void *data, uint32_t *size)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->read_sensor)
+		ret = smu->ppt_funcs->read_sensor(smu, sensor, data, size);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_power_profile_mode(struct smu_context *smu, char *buf)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_power_profile_mode)
+		ret = smu->ppt_funcs->get_power_profile_mode(smu, buf);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_power_profile_mode(struct smu_context *smu,
+			       long *param,
+			       uint32_t param_size,
+			       bool lock_needed)
+{
+	int ret = 0;
+
+	if (lock_needed)
+		mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_power_profile_mode)
+		ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size);
+
+	if (lock_needed)
+		mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+
+int smu_get_fan_control_mode(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_fan_control_mode)
+		ret = smu->ppt_funcs->get_fan_control_mode(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_fan_control_mode(struct smu_context *smu, int value)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_fan_control_mode)
+		ret = smu->ppt_funcs->set_fan_control_mode(smu, value);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_fan_speed_percent)
+		ret = smu->ppt_funcs->get_fan_speed_percent(smu, speed);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_fan_speed_percent)
+		ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_fan_speed_rpm)
+		ret = smu->ppt_funcs->get_fan_speed_rpm(smu, speed);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_deep_sleep_dcefclk)
+		ret = smu->ppt_funcs->set_deep_sleep_dcefclk(smu, clk);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_active_display_count(struct smu_context *smu, uint32_t count)
+{
+	int ret = 0;
+
+	if (smu->ppt_funcs->set_active_display_count)
+		ret = smu->ppt_funcs->set_active_display_count(smu, count);
+
+	return ret;
+}
+
+int smu_get_clock_by_type(struct smu_context *smu,
+			  enum amd_pp_clock_type type,
+			  struct amd_pp_clocks *clocks)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_clock_by_type)
+		ret = smu->ppt_funcs->get_clock_by_type(smu, type, clocks);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_max_high_clocks(struct smu_context *smu,
+			    struct amd_pp_simple_clock_info *clocks)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_max_high_clocks)
+		ret = smu->ppt_funcs->get_max_high_clocks(smu, clocks);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_clock_by_type_with_latency(struct smu_context *smu,
+				       enum smu_clk_type clk_type,
+				       struct pp_clock_levels_with_latency *clocks)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_clock_by_type_with_latency)
+		ret = smu->ppt_funcs->get_clock_by_type_with_latency(smu, clk_type, clocks);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_clock_by_type_with_voltage(struct smu_context *smu,
+				       enum amd_pp_clock_type type,
+				       struct pp_clock_levels_with_voltage *clocks)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_clock_by_type_with_voltage)
+		ret = smu->ppt_funcs->get_clock_by_type_with_voltage(smu, type, clocks);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+
+int smu_display_clock_voltage_request(struct smu_context *smu,
+				      struct pp_display_clock_request *clock_req)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->display_clock_voltage_request)
+		ret = smu->ppt_funcs->display_clock_voltage_request(smu, clock_req);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+
+int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disable_memory_clock_switch)
+{
+	int ret = -EINVAL;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->display_disable_memory_clock_switch)
+		ret = smu->ppt_funcs->display_disable_memory_clock_switch(smu, disable_memory_clock_switch);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_notify_smu_enable_pwe(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->notify_smu_enable_pwe)
+		ret = smu->ppt_funcs->notify_smu_enable_pwe(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_xgmi_pstate(struct smu_context *smu,
+			uint32_t pstate)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_xgmi_pstate)
+		ret = smu->ppt_funcs->set_xgmi_pstate(smu, pstate);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_set_azalia_d3_pme(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->set_azalia_d3_pme)
+		ret = smu->ppt_funcs->set_azalia_d3_pme(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+bool smu_baco_is_support(struct smu_context *smu)
+{
+	bool ret = false;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support)
+		ret = smu->ppt_funcs->baco_is_support(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state)
+{
+	if (smu->ppt_funcs->baco_get_state)
+		return -EINVAL;
+
+	mutex_lock(&smu->mutex);
+	*state = smu->ppt_funcs->baco_get_state(smu);
+	mutex_unlock(&smu->mutex);
+
+	return 0;
+}
+
+int smu_baco_enter(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->baco_enter)
+		ret = smu->ppt_funcs->baco_enter(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_baco_exit(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->baco_exit)
+		ret = smu->ppt_funcs->baco_exit(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_mode2_reset(struct smu_context *smu)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->mode2_reset)
+		ret = smu->ppt_funcs->mode2_reset(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
+					 struct pp_smu_nv_clock_table *max_clocks)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_max_sustainable_clocks_by_dc)
+		ret = smu->ppt_funcs->get_max_sustainable_clocks_by_dc(smu, max_clocks);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_get_uclk_dpm_states(struct smu_context *smu,
+			    unsigned int *clock_values_in_khz,
+			    unsigned int *num_states)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_uclk_dpm_states)
+		ret = smu->ppt_funcs->get_uclk_dpm_states(smu, clock_values_in_khz, num_states);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu)
+{
+	enum amd_pm_state_type pm_state = POWER_STATE_TYPE_DEFAULT;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_current_power_state)
+		pm_state = smu->ppt_funcs->get_current_power_state(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return pm_state;
+}
+
+int smu_get_dpm_clock_table(struct smu_context *smu,
+			    struct dpm_clocks *clock_table)
+{
+	int ret = 0;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->get_dpm_clock_table)
+		ret = smu->ppt_funcs->get_dpm_clock_table(smu, clock_table);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+uint32_t smu_get_pptable_power_limit(struct smu_context *smu)
+{
+	uint32_t ret = 0;
+
+	if (smu->ppt_funcs->get_pptable_power_limit)
+		ret = smu->ppt_funcs->get_pptable_power_limit(smu);
+
+	return ret;
+}
+
+int smu_send_smc_msg(struct smu_context *smu,
+		     enum smu_message_type msg)
+{
+	int ret;
+
+	ret = smu_send_smc_msg_with_param(smu, msg, 0);
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index d493a3f8c07a..14ba6aa876e2 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "smu_v11_0.h"
@@ -36,6 +37,12 @@
 #include "smu_v11_0_pptable.h"
 #include "arcturus_ppsmc.h"
 #include "nbio/nbio_7_4_sh_mask.h"
+#include "amdgpu_xgmi.h"
+#include <linux/i2c.h>
+#include <linux/pci.h>
+#include "amdgpu_ras.h"
+
+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev
 
 #define CTF_OFFSET_EDGE			5
 #define CTF_OFFSET_HOTSPOT		5
@@ -112,8 +119,7 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_message_map[SMU_MSG_MAX_COUNT]
 	MSG_MAP(PrepareMp1ForShutdown,		     PPSMC_MSG_PrepareMp1ForShutdown),
 	MSG_MAP(SoftReset,			     PPSMC_MSG_SoftReset),
 	MSG_MAP(RunAfllBtc,			     PPSMC_MSG_RunAfllBtc),
-	MSG_MAP(RunGfxDcBtc,			     PPSMC_MSG_RunGfxDcBtc),
-	MSG_MAP(RunSocDcBtc,			     PPSMC_MSG_RunSocDcBtc),
+	MSG_MAP(RunDcBtc,			     PPSMC_MSG_RunDcBtc),
 	MSG_MAP(DramLogSetDramAddrHigh,		     PPSMC_MSG_DramLogSetDramAddrHigh),
 	MSG_MAP(DramLogSetDramAddrLow,		     PPSMC_MSG_DramLogSetDramAddrLow),
 	MSG_MAP(DramLogSetDramSize,		     PPSMC_MSG_DramLogSetDramSize),
@@ -172,6 +178,8 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(SMU_METRICS),
 	TAB_MAP(DRIVER_SMU_CONFIG),
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(I2C_COMMANDS),
+	TAB_MAP(ACTIVITY_MONITOR_COEFF),
 };
 
 static struct smu_11_0_cmn2aisc_mapping arcturus_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -273,10 +281,8 @@ static int arcturus_get_workload_type(struct smu_context *smu, enum PP_SMC_POWER
 		return -EINVAL;
 
 	mapping = arcturus_workload_map[profile];
-	if (!(mapping.valid_mapping)) {
-		pr_warn("Unsupported SMU power source: %d\n", profile);
+	if (!(mapping.valid_mapping))
 		return -EINVAL;
-	}
 
 	return mapping.map_to;
 }
@@ -294,6 +300,13 @@ static int arcturus_tables_init(struct smu_context *smu, struct smu_table *table
 	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
 		       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
+	SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
+			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+
+	SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+		       sizeof(DpmActivityMonitorCoeffInt_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
+
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
 		return -ENOMEM;
@@ -485,6 +498,7 @@ static int arcturus_store_powerplay_table(struct smu_context *smu)
 {
 	struct smu_11_0_powerplay_table *powerplay_table = NULL;
 	struct smu_table_context *table_context = &smu->smu_table;
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
 	int ret = 0;
 
 	if (!table_context->power_play_table)
@@ -497,6 +511,12 @@ static int arcturus_store_powerplay_table(struct smu_context *smu)
 
 	table_context->thermal_controller_type = powerplay_table->thermal_controller_type;
 
+	mutex_lock(&smu_baco->mutex);
+	if (powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_BACO ||
+	    powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_MACO)
+		smu_baco->platform_support = true;
+	mutex_unlock(&smu_baco->mutex);
+
 	return ret;
 }
 
@@ -528,9 +548,17 @@ static int arcturus_append_powerplay_table(struct smu_context *smu)
 	return 0;
 }
 
-static int arcturus_run_btc_afll(struct smu_context *smu)
+static int arcturus_run_btc(struct smu_context *smu)
 {
-	return smu_send_smc_msg(smu, SMU_MSG_RunAfllBtc);
+	int ret = 0;
+
+	ret = smu_send_smc_msg(smu, SMU_MSG_RunAfllBtc);
+	if (ret) {
+		pr_err("RunAfllBtc failed!\n");
+		return ret;
+	}
+
+	return smu_send_smc_msg(smu, SMU_MSG_RunDcBtc);
 }
 
 static int arcturus_populate_umd_state_clk(struct smu_context *smu)
@@ -610,12 +638,17 @@ static int arcturus_print_clk_levels(struct smu_context *smu,
 			return ret;
 		}
 
+		/*
+		 * For DPM disabled case, there will be only one clock level.
+		 * And it's safe to assume that is always the current clock.
+		 */
 		for (i = 0; i < clocks.num_levels; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n", i,
 					clocks.data[i].clocks_in_khz / 1000,
-					arcturus_freqs_in_same_level(
+					(clocks.num_levels == 1) ? "*" :
+					(arcturus_freqs_in_same_level(
 					clocks.data[i].clocks_in_khz / 1000,
-					now / 100) ? "*" : "");
+					now / 100) ? "*" : ""));
 		break;
 
 	case SMU_MCLK:
@@ -635,9 +668,10 @@ static int arcturus_print_clk_levels(struct smu_context *smu,
 		for (i = 0; i < clocks.num_levels; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
-				arcturus_freqs_in_same_level(
+				(clocks.num_levels == 1) ? "*" :
+				(arcturus_freqs_in_same_level(
 				clocks.data[i].clocks_in_khz / 1000,
-				now / 100) ? "*" : "");
+				now / 100) ? "*" : ""));
 		break;
 
 	case SMU_SOCCLK:
@@ -657,9 +691,10 @@ static int arcturus_print_clk_levels(struct smu_context *smu,
 		for (i = 0; i < clocks.num_levels; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
-				arcturus_freqs_in_same_level(
+				(clocks.num_levels == 1) ? "*" :
+				(arcturus_freqs_in_same_level(
 				clocks.data[i].clocks_in_khz / 1000,
-				now / 100) ? "*" : "");
+				now / 100) ? "*" : ""));
 		break;
 
 	case SMU_FCLK:
@@ -679,9 +714,10 @@ static int arcturus_print_clk_levels(struct smu_context *smu,
 		for (i = 0; i < single_dpm_table->count; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, single_dpm_table->dpm_levels[i].value,
-				arcturus_freqs_in_same_level(
+				(clocks.num_levels == 1) ? "*" :
+				(arcturus_freqs_in_same_level(
 				clocks.data[i].clocks_in_khz / 1000,
-				now / 100) ? "*" : "");
+				now / 100) ? "*" : ""));
 		break;
 
 	default:
@@ -756,8 +792,6 @@ static int arcturus_force_clk_levels(struct smu_context *smu,
 	uint32_t soft_min_level, soft_max_level;
 	int ret = 0;
 
-	mutex_lock(&(smu->mutex));
-
 	soft_min_level = mask ? (ffs(mask) - 1) : 0;
 	soft_max_level = mask ? (fls(mask) - 1) : 0;
 
@@ -792,91 +826,19 @@ static int arcturus_force_clk_levels(struct smu_context *smu,
 		break;
 
 	case SMU_MCLK:
-		single_dpm_table = &(dpm_table->mem_table);
-
-		if (soft_max_level >= single_dpm_table->count) {
-			pr_err("Clock level specified %d is over max allowed %d\n",
-					soft_max_level, single_dpm_table->count - 1);
-			ret = -EINVAL;
-			break;
-		}
-
-		single_dpm_table->dpm_state.soft_min_level =
-			single_dpm_table->dpm_levels[soft_min_level].value;
-		single_dpm_table->dpm_state.soft_max_level =
-			single_dpm_table->dpm_levels[soft_max_level].value;
-
-		ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_UCLK_MASK);
-		if (ret) {
-			pr_err("Failed to upload boot level to lowest!\n");
-			break;
-		}
-
-		ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_UCLK_MASK);
-		if (ret)
-			pr_err("Failed to upload dpm max level to highest!\n");
-
-		break;
-
 	case SMU_SOCCLK:
-		single_dpm_table = &(dpm_table->soc_table);
-
-		if (soft_max_level >= single_dpm_table->count) {
-			pr_err("Clock level specified %d is over max allowed %d\n",
-					soft_max_level, single_dpm_table->count - 1);
-			ret = -EINVAL;
-			break;
-		}
-
-		single_dpm_table->dpm_state.soft_min_level =
-			single_dpm_table->dpm_levels[soft_min_level].value;
-		single_dpm_table->dpm_state.soft_max_level =
-			single_dpm_table->dpm_levels[soft_max_level].value;
-
-		ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_SOCCLK_MASK);
-		if (ret) {
-			pr_err("Failed to upload boot level to lowest!\n");
-			break;
-		}
-
-		ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_SOCCLK_MASK);
-		if (ret)
-			pr_err("Failed to upload dpm max level to highest!\n");
-
-		break;
-
 	case SMU_FCLK:
-		single_dpm_table = &(dpm_table->fclk_table);
-
-		if (soft_max_level >= single_dpm_table->count) {
-			pr_err("Clock level specified %d is over max allowed %d\n",
-					soft_max_level, single_dpm_table->count - 1);
-			ret = -EINVAL;
-			break;
-		}
-
-		single_dpm_table->dpm_state.soft_min_level =
-			single_dpm_table->dpm_levels[soft_min_level].value;
-		single_dpm_table->dpm_state.soft_max_level =
-			single_dpm_table->dpm_levels[soft_max_level].value;
-
-		ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_FCLK_MASK);
-		if (ret) {
-			pr_err("Failed to upload boot level to lowest!\n");
-			break;
-		}
-
-		ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_FCLK_MASK);
-		if (ret)
-			pr_err("Failed to upload dpm max level to highest!\n");
-
+		/*
+		 * Should not arrive here since Arcturus does not
+		 * support mclk/socclk/fclk softmin/softmax settings
+		 */
+		ret = -EINVAL;
 		break;
 
 	default:
 		break;
 	}
 
-	mutex_unlock(&(smu->mutex));
 	return ret;
 }
 
@@ -910,18 +872,21 @@ static int arcturus_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time ||
 	     time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }
@@ -1043,7 +1008,7 @@ static int arcturus_read_sensor(struct smu_context *smu,
 		*size = 4;
 		break;
 	default:
-		ret = smu_smc_read_sensor(smu, sensor, data, size);
+		ret = smu_v11_0_read_sensor(smu, sensor, data, size);
 	}
 	mutex_unlock(&smu->sensor_lock);
 
@@ -1186,6 +1151,7 @@ static int arcturus_force_dpm_limit_value(struct smu_context *smu, bool highest)
 {
 	struct arcturus_dpm_table *dpm_table =
 		(struct arcturus_dpm_table *)smu->smu_dpm.dpm_context;
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0);
 	uint32_t soft_level;
 	int ret = 0;
 
@@ -1199,40 +1165,27 @@ static int arcturus_force_dpm_limit_value(struct smu_context *smu, bool highest)
 		dpm_table->gfx_table.dpm_state.soft_max_level =
 		dpm_table->gfx_table.dpm_levels[soft_level].value;
 
-	/* uclk */
-	if (highest)
-		soft_level = arcturus_find_highest_dpm_level(&(dpm_table->mem_table));
-	else
-		soft_level = arcturus_find_lowest_dpm_level(&(dpm_table->mem_table));
-
-	dpm_table->mem_table.dpm_state.soft_min_level =
-		dpm_table->mem_table.dpm_state.soft_max_level =
-		dpm_table->mem_table.dpm_levels[soft_level].value;
-
-	/* socclk */
-	if (highest)
-		soft_level = arcturus_find_highest_dpm_level(&(dpm_table->soc_table));
-	else
-		soft_level = arcturus_find_lowest_dpm_level(&(dpm_table->soc_table));
-
-	dpm_table->soc_table.dpm_state.soft_min_level =
-		dpm_table->soc_table.dpm_state.soft_max_level =
-		dpm_table->soc_table.dpm_levels[soft_level].value;
-
-	ret = arcturus_upload_dpm_level(smu, false, 0xFFFFFFFF);
+	ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK);
 	if (ret) {
 		pr_err("Failed to upload boot level to %s!\n",
 				highest ? "highest" : "lowest");
 		return ret;
 	}
 
-	ret = arcturus_upload_dpm_level(smu, true, 0xFFFFFFFF);
+	ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK);
 	if (ret) {
 		pr_err("Failed to upload dpm max level to %s!\n!",
 				highest ? "highest" : "lowest");
 		return ret;
 	}
 
+	if (hive)
+		/*
+		 * Force XGMI Pstate to highest or lowest
+		 * TODO: revise this when xgmi dpm is functional
+		 */
+		ret = smu_v11_0_set_xgmi_pstate(smu, highest ? 1 : 0);
+
 	return ret;
 }
 
@@ -1240,6 +1193,7 @@ static int arcturus_unforce_dpm_levels(struct smu_context *smu)
 {
 	struct arcturus_dpm_table *dpm_table =
 		(struct arcturus_dpm_table *)smu->smu_dpm.dpm_context;
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0);
 	uint32_t soft_min_level, soft_max_level;
 	int ret = 0;
 
@@ -1251,34 +1205,25 @@ static int arcturus_unforce_dpm_levels(struct smu_context *smu)
 	dpm_table->gfx_table.dpm_state.soft_max_level =
 		dpm_table->gfx_table.dpm_levels[soft_max_level].value;
 
-	/* uclk */
-	soft_min_level = arcturus_find_lowest_dpm_level(&(dpm_table->mem_table));
-	soft_max_level = arcturus_find_highest_dpm_level(&(dpm_table->mem_table));
-	dpm_table->mem_table.dpm_state.soft_min_level =
-		dpm_table->gfx_table.dpm_levels[soft_min_level].value;
-	dpm_table->mem_table.dpm_state.soft_max_level =
-		dpm_table->gfx_table.dpm_levels[soft_max_level].value;
-
-	/* socclk */
-	soft_min_level = arcturus_find_lowest_dpm_level(&(dpm_table->soc_table));
-	soft_max_level = arcturus_find_highest_dpm_level(&(dpm_table->soc_table));
-	dpm_table->soc_table.dpm_state.soft_min_level =
-		dpm_table->soc_table.dpm_levels[soft_min_level].value;
-	dpm_table->soc_table.dpm_state.soft_max_level =
-		dpm_table->soc_table.dpm_levels[soft_max_level].value;
-
-	ret = arcturus_upload_dpm_level(smu, false, 0xFFFFFFFF);
+	ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK);
 	if (ret) {
 		pr_err("Failed to upload DPM Bootup Levels!");
 		return ret;
 	}
 
-	ret = arcturus_upload_dpm_level(smu, true, 0xFFFFFFFF);
+	ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK);
 	if (ret) {
 		pr_err("Failed to upload DPM Max Levels!");
 		return ret;
 	}
 
+	if (hive)
+		/*
+		 * Reset XGMI Pstate back to default
+		 * TODO: revise this when xgmi dpm is functional
+		 */
+		ret = smu_v11_0_set_xgmi_pstate(smu, 0);
+
 	return ret;
 }
 
@@ -1329,15 +1274,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu,
 
 static int arcturus_get_power_limit(struct smu_context *smu,
 				     uint32_t *limit,
-				     bool asic_default)
+				     bool cap)
 {
 	PPTable_t *pptable = smu->smu_table.driver_pptable;
 	uint32_t asic_default_power_limit = 0;
 	int ret = 0;
 	int power_src;
 
-	if (!smu->default_power_limit ||
-	    !smu->power_limit) {
+	if (!smu->power_limit) {
 		if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
 			power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC);
 			if (power_src < 0)
@@ -1360,17 +1304,11 @@ static int arcturus_get_power_limit(struct smu_context *smu,
 				pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
 		}
 
-		if (smu->od_enabled) {
-			asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit);
-			asic_default_power_limit /= 100;
-		}
-
-		smu->default_power_limit = asic_default_power_limit;
 		smu->power_limit = asic_default_power_limit;
 	}
 
-	if (asic_default)
-		*limit = smu->default_power_limit;
+	if (cap)
+		*limit = smu_v11_0_get_max_power_limit(smu);
 	else
 		*limit = smu->power_limit;
 
@@ -1380,6 +1318,8 @@ static int arcturus_get_power_limit(struct smu_context *smu,
 static int arcturus_get_power_profile_mode(struct smu_context *smu,
 					   char *buf)
 {
+	struct amdgpu_device *adev = smu->adev;
+	DpmActivityMonitorCoeffInt_t activity_monitor;
 	static const char *profile_name[] = {
 					"BOOTUP_DEFAULT",
 					"3D_FULL_SCREEN",
@@ -1388,12 +1328,38 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
 					"VR",
 					"COMPUTE",
 					"CUSTOM"};
+	static const char *title[] = {
+			"PROFILE_INDEX(NAME)",
+			"CLOCK_TYPE(NAME)",
+			"FPS",
+			"UseRlcBusy",
+			"MinActiveFreqType",
+			"MinActiveFreq",
+			"BoosterFreqType",
+			"BoosterFreq",
+			"PD_Data_limit_c",
+			"PD_Data_error_coeff",
+			"PD_Data_error_rate_coeff"};
 	uint32_t i, size = 0;
 	int16_t workload_type = 0;
+	int result = 0;
+	uint32_t smu_version;
 
-	if (!smu->pm_enabled || !buf)
+	if (!buf)
 		return -EINVAL;
 
+	result = smu_get_smc_version(smu, NULL, &smu_version);
+	if (result)
+		return result;
+
+	if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev))
+		size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
+			title[0], title[1], title[2], title[3], title[4], title[5],
+			title[6], title[7], title[8], title[9], title[10]);
+	else
+		size += sprintf(buf + size, "%16s\n",
+			title[0]);
+
 	for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
 		/*
 		 * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
@@ -1403,8 +1369,50 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
 		if (workload_type < 0)
 			continue;
 
+		if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) {
+			result = smu_update_table(smu,
+						  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+						  workload_type,
+						  (void *)(&activity_monitor),
+						  false);
+			if (result) {
+				pr_err("[%s] Failed to get activity monitor!", __func__);
+				return result;
+			}
+		}
+
 		size += sprintf(buf + size, "%2d %14s%s\n",
 			i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
+
+		if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) {
+			size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n",
+				" ",
+				0,
+				"GFXCLK",
+				activity_monitor.Gfx_FPS,
+				activity_monitor.Gfx_UseRlcBusy,
+				activity_monitor.Gfx_MinActiveFreqType,
+				activity_monitor.Gfx_MinActiveFreq,
+				activity_monitor.Gfx_BoosterFreqType,
+				activity_monitor.Gfx_BoosterFreq,
+				activity_monitor.Gfx_PD_Data_limit_c,
+				activity_monitor.Gfx_PD_Data_error_coeff,
+				activity_monitor.Gfx_PD_Data_error_rate_coeff);
+
+			size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n",
+				" ",
+				1,
+				"UCLK",
+				activity_monitor.Mem_FPS,
+				activity_monitor.Mem_UseRlcBusy,
+				activity_monitor.Mem_MinActiveFreqType,
+				activity_monitor.Mem_MinActiveFreq,
+				activity_monitor.Mem_BoosterFreqType,
+				activity_monitor.Mem_BoosterFreq,
+				activity_monitor.Mem_PD_Data_limit_c,
+				activity_monitor.Mem_PD_Data_error_coeff,
+				activity_monitor.Mem_PD_Data_error_rate_coeff);
+		}
 	}
 
 	return size;
@@ -1414,18 +1422,69 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
 					   long *input,
 					   uint32_t size)
 {
+	DpmActivityMonitorCoeffInt_t activity_monitor;
 	int workload_type = 0;
 	uint32_t profile_mode = input[size];
 	int ret = 0;
-
-	if (!smu->pm_enabled)
-		return -EINVAL;
+	uint32_t smu_version;
 
 	if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
 		pr_err("Invalid power profile mode %d\n", profile_mode);
 		return -EINVAL;
 	}
 
+	ret = smu_get_smc_version(smu, NULL, &smu_version);
+	if (ret)
+		return ret;
+
+	if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
+	     (smu_version >=0x360d00)) {
+		ret = smu_update_table(smu,
+				       SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+				       WORKLOAD_PPLIB_CUSTOM_BIT,
+				       (void *)(&activity_monitor),
+				       false);
+		if (ret) {
+			pr_err("[%s] Failed to get activity monitor!", __func__);
+			return ret;
+		}
+
+		switch (input[0]) {
+		case 0: /* Gfxclk */
+			activity_monitor.Gfx_FPS = input[1];
+			activity_monitor.Gfx_UseRlcBusy = input[2];
+			activity_monitor.Gfx_MinActiveFreqType = input[3];
+			activity_monitor.Gfx_MinActiveFreq = input[4];
+			activity_monitor.Gfx_BoosterFreqType = input[5];
+			activity_monitor.Gfx_BoosterFreq = input[6];
+			activity_monitor.Gfx_PD_Data_limit_c = input[7];
+			activity_monitor.Gfx_PD_Data_error_coeff = input[8];
+			activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+			break;
+		case 1: /* Uclk */
+			activity_monitor.Mem_FPS = input[1];
+			activity_monitor.Mem_UseRlcBusy = input[2];
+			activity_monitor.Mem_MinActiveFreqType = input[3];
+			activity_monitor.Mem_MinActiveFreq = input[4];
+			activity_monitor.Mem_BoosterFreqType = input[5];
+			activity_monitor.Mem_BoosterFreq = input[6];
+			activity_monitor.Mem_PD_Data_limit_c = input[7];
+			activity_monitor.Mem_PD_Data_error_coeff = input[8];
+			activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+			break;
+		}
+
+		ret = smu_update_table(smu,
+				       SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+				       WORKLOAD_PPLIB_CUSTOM_BIT,
+				       (void *)(&activity_monitor),
+				       true);
+		if (ret) {
+			pr_err("[%s] Failed to set activity monitor!", __func__);
+			return ret;
+		}
+	}
+
 	/*
 	 * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
 	 * Not all profile modes are supported on arcturus.
@@ -1891,6 +1950,270 @@ static bool arcturus_is_dpm_running(struct smu_context *smu)
 	return !!(feature_enabled & SMC_DPM_FEATURE);
 }
 
+static int arcturus_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
+{
+	struct smu_power_context *smu_power = &smu->smu_power;
+	struct smu_power_gate *power_gate = &smu_power->power_gate;
+	int ret = 0;
+
+	if (enable) {
+		if (!smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) {
+			ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 1);
+			if (ret) {
+				pr_err("[EnableVCNDPM] failed!\n");
+				return ret;
+			}
+		}
+		power_gate->vcn_gated = false;
+	} else {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) {
+			ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 0);
+			if (ret) {
+				pr_err("[DisableVCNDPM] failed!\n");
+				return ret;
+			}
+		}
+		power_gate->vcn_gated = true;
+	}
+
+	return ret;
+}
+
+
+static void arcturus_fill_eeprom_i2c_req(SwI2cRequest_t  *req, bool write,
+				  uint8_t address, uint32_t numbytes,
+				  uint8_t *data)
+{
+	int i;
+
+	BUG_ON(numbytes > MAX_SW_I2C_COMMANDS);
+
+	req->I2CcontrollerPort = 0;
+	req->I2CSpeed = 2;
+	req->SlaveAddress = address;
+	req->NumCmds = numbytes;
+
+	for (i = 0; i < numbytes; i++) {
+		SwI2cCmd_t *cmd =  &req->SwI2cCmds[i];
+
+		/* First 2 bytes are always write for lower 2b EEPROM address */
+		if (i < 2)
+			cmd->Cmd = 1;
+		else
+			cmd->Cmd = write;
+
+
+		/* Add RESTART for read  after address filled */
+		cmd->CmdConfig |= (i == 2 && !write) ? CMDCONFIG_RESTART_MASK : 0;
+
+		/* Add STOP in the end */
+		cmd->CmdConfig |= (i == (numbytes - 1)) ? CMDCONFIG_STOP_MASK : 0;
+
+		/* Fill with data regardless if read or write to simplify code */
+		cmd->RegisterAddr = data[i];
+	}
+}
+
+static int arcturus_i2c_eeprom_read_data(struct i2c_adapter *control,
+					       uint8_t address,
+					       uint8_t *data,
+					       uint32_t numbytes)
+{
+	uint32_t  i, ret = 0;
+	SwI2cRequest_t req;
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct smu_table_context *smu_table = &adev->smu.smu_table;
+	struct smu_table *table = &smu_table->driver_table;
+
+	memset(&req, 0, sizeof(req));
+	arcturus_fill_eeprom_i2c_req(&req, false, address, numbytes, data);
+
+	mutex_lock(&adev->smu.mutex);
+	/* Now read data starting with that address */
+	ret = smu_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req,
+					true);
+	mutex_unlock(&adev->smu.mutex);
+
+	if (!ret) {
+		SwI2cRequest_t *res = (SwI2cRequest_t *)table->cpu_addr;
+
+		/* Assume SMU  fills res.SwI2cCmds[i].Data with read bytes */
+		for (i = 0; i < numbytes; i++)
+			data[i] = res->SwI2cCmds[i].Data;
+
+		pr_debug("arcturus_i2c_eeprom_read_data, address = %x, bytes = %d, data :",
+				  (uint16_t)address, numbytes);
+
+		print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE,
+			       8, 1, data, numbytes, false);
+	} else
+		pr_err("arcturus_i2c_eeprom_read_data - error occurred :%x", ret);
+
+	return ret;
+}
+
+static int arcturus_i2c_eeprom_write_data(struct i2c_adapter *control,
+						uint8_t address,
+						uint8_t *data,
+						uint32_t numbytes)
+{
+	uint32_t ret;
+	SwI2cRequest_t req;
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+
+	memset(&req, 0, sizeof(req));
+	arcturus_fill_eeprom_i2c_req(&req, true, address, numbytes, data);
+
+	mutex_lock(&adev->smu.mutex);
+	ret = smu_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, true);
+	mutex_unlock(&adev->smu.mutex);
+
+	if (!ret) {
+		pr_debug("arcturus_i2c_write(), address = %x, bytes = %d , data: ",
+					 (uint16_t)address, numbytes);
+
+		print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE,
+			       8, 1, data, numbytes, false);
+		/*
+		 * According to EEPROM spec there is a MAX of 10 ms required for
+		 * EEPROM to flush internal RX buffer after STOP was issued at the
+		 * end of write transaction. During this time the EEPROM will not be
+		 * responsive to any more commands - so wait a bit more.
+		 */
+		msleep(10);
+
+	} else
+		pr_err("arcturus_i2c_write- error occurred :%x", ret);
+
+	return ret;
+}
+
+static int arcturus_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap,
+			      struct i2c_msg *msgs, int num)
+{
+	uint32_t  i, j, ret, data_size, data_chunk_size, next_eeprom_addr = 0;
+	uint8_t *data_ptr, data_chunk[MAX_SW_I2C_COMMANDS] = { 0 };
+
+	for (i = 0; i < num; i++) {
+		/*
+		 * SMU interface allows at most MAX_SW_I2C_COMMANDS bytes of data at
+		 * once and hence the data needs to be spliced into chunks and sent each
+		 * chunk separately
+		 */
+		data_size = msgs[i].len - 2;
+		data_chunk_size = MAX_SW_I2C_COMMANDS - 2;
+		next_eeprom_addr = (msgs[i].buf[0] << 8 & 0xff00) | (msgs[i].buf[1] & 0xff);
+		data_ptr = msgs[i].buf + 2;
+
+		for (j = 0; j < data_size / data_chunk_size; j++) {
+			/* Insert the EEPROM dest addess, bits 0-15 */
+			data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff);
+			data_chunk[1] = (next_eeprom_addr & 0xff);
+
+			if (msgs[i].flags & I2C_M_RD) {
+				ret = arcturus_i2c_eeprom_read_data(i2c_adap,
+								(uint8_t)msgs[i].addr,
+								data_chunk, MAX_SW_I2C_COMMANDS);
+
+				memcpy(data_ptr, data_chunk + 2, data_chunk_size);
+			} else {
+
+				memcpy(data_chunk + 2, data_ptr, data_chunk_size);
+
+				ret = arcturus_i2c_eeprom_write_data(i2c_adap,
+								 (uint8_t)msgs[i].addr,
+								 data_chunk, MAX_SW_I2C_COMMANDS);
+			}
+
+			if (ret) {
+				num = -EIO;
+				goto fail;
+			}
+
+			next_eeprom_addr += data_chunk_size;
+			data_ptr += data_chunk_size;
+		}
+
+		if (data_size % data_chunk_size) {
+			data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff);
+			data_chunk[1] = (next_eeprom_addr & 0xff);
+
+			if (msgs[i].flags & I2C_M_RD) {
+				ret = arcturus_i2c_eeprom_read_data(i2c_adap,
+								(uint8_t)msgs[i].addr,
+								data_chunk, (data_size % data_chunk_size) + 2);
+
+				memcpy(data_ptr, data_chunk + 2, data_size % data_chunk_size);
+			} else {
+				memcpy(data_chunk + 2, data_ptr, data_size % data_chunk_size);
+
+				ret = arcturus_i2c_eeprom_write_data(i2c_adap,
+								 (uint8_t)msgs[i].addr,
+								 data_chunk, (data_size % data_chunk_size) + 2);
+			}
+
+			if (ret) {
+				num = -EIO;
+				goto fail;
+			}
+		}
+	}
+
+fail:
+	return num;
+}
+
+static u32 arcturus_i2c_eeprom_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+
+static const struct i2c_algorithm arcturus_i2c_eeprom_i2c_algo = {
+	.master_xfer = arcturus_i2c_eeprom_i2c_xfer,
+	.functionality = arcturus_i2c_eeprom_i2c_func,
+};
+
+static int arcturus_i2c_eeprom_control_init(struct i2c_adapter *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct smu_context *smu = &adev->smu;
+	int res;
+
+	if (!smu->pm_enabled)
+		return -EOPNOTSUPP;
+
+	control->owner = THIS_MODULE;
+	control->class = I2C_CLASS_SPD;
+	control->dev.parent = &adev->pdev->dev;
+	control->algo = &arcturus_i2c_eeprom_i2c_algo;
+	snprintf(control->name, sizeof(control->name), "RAS EEPROM");
+
+	res = i2c_add_adapter(control);
+	if (res)
+		DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
+
+	return res;
+}
+
+static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct smu_context *smu = &adev->smu;
+
+	if (!smu->pm_enabled)
+		return;
+
+	i2c_del_adapter(control);
+}
+
+static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu)
+{
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
+
+	return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
+}
+
 static const struct pptable_funcs arcturus_ppt_funcs = {
 	/* translate smu index into arcturus specific index */
 	.get_smu_msg_index = arcturus_get_smu_msg_index,
@@ -1909,7 +2232,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	/* init dpm */
 	.get_allowed_feature_mask = arcturus_get_allowed_feature_mask,
 	/* btc */
-	.run_afll_btc = arcturus_run_btc_afll,
+	.run_btc = arcturus_run_btc,
 	/* dpm/clk tables */
 	.set_default_dpm_table = arcturus_set_default_dpm_table,
 	.populate_umd_state_clk = arcturus_populate_umd_state_clk,
@@ -1925,16 +2248,68 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	.get_profiling_clk_mask = arcturus_get_profiling_clk_mask,
 	.get_power_profile_mode = arcturus_get_power_profile_mode,
 	.set_power_profile_mode = arcturus_set_power_profile_mode,
+	.set_performance_level = smu_v11_0_set_performance_level,
 	/* debug (internal used) */
 	.dump_pptable = arcturus_dump_pptable,
 	.get_power_limit = arcturus_get_power_limit,
 	.is_dpm_running = arcturus_is_dpm_running,
+	.dpm_set_uvd_enable = arcturus_dpm_set_uvd_enable,
+	.i2c_eeprom_init = arcturus_i2c_eeprom_control_init,
+	.i2c_eeprom_fini = arcturus_i2c_eeprom_control_fini,
+	.init_microcode = smu_v11_0_init_microcode,
+	.load_microcode = smu_v11_0_load_microcode,
+	.init_smc_tables = smu_v11_0_init_smc_tables,
+	.fini_smc_tables = smu_v11_0_fini_smc_tables,
+	.init_power = smu_v11_0_init_power,
+	.fini_power = smu_v11_0_fini_power,
+	.check_fw_status = smu_v11_0_check_fw_status,
+	.setup_pptable = smu_v11_0_setup_pptable,
+	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
+	.get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios,
+	.check_pptable = smu_v11_0_check_pptable,
+	.parse_pptable = smu_v11_0_parse_pptable,
+	.populate_smc_tables = smu_v11_0_populate_smc_pptable,
+	.check_fw_version = smu_v11_0_check_fw_version,
+	.write_pptable = smu_v11_0_write_pptable,
+	.set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep,
+	.set_driver_table_location = smu_v11_0_set_driver_table_location,
+	.set_tool_table_location = smu_v11_0_set_tool_table_location,
+	.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
+	.system_features_control = smu_v11_0_system_features_control,
+	.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
+	.read_smc_arg = smu_v11_0_read_arg,
+	.init_display_count = smu_v11_0_init_display_count,
+	.set_allowed_mask = smu_v11_0_set_allowed_mask,
+	.get_enabled_mask = smu_v11_0_get_enabled_mask,
+	.notify_display_change = smu_v11_0_notify_display_change,
+	.set_power_limit = smu_v11_0_set_power_limit,
+	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
+	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
+	.start_thermal_control = smu_v11_0_start_thermal_control,
+	.stop_thermal_control = smu_v11_0_stop_thermal_control,
+	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
+	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
+	.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
+	.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
+	.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
+	.set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm,
+	.set_xgmi_pstate = smu_v11_0_set_xgmi_pstate,
+	.gfx_off_control = smu_v11_0_gfx_off_control,
+	.register_irq_handler = smu_v11_0_register_irq_handler,
+	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
+	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
+	.baco_is_support= smu_v11_0_baco_is_support,
+	.baco_get_state = smu_v11_0_baco_get_state,
+	.baco_set_state = smu_v11_0_baco_set_state,
+	.baco_enter = smu_v11_0_baco_enter,
+	.baco_exit = smu_v11_0_baco_exit,
+	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
+	.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
+	.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
+	.get_pptable_power_limit = arcturus_get_pptable_power_limit,
 };
 
 void arcturus_set_ppt_funcs(struct smu_context *smu)
 {
-	struct smu_table_context *smu_table = &smu->smu_table;
-
 	smu->ppt_funcs = &arcturus_ppt_funcs;
-	smu_table->table_count = TABLE_COUNT;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
index cc63705920dc..2773966ae434 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
@@ -36,7 +36,8 @@ HARDWARE_MGR = hwmgr.o processpptables.o \
 		pp_overdriver.o smu_helper.o \
 		vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \
 		vega20_thermal.o common_baco.o vega10_baco.o  vega20_baco.o \
-		vega12_baco.o smu9_baco.o
+		vega12_baco.o smu9_baco.o tonga_baco.o polaris_baco.o fiji_baco.o \
+		ci_baco.o smu7_baco.o
 
 AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR))
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c
new file mode 100644
index 000000000000..3be40114e63d
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "ci_baco.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "smu/smu_7_0_1_d.h"
+#include "smu/smu_7_0_1_sh_mask.h"
+
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+static const struct baco_cmd_entry gpio_tbl[] =
+{
+	{ CMD_WRITE, mmGPIOPAD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PU_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_MASK, 0, 0, 0, 0xff77ffff },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_MASK, 0, 0, 0, 0xffffffff },
+	{ CMD_WRITE, mmDC_GPIO_GENERIC_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_GENERIC_MASK, 0, 0, 0, 0x03333333 },
+	{ CMD_WRITE, mmDC_GPIO_SYNCA_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_SYNCA_MASK, 0, 0, 0, 0x00001111 }
+};
+
+static const struct baco_cmd_entry enable_fb_req_rej_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0300024 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_WRITE, mmBIF_FB_EN, 0, 0, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry use_bclk_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x4000000, 0x1a, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL_MASK, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_MCLK_SEL_MASK, MPLL_CNTL_MODE__MPLL_MCLK_SEL__SHIFT, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry turn_off_plls_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmDISPPLL_BG_CNTL, DISPPLL_BG_CNTL__DISPPLL_BG_PDN_MASK, DISPPLL_BG_CNTL__DISPPLL_BG_PDN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL_DC },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_DC__OSC_EN_MASK, CG_CLKPIN_CNTL_DC__OSC_EN__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_DC__XTALIN_SEL_MASK, CG_CLKPIN_CNTL_DC__XTALIN_SEL__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmPLL_CNTL, PLL_CNTL__PLL_RESET_MASK, PLL_CNTL__PLL_RESET__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmPLL_CNTL, PLL_CNTL__PLL_POWER_DOWN_MASK, PLL_CNTL__PLL_POWER_DOWN__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmPLL_CNTL, PLL_CNTL__PLL_BYPASS_CAL_MASK, PLL_CNTL__PLL_BYPASS_CAL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK, CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_PWRON_MASK, CG_SPLL_FUNC_CNTL__SPLL_PWRON__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x2000000, 0x19, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x8000000, 0x1b, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__GLOBAL_MPLL_RESET_MASK, MPLL_CNTL_MODE__GLOBAL_MPLL_RESET__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmMPLL_CONTROL, 0, 0, 0, 0x00000006 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY0_D0, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY0_D1, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY1_D0, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY1_D1, 0, 0, 0, 0x00007740 },
+	{ CMD_READMODIFYWRITE, mmMCLK_PWRMGT_CNTL, MCLK_PWRMGT_CNTL__MRDCK0_PDNB_MASK, MCLK_PWRMGT_CNTL__MRDCK0_PDNB__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMCLK_PWRMGT_CNTL, MCLK_PWRMGT_CNTL__MRDCK1_PDNB_MASK, MCLK_PWRMGT_CNTL__MRDCK1_PDNB__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_CNTL_2, MC_SEQ_CNTL_2__DRST_PU_MASK, MC_SEQ_CNTL_2__DRST_PU__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_CNTL_2, MC_SEQ_CNTL_2__DRST_PD_MASK, MC_SEQ_CNTL_2__DRST_PD__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x4 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMISC_CLK_CTRL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__ZCLK_SEL_MASK, MISC_CLK_CTRL__ZCLK_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL_MASK, MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL__SHIFT, 0, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixTHM_CLK_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__CMON_CLK_SEL_MASK, THM_CLK_CNTL__CMON_CLK_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__TMON_CLK_SEL_MASK, THM_CLK_CNTL__TMON_CLK_SEL__SHIFT, 0, 0x2 }
+};
+
+static const struct baco_cmd_entry enter_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR,         mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, 0, 5, 0x02 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR,         mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, 0, 5,  0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR,         mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR,         mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, 0, 5, 0x08 },
+	{ CMD_WAITFOR,         mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x40 }
+};
+
+#define BACO_CNTL__PWRGOOD_MASK  BACO_CNTL__PWRGOOD_GPIO_MASK+BACO_CNTL__PWRGOOD_MEM_MASK+BACO_CNTL__PWRGOOD_DVO_MASK
+
+static const struct baco_cmd_entry exit_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK,           BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK,          BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 },
+	{ CMD_DELAY_MS, 0, 0, 0, 20, 0 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x20 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x10 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 }
+};
+
+static const struct baco_cmd_entry clean_baco_tbl[] =
+{
+	{ CMD_WRITE, mmBIOS_SCRATCH_6, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmCP_PFP_UCODE_ADDR, 0, 0, 0, 0 }
+};
+
+int ci_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
+{
+	enum BACO_STATE cur_state;
+
+	smu7_baco_get_state(hwmgr, &cur_state);
+
+	if (cur_state == state)
+		/* aisc already in the target state */
+		return 0;
+
+	if (state == BACO_STATE_IN) {
+		baco_program_registers(hwmgr, gpio_tbl, ARRAY_SIZE(gpio_tbl));
+		baco_program_registers(hwmgr, enable_fb_req_rej_tbl,
+				       ARRAY_SIZE(enable_fb_req_rej_tbl));
+		baco_program_registers(hwmgr, use_bclk_tbl, ARRAY_SIZE(use_bclk_tbl));
+		baco_program_registers(hwmgr, turn_off_plls_tbl,
+				       ARRAY_SIZE(turn_off_plls_tbl));
+		if (baco_program_registers(hwmgr, enter_baco_tbl,
+					   ARRAY_SIZE(enter_baco_tbl)))
+			return 0;
+
+	} else if (state == BACO_STATE_OUT) {
+		/* HW requires at least 20ms between regulator off and on */
+		msleep(20);
+		/* Execute Hardware BACO exit sequence */
+		if (baco_program_registers(hwmgr, exit_baco_tbl,
+					   ARRAY_SIZE(exit_baco_tbl))) {
+			if (baco_program_registers(hwmgr, clean_baco_tbl,
+						   ARRAY_SIZE(clean_baco_tbl)))
+				return 0;
+		}
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.h
new file mode 100644
index 000000000000..17041f187020
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CI_BACO_H__
+#define __CI_BACO_H__
+#include "smu7_baco.h"
+
+extern int ci_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c
index 9c57c1f67749..1c73776bd606 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c
@@ -79,6 +79,25 @@ static bool baco_cmd_handler(struct pp_hwmgr *hwmgr, u32 command, u32 reg, u32 m
 	return ret;
 }
 
+bool baco_program_registers(struct pp_hwmgr *hwmgr,
+			    const struct baco_cmd_entry *entry,
+			    const u32 array_size)
+{
+	u32 i, reg = 0;
+
+	for (i = 0; i < array_size; i++) {
+		if ((entry[i].cmd == CMD_WRITE) ||
+		    (entry[i].cmd == CMD_READMODIFYWRITE) ||
+		    (entry[i].cmd == CMD_WAITFOR))
+			reg = entry[i].reg_offset;
+		if (!baco_cmd_handler(hwmgr, entry[i].cmd, reg, entry[i].mask,
+				     entry[i].shift, entry[i].val, entry[i].timeout))
+			return false;
+	}
+
+	return true;
+}
+
 bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr,
 				 const struct soc15_baco_cmd_entry *entry,
 				 const u32 array_size)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h
index 95296c916f4e..8393eb62706d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h
@@ -33,6 +33,15 @@ enum baco_cmd_type {
 	CMD_DELAY_US,
 };
 
+struct baco_cmd_entry {
+	enum baco_cmd_type cmd;
+	uint32_t 	reg_offset;
+	uint32_t     	mask;
+	uint32_t     	shift;
+	uint32_t     	timeout;
+	uint32_t     	val;
+};
+
 struct soc15_baco_cmd_entry {
 	enum baco_cmd_type cmd;
 	uint32_t 	hwip;
@@ -44,6 +53,10 @@ struct soc15_baco_cmd_entry {
 	uint32_t     	timeout;
 	uint32_t     	val;
 };
+
+extern bool baco_program_registers(struct pp_hwmgr *hwmgr,
+				   const struct baco_cmd_entry *entry,
+				   const u32 array_size);
 extern bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr,
 					const struct soc15_baco_cmd_entry *entry,
 					const u32 array_size);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.c
new file mode 100644
index 000000000000..c0368f2dfb21
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "fiji_baco.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
+#include "smu/smu_7_1_3_d.h"
+#include "smu/smu_7_1_3_sh_mask.h"
+
+
+static const struct baco_cmd_entry gpio_tbl[] =
+{
+	{ CMD_WRITE, mmGPIOPAD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PU_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_MASK, 0, 0, 0, 0xff77ffff },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_MASK, 0, 0, 0, 0xffffffff },
+	{ CMD_WRITE, mmDC_GPIO_GENERIC_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_GENERIC_MASK, 0, 0, 0, 0x03333333 },
+	{ CMD_WRITE, mmDC_GPIO_SYNCA_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_SYNCA_MASK, 0, 0, 0, 0x00001111 }
+};
+
+static const struct baco_cmd_entry enable_fb_req_rej_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0300024 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_WRITE, mmBIF_FB_EN, 0, 0, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry use_bclk_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0,  0x0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0,  0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x4000000, 0x1a, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x2 }
+};
+
+static const struct baco_cmd_entry turn_off_plls_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK, CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT, 0,     0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_PWRON_MASK, CG_SPLL_FUNC_CNTL__SPLL_PWRON__SHIFT, 0,     0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x2000000, 0x19, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x8000000, 0x1b, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry clk_req_b_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x4 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMISC_CLK_CTRL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__ZCLK_SEL_MASK, MISC_CLK_CTRL__ZCLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK, CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixTHM_CLK_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__CMON_CLK_SEL_MASK, THM_CLK_CNTL__CMON_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__TMON_CLK_SEL_MASK, THM_CLK_CNTL__TMON_CLK_SEL__SHIFT, 0, 0x1 }
+};
+
+static const struct baco_cmd_entry enter_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, 0, 5, 0x40000 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, 0, 5, 0x02 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, 0, 5, 0x08 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x40 }
+};
+
+#define BACO_CNTL__PWRGOOD_MASK  BACO_CNTL__PWRGOOD_GPIO_MASK+BACO_CNTL__PWRGOOD_MEM_MASK+BACO_CNTL__PWRGOOD_DVO_MASK
+
+static const struct baco_cmd_entry exit_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x200 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x100 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 }
+};
+
+static const struct baco_cmd_entry clean_baco_tbl[] =
+{
+	{ CMD_WRITE, mmBIOS_SCRATCH_0, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_1, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_2, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_3, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_4, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_5, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_6, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_7, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_8, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_9, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_10, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_11, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_12, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_13, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_14, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_15, 0, 0, 0, 0 }
+};
+
+int fiji_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
+{
+	enum BACO_STATE cur_state;
+
+	smu7_baco_get_state(hwmgr, &cur_state);
+
+	if (cur_state == state)
+		/* aisc already in the target state */
+		return 0;
+
+	if (state == BACO_STATE_IN) {
+		baco_program_registers(hwmgr, gpio_tbl, ARRAY_SIZE(gpio_tbl));
+		baco_program_registers(hwmgr, enable_fb_req_rej_tbl,
+				       ARRAY_SIZE(enable_fb_req_rej_tbl));
+		baco_program_registers(hwmgr, use_bclk_tbl, ARRAY_SIZE(use_bclk_tbl));
+		baco_program_registers(hwmgr, turn_off_plls_tbl,
+				       ARRAY_SIZE(turn_off_plls_tbl));
+		baco_program_registers(hwmgr, clk_req_b_tbl, ARRAY_SIZE(clk_req_b_tbl));
+		if (baco_program_registers(hwmgr, enter_baco_tbl,
+					   ARRAY_SIZE(enter_baco_tbl)))
+			return 0;
+
+	} else if (state == BACO_STATE_OUT) {
+		/* HW requires at least 20ms between regulator off and on */
+		msleep(20);
+		/* Execute Hardware BACO exit sequence */
+		if (baco_program_registers(hwmgr, exit_baco_tbl,
+					   ARRAY_SIZE(exit_baco_tbl))) {
+			if (baco_program_registers(hwmgr, clean_baco_tbl,
+						   ARRAY_SIZE(clean_baco_tbl)))
+				return 0;
+		}
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.h
new file mode 100644
index 000000000000..47f402900bdb
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_baco.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __FIJI_BACO_H__
+#define __FIJI_BACO_H__
+#include "smu7_baco.h"
+
+extern int fiji_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index cc57fb953e62..9454ab50f9a1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -81,8 +81,8 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	adev = hwmgr->adev;
 
 	/* Skip for suspend/resume case */
-	if (smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev)
-		&& adev->in_suspend) {
+	if (!hwmgr->pp_one_vf && smum_is_dpm_running(hwmgr)
+	    && !amdgpu_passthrough(adev) && adev->in_suspend) {
 		pr_info("dpm has been enabled\n");
 		return 0;
 	}
@@ -99,6 +99,9 @@ int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 
 	PHM_FUNC_CHECK(hwmgr);
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (!smum_is_dpm_running(hwmgr)) {
 		pr_info("dpm has been disabled\n");
 		return 0;
@@ -200,6 +203,9 @@ int phm_stop_thermal_controller(struct pp_hwmgr *hwmgr)
 {
 	PHM_FUNC_CHECK(hwmgr);
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (hwmgr->hwmgr_func->stop_thermal_controller == NULL)
 		return -EINVAL;
 
@@ -237,6 +243,9 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = hwmgr->adev;
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (hwmgr->hwmgr_func->get_thermal_temperature_range)
 		hwmgr->hwmgr_func->get_thermal_temperature_range(
 				hwmgr, &range);
@@ -263,6 +272,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 bool phm_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr)
 {
 	PHM_FUNC_CHECK(hwmgr);
+	if (hwmgr->pp_one_vf)
+		return false;
 
 	if (hwmgr->hwmgr_func->check_smc_update_required_for_display_configuration == NULL)
 		return false;
@@ -482,6 +493,9 @@ int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr)
 {
 	PHM_FUNC_CHECK(hwmgr);
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index a24beaa4fb01..f48fdc7f0382 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr)
 
 int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev;
+
 	if (!hwmgr)
 		return -EINVAL;
 
@@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 	hwmgr_init_workload_prority(hwmgr);
 	hwmgr->gfxoff_state_changed_by_workload = false;
 
+	adev = hwmgr->adev;
+
 	switch (hwmgr->chip_family) {
 	case AMDGPU_FAMILY_CI:
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 		hwmgr->smumgr_funcs = &ci_smu_funcs;
 		ci_set_asic_special_caps(hwmgr);
 		hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK |
@@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 		smu7_init_function_pointers(hwmgr);
 		break;
 	case AMDGPU_FAMILY_CZ:
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 		hwmgr->od_enabled = false;
 		hwmgr->smumgr_funcs = &smu8_smu_funcs;
 		hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 		smu8_init_function_pointers(hwmgr);
 		break;
 	case AMDGPU_FAMILY_VI:
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 		hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 		switch (hwmgr->chip_id) {
 		case CHIP_TOPAZ:
@@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 	case AMDGPU_FAMILY_AI:
 		switch (hwmgr->chip_id) {
 		case CHIP_VEGA10:
+			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 			hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 			hwmgr->smumgr_funcs = &vega10_smu_funcs;
 			vega10_hwmgr_init(hwmgr);
@@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 			vega12_hwmgr_init(hwmgr);
 			break;
 		case CHIP_VEGA20:
+			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 			hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 			hwmgr->smumgr_funcs = &vega20_smu_funcs;
 			vega20_hwmgr_init(hwmgr);
@@ -212,6 +221,9 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
+	hwmgr->pp_one_vf = amdgpu_sriov_is_pp_one_vf((struct amdgpu_device *)hwmgr->adev);
+	hwmgr->pm_en = (amdgpu_dpm && (hwmgr->not_vf || hwmgr->pp_one_vf))
+			? true : false;
 	if (!hwmgr->pm_en)
 		return 0;
 
@@ -270,7 +282,7 @@ err:
 
 int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
 {
-	if (!hwmgr || !hwmgr->pm_en)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
 		return 0;
 
 	phm_stop_thermal_controller(hwmgr);
@@ -290,7 +302,7 @@ int hwmgr_suspend(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	if (!hwmgr || !hwmgr->pm_en)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
 		return 0;
 
 	phm_disable_smc_firmware_ctf(hwmgr);
@@ -312,7 +324,7 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr)
 	if (!hwmgr)
 		return -EINVAL;
 
-	if (!hwmgr->pm_en)
+	if (!hwmgr->not_vf || !hwmgr->pm_en)
 		return 0;
 
 	ret = phm_setup_asic(hwmgr);
@@ -356,6 +368,8 @@ int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id,
 
 	switch (task_id) {
 	case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE:
+		if (!hwmgr->not_vf)
+			return ret;
 		ret = phm_pre_display_configuration_changed(hwmgr);
 		if (ret)
 			return ret;
@@ -372,6 +386,8 @@ int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id,
 		enum PP_StateUILabel requested_ui_label;
 		struct pp_power_state *requested_ps = NULL;
 
+		if (!hwmgr->not_vf)
+			return ret;
 		if (user_state == NULL) {
 			ret = -EINVAL;
 			break;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.c
new file mode 100644
index 000000000000..8f8e296f2fe9
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "polaris_baco.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "dce/dce_11_0_d.h"
+#include "dce/dce_11_0_sh_mask.h"
+
+#include "smu/smu_7_1_3_d.h"
+#include "smu/smu_7_1_3_sh_mask.h"
+
+static const struct baco_cmd_entry gpio_tbl[] =
+{
+	{ CMD_WRITE, mmGPIOPAD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PU_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_MASK, 0, 0, 0, 0xff77ffff },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_MASK, 0, 0, 0, 0xffffffff },
+	{ CMD_WRITE, mmDC_GPIO_GENERIC_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_GENERIC_MASK, 0, 0, 0, 0x03333333 },
+	{ CMD_WRITE, mmDC_GPIO_SYNCA_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_SYNCA_MASK, 0, 0, 0, 0x00001111 }
+};
+
+static const struct baco_cmd_entry enable_fb_req_rej_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0300024 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_WRITE, mmBIF_FB_EN, 0, 0, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry use_bclk_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x4000000, 0x1a, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixGCK_DFS_BYPASS_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, GCK_DFS_BYPASS_CNTL__BYPASSACLK_MASK, GCK_DFS_BYPASS_CNTL__BYPASSACLK__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL_MASK, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_MCLK_SEL_MASK, MPLL_CNTL_MODE__MPLL_MCLK_SEL__SHIFT, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry turn_off_plls_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_PAD_STRENGTH_1, DC_GPIO_PAD_STRENGTH_1__GENLK_STRENGTH_SP_MASK, DC_GPIO_PAD_STRENGTH_1__GENLK_STRENGTH_SP__SHIFT, 0, 0x1 },
+	{ CMD_DELAY_US, 0, 0, 0, 1, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_DRAM, MC_SEQ_DRAM__RST_CTL_MASK, MC_SEQ_DRAM__RST_CTL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC05002B0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x10, 0x4, 0, 0x1 },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, 0x10, 0, 1, 0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC050032C },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x10, 0x4, 0, 0x1 },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, 0x10, 0, 1, 0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500080 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, 0xda2, 0x40, 0x6, 0, 0x0 },
+	{ CMD_DELAY_US, 0, 0, 0, 3, 0x0 },
+	{ CMD_READMODIFYWRITE, 0xda2, 0x8, 0x3, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, 0xda2, 0x3fff00, 0x8, 0, 0x32 },
+	{ CMD_DELAY_US, 0, 0, 0, 3, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMPLL_FUNC_CNTL_2, MPLL_FUNC_CNTL_2__ISO_DIS_P_MASK, MPLL_FUNC_CNTL_2__ISO_DIS_P__SHIFT, 0, 0x0 },
+	{ CMD_DELAY_US, 0, 0, 0, 5, 0x0 }
+};
+
+static const struct baco_cmd_entry clk_req_b_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixTHM_CLK_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__CMON_CLK_SEL_MASK, THM_CLK_CNTL__CMON_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__TMON_CLK_SEL_MASK, THM_CLK_CNTL__TMON_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMISC_CLK_CTRL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__ZCLK_SEL_MASK, MISC_CLK_CTRL__ZCLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK, CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0,     0x4 }
+};
+
+static const struct baco_cmd_entry enter_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, 0, 5, 0x40000 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, 0, 5, 0x02 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, 0, 5, 0x08 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x40 }
+};
+
+#define BACO_CNTL__PWRGOOD_MASK  BACO_CNTL__PWRGOOD_GPIO_MASK+BACO_CNTL__PWRGOOD_MEM_MASK+BACO_CNTL__PWRGOOD_DVO_MASK
+
+static const struct baco_cmd_entry exit_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x200 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x100 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 }
+};
+
+static const struct baco_cmd_entry clean_baco_tbl[] =
+{
+	{ CMD_WRITE, mmBIOS_SCRATCH_6, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_7, 0, 0, 0, 0 }
+};
+
+static const struct baco_cmd_entry use_bclk_tbl_vg[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x4000000, 0x1a, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixGCK_DFS_BYPASS_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, GCK_DFS_BYPASS_CNTL__BYPASSACLK_MASK, GCK_DFS_BYPASS_CNTL__BYPASSACLK__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x2 }
+};
+
+static const struct baco_cmd_entry turn_off_plls_tbl_vg[] =
+{
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_PAD_STRENGTH_1, DC_GPIO_PAD_STRENGTH_1__GENLK_STRENGTH_SP_MASK, DC_GPIO_PAD_STRENGTH_1__GENLK_STRENGTH_SP__SHIFT, 0, 0x1 },
+	{ CMD_DELAY_US, 0, 0, 0, 1, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_DRAM, MC_SEQ_DRAM__RST_CTL_MASK, MC_SEQ_DRAM__RST_CTL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC05002B0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x10, 0x4, 0, 0x1 },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, 0x10, 0, 1, 0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC050032C },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x10, 0x4, 0, 0x1 },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, 0x10, 0, 1, 0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500080 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_DELAY_US, 0, 0, 0, 3, 0x0 },
+	{ CMD_DELAY_US, 0, 0, 0, 3, 0x0 },
+	{ CMD_DELAY_US, 0, 0, 0, 5, 0x0 }
+};
+
+int polaris_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
+{
+	enum BACO_STATE cur_state;
+
+	smu7_baco_get_state(hwmgr, &cur_state);
+
+	if (cur_state == state)
+		/* aisc already in the target state */
+		return 0;
+
+	if (state == BACO_STATE_IN) {
+		baco_program_registers(hwmgr, gpio_tbl, ARRAY_SIZE(gpio_tbl));
+		baco_program_registers(hwmgr, enable_fb_req_rej_tbl,
+				       ARRAY_SIZE(enable_fb_req_rej_tbl));
+		if (hwmgr->chip_id == CHIP_VEGAM) {
+			baco_program_registers(hwmgr, use_bclk_tbl_vg, ARRAY_SIZE(use_bclk_tbl_vg));
+			baco_program_registers(hwmgr, turn_off_plls_tbl_vg,
+					       ARRAY_SIZE(turn_off_plls_tbl_vg));
+		} else {
+			baco_program_registers(hwmgr, use_bclk_tbl, ARRAY_SIZE(use_bclk_tbl));
+			baco_program_registers(hwmgr, turn_off_plls_tbl,
+					       ARRAY_SIZE(turn_off_plls_tbl));
+		}
+		baco_program_registers(hwmgr, clk_req_b_tbl, ARRAY_SIZE(clk_req_b_tbl));
+		if (baco_program_registers(hwmgr, enter_baco_tbl,
+					   ARRAY_SIZE(enter_baco_tbl)))
+			return 0;
+
+	} else if (state == BACO_STATE_OUT) {
+		/* HW requires at least 20ms between regulator off and on */
+		msleep(20);
+		/* Execute Hardware BACO exit sequence */
+		if (baco_program_registers(hwmgr, exit_baco_tbl,
+					   ARRAY_SIZE(exit_baco_tbl))) {
+			if (baco_program_registers(hwmgr, clean_baco_tbl,
+						   ARRAY_SIZE(clean_baco_tbl)))
+				return 0;
+		}
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.h
new file mode 100644
index 000000000000..87a5fa0a157a
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris_baco.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __POLARIS_BACO_H__
+#define __POLARIS_BACO_H__
+#include "smu7_baco.h"
+
+extern int polaris_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index 6bf48934fdc4..31a32a79cfc2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -262,20 +262,22 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip_display_set
 	uint32_t index;
 	long workload;
 
-	if (!skip_display_settings)
-		phm_display_configuration_changed(hwmgr);
-
-	if (hwmgr->ps)
-		power_state_management(hwmgr, new_ps);
-	else
-		/*
-		 * for vega12/vega20 which does not support power state manager
-		 * DAL clock limits should also be honoured
-		 */
-		phm_apply_clock_adjust_rules(hwmgr);
-
-	if (!skip_display_settings)
-		phm_notify_smc_display_config_after_ps_adjustment(hwmgr);
+	if (hwmgr->not_vf) {
+		if (!skip_display_settings)
+			phm_display_configuration_changed(hwmgr);
+
+		if (hwmgr->ps)
+			power_state_management(hwmgr, new_ps);
+		else
+			/*
+			 * for vega12/vega20 which does not support power state manager
+			 * DAL clock limits should also be honoured
+			 */
+			phm_apply_clock_adjust_rules(hwmgr);
+
+		if (!skip_display_settings)
+			phm_notify_smc_display_config_after_ps_adjustment(hwmgr);
+	}
 
 	if (!phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level))
 		hwmgr->dpm_level = hwmgr->request_dpm_level;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 1115761982a7..4e8ab139bb3b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -1151,12 +1151,11 @@ static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
 	struct smu10_hwmgr *data = hwmgr->backend;
 	struct dm_pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges = clock_ranges;
 	Watermarks_t *table = &(data->water_marks_table);
-	int result = 0;
 
 	smu_set_watermarks_for_clocks_ranges(table,wm_with_clock_ranges);
 	smum_smc_table_manager(hwmgr, (uint8_t *)table, (uint16_t)SMU10_WMTABLE, false);
 	data->water_marks_exist = true;
-	return result;
+	return 0;
 }
 
 static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.c
new file mode 100644
index 000000000000..044cda005aed
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smu7_baco.h"
+#include "tonga_baco.h"
+#include "fiji_baco.h"
+#include "polaris_baco.h"
+#include "ci_baco.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "smu/smu_7_1_2_d.h"
+#include "smu/smu_7_1_2_sh_mask.h"
+
+int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+	uint32_t reg;
+
+	*cap = false;
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
+		return 0;
+
+	reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);
+
+	if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK)
+		*cap = true;
+
+	return 0;
+}
+
+int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+	uint32_t reg;
+
+	reg = RREG32(mmBACO_CNTL);
+
+	if (reg & BACO_CNTL__BACO_MODE_MASK)
+		/* gfx has already entered BACO state */
+		*state = BACO_STATE_IN;
+	else
+		*state = BACO_STATE_OUT;
+	return 0;
+}
+
+int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+
+	switch (adev->asic_type) {
+	case CHIP_TOPAZ:
+	case CHIP_TONGA:
+		return tonga_baco_set_state(hwmgr, state);
+	case CHIP_FIJI:
+		return fiji_baco_set_state(hwmgr, state);
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+		return polaris_baco_set_state(hwmgr, state);
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+		return ci_baco_set_state(hwmgr, state);
+#endif
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.h
new file mode 100644
index 000000000000..be0d98abb536
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_baco.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMU7_BACO_H__
+#define __SMU7_BACO_H__
+#include "hwmgr.h"
+#include "common_baco.h"
+
+extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
+extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 203ce4b1028f..d70abada66bf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -48,6 +48,7 @@
 #include "smu7_clockpowergating.h"
 #include "processpptables.h"
 #include "pp_thermal.h"
+#include "smu7_baco.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -1994,7 +1995,6 @@ static int smu7_sort_lookup_table(struct pp_hwmgr *hwmgr,
 		struct phm_ppt_v1_voltage_lookup_table *lookup_table)
 {
 	uint32_t table_size, i, j;
-	struct phm_ppt_v1_voltage_lookup_record tmp_voltage_lookup_record;
 	table_size = lookup_table->count;
 
 	PP_ASSERT_WITH_CODE(0 != lookup_table->count,
@@ -2005,9 +2005,8 @@ static int smu7_sort_lookup_table(struct pp_hwmgr *hwmgr,
 		for (j = i + 1; j > 0; j--) {
 			if (lookup_table->entries[j].us_vdd <
 					lookup_table->entries[j - 1].us_vdd) {
-				tmp_voltage_lookup_record = lookup_table->entries[j - 1];
-				lookup_table->entries[j - 1] = lookup_table->entries[j];
-				lookup_table->entries[j] = tmp_voltage_lookup_record;
+				swap(lookup_table->entries[j - 1],
+				     lookup_table->entries[j]);
 			}
 		}
 	}
@@ -3983,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
 			"Failed to populate and upload SCLK MCLK DPM levels!",
 			result = tmp_result);
 
+	/*
+	 * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag.
+	 * That effectively disables AVFS feature.
+	 */
+	if (hwmgr->hardcode_pp_table != NULL)
+		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+
 	tmp_result = smu7_update_avfs(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to update avfs voltages!",
@@ -4232,7 +4238,6 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
-	uint32_t vbios_version;
 	uint32_t tmp;
 
 	/* Read MC indirect register offset 0x9F bits [3:0] to see
@@ -4241,7 +4246,6 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
 	 */
 
 	smu7_get_mc_microcode_version(hwmgr);
-	vbios_version = hwmgr->microcode_version_info.MC & 0xf;
 
 	data->need_long_memory_training = false;
 
@@ -4939,7 +4943,7 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
 			title[0], title[1], title[2], title[3],
 			title[4], title[5], title[6], title[7]);
 
-	len = sizeof(smu7_profiling) / sizeof(struct profile_mode_setting);
+	len = ARRAY_SIZE(smu7_profiling);
 
 	for (i = 0; i < len; i++) {
 		if (i == hwmgr->power_profile_mode) {
@@ -5071,13 +5075,11 @@ static int smu7_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw
 				PHM_PerformanceLevel *level)
 {
 	const struct smu7_power_state *ps;
-	struct smu7_hwmgr *data;
 	uint32_t i;
 
 	if (level == NULL || hwmgr == NULL || state == NULL)
 		return -EINVAL;
 
-	data = hwmgr->backend;
 	ps = cast_const_phw_smu7_power_state(state);
 
 	i = index > ps->performance_level_count - 1 ?
@@ -5158,6 +5160,9 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.get_power_profile_mode = smu7_get_power_profile_mode,
 	.set_power_profile_mode = smu7_set_power_profile_mode,
 	.get_performance_level = smu7_get_performance_level,
+	.get_asic_baco_capability = smu7_baco_get_capability,
+	.get_asic_baco_state = smu7_baco_get_state,
+	.set_asic_baco_state = smu7_baco_set_state,
 	.power_off_asic = smu7_power_off_asic,
 };
 
@@ -5180,13 +5185,11 @@ uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
 
 int smu7_init_function_pointers(struct pp_hwmgr *hwmgr)
 {
-	int ret = 0;
-
 	hwmgr->hwmgr_func = &smu7_hwmgr_funcs;
 	if (hwmgr->pp_table_version == PP_TABLE_V0)
 		hwmgr->pptable_func = &pptable_funcs;
 	else if (hwmgr->pp_table_version == PP_TABLE_V1)
 		hwmgr->pptable_func = &pptable_v1_0_funcs;
 
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.c
new file mode 100644
index 000000000000..ea743bea8e29
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "tonga_baco.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
+#include "smu/smu_7_1_2_d.h"
+#include "smu/smu_7_1_2_sh_mask.h"
+
+
+static const struct baco_cmd_entry gpio_tbl[] =
+{
+	{ CMD_WRITE, mmGPIOPAD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PU_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_MASK, 0, 0, 0, 0xff77ffff },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmDC_GPIO_DVODATA_MASK, 0, 0, 0, 0xffffffff },
+	{ CMD_WRITE, mmDC_GPIO_GENERIC_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_GENERIC_MASK, 0, 0, 0, 0x03333333 },
+	{ CMD_WRITE, mmDC_GPIO_SYNCA_EN, 0, 0, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmDC_GPIO_SYNCA_MASK, 0, 0, 0, 0x00001111 }
+};
+
+static const struct baco_cmd_entry enable_fb_req_rej_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0300024 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x1, 0x0, 0, 0x1 },
+	{ CMD_WRITE, mmBIF_FB_EN, 0, 0, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry use_bclk_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK, CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_BYPASS_CHG__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_STATUS },
+	{ CMD_WAITFOR, mmGCK_SMC_IND_DATA, CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK, 0, 0xffffffff, 0x2 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK, CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x4000000, 0x1a, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x2 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL_MASK, MPLL_CNTL_MODE__MPLL_SW_DIR_CONTROL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__MPLL_MCLK_SEL_MASK, MPLL_CNTL_MODE__MPLL_MCLK_SEL__SHIFT, 0, 0x0 }
+};
+
+static const struct baco_cmd_entry turn_off_plls_tbl[] =
+{
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_SPLL_FUNC_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK, CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_SPLL_FUNC_CNTL__SPLL_PWRON_MASK, CG_SPLL_FUNC_CNTL__SPLL_PWRON__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, 0xC0500170 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x2000000, 0x19, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, 0x8000000, 0x1b, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMPLL_CNTL_MODE, MPLL_CNTL_MODE__GLOBAL_MPLL_RESET_MASK, MPLL_CNTL_MODE__GLOBAL_MPLL_RESET__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmMPLL_CONTROL, 0, 0, 0, 0x00000006 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY0_D0, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY0_D1, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY1_D0, 0, 0, 0, 0x00007740 },
+	{ CMD_WRITE, mmMC_IO_RXCNTL_DPHY1_D1, 0, 0, 0, 0x00007740 },
+	{ CMD_READMODIFYWRITE, mmMCLK_PWRMGT_CNTL, MCLK_PWRMGT_CNTL__MRDCK0_PDNB_MASK, MCLK_PWRMGT_CNTL__MRDCK0_PDNB__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMCLK_PWRMGT_CNTL, MCLK_PWRMGT_CNTL__MRDCK1_PDNB_MASK, MCLK_PWRMGT_CNTL__MRDCK1_PDNB__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_CNTL_2, MC_SEQ_CNTL_2__DRST_PU_MASK, MC_SEQ_CNTL_2__DRST_PU__SHIFT, 0, 0x0 },
+	{ CMD_READMODIFYWRITE, mmMC_SEQ_CNTL_2, MC_SEQ_CNTL_2__DRST_PD_MASK, MC_SEQ_CNTL_2__DRST_PD__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL_2 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK, CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMPLL_BYPASSCLK_SEL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK, MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT, 0, 0x4 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixMISC_CLK_CTRL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK, MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, MISC_CLK_CTRL__ZCLK_SEL_MASK, MISC_CLK_CTRL__ZCLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixCG_CLKPIN_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK, CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT, 0, 0x0 },
+	{ CMD_WRITE, mmGCK_SMC_IND_INDEX, 0, 0, 0, ixTHM_CLK_CNTL },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__CMON_CLK_SEL_MASK,  THM_CLK_CNTL__CMON_CLK_SEL__SHIFT, 0, 0x1 },
+	{ CMD_READMODIFYWRITE, mmGCK_SMC_IND_DATA, THM_CLK_CNTL__TMON_CLK_SEL_MASK,  THM_CLK_CNTL__TMON_CLK_SEL__SHIFT, 0, 0x1 }
+};
+
+static const struct baco_cmd_entry enter_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, 0, 5, 0x40000 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, 0, 5, 0x02 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, 0, 5, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, 0, 5, 0x08 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x40 }
+};
+
+#define BACO_CNTL__PWRGOOD_MASK  BACO_CNTL__PWRGOOD_GPIO_MASK+BACO_CNTL__PWRGOOD_MEM_MASK+BACO_CNTL__PWRGOOD_DVO_MASK
+
+static const struct baco_cmd_entry exit_baco_tbl[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x200 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x100 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 }
+};
+
+static const struct baco_cmd_entry clean_baco_tbl[] =
+{
+	{ CMD_WRITE, mmBIOS_SCRATCH_6, 0, 0, 0, 0 },
+	{ CMD_WRITE, mmBIOS_SCRATCH_7, 0, 0, 0, 0 }
+};
+
+static const struct baco_cmd_entry gpio_tbl_iceland[] =
+{
+	{ CMD_WRITE, mmGPIOPAD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PD_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_PU_EN, 0, 0, 0, 0x0 },
+	{ CMD_WRITE, mmGPIOPAD_MASK, 0, 0, 0, 0xff77ffff }
+};
+
+static const struct baco_cmd_entry exit_baco_tbl_iceland[] =
+{
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 },
+	{ CMD_DELAY_MS, 0, 0, 0, 20, 0 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x200 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BIF_SCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_SCLK_SWITCH__SHIFT, 0, 0x00 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x100 },
+	{ CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 },
+	{ CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 }
+};
+
+static const struct baco_cmd_entry clean_baco_tbl_iceland[] =
+{
+	{ CMD_WRITE, mmBIOS_SCRATCH_7, 0, 0, 0, 0 }
+};
+
+int tonga_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
+{
+	enum BACO_STATE cur_state;
+
+	smu7_baco_get_state(hwmgr, &cur_state);
+
+	if (cur_state == state)
+		/* aisc already in the target state */
+		return 0;
+
+	if (state == BACO_STATE_IN) {
+		if (hwmgr->chip_id == CHIP_TOPAZ)
+			baco_program_registers(hwmgr, gpio_tbl_iceland, ARRAY_SIZE(gpio_tbl_iceland));
+		else
+			baco_program_registers(hwmgr, gpio_tbl, ARRAY_SIZE(gpio_tbl));
+		baco_program_registers(hwmgr, enable_fb_req_rej_tbl,
+				       ARRAY_SIZE(enable_fb_req_rej_tbl));
+		baco_program_registers(hwmgr, use_bclk_tbl, ARRAY_SIZE(use_bclk_tbl));
+		baco_program_registers(hwmgr, turn_off_plls_tbl,
+				       ARRAY_SIZE(turn_off_plls_tbl));
+		if (baco_program_registers(hwmgr, enter_baco_tbl,
+					   ARRAY_SIZE(enter_baco_tbl)))
+			return 0;
+
+	} else if (state == BACO_STATE_OUT) {
+		/* HW requires at least 20ms between regulator off and on */
+		msleep(20);
+		/* Execute Hardware BACO exit sequence */
+		if (hwmgr->chip_id == CHIP_TOPAZ) {
+			if (baco_program_registers(hwmgr, exit_baco_tbl_iceland,
+						   ARRAY_SIZE(exit_baco_tbl_iceland))) {
+				if (baco_program_registers(hwmgr, clean_baco_tbl_iceland,
+							   ARRAY_SIZE(clean_baco_tbl_iceland)))
+					return 0;
+			}
+		} else {
+			if (baco_program_registers(hwmgr, exit_baco_tbl,
+						   ARRAY_SIZE(exit_baco_tbl))) {
+				if (baco_program_registers(hwmgr, clean_baco_tbl,
+							   ARRAY_SIZE(clean_baco_tbl)))
+					return 0;
+			}
+		}
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.h
new file mode 100644
index 000000000000..5dc16cc8a295
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_baco.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __TONGA_BACO_H__
+#define __TONGA_BACO_H__
+#include "smu7_baco.h"
+
+extern int tonga_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index beacfffbdc3e..92a65e3daff4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -712,7 +712,6 @@ static int vega10_sort_lookup_table(struct pp_hwmgr *hwmgr,
 		struct phm_ppt_v1_voltage_lookup_table *lookup_table)
 {
 	uint32_t table_size, i, j;
-	struct phm_ppt_v1_voltage_lookup_record tmp_voltage_lookup_record;
 
 	PP_ASSERT_WITH_CODE(lookup_table && lookup_table->count,
 		"Lookup table is empty", return -EINVAL);
@@ -724,9 +723,8 @@ static int vega10_sort_lookup_table(struct pp_hwmgr *hwmgr,
 		for (j = i + 1; j > 0; j--) {
 			if (lookup_table->entries[j].us_vdd <
 					lookup_table->entries[j - 1].us_vdd) {
-				tmp_voltage_lookup_record = lookup_table->entries[j - 1];
-				lookup_table->entries[j - 1] = lookup_table->entries[j];
-				lookup_table->entries[j] = tmp_voltage_lookup_record;
+				swap(lookup_table->entries[j - 1],
+				     lookup_table->entries[j]);
 			}
 		}
 	}
@@ -914,6 +912,9 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	hwmgr->platform_descriptor.clockStep.memoryClock = 500;
 
 	data->total_active_cus = adev->gfx.cu_info.number;
+	if (!hwmgr->not_vf)
+		return result;
+
 	/* Setup default Overdrive Fan control settings */
 	data->odn_fan_table.target_fan_speed =
 			hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM;
@@ -981,6 +982,9 @@ static int vega10_setup_dpm_led_config(struct pp_hwmgr *hwmgr)
 
 static int vega10_setup_asic_task(struct pp_hwmgr *hwmgr)
 {
+	if (!hwmgr->not_vf)
+		return 0;
+
 	PP_ASSERT_WITH_CODE(!vega10_init_sclk_threshold(hwmgr),
 			"Failed to init sclk threshold!",
 			return -EINVAL);
@@ -2505,6 +2509,9 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			"Failed to setup default DPM tables!",
 			return result);
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	/* initialize ODN table */
 	if (hwmgr->od_enabled) {
 		if (odn_table->max_vddc) {
@@ -2828,6 +2835,8 @@ static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
 	struct vega10_hwmgr *data = hwmgr->backend;
 	uint32_t i, feature_mask = 0;
 
+	if (!hwmgr->not_vf)
+		return 0;
 
 	if(data->smu_features[GNLD_LED_DISPLAY].supported == true){
 		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
@@ -2934,61 +2943,73 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	struct vega10_hwmgr *data = hwmgr->backend;
 	int tmp_result, result = 0;
 
-	vega10_enable_disable_PCC_limit_feature(hwmgr, true);
+	if (hwmgr->not_vf) {
+		vega10_enable_disable_PCC_limit_feature(hwmgr, true);
 
-	smum_send_msg_to_smc_with_parameter(hwmgr,
-		PPSMC_MSG_ConfigureTelemetry, data->config_telemetry);
-
-	tmp_result = vega10_construct_voltage_tables(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to construct voltage tables!",
-			result = tmp_result);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_ConfigureTelemetry, data->config_telemetry);
 
-	tmp_result = vega10_init_smc_table(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to initialize SMC table!",
-			result = tmp_result);
+		tmp_result = vega10_construct_voltage_tables(hwmgr);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to construct voltage tables!",
+				    result = tmp_result);
+	}
 
-	if (PP_CAP(PHM_PlatformCaps_ThermalController)) {
-		tmp_result = vega10_enable_thermal_protection(hwmgr);
+	if (hwmgr->not_vf || hwmgr->pp_one_vf) {
+		tmp_result = vega10_init_smc_table(hwmgr);
 		PP_ASSERT_WITH_CODE(!tmp_result,
-				"Failed to enable thermal protection!",
-				result = tmp_result);
+				    "Failed to initialize SMC table!",
+				    result = tmp_result);
 	}
 
-	tmp_result = vega10_enable_vrhot_feature(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to enable VR hot feature!",
-			result = tmp_result);
+	if (hwmgr->not_vf) {
+		if (PP_CAP(PHM_PlatformCaps_ThermalController)) {
+			tmp_result = vega10_enable_thermal_protection(hwmgr);
+			PP_ASSERT_WITH_CODE(!tmp_result,
+					    "Failed to enable thermal protection!",
+					    result = tmp_result);
+		}
 
-	tmp_result = vega10_enable_deep_sleep_master_switch(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to enable deep sleep master switch!",
-			result = tmp_result);
+		tmp_result = vega10_enable_vrhot_feature(hwmgr);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to enable VR hot feature!",
+				    result = tmp_result);
 
-	tmp_result = vega10_start_dpm(hwmgr, SMC_DPM_FEATURES);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to start DPM!", result = tmp_result);
+		tmp_result = vega10_enable_deep_sleep_master_switch(hwmgr);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to enable deep sleep master switch!",
+				    result = tmp_result);
+	}
 
-	/* enable didt, do not abort if failed didt */
-	tmp_result = vega10_enable_didt_config(hwmgr);
-	PP_ASSERT(!tmp_result,
-			"Failed to enable didt config!");
+	if (hwmgr->not_vf) {
+		tmp_result = vega10_start_dpm(hwmgr, SMC_DPM_FEATURES);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to start DPM!", result = tmp_result);
+	}
+
+	if (hwmgr->not_vf) {
+		/* enable didt, do not abort if failed didt */
+		tmp_result = vega10_enable_didt_config(hwmgr);
+		PP_ASSERT(!tmp_result,
+			  "Failed to enable didt config!");
+	}
 
 	tmp_result = vega10_enable_power_containment(hwmgr);
 	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to enable power containment!",
-			result = tmp_result);
+			    "Failed to enable power containment!",
+			    result = tmp_result);
 
-	tmp_result = vega10_power_control_set_level(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to power control set level!",
-			result = tmp_result);
+	if (hwmgr->not_vf) {
+		tmp_result = vega10_power_control_set_level(hwmgr);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to power control set level!",
+				    result = tmp_result);
 
-	tmp_result = vega10_enable_ulv(hwmgr);
-	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to enable ULV!",
-			result = tmp_result);
+		tmp_result = vega10_enable_ulv(hwmgr);
+		PP_ASSERT_WITH_CODE(!tmp_result,
+				    "Failed to enable ULV!",
+				    result = tmp_result);
+	}
 
 	return result;
 }
@@ -3082,11 +3103,22 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	performance_level->soc_clock = socclk_dep_table->entries
 				[state_entry->ucSocClockIndexHigh].ulClk;
 	if (gfxclk_dep_table->ucRevId == 0) {
-		performance_level->gfx_clock = gfxclk_dep_table->entries
-			[state_entry->ucGfxClockIndexHigh].ulClk;
+		/* under vega10 pp one vf mode, the gfx clk dpm need be lower
+		 * to level-4 due to the limited 110w-power
+		 */
+		if (hwmgr->pp_one_vf && (state_entry->ucGfxClockIndexHigh > 0))
+			performance_level->gfx_clock =
+				gfxclk_dep_table->entries[4].ulClk;
+		else
+			performance_level->gfx_clock = gfxclk_dep_table->entries
+				[state_entry->ucGfxClockIndexHigh].ulClk;
 	} else if (gfxclk_dep_table->ucRevId == 1) {
 		patom_record_V2 = (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries;
-		performance_level->gfx_clock = patom_record_V2[state_entry->ucGfxClockIndexHigh].ulClk;
+		if (hwmgr->pp_one_vf && (state_entry->ucGfxClockIndexHigh > 0))
+			performance_level->gfx_clock = patom_record_V2[4].ulClk;
+		else
+			performance_level->gfx_clock =
+				patom_record_V2[state_entry->ucGfxClockIndexHigh].ulClk;
 	}
 
 	performance_level->mem_clock = mclk_dep_table->entries
@@ -3497,6 +3529,7 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
 			smum_send_msg_to_smc_with_parameter(hwmgr,
 				PPSMC_MSG_SetSoftMinGfxclkByIndex,
 				data->smc_state_table.gfx_boot_level);
+
 			data->dpm_table.gfx_table.dpm_state.soft_min_level =
 					data->smc_state_table.gfx_boot_level;
 		}
@@ -3505,7 +3538,8 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
 	if (!data->registry_data.mclk_dpm_key_disabled) {
 		if (data->smc_state_table.mem_boot_level !=
 				data->dpm_table.mem_table.dpm_state.soft_min_level) {
-			if (data->smc_state_table.mem_boot_level == NUM_UCLK_DPM_LEVELS - 1) {
+			if ((data->smc_state_table.mem_boot_level == NUM_UCLK_DPM_LEVELS - 1)
+			    && hwmgr->not_vf) {
 				socclk_idx = vega10_get_soc_index_for_max_uclk(hwmgr);
 				smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMinSocclkByIndex,
@@ -3520,6 +3554,9 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
 		}
 	}
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (!data->registry_data.socclk_dpm_key_disabled) {
 		if (data->smc_state_table.soc_boot_level !=
 				data->dpm_table.soc_table.dpm_state.soft_min_level) {
@@ -3562,6 +3599,9 @@ static int vega10_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 		}
 	}
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (!data->registry_data.socclk_dpm_key_disabled) {
 		if (data->smc_state_table.soc_max_level !=
 			data->dpm_table.soc_table.dpm_state.soft_max_level) {
@@ -3691,6 +3731,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to upload PPtable!", return result);
 
+	/*
+	 * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag.
+	 * That effectively disables AVFS feature.
+	 */
+	if(hwmgr->hardcode_pp_table != NULL)
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+
 	vega10_update_avfs(hwmgr);
 
 	/*
@@ -4049,15 +4096,25 @@ static int vega10_get_profiling_clk_mask(struct pp_hwmgr *hwmgr, enum amd_dpm_fo
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) {
 		*mclk_mask = 0;
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) {
-		*sclk_mask = table_info->vdd_dep_on_sclk->count - 1;
+		/* under vega10  pp one vf mode, the gfx clk dpm need be lower
+		 * to level-4 due to the limited power
+		 */
+		if (hwmgr->pp_one_vf)
+			*sclk_mask = 4;
+		else
+			*sclk_mask = table_info->vdd_dep_on_sclk->count - 1;
 		*soc_mask = table_info->vdd_dep_on_socclk->count - 1;
 		*mclk_mask = table_info->vdd_dep_on_mclk->count - 1;
 	}
+
 	return 0;
 }
 
 static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
+	if (!hwmgr->not_vf)
+		return;
+
 	switch (mode) {
 	case AMD_FAN_CTRL_NONE:
 		vega10_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
@@ -4171,6 +4228,9 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 		break;
 	}
 
+	if (!hwmgr->not_vf)
+		return ret;
+
 	if (!ret) {
 		if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
 			vega10_set_fan_control_mode(hwmgr, AMD_FAN_CTRL_NONE);
@@ -4355,14 +4415,13 @@ static int vega10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct dm_pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges = clock_range;
 	Watermarks_t *table = &(data->smc_state_table.water_marks_table);
-	int result = 0;
 
 	if (!data->registry_data.disable_water_mark) {
 		smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges);
 		data->water_marks_bitmap = WaterMarksExist;
 	}
 
-	return result;
+	return 0;
 }
 
 static int vega10_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf)
@@ -4475,7 +4534,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
 	struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table);
 	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL;
 
-	int i, now, size = 0;
+	int i, now, size = 0, count = 0;
 
 	switch (type) {
 	case PP_SCLK:
@@ -4485,7 +4544,12 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex);
 		now = smum_get_argument(hwmgr);
 
-		for (i = 0; i < sclk_table->count; i++)
+		if (hwmgr->pp_one_vf &&
+		    (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK))
+			count = 5;
+		else
+			count = sclk_table->count;
+		for (i = 0; i < count; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, sclk_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
@@ -4696,6 +4760,9 @@ static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
 	int tmp_result, result = 0;
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (PP_CAP(PHM_PlatformCaps_ThermalController))
 		vega10_disable_thermal_protection(hwmgr);
 
@@ -5247,13 +5314,11 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_
 				PHM_PerformanceLevel *level)
 {
 	const struct vega10_power_state *ps;
-	struct vega10_hwmgr *data;
 	uint32_t i;
 
 	if (level == NULL || hwmgr == NULL || state == NULL)
 		return -EINVAL;
 
-	data = hwmgr->backend;
 	ps = cast_const_phw_vega10_power_state(state);
 
 	i = index > ps->performance_level_count - 1 ?
@@ -5265,6 +5330,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_
 	return 0;
 }
 
+static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	uint32_t feature_mask = 0;
+
+	if (disable) {
+		feature_mask |= data->smu_features[GNLD_ULV].enabled ?
+			data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
+		feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ?
+			data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
+		feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ?
+			data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
+		feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ?
+			data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
+		feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ?
+			data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
+	} else {
+		feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ?
+			data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
+		feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ?
+			data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
+		feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ?
+			data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
+		feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ?
+			data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
+		feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ?
+			data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
+	}
+
+	if (feature_mask)
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
+				!disable, feature_mask),
+				"enable/disable power features for compute performance Failed!",
+				return -EINVAL);
+
+	if (disable) {
+		data->smu_features[GNLD_ULV].enabled = false;
+		data->smu_features[GNLD_DS_GFXCLK].enabled = false;
+		data->smu_features[GNLD_DS_SOCCLK].enabled = false;
+		data->smu_features[GNLD_DS_LCLK].enabled = false;
+		data->smu_features[GNLD_DS_DCEFCLK].enabled = false;
+	} else {
+		data->smu_features[GNLD_ULV].enabled = true;
+		data->smu_features[GNLD_DS_GFXCLK].enabled = true;
+		data->smu_features[GNLD_DS_SOCCLK].enabled = true;
+		data->smu_features[GNLD_DS_LCLK].enabled = true;
+		data->smu_features[GNLD_DS_DCEFCLK].enabled = true;
+	}
+
+	return 0;
+
+}
+
 static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.backend_init = vega10_hwmgr_backend_init,
 	.backend_fini = vega10_hwmgr_backend_fini,
@@ -5332,6 +5450,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.get_ppfeature_status = vega10_get_ppfeature_status,
 	.set_ppfeature_status = vega10_set_ppfeature_status,
 	.set_mp1_state = vega10_set_mp1_state,
+	.disable_power_features_for_compute_performance =
+			vega10_disable_power_features_for_compute_performance,
 };
 
 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index 6f26cb241ecc..0a677d4bc87b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -1343,6 +1343,9 @@ int vega10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	hwmgr->default_power_limit = hwmgr->power_limit =
 			(uint32_t)(tdp_table->usMaximumPowerDeliveryLimit);
 
+	if (!hwmgr->not_vf)
+		return 0;
+
 	if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
 		if (data->smu_features[GNLD_PPT].supported)
 			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 7af9ad450ac4..aca61d1ff3c2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -499,8 +499,6 @@ static int vega12_get_number_of_dpm_level(struct pp_hwmgr *hwmgr,
 static int vega12_get_dpm_frequency_by_index(struct pp_hwmgr *hwmgr,
 		PPCLK_e clkID, uint32_t index, uint32_t *clock)
 {
-	int result = 0;
-
 	/*
 	 *SMU expects the Clock ID to be in the top 16 bits.
 	 *Lower 16 bits specify the level
@@ -512,7 +510,7 @@ static int vega12_get_dpm_frequency_by_index(struct pp_hwmgr *hwmgr,
 
 	*clock = smum_get_argument(hwmgr);
 
-	return result;
+	return 0;
 }
 
 static int vega12_setup_single_dpm_table(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
index df6ff9252401..9b5e72bdceca 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
@@ -29,7 +29,7 @@
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
 
-
+#include "amdgpu_ras.h"
 
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =
 {
@@ -74,6 +74,7 @@ int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
 int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 	enum BACO_STATE cur_state;
 	uint32_t data;
 
@@ -84,13 +85,19 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
 		return 0;
 
 	if (state == BACO_STATE_IN) {
-		data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
-		data |= 0x80000000;
-		WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
-
-
-		if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0))
-			return -EINVAL;
+		if (!ras || !ras->supported) {
+			data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+			data |= 0x80000000;
+			WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+
+			if(smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_EnterBaco, 0))
+				return -EINVAL;
+		} else {
+			if(smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_EnterBaco, 1))
+				return -EINVAL;
+		}
 
 	} else if (state == BACO_STATE_OUT) {
 		if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco))
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index f5915308e643..3b3ec5666051 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -183,6 +183,9 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 			PHM_PlatformCaps_TablelessHardwareInterface);
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_BACO);
+
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_EnableSMU7ThermalManagement);
 
 	if (adev->pg_flags & AMD_PG_SUPPORT_UVD)
@@ -490,8 +493,8 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
 			"Failed to init sclk threshold!",
 			return ret);
 
-	if (adev->in_baco_reset) {
-		adev->in_baco_reset = 0;
+	if (adev->in_gpu_reset &&
+	    (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) {
 
 		ret = vega20_baco_apply_vdci_flush_workaround(hwmgr);
 		if (ret)
@@ -869,7 +872,7 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
 		"[OverridePcieParameters] Attempt to override pcie params failed!",
 		return ret);
 
-	data->pcie_parameters_override = 1;
+	data->pcie_parameters_override = true;
 	data->pcie_gen_level1 = pcie_gen;
 	data->pcie_width_level1 = pcie_width;
 
@@ -4155,6 +4158,38 @@ static int vega20_smu_i2c_bus_access(struct pp_hwmgr *hwmgr, bool acquire)
 	return res;
 }
 
+static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr,
+				enum pp_df_cstate state)
+{
+	int ret;
+
+	/* PPSMC_MSG_DFCstateControl is supported with 40.50 and later fws */
+	if (hwmgr->smu_version < 0x283200) {
+		pr_err("Df cstate control is supported with 40.50 and later SMC fw!\n");
+		return -EINVAL;
+	}
+
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DFCstateControl, state);
+	if (ret)
+		pr_err("SetDfCstate failed!\n");
+
+	return ret;
+}
+
+static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr,
+				  uint32_t pstate)
+{
+	int ret;
+
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+						  PPSMC_MSG_SetXgmiMode,
+						  pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
+	if (ret)
+		pr_err("SetXgmiPstate failed!\n");
+
+	return ret;
+}
+
 static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
 	/* init/fini related */
 	.backend_init = vega20_hwmgr_backend_init,
@@ -4223,6 +4258,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
 	.set_asic_baco_state = vega20_baco_set_state,
 	.set_mp1_state = vega20_set_mp1_state,
 	.smu_i2c_bus_access = vega20_smu_i2c_bus_access,
+	.set_df_cstate = vega20_set_df_cstate,
+	.set_xgmi_pstate = vega20_set_xgmi_pstate,
 };
 
 int vega20_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 23171a4d9a31..b0591a8dda41 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -254,15 +254,23 @@ struct smu_table_context
 	unsigned long			metrics_time;
 	void				*metrics_table;
 	void				*clocks_table;
+	void				*watermarks_table;
 
 	void				*max_sustainable_clocks;
 	struct smu_bios_boot_up_values	boot_values;
 	void                            *driver_pptable;
 	struct smu_table		*tables;
-	uint32_t			table_count;
+	/*
+	 * The driver table is just a staging buffer for
+	 * uploading/downloading content from the SMU.
+	 *
+	 * And the table_id for SMU_MSG_TransferTableSmu2Dram/
+	 * SMU_MSG_TransferTableDram2Smu instructs SMU
+	 * which content driver is interested.
+	 */
+	struct smu_table		driver_table;
 	struct smu_table		memory_pool;
 	uint8_t                         thermal_controller_type;
-	uint16_t			TDPODLimit;
 
 	void				*overdrive_table;
 };
@@ -284,6 +292,7 @@ struct smu_power_gate {
 	bool uvd_gated;
 	bool vce_gated;
 	bool vcn_gated;
+	bool jpeg_gated;
 };
 
 struct smu_power_context {
@@ -322,6 +331,13 @@ struct mclock_latency_table {
 	struct mclk_latency_entries  entries[MAX_REGULAR_DPM_NUM];
 };
 
+enum smu_reset_mode
+{
+    SMU_RESET_MODE_0,
+    SMU_RESET_MODE_1,
+    SMU_RESET_MODE_2,
+};
+
 enum smu_baco_state
 {
 	SMU_BACO_STATE_ENTER = 0,
@@ -341,10 +357,10 @@ struct smu_context
 	struct amdgpu_device            *adev;
 	struct amdgpu_irq_src		*irq_source;
 
-	const struct smu_funcs		*funcs;
 	const struct pptable_funcs	*ppt_funcs;
 	struct mutex			mutex;
 	struct mutex			sensor_lock;
+	struct mutex			metrics_lock;
 	uint64_t pool_size;
 
 	struct smu_table_context	smu_table;
@@ -382,11 +398,15 @@ struct smu_context
 	uint32_t power_profile_mode;
 	uint32_t default_power_profile_mode;
 	bool pm_enabled;
+	bool is_apu;
 
 	uint32_t smc_if_version;
 
+	bool uploading_custom_pp_table;
 };
 
+struct i2c_adapter;
+
 struct pptable_funcs {
 	int (*alloc_dpm_context)(struct smu_context *smu);
 	int (*store_powerplay_table)(struct smu_context *smu);
@@ -398,7 +418,7 @@ struct pptable_funcs {
 	int (*get_smu_table_index)(struct smu_context *smu, uint32_t index);
 	int (*get_smu_power_index)(struct smu_context *smu, uint32_t index);
 	int (*get_workload_type)(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile);
-	int (*run_afll_btc)(struct smu_context *smu);
+	int (*run_btc)(struct smu_context *smu);
 	int (*get_allowed_feature_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num);
 	enum amd_pm_state_type (*get_current_power_state)(struct smu_context *smu);
 	int (*set_default_dpm_table)(struct smu_context *smu);
@@ -428,12 +448,13 @@ struct pptable_funcs {
 	int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size);
 	int (*dpm_set_uvd_enable)(struct smu_context *smu, bool enable);
 	int (*dpm_set_vce_enable)(struct smu_context *smu, bool enable);
+	int (*dpm_set_jpeg_enable)(struct smu_context *smu, bool enable);
 	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
 			   void *data, uint32_t *size);
 	int (*pre_display_config_changed)(struct smu_context *smu);
 	int (*display_config_changed)(struct smu_context *smu);
 	int (*apply_clocks_adjust_rules)(struct smu_context *smu);
-	int (*notify_smc_dispaly_config)(struct smu_context *smu);
+	int (*notify_smc_display_config)(struct smu_context *smu);
 	int (*force_dpm_limit_value)(struct smu_context *smu, bool highest);
 	int (*unforce_dpm_levels)(struct smu_context *smu);
 	int (*get_profiling_clk_mask)(struct smu_context *smu,
@@ -459,17 +480,19 @@ struct pptable_funcs {
 	int (*display_disable_memory_clock_switch)(struct smu_context *smu, bool disable_memory_clock_switch);
 	void (*dump_pptable)(struct smu_context *smu);
 	int (*get_power_limit)(struct smu_context *smu, uint32_t *limit, bool asic_default);
-	int (*get_dpm_uclk_limited)(struct smu_context *smu, uint32_t *clock, bool max);
-};
-
-struct smu_funcs
-{
+	int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type,
+				   uint32_t dpm_level, uint32_t *freq);
+	int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state);
+	int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap);
+	int (*i2c_eeprom_init)(struct i2c_adapter *control);
+	void (*i2c_eeprom_fini)(struct i2c_adapter *control);
+	int (*get_dpm_clock_table)(struct smu_context *smu, struct dpm_clocks *clock_table);
 	int (*init_microcode)(struct smu_context *smu);
+	int (*load_microcode)(struct smu_context *smu);
 	int (*init_smc_tables)(struct smu_context *smu);
 	int (*fini_smc_tables)(struct smu_context *smu);
 	int (*init_power)(struct smu_context *smu);
 	int (*fini_power)(struct smu_context *smu);
-	int (*load_microcode)(struct smu_context *smu);
 	int (*check_fw_status)(struct smu_context *smu);
 	int (*setup_pptable)(struct smu_context *smu);
 	int (*get_vbios_bootup_values)(struct smu_context *smu);
@@ -480,16 +503,17 @@ struct smu_funcs
 	int (*check_fw_version)(struct smu_context *smu);
 	int (*powergate_sdma)(struct smu_context *smu, bool gate);
 	int (*powergate_vcn)(struct smu_context *smu, bool gate);
+	int (*powergate_jpeg)(struct smu_context *smu, bool gate);
 	int (*set_gfx_cgpg)(struct smu_context *smu, bool enable);
 	int (*write_pptable)(struct smu_context *smu);
 	int (*set_min_dcef_deep_sleep)(struct smu_context *smu);
+	int (*set_driver_table_location)(struct smu_context *smu);
 	int (*set_tool_table_location)(struct smu_context *smu);
 	int (*notify_memory_pool_location)(struct smu_context *smu);
-	int (*write_watermarks_table)(struct smu_context *smu);
 	int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu);
 	int (*system_features_control)(struct smu_context *smu, bool en);
-	int (*send_smc_msg)(struct smu_context *smu, uint16_t msg);
-	int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param);
+	int (*send_smc_msg_with_param)(struct smu_context *smu,
+				       enum smu_message_type msg, uint32_t param);
 	int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg);
 	int (*init_display_count)(struct smu_context *smu, uint32_t count);
 	int (*set_allowed_mask)(struct smu_context *smu);
@@ -499,8 +523,7 @@ struct smu_funcs
 	int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
 	int (*init_max_sustainable_clocks)(struct smu_context *smu);
 	int (*start_thermal_control)(struct smu_context *smu);
-	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
-			   void *data, uint32_t *size);
+	int (*stop_thermal_control)(struct smu_context *smu);
 	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
 	int (*set_active_display_count)(struct smu_context *smu, uint32_t count);
 	int (*store_cc6_data)(struct smu_context *smu, uint32_t separation_time,
@@ -522,8 +545,6 @@ struct smu_funcs
 	int (*get_current_shallow_sleep_clocks)(struct smu_context *smu,
 						struct smu_clock_info *clocks);
 	int (*notify_smu_enable_pwe)(struct smu_context *smu);
-	int (*set_watermarks_for_clock_ranges)(struct smu_context *smu,
-					       struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
 	int (*conv_power_profile_to_pplib_workload)(int power_profile);
 	uint32_t (*get_fan_control_mode)(struct smu_context *smu);
 	int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode);
@@ -537,235 +558,93 @@ struct smu_funcs
 	bool (*baco_is_support)(struct smu_context *smu);
 	enum smu_baco_state (*baco_get_state)(struct smu_context *smu);
 	int (*baco_set_state)(struct smu_context *smu, enum smu_baco_state state);
-	int (*baco_reset)(struct smu_context *smu);
+	int (*baco_enter)(struct smu_context *smu);
+	int (*baco_exit)(struct smu_context *smu);
+	int (*mode2_reset)(struct smu_context *smu);
 	int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max);
+	int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max);
+	int (*override_pcie_parameters)(struct smu_context *smu);
+	uint32_t (*get_pptable_power_limit)(struct smu_context *smu);
 };
 
-#define smu_init_microcode(smu) \
-	((smu)->funcs->init_microcode ? (smu)->funcs->init_microcode((smu)) : 0)
-#define smu_init_smc_tables(smu) \
-	((smu)->funcs->init_smc_tables ? (smu)->funcs->init_smc_tables((smu)) : 0)
-#define smu_fini_smc_tables(smu) \
-	((smu)->funcs->fini_smc_tables ? (smu)->funcs->fini_smc_tables((smu)) : 0)
-#define smu_init_power(smu) \
-	((smu)->funcs->init_power ? (smu)->funcs->init_power((smu)) : 0)
-#define smu_fini_power(smu) \
-	((smu)->funcs->fini_power ? (smu)->funcs->fini_power((smu)) : 0)
-#define smu_load_microcode(smu) \
-	((smu)->funcs->load_microcode ? (smu)->funcs->load_microcode((smu)) : 0)
-#define smu_check_fw_status(smu) \
-	((smu)->funcs->check_fw_status ? (smu)->funcs->check_fw_status((smu)) : 0)
-#define smu_setup_pptable(smu) \
-	((smu)->funcs->setup_pptable ? (smu)->funcs->setup_pptable((smu)) : 0)
-#define smu_powergate_sdma(smu, gate) \
-	((smu)->funcs->powergate_sdma ? (smu)->funcs->powergate_sdma((smu), (gate)) : 0)
-#define smu_powergate_vcn(smu, gate) \
-	((smu)->funcs->powergate_vcn ? (smu)->funcs->powergate_vcn((smu), (gate)) : 0)
-#define smu_set_gfx_cgpg(smu, enabled) \
-	((smu)->funcs->set_gfx_cgpg ? (smu)->funcs->set_gfx_cgpg((smu), (enabled)) : 0)
-#define smu_get_vbios_bootup_values(smu) \
-	((smu)->funcs->get_vbios_bootup_values ? (smu)->funcs->get_vbios_bootup_values((smu)) : 0)
-#define smu_get_clk_info_from_vbios(smu) \
-	((smu)->funcs->get_clk_info_from_vbios ? (smu)->funcs->get_clk_info_from_vbios((smu)) : 0)
-#define smu_check_pptable(smu) \
-	((smu)->funcs->check_pptable ? (smu)->funcs->check_pptable((smu)) : 0)
-#define smu_parse_pptable(smu) \
-	((smu)->funcs->parse_pptable ? (smu)->funcs->parse_pptable((smu)) : 0)
-#define smu_populate_smc_tables(smu) \
-	((smu)->funcs->populate_smc_tables ? (smu)->funcs->populate_smc_tables((smu)) : 0)
-#define smu_check_fw_version(smu) \
-	((smu)->funcs->check_fw_version ? (smu)->funcs->check_fw_version((smu)) : 0)
-#define smu_write_pptable(smu) \
-	((smu)->funcs->write_pptable ? (smu)->funcs->write_pptable((smu)) : 0)
-#define smu_set_min_dcef_deep_sleep(smu) \
-	((smu)->funcs->set_min_dcef_deep_sleep ? (smu)->funcs->set_min_dcef_deep_sleep((smu)) : 0)
-#define smu_set_tool_table_location(smu) \
-	((smu)->funcs->set_tool_table_location ? (smu)->funcs->set_tool_table_location((smu)) : 0)
-#define smu_notify_memory_pool_location(smu) \
-	((smu)->funcs->notify_memory_pool_location ? (smu)->funcs->notify_memory_pool_location((smu)) : 0)
-#define smu_gfx_off_control(smu, enable) \
-	((smu)->funcs->gfx_off_control ? (smu)->funcs->gfx_off_control((smu), (enable)) : 0)
-
-#define smu_write_watermarks_table(smu) \
-	((smu)->funcs->write_watermarks_table ? (smu)->funcs->write_watermarks_table((smu)) : 0)
-#define smu_set_last_dcef_min_deep_sleep_clk(smu) \
-	((smu)->funcs->set_last_dcef_min_deep_sleep_clk ? (smu)->funcs->set_last_dcef_min_deep_sleep_clk((smu)) : 0)
-#define smu_system_features_control(smu, en) \
-	((smu)->funcs->system_features_control ? (smu)->funcs->system_features_control((smu), (en)) : 0)
-#define smu_init_max_sustainable_clocks(smu) \
-	((smu)->funcs->init_max_sustainable_clocks ? (smu)->funcs->init_max_sustainable_clocks((smu)) : 0)
-#define smu_set_default_od_settings(smu, initialize) \
-	((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
-#define smu_set_fan_speed_rpm(smu, speed) \
-	((smu)->funcs->set_fan_speed_rpm ? (smu)->funcs->set_fan_speed_rpm((smu), (speed)) : 0)
-#define smu_send_smc_msg(smu, msg) \
-	((smu)->funcs->send_smc_msg? (smu)->funcs->send_smc_msg((smu), (msg)) : 0)
-#define smu_send_smc_msg_with_param(smu, msg, param) \
-	((smu)->funcs->send_smc_msg_with_param? (smu)->funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0)
-#define smu_read_smc_arg(smu, arg) \
-	((smu)->funcs->read_smc_arg? (smu)->funcs->read_smc_arg((smu), (arg)) : 0)
-#define smu_alloc_dpm_context(smu) \
-	((smu)->ppt_funcs->alloc_dpm_context ? (smu)->ppt_funcs->alloc_dpm_context((smu)) : 0)
-#define smu_init_display_count(smu, count) \
-	((smu)->funcs->init_display_count ? (smu)->funcs->init_display_count((smu), (count)) : 0)
-#define smu_feature_set_allowed_mask(smu) \
-	((smu)->funcs->set_allowed_mask? (smu)->funcs->set_allowed_mask((smu)) : 0)
-#define smu_feature_get_enabled_mask(smu, mask, num) \
-	((smu)->funcs->get_enabled_mask? (smu)->funcs->get_enabled_mask((smu), (mask), (num)) : 0)
-#define smu_is_dpm_running(smu) \
-	((smu)->ppt_funcs->is_dpm_running ? (smu)->ppt_funcs->is_dpm_running((smu)) : 0)
-#define smu_notify_display_change(smu) \
-	((smu)->funcs->notify_display_change? (smu)->funcs->notify_display_change((smu)) : 0)
-#define smu_store_powerplay_table(smu) \
-	((smu)->ppt_funcs->store_powerplay_table ? (smu)->ppt_funcs->store_powerplay_table((smu)) : 0)
-#define smu_check_powerplay_table(smu) \
-	((smu)->ppt_funcs->check_powerplay_table ? (smu)->ppt_funcs->check_powerplay_table((smu)) : 0)
-#define smu_append_powerplay_table(smu) \
-	((smu)->ppt_funcs->append_powerplay_table ? (smu)->ppt_funcs->append_powerplay_table((smu)) : 0)
-#define smu_set_default_dpm_table(smu) \
-	((smu)->ppt_funcs->set_default_dpm_table ? (smu)->ppt_funcs->set_default_dpm_table((smu)) : 0)
-#define smu_populate_umd_state_clk(smu) \
-	((smu)->ppt_funcs->populate_umd_state_clk ? (smu)->ppt_funcs->populate_umd_state_clk((smu)) : 0)
-#define smu_set_default_od8_settings(smu) \
-	((smu)->ppt_funcs->set_default_od8_settings ? (smu)->ppt_funcs->set_default_od8_settings((smu)) : 0)
-#define smu_get_power_limit(smu, limit, def) \
-	((smu)->ppt_funcs->get_power_limit ? (smu)->ppt_funcs->get_power_limit((smu), (limit), (def)) : 0)
-#define smu_set_power_limit(smu, limit) \
-	((smu)->funcs->set_power_limit ? (smu)->funcs->set_power_limit((smu), (limit)) : 0)
-#define smu_get_current_clk_freq(smu, clk_id, value) \
-	((smu)->funcs->get_current_clk_freq? (smu)->funcs->get_current_clk_freq((smu), (clk_id), (value)) : 0)
-#define smu_print_clk_levels(smu, clk_type, buf) \
-	((smu)->ppt_funcs->print_clk_levels ? (smu)->ppt_funcs->print_clk_levels((smu), (clk_type), (buf)) : 0)
-#define smu_force_clk_levels(smu, clk_type, level) \
-	((smu)->ppt_funcs->force_clk_levels ? (smu)->ppt_funcs->force_clk_levels((smu), (clk_type), (level)) : 0)
-#define smu_get_od_percentage(smu, type) \
-	((smu)->ppt_funcs->get_od_percentage ? (smu)->ppt_funcs->get_od_percentage((smu), (type)) : 0)
-#define smu_set_od_percentage(smu, type, value) \
-	((smu)->ppt_funcs->set_od_percentage ? (smu)->ppt_funcs->set_od_percentage((smu), (type), (value)) : 0)
-#define smu_od_edit_dpm_table(smu, type, input, size) \
-	((smu)->ppt_funcs->od_edit_dpm_table ? (smu)->ppt_funcs->od_edit_dpm_table((smu), (type), (input), (size)) : 0)
-#define smu_tables_init(smu, tab) \
-	((smu)->ppt_funcs->tables_init ? (smu)->ppt_funcs->tables_init((smu), (tab)) : 0)
-#define smu_set_thermal_fan_table(smu) \
-	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
-#define smu_start_thermal_control(smu) \
-	((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
-#define smu_read_sensor(smu, sensor, data, size) \
-	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
-#define smu_smc_read_sensor(smu, sensor, data, size) \
-	((smu)->funcs->read_sensor? (smu)->funcs->read_sensor((smu), (sensor), (data), (size)) : -EINVAL)
-#define smu_get_power_profile_mode(smu, buf) \
-	((smu)->ppt_funcs->get_power_profile_mode ? (smu)->ppt_funcs->get_power_profile_mode((smu), buf) : 0)
-#define smu_set_power_profile_mode(smu, param, param_size) \
-	((smu)->ppt_funcs->set_power_profile_mode ? (smu)->ppt_funcs->set_power_profile_mode((smu), (param), (param_size)) : 0)
-#define smu_pre_display_config_changed(smu) \
-	((smu)->ppt_funcs->pre_display_config_changed ? (smu)->ppt_funcs->pre_display_config_changed((smu)) : 0)
-#define smu_display_config_changed(smu) \
-	((smu)->ppt_funcs->display_config_changed ? (smu)->ppt_funcs->display_config_changed((smu)) : 0)
-#define smu_apply_clocks_adjust_rules(smu) \
-	((smu)->ppt_funcs->apply_clocks_adjust_rules ? (smu)->ppt_funcs->apply_clocks_adjust_rules((smu)) : 0)
-#define smu_notify_smc_dispaly_config(smu) \
-	((smu)->ppt_funcs->notify_smc_dispaly_config ? (smu)->ppt_funcs->notify_smc_dispaly_config((smu)) : 0)
-#define smu_force_dpm_limit_value(smu, highest) \
-	((smu)->ppt_funcs->force_dpm_limit_value ? (smu)->ppt_funcs->force_dpm_limit_value((smu), (highest)) : 0)
-#define smu_unforce_dpm_levels(smu) \
-	((smu)->ppt_funcs->unforce_dpm_levels ? (smu)->ppt_funcs->unforce_dpm_levels((smu)) : 0)
-#define smu_get_profiling_clk_mask(smu, level, sclk_mask, mclk_mask, soc_mask) \
-	((smu)->ppt_funcs->get_profiling_clk_mask ? (smu)->ppt_funcs->get_profiling_clk_mask((smu), (level), (sclk_mask), (mclk_mask), (soc_mask)) : 0)
-#define smu_set_cpu_power_state(smu) \
-	((smu)->ppt_funcs->set_cpu_power_state ? (smu)->ppt_funcs->set_cpu_power_state((smu)) : 0)
-#define smu_get_fan_control_mode(smu) \
-	((smu)->funcs->get_fan_control_mode ? (smu)->funcs->get_fan_control_mode((smu)) : 0)
-#define smu_set_fan_control_mode(smu, value) \
-	((smu)->funcs->set_fan_control_mode ? (smu)->funcs->set_fan_control_mode((smu), (value)) : 0)
-#define smu_get_fan_speed_percent(smu, speed) \
-	((smu)->ppt_funcs->get_fan_speed_percent ? (smu)->ppt_funcs->get_fan_speed_percent((smu), (speed)) : 0)
-#define smu_set_fan_speed_percent(smu, speed) \
-	((smu)->funcs->set_fan_speed_percent ? (smu)->funcs->set_fan_speed_percent((smu), (speed)) : 0)
-#define smu_get_fan_speed_rpm(smu, speed) \
-	((smu)->ppt_funcs->get_fan_speed_rpm ? (smu)->ppt_funcs->get_fan_speed_rpm((smu), (speed)) : 0)
-
-#define smu_msg_get_index(smu, msg) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL)
-#define smu_clk_get_index(smu, msg) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_clk_index? (smu)->ppt_funcs->get_smu_clk_index((smu), (msg)) : -EINVAL) : -EINVAL)
-#define smu_feature_get_index(smu, msg) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_feature_index? (smu)->ppt_funcs->get_smu_feature_index((smu), (msg)) : -EINVAL) : -EINVAL)
-#define smu_table_get_index(smu, tab) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_table_index? (smu)->ppt_funcs->get_smu_table_index((smu), (tab)) : -EINVAL) : -EINVAL)
-#define smu_power_get_index(smu, src) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_power_index? (smu)->ppt_funcs->get_smu_power_index((smu), (src)) : -EINVAL) : -EINVAL)
-#define smu_workload_get_type(smu, profile) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_workload_type? (smu)->ppt_funcs->get_workload_type((smu), (profile)) : -EINVAL) : -EINVAL)
-#define smu_run_afll_btc(smu) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->run_afll_btc? (smu)->ppt_funcs->run_afll_btc((smu)) : 0) : 0)
-#define smu_get_allowed_feature_mask(smu, feature_mask, num) \
-	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_allowed_feature_mask? (smu)->ppt_funcs->get_allowed_feature_mask((smu), (feature_mask), (num)) : 0) : 0)
-#define smu_set_deep_sleep_dcefclk(smu, clk) \
-	((smu)->funcs->set_deep_sleep_dcefclk ? (smu)->funcs->set_deep_sleep_dcefclk((smu), (clk)) : 0)
-#define smu_set_active_display_count(smu, count) \
-	((smu)->funcs->set_active_display_count ? (smu)->funcs->set_active_display_count((smu), (count)) : 0)
-#define smu_store_cc6_data(smu, st, cc6_dis, pst_dis, pst_sw_dis) \
-	((smu)->funcs->store_cc6_data ? (smu)->funcs->store_cc6_data((smu), (st), (cc6_dis), (pst_dis), (pst_sw_dis)) : 0)
-#define smu_get_clock_by_type(smu, type, clocks) \
-	((smu)->funcs->get_clock_by_type ? (smu)->funcs->get_clock_by_type((smu), (type), (clocks)) : 0)
-#define smu_get_max_high_clocks(smu, clocks) \
-	((smu)->funcs->get_max_high_clocks ? (smu)->funcs->get_max_high_clocks((smu), (clocks)) : 0)
-#define smu_get_clock_by_type_with_latency(smu, clk_type, clocks) \
-	((smu)->ppt_funcs->get_clock_by_type_with_latency ? (smu)->ppt_funcs->get_clock_by_type_with_latency((smu), (clk_type), (clocks)) : 0)
-#define smu_get_clock_by_type_with_voltage(smu, type, clocks) \
-	((smu)->ppt_funcs->get_clock_by_type_with_voltage ? (smu)->ppt_funcs->get_clock_by_type_with_voltage((smu), (type), (clocks)) : 0)
-#define smu_display_clock_voltage_request(smu, clock_req) \
-	((smu)->funcs->display_clock_voltage_request ? (smu)->funcs->display_clock_voltage_request((smu), (clock_req)) : 0)
-#define smu_display_disable_memory_clock_switch(smu, disable_memory_clock_switch) \
-	((smu)->ppt_funcs->display_disable_memory_clock_switch ? (smu)->ppt_funcs->display_disable_memory_clock_switch((smu), (disable_memory_clock_switch)) : -EINVAL)
-#define smu_get_dal_power_level(smu, clocks) \
-	((smu)->funcs->get_dal_power_level ? (smu)->funcs->get_dal_power_level((smu), (clocks)) : 0)
-#define smu_get_perf_level(smu, designation, level) \
-	((smu)->funcs->get_perf_level ? (smu)->funcs->get_perf_level((smu), (designation), (level)) : 0)
-#define smu_get_current_shallow_sleep_clocks(smu, clocks) \
-	((smu)->funcs->get_current_shallow_sleep_clocks ? (smu)->funcs->get_current_shallow_sleep_clocks((smu), (clocks)) : 0)
-#define smu_notify_smu_enable_pwe(smu) \
-	((smu)->funcs->notify_smu_enable_pwe ? (smu)->funcs->notify_smu_enable_pwe((smu)) : 0)
-#define smu_set_watermarks_for_clock_ranges(smu, clock_ranges) \
-	((smu)->funcs->set_watermarks_for_clock_ranges ? (smu)->funcs->set_watermarks_for_clock_ranges((smu), (clock_ranges)) : 0)
-#define smu_dpm_set_uvd_enable(smu, enable) \
-	((smu)->ppt_funcs->dpm_set_uvd_enable ? (smu)->ppt_funcs->dpm_set_uvd_enable((smu), (enable)) : 0)
-#define smu_dpm_set_vce_enable(smu, enable) \
-	((smu)->ppt_funcs->dpm_set_vce_enable ? (smu)->ppt_funcs->dpm_set_vce_enable((smu), (enable)) : 0)
-#define smu_set_xgmi_pstate(smu, pstate) \
-		((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0)
-#define smu_set_watermarks_table(smu, tab, clock_ranges) \
-	((smu)->ppt_funcs->set_watermarks_table ? (smu)->ppt_funcs->set_watermarks_table((smu), (tab), (clock_ranges)) : 0)
-#define smu_get_current_clk_freq_by_table(smu, clk_type, value) \
-	((smu)->ppt_funcs->get_current_clk_freq_by_table ? (smu)->ppt_funcs->get_current_clk_freq_by_table((smu), (clk_type), (value)) : 0)
-#define smu_thermal_temperature_range_update(smu, range, rw) \
-	((smu)->ppt_funcs->thermal_temperature_range_update? (smu)->ppt_funcs->thermal_temperature_range_update((smu), (range), (rw)) : 0)
-#define smu_get_thermal_temperature_range(smu, range) \
-	((smu)->ppt_funcs->get_thermal_temperature_range? (smu)->ppt_funcs->get_thermal_temperature_range((smu), (range)) : 0)
-#define smu_register_irq_handler(smu) \
-	((smu)->funcs->register_irq_handler ? (smu)->funcs->register_irq_handler(smu) : 0)
-#define smu_set_azalia_d3_pme(smu) \
-	((smu)->funcs->set_azalia_d3_pme ? (smu)->funcs->set_azalia_d3_pme((smu)) : 0)
-#define smu_get_dpm_ultimate_freq(smu, param, min, max) \
-		((smu)->funcs->get_dpm_ultimate_freq ? (smu)->funcs->get_dpm_ultimate_freq((smu), (param), (min), (max)) : 0)
-#define smu_get_max_sustainable_clocks_by_dc(smu, max_clocks) \
-	((smu)->funcs->get_max_sustainable_clocks_by_dc ? (smu)->funcs->get_max_sustainable_clocks_by_dc((smu), (max_clocks)) : 0)
-#define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \
-	((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0)
-#define smu_baco_is_support(smu) \
-	((smu)->funcs->baco_is_support? (smu)->funcs->baco_is_support((smu)) : false)
-#define smu_baco_get_state(smu, state) \
-	((smu)->funcs->baco_get_state? (smu)->funcs->baco_get_state((smu), (state)) : 0)
-#define smu_baco_reset(smu) \
-	((smu)->funcs->baco_reset? (smu)->funcs->baco_reset((smu)) : 0)
-#define smu_asic_set_performance_level(smu, level) \
-	((smu)->ppt_funcs->set_performance_level? (smu)->ppt_funcs->set_performance_level((smu), (level)) : -EINVAL);
-#define smu_dump_pptable(smu) \
-	((smu)->ppt_funcs->dump_pptable ? (smu)->ppt_funcs->dump_pptable((smu)) : 0)
-#define smu_get_dpm_uclk_limited(smu, clock, max) \
-		((smu)->ppt_funcs->get_dpm_uclk_limited ? (smu)->ppt_funcs->get_dpm_uclk_limited((smu), (clock), (max)) : -EINVAL)
+int smu_load_microcode(struct smu_context *smu);
+
+int smu_check_fw_status(struct smu_context *smu);
+
+int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
+
+#define smu_i2c_eeprom_init(smu, control) \
+		((smu)->ppt_funcs->i2c_eeprom_init ? (smu)->ppt_funcs->i2c_eeprom_init((control)) : -EINVAL)
+#define smu_i2c_eeprom_fini(smu, control) \
+		((smu)->ppt_funcs->i2c_eeprom_fini ? (smu)->ppt_funcs->i2c_eeprom_fini((control)) : -EINVAL)
+
+int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed);
+
+int smu_get_power_limit(struct smu_context *smu,
+			uint32_t *limit,
+			bool def,
+			bool lock_needed);
+
+int smu_set_power_limit(struct smu_context *smu, uint32_t limit);
+int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf);
+int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type);
+int smu_set_od_percentage(struct smu_context *smu, enum smu_clk_type type, uint32_t value);
+
+int smu_od_edit_dpm_table(struct smu_context *smu,
+			  enum PP_OD_DPM_TABLE_COMMAND type,
+			  long *input, uint32_t size);
+
+int smu_read_sensor(struct smu_context *smu,
+		    enum amd_pp_sensors sensor,
+		    void *data, uint32_t *size);
+int smu_get_power_profile_mode(struct smu_context *smu, char *buf);
+
+int smu_set_power_profile_mode(struct smu_context *smu,
+			       long *param,
+			       uint32_t param_size,
+			       bool lock_needed);
+int smu_get_fan_control_mode(struct smu_context *smu);
+int smu_set_fan_control_mode(struct smu_context *smu, int value);
+int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed);
+int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed);
+int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed);
+
+int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk);
+int smu_set_active_display_count(struct smu_context *smu, uint32_t count);
+
+int smu_get_clock_by_type(struct smu_context *smu,
+			  enum amd_pp_clock_type type,
+			  struct amd_pp_clocks *clocks);
+
+int smu_get_max_high_clocks(struct smu_context *smu,
+			    struct amd_pp_simple_clock_info *clocks);
+
+int smu_get_clock_by_type_with_latency(struct smu_context *smu,
+				       enum smu_clk_type clk_type,
+				       struct pp_clock_levels_with_latency *clocks);
+
+int smu_get_clock_by_type_with_voltage(struct smu_context *smu,
+				       enum amd_pp_clock_type type,
+				       struct pp_clock_levels_with_voltage *clocks);
+
+int smu_display_clock_voltage_request(struct smu_context *smu,
+				      struct pp_display_clock_request *clock_req);
+int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disable_memory_clock_switch);
+int smu_notify_smu_enable_pwe(struct smu_context *smu);
+
+int smu_set_xgmi_pstate(struct smu_context *smu,
+			uint32_t pstate);
+
+int smu_set_azalia_d3_pme(struct smu_context *smu);
+
+bool smu_baco_is_support(struct smu_context *smu);
+
+int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state);
+
+int smu_baco_enter(struct smu_context *smu);
+int smu_baco_exit(struct smu_context *smu);
 
+int smu_mode2_reset(struct smu_context *smu);
 
 extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
 				   uint16_t *size, uint8_t *frev, uint8_t *crev,
@@ -799,6 +678,10 @@ int smu_sys_get_pp_table(struct smu_context *smu, void **table);
 int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size);
 int smu_get_power_num_states(struct smu_context *smu, struct pp_states_info *state_info);
 enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu);
+int smu_write_watermarks_table(struct smu_context *smu);
+int smu_set_watermarks_for_clock_ranges(
+		struct smu_context *smu,
+		struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
 
 /* smu to display interface */
 extern int smu_display_configuration_change(struct smu_context *smu, const
@@ -809,7 +692,8 @@ extern int smu_get_current_clocks(struct smu_context *smu,
 extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, bool gate);
 extern int smu_handle_task(struct smu_context *smu,
 			   enum amd_dpm_forced_level level,
-			   enum amd_pp_task task_id);
+			   enum amd_pp_task task_id,
+			   bool lock_needed);
 int smu_switch_power_profile(struct smu_context *smu,
 			     enum PP_SMC_POWER_PROFILE type,
 			     bool en);
@@ -819,19 +703,40 @@ int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_typ
 int smu_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type,
 			    uint32_t *value);
 int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
-			   uint32_t *min, uint32_t *max);
+			   uint32_t *min, uint32_t *max, bool lock_needed);
 int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 			    uint32_t min, uint32_t max);
 int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 			    uint32_t min, uint32_t max);
+int smu_get_dpm_level_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t *min_value, uint32_t *max_value);
 enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu);
 int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level);
 int smu_set_display_count(struct smu_context *smu, uint32_t count);
 bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type);
-int smu_feature_update_enable_state(struct smu_context *smu, uint64_t feature_mask, bool enabled);
 const char *smu_get_message_name(struct smu_context *smu, enum smu_message_type type);
 const char *smu_get_feature_name(struct smu_context *smu, enum smu_feature_mask feature);
 size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf);
 int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask);
+int smu_force_clk_levels(struct smu_context *smu,
+			 enum smu_clk_type clk_type,
+			 uint32_t mask,
+			 bool lock_needed);
+int smu_set_mp1_state(struct smu_context *smu,
+		      enum pp_mp1_state mp1_state);
+int smu_set_df_cstate(struct smu_context *smu,
+		      enum pp_df_cstate state);
+
+int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
+					 struct pp_smu_nv_clock_table *max_clocks);
+
+int smu_get_uclk_dpm_states(struct smu_context *smu,
+			    unsigned int *clock_values_in_khz,
+			    unsigned int *num_states);
+
+int smu_get_dpm_clock_table(struct smu_context *smu,
+			    struct dpm_clocks *clock_table);
+
+uint32_t smu_get_pptable_power_limit(struct smu_context *smu);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
index 78e5927b7711..e3291259b249 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
@@ -95,8 +95,7 @@
 
 //BTC
 #define PPSMC_MSG_RunAfllBtc                     0x30
-#define PPSMC_MSG_RunGfxDcBtc                    0x31
-#define PPSMC_MSG_RunSocDcBtc                    0x32
+#define PPSMC_MSG_RunDcBtc                       0x31
 
 //Debug
 #define PPSMC_MSG_DramLogSetDramAddrHigh         0x33
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 7bf9a14bfa0b..2ffb666b97e6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -355,6 +355,10 @@ struct pp_hwmgr_func {
 	int (*set_mp1_state)(struct pp_hwmgr *hwmgr, enum pp_mp1_state mp1_state);
 	int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode);
 	int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire);
+	int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state);
+	int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate);
+	int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr,
+					bool disable);
 };
 
 struct pp_table_func {
@@ -737,6 +741,7 @@ struct pp_hwmgr {
 	uint32_t smu_version;
 	bool not_vf;
 	bool pm_en;
+	bool pp_one_vf;
 	struct mutex smu_lock;
 
 	uint32_t pp_table_version;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
index e02950b505fa..ce5b5011c122 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
@@ -137,29 +137,29 @@
 #define FEATURE_DS_SOCCLK_MASK            (1 << FEATURE_DS_SOCCLK_BIT            )
 #define FEATURE_DS_LCLK_MASK              (1 << FEATURE_DS_LCLK_BIT              )
 #define FEATURE_DS_FCLK_MASK              (1 << FEATURE_DS_FCLK_BIT              )
-#define FEATURE_DS_LCLK_MASK              (1 << FEATURE_DS_LCLK_BIT              )
+#define FEATURE_DS_UCLK_MASK              (1 << FEATURE_DS_UCLK_BIT              )
 #define FEATURE_GFX_ULV_MASK              (1 << FEATURE_GFX_ULV_BIT              )
-#define FEATURE_VCN_PG_MASK               (1 << FEATURE_VCN_PG_BIT               )
+#define FEATURE_DPM_VCN_MASK              (1 << FEATURE_DPM_VCN_BIT              )
 #define FEATURE_RSMU_SMN_CG_MASK          (1 << FEATURE_RSMU_SMN_CG_BIT          )
 #define FEATURE_WAFL_CG_MASK              (1 << FEATURE_WAFL_CG_BIT              )
 
 #define FEATURE_PPT_MASK                  (1 << FEATURE_PPT_BIT                  )
 #define FEATURE_TDC_MASK                  (1 << FEATURE_TDC_BIT                  )
-#define FEATURE_APCC_MASK                 (1 << FEATURE_APCC_BIT                 )
+#define FEATURE_APCC_PLUS_MASK            (1 << FEATURE_APCC_PLUS_BIT            )
 #define FEATURE_VR0HOT_MASK               (1 << FEATURE_VR0HOT_BIT               )
 #define FEATURE_VR1HOT_MASK               (1 << FEATURE_VR1HOT_BIT               )
 #define FEATURE_FW_CTF_MASK               (1 << FEATURE_FW_CTF_BIT               )
 #define FEATURE_FAN_CONTROL_MASK          (1 << FEATURE_FAN_CONTROL_BIT          )
 #define FEATURE_THERMAL_MASK              (1 << FEATURE_THERMAL_BIT              )
 
-#define FEATURE_OUT_OF_BAND_MONITOR_MASK  (1 << EATURE_OUT_OF_BAND_MONITOR_BIT   )
-#define FEATURE_TEMP_DEPENDENT_VMIN_MASK  (1 << FEATURE_TEMP_DEPENDENT_VMIN_MASK )
+#define FEATURE_OUT_OF_BAND_MONITOR_MASK  (1 << FEATURE_OUT_OF_BAND_MONITOR_BIT   )
+#define FEATURE_TEMP_DEPENDENT_VMIN_MASK  (1 << FEATURE_TEMP_DEPENDENT_VMIN_BIT )
 
 
 //FIXME need updating
 // Debug Overrides Bitmask
 #define DPM_OVERRIDE_DISABLE_UCLK_PID               0x00000001
-#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK      0x00000002
+#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK     0x00000002
 
 // I2C Config Bit Defines
 #define I2C_CONTROLLER_ENABLED           1
@@ -423,18 +423,30 @@ typedef enum {
 } PwrConfig_e;
 
 typedef enum {
-  XGMI_LINK_RATE_12 = 0,  // 12Gbps
-  XGMI_LINK_RATE_16,      // 16Gbps
-  XGMI_LINK_RATE_22,      // 22Gbps
-  XGMI_LINK_RATE_25,      // 25Gbps
+  XGMI_LINK_RATE_2 = 2,    // 2Gbps
+  XGMI_LINK_RATE_4 = 4,    // 4Gbps
+  XGMI_LINK_RATE_8 = 8,    // 8Gbps
+  XGMI_LINK_RATE_12 = 12,  // 12Gbps
+  XGMI_LINK_RATE_16 = 16,  // 16Gbps
+  XGMI_LINK_RATE_17 = 17,  // 17Gbps
+  XGMI_LINK_RATE_18 = 18,  // 18Gbps
+  XGMI_LINK_RATE_19 = 19,  // 19Gbps
+  XGMI_LINK_RATE_20 = 20,  // 20Gbps
+  XGMI_LINK_RATE_21 = 21,  // 21Gbps
+  XGMI_LINK_RATE_22 = 22,  // 22Gbps
+  XGMI_LINK_RATE_23 = 23,  // 23Gbps
+  XGMI_LINK_RATE_24 = 24,  // 24Gbps
+  XGMI_LINK_RATE_25 = 25,  // 25Gbps
   XGMI_LINK_RATE_COUNT
 } XGMI_LINK_RATE_e;
 
 typedef enum {
-  XGMI_LINK_WIDTH_2 = 0, // x2
-  XGMI_LINK_WIDTH_4,     // x4
-  XGMI_LINK_WIDTH_8,     // x8
-  XGMI_LINK_WIDTH_16,    // x16
+  XGMI_LINK_WIDTH_1 = 1,   // x1
+  XGMI_LINK_WIDTH_2 = 2,   // x2
+  XGMI_LINK_WIDTH_4 = 4,   // x4
+  XGMI_LINK_WIDTH_8 = 8,   // x8
+  XGMI_LINK_WIDTH_9 = 9,   // x9
+  XGMI_LINK_WIDTH_16 = 16, // x16
   XGMI_LINK_WIDTH_COUNT
 } XGMI_LINK_WIDTH_e;
 
@@ -610,8 +622,14 @@ typedef struct {
   uint16_t     PccThresholdHigh;
   uint32_t     PaddingAPCC[6];  //FIXME pending SPEC
 
+  // OOB Settings
+  uint16_t BasePerformanceCardPower;
+  uint16_t MaxPerformanceCardPower;
+  uint16_t BasePerformanceFrequencyCap;   //In Mhz
+  uint16_t MaxPerformanceFrequencyCap;    //In Mhz
+
   // SECTION: Reserved
-  uint32_t     Reserved[11];
+  uint32_t     Reserved[9];
 
   // SECTION: BOARD PARAMETERS
 
@@ -696,7 +714,11 @@ typedef struct {
   uint8_t      GpioI2cSda;          // Serial Data
   uint16_t     GpioPadding;
 
-  uint32_t     BoardReserved[9];
+  // Platform input telemetry voltage coefficient
+  uint32_t     BoardVoltageCoeffA;    // decode by /1000
+  uint32_t     BoardVoltageCoeffB;    // decode by /1000
+
+  uint32_t     BoardReserved[7];
 
   // Padding for MMHUB - do not modify this
   uint32_t     MmHubPadding[8]; // SMU internal use
@@ -802,12 +824,11 @@ typedef struct {
 
   uint32_t P2VCharzFreq[AVFS_VOLTAGE_COUNT]; // in 10KHz units
 
-  uint32_t EnabledAvfsModules[2];
+  uint32_t EnabledAvfsModules[3];
 
   uint32_t MmHubPadding[8]; // SMU internal use
 } AvfsFuseOverride_t;
 
-/* NOT CURRENTLY USED
 typedef struct {
   uint8_t   Gfx_ActiveHystLimit;
   uint8_t   Gfx_IdleHystLimit;
@@ -850,7 +871,6 @@ typedef struct {
 
   uint32_t  MmHubPadding[8]; // SMU internal use
 } DpmActivityMonitorCoeffInt_t;
-*/
 
 // These defines are used with the following messages:
 // SMC_MSG_TransferTableDram2Smu
@@ -862,10 +882,11 @@ typedef struct {
 #define TABLE_PMSTATUSLOG             4
 #define TABLE_SMU_METRICS             5
 #define TABLE_DRIVER_SMU_CONFIG       6
-//#define TABLE_ACTIVITY_MONITOR_COEFF  7
 #define TABLE_OVERDRIVE               7
 #define TABLE_WAFL_XGMI_TOPOLOGY      8
-#define TABLE_COUNT                   9
+#define TABLE_I2C_COMMANDS            9
+#define TABLE_ACTIVITY_MONITOR_COEFF  10
+#define TABLE_COUNT                   11
 
 // These defines are used with the SMC_MSG_SetUclkFastSwitch message.
 typedef enum {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
index c27c82851468..2f85a34c0591 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if 
 // any structure is changed in this file
-#define SMU12_DRIVER_IF_VERSION 10
+#define SMU12_DRIVER_IF_VERSION 11
 
 typedef struct {
   int32_t value;
@@ -192,6 +192,11 @@ typedef struct {
   uint16_t SocTemperature;              //[centi-Celsius]
   uint16_t ThrottlerStatus;
   uint16_t spare;
+
+  uint16_t StapmOriginalLimit;          //[mW]
+  uint16_t StapmCurrentLimit;           //[mW]
+  uint16_t ApuPower;              //[mW]
+  uint16_t dGpuPower;               //[mW]
 } SmuMetrics_t;
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
index b0dd05d431dd..d8c9b7f91fcc 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
@@ -114,6 +114,7 @@
        __SMU_DUMMY_MAP(PowerDownJpeg),                \
        __SMU_DUMMY_MAP(BacoAudioD3PME),               \
        __SMU_DUMMY_MAP(ArmD3),                        \
+       __SMU_DUMMY_MAP(RunDcBtc),                     \
        __SMU_DUMMY_MAP(RunGfxDcBtc),                  \
        __SMU_DUMMY_MAP(RunSocDcBtc),                  \
        __SMU_DUMMY_MAP(SetMemoryChannelEnable),       \
@@ -168,6 +169,7 @@
 	__SMU_DUMMY_MAP(PowerGateAtHub),              \
 	__SMU_DUMMY_MAP(SetSoftMinJpeg),              \
 	__SMU_DUMMY_MAP(SetHardMinFclkByFreq),        \
+	__SMU_DUMMY_MAP(DFCstateControl), \
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
@@ -251,6 +253,7 @@ enum smu_clk_type {
        __SMU_DUMMY_MAP(TEMP_DEPENDENT_VMIN),           	\
        __SMU_DUMMY_MAP(MMHUB_PG),                      	\
        __SMU_DUMMY_MAP(ATHUB_PG),                      	\
+       __SMU_DUMMY_MAP(APCC_DFLL),                     	\
        __SMU_DUMMY_MAP(WAFL_CG),
 
 #undef __SMU_DUMMY_MAP
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index 5bda8539447a..d5314d12628a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -27,7 +27,7 @@
 
 #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF
 #define SMU11_DRIVER_IF_VERSION_VG20 0x13
-#define SMU11_DRIVER_IF_VERSION_ARCT 0x09
+#define SMU11_DRIVER_IF_VERSION_ARCT 0x12
 #define SMU11_DRIVER_IF_VERSION_NV10 0x33
 #define SMU11_DRIVER_IF_VERSION_NV14 0x34
 
@@ -48,6 +48,8 @@
 
 #define SMU11_TOOL_SIZE			0x19000
 
+#define MAX_PCIE_CONF 2
+
 #define CLK_MAP(clk, index) \
 	[SMU_##clk] = {1, (index)}
 
@@ -88,6 +90,11 @@ struct smu_11_0_dpm_table {
 	uint32_t    max;        /* MHz */
 };
 
+struct smu_11_0_pcie_table {
+        uint8_t  pcie_gen[MAX_PCIE_CONF];
+        uint8_t  pcie_lane[MAX_PCIE_CONF];
+};
+
 struct smu_11_0_dpm_tables {
 	struct smu_11_0_dpm_table        soc_table;
 	struct smu_11_0_dpm_table        gfx_table;
@@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables {
 	struct smu_11_0_dpm_table        display_table;
 	struct smu_11_0_dpm_table        phy_table;
 	struct smu_11_0_dpm_table        fclk_table;
+	struct smu_11_0_pcie_table       pcie_table;
 };
 
 struct smu_11_0_dpm_context {
@@ -130,6 +138,133 @@ enum smu_v11_0_baco_seq {
 	BACO_SEQ_COUNT,
 };
 
-void smu_v11_0_set_smu_funcs(struct smu_context *smu);
+int smu_v11_0_init_microcode(struct smu_context *smu);
+
+int smu_v11_0_load_microcode(struct smu_context *smu);
+
+int smu_v11_0_init_smc_tables(struct smu_context *smu);
+
+int smu_v11_0_fini_smc_tables(struct smu_context *smu);
+
+int smu_v11_0_init_power(struct smu_context *smu);
+
+int smu_v11_0_fini_power(struct smu_context *smu);
+
+int smu_v11_0_check_fw_status(struct smu_context *smu);
+
+int smu_v11_0_setup_pptable(struct smu_context *smu);
+
+int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu);
+
+int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu);
+
+int smu_v11_0_check_pptable(struct smu_context *smu);
+
+int smu_v11_0_parse_pptable(struct smu_context *smu);
+
+int smu_v11_0_populate_smc_pptable(struct smu_context *smu);
+
+int smu_v11_0_check_fw_version(struct smu_context *smu);
+
+int smu_v11_0_write_pptable(struct smu_context *smu);
+
+int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu);
+
+int smu_v11_0_set_driver_table_location(struct smu_context *smu);
+
+int smu_v11_0_set_tool_table_location(struct smu_context *smu);
+
+int smu_v11_0_notify_memory_pool_location(struct smu_context *smu);
+
+int smu_v11_0_system_features_control(struct smu_context *smu,
+					     bool en);
+
+int
+smu_v11_0_send_msg_with_param(struct smu_context *smu,
+			      enum smu_message_type msg,
+			      uint32_t param);
+
+int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg);
+
+int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count);
+
+int smu_v11_0_set_allowed_mask(struct smu_context *smu);
+
+int smu_v11_0_get_enabled_mask(struct smu_context *smu,
+				      uint32_t *feature_mask, uint32_t num);
+
+int smu_v11_0_notify_display_change(struct smu_context *smu);
+
+int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n);
+
+int smu_v11_0_get_current_clk_freq(struct smu_context *smu,
+					  enum smu_clk_type clk_id,
+					  uint32_t *value);
+
+int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu);
+
+int smu_v11_0_start_thermal_control(struct smu_context *smu);
+
+int smu_v11_0_stop_thermal_control(struct smu_context *smu);
+
+int smu_v11_0_read_sensor(struct smu_context *smu,
+				 enum amd_pp_sensors sensor,
+				 void *data, uint32_t *size);
+
+int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk);
+
+int
+smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
+					struct pp_display_clock_request
+					*clock_req);
+
+uint32_t
+smu_v11_0_get_fan_control_mode(struct smu_context *smu);
+
+int
+smu_v11_0_set_fan_control_mode(struct smu_context *smu,
+			       uint32_t mode);
+
+int
+smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed);
+
+int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
+				       uint32_t speed);
+
+int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
+				     uint32_t pstate);
+
+int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable);
+
+int smu_v11_0_register_irq_handler(struct smu_context *smu);
+
+int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu);
+
+int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
+		struct pp_smu_nv_clock_table *max_clocks);
+
+bool smu_v11_0_baco_is_support(struct smu_context *smu);
+
+enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu);
+
+int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state);
+
+int smu_v11_0_baco_enter(struct smu_context *smu);
+int smu_v11_0_baco_exit(struct smu_context *smu);
+
+int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
+						 uint32_t *min, uint32_t *max);
+
+int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t min, uint32_t max);
+
+int smu_v11_0_override_pcie_parameters(struct smu_context *smu);
+
+int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size);
+
+uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu);
+
+int smu_v11_0_set_performance_level(struct smu_context *smu,
+				    enum amd_dpm_forced_level level);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
index 86cdc3393eac..b2f96a101124 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
@@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table
       struct smu_11_0_power_saving_clock_table      power_saving_clock;
       struct smu_11_0_overdrive_table               overdrive_table;
 
+#ifndef SMU_11_0_PARTIAL_PPTABLE
       PPTable_t smc_pptable;                        //PPTable_t in smu11_driver_if.h
+#endif
 } __attribute__((packed));
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
index acf3db12f59f..d79e54b5ebf6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
@@ -37,6 +37,59 @@ struct smu_12_0_cmn2aisc_mapping {
 	int	map_to;
 };
 
-void smu_v12_0_set_smu_funcs(struct smu_context *smu);
+int smu_v12_0_send_msg_without_waiting(struct smu_context *smu,
+					      uint16_t msg);
+
+int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg);
+
+int smu_v12_0_wait_for_response(struct smu_context *smu);
+
+int
+smu_v12_0_send_msg_with_param(struct smu_context *smu,
+			      enum smu_message_type msg,
+			      uint32_t param);
+
+int smu_v12_0_check_fw_status(struct smu_context *smu);
+
+int smu_v12_0_check_fw_version(struct smu_context *smu);
+
+int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate);
+
+int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate);
+
+int smu_v12_0_powergate_jpeg(struct smu_context *smu, bool gate);
+
+int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable);
+
+int smu_v12_0_read_sensor(struct smu_context *smu,
+			  enum amd_pp_sensors sensor,
+			  void *data, uint32_t *size);
+
+uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu);
+
+int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable);
+
+int smu_v12_0_init_smc_tables(struct smu_context *smu);
+
+int smu_v12_0_fini_smc_tables(struct smu_context *smu);
+
+int smu_v12_0_populate_smc_tables(struct smu_context *smu);
+
+int smu_v12_0_get_enabled_mask(struct smu_context *smu,
+				      uint32_t *feature_mask, uint32_t num);
+
+int smu_v12_0_get_current_clk_freq(struct smu_context *smu,
+					  enum smu_clk_type clk_id,
+					  uint32_t *value);
+
+int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
+						 uint32_t *min, uint32_t *max);
+
+int smu_v12_0_mode2_reset(struct smu_context *smu);
+
+int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t min, uint32_t max);
+
+int smu_v12_0_set_driver_table_location(struct smu_context *smu);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
index a0883038f3c3..0c66f0fe1aaf 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
@@ -120,7 +120,8 @@
 #define PPSMC_MSG_SetMGpuFanBoostLimitRpm        0x5D
 #define PPSMC_MSG_GetAVFSVoltageByDpm            0x5F
 #define PPSMC_MSG_BacoWorkAroundFlushVDCI        0x60
-#define PPSMC_Message_Count                      0x61
+#define PPSMC_MSG_DFCstateControl                0x63
+#define PPSMC_Message_Count                      0x64
 
 typedef uint32_t PPSMC_Result;
 typedef uint32_t PPSMC_Msg;
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 328e258a6895..93c66c69ca28 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "smu_v11_0.h"
@@ -35,6 +36,7 @@
 #include "navi10_ppt.h"
 #include "smu_v11_0_pptable.h"
 #include "smu_v11_0_ppsmc.h"
+#include "nbio/nbio_7_4_sh_mask.h"
 
 #include "asic_reg/mp/mp_11_0_sh_mask.h"
 
@@ -177,6 +179,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_feature_mask_map[SMU_FEATURE_COUN
 	FEA_MAP(TEMP_DEPENDENT_VMIN),
 	FEA_MAP(MMHUB_PG),
 	FEA_MAP(ATHUB_PG),
+	FEA_MAP(APCC_DFLL),
 };
 
 static struct smu_11_0_cmn2aisc_mapping navi10_table_map[SMU_TABLE_COUNT] = {
@@ -327,40 +330,52 @@ navi10_get_allowed_feature_mask(struct smu_context *smu,
 	memset(feature_mask, 0, sizeof(uint32_t) * num);
 
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_PREFETCHER_BIT)
-				| FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT)
-				| FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT)
 				| FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT)
-				| FEATURE_MASK(FEATURE_DPM_LINK_BIT)
-				| FEATURE_MASK(FEATURE_GFX_ULV_BIT)
 				| FEATURE_MASK(FEATURE_RSMU_SMN_CG_BIT)
 				| FEATURE_MASK(FEATURE_DS_SOCCLK_BIT)
 				| FEATURE_MASK(FEATURE_PPT_BIT)
 				| FEATURE_MASK(FEATURE_TDC_BIT)
 				| FEATURE_MASK(FEATURE_GFX_EDC_BIT)
+				| FEATURE_MASK(FEATURE_APCC_PLUS_BIT)
 				| FEATURE_MASK(FEATURE_VR0HOT_BIT)
 				| FEATURE_MASK(FEATURE_FAN_CONTROL_BIT)
 				| FEATURE_MASK(FEATURE_THERMAL_BIT)
 				| FEATURE_MASK(FEATURE_LED_DISPLAY_BIT)
-				| FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT)
-				| FEATURE_MASK(FEATURE_DS_GFXCLK_BIT)
+				| FEATURE_MASK(FEATURE_DS_LCLK_BIT)
 				| FEATURE_MASK(FEATURE_DS_DCEFCLK_BIT)
 				| FEATURE_MASK(FEATURE_FW_DSTATE_BIT)
 				| FEATURE_MASK(FEATURE_BACO_BIT)
 				| FEATURE_MASK(FEATURE_ACDC_BIT)
 				| FEATURE_MASK(FEATURE_GFX_SS_BIT)
 				| FEATURE_MASK(FEATURE_APCC_DFLL_BIT)
-				| FEATURE_MASK(FEATURE_FW_CTF_BIT);
+				| FEATURE_MASK(FEATURE_FW_CTF_BIT)
+				| FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
+
+	if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT);
+
+	if (adev->pm.pp_feature & PP_SCLK_DPM_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT);
+
+	if (adev->pm.pp_feature & PP_PCIE_DPM_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_LINK_BIT);
+
+	if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT);
 
 	if (adev->pm.pp_feature & PP_MCLK_DPM_MASK)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT)
 				| FEATURE_MASK(FEATURE_MEM_VDDCI_SCALING_BIT)
 				| FEATURE_MASK(FEATURE_MEM_MVDD_SCALING_BIT);
 
-	if (adev->pm.pp_feature & PP_GFXOFF_MASK) {
+	if (adev->pm.pp_feature & PP_ULV_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT);
+
+	if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);
+
+	if (adev->pm.pp_feature & PP_GFXOFF_MASK)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFXOFF_BIT);
-		/* TODO: remove it once fw fix the bug */
-		*(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_FW_DSTATE_BIT);
-	}
 
 	if (smu->adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MMHUB_PG_BIT);
@@ -369,8 +384,10 @@ navi10_get_allowed_feature_mask(struct smu_context *smu,
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ATHUB_PG_BIT);
 
 	if (smu->adev->pg_flags & AMD_PG_SUPPORT_VCN)
-		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VCN_PG_BIT)
-				| FEATURE_MASK(FEATURE_JPEG_PG_BIT);
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VCN_PG_BIT);
+
+	if (smu->adev->pg_flags & AMD_PG_SUPPORT_JPEG)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_JPEG_PG_BIT);
 
 	/* disable DPM UCLK and DS SOCCLK on navi10 A0 secure board */
 	if (is_asic_secure(smu)) {
@@ -538,6 +555,10 @@ static int navi10_tables_init(struct smu_context *smu, struct smu_table *tables)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
 
+	smu_table->watermarks_table = kzalloc(sizeof(Watermarks_t), GFP_KERNEL);
+	if (!smu_table->watermarks_table)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -547,17 +568,20 @@ static int navi10_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }
@@ -585,6 +609,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu)
 	struct smu_table_context *table_context = &smu->smu_table;
 	struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
 	PPTable_t *driver_ppt = NULL;
+	int i;
 
 	driver_ppt = table_context->driver_pptable;
 
@@ -615,6 +640,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu)
 	dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0];
 	dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1];
 
+	for (i = 0; i < MAX_PCIE_CONF; i++) {
+		dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i];
+		dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i];
+	}
+
 	return 0;
 }
 
@@ -644,6 +674,31 @@ static int navi10_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
 	return ret;
 }
 
+static int navi10_dpm_set_jpeg_enable(struct smu_context *smu, bool enable)
+{
+	struct smu_power_context *smu_power = &smu->smu_power;
+	struct smu_power_gate *power_gate = &smu_power->power_gate;
+	int ret = 0;
+
+	if (enable) {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) {
+			ret = smu_send_smc_msg(smu, SMU_MSG_PowerUpJpeg);
+			if (ret)
+				return ret;
+		}
+		power_gate->jpeg_gated = false;
+	} else {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) {
+			ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownJpeg);
+			if (ret)
+				return ret;
+		}
+		power_gate->jpeg_gated = true;
+	}
+
+	return ret;
+}
+
 static int navi10_get_current_clk_freq_by_table(struct smu_context *smu,
 				       enum smu_clk_type clk_type,
 				       uint32_t *value)
@@ -677,13 +732,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu
 	return dpm_desc->SnapToDiscrete == 0 ? true : false;
 }
 
+static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature)
+{
+	return od_table->cap[feature];
+}
+
+
 static int navi10_print_clk_levels(struct smu_context *smu,
 			enum smu_clk_type clk_type, char *buf)
 {
+	uint16_t *curve_settings;
 	int i, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0;
 	uint32_t freq_values[3] = {0};
 	uint32_t mark_index = 0;
+	struct smu_table_context *table_context = &smu->smu_table;
+	uint32_t gen_speed, lane_width;
+	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
+	struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+	struct amdgpu_device *adev = smu->adev;
+	PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable;
+	OverDriveTable_t *od_table =
+		(OverDriveTable_t *)table_context->overdrive_table;
+	struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
 
 	switch (clk_type) {
 	case SMU_GFXCLK:
@@ -734,6 +805,69 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 
 		}
 		break;
+	case SMU_PCIE:
+		gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
+			     PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
+			>> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+		lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
+			      PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+			>> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+		for (i = 0; i < NUM_LINK_LEVELS; i++)
+			size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
+					(dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," :
+					(dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," :
+					(dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," :
+					(dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "",
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" :
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" :
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" :
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" :
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" :
+					(dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "",
+					pptable->LclkFreq[i],
+					(gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) &&
+					(lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ?
+					"*" : "");
+		break;
+	case SMU_OD_SCLK:
+		if (!smu->od_enabled || !od_table || !od_settings)
+			break;
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS))
+			break;
+		size += sprintf(buf + size, "OD_SCLK:\n");
+		size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
+		break;
+	case SMU_OD_MCLK:
+		if (!smu->od_enabled || !od_table || !od_settings)
+			break;
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX))
+			break;
+		size += sprintf(buf + size, "OD_MCLK:\n");
+		size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax);
+		break;
+	case SMU_OD_VDDC_CURVE:
+		if (!smu->od_enabled || !od_table || !od_settings)
+			break;
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE))
+			break;
+		size += sprintf(buf + size, "OD_VDDC_CURVE:\n");
+		for (i = 0; i < 3; i++) {
+			switch (i) {
+			case 0:
+				curve_settings = &od_table->GfxclkFreq1;
+				break;
+			case 1:
+				curve_settings = &od_table->GfxclkFreq2;
+				break;
+			case 2:
+				curve_settings = &od_table->GfxclkFreq3;
+				break;
+			default:
+				break;
+			}
+			size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE);
+		}
+		break;
 	default:
 		break;
 	}
@@ -789,13 +923,13 @@ static int navi10_populate_umd_state_clk(struct smu_context *smu)
 	int ret = 0;
 	uint32_t min_sclk_freq = 0, min_mclk_freq = 0;
 
-	ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL);
+	ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL, false);
 	if (ret)
 		return ret;
 
 	smu->pstate_sclk = min_sclk_freq * 100;
 
-	ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL);
+	ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL, false);
 	if (ret)
 		return ret;
 
@@ -848,7 +982,7 @@ static int navi10_pre_display_config_changed(struct smu_context *smu)
 		return ret;
 
 	if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) {
-		ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq);
+		ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq, false);
 		if (ret)
 			return ret;
 		ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, max_freq);
@@ -898,7 +1032,7 @@ static int navi10_force_dpm_limit_value(struct smu_context *smu, bool highest)
 
 	for (i = 0; i < ARRAY_SIZE(clks); i++) {
 		clk_type = clks[i];
-		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq);
+		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false);
 		if (ret)
 			return ret;
 
@@ -925,7 +1059,7 @@ static int navi10_unforce_dpm_levels(struct smu_context *smu)
 
 	for (i = 0; i < ARRAY_SIZE(clks); i++) {
 		clk_type = clks[i];
-		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq);
+		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false);
 		if (ret)
 			return ret;
 
@@ -1247,7 +1381,7 @@ static int navi10_get_profiling_clk_mask(struct smu_context *smu,
 	return ret;
 }
 
-static int navi10_notify_smc_dispaly_config(struct smu_context *smu)
+static int navi10_notify_smc_display_config(struct smu_context *smu)
 {
 	struct smu_clocks min_clocks = {0};
 	struct pp_display_clock_request clock_req;
@@ -1260,7 +1394,9 @@ static int navi10_notify_smc_dispaly_config(struct smu_context *smu)
 	if (smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
 		clock_req.clock_type = amd_pp_dcef_clock;
 		clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10;
-		if (!smu_display_clock_voltage_request(smu, &clock_req)) {
+
+		ret = smu_v11_0_display_clock_voltage_request(smu, &clock_req);
+		if (!ret) {
 			if (smu_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
 				ret = smu_send_smc_msg_with_param(smu,
 								  SMU_MSG_SetMinDeepSleepDcefclk,
@@ -1414,7 +1550,7 @@ static int navi10_read_sensor(struct smu_context *smu,
 		*size = 4;
 		break;
 	default:
-		ret = smu_smc_read_sensor(smu, sensor, data, size);
+		ret = smu_v11_0_read_sensor(smu, sensor, data, size);
 	}
 	mutex_unlock(&smu->sensor_lock);
 
@@ -1450,31 +1586,94 @@ static int navi10_get_uclk_dpm_states(struct smu_context *smu, uint32_t *clocks_
 	return 0;
 }
 
-static int navi10_set_peak_clock_by_device(struct smu_context *smu)
+static int navi10_set_performance_level(struct smu_context *smu,
+					enum amd_dpm_forced_level level);
+
+static int navi10_set_standard_performance_level(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 	uint32_t sclk_freq = 0, uclk_freq = 0;
-	uint32_t uclk_level = 0;
 
-	switch (adev->pdev->revision) {
-	case 0xf0: /* XTX */
-	case 0xc0:
-		sclk_freq = NAVI10_PEAK_SCLK_XTX;
-		break;
-	case 0xf1: /* XT */
-	case 0xc1:
-		sclk_freq = NAVI10_PEAK_SCLK_XT;
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+		sclk_freq = NAVI10_UMD_PSTATE_PROFILING_GFXCLK;
+		uclk_freq = NAVI10_UMD_PSTATE_PROFILING_MEMCLK;
 		break;
-	default: /* XL */
-		sclk_freq = NAVI10_PEAK_SCLK_XL;
+	case CHIP_NAVI14:
+		sclk_freq = NAVI14_UMD_PSTATE_PROFILING_GFXCLK;
+		uclk_freq = NAVI14_UMD_PSTATE_PROFILING_MEMCLK;
 		break;
+	default:
+		/* by default, this is same as auto performance level */
+		return navi10_set_performance_level(smu, AMD_DPM_FORCED_LEVEL_AUTO);
 	}
 
-	ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_level);
+	ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq);
+	if (ret)
+		return ret;
+	ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq);
 	if (ret)
 		return ret;
-	ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, uclk_level - 1, &uclk_freq);
+
+	return ret;
+}
+
+static int navi10_set_peak_performance_level(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	int ret = 0;
+	uint32_t sclk_freq = 0, uclk_freq = 0;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+		switch (adev->pdev->revision) {
+		case 0xf0: /* XTX */
+		case 0xc0:
+			sclk_freq = NAVI10_PEAK_SCLK_XTX;
+			break;
+		case 0xf1: /* XT */
+		case 0xc1:
+			sclk_freq = NAVI10_PEAK_SCLK_XT;
+			break;
+		default: /* XL */
+			sclk_freq = NAVI10_PEAK_SCLK_XL;
+			break;
+		}
+		break;
+	case CHIP_NAVI14:
+		switch (adev->pdev->revision) {
+		case 0xc7: /* XT */
+		case 0xf4:
+			sclk_freq = NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK;
+			break;
+		case 0xc1: /* XTM */
+		case 0xf2:
+			sclk_freq = NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK;
+			break;
+		case 0xc3: /* XLM */
+		case 0xf3:
+			sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK;
+			break;
+		case 0xc5: /* XTX */
+		case 0xf6:
+			sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK;
+			break;
+		default: /* XL */
+			sclk_freq = NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK;
+			break;
+		}
+		break;
+	case CHIP_NAVI12:
+		sclk_freq = NAVI12_UMD_PSTATE_PEAK_GFXCLK;
+		break;
+	default:
+		ret = smu_get_dpm_level_range(smu, SMU_SCLK, NULL, &sclk_freq);
+		if (ret)
+			return ret;
+	}
+
+	ret = smu_get_dpm_level_range(smu, SMU_UCLK, NULL, &uclk_freq);
 	if (ret)
 		return ret;
 
@@ -1488,23 +1687,45 @@ static int navi10_set_peak_clock_by_device(struct smu_context *smu)
 	return ret;
 }
 
-static int navi10_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
+static int navi10_set_performance_level(struct smu_context *smu,
+					enum amd_dpm_forced_level level)
 {
 	int ret = 0;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->asic_type != CHIP_NAVI10)
-		return -EINVAL;
+	uint32_t sclk_mask, mclk_mask, soc_mask;
 
 	switch (level) {
+	case AMD_DPM_FORCED_LEVEL_HIGH:
+		ret = smu_force_dpm_limit_value(smu, true);
+		break;
+	case AMD_DPM_FORCED_LEVEL_LOW:
+		ret = smu_force_dpm_limit_value(smu, false);
+		break;
+	case AMD_DPM_FORCED_LEVEL_AUTO:
+		ret = smu_unforce_dpm_levels(smu);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		ret = navi10_set_standard_performance_level(smu);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+		ret = smu_get_profiling_clk_mask(smu, level,
+						 &sclk_mask,
+						 &mclk_mask,
+						 &soc_mask);
+		if (ret)
+			return ret;
+		smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false);
+		smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false);
+		smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false);
+		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
-		ret = navi10_set_peak_clock_by_device(smu);
+		ret = navi10_set_peak_performance_level(smu);
 		break;
+	case AMD_DPM_FORCED_LEVEL_MANUAL:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
 	default:
-		ret = -EINVAL;
 		break;
 	}
-
 	return ret;
 }
 
@@ -1547,17 +1768,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu,
 	return ret;
 }
 
+static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu)
+{
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
+	return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
+}
+
 static int navi10_get_power_limit(struct smu_context *smu,
 				     uint32_t *limit,
-				     bool asic_default)
+				     bool cap)
 {
 	PPTable_t *pptable = smu->smu_table.driver_pptable;
 	uint32_t asic_default_power_limit = 0;
 	int ret = 0;
 	int power_src;
 
-	if (!smu->default_power_limit ||
-	    !smu->power_limit) {
+	if (!smu->power_limit) {
 		if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
 			power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC);
 			if (power_src < 0)
@@ -1580,23 +1806,291 @@ static int navi10_get_power_limit(struct smu_context *smu,
 				pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
 		}
 
-		if (smu->od_enabled) {
-			asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit);
-			asic_default_power_limit /= 100;
-		}
-
-		smu->default_power_limit = asic_default_power_limit;
 		smu->power_limit = asic_default_power_limit;
 	}
 
-	if (asic_default)
-		*limit = smu->default_power_limit;
+	if (cap)
+		*limit = smu_v11_0_get_max_power_limit(smu);
 	else
 		*limit = smu->power_limit;
 
 	return 0;
 }
 
+static int navi10_update_pcie_parameters(struct smu_context *smu,
+				     uint32_t pcie_gen_cap,
+				     uint32_t pcie_width_cap)
+{
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
+	int ret, i;
+	uint32_t smu_pcie_arg;
+
+	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
+	struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+
+	for (i = 0; i < NUM_LINK_LEVELS; i++) {
+		smu_pcie_arg = (i << 16) |
+			((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) :
+				(pcie_gen_cap << 8)) | ((pptable->PcieLaneCount[i] <= pcie_width_cap) ?
+					pptable->PcieLaneCount[i] : pcie_width_cap);
+		ret = smu_send_smc_msg_with_param(smu,
+					  SMU_MSG_OverridePcieParameters,
+					  smu_pcie_arg);
+
+		if (ret)
+			return ret;
+
+		if (pptable->PcieGenSpeed[i] > pcie_gen_cap)
+			dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap;
+		if (pptable->PcieLaneCount[i] > pcie_width_cap)
+			dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap;
+	}
+
+	return 0;
+}
+
+static inline void navi10_dump_od_table(OverDriveTable_t *od_table) {
+	pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
+	pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1);
+	pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2);
+	pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3);
+	pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax);
+	pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct);
+}
+
+static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value)
+{
+	if (value < od_table->min[setting]) {
+		pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]);
+		return -EINVAL;
+	}
+	if (value > od_table->max[setting]) {
+		pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int navi10_setup_od_limits(struct smu_context *smu) {
+	struct smu_11_0_overdrive_table *overdrive_table = NULL;
+	struct smu_11_0_powerplay_table *powerplay_table = NULL;
+
+	if (!smu->smu_table.power_play_table) {
+		pr_err("powerplay table uninitialized!\n");
+		return -ENOENT;
+	}
+	powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table;
+	overdrive_table = &powerplay_table->overdrive_table;
+	if (!smu->od_settings) {
+		smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL);
+	} else {
+		memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table));
+	}
+	return 0;
+}
+
+static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) {
+	OverDriveTable_t *od_table;
+	int ret = 0;
+
+	ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t));
+	if (ret)
+		return ret;
+
+	if (initialize) {
+		ret = navi10_setup_od_limits(smu);
+		if (ret) {
+			pr_err("Failed to retrieve board OD limits\n");
+			return ret;
+		}
+
+	}
+
+	od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table;
+	if (od_table) {
+		navi10_dump_od_table(od_table);
+	}
+
+	return ret;
+}
+
+static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) {
+	int i;
+	int ret = 0;
+	struct smu_table_context *table_context = &smu->smu_table;
+	OverDriveTable_t *od_table;
+	struct smu_11_0_overdrive_table *od_settings;
+	enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting;
+	uint16_t *freq_ptr, *voltage_ptr;
+	od_table = (OverDriveTable_t *)table_context->overdrive_table;
+
+	if (!smu->od_enabled) {
+		pr_warn("OverDrive is not enabled!\n");
+		return -EINVAL;
+	}
+
+	if (!smu->od_settings) {
+		pr_err("OD board limits are not set!\n");
+		return -ENOENT;
+	}
+
+	od_settings = smu->od_settings;
+
+	switch (type) {
+	case PP_OD_EDIT_SCLK_VDDC_TABLE:
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+			pr_warn("GFXCLK_LIMITS not supported!\n");
+			return -ENOTSUPP;
+		}
+		if (!table_context->overdrive_table) {
+			pr_err("Overdrive is not initialized\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < size; i += 2) {
+			if (i + 2 > size) {
+				pr_info("invalid number of input parameters %d\n", size);
+				return -EINVAL;
+			}
+			switch (input[i]) {
+			case 0:
+				freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN;
+				freq_ptr = &od_table->GfxclkFmin;
+				if (input[i + 1] > od_table->GfxclkFmax) {
+					pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n",
+						input[i + 1],
+						od_table->GfxclkFmin);
+					return -EINVAL;
+				}
+				break;
+			case 1:
+				freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX;
+				freq_ptr = &od_table->GfxclkFmax;
+				if (input[i + 1] < od_table->GfxclkFmin) {
+					pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n",
+						input[i + 1],
+						od_table->GfxclkFmax);
+					return -EINVAL;
+				}
+				break;
+			default:
+				pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]);
+				pr_info("Supported indices: [0:min,1:max]\n");
+				return -EINVAL;
+			}
+			ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]);
+			if (ret)
+				return ret;
+			*freq_ptr = input[i + 1];
+		}
+		break;
+	case PP_OD_EDIT_MCLK_VDDC_TABLE:
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+			pr_warn("UCLK_MAX not supported!\n");
+			return -ENOTSUPP;
+		}
+		if (size < 2) {
+			pr_info("invalid number of parameters: %d\n", size);
+			return -EINVAL;
+		}
+		if (input[0] != 1) {
+			pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]);
+			pr_info("Supported indices: [1:max]\n");
+			return -EINVAL;
+		}
+		ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]);
+		if (ret)
+			return ret;
+		od_table->UclkFmax = input[1];
+		break;
+	case PP_OD_COMMIT_DPM_TABLE:
+		navi10_dump_od_table(od_table);
+		ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true);
+		if (ret) {
+			pr_err("Failed to import overdrive table!\n");
+			return ret;
+		}
+		// no lock needed because smu_od_edit_dpm_table has it
+		ret = smu_handle_task(smu, smu->smu_dpm.dpm_level,
+			AMD_PP_TASK_READJUST_POWER_STATE,
+			false);
+		if (ret) {
+			return ret;
+		}
+		break;
+	case PP_OD_EDIT_VDDC_CURVE:
+		if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+			pr_warn("GFXCLK_CURVE not supported!\n");
+			return -ENOTSUPP;
+		}
+		if (size < 3) {
+			pr_info("invalid number of parameters: %d\n", size);
+			return -EINVAL;
+		}
+		if (!od_table) {
+			pr_info("Overdrive is not initialized\n");
+			return -EINVAL;
+		}
+
+		switch (input[0]) {
+		case 0:
+			freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1;
+			voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1;
+			freq_ptr = &od_table->GfxclkFreq1;
+			voltage_ptr = &od_table->GfxclkVolt1;
+			break;
+		case 1:
+			freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2;
+			voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2;
+			freq_ptr = &od_table->GfxclkFreq2;
+			voltage_ptr = &od_table->GfxclkVolt2;
+			break;
+		case 2:
+			freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3;
+			voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3;
+			freq_ptr = &od_table->GfxclkFreq3;
+			voltage_ptr = &od_table->GfxclkVolt3;
+			break;
+		default:
+			pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]);
+			pr_info("Supported indices: [0, 1, 2]\n");
+			return -EINVAL;
+		}
+		ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]);
+		if (ret)
+			return ret;
+		// Allow setting zero to disable the OverDrive VDDC curve
+		if (input[2] != 0) {
+			ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]);
+			if (ret)
+				return ret;
+			*freq_ptr = input[1];
+			*voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE;
+			pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr);
+		} else {
+			// If setting 0, disable all voltage curve settings
+			od_table->GfxclkVolt1 = 0;
+			od_table->GfxclkVolt2 = 0;
+			od_table->GfxclkVolt3 = 0;
+		}
+		navi10_dump_od_table(od_table);
+		break;
+	default:
+		return -ENOSYS;
+	}
+	return ret;
+}
+
+static int navi10_run_btc(struct smu_context *smu)
+{
+	int ret = 0;
+
+	ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc);
+	if (ret)
+		pr_err("RunBtc failed!\n");
+
+	return ret;
+}
+
 static const struct pptable_funcs navi10_ppt_funcs = {
 	.tables_init = navi10_tables_init,
 	.alloc_dpm_context = navi10_allocate_dpm_context,
@@ -1612,6 +2106,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.get_allowed_feature_mask = navi10_get_allowed_feature_mask,
 	.set_default_dpm_table = navi10_set_default_dpm_table,
 	.dpm_set_uvd_enable = navi10_dpm_set_uvd_enable,
+	.dpm_set_jpeg_enable = navi10_dpm_set_jpeg_enable,
 	.get_current_clk_freq_by_table = navi10_get_current_clk_freq_by_table,
 	.print_clk_levels = navi10_print_clk_levels,
 	.force_clk_levels = navi10_force_clk_levels,
@@ -1619,7 +2114,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.get_clock_by_type_with_latency = navi10_get_clock_by_type_with_latency,
 	.pre_display_config_changed = navi10_pre_display_config_changed,
 	.display_config_changed = navi10_display_config_changed,
-	.notify_smc_dispaly_config = navi10_notify_smc_dispaly_config,
+	.notify_smc_display_config = navi10_notify_smc_display_config,
 	.force_dpm_limit_value = navi10_force_dpm_limit_value,
 	.unforce_dpm_levels = navi10_unforce_dpm_levels,
 	.is_dpm_running = navi10_is_dpm_running,
@@ -1635,12 +2130,64 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.get_thermal_temperature_range = navi10_get_thermal_temperature_range,
 	.display_disable_memory_clock_switch = navi10_display_disable_memory_clock_switch,
 	.get_power_limit = navi10_get_power_limit,
+	.update_pcie_parameters = navi10_update_pcie_parameters,
+	.init_microcode = smu_v11_0_init_microcode,
+	.load_microcode = smu_v11_0_load_microcode,
+	.init_smc_tables = smu_v11_0_init_smc_tables,
+	.fini_smc_tables = smu_v11_0_fini_smc_tables,
+	.init_power = smu_v11_0_init_power,
+	.fini_power = smu_v11_0_fini_power,
+	.check_fw_status = smu_v11_0_check_fw_status,
+	.setup_pptable = smu_v11_0_setup_pptable,
+	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
+	.get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios,
+	.check_pptable = smu_v11_0_check_pptable,
+	.parse_pptable = smu_v11_0_parse_pptable,
+	.populate_smc_tables = smu_v11_0_populate_smc_pptable,
+	.check_fw_version = smu_v11_0_check_fw_version,
+	.write_pptable = smu_v11_0_write_pptable,
+	.set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep,
+	.set_driver_table_location = smu_v11_0_set_driver_table_location,
+	.set_tool_table_location = smu_v11_0_set_tool_table_location,
+	.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
+	.system_features_control = smu_v11_0_system_features_control,
+	.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
+	.read_smc_arg = smu_v11_0_read_arg,
+	.init_display_count = smu_v11_0_init_display_count,
+	.set_allowed_mask = smu_v11_0_set_allowed_mask,
+	.get_enabled_mask = smu_v11_0_get_enabled_mask,
+	.notify_display_change = smu_v11_0_notify_display_change,
+	.set_power_limit = smu_v11_0_set_power_limit,
+	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
+	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
+	.start_thermal_control = smu_v11_0_start_thermal_control,
+	.stop_thermal_control = smu_v11_0_stop_thermal_control,
+	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
+	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
+	.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
+	.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
+	.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
+	.set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm,
+	.set_xgmi_pstate = smu_v11_0_set_xgmi_pstate,
+	.gfx_off_control = smu_v11_0_gfx_off_control,
+	.register_irq_handler = smu_v11_0_register_irq_handler,
+	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
+	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
+	.baco_is_support= smu_v11_0_baco_is_support,
+	.baco_get_state = smu_v11_0_baco_get_state,
+	.baco_set_state = smu_v11_0_baco_set_state,
+	.baco_enter = smu_v11_0_baco_enter,
+	.baco_exit = smu_v11_0_baco_exit,
+	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
+	.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
+	.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
+	.set_default_od_settings = navi10_set_default_od_settings,
+	.od_edit_dpm_table = navi10_od_edit_dpm_table,
+	.get_pptable_power_limit = navi10_get_pptable_power_limit,
+	.run_btc = navi10_run_btc,
 };
 
 void navi10_set_ppt_funcs(struct smu_context *smu)
 {
-	struct smu_table_context *smu_table = &smu->smu_table;
-
 	smu->ppt_funcs = &navi10_ppt_funcs;
-	smu_table->table_count = TABLE_COUNT;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
index 620ff17c2fef..2abb4ba01db1 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
@@ -27,6 +27,31 @@
 #define NAVI10_PEAK_SCLK_XT  		(1755)
 #define NAVI10_PEAK_SCLK_XL  		(1625)
 
+#define NAVI10_UMD_PSTATE_PROFILING_GFXCLK    (1300)
+#define NAVI10_UMD_PSTATE_PROFILING_SOCCLK    (980)
+#define NAVI10_UMD_PSTATE_PROFILING_MEMCLK    (625)
+#define NAVI10_UMD_PSTATE_PROFILING_VCLK      (980)
+#define NAVI10_UMD_PSTATE_PROFILING_DCLK      (850)
+
+#define NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK      (1670)
+#define NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK     (1448)
+#define NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK     (1181)
+#define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK     (1717)
+#define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK      (1448)
+
+#define NAVI14_UMD_PSTATE_PROFILING_GFXCLK    (1200)
+#define NAVI14_UMD_PSTATE_PROFILING_SOCCLK    (900)
+#define NAVI14_UMD_PSTATE_PROFILING_MEMCLK    (600)
+#define NAVI14_UMD_PSTATE_PROFILING_VCLK      (900)
+#define NAVI14_UMD_PSTATE_PROFILING_DCLK      (800)
+
+#define NAVI12_UMD_PSTATE_PEAK_GFXCLK     (1100)
+
+#define NAVI10_VOLTAGE_SCALE (4)
+
+#define smnPCIE_LC_SPEED_CNTL			0x11140290
+#define smnPCIE_LC_LINK_WIDTH_CNTL		0x11140288
+
 extern void navi10_set_ppt_funcs(struct smu_context *smu);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index e62bfba51562..861e6410363b 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -23,6 +23,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "soc15_common.h"
 #include "smu_v12_0_ppsmc.h"
 #include "smu12_driver_if.h"
@@ -30,6 +31,9 @@
 #include "renoir_ppt.h"
 
 
+#define CLK_MAP(clk, index) \
+	[SMU_##clk] = {1, (index)}
+
 #define MSG_MAP(msg, index) \
 	[SMU_MSG_##msg] = {1, (index)}
 
@@ -103,6 +107,14 @@ static struct smu_12_0_cmn2aisc_mapping renoir_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(SetHardMinFclkByFreq,           PPSMC_MSG_SetHardMinFclkByFreq),
 };
 
+static struct smu_12_0_cmn2aisc_mapping renoir_clk_map[SMU_CLK_COUNT] = {
+	CLK_MAP(GFXCLK, CLOCK_GFXCLK),
+	CLK_MAP(SCLK,	CLOCK_GFXCLK),
+	CLK_MAP(SOCCLK, CLOCK_SOCCLK),
+	CLK_MAP(UCLK, CLOCK_UMCCLK),
+	CLK_MAP(MCLK, CLOCK_UMCCLK),
+};
+
 static struct smu_12_0_cmn2aisc_mapping renoir_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP_VALID(WATERMARKS),
 	TAB_MAP_INVALID(CUSTOM_DPM),
@@ -124,6 +136,21 @@ static int renoir_get_smu_msg_index(struct smu_context *smc, uint32_t index)
 	return mapping.map_to;
 }
 
+static int renoir_get_smu_clk_index(struct smu_context *smc, uint32_t index)
+{
+	struct smu_12_0_cmn2aisc_mapping mapping;
+
+	if (index >= SMU_CLK_COUNT)
+		return -EINVAL;
+
+	mapping = renoir_clk_map[index];
+	if (!(mapping.valid_mapping)) {
+		return -EINVAL;
+	}
+
+	return mapping.map_to;
+}
+
 static int renoir_get_smu_table_index(struct smu_context *smc, uint32_t index)
 {
 	struct smu_12_0_cmn2aisc_mapping mapping;
@@ -138,6 +165,30 @@ static int renoir_get_smu_table_index(struct smu_context *smc, uint32_t index)
 	return mapping.map_to;
 }
 
+static int renoir_get_metrics_table(struct smu_context *smu,
+				    SmuMetrics_t *metrics_table)
+{
+	struct smu_table_context *smu_table= &smu->smu_table;
+	int ret = 0;
+
+	mutex_lock(&smu->metrics_lock);
+	if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
+		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
+				(void *)smu_table->metrics_table, false);
+		if (ret) {
+			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
+			return ret;
+		}
+		smu_table->metrics_time = jiffies;
+	}
+
+	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
+
+	return ret;
+}
+
 static int renoir_tables_init(struct smu_context *smu, struct smu_table *tables)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
@@ -153,6 +204,15 @@ static int renoir_tables_init(struct smu_context *smu, struct smu_table *tables)
 	if (!smu_table->clocks_table)
 		return -ENOMEM;
 
+	smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+	if (!smu_table->metrics_table)
+		return -ENOMEM;
+	smu_table->metrics_time = 0;
+
+	smu_table->watermarks_table = kzalloc(sizeof(Watermarks_t), GFP_KERNEL);
+	if (!smu_table->watermarks_table)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -160,21 +220,17 @@ static int renoir_tables_init(struct smu_context *smu, struct smu_table *tables)
  * This interface just for getting uclk ultimate freq and should't introduce
  * other likewise function result in overmuch callback.
  */
-static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock, bool max)
+static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type clk_type,
+						uint32_t dpm_level, uint32_t *freq)
 {
+	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
 
-	DpmClocks_t *table = smu->smu_table.clocks_table;
-
-	if (!clock || !table)
+	if (!clk_table || clk_type >= SMU_CLK_COUNT)
 		return -EINVAL;
 
-	if (max)
-		*clock = table->FClocks[NUM_FCLK_DPM_LEVELS-1].Freq;
-	else
-		*clock = table->FClocks[0].Freq;
+	GET_DPM_CUR_FREQ(clk_table, clk_type, dpm_level, *freq);
 
 	return 0;
-
 }
 
 static int renoir_print_clk_levels(struct smu_context *smu,
@@ -183,13 +239,14 @@ static int renoir_print_clk_levels(struct smu_context *smu,
 	int i, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
 	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
-	SmuMetrics_t metrics = {0};
+	SmuMetrics_t metrics;
 
 	if (!clk_table || clk_type >= SMU_CLK_COUNT)
 		return -EINVAL;
 
-	ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
-			       (void *)&metrics, false);
+	memset(&metrics, 0, sizeof(metrics));
+
+	ret = renoir_get_metrics_table(smu, &metrics);
 	if (ret)
 		return ret;
 
@@ -198,7 +255,7 @@ static int renoir_print_clk_levels(struct smu_context *smu,
 	case SMU_SCLK:
 		/* retirve table returned paramters unit is MHz */
 		cur_value = metrics.ClockFrequency[CLOCK_GFXCLK];
-		ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max);
+		ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max, false);
 		if (!ret) {
 			/* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */
 			if (cur_value  == max)
@@ -246,20 +303,629 @@ static int renoir_print_clk_levels(struct smu_context *smu,
 	return size;
 }
 
+static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context *smu)
+{
+	enum amd_pm_state_type pm_type;
+	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+
+	if (!smu_dpm_ctx->dpm_context ||
+	    !smu_dpm_ctx->dpm_current_power_state)
+		return -EINVAL;
+
+	switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) {
+	case SMU_STATE_UI_LABEL_BATTERY:
+		pm_type = POWER_STATE_TYPE_BATTERY;
+		break;
+	case SMU_STATE_UI_LABEL_BALLANCED:
+		pm_type = POWER_STATE_TYPE_BALANCED;
+		break;
+	case SMU_STATE_UI_LABEL_PERFORMANCE:
+		pm_type = POWER_STATE_TYPE_PERFORMANCE;
+		break;
+	default:
+		if (smu_dpm_ctx->dpm_current_power_state->classification.flags & SMU_STATE_CLASSIFICATION_FLAG_BOOT)
+			pm_type = POWER_STATE_TYPE_INTERNAL_BOOT;
+		else
+			pm_type = POWER_STATE_TYPE_DEFAULT;
+		break;
+	}
+
+	return pm_type;
+}
+
+static int renoir_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
+{
+	struct smu_power_context *smu_power = &smu->smu_power;
+	struct smu_power_gate *power_gate = &smu_power->power_gate;
+	int ret = 0;
+
+	if (enable) {
+		/* vcn dpm on is a prerequisite for vcn power gate messages */
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) {
+			ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0);
+			if (ret)
+				return ret;
+		}
+		power_gate->vcn_gated = false;
+	} else {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) {
+			ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn);
+			if (ret)
+				return ret;
+		}
+		power_gate->vcn_gated = true;
+	}
+
+	return ret;
+}
+
+static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable)
+{
+	struct smu_power_context *smu_power = &smu->smu_power;
+	struct smu_power_gate *power_gate = &smu_power->power_gate;
+	int ret = 0;
+
+	if (enable) {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) {
+			ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpJpeg, 0);
+			if (ret)
+				return ret;
+		}
+		power_gate->jpeg_gated = false;
+	} else {
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) {
+			ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0);
+			if (ret)
+				return ret;
+		}
+		power_gate->jpeg_gated = true;
+	}
+
+	return ret;
+}
+
+static int renoir_get_current_clk_freq_by_table(struct smu_context *smu,
+				       enum smu_clk_type clk_type,
+				       uint32_t *value)
+{
+	int ret = 0, clk_id = 0;
+	SmuMetrics_t metrics;
+
+	ret = renoir_get_metrics_table(smu, &metrics);
+	if (ret)
+		return ret;
+
+	clk_id = smu_clk_get_index(smu, clk_type);
+	if (clk_id < 0)
+		return clk_id;
+
+	*value = metrics.ClockFrequency[clk_id];
+
+	return ret;
+}
+
+static int renoir_force_dpm_limit_value(struct smu_context *smu, bool highest)
+{
+	int ret = 0, i = 0;
+	uint32_t min_freq, max_freq, force_freq;
+	enum smu_clk_type clk_type;
+
+	enum smu_clk_type clks[] = {
+		SMU_GFXCLK,
+		SMU_MCLK,
+		SMU_SOCCLK,
+	};
+
+	for (i = 0; i < ARRAY_SIZE(clks); i++) {
+		clk_type = clks[i];
+		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false);
+		if (ret)
+			return ret;
+
+		force_freq = highest ? max_freq : min_freq;
+		ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int renoir_unforce_dpm_levels(struct smu_context *smu) {
+
+	int ret = 0, i = 0;
+	uint32_t min_freq, max_freq;
+	enum smu_clk_type clk_type;
+
+	struct clk_feature_map {
+		enum smu_clk_type clk_type;
+		uint32_t	feature;
+	} clk_feature_map[] = {
+		{SMU_GFXCLK, SMU_FEATURE_DPM_GFXCLK_BIT},
+		{SMU_MCLK,   SMU_FEATURE_DPM_UCLK_BIT},
+		{SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(clk_feature_map); i++) {
+		if (!smu_feature_is_enabled(smu, clk_feature_map[i].feature))
+		    continue;
+
+		clk_type = clk_feature_map[i].clk_type;
+
+		ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false);
+		if (ret)
+			return ret;
+
+		ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int renoir_get_gpu_temperature(struct smu_context *smu, uint32_t *value)
+{
+	int ret = 0;
+	SmuMetrics_t metrics;
+
+	if (!value)
+		return -EINVAL;
+
+	ret = renoir_get_metrics_table(smu, &metrics);
+	if (ret)
+		return ret;
+
+	*value = (metrics.GfxTemperature / 100) *
+		SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+
+	return 0;
+}
+
+static int renoir_get_current_activity_percent(struct smu_context *smu,
+					       enum amd_pp_sensors sensor,
+					       uint32_t *value)
+{
+	int ret = 0;
+	SmuMetrics_t metrics;
+
+	if (!value)
+		return -EINVAL;
+
+	ret = renoir_get_metrics_table(smu, &metrics);
+	if (ret)
+		return ret;
+
+	switch (sensor) {
+	case AMDGPU_PP_SENSOR_GPU_LOAD:
+		*value = metrics.AverageGfxActivity / 100;
+		break;
+	default:
+		pr_err("Invalid sensor for retrieving clock activity\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int renoir_get_workload_type(struct smu_context *smu, uint32_t profile)
+{
+
+	uint32_t  pplib_workload = 0;
+
+	switch (profile) {
+	case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
+		pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_CUSTOM:
+		pplib_workload = WORKLOAD_PPLIB_COUNT;
+		break;
+	case PP_SMC_POWER_PROFILE_VIDEO:
+		pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_VR:
+		pplib_workload = WORKLOAD_PPLIB_VR_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_COMPUTE:
+		pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return pplib_workload;
+}
+
+static int renoir_get_profiling_clk_mask(struct smu_context *smu,
+					 enum amd_dpm_forced_level level,
+					 uint32_t *sclk_mask,
+					 uint32_t *mclk_mask,
+					 uint32_t *soc_mask)
+{
+
+	if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) {
+		if (sclk_mask)
+			*sclk_mask = 0;
+	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) {
+		if (mclk_mask)
+			*mclk_mask = 0;
+	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) {
+		if(sclk_mask)
+			/* The sclk as gfxclk and has three level about max/min/current */
+			*sclk_mask = 3 - 1;
+
+		if(mclk_mask)
+			*mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1;
+
+		if(soc_mask)
+			*soc_mask = NUM_SOCCLK_DPM_LEVELS - 1;
+	}
+
+	return 0;
+}
+
+/**
+ * This interface get dpm clock table for dc
+ */
+static int renoir_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+	DpmClocks_t *table = smu->smu_table.clocks_table;
+	int i;
+
+	if (!clock_table || !table)
+		return -EINVAL;
+
+	for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) {
+		clock_table->DcfClocks[i].Freq = table->DcfClocks[i].Freq;
+		clock_table->DcfClocks[i].Vol = table->DcfClocks[i].Vol;
+	}
+
+	for (i = 0; i < NUM_SOCCLK_DPM_LEVELS; i++) {
+		clock_table->SocClocks[i].Freq = table->SocClocks[i].Freq;
+		clock_table->SocClocks[i].Vol = table->SocClocks[i].Vol;
+	}
+
+	for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) {
+		clock_table->FClocks[i].Freq = table->FClocks[i].Freq;
+		clock_table->FClocks[i].Vol = table->FClocks[i].Vol;
+	}
+
+	for (i = 0; i<  NUM_MEMCLK_DPM_LEVELS; i++) {
+		clock_table->MemClocks[i].Freq = table->MemClocks[i].Freq;
+		clock_table->MemClocks[i].Vol = table->MemClocks[i].Vol;
+	}
+
+	return 0;
+}
+
+static int renoir_force_clk_levels(struct smu_context *smu,
+				   enum smu_clk_type clk_type, uint32_t mask)
+{
+
+	int ret = 0 ;
+	uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0;
+	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+
+	soft_min_level = mask ? (ffs(mask) - 1) : 0;
+	soft_max_level = mask ? (fls(mask) - 1) : 0;
+
+	switch (clk_type) {
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		if (soft_min_level > 2 || soft_max_level > 2) {
+			pr_info("Currently sclk only support 3 levels on APU\n");
+			return -EINVAL;
+		}
+
+		ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min_freq, &max_freq, false);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
+					soft_max_level == 0 ? min_freq :
+					soft_max_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : max_freq);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk,
+					soft_min_level == 2 ? max_freq :
+					soft_min_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : min_freq);
+		if (ret)
+			return ret;
+		break;
+	case SMU_SOCCLK:
+		GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq);
+		GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq);
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max_freq);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinSocclkByFreq, min_freq);
+		if (ret)
+			return ret;
+		break;
+	case SMU_MCLK:
+	case SMU_FCLK:
+		GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq);
+		GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq);
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max_freq);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min_freq);
+		if (ret)
+			return ret;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+{
+	int workload_type, ret;
+	uint32_t profile_mode = input[size];
+
+	if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+		pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);
+		return -EINVAL;
+	}
+
+	/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+	workload_type = smu_workload_get_type(smu, smu->power_profile_mode);
+	if (workload_type < 0) {
+		pr_err("Unsupported power profile mode %d on RENOIR\n",smu->power_profile_mode);
+		return -EINVAL;
+	}
+
+	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+				    1 << workload_type);
+	if (ret) {
+		pr_err("Fail to set workload type %d\n", workload_type);
+		return ret;
+	}
+
+	smu->power_profile_mode = profile_mode;
+
+	return 0;
+}
+
+static int renoir_set_peak_clock_by_device(struct smu_context *smu)
+{
+	int ret = 0;
+	uint32_t sclk_freq = 0, uclk_freq = 0;
+
+	ret = smu_get_dpm_freq_range(smu, SMU_SCLK, NULL, &sclk_freq, false);
+	if (ret)
+		return ret;
+
+	ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq);
+	if (ret)
+		return ret;
+
+	ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &uclk_freq, false);
+	if (ret)
+		return ret;
+
+	ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+static int renoir_set_performance_level(struct smu_context *smu,
+					enum amd_dpm_forced_level level)
+{
+	int ret = 0;
+	uint32_t sclk_mask, mclk_mask, soc_mask;
+
+	switch (level) {
+	case AMD_DPM_FORCED_LEVEL_HIGH:
+		ret = smu_force_dpm_limit_value(smu, true);
+		break;
+	case AMD_DPM_FORCED_LEVEL_LOW:
+		ret = smu_force_dpm_limit_value(smu, false);
+		break;
+	case AMD_DPM_FORCED_LEVEL_AUTO:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		ret = smu_unforce_dpm_levels(smu);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+		ret = smu_get_profiling_clk_mask(smu, level,
+						 &sclk_mask,
+						 &mclk_mask,
+						 &soc_mask);
+		if (ret)
+			return ret;
+		smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false);
+		smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false);
+		smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+		ret = renoir_set_peak_clock_by_device(smu);
+		break;
+	case AMD_DPM_FORCED_LEVEL_MANUAL:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
+	default:
+		break;
+	}
+	return ret;
+}
+
+/* save watermark settings into pplib smu structure,
+ * also pass data to smu controller
+ */
+static int renoir_set_watermarks_table(
+		struct smu_context *smu,
+		void *watermarks,
+		struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges)
+{
+	int i;
+	int ret = 0;
+	Watermarks_t *table = watermarks;
+
+	if (!table || !clock_ranges)
+		return -EINVAL;
+
+	if (clock_ranges->num_wm_dmif_sets > 4 ||
+			clock_ranges->num_wm_mcif_sets > 4)
+		return -EINVAL;
+
+	/* save into smu->smu_table.tables[SMU_TABLE_WATERMARKS]->cpu_addr*/
+	for (i = 0; i < clock_ranges->num_wm_dmif_sets; i++) {
+		table->WatermarkRow[WM_DCFCLK][i].MinClock =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz));
+		table->WatermarkRow[WM_DCFCLK][i].MaxClock =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz));
+		table->WatermarkRow[WM_DCFCLK][i].MinMclk =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz));
+		table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz));
+		table->WatermarkRow[WM_DCFCLK][i].WmSetting = (uint8_t)
+				clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id;
+	}
+
+	for (i = 0; i < clock_ranges->num_wm_mcif_sets; i++) {
+		table->WatermarkRow[WM_SOCCLK][i].MinClock =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz));
+		table->WatermarkRow[WM_SOCCLK][i].MaxClock =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz));
+		table->WatermarkRow[WM_SOCCLK][i].MinMclk =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz));
+		table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
+			cpu_to_le16((uint16_t)
+			(clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz));
+		table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t)
+				clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
+	}
+
+	/* pass data to smu controller */
+	if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
+			!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+		ret = smu_write_watermarks_table(smu);
+		if (ret) {
+			pr_err("Failed to update WMTABLE!");
+			return ret;
+		}
+		smu->watermarks_bitmap |= WATERMARKS_LOADED;
+	}
+
+	return 0;
+}
+
+static int renoir_get_power_profile_mode(struct smu_context *smu,
+					   char *buf)
+{
+	static const char *profile_name[] = {
+					"BOOTUP_DEFAULT",
+					"3D_FULL_SCREEN",
+					"POWER_SAVING",
+					"VIDEO",
+					"VR",
+					"COMPUTE",
+					"CUSTOM"};
+	uint32_t i, size = 0;
+	int16_t workload_type = 0;
+
+	if (!smu->pm_enabled || !buf)
+		return -EINVAL;
+
+	for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
+		/*
+		 * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
+		 * Not all profile modes are supported on arcturus.
+		 */
+		workload_type = smu_workload_get_type(smu, i);
+		if (workload_type < 0)
+			continue;
+
+		size += sprintf(buf + size, "%2d %14s%s\n",
+			i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
+	}
+
+	return size;
+}
+
+static int renoir_read_sensor(struct smu_context *smu,
+				 enum amd_pp_sensors sensor,
+				 void *data, uint32_t *size)
+{
+	int ret = 0;
+
+	if (!data || !size)
+		return -EINVAL;
+
+	mutex_lock(&smu->sensor_lock);
+	switch (sensor) {
+	case AMDGPU_PP_SENSOR_GPU_LOAD:
+		ret = renoir_get_current_activity_percent(smu, sensor, (uint32_t *)data);
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_GPU_TEMP:
+		ret = renoir_get_gpu_temperature(smu, (uint32_t *)data);
+		*size = 4;
+		break;
+	default:
+		ret = smu_v12_0_read_sensor(smu, sensor, data, size);
+	}
+	mutex_unlock(&smu->sensor_lock);
+
+	return ret;
+}
+
 static const struct pptable_funcs renoir_ppt_funcs = {
 	.get_smu_msg_index = renoir_get_smu_msg_index,
+	.get_smu_clk_index = renoir_get_smu_clk_index,
 	.get_smu_table_index = renoir_get_smu_table_index,
 	.tables_init = renoir_tables_init,
 	.set_power_state = NULL,
-	.get_dpm_uclk_limited = renoir_get_dpm_uclk_limited,
+	.get_dpm_clk_limited = renoir_get_dpm_clk_limited,
 	.print_clk_levels = renoir_print_clk_levels,
+	.get_current_power_state = renoir_get_current_power_state,
+	.dpm_set_uvd_enable = renoir_dpm_set_uvd_enable,
+	.dpm_set_jpeg_enable = renoir_dpm_set_jpeg_enable,
+	.get_current_clk_freq_by_table = renoir_get_current_clk_freq_by_table,
+	.force_dpm_limit_value = renoir_force_dpm_limit_value,
+	.unforce_dpm_levels = renoir_unforce_dpm_levels,
+	.get_workload_type = renoir_get_workload_type,
+	.get_profiling_clk_mask = renoir_get_profiling_clk_mask,
+	.force_clk_levels = renoir_force_clk_levels,
+	.set_power_profile_mode = renoir_set_power_profile_mode,
+	.set_performance_level = renoir_set_performance_level,
+	.get_dpm_clock_table = renoir_get_dpm_clock_table,
+	.set_watermarks_table = renoir_set_watermarks_table,
+	.get_power_profile_mode = renoir_get_power_profile_mode,
+	.read_sensor = renoir_read_sensor,
+	.check_fw_status = smu_v12_0_check_fw_status,
+	.check_fw_version = smu_v12_0_check_fw_version,
+	.powergate_sdma = smu_v12_0_powergate_sdma,
+	.powergate_vcn = smu_v12_0_powergate_vcn,
+	.powergate_jpeg = smu_v12_0_powergate_jpeg,
+	.send_smc_msg_with_param = smu_v12_0_send_msg_with_param,
+	.read_smc_arg = smu_v12_0_read_arg,
+	.set_gfx_cgpg = smu_v12_0_set_gfx_cgpg,
+	.gfx_off_control = smu_v12_0_gfx_off_control,
+	.init_smc_tables = smu_v12_0_init_smc_tables,
+	.fini_smc_tables = smu_v12_0_fini_smc_tables,
+	.populate_smc_tables = smu_v12_0_populate_smc_tables,
+	.get_enabled_mask = smu_v12_0_get_enabled_mask,
+	.get_current_clk_freq = smu_v12_0_get_current_clk_freq,
+	.get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq,
+	.mode2_reset = smu_v12_0_mode2_reset,
+	.set_soft_freq_limited_range = smu_v12_0_set_soft_freq_limited_range,
+	.set_driver_table_location = smu_v12_0_set_driver_table_location,
 };
 
 void renoir_set_ppt_funcs(struct smu_context *smu)
 {
-	struct smu_table_context *smu_table = &smu->smu_table;
-
 	smu->ppt_funcs = &renoir_ppt_funcs;
 	smu->smc_if_version = SMU12_DRIVER_IF_VERSION;
-	smu_table->table_count = TABLE_COUNT;
+	smu->is_apu = true;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h
new file mode 100644
index 000000000000..783319ec8bf9
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SMU_INTERNAL_H__
+#define __SMU_INTERNAL_H__
+
+#include "amdgpu_smu.h"
+
+#define smu_init_microcode(smu) \
+	((smu)->ppt_funcs->init_microcode ? (smu)->ppt_funcs->init_microcode((smu)) : 0)
+#define smu_init_smc_tables(smu) \
+	((smu)->ppt_funcs->init_smc_tables ? (smu)->ppt_funcs->init_smc_tables((smu)) : 0)
+#define smu_fini_smc_tables(smu) \
+	((smu)->ppt_funcs->fini_smc_tables ? (smu)->ppt_funcs->fini_smc_tables((smu)) : 0)
+#define smu_init_power(smu) \
+	((smu)->ppt_funcs->init_power ? (smu)->ppt_funcs->init_power((smu)) : 0)
+#define smu_fini_power(smu) \
+	((smu)->ppt_funcs->fini_power ? (smu)->ppt_funcs->fini_power((smu)) : 0)
+
+#define smu_setup_pptable(smu) \
+	((smu)->ppt_funcs->setup_pptable ? (smu)->ppt_funcs->setup_pptable((smu)) : 0)
+#define smu_powergate_sdma(smu, gate) \
+	((smu)->ppt_funcs->powergate_sdma ? (smu)->ppt_funcs->powergate_sdma((smu), (gate)) : 0)
+#define smu_powergate_vcn(smu, gate) \
+	((smu)->ppt_funcs->powergate_vcn ? (smu)->ppt_funcs->powergate_vcn((smu), (gate)) : 0)
+#define smu_powergate_jpeg(smu, gate) \
+	((smu)->ppt_funcs->powergate_jpeg ? (smu)->ppt_funcs->powergate_jpeg((smu), (gate)) : 0)
+
+#define smu_get_vbios_bootup_values(smu) \
+	((smu)->ppt_funcs->get_vbios_bootup_values ? (smu)->ppt_funcs->get_vbios_bootup_values((smu)) : 0)
+#define smu_get_clk_info_from_vbios(smu) \
+	((smu)->ppt_funcs->get_clk_info_from_vbios ? (smu)->ppt_funcs->get_clk_info_from_vbios((smu)) : 0)
+#define smu_check_pptable(smu) \
+	((smu)->ppt_funcs->check_pptable ? (smu)->ppt_funcs->check_pptable((smu)) : 0)
+#define smu_parse_pptable(smu) \
+	((smu)->ppt_funcs->parse_pptable ? (smu)->ppt_funcs->parse_pptable((smu)) : 0)
+#define smu_populate_smc_tables(smu) \
+	((smu)->ppt_funcs->populate_smc_tables ? (smu)->ppt_funcs->populate_smc_tables((smu)) : 0)
+#define smu_check_fw_version(smu) \
+	((smu)->ppt_funcs->check_fw_version ? (smu)->ppt_funcs->check_fw_version((smu)) : 0)
+#define smu_write_pptable(smu) \
+	((smu)->ppt_funcs->write_pptable ? (smu)->ppt_funcs->write_pptable((smu)) : 0)
+#define smu_set_min_dcef_deep_sleep(smu) \
+	((smu)->ppt_funcs->set_min_dcef_deep_sleep ? (smu)->ppt_funcs->set_min_dcef_deep_sleep((smu)) : 0)
+#define smu_set_driver_table_location(smu) \
+	((smu)->ppt_funcs->set_driver_table_location ? (smu)->ppt_funcs->set_driver_table_location((smu)) : 0)
+#define smu_set_tool_table_location(smu) \
+	((smu)->ppt_funcs->set_tool_table_location ? (smu)->ppt_funcs->set_tool_table_location((smu)) : 0)
+#define smu_notify_memory_pool_location(smu) \
+	((smu)->ppt_funcs->notify_memory_pool_location ? (smu)->ppt_funcs->notify_memory_pool_location((smu)) : 0)
+#define smu_gfx_off_control(smu, enable) \
+	((smu)->ppt_funcs->gfx_off_control ? (smu)->ppt_funcs->gfx_off_control((smu), (enable)) : 0)
+
+#define smu_set_last_dcef_min_deep_sleep_clk(smu) \
+	((smu)->ppt_funcs->set_last_dcef_min_deep_sleep_clk ? (smu)->ppt_funcs->set_last_dcef_min_deep_sleep_clk((smu)) : 0)
+#define smu_system_features_control(smu, en) \
+	((smu)->ppt_funcs->system_features_control ? (smu)->ppt_funcs->system_features_control((smu), (en)) : 0)
+#define smu_init_max_sustainable_clocks(smu) \
+	((smu)->ppt_funcs->init_max_sustainable_clocks ? (smu)->ppt_funcs->init_max_sustainable_clocks((smu)) : 0)
+#define smu_set_default_od_settings(smu, initialize) \
+	((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
+
+int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg);
+
+#define smu_send_smc_msg_with_param(smu, msg, param) \
+	((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0)
+#define smu_read_smc_arg(smu, arg) \
+	((smu)->ppt_funcs->read_smc_arg? (smu)->ppt_funcs->read_smc_arg((smu), (arg)) : 0)
+#define smu_alloc_dpm_context(smu) \
+	((smu)->ppt_funcs->alloc_dpm_context ? (smu)->ppt_funcs->alloc_dpm_context((smu)) : 0)
+#define smu_init_display_count(smu, count) \
+	((smu)->ppt_funcs->init_display_count ? (smu)->ppt_funcs->init_display_count((smu), (count)) : 0)
+#define smu_feature_set_allowed_mask(smu) \
+	((smu)->ppt_funcs->set_allowed_mask? (smu)->ppt_funcs->set_allowed_mask((smu)) : 0)
+#define smu_feature_get_enabled_mask(smu, mask, num) \
+	((smu)->ppt_funcs->get_enabled_mask? (smu)->ppt_funcs->get_enabled_mask((smu), (mask), (num)) : 0)
+#define smu_is_dpm_running(smu) \
+	((smu)->ppt_funcs->is_dpm_running ? (smu)->ppt_funcs->is_dpm_running((smu)) : 0)
+#define smu_notify_display_change(smu) \
+	((smu)->ppt_funcs->notify_display_change? (smu)->ppt_funcs->notify_display_change((smu)) : 0)
+#define smu_store_powerplay_table(smu) \
+	((smu)->ppt_funcs->store_powerplay_table ? (smu)->ppt_funcs->store_powerplay_table((smu)) : 0)
+#define smu_check_powerplay_table(smu) \
+	((smu)->ppt_funcs->check_powerplay_table ? (smu)->ppt_funcs->check_powerplay_table((smu)) : 0)
+#define smu_append_powerplay_table(smu) \
+	((smu)->ppt_funcs->append_powerplay_table ? (smu)->ppt_funcs->append_powerplay_table((smu)) : 0)
+#define smu_set_default_dpm_table(smu) \
+	((smu)->ppt_funcs->set_default_dpm_table ? (smu)->ppt_funcs->set_default_dpm_table((smu)) : 0)
+#define smu_populate_umd_state_clk(smu) \
+	((smu)->ppt_funcs->populate_umd_state_clk ? (smu)->ppt_funcs->populate_umd_state_clk((smu)) : 0)
+#define smu_set_default_od8_settings(smu) \
+	((smu)->ppt_funcs->set_default_od8_settings ? (smu)->ppt_funcs->set_default_od8_settings((smu)) : 0)
+
+#define smu_get_current_clk_freq(smu, clk_id, value) \
+	((smu)->ppt_funcs->get_current_clk_freq? (smu)->ppt_funcs->get_current_clk_freq((smu), (clk_id), (value)) : 0)
+
+#define smu_tables_init(smu, tab) \
+	((smu)->ppt_funcs->tables_init ? (smu)->ppt_funcs->tables_init((smu), (tab)) : 0)
+#define smu_set_thermal_fan_table(smu) \
+	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
+#define smu_start_thermal_control(smu) \
+	((smu)->ppt_funcs->start_thermal_control? (smu)->ppt_funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+	((smu)->ppt_funcs->stop_thermal_control? (smu)->ppt_funcs->stop_thermal_control((smu)) : 0)
+
+#define smu_smc_read_sensor(smu, sensor, data, size) \
+	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : -EINVAL)
+
+#define smu_pre_display_config_changed(smu) \
+	((smu)->ppt_funcs->pre_display_config_changed ? (smu)->ppt_funcs->pre_display_config_changed((smu)) : 0)
+#define smu_display_config_changed(smu) \
+	((smu)->ppt_funcs->display_config_changed ? (smu)->ppt_funcs->display_config_changed((smu)) : 0)
+#define smu_apply_clocks_adjust_rules(smu) \
+	((smu)->ppt_funcs->apply_clocks_adjust_rules ? (smu)->ppt_funcs->apply_clocks_adjust_rules((smu)) : 0)
+#define smu_notify_smc_display_config(smu) \
+	((smu)->ppt_funcs->notify_smc_display_config ? (smu)->ppt_funcs->notify_smc_display_config((smu)) : 0)
+#define smu_force_dpm_limit_value(smu, highest) \
+	((smu)->ppt_funcs->force_dpm_limit_value ? (smu)->ppt_funcs->force_dpm_limit_value((smu), (highest)) : 0)
+#define smu_unforce_dpm_levels(smu) \
+	((smu)->ppt_funcs->unforce_dpm_levels ? (smu)->ppt_funcs->unforce_dpm_levels((smu)) : 0)
+#define smu_get_profiling_clk_mask(smu, level, sclk_mask, mclk_mask, soc_mask) \
+	((smu)->ppt_funcs->get_profiling_clk_mask ? (smu)->ppt_funcs->get_profiling_clk_mask((smu), (level), (sclk_mask), (mclk_mask), (soc_mask)) : 0)
+#define smu_set_cpu_power_state(smu) \
+	((smu)->ppt_funcs->set_cpu_power_state ? (smu)->ppt_funcs->set_cpu_power_state((smu)) : 0)
+
+#define smu_msg_get_index(smu, msg) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL)
+#define smu_clk_get_index(smu, msg) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_clk_index? (smu)->ppt_funcs->get_smu_clk_index((smu), (msg)) : -EINVAL) : -EINVAL)
+#define smu_feature_get_index(smu, msg) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_feature_index? (smu)->ppt_funcs->get_smu_feature_index((smu), (msg)) : -EINVAL) : -EINVAL)
+#define smu_table_get_index(smu, tab) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_table_index? (smu)->ppt_funcs->get_smu_table_index((smu), (tab)) : -EINVAL) : -EINVAL)
+#define smu_power_get_index(smu, src) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_power_index? (smu)->ppt_funcs->get_smu_power_index((smu), (src)) : -EINVAL) : -EINVAL)
+#define smu_workload_get_type(smu, profile) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_workload_type? (smu)->ppt_funcs->get_workload_type((smu), (profile)) : -EINVAL) : -EINVAL)
+#define smu_run_btc(smu) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->run_btc? (smu)->ppt_funcs->run_btc((smu)) : 0) : 0)
+#define smu_get_allowed_feature_mask(smu, feature_mask, num) \
+	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_allowed_feature_mask? (smu)->ppt_funcs->get_allowed_feature_mask((smu), (feature_mask), (num)) : 0) : 0)
+
+
+#define smu_store_cc6_data(smu, st, cc6_dis, pst_dis, pst_sw_dis) \
+	((smu)->ppt_funcs->store_cc6_data ? (smu)->ppt_funcs->store_cc6_data((smu), (st), (cc6_dis), (pst_dis), (pst_sw_dis)) : 0)
+
+#define smu_get_dal_power_level(smu, clocks) \
+	((smu)->ppt_funcs->get_dal_power_level ? (smu)->ppt_funcs->get_dal_power_level((smu), (clocks)) : 0)
+#define smu_get_perf_level(smu, designation, level) \
+	((smu)->ppt_funcs->get_perf_level ? (smu)->ppt_funcs->get_perf_level((smu), (designation), (level)) : 0)
+#define smu_get_current_shallow_sleep_clocks(smu, clocks) \
+	((smu)->ppt_funcs->get_current_shallow_sleep_clocks ? (smu)->ppt_funcs->get_current_shallow_sleep_clocks((smu), (clocks)) : 0)
+
+#define smu_dpm_set_uvd_enable(smu, enable) \
+	((smu)->ppt_funcs->dpm_set_uvd_enable ? (smu)->ppt_funcs->dpm_set_uvd_enable((smu), (enable)) : 0)
+#define smu_dpm_set_vce_enable(smu, enable) \
+	((smu)->ppt_funcs->dpm_set_vce_enable ? (smu)->ppt_funcs->dpm_set_vce_enable((smu), (enable)) : 0)
+#define smu_dpm_set_jpeg_enable(smu, enable) \
+	((smu)->ppt_funcs->dpm_set_jpeg_enable ? (smu)->ppt_funcs->dpm_set_jpeg_enable((smu), (enable)) : 0)
+
+#define smu_set_watermarks_table(smu, tab, clock_ranges) \
+	((smu)->ppt_funcs->set_watermarks_table ? (smu)->ppt_funcs->set_watermarks_table((smu), (tab), (clock_ranges)) : 0)
+#define smu_get_current_clk_freq_by_table(smu, clk_type, value) \
+	((smu)->ppt_funcs->get_current_clk_freq_by_table ? (smu)->ppt_funcs->get_current_clk_freq_by_table((smu), (clk_type), (value)) : 0)
+#define smu_thermal_temperature_range_update(smu, range, rw) \
+	((smu)->ppt_funcs->thermal_temperature_range_update? (smu)->ppt_funcs->thermal_temperature_range_update((smu), (range), (rw)) : 0)
+#define smu_get_thermal_temperature_range(smu, range) \
+	((smu)->ppt_funcs->get_thermal_temperature_range? (smu)->ppt_funcs->get_thermal_temperature_range((smu), (range)) : 0)
+#define smu_register_irq_handler(smu) \
+	((smu)->ppt_funcs->register_irq_handler ? (smu)->ppt_funcs->register_irq_handler(smu) : 0)
+
+#define smu_get_dpm_ultimate_freq(smu, param, min, max) \
+		((smu)->ppt_funcs->get_dpm_ultimate_freq ? (smu)->ppt_funcs->get_dpm_ultimate_freq((smu), (param), (min), (max)) : 0)
+
+#define smu_asic_set_performance_level(smu, level) \
+	((smu)->ppt_funcs->set_performance_level? (smu)->ppt_funcs->set_performance_level((smu), (level)) : -EINVAL);
+#define smu_dump_pptable(smu) \
+	((smu)->ppt_funcs->dump_pptable ? (smu)->ppt_funcs->dump_pptable((smu)) : 0)
+#define smu_get_dpm_clk_limited(smu, clk_type, dpm_level, freq) \
+		((smu)->ppt_funcs->get_dpm_clk_limited ? (smu)->ppt_funcs->get_dpm_clk_limited((smu), (clk_type), (dpm_level), (freq)) : -EINVAL)
+
+#define smu_set_soft_freq_limited_range(smu, clk_type, min, max) \
+		((smu)->ppt_funcs->set_soft_freq_limited_range ? (smu)->ppt_funcs->set_soft_freq_limited_range((smu), (clk_type), (min), (max)) : -EINVAL)
+
+#define smu_override_pcie_parameters(smu) \
+		((smu)->ppt_funcs->override_pcie_parameters ? (smu)->ppt_funcs->override_pcie_parameters((smu)) : 0)
+
+#define smu_update_pcie_parameters(smu, pcie_gen_cap, pcie_width_cap) \
+		((smu)->ppt_funcs->update_pcie_parameters ? (smu)->ppt_funcs->update_pcie_parameters((smu), (pcie_gen_cap), (pcie_width_cap)) : 0)
+
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index c5257ae3188a..02f8c9cb89d9 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -24,17 +24,20 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
+#define SMU_11_0_PARTIAL_PPTABLE
+
 #include "pp_debug.h"
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "smu_v11_0.h"
+#include "smu_v11_0_pptable.h"
 #include "soc15_common.h"
 #include "atom.h"
-#include "vega20_ppt.h"
-#include "arcturus_ppt.h"
-#include "navi10_ppt.h"
+#include "amd_pcie.h"
+#include "amdgpu_ras.h"
 
 #include "asic_reg/thm/thm_11_0_2_offset.h"
 #include "asic_reg/thm/thm_11_0_2_sh_mask.h"
@@ -61,7 +64,7 @@ static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu,
 	return 0;
 }
 
-static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg)
+int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg)
 {
 	struct amdgpu_device *adev = smu->adev;
 
@@ -77,47 +80,20 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
 	for (i = 0; i < timeout; i++) {
 		cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 		if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0)
-			break;
+			return cur_value == 0x1 ? 0 : -EIO;
+
 		udelay(1);
 	}
 
 	/* timeout means wrong logic */
-	if (i == timeout)
-		return -ETIME;
-
-	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
-}
-
-static int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret = 0, index = 0;
-
-	index = smu_msg_get_index(smu, msg);
-	if (index < 0)
-		return index;
-
-	smu_v11_0_wait_for_response(smu);
-
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
-
-	smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index);
-
-	ret = smu_v11_0_wait_for_response(smu);
-
-	if (ret)
-		pr_err("failed send message: %10s (%d) response %#x\n",
-		       smu_get_message_name(smu, msg), index, ret);
-
-	return ret;
-
+	return -ETIME;
 }
 
-static int
-smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+int
+smu_v11_0_send_msg_with_param(struct smu_context *smu,
+			      enum smu_message_type msg,
 			      uint32_t param)
 {
-
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0, index = 0;
 
@@ -126,9 +102,11 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
 		return index;
 
 	ret = smu_v11_0_wait_for_response(smu);
-	if (ret)
-		pr_err("failed send message: %10s (%d) \tparam: 0x%08x response %#x\n",
-		       smu_get_message_name(smu, msg), index, param, ret);
+	if (ret) {
+		pr_err("Msg issuing pre-check failed and "
+		       "SMU may be not in the right state!\n");
+		return ret;
+	}
 
 	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
@@ -144,7 +122,7 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
 	return ret;
 }
 
-static int smu_v11_0_init_microcode(struct smu_context *smu)
+int smu_v11_0_init_microcode(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	const char *chip_name;
@@ -206,7 +184,7 @@ out:
 	return err;
 }
 
-static int smu_v11_0_load_microcode(struct smu_context *smu)
+int smu_v11_0_load_microcode(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	const uint32_t *src;
@@ -244,7 +222,7 @@ static int smu_v11_0_load_microcode(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_check_fw_status(struct smu_context *smu)
+int smu_v11_0_check_fw_status(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t mp1_fw_flags;
@@ -259,7 +237,7 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu)
 	return -EIO;
 }
 
-static int smu_v11_0_check_fw_version(struct smu_context *smu)
+int smu_v11_0_check_fw_version(struct smu_context *smu)
 {
 	uint32_t if_version = 0xff, smu_version = 0xff;
 	uint16_t smu_major;
@@ -354,7 +332,7 @@ static int smu_v11_0_set_pptable_v2_1(struct smu_context *smu, void **table,
 	return 0;
 }
 
-static int smu_v11_0_setup_pptable(struct smu_context *smu)
+int smu_v11_0_setup_pptable(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	const struct smc_firmware_header_v1_0 *hdr;
@@ -369,6 +347,7 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu)
 	version_major = le16_to_cpu(hdr->header.header_version_major);
 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
 	if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) {
+		pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id);
 		switch (version_minor) {
 		case 0:
 			ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
@@ -385,6 +364,7 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu)
 			return ret;
 
 	} else {
+		pr_info("use vbios provided pptable\n");
 		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
 						    powerplayinfo);
 
@@ -433,13 +413,13 @@ static int smu_v11_0_fini_dpm_context(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_init_smc_tables(struct smu_context *smu)
+int smu_v11_0_init_smc_tables(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *tables = NULL;
 	int ret = 0;
 
-	if (smu_table->tables || smu_table->table_count == 0)
+	if (smu_table->tables)
 		return -EINVAL;
 
 	tables = kcalloc(SMU_TABLE_COUNT, sizeof(struct smu_table),
@@ -460,19 +440,20 @@ static int smu_v11_0_init_smc_tables(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_fini_smc_tables(struct smu_context *smu)
+int smu_v11_0_fini_smc_tables(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	int ret = 0;
 
-	if (!smu_table->tables || smu_table->table_count == 0)
+	if (!smu_table->tables)
 		return -EINVAL;
 
 	kfree(smu_table->tables);
 	kfree(smu_table->metrics_table);
+	kfree(smu_table->watermarks_table);
 	smu_table->tables = NULL;
-	smu_table->table_count = 0;
 	smu_table->metrics_table = NULL;
+	smu_table->watermarks_table = NULL;
 	smu_table->metrics_time = 0;
 
 	ret = smu_v11_0_fini_dpm_context(smu);
@@ -481,7 +462,7 @@ static int smu_v11_0_fini_smc_tables(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_init_power(struct smu_context *smu)
+int smu_v11_0_init_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
@@ -499,7 +480,7 @@ static int smu_v11_0_init_power(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_fini_power(struct smu_context *smu)
+int smu_v11_0_fini_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
@@ -576,7 +557,7 @@ int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu)
+int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu)
 {
 	int ret, index;
 	struct amdgpu_device *adev = smu->adev;
@@ -673,7 +654,7 @@ static int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_notify_memory_pool_location(struct smu_context *smu)
+int smu_v11_0_notify_memory_pool_location(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *memory_pool = &smu_table->memory_pool;
@@ -719,7 +700,7 @@ static int smu_v11_0_notify_memory_pool_location(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_check_pptable(struct smu_context *smu)
+int smu_v11_0_check_pptable(struct smu_context *smu)
 {
 	int ret;
 
@@ -727,7 +708,7 @@ static int smu_v11_0_check_pptable(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_parse_pptable(struct smu_context *smu)
+int smu_v11_0_parse_pptable(struct smu_context *smu)
 {
 	int ret;
 
@@ -751,7 +732,7 @@ static int smu_v11_0_parse_pptable(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_populate_smc_pptable(struct smu_context *smu)
+int smu_v11_0_populate_smc_pptable(struct smu_context *smu)
 {
 	int ret;
 
@@ -760,7 +741,7 @@ static int smu_v11_0_populate_smc_pptable(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_write_pptable(struct smu_context *smu)
+int smu_v11_0_write_pptable(struct smu_context *smu)
 {
 	struct smu_table_context *table_context = &smu->smu_table;
 	int ret = 0;
@@ -771,24 +752,7 @@ static int smu_v11_0_write_pptable(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_write_watermarks_table(struct smu_context *smu)
-{
-	int ret = 0;
-	struct smu_table_context *smu_table = &smu->smu_table;
-	struct smu_table *table = NULL;
-
-	table = &smu_table->tables[SMU_TABLE_WATERMARKS];
-
-	if (!table->cpu_addr)
-		return -EINVAL;
-
-	ret = smu_update_table(smu, SMU_TABLE_WATERMARKS, 0, table->cpu_addr,
-				true);
-
-	return ret;
-}
-
-static int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk)
+int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk)
 {
 	int ret;
 
@@ -800,7 +764,7 @@ static int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t cl
 	return ret;
 }
 
-static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
+int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
 {
 	struct smu_table_context *table_context = &smu->smu_table;
 
@@ -809,11 +773,28 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
 	if (!table_context)
 		return -EINVAL;
 
-	return smu_set_deep_sleep_dcefclk(smu,
-					  table_context->boot_values.dcefclk / 100);
+	return smu_v11_0_set_deep_sleep_dcefclk(smu, table_context->boot_values.dcefclk / 100);
 }
 
-static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
+int smu_v11_0_set_driver_table_location(struct smu_context *smu)
+{
+	struct smu_table *driver_table = &smu->smu_table.driver_table;
+	int ret = 0;
+
+	if (driver_table->mc_address) {
+		ret = smu_send_smc_msg_with_param(smu,
+				SMU_MSG_SetDriverDramAddrHigh,
+				upper_32_bits(driver_table->mc_address));
+		if (!ret)
+			ret = smu_send_smc_msg_with_param(smu,
+				SMU_MSG_SetDriverDramAddrLow,
+				lower_32_bits(driver_table->mc_address));
+	}
+
+	return ret;
+}
+
+int smu_v11_0_set_tool_table_location(struct smu_context *smu)
 {
 	int ret = 0;
 	struct smu_table *tool_table = &smu->smu_table.tables[SMU_TABLE_PMSTATUSLOG];
@@ -831,7 +812,7 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count)
+int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count)
 {
 	int ret = 0;
 
@@ -843,7 +824,7 @@ static int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count)
 }
 
 
-static int smu_v11_0_set_allowed_mask(struct smu_context *smu)
+int smu_v11_0_set_allowed_mask(struct smu_context *smu)
 {
 	struct smu_feature *feature = &smu->smu_feature;
 	int ret = 0;
@@ -870,62 +851,71 @@ failed:
 	return ret;
 }
 
-static int smu_v11_0_get_enabled_mask(struct smu_context *smu,
+int smu_v11_0_get_enabled_mask(struct smu_context *smu,
 				      uint32_t *feature_mask, uint32_t num)
 {
 	uint32_t feature_mask_high = 0, feature_mask_low = 0;
+	struct smu_feature *feature = &smu->smu_feature;
 	int ret = 0;
 
 	if (!feature_mask || num < 2)
 		return -EINVAL;
 
-	ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesHigh);
-	if (ret)
-		return ret;
-	ret = smu_read_smc_arg(smu, &feature_mask_high);
-	if (ret)
-		return ret;
+	if (bitmap_empty(feature->enabled, feature->feature_num)) {
+		ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesHigh);
+		if (ret)
+			return ret;
+		ret = smu_read_smc_arg(smu, &feature_mask_high);
+		if (ret)
+			return ret;
 
-	ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesLow);
-	if (ret)
-		return ret;
-	ret = smu_read_smc_arg(smu, &feature_mask_low);
-	if (ret)
-		return ret;
+		ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesLow);
+		if (ret)
+			return ret;
+		ret = smu_read_smc_arg(smu, &feature_mask_low);
+		if (ret)
+			return ret;
 
-	feature_mask[0] = feature_mask_low;
-	feature_mask[1] = feature_mask_high;
+		feature_mask[0] = feature_mask_low;
+		feature_mask[1] = feature_mask_high;
+	} else {
+		bitmap_copy((unsigned long *)feature_mask, feature->enabled,
+			     feature->feature_num);
+	}
 
 	return ret;
 }
 
-static int smu_v11_0_system_features_control(struct smu_context *smu,
+int smu_v11_0_system_features_control(struct smu_context *smu,
 					     bool en)
 {
 	struct smu_feature *feature = &smu->smu_feature;
 	uint32_t feature_mask[2];
 	int ret = 0;
 
-	if (smu->pm_enabled) {
-		ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
-					     SMU_MSG_DisableAllSmuFeatures));
-		if (ret)
-			return ret;
-	}
-
-	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
+	ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
+				     SMU_MSG_DisableAllSmuFeatures));
 	if (ret)
 		return ret;
 
-	bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
-		    feature->feature_num);
-	bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
-		    feature->feature_num);
+	if (en) {
+		ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
+		if (ret)
+			return ret;
+
+		bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
+			    feature->feature_num);
+		bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
+			    feature->feature_num);
+	} else {
+		bitmap_zero(feature->enabled, feature->feature_num);
+		bitmap_zero(feature->supported, feature->feature_num);
+	}
 
 	return ret;
 }
 
-static int smu_v11_0_notify_display_change(struct smu_context *smu)
+int smu_v11_0_notify_display_change(struct smu_context *smu)
 {
 	int ret = 0;
 
@@ -983,7 +973,7 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
 	return ret;
 }
 
-static int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
+int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
 {
 	struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks;
 	int ret = 0;
@@ -1063,13 +1053,44 @@ static int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
+uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) {
+	uint32_t od_limit, max_power_limit;
+	struct smu_11_0_powerplay_table *powerplay_table = NULL;
+	struct smu_table_context *table_context = &smu->smu_table;
+	powerplay_table = table_context->power_play_table;
+
+	max_power_limit = smu_get_pptable_power_limit(smu);
+
+	if (!max_power_limit) {
+		// If we couldn't get the table limit, fall back on first-read value
+		if (!smu->default_power_limit)
+			smu->default_power_limit = smu->power_limit;
+		max_power_limit = smu->default_power_limit;
+	}
+
+	if (smu->od_enabled) {
+		od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
+
+		pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit);
+
+		max_power_limit *= (100 + od_limit);
+		max_power_limit /= 100;
+	}
+
+	return max_power_limit;
+}
+
+int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
 {
 	int ret = 0;
+	uint32_t max_power_limit;
+
+	max_power_limit = smu_v11_0_get_max_power_limit(smu);
 
-	if (n > smu->default_power_limit) {
-		pr_err("New power limit is over the max allowed %d\n",
-				smu->default_power_limit);
+	if (n > max_power_limit) {
+		pr_err("New power limit (%d) is over the max allowed %d\n",
+				n,
+				max_power_limit);
 		return -EINVAL;
 	}
 
@@ -1091,7 +1112,7 @@ static int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
 	return 0;
 }
 
-static int smu_v11_0_get_current_clk_freq(struct smu_context *smu,
+int smu_v11_0_get_current_clk_freq(struct smu_context *smu,
 					  enum smu_clk_type clk_id,
 					  uint32_t *value)
 {
@@ -1133,11 +1154,12 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu,
 	int low = SMU_THERMAL_MINIMUM_ALERT_TEMP;
 	int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP;
 	uint32_t val;
+	struct smu_table_context *table_context = &smu->smu_table;
+	struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table;
 
 	low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP,
 			range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES);
-	high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP,
-			range.max / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES);
+	high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp);
 
 	if (low > high)
 		return -EINVAL;
@@ -1170,7 +1192,7 @@ static int smu_v11_0_enable_thermal_alert(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v11_0_start_thermal_control(struct smu_context *smu)
+int smu_v11_0_start_thermal_control(struct smu_context *smu)
 {
 	int ret = 0;
 	struct smu_temperature_range range;
@@ -1212,6 +1234,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
 	return ret;
 }
 
+int smu_v11_0_stop_thermal_control(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+	return 0;
+}
+
 static uint16_t convert_to_vddc(uint8_t vid)
 {
 	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
@@ -1236,7 +1267,7 @@ static int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value)
 
 }
 
-static int smu_v11_0_read_sensor(struct smu_context *smu,
+int smu_v11_0_read_sensor(struct smu_context *smu,
 				 enum amd_pp_sensors sensor,
 				 void *data, uint32_t *size)
 {
@@ -1273,7 +1304,7 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
 	return ret;
 }
 
-static int
+int
 smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
 					struct pp_display_clock_request
 					*clock_req)
@@ -1316,9 +1347,7 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
 		if (clk_select == SMU_UCLK && smu->disable_uclk_switch)
 			return 0;
 
-		mutex_lock(&smu->mutex);
 		ret = smu_set_hard_freq_range(smu, clk_select, clk_freq, 0);
-		mutex_unlock(&smu->mutex);
 
 		if(clk_select == SMU_UCLK)
 			smu->hard_min_uclk_req_from_dal = clk_freq;
@@ -1328,27 +1357,7 @@ failed:
 	return ret;
 }
 
-static int
-smu_v11_0_set_watermarks_for_clock_ranges(struct smu_context *smu, struct
-					  dm_pp_wm_sets_with_clock_ranges_soc15
-					  *clock_ranges)
-{
-	int ret = 0;
-	struct smu_table *watermarks = &smu->smu_table.tables[SMU_TABLE_WATERMARKS];
-	void *table = watermarks->cpu_addr;
-
-	if (!smu->disable_watermark &&
-	    smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
-	    smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
-		smu_set_watermarks_table(smu, table, clock_ranges);
-		smu->watermarks_bitmap |= WATERMARKS_EXIST;
-		smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
-	}
-
-	return ret;
-}
-
-static int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable)
+int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable)
 {
 	int ret = 0;
 	struct amdgpu_device *adev = smu->adev;
@@ -1361,12 +1370,10 @@ static int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable)
 	case CHIP_NAVI12:
 		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 			return 0;
-		mutex_lock(&smu->mutex);
 		if (enable)
 			ret = smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff);
 		else
 			ret = smu_send_smc_msg(smu, SMU_MSG_DisallowGfxOff);
-		mutex_unlock(&smu->mutex);
 		break;
 	default:
 		break;
@@ -1375,7 +1382,7 @@ static int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable)
 	return ret;
 }
 
-static uint32_t
+uint32_t
 smu_v11_0_get_fan_control_mode(struct smu_context *smu)
 {
 	if (!smu_feature_is_enabled(smu, SMU_FEATURE_FAN_CONTROL_BIT))
@@ -1415,7 +1422,7 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode)
 	return 0;
 }
 
-static int
+int
 smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1444,7 +1451,7 @@ smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed)
 	return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC);
 }
 
-static int
+int
 smu_v11_0_set_fan_control_mode(struct smu_context *smu,
 			       uint32_t mode)
 {
@@ -1472,7 +1479,7 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
+int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
 				       uint32_t speed)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1482,10 +1489,9 @@ static int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
 	if (!speed)
 		return -EINVAL;
 
-	mutex_lock(&(smu->mutex));
 	ret = smu_v11_0_auto_fan_control(smu, 0);
 	if (ret)
-		goto set_fan_speed_rpm_failed;
+		return ret;
 
 	crystal_clock_freq = amdgpu_asic_get_xclk(adev);
 	tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
@@ -1496,23 +1502,16 @@ static int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
 
 	ret = smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC_RPM);
 
-set_fan_speed_rpm_failed:
-	mutex_unlock(&(smu->mutex));
 	return ret;
 }
 
-#define XGMI_STATE_D0 1
-#define XGMI_STATE_D3 0
-
-static int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
+int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
 				     uint32_t pstate)
 {
 	int ret = 0;
-	mutex_lock(&(smu->mutex));
 	ret = smu_send_smc_msg_with_param(smu,
 					  SMU_MSG_SetXgmiMode,
-					  pstate ? XGMI_STATE_D0 : XGMI_STATE_D3);
-	mutex_unlock(&(smu->mutex));
+					  pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
 	return ret;
 }
 
@@ -1559,7 +1558,7 @@ static const struct amdgpu_irq_src_funcs smu_v11_0_irq_funcs =
 	.process = smu_v11_0_irq_process,
 };
 
-static int smu_v11_0_register_irq_handler(struct smu_context *smu)
+int smu_v11_0_register_irq_handler(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	struct amdgpu_irq_src *irq_src = smu->irq_source;
@@ -1591,7 +1590,7 @@ static int smu_v11_0_register_irq_handler(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
+int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 		struct pp_smu_nv_clock_table *max_clocks)
 {
 	struct smu_table_context *table_context = &smu->smu_table;
@@ -1621,13 +1620,11 @@ static int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 	return 0;
 }
 
-static int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu)
+int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu)
 {
 	int ret = 0;
 
-	mutex_lock(&smu->mutex);
 	ret = smu_send_smc_msg(smu, SMU_MSG_BacoAudioD3PME);
-	mutex_unlock(&smu->mutex);
 
 	return ret;
 }
@@ -1637,7 +1634,7 @@ static int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, enum smu_v
 	return smu_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq);
 }
 
-static bool smu_v11_0_baco_is_support(struct smu_context *smu)
+bool smu_v11_0_baco_is_support(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -1651,7 +1648,9 @@ static bool smu_v11_0_baco_is_support(struct smu_context *smu)
 	if (!baco_support)
 		return false;
 
-	if (!smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
+	/* Arcturus does not support this bit mask */
+	if (smu_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
+	   !smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
 		return false;
 
 	val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
@@ -1661,7 +1660,7 @@ static bool smu_v11_0_baco_is_support(struct smu_context *smu)
 	return false;
 }
 
-static enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu)
+enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
 	enum smu_baco_state baco_state;
@@ -1673,10 +1672,14 @@ static enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu)
 	return baco_state;
 }
 
-static int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
+int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
 {
 
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t bif_doorbell_intr_cntl;
+	uint32_t data;
 	int ret = 0;
 
 	if (smu_v11_0_baco_get_state(smu) == state)
@@ -1684,10 +1687,37 @@ static int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state
 
 	mutex_lock(&smu_baco->mutex);
 
-	if (state == SMU_BACO_STATE_ENTER)
-		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, BACO_SEQ_BACO);
-	else
+	bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+
+	if (state == SMU_BACO_STATE_ENTER) {
+		bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+						BIF_DOORBELL_INT_CNTL,
+						DOORBELL_INTERRUPT_DISABLE, 1);
+		WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+		if (!ras || !ras->supported) {
+			data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+			data |= 0x80000000;
+			WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+
+			ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0);
+		} else {
+			ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 1);
+		}
+	} else {
 		ret = smu_send_smc_msg(smu, SMU_MSG_ExitBaco);
+		if (ret)
+			goto out;
+
+		bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+						BIF_DOORBELL_INT_CNTL,
+						DOORBELL_INTERRUPT_DISABLE, 0);
+		WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+		/* clear vbios scratch 6 and 7 for coming asic reinit */
+		WREG32(adev->bios_scratch_reg_offset + 6, 0);
+		WREG32(adev->bios_scratch_reg_offset + 7, 0);
+	}
 	if (ret)
 		goto out;
 
@@ -1697,13 +1727,17 @@ out:
 	return ret;
 }
 
-static int smu_v11_0_baco_reset(struct smu_context *smu)
+int smu_v11_0_baco_enter(struct smu_context *smu)
 {
+	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
-	ret = smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
-	if (ret)
-		return ret;
+	/* Arcturus does not need this audio workaround */
+	if (adev->asic_type != CHIP_ARCTURUS) {
+		ret = smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
+		if (ret)
+			return ret;
+	}
 
 	ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_ENTER);
 	if (ret)
@@ -1711,6 +1745,13 @@ static int smu_v11_0_baco_reset(struct smu_context *smu)
 
 	msleep(10);
 
+	return ret;
+}
+
+int smu_v11_0_baco_exit(struct smu_context *smu)
+{
+	int ret = 0;
+
 	ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
 	if (ret)
 		return ret;
@@ -1718,13 +1759,12 @@ static int smu_v11_0_baco_reset(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
+int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
 						 uint32_t *min, uint32_t *max)
 {
 	int ret = 0, clk_id = 0;
 	uint32_t param = 0;
 
-	mutex_lock(&smu->mutex);
 	clk_id = smu_clk_get_index(smu, clk_type);
 	if (clk_id < 0) {
 		ret = -EINVAL;
@@ -1751,80 +1791,141 @@ static int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk
 	}
 
 failed:
-	mutex_unlock(&smu->mutex);
 	return ret;
 }
 
-static const struct smu_funcs smu_v11_0_funcs = {
-	.init_microcode = smu_v11_0_init_microcode,
-	.load_microcode = smu_v11_0_load_microcode,
-	.check_fw_status = smu_v11_0_check_fw_status,
-	.check_fw_version = smu_v11_0_check_fw_version,
-	.send_smc_msg = smu_v11_0_send_msg,
-	.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
-	.read_smc_arg = smu_v11_0_read_arg,
-	.setup_pptable = smu_v11_0_setup_pptable,
-	.init_smc_tables = smu_v11_0_init_smc_tables,
-	.fini_smc_tables = smu_v11_0_fini_smc_tables,
-	.init_power = smu_v11_0_init_power,
-	.fini_power = smu_v11_0_fini_power,
-	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
-	.get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios,
-	.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
-	.check_pptable = smu_v11_0_check_pptable,
-	.parse_pptable = smu_v11_0_parse_pptable,
-	.populate_smc_tables = smu_v11_0_populate_smc_pptable,
-	.write_pptable = smu_v11_0_write_pptable,
-	.write_watermarks_table = smu_v11_0_write_watermarks_table,
-	.set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep,
-	.set_tool_table_location = smu_v11_0_set_tool_table_location,
-	.init_display_count = smu_v11_0_init_display_count,
-	.set_allowed_mask = smu_v11_0_set_allowed_mask,
-	.get_enabled_mask = smu_v11_0_get_enabled_mask,
-	.system_features_control = smu_v11_0_system_features_control,
-	.notify_display_change = smu_v11_0_notify_display_change,
-	.set_power_limit = smu_v11_0_set_power_limit,
-	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
-	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
-	.start_thermal_control = smu_v11_0_start_thermal_control,
-	.read_sensor = smu_v11_0_read_sensor,
-	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
-	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
-	.set_watermarks_for_clock_ranges = smu_v11_0_set_watermarks_for_clock_ranges,
-	.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
-	.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
-	.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
-	.set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm,
-	.set_xgmi_pstate = smu_v11_0_set_xgmi_pstate,
-	.gfx_off_control = smu_v11_0_gfx_off_control,
-	.register_irq_handler = smu_v11_0_register_irq_handler,
-	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
-	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
-	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
-	.baco_reset = smu_v11_0_baco_reset,
-	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
-};
+int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t min, uint32_t max)
+{
+	int ret = 0, clk_id = 0;
+	uint32_t param;
 
-void smu_v11_0_set_smu_funcs(struct smu_context *smu)
+	clk_id = smu_clk_get_index(smu, clk_type);
+	if (clk_id < 0)
+		return clk_id;
+
+	if (max > 0) {
+		param = (uint32_t)((clk_id << 16) | (max & 0xffff));
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq,
+						  param);
+		if (ret)
+			return ret;
+	}
+
+	if (min > 0) {
+		param = (uint32_t)((clk_id << 16) | (min & 0xffff));
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq,
+						  param);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+int smu_v11_0_override_pcie_parameters(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
+	uint32_t pcie_gen = 0, pcie_width = 0;
+	int ret;
 
-	smu->funcs = &smu_v11_0_funcs;
-	switch (adev->asic_type) {
-	case CHIP_VEGA20:
-		vega20_set_ppt_funcs(smu);
+	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
+		pcie_gen = 3;
+	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+		pcie_gen = 2;
+	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+		pcie_gen = 1;
+	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
+		pcie_gen = 0;
+
+	/* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1
+	 * Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
+	 * Bit 7:0:   PCIE lane width, 1 to 7 corresponds is x1 to x32
+	 */
+	if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+		pcie_width = 6;
+	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
+		pcie_width = 5;
+	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
+		pcie_width = 4;
+	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+		pcie_width = 3;
+	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
+		pcie_width = 2;
+	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
+		pcie_width = 1;
+
+	ret = smu_update_pcie_parameters(smu, pcie_gen, pcie_width);
+
+	if (ret)
+		pr_err("[%s] Attempt to override pcie params failed!\n", __func__);
+
+	return ret;
+
+}
+
+int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size)
+{
+	struct smu_table_context *table_context = &smu->smu_table;
+	int ret = 0;
+
+	if (initialize) {
+		if (table_context->overdrive_table) {
+			return -EINVAL;
+		}
+		table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL);
+		if (!table_context->overdrive_table) {
+			return -ENOMEM;
+		}
+		ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false);
+		if (ret) {
+			pr_err("Failed to export overdrive table!\n");
+			return ret;
+		}
+	}
+	ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true);
+	if (ret) {
+		pr_err("Failed to import overdrive table!\n");
+		return ret;
+	}
+	return ret;
+}
+
+int smu_v11_0_set_performance_level(struct smu_context *smu,
+				    enum amd_dpm_forced_level level)
+{
+	int ret = 0;
+	uint32_t sclk_mask, mclk_mask, soc_mask;
+
+	switch (level) {
+	case AMD_DPM_FORCED_LEVEL_HIGH:
+		ret = smu_force_dpm_limit_value(smu, true);
 		break;
-	case CHIP_ARCTURUS:
-		arcturus_set_ppt_funcs(smu);
+	case AMD_DPM_FORCED_LEVEL_LOW:
+		ret = smu_force_dpm_limit_value(smu, false);
 		break;
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-		navi10_set_ppt_funcs(smu);
+	case AMD_DPM_FORCED_LEVEL_AUTO:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		ret = smu_unforce_dpm_levels(smu);
 		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+		ret = smu_get_profiling_clk_mask(smu, level,
+						 &sclk_mask,
+						 &mclk_mask,
+						 &soc_mask);
+		if (ret)
+			return ret;
+		smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false);
+		smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false);
+		smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false);
+		break;
+	case AMD_DPM_FORCED_LEVEL_MANUAL:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
 	default:
-		pr_warn("Unknown asic for smu11\n");
+		break;
 	}
+	return ret;
 }
+
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index 9d2280ca1f4b..870e6db2907e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -24,12 +24,12 @@
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "smu_v12_0.h"
 #include "soc15_common.h"
 #include "atom.h"
-#include "renoir_ppt.h"
 
 #include "asic_reg/mp/mp_12_0_0_offset.h"
 #include "asic_reg/mp/mp_12_0_0_sh_mask.h"
@@ -41,7 +41,7 @@
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK          0x00000006L
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT        0x1
 
-static int smu_v12_0_send_msg_without_waiting(struct smu_context *smu,
+int smu_v12_0_send_msg_without_waiting(struct smu_context *smu,
 					      uint16_t msg)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -50,7 +50,7 @@ static int smu_v12_0_send_msg_without_waiting(struct smu_context *smu,
 	return 0;
 }
 
-static int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg)
+int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg)
 {
 	struct amdgpu_device *adev = smu->adev;
 
@@ -58,7 +58,7 @@ static int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg)
 	return 0;
 }
 
-static int smu_v12_0_wait_for_response(struct smu_context *smu)
+int smu_v12_0_wait_for_response(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t cur_value, i;
@@ -66,44 +66,18 @@ static int smu_v12_0_wait_for_response(struct smu_context *smu)
 	for (i = 0; i < adev->usec_timeout; i++) {
 		cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 		if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0)
-			break;
+			return cur_value == 0x1 ? 0 : -EIO;
+
 		udelay(1);
 	}
 
 	/* timeout means wrong logic */
-	if (i == adev->usec_timeout)
-		return -ETIME;
-
-	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
+	return -ETIME;
 }
 
-static int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret = 0, index = 0;
-
-	index = smu_msg_get_index(smu, msg);
-	if (index < 0)
-		return index;
-
-	smu_v12_0_wait_for_response(smu);
-
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
-
-	smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index);
-
-	ret = smu_v12_0_wait_for_response(smu);
-
-	if (ret)
-		pr_err("Failed to send message 0x%x, response 0x%x\n", index,
-		       ret);
-
-	return ret;
-
-}
-
-static int
-smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+int
+smu_v12_0_send_msg_with_param(struct smu_context *smu,
+			      enum smu_message_type msg,
 			      uint32_t param)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -114,9 +88,11 @@ smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
 		return index;
 
 	ret = smu_v12_0_wait_for_response(smu);
-	if (ret)
-		pr_err("Failed to send message 0x%x, response 0x%x, param 0x%x\n",
-		       index, ret, param);
+	if (ret) {
+		pr_err("Msg issuing pre-check failed and "
+		       "SMU may be not in the right state!\n");
+		return ret;
+	}
 
 	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
@@ -132,7 +108,7 @@ smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
 	return ret;
 }
 
-static int smu_v12_0_check_fw_status(struct smu_context *smu)
+int smu_v12_0_check_fw_status(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t mp1_fw_flags;
@@ -147,7 +123,7 @@ static int smu_v12_0_check_fw_status(struct smu_context *smu)
 	return -EIO;
 }
 
-static int smu_v12_0_check_fw_version(struct smu_context *smu)
+int smu_v12_0_check_fw_version(struct smu_context *smu)
 {
 	uint32_t if_version = 0xff, smu_version = 0xff;
 	uint16_t smu_major;
@@ -181,9 +157,9 @@ static int smu_v12_0_check_fw_version(struct smu_context *smu)
 	return ret;
 }
 
-static int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate)
+int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate)
 {
-	if (!(smu->adev->flags & AMD_IS_APU))
+	if (!smu->is_apu)
 		return 0;
 
 	if (gate)
@@ -192,9 +168,9 @@ static int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate)
 		return smu_send_smc_msg(smu, SMU_MSG_PowerUpSdma);
 }
 
-static int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate)
+int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate)
 {
-	if (!(smu->adev->flags & AMD_IS_APU))
+	if (!smu->is_apu)
 		return 0;
 
 	if (gate)
@@ -203,7 +179,18 @@ static int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate)
 		return smu_send_smc_msg(smu, SMU_MSG_PowerUpVcn);
 }
 
-static int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable)
+int smu_v12_0_powergate_jpeg(struct smu_context *smu, bool gate)
+{
+	if (!smu->is_apu)
+		return 0;
+
+	if (gate)
+		return smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0);
+	else
+		return smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpJpeg, 0);
+}
+
+int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable)
 {
 	if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
 		return 0;
@@ -212,6 +199,39 @@ static int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable)
 		SMU_MSG_SetGfxCGPG, enable ? 1 : 0);
 }
 
+int smu_v12_0_read_sensor(struct smu_context *smu,
+				 enum amd_pp_sensors sensor,
+				 void *data, uint32_t *size)
+{
+	int ret = 0;
+
+	if(!data || !size)
+		return -EINVAL;
+
+	switch (sensor) {
+	case AMDGPU_PP_SENSOR_GFX_MCLK:
+		ret = smu_get_current_clk_freq(smu, SMU_UCLK, (uint32_t *)data);
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_GFX_SCLK:
+		ret = smu_get_current_clk_freq(smu, SMU_GFXCLK, (uint32_t *)data);
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MIN_FAN_RPM:
+		*(uint32_t *)data = 0;
+		*size = 4;
+		break;
+	default:
+		ret = smu_common_read_sensor(smu, sensor, data, size);
+		break;
+	}
+
+	if (ret)
+		*size = 0;
+
+	return ret;
+}
+
 /**
  * smu_v12_0_get_gfxoff_status - get gfxoff status
  *
@@ -224,7 +244,7 @@ static int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable)
  * Returns 2=Not in GFXOFF.
  * Returns 3=Transition into GFXOFF.
  */
-static uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu)
+uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu)
 {
 	uint32_t reg;
 	uint32_t gfxOff_Status = 0;
@@ -237,22 +257,13 @@ static uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu)
 	return gfxOff_Status;
 }
 
-static int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable)
+int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable)
 {
 	int ret = 0, timeout = 500;
 
 	if (enable) {
 		ret = smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff);
 
-		/* confirm gfx is back to "off" state, timeout is 5 seconds */
-		while (!(smu_v12_0_get_gfxoff_status(smu) == 0)) {
-			msleep(10);
-			timeout--;
-			if (timeout == 0) {
-				DRM_ERROR("enable gfxoff timeout and failed!\n");
-				break;
-			}
-		}
 	} else {
 		ret = smu_send_smc_msg(smu, SMU_MSG_DisallowGfxOff);
 
@@ -270,12 +281,12 @@ static int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable)
 	return ret;
 }
 
-static int smu_v12_0_init_smc_tables(struct smu_context *smu)
+int smu_v12_0_init_smc_tables(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 	struct smu_table *tables = NULL;
 
-	if (smu_table->tables || smu_table->table_count == 0)
+	if (smu_table->tables)
 		return -EINVAL;
 
 	tables = kcalloc(SMU_TABLE_COUNT, sizeof(struct smu_table),
@@ -288,11 +299,11 @@ static int smu_v12_0_init_smc_tables(struct smu_context *smu)
 	return smu_tables_init(smu, tables);
 }
 
-static int smu_v12_0_fini_smc_tables(struct smu_context *smu)
+int smu_v12_0_fini_smc_tables(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
 
-	if (!smu_table->tables || smu_table->table_count == 0)
+	if (!smu_table->tables)
 		return -EINVAL;
 
 	kfree(smu_table->clocks_table);
@@ -304,29 +315,76 @@ static int smu_v12_0_fini_smc_tables(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_v12_0_populate_smc_tables(struct smu_context *smu)
+int smu_v12_0_populate_smc_tables(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	struct smu_table *table = NULL;
 
-	table = &smu_table->tables[SMU_TABLE_DPMCLOCKS];
-	if (!table)
+	return smu_update_table(smu, SMU_TABLE_DPMCLOCKS, 0, smu_table->clocks_table, false);
+}
+
+int smu_v12_0_get_enabled_mask(struct smu_context *smu,
+				      uint32_t *feature_mask, uint32_t num)
+{
+	uint32_t feature_mask_high = 0, feature_mask_low = 0;
+	int ret = 0;
+
+	if (!feature_mask || num < 2)
 		return -EINVAL;
 
-	if (!table->cpu_addr)
+	ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesHigh);
+	if (ret)
+		return ret;
+	ret = smu_read_smc_arg(smu, &feature_mask_high);
+	if (ret)
+		return ret;
+
+	ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesLow);
+	if (ret)
+		return ret;
+	ret = smu_read_smc_arg(smu, &feature_mask_low);
+	if (ret)
+		return ret;
+
+	feature_mask[0] = feature_mask_low;
+	feature_mask[1] = feature_mask_high;
+
+	return ret;
+}
+
+int smu_v12_0_get_current_clk_freq(struct smu_context *smu,
+					  enum smu_clk_type clk_id,
+					  uint32_t *value)
+{
+	int ret = 0;
+	uint32_t freq = 0;
+
+	if (clk_id >= SMU_CLK_COUNT || !value)
 		return -EINVAL;
 
-	return smu_update_table(smu, SMU_TABLE_DPMCLOCKS, 0, smu_table->clocks_table, false);
+	ret = smu_get_current_clk_freq_by_table(smu, clk_id, &freq);
+	if (ret)
+		return ret;
+
+	freq *= 100;
+	*value = freq;
+
+	return ret;
 }
 
-static int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
+int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
 						 uint32_t *min, uint32_t *max)
 {
 	int ret = 0;
-
-	mutex_lock(&smu->mutex);
+	uint32_t mclk_mask, soc_mask;
 
 	if (max) {
+		ret = smu_get_profiling_clk_mask(smu, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK,
+						 NULL,
+						 &mclk_mask,
+						 &soc_mask);
+		if (ret)
+			goto failed;
+
 		switch (clk_type) {
 		case SMU_GFXCLK:
 		case SMU_SCLK:
@@ -340,14 +398,20 @@ static int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk
 				goto failed;
 			break;
 		case SMU_UCLK:
-			ret = smu_get_dpm_uclk_limited(smu, max, true);
+		case SMU_FCLK:
+		case SMU_MCLK:
+			ret = smu_get_dpm_clk_limited(smu, clk_type, mclk_mask, max);
+			if (ret)
+				goto failed;
+			break;
+		case SMU_SOCCLK:
+			ret = smu_get_dpm_clk_limited(smu, clk_type, soc_mask, max);
 			if (ret)
 				goto failed;
 			break;
 		default:
 			ret = -EINVAL;
 			goto failed;
-
 		}
 	}
 
@@ -365,7 +429,14 @@ static int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk
 				goto failed;
 			break;
 		case SMU_UCLK:
-			ret = smu_get_dpm_uclk_limited(smu, min, false);
+		case SMU_FCLK:
+		case SMU_MCLK:
+			ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min);
+			if (ret)
+				goto failed;
+			break;
+		case SMU_SOCCLK:
+			ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min);
 			if (ret)
 				goto failed;
 			break;
@@ -373,40 +444,83 @@ static int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk
 			ret = -EINVAL;
 			goto failed;
 		}
-
 	}
 failed:
-	mutex_unlock(&smu->mutex);
 	return ret;
 }
 
-static const struct smu_funcs smu_v12_0_funcs = {
-	.check_fw_status = smu_v12_0_check_fw_status,
-	.check_fw_version = smu_v12_0_check_fw_version,
-	.powergate_sdma = smu_v12_0_powergate_sdma,
-	.powergate_vcn = smu_v12_0_powergate_vcn,
-	.send_smc_msg = smu_v12_0_send_msg,
-	.send_smc_msg_with_param = smu_v12_0_send_msg_with_param,
-	.read_smc_arg = smu_v12_0_read_arg,
-	.set_gfx_cgpg = smu_v12_0_set_gfx_cgpg,
-	.gfx_off_control = smu_v12_0_gfx_off_control,
-	.init_smc_tables = smu_v12_0_init_smc_tables,
-	.fini_smc_tables = smu_v12_0_fini_smc_tables,
-	.populate_smc_tables = smu_v12_0_populate_smc_tables,
-	.get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq,
-};
-
-void smu_v12_0_set_smu_funcs(struct smu_context *smu)
+int smu_v12_0_mode2_reset(struct smu_context *smu){
+	return smu_v12_0_send_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_2);
+}
+
+int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+			    uint32_t min, uint32_t max)
 {
-	struct amdgpu_device *adev = smu->adev;
+	int ret = 0;
 
-	smu->funcs = &smu_v12_0_funcs;
+	if (max < min)
+		return -EINVAL;
 
-	switch (adev->asic_type) {
-	case CHIP_RENOIR:
-		renoir_set_ppt_funcs(smu);
-		break;
+	switch (clk_type) {
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, min);
+		if (ret)
+			return ret;
+
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, max);
+		if (ret)
+			return ret;
+	break;
+	case SMU_FCLK:
+	case SMU_MCLK:
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min);
+		if (ret)
+			return ret;
+
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max);
+		if (ret)
+			return ret;
+	break;
+	case SMU_SOCCLK:
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinSocclkByFreq, min);
+		if (ret)
+			return ret;
+
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max);
+		if (ret)
+			return ret;
+	break;
+	case SMU_VCLK:
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinVcn, min);
+		if (ret)
+			return ret;
+
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxVcn, max);
+		if (ret)
+			return ret;
+	break;
 	default:
-		pr_warn("Unknown asic for smu12\n");
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+int smu_v12_0_set_driver_table_location(struct smu_context *smu)
+{
+	struct smu_table *driver_table = &smu->smu_table.driver_table;
+	int ret = 0;
+
+	if (driver_table->mc_address) {
+		ret = smu_send_smc_msg_with_param(smu,
+				SMU_MSG_SetDriverDramAddrHigh,
+				upper_32_bits(driver_table->mc_address));
+		if (!ret)
+			ret = smu_send_smc_msg_with_param(smu,
+				SMU_MSG_SetDriverDramAddrLow,
+				lower_32_bits(driver_table->mc_address));
 	}
+
+	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 15590fd86ef4..868e2d5f6e62 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -653,8 +653,8 @@ static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct pp_hwmgr *hwmgr)
 static int ci_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr)
 {
 	struct ci_smumgr *smu_data = (struct ci_smumgr *)(hwmgr->smu_backend);
-	uint16_t HiSidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd;
-	uint16_t LoSidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd;
+	uint16_t HiSidd;
+	uint16_t LoSidd;
 	struct phm_cac_tdp_table *cac_table = hwmgr->dyn_state.cac_dtp_table;
 
 	HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index da025b1d302d..32ebb383c456 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -940,7 +940,7 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
 {
 	int result;
 	/* PP_Clocks minClocks; */
-	uint32_t threshold, mvdd;
+	uint32_t mvdd;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
@@ -973,8 +973,6 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
 	level->VoltageDownHyst = 0;
 	level->PowerThrottle = 0;
 
-	threshold = clock * data->fast_watermark_threshold / 100;
-
 	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
@@ -1501,7 +1499,7 @@ static int fiji_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr,
 	uint32_t dram_timing;
 	uint32_t dram_timing2;
 	uint32_t burstTime;
-	ULONG state, trrds, trrdl;
+	ULONG trrds, trrdl;
 	int result;
 
 	result = atomctrl_set_engine_dram_timings_rv770(hwmgr,
@@ -1513,7 +1511,6 @@ static int fiji_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr,
 	dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2);
 	burstTime = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME);
 
-	state = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, STATE0);
 	trrds = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDS0);
 	trrdl = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDL0);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index 3f12cf341511..2319400a3fcb 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -137,7 +137,7 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			priv->smu_tables.entry[table_id].table_id);
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	amdgpu_asic_flush_hdp(adev, NULL);
 
 	memcpy(table, (uint8_t *)priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -150,6 +150,7 @@ static int smu10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 {
 	struct smu10_smumgr *priv =
 			(struct smu10_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
 			"Invalid SMU Table ID!", return -EINVAL;);
@@ -161,6 +162,8 @@ static int smu10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
 
+	amdgpu_asic_flush_hdp(adev, NULL);
+
 	smu10_send_msg_to_smc_with_parameter(hwmgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
index 4728aa23a818..7dca04a89217 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
@@ -177,12 +177,10 @@ static int smu8_load_mec_firmware(struct pp_hwmgr *hwmgr)
 	uint32_t tmp;
 	int ret = 0;
 	struct cgs_firmware_info info = {0};
-	struct smu8_smumgr *smu8_smu;
 
 	if (hwmgr == NULL || hwmgr->device == NULL)
 		return -EINVAL;
 
-	smu8_smu = hwmgr->smu_backend;
 	ret = cgs_get_firmware_info(hwmgr->device,
 						CGS_UCODE_ID_CP_MEC, &info);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu9_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu9_smumgr.c
index 742b3dc1f6cb..adfbcbe5d113 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu9_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu9_smumgr.c
@@ -61,15 +61,29 @@ static uint32_t smu9_wait_for_response(struct pp_hwmgr *hwmgr)
 	uint32_t reg;
 	uint32_t ret;
 
-	reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+	/* Due to the L1 policy problem under SRIOV, we have to use
+	 * mmMP1_SMN_C2PMSG_103 as the driver response register
+	 */
+	if (hwmgr->pp_one_vf) {
+		reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_103);
 
-	ret = phm_wait_for_register_unequal(hwmgr, reg,
-			0, MP1_C2PMSG_90__CONTENT_MASK);
+		ret = phm_wait_for_register_unequal(hwmgr, reg,
+				0, MP1_C2PMSG_103__CONTENT_MASK);
 
-	if (ret)
-		pr_err("No response from smu\n");
+		if (ret)
+			pr_err("No response from smu\n");
 
-	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
+		return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_103);
+	} else {
+		reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+
+		ret = phm_wait_for_register_unequal(hwmgr, reg,
+				0, MP1_C2PMSG_90__CONTENT_MASK);
+
+		if (ret)
+			pr_err("No response from smu\n");
+		return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
+	}
 }
 
 /*
@@ -83,7 +97,11 @@ static int smu9_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
 {
 	struct amdgpu_device *adev = hwmgr->adev;
 
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
+	if (hwmgr->pp_one_vf) {
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_101, msg);
+	} else {
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
+	}
 
 	return 0;
 }
@@ -101,7 +119,10 @@ int smu9_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
 
 	smu9_wait_for_response(hwmgr);
 
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
+	if (hwmgr->pp_one_vf)
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_103, 0);
+	else
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
 	smu9_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -127,9 +148,17 @@ int smu9_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
 
 	smu9_wait_for_response(hwmgr);
 
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
-
-	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
+	/* Due to the L1 policy problem under SRIOV, we have to use
+	 * mmMP1_SMN_C2PMSG_101 as the driver message register and
+	 * mmMP1_SMN_C2PMSG_102 as the driver parameter register.
+	 */
+	if (hwmgr->pp_one_vf) {
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_103, 0);
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_102, parameter);
+	} else {
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
+		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
+	}
 
 	smu9_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -144,5 +173,8 @@ uint32_t smu9_get_argument(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = hwmgr->adev;
 
-	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
+	if (hwmgr->pp_one_vf)
+		return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_102);
+	else
+		return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 0dbdde69f2d9..715564009089 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -58,7 +58,7 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			priv->smu_tables.entry[table_id].table_id);
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	amdgpu_asic_flush_hdp(adev, NULL);
 
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -70,6 +70,13 @@ static int vega10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 		uint8_t *table, int16_t table_id)
 {
 	struct vega10_smumgr *priv = hwmgr->smu_backend;
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	/* under sriov, vbios or hypervisor driver
+	 * has already copy table to smc so here only skip it
+	 */
+	if (!hwmgr->not_vf)
+		return 0;
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
 			"Invalid SMU Table ID!", return -EINVAL);
@@ -81,6 +88,8 @@ static int vega10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
 
+	amdgpu_asic_flush_hdp(adev, NULL);
+
 	smu9_send_msg_to_smc_with_parameter(hwmgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
@@ -100,6 +109,14 @@ int vega10_enable_smc_features(struct pp_hwmgr *hwmgr,
 	int msg = enable ? PPSMC_MSG_EnableSmuFeatures :
 			PPSMC_MSG_DisableSmuFeatures;
 
+	/* VF has no permission to change smu feature due
+	 * to security concern even under pp one vf mode
+	 * it still can't do it. For vega10, the smu in
+	 * vbios will enable the appropriate features.
+	 * */
+	if (!hwmgr->not_vf)
+		return 0;
+
 	return smum_send_msg_to_smc_with_parameter(hwmgr,
 			msg, feature_mask);
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index f9589806bf83..a3915bfcce81 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -66,7 +66,7 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			return -EINVAL);
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	amdgpu_asic_flush_hdp(adev, NULL);
 
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -84,6 +84,7 @@ static int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 {
 	struct vega12_smumgr *priv =
 			(struct vega12_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	PP_ASSERT_WITH_CODE(table_id < TABLE_COUNT,
 			"Invalid SMU Table ID!", return -EINVAL);
@@ -95,6 +96,8 @@ static int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
 
+	amdgpu_asic_flush_hdp(adev, NULL);
+
 	PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			upper_32_bits(priv->smu_tables.entry[table_id].mc_addr)) == 0,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index b9089c6bea85..0db57fb83d30 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -189,7 +189,7 @@ static int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			return ret);
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	amdgpu_asic_flush_hdp(adev, NULL);
 
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -207,6 +207,7 @@ static int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 {
 	struct vega20_smumgr *priv =
 			(struct vega20_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 	int ret = 0;
 
 	PP_ASSERT_WITH_CODE(table_id < TABLE_COUNT,
@@ -219,6 +220,8 @@ static int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr,
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
 
+	amdgpu_asic_flush_hdp(adev, NULL);
+
 	PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			upper_32_bits(priv->smu_tables.entry[table_id].mc_addr))) == 0,
@@ -242,11 +245,14 @@ int vega20_set_activity_monitor_coeff(struct pp_hwmgr *hwmgr,
 {
 	struct vega20_smumgr *priv =
 			(struct vega20_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 	int ret = 0;
 
 	memcpy(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].table, table,
 			priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size);
 
+	amdgpu_asic_flush_hdp(adev, NULL);
+
 	PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			upper_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr))) == 0,
@@ -290,7 +296,7 @@ int vega20_get_activity_monitor_coeff(struct pp_hwmgr *hwmgr,
 			return ret);
 
 	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev, NULL);
+	amdgpu_asic_flush_hdp(adev, NULL);
 
 	memcpy(table, priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].table,
 			priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index ae18fbcb26fb..b0e0d67cd54b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -1114,7 +1114,6 @@ static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	SMIO_Pattern vol_level;
 	uint32_t mvdd;
-	uint16_t us_mvdd;
 
 	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
 
@@ -1168,17 +1167,6 @@ static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 			"in Clock Dependency Table",
 			);
 
-	us_mvdd = 0;
-	if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
-			(data->mclk_dpm_key_disabled))
-		us_mvdd = data->vbios_boot_state.mvdd_bootup_value;
-	else {
-		if (!vegam_populate_mvdd_value(hwmgr,
-				data->dpm_table.mclk_table.dpm_levels[0].value,
-				&vol_level))
-			us_mvdd = vol_level.Voltage;
-	}
-
 	if (!vegam_populate_mvdd_value(hwmgr, 0, &vol_level))
 		table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage);
 	else
@@ -1383,11 +1371,16 @@ static int vegam_populate_smc_boot_level(struct pp_hwmgr *hwmgr,
 	result = phm_find_boot_level(&(data->dpm_table.sclk_table),
 			data->vbios_boot_state.sclk_bootup_value,
 			(uint32_t *)&(table->GraphicsBootLevel));
+	if (result)
+		return result;
 
 	result = phm_find_boot_level(&(data->dpm_table.mclk_table),
 			data->vbios_boot_state.mclk_bootup_value,
 			(uint32_t *)&(table->MemoryBootLevel));
 
+	if (result)
+		return result;
+
 	table->BootVddc  = data->vbios_boot_state.vddc_bootup_value *
 			VOLTAGE_SCALE;
 	table->BootVddci = data->vbios_boot_state.vddci_bootup_value *
@@ -1493,7 +1486,7 @@ static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	struct vegam_smumgr *smu_data =
 			(struct vegam_smumgr *)(hwmgr->smu_backend);
 
-	uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
+	uint8_t i, stretch_amount, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1532,11 +1525,9 @@ static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 			(table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ?
 			table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 5;
 	/* Populate CKS Lookup Table */
-	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
-		stretch_amount2 = 0;
-	else if (stretch_amount == 3 || stretch_amount == 4)
-		stretch_amount2 = 1;
-	else {
+	if (!(stretch_amount == 1 || stretch_amount == 2 ||
+	      stretch_amount == 5 || stretch_amount == 3 ||
+	      stretch_amount == 4)) {
 		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_ClockStretcher);
 		PP_ASSERT_WITH_CODE(false,
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 92c393f613d3..38febd5ca4da 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
+#include "smu_internal.h"
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "smu_v11_0.h"
@@ -143,6 +144,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(PrepareMp1ForShutdown),
 	MSG_MAP(SetMGpuFanBoostLimitRpm),
 	MSG_MAP(GetAVFSVoltageByDpm),
+	MSG_MAP(DFCstateControl),
 };
 
 static struct smu_11_0_cmn2aisc_mapping vega20_clk_map[SMU_CLK_COUNT] = {
@@ -336,6 +338,10 @@ static int vega20_tables_init(struct smu_context *smu, struct smu_table *tables)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
 
+	smu_table->watermarks_table = kzalloc(sizeof(Watermarks_t), GFP_KERNEL);
+	if (!smu_table->watermarks_table)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -464,7 +470,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu)
 	       sizeof(PPTable_t));
 
 	table_context->thermal_controller_type = powerplay_table->ucThermalControllerType;
-	table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]);
 
 	return 0;
 }
@@ -634,7 +639,6 @@ amd_pm_state_type vega20_get_current_power_state(struct smu_context *smu)
 	    !smu_dpm_ctx->dpm_current_power_state)
 		return -EINVAL;
 
-	mutex_lock(&(smu->mutex));
 	switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) {
 	case SMU_STATE_UI_LABEL_BATTERY:
 		pm_type = POWER_STATE_TYPE_BATTERY;
@@ -652,7 +656,6 @@ amd_pm_state_type vega20_get_current_power_state(struct smu_context *smu)
 			pm_type = POWER_STATE_TYPE_DEFAULT;
 		break;
 	}
-	mutex_unlock(&(smu->mutex));
 
 	return pm_type;
 }
@@ -1274,16 +1277,8 @@ static int vega20_force_clk_levels(struct smu_context *smu,
 	struct vega20_dpm_table *dpm_table;
 	struct vega20_single_dpm_table *single_dpm_table;
 	uint32_t soft_min_level, soft_max_level, hard_min_level;
-	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
 	int ret = 0;
 
-	if (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
-		pr_info("force clock level is for dpm manual mode only.\n");
-		return -EINVAL;
-	}
-
-	mutex_lock(&(smu->mutex));
-
 	soft_min_level = mask ? (ffs(mask) - 1) : 0;
 	soft_max_level = mask ? (fls(mask) - 1) : 0;
 
@@ -1436,7 +1431,6 @@ static int vega20_force_clk_levels(struct smu_context *smu,
 		break;
 	}
 
-	mutex_unlock(&(smu->mutex));
 	return ret;
 }
 
@@ -1451,8 +1445,6 @@ static int vega20_get_clock_by_type_with_latency(struct smu_context *smu,
 
 	dpm_table = smu_dpm->dpm_context;
 
-	mutex_lock(&smu->mutex);
-
 	switch (clk_type) {
 	case SMU_GFXCLK:
 		single_dpm_table = &(dpm_table->gfx_table);
@@ -1474,7 +1466,6 @@ static int vega20_get_clock_by_type_with_latency(struct smu_context *smu,
 		ret = -EINVAL;
 	}
 
-	mutex_unlock(&smu->mutex);
 	return ret;
 }
 
@@ -1691,17 +1682,20 @@ static int vega20_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }
@@ -2245,7 +2239,7 @@ static int vega20_apply_clocks_adjust_rules(struct smu_context *smu)
 }
 
 static int
-vega20_notify_smc_dispaly_config(struct smu_context *smu)
+vega20_notify_smc_display_config(struct smu_context *smu)
 {
 	struct vega20_dpm_table *dpm_table = smu->smu_dpm.dpm_context;
 	struct vega20_single_dpm_table *memtable = &dpm_table->mem_table;
@@ -2260,7 +2254,7 @@ vega20_notify_smc_dispaly_config(struct smu_context *smu)
 	if (smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
 		clock_req.clock_type = amd_pp_dcef_clock;
 		clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10;
-		if (!smu->funcs->display_clock_voltage_request(smu, &clock_req)) {
+		if (!smu_v11_0_display_clock_voltage_request(smu, &clock_req)) {
 			if (smu_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
 				ret = smu_send_smc_msg_with_param(smu,
 								  SMU_MSG_SetMinDeepSleepDcefclk,
@@ -2547,8 +2541,6 @@ static int vega20_set_od_percentage(struct smu_context *smu,
 	int feature_enabled;
 	PPCLK_e clk_id;
 
-	mutex_lock(&(smu->mutex));
-
 	dpm_table = smu_dpm->dpm_context;
 	golden_table = smu_dpm->golden_dpm_context;
 
@@ -2598,11 +2590,10 @@ static int vega20_set_od_percentage(struct smu_context *smu,
 	}
 
 	ret = smu_handle_task(smu, smu_dpm->dpm_level,
-			      AMD_PP_TASK_READJUST_POWER_STATE);
+			      AMD_PP_TASK_READJUST_POWER_STATE,
+			      false);
 
 set_od_failed:
-	mutex_unlock(&(smu->mutex));
-
 	return ret;
 }
 
@@ -2827,10 +2818,9 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu,
 	}
 
 	if (type == PP_OD_COMMIT_DPM_TABLE) {
-		mutex_lock(&(smu->mutex));
 		ret = smu_handle_task(smu, smu_dpm->dpm_level,
-				      AMD_PP_TASK_READJUST_POWER_STATE);
-		mutex_unlock(&(smu->mutex));
+				      AMD_PP_TASK_READJUST_POWER_STATE,
+				      false);
 	}
 
 	return ret;
@@ -3047,7 +3037,7 @@ static int vega20_read_sensor(struct smu_context *smu,
 		*size = 4;
 		break;
 	default:
-		ret = smu_smc_read_sensor(smu, sensor, data, size);
+		ret = smu_v11_0_read_sensor(smu, sensor, data, size);
 	}
 	mutex_unlock(&smu->sensor_lock);
 
@@ -3141,6 +3131,49 @@ static int vega20_get_thermal_temperature_range(struct smu_context *smu,
 	return 0;
 }
 
+static int vega20_set_df_cstate(struct smu_context *smu,
+				enum pp_df_cstate state)
+{
+	uint32_t smu_version;
+	int ret;
+
+	ret = smu_get_smc_version(smu, NULL, &smu_version);
+	if (ret) {
+		pr_err("Failed to get smu version!\n");
+		return ret;
+	}
+
+	/* PPSMC_MSG_DFCstateControl is supported with 40.50 and later fws */
+	if (smu_version < 0x283200) {
+		pr_err("Df cstate control is supported with 40.50 and later SMC fw!\n");
+		return -EINVAL;
+	}
+
+	return smu_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state);
+}
+
+static int vega20_update_pcie_parameters(struct smu_context *smu,
+				     uint32_t pcie_gen_cap,
+				     uint32_t pcie_width_cap)
+{
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
+	int ret, i;
+	uint32_t smu_pcie_arg;
+
+	for (i = 0; i < NUM_LINK_LEVELS; i++) {
+		smu_pcie_arg = (i << 16) |
+			((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) :
+				(pcie_gen_cap << 8)) | ((pptable->PcieLaneCount[i] <= pcie_width_cap) ?
+					pptable->PcieLaneCount[i] : pcie_width_cap);
+		ret = smu_send_smc_msg_with_param(smu,
+					  SMU_MSG_OverridePcieParameters,
+					  smu_pcie_arg);
+	}
+
+	return ret;
+}
+
+
 static const struct pptable_funcs vega20_ppt_funcs = {
 	.tables_init = vega20_tables_init,
 	.alloc_dpm_context = vega20_allocate_dpm_context,
@@ -3153,7 +3186,7 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.get_smu_table_index = vega20_get_smu_table_index,
 	.get_smu_power_index = vega20_get_pwr_src_index,
 	.get_workload_type = vega20_get_workload_type,
-	.run_afll_btc = vega20_run_btc_afll,
+	.run_btc = vega20_run_btc_afll,
 	.get_allowed_feature_mask = vega20_get_allowed_feature_mask,
 	.get_current_power_state = vega20_get_current_power_state,
 	.set_default_dpm_table = vega20_set_default_dpm_table,
@@ -3165,6 +3198,7 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.get_od_percentage = vega20_get_od_percentage,
 	.get_power_profile_mode = vega20_get_power_profile_mode,
 	.set_power_profile_mode = vega20_set_power_profile_mode,
+	.set_performance_level = smu_v11_0_set_performance_level,
 	.set_od_percentage = vega20_set_od_percentage,
 	.set_default_od_settings = vega20_set_default_od_settings,
 	.od_edit_dpm_table = vega20_odn_edit_dpm_table,
@@ -3174,7 +3208,7 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.pre_display_config_changed = vega20_pre_display_config_changed,
 	.display_config_changed = vega20_display_config_changed,
 	.apply_clocks_adjust_rules = vega20_apply_clocks_adjust_rules,
-	.notify_smc_dispaly_config = vega20_notify_smc_dispaly_config,
+	.notify_smc_display_config = vega20_notify_smc_display_config,
 	.force_dpm_limit_value = vega20_force_dpm_limit_value,
 	.unforce_dpm_levels = vega20_unforce_dpm_levels,
 	.get_profiling_clk_mask = vega20_get_profiling_clk_mask,
@@ -3183,13 +3217,62 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.get_fan_speed_percent = vega20_get_fan_speed_percent,
 	.get_fan_speed_rpm = vega20_get_fan_speed_rpm,
 	.set_watermarks_table = vega20_set_watermarks_table,
-	.get_thermal_temperature_range = vega20_get_thermal_temperature_range
+	.get_thermal_temperature_range = vega20_get_thermal_temperature_range,
+	.set_df_cstate = vega20_set_df_cstate,
+	.update_pcie_parameters = vega20_update_pcie_parameters,
+	.init_microcode = smu_v11_0_init_microcode,
+	.load_microcode = smu_v11_0_load_microcode,
+	.init_smc_tables = smu_v11_0_init_smc_tables,
+	.fini_smc_tables = smu_v11_0_fini_smc_tables,
+	.init_power = smu_v11_0_init_power,
+	.fini_power = smu_v11_0_fini_power,
+	.check_fw_status = smu_v11_0_check_fw_status,
+	.setup_pptable = smu_v11_0_setup_pptable,
+	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
+	.get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios,
+	.check_pptable = smu_v11_0_check_pptable,
+	.parse_pptable = smu_v11_0_parse_pptable,
+	.populate_smc_tables = smu_v11_0_populate_smc_pptable,
+	.check_fw_version = smu_v11_0_check_fw_version,
+	.write_pptable = smu_v11_0_write_pptable,
+	.set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep,
+	.set_driver_table_location = smu_v11_0_set_driver_table_location,
+	.set_tool_table_location = smu_v11_0_set_tool_table_location,
+	.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
+	.system_features_control = smu_v11_0_system_features_control,
+	.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
+	.read_smc_arg = smu_v11_0_read_arg,
+	.init_display_count = smu_v11_0_init_display_count,
+	.set_allowed_mask = smu_v11_0_set_allowed_mask,
+	.get_enabled_mask = smu_v11_0_get_enabled_mask,
+	.notify_display_change = smu_v11_0_notify_display_change,
+	.set_power_limit = smu_v11_0_set_power_limit,
+	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
+	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
+	.start_thermal_control = smu_v11_0_start_thermal_control,
+	.stop_thermal_control = smu_v11_0_stop_thermal_control,
+	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
+	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
+	.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
+	.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
+	.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
+	.set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm,
+	.set_xgmi_pstate = smu_v11_0_set_xgmi_pstate,
+	.gfx_off_control = smu_v11_0_gfx_off_control,
+	.register_irq_handler = smu_v11_0_register_irq_handler,
+	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
+	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
+	.baco_is_support= smu_v11_0_baco_is_support,
+	.baco_get_state = smu_v11_0_baco_get_state,
+	.baco_set_state = smu_v11_0_baco_set_state,
+	.baco_enter = smu_v11_0_baco_enter,
+	.baco_exit = smu_v11_0_baco_exit,
+	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
+	.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
+	.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
 };
 
 void vega20_set_ppt_funcs(struct smu_context *smu)
 {
-	struct smu_table_context *smu_table = &smu->smu_table;
-
 	smu->ppt_funcs = &vega20_ppt_funcs;
-	smu_table->table_count = TABLE_COUNT;
 }
diff --git a/drivers/gpu/drm/arc/arcpgu_crtc.c b/drivers/gpu/drm/arc/arcpgu_crtc.c
index dfaddbb7da0d..8ae1e1f97a73 100644
--- a/drivers/gpu/drm/arc/arcpgu_crtc.c
+++ b/drivers/gpu/drm/arc/arcpgu_crtc.c
@@ -20,9 +20,10 @@
 
 #define ENCODE_PGU_XY(x, y)	((((x) - 1) << 16) | ((y) - 1))
 
-static struct simplefb_format supported_formats[] = {
-	{ "r5g6b5", 16, {11, 5}, {5, 6}, {0, 5}, {0, 0}, DRM_FORMAT_RGB565 },
-	{ "r8g8b8", 24, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_RGB888 },
+static const u32 arc_pgu_supported_formats[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
 };
 
 static void arc_pgu_set_pxl_fmt(struct drm_crtc *crtc)
@@ -30,22 +31,24 @@ static void arc_pgu_set_pxl_fmt(struct drm_crtc *crtc)
 	struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc);
 	const struct drm_framebuffer *fb = crtc->primary->state->fb;
 	uint32_t pixel_format = fb->format->format;
-	struct simplefb_format *format = NULL;
+	u32 format = DRM_FORMAT_INVALID;
 	int i;
+	u32 reg_ctrl;
 
-	for (i = 0; i < ARRAY_SIZE(supported_formats); i++) {
-		if (supported_formats[i].fourcc == pixel_format)
-			format = &supported_formats[i];
+	for (i = 0; i < ARRAY_SIZE(arc_pgu_supported_formats); i++) {
+		if (arc_pgu_supported_formats[i] == pixel_format)
+			format = arc_pgu_supported_formats[i];
 	}
 
-	if (WARN_ON(!format))
+	if (WARN_ON(format == DRM_FORMAT_INVALID))
 		return;
 
-	if (format->fourcc == DRM_FORMAT_RGB888)
-		arc_pgu_write(arcpgu, ARCPGU_REG_CTRL,
-			      arc_pgu_read(arcpgu, ARCPGU_REG_CTRL) |
-					   ARCPGU_MODE_RGB888_MASK);
-
+	reg_ctrl = arc_pgu_read(arcpgu, ARCPGU_REG_CTRL);
+	if (format == DRM_FORMAT_RGB565)
+		reg_ctrl &= ~ARCPGU_MODE_XRGB8888;
+	else
+		reg_ctrl |= ARCPGU_MODE_XRGB8888;
+	arc_pgu_write(arcpgu, ARCPGU_REG_CTRL, reg_ctrl);
 }
 
 static const struct drm_crtc_funcs arc_pgu_crtc_funcs = {
@@ -193,18 +196,15 @@ static struct drm_plane *arc_pgu_plane_init(struct drm_device *drm)
 {
 	struct arcpgu_drm_private *arcpgu = drm->dev_private;
 	struct drm_plane *plane = NULL;
-	u32 formats[ARRAY_SIZE(supported_formats)], i;
 	int ret;
 
 	plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
 	if (!plane)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
-		formats[i] = supported_formats[i].fourcc;
-
 	ret = drm_universal_plane_init(drm, plane, 0xff, &arc_pgu_plane_funcs,
-				       formats, ARRAY_SIZE(formats),
+				       arc_pgu_supported_formats,
+				       ARRAY_SIZE(arc_pgu_supported_formats),
 				       NULL,
 				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret)
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index 6b7f791685ec..d6a6692db0ac 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -14,6 +14,7 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_probe_helper.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
@@ -45,7 +46,7 @@ static int arcpgu_load(struct drm_device *drm)
 {
 	struct platform_device *pdev = to_platform_device(drm->dev);
 	struct arcpgu_drm_private *arcpgu;
-	struct device_node *encoder_node;
+	struct device_node *encoder_node = NULL, *endpoint_node = NULL;
 	struct resource *res;
 	int ret;
 
@@ -80,14 +81,23 @@ static int arcpgu_load(struct drm_device *drm)
 	if (arc_pgu_setup_crtc(drm) < 0)
 		return -ENODEV;
 
-	/* find the encoder node and initialize it */
-	encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0);
+	/*
+	 * There is only one output port inside each device. It is linked with
+	 * encoder endpoint.
+	 */
+	endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
+	if (endpoint_node) {
+		encoder_node = of_graph_get_remote_port_parent(endpoint_node);
+		of_node_put(endpoint_node);
+	}
+
 	if (encoder_node) {
 		ret = arcpgu_drm_hdmi_init(drm, encoder_node);
 		of_node_put(encoder_node);
 		if (ret < 0)
 			return ret;
 	} else {
+		dev_info(drm->dev, "no encoder found. Assumed virtual LCD on simulation platform\n");
 		ret = arcpgu_drm_sim_init(drm, NULL);
 		if (ret < 0)
 			return ret;
diff --git a/drivers/gpu/drm/arc/arcpgu_hdmi.c b/drivers/gpu/drm/arc/arcpgu_hdmi.c
index 98aac743cc26..8fd7094beece 100644
--- a/drivers/gpu/drm/arc/arcpgu_hdmi.c
+++ b/drivers/gpu/drm/arc/arcpgu_hdmi.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
  */
 
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_device.h>
diff --git a/drivers/gpu/drm/arc/arcpgu_regs.h b/drivers/gpu/drm/arc/arcpgu_regs.h
index dab2c380f7f3..b689a382d556 100644
--- a/drivers/gpu/drm/arc/arcpgu_regs.h
+++ b/drivers/gpu/drm/arc/arcpgu_regs.h
@@ -25,7 +25,7 @@
 #define ARCPGU_CTRL_VS_POL_OFST	0x3
 #define ARCPGU_CTRL_HS_POL_MASK	0x1
 #define ARCPGU_CTRL_HS_POL_OFST	0x4
-#define ARCPGU_MODE_RGB888_MASK	0x04
+#define ARCPGU_MODE_XRGB8888	BIT(2)
 #define ARCPGU_STAT_BUSY_MASK	0x02
 
 #endif
diff --git a/drivers/gpu/drm/arm/display/include/malidp_product.h b/drivers/gpu/drm/arm/display/include/malidp_product.h
index 1053b11352eb..16a8a2c22c42 100644
--- a/drivers/gpu/drm/arm/display/include/malidp_product.h
+++ b/drivers/gpu/drm/arm/display/include/malidp_product.h
@@ -18,7 +18,8 @@
 #define MALIDP_CORE_ID_STATUS(__core_id)     (((__u32)(__core_id)) & 0xFF)
 
 /* Mali-display product IDs */
-#define MALIDP_D71_PRODUCT_ID   0x0071
+#define MALIDP_D71_PRODUCT_ID	0x0071
+#define MALIDP_D32_PRODUCT_ID	0x0032
 
 union komeda_config_id {
 	struct {
diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile
index 5c3900c2e764..1931a7fa1a14 100644
--- a/drivers/gpu/drm/arm/display/komeda/Makefile
+++ b/drivers/gpu/drm/arm/display/komeda/Makefile
@@ -16,7 +16,8 @@ komeda-y := \
 	komeda_crtc.o \
 	komeda_plane.o \
 	komeda_wb_connector.o \
-	komeda_private_obj.o
+	komeda_private_obj.o \
+	komeda_event.o
 
 komeda-y += \
 	d71/d71_dev.o \
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
index 55a8cc94808a..8a02ade369db 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
@@ -106,6 +106,23 @@ static void dump_block_header(struct seq_file *sf, void __iomem *reg)
 			   i, hdr.output_ids[i]);
 }
 
+/* On D71, we are using the global line size. From D32, every component have
+ * a line size register to indicate the fifo size.
+ */
+static u32 __get_blk_line_size(struct d71_dev *d71, u32 __iomem *reg,
+			       u32 max_default)
+{
+	if (!d71->periph_addr)
+		max_default = malidp_read32(reg, BLK_MAX_LINE_SIZE);
+
+	return max_default;
+}
+
+static u32 get_blk_line_size(struct d71_dev *d71, u32 __iomem *reg)
+{
+	return __get_blk_line_size(d71, reg, d71->max_line_size);
+}
+
 static u32 to_rot_ctrl(u32 rot)
 {
 	u32 lr_ctrl = 0;
@@ -332,7 +349,56 @@ static void d71_layer_dump(struct komeda_component *c, struct seq_file *sf)
 	seq_printf(sf, "%sAD_V_CROP:\t\t0x%X\n", prefix, v[2]);
 }
 
+static int d71_layer_validate(struct komeda_component *c,
+			      struct komeda_component_state *state)
+{
+	struct komeda_layer_state *st = to_layer_st(state);
+	struct komeda_layer *layer = to_layer(c);
+	struct drm_plane_state *plane_st;
+	struct drm_framebuffer *fb;
+	u32 fourcc, line_sz, max_line_sz;
+
+	plane_st = drm_atomic_get_new_plane_state(state->obj.state,
+						  state->plane);
+	fb = plane_st->fb;
+	fourcc = fb->format->format;
+
+	if (drm_rotation_90_or_270(st->rot))
+		line_sz = st->vsize - st->afbc_crop_t - st->afbc_crop_b;
+	else
+		line_sz = st->hsize - st->afbc_crop_l - st->afbc_crop_r;
+
+	if (fb->modifier) {
+		if ((fb->modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) ==
+			AFBC_FORMAT_MOD_BLOCK_SIZE_32x8)
+			max_line_sz = layer->line_sz;
+		else
+			max_line_sz = layer->line_sz / 2;
+
+		if (line_sz > max_line_sz) {
+			DRM_DEBUG_ATOMIC("afbc request line_sz: %d exceed the max afbc line_sz: %d.\n",
+					 line_sz, max_line_sz);
+			return -EINVAL;
+		}
+	}
+
+	if (fourcc == DRM_FORMAT_YUV420_10BIT && line_sz > 2046 && (st->afbc_crop_l % 4)) {
+		DRM_DEBUG_ATOMIC("YUV420_10BIT input_hsize: %d exceed the max size 2046.\n",
+				 line_sz);
+		return -EINVAL;
+	}
+
+	if (fourcc == DRM_FORMAT_X0L2 && line_sz > 2046 && (st->addr[0] % 16)) {
+		DRM_DEBUG_ATOMIC("X0L2 input_hsize: %d exceed the max size 2046.\n",
+				 line_sz);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct komeda_component_funcs d71_layer_funcs = {
+	.validate	= d71_layer_validate,
 	.update		= d71_layer_update,
 	.disable	= d71_layer_disable,
 	.dump_register	= d71_layer_dump,
@@ -365,7 +431,28 @@ static int d71_layer_init(struct d71_dev *d71,
 	else
 		layer->layer_type = KOMEDA_FMT_SIMPLE_LAYER;
 
-	set_range(&layer->hsize_in, 4, d71->max_line_size);
+	if (!d71->periph_addr) {
+		/* D32 or newer product */
+		layer->line_sz = malidp_read32(reg, BLK_MAX_LINE_SIZE);
+		layer->yuv_line_sz = L_INFO_YUV_MAX_LINESZ(layer_info);
+	} else if (d71->max_line_size > 2048) {
+		/* D71 4K */
+		layer->line_sz = d71->max_line_size;
+		layer->yuv_line_sz = layer->line_sz / 2;
+	} else	{
+		/* D71 2K */
+		if (layer->layer_type == KOMEDA_FMT_RICH_LAYER) {
+			/* rich layer is 4K configuration */
+			layer->line_sz = d71->max_line_size * 2;
+			layer->yuv_line_sz = layer->line_sz / 2;
+		} else {
+			layer->line_sz = d71->max_line_size;
+			layer->yuv_line_sz = 0;
+		}
+	}
+
+	set_range(&layer->hsize_in, 4, layer->line_sz);
+
 	set_range(&layer->vsize_in, 4, d71->max_vsize);
 
 	malidp_write32(reg, LAYER_PALPHA, D71_PALPHA_DEF_MAP);
@@ -456,9 +543,11 @@ static int d71_wb_layer_init(struct d71_dev *d71,
 
 	wb_layer = to_layer(c);
 	wb_layer->layer_type = KOMEDA_FMT_WB_LAYER;
+	wb_layer->line_sz = get_blk_line_size(d71, reg);
+	wb_layer->yuv_line_sz = wb_layer->line_sz;
 
-	set_range(&wb_layer->hsize_in, D71_MIN_LINE_SIZE, d71->max_line_size);
-	set_range(&wb_layer->vsize_in, D71_MIN_VERTICAL_SIZE, d71->max_vsize);
+	set_range(&wb_layer->hsize_in, 64, wb_layer->line_sz);
+	set_range(&wb_layer->vsize_in, 64, d71->max_vsize);
 
 	return 0;
 }
@@ -595,8 +684,8 @@ static int d71_compiz_init(struct d71_dev *d71,
 
 	compiz = to_compiz(c);
 
-	set_range(&compiz->hsize, D71_MIN_LINE_SIZE, d71->max_line_size);
-	set_range(&compiz->vsize, D71_MIN_VERTICAL_SIZE, d71->max_vsize);
+	set_range(&compiz->hsize, 64, get_blk_line_size(d71, reg));
+	set_range(&compiz->vsize, 64, d71->max_vsize);
 
 	return 0;
 }
@@ -703,7 +792,7 @@ static void d71_scaler_update(struct komeda_component *c,
 
 static void d71_scaler_dump(struct komeda_component *c, struct seq_file *sf)
 {
-	u32 v[9];
+	u32 v[10];
 
 	dump_block_header(sf, c->reg);
 
@@ -723,6 +812,18 @@ static void d71_scaler_dump(struct komeda_component *c, struct seq_file *sf)
 	seq_printf(sf, "SC_H_DELTA_PH:\t\t0x%X\n", v[6]);
 	seq_printf(sf, "SC_V_INIT_PH:\t\t0x%X\n", v[7]);
 	seq_printf(sf, "SC_V_DELTA_PH:\t\t0x%X\n", v[8]);
+
+	get_values_from_reg(c->reg, 0x130, 10, v);
+	seq_printf(sf, "SC_ENH_LIMITS:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "SC_ENH_COEFF0:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "SC_ENH_COEFF1:\t\t0x%X\n", v[2]);
+	seq_printf(sf, "SC_ENH_COEFF2:\t\t0x%X\n", v[3]);
+	seq_printf(sf, "SC_ENH_COEFF3:\t\t0x%X\n", v[4]);
+	seq_printf(sf, "SC_ENH_COEFF4:\t\t0x%X\n", v[5]);
+	seq_printf(sf, "SC_ENH_COEFF5:\t\t0x%X\n", v[6]);
+	seq_printf(sf, "SC_ENH_COEFF6:\t\t0x%X\n", v[7]);
+	seq_printf(sf, "SC_ENH_COEFF7:\t\t0x%X\n", v[8]);
+	seq_printf(sf, "SC_ENH_COEFF8:\t\t0x%X\n", v[9]);
 }
 
 static const struct komeda_component_funcs d71_scaler_funcs = {
@@ -753,7 +854,7 @@ static int d71_scaler_init(struct d71_dev *d71,
 	}
 
 	scaler = to_scaler(c);
-	set_range(&scaler->hsize, 4, 2048);
+	set_range(&scaler->hsize, 4, __get_blk_line_size(d71, reg, 2048));
 	set_range(&scaler->vsize, 4, 4096);
 	scaler->max_downscaling = 6;
 	scaler->max_upscaling = 64;
@@ -862,7 +963,7 @@ static int d71_splitter_init(struct d71_dev *d71,
 
 	splitter = to_splitter(c);
 
-	set_range(&splitter->hsize, 4, d71->max_line_size);
+	set_range(&splitter->hsize, 4, get_blk_line_size(d71, reg));
 	set_range(&splitter->vsize, 4, d71->max_vsize);
 
 	return 0;
@@ -933,7 +1034,8 @@ static int d71_merger_init(struct d71_dev *d71,
 
 	merger = to_merger(c);
 
-	set_range(&merger->hsize_merged, 4, 4032);
+	set_range(&merger->hsize_merged, 4,
+		  __get_blk_line_size(d71, reg, 4032));
 	set_range(&merger->vsize_merged, 4, 4096);
 
 	return 0;
@@ -942,15 +1044,48 @@ static int d71_merger_init(struct d71_dev *d71,
 static void d71_improc_update(struct komeda_component *c,
 			      struct komeda_component_state *state)
 {
+	struct drm_crtc_state *crtc_st = state->crtc->state;
 	struct komeda_improc_state *st = to_improc_st(state);
+	struct d71_pipeline *pipe = to_d71_pipeline(c->pipeline);
 	u32 __iomem *reg = c->reg;
-	u32 index;
+	u32 index, mask = 0, ctrl = 0;
 
 	for_each_changed_input(state, index)
 		malidp_write32(reg, BLK_INPUT_ID0 + index * 4,
 			       to_d71_input_id(state, index));
 
 	malidp_write32(reg, BLK_SIZE, HV_SIZE(st->hsize, st->vsize));
+	malidp_write32(reg, IPS_DEPTH, st->color_depth);
+
+	if (crtc_st->color_mgmt_changed) {
+		mask |= IPS_CTRL_FT | IPS_CTRL_RGB;
+
+		if (crtc_st->gamma_lut) {
+			malidp_write_group(pipe->dou_ft_coeff_addr, FT_COEFF0,
+					   KOMEDA_N_GAMMA_COEFFS,
+					   st->fgamma_coeffs);
+			ctrl |= IPS_CTRL_FT; /* enable gamma */
+		}
+
+		if (crtc_st->ctm) {
+			malidp_write_group(reg, IPS_RGB_RGB_COEFF0,
+					   KOMEDA_N_CTM_COEFFS,
+					   st->ctm_coeffs);
+			ctrl |= IPS_CTRL_RGB; /* enable gamut */
+		}
+	}
+
+	mask |= IPS_CTRL_YUV | IPS_CTRL_CHD422 | IPS_CTRL_CHD420;
+
+	/* config color format */
+	if (st->color_format == DRM_COLOR_FORMAT_YCRCB420)
+		ctrl |= IPS_CTRL_YUV | IPS_CTRL_CHD422 | IPS_CTRL_CHD420;
+	else if (st->color_format == DRM_COLOR_FORMAT_YCRCB422)
+		ctrl |= IPS_CTRL_YUV | IPS_CTRL_CHD422;
+	else if (st->color_format == DRM_COLOR_FORMAT_YCRCB444)
+		ctrl |= IPS_CTRL_YUV;
+
+	malidp_write32_mask(reg, BLK_CONTROL, mask, ctrl);
 }
 
 static void d71_improc_dump(struct komeda_component *c, struct seq_file *sf)
@@ -1135,7 +1270,7 @@ static int d71_timing_ctrlr_init(struct d71_dev *d71,
 
 	ctrlr = to_ctrlr(c);
 
-	ctrlr->supports_dual_link = true;
+	ctrlr->supports_dual_link = d71->supports_dual_link;
 
 	return 0;
 }
@@ -1218,6 +1353,90 @@ int d71_probe_block(struct d71_dev *d71,
 	return err;
 }
 
+static void d71_gcu_dump(struct d71_dev *d71, struct seq_file *sf)
+{
+	u32 v[5];
+
+	seq_puts(sf, "\n------ GCU ------\n");
+
+	get_values_from_reg(d71->gcu_addr, 0, 3, v);
+	seq_printf(sf, "GLB_ARCH_ID:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "GLB_CORE_ID:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "GLB_CORE_INFO:\t\t0x%X\n", v[2]);
+
+	get_values_from_reg(d71->gcu_addr, 0x10, 1, v);
+	seq_printf(sf, "GLB_IRQ_STATUS:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(d71->gcu_addr, 0xA0, 5, v);
+	seq_printf(sf, "GCU_IRQ_RAW_STATUS:\t0x%X\n", v[0]);
+	seq_printf(sf, "GCU_IRQ_CLEAR:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "GCU_IRQ_MASK:\t\t0x%X\n", v[2]);
+	seq_printf(sf, "GCU_IRQ_STATUS:\t\t0x%X\n", v[3]);
+	seq_printf(sf, "GCU_STATUS:\t\t0x%X\n", v[4]);
+
+	get_values_from_reg(d71->gcu_addr, 0xD0, 3, v);
+	seq_printf(sf, "GCU_CONTROL:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "GCU_CONFIG_VALID0:\t0x%X\n", v[1]);
+	seq_printf(sf, "GCU_CONFIG_VALID1:\t0x%X\n", v[2]);
+}
+
+static void d71_lpu_dump(struct d71_pipeline *pipe, struct seq_file *sf)
+{
+	u32 v[6];
+
+	seq_printf(sf, "\n------ LPU%d ------\n", pipe->base.id);
+
+	dump_block_header(sf, pipe->lpu_addr);
+
+	get_values_from_reg(pipe->lpu_addr, 0xA0, 6, v);
+	seq_printf(sf, "LPU_IRQ_RAW_STATUS:\t0x%X\n", v[0]);
+	seq_printf(sf, "LPU_IRQ_CLEAR:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "LPU_IRQ_MASK:\t\t0x%X\n", v[2]);
+	seq_printf(sf, "LPU_IRQ_STATUS:\t\t0x%X\n", v[3]);
+	seq_printf(sf, "LPU_STATUS:\t\t0x%X\n", v[4]);
+	seq_printf(sf, "LPU_TBU_STATUS:\t\t0x%X\n", v[5]);
+
+	get_values_from_reg(pipe->lpu_addr, 0xC0, 1, v);
+	seq_printf(sf, "LPU_INFO:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(pipe->lpu_addr, 0xD0, 3, v);
+	seq_printf(sf, "LPU_RAXI_CONTROL:\t0x%X\n", v[0]);
+	seq_printf(sf, "LPU_WAXI_CONTROL:\t0x%X\n", v[1]);
+	seq_printf(sf, "LPU_TBU_CONTROL:\t0x%X\n", v[2]);
+}
+
+static void d71_dou_dump(struct d71_pipeline *pipe, struct seq_file *sf)
+{
+	u32 v[5];
+
+	seq_printf(sf, "\n------ DOU%d ------\n", pipe->base.id);
+
+	dump_block_header(sf, pipe->dou_addr);
+
+	get_values_from_reg(pipe->dou_addr, 0xA0, 5, v);
+	seq_printf(sf, "DOU_IRQ_RAW_STATUS:\t0x%X\n", v[0]);
+	seq_printf(sf, "DOU_IRQ_CLEAR:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "DOU_IRQ_MASK:\t\t0x%X\n", v[2]);
+	seq_printf(sf, "DOU_IRQ_STATUS:\t\t0x%X\n", v[3]);
+	seq_printf(sf, "DOU_STATUS:\t\t0x%X\n", v[4]);
+}
+
+static void d71_pipeline_dump(struct komeda_pipeline *pipe, struct seq_file *sf)
+{
+	struct d71_pipeline *d71_pipe = to_d71_pipeline(pipe);
+
+	d71_lpu_dump(d71_pipe, sf);
+	d71_dou_dump(d71_pipe, sf);
+}
+
 const struct komeda_pipeline_funcs d71_pipeline_funcs = {
-	.downscaling_clk_check = d71_downscaling_clk_check,
+	.downscaling_clk_check	= d71_downscaling_clk_check,
+	.dump_register		= d71_pipeline_dump,
 };
+
+void d71_dump(struct komeda_dev *mdev, struct seq_file *sf)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+
+	d71_gcu_dump(d71, sf);
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
index d567ab7ed314..00fa56c29b3e 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
@@ -20,8 +20,10 @@ static u64 get_lpu_event(struct d71_pipeline *d71_pipeline)
 		evts |= KOMEDA_EVENT_IBSY;
 	if (raw_status & LPU_IRQ_EOW)
 		evts |= KOMEDA_EVENT_EOW;
+	if (raw_status & LPU_IRQ_OVR)
+		evts |= KOMEDA_EVENT_OVR;
 
-	if (raw_status & (LPU_IRQ_ERR | LPU_IRQ_IBSY)) {
+	if (raw_status & (LPU_IRQ_ERR | LPU_IRQ_IBSY | LPU_IRQ_OVR)) {
 		u32 restore = 0, tbu_status;
 		/* Check error of LPU status */
 		status = malidp_read32(reg, BLK_STATUS);
@@ -45,6 +47,15 @@ static u64 get_lpu_event(struct d71_pipeline *d71_pipeline)
 			restore |= LPU_STATUS_ACE3;
 			evts |= KOMEDA_ERR_ACE3;
 		}
+		if (status & LPU_STATUS_FEMPTY) {
+			restore |= LPU_STATUS_FEMPTY;
+			evts |= KOMEDA_EVENT_EMPTY;
+		}
+		if (status & LPU_STATUS_FFULL) {
+			restore |= LPU_STATUS_FFULL;
+			evts |= KOMEDA_EVENT_FULL;
+		}
+
 		if (restore != 0)
 			malidp_write32_mask(reg, BLK_STATUS, restore, 0);
 
@@ -195,7 +206,7 @@ d71_irq_handler(struct komeda_dev *mdev, struct komeda_events *evts)
 	if (gcu_status & GLB_IRQ_STATUS_PIPE1)
 		evts->pipes[1] |= get_pipeline_event(d71->pipes[1], gcu_status);
 
-	return gcu_status ? IRQ_HANDLED : IRQ_NONE;
+	return IRQ_RETVAL(gcu_status);
 }
 
 #define ENABLED_GCU_IRQS	(GCU_IRQ_CVAL0 | GCU_IRQ_CVAL1 | \
@@ -371,23 +382,33 @@ static int d71_enum_resources(struct komeda_dev *mdev)
 		goto err_cleanup;
 	}
 
-	/* probe PERIPH */
+	/* Only the legacy HW has the periph block, the newer merges the periph
+	 * into GCU
+	 */
 	value = malidp_read32(d71->periph_addr, BLK_BLOCK_INFO);
-	if (BLOCK_INFO_BLK_TYPE(value) != D71_BLK_TYPE_PERIPH) {
-		DRM_ERROR("access blk periph but got blk: %d.\n",
-			  BLOCK_INFO_BLK_TYPE(value));
-		err = -EINVAL;
-		goto err_cleanup;
+	if (BLOCK_INFO_BLK_TYPE(value) != D71_BLK_TYPE_PERIPH)
+		d71->periph_addr = NULL;
+
+	if (d71->periph_addr) {
+		/* probe PERIPHERAL in legacy HW */
+		value = malidp_read32(d71->periph_addr, PERIPH_CONFIGURATION_ID);
+
+		d71->max_line_size	= value & PERIPH_MAX_LINE_SIZE ? 4096 : 2048;
+		d71->max_vsize		= 4096;
+		d71->num_rich_layers	= value & PERIPH_NUM_RICH_LAYERS ? 2 : 1;
+		d71->supports_dual_link	= !!(value & PERIPH_SPLIT_EN);
+		d71->integrates_tbu	= !!(value & PERIPH_TBU_EN);
+	} else {
+		value = malidp_read32(d71->gcu_addr, GCU_CONFIGURATION_ID0);
+		d71->max_line_size	= GCU_MAX_LINE_SIZE(value);
+		d71->max_vsize		= GCU_MAX_NUM_LINES(value);
+
+		value = malidp_read32(d71->gcu_addr, GCU_CONFIGURATION_ID1);
+		d71->num_rich_layers	= GCU_NUM_RICH_LAYERS(value);
+		d71->supports_dual_link	= GCU_DISPLAY_SPLIT_EN(value);
+		d71->integrates_tbu	= GCU_DISPLAY_TBU_EN(value);
 	}
 
-	value = malidp_read32(d71->periph_addr, PERIPH_CONFIGURATION_ID);
-
-	d71->max_line_size	= value & PERIPH_MAX_LINE_SIZE ? 4096 : 2048;
-	d71->max_vsize		= 4096;
-	d71->num_rich_layers	= value & PERIPH_NUM_RICH_LAYERS ? 2 : 1;
-	d71->supports_dual_link	= value & PERIPH_SPLIT_EN ? true : false;
-	d71->integrates_tbu	= value & PERIPH_TBU_EN ? true : false;
-
 	for (i = 0; i < d71->num_pipelines; i++) {
 		pipe = komeda_pipeline_add(mdev, sizeof(struct d71_pipeline),
 					   &d71_pipeline_funcs);
@@ -395,11 +416,30 @@ static int d71_enum_resources(struct komeda_dev *mdev)
 			err = PTR_ERR(pipe);
 			goto err_cleanup;
 		}
+
+		/* D71 HW doesn't update shadow registers when display output
+		 * is turning off, so when we disable all pipeline components
+		 * together with display output disable by one flush or one
+		 * operation, the disable operation updated registers will not
+		 * be flush to or valid in HW, which may leads problem.
+		 * To workaround this problem, introduce a two phase disable.
+		 * Phase1: Disabling components with display is on to make sure
+		 *	   the disable can be flushed to HW.
+		 * Phase2: Only turn-off display output.
+		 */
+		value = KOMEDA_PIPELINE_IMPROCS |
+			BIT(KOMEDA_COMPONENT_TIMING_CTRLR);
+
+		pipe->standalone_disabled_comps = value;
+
 		d71->pipes[i] = to_d71_pipeline(pipe);
 	}
 
-	/* loop the register blks and probe */
-	i = 2; /* exclude GCU and PERIPH */
+	/* loop the register blks and probe.
+	 * NOTE: d71->num_blocks includes reserved blocks.
+	 * d71->num_blocks = GCU + valid blocks + reserved blocks
+	 */
+	i = 1; /* exclude GCU */
 	offset = D71_BLOCK_SIZE; /* skip GCU */
 	while (i < d71->num_blocks) {
 		blk_base = mdev->reg_base + (offset >> 2);
@@ -409,9 +449,9 @@ static int d71_enum_resources(struct komeda_dev *mdev)
 			err = d71_probe_block(d71, &blk, blk_base);
 			if (err)
 				goto err_cleanup;
-			i++;
 		}
 
+		i++;
 		offset += D71_BLOCK_SIZE;
 	}
 
@@ -561,26 +601,43 @@ static int d71_disconnect_iommu(struct komeda_dev *mdev)
 }
 
 static const struct komeda_dev_funcs d71_chip_funcs = {
-	.init_format_table = d71_init_fmt_tbl,
-	.enum_resources	= d71_enum_resources,
-	.cleanup	= d71_cleanup,
-	.irq_handler	= d71_irq_handler,
-	.enable_irq	= d71_enable_irq,
-	.disable_irq	= d71_disable_irq,
-	.on_off_vblank	= d71_on_off_vblank,
-	.change_opmode	= d71_change_opmode,
-	.flush		= d71_flush,
-	.connect_iommu	= d71_connect_iommu,
-	.disconnect_iommu = d71_disconnect_iommu,
+	.init_format_table	= d71_init_fmt_tbl,
+	.enum_resources		= d71_enum_resources,
+	.cleanup		= d71_cleanup,
+	.irq_handler		= d71_irq_handler,
+	.enable_irq		= d71_enable_irq,
+	.disable_irq		= d71_disable_irq,
+	.on_off_vblank		= d71_on_off_vblank,
+	.change_opmode		= d71_change_opmode,
+	.flush			= d71_flush,
+	.connect_iommu		= d71_connect_iommu,
+	.disconnect_iommu	= d71_disconnect_iommu,
+	.dump_register		= d71_dump,
 };
 
 const struct komeda_dev_funcs *
 d71_identify(u32 __iomem *reg_base, struct komeda_chip_info *chip)
 {
+	const struct komeda_dev_funcs *funcs;
+	u32 product_id;
+
+	chip->core_id = malidp_read32(reg_base, GLB_CORE_ID);
+
+	product_id = MALIDP_CORE_ID_PRODUCT_ID(chip->core_id);
+
+	switch (product_id) {
+	case MALIDP_D71_PRODUCT_ID:
+	case MALIDP_D32_PRODUCT_ID:
+		funcs = &d71_chip_funcs;
+		break;
+	default:
+		DRM_ERROR("Unsupported product: 0x%x\n", product_id);
+		return NULL;
+	}
+
 	chip->arch_id	= malidp_read32(reg_base, GLB_ARCH_ID);
-	chip->core_id	= malidp_read32(reg_base, GLB_CORE_ID);
 	chip->core_info	= malidp_read32(reg_base, GLB_CORE_INFO);
 	chip->bus_width	= D71_BUS_WIDTH_16_BYTES;
 
-	return &d71_chip_funcs;
+	return funcs;
 }
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
index 84f1878b647d..c7357c2b9e62 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
@@ -49,4 +49,6 @@ int d71_probe_block(struct d71_dev *d71,
 		    struct block_header *blk, u32 __iomem *reg);
 void d71_read_block_header(u32 __iomem *reg, struct block_header *blk);
 
+void d71_dump(struct komeda_dev *mdev, struct seq_file *sf);
+
 #endif /* !_D71_DEV_H_ */
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_regs.h b/drivers/gpu/drm/arm/display/komeda/d71/d71_regs.h
index 2d5e6d00b42c..e80172a0b320 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_regs.h
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_regs.h
@@ -10,6 +10,7 @@
 /* Common block registers offset */
 #define BLK_BLOCK_INFO		0x000
 #define BLK_PIPELINE_INFO	0x004
+#define BLK_MAX_LINE_SIZE	0x008
 #define BLK_VALID_INPUT_ID0	0x020
 #define BLK_OUTPUT_ID0		0x060
 #define BLK_INPUT_ID0		0x080
@@ -71,6 +72,19 @@
 #define GCU_CONTROL_MODE(x)	((x) & 0x7)
 #define GCU_CONTROL_SRST	BIT(16)
 
+/* GCU_CONFIGURATION registers */
+#define GCU_CONFIGURATION_ID0	0x100
+#define GCU_CONFIGURATION_ID1	0x104
+
+/* GCU configuration */
+#define GCU_MAX_LINE_SIZE(x)	((x) & 0xFFFF)
+#define GCU_MAX_NUM_LINES(x)	((x) >> 16)
+#define GCU_NUM_RICH_LAYERS(x)	((x) & 0x7)
+#define GCU_NUM_PIPELINES(x)	(((x) >> 3) & 0x7)
+#define GCU_NUM_SCALERS(x)	(((x) >> 6) & 0x7)
+#define GCU_DISPLAY_SPLIT_EN(x)	(((x) >> 16) & 0x1)
+#define GCU_DISPLAY_TBU_EN(x)	(((x) >> 17) & 0x1)
+
 /* GCU opmode */
 #define INACTIVE_MODE		0
 #define TBU_CONNECT_MODE	1
@@ -161,6 +175,7 @@
 #define TBU_DOUTSTDCAPB_MASK	0x3F
 
 /* LPU_IRQ_BITS */
+#define LPU_IRQ_OVR		BIT(9)
 #define LPU_IRQ_IBSY		BIT(10)
 #define LPU_IRQ_ERR		BIT(11)
 #define LPU_IRQ_EOW		BIT(12)
@@ -171,6 +186,8 @@
 #define LPU_STATUS_AXIE		BIT(4)
 #define LPU_STATUS_AXIRP	BIT(5)
 #define LPU_STATUS_AXIWP	BIT(6)
+#define LPU_STATUS_FEMPTY	BIT(11)
+#define LPU_STATUS_FFULL	BIT(14)
 #define LPU_STATUS_ACE0		BIT(16)
 #define LPU_STATUS_ACE1		BIT(17)
 #define LPU_STATUS_ACE2		BIT(18)
@@ -321,6 +338,7 @@
 #define L_INFO_RF		BIT(0)
 #define L_INFO_CM		BIT(1)
 #define L_INFO_ABUF_SIZE(x)	(((x) >> 4) & 0x7)
+#define L_INFO_YUV_MAX_LINESZ(x)	(((x) >> 16) & 0xFFFF)
 
 /* Scaler registers */
 #define SC_COEFFTAB		0x0DC
@@ -494,13 +512,6 @@ enum d71_blk_type {
 #define D71_DEFAULT_PREPRETCH_LINE	5
 #define D71_BUS_WIDTH_16_BYTES		16
 
-#define D71_MIN_LINE_SIZE		64
-#define D71_MIN_VERTICAL_SIZE		64
-#define D71_SC_MIN_LIN_SIZE		4
-#define D71_SC_MIN_VERTICAL_SIZE	4
-#define D71_SC_MAX_LIN_SIZE		2048
-#define D71_SC_MAX_VERTICAL_SIZE	4096
-
 #define D71_SC_MAX_UPSCALING		64
 #define D71_SC_MAX_DOWNSCALING		6
 #define D71_SC_SPLIT_OVERLAP		8
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
index 9d14a92dbb17..d8e449e6ebda 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
@@ -65,3 +65,69 @@ const s32 *komeda_select_yuv2rgb_coeffs(u32 color_encoding, u32 color_range)
 
 	return coeffs;
 }
+
+struct gamma_curve_sector {
+	u32 boundary_start;
+	u32 num_of_segments;
+	u32 segment_width;
+};
+
+struct gamma_curve_segment {
+	u32 start;
+	u32 end;
+};
+
+static struct gamma_curve_sector sector_tbl[] = {
+	{ 0,    4,  4   },
+	{ 16,   4,  4   },
+	{ 32,   4,  8   },
+	{ 64,   4,  16  },
+	{ 128,  4,  32  },
+	{ 256,  4,  64  },
+	{ 512,  16, 32  },
+	{ 1024, 24, 128 },
+};
+
+static void
+drm_lut_to_coeffs(struct drm_property_blob *lut_blob, u32 *coeffs,
+		  struct gamma_curve_sector *sector_tbl, u32 num_sectors)
+{
+	struct drm_color_lut *lut;
+	u32 i, j, in, num = 0;
+
+	if (!lut_blob)
+		return;
+
+	lut = lut_blob->data;
+
+	for (i = 0; i < num_sectors; i++) {
+		for (j = 0; j < sector_tbl[i].num_of_segments; j++) {
+			in = sector_tbl[i].boundary_start +
+			     j * sector_tbl[i].segment_width;
+
+			coeffs[num++] = drm_color_lut_extract(lut[in].red,
+						KOMEDA_COLOR_PRECISION);
+		}
+	}
+
+	coeffs[num] = BIT(KOMEDA_COLOR_PRECISION);
+}
+
+void drm_lut_to_fgamma_coeffs(struct drm_property_blob *lut_blob, u32 *coeffs)
+{
+	drm_lut_to_coeffs(lut_blob, coeffs, sector_tbl, ARRAY_SIZE(sector_tbl));
+}
+
+void drm_ctm_to_coeffs(struct drm_property_blob *ctm_blob, u32 *coeffs)
+{
+	struct drm_color_ctm *ctm;
+	u32 i;
+
+	if (!ctm_blob)
+		return;
+
+	ctm = ctm_blob->data;
+
+	for (i = 0; i < KOMEDA_N_CTM_COEFFS; i++)
+		coeffs[i] = drm_color_ctm_s31_32_to_qm_n(ctm->matrix[i], 3, 12);
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h
index a2df218f58e7..2f4668466112 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h
@@ -11,7 +11,15 @@
 #include <drm/drm_color_mgmt.h>
 
 #define KOMEDA_N_YUV2RGB_COEFFS		12
+#define KOMEDA_N_RGB2YUV_COEFFS		12
+#define KOMEDA_COLOR_PRECISION		12
+#define KOMEDA_N_GAMMA_COEFFS		65
+#define KOMEDA_COLOR_LUT_SIZE		BIT(KOMEDA_COLOR_PRECISION)
+#define KOMEDA_N_CTM_COEFFS		9
+
+void drm_lut_to_fgamma_coeffs(struct drm_property_blob *lut_blob, u32 *coeffs);
+void drm_ctm_to_coeffs(struct drm_property_blob *ctm_blob, u32 *coeffs);
 
 const s32 *komeda_select_yuv2rgb_coeffs(u32 color_encoding, u32 color_range);
 
-#endif
+#endif /*_KOMEDA_COLOR_MGMT_H_*/
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
index 624d257da20f..56bd938961ee 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
@@ -18,6 +18,33 @@
 #include "komeda_dev.h"
 #include "komeda_kms.h"
 
+void komeda_crtc_get_color_config(struct drm_crtc_state *crtc_st,
+				  u32 *color_depths, u32 *color_formats)
+{
+	struct drm_connector *conn;
+	struct drm_connector_state *conn_st;
+	u32 conn_color_formats = ~0u;
+	int i, min_bpc = 31, conn_bpc = 0;
+
+	for_each_new_connector_in_state(crtc_st->state, conn, conn_st, i) {
+		if (conn_st->crtc != crtc_st->crtc)
+			continue;
+
+		conn_bpc = conn->display_info.bpc ? conn->display_info.bpc : 8;
+		conn_color_formats &= conn->display_info.color_formats;
+
+		if (conn_bpc < min_bpc)
+			min_bpc = conn_bpc;
+	}
+
+	/* connector doesn't config any color_format, use RGB444 as default */
+	if (!conn_color_formats)
+		conn_color_formats = DRM_COLOR_FORMAT_RGB444;
+
+	*color_depths = GENMASK(min_bpc, 0);
+	*color_formats = conn_color_formats;
+}
+
 static void komeda_crtc_update_clock_ratio(struct komeda_crtc_state *kcrtc_st)
 {
 	u64 pxlclk, aclk;
@@ -248,25 +275,60 @@ static void
 komeda_crtc_atomic_enable(struct drm_crtc *crtc,
 			  struct drm_crtc_state *old)
 {
+	pm_runtime_get_sync(crtc->dev->dev);
 	komeda_crtc_prepare(to_kcrtc(crtc));
 	drm_crtc_vblank_on(crtc);
+	WARN_ON(drm_crtc_vblank_get(crtc));
 	komeda_crtc_do_flush(crtc, old);
 }
 
 static void
+komeda_crtc_flush_and_wait_for_flip_done(struct komeda_crtc *kcrtc,
+					 struct completion *input_flip_done)
+{
+	struct drm_device *drm = kcrtc->base.dev;
+	struct komeda_dev *mdev = kcrtc->master->mdev;
+	struct completion *flip_done;
+	struct completion temp;
+	int timeout;
+
+	/* if caller doesn't send a flip_done, use a private flip_done */
+	if (input_flip_done) {
+		flip_done = input_flip_done;
+	} else {
+		init_completion(&temp);
+		kcrtc->disable_done = &temp;
+		flip_done = &temp;
+	}
+
+	mdev->funcs->flush(mdev, kcrtc->master->id, 0);
+
+	/* wait the flip take affect.*/
+	timeout = wait_for_completion_timeout(flip_done, HZ);
+	if (timeout == 0) {
+		DRM_ERROR("wait pipe%d flip done timeout\n", kcrtc->master->id);
+		if (!input_flip_done) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&drm->event_lock, flags);
+			kcrtc->disable_done = NULL;
+			spin_unlock_irqrestore(&drm->event_lock, flags);
+		}
+	}
+}
+
+static void
 komeda_crtc_atomic_disable(struct drm_crtc *crtc,
 			   struct drm_crtc_state *old)
 {
 	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
 	struct komeda_crtc_state *old_st = to_kcrtc_st(old);
-	struct komeda_dev *mdev = crtc->dev->dev_private;
 	struct komeda_pipeline *master = kcrtc->master;
 	struct komeda_pipeline *slave  = kcrtc->slave;
-	struct completion *disable_done = &crtc->state->commit->flip_done;
-	struct completion temp;
-	int timeout;
+	struct completion *disable_done;
+	bool needs_phase2 = false;
 
-	DRM_DEBUG_ATOMIC("CRTC%d_DISABLE: active_pipes: 0x%x, affected: 0x%x.\n",
+	DRM_DEBUG_ATOMIC("CRTC%d_DISABLE: active_pipes: 0x%x, affected: 0x%x\n",
 			 drm_crtc_index(crtc),
 			 old_st->active_pipes, old_st->affected_pipes);
 
@@ -274,7 +336,7 @@ komeda_crtc_atomic_disable(struct drm_crtc *crtc,
 		komeda_pipeline_disable(slave, old->state);
 
 	if (has_bit(master->id, old_st->active_pipes))
-		komeda_pipeline_disable(master, old->state);
+		needs_phase2 = komeda_pipeline_disable(master, old->state);
 
 	/* crtc_disable has two scenarios according to the state->active switch.
 	 * 1. active -> inactive
@@ -293,34 +355,26 @@ komeda_crtc_atomic_disable(struct drm_crtc *crtc,
 	 *    That's also the reason why skip modeset commit in
 	 *    komeda_crtc_atomic_flush()
 	 */
-	if (crtc->state->active) {
-		struct komeda_pipeline_state *pipe_st;
-		/* clear the old active_comps to zero */
-		pipe_st = komeda_pipeline_get_old_state(master, old->state);
-		pipe_st->active_comps = 0;
+	disable_done = (needs_phase2 || crtc->state->active) ?
+		       NULL : &crtc->state->commit->flip_done;
 
-		init_completion(&temp);
-		kcrtc->disable_done = &temp;
-		disable_done = &temp;
-	}
+	/* wait phase 1 disable done */
+	komeda_crtc_flush_and_wait_for_flip_done(kcrtc, disable_done);
 
-	mdev->funcs->flush(mdev, master->id, 0);
+	/* phase 2 */
+	if (needs_phase2) {
+		komeda_pipeline_disable(kcrtc->master, old->state);
 
-	/* wait the disable take affect.*/
-	timeout = wait_for_completion_timeout(disable_done, HZ);
-	if (timeout == 0) {
-		DRM_ERROR("disable pipeline%d timeout.\n", kcrtc->master->id);
-		if (crtc->state->active) {
-			unsigned long flags;
+		disable_done = crtc->state->active ?
+			       NULL : &crtc->state->commit->flip_done;
 
-			spin_lock_irqsave(&crtc->dev->event_lock, flags);
-			kcrtc->disable_done = NULL;
-			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-		}
+		komeda_crtc_flush_and_wait_for_flip_done(kcrtc, disable_done);
 	}
 
+	drm_crtc_vblank_put(crtc);
 	drm_crtc_vblank_off(crtc);
 	komeda_crtc_unprepare(kcrtc);
+	pm_runtime_put(crtc->dev->dev);
 }
 
 static void
@@ -566,6 +620,8 @@ static int komeda_crtc_add(struct komeda_kms_dev *kms,
 
 	crtc->port = kcrtc->master->of_output_port;
 
+	drm_crtc_enable_color_mgmt(crtc, 0, true, KOMEDA_COLOR_LUT_SIZE);
+
 	return err;
 }
 
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
index ca64a129c594..1d767473ba8a 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
@@ -10,6 +10,7 @@
 #include <linux/of_graph.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -25,12 +26,18 @@ static int komeda_register_show(struct seq_file *sf, void *x)
 	struct komeda_dev *mdev = sf->private;
 	int i;
 
+	seq_puts(sf, "\n====== Komeda register dump =========\n");
+
+	pm_runtime_get_sync(mdev->dev);
+
 	if (mdev->funcs->dump_register)
 		mdev->funcs->dump_register(mdev, sf);
 
 	for (i = 0; i < mdev->n_pipelines; i++)
 		komeda_pipeline_dump_register(mdev->pipelines[i], sf);
 
+	pm_runtime_put(mdev->dev);
+
 	return 0;
 }
 
@@ -56,6 +63,8 @@ static void komeda_debugfs_init(struct komeda_dev *mdev)
 	mdev->debugfs_root = debugfs_create_dir("komeda", NULL);
 	debugfs_create_file("register", 0444, mdev->debugfs_root,
 			    mdev, &komeda_register_fops);
+	debugfs_create_x16("err_verbosity", 0664, mdev->debugfs_root,
+			   &mdev->err_verbosity);
 }
 #endif
 
@@ -91,9 +100,19 @@ config_id_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 static DEVICE_ATTR_RO(config_id);
 
+static ssize_t
+aclk_hz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct komeda_dev *mdev = dev_to_mdev(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n", clk_get_rate(mdev->aclk));
+}
+static DEVICE_ATTR_RO(aclk_hz);
+
 static struct attribute *komeda_sysfs_entries[] = {
 	&dev_attr_core_id.attr,
 	&dev_attr_config_id.attr,
+	&dev_attr_aclk_hz.attr,
 	NULL,
 };
 
@@ -101,22 +120,14 @@ static struct attribute_group komeda_sysfs_attr_group = {
 	.attrs = komeda_sysfs_entries,
 };
 
-static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np)
+static int komeda_parse_pipe_dt(struct komeda_pipeline *pipe)
 {
-	struct komeda_pipeline *pipe;
+	struct device_node *np = pipe->of_node;
 	struct clk *clk;
-	u32 pipe_id;
-	int ret = 0;
-
-	ret = of_property_read_u32(np, "reg", &pipe_id);
-	if (ret != 0 || pipe_id >= mdev->n_pipelines)
-		return -EINVAL;
-
-	pipe = mdev->pipelines[pipe_id];
 
 	clk = of_clk_get_by_name(np, "pxclk");
 	if (IS_ERR(clk)) {
-		DRM_ERROR("get pxclk for pipeline %d failed!\n", pipe_id);
+		DRM_ERROR("get pxclk for pipeline %d failed!\n", pipe->id);
 		return PTR_ERR(clk);
 	}
 	pipe->pxlclk = clk;
@@ -130,7 +141,6 @@ static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np)
 		of_graph_get_port_by_id(np, KOMEDA_OF_PORT_OUTPUT);
 
 	pipe->dual_link = pipe->of_output_links[0] && pipe->of_output_links[1];
-	pipe->of_node = of_node_get(np);
 
 	return 0;
 }
@@ -139,7 +149,9 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *child, *np = dev->of_node;
-	int ret;
+	struct komeda_pipeline *pipe;
+	u32 pipe_id = U32_MAX;
+	int ret = -1;
 
 	mdev->irq  = platform_get_irq(pdev, 0);
 	if (mdev->irq < 0) {
@@ -154,37 +166,44 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev)
 	ret = 0;
 
 	for_each_available_child_of_node(np, child) {
-		if (of_node_cmp(child->name, "pipeline") == 0) {
-			ret = komeda_parse_pipe_dt(mdev, child);
-			if (ret) {
-				DRM_ERROR("parse pipeline dt error!\n");
-				of_node_put(child);
-				break;
+		if (of_node_name_eq(child, "pipeline")) {
+			of_property_read_u32(child, "reg", &pipe_id);
+			if (pipe_id >= mdev->n_pipelines) {
+				DRM_WARN("Skip the redundant DT node: pipeline-%u.\n",
+					 pipe_id);
+				continue;
 			}
+			mdev->pipelines[pipe_id]->of_node = of_node_get(child);
+		}
+	}
+
+	for (pipe_id = 0; pipe_id < mdev->n_pipelines; pipe_id++) {
+		pipe = mdev->pipelines[pipe_id];
+
+		if (!pipe->of_node) {
+			DRM_ERROR("Pipeline-%d doesn't have a DT node.\n",
+				  pipe->id);
+			return -EINVAL;
 		}
+		ret = komeda_parse_pipe_dt(pipe);
+		if (ret)
+			return ret;
 	}
 
-	return ret;
+	return 0;
 }
 
 struct komeda_dev *komeda_dev_create(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
-	const struct komeda_product_data *product;
+	komeda_identify_func komeda_identify;
 	struct komeda_dev *mdev;
-	struct resource *io_res;
 	int err = 0;
 
-	product = of_device_get_match_data(dev);
-	if (!product)
+	komeda_identify = of_device_get_match_data(dev);
+	if (!komeda_identify)
 		return ERR_PTR(-ENODEV);
 
-	io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!io_res) {
-		DRM_ERROR("No registers defined.\n");
-		return ERR_PTR(-ENODEV);
-	}
-
 	mdev = devm_kzalloc(dev, sizeof(*mdev), GFP_KERNEL);
 	if (!mdev)
 		return ERR_PTR(-ENOMEM);
@@ -192,7 +211,7 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 	mutex_init(&mdev->lock);
 
 	mdev->dev = dev;
-	mdev->reg_base = devm_ioremap_resource(dev, io_res);
+	mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mdev->reg_base)) {
 		DRM_ERROR("Map register space failed.\n");
 		err = PTR_ERR(mdev->reg_base);
@@ -210,13 +229,11 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 
 	clk_prepare_enable(mdev->aclk);
 
-	mdev->funcs = product->identify(mdev->reg_base, &mdev->chip);
-	if (!komeda_product_match(mdev, product->product_id)) {
-		DRM_ERROR("DT configured %x mismatch with real HW %x.\n",
-			  product->product_id,
-			  MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id));
+	mdev->funcs = komeda_identify(mdev->reg_base, &mdev->chip);
+	if (!mdev->funcs) {
+		DRM_ERROR("Failed to identify the HW.\n");
 		err = -ENODEV;
-		goto err_cleanup;
+		goto disable_clk;
 	}
 
 	DRM_INFO("Found ARM Mali-D%x version r%dp%d\n",
@@ -229,19 +246,19 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 	err = mdev->funcs->enum_resources(mdev);
 	if (err) {
 		DRM_ERROR("enumerate display resource failed.\n");
-		goto err_cleanup;
+		goto disable_clk;
 	}
 
 	err = komeda_parse_dt(dev, mdev);
 	if (err) {
 		DRM_ERROR("parse device tree failed.\n");
-		goto err_cleanup;
+		goto disable_clk;
 	}
 
 	err = komeda_assemble_pipelines(mdev);
 	if (err) {
 		DRM_ERROR("assemble display pipelines failed.\n");
-		goto err_cleanup;
+		goto disable_clk;
 	}
 
 	dev->dma_parms = &mdev->dma_parms;
@@ -251,13 +268,7 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 	if (!mdev->iommu)
 		DRM_INFO("continue without IOMMU support!\n");
 
-	if (mdev->iommu && mdev->funcs->connect_iommu) {
-		err = mdev->funcs->connect_iommu(mdev);
-		if (err) {
-			mdev->iommu = NULL;
-			goto err_cleanup;
-		}
-	}
+	clk_disable_unprepare(mdev->aclk);
 
 	err = sysfs_create_group(&dev->kobj, &komeda_sysfs_attr_group);
 	if (err) {
@@ -265,12 +276,16 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 		goto err_cleanup;
 	}
 
+	mdev->err_verbosity = KOMEDA_DEV_PRINT_ERR_EVENTS;
+
 #ifdef CONFIG_DEBUG_FS
 	komeda_debugfs_init(mdev);
 #endif
 
 	return mdev;
 
+disable_clk:
+	clk_disable_unprepare(mdev->aclk);
 err_cleanup:
 	komeda_dev_destroy(mdev);
 	return ERR_PTR(err);
@@ -288,9 +303,8 @@ void komeda_dev_destroy(struct komeda_dev *mdev)
 	debugfs_remove_recursive(mdev->debugfs_root);
 #endif
 
-	if (mdev->iommu && mdev->funcs->disconnect_iommu)
-		mdev->funcs->disconnect_iommu(mdev);
-	mdev->iommu = NULL;
+	if (mdev->aclk)
+		clk_prepare_enable(mdev->aclk);
 
 	for (i = 0; i < mdev->n_pipelines; i++) {
 		komeda_pipeline_destroy(mdev, mdev->pipelines[i]);
@@ -317,3 +331,29 @@ void komeda_dev_destroy(struct komeda_dev *mdev)
 
 	devm_kfree(dev, mdev);
 }
+
+int komeda_dev_resume(struct komeda_dev *mdev)
+{
+	clk_prepare_enable(mdev->aclk);
+
+	mdev->funcs->enable_irq(mdev);
+
+	if (mdev->iommu && mdev->funcs->connect_iommu)
+		if (mdev->funcs->connect_iommu(mdev))
+			DRM_ERROR("connect iommu failed.\n");
+
+	return 0;
+}
+
+int komeda_dev_suspend(struct komeda_dev *mdev)
+{
+	if (mdev->iommu && mdev->funcs->disconnect_iommu)
+		if (mdev->funcs->disconnect_iommu(mdev))
+			DRM_ERROR("disconnect iommu failed.\n");
+
+	mdev->funcs->disable_irq(mdev);
+
+	clk_disable_unprepare(mdev->aclk);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
index d1c86b6174c8..ce27f2f27c24 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
@@ -20,6 +20,8 @@
 #define KOMEDA_EVENT_OVR		BIT_ULL(4)
 #define KOMEDA_EVENT_EOW		BIT_ULL(5)
 #define KOMEDA_EVENT_MODE		BIT_ULL(6)
+#define KOMEDA_EVENT_FULL		BIT_ULL(7)
+#define KOMEDA_EVENT_EMPTY		BIT_ULL(8)
 
 #define KOMEDA_ERR_TETO			BIT_ULL(14)
 #define KOMEDA_ERR_TEMR			BIT_ULL(15)
@@ -40,10 +42,24 @@
 #define KOMEDA_ERR_TTNG			BIT_ULL(30)
 #define KOMEDA_ERR_TTF			BIT_ULL(31)
 
-/* malidp device id */
-enum {
-	MALI_D71 = 0,
-};
+#define KOMEDA_ERR_EVENTS	\
+	(KOMEDA_EVENT_URUN	| KOMEDA_EVENT_IBSY	| KOMEDA_EVENT_OVR |\
+	KOMEDA_ERR_TETO		| KOMEDA_ERR_TEMR	| KOMEDA_ERR_TITR |\
+	KOMEDA_ERR_CPE		| KOMEDA_ERR_CFGE	| KOMEDA_ERR_AXIE |\
+	KOMEDA_ERR_ACE0		| KOMEDA_ERR_ACE1	| KOMEDA_ERR_ACE2 |\
+	KOMEDA_ERR_ACE3		| KOMEDA_ERR_DRIFTTO	| KOMEDA_ERR_FRAMETO |\
+	KOMEDA_ERR_ZME		| KOMEDA_ERR_MERR	| KOMEDA_ERR_TCF |\
+	KOMEDA_ERR_TTNG		| KOMEDA_ERR_TTF)
+
+#define KOMEDA_WARN_EVENTS	\
+	(KOMEDA_ERR_CSCE | KOMEDA_EVENT_FULL | KOMEDA_EVENT_EMPTY)
+
+#define KOMEDA_INFO_EVENTS (0 \
+			    | KOMEDA_EVENT_VSYNC \
+			    | KOMEDA_EVENT_FLIP \
+			    | KOMEDA_EVENT_EOW \
+			    | KOMEDA_EVENT_MODE \
+			    )
 
 /* pipeline DT ports */
 enum {
@@ -58,12 +74,6 @@ struct komeda_chip_info {
 	u32 bus_width;
 };
 
-struct komeda_product_data {
-	u32 product_id;
-	const struct komeda_dev_funcs *(*identify)(u32 __iomem *reg,
-					     struct komeda_chip_info *info);
-};
-
 struct komeda_dev;
 
 struct komeda_events {
@@ -191,6 +201,23 @@ struct komeda_dev {
 
 	/** @debugfs_root: root directory of komeda debugfs */
 	struct dentry *debugfs_root;
+	/**
+	 * @err_verbosity: bitmask for how much extra info to print on error
+	 *
+	 * See KOMEDA_DEV_* macros for details. Low byte contains the debug
+	 * level categories, the high byte contains extra debug options.
+	 */
+	u16 err_verbosity;
+	/* Print a single line per error per frame with error events. */
+#define KOMEDA_DEV_PRINT_ERR_EVENTS BIT(0)
+	/* Print a single line per warning per frame with error events. */
+#define KOMEDA_DEV_PRINT_WARN_EVENTS BIT(1)
+	/* Print a single line per info event per frame with error events. */
+#define KOMEDA_DEV_PRINT_INFO_EVENTS BIT(2)
+	/* Dump DRM state on an error or warning event. */
+#define KOMEDA_DEV_PRINT_DUMP_STATE_ON_EVENT BIT(8)
+	/* Disable rate limiting of event prints (normally one per commit) */
+#define KOMEDA_DEV_PRINT_DISABLE_RATELIMIT BIT(12)
 };
 
 static inline bool
@@ -199,6 +226,9 @@ komeda_product_match(struct komeda_dev *mdev, u32 target)
 	return MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id) == target;
 }
 
+typedef const struct komeda_dev_funcs *
+(*komeda_identify_func)(u32 __iomem *reg, struct komeda_chip_info *chip);
+
 const struct komeda_dev_funcs *
 d71_identify(u32 __iomem *reg, struct komeda_chip_info *chip);
 
@@ -207,4 +237,9 @@ void komeda_dev_destroy(struct komeda_dev *mdev);
 
 struct komeda_dev *dev_to_mdev(struct device *dev);
 
+void komeda_print_events(struct komeda_events *evts, struct drm_device *dev);
+
+int komeda_dev_resume(struct komeda_dev *mdev);
+int komeda_dev_suspend(struct komeda_dev *mdev);
+
 #endif /*_KOMEDA_DEV_H_*/
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
index 69ace6f9055d..ea5cd1e17304 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/component.h>
+#include <linux/pm_runtime.h>
 #include <drm/drm_of.h>
 #include "komeda_dev.h"
 #include "komeda_kms.h"
@@ -32,6 +33,12 @@ static void komeda_unbind(struct device *dev)
 		return;
 
 	komeda_kms_detach(mdrv->kms);
+
+	if (pm_runtime_enabled(dev))
+		pm_runtime_disable(dev);
+	else
+		komeda_dev_suspend(mdrv->mdev);
+
 	komeda_dev_destroy(mdrv->mdev);
 
 	dev_set_drvdata(dev, NULL);
@@ -53,6 +60,10 @@ static int komeda_bind(struct device *dev)
 		goto free_mdrv;
 	}
 
+	pm_runtime_enable(dev);
+	if (!pm_runtime_enabled(dev))
+		komeda_dev_resume(mdrv->mdev);
+
 	mdrv->kms = komeda_kms_attach(mdrv->mdev);
 	if (IS_ERR(mdrv->kms)) {
 		err = PTR_ERR(mdrv->kms);
@@ -64,6 +75,11 @@ static int komeda_bind(struct device *dev)
 	return 0;
 
 destroy_mdev:
+	if (pm_runtime_enabled(dev))
+		pm_runtime_disable(dev);
+	else
+		komeda_dev_suspend(mdrv->mdev);
+
 	komeda_dev_destroy(mdrv->mdev);
 
 free_mdrv:
@@ -122,27 +138,63 @@ static int komeda_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct komeda_product_data komeda_products[] = {
-	[MALI_D71] = {
-		.product_id = MALIDP_D71_PRODUCT_ID,
-		.identify = d71_identify,
-	},
-};
-
 static const struct of_device_id komeda_of_match[] = {
-	{ .compatible = "arm,mali-d71", .data = &komeda_products[MALI_D71], },
+	{ .compatible = "arm,mali-d71", .data = d71_identify, },
+	{ .compatible = "arm,mali-d32", .data = d71_identify, },
 	{},
 };
 
 MODULE_DEVICE_TABLE(of, komeda_of_match);
 
+static int komeda_rt_pm_suspend(struct device *dev)
+{
+	struct komeda_drv *mdrv = dev_get_drvdata(dev);
+
+	return komeda_dev_suspend(mdrv->mdev);
+}
+
+static int komeda_rt_pm_resume(struct device *dev)
+{
+	struct komeda_drv *mdrv = dev_get_drvdata(dev);
+
+	return komeda_dev_resume(mdrv->mdev);
+}
+
+static int __maybe_unused komeda_pm_suspend(struct device *dev)
+{
+	struct komeda_drv *mdrv = dev_get_drvdata(dev);
+	int res;
+
+	res = drm_mode_config_helper_suspend(&mdrv->kms->base);
+
+	if (!pm_runtime_status_suspended(dev))
+		komeda_dev_suspend(mdrv->mdev);
+
+	return res;
+}
+
+static int __maybe_unused komeda_pm_resume(struct device *dev)
+{
+	struct komeda_drv *mdrv = dev_get_drvdata(dev);
+
+	if (!pm_runtime_status_suspended(dev))
+		komeda_dev_resume(mdrv->mdev);
+
+	return drm_mode_config_helper_resume(&mdrv->kms->base);
+}
+
+static const struct dev_pm_ops komeda_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(komeda_pm_suspend, komeda_pm_resume)
+	SET_RUNTIME_PM_OPS(komeda_rt_pm_suspend, komeda_rt_pm_resume, NULL)
+};
+
 static struct platform_driver komeda_platform_driver = {
 	.probe	= komeda_platform_probe,
 	.remove	= komeda_platform_remove,
 	.driver	= {
 		.name = "komeda",
 		.of_match_table	= komeda_of_match,
-		.pm = NULL,
+		.pm = &komeda_pm_ops,
 	},
 };
 
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_event.c b/drivers/gpu/drm/arm/display/komeda/komeda_event.c
new file mode 100644
index 000000000000..53f944e66dfc
--- /dev/null
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_event.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * Author: James.Qian.Wang <james.qian.wang@arm.com>
+ *
+ */
+#include <drm/drm_atomic.h>
+#include <drm/drm_print.h>
+
+#include "komeda_dev.h"
+
+struct komeda_str {
+	char *str;
+	u32 sz;
+	u32 len;
+};
+
+/* return 0 on success,  < 0 on no space.
+ */
+__printf(2, 3)
+static int komeda_sprintf(struct komeda_str *str, const char *fmt, ...)
+{
+	va_list args;
+	int num, free_sz;
+	int err;
+
+	free_sz = str->sz - str->len - 1;
+	if (free_sz <= 0)
+		return -ENOSPC;
+
+	va_start(args, fmt);
+
+	num = vsnprintf(str->str + str->len, free_sz, fmt, args);
+
+	va_end(args);
+
+	if (num < free_sz) {
+		str->len += num;
+		err = 0;
+	} else {
+		str->len = str->sz - 1;
+		err = -ENOSPC;
+	}
+
+	return err;
+}
+
+static void evt_sprintf(struct komeda_str *str, u64 evt, const char *msg)
+{
+	if (evt)
+		komeda_sprintf(str, msg);
+}
+
+static void evt_str(struct komeda_str *str, u64 events)
+{
+	if (events == 0ULL) {
+		komeda_sprintf(str, "None");
+		return;
+	}
+
+	evt_sprintf(str, events & KOMEDA_EVENT_VSYNC, "VSYNC|");
+	evt_sprintf(str, events & KOMEDA_EVENT_FLIP, "FLIP|");
+	evt_sprintf(str, events & KOMEDA_EVENT_EOW, "EOW|");
+	evt_sprintf(str, events & KOMEDA_EVENT_MODE, "OP-MODE|");
+
+	evt_sprintf(str, events & KOMEDA_EVENT_URUN, "UNDERRUN|");
+	evt_sprintf(str, events & KOMEDA_EVENT_OVR, "OVERRUN|");
+
+	/* GLB error */
+	evt_sprintf(str, events & KOMEDA_ERR_MERR, "MERR|");
+	evt_sprintf(str, events & KOMEDA_ERR_FRAMETO, "FRAMETO|");
+
+	/* DOU error */
+	evt_sprintf(str, events & KOMEDA_ERR_DRIFTTO, "DRIFTTO|");
+	evt_sprintf(str, events & KOMEDA_ERR_FRAMETO, "FRAMETO|");
+	evt_sprintf(str, events & KOMEDA_ERR_TETO, "TETO|");
+	evt_sprintf(str, events & KOMEDA_ERR_CSCE, "CSCE|");
+
+	/* LPU errors or events */
+	evt_sprintf(str, events & KOMEDA_EVENT_IBSY, "IBSY|");
+	evt_sprintf(str, events & KOMEDA_EVENT_EMPTY, "EMPTY|");
+	evt_sprintf(str, events & KOMEDA_EVENT_FULL, "FULL|");
+	evt_sprintf(str, events & KOMEDA_ERR_AXIE, "AXIE|");
+	evt_sprintf(str, events & KOMEDA_ERR_ACE0, "ACE0|");
+	evt_sprintf(str, events & KOMEDA_ERR_ACE1, "ACE1|");
+	evt_sprintf(str, events & KOMEDA_ERR_ACE2, "ACE2|");
+	evt_sprintf(str, events & KOMEDA_ERR_ACE3, "ACE3|");
+
+	/* LPU TBU errors*/
+	evt_sprintf(str, events & KOMEDA_ERR_TCF, "TCF|");
+	evt_sprintf(str, events & KOMEDA_ERR_TTNG, "TTNG|");
+	evt_sprintf(str, events & KOMEDA_ERR_TITR, "TITR|");
+	evt_sprintf(str, events & KOMEDA_ERR_TEMR, "TEMR|");
+	evt_sprintf(str, events & KOMEDA_ERR_TTF, "TTF|");
+
+	/* CU errors*/
+	evt_sprintf(str, events & KOMEDA_ERR_CPE, "COPROC|");
+	evt_sprintf(str, events & KOMEDA_ERR_ZME, "ZME|");
+	evt_sprintf(str, events & KOMEDA_ERR_CFGE, "CFGE|");
+	evt_sprintf(str, events & KOMEDA_ERR_TEMR, "TEMR|");
+
+	if (str->len > 0 && (str->str[str->len - 1] == '|')) {
+		str->str[str->len - 1] = 0;
+		str->len--;
+	}
+}
+
+static bool is_new_frame(struct komeda_events *a)
+{
+	return (a->pipes[0] | a->pipes[1]) &
+	       (KOMEDA_EVENT_FLIP | KOMEDA_EVENT_EOW);
+}
+
+void komeda_print_events(struct komeda_events *evts, struct drm_device *dev)
+{
+	u64 print_evts = 0;
+	static bool en_print = true;
+	struct komeda_dev *mdev = dev->dev_private;
+	u16 const err_verbosity = mdev->err_verbosity;
+	u64 evts_mask = evts->global | evts->pipes[0] | evts->pipes[1];
+
+	/* reduce the same msg print, only print the first evt for one frame */
+	if (evts->global || is_new_frame(evts))
+		en_print = true;
+	if (!(err_verbosity & KOMEDA_DEV_PRINT_DISABLE_RATELIMIT) && !en_print)
+		return;
+
+	if (err_verbosity & KOMEDA_DEV_PRINT_ERR_EVENTS)
+		print_evts |= KOMEDA_ERR_EVENTS;
+	if (err_verbosity & KOMEDA_DEV_PRINT_WARN_EVENTS)
+		print_evts |= KOMEDA_WARN_EVENTS;
+	if (err_verbosity & KOMEDA_DEV_PRINT_INFO_EVENTS)
+		print_evts |= KOMEDA_INFO_EVENTS;
+
+	if (evts_mask & print_evts) {
+		char msg[256];
+		struct komeda_str str;
+		struct drm_printer p = drm_info_printer(dev->dev);
+
+		str.str = msg;
+		str.sz  = sizeof(msg);
+		str.len = 0;
+
+		komeda_sprintf(&str, "gcu: ");
+		evt_str(&str, evts->global);
+		komeda_sprintf(&str, ", pipes[0]: ");
+		evt_str(&str, evts->pipes[0]);
+		komeda_sprintf(&str, ", pipes[1]: ");
+		evt_str(&str, evts->pipes[1]);
+
+		DRM_ERROR("err detect: %s\n", msg);
+		if ((err_verbosity & KOMEDA_DEV_PRINT_DUMP_STATE_ON_EVENT) &&
+		    (evts_mask & (KOMEDA_ERR_EVENTS | KOMEDA_WARN_EVENTS)))
+			drm_state_dump(dev, &p);
+
+		en_print = false;
+	}
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index ae274902ff92..442d4656150a 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -48,6 +48,8 @@ static irqreturn_t komeda_kms_irq_handler(int irq, void *data)
 	memset(&evts, 0, sizeof(evts));
 	status = mdev->funcs->irq_handler(mdev, &evts);
 
+	komeda_print_events(&evts, drm);
+
 	/* Notify the crtc to handle the events */
 	for (i = 0; i < kms->n_crtcs; i++)
 		komeda_crtc_handle_event(&kms->crtcs[i], &evts);
@@ -306,10 +308,6 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev)
 	if (err)
 		goto free_component_binding;
 
-	err = mdev->funcs->enable_irq(mdev);
-	if (err)
-		goto free_component_binding;
-
 	drm->irq_enabled = true;
 
 	drm_kms_helper_poll_init(drm);
@@ -323,7 +321,6 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev)
 free_interrupts:
 	drm_kms_helper_poll_fini(drm);
 	drm->irq_enabled = false;
-	mdev->funcs->disable_irq(mdev);
 free_component_binding:
 	component_unbind_all(mdev->dev, drm);
 cleanup_mode_config:
@@ -345,7 +342,6 @@ void komeda_kms_detach(struct komeda_kms_dev *kms)
 	drm_kms_helper_poll_fini(drm);
 	drm_atomic_helper_shutdown(drm);
 	drm->irq_enabled = false;
-	mdev->funcs->disable_irq(mdev);
 	component_unbind_all(mdev->dev, drm);
 	drm_mode_config_cleanup(drm);
 	komeda_kms_cleanup_private_objs(kms);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
index 45c498e15e7a..456f3c435719 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
@@ -166,6 +166,8 @@ static inline bool has_flip_h(u32 rot)
 		return !!(rotation & DRM_MODE_REFLECT_X);
 }
 
+void komeda_crtc_get_color_config(struct drm_crtc_state *crtc_st,
+				  u32 *color_depths, u32 *color_formats);
 unsigned long komeda_crtc_get_aclk(struct komeda_crtc_state *kcrtc_st);
 
 int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
index cf5bea578ad9..ac8725e24853 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
@@ -11,6 +11,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include "malidp_utils.h"
+#include "komeda_color_mgmt.h"
 
 #define KOMEDA_MAX_PIPELINES		2
 #define KOMEDA_PIPELINE_MAX_LAYERS	4
@@ -227,6 +228,8 @@ struct komeda_layer {
 	/* accepted h/v input range before rotation */
 	struct malidp_range hsize_in, vsize_in;
 	u32 layer_type; /* RICH, SIMPLE or WB */
+	u32 line_sz;
+	u32 yuv_line_sz; /* maximum line size for YUV422 and YUV420 */
 	u32 supported_rots;
 	/* komeda supports layer split which splits a whole image to two parts
 	 * left and right and handle them by two individual layer processors
@@ -323,7 +326,10 @@ struct komeda_improc {
 
 struct komeda_improc_state {
 	struct komeda_component_state base;
+	u8 color_format, color_depth;
 	u16 hsize, vsize;
+	u32 fgamma_coeffs[KOMEDA_N_GAMMA_COEFFS];
+	u32 ctm_coeffs[KOMEDA_N_CTM_COEFFS];
 };
 
 /* display timing controller */
@@ -389,6 +395,18 @@ struct komeda_pipeline {
 	int id;
 	/** @avail_comps: available components mask of pipeline */
 	u32 avail_comps;
+	/**
+	 * @standalone_disabled_comps:
+	 *
+	 * When disable the pipeline, some components can not be disabled
+	 * together with others, but need a sparated and standalone disable.
+	 * The standalone_disabled_comps are the components which need to be
+	 * disabled standalone, and this concept also introduce concept of
+	 * two phase.
+	 * phase 1: for disabling the common components.
+	 * phase 2: for disabling the standalong_disabled_comps.
+	 */
+	u32 standalone_disabled_comps;
 	/** @n_layers: the number of layer on @layers */
 	int n_layers;
 	/** @layers: the pipeline layers */
@@ -535,7 +553,7 @@ int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe,
 struct komeda_pipeline_state *
 komeda_pipeline_get_old_state(struct komeda_pipeline *pipe,
 			      struct drm_atomic_state *state);
-void komeda_pipeline_disable(struct komeda_pipeline *pipe,
+bool komeda_pipeline_disable(struct komeda_pipeline *pipe,
 			     struct drm_atomic_state *old_state);
 void komeda_pipeline_update(struct komeda_pipeline *pipe,
 			    struct drm_atomic_state *old_state);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
index b848270e0a1f..8f32ae7c25d0 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -285,6 +285,7 @@ komeda_layer_check_cfg(struct komeda_layer *layer,
 		       struct komeda_data_flow_cfg *dflow)
 {
 	u32 src_x, src_y, src_w, src_h;
+	u32 line_sz, max_line_sz;
 
 	if (!komeda_fb_is_layer_supported(kfb, layer->layer_type, dflow->rot))
 		return -EINVAL;
@@ -314,6 +315,22 @@ komeda_layer_check_cfg(struct komeda_layer *layer,
 		return -EINVAL;
 	}
 
+	if (drm_rotation_90_or_270(dflow->rot))
+		line_sz = dflow->in_h;
+	else
+		line_sz = dflow->in_w;
+
+	if (kfb->base.format->hsub > 1)
+		max_line_sz = layer->yuv_line_sz;
+	else
+		max_line_sz = layer->line_sz;
+
+	if (line_sz > max_line_sz) {
+		DRM_DEBUG_ATOMIC("Required line_sz: %d exceeds the max size %d\n",
+				 line_sz, max_line_sz);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -743,6 +760,7 @@ komeda_improc_validate(struct komeda_improc *improc,
 		       struct komeda_data_flow_cfg *dflow)
 {
 	struct drm_crtc *crtc = kcrtc_st->base.crtc;
+	struct drm_crtc_state *crtc_st = &kcrtc_st->base;
 	struct komeda_component_state *c_st;
 	struct komeda_improc_state *st;
 
@@ -756,6 +774,40 @@ komeda_improc_validate(struct komeda_improc *improc,
 	st->hsize = dflow->in_w;
 	st->vsize = dflow->in_h;
 
+	if (drm_atomic_crtc_needs_modeset(crtc_st)) {
+		u32 output_depths, output_formats;
+		u32 avail_depths, avail_formats;
+
+		komeda_crtc_get_color_config(crtc_st, &output_depths,
+					     &output_formats);
+
+		avail_depths = output_depths & improc->supported_color_depths;
+		if (avail_depths == 0) {
+			DRM_DEBUG_ATOMIC("No available color depths, conn depths: 0x%x & display: 0x%x\n",
+					 output_depths,
+					 improc->supported_color_depths);
+			return -EINVAL;
+		}
+
+		avail_formats = output_formats &
+				improc->supported_color_formats;
+		if (!avail_formats) {
+			DRM_DEBUG_ATOMIC("No available color_formats, conn formats 0x%x & display: 0x%x\n",
+					 output_formats,
+					 improc->supported_color_formats);
+			return -EINVAL;
+		}
+
+		st->color_depth = __fls(avail_depths);
+		st->color_format = BIT(__ffs(avail_formats));
+	}
+
+	if (kcrtc_st->base.color_mgmt_changed) {
+		drm_lut_to_fgamma_coeffs(kcrtc_st->base.gamma_lut,
+					 st->fgamma_coeffs);
+		drm_ctm_to_coeffs(kcrtc_st->base.ctm, st->ctm_coeffs);
+	}
+
 	komeda_component_add_input(&st->base, &dflow->input, 0);
 	komeda_component_set_output(&dflow->input, &improc->base, 0);
 
@@ -1218,7 +1270,17 @@ int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe,
 	return 0;
 }
 
-void komeda_pipeline_disable(struct komeda_pipeline *pipe,
+/* Since standalong disabled components must be disabled separately and in the
+ * last, So a complete disable operation may needs to call pipeline_disable
+ * twice (two phase disabling).
+ * Phase 1: disable the common components, flush it.
+ * Phase 2: disable the standalone disabled components, flush it.
+ *
+ * RETURNS:
+ * true: disable is not complete, needs a phase 2 disable.
+ * false: disable is complete.
+ */
+bool komeda_pipeline_disable(struct komeda_pipeline *pipe,
 			     struct drm_atomic_state *old_state)
 {
 	struct komeda_pipeline_state *old;
@@ -1228,9 +1290,14 @@ void komeda_pipeline_disable(struct komeda_pipeline *pipe,
 
 	old = komeda_pipeline_get_old_state(pipe, old_state);
 
-	disabling_comps = old->active_comps;
-	DRM_DEBUG_ATOMIC("PIPE%d: disabling_comps: 0x%x.\n",
-			 pipe->id, disabling_comps);
+	disabling_comps = old->active_comps &
+			  (~pipe->standalone_disabled_comps);
+	if (!disabling_comps)
+		disabling_comps = old->active_comps &
+				  pipe->standalone_disabled_comps;
+
+	DRM_DEBUG_ATOMIC("PIPE%d: active_comps: 0x%x, disabling_comps: 0x%x.\n",
+			 pipe->id, old->active_comps, disabling_comps);
 
 	dp_for_each_set_bit(id, disabling_comps) {
 		c = komeda_pipeline_get_component(pipe, id);
@@ -1248,6 +1315,13 @@ void komeda_pipeline_disable(struct komeda_pipeline *pipe,
 
 		c->funcs->disable(c);
 	}
+
+	/* Update the pipeline state, if there are components that are still
+	 * active, return true for calling the phase 2 disable.
+	 */
+	old->active_comps &= ~disabling_comps;
+
+	return old->active_comps ? true : false;
 }
 
 void komeda_pipeline_update(struct komeda_pipeline *pipe,
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
index b72840c06ab7..e465cc4879c9 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
@@ -141,6 +141,7 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms,
 	struct komeda_dev *mdev = kms->base.dev_private;
 	struct komeda_wb_connector *kwb_conn;
 	struct drm_writeback_connector *wb_conn;
+	struct drm_display_info *info;
 	u32 *formats, n_formats = 0;
 	int err;
 
@@ -172,6 +173,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms,
 
 	drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs);
 
+	info = &kwb_conn->base.base.display_info;
+	info->bpc = __fls(kcrtc->master->improc->supported_color_depths);
+	info->color_formats = kcrtc->master->improc->supported_color_formats;
+
 	kcrtc->wb_conn = kwb_conn;
 
 	return 0;
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 333b88a5efb0..37d92a06318e 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -368,7 +368,7 @@ malidp_verify_afbc_framebuffer(struct drm_device *dev, struct drm_file *file,
 	return false;
 }
 
-struct drm_framebuffer *
+static struct drm_framebuffer *
 malidp_fb_create(struct drm_device *dev, struct drm_file *file,
 		 const struct drm_mode_fb_cmd2 *mode_cmd)
 {
@@ -491,9 +491,9 @@ void malidp_error(struct malidp_drm *malidp,
 	spin_unlock_irqrestore(&malidp->errors_lock, irqflags);
 }
 
-void malidp_error_stats_dump(const char *prefix,
-			     struct malidp_error_stats error_stats,
-			     struct seq_file *m)
+static void malidp_error_stats_dump(const char *prefix,
+				    struct malidp_error_stats error_stats,
+				    struct seq_file *m)
 {
 	seq_printf(m, "[%s] num_errors : %d\n", prefix,
 		   error_stats.num_errors);
@@ -665,7 +665,7 @@ static ssize_t core_id_show(struct device *dev, struct device_attribute *attr,
 	return snprintf(buf, PAGE_SIZE, "%08x\n", malidp->core_id);
 }
 
-DEVICE_ATTR_RO(core_id);
+static DEVICE_ATTR_RO(core_id);
 
 static int malidp_init_sysfs(struct device *dev)
 {
@@ -817,6 +817,12 @@ static int malidp_bind(struct device *dev)
 
 	malidp->core_id = version;
 
+	ret = of_property_read_u32(dev->of_node,
+					"arm,malidp-arqos-value",
+					&hwdev->arqos_value);
+	if (ret)
+		hwdev->arqos_value = 0x0;
+
 	/* set the number of lines used for output of RGB data */
 	ret = of_property_read_u8_array(dev->of_node,
 					"arm,malidp-output-port-lines",
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index bd8265f02e0b..ca570b135478 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -379,6 +379,15 @@ static void malidp500_modeset(struct malidp_hw_device *hwdev, struct videomode *
 		malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_ILACED, MALIDP_DE_DISPLAY_FUNC);
 	else
 		malidp_hw_clearbits(hwdev, MALIDP_DISP_FUNC_ILACED, MALIDP_DE_DISPLAY_FUNC);
+
+	/*
+	 * Program the RQoS register to avoid high resolutions flicker
+	 * issue on the LS1028A.
+	 */
+	if (hwdev->arqos_value) {
+		val = hwdev->arqos_value;
+		malidp_hw_setbits(hwdev, val, MALIDP500_RQOS_QUALITY);
+	}
 }
 
 int malidp_format_get_bpp(u32 fmt)
diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h
index 968a65eed371..e4c36bc90bda 100644
--- a/drivers/gpu/drm/arm/malidp_hw.h
+++ b/drivers/gpu/drm/arm/malidp_hw.h
@@ -251,6 +251,9 @@ struct malidp_hw_device {
 
 	/* size of memory used for rotating layers, up to two banks available */
 	u32 rotation_memory[2];
+
+	/* priority level of RQOS register used for driven the ARQOS signal */
+	u32 arqos_value;
 };
 
 static inline u32 malidp_hw_read(struct malidp_hw_device *hwdev, u32 reg)
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 875a3a9eabfa..7d0e7b031e44 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -56,7 +56,7 @@ malidp_mw_connector_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
+static const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
 	.get_modes = malidp_mw_connector_get_modes,
 	.mode_valid = malidp_mw_connector_mode_valid,
 };
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 3c70a53813bf..37715cc6064e 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -512,7 +512,7 @@ static int malidp_de_plane_check(struct drm_plane *plane,
 	int i, ret;
 	unsigned int block_w, block_h;
 
-	if (!state->crtc || !state->fb)
+	if (!state->crtc || WARN_ON(!state->fb))
 		return 0;
 
 	fb = state->fb;
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
index 993031542fa1..514c50dcb74d 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -210,6 +210,16 @@
 #define MALIDP500_CONFIG_VALID		0x00f00
 #define MALIDP500_CONFIG_ID		0x00fd4
 
+/*
+ * The quality of service (QoS) register on the DP500. RQOS register values
+ * are driven by the ARQOS signal, using AXI transacations, dependent on the
+ * FIFO input level.
+ * The RQOS register can also set QoS levels for:
+ *    - RED_ARQOS   @ A 4-bit signal value for close to underflow conditions
+ *    - GREEN_ARQOS @ A 4-bit signal value for normal conditions
+ */
+#define MALIDP500_RQOS_QUALITY          0x00500
+
 /* register offsets and bits specific to DP550/DP650 */
 #define MALIDP550_ADDR_SPACE_SIZE	0x10000
 #define MALIDP550_DE_CONTROL		0x00010
diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index 090cc0d699ae..ac8a78bfda03 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c
@@ -16,7 +16,7 @@
 #include "armada_fb.h"
 #include "armada_gem.h"
 
-static /*const*/ struct fb_ops armada_fb_ops = {
+static const struct fb_ops armada_fb_ops = {
 	.owner		= THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_fillrect	= drm_fb_helper_cfb_fillrect,
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 93cf8b8bfcff..976685f2939e 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -461,16 +461,6 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
 	kfree(sgt);
 }
 
-static void *armada_gem_dmabuf_no_kmap(struct dma_buf *buf, unsigned long n)
-{
-	return NULL;
-}
-
-static void
-armada_gem_dmabuf_no_kunmap(struct dma_buf *buf, unsigned long n, void *addr)
-{
-}
-
 static int
 armada_gem_dmabuf_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
 {
@@ -481,8 +471,6 @@ static const struct dma_buf_ops armada_gem_prime_dmabuf_ops = {
 	.map_dma_buf	= armada_gem_prime_map_dma_buf,
 	.unmap_dma_buf	= armada_gem_prime_unmap_dma_buf,
 	.release	= drm_gem_dmabuf_release,
-	.map		= armada_gem_dmabuf_no_kmap,
-	.unmap		= armada_gem_dmabuf_no_kunmap,
 	.mmap		= armada_gem_dmabuf_mmap,
 };
 
diff --git a/drivers/gpu/drm/ast/Kconfig b/drivers/gpu/drm/ast/Kconfig
index 829620d5326c..fbcf2f45cef5 100644
--- a/drivers/gpu/drm/ast/Kconfig
+++ b/drivers/gpu/drm/ast/Kconfig
@@ -4,6 +4,8 @@ config DRM_AST
 	depends on DRM && PCI && MMU
 	select DRM_KMS_HELPER
 	select DRM_VRAM_HELPER
+	select DRM_TTM
+	select DRM_TTM_HELPER
 	help
 	 Say yes for experimental AST GPU driver. Do not enable
 	 this driver without having a working -modesetting,
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 6ed6ff49efc0..30aa73a5d9b7 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -33,9 +33,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_gem_vram_helper.h>
-#include <drm/drm_pci.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drm_vram_mm_helper.h>
 
 #include "ast_drv.h"
 
@@ -87,9 +85,42 @@ static void ast_kick_out_firmware_fb(struct pci_dev *pdev)
 
 static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+	struct drm_device *dev;
+	int ret;
+
 	ast_kick_out_firmware_fb(pdev);
 
-	return drm_get_pci_dev(pdev, ent, &driver);
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	dev = drm_dev_alloc(&driver, &pdev->dev);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err_pci_disable_device;
+	}
+
+	dev->pdev = pdev;
+	pci_set_drvdata(pdev, dev);
+
+	ret = ast_driver_load(dev, ent->driver_data);
+	if (ret)
+		goto err_drm_dev_put;
+
+	ret = drm_dev_register(dev, ent->driver_data);
+	if (ret)
+		goto err_ast_driver_unload;
+
+	return 0;
+
+err_ast_driver_unload:
+	ast_driver_unload(dev);
+err_drm_dev_put:
+	drm_dev_put(dev);
+err_pci_disable_device:
+	pci_disable_device(pdev);
+	return ret;
+
 }
 
 static void
@@ -97,17 +128,19 @@ ast_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
-	drm_put_dev(dev);
+	drm_dev_unregister(dev);
+	ast_driver_unload(dev);
+	drm_dev_put(dev);
 }
 
-
-
 static int ast_drm_freeze(struct drm_device *dev)
 {
-	drm_kms_helper_poll_disable(dev);
-	pci_save_state(dev->pdev);
-	drm_fb_helper_set_suspend_unlocked(dev->fb_helper, true);
+	int error;
 
+	error = drm_mode_config_helper_suspend(dev);
+	if (error)
+		return error;
+	pci_save_state(dev->pdev);
 	return 0;
 }
 
@@ -115,11 +148,7 @@ static int ast_drm_thaw(struct drm_device *dev)
 {
 	ast_post_gpu(dev);
 
-	drm_mode_config_reset(dev);
-	drm_helper_resume_force_mode(dev);
-	drm_fb_helper_set_suspend_unlocked(dev->fb_helper, false);
-
-	return 0;
+	return drm_mode_config_helper_resume(dev);
 }
 
 static int ast_drm_resume(struct drm_device *dev)
@@ -132,8 +161,6 @@ static int ast_drm_resume(struct drm_device *dev)
 	ret = ast_drm_thaw(dev);
 	if (ret)
 		return ret;
-
-	drm_kms_helper_poll_enable(dev);
 	return 0;
 }
 
@@ -151,6 +178,7 @@ static int ast_pm_suspend(struct device *dev)
 	pci_set_power_state(pdev, PCI_D3hot);
 	return 0;
 }
+
 static int ast_pm_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -166,7 +194,6 @@ static int ast_pm_freeze(struct device *dev)
 	if (!ddev || !ddev->dev_private)
 		return -ENODEV;
 	return ast_drm_freeze(ddev);
-
 }
 
 static int ast_pm_thaw(struct device *dev)
@@ -201,16 +228,12 @@ static struct pci_driver ast_pci_driver = {
 	.driver.pm = &ast_pm_ops,
 };
 
-static const struct file_operations ast_fops = {
-	.owner = THIS_MODULE,
-	DRM_VRAM_MM_FILE_OPERATIONS
-};
+DEFINE_DRM_GEM_FOPS(ast_fops);
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM,
-
-	.load = ast_driver_load,
-	.unload = ast_driver_unload,
+	.driver_features = DRIVER_ATOMIC |
+			   DRIVER_GEM |
+			   DRIVER_MODESET,
 
 	.fops = &ast_fops,
 	.name = DRIVER_NAME,
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 244cc7c382af..f5d8780776ae 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -82,6 +82,25 @@ enum ast_tx_chip {
 #define AST_DRAM_4Gx16   7
 #define AST_DRAM_8Gx16   8
 
+
+#define AST_MAX_HWC_WIDTH	64
+#define AST_MAX_HWC_HEIGHT	64
+
+#define AST_HWC_SIZE		(AST_MAX_HWC_WIDTH * AST_MAX_HWC_HEIGHT * 2)
+#define AST_HWC_SIGNATURE_SIZE	32
+
+#define AST_DEFAULT_HWC_NUM	2
+
+/* define for signature structure */
+#define AST_HWC_SIGNATURE_CHECKSUM	0x00
+#define AST_HWC_SIGNATURE_SizeX		0x04
+#define AST_HWC_SIGNATURE_SizeY		0x08
+#define AST_HWC_SIGNATURE_X		0x0C
+#define AST_HWC_SIGNATURE_Y		0x10
+#define AST_HWC_SIGNATURE_HOTSPOTX	0x14
+#define AST_HWC_SIGNATURE_HOTSPOTY	0x18
+
+
 struct ast_private {
 	struct drm_device *dev;
 
@@ -97,8 +116,14 @@ struct ast_private {
 
 	int fb_mtrr;
 
-	struct drm_gem_object *cursor_cache;
-	int next_cursor;
+	struct {
+		struct drm_gem_vram_object *gbo[AST_DEFAULT_HWC_NUM];
+		unsigned int next_index;
+	} cursor;
+
+	struct drm_plane primary_plane;
+	struct drm_plane cursor_plane;
+
 	bool support_wide_screen;
 	enum {
 		ast_use_p2a,
@@ -115,8 +140,6 @@ struct ast_private {
 int ast_driver_load(struct drm_device *dev, unsigned long flags);
 void ast_driver_unload(struct drm_device *dev);
 
-struct ast_gem_object;
-
 #define AST_IO_AR_PORT_WRITE		(0x40)
 #define AST_IO_MISC_PORT_WRITE		(0x42)
 #define AST_IO_VGA_ENABLE_PORT		(0x43)
@@ -199,23 +222,6 @@ static inline void ast_open_key(struct ast_private *ast)
 
 #define AST_VIDMEM_DEFAULT_SIZE AST_VIDMEM_SIZE_8M
 
-#define AST_MAX_HWC_WIDTH 64
-#define AST_MAX_HWC_HEIGHT 64
-
-#define AST_HWC_SIZE                (AST_MAX_HWC_WIDTH*AST_MAX_HWC_HEIGHT*2)
-#define AST_HWC_SIGNATURE_SIZE      32
-
-#define AST_DEFAULT_HWC_NUM 2
-/* define for signature structure */
-#define AST_HWC_SIGNATURE_CHECKSUM  0x00
-#define AST_HWC_SIGNATURE_SizeX     0x04
-#define AST_HWC_SIGNATURE_SizeY     0x08
-#define AST_HWC_SIGNATURE_X         0x0C
-#define AST_HWC_SIGNATURE_Y         0x10
-#define AST_HWC_SIGNATURE_HOTSPOTX  0x14
-#define AST_HWC_SIGNATURE_HOTSPOTY  0x18
-
-
 struct ast_i2c_chan {
 	struct i2c_adapter adapter;
 	struct drm_device *dev;
@@ -275,6 +281,17 @@ struct ast_vbios_mode_info {
 	const struct ast_vbios_enhtable *enh_table;
 };
 
+struct ast_crtc_state {
+	struct drm_crtc_state base;
+
+	/* Last known format of primary plane */
+	const struct drm_format_info *format;
+
+	struct ast_vbios_mode_info vbios_mode_info;
+};
+
+#define to_ast_crtc_state(state) container_of(state, struct ast_crtc_state, base)
+
 extern int ast_mode_init(struct drm_device *dev);
 extern void ast_mode_fini(struct drm_device *dev);
 
@@ -284,10 +301,6 @@ extern void ast_mode_fini(struct drm_device *dev);
 int ast_mm_init(struct ast_private *ast);
 void ast_mm_fini(struct ast_private *ast);
 
-int ast_gem_create(struct drm_device *dev,
-		   u32 size, bool iskernel,
-		   struct drm_gem_object **obj);
-
 /* ast post */
 void ast_enable_vga(struct drm_device *dev);
 void ast_enable_mmio(struct drm_device *dev);
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 50de8e47659c..b79f484e9bd2 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -28,12 +28,12 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_gem_vram_helper.h>
-#include <drm/drm_vram_mm_helper.h>
 
 #include "ast_drv.h"
 
@@ -388,8 +388,33 @@ static int ast_get_dram_info(struct drm_device *dev)
 	return 0;
 }
 
+enum drm_mode_status ast_mode_config_mode_valid(struct drm_device *dev,
+						const struct drm_display_mode *mode)
+{
+	static const unsigned long max_bpp = 4; /* DRM_FORMAT_XRGBA8888 */
+
+	struct ast_private *ast = dev->dev_private;
+	unsigned long fbsize, fbpages, max_fbpages;
+
+	/* To support double buffering, a framebuffer may not
+	 * consume more than half of the available VRAM.
+	 */
+	max_fbpages = (ast->vram_size / 2) >> PAGE_SHIFT;
+
+	fbsize = mode->hdisplay * mode->vdisplay * max_bpp;
+	fbpages = DIV_ROUND_UP(fbsize, PAGE_SIZE);
+
+	if (fbpages > max_fbpages)
+		return MODE_MEM;
+
+	return MODE_OK;
+}
+
 static const struct drm_mode_config_funcs ast_mode_funcs = {
-	.fb_create = drm_gem_fb_create
+	.fb_create = drm_gem_fb_create,
+	.mode_valid = ast_mode_config_mode_valid,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
 };
 
 static u32 ast_get_vram_info(struct drm_device *dev)
@@ -507,6 +532,8 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto out_free;
 
+	drm_mode_config_reset(dev);
+
 	ret = drm_fbdev_generic_setup(dev, 32);
 	if (ret)
 		goto out_free;
@@ -536,27 +563,3 @@ void ast_driver_unload(struct drm_device *dev)
 	pci_iounmap(dev->pdev, ast->regs);
 	kfree(ast);
 }
-
-int ast_gem_create(struct drm_device *dev,
-		   u32 size, bool iskernel,
-		   struct drm_gem_object **obj)
-{
-	struct drm_gem_vram_object *gbo;
-	int ret;
-
-	*obj = NULL;
-
-	size = roundup(size, PAGE_SIZE);
-	if (size == 0)
-		return -EINVAL;
-
-	gbo = drm_gem_vram_create(dev, &dev->vram_mm->bdev, size, 0, false);
-	if (IS_ERR(gbo)) {
-		ret = PTR_ERR(gbo);
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("failed to allocate GEM object\n");
-		return ret;
-	}
-	*obj = &gbo->bo.base;
-	return 0;
-}
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index d349c721501c..34608f0499eb 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -31,6 +31,9 @@
 #include <linux/export.h>
 #include <linux/pci.h>
 
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_atomic_state_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fourcc.h>
@@ -43,11 +46,14 @@
 
 static struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev);
 static void ast_i2c_destroy(struct ast_i2c_chan *i2c);
-static int ast_cursor_set(struct drm_crtc *crtc,
-			  struct drm_file *file_priv,
-			  uint32_t handle,
-			  uint32_t width,
-			  uint32_t height);
+static int ast_cursor_move(struct drm_crtc *crtc,
+			   int x, int y);
+
+
+static u32 copy_cursor_image(u8 *src, u8 *dst, int width, int height);
+static int ast_cursor_update(void *dst, void *src, unsigned int width,
+			     unsigned int height);
+static void ast_cursor_set_base(struct ast_private *ast, u64 address);
 static int ast_cursor_move(struct drm_crtc *crtc,
 			   int x, int y);
 
@@ -65,9 +71,8 @@ static inline void ast_load_palette_index(struct ast_private *ast,
 	ast_io_read8(ast, AST_IO_SEQ_PORT);
 }
 
-static void ast_crtc_load_lut(struct drm_crtc *crtc)
+static void ast_crtc_load_lut(struct ast_private *ast, struct drm_crtc *crtc)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
 	u16 *r, *g, *b;
 	int i;
 
@@ -82,36 +87,32 @@ static void ast_crtc_load_lut(struct drm_crtc *crtc)
 		ast_load_palette_index(ast, i, *r++ >> 8, *g++ >> 8, *b++ >> 8);
 }
 
-static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mode *mode,
+static bool ast_get_vbios_mode_info(const struct drm_format_info *format,
+				    const struct drm_display_mode *mode,
 				    struct drm_display_mode *adjusted_mode,
 				    struct ast_vbios_mode_info *vbios_mode)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
-	const struct drm_framebuffer *fb = crtc->primary->fb;
-	u32 refresh_rate_index = 0, mode_id, color_index, refresh_rate;
+	u32 refresh_rate_index = 0, refresh_rate;
 	const struct ast_vbios_enhtable *best = NULL;
 	u32 hborder, vborder;
 	bool check_sync;
 
-	switch (fb->format->cpp[0] * 8) {
+	switch (format->cpp[0] * 8) {
 	case 8:
 		vbios_mode->std_table = &vbios_stdtable[VGAModeIndex];
-		color_index = VGAModeIndex - 1;
 		break;
 	case 16:
 		vbios_mode->std_table = &vbios_stdtable[HiCModeIndex];
-		color_index = HiCModeIndex;
 		break;
 	case 24:
 	case 32:
 		vbios_mode->std_table = &vbios_stdtable[TrueCModeIndex];
-		color_index = TrueCModeIndex;
 		break;
 	default:
 		return false;
 	}
 
-	switch (crtc->mode.crtc_hdisplay) {
+	switch (mode->crtc_hdisplay) {
 	case 640:
 		vbios_mode->enh_table = &res_640x480[refresh_rate_index];
 		break;
@@ -122,7 +123,7 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 		vbios_mode->enh_table = &res_1024x768[refresh_rate_index];
 		break;
 	case 1280:
-		if (crtc->mode.crtc_vdisplay == 800)
+		if (mode->crtc_vdisplay == 800)
 			vbios_mode->enh_table = &res_1280x800[refresh_rate_index];
 		else
 			vbios_mode->enh_table = &res_1280x1024[refresh_rate_index];
@@ -134,7 +135,7 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 		vbios_mode->enh_table = &res_1440x900[refresh_rate_index];
 		break;
 	case 1600:
-		if (crtc->mode.crtc_vdisplay == 900)
+		if (mode->crtc_vdisplay == 900)
 			vbios_mode->enh_table = &res_1600x900[refresh_rate_index];
 		else
 			vbios_mode->enh_table = &res_1600x1200[refresh_rate_index];
@@ -143,7 +144,7 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 		vbios_mode->enh_table = &res_1680x1050[refresh_rate_index];
 		break;
 	case 1920:
-		if (crtc->mode.crtc_vdisplay == 1080)
+		if (mode->crtc_vdisplay == 1080)
 			vbios_mode->enh_table = &res_1920x1080[refresh_rate_index];
 		else
 			vbios_mode->enh_table = &res_1920x1200[refresh_rate_index];
@@ -154,7 +155,8 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 
 	refresh_rate = drm_mode_vrefresh(mode);
 	check_sync = vbios_mode->enh_table->flags & WideScreenMode;
-	do {
+
+	while (1) {
 		const struct ast_vbios_enhtable *loop = vbios_mode->enh_table;
 
 		while (loop->refresh_rate != 0xff) {
@@ -178,7 +180,8 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 		if (best || !check_sync)
 			break;
 		check_sync = 0;
-	} while (1);
+	}
+
 	if (best)
 		vbios_mode->enh_table = best;
 
@@ -203,38 +206,67 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 					 vbios_mode->enh_table->vfp +
 					 vbios_mode->enh_table->vsync);
 
-	refresh_rate_index = vbios_mode->enh_table->refresh_rate_index;
-	mode_id = vbios_mode->enh_table->mode_id;
+	return true;
+}
 
-	if (ast->chip == AST1180) {
-		/* TODO 1180 */
-	} else {
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8c, (u8)((color_index & 0xf) << 4));
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8d, refresh_rate_index & 0xff);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8e, mode_id & 0xff);
-
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0x00);
-		if (vbios_mode->enh_table->flags & NewModeInfo) {
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0xa8);
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x92,
-					  fb->format->cpp[0] * 8);
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x93, adjusted_mode->clock / 1000);
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x94, adjusted_mode->crtc_hdisplay);
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x95, adjusted_mode->crtc_hdisplay >> 8);
-
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x96, adjusted_mode->crtc_vdisplay);
-			ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x97, adjusted_mode->crtc_vdisplay >> 8);
-		}
+static void ast_set_vbios_color_reg(struct ast_private *ast,
+				    const struct drm_format_info *format,
+				    const struct ast_vbios_mode_info *vbios_mode)
+{
+	u32 color_index;
+
+	switch (format->cpp[0]) {
+	case 1:
+		color_index = VGAModeIndex - 1;
+		break;
+	case 2:
+		color_index = HiCModeIndex;
+		break;
+	case 3:
+	case 4:
+		color_index = TrueCModeIndex;
+	default:
+		return;
 	}
 
-	return true;
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8c, (u8)((color_index & 0x0f) << 4));
 
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0x00);
 
+	if (vbios_mode->enh_table->flags & NewModeInfo) {
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0xa8);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x92, format->cpp[0] * 8);
+	}
 }
-static void ast_set_std_reg(struct drm_crtc *crtc, struct drm_display_mode *mode,
+
+static void ast_set_vbios_mode_reg(struct ast_private *ast,
+				   const struct drm_display_mode *adjusted_mode,
+				   const struct ast_vbios_mode_info *vbios_mode)
+{
+	u32 refresh_rate_index, mode_id;
+
+	refresh_rate_index = vbios_mode->enh_table->refresh_rate_index;
+	mode_id = vbios_mode->enh_table->mode_id;
+
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8d, refresh_rate_index & 0xff);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x8e, mode_id & 0xff);
+
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0x00);
+
+	if (vbios_mode->enh_table->flags & NewModeInfo) {
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x91, 0xa8);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x93, adjusted_mode->clock / 1000);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x94, adjusted_mode->crtc_hdisplay);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x95, adjusted_mode->crtc_hdisplay >> 8);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x96, adjusted_mode->crtc_vdisplay);
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x97, adjusted_mode->crtc_vdisplay >> 8);
+	}
+}
+
+static void ast_set_std_reg(struct ast_private *ast,
+			    struct drm_display_mode *mode,
 			    struct ast_vbios_mode_info *vbios_mode)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
 	const struct ast_vbios_stdtable *stdtable;
 	u32 i;
 	u8 jreg;
@@ -244,18 +276,21 @@ static void ast_set_std_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 	jreg = stdtable->misc;
 	ast_io_write8(ast, AST_IO_MISC_PORT_WRITE, jreg);
 
-	/* Set SEQ */
+	/* Set SEQ; except Screen Disable field */
 	ast_set_index_reg(ast, AST_IO_SEQ_PORT, 0x00, 0x03);
-	for (i = 0; i < 4; i++) {
+	ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, stdtable->seq[0]);
+	for (i = 1; i < 4; i++) {
 		jreg = stdtable->seq[i];
-		if (!i)
-			jreg |= 0x20;
 		ast_set_index_reg(ast, AST_IO_SEQ_PORT, (i + 1) , jreg);
 	}
 
-	/* Set CRTC */
+	/* Set CRTC; except base address and offset */
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x00);
-	for (i = 0; i < 25; i++)
+	for (i = 0; i < 12; i++)
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, stdtable->crtc[i]);
+	for (i = 14; i < 19; i++)
+		ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, stdtable->crtc[i]);
+	for (i = 20; i < 25; i++)
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, stdtable->crtc[i]);
 
 	/* set AR */
@@ -276,10 +311,10 @@ static void ast_set_std_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 		ast_set_index_reg(ast, AST_IO_GR_PORT, i, stdtable->gr[i]);
 }
 
-static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mode,
+static void ast_set_crtc_reg(struct ast_private *ast,
+			     struct drm_display_mode *mode,
 			     struct ast_vbios_mode_info *vbios_mode)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
 	u8 jreg05 = 0, jreg07 = 0, jreg09 = 0, jregAC = 0, jregAD = 0, jregAE = 0;
 	u16 temp, precache = 0;
 
@@ -385,11 +420,9 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x80);
 }
 
-static void ast_set_offset_reg(struct drm_crtc *crtc)
+static void ast_set_offset_reg(struct ast_private *ast,
+			       struct drm_framebuffer *fb)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
-	const struct drm_framebuffer *fb = crtc->primary->fb;
-
 	u16 offset;
 
 	offset = fb->pitches[0] >> 3;
@@ -397,10 +430,10 @@ static void ast_set_offset_reg(struct drm_crtc *crtc)
 	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xb0, (offset >> 8) & 0x3f);
 }
 
-static void ast_set_dclk_reg(struct drm_device *dev, struct drm_display_mode *mode,
+static void ast_set_dclk_reg(struct ast_private *ast,
+			     struct drm_display_mode *mode,
 			     struct ast_vbios_mode_info *vbios_mode)
 {
-	struct ast_private *ast = dev->dev_private;
 	const struct ast_vbios_dclk_info *clk_info;
 
 	if (ast->chip == AST2500)
@@ -415,14 +448,12 @@ static void ast_set_dclk_reg(struct drm_device *dev, struct drm_display_mode *mo
 			       ((clk_info->param3 & 0x3) << 4));
 }
 
-static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode,
-			     struct ast_vbios_mode_info *vbios_mode)
+static void ast_set_color_reg(struct ast_private *ast,
+			      const struct drm_format_info *format)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
-	const struct drm_framebuffer *fb = crtc->primary->fb;
 	u8 jregA0 = 0, jregA3 = 0, jregA8 = 0;
 
-	switch (fb->format->cpp[0] * 8) {
+	switch (format->cpp[0] * 8) {
 	case 8:
 		jregA0 = 0x70;
 		jregA3 = 0x01;
@@ -444,7 +475,10 @@ static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa0, 0x8f, jregA0);
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xf0, jregA3);
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa8, 0xfd, jregA8);
+}
 
+static void ast_set_crtthd_reg(struct ast_private *ast)
+{
 	/* Set Threshold */
 	if (ast->chip == AST2300 || ast->chip == AST2400 ||
 	    ast->chip == AST2500) {
@@ -462,10 +496,10 @@ static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 	}
 }
 
-static void ast_set_sync_reg(struct drm_device *dev, struct drm_display_mode *mode,
-		      struct ast_vbios_mode_info *vbios_mode)
+static void ast_set_sync_reg(struct ast_private *ast,
+			     struct drm_display_mode *mode,
+			     struct ast_vbios_mode_info *vbios_mode)
 {
-	struct ast_private *ast = dev->dev_private;
 	u8 jreg;
 
 	jreg  = ast_io_read8(ast, AST_IO_MISC_PORT_READ);
@@ -475,23 +509,9 @@ static void ast_set_sync_reg(struct drm_device *dev, struct drm_display_mode *mo
 	ast_io_write8(ast, AST_IO_MISC_PORT_WRITE, jreg);
 }
 
-static bool ast_set_dac_reg(struct drm_crtc *crtc, struct drm_display_mode *mode,
-		     struct ast_vbios_mode_info *vbios_mode)
+static void ast_set_start_address_crt1(struct ast_private *ast,
+				       unsigned offset)
 {
-	const struct drm_framebuffer *fb = crtc->primary->fb;
-
-	switch (fb->format->cpp[0] * 8) {
-	case 8:
-		break;
-	default:
-		return false;
-	}
-	return true;
-}
-
-static void ast_set_start_address_crt1(struct drm_crtc *crtc, unsigned offset)
-{
-	struct ast_private *ast = crtc->dev->dev_private;
 	u32 addr;
 
 	addr = offset >> 2;
@@ -501,6 +521,247 @@ static void ast_set_start_address_crt1(struct drm_crtc *crtc, unsigned offset)
 
 }
 
+/*
+ * Primary plane
+ */
+
+static const uint32_t ast_primary_plane_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_C8,
+};
+
+static int ast_primary_plane_helper_atomic_check(struct drm_plane *plane,
+						 struct drm_plane_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct ast_crtc_state *ast_crtc_state;
+	int ret;
+
+	if (!state->crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
+
+	ret = drm_atomic_helper_check_plane_state(state, crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, true);
+	if (ret)
+		return ret;
+
+	if (!state->visible)
+		return 0;
+
+	ast_crtc_state = to_ast_crtc_state(crtc_state);
+
+	ast_crtc_state->format = state->fb->format;
+
+	return 0;
+}
+
+void ast_primary_plane_helper_atomic_update(struct drm_plane *plane,
+					    struct drm_plane_state *old_state)
+{
+	struct ast_private *ast = plane->dev->dev_private;
+	struct drm_plane_state *state = plane->state;
+	struct drm_gem_vram_object *gbo;
+	s64 gpu_addr;
+
+	gbo = drm_gem_vram_of_gem(state->fb->obj[0]);
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (WARN_ON_ONCE(gpu_addr < 0))
+		return; /* Bug: we didn't pin the BO to VRAM in prepare_fb. */
+
+	ast_set_offset_reg(ast, state->fb);
+	ast_set_start_address_crt1(ast, (u32)gpu_addr);
+
+	ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0x00);
+}
+
+static void
+ast_primary_plane_helper_atomic_disable(struct drm_plane *plane,
+					struct drm_plane_state *old_state)
+{
+	struct ast_private *ast = plane->dev->dev_private;
+
+	ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0x20);
+}
+
+static const struct drm_plane_helper_funcs ast_primary_plane_helper_funcs = {
+	.prepare_fb = drm_gem_vram_plane_helper_prepare_fb,
+	.cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb,
+	.atomic_check = ast_primary_plane_helper_atomic_check,
+	.atomic_update = ast_primary_plane_helper_atomic_update,
+	.atomic_disable = ast_primary_plane_helper_atomic_disable,
+};
+
+static const struct drm_plane_funcs ast_primary_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = drm_plane_cleanup,
+	.reset = drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+/*
+ * Cursor plane
+ */
+
+static const uint32_t ast_cursor_plane_formats[] = {
+	DRM_FORMAT_ARGB8888,
+};
+
+static int
+ast_cursor_plane_helper_prepare_fb(struct drm_plane *plane,
+				   struct drm_plane_state *new_state)
+{
+	struct drm_framebuffer *fb = new_state->fb;
+	struct drm_crtc *crtc = new_state->crtc;
+	struct drm_gem_vram_object *gbo;
+	struct ast_private *ast;
+	int ret;
+	void *src, *dst;
+
+	if (!crtc || !fb)
+		return 0;
+
+	if (WARN_ON_ONCE(fb->width > AST_MAX_HWC_WIDTH) ||
+	    WARN_ON_ONCE(fb->height > AST_MAX_HWC_HEIGHT))
+		return -EINVAL; /* BUG: didn't test in atomic_check() */
+
+	ast = crtc->dev->dev_private;
+
+	gbo = drm_gem_vram_of_gem(fb->obj[0]);
+	src = drm_gem_vram_vmap(gbo);
+	if (IS_ERR(src)) {
+		ret = PTR_ERR(src);
+		goto err_drm_gem_vram_unpin;
+	}
+
+	dst = drm_gem_vram_vmap(ast->cursor.gbo[ast->cursor.next_index]);
+	if (IS_ERR(dst)) {
+		ret = PTR_ERR(dst);
+		goto err_drm_gem_vram_vunmap_src;
+	}
+
+	ret = ast_cursor_update(dst, src, fb->width, fb->height);
+	if (ret)
+		goto err_drm_gem_vram_vunmap_dst;
+
+	/* Always unmap buffers here. Destination buffers are
+	 * perma-pinned while the driver is active. We're only
+	 * changing ref-counters here.
+	 */
+	drm_gem_vram_vunmap(ast->cursor.gbo[ast->cursor.next_index], dst);
+	drm_gem_vram_vunmap(gbo, src);
+
+	return 0;
+
+err_drm_gem_vram_vunmap_dst:
+	drm_gem_vram_vunmap(ast->cursor.gbo[ast->cursor.next_index], dst);
+err_drm_gem_vram_vunmap_src:
+	drm_gem_vram_vunmap(gbo, src);
+err_drm_gem_vram_unpin:
+	drm_gem_vram_unpin(gbo);
+	return ret;
+}
+
+static int ast_cursor_plane_helper_atomic_check(struct drm_plane *plane,
+						struct drm_plane_state *state)
+{
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_crtc_state *crtc_state;
+	int ret;
+
+	if (!state->crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
+
+	ret = drm_atomic_helper_check_plane_state(state, crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  true, true);
+	if (ret)
+		return ret;
+
+	if (!state->visible)
+		return 0;
+
+	if (fb->width > AST_MAX_HWC_WIDTH || fb->height > AST_MAX_HWC_HEIGHT)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void
+ast_cursor_plane_helper_atomic_update(struct drm_plane *plane,
+				      struct drm_plane_state *old_state)
+{
+	struct drm_plane_state *state = plane->state;
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	struct ast_private *ast = plane->dev->dev_private;
+	struct ast_crtc *ast_crtc = to_ast_crtc(crtc);
+	struct drm_gem_vram_object *gbo;
+	s64 off;
+	u8 jreg;
+
+	ast_crtc->offset_x = AST_MAX_HWC_WIDTH - fb->width;
+	ast_crtc->offset_y = AST_MAX_HWC_WIDTH - fb->height;
+
+	if (state->fb != old_state->fb) {
+		/* A new cursor image was installed. */
+		gbo = ast->cursor.gbo[ast->cursor.next_index];
+		off = drm_gem_vram_offset(gbo);
+		if (WARN_ON_ONCE(off < 0))
+			return; /* Bug: we didn't pin cursor HW BO to VRAM. */
+		ast_cursor_set_base(ast, off);
+
+		++ast->cursor.next_index;
+		ast->cursor.next_index %= ARRAY_SIZE(ast->cursor.gbo);
+	}
+
+	ast_cursor_move(crtc, state->crtc_x, state->crtc_y);
+
+	jreg = 0x2;
+	/* enable ARGB cursor */
+	jreg |= 1;
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xcb, 0xfc, jreg);
+}
+
+static void
+ast_cursor_plane_helper_atomic_disable(struct drm_plane *plane,
+				       struct drm_plane_state *old_state)
+{
+	struct ast_private *ast = plane->dev->dev_private;
+
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xcb, 0xfc, 0x00);
+}
+
+static const struct drm_plane_helper_funcs ast_cursor_plane_helper_funcs = {
+	.prepare_fb = ast_cursor_plane_helper_prepare_fb,
+	.cleanup_fb = NULL, /* not required for cursor plane */
+	.atomic_check = ast_cursor_plane_helper_atomic_check,
+	.atomic_update = ast_cursor_plane_helper_atomic_update,
+	.atomic_disable = ast_cursor_plane_helper_atomic_disable,
+};
+
+static const struct drm_plane_funcs ast_cursor_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = drm_plane_cleanup,
+	.reset = drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+/*
+ * CRTC
+ */
+
 static void ast_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
 	struct ast_private *ast = crtc->dev->dev_private;
@@ -508,229 +769,206 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode)
 	if (ast->chip == AST1180)
 		return;
 
+	/* TODO: Maybe control display signal generation with
+	 *       Sync Enable (bit CR17.7).
+	 */
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
-		ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0);
 		if (ast->tx_chip_type == AST_TX_DP501)
 			ast_set_dp501_video_output(crtc->dev, 1);
-		ast_crtc_load_lut(crtc);
+		ast_crtc_load_lut(ast, crtc);
 		break;
 	case DRM_MODE_DPMS_OFF:
 		if (ast->tx_chip_type == AST_TX_DP501)
 			ast_set_dp501_video_output(crtc->dev, 0);
-		ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0x20);
 		break;
 	}
 }
 
-static int ast_crtc_do_set_base(struct drm_crtc *crtc,
-				struct drm_framebuffer *fb,
-				int x, int y, int atomic)
+static int ast_crtc_helper_atomic_check(struct drm_crtc *crtc,
+					struct drm_crtc_state *state)
 {
-	struct drm_gem_vram_object *gbo;
-	int ret;
-	s64 gpu_addr;
+	struct ast_private *ast = crtc->dev->dev_private;
+	struct ast_crtc_state *ast_state;
+	const struct drm_format_info *format;
+	bool succ;
 
-	if (!atomic && fb) {
-		gbo = drm_gem_vram_of_gem(fb->obj[0]);
-		drm_gem_vram_unpin(gbo);
+	if (ast->chip == AST1180) {
+		DRM_ERROR("AST 1180 modesetting not supported\n");
+		return -EINVAL;
 	}
 
-	gbo = drm_gem_vram_of_gem(crtc->primary->fb->obj[0]);
+	ast_state = to_ast_crtc_state(state);
 
-	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
-	if (ret)
-		return ret;
-	gpu_addr = drm_gem_vram_offset(gbo);
-	if (gpu_addr < 0) {
-		ret = (int)gpu_addr;
-		goto err_drm_gem_vram_unpin;
-	}
+	format = ast_state->format;
+	if (!format)
+		return 0;
 
-	ast_set_offset_reg(crtc);
-	ast_set_start_address_crt1(crtc, (u32)gpu_addr);
+	succ = ast_get_vbios_mode_info(format, &state->mode,
+				       &state->adjusted_mode,
+				       &ast_state->vbios_mode_info);
+	if (!succ)
+		return -EINVAL;
 
 	return 0;
-
-err_drm_gem_vram_unpin:
-	drm_gem_vram_unpin(gbo);
-	return ret;
 }
 
-static int ast_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
-			     struct drm_framebuffer *old_fb)
+static void ast_crtc_helper_atomic_begin(struct drm_crtc *crtc,
+					 struct drm_crtc_state *old_crtc_state)
 {
-	return ast_crtc_do_set_base(crtc, old_fb, x, y, 0);
+	struct ast_private *ast = crtc->dev->dev_private;
+
+	ast_open_key(ast);
 }
 
-static int ast_crtc_mode_set(struct drm_crtc *crtc,
-			     struct drm_display_mode *mode,
-			     struct drm_display_mode *adjusted_mode,
-			     int x, int y,
-			     struct drm_framebuffer *old_fb)
+static void ast_crtc_helper_atomic_flush(struct drm_crtc *crtc,
+					 struct drm_crtc_state *old_crtc_state)
 {
 	struct drm_device *dev = crtc->dev;
-	struct ast_private *ast = crtc->dev->dev_private;
-	struct ast_vbios_mode_info vbios_mode;
-	bool ret;
-	if (ast->chip == AST1180) {
-		DRM_ERROR("AST 1180 modesetting not supported\n");
-		return -EINVAL;
-	}
+	struct ast_private *ast = dev->dev_private;
+	struct ast_crtc_state *ast_state;
+	const struct drm_format_info *format;
+	struct ast_vbios_mode_info *vbios_mode_info;
+	struct drm_display_mode *adjusted_mode;
 
-	ret = ast_get_vbios_mode_info(crtc, mode, adjusted_mode, &vbios_mode);
-	if (ret == false)
-		return -EINVAL;
-	ast_open_key(ast);
+	crtc->state->no_vblank = true;
 
-	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06);
+	ast_state = to_ast_crtc_state(crtc->state);
 
-	ast_set_std_reg(crtc, adjusted_mode, &vbios_mode);
-	ast_set_crtc_reg(crtc, adjusted_mode, &vbios_mode);
-	ast_set_offset_reg(crtc);
-	ast_set_dclk_reg(dev, adjusted_mode, &vbios_mode);
-	ast_set_ext_reg(crtc, adjusted_mode, &vbios_mode);
-	ast_set_sync_reg(dev, adjusted_mode, &vbios_mode);
-	ast_set_dac_reg(crtc, adjusted_mode, &vbios_mode);
+	format = ast_state->format;
+	if (!format)
+		return;
 
-	ast_crtc_mode_set_base(crtc, x, y, old_fb);
+	vbios_mode_info = &ast_state->vbios_mode_info;
 
-	return 0;
-}
+	ast_set_color_reg(ast, format);
+	ast_set_vbios_color_reg(ast, format, vbios_mode_info);
 
-static void ast_crtc_disable(struct drm_crtc *crtc)
-{
-	DRM_DEBUG_KMS("\n");
-	ast_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-	if (crtc->primary->fb) {
-		struct drm_framebuffer *fb = crtc->primary->fb;
-		struct drm_gem_vram_object *gbo =
-			drm_gem_vram_of_gem(fb->obj[0]);
+	if (!crtc->state->mode_changed)
+		return;
 
-		drm_gem_vram_unpin(gbo);
-	}
-	crtc->primary->fb = NULL;
+	adjusted_mode = &crtc->state->adjusted_mode;
+
+	ast_set_vbios_mode_reg(ast, adjusted_mode, vbios_mode_info);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06);
+	ast_set_std_reg(ast, adjusted_mode, vbios_mode_info);
+	ast_set_crtc_reg(ast, adjusted_mode, vbios_mode_info);
+	ast_set_dclk_reg(ast, adjusted_mode, vbios_mode_info);
+	ast_set_crtthd_reg(ast);
+	ast_set_sync_reg(ast, adjusted_mode, vbios_mode_info);
 }
 
-static void ast_crtc_prepare(struct drm_crtc *crtc)
+static void
+ast_crtc_helper_atomic_enable(struct drm_crtc *crtc,
+			      struct drm_crtc_state *old_crtc_state)
 {
-
+	ast_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
 }
 
-static void ast_crtc_commit(struct drm_crtc *crtc)
+static void
+ast_crtc_helper_atomic_disable(struct drm_crtc *crtc,
+			       struct drm_crtc_state *old_crtc_state)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
-	ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0);
-	ast_crtc_load_lut(crtc);
+	ast_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 }
 
-
 static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = {
-	.dpms = ast_crtc_dpms,
-	.mode_set = ast_crtc_mode_set,
-	.mode_set_base = ast_crtc_mode_set_base,
-	.disable = ast_crtc_disable,
-	.prepare = ast_crtc_prepare,
-	.commit = ast_crtc_commit,
-
+	.atomic_check = ast_crtc_helper_atomic_check,
+	.atomic_begin = ast_crtc_helper_atomic_begin,
+	.atomic_flush = ast_crtc_helper_atomic_flush,
+	.atomic_enable = ast_crtc_helper_atomic_enable,
+	.atomic_disable = ast_crtc_helper_atomic_disable,
 };
 
-static void ast_crtc_reset(struct drm_crtc *crtc)
+static void ast_crtc_destroy(struct drm_crtc *crtc)
 {
-
+	drm_crtc_cleanup(crtc);
+	kfree(crtc);
 }
 
-static int ast_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-			      u16 *blue, uint32_t size,
-			      struct drm_modeset_acquire_ctx *ctx)
+static struct drm_crtc_state *
+ast_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
 {
-	ast_crtc_load_lut(crtc);
+	struct ast_crtc_state *new_ast_state, *ast_state;
 
-	return 0;
-}
+	if (WARN_ON(!crtc->state))
+		return NULL;
 
+	new_ast_state = kmalloc(sizeof(*new_ast_state), GFP_KERNEL);
+	if (!new_ast_state)
+		return NULL;
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &new_ast_state->base);
 
-static void ast_crtc_destroy(struct drm_crtc *crtc)
+	ast_state = to_ast_crtc_state(crtc->state);
+
+	new_ast_state->format = ast_state->format;
+	memcpy(&new_ast_state->vbios_mode_info, &ast_state->vbios_mode_info,
+	       sizeof(new_ast_state->vbios_mode_info));
+
+	return &new_ast_state->base;
+}
+
+static void ast_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					  struct drm_crtc_state *state)
 {
-	drm_crtc_cleanup(crtc);
-	kfree(crtc);
+	struct ast_crtc_state *ast_state = to_ast_crtc_state(state);
+
+	__drm_atomic_helper_crtc_destroy_state(&ast_state->base);
+	kfree(ast_state);
 }
 
 static const struct drm_crtc_funcs ast_crtc_funcs = {
-	.cursor_set = ast_cursor_set,
-	.cursor_move = ast_cursor_move,
-	.reset = ast_crtc_reset,
+	.reset = drm_atomic_helper_crtc_reset,
 	.set_config = drm_crtc_helper_set_config,
-	.gamma_set = ast_crtc_gamma_set,
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 	.destroy = ast_crtc_destroy,
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = drm_atomic_helper_page_flip,
+	.atomic_duplicate_state = ast_crtc_atomic_duplicate_state,
+	.atomic_destroy_state = ast_crtc_atomic_destroy_state,
 };
 
 static int ast_crtc_init(struct drm_device *dev)
 {
+	struct ast_private *ast = dev->dev_private;
 	struct ast_crtc *crtc;
+	int ret;
 
 	crtc = kzalloc(sizeof(struct ast_crtc), GFP_KERNEL);
 	if (!crtc)
 		return -ENOMEM;
 
-	drm_crtc_init(dev, &crtc->base, &ast_crtc_funcs);
+	ret = drm_crtc_init_with_planes(dev, &crtc->base, &ast->primary_plane,
+					&ast->cursor_plane, &ast_crtc_funcs,
+					NULL);
+	if (ret)
+		goto err_kfree;
+
 	drm_mode_crtc_set_gamma_size(&crtc->base, 256);
 	drm_crtc_helper_add(&crtc->base, &ast_crtc_helper_funcs);
 	return 0;
+
+err_kfree:
+	kfree(crtc);
+	return ret;
 }
 
+/*
+ * Encoder
+ */
+
 static void ast_encoder_destroy(struct drm_encoder *encoder)
 {
 	drm_encoder_cleanup(encoder);
 	kfree(encoder);
 }
 
-
-static struct drm_encoder *ast_best_single_encoder(struct drm_connector *connector)
-{
-	int enc_id = connector->encoder_ids[0];
-	/* pick the encoder ids */
-	if (enc_id)
-		return drm_encoder_find(connector->dev, NULL, enc_id);
-	return NULL;
-}
-
-
 static const struct drm_encoder_funcs ast_enc_funcs = {
 	.destroy = ast_encoder_destroy,
 };
 
-static void ast_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-
-}
-
-static void ast_encoder_mode_set(struct drm_encoder *encoder,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
-{
-}
-
-static void ast_encoder_prepare(struct drm_encoder *encoder)
-{
-
-}
-
-static void ast_encoder_commit(struct drm_encoder *encoder)
-{
-
-}
-
-
-static const struct drm_encoder_helper_funcs ast_enc_helper_funcs = {
-	.dpms = ast_encoder_dpms,
-	.prepare = ast_encoder_prepare,
-	.commit = ast_encoder_commit,
-	.mode_set = ast_encoder_mode_set,
-};
-
 static int ast_encoder_init(struct drm_device *dev)
 {
 	struct ast_encoder *ast_encoder;
@@ -741,12 +979,15 @@ static int ast_encoder_init(struct drm_device *dev)
 
 	drm_encoder_init(dev, &ast_encoder->base, &ast_enc_funcs,
 			 DRM_MODE_ENCODER_DAC, NULL);
-	drm_encoder_helper_add(&ast_encoder->base, &ast_enc_helper_funcs);
 
 	ast_encoder->base.possible_crtcs = 1;
 	return 0;
 }
 
+/*
+ * Connector
+ */
+
 static int ast_get_modes(struct drm_connector *connector)
 {
 	struct ast_connector *ast_connector = to_ast_connector(connector);
@@ -845,15 +1086,16 @@ static void ast_connector_destroy(struct drm_connector *connector)
 }
 
 static const struct drm_connector_helper_funcs ast_connector_helper_funcs = {
-	.mode_valid = ast_mode_valid,
 	.get_modes = ast_get_modes,
-	.best_encoder = ast_best_single_encoder,
+	.mode_valid = ast_mode_valid,
 };
 
 static const struct drm_connector_funcs ast_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.reset = drm_atomic_helper_connector_reset,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = ast_connector_destroy,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static int ast_connector_init(struct drm_device *dev)
@@ -895,58 +1137,89 @@ static int ast_connector_init(struct drm_device *dev)
 static int ast_cursor_init(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
-	int size;
-	int ret;
-	struct drm_gem_object *obj;
+	size_t size, i;
 	struct drm_gem_vram_object *gbo;
-	s64 gpu_addr;
-	void *base;
+	int ret;
 
-	size = (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE) * AST_DEFAULT_HWC_NUM;
+	size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE);
 
-	ret = ast_gem_create(dev, size, true, &obj);
-	if (ret)
-		return ret;
-	gbo = drm_gem_vram_of_gem(obj);
-	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
-	if (ret)
-		goto fail;
-	gpu_addr = drm_gem_vram_offset(gbo);
-	if (gpu_addr < 0) {
-		drm_gem_vram_unpin(gbo);
-		ret = (int)gpu_addr;
-		goto fail;
-	}
+	for (i = 0; i < ARRAY_SIZE(ast->cursor.gbo); ++i) {
+		gbo = drm_gem_vram_create(dev, size, 0);
+		if (IS_ERR(gbo)) {
+			ret = PTR_ERR(gbo);
+			goto err_drm_gem_vram_put;
+		}
+		ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM |
+					    DRM_GEM_VRAM_PL_FLAG_TOPDOWN);
+		if (ret) {
+			drm_gem_vram_put(gbo);
+			goto err_drm_gem_vram_put;
+		}
 
-	/* kmap the object */
-	base = drm_gem_vram_kmap(gbo, true, NULL);
-	if (IS_ERR(base)) {
-		ret = PTR_ERR(base);
-		goto fail;
+		ast->cursor.gbo[i] = gbo;
 	}
 
-	ast->cursor_cache = obj;
 	return 0;
-fail:
+
+err_drm_gem_vram_put:
+	while (i) {
+		--i;
+		gbo = ast->cursor.gbo[i];
+		drm_gem_vram_unpin(gbo);
+		drm_gem_vram_put(gbo);
+		ast->cursor.gbo[i] = NULL;
+	}
 	return ret;
 }
 
 static void ast_cursor_fini(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
-	struct drm_gem_vram_object *gbo =
-		drm_gem_vram_of_gem(ast->cursor_cache);
-	drm_gem_vram_kunmap(gbo);
-	drm_gem_vram_unpin(gbo);
-	drm_gem_object_put_unlocked(ast->cursor_cache);
+	size_t i;
+	struct drm_gem_vram_object *gbo;
+
+	for (i = 0; i < ARRAY_SIZE(ast->cursor.gbo); ++i) {
+		gbo = ast->cursor.gbo[i];
+		drm_gem_vram_unpin(gbo);
+		drm_gem_vram_put(gbo);
+	}
 }
 
 int ast_mode_init(struct drm_device *dev)
 {
+	struct ast_private *ast = dev->dev_private;
+	int ret;
+
+	memset(&ast->primary_plane, 0, sizeof(ast->primary_plane));
+	ret = drm_universal_plane_init(dev, &ast->primary_plane, 0x01,
+				       &ast_primary_plane_funcs,
+				       ast_primary_plane_formats,
+				       ARRAY_SIZE(ast_primary_plane_formats),
+				       NULL, DRM_PLANE_TYPE_PRIMARY, NULL);
+	if (ret) {
+		DRM_ERROR("ast: drm_universal_plane_init() failed: %d\n", ret);
+		return ret;
+	}
+	drm_plane_helper_add(&ast->primary_plane,
+			     &ast_primary_plane_helper_funcs);
+
+	ret = drm_universal_plane_init(dev, &ast->cursor_plane, 0x01,
+				       &ast_cursor_plane_funcs,
+				       ast_cursor_plane_formats,
+				       ARRAY_SIZE(ast_cursor_plane_formats),
+				       NULL, DRM_PLANE_TYPE_CURSOR, NULL);
+	if (ret) {
+		DRM_ERROR("drm_universal_plane_failed(): %d\n", ret);
+		return ret;
+	}
+	drm_plane_helper_add(&ast->cursor_plane,
+			     &ast_cursor_plane_helper_funcs);
+
 	ast_cursor_init(dev);
 	ast_crtc_init(dev);
 	ast_encoder_init(dev);
 	ast_connector_init(dev);
+
 	return 0;
 }
 
@@ -1076,23 +1349,6 @@ static void ast_i2c_destroy(struct ast_i2c_chan *i2c)
 	kfree(i2c);
 }
 
-static void ast_show_cursor(struct drm_crtc *crtc)
-{
-	struct ast_private *ast = crtc->dev->dev_private;
-	u8 jreg;
-
-	jreg = 0x2;
-	/* enable ARGB cursor */
-	jreg |= 1;
-	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xcb, 0xfc, jreg);
-}
-
-static void ast_hide_cursor(struct drm_crtc *crtc)
-{
-	struct ast_private *ast = crtc->dev->dev_private;
-	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xcb, 0xfc, 0x00);
-}
-
 static u32 copy_cursor_image(u8 *src, u8 *dst, int width, int height)
 {
 	union {
@@ -1149,103 +1405,34 @@ static u32 copy_cursor_image(u8 *src, u8 *dst, int width, int height)
 	return csum;
 }
 
-static int ast_cursor_set(struct drm_crtc *crtc,
-			  struct drm_file *file_priv,
-			  uint32_t handle,
-			  uint32_t width,
-			  uint32_t height)
+static int ast_cursor_update(void *dst, void *src, unsigned int width,
+			     unsigned int height)
 {
-	struct ast_private *ast = crtc->dev->dev_private;
-	struct ast_crtc *ast_crtc = to_ast_crtc(crtc);
-	struct drm_gem_object *obj;
-	struct drm_gem_vram_object *gbo;
-	s64 dst_gpu;
-	u64 gpu_addr;
 	u32 csum;
-	int ret;
-	u8 *src, *dst;
-
-	if (!handle) {
-		ast_hide_cursor(crtc);
-		return 0;
-	}
-
-	if (width > AST_MAX_HWC_WIDTH || height > AST_MAX_HWC_HEIGHT)
-		return -EINVAL;
-
-	obj = drm_gem_object_lookup(file_priv, handle);
-	if (!obj) {
-		DRM_ERROR("Cannot find cursor object %x for crtc\n", handle);
-		return -ENOENT;
-	}
-	gbo = drm_gem_vram_of_gem(obj);
-
-	ret = drm_gem_vram_pin(gbo, 0);
-	if (ret)
-		goto err_drm_gem_object_put_unlocked;
-	src = drm_gem_vram_kmap(gbo, true, NULL);
-	if (IS_ERR(src)) {
-		ret = PTR_ERR(src);
-		goto err_drm_gem_vram_unpin;
-	}
-
-	dst = drm_gem_vram_kmap(drm_gem_vram_of_gem(ast->cursor_cache),
-				false, NULL);
-	if (IS_ERR(dst)) {
-		ret = PTR_ERR(dst);
-		goto err_drm_gem_vram_kunmap;
-	}
-	dst_gpu = drm_gem_vram_offset(drm_gem_vram_of_gem(ast->cursor_cache));
-	if (dst_gpu < 0) {
-		ret = (int)dst_gpu;
-		goto err_drm_gem_vram_kunmap;
-	}
-
-	dst += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor;
 
 	/* do data transfer to cursor cache */
 	csum = copy_cursor_image(src, dst, width, height);
 
 	/* write checksum + signature */
-	{
-		struct drm_gem_vram_object *dst_gbo =
-			drm_gem_vram_of_gem(ast->cursor_cache);
-		u8 *dst = drm_gem_vram_kmap(dst_gbo, false, NULL);
-		dst += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
-		writel(csum, dst);
-		writel(width, dst + AST_HWC_SIGNATURE_SizeX);
-		writel(height, dst + AST_HWC_SIGNATURE_SizeY);
-		writel(0, dst + AST_HWC_SIGNATURE_HOTSPOTX);
-		writel(0, dst + AST_HWC_SIGNATURE_HOTSPOTY);
-
-		/* set pattern offset */
-		gpu_addr = (u64)dst_gpu;
-		gpu_addr += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor;
-		gpu_addr >>= 3;
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc8, gpu_addr & 0xff);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc9, (gpu_addr >> 8) & 0xff);
-		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xca, (gpu_addr >> 16) & 0xff);
-	}
-	ast_crtc->offset_x = AST_MAX_HWC_WIDTH - width;
-	ast_crtc->offset_y = AST_MAX_HWC_WIDTH - height;
-
-	ast->next_cursor = (ast->next_cursor + 1) % AST_DEFAULT_HWC_NUM;
-
-	ast_show_cursor(crtc);
-
-	drm_gem_vram_kunmap(gbo);
-	drm_gem_vram_unpin(gbo);
-	drm_gem_object_put_unlocked(obj);
+	dst += AST_HWC_SIZE;
+	writel(csum, dst);
+	writel(width, dst + AST_HWC_SIGNATURE_SizeX);
+	writel(height, dst + AST_HWC_SIGNATURE_SizeY);
+	writel(0, dst + AST_HWC_SIGNATURE_HOTSPOTX);
+	writel(0, dst + AST_HWC_SIGNATURE_HOTSPOTY);
 
 	return 0;
+}
 
-err_drm_gem_vram_kunmap:
-	drm_gem_vram_kunmap(gbo);
-err_drm_gem_vram_unpin:
-	drm_gem_vram_unpin(gbo);
-err_drm_gem_object_put_unlocked:
-	drm_gem_object_put_unlocked(obj);
-	return ret;
+static void ast_cursor_set_base(struct ast_private *ast, u64 address)
+{
+	u8 addr0 = (address >> 3) & 0xff;
+	u8 addr1 = (address >> 11) & 0xff;
+	u8 addr2 = (address >> 19) & 0xff;
+
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc8, addr0);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc9, addr1);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xca, addr2);
 }
 
 static int ast_cursor_move(struct drm_crtc *crtc,
@@ -1253,12 +1440,17 @@ static int ast_cursor_move(struct drm_crtc *crtc,
 {
 	struct ast_crtc *ast_crtc = to_ast_crtc(crtc);
 	struct ast_private *ast = crtc->dev->dev_private;
+	struct drm_gem_vram_object *gbo;
 	int x_offset, y_offset;
-	u8 *sig;
+	u8 *dst, *sig;
+	u8 jreg;
+
+	gbo = ast->cursor.gbo[ast->cursor.next_index];
+	dst = drm_gem_vram_vmap(gbo);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
 
-	sig = drm_gem_vram_kmap(drm_gem_vram_of_gem(ast->cursor_cache),
-				false, NULL);
-	sig += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
+	sig = dst + AST_HWC_SIZE;
 	writel(x, sig + AST_HWC_SIGNATURE_X);
 	writel(y, sig + AST_HWC_SIGNATURE_Y);
 
@@ -1281,7 +1473,11 @@ static int ast_cursor_move(struct drm_crtc *crtc,
 	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc7, ((y >> 8) & 0x07));
 
 	/* dummy write to fire HWC */
-	ast_show_cursor(crtc);
+	jreg = 0x02 |
+	       0x01; /* enable ARGB4444 cursor */
+	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xcb, 0xfc, jreg);
+
+	drm_gem_vram_vunmap(gbo, dst);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index c52d92294171..fad34106083a 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -30,7 +30,6 @@
 
 #include <drm/drm_print.h>
 #include <drm/drm_gem_vram_helper.h>
-#include <drm/drm_vram_mm_helper.h>
 
 #include "ast_drv.h"
 
@@ -42,7 +41,7 @@ int ast_mm_init(struct ast_private *ast)
 
 	vmm = drm_vram_helper_alloc_mm(
 		dev, pci_resource_start(dev->pdev, 0),
-		ast->vram_size, &drm_gem_vram_mm_funcs);
+		ast->vram_size);
 	if (IS_ERR(vmm)) {
 		ret = PTR_ERR(vmm);
 		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index f2e73e6d46b8..10985134ce0b 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 	unsigned long prate;
 	unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL;
 	unsigned int cfg = 0;
-	int div;
+	int div, ret;
+
+	ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk);
+	if (ret)
+		return;
 
 	vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay;
 	vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end;
@@ -95,14 +99,14 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 		     (adj->crtc_hdisplay - 1) |
 		     ((adj->crtc_vdisplay - 1) << 16));
 
+	prate = clk_get_rate(crtc->dc->hlcdc->sys_clk);
+	mode_rate = adj->crtc_clock * 1000;
 	if (!crtc->dc->desc->fixed_clksrc) {
+		prate *= 2;
 		cfg |= ATMEL_HLCDC_CLKSEL;
 		mask |= ATMEL_HLCDC_CLKSEL;
 	}
 
-	prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk);
-	mode_rate = adj->crtc_clock * 1000;
-
 	div = DIV_ROUND_UP(prate, mode_rate);
 	if (div < 2) {
 		div = 2;
@@ -117,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 		int div_low = prate / mode_rate;
 
 		if (div_low >= 2 &&
-		    ((prate / div_low - mode_rate) <
-		     10 * (mode_rate - prate / div)))
+		    (10 * (prate / div_low - mode_rate) <
+		     (mode_rate - prate / div)))
 			/*
 			 * At least 10 times better when using a higher
 			 * frequency than requested, instead of a lower.
@@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 			   ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO |
 			   ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK,
 			   cfg);
+
+	clk_disable_unprepare(crtc->dc->hlcdc->sys_clk);
 }
 
 static enum drm_mode_status
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 92640298ad41..112aa5066cee 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -557,12 +557,6 @@ static irqreturn_t atmel_hlcdc_dc_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static struct drm_framebuffer *atmel_hlcdc_fb_create(struct drm_device *dev,
-		struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd)
-{
-	return drm_gem_fb_create(dev, file_priv, mode_cmd);
-}
-
 struct atmel_hlcdc_dc_commit {
 	struct work_struct work;
 	struct drm_device *dev;
@@ -657,7 +651,7 @@ error:
 }
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
-	.fb_create = atmel_hlcdc_fb_create,
+	.fb_create = drm_gem_fb_create,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = atmel_hlcdc_dc_atomic_commit,
 };
@@ -727,18 +721,10 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 	dc->hlcdc = dev_get_drvdata(dev->dev->parent);
 	dev->dev_private = dc;
 
-	if (dc->desc->fixed_clksrc) {
-		ret = clk_prepare_enable(dc->hlcdc->sys_clk);
-		if (ret) {
-			dev_err(dev->dev, "failed to enable sys_clk\n");
-			goto err_destroy_wq;
-		}
-	}
-
 	ret = clk_prepare_enable(dc->hlcdc->periph_clk);
 	if (ret) {
 		dev_err(dev->dev, "failed to enable periph_clk\n");
-		goto err_sys_clk_disable;
+		goto err_destroy_wq;
 	}
 
 	pm_runtime_enable(dev->dev);
@@ -774,9 +760,6 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 err_periph_clk_disable:
 	pm_runtime_disable(dev->dev);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
-err_sys_clk_disable:
-	if (dc->desc->fixed_clksrc)
-		clk_disable_unprepare(dc->hlcdc->sys_clk);
 
 err_destroy_wq:
 	destroy_workqueue(dc->wq);
@@ -801,8 +784,6 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev)
 
 	pm_runtime_disable(dev->dev);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
-	if (dc->desc->fixed_clksrc)
-		clk_disable_unprepare(dc->hlcdc->sys_clk);
 	destroy_workqueue(dc->wq);
 }
 
@@ -916,8 +897,6 @@ static int atmel_hlcdc_dc_drm_suspend(struct device *dev)
 	regmap_read(regmap, ATMEL_HLCDC_IMR, &dc->suspend.imr);
 	regmap_write(regmap, ATMEL_HLCDC_IDR, dc->suspend.imr);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
-	if (dc->desc->fixed_clksrc)
-		clk_disable_unprepare(dc->hlcdc->sys_clk);
 
 	return 0;
 }
@@ -927,8 +906,6 @@ static int atmel_hlcdc_dc_drm_resume(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 	struct atmel_hlcdc_dc *dc = drm_dev->dev_private;
 
-	if (dc->desc->fixed_clksrc)
-		clk_prepare_enable(dc->hlcdc->sys_clk);
 	clk_prepare_enable(dc->hlcdc->periph_clk);
 	regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IER, dc->suspend.imr);
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 375fa84c548b..121b62682d80 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -107,7 +107,8 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, int endpoint)
 	output->encoder.possible_crtcs = 0x1;
 
 	if (panel) {
-		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_Unknown);
+		bridge = drm_panel_bridge_add_typed(panel,
+						    DRM_MODE_CONNECTOR_Unknown);
 		if (IS_ERR(bridge))
 			return PTR_ERR(bridge);
 	}
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 89f5a756fa37..40800ec5700a 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -601,11 +601,10 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	struct drm_framebuffer *fb = state->base.fb;
 	const struct drm_display_mode *mode;
 	struct drm_crtc_state *crtc_state;
-	unsigned int tmp;
 	int ret;
 	int i;
 
-	if (!state->base.crtc || !fb)
+	if (!state->base.crtc || WARN_ON(!fb))
 		return 0;
 
 	crtc_state = drm_atomic_get_existing_crtc_state(s->state, s->crtc);
@@ -694,9 +693,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	 * Swap width and size in case of 90 or 270 degrees rotation
 	 */
 	if (drm_rotation_90_or_270(state->base.rotation)) {
-		tmp = state->src_w;
-		state->src_w = state->src_h;
-		state->src_h = tmp;
+		swap(state->src_w, state->src_h);
 	}
 
 	if (!desc->layout.size &&
diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig
index 32b043abb668..7bcdf294fed8 100644
--- a/drivers/gpu/drm/bochs/Kconfig
+++ b/drivers/gpu/drm/bochs/Kconfig
@@ -4,6 +4,8 @@ config DRM_BOCHS
 	depends on DRM && PCI && MMU
 	select DRM_KMS_HELPER
 	select DRM_VRAM_HELPER
+	select DRM_TTM
+	select DRM_TTM_HELPER
 	help
 	  Choose this option for qemu.
 	  If M is selected the module will be called bochs-drm.
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 68483a2fc12c..917767173ee6 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -10,7 +10,6 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_vram_helper.h>
 #include <drm/drm_simple_kms_helper.h>
-#include <drm/drm_vram_mm_helper.h>
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index 770e1625d05e..10460878414e 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -58,10 +58,7 @@ err:
 	return ret;
 }
 
-static const struct file_operations bochs_fops = {
-	.owner		= THIS_MODULE,
-	DRM_VRAM_MM_FILE_OPERATIONS
-};
+DEFINE_DRM_GEM_FOPS(bochs_fops);
 
 static struct drm_driver bochs_driver = {
 	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
@@ -114,7 +111,7 @@ static int bochs_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 	}
 
-	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "bochsdrmfb");
+	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "bochsdrmfb");
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c
index e567bdfa2ab8..b615b7dfdd9d 100644
--- a/drivers/gpu/drm/bochs/bochs_hw.c
+++ b/drivers/gpu/drm/bochs/bochs_hw.c
@@ -255,7 +255,7 @@ void bochs_hw_setformat(struct bochs_device *bochs,
 		DRM_ERROR("%s: Huh? Got framebuffer format 0x%x",
 			  __func__, format->format);
 		break;
-	};
+	}
 }
 
 void bochs_hw_setbase(struct bochs_device *bochs,
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 02a9c1ed165b..3f0006c2470d 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -69,33 +69,11 @@ static void bochs_pipe_update(struct drm_simple_display_pipe *pipe,
 	}
 }
 
-static int bochs_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
-				 struct drm_plane_state *new_state)
-{
-	struct drm_gem_vram_object *gbo;
-
-	if (!new_state->fb)
-		return 0;
-	gbo = drm_gem_vram_of_gem(new_state->fb->obj[0]);
-	return drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
-}
-
-static void bochs_pipe_cleanup_fb(struct drm_simple_display_pipe *pipe,
-				  struct drm_plane_state *old_state)
-{
-	struct drm_gem_vram_object *gbo;
-
-	if (!old_state->fb)
-		return;
-	gbo = drm_gem_vram_of_gem(old_state->fb->obj[0]);
-	drm_gem_vram_unpin(gbo);
-}
-
 static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = {
 	.enable	    = bochs_pipe_enable,
 	.update	    = bochs_pipe_update,
-	.prepare_fb = bochs_pipe_prepare_fb,
-	.cleanup_fb = bochs_pipe_cleanup_fb,
+	.prepare_fb = drm_gem_vram_simple_display_pipe_prepare_fb,
+	.cleanup_fb = drm_gem_vram_simple_display_pipe_cleanup_fb,
 };
 
 static int bochs_connector_get_modes(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 8f9bb886f7ad..1b74f530b07c 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -11,8 +11,7 @@ int bochs_mm_init(struct bochs_device *bochs)
 	struct drm_vram_mm *vmm;
 
 	vmm = drm_vram_helper_alloc_mm(bochs->dev, bochs->fb_base,
-				       bochs->fb_size,
-				       &drm_gem_vram_mm_funcs);
+				       bochs->fb_size);
 	return PTR_ERR_OR_ZERO(vmm);
 }
 
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 1cc9f502c1f2..0b9ca5862455 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -16,16 +16,6 @@ config DRM_PANEL_BRIDGE
 menu "Display Interface Bridges"
 	depends on DRM && DRM_BRIDGE
 
-config DRM_ANALOGIX_ANX78XX
-	tristate "Analogix ANX78XX bridge"
-	select DRM_KMS_HELPER
-	select REGMAP_I2C
-	---help---
-	  ANX78XX is an ultra-low power Full-HD SlimPort transmitter
-	  designed for portable devices. The ANX78XX transforms
-	  the HDMI output of an application processor to MyDP
-	  or DisplayPort.
-
 config DRM_CDNS_DSI
 	tristate "Cadence DPI/DSI bridge"
 	select DRM_KMS_HELPER
@@ -45,14 +35,14 @@ config DRM_DUMB_VGA_DAC
 	  Support for non-programmable RGB to VGA DAC bridges, such as ADI
 	  ADV7123, TI THS8134 and THS8135 or passive resistor ladder DACs.
 
-config DRM_LVDS_ENCODER
-	tristate "Transparent parallel to LVDS encoder support"
+config DRM_LVDS_CODEC
+	tristate "Transparent LVDS encoders and decoders support"
 	depends on OF
 	select DRM_KMS_HELPER
 	select DRM_PANEL_BRIDGE
 	help
-	  Support for transparent parallel to LVDS encoders that don't require
-	  any configuration.
+	  Support for transparent LVDS encoders and decoders that don't
+	  require any configuration.
 
 config DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW
 	tristate "MegaChips stdp4028-ge-b850v3-fw and stdp2690-ge-b850v3-fw"
@@ -60,10 +50,10 @@ config DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	---help---
-          This is a driver for the display bridges of
-          GE B850v3 that convert dual channel LVDS
-          to DP++. This is used with the i.MX6 imx-ldb
-          driver. You are likely to say N here.
+	  This is a driver for the display bridges of
+	  GE B850v3 that convert dual channel LVDS
+	  to DP++. This is used with the i.MX6 imx-ldb
+	  driver. You are likely to say N here.
 
 config DRM_NXP_PTN3460
 	tristate "NXP PTN3460 DP/LVDS bridge"
@@ -87,8 +77,7 @@ config DRM_SIL_SII8620
 	depends on OF
 	select DRM_KMS_HELPER
 	imply EXTCON
-	select INPUT
-	select RC_CORE
+	depends on RC_CORE || !RC_CORE
 	help
 	  Silicon Image SII8620 HDMI/MHL bridge chip driver.
 
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index 4934fcf5a6f8..cd16ce830270 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o
 obj-$(CONFIG_DRM_CDNS_DSI) += cdns-dsi.o
 obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o
-obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o
+obj-$(CONFIG_DRM_LVDS_CODEC) += lvds-codec.o
 obj-$(CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW) += megachips-stdpxxxx-ge-b850v3-fw.o
 obj-$(CONFIG_DRM_NXP_PTN3460) += nxp-ptn3460.o
 obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o
@@ -12,8 +11,9 @@ obj-$(CONFIG_DRM_SII9234) += sii9234.o
 obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o
 obj-$(CONFIG_DRM_TOSHIBA_TC358764) += tc358764.o
 obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o
-obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/
 obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/
 obj-$(CONFIG_DRM_TI_SN65DSI86) += ti-sn65dsi86.o
 obj-$(CONFIG_DRM_TI_TFP410) += ti-tfp410.o
+
+obj-y += analogix/
 obj-y += synopsys/
diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.h b/drivers/gpu/drm/bridge/analogix-anx78xx.h
deleted file mode 100644
index 25e063bcecbc..000000000000
--- a/drivers/gpu/drm/bridge/analogix-anx78xx.h
+++ /dev/null
@@ -1,710 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2016, Analogix Semiconductor. All rights reserved.
- */
-
-#ifndef __ANX78xx_H
-#define __ANX78xx_H
-
-#define TX_P0				0x70
-#define TX_P1				0x7a
-#define TX_P2				0x72
-
-#define RX_P0				0x7e
-#define RX_P1				0x80
-
-/***************************************************************/
-/* Register definition of device address 0x7e                  */
-/***************************************************************/
-
-/*
- * System Control and Status
- */
-
-/* Software Reset Register 1 */
-#define SP_SOFTWARE_RESET1_REG		0x11
-#define SP_VIDEO_RST			BIT(4)
-#define SP_HDCP_MAN_RST			BIT(2)
-#define SP_TMDS_RST			BIT(1)
-#define SP_SW_MAN_RST			BIT(0)
-
-/* System Status Register */
-#define SP_SYSTEM_STATUS_REG		0x14
-#define SP_TMDS_CLOCK_DET		BIT(1)
-#define SP_TMDS_DE_DET			BIT(0)
-
-/* HDMI Status Register */
-#define SP_HDMI_STATUS_REG		0x15
-#define SP_HDMI_AUD_LAYOUT		BIT(3)
-#define SP_HDMI_DET			BIT(0)
-#  define SP_DVI_MODE			0
-#  define SP_HDMI_MODE			1
-
-/* HDMI Mute Control Register */
-#define SP_HDMI_MUTE_CTRL_REG		0x16
-#define SP_AUD_MUTE			BIT(1)
-#define SP_VID_MUTE			BIT(0)
-
-/* System Power Down Register 1 */
-#define SP_SYSTEM_POWER_DOWN1_REG	0x18
-#define SP_PWDN_CTRL			BIT(0)
-
-/*
- * Audio and Video Auto Control
- */
-
-/* Auto Audio and Video Control register */
-#define SP_AUDVID_CTRL_REG		0x20
-#define SP_AVC_OE			BIT(7)
-#define SP_AAC_OE			BIT(6)
-#define SP_AVC_EN			BIT(1)
-#define SP_AAC_EN			BIT(0)
-
-/* Audio Exception Enable Registers */
-#define SP_AUD_EXCEPTION_ENABLE_BASE	(0x24 - 1)
-/* Bits for Audio Exception Enable Register 3 */
-#define SP_AEC_EN21			BIT(5)
-
-/*
- * Interrupt
- */
-
-/* Interrupt Status Register 1 */
-#define SP_INT_STATUS1_REG		0x31
-/* Bits for Interrupt Status Register 1 */
-#define SP_HDMI_DVI			BIT(7)
-#define SP_CKDT_CHG			BIT(6)
-#define SP_SCDT_CHG			BIT(5)
-#define SP_PCLK_CHG			BIT(4)
-#define SP_PLL_UNLOCK			BIT(3)
-#define SP_CABLE_PLUG_CHG		BIT(2)
-#define SP_SET_MUTE			BIT(1)
-#define SP_SW_INTR			BIT(0)
-/* Bits for Interrupt Status Register 2 */
-#define SP_HDCP_ERR			BIT(5)
-#define SP_AUDIO_SAMPLE_CHG		BIT(0)	/* undocumented */
-/* Bits for Interrupt Status Register 3 */
-#define SP_AUD_MODE_CHG			BIT(0)
-/* Bits for Interrupt Status Register 5 */
-#define SP_AUDIO_RCV			BIT(0)
-/* Bits for Interrupt Status Register 6 */
-#define SP_INT_STATUS6_REG		0x36
-#define SP_CTS_RCV			BIT(7)
-#define SP_NEW_AUD_PKT			BIT(4)
-#define SP_NEW_AVI_PKT			BIT(1)
-#define SP_NEW_CP_PKT			BIT(0)
-/* Bits for Interrupt Status Register 7 */
-#define SP_NO_VSI			BIT(7)
-#define SP_NEW_VS			BIT(4)
-
-/* Interrupt Mask 1 Status Registers */
-#define SP_INT_MASK1_REG		0x41
-
-/* HDMI US TIMER Control Register */
-#define SP_HDMI_US_TIMER_CTRL_REG	0x49
-#define SP_MS_TIMER_MARGIN_10_8_MASK	0x07
-
-/*
- * TMDS Control
- */
-
-/* TMDS Control Registers */
-#define SP_TMDS_CTRL_BASE		(0x50 - 1)
-/* Bits for TMDS Control Register 7 */
-#define SP_PD_RT			BIT(0)
-
-/*
- * Video Control
- */
-
-/* Video Status Register */
-#define SP_VIDEO_STATUS_REG		0x70
-#define SP_COLOR_DEPTH_MASK		0xf0
-#define SP_COLOR_DEPTH_SHIFT		4
-#  define SP_COLOR_DEPTH_MODE_LEGACY	0x00
-#  define SP_COLOR_DEPTH_MODE_24BIT	0x04
-#  define SP_COLOR_DEPTH_MODE_30BIT	0x05
-#  define SP_COLOR_DEPTH_MODE_36BIT	0x06
-#  define SP_COLOR_DEPTH_MODE_48BIT	0x07
-
-/* Video Data Range Control Register */
-#define SP_VID_DATA_RANGE_CTRL_REG	0x83
-#define SP_R2Y_INPUT_LIMIT		BIT(1)
-
-/* Pixel Clock High Resolution Counter Registers */
-#define SP_PCLK_HIGHRES_CNT_BASE	(0x8c - 1)
-
-/*
- * Audio Control
- */
-
-/* Number of Audio Channels Status Registers */
-#define SP_AUD_CH_STATUS_REG_NUM	6
-
-/* Audio IN S/PDIF Channel Status Registers */
-#define SP_AUD_SPDIF_CH_STATUS_BASE	0xc7
-
-/* Audio IN S/PDIF Channel Status Register 4 */
-#define SP_FS_FREQ_MASK			0x0f
-#  define SP_FS_FREQ_44100HZ		0x00
-#  define SP_FS_FREQ_48000HZ		0x02
-#  define SP_FS_FREQ_32000HZ		0x03
-#  define SP_FS_FREQ_88200HZ		0x08
-#  define SP_FS_FREQ_96000HZ		0x0a
-#  define SP_FS_FREQ_176400HZ		0x0c
-#  define SP_FS_FREQ_192000HZ		0x0e
-
-/*
- * Micellaneous Control Block
- */
-
-/* CHIP Control Register */
-#define SP_CHIP_CTRL_REG		0xe3
-#define SP_MAN_HDMI5V_DET		BIT(3)
-#define SP_PLLLOCK_CKDT_EN		BIT(2)
-#define SP_ANALOG_CKDT_EN		BIT(1)
-#define SP_DIGITAL_CKDT_EN		BIT(0)
-
-/* Packet Receiving Status Register */
-#define SP_PACKET_RECEIVING_STATUS_REG	0xf3
-#define SP_AVI_RCVD			BIT(5)
-#define SP_VSI_RCVD			BIT(1)
-
-/***************************************************************/
-/* Register definition of device address 0x80                  */
-/***************************************************************/
-
-/* HDCP BCAPS Shadow Register */
-#define SP_HDCP_BCAPS_SHADOW_REG	0x2a
-#define SP_BCAPS_REPEATER		BIT(5)
-
-/* HDCP Status Register */
-#define SP_RX_HDCP_STATUS_REG		0x3f
-#define SP_AUTH_EN			BIT(4)
-
-/*
- * InfoFrame and Control Packet Registers
- */
-
-/* AVI InfoFrame packet checksum */
-#define SP_AVI_INFOFRAME_CHECKSUM	0xa3
-
-/* AVI InfoFrame Registers */
-#define SP_AVI_INFOFRAME_DATA_BASE	0xa4
-
-#define SP_AVI_COLOR_F_MASK		0x60
-#define SP_AVI_COLOR_F_SHIFT		5
-
-/* Audio InfoFrame Registers */
-#define SP_AUD_INFOFRAME_DATA_BASE	0xc4
-#define SP_AUD_INFOFRAME_LAYOUT_MASK	0x0f
-
-/* MPEG/HDMI Vendor Specific InfoFrame Packet type code */
-#define SP_MPEG_VS_INFOFRAME_TYPE_REG	0xe0
-
-/* MPEG/HDMI Vendor Specific InfoFrame Packet length */
-#define SP_MPEG_VS_INFOFRAME_LEN_REG	0xe2
-
-/* MPEG/HDMI Vendor Specific InfoFrame Packet version number */
-#define SP_MPEG_VS_INFOFRAME_VER_REG	0xe1
-
-/* MPEG/HDMI Vendor Specific InfoFrame Packet content */
-#define SP_MPEG_VS_INFOFRAME_DATA_BASE	0xe4
-
-/* General Control Packet Register */
-#define SP_GENERAL_CTRL_PACKET_REG	0x9f
-#define SP_CLEAR_AVMUTE			BIT(4)
-#define SP_SET_AVMUTE			BIT(0)
-
-/***************************************************************/
-/* Register definition of device address 0x70                  */
-/***************************************************************/
-
-/* HDCP Status Register */
-#define SP_TX_HDCP_STATUS_REG		0x00
-#define SP_AUTH_FAIL			BIT(5)
-#define SP_AUTHEN_PASS			BIT(1)
-
-/* HDCP Control Register 0 */
-#define SP_HDCP_CTRL0_REG		0x01
-#define SP_RX_REPEATER			BIT(6)
-#define SP_RE_AUTH			BIT(5)
-#define SP_SW_AUTH_OK			BIT(4)
-#define SP_HARD_AUTH_EN			BIT(3)
-#define SP_HDCP_ENC_EN			BIT(2)
-#define SP_BKSV_SRM_PASS		BIT(1)
-#define SP_KSVLIST_VLD			BIT(0)
-/* HDCP Function Enabled */
-#define SP_HDCP_FUNCTION_ENABLED	(BIT(0) | BIT(1) | BIT(2) | BIT(3))
-
-/* HDCP Receiver BSTATUS Register 0 */
-#define	SP_HDCP_RX_BSTATUS0_REG		0x1b
-/* HDCP Receiver BSTATUS Register 1 */
-#define	SP_HDCP_RX_BSTATUS1_REG		0x1c
-
-/* HDCP Embedded "Blue Screen" Content Registers */
-#define SP_HDCP_VID0_BLUE_SCREEN_REG	0x2c
-#define SP_HDCP_VID1_BLUE_SCREEN_REG	0x2d
-#define SP_HDCP_VID2_BLUE_SCREEN_REG	0x2e
-
-/* HDCP Wait R0 Timing Register */
-#define SP_HDCP_WAIT_R0_TIME_REG	0x40
-
-/* HDCP Link Integrity Check Timer Register */
-#define SP_HDCP_LINK_CHECK_TIMER_REG	0x41
-
-/* HDCP Repeater Ready Wait Timer Register */
-#define SP_HDCP_RPTR_RDY_WAIT_TIME_REG	0x42
-
-/* HDCP Auto Timer Register */
-#define SP_HDCP_AUTO_TIMER_REG		0x51
-
-/* HDCP Key Status Register */
-#define SP_HDCP_KEY_STATUS_REG		0x5e
-
-/* HDCP Key Command Register */
-#define SP_HDCP_KEY_COMMAND_REG		0x5f
-#define SP_DISABLE_SYNC_HDCP		BIT(2)
-
-/* OTP Memory Key Protection Registers */
-#define SP_OTP_KEY_PROTECT1_REG		0x60
-#define SP_OTP_KEY_PROTECT2_REG		0x61
-#define SP_OTP_KEY_PROTECT3_REG		0x62
-#define SP_OTP_PSW1			0xa2
-#define SP_OTP_PSW2			0x7e
-#define SP_OTP_PSW3			0xc6
-
-/* DP System Control Registers */
-#define SP_DP_SYSTEM_CTRL_BASE		(0x80 - 1)
-/* Bits for DP System Control Register 2 */
-#define SP_CHA_STA			BIT(2)
-/* Bits for DP System Control Register 3 */
-#define SP_HPD_STATUS			BIT(6)
-#define SP_STRM_VALID			BIT(2)
-/* Bits for DP System Control Register 4 */
-#define SP_ENHANCED_MODE		BIT(3)
-
-/* DP Video Control Register */
-#define SP_DP_VIDEO_CTRL_REG		0x84
-#define SP_COLOR_F_MASK			0x06
-#define SP_COLOR_F_SHIFT		1
-#define SP_BPC_MASK			0xe0
-#define SP_BPC_SHIFT			5
-#  define SP_BPC_6BITS			0x00
-#  define SP_BPC_8BITS			0x01
-#  define SP_BPC_10BITS			0x02
-#  define SP_BPC_12BITS			0x03
-
-/* DP Audio Control Register */
-#define SP_DP_AUDIO_CTRL_REG		0x87
-#define SP_AUD_EN			BIT(0)
-
-/* 10us Pulse Generate Timer Registers */
-#define SP_I2C_GEN_10US_TIMER0_REG	0x88
-#define SP_I2C_GEN_10US_TIMER1_REG	0x89
-
-/* Packet Send Control Register */
-#define SP_PACKET_SEND_CTRL_REG		0x90
-#define SP_AUD_IF_UP			BIT(7)
-#define SP_AVI_IF_UD			BIT(6)
-#define SP_MPEG_IF_UD			BIT(5)
-#define SP_SPD_IF_UD			BIT(4)
-#define SP_AUD_IF_EN			BIT(3)
-#define SP_AVI_IF_EN			BIT(2)
-#define SP_MPEG_IF_EN			BIT(1)
-#define SP_SPD_IF_EN			BIT(0)
-
-/* DP HDCP Control Register */
-#define SP_DP_HDCP_CTRL_REG		0x92
-#define SP_AUTO_EN			BIT(7)
-#define SP_AUTO_START			BIT(5)
-#define SP_LINK_POLLING			BIT(1)
-
-/* DP Main Link Bandwidth Setting Register */
-#define SP_DP_MAIN_LINK_BW_SET_REG	0xa0
-#define SP_LINK_BW_SET_MASK		0x1f
-#define SP_INITIAL_SLIM_M_AUD_SEL	BIT(5)
-
-/* DP Training Pattern Set Register */
-#define SP_DP_TRAINING_PATTERN_SET_REG	0xa2
-
-/* DP Lane 0 Link Training Control Register */
-#define SP_DP_LANE0_LT_CTRL_REG		0xa3
-#define SP_TX_SW_SET_MASK		0x1b
-#define SP_MAX_PRE_REACH		BIT(5)
-#define SP_MAX_DRIVE_REACH		BIT(4)
-#define SP_PRE_EMP_LEVEL1		BIT(3)
-#define SP_DRVIE_CURRENT_LEVEL1		BIT(0)
-
-/* DP Link Training Control Register */
-#define SP_DP_LT_CTRL_REG		0xa8
-#define SP_LT_ERROR_TYPE_MASK		0x70
-#  define SP_LT_NO_ERROR		0x00
-#  define SP_LT_AUX_WRITE_ERROR		0x01
-#  define SP_LT_MAX_DRIVE_REACHED	0x02
-#  define SP_LT_WRONG_LANE_COUNT_SET	0x03
-#  define SP_LT_LOOP_SAME_5_TIME	0x04
-#  define SP_LT_CR_FAIL_IN_EQ		0x05
-#  define SP_LT_EQ_LOOP_5_TIME		0x06
-#define SP_LT_EN			BIT(0)
-
-/* DP CEP Training Control Registers */
-#define SP_DP_CEP_TRAINING_CTRL0_REG	0xa9
-#define SP_DP_CEP_TRAINING_CTRL1_REG	0xaa
-
-/* DP Debug Register 1 */
-#define SP_DP_DEBUG1_REG		0xb0
-#define SP_DEBUG_PLL_LOCK		BIT(4)
-#define SP_POLLING_EN			BIT(1)
-
-/* DP Polling Control Register */
-#define SP_DP_POLLING_CTRL_REG		0xb4
-#define SP_AUTO_POLLING_DISABLE		BIT(0)
-
-/* DP Link Debug Control Register */
-#define SP_DP_LINK_DEBUG_CTRL_REG	0xb8
-#define SP_M_VID_DEBUG			BIT(5)
-#define SP_NEW_PRBS7			BIT(4)
-#define SP_INSERT_ER			BIT(1)
-#define SP_PRBS31_EN			BIT(0)
-
-/* AUX Misc control Register */
-#define SP_AUX_MISC_CTRL_REG		0xbf
-
-/* DP PLL control Register */
-#define SP_DP_PLL_CTRL_REG		0xc7
-#define SP_PLL_RST			BIT(6)
-
-/* DP Analog Power Down Register */
-#define SP_DP_ANALOG_POWER_DOWN_REG	0xc8
-#define SP_CH0_PD			BIT(0)
-
-/* DP Misc Control Register */
-#define SP_DP_MISC_CTRL_REG		0xcd
-#define SP_EQ_TRAINING_LOOP		BIT(6)
-
-/* DP Extra I2C Device Address Register */
-#define SP_DP_EXTRA_I2C_DEV_ADDR_REG	0xce
-#define SP_I2C_STRETCH_DISABLE		BIT(7)
-
-#define SP_I2C_EXTRA_ADDR		0x50
-
-/* DP Downspread Control Register 1 */
-#define SP_DP_DOWNSPREAD_CTRL1_REG	0xd0
-
-/* DP M Value Calculation Control Register */
-#define SP_DP_M_CALCULATION_CTRL_REG	0xd9
-#define SP_M_GEN_CLK_SEL		BIT(0)
-
-/* AUX Channel Access Status Register */
-#define SP_AUX_CH_STATUS_REG		0xe0
-#define SP_AUX_STATUS			0x0f
-
-/* AUX Channel DEFER Control Register */
-#define SP_AUX_DEFER_CTRL_REG		0xe2
-#define SP_DEFER_CTRL_EN		BIT(7)
-
-/* DP Buffer Data Count Register */
-#define SP_BUF_DATA_COUNT_REG		0xe4
-#define SP_BUF_DATA_COUNT_MASK		0x1f
-#define SP_BUF_CLR			BIT(7)
-
-/* DP AUX Channel Control Register 1 */
-#define SP_DP_AUX_CH_CTRL1_REG		0xe5
-#define SP_AUX_TX_COMM_MASK		0x0f
-#define SP_AUX_LENGTH_MASK		0xf0
-#define SP_AUX_LENGTH_SHIFT		4
-
-/* DP AUX CH Address Register 0 */
-#define SP_AUX_ADDR_7_0_REG		0xe6
-
-/* DP AUX CH Address Register 1 */
-#define SP_AUX_ADDR_15_8_REG		0xe7
-
-/* DP AUX CH Address Register 2 */
-#define SP_AUX_ADDR_19_16_REG		0xe8
-#define SP_AUX_ADDR_19_16_MASK		0x0f
-
-/* DP AUX Channel Control Register 2 */
-#define SP_DP_AUX_CH_CTRL2_REG		0xe9
-#define SP_AUX_SEL_RXCM			BIT(6)
-#define SP_AUX_CHSEL			BIT(3)
-#define SP_AUX_PN_INV			BIT(2)
-#define SP_ADDR_ONLY			BIT(1)
-#define SP_AUX_EN			BIT(0)
-
-/* DP Video Stream Control InfoFrame Register */
-#define SP_DP_3D_VSC_CTRL_REG		0xea
-#define SP_INFO_FRAME_VSC_EN		BIT(0)
-
-/* DP Video Stream Data Byte 1 Register */
-#define SP_DP_VSC_DB1_REG		0xeb
-
-/* DP AUX Channel Control Register 3 */
-#define SP_DP_AUX_CH_CTRL3_REG		0xec
-#define SP_WAIT_COUNTER_7_0_MASK	0xff
-
-/* DP AUX Channel Control Register 4 */
-#define SP_DP_AUX_CH_CTRL4_REG		0xed
-
-/* DP AUX Buffer Data Registers */
-#define SP_DP_BUF_DATA0_REG		0xf0
-
-/***************************************************************/
-/* Register definition of device address 0x72                  */
-/***************************************************************/
-
-/*
- * Core Register Definitions
- */
-
-/* Device ID Low Byte Register */
-#define SP_DEVICE_IDL_REG		0x02
-
-/* Device ID High Byte Register */
-#define SP_DEVICE_IDH_REG		0x03
-
-/* Device version register */
-#define SP_DEVICE_VERSION_REG		0x04
-
-/* Power Down Control Register */
-#define SP_POWERDOWN_CTRL_REG		0x05
-#define SP_REGISTER_PD			BIT(7)
-#define SP_HDCP_PD			BIT(5)
-#define SP_AUDIO_PD			BIT(4)
-#define SP_VIDEO_PD			BIT(3)
-#define SP_LINK_PD			BIT(2)
-#define SP_TOTAL_PD			BIT(1)
-
-/* Reset Control Register 1 */
-#define SP_RESET_CTRL1_REG		0x06
-#define SP_MISC_RST			BIT(7)
-#define SP_VIDCAP_RST			BIT(6)
-#define SP_VIDFIF_RST			BIT(5)
-#define SP_AUDFIF_RST			BIT(4)
-#define SP_AUDCAP_RST			BIT(3)
-#define SP_HDCP_RST			BIT(2)
-#define SP_SW_RST			BIT(1)
-#define SP_HW_RST			BIT(0)
-
-/* Reset Control Register 2 */
-#define SP_RESET_CTRL2_REG		0x07
-#define SP_AUX_RST			BIT(2)
-#define SP_SERDES_FIFO_RST		BIT(1)
-#define SP_I2C_REG_RST			BIT(0)
-
-/* Video Control Register 1 */
-#define SP_VID_CTRL1_REG		0x08
-#define SP_VIDEO_EN			BIT(7)
-#define SP_VIDEO_MUTE			BIT(2)
-#define SP_DE_GEN			BIT(1)
-#define SP_DEMUX			BIT(0)
-
-/* Video Control Register 2 */
-#define SP_VID_CTRL2_REG		0x09
-#define SP_IN_COLOR_F_MASK		0x03
-#define SP_IN_YC_BIT_SEL		BIT(2)
-#define SP_IN_BPC_MASK			0x70
-#define SP_IN_BPC_SHIFT			4
-#  define SP_IN_BPC_12BIT		0x03
-#  define SP_IN_BPC_10BIT		0x02
-#  define SP_IN_BPC_8BIT		0x01
-#  define SP_IN_BPC_6BIT		0x00
-#define SP_IN_D_RANGE			BIT(7)
-
-/* Video Control Register 3 */
-#define SP_VID_CTRL3_REG		0x0a
-#define SP_HPD_OUT			BIT(6)
-
-/* Video Control Register 5 */
-#define SP_VID_CTRL5_REG		0x0c
-#define SP_CSC_STD_SEL			BIT(7)
-#define SP_XVYCC_RNG_LMT		BIT(6)
-#define SP_RANGE_Y2R			BIT(5)
-#define SP_CSPACE_Y2R			BIT(4)
-#define SP_RGB_RNG_LMT			BIT(3)
-#define SP_Y_RNG_LMT			BIT(2)
-#define SP_RANGE_R2Y			BIT(1)
-#define SP_CSPACE_R2Y			BIT(0)
-
-/* Video Control Register 6 */
-#define SP_VID_CTRL6_REG		0x0d
-#define SP_TEST_PATTERN_EN		BIT(7)
-#define SP_VIDEO_PROCESS_EN		BIT(6)
-#define SP_VID_US_MODE			BIT(3)
-#define SP_VID_DS_MODE			BIT(2)
-#define SP_UP_SAMPLE			BIT(1)
-#define SP_DOWN_SAMPLE			BIT(0)
-
-/* Video Control Register 8 */
-#define SP_VID_CTRL8_REG		0x0f
-#define SP_VID_VRES_TH			BIT(0)
-
-/* Total Line Status Low Byte Register */
-#define SP_TOTAL_LINE_STAL_REG		0x24
-
-/* Total Line Status High Byte Register */
-#define SP_TOTAL_LINE_STAH_REG		0x25
-
-/* Active Line Status Low Byte Register */
-#define SP_ACT_LINE_STAL_REG		0x26
-
-/* Active Line Status High Byte Register */
-#define SP_ACT_LINE_STAH_REG		0x27
-
-/* Vertical Front Porch Status Register */
-#define SP_V_F_PORCH_STA_REG		0x28
-
-/* Vertical SYNC Width Status Register */
-#define SP_V_SYNC_STA_REG		0x29
-
-/* Vertical Back Porch Status Register */
-#define SP_V_B_PORCH_STA_REG		0x2a
-
-/* Total Pixel Status Low Byte Register */
-#define SP_TOTAL_PIXEL_STAL_REG		0x2b
-
-/* Total Pixel Status High Byte Register */
-#define SP_TOTAL_PIXEL_STAH_REG		0x2c
-
-/* Active Pixel Status Low Byte Register */
-#define SP_ACT_PIXEL_STAL_REG		0x2d
-
-/* Active Pixel Status High Byte Register */
-#define SP_ACT_PIXEL_STAH_REG		0x2e
-
-/* Horizontal Front Porch Status Low Byte Register */
-#define SP_H_F_PORCH_STAL_REG		0x2f
-
-/* Horizontal Front Porch Statys High Byte Register */
-#define SP_H_F_PORCH_STAH_REG		0x30
-
-/* Horizontal SYNC Width Status Low Byte Register */
-#define SP_H_SYNC_STAL_REG		0x31
-
-/* Horizontal SYNC Width Status High Byte Register */
-#define SP_H_SYNC_STAH_REG		0x32
-
-/* Horizontal Back Porch Status Low Byte Register */
-#define SP_H_B_PORCH_STAL_REG		0x33
-
-/* Horizontal Back Porch Status High Byte Register */
-#define SP_H_B_PORCH_STAH_REG		0x34
-
-/* InfoFrame AVI Packet DB1 Register */
-#define SP_INFOFRAME_AVI_DB1_REG	0x70
-
-/* Bit Control Specific Register */
-#define SP_BIT_CTRL_SPECIFIC_REG	0x80
-#define SP_BIT_CTRL_SELECT_SHIFT	1
-#define SP_ENABLE_BIT_CTRL		BIT(0)
-
-/* InfoFrame Audio Packet DB1 Register */
-#define SP_INFOFRAME_AUD_DB1_REG	0x83
-
-/* InfoFrame MPEG Packet DB1 Register */
-#define SP_INFOFRAME_MPEG_DB1_REG	0xb0
-
-/* Audio Channel Status Registers */
-#define SP_AUD_CH_STATUS_BASE		0xd0
-
-/* Audio Channel Num Register 5 */
-#define SP_I2S_CHANNEL_NUM_MASK		0xe0
-#  define SP_I2S_CH_NUM_1		(0x00 << 5)
-#  define SP_I2S_CH_NUM_2		(0x01 << 5)
-#  define SP_I2S_CH_NUM_3		(0x02 << 5)
-#  define SP_I2S_CH_NUM_4		(0x03 << 5)
-#  define SP_I2S_CH_NUM_5		(0x04 << 5)
-#  define SP_I2S_CH_NUM_6		(0x05 << 5)
-#  define SP_I2S_CH_NUM_7		(0x06 << 5)
-#  define SP_I2S_CH_NUM_8		(0x07 << 5)
-#define SP_EXT_VUCP			BIT(2)
-#define SP_VBIT				BIT(1)
-#define SP_AUDIO_LAYOUT			BIT(0)
-
-/* Analog Debug Register 2 */
-#define SP_ANALOG_DEBUG2_REG		0xdd
-#define SP_FORCE_SW_OFF_BYPASS		0x20
-#define SP_XTAL_FRQ			0x1c
-#  define SP_XTAL_FRQ_19M2		(0x00 << 2)
-#  define SP_XTAL_FRQ_24M		(0x01 << 2)
-#  define SP_XTAL_FRQ_25M		(0x02 << 2)
-#  define SP_XTAL_FRQ_26M		(0x03 << 2)
-#  define SP_XTAL_FRQ_27M		(0x04 << 2)
-#  define SP_XTAL_FRQ_38M4		(0x05 << 2)
-#  define SP_XTAL_FRQ_52M		(0x06 << 2)
-#define SP_POWERON_TIME_1P5MS		0x03
-
-/* Analog Control 0 Register */
-#define SP_ANALOG_CTRL0_REG		0xe1
-
-/* Common Interrupt Status Register 1 */
-#define SP_COMMON_INT_STATUS_BASE	(0xf1 - 1)
-#define SP_PLL_LOCK_CHG			0x40
-
-/* Common Interrupt Status Register 2 */
-#define SP_COMMON_INT_STATUS2		0xf2
-#define SP_HDCP_AUTH_CHG		BIT(1)
-#define SP_HDCP_AUTH_DONE		BIT(0)
-
-#define SP_HDCP_LINK_CHECK_FAIL		BIT(0)
-
-/* Common Interrupt Status Register 4 */
-#define SP_COMMON_INT_STATUS4_REG	0xf4
-#define SP_HPD_IRQ			BIT(6)
-#define SP_HPD_ESYNC_ERR		BIT(4)
-#define SP_HPD_CHG			BIT(2)
-#define SP_HPD_LOST			BIT(1)
-#define SP_HPD_PLUG			BIT(0)
-
-/* DP Interrupt Status Register */
-#define SP_DP_INT_STATUS1_REG		0xf7
-#define SP_TRAINING_FINISH		BIT(5)
-#define SP_POLLING_ERR			BIT(4)
-
-/* Common Interrupt Mask Register */
-#define SP_COMMON_INT_MASK_BASE		(0xf8 - 1)
-
-#define SP_COMMON_INT_MASK4_REG		0xfb
-
-/* DP Interrupts Mask Register */
-#define SP_DP_INT_MASK1_REG		0xfe
-
-/* Interrupt Control Register */
-#define SP_INT_CTRL_REG			0xff
-
-/***************************************************************/
-/* Register definition of device address 0x7a                  */
-/***************************************************************/
-
-/* DP TX Link Training Control Register */
-#define SP_DP_TX_LT_CTRL0_REG		0x30
-
-/* PD 1.2 Lint Training 80bit Pattern Register */
-#define SP_DP_LT_80BIT_PATTERN0_REG	0x80
-#define SP_DP_LT_80BIT_PATTERN_REG_NUM	10
-
-/* Audio Interface Control Register 0 */
-#define SP_AUD_INTERFACE_CTRL0_REG	0x5f
-#define SP_AUD_INTERFACE_DISABLE	0x80
-
-/* Audio Interface Control Register 2 */
-#define SP_AUD_INTERFACE_CTRL2_REG	0x60
-#define SP_M_AUD_ADJUST_ST		0x04
-
-/* Audio Interface Control Register 3 */
-#define SP_AUD_INTERFACE_CTRL3_REG	0x62
-
-/* Audio Interface Control Register 4 */
-#define SP_AUD_INTERFACE_CTRL4_REG	0x67
-
-/* Audio Interface Control Register 5 */
-#define SP_AUD_INTERFACE_CTRL5_REG	0x68
-
-/* Audio Interface Control Register 6 */
-#define SP_AUD_INTERFACE_CTRL6_REG	0x69
-
-/* Firmware Version Register */
-#define SP_FW_VER_REG			0xb7
-
-#endif
diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig
index e930ff9b5cd4..e1fa7d820373 100644
--- a/drivers/gpu/drm/bridge/analogix/Kconfig
+++ b/drivers/gpu/drm/bridge/analogix/Kconfig
@@ -1,4 +1,27 @@
 # SPDX-License-Identifier: GPL-2.0-only
+config DRM_ANALOGIX_ANX6345
+	tristate "Analogix ANX6345 bridge"
+	depends on OF
+	select DRM_ANALOGIX_DP
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	help
+	  ANX6345 is an ultra-low Full-HD DisplayPort/eDP
+	  transmitter designed for portable devices. The
+	  ANX6345 transforms the LVTTL RGB output of an
+	  application processor to eDP or DisplayPort.
+
+config DRM_ANALOGIX_ANX78XX
+	tristate "Analogix ANX78XX bridge"
+	select DRM_ANALOGIX_DP
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	help
+	  ANX78XX is an ultra-low power Full-HD SlimPort transmitter
+	  designed for portable devices. The ANX78XX transforms
+	  the HDMI output of an application processor to MyDP
+	  or DisplayPort.
+
 config DRM_ANALOGIX_DP
 	tristate
 	depends on DRM
diff --git a/drivers/gpu/drm/bridge/analogix/Makefile b/drivers/gpu/drm/bridge/analogix/Makefile
index fdbf3fd2f087..97669b374098 100644
--- a/drivers/gpu/drm/bridge/analogix/Makefile
+++ b/drivers/gpu/drm/bridge/analogix/Makefile
@@ -1,3 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-analogix_dp-objs := analogix_dp_core.o analogix_dp_reg.o
+analogix_dp-objs := analogix_dp_core.o analogix_dp_reg.o analogix-i2c-dptx.o
+obj-$(CONFIG_DRM_ANALOGIX_ANX6345) += analogix-anx6345.o
+obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o
 obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix_dp.o
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c
new file mode 100644
index 000000000000..56f55c53abfd
--- /dev/null
+++ b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c
@@ -0,0 +1,817 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2016, Analogix Semiconductor.
+ * Copyright(c) 2017, Icenowy Zheng <icenowy@aosc.io>
+ *
+ * Based on anx7808 driver obtained from chromeos with copyright:
+ * Copyright(c) 2013, Google Inc.
+ */
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/types.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+
+#include "analogix-i2c-dptx.h"
+#include "analogix-i2c-txcommon.h"
+
+#define POLL_DELAY		50000 /* us */
+#define POLL_TIMEOUT		5000000 /* us */
+
+#define I2C_IDX_DPTX		0
+#define I2C_IDX_TXCOM		1
+
+static const u8 anx6345_i2c_addresses[] = {
+	[I2C_IDX_DPTX]	= 0x70,
+	[I2C_IDX_TXCOM]	= 0x72,
+};
+#define I2C_NUM_ADDRESSES	ARRAY_SIZE(anx6345_i2c_addresses)
+
+struct anx6345 {
+	struct drm_dp_aux aux;
+	struct drm_bridge bridge;
+	struct i2c_client *client;
+	struct edid *edid;
+	struct drm_connector connector;
+	struct drm_panel *panel;
+	struct regulator *dvdd12;
+	struct regulator *dvdd25;
+	struct gpio_desc *gpiod_reset;
+	struct mutex lock;	/* protect EDID access */
+
+	/* I2C Slave addresses of ANX6345 are mapped as DPTX and SYS */
+	struct i2c_client *i2c_clients[I2C_NUM_ADDRESSES];
+	struct regmap *map[I2C_NUM_ADDRESSES];
+
+	u16 chipid;
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
+
+	bool powered;
+};
+
+static inline struct anx6345 *connector_to_anx6345(struct drm_connector *c)
+{
+	return container_of(c, struct anx6345, connector);
+}
+
+static inline struct anx6345 *bridge_to_anx6345(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct anx6345, bridge);
+}
+
+static int anx6345_set_bits(struct regmap *map, u8 reg, u8 mask)
+{
+	return regmap_update_bits(map, reg, mask, mask);
+}
+
+static int anx6345_clear_bits(struct regmap *map, u8 reg, u8 mask)
+{
+	return regmap_update_bits(map, reg, mask, 0);
+}
+
+static ssize_t anx6345_aux_transfer(struct drm_dp_aux *aux,
+				    struct drm_dp_aux_msg *msg)
+{
+	struct anx6345 *anx6345 = container_of(aux, struct anx6345, aux);
+
+	return anx_dp_aux_transfer(anx6345->map[I2C_IDX_DPTX], msg);
+}
+
+static int anx6345_dp_link_training(struct anx6345 *anx6345)
+{
+	unsigned int value;
+	u8 dp_bw, dpcd[2];
+	int err;
+
+	err = anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM],
+				 SP_POWERDOWN_CTRL_REG,
+				 SP_TOTAL_PD);
+	if (err)
+		return err;
+
+	err = drm_dp_dpcd_readb(&anx6345->aux, DP_MAX_LINK_RATE, &dp_bw);
+	if (err < 0)
+		return err;
+
+	switch (dp_bw) {
+	case DP_LINK_BW_1_62:
+	case DP_LINK_BW_2_7:
+		break;
+
+	default:
+		DRM_DEBUG_KMS("DP bandwidth (%#02x) not supported\n", dp_bw);
+		return -EINVAL;
+	}
+
+	err = anx6345_set_bits(anx6345->map[I2C_IDX_TXCOM], SP_VID_CTRL1_REG,
+			       SP_VIDEO_MUTE);
+	if (err)
+		return err;
+
+	err = anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM],
+				 SP_VID_CTRL1_REG, SP_VIDEO_EN);
+	if (err)
+		return err;
+
+	/* Get DPCD info */
+	err = drm_dp_dpcd_read(&anx6345->aux, DP_DPCD_REV,
+			       &anx6345->dpcd, DP_RECEIVER_CAP_SIZE);
+	if (err < 0) {
+		DRM_ERROR("Failed to read DPCD: %d\n", err);
+		return err;
+	}
+
+	/* Clear channel x SERDES power down */
+	err = anx6345_clear_bits(anx6345->map[I2C_IDX_DPTX],
+				 SP_DP_ANALOG_POWER_DOWN_REG, SP_CH0_PD);
+	if (err)
+		return err;
+
+	/*
+	 * Power up the sink (DP_SET_POWER register is only available on DPCD
+	 * v1.1 and later).
+	 */
+	if (anx6345->dpcd[DP_DPCD_REV] >= 0x11) {
+		err = drm_dp_dpcd_readb(&anx6345->aux, DP_SET_POWER, &dpcd[0]);
+		if (err < 0) {
+			DRM_ERROR("Failed to read DP_SET_POWER register: %d\n",
+				  err);
+			return err;
+		}
+
+		dpcd[0] &= ~DP_SET_POWER_MASK;
+		dpcd[0] |= DP_SET_POWER_D0;
+
+		err = drm_dp_dpcd_writeb(&anx6345->aux, DP_SET_POWER, dpcd[0]);
+		if (err < 0) {
+			DRM_ERROR("Failed to power up DisplayPort link: %d\n",
+				  err);
+			return err;
+		}
+
+		/*
+		 * According to the DP 1.1 specification, a "Sink Device must
+		 * exit the power saving state within 1 ms" (Section 2.5.3.1,
+		 * Table 5-52, "Sink Control Field" (register 0x600).
+		 */
+		usleep_range(1000, 2000);
+	}
+
+	/* Possibly enable downspread on the sink */
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+			   SP_DP_DOWNSPREAD_CTRL1_REG, 0);
+	if (err)
+		return err;
+
+	if (anx6345->dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5) {
+		DRM_DEBUG("Enable downspread on the sink\n");
+		/* 4000PPM */
+		err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+				   SP_DP_DOWNSPREAD_CTRL1_REG, 8);
+		if (err)
+			return err;
+
+		err = drm_dp_dpcd_writeb(&anx6345->aux, DP_DOWNSPREAD_CTRL,
+					 DP_SPREAD_AMP_0_5);
+		if (err < 0)
+			return err;
+	} else {
+		err = drm_dp_dpcd_writeb(&anx6345->aux, DP_DOWNSPREAD_CTRL, 0);
+		if (err < 0)
+			return err;
+	}
+
+	/* Set the lane count and the link rate on the sink */
+	if (drm_dp_enhanced_frame_cap(anx6345->dpcd))
+		err = anx6345_set_bits(anx6345->map[I2C_IDX_DPTX],
+				       SP_DP_SYSTEM_CTRL_BASE + 4,
+				       SP_ENHANCED_MODE);
+	else
+		err = anx6345_clear_bits(anx6345->map[I2C_IDX_DPTX],
+					 SP_DP_SYSTEM_CTRL_BASE + 4,
+					 SP_ENHANCED_MODE);
+	if (err)
+		return err;
+
+	dpcd[0] = drm_dp_max_link_rate(anx6345->dpcd);
+	dpcd[0] = drm_dp_link_rate_to_bw_code(dpcd[0]);
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+			   SP_DP_MAIN_LINK_BW_SET_REG, dpcd[0]);
+	if (err)
+		return err;
+
+	dpcd[1] = drm_dp_max_lane_count(anx6345->dpcd);
+
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+			   SP_DP_LANE_COUNT_SET_REG, dpcd[1]);
+	if (err)
+		return err;
+
+	if (drm_dp_enhanced_frame_cap(anx6345->dpcd))
+		dpcd[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	err = drm_dp_dpcd_write(&anx6345->aux, DP_LINK_BW_SET, dpcd,
+				sizeof(dpcd));
+
+	if (err < 0) {
+		DRM_ERROR("Failed to configure link: %d\n", err);
+		return err;
+	}
+
+	/* Start training on the source */
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX], SP_DP_LT_CTRL_REG,
+			   SP_LT_EN);
+	if (err)
+		return err;
+
+	return regmap_read_poll_timeout(anx6345->map[I2C_IDX_DPTX],
+				       SP_DP_LT_CTRL_REG,
+				       value, !(value & SP_DP_LT_INPROGRESS),
+				       POLL_DELAY, POLL_TIMEOUT);
+}
+
+static int anx6345_tx_initialization(struct anx6345 *anx6345)
+{
+	int err, i;
+
+	/* FIXME: colordepth is hardcoded for now */
+	err = regmap_write(anx6345->map[I2C_IDX_TXCOM], SP_VID_CTRL2_REG,
+			   SP_IN_BPC_6BIT << SP_IN_BPC_SHIFT);
+	if (err)
+		return err;
+
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX], SP_DP_PLL_CTRL_REG, 0);
+	if (err)
+		return err;
+
+	err = regmap_write(anx6345->map[I2C_IDX_TXCOM],
+			   SP_ANALOG_DEBUG1_REG, 0);
+	if (err)
+		return err;
+
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+			   SP_DP_LINK_DEBUG_CTRL_REG,
+			   SP_NEW_PRBS7 | SP_M_VID_DEBUG);
+	if (err)
+		return err;
+
+	err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+			   SP_DP_ANALOG_POWER_DOWN_REG, 0);
+	if (err)
+		return err;
+
+	/* Force HPD */
+	err = anx6345_set_bits(anx6345->map[I2C_IDX_DPTX],
+			       SP_DP_SYSTEM_CTRL_BASE + 3,
+			       SP_HPD_FORCE | SP_HPD_CTRL);
+	if (err)
+		return err;
+
+	for (i = 0; i < 4; i++) {
+		/* 4 lanes */
+		err = regmap_write(anx6345->map[I2C_IDX_DPTX],
+				   SP_DP_LANE0_LT_CTRL_REG + i, 0);
+		if (err)
+			return err;
+	}
+
+	/* Reset AUX */
+	err = anx6345_set_bits(anx6345->map[I2C_IDX_TXCOM],
+			       SP_RESET_CTRL2_REG, SP_AUX_RST);
+	if (err)
+		return err;
+
+	return anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM],
+				 SP_RESET_CTRL2_REG, SP_AUX_RST);
+}
+
+static void anx6345_poweron(struct anx6345 *anx6345)
+{
+	int err;
+
+	/* Ensure reset is asserted before starting power on sequence */
+	gpiod_set_value_cansleep(anx6345->gpiod_reset, 1);
+	usleep_range(1000, 2000);
+
+	err = regulator_enable(anx6345->dvdd12);
+	if (err) {
+		DRM_ERROR("Failed to enable dvdd12 regulator: %d\n",
+			  err);
+		return;
+	}
+
+	/* T1 - delay between VDD12 and VDD25 should be 0-2ms */
+	usleep_range(1000, 2000);
+
+	err = regulator_enable(anx6345->dvdd25);
+	if (err) {
+		DRM_ERROR("Failed to enable dvdd25 regulator: %d\n",
+			  err);
+		return;
+	}
+
+	/* T2 - delay between RESETN and all power rail stable,
+	 * should be 2-5ms
+	 */
+	usleep_range(2000, 5000);
+
+	gpiod_set_value_cansleep(anx6345->gpiod_reset, 0);
+
+	/* Power on registers module */
+	anx6345_set_bits(anx6345->map[I2C_IDX_TXCOM], SP_POWERDOWN_CTRL_REG,
+			 SP_HDCP_PD | SP_AUDIO_PD | SP_VIDEO_PD | SP_LINK_PD);
+	anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM], SP_POWERDOWN_CTRL_REG,
+			   SP_REGISTER_PD | SP_TOTAL_PD);
+
+	if (anx6345->panel)
+		drm_panel_prepare(anx6345->panel);
+
+	anx6345->powered = true;
+}
+
+static void anx6345_poweroff(struct anx6345 *anx6345)
+{
+	int err;
+
+	gpiod_set_value_cansleep(anx6345->gpiod_reset, 1);
+	usleep_range(1000, 2000);
+
+	if (anx6345->panel)
+		drm_panel_unprepare(anx6345->panel);
+
+	err = regulator_disable(anx6345->dvdd25);
+	if (err) {
+		DRM_ERROR("Failed to disable dvdd25 regulator: %d\n",
+			  err);
+		return;
+	}
+
+	usleep_range(5000, 10000);
+
+	err = regulator_disable(anx6345->dvdd12);
+	if (err) {
+		DRM_ERROR("Failed to disable dvdd12 regulator: %d\n",
+			  err);
+		return;
+	}
+
+	usleep_range(1000, 2000);
+
+	anx6345->powered = false;
+}
+
+static int anx6345_start(struct anx6345 *anx6345)
+{
+	int err;
+
+	if (!anx6345->powered)
+		anx6345_poweron(anx6345);
+
+	/* Power on needed modules */
+	err = anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM],
+				SP_POWERDOWN_CTRL_REG,
+				SP_VIDEO_PD | SP_LINK_PD);
+
+	err = anx6345_tx_initialization(anx6345);
+	if (err) {
+		DRM_ERROR("Failed eDP transmitter initialization: %d\n", err);
+		anx6345_poweroff(anx6345);
+		return err;
+	}
+
+	err = anx6345_dp_link_training(anx6345);
+	if (err) {
+		DRM_ERROR("Failed link training: %d\n", err);
+		anx6345_poweroff(anx6345);
+		return err;
+	}
+
+	/*
+	 * This delay seems to help keep the hardware in a good state. Without
+	 * it, there are times where it fails silently.
+	 */
+	usleep_range(10000, 15000);
+
+	return 0;
+}
+
+static int anx6345_config_dp_output(struct anx6345 *anx6345)
+{
+	int err;
+
+	err = anx6345_clear_bits(anx6345->map[I2C_IDX_TXCOM], SP_VID_CTRL1_REG,
+				 SP_VIDEO_MUTE);
+	if (err)
+		return err;
+
+	/* Enable DP output */
+	err = anx6345_set_bits(anx6345->map[I2C_IDX_TXCOM], SP_VID_CTRL1_REG,
+			       SP_VIDEO_EN);
+	if (err)
+		return err;
+
+	/* Force stream valid */
+	return anx6345_set_bits(anx6345->map[I2C_IDX_DPTX],
+			       SP_DP_SYSTEM_CTRL_BASE + 3,
+			       SP_STRM_FORCE | SP_STRM_CTRL);
+}
+
+static int anx6345_get_downstream_info(struct anx6345 *anx6345)
+{
+	u8 value;
+	int err;
+
+	err = drm_dp_dpcd_readb(&anx6345->aux, DP_SINK_COUNT, &value);
+	if (err < 0) {
+		DRM_ERROR("Get sink count failed %d\n", err);
+		return err;
+	}
+
+	if (!DP_GET_SINK_COUNT(value)) {
+		DRM_ERROR("Downstream disconnected\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int anx6345_get_modes(struct drm_connector *connector)
+{
+	struct anx6345 *anx6345 = connector_to_anx6345(connector);
+	int err, num_modes = 0;
+	bool power_off = false;
+
+	mutex_lock(&anx6345->lock);
+
+	if (!anx6345->edid) {
+		if (!anx6345->powered) {
+			anx6345_poweron(anx6345);
+			power_off = true;
+		}
+
+		err = anx6345_get_downstream_info(anx6345);
+		if (err) {
+			DRM_ERROR("Failed to get downstream info: %d\n", err);
+			goto unlock;
+		}
+
+		anx6345->edid = drm_get_edid(connector, &anx6345->aux.ddc);
+		if (!anx6345->edid)
+			DRM_ERROR("Failed to read EDID from panel\n");
+
+		err = drm_connector_update_edid_property(connector,
+							 anx6345->edid);
+		if (err) {
+			DRM_ERROR("Failed to update EDID property: %d\n", err);
+			goto unlock;
+		}
+	}
+
+	num_modes += drm_add_edid_modes(connector, anx6345->edid);
+
+unlock:
+	if (power_off)
+		anx6345_poweroff(anx6345);
+
+	mutex_unlock(&anx6345->lock);
+
+	if (!num_modes && anx6345->panel)
+		num_modes += drm_panel_get_modes(anx6345->panel, connector);
+
+	return num_modes;
+}
+
+static const struct drm_connector_helper_funcs anx6345_connector_helper_funcs = {
+	.get_modes = anx6345_get_modes,
+};
+
+static void
+anx6345_connector_destroy(struct drm_connector *connector)
+{
+	struct anx6345 *anx6345 = connector_to_anx6345(connector);
+
+	if (anx6345->panel)
+		drm_panel_detach(anx6345->panel);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs anx6345_connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = anx6345_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int anx6345_bridge_attach(struct drm_bridge *bridge)
+{
+	struct anx6345 *anx6345 = bridge_to_anx6345(bridge);
+	int err;
+
+	if (!bridge->encoder) {
+		DRM_ERROR("Parent encoder object not found");
+		return -ENODEV;
+	}
+
+	/* Register aux channel */
+	anx6345->aux.name = "DP-AUX";
+	anx6345->aux.dev = &anx6345->client->dev;
+	anx6345->aux.transfer = anx6345_aux_transfer;
+
+	err = drm_dp_aux_register(&anx6345->aux);
+	if (err < 0) {
+		DRM_ERROR("Failed to register aux channel: %d\n", err);
+		return err;
+	}
+
+	err = drm_connector_init(bridge->dev, &anx6345->connector,
+				 &anx6345_connector_funcs,
+				 DRM_MODE_CONNECTOR_eDP);
+	if (err) {
+		DRM_ERROR("Failed to initialize connector: %d\n", err);
+		return err;
+	}
+
+	drm_connector_helper_add(&anx6345->connector,
+				 &anx6345_connector_helper_funcs);
+
+	err = drm_connector_register(&anx6345->connector);
+	if (err) {
+		DRM_ERROR("Failed to register connector: %d\n", err);
+		return err;
+	}
+
+	anx6345->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+	err = drm_connector_attach_encoder(&anx6345->connector,
+					   bridge->encoder);
+	if (err) {
+		DRM_ERROR("Failed to link up connector to encoder: %d\n", err);
+		return err;
+	}
+
+	if (anx6345->panel) {
+		err = drm_panel_attach(anx6345->panel, &anx6345->connector);
+		if (err) {
+			DRM_ERROR("Failed to attach panel: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static enum drm_mode_status
+anx6345_bridge_mode_valid(struct drm_bridge *bridge,
+			  const struct drm_display_mode *mode)
+{
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		return MODE_NO_INTERLACE;
+
+	/* Max 1200p at 5.4 Ghz, one lane */
+	if (mode->clock > 154000)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static void anx6345_bridge_disable(struct drm_bridge *bridge)
+{
+	struct anx6345 *anx6345 = bridge_to_anx6345(bridge);
+
+	/* Power off all modules except configuration registers access */
+	anx6345_set_bits(anx6345->map[I2C_IDX_TXCOM], SP_POWERDOWN_CTRL_REG,
+			 SP_HDCP_PD | SP_AUDIO_PD | SP_VIDEO_PD | SP_LINK_PD);
+	if (anx6345->panel)
+		drm_panel_disable(anx6345->panel);
+
+	if (anx6345->powered)
+		anx6345_poweroff(anx6345);
+}
+
+static void anx6345_bridge_enable(struct drm_bridge *bridge)
+{
+	struct anx6345 *anx6345 = bridge_to_anx6345(bridge);
+	int err;
+
+	if (anx6345->panel)
+		drm_panel_enable(anx6345->panel);
+
+	err = anx6345_start(anx6345);
+	if (err) {
+		DRM_ERROR("Failed to initialize: %d\n", err);
+		return;
+	}
+
+	err = anx6345_config_dp_output(anx6345);
+	if (err)
+		DRM_ERROR("Failed to enable DP output: %d\n", err);
+}
+
+static const struct drm_bridge_funcs anx6345_bridge_funcs = {
+	.attach = anx6345_bridge_attach,
+	.mode_valid = anx6345_bridge_mode_valid,
+	.disable = anx6345_bridge_disable,
+	.enable = anx6345_bridge_enable,
+};
+
+static void unregister_i2c_dummy_clients(struct anx6345 *anx6345)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(anx6345->i2c_clients); i++)
+		if (anx6345->i2c_clients[i] &&
+		    anx6345->i2c_clients[i]->addr != anx6345->client->addr)
+			i2c_unregister_device(anx6345->i2c_clients[i]);
+}
+
+static const struct regmap_config anx6345_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0xff,
+	.cache_type = REGCACHE_NONE,
+};
+
+static const u16 anx6345_chipid_list[] = {
+	0x6345,
+};
+
+static bool anx6345_get_chip_id(struct anx6345 *anx6345)
+{
+	unsigned int i, idl, idh, version;
+
+	if (regmap_read(anx6345->map[I2C_IDX_TXCOM], SP_DEVICE_IDL_REG, &idl))
+		return false;
+
+	if (regmap_read(anx6345->map[I2C_IDX_TXCOM], SP_DEVICE_IDH_REG, &idh))
+		return false;
+
+	anx6345->chipid = (u8)idl | ((u8)idh << 8);
+
+	if (regmap_read(anx6345->map[I2C_IDX_TXCOM], SP_DEVICE_VERSION_REG,
+			&version))
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(anx6345_chipid_list); i++) {
+		if (anx6345->chipid == anx6345_chipid_list[i]) {
+			DRM_INFO("Found ANX%x (ver. %d) eDP Transmitter\n",
+				 anx6345->chipid, version);
+			return true;
+		}
+	}
+
+	DRM_ERROR("ANX%x (ver. %d) not supported by this driver\n",
+		  anx6345->chipid, version);
+
+	return false;
+}
+
+static int anx6345_i2c_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id)
+{
+	struct anx6345 *anx6345;
+	struct device *dev;
+	int i, err;
+
+	anx6345 = devm_kzalloc(&client->dev, sizeof(*anx6345), GFP_KERNEL);
+	if (!anx6345)
+		return -ENOMEM;
+
+	mutex_init(&anx6345->lock);
+
+	anx6345->bridge.of_node = client->dev.of_node;
+
+	anx6345->client = client;
+	i2c_set_clientdata(client, anx6345);
+
+	dev = &anx6345->client->dev;
+
+	err = drm_of_find_panel_or_bridge(client->dev.of_node, 1, 0,
+					  &anx6345->panel, NULL);
+	if (err == -EPROBE_DEFER)
+		return err;
+
+	if (err)
+		DRM_DEBUG("No panel found\n");
+
+	/* 1.2V digital core power regulator  */
+	anx6345->dvdd12 = devm_regulator_get(dev, "dvdd12-supply");
+	if (IS_ERR(anx6345->dvdd12)) {
+		DRM_ERROR("dvdd12-supply not found\n");
+		return PTR_ERR(anx6345->dvdd12);
+	}
+
+	/* 2.5V digital core power regulator  */
+	anx6345->dvdd25 = devm_regulator_get(dev, "dvdd25-supply");
+	if (IS_ERR(anx6345->dvdd25)) {
+		DRM_ERROR("dvdd25-supply not found\n");
+		return PTR_ERR(anx6345->dvdd25);
+	}
+
+	/* GPIO for chip reset */
+	anx6345->gpiod_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(anx6345->gpiod_reset)) {
+		DRM_ERROR("Reset gpio not found\n");
+		return PTR_ERR(anx6345->gpiod_reset);
+	}
+
+	/* Map slave addresses of ANX6345 */
+	for (i = 0; i < I2C_NUM_ADDRESSES; i++) {
+		if (anx6345_i2c_addresses[i] >> 1 != client->addr)
+			anx6345->i2c_clients[i] = i2c_new_dummy_device(client->adapter,
+						anx6345_i2c_addresses[i] >> 1);
+		else
+			anx6345->i2c_clients[i] = client;
+
+		if (IS_ERR(anx6345->i2c_clients[i])) {
+			err = PTR_ERR(anx6345->i2c_clients[i]);
+			DRM_ERROR("Failed to reserve I2C bus %02x\n",
+				  anx6345_i2c_addresses[i]);
+			goto err_unregister_i2c;
+		}
+
+		anx6345->map[i] = devm_regmap_init_i2c(anx6345->i2c_clients[i],
+						       &anx6345_regmap_config);
+		if (IS_ERR(anx6345->map[i])) {
+			err = PTR_ERR(anx6345->map[i]);
+			DRM_ERROR("Failed regmap initialization %02x\n",
+				  anx6345_i2c_addresses[i]);
+			goto err_unregister_i2c;
+		}
+	}
+
+	/* Look for supported chip ID */
+	anx6345_poweron(anx6345);
+	if (anx6345_get_chip_id(anx6345)) {
+		anx6345->bridge.funcs = &anx6345_bridge_funcs;
+		drm_bridge_add(&anx6345->bridge);
+
+		return 0;
+	} else {
+		anx6345_poweroff(anx6345);
+		err = -ENODEV;
+	}
+
+err_unregister_i2c:
+	unregister_i2c_dummy_clients(anx6345);
+	return err;
+}
+
+static int anx6345_i2c_remove(struct i2c_client *client)
+{
+	struct anx6345 *anx6345 = i2c_get_clientdata(client);
+
+	drm_bridge_remove(&anx6345->bridge);
+
+	unregister_i2c_dummy_clients(anx6345);
+
+	kfree(anx6345->edid);
+
+	mutex_destroy(&anx6345->lock);
+
+	return 0;
+}
+
+static const struct i2c_device_id anx6345_id[] = {
+	{ "anx6345", 0 },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, anx6345_id);
+
+static const struct of_device_id anx6345_match_table[] = {
+	{ .compatible = "analogix,anx6345", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, anx6345_match_table);
+
+static struct i2c_driver anx6345_driver = {
+	.driver = {
+		   .name = "anx6345",
+		   .of_match_table = of_match_ptr(anx6345_match_table),
+		  },
+	.probe = anx6345_i2c_probe,
+	.remove = anx6345_i2c_remove,
+	.id_table = anx6345_id,
+};
+module_i2c_driver(anx6345_driver);
+
+MODULE_DESCRIPTION("ANX6345 eDP Transmitter driver");
+MODULE_AUTHOR("Icenowy Zheng <icenowy@aosc.io>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c
index 3c7cc5af735c..41867be03751 100644
--- a/drivers/gpu/drm/bridge/analogix-anx78xx.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c
@@ -19,6 +19,7 @@
 #include <linux/types.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
@@ -35,15 +36,21 @@
 #define I2C_IDX_RX_P1		4
 
 #define XTAL_CLK		270 /* 27M */
-#define AUX_CH_BUFFER_SIZE	16
-#define AUX_WAIT_TIMEOUT_MS	15
-
-static const u8 anx78xx_i2c_addresses[] = {
-	[I2C_IDX_TX_P0] = TX_P0,
-	[I2C_IDX_TX_P1] = TX_P1,
-	[I2C_IDX_TX_P2] = TX_P2,
-	[I2C_IDX_RX_P0] = RX_P0,
-	[I2C_IDX_RX_P1] = RX_P1,
+
+static const u8 anx7808_i2c_addresses[] = {
+	[I2C_IDX_TX_P0] = 0x78,
+	[I2C_IDX_TX_P1] = 0x7a,
+	[I2C_IDX_TX_P2] = 0x72,
+	[I2C_IDX_RX_P0] = 0x7e,
+	[I2C_IDX_RX_P1] = 0x80,
+};
+
+static const u8 anx781x_i2c_addresses[] = {
+	[I2C_IDX_TX_P0] = 0x70,
+	[I2C_IDX_TX_P1] = 0x7a,
+	[I2C_IDX_TX_P2] = 0x72,
+	[I2C_IDX_RX_P0] = 0x7e,
+	[I2C_IDX_RX_P1] = 0x80,
 };
 
 struct anx78xx_platform_data {
@@ -62,7 +69,6 @@ struct anx78xx {
 	struct i2c_client *client;
 	struct edid *edid;
 	struct drm_connector connector;
-	struct drm_dp_link link;
 	struct anx78xx_platform_data pdata;
 	struct mutex lock;
 
@@ -99,153 +105,11 @@ static int anx78xx_clear_bits(struct regmap *map, u8 reg, u8 mask)
 	return regmap_update_bits(map, reg, mask, 0);
 }
 
-static bool anx78xx_aux_op_finished(struct anx78xx *anx78xx)
-{
-	unsigned int value;
-	int err;
-
-	err = regmap_read(anx78xx->map[I2C_IDX_TX_P0], SP_DP_AUX_CH_CTRL2_REG,
-			  &value);
-	if (err < 0)
-		return false;
-
-	return (value & SP_AUX_EN) == 0;
-}
-
-static int anx78xx_aux_wait(struct anx78xx *anx78xx)
-{
-	unsigned long timeout;
-	unsigned int status;
-	int err;
-
-	timeout = jiffies + msecs_to_jiffies(AUX_WAIT_TIMEOUT_MS) + 1;
-
-	while (!anx78xx_aux_op_finished(anx78xx)) {
-		if (time_after(jiffies, timeout)) {
-			if (!anx78xx_aux_op_finished(anx78xx)) {
-				DRM_ERROR("Timed out waiting AUX to finish\n");
-				return -ETIMEDOUT;
-			}
-
-			break;
-		}
-
-		usleep_range(1000, 2000);
-	}
-
-	/* Read the AUX channel access status */
-	err = regmap_read(anx78xx->map[I2C_IDX_TX_P0], SP_AUX_CH_STATUS_REG,
-			  &status);
-	if (err < 0) {
-		DRM_ERROR("Failed to read from AUX channel: %d\n", err);
-		return err;
-	}
-
-	if (status & SP_AUX_STATUS) {
-		DRM_ERROR("Failed to wait for AUX channel (status: %02x)\n",
-			  status);
-		return -ETIMEDOUT;
-	}
-
-	return 0;
-}
-
-static int anx78xx_aux_address(struct anx78xx *anx78xx, unsigned int addr)
-{
-	int err;
-
-	err = regmap_write(anx78xx->map[I2C_IDX_TX_P0], SP_AUX_ADDR_7_0_REG,
-			   addr & 0xff);
-	if (err)
-		return err;
-
-	err = regmap_write(anx78xx->map[I2C_IDX_TX_P0], SP_AUX_ADDR_15_8_REG,
-			   (addr & 0xff00) >> 8);
-	if (err)
-		return err;
-
-	/*
-	 * DP AUX CH Address Register #2, only update bits[3:0]
-	 * [7:4] RESERVED
-	 * [3:0] AUX_ADDR[19:16], Register control AUX CH address.
-	 */
-	err = regmap_update_bits(anx78xx->map[I2C_IDX_TX_P0],
-				 SP_AUX_ADDR_19_16_REG,
-				 SP_AUX_ADDR_19_16_MASK,
-				 (addr & 0xf0000) >> 16);
-
-	if (err)
-		return err;
-
-	return 0;
-}
-
 static ssize_t anx78xx_aux_transfer(struct drm_dp_aux *aux,
 				    struct drm_dp_aux_msg *msg)
 {
 	struct anx78xx *anx78xx = container_of(aux, struct anx78xx, aux);
-	u8 ctrl1 = msg->request;
-	u8 ctrl2 = SP_AUX_EN;
-	u8 *buffer = msg->buffer;
-	int err;
-
-	/* The DP AUX transmit and receive buffer has 16 bytes. */
-	if (WARN_ON(msg->size > AUX_CH_BUFFER_SIZE))
-		return -E2BIG;
-
-	/* Zero-sized messages specify address-only transactions. */
-	if (msg->size < 1)
-		ctrl2 |= SP_ADDR_ONLY;
-	else	/* For non-zero-sized set the length field. */
-		ctrl1 |= (msg->size - 1) << SP_AUX_LENGTH_SHIFT;
-
-	if ((msg->request & DP_AUX_I2C_READ) == 0) {
-		/* When WRITE | MOT write values to data buffer */
-		err = regmap_bulk_write(anx78xx->map[I2C_IDX_TX_P0],
-					SP_DP_BUF_DATA0_REG, buffer,
-					msg->size);
-		if (err)
-			return err;
-	}
-
-	/* Write address and request */
-	err = anx78xx_aux_address(anx78xx, msg->address);
-	if (err)
-		return err;
-
-	err = regmap_write(anx78xx->map[I2C_IDX_TX_P0], SP_DP_AUX_CH_CTRL1_REG,
-			   ctrl1);
-	if (err)
-		return err;
-
-	/* Start transaction */
-	err = regmap_update_bits(anx78xx->map[I2C_IDX_TX_P0],
-				 SP_DP_AUX_CH_CTRL2_REG, SP_ADDR_ONLY |
-				 SP_AUX_EN, ctrl2);
-	if (err)
-		return err;
-
-	err = anx78xx_aux_wait(anx78xx);
-	if (err)
-		return err;
-
-	msg->reply = DP_AUX_I2C_REPLY_ACK;
-
-	if ((msg->size > 0) && (msg->request & DP_AUX_I2C_READ)) {
-		/* Read values from data buffer */
-		err = regmap_bulk_read(anx78xx->map[I2C_IDX_TX_P0],
-				       SP_DP_BUF_DATA0_REG, buffer,
-				       msg->size);
-		if (err)
-			return err;
-	}
-
-	err = anx78xx_clear_bits(anx78xx->map[I2C_IDX_TX_P0],
-				 SP_DP_AUX_CH_CTRL2_REG, SP_ADDR_ONLY);
-	if (err)
-		return err;
-
-	return msg->size;
+	return anx_dp_aux_transfer(anx78xx->map[I2C_IDX_TX_P0], msg);
 }
 
 static int anx78xx_set_hpd(struct anx78xx *anx78xx)
@@ -715,7 +579,9 @@ static int anx78xx_init_pdata(struct anx78xx *anx78xx)
 	/* 1.0V digital core power regulator  */
 	pdata->dvdd10 = devm_regulator_get(dev, "dvdd10");
 	if (IS_ERR(pdata->dvdd10)) {
-		DRM_ERROR("DVDD10 regulator not found\n");
+		if (PTR_ERR(pdata->dvdd10) != -EPROBE_DEFER)
+			DRM_ERROR("DVDD10 regulator not found\n");
+
 		return PTR_ERR(pdata->dvdd10);
 	}
 
@@ -737,7 +603,7 @@ static int anx78xx_init_pdata(struct anx78xx *anx78xx)
 
 static int anx78xx_dp_link_training(struct anx78xx *anx78xx)
 {
-	u8 dp_bw, value;
+	u8 dp_bw, dpcd[2];
 	int err;
 
 	err = regmap_write(anx78xx->map[I2C_IDX_RX_P0], SP_HDMI_MUTE_CTRL_REG,
@@ -790,18 +656,34 @@ static int anx78xx_dp_link_training(struct anx78xx *anx78xx)
 	if (err)
 		return err;
 
-	/* Check link capabilities */
-	err = drm_dp_link_probe(&anx78xx->aux, &anx78xx->link);
-	if (err < 0) {
-		DRM_ERROR("Failed to probe link capabilities: %d\n", err);
-		return err;
-	}
+	/*
+	 * Power up the sink (DP_SET_POWER register is only available on DPCD
+	 * v1.1 and later).
+	 */
+	if (anx78xx->dpcd[DP_DPCD_REV] >= 0x11) {
+		err = drm_dp_dpcd_readb(&anx78xx->aux, DP_SET_POWER, &dpcd[0]);
+		if (err < 0) {
+			DRM_ERROR("Failed to read DP_SET_POWER register: %d\n",
+				  err);
+			return err;
+		}
 
-	/* Power up the sink */
-	err = drm_dp_link_power_up(&anx78xx->aux, &anx78xx->link);
-	if (err < 0) {
-		DRM_ERROR("Failed to power up DisplayPort link: %d\n", err);
-		return err;
+		dpcd[0] &= ~DP_SET_POWER_MASK;
+		dpcd[0] |= DP_SET_POWER_D0;
+
+		err = drm_dp_dpcd_writeb(&anx78xx->aux, DP_SET_POWER, dpcd[0]);
+		if (err < 0) {
+			DRM_ERROR("Failed to power up DisplayPort link: %d\n",
+				  err);
+			return err;
+		}
+
+		/*
+		 * According to the DP 1.1 specification, a "Sink Device must
+		 * exit the power saving state within 1 ms" (Section 2.5.3.1,
+		 * Table 5-52, "Sink Control Field" (register 0x600).
+		 */
+		usleep_range(1000, 2000);
 	}
 
 	/* Possibly enable downspread on the sink */
@@ -840,15 +722,22 @@ static int anx78xx_dp_link_training(struct anx78xx *anx78xx)
 	if (err)
 		return err;
 
-	value = drm_dp_link_rate_to_bw_code(anx78xx->link.rate);
+	dpcd[0] = drm_dp_max_link_rate(anx78xx->dpcd);
+	dpcd[0] = drm_dp_link_rate_to_bw_code(dpcd[0]);
 	err = regmap_write(anx78xx->map[I2C_IDX_TX_P0],
-			   SP_DP_MAIN_LINK_BW_SET_REG, value);
+			   SP_DP_MAIN_LINK_BW_SET_REG, dpcd[0]);
 	if (err)
 		return err;
 
-	err = drm_dp_link_configure(&anx78xx->aux, &anx78xx->link);
+	dpcd[1] = drm_dp_max_lane_count(anx78xx->dpcd);
+
+	if (drm_dp_enhanced_frame_cap(anx78xx->dpcd))
+		dpcd[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	err = drm_dp_dpcd_write(&anx78xx->aux, DP_LINK_BW_SET, dpcd,
+				sizeof(dpcd));
 	if (err < 0) {
-		DRM_ERROR("Failed to configure DisplayPort link: %d\n", err);
+		DRM_ERROR("Failed to configure link: %d\n", err);
 		return err;
 	}
 
@@ -1301,6 +1190,7 @@ static const struct regmap_config anx78xx_regmap_config = {
 };
 
 static const u16 anx78xx_chipid_list[] = {
+	0x7808,
 	0x7812,
 	0x7814,
 	0x7818,
@@ -1312,6 +1202,7 @@ static int anx78xx_i2c_probe(struct i2c_client *client,
 	struct anx78xx *anx78xx;
 	struct anx78xx_platform_data *pdata;
 	unsigned int i, idl, idh, version;
+	const u8 *i2c_addresses;
 	bool found = false;
 	int err;
 
@@ -1332,7 +1223,9 @@ static int anx78xx_i2c_probe(struct i2c_client *client,
 
 	err = anx78xx_init_pdata(anx78xx);
 	if (err) {
-		DRM_ERROR("Failed to initialize pdata: %d\n", err);
+		if (err != -EPROBE_DEFER)
+			DRM_ERROR("Failed to initialize pdata: %d\n", err);
+
 		return err;
 	}
 
@@ -1349,22 +1242,26 @@ static int anx78xx_i2c_probe(struct i2c_client *client,
 	}
 
 	/* Map slave addresses of ANX7814 */
+	i2c_addresses = device_get_match_data(&client->dev);
 	for (i = 0; i < I2C_NUM_ADDRESSES; i++) {
-		anx78xx->i2c_dummy[i] = i2c_new_dummy(client->adapter,
-						anx78xx_i2c_addresses[i] >> 1);
-		if (!anx78xx->i2c_dummy[i]) {
-			err = -ENOMEM;
-			DRM_ERROR("Failed to reserve I2C bus %02x\n",
-				  anx78xx_i2c_addresses[i]);
+		struct i2c_client *i2c_dummy;
+
+		i2c_dummy = i2c_new_dummy_device(client->adapter,
+						 i2c_addresses[i] >> 1);
+		if (IS_ERR(i2c_dummy)) {
+			err = PTR_ERR(i2c_dummy);
+			DRM_ERROR("Failed to reserve I2C bus %02x: %d\n",
+				  i2c_addresses[i], err);
 			goto err_unregister_i2c;
 		}
 
+		anx78xx->i2c_dummy[i] = i2c_dummy;
 		anx78xx->map[i] = devm_regmap_init_i2c(anx78xx->i2c_dummy[i],
 						       &anx78xx_regmap_config);
 		if (IS_ERR(anx78xx->map[i])) {
 			err = PTR_ERR(anx78xx->map[i]);
 			DRM_ERROR("Failed regmap initialization %02x\n",
-				  anx78xx_i2c_addresses[i]);
+				  i2c_addresses[i]);
 			goto err_unregister_i2c;
 		}
 	}
@@ -1463,7 +1360,10 @@ MODULE_DEVICE_TABLE(i2c, anx78xx_id);
 
 #if IS_ENABLED(CONFIG_OF)
 static const struct of_device_id anx78xx_match_table[] = {
-	{ .compatible = "analogix,anx7814", },
+	{ .compatible = "analogix,anx7808", .data = anx7808_i2c_addresses },
+	{ .compatible = "analogix,anx7812", .data = anx781x_i2c_addresses },
+	{ .compatible = "analogix,anx7814", .data = anx781x_i2c_addresses },
+	{ .compatible = "analogix,anx7818", .data = anx781x_i2c_addresses },
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, anx78xx_match_table);
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.h b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.h
new file mode 100644
index 000000000000..db2a2725acb2
--- /dev/null
+++ b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2016, Analogix Semiconductor. All rights reserved.
+ */
+
+#ifndef __ANX78xx_H
+#define __ANX78xx_H
+
+#include "analogix-i2c-dptx.h"
+#include "analogix-i2c-txcommon.h"
+
+/***************************************************************/
+/* Register definitions for RX_PO                              */
+/***************************************************************/
+
+/*
+ * System Control and Status
+ */
+
+/* Software Reset Register 1 */
+#define SP_SOFTWARE_RESET1_REG		0x11
+#define SP_VIDEO_RST			BIT(4)
+#define SP_HDCP_MAN_RST			BIT(2)
+#define SP_TMDS_RST			BIT(1)
+#define SP_SW_MAN_RST			BIT(0)
+
+/* System Status Register */
+#define SP_SYSTEM_STATUS_REG		0x14
+#define SP_TMDS_CLOCK_DET		BIT(1)
+#define SP_TMDS_DE_DET			BIT(0)
+
+/* HDMI Status Register */
+#define SP_HDMI_STATUS_REG		0x15
+#define SP_HDMI_AUD_LAYOUT		BIT(3)
+#define SP_HDMI_DET			BIT(0)
+#  define SP_DVI_MODE			0
+#  define SP_HDMI_MODE			1
+
+/* HDMI Mute Control Register */
+#define SP_HDMI_MUTE_CTRL_REG		0x16
+#define SP_AUD_MUTE			BIT(1)
+#define SP_VID_MUTE			BIT(0)
+
+/* System Power Down Register 1 */
+#define SP_SYSTEM_POWER_DOWN1_REG	0x18
+#define SP_PWDN_CTRL			BIT(0)
+
+/*
+ * Audio and Video Auto Control
+ */
+
+/* Auto Audio and Video Control register */
+#define SP_AUDVID_CTRL_REG		0x20
+#define SP_AVC_OE			BIT(7)
+#define SP_AAC_OE			BIT(6)
+#define SP_AVC_EN			BIT(1)
+#define SP_AAC_EN			BIT(0)
+
+/* Audio Exception Enable Registers */
+#define SP_AUD_EXCEPTION_ENABLE_BASE	(0x24 - 1)
+/* Bits for Audio Exception Enable Register 3 */
+#define SP_AEC_EN21			BIT(5)
+
+/*
+ * Interrupt
+ */
+
+/* Interrupt Status Register 1 */
+#define SP_INT_STATUS1_REG		0x31
+/* Bits for Interrupt Status Register 1 */
+#define SP_HDMI_DVI			BIT(7)
+#define SP_CKDT_CHG			BIT(6)
+#define SP_SCDT_CHG			BIT(5)
+#define SP_PCLK_CHG			BIT(4)
+#define SP_PLL_UNLOCK			BIT(3)
+#define SP_CABLE_PLUG_CHG		BIT(2)
+#define SP_SET_MUTE			BIT(1)
+#define SP_SW_INTR			BIT(0)
+/* Bits for Interrupt Status Register 2 */
+#define SP_HDCP_ERR			BIT(5)
+#define SP_AUDIO_SAMPLE_CHG		BIT(0)	/* undocumented */
+/* Bits for Interrupt Status Register 3 */
+#define SP_AUD_MODE_CHG			BIT(0)
+/* Bits for Interrupt Status Register 5 */
+#define SP_AUDIO_RCV			BIT(0)
+/* Bits for Interrupt Status Register 6 */
+#define SP_INT_STATUS6_REG		0x36
+#define SP_CTS_RCV			BIT(7)
+#define SP_NEW_AUD_PKT			BIT(4)
+#define SP_NEW_AVI_PKT			BIT(1)
+#define SP_NEW_CP_PKT			BIT(0)
+/* Bits for Interrupt Status Register 7 */
+#define SP_NO_VSI			BIT(7)
+#define SP_NEW_VS			BIT(4)
+
+/* Interrupt Mask 1 Status Registers */
+#define SP_INT_MASK1_REG		0x41
+
+/* HDMI US TIMER Control Register */
+#define SP_HDMI_US_TIMER_CTRL_REG	0x49
+#define SP_MS_TIMER_MARGIN_10_8_MASK	0x07
+
+/*
+ * TMDS Control
+ */
+
+/* TMDS Control Registers */
+#define SP_TMDS_CTRL_BASE		(0x50 - 1)
+/* Bits for TMDS Control Register 7 */
+#define SP_PD_RT			BIT(0)
+
+/*
+ * Video Control
+ */
+
+/* Video Status Register */
+#define SP_VIDEO_STATUS_REG		0x70
+#define SP_COLOR_DEPTH_MASK		0xf0
+#define SP_COLOR_DEPTH_SHIFT		4
+#  define SP_COLOR_DEPTH_MODE_LEGACY	0x00
+#  define SP_COLOR_DEPTH_MODE_24BIT	0x04
+#  define SP_COLOR_DEPTH_MODE_30BIT	0x05
+#  define SP_COLOR_DEPTH_MODE_36BIT	0x06
+#  define SP_COLOR_DEPTH_MODE_48BIT	0x07
+
+/* Video Data Range Control Register */
+#define SP_VID_DATA_RANGE_CTRL_REG	0x83
+#define SP_R2Y_INPUT_LIMIT		BIT(1)
+
+/* Pixel Clock High Resolution Counter Registers */
+#define SP_PCLK_HIGHRES_CNT_BASE	(0x8c - 1)
+
+/*
+ * Audio Control
+ */
+
+/* Number of Audio Channels Status Registers */
+#define SP_AUD_CH_STATUS_REG_NUM	6
+
+/* Audio IN S/PDIF Channel Status Registers */
+#define SP_AUD_SPDIF_CH_STATUS_BASE	0xc7
+
+/* Audio IN S/PDIF Channel Status Register 4 */
+#define SP_FS_FREQ_MASK			0x0f
+#  define SP_FS_FREQ_44100HZ		0x00
+#  define SP_FS_FREQ_48000HZ		0x02
+#  define SP_FS_FREQ_32000HZ		0x03
+#  define SP_FS_FREQ_88200HZ		0x08
+#  define SP_FS_FREQ_96000HZ		0x0a
+#  define SP_FS_FREQ_176400HZ		0x0c
+#  define SP_FS_FREQ_192000HZ		0x0e
+
+/*
+ * Micellaneous Control Block
+ */
+
+/* CHIP Control Register */
+#define SP_CHIP_CTRL_REG		0xe3
+#define SP_MAN_HDMI5V_DET		BIT(3)
+#define SP_PLLLOCK_CKDT_EN		BIT(2)
+#define SP_ANALOG_CKDT_EN		BIT(1)
+#define SP_DIGITAL_CKDT_EN		BIT(0)
+
+/* Packet Receiving Status Register */
+#define SP_PACKET_RECEIVING_STATUS_REG	0xf3
+#define SP_AVI_RCVD			BIT(5)
+#define SP_VSI_RCVD			BIT(1)
+
+/***************************************************************/
+/* Register definitions for RX_P1                              */
+/***************************************************************/
+
+/* HDCP BCAPS Shadow Register */
+#define SP_HDCP_BCAPS_SHADOW_REG	0x2a
+#define SP_BCAPS_REPEATER		BIT(5)
+
+/* HDCP Status Register */
+#define SP_RX_HDCP_STATUS_REG		0x3f
+#define SP_AUTH_EN			BIT(4)
+
+/*
+ * InfoFrame and Control Packet Registers
+ */
+
+/* AVI InfoFrame packet checksum */
+#define SP_AVI_INFOFRAME_CHECKSUM	0xa3
+
+/* AVI InfoFrame Registers */
+#define SP_AVI_INFOFRAME_DATA_BASE	0xa4
+
+#define SP_AVI_COLOR_F_MASK		0x60
+#define SP_AVI_COLOR_F_SHIFT		5
+
+/* Audio InfoFrame Registers */
+#define SP_AUD_INFOFRAME_DATA_BASE	0xc4
+#define SP_AUD_INFOFRAME_LAYOUT_MASK	0x0f
+
+/* MPEG/HDMI Vendor Specific InfoFrame Packet type code */
+#define SP_MPEG_VS_INFOFRAME_TYPE_REG	0xe0
+
+/* MPEG/HDMI Vendor Specific InfoFrame Packet length */
+#define SP_MPEG_VS_INFOFRAME_LEN_REG	0xe2
+
+/* MPEG/HDMI Vendor Specific InfoFrame Packet version number */
+#define SP_MPEG_VS_INFOFRAME_VER_REG	0xe1
+
+/* MPEG/HDMI Vendor Specific InfoFrame Packet content */
+#define SP_MPEG_VS_INFOFRAME_DATA_BASE	0xe4
+
+/* General Control Packet Register */
+#define SP_GENERAL_CTRL_PACKET_REG	0x9f
+#define SP_CLEAR_AVMUTE			BIT(4)
+#define SP_SET_AVMUTE			BIT(0)
+
+/***************************************************************/
+/* Register definitions for TX_P1                              */
+/***************************************************************/
+
+/* DP TX Link Training Control Register */
+#define SP_DP_TX_LT_CTRL0_REG		0x30
+
+/* PD 1.2 Lint Training 80bit Pattern Register */
+#define SP_DP_LT_80BIT_PATTERN0_REG	0x80
+#define SP_DP_LT_80BIT_PATTERN_REG_NUM	10
+
+/* Audio Interface Control Register 0 */
+#define SP_AUD_INTERFACE_CTRL0_REG	0x5f
+#define SP_AUD_INTERFACE_DISABLE	0x80
+
+/* Audio Interface Control Register 2 */
+#define SP_AUD_INTERFACE_CTRL2_REG	0x60
+#define SP_M_AUD_ADJUST_ST		0x04
+
+/* Audio Interface Control Register 3 */
+#define SP_AUD_INTERFACE_CTRL3_REG	0x62
+
+/* Audio Interface Control Register 4 */
+#define SP_AUD_INTERFACE_CTRL4_REG	0x67
+
+/* Audio Interface Control Register 5 */
+#define SP_AUD_INTERFACE_CTRL5_REG	0x68
+
+/* Audio Interface Control Register 6 */
+#define SP_AUD_INTERFACE_CTRL6_REG	0x69
+
+/* Firmware Version Register */
+#define SP_FW_VER_REG			0xb7
+
+#endif
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c b/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c
new file mode 100644
index 000000000000..fe40bab21530
--- /dev/null
+++ b/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2016, Analogix Semiconductor.
+ *
+ * Based on anx7808 driver obtained from chromeos with copyright:
+ * Copyright(c) 2013, Google Inc.
+ */
+#include <linux/regmap.h>
+
+#include <drm/drm.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_print.h>
+
+#include "analogix-i2c-dptx.h"
+
+#define AUX_WAIT_TIMEOUT_MS	15
+#define AUX_CH_BUFFER_SIZE	16
+
+static int anx_i2c_dp_clear_bits(struct regmap *map, u8 reg, u8 mask)
+{
+	return regmap_update_bits(map, reg, mask, 0);
+}
+
+static bool anx_dp_aux_op_finished(struct regmap *map_dptx)
+{
+	unsigned int value;
+	int err;
+
+	err = regmap_read(map_dptx, SP_DP_AUX_CH_CTRL2_REG, &value);
+	if (err < 0)
+		return false;
+
+	return (value & SP_AUX_EN) == 0;
+}
+
+static int anx_dp_aux_wait(struct regmap *map_dptx)
+{
+	unsigned long timeout;
+	unsigned int status;
+	int err;
+
+	timeout = jiffies + msecs_to_jiffies(AUX_WAIT_TIMEOUT_MS) + 1;
+
+	while (!anx_dp_aux_op_finished(map_dptx)) {
+		if (time_after(jiffies, timeout)) {
+			if (!anx_dp_aux_op_finished(map_dptx)) {
+				DRM_ERROR("Timed out waiting AUX to finish\n");
+				return -ETIMEDOUT;
+			}
+
+			break;
+		}
+
+		usleep_range(1000, 2000);
+	}
+
+	/* Read the AUX channel access status */
+	err = regmap_read(map_dptx, SP_AUX_CH_STATUS_REG, &status);
+	if (err < 0) {
+		DRM_ERROR("Failed to read from AUX channel: %d\n", err);
+		return err;
+	}
+
+	if (status & SP_AUX_STATUS) {
+		DRM_ERROR("Failed to wait for AUX channel (status: %02x)\n",
+			  status);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int anx_dp_aux_address(struct regmap *map_dptx, unsigned int addr)
+{
+	int err;
+
+	err = regmap_write(map_dptx, SP_AUX_ADDR_7_0_REG, addr & 0xff);
+	if (err)
+		return err;
+
+	err = regmap_write(map_dptx, SP_AUX_ADDR_15_8_REG,
+			   (addr & 0xff00) >> 8);
+	if (err)
+		return err;
+
+	/*
+	 * DP AUX CH Address Register #2, only update bits[3:0]
+	 * [7:4] RESERVED
+	 * [3:0] AUX_ADDR[19:16], Register control AUX CH address.
+	 */
+	err = regmap_update_bits(map_dptx, SP_AUX_ADDR_19_16_REG,
+				 SP_AUX_ADDR_19_16_MASK,
+				 (addr & 0xf0000) >> 16);
+
+	if (err)
+		return err;
+
+	return 0;
+}
+
+ssize_t anx_dp_aux_transfer(struct regmap *map_dptx,
+				struct drm_dp_aux_msg *msg)
+{
+	u8 ctrl1 = msg->request;
+	u8 ctrl2 = SP_AUX_EN;
+	u8 *buffer = msg->buffer;
+	int err;
+
+	/* The DP AUX transmit and receive buffer has 16 bytes. */
+	if (WARN_ON(msg->size > AUX_CH_BUFFER_SIZE))
+		return -E2BIG;
+
+	/* Zero-sized messages specify address-only transactions. */
+	if (msg->size < 1)
+		ctrl2 |= SP_ADDR_ONLY;
+	else	/* For non-zero-sized set the length field. */
+		ctrl1 |= (msg->size - 1) << SP_AUX_LENGTH_SHIFT;
+
+	if ((msg->size > 0) && ((msg->request & DP_AUX_I2C_READ) == 0)) {
+		/* When WRITE | MOT write values to data buffer */
+		err = regmap_bulk_write(map_dptx,
+					SP_DP_BUF_DATA0_REG, buffer,
+					msg->size);
+		if (err)
+			return err;
+	}
+
+	/* Write address and request */
+	err = anx_dp_aux_address(map_dptx, msg->address);
+	if (err)
+		return err;
+
+	err = regmap_write(map_dptx, SP_DP_AUX_CH_CTRL1_REG, ctrl1);
+	if (err)
+		return err;
+
+	/* Start transaction */
+	err = regmap_update_bits(map_dptx, SP_DP_AUX_CH_CTRL2_REG,
+				 SP_ADDR_ONLY | SP_AUX_EN, ctrl2);
+	if (err)
+		return err;
+
+	err = anx_dp_aux_wait(map_dptx);
+	if (err)
+		return err;
+
+	msg->reply = DP_AUX_I2C_REPLY_ACK;
+
+	if ((msg->size > 0) && (msg->request & DP_AUX_I2C_READ)) {
+		/* Read values from data buffer */
+		err = regmap_bulk_read(map_dptx,
+				       SP_DP_BUF_DATA0_REG, buffer,
+				       msg->size);
+		if (err)
+			return err;
+	}
+
+	err = anx_i2c_dp_clear_bits(map_dptx, SP_DP_AUX_CH_CTRL2_REG,
+				    SP_ADDR_ONLY);
+	if (err)
+		return err;
+
+	return msg->size;
+}
+EXPORT_SYMBOL_GPL(anx_dp_aux_transfer);
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.h b/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.h
new file mode 100644
index 000000000000..663c4bea6e70
--- /dev/null
+++ b/drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2016, Analogix Semiconductor.
+ *
+ * Based on anx7808 driver obtained from chromeos with copyright:
+ * Copyright(c) 2013, Google Inc.
+ */
+#ifndef _ANALOGIX_I2C_DPTX_H_
+#define _ANALOGIX_I2C_DPTX_H_
+
+/***************************************************************/
+/* Register definitions for TX_P0                              */
+/***************************************************************/
+
+/* HDCP Status Register */
+#define SP_TX_HDCP_STATUS_REG		0x00
+#define SP_AUTH_FAIL			BIT(5)
+#define SP_AUTHEN_PASS			BIT(1)
+
+/* HDCP Control Register 0 */
+#define SP_HDCP_CTRL0_REG		0x01
+#define SP_RX_REPEATER			BIT(6)
+#define SP_RE_AUTH			BIT(5)
+#define SP_SW_AUTH_OK			BIT(4)
+#define SP_HARD_AUTH_EN			BIT(3)
+#define SP_HDCP_ENC_EN			BIT(2)
+#define SP_BKSV_SRM_PASS		BIT(1)
+#define SP_KSVLIST_VLD			BIT(0)
+/* HDCP Function Enabled */
+#define SP_HDCP_FUNCTION_ENABLED	(BIT(0) | BIT(1) | BIT(2) | BIT(3))
+
+/* HDCP Receiver BSTATUS Register 0 */
+#define	SP_HDCP_RX_BSTATUS0_REG		0x1b
+/* HDCP Receiver BSTATUS Register 1 */
+#define	SP_HDCP_RX_BSTATUS1_REG		0x1c
+
+/* HDCP Embedded "Blue Screen" Content Registers */
+#define SP_HDCP_VID0_BLUE_SCREEN_REG	0x2c
+#define SP_HDCP_VID1_BLUE_SCREEN_REG	0x2d
+#define SP_HDCP_VID2_BLUE_SCREEN_REG	0x2e
+
+/* HDCP Wait R0 Timing Register */
+#define SP_HDCP_WAIT_R0_TIME_REG	0x40
+
+/* HDCP Link Integrity Check Timer Register */
+#define SP_HDCP_LINK_CHECK_TIMER_REG	0x41
+
+/* HDCP Repeater Ready Wait Timer Register */
+#define SP_HDCP_RPTR_RDY_WAIT_TIME_REG	0x42
+
+/* HDCP Auto Timer Register */
+#define SP_HDCP_AUTO_TIMER_REG		0x51
+
+/* HDCP Key Status Register */
+#define SP_HDCP_KEY_STATUS_REG		0x5e
+
+/* HDCP Key Command Register */
+#define SP_HDCP_KEY_COMMAND_REG		0x5f
+#define SP_DISABLE_SYNC_HDCP		BIT(2)
+
+/* OTP Memory Key Protection Registers */
+#define SP_OTP_KEY_PROTECT1_REG		0x60
+#define SP_OTP_KEY_PROTECT2_REG		0x61
+#define SP_OTP_KEY_PROTECT3_REG		0x62
+#define SP_OTP_PSW1			0xa2
+#define SP_OTP_PSW2			0x7e
+#define SP_OTP_PSW3			0xc6
+
+/* DP System Control Registers */
+#define SP_DP_SYSTEM_CTRL_BASE		(0x80 - 1)
+/* Bits for DP System Control Register 2 */
+#define SP_CHA_STA			BIT(2)
+/* Bits for DP System Control Register 3 */
+#define SP_HPD_STATUS			BIT(6)
+#define SP_HPD_FORCE			BIT(5)
+#define SP_HPD_CTRL			BIT(4)
+#define SP_STRM_VALID			BIT(2)
+#define SP_STRM_FORCE			BIT(1)
+#define SP_STRM_CTRL			BIT(0)
+/* Bits for DP System Control Register 4 */
+#define SP_ENHANCED_MODE		BIT(3)
+
+/* DP Video Control Register */
+#define SP_DP_VIDEO_CTRL_REG		0x84
+#define SP_COLOR_F_MASK			0x06
+#define SP_COLOR_F_SHIFT		1
+#define SP_BPC_MASK			0xe0
+#define SP_BPC_SHIFT			5
+#  define SP_BPC_6BITS			0x00
+#  define SP_BPC_8BITS			0x01
+#  define SP_BPC_10BITS			0x02
+#  define SP_BPC_12BITS			0x03
+
+/* DP Audio Control Register */
+#define SP_DP_AUDIO_CTRL_REG		0x87
+#define SP_AUD_EN			BIT(0)
+
+/* 10us Pulse Generate Timer Registers */
+#define SP_I2C_GEN_10US_TIMER0_REG	0x88
+#define SP_I2C_GEN_10US_TIMER1_REG	0x89
+
+/* Packet Send Control Register */
+#define SP_PACKET_SEND_CTRL_REG		0x90
+#define SP_AUD_IF_UP			BIT(7)
+#define SP_AVI_IF_UD			BIT(6)
+#define SP_MPEG_IF_UD			BIT(5)
+#define SP_SPD_IF_UD			BIT(4)
+#define SP_AUD_IF_EN			BIT(3)
+#define SP_AVI_IF_EN			BIT(2)
+#define SP_MPEG_IF_EN			BIT(1)
+#define SP_SPD_IF_EN			BIT(0)
+
+/* DP HDCP Control Register */
+#define SP_DP_HDCP_CTRL_REG		0x92
+#define SP_AUTO_EN			BIT(7)
+#define SP_AUTO_START			BIT(5)
+#define SP_LINK_POLLING			BIT(1)
+
+/* DP Main Link Bandwidth Setting Register */
+#define SP_DP_MAIN_LINK_BW_SET_REG	0xa0
+#define SP_LINK_BW_SET_MASK		0x1f
+#define SP_INITIAL_SLIM_M_AUD_SEL	BIT(5)
+
+/* DP Lane Count Setting Register */
+#define SP_DP_LANE_COUNT_SET_REG	0xa1
+
+/* DP Training Pattern Set Register */
+#define SP_DP_TRAINING_PATTERN_SET_REG	0xa2
+
+/* DP Lane 0 Link Training Control Register */
+#define SP_DP_LANE0_LT_CTRL_REG		0xa3
+#define SP_TX_SW_SET_MASK		0x1b
+#define SP_MAX_PRE_REACH		BIT(5)
+#define SP_MAX_DRIVE_REACH		BIT(4)
+#define SP_PRE_EMP_LEVEL1		BIT(3)
+#define SP_DRVIE_CURRENT_LEVEL1		BIT(0)
+
+/* DP Link Training Control Register */
+#define SP_DP_LT_CTRL_REG		0xa8
+#define SP_DP_LT_INPROGRESS		0x80
+#define SP_LT_ERROR_TYPE_MASK		0x70
+#  define SP_LT_NO_ERROR		0x00
+#  define SP_LT_AUX_WRITE_ERROR		0x01
+#  define SP_LT_MAX_DRIVE_REACHED	0x02
+#  define SP_LT_WRONG_LANE_COUNT_SET	0x03
+#  define SP_LT_LOOP_SAME_5_TIME	0x04
+#  define SP_LT_CR_FAIL_IN_EQ		0x05
+#  define SP_LT_EQ_LOOP_5_TIME		0x06
+#define SP_LT_EN			BIT(0)
+
+/* DP CEP Training Control Registers */
+#define SP_DP_CEP_TRAINING_CTRL0_REG	0xa9
+#define SP_DP_CEP_TRAINING_CTRL1_REG	0xaa
+
+/* DP Debug Register 1 */
+#define SP_DP_DEBUG1_REG		0xb0
+#define SP_DEBUG_PLL_LOCK		BIT(4)
+#define SP_POLLING_EN			BIT(1)
+
+/* DP Polling Control Register */
+#define SP_DP_POLLING_CTRL_REG		0xb4
+#define SP_AUTO_POLLING_DISABLE		BIT(0)
+
+/* DP Link Debug Control Register */
+#define SP_DP_LINK_DEBUG_CTRL_REG	0xb8
+#define SP_M_VID_DEBUG			BIT(5)
+#define SP_NEW_PRBS7			BIT(4)
+#define SP_INSERT_ER			BIT(1)
+#define SP_PRBS31_EN			BIT(0)
+
+/* AUX Misc control Register */
+#define SP_AUX_MISC_CTRL_REG		0xbf
+
+/* DP PLL control Register */
+#define SP_DP_PLL_CTRL_REG		0xc7
+#define SP_PLL_RST			BIT(6)
+
+/* DP Analog Power Down Register */
+#define SP_DP_ANALOG_POWER_DOWN_REG	0xc8
+#define SP_CH0_PD			BIT(0)
+
+/* DP Misc Control Register */
+#define SP_DP_MISC_CTRL_REG		0xcd
+#define SP_EQ_TRAINING_LOOP		BIT(6)
+
+/* DP Extra I2C Device Address Register */
+#define SP_DP_EXTRA_I2C_DEV_ADDR_REG	0xce
+#define SP_I2C_STRETCH_DISABLE		BIT(7)
+
+#define SP_I2C_EXTRA_ADDR		0x50
+
+/* DP Downspread Control Register 1 */
+#define SP_DP_DOWNSPREAD_CTRL1_REG	0xd0
+
+/* DP M Value Calculation Control Register */
+#define SP_DP_M_CALCULATION_CTRL_REG	0xd9
+#define SP_M_GEN_CLK_SEL		BIT(0)
+
+/* AUX Channel Access Status Register */
+#define SP_AUX_CH_STATUS_REG		0xe0
+#define SP_AUX_STATUS			0x0f
+
+/* AUX Channel DEFER Control Register */
+#define SP_AUX_DEFER_CTRL_REG		0xe2
+#define SP_DEFER_CTRL_EN		BIT(7)
+
+/* DP Buffer Data Count Register */
+#define SP_BUF_DATA_COUNT_REG		0xe4
+#define SP_BUF_DATA_COUNT_MASK		0x1f
+#define SP_BUF_CLR			BIT(7)
+
+/* DP AUX Channel Control Register 1 */
+#define SP_DP_AUX_CH_CTRL1_REG		0xe5
+#define SP_AUX_TX_COMM_MASK		0x0f
+#define SP_AUX_LENGTH_MASK		0xf0
+#define SP_AUX_LENGTH_SHIFT		4
+
+/* DP AUX CH Address Register 0 */
+#define SP_AUX_ADDR_7_0_REG		0xe6
+
+/* DP AUX CH Address Register 1 */
+#define SP_AUX_ADDR_15_8_REG		0xe7
+
+/* DP AUX CH Address Register 2 */
+#define SP_AUX_ADDR_19_16_REG		0xe8
+#define SP_AUX_ADDR_19_16_MASK		0x0f
+
+/* DP AUX Channel Control Register 2 */
+#define SP_DP_AUX_CH_CTRL2_REG		0xe9
+#define SP_AUX_SEL_RXCM			BIT(6)
+#define SP_AUX_CHSEL			BIT(3)
+#define SP_AUX_PN_INV			BIT(2)
+#define SP_ADDR_ONLY			BIT(1)
+#define SP_AUX_EN			BIT(0)
+
+/* DP Video Stream Control InfoFrame Register */
+#define SP_DP_3D_VSC_CTRL_REG		0xea
+#define SP_INFO_FRAME_VSC_EN		BIT(0)
+
+/* DP Video Stream Data Byte 1 Register */
+#define SP_DP_VSC_DB1_REG		0xeb
+
+/* DP AUX Channel Control Register 3 */
+#define SP_DP_AUX_CH_CTRL3_REG		0xec
+#define SP_WAIT_COUNTER_7_0_MASK	0xff
+
+/* DP AUX Channel Control Register 4 */
+#define SP_DP_AUX_CH_CTRL4_REG		0xed
+
+/* DP AUX Buffer Data Registers */
+#define SP_DP_BUF_DATA0_REG		0xf0
+
+ssize_t anx_dp_aux_transfer(struct regmap *map_dptx,
+				struct drm_dp_aux_msg *msg);
+
+#endif
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-i2c-txcommon.h b/drivers/gpu/drm/bridge/analogix/analogix-i2c-txcommon.h
new file mode 100644
index 000000000000..3c843497d835
--- /dev/null
+++ b/drivers/gpu/drm/bridge/analogix/analogix-i2c-txcommon.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2016, Analogix Semiconductor. All rights reserved.
+ */
+#ifndef _ANALOGIX_I2C_TXCOMMON_H_
+#define _ANALOGIX_I2C_TXCOMMON_H_
+
+/***************************************************************/
+/* Register definitions for TX_P2                              */
+/***************************************************************/
+
+/*
+ * Core Register Definitions
+ */
+
+/* Device ID Low Byte Register */
+#define SP_DEVICE_IDL_REG		0x02
+
+/* Device ID High Byte Register */
+#define SP_DEVICE_IDH_REG		0x03
+
+/* Device version register */
+#define SP_DEVICE_VERSION_REG		0x04
+
+/* Power Down Control Register */
+#define SP_POWERDOWN_CTRL_REG		0x05
+#define SP_REGISTER_PD			BIT(7)
+#define SP_HDCP_PD			BIT(5)
+#define SP_AUDIO_PD			BIT(4)
+#define SP_VIDEO_PD			BIT(3)
+#define SP_LINK_PD			BIT(2)
+#define SP_TOTAL_PD			BIT(1)
+
+/* Reset Control Register 1 */
+#define SP_RESET_CTRL1_REG		0x06
+#define SP_MISC_RST			BIT(7)
+#define SP_VIDCAP_RST			BIT(6)
+#define SP_VIDFIF_RST			BIT(5)
+#define SP_AUDFIF_RST			BIT(4)
+#define SP_AUDCAP_RST			BIT(3)
+#define SP_HDCP_RST			BIT(2)
+#define SP_SW_RST			BIT(1)
+#define SP_HW_RST			BIT(0)
+
+/* Reset Control Register 2 */
+#define SP_RESET_CTRL2_REG		0x07
+#define SP_AUX_RST			BIT(2)
+#define SP_SERDES_FIFO_RST		BIT(1)
+#define SP_I2C_REG_RST			BIT(0)
+
+/* Video Control Register 1 */
+#define SP_VID_CTRL1_REG		0x08
+#define SP_VIDEO_EN			BIT(7)
+#define SP_VIDEO_MUTE			BIT(2)
+#define SP_DE_GEN			BIT(1)
+#define SP_DEMUX			BIT(0)
+
+/* Video Control Register 2 */
+#define SP_VID_CTRL2_REG		0x09
+#define SP_IN_COLOR_F_MASK		0x03
+#define SP_IN_YC_BIT_SEL		BIT(2)
+#define SP_IN_BPC_MASK			0x70
+#define SP_IN_BPC_SHIFT			4
+#  define SP_IN_BPC_12BIT		0x03
+#  define SP_IN_BPC_10BIT		0x02
+#  define SP_IN_BPC_8BIT		0x01
+#  define SP_IN_BPC_6BIT		0x00
+#define SP_IN_D_RANGE			BIT(7)
+
+/* Video Control Register 3 */
+#define SP_VID_CTRL3_REG		0x0a
+#define SP_HPD_OUT			BIT(6)
+
+/* Video Control Register 5 */
+#define SP_VID_CTRL5_REG		0x0c
+#define SP_CSC_STD_SEL			BIT(7)
+#define SP_XVYCC_RNG_LMT		BIT(6)
+#define SP_RANGE_Y2R			BIT(5)
+#define SP_CSPACE_Y2R			BIT(4)
+#define SP_RGB_RNG_LMT			BIT(3)
+#define SP_Y_RNG_LMT			BIT(2)
+#define SP_RANGE_R2Y			BIT(1)
+#define SP_CSPACE_R2Y			BIT(0)
+
+/* Video Control Register 6 */
+#define SP_VID_CTRL6_REG		0x0d
+#define SP_TEST_PATTERN_EN		BIT(7)
+#define SP_VIDEO_PROCESS_EN		BIT(6)
+#define SP_VID_US_MODE			BIT(3)
+#define SP_VID_DS_MODE			BIT(2)
+#define SP_UP_SAMPLE			BIT(1)
+#define SP_DOWN_SAMPLE			BIT(0)
+
+/* Video Control Register 8 */
+#define SP_VID_CTRL8_REG		0x0f
+#define SP_VID_VRES_TH			BIT(0)
+
+/* Total Line Status Low Byte Register */
+#define SP_TOTAL_LINE_STAL_REG		0x24
+
+/* Total Line Status High Byte Register */
+#define SP_TOTAL_LINE_STAH_REG		0x25
+
+/* Active Line Status Low Byte Register */
+#define SP_ACT_LINE_STAL_REG		0x26
+
+/* Active Line Status High Byte Register */
+#define SP_ACT_LINE_STAH_REG		0x27
+
+/* Vertical Front Porch Status Register */
+#define SP_V_F_PORCH_STA_REG		0x28
+
+/* Vertical SYNC Width Status Register */
+#define SP_V_SYNC_STA_REG		0x29
+
+/* Vertical Back Porch Status Register */
+#define SP_V_B_PORCH_STA_REG		0x2a
+
+/* Total Pixel Status Low Byte Register */
+#define SP_TOTAL_PIXEL_STAL_REG		0x2b
+
+/* Total Pixel Status High Byte Register */
+#define SP_TOTAL_PIXEL_STAH_REG		0x2c
+
+/* Active Pixel Status Low Byte Register */
+#define SP_ACT_PIXEL_STAL_REG		0x2d
+
+/* Active Pixel Status High Byte Register */
+#define SP_ACT_PIXEL_STAH_REG		0x2e
+
+/* Horizontal Front Porch Status Low Byte Register */
+#define SP_H_F_PORCH_STAL_REG		0x2f
+
+/* Horizontal Front Porch Statys High Byte Register */
+#define SP_H_F_PORCH_STAH_REG		0x30
+
+/* Horizontal SYNC Width Status Low Byte Register */
+#define SP_H_SYNC_STAL_REG		0x31
+
+/* Horizontal SYNC Width Status High Byte Register */
+#define SP_H_SYNC_STAH_REG		0x32
+
+/* Horizontal Back Porch Status Low Byte Register */
+#define SP_H_B_PORCH_STAL_REG		0x33
+
+/* Horizontal Back Porch Status High Byte Register */
+#define SP_H_B_PORCH_STAH_REG		0x34
+
+/* InfoFrame AVI Packet DB1 Register */
+#define SP_INFOFRAME_AVI_DB1_REG	0x70
+
+/* Bit Control Specific Register */
+#define SP_BIT_CTRL_SPECIFIC_REG	0x80
+#define SP_BIT_CTRL_SELECT_SHIFT	1
+#define SP_ENABLE_BIT_CTRL		BIT(0)
+
+/* InfoFrame Audio Packet DB1 Register */
+#define SP_INFOFRAME_AUD_DB1_REG	0x83
+
+/* InfoFrame MPEG Packet DB1 Register */
+#define SP_INFOFRAME_MPEG_DB1_REG	0xb0
+
+/* Audio Channel Status Registers */
+#define SP_AUD_CH_STATUS_BASE		0xd0
+
+/* Audio Channel Num Register 5 */
+#define SP_I2S_CHANNEL_NUM_MASK		0xe0
+#  define SP_I2S_CH_NUM_1		(0x00 << 5)
+#  define SP_I2S_CH_NUM_2		(0x01 << 5)
+#  define SP_I2S_CH_NUM_3		(0x02 << 5)
+#  define SP_I2S_CH_NUM_4		(0x03 << 5)
+#  define SP_I2S_CH_NUM_5		(0x04 << 5)
+#  define SP_I2S_CH_NUM_6		(0x05 << 5)
+#  define SP_I2S_CH_NUM_7		(0x06 << 5)
+#  define SP_I2S_CH_NUM_8		(0x07 << 5)
+#define SP_EXT_VUCP			BIT(2)
+#define SP_VBIT				BIT(1)
+#define SP_AUDIO_LAYOUT			BIT(0)
+
+/* Analog Debug Register 1 */
+#define SP_ANALOG_DEBUG1_REG		0xdc
+
+/* Analog Debug Register 2 */
+#define SP_ANALOG_DEBUG2_REG		0xdd
+#define SP_FORCE_SW_OFF_BYPASS		0x20
+#define SP_XTAL_FRQ			0x1c
+#  define SP_XTAL_FRQ_19M2		(0x00 << 2)
+#  define SP_XTAL_FRQ_24M		(0x01 << 2)
+#  define SP_XTAL_FRQ_25M		(0x02 << 2)
+#  define SP_XTAL_FRQ_26M		(0x03 << 2)
+#  define SP_XTAL_FRQ_27M		(0x04 << 2)
+#  define SP_XTAL_FRQ_38M4		(0x05 << 2)
+#  define SP_XTAL_FRQ_52M		(0x06 << 2)
+#define SP_POWERON_TIME_1P5MS		0x03
+
+/* Analog Control 0 Register */
+#define SP_ANALOG_CTRL0_REG		0xe1
+
+/* Common Interrupt Status Register 1 */
+#define SP_COMMON_INT_STATUS_BASE	(0xf1 - 1)
+#define SP_PLL_LOCK_CHG			0x40
+
+/* Common Interrupt Status Register 2 */
+#define SP_COMMON_INT_STATUS2		0xf2
+#define SP_HDCP_AUTH_CHG		BIT(1)
+#define SP_HDCP_AUTH_DONE		BIT(0)
+
+#define SP_HDCP_LINK_CHECK_FAIL		BIT(0)
+
+/* Common Interrupt Status Register 4 */
+#define SP_COMMON_INT_STATUS4_REG	0xf4
+#define SP_HPD_IRQ			BIT(6)
+#define SP_HPD_ESYNC_ERR		BIT(4)
+#define SP_HPD_CHG			BIT(2)
+#define SP_HPD_LOST			BIT(1)
+#define SP_HPD_PLUG			BIT(0)
+
+/* DP Interrupt Status Register */
+#define SP_DP_INT_STATUS1_REG		0xf7
+#define SP_TRAINING_FINISH		BIT(5)
+#define SP_POLLING_ERR			BIT(4)
+
+/* Common Interrupt Mask Register */
+#define SP_COMMON_INT_MASK_BASE		(0xf8 - 1)
+
+#define SP_COMMON_INT_MASK4_REG		0xfb
+
+/* DP Interrupts Mask Register */
+#define SP_DP_INT_MASK1_REG		0xfe
+
+/* Interrupt Control Register */
+#define SP_INT_CTRL_REG			0xff
+
+#endif /* _ANALOGIX_I2C_TXCOMMON_H_ */
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 22885dceaa17..6effe532f820 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -21,6 +21,7 @@
 #include <drm/bridge/analogix_dp.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_device.h>
 #include <drm/drm_panel.h>
@@ -1110,7 +1111,7 @@ static int analogix_dp_get_modes(struct drm_connector *connector)
 	int ret, num_modes = 0;
 
 	if (dp->plat_data->panel) {
-		num_modes += drm_panel_get_modes(dp->plat_data->panel);
+		num_modes += drm_panel_get_modes(dp->plat_data->panel, connector);
 	} else {
 		ret = analogix_dp_prepare_panel(dp, true, false);
 		if (ret) {
diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c
index 6166dca6be81..b7c97f060241 100644
--- a/drivers/gpu/drm/bridge/cdns-dsi.c
+++ b/drivers/gpu/drm/bridge/cdns-dsi.c
@@ -512,7 +512,7 @@ static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi,
 	struct cdns_dsi_output *output = &dsi->output;
 	unsigned int tmp;
 	bool sync_pulse = false;
-	int bpp, nlanes;
+	int bpp;
 
 	memset(dsi_cfg, 0, sizeof(*dsi_cfg));
 
@@ -520,7 +520,6 @@ static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi,
 		sync_pulse = true;
 
 	bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format);
-	nlanes = output->dev->lanes;
 
 	if (mode_valid_check)
 		tmp = mode->htotal -
@@ -785,13 +784,12 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge)
 	unsigned long tx_byte_period;
 	struct cdns_dsi_cfg dsi_cfg;
 	u32 tmp, reg_wakeup, div;
-	int bpp, nlanes;
+	int nlanes;
 
 	if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0))
 		return;
 
 	mode = &bridge->encoder->crtc->state->adjusted_mode;
-	bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format);
 	nlanes = output->dev->lanes;
 
 	WARN_ON_ONCE(cdns_dsi_check_conf(dsi, mode, &dsi_cfg, false));
@@ -956,7 +954,8 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host,
 
 	panel = of_drm_find_panel(np);
 	if (!IS_ERR(panel)) {
-		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DSI);
+		bridge = drm_panel_bridge_add_typed(panel,
+						    DRM_MODE_CONNECTOR_DSI);
 	} else {
 		bridge = of_drm_find_bridge(dev->dev.of_node);
 		if (!bridge)
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 7aa789c35882..cc33dc411b9e 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -12,6 +12,7 @@
 #include <linux/regulator/consumer.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c
new file mode 100644
index 000000000000..5f04cc11227e
--- /dev/null
+++ b/drivers/gpu/drm/bridge/lvds-codec.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Renesas Electronics Corporation
+ * Copyright (C) 2016 Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ */
+
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_panel.h>
+
+struct lvds_codec {
+	struct drm_bridge bridge;
+	struct drm_bridge *panel_bridge;
+	struct gpio_desc *powerdown_gpio;
+	u32 connector_type;
+};
+
+static int lvds_codec_attach(struct drm_bridge *bridge)
+{
+	struct lvds_codec *lvds_codec = container_of(bridge,
+						     struct lvds_codec, bridge);
+
+	return drm_bridge_attach(bridge->encoder, lvds_codec->panel_bridge,
+				 bridge);
+}
+
+static void lvds_codec_enable(struct drm_bridge *bridge)
+{
+	struct lvds_codec *lvds_codec = container_of(bridge,
+						     struct lvds_codec, bridge);
+
+	if (lvds_codec->powerdown_gpio)
+		gpiod_set_value_cansleep(lvds_codec->powerdown_gpio, 0);
+}
+
+static void lvds_codec_disable(struct drm_bridge *bridge)
+{
+	struct lvds_codec *lvds_codec = container_of(bridge,
+						     struct lvds_codec, bridge);
+
+	if (lvds_codec->powerdown_gpio)
+		gpiod_set_value_cansleep(lvds_codec->powerdown_gpio, 1);
+}
+
+static struct drm_bridge_funcs funcs = {
+	.attach = lvds_codec_attach,
+	.enable = lvds_codec_enable,
+	.disable = lvds_codec_disable,
+};
+
+static int lvds_codec_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *panel_node;
+	struct drm_panel *panel;
+	struct lvds_codec *lvds_codec;
+
+	lvds_codec = devm_kzalloc(dev, sizeof(*lvds_codec), GFP_KERNEL);
+	if (!lvds_codec)
+		return -ENOMEM;
+
+	lvds_codec->connector_type = (uintptr_t)of_device_get_match_data(dev);
+	lvds_codec->powerdown_gpio = devm_gpiod_get_optional(dev, "powerdown",
+							     GPIOD_OUT_HIGH);
+	if (IS_ERR(lvds_codec->powerdown_gpio)) {
+		int err = PTR_ERR(lvds_codec->powerdown_gpio);
+
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "powerdown GPIO failure: %d\n", err);
+		return err;
+	}
+
+	/* Locate the panel DT node. */
+	panel_node = of_graph_get_remote_node(dev->of_node, 1, 0);
+	if (!panel_node) {
+		dev_dbg(dev, "panel DT node not found\n");
+		return -ENXIO;
+	}
+
+	panel = of_drm_find_panel(panel_node);
+	of_node_put(panel_node);
+	if (IS_ERR(panel)) {
+		dev_dbg(dev, "panel not found, deferring probe\n");
+		return PTR_ERR(panel);
+	}
+
+	lvds_codec->panel_bridge =
+		devm_drm_panel_bridge_add_typed(dev, panel,
+						lvds_codec->connector_type);
+	if (IS_ERR(lvds_codec->panel_bridge))
+		return PTR_ERR(lvds_codec->panel_bridge);
+
+	/*
+	 * The panel_bridge bridge is attached to the panel's of_node,
+	 * but we need a bridge attached to our of_node for our user
+	 * to look up.
+	 */
+	lvds_codec->bridge.of_node = dev->of_node;
+	lvds_codec->bridge.funcs = &funcs;
+	drm_bridge_add(&lvds_codec->bridge);
+
+	platform_set_drvdata(pdev, lvds_codec);
+
+	return 0;
+}
+
+static int lvds_codec_remove(struct platform_device *pdev)
+{
+	struct lvds_codec *lvds_codec = platform_get_drvdata(pdev);
+
+	drm_bridge_remove(&lvds_codec->bridge);
+
+	return 0;
+}
+
+static const struct of_device_id lvds_codec_match[] = {
+	{
+		.compatible = "lvds-decoder",
+		.data = (void *)DRM_MODE_CONNECTOR_DPI,
+	},
+	{
+		.compatible = "lvds-encoder",
+		.data = (void *)DRM_MODE_CONNECTOR_LVDS,
+	},
+	{
+		.compatible = "thine,thc63lvdm83d",
+		.data = (void *)DRM_MODE_CONNECTOR_LVDS,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, lvds_codec_match);
+
+static struct platform_driver lvds_codec_driver = {
+	.probe	= lvds_codec_probe,
+	.remove	= lvds_codec_remove,
+	.driver		= {
+		.name		= "lvds-codec",
+		.of_match_table	= lvds_codec_match,
+	},
+};
+module_platform_driver(lvds_codec_driver);
+
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_DESCRIPTION("LVDS encoders and decoders");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c
deleted file mode 100644
index 2ab2c234f26c..000000000000
--- a/drivers/gpu/drm/bridge/lvds-encoder.c
+++ /dev/null
@@ -1,154 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2016 Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- */
-
-#include <linux/gpio/consumer.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_graph.h>
-#include <linux/platform_device.h>
-
-#include <drm/drm_bridge.h>
-#include <drm/drm_panel.h>
-
-struct lvds_encoder {
-	struct drm_bridge bridge;
-	struct drm_bridge *panel_bridge;
-	struct gpio_desc *powerdown_gpio;
-};
-
-static int lvds_encoder_attach(struct drm_bridge *bridge)
-{
-	struct lvds_encoder *lvds_encoder = container_of(bridge,
-							 struct lvds_encoder,
-							 bridge);
-
-	return drm_bridge_attach(bridge->encoder, lvds_encoder->panel_bridge,
-				 bridge);
-}
-
-static void lvds_encoder_enable(struct drm_bridge *bridge)
-{
-	struct lvds_encoder *lvds_encoder = container_of(bridge,
-							 struct lvds_encoder,
-							 bridge);
-
-	if (lvds_encoder->powerdown_gpio)
-		gpiod_set_value_cansleep(lvds_encoder->powerdown_gpio, 0);
-}
-
-static void lvds_encoder_disable(struct drm_bridge *bridge)
-{
-	struct lvds_encoder *lvds_encoder = container_of(bridge,
-							 struct lvds_encoder,
-							 bridge);
-
-	if (lvds_encoder->powerdown_gpio)
-		gpiod_set_value_cansleep(lvds_encoder->powerdown_gpio, 1);
-}
-
-static struct drm_bridge_funcs funcs = {
-	.attach = lvds_encoder_attach,
-	.enable = lvds_encoder_enable,
-	.disable = lvds_encoder_disable,
-};
-
-static int lvds_encoder_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct device_node *port;
-	struct device_node *endpoint;
-	struct device_node *panel_node;
-	struct drm_panel *panel;
-	struct lvds_encoder *lvds_encoder;
-
-	lvds_encoder = devm_kzalloc(dev, sizeof(*lvds_encoder), GFP_KERNEL);
-	if (!lvds_encoder)
-		return -ENOMEM;
-
-	lvds_encoder->powerdown_gpio = devm_gpiod_get_optional(dev, "powerdown",
-							       GPIOD_OUT_HIGH);
-	if (IS_ERR(lvds_encoder->powerdown_gpio)) {
-		int err = PTR_ERR(lvds_encoder->powerdown_gpio);
-
-		if (err != -EPROBE_DEFER)
-			dev_err(dev, "powerdown GPIO failure: %d\n", err);
-		return err;
-	}
-
-	/* Locate the panel DT node. */
-	port = of_graph_get_port_by_id(dev->of_node, 1);
-	if (!port) {
-		dev_dbg(dev, "port 1 not found\n");
-		return -ENXIO;
-	}
-
-	endpoint = of_get_child_by_name(port, "endpoint");
-	of_node_put(port);
-	if (!endpoint) {
-		dev_dbg(dev, "no endpoint for port 1\n");
-		return -ENXIO;
-	}
-
-	panel_node = of_graph_get_remote_port_parent(endpoint);
-	of_node_put(endpoint);
-	if (!panel_node) {
-		dev_dbg(dev, "no remote endpoint for port 1\n");
-		return -ENXIO;
-	}
-
-	panel = of_drm_find_panel(panel_node);
-	of_node_put(panel_node);
-	if (IS_ERR(panel)) {
-		dev_dbg(dev, "panel not found, deferring probe\n");
-		return PTR_ERR(panel);
-	}
-
-	lvds_encoder->panel_bridge =
-		devm_drm_panel_bridge_add(dev, panel, DRM_MODE_CONNECTOR_LVDS);
-	if (IS_ERR(lvds_encoder->panel_bridge))
-		return PTR_ERR(lvds_encoder->panel_bridge);
-
-	/* The panel_bridge bridge is attached to the panel's of_node,
-	 * but we need a bridge attached to our of_node for our user
-	 * to look up.
-	 */
-	lvds_encoder->bridge.of_node = dev->of_node;
-	lvds_encoder->bridge.funcs = &funcs;
-	drm_bridge_add(&lvds_encoder->bridge);
-
-	platform_set_drvdata(pdev, lvds_encoder);
-
-	return 0;
-}
-
-static int lvds_encoder_remove(struct platform_device *pdev)
-{
-	struct lvds_encoder *lvds_encoder = platform_get_drvdata(pdev);
-
-	drm_bridge_remove(&lvds_encoder->bridge);
-
-	return 0;
-}
-
-static const struct of_device_id lvds_encoder_match[] = {
-	{ .compatible = "lvds-encoder" },
-	{ .compatible = "thine,thc63lvdm83d" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, lvds_encoder_match);
-
-static struct platform_driver lvds_encoder_driver = {
-	.probe	= lvds_encoder_probe,
-	.remove	= lvds_encoder_remove,
-	.driver		= {
-		.name		= "lvds-encoder",
-		.of_match_table	= lvds_encoder_match,
-	},
-};
-module_platform_driver(lvds_encoder_driver);
-
-MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
-MODULE_DESCRIPTION("Transparent parallel to LVDS encoder");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
index 6e81e5db57f2..e8a49f6146c6 100644
--- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
@@ -25,6 +25,7 @@
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c
index d4a1cc5052c3..57ff01339559 100644
--- a/drivers/gpu/drm/bridge/nxp-ptn3460.c
+++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index b12ae3a4c5f1..f66777e24968 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -5,6 +5,7 @@
  */
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_connector.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_modeset_helper_vtables.h>
@@ -36,7 +37,7 @@ static int panel_bridge_connector_get_modes(struct drm_connector *connector)
 	struct panel_bridge *panel_bridge =
 		drm_connector_to_panel_bridge(connector);
 
-	return drm_panel_get_modes(panel_bridge->panel);
+	return drm_panel_get_modes(panel_bridge->panel, connector);
 }
 
 static const struct drm_connector_helper_funcs
@@ -133,8 +134,6 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = {
  * just calls the appropriate functions from &drm_panel.
  *
  * @panel: The drm_panel being wrapped.  Must be non-NULL.
- * @connector_type: The DRM_MODE_CONNECTOR_* for the connector to be
- * created.
  *
  * For drivers converting from directly using drm_panel: The expected
  * usage pattern is that during either encoder module probe or DSI
@@ -148,11 +147,37 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = {
  * drm_mode_config_cleanup() if the bridge has already been attached), then
  * drm_panel_bridge_remove() to free it.
  *
+ * The connector type is set to @panel->connector_type, which must be set to a
+ * known type. Calling this function with a panel whose connector type is
+ * DRM_MODE_CONNECTOR_Unknown will return NULL.
+ *
  * See devm_drm_panel_bridge_add() for an automatically manged version of this
  * function.
  */
-struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel,
-					u32 connector_type)
+struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel)
+{
+	if (WARN_ON(panel->connector_type == DRM_MODE_CONNECTOR_Unknown))
+		return NULL;
+
+	return drm_panel_bridge_add_typed(panel, panel->connector_type);
+}
+EXPORT_SYMBOL(drm_panel_bridge_add);
+
+/**
+ * drm_panel_bridge_add_typed - Creates a &drm_bridge and &drm_connector with
+ * an explicit connector type.
+ * @panel: The drm_panel being wrapped.  Must be non-NULL.
+ * @connector_type: The connector type (DRM_MODE_CONNECTOR_*)
+ *
+ * This is just like drm_panel_bridge_add(), but forces the connector type to
+ * @connector_type instead of infering it from the panel.
+ *
+ * This function is deprecated and should not be used in new drivers. Use
+ * drm_panel_bridge_add() instead, and fix panel drivers as necessary if they
+ * don't report a connector type.
+ */
+struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel,
+					      u32 connector_type)
 {
 	struct panel_bridge *panel_bridge;
 
@@ -176,7 +201,7 @@ struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel,
 
 	return &panel_bridge->bridge;
 }
-EXPORT_SYMBOL(drm_panel_bridge_add);
+EXPORT_SYMBOL(drm_panel_bridge_add_typed);
 
 /**
  * drm_panel_bridge_remove - Unregisters and frees a drm_bridge
@@ -213,15 +238,38 @@ static void devm_drm_panel_bridge_release(struct device *dev, void *res)
  * that just calls the appropriate functions from &drm_panel.
  * @dev: device to tie the bridge lifetime to
  * @panel: The drm_panel being wrapped.  Must be non-NULL.
- * @connector_type: The DRM_MODE_CONNECTOR_* for the connector to be
- * created.
  *
  * This is the managed version of drm_panel_bridge_add() which automatically
  * calls drm_panel_bridge_remove() when @dev is unbound.
  */
 struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev,
-					     struct drm_panel *panel,
-					     u32 connector_type)
+					     struct drm_panel *panel)
+{
+	if (WARN_ON(panel->connector_type == DRM_MODE_CONNECTOR_Unknown))
+		return NULL;
+
+	return devm_drm_panel_bridge_add_typed(dev, panel,
+					       panel->connector_type);
+}
+EXPORT_SYMBOL(devm_drm_panel_bridge_add);
+
+/**
+ * devm_drm_panel_bridge_add_typed - Creates a managed &drm_bridge and
+ * &drm_connector with an explicit connector type.
+ * @dev: device to tie the bridge lifetime to
+ * @panel: The drm_panel being wrapped.  Must be non-NULL.
+ * @connector_type: The connector type (DRM_MODE_CONNECTOR_*)
+ *
+ * This is just like devm_drm_panel_bridge_add(), but forces the connector type
+ * to @connector_type instead of infering it from the panel.
+ *
+ * This function is deprecated and should not be used in new drivers. Use
+ * devm_drm_panel_bridge_add() instead, and fix panel drivers as necessary if
+ * they don't report a connector type.
+ */
+struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev,
+						   struct drm_panel *panel,
+						   u32 connector_type)
 {
 	struct drm_bridge **ptr, *bridge;
 
@@ -230,7 +278,7 @@ struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev,
 	if (!ptr)
 		return ERR_PTR(-ENOMEM);
 
-	bridge = drm_panel_bridge_add(panel, connector_type);
+	bridge = drm_panel_bridge_add_typed(panel, connector_type);
 	if (!IS_ERR(bridge)) {
 		*ptr = bridge;
 		devres_add(dev, ptr);
@@ -240,4 +288,22 @@ struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev,
 
 	return bridge;
 }
-EXPORT_SYMBOL(devm_drm_panel_bridge_add);
+EXPORT_SYMBOL(devm_drm_panel_bridge_add_typed);
+
+/**
+ * drm_panel_bridge_connector - return the connector for the panel bridge
+ *
+ * drm_panel_bridge creates the connector.
+ * This function gives external access to the connector.
+ *
+ * Returns: Pointer to drm_connector
+ */
+struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge)
+{
+	struct panel_bridge *panel_bridge;
+
+	panel_bridge = drm_bridge_to_panel_bridge(bridge);
+
+	return &panel_bridge->connector;
+}
+EXPORT_SYMBOL(drm_panel_bridge_connector);
diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c
index 93c68e2e9484..10c47c008b40 100644
--- a/drivers/gpu/drm/bridge/parade-ps8622.c
+++ b/drivers/gpu/drm/bridge/parade-ps8622.c
@@ -17,6 +17,7 @@
 #include <linux/regulator/consumer.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
@@ -460,7 +461,7 @@ static int ps8622_get_modes(struct drm_connector *connector)
 
 	ps8622 = connector_to_ps8622(connector);
 
-	return drm_panel_get_modes(ps8622->panel);
+	return drm_panel_get_modes(ps8622->panel, connector);
 }
 
 static const struct drm_connector_helper_funcs ps8622_connector_helper_funcs = {
diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c
index 38f75ac580df..b70e8c5cf2e1 100644
--- a/drivers/gpu/drm/bridge/sii902x.c
+++ b/drivers/gpu/drm/bridge/sii902x.c
@@ -20,6 +20,7 @@
 #include <linux/clk.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_print.h>
diff --git a/drivers/gpu/drm/bridge/sii9234.c b/drivers/gpu/drm/bridge/sii9234.c
index 25d4ad8c7ad6..f81f81b7051f 100644
--- a/drivers/gpu/drm/bridge/sii9234.c
+++ b/drivers/gpu/drm/bridge/sii9234.c
@@ -13,6 +13,7 @@
  *    Dharam Kumar <dharam.kr@samsung.com>
  */
 #include <drm/bridge/mhl.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 
@@ -841,39 +842,28 @@ static int sii9234_init_resources(struct sii9234 *ctx,
 
 	ctx->client[I2C_MHL] = client;
 
-	ctx->client[I2C_TPI] = i2c_new_dummy(adapter, I2C_TPI_ADDR);
-	if (!ctx->client[I2C_TPI]) {
+	ctx->client[I2C_TPI] = devm_i2c_new_dummy_device(&client->dev, adapter,
+							 I2C_TPI_ADDR);
+	if (IS_ERR(ctx->client[I2C_TPI])) {
 		dev_err(ctx->dev, "failed to create TPI client\n");
-		return -ENODEV;
+		return PTR_ERR(ctx->client[I2C_TPI]);
 	}
 
-	ctx->client[I2C_HDMI] = i2c_new_dummy(adapter, I2C_HDMI_ADDR);
-	if (!ctx->client[I2C_HDMI]) {
+	ctx->client[I2C_HDMI] = devm_i2c_new_dummy_device(&client->dev, adapter,
+							  I2C_HDMI_ADDR);
+	if (IS_ERR(ctx->client[I2C_HDMI])) {
 		dev_err(ctx->dev, "failed to create HDMI RX client\n");
-		goto fail_tpi;
+		return PTR_ERR(ctx->client[I2C_HDMI]);
 	}
 
-	ctx->client[I2C_CBUS] = i2c_new_dummy(adapter, I2C_CBUS_ADDR);
-	if (!ctx->client[I2C_CBUS]) {
+	ctx->client[I2C_CBUS] = devm_i2c_new_dummy_device(&client->dev, adapter,
+							  I2C_CBUS_ADDR);
+	if (IS_ERR(ctx->client[I2C_CBUS])) {
 		dev_err(ctx->dev, "failed to create CBUS client\n");
-		goto fail_hdmi;
+		return PTR_ERR(ctx->client[I2C_CBUS]);
 	}
 
 	return 0;
-
-fail_hdmi:
-	i2c_unregister_device(ctx->client[I2C_HDMI]);
-fail_tpi:
-	i2c_unregister_device(ctx->client[I2C_TPI]);
-
-	return -ENODEV;
-}
-
-static void sii9234_deinit_resources(struct sii9234 *ctx)
-{
-	i2c_unregister_device(ctx->client[I2C_CBUS]);
-	i2c_unregister_device(ctx->client[I2C_HDMI]);
-	i2c_unregister_device(ctx->client[I2C_TPI]);
 }
 
 static inline struct sii9234 *bridge_to_sii9234(struct drm_bridge *bridge)
@@ -950,7 +940,6 @@ static int sii9234_remove(struct i2c_client *client)
 
 	sii9234_cable_out(ctx);
 	drm_bridge_remove(&ctx->bridge);
-	sii9234_deinit_resources(ctx);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index bd3165ee5354..4c0eef406eb1 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -9,6 +9,7 @@
 #include <asm/unaligned.h>
 
 #include <drm/bridge/mhl.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
@@ -1759,10 +1760,8 @@ static bool sii8620_rcp_consume(struct sii8620 *ctx, u8 scancode)
 
 	scancode &= MHL_RCP_KEY_ID_MASK;
 
-	if (!ctx->rc_dev) {
-		dev_dbg(ctx->dev, "RCP input device not initialized\n");
+	if (!IS_ENABLED(CONFIG_RC_CORE) || !ctx->rc_dev)
 		return false;
-	}
 
 	if (pressed)
 		rc_keydown(ctx->rc_dev, RC_PROTO_CEC, scancode, 0);
@@ -2099,6 +2098,9 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx)
 	struct rc_dev *rc_dev;
 	int ret;
 
+	if (!IS_ENABLED(CONFIG_RC_CORE))
+		return;
+
 	rc_dev = rc_allocate_device(RC_DRIVER_SCANCODE);
 	if (!rc_dev) {
 		dev_err(ctx->dev, "Failed to allocate RC device\n");
@@ -2213,6 +2215,9 @@ static void sii8620_detach(struct drm_bridge *bridge)
 {
 	struct sii8620 *ctx = bridge_to_sii8620(bridge);
 
+	if (!IS_ENABLED(CONFIG_RC_CORE))
+		return;
+
 	rc_unregister_device(ctx->rc_dev);
 }
 
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
index 2b7539701b42..dd56996fe9c7 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
@@ -291,7 +291,7 @@ static irqreturn_t snd_dw_hdmi_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static struct snd_pcm_hardware dw_hdmi_hw = {
+static const struct snd_pcm_hardware dw_hdmi_hw = {
 	.info = SNDRV_PCM_INFO_INTERLEAVED |
 		SNDRV_PCM_INFO_BLOCK_TRANSFER |
 		SNDRV_PCM_INFO_MMAP |
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c
index ac1e001d0882..70ab4fbdc23e 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c
@@ -285,7 +285,7 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev)
 
 	ret = cec_register_adapter(cec->adap, pdev->dev.parent);
 	if (ret < 0) {
-		cec_notifier_cec_adap_unregister(cec->notify);
+		cec_notifier_cec_adap_unregister(cec->notify, cec->adap);
 		return ret;
 	}
 
@@ -302,7 +302,7 @@ static int dw_hdmi_cec_remove(struct platform_device *pdev)
 {
 	struct dw_hdmi_cec *cec = platform_get_drvdata(pdev);
 
-	cec_notifier_cec_adap_unregister(cec->notify);
+	cec_notifier_cec_adap_unregister(cec->notify, cec->adap);
 	cec_unregister_adapter(cec->adap);
 
 	return 0;
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
index 1d15cf9b6821..d7e65c869415 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
@@ -102,6 +102,7 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data,
 	}
 
 	dw_hdmi_set_sample_rate(hdmi, hparms->sample_rate);
+	dw_hdmi_set_channel_status(hdmi, hparms->iec.status);
 	dw_hdmi_set_channel_count(hdmi, hparms->channels);
 	dw_hdmi_set_channel_allocation(hdmi, hparms->cea.channel_allocation);
 
@@ -109,6 +110,14 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data,
 	hdmi_write(audio, conf0, HDMI_AUD_CONF0);
 	hdmi_write(audio, conf1, HDMI_AUD_CONF1);
 
+	return 0;
+}
+
+static int dw_hdmi_i2s_audio_startup(struct device *dev, void *data)
+{
+	struct dw_hdmi_i2s_audio_data *audio = data;
+	struct dw_hdmi *hdmi = audio->hdmi;
+
 	dw_hdmi_audio_enable(hdmi);
 
 	return 0;
@@ -151,11 +160,23 @@ static int dw_hdmi_i2s_get_dai_id(struct snd_soc_component *component,
 	return -EINVAL;
 }
 
+static int dw_hdmi_i2s_hook_plugged_cb(struct device *dev, void *data,
+				       hdmi_codec_plugged_cb fn,
+				       struct device *codec_dev)
+{
+	struct dw_hdmi_i2s_audio_data *audio = data;
+	struct dw_hdmi *hdmi = audio->hdmi;
+
+	return dw_hdmi_set_plugged_cb(hdmi, fn, codec_dev);
+}
+
 static struct hdmi_codec_ops dw_hdmi_i2s_ops = {
 	.hw_params	= dw_hdmi_i2s_hw_params,
+	.audio_startup  = dw_hdmi_i2s_audio_startup,
 	.audio_shutdown	= dw_hdmi_i2s_audio_shutdown,
 	.get_eld	= dw_hdmi_i2s_get_eld,
 	.get_dai_id	= dw_hdmi_i2s_get_dai_id,
+	.hook_plugged_cb = dw_hdmi_i2s_hook_plugged_cb,
 };
 
 static int snd_dw_hdmi_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 521d689413c8..67fca439bbfb 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -25,7 +25,9 @@
 #include <uapi/linux/videodev2.h>
 
 #include <drm/bridge/dw_hdmi.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
 #include <drm/drm_print.h>
@@ -36,6 +38,7 @@
 #include "dw-hdmi-cec.h"
 #include "dw-hdmi.h"
 
+#define DDC_CI_ADDR		0x37
 #define DDC_SEGMENT_ADDR	0x30
 
 #define HDMI_EDID_LEN		512
@@ -191,6 +194,10 @@ struct dw_hdmi {
 
 	struct mutex cec_notifier_mutex;
 	struct cec_notifier *cec_notifier;
+
+	hdmi_codec_plugged_cb plugged_cb;
+	struct device *codec_dev;
+	enum drm_connector_status last_connector_result;
 };
 
 #define HDMI_IH_PHY_STAT0_RX_SENSE \
@@ -215,6 +222,28 @@ static inline u8 hdmi_readb(struct dw_hdmi *hdmi, int offset)
 	return val;
 }
 
+static void handle_plugged_change(struct dw_hdmi *hdmi, bool plugged)
+{
+	if (hdmi->plugged_cb && hdmi->codec_dev)
+		hdmi->plugged_cb(hdmi->codec_dev, plugged);
+}
+
+int dw_hdmi_set_plugged_cb(struct dw_hdmi *hdmi, hdmi_codec_plugged_cb fn,
+			   struct device *codec_dev)
+{
+	bool plugged;
+
+	mutex_lock(&hdmi->mutex);
+	hdmi->plugged_cb = fn;
+	hdmi->codec_dev = codec_dev;
+	plugged = hdmi->last_connector_result == connector_status_connected;
+	handle_plugged_change(hdmi, plugged);
+	mutex_unlock(&hdmi->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_plugged_cb);
+
 static void hdmi_modb(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned reg)
 {
 	regmap_update_bits(hdmi->regm, reg << hdmi->reg_shift, mask, data);
@@ -398,6 +427,15 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap,
 	u8 addr = msgs[0].addr;
 	int i, ret = 0;
 
+	if (addr == DDC_CI_ADDR)
+		/*
+		 * The internal I2C controller does not support the multi-byte
+		 * read and write operations needed for DDC/CI.
+		 * TOFIX: Blacklist the DDC/CI address until we filter out
+		 * unsupported I2C operations.
+		 */
+		return -EOPNOTSUPP;
+
 	dev_dbg(hdmi->dev, "xfer: num: %d, addr: %#x\n", num, addr);
 
 	for (i = 0; i < num; i++) {
@@ -580,6 +618,26 @@ static unsigned int hdmi_compute_n(unsigned int freq, unsigned long pixel_clk)
 	return n;
 }
 
+/*
+ * When transmitting IEC60958 linear PCM audio, these registers allow to
+ * configure the channel status information of all the channel status
+ * bits in the IEC60958 frame. For the moment this configuration is only
+ * used when the I2S audio interface, General Purpose Audio (GPA),
+ * or AHB audio DMA (AHBAUDDMA) interface is active
+ * (for S/PDIF interface this information comes from the stream).
+ */
+void dw_hdmi_set_channel_status(struct dw_hdmi *hdmi,
+				u8 *channel_status)
+{
+	/*
+	 * Set channel status register for frequency and word length.
+	 * Use default values for other registers.
+	 */
+	hdmi_writeb(hdmi, channel_status[3], HDMI_FC_AUDSCHNLS7);
+	hdmi_writeb(hdmi, channel_status[4], HDMI_FC_AUDSCHNLS8);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_channel_status);
+
 static void hdmi_set_clk_regenerator(struct dw_hdmi *hdmi,
 	unsigned long pixel_clk, unsigned int sample_rate)
 {
@@ -1712,6 +1770,41 @@ static void hdmi_config_vendor_specific_infoframe(struct dw_hdmi *hdmi,
 			HDMI_FC_DATAUTO0_VSD_MASK);
 }
 
+static void hdmi_config_drm_infoframe(struct dw_hdmi *hdmi)
+{
+	const struct drm_connector_state *conn_state = hdmi->connector.state;
+	struct hdmi_drm_infoframe frame;
+	u8 buffer[30];
+	ssize_t err;
+	int i;
+
+	if (!hdmi->plat_data->use_drm_infoframe)
+		return;
+
+	hdmi_modb(hdmi, HDMI_FC_PACKET_TX_EN_DRM_DISABLE,
+		  HDMI_FC_PACKET_TX_EN_DRM_MASK, HDMI_FC_PACKET_TX_EN);
+
+	err = drm_hdmi_infoframe_set_hdr_metadata(&frame, conn_state);
+	if (err < 0)
+		return;
+
+	err = hdmi_drm_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		dev_err(hdmi->dev, "Failed to pack drm infoframe: %zd\n", err);
+		return;
+	}
+
+	hdmi_writeb(hdmi, frame.version, HDMI_FC_DRM_HB0);
+	hdmi_writeb(hdmi, frame.length, HDMI_FC_DRM_HB1);
+
+	for (i = 0; i < frame.length; i++)
+		hdmi_writeb(hdmi, buffer[4 + i], HDMI_FC_DRM_PB0 + i);
+
+	hdmi_writeb(hdmi, 1, HDMI_FC_DRM_UP);
+	hdmi_modb(hdmi, HDMI_FC_PACKET_TX_EN_DRM_ENABLE,
+		  HDMI_FC_PACKET_TX_EN_DRM_MASK, HDMI_FC_PACKET_TX_EN);
+}
+
 static void hdmi_av_composer(struct dw_hdmi *hdmi,
 			     const struct drm_display_mode *mode)
 {
@@ -2023,7 +2116,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 
 		/* HDMI Initialization Step E - Configure audio */
 		hdmi_clk_regenerator_update_pixel_clock(hdmi);
-		hdmi_enable_audio_clk(hdmi, true);
+		hdmi_enable_audio_clk(hdmi, hdmi->audio_enable);
 	}
 
 	/* not for DVI mode */
@@ -2033,6 +2126,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 		/* HDMI Initialization Step F - Configure AVI InfoFrame */
 		hdmi_config_AVI(hdmi, mode);
 		hdmi_config_vendor_specific_infoframe(hdmi, mode);
+		hdmi_config_drm_infoframe(hdmi);
 	} else {
 		dev_dbg(hdmi->dev, "%s DVI mode\n", __func__);
 	}
@@ -2161,6 +2255,7 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
 {
 	struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi,
 					     connector);
+	enum drm_connector_status result;
 
 	mutex_lock(&hdmi->mutex);
 	hdmi->force = DRM_FORCE_UNSPECIFIED;
@@ -2168,7 +2263,18 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	dw_hdmi_update_phy_mask(hdmi);
 	mutex_unlock(&hdmi->mutex);
 
-	return hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+	result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+
+	mutex_lock(&hdmi->mutex);
+	if (result != hdmi->last_connector_result) {
+		dev_dbg(hdmi->dev, "read_hpd result: %d", result);
+		handle_plugged_change(hdmi,
+				      result == connector_status_connected);
+		hdmi->last_connector_result = result;
+	}
+	mutex_unlock(&hdmi->mutex);
+
+	return result;
 }
 
 static int dw_hdmi_connector_get_modes(struct drm_connector *connector)
@@ -2199,6 +2305,45 @@ static int dw_hdmi_connector_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
+static bool hdr_metadata_equal(const struct drm_connector_state *old_state,
+			       const struct drm_connector_state *new_state)
+{
+	struct drm_property_blob *old_blob = old_state->hdr_output_metadata;
+	struct drm_property_blob *new_blob = new_state->hdr_output_metadata;
+
+	if (!old_blob || !new_blob)
+		return old_blob == new_blob;
+
+	if (old_blob->length != new_blob->length)
+		return false;
+
+	return !memcmp(old_blob->data, new_blob->data, old_blob->length);
+}
+
+static int dw_hdmi_connector_atomic_check(struct drm_connector *connector,
+					  struct drm_atomic_state *state)
+{
+	struct drm_connector_state *old_state =
+		drm_atomic_get_old_connector_state(state, connector);
+	struct drm_connector_state *new_state =
+		drm_atomic_get_new_connector_state(state, connector);
+	struct drm_crtc *crtc = new_state->crtc;
+	struct drm_crtc_state *crtc_state;
+
+	if (!crtc)
+		return 0;
+
+	if (!hdr_metadata_equal(old_state, new_state)) {
+		crtc_state = drm_atomic_get_crtc_state(state, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		crtc_state->mode_changed = true;
+	}
+
+	return 0;
+}
+
 static void dw_hdmi_connector_force(struct drm_connector *connector)
 {
 	struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi,
@@ -2223,6 +2368,7 @@ static const struct drm_connector_funcs dw_hdmi_connector_funcs = {
 
 static const struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = {
 	.get_modes = dw_hdmi_connector_get_modes,
+	.atomic_check = dw_hdmi_connector_atomic_check,
 };
 
 static int dw_hdmi_bridge_attach(struct drm_bridge *bridge)
@@ -2243,6 +2389,10 @@ static int dw_hdmi_bridge_attach(struct drm_bridge *bridge)
 				    DRM_MODE_CONNECTOR_HDMIA,
 				    hdmi->ddc);
 
+	if (hdmi->version >= 0x200a && hdmi->plat_data->use_drm_infoframe)
+		drm_object_attach_property(&connector->base,
+			connector->dev->mode_config.hdr_output_metadata_property, 0);
+
 	drm_connector_attach_encoder(connector, encoder);
 
 	cec_fill_conn_info_from_drm(&conn_info, connector);
@@ -2619,6 +2769,7 @@ __dw_hdmi_probe(struct platform_device *pdev,
 	hdmi->rxsense = true;
 	hdmi->phy_mask = (u8)~(HDMI_PHY_HPD | HDMI_PHY_RX_SENSE);
 	hdmi->mc_clkdis = 0x7f;
+	hdmi->last_connector_result = connector_status_disconnected;
 
 	mutex_init(&hdmi->mutex);
 	mutex_init(&hdmi->audio_mutex);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
index 6988f12d89d9..1999db05bc3b 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
@@ -158,6 +158,8 @@
 #define HDMI_FC_SPDDEVICEINF                    0x1062
 #define HDMI_FC_AUDSCONF                        0x1063
 #define HDMI_FC_AUDSSTAT                        0x1064
+#define HDMI_FC_AUDSCHNLS7                      0x106e
+#define HDMI_FC_AUDSCHNLS8                      0x106f
 #define HDMI_FC_DATACH0FILL                     0x1070
 #define HDMI_FC_DATACH1FILL                     0x1071
 #define HDMI_FC_DATACH2FILL                     0x1072
@@ -252,6 +254,7 @@
 #define HDMI_FC_POL2                            0x10DB
 #define HDMI_FC_PRCONF                          0x10E0
 #define HDMI_FC_SCRAMBLER_CTRL                  0x10E1
+#define HDMI_FC_PACKET_TX_EN                    0x10E3
 
 #define HDMI_FC_GMD_STAT                        0x1100
 #define HDMI_FC_GMD_EN                          0x1101
@@ -287,6 +290,37 @@
 #define HDMI_FC_GMD_PB26                        0x111F
 #define HDMI_FC_GMD_PB27                        0x1120
 
+#define HDMI_FC_DRM_UP                          0x1167
+#define HDMI_FC_DRM_HB0                         0x1168
+#define HDMI_FC_DRM_HB1                         0x1169
+#define HDMI_FC_DRM_PB0                         0x116A
+#define HDMI_FC_DRM_PB1                         0x116B
+#define HDMI_FC_DRM_PB2                         0x116C
+#define HDMI_FC_DRM_PB3                         0x116D
+#define HDMI_FC_DRM_PB4                         0x116E
+#define HDMI_FC_DRM_PB5                         0x116F
+#define HDMI_FC_DRM_PB6                         0x1170
+#define HDMI_FC_DRM_PB7                         0x1171
+#define HDMI_FC_DRM_PB8                         0x1172
+#define HDMI_FC_DRM_PB9                         0x1173
+#define HDMI_FC_DRM_PB10                        0x1174
+#define HDMI_FC_DRM_PB11                        0x1175
+#define HDMI_FC_DRM_PB12                        0x1176
+#define HDMI_FC_DRM_PB13                        0x1177
+#define HDMI_FC_DRM_PB14                        0x1178
+#define HDMI_FC_DRM_PB15                        0x1179
+#define HDMI_FC_DRM_PB16                        0x117A
+#define HDMI_FC_DRM_PB17                        0x117B
+#define HDMI_FC_DRM_PB18                        0x117C
+#define HDMI_FC_DRM_PB19                        0x117D
+#define HDMI_FC_DRM_PB20                        0x117E
+#define HDMI_FC_DRM_PB21                        0x117F
+#define HDMI_FC_DRM_PB22                        0x1180
+#define HDMI_FC_DRM_PB23                        0x1181
+#define HDMI_FC_DRM_PB24                        0x1182
+#define HDMI_FC_DRM_PB25                        0x1183
+#define HDMI_FC_DRM_PB26                        0x1184
+
 #define HDMI_FC_DBGFORCE                        0x1200
 #define HDMI_FC_DBGAUD0CH0                      0x1201
 #define HDMI_FC_DBGAUD1CH0                      0x1202
@@ -742,6 +776,11 @@ enum {
 	HDMI_FC_PRCONF_OUTPUT_PR_FACTOR_MASK = 0x0F,
 	HDMI_FC_PRCONF_OUTPUT_PR_FACTOR_OFFSET = 0,
 
+/* FC_PACKET_TX_EN field values */
+	HDMI_FC_PACKET_TX_EN_DRM_MASK = 0x80,
+	HDMI_FC_PACKET_TX_EN_DRM_ENABLE = 0x80,
+	HDMI_FC_PACKET_TX_EN_DRM_DISABLE = 0x00,
+
 /* FC_AVICONF0-FC_AVICONF3 field values */
 	HDMI_FC_AVICONF0_PIX_FMT_MASK = 0x03,
 	HDMI_FC_AVICONF0_PIX_FMT_RGB = 0x00,
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
index 675442bfc1bd..b18351b6760a 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
@@ -316,7 +316,8 @@ static int dw_mipi_dsi_host_attach(struct mipi_dsi_host *host,
 		return ret;
 
 	if (panel) {
-		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DSI);
+		bridge = drm_panel_bridge_add_typed(panel,
+						    DRM_MODE_CONNECTOR_DSI);
 		if (IS_ERR(bridge))
 			return PTR_ERR(bridge);
 	}
@@ -718,7 +719,15 @@ static void dw_mipi_dsi_vertical_timing_config(struct dw_mipi_dsi *dsi,
 
 static void dw_mipi_dsi_dphy_timing_config(struct dw_mipi_dsi *dsi)
 {
+	const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops;
+	struct dw_mipi_dsi_dphy_timing timing;
 	u32 hw_version;
+	int ret;
+
+	ret = phy_ops->get_timing(dsi->plat_data->priv_data,
+				  dsi->lane_mbps, &timing);
+	if (ret)
+		DRM_DEV_ERROR(dsi->dev, "Retrieving phy timings failed\n");
 
 	/*
 	 * TODO dw drv improvements
@@ -731,16 +740,20 @@ static void dw_mipi_dsi_dphy_timing_config(struct dw_mipi_dsi *dsi)
 	hw_version = dsi_read(dsi, DSI_VERSION) & VERSION;
 
 	if (hw_version >= HWVER_131) {
-		dsi_write(dsi, DSI_PHY_TMR_CFG, PHY_HS2LP_TIME_V131(0x40) |
-			  PHY_LP2HS_TIME_V131(0x40));
+		dsi_write(dsi, DSI_PHY_TMR_CFG,
+			  PHY_HS2LP_TIME_V131(timing.data_hs2lp) |
+			  PHY_LP2HS_TIME_V131(timing.data_lp2hs));
 		dsi_write(dsi, DSI_PHY_TMR_RD_CFG, MAX_RD_TIME_V131(10000));
 	} else {
-		dsi_write(dsi, DSI_PHY_TMR_CFG, PHY_HS2LP_TIME(0x40) |
-			  PHY_LP2HS_TIME(0x40) | MAX_RD_TIME(10000));
+		dsi_write(dsi, DSI_PHY_TMR_CFG,
+			  PHY_HS2LP_TIME(timing.data_hs2lp) |
+			  PHY_LP2HS_TIME(timing.data_lp2hs) |
+			  MAX_RD_TIME(10000));
 	}
 
-	dsi_write(dsi, DSI_PHY_TMR_LPCLK_CFG, PHY_CLKHS2LP_TIME(0x40)
-		  | PHY_CLKLP2HS_TIME(0x40));
+	dsi_write(dsi, DSI_PHY_TMR_LPCLK_CFG,
+		  PHY_CLKHS2LP_TIME(timing.clk_hs2lp) |
+		  PHY_CLKLP2HS_TIME(timing.clk_lp2hs));
 }
 
 static void dw_mipi_dsi_dphy_interface_config(struct dw_mipi_dsi *dsi)
@@ -797,9 +810,6 @@ static void dw_mipi_dsi_bridge_post_disable(struct drm_bridge *bridge)
 	struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge);
 	const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops;
 
-	if (phy_ops->power_off)
-		phy_ops->power_off(dsi->plat_data->priv_data);
-
 	/*
 	 * Switch to command mode before panel-bridge post_disable &
 	 * panel unprepare.
@@ -816,6 +826,9 @@ static void dw_mipi_dsi_bridge_post_disable(struct drm_bridge *bridge)
 	 */
 	dsi->panel_bridge->funcs->post_disable(dsi->panel_bridge);
 
+	if (phy_ops->power_off)
+		phy_ops->power_off(dsi->plat_data->priv_data);
+
 	if (dsi->slave) {
 		dw_mipi_dsi_disable(dsi->slave);
 		clk_disable_unprepare(dsi->slave->pclk);
@@ -882,6 +895,9 @@ static void dw_mipi_dsi_mode_set(struct dw_mipi_dsi *dsi,
 
 	/* Switch to cmd mode for panel-bridge pre_enable & panel prepare */
 	dw_mipi_dsi_set_mode(dsi, 0);
+
+	if (phy_ops->power_on)
+		phy_ops->power_on(dsi->plat_data->priv_data);
 }
 
 static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge,
@@ -898,15 +914,11 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge,
 static void dw_mipi_dsi_bridge_enable(struct drm_bridge *bridge)
 {
 	struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge);
-	const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops;
 
 	/* Switch to video mode for panel-bridge enable & panel enable */
 	dw_mipi_dsi_set_mode(dsi, MIPI_DSI_MODE_VIDEO);
 	if (dsi->slave)
 		dw_mipi_dsi_set_mode(dsi->slave, MIPI_DSI_MODE_VIDEO);
-
-	if (phy_ops->power_on)
-		phy_ops->power_on(dsi->plat_data->priv_data);
 }
 
 static enum drm_mode_status
@@ -981,7 +993,6 @@ __dw_mipi_dsi_probe(struct platform_device *pdev,
 	struct device *dev = &pdev->dev;
 	struct reset_control *apb_rst;
 	struct dw_mipi_dsi *dsi;
-	struct resource *res;
 	int ret;
 
 	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
@@ -991,17 +1002,14 @@ __dw_mipi_dsi_probe(struct platform_device *pdev,
 	dsi->dev = dev;
 	dsi->plat_data = plat_data;
 
-	if (!plat_data->phy_ops->init || !plat_data->phy_ops->get_lane_mbps) {
+	if (!plat_data->phy_ops->init || !plat_data->phy_ops->get_lane_mbps ||
+	    !plat_data->phy_ops->get_timing) {
 		DRM_ERROR("Phy not properly configured\n");
 		return ERR_PTR(-ENODEV);
 	}
 
 	if (!plat_data->base) {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!res)
-			return ERR_PTR(-ENODEV);
-
-		dsi->base = devm_ioremap_resource(dev, res);
+		dsi->base = devm_platform_ioremap_resource(pdev, 0);
 		if (IS_ERR(dsi->base))
 			return ERR_PTR(-ENODEV);
 
diff --git a/drivers/gpu/drm/bridge/tc358764.c b/drivers/gpu/drm/bridge/tc358764.c
index 170f162ffa55..96207fcfde19 100644
--- a/drivers/gpu/drm/bridge/tc358764.c
+++ b/drivers/gpu/drm/bridge/tc358764.c
@@ -16,6 +16,7 @@
 #include <video/mipi_display.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
@@ -281,7 +282,7 @@ static int tc358764_get_modes(struct drm_connector *connector)
 {
 	struct tc358764 *ctx = connector_to_tc358764(connector);
 
-	return drm_panel_get_modes(ctx->panel);
+	return drm_panel_get_modes(ctx->panel, connector);
 }
 
 static const
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 8a8d605021f0..3709e5ace724 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
@@ -228,7 +229,9 @@ static bool tc_test_pattern;
 module_param_named(test, tc_test_pattern, bool, 0644);
 
 struct tc_edp_link {
-	struct drm_dp_link	base;
+	u8			dpcd[DP_RECEIVER_CAP_SIZE];
+	unsigned int		rate;
+	u8			num_lanes;
 	u8			assr;
 	bool			scrambler_dis;
 	bool			spread;
@@ -437,9 +440,9 @@ static u32 tc_srcctrl(struct tc_data *tc)
 		reg |= DP0_SRCCTRL_SCRMBLDIS;	/* Scrambler Disabled */
 	if (tc->link.spread)
 		reg |= DP0_SRCCTRL_SSCG;	/* Spread Spectrum Enable */
-	if (tc->link.base.num_lanes == 2)
+	if (tc->link.num_lanes == 2)
 		reg |= DP0_SRCCTRL_LANES_2;	/* Two Main Channel Lanes */
-	if (tc->link.base.rate != 162000)
+	if (tc->link.rate != 162000)
 		reg |= DP0_SRCCTRL_BW27;	/* 2.7 Gbps link */
 	return reg;
 }
@@ -662,23 +665,35 @@ err:
 
 static int tc_get_display_props(struct tc_data *tc)
 {
+	u8 revision, num_lanes;
+	unsigned int rate;
 	int ret;
 	u8 reg;
 
 	/* Read DP Rx Link Capability */
-	ret = drm_dp_link_probe(&tc->aux, &tc->link.base);
+	ret = drm_dp_dpcd_read(&tc->aux, DP_DPCD_REV, tc->link.dpcd,
+			       DP_RECEIVER_CAP_SIZE);
 	if (ret < 0)
 		goto err_dpcd_read;
-	if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) {
+
+	revision = tc->link.dpcd[DP_DPCD_REV];
+	rate = drm_dp_max_link_rate(tc->link.dpcd);
+	num_lanes = drm_dp_max_lane_count(tc->link.dpcd);
+
+	if (rate != 162000 && rate != 270000) {
 		dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n");
-		tc->link.base.rate = 270000;
+		rate = 270000;
 	}
 
-	if (tc->link.base.num_lanes > 2) {
+	tc->link.rate = rate;
+
+	if (num_lanes > 2) {
 		dev_dbg(tc->dev, "Falling to 2 lanes\n");
-		tc->link.base.num_lanes = 2;
+		num_lanes = 2;
 	}
 
+	tc->link.num_lanes = num_lanes;
+
 	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, &reg);
 	if (ret < 0)
 		goto err_dpcd_read;
@@ -696,11 +711,11 @@ static int tc_get_display_props(struct tc_data *tc)
 	tc->link.assr = reg & DP_ALTERNATE_SCRAMBLER_RESET_ENABLE;
 
 	dev_dbg(tc->dev, "DPCD rev: %d.%d, rate: %s, lanes: %d, framing: %s\n",
-		tc->link.base.revision >> 4, tc->link.base.revision & 0x0f,
-		(tc->link.base.rate == 162000) ? "1.62Gbps" : "2.7Gbps",
-		tc->link.base.num_lanes,
-		(tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) ?
-		"enhanced" : "non-enhanced");
+		revision >> 4, revision & 0x0f,
+		(tc->link.rate == 162000) ? "1.62Gbps" : "2.7Gbps",
+		tc->link.num_lanes,
+		drm_dp_enhanced_frame_cap(tc->link.dpcd) ?
+		"enhanced" : "default");
 	dev_dbg(tc->dev, "Downspread: %s, scrambler: %s\n",
 		tc->link.spread ? "0.5%" : "0.0%",
 		tc->link.scrambler_dis ? "disabled" : "enabled");
@@ -739,7 +754,7 @@ static int tc_set_video_mode(struct tc_data *tc,
 	 */
 
 	in_bw = mode->clock * bits_per_pixel / 8;
-	out_bw = tc->link.base.num_lanes * tc->link.base.rate;
+	out_bw = tc->link.num_lanes * tc->link.rate;
 	max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw);
 
 	dev_dbg(tc->dev, "set mode %dx%d\n",
@@ -901,7 +916,7 @@ static int tc_main_link_enable(struct tc_data *tc)
 	/* SSCG and BW27 on DP1 must be set to the same as on DP0 */
 	ret = regmap_write(tc->regmap, DP1_SRCCTRL,
 		 (tc->link.spread ? DP0_SRCCTRL_SSCG : 0) |
-		 ((tc->link.base.rate != 162000) ? DP0_SRCCTRL_BW27 : 0));
+		 ((tc->link.rate != 162000) ? DP0_SRCCTRL_BW27 : 0));
 	if (ret)
 		return ret;
 
@@ -911,7 +926,7 @@ static int tc_main_link_enable(struct tc_data *tc)
 
 	/* Setup Main Link */
 	dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN | PHY_M0_EN;
-	if (tc->link.base.num_lanes == 2)
+	if (tc->link.num_lanes == 2)
 		dp_phy_ctrl |= PHY_2LANE;
 
 	ret = regmap_write(tc->regmap, DP_PHY_CTRL, dp_phy_ctrl);
@@ -974,7 +989,13 @@ static int tc_main_link_enable(struct tc_data *tc)
 	}
 
 	/* Setup Link & DPRx Config for Training */
-	ret = drm_dp_link_configure(aux, &tc->link.base);
+	tmp[0] = drm_dp_link_rate_to_bw_code(tc->link.rate);
+	tmp[1] = tc->link.num_lanes;
+
+	if (drm_dp_enhanced_frame_cap(tc->link.dpcd))
+		tmp[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	ret = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, tmp, 2);
 	if (ret < 0)
 		goto err_dpcd_write;
 
@@ -1018,9 +1039,8 @@ static int tc_main_link_enable(struct tc_data *tc)
 
 	/* Enable DP0 to start Link Training */
 	ret = regmap_write(tc->regmap, DP0CTL,
-			   ((tc->link.base.capabilities &
-			     DP_LINK_CAP_ENHANCED_FRAMING) ? EF_EN : 0) |
-			   DP_EN);
+			   (drm_dp_enhanced_frame_cap(tc->link.dpcd) ?
+				EF_EN : 0) | DP_EN);
 	if (ret)
 		return ret;
 
@@ -1099,7 +1119,7 @@ static int tc_main_link_enable(struct tc_data *tc)
 		ret = -ENODEV;
 	}
 
-	if (tc->link.base.num_lanes == 2) {
+	if (tc->link.num_lanes == 2) {
 		value = (tmp[0] >> 4) & DP_CHANNEL_EQ_BITS;
 
 		if (value != DP_CHANNEL_EQ_BITS) {
@@ -1170,7 +1190,7 @@ static int tc_stream_enable(struct tc_data *tc)
 		return ret;
 
 	value = VID_MN_GEN | DP_EN;
-	if (tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+	if (drm_dp_enhanced_frame_cap(tc->link.dpcd))
 		value |= EF_EN;
 	ret = regmap_write(tc->regmap, DP0CTL, value);
 	if (ret)
@@ -1296,7 +1316,7 @@ static enum drm_mode_status tc_mode_valid(struct drm_bridge *bridge,
 		return MODE_CLOCK_HIGH;
 
 	req = mode->clock * bits_per_pixel / 8;
-	avail = tc->link.base.num_lanes * tc->link.base.rate;
+	avail = tc->link.num_lanes * tc->link.rate;
 
 	if (req > avail)
 		return MODE_BAD;
@@ -1326,7 +1346,7 @@ static int tc_connector_get_modes(struct drm_connector *connector)
 		return 0;
 	}
 
-	count = drm_panel_get_modes(tc->panel);
+	count = drm_panel_get_modes(tc->panel, connector);
 	if (count > 0)
 		return count;
 
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 0a580957c8cf..9a2dd986afa5 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -17,6 +17,7 @@
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_of.h>
@@ -205,7 +206,7 @@ static int ti_sn_bridge_connector_get_modes(struct drm_connector *connector)
 {
 	struct ti_sn_bridge *pdata = connector_to_ti_sn_bridge(connector);
 
-	return drm_panel_get_modes(pdata->panel);
+	return drm_panel_get_modes(pdata->panel, connector);
 }
 
 static enum drm_mode_status
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index 61cc2354ef1b..6f6d6d1e60ae 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
@@ -284,8 +285,8 @@ static int tfp410_get_connector_properties(struct tfp410 *dvi)
 	else
 		dvi->connector_type = DRM_MODE_CONNECTOR_DVID;
 
-	dvi->hpd = fwnode_get_named_gpiod(&connector_node->fwnode,
-					"hpd-gpios", 0, GPIOD_IN, "hpd");
+	dvi->hpd = fwnode_gpiod_get_index(&connector_node->fwnode,
+					  "hpd", 0, GPIOD_IN, "hpd");
 	if (IS_ERR(dvi->hpd)) {
 		ret = PTR_ERR(dvi->hpd);
 		dvi->hpd = NULL;
diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/cirrus/cirrus.c
index 36a69aec8a4b..248c9f765c45 100644
--- a/drivers/gpu/drm/cirrus/cirrus.c
+++ b/drivers/gpu/drm/cirrus/cirrus.c
@@ -390,7 +390,7 @@ static int cirrus_conn_init(struct cirrus_device *cirrus)
 /* ------------------------------------------------------------------ */
 /* cirrus (simple) display pipe					      */
 
-static enum drm_mode_status cirrus_pipe_mode_valid(struct drm_crtc *crtc,
+static enum drm_mode_status cirrus_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
 						   const struct drm_display_mode *mode)
 {
 	if (cirrus_check_size(mode->hdisplay, mode->vdisplay, NULL) < 0)
@@ -510,7 +510,7 @@ static void cirrus_mode_config_init(struct cirrus_device *cirrus)
 
 /* ------------------------------------------------------------------ */
 
-DEFINE_DRM_GEM_SHMEM_FOPS(cirrus_fops);
+DEFINE_DRM_GEM_FOPS(cirrus_fops);
 
 static struct drm_driver cirrus_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
@@ -532,7 +532,7 @@ static int cirrus_pci_probe(struct pci_dev *pdev,
 	struct cirrus_device *cirrus;
 	int ret;
 
-	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "cirrusdrmfb");
+	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "cirrusdrmfb");
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
deleted file mode 100644
index 1f73916e528e..000000000000
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Red Hat
- *
- * Authors: Matthew Garrett
- *          Dave Airlie
- */
-#ifndef __CIRRUS_DRV_H__
-#define __CIRRUS_DRV_H__
-
-#include <video/vga.h>
-
-#include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
-
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_memory.h>
-#include <drm/ttm/ttm_module.h>
-
-#include <drm/drm_gem.h>
-
-#define DRIVER_AUTHOR		"Matthew Garrett"
-
-#define DRIVER_NAME		"cirrus"
-#define DRIVER_DESC		"qemu Cirrus emulation"
-#define DRIVER_DATE		"20110418"
-
-#define DRIVER_MAJOR		1
-#define DRIVER_MINOR		0
-#define DRIVER_PATCHLEVEL	0
-
-#define CIRRUSFB_CONN_LIMIT 1
-
-#define RREG8(reg) ioread8(((void __iomem *)cdev->rmmio) + (reg))
-#define WREG8(reg, v) iowrite8(v, ((void __iomem *)cdev->rmmio) + (reg))
-#define RREG32(reg) ioread32(((void __iomem *)cdev->rmmio) + (reg))
-#define WREG32(reg, v) iowrite32(v, ((void __iomem *)cdev->rmmio) + (reg))
-
-#define SEQ_INDEX 4
-#define SEQ_DATA 5
-
-#define WREG_SEQ(reg, v)					\
-	do {							\
-		WREG8(SEQ_INDEX, reg);				\
-		WREG8(SEQ_DATA, v);				\
-	} while (0)						\
-
-#define CRT_INDEX 0x14
-#define CRT_DATA 0x15
-
-#define WREG_CRT(reg, v)					\
-	do {							\
-		WREG8(CRT_INDEX, reg);				\
-		WREG8(CRT_DATA, v);				\
-	} while (0)						\
-
-#define GFX_INDEX 0xe
-#define GFX_DATA 0xf
-
-#define WREG_GFX(reg, v)					\
-	do {							\
-		WREG8(GFX_INDEX, reg);				\
-		WREG8(GFX_DATA, v);				\
-	} while (0)						\
-
-/*
- * Cirrus has a "hidden" DAC register that can be accessed by writing to
- * the pixel mask register to reset the state, then reading from the register
- * four times. The next write will then pass to the DAC
- */
-#define VGA_DAC_MASK 0x6
-
-#define WREG_HDR(v)						\
-	do {							\
-		RREG8(VGA_DAC_MASK);					\
-		RREG8(VGA_DAC_MASK);					\
-		RREG8(VGA_DAC_MASK);					\
-		RREG8(VGA_DAC_MASK);					\
-		WREG8(VGA_DAC_MASK, v);					\
-	} while (0)						\
-
-
-#define CIRRUS_MAX_FB_HEIGHT 4096
-#define CIRRUS_MAX_FB_WIDTH 4096
-
-#define CIRRUS_DPMS_CLEARED (-1)
-
-#define to_cirrus_crtc(x) container_of(x, struct cirrus_crtc, base)
-#define to_cirrus_encoder(x) container_of(x, struct cirrus_encoder, base)
-
-struct cirrus_crtc {
-	struct drm_crtc			base;
-	int				last_dpms;
-	bool				enabled;
-};
-
-struct cirrus_fbdev;
-struct cirrus_mode_info {
-	struct cirrus_crtc		*crtc;
-	/* pointer to fbdev info structure */
-	struct cirrus_fbdev		*gfbdev;
-};
-
-struct cirrus_encoder {
-	struct drm_encoder		base;
-	int				last_dpms;
-};
-
-struct cirrus_connector {
-	struct drm_connector		base;
-};
-
-struct cirrus_mc {
-	resource_size_t			vram_size;
-	resource_size_t			vram_base;
-};
-
-struct cirrus_device {
-	struct drm_device		*dev;
-	unsigned long			flags;
-
-	resource_size_t			rmmio_base;
-	resource_size_t			rmmio_size;
-	void __iomem			*rmmio;
-
-	struct cirrus_mc			mc;
-	struct cirrus_mode_info		mode_info;
-
-	int				num_crtc;
-	int fb_mtrr;
-
-	struct {
-		struct ttm_bo_device bdev;
-	} ttm;
-	bool mm_inited;
-};
-
-
-struct cirrus_fbdev {
-	struct drm_fb_helper helper; /* must be first */
-	struct drm_framebuffer *gfb;
-	void *sysram;
-	int size;
-	int x1, y1, x2, y2; /* dirty rect */
-	spinlock_t dirty_lock;
-};
-
-struct cirrus_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
-};
-#define gem_to_cirrus_bo(gobj) container_of((gobj), struct cirrus_bo, gem)
-
-static inline struct cirrus_bo *
-cirrus_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct cirrus_bo, bo);
-}
-
-
-#define to_cirrus_obj(x) container_of(x, struct cirrus_gem_object, base)
-
-				/* cirrus_main.c */
-int cirrus_device_init(struct cirrus_device *cdev,
-		      struct drm_device *ddev,
-		      struct pci_dev *pdev,
-		      uint32_t flags);
-void cirrus_device_fini(struct cirrus_device *cdev);
-void cirrus_gem_free_object(struct drm_gem_object *obj);
-int cirrus_dumb_mmap_offset(struct drm_file *file,
-			    struct drm_device *dev,
-			    uint32_t handle,
-			    uint64_t *offset);
-int cirrus_gem_create(struct drm_device *dev,
-		   u32 size, bool iskernel,
-		      struct drm_gem_object **obj);
-int cirrus_dumb_create(struct drm_file *file,
-		    struct drm_device *dev,
-		       struct drm_mode_create_dumb *args);
-
-int cirrus_framebuffer_init(struct drm_device *dev,
-			    struct drm_framebuffer *gfb,
-			    const struct drm_mode_fb_cmd2 *mode_cmd,
-			    struct drm_gem_object *obj);
-
-bool cirrus_check_framebuffer(struct cirrus_device *cdev, int width, int height,
-			      int bpp, int pitch);
-
-				/* cirrus_display.c */
-int cirrus_modeset_init(struct cirrus_device *cdev);
-void cirrus_modeset_fini(struct cirrus_device *cdev);
-
-				/* cirrus_fbdev.c */
-int cirrus_fbdev_init(struct cirrus_device *cdev);
-void cirrus_fbdev_fini(struct cirrus_device *cdev);
-
-
-
-				/* cirrus_irq.c */
-void cirrus_driver_irq_preinstall(struct drm_device *dev);
-int cirrus_driver_irq_postinstall(struct drm_device *dev);
-void cirrus_driver_irq_uninstall(struct drm_device *dev);
-irqreturn_t cirrus_driver_irq_handler(int irq, void *arg);
-
-				/* cirrus_kms.c */
-int cirrus_driver_load(struct drm_device *dev, unsigned long flags);
-void cirrus_driver_unload(struct drm_device *dev);
-extern struct drm_ioctl_desc cirrus_ioctls[];
-extern int cirrus_max_ioctl;
-
-int cirrus_mm_init(struct cirrus_device *cirrus);
-void cirrus_mm_fini(struct cirrus_device *cirrus);
-void cirrus_ttm_placement(struct cirrus_bo *bo, int domain);
-int cirrus_bo_create(struct drm_device *dev, int size, int align,
-		     uint32_t flags, struct cirrus_bo **pcirrusbo);
-int cirrus_mmap(struct file *filp, struct vm_area_struct *vma);
-
-static inline int cirrus_bo_reserve(struct cirrus_bo *bo, bool no_wait)
-{
-	int ret;
-
-	ret = ttm_bo_reserve(&bo->bo, true, no_wait, NULL);
-	if (ret) {
-		if (ret != -ERESTARTSYS && ret != -EBUSY)
-			DRM_ERROR("reserve failed %p\n", bo);
-		return ret;
-	}
-	return 0;
-}
-
-static inline void cirrus_bo_unreserve(struct cirrus_bo *bo)
-{
-	ttm_bo_unreserve(&bo->bo);
-}
-
-int cirrus_bo_push_sysram(struct cirrus_bo *bo);
-int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr);
-
-extern int cirrus_bpp;
-
-#endif				/* __CIRRUS_DRV_H__ */
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 6e09f27fd9d6..4c7ad46fdd21 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -212,7 +212,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request)
 	if (!entry)
 		return -ENOMEM;
 
-	pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE;
+	pages = DIV_ROUND_UP(request->size, PAGE_SIZE);
 	type = (u32) request->type;
 	memory = agp_allocate_memory(dev->agp->bridge, pages, type);
 	if (!memory) {
@@ -325,7 +325,7 @@ int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request)
 	entry = drm_agp_lookup_entry(dev, request->handle);
 	if (!entry || entry->bound)
 		return -EINVAL;
-	page = (request->offset + PAGE_SIZE - 1) / PAGE_SIZE;
+	page = DIV_ROUND_UP(request->offset, PAGE_SIZE);
 	retcode = drm_bind_agp(entry->memory, page);
 	if (retcode)
 		return retcode;
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 14aeaf736321..d33691512a8e 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -251,7 +251,7 @@ EXPORT_SYMBOL(drm_atomic_state_clear);
  * @ref: This atomic state to deallocate
  *
  * This frees all memory associated with an atomic state, including all the
- * per-object state for planes, crtcs and connectors.
+ * per-object state for planes, CRTCs and connectors.
  */
 void __drm_atomic_state_free(struct kref *ref)
 {
@@ -272,12 +272,12 @@ void __drm_atomic_state_free(struct kref *ref)
 EXPORT_SYMBOL(__drm_atomic_state_free);
 
 /**
- * drm_atomic_get_crtc_state - get crtc state
+ * drm_atomic_get_crtc_state - get CRTC state
  * @state: global atomic state object
- * @crtc: crtc to get state object for
+ * @crtc: CRTC to get state object for
  *
- * This function returns the crtc state for the given crtc, allocating it if
- * needed. It will also grab the relevant crtc lock to make sure that the state
+ * This function returns the CRTC state for the given CRTC, allocating it if
+ * needed. It will also grab the relevant CRTC lock to make sure that the state
  * is consistent.
  *
  * Returns:
@@ -688,10 +688,12 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
  * associated state struct &drm_private_state.
  *
  * Similar to userspace-exposed objects, private state structures can be
- * acquired by calling drm_atomic_get_private_obj_state(). Since this function
- * does not take care of locking, drivers should wrap it for each type of
- * private state object they have with the required call to drm_modeset_lock()
- * for the corresponding &drm_modeset_lock.
+ * acquired by calling drm_atomic_get_private_obj_state(). This also takes care
+ * of locking, hence drivers should not have a need to call drm_modeset_lock()
+ * directly. Sequence of the actual hardware state commit is not handled,
+ * drivers might need to keep track of struct drm_crtc_commit within subclassed
+ * structure of &drm_private_state as necessary, e.g. similar to
+ * &drm_plane_state.commit. See also &drm_atomic_state.fake_commit.
  *
  * All private state structures contained in a &drm_atomic_state update can be
  * iterated using for_each_oldnew_private_obj_in_state(),
@@ -1016,14 +1018,14 @@ static void drm_atomic_connector_print_state(struct drm_printer *p,
 }
 
 /**
- * drm_atomic_add_affected_connectors - add connectors for crtc
+ * drm_atomic_add_affected_connectors - add connectors for CRTC
  * @state: atomic state
- * @crtc: DRM crtc
+ * @crtc: DRM CRTC
  *
  * This function walks the current configuration and adds all connectors
  * currently using @crtc to the atomic configuration @state. Note that this
  * function must acquire the connection mutex. This can potentially cause
- * unneeded seralization if the update is just for the planes on one crtc. Hence
+ * unneeded seralization if the update is just for the planes on one CRTC. Hence
  * drivers and helpers should only call this when really needed (e.g. when a
  * full modeset needs to happen due to some change).
  *
@@ -1076,9 +1078,9 @@ drm_atomic_add_affected_connectors(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_add_affected_connectors);
 
 /**
- * drm_atomic_add_affected_planes - add planes for crtc
+ * drm_atomic_add_affected_planes - add planes for CRTC
  * @state: atomic state
- * @crtc: DRM crtc
+ * @crtc: DRM CRTC
  *
  * This function walks the current configuration and adds all planes
  * currently used by @crtc to the atomic configuration @state. This is useful
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 2dd2cd87cdbb..4511c2e07bb9 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -31,6 +31,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_device.h>
 #include <drm/drm_plane_helper.h>
@@ -97,17 +98,6 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	}
 }
 
-/*
- * For connectors that support multiple encoders, either the
- * .atomic_best_encoder() or .best_encoder() operation must be implemented.
- */
-static struct drm_encoder *
-pick_single_encoder_for_connector(struct drm_connector *connector)
-{
-	WARN_ON(connector->encoder_ids[1]);
-	return drm_encoder_find(connector->dev, NULL, connector->encoder_ids[0]);
-}
-
 static int handle_conflicting_encoders(struct drm_atomic_state *state,
 				       bool disable_conflicting_encoders)
 {
@@ -135,7 +125,7 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state,
 		else if (funcs->best_encoder)
 			new_encoder = funcs->best_encoder(connector);
 		else
-			new_encoder = pick_single_encoder_for_connector(connector);
+			new_encoder = drm_connector_get_single_encoder(connector);
 
 		if (new_encoder) {
 			if (encoder_mask & drm_encoder_mask(new_encoder)) {
@@ -160,8 +150,8 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state,
 	 * is not set, an error is returned. Userspace can provide a solution
 	 * through the atomic ioctl.
 	 *
-	 * If the flag is set conflicting connectors are removed from the crtc
-	 * and the crtc is disabled if no encoder is left. This preserves
+	 * If the flag is set conflicting connectors are removed from the CRTC
+	 * and the CRTC is disabled if no encoder is left. This preserves
 	 * compatibility with the legacy set_config behavior.
 	 */
 	drm_connector_list_iter_begin(state->dev, &conn_iter);
@@ -230,7 +220,7 @@ set_best_encoder(struct drm_atomic_state *state,
 		crtc = conn_state->connector->state->crtc;
 
 		/* A NULL crtc is an error here because we should have
-		 *  duplicated a NULL best_encoder when crtc was NULL.
+		 * duplicated a NULL best_encoder when crtc was NULL.
 		 * As an exception restoring duplicated atomic state
 		 * during resume is allowed, so don't warn when
 		 * best_encoder is equal to encoder we intend to set.
@@ -359,7 +349,7 @@ update_connector_routing(struct drm_atomic_state *state,
 	else if (funcs->best_encoder)
 		new_encoder = funcs->best_encoder(connector);
 	else
-		new_encoder = pick_single_encoder_for_connector(connector);
+		new_encoder = drm_connector_get_single_encoder(connector);
 
 	if (!new_encoder) {
 		DRM_DEBUG_ATOMIC("No suitable encoder found for [CONNECTOR:%d:%s]\n",
@@ -429,6 +419,7 @@ mode_fixup(struct drm_atomic_state *state)
 	for_each_new_connector_in_state(state, connector, new_conn_state, i) {
 		const struct drm_encoder_helper_funcs *funcs;
 		struct drm_encoder *encoder;
+		struct drm_bridge *bridge;
 
 		WARN_ON(!!new_conn_state->best_encoder != !!new_conn_state->crtc);
 
@@ -445,8 +436,10 @@ mode_fixup(struct drm_atomic_state *state)
 		encoder = new_conn_state->best_encoder;
 		funcs = encoder->helper_private;
 
-		ret = drm_bridge_mode_fixup(encoder->bridge, &new_crtc_state->mode,
-				&new_crtc_state->adjusted_mode);
+		bridge = drm_bridge_chain_get_first_bridge(encoder);
+		ret = drm_bridge_chain_mode_fixup(bridge,
+					&new_crtc_state->mode,
+					&new_crtc_state->adjusted_mode);
 		if (!ret) {
 			DRM_DEBUG_ATOMIC("Bridge fixup failed\n");
 			return -EINVAL;
@@ -482,7 +475,7 @@ mode_fixup(struct drm_atomic_state *state)
 			continue;
 
 		funcs = crtc->helper_private;
-		if (!funcs->mode_fixup)
+		if (!funcs || !funcs->mode_fixup)
 			continue;
 
 		ret = funcs->mode_fixup(crtc, &new_crtc_state->mode,
@@ -502,6 +495,7 @@ static enum drm_mode_status mode_valid_path(struct drm_connector *connector,
 					    struct drm_crtc *crtc,
 					    const struct drm_display_mode *mode)
 {
+	struct drm_bridge *bridge;
 	enum drm_mode_status ret;
 
 	ret = drm_encoder_mode_valid(encoder, mode);
@@ -511,7 +505,8 @@ static enum drm_mode_status mode_valid_path(struct drm_connector *connector,
 		return ret;
 	}
 
-	ret = drm_bridge_mode_valid(encoder->bridge, mode);
+	bridge = drm_bridge_chain_get_first_bridge(encoder);
+	ret = drm_bridge_chain_mode_valid(bridge, mode);
 	if (ret != MODE_OK) {
 		DRM_DEBUG_ATOMIC("[BRIDGE] mode_valid() failed\n");
 		return ret;
@@ -566,27 +561,27 @@ mode_valid(struct drm_atomic_state *state)
  * @state: the driver state object
  *
  * Check the state object to see if the requested state is physically possible.
- * This does all the crtc and connector related computations for an atomic
+ * This does all the CRTC and connector related computations for an atomic
  * update and adds any additional connectors needed for full modesets. It calls
  * the various per-object callbacks in the follow order:
  *
  * 1. &drm_connector_helper_funcs.atomic_best_encoder for determining the new encoder.
  * 2. &drm_connector_helper_funcs.atomic_check to validate the connector state.
- * 3. If it's determined a modeset is needed then all connectors on the affected crtc
- *    crtc are added and &drm_connector_helper_funcs.atomic_check is run on them.
+ * 3. If it's determined a modeset is needed then all connectors on the affected
+ *    CRTC are added and &drm_connector_helper_funcs.atomic_check is run on them.
  * 4. &drm_encoder_helper_funcs.mode_valid, &drm_bridge_funcs.mode_valid and
  *    &drm_crtc_helper_funcs.mode_valid are called on the affected components.
  * 5. &drm_bridge_funcs.mode_fixup is called on all encoder bridges.
  * 6. &drm_encoder_helper_funcs.atomic_check is called to validate any encoder state.
- *    This function is only called when the encoder will be part of a configured crtc,
+ *    This function is only called when the encoder will be part of a configured CRTC,
  *    it must not be used for implementing connector property validation.
  *    If this function is NULL, &drm_atomic_encoder_helper_funcs.mode_fixup is called
  *    instead.
- * 7. &drm_crtc_helper_funcs.mode_fixup is called last, to fix up the mode with crtc constraints.
+ * 7. &drm_crtc_helper_funcs.mode_fixup is called last, to fix up the mode with CRTC constraints.
  *
  * &drm_crtc_state.mode_changed is set when the input mode is changed.
  * &drm_crtc_state.connectors_changed is set when a connector is added or
- * removed from the crtc.  &drm_crtc_state.active_changed is set when
+ * removed from the CRTC.  &drm_crtc_state.active_changed is set when
  * &drm_crtc_state.active changes, which is used for DPMS.
  * See also: drm_atomic_crtc_needs_modeset()
  *
@@ -697,7 +692,7 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 
 	/*
 	 * After all the routing has been prepared we need to add in any
-	 * connector which is itself unchanged, but whose crtc changes its
+	 * connector which is itself unchanged, but whose CRTC changes its
 	 * configuration. This must be done before calling mode_fixup in case a
 	 * crtc only changed its mode but has the same set of connectors.
 	 */
@@ -746,13 +741,13 @@ EXPORT_SYMBOL(drm_atomic_helper_check_modeset);
 /**
  * drm_atomic_helper_check_plane_state() - Check plane state for validity
  * @plane_state: plane state to check
- * @crtc_state: crtc state to check
+ * @crtc_state: CRTC state to check
  * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point
  * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point
  * @can_position: is it legal to position the plane such that it
- *                doesn't cover the entire crtc?  This will generally
+ *                doesn't cover the entire CRTC?  This will generally
  *                only be false for primary planes.
- * @can_update_disabled: can the plane be updated while the crtc
+ * @can_update_disabled: can the plane be updated while the CRTC
  *                       is disabled?
  *
  * Checks that a desired plane update is valid, and updates various
@@ -849,7 +844,7 @@ EXPORT_SYMBOL(drm_atomic_helper_check_plane_state);
  * &drm_crtc_helper_funcs.atomic_check and &drm_plane_helper_funcs.atomic_check
  * hooks provided by the driver.
  *
- * It also sets &drm_crtc_state.planes_changed to indicate that a crtc has
+ * It also sets &drm_crtc_state.planes_changed to indicate that a CRTC has
  * updated planes.
  *
  * RETURNS:
@@ -913,7 +908,7 @@ EXPORT_SYMBOL(drm_atomic_helper_check_planes);
  * @state: the driver state object
  *
  * Check the state object to see if the requested state is physically possible.
- * Only crtcs and planes have check callbacks, so for any additional (global)
+ * Only CRTCs and planes have check callbacks, so for any additional (global)
  * checking that a driver needs it can simply wrap that around this function.
  * Drivers without such needs can directly use this as their
  * &drm_mode_config_funcs.atomic_check callback.
@@ -966,14 +961,14 @@ crtc_needs_disable(struct drm_crtc_state *old_state,
 		   struct drm_crtc_state *new_state)
 {
 	/*
-	 * No new_state means the crtc is off, so the only criteria is whether
+	 * No new_state means the CRTC is off, so the only criteria is whether
 	 * it's currently active or in self refresh mode.
 	 */
 	if (!new_state)
 		return drm_atomic_crtc_effectively_active(old_state);
 
 	/*
-	 * We need to run through the crtc_funcs->disable() function if the crtc
+	 * We need to run through the crtc_funcs->disable() function if the CRTC
 	 * is currently on, if it's transitioning to self refresh mode, or if
 	 * it's in self refresh mode and needs to be fully disabled.
 	 */
@@ -994,6 +989,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 	for_each_oldnew_connector_in_state(old_state, connector, old_conn_state, new_conn_state, i) {
 		const struct drm_encoder_helper_funcs *funcs;
 		struct drm_encoder *encoder;
+		struct drm_bridge *bridge;
 
 		/* Shut down everything that's in the changeset and currently
 		 * still on. So need to check the old, saved state. */
@@ -1030,7 +1026,8 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 		 * Each encoder has at most one connector (since we always steal
 		 * it away), so we won't call disable hooks twice.
 		 */
-		drm_atomic_bridge_disable(encoder->bridge, old_state);
+		bridge = drm_bridge_chain_get_first_bridge(encoder);
+		drm_atomic_bridge_chain_disable(bridge, old_state);
 
 		/* Right function depends upon target state. */
 		if (funcs) {
@@ -1044,7 +1041,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 				funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
 		}
 
-		drm_atomic_bridge_post_disable(encoder->bridge, old_state);
+		drm_atomic_bridge_chain_post_disable(bridge, old_state);
 	}
 
 	for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) {
@@ -1090,7 +1087,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
  * @old_state: atomic state object with old state structures
  *
  * This function updates all the various legacy modeset state pointers in
- * connectors, encoders and crtcs. It also updates the timestamping constants
+ * connectors, encoders and CRTCs. It also updates the timestamping constants
  * used for precise vblank timestamps by calling
  * drm_calc_timestamping_constants().
  *
@@ -1198,6 +1195,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state)
 		const struct drm_encoder_helper_funcs *funcs;
 		struct drm_encoder *encoder;
 		struct drm_display_mode *mode, *adjusted_mode;
+		struct drm_bridge *bridge;
 
 		if (!new_conn_state->best_encoder)
 			continue;
@@ -1225,7 +1223,8 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state)
 			funcs->mode_set(encoder, mode, adjusted_mode);
 		}
 
-		drm_bridge_mode_set(encoder->bridge, mode, adjusted_mode);
+		bridge = drm_bridge_chain_get_first_bridge(encoder);
+		drm_bridge_chain_mode_set(bridge, mode, adjusted_mode);
 	}
 }
 
@@ -1237,7 +1236,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state)
  * This function shuts down all the outputs that need to be shut down and
  * prepares them (if required) with the new mode.
  *
- * For compatibility with legacy crtc helpers this should be called before
+ * For compatibility with legacy CRTC helpers this should be called before
  * drm_atomic_helper_commit_planes(), which is what the default commit function
  * does. But drivers with different needs can group the modeset commits together
  * and do the plane commits at the end. This is useful for drivers doing runtime
@@ -1283,7 +1282,7 @@ static void drm_atomic_helper_commit_writebacks(struct drm_device *dev,
  * This function enables all the outputs with the new configuration which had to
  * be turned off for the update.
  *
- * For compatibility with legacy crtc helpers this should be called after
+ * For compatibility with legacy CRTC helpers this should be called after
  * drm_atomic_helper_commit_planes(), which is what the default commit function
  * does. But drivers with different needs can group the modeset commits together
  * and do the plane commits at the end. This is useful for drivers doing runtime
@@ -1324,6 +1323,7 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 	for_each_new_connector_in_state(old_state, connector, new_conn_state, i) {
 		const struct drm_encoder_helper_funcs *funcs;
 		struct drm_encoder *encoder;
+		struct drm_bridge *bridge;
 
 		if (!new_conn_state->best_encoder)
 			continue;
@@ -1342,7 +1342,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 		 * Each encoder has at most one connector (since we always steal
 		 * it away), so we won't call enable hooks twice.
 		 */
-		drm_atomic_bridge_pre_enable(encoder->bridge, old_state);
+		bridge = drm_bridge_chain_get_first_bridge(encoder);
+		drm_atomic_bridge_chain_pre_enable(bridge, old_state);
 
 		if (funcs) {
 			if (funcs->atomic_enable)
@@ -1353,7 +1354,7 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 				funcs->commit(encoder);
 		}
 
-		drm_atomic_bridge_enable(encoder->bridge, old_state);
+		drm_atomic_bridge_chain_enable(bridge, old_state);
 	}
 
 	drm_atomic_helper_commit_writebacks(dev, old_state);
@@ -1413,12 +1414,12 @@ int drm_atomic_helper_wait_for_fences(struct drm_device *dev,
 EXPORT_SYMBOL(drm_atomic_helper_wait_for_fences);
 
 /**
- * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
+ * drm_atomic_helper_wait_for_vblanks - wait for vblank on CRTCs
  * @dev: DRM device
  * @old_state: atomic state object with old state structures
  *
- * Helper to, after atomic commit, wait for vblanks on all effected
- * crtcs (ie. before cleaning up old framebuffers using
+ * Helper to, after atomic commit, wait for vblanks on all affected
+ * CRTCs (ie. before cleaning up old framebuffers using
  * drm_atomic_helper_cleanup_planes()). It will only wait on CRTCs where the
  * framebuffers have actually changed to optimize for the legacy cursor and
  * plane update use-case.
@@ -1477,10 +1478,10 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
  * @dev: DRM device
  * @old_state: atomic state object with old state structures
  *
- * Helper to, after atomic commit, wait for page flips on all effected
+ * Helper to, after atomic commit, wait for page flips on all affected
  * crtcs (ie. before cleaning up old framebuffers using
  * drm_atomic_helper_cleanup_planes()). Compared to
- * drm_atomic_helper_wait_for_vblanks() this waits for the completion of on all
+ * drm_atomic_helper_wait_for_vblanks() this waits for the completion on all
  * CRTCs, assuming that cursors-only updates are signalling their completion
  * immediately (or using a different path).
  *
@@ -1844,17 +1845,21 @@ EXPORT_SYMBOL(drm_atomic_helper_commit);
 /**
  * DOC: implementing nonblocking commit
  *
- * Nonblocking atomic commits have to be implemented in the following sequence:
+ * Nonblocking atomic commits should use struct &drm_crtc_commit to sequence
+ * different operations against each another. Locks, especially struct
+ * &drm_modeset_lock, should not be held in worker threads or any other
+ * asynchronous context used to commit the hardware state.
+ *
+ * drm_atomic_helper_commit() implements the recommended sequence for
+ * nonblocking commits, using drm_atomic_helper_setup_commit() internally:
  *
- * 1. Run drm_atomic_helper_prepare_planes() first. This is the only function
- * which commit needs to call which can fail, so we want to run it first and
+ * 1. Run drm_atomic_helper_prepare_planes(). Since this can fail and we
+ * need to propagate out of memory/VRAM errors to userspace, it must be called
  * synchronously.
  *
  * 2. Synchronize with any outstanding nonblocking commit worker threads which
- * might be affected the new state update. This can be done by either cancelling
- * or flushing the work items, depending upon whether the driver can deal with
- * cancelled updates. Note that it is important to ensure that the framebuffer
- * cleanup is still done when cancelling.
+ * might be affected by the new state update. This is handled by
+ * drm_atomic_helper_setup_commit().
  *
  * Asynchronous workers need to have sufficient parallelism to be able to run
  * different atomic commits on different CRTCs in parallel. The simplest way to
@@ -1865,21 +1870,29 @@ EXPORT_SYMBOL(drm_atomic_helper_commit);
  * must be done as one global operation, and enabling or disabling a CRTC can
  * take a long time. But even that is not required.
  *
+ * IMPORTANT: A &drm_atomic_state update for multiple CRTCs is sequenced
+ * against all CRTCs therein. Therefore for atomic state updates which only flip
+ * planes the driver must not get the struct &drm_crtc_state of unrelated CRTCs
+ * in its atomic check code: This would prevent committing of atomic updates to
+ * multiple CRTCs in parallel. In general, adding additional state structures
+ * should be avoided as much as possible, because this reduces parallelism in
+ * (nonblocking) commits, both due to locking and due to commit sequencing
+ * requirements.
+ *
  * 3. The software state is updated synchronously with
  * drm_atomic_helper_swap_state(). Doing this under the protection of all modeset
- * locks means concurrent callers never see inconsistent state. And doing this
- * while it's guaranteed that no relevant nonblocking worker runs means that
- * nonblocking workers do not need grab any locks. Actually they must not grab
- * locks, for otherwise the work flushing will deadlock.
+ * locks means concurrent callers never see inconsistent state. Note that commit
+ * workers do not hold any locks; their access is only coordinated through
+ * ordering. If workers would access state only through the pointers in the
+ * free-standing state objects (currently not the case for any driver) then even
+ * multiple pending commits could be in-flight at the same time.
  *
  * 4. Schedule a work item to do all subsequent steps, using the split-out
  * commit helpers: a) pre-plane commit b) plane commit c) post-plane commit and
  * then cleaning up the framebuffers after the old framebuffer is no longer
- * being displayed.
- *
- * The above scheme is implemented in the atomic helper libraries in
- * drm_atomic_helper_commit() using a bunch of helper functions. See
- * drm_atomic_helper_setup_commit() for a starting point.
+ * being displayed. The scheduled work should synchronize against other workers
+ * using the &drm_crtc_commit infrastructure as needed. See
+ * drm_atomic_helper_setup_commit() for more details.
  */
 
 static int stall_checks(struct drm_crtc *crtc, bool nonblock)
@@ -2108,7 +2121,7 @@ EXPORT_SYMBOL(drm_atomic_helper_setup_commit);
  *
  * This function waits for all preceeding commits that touch the same CRTC as
  * @old_state to both be committed to the hardware (as signalled by
- * drm_atomic_helper_commit_hw_done) and executed by the hardware (as signalled
+ * drm_atomic_helper_commit_hw_done()) and executed by the hardware (as signalled
  * by calling drm_crtc_send_vblank_event() on the &drm_crtc_state.event).
  *
  * This is part of the atomic helper support for nonblocking commits, see
@@ -2195,7 +2208,7 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_dependencies);
  * drm_atomic_helper_fake_vblank - fake VBLANK events if needed
  * @old_state: atomic state object with old state structures
  *
- * This function walks all CRTCs and fake VBLANK events on those with
+ * This function walks all CRTCs and fakes VBLANK events on those with
  * &drm_crtc_state.no_vblank set to true and &drm_crtc_state.event != NULL.
  * The primary use of this function is writeback connectors working in oneshot
  * mode and faking VBLANK events. In this case they only fake the VBLANK event
@@ -2391,7 +2404,7 @@ static bool plane_crtc_active(const struct drm_plane_state *state)
  * @flags: flags for committing plane state
  *
  * This function commits the new plane state using the plane and atomic helper
- * functions for planes and crtcs. It assumes that the atomic state has already
+ * functions for planes and CRTCs. It assumes that the atomic state has already
  * been pushed into the relevant object state pointers, since this step can no
  * longer fail.
  *
@@ -2512,15 +2525,15 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev,
 EXPORT_SYMBOL(drm_atomic_helper_commit_planes);
 
 /**
- * drm_atomic_helper_commit_planes_on_crtc - commit plane state for a crtc
- * @old_crtc_state: atomic state object with the old crtc state
+ * drm_atomic_helper_commit_planes_on_crtc - commit plane state for a CRTC
+ * @old_crtc_state: atomic state object with the old CRTC state
  *
  * This function commits the new plane state using the plane and atomic helper
- * functions for planes on the specific crtc. It assumes that the atomic state
+ * functions for planes on the specific CRTC. It assumes that the atomic state
  * has already been pushed into the relevant object state pointers, since this
  * step can no longer fail.
  *
- * This function is useful when plane updates should be done crtc-by-crtc
+ * This function is useful when plane updates should be done CRTC-by-CRTC
  * instead of one global step like drm_atomic_helper_commit_planes() does.
  *
  * This function can only be savely used when planes are not allowed to move
@@ -2810,10 +2823,10 @@ EXPORT_SYMBOL(drm_atomic_helper_swap_state);
  * @plane: plane object to update
  * @crtc: owning CRTC of owning plane
  * @fb: framebuffer to flip onto plane
- * @crtc_x: x offset of primary plane on crtc
- * @crtc_y: y offset of primary plane on crtc
- * @crtc_w: width of primary plane rectangle on crtc
- * @crtc_h: height of primary plane rectangle on crtc
+ * @crtc_x: x offset of primary plane on @crtc
+ * @crtc_y: y offset of primary plane on @crtc
+ * @crtc_w: width of primary plane rectangle on @crtc
+ * @crtc_h: height of primary plane rectangle on @crtc
  * @src_x: x offset of @fb for panning
  * @src_y: y offset of @fb for panning
  * @src_w: width of source rectangle in @fb
@@ -2919,7 +2932,7 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_plane);
  * @set: mode set configuration
  * @ctx: lock acquisition context
  *
- * Provides a default crtc set_config handler using the atomic driver interface.
+ * Provides a default CRTC set_config handler using the atomic driver interface.
  *
  * NOTE: For backwards compatibility with old userspace this automatically
  * resets the "link-status" property to GOOD, to force any link
@@ -3332,7 +3345,7 @@ static int page_flip_common(struct drm_atomic_state *state,
 
 /**
  * drm_atomic_helper_page_flip - execute a legacy page flip
- * @crtc: DRM crtc
+ * @crtc: DRM CRTC
  * @fb: DRM framebuffer
  * @event: optional DRM event to signal upon completion
  * @flags: flip flags for non-vblank sync'ed updates
@@ -3376,7 +3389,7 @@ EXPORT_SYMBOL(drm_atomic_helper_page_flip);
 
 /**
  * drm_atomic_helper_page_flip_target - do page flip on target vblank period.
- * @crtc: DRM crtc
+ * @crtc: DRM CRTC
  * @fb: DRM framebuffer
  * @event: optional DRM event to signal upon completion
  * @flags: flip flags for non-vblank sync'ed updates
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index d0a937fb0c56..7cf3cf936547 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -58,6 +58,22 @@
  */
 
 /**
+ * __drm_atomic_helper_crtc_state_reset - reset the CRTC state
+ * @crtc_state: atomic CRTC state, must not be NULL
+ * @crtc: CRTC object, must not be NULL
+ *
+ * Initializes the newly allocated @crtc_state with default
+ * values. This is useful for drivers that subclass the CRTC state.
+ */
+void
+__drm_atomic_helper_crtc_state_reset(struct drm_crtc_state *crtc_state,
+				     struct drm_crtc *crtc)
+{
+	crtc_state->crtc = crtc;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_crtc_state_reset);
+
+/**
  * __drm_atomic_helper_crtc_reset - reset state on CRTC
  * @crtc: drm CRTC
  * @crtc_state: CRTC state to assign
@@ -74,7 +90,7 @@ __drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
 			       struct drm_crtc_state *crtc_state)
 {
 	if (crtc_state)
-		crtc_state->crtc = crtc;
+		__drm_atomic_helper_crtc_state_reset(crtc_state, crtc);
 
 	crtc->state = crtc_state;
 }
@@ -212,23 +228,43 @@ void drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
 EXPORT_SYMBOL(drm_atomic_helper_crtc_destroy_state);
 
 /**
- * __drm_atomic_helper_plane_reset - resets planes state to default values
+ * __drm_atomic_helper_plane_state_reset - resets plane state to default values
+ * @plane_state: atomic plane state, must not be NULL
  * @plane: plane object, must not be NULL
- * @state: atomic plane state, must not be NULL
  *
- * Initializes plane state to default. This is useful for drivers that subclass
- * the plane state.
+ * Initializes the newly allocated @plane_state with default
+ * values. This is useful for drivers that subclass the CRTC state.
  */
-void __drm_atomic_helper_plane_reset(struct drm_plane *plane,
-				     struct drm_plane_state *state)
+void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
+					   struct drm_plane *plane)
 {
-	state->plane = plane;
-	state->rotation = DRM_MODE_ROTATE_0;
+	plane_state->plane = plane;
+	plane_state->rotation = DRM_MODE_ROTATE_0;
 
-	state->alpha = DRM_BLEND_ALPHA_OPAQUE;
-	state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
+	plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE;
+	plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_plane_state_reset);
 
-	plane->state = state;
+/**
+ * __drm_atomic_helper_plane_reset - reset state on plane
+ * @plane: drm plane
+ * @plane_state: plane state to assign
+ *
+ * Initializes the newly allocated @plane_state and assigns it to
+ * the &drm_crtc->state pointer of @plane, usually required when
+ * initializing the drivers or when called from the &drm_plane_funcs.reset
+ * hook.
+ *
+ * This is useful for drivers that subclass the plane state.
+ */
+void __drm_atomic_helper_plane_reset(struct drm_plane *plane,
+				     struct drm_plane_state *plane_state)
+{
+	if (plane_state)
+		__drm_atomic_helper_plane_state_reset(plane_state, plane);
+
+	plane->state = plane_state;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_reset);
 
@@ -336,6 +372,22 @@ void drm_atomic_helper_plane_destroy_state(struct drm_plane *plane,
 EXPORT_SYMBOL(drm_atomic_helper_plane_destroy_state);
 
 /**
+ * __drm_atomic_helper_connector_state_reset - reset the connector state
+ * @conn_state: atomic connector state, must not be NULL
+ * @connector: connectotr object, must not be NULL
+ *
+ * Initializes the newly allocated @conn_state with default
+ * values. This is useful for drivers that subclass the connector state.
+ */
+void
+__drm_atomic_helper_connector_state_reset(struct drm_connector_state *conn_state,
+					  struct drm_connector *connector)
+{
+	conn_state->connector = connector;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_connector_state_reset);
+
+/**
  * __drm_atomic_helper_connector_reset - reset state on connector
  * @connector: drm connector
  * @conn_state: connector state to assign
@@ -352,7 +404,7 @@ __drm_atomic_helper_connector_reset(struct drm_connector *connector,
 				    struct drm_connector_state *conn_state)
 {
 	if (conn_state)
-		conn_state->connector = connector;
+		__drm_atomic_helper_connector_state_reset(conn_state, connector);
 
 	connector->state = conn_state;
 }
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 7a26bfb5329c..a1e5e262bae2 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -160,12 +160,12 @@ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state,
 EXPORT_SYMBOL(drm_atomic_set_mode_prop_for_crtc);
 
 /**
- * drm_atomic_set_crtc_for_plane - set crtc for plane
+ * drm_atomic_set_crtc_for_plane - set CRTC for plane
  * @plane_state: the plane whose incoming state to update
- * @crtc: crtc to use for the plane
+ * @crtc: CRTC to use for the plane
  *
- * Changing the assigned crtc for a plane requires us to grab the lock and state
- * for the new crtc, as needed. This function takes care of all these details
+ * Changing the assigned CRTC for a plane requires us to grab the lock and state
+ * for the new CRTC, as needed. This function takes care of all these details
  * besides updating the pointer in the state object itself.
  *
  * Returns:
@@ -279,12 +279,12 @@ drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state,
 EXPORT_SYMBOL(drm_atomic_set_fence_for_plane);
 
 /**
- * drm_atomic_set_crtc_for_connector - set crtc for connector
+ * drm_atomic_set_crtc_for_connector - set CRTC for connector
  * @conn_state: atomic state object for the connector
- * @crtc: crtc to use for the connector
+ * @crtc: CRTC to use for the connector
  *
- * Changing the assigned crtc for a connector requires us to grab the lock and
- * state for the new crtc, as needed. This function takes care of all these
+ * Changing the assigned CRTC for a connector requires us to grab the lock and
+ * state for the new CRTC, as needed. This function takes care of all these
  * details besides updating the pointer in the state object itself.
  *
  * Returns:
@@ -1405,7 +1405,7 @@ retry:
 	} else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
 		ret = drm_atomic_nonblocking_commit(state);
 	} else {
-		if (unlikely(drm_debug & DRM_UT_STATE))
+		if (drm_debug_enabled(DRM_UT_STATE))
 			drm_atomic_print_state(state);
 
 		ret = drm_atomic_commit(state);
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 37ac168fcb60..121481f6aa71 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -130,7 +130,12 @@
  *	Z position is set up with drm_plane_create_zpos_immutable_property() and
  *	drm_plane_create_zpos_property(). It controls the visibility of overlapping
  *	planes. Without this property the primary plane is always below the cursor
- *	plane, and ordering between all other planes is undefined.
+ *	plane, and ordering between all other planes is undefined. The positive
+ *	Z axis points towards the user, i.e. planes with lower Z position values
+ *	are underneath planes with higher Z position values. Two planes with the
+ *	same Z position value have undefined ordering. Note that the Z position
+ *	value can also be immutable, to inform userspace about the hard-coded
+ *	stacking of planes, see drm_plane_create_zpos_immutable_property().
  *
  * pixel blend mode:
  *	Pixel blend mode is set up with drm_plane_create_blend_mode_property().
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index cba537c99e43..c2cf0c90fa26 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -55,7 +55,7 @@
  * just provide additional hooks to get the desired output at the end of the
  * encoder chain.
  *
- * Bridges can also be chained up using the &drm_bridge.next pointer.
+ * Bridges can also be chained up using the &drm_bridge.chain_node field.
  *
  * Both legacy CRTC helpers and the new atomic modeset helpers support bridges.
  */
@@ -128,20 +128,21 @@ int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge,
 	bridge->dev = encoder->dev;
 	bridge->encoder = encoder;
 
+	if (previous)
+		list_add(&bridge->chain_node, &previous->chain_node);
+	else
+		list_add(&bridge->chain_node, &encoder->bridge_chain);
+
 	if (bridge->funcs->attach) {
 		ret = bridge->funcs->attach(bridge);
 		if (ret < 0) {
+			list_del(&bridge->chain_node);
 			bridge->dev = NULL;
 			bridge->encoder = NULL;
 			return ret;
 		}
 	}
 
-	if (previous)
-		previous->next = bridge;
-	else
-		encoder->bridge = bridge;
-
 	return 0;
 }
 EXPORT_SYMBOL(drm_bridge_attach);
@@ -157,6 +158,7 @@ void drm_bridge_detach(struct drm_bridge *bridge)
 	if (bridge->funcs->detach)
 		bridge->funcs->detach(bridge);
 
+	list_del(&bridge->chain_node);
 	bridge->dev = NULL;
 }
 
@@ -172,8 +174,8 @@ void drm_bridge_detach(struct drm_bridge *bridge)
  */
 
 /**
- * drm_bridge_mode_fixup - fixup proposed mode for all bridges in the
- *			   encoder chain
+ * drm_bridge_chain_mode_fixup - fixup proposed mode for all bridges in the
+ *				 encoder chain
  * @bridge: bridge control structure
  * @mode: desired mode to be set for the bridge
  * @adjusted_mode: updated mode that works for this bridge
@@ -186,27 +188,31 @@ void drm_bridge_detach(struct drm_bridge *bridge)
  * RETURNS:
  * true on success, false on failure
  */
-bool drm_bridge_mode_fixup(struct drm_bridge *bridge,
-			const struct drm_display_mode *mode,
-			struct drm_display_mode *adjusted_mode)
+bool drm_bridge_chain_mode_fixup(struct drm_bridge *bridge,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
 {
-	bool ret = true;
+	struct drm_encoder *encoder;
 
 	if (!bridge)
 		return true;
 
-	if (bridge->funcs->mode_fixup)
-		ret = bridge->funcs->mode_fixup(bridge, mode, adjusted_mode);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (!bridge->funcs->mode_fixup)
+			continue;
 
-	ret = ret && drm_bridge_mode_fixup(bridge->next, mode, adjusted_mode);
+		if (!bridge->funcs->mode_fixup(bridge, mode, adjusted_mode))
+			return false;
+	}
 
-	return ret;
+	return true;
 }
-EXPORT_SYMBOL(drm_bridge_mode_fixup);
+EXPORT_SYMBOL(drm_bridge_chain_mode_fixup);
 
 /**
- * drm_bridge_mode_valid - validate the mode against all bridges in the
- * 			   encoder chain.
+ * drm_bridge_chain_mode_valid - validate the mode against all bridges in the
+ *				 encoder chain.
  * @bridge: bridge control structure
  * @mode: desired mode to be validated
  *
@@ -219,26 +225,33 @@ EXPORT_SYMBOL(drm_bridge_mode_fixup);
  * RETURNS:
  * MODE_OK on success, drm_mode_status Enum error code on failure
  */
-enum drm_mode_status drm_bridge_mode_valid(struct drm_bridge *bridge,
-					   const struct drm_display_mode *mode)
+enum drm_mode_status
+drm_bridge_chain_mode_valid(struct drm_bridge *bridge,
+			    const struct drm_display_mode *mode)
 {
-	enum drm_mode_status ret = MODE_OK;
+	struct drm_encoder *encoder;
 
 	if (!bridge)
-		return ret;
+		return MODE_OK;
 
-	if (bridge->funcs->mode_valid)
-		ret = bridge->funcs->mode_valid(bridge, mode);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		enum drm_mode_status ret;
 
-	if (ret != MODE_OK)
-		return ret;
+		if (!bridge->funcs->mode_valid)
+			continue;
 
-	return drm_bridge_mode_valid(bridge->next, mode);
+		ret = bridge->funcs->mode_valid(bridge, mode);
+		if (ret != MODE_OK)
+			return ret;
+	}
+
+	return MODE_OK;
 }
-EXPORT_SYMBOL(drm_bridge_mode_valid);
+EXPORT_SYMBOL(drm_bridge_chain_mode_valid);
 
 /**
- * drm_bridge_disable - disables all bridges in the encoder chain
+ * drm_bridge_chain_disable - disables all bridges in the encoder chain
  * @bridge: bridge control structure
  *
  * Calls &drm_bridge_funcs.disable op for all the bridges in the encoder
@@ -247,20 +260,28 @@ EXPORT_SYMBOL(drm_bridge_mode_valid);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_bridge_disable(struct drm_bridge *bridge)
+void drm_bridge_chain_disable(struct drm_bridge *bridge)
 {
+	struct drm_encoder *encoder;
+	struct drm_bridge *iter;
+
 	if (!bridge)
 		return;
 
-	drm_bridge_disable(bridge->next);
+	encoder = bridge->encoder;
+	list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) {
+		if (iter->funcs->disable)
+			iter->funcs->disable(iter);
 
-	if (bridge->funcs->disable)
-		bridge->funcs->disable(bridge);
+		if (iter == bridge)
+			break;
+	}
 }
-EXPORT_SYMBOL(drm_bridge_disable);
+EXPORT_SYMBOL(drm_bridge_chain_disable);
 
 /**
- * drm_bridge_post_disable - cleans up after disabling all bridges in the encoder chain
+ * drm_bridge_chain_post_disable - cleans up after disabling all bridges in the
+ *				   encoder chain
  * @bridge: bridge control structure
  *
  * Calls &drm_bridge_funcs.post_disable op for all the bridges in the
@@ -269,47 +290,53 @@ EXPORT_SYMBOL(drm_bridge_disable);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_bridge_post_disable(struct drm_bridge *bridge)
+void drm_bridge_chain_post_disable(struct drm_bridge *bridge)
 {
+	struct drm_encoder *encoder;
+
 	if (!bridge)
 		return;
 
-	if (bridge->funcs->post_disable)
-		bridge->funcs->post_disable(bridge);
-
-	drm_bridge_post_disable(bridge->next);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (bridge->funcs->post_disable)
+			bridge->funcs->post_disable(bridge);
+	}
 }
-EXPORT_SYMBOL(drm_bridge_post_disable);
+EXPORT_SYMBOL(drm_bridge_chain_post_disable);
 
 /**
- * drm_bridge_mode_set - set proposed mode for all bridges in the
- *			 encoder chain
+ * drm_bridge_chain_mode_set - set proposed mode for all bridges in the
+ *			       encoder chain
  * @bridge: bridge control structure
- * @mode: desired mode to be set for the bridge
- * @adjusted_mode: updated mode that works for this bridge
+ * @mode: desired mode to be set for the encoder chain
+ * @adjusted_mode: updated mode that works for this encoder chain
  *
  * Calls &drm_bridge_funcs.mode_set op for all the bridges in the
  * encoder chain, starting from the first bridge to the last.
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_bridge_mode_set(struct drm_bridge *bridge,
-			 const struct drm_display_mode *mode,
-			 const struct drm_display_mode *adjusted_mode)
+void drm_bridge_chain_mode_set(struct drm_bridge *bridge,
+			       const struct drm_display_mode *mode,
+			       const struct drm_display_mode *adjusted_mode)
 {
+	struct drm_encoder *encoder;
+
 	if (!bridge)
 		return;
 
-	if (bridge->funcs->mode_set)
-		bridge->funcs->mode_set(bridge, mode, adjusted_mode);
-
-	drm_bridge_mode_set(bridge->next, mode, adjusted_mode);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (bridge->funcs->mode_set)
+			bridge->funcs->mode_set(bridge, mode, adjusted_mode);
+	}
 }
-EXPORT_SYMBOL(drm_bridge_mode_set);
+EXPORT_SYMBOL(drm_bridge_chain_mode_set);
 
 /**
- * drm_bridge_pre_enable - prepares for enabling all
- *			   bridges in the encoder chain
+ * drm_bridge_chain_pre_enable - prepares for enabling all bridges in the
+ *				 encoder chain
  * @bridge: bridge control structure
  *
  * Calls &drm_bridge_funcs.pre_enable op for all the bridges in the encoder
@@ -318,20 +345,24 @@ EXPORT_SYMBOL(drm_bridge_mode_set);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_bridge_pre_enable(struct drm_bridge *bridge)
+void drm_bridge_chain_pre_enable(struct drm_bridge *bridge)
 {
+	struct drm_encoder *encoder;
+	struct drm_bridge *iter;
+
 	if (!bridge)
 		return;
 
-	drm_bridge_pre_enable(bridge->next);
-
-	if (bridge->funcs->pre_enable)
-		bridge->funcs->pre_enable(bridge);
+	encoder = bridge->encoder;
+	list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) {
+		if (iter->funcs->pre_enable)
+			iter->funcs->pre_enable(iter);
+	}
 }
-EXPORT_SYMBOL(drm_bridge_pre_enable);
+EXPORT_SYMBOL(drm_bridge_chain_pre_enable);
 
 /**
- * drm_bridge_enable - enables all bridges in the encoder chain
+ * drm_bridge_chain_enable - enables all bridges in the encoder chain
  * @bridge: bridge control structure
  *
  * Calls &drm_bridge_funcs.enable op for all the bridges in the encoder
@@ -340,22 +371,25 @@ EXPORT_SYMBOL(drm_bridge_pre_enable);
  *
  * Note that the bridge passed should be the one closest to the encoder
  */
-void drm_bridge_enable(struct drm_bridge *bridge)
+void drm_bridge_chain_enable(struct drm_bridge *bridge)
 {
+	struct drm_encoder *encoder;
+
 	if (!bridge)
 		return;
 
-	if (bridge->funcs->enable)
-		bridge->funcs->enable(bridge);
-
-	drm_bridge_enable(bridge->next);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (bridge->funcs->enable)
+			bridge->funcs->enable(bridge);
+	}
 }
-EXPORT_SYMBOL(drm_bridge_enable);
+EXPORT_SYMBOL(drm_bridge_chain_enable);
 
 /**
- * drm_atomic_bridge_disable - disables all bridges in the encoder chain
+ * drm_atomic_bridge_chain_disable - disables all bridges in the encoder chain
  * @bridge: bridge control structure
- * @state: atomic state being committed
+ * @old_state: old atomic state
  *
  * Calls &drm_bridge_funcs.atomic_disable (falls back on
  * &drm_bridge_funcs.disable) op for all the bridges in the encoder chain,
@@ -364,26 +398,33 @@ EXPORT_SYMBOL(drm_bridge_enable);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_atomic_bridge_disable(struct drm_bridge *bridge,
-			       struct drm_atomic_state *state)
+void drm_atomic_bridge_chain_disable(struct drm_bridge *bridge,
+				     struct drm_atomic_state *old_state)
 {
+	struct drm_encoder *encoder;
+	struct drm_bridge *iter;
+
 	if (!bridge)
 		return;
 
-	drm_atomic_bridge_disable(bridge->next, state);
+	encoder = bridge->encoder;
+	list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) {
+		if (iter->funcs->atomic_disable)
+			iter->funcs->atomic_disable(iter, old_state);
+		else if (iter->funcs->disable)
+			iter->funcs->disable(iter);
 
-	if (bridge->funcs->atomic_disable)
-		bridge->funcs->atomic_disable(bridge, state);
-	else if (bridge->funcs->disable)
-		bridge->funcs->disable(bridge);
+		if (iter == bridge)
+			break;
+	}
 }
-EXPORT_SYMBOL(drm_atomic_bridge_disable);
+EXPORT_SYMBOL(drm_atomic_bridge_chain_disable);
 
 /**
- * drm_atomic_bridge_post_disable - cleans up after disabling all bridges in the
- *				    encoder chain
+ * drm_atomic_bridge_chain_post_disable - cleans up after disabling all bridges
+ *					  in the encoder chain
  * @bridge: bridge control structure
- * @state: atomic state being committed
+ * @old_state: old atomic state
  *
  * Calls &drm_bridge_funcs.atomic_post_disable (falls back on
  * &drm_bridge_funcs.post_disable) op for all the bridges in the encoder chain,
@@ -392,26 +433,29 @@ EXPORT_SYMBOL(drm_atomic_bridge_disable);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_atomic_bridge_post_disable(struct drm_bridge *bridge,
-				    struct drm_atomic_state *state)
+void drm_atomic_bridge_chain_post_disable(struct drm_bridge *bridge,
+					  struct drm_atomic_state *old_state)
 {
+	struct drm_encoder *encoder;
+
 	if (!bridge)
 		return;
 
-	if (bridge->funcs->atomic_post_disable)
-		bridge->funcs->atomic_post_disable(bridge, state);
-	else if (bridge->funcs->post_disable)
-		bridge->funcs->post_disable(bridge);
-
-	drm_atomic_bridge_post_disable(bridge->next, state);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (bridge->funcs->atomic_post_disable)
+			bridge->funcs->atomic_post_disable(bridge, old_state);
+		else if (bridge->funcs->post_disable)
+			bridge->funcs->post_disable(bridge);
+	}
 }
-EXPORT_SYMBOL(drm_atomic_bridge_post_disable);
+EXPORT_SYMBOL(drm_atomic_bridge_chain_post_disable);
 
 /**
- * drm_atomic_bridge_pre_enable - prepares for enabling all bridges in the
- *				  encoder chain
+ * drm_atomic_bridge_chain_pre_enable - prepares for enabling all bridges in
+ *					the encoder chain
  * @bridge: bridge control structure
- * @state: atomic state being committed
+ * @old_state: old atomic state
  *
  * Calls &drm_bridge_funcs.atomic_pre_enable (falls back on
  * &drm_bridge_funcs.pre_enable) op for all the bridges in the encoder chain,
@@ -420,25 +464,32 @@ EXPORT_SYMBOL(drm_atomic_bridge_post_disable);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_atomic_bridge_pre_enable(struct drm_bridge *bridge,
-				  struct drm_atomic_state *state)
+void drm_atomic_bridge_chain_pre_enable(struct drm_bridge *bridge,
+					struct drm_atomic_state *old_state)
 {
+	struct drm_encoder *encoder;
+	struct drm_bridge *iter;
+
 	if (!bridge)
 		return;
 
-	drm_atomic_bridge_pre_enable(bridge->next, state);
+	encoder = bridge->encoder;
+	list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) {
+		if (iter->funcs->atomic_pre_enable)
+			iter->funcs->atomic_pre_enable(iter, old_state);
+		else if (iter->funcs->pre_enable)
+			iter->funcs->pre_enable(iter);
 
-	if (bridge->funcs->atomic_pre_enable)
-		bridge->funcs->atomic_pre_enable(bridge, state);
-	else if (bridge->funcs->pre_enable)
-		bridge->funcs->pre_enable(bridge);
+		if (iter == bridge)
+			break;
+	}
 }
-EXPORT_SYMBOL(drm_atomic_bridge_pre_enable);
+EXPORT_SYMBOL(drm_atomic_bridge_chain_pre_enable);
 
 /**
- * drm_atomic_bridge_enable - enables all bridges in the encoder chain
+ * drm_atomic_bridge_chain_enable - enables all bridges in the encoder chain
  * @bridge: bridge control structure
- * @state: atomic state being committed
+ * @old_state: old atomic state
  *
  * Calls &drm_bridge_funcs.atomic_enable (falls back on
  * &drm_bridge_funcs.enable) op for all the bridges in the encoder chain,
@@ -447,20 +498,23 @@ EXPORT_SYMBOL(drm_atomic_bridge_pre_enable);
  *
  * Note: the bridge passed should be the one closest to the encoder
  */
-void drm_atomic_bridge_enable(struct drm_bridge *bridge,
-			      struct drm_atomic_state *state)
+void drm_atomic_bridge_chain_enable(struct drm_bridge *bridge,
+				    struct drm_atomic_state *old_state)
 {
+	struct drm_encoder *encoder;
+
 	if (!bridge)
 		return;
 
-	if (bridge->funcs->atomic_enable)
-		bridge->funcs->atomic_enable(bridge, state);
-	else if (bridge->funcs->enable)
-		bridge->funcs->enable(bridge);
-
-	drm_atomic_bridge_enable(bridge->next, state);
+	encoder = bridge->encoder;
+	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
+		if (bridge->funcs->atomic_enable)
+			bridge->funcs->atomic_enable(bridge, old_state);
+		else if (bridge->funcs->enable)
+			bridge->funcs->enable(bridge);
+	}
 }
-EXPORT_SYMBOL(drm_atomic_bridge_enable);
+EXPORT_SYMBOL(drm_atomic_bridge_chain_enable);
 
 #ifdef CONFIG_OF
 /**
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 3bd76e918b5d..03e01b000f7a 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -62,10 +62,10 @@ static void drm_cache_flush_clflush(struct page *pages[],
 {
 	unsigned long i;
 
-	mb();
+	mb(); /*Full memory barrier used before so that CLFLUSH is ordered*/
 	for (i = 0; i < num_pages; i++)
 		drm_clflush_page(*pages++);
-	mb();
+	mb(); /*Also used after CLFLUSH so that all cache is flushed*/
 }
 #endif
 
@@ -92,6 +92,7 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
 
 #elif defined(__powerpc__)
 	unsigned long i;
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *page = pages[i];
 		void *page_virtual;
@@ -125,10 +126,10 @@ drm_clflush_sg(struct sg_table *st)
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		struct sg_page_iter sg_iter;
 
-		mb();
+		mb(); /*CLFLUSH is ordered only by using memory barriers*/
 		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
 			drm_clflush_page(sg_page_iter_page(&sg_iter));
-		mb();
+		mb(); /*Make sure that all cache line entry is flushed*/
 
 		return;
 	}
@@ -157,12 +158,13 @@ drm_clflush_virt_range(void *addr, unsigned long length)
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		const int size = boot_cpu_data.x86_clflush_size;
 		void *end = addr + length;
+
 		addr = (void *)(((unsigned long)addr) & -size);
-		mb();
+		mb(); /*CLFLUSH is only ordered with a full memory barrier*/
 		for (; addr < end; addr += size)
 			clflushopt(addr);
 		clflushopt(end - 1); /* force serialisation */
-		mb();
+		mb(); /*Ensure that evry data cache line entry is flushed*/
 		return;
 	}
 
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index d9a2e3695525..b031b45aa8ef 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -150,7 +150,7 @@ void drm_client_release(struct drm_client_dev *client)
 {
 	struct drm_device *dev = client->dev;
 
-	DRM_DEV_DEBUG_KMS(dev->dev, "%s\n", client->name);
+	drm_dbg_kms(dev, "%s\n", client->name);
 
 	drm_client_modeset_free(client);
 	drm_client_close(client);
@@ -203,7 +203,7 @@ void drm_client_dev_hotplug(struct drm_device *dev)
 			continue;
 
 		ret = client->funcs->hotplug(client);
-		DRM_DEV_DEBUG_KMS(dev->dev, "%s: ret=%d\n", client->name, ret);
+		drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret);
 	}
 	mutex_unlock(&dev->clientlist_mutex);
 }
@@ -223,7 +223,7 @@ void drm_client_dev_restore(struct drm_device *dev)
 			continue;
 
 		ret = client->funcs->restore(client);
-		DRM_DEV_DEBUG_KMS(dev->dev, "%s: ret=%d\n", client->name, ret);
+		drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret);
 		if (!ret) /* The first one to return zero gets the privilege to restore */
 			break;
 	}
@@ -351,8 +351,8 @@ static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer)
 
 	ret = drm_mode_rmfb(buffer->client->dev, buffer->fb->base.id, buffer->client->file);
 	if (ret)
-		DRM_DEV_ERROR(buffer->client->dev->dev,
-			      "Error removing FB:%u (%d)\n", buffer->fb->base.id, ret);
+		drm_err(buffer->client->dev,
+			"Error removing FB:%u (%d)\n", buffer->fb->base.id, ret);
 
 	buffer->fb = NULL;
 }
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index c8922b7cac09..6d4a29e99ae2 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -115,6 +115,33 @@ drm_client_find_modeset(struct drm_client_dev *client, struct drm_crtc *crtc)
 }
 
 static struct drm_display_mode *
+drm_connector_get_tiled_mode(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (mode->hdisplay == connector->tile_h_size &&
+		    mode->vdisplay == connector->tile_v_size)
+			return mode;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *
+drm_connector_fallback_non_tiled_mode(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (mode->hdisplay == connector->tile_h_size &&
+		    mode->vdisplay == connector->tile_v_size)
+			continue;
+		return mode;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *
 drm_connector_has_preferred_mode(struct drm_connector *connector, int width, int height)
 {
 	struct drm_display_mode *mode;
@@ -348,8 +375,15 @@ static bool drm_client_target_preferred(struct drm_connector **connectors,
 	struct drm_connector *connector;
 	u64 conn_configured = 0;
 	int tile_pass = 0;
+	int num_tiled_conns = 0;
 	int i;
 
+	for (i = 0; i < connector_count; i++) {
+		if (connectors[i]->has_tile &&
+		    connectors[i]->status == connector_status_connected)
+			num_tiled_conns++;
+	}
+
 retry:
 	for (i = 0; i < connector_count; i++) {
 		connector = connectors[i];
@@ -399,6 +433,28 @@ retry:
 			list_for_each_entry(modes[i], &connector->modes, head)
 				break;
 		}
+		/*
+		 * In case of tiled mode if all tiles not present fallback to
+		 * first available non tiled mode.
+		 * After all tiles are present, try to find the tiled mode
+		 * for all and if tiled mode not present due to fbcon size
+		 * limitations, use first non tiled mode only for
+		 * tile 0,0 and set to no mode for all other tiles.
+		 */
+		if (connector->has_tile) {
+			if (num_tiled_conns <
+			    connector->num_h_tile * connector->num_v_tile ||
+			    (connector->tile_h_loc == 0 &&
+			     connector->tile_v_loc == 0 &&
+			     !drm_connector_get_tiled_mode(connector))) {
+				DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n",
+					      connector->base.id);
+				modes[i] = drm_connector_fallback_non_tiled_mode(connector);
+			} else {
+				modes[i] = drm_connector_get_tiled_mode(connector);
+			}
+		}
+
 		DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name :
 			  "none");
 		conn_configured |= BIT_ULL(i);
@@ -415,9 +471,8 @@ static bool connector_has_possible_crtc(struct drm_connector *connector,
 					struct drm_crtc *crtc)
 {
 	struct drm_encoder *encoder;
-	int i;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
 		if (encoder->possible_crtcs & drm_crtc_mask(crtc))
 			return true;
 	}
@@ -516,6 +571,7 @@ static bool drm_client_firmware_config(struct drm_client_dev *client,
 	bool fallback = true, ret = true;
 	int num_connectors_enabled = 0;
 	int num_connectors_detected = 0;
+	int num_tiled_conns = 0;
 	struct drm_modeset_acquire_ctx ctx;
 
 	if (!drm_drv_uses_atomic_modeset(dev))
@@ -533,6 +589,11 @@ static bool drm_client_firmware_config(struct drm_client_dev *client,
 	memcpy(save_enabled, enabled, count);
 	mask = GENMASK(count - 1, 0);
 	conn_configured = 0;
+	for (i = 0; i < count; i++) {
+		if (connectors[i]->has_tile &&
+		    connectors[i]->status == connector_status_connected)
+			num_tiled_conns++;
+	}
 retry:
 	conn_seq = conn_configured;
 	for (i = 0; i < count; i++) {
@@ -632,6 +693,16 @@ retry:
 				      connector->name);
 			modes[i] = &connector->state->crtc->mode;
 		}
+		/*
+		 * In case of tiled modes, if all tiles are not present
+		 * then fallback to a non tiled mode.
+		 */
+		if (connector->has_tile &&
+		    num_tiled_conns < connector->num_h_tile * connector->num_v_tile) {
+			DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n",
+				      connector->base.id);
+			modes[i] = drm_connector_fallback_non_tiled_mode(connector);
+		}
 		crtcs[i] = new_crtc;
 
 		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c
index 4ce5c6d8de99..c93123ff7c21 100644
--- a/drivers/gpu/drm/drm_color_mgmt.c
+++ b/drivers/gpu/drm/drm_color_mgmt.c
@@ -109,28 +109,38 @@
  */
 
 /**
- * drm_color_lut_extract - clamp and round LUT entries
+ * drm_color_ctm_s31_32_to_qm_n
+ *
  * @user_input: input value
- * @bit_precision: number of bits the hw LUT supports
+ * @m: number of integer bits, only support m <= 32, include the sign-bit
+ * @n: number of fractional bits, only support n <= 32
+ *
+ * Convert and clamp S31.32 sign-magnitude to Qm.n (signed 2's complement).
+ * The sign-bit BIT(m+n-1) and above are 0 for positive value and 1 for negative
+ * the range of value is [-2^(m-1), 2^(m-1) - 2^-n]
+ *
+ * For example
+ * A Q3.12 format number:
+ * - required bit: 3 + 12 = 15bits
+ * - range: [-2^2, 2^2 - 2^−15]
  *
- * Extract a degamma/gamma LUT value provided by user (in the form of
- * &drm_color_lut entries) and round it to the precision supported by the
- * hardware.
+ * NOTE: the m can be zero if all bit_precision are used to present fractional
+ *       bits like Q0.32
  */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision)
+u64 drm_color_ctm_s31_32_to_qm_n(u64 user_input, u32 m, u32 n)
 {
-	uint32_t val = user_input;
-	uint32_t max = 0xffff >> (16 - bit_precision);
+	u64 mag = (user_input & ~BIT_ULL(63)) >> (32 - n);
+	bool negative = !!(user_input & BIT_ULL(63));
+	s64 val;
 
-	/* Round only if we're not using full precision. */
-	if (bit_precision < 16) {
-		val += 1UL << (16 - bit_precision - 1);
-		val >>= 16 - bit_precision;
-	}
+	WARN_ON(m > 32 || n > 32);
+
+	val = clamp_val(mag, 0, negative ?
+				BIT_ULL(n + m - 1) : BIT_ULL(n + m - 1) - 1);
 
-	return clamp_val(val, 0, max);
+	return negative ? -val : val;
 }
-EXPORT_SYMBOL(drm_color_lut_extract);
+EXPORT_SYMBOL(drm_color_ctm_s31_32_to_qm_n);
 
 /**
  * drm_crtc_enable_color_mgmt - enable color management properties
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 4c766624b20d..2166000ed057 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -365,8 +365,6 @@ EXPORT_SYMBOL(drm_connector_attach_edid_property);
 int drm_connector_attach_encoder(struct drm_connector *connector,
 				 struct drm_encoder *encoder)
 {
-	int i;
-
 	/*
 	 * In the past, drivers have attempted to model the static association
 	 * of connector to encoder in simple connector/encoder devices using a
@@ -381,18 +379,15 @@ int drm_connector_attach_encoder(struct drm_connector *connector,
 	if (WARN_ON(connector->encoder))
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(connector->encoder_ids); i++) {
-		if (connector->encoder_ids[i] == 0) {
-			connector->encoder_ids[i] = encoder->base.id;
-			return 0;
-		}
-	}
-	return -ENOMEM;
+	connector->possible_encoders |= drm_encoder_mask(encoder);
+
+	return 0;
 }
 EXPORT_SYMBOL(drm_connector_attach_encoder);
 
 /**
- * drm_connector_has_possible_encoder - check if the connector and encoder are assosicated with each other
+ * drm_connector_has_possible_encoder - check if the connector and encoder are
+ * associated with each other
  * @connector: the connector
  * @encoder: the encoder
  *
@@ -402,15 +397,7 @@ EXPORT_SYMBOL(drm_connector_attach_encoder);
 bool drm_connector_has_possible_encoder(struct drm_connector *connector,
 					struct drm_encoder *encoder)
 {
-	struct drm_encoder *enc;
-	int i;
-
-	drm_connector_for_each_possible_encoder(connector, enc, i) {
-		if (enc == encoder)
-			return true;
-	}
-
-	return false;
+	return connector->possible_encoders & drm_encoder_mask(encoder);
 }
 EXPORT_SYMBOL(drm_connector_has_possible_encoder);
 
@@ -480,7 +467,10 @@ EXPORT_SYMBOL(drm_connector_cleanup);
  * drm_connector_register - register a connector
  * @connector: the connector to register
  *
- * Register userspace interfaces for a connector
+ * Register userspace interfaces for a connector. Only call this for connectors
+ * which can be hotplugged after drm_dev_register() has been called already,
+ * e.g. DP MST connectors. All other connectors will be registered automatically
+ * when calling drm_dev_register().
  *
  * Returns:
  * Zero on success, error code on failure.
@@ -526,7 +516,10 @@ EXPORT_SYMBOL(drm_connector_register);
  * drm_connector_unregister - unregister a connector
  * @connector: the connector to unregister
  *
- * Unregister userspace interfaces for a connector
+ * Unregister userspace interfaces for a connector. Only call this for
+ * connectors which have registered explicitly by calling drm_dev_register(),
+ * since connectors are unregistered automatically when drm_dev_unregister() is
+ * called.
  */
 void drm_connector_unregister(struct drm_connector *connector)
 {
@@ -719,7 +712,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
 		__drm_connector_put_safe(iter->conn);
 		spin_unlock_irqrestore(&config->connector_list_lock, flags);
 	}
-	lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
+	lock_release(&connector_list_iter_dep_map, _RET_IP_);
 }
 EXPORT_SYMBOL(drm_connector_list_iter_end);
 
@@ -882,6 +875,38 @@ static const struct drm_prop_enum_list hdmi_colorspaces[] = {
 	{ DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER, "DCI-P3_RGB_Theater" },
 };
 
+/*
+ * As per DP 1.4a spec, 2.2.5.7.5 VSC SDP Payload for Pixel Encoding/Colorimetry
+ * Format Table 2-120
+ */
+static const struct drm_prop_enum_list dp_colorspaces[] = {
+	/* For Default case, driver will set the colorspace */
+	{ DRM_MODE_COLORIMETRY_DEFAULT, "Default" },
+	{ DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED, "RGB_Wide_Gamut_Fixed_Point" },
+	/* Colorimetry based on scRGB (IEC 61966-2-2) */
+	{ DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT, "RGB_Wide_Gamut_Floating_Point" },
+	/* Colorimetry based on IEC 61966-2-5 */
+	{ DRM_MODE_COLORIMETRY_OPRGB, "opRGB" },
+	/* Colorimetry based on SMPTE RP 431-2 */
+	{ DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65, "DCI-P3_RGB_D65" },
+	/* Colorimetry based on ITU-R BT.2020 */
+	{ DRM_MODE_COLORIMETRY_BT2020_RGB, "BT2020_RGB" },
+	{ DRM_MODE_COLORIMETRY_BT601_YCC, "BT601_YCC" },
+	{ DRM_MODE_COLORIMETRY_BT709_YCC, "BT709_YCC" },
+	/* Standard Definition Colorimetry based on IEC 61966-2-4 */
+	{ DRM_MODE_COLORIMETRY_XVYCC_601, "XVYCC_601" },
+	/* High Definition Colorimetry based on IEC 61966-2-4 */
+	{ DRM_MODE_COLORIMETRY_XVYCC_709, "XVYCC_709" },
+	/* Colorimetry based on IEC 61966-2-1/Amendment 1 */
+	{ DRM_MODE_COLORIMETRY_SYCC_601, "SYCC_601" },
+	/* Colorimetry based on IEC 61966-2-5 [33] */
+	{ DRM_MODE_COLORIMETRY_OPYCC_601, "opYCC_601" },
+	/* Colorimetry based on ITU-R BT.2020 */
+	{ DRM_MODE_COLORIMETRY_BT2020_CYCC, "BT2020_CYCC" },
+	/* Colorimetry based on ITU-R BT.2020 */
+	{ DRM_MODE_COLORIMETRY_BT2020_YCC, "BT2020_YCC" },
+};
+
 /**
  * DOC: standard connector properties
  *
@@ -1674,7 +1699,6 @@ EXPORT_SYMBOL(drm_mode_create_aspect_ratio_property);
  * DOC: standard connector properties
  *
  * Colorspace:
- *     drm_mode_create_colorspace_property - create colorspace property
  *     This property helps select a suitable colorspace based on the sink
  *     capability. Modern sink devices support wider gamut like BT2020.
  *     This helps switch to BT2020 mode if the BT2020 encoded video stream
@@ -1694,32 +1718,68 @@ EXPORT_SYMBOL(drm_mode_create_aspect_ratio_property);
  *      - This property is just to inform sink what colorspace
  *        source is trying to drive.
  *
+ * Because between HDMI and DP have different colorspaces,
+ * drm_mode_create_hdmi_colorspace_property() is used for HDMI connector and
+ * drm_mode_create_dp_colorspace_property() is used for DP connector.
+ */
+
+/**
+ * drm_mode_create_hdmi_colorspace_property - create hdmi colorspace property
+ * @connector: connector to create the Colorspace property on.
+ *
  * Called by a driver the first time it's needed, must be attached to desired
- * connectors.
+ * HDMI connectors.
+ *
+ * Returns:
+ * Zero on success, negative errono on failure.
  */
-int drm_mode_create_colorspace_property(struct drm_connector *connector)
+int drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct drm_property *prop;
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) {
-		prop = drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
-						"Colorspace",
-						hdmi_colorspaces,
-						ARRAY_SIZE(hdmi_colorspaces));
-		if (!prop)
-			return -ENOMEM;
-	} else {
-		DRM_DEBUG_KMS("Colorspace property not supported\n");
+	if (connector->colorspace_property)
 		return 0;
-	}
 
-	connector->colorspace_property = prop;
+	connector->colorspace_property =
+		drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, "Colorspace",
+					 hdmi_colorspaces,
+					 ARRAY_SIZE(hdmi_colorspaces));
+
+	if (!connector->colorspace_property)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_hdmi_colorspace_property);
+
+/**
+ * drm_mode_create_dp_colorspace_property - create dp colorspace property
+ * @connector: connector to create the Colorspace property on.
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * DP connectors.
+ *
+ * Returns:
+ * Zero on success, negative errono on failure.
+ */
+int drm_mode_create_dp_colorspace_property(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+
+	if (connector->colorspace_property)
+		return 0;
+
+	connector->colorspace_property =
+		drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, "Colorspace",
+					 dp_colorspaces,
+					 ARRAY_SIZE(dp_colorspaces));
+
+	if (!connector->colorspace_property)
+		return -ENOMEM;
 
 	return 0;
 }
-EXPORT_SYMBOL(drm_mode_create_colorspace_property);
+EXPORT_SYMBOL(drm_mode_create_dp_colorspace_property);
 
 /**
  * drm_mode_create_content_type_property - create content type property
@@ -2121,7 +2181,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	int encoders_count = 0;
 	int ret = 0;
 	int copied = 0;
-	int i;
 	struct drm_mode_modeinfo u_mode;
 	struct drm_mode_modeinfo __user *mode_ptr;
 	uint32_t __user *encoder_ptr;
@@ -2136,14 +2195,13 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	if (!connector)
 		return -ENOENT;
 
-	drm_connector_for_each_possible_encoder(connector, encoder, i)
-		encoders_count++;
+	encoders_count = hweight32(connector->possible_encoders);
 
 	if ((out_resp->count_encoders >= encoders_count) && encoders_count) {
 		copied = 0;
 		encoder_ptr = (uint32_t __user *)(unsigned long)(out_resp->encoders_ptr);
 
-		drm_connector_for_each_possible_encoder(connector, encoder, i) {
+		drm_connector_for_each_possible_encoder(connector, encoder) {
 			if (put_user(encoder->base.id, encoder_ptr + copied)) {
 				ret = -EFAULT;
 				goto out;
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 80ddf13ad996..93a4eec429e8 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -36,6 +36,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
@@ -47,6 +48,8 @@
 #include <drm/drm_print.h>
 #include <drm/drm_vblank.h>
 
+#include "drm_crtc_helper_internal.h"
+
 /**
  * DOC: overview
  *
@@ -459,6 +462,22 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 	__drm_helper_disable_unused_functions(dev);
 }
 
+/*
+ * For connectors that support multiple encoders, either the
+ * .atomic_best_encoder() or .best_encoder() operation must be implemented.
+ */
+struct drm_encoder *
+drm_connector_get_single_encoder(struct drm_connector *connector)
+{
+	struct drm_encoder *encoder;
+
+	WARN_ON(hweight32(connector->possible_encoders) > 1);
+	drm_connector_for_each_possible_encoder(connector, encoder)
+		return encoder;
+
+	return NULL;
+}
+
 /**
  * drm_crtc_helper_set_config - set a new config from userspace
  * @set: mode set configuration
@@ -624,7 +643,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set,
 		new_encoder = connector->encoder;
 		for (ro = 0; ro < set->num_connectors; ro++) {
 			if (set->connectors[ro] == connector) {
-				new_encoder = connector_funcs->best_encoder(connector);
+				if (connector_funcs->best_encoder)
+					new_encoder = connector_funcs->best_encoder(connector);
+				else
+					new_encoder = drm_connector_get_single_encoder(connector);
+
 				/* if we can't get an encoder for a connector
 				   we are setting now - then fail */
 				if (new_encoder == NULL)
diff --git a/drivers/gpu/drm/drm_crtc_helper_internal.h b/drivers/gpu/drm/drm_crtc_helper_internal.h
index b5ac1581e623..f0a66ef47e5a 100644
--- a/drivers/gpu/drm/drm_crtc_helper_internal.h
+++ b/drivers/gpu/drm/drm_crtc_helper_internal.h
@@ -75,3 +75,6 @@ enum drm_mode_status drm_encoder_mode_valid(struct drm_encoder *encoder,
 					    const struct drm_display_mode *mode);
 enum drm_mode_status drm_connector_mode_valid(struct drm_connector *connector,
 					      struct drm_display_mode *mode);
+
+struct drm_encoder *
+drm_connector_get_single_encoder(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index 8230dac01a89..3a4126dc2520 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -212,8 +212,14 @@ retry:
 	drm_for_each_plane(plane, fb->dev) {
 		struct drm_plane_state *plane_state;
 
-		if (plane->state->fb != fb)
+		ret = drm_modeset_lock(&plane->mutex, state->acquire_ctx);
+		if (ret)
+			goto out;
+
+		if (plane->state->fb != fb) {
+			drm_modeset_unlock(&plane->mutex);
 			continue;
+		}
 
 		plane_state = drm_atomic_get_plane_state(state, plane);
 		if (IS_ERR(plane_state)) {
diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c
index be1b7ba92ffe..e22b812c4b80 100644
--- a/drivers/gpu/drm/drm_debugfs_crc.c
+++ b/drivers/gpu/drm/drm_debugfs_crc.c
@@ -140,8 +140,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf,
 	if (IS_ERR(source))
 		return PTR_ERR(source);
 
-	if (source[len] == '\n')
-		source[len] = '\0';
+	if (source[len - 1] == '\n')
+		source[len - 1] = '\0';
 
 	ret = crtc->funcs->verify_crc_source(crtc, source, &values_cnt);
 	if (ret)
@@ -258,6 +258,11 @@ static int crtc_crc_release(struct inode *inode, struct file *filep)
 	struct drm_crtc *crtc = filep->f_inode->i_private;
 	struct drm_crtc_crc *crc = &crtc->crc;
 
+	/* terminate the infinite while loop if 'drm_dp_aux_crc_work' running */
+	spin_lock_irq(&crc->lock);
+	crc->opened = false;
+	spin_unlock_irq(&crc->lock);
+
 	crtc->funcs->set_crc_source(crtc, NULL);
 
 	spin_lock_irq(&crc->lock);
@@ -334,19 +339,17 @@ static ssize_t crtc_crc_read(struct file *filep, char __user *user_buf,
 	return LINE_LEN(crc->values_cnt);
 }
 
-static unsigned int crtc_crc_poll(struct file *file, poll_table *wait)
+static __poll_t crtc_crc_poll(struct file *file, poll_table *wait)
 {
 	struct drm_crtc *crtc = file->f_inode->i_private;
 	struct drm_crtc_crc *crc = &crtc->crc;
-	unsigned ret;
+	__poll_t ret = 0;
 
 	poll_wait(file, &crc->wq, wait);
 
 	spin_lock_irq(&crc->lock);
 	if (crc->source && crtc_crc_data_count(crc))
-		ret = POLLIN | POLLRDNORM;
-	else
-		ret = 0;
+		ret |= EPOLLIN | EPOLLRDNORM;
 	spin_unlock_irq(&crc->lock);
 
 	return ret;
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0cfb386754c3..2510717d5a08 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -163,11 +163,7 @@ static ssize_t auxdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		}
 
-		if (aux_dev->aux->is_remote)
-			res = drm_dp_mst_dpcd_read(aux_dev->aux, pos, buf,
-						   todo);
-		else
-			res = drm_dp_dpcd_read(aux_dev->aux, pos, buf, todo);
+		res = drm_dp_dpcd_read(aux_dev->aux, pos, buf, todo);
 
 		if (res <= 0)
 			break;
@@ -215,11 +211,7 @@ static ssize_t auxdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			break;
 		}
 
-		if (aux_dev->aux->is_remote)
-			res = drm_dp_mst_dpcd_write(aux_dev->aux, pos, buf,
-						    todo);
-		else
-			res = drm_dp_dpcd_write(aux_dev->aux, pos, buf, todo);
+		res = drm_dp_dpcd_write(aux_dev->aux, pos, buf, todo);
 
 		if (res <= 0)
 			break;
diff --git a/drivers/gpu/drm/drm_dp_cec.c b/drivers/gpu/drm/drm_dp_cec.c
index b15cee85b702..3ab2609f9ec7 100644
--- a/drivers/gpu/drm/drm_dp_cec.c
+++ b/drivers/gpu/drm/drm_dp_cec.c
@@ -8,9 +8,13 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <drm/drm_dp_helper.h>
+
 #include <media/cec.h>
 
+#include <drm/drm_connector.h>
+#include <drm/drm_device.h>
+#include <drm/drm_dp_helper.h>
+
 /*
  * Unfortunately it turns out that we have a chicken-and-egg situation
  * here. Quite a few active (mini-)DP-to-HDMI or USB-C-to-HDMI adapters
@@ -295,7 +299,10 @@ static void drm_dp_cec_unregister_work(struct work_struct *work)
  */
 void drm_dp_cec_set_edid(struct drm_dp_aux *aux, const struct edid *edid)
 {
-	u32 cec_caps = CEC_CAP_DEFAULTS | CEC_CAP_NEEDS_HPD;
+	struct drm_connector *connector = aux->cec.connector;
+	u32 cec_caps = CEC_CAP_DEFAULTS | CEC_CAP_NEEDS_HPD |
+		       CEC_CAP_CONNECTOR_INFO;
+	struct cec_connector_info conn_info;
 	unsigned int num_las = 1;
 	u8 cap;
 
@@ -344,13 +351,17 @@ void drm_dp_cec_set_edid(struct drm_dp_aux *aux, const struct edid *edid)
 
 	/* Create a new adapter */
 	aux->cec.adap = cec_allocate_adapter(&drm_dp_cec_adap_ops,
-					     aux, aux->cec.name, cec_caps,
+					     aux, connector->name, cec_caps,
 					     num_las);
 	if (IS_ERR(aux->cec.adap)) {
 		aux->cec.adap = NULL;
 		goto unlock;
 	}
-	if (cec_register_adapter(aux->cec.adap, aux->cec.parent)) {
+
+	cec_fill_conn_info_from_drm(&conn_info, connector);
+	cec_s_conn_info(aux->cec.adap, &conn_info);
+
+	if (cec_register_adapter(aux->cec.adap, connector->dev->dev)) {
 		cec_delete_adapter(aux->cec.adap);
 		aux->cec.adap = NULL;
 	} else {
@@ -406,22 +417,20 @@ EXPORT_SYMBOL(drm_dp_cec_unset_edid);
 /**
  * drm_dp_cec_register_connector() - register a new connector
  * @aux: DisplayPort AUX channel
- * @name: name of the CEC device
- * @parent: parent device
+ * @connector: drm connector
  *
  * A new connector was registered with associated CEC adapter name and
  * CEC adapter parent device. After registering the name and parent
  * drm_dp_cec_set_edid() is called to check if the connector supports
  * CEC and to register a CEC adapter if that is the case.
  */
-void drm_dp_cec_register_connector(struct drm_dp_aux *aux, const char *name,
-				   struct device *parent)
+void drm_dp_cec_register_connector(struct drm_dp_aux *aux,
+				   struct drm_connector *connector)
 {
 	WARN_ON(aux->cec.adap);
 	if (WARN_ON(!aux->transfer))
 		return;
-	aux->cec.name = name;
-	aux->cec.parent = parent;
+	aux->cec.connector = connector;
 	INIT_DELAYED_WORK(&aux->cec.unregister_work,
 			  drm_dp_cec_unregister_work);
 }
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index ffc68d305afe..a5364b5192b8 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -32,6 +32,7 @@
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_print.h>
 #include <drm/drm_vblank.h>
+#include <drm/drm_dp_mst_helper.h>
 
 #include "drm_crtc_helper_internal.h"
 
@@ -120,33 +121,49 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI
 }
 EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis);
 
-void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) {
-	int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
-			  DP_TRAINING_AUX_RD_MASK;
+u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE],
+					 unsigned int lane)
+{
+	unsigned int offset = DP_ADJUST_REQUEST_POST_CURSOR2;
+	u8 value = dp_link_status(link_status, offset);
+
+	return (value >> (lane << 1)) & 0x3;
+}
+EXPORT_SYMBOL(drm_dp_get_adjust_request_post_cursor);
+
+void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	unsigned long rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+					 DP_TRAINING_AUX_RD_MASK;
 
 	if (rd_interval > 4)
-		DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+		DRM_DEBUG_KMS("AUX interval %lu, out of range (max 4)\n",
 			      rd_interval);
 
 	if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14)
-		udelay(100);
+		rd_interval = 100;
 	else
-		mdelay(rd_interval * 4);
+		rd_interval *= 4 * USEC_PER_MSEC;
+
+	usleep_range(rd_interval, rd_interval * 2);
 }
 EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay);
 
-void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) {
-	int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
-			  DP_TRAINING_AUX_RD_MASK;
+void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	unsigned long rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+					 DP_TRAINING_AUX_RD_MASK;
 
 	if (rd_interval > 4)
-		DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+		DRM_DEBUG_KMS("AUX interval %lu, out of range (max 4)\n",
 			      rd_interval);
 
 	if (rd_interval == 0)
-		udelay(400);
+		rd_interval = 400;
 	else
-		mdelay(rd_interval * 4);
+		rd_interval *= 4 * USEC_PER_MSEC;
+
+	usleep_range(rd_interval, rd_interval * 2);
 }
 EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay);
 
@@ -220,7 +237,6 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 		}
 
 		ret = aux->transfer(aux, &msg);
-
 		if (ret >= 0) {
 			native_reply = msg.reply & DP_AUX_NATIVE_REPLY_MASK;
 			if (native_reply == DP_AUX_NATIVE_REPLY_ACK) {
@@ -251,7 +267,7 @@ unlock:
 
 /**
  * drm_dp_dpcd_read() - read a series of bytes from the DPCD
- * @aux: DisplayPort AUX channel
+ * @aux: DisplayPort AUX channel (SST or MST)
  * @offset: address of the (first) register to read
  * @buffer: buffer to store the register values
  * @size: number of bytes in @buffer
@@ -280,13 +296,18 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset,
 	 * We just have to do it before any DPCD access and hope that the
 	 * monitor doesn't power down exactly after the throw away read.
 	 */
-	ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, DP_DPCD_REV, buffer,
-				 1);
-	if (ret != 1)
-		goto out;
+	if (!aux->is_remote) {
+		ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, DP_DPCD_REV,
+					 buffer, 1);
+		if (ret != 1)
+			goto out;
+	}
 
-	ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, offset, buffer,
-				 size);
+	if (aux->is_remote)
+		ret = drm_dp_mst_dpcd_read(aux, offset, buffer, size);
+	else
+		ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, offset,
+					 buffer, size);
 
 out:
 	drm_dp_dump_access(aux, DP_AUX_NATIVE_READ, offset, buffer, ret);
@@ -296,7 +317,7 @@ EXPORT_SYMBOL(drm_dp_dpcd_read);
 
 /**
  * drm_dp_dpcd_write() - write a series of bytes to the DPCD
- * @aux: DisplayPort AUX channel
+ * @aux: DisplayPort AUX channel (SST or MST)
  * @offset: address of the (first) register to write
  * @buffer: buffer containing the values to write
  * @size: number of bytes in @buffer
@@ -313,8 +334,12 @@ ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset,
 {
 	int ret;
 
-	ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_WRITE, offset, buffer,
-				 size);
+	if (aux->is_remote)
+		ret = drm_dp_mst_dpcd_write(aux, offset, buffer, size);
+	else
+		ret = drm_dp_dpcd_access(aux, DP_AUX_NATIVE_WRITE, offset,
+					 buffer, size);
+
 	drm_dp_dump_access(aux, DP_AUX_NATIVE_WRITE, offset, buffer, ret);
 	return ret;
 }
@@ -337,134 +362,6 @@ int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux,
 EXPORT_SYMBOL(drm_dp_dpcd_read_link_status);
 
 /**
- * drm_dp_link_probe() - probe a DisplayPort link for capabilities
- * @aux: DisplayPort AUX channel
- * @link: pointer to structure in which to return link capabilities
- *
- * The structure filled in by this function can usually be passed directly
- * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and
- * configure the link based on the link's capabilities.
- *
- * Returns 0 on success or a negative error code on failure.
- */
-int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
-{
-	u8 values[3];
-	int err;
-
-	memset(link, 0, sizeof(*link));
-
-	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values));
-	if (err < 0)
-		return err;
-
-	link->revision = values[0];
-	link->rate = drm_dp_bw_code_to_link_rate(values[1]);
-	link->num_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
-
-	if (values[2] & DP_ENHANCED_FRAME_CAP)
-		link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_dp_link_probe);
-
-/**
- * drm_dp_link_power_up() - power up a DisplayPort link
- * @aux: DisplayPort AUX channel
- * @link: pointer to a structure containing the link configuration
- *
- * Returns 0 on success or a negative error code on failure.
- */
-int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link)
-{
-	u8 value;
-	int err;
-
-	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
-	if (link->revision < 0x11)
-		return 0;
-
-	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
-	if (err < 0)
-		return err;
-
-	value &= ~DP_SET_POWER_MASK;
-	value |= DP_SET_POWER_D0;
-
-	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
-	if (err < 0)
-		return err;
-
-	/*
-	 * According to the DP 1.1 specification, a "Sink Device must exit the
-	 * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink
-	 * Control Field" (register 0x600).
-	 */
-	usleep_range(1000, 2000);
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_dp_link_power_up);
-
-/**
- * drm_dp_link_power_down() - power down a DisplayPort link
- * @aux: DisplayPort AUX channel
- * @link: pointer to a structure containing the link configuration
- *
- * Returns 0 on success or a negative error code on failure.
- */
-int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link)
-{
-	u8 value;
-	int err;
-
-	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
-	if (link->revision < 0x11)
-		return 0;
-
-	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
-	if (err < 0)
-		return err;
-
-	value &= ~DP_SET_POWER_MASK;
-	value |= DP_SET_POWER_D3;
-
-	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_dp_link_power_down);
-
-/**
- * drm_dp_link_configure() - configure a DisplayPort link
- * @aux: DisplayPort AUX channel
- * @link: pointer to a structure containing the link configuration
- *
- * Returns 0 on success or a negative error code on failure.
- */
-int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
-{
-	u8 values[2];
-	int err;
-
-	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
-	values[1] = link->num_lanes;
-
-	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
-		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-
-	err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values));
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_dp_link_configure);
-
-/**
  * drm_dp_downstream_max_clock() - extract branch device max
  *                                 pixel rate for legacy VGA
  *                                 converter or max TMDS clock
@@ -1082,6 +979,19 @@ static void drm_dp_aux_crc_work(struct work_struct *work)
 }
 
 /**
+ * drm_dp_remote_aux_init() - minimally initialise a remote aux channel
+ * @aux: DisplayPort AUX channel
+ *
+ * Used for remote aux channel in general. Merely initialize the crc work
+ * struct.
+ */
+void drm_dp_remote_aux_init(struct drm_dp_aux *aux)
+{
+	INIT_WORK(&aux->crc_work, drm_dp_aux_crc_work);
+}
+EXPORT_SYMBOL(drm_dp_remote_aux_init);
+
+/**
  * drm_dp_aux_init() - minimally initialise an aux channel
  * @aux: DisplayPort AUX channel
  *
@@ -1109,6 +1019,14 @@ EXPORT_SYMBOL(drm_dp_aux_init);
  * @aux: DisplayPort AUX channel
  *
  * Automatically calls drm_dp_aux_init() if this hasn't been done yet.
+ * This should only be called when the underlying &struct drm_connector is
+ * initialiazed already. Therefore the best place to call this is from
+ * &drm_connector_funcs.late_register. Not that drivers which don't follow this
+ * will Oops when CONFIG_DRM_DP_AUX_CHARDEV is enabled.
+ *
+ * Drivers which need to use the aux channel before that point (e.g. at driver
+ * load time, before drm_dev_register() has been called) need to call
+ * drm_dp_aux_init().
  *
  * Returns 0 on success or a negative error code on failure.
  */
@@ -1260,6 +1178,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = {
 	{ OUI(0x00, 0x10, 0xfa), DEVICE_ID_ANY, false, BIT(DP_DPCD_QUIRK_NO_PSR) },
 	/* CH7511 seems to leave SINK_COUNT zeroed */
 	{ OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) },
+	/* Synaptics DP1.4 MST hubs can support DSC without virtual DPCD */
+	{ OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) },
 };
 
 #undef OUI
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 82add736e17d..20cdaf3146b8 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -28,15 +28,22 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 
+#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
+#include <linux/stacktrace.h>
+#include <linux/sort.h>
+#include <linux/timekeeping.h>
+#include <linux/math64.h>
+#endif
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fixed.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 #include "drm_crtc_helper_internal.h"
+#include "drm_dp_mst_topology_internal.h"
 
 /**
  * DOC: dp mst helper
@@ -45,9 +52,14 @@
  * protocol. The helpers contain a topology manager and bandwidth manager.
  * The helpers encapsulate the sending and received of sideband msgs.
  */
+struct drm_dp_pending_up_req {
+	struct drm_dp_sideband_msg_hdr hdr;
+	struct drm_dp_sideband_msg_req_body msg;
+	struct list_head next;
+};
+
 static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr,
 				  char *buf);
-static int test_calc_pbn_mode(void);
 
 static void drm_dp_mst_topology_put_port(struct drm_dp_mst_port *port);
 
@@ -62,8 +74,13 @@ static int drm_dp_send_dpcd_write(struct drm_dp_mst_topology_mgr *mgr,
 				  struct drm_dp_mst_port *port,
 				  int offset, int size, u8 *bytes);
 
-static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
-				     struct drm_dp_mst_branch *mstb);
+static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
+				    struct drm_dp_mst_branch *mstb);
+
+static void
+drm_dp_send_clear_payload_id_table(struct drm_dp_mst_topology_mgr *mgr,
+				   struct drm_dp_mst_branch *mstb);
+
 static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
 					   struct drm_dp_mst_branch *mstb,
 					   struct drm_dp_mst_port *port);
@@ -74,6 +91,8 @@ static int drm_dp_mst_register_i2c_bus(struct drm_dp_aux *aux);
 static void drm_dp_mst_unregister_i2c_bus(struct drm_dp_aux *aux);
 static void drm_dp_mst_kick_tx(struct drm_dp_mst_topology_mgr *mgr);
 
+#define DBG_PREFIX "[dp_mst]"
+
 #define DP_STR(x) [DP_ ## x] = #x
 
 static const char *drm_dp_mst_req_type_str(u8 req_type)
@@ -130,6 +149,43 @@ static const char *drm_dp_mst_nak_reason_str(u8 nak_reason)
 }
 
 #undef DP_STR
+#define DP_STR(x) [DRM_DP_SIDEBAND_TX_ ## x] = #x
+
+static const char *drm_dp_mst_sideband_tx_state_str(int state)
+{
+	static const char * const sideband_reason_str[] = {
+		DP_STR(QUEUED),
+		DP_STR(START_SEND),
+		DP_STR(SENT),
+		DP_STR(RX),
+		DP_STR(TIMEOUT),
+	};
+
+	if (state >= ARRAY_SIZE(sideband_reason_str) ||
+	    !sideband_reason_str[state])
+		return "unknown";
+
+	return sideband_reason_str[state];
+}
+
+static int
+drm_dp_mst_rad_to_str(const u8 rad[8], u8 lct, char *out, size_t len)
+{
+	int i;
+	u8 unpacked_rad[16];
+
+	for (i = 0; i < lct; i++) {
+		if (i % 2)
+			unpacked_rad[i] = rad[i / 2] >> 4;
+		else
+			unpacked_rad[i] = rad[i / 2] & BIT_MASK(4);
+	}
+
+	/* TODO: Eventually add something to printk so we can format the rad
+	 * like this: 1.2.3
+	 */
+	return snprintf(out, len, "%*phC", lct, unpacked_rad);
+}
 
 /* sideband msg handling */
 static u8 drm_dp_msg_header_crc4(const uint8_t *data, size_t num_nibbles)
@@ -262,8 +318,9 @@ static bool drm_dp_decode_sideband_msg_hdr(struct drm_dp_sideband_msg_hdr *hdr,
 	return true;
 }
 
-static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req,
-				       struct drm_dp_sideband_msg_tx *raw)
+void
+drm_dp_encode_sideband_req(const struct drm_dp_sideband_msg_req_body *req,
+			   struct drm_dp_sideband_msg_tx *raw)
 {
 	int idx = 0;
 	int i;
@@ -272,6 +329,8 @@ static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req,
 
 	switch (req->req_type) {
 	case DP_ENUM_PATH_RESOURCES:
+	case DP_POWER_DOWN_PHY:
+	case DP_POWER_UP_PHY:
 		buf[idx] = (req->u.port_num.port_number & 0xf) << 4;
 		idx++;
 		break;
@@ -339,7 +398,7 @@ static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req,
 			memcpy(&buf[idx], req->u.i2c_read.transactions[i].bytes, req->u.i2c_read.transactions[i].num_bytes);
 			idx += req->u.i2c_read.transactions[i].num_bytes;
 
-			buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 5;
+			buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 4;
 			buf[idx] |= (req->u.i2c_read.transactions[i].i2c_transaction_delay & 0xf);
 			idx++;
 		}
@@ -359,14 +418,255 @@ static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req,
 		memcpy(&buf[idx], req->u.i2c_write.bytes, req->u.i2c_write.num_bytes);
 		idx += req->u.i2c_write.num_bytes;
 		break;
+	}
+	raw->cur_len = idx;
+}
+EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_dp_encode_sideband_req);
 
+/* Decode a sideband request we've encoded, mainly used for debugging */
+int
+drm_dp_decode_sideband_req(const struct drm_dp_sideband_msg_tx *raw,
+			   struct drm_dp_sideband_msg_req_body *req)
+{
+	const u8 *buf = raw->msg;
+	int i, idx = 0;
+
+	req->req_type = buf[idx++] & 0x7f;
+	switch (req->req_type) {
+	case DP_ENUM_PATH_RESOURCES:
 	case DP_POWER_DOWN_PHY:
 	case DP_POWER_UP_PHY:
-		buf[idx] = (req->u.port_num.port_number & 0xf) << 4;
-		idx++;
+		req->u.port_num.port_number = (buf[idx] >> 4) & 0xf;
+		break;
+	case DP_ALLOCATE_PAYLOAD:
+		{
+			struct drm_dp_allocate_payload *a =
+				&req->u.allocate_payload;
+
+			a->number_sdp_streams = buf[idx] & 0xf;
+			a->port_number = (buf[idx] >> 4) & 0xf;
+
+			WARN_ON(buf[++idx] & 0x80);
+			a->vcpi = buf[idx] & 0x7f;
+
+			a->pbn = buf[++idx] << 8;
+			a->pbn |= buf[++idx];
+
+			idx++;
+			for (i = 0; i < a->number_sdp_streams; i++) {
+				a->sdp_stream_sink[i] =
+					(buf[idx + (i / 2)] >> ((i % 2) ? 0 : 4)) & 0xf;
+			}
+		}
+		break;
+	case DP_QUERY_PAYLOAD:
+		req->u.query_payload.port_number = (buf[idx] >> 4) & 0xf;
+		WARN_ON(buf[++idx] & 0x80);
+		req->u.query_payload.vcpi = buf[idx] & 0x7f;
+		break;
+	case DP_REMOTE_DPCD_READ:
+		{
+			struct drm_dp_remote_dpcd_read *r = &req->u.dpcd_read;
+
+			r->port_number = (buf[idx] >> 4) & 0xf;
+
+			r->dpcd_address = (buf[idx] << 16) & 0xf0000;
+			r->dpcd_address |= (buf[++idx] << 8) & 0xff00;
+			r->dpcd_address |= buf[++idx] & 0xff;
+
+			r->num_bytes = buf[++idx];
+		}
+		break;
+	case DP_REMOTE_DPCD_WRITE:
+		{
+			struct drm_dp_remote_dpcd_write *w =
+				&req->u.dpcd_write;
+
+			w->port_number = (buf[idx] >> 4) & 0xf;
+
+			w->dpcd_address = (buf[idx] << 16) & 0xf0000;
+			w->dpcd_address |= (buf[++idx] << 8) & 0xff00;
+			w->dpcd_address |= buf[++idx] & 0xff;
+
+			w->num_bytes = buf[++idx];
+
+			w->bytes = kmemdup(&buf[++idx], w->num_bytes,
+					   GFP_KERNEL);
+			if (!w->bytes)
+				return -ENOMEM;
+		}
+		break;
+	case DP_REMOTE_I2C_READ:
+		{
+			struct drm_dp_remote_i2c_read *r = &req->u.i2c_read;
+			struct drm_dp_remote_i2c_read_tx *tx;
+			bool failed = false;
+
+			r->num_transactions = buf[idx] & 0x3;
+			r->port_number = (buf[idx] >> 4) & 0xf;
+			for (i = 0; i < r->num_transactions; i++) {
+				tx = &r->transactions[i];
+
+				tx->i2c_dev_id = buf[++idx] & 0x7f;
+				tx->num_bytes = buf[++idx];
+				tx->bytes = kmemdup(&buf[++idx],
+						    tx->num_bytes,
+						    GFP_KERNEL);
+				if (!tx->bytes) {
+					failed = true;
+					break;
+				}
+				idx += tx->num_bytes;
+				tx->no_stop_bit = (buf[idx] >> 5) & 0x1;
+				tx->i2c_transaction_delay = buf[idx] & 0xf;
+			}
+
+			if (failed) {
+				for (i = 0; i < r->num_transactions; i++) {
+					tx = &r->transactions[i];
+					kfree(tx->bytes);
+				}
+				return -ENOMEM;
+			}
+
+			r->read_i2c_device_id = buf[++idx] & 0x7f;
+			r->num_bytes_read = buf[++idx];
+		}
+		break;
+	case DP_REMOTE_I2C_WRITE:
+		{
+			struct drm_dp_remote_i2c_write *w = &req->u.i2c_write;
+
+			w->port_number = (buf[idx] >> 4) & 0xf;
+			w->write_i2c_device_id = buf[++idx] & 0x7f;
+			w->num_bytes = buf[++idx];
+			w->bytes = kmemdup(&buf[++idx], w->num_bytes,
+					   GFP_KERNEL);
+			if (!w->bytes)
+				return -ENOMEM;
+		}
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_dp_decode_sideband_req);
+
+void
+drm_dp_dump_sideband_msg_req_body(const struct drm_dp_sideband_msg_req_body *req,
+				  int indent, struct drm_printer *printer)
+{
+	int i;
+
+#define P(f, ...) drm_printf_indent(printer, indent, f, ##__VA_ARGS__)
+	if (req->req_type == DP_LINK_ADDRESS) {
+		/* No contents to print */
+		P("type=%s\n", drm_dp_mst_req_type_str(req->req_type));
+		return;
+	}
+
+	P("type=%s contents:\n", drm_dp_mst_req_type_str(req->req_type));
+	indent++;
+
+	switch (req->req_type) {
+	case DP_ENUM_PATH_RESOURCES:
+	case DP_POWER_DOWN_PHY:
+	case DP_POWER_UP_PHY:
+		P("port=%d\n", req->u.port_num.port_number);
+		break;
+	case DP_ALLOCATE_PAYLOAD:
+		P("port=%d vcpi=%d pbn=%d sdp_streams=%d %*ph\n",
+		  req->u.allocate_payload.port_number,
+		  req->u.allocate_payload.vcpi, req->u.allocate_payload.pbn,
+		  req->u.allocate_payload.number_sdp_streams,
+		  req->u.allocate_payload.number_sdp_streams,
+		  req->u.allocate_payload.sdp_stream_sink);
+		break;
+	case DP_QUERY_PAYLOAD:
+		P("port=%d vcpi=%d\n",
+		  req->u.query_payload.port_number,
+		  req->u.query_payload.vcpi);
+		break;
+	case DP_REMOTE_DPCD_READ:
+		P("port=%d dpcd_addr=%05x len=%d\n",
+		  req->u.dpcd_read.port_number, req->u.dpcd_read.dpcd_address,
+		  req->u.dpcd_read.num_bytes);
+		break;
+	case DP_REMOTE_DPCD_WRITE:
+		P("port=%d addr=%05x len=%d: %*ph\n",
+		  req->u.dpcd_write.port_number,
+		  req->u.dpcd_write.dpcd_address,
+		  req->u.dpcd_write.num_bytes, req->u.dpcd_write.num_bytes,
+		  req->u.dpcd_write.bytes);
+		break;
+	case DP_REMOTE_I2C_READ:
+		P("port=%d num_tx=%d id=%d size=%d:\n",
+		  req->u.i2c_read.port_number,
+		  req->u.i2c_read.num_transactions,
+		  req->u.i2c_read.read_i2c_device_id,
+		  req->u.i2c_read.num_bytes_read);
+
+		indent++;
+		for (i = 0; i < req->u.i2c_read.num_transactions; i++) {
+			const struct drm_dp_remote_i2c_read_tx *rtx =
+				&req->u.i2c_read.transactions[i];
+
+			P("%d: id=%03d size=%03d no_stop_bit=%d tx_delay=%03d: %*ph\n",
+			  i, rtx->i2c_dev_id, rtx->num_bytes,
+			  rtx->no_stop_bit, rtx->i2c_transaction_delay,
+			  rtx->num_bytes, rtx->bytes);
+		}
+		break;
+	case DP_REMOTE_I2C_WRITE:
+		P("port=%d id=%d size=%d: %*ph\n",
+		  req->u.i2c_write.port_number,
+		  req->u.i2c_write.write_i2c_device_id,
+		  req->u.i2c_write.num_bytes, req->u.i2c_write.num_bytes,
+		  req->u.i2c_write.bytes);
+		break;
+	default:
+		P("???\n");
+		break;
+	}
+#undef P
+}
+EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_dp_dump_sideband_msg_req_body);
+
+static inline void
+drm_dp_mst_dump_sideband_msg_tx(struct drm_printer *p,
+				const struct drm_dp_sideband_msg_tx *txmsg)
+{
+	struct drm_dp_sideband_msg_req_body req;
+	char buf[64];
+	int ret;
+	int i;
+
+	drm_dp_mst_rad_to_str(txmsg->dst->rad, txmsg->dst->lct, buf,
+			      sizeof(buf));
+	drm_printf(p, "txmsg cur_offset=%x cur_len=%x seqno=%x state=%s path_msg=%d dst=%s\n",
+		   txmsg->cur_offset, txmsg->cur_len, txmsg->seqno,
+		   drm_dp_mst_sideband_tx_state_str(txmsg->state),
+		   txmsg->path_msg, buf);
+
+	ret = drm_dp_decode_sideband_req(txmsg, &req);
+	if (ret) {
+		drm_printf(p, "<failed to decode sideband req: %d>\n", ret);
+		return;
+	}
+	drm_dp_dump_sideband_msg_req_body(&req, 1, p);
+
+	switch (req.req_type) {
+	case DP_REMOTE_DPCD_WRITE:
+		kfree(req.u.dpcd_write.bytes);
+		break;
+	case DP_REMOTE_I2C_READ:
+		for (i = 0; i < req.u.i2c_read.num_transactions; i++)
+			kfree(req.u.i2c_read.transactions[i].bytes);
+		break;
+	case DP_REMOTE_I2C_WRITE:
+		kfree(req.u.i2c_write.bytes);
 		break;
 	}
-	raw->cur_len = idx;
 }
 
 static void drm_dp_crc_sideband_chunk_req(u8 *msg, u8 len)
@@ -553,6 +853,7 @@ static bool drm_dp_sideband_parse_enum_path_resources_ack(struct drm_dp_sideband
 {
 	int idx = 1;
 	repmsg->u.path_resources.port_number = (raw->msg[idx] >> 4) & 0xf;
+	repmsg->u.path_resources.fec_capable = raw->msg[idx] & 0x1;
 	idx++;
 	if (idx > raw->curlen)
 		goto fail_len;
@@ -657,6 +958,8 @@ static bool drm_dp_sideband_parse_reply(struct drm_dp_sideband_msg_rx *raw,
 	case DP_POWER_DOWN_PHY:
 	case DP_POWER_UP_PHY:
 		return drm_dp_sideband_parse_power_updown_phy_ack(raw, msg);
+	case DP_CLEAR_PAYLOAD_ID_TABLE:
+		return true; /* since there's nothing to parse */
 	default:
 		DRM_ERROR("Got unknown reply 0x%02x (%s)\n", msg->req_type,
 			  drm_dp_mst_req_type_str(msg->req_type));
@@ -755,6 +1058,15 @@ static int build_link_address(struct drm_dp_sideband_msg_tx *msg)
 	return 0;
 }
 
+static int build_clear_payload_id_table(struct drm_dp_sideband_msg_tx *msg)
+{
+	struct drm_dp_sideband_msg_req_body req;
+
+	req.req_type = DP_CLEAR_PAYLOAD_ID_TABLE;
+	drm_dp_encode_sideband_req(&req, msg);
+	return 0;
+}
+
 static int build_enum_path_resources(struct drm_dp_sideband_msg_tx *msg, int port_num)
 {
 	struct drm_dp_sideband_msg_req_body req;
@@ -842,11 +1154,11 @@ static void drm_dp_mst_put_payload_id(struct drm_dp_mst_topology_mgr *mgr,
 	clear_bit(vcpi - 1, &mgr->vcpi_mask);
 
 	for (i = 0; i < mgr->max_payloads; i++) {
-		if (mgr->proposed_vcpis[i])
-			if (mgr->proposed_vcpis[i]->vcpi == vcpi) {
-				mgr->proposed_vcpis[i] = NULL;
-				clear_bit(i + 1, &mgr->payload_mask);
-			}
+		if (mgr->proposed_vcpis[i] &&
+		    mgr->proposed_vcpis[i]->vcpi == vcpi) {
+			mgr->proposed_vcpis[i] = NULL;
+			clear_bit(i + 1, &mgr->payload_mask);
+		}
 	}
 	mutex_unlock(&mgr->payload_lock);
 }
@@ -897,10 +1209,18 @@ static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb,
 		    txmsg->state == DRM_DP_SIDEBAND_TX_SENT) {
 			mstb->tx_slots[txmsg->seqno] = NULL;
 		}
+		mgr->is_waiting_for_dwn_reply = false;
+
 	}
 out:
+	if (unlikely(ret == -EIO) && drm_debug_enabled(DRM_UT_DP)) {
+		struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+
+		drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
+	}
 	mutex_unlock(&mgr->qlock);
 
+	drm_dp_mst_kick_tx(mgr);
 	return ret;
 }
 
@@ -1108,39 +1428,194 @@ drm_dp_mst_put_port_malloc(struct drm_dp_mst_port *port)
 }
 EXPORT_SYMBOL(drm_dp_mst_put_port_malloc);
 
-static void drm_dp_destroy_mst_branch_device(struct kref *kref)
+#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
+
+#define STACK_DEPTH 8
+
+static noinline void
+__topology_ref_save(struct drm_dp_mst_topology_mgr *mgr,
+		    struct drm_dp_mst_topology_ref_history *history,
+		    enum drm_dp_mst_topology_ref_type type)
 {
-	struct drm_dp_mst_branch *mstb =
-		container_of(kref, struct drm_dp_mst_branch, topology_kref);
-	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
-	struct drm_dp_mst_port *port, *tmp;
-	bool wake_tx = false;
+	struct drm_dp_mst_topology_ref_entry *entry = NULL;
+	depot_stack_handle_t backtrace;
+	ulong stack_entries[STACK_DEPTH];
+	uint n;
+	int i;
 
-	mutex_lock(&mgr->lock);
-	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
-		list_del(&port->next);
-		drm_dp_mst_topology_put_port(port);
+	n = stack_trace_save(stack_entries, ARRAY_SIZE(stack_entries), 1);
+	backtrace = stack_depot_save(stack_entries, n, GFP_KERNEL);
+	if (!backtrace)
+		return;
+
+	/* Try to find an existing entry for this backtrace */
+	for (i = 0; i < history->len; i++) {
+		if (history->entries[i].backtrace == backtrace) {
+			entry = &history->entries[i];
+			break;
+		}
 	}
-	mutex_unlock(&mgr->lock);
 
-	/* drop any tx slots msg */
-	mutex_lock(&mstb->mgr->qlock);
-	if (mstb->tx_slots[0]) {
-		mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
-		mstb->tx_slots[0] = NULL;
-		wake_tx = true;
+	/* Otherwise add one */
+	if (!entry) {
+		struct drm_dp_mst_topology_ref_entry *new;
+		int new_len = history->len + 1;
+
+		new = krealloc(history->entries, sizeof(*new) * new_len,
+			       GFP_KERNEL);
+		if (!new)
+			return;
+
+		entry = &new[history->len];
+		history->len = new_len;
+		history->entries = new;
+
+		entry->backtrace = backtrace;
+		entry->type = type;
+		entry->count = 0;
 	}
-	if (mstb->tx_slots[1]) {
-		mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
-		mstb->tx_slots[1] = NULL;
-		wake_tx = true;
+	entry->count++;
+	entry->ts_nsec = ktime_get_ns();
+}
+
+static int
+topology_ref_history_cmp(const void *a, const void *b)
+{
+	const struct drm_dp_mst_topology_ref_entry *entry_a = a, *entry_b = b;
+
+	if (entry_a->ts_nsec > entry_b->ts_nsec)
+		return 1;
+	else if (entry_a->ts_nsec < entry_b->ts_nsec)
+		return -1;
+	else
+		return 0;
+}
+
+static inline const char *
+topology_ref_type_to_str(enum drm_dp_mst_topology_ref_type type)
+{
+	if (type == DRM_DP_MST_TOPOLOGY_REF_GET)
+		return "get";
+	else
+		return "put";
+}
+
+static void
+__dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history,
+			    void *ptr, const char *type_str)
+{
+	struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+	char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	int i;
+
+	if (!buf)
+		return;
+
+	if (!history->len)
+		goto out;
+
+	/* First, sort the list so that it goes from oldest to newest
+	 * reference entry
+	 */
+	sort(history->entries, history->len, sizeof(*history->entries),
+	     topology_ref_history_cmp, NULL);
+
+	drm_printf(&p, "%s (%p) topology count reached 0, dumping history:\n",
+		   type_str, ptr);
+
+	for (i = 0; i < history->len; i++) {
+		const struct drm_dp_mst_topology_ref_entry *entry =
+			&history->entries[i];
+		ulong *entries;
+		uint nr_entries;
+		u64 ts_nsec = entry->ts_nsec;
+		u32 rem_nsec = do_div(ts_nsec, 1000000000);
+
+		nr_entries = stack_depot_fetch(entry->backtrace, &entries);
+		stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
+
+		drm_printf(&p, "  %d %ss (last at %5llu.%06u):\n%s",
+			   entry->count,
+			   topology_ref_type_to_str(entry->type),
+			   ts_nsec, rem_nsec / 1000, buf);
 	}
-	mutex_unlock(&mstb->mgr->qlock);
 
-	if (wake_tx)
-		wake_up_all(&mstb->mgr->tx_waitq);
+	/* Now free the history, since this is the only time we expose it */
+	kfree(history->entries);
+out:
+	kfree(buf);
+}
 
-	drm_dp_mst_put_mstb_malloc(mstb);
+static __always_inline void
+drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb)
+{
+	__dump_topology_ref_history(&mstb->topology_ref_history, mstb,
+				    "MSTB");
+}
+
+static __always_inline void
+drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port)
+{
+	__dump_topology_ref_history(&port->topology_ref_history, port,
+				    "Port");
+}
+
+static __always_inline void
+save_mstb_topology_ref(struct drm_dp_mst_branch *mstb,
+		       enum drm_dp_mst_topology_ref_type type)
+{
+	__topology_ref_save(mstb->mgr, &mstb->topology_ref_history, type);
+}
+
+static __always_inline void
+save_port_topology_ref(struct drm_dp_mst_port *port,
+		       enum drm_dp_mst_topology_ref_type type)
+{
+	__topology_ref_save(port->mgr, &port->topology_ref_history, type);
+}
+
+static inline void
+topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr)
+{
+	mutex_lock(&mgr->topology_ref_history_lock);
+}
+
+static inline void
+topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr)
+{
+	mutex_unlock(&mgr->topology_ref_history_lock);
+}
+#else
+static inline void
+topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr) {}
+static inline void
+topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr) {}
+static inline void
+drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb) {}
+static inline void
+drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port) {}
+#define save_mstb_topology_ref(mstb, type)
+#define save_port_topology_ref(port, type)
+#endif
+
+static void drm_dp_destroy_mst_branch_device(struct kref *kref)
+{
+	struct drm_dp_mst_branch *mstb =
+		container_of(kref, struct drm_dp_mst_branch, topology_kref);
+	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
+
+	drm_dp_mst_dump_mstb_topology_history(mstb);
+
+	INIT_LIST_HEAD(&mstb->destroy_next);
+
+	/*
+	 * This can get called under mgr->mutex, so we need to perform the
+	 * actual destruction of the mstb in another worker
+	 */
+	mutex_lock(&mgr->delayed_destroy_lock);
+	list_add(&mstb->destroy_next, &mgr->destroy_branch_device_list);
+	mutex_unlock(&mgr->delayed_destroy_lock);
+	schedule_work(&mgr->delayed_destroy_work);
 }
 
 /**
@@ -1168,11 +1643,17 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 static int __must_check
 drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb)
 {
-	int ret = kref_get_unless_zero(&mstb->topology_kref);
+	int ret;
 
-	if (ret)
-		DRM_DEBUG("mstb %p (%d)\n", mstb,
-			  kref_read(&mstb->topology_kref));
+	topology_ref_history_lock(mstb->mgr);
+	ret = kref_get_unless_zero(&mstb->topology_kref);
+	if (ret) {
+		DRM_DEBUG("mstb %p (%d)\n",
+			  mstb, kref_read(&mstb->topology_kref));
+		save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET);
+	}
+
+	topology_ref_history_unlock(mstb->mgr);
 
 	return ret;
 }
@@ -1193,9 +1674,14 @@ drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb)
  */
 static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb)
 {
+	topology_ref_history_lock(mstb->mgr);
+
+	save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET);
 	WARN_ON(kref_read(&mstb->topology_kref) == 0);
 	kref_get(&mstb->topology_kref);
 	DRM_DEBUG("mstb %p (%d)\n", mstb, kref_read(&mstb->topology_kref));
+
+	topology_ref_history_unlock(mstb->mgr);
 }
 
 /**
@@ -1213,27 +1699,14 @@ static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb)
 static void
 drm_dp_mst_topology_put_mstb(struct drm_dp_mst_branch *mstb)
 {
+	topology_ref_history_lock(mstb->mgr);
+
 	DRM_DEBUG("mstb %p (%d)\n",
 		  mstb, kref_read(&mstb->topology_kref) - 1);
-	kref_put(&mstb->topology_kref, drm_dp_destroy_mst_branch_device);
-}
-
-static void drm_dp_port_teardown_pdt(struct drm_dp_mst_port *port, int old_pdt)
-{
-	struct drm_dp_mst_branch *mstb;
+	save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_PUT);
 
-	switch (old_pdt) {
-	case DP_PEER_DEVICE_DP_LEGACY_CONV:
-	case DP_PEER_DEVICE_SST_SINK:
-		/* remove i2c over sideband */
-		drm_dp_mst_unregister_i2c_bus(&port->aux);
-		break;
-	case DP_PEER_DEVICE_MST_BRANCHING:
-		mstb = port->mstb;
-		port->mstb = NULL;
-		drm_dp_mst_topology_put_mstb(mstb);
-		break;
-	}
+	topology_ref_history_unlock(mstb->mgr);
+	kref_put(&mstb->topology_kref, drm_dp_destroy_mst_branch_device);
 }
 
 static void drm_dp_destroy_port(struct kref *kref)
@@ -1242,31 +1715,24 @@ static void drm_dp_destroy_port(struct kref *kref)
 		container_of(kref, struct drm_dp_mst_port, topology_kref);
 	struct drm_dp_mst_topology_mgr *mgr = port->mgr;
 
-	if (!port->input) {
-		kfree(port->cached_edid);
+	drm_dp_mst_dump_port_topology_history(port);
 
-		/*
-		 * The only time we don't have a connector
-		 * on an output port is if the connector init
-		 * fails.
-		 */
-		if (port->connector) {
-			/* we can't destroy the connector here, as
-			 * we might be holding the mode_config.mutex
-			 * from an EDID retrieval */
-
-			mutex_lock(&mgr->destroy_connector_lock);
-			list_add(&port->next, &mgr->destroy_connector_list);
-			mutex_unlock(&mgr->destroy_connector_lock);
-			schedule_work(&mgr->destroy_connector_work);
-			return;
-		}
-		/* no need to clean up vcpi
-		 * as if we have no connector we never setup a vcpi */
-		drm_dp_port_teardown_pdt(port, port->pdt);
-		port->pdt = DP_PEER_DEVICE_NONE;
+	/* There's nothing that needs locking to destroy an input port yet */
+	if (port->input) {
+		drm_dp_mst_put_port_malloc(port);
+		return;
 	}
-	drm_dp_mst_put_port_malloc(port);
+
+	kfree(port->cached_edid);
+
+	/*
+	 * we can't destroy the connector here, as we might be holding the
+	 * mode_config.mutex from an EDID retrieval
+	 */
+	mutex_lock(&mgr->delayed_destroy_lock);
+	list_add(&port->next, &mgr->destroy_port_list);
+	mutex_unlock(&mgr->delayed_destroy_lock);
+	schedule_work(&mgr->delayed_destroy_work);
 }
 
 /**
@@ -1294,12 +1760,17 @@ static void drm_dp_destroy_port(struct kref *kref)
 static int __must_check
 drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port)
 {
-	int ret = kref_get_unless_zero(&port->topology_kref);
+	int ret;
 
-	if (ret)
-		DRM_DEBUG("port %p (%d)\n", port,
-			  kref_read(&port->topology_kref));
+	topology_ref_history_lock(port->mgr);
+	ret = kref_get_unless_zero(&port->topology_kref);
+	if (ret) {
+		DRM_DEBUG("port %p (%d)\n",
+			  port, kref_read(&port->topology_kref));
+		save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET);
+	}
 
+	topology_ref_history_unlock(port->mgr);
 	return ret;
 }
 
@@ -1318,9 +1789,14 @@ drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port)
  */
 static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port)
 {
+	topology_ref_history_lock(port->mgr);
+
 	WARN_ON(kref_read(&port->topology_kref) == 0);
 	kref_get(&port->topology_kref);
 	DRM_DEBUG("port %p (%d)\n", port, kref_read(&port->topology_kref));
+	save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET);
+
+	topology_ref_history_unlock(port->mgr);
 }
 
 /**
@@ -1336,8 +1812,13 @@ static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port)
  */
 static void drm_dp_mst_topology_put_port(struct drm_dp_mst_port *port)
 {
+	topology_ref_history_lock(port->mgr);
+
 	DRM_DEBUG("port %p (%d)\n",
 		  port, kref_read(&port->topology_kref) - 1);
+	save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_PUT);
+
+	topology_ref_history_unlock(port->mgr);
 	kref_put(&port->topology_kref, drm_dp_destroy_port);
 }
 
@@ -1454,38 +1935,96 @@ static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port,
 	return parent_lct + 1;
 }
 
-/*
- * return sends link address for new mstb
- */
-static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port)
+static bool drm_dp_mst_is_dp_mst_end_device(u8 pdt, bool mcs)
 {
-	int ret;
-	u8 rad[6], lct;
-	bool send_link = false;
-	switch (port->pdt) {
+	switch (pdt) {
 	case DP_PEER_DEVICE_DP_LEGACY_CONV:
 	case DP_PEER_DEVICE_SST_SINK:
-		/* add i2c over sideband */
-		ret = drm_dp_mst_register_i2c_bus(&port->aux);
-		break;
+		return true;
 	case DP_PEER_DEVICE_MST_BRANCHING:
-		lct = drm_dp_calculate_rad(port, rad);
+		/* For sst branch device */
+		if (!mcs)
+			return true;
+
+		return false;
+	}
+	return true;
+}
+
+static int
+drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt,
+		    bool new_mcs)
+{
+	struct drm_dp_mst_topology_mgr *mgr = port->mgr;
+	struct drm_dp_mst_branch *mstb;
+	u8 rad[8], lct;
+	int ret = 0;
+
+	if (port->pdt == new_pdt && port->mcs == new_mcs)
+		return 0;
+
+	/* Teardown the old pdt, if there is one */
+	if (port->pdt != DP_PEER_DEVICE_NONE) {
+		if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+			/*
+			 * If the new PDT would also have an i2c bus,
+			 * don't bother with reregistering it
+			 */
+			if (new_pdt != DP_PEER_DEVICE_NONE &&
+			    drm_dp_mst_is_dp_mst_end_device(new_pdt, new_mcs)) {
+				port->pdt = new_pdt;
+				port->mcs = new_mcs;
+				return 0;
+			}
+
+			/* remove i2c over sideband */
+			drm_dp_mst_unregister_i2c_bus(&port->aux);
+		} else {
+			mutex_lock(&mgr->lock);
+			drm_dp_mst_topology_put_mstb(port->mstb);
+			port->mstb = NULL;
+			mutex_unlock(&mgr->lock);
+		}
+	}
+
+	port->pdt = new_pdt;
+	port->mcs = new_mcs;
+
+	if (port->pdt != DP_PEER_DEVICE_NONE) {
+		if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+			/* add i2c over sideband */
+			ret = drm_dp_mst_register_i2c_bus(&port->aux);
+		} else {
+			lct = drm_dp_calculate_rad(port, rad);
+			mstb = drm_dp_add_mst_branch_device(lct, rad);
+			if (!mstb) {
+				ret = -ENOMEM;
+				DRM_ERROR("Failed to create MSTB for port %p",
+					  port);
+				goto out;
+			}
+
+			mutex_lock(&mgr->lock);
+			port->mstb = mstb;
+			mstb->mgr = port->mgr;
+			mstb->port_parent = port;
 
-		port->mstb = drm_dp_add_mst_branch_device(lct, rad);
-		if (port->mstb) {
-			port->mstb->mgr = port->mgr;
-			port->mstb->port_parent = port;
 			/*
 			 * Make sure this port's memory allocation stays
 			 * around until its child MSTB releases it
 			 */
 			drm_dp_mst_get_port_malloc(port);
+			mutex_unlock(&mgr->lock);
 
-			send_link = true;
+			/* And make sure we send a link address for this */
+			ret = 1;
 		}
-		break;
 	}
-	return send_link;
+
+out:
+	if (ret < 0)
+		port->pdt = DP_PEER_DEVICE_NONE;
+	return ret;
 }
 
 /**
@@ -1617,45 +2156,136 @@ void drm_dp_mst_connector_early_unregister(struct drm_connector *connector,
 }
 EXPORT_SYMBOL(drm_dp_mst_connector_early_unregister);
 
-static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
-			    struct drm_device *dev,
-			    struct drm_dp_link_addr_reply_port *port_msg)
+static void
+drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb,
+			      struct drm_dp_mst_port *port)
 {
+	struct drm_dp_mst_topology_mgr *mgr = port->mgr;
+	char proppath[255];
+	int ret;
+
+	build_mst_prop_path(mstb, port->port_num, proppath, sizeof(proppath));
+	port->connector = mgr->cbs->add_connector(mgr, port, proppath);
+	if (!port->connector) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (port->pdt != DP_PEER_DEVICE_NONE &&
+	    drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+		port->cached_edid = drm_get_edid(port->connector,
+						 &port->aux.ddc);
+		drm_connector_set_tile_property(port->connector);
+	}
+
+	mgr->cbs->register_connector(port->connector);
+	return;
+
+error:
+	DRM_ERROR("Failed to create connector for port %p: %d\n", port, ret);
+}
+
+/*
+ * Drop a topology reference, and unlink the port from the in-memory topology
+ * layout
+ */
+static void
+drm_dp_mst_topology_unlink_port(struct drm_dp_mst_topology_mgr *mgr,
+				struct drm_dp_mst_port *port)
+{
+	mutex_lock(&mgr->lock);
+	port->parent->num_ports--;
+	list_del(&port->next);
+	mutex_unlock(&mgr->lock);
+	drm_dp_mst_topology_put_port(port);
+}
+
+static struct drm_dp_mst_port *
+drm_dp_mst_add_port(struct drm_device *dev,
+		    struct drm_dp_mst_topology_mgr *mgr,
+		    struct drm_dp_mst_branch *mstb, u8 port_number)
+{
+	struct drm_dp_mst_port *port = kzalloc(sizeof(*port), GFP_KERNEL);
+
+	if (!port)
+		return NULL;
+
+	kref_init(&port->topology_kref);
+	kref_init(&port->malloc_kref);
+	port->parent = mstb;
+	port->port_num = port_number;
+	port->mgr = mgr;
+	port->aux.name = "DPMST";
+	port->aux.dev = dev->dev;
+	port->aux.is_remote = true;
+
+	/* initialize the MST downstream port's AUX crc work queue */
+	drm_dp_remote_aux_init(&port->aux);
+
+	/*
+	 * Make sure the memory allocation for our parent branch stays
+	 * around until our own memory allocation is released
+	 */
+	drm_dp_mst_get_mstb_malloc(mstb);
+
+	return port;
+}
+
+static int
+drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb,
+				    struct drm_device *dev,
+				    struct drm_dp_link_addr_reply_port *port_msg)
+{
+	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
 	struct drm_dp_mst_port *port;
-	bool ret;
-	bool created = false;
-	int old_pdt = 0;
-	int old_ddps = 0;
+	int old_ddps = 0, ret;
+	u8 new_pdt = DP_PEER_DEVICE_NONE;
+	bool new_mcs = 0;
+	bool created = false, send_link_addr = false, changed = false;
 
 	port = drm_dp_get_port(mstb, port_msg->port_number);
 	if (!port) {
-		port = kzalloc(sizeof(*port), GFP_KERNEL);
+		port = drm_dp_mst_add_port(dev, mgr, mstb,
+					   port_msg->port_number);
 		if (!port)
-			return;
-		kref_init(&port->topology_kref);
-		kref_init(&port->malloc_kref);
-		port->parent = mstb;
-		port->port_num = port_msg->port_number;
-		port->mgr = mstb->mgr;
-		port->aux.name = "DPMST";
-		port->aux.dev = dev->dev;
-		port->aux.is_remote = true;
-
-		/*
-		 * Make sure the memory allocation for our parent branch stays
-		 * around until our own memory allocation is released
+			return -ENOMEM;
+		created = true;
+		changed = true;
+	} else if (!port->input && port_msg->input_port && port->connector) {
+		/* Since port->connector can't be changed here, we create a
+		 * new port if input_port changes from 0 to 1
 		 */
-		drm_dp_mst_get_mstb_malloc(mstb);
-
+		drm_dp_mst_topology_unlink_port(mgr, port);
+		drm_dp_mst_topology_put_port(port);
+		port = drm_dp_mst_add_port(dev, mgr, mstb,
+					   port_msg->port_number);
+		if (!port)
+			return -ENOMEM;
+		changed = true;
 		created = true;
-	} else {
-		old_pdt = port->pdt;
+	} else if (port->input && !port_msg->input_port) {
+		changed = true;
+	} else if (port->connector) {
+		/* We're updating a port that's exposed to userspace, so do it
+		 * under lock
+		 */
+		drm_modeset_lock(&mgr->base.lock, NULL);
+
 		old_ddps = port->ddps;
+		changed = port->ddps != port_msg->ddps ||
+			(port->ddps &&
+			 (port->ldps != port_msg->legacy_device_plug_status ||
+			  port->dpcd_rev != port_msg->dpcd_revision ||
+			  port->mcs != port_msg->mcs ||
+			  port->pdt != port_msg->peer_device_type ||
+			  port->num_sdp_stream_sinks !=
+			  port_msg->num_sdp_stream_sinks));
 	}
 
-	port->pdt = port_msg->peer_device_type;
 	port->input = port_msg->input_port;
-	port->mcs = port_msg->mcs;
+	if (!port->input)
+		new_pdt = port_msg->peer_device_type;
+	new_mcs = port_msg->mcs;
 	port->ddps = port_msg->ddps;
 	port->ldps = port_msg->legacy_device_plug_status;
 	port->dpcd_rev = port_msg->dpcd_revision;
@@ -1665,78 +2295,108 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
 	/* manage mstb port lists with mgr lock - take a reference
 	   for this list */
 	if (created) {
-		mutex_lock(&mstb->mgr->lock);
+		mutex_lock(&mgr->lock);
 		drm_dp_mst_topology_get_port(port);
 		list_add(&port->next, &mstb->ports);
-		mutex_unlock(&mstb->mgr->lock);
+		mstb->num_ports++;
+		mutex_unlock(&mgr->lock);
 	}
 
 	if (old_ddps != port->ddps) {
 		if (port->ddps) {
 			if (!port->input) {
-				drm_dp_send_enum_path_resources(mstb->mgr,
-								mstb, port);
+				drm_dp_send_enum_path_resources(mgr, mstb,
+								port);
 			}
 		} else {
 			port->available_pbn = 0;
 		}
 	}
 
-	if (old_pdt != port->pdt && !port->input) {
-		drm_dp_port_teardown_pdt(port, old_pdt);
-
-		ret = drm_dp_port_setup_pdt(port);
-		if (ret == true)
-			drm_dp_send_link_address(mstb->mgr, port->mstb);
+	ret = drm_dp_port_set_pdt(port, new_pdt, new_mcs);
+	if (ret == 1) {
+		send_link_addr = true;
+	} else if (ret < 0) {
+		DRM_ERROR("Failed to change PDT on port %p: %d\n",
+			  port, ret);
+		goto fail;
 	}
 
-	if (created && !port->input) {
-		char proppath[255];
-
-		build_mst_prop_path(mstb, port->port_num, proppath,
-				    sizeof(proppath));
-		port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr,
-								   port,
-								   proppath);
-		if (!port->connector) {
-			/* remove it from the port list */
-			mutex_lock(&mstb->mgr->lock);
-			list_del(&port->next);
-			mutex_unlock(&mstb->mgr->lock);
-			/* drop port list reference */
-			drm_dp_mst_topology_put_port(port);
-			goto out;
-		}
-		if ((port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV ||
-		     port->pdt == DP_PEER_DEVICE_SST_SINK) &&
-		    port->port_num >= DP_MST_LOGICAL_PORT_0) {
-			port->cached_edid = drm_get_edid(port->connector,
-							 &port->aux.ddc);
-			drm_connector_set_tile_property(port->connector);
-		}
-		(*mstb->mgr->cbs->register_connector)(port->connector);
+	/*
+	 * If this port wasn't just created, then we're reprobing because
+	 * we're coming out of suspend. In this case, always resend the link
+	 * address if there's an MSTB on this port
+	 */
+	if (!created && port->pdt == DP_PEER_DEVICE_MST_BRANCHING &&
+	    port->mcs)
+		send_link_addr = true;
+
+	if (port->connector)
+		drm_modeset_unlock(&mgr->base.lock);
+	else if (!port->input)
+		drm_dp_mst_port_add_connector(mstb, port);
+
+	if (send_link_addr && port->mstb) {
+		ret = drm_dp_send_link_address(mgr, port->mstb);
+		if (ret == 1) /* MSTB below us changed */
+			changed = true;
+		else if (ret < 0)
+			goto fail_put;
 	}
 
-out:
 	/* put reference to this port */
 	drm_dp_mst_topology_put_port(port);
+	return changed;
+
+fail:
+	drm_dp_mst_topology_unlink_port(mgr, port);
+	if (port->connector)
+		drm_modeset_unlock(&mgr->base.lock);
+fail_put:
+	drm_dp_mst_topology_put_port(port);
+	return ret;
 }
 
-static void drm_dp_update_port(struct drm_dp_mst_branch *mstb,
-			       struct drm_dp_connection_status_notify *conn_stat)
+static void
+drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
+			    struct drm_dp_connection_status_notify *conn_stat)
 {
+	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
 	struct drm_dp_mst_port *port;
-	int old_pdt;
-	int old_ddps;
-	bool dowork = false;
+	int old_ddps, old_input, ret, i;
+	u8 new_pdt;
+	bool new_mcs;
+	bool dowork = false, create_connector = false;
+
 	port = drm_dp_get_port(mstb, conn_stat->port_number);
 	if (!port)
 		return;
 
+	if (port->connector) {
+		if (!port->input && conn_stat->input_port) {
+			/*
+			 * We can't remove a connector from an already exposed
+			 * port, so just throw the port out and make sure we
+			 * reprobe the link address of it's parent MSTB
+			 */
+			drm_dp_mst_topology_unlink_port(mgr, port);
+			mstb->link_address_sent = false;
+			dowork = true;
+			goto out;
+		}
+
+		/* Locking is only needed if the port's exposed to userspace */
+		drm_modeset_lock(&mgr->base.lock, NULL);
+	} else if (port->input && !conn_stat->input_port) {
+		create_connector = true;
+		/* Reprobe link address so we get num_sdp_streams */
+		mstb->link_address_sent = false;
+		dowork = true;
+	}
+
 	old_ddps = port->ddps;
-	old_pdt = port->pdt;
-	port->pdt = conn_stat->peer_device_type;
-	port->mcs = conn_stat->message_capability_status;
+	old_input = port->input;
+	port->input = conn_stat->input_port;
 	port->ldps = conn_stat->legacy_device_plug_status;
 	port->ddps = conn_stat->displayport_device_plug_status;
 
@@ -1747,17 +2407,49 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb,
 			port->available_pbn = 0;
 		}
 	}
-	if (old_pdt != port->pdt && !port->input) {
-		drm_dp_port_teardown_pdt(port, old_pdt);
 
-		if (drm_dp_port_setup_pdt(port))
-			dowork = true;
+	new_pdt = port->input ? DP_PEER_DEVICE_NONE : conn_stat->peer_device_type;
+	new_mcs = conn_stat->message_capability_status;
+	ret = drm_dp_port_set_pdt(port, new_pdt, new_mcs);
+	if (ret == 1) {
+		dowork = true;
+	} else if (ret < 0) {
+		DRM_ERROR("Failed to change PDT for port %p: %d\n",
+			  port, ret);
+		dowork = false;
+	}
+
+	if (!old_input && old_ddps != port->ddps && !port->ddps) {
+		for (i = 0; i < mgr->max_payloads; i++) {
+			struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
+			struct drm_dp_mst_port *port_validated;
+
+			if (!vcpi)
+				continue;
+
+			port_validated =
+				container_of(vcpi, struct drm_dp_mst_port, vcpi);
+			port_validated =
+				drm_dp_mst_topology_get_port_validated(mgr, port_validated);
+			if (!port_validated) {
+				mutex_lock(&mgr->payload_lock);
+				vcpi->num_slots = 0;
+				mutex_unlock(&mgr->payload_lock);
+			} else {
+				drm_dp_mst_topology_put_port(port_validated);
+			}
+		}
 	}
 
+	if (port->connector)
+		drm_modeset_unlock(&mgr->base.lock);
+	else if (create_connector)
+		drm_dp_mst_port_add_connector(mstb, port);
+
+out:
 	drm_dp_mst_topology_put_port(port);
 	if (dowork)
 		queue_work(system_long_wq, &mstb->mgr->work);
-
 }
 
 static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_topology_mgr *mgr,
@@ -1800,7 +2492,7 @@ out:
 
 static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper(
 	struct drm_dp_mst_branch *mstb,
-	uint8_t *guid)
+	const uint8_t *guid)
 {
 	struct drm_dp_mst_branch *found_mstb;
 	struct drm_dp_mst_port *port;
@@ -1824,7 +2516,7 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper(
 
 static struct drm_dp_mst_branch *
 drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr,
-				     uint8_t *guid)
+				     const uint8_t *guid)
 {
 	struct drm_dp_mst_branch *mstb;
 	int ret;
@@ -1843,42 +2535,67 @@ drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr,
 	return mstb;
 }
 
-static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
+static int drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
 					       struct drm_dp_mst_branch *mstb)
 {
 	struct drm_dp_mst_port *port;
-	struct drm_dp_mst_branch *mstb_child;
-	if (!mstb->link_address_sent)
-		drm_dp_send_link_address(mgr, mstb);
+	int ret;
+	bool changed = false;
+
+	if (!mstb->link_address_sent) {
+		ret = drm_dp_send_link_address(mgr, mstb);
+		if (ret == 1)
+			changed = true;
+		else if (ret < 0)
+			return ret;
+	}
 
 	list_for_each_entry(port, &mstb->ports, next) {
-		if (port->input)
-			continue;
+		struct drm_dp_mst_branch *mstb_child = NULL;
 
-		if (!port->ddps)
+		if (port->input || !port->ddps)
 			continue;
 
-		if (!port->available_pbn)
+		if (!port->available_pbn) {
+			drm_modeset_lock(&mgr->base.lock, NULL);
 			drm_dp_send_enum_path_resources(mgr, mstb, port);
+			drm_modeset_unlock(&mgr->base.lock);
+			changed = true;
+		}
 
-		if (port->mstb) {
+		if (port->mstb)
 			mstb_child = drm_dp_mst_topology_get_mstb_validated(
 			    mgr, port->mstb);
-			if (mstb_child) {
-				drm_dp_check_and_send_link_address(mgr, mstb_child);
-				drm_dp_mst_topology_put_mstb(mstb_child);
-			}
+
+		if (mstb_child) {
+			ret = drm_dp_check_and_send_link_address(mgr,
+								 mstb_child);
+			drm_dp_mst_topology_put_mstb(mstb_child);
+			if (ret == 1)
+				changed = true;
+			else if (ret < 0)
+				return ret;
 		}
 	}
+
+	return changed;
 }
 
 static void drm_dp_mst_link_probe_work(struct work_struct *work)
 {
-	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, work);
+	struct drm_dp_mst_topology_mgr *mgr =
+		container_of(work, struct drm_dp_mst_topology_mgr, work);
+	struct drm_device *dev = mgr->dev;
 	struct drm_dp_mst_branch *mstb;
 	int ret;
+	bool clear_payload_id_table;
+
+	mutex_lock(&mgr->probe_lock);
 
 	mutex_lock(&mgr->lock);
+	clear_payload_id_table = !mgr->payload_id_table_cleared;
+	mgr->payload_id_table_cleared = true;
+
 	mstb = mgr->mst_primary;
 	if (mstb) {
 		ret = drm_dp_mst_topology_try_get_mstb(mstb);
@@ -1886,10 +2603,30 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work)
 			mstb = NULL;
 	}
 	mutex_unlock(&mgr->lock);
-	if (mstb) {
-		drm_dp_check_and_send_link_address(mgr, mstb);
-		drm_dp_mst_topology_put_mstb(mstb);
+	if (!mstb) {
+		mutex_unlock(&mgr->probe_lock);
+		return;
 	}
+
+	/*
+	 * Certain branch devices seem to incorrectly report an available_pbn
+	 * of 0 on downstream sinks, even after clearing the
+	 * DP_PAYLOAD_ALLOCATE_* registers in
+	 * drm_dp_mst_topology_mgr_set_mst(). Namely, the CableMatters USB-C
+	 * 2x DP hub. Sending a CLEAR_PAYLOAD_ID_TABLE message seems to make
+	 * things work again.
+	 */
+	if (clear_payload_id_table) {
+		DRM_DEBUG_KMS("Clearing payload ID table\n");
+		drm_dp_send_clear_payload_id_table(mgr, mstb);
+	}
+
+	ret = drm_dp_check_and_send_link_address(mgr, mstb);
+	drm_dp_mst_topology_put_mstb(mstb);
+
+	mutex_unlock(&mgr->probe_lock);
+	if (ret)
+		drm_kms_helper_hotplug_event(dev);
 }
 
 static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr,
@@ -2035,8 +2772,11 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
 	idx += tosend + 1;
 
 	ret = drm_dp_send_sideband_msg(mgr, up, chunk, idx);
-	if (ret) {
-		DRM_DEBUG_KMS("sideband msg failed to send\n");
+	if (unlikely(ret) && drm_debug_enabled(DRM_UT_DP)) {
+		struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+
+		drm_printf(&p, "sideband msg failed to send\n");
+		drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
 		return ret;
 	}
 
@@ -2063,9 +2803,11 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 	ret = process_single_tx_qlock(mgr, txmsg, false);
 	if (ret == 1) {
 		/* txmsg is sent it should be in the slots now */
+		mgr->is_waiting_for_dwn_reply = true;
 		list_del(&txmsg->next);
 	} else if (ret) {
 		DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
+		mgr->is_waiting_for_dwn_reply = false;
 		list_del(&txmsg->next);
 		if (txmsg->seqno != -1)
 			txmsg->dst->tx_slots[txmsg->seqno] = NULL;
@@ -2098,21 +2840,53 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
 {
 	mutex_lock(&mgr->qlock);
 	list_add_tail(&txmsg->next, &mgr->tx_msg_downq);
-	if (list_is_singular(&mgr->tx_msg_downq))
+
+	if (drm_debug_enabled(DRM_UT_DP)) {
+		struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+
+		drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
+	}
+
+	if (list_is_singular(&mgr->tx_msg_downq) &&
+	    !mgr->is_waiting_for_dwn_reply)
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
 
-static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
+static void
+drm_dp_dump_link_address(struct drm_dp_link_address_ack_reply *reply)
+{
+	struct drm_dp_link_addr_reply_port *port_reply;
+	int i;
+
+	for (i = 0; i < reply->nports; i++) {
+		port_reply = &reply->ports[i];
+		DRM_DEBUG_KMS("port %d: input %d, pdt: %d, pn: %d, dpcd_rev: %02x, mcs: %d, ddps: %d, ldps %d, sdp %d/%d\n",
+			      i,
+			      port_reply->input_port,
+			      port_reply->peer_device_type,
+			      port_reply->port_number,
+			      port_reply->dpcd_revision,
+			      port_reply->mcs,
+			      port_reply->ddps,
+			      port_reply->legacy_device_plug_status,
+			      port_reply->num_sdp_streams,
+			      port_reply->num_sdp_stream_sinks);
+	}
+}
+
+static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
 				     struct drm_dp_mst_branch *mstb)
 {
-	int len;
 	struct drm_dp_sideband_msg_tx *txmsg;
-	int ret;
+	struct drm_dp_link_address_ack_reply *reply;
+	struct drm_dp_mst_port *port, *tmp;
+	int i, len, ret, port_mask = 0;
+	bool changed = false;
 
 	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
 	if (!txmsg)
-		return;
+		return -ENOMEM;
 
 	txmsg->dst = mstb;
 	len = build_link_address(txmsg);
@@ -2120,48 +2894,89 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
 	mstb->link_address_sent = true;
 	drm_dp_queue_down_tx(mgr, txmsg);
 
+	/* FIXME: Actually do some real error handling here */
 	ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
-	if (ret > 0) {
-		int i;
+	if (ret <= 0) {
+		DRM_ERROR("Sending link address failed with %d\n", ret);
+		goto out;
+	}
+	if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
+		DRM_ERROR("link address NAK received\n");
+		ret = -EIO;
+		goto out;
+	}
 
-		if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
-			DRM_DEBUG_KMS("link address nak received\n");
-		} else {
-			DRM_DEBUG_KMS("link address reply: %d\n", txmsg->reply.u.link_addr.nports);
-			for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
-				DRM_DEBUG_KMS("port %d: input %d, pdt: %d, pn: %d, dpcd_rev: %02x, mcs: %d, ddps: %d, ldps %d, sdp %d/%d\n", i,
-				       txmsg->reply.u.link_addr.ports[i].input_port,
-				       txmsg->reply.u.link_addr.ports[i].peer_device_type,
-				       txmsg->reply.u.link_addr.ports[i].port_number,
-				       txmsg->reply.u.link_addr.ports[i].dpcd_revision,
-				       txmsg->reply.u.link_addr.ports[i].mcs,
-				       txmsg->reply.u.link_addr.ports[i].ddps,
-				       txmsg->reply.u.link_addr.ports[i].legacy_device_plug_status,
-				       txmsg->reply.u.link_addr.ports[i].num_sdp_streams,
-				       txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks);
-			}
+	reply = &txmsg->reply.u.link_addr;
+	DRM_DEBUG_KMS("link address reply: %d\n", reply->nports);
+	drm_dp_dump_link_address(reply);
 
-			drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid);
+	drm_dp_check_mstb_guid(mstb, reply->guid);
 
-			for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
-				drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]);
-			}
-			drm_kms_helper_hotplug_event(mgr->dev);
-		}
-	} else {
-		mstb->link_address_sent = false;
-		DRM_DEBUG_KMS("link address failed %d\n", ret);
+	for (i = 0; i < reply->nports; i++) {
+		port_mask |= BIT(reply->ports[i].port_number);
+		ret = drm_dp_mst_handle_link_address_port(mstb, mgr->dev,
+							  &reply->ports[i]);
+		if (ret == 1)
+			changed = true;
+		else if (ret < 0)
+			goto out;
+	}
+
+	/* Prune any ports that are currently a part of mstb in our in-memory
+	 * topology, but were not seen in this link address. Usually this
+	 * means that they were removed while the topology was out of sync,
+	 * e.g. during suspend/resume
+	 */
+	mutex_lock(&mgr->lock);
+	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
+		if (port_mask & BIT(port->port_num))
+			continue;
+
+		DRM_DEBUG_KMS("port %d was not in link address, removing\n",
+			      port->port_num);
+		list_del(&port->next);
+		drm_dp_mst_topology_put_port(port);
+		changed = true;
 	}
+	mutex_unlock(&mgr->lock);
 
+out:
+	if (ret <= 0)
+		mstb->link_address_sent = false;
 	kfree(txmsg);
+	return ret < 0 ? ret : changed;
 }
 
-static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
-					   struct drm_dp_mst_branch *mstb,
-					   struct drm_dp_mst_port *port)
+void drm_dp_send_clear_payload_id_table(struct drm_dp_mst_topology_mgr *mgr,
+					struct drm_dp_mst_branch *mstb)
 {
-	int len;
 	struct drm_dp_sideband_msg_tx *txmsg;
+	int len, ret;
+
+	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
+	if (!txmsg)
+		return;
+
+	txmsg->dst = mstb;
+	len = build_clear_payload_id_table(txmsg);
+
+	drm_dp_queue_down_tx(mgr, txmsg);
+
+	ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
+	if (ret > 0 && txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK)
+		DRM_DEBUG_KMS("clear payload table id nak received\n");
+
+	kfree(txmsg);
+}
+
+static int
+drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
+				struct drm_dp_mst_branch *mstb,
+				struct drm_dp_mst_port *port)
+{
+	struct drm_dp_enum_path_resources_ack_reply *path_res;
+	struct drm_dp_sideband_msg_tx *txmsg;
+	int len;
 	int ret;
 
 	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
@@ -2175,14 +2990,21 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
 
 	ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
 	if (ret > 0) {
+		path_res = &txmsg->reply.u.path_resources;
+
 		if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
 			DRM_DEBUG_KMS("enum path resources nak received\n");
 		} else {
-			if (port->port_num != txmsg->reply.u.path_resources.port_number)
+			if (port->port_num != path_res->port_number)
 				DRM_ERROR("got incorrect port in response\n");
-			DRM_DEBUG_KMS("enum path resources %d: %d %d\n", txmsg->reply.u.path_resources.port_number, txmsg->reply.u.path_resources.full_payload_bw_number,
-			       txmsg->reply.u.path_resources.avail_payload_bw_number);
-			port->available_pbn = txmsg->reply.u.path_resources.avail_payload_bw_number;
+
+			DRM_DEBUG_KMS("enum path resources %d: %d %d\n",
+				      path_res->port_number,
+				      path_res->full_payload_bw_number,
+				      path_res->avail_payload_bw_number);
+			port->available_pbn =
+				path_res->avail_payload_bw_number;
+			port->fec_capable = path_res->fec_capable;
 		}
 	}
 
@@ -2465,9 +3287,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 			drm_dp_mst_topology_put_port(port);
 	}
 
-	for (i = 0; i < mgr->max_payloads; i++) {
-		if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL)
+	for (i = 0; i < mgr->max_payloads; /* do nothing */) {
+		if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) {
+			i++;
 			continue;
+		}
 
 		DRM_DEBUG_KMS("removing payload %d\n", i);
 		for (j = i; j < mgr->max_payloads - 1; j++) {
@@ -2655,30 +3479,13 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
 	return 0;
 }
 
-static bool drm_dp_get_vc_payload_bw(int dp_link_bw,
-				     int dp_link_count,
-				     int *out)
+static int drm_dp_get_vc_payload_bw(u8 dp_link_bw, u8  dp_link_count)
 {
-	switch (dp_link_bw) {
-	default:
+	if (dp_link_bw == 0 || dp_link_count == 0)
 		DRM_DEBUG_KMS("invalid link bandwidth in DPCD: %x (link count: %d)\n",
 			      dp_link_bw, dp_link_count);
-		return false;
 
-	case DP_LINK_BW_1_62:
-		*out = 3 * dp_link_count;
-		break;
-	case DP_LINK_BW_2_7:
-		*out = 5 * dp_link_count;
-		break;
-	case DP_LINK_BW_5_4:
-		*out = 10 * dp_link_count;
-		break;
-	case DP_LINK_BW_8_1:
-		*out = 15 * dp_link_count;
-		break;
-	}
-	return true;
+	return dp_link_bw * dp_link_count / 2;
 }
 
 /**
@@ -2692,6 +3499,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw,
 int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state)
 {
 	int ret = 0;
+	int i = 0;
 	struct drm_dp_mst_branch *mstb = NULL;
 
 	mutex_lock(&mgr->lock);
@@ -2710,9 +3518,9 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
 			goto out_unlock;
 		}
 
-		if (!drm_dp_get_vc_payload_bw(mgr->dpcd[1],
-					      mgr->dpcd[2] & DP_MAX_LANE_COUNT_MASK,
-					      &mgr->pbn_div)) {
+		mgr->pbn_div = drm_dp_get_vc_payload_bw(mgr->dpcd[1],
+							mgr->dpcd[2] & DP_MAX_LANE_COUNT_MASK);
+		if (mgr->pbn_div == 0) {
 			ret = -EINVAL;
 			goto out_unlock;
 		}
@@ -2752,10 +3560,23 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
 		/* this can fail if the device is gone */
 		drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0);
 		ret = 0;
+		mutex_lock(&mgr->payload_lock);
 		memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload));
 		mgr->payload_mask = 0;
 		set_bit(0, &mgr->payload_mask);
+		for (i = 0; i < mgr->max_payloads; i++) {
+			struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
+
+			if (vcpi) {
+				vcpi->vcpi = 0;
+				vcpi->num_slots = 0;
+			}
+			mgr->proposed_vcpis[i] = NULL;
+		}
 		mgr->vcpi_mask = 0;
+		mutex_unlock(&mgr->payload_lock);
+
+		mgr->payload_id_table_cleared = false;
 	}
 
 out_unlock:
@@ -2767,6 +3588,23 @@ out_unlock:
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_set_mst);
 
+static void
+drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb)
+{
+	struct drm_dp_mst_port *port;
+
+	/* The link address will need to be re-sent on resume */
+	mstb->link_address_sent = false;
+
+	list_for_each_entry(port, &mstb->ports, next) {
+		/* The PBN for each port will also need to be re-probed */
+		port->available_pbn = 0;
+
+		if (port->mstb)
+			drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb);
+	}
+}
+
 /**
  * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager
  * @mgr: manager to suspend
@@ -2780,62 +3618,89 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr)
 	drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
 			   DP_MST_EN | DP_UPSTREAM_IS_SRC);
 	mutex_unlock(&mgr->lock);
+	flush_work(&mgr->up_req_work);
 	flush_work(&mgr->work);
-	flush_work(&mgr->destroy_connector_work);
+	flush_work(&mgr->delayed_destroy_work);
+
+	mutex_lock(&mgr->lock);
+	if (mgr->mst_state && mgr->mst_primary)
+		drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary);
+	mutex_unlock(&mgr->lock);
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
 
 /**
  * drm_dp_mst_topology_mgr_resume() - resume the MST manager
  * @mgr: manager to resume
+ * @sync: whether or not to perform topology reprobing synchronously
  *
  * This will fetch DPCD and see if the device is still there,
  * if it is, it will rewrite the MSTM control bits, and return.
  *
- * if the device fails this returns -1, and the driver should do
+ * If the device fails this returns -1, and the driver should do
  * a full MST reprobe, in case we were undocked.
+ *
+ * During system resume (where it is assumed that the driver will be calling
+ * drm_atomic_helper_resume()) this function should be called beforehand with
+ * @sync set to true. In contexts like runtime resume where the driver is not
+ * expected to be calling drm_atomic_helper_resume(), this function should be
+ * called with @sync set to false in order to avoid deadlocking.
+ *
+ * Returns: -1 if the MST topology was removed while we were suspended, 0
+ * otherwise.
  */
-int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
+int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
+				   bool sync)
 {
-	int ret = 0;
+	int ret;
+	u8 guid[16];
 
 	mutex_lock(&mgr->lock);
+	if (!mgr->mst_primary)
+		goto out_fail;
 
-	if (mgr->mst_primary) {
-		int sret;
-		u8 guid[16];
+	ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
+			       DP_RECEIVER_CAP_SIZE);
+	if (ret != DP_RECEIVER_CAP_SIZE) {
+		DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
+		goto out_fail;
+	}
 
-		sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE);
-		if (sret != DP_RECEIVER_CAP_SIZE) {
-			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
-			ret = -1;
-			goto out_unlock;
-		}
+	ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
+				 DP_MST_EN |
+				 DP_UP_REQ_EN |
+				 DP_UPSTREAM_IS_SRC);
+	if (ret < 0) {
+		DRM_DEBUG_KMS("mst write failed - undocked during suspend?\n");
+		goto out_fail;
+	}
 
-		ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
-					 DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
-		if (ret < 0) {
-			DRM_DEBUG_KMS("mst write failed - undocked during suspend?\n");
-			ret = -1;
-			goto out_unlock;
-		}
+	/* Some hubs forget their guids after they resume */
+	ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
+	if (ret != 16) {
+		DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
+		goto out_fail;
+	}
+	drm_dp_check_mstb_guid(mgr->mst_primary, guid);
 
-		/* Some hubs forget their guids after they resume */
-		sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
-		if (sret != 16) {
-			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
-			ret = -1;
-			goto out_unlock;
-		}
-		drm_dp_check_mstb_guid(mgr->mst_primary, guid);
+	/*
+	 * For the final step of resuming the topology, we need to bring the
+	 * state of our in-memory topology back into sync with reality. So,
+	 * restart the probing process as if we're probing a new hub
+	 */
+	queue_work(system_long_wq, &mgr->work);
+	mutex_unlock(&mgr->lock);
 
-		ret = 0;
-	} else
-		ret = -1;
+	if (sync) {
+		DRM_DEBUG_KMS("Waiting for link probe work to finish re-syncing topology...\n");
+		flush_work(&mgr->work);
+	}
 
-out_unlock:
+	return 0;
+
+out_fail:
 	mutex_unlock(&mgr->lock);
-	return ret;
+	return -1;
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
 
@@ -2890,136 +3755,202 @@ static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
 
 static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
 {
-	int ret = 0;
+	struct drm_dp_sideband_msg_tx *txmsg;
+	struct drm_dp_mst_branch *mstb;
+	struct drm_dp_sideband_msg_hdr *hdr = &mgr->down_rep_recv.initial_hdr;
+	int slot = -1;
+
+	if (!drm_dp_get_one_sb_msg(mgr, false))
+		goto clear_down_rep_recv;
 
-	if (!drm_dp_get_one_sb_msg(mgr, false)) {
-		memset(&mgr->down_rep_recv, 0,
-		       sizeof(struct drm_dp_sideband_msg_rx));
+	if (!mgr->down_rep_recv.have_eomt)
 		return 0;
+
+	mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
+	if (!mstb) {
+		DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
+			      hdr->lct);
+		goto clear_down_rep_recv;
 	}
 
-	if (mgr->down_rep_recv.have_eomt) {
-		struct drm_dp_sideband_msg_tx *txmsg;
-		struct drm_dp_mst_branch *mstb;
-		int slot = -1;
-		mstb = drm_dp_get_mst_branch_device(mgr,
-						    mgr->down_rep_recv.initial_hdr.lct,
-						    mgr->down_rep_recv.initial_hdr.rad);
+	/* find the message */
+	slot = hdr->seqno;
+	mutex_lock(&mgr->qlock);
+	txmsg = mstb->tx_slots[slot];
+	/* remove from slots */
+	mutex_unlock(&mgr->qlock);
 
-		if (!mstb) {
-			DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->down_rep_recv.initial_hdr.lct);
-			memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-			return 0;
-		}
+	if (!txmsg) {
+		DRM_DEBUG_KMS("Got MST reply with no msg %p %d %d %02x %02x\n",
+			      mstb, hdr->seqno, hdr->lct, hdr->rad[0],
+			      mgr->down_rep_recv.msg[0]);
+		goto no_msg;
+	}
 
-		/* find the message */
-		slot = mgr->down_rep_recv.initial_hdr.seqno;
-		mutex_lock(&mgr->qlock);
-		txmsg = mstb->tx_slots[slot];
-		/* remove from slots */
-		mutex_unlock(&mgr->qlock);
-
-		if (!txmsg) {
-			DRM_DEBUG_KMS("Got MST reply with no msg %p %d %d %02x %02x\n",
-			       mstb,
-			       mgr->down_rep_recv.initial_hdr.seqno,
-			       mgr->down_rep_recv.initial_hdr.lct,
-				      mgr->down_rep_recv.initial_hdr.rad[0],
-				      mgr->down_rep_recv.msg[0]);
-			drm_dp_mst_topology_put_mstb(mstb);
-			memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-			return 0;
-		}
+	drm_dp_sideband_parse_reply(&mgr->down_rep_recv, &txmsg->reply);
 
-		drm_dp_sideband_parse_reply(&mgr->down_rep_recv, &txmsg->reply);
+	if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK)
+		DRM_DEBUG_KMS("Got NAK reply: req 0x%02x (%s), reason 0x%02x (%s), nak data 0x%02x\n",
+			      txmsg->reply.req_type,
+			      drm_dp_mst_req_type_str(txmsg->reply.req_type),
+			      txmsg->reply.u.nak.reason,
+			      drm_dp_mst_nak_reason_str(txmsg->reply.u.nak.reason),
+			      txmsg->reply.u.nak.nak_data);
 
-		if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK)
-			DRM_DEBUG_KMS("Got NAK reply: req 0x%02x (%s), reason 0x%02x (%s), nak data 0x%02x\n",
-				      txmsg->reply.req_type,
-				      drm_dp_mst_req_type_str(txmsg->reply.req_type),
-				      txmsg->reply.u.nak.reason,
-				      drm_dp_mst_nak_reason_str(txmsg->reply.u.nak.reason),
-				      txmsg->reply.u.nak.nak_data);
-
-		memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-		drm_dp_mst_topology_put_mstb(mstb);
+	memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+	drm_dp_mst_topology_put_mstb(mstb);
 
-		mutex_lock(&mgr->qlock);
-		txmsg->state = DRM_DP_SIDEBAND_TX_RX;
-		mstb->tx_slots[slot] = NULL;
-		mutex_unlock(&mgr->qlock);
+	mutex_lock(&mgr->qlock);
+	txmsg->state = DRM_DP_SIDEBAND_TX_RX;
+	mstb->tx_slots[slot] = NULL;
+	mgr->is_waiting_for_dwn_reply = false;
+	mutex_unlock(&mgr->qlock);
 
-		wake_up_all(&mgr->tx_waitq);
-	}
-	return ret;
+	wake_up_all(&mgr->tx_waitq);
+
+	return 0;
+
+no_msg:
+	drm_dp_mst_topology_put_mstb(mstb);
+clear_down_rep_recv:
+	mutex_lock(&mgr->qlock);
+	mgr->is_waiting_for_dwn_reply = false;
+	mutex_unlock(&mgr->qlock);
+	memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+
+	return 0;
 }
 
-static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
+static inline bool
+drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
+			  struct drm_dp_pending_up_req *up_req)
 {
-	int ret = 0;
+	struct drm_dp_mst_branch *mstb = NULL;
+	struct drm_dp_sideband_msg_req_body *msg = &up_req->msg;
+	struct drm_dp_sideband_msg_hdr *hdr = &up_req->hdr;
+	bool hotplug = false;
 
-	if (!drm_dp_get_one_sb_msg(mgr, true)) {
-		memset(&mgr->up_req_recv, 0,
-		       sizeof(struct drm_dp_sideband_msg_rx));
-		return 0;
+	if (hdr->broadcast) {
+		const u8 *guid = NULL;
+
+		if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY)
+			guid = msg->u.conn_stat.guid;
+		else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY)
+			guid = msg->u.resource_stat.guid;
+
+		mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
+	} else {
+		mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
 	}
 
-	if (mgr->up_req_recv.have_eomt) {
-		struct drm_dp_sideband_msg_req_body msg;
-		struct drm_dp_mst_branch *mstb = NULL;
-		bool seqno;
+	if (!mstb) {
+		DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
+			      hdr->lct);
+		return false;
+	}
 
-		if (!mgr->up_req_recv.initial_hdr.broadcast) {
-			mstb = drm_dp_get_mst_branch_device(mgr,
-							    mgr->up_req_recv.initial_hdr.lct,
-							    mgr->up_req_recv.initial_hdr.rad);
-			if (!mstb) {
-				DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct);
-				memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-				return 0;
-			}
-		}
+	/* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */
+	if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) {
+		drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat);
+		hotplug = true;
+	}
 
-		seqno = mgr->up_req_recv.initial_hdr.seqno;
-		drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg);
+	drm_dp_mst_topology_put_mstb(mstb);
+	return hotplug;
+}
 
-		if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
-			drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false);
+static void drm_dp_mst_up_req_work(struct work_struct *work)
+{
+	struct drm_dp_mst_topology_mgr *mgr =
+		container_of(work, struct drm_dp_mst_topology_mgr,
+			     up_req_work);
+	struct drm_dp_pending_up_req *up_req;
+	bool send_hotplug = false;
 
-			if (!mstb)
-				mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.conn_stat.guid);
+	mutex_lock(&mgr->probe_lock);
+	while (true) {
+		mutex_lock(&mgr->up_req_lock);
+		up_req = list_first_entry_or_null(&mgr->up_req_list,
+						  struct drm_dp_pending_up_req,
+						  next);
+		if (up_req)
+			list_del(&up_req->next);
+		mutex_unlock(&mgr->up_req_lock);
+
+		if (!up_req)
+			break;
 
-			if (!mstb) {
-				DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct);
-				memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-				return 0;
-			}
+		send_hotplug |= drm_dp_mst_process_up_req(mgr, up_req);
+		kfree(up_req);
+	}
+	mutex_unlock(&mgr->probe_lock);
 
-			drm_dp_update_port(mstb, &msg.u.conn_stat);
+	if (send_hotplug)
+		drm_kms_helper_hotplug_event(mgr->dev);
+}
 
-			DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
-			drm_kms_helper_hotplug_event(mgr->dev);
+static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
+{
+	struct drm_dp_sideband_msg_hdr *hdr = &mgr->up_req_recv.initial_hdr;
+	struct drm_dp_pending_up_req *up_req;
+	bool seqno;
 
-		} else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
-			drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false);
-			if (!mstb)
-				mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.resource_stat.guid);
+	if (!drm_dp_get_one_sb_msg(mgr, true))
+		goto out;
 
-			if (!mstb) {
-				DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct);
-				memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
-				return 0;
-			}
+	if (!mgr->up_req_recv.have_eomt)
+		return 0;
 
-			DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn);
-		}
+	up_req = kzalloc(sizeof(*up_req), GFP_KERNEL);
+	if (!up_req) {
+		DRM_ERROR("Not enough memory to process MST up req\n");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&up_req->next);
 
-		if (mstb)
-			drm_dp_mst_topology_put_mstb(mstb);
+	seqno = hdr->seqno;
+	drm_dp_sideband_parse_req(&mgr->up_req_recv, &up_req->msg);
 
-		memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+	if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY &&
+	    up_req->msg.req_type != DP_RESOURCE_STATUS_NOTIFY) {
+		DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n",
+			      up_req->msg.req_type);
+		kfree(up_req);
+		goto out;
 	}
-	return ret;
+
+	drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type,
+				 seqno, false);
+
+	if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
+		const struct drm_dp_connection_status_notify *conn_stat =
+			&up_req->msg.u.conn_stat;
+
+		DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n",
+			      conn_stat->port_number,
+			      conn_stat->legacy_device_plug_status,
+			      conn_stat->displayport_device_plug_status,
+			      conn_stat->message_capability_status,
+			      conn_stat->input_port,
+			      conn_stat->peer_device_type);
+	} else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
+		const struct drm_dp_resource_status_notify *res_stat =
+			&up_req->msg.u.resource_stat;
+
+		DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n",
+			      res_stat->port_number,
+			      res_stat->available_pbn);
+	}
+
+	up_req->hdr = *hdr;
+	mutex_lock(&mgr->up_req_lock);
+	list_add_tail(&up_req->next, &mgr->up_req_list);
+	mutex_unlock(&mgr->up_req_lock);
+	queue_work(system_long_wq, &mgr->up_req_work);
+
+out:
+	memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+	return 0;
 }
 
 /**
@@ -3063,32 +3994,43 @@ EXPORT_SYMBOL(drm_dp_mst_hpd_irq);
 /**
  * drm_dp_mst_detect_port() - get connection status for an MST port
  * @connector: DRM connector for this port
+ * @ctx: The acquisition context to use for grabbing locks
  * @mgr: manager for this port
- * @port: unverified pointer to a port
+ * @port: pointer to a port
  *
- * This returns the current connection state for a port. It validates the
- * port pointer still exists so the caller doesn't require a reference
+ * This returns the current connection state for a port.
  */
-enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector,
-						 struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
+int
+drm_dp_mst_detect_port(struct drm_connector *connector,
+		       struct drm_modeset_acquire_ctx *ctx,
+		       struct drm_dp_mst_topology_mgr *mgr,
+		       struct drm_dp_mst_port *port)
 {
-	enum drm_connector_status status = connector_status_disconnected;
+	int ret;
 
 	/* we need to search for the port in the mgr in case it's gone */
 	port = drm_dp_mst_topology_get_port_validated(mgr, port);
 	if (!port)
 		return connector_status_disconnected;
 
+	ret = drm_modeset_lock(&mgr->base.lock, ctx);
+	if (ret)
+		goto out;
+
+	ret = connector_status_disconnected;
+
 	if (!port->ddps)
 		goto out;
 
 	switch (port->pdt) {
 	case DP_PEER_DEVICE_NONE:
 	case DP_PEER_DEVICE_MST_BRANCHING:
+		if (!port->mcs)
+			ret = connector_status_connected;
 		break;
 
 	case DP_PEER_DEVICE_SST_SINK:
-		status = connector_status_connected;
+		ret = connector_status_connected;
 		/* for logical ports - cache the EDID */
 		if (port->port_num >= 8 && !port->cached_edid) {
 			port->cached_edid = drm_get_edid(connector, &port->aux.ddc);
@@ -3096,12 +4038,12 @@ enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector
 		break;
 	case DP_PEER_DEVICE_DP_LEGACY_CONV:
 		if (port->ldps)
-			status = connector_status_connected;
+			ret = connector_status_connected;
 		break;
 	}
 out:
 	drm_dp_mst_topology_put_port(port);
-	return status;
+	return ret;
 }
 EXPORT_SYMBOL(drm_dp_mst_detect_port);
 
@@ -3207,6 +4149,7 @@ static int drm_dp_init_vcpi(struct drm_dp_mst_topology_mgr *mgr,
  * @mgr: MST topology manager for the port
  * @port: port to find vcpi slots for
  * @pbn: bandwidth required for the mode in PBN
+ * @pbn_div: divider for DSC mode that takes FEC into account
  *
  * Allocates VCPI slots to @port, replacing any previous VCPI allocations it
  * may have had. Any atomic drivers which support MST must call this function
@@ -3233,11 +4176,12 @@ static int drm_dp_init_vcpi(struct drm_dp_mst_topology_mgr *mgr,
  */
 int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state,
 				  struct drm_dp_mst_topology_mgr *mgr,
-				  struct drm_dp_mst_port *port, int pbn)
+				  struct drm_dp_mst_port *port, int pbn,
+				  int pbn_div)
 {
 	struct drm_dp_mst_topology_state *topology_state;
 	struct drm_dp_vcpi_allocation *pos, *vcpi = NULL;
-	int prev_slots, req_slots, ret;
+	int prev_slots, prev_bw, req_slots;
 
 	topology_state = drm_atomic_get_mst_topology_state(state, mgr);
 	if (IS_ERR(topology_state))
@@ -3248,6 +4192,7 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state,
 		if (pos->port == port) {
 			vcpi = pos;
 			prev_slots = vcpi->vcpi;
+			prev_bw = vcpi->pbn;
 
 			/*
 			 * This should never happen, unless the driver tries
@@ -3263,14 +4208,22 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state,
 			break;
 		}
 	}
-	if (!vcpi)
+	if (!vcpi) {
 		prev_slots = 0;
+		prev_bw = 0;
+	}
+
+	if (pbn_div <= 0)
+		pbn_div = mgr->pbn_div;
 
-	req_slots = DIV_ROUND_UP(pbn, mgr->pbn_div);
+	req_slots = DIV_ROUND_UP(pbn, pbn_div);
 
 	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] [MST PORT:%p] VCPI %d -> %d\n",
 			 port->connector->base.id, port->connector->name,
 			 port, prev_slots, req_slots);
+	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] [MST PORT:%p] PBN %d -> %d\n",
+			 port->connector->base.id, port->connector->name,
+			 port, prev_bw, pbn);
 
 	/* Add the new allocation to the state */
 	if (!vcpi) {
@@ -3283,9 +4236,9 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state,
 		list_add(&vcpi->next, &topology_state->vcpis);
 	}
 	vcpi->vcpi = req_slots;
+	vcpi->pbn = pbn;
 
-	ret = req_slots;
-	return ret;
+	return req_slots;
 }
 EXPORT_SYMBOL(drm_dp_atomic_find_vcpi_slots);
 
@@ -3534,18 +4487,12 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
  * drm_dp_calc_pbn_mode() - Calculate the PBN for a mode.
  * @clock: dot clock for the mode
  * @bpp: bpp for the mode.
+ * @dsc: DSC mode. If true, bpp has units of 1/16 of a bit per pixel
  *
  * This uses the formula in the spec to calculate the PBN value for a mode.
  */
-int drm_dp_calc_pbn_mode(int clock, int bpp)
+int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc)
 {
-	u64 kbps;
-	s64 peak_kbps;
-	u32 numerator;
-	u32 denominator;
-
-	kbps = clock * bpp;
-
 	/*
 	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
 	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
@@ -3555,42 +4502,21 @@ int drm_dp_calc_pbn_mode(int clock, int bpp)
 	 * peak_kbps *= (1006/1000)
 	 * peak_kbps *= (64/54)
 	 * peak_kbps *= 8    convert to bytes
+	 *
+	 * If the bpp is in units of 1/16, further divide by 16. Put this
+	 * factor in the numerator rather than the denominator to avoid
+	 * integer overflow
 	 */
 
-	numerator = 64 * 1006;
-	denominator = 54 * 8 * 1000 * 1000;
-
-	kbps *= numerator;
-	peak_kbps = drm_fixp_from_fraction(kbps, denominator);
+	if (dsc)
+		return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006),
+					8 * 54 * 1000 * 1000);
 
-	return drm_fixp2int_ceil(peak_kbps);
+	return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006),
+				8 * 54 * 1000 * 1000);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
-static int test_calc_pbn_mode(void)
-{
-	int ret;
-	ret = drm_dp_calc_pbn_mode(154000, 30);
-	if (ret != 689) {
-		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
-				154000, 30, 689, ret);
-		return -EINVAL;
-	}
-	ret = drm_dp_calc_pbn_mode(234000, 30);
-	if (ret != 1047) {
-		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
-				234000, 30, 1047, ret);
-		return -EINVAL;
-	}
-	ret = drm_dp_calc_pbn_mode(297000, 24);
-	if (ret != 1063) {
-		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
-				297000, 24, 1063, ret);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 /* we want to kick the TX after we've ack the up/down IRQs. */
 static void drm_dp_mst_kick_tx(struct drm_dp_mst_topology_mgr *mgr)
 {
@@ -3724,41 +4650,108 @@ static void drm_dp_tx_work(struct work_struct *work)
 	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
 
 	mutex_lock(&mgr->qlock);
-	if (!list_empty(&mgr->tx_msg_downq))
+	if (!list_empty(&mgr->tx_msg_downq) && !mgr->is_waiting_for_dwn_reply)
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
 
-static void drm_dp_destroy_connector_work(struct work_struct *work)
+static inline void
+drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port)
 {
-	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
-	struct drm_dp_mst_port *port;
-	bool send_hotplug = false;
+	if (port->connector)
+		port->mgr->cbs->destroy_connector(port->mgr, port->connector);
+
+	drm_dp_port_set_pdt(port, DP_PEER_DEVICE_NONE, port->mcs);
+	drm_dp_mst_put_port_malloc(port);
+}
+
+static inline void
+drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb)
+{
+	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
+	struct drm_dp_mst_port *port, *tmp;
+	bool wake_tx = false;
+
+	mutex_lock(&mgr->lock);
+	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
+		list_del(&port->next);
+		drm_dp_mst_topology_put_port(port);
+	}
+	mutex_unlock(&mgr->lock);
+
+	/* drop any tx slots msg */
+	mutex_lock(&mstb->mgr->qlock);
+	if (mstb->tx_slots[0]) {
+		mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+		mstb->tx_slots[0] = NULL;
+		wake_tx = true;
+	}
+	if (mstb->tx_slots[1]) {
+		mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+		mstb->tx_slots[1] = NULL;
+		wake_tx = true;
+	}
+	mutex_unlock(&mstb->mgr->qlock);
+
+	if (wake_tx)
+		wake_up_all(&mstb->mgr->tx_waitq);
+
+	drm_dp_mst_put_mstb_malloc(mstb);
+}
+
+static void drm_dp_delayed_destroy_work(struct work_struct *work)
+{
+	struct drm_dp_mst_topology_mgr *mgr =
+		container_of(work, struct drm_dp_mst_topology_mgr,
+			     delayed_destroy_work);
+	bool send_hotplug = false, go_again;
+
 	/*
 	 * Not a regular list traverse as we have to drop the destroy
-	 * connector lock before destroying the connector, to avoid AB->BA
+	 * connector lock before destroying the mstb/port, to avoid AB->BA
 	 * ordering between this lock and the config mutex.
 	 */
-	for (;;) {
-		mutex_lock(&mgr->destroy_connector_lock);
-		port = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_dp_mst_port, next);
-		if (!port) {
-			mutex_unlock(&mgr->destroy_connector_lock);
-			break;
+	do {
+		go_again = false;
+
+		for (;;) {
+			struct drm_dp_mst_branch *mstb;
+
+			mutex_lock(&mgr->delayed_destroy_lock);
+			mstb = list_first_entry_or_null(&mgr->destroy_branch_device_list,
+							struct drm_dp_mst_branch,
+							destroy_next);
+			if (mstb)
+				list_del(&mstb->destroy_next);
+			mutex_unlock(&mgr->delayed_destroy_lock);
+
+			if (!mstb)
+				break;
+
+			drm_dp_delayed_destroy_mstb(mstb);
+			go_again = true;
 		}
-		list_del(&port->next);
-		mutex_unlock(&mgr->destroy_connector_lock);
 
-		INIT_LIST_HEAD(&port->next);
+		for (;;) {
+			struct drm_dp_mst_port *port;
 
-		mgr->cbs->destroy_connector(mgr, port->connector);
+			mutex_lock(&mgr->delayed_destroy_lock);
+			port = list_first_entry_or_null(&mgr->destroy_port_list,
+							struct drm_dp_mst_port,
+							next);
+			if (port)
+				list_del(&port->next);
+			mutex_unlock(&mgr->delayed_destroy_lock);
 
-		drm_dp_port_teardown_pdt(port, port->pdt);
-		port->pdt = DP_PEER_DEVICE_NONE;
+			if (!port)
+				break;
+
+			drm_dp_delayed_destroy_port(port);
+			send_hotplug = true;
+			go_again = true;
+		}
+	} while (go_again);
 
-		drm_dp_mst_put_port_malloc(port);
-		send_hotplug = true;
-	}
 	if (send_hotplug)
 		drm_kms_helper_hotplug_event(mgr->dev);
 }
@@ -3820,9 +4813,61 @@ static void drm_dp_mst_destroy_state(struct drm_private_obj *obj,
 	kfree(mst_state);
 }
 
+static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port,
+						 struct drm_dp_mst_branch *branch)
+{
+	while (port->parent) {
+		if (port->parent == branch)
+			return true;
+
+		if (port->parent->port_parent)
+			port = port->parent->port_parent;
+		else
+			break;
+	}
+	return false;
+}
+
+static inline
+int drm_dp_mst_atomic_check_bw_limit(struct drm_dp_mst_branch *branch,
+				     struct drm_dp_mst_topology_state *mst_state)
+{
+	struct drm_dp_mst_port *port;
+	struct drm_dp_vcpi_allocation *vcpi;
+	int pbn_limit = 0, pbn_used = 0;
+
+	list_for_each_entry(port, &branch->ports, next) {
+		if (port->mstb)
+			if (drm_dp_mst_atomic_check_bw_limit(port->mstb, mst_state))
+				return -ENOSPC;
+
+		if (port->available_pbn > 0)
+			pbn_limit = port->available_pbn;
+	}
+	DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch has %d PBN available\n",
+			 branch, pbn_limit);
+
+	list_for_each_entry(vcpi, &mst_state->vcpis, next) {
+		if (!vcpi->pbn)
+			continue;
+
+		if (drm_dp_mst_port_downstream_of_branch(vcpi->port, branch))
+			pbn_used += vcpi->pbn;
+	}
+	DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch used %d PBN\n",
+			 branch, pbn_used);
+
+	if (pbn_used > pbn_limit) {
+		DRM_DEBUG_ATOMIC("[MST BRANCH:%p] No available bandwidth\n",
+				 branch);
+		return -ENOSPC;
+	}
+	return 0;
+}
+
 static inline int
-drm_dp_mst_atomic_check_topology_state(struct drm_dp_mst_topology_mgr *mgr,
-				       struct drm_dp_mst_topology_state *mst_state)
+drm_dp_mst_atomic_check_vcpi_alloc_limit(struct drm_dp_mst_topology_mgr *mgr,
+					 struct drm_dp_mst_topology_state *mst_state)
 {
 	struct drm_dp_vcpi_allocation *vcpi;
 	int avail_slots = 63, payload_count = 0;
@@ -3860,6 +4905,128 @@ drm_dp_mst_atomic_check_topology_state(struct drm_dp_mst_topology_mgr *mgr,
 }
 
 /**
+ * drm_dp_mst_add_affected_dsc_crtcs
+ * @state: Pointer to the new struct drm_dp_mst_topology_state
+ * @mgr: MST topology manager
+ *
+ * Whenever there is a change in mst topology
+ * DSC configuration would have to be recalculated
+ * therefore we need to trigger modeset on all affected
+ * CRTCs in that topology
+ *
+ * See also:
+ * drm_dp_mst_atomic_enable_dsc()
+ */
+int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm_dp_mst_topology_mgr *mgr)
+{
+	struct drm_dp_mst_topology_state *mst_state;
+	struct drm_dp_vcpi_allocation *pos;
+	struct drm_connector *connector;
+	struct drm_connector_state *conn_state;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+
+	mst_state = drm_atomic_get_mst_topology_state(state, mgr);
+
+	if (IS_ERR(mst_state))
+		return -EINVAL;
+
+	list_for_each_entry(pos, &mst_state->vcpis, next) {
+
+		connector = pos->port->connector;
+
+		if (!connector)
+			return -EINVAL;
+
+		conn_state = drm_atomic_get_connector_state(state, connector);
+
+		if (IS_ERR(conn_state))
+			return PTR_ERR(conn_state);
+
+		crtc = conn_state->crtc;
+
+		if (WARN_ON(!crtc))
+			return -EINVAL;
+
+		if (!drm_dp_mst_dsc_aux_for_port(pos->port))
+			continue;
+
+		crtc_state = drm_atomic_get_crtc_state(mst_state->base.state, crtc);
+
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		DRM_DEBUG_ATOMIC("[MST MGR:%p] Setting mode_changed flag on CRTC %p\n",
+				 mgr, crtc);
+
+		crtc_state->mode_changed = true;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_mst_add_affected_dsc_crtcs);
+
+/**
+ * drm_dp_mst_atomic_enable_dsc - Set DSC Enable Flag to On/Off
+ * @state: Pointer to the new drm_atomic_state
+ * @port: Pointer to the affected MST Port
+ * @pbn: Newly recalculated bw required for link with DSC enabled
+ * @pbn_div: Divider to calculate correct number of pbn per slot
+ * @enable: Boolean flag to enable or disable DSC on the port
+ *
+ * This function enables DSC on the given Port
+ * by recalculating its vcpi from pbn provided
+ * and sets dsc_enable flag to keep track of which
+ * ports have DSC enabled
+ *
+ */
+int drm_dp_mst_atomic_enable_dsc(struct drm_atomic_state *state,
+				 struct drm_dp_mst_port *port,
+				 int pbn, int pbn_div,
+				 bool enable)
+{
+	struct drm_dp_mst_topology_state *mst_state;
+	struct drm_dp_vcpi_allocation *pos;
+	bool found = false;
+	int vcpi = 0;
+
+	mst_state = drm_atomic_get_mst_topology_state(state, port->mgr);
+
+	if (IS_ERR(mst_state))
+		return PTR_ERR(mst_state);
+
+	list_for_each_entry(pos, &mst_state->vcpis, next) {
+		if (pos->port == port) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		DRM_DEBUG_ATOMIC("[MST PORT:%p] Couldn't find VCPI allocation in mst state %p\n",
+				 port, mst_state);
+		return -EINVAL;
+	}
+
+	if (pos->dsc_enabled == enable) {
+		DRM_DEBUG_ATOMIC("[MST PORT:%p] DSC flag is already set to %d, returning %d VCPI slots\n",
+				 port, enable, pos->vcpi);
+		vcpi = pos->vcpi;
+	}
+
+	if (enable) {
+		vcpi = drm_dp_atomic_find_vcpi_slots(state, port->mgr, port, pbn, pbn_div);
+		DRM_DEBUG_ATOMIC("[MST PORT:%p] Enabling DSC flag, reallocating %d VCPI slots on the port\n",
+				 port, vcpi);
+		if (vcpi < 0)
+			return -EINVAL;
+	}
+
+	pos->dsc_enabled = enable;
+
+	return vcpi;
+}
+EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
+/**
  * drm_dp_mst_atomic_check - Check that the new state of an MST topology in an
  * atomic update is valid
  * @state: Pointer to the new &struct drm_dp_mst_topology_state
@@ -3887,7 +5054,13 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state)
 	int i, ret = 0;
 
 	for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
-		ret = drm_dp_mst_atomic_check_topology_state(mgr, mst_state);
+		if (!mgr->mst_state)
+			continue;
+
+		ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state);
+		if (ret)
+			break;
+		ret = drm_dp_mst_atomic_check_bw_limit(mgr->mst_primary, mst_state);
 		if (ret)
 			break;
 	}
@@ -3920,9 +5093,6 @@ EXPORT_SYMBOL(drm_dp_mst_topology_state_funcs);
 struct drm_dp_mst_topology_state *drm_atomic_get_mst_topology_state(struct drm_atomic_state *state,
 								    struct drm_dp_mst_topology_mgr *mgr)
 {
-	struct drm_device *dev = mgr->dev;
-
-	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	return to_dp_mst_topology_state(drm_atomic_get_private_obj_state(state, &mgr->base));
 }
 EXPORT_SYMBOL(drm_atomic_get_mst_topology_state);
@@ -3948,12 +5118,20 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
 	mutex_init(&mgr->lock);
 	mutex_init(&mgr->qlock);
 	mutex_init(&mgr->payload_lock);
-	mutex_init(&mgr->destroy_connector_lock);
+	mutex_init(&mgr->delayed_destroy_lock);
+	mutex_init(&mgr->up_req_lock);
+	mutex_init(&mgr->probe_lock);
+#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
+	mutex_init(&mgr->topology_ref_history_lock);
+#endif
 	INIT_LIST_HEAD(&mgr->tx_msg_downq);
-	INIT_LIST_HEAD(&mgr->destroy_connector_list);
+	INIT_LIST_HEAD(&mgr->destroy_port_list);
+	INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
+	INIT_LIST_HEAD(&mgr->up_req_list);
 	INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
 	INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
-	INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work);
+	INIT_WORK(&mgr->delayed_destroy_work, drm_dp_delayed_destroy_work);
+	INIT_WORK(&mgr->up_req_work, drm_dp_mst_up_req_work);
 	init_waitqueue_head(&mgr->tx_waitq);
 	mgr->dev = dev;
 	mgr->aux = aux;
@@ -3970,8 +5148,6 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
 	if (!mgr->proposed_vcpis)
 		return -ENOMEM;
 	set_bit(0, &mgr->payload_mask);
-	if (test_calc_pbn_mode() < 0)
-		DRM_ERROR("MST PBN self-test failed\n");
 
 	mst_state = kzalloc(sizeof(*mst_state), GFP_KERNEL);
 	if (mst_state == NULL)
@@ -3996,7 +5172,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
 {
 	drm_dp_mst_topology_mgr_set_mst(mgr, false);
 	flush_work(&mgr->work);
-	flush_work(&mgr->destroy_connector_work);
+	cancel_work_sync(&mgr->delayed_destroy_work);
 	mutex_lock(&mgr->payload_lock);
 	kfree(mgr->payloads);
 	mgr->payloads = NULL;
@@ -4007,6 +5183,16 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
 	mgr->aux = NULL;
 	drm_atomic_private_obj_fini(&mgr->base);
 	mgr->funcs = NULL;
+
+	mutex_destroy(&mgr->delayed_destroy_lock);
+	mutex_destroy(&mgr->payload_lock);
+	mutex_destroy(&mgr->qlock);
+	mutex_destroy(&mgr->lock);
+	mutex_destroy(&mgr->up_req_lock);
+	mutex_destroy(&mgr->probe_lock);
+#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
+	mutex_destroy(&mgr->topology_ref_history_lock);
+#endif
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy);
 
@@ -4138,3 +5324,173 @@ static void drm_dp_mst_unregister_i2c_bus(struct drm_dp_aux *aux)
 {
 	i2c_del_adapter(&aux->ddc);
 }
+
+/**
+ * drm_dp_mst_is_virtual_dpcd() - Is the given port a virtual DP Peer Device
+ * @port: The port to check
+ *
+ * A single physical MST hub object can be represented in the topology
+ * by multiple branches, with virtual ports between those branches.
+ *
+ * As of DP1.4, An MST hub with internal (virtual) ports must expose
+ * certain DPCD registers over those ports. See sections 2.6.1.1.1
+ * and 2.6.1.1.2 of Display Port specification v1.4 for details.
+ *
+ * May acquire mgr->lock
+ *
+ * Returns:
+ * true if the port is a virtual DP peer device, false otherwise
+ */
+static bool drm_dp_mst_is_virtual_dpcd(struct drm_dp_mst_port *port)
+{
+	struct drm_dp_mst_port *downstream_port;
+
+	if (!port || port->dpcd_rev < DP_DPCD_REV_14)
+		return false;
+
+	/* Virtual DP Sink (Internal Display Panel) */
+	if (port->port_num >= 8)
+		return true;
+
+	/* DP-to-HDMI Protocol Converter */
+	if (port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV &&
+	    !port->mcs &&
+	    port->ldps)
+		return true;
+
+	/* DP-to-DP */
+	mutex_lock(&port->mgr->lock);
+	if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING &&
+	    port->mstb &&
+	    port->mstb->num_ports == 2) {
+		list_for_each_entry(downstream_port, &port->mstb->ports, next) {
+			if (downstream_port->pdt == DP_PEER_DEVICE_SST_SINK &&
+			    !downstream_port->input) {
+				mutex_unlock(&port->mgr->lock);
+				return true;
+			}
+		}
+	}
+	mutex_unlock(&port->mgr->lock);
+
+	return false;
+}
+
+/**
+ * drm_dp_mst_dsc_aux_for_port() - Find the correct aux for DSC
+ * @port: The port to check. A leaf of the MST tree with an attached display.
+ *
+ * Depending on the situation, DSC may be enabled via the endpoint aux,
+ * the immediately upstream aux, or the connector's physical aux.
+ *
+ * This is both the correct aux to read DSC_CAPABILITY and the
+ * correct aux to write DSC_ENABLED.
+ *
+ * This operation can be expensive (up to four aux reads), so
+ * the caller should cache the return.
+ *
+ * Returns:
+ * NULL if DSC cannot be enabled on this port, otherwise the aux device
+ */
+struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
+{
+	struct drm_dp_mst_port *immediate_upstream_port;
+	struct drm_dp_mst_port *fec_port;
+	struct drm_dp_desc desc = { 0 };
+	u8 endpoint_fec;
+	u8 endpoint_dsc;
+
+	if (!port)
+		return NULL;
+
+	if (port->parent->port_parent)
+		immediate_upstream_port = port->parent->port_parent;
+	else
+		immediate_upstream_port = NULL;
+
+	fec_port = immediate_upstream_port;
+	while (fec_port) {
+		/*
+		 * Each physical link (i.e. not a virtual port) between the
+		 * output and the primary device must support FEC
+		 */
+		if (!drm_dp_mst_is_virtual_dpcd(fec_port) &&
+		    !fec_port->fec_capable)
+			return NULL;
+
+		fec_port = fec_port->parent->port_parent;
+	}
+
+	/* DP-to-DP peer device */
+	if (drm_dp_mst_is_virtual_dpcd(immediate_upstream_port)) {
+		u8 upstream_dsc;
+
+		if (drm_dp_dpcd_read(&port->aux,
+				     DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1)
+			return NULL;
+		if (drm_dp_dpcd_read(&port->aux,
+				     DP_FEC_CAPABILITY, &endpoint_fec, 1) != 1)
+			return NULL;
+		if (drm_dp_dpcd_read(&immediate_upstream_port->aux,
+				     DP_DSC_SUPPORT, &upstream_dsc, 1) != 1)
+			return NULL;
+
+		/* Enpoint decompression with DP-to-DP peer device */
+		if ((endpoint_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED) &&
+		    (endpoint_fec & DP_FEC_CAPABLE) &&
+		    (upstream_dsc & 0x2) /* DSC passthrough */)
+			return &port->aux;
+
+		/* Virtual DPCD decompression with DP-to-DP peer device */
+		return &immediate_upstream_port->aux;
+	}
+
+	/* Virtual DPCD decompression with DP-to-HDMI or Virtual DP Sink */
+	if (drm_dp_mst_is_virtual_dpcd(port))
+		return &port->aux;
+
+	/*
+	 * Synaptics quirk
+	 * Applies to ports for which:
+	 * - Physical aux has Synaptics OUI
+	 * - DPv1.4 or higher
+	 * - Port is on primary branch device
+	 * - Not a VGA adapter (DP_DWN_STRM_PORT_TYPE_ANALOG)
+	 */
+	if (drm_dp_read_desc(port->mgr->aux, &desc, true))
+		return NULL;
+
+	if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) &&
+	    port->mgr->dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14 &&
+	    port->parent == port->mgr->mst_primary) {
+		u8 downstreamport;
+
+		if (drm_dp_dpcd_read(&port->aux, DP_DOWNSTREAMPORT_PRESENT,
+				     &downstreamport, 1) < 0)
+			return NULL;
+
+		if ((downstreamport & DP_DWN_STRM_PORT_PRESENT) &&
+		   ((downstreamport & DP_DWN_STRM_PORT_TYPE_MASK)
+		     != DP_DWN_STRM_PORT_TYPE_ANALOG))
+			return port->mgr->aux;
+	}
+
+	/*
+	 * The check below verifies if the MST sink
+	 * connected to the GPU is capable of DSC -
+	 * therefore the endpoint needs to be
+	 * both DSC and FEC capable.
+	 */
+	if (drm_dp_dpcd_read(&port->aux,
+	   DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1)
+		return NULL;
+	if (drm_dp_dpcd_read(&port->aux,
+	   DP_FEC_CAPABILITY, &endpoint_fec, 1) != 1)
+		return NULL;
+	if ((endpoint_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED) &&
+	   (endpoint_fec & DP_FEC_CAPABLE))
+		return &port->aux;
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_dp_mst_dsc_aux_for_port);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology_internal.h b/drivers/gpu/drm/drm_dp_mst_topology_internal.h
new file mode 100644
index 000000000000..eeda9a61c657
--- /dev/null
+++ b/drivers/gpu/drm/drm_dp_mst_topology_internal.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Declarations for DP MST related functions which are only used in selftests
+ *
+ * Copyright © 2018 Red Hat
+ * Authors:
+ *     Lyude Paul <lyude@redhat.com>
+ */
+
+#ifndef _DRM_DP_MST_HELPER_INTERNAL_H_
+#define _DRM_DP_MST_HELPER_INTERNAL_H_
+
+#include <drm/drm_dp_mst_helper.h>
+
+void
+drm_dp_encode_sideband_req(const struct drm_dp_sideband_msg_req_body *req,
+			   struct drm_dp_sideband_msg_tx *raw);
+int drm_dp_decode_sideband_req(const struct drm_dp_sideband_msg_tx *raw,
+			       struct drm_dp_sideband_msg_req_body *req);
+void
+drm_dp_dump_sideband_msg_req_body(const struct drm_dp_sideband_msg_req_body *req,
+				  int indent, struct drm_printer *printer);
+
+#endif /* !_DRM_DP_MST_HELPER_INTERNAL_H_ */
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 769feefeeeef..7c18a980cd4b 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -46,26 +46,9 @@
 #include "drm_internal.h"
 #include "drm_legacy.h"
 
-/*
- * drm_debug: Enable debug output.
- * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
- */
-unsigned int drm_debug = 0;
-EXPORT_SYMBOL(drm_debug);
-
 MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl");
 MODULE_DESCRIPTION("DRM shared core routines");
 MODULE_LICENSE("GPL and additional rights");
-MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug category.\n"
-"\t\tBit 0 (0x01)  will enable CORE messages (drm core code)\n"
-"\t\tBit 1 (0x02)  will enable DRIVER messages (drm controller code)\n"
-"\t\tBit 2 (0x04)  will enable KMS messages (modesetting code)\n"
-"\t\tBit 3 (0x08)  will enable PRIME messages (prime code)\n"
-"\t\tBit 4 (0x10)  will enable ATOMIC messages (atomic code)\n"
-"\t\tBit 5 (0x20)  will enable VBL messages (vblank code)\n"
-"\t\tBit 7 (0x80)  will enable LEASE messages (leasing code)\n"
-"\t\tBit 8 (0x100) will enable DP messages (displayport code)");
-module_param_named(debug, drm_debug, int, 0600);
 
 static DEFINE_SPINLOCK(drm_minor_lock);
 static struct idr drm_minors_idr;
@@ -639,7 +622,8 @@ int drm_dev_init(struct drm_device *dev,
 		return -ENODEV;
 	}
 
-	BUG_ON(!parent);
+	if (WARN_ON(!parent))
+		return -EINVAL;
 
 	kref_init(&dev->ref);
 	dev->dev = get_device(parent);
@@ -742,7 +726,7 @@ int devm_drm_dev_init(struct device *parent,
 {
 	int ret;
 
-	if (WARN_ON(!parent || !driver->release))
+	if (WARN_ON(!driver->release))
 		return -EINVAL;
 
 	ret = drm_dev_init(dev, driver, parent);
diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
index 77f4e5ae4197..4a475d9696ff 100644
--- a/drivers/gpu/drm/drm_dsc.c
+++ b/drivers/gpu/drm/drm_dsc.c
@@ -216,13 +216,11 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload,
 	 */
 	for (i = 0; i < DSC_NUM_BUF_RANGES; i++) {
 		pps_payload->rc_range_parameters[i] =
-			((dsc_cfg->rc_range_params[i].range_min_qp <<
-			  DSC_PPS_RC_RANGE_MINQP_SHIFT) |
-			 (dsc_cfg->rc_range_params[i].range_max_qp <<
-			  DSC_PPS_RC_RANGE_MAXQP_SHIFT) |
-			 (dsc_cfg->rc_range_params[i].range_bpg_offset));
-		pps_payload->rc_range_parameters[i] =
-			cpu_to_be16(pps_payload->rc_range_parameters[i]);
+			cpu_to_be16((dsc_cfg->rc_range_params[i].range_min_qp <<
+				     DSC_PPS_RC_RANGE_MINQP_SHIFT) |
+				    (dsc_cfg->rc_range_params[i].range_max_qp <<
+				     DSC_PPS_RC_RANGE_MAXQP_SHIFT) |
+				    (dsc_cfg->rc_range_params[i].range_bpg_offset));
 	}
 
 	/* PPS 88 */
@@ -336,12 +334,6 @@ int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg)
 	else
 		vdsc_cfg->nfl_bpg_offset = 0;
 
-	/* 2^16 - 1 */
-	if (vdsc_cfg->nfl_bpg_offset > 65535) {
-		DRM_DEBUG_KMS("NflBpgOffset is too large for this slice height\n");
-		return -ERANGE;
-	}
-
 	/* Number of groups used to code the entire slice */
 	groups_total = groups_per_line * vdsc_cfg->slice_height;
 
@@ -371,11 +363,6 @@ int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg)
 		vdsc_cfg->scale_increment_interval = 0;
 	}
 
-	if (vdsc_cfg->scale_increment_interval > 65535) {
-		DRM_DEBUG_KMS("ScaleIncrementInterval is large for slice height\n");
-		return -ERANGE;
-	}
-
 	/*
 	 * DSC spec mentions that bits_per_pixel specifies the target
 	 * bits/pixel (bpp) rate that is used by the encoder,
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 6b0177112e18..99769d6c9f84 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -710,14 +710,11 @@ static const struct minimode extra_modes[] = {
 };
 
 /*
- * Probably taken from CEA-861 spec.
- * This table is converted from xorg's hw/xfree86/modes/xf86EdidModes.c.
+ * From CEA/CTA-861 spec.
  *
- * Index using the VIC.
+ * Do not access directly, instead always use cea_mode_for_vic().
  */
-static const struct drm_display_mode edid_cea_modes[] = {
-	/* 0 - dummy, VICs start at 1 */
-	{ },
+static const struct drm_display_mode edid_cea_modes_1[] = {
 	/* 1 - 640x480@60Hz 4:3 */
 	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
 		   752, 800, 0, 480, 490, 492, 525, 0,
@@ -1278,6 +1275,249 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   4104, 4400, 0, 2160, 2168, 2178, 2250, 0,
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 108 - 1280x720@48Hz 16:9 */
+	{ DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 90000, 1280, 2240,
+		   2280, 2500, 0, 720, 725, 730, 750, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 109 - 1280x720@48Hz 64:27 */
+	{ DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 90000, 1280, 2240,
+		   2280, 2500, 0, 720, 725, 730, 750, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 110 - 1680x720@48Hz 64:27 */
+	{ DRM_MODE("1680x720", DRM_MODE_TYPE_DRIVER, 99000, 1680, 2490,
+		   2530, 2750, 0, 720, 725, 730, 750, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 111 - 1920x1080@48Hz 16:9 */
+	{ DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2558,
+		   2602, 2750, 0, 1080, 1084, 1089, 1125, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 112 - 1920x1080@48Hz 64:27 */
+	{ DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2558,
+		   2602, 2750, 0, 1080, 1084, 1089, 1125, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 113 - 2560x1080@48Hz 64:27 */
+	{ DRM_MODE("2560x1080", DRM_MODE_TYPE_DRIVER, 198000, 2560, 3558,
+		   3602, 3750, 0, 1080, 1084, 1089, 1100, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 114 - 3840x2160@48Hz 16:9 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 594000, 3840, 5116,
+		   5204, 5500, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 115 - 4096x2160@48Hz 256:135 */
+	{ DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 594000, 4096, 5116,
+		   5204, 5500, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135, },
+	/* 116 - 3840x2160@48Hz 64:27 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 594000, 3840, 5116,
+		   5204, 5500, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 117 - 3840x2160@100Hz 16:9 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 1188000, 3840, 4896,
+		   4984, 5280, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 118 - 3840x2160@120Hz 16:9 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 1188000, 3840, 4016,
+		   4104, 4400, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 119 - 3840x2160@100Hz 64:27 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 1188000, 3840, 4896,
+		   4984, 5280, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 120 - 3840x2160@120Hz 64:27 */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 1188000, 3840, 4016,
+		   4104, 4400, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 121 - 5120x2160@24Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 396000, 5120, 7116,
+		   7204, 7500, 0, 2160, 2168, 2178, 2200, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 122 - 5120x2160@25Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 396000, 5120, 6816,
+		   6904, 7200, 0, 2160, 2168, 2178, 2200, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 123 - 5120x2160@30Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 396000, 5120, 5784,
+		   5872, 6000, 0, 2160, 2168, 2178, 2200, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 124 - 5120x2160@48Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 742500, 5120, 5866,
+		   5954, 6250, 0, 2160, 2168, 2178, 2475, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 125 - 5120x2160@50Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 742500, 5120, 6216,
+		   6304, 6600, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 126 - 5120x2160@60Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 742500, 5120, 5284,
+		   5372, 5500, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 127 - 5120x2160@100Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 1485000, 5120, 6216,
+		   6304, 6600, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+};
+
+/*
+ * From CEA/CTA-861 spec.
+ *
+ * Do not access directly, instead always use cea_mode_for_vic().
+ */
+static const struct drm_display_mode edid_cea_modes_193[] = {
+	/* 193 - 5120x2160@120Hz 64:27 */
+	{ DRM_MODE("5120x2160", DRM_MODE_TYPE_DRIVER, 1485000, 5120, 5284,
+		   5372, 5500, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 194 - 7680x4320@24Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 10232,
+		   10408, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 195 - 7680x4320@25Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 10032,
+		   10208, 10800, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 196 - 7680x4320@30Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 8232,
+		   8408, 9000, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 197 - 7680x4320@48Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 10232,
+		   10408, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 198 - 7680x4320@50Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 10032,
+		   10208, 10800, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 199 - 7680x4320@60Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 8232,
+		   8408, 9000, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 200 - 7680x4320@100Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 4752000, 7680, 9792,
+		   9968, 10560, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 201 - 7680x4320@120Hz 16:9 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 4752000, 7680, 8032,
+		   8208, 8800, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 202 - 7680x4320@24Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 10232,
+		   10408, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 203 - 7680x4320@25Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 10032,
+		   10208, 10800, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 204 - 7680x4320@30Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 1188000, 7680, 8232,
+		   8408, 9000, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 205 - 7680x4320@48Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 10232,
+		   10408, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 206 - 7680x4320@50Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 10032,
+		   10208, 10800, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 207 - 7680x4320@60Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 2376000, 7680, 8232,
+		   8408, 9000, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 208 - 7680x4320@100Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 4752000, 7680, 9792,
+		   9968, 10560, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 209 - 7680x4320@120Hz 64:27 */
+	{ DRM_MODE("7680x4320", DRM_MODE_TYPE_DRIVER, 4752000, 7680, 8032,
+		   8208, 8800, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 210 - 10240x4320@24Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 1485000, 10240, 11732,
+		   11908, 12500, 0, 4320, 4336, 4356, 4950, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 211 - 10240x4320@25Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 1485000, 10240, 12732,
+		   12908, 13500, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 212 - 10240x4320@30Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 1485000, 10240, 10528,
+		   10704, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 213 - 10240x4320@48Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 2970000, 10240, 11732,
+		   11908, 12500, 0, 4320, 4336, 4356, 4950, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 48, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 214 - 10240x4320@50Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 2970000, 10240, 12732,
+		   12908, 13500, 0, 4320, 4336, 4356, 4400, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 215 - 10240x4320@60Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 2970000, 10240, 10528,
+		   10704, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 216 - 10240x4320@100Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 5940000, 10240, 12432,
+		   12608, 13200, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 217 - 10240x4320@120Hz 64:27 */
+	{ DRM_MODE("10240x4320", DRM_MODE_TYPE_DRIVER, 5940000, 10240, 10528,
+		   10704, 11000, 0, 4320, 4336, 4356, 4500, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+	/* 218 - 4096x2160@100Hz 256:135 */
+	{ DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 1188000, 4096, 4896,
+		   4984, 5280, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135, },
+	/* 219 - 4096x2160@120Hz 256:135 */
+	{ DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 1188000, 4096, 4184,
+		   4272, 4400, 0, 2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135, },
 };
 
 /*
@@ -1291,25 +1531,25 @@ static const struct drm_display_mode edid_4k_modes[] = {
 		   3840, 4016, 4104, 4400, 0,
 		   2160, 2168, 2178, 2250, 0,
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
-	  .vrefresh = 30, },
+	  .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
 	/* 2 - 3840x2160@25Hz */
 	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
 		   3840, 4896, 4984, 5280, 0,
 		   2160, 2168, 2178, 2250, 0,
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
-	  .vrefresh = 25, },
+	  .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
 	/* 3 - 3840x2160@24Hz */
 	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
 		   3840, 5116, 5204, 5500, 0,
 		   2160, 2168, 2178, 2250, 0,
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
-	  .vrefresh = 24, },
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
 	/* 4 - 4096x2160@24Hz (SMPTE) */
 	{ DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 297000,
 		   4096, 5116, 5204, 5500, 0,
 		   2160, 2168, 2178, 2250, 0,
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
-	  .vrefresh = 24, },
+	  .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135, },
 };
 
 /*** DDC fetch and block validation ***/
@@ -1554,7 +1794,7 @@ static void connector_bad_edid(struct drm_connector *connector,
 {
 	int i;
 
-	if (connector->bad_edid_counter++ && !(drm_debug & DRM_UT_KMS))
+	if (connector->bad_edid_counter++ && !drm_debug_enabled(DRM_UT_KMS))
 		return;
 
 	dev_warn(connector->dev->dev,
@@ -2092,7 +2332,8 @@ static int standard_timing_level(struct edid *edid)
 			return LEVEL_CVT;
 		if (drm_gtf2_hbreak(edid))
 			return LEVEL_GTF2;
-		return LEVEL_GTF;
+		if (edid->features & DRM_EDID_FEATURE_DEFAULT_GTF)
+			return LEVEL_GTF;
 	}
 	return LEVEL_DMT;
 }
@@ -2970,6 +3211,30 @@ static u8 *drm_find_cea_extension(const struct edid *edid)
 	return cea;
 }
 
+static const struct drm_display_mode *cea_mode_for_vic(u8 vic)
+{
+	BUILD_BUG_ON(1 + ARRAY_SIZE(edid_cea_modes_1) - 1 != 127);
+	BUILD_BUG_ON(193 + ARRAY_SIZE(edid_cea_modes_193) - 1 != 219);
+
+	if (vic >= 1 && vic < 1 + ARRAY_SIZE(edid_cea_modes_1))
+		return &edid_cea_modes_1[vic - 1];
+	if (vic >= 193 && vic < 193 + ARRAY_SIZE(edid_cea_modes_193))
+		return &edid_cea_modes_193[vic - 193];
+	return NULL;
+}
+
+static u8 cea_num_vics(void)
+{
+	return 193 + ARRAY_SIZE(edid_cea_modes_193);
+}
+
+static u8 cea_next_vic(u8 vic)
+{
+	if (++vic == 1 + ARRAY_SIZE(edid_cea_modes_1))
+		vic = 193;
+	return vic;
+}
+
 /*
  * Calculate the alternate clock for the CEA mode
  * (60Hz vs. 59.94Hz etc.)
@@ -3007,14 +3272,14 @@ cea_mode_alternate_timings(u8 vic, struct drm_display_mode *mode)
 	 * get the other variants by simply increasing the
 	 * vertical front porch length.
 	 */
-	BUILD_BUG_ON(edid_cea_modes[8].vtotal != 262 ||
-		     edid_cea_modes[9].vtotal != 262 ||
-		     edid_cea_modes[12].vtotal != 262 ||
-		     edid_cea_modes[13].vtotal != 262 ||
-		     edid_cea_modes[23].vtotal != 312 ||
-		     edid_cea_modes[24].vtotal != 312 ||
-		     edid_cea_modes[27].vtotal != 312 ||
-		     edid_cea_modes[28].vtotal != 312);
+	BUILD_BUG_ON(cea_mode_for_vic(8)->vtotal != 262 ||
+		     cea_mode_for_vic(9)->vtotal != 262 ||
+		     cea_mode_for_vic(12)->vtotal != 262 ||
+		     cea_mode_for_vic(13)->vtotal != 262 ||
+		     cea_mode_for_vic(23)->vtotal != 312 ||
+		     cea_mode_for_vic(24)->vtotal != 312 ||
+		     cea_mode_for_vic(27)->vtotal != 312 ||
+		     cea_mode_for_vic(28)->vtotal != 312);
 
 	if (((vic == 8 || vic == 9 ||
 	      vic == 12 || vic == 13) && mode->vtotal < 263) ||
@@ -3042,8 +3307,8 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m
 	if (to_match->picture_aspect_ratio)
 		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
 
-	for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
-		struct drm_display_mode cea_mode = edid_cea_modes[vic];
+	for (vic = 1; vic < cea_num_vics(); vic = cea_next_vic(vic)) {
+		struct drm_display_mode cea_mode = *cea_mode_for_vic(vic);
 		unsigned int clock1, clock2;
 
 		/* Check both 60Hz and 59.94Hz */
@@ -3081,8 +3346,8 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
 	if (to_match->picture_aspect_ratio)
 		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
 
-	for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
-		struct drm_display_mode cea_mode = edid_cea_modes[vic];
+	for (vic = 1; vic < cea_num_vics(); vic = cea_next_vic(vic)) {
+		struct drm_display_mode cea_mode = *cea_mode_for_vic(vic);
 		unsigned int clock1, clock2;
 
 		/* Check both 60Hz and 59.94Hz */
@@ -3105,36 +3370,31 @@ EXPORT_SYMBOL(drm_match_cea_mode);
 
 static bool drm_valid_cea_vic(u8 vic)
 {
-	return vic > 0 && vic < ARRAY_SIZE(edid_cea_modes);
+	return cea_mode_for_vic(vic) != NULL;
 }
 
-/**
- * drm_get_cea_aspect_ratio - get the picture aspect ratio corresponding to
- * the input VIC from the CEA mode list
- * @video_code: ID given to each of the CEA modes
- *
- * Returns picture aspect ratio
- */
-enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code)
+static enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code)
+{
+	const struct drm_display_mode *mode = cea_mode_for_vic(video_code);
+
+	if (mode)
+		return mode->picture_aspect_ratio;
+
+	return HDMI_PICTURE_ASPECT_NONE;
+}
+
+static enum hdmi_picture_aspect drm_get_hdmi_aspect_ratio(const u8 video_code)
 {
-	return edid_cea_modes[video_code].picture_aspect_ratio;
+	return edid_4k_modes[video_code].picture_aspect_ratio;
 }
-EXPORT_SYMBOL(drm_get_cea_aspect_ratio);
 
 /*
  * Calculate the alternate clock for HDMI modes (those from the HDMI vendor
  * specific block).
- *
- * It's almost like cea_mode_alternate_clock(), we just need to add an
- * exception for the VIC 4 mode (4096x2160@24Hz): no alternate clock for this
- * one.
  */
 static unsigned int
 hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode)
 {
-	if (hdmi_mode->vdisplay == 4096 && hdmi_mode->hdisplay == 2160)
-		return hdmi_mode->clock;
-
 	return cea_mode_alternate_clock(hdmi_mode);
 }
 
@@ -3147,6 +3407,9 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_
 	if (!to_match->clock)
 		return 0;
 
+	if (to_match->picture_aspect_ratio)
+		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
 	for (vic = 1; vic < ARRAY_SIZE(edid_4k_modes); vic++) {
 		const struct drm_display_mode *hdmi_mode = &edid_4k_modes[vic];
 		unsigned int clock1, clock2;
@@ -3182,6 +3445,9 @@ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
 	if (!to_match->clock)
 		return 0;
 
+	if (to_match->picture_aspect_ratio)
+		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
 	for (vic = 1; vic < ARRAY_SIZE(edid_4k_modes); vic++) {
 		const struct drm_display_mode *hdmi_mode = &edid_4k_modes[vic];
 		unsigned int clock1, clock2;
@@ -3226,7 +3492,7 @@ add_alternate_cea_modes(struct drm_connector *connector, struct edid *edid)
 		unsigned int clock1, clock2;
 
 		if (drm_valid_cea_vic(vic)) {
-			cea_mode = &edid_cea_modes[vic];
+			cea_mode = cea_mode_for_vic(vic);
 			clock2 = cea_mode_alternate_clock(cea_mode);
 		} else {
 			vic = drm_match_hdmi_mode(mode);
@@ -3301,7 +3567,7 @@ drm_display_mode_from_vic_index(struct drm_connector *connector,
 	if (!drm_valid_cea_vic(vic))
 		return NULL;
 
-	newmode = drm_mode_duplicate(dev, &edid_cea_modes[vic]);
+	newmode = drm_mode_duplicate(dev, cea_mode_for_vic(vic));
 	if (!newmode)
 		return NULL;
 
@@ -3335,7 +3601,7 @@ static int do_y420vdb_modes(struct drm_connector *connector,
 		if (!drm_valid_cea_vic(vic))
 			continue;
 
-		newmode = drm_mode_duplicate(dev, &edid_cea_modes[vic]);
+		newmode = drm_mode_duplicate(dev, cea_mode_for_vic(vic));
 		if (!newmode)
 			break;
 		bitmap_set(hdmi->y420_vdb_modes, vic, 1);
@@ -3722,7 +3988,7 @@ cea_db_offsets(const u8 *cea, int *start, int *end)
 		if (*end < 4 || *end > 127)
 			return -ERANGE;
 	} else {
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	return 0;
@@ -3904,7 +4170,7 @@ static void fixup_detailed_cea_mode_clock(struct drm_display_mode *mode)
 	vic = drm_match_cea_mode_clock_tolerance(mode, 5);
 	if (drm_valid_cea_vic(vic)) {
 		type = "CEA";
-		cea_mode = &edid_cea_modes[vic];
+		cea_mode = cea_mode_for_vic(vic);
 		clock1 = cea_mode->clock;
 		clock2 = cea_mode_alternate_clock(cea_mode);
 	} else {
@@ -4186,12 +4452,12 @@ int drm_edid_to_sad(struct edid *edid, struct cea_sad **sads)
 	cea = drm_find_cea_extension(edid);
 	if (!cea) {
 		DRM_DEBUG_KMS("SAD: no CEA Extension found\n");
-		return -ENOENT;
+		return 0;
 	}
 
 	if (cea_revision(cea) < 3) {
 		DRM_DEBUG_KMS("SAD: wrong CEA revision\n");
-		return -ENOTSUPP;
+		return 0;
 	}
 
 	if (cea_db_offsets(cea, &start, &end)) {
@@ -4247,12 +4513,12 @@ int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb)
 	cea = drm_find_cea_extension(edid);
 	if (!cea) {
 		DRM_DEBUG_KMS("SAD: no CEA Extension found\n");
-		return -ENOENT;
+		return 0;
 	}
 
 	if (cea_revision(cea) < 3) {
 		DRM_DEBUG_KMS("SAD: wrong CEA revision\n");
-		return -ENOTSUPP;
+		return 0;
 	}
 
 	if (cea_db_offsets(cea, &start, &end)) {
@@ -4480,7 +4746,7 @@ static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector,
 		if (scdc->supported) {
 			scdc->scrambling.supported = true;
 
-			/* Few sinks support scrambling for cloks < 340M */
+			/* Few sinks support scrambling for clocks < 340M */
 			if ((hf_vsdb[6] & 0x8))
 				scdc->scrambling.low_rates = true;
 		}
@@ -5071,6 +5337,49 @@ drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame,
 }
 EXPORT_SYMBOL(drm_hdmi_infoframe_set_hdr_metadata);
 
+static u8 drm_mode_hdmi_vic(struct drm_connector *connector,
+			    const struct drm_display_mode *mode)
+{
+	bool has_hdmi_infoframe = connector ?
+		connector->display_info.has_hdmi_infoframe : false;
+
+	if (!has_hdmi_infoframe)
+		return 0;
+
+	/* No HDMI VIC when signalling 3D video format */
+	if (mode->flags & DRM_MODE_FLAG_3D_MASK)
+		return 0;
+
+	return drm_match_hdmi_mode(mode);
+}
+
+static u8 drm_mode_cea_vic(struct drm_connector *connector,
+			   const struct drm_display_mode *mode)
+{
+	u8 vic;
+
+	/*
+	 * HDMI spec says if a mode is found in HDMI 1.4b 4K modes
+	 * we should send its VIC in vendor infoframes, else send the
+	 * VIC in AVI infoframes. Lets check if this mode is present in
+	 * HDMI 1.4b 4K modes
+	 */
+	if (drm_mode_hdmi_vic(connector, mode))
+		return 0;
+
+	vic = drm_match_cea_mode(mode);
+
+	/*
+	 * HDMI 1.4 VIC range: 1 <= VIC <= 64 (CEA-861-D) but
+	 * HDMI 2.0 VIC range: 1 <= VIC <= 107 (CEA-861-F). So we
+	 * have to make sure we dont break HDMI 1.4 sinks.
+	 */
+	if (!is_hdmi2_sink(connector) && vic > 64)
+		return 0;
+
+	return vic;
+}
+
 /**
  * drm_hdmi_avi_infoframe_from_display_mode() - fill an HDMI AVI infoframe with
  *                                              data from a DRM display mode
@@ -5086,6 +5395,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 					 const struct drm_display_mode *mode)
 {
 	enum hdmi_picture_aspect picture_aspect;
+	u8 vic, hdmi_vic;
 	int err;
 
 	if (!frame || !mode)
@@ -5098,29 +5408,8 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		frame->pixel_repeat = 1;
 
-	frame->video_code = drm_match_cea_mode(mode);
-
-	/*
-	 * HDMI 1.4 VIC range: 1 <= VIC <= 64 (CEA-861-D) but
-	 * HDMI 2.0 VIC range: 1 <= VIC <= 107 (CEA-861-F). So we
-	 * have to make sure we dont break HDMI 1.4 sinks.
-	 */
-	if (!is_hdmi2_sink(connector) && frame->video_code > 64)
-		frame->video_code = 0;
-
-	/*
-	 * HDMI spec says if a mode is found in HDMI 1.4b 4K modes
-	 * we should send its VIC in vendor infoframes, else send the
-	 * VIC in AVI infoframes. Lets check if this mode is present in
-	 * HDMI 1.4b 4K modes
-	 */
-	if (frame->video_code) {
-		u8 vendor_if_vic = drm_match_hdmi_mode(mode);
-		bool is_s3d = mode->flags & DRM_MODE_FLAG_3D_MASK;
-
-		if (drm_valid_hdmi_vic(vendor_if_vic) && !is_s3d)
-			frame->video_code = 0;
-	}
+	vic = drm_mode_cea_vic(connector, mode);
+	hdmi_vic = drm_mode_hdmi_vic(connector, mode);
 
 	frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
 
@@ -5134,11 +5423,15 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 
 	/*
 	 * Populate picture aspect ratio from either
-	 * user input (if specified) or from the CEA mode list.
+	 * user input (if specified) or from the CEA/HDMI mode lists.
 	 */
 	picture_aspect = mode->picture_aspect_ratio;
-	if (picture_aspect == HDMI_PICTURE_ASPECT_NONE)
-		picture_aspect = drm_get_cea_aspect_ratio(frame->video_code);
+	if (picture_aspect == HDMI_PICTURE_ASPECT_NONE) {
+		if (vic)
+			picture_aspect = drm_get_cea_aspect_ratio(vic);
+		else if (hdmi_vic)
+			picture_aspect = drm_get_hdmi_aspect_ratio(hdmi_vic);
+	}
 
 	/*
 	 * The infoframe can't convey anything but none, 4:3
@@ -5146,12 +5439,20 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 	 * we can only satisfy it by specifying the right VIC.
 	 */
 	if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) {
-		if (picture_aspect !=
-		    drm_get_cea_aspect_ratio(frame->video_code))
+		if (vic) {
+			if (picture_aspect != drm_get_cea_aspect_ratio(vic))
+				return -EINVAL;
+		} else if (hdmi_vic) {
+			if (picture_aspect != drm_get_hdmi_aspect_ratio(hdmi_vic))
+				return -EINVAL;
+		} else {
 			return -EINVAL;
+		}
+
 		picture_aspect = HDMI_PICTURE_ASPECT_NONE;
 	}
 
+	frame->video_code = vic;
 	frame->picture_aspect = picture_aspect;
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
 	frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
@@ -5285,6 +5586,23 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame,
 }
 EXPORT_SYMBOL(drm_hdmi_avi_infoframe_quant_range);
 
+/**
+ * drm_hdmi_avi_infoframe_bars() - fill the HDMI AVI infoframe
+ *                                 bar information
+ * @frame: HDMI AVI infoframe
+ * @conn_state: connector state
+ */
+void
+drm_hdmi_avi_infoframe_bars(struct hdmi_avi_infoframe *frame,
+			    const struct drm_connector_state *conn_state)
+{
+	frame->right_bar = conn_state->tv.margins.right;
+	frame->left_bar = conn_state->tv.margins.left;
+	frame->top_bar = conn_state->tv.margins.top;
+	frame->bottom_bar = conn_state->tv.margins.bottom;
+}
+EXPORT_SYMBOL(drm_hdmi_avi_infoframe_bars);
+
 static enum hdmi_3d_structure
 s3d_structure_from_display_mode(const struct drm_display_mode *mode)
 {
@@ -5337,8 +5655,6 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
 	bool has_hdmi_infoframe = connector ?
 		connector->display_info.has_hdmi_infoframe : false;
 	int err;
-	u32 s3d_flags;
-	u8 vic;
 
 	if (!frame || !mode)
 		return -EINVAL;
@@ -5346,8 +5662,9 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
 	if (!has_hdmi_infoframe)
 		return -EINVAL;
 
-	vic = drm_match_hdmi_mode(mode);
-	s3d_flags = mode->flags & DRM_MODE_FLAG_3D_MASK;
+	err = hdmi_vendor_infoframe_init(frame);
+	if (err < 0)
+		return err;
 
 	/*
 	 * Even if it's not absolutely necessary to send the infoframe
@@ -5358,15 +5675,7 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
 	 * mode if the source simply stops sending the infoframe when
 	 * it wants to switch from 3D to 2D.
 	 */
-
-	if (vic && s3d_flags)
-		return -EINVAL;
-
-	err = hdmi_vendor_infoframe_init(frame);
-	if (err < 0)
-		return err;
-
-	frame->vic = vic;
+	frame->vic = drm_mode_hdmi_vic(connector, mode);
 	frame->s3d_struct = s3d_structure_from_display_mode(mode);
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index d38b3b255926..37d8ba3ddb46 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -175,7 +175,7 @@ static void *edid_load(struct drm_connector *connector, const char *name,
 	u8 *edid;
 	int fwsize, builtin;
 	int i, valid_extensions = 0;
-	bool print_bad_edid = !connector->bad_edid_counter || (drm_debug & DRM_UT_KMS);
+	bool print_bad_edid = !connector->bad_edid_counter || drm_debug_enabled(DRM_UT_KMS);
 
 	builtin = match_string(generic_edid_name, GENERIC_EDIDS, name);
 	if (builtin >= 0) {
diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c
index 7fb47b7b8b44..e555281f43d4 100644
--- a/drivers/gpu/drm/drm_encoder.c
+++ b/drivers/gpu/drm/drm_encoder.c
@@ -22,6 +22,7 @@
 
 #include <linux/export.h>
 
+#include <drm/drm_bridge.h>
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
@@ -139,6 +140,7 @@ int drm_encoder_init(struct drm_device *dev,
 		goto out_put;
 	}
 
+	INIT_LIST_HEAD(&encoder->bridge_chain);
 	list_add_tail(&encoder->head, &dev->mode_config.encoder_list);
 	encoder->index = dev->mode_config.num_encoder++;
 
@@ -159,22 +161,16 @@ EXPORT_SYMBOL(drm_encoder_init);
 void drm_encoder_cleanup(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
+	struct drm_bridge *bridge, *next;
 
 	/* Note that the encoder_list is considered to be static; should we
 	 * remove the drm_encoder at runtime we would have to decrement all
 	 * the indices on the drm_encoder after us in the encoder_list.
 	 */
 
-	if (encoder->bridge) {
-		struct drm_bridge *bridge = encoder->bridge;
-		struct drm_bridge *next;
-
-		while (bridge) {
-			next = bridge->next;
-			drm_bridge_detach(bridge);
-			bridge = next;
-		}
-	}
+	list_for_each_entry_safe(bridge, next, &encoder->bridge_chain,
+				 chain_node)
+		drm_bridge_detach(bridge);
 
 	drm_mode_object_unregister(dev, &encoder->base);
 	kfree(encoder->name);
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c0b0f603af63..9801c0333eca 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -9,6 +9,7 @@
  *  Copyright (C) 2012 Red Hat
  */
 
+#include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_cma_helper.h>
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a7ba5b4902d6..4c7cbce7bae7 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -46,6 +46,7 @@
 #include <drm/drm_print.h>
 #include <drm/drm_vblank.h>
 
+#include "drm_crtc_helper_internal.h"
 #include "drm_internal.h"
 
 static bool drm_fbdev_emulation = true;
@@ -91,9 +92,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  *
  * Drivers that support a dumb buffer with a virtual address and mmap support,
  * should try out the generic fbdev emulation using drm_fbdev_generic_setup().
- *
- * Setup fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it
- * down by calling drm_fb_helper_fbdev_teardown().
+ * It will automatically set up deferred I/O if the driver requires a shadow
+ * buffer.
  *
  * At runtime drivers should restore the fbdev console by using
  * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback.
@@ -126,8 +126,10 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * always run in process context since the fb_*() function could be running in
  * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io
  * callback it will also schedule dirty_work with the damage collected from the
- * mmap page writes. Drivers can use drm_fb_helper_defio_init() to setup
- * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()).
+ * mmap page writes.
+ *
+ * Deferred I/O is not compatible with SHMEM. Such drivers should request an
+ * fbdev shadow buffer and call drm_fbdev_generic_setup() instead.
  */
 
 static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc)
@@ -189,6 +191,7 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 {
 	struct drm_fb_helper *helper = info->par;
 	struct drm_client_dev *client = &helper->client;
+	struct drm_device *dev = helper->dev;
 	struct drm_crtc *crtc;
 	const struct drm_crtc_helper_funcs *funcs;
 	struct drm_mode_set *mode_set;
@@ -207,7 +210,7 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 			continue;
 
 		if (!fb) {
-			DRM_ERROR("no fb to restore??\n");
+			drm_err(dev, "no fb to restore?\n");
 			continue;
 		}
 
@@ -561,8 +564,7 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_fbi);
  * drm_fb_helper_fini - finialize a &struct drm_fb_helper
  * @fb_helper: driver-allocated fbdev helper, can be NULL
  *
- * This cleans up all remaining resources associated with @fb_helper. Must be
- * called after drm_fb_helper_unlink_fbi() was called.
+ * This cleans up all remaining resources associated with @fb_helper.
  */
 void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 {
@@ -602,19 +604,6 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_fini);
 
-/**
- * drm_fb_helper_unlink_fbi - wrapper around unlink_framebuffer
- * @fb_helper: driver-allocated fbdev helper, can be NULL
- *
- * A wrapper around unlink_framebuffer implemented by fbdev core
- */
-void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper)
-{
-	if (fb_helper && fb_helper->fbdev)
-		unlink_framebuffer(fb_helper->fbdev);
-}
-EXPORT_SYMBOL(drm_fb_helper_unlink_fbi);
-
 static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper)
 {
 	struct drm_device *dev = fb_helper->dev;
@@ -679,49 +668,6 @@ void drm_fb_helper_deferred_io(struct fb_info *info,
 EXPORT_SYMBOL(drm_fb_helper_deferred_io);
 
 /**
- * drm_fb_helper_defio_init - fbdev deferred I/O initialization
- * @fb_helper: driver-allocated fbdev helper
- *
- * This function allocates &fb_deferred_io, sets callback to
- * drm_fb_helper_deferred_io(), delay to 50ms and calls fb_deferred_io_init().
- * It should be called from the &drm_fb_helper_funcs->fb_probe callback.
- * drm_fb_helper_fbdev_teardown() cleans up deferred I/O.
- *
- * NOTE: A copy of &fb_ops is made and assigned to &info->fbops. This is done
- * because fb_deferred_io_cleanup() clears &fbops->fb_mmap and would thereby
- * affect other instances of that &fb_ops.
- *
- * Returns:
- * 0 on success or a negative error code on failure.
- */
-int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
-{
-	struct fb_info *info = fb_helper->fbdev;
-	struct fb_deferred_io *fbdefio;
-	struct fb_ops *fbops;
-
-	fbdefio = kzalloc(sizeof(*fbdefio), GFP_KERNEL);
-	fbops = kzalloc(sizeof(*fbops), GFP_KERNEL);
-	if (!fbdefio || !fbops) {
-		kfree(fbdefio);
-		kfree(fbops);
-		return -ENOMEM;
-	}
-
-	info->fbdefio = fbdefio;
-	fbdefio->delay = msecs_to_jiffies(50);
-	fbdefio->deferred_io = drm_fb_helper_deferred_io;
-
-	*fbops = *info->fbops;
-	info->fbops = fbops;
-
-	fb_deferred_io_init(info);
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_fb_helper_defio_init);
-
-/**
  * drm_fb_helper_sys_read - wrapper around fb_sys_read
  * @info: fb_info struct pointer
  * @buf: userspace buffer to read from framebuffer memory
@@ -1303,12 +1249,13 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 {
 	struct drm_fb_helper *fb_helper = info->par;
 	struct drm_framebuffer *fb = fb_helper->fb;
+	struct drm_device *dev = fb_helper->dev;
 
 	if (in_dbg_master())
 		return -EINVAL;
 
 	if (var->pixclock != 0) {
-		DRM_DEBUG("fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n");
+		drm_dbg_kms(dev, "fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n");
 		var->pixclock = 0;
 	}
 
@@ -1320,10 +1267,10 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	 * Changes struct fb_var_screeninfo are currently not pushed back
 	 * to KMS, hence fail if different settings are requested.
 	 */
-	if (var->bits_per_pixel != fb->format->cpp[0] * 8 ||
+	if (var->bits_per_pixel > fb->format->cpp[0] * 8 ||
 	    var->xres > fb->width || var->yres > fb->height ||
 	    var->xres_virtual > fb->width || var->yres_virtual > fb->height) {
-		DRM_DEBUG("fb requested width/height/bpp can't fit in current fb "
+		drm_dbg_kms(dev, "fb requested width/height/bpp can't fit in current fb "
 			  "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n",
 			  var->xres, var->yres, var->bits_per_pixel,
 			  var->xres_virtual, var->yres_virtual,
@@ -1346,11 +1293,16 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	}
 
 	/*
+	 * Likewise, bits_per_pixel should be rounded up to a supported value.
+	 */
+	var->bits_per_pixel = fb->format->cpp[0] * 8;
+
+	/*
 	 * drm fbdev emulation doesn't support changing the pixel format at all,
 	 * so reject all pixel format changing requests.
 	 */
 	if (!drm_fb_pixel_format_equal(var, &info->var)) {
-		DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n");
+		drm_dbg_kms(dev, "fbdev emulation doesn't support changing the pixel format\n");
 		return -EINVAL;
 	}
 
@@ -1375,7 +1327,7 @@ int drm_fb_helper_set_par(struct fb_info *info)
 		return -EBUSY;
 
 	if (var->pixclock != 0) {
-		DRM_ERROR("PIXEL CLOCK SET\n");
+		drm_err(fb_helper->dev, "PIXEL CLOCK SET\n");
 		return -EINVAL;
 	}
 
@@ -1485,6 +1437,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 					 int preferred_bpp)
 {
 	struct drm_client_dev *client = &fb_helper->client;
+	struct drm_device *dev = fb_helper->dev;
 	int ret = 0;
 	int crtc_count = 0;
 	struct drm_connector_list_iter conn_iter;
@@ -1548,7 +1501,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		struct drm_plane *plane = crtc->primary;
 		int j;
 
-		DRM_DEBUG("test CRTC %u primary plane\n", drm_crtc_index(crtc));
+		drm_dbg_kms(dev, "test CRTC %u primary plane\n", drm_crtc_index(crtc));
 
 		for (j = 0; j < plane->format_count; j++) {
 			const struct drm_format_info *fmt;
@@ -1581,7 +1534,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		}
 	}
 	if (sizes.surface_depth != best_depth && best_depth) {
-		DRM_INFO("requested bpp %d, scaled depth down to %d",
+		drm_info(dev, "requested bpp %d, scaled depth down to %d",
 			 sizes.surface_bpp, best_depth);
 		sizes.surface_depth = best_depth;
 	}
@@ -1613,7 +1566,9 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		for (j = 0; j < mode_set->num_connectors; j++) {
 			struct drm_connector *connector = mode_set->connectors[j];
 
-			if (connector->has_tile) {
+			if (connector->has_tile &&
+			    desired_mode->hdisplay == connector->tile_h_size &&
+			    desired_mode->vdisplay == connector->tile_v_size) {
 				lasth = (connector->tile_h_loc == (connector->num_h_tile - 1));
 				lastv = (connector->tile_v_loc == (connector->num_v_tile - 1));
 				/* cloning to multiple tiles is just crazy-talk, so: */
@@ -1629,7 +1584,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	mutex_unlock(&client->modeset_mutex);
 
 	if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) {
-		DRM_INFO("Cannot find any crtc or sizes\n");
+		drm_info(dev, "Cannot find any crtc or sizes\n");
 
 		/* First time: disable all crtc's.. */
 		if (!fb_helper->deferred_setup)
@@ -1944,7 +1899,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 
 	drm_master_internal_release(fb_helper->dev);
 
-	DRM_DEBUG_KMS("\n");
+	drm_dbg_kms(fb_helper->dev, "\n");
 
 	drm_client_modeset_probe(&fb_helper->client, fb_helper->fb->width, fb_helper->fb->height);
 	drm_setup_crtcs_fb(fb_helper);
@@ -1957,108 +1912,6 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
 
 /**
- * drm_fb_helper_fbdev_setup() - Setup fbdev emulation
- * @dev: DRM device
- * @fb_helper: fbdev helper structure to set up
- * @funcs: fbdev helper functions
- * @preferred_bpp: Preferred bits per pixel for the device.
- *                 @dev->mode_config.preferred_depth is used if this is zero.
- * @max_conn_count: Maximum number of connectors (not used)
- *
- * This function sets up fbdev emulation and registers fbdev for access by
- * userspace. If all connectors are disconnected, setup is deferred to the next
- * time drm_fb_helper_hotplug_event() is called.
- * The caller must to provide a &drm_fb_helper_funcs->fb_probe callback
- * function.
- *
- * Use drm_fb_helper_fbdev_teardown() to destroy the fbdev.
- *
- * See also: drm_fb_helper_initial_config(), drm_fbdev_generic_setup().
- *
- * Returns:
- * Zero on success or negative error code on failure.
- */
-int drm_fb_helper_fbdev_setup(struct drm_device *dev,
-			      struct drm_fb_helper *fb_helper,
-			      const struct drm_fb_helper_funcs *funcs,
-			      unsigned int preferred_bpp,
-			      unsigned int max_conn_count)
-{
-	int ret;
-
-	if (!preferred_bpp)
-		preferred_bpp = dev->mode_config.preferred_depth;
-	if (!preferred_bpp)
-		preferred_bpp = 32;
-
-	drm_fb_helper_prepare(dev, fb_helper, funcs);
-
-	ret = drm_fb_helper_init(dev, fb_helper, 0);
-	if (ret < 0) {
-		DRM_DEV_ERROR(dev->dev, "fbdev: Failed to initialize (ret=%d)\n", ret);
-		return ret;
-	}
-
-	if (!drm_drv_uses_atomic_modeset(dev))
-		drm_helper_disable_unused_functions(dev);
-
-	ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp);
-	if (ret < 0) {
-		DRM_DEV_ERROR(dev->dev, "fbdev: Failed to set configuration (ret=%d)\n", ret);
-		goto err_drm_fb_helper_fini;
-	}
-
-	return 0;
-
-err_drm_fb_helper_fini:
-	drm_fb_helper_fbdev_teardown(dev);
-
-	return ret;
-}
-EXPORT_SYMBOL(drm_fb_helper_fbdev_setup);
-
-/**
- * drm_fb_helper_fbdev_teardown - Tear down fbdev emulation
- * @dev: DRM device
- *
- * This function unregisters fbdev if not already done and cleans up the
- * associated resources including the &drm_framebuffer.
- * The driver is responsible for freeing the &drm_fb_helper structure which is
- * stored in &drm_device->fb_helper. Do note that this pointer has been cleared
- * when this function returns.
- *
- * In order to support device removal/unplug while file handles are still open,
- * drm_fb_helper_unregister_fbi() should be called on device removal and
- * drm_fb_helper_fbdev_teardown() in the &drm_driver->release callback when
- * file handles are closed.
- */
-void drm_fb_helper_fbdev_teardown(struct drm_device *dev)
-{
-	struct drm_fb_helper *fb_helper = dev->fb_helper;
-	struct fb_ops *fbops = NULL;
-
-	if (!fb_helper)
-		return;
-
-	/* Unregister if it hasn't been done already */
-	if (fb_helper->fbdev && fb_helper->fbdev->dev)
-		drm_fb_helper_unregister_fbi(fb_helper);
-
-	if (fb_helper->fbdev && fb_helper->fbdev->fbdefio) {
-		fb_deferred_io_cleanup(fb_helper->fbdev);
-		kfree(fb_helper->fbdev->fbdefio);
-		fbops = fb_helper->fbdev->fbops;
-	}
-
-	drm_fb_helper_fini(fb_helper);
-	kfree(fbops);
-
-	if (fb_helper->fb)
-		drm_framebuffer_remove(fb_helper->fb);
-}
-EXPORT_SYMBOL(drm_fb_helper_fbdev_teardown);
-
-/**
  * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation
  * @dev: DRM device
  *
@@ -2111,7 +1964,6 @@ static int drm_fbdev_fb_release(struct fb_info *info, int user)
 static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
 {
 	struct fb_info *fbi = fb_helper->fbdev;
-	struct fb_ops *fbops = NULL;
 	void *shadow = NULL;
 
 	if (!fb_helper->dev)
@@ -2120,15 +1972,11 @@ static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
 	if (fbi && fbi->fbdefio) {
 		fb_deferred_io_cleanup(fbi);
 		shadow = fbi->screen_buffer;
-		fbops = fbi->fbops;
 	}
 
 	drm_fb_helper_fini(fb_helper);
 
-	if (shadow) {
-		vfree(shadow);
-		kfree(fbops);
-	}
+	vfree(shadow);
 
 	drm_client_framebuffer_delete(fb_helper->buffer);
 }
@@ -2159,7 +2007,7 @@ static int drm_fbdev_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 		return -ENODEV;
 }
 
-static struct fb_ops drm_fbdev_fb_ops = {
+static const struct fb_ops drm_fbdev_fb_ops = {
 	.owner		= THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open	= drm_fbdev_fb_open,
@@ -2178,32 +2026,26 @@ static struct fb_deferred_io drm_fbdev_defio = {
 	.deferred_io	= drm_fb_helper_deferred_io,
 };
 
-/**
- * drm_fb_helper_generic_probe - Generic fbdev emulation probe helper
- * @fb_helper: fbdev helper structure
- * @sizes: describes fbdev size and scanout surface size
- *
+/*
  * This function uses the client API to create a framebuffer backed by a dumb buffer.
  *
  * The _sys_ versions are used for &fb_ops.fb_read, fb_write, fb_fillrect,
  * fb_copyarea, fb_imageblit.
- *
- * Returns:
- * Zero on success or negative error code on failure.
  */
-int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
-				struct drm_fb_helper_surface_size *sizes)
+static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
+				       struct drm_fb_helper_surface_size *sizes)
 {
 	struct drm_client_dev *client = &fb_helper->client;
+	struct drm_device *dev = fb_helper->dev;
 	struct drm_client_buffer *buffer;
 	struct drm_framebuffer *fb;
 	struct fb_info *fbi;
 	u32 format;
 	void *vaddr;
 
-	DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d)\n",
-		      sizes->surface_width, sizes->surface_height,
-		      sizes->surface_bpp);
+	drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n",
+		    sizes->surface_width, sizes->surface_height,
+		    sizes->surface_bpp);
 
 	format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
 	buffer = drm_client_framebuffer_create(client, sizes->surface_width,
@@ -2226,24 +2068,10 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
 	drm_fb_helper_fill_info(fbi, fb_helper, sizes);
 
 	if (drm_fbdev_use_shadow_fb(fb_helper)) {
-		struct fb_ops *fbops;
-		void *shadow;
-
-		/*
-		 * fb_deferred_io_cleanup() clears &fbops->fb_mmap so a per
-		 * instance version is necessary.
-		 */
-		fbops = kzalloc(sizeof(*fbops), GFP_KERNEL);
-		shadow = vzalloc(fbi->screen_size);
-		if (!fbops || !shadow) {
-			kfree(fbops);
-			vfree(shadow);
+		fbi->screen_buffer = vzalloc(fbi->screen_size);
+		if (!fbi->screen_buffer)
 			return -ENOMEM;
-		}
 
-		*fbops = *fbi->fbops;
-		fbi->fbops = fbops;
-		fbi->screen_buffer = shadow;
 		fbi->fbdefio = &drm_fbdev_defio;
 
 		fb_deferred_io_init(fbi);
@@ -2264,7 +2092,6 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
 
 	return 0;
 }
-EXPORT_SYMBOL(drm_fb_helper_generic_probe);
 
 static const struct drm_fb_helper_funcs drm_fb_helper_generic_funcs = {
 	.fb_probe = drm_fb_helper_generic_probe,
@@ -2302,7 +2129,7 @@ static int drm_fbdev_client_hotplug(struct drm_client_dev *client)
 		return drm_fb_helper_hotplug_event(dev->fb_helper);
 
 	if (!dev->mode_config.num_connector) {
-		DRM_DEV_DEBUG(dev->dev, "No connectors found, will not create framebuffer!\n");
+		drm_dbg_kms(dev, "No connectors found, will not create framebuffer!\n");
 		return 0;
 	}
 
@@ -2327,7 +2154,7 @@ err:
 	fb_helper->dev = NULL;
 	fb_helper->fbdev = NULL;
 
-	DRM_DEV_ERROR(dev->dev, "fbdev: Failed to setup generic emulation (ret=%d)\n", ret);
+	drm_err(dev, "fbdev: Failed to setup generic emulation (ret=%d)\n", ret);
 
 	return ret;
 }
@@ -2346,8 +2173,7 @@ static const struct drm_client_funcs drm_fbdev_client_funcs = {
  *                 @dev->mode_config.preferred_depth is used if this is zero.
  *
  * This function sets up generic fbdev emulation for drivers that supports
- * dumb buffers with a virtual address and that can be mmap'ed. If the driver
- * does not support these functions, it could use drm_fb_helper_fbdev_setup().
+ * dumb buffers with a virtual address and that can be mmap'ed.
  *
  * Restore, hotplug events and teardown are all taken care of. Drivers that do
  * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves.
@@ -2355,7 +2181,10 @@ static const struct drm_client_funcs drm_fbdev_client_funcs = {
  *
  * Drivers that set the dirty callback on their framebuffer will get a shadow
  * fbdev buffer that is blitted onto the real buffer. This is done in order to
- * make deferred I/O work with all kinds of buffers.
+ * make deferred I/O work with all kinds of buffers. A shadow buffer can be
+ * requested explicitly by setting struct drm_mode_config.prefer_shadow or
+ * struct drm_mode_config.prefer_shadow_fbdev to true beforehand. This is
+ * required to use generic fbdev emulation with SHMEM helpers.
  *
  * This function is safe to call even when there are no connectors present.
  * Setup will be retried on the next hotplug event.
@@ -2382,7 +2211,7 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 	ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
 	if (ret) {
 		kfree(fb_helper);
-		DRM_DEV_ERROR(dev->dev, "Failed to register client: %d\n", ret);
+		drm_err(dev, "Failed to register client: %d\n", ret);
 		return ret;
 	}
 
@@ -2394,7 +2223,7 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 
 	ret = drm_fbdev_client_hotplug(&fb_helper->client);
 	if (ret)
-		DRM_DEV_DEBUG(dev->dev, "client hotplug ret=%d\n", ret);
+		drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
 
 	drm_client_register(&fb_helper->client);
 
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index ea34bc991858..92d16724f949 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -31,7 +31,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/anon_inodes.h>
 #include <linux/dma-fence.h>
+#include <linux/file.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
@@ -285,7 +287,7 @@ static int drm_cpu_valid(void)
 }
 
 /*
- * Called whenever a process opens /dev/drm.
+ * Called whenever a process opens a drm node
  *
  * \param filp file pointer.
  * \param minor acquired minor-object.
@@ -754,3 +756,43 @@ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e)
 	spin_unlock_irqrestore(&dev->event_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_send_event);
+
+/**
+ * mock_drm_getfile - Create a new struct file for the drm device
+ * @minor: drm minor to wrap (e.g. #drm_device.primary)
+ * @flags: file creation mode (O_RDWR etc)
+ *
+ * This create a new struct file that wraps a DRM file context around a
+ * DRM minor. This mimicks userspace opening e.g. /dev/dri/card0, but without
+ * invoking userspace. The struct file may be operated on using its f_op
+ * (the drm_device.driver.fops) to mimick userspace operations, or be supplied
+ * to userspace facing functions as an internal/anonymous client.
+ *
+ * RETURNS:
+ * Pointer to newly created struct file, ERR_PTR on failure.
+ */
+struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
+{
+	struct drm_device *dev = minor->dev;
+	struct drm_file *priv;
+	struct file *file;
+
+	priv = drm_file_alloc(minor);
+	if (IS_ERR(priv))
+		return ERR_CAST(priv);
+
+	file = anon_inode_getfile("drm", dev->driver->fops, priv, flags);
+	if (IS_ERR(file)) {
+		drm_file_free(priv);
+		return file;
+	}
+
+	/* Everyone shares a single global address space */
+	file->f_mapping = dev->anon_inode->i_mapping;
+
+	drm_dev_get(dev);
+	priv->filp = file;
+
+	return file;
+}
+EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index c630064ccf41..b234bfaeda06 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -253,17 +253,17 @@ const struct drm_format_info *__drm_format_info(u32 format)
 		  .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 },
 		  .hsub = 2, .vsub = 2, .is_yuv = true },
 		{ .format = DRM_FORMAT_P010,            .depth = 0,  .num_planes = 2,
-		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 },
+		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 },
 		  .hsub = 2, .vsub = 2, .is_yuv = true},
 		{ .format = DRM_FORMAT_P012,		.depth = 0,  .num_planes = 2,
-		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 },
+		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 },
 		   .hsub = 2, .vsub = 2, .is_yuv = true},
 		{ .format = DRM_FORMAT_P016,		.depth = 0,  .num_planes = 2,
-		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 },
+		  .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 },
 		  .hsub = 2, .vsub = 2, .is_yuv = true},
 		{ .format = DRM_FORMAT_P210,		.depth = 0,
 		  .num_planes = 2, .char_per_block = { 2, 4, 0 },
-		  .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 }, .hsub = 2,
+		  .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2,
 		  .vsub = 1, .is_yuv = true },
 		{ .format = DRM_FORMAT_VUY101010,	.depth = 0,
 		  .num_planes = 1, .cpp = { 0, 0, 0 }, .hsub = 1, .vsub = 1,
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6854f5867d51..a9e4a610445a 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1099,23 +1099,12 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
 		     struct vm_area_struct *vma)
 {
 	struct drm_device *dev = obj->dev;
+	int ret;
 
 	/* Check for valid size. */
 	if (obj_size < vma->vm_end - vma->vm_start)
 		return -EINVAL;
 
-	if (obj->funcs && obj->funcs->vm_ops)
-		vma->vm_ops = obj->funcs->vm_ops;
-	else if (dev->driver->gem_vm_ops)
-		vma->vm_ops = dev->driver->gem_vm_ops;
-	else
-		return -EINVAL;
-
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_private_data = obj;
-	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
-
 	/* Take a ref for this mapping of the object, so that the fault
 	 * handler can dereference the mmap offset's pointer to the object.
 	 * This reference is cleaned up by the corresponding vm_close
@@ -1124,6 +1113,30 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
 	 */
 	drm_gem_object_get(obj);
 
+	if (obj->funcs && obj->funcs->mmap) {
+		ret = obj->funcs->mmap(obj, vma);
+		if (ret) {
+			drm_gem_object_put_unlocked(obj);
+			return ret;
+		}
+		WARN_ON(!(vma->vm_flags & VM_DONTEXPAND));
+	} else {
+		if (obj->funcs && obj->funcs->vm_ops)
+			vma->vm_ops = obj->funcs->vm_ops;
+		else if (dev->driver->gem_vm_ops)
+			vma->vm_ops = dev->driver->gem_vm_ops;
+		else {
+			drm_gem_object_put_unlocked(obj);
+			return -EINVAL;
+		}
+
+		vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+		vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+	}
+
+	vma->vm_private_data = obj;
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_gem_mmap_obj);
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index b9bcd310ca2d..3a7ace19a902 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -74,8 +74,7 @@ drm_gem_fb_alloc(struct drm_device *dev,
 
 	ret = drm_framebuffer_init(dev, fb, funcs);
 	if (ret) {
-		DRM_DEV_ERROR(dev->dev, "Failed to init framebuffer: %d\n",
-			      ret);
+		drm_err(dev, "Failed to init framebuffer: %d\n", ret);
 		kfree(fb);
 		return ERR_PTR(ret);
 	}
@@ -160,7 +159,7 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file,
 
 		objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]);
 		if (!objs[i]) {
-			DRM_DEBUG_KMS("Failed to lookup GEM object\n");
+			drm_dbg_kms(dev, "Failed to lookup GEM object\n");
 			ret = -ENOENT;
 			goto err_gem_object_put;
 		}
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index f5918707672f..a421a2eed48a 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -32,7 +32,7 @@ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = {
 	.get_sg_table = drm_gem_shmem_get_sg_table,
 	.vmap = drm_gem_shmem_vmap,
 	.vunmap = drm_gem_shmem_vunmap,
-	.vm_ops = &drm_gem_shmem_vm_ops,
+	.mmap = drm_gem_shmem_mmap,
 };
 
 /**
@@ -505,39 +505,33 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
 	drm_gem_vm_close(vma);
 }
 
-const struct vm_operations_struct drm_gem_shmem_vm_ops = {
+static const struct vm_operations_struct drm_gem_shmem_vm_ops = {
 	.fault = drm_gem_shmem_fault,
 	.open = drm_gem_shmem_vm_open,
 	.close = drm_gem_shmem_vm_close,
 };
-EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_ops);
 
 /**
  * drm_gem_shmem_mmap - Memory-map a shmem GEM object
- * @filp: File object
+ * @obj: gem object
  * @vma: VMA for the area to be mapped
  *
  * This function implements an augmented version of the GEM DRM file mmap
  * operation for shmem objects. Drivers which employ the shmem helpers should
- * use this function as their &file_operations.mmap handler in the DRM device file's
- * file_operations structure.
- *
- * Instead of directly referencing this function, drivers should use the
- * DEFINE_DRM_GEM_SHMEM_FOPS() macro.
+ * use this function as their &drm_gem_object_funcs.mmap handler.
  *
  * Returns:
  * 0 on success or a negative error code on failure.
  */
-int drm_gem_shmem_mmap(struct file *filp, struct vm_area_struct *vma)
+int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
 	struct drm_gem_shmem_object *shmem;
 	int ret;
 
-	ret = drm_gem_mmap(filp, vma);
-	if (ret)
-		return ret;
+	/* Remove the fake offset */
+	vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node);
 
-	shmem = to_drm_gem_shmem_obj(vma->vm_private_data);
+	shmem = to_drm_gem_shmem_obj(obj);
 
 	ret = drm_gem_shmem_get_pages(shmem);
 	if (ret) {
@@ -545,12 +539,10 @@ int drm_gem_shmem_mmap(struct file *filp, struct vm_area_struct *vma)
 		return ret;
 	}
 
-	/* VM_PFNMAP was set by drm_gem_mmap() */
-	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_flags |= VM_MIXEDMAP;
-
-	/* Remove the fake offset */
-	vma->vm_pgoff -= drm_vma_node_start(&shmem->base.vma_node);
+	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+	vma->vm_ops = &drm_gem_shmem_vm_ops;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
new file mode 100644
index 000000000000..605a8a3da7f9
--- /dev/null
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/module.h>
+
+#include <drm/drm_gem_ttm_helper.h>
+
+/**
+ * DOC: overview
+ *
+ * This library provides helper functions for gem objects backed by
+ * ttm.
+ */
+
+/**
+ * drm_gem_ttm_print_info() - Print &ttm_buffer_object info for debugfs
+ * @p: DRM printer
+ * @indent: Tab indentation level
+ * @gem: GEM object
+ *
+ * This function can be used as &drm_gem_object_funcs.print_info
+ * callback.
+ */
+void drm_gem_ttm_print_info(struct drm_printer *p, unsigned int indent,
+			    const struct drm_gem_object *gem)
+{
+	static const char * const plname[] = {
+		[ TTM_PL_SYSTEM ] = "system",
+		[ TTM_PL_TT     ] = "tt",
+		[ TTM_PL_VRAM   ] = "vram",
+		[ TTM_PL_PRIV   ] = "priv",
+
+		[ 16 ]            = "cached",
+		[ 17 ]            = "uncached",
+		[ 18 ]            = "wc",
+		[ 19 ]            = "contig",
+
+		[ 21 ]            = "pinned", /* NO_EVICT */
+		[ 22 ]            = "topdown",
+	};
+	const struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+
+	drm_printf_indent(p, indent, "placement=");
+	drm_print_bits(p, bo->mem.placement, plname, ARRAY_SIZE(plname));
+	drm_printf(p, "\n");
+
+	if (bo->mem.bus.is_iomem) {
+		drm_printf_indent(p, indent, "bus.base=%lx\n",
+				  (unsigned long)bo->mem.bus.base);
+		drm_printf_indent(p, indent, "bus.offset=%lx\n",
+				  (unsigned long)bo->mem.bus.offset);
+	}
+}
+EXPORT_SYMBOL(drm_gem_ttm_print_info);
+
+/**
+ * drm_gem_ttm_mmap() - mmap &ttm_buffer_object
+ * @gem: GEM object.
+ * @vma: vm area.
+ *
+ * This function can be used as &drm_gem_object_funcs.mmap
+ * callback.
+ */
+int drm_gem_ttm_mmap(struct drm_gem_object *gem,
+		     struct vm_area_struct *vma)
+{
+	struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+	int ret;
+
+	ret = ttm_bo_mmap_obj(vma, bo);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * ttm has its own object refcounting, so drop gem reference
+	 * to avoid double accounting counting.
+	 */
+	drm_gem_object_put_unlocked(gem);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_gem_ttm_mmap);
+
+MODULE_DESCRIPTION("DRM gem ttm helpers");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index fd751078bae1..a4863326061a 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -1,10 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <drm/drm_gem_vram_helper.h>
+#include <drm/drm_debugfs.h>
 #include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_gem_vram_helper.h>
 #include <drm/drm_mode.h>
+#include <drm/drm_plane.h>
 #include <drm/drm_prime.h>
-#include <drm/drm_vram_mm_helper.h>
+#include <drm/drm_simple_kms_helper.h>
 #include <drm/ttm/ttm_page_alloc.h>
 
 static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
@@ -14,6 +20,11 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
  *
  * This library provides a GEM buffer object that is backed by video RAM
  * (VRAM). It can be used for framebuffer devices with dedicated memory.
+ *
+ * The data structure &struct drm_vram_mm and its helpers implement a memory
+ * manager for simple framebuffer devices with dedicated video memory. Buffer
+ * objects are either placed in video RAM or evicted to system memory. The rsp.
+ * buffer object is provided by &struct drm_gem_vram_object.
  */
 
 /*
@@ -26,6 +37,10 @@ static void drm_gem_vram_cleanup(struct drm_gem_vram_object *gbo)
 	 * TTM buffer object in 'bo' has already been cleaned
 	 * up; only release the GEM object.
 	 */
+
+	WARN_ON(gbo->kmap_use_count);
+	WARN_ON(gbo->kmap.virtual);
+
 	drm_gem_object_release(&gbo->bo.base);
 }
 
@@ -47,6 +62,7 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
 {
 	unsigned int i;
 	unsigned int c = 0;
+	u32 invariant_flags = pl_flag & TTM_PL_FLAG_TOPDOWN;
 
 	gbo->placement.placement = gbo->placements;
 	gbo->placement.busy_placement = gbo->placements;
@@ -54,15 +70,18 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
 	if (pl_flag & TTM_PL_FLAG_VRAM)
 		gbo->placements[c++].flags = TTM_PL_FLAG_WC |
 					     TTM_PL_FLAG_UNCACHED |
-					     TTM_PL_FLAG_VRAM;
+					     TTM_PL_FLAG_VRAM |
+					     invariant_flags;
 
 	if (pl_flag & TTM_PL_FLAG_SYSTEM)
 		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
-					     TTM_PL_FLAG_SYSTEM;
+					     TTM_PL_FLAG_SYSTEM |
+					     invariant_flags;
 
 	if (!c)
 		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
-					     TTM_PL_FLAG_SYSTEM;
+					     TTM_PL_FLAG_SYSTEM |
+					     invariant_flags;
 
 	gbo->placement.num_placement = c;
 	gbo->placement.num_busy_placement = c;
@@ -74,16 +93,19 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
 }
 
 static int drm_gem_vram_init(struct drm_device *dev,
-			     struct ttm_bo_device *bdev,
 			     struct drm_gem_vram_object *gbo,
-			     size_t size, unsigned long pg_align,
-			     bool interruptible)
+			     size_t size, unsigned long pg_align)
 {
+	struct drm_vram_mm *vmm = dev->vram_mm;
+	struct ttm_bo_device *bdev;
 	int ret;
 	size_t acc_size;
 
-	if (!gbo->bo.base.funcs)
-		gbo->bo.base.funcs = &drm_gem_vram_object_funcs;
+	if (WARN_ONCE(!vmm, "VRAM MM not initialized"))
+		return -EINVAL;
+	bdev = &vmm->bdev;
+
+	gbo->bo.base.funcs = &drm_gem_vram_object_funcs;
 
 	ret = drm_gem_object_init(dev, &gbo->bo.base, size);
 	if (ret)
@@ -95,7 +117,7 @@ static int drm_gem_vram_init(struct drm_device *dev,
 	drm_gem_vram_placement(gbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
 
 	ret = ttm_bo_init(bdev, &gbo->bo, size, ttm_bo_type_device,
-			  &gbo->placement, pg_align, interruptible, acc_size,
+			  &gbo->placement, pg_align, false, acc_size,
 			  NULL, NULL, ttm_buffer_object_destroy);
 	if (ret)
 		goto err_drm_gem_object_release;
@@ -110,29 +132,33 @@ err_drm_gem_object_release:
 /**
  * drm_gem_vram_create() - Creates a VRAM-backed GEM object
  * @dev:		the DRM device
- * @bdev:		the TTM BO device backing the object
  * @size:		the buffer size in bytes
  * @pg_align:		the buffer's alignment in multiples of the page size
- * @interruptible:	sleep interruptible if waiting for memory
  *
  * Returns:
  * A new instance of &struct drm_gem_vram_object on success, or
  * an ERR_PTR()-encoded error code otherwise.
  */
 struct drm_gem_vram_object *drm_gem_vram_create(struct drm_device *dev,
-						struct ttm_bo_device *bdev,
 						size_t size,
-						unsigned long pg_align,
-						bool interruptible)
+						unsigned long pg_align)
 {
 	struct drm_gem_vram_object *gbo;
 	int ret;
 
-	gbo = kzalloc(sizeof(*gbo), GFP_KERNEL);
-	if (!gbo)
-		return ERR_PTR(-ENOMEM);
+	if (dev->driver->gem_create_object) {
+		struct drm_gem_object *gem =
+			dev->driver->gem_create_object(dev, size);
+		if (!gem)
+			return ERR_PTR(-ENOMEM);
+		gbo = drm_gem_vram_of_gem(gem);
+	} else {
+		gbo = kzalloc(sizeof(*gbo), GFP_KERNEL);
+		if (!gbo)
+			return ERR_PTR(-ENOMEM);
+	}
 
-	ret = drm_gem_vram_init(dev, bdev, gbo, size, pg_align, interruptible);
+	ret = drm_gem_vram_init(dev, gbo, size, pg_align);
 	if (ret < 0)
 		goto err_kfree;
 
@@ -192,30 +218,12 @@ s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo)
 }
 EXPORT_SYMBOL(drm_gem_vram_offset);
 
-/**
- * drm_gem_vram_pin() - Pins a GEM VRAM object in a region.
- * @gbo:	the GEM VRAM object
- * @pl_flag:	a bitmask of possible memory regions
- *
- * Pinning a buffer object ensures that it is not evicted from
- * a memory region. A pinned buffer object has to be unpinned before
- * it can be pinned to another region. If the pl_flag argument is 0,
- * the buffer is pinned at its current location (video RAM or system
- * memory).
- *
- * Returns:
- * 0 on success, or
- * a negative error code otherwise.
- */
-int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag)
+static int drm_gem_vram_pin_locked(struct drm_gem_vram_object *gbo,
+				   unsigned long pl_flag)
 {
 	int i, ret;
 	struct ttm_operation_ctx ctx = { false, false };
 
-	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
-	if (ret < 0)
-		return ret;
-
 	if (gbo->pin_count)
 		goto out;
 
@@ -227,62 +235,123 @@ int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag)
 
 	ret = ttm_bo_validate(&gbo->bo, &gbo->placement, &ctx);
 	if (ret < 0)
-		goto err_ttm_bo_unreserve;
+		return ret;
 
 out:
 	++gbo->pin_count;
-	ttm_bo_unreserve(&gbo->bo);
 
 	return 0;
-
-err_ttm_bo_unreserve:
-	ttm_bo_unreserve(&gbo->bo);
-	return ret;
 }
-EXPORT_SYMBOL(drm_gem_vram_pin);
 
 /**
- * drm_gem_vram_unpin() - Unpins a GEM VRAM object
+ * drm_gem_vram_pin() - Pins a GEM VRAM object in a region.
  * @gbo:	the GEM VRAM object
+ * @pl_flag:	a bitmask of possible memory regions
+ *
+ * Pinning a buffer object ensures that it is not evicted from
+ * a memory region. A pinned buffer object has to be unpinned before
+ * it can be pinned to another region. If the pl_flag argument is 0,
+ * the buffer is pinned at its current location (video RAM or system
+ * memory).
+ *
+ * Small buffer objects, such as cursor images, can lead to memory
+ * fragmentation if they are pinned in the middle of video RAM. This
+ * is especially a problem on devices with only a small amount of
+ * video RAM. Fragmentation can prevent the primary framebuffer from
+ * fitting in, even though there's enough memory overall. The modifier
+ * DRM_GEM_VRAM_PL_FLAG_TOPDOWN marks the buffer object to be pinned
+ * at the high end of the memory region to avoid fragmentation.
  *
  * Returns:
  * 0 on success, or
  * a negative error code otherwise.
  */
-int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo)
+int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag)
 {
-	int i, ret;
-	struct ttm_operation_ctx ctx = { false, false };
+	int ret;
 
 	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
-	if (ret < 0)
+	if (ret)
 		return ret;
+	ret = drm_gem_vram_pin_locked(gbo, pl_flag);
+	ttm_bo_unreserve(&gbo->bo);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_vram_pin);
+
+static int drm_gem_vram_unpin_locked(struct drm_gem_vram_object *gbo)
+{
+	int i, ret;
+	struct ttm_operation_ctx ctx = { false, false };
 
 	if (WARN_ON_ONCE(!gbo->pin_count))
-		goto out;
+		return 0;
 
 	--gbo->pin_count;
 	if (gbo->pin_count)
-		goto out;
+		return 0;
 
 	for (i = 0; i < gbo->placement.num_placement ; ++i)
 		gbo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 
 	ret = ttm_bo_validate(&gbo->bo, &gbo->placement, &ctx);
 	if (ret < 0)
-		goto err_ttm_bo_unreserve;
-
-out:
-	ttm_bo_unreserve(&gbo->bo);
+		return ret;
 
 	return 0;
+}
 
-err_ttm_bo_unreserve:
+/**
+ * drm_gem_vram_unpin() - Unpins a GEM VRAM object
+ * @gbo:	the GEM VRAM object
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo)
+{
+	int ret;
+
+	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
+	if (ret)
+		return ret;
+	ret = drm_gem_vram_unpin_locked(gbo);
 	ttm_bo_unreserve(&gbo->bo);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_gem_vram_unpin);
 
+static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo,
+				      bool map, bool *is_iomem)
+{
+	int ret;
+	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+
+	if (gbo->kmap_use_count > 0)
+		goto out;
+
+	if (kmap->virtual || !map)
+		goto out;
+
+	ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap);
+	if (ret)
+		return ERR_PTR(ret);
+
+out:
+	if (!kmap->virtual) {
+		if (is_iomem)
+			*is_iomem = false;
+		return NULL; /* not mapped; don't increment ref */
+	}
+	++gbo->kmap_use_count;
+	if (is_iomem)
+		return ttm_kmap_obj_virtual(kmap, is_iomem);
+	return kmap->virtual;
+}
+
 /**
  * drm_gem_vram_kmap() - Maps a GEM VRAM object into kernel address space
  * @gbo:	the GEM VRAM object
@@ -304,50 +373,127 @@ void *drm_gem_vram_kmap(struct drm_gem_vram_object *gbo, bool map,
 			bool *is_iomem)
 {
 	int ret;
-	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
-
-	if (kmap->virtual || !map)
-		goto out;
+	void *virtual;
 
-	ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap);
+	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
 	if (ret)
 		return ERR_PTR(ret);
+	virtual = drm_gem_vram_kmap_locked(gbo, map, is_iomem);
+	ttm_bo_unreserve(&gbo->bo);
 
-out:
-	if (!is_iomem)
-		return kmap->virtual;
-	if (!kmap->virtual) {
-		*is_iomem = false;
-		return NULL;
-	}
-	return ttm_kmap_obj_virtual(kmap, is_iomem);
+	return virtual;
 }
 EXPORT_SYMBOL(drm_gem_vram_kmap);
 
+static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo)
+{
+	if (WARN_ON_ONCE(!gbo->kmap_use_count))
+		return;
+	if (--gbo->kmap_use_count > 0)
+		return;
+
+	/*
+	 * Permanently mapping and unmapping buffers adds overhead from
+	 * updating the page tables and creates debugging output. Therefore,
+	 * we delay the actual unmap operation until the BO gets evicted
+	 * from memory. See drm_gem_vram_bo_driver_move_notify().
+	 */
+}
+
 /**
  * drm_gem_vram_kunmap() - Unmaps a GEM VRAM object
  * @gbo:	the GEM VRAM object
  */
 void drm_gem_vram_kunmap(struct drm_gem_vram_object *gbo)
 {
-	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+	int ret;
 
-	if (!kmap->virtual)
+	ret = ttm_bo_reserve(&gbo->bo, false, false, NULL);
+	if (WARN_ONCE(ret, "ttm_bo_reserve_failed(): ret=%d\n", ret))
 		return;
-
-	ttm_bo_kunmap(kmap);
-	kmap->virtual = NULL;
+	drm_gem_vram_kunmap_locked(gbo);
+	ttm_bo_unreserve(&gbo->bo);
 }
 EXPORT_SYMBOL(drm_gem_vram_kunmap);
 
 /**
+ * drm_gem_vram_vmap() - Pins and maps a GEM VRAM object into kernel address
+ *                       space
+ * @gbo:	The GEM VRAM object to map
+ *
+ * The vmap function pins a GEM VRAM object to its current location, either
+ * system or video memory, and maps its buffer into kernel address space.
+ * As pinned object cannot be relocated, you should avoid pinning objects
+ * permanently. Call drm_gem_vram_vunmap() with the returned address to
+ * unmap and unpin the GEM VRAM object.
+ *
+ * If you have special requirements for the pinning or mapping operations,
+ * call drm_gem_vram_pin() and drm_gem_vram_kmap() directly.
+ *
+ * Returns:
+ * The buffer's virtual address on success, or
+ * an ERR_PTR()-encoded error code otherwise.
+ */
+void *drm_gem_vram_vmap(struct drm_gem_vram_object *gbo)
+{
+	int ret;
+	void *base;
+
+	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = drm_gem_vram_pin_locked(gbo, 0);
+	if (ret)
+		goto err_ttm_bo_unreserve;
+	base = drm_gem_vram_kmap_locked(gbo, true, NULL);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto err_drm_gem_vram_unpin_locked;
+	}
+
+	ttm_bo_unreserve(&gbo->bo);
+
+	return base;
+
+err_drm_gem_vram_unpin_locked:
+	drm_gem_vram_unpin_locked(gbo);
+err_ttm_bo_unreserve:
+	ttm_bo_unreserve(&gbo->bo);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(drm_gem_vram_vmap);
+
+/**
+ * drm_gem_vram_vunmap() - Unmaps and unpins a GEM VRAM object
+ * @gbo:	The GEM VRAM object to unmap
+ * @vaddr:	The mapping's base address as returned by drm_gem_vram_vmap()
+ *
+ * A call to drm_gem_vram_vunmap() unmaps and unpins a GEM VRAM buffer. See
+ * the documentation for drm_gem_vram_vmap() for more information.
+ */
+void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, void *vaddr)
+{
+	int ret;
+
+	ret = ttm_bo_reserve(&gbo->bo, false, false, NULL);
+	if (WARN_ONCE(ret, "ttm_bo_reserve_failed(): ret=%d\n", ret))
+		return;
+
+	drm_gem_vram_kunmap_locked(gbo);
+	drm_gem_vram_unpin_locked(gbo);
+
+	ttm_bo_unreserve(&gbo->bo);
+}
+EXPORT_SYMBOL(drm_gem_vram_vunmap);
+
+/**
  * drm_gem_vram_fill_create_dumb() - \
 	Helper for implementing &struct drm_driver.dumb_create
  * @file:		the DRM file
  * @dev:		the DRM device
- * @bdev:		the TTM BO device managing the buffer object
  * @pg_align:		the buffer's alignment in multiples of the page size
- * @interruptible:	sleep interruptible if waiting for memory
+ * @pitch_align:	the scanline's alignment in powers of 2
  * @args:		the arguments as provided to \
 				&struct drm_driver.dumb_create
  *
@@ -362,9 +508,8 @@ EXPORT_SYMBOL(drm_gem_vram_kunmap);
  */
 int drm_gem_vram_fill_create_dumb(struct drm_file *file,
 				  struct drm_device *dev,
-				  struct ttm_bo_device *bdev,
 				  unsigned long pg_align,
-				  bool interruptible,
+				  unsigned long pitch_align,
 				  struct drm_mode_create_dumb *args)
 {
 	size_t pitch, size;
@@ -372,14 +517,19 @@ int drm_gem_vram_fill_create_dumb(struct drm_file *file,
 	int ret;
 	u32 handle;
 
-	pitch = args->width * ((args->bpp + 7) / 8);
+	pitch = args->width * DIV_ROUND_UP(args->bpp, 8);
+	if (pitch_align) {
+		if (WARN_ON_ONCE(!is_power_of_2(pitch_align)))
+			return -EINVAL;
+		pitch = ALIGN(pitch, pitch_align);
+	}
 	size = pitch * args->height;
 
 	size = roundup(size, PAGE_SIZE);
 	if (!size)
 		return -EINVAL;
 
-	gbo = drm_gem_vram_create(dev, bdev, size, pg_align, interruptible);
+	gbo = drm_gem_vram_create(dev, size, pg_align);
 	if (IS_ERR(gbo))
 		return PTR_ERR(gbo);
 
@@ -410,59 +560,27 @@ static bool drm_is_gem_vram(struct ttm_buffer_object *bo)
 	return (bo->destroy == ttm_buffer_object_destroy);
 }
 
-/**
- * drm_gem_vram_bo_driver_evict_flags() - \
-	Implements &struct ttm_bo_driver.evict_flags
- * @bo:	TTM buffer object. Refers to &struct drm_gem_vram_object.bo
- * @pl:	TTM placement information.
- */
-void drm_gem_vram_bo_driver_evict_flags(struct ttm_buffer_object *bo,
-					struct ttm_placement *pl)
+static void drm_gem_vram_bo_driver_evict_flags(struct drm_gem_vram_object *gbo,
+					       struct ttm_placement *pl)
 {
-	struct drm_gem_vram_object *gbo;
-
-	/* TTM may pass BOs that are not GEM VRAM BOs. */
-	if (!drm_is_gem_vram(bo))
-		return;
-
-	gbo = drm_gem_vram_of_bo(bo);
 	drm_gem_vram_placement(gbo, TTM_PL_FLAG_SYSTEM);
 	*pl = gbo->placement;
 }
-EXPORT_SYMBOL(drm_gem_vram_bo_driver_evict_flags);
 
-/**
- * drm_gem_vram_bo_driver_verify_access() - \
-	Implements &struct ttm_bo_driver.verify_access
- * @bo:		TTM buffer object. Refers to &struct drm_gem_vram_object.bo
- * @filp:	File pointer.
- *
- * Returns:
- * 0 on success, or
- * a negative errno code otherwise.
- */
-int drm_gem_vram_bo_driver_verify_access(struct ttm_buffer_object *bo,
-					 struct file *filp)
+static void drm_gem_vram_bo_driver_move_notify(struct drm_gem_vram_object *gbo,
+					       bool evict,
+					       struct ttm_mem_reg *new_mem)
 {
-	struct drm_gem_vram_object *gbo = drm_gem_vram_of_bo(bo);
+	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
 
-	return drm_vma_node_verify_access(&gbo->bo.base.vma_node,
-					  filp->private_data);
-}
-EXPORT_SYMBOL(drm_gem_vram_bo_driver_verify_access);
+	if (WARN_ON_ONCE(gbo->kmap_use_count))
+		return;
 
-/*
- * drm_gem_vram_mm_funcs - Functions for &struct drm_vram_mm
- *
- * Most users of @struct drm_gem_vram_object will also use
- * @struct drm_vram_mm. This instance of &struct drm_vram_mm_funcs
- * can be used to connect both.
- */
-const struct drm_vram_mm_funcs drm_gem_vram_mm_funcs = {
-	.evict_flags = drm_gem_vram_bo_driver_evict_flags,
-	.verify_access = drm_gem_vram_bo_driver_verify_access
-};
-EXPORT_SYMBOL(drm_gem_vram_mm_funcs);
+	if (!kmap->virtual)
+		return;
+	ttm_bo_kunmap(kmap);
+	kmap->virtual = NULL;
+}
 
 /*
  * Helpers for struct drm_gem_object_funcs
@@ -506,8 +624,7 @@ int drm_gem_vram_driver_dumb_create(struct drm_file *file,
 	if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
 		return -EINVAL;
 
-	return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev, 0,
-					     false, args);
+	return drm_gem_vram_fill_create_dumb(file, dev, 0, 0, args);
 }
 EXPORT_SYMBOL(drm_gem_vram_driver_dumb_create);
 
@@ -544,6 +661,129 @@ int drm_gem_vram_driver_dumb_mmap_offset(struct drm_file *file,
 EXPORT_SYMBOL(drm_gem_vram_driver_dumb_mmap_offset);
 
 /*
+ * Helpers for struct drm_plane_helper_funcs
+ */
+
+/**
+ * drm_gem_vram_plane_helper_prepare_fb() - \
+ *	Implements &struct drm_plane_helper_funcs.prepare_fb
+ * @plane:	a DRM plane
+ * @new_state:	the plane's new state
+ *
+ * During plane updates, this function pins the GEM VRAM
+ * objects of the plane's new framebuffer to VRAM. Call
+ * drm_gem_vram_plane_helper_cleanup_fb() to unpin them.
+ *
+ * Returns:
+ *	0 on success, or
+ *	a negative errno code otherwise.
+ */
+int
+drm_gem_vram_plane_helper_prepare_fb(struct drm_plane *plane,
+				     struct drm_plane_state *new_state)
+{
+	size_t i;
+	struct drm_gem_vram_object *gbo;
+	int ret;
+
+	if (!new_state->fb)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(new_state->fb->obj); ++i) {
+		if (!new_state->fb->obj[i])
+			continue;
+		gbo = drm_gem_vram_of_gem(new_state->fb->obj[i]);
+		ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
+		if (ret)
+			goto err_drm_gem_vram_unpin;
+	}
+
+	return 0;
+
+err_drm_gem_vram_unpin:
+	while (i) {
+		--i;
+		gbo = drm_gem_vram_of_gem(new_state->fb->obj[i]);
+		drm_gem_vram_unpin(gbo);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_vram_plane_helper_prepare_fb);
+
+/**
+ * drm_gem_vram_plane_helper_cleanup_fb() - \
+ *	Implements &struct drm_plane_helper_funcs.cleanup_fb
+ * @plane:	a DRM plane
+ * @old_state:	the plane's old state
+ *
+ * During plane updates, this function unpins the GEM VRAM
+ * objects of the plane's old framebuffer from VRAM. Complements
+ * drm_gem_vram_plane_helper_prepare_fb().
+ */
+void
+drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane,
+				     struct drm_plane_state *old_state)
+{
+	size_t i;
+	struct drm_gem_vram_object *gbo;
+
+	if (!old_state->fb)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(old_state->fb->obj); ++i) {
+		if (!old_state->fb->obj[i])
+			continue;
+		gbo = drm_gem_vram_of_gem(old_state->fb->obj[i]);
+		drm_gem_vram_unpin(gbo);
+	}
+}
+EXPORT_SYMBOL(drm_gem_vram_plane_helper_cleanup_fb);
+
+/*
+ * Helpers for struct drm_simple_display_pipe_funcs
+ */
+
+/**
+ * drm_gem_vram_simple_display_pipe_prepare_fb() - \
+ *	Implements &struct drm_simple_display_pipe_funcs.prepare_fb
+ * @pipe:	a simple display pipe
+ * @new_state:	the plane's new state
+ *
+ * During plane updates, this function pins the GEM VRAM
+ * objects of the plane's new framebuffer to VRAM. Call
+ * drm_gem_vram_simple_display_pipe_cleanup_fb() to unpin them.
+ *
+ * Returns:
+ *	0 on success, or
+ *	a negative errno code otherwise.
+ */
+int drm_gem_vram_simple_display_pipe_prepare_fb(
+	struct drm_simple_display_pipe *pipe,
+	struct drm_plane_state *new_state)
+{
+	return drm_gem_vram_plane_helper_prepare_fb(&pipe->plane, new_state);
+}
+EXPORT_SYMBOL(drm_gem_vram_simple_display_pipe_prepare_fb);
+
+/**
+ * drm_gem_vram_simple_display_pipe_cleanup_fb() - \
+ *	Implements &struct drm_simple_display_pipe_funcs.cleanup_fb
+ * @pipe:	a simple display pipe
+ * @old_state:	the plane's old state
+ *
+ * During plane updates, this function unpins the GEM VRAM
+ * objects of the plane's old framebuffer from VRAM. Complements
+ * drm_gem_vram_simple_display_pipe_prepare_fb().
+ */
+void drm_gem_vram_simple_display_pipe_cleanup_fb(
+	struct drm_simple_display_pipe *pipe,
+	struct drm_plane_state *old_state)
+{
+	drm_gem_vram_plane_helper_cleanup_fb(&pipe->plane, old_state);
+}
+EXPORT_SYMBOL(drm_gem_vram_simple_display_pipe_cleanup_fb);
+
+/*
  * PRIME helpers
  */
 
@@ -595,17 +835,11 @@ static void drm_gem_vram_object_unpin(struct drm_gem_object *gem)
 static void *drm_gem_vram_object_vmap(struct drm_gem_object *gem)
 {
 	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
-	int ret;
 	void *base;
 
-	ret = drm_gem_vram_pin(gbo, 0);
-	if (ret)
+	base = drm_gem_vram_vmap(gbo);
+	if (IS_ERR(base))
 		return NULL;
-	base = drm_gem_vram_kmap(gbo, true, NULL);
-	if (IS_ERR(base)) {
-		drm_gem_vram_unpin(gbo);
-		return NULL;
-	}
 	return base;
 }
 
@@ -620,8 +854,7 @@ static void drm_gem_vram_object_vunmap(struct drm_gem_object *gem,
 {
 	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
 
-	drm_gem_vram_kunmap(gbo);
-	drm_gem_vram_unpin(gbo);
+	drm_gem_vram_vunmap(gbo, vaddr);
 }
 
 /*
@@ -633,5 +866,278 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs = {
 	.pin	= drm_gem_vram_object_pin,
 	.unpin	= drm_gem_vram_object_unpin,
 	.vmap	= drm_gem_vram_object_vmap,
-	.vunmap	= drm_gem_vram_object_vunmap
+	.vunmap	= drm_gem_vram_object_vunmap,
+	.mmap   = drm_gem_ttm_mmap,
+	.print_info = drm_gem_ttm_print_info,
 };
+
+/*
+ * VRAM memory manager
+ */
+
+/*
+ * TTM TT
+ */
+
+static void backend_func_destroy(struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt);
+}
+
+static struct ttm_backend_func backend_func = {
+	.destroy = backend_func_destroy
+};
+
+/*
+ * TTM BO device
+ */
+
+static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
+					      uint32_t page_flags)
+{
+	struct ttm_tt *tt;
+	int ret;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->func = &backend_func;
+
+	ret = ttm_tt_init(tt, bo, page_flags);
+	if (ret < 0)
+		goto err_ttm_tt_init;
+
+	return tt;
+
+err_ttm_tt_init:
+	kfree(tt);
+	return NULL;
+}
+
+static int bo_driver_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+				   struct ttm_mem_type_manager *man)
+{
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_VRAM:
+		man->func = &ttm_bo_manager_func;
+		man->flags = TTM_MEMTYPE_FLAG_FIXED |
+			     TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED |
+					 TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bo_driver_evict_flags(struct ttm_buffer_object *bo,
+				  struct ttm_placement *placement)
+{
+	struct drm_gem_vram_object *gbo;
+
+	/* TTM may pass BOs that are not GEM VRAM BOs. */
+	if (!drm_is_gem_vram(bo))
+		return;
+
+	gbo = drm_gem_vram_of_bo(bo);
+
+	drm_gem_vram_bo_driver_evict_flags(gbo, placement);
+}
+
+static void bo_driver_move_notify(struct ttm_buffer_object *bo,
+				  bool evict,
+				  struct ttm_mem_reg *new_mem)
+{
+	struct drm_gem_vram_object *gbo;
+
+	/* TTM may pass BOs that are not GEM VRAM BOs. */
+	if (!drm_is_gem_vram(bo))
+		return;
+
+	gbo = drm_gem_vram_of_bo(bo);
+
+	drm_gem_vram_bo_driver_move_notify(gbo, evict, new_mem);
+}
+
+static int bo_driver_io_mem_reserve(struct ttm_bo_device *bdev,
+				    struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = bdev->man + mem->mem_type;
+	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bdev);
+
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+
+	mem->bus.addr = NULL;
+	mem->bus.size = mem->num_pages << PAGE_SHIFT;
+
+	switch (mem->mem_type) {
+	case TTM_PL_SYSTEM:	/* nothing to do */
+		mem->bus.offset = 0;
+		mem->bus.base = 0;
+		mem->bus.is_iomem = false;
+		break;
+	case TTM_PL_VRAM:
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+		mem->bus.base = vmm->vram_base;
+		mem->bus.is_iomem = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void bo_driver_io_mem_free(struct ttm_bo_device *bdev,
+				  struct ttm_mem_reg *mem)
+{ }
+
+static struct ttm_bo_driver bo_driver = {
+	.ttm_tt_create = bo_driver_ttm_tt_create,
+	.ttm_tt_populate = ttm_pool_populate,
+	.ttm_tt_unpopulate = ttm_pool_unpopulate,
+	.init_mem_type = bo_driver_init_mem_type,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.evict_flags = bo_driver_evict_flags,
+	.move_notify = bo_driver_move_notify,
+	.io_mem_reserve = bo_driver_io_mem_reserve,
+	.io_mem_free = bo_driver_io_mem_free,
+};
+
+/*
+ * struct drm_vram_mm
+ */
+
+#if defined(CONFIG_DEBUG_FS)
+static int drm_vram_mm_debugfs(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_vram_mm *vmm = node->minor->dev->vram_mm;
+	struct drm_mm *mm = vmm->bdev.man[TTM_PL_VRAM].priv;
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	spin_lock(&ttm_bo_glob.lru_lock);
+	drm_mm_print(mm, &p);
+	spin_unlock(&ttm_bo_glob.lru_lock);
+	return 0;
+}
+
+static const struct drm_info_list drm_vram_mm_debugfs_list[] = {
+	{ "vram-mm", drm_vram_mm_debugfs, 0, NULL },
+};
+#endif
+
+/**
+ * drm_vram_mm_debugfs_init() - Register VRAM MM debugfs file.
+ *
+ * @minor: drm minor device.
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_vram_mm_debugfs_init(struct drm_minor *minor)
+{
+	int ret = 0;
+
+#if defined(CONFIG_DEBUG_FS)
+	ret = drm_debugfs_create_files(drm_vram_mm_debugfs_list,
+				       ARRAY_SIZE(drm_vram_mm_debugfs_list),
+				       minor->debugfs_root, minor);
+#endif
+	return ret;
+}
+EXPORT_SYMBOL(drm_vram_mm_debugfs_init);
+
+static int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
+			    uint64_t vram_base, size_t vram_size)
+{
+	int ret;
+
+	vmm->vram_base = vram_base;
+	vmm->vram_size = vram_size;
+
+	ret = ttm_bo_device_init(&vmm->bdev, &bo_driver,
+				 dev->anon_inode->i_mapping,
+				 dev->vma_offset_manager,
+				 true);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_init_mm(&vmm->bdev, TTM_PL_VRAM, vram_size >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void drm_vram_mm_cleanup(struct drm_vram_mm *vmm)
+{
+	ttm_bo_device_release(&vmm->bdev);
+}
+
+/*
+ * Helpers for integration with struct drm_device
+ */
+
+/**
+ * drm_vram_helper_alloc_mm - Allocates a device's instance of \
+	&struct drm_vram_mm
+ * @dev:	the DRM device
+ * @vram_base:	the base address of the video memory
+ * @vram_size:	the size of the video memory in bytes
+ *
+ * Returns:
+ * The new instance of &struct drm_vram_mm on success, or
+ * an ERR_PTR()-encoded errno code otherwise.
+ */
+struct drm_vram_mm *drm_vram_helper_alloc_mm(
+	struct drm_device *dev, uint64_t vram_base, size_t vram_size)
+{
+	int ret;
+
+	if (WARN_ON(dev->vram_mm))
+		return dev->vram_mm;
+
+	dev->vram_mm = kzalloc(sizeof(*dev->vram_mm), GFP_KERNEL);
+	if (!dev->vram_mm)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_vram_mm_init(dev->vram_mm, dev, vram_base, vram_size);
+	if (ret)
+		goto err_kfree;
+
+	return dev->vram_mm;
+
+err_kfree:
+	kfree(dev->vram_mm);
+	dev->vram_mm = NULL;
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(drm_vram_helper_alloc_mm);
+
+/**
+ * drm_vram_helper_release_mm - Releases a device's instance of \
+	&struct drm_vram_mm
+ * @dev:	the DRM device
+ */
+void drm_vram_helper_release_mm(struct drm_device *dev)
+{
+	if (!dev->vram_mm)
+		return;
+
+	drm_vram_mm_cleanup(dev->vram_mm);
+	kfree(dev->vram_mm);
+	dev->vram_mm = NULL;
+}
+EXPORT_SYMBOL(drm_vram_helper_release_mm);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 51a2055c8f18..6937bf923f05 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -45,12 +45,34 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor);
 void drm_file_free(struct drm_file *file);
 void drm_lastclose(struct drm_device *dev);
 
+#ifdef CONFIG_PCI
+
 /* drm_pci.c */
 int drm_irq_by_busid(struct drm_device *dev, void *data,
 		     struct drm_file *file_priv);
 void drm_pci_agp_destroy(struct drm_device *dev);
 int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master);
 
+#else
+
+static inline int drm_irq_by_busid(struct drm_device *dev, void *data,
+				   struct drm_file *file_priv)
+{
+	return -EINVAL;
+}
+
+static inline void drm_pci_agp_destroy(struct drm_device *dev)
+{
+}
+
+static inline int drm_pci_set_busid(struct drm_device *dev,
+				    struct drm_master *master)
+{
+	return -EINVAL;
+}
+
+#endif
+
 /* drm_prime.c */
 int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index fcd728d7cf72..5afb39688b55 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -652,8 +652,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, 0),
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 2e8ce99d0baa..2c79e8199e3c 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -360,7 +360,8 @@ void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *m
 	/*
 	 * Since the master is disappearing, so is the
 	 * possibility to lock.
-	 */	mutex_lock(&dev->struct_mutex);
+	 */
+	mutex_lock(&dev->struct_mutex);
 	if (master->lock.hw_lock) {
 		if (dev->sigdata.lock == master->lock.hw_lock)
 			dev->sigdata.lock = NULL;
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 0bec6dbb0142..fbea69d6f909 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -40,6 +40,7 @@
 #include <xen/xen.h>
 
 #include <drm/drm_agpsupport.h>
+#include <drm/drm_cache.h>
 #include <drm/drm_device.h>
 
 #include "drm_legacy.h"
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index 1961f713aaab..16bff1be4b8a 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -367,9 +367,9 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev)
 	memset(dbidev->tx_buf, 0, len);
 
 	mipi_dbi_command(dbi, MIPI_DCS_SET_COLUMN_ADDRESS, 0, 0,
-			 (width >> 8) & 0xFF, (width - 1) & 0xFF);
+			 ((width - 1) >> 8) & 0xFF, (width - 1) & 0xFF);
 	mipi_dbi_command(dbi, MIPI_DCS_SET_PAGE_ADDRESS, 0, 0,
-			 (height >> 8) & 0xFF, (height - 1) & 0xFF);
+			 ((height - 1) >> 8) & 0xFF, (height - 1) & 0xFF);
 	mipi_dbi_command_buf(dbi, MIPI_DCS_WRITE_MEMORY_START,
 			     (u8 *)dbidev->tx_buf, len);
 
@@ -783,7 +783,7 @@ static int mipi_dbi_spi1e_transfer(struct mipi_dbi *dbi, int dc,
 	int i, ret;
 	u8 *dst;
 
-	if (drm_debug & DRM_UT_DRIVER)
+	if (drm_debug_enabled(DRM_UT_DRIVER))
 		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
 			 __func__, dc, max_chunk);
 
@@ -907,7 +907,7 @@ static int mipi_dbi_spi1_transfer(struct mipi_dbi *dbi, int dc,
 	max_chunk = dbi->tx_buf9_len;
 	dst16 = dbi->tx_buf9;
 
-	if (drm_debug & DRM_UT_DRIVER)
+	if (drm_debug_enabled(DRM_UT_DRIVER))
 		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
 			 __func__, dc, max_chunk);
 
@@ -955,7 +955,7 @@ static int mipi_dbi_typec1_command(struct mipi_dbi *dbi, u8 *cmd,
 	int ret;
 
 	if (mipi_dbi_command_is_read(dbi, *cmd))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	MIPI_DBI_DEBUG_COMMAND(*cmd, parameters, num);
 
@@ -1021,7 +1021,7 @@ static int mipi_dbi_typec3_command_read(struct mipi_dbi *dbi, u8 *cmd,
 		unsigned int i;
 
 		for (i = 0; i < len; i++)
-			data[i] = (buf[i] << 1) | !!(buf[i + 1] & BIT(7));
+			data[i] = (buf[i] << 1) | (buf[i + 1] >> 7);
 	}
 
 	MIPI_DBI_DEBUG_COMMAND(*cmd, data, len);
@@ -1187,8 +1187,7 @@ static ssize_t mipi_dbi_debugfs_command_write(struct file *file,
 	struct mipi_dbi_dev *dbidev = m->private;
 	u8 val, cmd = 0, parameters[64];
 	char *buf, *pos, *token;
-	unsigned int i;
-	int ret, idx;
+	int i, ret, idx;
 
 	if (!drm_dev_enter(&dbidev->drm, &idx))
 		return -ENODEV;
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index bd2498bbd74a..55531895dde6 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -33,6 +33,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
+#include <drm/drm_dsc.h>
 #include <video/mipi_display.h>
 
 /**
@@ -373,6 +374,7 @@ bool mipi_dsi_packet_format_is_short(u8 type)
 	case MIPI_DSI_V_SYNC_END:
 	case MIPI_DSI_H_SYNC_START:
 	case MIPI_DSI_H_SYNC_END:
+	case MIPI_DSI_COMPRESSION_MODE:
 	case MIPI_DSI_END_OF_TRANSMISSION:
 	case MIPI_DSI_COLOR_MODE_OFF:
 	case MIPI_DSI_COLOR_MODE_ON:
@@ -387,7 +389,7 @@ bool mipi_dsi_packet_format_is_short(u8 type)
 	case MIPI_DSI_DCS_SHORT_WRITE:
 	case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
 	case MIPI_DSI_DCS_READ:
-	case MIPI_DSI_DCS_COMPRESSION_MODE:
+	case MIPI_DSI_EXECUTE_QUEUE:
 	case MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE:
 		return true;
 	}
@@ -406,11 +408,12 @@ EXPORT_SYMBOL(mipi_dsi_packet_format_is_short);
 bool mipi_dsi_packet_format_is_long(u8 type)
 {
 	switch (type) {
-	case MIPI_DSI_PPS_LONG_WRITE:
 	case MIPI_DSI_NULL_PACKET:
 	case MIPI_DSI_BLANKING_PACKET:
 	case MIPI_DSI_GENERIC_LONG_WRITE:
 	case MIPI_DSI_DCS_LONG_WRITE:
+	case MIPI_DSI_PICTURE_PARAMETER_SET:
+	case MIPI_DSI_COMPRESSED_PIXEL_STREAM:
 	case MIPI_DSI_LOOSELY_PACKED_PIXEL_STREAM_YCBCR20:
 	case MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR24:
 	case MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16:
@@ -547,6 +550,56 @@ int mipi_dsi_set_maximum_return_packet_size(struct mipi_dsi_device *dsi,
 EXPORT_SYMBOL(mipi_dsi_set_maximum_return_packet_size);
 
 /**
+ * mipi_dsi_compression_mode() - enable/disable DSC on the peripheral
+ * @dsi: DSI peripheral device
+ * @enable: Whether to enable or disable the DSC
+ *
+ * Enable or disable Display Stream Compression on the peripheral using the
+ * default Picture Parameter Set and VESA DSC 1.1 algorithm.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+ssize_t mipi_dsi_compression_mode(struct mipi_dsi_device *dsi, bool enable)
+{
+	/* Note: Needs updating for non-default PPS or algorithm */
+	u8 tx[2] = { enable << 0, 0 };
+	struct mipi_dsi_msg msg = {
+		.channel = dsi->channel,
+		.type = MIPI_DSI_COMPRESSION_MODE,
+		.tx_len = sizeof(tx),
+		.tx_buf = tx,
+	};
+	int ret = mipi_dsi_device_transfer(dsi, &msg);
+
+	return (ret < 0) ? ret : 0;
+}
+EXPORT_SYMBOL(mipi_dsi_compression_mode);
+
+/**
+ * mipi_dsi_picture_parameter_set() - transmit the DSC PPS to the peripheral
+ * @dsi: DSI peripheral device
+ * @pps: VESA DSC 1.1 Picture Parameter Set
+ *
+ * Transmit the VESA DSC 1.1 Picture Parameter Set to the peripheral.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+ssize_t mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi,
+				       const struct drm_dsc_picture_parameter_set *pps)
+{
+	struct mipi_dsi_msg msg = {
+		.channel = dsi->channel,
+		.type = MIPI_DSI_PICTURE_PARAMETER_SET,
+		.tx_len = sizeof(*pps),
+		.tx_buf = pps,
+	};
+	int ret = mipi_dsi_device_transfer(dsi, &msg);
+
+	return (ret < 0) ? ret : 0;
+}
+EXPORT_SYMBOL(mipi_dsi_picture_parameter_set);
+
+/**
  * mipi_dsi_generic_write() - transmit data using a generic write packet
  * @dsi: DSI peripheral device
  * @payload: buffer containing the payload
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 4581c5387372..2a6e34663146 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -174,7 +174,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 
 	node->__subtree_last = LAST(node);
 
-	if (hole_node->allocated) {
+	if (drm_mm_node_allocated(hole_node)) {
 		rb = &hole_node->rb;
 		while (rb) {
 			parent = rb_entry(rb, struct drm_mm_node, rb);
@@ -424,9 +424,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 
 	node->mm = mm;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	list_add(&node->node_list, &hole->node_list);
 	drm_mm_interval_tree_add_node(hole, node);
-	node->allocated = true;
 	node->hole_size = 0;
 
 	rm_hole(hole);
@@ -543,9 +543,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 		node->color = color;
 		node->hole_size = 0;
 
+		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 		list_add(&node->node_list, &hole->node_list);
 		drm_mm_interval_tree_add_node(hole, node);
-		node->allocated = true;
 
 		rm_hole(hole);
 		if (adj_start > hole_start)
@@ -561,6 +561,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
+static inline bool drm_mm_node_scanned_block(const struct drm_mm_node *node)
+{
+	return test_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+}
+
 /**
  * drm_mm_remove_node - Remove a memory node from the allocator.
  * @node: drm_mm_node to remove
@@ -574,8 +579,8 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
 
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
 
 	prev_node = list_prev_entry(node, node_list);
 
@@ -584,11 +589,12 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	node->allocated = false;
 
 	if (drm_mm_hole_follows(prev_node))
 		rm_hole(prev_node);
 	add_hole(prev_node);
+
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
@@ -605,10 +611,11 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	struct drm_mm *mm = old->mm;
 
-	DRM_MM_BUG_ON(!old->allocated);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(old));
 
 	*new = *old;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags);
 	list_replace(&old->node_list, &new->node_list);
 	rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
 
@@ -622,8 +629,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 				&mm->holes_addr);
 	}
 
-	old->allocated = false;
-	new->allocated = true;
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
 }
 EXPORT_SYMBOL(drm_mm_replace_node);
 
@@ -731,9 +737,9 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 	u64 adj_start, adj_end;
 
 	DRM_MM_BUG_ON(node->mm != mm);
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
-	node->scanned_block = true;
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
+	__set_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
 	mm->scan_active++;
 
 	/* Remove this block from the node_list so that we enlarge the hole
@@ -818,8 +824,8 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 	struct drm_mm_node *prev_node;
 
 	DRM_MM_BUG_ON(node->mm != scan->mm);
-	DRM_MM_BUG_ON(!node->scanned_block);
-	node->scanned_block = false;
+	DRM_MM_BUG_ON(!drm_mm_node_scanned_block(node));
+	__clear_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
 
 	DRM_MM_BUG_ON(!node->mm->scan_active);
 	node->mm->scan_active--;
@@ -917,7 +923,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
-	mm->head_node.allocated = false;
+	mm->head_node.flags = 0;
 	mm->head_node.mm = mm;
 	mm->head_node.start = start + size;
 	mm->head_node.size = -size;
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 7bc03c3c154f..08e6eff6a179 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -27,6 +27,7 @@
 #include <drm/drm_file.h>
 #include <drm/drm_mode_config.h>
 #include <drm/drm_print.h>
+#include <linux/dma-resv.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -415,6 +416,33 @@ void drm_mode_config_init(struct drm_device *dev)
 	dev->mode_config.num_crtc = 0;
 	dev->mode_config.num_encoder = 0;
 	dev->mode_config.num_total_plane = 0;
+
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		struct drm_modeset_acquire_ctx modeset_ctx;
+		struct ww_acquire_ctx resv_ctx;
+		struct dma_resv resv;
+		int ret;
+
+		dma_resv_init(&resv);
+
+		drm_modeset_acquire_init(&modeset_ctx, 0);
+		ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+				       &modeset_ctx);
+		if (ret == -EDEADLK)
+			ret = drm_modeset_backoff(&modeset_ctx);
+
+		ww_acquire_init(&resv_ctx, &reservation_ww_class);
+		ret = dma_resv_lock(&resv, &resv_ctx);
+		if (ret == -EDEADLK)
+			dma_resv_lock_slow(&resv, &resv_ctx);
+
+		dma_resv_unlock(&resv);
+		ww_acquire_fini(&resv_ctx);
+
+		drm_modeset_drop_locks(&modeset_ctx);
+		drm_modeset_acquire_fini(&modeset_ctx);
+		dma_resv_fini(&resv);
+	}
 }
 EXPORT_SYMBOL(drm_mode_config_init);
 
@@ -428,8 +456,6 @@ EXPORT_SYMBOL(drm_mode_config_init);
  * Note that since this /should/ happen single-threaded at driver/device
  * teardown time, no locking is required. It's the driver's job to ensure that
  * this guarantee actually holds true.
- *
- * FIXME: cleanup any dangling user buffer objects too
  */
 void drm_mode_config_cleanup(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index 6a23e36ed4fe..35c2719407a8 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -224,12 +224,26 @@ EXPORT_SYMBOL(drm_mode_object_get);
  * This attaches the given property to the modeset object with the given initial
  * value. Currently this function cannot fail since the properties are stored in
  * a statically sized array.
+ *
+ * Note that all properties must be attached before the object itself is
+ * registered and accessible from userspace.
  */
 void drm_object_attach_property(struct drm_mode_object *obj,
 				struct drm_property *property,
 				uint64_t init_val)
 {
 	int count = obj->properties->count;
+	struct drm_device *dev = property->dev;
+
+
+	if (obj->type == DRM_MODE_OBJECT_CONNECTOR) {
+		struct drm_connector *connector = obj_to_connector(obj);
+
+		WARN_ON(!dev->driver->load &&
+			connector->registration_state == DRM_CONNECTOR_REGISTERED);
+	} else {
+		WARN_ON(!dev->driver->load && dev->registered);
+	}
 
 	if (count == DRM_OBJECT_MAX_PROPERTY) {
 		WARN(1, "Failed to attach object property (type: 0x%x). Please "
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 88232698d7a0..10336b144c72 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -233,7 +233,7 @@ struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay,
 		/* 3) Nominal HSync width (% of line period) - default 8 */
 #define CVT_HSYNC_PERCENTAGE	8
 		unsigned int hblank_percentage;
-		int vsyncandback_porch, vback_porch, hblank;
+		int vsyncandback_porch, __maybe_unused vback_porch, hblank;
 
 		/* estimated the horizontal period */
 		tmp1 = HV_FACTOR * 1000000  -
@@ -386,9 +386,10 @@ drm_gtf_mode_complex(struct drm_device *dev, int hdisplay, int vdisplay,
 	int top_margin, bottom_margin;
 	int interlace;
 	unsigned int hfreq_est;
-	int vsync_plus_bp, vback_porch;
-	unsigned int vtotal_lines, vfieldrate_est, hperiod;
-	unsigned int vfield_rate, vframe_rate;
+	int vsync_plus_bp, __maybe_unused vback_porch;
+	unsigned int vtotal_lines, __maybe_unused vfieldrate_est;
+	unsigned int __maybe_unused hperiod;
+	unsigned int vfield_rate, __maybe_unused vframe_rate;
 	int left_margin, right_margin;
 	unsigned int total_active_pixels, ideal_duty_cycle;
 	unsigned int hblank, total_pixels, pixel_freq;
@@ -1568,33 +1569,76 @@ static int drm_mode_parse_cmdline_res_mode(const char *str, unsigned int length,
 	return 0;
 }
 
-static int drm_mode_parse_cmdline_options(char *str, size_t len,
+static int drm_mode_parse_cmdline_int(const char *delim, unsigned int *int_ret)
+{
+	const char *value;
+	char *endp;
+
+	/*
+	 * delim must point to the '=', otherwise it is a syntax error and
+	 * if delim points to the terminating zero, then delim + 1 wil point
+	 * past the end of the string.
+	 */
+	if (*delim != '=')
+		return -EINVAL;
+
+	value = delim + 1;
+	*int_ret = simple_strtol(value, &endp, 10);
+
+	/* Make sure we have parsed something */
+	if (endp == value)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int drm_mode_parse_panel_orientation(const char *delim,
+					    struct drm_cmdline_mode *mode)
+{
+	const char *value;
+
+	if (*delim != '=')
+		return -EINVAL;
+
+	value = delim + 1;
+	delim = strchr(value, ',');
+	if (!delim)
+		delim = value + strlen(value);
+
+	if (!strncmp(value, "normal", delim - value))
+		mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL;
+	else if (!strncmp(value, "upside_down", delim - value))
+		mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP;
+	else if (!strncmp(value, "left_side_up", delim - value))
+		mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP;
+	else if (!strncmp(value, "right_side_up", delim - value))
+		mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int drm_mode_parse_cmdline_options(const char *str,
+					  bool freestanding,
 					  const struct drm_connector *connector,
 					  struct drm_cmdline_mode *mode)
 {
-	unsigned int rotation = 0;
-	char *sep = str;
+	unsigned int deg, margin, rotation = 0;
+	const char *delim, *option, *sep;
 
-	while ((sep = strchr(sep, ','))) {
-		char *delim, *option;
-
-		option = sep + 1;
+	option = str;
+	do {
 		delim = strchr(option, '=');
 		if (!delim) {
 			delim = strchr(option, ',');
 
 			if (!delim)
-				delim = str + len;
+				delim = option + strlen(option);
 		}
 
 		if (!strncmp(option, "rotate", delim - option)) {
-			const char *value = delim + 1;
-			unsigned int deg;
-
-			deg = simple_strtol(value, &sep, 10);
-
-			/* Make sure we have parsed something */
-			if (sep == value)
+			if (drm_mode_parse_cmdline_int(delim, &deg))
 				return -EINVAL;
 
 			switch (deg) {
@@ -1619,58 +1663,40 @@ static int drm_mode_parse_cmdline_options(char *str, size_t len,
 			}
 		} else if (!strncmp(option, "reflect_x", delim - option)) {
 			rotation |= DRM_MODE_REFLECT_X;
-			sep = delim;
 		} else if (!strncmp(option, "reflect_y", delim - option)) {
 			rotation |= DRM_MODE_REFLECT_Y;
-			sep = delim;
 		} else if (!strncmp(option, "margin_right", delim - option)) {
-			const char *value = delim + 1;
-			unsigned int margin;
-
-			margin = simple_strtol(value, &sep, 10);
-
-			/* Make sure we have parsed something */
-			if (sep == value)
+			if (drm_mode_parse_cmdline_int(delim, &margin))
 				return -EINVAL;
 
 			mode->tv_margins.right = margin;
 		} else if (!strncmp(option, "margin_left", delim - option)) {
-			const char *value = delim + 1;
-			unsigned int margin;
-
-			margin = simple_strtol(value, &sep, 10);
-
-			/* Make sure we have parsed something */
-			if (sep == value)
+			if (drm_mode_parse_cmdline_int(delim, &margin))
 				return -EINVAL;
 
 			mode->tv_margins.left = margin;
 		} else if (!strncmp(option, "margin_top", delim - option)) {
-			const char *value = delim + 1;
-			unsigned int margin;
-
-			margin = simple_strtol(value, &sep, 10);
-
-			/* Make sure we have parsed something */
-			if (sep == value)
+			if (drm_mode_parse_cmdline_int(delim, &margin))
 				return -EINVAL;
 
 			mode->tv_margins.top = margin;
 		} else if (!strncmp(option, "margin_bottom", delim - option)) {
-			const char *value = delim + 1;
-			unsigned int margin;
-
-			margin = simple_strtol(value, &sep, 10);
-
-			/* Make sure we have parsed something */
-			if (sep == value)
+			if (drm_mode_parse_cmdline_int(delim, &margin))
 				return -EINVAL;
 
 			mode->tv_margins.bottom = margin;
+		} else if (!strncmp(option, "panel_orientation", delim - option)) {
+			if (drm_mode_parse_panel_orientation(delim, mode))
+				return -EINVAL;
 		} else {
 			return -EINVAL;
 		}
-	}
+		sep = strchr(delim, ',');
+		option = sep + 1;
+	} while (sep);
+
+	if (rotation && freestanding)
+		return -EINVAL;
 
 	mode->rotation_reflection = rotation;
 
@@ -1682,17 +1708,6 @@ static const char * const drm_named_modes_whitelist[] = {
 	"PAL",
 };
 
-static bool drm_named_mode_is_in_whitelist(const char *mode, unsigned int size)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(drm_named_modes_whitelist); i++)
-		if (!strncmp(mode, drm_named_modes_whitelist[i], size))
-			return true;
-
-	return false;
-}
-
 /**
  * drm_mode_parse_command_line_for_connector - parse command line modeline for connector
  * @mode_option: optional per connector mode option
@@ -1723,72 +1738,30 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 					       struct drm_cmdline_mode *mode)
 {
 	const char *name;
-	bool named_mode = false, parse_extras = false;
+	bool freestanding = false, parse_extras = false;
 	unsigned int bpp_off = 0, refresh_off = 0, options_off = 0;
 	unsigned int mode_end = 0;
-	char *bpp_ptr = NULL, *refresh_ptr = NULL, *extra_ptr = NULL;
-	char *options_ptr = NULL;
+	const char *bpp_ptr = NULL, *refresh_ptr = NULL, *extra_ptr = NULL;
+	const char *options_ptr = NULL;
 	char *bpp_end_ptr = NULL, *refresh_end_ptr = NULL;
-	int ret;
+	int i, len, ret;
 
-#ifdef CONFIG_FB
-	if (!mode_option)
-		mode_option = fb_mode_option;
-#endif
+	memset(mode, 0, sizeof(*mode));
+	mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
 
-	if (!mode_option) {
-		mode->specified = false;
+	if (!mode_option)
 		return false;
-	}
 
 	name = mode_option;
 
-	/*
-	 * This is a bit convoluted. To differentiate between the
-	 * named modes and poorly formatted resolutions, we need a
-	 * bunch of things:
-	 *   - We need to make sure that the first character (which
-	 *     would be our resolution in X) is a digit.
-	 *   - If not, then it's either a named mode or a force on/off.
-	 *     To distinguish between the two, we need to run the
-	 *     extra parsing function, and if not, then we consider it
-	 *     a named mode.
-	 *
-	 * If this isn't enough, we should add more heuristics here,
-	 * and matching unit-tests.
-	 */
-	if (!isdigit(name[0]) && name[0] != 'x') {
-		unsigned int namelen = strlen(name);
-
-		/*
-		 * Only the force on/off options can be in that case,
-		 * and they all take a single character.
-		 */
-		if (namelen == 1) {
-			ret = drm_mode_parse_cmdline_extra(name, namelen, true,
-							   connector, mode);
-			if (!ret)
-				return true;
-		}
-
-		named_mode = true;
-	}
-
 	/* Try to locate the bpp and refresh specifiers, if any */
 	bpp_ptr = strchr(name, '-');
-	if (bpp_ptr) {
+	if (bpp_ptr)
 		bpp_off = bpp_ptr - name;
-		mode->bpp_specified = true;
-	}
 
 	refresh_ptr = strchr(name, '@');
-	if (refresh_ptr) {
-		if (named_mode)
-			return false;
-
+	if (refresh_ptr)
 		refresh_off = refresh_ptr - name;
-		mode->refresh_specified = true;
-	}
 
 	/* Locate the start of named options */
 	options_ptr = strchr(name, ',');
@@ -1802,33 +1775,58 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 		mode_end = refresh_off;
 	} else if (options_ptr) {
 		mode_end = options_off;
+		parse_extras = true;
 	} else {
 		mode_end = strlen(name);
 		parse_extras = true;
 	}
 
-	if (named_mode) {
-		if (mode_end + 1 > DRM_DISPLAY_MODE_LEN)
-			return false;
+	/* First check for a named mode */
+	for (i = 0; i < ARRAY_SIZE(drm_named_modes_whitelist); i++) {
+		ret = str_has_prefix(name, drm_named_modes_whitelist[i]);
+		if (ret == mode_end) {
+			if (refresh_ptr)
+				return false; /* named + refresh is invalid */
 
-		if (!drm_named_mode_is_in_whitelist(name, mode_end))
-			return false;
+			strcpy(mode->name, drm_named_modes_whitelist[i]);
+			mode->specified = true;
+			break;
+		}
+	}
 
-		strscpy(mode->name, name, mode_end + 1);
-	} else {
+	/* No named mode? Check for a normal mode argument, e.g. 1024x768 */
+	if (!mode->specified && isdigit(name[0])) {
 		ret = drm_mode_parse_cmdline_res_mode(name, mode_end,
 						      parse_extras,
 						      connector,
 						      mode);
 		if (ret)
 			return false;
+
+		mode->specified = true;
+	}
+
+	/* No mode? Check for freestanding extras and/or options */
+	if (!mode->specified) {
+		unsigned int len = strlen(mode_option);
+
+		if (bpp_ptr || refresh_ptr)
+			return false; /* syntax error */
+
+		if (len == 1 || (len >= 2 && mode_option[1] == ','))
+			extra_ptr = mode_option;
+		else
+			options_ptr = mode_option - 1;
+
+		freestanding = true;
 	}
-	mode->specified = true;
 
 	if (bpp_ptr) {
 		ret = drm_mode_parse_cmdline_bpp(bpp_ptr, &bpp_end_ptr, mode);
 		if (ret)
 			return false;
+
+		mode->bpp_specified = true;
 	}
 
 	if (refresh_ptr) {
@@ -1836,6 +1834,8 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 						     &refresh_end_ptr, mode);
 		if (ret)
 			return false;
+
+		mode->refresh_specified = true;
 	}
 
 	/*
@@ -1849,20 +1849,21 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 	else if (refresh_ptr)
 		extra_ptr = refresh_end_ptr;
 
-	if (extra_ptr &&
-	    extra_ptr != options_ptr) {
-		int len = strlen(name) - (extra_ptr - name);
+	if (extra_ptr) {
+		if (options_ptr)
+			len = options_ptr - extra_ptr;
+		else
+			len = strlen(extra_ptr);
 
-		ret = drm_mode_parse_cmdline_extra(extra_ptr, len, false,
+		ret = drm_mode_parse_cmdline_extra(extra_ptr, len, freestanding,
 						   connector, mode);
 		if (ret)
 			return false;
 	}
 
 	if (options_ptr) {
-		int len = strlen(name) - (options_ptr - name);
-
-		ret = drm_mode_parse_cmdline_options(options_ptr, len,
+		ret = drm_mode_parse_cmdline_options(options_ptr + 1,
+						     freestanding,
 						     connector, mode);
 		if (ret)
 			return false;
diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index 43d89dd59c6b..b50b44e76279 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -250,11 +250,6 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
 	if (!remote)
 		return -ENODEV;
 
-	if (!of_device_is_available(remote)) {
-		of_node_put(remote);
-		return -ENODEV;
-	}
-
 	if (panel) {
 		*panel = of_drm_find_panel(remote);
 		if (!IS_ERR(*panel))
@@ -279,3 +274,119 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge);
+
+enum drm_of_lvds_pixels {
+	DRM_OF_LVDS_EVEN = BIT(0),
+	DRM_OF_LVDS_ODD = BIT(1),
+};
+
+static int drm_of_lvds_get_port_pixels_type(struct device_node *port_node)
+{
+	bool even_pixels =
+		of_property_read_bool(port_node, "dual-lvds-even-pixels");
+	bool odd_pixels =
+		of_property_read_bool(port_node, "dual-lvds-odd-pixels");
+
+	return (even_pixels ? DRM_OF_LVDS_EVEN : 0) |
+	       (odd_pixels ? DRM_OF_LVDS_ODD : 0);
+}
+
+static int drm_of_lvds_get_remote_pixels_type(
+			const struct device_node *port_node)
+{
+	struct device_node *endpoint = NULL;
+	int pixels_type = -EPIPE;
+
+	for_each_child_of_node(port_node, endpoint) {
+		struct device_node *remote_port;
+		int current_pt;
+
+		if (!of_node_name_eq(endpoint, "endpoint"))
+			continue;
+
+		remote_port = of_graph_get_remote_port(endpoint);
+		if (!remote_port) {
+			of_node_put(remote_port);
+			return -EPIPE;
+		}
+
+		current_pt = drm_of_lvds_get_port_pixels_type(remote_port);
+		of_node_put(remote_port);
+		if (pixels_type < 0)
+			pixels_type = current_pt;
+
+		/*
+		 * Sanity check, ensure that all remote endpoints have the same
+		 * pixel type. We may lift this restriction later if we need to
+		 * support multiple sinks with different dual-link
+		 * configurations by passing the endpoints explicitly to
+		 * drm_of_lvds_get_dual_link_pixel_order().
+		 */
+		if (!current_pt || pixels_type != current_pt) {
+			of_node_put(remote_port);
+			return -EINVAL;
+		}
+	}
+
+	return pixels_type;
+}
+
+/**
+ * drm_of_lvds_get_dual_link_pixel_order - Get LVDS dual-link pixel order
+ * @port1: First DT port node of the Dual-link LVDS source
+ * @port2: Second DT port node of the Dual-link LVDS source
+ *
+ * An LVDS dual-link connection is made of two links, with even pixels
+ * transitting on one link, and odd pixels on the other link. This function
+ * returns, for two ports of an LVDS dual-link source, which port shall transmit
+ * the even and odd pixels, based on the requirements of the connected sink.
+ *
+ * The pixel order is determined from the dual-lvds-even-pixels and
+ * dual-lvds-odd-pixels properties in the sink's DT port nodes. If those
+ * properties are not present, or if their usage is not valid, this function
+ * returns -EINVAL.
+ *
+ * If either port is not connected, this function returns -EPIPE.
+ *
+ * @port1 and @port2 are typically DT sibling nodes, but may have different
+ * parents when, for instance, two separate LVDS encoders carry the even and odd
+ * pixels.
+ *
+ * Return:
+ * * DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS - @port1 carries even pixels and @port2
+ *   carries odd pixels
+ * * DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS - @port1 carries odd pixels and @port2
+ *   carries even pixels
+ * * -EINVAL - @port1 and @port2 are not connected to a dual-link LVDS sink, or
+ *   the sink configuration is invalid
+ * * -EPIPE - when @port1 or @port2 are not connected
+ */
+int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1,
+					  const struct device_node *port2)
+{
+	int remote_p1_pt, remote_p2_pt;
+
+	if (!port1 || !port2)
+		return -EINVAL;
+
+	remote_p1_pt = drm_of_lvds_get_remote_pixels_type(port1);
+	if (remote_p1_pt < 0)
+		return remote_p1_pt;
+
+	remote_p2_pt = drm_of_lvds_get_remote_pixels_type(port2);
+	if (remote_p2_pt < 0)
+		return remote_p2_pt;
+
+	/*
+	 * A valid dual-lVDS bus is found when one remote port is marked with
+	 * "dual-lvds-even-pixels", and the other remote port is marked with
+	 * "dual-lvds-odd-pixels", bail out if the markers are not right.
+	 */
+	if (remote_p1_pt + remote_p2_pt != DRM_OF_LVDS_EVEN + DRM_OF_LVDS_ODD)
+		return -EINVAL;
+
+	return remote_p1_pt == DRM_OF_LVDS_EVEN ?
+		DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS :
+		DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS;
+}
+EXPORT_SYMBOL_GPL(drm_of_lvds_get_dual_link_pixel_order);
diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c
index 6b0bf42039cf..8c7bac85a793 100644
--- a/drivers/gpu/drm/drm_panel.c
+++ b/drivers/gpu/drm/drm_panel.c
@@ -21,11 +21,13 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/backlight.h>
 #include <linux/err.h>
 #include <linux/module.h>
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 
 static DEFINE_MUTEX(panel_lock);
 static LIST_HEAD(panel_list);
@@ -44,13 +46,21 @@ static LIST_HEAD(panel_list);
 /**
  * drm_panel_init - initialize a panel
  * @panel: DRM panel
+ * @dev: parent device of the panel
+ * @funcs: panel operations
+ * @connector_type: the connector type (DRM_MODE_CONNECTOR_*) corresponding to
+ *	the panel interface
  *
- * Sets up internal fields of the panel so that it can subsequently be added
- * to the registry.
+ * Initialize the panel structure for subsequent registration with
+ * drm_panel_add().
  */
-void drm_panel_init(struct drm_panel *panel)
+void drm_panel_init(struct drm_panel *panel, struct device *dev,
+		    const struct drm_panel_funcs *funcs, int connector_type)
 {
 	INIT_LIST_HEAD(&panel->list);
+	panel->dev = dev;
+	panel->funcs = funcs;
+	panel->connector_type = connector_type;
 }
 EXPORT_SYMBOL(drm_panel_init);
 
@@ -104,12 +114,6 @@ EXPORT_SYMBOL(drm_panel_remove);
  */
 int drm_panel_attach(struct drm_panel *panel, struct drm_connector *connector)
 {
-	if (panel->connector)
-		return -EBUSY;
-
-	panel->connector = connector;
-	panel->drm = connector->dev;
-
 	return 0;
 }
 EXPORT_SYMBOL(drm_panel_attach);
@@ -126,8 +130,6 @@ EXPORT_SYMBOL(drm_panel_attach);
  */
 void drm_panel_detach(struct drm_panel *panel)
 {
-	panel->connector = NULL;
-	panel->drm = NULL;
 }
 EXPORT_SYMBOL(drm_panel_detach);
 
@@ -143,10 +145,13 @@ EXPORT_SYMBOL(drm_panel_detach);
  */
 int drm_panel_prepare(struct drm_panel *panel)
 {
-	if (panel && panel->funcs && panel->funcs->prepare)
+	if (!panel)
+		return -EINVAL;
+
+	if (panel->funcs && panel->funcs->prepare)
 		return panel->funcs->prepare(panel);
 
-	return panel ? -ENOSYS : -EINVAL;
+	return 0;
 }
 EXPORT_SYMBOL(drm_panel_prepare);
 
@@ -163,10 +168,13 @@ EXPORT_SYMBOL(drm_panel_prepare);
  */
 int drm_panel_unprepare(struct drm_panel *panel)
 {
-	if (panel && panel->funcs && panel->funcs->unprepare)
+	if (!panel)
+		return -EINVAL;
+
+	if (panel->funcs && panel->funcs->unprepare)
 		return panel->funcs->unprepare(panel);
 
-	return panel ? -ENOSYS : -EINVAL;
+	return 0;
 }
 EXPORT_SYMBOL(drm_panel_unprepare);
 
@@ -182,10 +190,23 @@ EXPORT_SYMBOL(drm_panel_unprepare);
  */
 int drm_panel_enable(struct drm_panel *panel)
 {
-	if (panel && panel->funcs && panel->funcs->enable)
-		return panel->funcs->enable(panel);
+	int ret;
+
+	if (!panel)
+		return -EINVAL;
+
+	if (panel->funcs && panel->funcs->enable) {
+		ret = panel->funcs->enable(panel);
+		if (ret < 0)
+			return ret;
+	}
 
-	return panel ? -ENOSYS : -EINVAL;
+	ret = backlight_enable(panel->backlight);
+	if (ret < 0)
+		DRM_DEV_INFO(panel->dev, "failed to enable backlight: %d\n",
+			     ret);
+
+	return 0;
 }
 EXPORT_SYMBOL(drm_panel_enable);
 
@@ -201,16 +222,27 @@ EXPORT_SYMBOL(drm_panel_enable);
  */
 int drm_panel_disable(struct drm_panel *panel)
 {
-	if (panel && panel->funcs && panel->funcs->disable)
+	int ret;
+
+	if (!panel)
+		return -EINVAL;
+
+	ret = backlight_disable(panel->backlight);
+	if (ret < 0)
+		DRM_DEV_INFO(panel->dev, "failed to disable backlight: %d\n",
+			     ret);
+
+	if (panel->funcs && panel->funcs->disable)
 		return panel->funcs->disable(panel);
 
-	return panel ? -ENOSYS : -EINVAL;
+	return 0;
 }
 EXPORT_SYMBOL(drm_panel_disable);
 
 /**
  * drm_panel_get_modes - probe the available display modes of a panel
  * @panel: DRM panel
+ * @connector: DRM connector
  *
  * The modes probed from the panel are automatically added to the connector
  * that the panel is attached to.
@@ -218,12 +250,16 @@ EXPORT_SYMBOL(drm_panel_disable);
  * Return: The number of modes available from the panel on success or a
  * negative error code on failure.
  */
-int drm_panel_get_modes(struct drm_panel *panel)
+int drm_panel_get_modes(struct drm_panel *panel,
+			struct drm_connector *connector)
 {
-	if (panel && panel->funcs && panel->funcs->get_modes)
-		return panel->funcs->get_modes(panel);
+	if (!panel)
+		return -EINVAL;
 
-	return panel ? -ENOSYS : -EINVAL;
+	if (panel->funcs && panel->funcs->get_modes)
+		return panel->funcs->get_modes(panel, connector);
+
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(drm_panel_get_modes);
 
@@ -266,6 +302,45 @@ struct drm_panel *of_drm_find_panel(const struct device_node *np)
 EXPORT_SYMBOL(of_drm_find_panel);
 #endif
 
+#if IS_REACHABLE(CONFIG_BACKLIGHT_CLASS_DEVICE)
+/**
+ * drm_panel_of_backlight - use backlight device node for backlight
+ * @panel: DRM panel
+ *
+ * Use this function to enable backlight handling if your panel
+ * uses device tree and has a backlight phandle.
+ *
+ * When the panel is enabled backlight will be enabled after a
+ * successful call to &drm_panel_funcs.enable()
+ *
+ * When the panel is disabled backlight will be disabled before the
+ * call to &drm_panel_funcs.disable().
+ *
+ * A typical implementation for a panel driver supporting device tree
+ * will call this function at probe time. Backlight will then be handled
+ * transparently without requiring any intervention from the driver.
+ * drm_panel_of_backlight() must be called after the call to drm_panel_init().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int drm_panel_of_backlight(struct drm_panel *panel)
+{
+	struct backlight_device *backlight;
+
+	if (!panel || !panel->dev)
+		return -EINVAL;
+
+	backlight = devm_of_find_backlight(panel->dev);
+
+	if (IS_ERR(backlight))
+		return PTR_ERR(backlight);
+
+	panel->backlight = backlight;
+	return 0;
+}
+EXPORT_SYMBOL(drm_panel_of_backlight);
+#endif
+
 MODULE_AUTHOR("Thierry Reding <treding@nvidia.com>");
 MODULE_DESCRIPTION("DRM panel infrastructure");
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index a86a3ab2771c..f2e43d341980 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -125,8 +125,6 @@ void drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 
 EXPORT_SYMBOL(drm_pci_free);
 
-#ifdef CONFIG_PCI
-
 static int drm_get_pci_domain(struct drm_device *dev)
 {
 #ifndef __alpha__
@@ -284,6 +282,8 @@ err_free:
 }
 EXPORT_SYMBOL(drm_get_pci_dev);
 
+#ifdef CONFIG_DRM_LEGACY
+
 /**
  * drm_legacy_pci_init - shadow-attach a legacy DRM PCI driver
  * @driver: DRM device driver
@@ -331,17 +331,6 @@ int drm_legacy_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 }
 EXPORT_SYMBOL(drm_legacy_pci_init);
 
-#else
-
-void drm_pci_agp_destroy(struct drm_device *dev) {}
-
-int drm_irq_by_busid(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	return -EINVAL;
-}
-#endif
-
 /**
  * drm_legacy_pci_exit - unregister shadow-attach legacy DRM driver
  * @driver: DRM device driver
@@ -367,3 +356,5 @@ void drm_legacy_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver)
 	DRM_INFO("Module unloaded\n");
 }
 EXPORT_SYMBOL(drm_legacy_pci_exit);
+
+#endif
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 0a2316e0e812..86d9b0e45c8c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -240,6 +240,7 @@ void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv)
 struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev,
 				      struct dma_buf_export_info *exp_info)
 {
+	struct drm_gem_object *obj = exp_info->priv;
 	struct dma_buf *dma_buf;
 
 	dma_buf = dma_buf_export(exp_info);
@@ -247,7 +248,8 @@ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev,
 		return dma_buf;
 
 	drm_dev_get(dev);
-	drm_gem_object_get(exp_info->priv);
+	drm_gem_object_get(obj);
+	dma_buf->file->f_mapping = obj->dev->anon_inode->i_mapping;
 
 	return dma_buf;
 }
@@ -713,6 +715,18 @@ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	struct file *fil;
 	int ret;
 
+	/* Add the fake offset */
+	vma->vm_pgoff += drm_vma_node_start(&obj->vma_node);
+
+	if (obj->funcs && obj->funcs->mmap) {
+		ret = obj->funcs->mmap(obj, vma);
+		if (ret)
+			return ret;
+		vma->vm_private_data = obj;
+		drm_gem_object_get(obj);
+		return 0;
+	}
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	fil = kzalloc(sizeof(*fil), GFP_KERNEL);
 	if (!priv || !fil) {
@@ -728,8 +742,6 @@ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	if (ret)
 		goto out;
 
-	vma->vm_pgoff += drm_vma_node_start(&obj->vma_node);
-
 	ret = obj->dev->driver->fops->mmap(fil, vma);
 
 	drm_vma_node_revoke(&obj->vma_node, priv);
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index a17c8a14dba4..111b932cf2a9 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -28,6 +28,7 @@
 #include <stdarg.h>
 
 #include <linux/io.h>
+#include <linux/moduleparam.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 
@@ -35,6 +36,24 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_print.h>
 
+/*
+ * __drm_debug: Enable debug output.
+ * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
+ */
+unsigned int __drm_debug;
+EXPORT_SYMBOL(__drm_debug);
+
+MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug category.\n"
+"\t\tBit 0 (0x01)  will enable CORE messages (drm core code)\n"
+"\t\tBit 1 (0x02)  will enable DRIVER messages (drm controller code)\n"
+"\t\tBit 2 (0x04)  will enable KMS messages (modesetting code)\n"
+"\t\tBit 3 (0x08)  will enable PRIME messages (prime code)\n"
+"\t\tBit 4 (0x10)  will enable ATOMIC messages (atomic code)\n"
+"\t\tBit 5 (0x20)  will enable VBL messages (vblank code)\n"
+"\t\tBit 7 (0x80)  will enable LEASE messages (leasing code)\n"
+"\t\tBit 8 (0x100) will enable DP messages (displayport code)");
+module_param_named(debug, __drm_debug, int, 0600);
+
 void __drm_puts_coredump(struct drm_printer *p, const char *str)
 {
 	struct drm_print_iterator *iterator = p->arg;
@@ -147,6 +166,12 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf)
 }
 EXPORT_SYMBOL(__drm_printfn_debug);
 
+void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+	pr_err("*ERROR* %s %pV", p->prefix, vaf);
+}
+EXPORT_SYMBOL(__drm_printfn_err);
+
 /**
  * drm_puts - print a const string to a &drm_printer stream
  * @p: the &drm printer
@@ -179,6 +204,37 @@ void drm_printf(struct drm_printer *p, const char *f, ...)
 }
 EXPORT_SYMBOL(drm_printf);
 
+/**
+ * drm_print_bits - print bits to a &drm_printer stream
+ *
+ * Print bits (in flag fields for example) in human readable form.
+ *
+ * @p: the &drm_printer
+ * @value: field value.
+ * @bits: Array with bit names.
+ * @nbits: Size of bit names array.
+ */
+void drm_print_bits(struct drm_printer *p, unsigned long value,
+		    const char * const bits[], unsigned int nbits)
+{
+	bool first = true;
+	unsigned int i;
+
+	if (WARN_ON_ONCE(nbits > BITS_PER_TYPE(value)))
+		nbits = BITS_PER_TYPE(value);
+
+	for_each_set_bit(i, &value, nbits) {
+		if (WARN_ON_ONCE(!bits[i]))
+			continue;
+		drm_printf(p, "%s%s", first ? "" : ",",
+			   bits[i]);
+		first = false;
+	}
+	if (first)
+		drm_printf(p, "(none)");
+}
+EXPORT_SYMBOL(drm_print_bits);
+
 void drm_dev_printk(const struct device *dev, const char *level,
 		    const char *format, ...)
 {
@@ -200,13 +256,13 @@ void drm_dev_printk(const struct device *dev, const char *level,
 }
 EXPORT_SYMBOL(drm_dev_printk);
 
-void drm_dev_dbg(const struct device *dev, unsigned int category,
+void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
 		 const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
 
-	if (!(drm_debug & category))
+	if (!drm_debug_enabled(category))
 		return;
 
 	va_start(args, format);
@@ -224,12 +280,12 @@ void drm_dev_dbg(const struct device *dev, unsigned int category,
 }
 EXPORT_SYMBOL(drm_dev_dbg);
 
-void drm_dbg(unsigned int category, const char *format, ...)
+void __drm_dbg(enum drm_debug_category category, const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
 
-	if (!(drm_debug & category))
+	if (!drm_debug_enabled(category))
 		return;
 
 	va_start(args, format);
@@ -241,9 +297,9 @@ void drm_dbg(unsigned int category, const char *format, ...)
 
 	va_end(args);
 }
-EXPORT_SYMBOL(drm_dbg);
+EXPORT_SYMBOL(__drm_dbg);
 
-void drm_err(const char *format, ...)
+void __drm_err(const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
@@ -257,7 +313,7 @@ void drm_err(const char *format, ...)
 
 	va_end(args);
 }
-EXPORT_SYMBOL(drm_err);
+EXPORT_SYMBOL(__drm_err);
 
 /**
  * drm_print_regset32 - print the contents of registers to a
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index ef2c468205a2..576b4b7dcd89 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -32,6 +32,7 @@
 #include <linux/export.h>
 #include <linux/moduleparam.h>
 
+#include <drm/drm_bridge.h>
 #include <drm/drm_client.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
@@ -92,7 +93,6 @@ drm_mode_validate_pipeline(struct drm_display_mode *mode,
 	struct drm_device *dev = connector->dev;
 	enum drm_mode_status ret = MODE_OK;
 	struct drm_encoder *encoder;
-	int i;
 
 	/* Step 1: Validate against connector */
 	ret = drm_connector_mode_valid(connector, mode);
@@ -100,7 +100,8 @@ drm_mode_validate_pipeline(struct drm_display_mode *mode,
 		return ret;
 
 	/* Step 2: Validate against encoders and crtcs */
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+	drm_connector_for_each_possible_encoder(connector, encoder) {
+		struct drm_bridge *bridge;
 		struct drm_crtc *crtc;
 
 		ret = drm_encoder_mode_valid(encoder, mode);
@@ -112,7 +113,8 @@ drm_mode_validate_pipeline(struct drm_display_mode *mode,
 			continue;
 		}
 
-		ret = drm_bridge_mode_valid(encoder->bridge, mode);
+		bridge = drm_bridge_chain_get_first_bridge(encoder);
+		ret = drm_bridge_chain_mode_valid(bridge, mode);
 		if (ret != MODE_OK) {
 			/* There is also no point in continuing for crtc check
 			 * here. */
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 892ce636ef72..6ee04803c362 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
 	struct drm_property_blob *blob;
 	int ret;
 
-	if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
+	if (!length || length > INT_MAX - sizeof(struct drm_property_blob))
 		return ERR_PTR(-EINVAL);
 
 	blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index b8363aaa9032..0460e874896e 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -52,9 +52,17 @@ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2)
 }
 EXPORT_SYMBOL(drm_rect_intersect);
 
-static u32 clip_scaled(u32 src, u32 dst, u32 clip)
+static u32 clip_scaled(int src, int dst, int *clip)
 {
-	u64 tmp = mul_u32_u32(src, dst - clip);
+	u64 tmp;
+
+	if (dst == 0)
+		return 0;
+
+	/* Only clip what we have. Keeps the result bounded. */
+	*clip = min(*clip, dst);
+
+	tmp = mul_u32_u32(src, dst - *clip);
 
 	/*
 	 * Round toward 1.0 when clipping so that we don't accidentally
@@ -73,11 +81,13 @@ static u32 clip_scaled(u32 src, u32 dst, u32 clip)
  * @clip: clip rectangle
  *
  * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the
- * same amounts multiplied by @hscale and @vscale.
+ * the corresponding amounts, retaining the vertical and horizontal scaling
+ * factors from @src to @dst.
  *
  * RETURNS:
+ *
  * %true if rectangle @dst is still visible after being clipped,
- * %false otherwise
+ * %false otherwise.
  */
 bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
 			  const struct drm_rect *clip)
@@ -87,34 +97,34 @@ bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
 	diff = clip->x1 - dst->x1;
 	if (diff > 0) {
 		u32 new_src_w = clip_scaled(drm_rect_width(src),
-					    drm_rect_width(dst), diff);
+					    drm_rect_width(dst), &diff);
 
-		src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX);
-		dst->x1 = clip->x1;
+		src->x1 = src->x2 - new_src_w;
+		dst->x1 += diff;
 	}
 	diff = clip->y1 - dst->y1;
 	if (diff > 0) {
 		u32 new_src_h = clip_scaled(drm_rect_height(src),
-					    drm_rect_height(dst), diff);
+					    drm_rect_height(dst), &diff);
 
-		src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX);
-		dst->y1 = clip->y1;
+		src->y1 = src->y2 - new_src_h;
+		dst->y1 += diff;
 	}
 	diff = dst->x2 - clip->x2;
 	if (diff > 0) {
 		u32 new_src_w = clip_scaled(drm_rect_width(src),
-					    drm_rect_width(dst), diff);
+					    drm_rect_width(dst), &diff);
 
-		src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX);
-		dst->x2 = clip->x2;
+		src->x2 = src->x1 + new_src_w;
+		dst->x2 -= diff;
 	}
 	diff = dst->y2 - clip->y2;
 	if (diff > 0) {
 		u32 new_src_h = clip_scaled(drm_rect_height(src),
-					    drm_rect_height(dst), diff);
+					    drm_rect_height(dst), &diff);
 
-		src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX);
-		dst->y2 = clip->y2;
+		src->y2 = src->y1 + new_src_h;
+		dst->y2 -= diff;
 	}
 
 	return drm_rect_visible(dst);
diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c
index b11910f14c46..15fb516ae2d8 100644
--- a/drivers/gpu/drm/drm_simple_kms_helper.c
+++ b/drivers/gpu/drm/drm_simple_kms_helper.c
@@ -8,6 +8,7 @@
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
@@ -42,7 +43,7 @@ drm_simple_kms_crtc_mode_valid(struct drm_crtc *crtc,
 		/* Anything goes */
 		return MODE_OK;
 
-	return pipe->funcs->mode_valid(crtc, mode);
+	return pipe->funcs->mode_valid(pipe, mode);
 }
 
 static int drm_simple_kms_crtc_check(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 4b5c7b0ed714..669c93fe2500 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -135,6 +135,7 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_print.h>
 #include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
 
 #include "drm_internal.h"
 
@@ -1279,7 +1280,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
 		return -EOPNOTSUPP;
 
-	if (args->pad != 0)
+	if (args->flags != 0)
 		return -EINVAL;
 
 	if (args->count_handles == 0)
@@ -1350,7 +1351,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
 		return -EOPNOTSUPP;
 
-	if (args->pad != 0)
+	if (args->flags & ~DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED)
 		return -EINVAL;
 
 	if (args->count_handles == 0)
@@ -1371,25 +1372,32 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 		fence = drm_syncobj_fence_get(syncobjs[i]);
 		chain = to_dma_fence_chain(fence);
 		if (chain) {
-			struct dma_fence *iter, *last_signaled = NULL;
-
-			dma_fence_chain_for_each(iter, fence) {
-				if (iter->context != fence->context) {
-					dma_fence_put(iter);
-					/* It is most likely that timeline has
-					 * unorder points. */
-					break;
+			struct dma_fence *iter, *last_signaled =
+				dma_fence_get(fence);
+
+			if (args->flags &
+			    DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED) {
+				point = fence->seqno;
+			} else {
+				dma_fence_chain_for_each(iter, fence) {
+					if (iter->context != fence->context) {
+						dma_fence_put(iter);
+						/* It is most likely that timeline has
+						* unorder points. */
+						break;
+					}
+					dma_fence_put(last_signaled);
+					last_signaled = dma_fence_get(iter);
 				}
-				dma_fence_put(last_signaled);
-				last_signaled = dma_fence_get(iter);
+				point = dma_fence_is_signaled(last_signaled) ?
+					last_signaled->seqno :
+					to_dma_fence_chain(last_signaled)->prev_seqno;
 			}
-			point = dma_fence_is_signaled(last_signaled) ?
-				last_signaled->seqno :
-				to_dma_fence_chain(last_signaled)->prev_seqno;
 			dma_fence_put(last_signaled);
 		} else {
 			point = 0;
 		}
+		dma_fence_put(fence);
 		ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
 		ret = ret ? -EFAULT : 0;
 		if (ret)
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 471eb927474b..11c6dd577e8e 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -13,17 +13,23 @@ struct drm_file;
 #define TRACE_INCLUDE_FILE drm_trace
 
 TRACE_EVENT(drm_vblank_event,
-	    TP_PROTO(int crtc, unsigned int seq),
-	    TP_ARGS(crtc, seq),
+	    TP_PROTO(int crtc, unsigned int seq, ktime_t time, bool high_prec),
+	    TP_ARGS(crtc, seq, time, high_prec),
 	    TP_STRUCT__entry(
 		    __field(int, crtc)
 		    __field(unsigned int, seq)
+		    __field(ktime_t, time)
+		    __field(bool, high_prec)
 		    ),
 	    TP_fast_assign(
 		    __entry->crtc = crtc;
 		    __entry->seq = seq;
-		    ),
-	    TP_printk("crtc=%d, seq=%u", __entry->crtc, __entry->seq)
+		    __entry->time = time;
+		    __entry->high_prec = high_prec;
+			),
+	    TP_printk("crtc=%d, seq=%u, time=%lld, high-prec=%s",
+			__entry->crtc, __entry->seq, __entry->time,
+			__entry->high_prec ? "true" : "false")
 );
 
 TRACE_EVENT(drm_vblank_event_queued,
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index fd1fbc77871f..1659b13b178c 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -106,7 +106,7 @@ static void store_vblank(struct drm_device *dev, unsigned int pipe,
 
 	write_seqlock(&vblank->seqlock);
 	vblank->time = t_vblank;
-	vblank->count += vblank_count_inc;
+	atomic64_add(vblank_count_inc, &vblank->count);
 	write_sequnlock(&vblank->seqlock);
 }
 
@@ -272,7 +272,8 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
 
 	DRM_DEBUG_VBL("updating vblank count on crtc %u:"
 		      " current=%llu, diff=%u, hw=%u hw_last=%u\n",
-		      pipe, vblank->count, diff, cur_vblank, vblank->last);
+		      pipe, atomic64_read(&vblank->count), diff,
+		      cur_vblank, vblank->last);
 
 	if (diff == 0) {
 		WARN_ON_ONCE(cur_vblank != vblank->last);
@@ -294,11 +295,23 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
 static u64 drm_vblank_count(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+	u64 count;
 
 	if (WARN_ON(pipe >= dev->num_crtcs))
 		return 0;
 
-	return vblank->count;
+	count = atomic64_read(&vblank->count);
+
+	/*
+	 * This read barrier corresponds to the implicit write barrier of the
+	 * write seqlock in store_vblank(). Note that this is the only place
+	 * where we need an explicit barrier, since all other access goes
+	 * through drm_vblank_count_and_time(), which already has the required
+	 * read barrier curtesy of the read seqlock.
+	 */
+	smp_rmb();
+
+	return count;
 }
 
 /**
@@ -319,7 +332,7 @@ u64 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc)
 	u64 vblank;
 	unsigned long flags;
 
-	WARN_ONCE(drm_debug & DRM_UT_VBL && !dev->driver->get_vblank_timestamp,
+	WARN_ONCE(drm_debug_enabled(DRM_UT_VBL) && !dev->driver->get_vblank_timestamp,
 		  "This function requires support for accurate vblank timestamps.");
 
 	spin_lock_irqsave(&dev->vblank_time_lock, flags);
@@ -693,7 +706,7 @@ bool drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev,
 	 */
 	*vblank_time = ktime_sub_ns(etime, delta_ns);
 
-	if ((drm_debug & DRM_UT_VBL) == 0)
+	if (!drm_debug_enabled(DRM_UT_VBL))
 		return true;
 
 	ts_etime = ktime_to_timespec64(etime);
@@ -763,6 +776,14 @@ drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe,
  * vblank interrupt (since it only reports the software vblank counter), see
  * drm_crtc_accurate_vblank_count() for such use-cases.
  *
+ * Note that for a given vblank counter value drm_crtc_handle_vblank()
+ * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time()
+ * provide a barrier: Any writes done before calling
+ * drm_crtc_handle_vblank() will be visible to callers of the later
+ * functions, iff the vblank count is the same or a later one.
+ *
+ * See also &drm_vblank_crtc.count.
+ *
  * Returns:
  * The software vblank counter.
  */
@@ -800,7 +821,7 @@ static u64 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
 
 	do {
 		seq = read_seqbegin(&vblank->seqlock);
-		vblank_count = vblank->count;
+		vblank_count = atomic64_read(&vblank->count);
 		*vblanktime = vblank->time;
 	} while (read_seqretry(&vblank->seqlock, seq));
 
@@ -817,6 +838,14 @@ static u64 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
  * vblank events since the system was booted, including lost events due to
  * modesetting activity. Returns corresponding system timestamp of the time
  * of the vblank interval that corresponds to the current vblank counter value.
+ *
+ * Note that for a given vblank counter value drm_crtc_handle_vblank()
+ * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time()
+ * provide a barrier: Any writes done before calling
+ * drm_crtc_handle_vblank() will be visible to callers of the later
+ * functions, iff the vblank count is the same or a later one.
+ *
+ * See also &drm_vblank_crtc.count.
  */
 u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
 				   ktime_t *vblanktime)
@@ -1323,7 +1352,7 @@ void drm_vblank_restore(struct drm_device *dev, unsigned int pipe)
 	assert_spin_locked(&dev->vblank_time_lock);
 
 	vblank = &dev->vblank[pipe];
-	WARN_ONCE((drm_debug & DRM_UT_VBL) && !vblank->framedur_ns,
+	WARN_ONCE(drm_debug_enabled(DRM_UT_VBL) && !vblank->framedur_ns,
 		  "Cannot compute missed vblanks without frame duration\n");
 	framedur_ns = vblank->framedur_ns;
 
@@ -1581,7 +1610,7 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data,
 	unsigned int flags, pipe, high_pipe;
 
 	if (!dev->irq_enabled)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	if (vblwait->request.type & _DRM_VBLANK_SIGNAL)
 		return -EINVAL;
@@ -1731,7 +1760,8 @@ static void drm_handle_vblank_events(struct drm_device *dev, unsigned int pipe)
 		send_vblank_event(dev, e, seq, now);
 	}
 
-	trace_drm_vblank_event(pipe, seq);
+	trace_drm_vblank_event(pipe, seq, now,
+			dev->driver->get_vblank_timestamp != NULL);
 }
 
 /**
@@ -1806,6 +1836,14 @@ EXPORT_SYMBOL(drm_handle_vblank);
  *
  * This is the native KMS version of drm_handle_vblank().
  *
+ * Note that for a given vblank counter value drm_crtc_handle_vblank()
+ * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time()
+ * provide a barrier: Any writes done before calling
+ * drm_crtc_handle_vblank() will be visible to callers of the later
+ * functions, iff the vblank count is the same or a later one.
+ *
+ * See also &drm_vblank_crtc.count.
+ *
  * Returns:
  * True if the event was successfully handled, false on failure.
  */
@@ -1838,7 +1876,7 @@ int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data,
 		return -EOPNOTSUPP;
 
 	if (!dev->irq_enabled)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	crtc = drm_crtc_find(dev, file_priv, get_seq->crtc_id);
 	if (!crtc)
@@ -1896,7 +1934,7 @@ int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data,
 		return -EOPNOTSUPP;
 
 	if (!dev->irq_enabled)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	crtc = drm_crtc_find(dev, file_priv, queue_seq->crtc_id);
 	if (!crtc)
diff --git a/drivers/gpu/drm/drm_vram_helper_common.c b/drivers/gpu/drm/drm_vram_helper_common.c
index e9c9f9a80ba3..2000d9b33fd5 100644
--- a/drivers/gpu/drm/drm_vram_helper_common.c
+++ b/drivers/gpu/drm/drm_vram_helper_common.c
@@ -7,9 +7,8 @@
  *
  * This library provides &struct drm_gem_vram_object (GEM VRAM), a GEM
  * buffer object that is backed by video RAM. It can be used for
- * framebuffer devices with dedicated memory. The video RAM can be
- * managed with &struct drm_vram_mm (VRAM MM). Both data structures are
- * supposed to be used together, but can also be used individually.
+ * framebuffer devices with dedicated memory. The video RAM is managed
+ * by &struct drm_vram_mm (VRAM MM).
  *
  * With the GEM interface userspace applications create, manage and destroy
  * graphics buffers, such as an on-screen framebuffer. GEM does not provide
@@ -50,8 +49,7 @@
  *		// setup device, vram base and size
  *		// ...
  *
- *		ret = drm_vram_helper_alloc_mm(dev, vram_base, vram_size,
- *					       &drm_gem_vram_mm_funcs);
+ *		ret = drm_vram_helper_alloc_mm(dev, vram_base, vram_size);
  *		if (ret)
  *			return ret;
  *		return 0;
diff --git a/drivers/gpu/drm/drm_vram_mm_helper.c b/drivers/gpu/drm/drm_vram_mm_helper.c
deleted file mode 100644
index c911781d6728..000000000000
--- a/drivers/gpu/drm/drm_vram_mm_helper.c
+++ /dev/null
@@ -1,297 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <drm/drm_device.h>
-#include <drm/drm_file.h>
-#include <drm/drm_vram_mm_helper.h>
-
-#include <drm/ttm/ttm_page_alloc.h>
-
-/**
- * DOC: overview
- *
- * The data structure &struct drm_vram_mm and its helpers implement a memory
- * manager for simple framebuffer devices with dedicated video memory. Buffer
- * objects are either placed in video RAM or evicted to system memory. These
- * helper functions work well with &struct drm_gem_vram_object.
- */
-
-/*
- * TTM TT
- */
-
-static void backend_func_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func backend_func = {
-	.destroy = backend_func_destroy
-};
-
-/*
- * TTM BO device
- */
-
-static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
-					      uint32_t page_flags)
-{
-	struct ttm_tt *tt;
-	int ret;
-
-	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
-	if (!tt)
-		return NULL;
-
-	tt->func = &backend_func;
-
-	ret = ttm_tt_init(tt, bo, page_flags);
-	if (ret < 0)
-		goto err_ttm_tt_init;
-
-	return tt;
-
-err_ttm_tt_init:
-	kfree(tt);
-	return NULL;
-}
-
-static int bo_driver_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-				   struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			     TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-					 TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void bo_driver_evict_flags(struct ttm_buffer_object *bo,
-				  struct ttm_placement *placement)
-{
-	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bo->bdev);
-
-	if (vmm->funcs && vmm->funcs->evict_flags)
-		vmm->funcs->evict_flags(bo, placement);
-}
-
-static int bo_driver_verify_access(struct ttm_buffer_object *bo,
-				   struct file *filp)
-{
-	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bo->bdev);
-
-	if (!vmm->funcs || !vmm->funcs->verify_access)
-		return 0;
-	return vmm->funcs->verify_access(bo, filp);
-}
-
-static int bo_driver_io_mem_reserve(struct ttm_bo_device *bdev,
-				    struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = bdev->man + mem->mem_type;
-	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bdev);
-
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-
-	mem->bus.addr = NULL;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:	/* nothing to do */
-		mem->bus.offset = 0;
-		mem->bus.base = 0;
-		mem->bus.is_iomem = false;
-		break;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = vmm->vram_base;
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void bo_driver_io_mem_free(struct ttm_bo_device *bdev,
-				  struct ttm_mem_reg *mem)
-{ }
-
-static struct ttm_bo_driver bo_driver = {
-	.ttm_tt_create = bo_driver_ttm_tt_create,
-	.ttm_tt_populate = ttm_pool_populate,
-	.ttm_tt_unpopulate = ttm_pool_unpopulate,
-	.init_mem_type = bo_driver_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = bo_driver_evict_flags,
-	.verify_access = bo_driver_verify_access,
-	.io_mem_reserve = bo_driver_io_mem_reserve,
-	.io_mem_free = bo_driver_io_mem_free,
-};
-
-/*
- * struct drm_vram_mm
- */
-
-/**
- * drm_vram_mm_init() - Initialize an instance of VRAM MM.
- * @vmm:	the VRAM MM instance to initialize
- * @dev:	the DRM device
- * @vram_base:	the base address of the video memory
- * @vram_size:	the size of the video memory in bytes
- * @funcs:	callback functions for buffer objects
- *
- * Returns:
- * 0 on success, or
- * a negative error code otherwise.
- */
-int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
-		     uint64_t vram_base, size_t vram_size,
-		     const struct drm_vram_mm_funcs *funcs)
-{
-	int ret;
-
-	vmm->vram_base = vram_base;
-	vmm->vram_size = vram_size;
-	vmm->funcs = funcs;
-
-	ret = ttm_bo_device_init(&vmm->bdev, &bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret)
-		return ret;
-
-	ret = ttm_bo_init_mm(&vmm->bdev, TTM_PL_VRAM, vram_size >> PAGE_SHIFT);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_vram_mm_init);
-
-/**
- * drm_vram_mm_cleanup() - Cleans up an initialized instance of VRAM MM.
- * @vmm:	the VRAM MM instance to clean up
- */
-void drm_vram_mm_cleanup(struct drm_vram_mm *vmm)
-{
-	ttm_bo_device_release(&vmm->bdev);
-}
-EXPORT_SYMBOL(drm_vram_mm_cleanup);
-
-/**
- * drm_vram_mm_mmap() - Helper for implementing &struct file_operations.mmap()
- * @filp:	the mapping's file structure
- * @vma:	the mapping's memory area
- * @vmm:	the VRAM MM instance
- *
- * Returns:
- * 0 on success, or
- * a negative error code otherwise.
- */
-int drm_vram_mm_mmap(struct file *filp, struct vm_area_struct *vma,
-		     struct drm_vram_mm *vmm)
-{
-	return ttm_bo_mmap(filp, vma, &vmm->bdev);
-}
-EXPORT_SYMBOL(drm_vram_mm_mmap);
-
-/*
- * Helpers for integration with struct drm_device
- */
-
-/**
- * drm_vram_helper_alloc_mm - Allocates a device's instance of \
-	&struct drm_vram_mm
- * @dev:	the DRM device
- * @vram_base:	the base address of the video memory
- * @vram_size:	the size of the video memory in bytes
- * @funcs:	callback functions for buffer objects
- *
- * Returns:
- * The new instance of &struct drm_vram_mm on success, or
- * an ERR_PTR()-encoded errno code otherwise.
- */
-struct drm_vram_mm *drm_vram_helper_alloc_mm(
-	struct drm_device *dev, uint64_t vram_base, size_t vram_size,
-	const struct drm_vram_mm_funcs *funcs)
-{
-	int ret;
-
-	if (WARN_ON(dev->vram_mm))
-		return dev->vram_mm;
-
-	dev->vram_mm = kzalloc(sizeof(*dev->vram_mm), GFP_KERNEL);
-	if (!dev->vram_mm)
-		return ERR_PTR(-ENOMEM);
-
-	ret = drm_vram_mm_init(dev->vram_mm, dev, vram_base, vram_size, funcs);
-	if (ret)
-		goto err_kfree;
-
-	return dev->vram_mm;
-
-err_kfree:
-	kfree(dev->vram_mm);
-	dev->vram_mm = NULL;
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(drm_vram_helper_alloc_mm);
-
-/**
- * drm_vram_helper_release_mm - Releases a device's instance of \
-	&struct drm_vram_mm
- * @dev:	the DRM device
- */
-void drm_vram_helper_release_mm(struct drm_device *dev)
-{
-	if (!dev->vram_mm)
-		return;
-
-	drm_vram_mm_cleanup(dev->vram_mm);
-	kfree(dev->vram_mm);
-	dev->vram_mm = NULL;
-}
-EXPORT_SYMBOL(drm_vram_helper_release_mm);
-
-/*
- * Helpers for &struct file_operations
- */
-
-/**
- * drm_vram_mm_file_operations_mmap() - \
-	Implements &struct file_operations.mmap()
- * @filp:	the mapping's file structure
- * @vma:	the mapping's memory area
- *
- * Returns:
- * 0 on success, or
- * a negative error code otherwise.
- */
-int drm_vram_mm_file_operations_mmap(
-	struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct drm_device *dev = file_priv->minor->dev;
-
-	if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
-		return -EINVAL;
-
-	return drm_vram_mm_mmap(filp, vma, dev->vram_mm);
-}
-EXPORT_SYMBOL(drm_vram_mm_file_operations_mmap);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 7e4e2959bf4f..32d9fac587f9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -326,7 +326,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 
 	lockdep_assert_held(&gpu->lock);
 
-	if (drm_debug & DRM_UT_DRIVER)
+	if (drm_debug_enabled(DRM_UT_DRIVER))
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
 	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
@@ -459,13 +459,13 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
 		 + buffer->user_size - 4);
 
-	if (drm_debug & DRM_UT_DRIVER)
+	if (drm_debug_enabled(DRM_UT_DRIVER))
 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
 			return_target,
 			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
 			cmdbuf->vaddr);
 
-	if (drm_debug & DRM_UT_DRIVER) {
+	if (drm_debug_enabled(DRM_UT_DRIVER)) {
 		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
 			       cmdbuf->vaddr, cmdbuf->size, 0);
 
@@ -484,6 +484,6 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
 				    link_target);
 
-	if (drm_debug & DRM_UT_DRIVER)
+	if (drm_debug_enabled(DRM_UT_DRIVER))
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 1f9c01be40d7..6b43c1c94e8f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -65,12 +65,13 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
-		struct drm_sched_rq *rq;
+		struct drm_gpu_scheduler *sched;
 
 		if (gpu) {
-			rq = &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+			sched = &gpu->sched;
 			drm_sched_entity_init(&ctx->sched_entity[i],
-					      &rq, 1, NULL);
+					      DRM_SCHED_PRIORITY_NORMAL, &sched,
+					      1, NULL);
 			}
 	}
 
@@ -282,11 +283,6 @@ static int etnaviv_ioctl_gem_new(struct drm_device *dev, void *data,
 			args->flags, &args->handle);
 }
 
-#define TS(t) ((struct timespec){ \
-	.tv_sec = (t).tv_sec, \
-	.tv_nsec = (t).tv_nsec \
-})
-
 static int etnaviv_ioctl_gem_cpu_prep(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -301,7 +297,7 @@ static int etnaviv_ioctl_gem_cpu_prep(struct drm_device *dev, void *data,
 	if (!obj)
 		return -ENOENT;
 
-	ret = etnaviv_gem_cpu_prep(obj, args->op, &TS(args->timeout));
+	ret = etnaviv_gem_cpu_prep(obj, args->op, &args->timeout);
 
 	drm_gem_object_put_unlocked(obj);
 
@@ -354,7 +350,7 @@ static int etnaviv_ioctl_wait_fence(struct drm_device *dev, void *data,
 {
 	struct drm_etnaviv_wait_fence *args = data;
 	struct etnaviv_drm_private *priv = dev->dev_private;
-	struct timespec *timeout = &TS(args->timeout);
+	struct drm_etnaviv_timespec *timeout = &args->timeout;
 	struct etnaviv_gpu *gpu;
 
 	if (args->flags & ~(ETNA_WAIT_NONBLOCK))
@@ -403,7 +399,7 @@ static int etnaviv_ioctl_gem_wait(struct drm_device *dev, void *data,
 {
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct drm_etnaviv_gem_wait *args = data;
-	struct timespec *timeout = &TS(args->timeout);
+	struct drm_etnaviv_timespec *timeout = &args->timeout;
 	struct drm_gem_object *obj;
 	struct etnaviv_gpu *gpu;
 	int ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 32cfa5a48d42..efc656efeb0f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -61,7 +61,7 @@ int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
 void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
-		struct timespec *timeout);
+		struct drm_etnaviv_timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
 void etnaviv_gem_free_object(struct drm_gem_object *obj);
 int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
@@ -107,11 +107,12 @@ static inline size_t size_vstruct(size_t nelem, size_t elem_size, size_t base)
  * between the specified timeout and the current CLOCK_MONOTONIC time.
  */
 static inline unsigned long etnaviv_timeout_to_jiffies(
-	const struct timespec *timeout)
+	const struct drm_etnaviv_timespec *timeout)
 {
-	struct timespec64 ts, to;
-
-	to = timespec_to_timespec64(*timeout);
+	struct timespec64 ts, to = {
+		.tv_sec = timeout->tv_sec,
+		.tv_nsec = timeout->tv_nsec,
+	};
 
 	ktime_get_ts64(&ts);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index cb1faaac380a..6adea180d629 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -373,7 +373,7 @@ static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 }
 
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
-		struct timespec *timeout)
+		struct drm_etnaviv_timespec *timeout)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct drm_device *dev = obj->dev;
@@ -431,7 +431,7 @@ int etnaviv_gem_cpu_fini(struct drm_gem_object *obj)
 }
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
-	struct timespec *timeout)
+	struct drm_etnaviv_timespec *timeout)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index d6270acce619..6b68fe16041b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -112,7 +112,7 @@ struct etnaviv_gem_submit {
 void etnaviv_submit_put(struct etnaviv_gem_submit * submit);
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
-	struct timespec *timeout);
+	struct drm_etnaviv_timespec *timeout);
 int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
 	const struct etnaviv_gem_ops *ops, struct etnaviv_gem_object **res);
 void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index aa3e4c3b063a..3b0afa156d92 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -113,7 +113,7 @@ static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i)
 	if (submit->bos[i].flags & BO_LOCKED) {
 		struct drm_gem_object *obj = &submit->bos[i].obj->base;
 
-		ww_mutex_unlock(&obj->resv->lock);
+		dma_resv_unlock(obj->resv);
 		submit->bos[i].flags &= ~BO_LOCKED;
 	}
 }
@@ -133,8 +133,7 @@ retry:
 		contended = i;
 
 		if (!(submit->bos[i].flags & BO_LOCKED)) {
-			ret = ww_mutex_lock_interruptible(&obj->resv->lock,
-							  ticket);
+			ret = dma_resv_lock_interruptible(obj->resv, ticket);
 			if (ret == -EALREADY)
 				DRM_ERROR("BO at index %u already on submit list\n",
 					  i);
@@ -161,8 +160,7 @@ fail:
 		obj = &submit->bos[contended].obj->base;
 
 		/* we lost out in a seqno race, lock and retry.. */
-		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
-						       ticket);
+		ret = dma_resv_lock_slow_interruptible(obj->resv, ticket);
 		if (!ret) {
 			submit->bos[contended].flags |= BO_LOCKED;
 			slow_locked = contended;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d47d1a8e0219..799ec20b267d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1132,7 +1132,7 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
  * Cmdstream submission/retirement:
  */
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
-	u32 id, struct timespec *timeout)
+	u32 id, struct drm_etnaviv_timespec *timeout)
 {
 	struct dma_fence *fence;
 	int ret;
@@ -1179,7 +1179,8 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
  * that lock in this function while waiting.
  */
 int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout)
+	struct etnaviv_gem_object *etnaviv_obj,
+	struct drm_etnaviv_timespec *timeout)
 {
 	unsigned long remaining;
 	long ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 8f9bd4edc96a..97bb48042b4d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -169,9 +169,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
-	u32 fence, struct timespec *timeout);
+	u32 fence, struct drm_etnaviv_timespec *timeout);
 int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
+	struct etnaviv_gem_object *etnaviv_obj,
+	struct drm_etnaviv_timespec *timeout);
 struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 6f7d3b3b3628..6417f374b923 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_EXYNOS
-	tristate "DRM Support for Samsung SoC EXYNOS Series"
+	tristate "DRM Support for Samsung SoC Exynos Series"
 	depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST)
 	depends on MMU
 	select DRM_KMS_HELPER
 	select VIDEOMODE_HELPERS
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
-	  Choose this option if you have a Samsung SoC EXYNOS chipset.
+	  Choose this option if you have a Samsung SoC Exynos chipset.
 	  If M is selected the module will be called exynosdrm.
 
 if DRM_EXYNOS
@@ -62,7 +62,7 @@ config DRM_EXYNOS_DSI
 	  This enables support for Exynos MIPI-DSI device.
 
 config DRM_EXYNOS_DP
-	bool "EXYNOS specific extensions for Analogix DP driver"
+	bool "Exynos specific extensions for Analogix DP driver"
 	depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON
 	select DRM_ANALOGIX_DP
 	default DRM_EXYNOS
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 2d5cbfda3ca7..8428ae12dfa5 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -510,7 +510,7 @@ static void decon_swreset(struct decon_context *ctx)
 	       ctx->addr + DECON_CRCCTRL);
 }
 
-static void decon_enable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 
@@ -523,7 +523,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc)
 	decon_commit(ctx->crtc);
 }
 
-static void decon_disable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 	int i;
@@ -599,8 +599,8 @@ static enum drm_mode_status decon_mode_valid(struct exynos_drm_crtc *crtc,
 }
 
 static const struct exynos_drm_crtc_ops decon_crtc_ops = {
-	.enable			= decon_enable,
-	.disable		= decon_disable,
+	.atomic_enable		= decon_atomic_enable,
+	.atomic_disable		= decon_atomic_disable,
 	.enable_vblank		= decon_enable_vblank,
 	.disable_vblank		= decon_disable_vblank,
 	.atomic_begin		= decon_atomic_begin,
@@ -651,7 +651,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data)
 {
 	struct decon_context *ctx = dev_get_drvdata(dev);
 
-	decon_disable(ctx->crtc);
+	decon_atomic_disable(ctx->crtc);
 
 	/* detach this sub driver from iommu mapping if supported. */
 	exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f0640950bd46..ff59c641fa80 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -526,7 +526,7 @@ static void decon_init(struct decon_context *ctx)
 		writel(VIDCON1_VCLK_HOLD, ctx->regs + VIDCON1(0));
 }
 
-static void decon_enable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 
@@ -546,7 +546,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc)
 	ctx->suspended = false;
 }
 
-static void decon_disable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 	int i;
@@ -568,8 +568,8 @@ static void decon_disable(struct exynos_drm_crtc *crtc)
 }
 
 static const struct exynos_drm_crtc_ops decon_crtc_ops = {
-	.enable = decon_enable,
-	.disable = decon_disable,
+	.atomic_enable = decon_atomic_enable,
+	.atomic_disable = decon_atomic_disable,
 	.enable_vblank = decon_enable_vblank,
 	.disable_vblank = decon_disable_vblank,
 	.atomic_begin = decon_atomic_begin,
@@ -653,7 +653,7 @@ static void decon_unbind(struct device *dev, struct device *master,
 {
 	struct decon_context *ctx = dev_get_drvdata(dev);
 
-	decon_disable(ctx->crtc);
+	decon_atomic_disable(ctx->crtc);
 
 	if (ctx->encoder)
 		exynos_dpi_remove(ctx->encoder);
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 3a0f0ba8c63a..4785885c0f4f 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -19,6 +19,7 @@
 
 #include <drm/bridge/analogix_dp.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
@@ -109,7 +110,6 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 		if (ret) {
 			DRM_DEV_ERROR(dp->dev,
 				      "Failed to attach bridge to drm\n");
-			bridge->next = NULL;
 			return ret;
 		}
 	}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 77ce78986408..1c03485676ef 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -23,8 +23,8 @@ static void exynos_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 {
 	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
 
-	if (exynos_crtc->ops->enable)
-		exynos_crtc->ops->enable(exynos_crtc);
+	if (exynos_crtc->ops->atomic_enable)
+		exynos_crtc->ops->atomic_enable(exynos_crtc);
 
 	drm_crtc_vblank_on(crtc);
 }
@@ -36,8 +36,8 @@ static void exynos_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	drm_crtc_vblank_off(crtc);
 
-	if (exynos_crtc->ops->disable)
-		exynos_crtc->ops->disable(exynos_crtc);
+	if (exynos_crtc->ops->atomic_disable)
+		exynos_crtc->ops->atomic_disable(exynos_crtc);
 
 	if (crtc->state->event && !crtc->state->active) {
 		spin_lock_irq(&crtc->dev->event_lock);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 3cebb19ec1c4..43fa0f26c052 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -43,7 +43,7 @@ exynos_dpi_detect(struct drm_connector *connector, bool force)
 {
 	struct exynos_dpi *ctx = connector_to_dpi(connector);
 
-	if (ctx->panel && !ctx->panel->connector)
+	if (ctx->panel)
 		drm_panel_attach(ctx->panel, &ctx->connector);
 
 	return connector_status_connected;
@@ -85,7 +85,7 @@ static int exynos_dpi_get_modes(struct drm_connector *connector)
 	}
 
 	if (ctx->panel)
-		return ctx->panel->funcs->get_modes(ctx->panel);
+		return drm_panel_get_modes(ctx->panel, connector);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index d4014ba592fd..d4d21d8cfb90 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -118,8 +118,8 @@ struct exynos_drm_plane_config {
 /*
  * Exynos drm crtc ops
  *
- * @enable: enable the device
- * @disable: disable the device
+ * @atomic_enable: enable the device
+ * @atomic_disable: disable the device
  * @enable_vblank: specific driver callback for enabling vblank interrupt.
  * @disable_vblank: specific driver callback for disabling vblank interrupt.
  * @mode_valid: specific driver callback for mode validation
@@ -133,8 +133,8 @@ struct exynos_drm_plane_config {
  */
 struct exynos_drm_crtc;
 struct exynos_drm_crtc_ops {
-	void (*enable)(struct exynos_drm_crtc *crtc);
-	void (*disable)(struct exynos_drm_crtc *crtc);
+	void (*atomic_enable)(struct exynos_drm_crtc *crtc);
+	void (*atomic_disable)(struct exynos_drm_crtc *crtc);
 	int (*enable_vblank)(struct exynos_drm_crtc *crtc);
 	void (*disable_vblank)(struct exynos_drm_crtc *crtc);
 	enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 6926cee91b36..33628d85edad 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -24,6 +24,7 @@
 #include <video/videomode.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
@@ -254,6 +255,7 @@ struct exynos_dsi {
 	struct mipi_dsi_host dsi_host;
 	struct drm_connector connector;
 	struct drm_panel *panel;
+	struct list_head bridge_chain;
 	struct drm_bridge *out_bridge;
 	struct device *dev;
 
@@ -1376,6 +1378,7 @@ static void exynos_dsi_unregister_te_irq(struct exynos_dsi *dsi)
 static void exynos_dsi_enable(struct drm_encoder *encoder)
 {
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
+	struct drm_bridge *iter;
 	int ret;
 
 	if (dsi->state & DSIM_STATE_ENABLED)
@@ -1389,7 +1392,11 @@ static void exynos_dsi_enable(struct drm_encoder *encoder)
 		if (ret < 0)
 			goto err_put_sync;
 	} else {
-		drm_bridge_pre_enable(dsi->out_bridge);
+		list_for_each_entry_reverse(iter, &dsi->bridge_chain,
+					    chain_node) {
+			if (iter->funcs->pre_enable)
+				iter->funcs->pre_enable(iter);
+		}
 	}
 
 	exynos_dsi_set_display_mode(dsi);
@@ -1400,7 +1407,10 @@ static void exynos_dsi_enable(struct drm_encoder *encoder)
 		if (ret < 0)
 			goto err_display_disable;
 	} else {
-		drm_bridge_enable(dsi->out_bridge);
+		list_for_each_entry(iter, &dsi->bridge_chain, chain_node) {
+			if (iter->funcs->enable)
+				iter->funcs->enable(iter);
+		}
 	}
 
 	dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
@@ -1418,6 +1428,7 @@ err_put_sync:
 static void exynos_dsi_disable(struct drm_encoder *encoder)
 {
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
+	struct drm_bridge *iter;
 
 	if (!(dsi->state & DSIM_STATE_ENABLED))
 		return;
@@ -1425,10 +1436,20 @@ static void exynos_dsi_disable(struct drm_encoder *encoder)
 	dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
 
 	drm_panel_disable(dsi->panel);
-	drm_bridge_disable(dsi->out_bridge);
+
+	list_for_each_entry_reverse(iter, &dsi->bridge_chain, chain_node) {
+		if (iter->funcs->disable)
+			iter->funcs->disable(iter);
+	}
+
 	exynos_dsi_set_display_enable(dsi, false);
 	drm_panel_unprepare(dsi->panel);
-	drm_bridge_post_disable(dsi->out_bridge);
+
+	list_for_each_entry(iter, &dsi->bridge_chain, chain_node) {
+		if (iter->funcs->post_disable)
+			iter->funcs->post_disable(iter);
+	}
+
 	dsi->state &= ~DSIM_STATE_ENABLED;
 	pm_runtime_put_sync(dsi->dev);
 }
@@ -1460,7 +1481,7 @@ static int exynos_dsi_get_modes(struct drm_connector *connector)
 	struct exynos_dsi *dsi = connector_to_dsi(connector);
 
 	if (dsi->panel)
-		return dsi->panel->funcs->get_modes(dsi->panel);
+		return drm_panel_get_modes(dsi->panel, connector);
 
 	return 0;
 }
@@ -1521,7 +1542,7 @@ static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
 	if (out_bridge) {
 		drm_bridge_attach(encoder, out_bridge, NULL);
 		dsi->out_bridge = out_bridge;
-		encoder->bridge = NULL;
+		list_splice_init(&encoder->bridge_chain, &dsi->bridge_chain);
 	} else {
 		int ret = exynos_dsi_create_connector(encoder);
 
@@ -1587,6 +1608,7 @@ static int exynos_dsi_host_detach(struct mipi_dsi_host *host,
 		if (dsi->out_bridge->funcs->detach)
 			dsi->out_bridge->funcs->detach(dsi->out_bridge);
 		dsi->out_bridge = NULL;
+		INIT_LIST_HEAD(&dsi->bridge_chain);
 	}
 
 	if (drm->mode_config.poll_enabled)
@@ -1734,6 +1756,7 @@ static int exynos_dsi_probe(struct platform_device *pdev)
 	init_completion(&dsi->completed);
 	spin_lock_init(&dsi->transfer_lock);
 	INIT_LIST_HEAD(&dsi->transfer_list);
+	INIT_LIST_HEAD(&dsi->bridge_chain);
 
 	dsi->dsi_host.ops = &exynos_dsi_ops;
 	dsi->dsi_host.dev = dev;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index b0877b97291c..647a1fd1d815 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -60,7 +60,7 @@ static int exynos_drm_fb_mmap(struct fb_info *info,
 	return 0;
 }
 
-static struct fb_ops exynos_drm_fb_ops = {
+static const struct fb_ops exynos_drm_fb_ops = {
 	.owner		= THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_mmap        = exynos_drm_fb_mmap,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 8d0a929104e5..21aec38702fc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -894,7 +894,7 @@ static void fimd_disable_plane(struct exynos_drm_crtc *crtc,
 		fimd_enable_shadow_channel_path(ctx, win, false);
 }
 
-static void fimd_enable(struct exynos_drm_crtc *crtc)
+static void fimd_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
 
@@ -912,7 +912,7 @@ static void fimd_enable(struct exynos_drm_crtc *crtc)
 	fimd_commit(ctx->crtc);
 }
 
-static void fimd_disable(struct exynos_drm_crtc *crtc)
+static void fimd_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
 	int i;
@@ -1006,8 +1006,8 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable)
 }
 
 static const struct exynos_drm_crtc_ops fimd_crtc_ops = {
-	.enable = fimd_enable,
-	.disable = fimd_disable,
+	.atomic_enable = fimd_atomic_enable,
+	.atomic_disable = fimd_atomic_disable,
 	.enable_vblank = fimd_enable_vblank,
 	.disable_vblank = fimd_disable_vblank,
 	.atomic_begin = fimd_atomic_begin,
@@ -1098,7 +1098,7 @@ static void fimd_unbind(struct device *dev, struct device *master,
 {
 	struct fimd_context *ctx = dev_get_drvdata(dev);
 
-	fimd_disable(ctx->crtc);
+	fimd_atomic_disable(ctx->crtc);
 
 	exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 7ae087b0504d..88b6fcaa20be 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1313,6 +1313,7 @@ static int gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
+	component_del(dev, &gsc_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index b78e8c5ba553..f41d75923557 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -21,6 +21,7 @@
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_bridge.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_print.h>
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 65b891cb9c50..b320b3a21ad4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -120,7 +120,7 @@ static void vidi_update_plane(struct exynos_drm_crtc *crtc,
 	DRM_DEV_DEBUG_KMS(ctx->dev, "dma_addr = %pad\n", &addr);
 }
 
-static void vidi_enable(struct exynos_drm_crtc *crtc)
+static void vidi_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct vidi_context *ctx = crtc->ctx;
 
@@ -133,7 +133,7 @@ static void vidi_enable(struct exynos_drm_crtc *crtc)
 	drm_crtc_vblank_on(&crtc->base);
 }
 
-static void vidi_disable(struct exynos_drm_crtc *crtc)
+static void vidi_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct vidi_context *ctx = crtc->ctx;
 
@@ -147,8 +147,8 @@ static void vidi_disable(struct exynos_drm_crtc *crtc)
 }
 
 static const struct exynos_drm_crtc_ops vidi_crtc_ops = {
-	.enable = vidi_enable,
-	.disable = vidi_disable,
+	.atomic_enable = vidi_atomic_enable,
+	.atomic_disable = vidi_atomic_disable,
 	.enable_vblank = vidi_enable_vblank,
 	.disable_vblank = vidi_disable_vblank,
 	.update_plane = vidi_update_plane,
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index bc1565f1822a..9ff921f43a93 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -34,6 +34,7 @@
 #include <media/cec-notifier.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
@@ -852,6 +853,10 @@ static enum drm_connector_status hdmi_detect(struct drm_connector *connector,
 
 static void hdmi_connector_destroy(struct drm_connector *connector)
 {
+	struct hdmi_context *hdata = connector_to_hdmi(connector);
+
+	cec_notifier_conn_unregister(hdata->notifier);
+
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 }
@@ -935,13 +940,16 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
 {
 	struct hdmi_context *hdata = encoder_to_hdmi(encoder);
 	struct drm_connector *connector = &hdata->connector;
+	struct cec_connector_info conn_info;
 	int ret;
 
 	connector->interlace_allowed = true;
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
 
-	ret = drm_connector_init(hdata->drm_dev, connector,
-			&hdmi_connector_funcs, DRM_MODE_CONNECTOR_HDMIA);
+	ret = drm_connector_init_with_ddc(hdata->drm_dev, connector,
+					  &hdmi_connector_funcs,
+					  DRM_MODE_CONNECTOR_HDMIA,
+					  hdata->ddc_adpt);
 	if (ret) {
 		DRM_DEV_ERROR(hdata->dev,
 			      "Failed to initialize connector with drm\n");
@@ -957,6 +965,15 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
 			DRM_DEV_ERROR(hdata->dev, "Failed to attach bridge\n");
 	}
 
+	cec_fill_conn_info_from_drm(&conn_info, connector);
+
+	hdata->notifier = cec_notifier_conn_register(hdata->dev, NULL,
+						     &conn_info);
+	if (!hdata->notifier) {
+		ret = -ENOMEM;
+		DRM_DEV_ERROR(hdata->dev, "Failed to allocate CEC notifier\n");
+	}
+
 	return ret;
 }
 
@@ -1528,8 +1545,8 @@ static void hdmi_disable(struct drm_encoder *encoder)
 		 */
 		mutex_unlock(&hdata->mutex);
 		cancel_delayed_work(&hdata->hotplug_work);
-		cec_notifier_set_phys_addr(hdata->notifier,
-					   CEC_PHYS_ADDR_INVALID);
+		if (hdata->notifier)
+			cec_notifier_phys_addr_invalidate(hdata->notifier);
 		return;
 	}
 
@@ -2006,12 +2023,6 @@ static int hdmi_probe(struct platform_device *pdev)
 		}
 	}
 
-	hdata->notifier = cec_notifier_get(&pdev->dev);
-	if (hdata->notifier == NULL) {
-		ret = -ENOMEM;
-		goto err_hdmiphy;
-	}
-
 	pm_runtime_enable(dev);
 
 	audio_infoframe = &hdata->audio.infoframe;
@@ -2023,7 +2034,7 @@ static int hdmi_probe(struct platform_device *pdev)
 
 	ret = hdmi_register_audio_device(hdata);
 	if (ret)
-		goto err_notifier_put;
+		goto err_rpm_disable;
 
 	ret = component_add(&pdev->dev, &hdmi_component_ops);
 	if (ret)
@@ -2034,8 +2045,7 @@ static int hdmi_probe(struct platform_device *pdev)
 err_unregister_audio:
 	platform_device_unregister(hdata->audio.pdev);
 
-err_notifier_put:
-	cec_notifier_put(hdata->notifier);
+err_rpm_disable:
 	pm_runtime_disable(dev);
 
 err_hdmiphy:
@@ -2054,12 +2064,10 @@ static int hdmi_remove(struct platform_device *pdev)
 	struct hdmi_context *hdata = platform_get_drvdata(pdev);
 
 	cancel_delayed_work_sync(&hdata->hotplug_work);
-	cec_notifier_set_phys_addr(hdata->notifier, CEC_PHYS_ADDR_INVALID);
 
 	component_del(&pdev->dev, &hdmi_component_ops);
 	platform_device_unregister(hdata->audio.pdev);
 
-	cec_notifier_put(hdata->notifier);
 	pm_runtime_disable(&pdev->dev);
 
 	if (!IS_ERR(hdata->reg_hdmi_en))
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 7b24338fad3c..38ae9c32feef 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -986,7 +986,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc)
 	exynos_crtc_handle_event(crtc);
 }
 
-static void mixer_enable(struct exynos_drm_crtc *crtc)
+static void mixer_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct mixer_context *ctx = crtc->ctx;
 
@@ -1015,7 +1015,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc)
 	set_bit(MXR_BIT_POWERED, &ctx->flags);
 }
 
-static void mixer_disable(struct exynos_drm_crtc *crtc)
+static void mixer_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct mixer_context *ctx = crtc->ctx;
 	int i;
@@ -1069,9 +1069,9 @@ static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc,
 	struct mixer_context *ctx = crtc->ctx;
 	int width = mode->hdisplay, height = mode->vdisplay, i;
 
-	struct {
+	static const struct {
 		int hdisplay, vdisplay, htotal, vtotal, scan_val;
-	} static const modes[] = {
+	} modes[] = {
 		{ 720, 480, 858, 525, MXR_CFG_SCAN_NTSC | MXR_CFG_SCAN_SD },
 		{ 720, 576, 864, 625, MXR_CFG_SCAN_PAL | MXR_CFG_SCAN_SD },
 		{ 1280, 720, 1650, 750, MXR_CFG_SCAN_HD_720 | MXR_CFG_SCAN_HD },
@@ -1109,8 +1109,8 @@ static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc,
 }
 
 static const struct exynos_drm_crtc_ops mixer_crtc_ops = {
-	.enable			= mixer_enable,
-	.disable		= mixer_disable,
+	.atomic_enable		= mixer_atomic_enable,
+	.atomic_disable		= mixer_atomic_disable,
 	.enable_vblank		= mixer_enable_vblank,
 	.disable_vblank		= mixer_disable_vblank,
 	.atomic_begin		= mixer_atomic_begin,
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index a92fd6c70b09..9598ee3cc4d2 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -9,6 +9,7 @@
 #include <linux/of_graph.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_probe_helper.h>
@@ -67,7 +68,7 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector)
 	struct fsl_dcu_drm_connector *fsl_connector;
 
 	fsl_connector = to_fsl_dcu_connector(connector);
-	return drm_panel_get_modes(fsl_connector->panel);
+	return drm_panel_get_modes(fsl_connector->panel, connector);
 }
 
 static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
diff --git a/drivers/gpu/drm/gma500/accel_2d.c b/drivers/gpu/drm/gma500/accel_2d.c
index 45ad5ffedc93..adc0507545bf 100644
--- a/drivers/gpu/drm/gma500/accel_2d.c
+++ b/drivers/gpu/drm/gma500/accel_2d.c
@@ -21,9 +21,9 @@
 
 #include <drm/drm.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 
-#include "framebuffer.h"
 #include "psb_drv.h"
 #include "psb_reg.h"
 
@@ -226,11 +226,10 @@ static int psb_accel_2d_copy(struct drm_psb_private *dev_priv,
 static void psbfb_copyarea_accel(struct fb_info *info,
 				 const struct fb_copyarea *a)
 {
-	struct psb_fbdev *fbdev = info->par;
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
-	struct drm_device *dev = psbfb->base.dev;
-	struct drm_framebuffer *fb = fbdev->psb_fb_helper.fb;
-	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_framebuffer *fb = fb_helper->fb;
+	struct drm_device *dev;
+	struct drm_psb_private *dev_priv;
 	uint32_t offset;
 	uint32_t stride;
 	uint32_t src_format;
@@ -239,6 +238,8 @@ static void psbfb_copyarea_accel(struct fb_info *info,
 	if (!fb)
 		return;
 
+	dev = fb->dev;
+	dev_priv = dev->dev_private;
 	offset = to_gtt_range(fb->obj[0])->offset;
 	stride = fb->pitches[0];
 
@@ -309,9 +310,9 @@ void psbfb_copyarea(struct fb_info *info,
  */
 int psbfb_sync(struct fb_info *info)
 {
-	struct psb_fbdev *fbdev = info->par;
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
-	struct drm_device *dev = psbfb->base.dev;
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_framebuffer *fb = fb_helper->fb;
+	struct drm_device *dev = fb->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	unsigned long _end = jiffies + HZ;
 	int busy = 0;
diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c
index f56852a503e8..1ed854f498b7 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_display.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_display.c
@@ -405,6 +405,8 @@ static bool cdv_intel_find_dp_pll(const struct gma_limit_t *limit,
 	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct gma_clock_t clock;
 
+	memset(&clock, 0, sizeof(clock));
+
 	switch (refclk) {
 	case 27000:
 		if (target < 200000) {
@@ -580,8 +582,8 @@ static int cdv_intel_crtc_mode_set(struct drm_crtc *crtc,
 	struct gma_clock_t clock;
 	u32 dpll = 0, dspcntr, pipeconf;
 	bool ok;
-	bool is_crt = false, is_lvds = false, is_tv = false;
-	bool is_hdmi = false, is_dp = false;
+	bool is_lvds = false, is_tv = false;
+	bool is_dp = false;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
 	const struct gma_limit_t *limit;
@@ -605,10 +607,7 @@ static int cdv_intel_crtc_mode_set(struct drm_crtc *crtc,
 			is_tv = true;
 			break;
 		case INTEL_OUTPUT_ANALOG:
-			is_crt = true;
-			break;
 		case INTEL_OUTPUT_HDMI:
-			is_hdmi = true;
 			break;
 		case INTEL_OUTPUT_DISPLAYPORT:
 			is_dp = true;
@@ -977,6 +976,7 @@ const struct drm_crtc_funcs cdv_intel_crtc_funcs = {
 	.gamma_set = gma_crtc_gamma_set,
 	.set_config = gma_crtc_set_config,
 	.destroy = gma_crtc_destroy,
+	.page_flip = gma_crtc_page_flip,
 };
 
 const struct gma_clock_funcs cdv_clock_funcs = {
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 570b59520fd1..5772b2dce0d6 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -1594,7 +1594,6 @@ cdv_intel_dp_complete_link_train(struct gma_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
-	bool channel_eq = false;
 	int tries, cr_tries;
 	u32 reg;
 	uint32_t DP = intel_dp->DP;
@@ -1602,7 +1601,6 @@ cdv_intel_dp_complete_link_train(struct gma_encoder *encoder)
 	/* channel equalization */
 	tries = 0;
 	cr_tries = 0;
-	channel_eq = false;
 
 	DRM_DEBUG_KMS("\n");
 		reg = DP | DP_LINK_TRAIN_PAT_2;
@@ -1648,7 +1646,6 @@ cdv_intel_dp_complete_link_train(struct gma_encoder *encoder)
 
 		if (cdv_intel_channel_eq_ok(encoder)) {
 			DRM_DEBUG_KMS("PT2 train is done\n");
-			channel_eq = true;
 			break;
 		}
 
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 218f3bb15276..1459076d1980 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -40,8 +40,8 @@ static int psbfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			   unsigned blue, unsigned transp,
 			   struct fb_info *info)
 {
-	struct psb_fbdev *fbdev = info->par;
-	struct drm_framebuffer *fb = fbdev->psb_fb_helper.fb;
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_framebuffer *fb = fb_helper->fb;
 	uint32_t v;
 
 	if (!fb)
@@ -77,10 +77,10 @@ static int psbfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 
 static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info)
 {
-	struct psb_fbdev *fbdev = info->par;
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
-	struct drm_device *dev = psbfb->base.dev;
-	struct gtt_range *gtt = to_gtt_range(psbfb->base.obj[0]);
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_framebuffer *fb = fb_helper->fb;
+	struct drm_device *dev = fb->dev;
+	struct gtt_range *gtt = to_gtt_range(fb->obj[0]);
 
 	/*
 	 *	We have to poke our nose in here. The core fb code assumes
@@ -99,10 +99,10 @@ static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info)
 static vm_fault_t psbfb_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct psb_framebuffer *psbfb = vma->vm_private_data;
-	struct drm_device *dev = psbfb->base.dev;
+	struct drm_framebuffer *fb = vma->vm_private_data;
+	struct drm_device *dev = fb->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct gtt_range *gtt = to_gtt_range(psbfb->base.obj[0]);
+	struct gtt_range *gtt = to_gtt_range(fb->obj[0]);
 	int page_num;
 	int i;
 	unsigned long address;
@@ -145,23 +145,21 @@ static const struct vm_operations_struct psbfb_vm_ops = {
 
 static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
-	struct psb_fbdev *fbdev = info->par;
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_framebuffer *fb = fb_helper->fb;
 
 	if (vma->vm_pgoff != 0)
 		return -EINVAL;
 	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
 		return -EINVAL;
 
-	if (!psbfb->addr_space)
-		psbfb->addr_space = vma->vm_file->f_mapping;
 	/*
 	 * If this is a GEM object then info->screen_base is the virtual
 	 * kernel remapping of the object. FIXME: Review if this is
 	 * suitable for our mmap work
 	 */
 	vma->vm_ops = &psbfb_vm_ops;
-	vma->vm_private_data = (void *)psbfb;
+	vma->vm_private_data = (void *)fb;
 	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
@@ -209,9 +207,9 @@ static struct fb_ops psbfb_unaccel_ops = {
  *	0 on success or an error code if we fail.
  */
 static int psb_framebuffer_init(struct drm_device *dev,
-					struct psb_framebuffer *fb,
+					struct drm_framebuffer *fb,
 					const struct drm_mode_fb_cmd2 *mode_cmd,
-					struct gtt_range *gt)
+					struct drm_gem_object *obj)
 {
 	const struct drm_format_info *info;
 	int ret;
@@ -227,9 +225,9 @@ static int psb_framebuffer_init(struct drm_device *dev,
 	if (mode_cmd->pitches[0] & 63)
 		return -EINVAL;
 
-	drm_helper_mode_fill_fb_struct(dev, &fb->base, mode_cmd);
-	fb->base.obj[0] = &gt->gem;
-	ret = drm_framebuffer_init(dev, &fb->base, &psb_fb_funcs);
+	drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
+	fb->obj[0] = obj;
+	ret = drm_framebuffer_init(dev, fb, &psb_fb_funcs);
 	if (ret) {
 		dev_err(dev->dev, "framebuffer init failed: %d\n", ret);
 		return ret;
@@ -252,21 +250,21 @@ static int psb_framebuffer_init(struct drm_device *dev,
 static struct drm_framebuffer *psb_framebuffer_create
 			(struct drm_device *dev,
 			 const struct drm_mode_fb_cmd2 *mode_cmd,
-			 struct gtt_range *gt)
+			 struct drm_gem_object *obj)
 {
-	struct psb_framebuffer *fb;
+	struct drm_framebuffer *fb;
 	int ret;
 
 	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
 	if (!fb)
 		return ERR_PTR(-ENOMEM);
 
-	ret = psb_framebuffer_init(dev, fb, mode_cmd, gt);
+	ret = psb_framebuffer_init(dev, fb, mode_cmd, obj);
 	if (ret) {
 		kfree(fb);
 		return ERR_PTR(ret);
 	}
-	return &fb->base;
+	return fb;
 }
 
 /**
@@ -300,14 +298,13 @@ static struct gtt_range *psbfb_alloc(struct drm_device *dev, int aligned_size)
  *
  *	Create a framebuffer to the specifications provided
  */
-static int psbfb_create(struct psb_fbdev *fbdev,
+static int psbfb_create(struct drm_fb_helper *fb_helper,
 				struct drm_fb_helper_surface_size *sizes)
 {
-	struct drm_device *dev = fbdev->psb_fb_helper.dev;
+	struct drm_device *dev = fb_helper->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct fb_info *info;
 	struct drm_framebuffer *fb;
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
 	struct drm_mode_fb_cmd2 mode_cmd;
 	int size;
 	int ret;
@@ -372,7 +369,7 @@ static int psbfb_create(struct psb_fbdev *fbdev,
 
 	memset(dev_priv->vram_addr + backing->offset, 0, size);
 
-	info = drm_fb_helper_alloc_fbi(&fbdev->psb_fb_helper);
+	info = drm_fb_helper_alloc_fbi(fb_helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto out;
@@ -380,14 +377,13 @@ static int psbfb_create(struct psb_fbdev *fbdev,
 
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
 
-	ret = psb_framebuffer_init(dev, psbfb, &mode_cmd, backing);
-	if (ret)
+	fb = psb_framebuffer_create(dev, &mode_cmd, &backing->gem);
+	if (IS_ERR(fb)) {
+		ret = PTR_ERR(fb);
 		goto out;
+	}
 
-	fb = &psbfb->base;
-	psbfb->fbdev = info;
-
-	fbdev->psb_fb_helper.fb = fb;
+	fb_helper->fb = fb;
 
 	if (dev_priv->ops->accel_2d && pitch_lines > 8)	/* 2D engine */
 		info->fbops = &psbfb_ops;
@@ -411,15 +407,14 @@ static int psbfb_create(struct psb_fbdev *fbdev,
 		info->apertures->ranges[0].size = dev_priv->gtt.stolen_size;
 	}
 
-	drm_fb_helper_fill_info(info, &fbdev->psb_fb_helper, sizes);
+	drm_fb_helper_fill_info(info, fb_helper, sizes);
 
 	info->fix.mmio_start = pci_resource_start(dev->pdev, 0);
 	info->fix.mmio_len = pci_resource_len(dev->pdev, 0);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
-	dev_dbg(dev->dev, "allocated %dx%d fb\n",
-					psbfb->base.width, psbfb->base.height);
+	dev_dbg(dev->dev, "allocated %dx%d fb\n", fb->width, fb->height);
 
 	return 0;
 out:
@@ -439,7 +434,6 @@ static struct drm_framebuffer *psb_user_framebuffer_create
 			(struct drm_device *dev, struct drm_file *filp,
 			 const struct drm_mode_fb_cmd2 *cmd)
 {
-	struct gtt_range *r;
 	struct drm_gem_object *obj;
 
 	/*
@@ -451,17 +445,15 @@ static struct drm_framebuffer *psb_user_framebuffer_create
 		return ERR_PTR(-ENOENT);
 
 	/* Let the core code do all the work */
-	r = container_of(obj, struct gtt_range, gem);
-	return psb_framebuffer_create(dev, cmd, r);
+	return psb_framebuffer_create(dev, cmd, obj);
 }
 
-static int psbfb_probe(struct drm_fb_helper *helper,
+static int psbfb_probe(struct drm_fb_helper *fb_helper,
 				struct drm_fb_helper_surface_size *sizes)
 {
-	struct psb_fbdev *psb_fbdev =
-		container_of(helper, struct psb_fbdev, psb_fb_helper);
-	struct drm_device *dev = psb_fbdev->psb_fb_helper.dev;
+	struct drm_device *dev = fb_helper->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
+	unsigned int fb_size;
 	int bytespp;
 
 	bytespp = sizes->surface_bpp / 8;
@@ -471,72 +463,77 @@ static int psbfb_probe(struct drm_fb_helper *helper,
 	/* If the mode will not fit in 32bit then switch to 16bit to get
 	   a console on full resolution. The X mode setting server will
 	   allocate its own 32bit GEM framebuffer */
-	if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height >
-	                dev_priv->vram_stolen_size) {
+	fb_size = ALIGN(sizes->surface_width * bytespp, 64) *
+		  sizes->surface_height;
+	fb_size = ALIGN(fb_size, PAGE_SIZE);
+
+	if (fb_size > dev_priv->vram_stolen_size) {
                 sizes->surface_bpp = 16;
                 sizes->surface_depth = 16;
         }
 
-	return psbfb_create(psb_fbdev, sizes);
+	return psbfb_create(fb_helper, sizes);
 }
 
 static const struct drm_fb_helper_funcs psb_fb_helper_funcs = {
 	.fb_probe = psbfb_probe,
 };
 
-static int psb_fbdev_destroy(struct drm_device *dev, struct psb_fbdev *fbdev)
+static int psb_fbdev_destroy(struct drm_device *dev,
+			     struct drm_fb_helper *fb_helper)
 {
-	struct psb_framebuffer *psbfb = &fbdev->pfb;
+	struct drm_framebuffer *fb = fb_helper->fb;
+
+	drm_fb_helper_unregister_fbi(fb_helper);
 
-	drm_fb_helper_unregister_fbi(&fbdev->psb_fb_helper);
+	drm_fb_helper_fini(fb_helper);
+	drm_framebuffer_unregister_private(fb);
+	drm_framebuffer_cleanup(fb);
 
-	drm_fb_helper_fini(&fbdev->psb_fb_helper);
-	drm_framebuffer_unregister_private(&psbfb->base);
-	drm_framebuffer_cleanup(&psbfb->base);
+	if (fb->obj[0])
+		drm_gem_object_put_unlocked(fb->obj[0]);
+	kfree(fb);
 
-	if (psbfb->base.obj[0])
-		drm_gem_object_put_unlocked(psbfb->base.obj[0]);
 	return 0;
 }
 
 int psb_fbdev_init(struct drm_device *dev)
 {
-	struct psb_fbdev *fbdev;
+	struct drm_fb_helper *fb_helper;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	int ret;
 
-	fbdev = kzalloc(sizeof(struct psb_fbdev), GFP_KERNEL);
-	if (!fbdev) {
+	fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
+	if (!fb_helper) {
 		dev_err(dev->dev, "no memory\n");
 		return -ENOMEM;
 	}
 
-	dev_priv->fbdev = fbdev;
+	dev_priv->fb_helper = fb_helper;
 
-	drm_fb_helper_prepare(dev, &fbdev->psb_fb_helper, &psb_fb_helper_funcs);
+	drm_fb_helper_prepare(dev, fb_helper, &psb_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, &fbdev->psb_fb_helper,
-				 INTELFB_CONN_LIMIT);
+	ret = drm_fb_helper_init(dev, fb_helper, INTELFB_CONN_LIMIT);
 	if (ret)
 		goto free;
 
-	ret = drm_fb_helper_single_add_all_connectors(&fbdev->psb_fb_helper);
+	ret = drm_fb_helper_single_add_all_connectors(fb_helper);
 	if (ret)
 		goto fini;
 
 	/* disable all the possible outputs/crtcs before entering KMS mode */
 	drm_helper_disable_unused_functions(dev);
 
-	ret = drm_fb_helper_initial_config(&fbdev->psb_fb_helper, 32);
+	ret = drm_fb_helper_initial_config(fb_helper, 32);
 	if (ret)
 		goto fini;
 
 	return 0;
 
 fini:
-	drm_fb_helper_fini(&fbdev->psb_fb_helper);
+	drm_fb_helper_fini(fb_helper);
 free:
-	kfree(fbdev);
+	kfree(fb_helper);
 	return ret;
 }
 
@@ -544,12 +541,12 @@ static void psb_fbdev_fini(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 
-	if (!dev_priv->fbdev)
+	if (!dev_priv->fb_helper)
 		return;
 
-	psb_fbdev_destroy(dev, dev_priv->fbdev);
-	kfree(dev_priv->fbdev);
-	dev_priv->fbdev = NULL;
+	psb_fbdev_destroy(dev, dev_priv->fb_helper);
+	kfree(dev_priv->fb_helper);
+	dev_priv->fb_helper = NULL;
 }
 
 static const struct drm_mode_config_funcs psb_mode_funcs = {
diff --git a/drivers/gpu/drm/gma500/framebuffer.h b/drivers/gpu/drm/gma500/framebuffer.h
index ae8a02639fd9..2fbba4b48841 100644
--- a/drivers/gpu/drm/gma500/framebuffer.h
+++ b/drivers/gpu/drm/gma500/framebuffer.h
@@ -9,23 +9,8 @@
 #ifndef _FRAMEBUFFER_H_
 #define _FRAMEBUFFER_H_
 
-#include <drm/drm_fb_helper.h>
-
 #include "psb_drv.h"
 
-struct psb_framebuffer {
-	struct drm_framebuffer base;
-	struct address_space *addr_space;
-	struct fb_info *fbdev;
-};
-
-struct psb_fbdev {
-	struct drm_fb_helper psb_fb_helper; /* must be first */
-	struct psb_framebuffer pfb;
-};
-
-#define to_psb_fb(x) container_of(x, struct psb_framebuffer, base)
-
 extern int gma_connector_clones(struct drm_device *dev, int type_mask);
 
 #endif
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index e20ccb5d10fd..17f136985d21 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -255,6 +255,8 @@ void gma_crtc_dpms(struct drm_crtc *crtc, int mode)
 		/* Give the overlay scaler a chance to enable
 		 * if it's on this pipe */
 		/* psb_intel_crtc_dpms_video(crtc, true); TODO */
+
+		drm_crtc_vblank_on(crtc);
 		break;
 	case DRM_MODE_DPMS_OFF:
 		if (!gma_crtc->active)
@@ -501,6 +503,52 @@ void gma_crtc_destroy(struct drm_crtc *crtc)
 	kfree(gma_crtc);
 }
 
+int gma_crtc_page_flip(struct drm_crtc *crtc,
+		       struct drm_framebuffer *fb,
+		       struct drm_pending_vblank_event *event,
+		       uint32_t page_flip_flags,
+		       struct drm_modeset_acquire_ctx *ctx)
+{
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	struct drm_framebuffer *current_fb = crtc->primary->fb;
+	struct drm_framebuffer *old_fb = crtc->primary->old_fb;
+	const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	struct drm_device *dev = crtc->dev;
+	unsigned long flags;
+	int ret;
+
+	if (!crtc_funcs->mode_set_base)
+		return -EINVAL;
+
+	/* Using mode_set_base requires the new fb to be set already. */
+	crtc->primary->fb = fb;
+
+	if (event) {
+		spin_lock_irqsave(&dev->event_lock, flags);
+
+		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+		gma_crtc->page_flip_event = event;
+
+		/* Call this locked if we want an event at vblank interrupt. */
+		ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
+		if (ret) {
+			gma_crtc->page_flip_event = NULL;
+			drm_crtc_vblank_put(crtc);
+		}
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	} else {
+		ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
+	}
+
+	/* Restore previous fb in case of failure. */
+	if (ret)
+		crtc->primary->fb = current_fb;
+
+	return ret;
+}
+
 int gma_crtc_set_config(struct drm_mode_set *set,
 			struct drm_modeset_acquire_ctx *ctx)
 {
diff --git a/drivers/gpu/drm/gma500/gma_display.h b/drivers/gpu/drm/gma500/gma_display.h
index fdbd7ecaa59c..7bd6c1ee8b21 100644
--- a/drivers/gpu/drm/gma500/gma_display.h
+++ b/drivers/gpu/drm/gma500/gma_display.h
@@ -11,6 +11,7 @@
 #define _GMA_DISPLAY_H_
 
 #include <linux/pm_runtime.h>
+#include <drm/drm_vblank.h>
 
 struct drm_encoder;
 struct drm_mode_set;
@@ -71,6 +72,11 @@ extern void gma_crtc_prepare(struct drm_crtc *crtc);
 extern void gma_crtc_commit(struct drm_crtc *crtc);
 extern void gma_crtc_disable(struct drm_crtc *crtc);
 extern void gma_crtc_destroy(struct drm_crtc *crtc);
+extern int gma_crtc_page_flip(struct drm_crtc *crtc,
+			      struct drm_framebuffer *fb,
+			      struct drm_pending_vblank_event *event,
+			      uint32_t page_flip_flags,
+			      struct drm_modeset_acquire_ctx *ctx);
 extern int gma_crtc_set_config(struct drm_mode_set *set,
 			       struct drm_modeset_acquire_ctx *ctx);
 
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index afaf4bea21cf..9278bcfad1bf 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -503,7 +503,7 @@ int psb_gtt_init(struct drm_device *dev, int resume)
 	 *	Map the GTT and the stolen memory area
 	 */
 	if (!resume)
-		dev_priv->gtt_map = ioremap_nocache(pg->gtt_phys_start,
+		dev_priv->gtt_map = ioremap(pg->gtt_phys_start,
 						gtt_pages << PAGE_SHIFT);
 	if (!dev_priv->gtt_map) {
 		dev_err(dev->dev, "Failure to map gtt.\n");
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index 03023fa0fb6f..f350ac1ead18 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -498,7 +498,7 @@ void mdfld_dsi_output_init(struct drm_device *dev,
 		return;
 	}
 
-	/*create a new connetor*/
+	/*create a new connector*/
 	dsi_connector = kzalloc(sizeof(struct mdfld_dsi_connector), GFP_KERNEL);
 	if (!dsi_connector) {
 		DRM_ERROR("No memory");
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index b8bfb96008b8..4fff110c4921 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -113,27 +113,6 @@ static int psb_intel_panel_fitter_pipe(struct drm_device *dev)
 	return (pfit_control >> 29) & 0x3;
 }
 
-static struct drm_device globle_dev;
-
-void mdfld__intel_plane_set_alpha(int enable)
-{
-	struct drm_device *dev = &globle_dev;
-	int dspcntr_reg = DSPACNTR;
-	u32 dspcntr;
-
-	dspcntr = REG_READ(dspcntr_reg);
-
-	if (enable) {
-		dspcntr &= ~DISPPLANE_32BPP_NO_ALPHA;
-		dspcntr |= DISPPLANE_32BPP;
-	} else {
-		dspcntr &= ~DISPPLANE_32BPP;
-		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
-	}
-
-	REG_WRITE(dspcntr_reg, dspcntr);
-}
-
 static int check_fb(struct drm_framebuffer *fb)
 {
 	if (!fb)
@@ -164,8 +143,6 @@ static int mdfld__intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	u32 dspcntr;
 	int ret;
 
-	memcpy(&globle_dev, dev, sizeof(struct drm_device));
-
 	dev_dbg(dev->dev, "pipe = 0x%x.\n", pipe);
 
 	/* no fb bound */
diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c
index 167c10767dd4..900e5499249d 100644
--- a/drivers/gpu/drm/gma500/oaktrail_crtc.c
+++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c
@@ -129,6 +129,7 @@ static bool mrst_sdvo_find_best_pll(const struct gma_limit_t *limit,
 	s32 freq_error, min_error = 100000;
 
 	memset(best_clock, 0, sizeof(*best_clock));
+	memset(&clock, 0, sizeof(clock));
 
 	for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) {
 		for (clock.n = limit->n.min; clock.n <= limit->n.max;
@@ -185,6 +186,7 @@ static bool mrst_lvds_find_best_pll(const struct gma_limit_t *limit,
 	int err = target;
 
 	memset(best_clock, 0, sizeof(*best_clock));
+	memset(&clock, 0, sizeof(clock));
 
 	for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) {
 		for (clock.p1 = limit->p1.min; clock.p1 <= limit->p1.max;
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index f4c520893ceb..f4370232767d 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -159,9 +159,7 @@ static void oaktrail_hdmi_audio_disable(struct drm_device *dev)
 
 static unsigned int htotal_calculate(struct drm_display_mode *mode)
 {
-	u32 htotal, new_crtc_htotal;
-
-	htotal = (mode->crtc_hdisplay - 1) | ((mode->crtc_htotal - 1) << 16);
+	u32 new_crtc_htotal;
 
 	/*
 	 * 1024 x 768  new_crtc_htotal = 0x1024;
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 7390403ea1b7..582e09597500 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -117,6 +117,7 @@ static void oaktrail_lvds_mode_set(struct drm_encoder *encoder,
 
 	if (!connector) {
 		DRM_ERROR("Couldn't find connector when setting mode");
+		gma_power_end(dev);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 7005f8f69c68..6956c8e7501c 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -19,10 +19,10 @@
 
 #include <drm/drm.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_file.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_irq.h>
-#include <drm/drm_pci.h>
 #include <drm/drm_pciids.h>
 #include <drm/drm_vblank.h>
 
@@ -256,7 +256,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags)
 							    PSB_AUX_RESOURCE);
 			resource_len = pci_resource_len(dev_priv->aux_pdev,
 							PSB_AUX_RESOURCE);
-			dev_priv->aux_reg = ioremap_nocache(resource_start,
+			dev_priv->aux_reg = ioremap(resource_start,
 							    resource_len);
 			if (!dev_priv->aux_reg)
 				goto out_err;
@@ -426,14 +426,48 @@ static long psb_unlocked_ioctl(struct file *filp, unsigned int cmd,
 
 static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_pci_dev(pdev, ent, &driver);
-}
+	struct drm_device *dev;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	dev = drm_dev_alloc(&driver, &pdev->dev);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err_pci_disable_device;
+	}
+
+	dev->pdev = pdev;
+	pci_set_drvdata(pdev, dev);
+
+	ret = psb_driver_load(dev, ent->driver_data);
+	if (ret)
+		goto err_drm_dev_put;
 
+	ret = drm_dev_register(dev, ent->driver_data);
+	if (ret)
+		goto err_psb_driver_unload;
+
+	return 0;
+
+err_psb_driver_unload:
+	psb_driver_unload(dev);
+err_drm_dev_put:
+	drm_dev_put(dev);
+err_pci_disable_device:
+	pci_disable_device(pdev);
+	return ret;
+}
 
 static void psb_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	drm_put_dev(dev);
+
+	drm_dev_unregister(dev);
+	psb_driver_unload(dev);
+	drm_dev_put(dev);
 }
 
 static const struct dev_pm_ops psb_pm_ops = {
@@ -466,8 +500,6 @@ static const struct file_operations psb_gem_fops = {
 
 static struct drm_driver driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM,
-	.load = psb_driver_load,
-	.unload = psb_driver_unload,
 	.lastclose = drm_fb_helper_lastclose,
 
 	.num_ioctls = ARRAY_SIZE(psb_ioctls),
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index 9b3c03f4a38d..3d4ef3071d45 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -229,6 +229,8 @@ enum {
 #define KSEL_BYPASS_25 6
 #define KSEL_BYPASS_83_100 7
 
+struct drm_fb_helper;
+
 struct opregion_header;
 struct opregion_acpi;
 struct opregion_swsci;
@@ -432,7 +434,7 @@ struct drm_psb_private {
 	struct pci_dev *lpc_pdev; /* Currently only used by mrst */
 	const struct psb_ops *ops;
 	const struct psb_offset *regmap;
-	
+
 	struct child_device_config *child_dev;
 	int child_dev_num;
 
@@ -540,7 +542,7 @@ struct drm_psb_private {
 
 	/* Oaktrail HDMI state */
 	struct oaktrail_hdmi_dev *hdmi_priv;
-	
+
 	/* Register state */
 	struct psb_save_area regs;
 
@@ -572,7 +574,7 @@ struct drm_psb_private {
 	uint32_t blc_adj1;
 	uint32_t blc_adj2;
 
-	void *fbdev;
+	struct drm_fb_helper *fb_helper;
 
 	/* 2D acceleration */
 	spinlock_t lock_2d;
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 4256410535f0..fed3b563e62e 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -432,6 +432,7 @@ const struct drm_crtc_funcs psb_intel_crtc_funcs = {
 	.gamma_set = gma_crtc_gamma_set,
 	.set_config = gma_crtc_set_config,
 	.destroy = gma_crtc_destroy,
+	.page_flip = gma_crtc_page_flip,
 };
 
 const struct gma_clock_funcs psb_clock_funcs = {
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index cdf10333d1c2..16c6136f778b 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -12,6 +12,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
 #include <linux/gpio.h>
 #include "gma_display.h"
 
@@ -182,6 +183,8 @@ struct gma_crtc {
 	struct psb_intel_crtc_state *crtc_state;
 
 	const struct gma_clock_funcs *clock_funcs;
+
+	struct drm_pending_vblank_event *page_flip_event;
 };
 
 #define to_gma_crtc(x)	\
diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c
index e6265fb85626..91f90016dba9 100644
--- a/drivers/gpu/drm/gma500/psb_irq.c
+++ b/drivers/gpu/drm/gma500/psb_irq.c
@@ -165,11 +165,23 @@ static void mid_pipe_event_handler(struct drm_device *dev, int pipe)
 		"%s, can't clear status bits for pipe %d, its value = 0x%x.\n",
 		__func__, pipe, PSB_RVDC32(pipe_stat_reg));
 
-	if (pipe_stat_val & PIPE_VBLANK_STATUS)
-		drm_handle_vblank(dev, pipe);
+	if (pipe_stat_val & PIPE_VBLANK_STATUS ||
+	    (IS_MFLD(dev) && pipe_stat_val & PIPE_TE_STATUS)) {
+		struct drm_crtc *crtc = drm_crtc_from_index(dev, pipe);
+		struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+		unsigned long flags;
 
-	if (pipe_stat_val & PIPE_TE_STATUS)
 		drm_handle_vblank(dev, pipe);
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		if (gma_crtc->page_flip_event) {
+			drm_crtc_send_vblank_event(crtc,
+						   gma_crtc->page_flip_event);
+			gma_crtc->page_flip_event = NULL;
+			drm_crtc_vblank_put(crtc);
+		}
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
 }
 
 /*
@@ -194,7 +206,6 @@ static void psb_sgx_interrupt(struct drm_device *dev, u32 stat_1, u32 stat_2)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	u32 val, addr;
-	int error = false;
 
 	if (stat_1 & _PSB_CE_TWOD_COMPLETE)
 		val = PSB_RSGX32(PSB_CR_2D_BLIT_STATUS);
@@ -229,7 +240,6 @@ static void psb_sgx_interrupt(struct drm_device *dev, u32 stat_1, u32 stat_2)
 
 			DRM_ERROR("\tMMU failing address is 0x%08x.\n",
 				  (unsigned int)addr);
-			error = true;
 		}
 	}
 
@@ -460,12 +470,11 @@ void psb_irq_turn_off_dpst(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv =
 	    (struct drm_psb_private *) dev->dev_private;
-	u32 hist_reg;
 	u32 pwm_reg;
 
 	if (gma_power_begin(dev, false)) {
 		PSB_WVDC32(0x00000000, HISTOGRAM_INT_CONTROL);
-		hist_reg = PSB_RVDC32(HISTOGRAM_INT_CONTROL);
+		PSB_RVDC32(HISTOGRAM_INT_CONTROL);
 
 		psb_disable_pipestat(dev_priv, 0, PIPE_DPST_EVENT_ENABLE);
 
diff --git a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
index 7de3ce637c7f..9e8224456ea2 100644
--- a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
+++ b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
@@ -25,7 +25,7 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/platform_data/tc35876x.h>
+#include <linux/gpio/consumer.h>
 
 #include <asm/intel_scu_ipc.h>
 
@@ -36,6 +36,11 @@
 
 static struct i2c_client *tc35876x_client;
 static struct i2c_client *cmi_lcd_i2c_client;
+/* Panel GPIOs */
+static struct gpio_desc *bridge_reset;
+static struct gpio_desc *bridge_bl_enable;
+static struct gpio_desc *backlight_voltage;
+
 
 #define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
 #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
@@ -316,27 +321,23 @@ static int tc35876x_regr(struct i2c_client *client, u16 reg, u32 *value)
 
 void tc35876x_set_bridge_reset_state(struct drm_device *dev, int state)
 {
-	struct tc35876x_platform_data *pdata;
-
 	if (WARN(!tc35876x_client, "%s called before probe", __func__))
 		return;
 
 	dev_dbg(&tc35876x_client->dev, "%s: state %d\n", __func__, state);
 
-	pdata = dev_get_platdata(&tc35876x_client->dev);
-
-	if (pdata->gpio_bridge_reset == -1)
+	if (!bridge_reset)
 		return;
 
 	if (state) {
-		gpio_set_value_cansleep(pdata->gpio_bridge_reset, 0);
+		gpiod_set_value_cansleep(bridge_reset, 0);
 		mdelay(10);
 	} else {
 		/* Pull MIPI Bridge reset pin to Low */
-		gpio_set_value_cansleep(pdata->gpio_bridge_reset, 0);
+		gpiod_set_value_cansleep(bridge_reset, 0);
 		mdelay(20);
 		/* Pull MIPI Bridge reset pin to High */
-		gpio_set_value_cansleep(pdata->gpio_bridge_reset, 1);
+		gpiod_set_value_cansleep(bridge_reset, 1);
 		mdelay(40);
 	}
 }
@@ -510,25 +511,20 @@ void tc35876x_brightness_control(struct drm_device *dev, int level)
 
 void tc35876x_toshiba_bridge_panel_off(struct drm_device *dev)
 {
-	struct tc35876x_platform_data *pdata;
-
 	if (WARN(!tc35876x_client, "%s called before probe", __func__))
 		return;
 
 	dev_dbg(&tc35876x_client->dev, "%s\n", __func__);
 
-	pdata = dev_get_platdata(&tc35876x_client->dev);
-
-	if (pdata->gpio_panel_bl_en != -1)
-		gpio_set_value_cansleep(pdata->gpio_panel_bl_en, 0);
+	if (bridge_bl_enable)
+		gpiod_set_value_cansleep(bridge_bl_enable, 0);
 
-	if (pdata->gpio_panel_vadd != -1)
-		gpio_set_value_cansleep(pdata->gpio_panel_vadd, 0);
+	if (backlight_voltage)
+		gpiod_set_value_cansleep(backlight_voltage, 0);
 }
 
 void tc35876x_toshiba_bridge_panel_on(struct drm_device *dev)
 {
-	struct tc35876x_platform_data *pdata;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 
 	if (WARN(!tc35876x_client, "%s called before probe", __func__))
@@ -536,10 +532,8 @@ void tc35876x_toshiba_bridge_panel_on(struct drm_device *dev)
 
 	dev_dbg(&tc35876x_client->dev, "%s\n", __func__);
 
-	pdata = dev_get_platdata(&tc35876x_client->dev);
-
-	if (pdata->gpio_panel_vadd != -1) {
-		gpio_set_value_cansleep(pdata->gpio_panel_vadd, 1);
+	if (backlight_voltage) {
+		gpiod_set_value_cansleep(backlight_voltage, 1);
 		msleep(260);
 	}
 
@@ -571,8 +565,8 @@ void tc35876x_toshiba_bridge_panel_on(struct drm_device *dev)
 				"i2c write failed (%d)\n", ret);
 	}
 
-	if (pdata->gpio_panel_bl_en != -1)
-		gpio_set_value_cansleep(pdata->gpio_panel_bl_en, 1);
+	if (bridge_bl_enable)
+		gpiod_set_value_cansleep(bridge_bl_enable, 1);
 
 	tc35876x_brightness_control(dev, dev_priv->brightness_adjusted);
 }
@@ -635,8 +629,6 @@ static int tc35876x_get_panel_info(struct drm_device *dev, int pipe,
 static int tc35876x_bridge_probe(struct i2c_client *client,
 				const struct i2c_device_id *id)
 {
-	struct tc35876x_platform_data *pdata;
-
 	dev_info(&client->dev, "%s\n", __func__);
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -645,26 +637,23 @@ static int tc35876x_bridge_probe(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	pdata = dev_get_platdata(&client->dev);
-	if (!pdata) {
-		dev_err(&client->dev, "%s: no platform data\n", __func__);
-		return -ENODEV;
-	}
+	bridge_reset = devm_gpiod_get_optional(&client->dev, "bridge-reset", GPIOD_OUT_LOW);
+	if (IS_ERR(bridge_reset))
+		return PTR_ERR(bridge_reset);
+	if (bridge_reset)
+		gpiod_set_consumer_name(bridge_reset, "tc35876x bridge reset");
 
-	if (pdata->gpio_bridge_reset != -1) {
-		gpio_request(pdata->gpio_bridge_reset, "tc35876x bridge reset");
-		gpio_direction_output(pdata->gpio_bridge_reset, 0);
-	}
-
-	if (pdata->gpio_panel_bl_en != -1) {
-		gpio_request(pdata->gpio_panel_bl_en, "tc35876x panel bl en");
-		gpio_direction_output(pdata->gpio_panel_bl_en, 0);
-	}
+	bridge_bl_enable = devm_gpiod_get_optional(&client->dev, "bl-en", GPIOD_OUT_LOW);
+	if (IS_ERR(bridge_bl_enable))
+		return PTR_ERR(bridge_bl_enable);
+	if (bridge_bl_enable)
+		gpiod_set_consumer_name(bridge_bl_enable, "tc35876x panel bl en");
 
-	if (pdata->gpio_panel_vadd != -1) {
-		gpio_request(pdata->gpio_panel_vadd, "tc35876x panel vadd");
-		gpio_direction_output(pdata->gpio_panel_vadd, 0);
-	}
+	backlight_voltage = devm_gpiod_get_optional(&client->dev, "vadd", GPIOD_OUT_LOW);
+	if (IS_ERR(backlight_voltage))
+		return PTR_ERR(backlight_voltage);
+	if (backlight_voltage)
+		gpiod_set_consumer_name(backlight_voltage, "tc35876x panel vadd");
 
 	tc35876x_client = client;
 
@@ -673,19 +662,8 @@ static int tc35876x_bridge_probe(struct i2c_client *client,
 
 static int tc35876x_bridge_remove(struct i2c_client *client)
 {
-	struct tc35876x_platform_data *pdata = dev_get_platdata(&client->dev);
-
 	dev_dbg(&client->dev, "%s\n", __func__);
 
-	if (pdata->gpio_bridge_reset != -1)
-		gpio_free(pdata->gpio_bridge_reset);
-
-	if (pdata->gpio_panel_bl_en != -1)
-		gpio_free(pdata->gpio_panel_bl_en);
-
-	if (pdata->gpio_panel_vadd != -1)
-		gpio_free(pdata->gpio_panel_vadd);
-
 	tc35876x_client = NULL;
 
 	return 0;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
index 35a3c5f0c38c..dfc5aef62f7b 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig
+++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
@@ -4,7 +4,8 @@ config DRM_HISI_HIBMC
 	depends on DRM && PCI && MMU && ARM64
 	select DRM_KMS_HELPER
 	select DRM_VRAM_HELPER
-
+	select DRM_TTM
+	select DRM_TTM_HELPER
 	help
 	  Choose this option if you have a Hisilicon Hibmc soc chipset.
 	  If M is selected the module will be called hibmc-drm.
diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile
index 0c2d4296bccd..f99132715597 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/Makefile
+++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_fbdev.o hibmc_ttm.o
+hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_ttm.o
 
 obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
index cc4c41748cfb..7fa7d4933f60 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
@@ -96,29 +96,19 @@ static void hibmc_plane_atomic_update(struct drm_plane *plane,
 {
 	struct drm_plane_state	*state	= plane->state;
 	u32 reg;
-	int ret;
 	s64 gpu_addr = 0;
 	unsigned int line_l;
 	struct hibmc_drm_private *priv = plane->dev->dev_private;
-	struct hibmc_framebuffer *hibmc_fb;
 	struct drm_gem_vram_object *gbo;
 
 	if (!state->fb)
 		return;
 
-	hibmc_fb = to_hibmc_framebuffer(state->fb);
-	gbo = drm_gem_vram_of_gem(hibmc_fb->obj);
+	gbo = drm_gem_vram_of_gem(state->fb->obj[0]);
 
-	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
-	if (ret) {
-		DRM_ERROR("failed to pin bo: %d", ret);
-		return;
-	}
 	gpu_addr = drm_gem_vram_offset(gbo);
-	if (gpu_addr < 0) {
-		drm_gem_vram_unpin(gbo);
-		return;
-	}
+	if (WARN_ON_ONCE(gpu_addr < 0))
+		return; /* Bug: we didn't pin the BO to VRAM in prepare_fb. */
 
 	writel(gpu_addr, priv->mmio + HIBMC_CRT_FB_ADDRESS);
 
@@ -157,6 +147,8 @@ static struct drm_plane_funcs hibmc_plane_funcs = {
 };
 
 static const struct drm_plane_helper_funcs hibmc_plane_helper_funcs = {
+	.prepare_fb	= drm_gem_vram_plane_helper_prepare_fb,
+	.cleanup_fb	= drm_gem_vram_plane_helper_cleanup_fb,
 	.atomic_check = hibmc_plane_atomic_check,
 	.atomic_update = hibmc_plane_atomic_update,
 };
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
index c103005b0a33..4a8a4cfb4b75 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
@@ -17,20 +17,17 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_vram_helper.h>
 #include <drm/drm_irq.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
-#include <drm/drm_vram_mm_helper.h>
 
 #include "hibmc_drm_drv.h"
 #include "hibmc_drm_regs.h"
 
-static const struct file_operations hibmc_fops = {
-	.owner		= THIS_MODULE,
-	DRM_VRAM_MM_FILE_OPERATIONS
-};
+DEFINE_DRM_GEM_FOPS(hibmc_fops);
 
 static irqreturn_t hibmc_drm_interrupt(int irq, void *arg)
 {
@@ -58,6 +55,7 @@ static struct drm_driver hibmc_driver = {
 	.desc			= "hibmc drm driver",
 	.major			= 1,
 	.minor			= 0,
+	.debugfs_init		= drm_vram_mm_debugfs_init,
 	.dumb_create            = hibmc_dumb_create,
 	.dumb_map_offset        = drm_gem_vram_driver_dumb_mmap_offset,
 	.gem_prime_mmap		= drm_gem_prime_mmap,
@@ -215,7 +213,7 @@ static int hibmc_hw_map(struct hibmc_drm_private *priv)
 
 	ioaddr = pci_resource_start(pdev, 1);
 	iosize = pci_resource_len(pdev, 1);
-	priv->mmio = devm_ioremap_nocache(dev->dev, ioaddr, iosize);
+	priv->mmio = devm_ioremap(dev->dev, ioaddr, iosize);
 	if (!priv->mmio) {
 		DRM_ERROR("Cannot map mmio region\n");
 		return -ENOMEM;
@@ -251,8 +249,6 @@ static int hibmc_unload(struct drm_device *dev)
 {
 	struct hibmc_drm_private *priv = dev->dev_private;
 
-	hibmc_fbdev_fini(priv);
-
 	drm_atomic_helper_shutdown(dev);
 
 	if (dev->irq_enabled)
@@ -311,7 +307,7 @@ static int hibmc_load(struct drm_device *dev)
 	/* reset all the states of crtc/plane/encoder/connector */
 	drm_mode_config_reset(dev);
 
-	ret = hibmc_fbdev_init(priv);
+	ret = drm_fbdev_generic_setup(dev, 16);
 	if (ret) {
 		DRM_ERROR("failed to initialize fbdev: %d\n", ret);
 		goto err;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
index e58ecd7edcf8..50a0c1f9d211 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
@@ -18,18 +18,6 @@
 #include <drm/drm_framebuffer.h>
 
 struct drm_device;
-struct drm_gem_object;
-
-struct hibmc_framebuffer {
-	struct drm_framebuffer fb;
-	struct drm_gem_object *obj;
-};
-
-struct hibmc_fbdev {
-	struct drm_fb_helper helper; /* must be first */
-	struct hibmc_framebuffer *fb;
-	int size;
-};
 
 struct hibmc_drm_private {
 	/* hw */
@@ -42,13 +30,8 @@ struct hibmc_drm_private {
 	/* drm */
 	struct drm_device  *dev;
 	bool mode_config_initialized;
-
-	/* fbdev */
-	struct hibmc_fbdev *fbdev;
 };
 
-#define to_hibmc_framebuffer(x) container_of(x, struct hibmc_framebuffer, fb)
-
 void hibmc_set_power_mode(struct hibmc_drm_private *priv,
 			  unsigned int power_mode);
 void hibmc_set_current_gate(struct hibmc_drm_private *priv,
@@ -56,15 +39,6 @@ void hibmc_set_current_gate(struct hibmc_drm_private *priv,
 
 int hibmc_de_init(struct hibmc_drm_private *priv);
 int hibmc_vdac_init(struct hibmc_drm_private *priv);
-int hibmc_fbdev_init(struct hibmc_drm_private *priv);
-void hibmc_fbdev_fini(struct hibmc_drm_private *priv);
-
-int hibmc_gem_create(struct drm_device *dev, u32 size, bool iskernel,
-		     struct drm_gem_object **obj);
-struct hibmc_framebuffer *
-hibmc_framebuffer_init(struct drm_device *dev,
-		       const struct drm_mode_fb_cmd2 *mode_cmd,
-		       struct drm_gem_object *obj);
 
 int hibmc_mm_init(struct hibmc_drm_private *hibmc);
 void hibmc_mm_fini(struct hibmc_drm_private *hibmc);
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
deleted file mode 100644
index b4c1cea051e8..000000000000
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* Hisilicon Hibmc SoC drm driver
- *
- * Based on the bochs drm driver.
- *
- * Copyright (c) 2016 Huawei Limited.
- *
- * Author:
- *	Rongrong Zou <zourongrong@huawei.com>
- *	Rongrong Zou <zourongrong@gmail.com>
- *	Jianhua Li <lijianhua@huawei.com>
- */
-
-#include <drm/drm_crtc.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fourcc.h>
-#include <drm/drm_gem_vram_helper.h>
-#include <drm/drm_probe_helper.h>
-
-#include "hibmc_drm_drv.h"
-
-static int hibmcfb_create_object(
-				struct hibmc_drm_private *priv,
-				const struct drm_mode_fb_cmd2 *mode_cmd,
-				struct drm_gem_object **gobj_p)
-{
-	struct drm_gem_object *gobj;
-	struct drm_device *dev = priv->dev;
-	u32 size;
-	int ret = 0;
-
-	size = mode_cmd->pitches[0] * mode_cmd->height;
-	ret = hibmc_gem_create(dev, size, true, &gobj);
-	if (ret)
-		return ret;
-
-	*gobj_p = gobj;
-	return ret;
-}
-
-static struct fb_ops hibmc_drm_fb_ops = {
-	.owner = THIS_MODULE,
-	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
-	.fb_fillrect = drm_fb_helper_sys_fillrect,
-	.fb_copyarea = drm_fb_helper_sys_copyarea,
-	.fb_imageblit = drm_fb_helper_sys_imageblit,
-	.fb_pan_display = drm_fb_helper_pan_display,
-	.fb_blank = drm_fb_helper_blank,
-	.fb_setcmap = drm_fb_helper_setcmap,
-};
-
-static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
-			       struct drm_fb_helper_surface_size *sizes)
-{
-	struct hibmc_fbdev *hi_fbdev =
-		container_of(helper, struct hibmc_fbdev, helper);
-	struct hibmc_drm_private *priv = helper->dev->dev_private;
-	struct fb_info *info;
-	struct drm_mode_fb_cmd2 mode_cmd;
-	struct drm_gem_object *gobj = NULL;
-	int ret = 0;
-	size_t size;
-	unsigned int bytes_per_pixel;
-	struct drm_gem_vram_object *gbo = NULL;
-	void *base;
-
-	DRM_DEBUG_DRIVER("surface width(%d), height(%d) and bpp(%d)\n",
-			 sizes->surface_width, sizes->surface_height,
-			 sizes->surface_bpp);
-
-	bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
-
-	mode_cmd.width = sizes->surface_width;
-	mode_cmd.height = sizes->surface_height;
-	mode_cmd.pitches[0] = mode_cmd.width * bytes_per_pixel;
-	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-							  sizes->surface_depth);
-
-	size = PAGE_ALIGN(mode_cmd.pitches[0] * mode_cmd.height);
-
-	ret = hibmcfb_create_object(priv, &mode_cmd, &gobj);
-	if (ret) {
-		DRM_ERROR("failed to create fbcon backing object: %d\n", ret);
-		return -ENOMEM;
-	}
-
-	gbo = drm_gem_vram_of_gem(gobj);
-
-	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
-	if (ret) {
-		DRM_ERROR("failed to pin fbcon: %d\n", ret);
-		goto out_unref_gem;
-	}
-
-	base = drm_gem_vram_kmap(gbo, true, NULL);
-	if (IS_ERR(base)) {
-		ret = PTR_ERR(base);
-		DRM_ERROR("failed to kmap fbcon: %d\n", ret);
-		goto out_unpin_bo;
-	}
-
-	info = drm_fb_helper_alloc_fbi(helper);
-	if (IS_ERR(info)) {
-		ret = PTR_ERR(info);
-		DRM_ERROR("failed to allocate fbi: %d\n", ret);
-		goto out_release_fbi;
-	}
-
-	hi_fbdev->fb = hibmc_framebuffer_init(priv->dev, &mode_cmd, gobj);
-	if (IS_ERR(hi_fbdev->fb)) {
-		ret = PTR_ERR(hi_fbdev->fb);
-		hi_fbdev->fb = NULL;
-		DRM_ERROR("failed to initialize framebuffer: %d\n", ret);
-		goto out_release_fbi;
-	}
-
-	priv->fbdev->size = size;
-	hi_fbdev->helper.fb = &hi_fbdev->fb->fb;
-
-	info->fbops = &hibmc_drm_fb_ops;
-
-	drm_fb_helper_fill_info(info, &priv->fbdev->helper, sizes);
-
-	info->screen_base = base;
-	info->screen_size = size;
-
-	info->fix.smem_start = gbo->bo.mem.bus.offset + gbo->bo.mem.bus.base;
-	info->fix.smem_len = size;
-	return 0;
-
-out_release_fbi:
-	drm_gem_vram_kunmap(gbo);
-out_unpin_bo:
-	drm_gem_vram_unpin(gbo);
-out_unref_gem:
-	drm_gem_object_put_unlocked(gobj);
-
-	return ret;
-}
-
-static void hibmc_fbdev_destroy(struct hibmc_fbdev *fbdev)
-{
-	struct hibmc_framebuffer *gfb = fbdev->fb;
-	struct drm_fb_helper *fbh = &fbdev->helper;
-
-	drm_fb_helper_unregister_fbi(fbh);
-
-	drm_fb_helper_fini(fbh);
-
-	if (gfb)
-		drm_framebuffer_put(&gfb->fb);
-}
-
-static const struct drm_fb_helper_funcs hibmc_fbdev_helper_funcs = {
-	.fb_probe = hibmc_drm_fb_create,
-};
-
-int hibmc_fbdev_init(struct hibmc_drm_private *priv)
-{
-	int ret;
-	struct fb_var_screeninfo *var;
-	struct fb_fix_screeninfo *fix;
-	struct hibmc_fbdev *hifbdev;
-
-	hifbdev = devm_kzalloc(priv->dev->dev, sizeof(*hifbdev), GFP_KERNEL);
-	if (!hifbdev) {
-		DRM_ERROR("failed to allocate hibmc_fbdev\n");
-		return -ENOMEM;
-	}
-
-	priv->fbdev = hifbdev;
-	drm_fb_helper_prepare(priv->dev, &hifbdev->helper,
-			      &hibmc_fbdev_helper_funcs);
-
-	/* Now just one crtc and one channel */
-	ret = drm_fb_helper_init(priv->dev, &hifbdev->helper, 1);
-	if (ret) {
-		DRM_ERROR("failed to initialize fb helper: %d\n", ret);
-		return ret;
-	}
-
-	ret = drm_fb_helper_single_add_all_connectors(&hifbdev->helper);
-	if (ret) {
-		DRM_ERROR("failed to add all connectors: %d\n", ret);
-		goto fini;
-	}
-
-	ret = drm_fb_helper_initial_config(&hifbdev->helper, 16);
-	if (ret) {
-		DRM_ERROR("failed to setup initial conn config: %d\n", ret);
-		goto fini;
-	}
-
-	var = &hifbdev->helper.fbdev->var;
-	fix = &hifbdev->helper.fbdev->fix;
-
-	DRM_DEBUG_DRIVER("Member of info->var is :\n"
-			 "xres=%d\n"
-			 "yres=%d\n"
-			 "xres_virtual=%d\n"
-			 "yres_virtual=%d\n"
-			 "xoffset=%d\n"
-			 "yoffset=%d\n"
-			 "bits_per_pixel=%d\n"
-			 "...\n", var->xres, var->yres, var->xres_virtual,
-			 var->yres_virtual, var->xoffset, var->yoffset,
-			 var->bits_per_pixel);
-	DRM_DEBUG_DRIVER("Member of info->fix is :\n"
-			 "smem_start=%lx\n"
-			 "smem_len=%d\n"
-			 "type=%d\n"
-			 "type_aux=%d\n"
-			 "visual=%d\n"
-			 "xpanstep=%d\n"
-			 "ypanstep=%d\n"
-			 "ywrapstep=%d\n"
-			 "line_length=%d\n"
-			 "accel=%d\n"
-			 "capabilities=%d\n"
-			 "...\n", fix->smem_start, fix->smem_len, fix->type,
-			 fix->type_aux, fix->visual, fix->xpanstep,
-			 fix->ypanstep, fix->ywrapstep, fix->line_length,
-			 fix->accel, fix->capabilities);
-
-	return 0;
-
-fini:
-	drm_fb_helper_fini(&hifbdev->helper);
-	return ret;
-}
-
-void hibmc_fbdev_fini(struct hibmc_drm_private *priv)
-{
-	if (!priv->fbdev)
-		return;
-
-	hibmc_fbdev_destroy(priv->fbdev);
-	priv->fbdev = NULL;
-}
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index 9f6e473e6295..50b988fdd5cc 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -15,9 +15,9 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_gem_vram_helper.h>
 #include <drm/drm_print.h>
-#include <drm/drm_vram_mm_helper.h>
 
 #include "hibmc_drm_drv.h"
 
@@ -29,7 +29,7 @@ int hibmc_mm_init(struct hibmc_drm_private *hibmc)
 
 	vmm = drm_vram_helper_alloc_mm(dev,
 				       pci_resource_start(dev->pdev, 0),
-				       hibmc->fb_size, &drm_gem_vram_mm_funcs);
+				       hibmc->fb_size);
 	if (IS_ERR(vmm)) {
 		ret = PTR_ERR(vmm);
 		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
@@ -47,125 +47,14 @@ void hibmc_mm_fini(struct hibmc_drm_private *hibmc)
 	drm_vram_helper_release_mm(hibmc->dev);
 }
 
-int hibmc_gem_create(struct drm_device *dev, u32 size, bool iskernel,
-		     struct drm_gem_object **obj)
-{
-	struct drm_gem_vram_object *gbo;
-	int ret;
-
-	*obj = NULL;
-
-	size = roundup(size, PAGE_SIZE);
-	if (size == 0)
-		return -EINVAL;
-
-	gbo = drm_gem_vram_create(dev, &dev->vram_mm->bdev, size, 0, false);
-	if (IS_ERR(gbo)) {
-		ret = PTR_ERR(gbo);
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("failed to allocate GEM object: %d\n", ret);
-		return ret;
-	}
-	*obj = &gbo->bo.base;
-	return 0;
-}
-
 int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev,
 		      struct drm_mode_create_dumb *args)
 {
-	struct drm_gem_object *gobj;
-	u32 handle;
-	int ret;
-
-	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 16);
-	args->size = args->pitch * args->height;
-
-	ret = hibmc_gem_create(dev, args->size, false,
-			       &gobj);
-	if (ret) {
-		DRM_ERROR("failed to create GEM object: %d\n", ret);
-		return ret;
-	}
-
-	ret = drm_gem_handle_create(file, gobj, &handle);
-	drm_gem_object_put_unlocked(gobj);
-	if (ret) {
-		DRM_ERROR("failed to unreference GEM object: %d\n", ret);
-		return ret;
-	}
-
-	args->handle = handle;
-	return 0;
-}
-
-static void hibmc_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
-	struct hibmc_framebuffer *hibmc_fb = to_hibmc_framebuffer(fb);
-
-	drm_gem_object_put_unlocked(hibmc_fb->obj);
-	drm_framebuffer_cleanup(fb);
-	kfree(hibmc_fb);
-}
-
-static const struct drm_framebuffer_funcs hibmc_fb_funcs = {
-	.destroy = hibmc_user_framebuffer_destroy,
-};
-
-struct hibmc_framebuffer *
-hibmc_framebuffer_init(struct drm_device *dev,
-		       const struct drm_mode_fb_cmd2 *mode_cmd,
-		       struct drm_gem_object *obj)
-{
-	struct hibmc_framebuffer *hibmc_fb;
-	int ret;
-
-	hibmc_fb = kzalloc(sizeof(*hibmc_fb), GFP_KERNEL);
-	if (!hibmc_fb) {
-		DRM_ERROR("failed to allocate hibmc_fb\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	drm_helper_mode_fill_fb_struct(dev, &hibmc_fb->fb, mode_cmd);
-	hibmc_fb->obj = obj;
-	ret = drm_framebuffer_init(dev, &hibmc_fb->fb, &hibmc_fb_funcs);
-	if (ret) {
-		DRM_ERROR("drm_framebuffer_init failed: %d\n", ret);
-		kfree(hibmc_fb);
-		return ERR_PTR(ret);
-	}
-
-	return hibmc_fb;
-}
-
-static struct drm_framebuffer *
-hibmc_user_framebuffer_create(struct drm_device *dev,
-			      struct drm_file *filp,
-			      const struct drm_mode_fb_cmd2 *mode_cmd)
-{
-	struct drm_gem_object *obj;
-	struct hibmc_framebuffer *hibmc_fb;
-
-	DRM_DEBUG_DRIVER("%dx%d, format %c%c%c%c\n",
-			 mode_cmd->width, mode_cmd->height,
-			 (mode_cmd->pixel_format) & 0xff,
-			 (mode_cmd->pixel_format >> 8)  & 0xff,
-			 (mode_cmd->pixel_format >> 16) & 0xff,
-			 (mode_cmd->pixel_format >> 24) & 0xff);
-
-	obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
-	if (!obj)
-		return ERR_PTR(-ENOENT);
-
-	hibmc_fb = hibmc_framebuffer_init(dev, mode_cmd, obj);
-	if (IS_ERR(hibmc_fb)) {
-		drm_gem_object_put_unlocked(obj);
-		return ERR_PTR((long)hibmc_fb);
-	}
-	return &hibmc_fb->fb;
+	return drm_gem_vram_fill_create_dumb(file, dev, 0, 16, args);
 }
 
 const struct drm_mode_config_funcs hibmc_mode_funcs = {
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
-	.fb_create = hibmc_user_framebuffer_create,
+	.fb_create = drm_gem_fb_create,
 };
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index 5bf8138941de..bdcf9c6ae9e9 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_device.h>
 #include <drm/drm_encoder_slave.h>
 #include <drm/drm_mipi_dsi.h>
diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c
index 8bcf0d199145..a839f78a4c8a 100644
--- a/drivers/gpu/drm/i2c/sil164_drv.c
+++ b/drivers/gpu/drm/i2c/sil164_drv.c
@@ -44,7 +44,7 @@ struct sil164_priv {
 	((struct sil164_priv *)to_encoder_slave(x)->slave_priv)
 
 #define sil164_dbg(client, format, ...) do {				\
-		if (drm_debug & DRM_UT_KMS)				\
+		if (drm_debug_enabled(DRM_UT_KMS))			\
 			dev_printk(KERN_DEBUG, &client->dev,		\
 				   "%s: " format, __func__, ## __VA_ARGS__); \
 	} while (0)
diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c
index 8039fc0d83db..5b03fdd1eaa4 100644
--- a/drivers/gpu/drm/i2c/tda9950.c
+++ b/drivers/gpu/drm/i2c/tda9950.c
@@ -420,7 +420,8 @@ static int tda9950_probe(struct i2c_client *client,
 		priv->hdmi = glue->parent;
 
 	priv->adap = cec_allocate_adapter(&tda9950_cec_ops, priv, "tda9950",
-					  CEC_CAP_DEFAULTS,
+					  CEC_CAP_DEFAULTS |
+					  CEC_CAP_CONNECTOR_INFO,
 					  CEC_MAX_LOG_ADDRS);
 	if (IS_ERR(priv->adap))
 		return PTR_ERR(priv->adap);
@@ -457,13 +458,14 @@ static int tda9950_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 
-	priv->notify = cec_notifier_get(priv->hdmi);
+	priv->notify = cec_notifier_cec_adap_register(priv->hdmi, NULL,
+						      priv->adap);
 	if (!priv->notify)
 		return -ENOMEM;
 
 	ret = cec_register_adapter(priv->adap, priv->hdmi);
 	if (ret < 0) {
-		cec_notifier_put(priv->notify);
+		cec_notifier_cec_adap_unregister(priv->notify, priv->adap);
 		return ret;
 	}
 
@@ -473,8 +475,6 @@ static int tda9950_probe(struct i2c_client *client,
 	 */
 	devm_remove_action(dev, tda9950_cec_del, priv);
 
-	cec_register_cec_notifier(priv->adap, priv->notify);
-
 	return 0;
 }
 
@@ -482,8 +482,8 @@ static int tda9950_remove(struct i2c_client *client)
 {
 	struct tda9950_priv *priv = i2c_get_clientdata(client);
 
+	cec_notifier_cec_adap_unregister(priv->notify, priv->adap);
 	cec_unregister_adapter(priv->adap);
-	cec_notifier_put(priv->notify);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 84c6d4c91c65..a63790d32d75 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -14,6 +14,7 @@
 #include <sound/hdmi-codec.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
 #include <drm/drm_print.h>
@@ -805,8 +806,8 @@ static irqreturn_t tda998x_irq_thread(int irq, void *data)
 				tda998x_edid_delay_start(priv);
 			} else {
 				schedule_work(&priv->detect_work);
-				cec_notifier_set_phys_addr(priv->cec_notify,
-						   CEC_PHYS_ADDR_INVALID);
+				cec_notifier_phys_addr_invalidate(
+						priv->cec_notify);
 			}
 
 			handled = true;
@@ -1790,8 +1791,7 @@ static void tda998x_destroy(struct device *dev)
 
 	i2c_unregister_device(priv->cec);
 
-	if (priv->cec_notify)
-		cec_notifier_put(priv->cec_notify);
+	cec_notifier_conn_unregister(priv->cec_notify);
 }
 
 static int tda998x_create(struct device *dev)
@@ -1916,7 +1916,7 @@ static int tda998x_create(struct device *dev)
 		cec_write(priv, REG_CEC_RXSHPDINTENA, CEC_RXSHPDLEV_HPD);
 	}
 
-	priv->cec_notify = cec_notifier_get(dev);
+	priv->cec_notify = cec_notifier_conn_register(dev, NULL, NULL);
 	if (!priv->cec_notify) {
 		ret = -ENOMEM;
 		goto fail;
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index 2a77823b8e9a..b88c3d5f92b4 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -32,6 +32,7 @@
 
 #include <linux/delay.h>
 #include <linux/mman.h>
+#include <linux/pci.h>
 
 #include <drm/drm_agpsupport.h>
 #include <drm/drm_device.h>
@@ -39,7 +40,6 @@
 #include <drm/drm_file.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_irq.h>
-#include <drm/drm_pci.h>
 #include <drm/drm_print.h>
 #include <drm/i810_drm.h>
 
@@ -728,7 +728,7 @@ static void i810_dma_dispatch_vertex(struct drm_device *dev,
 	if (nbox > I810_NR_SAREA_CLIPRECTS)
 		nbox = I810_NR_SAREA_CLIPRECTS;
 
-	if (used > 4 * 1024)
+	if (used < 0 || used > 4 * 1024)
 		used = 0;
 
 	if (sarea_priv->dirty)
@@ -1048,7 +1048,7 @@ static void i810_dma_dispatch_mc(struct drm_device *dev, struct drm_buf *buf, in
 	if (u != I810_BUF_CLIENT)
 		DRM_DEBUG("MC found buffer that isn't mine!\n");
 
-	if (used > 4 * 1024)
+	if (used < 0 || used > 4 * 1024)
 		used = 0;
 
 	sarea_priv->dirty = 0x7f;
diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c
index 5dd26a06ee0e..0e53a066d4db 100644
--- a/drivers/gpu/drm/i810/i810_drv.c
+++ b/drivers/gpu/drm/i810/i810_drv.c
@@ -31,11 +31,12 @@
  */
 
 #include "i810_drv.h"
+
 #include <linux/module.h>
+#include <linux/pci.h>
 
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
-#include <drm/drm_pci.h>
 #include <drm/drm_pciids.h>
 #include <drm/i810_drm.h>
 
diff --git a/drivers/gpu/drm/i915/.gitignore b/drivers/gpu/drm/i915/.gitignore
new file mode 100644
index 000000000000..d9a77f3b59b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/.gitignore
@@ -0,0 +1 @@
+*.hdrtest
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 0d21402945ab..ba9595960bbe 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR
 	  This option enables capturing the GPU state when a hang is detected.
 	  This information is vital for triaging hangs and assists in debugging.
 	  Please report any hang to
-            https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
+	    https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
 	  for triaging.
 
 	  If in doubt, say "Y".
@@ -105,11 +105,11 @@ config DRM_I915_USERPTR
 	  If in doubt, say "Y".
 
 config DRM_I915_GVT
-        bool "Enable Intel GVT-g graphics virtualization host support"
-        depends on DRM_I915
-        depends on 64BIT
-        default n
-        help
+	bool "Enable Intel GVT-g graphics virtualization host support"
+	depends on DRM_I915
+	depends on 64BIT
+	default n
+	help
 	  Choose this option if you want to enable Intel GVT-g graphics
 	  virtualization technology host support with integrated graphics.
 	  With GVT-g, it's possible to have one integrated graphics
@@ -148,3 +148,9 @@ menu "drm/i915 Profile Guided Optimisation"
 	depends on DRM_I915
 	source "drivers/gpu/drm/i915/Kconfig.profile"
 endmenu
+
+menu "drm/i915 Unstable Evolution"
+	visible if EXPERT && STAGING && BROKEN
+	depends on DRM_I915
+	source "drivers/gpu/drm/i915/Kconfig.unstable"
+endmenu
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 00786a142ff0..1cb28c20807c 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -1,34 +1,33 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_I915_WERROR
-        bool "Force GCC to throw an error instead of a warning when compiling"
-        # As this may inadvertently break the build, only allow the user
-        # to shoot oneself in the foot iff they aim really hard
-        depends on EXPERT
-        # We use the dependency on !COMPILE_TEST to not be enabled in
-        # allmodconfig or allyesconfig configurations
-        depends on !COMPILE_TEST
-	select HEADER_TEST
-        default n
-        help
-          Add -Werror to the build flags for (and only for) i915.ko.
-          Do not enable this unless you are writing code for the i915.ko module.
-
-          Recommended for driver developers only.
-
-          If in doubt, say "N".
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) i915.ko.
+	  Do not enable this unless you are writing code for the i915.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
 
 config DRM_I915_DEBUG
-        bool "Enable additional driver debugging"
-        depends on DRM_I915
-        select DEBUG_FS
-        select PREEMPT_COUNT
-        select REFCOUNT_FULL
-        select I2C_CHARDEV
-        select STACKDEPOT
-        select DRM_DP_AUX_CHARDEV
-        select X86_MSR # used by igt/pm_rpm
-        select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
-        select DRM_DEBUG_MM if DRM=y
+	bool "Enable additional driver debugging"
+	depends on DRM_I915
+	select DEBUG_FS
+	select PREEMPT_COUNT
+	select I2C_CHARDEV
+	select STACKDEPOT
+	select DRM_DP_AUX_CHARDEV
+	select X86_MSR # used by igt/pm_rpm
+	select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
+	select DRM_DEBUG_MM if DRM=y
+	select DRM_EXPORT_FOR_TESTS if m
 	select DRM_DEBUG_SELFTEST
 	select DMABUF_SELFTESTS
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
@@ -36,14 +35,14 @@ config DRM_I915_DEBUG
 	select DRM_I915_SELFTEST
 	select DRM_I915_DEBUG_RUNTIME_PM
 	select DRM_I915_DEBUG_MMIO
-        default n
-        help
-          Choose this option to turn on extra driver debugging that may affect
-          performance but will catch some internal issues.
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
 
-          Recommended for driver developers only.
+	  Recommended for driver developers only.
 
-          If in doubt, say "N".
+	  If in doubt, say "N".
 
 config DRM_I915_DEBUG_MMIO
 	bool "Always insert extra checks around mmio access by default"
@@ -59,16 +58,16 @@ config DRM_I915_DEBUG_MMIO
 	  If in doubt, say "N".
 
 config DRM_I915_DEBUG_GEM
-        bool "Insert extra checks into the GEM internals"
-        default n
-        depends on DRM_I915_WERROR
-        help
-          Enable extra sanity checks (including BUGs) along the GEM driver
-          paths that may slow the system down and if hit hang the machine.
+	bool "Insert extra checks into the GEM internals"
+	default n
+	depends on DRM_I915_WERROR
+	help
+	  Enable extra sanity checks (including BUGs) along the GEM driver
+	  paths that may slow the system down and if hit hang the machine.
 
-          Recommended for driver developers only.
+	  Recommended for driver developers only.
 
-          If in doubt, say "N".
+	  If in doubt, say "N".
 
 config DRM_I915_ERRLOG_GEM
 	bool "Insert extra logging (very verbose) for common GEM errors"
@@ -111,46 +110,47 @@ config DRM_I915_TRACE_GTT
 	  If in doubt, say "N".
 
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
-        bool "Enable additional driver debugging for fence objects"
-        depends on DRM_I915
-        select DEBUG_OBJECTS
-        default n
-        help
-          Choose this option to turn on extra driver debugging that may affect
-          performance but will catch some internal issues.
+	bool "Enable additional driver debugging for fence objects"
+	depends on DRM_I915
+	select DEBUG_OBJECTS
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
 
-          Recommended for driver developers only.
+	  Recommended for driver developers only.
 
-          If in doubt, say "N".
+	  If in doubt, say "N".
 
 config DRM_I915_SW_FENCE_CHECK_DAG
-        bool "Enable additional driver debugging for detecting dependency cycles"
-        depends on DRM_I915
-        default n
-        help
-          Choose this option to turn on extra driver debugging that may affect
-          performance but will catch some internal issues.
+	bool "Enable additional driver debugging for detecting dependency cycles"
+	depends on DRM_I915
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
 
-          Recommended for driver developers only.
+	  Recommended for driver developers only.
 
-          If in doubt, say "N".
+	  If in doubt, say "N".
 
 config DRM_I915_DEBUG_GUC
-        bool "Enable additional driver debugging for GuC"
-        depends on DRM_I915
-        default n
-        help
-          Choose this option to turn on extra driver debugging that may affect
-          performance but will help resolve GuC related issues.
+	bool "Enable additional driver debugging for GuC"
+	depends on DRM_I915
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will help resolve GuC related issues.
 
-          Recommended for driver developers only.
+	  Recommended for driver developers only.
 
-          If in doubt, say "N".
+	  If in doubt, say "N".
 
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915
 	default n
+	select DRM_EXPORT_FOR_TESTS if m
 	select FAULT_INJECTION
 	select PRIME_NUMBERS
 	help
@@ -178,15 +178,15 @@ config DRM_I915_SELFTEST_BROKEN
 	  If in doubt, say "N".
 
 config DRM_I915_LOW_LEVEL_TRACEPOINTS
-        bool "Enable low level request tracing events"
-        depends on DRM_I915
-        default n
-        help
-          Choose this option to turn on low level request tracing events.
-          This provides the ability to precisely monitor engine utilisation
-          and also analyze the request dependency resolving timeline.
-
-          If in doubt, say "N".
+	bool "Enable low level request tracing events"
+	depends on DRM_I915
+	default n
+	help
+	  Choose this option to turn on low level request tracing events.
+	  This provides the ability to precisely monitor engine utilisation
+	  and also analyze the request dependency resolving timeline.
+
+	  If in doubt, say "N".
 
 config DRM_I915_DEBUG_VBLANK_EVADE
 	bool "Enable extra debug warnings for vblank evasion"
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 48df8889a88a..c280b6ae38eb 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -12,6 +12,29 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
 	  May be 0 to disable the extra delay and solely use the device level
 	  runtime pm autosuspend delay tunable.
 
+config DRM_I915_HEARTBEAT_INTERVAL
+	int "Interval between heartbeat pulses (ms)"
+	default 2500 # milliseconds
+	help
+	  The driver sends a periodic heartbeat down all active engines to
+	  check the health of the GPU and undertake regular house-keeping of
+	  internal driver state.
+
+	  May be 0 to disable heartbeats and therefore disable automatic GPU
+	  hang detection.
+
+config DRM_I915_PREEMPT_TIMEOUT
+	int "Preempt timeout (ms, jiffy granularity)"
+	default 640 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a preemption event to occur
+	  when submitting a new context via execlists. If the current context
+	  does not hit an arbitration point and yield to HW before the timer
+	  expires, the HW will be reset to allow the more important context
+	  to execute.
+
+	  May be 0 to disable the timeout.
+
 config DRM_I915_SPIN_REQUEST
 	int "Busywait for request completion (us)"
 	default 5 # microseconds
@@ -25,3 +48,29 @@ config DRM_I915_SPIN_REQUEST
 	  May be 0 to disable the initial spin. In practice, we estimate
 	  the cost of enabling the interrupt (if currently disabled) to be
 	  a few microseconds.
+
+config DRM_I915_STOP_TIMEOUT
+	int "How long to wait for an engine to quiesce gracefully before reset (ms)"
+	default 100 # milliseconds
+	help
+	  By stopping submission and sleeping for a short time before resetting
+	  the GPU, we allow the innocent contexts also on the system to quiesce.
+	  It is then less likely for a hanging context to cause collateral
+	  damage as the system is reset in order to recover. The corollary is
+	  that the reset itself may take longer and so be more disruptive to
+	  interactive or low latency workloads.
+
+config DRM_I915_TIMESLICE_DURATION
+	int "Scheduling quantum for userspace batches (ms, jiffy granularity)"
+	default 1 # milliseconds
+	help
+	  When two user batches of equal priority are executing, we will
+	  alternate execution of each batch to ensure forward progress of
+	  all users. This is necessary in some cases where there may be
+	  an implicit dependency between those batches that requires
+	  concurrent execution in order for them to proceed, e.g. they
+	  interact with each other via userspace semaphores. Each context
+	  is scheduled for execution for the timeslice duration, before
+	  switching to the next context.
+
+	  May be 0 to disable timeslicing.
diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable
new file mode 100644
index 000000000000..0c2276155c2b
--- /dev/null
+++ b/drivers/gpu/drm/i915/Kconfig.unstable
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_I915_UNSTABLE
+	bool "Enable unstable API for early prototype development"
+	depends on EXPERT
+	depends on STAGING
+	depends on BROKEN # should never be enabled by distros!
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Enable prototype uAPI under general discussion before they are
+	  finalized. Such prototypes may be withdrawn or substantially
+	  changed before release. They are only enabled here so that a wide
+	  number of interested parties (userspace driver developers) can
+	  verify that the uAPI meet their expectations. These uAPI should
+	  never be used in production.
+
+	  Recommended for driver developers _only_.
+
+	  If in the slightest bit of doubt, say "N".
+
+config DRM_I915_UNSTABLE_FAKE_LMEM
+	bool "Enable the experimental fake lmem"
+	depends on DRM_I915_UNSTABLE
+	default n
+	help
+	  Convert some system memory into a fake local memory region for
+	  testing.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2587ea834f06..b8c5f8934dbd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -31,9 +31,6 @@ CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
 subdir-ccflags-y += \
 	$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
 
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
-
 subdir-ccflags-y += -I$(srctree)/$(src)
 
 # Please keep these build lists sorted!
@@ -46,10 +43,12 @@ i915-y += i915_drv.o \
 	  i915_pci.o \
 	  i915_scatterlist.o \
 	  i915_suspend.o \
+	  i915_switcheroo.o \
 	  i915_sysfs.o \
 	  i915_utils.o \
 	  intel_csr.o \
 	  intel_device_info.o \
+	  intel_memory_region.o \
 	  intel_pch.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o \
@@ -71,24 +70,36 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o display/intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # "Graphics Technology" (aka we talk to the gpu)
-obj-y += gt/
 gt-y += \
+	gt/debugfs_engines.o \
+	gt/debugfs_gt.o \
+	gt/debugfs_gt_pm.o \
+	gt/gen6_ppgtt.o \
+	gt/gen8_ppgtt.o \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
-	gt/intel_engine_pool.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_pool.o \
 	gt/intel_engine_user.o \
+	gt/intel_ggtt.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
-	gt/intel_hangcheck.o \
+	gt/intel_gt_requests.o \
+	gt/intel_gtt.o \
+	gt/intel_llc.o \
 	gt/intel_lrc.o \
+	gt/intel_mocs.o \
+	gt/intel_ppgtt.o \
+	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
-	gt/intel_ringbuffer.o \
-	gt/intel_mocs.o \
+	gt/intel_ring.o \
+	gt/intel_ring_submission.o \
+	gt/intel_rps.o \
 	gt/intel_sseu.o \
 	gt/intel_timeline.o \
 	gt/intel_workarounds.o
@@ -101,7 +112,6 @@ gt-y += \
 i915-y += $(gt-y)
 
 # GEM (Graphics Execution Management) code
-obj-y += gem/
 gem-y += \
 	gem/i915_gem_busy.o \
 	gem/i915_gem_clflush.o \
@@ -114,10 +124,12 @@ gem-y += \
 	gem/i915_gem_internal.o \
 	gem/i915_gem_object.o \
 	gem/i915_gem_object_blt.o \
+	gem/i915_gem_lmem.o \
 	gem/i915_gem_mman.o \
 	gem/i915_gem_pages.o \
 	gem/i915_gem_phys.o \
 	gem/i915_gem_pm.o \
+	gem/i915_gem_region.o \
 	gem/i915_gem_shmem.o \
 	gem/i915_gem_shrinker.o \
 	gem/i915_gem_stolen.o \
@@ -141,10 +153,10 @@ i915-y += \
 	  i915_scheduler.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
+	  intel_region_lmem.o \
 	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
-obj-y += gt/uc/
 i915-y += gt/uc/intel_uc.o \
 	  gt/uc/intel_uc_fw.o \
 	  gt/uc/intel_guc.o \
@@ -157,7 +169,6 @@ i915-y += gt/uc/intel_uc.o \
 	  gt/uc/intel_huc_fw.o
 
 # modesetting core code
-obj-y += display/
 i915-y += \
 	display/intel_atomic.o \
 	display/intel_atomic_plane.o \
@@ -172,6 +183,7 @@ i915-y += \
 	display/intel_display_power.o \
 	display/intel_dpio_phy.o \
 	display/intel_dpll_mgr.o \
+	display/intel_dsb.o \
 	display/intel_fbc.o \
 	display/intel_fifo_underrun.o \
 	display/intel_frontbuffer.o \
@@ -182,7 +194,8 @@ i915-y += \
 	display/intel_psr.o \
 	display/intel_quirks.o \
 	display/intel_sprite.o \
-	display/intel_tc.o
+	display/intel_tc.o \
+	display/intel_vga.o
 i915-$(CONFIG_ACPI) += \
 	display/intel_acpi.o \
 	display/intel_opregion.o
@@ -220,7 +233,6 @@ i915-y += \
 	display/vlv_dsi_pll.o
 
 # perf code
-obj-y += oa/
 i915-y += \
 	oa/i915_oa_hsw.o \
 	oa/i915_oa_bdw.o \
@@ -235,7 +247,8 @@ i915-y += \
 	oa/i915_oa_cflgt2.o \
 	oa/i915_oa_cflgt3.o \
 	oa/i915_oa_cnl.o \
-	oa/i915_oa_icl.o
+	oa/i915_oa_icl.o \
+	oa/i915_oa_tgl.o
 i915-y += i915_perf.o
 
 # Post-mortem debug and GPU hang state capture
@@ -244,8 +257,10 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	gem/selftests/igt_gem_utils.o \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
+	selftests/igt_atomic.o \
 	selftests/igt_flush_test.o \
 	selftests/igt_live_test.o \
+	selftests/igt_mmap.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o
 
@@ -259,3 +274,27 @@ endif
 
 obj-$(CONFIG_DRM_I915) += i915.o
 obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
+
+# header test
+
+# exclude some broken headers from the test coverage
+no-header-test := \
+	display/intel_vbt_defs.h \
+	gvt/execlist.h \
+	gvt/fb_decoder.h \
+	gvt/gtt.h \
+	gvt/gvt.h \
+	gvt/interrupt.h \
+	gvt/mmio_context.h \
+	gvt/mpt.h \
+	gvt/scheduler.h
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \
+		$(shell cd $(srctree)/$(src) && find * -name '*.h')))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/i915/display/Makefile b/drivers/gpu/drm/i915/display/Makefile
deleted file mode 100644
index 173c305d7866..000000000000
--- a/drivers/gpu/drm/i915/display/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
-header-test- := intel_vbt_defs.h
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 6e398c33a524..f8e882101396 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -34,6 +34,7 @@
 #include "intel_ddi.h"
 #include "intel_dsi.h"
 #include "intel_panel.h"
+#include "intel_vdsc.h"
 
 static inline int header_credits_available(struct drm_i915_private *dev_priv,
 					   enum transcoder dsi_trans)
@@ -76,7 +77,7 @@ static enum transcoder dsi_port_to_transcoder(enum port port)
 static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi;
 	enum port port;
 	enum transcoder dsi_trans;
@@ -201,7 +202,7 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host,
 static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 	u32 tmp;
 	int lane;
@@ -266,7 +267,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 dss_ctl1;
 
 	dss_ctl1 = I915_READ(DSS_CTL1);
@@ -276,7 +277,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
 
 	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) {
 		const struct drm_display_mode *adjusted_mode =
-					&pipe_config->base.adjusted_mode;
+					&pipe_config->hw.adjusted_mode;
 		u32 dss_ctl2;
 		u16 hactive = adjusted_mode->crtc_hdisplay;
 		u16 dl_buffer_depth;
@@ -301,18 +302,31 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
 	I915_WRITE(DSS_CTL1, dss_ctl1);
 }
 
-static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder)
+/* aka DSI 8X clock */
+static int afe_clk(struct intel_encoder *encoder,
+		   const struct intel_crtc_state *crtc_state)
+{
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	int bpp;
+
+	if (crtc_state->dsc.compression_enable)
+		bpp = crtc_state->dsc.compressed_bpp;
+	else
+		bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
+
+	return DIV_ROUND_CLOSEST(intel_dsi->pclk * bpp, intel_dsi->lane_count);
+}
+
+static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder,
+					  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
-	u32 bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
-	u32 afe_clk_khz; /* 8X Clock */
+	int afe_clk_khz;
 	u32 esc_clk_div_m;
 
-	afe_clk_khz = DIV_ROUND_CLOSEST(intel_dsi->pclk * bpp,
-					intel_dsi->lane_count);
-
+	afe_clk_khz = afe_clk(encoder, crtc_state);
 	esc_clk_div_m = DIV_ROUND_UP(afe_clk_khz, DSI_MAX_ESC_CLK);
 
 	for_each_dsi_port(port, intel_dsi->ports) {
@@ -346,7 +360,7 @@ static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv,
 static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -362,7 +376,7 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
 static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 
 	for_each_dsi_phy(phy, intel_dsi->phys)
@@ -373,7 +387,7 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
 static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 	u32 tmp;
 	int lane;
@@ -422,7 +436,7 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder)
 static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -474,7 +488,7 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder)
 static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 
@@ -490,10 +504,12 @@ static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder)
 	}
 }
 
-static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder)
+static void
+gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder,
+			     const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 	enum phy phy;
@@ -531,7 +547,7 @@ static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder)
 	 * leave all fields at HW default values.
 	 */
 	if (IS_GEN(dev_priv, 11)) {
-		if (intel_dsi_bitrate(intel_dsi) <= 800000) {
+		if (afe_clk(encoder, crtc_state) <= 800000) {
 			for_each_dsi_port(port, intel_dsi->ports) {
 				tmp = I915_READ(DPHY_TA_TIMING_PARAM(port));
 				tmp &= ~TA_SURE_MASK;
@@ -559,7 +575,7 @@ static void gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder)
 static void gen11_dsi_gate_clocks(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -575,7 +591,7 @@ static void gen11_dsi_gate_clocks(struct intel_encoder *encoder)
 static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -592,7 +608,7 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
 	enum phy phy;
 	u32 val;
@@ -624,8 +640,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 			       const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	u32 tmp;
 	enum port port;
@@ -641,7 +657,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 			tmp |= EOTP_DISABLED;
 
 		/* enable link calibration if freq > 1.5Gbps */
-		if (intel_dsi_bitrate(intel_dsi) >= 1500 * 1000) {
+		if (afe_clk(encoder, pipe_config) >= 1500 * 1000) {
 			tmp &= ~LINK_CALIBRATION_MASK;
 			tmp |= CALIBRATION_ENABLED_INITIAL_ONLY;
 		}
@@ -667,22 +683,26 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 
 		/* select pixel format */
 		tmp &= ~PIX_FMT_MASK;
-		switch (intel_dsi->pixel_format) {
-		default:
-			MISSING_CASE(intel_dsi->pixel_format);
-			/* fallthrough */
-		case MIPI_DSI_FMT_RGB565:
-			tmp |= PIX_FMT_RGB565;
-			break;
-		case MIPI_DSI_FMT_RGB666_PACKED:
-			tmp |= PIX_FMT_RGB666_PACKED;
-			break;
-		case MIPI_DSI_FMT_RGB666:
-			tmp |= PIX_FMT_RGB666_LOOSE;
-			break;
-		case MIPI_DSI_FMT_RGB888:
-			tmp |= PIX_FMT_RGB888;
-			break;
+		if (pipe_config->dsc.compression_enable) {
+			tmp |= PIX_FMT_COMPRESSED;
+		} else {
+			switch (intel_dsi->pixel_format) {
+			default:
+				MISSING_CASE(intel_dsi->pixel_format);
+				/* fallthrough */
+			case MIPI_DSI_FMT_RGB565:
+				tmp |= PIX_FMT_RGB565;
+				break;
+			case MIPI_DSI_FMT_RGB666_PACKED:
+				tmp |= PIX_FMT_RGB666_PACKED;
+				break;
+			case MIPI_DSI_FMT_RGB666:
+				tmp |= PIX_FMT_RGB666_LOOSE;
+				break;
+			case MIPI_DSI_FMT_RGB888:
+				tmp |= PIX_FMT_RGB888;
+				break;
+			}
 		}
 
 		if (INTEL_GEN(dev_priv) >= 12) {
@@ -745,6 +765,9 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 		case PIPE_C:
 			tmp |= TRANS_DDI_EDP_INPUT_C_ONOFF;
 			break;
+		case PIPE_D:
+			tmp |= TRANS_DDI_EDP_INPUT_D_ONOFF;
+			break;
 		}
 
 		/* enable DDI buffer */
@@ -763,12 +786,12 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 
 static void
 gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
-				 const struct intel_crtc_state *pipe_config)
+				 const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	const struct drm_display_mode *adjusted_mode =
-					&pipe_config->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	enum port port;
 	enum transcoder dsi_trans;
 	/* horizontal timings */
@@ -776,11 +799,25 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 	u16 hback_porch;
 	/* vertical timings */
 	u16 vtotal, vactive, vsync_start, vsync_end, vsync_shift;
+	int mul = 1, div = 1;
+
+	/*
+	 * Adjust horizontal timings (htotal, hsync_start, hsync_end) to account
+	 * for slower link speed if DSC is enabled.
+	 *
+	 * The compression frequency ratio is the ratio between compressed and
+	 * non-compressed link speeds, and simplifies down to the ratio between
+	 * compressed and non-compressed bpp.
+	 */
+	if (crtc_state->dsc.compression_enable) {
+		mul = crtc_state->dsc.compressed_bpp;
+		div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
+	}
 
 	hactive = adjusted_mode->crtc_hdisplay;
-	htotal = adjusted_mode->crtc_htotal;
-	hsync_start = adjusted_mode->crtc_hsync_start;
-	hsync_end = adjusted_mode->crtc_hsync_end;
+	htotal = DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div);
+	hsync_start = DIV_ROUND_UP(adjusted_mode->crtc_hsync_start * mul, div);
+	hsync_end = DIV_ROUND_UP(adjusted_mode->crtc_hsync_end * mul, div);
 	hsync_size  = hsync_end - hsync_start;
 	hback_porch = (adjusted_mode->crtc_htotal -
 		       adjusted_mode->crtc_hsync_end);
@@ -886,7 +923,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -904,10 +941,11 @@ static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder)
 	}
 }
 
-static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder)
+static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder,
+				     const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp, hs_tx_timeout, lp_rx_timeout, ta_timeout, divisor, mul;
@@ -919,7 +957,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder)
 	 * TIME_NS = (BYTE_CLK_COUNT * 8 * 10^6)/ Bitrate
 	 * ESCAPE_CLK_COUNT  = TIME_NS/ESC_CLK_NS
 	 */
-	divisor = intel_dsi_tlpx_ns(intel_dsi) * intel_dsi_bitrate(intel_dsi) * 1000;
+	divisor = intel_dsi_tlpx_ns(intel_dsi) * afe_clk(encoder, crtc_state) * 1000;
 	mul = 8 * 1000000;
 	hs_tx_timeout = DIV_ROUND_UP(intel_dsi->hs_tx_timeout * mul,
 				     divisor);
@@ -955,7 +993,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder)
 
 static void
 gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
-			      const struct intel_crtc_state *pipe_config)
+			      const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
@@ -972,13 +1010,13 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
 	gen11_dsi_enable_ddi_buffer(encoder);
 
 	/* setup D-PHY timings */
-	gen11_dsi_setup_dphy_timings(encoder);
+	gen11_dsi_setup_dphy_timings(encoder, crtc_state);
 
 	/* step 4h: setup DSI protocol timeouts */
-	gen11_dsi_setup_timeouts(encoder);
+	gen11_dsi_setup_timeouts(encoder, crtc_state);
 
 	/* Step (4h, 4i, 4j, 4k): Configure transcoder */
-	gen11_dsi_configure_transcoder(encoder, pipe_config);
+	gen11_dsi_configure_transcoder(encoder, crtc_state);
 
 	/* Step 4l: Gate DDI clocks */
 	if (IS_GEN(dev_priv, 11))
@@ -988,7 +1026,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
 static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi;
 	enum port port;
 	enum transcoder dsi_trans;
@@ -1025,21 +1063,21 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
 }
 
 static void gen11_dsi_pre_pll_enable(struct intel_encoder *encoder,
-				     const struct intel_crtc_state *pipe_config,
+				     const struct intel_crtc_state *crtc_state,
 				     const struct drm_connector_state *conn_state)
 {
 	/* step2: enable IO power */
 	gen11_dsi_enable_io_power(encoder);
 
 	/* step3: enable DSI PLL */
-	gen11_dsi_program_esc_clk_div(encoder);
+	gen11_dsi_program_esc_clk_div(encoder, crtc_state);
 }
 
 static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *pipe_config,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	/* step3b */
 	gen11_dsi_map_pll(encoder, pipe_config);
@@ -1050,6 +1088,8 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
 	/* step5: program and powerup panel */
 	gen11_dsi_powerup_panel(encoder);
 
+	intel_dsc_enable(encoder, pipe_config);
+
 	/* step6c: configure transcoder timings */
 	gen11_dsi_set_transcoder_timings(encoder, pipe_config);
 
@@ -1064,7 +1104,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
 static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -1086,7 +1126,7 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder)
 
 static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET);
@@ -1099,7 +1139,7 @@ static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder)
 static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -1140,7 +1180,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder)
 static void gen11_dsi_disable_port(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 
@@ -1162,7 +1202,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder)
 static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -1189,7 +1229,7 @@ static void gen11_dsi_disable(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	/* step1: turn off backlight */
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
@@ -1211,12 +1251,42 @@ static void gen11_dsi_disable(struct intel_encoder *encoder,
 	gen11_dsi_disable_io_power(encoder);
 }
 
+static void gen11_dsi_post_disable(struct intel_encoder *encoder,
+				   const struct intel_crtc_state *old_crtc_state,
+				   const struct drm_connector_state *old_conn_state)
+{
+	intel_crtc_vblank_off(old_crtc_state);
+
+	intel_dsc_disable(old_crtc_state);
+
+	skl_scaler_disable(old_crtc_state);
+}
+
+static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector,
+						 struct drm_display_mode *mode)
+{
+	/* FIXME: DSC? */
+	return intel_dsi_mode_valid(connector, mode);
+}
+
 static void gen11_dsi_get_timings(struct intel_encoder *encoder,
 				  struct intel_crtc_state *pipe_config)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct drm_display_mode *adjusted_mode =
-					&pipe_config->base.adjusted_mode;
+					&pipe_config->hw.adjusted_mode;
+
+	if (pipe_config->dsc.compressed_bpp) {
+		int div = pipe_config->dsc.compressed_bpp;
+		int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
+
+		adjusted_mode->crtc_htotal =
+			DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div);
+		adjusted_mode->crtc_hsync_start =
+			DIV_ROUND_UP(adjusted_mode->crtc_hsync_start * mul, div);
+		adjusted_mode->crtc_hsync_end =
+			DIV_ROUND_UP(adjusted_mode->crtc_hsync_end * mul, div);
+	}
 
 	if (intel_dsi->dual_link) {
 		adjusted_mode->crtc_hdisplay *= 2;
@@ -1242,22 +1312,66 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder,
 				 struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+
+	intel_dsc_get_config(encoder, pipe_config);
 
 	/* FIXME: adapt icl_ddi_clock_get() for DSI and use that? */
 	pipe_config->port_clock =
 		cnl_calc_wrpll_link(dev_priv, &pipe_config->dpll_hw_state);
 
-	pipe_config->base.adjusted_mode.crtc_clock = intel_dsi->pclk;
+	pipe_config->hw.adjusted_mode.crtc_clock = intel_dsi->pclk;
 	if (intel_dsi->dual_link)
-		pipe_config->base.adjusted_mode.crtc_clock *= 2;
+		pipe_config->hw.adjusted_mode.crtc_clock *= 2;
 
 	gen11_dsi_get_timings(encoder, pipe_config);
 	pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI);
 	pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc);
 }
 
+static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder,
+					struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
+	int dsc_max_bpc = INTEL_GEN(dev_priv) >= 12 ? 12 : 10;
+	bool use_dsc;
+	int ret;
+
+	use_dsc = intel_bios_get_dsc_params(encoder, crtc_state, dsc_max_bpc);
+	if (!use_dsc)
+		return 0;
+
+	if (crtc_state->pipe_bpp < 8 * 3)
+		return -EINVAL;
+
+	/* FIXME: split only when necessary */
+	if (crtc_state->dsc.slice_count > 1)
+		crtc_state->dsc.dsc_split = true;
+
+	vdsc_cfg->convert_rgb = true;
+
+	ret = intel_dsc_compute_params(encoder, crtc_state);
+	if (ret)
+		return ret;
+
+	/* DSI specific sanity checks on the common code */
+	WARN_ON(vdsc_cfg->vbr_enable);
+	WARN_ON(vdsc_cfg->simple_422);
+	WARN_ON(vdsc_cfg->pic_width % vdsc_cfg->slice_width);
+	WARN_ON(vdsc_cfg->slice_height < 8);
+	WARN_ON(vdsc_cfg->pic_height % vdsc_cfg->slice_height);
+
+	ret = drm_dsc_compute_rc_parameters(vdsc_cfg);
+	if (ret)
+		return ret;
+
+	crtc_state->dsc.compression_enable = true;
+
+	return 0;
+}
+
 static int gen11_dsi_compute_config(struct intel_encoder *encoder,
 				    struct intel_crtc_state *pipe_config,
 				    struct drm_connector_state *conn_state)
@@ -1265,11 +1379,11 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
 	struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi,
 						   base);
 	struct intel_connector *intel_connector = intel_dsi->attached_connector;
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	const struct drm_display_mode *fixed_mode =
 					intel_connector->panel.fixed_mode;
 	struct drm_display_mode *adjusted_mode =
-					&pipe_config->base.adjusted_mode;
+					&pipe_config->hw.adjusted_mode;
 
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	intel_fixed_panel_mode(fixed_mode, adjusted_mode);
@@ -1283,8 +1397,17 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
 	else
 		pipe_config->cpu_transcoder = TRANSCODER_DSI_0;
 
+	if (intel_dsi->pixel_format == MIPI_DSI_FMT_RGB888)
+		pipe_config->pipe_bpp = 24;
+	else
+		pipe_config->pipe_bpp = 18;
+
 	pipe_config->clock_set = true;
-	pipe_config->port_clock = intel_dsi_bitrate(intel_dsi) / 5;
+
+	if (gen11_dsi_dsc_compute_config(encoder, pipe_config))
+		DRM_DEBUG_KMS("Attempting to use DSC failed\n");
+
+	pipe_config->port_clock = afe_clk(encoder, pipe_config) / 5;
 
 	return 0;
 }
@@ -1292,15 +1415,21 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
 static void gen11_dsi_get_power_domains(struct intel_encoder *encoder,
 					struct intel_crtc_state *crtc_state)
 {
-	get_dsi_io_power_domains(to_i915(encoder->base.dev),
-				 enc_to_intel_dsi(&encoder->base));
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+
+	get_dsi_io_power_domains(i915,
+				 enc_to_intel_dsi(encoder));
+
+	if (crtc_state->dsc.compression_enable)
+		intel_display_power_get(i915,
+					intel_dsc_power_domain(crtc_state));
 }
 
 static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum transcoder dsi_trans;
 	intel_wakeref_t wakeref;
 	enum port port;
@@ -1325,6 +1454,9 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
 		case TRANS_DDI_EDP_INPUT_C_ONOFF:
 			*pipe = PIPE_C;
 			break;
+		case TRANS_DDI_EDP_INPUT_D_ONOFF:
+			*pipe = PIPE_D;
+			break;
 		default:
 			DRM_ERROR("Invalid PIPE input\n");
 			goto out;
@@ -1360,7 +1492,7 @@ static const struct drm_connector_funcs gen11_dsi_connector_funcs = {
 
 static const struct drm_connector_helper_funcs gen11_dsi_connector_helper_funcs = {
 	.get_modes = intel_dsi_get_modes,
-	.mode_valid = intel_dsi_mode_valid,
+	.mode_valid = gen11_dsi_mode_valid,
 	.atomic_check = intel_digital_connector_atomic_check,
 };
 
@@ -1577,6 +1709,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
 	encoder->pre_pll_enable = gen11_dsi_pre_pll_enable;
 	encoder->pre_enable = gen11_dsi_pre_enable;
 	encoder->disable = gen11_dsi_disable;
+	encoder->post_disable = gen11_dsi_post_disable;
 	encoder->port = port;
 	encoder->get_config = gen11_dsi_get_config;
 	encoder->update_pipe = intel_panel_update_backlight;
@@ -1584,7 +1717,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
 	encoder->get_hw_state = gen11_dsi_get_hw_state;
 	encoder->type = INTEL_OUTPUT_DSI;
 	encoder->cloneable = 0;
-	encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C);
+	encoder->pipe_mask = ~0;
 	encoder->power_domain = POWER_DOMAIN_PORT_DSI;
 	encoder->get_power_domains = gen11_dsi_get_power_domains;
 
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 7cb2257bbb93..c362eecdd414 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -37,6 +37,7 @@
 #include "intel_atomic.h"
 #include "intel_display_types.h"
 #include "intel_hdcp.h"
+#include "intel_psr.h"
 #include "intel_sprite.h"
 
 /**
@@ -129,6 +130,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 	struct drm_crtc_state *crtc_state;
 
 	intel_hdcp_atomic_check(conn, old_state, new_state);
+	intel_psr_atomic_check(conn, old_state, new_state);
 
 	if (!new_state->crtc)
 		return 0;
@@ -175,6 +177,38 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector)
 }
 
 /**
+ * intel_connector_needs_modeset - check if connector needs a modeset
+ */
+bool
+intel_connector_needs_modeset(struct intel_atomic_state *state,
+			      struct drm_connector *connector)
+{
+	const struct drm_connector_state *old_conn_state, *new_conn_state;
+
+	old_conn_state = drm_atomic_get_old_connector_state(&state->base, connector);
+	new_conn_state = drm_atomic_get_new_connector_state(&state->base, connector);
+
+	return old_conn_state->crtc != new_conn_state->crtc ||
+	       (new_conn_state->crtc &&
+		drm_atomic_crtc_needs_modeset(drm_atomic_get_new_crtc_state(&state->base,
+									    new_conn_state->crtc)));
+}
+
+struct intel_digital_connector_state *
+intel_atomic_get_digital_connector_state(struct intel_atomic_state *state,
+					 struct intel_connector *connector)
+{
+	struct drm_connector_state *conn_state;
+
+	conn_state = drm_atomic_get_connector_state(&state->base,
+						    &connector->base);
+	if (IS_ERR(conn_state))
+		return ERR_CAST(conn_state);
+
+	return to_intel_digital_connector_state(conn_state);
+}
+
+/**
  * intel_crtc_duplicate_state - duplicate crtc state
  * @crtc: drm crtc
  *
@@ -186,27 +220,57 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector)
 struct drm_crtc_state *
 intel_crtc_duplicate_state(struct drm_crtc *crtc)
 {
+	const struct intel_crtc_state *old_crtc_state = to_intel_crtc_state(crtc->state);
 	struct intel_crtc_state *crtc_state;
 
-	crtc_state = kmemdup(crtc->state, sizeof(*crtc_state), GFP_KERNEL);
+	crtc_state = kmemdup(old_crtc_state, sizeof(*crtc_state), GFP_KERNEL);
 	if (!crtc_state)
 		return NULL;
 
-	__drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->base);
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->uapi);
+
+	/* copy color blobs */
+	if (crtc_state->hw.degamma_lut)
+		drm_property_blob_get(crtc_state->hw.degamma_lut);
+	if (crtc_state->hw.ctm)
+		drm_property_blob_get(crtc_state->hw.ctm);
+	if (crtc_state->hw.gamma_lut)
+		drm_property_blob_get(crtc_state->hw.gamma_lut);
 
 	crtc_state->update_pipe = false;
 	crtc_state->disable_lp_wm = false;
 	crtc_state->disable_cxsr = false;
 	crtc_state->update_wm_pre = false;
 	crtc_state->update_wm_post = false;
-	crtc_state->fb_changed = false;
 	crtc_state->fifo_changed = false;
 	crtc_state->preload_luts = false;
 	crtc_state->wm.need_postvbl_update = false;
 	crtc_state->fb_bits = 0;
 	crtc_state->update_planes = 0;
 
-	return &crtc_state->base;
+	return &crtc_state->uapi;
+}
+
+static void intel_crtc_put_color_blobs(struct intel_crtc_state *crtc_state)
+{
+	drm_property_blob_put(crtc_state->hw.degamma_lut);
+	drm_property_blob_put(crtc_state->hw.gamma_lut);
+	drm_property_blob_put(crtc_state->hw.ctm);
+}
+
+void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state)
+{
+	intel_crtc_put_color_blobs(crtc_state);
+}
+
+void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state)
+{
+	drm_property_replace_blob(&crtc_state->hw.degamma_lut,
+				  crtc_state->uapi.degamma_lut);
+	drm_property_replace_blob(&crtc_state->hw.gamma_lut,
+				  crtc_state->uapi.gamma_lut);
+	drm_property_replace_blob(&crtc_state->hw.ctm,
+				  crtc_state->uapi.ctm);
 }
 
 /**
@@ -221,7 +285,11 @@ void
 intel_crtc_destroy_state(struct drm_crtc *crtc,
 			 struct drm_crtc_state *state)
 {
-	drm_atomic_helper_crtc_destroy_state(crtc, state);
+	struct intel_crtc_state *crtc_state = to_intel_crtc_state(state);
+
+	__drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi);
+	intel_crtc_free_hw_state(crtc_state);
+	kfree(crtc_state);
 }
 
 static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_state,
@@ -250,10 +318,10 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta
 		return;
 
 	/* set scaler mode */
-	if (plane_state && plane_state->base.fb &&
-	    plane_state->base.fb->format->is_yuv &&
-	    plane_state->base.fb->format->num_planes > 1) {
-		struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	if (plane_state && plane_state->hw.fb &&
+	    plane_state->hw.fb->format->is_yuv &&
+	    plane_state->hw.fb->format->num_planes > 1) {
+		struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 		if (IS_GEN(dev_priv, 9) &&
 		    !IS_GEMINILAKE(dev_priv)) {
 			mode = SKL_PS_SCALER_MODE_NV12;
@@ -265,10 +333,13 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta
 			 */
 			mode = PS_SCALER_MODE_NORMAL;
 		} else {
+			struct intel_plane *linked =
+				plane_state->planar_linked_plane;
+
 			mode = PS_SCALER_MODE_PLANAR;
 
-			if (plane_state->linked_plane)
-				mode |= PS_PLANE_Y_SEL(plane_state->linked_plane->id);
+			if (linked)
+				mode |= PS_PLANE_Y_SEL(linked->id);
 		}
 	} else if (INTEL_GEN(dev_priv) > 9 || IS_GEMINILAKE(dev_priv)) {
 		mode = PS_SCALER_MODE_NORMAL;
@@ -317,7 +388,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 	struct intel_plane_state *plane_state = NULL;
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
-	struct drm_atomic_state *drm_state = crtc_state->base.state;
+	struct drm_atomic_state *drm_state = crtc_state->uapi.state;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state);
 	int num_scalers_need;
 	int i;
@@ -372,6 +443,15 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 			 */
 			if (!plane) {
 				struct drm_plane_state *state;
+
+				/*
+				 * GLK+ scalers don't have a HQ mode so it
+				 * isn't necessary to change between HQ and dyn mode
+				 * on those platforms.
+				 */
+				if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+					continue;
+
 				plane = drm_plane_from_index(&dev_priv->drm, i);
 				state = drm_atomic_get_plane_state(drm_state, plane);
 				if (IS_ERR(state)) {
@@ -379,13 +459,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 						plane->base.id);
 					return PTR_ERR(state);
 				}
-
-				/*
-				 * the plane is added after plane checks are run,
-				 * but since this plane is unchanged just do the
-				 * minimum required validation.
-				 */
-				crtc_state->base.planes_changed = true;
 			}
 
 			intel_plane = to_intel_plane(plane);
@@ -426,6 +499,13 @@ void intel_atomic_state_clear(struct drm_atomic_state *s)
 	struct intel_atomic_state *state = to_intel_atomic_state(s);
 	drm_atomic_state_default_clear(&state->base);
 	state->dpll_set = state->modeset = false;
+	state->global_state_changed = false;
+	state->active_pipes = 0;
+	memset(&state->min_cdclk, 0, sizeof(state->min_cdclk));
+	memset(&state->min_voltage_level, 0, sizeof(state->min_voltage_level));
+	memset(&state->cdclk.logical, 0, sizeof(state->cdclk.logical));
+	memset(&state->cdclk.actual, 0, sizeof(state->cdclk.actual));
+	state->cdclk.pipe = INVALID_PIPE;
 }
 
 struct intel_crtc_state *
@@ -439,3 +519,40 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state,
 
 	return to_intel_crtc_state(crtc_state);
 }
+
+int intel_atomic_lock_global_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc *crtc;
+
+	state->global_state_changed = true;
+
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		int ret;
+
+		ret = drm_modeset_lock(&crtc->base.mutex,
+				       state->base.acquire_ctx);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int intel_atomic_serialize_global_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc *crtc;
+
+	state->global_state_changed = true;
+
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_crtc_state *crtc_state;
+
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h
index 58065d3161a3..74c749dbfb4f 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic.h
@@ -16,6 +16,8 @@ struct drm_crtc_state;
 struct drm_device;
 struct drm_i915_private;
 struct drm_property;
+struct intel_atomic_state;
+struct intel_connector;
 struct intel_crtc;
 struct intel_crtc_state;
 
@@ -31,10 +33,17 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 					 struct drm_atomic_state *state);
 struct drm_connector_state *
 intel_digital_connector_duplicate_state(struct drm_connector *connector);
+bool intel_connector_needs_modeset(struct intel_atomic_state *state,
+				   struct drm_connector *connector);
+struct intel_digital_connector_state *
+intel_atomic_get_digital_connector_state(struct intel_atomic_state *state,
+					 struct intel_connector *connector);
 
 struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc);
 void intel_crtc_destroy_state(struct drm_crtc *crtc,
 			       struct drm_crtc_state *state);
+void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state);
+void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state);
 struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev);
 void intel_atomic_state_clear(struct drm_atomic_state *state);
 
@@ -46,4 +55,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 			       struct intel_crtc *intel_crtc,
 			       struct intel_crtc_state *crtc_state);
 
+int intel_atomic_lock_global_state(struct intel_atomic_state *state);
+
+int intel_atomic_serialize_global_state(struct intel_atomic_state *state);
+
 #endif /* __INTEL_ATOMIC_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index d1fcdf206da4..3e97af682b1b 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -41,6 +41,16 @@
 #include "intel_pm.h"
 #include "intel_sprite.h"
 
+static void intel_plane_state_reset(struct intel_plane_state *plane_state,
+				    struct intel_plane *plane)
+{
+	memset(plane_state, 0, sizeof(*plane_state));
+
+	__drm_atomic_helper_plane_state_reset(&plane_state->uapi, &plane->base);
+
+	plane_state->scaler_id = -1;
+}
+
 struct intel_plane *intel_plane_alloc(void)
 {
 	struct intel_plane_state *plane_state;
@@ -56,8 +66,9 @@ struct intel_plane *intel_plane_alloc(void)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	__drm_atomic_helper_plane_reset(&plane->base, &plane_state->base);
-	plane_state->scaler_id = -1;
+	intel_plane_state_reset(plane_state, plane);
+
+	plane->base.state = &plane_state->uapi;
 
 	return plane;
 }
@@ -80,22 +91,24 @@ void intel_plane_free(struct intel_plane *plane)
 struct drm_plane_state *
 intel_plane_duplicate_state(struct drm_plane *plane)
 {
-	struct drm_plane_state *state;
 	struct intel_plane_state *intel_state;
 
-	intel_state = kmemdup(plane->state, sizeof(*intel_state), GFP_KERNEL);
+	intel_state = to_intel_plane_state(plane->state);
+	intel_state = kmemdup(intel_state, sizeof(*intel_state), GFP_KERNEL);
 
 	if (!intel_state)
 		return NULL;
 
-	state = &intel_state->base;
-
-	__drm_atomic_helper_plane_duplicate_state(plane, state);
+	__drm_atomic_helper_plane_duplicate_state(plane, &intel_state->uapi);
 
 	intel_state->vma = NULL;
 	intel_state->flags = 0;
 
-	return state;
+	/* add reference to fb */
+	if (intel_state->hw.fb)
+		drm_framebuffer_get(intel_state->hw.fb);
+
+	return &intel_state->uapi;
 }
 
 /**
@@ -110,18 +123,22 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
-	WARN_ON(to_intel_plane_state(state)->vma);
+	struct intel_plane_state *plane_state = to_intel_plane_state(state);
+	WARN_ON(plane_state->vma);
 
-	drm_atomic_helper_plane_destroy_state(plane, state);
+	__drm_atomic_helper_plane_destroy_state(&plane_state->uapi);
+	if (plane_state->hw.fb)
+		drm_framebuffer_put(plane_state->hw.fb);
+	kfree(plane_state);
 }
 
 unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
 				   const struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	unsigned int cpp;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	cpp = fb->format->cpp[0];
@@ -138,21 +155,90 @@ unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
 	return cpp * crtc_state->pixel_rate;
 }
 
+bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state,
+				struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	const struct intel_plane_state *plane_state =
+		intel_atomic_get_new_plane_state(state, plane);
+	struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc);
+	struct intel_crtc_state *crtc_state;
+
+	if (!plane_state->uapi.visible || !plane->min_cdclk)
+		return false;
+
+	crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+
+	crtc_state->min_cdclk[plane->id] =
+		plane->min_cdclk(crtc_state, plane_state);
+
+	/*
+	 * Does the cdclk need to be bumbed up?
+	 *
+	 * Note: we obviously need to be called before the new
+	 * cdclk frequency is calculated so state->cdclk.logical
+	 * hasn't been populated yet. Hence we look at the old
+	 * cdclk state under dev_priv->cdclk.logical. This is
+	 * safe as long we hold at least one crtc mutex (which
+	 * must be true since we have crtc_state).
+	 */
+	if (crtc_state->min_cdclk[plane->id] > dev_priv->cdclk.logical.cdclk) {
+		DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk (%d kHz) > logical cdclk (%d kHz)\n",
+			      plane->base.base.id, plane->base.name,
+			      crtc_state->min_cdclk[plane->id],
+			      dev_priv->cdclk.logical.cdclk);
+		return true;
+	}
+
+	return false;
+}
+
+static void intel_plane_clear_hw_state(struct intel_plane_state *plane_state)
+{
+	if (plane_state->hw.fb)
+		drm_framebuffer_put(plane_state->hw.fb);
+
+	memset(&plane_state->hw, 0, sizeof(plane_state->hw));
+}
+
+void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state,
+				       const struct intel_plane_state *from_plane_state)
+{
+	intel_plane_clear_hw_state(plane_state);
+
+	plane_state->hw.crtc = from_plane_state->uapi.crtc;
+	plane_state->hw.fb = from_plane_state->uapi.fb;
+	if (plane_state->hw.fb)
+		drm_framebuffer_get(plane_state->hw.fb);
+
+	plane_state->hw.alpha = from_plane_state->uapi.alpha;
+	plane_state->hw.pixel_blend_mode =
+		from_plane_state->uapi.pixel_blend_mode;
+	plane_state->hw.rotation = from_plane_state->uapi.rotation;
+	plane_state->hw.color_encoding = from_plane_state->uapi.color_encoding;
+	plane_state->hw.color_range = from_plane_state->uapi.color_range;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
 					struct intel_plane_state *new_plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
+	const struct drm_framebuffer *fb;
 	int ret;
 
+	intel_plane_copy_uapi_to_hw_state(new_plane_state, new_plane_state);
+	fb = new_plane_state->hw.fb;
+
 	new_crtc_state->active_planes &= ~BIT(plane->id);
 	new_crtc_state->nv12_planes &= ~BIT(plane->id);
 	new_crtc_state->c8_planes &= ~BIT(plane->id);
 	new_crtc_state->data_rate[plane->id] = 0;
-	new_plane_state->base.visible = false;
+	new_crtc_state->min_cdclk[plane->id] = 0;
+	new_plane_state->uapi.visible = false;
 
-	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
+	if (!new_plane_state->hw.crtc && !old_plane_state->hw.crtc)
 		return 0;
 
 	ret = plane->check_plane(new_crtc_state, new_plane_state);
@@ -160,18 +246,18 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 		return ret;
 
 	/* FIXME pre-g4x don't work like this */
-	if (new_plane_state->base.visible)
+	if (new_plane_state->uapi.visible)
 		new_crtc_state->active_planes |= BIT(plane->id);
 
-	if (new_plane_state->base.visible &&
-	    is_planar_yuv_format(new_plane_state->base.fb->format->format))
+	if (new_plane_state->uapi.visible &&
+	    intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
 		new_crtc_state->nv12_planes |= BIT(plane->id);
 
-	if (new_plane_state->base.visible &&
-	    new_plane_state->base.fb->format->format == DRM_FORMAT_C8)
+	if (new_plane_state->uapi.visible &&
+	    fb->format->format == DRM_FORMAT_C8)
 		new_crtc_state->c8_planes |= BIT(plane->id);
 
-	if (new_plane_state->base.visible || old_plane_state->base.visible)
+	if (new_plane_state->uapi.visible || old_plane_state->uapi.visible)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
 	new_crtc_state->data_rate[plane->id] =
@@ -185,23 +271,20 @@ static struct intel_crtc *
 get_crtc_from_states(const struct intel_plane_state *old_plane_state,
 		     const struct intel_plane_state *new_plane_state)
 {
-	if (new_plane_state->base.crtc)
-		return to_intel_crtc(new_plane_state->base.crtc);
+	if (new_plane_state->uapi.crtc)
+		return to_intel_crtc(new_plane_state->uapi.crtc);
 
-	if (old_plane_state->base.crtc)
-		return to_intel_crtc(old_plane_state->base.crtc);
+	if (old_plane_state->uapi.crtc)
+		return to_intel_crtc(old_plane_state->uapi.crtc);
 
 	return NULL;
 }
 
-static int intel_plane_atomic_check(struct drm_plane *_plane,
-				    struct drm_plane_state *_new_plane_state)
+int intel_plane_atomic_check(struct intel_atomic_state *state,
+			     struct intel_plane *plane)
 {
-	struct intel_plane *plane = to_intel_plane(_plane);
-	struct intel_atomic_state *state =
-		to_intel_atomic_state(_new_plane_state->state);
 	struct intel_plane_state *new_plane_state =
-		to_intel_plane_state(_new_plane_state);
+		intel_atomic_get_new_plane_state(state, plane);
 	const struct intel_plane_state *old_plane_state =
 		intel_atomic_get_old_plane_state(state, plane);
 	struct intel_crtc *crtc =
@@ -209,7 +292,7 @@ static int intel_plane_atomic_check(struct drm_plane *_plane,
 	const struct intel_crtc_state *old_crtc_state;
 	struct intel_crtc_state *new_crtc_state;
 
-	new_plane_state->base.visible = false;
+	new_plane_state->uapi.visible = false;
 	if (!crtc)
 		return 0;
 
@@ -270,26 +353,16 @@ void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	trace_intel_update_plane(&plane->base, crtc);
 	plane->update_plane(plane, crtc_state, plane_state);
 }
 
-void intel_update_slave(struct intel_plane *plane,
-			const struct intel_crtc_state *crtc_state,
-			const struct intel_plane_state *plane_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-
-	trace_intel_update_plane(&plane->base, crtc);
-	plane->update_slave(plane, crtc_state, plane_state);
-}
-
 void intel_disable_plane(struct intel_plane *plane,
 			 const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	trace_intel_disable_plane(&plane->base, crtc);
 	plane->disable_plane(plane, crtc_state);
@@ -318,25 +391,9 @@ void skl_update_planes_on_crtc(struct intel_atomic_state *state,
 		struct intel_plane_state *new_plane_state =
 			intel_atomic_get_new_plane_state(state, plane);
 
-		if (new_plane_state->base.visible) {
+		if (new_plane_state->uapi.visible ||
+		    new_plane_state->planar_slave) {
 			intel_update_plane(plane, new_crtc_state, new_plane_state);
-		} else if (new_plane_state->slave) {
-			struct intel_plane *master =
-				new_plane_state->linked_plane;
-
-			/*
-			 * We update the slave plane from this function because
-			 * programming it from the master plane's update_plane
-			 * callback runs into issues when the Y plane is
-			 * reassigned, disabled or used by a different plane.
-			 *
-			 * The slave plane is updated with the master plane's
-			 * plane_state.
-			 */
-			new_plane_state =
-				intel_atomic_get_new_plane_state(state, master);
-
-			intel_update_slave(plane, new_crtc_state, new_plane_state);
 		} else {
 			intel_disable_plane(plane, new_crtc_state);
 		}
@@ -358,7 +415,7 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state,
 		    !(update_mask & BIT(plane->id)))
 			continue;
 
-		if (new_plane_state->base.visible)
+		if (new_plane_state->uapi.visible)
 			intel_update_plane(plane, new_crtc_state, new_plane_state);
 		else
 			intel_disable_plane(plane, new_crtc_state);
@@ -368,5 +425,4 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state,
 const struct drm_plane_helper_funcs intel_plane_helper_funcs = {
 	.prepare_fb = intel_prepare_plane_fb,
 	.cleanup_fb = intel_cleanup_plane_fb,
-	.atomic_check = intel_plane_atomic_check,
 };
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
index cb7ef4f9eafd..5cedafdddb55 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
@@ -20,12 +20,11 @@ extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
 
 unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
 				   const struct intel_plane_state *plane_state);
+void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state,
+				       const struct intel_plane_state *from_plane_state);
 void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
-void intel_update_slave(struct intel_plane *plane,
-			const struct intel_crtc_state *crtc_state,
-			const struct intel_plane_state *plane_state);
 void intel_disable_plane(struct intel_plane *plane,
 			 const struct intel_crtc_state *crtc_state);
 struct intel_plane *intel_plane_alloc(void);
@@ -41,9 +40,13 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 					struct intel_crtc_state *crtc_state,
 					const struct intel_plane_state *old_plane_state,
 					struct intel_plane_state *intel_state);
+int intel_plane_atomic_check(struct intel_atomic_state *state,
+			     struct intel_plane *plane);
 int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
 				    struct intel_crtc_state *crtc_state,
 				    const struct intel_plane_state *old_plane_state,
 				    struct intel_plane_state *plane_state);
+bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state,
+				struct intel_plane *plane);
 
 #endif /* __INTEL_ATOMIC_PLANE_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index ddcccf4408c3..b18040793d9e 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -28,6 +28,7 @@
 #include <drm/i915_component.h>
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_display_types.h"
 #include "intel_lpe_audio.h"
@@ -233,7 +234,7 @@ static const struct hdmi_aud_ncts hdmi_aud_ncts_36bpp[] = {
 static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(hdmi_audio_clock); i++) {
@@ -554,14 +555,15 @@ static void ilk_audio_codec_disable(struct intel_encoder *encoder,
 				    const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 	enum port port = encoder->port;
 	u32 tmp, eldv;
 	i915_reg_t aud_config, aud_cntrl_st2;
 
-	DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n",
-		      port_name(port), pipe_name(pipe));
+	DRM_DEBUG_KMS("Disable audio codec on [ENCODER:%d:%s], pipe %c\n",
+		      encoder->base.base.id, encoder->base.name,
+		      pipe_name(pipe));
 
 	if (WARN_ON(port == PORT_A))
 		return;
@@ -600,7 +602,7 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder,
 				   const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_connector *connector = conn_state->connector;
 	enum pipe pipe = crtc->pipe;
 	enum port port = encoder->port;
@@ -609,8 +611,9 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder,
 	int len, i;
 	i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2;
 
-	DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n",
-		      port_name(port), pipe_name(pipe), drm_eld_size(eld));
+	DRM_DEBUG_KMS("Enable audio codec on [ENCODER:%d:%s], pipe %c, %u bytes ELD\n",
+		      encoder->base.base.id, encoder->base.name,
+		      pipe_name(pipe), drm_eld_size(eld));
 
 	if (WARN_ON(port == PORT_A))
 		return;
@@ -689,10 +692,10 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_audio_component *acomp = dev_priv->audio_component;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_connector *connector = conn_state->connector;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	enum port port = encoder->port;
 	enum pipe pipe = crtc->pipe;
 
@@ -704,8 +707,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 			 connector->base.id,
 			 connector->name,
-			 connector->encoder->base.id,
-			 connector->encoder->name);
+			 encoder->base.base.id,
+			 encoder->base.name);
 
 	connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
 
@@ -750,7 +753,7 @@ void intel_audio_codec_disable(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_audio_component *acomp = dev_priv->audio_component;
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum port port = encoder->port;
 	enum pipe pipe = crtc->pipe;
 
@@ -816,13 +819,8 @@ retry:
 	to_intel_atomic_state(state)->cdclk.force_min_cdclk =
 		enable ? 2 * 96000 : 0;
 
-	/*
-	 * Protects dev_priv->cdclk.force_min_cdclk
-	 * Need to lock this here in case we have no active pipes
-	 * and thus wouldn't lock it during the commit otherwise.
-	 */
-	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
-			       &ctx);
+	/* Protects dev_priv->cdclk.force_min_cdclk */
+	ret = intel_atomic_lock_global_state(to_intel_atomic_state(state));
 	if (!ret)
 		ret = drm_atomic_commit(state);
 
@@ -850,11 +848,23 @@ static unsigned long i915_audio_component_get_power(struct device *kdev)
 
 	ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
 
-	/* Force CDCLK to 2*BCLK as long as we need audio to be powered. */
-	if (dev_priv->audio_power_refcount++ == 0)
-		if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+	if (dev_priv->audio_power_refcount++ == 0) {
+		if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+			I915_WRITE(AUD_FREQ_CNTRL, dev_priv->audio_freq_cntrl);
+			DRM_DEBUG_KMS("restored AUD_FREQ_CNTRL to 0x%x\n",
+				      dev_priv->audio_freq_cntrl);
+		}
+
+		/* Force CDCLK to 2*BCLK as long as we need audio powered. */
+		if (IS_GEMINILAKE(dev_priv))
 			glk_force_audio_cdclk(dev_priv, true);
 
+		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+			I915_WRITE(AUD_PIN_BUF_CTL,
+				   (I915_READ(AUD_PIN_BUF_CTL) |
+				    AUD_PIN_BUF_ENABLE));
+	}
+
 	return ret;
 }
 
@@ -865,7 +875,7 @@ static void i915_audio_component_put_power(struct device *kdev,
 
 	/* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */
 	if (--dev_priv->audio_power_refcount == 0)
-		if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+		if (IS_GEMINILAKE(dev_priv))
 			glk_force_audio_cdclk(dev_priv, false);
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie);
@@ -1114,6 +1124,12 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv)
 		return;
 	}
 
+	if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+		dev_priv->audio_freq_cntrl = I915_READ(AUD_FREQ_CNTRL);
+		DRM_DEBUG_KMS("init value of AUD_FREQ_CNTRL of 0x%x\n",
+			      dev_priv->audio_freq_cntrl);
+	}
+
 	dev_priv->audio_component_registered = true;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 3250c1b8dcca..8beac06e3f10 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -29,6 +29,7 @@
 #include <drm/i915_drm.h>
 
 #include "display/intel_display.h"
+#include "display/intel_display_types.h"
 #include "display/intel_gmbus.h"
 
 #include "i915_drv.h"
@@ -58,6 +59,13 @@
  * that.
  */
 
+/* Wrapper for VBT child device config */
+struct display_device_data {
+	struct child_device_config child;
+	struct dsc_compression_parameters_entry *dsc;
+	struct list_head node;
+};
+
 #define	SLAVE_ADDR1	0x70
 #define	SLAVE_ADDR2	0x72
 
@@ -202,17 +210,12 @@ get_lvds_fp_timing(const struct bdb_header *bdb,
 	return (const struct lvds_fp_timing *)((const u8 *)bdb + ofs);
 }
 
-/* Try to find integrated panel data */
+/* Parse general panel options */
 static void
-parse_lfp_panel_data(struct drm_i915_private *dev_priv,
-		     const struct bdb_header *bdb)
+parse_panel_options(struct drm_i915_private *dev_priv,
+		    const struct bdb_header *bdb)
 {
 	const struct bdb_lvds_options *lvds_options;
-	const struct bdb_lvds_lfp_data *lvds_lfp_data;
-	const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs;
-	const struct lvds_dvo_timing *panel_dvo_timing;
-	const struct lvds_fp_timing *fp_timing;
-	struct drm_display_mode *panel_fixed_mode;
 	int panel_type;
 	int drrs_mode;
 	int ret;
@@ -261,6 +264,19 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 		DRM_DEBUG_KMS("DRRS not supported (VBT input)\n");
 		break;
 	}
+}
+
+/* Try to find integrated panel timing data */
+static void
+parse_lfp_panel_dtd(struct drm_i915_private *dev_priv,
+		    const struct bdb_header *bdb)
+{
+	const struct bdb_lvds_lfp_data *lvds_lfp_data;
+	const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs;
+	const struct lvds_dvo_timing *panel_dvo_timing;
+	const struct lvds_fp_timing *fp_timing;
+	struct drm_display_mode *panel_fixed_mode;
+	int panel_type = dev_priv->vbt.panel_type;
 
 	lvds_lfp_data = find_section(bdb, BDB_LVDS_LFP_DATA);
 	if (!lvds_lfp_data)
@@ -282,7 +298,7 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 
 	dev_priv->vbt.lfp_lvds_vbt_mode = panel_fixed_mode;
 
-	DRM_DEBUG_KMS("Found panel mode in BIOS VBT tables:\n");
+	DRM_DEBUG_KMS("Found panel mode in BIOS VBT legacy lfp table:\n");
 	drm_mode_debug_printmodeline(panel_fixed_mode);
 
 	fp_timing = get_lvds_fp_timing(bdb, lvds_lfp_data,
@@ -300,6 +316,98 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 }
 
 static void
+parse_generic_dtd(struct drm_i915_private *dev_priv,
+		  const struct bdb_header *bdb)
+{
+	const struct bdb_generic_dtd *generic_dtd;
+	const struct generic_dtd_entry *dtd;
+	struct drm_display_mode *panel_fixed_mode;
+	int num_dtd;
+
+	generic_dtd = find_section(bdb, BDB_GENERIC_DTD);
+	if (!generic_dtd)
+		return;
+
+	if (generic_dtd->gdtd_size < sizeof(struct generic_dtd_entry)) {
+		DRM_ERROR("GDTD size %u is too small.\n",
+			  generic_dtd->gdtd_size);
+		return;
+	} else if (generic_dtd->gdtd_size !=
+		   sizeof(struct generic_dtd_entry)) {
+		DRM_ERROR("Unexpected GDTD size %u\n", generic_dtd->gdtd_size);
+		/* DTD has unknown fields, but keep going */
+	}
+
+	num_dtd = (get_blocksize(generic_dtd) -
+		   sizeof(struct bdb_generic_dtd)) / generic_dtd->gdtd_size;
+	if (dev_priv->vbt.panel_type >= num_dtd) {
+		DRM_ERROR("Panel type %d not found in table of %d DTD's\n",
+			  dev_priv->vbt.panel_type, num_dtd);
+		return;
+	}
+
+	dtd = &generic_dtd->dtd[dev_priv->vbt.panel_type];
+
+	panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL);
+	if (!panel_fixed_mode)
+		return;
+
+	panel_fixed_mode->hdisplay = dtd->hactive;
+	panel_fixed_mode->hsync_start =
+		panel_fixed_mode->hdisplay + dtd->hfront_porch;
+	panel_fixed_mode->hsync_end =
+		panel_fixed_mode->hsync_start + dtd->hsync;
+	panel_fixed_mode->htotal = panel_fixed_mode->hsync_end;
+
+	panel_fixed_mode->vdisplay = dtd->vactive;
+	panel_fixed_mode->vsync_start =
+		panel_fixed_mode->vdisplay + dtd->vfront_porch;
+	panel_fixed_mode->vsync_end =
+		panel_fixed_mode->vsync_start + dtd->vsync;
+	panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end;
+
+	panel_fixed_mode->clock = dtd->pixel_clock;
+	panel_fixed_mode->width_mm = dtd->width_mm;
+	panel_fixed_mode->height_mm = dtd->height_mm;
+
+	panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
+	drm_mode_set_name(panel_fixed_mode);
+
+	if (dtd->hsync_positive_polarity)
+		panel_fixed_mode->flags |= DRM_MODE_FLAG_PHSYNC;
+	else
+		panel_fixed_mode->flags |= DRM_MODE_FLAG_NHSYNC;
+
+	if (dtd->vsync_positive_polarity)
+		panel_fixed_mode->flags |= DRM_MODE_FLAG_PVSYNC;
+	else
+		panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC;
+
+	DRM_DEBUG_KMS("Found panel mode in BIOS VBT generic dtd table:\n");
+	drm_mode_debug_printmodeline(panel_fixed_mode);
+
+	dev_priv->vbt.lfp_lvds_vbt_mode = panel_fixed_mode;
+}
+
+static void
+parse_panel_dtd(struct drm_i915_private *dev_priv,
+		const struct bdb_header *bdb)
+{
+	/*
+	 * Older VBTs provided provided DTD information for internal displays
+	 * through the "LFP panel DTD" block (42).  As of VBT revision 229,
+	 * that block is now deprecated and DTD information should be provided
+	 * via a newer "generic DTD" block (58).  Just to be safe, we'll
+	 * try the new generic DTD block first on VBT >= 229, but still fall
+	 * back to trying the old LFP block if that fails.
+	 */
+	if (bdb->version >= 229)
+		parse_generic_dtd(dev_priv, bdb);
+	if (!dev_priv->vbt.lfp_lvds_vbt_mode)
+		parse_lfp_panel_dtd(dev_priv, bdb);
+}
+
+static void
 parse_lfp_backlight(struct drm_i915_private *dev_priv,
 		    const struct bdb_header *bdb)
 {
@@ -449,8 +557,9 @@ static void
 parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version)
 {
 	struct sdvo_device_mapping *mapping;
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
-	int i, count = 0;
+	int count = 0;
 
 	/*
 	 * Only parse SDVO mappings on gens that could have SDVO. This isn't
@@ -461,8 +570,8 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version)
 		return;
 	}
 
-	for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
 
 		if (child->slave_addr != SLAVE_ADDR1 &&
 		    child->slave_addr != SLAVE_ADDR2) {
@@ -552,16 +661,45 @@ parse_driver_features(struct drm_i915_private *dev_priv,
 			dev_priv->vbt.int_lvds_support = 0;
 	}
 
-	DRM_DEBUG_KMS("DRRS State Enabled:%d\n", driver->drrs_enabled);
+	if (bdb->version < 228) {
+		DRM_DEBUG_KMS("DRRS State Enabled:%d\n", driver->drrs_enabled);
+		/*
+		 * If DRRS is not supported, drrs_type has to be set to 0.
+		 * This is because, VBT is configured in such a way that
+		 * static DRRS is 0 and DRRS not supported is represented by
+		 * driver->drrs_enabled=false
+		 */
+		if (!driver->drrs_enabled)
+			dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED;
+
+		dev_priv->vbt.psr.enable = driver->psr_enabled;
+	}
+}
+
+static void
+parse_power_conservation_features(struct drm_i915_private *dev_priv,
+				  const struct bdb_header *bdb)
+{
+	const struct bdb_lfp_power *power;
+	u8 panel_type = dev_priv->vbt.panel_type;
+
+	if (bdb->version < 228)
+		return;
+
+	power = find_section(bdb, BDB_LVDS_POWER);
+	if (!power)
+		return;
+
+	dev_priv->vbt.psr.enable = power->psr & BIT(panel_type);
+
 	/*
 	 * If DRRS is not supported, drrs_type has to be set to 0.
 	 * This is because, VBT is configured in such a way that
 	 * static DRRS is 0 and DRRS not supported is represented by
-	 * driver->drrs_enabled=false
+	 * power->drrs & BIT(panel_type)=false
 	 */
-	if (!driver->drrs_enabled)
+	if (!(power->drrs & BIT(panel_type)))
 		dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED;
-	dev_priv->vbt.psr.enable = driver->psr_enabled;
 }
 
 static void
@@ -1230,6 +1368,57 @@ err:
 	memset(dev_priv->vbt.dsi.sequence, 0, sizeof(dev_priv->vbt.dsi.sequence));
 }
 
+static void
+parse_compression_parameters(struct drm_i915_private *i915,
+			     const struct bdb_header *bdb)
+{
+	const struct bdb_compression_parameters *params;
+	struct display_device_data *devdata;
+	const struct child_device_config *child;
+	u16 block_size;
+	int index;
+
+	if (bdb->version < 198)
+		return;
+
+	params = find_section(bdb, BDB_COMPRESSION_PARAMETERS);
+	if (params) {
+		/* Sanity checks */
+		if (params->entry_size != sizeof(params->data[0])) {
+			DRM_DEBUG_KMS("VBT: unsupported compression param entry size\n");
+			return;
+		}
+
+		block_size = get_blocksize(params);
+		if (block_size < sizeof(*params)) {
+			DRM_DEBUG_KMS("VBT: expected 16 compression param entries\n");
+			return;
+		}
+	}
+
+	list_for_each_entry(devdata, &i915->vbt.display_devices, node) {
+		child = &devdata->child;
+
+		if (!child->compression_enable)
+			continue;
+
+		if (!params) {
+			DRM_DEBUG_KMS("VBT: compression params not available\n");
+			continue;
+		}
+
+		if (child->compression_method_cps) {
+			DRM_DEBUG_KMS("VBT: CPS compression not supported\n");
+			continue;
+		}
+
+		index = child->compression_structure_index;
+
+		devdata->dsc = kmemdup(&params->data[index],
+				       sizeof(*devdata->dsc), GFP_KERNEL);
+	}
+}
+
 static u8 translate_iboost(u8 val)
 {
 	static const u8 mapping[] = { 1, 3, 7 }; /* See VBT spec */
@@ -1246,7 +1435,7 @@ static enum port get_port_by_ddc_pin(struct drm_i915_private *i915, u8 ddc_pin)
 	const struct ddi_vbt_port_info *info;
 	enum port port;
 
-	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
+	for_each_port(port) {
 		info = &i915->vbt.ddi_port_info[port];
 
 		if (info->child && ddc_pin == info->alternate_ddc_pin)
@@ -1297,7 +1486,7 @@ static enum port get_port_by_aux_ch(struct drm_i915_private *i915, u8 aux_ch)
 	const struct ddi_vbt_port_info *info;
 	enum port port;
 
-	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
+	for_each_port(port) {
 		info = &i915->vbt.ddi_port_info[port];
 
 		if (info->child && aux_ch == info->alternate_aux_channel)
@@ -1399,6 +1588,7 @@ static enum port dvo_port_to_port(u8 dvo_port)
 		[PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1},
 		[PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE},
 		[PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1},
+		[PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1},
 	};
 	enum port port;
 	int i;
@@ -1417,9 +1607,10 @@ static enum port dvo_port_to_port(u8 dvo_port)
 }
 
 static void parse_ddi_port(struct drm_i915_private *dev_priv,
-			   const struct child_device_config *child,
+			   struct display_device_data *devdata,
 			   u8 bdb_version)
 {
+	const struct child_device_config *child = &devdata->child;
 	struct ddi_vbt_port_info *info;
 	bool is_dvi, is_hdmi, is_dp, is_edp, is_crt;
 	enum port port;
@@ -1442,7 +1633,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 	is_hdmi = is_dvi && (child->device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0;
 	is_edp = is_dp && (child->device_type & DEVICE_TYPE_INTERNAL_CONNECTOR);
 
-	if (port == PORT_A && is_dvi) {
+	if (port == PORT_A && is_dvi && INTEL_GEN(dev_priv) < 12) {
 		DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n",
 			      is_hdmi ? "/HDMI" : "");
 		is_dvi = false;
@@ -1460,26 +1651,11 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 	if (bdb_version >= 209)
 		info->supports_tbt = child->tbt;
 
-	DRM_DEBUG_KMS("Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d LSPCON:%d USB-Type-C:%d TBT:%d\n",
+	DRM_DEBUG_KMS("Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d LSPCON:%d USB-Type-C:%d TBT:%d DSC:%d\n",
 		      port_name(port), is_crt, is_dvi, is_hdmi, is_dp, is_edp,
 		      HAS_LSPCON(dev_priv) && child->lspcon,
-		      info->supports_typec_usb, info->supports_tbt);
-
-	if (is_edp && is_dvi)
-		DRM_DEBUG_KMS("Internal DP port %c is TMDS compatible\n",
-			      port_name(port));
-	if (is_crt && port != PORT_E)
-		DRM_DEBUG_KMS("Port %c is analog\n", port_name(port));
-	if (is_crt && (is_dvi || is_dp))
-		DRM_DEBUG_KMS("Analog port %c is also DP or TMDS compatible\n",
-			      port_name(port));
-	if (is_dvi && (port == PORT_A || port == PORT_E))
-		DRM_DEBUG_KMS("Port %c is TMDS compatible\n", port_name(port));
-	if (!is_dvi && !is_dp && !is_crt)
-		DRM_DEBUG_KMS("Port %c is not DP/TMDS/CRT compatible\n",
-			      port_name(port));
-	if (is_edp && (port == PORT_B || port == PORT_C || port == PORT_E))
-		DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port));
+		      info->supports_typec_usb, info->supports_tbt,
+		      devdata->dsc != NULL);
 
 	if (is_dvi) {
 		u8 ddc_pin;
@@ -1508,6 +1684,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 			      port_name(port),
 			      hdmi_level_shift);
 		info->hdmi_level_shift = hdmi_level_shift;
+		info->hdmi_level_shift_set = true;
 	}
 
 	if (bdb_version >= 204) {
@@ -1570,8 +1747,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 
 static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version)
 {
-	const struct child_device_config *child;
-	int i;
+	struct display_device_data *devdata;
 
 	if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv))
 		return;
@@ -1579,11 +1755,8 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version)
 	if (bdb_version < 155)
 		return;
 
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
-
-		parse_ddi_port(dev_priv, child, bdb_version);
-	}
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node)
+		parse_ddi_port(dev_priv, devdata, bdb_version);
 }
 
 static void
@@ -1591,8 +1764,9 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
 			  const struct bdb_header *bdb)
 {
 	const struct bdb_general_definitions *defs;
+	struct display_device_data *devdata;
 	const struct child_device_config *child;
-	int i, child_device_num, count;
+	int i, child_device_num;
 	u8 expected_size;
 	u16 block_size;
 	int bus_pin;
@@ -1625,7 +1799,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
 		expected_size = 37;
 	} else if (bdb->version <= 215) {
 		expected_size = 38;
-	} else if (bdb->version <= 216) {
+	} else if (bdb->version <= 229) {
 		expected_size = 39;
 	} else {
 		expected_size = sizeof(*child);
@@ -1648,26 +1822,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
 
 	/* get the number of child device */
 	child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size;
-	count = 0;
-	/* get the number of child device that is present */
-	for (i = 0; i < child_device_num; i++) {
-		child = child_device_ptr(defs, i);
-		if (!child->device_type)
-			continue;
-		count++;
-	}
-	if (!count) {
-		DRM_DEBUG_KMS("no child dev is parsed from VBT\n");
-		return;
-	}
-	dev_priv->vbt.child_dev = kcalloc(count, sizeof(*child), GFP_KERNEL);
-	if (!dev_priv->vbt.child_dev) {
-		DRM_DEBUG_KMS("No memory space for child device\n");
-		return;
-	}
 
-	dev_priv->vbt.child_dev_num = count;
-	count = 0;
 	for (i = 0; i < child_device_num; i++) {
 		child = child_device_ptr(defs, i);
 		if (!child->device_type)
@@ -1676,23 +1831,29 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
 		DRM_DEBUG_KMS("Found VBT child device with type 0x%x\n",
 			      child->device_type);
 
+		devdata = kzalloc(sizeof(*devdata), GFP_KERNEL);
+		if (!devdata)
+			break;
+
 		/*
 		 * Copy as much as we know (sizeof) and is available
-		 * (child_dev_size) of the child device. Accessing the data must
-		 * depend on VBT version.
+		 * (child_dev_size) of the child device config. Accessing the
+		 * data must depend on VBT version.
 		 */
-		memcpy(dev_priv->vbt.child_dev + count, child,
+		memcpy(&devdata->child, child,
 		       min_t(size_t, defs->child_dev_size, sizeof(*child)));
-		count++;
+
+		list_add_tail(&devdata->node, &dev_priv->vbt.display_devices);
 	}
+
+	if (list_empty(&dev_priv->vbt.display_devices))
+		DRM_DEBUG_KMS("no child dev is parsed from VBT\n");
 }
 
 /* Common defaults which may be overridden by VBT. */
 static void
 init_vbt_defaults(struct drm_i915_private *dev_priv)
 {
-	enum port port;
-
 	dev_priv->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC;
 
 	/* Default to having backlight */
@@ -1720,13 +1881,6 @@ init_vbt_defaults(struct drm_i915_private *dev_priv)
 	dev_priv->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(dev_priv,
 			!HAS_PCH_SPLIT(dev_priv));
 	DRM_DEBUG_KMS("Set default to SSC at %d kHz\n", dev_priv->vbt.lvds_ssc_freq);
-
-	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
-		struct ddi_vbt_port_info *info =
-			&dev_priv->vbt.ddi_port_info[port];
-
-		info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN;
-	}
 }
 
 /* Defaults to initialize only if there is no VBT. */
@@ -1735,7 +1889,7 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv)
 {
 	enum port port;
 
-	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
+	for_each_port(port) {
 		struct ddi_vbt_port_info *info =
 			&dev_priv->vbt.ddi_port_info[port];
 		enum phy phy = intel_port_to_phy(dev_priv, port);
@@ -1786,6 +1940,13 @@ bool intel_bios_is_valid_vbt(const void *buf, size_t size)
 		return false;
 	}
 
+	if (vbt->vbt_size > size) {
+		DRM_DEBUG_DRIVER("VBT incomplete (vbt_size overflows)\n");
+		return false;
+	}
+
+	size = vbt->vbt_size;
+
 	if (range_overflows_t(size_t,
 			      vbt->bdb_offset,
 			      sizeof(struct bdb_header),
@@ -1803,28 +1964,61 @@ bool intel_bios_is_valid_vbt(const void *buf, size_t size)
 	return vbt;
 }
 
-static const struct vbt_header *find_vbt(void __iomem *bios, size_t size)
+static struct vbt_header *oprom_get_vbt(struct drm_i915_private *dev_priv)
 {
-	size_t i;
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	void __iomem *p = NULL, *oprom;
+	struct vbt_header *vbt;
+	u16 vbt_size;
+	size_t i, size;
 
-	/* Scour memory looking for the VBT signature. */
-	for (i = 0; i + 4 < size; i++) {
-		void *vbt;
+	oprom = pci_map_rom(pdev, &size);
+	if (!oprom)
+		return NULL;
 
-		if (ioread32(bios + i) != *((const u32 *) "$VBT"))
+	/* Scour memory looking for the VBT signature. */
+	for (i = 0; i + 4 < size; i += 4) {
+		if (ioread32(oprom + i) != *((const u32 *)"$VBT"))
 			continue;
 
-		/*
-		 * This is the one place where we explicitly discard the address
-		 * space (__iomem) of the BIOS/VBT.
-		 */
-		vbt = (void __force *) bios + i;
-		if (intel_bios_is_valid_vbt(vbt, size - i))
-			return vbt;
-
+		p = oprom + i;
+		size -= i;
 		break;
 	}
 
+	if (!p)
+		goto err_unmap_oprom;
+
+	if (sizeof(struct vbt_header) > size) {
+		DRM_DEBUG_DRIVER("VBT header incomplete\n");
+		goto err_unmap_oprom;
+	}
+
+	vbt_size = ioread16(p + offsetof(struct vbt_header, vbt_size));
+	if (vbt_size > size) {
+		DRM_DEBUG_DRIVER("VBT incomplete (vbt_size overflows)\n");
+		goto err_unmap_oprom;
+	}
+
+	/* The rest will be validated by intel_bios_is_valid_vbt() */
+	vbt = kmalloc(vbt_size, GFP_KERNEL);
+	if (!vbt)
+		goto err_unmap_oprom;
+
+	memcpy_fromio(vbt, p, vbt_size);
+
+	if (!intel_bios_is_valid_vbt(vbt, vbt_size))
+		goto err_free_vbt;
+
+	pci_unmap_rom(pdev, oprom);
+
+	return vbt;
+
+err_free_vbt:
+	kfree(vbt);
+err_unmap_oprom:
+	pci_unmap_rom(pdev, oprom);
+
 	return NULL;
 }
 
@@ -1838,12 +2032,13 @@ static const struct vbt_header *find_vbt(void __iomem *bios, size_t size)
  */
 void intel_bios_init(struct drm_i915_private *dev_priv)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
 	const struct vbt_header *vbt = dev_priv->opregion.vbt;
+	struct vbt_header *oprom_vbt = NULL;
 	const struct bdb_header *bdb;
-	u8 __iomem *bios = NULL;
 
-	if (!HAS_DISPLAY(dev_priv)) {
+	INIT_LIST_HEAD(&dev_priv->vbt.display_devices);
+
+	if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) {
 		DRM_DEBUG_KMS("Skipping VBT init due to disabled display.\n");
 		return;
 	}
@@ -1852,15 +2047,11 @@ void intel_bios_init(struct drm_i915_private *dev_priv)
 
 	/* If the OpRegion does not have VBT, look in PCI ROM. */
 	if (!vbt) {
-		size_t size;
-
-		bios = pci_map_rom(pdev, &size);
-		if (!bios)
+		oprom_vbt = oprom_get_vbt(dev_priv);
+		if (!oprom_vbt)
 			goto out;
 
-		vbt = find_vbt(bios, size);
-		if (!vbt)
-			goto out;
+		vbt = oprom_vbt;
 
 		DRM_DEBUG_KMS("Found valid VBT in PCI ROM\n");
 	}
@@ -1873,15 +2064,20 @@ void intel_bios_init(struct drm_i915_private *dev_priv)
 	/* Grab useful general definitions */
 	parse_general_features(dev_priv, bdb);
 	parse_general_definitions(dev_priv, bdb);
-	parse_lfp_panel_data(dev_priv, bdb);
+	parse_panel_options(dev_priv, bdb);
+	parse_panel_dtd(dev_priv, bdb);
 	parse_lfp_backlight(dev_priv, bdb);
 	parse_sdvo_panel_data(dev_priv, bdb);
 	parse_driver_features(dev_priv, bdb);
+	parse_power_conservation_features(dev_priv, bdb);
 	parse_edp(dev_priv, bdb);
 	parse_psr(dev_priv, bdb);
 	parse_mipi_config(dev_priv, bdb);
 	parse_mipi_sequence(dev_priv, bdb);
 
+	/* Depends on child device list */
+	parse_compression_parameters(dev_priv, bdb);
+
 	/* Further processing on pre-parsed data */
 	parse_sdvo_device_mapping(dev_priv, bdb->version);
 	parse_ddi_ports(dev_priv, bdb->version);
@@ -1892,8 +2088,7 @@ out:
 		init_vbt_missing_defaults(dev_priv);
 	}
 
-	if (bios)
-		pci_unmap_rom(pdev, bios);
+	kfree(oprom_vbt);
 }
 
 /**
@@ -1902,9 +2097,14 @@ out:
  */
 void intel_bios_driver_remove(struct drm_i915_private *dev_priv)
 {
-	kfree(dev_priv->vbt.child_dev);
-	dev_priv->vbt.child_dev = NULL;
-	dev_priv->vbt.child_dev_num = 0;
+	struct display_device_data *devdata, *n;
+
+	list_for_each_entry_safe(devdata, n, &dev_priv->vbt.display_devices, node) {
+		list_del(&devdata->node);
+		kfree(devdata->dsc);
+		kfree(devdata);
+	}
+
 	kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
 	dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
 	kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
@@ -1928,17 +2128,18 @@ void intel_bios_driver_remove(struct drm_i915_private *dev_priv)
  */
 bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv)
 {
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
-	int i;
 
 	if (!dev_priv->vbt.int_tv_support)
 		return false;
 
-	if (!dev_priv->vbt.child_dev_num)
+	if (list_empty(&dev_priv->vbt.display_devices))
 		return true;
 
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
+
 		/*
 		 * If the device type is not TV, continue.
 		 */
@@ -1970,14 +2171,14 @@ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv)
  */
 bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin)
 {
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
-	int i;
 
-	if (!dev_priv->vbt.child_dev_num)
+	if (list_empty(&dev_priv->vbt.display_devices))
 		return true;
 
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
 
 		/* If the device type is not LFP, continue.
 		 * We have to check both the new identifiers as well as the
@@ -2019,6 +2220,7 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin)
  */
 bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port)
 {
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
 	static const struct {
 		u16 dp, hdmi;
@@ -2029,7 +2231,6 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por
 		[PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
 		[PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, },
 	};
-	int i;
 
 	if (HAS_DDI(dev_priv)) {
 		const struct ddi_vbt_port_info *port_info =
@@ -2044,11 +2245,8 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por
 	if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping))
 		return false;
 
-	if (!dev_priv->vbt.child_dev_num)
-		return false;
-
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
 
 		if ((child->dvo_port == port_mapping[port].dp ||
 		     child->dvo_port == port_mapping[port].hdmi) &&
@@ -2069,6 +2267,7 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por
  */
 bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port)
 {
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
 	static const short port_mapping[] = {
 		[PORT_B] = DVO_PORT_DPB,
@@ -2077,16 +2276,12 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port)
 		[PORT_E] = DVO_PORT_DPE,
 		[PORT_F] = DVO_PORT_DPF,
 	};
-	int i;
 
 	if (HAS_DDI(dev_priv))
 		return dev_priv->vbt.ddi_port_info[port].supports_edp;
 
-	if (!dev_priv->vbt.child_dev_num)
-		return false;
-
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
 
 		if (child->dvo_port == port_mapping[port] &&
 		    (child->device_type & DEVICE_TYPE_eDP_BITS) ==
@@ -2135,13 +2330,10 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child,
 bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv,
 				     enum port port)
 {
-	const struct child_device_config *child;
-	int i;
-
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	const struct display_device_data *devdata;
 
-		if (child_dev_is_dp_dual_mode(child, port))
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		if (child_dev_is_dp_dual_mode(&devdata->child, port))
 			return true;
 	}
 
@@ -2158,12 +2350,12 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv,
 bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv,
 			       enum port *port)
 {
+	const struct display_device_data *devdata;
 	const struct child_device_config *child;
 	u8 dvo_port;
-	int i;
 
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
+	list_for_each_entry(devdata, &dev_priv->vbt.display_devices, node) {
+		child = &devdata->child;
 
 		if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT))
 			continue;
@@ -2187,6 +2379,104 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv,
 	return false;
 }
 
+static void fill_dsc(struct intel_crtc_state *crtc_state,
+		     struct dsc_compression_parameters_entry *dsc,
+		     int dsc_max_bpc)
+{
+	struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
+	int bpc = 8;
+
+	vdsc_cfg->dsc_version_major = dsc->version_major;
+	vdsc_cfg->dsc_version_minor = dsc->version_minor;
+
+	if (dsc->support_12bpc && dsc_max_bpc >= 12)
+		bpc = 12;
+	else if (dsc->support_10bpc && dsc_max_bpc >= 10)
+		bpc = 10;
+	else if (dsc->support_8bpc && dsc_max_bpc >= 8)
+		bpc = 8;
+	else
+		DRM_DEBUG_KMS("VBT: Unsupported BPC %d for DCS\n",
+			      dsc_max_bpc);
+
+	crtc_state->pipe_bpp = bpc * 3;
+
+	crtc_state->dsc.compressed_bpp = min(crtc_state->pipe_bpp,
+					     VBT_DSC_MAX_BPP(dsc->max_bpp));
+
+	/*
+	 * FIXME: This is ugly, and slice count should take DSC engine
+	 * throughput etc. into account.
+	 *
+	 * Also, per spec DSI supports 1, 2, 3 or 4 horizontal slices.
+	 */
+	if (dsc->slices_per_line & BIT(2)) {
+		crtc_state->dsc.slice_count = 4;
+	} else if (dsc->slices_per_line & BIT(1)) {
+		crtc_state->dsc.slice_count = 2;
+	} else {
+		/* FIXME */
+		if (!(dsc->slices_per_line & BIT(0)))
+			DRM_DEBUG_KMS("VBT: Unsupported DSC slice count for DSI\n");
+
+		crtc_state->dsc.slice_count = 1;
+	}
+
+	if (crtc_state->hw.adjusted_mode.crtc_hdisplay %
+	    crtc_state->dsc.slice_count != 0)
+		DRM_DEBUG_KMS("VBT: DSC hdisplay %d not divisible by slice count %d\n",
+			      crtc_state->hw.adjusted_mode.crtc_hdisplay,
+			      crtc_state->dsc.slice_count);
+
+	/*
+	 * FIXME: Use VBT rc_buffer_block_size and rc_buffer_size for the
+	 * implementation specific physical rate buffer size. Currently we use
+	 * the required rate buffer model size calculated in
+	 * drm_dsc_compute_rc_parameters() according to VESA DSC Annex E.
+	 *
+	 * The VBT rc_buffer_block_size and rc_buffer_size definitions
+	 * correspond to DP 1.4 DPCD offsets 0x62 and 0x63. The DP DSC
+	 * implementation should also use the DPCD (or perhaps VBT for eDP)
+	 * provided value for the buffer size.
+	 */
+
+	/* FIXME: DSI spec says bpc + 1 for this one */
+	vdsc_cfg->line_buf_depth = VBT_DSC_LINE_BUFFER_DEPTH(dsc->line_buffer_depth);
+
+	vdsc_cfg->block_pred_enable = dsc->block_prediction_enable;
+
+	vdsc_cfg->slice_height = dsc->slice_height;
+}
+
+/* FIXME: initially DSI specific */
+bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
+			       struct intel_crtc_state *crtc_state,
+			       int dsc_max_bpc)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	const struct display_device_data *devdata;
+	const struct child_device_config *child;
+
+	list_for_each_entry(devdata, &i915->vbt.display_devices, node) {
+		child = &devdata->child;
+
+		if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT))
+			continue;
+
+		if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) {
+			if (!devdata->dsc)
+				return false;
+
+			if (crtc_state)
+				fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
 /**
  * intel_bios_is_port_hpd_inverted - is HPD inverted for %port
  * @i915:	i915 device instance
@@ -2258,6 +2548,9 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv,
 	case DP_AUX_F:
 		aux_ch = AUX_CH_F;
 		break;
+	case DP_AUX_G:
+		aux_ch = AUX_CH_G;
+		break;
 	default:
 		MISSING_CASE(info->alternate_aux_channel);
 		aux_ch = AUX_CH_A;
diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h
index 4969189e620f..d6a0c29d37ac 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.h
+++ b/drivers/gpu/drm/i915/display/intel_bios.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2016 Intel Corporation
+ * Copyright © 2016-2019 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,9 @@
 #include <drm/i915_drm.h>
 
 struct drm_i915_private;
+struct intel_crtc_state;
+struct intel_encoder;
+enum port;
 
 enum intel_backlight_type {
 	INTEL_BACKLIGHT_PMIC,
@@ -241,5 +244,8 @@ bool intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915,
 bool intel_bios_is_lspcon_present(const struct drm_i915_private *i915,
 				  enum port port);
 enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port);
+bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
+			       struct intel_crtc_state *crtc_state,
+			       int dsc_max_bpc);
 
 #endif /* _INTEL_BIOS_H_ */
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 688858ebe4d0..b228671d5a5d 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -15,7 +15,7 @@ struct intel_qgv_point {
 };
 
 struct intel_qgv_info {
-	struct intel_qgv_point points[3];
+	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
 	u8 num_points;
 	u8 num_channels;
 	u8 t_bl;
@@ -35,28 +35,54 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
 	if (ret)
 		return ret;
 
-	switch (val & 0xf) {
-	case 0:
-		qi->dram_type = INTEL_DRAM_DDR4;
-		break;
-	case 1:
-		qi->dram_type = INTEL_DRAM_DDR3;
-		break;
-	case 2:
-		qi->dram_type = INTEL_DRAM_LPDDR3;
-		break;
-	case 3:
-		qi->dram_type = INTEL_DRAM_LPDDR3;
-		break;
-	default:
-		MISSING_CASE(val & 0xf);
-		break;
+	if (IS_GEN(dev_priv, 12)) {
+		switch (val & 0xf) {
+		case 0:
+			qi->dram_type = INTEL_DRAM_DDR4;
+			break;
+		case 3:
+			qi->dram_type = INTEL_DRAM_LPDDR4;
+			break;
+		case 4:
+			qi->dram_type = INTEL_DRAM_DDR3;
+			break;
+		case 5:
+			qi->dram_type = INTEL_DRAM_LPDDR3;
+			break;
+		default:
+			MISSING_CASE(val & 0xf);
+			break;
+		}
+	} else if (IS_GEN(dev_priv, 11)) {
+		switch (val & 0xf) {
+		case 0:
+			qi->dram_type = INTEL_DRAM_DDR4;
+			break;
+		case 1:
+			qi->dram_type = INTEL_DRAM_DDR3;
+			break;
+		case 2:
+			qi->dram_type = INTEL_DRAM_LPDDR3;
+			break;
+		case 3:
+			qi->dram_type = INTEL_DRAM_LPDDR4;
+			break;
+		default:
+			MISSING_CASE(val & 0xf);
+			break;
+		}
+	} else {
+		MISSING_CASE(INTEL_GEN(dev_priv));
+		qi->dram_type = INTEL_DRAM_LPDDR3; /* Conservative default */
 	}
 
 	qi->num_channels = (val & 0xf0) >> 4;
 	qi->num_points = (val & 0xf00) >> 8;
 
-	qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
+	if (IS_GEN(dev_priv, 12))
+		qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 16;
+	else if (IS_GEN(dev_priv, 11))
+		qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
 
 	return 0;
 }
@@ -132,20 +158,25 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
 }
 
 struct intel_sa_info {
-	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+	u16 displayrtids;
+	u8 deburst, deprogbwlimit;
 };
 
 static const struct intel_sa_info icl_sa_info = {
 	.deburst = 8,
-	.mpagesize = 16,
 	.deprogbwlimit = 25, /* GB/s */
 	.displayrtids = 128,
 };
 
-static int icl_get_bw_info(struct drm_i915_private *dev_priv)
+static const struct intel_sa_info tgl_sa_info = {
+	.deburst = 16,
+	.deprogbwlimit = 34, /* GB/s */
+	.displayrtids = 256,
+};
+
+static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
 {
 	struct intel_qgv_info qi = {};
-	const struct intel_sa_info *sa = &icl_sa_info;
 	bool is_y_tile = true; /* assume y tile may be used */
 	int num_channels;
 	int deinterleave;
@@ -233,24 +264,41 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
 
 void intel_bw_init_hw(struct drm_i915_private *dev_priv)
 {
-	if (IS_GEN(dev_priv, 11))
-		icl_get_bw_info(dev_priv);
+	if (!HAS_DISPLAY(dev_priv))
+		return;
+
+	if (IS_GEN(dev_priv, 12))
+		icl_get_bw_info(dev_priv, &tgl_sa_info);
+	else if (IS_GEN(dev_priv, 11))
+		icl_get_bw_info(dev_priv, &icl_sa_info);
 }
 
 static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
 					int num_planes)
 {
-	if (IS_GEN(dev_priv, 11))
+	if (INTEL_GEN(dev_priv) >= 11) {
+		/*
+		 * Any bw group has same amount of QGV points
+		 */
+		const struct intel_bw_info *bi =
+			&dev_priv->max_bw[0];
+		unsigned int min_bw = UINT_MAX;
+		int i;
+
 		/*
 		 * FIXME with SAGV disabled maybe we can assume
 		 * point 1 will always be used? Seems to match
 		 * the behaviour observed in the wild.
 		 */
-		return min3(icl_max_bw(dev_priv, num_planes, 0),
-			    icl_max_bw(dev_priv, num_planes, 1),
-			    icl_max_bw(dev_priv, num_planes, 2));
-	else
+		for (i = 0; i < bi->num_qgv_points; i++) {
+			unsigned int bw = icl_max_bw(dev_priv, num_planes, i);
+
+			min_bw = min(bw, min_bw);
+		}
+		return min_bw;
+	} else {
 		return UINT_MAX;
+	}
 }
 
 static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
@@ -264,7 +312,7 @@ static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_stat
 
 static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	unsigned int data_rate = 0;
 	enum plane_id plane_id;
 
@@ -285,7 +333,7 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
 			  const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	bw_state->data_rate[crtc->pipe] =
 		intel_bw_crtc_data_rate(crtc_state);
@@ -438,3 +486,8 @@ int intel_bw_init(struct drm_i915_private *dev_priv)
 
 	return 0;
 }
+
+void intel_bw_cleanup(struct drm_i915_private *dev_priv)
+{
+	drm_atomic_private_obj_fini(&dev_priv->bw_obj);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index 9db10af012f4..20b9ad241802 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -25,6 +25,7 @@ struct intel_bw_state {
 
 void intel_bw_init_hw(struct drm_i915_private *dev_priv);
 int intel_bw_init(struct drm_i915_private *dev_priv);
+void intel_bw_cleanup(struct drm_i915_private *dev_priv);
 int intel_bw_atomic_check(struct intel_atomic_state *state);
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
 			  const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index d0bc42e5039c..0ce5926006ca 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -21,6 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "intel_atomic.h"
 #include "intel_cdclk.h"
 #include "intel_display_types.h"
 #include "intel_sideband.h"
@@ -1161,28 +1162,88 @@ static void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
 	skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
 }
 
-static int bxt_calc_cdclk(int min_cdclk)
-{
-	if (min_cdclk > 576000)
-		return 624000;
-	else if (min_cdclk > 384000)
-		return 576000;
-	else if (min_cdclk > 288000)
-		return 384000;
-	else if (min_cdclk > 144000)
-		return 288000;
-	else
-		return 144000;
+static const struct intel_cdclk_vals bxt_cdclk_table[] = {
+	{ .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 },
+	{ .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 },
+	{ .refclk = 19200, .cdclk = 384000, .divider = 3, .ratio = 60 },
+	{ .refclk = 19200, .cdclk = 576000, .divider = 2, .ratio = 60 },
+	{ .refclk = 19200, .cdclk = 624000, .divider = 2, .ratio = 65 },
+	{}
+};
+
+static const struct intel_cdclk_vals glk_cdclk_table[] = {
+	{ .refclk = 19200, .cdclk =  79200, .divider = 8, .ratio = 33 },
+	{ .refclk = 19200, .cdclk = 158400, .divider = 4, .ratio = 33 },
+	{ .refclk = 19200, .cdclk = 316800, .divider = 2, .ratio = 33 },
+	{}
+};
+
+static const struct intel_cdclk_vals cnl_cdclk_table[] = {
+	{ .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 },
+	{ .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 },
+	{ .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 },
+
+	{ .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 },
+	{ .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 },
+	{ .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 },
+	{}
+};
+
+static const struct intel_cdclk_vals icl_cdclk_table[] = {
+	{ .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 },
+	{ .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 },
+	{ .refclk = 19200, .cdclk = 307200, .divider = 2, .ratio = 32 },
+	{ .refclk = 19200, .cdclk = 326400, .divider = 4, .ratio = 68 },
+	{ .refclk = 19200, .cdclk = 556800, .divider = 2, .ratio = 58 },
+	{ .refclk = 19200, .cdclk = 652800, .divider = 2, .ratio = 68 },
+
+	{ .refclk = 24000, .cdclk = 180000, .divider = 2, .ratio = 15 },
+	{ .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 },
+	{ .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 },
+	{ .refclk = 24000, .cdclk = 324000, .divider = 4, .ratio = 54 },
+	{ .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 },
+	{ .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 },
+
+	{ .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio =  9 },
+	{ .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 },
+	{ .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 },
+	{ .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 },
+	{ .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 },
+	{ .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 },
+	{}
+};
+
+static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk)
+{
+	const struct intel_cdclk_vals *table = dev_priv->cdclk.table;
+	int i;
+
+	for (i = 0; table[i].refclk; i++)
+		if (table[i].refclk == dev_priv->cdclk.hw.ref &&
+		    table[i].cdclk >= min_cdclk)
+			return table[i].cdclk;
+
+	WARN(1, "Cannot satisfy minimum cdclk %d with refclk %u\n",
+	     min_cdclk, dev_priv->cdclk.hw.ref);
+	return 0;
 }
 
-static int glk_calc_cdclk(int min_cdclk)
+static int bxt_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
 {
-	if (min_cdclk > 158400)
-		return 316800;
-	else if (min_cdclk > 79200)
-		return 158400;
-	else
-		return 79200;
+	const struct intel_cdclk_vals *table = dev_priv->cdclk.table;
+	int i;
+
+	if (cdclk == dev_priv->cdclk.hw.bypass)
+		return 0;
+
+	for (i = 0; table[i].refclk; i++)
+		if (table[i].refclk == dev_priv->cdclk.hw.ref &&
+		    table[i].cdclk == cdclk)
+			return dev_priv->cdclk.hw.ref * table[i].ratio;
+
+	WARN(1, "cdclk %d not valid for refclk %u\n",
+	     cdclk, dev_priv->cdclk.hw.ref);
+	return 0;
 }
 
 static u8 bxt_calc_voltage_level(int cdclk)
@@ -1190,69 +1251,101 @@ static u8 bxt_calc_voltage_level(int cdclk)
 	return DIV_ROUND_UP(cdclk, 25000);
 }
 
-static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
+static u8 cnl_calc_voltage_level(int cdclk)
 {
-	int ratio;
-
-	if (cdclk == dev_priv->cdclk.hw.bypass)
+	if (cdclk > 336000)
+		return 2;
+	else if (cdclk > 168000)
+		return 1;
+	else
 		return 0;
+}
 
-	switch (cdclk) {
-	default:
-		MISSING_CASE(cdclk);
-		/* fall through */
-	case 144000:
-	case 288000:
-	case 384000:
-	case 576000:
-		ratio = 60;
-		break;
-	case 624000:
-		ratio = 65;
-		break;
-	}
+static u8 icl_calc_voltage_level(int cdclk)
+{
+	if (cdclk > 556800)
+		return 2;
+	else if (cdclk > 312000)
+		return 1;
+	else
+		return 0;
+}
 
-	return dev_priv->cdclk.hw.ref * ratio;
+static u8 ehl_calc_voltage_level(int cdclk)
+{
+	if (cdclk > 326400)
+		return 3;
+	else if (cdclk > 312000)
+		return 2;
+	else if (cdclk > 180000)
+		return 1;
+	else
+		return 0;
 }
 
-static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
+static void cnl_readout_refclk(struct drm_i915_private *dev_priv,
+			       struct intel_cdclk_state *cdclk_state)
 {
-	int ratio;
+	if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz)
+		cdclk_state->ref = 24000;
+	else
+		cdclk_state->ref = 19200;
+}
 
-	if (cdclk == dev_priv->cdclk.hw.bypass)
-		return 0;
+static void icl_readout_refclk(struct drm_i915_private *dev_priv,
+			       struct intel_cdclk_state *cdclk_state)
+{
+	u32 dssm = I915_READ(SKL_DSSM) & ICL_DSSM_CDCLK_PLL_REFCLK_MASK;
 
-	switch (cdclk) {
+	switch (dssm) {
 	default:
-		MISSING_CASE(cdclk);
+		MISSING_CASE(dssm);
 		/* fall through */
-	case  79200:
-	case 158400:
-	case 316800:
-		ratio = 33;
+	case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz:
+		cdclk_state->ref = 24000;
+		break;
+	case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz:
+		cdclk_state->ref = 19200;
+		break;
+	case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz:
+		cdclk_state->ref = 38400;
 		break;
 	}
-
-	return dev_priv->cdclk.hw.ref * ratio;
 }
 
-static void bxt_de_pll_update(struct drm_i915_private *dev_priv,
-			      struct intel_cdclk_state *cdclk_state)
+static void bxt_de_pll_readout(struct drm_i915_private *dev_priv,
+			       struct intel_cdclk_state *cdclk_state)
 {
-	u32 val;
+	u32 val, ratio;
 
-	cdclk_state->ref = 19200;
-	cdclk_state->vco = 0;
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_readout_refclk(dev_priv, cdclk_state);
+	else if (IS_CANNONLAKE(dev_priv))
+		cnl_readout_refclk(dev_priv, cdclk_state);
+	else
+		cdclk_state->ref = 19200;
 
 	val = I915_READ(BXT_DE_PLL_ENABLE);
-	if ((val & BXT_DE_PLL_PLL_ENABLE) == 0)
+	if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 ||
+	    (val & BXT_DE_PLL_LOCK) == 0) {
+		/*
+		 * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but
+		 * setting it to zero is a way to signal that.
+		 */
+		cdclk_state->vco = 0;
 		return;
+	}
 
-	if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0))
-		return;
+	/*
+	 * CNL+ have the ratio directly in the PLL enable register, gen9lp had
+	 * it in a separate PLL control register.
+	 */
+	if (INTEL_GEN(dev_priv) >= 10)
+		ratio = val & CNL_CDCLK_PLL_RATIO_MASK;
+	else
+		ratio = I915_READ(BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK;
 
-	val = I915_READ(BXT_DE_PLL_CTL);
-	cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref;
+	cdclk_state->vco = ratio * cdclk_state->ref;
 }
 
 static void bxt_get_cdclk(struct drm_i915_private *dev_priv,
@@ -1261,12 +1354,19 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv,
 	u32 divider;
 	int div;
 
-	bxt_de_pll_update(dev_priv, cdclk_state);
+	bxt_de_pll_readout(dev_priv, cdclk_state);
 
-	cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref;
+	if (INTEL_GEN(dev_priv) >= 12)
+		cdclk_state->bypass = cdclk_state->ref / 2;
+	else if (INTEL_GEN(dev_priv) >= 11)
+		cdclk_state->bypass = 50000;
+	else
+		cdclk_state->bypass = cdclk_state->ref;
 
-	if (cdclk_state->vco == 0)
+	if (cdclk_state->vco == 0) {
+		cdclk_state->cdclk = cdclk_state->bypass;
 		goto out;
+	}
 
 	divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK;
 
@@ -1275,13 +1375,15 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv,
 		div = 2;
 		break;
 	case BXT_CDCLK_CD2X_DIV_SEL_1_5:
-		WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n");
+		WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10,
+		     "Unsupported divider\n");
 		div = 3;
 		break;
 	case BXT_CDCLK_CD2X_DIV_SEL_2:
 		div = 4;
 		break;
 	case BXT_CDCLK_CD2X_DIV_SEL_4:
+		WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n");
 		div = 8;
 		break;
 	default:
@@ -1297,7 +1399,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv,
 	 * at least what the CDCLK frequency requires.
 	 */
 	cdclk_state->voltage_level =
-		bxt_calc_voltage_level(cdclk_state->cdclk);
+		dev_priv->display.calc_voltage_level(cdclk_state->cdclk);
 }
 
 static void bxt_de_pll_disable(struct drm_i915_private *dev_priv)
@@ -1332,259 +1434,6 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco)
 	dev_priv->cdclk.hw.vco = vco;
 }
 
-static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
-			  const struct intel_cdclk_state *cdclk_state,
-			  enum pipe pipe)
-{
-	int cdclk = cdclk_state->cdclk;
-	int vco = cdclk_state->vco;
-	u32 val, divider;
-	int ret;
-
-	/* cdclk = vco / 2 / div{1,1.5,2,4} */
-	switch (DIV_ROUND_CLOSEST(vco, cdclk)) {
-	default:
-		WARN_ON(cdclk != dev_priv->cdclk.hw.bypass);
-		WARN_ON(vco != 0);
-		/* fall through */
-	case 2:
-		divider = BXT_CDCLK_CD2X_DIV_SEL_1;
-		break;
-	case 3:
-		WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n");
-		divider = BXT_CDCLK_CD2X_DIV_SEL_1_5;
-		break;
-	case 4:
-		divider = BXT_CDCLK_CD2X_DIV_SEL_2;
-		break;
-	case 8:
-		divider = BXT_CDCLK_CD2X_DIV_SEL_4;
-		break;
-	}
-
-	/*
-	 * Inform power controller of upcoming frequency change. BSpec
-	 * requires us to wait up to 150usec, but that leads to timeouts;
-	 * the 2ms used here is based on experiment.
-	 */
-	ret = sandybridge_pcode_write_timeout(dev_priv,
-					      HSW_PCODE_DE_WRITE_FREQ_REQ,
-					      0x80000000, 150, 2);
-	if (ret) {
-		DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
-			  ret, cdclk);
-		return;
-	}
-
-	if (dev_priv->cdclk.hw.vco != 0 &&
-	    dev_priv->cdclk.hw.vco != vco)
-		bxt_de_pll_disable(dev_priv);
-
-	if (dev_priv->cdclk.hw.vco != vco)
-		bxt_de_pll_enable(dev_priv, vco);
-
-	val = divider | skl_cdclk_decimal(cdclk);
-	if (pipe == INVALID_PIPE)
-		val |= BXT_CDCLK_CD2X_PIPE_NONE;
-	else
-		val |= BXT_CDCLK_CD2X_PIPE(pipe);
-	/*
-	 * Disable SSA Precharge when CD clock frequency < 500 MHz,
-	 * enable otherwise.
-	 */
-	if (cdclk >= 500000)
-		val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
-	I915_WRITE(CDCLK_CTL, val);
-
-	if (pipe != INVALID_PIPE)
-		intel_wait_for_vblank(dev_priv, pipe);
-
-	/*
-	 * The timeout isn't specified, the 2ms used here is based on
-	 * experiment.
-	 * FIXME: Waiting for the request completion could be delayed until
-	 * the next PCODE request based on BSpec.
-	 */
-	ret = sandybridge_pcode_write_timeout(dev_priv,
-					      HSW_PCODE_DE_WRITE_FREQ_REQ,
-					      cdclk_state->voltage_level, 150, 2);
-	if (ret) {
-		DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
-			  ret, cdclk);
-		return;
-	}
-
-	intel_update_cdclk(dev_priv);
-}
-
-static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv)
-{
-	u32 cdctl, expected;
-
-	intel_update_cdclk(dev_priv);
-	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
-
-	if (dev_priv->cdclk.hw.vco == 0 ||
-	    dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass)
-		goto sanitize;
-
-	/* DPLL okay; verify the cdclock
-	 *
-	 * Some BIOS versions leave an incorrect decimal frequency value and
-	 * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4,
-	 * so sanitize this register.
-	 */
-	cdctl = I915_READ(CDCLK_CTL);
-	/*
-	 * Let's ignore the pipe field, since BIOS could have configured the
-	 * dividers both synching to an active pipe, or asynchronously
-	 * (PIPE_NONE).
-	 */
-	cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE;
-
-	expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) |
-		skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk);
-	/*
-	 * Disable SSA Precharge when CD clock frequency < 500 MHz,
-	 * enable otherwise.
-	 */
-	if (dev_priv->cdclk.hw.cdclk >= 500000)
-		expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
-
-	if (cdctl == expected)
-		/* All well; nothing to sanitize */
-		return;
-
-sanitize:
-	DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n");
-
-	/* force cdclk programming */
-	dev_priv->cdclk.hw.cdclk = 0;
-
-	/* force full PLL disable + enable */
-	dev_priv->cdclk.hw.vco = -1;
-}
-
-static void bxt_init_cdclk(struct drm_i915_private *dev_priv)
-{
-	struct intel_cdclk_state cdclk_state;
-
-	bxt_sanitize_cdclk(dev_priv);
-
-	if (dev_priv->cdclk.hw.cdclk != 0 &&
-	    dev_priv->cdclk.hw.vco != 0)
-		return;
-
-	cdclk_state = dev_priv->cdclk.hw;
-
-	/*
-	 * FIXME:
-	 * - The initial CDCLK needs to be read from VBT.
-	 *   Need to make this change after VBT has changes for BXT.
-	 */
-	if (IS_GEMINILAKE(dev_priv)) {
-		cdclk_state.cdclk = glk_calc_cdclk(0);
-		cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk);
-	} else {
-		cdclk_state.cdclk = bxt_calc_cdclk(0);
-		cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk);
-	}
-	cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk);
-
-	bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
-}
-
-static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv)
-{
-	struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw;
-
-	cdclk_state.cdclk = cdclk_state.bypass;
-	cdclk_state.vco = 0;
-	cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk);
-
-	bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
-}
-
-static int cnl_calc_cdclk(int min_cdclk)
-{
-	if (min_cdclk > 336000)
-		return 528000;
-	else if (min_cdclk > 168000)
-		return 336000;
-	else
-		return 168000;
-}
-
-static u8 cnl_calc_voltage_level(int cdclk)
-{
-	if (cdclk > 336000)
-		return 2;
-	else if (cdclk > 168000)
-		return 1;
-	else
-		return 0;
-}
-
-static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv,
-				 struct intel_cdclk_state *cdclk_state)
-{
-	u32 val;
-
-	if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz)
-		cdclk_state->ref = 24000;
-	else
-		cdclk_state->ref = 19200;
-
-	cdclk_state->vco = 0;
-
-	val = I915_READ(BXT_DE_PLL_ENABLE);
-	if ((val & BXT_DE_PLL_PLL_ENABLE) == 0)
-		return;
-
-	if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0))
-		return;
-
-	cdclk_state->vco = (val & CNL_CDCLK_PLL_RATIO_MASK) * cdclk_state->ref;
-}
-
-static void cnl_get_cdclk(struct drm_i915_private *dev_priv,
-			 struct intel_cdclk_state *cdclk_state)
-{
-	u32 divider;
-	int div;
-
-	cnl_cdclk_pll_update(dev_priv, cdclk_state);
-
-	cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref;
-
-	if (cdclk_state->vco == 0)
-		goto out;
-
-	divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK;
-
-	switch (divider) {
-	case BXT_CDCLK_CD2X_DIV_SEL_1:
-		div = 2;
-		break;
-	case BXT_CDCLK_CD2X_DIV_SEL_2:
-		div = 4;
-		break;
-	default:
-		MISSING_CASE(divider);
-		return;
-	}
-
-	cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div);
-
- out:
-	/*
-	 * Can't read this out :( Let's assume it's
-	 * at least what the CDCLK frequency requires.
-	 */
-	cdclk_state->voltage_level =
-		cnl_calc_voltage_level(cdclk_state->cdclk);
-}
-
 static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv)
 {
 	u32 val;
@@ -1618,7 +1467,27 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco)
 	dev_priv->cdclk.hw.vco = vco;
 }
 
-static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
+static u32 bxt_cdclk_cd2x_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
+{
+	if (INTEL_GEN(dev_priv) >= 12) {
+		if (pipe == INVALID_PIPE)
+			return TGL_CDCLK_CD2X_PIPE_NONE;
+		else
+			return TGL_CDCLK_CD2X_PIPE(pipe);
+	} else if (INTEL_GEN(dev_priv) >= 11) {
+		if (pipe == INVALID_PIPE)
+			return ICL_CDCLK_CD2X_PIPE_NONE;
+		else
+			return ICL_CDCLK_CD2X_PIPE(pipe);
+	} else {
+		if (pipe == INVALID_PIPE)
+			return BXT_CDCLK_CD2X_PIPE_NONE;
+		else
+			return BXT_CDCLK_CD2X_PIPE(pipe);
+	}
+}
+
+static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 			  const struct intel_cdclk_state *cdclk_state,
 			  enum pipe pipe)
 {
@@ -1627,17 +1496,28 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 val, divider;
 	int ret;
 
-	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
-				SKL_CDCLK_PREPARE_FOR_CHANGE,
-				SKL_CDCLK_READY_FOR_CHANGE,
-				SKL_CDCLK_READY_FOR_CHANGE, 3);
+	/* Inform power controller of upcoming frequency change. */
+	if (INTEL_GEN(dev_priv) >= 10)
+		ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
+					SKL_CDCLK_PREPARE_FOR_CHANGE,
+					SKL_CDCLK_READY_FOR_CHANGE,
+					SKL_CDCLK_READY_FOR_CHANGE, 3);
+	else
+		/*
+		 * BSpec requires us to wait up to 150usec, but that leads to
+		 * timeouts; the 2ms used here is based on experiment.
+		 */
+		ret = sandybridge_pcode_write_timeout(dev_priv,
+						      HSW_PCODE_DE_WRITE_FREQ_REQ,
+						      0x80000000, 150, 2);
+
 	if (ret) {
-		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
-			  ret);
+		DRM_ERROR("Failed to inform PCU about cdclk change (err %d, freq %d)\n",
+			  ret, cdclk);
 		return;
 	}
 
-	/* cdclk = vco / 2 / div{1,2} */
+	/* cdclk = vco / 2 / div{1,1.5,2,4} */
 	switch (DIV_ROUND_CLOSEST(vco, cdclk)) {
 	default:
 		WARN_ON(cdclk != dev_priv->cdclk.hw.bypass);
@@ -1646,67 +1526,87 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	case 2:
 		divider = BXT_CDCLK_CD2X_DIV_SEL_1;
 		break;
+	case 3:
+		WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10,
+		     "Unsupported divider\n");
+		divider = BXT_CDCLK_CD2X_DIV_SEL_1_5;
+		break;
 	case 4:
 		divider = BXT_CDCLK_CD2X_DIV_SEL_2;
 		break;
+	case 8:
+		WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n");
+		divider = BXT_CDCLK_CD2X_DIV_SEL_4;
+		break;
 	}
 
-	if (dev_priv->cdclk.hw.vco != 0 &&
-	    dev_priv->cdclk.hw.vco != vco)
-		cnl_cdclk_pll_disable(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 10) {
+		if (dev_priv->cdclk.hw.vco != 0 &&
+		    dev_priv->cdclk.hw.vco != vco)
+			cnl_cdclk_pll_disable(dev_priv);
 
-	if (dev_priv->cdclk.hw.vco != vco)
-		cnl_cdclk_pll_enable(dev_priv, vco);
+		if (dev_priv->cdclk.hw.vco != vco)
+			cnl_cdclk_pll_enable(dev_priv, vco);
 
-	val = divider | skl_cdclk_decimal(cdclk);
-	if (pipe == INVALID_PIPE)
-		val |= BXT_CDCLK_CD2X_PIPE_NONE;
-	else
-		val |= BXT_CDCLK_CD2X_PIPE(pipe);
-	I915_WRITE(CDCLK_CTL, val);
+	} else {
+		if (dev_priv->cdclk.hw.vco != 0 &&
+		    dev_priv->cdclk.hw.vco != vco)
+			bxt_de_pll_disable(dev_priv);
 
-	if (pipe != INVALID_PIPE)
-		intel_wait_for_vblank(dev_priv, pipe);
+		if (dev_priv->cdclk.hw.vco != vco)
+			bxt_de_pll_enable(dev_priv, vco);
+	}
 
-	/* inform PCU of the change */
-	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
-				cdclk_state->voltage_level);
-
-	intel_update_cdclk(dev_priv);
+	val = divider | skl_cdclk_decimal(cdclk) |
+		bxt_cdclk_cd2x_pipe(dev_priv, pipe);
 
 	/*
-	 * Can't read out the voltage level :(
-	 * Let's just assume everything is as expected.
+	 * Disable SSA Precharge when CD clock frequency < 500 MHz,
+	 * enable otherwise.
 	 */
-	dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level;
-}
+	if (IS_GEN9_LP(dev_priv) && cdclk >= 500000)
+		val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
+	I915_WRITE(CDCLK_CTL, val);
 
-static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
-{
-	int ratio;
+	if (pipe != INVALID_PIPE)
+		intel_wait_for_vblank(dev_priv, pipe);
 
-	if (cdclk == dev_priv->cdclk.hw.bypass)
-		return 0;
+	if (INTEL_GEN(dev_priv) >= 10) {
+		ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
+					      cdclk_state->voltage_level);
+	} else {
+		/*
+		 * The timeout isn't specified, the 2ms used here is based on
+		 * experiment.
+		 * FIXME: Waiting for the request completion could be delayed
+		 * until the next PCODE request based on BSpec.
+		 */
+		ret = sandybridge_pcode_write_timeout(dev_priv,
+						      HSW_PCODE_DE_WRITE_FREQ_REQ,
+						      cdclk_state->voltage_level,
+						      150, 2);
+	}
 
-	switch (cdclk) {
-	default:
-		MISSING_CASE(cdclk);
-		/* fall through */
-	case 168000:
-	case 336000:
-		ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28;
-		break;
-	case 528000:
-		ratio = dev_priv->cdclk.hw.ref == 19200 ? 55 : 44;
-		break;
+	if (ret) {
+		DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
+			  ret, cdclk);
+		return;
 	}
 
-	return dev_priv->cdclk.hw.ref * ratio;
+	intel_update_cdclk(dev_priv);
+
+	if (INTEL_GEN(dev_priv) >= 10)
+		/*
+		 * Can't read out the voltage level :(
+		 * Let's just assume everything is as expected.
+		 */
+		dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level;
 }
 
-static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv)
+static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv)
 {
 	u32 cdctl, expected;
+	int cdclk, vco;
 
 	intel_update_cdclk(dev_priv);
 	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
@@ -1727,262 +1627,65 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv)
 	 * dividers both synching to an active pipe, or asynchronously
 	 * (PIPE_NONE).
 	 */
-	cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE;
-
-	expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) |
-		   skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk);
-
-	if (cdctl == expected)
-		/* All well; nothing to sanitize */
-		return;
+	cdctl &= ~bxt_cdclk_cd2x_pipe(dev_priv, INVALID_PIPE);
 
-sanitize:
-	DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n");
-
-	/* force cdclk programming */
-	dev_priv->cdclk.hw.cdclk = 0;
+	/* Make sure this is a legal cdclk value for the platform */
+	cdclk = bxt_calc_cdclk(dev_priv, dev_priv->cdclk.hw.cdclk);
+	if (cdclk != dev_priv->cdclk.hw.cdclk)
+		goto sanitize;
 
-	/* force full PLL disable + enable */
-	dev_priv->cdclk.hw.vco = -1;
-}
+	/* Make sure the VCO is correct for the cdclk */
+	vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk);
+	if (vco != dev_priv->cdclk.hw.vco)
+		goto sanitize;
 
-static int icl_calc_cdclk(int min_cdclk, unsigned int ref)
-{
-	static const int ranges_24[] = { 180000, 192000, 312000, 552000, 648000 };
-	static const int ranges_19_38[] = { 172800, 192000, 307200, 556800, 652800 };
-	const int *ranges;
-	int len, i;
+	expected = skl_cdclk_decimal(cdclk);
 
-	switch (ref) {
-	default:
-		MISSING_CASE(ref);
-		/* fall through */
-	case 24000:
-		ranges = ranges_24;
-		len = ARRAY_SIZE(ranges_24);
-		break;
-	case 19200:
-	case 38400:
-		ranges = ranges_19_38;
-		len = ARRAY_SIZE(ranges_19_38);
+	/* Figure out what CD2X divider we should be using for this cdclk */
+	switch (DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.vco,
+				  dev_priv->cdclk.hw.cdclk)) {
+	case 2:
+		expected |= BXT_CDCLK_CD2X_DIV_SEL_1;
 		break;
-	}
-
-	for (i = 0; i < len; i++) {
-		if (min_cdclk <= ranges[i])
-			return ranges[i];
-	}
-
-	WARN_ON(min_cdclk > ranges[len - 1]);
-	return ranges[len - 1];
-}
-
-static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
-{
-	int ratio;
-
-	if (cdclk == dev_priv->cdclk.hw.bypass)
-		return 0;
-
-	switch (cdclk) {
-	default:
-		MISSING_CASE(cdclk);
-		/* fall through */
-	case 172800:
-	case 307200:
-	case 556800:
-	case 652800:
-		WARN_ON(dev_priv->cdclk.hw.ref != 19200 &&
-			dev_priv->cdclk.hw.ref != 38400);
+	case 3:
+		expected |= BXT_CDCLK_CD2X_DIV_SEL_1_5;
 		break;
-	case 180000:
-	case 312000:
-	case 552000:
-	case 648000:
-		WARN_ON(dev_priv->cdclk.hw.ref != 24000);
+	case 4:
+		expected |= BXT_CDCLK_CD2X_DIV_SEL_2;
 		break;
-	case 192000:
-		WARN_ON(dev_priv->cdclk.hw.ref != 19200 &&
-			dev_priv->cdclk.hw.ref != 38400 &&
-			dev_priv->cdclk.hw.ref != 24000);
+	case 8:
+		expected |= BXT_CDCLK_CD2X_DIV_SEL_4;
 		break;
-	}
-
-	ratio = cdclk / (dev_priv->cdclk.hw.ref / 2);
-
-	return dev_priv->cdclk.hw.ref * ratio;
-}
-
-static void icl_set_cdclk(struct drm_i915_private *dev_priv,
-			  const struct intel_cdclk_state *cdclk_state,
-			  enum pipe pipe)
-{
-	unsigned int cdclk = cdclk_state->cdclk;
-	unsigned int vco = cdclk_state->vco;
-	int ret;
-
-	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
-				SKL_CDCLK_PREPARE_FOR_CHANGE,
-				SKL_CDCLK_READY_FOR_CHANGE,
-				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	if (ret) {
-		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
-			  ret);
-		return;
-	}
-
-	if (dev_priv->cdclk.hw.vco != 0 &&
-	    dev_priv->cdclk.hw.vco != vco)
-		cnl_cdclk_pll_disable(dev_priv);
-
-	if (dev_priv->cdclk.hw.vco != vco)
-		cnl_cdclk_pll_enable(dev_priv, vco);
-
-	/*
-	 * On ICL CD2X_DIV can only be 1, so we'll never end up changing the
-	 * divider here synchronized to a pipe while CDCLK is on, nor will we
-	 * need the corresponding vblank wait.
-	 */
-	I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE |
-			      skl_cdclk_decimal(cdclk));
-
-	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
-				cdclk_state->voltage_level);
-
-	intel_update_cdclk(dev_priv);
-
-	/*
-	 * Can't read out the voltage level :(
-	 * Let's just assume everything is as expected.
-	 */
-	dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level;
-}
-
-static u8 icl_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk)
-{
-	if (IS_ELKHARTLAKE(dev_priv)) {
-		if (cdclk > 312000)
-			return 2;
-		else if (cdclk > 180000)
-			return 1;
-		else
-			return 0;
-	} else {
-		if (cdclk > 556800)
-			return 2;
-		else if (cdclk > 312000)
-			return 1;
-		else
-			return 0;
-	}
-}
-
-static void icl_get_cdclk(struct drm_i915_private *dev_priv,
-			  struct intel_cdclk_state *cdclk_state)
-{
-	u32 val;
-
-	cdclk_state->bypass = 50000;
-
-	val = I915_READ(SKL_DSSM);
-	switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) {
 	default:
-		MISSING_CASE(val);
-		/* fall through */
-	case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz:
-		cdclk_state->ref = 24000;
-		break;
-	case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz:
-		cdclk_state->ref = 19200;
-		break;
-	case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz:
-		cdclk_state->ref = 38400;
-		break;
-	}
-
-	val = I915_READ(BXT_DE_PLL_ENABLE);
-	if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 ||
-	    (val & BXT_DE_PLL_LOCK) == 0) {
-		/*
-		 * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but
-		 * setting it to zero is a way to signal that.
-		 */
-		cdclk_state->vco = 0;
-		cdclk_state->cdclk = cdclk_state->bypass;
-		goto out;
+		goto sanitize;
 	}
 
-	cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref;
-
-	val = I915_READ(CDCLK_CTL);
-	WARN_ON((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0);
-
-	cdclk_state->cdclk = cdclk_state->vco / 2;
-
-out:
 	/*
-	 * Can't read this out :( Let's assume it's
-	 * at least what the CDCLK frequency requires.
+	 * Disable SSA Precharge when CD clock frequency < 500 MHz,
+	 * enable otherwise.
 	 */
-	cdclk_state->voltage_level =
-		icl_calc_voltage_level(dev_priv, cdclk_state->cdclk);
-}
-
-static void icl_init_cdclk(struct drm_i915_private *dev_priv)
-{
-	struct intel_cdclk_state sanitized_state;
-	u32 val;
-
-	/* This sets dev_priv->cdclk.hw. */
-	intel_update_cdclk(dev_priv);
-	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
-
-	/* This means CDCLK disabled. */
-	if (dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass)
-		goto sanitize;
-
-	val = I915_READ(CDCLK_CTL);
-
-	if ((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0)
-		goto sanitize;
-
-	if ((val & CDCLK_FREQ_DECIMAL_MASK) !=
-	    skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk))
-		goto sanitize;
+	if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000)
+		expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
 
-	return;
+	if (cdctl == expected)
+		/* All well; nothing to sanitize */
+		return;
 
 sanitize:
 	DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n");
 
-	sanitized_state.ref = dev_priv->cdclk.hw.ref;
-	sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref);
-	sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv,
-						     sanitized_state.cdclk);
-	sanitized_state.voltage_level =
-				icl_calc_voltage_level(dev_priv,
-						       sanitized_state.cdclk);
-
-	icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE);
-}
-
-static void icl_uninit_cdclk(struct drm_i915_private *dev_priv)
-{
-	struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw;
-
-	cdclk_state.cdclk = cdclk_state.bypass;
-	cdclk_state.vco = 0;
-	cdclk_state.voltage_level = icl_calc_voltage_level(dev_priv,
-							   cdclk_state.cdclk);
+	/* force cdclk programming */
+	dev_priv->cdclk.hw.cdclk = 0;
 
-	icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
+	/* force full PLL disable + enable */
+	dev_priv->cdclk.hw.vco = -1;
 }
 
-static void cnl_init_cdclk(struct drm_i915_private *dev_priv)
+static void bxt_init_cdclk(struct drm_i915_private *dev_priv)
 {
 	struct intel_cdclk_state cdclk_state;
 
-	cnl_sanitize_cdclk(dev_priv);
+	bxt_sanitize_cdclk(dev_priv);
 
 	if (dev_priv->cdclk.hw.cdclk != 0 &&
 	    dev_priv->cdclk.hw.vco != 0)
@@ -1990,22 +1693,29 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv)
 
 	cdclk_state = dev_priv->cdclk.hw;
 
-	cdclk_state.cdclk = cnl_calc_cdclk(0);
-	cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk);
-	cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk);
+	/*
+	 * FIXME:
+	 * - The initial CDCLK needs to be read from VBT.
+	 *   Need to make this change after VBT has changes for BXT.
+	 */
+	cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0);
+	cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk);
+	cdclk_state.voltage_level =
+		dev_priv->display.calc_voltage_level(cdclk_state.cdclk);
 
-	cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
+	bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
 }
 
-static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv)
+static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv)
 {
 	struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw;
 
 	cdclk_state.cdclk = cdclk_state.bypass;
 	cdclk_state.vco = 0;
-	cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk);
+	cdclk_state.voltage_level =
+		dev_priv->display.calc_voltage_level(cdclk_state.cdclk);
 
-	cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
+	bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
 }
 
 /**
@@ -2019,14 +1729,10 @@ static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv)
  */
 void intel_cdclk_init(struct drm_i915_private *i915)
 {
-	if (INTEL_GEN(i915) >= 11)
-		icl_init_cdclk(i915);
-	else if (IS_CANNONLAKE(i915))
-		cnl_init_cdclk(i915);
+	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
+		bxt_init_cdclk(i915);
 	else if (IS_GEN9_BC(i915))
 		skl_init_cdclk(i915);
-	else if (IS_GEN9_LP(i915))
-		bxt_init_cdclk(i915);
 }
 
 /**
@@ -2038,14 +1744,10 @@ void intel_cdclk_init(struct drm_i915_private *i915)
  */
 void intel_cdclk_uninit(struct drm_i915_private *i915)
 {
-	if (INTEL_GEN(i915) >= 11)
-		icl_uninit_cdclk(i915);
-	else if (IS_CANNONLAKE(i915))
-		cnl_uninit_cdclk(i915);
+	if (INTEL_GEN(i915) >= 10 || IS_GEN9_LP(i915))
+		bxt_uninit_cdclk(i915);
 	else if (IS_GEN9_BC(i915))
 		skl_uninit_cdclk(i915);
-	else if (IS_GEN9_LP(i915))
-		bxt_uninit_cdclk(i915);
 }
 
 /**
@@ -2073,9 +1775,9 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a,
  * Returns:
  * True if the CDCLK states require just a cd2x divider update, false if not.
  */
-bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
-				   const struct intel_cdclk_state *a,
-				   const struct intel_cdclk_state *b)
+static bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
+					  const struct intel_cdclk_state *a,
+					  const struct intel_cdclk_state *b)
 {
 	/* Older hw doesn't have the capability */
 	if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv))
@@ -2094,8 +1796,8 @@ bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
  * Returns:
  * True if the CDCLK states don't match, false if they do.
  */
-bool intel_cdclk_changed(const struct intel_cdclk_state *a,
-			 const struct intel_cdclk_state *b)
+static bool intel_cdclk_changed(const struct intel_cdclk_state *a,
+				const struct intel_cdclk_state *b)
 {
 	return intel_cdclk_needs_modeset(a, b) ||
 		a->voltage_level != b->voltage_level;
@@ -2200,9 +1902,11 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv,
 		intel_set_cdclk(dev_priv, new_state, pipe);
 }
 
-static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
-				     int pixel_rate)
+static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	int pixel_rate = crtc_state->pixel_rate;
+
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		return DIV_ROUND_UP(pixel_rate, 2);
 	else if (IS_GEN(dev_priv, 9) ||
@@ -2210,20 +1914,35 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
 		return pixel_rate;
 	else if (IS_CHERRYVIEW(dev_priv))
 		return DIV_ROUND_UP(pixel_rate * 100, 95);
+	else if (crtc_state->double_wide)
+		return DIV_ROUND_UP(pixel_rate * 100, 90 * 2);
 	else
 		return DIV_ROUND_UP(pixel_rate * 100, 90);
 }
 
+static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_plane *plane;
+	int min_cdclk = 0;
+
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane)
+		min_cdclk = max(crtc_state->min_cdclk[plane->id], min_cdclk);
+
+	return min_cdclk;
+}
+
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(crtc_state->base.crtc->dev);
+		to_i915(crtc_state->uapi.crtc->dev);
 	int min_cdclk;
 
-	if (!crtc_state->base.enable)
+	if (!crtc_state->hw.enable)
 		return 0;
 
-	min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate);
+	min_cdclk = intel_pixel_rate_to_cdclk(crtc_state);
 
 	/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
 	if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state))
@@ -2282,6 +2001,21 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	    IS_GEMINILAKE(dev_priv))
 		min_cdclk = max(158400, min_cdclk);
 
+	/* Account for additional needs from the planes */
+	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
+
+	/*
+	 * HACK. Currently for TGL platforms we calculate
+	 * min_cdclk initially based on pixel_rate divided
+	 * by 2, accounting for also plane requirements,
+	 * however in some cases the lowest possible CDCLK
+	 * doesn't work and causing the underruns.
+	 * Explicitly stating here that this seems to be currently
+	 * rather a Hack, than final solution.
+	 */
+	if (IS_TIGERLAKE(dev_priv))
+		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
+
 	if (min_cdclk > dev_priv->max_cdclk_freq) {
 		DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
 			      min_cdclk, dev_priv->max_cdclk_freq);
@@ -2303,11 +2037,20 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state)
 	       sizeof(state->min_cdclk));
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+		int ret;
+
 		min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
 		if (min_cdclk < 0)
 			return min_cdclk;
 
+		if (state->min_cdclk[i] == min_cdclk)
+			continue;
+
 		state->min_cdclk[i] = min_cdclk;
+
+		ret = intel_atomic_lock_global_state(state);
+		if (ret)
+			return ret;
 	}
 
 	min_cdclk = state->cdclk.force_min_cdclk;
@@ -2318,6 +2061,10 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state)
 }
 
 /*
+ * Account for port clock min voltage level requirements.
+ * This only really does something on CNL+ but can be
+ * called on earlier platforms as well.
+ *
  * Note that this functions assumes that 0 is
  * the lowest voltage value, and higher values
  * correspond to increasingly higher voltages.
@@ -2326,7 +2073,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state)
  * future platforms this code will need to be
  * adjusted.
  */
-static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state)
+static int bxt_compute_min_voltage_level(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc *crtc;
@@ -2339,11 +2086,21 @@ static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state)
 	       sizeof(state->min_voltage_level));
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		if (crtc_state->base.enable)
-			state->min_voltage_level[i] =
-				crtc_state->min_voltage_level;
+		int ret;
+
+		if (crtc_state->hw.enable)
+			min_voltage_level = crtc_state->min_voltage_level;
 		else
-			state->min_voltage_level[i] = 0;
+			min_voltage_level = 0;
+
+		if (state->min_voltage_level[i] == min_voltage_level)
+			continue;
+
+		state->min_voltage_level[i] = min_voltage_level;
+
+		ret = intel_atomic_lock_global_state(state);
+		if (ret)
+			return ret;
 	}
 
 	min_voltage_level = 0;
@@ -2369,7 +2126,7 @@ static int vlv_modeset_calc_cdclk(struct intel_atomic_state *state)
 	state->cdclk.logical.voltage_level =
 		vlv_calc_voltage_level(dev_priv, cdclk);
 
-	if (!state->active_crtcs) {
+	if (!state->active_pipes) {
 		cdclk = vlv_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk);
 
 		state->cdclk.actual.cdclk = cdclk;
@@ -2400,7 +2157,7 @@ static int bdw_modeset_calc_cdclk(struct intel_atomic_state *state)
 	state->cdclk.logical.voltage_level =
 		bdw_calc_voltage_level(cdclk);
 
-	if (!state->active_crtcs) {
+	if (!state->active_pipes) {
 		cdclk = bdw_calc_cdclk(state->cdclk.force_min_cdclk);
 
 		state->cdclk.actual.cdclk = cdclk;
@@ -2425,7 +2182,7 @@ static int skl_dpll0_vco(struct intel_atomic_state *state)
 		vco = dev_priv->skl_preferred_vco_freq;
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		if (!crtc_state->base.enable)
+		if (!crtc_state->hw.enable)
 			continue;
 
 		if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
@@ -2470,7 +2227,7 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state)
 	state->cdclk.logical.voltage_level =
 		skl_calc_voltage_level(cdclk);
 
-	if (!state->active_crtcs) {
+	if (!state->active_pipes) {
 		cdclk = skl_calc_cdclk(state->cdclk.force_min_cdclk, vco);
 
 		state->cdclk.actual.vco = vco;
@@ -2487,38 +2244,33 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state)
 static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	int min_cdclk, cdclk, vco;
+	int min_cdclk, min_voltage_level, cdclk, vco;
 
 	min_cdclk = intel_compute_min_cdclk(state);
 	if (min_cdclk < 0)
 		return min_cdclk;
 
-	if (IS_GEMINILAKE(dev_priv)) {
-		cdclk = glk_calc_cdclk(min_cdclk);
-		vco = glk_de_pll_vco(dev_priv, cdclk);
-	} else {
-		cdclk = bxt_calc_cdclk(min_cdclk);
-		vco = bxt_de_pll_vco(dev_priv, cdclk);
-	}
+	min_voltage_level = bxt_compute_min_voltage_level(state);
+	if (min_voltage_level < 0)
+		return min_voltage_level;
+
+	cdclk = bxt_calc_cdclk(dev_priv, min_cdclk);
+	vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk);
 
 	state->cdclk.logical.vco = vco;
 	state->cdclk.logical.cdclk = cdclk;
 	state->cdclk.logical.voltage_level =
-		bxt_calc_voltage_level(cdclk);
-
-	if (!state->active_crtcs) {
-		if (IS_GEMINILAKE(dev_priv)) {
-			cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk);
-			vco = glk_de_pll_vco(dev_priv, cdclk);
-		} else {
-			cdclk = bxt_calc_cdclk(state->cdclk.force_min_cdclk);
-			vco = bxt_de_pll_vco(dev_priv, cdclk);
-		}
+		max_t(int, min_voltage_level,
+		      dev_priv->display.calc_voltage_level(cdclk));
+
+	if (!state->active_pipes) {
+		cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk);
+		vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk);
 
 		state->cdclk.actual.vco = vco;
 		state->cdclk.actual.cdclk = cdclk;
 		state->cdclk.actual.voltage_level =
-			bxt_calc_voltage_level(cdclk);
+			dev_priv->display.calc_voltage_level(cdclk);
 	} else {
 		state->cdclk.actual = state->cdclk.logical;
 	}
@@ -2526,70 +2278,138 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state)
 	return 0;
 }
 
-static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state)
+static int intel_modeset_all_pipes(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	int min_cdclk, cdclk, vco;
+	struct intel_crtc *crtc;
 
-	min_cdclk = intel_compute_min_cdclk(state);
-	if (min_cdclk < 0)
-		return min_cdclk;
+	/*
+	 * Add all pipes to the state, and force
+	 * a modeset on all the active ones.
+	 */
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_crtc_state *crtc_state;
+		int ret;
 
-	cdclk = cnl_calc_cdclk(min_cdclk);
-	vco = cnl_cdclk_pll_vco(dev_priv, cdclk);
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
 
-	state->cdclk.logical.vco = vco;
-	state->cdclk.logical.cdclk = cdclk;
-	state->cdclk.logical.voltage_level =
-		max(cnl_calc_voltage_level(cdclk),
-		    cnl_compute_min_voltage_level(state));
+		if (!crtc_state->hw.active ||
+		    drm_atomic_crtc_needs_modeset(&crtc_state->uapi))
+			continue;
 
-	if (!state->active_crtcs) {
-		cdclk = cnl_calc_cdclk(state->cdclk.force_min_cdclk);
-		vco = cnl_cdclk_pll_vco(dev_priv, cdclk);
+		crtc_state->uapi.mode_changed = true;
 
-		state->cdclk.actual.vco = vco;
-		state->cdclk.actual.cdclk = cdclk;
-		state->cdclk.actual.voltage_level =
-			cnl_calc_voltage_level(cdclk);
-	} else {
-		state->cdclk.actual = state->cdclk.logical;
+		ret = drm_atomic_add_affected_connectors(&state->base,
+							 &crtc->base);
+		if (ret)
+			return ret;
+
+		ret = drm_atomic_add_affected_planes(&state->base,
+						     &crtc->base);
+		if (ret)
+			return ret;
+
+		crtc_state->update_planes |= crtc_state->active_planes;
 	}
 
 	return 0;
 }
 
-static int icl_modeset_calc_cdclk(struct intel_atomic_state *state)
+static int fixed_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	unsigned int ref = state->cdclk.logical.ref;
-	int min_cdclk, cdclk, vco;
+	int min_cdclk;
 
+	/*
+	 * We can't change the cdclk frequency, but we still want to
+	 * check that the required minimum frequency doesn't exceed
+	 * the actual cdclk frequency.
+	 */
 	min_cdclk = intel_compute_min_cdclk(state);
 	if (min_cdclk < 0)
 		return min_cdclk;
 
-	cdclk = icl_calc_cdclk(min_cdclk, ref);
-	vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk);
+	return 0;
+}
 
-	state->cdclk.logical.vco = vco;
-	state->cdclk.logical.cdclk = cdclk;
-	state->cdclk.logical.voltage_level =
-		max(icl_calc_voltage_level(dev_priv, cdclk),
-		    cnl_compute_min_voltage_level(state));
+int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	enum pipe pipe;
+	int ret;
 
-	if (!state->active_crtcs) {
-		cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref);
-		vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk);
+	ret = dev_priv->display.modeset_calc_cdclk(state);
+	if (ret)
+		return ret;
 
-		state->cdclk.actual.vco = vco;
-		state->cdclk.actual.cdclk = cdclk;
-		state->cdclk.actual.voltage_level =
-			icl_calc_voltage_level(dev_priv, cdclk);
+	/*
+	 * Writes to dev_priv->cdclk.{actual,logical} must protected
+	 * by holding all the crtc mutexes even if we don't end up
+	 * touching the hardware
+	 */
+	if (intel_cdclk_changed(&dev_priv->cdclk.actual,
+				&state->cdclk.actual)) {
+		/*
+		 * Also serialize commits across all crtcs
+		 * if the actual hw needs to be poked.
+		 */
+		ret = intel_atomic_serialize_global_state(state);
+		if (ret)
+			return ret;
+	} else if (intel_cdclk_changed(&dev_priv->cdclk.logical,
+				       &state->cdclk.logical)) {
+		ret = intel_atomic_lock_global_state(state);
+		if (ret)
+			return ret;
 	} else {
-		state->cdclk.actual = state->cdclk.logical;
+		return 0;
 	}
 
+	if (is_power_of_2(state->active_pipes) &&
+	    intel_cdclk_needs_cd2x_update(dev_priv,
+					  &dev_priv->cdclk.actual,
+					  &state->cdclk.actual)) {
+		struct intel_crtc *crtc;
+		struct intel_crtc_state *crtc_state;
+
+		pipe = ilog2(state->active_pipes);
+		crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
+
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi))
+			pipe = INVALID_PIPE;
+	} else {
+		pipe = INVALID_PIPE;
+	}
+
+	if (pipe != INVALID_PIPE) {
+		state->cdclk.pipe = pipe;
+
+		DRM_DEBUG_KMS("Can change cdclk with pipe %c active\n",
+			      pipe_name(pipe));
+	} else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual,
+					     &state->cdclk.actual)) {
+		/* All pipes must be switched off while we change the cdclk. */
+		ret = intel_modeset_all_pipes(state);
+		if (ret)
+			return ret;
+
+		state->cdclk.pipe = INVALID_PIPE;
+
+		DRM_DEBUG_KMS("Modeset required for cdclk change\n");
+	}
+
+	DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n",
+		      state->cdclk.logical.cdclk,
+		      state->cdclk.actual.cdclk);
+	DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n",
+		      state->cdclk.logical.voltage_level,
+		      state->cdclk.actual.voltage_level);
+
 	return 0;
 }
 
@@ -2809,15 +2629,29 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv)
  */
 void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_GEN(dev_priv) >= 11) {
-		dev_priv->display.set_cdclk = icl_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk;
+	if (IS_ELKHARTLAKE(dev_priv)) {
+		dev_priv->display.set_cdclk = bxt_set_cdclk;
+		dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
+		dev_priv->display.calc_voltage_level = ehl_calc_voltage_level;
+		dev_priv->cdclk.table = icl_cdclk_table;
+	} else if (INTEL_GEN(dev_priv) >= 11) {
+		dev_priv->display.set_cdclk = bxt_set_cdclk;
+		dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
+		dev_priv->display.calc_voltage_level = icl_calc_voltage_level;
+		dev_priv->cdclk.table = icl_cdclk_table;
 	} else if (IS_CANNONLAKE(dev_priv)) {
-		dev_priv->display.set_cdclk = cnl_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk;
+		dev_priv->display.set_cdclk = bxt_set_cdclk;
+		dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
+		dev_priv->display.calc_voltage_level = cnl_calc_voltage_level;
+		dev_priv->cdclk.table = cnl_cdclk_table;
 	} else if (IS_GEN9_LP(dev_priv)) {
 		dev_priv->display.set_cdclk = bxt_set_cdclk;
 		dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
+		dev_priv->display.calc_voltage_level = bxt_calc_voltage_level;
+		if (IS_GEMINILAKE(dev_priv))
+			dev_priv->cdclk.table = glk_cdclk_table;
+		else
+			dev_priv->cdclk.table = bxt_cdclk_table;
 	} else if (IS_GEN9_BC(dev_priv)) {
 		dev_priv->display.set_cdclk = skl_set_cdclk;
 		dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk;
@@ -2830,13 +2664,11 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
 	} else if (IS_VALLEYVIEW(dev_priv)) {
 		dev_priv->display.set_cdclk = vlv_set_cdclk;
 		dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk;
+	} else {
+		dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		dev_priv->display.get_cdclk = icl_get_cdclk;
-	else if (IS_CANNONLAKE(dev_priv))
-		dev_priv->display.get_cdclk = cnl_get_cdclk;
-	else if (IS_GEN9_LP(dev_priv))
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv))
 		dev_priv->display.get_cdclk = bxt_get_cdclk;
 	else if (IS_GEN9_BC(dev_priv))
 		dev_priv->display.get_cdclk = skl_get_cdclk;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 4d6f7f5f8930..cf71394cc79c 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -15,6 +15,13 @@ struct intel_atomic_state;
 struct intel_cdclk_state;
 struct intel_crtc_state;
 
+struct intel_cdclk_vals {
+	u16 refclk;
+	u32 cdclk;
+	u8 divider;	/* CD2X divider * 2 */
+	u8 ratio;
+};
+
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
 void intel_cdclk_init(struct drm_i915_private *i915);
 void intel_cdclk_uninit(struct drm_i915_private *i915);
@@ -22,13 +29,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv);
 void intel_update_max_cdclk(struct drm_i915_private *dev_priv);
 void intel_update_cdclk(struct drm_i915_private *dev_priv);
 void intel_update_rawclk(struct drm_i915_private *dev_priv);
-bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
-				   const struct intel_cdclk_state *a,
-				   const struct intel_cdclk_state *b);
 bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a,
 			       const struct intel_cdclk_state *b);
-bool intel_cdclk_changed(const struct intel_cdclk_state *a,
-			 const struct intel_cdclk_state *b);
 void intel_cdclk_swap_state(struct intel_atomic_state *state);
 void
 intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv,
@@ -42,5 +44,6 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv,
 				  enum pipe pipe);
 void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state,
 			    const char *context);
+int intel_modeset_calc_cdclk(struct intel_atomic_state *state);
 
 #endif /* __INTEL_CDCLK_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index aa1e2c670bc4..3980e8b50c28 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -43,6 +43,21 @@
 #define LEGACY_LUT_LENGTH		256
 
 /*
+ * ILK+ csc matrix:
+ *
+ * |R/Cr|   | c0 c1 c2 |   ( |R/Cr|   |preoff0| )   |postoff0|
+ * |G/Y | = | c3 c4 c5 | x ( |G/Y | + |preoff1| ) + |postoff1|
+ * |B/Cb|   | c6 c7 c8 |   ( |B/Cb|   |preoff2| )   |postoff2|
+ *
+ * ILK/SNB don't have explicit post offsets, and instead
+ * CSC_MODE_YUV_TO_RGB and CSC_BLACK_SCREEN_OFFSET are used:
+ *  CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=0 -> 1/2, 0, 1/2
+ *  CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/2, 1/16, 1/2
+ *  CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=0 -> 0, 0, 0
+ *  CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/16, 1/16, 1/16
+ */
+
+/*
  * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
  * format). This macro takes the coefficient we want transformed and the
  * number of fractional bits.
@@ -59,37 +74,38 @@
 
 #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
 
+/* Nop pre/post offsets */
 static const u16 ilk_csc_off_zero[3] = {};
 
+/* Identity matrix */
 static const u16 ilk_csc_coeff_identity[9] = {
 	ILK_CSC_COEFF_1_0, 0, 0,
 	0, ILK_CSC_COEFF_1_0, 0,
 	0, 0, ILK_CSC_COEFF_1_0,
 };
 
+/* Limited range RGB post offsets */
 static const u16 ilk_csc_postoff_limited_range[3] = {
 	ILK_CSC_POSTOFF_LIMITED_RANGE,
 	ILK_CSC_POSTOFF_LIMITED_RANGE,
 	ILK_CSC_POSTOFF_LIMITED_RANGE,
 };
 
+/* Full range RGB -> limited range RGB matrix */
 static const u16 ilk_csc_coeff_limited_range[9] = {
 	ILK_CSC_COEFF_LIMITED_RANGE, 0, 0,
 	0, ILK_CSC_COEFF_LIMITED_RANGE, 0,
 	0, 0, ILK_CSC_COEFF_LIMITED_RANGE,
 };
 
-/*
- * These values are direct register values specified in the Bspec,
- * for RGB->YUV conversion matrix (colorspace BT709)
- */
+/* BT.709 full range RGB -> limited range YCbCr matrix */
 static const u16 ilk_csc_coeff_rgb_to_ycbcr[9] = {
 	0x1e08, 0x9cc0, 0xb528,
 	0x2ba8, 0x09d8, 0x37e8,
 	0xbce8, 0x9ad8, 0x1e08,
 };
 
-/* Post offset values for RGB->YCBCR conversion */
+/* Limited range YCbCr post offsets */
 static const u16 ilk_csc_postoff_rgb_to_ycbcr[3] = {
 	0x0800, 0x0100, 0x0800,
 };
@@ -101,10 +117,10 @@ static bool lut_is_legacy(const struct drm_property_blob *lut)
 
 static bool crtc_state_is_legacy_gamma(const struct intel_crtc_state *crtc_state)
 {
-	return !crtc_state->base.degamma_lut &&
-		!crtc_state->base.ctm &&
-		crtc_state->base.gamma_lut &&
-		lut_is_legacy(crtc_state->base.gamma_lut);
+	return !crtc_state->hw.degamma_lut &&
+		!crtc_state->hw.ctm &&
+		crtc_state->hw.gamma_lut &&
+		lut_is_legacy(crtc_state->hw.gamma_lut);
 }
 
 /*
@@ -189,7 +205,7 @@ static void icl_update_output_csc(struct intel_crtc *crtc,
 
 static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	/*
 	 * FIXME if there's a gamma LUT after the CSC, we should
@@ -203,7 +219,7 @@ static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state)
 static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state,
 				u16 coeffs[9])
 {
-	const struct drm_color_ctm *ctm = crtc_state->base.ctm->data;
+	const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data;
 	const u64 *input;
 	u64 temp[9];
 	int i;
@@ -254,11 +270,11 @@ static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state,
 
 static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	bool limited_color_range = ilk_csc_limited_range(crtc_state);
 
-	if (crtc_state->base.ctm) {
+	if (crtc_state->hw.ctm) {
 		u16 coeff[9];
 
 		ilk_csc_convert_ctm(crtc_state, coeff);
@@ -293,10 +309,10 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 
 static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	if (crtc_state->base.ctm) {
+	if (crtc_state->hw.ctm) {
 		u16 coeff[9];
 
 		ilk_csc_convert_ctm(crtc_state, coeff);
@@ -322,12 +338,12 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
  */
 static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	if (crtc_state->base.ctm) {
-		const struct drm_color_ctm *ctm = crtc_state->base.ctm->data;
+	if (crtc_state->hw.ctm) {
+		const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data;
 		u16 coeffs[9] = {};
 		int i;
 
@@ -388,7 +404,7 @@ static u32 ilk_lut_10(const struct drm_color_lut *color)
 static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state,
 				    const struct drm_property_blob *blob)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	int i;
@@ -419,12 +435,12 @@ static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state,
 
 static void i9xx_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut);
+	i9xx_load_luts_internal(crtc_state, crtc_state->hw.gamma_lut);
 }
 
 static void i9xx_color_commit(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
@@ -437,7 +453,7 @@ static void i9xx_color_commit(const struct intel_crtc_state *crtc_state)
 
 static void ilk_color_commit(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
@@ -452,7 +468,7 @@ static void ilk_color_commit(const struct intel_crtc_state *crtc_state)
 
 static void hsw_color_commit(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode);
@@ -462,7 +478,7 @@ static void hsw_color_commit(const struct intel_crtc_state *crtc_state)
 
 static void skl_color_commit(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	u32 val = 0;
@@ -508,8 +524,8 @@ static void i965_load_lut_10p6(struct intel_crtc *crtc,
 
 static void i965_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
 
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
 		i9xx_load_luts(crtc_state);
@@ -531,8 +547,8 @@ static void ilk_load_lut_10(struct intel_crtc *crtc,
 
 static void ilk_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
 
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
 		i9xx_load_luts(crtc_state);
@@ -611,12 +627,13 @@ static void bdw_load_lut_10(struct intel_crtc *crtc,
 static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_dsb *dsb = intel_dsb_get(crtc);
 	enum pipe pipe = crtc->pipe;
 
 	/* Program the max register to clamp values > 1.0. */
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16);
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16);
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16);
+	intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16);
+	intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16);
+	intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16);
 
 	/*
 	 * Program the gc max 2 register to clamp values > 1.0.
@@ -624,17 +641,22 @@ static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
 	 * from 3.0 to 7.0
 	 */
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16);
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16);
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16);
+		intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 0),
+				    1 << 16);
+		intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 1),
+				    1 << 16);
+		intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 2),
+				    1 << 16);
 	}
+
+	intel_dsb_put(dsb);
 }
 
 static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
 
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
 		i9xx_load_luts(crtc_state);
@@ -655,9 +677,9 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 
 static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
 
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
 		i9xx_load_luts(crtc_state);
@@ -678,11 +700,11 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 
 static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
-	const struct drm_color_lut *lut = crtc_state->base.degamma_lut->data;
+	const struct drm_color_lut *lut = crtc_state->hw.degamma_lut->data;
 	u32 i;
 
 	/*
@@ -717,7 +739,7 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state)
 
 static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
@@ -744,8 +766,8 @@ static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_stat
 
 static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	/*
 	 * On GLK+ both pipe CSC and degamma LUT are controlled
@@ -755,7 +777,7 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 	 * the degama LUT so that we don't have to reload
 	 * it every time the pipe CSC is being enabled.
 	 */
-	if (crtc_state->base.degamma_lut)
+	if (crtc_state->hw.degamma_lut)
 		glk_load_degamma_lut(crtc_state);
 	else
 		glk_load_degamma_lut_linear(crtc_state);
@@ -786,79 +808,84 @@ static void
 icl_load_gcmax(const struct intel_crtc_state *crtc_state,
 	       const struct drm_color_lut *color)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_dsb *dsb = intel_dsb_get(crtc);
 	enum pipe pipe = crtc->pipe;
 
 	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+	intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red);
+	intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green);
+	intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue);
+	intel_dsb_put(dsb);
 }
 
 static void
 icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *blob = crtc_state->hw.gamma_lut;
 	const struct drm_color_lut *lut = blob->data;
+	struct intel_dsb *dsb = intel_dsb_get(crtc);
 	enum pipe pipe = crtc->pipe;
 	u32 i;
 
 	/*
-	 * Every entry in the multi-segment LUT is corresponding to a superfine
-	 * segment step which is 1/(8 * 128 * 256).
+	 * Program Super Fine segment (let's call it seg1)...
 	 *
-	 * Superfine segment has 9 entries, corresponding to values
-	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
+	 * Super Fine segment's step is 1/(8 * 128 * 256) and it has
+	 * 9 entries, corresponding to values 0, 1/(8 * 128 * 256),
+	 * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256).
 	 */
-	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	intel_dsb_reg_write(dsb, PREC_PAL_MULTI_SEG_INDEX(pipe),
+			    PAL_PREC_AUTO_INCREMENT);
 
 	for (i = 0; i < 9; i++) {
 		const struct drm_color_lut *entry = &lut[i];
 
-		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
-			   ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
-			   ilk_lut_12p4_udw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe),
+					    ilk_lut_12p4_ldw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe),
+					    ilk_lut_12p4_udw(entry));
 	}
+
+	intel_dsb_put(dsb);
 }
 
 static void
 icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *blob = crtc_state->hw.gamma_lut;
 	const struct drm_color_lut *lut = blob->data;
 	const struct drm_color_lut *entry;
+	struct intel_dsb *dsb = intel_dsb_get(crtc);
 	enum pipe pipe = crtc->pipe;
 	u32 i;
 
 	/*
-	 *
 	 * Program Fine segment (let's call it seg2)...
 	 *
-	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
-	 * ... 256/(128*256). So in order to program fine segment of LUT we
-	 * need to pick every 8'th entry in LUT, and program 256 indexes.
+	 * Fine segment's step is 1/(128 * 256) i.e. 1/(128 * 256), 2/(128 * 256)
+	 * ... 256/(128 * 256). So in order to program fine segment of LUT we
+	 * need to pick every 8th entry in the LUT, and program 256 indexes.
 	 *
 	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
-	 * with seg2[0] being unused by the hardware.
+	 * seg2[0] being unused by the hardware.
 	 */
-	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	intel_dsb_reg_write(dsb, PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
 	for (i = 1; i < 257; i++) {
 		entry = &lut[i * 8];
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe),
+					    ilk_lut_12p4_ldw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe),
+					    ilk_lut_12p4_udw(entry));
 	}
 
 	/*
 	 * Program Coarse segment (let's call it seg3)...
 	 *
-	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
-	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
+	 * Coarse segment starts from index 0 and it's step is 1/256 ie 0,
+	 * 1/256, 2/256 ... 256/256. As per the description of each entry in LUT
 	 * above, we need to pick every (8 * 128)th entry in LUT, and
 	 * program 256 of those.
 	 *
@@ -868,38 +895,43 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 	 */
 	for (i = 0; i < 256; i++) {
 		entry = &lut[i * 8 * 128];
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe),
+					    ilk_lut_12p4_ldw(entry));
+		intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe),
+					    ilk_lut_12p4_udw(entry));
 	}
 
 	/* The last entry in the LUT is to be programmed in GCMAX */
 	entry = &lut[256 * 8 * 128];
 	icl_load_gcmax(crtc_state, entry);
 	ivb_load_lut_ext_max(crtc);
+	intel_dsb_put(dsb);
 }
 
 static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_dsb *dsb = intel_dsb_get(crtc);
 
-	if (crtc_state->base.degamma_lut)
+	if (crtc_state->hw.degamma_lut)
 		glk_load_degamma_lut(crtc_state);
 
 	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
 	case GAMMA_MODE_MODE_8BIT:
 		i9xx_load_luts(crtc_state);
 		break;
-
 	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
 		icl_program_gamma_superfine_segment(crtc_state);
 		icl_program_gamma_multi_segment(crtc_state);
 		break;
-
 	default:
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
+
+	intel_dsb_commit(dsb);
+	intel_dsb_put(dsb);
 }
 
 static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color)
@@ -958,9 +990,9 @@ static void chv_load_cgm_gamma(struct intel_crtc *crtc,
 
 static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
 
 	cherryview_load_csc_matrix(crtc_state);
 
@@ -978,35 +1010,35 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	dev_priv->display.load_luts(crtc_state);
 }
 
 void intel_color_commit(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	dev_priv->display.color_commit(crtc_state);
 }
 
 static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 
-	return !old_crtc_state->base.gamma_lut &&
-		!old_crtc_state->base.degamma_lut;
+	return !old_crtc_state->hw.gamma_lut &&
+		!old_crtc_state->hw.degamma_lut;
 }
 
 static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 
@@ -1018,14 +1050,14 @@ static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 	if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode)
 		return false;
 
-	return !old_crtc_state->base.gamma_lut;
+	return !old_crtc_state->hw.gamma_lut;
 }
 
 static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 
@@ -1036,19 +1068,19 @@ static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 	 * linear hardware degamma mid scanout.
 	 */
 	return !old_crtc_state->csc_enable &&
-		!old_crtc_state->base.gamma_lut;
+		!old_crtc_state->hw.gamma_lut;
 }
 
 int intel_color_check(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	return dev_priv->display.color_check(crtc_state);
 }
 
 void intel_color_get_config(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	if (dev_priv->display.read_luts)
 		dev_priv->display.read_luts(crtc_state);
@@ -1072,16 +1104,16 @@ static bool need_plane_update(struct intel_plane *plane,
 static int
 intel_color_add_affected_planes(struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 	struct intel_plane *plane;
 
-	if (!new_crtc_state->base.active ||
-	    drm_atomic_crtc_needs_modeset(&new_crtc_state->base))
+	if (!new_crtc_state->hw.active ||
+	    drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi))
 		return 0;
 
 	if (new_crtc_state->gamma_enable == old_crtc_state->gamma_enable &&
@@ -1123,9 +1155,9 @@ static int check_lut_size(const struct drm_property_blob *lut, int expected)
 
 static int check_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+	const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
 	int gamma_length, degamma_length;
 	u32 gamma_tests, degamma_tests;
 
@@ -1173,7 +1205,7 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state)
 		return ret;
 
 	crtc_state->gamma_enable =
-		crtc_state->base.gamma_lut &&
+		crtc_state->hw.gamma_lut &&
 		!crtc_state->c8_planes;
 
 	crtc_state->gamma_mode = i9xx_gamma_mode(crtc_state);
@@ -1194,11 +1226,11 @@ static u32 chv_cgm_mode(const struct intel_crtc_state *crtc_state)
 	if (crtc_state_is_legacy_gamma(crtc_state))
 		return 0;
 
-	if (crtc_state->base.degamma_lut)
+	if (crtc_state->hw.degamma_lut)
 		cgm_mode |= CGM_PIPE_MODE_DEGAMMA;
-	if (crtc_state->base.ctm)
+	if (crtc_state->hw.ctm)
 		cgm_mode |= CGM_PIPE_MODE_CSC;
-	if (crtc_state->base.gamma_lut)
+	if (crtc_state->hw.gamma_lut)
 		cgm_mode |= CGM_PIPE_MODE_GAMMA;
 
 	return cgm_mode;
@@ -1250,6 +1282,21 @@ static u32 ilk_gamma_mode(const struct intel_crtc_state *crtc_state)
 		return GAMMA_MODE_MODE_10BIT;
 }
 
+static u32 ilk_csc_mode(const struct intel_crtc_state *crtc_state)
+{
+	/*
+	 * CSC comes after the LUT in RGB->YCbCr mode.
+	 * RGB->YCbCr needs the limited range offsets added to
+	 * the output. RGB limited range output is handled by
+	 * the hw automagically elsewhere.
+	 */
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB)
+		return CSC_BLACK_SCREEN_OFFSET;
+
+	return CSC_MODE_YUV_TO_RGB |
+		CSC_POSITION_BEFORE_GAMMA;
+}
+
 static int ilk_color_check(struct intel_crtc_state *crtc_state)
 {
 	int ret;
@@ -1259,19 +1306,19 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state)
 		return ret;
 
 	crtc_state->gamma_enable =
-		crtc_state->base.gamma_lut &&
+		crtc_state->hw.gamma_lut &&
 		!crtc_state->c8_planes;
 
 	/*
-	 * We don't expose the ctm on ilk/snb currently,
-	 * nor do we enable YCbCr output. Also RGB limited
-	 * range output is handled by the hw automagically.
+	 * We don't expose the ctm on ilk/snb currently, also RGB
+	 * limited range output is handled by the hw automagically.
 	 */
-	crtc_state->csc_enable = false;
+	crtc_state->csc_enable =
+		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB;
 
 	crtc_state->gamma_mode = ilk_gamma_mode(crtc_state);
 
-	crtc_state->csc_mode = 0;
+	crtc_state->csc_mode = ilk_csc_mode(crtc_state);
 
 	ret = intel_color_add_affected_planes(crtc_state);
 	if (ret)
@@ -1287,8 +1334,8 @@ static u32 ivb_gamma_mode(const struct intel_crtc_state *crtc_state)
 	if (!crtc_state->gamma_enable ||
 	    crtc_state_is_legacy_gamma(crtc_state))
 		return GAMMA_MODE_MODE_8BIT;
-	else if (crtc_state->base.gamma_lut &&
-		 crtc_state->base.degamma_lut)
+	else if (crtc_state->hw.gamma_lut &&
+		 crtc_state->hw.degamma_lut)
 		return GAMMA_MODE_MODE_SPLIT;
 	else
 		return GAMMA_MODE_MODE_10BIT;
@@ -1302,7 +1349,7 @@ static u32 ivb_csc_mode(const struct intel_crtc_state *crtc_state)
 	 * CSC comes after the LUT in degamma, RGB->YCbCr,
 	 * and RGB full->limited range mode.
 	 */
-	if (crtc_state->base.degamma_lut ||
+	if (crtc_state->hw.degamma_lut ||
 	    crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
 	    limited_color_range)
 		return 0;
@@ -1320,13 +1367,13 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
 		return ret;
 
 	crtc_state->gamma_enable =
-		(crtc_state->base.gamma_lut ||
-		 crtc_state->base.degamma_lut) &&
+		(crtc_state->hw.gamma_lut ||
+		 crtc_state->hw.degamma_lut) &&
 		!crtc_state->c8_planes;
 
 	crtc_state->csc_enable =
 		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
-		crtc_state->base.ctm || limited_color_range;
+		crtc_state->hw.ctm || limited_color_range;
 
 	crtc_state->gamma_mode = ivb_gamma_mode(crtc_state);
 
@@ -1359,14 +1406,14 @@ static int glk_color_check(struct intel_crtc_state *crtc_state)
 		return ret;
 
 	crtc_state->gamma_enable =
-		crtc_state->base.gamma_lut &&
+		crtc_state->hw.gamma_lut &&
 		!crtc_state->c8_planes;
 
 	/* On GLK+ degamma LUT is controlled by csc_enable */
 	crtc_state->csc_enable =
-		crtc_state->base.degamma_lut ||
+		crtc_state->hw.degamma_lut ||
 		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
-		crtc_state->base.ctm || crtc_state->limited_color_range;
+		crtc_state->hw.ctm || crtc_state->limited_color_range;
 
 	crtc_state->gamma_mode = glk_gamma_mode(crtc_state);
 
@@ -1385,14 +1432,14 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
 {
 	u32 gamma_mode = 0;
 
-	if (crtc_state->base.degamma_lut)
+	if (crtc_state->hw.degamma_lut)
 		gamma_mode |= PRE_CSC_GAMMA_ENABLE;
 
-	if (crtc_state->base.gamma_lut &&
+	if (crtc_state->hw.gamma_lut &&
 	    !crtc_state->c8_planes)
 		gamma_mode |= POST_CSC_GAMMA_ENABLE;
 
-	if (!crtc_state->base.gamma_lut ||
+	if (!crtc_state->hw.gamma_lut ||
 	    crtc_state_is_legacy_gamma(crtc_state))
 		gamma_mode |= GAMMA_MODE_MODE_8BIT;
 	else
@@ -1405,7 +1452,7 @@ static u32 icl_csc_mode(const struct intel_crtc_state *crtc_state)
 {
 	u32 csc_mode = 0;
 
-	if (crtc_state->base.ctm)
+	if (crtc_state->hw.ctm)
 		csc_mode |= ICL_CSC_ENABLE;
 
 	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
@@ -1432,6 +1479,403 @@ static int icl_color_check(struct intel_crtc_state *crtc_state)
 	return 0;
 }
 
+static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return 0;
+
+	switch (crtc_state->gamma_mode) {
+	case GAMMA_MODE_MODE_8BIT:
+		return 8;
+	case GAMMA_MODE_MODE_10BIT:
+		return 16;
+	default:
+		MISSING_CASE(crtc_state->gamma_mode);
+		return 0;
+	}
+}
+
+static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return 0;
+
+	if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0)
+		return 0;
+
+	switch (crtc_state->gamma_mode) {
+	case GAMMA_MODE_MODE_8BIT:
+		return 8;
+	case GAMMA_MODE_MODE_10BIT:
+		return 10;
+	default:
+		MISSING_CASE(crtc_state->gamma_mode);
+		return 0;
+	}
+}
+
+static int chv_gamma_precision(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA)
+		return 10;
+	else
+		return i9xx_gamma_precision(crtc_state);
+}
+
+static int glk_gamma_precision(const struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return 0;
+
+	switch (crtc_state->gamma_mode) {
+	case GAMMA_MODE_MODE_8BIT:
+		return 8;
+	case GAMMA_MODE_MODE_10BIT:
+		return 10;
+	default:
+		MISSING_CASE(crtc_state->gamma_mode);
+		return 0;
+	}
+}
+
+int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	if (HAS_GMCH(dev_priv)) {
+		if (IS_CHERRYVIEW(dev_priv))
+			return chv_gamma_precision(crtc_state);
+		else
+			return i9xx_gamma_precision(crtc_state);
+	} else {
+		if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+			return glk_gamma_precision(crtc_state);
+		else if (IS_IRONLAKE(dev_priv))
+			return ilk_gamma_precision(crtc_state);
+	}
+
+	return 0;
+}
+
+static bool err_check(struct drm_color_lut *lut1,
+		      struct drm_color_lut *lut2, u32 err)
+{
+	return ((abs((long)lut2->red - lut1->red)) <= err) &&
+		((abs((long)lut2->blue - lut1->blue)) <= err) &&
+		((abs((long)lut2->green - lut1->green)) <= err);
+}
+
+static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1,
+					struct drm_color_lut *lut2,
+					int lut_size, u32 err)
+{
+	int i;
+
+	for (i = 0; i < lut_size; i++) {
+		if (!err_check(&lut1[i], &lut2[i], err))
+			return false;
+	}
+
+	return true;
+}
+
+bool intel_color_lut_equal(struct drm_property_blob *blob1,
+			   struct drm_property_blob *blob2,
+			   u32 gamma_mode, u32 bit_precision)
+{
+	struct drm_color_lut *lut1, *lut2;
+	int lut_size1, lut_size2;
+	u32 err;
+
+	if (!blob1 != !blob2)
+		return false;
+
+	if (!blob1)
+		return true;
+
+	lut_size1 = drm_color_lut_size(blob1);
+	lut_size2 = drm_color_lut_size(blob2);
+
+	/* check sw and hw lut size */
+	switch (gamma_mode) {
+	case GAMMA_MODE_MODE_8BIT:
+	case GAMMA_MODE_MODE_10BIT:
+		if (lut_size1 != lut_size2)
+			return false;
+		break;
+	default:
+		MISSING_CASE(gamma_mode);
+			return false;
+	}
+
+	lut1 = blob1->data;
+	lut2 = blob2->data;
+
+	err = 0xffff >> bit_precision;
+
+	/* check sw and hw lut entry to be equal */
+	switch (gamma_mode) {
+	case GAMMA_MODE_MODE_8BIT:
+	case GAMMA_MODE_MODE_10BIT:
+		if (!intel_color_lut_entry_equal(lut1, lut2,
+						 lut_size2, err))
+			return false;
+		break;
+	default:
+		MISSING_CASE(gamma_mode);
+			return false;
+	}
+
+	return true;
+}
+
+/* convert hw value with given bit_precision to lut property val */
+static u32 intel_color_lut_pack(u32 val, u32 bit_precision)
+{
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	val = clamp_val(val, 0, max);
+
+	if (bit_precision < 16)
+		val <<= 16 - bit_precision;
+
+	return val;
+}
+
+static struct drm_property_blob *
+i9xx_read_lut_8(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+	struct drm_property_blob *blob;
+	struct drm_color_lut *blob_data;
+	u32 i, val;
+
+	blob = drm_property_create_blob(&dev_priv->drm,
+					sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH,
+					NULL);
+	if (IS_ERR(blob))
+		return NULL;
+
+	blob_data = blob->data;
+
+	for (i = 0; i < LEGACY_LUT_LENGTH; i++) {
+		if (HAS_GMCH(dev_priv))
+			val = I915_READ(PALETTE(pipe, i));
+		else
+			val = I915_READ(LGC_PALETTE(pipe, i));
+
+		blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET(
+							LGC_PALETTE_RED_MASK, val), 8);
+		blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET(
+							  LGC_PALETTE_GREEN_MASK, val), 8);
+		blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET(
+							 LGC_PALETTE_BLUE_MASK, val), 8);
+	}
+
+	return blob;
+}
+
+static void i9xx_read_luts(struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return;
+
+	crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state);
+}
+
+static struct drm_property_blob *
+i965_read_lut_10p6(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
+	enum pipe pipe = crtc->pipe;
+	struct drm_property_blob *blob;
+	struct drm_color_lut *blob_data;
+	u32 i, val1, val2;
+
+	blob = drm_property_create_blob(&dev_priv->drm,
+					sizeof(struct drm_color_lut) * lut_size,
+					NULL);
+	if (IS_ERR(blob))
+		return NULL;
+
+	blob_data = blob->data;
+
+	for (i = 0; i < lut_size - 1; i++) {
+		val1 = I915_READ(PALETTE(pipe, 2 * i + 0));
+		val2 = I915_READ(PALETTE(pipe, 2 * i + 1));
+
+		blob_data[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 |
+						 REG_FIELD_GET(PALETTE_RED_MASK, val1);
+		blob_data[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 |
+						   REG_FIELD_GET(PALETTE_GREEN_MASK, val1);
+		blob_data[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 |
+						  REG_FIELD_GET(PALETTE_BLUE_MASK, val1);
+	}
+
+	blob_data[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK,
+					 I915_READ(PIPEGCMAX(pipe, 0)));
+	blob_data[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK,
+					   I915_READ(PIPEGCMAX(pipe, 1)));
+	blob_data[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK,
+					  I915_READ(PIPEGCMAX(pipe, 2)));
+
+	return blob;
+}
+
+static void i965_read_luts(struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return;
+
+	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
+		crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state);
+	else
+		crtc_state->hw.gamma_lut = i965_read_lut_10p6(crtc_state);
+}
+
+static struct drm_property_blob *
+chv_read_cgm_lut(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
+	enum pipe pipe = crtc->pipe;
+	struct drm_property_blob *blob;
+	struct drm_color_lut *blob_data;
+	u32 i, val;
+
+	blob = drm_property_create_blob(&dev_priv->drm,
+					sizeof(struct drm_color_lut) * lut_size,
+					NULL);
+	if (IS_ERR(blob))
+		return NULL;
+
+	blob_data = blob->data;
+
+	for (i = 0; i < lut_size; i++) {
+		val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0));
+		blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET(
+							  CGM_PIPE_GAMMA_GREEN_MASK, val), 10);
+		blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET(
+							 CGM_PIPE_GAMMA_BLUE_MASK, val), 10);
+
+		val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1));
+		blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET(
+							CGM_PIPE_GAMMA_RED_MASK, val), 10);
+	}
+
+	return blob;
+}
+
+static void chv_read_luts(struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA)
+		crtc_state->hw.gamma_lut = chv_read_cgm_lut(crtc_state);
+	else
+		i965_read_luts(crtc_state);
+}
+
+static struct drm_property_blob *
+ilk_read_lut_10(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
+	enum pipe pipe = crtc->pipe;
+	struct drm_property_blob *blob;
+	struct drm_color_lut *blob_data;
+	u32 i, val;
+
+	blob = drm_property_create_blob(&dev_priv->drm,
+					sizeof(struct drm_color_lut) * lut_size,
+					NULL);
+	if (IS_ERR(blob))
+		return NULL;
+
+	blob_data = blob->data;
+
+	for (i = 0; i < lut_size; i++) {
+		val = I915_READ(PREC_PALETTE(pipe, i));
+
+		blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET(
+							PREC_PALETTE_RED_MASK, val), 10);
+		blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET(
+							  PREC_PALETTE_GREEN_MASK, val), 10);
+		blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET(
+							 PREC_PALETTE_BLUE_MASK, val), 10);
+	}
+
+	return blob;
+}
+
+static void ilk_read_luts(struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return;
+
+	if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0)
+		return;
+
+	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
+		crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state);
+	else
+		crtc_state->hw.gamma_lut = ilk_read_lut_10(crtc_state);
+}
+
+static struct drm_property_blob *
+glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	int hw_lut_size = ivb_lut_10_size(prec_index);
+	enum pipe pipe = crtc->pipe;
+	struct drm_property_blob *blob;
+	struct drm_color_lut *blob_data;
+	u32 i, val;
+
+	blob = drm_property_create_blob(&dev_priv->drm,
+					sizeof(struct drm_color_lut) * hw_lut_size,
+					NULL);
+	if (IS_ERR(blob))
+		return NULL;
+
+	blob_data = blob->data;
+
+	I915_WRITE(PREC_PAL_INDEX(pipe), prec_index |
+		   PAL_PREC_AUTO_INCREMENT);
+
+	for (i = 0; i < hw_lut_size; i++) {
+		val = I915_READ(PREC_PAL_DATA(pipe));
+
+		blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET(
+							PREC_PAL_DATA_RED_MASK, val), 10);
+		blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET(
+							PREC_PAL_DATA_GREEN_MASK, val), 10);
+		blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET(
+							PREC_PAL_DATA_BLUE_MASK, val), 10);
+	}
+
+	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
+
+	return blob;
+}
+
+static void glk_read_luts(struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->gamma_enable)
+		return;
+
+	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
+		crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state);
+	else
+		crtc_state->hw.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0));
+}
+
 void intel_color_init(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -1444,14 +1888,17 @@ void intel_color_init(struct intel_crtc *crtc)
 			dev_priv->display.color_check = chv_color_check;
 			dev_priv->display.color_commit = i9xx_color_commit;
 			dev_priv->display.load_luts = chv_load_luts;
+			dev_priv->display.read_luts = chv_read_luts;
 		} else if (INTEL_GEN(dev_priv) >= 4) {
 			dev_priv->display.color_check = i9xx_color_check;
 			dev_priv->display.color_commit = i9xx_color_commit;
 			dev_priv->display.load_luts = i965_load_luts;
+			dev_priv->display.read_luts = i965_read_luts;
 		} else {
 			dev_priv->display.color_check = i9xx_color_check;
 			dev_priv->display.color_commit = i9xx_color_commit;
 			dev_priv->display.load_luts = i9xx_load_luts;
+			dev_priv->display.read_luts = i9xx_read_luts;
 		}
 	} else {
 		if (INTEL_GEN(dev_priv) >= 11)
@@ -1470,16 +1917,19 @@ void intel_color_init(struct intel_crtc *crtc)
 		else
 			dev_priv->display.color_commit = ilk_color_commit;
 
-		if (INTEL_GEN(dev_priv) >= 11)
+		if (INTEL_GEN(dev_priv) >= 11) {
 			dev_priv->display.load_luts = icl_load_luts;
-		else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+		} else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) {
 			dev_priv->display.load_luts = glk_load_luts;
-		else if (INTEL_GEN(dev_priv) >= 8)
+			dev_priv->display.read_luts = glk_read_luts;
+		} else if (INTEL_GEN(dev_priv) >= 8) {
 			dev_priv->display.load_luts = bdw_load_luts;
-		else if (INTEL_GEN(dev_priv) >= 7)
+		} else if (INTEL_GEN(dev_priv) >= 7) {
 			dev_priv->display.load_luts = ivb_load_luts;
-		else
+		} else {
 			dev_priv->display.load_luts = ilk_load_luts;
+			dev_priv->display.read_luts = ilk_read_luts;
+		}
 	}
 
 	drm_crtc_enable_color_mgmt(&crtc->base,
diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h
index 057e8ac63555..173727aaa24d 100644
--- a/drivers/gpu/drm/i915/display/intel_color.h
+++ b/drivers/gpu/drm/i915/display/intel_color.h
@@ -6,13 +6,20 @@
 #ifndef __INTEL_COLOR_H__
 #define __INTEL_COLOR_H__
 
+#include <linux/types.h>
+
 struct intel_crtc_state;
 struct intel_crtc;
+struct drm_property_blob;
 
 void intel_color_init(struct intel_crtc *crtc);
 int intel_color_check(struct intel_crtc_state *crtc_state);
 void intel_color_commit(const struct intel_crtc_state *crtc_state);
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state);
 void intel_color_get_config(struct intel_crtc_state *crtc_state);
+int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state);
+bool intel_color_lut_equal(struct drm_property_blob *blob1,
+			   struct drm_property_blob *blob2,
+			   u32 gamma_mode, u32 bit_precision);
 
 #endif /* __INTEL_COLOR_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c
index 308ec63207ee..1133c4e97bb4 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -277,7 +277,22 @@ intel_attach_aspect_ratio_property(struct drm_connector *connector)
 void
 intel_attach_colorspace_property(struct drm_connector *connector)
 {
-	if (!drm_mode_create_colorspace_property(connector))
-		drm_object_attach_property(&connector->base,
-					   connector->colorspace_property, 0);
+	switch (connector->connector_type) {
+	case DRM_MODE_CONNECTOR_HDMIA:
+	case DRM_MODE_CONNECTOR_HDMIB:
+		if (drm_mode_create_hdmi_colorspace_property(connector))
+			return;
+		break;
+	case DRM_MODE_CONNECTOR_DisplayPort:
+	case DRM_MODE_CONNECTOR_eDP:
+		if (drm_mode_create_dp_colorspace_property(connector))
+			return;
+		break;
+	default:
+		DRM_DEBUG_KMS("Colorspace property not supported\n");
+		return;
+	}
+
+	drm_object_attach_property(&connector->base,
+				   connector->colorspace_property, 0);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 0a08354a6183..f976b800b245 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -65,7 +65,7 @@ static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_crt, base);
 }
 
-static struct intel_crt *intel_attached_crt(struct drm_connector *connector)
+static struct intel_crt *intel_attached_crt(struct intel_connector *connector)
 {
 	return intel_encoder_to_crt(intel_attached_encoder(connector));
 }
@@ -132,9 +132,9 @@ static void intel_crt_get_config(struct intel_encoder *encoder,
 {
 	pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG);
 
-	pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder);
+	pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder);
 
-	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
+	pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
 static void hsw_crt_get_config(struct intel_encoder *encoder,
@@ -144,13 +144,13 @@ static void hsw_crt_get_config(struct intel_encoder *encoder,
 
 	intel_ddi_get_config(encoder, pipe_config);
 
-	pipe_config->base.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC |
+	pipe_config->hw.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC |
 					      DRM_MODE_FLAG_NHSYNC |
 					      DRM_MODE_FLAG_PVSYNC |
 					      DRM_MODE_FLAG_NVSYNC);
-	pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder);
+	pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder);
 
-	pipe_config->base.adjusted_mode.crtc_clock = lpt_get_iclkip(dev_priv);
+	pipe_config->hw.adjusted_mode.crtc_clock = lpt_get_iclkip(dev_priv);
 }
 
 /* Note: The caller is required to filter out dpms modes not supported by the
@@ -161,8 +161,8 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	u32 adpa;
 
 	if (INTEL_GEN(dev_priv) >= 5)
@@ -241,6 +241,14 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
+	intel_crtc_vblank_off(old_crtc_state);
+
+	intel_disable_pipe(old_crtc_state);
+
+	intel_ddi_disable_transcoder_func(old_crtc_state);
+
+	ilk_pfit_disable(old_crtc_state);
+
 	intel_ddi_disable_pipe_clock(old_crtc_state);
 
 	pch_post_disable_crt(encoder, old_crtc_state, old_conn_state);
@@ -271,14 +279,14 @@ static void hsw_pre_enable_crt(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 
 	WARN_ON(!crtc_state->has_pch_encoder);
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 
-	dev_priv->display.fdi_link_train(crtc, crtc_state);
+	hsw_fdi_link_train(encoder, crtc_state);
 
 	intel_ddi_enable_pipe_clock(crtc_state);
 }
@@ -288,7 +296,7 @@ static void hsw_enable_crt(struct intel_encoder *encoder,
 			   const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 
 	WARN_ON(!crtc_state->has_pch_encoder);
@@ -343,7 +351,7 @@ intel_crt_mode_valid(struct drm_connector *connector,
 
 	/* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */
 	if (HAS_PCH_LPT(dev_priv) &&
-	    (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2))
+	    ilk_get_lanes_required(mode->clock, 270000, 24) > 2)
 		return MODE_CLOCK_HIGH;
 
 	/* HSW/BDW FDI limited to 4k */
@@ -358,7 +366,7 @@ static int intel_crt_compute_config(struct intel_encoder *encoder,
 				    struct drm_connector_state *conn_state)
 {
 	struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return -EINVAL;
@@ -373,7 +381,7 @@ static int pch_crt_compute_config(struct intel_encoder *encoder,
 				  struct drm_connector_state *conn_state)
 {
 	struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return -EINVAL;
@@ -390,7 +398,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return -EINVAL;
@@ -419,10 +427,10 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder,
 	return 0;
 }
 
-static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
+static bool ilk_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 adpa;
 	bool ret;
@@ -432,7 +440,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 		bool turn_off_dac = HAS_PCH_SPLIT(dev_priv);
 		u32 save_adpa;
 
-		crt->force_hotplug_required = 0;
+		crt->force_hotplug_required = false;
 
 		save_adpa = adpa = I915_READ(crt->adpa_reg);
 		DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa);
@@ -469,7 +477,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 static bool valleyview_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	bool reenable_hpd;
 	u32 adpa;
@@ -527,7 +535,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector)
 	int i, tries = 0;
 
 	if (HAS_PCH_SPLIT(dev_priv))
-		return intel_ironlake_crt_detect_hotplug(connector);
+		return ilk_crt_detect_hotplug(connector);
 
 	if (IS_VALLEYVIEW(dev_priv))
 		return valleyview_crt_detect_hotplug(connector);
@@ -601,7 +609,7 @@ static int intel_crt_ddc_get_modes(struct drm_connector *connector,
 
 static bool intel_crt_detect_ddc(struct drm_connector *connector)
 {
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev);
 	struct edid *edid;
 	struct i2c_adapter *i2c;
@@ -787,7 +795,7 @@ intel_crt_detect(struct drm_connector *connector,
 		 bool force)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct intel_encoder *intel_encoder = &crt->base;
 	intel_wakeref_t wakeref;
 	int status, ret;
@@ -844,7 +852,7 @@ load_detect:
 	}
 
 	/* for pre-945g platforms use load detect */
-	ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);
+	ret = intel_get_load_detect_pipe(connector, &tmp, ctx);
 	if (ret > 0) {
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
@@ -878,7 +886,7 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct intel_encoder *intel_encoder = &crt->base;
 	intel_wakeref_t wakeref;
 	struct i2c_adapter *i2c;
@@ -917,7 +925,7 @@ void intel_crt_reset(struct drm_encoder *encoder)
 		POSTING_READ(crt->adpa_reg);
 
 		DRM_DEBUG_KMS("crt adpa set to 0x%x\n", adpa);
-		crt->force_hotplug_required = 1;
+		crt->force_hotplug_required = true;
 	}
 
 }
@@ -1001,9 +1009,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
 	crt->base.type = INTEL_OUTPUT_ANALOG;
 	crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI);
 	if (IS_I830(dev_priv))
-		crt->base.crtc_mask = (1 << 0);
+		crt->base.pipe_mask = BIT(PIPE_A);
 	else
-		crt->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+		crt->base.pipe_mask = ~0;
 
 	if (IS_GEN(dev_priv, 2))
 		connector->interlace_allowed = 0;
@@ -1055,7 +1063,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
 	/*
 	 * Configure the automatic hotplug detection stuff
 	 */
-	crt->force_hotplug_required = 0;
+	crt->force_hotplug_required = false;
 
 	/*
 	 * TODO: find a proper way to discover whether we need to set the the
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 8eb2b3ec01ed..33f1dc3d7c1a 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -34,6 +34,7 @@
 #include "intel_ddi.h"
 #include "intel_display_types.h"
 #include "intel_dp.h"
+#include "intel_dp_mst.h"
 #include "intel_dp_link_training.h"
 #include "intel_dpio_phy.h"
 #include "intel_dsi.h"
@@ -45,6 +46,7 @@
 #include "intel_lspcon.h"
 #include "intel_panel.h"
 #include "intel_psr.h"
+#include "intel_sprite.h"
 #include "intel_tc.h"
 #include "intel_vdsc.h"
 
@@ -586,6 +588,40 @@ static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = {
 	{ 0x0, 0x00, 0x00 },	/* 3              0   */
 };
 
+struct tgl_dkl_phy_ddi_buf_trans {
+	u32 dkl_vswing_control;
+	u32 dkl_preshoot_control;
+	u32 dkl_de_emphasis_control;
+};
+
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = {
+				/* VS	pre-emp	Non-trans mV	Pre-emph dB */
+	{ 0x7, 0x0, 0x00 },	/* 0	0	400mV		0 dB */
+	{ 0x5, 0x0, 0x03 },	/* 0	1	400mV		3.5 dB */
+	{ 0x2, 0x0, 0x0b },	/* 0	2	400mV		6 dB */
+	{ 0x0, 0x0, 0x19 },	/* 0	3	400mV		9.5 dB */
+	{ 0x5, 0x0, 0x00 },	/* 1	0	600mV		0 dB */
+	{ 0x2, 0x0, 0x03 },	/* 1	1	600mV		3.5 dB */
+	{ 0x0, 0x0, 0x14 },	/* 1	2	600mV		6 dB */
+	{ 0x2, 0x0, 0x00 },	/* 2	0	800mV		0 dB */
+	{ 0x0, 0x0, 0x0B },	/* 2	1	800mV		3.5 dB */
+	{ 0x0, 0x0, 0x00 },	/* 3	0	1200mV		0 dB HDMI default */
+};
+
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = {
+				/* HDMI Preset	VS	Pre-emph */
+	{ 0x7, 0x0, 0x0 },	/* 1		400mV	0dB */
+	{ 0x6, 0x0, 0x0 },	/* 2		500mV	0dB */
+	{ 0x4, 0x0, 0x0 },	/* 3		650mV	0dB */
+	{ 0x2, 0x0, 0x0 },	/* 4		800mV	0dB */
+	{ 0x0, 0x0, 0x0 },	/* 5		1000mV	0dB */
+	{ 0x0, 0x0, 0x5 },	/* 6		Full	-1.5 dB */
+	{ 0x0, 0x0, 0x6 },	/* 7		Full	-1.8 dB */
+	{ 0x0, 0x0, 0x7 },	/* 8		Full	-2 dB */
+	{ 0x0, 0x0, 0x8 },	/* 9		Full	-2.5 dB */
+	{ 0x0, 0x0, 0xA },	/* 10		Full	-3 dB */
+};
+
 static const struct ddi_buf_trans *
 bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 {
@@ -867,12 +903,18 @@ icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate,
 
 static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
 {
+	struct ddi_vbt_port_info *port_info = &dev_priv->vbt.ddi_port_info[port];
 	int n_entries, level, default_entry;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 
-	level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
-
-	if (INTEL_GEN(dev_priv) >= 11) {
+	if (INTEL_GEN(dev_priv) >= 12) {
+		if (intel_phy_is_combo(dev_priv, phy))
+			icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
+						0, &n_entries);
+		else
+			n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
+		default_entry = n_entries - 1;
+	} else if (INTEL_GEN(dev_priv) == 11) {
 		if (intel_phy_is_combo(dev_priv, phy))
 			icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
 						0, &n_entries);
@@ -899,12 +941,14 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
 		return 0;
 	}
 
-	/* Choose a good default if VBT is badly populated */
-	if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries)
-		level = default_entry;
-
 	if (WARN_ON_ONCE(n_entries == 0))
 		return 0;
+
+	if (port_info->hdmi_level_shift_set)
+		level = port_info->hdmi_level_shift;
+	else
+		level = default_entry;
+
 	if (WARN_ON_ONCE(level >= n_entries))
 		level = n_entries - 1;
 
@@ -1049,6 +1093,8 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,
 	case DPLL_ID_ICL_MGPLL2:
 	case DPLL_ID_ICL_MGPLL3:
 	case DPLL_ID_ICL_MGPLL4:
+	case DPLL_ID_TGL_MGPLL5:
+	case DPLL_ID_TGL_MGPLL6:
 		return DDI_CLK_SEL_MG;
 	}
 }
@@ -1062,18 +1108,14 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,
  * DDI A (which is used for eDP)
  */
 
-void hsw_fdi_link_train(struct intel_crtc *crtc,
+void hsw_fdi_link_train(struct intel_encoder *encoder,
 			const struct intel_crtc_state *crtc_state)
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_encoder *encoder;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 temp, i, rx_ctl_val, ddi_pll_sel;
 
-	for_each_encoder_on_crtc(dev, &crtc->base, encoder) {
-		WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG);
-		intel_prepare_dp_ddi_buffers(encoder, crtc_state);
-	}
+	intel_prepare_dp_ddi_buffers(encoder, crtc_state);
 
 	/* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the
 	 * mode set "sequence for CRT port" document:
@@ -1196,9 +1238,9 @@ void hsw_fdi_link_train(struct intel_crtc *crtc,
 
 static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *intel_dig_port =
-		enc_to_dig_port(&encoder->base);
+		enc_to_dig_port(encoder);
 
 	intel_dp->DP = intel_dig_port->saved_port_bits |
 		DDI_BUF_CTL_ENABLE | DDI_BUF_TRANS_SELECT(0);
@@ -1413,11 +1455,30 @@ static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv,
 
 	ref_clock = dev_priv->cdclk.hw.ref;
 
-	m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK;
-	m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK;
-	m2_frac = (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ?
-		(pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >>
-		MG_PLL_DIV0_FBDIV_FRAC_SHIFT : 0;
+	if (INTEL_GEN(dev_priv) >= 12) {
+		m1 = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBPREDIV_MASK;
+		m1 = m1 >> DKL_PLL_DIV0_FBPREDIV_SHIFT;
+		m2_int = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBDIV_INT_MASK;
+
+		if (pll_state->mg_pll_bias & DKL_PLL_BIAS_FRAC_EN_H) {
+			m2_frac = pll_state->mg_pll_bias &
+				  DKL_PLL_BIAS_FBDIV_FRAC_MASK;
+			m2_frac = m2_frac >> DKL_PLL_BIAS_FBDIV_SHIFT;
+		} else {
+			m2_frac = 0;
+		}
+	} else {
+		m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK;
+		m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK;
+
+		if (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) {
+			m2_frac = pll_state->mg_pll_div0 &
+				  MG_PLL_DIV0_FBDIV_FRAC_MASK;
+			m2_frac = m2_frac >> MG_PLL_DIV0_FBDIV_FRAC_SHIFT;
+		} else {
+			m2_frac = 0;
+		}
+	}
 
 	switch (pll_state->mg_clktop2_hsclkctl &
 		MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK) {
@@ -1479,7 +1540,7 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config)
 	if (pipe_config->pixel_multiplier)
 		dotclock /= pipe_config->pixel_multiplier;
 
-	pipe_config->base.adjusted_mode.crtc_clock = dotclock;
+	pipe_config->hw.adjusted_mode.crtc_clock = dotclock;
 }
 
 static void icl_ddi_clock_get(struct intel_encoder *encoder,
@@ -1692,9 +1753,10 @@ static void intel_ddi_clock_get(struct intel_encoder *encoder,
 		hsw_ddi_clock_get(encoder, pipe_config);
 }
 
-void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
+void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
+			  const struct drm_connector_state *conn_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	u32 temp;
@@ -1704,66 +1766,63 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
 
 	WARN_ON(transcoder_is_dsi(cpu_transcoder));
 
-	temp = TRANS_MSA_SYNC_CLK;
-
-	if (crtc_state->limited_color_range)
-		temp |= TRANS_MSA_CEA_RANGE;
+	temp = DP_MSA_MISC_SYNC_CLOCK;
 
 	switch (crtc_state->pipe_bpp) {
 	case 18:
-		temp |= TRANS_MSA_6_BPC;
+		temp |= DP_MSA_MISC_6_BPC;
 		break;
 	case 24:
-		temp |= TRANS_MSA_8_BPC;
+		temp |= DP_MSA_MISC_8_BPC;
 		break;
 	case 30:
-		temp |= TRANS_MSA_10_BPC;
+		temp |= DP_MSA_MISC_10_BPC;
 		break;
 	case 36:
-		temp |= TRANS_MSA_12_BPC;
+		temp |= DP_MSA_MISC_12_BPC;
 		break;
 	default:
 		MISSING_CASE(crtc_state->pipe_bpp);
 		break;
 	}
 
+	/* nonsense combination */
+	WARN_ON(crtc_state->limited_color_range &&
+		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB);
+
+	if (crtc_state->limited_color_range)
+		temp |= DP_MSA_MISC_COLOR_CEA_RGB;
+
 	/*
 	 * As per DP 1.2 spec section 2.3.4.3 while sending
 	 * YCBCR 444 signals we should program MSA MISC1/0 fields with
-	 * colorspace information. The output colorspace encoding is BT601.
+	 * colorspace information.
 	 */
 	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444)
-		temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR;
+		temp |= DP_MSA_MISC_COLOR_YCBCR_444_BT709;
+
 	/*
 	 * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication
 	 * of Color Encoding Format and Content Color Gamut] while sending
-	 * YCBCR 420 signals we should program MSA MISC1 fields which
-	 * indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
+	 * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields
+	 * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
 	 */
-	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
-		temp |= TRANS_MSA_USE_VSC_SDP;
-	I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
-}
+	if (intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+		temp |= DP_MSA_MISC_COLOR_VSC_SDP;
 
-void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
-				    bool state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
-	u32 temp;
-
-	temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
-	if (state == true)
-		temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
-	else
-		temp &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
-	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
+	I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
 }
 
-void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
+/*
+ * Returns the TRANS_DDI_FUNC_CTL value based on CRTC state.
+ *
+ * Only intended to be used by intel_ddi_enable_transcoder_func() and
+ * intel_ddi_config_transcoder_func().
+ */
+static u32
+intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
@@ -1795,9 +1854,9 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 		BUG();
 	}
 
-	if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC)
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC)
 		temp |= TRANS_DDI_PVSYNC;
-	if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC)
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC)
 		temp |= TRANS_DDI_PHSYNC;
 
 	if (cpu_transcoder == TRANSCODER_EDP) {
@@ -1840,30 +1899,69 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 	} else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) {
 		temp |= TRANS_DDI_MODE_SELECT_DP_MST;
 		temp |= DDI_PORT_WIDTH(crtc_state->lane_count);
+
+		if (INTEL_GEN(dev_priv) >= 12) {
+			enum transcoder master;
+
+			master = crtc_state->mst_master_transcoder;
+			WARN_ON(master == INVALID_TRANSCODER);
+			temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master);
+		}
 	} else {
 		temp |= TRANS_DDI_MODE_SELECT_DP_SST;
 		temp |= DDI_PORT_WIDTH(crtc_state->lane_count);
 	}
 
+	return temp;
+}
+
+void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+	u32 temp;
+
+	temp = intel_ddi_transcoder_func_reg_val_get(crtc_state);
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
+		temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
+	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
+}
+
+/*
+ * Same as intel_ddi_enable_transcoder_func(), but it does not set the enable
+ * bit.
+ */
+static void
+intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+	u32 temp;
+
+	temp = intel_ddi_transcoder_func_reg_val_get(crtc_state);
+	temp &= ~TRANS_DDI_FUNC_ENABLE;
 	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
 }
 
 void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
-	i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
-	u32 val = I915_READ(reg);
+	u32 val;
+
+	val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+	val &= ~TRANS_DDI_FUNC_ENABLE;
 
 	if (INTEL_GEN(dev_priv) >= 12) {
-		val &= ~(TRANS_DDI_FUNC_ENABLE | TGL_TRANS_DDI_PORT_MASK |
-			 TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
+		if (!intel_dp_mst_is_master_trans(crtc_state))
+			val &= ~TGL_TRANS_DDI_PORT_MASK;
 	} else {
-		val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK |
-			 TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
+		val &= ~TRANS_DDI_PORT_MASK;
 	}
-	I915_WRITE(reg, val);
+	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val);
 
 	if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME &&
 	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
@@ -2045,18 +2143,20 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
 	}
 
 	if (!*pipe_mask)
-		DRM_DEBUG_KMS("No pipe for ddi port %c found\n",
-			      port_name(port));
+		DRM_DEBUG_KMS("No pipe for [ENCODER:%d:%s] found\n",
+			      encoder->base.base.id, encoder->base.name);
 
 	if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) {
-		DRM_DEBUG_KMS("Multiple pipes for non DP-MST port %c (pipe_mask %02x)\n",
-			      port_name(port), *pipe_mask);
+		DRM_DEBUG_KMS("Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n",
+			      encoder->base.base.id, encoder->base.name,
+			      *pipe_mask);
 		*pipe_mask = BIT(ffs(*pipe_mask) - 1);
 	}
 
 	if (mst_pipe_mask && mst_pipe_mask != *pipe_mask)
-		DRM_DEBUG_KMS("Conflicting MST and non-MST encoders for port %c (pipe_mask %02x mst_pipe_mask %02x)\n",
-			      port_name(port), *pipe_mask, mst_pipe_mask);
+		DRM_DEBUG_KMS("Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n",
+			      encoder->base.base.id, encoder->base.name,
+			      *pipe_mask, mst_pipe_mask);
 	else
 		*is_dp_mst = mst_pipe_mask;
 
@@ -2066,8 +2166,9 @@ out:
 		if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK |
 			    BXT_PHY_LANE_POWERDOWN_ACK |
 			    BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED)
-			DRM_ERROR("Port %c enabled but PHY powered down? "
-				  "(PHY_CTL %08x)\n", port_name(port), tmp);
+			DRM_ERROR("[ENCODER:%d:%s] enabled but PHY powered down? "
+				  "(PHY_CTL %08x)\n", encoder->base.base.id,
+				  encoder->base.name, tmp);
 	}
 
 	intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
@@ -2123,7 +2224,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
 	if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)))
 		return;
 
-	dig_port = enc_to_dig_port(&encoder->base);
+	dig_port = enc_to_dig_port(encoder);
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
 	/*
@@ -2138,14 +2239,14 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
 	/*
 	 * VDSC power is needed when DSC is enabled
 	 */
-	if (crtc_state->dsc_params.compression_enable)
+	if (crtc_state->dsc.compression_enable)
 		intel_display_power_get(dev_priv,
 					intel_dsc_power_domain(crtc_state));
 }
 
 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc);
 	enum port port = encoder->port;
@@ -2163,7 +2264,7 @@ void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state)
 
 void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 
 	if (cpu_transcoder != TRANSCODER_EDP) {
@@ -2193,7 +2294,7 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
 static void skl_ddi_set_iboost(struct intel_encoder *encoder,
 			       int level, enum intel_output_type type)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	u8 iboost;
@@ -2264,12 +2365,18 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder,
 u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 	int n_entries;
 
-	if (INTEL_GEN(dev_priv) >= 11) {
+	if (INTEL_GEN(dev_priv) >= 12) {
+		if (intel_phy_is_combo(dev_priv, phy))
+			icl_get_combo_buf_trans(dev_priv, encoder->type,
+						intel_dp->link_rate, &n_entries);
+		else
+			n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
+	} else if (INTEL_GEN(dev_priv) == 11) {
 		if (intel_phy_is_combo(dev_priv, phy))
 			icl_get_combo_buf_trans(dev_priv, encoder->type,
 						intel_dp->link_rate, &n_entries);
@@ -2397,7 +2504,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder,
 		width = 4;
 		rate = 0; /* Rate is always < than 6GHz for HDMI */
 	} else {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		width = intel_dp->lane_count;
 		rate = intel_dp->link_rate;
@@ -2523,7 +2630,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 		width = 4;
 		/* Rate is always < than 6GHz for HDMI */
 	} else {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		width = intel_dp->lane_count;
 		rate = intel_dp->link_rate;
@@ -2583,7 +2690,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 					   u32 level)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum port port = encoder->port;
+	enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port);
 	const struct icl_mg_phy_ddi_buf_trans *ddi_translations;
 	u32 n_entries, val;
 	int ln;
@@ -2599,33 +2706,33 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 
 	/* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_TX1_LINK_PARAMS(ln, port));
+		val = I915_READ(MG_TX1_LINK_PARAMS(ln, tc_port));
 		val &= ~CRI_USE_FS32;
-		I915_WRITE(MG_TX1_LINK_PARAMS(ln, port), val);
+		I915_WRITE(MG_TX1_LINK_PARAMS(ln, tc_port), val);
 
-		val = I915_READ(MG_TX2_LINK_PARAMS(ln, port));
+		val = I915_READ(MG_TX2_LINK_PARAMS(ln, tc_port));
 		val &= ~CRI_USE_FS32;
-		I915_WRITE(MG_TX2_LINK_PARAMS(ln, port), val);
+		I915_WRITE(MG_TX2_LINK_PARAMS(ln, tc_port), val);
 	}
 
 	/* Program MG_TX_SWINGCTRL with values from vswing table */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_TX1_SWINGCTRL(ln, port));
+		val = I915_READ(MG_TX1_SWINGCTRL(ln, tc_port));
 		val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK;
 		val |= CRI_TXDEEMPH_OVERRIDE_17_12(
 			ddi_translations[level].cri_txdeemph_override_17_12);
-		I915_WRITE(MG_TX1_SWINGCTRL(ln, port), val);
+		I915_WRITE(MG_TX1_SWINGCTRL(ln, tc_port), val);
 
-		val = I915_READ(MG_TX2_SWINGCTRL(ln, port));
+		val = I915_READ(MG_TX2_SWINGCTRL(ln, tc_port));
 		val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK;
 		val |= CRI_TXDEEMPH_OVERRIDE_17_12(
 			ddi_translations[level].cri_txdeemph_override_17_12);
-		I915_WRITE(MG_TX2_SWINGCTRL(ln, port), val);
+		I915_WRITE(MG_TX2_SWINGCTRL(ln, tc_port), val);
 	}
 
 	/* Program MG_TX_DRVCTRL with values from vswing table */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_TX1_DRVCTRL(ln, port));
+		val = I915_READ(MG_TX1_DRVCTRL(ln, tc_port));
 		val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK |
 			 CRI_TXDEEMPH_OVERRIDE_5_0_MASK);
 		val |= CRI_TXDEEMPH_OVERRIDE_5_0(
@@ -2633,9 +2740,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 			CRI_TXDEEMPH_OVERRIDE_11_6(
 				ddi_translations[level].cri_txdeemph_override_11_6) |
 			CRI_TXDEEMPH_OVERRIDE_EN;
-		I915_WRITE(MG_TX1_DRVCTRL(ln, port), val);
+		I915_WRITE(MG_TX1_DRVCTRL(ln, tc_port), val);
 
-		val = I915_READ(MG_TX2_DRVCTRL(ln, port));
+		val = I915_READ(MG_TX2_DRVCTRL(ln, tc_port));
 		val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK |
 			 CRI_TXDEEMPH_OVERRIDE_5_0_MASK);
 		val |= CRI_TXDEEMPH_OVERRIDE_5_0(
@@ -2643,7 +2750,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 			CRI_TXDEEMPH_OVERRIDE_11_6(
 				ddi_translations[level].cri_txdeemph_override_11_6) |
 			CRI_TXDEEMPH_OVERRIDE_EN;
-		I915_WRITE(MG_TX2_DRVCTRL(ln, port), val);
+		I915_WRITE(MG_TX2_DRVCTRL(ln, tc_port), val);
 
 		/* FIXME: Program CRI_LOADGEN_SEL after the spec is updated */
 	}
@@ -2654,17 +2761,17 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 	 * values from table for which TX1 and TX2 enabled.
 	 */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_CLKHUB(ln, port));
+		val = I915_READ(MG_CLKHUB(ln, tc_port));
 		if (link_clock < 300000)
 			val |= CFG_LOW_RATE_LKREN_EN;
 		else
 			val &= ~CFG_LOW_RATE_LKREN_EN;
-		I915_WRITE(MG_CLKHUB(ln, port), val);
+		I915_WRITE(MG_CLKHUB(ln, tc_port), val);
 	}
 
 	/* Program the MG_TX_DCC<LN, port being used> based on the link frequency */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_TX1_DCC(ln, port));
+		val = I915_READ(MG_TX1_DCC(ln, tc_port));
 		val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK;
 		if (link_clock <= 500000) {
 			val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN;
@@ -2672,9 +2779,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 			val |= CFG_AMI_CK_DIV_OVERRIDE_EN |
 				CFG_AMI_CK_DIV_OVERRIDE_VAL(1);
 		}
-		I915_WRITE(MG_TX1_DCC(ln, port), val);
+		I915_WRITE(MG_TX1_DCC(ln, tc_port), val);
 
-		val = I915_READ(MG_TX2_DCC(ln, port));
+		val = I915_READ(MG_TX2_DCC(ln, tc_port));
 		val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK;
 		if (link_clock <= 500000) {
 			val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN;
@@ -2682,18 +2789,18 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 			val |= CFG_AMI_CK_DIV_OVERRIDE_EN |
 				CFG_AMI_CK_DIV_OVERRIDE_VAL(1);
 		}
-		I915_WRITE(MG_TX2_DCC(ln, port), val);
+		I915_WRITE(MG_TX2_DCC(ln, tc_port), val);
 	}
 
 	/* Program MG_TX_PISO_READLOAD with values from vswing table */
 	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_TX1_PISO_READLOAD(ln, port));
+		val = I915_READ(MG_TX1_PISO_READLOAD(ln, tc_port));
 		val |= CRI_CALCINIT;
-		I915_WRITE(MG_TX1_PISO_READLOAD(ln, port), val);
+		I915_WRITE(MG_TX1_PISO_READLOAD(ln, tc_port), val);
 
-		val = I915_READ(MG_TX2_PISO_READLOAD(ln, port));
+		val = I915_READ(MG_TX2_PISO_READLOAD(ln, tc_port));
 		val |= CRI_CALCINIT;
-		I915_WRITE(MG_TX2_PISO_READLOAD(ln, port), val);
+		I915_WRITE(MG_TX2_PISO_READLOAD(ln, tc_port), val);
 	}
 }
 
@@ -2711,6 +2818,69 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder,
 		icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level);
 }
 
+static void
+tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
+				u32 level)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port);
+	const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
+	u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
+
+	if (encoder->type == INTEL_OUTPUT_HDMI) {
+		n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
+		ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
+	} else {
+		n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
+		ddi_translations = tgl_dkl_phy_dp_ddi_trans;
+	}
+
+	if (level >= n_entries)
+		level = n_entries - 1;
+
+	dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK |
+		      DKL_TX_DE_EMPAHSIS_COEFF_MASK |
+		      DKL_TX_VSWING_CONTROL_MASK);
+	dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control);
+	dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control);
+	dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control);
+
+	for (ln = 0; ln < 2; ln++) {
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln));
+
+		I915_WRITE(DKL_TX_PMD_LANE_SUS(tc_port), 0);
+
+		/* All the registers are RMW */
+		val = I915_READ(DKL_TX_DPCNTL0(tc_port));
+		val &= ~dpcnt_mask;
+		val |= dpcnt_val;
+		I915_WRITE(DKL_TX_DPCNTL0(tc_port), val);
+
+		val = I915_READ(DKL_TX_DPCNTL1(tc_port));
+		val &= ~dpcnt_mask;
+		val |= dpcnt_val;
+		I915_WRITE(DKL_TX_DPCNTL1(tc_port), val);
+
+		val = I915_READ(DKL_TX_DPCNTL2(tc_port));
+		val &= ~DKL_TX_DP20BITMODE;
+		I915_WRITE(DKL_TX_DPCNTL2(tc_port), val);
+	}
+}
+
+static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder,
+				    int link_clock,
+				    u32 level,
+				    enum intel_output_type type)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
+
+	if (intel_phy_is_combo(dev_priv, phy))
+		icl_combo_phy_ddi_vswing_sequence(encoder, level, type);
+	else
+		tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level);
+}
+
 static u32 translate_signal_level(int signal_levels)
 {
 	int i;
@@ -2742,7 +2912,10 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp)
 	struct intel_encoder *encoder = &dport->base;
 	int level = intel_ddi_dp_level(intel_dp);
 
-	if (INTEL_GEN(dev_priv) >= 11)
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
+					level, encoder->type);
+	else if (INTEL_GEN(dev_priv) >= 11)
 		icl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
 					level, encoder->type);
 	else if (IS_CANNONLAKE(dev_priv))
@@ -2833,11 +3006,38 @@ static void icl_unmap_plls_to_ports(struct intel_encoder *encoder)
 	mutex_unlock(&dev_priv->dpll_lock);
 }
 
+static void icl_sanitize_port_clk_off(struct drm_i915_private *dev_priv,
+				      u32 port_mask, bool ddi_clk_needed)
+{
+	enum port port;
+	u32 val;
+
+	val = I915_READ(ICL_DPCLKA_CFGCR0);
+	for_each_port_masked(port, port_mask) {
+		enum phy phy = intel_port_to_phy(dev_priv, port);
+		bool ddi_clk_off = val & icl_dpclka_cfgcr0_clk_off(dev_priv,
+								   phy);
+
+		if (ddi_clk_needed == !ddi_clk_off)
+			continue;
+
+		/*
+		 * Punt on the case now where clock is gated, but it would
+		 * be needed by the port. Something else is really broken then.
+		 */
+		if (WARN_ON(ddi_clk_needed))
+			continue;
+
+		DRM_NOTE("PHY %c is disabled/in DSI mode with an ungated DDI clock, gate it\n",
+			 phy_name(phy));
+		val |= icl_dpclka_cfgcr0_clk_off(dev_priv, phy);
+		I915_WRITE(ICL_DPCLKA_CFGCR0, val);
+	}
+}
+
 void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	u32 val;
-	enum port port;
 	u32 port_mask;
 	bool ddi_clk_needed;
 
@@ -2886,29 +3086,7 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder)
 		ddi_clk_needed = false;
 	}
 
-	val = I915_READ(ICL_DPCLKA_CFGCR0);
-	for_each_port_masked(port, port_mask) {
-		enum phy phy = intel_port_to_phy(dev_priv, port);
-
-		bool ddi_clk_ungated = !(val &
-					 icl_dpclka_cfgcr0_clk_off(dev_priv,
-								   phy));
-
-		if (ddi_clk_needed == ddi_clk_ungated)
-			continue;
-
-		/*
-		 * Punt on the case now where clock is gated, but it would
-		 * be needed by the port. Something else is really broken then.
-		 */
-		if (WARN_ON(ddi_clk_needed))
-			continue;
-
-		DRM_NOTE("PHY %c is disabled/in DSI mode with an ungated DDI clock, gate it\n",
-			 phy_name(port));
-		val |= icl_dpclka_cfgcr0_clk_off(dev_priv, phy);
-		I915_WRITE(ICL_DPCLKA_CFGCR0, val);
-	}
+	icl_sanitize_port_clk_off(dev_priv, port_mask, ddi_clk_needed);
 }
 
 static void intel_ddi_clk_select(struct intel_encoder *encoder,
@@ -2989,130 +3167,90 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder)
 	}
 }
 
-static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port)
-{
-	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
-	enum port port = dig_port->base.port;
-	enum tc_port tc_port = intel_port_to_tc(dev_priv, port);
-	u32 val;
-	int ln;
-
-	if (tc_port == PORT_TC_NONE)
-		return;
-
-	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_DP_MODE(ln, port));
-		val |= MG_DP_MODE_CFG_TR2PWR_GATING |
-		       MG_DP_MODE_CFG_TRPWR_GATING |
-		       MG_DP_MODE_CFG_CLNPWR_GATING |
-		       MG_DP_MODE_CFG_DIGPWR_GATING |
-		       MG_DP_MODE_CFG_GAONPWR_GATING;
-		I915_WRITE(MG_DP_MODE(ln, port), val);
-	}
-
-	val = I915_READ(MG_MISC_SUS0(tc_port));
-	val |= MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3) |
-	       MG_MISC_SUS0_CFG_TR2PWR_GATING |
-	       MG_MISC_SUS0_CFG_CL2PWR_GATING |
-	       MG_MISC_SUS0_CFG_GAONPWR_GATING |
-	       MG_MISC_SUS0_CFG_TRPWR_GATING |
-	       MG_MISC_SUS0_CFG_CL1PWR_GATING |
-	       MG_MISC_SUS0_CFG_DGPWR_GATING;
-	I915_WRITE(MG_MISC_SUS0(tc_port), val);
-}
-
-static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port)
-{
-	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
-	enum port port = dig_port->base.port;
-	enum tc_port tc_port = intel_port_to_tc(dev_priv, port);
-	u32 val;
-	int ln;
-
-	if (tc_port == PORT_TC_NONE)
-		return;
-
-	for (ln = 0; ln < 2; ln++) {
-		val = I915_READ(MG_DP_MODE(ln, port));
-		val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING |
-			 MG_DP_MODE_CFG_TRPWR_GATING |
-			 MG_DP_MODE_CFG_CLNPWR_GATING |
-			 MG_DP_MODE_CFG_DIGPWR_GATING |
-			 MG_DP_MODE_CFG_GAONPWR_GATING);
-		I915_WRITE(MG_DP_MODE(ln, port), val);
-	}
-
-	val = I915_READ(MG_MISC_SUS0(tc_port));
-	val &= ~(MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK |
-		 MG_MISC_SUS0_CFG_TR2PWR_GATING |
-		 MG_MISC_SUS0_CFG_CL2PWR_GATING |
-		 MG_MISC_SUS0_CFG_GAONPWR_GATING |
-		 MG_MISC_SUS0_CFG_TRPWR_GATING |
-		 MG_MISC_SUS0_CFG_CL1PWR_GATING |
-		 MG_MISC_SUS0_CFG_DGPWR_GATING);
-	I915_WRITE(MG_MISC_SUS0(tc_port), val);
-}
-
-static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port)
+static void
+icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port,
+		       const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
-	enum port port = intel_dig_port->base.port;
-	u32 ln0, ln1, lane_mask;
+	enum tc_port tc_port = intel_port_to_tc(dev_priv, intel_dig_port->base.port);
+	u32 ln0, ln1, pin_assignment;
+	u8 width;
 
 	if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT)
 		return;
 
-	ln0 = I915_READ(MG_DP_MODE(0, port));
-	ln1 = I915_READ(MG_DP_MODE(1, port));
+	if (INTEL_GEN(dev_priv) >= 12) {
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0));
+		ln0 = I915_READ(DKL_DP_MODE(tc_port));
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1));
+		ln1 = I915_READ(DKL_DP_MODE(tc_port));
+	} else {
+		ln0 = I915_READ(MG_DP_MODE(0, tc_port));
+		ln1 = I915_READ(MG_DP_MODE(1, tc_port));
+	}
 
-	switch (intel_dig_port->tc_mode) {
-	case TC_PORT_DP_ALT:
-		ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
-		ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
+	ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE);
+	ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
 
-		lane_mask = intel_tc_port_get_lane_mask(intel_dig_port);
+	/* DPPATC */
+	pin_assignment = intel_tc_port_get_pin_assignment_mask(intel_dig_port);
+	width = crtc_state->lane_count;
 
-		switch (lane_mask) {
-		case 0x1:
-		case 0x4:
-			break;
-		case 0x2:
+	switch (pin_assignment) {
+	case 0x0:
+		WARN_ON(intel_dig_port->tc_mode != TC_PORT_LEGACY);
+		if (width == 1) {
+			ln1 |= MG_DP_MODE_CFG_DP_X1_MODE;
+		} else {
+			ln0 |= MG_DP_MODE_CFG_DP_X2_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X2_MODE;
+		}
+		break;
+	case 0x1:
+		if (width == 4) {
+			ln0 |= MG_DP_MODE_CFG_DP_X2_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X2_MODE;
+		}
+		break;
+	case 0x2:
+		if (width == 2) {
+			ln0 |= MG_DP_MODE_CFG_DP_X2_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X2_MODE;
+		}
+		break;
+	case 0x3:
+	case 0x5:
+		if (width == 1) {
 			ln0 |= MG_DP_MODE_CFG_DP_X1_MODE;
-			break;
-		case 0x3:
-			ln0 |= MG_DP_MODE_CFG_DP_X1_MODE |
-			       MG_DP_MODE_CFG_DP_X2_MODE;
-			break;
-		case 0x8:
 			ln1 |= MG_DP_MODE_CFG_DP_X1_MODE;
-			break;
-		case 0xC:
-			ln1 |= MG_DP_MODE_CFG_DP_X1_MODE |
-			       MG_DP_MODE_CFG_DP_X2_MODE;
-			break;
-		case 0xF:
-			ln0 |= MG_DP_MODE_CFG_DP_X1_MODE |
-			       MG_DP_MODE_CFG_DP_X2_MODE;
-			ln1 |= MG_DP_MODE_CFG_DP_X1_MODE |
-			       MG_DP_MODE_CFG_DP_X2_MODE;
-			break;
-		default:
-			MISSING_CASE(lane_mask);
+		} else {
+			ln0 |= MG_DP_MODE_CFG_DP_X2_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X2_MODE;
 		}
 		break;
-
-	case TC_PORT_LEGACY:
-		ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE;
-		ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE;
+	case 0x4:
+	case 0x6:
+		if (width == 1) {
+			ln0 |= MG_DP_MODE_CFG_DP_X1_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X1_MODE;
+		} else {
+			ln0 |= MG_DP_MODE_CFG_DP_X2_MODE;
+			ln1 |= MG_DP_MODE_CFG_DP_X2_MODE;
+		}
 		break;
-
 	default:
-		MISSING_CASE(intel_dig_port->tc_mode);
-		return;
+		MISSING_CASE(pin_assignment);
 	}
 
-	I915_WRITE(MG_DP_MODE(0, port), ln0);
-	I915_WRITE(MG_DP_MODE(1, port), ln1);
+	if (INTEL_GEN(dev_priv) >= 12) {
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0));
+		I915_WRITE(DKL_DP_MODE(tc_port), ln0);
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1));
+		I915_WRITE(DKL_DP_MODE(tc_port), ln1);
+	} else {
+		I915_WRITE(MG_DP_MODE(0, tc_port), ln0);
+		I915_WRITE(MG_DP_MODE(1, tc_port), ln1);
+	}
 }
 
 static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp,
@@ -3129,17 +3267,18 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum port port = encoder->port;
+	struct intel_dp *intel_dp;
 	u32 val;
 
 	if (!crtc_state->fec_enable)
 		return;
 
-	val = I915_READ(DP_TP_CTL(port));
+	intel_dp = enc_to_intel_dp(encoder);
+	val = I915_READ(intel_dp->regs.dp_tp_ctl);
 	val |= DP_TP_CTL_FEC_ENABLE;
-	I915_WRITE(DP_TP_CTL(port), val);
+	I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
 
-	if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port),
+	if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
 				  DP_TP_STATUS_FEC_ENABLE_LIVE, 1))
 		DRM_ERROR("Timed out waiting for FEC Enable Status\n");
 }
@@ -3148,35 +3287,263 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
 					const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum port port = encoder->port;
+	struct intel_dp *intel_dp;
 	u32 val;
 
 	if (!crtc_state->fec_enable)
 		return;
 
-	val = I915_READ(DP_TP_CTL(port));
+	intel_dp = enc_to_intel_dp(encoder);
+	val = I915_READ(intel_dp->regs.dp_tp_ctl);
 	val &= ~DP_TP_CTL_FEC_ENABLE;
-	I915_WRITE(DP_TP_CTL(port), val);
-	POSTING_READ(DP_TP_CTL(port));
+	I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
+	POSTING_READ(intel_dp->regs.dp_tp_ctl);
 }
 
-static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
-				    const struct intel_crtc_state *crtc_state,
-				    const struct drm_connector_state *conn_state)
+static void
+tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
+{
+	struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev);
+	u32 val;
+
+	if (!cstate->dc3co_exitline)
+		return;
+
+	val = I915_READ(EXITLINE(cstate->cpu_transcoder));
+	val &= ~(EXITLINE_MASK | EXITLINE_ENABLE);
+	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
+}
+
+static void
+tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	u32 val, exit_scanlines;
+	struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev);
+
+	if (!cstate->dc3co_exitline)
+		return;
+
+	exit_scanlines = cstate->dc3co_exitline;
+	exit_scanlines <<= EXITLINE_SHIFT;
+	val = I915_READ(EXITLINE(cstate->cpu_transcoder));
+	val &= ~(EXITLINE_MASK | EXITLINE_ENABLE);
+	val |= exit_scanlines;
+	val |= EXITLINE_ENABLE;
+	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
+}
+
+static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder,
+					      struct intel_crtc_state *cstate)
+{
+	u32 exit_scanlines;
+	struct drm_i915_private *dev_priv = to_i915(cstate->uapi.crtc->dev);
+	u32 crtc_vdisplay = cstate->hw.adjusted_mode.crtc_vdisplay;
+
+	cstate->dc3co_exitline = 0;
+
+	if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO))
+		return;
+
+	/* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/
+	if (to_intel_crtc(cstate->uapi.crtc)->pipe != PIPE_A ||
+	    encoder->port != PORT_A)
+		return;
+
+	if (!cstate->has_psr2 || !cstate->hw.active)
+		return;
+
+	/*
+	 * DC3CO Exit time 200us B.Spec 49196
+	 * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1
+	 */
+	exit_scanlines =
+		intel_usecs_to_scanlines(&cstate->hw.adjusted_mode, 200) + 1;
+
+	if (WARN_ON(exit_scanlines > crtc_vdisplay))
+		return;
+
+	cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines;
+	DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline);
+}
+
+static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state)
+{
+	u32 val;
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
+	if (INTEL_GEN(dev_priv) < 12)
+		return;
+
+	val = I915_READ(EXITLINE(crtc_state->cpu_transcoder));
+
+	if (val & EXITLINE_ENABLE)
+		crtc_state->dc3co_exitline = val & EXITLINE_MASK;
+}
+
+static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
+				  const struct intel_crtc_state *crtc_state,
+				  const struct drm_connector_state *conn_state)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+	bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST);
+	int level = intel_ddi_dp_level(intel_dp);
+	enum transcoder transcoder = crtc_state->cpu_transcoder;
+
+	tgl_set_psr2_transcoder_exitline(crtc_state);
+	intel_dp_set_link_params(intel_dp, crtc_state->port_clock,
+				 crtc_state->lane_count, is_mst);
+
+	intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(transcoder);
+	intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(transcoder);
+
+	/*
+	 * 1. Enable Power Wells
+	 *
+	 * This was handled at the beginning of intel_atomic_commit_tail(),
+	 * before we called down into this function.
+	 */
+
+	/* 2. Enable Panel Power if PPS is required */
+	intel_edp_panel_on(intel_dp);
+
+	/*
+	 * 3. For non-TBT Type-C ports, set FIA lane count
+	 * (DFLEXDPSP.DPX4TXLATC)
+	 *
+	 * This was done before tgl_ddi_pre_enable_dp by
+	 * hsw_crtc_enable()->intel_encoders_pre_pll_enable().
+	 */
+
+	/*
+	 * 4. Enable the port PLL.
+	 *
+	 * The PLL enabling itself was already done before this function by
+	 * hsw_crtc_enable()->intel_enable_shared_dpll().  We need only
+	 * configure the PLL to port mapping here.
+	 */
+	intel_ddi_clk_select(encoder, crtc_state);
+
+	/* 5. If IO power is controlled through PWR_WELL_CTL, Enable IO Power */
+	if (!intel_phy_is_tc(dev_priv, phy) ||
+	    dig_port->tc_mode != TC_PORT_TBT_ALT)
+		intel_display_power_get(dev_priv,
+					dig_port->ddi_io_power_domain);
+
+	/* 6. Program DP_MODE */
+	icl_program_mg_dp_mode(dig_port, crtc_state);
+
+	/*
+	 * 7. The rest of the below are substeps under the bspec's "Enable and
+	 * Train Display Port" step.  Note that steps that are specific to
+	 * MST will be handled by intel_mst_pre_enable_dp() before/after it
+	 * calls into this function.  Also intel_mst_pre_enable_dp() only calls
+	 * us when active_mst_links==0, so any steps designated for "single
+	 * stream or multi-stream master transcoder" can just be performed
+	 * unconditionally here.
+	 */
+
+	/*
+	 * 7.a Configure Transcoder Clock Select to direct the Port clock to the
+	 * Transcoder.
+	 */
+	intel_ddi_enable_pipe_clock(crtc_state);
+
+	/*
+	 * 7.b Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST
+	 * Transport Select
+	 */
+	intel_ddi_config_transcoder_func(crtc_state);
+
+	/*
+	 * 7.c Configure & enable DP_TP_CTL with link training pattern 1
+	 * selected
+	 *
+	 * This will be handled by the intel_dp_start_link_train() farther
+	 * down this function.
+	 */
+
+	/* 7.e Configure voltage swing and related IO settings */
+	tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level,
+				encoder->type);
+
+	/*
+	 * 7.f Combo PHY: Configure PORT_CL_DW10 Static Power Down to power up
+	 * the used lanes of the DDI.
+	 */
+	if (intel_phy_is_combo(dev_priv, phy)) {
+		bool lane_reversal =
+			dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL;
+
+		intel_combo_phy_power_up_lanes(dev_priv, phy, false,
+					       crtc_state->lane_count,
+					       lane_reversal);
+	}
+
+	/*
+	 * 7.g Configure and enable DDI_BUF_CTL
+	 * 7.h Wait for DDI_BUF_CTL DDI Idle Status = 0b (Not Idle), timeout
+	 *     after 500 us.
+	 *
+	 * We only configure what the register value will be here.  Actual
+	 * enabling happens during link training farther down.
+	 */
+	intel_ddi_init_dp_buf_reg(encoder);
+
+	if (!is_mst)
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+
+	intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+	/*
+	 * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit
+	 * in the FEC_CONFIGURATION register to 1 before initiating link
+	 * training
+	 */
+	intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
+
+	/*
+	 * 7.i Follow DisplayPort specification training sequence (see notes for
+	 *     failure handling)
+	 * 7.j If DisplayPort multi-stream - Set DP_TP_CTL link training to Idle
+	 *     Pattern, wait for 5 idle patterns (DP_TP_STATUS Min_Idles_Sent)
+	 *     (timeout after 800 us)
+	 */
+	intel_dp_start_link_train(intel_dp);
+
+	/* 7.k Set DP_TP_CTL link training to Normal */
+	if (!is_trans_port_sync_mode(crtc_state))
+		intel_dp_stop_link_train(intel_dp);
+
+	/* 7.l Configure and enable FEC if needed */
+	intel_ddi_enable_fec(encoder, crtc_state);
+	intel_dsc_enable(encoder, crtc_state);
+}
+
+static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
+				  const struct intel_crtc_state *crtc_state,
+				  const struct drm_connector_state *conn_state)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST);
 	int level = intel_ddi_dp_level(intel_dp);
 
-	WARN_ON(is_mst && (port == PORT_A || port == PORT_E));
+	if (INTEL_GEN(dev_priv) < 11)
+		WARN_ON(is_mst && (port == PORT_A || port == PORT_E));
+	else
+		WARN_ON(is_mst && port == PORT_A);
 
 	intel_dp_set_link_params(intel_dp, crtc_state->port_clock,
 				 crtc_state->lane_count, is_mst);
 
+	intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port);
+	intel_dp->regs.dp_tp_status = DP_TP_STATUS(port);
+
 	intel_edp_panel_on(intel_dp);
 
 	intel_ddi_clk_select(encoder, crtc_state);
@@ -3186,8 +3553,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
 		intel_display_power_get(dev_priv,
 					dig_port->ddi_io_power_domain);
 
-	icl_program_mg_dp_mode(dig_port);
-	icl_disable_phy_clock_gating(dig_port);
+	icl_program_mg_dp_mode(dig_port, crtc_state);
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		icl_ddi_vswing_sequence(encoder, crtc_state->port_clock,
@@ -3215,39 +3581,58 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
 					      true);
 	intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
 	intel_dp_start_link_train(intel_dp);
-	if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
+	if ((port != PORT_A || INTEL_GEN(dev_priv) >= 9) &&
+	    !is_trans_port_sync_mode(crtc_state))
 		intel_dp_stop_link_train(intel_dp);
 
 	intel_ddi_enable_fec(encoder, crtc_state);
 
-	icl_enable_phy_clock_gating(dig_port);
-
 	if (!is_mst)
 		intel_ddi_enable_pipe_clock(crtc_state);
 
 	intel_dsc_enable(encoder, crtc_state);
 }
 
+static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
+				    const struct intel_crtc_state *crtc_state,
+				    const struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+	else
+		hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+
+	/* MST will call a setting of MSA after an allocating of Virtual Channel
+	 * from MST encoder pre_enable callback.
+	 */
+	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
+		intel_ddi_set_dp_msa(crtc_state, conn_state);
+}
+
 static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
 				      const struct intel_crtc_state *crtc_state,
 				      const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	int level = intel_ddi_hdmi_level(dev_priv, port);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 
 	intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
 	intel_ddi_clk_select(encoder, crtc_state);
 
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
-	icl_program_mg_dp_mode(dig_port);
-	icl_disable_phy_clock_gating(dig_port);
+	icl_program_mg_dp_mode(dig_port, crtc_state);
 
-	if (INTEL_GEN(dev_priv) >= 11)
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock,
+					level, INTEL_OUTPUT_HDMI);
+	else if (INTEL_GEN(dev_priv) == 11)
 		icl_ddi_vswing_sequence(encoder, crtc_state->port_clock,
 					level, INTEL_OUTPUT_HDMI);
 	else if (IS_CANNONLAKE(dev_priv))
@@ -3257,8 +3642,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
 	else
 		intel_prepare_hdmi_ddi_buffers(encoder, level);
 
-	icl_enable_phy_clock_gating(dig_port);
-
 	if (IS_GEN9_BC(dev_priv))
 		skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI);
 
@@ -3273,7 +3656,7 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *crtc_state,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
@@ -3301,12 +3684,12 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder,
 		intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state);
 	} else {
 		struct intel_lspcon *lspcon =
-				enc_to_intel_lspcon(&encoder->base);
+				enc_to_intel_lspcon(encoder);
 
 		intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
 		if (lspcon->active) {
 			struct intel_digital_port *dig_port =
-					enc_to_dig_port(&encoder->base);
+					enc_to_dig_port(encoder);
 
 			dig_port->set_infoframes(encoder,
 						 crtc_state->has_infoframe,
@@ -3330,10 +3713,14 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder,
 		wait = true;
 	}
 
-	val = I915_READ(DP_TP_CTL(port));
-	val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
-	val |= DP_TP_CTL_LINK_TRAIN_PAT1;
-	I915_WRITE(DP_TP_CTL(port), val);
+	if (intel_crtc_has_dp_encoder(crtc_state)) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		val = I915_READ(intel_dp->regs.dp_tp_ctl);
+		val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
+		val |= DP_TP_CTL_LINK_TRAIN_PAT1;
+		I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
+	}
 
 	/* Disable FEC in DP Sink */
 	intel_ddi_disable_fec_state(encoder, crtc_state);
@@ -3347,23 +3734,42 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 				      const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_dp *intel_dp = &dig_port->dp;
 	bool is_mst = intel_crtc_has_type(old_crtc_state,
 					  INTEL_OUTPUT_DP_MST);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 
-	if (!is_mst) {
-		intel_ddi_disable_pipe_clock(old_crtc_state);
-		/*
-		 * Power down sink before disabling the port, otherwise we end
-		 * up getting interrupts from the sink on detecting link loss.
-		 */
-		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+	/*
+	 * Power down sink before disabling the port, otherwise we end
+	 * up getting interrupts from the sink on detecting link loss.
+	 */
+	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+
+	if (INTEL_GEN(dev_priv) >= 12) {
+		if (is_mst) {
+			enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
+			u32 val;
+
+			val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+			val &= ~TGL_TRANS_DDI_PORT_MASK;
+			I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val);
+		}
+	} else {
+		if (!is_mst)
+			intel_ddi_disable_pipe_clock(old_crtc_state);
 	}
 
 	intel_disable_ddi_buf(encoder, old_crtc_state);
 
+	/*
+	 * From TGL spec: "If single stream or multi-stream master transcoder:
+	 * Configure Transcoder Clock select to direct no clock to the
+	 * transcoder"
+	 */
+	if (INTEL_GEN(dev_priv) >= 12)
+		intel_ddi_disable_pipe_clock(old_crtc_state);
+
 	intel_edp_panel_vdd_on(intel_dp);
 	intel_edp_panel_off(intel_dp);
 
@@ -3373,6 +3779,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 						  dig_port->ddi_io_power_domain);
 
 	intel_ddi_clk_disable(encoder);
+	tgl_clear_psr2_transcoder_exitline(old_crtc_state);
 }
 
 static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
@@ -3380,7 +3787,7 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
 					const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &dig_port->hdmi;
 
 	dig_port->set_infoframes(encoder, false,
@@ -3398,11 +3805,46 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
 	intel_dp_dual_mode_set_tmds_output(intel_hdmi, false);
 }
 
+static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	if (old_crtc_state->master_transcoder == INVALID_TRANSCODER)
+		return;
+
+	DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n",
+		      transcoder_name(old_crtc_state->cpu_transcoder));
+
+	I915_WRITE(TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder), 0);
+}
+
 static void intel_ddi_post_disable(struct intel_encoder *encoder,
 				   const struct intel_crtc_state *old_crtc_state,
 				   const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
+	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
+
+	if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) {
+		intel_crtc_vblank_off(old_crtc_state);
+
+		intel_disable_pipe(old_crtc_state);
+
+		if (INTEL_GEN(dev_priv) >= 11)
+			icl_disable_transcoder_port_sync(old_crtc_state);
+
+		intel_ddi_disable_transcoder_func(old_crtc_state);
+
+		intel_dsc_disable(old_crtc_state);
+
+		if (INTEL_GEN(dev_priv) >= 9)
+			skl_scaler_disable(old_crtc_state);
+		else
+			ilk_pfit_disable(old_crtc_state);
+	}
 
 	/*
 	 * When called from DP MST code:
@@ -3426,6 +3868,13 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder,
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		icl_unmap_plls_to_ports(encoder);
+
+	if (intel_crtc_has_dp_encoder(old_crtc_state) || is_tc_port)
+		intel_display_power_put_unchecked(dev_priv,
+						  intel_ddi_main_link_aux_domain(dig_port));
+
+	if (is_tc_port)
+		intel_tc_port_put_link(dig_port);
 }
 
 void intel_ddi_fdi_post_disable(struct intel_encoder *encoder,
@@ -3467,7 +3916,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
 				const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	if (port == PORT_A && INTEL_GEN(dev_priv) < 9)
@@ -3475,7 +3924,8 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
 
 	intel_edp_backlight_on(crtc_state, conn_state);
 	intel_psr_enable(intel_dp, crtc_state);
-	intel_dp_ycbcr_420_enable(intel_dp, crtc_state);
+	intel_dp_vsc_enable(intel_dp, crtc_state, conn_state);
+	intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state);
 	intel_edp_drrs_enable(intel_dp, crtc_state);
 
 	if (crtc_state->has_audio)
@@ -3486,12 +3936,12 @@ static i915_reg_t
 gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv,
 			       enum port port)
 {
-	static const i915_reg_t regs[] = {
-		[PORT_A] = CHICKEN_TRANS_EDP,
-		[PORT_B] = CHICKEN_TRANS_A,
-		[PORT_C] = CHICKEN_TRANS_B,
-		[PORT_D] = CHICKEN_TRANS_C,
-		[PORT_E] = CHICKEN_TRANS_A,
+	static const enum transcoder trans[] = {
+		[PORT_A] = TRANSCODER_EDP,
+		[PORT_B] = TRANSCODER_A,
+		[PORT_C] = TRANSCODER_B,
+		[PORT_D] = TRANSCODER_C,
+		[PORT_E] = TRANSCODER_A,
 	};
 
 	WARN_ON(INTEL_GEN(dev_priv) < 9);
@@ -3499,7 +3949,7 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv,
 	if (WARN_ON(port < PORT_A || port > PORT_E))
 		port = PORT_A;
 
-	return regs[port];
+	return CHICKEN_TRANS(trans[port]);
 }
 
 static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
@@ -3507,7 +3957,7 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct drm_connector *connector = conn_state->connector;
 	enum port port = encoder->port;
 
@@ -3576,6 +4026,7 @@ static void intel_enable_ddi(struct intel_encoder *encoder,
 	if (conn_state->content_protection ==
 	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
 		intel_hdcp_enable(to_intel_connector(conn_state->connector),
+				  crtc_state->cpu_transcoder,
 				  (u8)conn_state->hdcp_content_type);
 }
 
@@ -3583,7 +4034,7 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *old_crtc_state,
 				 const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	intel_dp->link_trained = false;
 
@@ -3631,9 +4082,9 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *crtc_state,
 				     const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	intel_ddi_set_pipe_settings(crtc_state);
+	intel_ddi_set_dp_msa(crtc_state, conn_state);
 
 	intel_psr_update(intel_dp, crtc_state);
 	intel_edp_drrs_enable(intel_dp, crtc_state);
@@ -3679,7 +4130,9 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder,
 	if (conn_state->content_protection ==
 	    DRM_MODE_CONTENT_PROTECTION_DESIRED ||
 	    content_protection_type_changed)
-		intel_hdcp_enable(connector, (u8)conn_state->hdcp_content_type);
+		intel_hdcp_enable(connector,
+				  crtc_state->cpu_transcoder,
+				  (u8)conn_state->hdcp_content_type);
 }
 
 static void
@@ -3693,8 +4146,9 @@ intel_ddi_update_prepare(struct intel_atomic_state *state,
 
 	WARN_ON(crtc && crtc->active);
 
-	intel_tc_port_get_link(enc_to_dig_port(&encoder->base), required_lanes);
-	if (crtc_state && crtc_state->base.active)
+	intel_tc_port_get_link(enc_to_dig_port(encoder),
+		               required_lanes);
+	if (crtc_state && crtc_state->hw.active)
 		intel_update_active_dpll(state, crtc, encoder);
 }
 
@@ -3703,7 +4157,7 @@ intel_ddi_update_complete(struct intel_atomic_state *state,
 			  struct intel_encoder *encoder,
 			  struct intel_crtc *crtc)
 {
-	intel_tc_port_put_link(enc_to_dig_port(&encoder->base));
+	intel_tc_port_put_link(enc_to_dig_port(encoder));
 }
 
 static void
@@ -3712,7 +4166,7 @@ intel_ddi_pre_pll_enable(struct intel_encoder *encoder,
 			 const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
 
@@ -3734,62 +4188,45 @@ intel_ddi_pre_pll_enable(struct intel_encoder *encoder,
 						crtc_state->lane_lat_optim_mask);
 }
 
-static void
-intel_ddi_post_pll_disable(struct intel_encoder *encoder,
-			   const struct intel_crtc_state *crtc_state,
-			   const struct drm_connector_state *conn_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
-	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
-	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
-
-	if (intel_crtc_has_dp_encoder(crtc_state) || is_tc_port)
-		intel_display_power_put_unchecked(dev_priv,
-						  intel_ddi_main_link_aux_domain(dig_port));
-
-	if (is_tc_port)
-		intel_tc_port_put_link(dig_port);
-}
-
 static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv =
 		to_i915(intel_dig_port->base.base.dev);
 	enum port port = intel_dig_port->base.port;
-	u32 val;
+	u32 dp_tp_ctl, ddi_buf_ctl;
 	bool wait = false;
 
-	if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) {
-		val = I915_READ(DDI_BUF_CTL(port));
-		if (val & DDI_BUF_CTL_ENABLE) {
-			val &= ~DDI_BUF_CTL_ENABLE;
-			I915_WRITE(DDI_BUF_CTL(port), val);
+	dp_tp_ctl = I915_READ(intel_dp->regs.dp_tp_ctl);
+
+	if (dp_tp_ctl & DP_TP_CTL_ENABLE) {
+		ddi_buf_ctl = I915_READ(DDI_BUF_CTL(port));
+		if (ddi_buf_ctl & DDI_BUF_CTL_ENABLE) {
+			I915_WRITE(DDI_BUF_CTL(port),
+				   ddi_buf_ctl & ~DDI_BUF_CTL_ENABLE);
 			wait = true;
 		}
 
-		val = I915_READ(DP_TP_CTL(port));
-		val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
-		val |= DP_TP_CTL_LINK_TRAIN_PAT1;
-		I915_WRITE(DP_TP_CTL(port), val);
-		POSTING_READ(DP_TP_CTL(port));
+		dp_tp_ctl &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
+		dp_tp_ctl |= DP_TP_CTL_LINK_TRAIN_PAT1;
+		I915_WRITE(intel_dp->regs.dp_tp_ctl, dp_tp_ctl);
+		POSTING_READ(intel_dp->regs.dp_tp_ctl);
 
 		if (wait)
 			intel_wait_ddi_buf_idle(dev_priv, port);
 	}
 
-	val = DP_TP_CTL_ENABLE |
-	      DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE;
+	dp_tp_ctl = DP_TP_CTL_ENABLE |
+		    DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE;
 	if (intel_dp->link_mst)
-		val |= DP_TP_CTL_MODE_MST;
+		dp_tp_ctl |= DP_TP_CTL_MODE_MST;
 	else {
-		val |= DP_TP_CTL_MODE_SST;
+		dp_tp_ctl |= DP_TP_CTL_MODE_SST;
 		if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
-			val |= DP_TP_CTL_ENHANCED_FRAME_ENABLE;
+			dp_tp_ctl |= DP_TP_CTL_ENHANCED_FRAME_ENABLE;
 	}
-	I915_WRITE(DP_TP_CTL(port), val);
-	POSTING_READ(DP_TP_CTL(port));
+	I915_WRITE(intel_dp->regs.dp_tp_ctl, dp_tp_ctl);
+	POSTING_READ(intel_dp->regs.dp_tp_ctl);
 
 	intel_dp->DP |= DDI_BUF_CTL_ENABLE;
 	I915_WRITE(DDI_BUF_CTL(port), intel_dp->DP);
@@ -3824,7 +4261,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
 	u32 temp, flags = 0;
 
@@ -3832,6 +4269,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	if (WARN_ON(transcoder_is_dsi(cpu_transcoder)))
 		return;
 
+	intel_dsc_get_config(encoder, pipe_config);
+
 	temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
 	if (temp & TRANS_DDI_PHSYNC)
 		flags |= DRM_MODE_FLAG_PHSYNC;
@@ -3842,7 +4281,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	else
 		flags |= DRM_MODE_FLAG_NVSYNC;
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
 	switch (temp & TRANS_DDI_BPC_MASK) {
 	case TRANS_DDI_BPC_6:
@@ -3891,17 +4330,42 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 		pipe_config->lane_count =
 			((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1;
 		intel_dp_get_m_n(intel_crtc, pipe_config);
+
+		if (INTEL_GEN(dev_priv) >= 11) {
+			i915_reg_t dp_tp_ctl;
+
+			if (IS_GEN(dev_priv, 11))
+				dp_tp_ctl = DP_TP_CTL(encoder->port);
+			else
+				dp_tp_ctl = TGL_DP_TP_CTL(pipe_config->cpu_transcoder);
+
+			pipe_config->fec_enable =
+				I915_READ(dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE;
+
+			DRM_DEBUG_KMS("[ENCODER:%d:%s] Fec status: %u\n",
+				      encoder->base.base.id, encoder->base.name,
+				      pipe_config->fec_enable);
+		}
+
 		break;
 	case TRANS_DDI_MODE_SELECT_DP_MST:
 		pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST);
 		pipe_config->lane_count =
 			((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1;
+
+		if (INTEL_GEN(dev_priv) >= 12)
+			pipe_config->mst_master_transcoder =
+					REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp);
+
 		intel_dp_get_m_n(intel_crtc, pipe_config);
 		break;
 	default:
 		break;
 	}
 
+	if (encoder->type == INTEL_OUTPUT_EDP)
+		tgl_dc3co_exitline_get_config(pipe_config);
+
 	pipe_config->has_audio =
 		intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder);
 
@@ -3971,7 +4435,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 				    struct intel_crtc_state *pipe_config,
 				    struct drm_connector_state *conn_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	int ret;
@@ -3979,10 +4443,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 	if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A)
 		pipe_config->cpu_transcoder = TRANSCODER_EDP;
 
-	if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI))
+	if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) {
 		ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state);
-	else
+	} else {
 		ret = intel_dp_compute_config(encoder, pipe_config, conn_state);
+		tgl_dc3co_exitline_compute_config(encoder, pipe_config);
+	}
+
 	if (ret)
 		return ret;
 
@@ -4003,7 +4470,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 
 static void intel_ddi_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+	struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder));
 
 	intel_dp_encoder_flush_work(encoder);
 
@@ -4070,7 +4537,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
 				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder);
 	struct intel_connector *connector = hdmi->attached_connector;
 	struct i2c_adapter *adapter =
 		intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
@@ -4102,7 +4569,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
 
 	WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI));
 
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	if (!crtc_state->hdmi_high_tmds_clock_ratio &&
@@ -4142,7 +4609,7 @@ intel_ddi_hotplug(struct intel_encoder *encoder,
 		  struct intel_connector *connector,
 		  bool irq_received)
 {
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct drm_modeset_acquire_ctx ctx;
 	enum intel_hotplug_state state;
 	int ret;
@@ -4273,10 +4740,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	struct ddi_vbt_port_info *port_info =
 		&dev_priv->vbt.ddi_port_info[port];
 	struct intel_digital_port *intel_dig_port;
-	struct intel_encoder *intel_encoder;
-	struct drm_encoder *encoder;
+	struct intel_encoder *encoder;
 	bool init_hdmi, init_dp, init_lspcon = false;
-	enum pipe pipe;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 
 	init_hdmi = port_info->supports_dvi || port_info->supports_hdmi;
@@ -4304,32 +4769,30 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	if (!intel_dig_port)
 		return;
 
-	intel_encoder = &intel_dig_port->base;
-	encoder = &intel_encoder->base;
+	encoder = &intel_dig_port->base;
 
-	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
+	drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs,
 			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
 
-	intel_encoder->hotplug = intel_ddi_hotplug;
-	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
-	intel_encoder->compute_config = intel_ddi_compute_config;
-	intel_encoder->enable = intel_enable_ddi;
-	intel_encoder->pre_pll_enable = intel_ddi_pre_pll_enable;
-	intel_encoder->post_pll_disable = intel_ddi_post_pll_disable;
-	intel_encoder->pre_enable = intel_ddi_pre_enable;
-	intel_encoder->disable = intel_disable_ddi;
-	intel_encoder->post_disable = intel_ddi_post_disable;
-	intel_encoder->update_pipe = intel_ddi_update_pipe;
-	intel_encoder->get_hw_state = intel_ddi_get_hw_state;
-	intel_encoder->get_config = intel_ddi_get_config;
-	intel_encoder->suspend = intel_dp_encoder_suspend;
-	intel_encoder->get_power_domains = intel_ddi_get_power_domains;
-	intel_encoder->type = INTEL_OUTPUT_DDI;
-	intel_encoder->power_domain = intel_port_to_power_domain(port);
-	intel_encoder->port = port;
-	intel_encoder->cloneable = 0;
-	for_each_pipe(dev_priv, pipe)
-		intel_encoder->crtc_mask |= BIT(pipe);
+	encoder->hotplug = intel_ddi_hotplug;
+	encoder->compute_output_type = intel_ddi_compute_output_type;
+	encoder->compute_config = intel_ddi_compute_config;
+	encoder->enable = intel_enable_ddi;
+	encoder->pre_pll_enable = intel_ddi_pre_pll_enable;
+	encoder->pre_enable = intel_ddi_pre_enable;
+	encoder->disable = intel_disable_ddi;
+	encoder->post_disable = intel_ddi_post_disable;
+	encoder->update_pipe = intel_ddi_update_pipe;
+	encoder->get_hw_state = intel_ddi_get_hw_state;
+	encoder->get_config = intel_ddi_get_config;
+	encoder->suspend = intel_dp_encoder_suspend;
+	encoder->get_power_domains = intel_ddi_get_power_domains;
+
+	encoder->type = INTEL_OUTPUT_DDI;
+	encoder->power_domain = intel_port_to_power_domain(port);
+	encoder->port = port;
+	encoder->cloneable = 0;
+	encoder->pipe_mask = ~0;
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
@@ -4337,6 +4800,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	else
 		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 			(DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES);
+
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 	intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port);
 	intel_dig_port->aux_ch = intel_bios_port_aux_ch(dev_priv, port);
@@ -4347,50 +4811,13 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 
 		intel_tc_port_init(intel_dig_port, is_legacy);
 
-		intel_encoder->update_prepare = intel_ddi_update_prepare;
-		intel_encoder->update_complete = intel_ddi_update_complete;
+		encoder->update_prepare = intel_ddi_update_prepare;
+		encoder->update_complete = intel_ddi_update_complete;
 	}
 
-	switch (port) {
-	case PORT_A:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_A_IO;
-		break;
-	case PORT_B:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_B_IO;
-		break;
-	case PORT_C:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_C_IO;
-		break;
-	case PORT_D:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_D_IO;
-		break;
-	case PORT_E:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_E_IO;
-		break;
-	case PORT_F:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_F_IO;
-		break;
-	case PORT_G:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_G_IO;
-		break;
-	case PORT_H:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_H_IO;
-		break;
-	case PORT_I:
-		intel_dig_port->ddi_io_power_domain =
-			POWER_DOMAIN_PORT_DDI_I_IO;
-		break;
-	default:
-		MISSING_CASE(port);
-	}
+	WARN_ON(port > PORT_I);
+	intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_A_IO +
+					      port - PORT_A;
 
 	if (init_dp) {
 		if (!intel_ddi_init_dp_connector(intel_dig_port))
@@ -4401,7 +4828,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 
 	/* In theory we don't need the encoder->type check, but leave it just in
 	 * case we have some really bad VBTs... */
-	if (intel_encoder->type != INTEL_OUTPUT_EDP && init_hdmi) {
+	if (encoder->type != INTEL_OUTPUT_EDP && init_hdmi) {
 		if (!intel_ddi_init_hdmi_connector(intel_dig_port))
 			goto err;
 	}
@@ -4425,6 +4852,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	return;
 
 err:
-	drm_encoder_cleanup(encoder);
+	drm_encoder_cleanup(&encoder->base);
 	kfree(intel_dig_port);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index a08365da2643..167c6579d972 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -22,7 +22,7 @@ struct intel_encoder;
 void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder,
 				const struct intel_crtc_state *old_crtc_state,
 				const struct drm_connector_state *old_conn_state);
-void hsw_fdi_link_train(struct intel_crtc *crtc,
+void hsw_fdi_link_train(struct intel_encoder *encoder,
 			const struct intel_crtc_state *crtc_state);
 void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port);
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe);
@@ -30,7 +30,8 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state);
 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
 void intel_ddi_disable_pipe_clock(const  struct intel_crtc_state *crtc_state);
-void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
+void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
+			  const struct drm_connector_state *conn_state);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index af50f05f4e9d..19ea842cfd84 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -31,7 +31,6 @@
 #include <linux/module.h>
 #include <linux/dma-resv.h>
 #include <linux/slab.h>
-#include <linux/vgaarb.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -47,6 +46,7 @@
 #include "display/intel_crt.h"
 #include "display/intel_ddi.h"
 #include "display/intel_dp.h"
+#include "display/intel_dp_mst.h"
 #include "display/intel_dsi.h"
 #include "display/intel_dvo.h"
 #include "display/intel_gmbus.h"
@@ -56,6 +56,8 @@
 #include "display/intel_tv.h"
 #include "display/intel_vdsc.h"
 
+#include "gt/intel_rps.h"
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_acpi.h"
@@ -65,6 +67,7 @@
 #include "intel_cdclk.h"
 #include "intel_color.h"
 #include "intel_display_types.h"
+#include "intel_dp_link_training.h"
 #include "intel_fbc.h"
 #include "intel_fbdev.h"
 #include "intel_fifo_underrun.h"
@@ -79,16 +82,27 @@
 #include "intel_sideband.h"
 #include "intel_sprite.h"
 #include "intel_tc.h"
+#include "intel_vga.h"
 
 /* Primary plane formats for gen <= 3 */
 static const u32 i8xx_primary_formats[] = {
 	DRM_FORMAT_C8,
-	DRM_FORMAT_RGB565,
 	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+};
+
+/* Primary plane formats for ivb (no fp16 due to hw issue) */
+static const u32 ivb_primary_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_XBGR2101010,
 };
 
-/* Primary plane formats for gen >= 4 */
+/* Primary plane formats for gen >= 4, except ivb */
 static const u32 i965_primary_formats[] = {
 	DRM_FORMAT_C8,
 	DRM_FORMAT_RGB565,
@@ -96,6 +110,22 @@ static const u32 i965_primary_formats[] = {
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_XBGR16161616F,
+};
+
+/* Primary plane formats for vlv/chv */
+static const u32 vlv_primary_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ARGB2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XBGR16161616F,
 };
 
 static const u64 i9xx_format_modifiers[] = {
@@ -116,8 +146,8 @@ static const u64 cursor_format_modifiers[] = {
 
 static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 				struct intel_crtc_state *pipe_config);
-static void ironlake_pch_clock_get(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config);
+static void ilk_pch_clock_get(struct intel_crtc *crtc,
+			      struct intel_crtc_state *pipe_config);
 
 static int intel_framebuffer_init(struct intel_framebuffer *ifb,
 				  struct drm_i915_gem_object *obj,
@@ -128,23 +158,18 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta
 					 const struct intel_link_m_n *m_n,
 					 const struct intel_link_m_n *m2_n2);
 static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state);
-static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state);
-static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state);
+static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state);
+static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state);
 static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state);
 static void vlv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
 static void chv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
-static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *);
-static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *);
-static void intel_crtc_init_scalers(struct intel_crtc *crtc,
-				    struct intel_crtc_state *crtc_state);
-static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state);
-static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state);
-static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state);
+static void skl_pfit_enable(const struct intel_crtc_state *crtc_state);
+static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state);
 static void intel_modeset_setup_hw_state(struct drm_device *dev,
 					 struct drm_modeset_acquire_ctx *ctx);
-static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
+static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc);
 
 struct intel_limit {
 	struct {
@@ -345,7 +370,7 @@ static const struct intel_limit intel_limits_g4x_dual_channel_lvds = {
 	},
 };
 
-static const struct intel_limit intel_limits_pineview_sdvo = {
+static const struct intel_limit pnv_limits_sdvo = {
 	.dot = { .min = 20000, .max = 400000},
 	.vco = { .min = 1700000, .max = 3500000 },
 	/* Pineview's Ncounter is a ring counter */
@@ -360,7 +385,7 @@ static const struct intel_limit intel_limits_pineview_sdvo = {
 		.p2_slow = 10, .p2_fast = 5 },
 };
 
-static const struct intel_limit intel_limits_pineview_lvds = {
+static const struct intel_limit pnv_limits_lvds = {
 	.dot = { .min = 20000, .max = 400000 },
 	.vco = { .min = 1700000, .max = 3500000 },
 	.n = { .min = 3, .max = 6 },
@@ -378,7 +403,7 @@ static const struct intel_limit intel_limits_pineview_lvds = {
  * We calculate clock using (register_value + 2) for N/M1/M2, so here
  * the range value for them is (actual_value - 2).
  */
-static const struct intel_limit intel_limits_ironlake_dac = {
+static const struct intel_limit ilk_limits_dac = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 5 },
@@ -391,7 +416,7 @@ static const struct intel_limit intel_limits_ironlake_dac = {
 		.p2_slow = 10, .p2_fast = 5 },
 };
 
-static const struct intel_limit intel_limits_ironlake_single_lvds = {
+static const struct intel_limit ilk_limits_single_lvds = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -404,7 +429,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds = {
 		.p2_slow = 14, .p2_fast = 14 },
 };
 
-static const struct intel_limit intel_limits_ironlake_dual_lvds = {
+static const struct intel_limit ilk_limits_dual_lvds = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -418,7 +443,7 @@ static const struct intel_limit intel_limits_ironlake_dual_lvds = {
 };
 
 /* LVDS 100mhz refclk limits. */
-static const struct intel_limit intel_limits_ironlake_single_lvds_100m = {
+static const struct intel_limit ilk_limits_single_lvds_100m = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 2 },
@@ -431,7 +456,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds_100m = {
 		.p2_slow = 14, .p2_fast = 14 },
 };
 
-static const struct intel_limit intel_limits_ironlake_dual_lvds_100m = {
+static const struct intel_limit ilk_limits_dual_lvds_100m = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -490,7 +515,7 @@ static const struct intel_limit intel_limits_bxt = {
 
 /* WA Display #0827: Gen9:all */
 static void
-skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable)
+skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable)
 {
 	if (enable)
 		I915_WRITE(CLKGATE_DIS_PSL(pipe),
@@ -518,7 +543,20 @@ icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe,
 static bool
 needs_modeset(const struct intel_crtc_state *state)
 {
-	return drm_atomic_crtc_needs_modeset(&state->base);
+	return drm_atomic_crtc_needs_modeset(&state->uapi);
+}
+
+bool
+is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
+{
+	return (crtc_state->master_transcoder != INVALID_TRANSCODER ||
+		crtc_state->sync_mode_slaves_mask);
+}
+
+static bool
+is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->master_transcoder != INVALID_TRANSCODER;
 }
 
 /*
@@ -632,7 +670,7 @@ i9xx_select_p2_div(const struct intel_limit *limit,
 		   const struct intel_crtc_state *crtc_state,
 		   int target)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS)) {
 		/*
@@ -668,7 +706,7 @@ i9xx_find_best_dpll(const struct intel_limit *limit,
 		    int target, int refclk, struct dpll *match_clock,
 		    struct dpll *best_clock)
 {
-	struct drm_device *dev = crtc_state->base.crtc->dev;
+	struct drm_device *dev = crtc_state->uapi.crtc->dev;
 	struct dpll clock;
 	int err = target;
 
@@ -726,7 +764,7 @@ pnv_find_best_dpll(const struct intel_limit *limit,
 		   int target, int refclk, struct dpll *match_clock,
 		   struct dpll *best_clock)
 {
-	struct drm_device *dev = crtc_state->base.crtc->dev;
+	struct drm_device *dev = crtc_state->uapi.crtc->dev;
 	struct dpll clock;
 	int err = target;
 
@@ -782,7 +820,7 @@ g4x_find_best_dpll(const struct intel_limit *limit,
 		   int target, int refclk, struct dpll *match_clock,
 		   struct dpll *best_clock)
 {
-	struct drm_device *dev = crtc_state->base.crtc->dev;
+	struct drm_device *dev = crtc_state->uapi.crtc->dev;
 	struct dpll clock;
 	int max_n;
 	bool found = false;
@@ -876,7 +914,7 @@ vlv_find_best_dpll(const struct intel_limit *limit,
 		   int target, int refclk, struct dpll *match_clock,
 		   struct dpll *best_clock)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	struct dpll clock;
 	unsigned int bestppm = 1000000;
@@ -936,7 +974,7 @@ chv_find_best_dpll(const struct intel_limit *limit,
 		   int target, int refclk, struct dpll *match_clock,
 		   struct dpll *best_clock)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	unsigned int best_error_ppm;
 	struct dpll clock;
@@ -999,33 +1037,6 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state,
 				  NULL, best_clock);
 }
 
-bool intel_crtc_active(struct intel_crtc *crtc)
-{
-	/* Be paranoid as we can arrive here with only partial
-	 * state retrieved from the hardware during setup.
-	 *
-	 * We can ditch the adjusted_mode.crtc_clock check as soon
-	 * as Haswell has gained clock readout/fastboot support.
-	 *
-	 * We can ditch the crtc->primary->state->fb check as soon as we can
-	 * properly reconstruct framebuffers.
-	 *
-	 * FIXME: The intel_crtc->active here should be switched to
-	 * crtc->state->active once we have proper CRTC states wired up
-	 * for atomic.
-	 */
-	return crtc->active && crtc->base.primary->state->fb &&
-		crtc->config->base.adjusted_mode.crtc_clock;
-}
-
-enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv,
-					     enum pipe pipe)
-{
-	struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
-
-	return crtc->config->cpu_transcoder;
-}
-
 static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv,
 				    enum pipe pipe)
 {
@@ -1069,7 +1080,7 @@ static void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc)
 static void
 intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (INTEL_GEN(dev_priv) >= 4) {
@@ -1119,11 +1130,15 @@ static void assert_fdi_tx(struct drm_i915_private *dev_priv,
 			  enum pipe pipe, bool state)
 {
 	bool cur_state;
-	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
-								      pipe);
 
 	if (HAS_DDI(dev_priv)) {
-		/* DDI does not have a specific FDI_TX register */
+		/*
+		 * DDI does not have a specific FDI_TX register.
+		 *
+		 * FDI is never fed from EDP transcoder
+		 * so pipe->transcoder cast is fine here.
+		 */
+		enum transcoder cpu_transcoder = (enum transcoder)pipe;
 		u32 val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
 		cur_state = !!(val & TRANS_DDI_FUNC_ENABLE);
 	} else {
@@ -1240,11 +1255,9 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe)
 }
 
 void assert_pipe(struct drm_i915_private *dev_priv,
-		 enum pipe pipe, bool state)
+		 enum transcoder cpu_transcoder, bool state)
 {
 	bool cur_state;
-	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
-								      pipe);
 	enum intel_display_power_domain power_domain;
 	intel_wakeref_t wakeref;
 
@@ -1264,8 +1277,9 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 	}
 
 	I915_STATE_WARN(cur_state != state,
-	     "pipe %c assertion failure (expected %s, current %s)\n",
-			pipe_name(pipe), onoff(state), onoff(cur_state));
+			"transcoder %s assertion failure (expected %s, current %s)\n",
+			transcoder_name(cpu_transcoder),
+			onoff(state), onoff(cur_state));
 }
 
 static void assert_plane(struct intel_plane *plane, bool state)
@@ -1392,7 +1406,7 @@ static void vlv_enable_pll(struct intel_crtc *crtc,
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 
 	/* PLL is protected by panel, make sure we can write it */
 	assert_panel_unlocked(dev_priv, pipe);
@@ -1441,7 +1455,7 @@ static void chv_enable_pll(struct intel_crtc *crtc,
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 
 	/* PLL is protected by panel, make sure we can write it */
 	assert_panel_unlocked(dev_priv, pipe);
@@ -1488,7 +1502,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc,
 	u32 dpll = crtc_state->dpll_hw_state.dpll;
 	int i;
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder);
 
 	/* PLL is protected by panel, make sure we can write it */
 	if (i9xx_has_pps(dev_priv))
@@ -1528,7 +1542,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc,
 
 static void i9xx_disable_pll(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
@@ -1537,7 +1551,7 @@ static void i9xx_disable_pll(const struct intel_crtc_state *crtc_state)
 		return;
 
 	/* Make sure the pipe isn't still relying on us */
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder);
 
 	I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS);
 	POSTING_READ(DPLL(pipe));
@@ -1548,7 +1562,7 @@ static void vlv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	u32 val;
 
 	/* Make sure the pipe isn't still relying on us */
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, (enum transcoder)pipe);
 
 	val = DPLL_INTEGRATED_REF_CLK_VLV |
 		DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS;
@@ -1565,7 +1579,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	u32 val;
 
 	/* Make sure the pipe isn't still relying on us */
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, (enum transcoder)pipe);
 
 	val = DPLL_SSC_REF_CLK_CHV |
 		DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS;
@@ -1612,14 +1626,14 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 
 	if (intel_de_wait_for_register(dev_priv, dpll_reg,
 				       port_mask, expected_mask, 1000))
-		WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n",
-		     port_name(dport->base.port),
+		WARN(1, "timed out waiting for [ENCODER:%d:%s] port ready: got 0x%x, expected 0x%x\n",
+		     dport->base.base.base.id, dport->base.base.name,
 		     I915_READ(dpll_reg) & port_mask, expected_mask);
 }
 
-static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_state)
+static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
@@ -1633,11 +1647,16 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s
 	assert_fdi_rx_enabled(dev_priv, pipe);
 
 	if (HAS_PCH_CPT(dev_priv)) {
-		/* Workaround: Set the timing override bit before enabling the
-		 * pch transcoder. */
 		reg = TRANS_CHICKEN2(pipe);
 		val = I915_READ(reg);
+		/*
+		 * Workaround: Set the timing override bit
+		 * before enabling the pch transcoder.
+		 */
 		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		/* Configure frame start delay to match the CPU */
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val |= TRANS_CHICKEN2_FRAME_START_DELAY(0);
 		I915_WRITE(reg, val);
 	}
 
@@ -1646,6 +1665,10 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s
 	pipeconf_val = I915_READ(PIPECONF(pipe));
 
 	if (HAS_PCH_IBX(dev_priv)) {
+		/* Configure frame start delay to match the CPU */
+		val &= ~TRANS_FRAME_START_DELAY_MASK;
+		val |= TRANS_FRAME_START_DELAY(0);
+
 		/*
 		 * Make the BPC in transcoder be consistent with
 		 * that in pipeconf reg. For HDMI we must use 8bpc
@@ -1683,9 +1706,12 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv,
 	assert_fdi_tx_enabled(dev_priv, (enum pipe) cpu_transcoder);
 	assert_fdi_rx_enabled(dev_priv, PIPE_A);
 
-	/* Workaround: set timing override bit. */
 	val = I915_READ(TRANS_CHICKEN2(PIPE_A));
+	/* Workaround: set timing override bit. */
 	val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+	/* Configure frame start delay to match the CPU */
+	val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+	val |= TRANS_CHICKEN2_FRAME_START_DELAY(0);
 	I915_WRITE(TRANS_CHICKEN2(PIPE_A), val);
 
 	val = TRANS_ENABLE;
@@ -1703,8 +1729,8 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv,
 		DRM_ERROR("Failed to enable PCH transcoder\n");
 }
 
-static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv,
-					    enum pipe pipe)
+static void ilk_disable_pch_transcoder(struct drm_i915_private *dev_priv,
+				       enum pipe pipe)
 {
 	i915_reg_t reg;
 	u32 val;
@@ -1763,7 +1789,7 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc)
 
 static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	/*
 	 * On i965gm the hardware frame counter reads
@@ -1783,16 +1809,25 @@ static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state
 
 static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
+	assert_vblank_disabled(&crtc->base);
 	drm_crtc_set_max_vblank_count(&crtc->base,
 				      intel_crtc_max_vblank_count(crtc_state));
 	drm_crtc_vblank_on(&crtc->base);
 }
 
+void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+
+	drm_crtc_vblank_off(&crtc->base);
+	assert_vblank_disabled(&crtc->base);
+}
+
 static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
@@ -1848,9 +1883,9 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)
 		intel_wait_for_pipe_scanline_moving(crtc);
 }
 
-static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state)
+void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
@@ -1893,6 +1928,74 @@ static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv)
 	return IS_GEN(dev_priv, 2) ? 2048 : 4096;
 }
 
+static bool is_ccs_plane(const struct drm_framebuffer *fb, int plane)
+{
+	if (!is_ccs_modifier(fb->modifier))
+		return false;
+
+	return plane >= fb->format->num_planes / 2;
+}
+
+static bool is_gen12_ccs_modifier(u64 modifier)
+{
+	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	       modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
+
+}
+
+static bool is_gen12_ccs_plane(const struct drm_framebuffer *fb, int plane)
+{
+	return is_gen12_ccs_modifier(fb->modifier) && is_ccs_plane(fb, plane);
+}
+
+static bool is_aux_plane(const struct drm_framebuffer *fb, int plane)
+{
+	if (is_ccs_modifier(fb->modifier))
+		return is_ccs_plane(fb, plane);
+
+	return plane == 1;
+}
+
+static int main_to_ccs_plane(const struct drm_framebuffer *fb, int main_plane)
+{
+	WARN_ON(!is_ccs_modifier(fb->modifier) ||
+		(main_plane && main_plane >= fb->format->num_planes / 2));
+
+	return fb->format->num_planes / 2 + main_plane;
+}
+
+static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane)
+{
+	WARN_ON(!is_ccs_modifier(fb->modifier) ||
+		ccs_plane < fb->format->num_planes / 2);
+
+	return ccs_plane - fb->format->num_planes / 2;
+}
+
+/* Return either the main plane's CCS or - if not a CCS FB - UV plane */
+int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane)
+{
+	if (is_ccs_modifier(fb->modifier))
+		return main_to_ccs_plane(fb, main_plane);
+
+	return 1;
+}
+
+bool
+intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info,
+				    uint64_t modifier)
+{
+	return info->is_yuv &&
+	       info->num_planes == (is_ccs_modifier(modifier) ? 4 : 2);
+}
+
+static bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb,
+				   int color_plane)
+{
+	return intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) &&
+	       color_plane == 1;
+}
+
 static unsigned int
 intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 {
@@ -1908,16 +2011,21 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 		else
 			return 512;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
-		if (color_plane == 1)
+		if (is_ccs_plane(fb, color_plane))
 			return 128;
 		/* fall through */
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		if (is_ccs_plane(fb, color_plane))
+			return 64;
+		/* fall through */
 	case I915_FORMAT_MOD_Y_TILED:
 		if (IS_GEN(dev_priv, 2) || HAS_128_BYTE_Y_TILING(dev_priv))
 			return 128;
 		else
 			return 512;
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
-		if (color_plane == 1)
+		if (is_ccs_plane(fb, color_plane))
 			return 128;
 		/* fall through */
 	case I915_FORMAT_MOD_Yf_TILED:
@@ -1944,6 +2052,9 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 static unsigned int
 intel_tile_height(const struct drm_framebuffer *fb, int color_plane)
 {
+	if (is_gen12_ccs_plane(fb, color_plane))
+		return 1;
+
 	return intel_tile_size(to_i915(fb->dev)) /
 		intel_tile_width_bytes(fb, color_plane);
 }
@@ -1957,7 +2068,17 @@ static void intel_tile_dims(const struct drm_framebuffer *fb, int color_plane,
 	unsigned int cpp = fb->format->cpp[color_plane];
 
 	*tile_width = tile_width_bytes / cpp;
-	*tile_height = intel_tile_size(to_i915(fb->dev)) / tile_width_bytes;
+	*tile_height = intel_tile_height(fb, color_plane);
+}
+
+static unsigned int intel_tile_row_size(const struct drm_framebuffer *fb,
+					int color_plane)
+{
+	unsigned int tile_width, tile_height;
+
+	intel_tile_dims(fb, color_plane, &tile_width, &tile_height);
+
+	return fb->pitches[color_plane] * tile_height;
 }
 
 unsigned int
@@ -2034,7 +2155,8 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 	struct drm_i915_private *dev_priv = to_i915(fb->dev);
 
 	/* AUX_DIST needs only 4K alignment */
-	if (color_plane == 1)
+	if ((INTEL_GEN(dev_priv) < 12 && is_aux_plane(fb, color_plane)) ||
+	    is_ccs_plane(fb, color_plane))
 		return 4096;
 
 	switch (fb->modifier) {
@@ -2044,9 +2166,19 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 		if (INTEL_GEN(dev_priv) >= 9)
 			return 256 * 1024;
 		return 0;
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		if (is_semiplanar_uv_plane(fb, color_plane))
+			return intel_tile_row_size(fb, color_plane);
+		/* Fall-through */
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+		return 16 * 1024;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
 	case I915_FORMAT_MOD_Y_TILED:
+		if (INTEL_GEN(dev_priv) >= 12 &&
+		    is_semiplanar_uv_plane(fb, color_plane))
+			return intel_tile_row_size(fb, color_plane);
+		/* Fall-through */
 	case I915_FORMAT_MOD_Yf_TILED:
 		return 1 * 1024 * 1024;
 	default:
@@ -2057,7 +2189,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 
 static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 
 	return INTEL_GEN(dev_priv) < 4 ||
@@ -2079,9 +2211,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	unsigned int pinctl;
 	u32 alignment;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	if (WARN_ON(!i915_gem_object_is_framebuffer(obj)))
+		return ERR_PTR(-EINVAL);
 
 	alignment = intel_surf_alignment(fb, 0);
+	if (WARN_ON(alignment && !is_power_of_2(alignment)))
+		return ERR_PTR(-EINVAL);
 
 	/* Note that the w/a also requires 64 PTE of padding following the
 	 * bo. We currently fill all unused PTE with the shadow page and so
@@ -2099,19 +2234,18 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * pin/unpin/fence and not more.
 	 */
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-	i915_gem_object_lock(obj);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-	pinctl = 0;
-
-	/* Valleyview is definitely limited to scanning out the first
+	/*
+	 * Valleyview is definitely limited to scanning out the first
 	 * 512MiB. Lets presume this behaviour was inherited from the
 	 * g4x display engine and that all earlier gen are similarly
 	 * limited. Testing suggests that it is a little more
 	 * complicated than this. For example, Cherryview appears quite
 	 * happy to scanout from anywhere within its global aperture.
 	 */
+	pinctl = 0;
 	if (HAS_GMCH(dev_priv))
 		pinctl |= PIN_MAPPABLE;
 
@@ -2123,7 +2257,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	if (uses_fence && i915_vma_is_map_and_fenceable(vma)) {
 		int ret;
 
-		/* Install a fence for tiled scan-out. Pre-i965 always needs a
+		/*
+		 * Install a fence for tiled scan-out. Pre-i965 always needs a
 		 * fence, whereas 965+ only requires a fence if using
 		 * framebuffer compression.  For simplicity, we always, when
 		 * possible, install a fence as the cost is not that onerous.
@@ -2153,16 +2288,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	i915_vma_get(vma);
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
-
-	i915_gem_object_unlock(obj);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	return vma;
 }
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
@@ -2191,7 +2322,7 @@ u32 intel_fb_xy_to_linear(int x, int y,
 			  const struct intel_plane_state *state,
 			  int color_plane)
 {
-	const struct drm_framebuffer *fb = state->base.fb;
+	const struct drm_framebuffer *fb = state->hw.fb;
 	unsigned int cpp = fb->format->cpp[color_plane];
 	unsigned int pitch = state->color_plane[color_plane].stride;
 
@@ -2239,9 +2370,10 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
 	return new_offset;
 }
 
-static bool is_surface_linear(u64 modifier, int color_plane)
+static bool is_surface_linear(const struct drm_framebuffer *fb, int color_plane)
 {
-	return modifier == DRM_FORMAT_MOD_LINEAR;
+	return fb->modifier == DRM_FORMAT_MOD_LINEAR ||
+	       is_gen12_ccs_plane(fb, color_plane);
 }
 
 static u32 intel_adjust_aligned_offset(int *x, int *y,
@@ -2256,7 +2388,7 @@ static u32 intel_adjust_aligned_offset(int *x, int *y,
 
 	WARN_ON(new_offset > old_offset);
 
-	if (!is_surface_linear(fb->modifier, color_plane)) {
+	if (!is_surface_linear(fb, color_plane)) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int pitch_tiles;
 
@@ -2292,8 +2424,8 @@ static u32 intel_plane_adjust_aligned_offset(int *x, int *y,
 					     int color_plane,
 					     u32 old_offset, u32 new_offset)
 {
-	return intel_adjust_aligned_offset(x, y, state->base.fb, color_plane,
-					   state->base.rotation,
+	return intel_adjust_aligned_offset(x, y, state->hw.fb, color_plane,
+					   state->hw.rotation,
 					   state->color_plane[color_plane].stride,
 					   old_offset, new_offset);
 }
@@ -2323,10 +2455,7 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv,
 	unsigned int cpp = fb->format->cpp[color_plane];
 	u32 offset, offset_aligned;
 
-	if (alignment)
-		alignment--;
-
-	if (!is_surface_linear(fb->modifier, color_plane)) {
+	if (!is_surface_linear(fb, color_plane)) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int tile_rows, tiles, pitch_tiles;
 
@@ -2347,17 +2476,24 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv,
 		*x %= tile_width;
 
 		offset = (tile_rows * pitch_tiles + tiles) * tile_size;
-		offset_aligned = offset & ~alignment;
+
+		offset_aligned = offset;
+		if (alignment)
+			offset_aligned = rounddown(offset_aligned, alignment);
 
 		intel_adjust_tile_offset(x, y, tile_width, tile_height,
 					 tile_size, pitch_tiles,
 					 offset, offset_aligned);
 	} else {
 		offset = *y * pitch + *x * cpp;
-		offset_aligned = offset & ~alignment;
-
-		*y = (offset & alignment) / pitch;
-		*x = ((offset & alignment) - *y * pitch) / cpp;
+		offset_aligned = offset;
+		if (alignment) {
+			offset_aligned = rounddown(offset_aligned, alignment);
+			*y = (offset % alignment) / pitch;
+			*x = ((offset % alignment) - *y * pitch) / cpp;
+		} else {
+			*y = *x = 0;
+		}
 	}
 
 	return offset_aligned;
@@ -2367,10 +2503,10 @@ static u32 intel_plane_compute_aligned_offset(int *x, int *y,
 					      const struct intel_plane_state *state,
 					      int color_plane)
 {
-	struct intel_plane *intel_plane = to_intel_plane(state->base.plane);
+	struct intel_plane *intel_plane = to_intel_plane(state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev);
-	const struct drm_framebuffer *fb = state->base.fb;
-	unsigned int rotation = state->base.rotation;
+	const struct drm_framebuffer *fb = state->hw.fb;
+	unsigned int rotation = state->hw.rotation;
 	int pitch = state->color_plane[color_plane].stride;
 	u32 alignment;
 
@@ -2390,9 +2526,17 @@ static int intel_fb_offset_to_xy(int *x, int *y,
 {
 	struct drm_i915_private *dev_priv = to_i915(fb->dev);
 	unsigned int height;
+	u32 alignment;
+
+	if (INTEL_GEN(dev_priv) >= 12 &&
+	    is_semiplanar_uv_plane(fb, color_plane))
+		alignment = intel_tile_row_size(fb, color_plane);
+	else if (fb->modifier != DRM_FORMAT_MOD_LINEAR)
+		alignment = intel_tile_size(dev_priv);
+	else
+		alignment = 0;
 
-	if (fb->modifier != DRM_FORMAT_MOD_LINEAR &&
-	    fb->offsets[color_plane] % intel_tile_size(dev_priv)) {
+	if (alignment != 0 && fb->offsets[color_plane] % alignment) {
 		DRM_DEBUG_KMS("Misaligned offset 0x%08x for color plane %d\n",
 			      fb->offsets[color_plane], color_plane);
 		return -EINVAL;
@@ -2428,6 +2572,8 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
 		return I915_TILING_X;
 	case I915_FORMAT_MOD_Y_TILED:
 	case I915_FORMAT_MOD_Y_TILED_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		return I915_TILING_Y;
 	default:
 		return I915_TILING_NONE;
@@ -2448,7 +2594,7 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
  * us a ratio of one byte in the CCS for each 8x16 pixels in the
  * main surface.
  */
-static const struct drm_format_info ccs_formats[] = {
+static const struct drm_format_info skl_ccs_formats[] = {
 	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
 	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
 	{ .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
@@ -2459,6 +2605,52 @@ static const struct drm_format_info ccs_formats[] = {
 	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, },
 };
 
+/*
+ * Gen-12 compression uses 4 bits of CCS data for each cache line pair in the
+ * main surface. And each 64B CCS cache line represents an area of 4x1 Y-tiles
+ * in the main surface. With 4 byte pixels and each Y-tile having dimensions of
+ * 32x32 pixels, the ratio turns out to 1B in the CCS for every 2x32 pixels in
+ * the main surface.
+ */
+static const struct drm_format_info gen12_ccs_formats[] = {
+	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
+	  .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 1, .vsub = 1, },
+	{ .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
+	  .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 1, .vsub = 1, },
+	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2,
+	  .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 1, .vsub = 1, .has_alpha = true },
+	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
+	  .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 1, .vsub = 1, .has_alpha = true },
+	{ .format = DRM_FORMAT_YUYV, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_YVYU, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_UYVY, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_VYUY, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_NV12, .num_planes = 4,
+	  .char_per_block = { 1, 2, 1, 1 }, .block_w = { 1, 1, 4, 4 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P010, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P012, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P016, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+};
+
 static const struct drm_format_info *
 lookup_format_info(const struct drm_format_info formats[],
 		   int num_formats, u32 format)
@@ -2479,8 +2671,13 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
 	switch (cmd->modifier[0]) {
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
-		return lookup_format_info(ccs_formats,
-					  ARRAY_SIZE(ccs_formats),
+		return lookup_format_info(skl_ccs_formats,
+					  ARRAY_SIZE(skl_ccs_formats),
+					  cmd->pixel_format);
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		return lookup_format_info(gen12_ccs_formats,
+					  ARRAY_SIZE(gen12_ccs_formats),
 					  cmd->pixel_format);
 	default:
 		return NULL;
@@ -2489,10 +2686,18 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
 
 bool is_ccs_modifier(u64 modifier)
 {
-	return modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
+	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	       modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
+	       modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 	       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
 }
 
+static int gen12_ccs_aux_stride(struct drm_framebuffer *fb, int ccs_plane)
+{
+	return DIV_ROUND_UP(fb->pitches[ccs_to_main_plane(fb, ccs_plane)],
+			    512) * 64;
+}
+
 u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
 			      u32 pixel_format, u64 modifier)
 {
@@ -2537,8 +2742,9 @@ static u32
 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
 {
 	struct drm_i915_private *dev_priv = to_i915(fb->dev);
+	u32 tile_width;
 
-	if (fb->modifier == DRM_FORMAT_MOD_LINEAR) {
+	if (is_surface_linear(fb, color_plane)) {
 		u32 max_stride = intel_plane_fb_max_stride(dev_priv,
 							   fb->format->format,
 							   fb->modifier);
@@ -2547,20 +2753,41 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
 		 * To make remapping with linear generally feasible
 		 * we need the stride to be page aligned.
 		 */
-		if (fb->pitches[color_plane] > max_stride)
+		if (fb->pitches[color_plane] > max_stride &&
+		    !is_ccs_modifier(fb->modifier))
 			return intel_tile_size(dev_priv);
 		else
 			return 64;
-	} else {
-		return intel_tile_width_bytes(fb, color_plane);
 	}
+
+	tile_width = intel_tile_width_bytes(fb, color_plane);
+	if (is_ccs_modifier(fb->modifier)) {
+		/*
+		 * Display WA #0531: skl,bxt,kbl,glk
+		 *
+		 * Render decompression and plane width > 3840
+		 * combined with horizontal panning requires the
+		 * plane stride to be a multiple of 4. We'll just
+		 * require the entire fb to accommodate that to avoid
+		 * potential runtime errors at plane configuration time.
+		 */
+		if (IS_GEN(dev_priv, 9) && color_plane == 0 && fb->width > 3840)
+			tile_width *= 4;
+		/*
+		 * The main surface pitch must be padded to a multiple of four
+		 * tile widths.
+		 */
+		else if (INTEL_GEN(dev_priv) >= 12)
+			tile_width *= 4;
+	}
+	return tile_width;
 }
 
 bool intel_plane_can_remap(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int i;
 
 	/* We don't want to deal with remapping with cursors */
@@ -2598,16 +2825,16 @@ bool intel_plane_can_remap(const struct intel_plane_state *plane_state)
 
 static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	u32 stride, max_stride;
 
 	/*
 	 * No remapping for invisible planes since we don't have
 	 * an actual source viewport to remap.
 	 */
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return false;
 
 	if (!intel_plane_can_remap(plane_state))
@@ -2624,12 +2851,171 @@ static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state)
 	return stride > max_stride;
 }
 
+static void
+intel_fb_plane_get_subsampling(int *hsub, int *vsub,
+			       const struct drm_framebuffer *fb,
+			       int color_plane)
+{
+	int main_plane;
+
+	if (color_plane == 0) {
+		*hsub = 1;
+		*vsub = 1;
+
+		return;
+	}
+
+	/*
+	 * TODO: Deduct the subsampling from the char block for all CCS
+	 * formats and planes.
+	 */
+	if (!is_gen12_ccs_plane(fb, color_plane)) {
+		*hsub = fb->format->hsub;
+		*vsub = fb->format->vsub;
+
+		return;
+	}
+
+	main_plane = ccs_to_main_plane(fb, color_plane);
+	*hsub = drm_format_info_block_width(fb->format, color_plane) /
+		drm_format_info_block_width(fb->format, main_plane);
+
+	/*
+	 * The min stride check in the core framebuffer_check() function
+	 * assumes that format->hsub applies to every plane except for the
+	 * first plane. That's incorrect for the CCS AUX plane of the first
+	 * plane, but for the above check to pass we must define the block
+	 * width with that subsampling applied to it. Adjust the width here
+	 * accordingly, so we can calculate the actual subsampling factor.
+	 */
+	if (main_plane == 0)
+		*hsub *= fb->format->hsub;
+
+	*vsub = 32;
+}
+static int
+intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	int main_plane;
+	int hsub, vsub;
+	int tile_width, tile_height;
+	int ccs_x, ccs_y;
+	int main_x, main_y;
+
+	if (!is_ccs_plane(fb, ccs_plane))
+		return 0;
+
+	intel_tile_dims(fb, ccs_plane, &tile_width, &tile_height);
+	intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane);
+
+	tile_width *= hsub;
+	tile_height *= vsub;
+
+	ccs_x = (x * hsub) % tile_width;
+	ccs_y = (y * vsub) % tile_height;
+
+	main_plane = ccs_to_main_plane(fb, ccs_plane);
+	main_x = intel_fb->normal[main_plane].x % tile_width;
+	main_y = intel_fb->normal[main_plane].y % tile_height;
+
+	/*
+	 * CCS doesn't have its own x/y offset register, so the intra CCS tile
+	 * x/y offsets must match between CCS and the main surface.
+	 */
+	if (main_x != ccs_x || main_y != ccs_y) {
+		DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n",
+			      main_x, main_y,
+			      ccs_x, ccs_y,
+			      intel_fb->normal[main_plane].x,
+			      intel_fb->normal[main_plane].y,
+			      x, y);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+intel_fb_plane_dims(int *w, int *h, struct drm_framebuffer *fb, int color_plane)
+{
+	int main_plane = is_ccs_plane(fb, color_plane) ?
+			 ccs_to_main_plane(fb, color_plane) : 0;
+	int main_hsub, main_vsub;
+	int hsub, vsub;
+
+	intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, main_plane);
+	intel_fb_plane_get_subsampling(&hsub, &vsub, fb, color_plane);
+	*w = fb->width / main_hsub / hsub;
+	*h = fb->height / main_vsub / vsub;
+}
+
+/*
+ * Setup the rotated view for an FB plane and return the size the GTT mapping
+ * requires for this view.
+ */
+static u32
+setup_fb_rotation(int plane, const struct intel_remapped_plane_info *plane_info,
+		  u32 gtt_offset_rotated, int x, int y,
+		  unsigned int width, unsigned int height,
+		  unsigned int tile_size,
+		  unsigned int tile_width, unsigned int tile_height,
+		  struct drm_framebuffer *fb)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct intel_rotation_info *rot_info = &intel_fb->rot_info;
+	unsigned int pitch_tiles;
+	struct drm_rect r;
+
+	/* Y or Yf modifiers required for 90/270 rotation */
+	if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
+	    fb->modifier != I915_FORMAT_MOD_Yf_TILED)
+		return 0;
+
+	if (WARN_ON(plane >= ARRAY_SIZE(rot_info->plane)))
+		return 0;
+
+	rot_info->plane[plane] = *plane_info;
+
+	intel_fb->rotated[plane].pitch = plane_info->height * tile_height;
+
+	/* rotate the x/y offsets to match the GTT view */
+	drm_rect_init(&r, x, y, width, height);
+	drm_rect_rotate(&r,
+			plane_info->width * tile_width,
+			plane_info->height * tile_height,
+			DRM_MODE_ROTATE_270);
+	x = r.x1;
+	y = r.y1;
+
+	/* rotate the tile dimensions to match the GTT view */
+	pitch_tiles = intel_fb->rotated[plane].pitch / tile_height;
+	swap(tile_width, tile_height);
+
+	/*
+	 * We only keep the x/y offsets, so push all of the
+	 * gtt offset into the x/y offsets.
+	 */
+	intel_adjust_tile_offset(&x, &y,
+				 tile_width, tile_height,
+				 tile_size, pitch_tiles,
+				 gtt_offset_rotated * tile_size, 0);
+
+	/*
+	 * First pixel of the framebuffer from
+	 * the start of the rotated gtt mapping.
+	 */
+	intel_fb->rotated[plane].x = x;
+	intel_fb->rotated[plane].y = y;
+
+	return plane_info->width * plane_info->height;
+}
+
 static int
 intel_fill_fb_info(struct drm_i915_private *dev_priv,
 		   struct drm_framebuffer *fb)
 {
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct intel_rotation_info *rot_info = &intel_fb->rot_info;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 gtt_offset_rotated = 0;
 	unsigned int max_size = 0;
@@ -2644,8 +3030,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 		int ret;
 
 		cpp = fb->format->cpp[i];
-		width = drm_framebuffer_plane_width(fb->width, fb, i);
-		height = drm_framebuffer_plane_height(fb->height, fb, i);
+		intel_fb_plane_dims(&width, &height, fb, i);
 
 		ret = intel_fb_offset_to_xy(&x, &y, fb, i);
 		if (ret) {
@@ -2654,36 +3039,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 			return ret;
 		}
 
-		if (is_ccs_modifier(fb->modifier) && i == 1) {
-			int hsub = fb->format->hsub;
-			int vsub = fb->format->vsub;
-			int tile_width, tile_height;
-			int main_x, main_y;
-			int ccs_x, ccs_y;
-
-			intel_tile_dims(fb, i, &tile_width, &tile_height);
-			tile_width *= hsub;
-			tile_height *= vsub;
-
-			ccs_x = (x * hsub) % tile_width;
-			ccs_y = (y * vsub) % tile_height;
-			main_x = intel_fb->normal[0].x % tile_width;
-			main_y = intel_fb->normal[0].y % tile_height;
-
-			/*
-			 * CCS doesn't have its own x/y offset register, so the intra CCS tile
-			 * x/y offsets must match between CCS and the main surface.
-			 */
-			if (main_x != ccs_x || main_y != ccs_y) {
-				DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n",
-					      main_x, main_y,
-					      ccs_x, ccs_y,
-					      intel_fb->normal[0].x,
-					      intel_fb->normal[0].y,
-					      x, y);
-				return -EINVAL;
-			}
-		}
+		ret = intel_fb_check_ccs_xy(fb, i, x, y);
+		if (ret)
+			return ret;
 
 		/*
 		 * The fence (if used) is aligned to the start of the object
@@ -2714,23 +3072,21 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 						      tile_size);
 		offset /= tile_size;
 
-		if (!is_surface_linear(fb->modifier, i)) {
+		if (!is_surface_linear(fb, i)) {
+			struct intel_remapped_plane_info plane_info;
 			unsigned int tile_width, tile_height;
-			unsigned int pitch_tiles;
-			struct drm_rect r;
 
 			intel_tile_dims(fb, i, &tile_width, &tile_height);
 
-			rot_info->plane[i].offset = offset;
-			rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
-			rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
-			rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
-
-			intel_fb->rotated[i].pitch =
-				rot_info->plane[i].height * tile_height;
+			plane_info.offset = offset;
+			plane_info.stride = DIV_ROUND_UP(fb->pitches[i],
+							 tile_width * cpp);
+			plane_info.width = DIV_ROUND_UP(x + width, tile_width);
+			plane_info.height = DIV_ROUND_UP(y + height,
+							 tile_height);
 
 			/* how many tiles does this plane need */
-			size = rot_info->plane[i].stride * rot_info->plane[i].height;
+			size = plane_info.stride * plane_info.height;
 			/*
 			 * If the plane isn't horizontally tile aligned,
 			 * we need one more tile.
@@ -2738,39 +3094,13 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 			if (x != 0)
 				size++;
 
-			/* rotate the x/y offsets to match the GTT view */
-			r.x1 = x;
-			r.y1 = y;
-			r.x2 = x + width;
-			r.y2 = y + height;
-			drm_rect_rotate(&r,
-					rot_info->plane[i].width * tile_width,
-					rot_info->plane[i].height * tile_height,
-					DRM_MODE_ROTATE_270);
-			x = r.x1;
-			y = r.y1;
-
-			/* rotate the tile dimensions to match the GTT view */
-			pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
-			swap(tile_width, tile_height);
-
-			/*
-			 * We only keep the x/y offsets, so push all of the
-			 * gtt offset into the x/y offsets.
-			 */
-			intel_adjust_tile_offset(&x, &y,
-						 tile_width, tile_height,
-						 tile_size, pitch_tiles,
-						 gtt_offset_rotated * tile_size, 0);
-
-			gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
-
-			/*
-			 * First pixel of the framebuffer from
-			 * the start of the rotated gtt mapping.
-			 */
-			intel_fb->rotated[i].x = x;
-			intel_fb->rotated[i].y = y;
+			gtt_offset_rotated +=
+				setup_fb_rotation(i, &plane_info,
+						  gtt_offset_rotated,
+						  x, y, width, height,
+						  tile_size,
+						  tile_width, tile_height,
+						  fb);
 		} else {
 			size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
 					    x * cpp, tile_size);
@@ -2793,11 +3123,11 @@ static void
 intel_plane_remap_gtt(struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	struct drm_framebuffer *fb = plane_state->base.fb;
+		to_i915(plane_state->uapi.plane->dev);
+	struct drm_framebuffer *fb = plane_state->hw.fb;
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 	struct intel_rotation_info *info = &plane_state->view.rotated;
-	unsigned int rotation = plane_state->base.rotation;
+	unsigned int rotation = plane_state->hw.rotation;
 	int i, num_planes = fb->format->num_planes;
 	unsigned int tile_size = intel_tile_size(dev_priv);
 	unsigned int src_x, src_y;
@@ -2808,20 +3138,20 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state)
 	plane_state->view.type = drm_rotation_90_or_270(rotation) ?
 		I915_GGTT_VIEW_ROTATED : I915_GGTT_VIEW_REMAPPED;
 
-	src_x = plane_state->base.src.x1 >> 16;
-	src_y = plane_state->base.src.y1 >> 16;
-	src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	src_x = plane_state->uapi.src.x1 >> 16;
+	src_y = plane_state->uapi.src.y1 >> 16;
+	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
 
 	WARN_ON(is_ccs_modifier(fb->modifier));
 
 	/* Make src coordinates relative to the viewport */
-	drm_rect_translate(&plane_state->base.src,
+	drm_rect_translate(&plane_state->uapi.src,
 			   -(src_x << 16), -(src_y << 16));
 
 	/* Rotate src coordinates to match rotated GTT view */
 	if (drm_rotation_90_or_270(rotation))
-		drm_rect_rotate(&plane_state->base.src,
+		drm_rect_rotate(&plane_state->uapi.src,
 				src_w << 16, src_h << 16,
 				DRM_MODE_ROTATE_270);
 
@@ -2854,6 +3184,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state)
 						      DRM_MODE_ROTATE_0, tile_size);
 		offset /= tile_size;
 
+		WARN_ON(i >= ARRAY_SIZE(info->plane));
 		info->plane[i].offset = offset;
 		info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i],
 						     tile_width * cpp);
@@ -2864,10 +3195,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state)
 			struct drm_rect r;
 
 			/* rotate the x/y offsets to match the GTT view */
-			r.x1 = x;
-			r.y1 = y;
-			r.x2 = x + width;
-			r.y2 = y + height;
+			drm_rect_init(&r, x, y, width, height);
 			drm_rect_rotate(&r,
 					info->plane[i].width * tile_width,
 					info->plane[i].height * tile_height,
@@ -2906,8 +3234,8 @@ static int
 intel_plane_compute_gtt(struct intel_plane_state *plane_state)
 {
 	const struct intel_framebuffer *fb =
-		to_intel_framebuffer(plane_state->base.fb);
-	unsigned int rotation = plane_state->base.rotation;
+		to_intel_framebuffer(plane_state->hw.fb);
+	unsigned int rotation = plane_state->hw.rotation;
 	int i, num_planes;
 
 	if (!fb)
@@ -2944,7 +3272,7 @@ intel_plane_compute_gtt(struct intel_plane_state *plane_state)
 
 	/* Rotate src coordinates to match rotated GTT view */
 	if (drm_rotation_90_or_270(rotation))
-		drm_rect_rotate(&plane_state->base.src,
+		drm_rect_rotate(&plane_state->uapi.src,
 				fb->base.width << 16, fb->base.height << 16,
 				DRM_MODE_ROTATE_270);
 
@@ -2956,6 +3284,8 @@ static int i9xx_format_to_fourcc(int format)
 	switch (format) {
 	case DISPPLANE_8BPP:
 		return DRM_FORMAT_C8;
+	case DISPPLANE_BGRA555:
+		return DRM_FORMAT_ARGB1555;
 	case DISPPLANE_BGRX555:
 		return DRM_FORMAT_XRGB1555;
 	case DISPPLANE_BGRX565:
@@ -2965,10 +3295,20 @@ static int i9xx_format_to_fourcc(int format)
 		return DRM_FORMAT_XRGB8888;
 	case DISPPLANE_RGBX888:
 		return DRM_FORMAT_XBGR8888;
+	case DISPPLANE_BGRA888:
+		return DRM_FORMAT_ARGB8888;
+	case DISPPLANE_RGBA888:
+		return DRM_FORMAT_ABGR8888;
 	case DISPPLANE_BGRX101010:
 		return DRM_FORMAT_XRGB2101010;
 	case DISPPLANE_RGBX101010:
 		return DRM_FORMAT_XBGR2101010;
+	case DISPPLANE_BGRA101010:
+		return DRM_FORMAT_ARGB2101010;
+	case DISPPLANE_RGBA101010:
+		return DRM_FORMAT_ABGR2101010;
+	case DISPPLANE_RGBX161616:
+		return DRM_FORMAT_XBGR16161616F;
 	}
 }
 
@@ -3011,10 +3351,17 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
 				return DRM_FORMAT_XRGB8888;
 		}
 	case PLANE_CTL_FORMAT_XRGB_2101010:
-		if (rgb_order)
-			return DRM_FORMAT_XBGR2101010;
-		else
-			return DRM_FORMAT_XRGB2101010;
+		if (rgb_order) {
+			if (alpha)
+				return DRM_FORMAT_ABGR2101010;
+			else
+				return DRM_FORMAT_XBGR2101010;
+		} else {
+			if (alpha)
+				return DRM_FORMAT_ARGB2101010;
+			else
+				return DRM_FORMAT_XRGB2101010;
+		}
 	case PLANE_CTL_FORMAT_XRGB_16161616F:
 		if (rgb_order) {
 			if (alpha)
@@ -3066,13 +3413,11 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		return false;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
 							     base_aligned,
 							     base_aligned,
 							     size_aligned);
-	mutex_unlock(&dev->struct_mutex);
-	if (!obj)
+	if (IS_ERR(obj))
 		return false;
 
 	switch (plane_config->tiling) {
@@ -3112,19 +3457,19 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state,
 			struct intel_plane_state *plane_state,
 			bool visible)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 
-	plane_state->base.visible = visible;
+	plane_state->uapi.visible = visible;
 
 	if (visible)
-		crtc_state->base.plane_mask |= drm_plane_mask(&plane->base);
+		crtc_state->uapi.plane_mask |= drm_plane_mask(&plane->base);
 	else
-		crtc_state->base.plane_mask &= ~drm_plane_mask(&plane->base);
+		crtc_state->uapi.plane_mask &= ~drm_plane_mask(&plane->base);
 }
 
 static void fixup_active_planes(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	struct drm_plane *plane;
 
 	/*
@@ -3135,13 +3480,14 @@ static void fixup_active_planes(struct intel_crtc_state *crtc_state)
 	crtc_state->active_planes = 0;
 
 	drm_for_each_plane_mask(plane, &dev_priv->drm,
-				crtc_state->base.plane_mask)
+				crtc_state->uapi.plane_mask)
 		crtc_state->active_planes |= BIT(to_intel_plane(plane)->id);
 }
 
 static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 					 struct intel_plane *plane)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_crtc_state *crtc_state =
 		to_intel_crtc_state(crtc->base.state);
 	struct intel_plane_state *plane_state =
@@ -3154,9 +3500,30 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 	intel_set_plane_visible(crtc_state, plane_state, false);
 	fixup_active_planes(crtc_state);
 	crtc_state->data_rate[plane->id] = 0;
+	crtc_state->min_cdclk[plane->id] = 0;
 
 	if (plane->id == PLANE_PRIMARY)
-		intel_pre_disable_primary_noatomic(&crtc->base);
+		hsw_disable_ips(crtc_state);
+
+	/*
+	 * Vblank time updates from the shadow to live plane control register
+	 * are blocked if the memory self-refresh mode is active at that
+	 * moment. So to make sure the plane gets truly disabled, disable
+	 * first the self-refresh mode. The self-refresh enable bit in turn
+	 * will be checked/applied by the HW only at the next frame start
+	 * event which is after the vblank start event, so we need to have a
+	 * wait-for-vblank between disabling the plane and the pipe.
+	 */
+	if (HAS_GMCH(dev_priv) &&
+	    intel_set_memory_cxsr(dev_priv, false))
+		intel_wait_for_vblank(dev_priv, crtc->pipe);
+
+	/*
+	 * Gen2 reports pipe underruns whenever all planes are disabled.
+	 * So disable underrun reporting before all the planes get disabled.
+	 */
+	if (IS_GEN(dev_priv, 2) && !crtc_state->active_planes)
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
 
 	intel_disable_plane(plane, crtc_state);
 }
@@ -3209,7 +3576,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			continue;
 
 		if (intel_plane_ggtt_offset(state) == plane_config->base) {
-			fb = state->base.fb;
+			fb = state->hw.fb;
 			drm_framebuffer_get(fb);
 			goto valid_fb;
 		}
@@ -3227,19 +3594,17 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	return;
 
 valid_fb:
-	intel_state->base.rotation = plane_config->rotation;
+	intel_state->hw.rotation = plane_config->rotation;
 	intel_fill_fb_ggtt_view(&intel_state->view, fb,
-				intel_state->base.rotation);
+				intel_state->hw.rotation);
 	intel_state->color_plane[0].stride =
-		intel_fb_pitch(fb, 0, intel_state->base.rotation);
+		intel_fb_pitch(fb, 0, intel_state->hw.rotation);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_state->vma =
 		intel_pin_and_fence_fb_obj(fb,
 					   &intel_state->view,
 					   intel_plane_uses_fence(intel_state),
 					   &intel_state->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(intel_state->vma)) {
 		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
 			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
@@ -3261,14 +3626,15 @@ valid_fb:
 	plane_state->crtc_w = fb->width;
 	plane_state->crtc_h = fb->height;
 
-	intel_state->base.src = drm_plane_state_src(plane_state);
-	intel_state->base.dst = drm_plane_state_dest(plane_state);
+	intel_state->uapi.src = drm_plane_state_src(plane_state);
+	intel_state->uapi.dst = drm_plane_state_dest(plane_state);
 
 	if (plane_config->tiling)
 		dev_priv->preserve_bios_swizzle = true;
 
 	plane_state->fb = fb;
 	plane_state->crtc = &intel_crtc->base;
+	intel_plane_copy_uapi_to_hw_state(intel_state, intel_state);
 
 	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
 		  &to_intel_frontbuffer(fb)->bits);
@@ -3299,6 +3665,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
 			return 5120;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		/* FIXME AUX plane? */
 	case I915_FORMAT_MOD_Y_TILED:
 	case I915_FORMAT_MOD_Yf_TILED:
@@ -3347,17 +3714,30 @@ static int icl_max_plane_width(const struct drm_framebuffer *fb,
 	return 5120;
 }
 
-static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
-					   int main_x, int main_y, u32 main_offset)
+static int skl_max_plane_height(void)
+{
+	return 4096;
+}
+
+static int icl_max_plane_height(void)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	int hsub = fb->format->hsub;
-	int vsub = fb->format->vsub;
-	int aux_x = plane_state->color_plane[1].x;
-	int aux_y = plane_state->color_plane[1].y;
-	u32 aux_offset = plane_state->color_plane[1].offset;
-	u32 alignment = intel_surf_alignment(fb, 1);
+	return 4320;
+}
 
+static bool
+skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
+			       int main_x, int main_y, u32 main_offset,
+			       int ccs_plane)
+{
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	int aux_x = plane_state->color_plane[ccs_plane].x;
+	int aux_y = plane_state->color_plane[ccs_plane].y;
+	u32 aux_offset = plane_state->color_plane[ccs_plane].offset;
+	u32 alignment = intel_surf_alignment(fb, ccs_plane);
+	int hsub;
+	int vsub;
+
+	intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane);
 	while (aux_offset >= main_offset && aux_y <= main_y) {
 		int x, y;
 
@@ -3369,8 +3749,12 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state
 
 		x = aux_x / hsub;
 		y = aux_y / vsub;
-		aux_offset = intel_plane_adjust_aligned_offset(&x, &y, plane_state, 1,
-							       aux_offset, aux_offset - alignment);
+		aux_offset = intel_plane_adjust_aligned_offset(&x, &y,
+							       plane_state,
+							       ccs_plane,
+							       aux_offset,
+							       aux_offset -
+								alignment);
 		aux_x = x * hsub + aux_x % hsub;
 		aux_y = y * vsub + aux_y % vsub;
 	}
@@ -3378,25 +3762,28 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state
 	if (aux_x != main_x || aux_y != main_y)
 		return false;
 
-	plane_state->color_plane[1].offset = aux_offset;
-	plane_state->color_plane[1].x = aux_x;
-	plane_state->color_plane[1].y = aux_y;
+	plane_state->color_plane[ccs_plane].offset = aux_offset;
+	plane_state->color_plane[ccs_plane].x = aux_x;
+	plane_state->color_plane[ccs_plane].y = aux_y;
 
 	return true;
 }
 
 static int skl_check_main_surface(struct intel_plane_state *plane_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
-	int x = plane_state->base.src.x1 >> 16;
-	int y = plane_state->base.src.y1 >> 16;
-	int w = drm_rect_width(&plane_state->base.src) >> 16;
-	int h = drm_rect_height(&plane_state->base.src) >> 16;
+	struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
+	int x = plane_state->uapi.src.x1 >> 16;
+	int y = plane_state->uapi.src.y1 >> 16;
+	int w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	int h = drm_rect_height(&plane_state->uapi.src) >> 16;
 	int max_width;
-	int max_height = 4096;
-	u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset;
+	int max_height;
+	u32 alignment;
+	u32 offset;
+	int aux_plane = intel_main_to_aux_plane(fb, 0);
+	u32 aux_offset = plane_state->color_plane[aux_plane].offset;
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		max_width = icl_max_plane_width(fb, 0, rotation);
@@ -3405,6 +3792,11 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	else
 		max_width = skl_max_plane_width(fb, 0, rotation);
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		max_height = icl_max_plane_height();
+	else
+		max_height = skl_max_plane_height();
+
 	if (w > max_width || h > max_height) {
 		DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
 			      w, h, max_width, max_height);
@@ -3414,6 +3806,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	intel_add_fb_offsets(&x, &y, plane_state, 0);
 	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 0);
 	alignment = intel_surf_alignment(fb, 0);
+	if (WARN_ON(alignment && !is_power_of_2(alignment)))
+		return -EINVAL;
 
 	/*
 	 * AUX surface offset is specified as the distance from the
@@ -3449,7 +3843,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	 * they match with the main surface x/y offsets.
 	 */
 	if (is_ccs_modifier(fb->modifier)) {
-		while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) {
+		while (!skl_check_main_ccs_coordinates(plane_state, x, y,
+						       offset, aux_plane)) {
 			if (offset == 0)
 				break;
 
@@ -3457,7 +3852,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 								   offset, offset - alignment);
 		}
 
-		if (x != plane_state->color_plane[1].x || y != plane_state->color_plane[1].y) {
+		if (x != plane_state->color_plane[aux_plane].x ||
+		    y != plane_state->color_plane[aux_plane].y) {
 			DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n");
 			return -EINVAL;
 		}
@@ -3471,27 +3867,28 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	 * Put the final coordinates back so that the src
 	 * coordinate checks will see the right values.
 	 */
-	drm_rect_translate(&plane_state->base.src,
-			   (x << 16) - plane_state->base.src.x1,
-			   (y << 16) - plane_state->base.src.y1);
+	drm_rect_translate_to(&plane_state->uapi.src,
+			      x << 16, y << 16);
 
 	return 0;
 }
 
 static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
-	int max_width = skl_max_plane_width(fb, 1, rotation);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
+	int uv_plane = 1;
+	int max_width = skl_max_plane_width(fb, uv_plane, rotation);
 	int max_height = 4096;
-	int x = plane_state->base.src.x1 >> 17;
-	int y = plane_state->base.src.y1 >> 17;
-	int w = drm_rect_width(&plane_state->base.src) >> 17;
-	int h = drm_rect_height(&plane_state->base.src) >> 17;
+	int x = plane_state->uapi.src.x1 >> 17;
+	int y = plane_state->uapi.src.y1 >> 17;
+	int w = drm_rect_width(&plane_state->uapi.src) >> 17;
+	int h = drm_rect_height(&plane_state->uapi.src) >> 17;
 	u32 offset;
 
-	intel_add_fb_offsets(&x, &y, plane_state, 1);
-	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1);
+	intel_add_fb_offsets(&x, &y, plane_state, uv_plane);
+	offset = intel_plane_compute_aligned_offset(&x, &y,
+						    plane_state, uv_plane);
 
 	/* FIXME not quite sure how/if these apply to the chroma plane */
 	if (w > max_width || h > max_height) {
@@ -3500,62 +3897,126 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 		return -EINVAL;
 	}
 
-	plane_state->color_plane[1].offset = offset;
-	plane_state->color_plane[1].x = x;
-	plane_state->color_plane[1].y = y;
+	if (is_ccs_modifier(fb->modifier)) {
+		int ccs_plane = main_to_ccs_plane(fb, uv_plane);
+		int aux_offset = plane_state->color_plane[ccs_plane].offset;
+		int alignment = intel_surf_alignment(fb, uv_plane);
+
+		if (offset > aux_offset)
+			offset = intel_plane_adjust_aligned_offset(&x, &y,
+								   plane_state,
+								   uv_plane,
+								   offset,
+								   aux_offset & ~(alignment - 1));
+
+		while (!skl_check_main_ccs_coordinates(plane_state, x, y,
+						       offset, ccs_plane)) {
+			if (offset == 0)
+				break;
+
+			offset = intel_plane_adjust_aligned_offset(&x, &y,
+								   plane_state,
+								   uv_plane,
+								   offset, offset - alignment);
+		}
+
+		if (x != plane_state->color_plane[ccs_plane].x ||
+		    y != plane_state->color_plane[ccs_plane].y) {
+			DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n");
+			return -EINVAL;
+		}
+	}
+
+	plane_state->color_plane[uv_plane].offset = offset;
+	plane_state->color_plane[uv_plane].x = x;
+	plane_state->color_plane[uv_plane].y = y;
 
 	return 0;
 }
 
 static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	int src_x = plane_state->base.src.x1 >> 16;
-	int src_y = plane_state->base.src.y1 >> 16;
-	int hsub = fb->format->hsub;
-	int vsub = fb->format->vsub;
-	int x = src_x / hsub;
-	int y = src_y / vsub;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	int src_x = plane_state->uapi.src.x1 >> 16;
+	int src_y = plane_state->uapi.src.y1 >> 16;
 	u32 offset;
+	int ccs_plane;
+
+	for (ccs_plane = 0; ccs_plane < fb->format->num_planes; ccs_plane++) {
+		int main_hsub, main_vsub;
+		int hsub, vsub;
+		int x, y;
+
+		if (!is_ccs_plane(fb, ccs_plane))
+			continue;
+
+		intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb,
+					       ccs_to_main_plane(fb, ccs_plane));
+		intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane);
 
-	intel_add_fb_offsets(&x, &y, plane_state, 1);
-	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1);
+		hsub *= main_hsub;
+		vsub *= main_vsub;
+		x = src_x / hsub;
+		y = src_y / vsub;
 
-	plane_state->color_plane[1].offset = offset;
-	plane_state->color_plane[1].x = x * hsub + src_x % hsub;
-	plane_state->color_plane[1].y = y * vsub + src_y % vsub;
+		intel_add_fb_offsets(&x, &y, plane_state, ccs_plane);
+
+		offset = intel_plane_compute_aligned_offset(&x, &y,
+							    plane_state,
+							    ccs_plane);
+
+		plane_state->color_plane[ccs_plane].offset = offset;
+		plane_state->color_plane[ccs_plane].x = (x * hsub +
+							 src_x % hsub) /
+							main_hsub;
+		plane_state->color_plane[ccs_plane].y = (y * vsub +
+							 src_y % vsub) /
+							main_vsub;
+	}
 
 	return 0;
 }
 
 int skl_check_plane_surface(struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int ret;
+	bool needs_aux = false;
 
 	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	/*
-	 * Handle the AUX surface first since
-	 * the main surface setup depends on it.
+	 * Handle the AUX surface first since the main surface setup depends on
+	 * it.
 	 */
-	if (is_planar_yuv_format(fb->format->format)) {
-		ret = skl_check_nv12_aux_surface(plane_state);
+	if (is_ccs_modifier(fb->modifier)) {
+		needs_aux = true;
+		ret = skl_check_ccs_aux_surface(plane_state);
 		if (ret)
 			return ret;
-	} else if (is_ccs_modifier(fb->modifier)) {
-		ret = skl_check_ccs_aux_surface(plane_state);
+	}
+
+	if (intel_format_info_is_yuv_semiplanar(fb->format,
+						fb->modifier)) {
+		needs_aux = true;
+		ret = skl_check_nv12_aux_surface(plane_state);
 		if (ret)
 			return ret;
-	} else {
-		plane_state->color_plane[1].offset = ~0xfff;
-		plane_state->color_plane[1].x = 0;
-		plane_state->color_plane[1].y = 0;
+	}
+
+	if (!needs_aux) {
+		int i;
+
+		for (i = 1; i < fb->format->num_planes; i++) {
+			plane_state->color_plane[i].offset = ~0xfff;
+			plane_state->color_plane[i].x = 0;
+			plane_state->color_plane[i].y = 0;
+		}
 	}
 
 	ret = skl_check_main_surface(plane_state);
@@ -3565,6 +4026,53 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 	return 0;
 }
 
+static void i9xx_plane_ratio(const struct intel_crtc_state *crtc_state,
+			     const struct intel_plane_state *plane_state,
+			     unsigned int *num, unsigned int *den)
+{
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int cpp = fb->format->cpp[0];
+
+	/*
+	 * g4x bspec says 64bpp pixel rate can't exceed 80%
+	 * of cdclk when the sprite plane is enabled on the
+	 * same pipe. ilk/snb bspec says 64bpp pixel rate is
+	 * never allowed to exceed 80% of cdclk. Let's just go
+	 * with the ilk/snb limit always.
+	 */
+	if (cpp == 8) {
+		*num = 10;
+		*den = 8;
+	} else {
+		*num = 1;
+		*den = 1;
+	}
+}
+
+static int i9xx_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+				const struct intel_plane_state *plane_state)
+{
+	unsigned int pixel_rate;
+	unsigned int num, den;
+
+	/*
+	 * Note that crtc_state->pixel_rate accounts for both
+	 * horizontal and vertical panel fitter downscaling factors.
+	 * Pre-HSW bspec tells us to only consider the horizontal
+	 * downscaling factor here. We ignore that and just consider
+	 * both for simplicity.
+	 */
+	pixel_rate = crtc_state->pixel_rate;
+
+	i9xx_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	/* two pixels per clock with double wide pipe */
+	if (crtc_state->double_wide)
+		den *= 2;
+
+	return DIV_ROUND_UP(pixel_rate * num, den);
+}
+
 unsigned int
 i9xx_plane_max_stride(struct intel_plane *plane,
 		      u32 pixel_format, u64 modifier,
@@ -3594,7 +4102,7 @@ i9xx_plane_max_stride(struct intel_plane *plane,
 
 static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 dspcntr = 0;
 
@@ -3614,9 +4122,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	u32 dspcntr;
 
 	dspcntr = DISPLAY_PLANE_ENABLE;
@@ -3632,6 +4140,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_XRGB1555:
 		dspcntr |= DISPPLANE_BGRX555;
 		break;
+	case DRM_FORMAT_ARGB1555:
+		dspcntr |= DISPPLANE_BGRA555;
+		break;
 	case DRM_FORMAT_RGB565:
 		dspcntr |= DISPPLANE_BGRX565;
 		break;
@@ -3641,12 +4152,27 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_XBGR8888:
 		dspcntr |= DISPPLANE_RGBX888;
 		break;
+	case DRM_FORMAT_ARGB8888:
+		dspcntr |= DISPPLANE_BGRA888;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		dspcntr |= DISPPLANE_RGBA888;
+		break;
 	case DRM_FORMAT_XRGB2101010:
 		dspcntr |= DISPPLANE_BGRX101010;
 		break;
 	case DRM_FORMAT_XBGR2101010:
 		dspcntr |= DISPPLANE_RGBX101010;
 		break;
+	case DRM_FORMAT_ARGB2101010:
+		dspcntr |= DISPPLANE_BGRA101010;
+		break;
+	case DRM_FORMAT_ABGR2101010:
+		dspcntr |= DISPPLANE_RGBA101010;
+		break;
+	case DRM_FORMAT_XBGR16161616F:
+		dspcntr |= DISPPLANE_RGBX161616;
+		break;
 	default:
 		MISSING_CASE(fb->format->format);
 		return 0;
@@ -3668,8 +4194,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
 int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	int src_x, src_y;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	int src_x, src_y, src_w;
 	u32 offset;
 	int ret;
 
@@ -3677,11 +4204,16 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
-	src_x = plane_state->base.src.x1 >> 16;
-	src_y = plane_state->base.src.y1 >> 16;
+	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	src_x = plane_state->uapi.src.x1 >> 16;
+	src_y = plane_state->uapi.src.y1 >> 16;
+
+	/* Undocumented hardware limit on i965/g4x/vlv/chv */
+	if (HAS_GMCH(dev_priv) && fb->format->cpp[0] == 8 && src_w > 2048)
+		return -EINVAL;
 
 	intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
 
@@ -3695,15 +4227,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 	 * Put the final coordinates back so that the src
 	 * coordinate checks will see the right values.
 	 */
-	drm_rect_translate(&plane_state->base.src,
-			   (src_x << 16) - plane_state->base.src.x1,
-			   (src_y << 16) - plane_state->base.src.y1);
+	drm_rect_translate_to(&plane_state->uapi.src,
+			      src_x << 16, src_y << 16);
 
 	/* HSW/BDW do this automagically in hardware */
 	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) {
-		unsigned int rotation = plane_state->base.rotation;
-		int src_w = drm_rect_width(&plane_state->base.src) >> 16;
-		int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+		unsigned int rotation = plane_state->hw.rotation;
+		int src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+		int src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
 
 		if (rotation & DRM_MODE_ROTATE_180) {
 			src_x += src_w - 1;
@@ -3740,15 +4271,15 @@ static int
 i9xx_plane_check(struct intel_crtc_state *crtc_state,
 		 struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	int ret;
 
 	ret = chv_plane_check_rotation(plane_state);
 	if (ret)
 		return ret;
 
-	ret = drm_atomic_helper_check_plane_state(&plane_state->base,
-						  &crtc_state->base,
+	ret = drm_atomic_helper_check_plane_state(&plane_state->uapi,
+						  &crtc_state->uapi,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  i9xx_plane_has_windowing(plane),
@@ -3760,7 +4291,7 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	ret = intel_plane_check_src_coordinates(plane_state);
@@ -3781,10 +4312,10 @@ static void i9xx_update_plane(struct intel_plane *plane,
 	u32 linear_offset;
 	int x = plane_state->color_plane[0].x;
 	int y = plane_state->color_plane[0].y;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	int crtc_w = drm_rect_width(&plane_state->base.dst);
-	int crtc_h = drm_rect_height(&plane_state->base.dst);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
+	int crtc_w = drm_rect_width(&plane_state->uapi.dst);
+	int crtc_h = drm_rect_height(&plane_state->uapi.dst);
 	unsigned long irqflags;
 	u32 dspaddr_offset;
 	u32 dspcntr;
@@ -3924,7 +4455,7 @@ static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
  */
 static void skl_detach_scalers(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	const struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	int i;
@@ -3943,7 +4474,7 @@ static unsigned int skl_plane_stride_mult(const struct drm_framebuffer *fb,
 	 * The stride is either expressed as a multiple of 64 bytes chunks for
 	 * linear buffers or in number of tiles for tiled buffers.
 	 */
-	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
+	if (is_surface_linear(fb, color_plane))
 		return 64;
 	else if (drm_rotation_90_or_270(rotation))
 		return intel_tile_height(fb, color_plane);
@@ -3954,8 +4485,8 @@ static unsigned int skl_plane_stride_mult(const struct drm_framebuffer *fb,
 u32 skl_plane_stride(const struct intel_plane_state *plane_state,
 		     int color_plane)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	u32 stride = plane_state->color_plane[color_plane].stride;
 
 	if (color_plane >= fb->format->num_planes)
@@ -3978,8 +4509,10 @@ static u32 skl_plane_ctl_format(u32 pixel_format)
 	case DRM_FORMAT_ARGB8888:
 		return PLANE_CTL_FORMAT_XRGB_8888;
 	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010:
 		return PLANE_CTL_FORMAT_XRGB_2101010 | PLANE_CTL_ORDER_RGBX;
 	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_ARGB2101010:
 		return PLANE_CTL_FORMAT_XRGB_2101010;
 	case DRM_FORMAT_XBGR16161616F:
 	case DRM_FORMAT_ABGR16161616F:
@@ -4024,10 +4557,10 @@ static u32 skl_plane_ctl_format(u32 pixel_format)
 
 static u32 skl_plane_ctl_alpha(const struct intel_plane_state *plane_state)
 {
-	if (!plane_state->base.fb->format->has_alpha)
+	if (!plane_state->hw.fb->format->has_alpha)
 		return PLANE_CTL_ALPHA_DISABLE;
 
-	switch (plane_state->base.pixel_blend_mode) {
+	switch (plane_state->hw.pixel_blend_mode) {
 	case DRM_MODE_BLEND_PIXEL_NONE:
 		return PLANE_CTL_ALPHA_DISABLE;
 	case DRM_MODE_BLEND_PREMULTI:
@@ -4035,17 +4568,17 @@ static u32 skl_plane_ctl_alpha(const struct intel_plane_state *plane_state)
 	case DRM_MODE_BLEND_COVERAGE:
 		return PLANE_CTL_ALPHA_HW_PREMULTIPLY;
 	default:
-		MISSING_CASE(plane_state->base.pixel_blend_mode);
+		MISSING_CASE(plane_state->hw.pixel_blend_mode);
 		return PLANE_CTL_ALPHA_DISABLE;
 	}
 }
 
 static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state)
 {
-	if (!plane_state->base.fb->format->has_alpha)
+	if (!plane_state->hw.fb->format->has_alpha)
 		return PLANE_COLOR_ALPHA_DISABLE;
 
-	switch (plane_state->base.pixel_blend_mode) {
+	switch (plane_state->hw.pixel_blend_mode) {
 	case DRM_MODE_BLEND_PIXEL_NONE:
 		return PLANE_COLOR_ALPHA_DISABLE;
 	case DRM_MODE_BLEND_PREMULTI:
@@ -4053,7 +4586,7 @@ static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state
 	case DRM_MODE_BLEND_COVERAGE:
 		return PLANE_COLOR_ALPHA_HW_PREMULTIPLY;
 	default:
-		MISSING_CASE(plane_state->base.pixel_blend_mode);
+		MISSING_CASE(plane_state->hw.pixel_blend_mode);
 		return PLANE_COLOR_ALPHA_DISABLE;
 	}
 }
@@ -4069,6 +4602,12 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier)
 		return PLANE_CTL_TILED_Y;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 		return PLANE_CTL_TILED_Y | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE;
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+		return PLANE_CTL_TILED_Y |
+		       PLANE_CTL_RENDER_DECOMPRESSION_ENABLE |
+		       PLANE_CTL_CLEAR_COLOR_DISABLE;
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		return PLANE_CTL_TILED_Y | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE;
 	case I915_FORMAT_MOD_Yf_TILED:
 		return PLANE_CTL_TILED_YF;
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
@@ -4119,7 +4658,7 @@ static u32 cnl_plane_ctl_flip(unsigned int reflect)
 
 u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 plane_ctl = 0;
 
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
@@ -4138,9 +4677,9 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
 		  const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 plane_ctl;
 
@@ -4150,10 +4689,10 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
 		plane_ctl |= skl_plane_ctl_alpha(plane_state);
 		plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
 
-		if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+		if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709)
 			plane_ctl |= PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709;
 
-		if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+		if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 			plane_ctl |= PLANE_CTL_YUV_RANGE_CORRECTION_DISABLE;
 	}
 
@@ -4175,7 +4714,7 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
 
 u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 plane_color_ctl = 0;
 
 	if (INTEL_GEN(dev_priv) >= 11)
@@ -4194,21 +4733,21 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	u32 plane_color_ctl = 0;
 
 	plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE;
 	plane_color_ctl |= glk_plane_color_ctl_alpha(plane_state);
 
 	if (fb->format->is_yuv && !icl_is_hdr_plane(dev_priv, plane->id)) {
-		if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+		if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709)
 			plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709;
 		else
 			plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709;
 
-		if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+		if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 			plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE;
 	} else if (fb->format->is_yuv) {
 		plane_color_ctl |= PLANE_COLOR_INPUT_CSC_ENABLE;
@@ -4227,7 +4766,7 @@ __intel_display_resume(struct drm_device *dev,
 	int i, ret;
 
 	intel_modeset_setup_hw_state(dev, ctx);
-	i915_redisable_vga(to_i915(dev));
+	intel_vga_redisable(to_i915(dev));
 
 	if (!state)
 		return 0;
@@ -4259,7 +4798,7 @@ __intel_display_resume(struct drm_device *dev,
 static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 {
 	return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display &&
-		intel_has_gpu_reset(dev_priv));
+		intel_has_gpu_reset(&dev_priv->gt));
 }
 
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
@@ -4346,7 +4885,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 		 * so need a full re-initialization.
 		 */
 		intel_pps_unlock_regs_wa(dev_priv);
-		intel_modeset_init_hw(dev);
+		intel_modeset_init_hw(dev_priv);
 		intel_init_clock_gating(dev_priv);
 
 		spin_lock_irq(&dev_priv->irq_lock);
@@ -4394,50 +4933,41 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc)
 	I915_WRITE(PIPE_CHICKEN(pipe), tmp);
 }
 
-static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state,
-				     const struct intel_crtc_state *new_crtc_state)
+static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
-	/* drm_atomic_helper_update_legacy_modeset_state might not be called. */
-	crtc->base.mode = new_crtc_state->base.mode;
+	u32 trans_ddi_func_ctl2_val;
+	u8 master_select;
 
 	/*
-	 * Update pipe size and adjust fitter if needed: the reason for this is
-	 * that in compute_mode_changes we check the native mode (not the pfit
-	 * mode) to see if we can flip rather than do a full mode set. In the
-	 * fastboot case, we'll flip, but if we don't update the pipesrc and
-	 * pfit state, we'll end up with a big fb scanned out into the wrong
-	 * sized surface.
+	 * Configure the master select and enable Transcoder Port Sync for
+	 * Slave CRTCs transcoder.
 	 */
+	if (crtc_state->master_transcoder == INVALID_TRANSCODER)
+		return;
 
-	I915_WRITE(PIPESRC(crtc->pipe),
-		   ((new_crtc_state->pipe_src_w - 1) << 16) |
-		   (new_crtc_state->pipe_src_h - 1));
-
-	/* on skylake this is done by detaching scalers */
-	if (INTEL_GEN(dev_priv) >= 9) {
-		skl_detach_scalers(new_crtc_state);
+	if (crtc_state->master_transcoder == TRANSCODER_EDP)
+		master_select = 0;
+	else
+		master_select = crtc_state->master_transcoder + 1;
 
-		if (new_crtc_state->pch_pfit.enabled)
-			skylake_pfit_enable(new_crtc_state);
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		if (new_crtc_state->pch_pfit.enabled)
-			ironlake_pfit_enable(new_crtc_state);
-		else if (old_crtc_state->pch_pfit.enabled)
-			ironlake_pfit_disable(old_crtc_state);
-	}
+	/* Set the master select bits for Tranascoder Port Sync */
+	trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) &
+				   PORT_SYNC_MODE_MASTER_SELECT_MASK) <<
+		PORT_SYNC_MODE_MASTER_SELECT_SHIFT;
+	/* Enable Transcoder Port Sync */
+	trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE;
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		icl_set_pipe_chicken(crtc);
+	I915_WRITE(TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder),
+		   trans_ddi_func_ctl2_val);
 }
 
 static void intel_fdi_normal_train(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 temp;
 
@@ -4475,17 +5005,17 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc)
 }
 
 /* The FDI link training functions for ILK/Ibexpeak. */
-static void ironlake_fdi_link_train(struct intel_crtc *crtc,
-				    const struct intel_crtc_state *crtc_state)
+static void ilk_fdi_link_train(struct intel_crtc *crtc,
+			       const struct intel_crtc_state *crtc_state)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 temp, tries;
 
 	/* FDI needs bits from pipe first */
-	assert_pipe_enabled(dev_priv, pipe);
+	assert_pipe_enabled(dev_priv, crtc_state->cpu_transcoder);
 
 	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -4581,7 +5111,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 temp, i, retry;
 
@@ -4714,7 +5244,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 temp, i, j;
 
@@ -4828,11 +5358,11 @@ train_done:
 	DRM_DEBUG_KMS("FDI train done.\n");
 }
 
-static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
+static void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-	int pipe = intel_crtc->pipe;
+	enum pipe pipe = intel_crtc->pipe;
 	i915_reg_t reg;
 	u32 temp;
 
@@ -4865,11 +5395,11 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
 	}
 }
 
-static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc)
+static void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = intel_crtc->pipe;
+	enum pipe pipe = intel_crtc->pipe;
 	i915_reg_t reg;
 	u32 temp;
 
@@ -4895,12 +5425,10 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc)
 	udelay(100);
 }
 
-static void ironlake_fdi_disable(struct drm_crtc *crtc)
+static void ilk_fdi_disable(struct intel_crtc *crtc)
 {
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 temp;
 
@@ -4991,9 +5519,9 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
 /* Program iCLKIP clock to the desired frequency */
 static void lpt_program_iclkip(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int clock = crtc_state->base.adjusted_mode.crtc_clock;
+	int clock = crtc_state->hw.adjusted_mode.crtc_clock;
 	u32 divsel, phaseinc, auxdiv, phasedir = 0;
 	u32 temp;
 
@@ -5104,10 +5632,10 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv)
 				 desired_divisor << auxdiv);
 }
 
-static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state,
-						enum pipe pch_transcoder)
+static void ilk_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state,
+					   enum pipe pch_transcoder)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 
@@ -5148,9 +5676,9 @@ static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool e
 	POSTING_READ(SOUTH_CHICKEN1);
 }
 
-static void ivybridge_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state)
+static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	switch (crtc->pipe) {
@@ -5180,7 +5708,7 @@ static struct intel_encoder *
 intel_get_crtc_new_encoder(const struct intel_atomic_state *state,
 			   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	const struct drm_connector_state *connector_state;
 	const struct drm_connector *connector;
 	struct intel_encoder *encoder = NULL;
@@ -5209,19 +5737,19 @@ intel_get_crtc_new_encoder(const struct intel_atomic_state *state,
  *   - DP transcoding bits
  *   - transcoder
  */
-static void ironlake_pch_enable(const struct intel_atomic_state *state,
-				const struct intel_crtc_state *crtc_state)
+static void ilk_pch_enable(const struct intel_atomic_state *state,
+			   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	u32 temp;
 
 	assert_pch_transcoder_disabled(dev_priv, pipe);
 
 	if (IS_IVYBRIDGE(dev_priv))
-		ivybridge_update_fdi_bc_bifurcation(crtc_state);
+		ivb_update_fdi_bc_bifurcation(crtc_state);
 
 	/* Write the TU size bits before fdi link training, so that error
 	 * detection works. */
@@ -5258,7 +5786,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 
 	/* set transcoder timing, panel must allow it */
 	assert_panel_unlocked(dev_priv, pipe);
-	ironlake_pch_transcoder_set_timings(crtc_state, pipe);
+	ilk_pch_transcoder_set_timings(crtc_state, pipe);
 
 	intel_fdi_normal_train(crtc);
 
@@ -5266,7 +5794,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 	if (HAS_PCH_CPT(dev_priv) &&
 	    intel_crtc_has_dp_encoder(crtc_state)) {
 		const struct drm_display_mode *adjusted_mode =
-			&crtc_state->base.adjusted_mode;
+			&crtc_state->hw.adjusted_mode;
 		u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5;
 		i915_reg_t reg = TRANS_DP_CTL(pipe);
 		enum port port;
@@ -5290,13 +5818,13 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 		I915_WRITE(reg, temp);
 	}
 
-	ironlake_enable_pch_transcoder(crtc_state);
+	ilk_enable_pch_transcoder(crtc_state);
 }
 
 static void lpt_pch_enable(const struct intel_atomic_state *state,
 			   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 
@@ -5305,14 +5833,14 @@ static void lpt_pch_enable(const struct intel_atomic_state *state,
 	lpt_program_iclkip(crtc_state);
 
 	/* Set transcoder timing. */
-	ironlake_pch_transcoder_set_timings(crtc_state, PIPE_A);
+	ilk_pch_transcoder_set_timings(crtc_state, PIPE_A);
 
 	lpt_enable_pch_transcoder(dev_priv, cpu_transcoder);
 }
 
-static void cpt_verify_modeset(struct drm_device *dev, int pipe)
+static void cpt_verify_modeset(struct drm_i915_private *dev_priv,
+			       enum pipe pipe)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	i915_reg_t dslreg = PIPEDSL(pipe);
 	u32 temp;
 
@@ -5408,15 +5936,16 @@ static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		  unsigned int scaler_user, int *scaler_id,
 		  int src_w, int src_h, int dst_w, int dst_h,
-		  const struct drm_format_info *format, bool need_scaler)
+		  const struct drm_format_info *format,
+		  u64 modifier, bool need_scaler)
 {
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct intel_crtc *intel_crtc =
-		to_intel_crtc(crtc_state->base.crtc);
+		to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 
 	/*
 	 * Src coordinates are already rotated by 270 degrees for
@@ -5432,7 +5961,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 	 * Once NV12 is enabled, handle it here while allocating scaler
 	 * for NV12.
 	 */
-	if (INTEL_GEN(dev_priv) >= 9 && crtc_state->base.enable &&
+	if (INTEL_GEN(dev_priv) >= 9 && crtc_state->hw.enable &&
 	    need_scaler && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
 		DRM_DEBUG_KMS("Pipe/Plane scaling not supported with IF-ID mode\n");
 		return -EINVAL;
@@ -5462,7 +5991,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		return 0;
 	}
 
-	if (format && is_planar_yuv_format(format->format) &&
+	if (format && intel_format_info_is_yuv_semiplanar(format, modifier) &&
 	    (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) {
 		DRM_DEBUG_KMS("Planar YUV: src dimensions not met\n");
 		return -EINVAL;
@@ -5504,17 +6033,18 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
  */
 int skl_update_scaler_crtc(struct intel_crtc_state *state)
 {
-	const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &state->hw.adjusted_mode;
 	bool need_scaler = false;
 
 	if (state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
 		need_scaler = true;
 
-	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
+	return skl_update_scaler(state, !state->hw.active, SKL_CRTC_INDEX,
 				 &state->scaler_state.scaler_id,
 				 state->pipe_src_w, state->pipe_src_h,
 				 adjusted_mode->crtc_hdisplay,
-				 adjusted_mode->crtc_vdisplay, NULL, need_scaler);
+				 adjusted_mode->crtc_vdisplay, NULL, 0,
+				 need_scaler);
 }
 
 /**
@@ -5530,26 +6060,28 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 				   struct intel_plane_state *plane_state)
 {
 	struct intel_plane *intel_plane =
-		to_intel_plane(plane_state->base.plane);
+		to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev);
-	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_framebuffer *fb = plane_state->hw.fb;
 	int ret;
-	bool force_detach = !fb || !plane_state->base.visible;
+	bool force_detach = !fb || !plane_state->uapi.visible;
 	bool need_scaler = false;
 
 	/* Pre-gen11 and SDR planes always need a scaler for planar formats. */
 	if (!icl_is_hdr_plane(dev_priv, intel_plane->id) &&
-	    fb && is_planar_yuv_format(fb->format->format))
+	    fb && intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
 		need_scaler = true;
 
 	ret = skl_update_scaler(crtc_state, force_detach,
 				drm_plane_index(&intel_plane->base),
 				&plane_state->scaler_id,
-				drm_rect_width(&plane_state->base.src) >> 16,
-				drm_rect_height(&plane_state->base.src) >> 16,
-				drm_rect_width(&plane_state->base.dst),
-				drm_rect_height(&plane_state->base.dst),
-				fb ? fb->format : NULL, need_scaler);
+				drm_rect_width(&plane_state->uapi.src) >> 16,
+				drm_rect_height(&plane_state->uapi.src) >> 16,
+				drm_rect_width(&plane_state->uapi.dst),
+				drm_rect_height(&plane_state->uapi.dst),
+				fb ? fb->format : NULL,
+				fb ? fb->modifier : 0,
+				need_scaler);
 
 	if (ret || plane_state->scaler_id < 0)
 		return ret;
@@ -5571,10 +6103,8 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_ARGB8888:
 	case DRM_FORMAT_XRGB2101010:
 	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_XBGR16161616F:
-	case DRM_FORMAT_ABGR16161616F:
-	case DRM_FORMAT_XRGB16161616F:
-	case DRM_FORMAT_ARGB16161616F:
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_ABGR2101010:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
@@ -5590,6 +6120,13 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_XVYU12_16161616:
 	case DRM_FORMAT_XVYU16161616:
 		break;
+	case DRM_FORMAT_XBGR16161616F:
+	case DRM_FORMAT_ABGR16161616F:
+	case DRM_FORMAT_XRGB16161616F:
+	case DRM_FORMAT_ARGB16161616F:
+		if (INTEL_GEN(dev_priv) >= 11)
+			break;
+		/* fall through */
 	default:
 		DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n",
 			      intel_plane->base.base.id, intel_plane->base.name,
@@ -5600,17 +6137,18 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
-static void skylake_scaler_disable(struct intel_crtc *crtc)
+void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state)
 {
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	int i;
 
 	for (i = 0; i < crtc->num_scalers; i++)
 		skl_detach_scaler(crtc, i);
 }
 
-static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state)
+static void skl_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	const struct intel_crtc_scaler_state *scaler_state =
@@ -5645,11 +6183,11 @@ static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state)
 	}
 }
 
-static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state)
+static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 
 	if (crtc_state->pch_pfit.enabled) {
 		/* Force use of hard-coded filter coefficients
@@ -5668,7 +6206,7 @@ static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state)
 
 void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -5704,7 +6242,7 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 
 void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -5731,90 +6269,18 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 
 static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
-	if (intel_crtc->overlay) {
-		struct drm_device *dev = intel_crtc->base.dev;
-
-		mutex_lock(&dev->struct_mutex);
+	if (intel_crtc->overlay)
 		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		mutex_unlock(&dev->struct_mutex);
-	}
 
 	/* Let userspace switch the overlay on again. In most cases userspace
 	 * has to recompute where to put it anyway.
 	 */
 }
 
-/**
- * intel_post_enable_primary - Perform operations after enabling primary plane
- * @crtc: the CRTC whose primary plane was just enabled
- * @new_crtc_state: the enabling state
- *
- * Performs potentially sleeping operations that must be done after the primary
- * plane is enabled, such as updating FBC and IPS.  Note that this may be
- * called due to an explicit primary plane update, or due to an implicit
- * re-enable that is caused when a sprite plane is updated to no longer
- * completely hide the primary plane.
- */
-static void
-intel_post_enable_primary(struct drm_crtc *crtc,
-			  const struct intel_crtc_state *new_crtc_state)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-
-	/*
-	 * Gen2 reports pipe underruns whenever all planes are disabled.
-	 * So don't enable underrun reporting before at least some planes
-	 * are enabled.
-	 * FIXME: Need to fix the logic to work when we turn off all planes
-	 * but leave the pipe running.
-	 */
-	if (IS_GEN(dev_priv, 2))
-		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
-
-	/* Underruns don't always raise interrupts, so check manually. */
-	intel_check_cpu_fifo_underruns(dev_priv);
-	intel_check_pch_fifo_underruns(dev_priv);
-}
-
-/* FIXME get rid of this and use pre_plane_update */
-static void
-intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-
-	/*
-	 * Gen2 reports pipe underruns whenever all planes are disabled.
-	 * So disable underrun reporting before all the planes get disabled.
-	 */
-	if (IS_GEN(dev_priv, 2))
-		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
-
-	hsw_disable_ips(to_intel_crtc_state(crtc->state));
-
-	/*
-	 * Vblank time updates from the shadow to live plane control register
-	 * are blocked if the memory self-refresh mode is active at that
-	 * moment. So to make sure the plane gets truly disabled, disable
-	 * first the self-refresh mode. The self-refresh enable bit in turn
-	 * will be checked/applied by the HW only at the next frame start
-	 * event which is after the vblank start event, so we need to have a
-	 * wait-for-vblank between disabling the plane and the pipe.
-	 */
-	if (HAS_GMCH(dev_priv) &&
-	    intel_set_memory_cxsr(dev_priv, false))
-		intel_wait_for_vblank(dev_priv, pipe);
-}
-
 static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state,
 				       const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (!old_crtc_state->ips_enabled)
@@ -5830,7 +6296,7 @@ static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_s
 	 * Disable IPS before we program the LUT.
 	 */
 	if (IS_HASWELL(dev_priv) &&
-	    (new_crtc_state->base.color_mgmt_changed ||
+	    (new_crtc_state->uapi.color_mgmt_changed ||
 	     new_crtc_state->update_pipe) &&
 	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
 		return true;
@@ -5841,7 +6307,7 @@ static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_s
 static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state,
 				       const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (!new_crtc_state->ips_enabled)
@@ -5857,7 +6323,7 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s
 	 * Re-enable IPS after the LUT has been programmed.
 	 */
 	if (IS_HASWELL(dev_priv) &&
-	    (new_crtc_state->base.color_mgmt_changed ||
+	    (new_crtc_state->uapi.color_mgmt_changed ||
 	     new_crtc_state->update_pipe) &&
 	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
 		return true;
@@ -5867,15 +6333,16 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s
 	 * forcibly enable IPS on the first fastset.
 	 */
 	if (new_crtc_state->update_pipe &&
-	    old_crtc_state->base.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED)
+	    old_crtc_state->hw.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED)
 		return true;
 
 	return !old_crtc_state->ips_enabled;
 }
 
-static bool needs_nv12_wa(struct drm_i915_private *dev_priv,
-			  const struct intel_crtc_state *crtc_state)
+static bool needs_nv12_wa(const struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
 	if (!crtc_state->nv12_planes)
 		return false;
 
@@ -5886,9 +6353,10 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv,
 	return false;
 }
 
-static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv,
-			       const struct intel_crtc_state *crtc_state)
+static bool needs_scalerclk_wa(const struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
 	/* Wa_2006604312:icl */
 	if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv))
 		return true;
@@ -5896,89 +6364,82 @@ static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv,
 	return false;
 }
 
-static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
+static bool planes_enabling(const struct intel_crtc_state *old_crtc_state,
+			    const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_atomic_state *state = old_crtc_state->base.state;
-	struct intel_crtc_state *pipe_config =
-		intel_atomic_get_new_crtc_state(to_intel_atomic_state(state),
-						crtc);
-	struct drm_plane *primary = crtc->base.primary;
-	struct drm_plane_state *old_primary_state =
-		drm_atomic_get_old_plane_state(state, primary);
+	return (!old_crtc_state->active_planes || needs_modeset(new_crtc_state)) &&
+		new_crtc_state->active_planes;
+}
 
-	intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
+static bool planes_disabling(const struct intel_crtc_state *old_crtc_state,
+			     const struct intel_crtc_state *new_crtc_state)
+{
+	return old_crtc_state->active_planes &&
+		(!new_crtc_state->active_planes || needs_modeset(new_crtc_state));
+}
 
-	if (pipe_config->update_wm_post && pipe_config->base.active)
-		intel_update_watermarks(crtc);
+static void intel_post_plane_update(struct intel_atomic_state *state,
+				    struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_plane *primary = to_intel_plane(crtc->base.primary);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct intel_plane_state *new_primary_state =
+		intel_atomic_get_new_plane_state(state, primary);
+	enum pipe pipe = crtc->pipe;
 
-	if (hsw_post_update_enable_ips(old_crtc_state, pipe_config))
-		hsw_enable_ips(pipe_config);
+	intel_frontbuffer_flip(dev_priv, new_crtc_state->fb_bits);
 
-	if (old_primary_state) {
-		struct drm_plane_state *new_primary_state =
-			drm_atomic_get_new_plane_state(state, primary);
+	if (new_crtc_state->update_wm_post && new_crtc_state->hw.active)
+		intel_update_watermarks(crtc);
 
-		intel_fbc_post_update(crtc);
+	if (hsw_post_update_enable_ips(old_crtc_state, new_crtc_state))
+		hsw_enable_ips(new_crtc_state);
 
-		if (new_primary_state->visible &&
-		    (needs_modeset(pipe_config) ||
-		     !old_primary_state->visible))
-			intel_post_enable_primary(&crtc->base, pipe_config);
-	}
+	if (new_primary_state)
+		intel_fbc_post_update(crtc);
 
-	if (needs_nv12_wa(dev_priv, old_crtc_state) &&
-	    !needs_nv12_wa(dev_priv, pipe_config))
-		skl_wa_827(dev_priv, crtc->pipe, false);
+	if (needs_nv12_wa(old_crtc_state) &&
+	    !needs_nv12_wa(new_crtc_state))
+		skl_wa_827(dev_priv, pipe, false);
 
-	if (needs_scalerclk_wa(dev_priv, old_crtc_state) &&
-	    !needs_scalerclk_wa(dev_priv, pipe_config))
-		icl_wa_scalerclkgating(dev_priv, crtc->pipe, false);
+	if (needs_scalerclk_wa(old_crtc_state) &&
+	    !needs_scalerclk_wa(new_crtc_state))
+		icl_wa_scalerclkgating(dev_priv, pipe, false);
 }
 
-static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
-				   struct intel_crtc_state *pipe_config)
+static void intel_pre_plane_update(struct intel_atomic_state *state,
+				   struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_atomic_state *state = old_crtc_state->base.state;
-	struct drm_plane *primary = crtc->base.primary;
-	struct drm_plane_state *old_primary_state =
-		drm_atomic_get_old_plane_state(state, primary);
-	bool modeset = needs_modeset(pipe_config);
-	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_plane *primary = to_intel_plane(crtc->base.primary);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct intel_plane_state *new_primary_state =
+		intel_atomic_get_new_plane_state(state, primary);
+	enum pipe pipe = crtc->pipe;
 
-	if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config))
+	if (hsw_pre_update_disable_ips(old_crtc_state, new_crtc_state))
 		hsw_disable_ips(old_crtc_state);
 
-	if (old_primary_state) {
-		struct intel_plane_state *new_primary_state =
-			intel_atomic_get_new_plane_state(intel_state,
-							 to_intel_plane(primary));
-
-		intel_fbc_pre_update(crtc, pipe_config, new_primary_state);
-		/*
-		 * Gen2 reports pipe underruns whenever all planes are disabled.
-		 * So disable underrun reporting before all the planes get disabled.
-		 */
-		if (IS_GEN(dev_priv, 2) && old_primary_state->visible &&
-		    (modeset || !new_primary_state->base.visible))
-			intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
-	}
+	if (new_primary_state &&
+	    intel_fbc_pre_update(crtc, new_crtc_state, new_primary_state))
+		intel_wait_for_vblank(dev_priv, pipe);
 
 	/* Display WA 827 */
-	if (!needs_nv12_wa(dev_priv, old_crtc_state) &&
-	    needs_nv12_wa(dev_priv, pipe_config))
-		skl_wa_827(dev_priv, crtc->pipe, true);
+	if (!needs_nv12_wa(old_crtc_state) &&
+	    needs_nv12_wa(new_crtc_state))
+		skl_wa_827(dev_priv, pipe, true);
 
 	/* Wa_2006604312:icl */
-	if (!needs_scalerclk_wa(dev_priv, old_crtc_state) &&
-	    needs_scalerclk_wa(dev_priv, pipe_config))
-		icl_wa_scalerclkgating(dev_priv, crtc->pipe, true);
+	if (!needs_scalerclk_wa(old_crtc_state) &&
+	    needs_scalerclk_wa(new_crtc_state))
+		icl_wa_scalerclkgating(dev_priv, pipe, true);
 
 	/*
 	 * Vblank time updates from the shadow to live plane control register
@@ -5989,9 +6450,9 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 	 * event which is after the vblank start event, so we need to have a
 	 * wait-for-vblank between disabling the plane and the pipe.
 	 */
-	if (HAS_GMCH(dev_priv) && old_crtc_state->base.active &&
-	    pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false))
-		intel_wait_for_vblank(dev_priv, crtc->pipe);
+	if (HAS_GMCH(dev_priv) && old_crtc_state->hw.active &&
+	    new_crtc_state->disable_cxsr && intel_set_memory_cxsr(dev_priv, false))
+		intel_wait_for_vblank(dev_priv, pipe);
 
 	/*
 	 * IVB workaround: must disable low power watermarks for at least
@@ -6000,36 +6461,45 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 	 *
 	 * WaCxSRDisabledForSpriteScaling:ivb
 	 */
-	if (pipe_config->disable_lp_wm && ilk_disable_lp_wm(dev) &&
-	    old_crtc_state->base.active)
-		intel_wait_for_vblank(dev_priv, crtc->pipe);
+	if (old_crtc_state->hw.active &&
+	    new_crtc_state->disable_lp_wm && ilk_disable_lp_wm(dev_priv))
+		intel_wait_for_vblank(dev_priv, pipe);
 
 	/*
-	 * If we're doing a modeset, we're done.  No need to do any pre-vblank
-	 * watermark programming here.
+	 * If we're doing a modeset we don't need to do any
+	 * pre-vblank watermark programming here.
 	 */
-	if (needs_modeset(pipe_config))
-		return;
+	if (!needs_modeset(new_crtc_state)) {
+		/*
+		 * For platforms that support atomic watermarks, program the
+		 * 'intermediate' watermarks immediately.  On pre-gen9 platforms, these
+		 * will be the intermediate values that are safe for both pre- and
+		 * post- vblank; when vblank happens, the 'active' values will be set
+		 * to the final 'target' values and we'll do this again to get the
+		 * optimal watermarks.  For gen9+ platforms, the values we program here
+		 * will be the final target values which will get automatically latched
+		 * at vblank time; no further programming will be necessary.
+		 *
+		 * If a platform hasn't been transitioned to atomic watermarks yet,
+		 * we'll continue to update watermarks the old way, if flags tell
+		 * us to.
+		 */
+		if (dev_priv->display.initial_watermarks)
+			dev_priv->display.initial_watermarks(state, crtc);
+		else if (new_crtc_state->update_wm_pre)
+			intel_update_watermarks(crtc);
+	}
 
 	/*
-	 * For platforms that support atomic watermarks, program the
-	 * 'intermediate' watermarks immediately.  On pre-gen9 platforms, these
-	 * will be the intermediate values that are safe for both pre- and
-	 * post- vblank; when vblank happens, the 'active' values will be set
-	 * to the final 'target' values and we'll do this again to get the
-	 * optimal watermarks.  For gen9+ platforms, the values we program here
-	 * will be the final target values which will get automatically latched
-	 * at vblank time; no further programming will be necessary.
+	 * Gen2 reports pipe underruns whenever all planes are disabled.
+	 * So disable underrun reporting before all the planes get disabled.
 	 *
-	 * If a platform hasn't been transitioned to atomic watermarks yet,
-	 * we'll continue to update watermarks the old way, if flags tell
-	 * us to.
+	 * We do this after .initial_watermarks() so that we have a
+	 * chance of catching underruns with the intermediate watermarks
+	 * vs. the old plane configuration.
 	 */
-	if (dev_priv->display.initial_watermarks != NULL)
-		dev_priv->display.initial_watermarks(intel_state,
-						     pipe_config);
-	else if (pipe_config->update_wm_pre)
-		intel_update_watermarks(crtc);
+	if (IS_GEN(dev_priv, 2) && planes_disabling(old_crtc_state, new_crtc_state))
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 }
 
 static void intel_crtc_disable_planes(struct intel_atomic_state *state,
@@ -6053,7 +6523,7 @@ static void intel_crtc_disable_planes(struct intel_atomic_state *state,
 
 		intel_disable_plane(plane, new_crtc_state);
 
-		if (old_plane_state->base.visible)
+		if (old_plane_state->uapi.visible)
 			fb_bits |= plane->frontbuffer_bit;
 	}
 
@@ -6077,45 +6547,29 @@ intel_connector_primary_encoder(struct intel_connector *connector)
 	if (connector->mst_port)
 		return &dp_to_dig_port(connector->mst_port)->base;
 
-	encoder = intel_attached_encoder(&connector->base);
+	encoder = intel_attached_encoder(connector);
 	WARN_ON(!encoder);
 
 	return encoder;
 }
 
-static bool
-intel_connector_needs_modeset(struct intel_atomic_state *state,
-			      const struct drm_connector_state *old_conn_state,
-			      const struct drm_connector_state *new_conn_state)
-{
-	struct intel_crtc *old_crtc = old_conn_state->crtc ?
-				      to_intel_crtc(old_conn_state->crtc) : NULL;
-	struct intel_crtc *new_crtc = new_conn_state->crtc ?
-				      to_intel_crtc(new_conn_state->crtc) : NULL;
-
-	return new_crtc != old_crtc ||
-	       (new_crtc &&
-		needs_modeset(intel_atomic_get_new_crtc_state(state, new_crtc)));
-}
-
 static void intel_encoders_update_prepare(struct intel_atomic_state *state)
 {
-	struct drm_connector_state *old_conn_state;
 	struct drm_connector_state *new_conn_state;
-	struct drm_connector *conn;
+	struct drm_connector *connector;
 	int i;
 
-	for_each_oldnew_connector_in_state(&state->base, conn,
-					   old_conn_state, new_conn_state, i) {
+	for_each_new_connector_in_state(&state->base, connector, new_conn_state,
+					i) {
+		struct intel_connector *intel_connector;
 		struct intel_encoder *encoder;
 		struct intel_crtc *crtc;
 
-		if (!intel_connector_needs_modeset(state,
-						   old_conn_state,
-						   new_conn_state))
+		if (!intel_connector_needs_modeset(state, connector))
 			continue;
 
-		encoder = intel_connector_primary_encoder(to_intel_connector(conn));
+		intel_connector = to_intel_connector(connector);
+		encoder = intel_connector_primary_encoder(intel_connector);
 		if (!encoder->update_prepare)
 			continue;
 
@@ -6127,22 +6581,21 @@ static void intel_encoders_update_prepare(struct intel_atomic_state *state)
 
 static void intel_encoders_update_complete(struct intel_atomic_state *state)
 {
-	struct drm_connector_state *old_conn_state;
 	struct drm_connector_state *new_conn_state;
-	struct drm_connector *conn;
+	struct drm_connector *connector;
 	int i;
 
-	for_each_oldnew_connector_in_state(&state->base, conn,
-					   old_conn_state, new_conn_state, i) {
+	for_each_new_connector_in_state(&state->base, connector, new_conn_state,
+					i) {
+		struct intel_connector *intel_connector;
 		struct intel_encoder *encoder;
 		struct intel_crtc *crtc;
 
-		if (!intel_connector_needs_modeset(state,
-						   old_conn_state,
-						   new_conn_state))
+		if (!intel_connector_needs_modeset(state, connector))
 			continue;
 
-		encoder = intel_connector_primary_encoder(to_intel_connector(conn));
+		intel_connector = to_intel_connector(connector);
+		encoder = intel_connector_primary_encoder(intel_connector);
 		if (!encoder->update_complete)
 			continue;
 
@@ -6152,11 +6605,12 @@ static void intel_encoders_update_complete(struct intel_atomic_state *state)
 	}
 }
 
-static void intel_encoders_pre_pll_enable(struct intel_crtc *crtc,
-					  struct intel_crtc_state *crtc_state,
-					  struct intel_atomic_state *state)
+static void intel_encoders_pre_pll_enable(struct intel_atomic_state *state,
+					  struct intel_crtc *crtc)
 {
-	struct drm_connector_state *conn_state;
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct drm_connector_state *conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6172,11 +6626,12 @@ static void intel_encoders_pre_pll_enable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_pre_enable(struct intel_crtc *crtc,
-				      struct intel_crtc_state *crtc_state,
-				      struct intel_atomic_state *state)
+static void intel_encoders_pre_enable(struct intel_atomic_state *state,
+				      struct intel_crtc *crtc)
 {
-	struct drm_connector_state *conn_state;
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct drm_connector_state *conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6192,11 +6647,12 @@ static void intel_encoders_pre_enable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_enable(struct intel_crtc *crtc,
-				  struct intel_crtc_state *crtc_state,
-				  struct intel_atomic_state *state)
+static void intel_encoders_enable(struct intel_atomic_state *state,
+				  struct intel_crtc *crtc)
 {
-	struct drm_connector_state *conn_state;
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct drm_connector_state *conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6213,11 +6669,12 @@ static void intel_encoders_enable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_disable(struct intel_crtc *crtc,
-				   struct intel_crtc_state *old_crtc_state,
-				   struct intel_atomic_state *state)
+static void intel_encoders_disable(struct intel_atomic_state *state,
+				   struct intel_crtc *crtc)
 {
-	struct drm_connector_state *old_conn_state;
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct drm_connector_state *old_conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6234,11 +6691,12 @@ static void intel_encoders_disable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_post_disable(struct intel_crtc *crtc,
-					struct intel_crtc_state *old_crtc_state,
-					struct intel_atomic_state *state)
+static void intel_encoders_post_disable(struct intel_atomic_state *state,
+					struct intel_crtc *crtc)
 {
-	struct drm_connector_state *old_conn_state;
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct drm_connector_state *old_conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6254,11 +6712,12 @@ static void intel_encoders_post_disable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_post_pll_disable(struct intel_crtc *crtc,
-					    struct intel_crtc_state *old_crtc_state,
-					    struct intel_atomic_state *state)
+static void intel_encoders_post_pll_disable(struct intel_atomic_state *state,
+					    struct intel_crtc *crtc)
 {
-	struct drm_connector_state *old_conn_state;
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct drm_connector_state *old_conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6274,11 +6733,12 @@ static void intel_encoders_post_pll_disable(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_encoders_update_pipe(struct intel_crtc *crtc,
-				       struct intel_crtc_state *crtc_state,
-				       struct intel_atomic_state *state)
+static void intel_encoders_update_pipe(struct intel_atomic_state *state,
+				       struct intel_crtc *crtc)
 {
-	struct drm_connector_state *conn_state;
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct drm_connector_state *conn_state;
 	struct drm_connector *conn;
 	int i;
 
@@ -6296,22 +6756,21 @@ static void intel_encoders_update_pipe(struct intel_crtc *crtc,
 
 static void intel_disable_primary_plane(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct intel_plane *plane = to_intel_plane(crtc->base.primary);
 
 	plane->disable_plane(plane, crtc_state);
 }
 
-static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
-				 struct intel_atomic_state *state)
+static void ilk_crtc_enable(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = pipe_config->base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
-	if (WARN_ON(intel_crtc->active))
+	if (WARN_ON(crtc->active))
 		return;
 
 	/*
@@ -6327,61 +6786,59 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
-	if (pipe_config->has_pch_encoder)
-		intel_prepare_shared_dpll(pipe_config);
+	if (new_crtc_state->has_pch_encoder)
+		intel_prepare_shared_dpll(new_crtc_state);
 
-	if (intel_crtc_has_dp_encoder(pipe_config))
-		intel_dp_set_m_n(pipe_config, M1_N1);
+	if (intel_crtc_has_dp_encoder(new_crtc_state))
+		intel_dp_set_m_n(new_crtc_state, M1_N1);
 
-	intel_set_pipe_timings(pipe_config);
-	intel_set_pipe_src_size(pipe_config);
+	intel_set_pipe_timings(new_crtc_state);
+	intel_set_pipe_src_size(new_crtc_state);
 
-	if (pipe_config->has_pch_encoder) {
-		intel_cpu_transcoder_set_m_n(pipe_config,
-					     &pipe_config->fdi_m_n, NULL);
-	}
+	if (new_crtc_state->has_pch_encoder)
+		intel_cpu_transcoder_set_m_n(new_crtc_state,
+					     &new_crtc_state->fdi_m_n, NULL);
 
-	ironlake_set_pipeconf(pipe_config);
+	ilk_set_pipeconf(new_crtc_state);
 
-	intel_crtc->active = true;
+	crtc->active = true;
 
-	intel_encoders_pre_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_enable(state, crtc);
 
-	if (pipe_config->has_pch_encoder) {
+	if (new_crtc_state->has_pch_encoder) {
 		/* Note: FDI PLL enabling _must_ be done before we enable the
 		 * cpu pipes, hence this is separate from all the other fdi/pch
 		 * enabling. */
-		ironlake_fdi_pll_enable(pipe_config);
+		ilk_fdi_pll_enable(new_crtc_state);
 	} else {
 		assert_fdi_tx_disabled(dev_priv, pipe);
 		assert_fdi_rx_disabled(dev_priv, pipe);
 	}
 
-	ironlake_pfit_enable(pipe_config);
+	ilk_pfit_enable(new_crtc_state);
 
 	/*
 	 * On ILK+ LUT must be loaded before the pipe is running but with
 	 * clocks enabled
 	 */
-	intel_color_load_luts(pipe_config);
-	intel_color_commit(pipe_config);
+	intel_color_load_luts(new_crtc_state);
+	intel_color_commit(new_crtc_state);
 	/* update DSPCNTR to configure gamma for pipe bottom color */
-	intel_disable_primary_plane(pipe_config);
+	intel_disable_primary_plane(new_crtc_state);
 
-	if (dev_priv->display.initial_watermarks != NULL)
-		dev_priv->display.initial_watermarks(state, pipe_config);
-	intel_enable_pipe(pipe_config);
+	if (dev_priv->display.initial_watermarks)
+		dev_priv->display.initial_watermarks(state, crtc);
+	intel_enable_pipe(new_crtc_state);
 
-	if (pipe_config->has_pch_encoder)
-		ironlake_pch_enable(state, pipe_config);
+	if (new_crtc_state->has_pch_encoder)
+		ilk_pch_enable(state, new_crtc_state);
 
-	assert_vblank_disabled(crtc);
-	intel_crtc_vblank_on(pipe_config);
+	intel_crtc_vblank_on(new_crtc_state);
 
-	intel_encoders_enable(intel_crtc, pipe_config, state);
+	intel_encoders_enable(state, crtc);
 
 	if (HAS_PCH_CPT(dev_priv))
-		cpt_verify_modeset(dev, intel_crtc->pipe);
+		cpt_verify_modeset(dev_priv, pipe);
 
 	/*
 	 * Must wait for vblank to avoid spurious PCH FIFO underruns.
@@ -6389,7 +6846,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
 	 * some interlaced HDMI modes. Let's do the double wait always
 	 * in case there are more corner cases we don't know about.
 	 */
-	if (pipe_config->has_pch_encoder) {
+	if (new_crtc_state->has_pch_encoder) {
 		intel_wait_for_vblank(dev_priv, pipe);
 		intel_wait_for_vblank(dev_priv, pipe);
 	}
@@ -6436,101 +6893,112 @@ static void icl_pipe_mbus_enable(struct intel_crtc *crtc)
 	I915_WRITE(PIPE_MBUS_DBOX_CTL(pipe), val);
 }
 
-static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
-				struct intel_atomic_state *state)
+static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_crtc *crtc = pipe_config->base.crtc;
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe, hsw_workaround_pipe;
-	enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	i915_reg_t reg = CHICKEN_TRANS(crtc_state->cpu_transcoder);
+	u32 val;
+
+	val = I915_READ(reg);
+	val &= ~HSW_FRAME_START_DELAY_MASK;
+	val |= HSW_FRAME_START_DELAY(0);
+	I915_WRITE(reg, val);
+}
+
+static void hsw_crtc_enable(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe, hsw_workaround_pipe;
+	enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
 	bool psl_clkgate_wa;
 
-	if (WARN_ON(intel_crtc->active))
+	if (WARN_ON(crtc->active))
 		return;
 
-	intel_encoders_pre_pll_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_pll_enable(state, crtc);
 
-	if (pipe_config->shared_dpll)
-		intel_enable_shared_dpll(pipe_config);
+	if (new_crtc_state->shared_dpll)
+		intel_enable_shared_dpll(new_crtc_state);
 
-	intel_encoders_pre_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_enable(state, crtc);
 
-	if (intel_crtc_has_dp_encoder(pipe_config))
-		intel_dp_set_m_n(pipe_config, M1_N1);
+	if (intel_crtc_has_dp_encoder(new_crtc_state))
+		intel_dp_set_m_n(new_crtc_state, M1_N1);
 
 	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_set_pipe_timings(pipe_config);
+		intel_set_pipe_timings(new_crtc_state);
 
-	intel_set_pipe_src_size(pipe_config);
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_enable_trans_port_sync(new_crtc_state);
+
+	intel_set_pipe_src_size(new_crtc_state);
 
 	if (cpu_transcoder != TRANSCODER_EDP &&
-	    !transcoder_is_dsi(cpu_transcoder)) {
+	    !transcoder_is_dsi(cpu_transcoder))
 		I915_WRITE(PIPE_MULT(cpu_transcoder),
-			   pipe_config->pixel_multiplier - 1);
-	}
+			   new_crtc_state->pixel_multiplier - 1);
 
-	if (pipe_config->has_pch_encoder) {
-		intel_cpu_transcoder_set_m_n(pipe_config,
-					     &pipe_config->fdi_m_n, NULL);
-	}
+	if (new_crtc_state->has_pch_encoder)
+		intel_cpu_transcoder_set_m_n(new_crtc_state,
+					     &new_crtc_state->fdi_m_n, NULL);
 
-	if (!transcoder_is_dsi(cpu_transcoder))
-		haswell_set_pipeconf(pipe_config);
+	if (!transcoder_is_dsi(cpu_transcoder)) {
+		hsw_set_frame_start_delay(new_crtc_state);
+		hsw_set_pipeconf(new_crtc_state);
+	}
 
 	if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
-		bdw_set_pipemisc(pipe_config);
+		bdw_set_pipemisc(new_crtc_state);
 
-	intel_crtc->active = true;
+	crtc->active = true;
 
 	/* Display WA #1180: WaDisableScalarClockGating: glk, cnl */
 	psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) &&
-			 pipe_config->pch_pfit.enabled;
+		new_crtc_state->pch_pfit.enabled;
 	if (psl_clkgate_wa)
 		glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true);
 
 	if (INTEL_GEN(dev_priv) >= 9)
-		skylake_pfit_enable(pipe_config);
+		skl_pfit_enable(new_crtc_state);
 	else
-		ironlake_pfit_enable(pipe_config);
+		ilk_pfit_enable(new_crtc_state);
 
 	/*
 	 * On ILK+ LUT must be loaded before the pipe is running but with
 	 * clocks enabled
 	 */
-	intel_color_load_luts(pipe_config);
-	intel_color_commit(pipe_config);
+	intel_color_load_luts(new_crtc_state);
+	intel_color_commit(new_crtc_state);
 	/* update DSPCNTR to configure gamma/csc for pipe bottom color */
 	if (INTEL_GEN(dev_priv) < 9)
-		intel_disable_primary_plane(pipe_config);
+		intel_disable_primary_plane(new_crtc_state);
 
 	if (INTEL_GEN(dev_priv) >= 11)
-		icl_set_pipe_chicken(intel_crtc);
+		icl_set_pipe_chicken(crtc);
 
-	intel_ddi_set_pipe_settings(pipe_config);
 	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_ddi_enable_transcoder_func(pipe_config);
+		intel_ddi_enable_transcoder_func(new_crtc_state);
 
-	if (dev_priv->display.initial_watermarks != NULL)
-		dev_priv->display.initial_watermarks(state, pipe_config);
+	if (dev_priv->display.initial_watermarks)
+		dev_priv->display.initial_watermarks(state, crtc);
 
 	if (INTEL_GEN(dev_priv) >= 11)
-		icl_pipe_mbus_enable(intel_crtc);
+		icl_pipe_mbus_enable(crtc);
 
 	/* XXX: Do the pipe assertions at the right place for BXT DSI. */
 	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_enable_pipe(pipe_config);
-
-	if (pipe_config->has_pch_encoder)
-		lpt_pch_enable(state, pipe_config);
+		intel_enable_pipe(new_crtc_state);
 
-	if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST))
-		intel_ddi_set_vc_payload_alloc(pipe_config, true);
+	if (new_crtc_state->has_pch_encoder)
+		lpt_pch_enable(state, new_crtc_state);
 
-	assert_vblank_disabled(crtc);
-	intel_crtc_vblank_on(pipe_config);
+	intel_crtc_vblank_on(new_crtc_state);
 
-	intel_encoders_enable(intel_crtc, pipe_config, state);
+	intel_encoders_enable(state, crtc);
 
 	if (psl_clkgate_wa) {
 		intel_wait_for_vblank(dev_priv, pipe);
@@ -6539,16 +7007,16 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
 
 	/* If we change the relative order between pipe/planes enabling, we need
 	 * to change the workaround. */
-	hsw_workaround_pipe = pipe_config->hsw_workaround_pipe;
+	hsw_workaround_pipe = new_crtc_state->hsw_workaround_pipe;
 	if (IS_HASWELL(dev_priv) && hsw_workaround_pipe != INVALID_PIPE) {
 		intel_wait_for_vblank(dev_priv, hsw_workaround_pipe);
 		intel_wait_for_vblank(dev_priv, hsw_workaround_pipe);
 	}
 }
 
-static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state)
+void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
@@ -6561,14 +7029,13 @@ static void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state)
 	}
 }
 
-static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
-				  struct intel_atomic_state *state)
+static void ilk_crtc_disable(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = old_crtc_state->base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
 	/*
 	 * Sometimes spurious CPU pipe underruns happen when the
@@ -6578,22 +7045,21 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
-	intel_encoders_disable(intel_crtc, old_crtc_state, state);
+	intel_encoders_disable(state, crtc);
 
-	drm_crtc_vblank_off(crtc);
-	assert_vblank_disabled(crtc);
+	intel_crtc_vblank_off(old_crtc_state);
 
 	intel_disable_pipe(old_crtc_state);
 
-	ironlake_pfit_disable(old_crtc_state);
+	ilk_pfit_disable(old_crtc_state);
 
 	if (old_crtc_state->has_pch_encoder)
-		ironlake_fdi_disable(crtc);
+		ilk_fdi_disable(crtc);
 
-	intel_encoders_post_disable(intel_crtc, old_crtc_state, state);
+	intel_encoders_post_disable(state, crtc);
 
 	if (old_crtc_state->has_pch_encoder) {
-		ironlake_disable_pch_transcoder(dev_priv, pipe);
+		ilk_disable_pch_transcoder(dev_priv, pipe);
 
 		if (HAS_PCH_CPT(dev_priv)) {
 			i915_reg_t reg;
@@ -6613,51 +7079,27 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
 			I915_WRITE(PCH_DPLL_SEL, temp);
 		}
 
-		ironlake_fdi_pll_disable(intel_crtc);
+		ilk_fdi_pll_disable(crtc);
 	}
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 }
 
-static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
-				 struct intel_atomic_state *state)
+static void hsw_crtc_disable(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = old_crtc_state->base.crtc;
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
-
-	intel_encoders_disable(intel_crtc, old_crtc_state, state);
-
-	drm_crtc_vblank_off(crtc);
-	assert_vblank_disabled(crtc);
-
-	/* XXX: Do the pipe assertions at the right place for BXT DSI. */
-	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_disable_pipe(old_crtc_state);
-
-	if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST))
-		intel_ddi_set_vc_payload_alloc(old_crtc_state, false);
-
-	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_ddi_disable_transcoder_func(old_crtc_state);
-
-	intel_dsc_disable(old_crtc_state);
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		skylake_scaler_disable(intel_crtc);
-	else
-		ironlake_pfit_disable(old_crtc_state);
-
-	intel_encoders_post_disable(intel_crtc, old_crtc_state, state);
-
-	intel_encoders_post_pll_disable(intel_crtc, old_crtc_state, state);
+	/*
+	 * FIXME collapse everything to one hook.
+	 * Need care with mst->ddi interactions.
+	 */
+	intel_encoders_disable(state, crtc);
+	intel_encoders_post_disable(state, crtc);
 }
 
 static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (!crtc_state->gmch_pfit.control)
@@ -6668,7 +7110,7 @@ static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state)
 	 * according to register description and PRM.
 	 */
 	WARN_ON(I915_READ(PFIT_CONTROL) & PFIT_ENABLE);
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, crtc_state->cpu_transcoder);
 
 	I915_WRITE(PFIT_PGM_RATIOS, crtc_state->gmch_pfit.pgm_ratios);
 	I915_WRITE(PFIT_CONTROL, crtc_state->gmch_pfit.control);
@@ -6737,6 +7179,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port)
 		return POWER_DOMAIN_PORT_DDI_E_LANES;
 	case PORT_F:
 		return POWER_DOMAIN_PORT_DDI_F_LANES;
+	case PORT_G:
+		return POWER_DOMAIN_PORT_DDI_G_LANES;
 	default:
 		MISSING_CASE(port);
 		return POWER_DOMAIN_PORT_OTHER;
@@ -6753,16 +7197,18 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
 	    dig_port->tc_mode == TC_PORT_TBT_ALT) {
 		switch (dig_port->aux_ch) {
 		case AUX_CH_C:
-			return POWER_DOMAIN_AUX_TBT1;
+			return POWER_DOMAIN_AUX_C_TBT;
 		case AUX_CH_D:
-			return POWER_DOMAIN_AUX_TBT2;
+			return POWER_DOMAIN_AUX_D_TBT;
 		case AUX_CH_E:
-			return POWER_DOMAIN_AUX_TBT3;
+			return POWER_DOMAIN_AUX_E_TBT;
 		case AUX_CH_F:
-			return POWER_DOMAIN_AUX_TBT4;
+			return POWER_DOMAIN_AUX_F_TBT;
+		case AUX_CH_G:
+			return POWER_DOMAIN_AUX_G_TBT;
 		default:
 			MISSING_CASE(dig_port->aux_ch);
-			return POWER_DOMAIN_AUX_TBT1;
+			return POWER_DOMAIN_AUX_C_TBT;
 		}
 	}
 
@@ -6779,6 +7225,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
 		return POWER_DOMAIN_AUX_E;
 	case AUX_CH_F:
 		return POWER_DOMAIN_AUX_F;
+	case AUX_CH_G:
+		return POWER_DOMAIN_AUX_G;
 	default:
 		MISSING_CASE(dig_port->aux_ch);
 		return POWER_DOMAIN_AUX_A;
@@ -6787,14 +7235,14 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
 
 static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct drm_encoder *encoder;
 	enum pipe pipe = crtc->pipe;
 	u64 mask;
 	enum transcoder transcoder = crtc_state->cpu_transcoder;
 
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	mask = BIT_ULL(POWER_DOMAIN_PIPE(pipe));
@@ -6804,7 +7252,7 @@ static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state)
 		mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe));
 
 	drm_for_each_encoder_mask(encoder, &dev_priv->drm,
-				  crtc_state->base.encoder_mask) {
+				  crtc_state->uapi.encoder_mask) {
 		struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
 
 		mask |= BIT_ULL(intel_encoder->power_domain);
@@ -6822,7 +7270,7 @@ static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state)
 static u64
 modeset_get_crtc_power_domains(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum intel_display_power_domain domain;
 	u64 domains, new_domains, old_domains;
@@ -6848,146 +7296,140 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
 		intel_display_power_put_unchecked(dev_priv, domain);
 }
 
-static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
-				   struct intel_atomic_state *state)
+static void valleyview_crtc_enable(struct intel_atomic_state *state,
+				   struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = pipe_config->base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
-	if (WARN_ON(intel_crtc->active))
+	if (WARN_ON(crtc->active))
 		return;
 
-	if (intel_crtc_has_dp_encoder(pipe_config))
-		intel_dp_set_m_n(pipe_config, M1_N1);
+	if (intel_crtc_has_dp_encoder(new_crtc_state))
+		intel_dp_set_m_n(new_crtc_state, M1_N1);
 
-	intel_set_pipe_timings(pipe_config);
-	intel_set_pipe_src_size(pipe_config);
+	intel_set_pipe_timings(new_crtc_state);
+	intel_set_pipe_src_size(new_crtc_state);
 
 	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) {
 		I915_WRITE(CHV_BLEND(pipe), CHV_BLEND_LEGACY);
 		I915_WRITE(CHV_CANVAS(pipe), 0);
 	}
 
-	i9xx_set_pipeconf(pipe_config);
+	i9xx_set_pipeconf(new_crtc_state);
 
-	intel_crtc->active = true;
+	crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 
-	intel_encoders_pre_pll_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_pll_enable(state, crtc);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
-		chv_prepare_pll(intel_crtc, pipe_config);
-		chv_enable_pll(intel_crtc, pipe_config);
+		chv_prepare_pll(crtc, new_crtc_state);
+		chv_enable_pll(crtc, new_crtc_state);
 	} else {
-		vlv_prepare_pll(intel_crtc, pipe_config);
-		vlv_enable_pll(intel_crtc, pipe_config);
+		vlv_prepare_pll(crtc, new_crtc_state);
+		vlv_enable_pll(crtc, new_crtc_state);
 	}
 
-	intel_encoders_pre_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_enable(state, crtc);
 
-	i9xx_pfit_enable(pipe_config);
+	i9xx_pfit_enable(new_crtc_state);
 
-	intel_color_load_luts(pipe_config);
-	intel_color_commit(pipe_config);
+	intel_color_load_luts(new_crtc_state);
+	intel_color_commit(new_crtc_state);
 	/* update DSPCNTR to configure gamma for pipe bottom color */
-	intel_disable_primary_plane(pipe_config);
+	intel_disable_primary_plane(new_crtc_state);
 
-	dev_priv->display.initial_watermarks(state, pipe_config);
-	intel_enable_pipe(pipe_config);
+	dev_priv->display.initial_watermarks(state, crtc);
+	intel_enable_pipe(new_crtc_state);
 
-	assert_vblank_disabled(crtc);
-	intel_crtc_vblank_on(pipe_config);
+	intel_crtc_vblank_on(new_crtc_state);
 
-	intel_encoders_enable(intel_crtc, pipe_config, state);
+	intel_encoders_enable(state, crtc);
 }
 
 static void i9xx_set_pll_dividers(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	I915_WRITE(FP0(crtc->pipe), crtc_state->dpll_hw_state.fp0);
 	I915_WRITE(FP1(crtc->pipe), crtc_state->dpll_hw_state.fp1);
 }
 
-static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
-			     struct intel_atomic_state *state)
+static void i9xx_crtc_enable(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = pipe_config->base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
-	if (WARN_ON(intel_crtc->active))
+	if (WARN_ON(crtc->active))
 		return;
 
-	i9xx_set_pll_dividers(pipe_config);
+	i9xx_set_pll_dividers(new_crtc_state);
 
-	if (intel_crtc_has_dp_encoder(pipe_config))
-		intel_dp_set_m_n(pipe_config, M1_N1);
+	if (intel_crtc_has_dp_encoder(new_crtc_state))
+		intel_dp_set_m_n(new_crtc_state, M1_N1);
 
-	intel_set_pipe_timings(pipe_config);
-	intel_set_pipe_src_size(pipe_config);
+	intel_set_pipe_timings(new_crtc_state);
+	intel_set_pipe_src_size(new_crtc_state);
 
-	i9xx_set_pipeconf(pipe_config);
+	i9xx_set_pipeconf(new_crtc_state);
 
-	intel_crtc->active = true;
+	crtc->active = true;
 
 	if (!IS_GEN(dev_priv, 2))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 
-	intel_encoders_pre_enable(intel_crtc, pipe_config, state);
+	intel_encoders_pre_enable(state, crtc);
 
-	i9xx_enable_pll(intel_crtc, pipe_config);
+	i9xx_enable_pll(crtc, new_crtc_state);
 
-	i9xx_pfit_enable(pipe_config);
+	i9xx_pfit_enable(new_crtc_state);
 
-	intel_color_load_luts(pipe_config);
-	intel_color_commit(pipe_config);
+	intel_color_load_luts(new_crtc_state);
+	intel_color_commit(new_crtc_state);
 	/* update DSPCNTR to configure gamma for pipe bottom color */
-	intel_disable_primary_plane(pipe_config);
+	intel_disable_primary_plane(new_crtc_state);
 
-	if (dev_priv->display.initial_watermarks != NULL)
-		dev_priv->display.initial_watermarks(state,
-						     pipe_config);
+	if (dev_priv->display.initial_watermarks)
+		dev_priv->display.initial_watermarks(state, crtc);
 	else
-		intel_update_watermarks(intel_crtc);
-	intel_enable_pipe(pipe_config);
+		intel_update_watermarks(crtc);
+	intel_enable_pipe(new_crtc_state);
 
-	assert_vblank_disabled(crtc);
-	intel_crtc_vblank_on(pipe_config);
+	intel_crtc_vblank_on(new_crtc_state);
 
-	intel_encoders_enable(intel_crtc, pipe_config, state);
+	intel_encoders_enable(state, crtc);
 }
 
 static void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (!old_crtc_state->gmch_pfit.control)
 		return;
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder);
 
 	DRM_DEBUG_KMS("disabling pfit, current: 0x%08x\n",
 		      I915_READ(PFIT_CONTROL));
 	I915_WRITE(PFIT_CONTROL, 0);
 }
 
-static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
-			      struct intel_atomic_state *state)
+static void i9xx_crtc_disable(struct intel_atomic_state *state,
+			      struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = old_crtc_state->base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
 	/*
 	 * On gen2 planes are double buffered but the pipe isn't, so we must
@@ -6996,16 +7438,15 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 	if (IS_GEN(dev_priv, 2))
 		intel_wait_for_vblank(dev_priv, pipe);
 
-	intel_encoders_disable(intel_crtc, old_crtc_state, state);
+	intel_encoders_disable(state, crtc);
 
-	drm_crtc_vblank_off(crtc);
-	assert_vblank_disabled(crtc);
+	intel_crtc_vblank_off(old_crtc_state);
 
 	intel_disable_pipe(old_crtc_state);
 
 	i9xx_pfit_disable(old_crtc_state);
 
-	intel_encoders_post_disable(intel_crtc, old_crtc_state, state);
+	intel_encoders_post_disable(state, crtc);
 
 	if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DSI)) {
 		if (IS_CHERRYVIEW(dev_priv))
@@ -7016,92 +7457,97 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 			i9xx_disable_pll(old_crtc_state);
 	}
 
-	intel_encoders_post_pll_disable(intel_crtc, old_crtc_state, state);
+	intel_encoders_post_pll_disable(state, crtc);
 
 	if (!IS_GEN(dev_priv, 2))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 
 	if (!dev_priv->display.initial_watermarks)
-		intel_update_watermarks(intel_crtc);
+		intel_update_watermarks(crtc);
 
 	/* clock the pipe down to 640x480@60 to potentially save power */
 	if (IS_I830(dev_priv))
 		i830_enable_pipe(dev_priv, pipe);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+static void intel_crtc_disable_noatomic(struct intel_crtc *crtc,
 					struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *encoder;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_bw_state *bw_state =
 		to_intel_bw_state(dev_priv->bw_obj.state);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
 	enum intel_display_power_domain domain;
 	struct intel_plane *plane;
-	u64 domains;
 	struct drm_atomic_state *state;
-	struct intel_crtc_state *crtc_state;
+	struct intel_crtc_state *temp_crtc_state;
+	enum pipe pipe = crtc->pipe;
+	u64 domains;
 	int ret;
 
-	if (!intel_crtc->active)
+	if (!crtc_state->hw.active)
 		return;
 
-	for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) {
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
 		const struct intel_plane_state *plane_state =
 			to_intel_plane_state(plane->base.state);
 
-		if (plane_state->base.visible)
-			intel_plane_disable_noatomic(intel_crtc, plane);
+		if (plane_state->uapi.visible)
+			intel_plane_disable_noatomic(crtc, plane);
 	}
 
-	state = drm_atomic_state_alloc(crtc->dev);
+	state = drm_atomic_state_alloc(&dev_priv->drm);
 	if (!state) {
 		DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory",
-			      crtc->base.id, crtc->name);
+			      crtc->base.base.id, crtc->base.name);
 		return;
 	}
 
 	state->acquire_ctx = ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
-	crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
-	ret = drm_atomic_add_affected_connectors(state, crtc);
+	temp_crtc_state = intel_atomic_get_crtc_state(state, crtc);
+	ret = drm_atomic_add_affected_connectors(state, &crtc->base);
 
-	WARN_ON(IS_ERR(crtc_state) || ret);
+	WARN_ON(IS_ERR(temp_crtc_state) || ret);
 
-	dev_priv->display.crtc_disable(crtc_state, to_intel_atomic_state(state));
+	dev_priv->display.crtc_disable(to_intel_atomic_state(state), crtc);
 
 	drm_atomic_state_put(state);
 
 	DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
-		      crtc->base.id, crtc->name);
+		      crtc->base.base.id, crtc->base.name);
+
+	crtc->active = false;
+	crtc->base.enabled = false;
 
-	WARN_ON(drm_atomic_set_mode_for_crtc(crtc->state, NULL) < 0);
-	crtc->state->active = false;
-	intel_crtc->active = false;
-	crtc->enabled = false;
-	crtc->state->connector_mask = 0;
-	crtc->state->encoder_mask = 0;
+	WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, NULL) < 0);
+	crtc_state->uapi.active = false;
+	crtc_state->uapi.connector_mask = 0;
+	crtc_state->uapi.encoder_mask = 0;
+	intel_crtc_free_hw_state(crtc_state);
+	memset(&crtc_state->hw, 0, sizeof(crtc_state->hw));
 
-	for_each_encoder_on_crtc(crtc->dev, crtc, encoder)
+	for_each_encoder_on_crtc(&dev_priv->drm, &crtc->base, encoder)
 		encoder->base.crtc = NULL;
 
-	intel_fbc_disable(intel_crtc);
-	intel_update_watermarks(intel_crtc);
-	intel_disable_shared_dpll(to_intel_crtc_state(crtc->state));
+	intel_fbc_disable(crtc);
+	intel_update_watermarks(crtc);
+	intel_disable_shared_dpll(crtc_state);
 
-	domains = intel_crtc->enabled_power_domains;
+	domains = crtc->enabled_power_domains;
 	for_each_power_domain(domain, domains)
 		intel_display_power_put_unchecked(dev_priv, domain);
-	intel_crtc->enabled_power_domains = 0;
+	crtc->enabled_power_domains = 0;
 
-	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
-	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
-	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
+	dev_priv->active_pipes &= ~BIT(pipe);
+	dev_priv->min_cdclk[pipe] = 0;
+	dev_priv->min_voltage_level[pipe] = 0;
 
-	bw_state->data_rate[intel_crtc->pipe] = 0;
-	bw_state->num_active_planes[intel_crtc->pipe] = 0;
+	bw_state->data_rate[pipe] = 0;
+	bw_state->num_active_planes[pipe] = 0;
 }
 
 /*
@@ -7151,8 +7597,8 @@ static void intel_connector_verify_state(struct intel_crtc_state *crtc_state,
 		if (!crtc_state)
 			return;
 
-		I915_STATE_WARN(!crtc_state->base.active,
-		      "connector is active, but attached crtc isn't\n");
+		I915_STATE_WARN(!crtc_state->hw.active,
+				"connector is active, but attached crtc isn't\n");
 
 		if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST)
 			return;
@@ -7163,8 +7609,8 @@ static void intel_connector_verify_state(struct intel_crtc_state *crtc_state,
 		I915_STATE_WARN(conn_state->crtc != encoder->base.crtc,
 			"attached encoder crtc differs from connector crtc\n");
 	} else {
-		I915_STATE_WARN(crtc_state && crtc_state->base.active,
-			"attached crtc is active, but connector isn't\n");
+		I915_STATE_WARN(crtc_state && crtc_state->hw.active,
+				"attached crtc is active, but connector isn't\n");
 		I915_STATE_WARN(!crtc_state && conn_state->best_encoder,
 			"best encoder set without crtc!\n");
 	}
@@ -7172,17 +7618,17 @@ static void intel_connector_verify_state(struct intel_crtc_state *crtc_state,
 
 static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
 {
-	if (crtc_state->base.enable && crtc_state->has_pch_encoder)
+	if (crtc_state->hw.enable && crtc_state->has_pch_encoder)
 		return crtc_state->fdi_lanes;
 
 	return 0;
 }
 
-static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
-				     struct intel_crtc_state *pipe_config)
+static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
+			       struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_atomic_state *state = pipe_config->base.state;
+	struct drm_atomic_state *state = pipe_config->uapi.state;
 	struct intel_crtc *other_crtc;
 	struct intel_crtc_state *other_crtc_state;
 
@@ -7204,7 +7650,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
 		}
 	}
 
-	if (INTEL_INFO(dev_priv)->num_pipes == 2)
+	if (INTEL_NUM_PIPES(dev_priv) == 2)
 		return 0;
 
 	/* Ivybridge 3 pipe is really complicated */
@@ -7251,11 +7697,11 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
 }
 
 #define RETRY 1
-static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc,
-				       struct intel_crtc_state *pipe_config)
+static int ilk_fdi_compute_config(struct intel_crtc *intel_crtc,
+				  struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	int lane, link_bw, fdi_dotclock, ret;
 	bool needs_recompute = false;
 
@@ -7271,15 +7717,15 @@ retry:
 
 	fdi_dotclock = adjusted_mode->crtc_clock;
 
-	lane = ironlake_get_lanes_required(fdi_dotclock, link_bw,
-					   pipe_config->pipe_bpp);
+	lane = ilk_get_lanes_required(fdi_dotclock, link_bw,
+				      pipe_config->pipe_bpp);
 
 	pipe_config->fdi_lanes = lane;
 
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
 			       link_bw, &pipe_config->fdi_m_n, false, false);
 
-	ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
+	ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
 	if (ret == -EDEADLK)
 		return ret;
 
@@ -7301,7 +7747,7 @@ retry:
 
 bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	/* IPS only exists on ULT machines and is tied to pipe A. */
@@ -7331,9 +7777,9 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
 static bool hsw_compute_ips_config(struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(crtc_state->base.crtc->dev);
+		to_i915(crtc_state->uapi.crtc->dev);
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 
 	if (!hsw_crtc_state_ips_capable(crtc_state))
 		return false;
@@ -7372,7 +7818,7 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 {
 	u32 pixel_rate;
 
-	pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
+	pixel_rate = pipe_config->hw.adjusted_mode.crtc_clock;
 
 	/*
 	 * We only use IF-ID interlacing. If we ever use
@@ -7405,12 +7851,12 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 
 static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	if (HAS_GMCH(dev_priv))
 		/* FIXME calculate proper pipe pixel rate for GMCH pfit */
 		crtc_state->pixel_rate =
-			crtc_state->base.adjusted_mode.crtc_clock;
+			crtc_state->hw.adjusted_mode.crtc_clock;
 	else
 		crtc_state->pixel_rate =
 			ilk_pipe_pixel_rate(crtc_state);
@@ -7420,7 +7866,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 				     struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	int clock_limit = dev_priv->max_dotclk_freq;
 
 	if (INTEL_GEN(dev_priv) < 4) {
@@ -7446,7 +7892,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 
 	if ((pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
 	     pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) &&
-	     pipe_config->base.ctm) {
+	     pipe_config->hw.ctm) {
 		/*
 		 * There is only one pipe CSC unit per pipe, and we need that
 		 * for output conversion from RGB->YCBCR. So if CTM is already
@@ -7485,7 +7931,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 	intel_crtc_compute_pixel_rate(pipe_config);
 
 	if (pipe_config->has_pch_encoder)
-		return ironlake_fdi_compute_config(crtc, pipe_config);
+		return ilk_fdi_compute_config(crtc, pipe_config);
 
 	return 0;
 }
@@ -7542,6 +7988,27 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes,
 		    constant_n);
 }
 
+static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * There may be no VBT; and if the BIOS enabled SSC we can
+	 * just keep using it to avoid unnecessary flicker.  Whereas if the
+	 * BIOS isn't using it, don't assume it will work even if the VBT
+	 * indicates as much.
+	 */
+	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) {
+		bool bios_lvds_use_ssc = I915_READ(PCH_DREF_CONTROL) &
+			DREF_SSC1_ENABLE;
+
+		if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) {
+			DRM_DEBUG_KMS("SSC %s by BIOS, overriding VBT which says %s\n",
+				      enableddisabled(bios_lvds_use_ssc),
+				      enableddisabled(dev_priv->vbt.lvds_use_ssc));
+			dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc;
+		}
+	}
+}
+
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 {
 	if (i915_modparams.panel_use_ssc >= 0)
@@ -7619,7 +8086,7 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe
 static void intel_pch_transcoder_set_m_n(const struct intel_crtc_state *crtc_state,
 					 const struct intel_link_m_n *m_n)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
@@ -7646,7 +8113,7 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta
 					 const struct intel_link_m_n *m_n,
 					 const struct intel_link_m_n *m2_n2)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	enum transcoder transcoder = crtc_state->cpu_transcoder;
@@ -7955,11 +8422,11 @@ int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe,
 	struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 	struct intel_crtc_state *pipe_config;
 
-	pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL);
+	pipe_config = intel_crtc_state_alloc(crtc);
 	if (!pipe_config)
 		return -ENOMEM;
 
-	pipe_config->base.crtc = &crtc->base;
+	pipe_config->cpu_transcoder = (enum transcoder)pipe;
 	pipe_config->pixel_multiplier = 1;
 	pipe_config->dpll = *dpll;
 
@@ -8119,11 +8586,11 @@ static void i8xx_compute_dpll(struct intel_crtc *crtc,
 
 static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
-	const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	u32 crtc_vtotal, crtc_vblank_end;
 	int vsyncshift = 0;
 
@@ -8181,7 +8648,7 @@ static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state)
 
 static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
@@ -8193,6 +8660,21 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state)
 		   (crtc_state->pipe_src_h - 1));
 }
 
+static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+
+	if (IS_GEN(dev_priv, 2))
+		return false;
+
+	if (INTEL_GEN(dev_priv) >= 9 ||
+	    IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
+		return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK_HSW;
+	else
+		return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK;
+}
+
 static void intel_get_pipe_timings(struct intel_crtc *crtc,
 				   struct intel_crtc_state *pipe_config)
 {
@@ -8202,39 +8684,39 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc,
 	u32 tmp;
 
 	tmp = I915_READ(HTOTAL(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1;
 
 	if (!transcoder_is_dsi(cpu_transcoder)) {
 		tmp = I915_READ(HBLANK(cpu_transcoder));
-		pipe_config->base.adjusted_mode.crtc_hblank_start =
+		pipe_config->hw.adjusted_mode.crtc_hblank_start =
 							(tmp & 0xffff) + 1;
-		pipe_config->base.adjusted_mode.crtc_hblank_end =
+		pipe_config->hw.adjusted_mode.crtc_hblank_end =
 						((tmp >> 16) & 0xffff) + 1;
 	}
 	tmp = I915_READ(HSYNC(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1;
 
 	tmp = I915_READ(VTOTAL(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1;
 
 	if (!transcoder_is_dsi(cpu_transcoder)) {
 		tmp = I915_READ(VBLANK(cpu_transcoder));
-		pipe_config->base.adjusted_mode.crtc_vblank_start =
+		pipe_config->hw.adjusted_mode.crtc_vblank_start =
 							(tmp & 0xffff) + 1;
-		pipe_config->base.adjusted_mode.crtc_vblank_end =
+		pipe_config->hw.adjusted_mode.crtc_vblank_end =
 						((tmp >> 16) & 0xffff) + 1;
 	}
 	tmp = I915_READ(VSYNC(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1;
+	pipe_config->hw.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1;
 
-	if (I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK) {
-		pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE;
-		pipe_config->base.adjusted_mode.crtc_vtotal += 1;
-		pipe_config->base.adjusted_mode.crtc_vblank_end += 1;
+	if (intel_pipe_is_interlaced(pipe_config)) {
+		pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE;
+		pipe_config->hw.adjusted_mode.crtc_vtotal += 1;
+		pipe_config->hw.adjusted_mode.crtc_vblank_end += 1;
 	}
 }
 
@@ -8249,27 +8731,27 @@ static void intel_get_pipe_src_size(struct intel_crtc *crtc,
 	pipe_config->pipe_src_h = (tmp & 0xffff) + 1;
 	pipe_config->pipe_src_w = ((tmp >> 16) & 0xffff) + 1;
 
-	pipe_config->base.mode.vdisplay = pipe_config->pipe_src_h;
-	pipe_config->base.mode.hdisplay = pipe_config->pipe_src_w;
+	pipe_config->hw.mode.vdisplay = pipe_config->pipe_src_h;
+	pipe_config->hw.mode.hdisplay = pipe_config->pipe_src_w;
 }
 
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_state *pipe_config)
 {
-	mode->hdisplay = pipe_config->base.adjusted_mode.crtc_hdisplay;
-	mode->htotal = pipe_config->base.adjusted_mode.crtc_htotal;
-	mode->hsync_start = pipe_config->base.adjusted_mode.crtc_hsync_start;
-	mode->hsync_end = pipe_config->base.adjusted_mode.crtc_hsync_end;
+	mode->hdisplay = pipe_config->hw.adjusted_mode.crtc_hdisplay;
+	mode->htotal = pipe_config->hw.adjusted_mode.crtc_htotal;
+	mode->hsync_start = pipe_config->hw.adjusted_mode.crtc_hsync_start;
+	mode->hsync_end = pipe_config->hw.adjusted_mode.crtc_hsync_end;
 
-	mode->vdisplay = pipe_config->base.adjusted_mode.crtc_vdisplay;
-	mode->vtotal = pipe_config->base.adjusted_mode.crtc_vtotal;
-	mode->vsync_start = pipe_config->base.adjusted_mode.crtc_vsync_start;
-	mode->vsync_end = pipe_config->base.adjusted_mode.crtc_vsync_end;
+	mode->vdisplay = pipe_config->hw.adjusted_mode.crtc_vdisplay;
+	mode->vtotal = pipe_config->hw.adjusted_mode.crtc_vtotal;
+	mode->vsync_start = pipe_config->hw.adjusted_mode.crtc_vsync_start;
+	mode->vsync_end = pipe_config->hw.adjusted_mode.crtc_vsync_end;
 
-	mode->flags = pipe_config->base.adjusted_mode.flags;
+	mode->flags = pipe_config->hw.adjusted_mode.flags;
 	mode->type = DRM_MODE_TYPE_DRIVER;
 
-	mode->clock = pipe_config->base.adjusted_mode.crtc_clock;
+	mode->clock = pipe_config->hw.adjusted_mode.crtc_clock;
 
 	mode->hsync = drm_mode_hsync(mode);
 	mode->vrefresh = drm_mode_vrefresh(mode);
@@ -8278,7 +8760,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 
 static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 pipeconf;
 
@@ -8315,7 +8797,7 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state)
 		}
 	}
 
-	if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) {
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) {
 		if (INTEL_GEN(dev_priv) < 4 ||
 		    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_SDVO))
 			pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION;
@@ -8331,6 +8813,8 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state)
 
 	pipeconf |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode);
 
+	pipeconf |= PIPECONF_FRAME_START_DELAY(0);
+
 	I915_WRITE(PIPECONF(crtc->pipe), pipeconf);
 	POSTING_READ(PIPECONF(crtc->pipe));
 }
@@ -8430,9 +8914,9 @@ static int pnv_crtc_compute_clock(struct intel_crtc *crtc,
 			DRM_DEBUG_KMS("using SSC reference clock of %d kHz\n", refclk);
 		}
 
-		limit = &intel_limits_pineview_lvds;
+		limit = &pnv_limits_lvds;
 	} else {
-		limit = &intel_limits_pineview_sdvo;
+		limit = &pnv_limits_sdvo;
 	}
 
 	if (!crtc_state->clock_set &&
@@ -8563,7 +9047,7 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = pipe_config->cpu_transcoder;
+	enum pipe pipe = crtc->pipe;
 	struct dpll clock;
 	u32 mdiv;
 	int refclk = 100000;
@@ -8673,7 +9157,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = pipe_config->cpu_transcoder;
+	enum pipe pipe = crtc->pipe;
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	struct dpll clock;
 	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3;
@@ -8702,52 +9186,29 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
 	pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock);
 }
 
-static void intel_get_crtc_ycbcr_config(struct intel_crtc *crtc,
-					struct intel_crtc_state *pipe_config)
+static enum intel_output_format
+bdw_get_pipemisc_output_format(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	enum intel_output_format output = INTEL_OUTPUT_FORMAT_RGB;
-
-	pipe_config->lspcon_downsampling = false;
+	u32 tmp;
 
-	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) {
-		u32 tmp = I915_READ(PIPEMISC(crtc->pipe));
+	tmp = I915_READ(PIPEMISC(crtc->pipe));
 
-		if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) {
-			bool ycbcr420_enabled = tmp & PIPEMISC_YUV420_ENABLE;
-			bool blend = tmp & PIPEMISC_YUV420_MODE_FULL_BLEND;
+	if (tmp & PIPEMISC_YUV420_ENABLE) {
+		/* We support 4:2:0 in full blend mode only */
+		WARN_ON((tmp & PIPEMISC_YUV420_MODE_FULL_BLEND) == 0);
 
-			if (ycbcr420_enabled) {
-				/* We support 4:2:0 in full blend mode only */
-				if (!blend)
-					output = INTEL_OUTPUT_FORMAT_INVALID;
-				else if (!(IS_GEMINILAKE(dev_priv) ||
-					   INTEL_GEN(dev_priv) >= 10))
-					output = INTEL_OUTPUT_FORMAT_INVALID;
-				else
-					output = INTEL_OUTPUT_FORMAT_YCBCR420;
-			} else {
-				/*
-				 * Currently there is no interface defined to
-				 * check user preference between RGB/YCBCR444
-				 * or YCBCR420. So the only possible case for
-				 * YCBCR444 usage is driving YCBCR420 output
-				 * with LSPCON, when pipe is configured for
-				 * YCBCR444 output and LSPCON takes care of
-				 * downsampling it.
-				 */
-				pipe_config->lspcon_downsampling = true;
-				output = INTEL_OUTPUT_FORMAT_YCBCR444;
-			}
-		}
+		return INTEL_OUTPUT_FORMAT_YCBCR420;
+	} else if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) {
+		return INTEL_OUTPUT_FORMAT_YCBCR444;
+	} else {
+		return INTEL_OUTPUT_FORMAT_RGB;
 	}
-
-	pipe_config->output_format = output;
 }
 
 static void i9xx_get_pipe_color_config(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct intel_plane *plane = to_intel_plane(crtc->base.primary);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
@@ -8780,6 +9241,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = NULL;
+	pipe_config->master_transcoder = INVALID_TRANSCODER;
 
 	ret = false;
 
@@ -8870,7 +9332,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 	 * but in case the pipe is enabled w/o any ports we need a sane
 	 * default.
 	 */
-	pipe_config->base.adjusted_mode.crtc_clock =
+	pipe_config->hw.adjusted_mode.crtc_clock =
 		pipe_config->port_clock / pipe_config->pixel_multiplier;
 
 	ret = true;
@@ -8881,7 +9343,7 @@ out:
 	return ret;
 }
 
-static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv)
+static void ilk_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
 	struct intel_encoder *encoder;
 	int i;
@@ -9379,14 +9841,14 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
 void intel_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
 	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))
-		ironlake_init_pch_refclk(dev_priv);
+		ilk_init_pch_refclk(dev_priv);
 	else if (HAS_PCH_LPT(dev_priv))
 		lpt_init_pch_refclk(dev_priv);
 }
 
-static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state)
+static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
@@ -9414,23 +9876,35 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state)
 	if (crtc_state->dither)
 		val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP);
 
-	if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
 		val |= PIPECONF_INTERLACED_ILK;
 	else
 		val |= PIPECONF_PROGRESSIVE;
 
+	/*
+	 * This would end up with an odd purple hue over
+	 * the entire display. Make sure we don't do it.
+	 */
+	WARN_ON(crtc_state->limited_color_range &&
+		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB);
+
 	if (crtc_state->limited_color_range)
 		val |= PIPECONF_COLOR_RANGE_SELECT;
 
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB)
+		val |= PIPECONF_OUTPUT_COLORSPACE_YUV709;
+
 	val |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode);
 
+	val |= PIPECONF_FRAME_START_DELAY(0);
+
 	I915_WRITE(PIPECONF(pipe), val);
 	POSTING_READ(PIPECONF(pipe));
 }
 
-static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state)
+static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	u32 val = 0;
@@ -9438,18 +9912,22 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state)
 	if (IS_HASWELL(dev_priv) && crtc_state->dither)
 		val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP);
 
-	if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
 		val |= PIPECONF_INTERLACED_ILK;
 	else
 		val |= PIPECONF_PROGRESSIVE;
 
+	if (IS_HASWELL(dev_priv) &&
+	    crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB)
+		val |= PIPECONF_OUTPUT_COLORSPACE_YUV_HSW;
+
 	I915_WRITE(PIPECONF(cpu_transcoder), val);
 	POSTING_READ(PIPECONF(cpu_transcoder));
 }
 
 static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 val = 0;
 
@@ -9512,7 +9990,7 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
 	}
 }
 
-int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp)
+int ilk_get_lanes_required(int target_clock, int link_bw, int bpp)
 {
 	/*
 	 * Account for spread spectrum to avoid
@@ -9523,14 +10001,14 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp)
 	return DIV_ROUND_UP(bps, link_bw * 8);
 }
 
-static bool ironlake_needs_fb_cb_tune(struct dpll *dpll, int factor)
+static bool ilk_needs_fb_cb_tune(struct dpll *dpll, int factor)
 {
 	return i9xx_dpll_compute_m(dpll) < factor * dpll->n;
 }
 
-static void ironlake_compute_dpll(struct intel_crtc *crtc,
-				  struct intel_crtc_state *crtc_state,
-				  struct dpll *reduced_clock)
+static void ilk_compute_dpll(struct intel_crtc *crtc,
+			     struct intel_crtc_state *crtc_state,
+			     struct dpll *reduced_clock)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 dpll, fp, fp2;
@@ -9550,7 +10028,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc,
 
 	fp = i9xx_dpll_compute_fp(&crtc_state->dpll);
 
-	if (ironlake_needs_fb_cb_tune(&crtc_state->dpll, factor))
+	if (ilk_needs_fb_cb_tune(&crtc_state->dpll, factor))
 		fp |= FP_CB_TUNE;
 
 	if (reduced_clock) {
@@ -9593,7 +10071,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc,
 	 * clear if it''s a win or loss power wise. No point in doing
 	 * this on ILK at all since it has a fixed DPLL<->pipe mapping.
 	 */
-	if (INTEL_INFO(dev_priv)->num_pipes == 3 &&
+	if (INTEL_NUM_PIPES(dev_priv) == 3 &&
 	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG))
 		dpll |= DPLL_SDVO_HIGH_SPEED;
 
@@ -9630,12 +10108,12 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc,
 	crtc_state->dpll_hw_state.fp1 = fp2;
 }
 
-static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
-				       struct intel_crtc_state *crtc_state)
+static int ilk_crtc_compute_clock(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 	const struct intel_limit *limit;
 	int refclk = 120000;
 
@@ -9655,17 +10133,17 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 
 		if (intel_is_dual_link_lvds(dev_priv)) {
 			if (refclk == 100000)
-				limit = &intel_limits_ironlake_dual_lvds_100m;
+				limit = &ilk_limits_dual_lvds_100m;
 			else
-				limit = &intel_limits_ironlake_dual_lvds;
+				limit = &ilk_limits_dual_lvds;
 		} else {
 			if (refclk == 100000)
-				limit = &intel_limits_ironlake_single_lvds_100m;
+				limit = &ilk_limits_single_lvds_100m;
 			else
-				limit = &intel_limits_ironlake_single_lvds;
+				limit = &ilk_limits_single_lvds;
 		}
 	} else {
-		limit = &intel_limits_ironlake_dac;
+		limit = &ilk_limits_dac;
 	}
 
 	if (!crtc_state->clock_set &&
@@ -9675,7 +10153,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 		return -EINVAL;
 	}
 
-	ironlake_compute_dpll(crtc, crtc_state, NULL);
+	ilk_compute_dpll(crtc, crtc_state, NULL);
 
 	if (!intel_reserve_shared_dplls(state, crtc, NULL)) {
 		DRM_DEBUG_KMS("failed to find PLL for pipe %c\n",
@@ -9750,15 +10228,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc,
 					     &pipe_config->dp_m2_n2);
 }
 
-static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc,
-					struct intel_crtc_state *pipe_config)
+static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc,
+				   struct intel_crtc_state *pipe_config)
 {
 	intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder,
 				     &pipe_config->fdi_m_n, NULL);
 }
 
-static void skylake_get_pfit_config(struct intel_crtc *crtc,
-				    struct intel_crtc_state *pipe_config)
+static void skl_get_pfit_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -9789,8 +10267,8 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc,
 }
 
 static void
-skylake_get_initial_plane_config(struct intel_crtc *crtc,
-				 struct intel_initial_plane_config *plane_config)
+skl_get_initial_plane_config(struct intel_crtc *crtc,
+			     struct intel_initial_plane_config *plane_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -9848,7 +10326,11 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 	case PLANE_CTL_TILED_Y:
 		plane_config->tiling = I915_TILING_Y;
 		if (val & PLANE_CTL_RENDER_DECOMPRESSION_ENABLE)
-			fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS;
+			fb->modifier = INTEL_GEN(dev_priv) >= 12 ?
+				I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS :
+				I915_FORMAT_MOD_Y_TILED_CCS;
+		else if (val & PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE)
+			fb->modifier = I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
 		else
 			fb->modifier = I915_FORMAT_MOD_Y_TILED;
 		break;
@@ -9892,8 +10374,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 	offset = I915_READ(PLANE_OFFSET(pipe, plane_id));
 
 	val = I915_READ(PLANE_SIZE(pipe, plane_id));
-	fb->height = ((val >> 16) & 0xfff) + 1;
-	fb->width = ((val >> 0) & 0x1fff) + 1;
+	fb->height = ((val >> 16) & 0xffff) + 1;
+	fb->width = ((val >> 0) & 0xffff) + 1;
 
 	val = I915_READ(PLANE_STRIDE(pipe, plane_id));
 	stride_mult = skl_plane_stride_mult(fb, 0, DRM_MODE_ROTATE_0);
@@ -9915,8 +10397,8 @@ error:
 	kfree(intel_fb);
 }
 
-static void ironlake_get_pfit_config(struct intel_crtc *crtc,
-				     struct intel_crtc_state *pipe_config)
+static void ilk_get_pfit_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -9939,8 +10421,8 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc,
 	}
 }
 
-static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
-				     struct intel_crtc_state *pipe_config)
+static bool ilk_get_pipe_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -9954,9 +10436,9 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	if (!wakeref)
 		return false;
 
-	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = NULL;
+	pipe_config->master_transcoder = INVALID_TRANSCODER;
 
 	ret = false;
 	tmp = I915_READ(PIPECONF(crtc->pipe));
@@ -9983,6 +10465,16 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	if (tmp & PIPECONF_COLOR_RANGE_SELECT)
 		pipe_config->limited_color_range = true;
 
+	switch (tmp & PIPECONF_OUTPUT_COLORSPACE_MASK) {
+	case PIPECONF_OUTPUT_COLORSPACE_YUV601:
+	case PIPECONF_OUTPUT_COLORSPACE_YUV709:
+		pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444;
+		break;
+	default:
+		pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
+		break;
+	}
+
 	pipe_config->gamma_mode = (tmp & PIPECONF_GAMMA_MODE_MASK_ILK) >>
 		PIPECONF_GAMMA_MODE_SHIFT;
 
@@ -10001,7 +10493,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 		pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >>
 					  FDI_DP_PORT_WIDTH_SHIFT) + 1;
 
-		ironlake_get_fdi_m_n_config(crtc, pipe_config);
+		ilk_get_fdi_m_n_config(crtc, pipe_config);
 
 		if (HAS_PCH_IBX(dev_priv)) {
 			/*
@@ -10029,7 +10521,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 			((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK)
 			 >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1;
 
-		ironlake_pch_clock_get(crtc, pipe_config);
+		ilk_pch_clock_get(crtc, pipe_config);
 	} else {
 		pipe_config->pixel_multiplier = 1;
 	}
@@ -10037,7 +10529,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	intel_get_pipe_timings(crtc, pipe_config);
 	intel_get_pipe_src_size(crtc, pipe_config);
 
-	ironlake_get_pfit_config(crtc, pipe_config);
+	ilk_get_pfit_config(crtc, pipe_config);
 
 	ret = true;
 
@@ -10046,12 +10538,13 @@ out:
 
 	return ret;
 }
-static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
-				      struct intel_crtc_state *crtc_state)
+
+static int hsw_crtc_compute_clock(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 
 	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) ||
 	    INTEL_GEN(dev_priv) >= 11) {
@@ -10068,9 +10561,8 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 	return 0;
 }
 
-static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				   enum port port,
-				   struct intel_crtc_state *pipe_config)
+static void cnl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 temp;
@@ -10084,9 +10576,8 @@ static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 	enum icl_port_dpll_id port_dpll_id;
@@ -10145,9 +10636,8 @@ static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void skl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 temp;
@@ -10161,9 +10651,8 @@ static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
@@ -10264,6 +10753,9 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 		case TRANS_DDI_EDP_INPUT_C_ONOFF:
 			trans_pipe = PIPE_C;
 			break;
+		case TRANS_DDI_EDP_INPUT_D_ONOFF:
+			trans_pipe = PIPE_D;
+			break;
 		}
 
 		if (trans_pipe == crtc->pipe) {
@@ -10348,31 +10840,36 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc,
 	return transcoder_is_dsi(pipe_config->cpu_transcoder);
 }
 
-static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
-				       struct intel_crtc_state *pipe_config)
+static void hsw_get_ddi_port_state(struct intel_crtc *crtc,
+				   struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
 	struct intel_shared_dpll *pll;
 	enum port port;
 	u32 tmp;
 
-	tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder));
-
-	if (INTEL_GEN(dev_priv) >= 12)
-		port = TGL_TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp);
-	else
-		port = TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp);
+	if (transcoder_is_dsi(cpu_transcoder)) {
+		port = (cpu_transcoder == TRANSCODER_DSI_A) ?
+						PORT_A : PORT_B;
+	} else {
+		tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+		if (INTEL_GEN(dev_priv) >= 12)
+			port = TGL_TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp);
+		else
+			port = TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp);
+	}
 
 	if (INTEL_GEN(dev_priv) >= 11)
-		icelake_get_ddi_pll(dev_priv, port, pipe_config);
+		icl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_CANNONLAKE(dev_priv))
-		cannonlake_get_ddi_pll(dev_priv, port, pipe_config);
+		cnl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_GEN9_BC(dev_priv))
-		skylake_get_ddi_pll(dev_priv, port, pipe_config);
+		skl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_GEN9_LP(dev_priv))
 		bxt_get_ddi_pll(dev_priv, port, pipe_config);
 	else
-		haswell_get_ddi_pll(dev_priv, port, pipe_config);
+		hsw_get_ddi_pll(dev_priv, port, pipe_config);
 
 	pll = pipe_config->shared_dpll;
 	if (pll) {
@@ -10393,12 +10890,65 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
 		pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >>
 					  FDI_DP_PORT_WIDTH_SHIFT) + 1;
 
-		ironlake_get_fdi_m_n_config(crtc, pipe_config);
+		ilk_get_fdi_m_n_config(crtc, pipe_config);
 	}
 }
 
-static bool haswell_get_pipe_config(struct intel_crtc *crtc,
-				    struct intel_crtc_state *pipe_config)
+static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv,
+						 enum transcoder cpu_transcoder)
+{
+	u32 trans_port_sync, master_select;
+
+	trans_port_sync = I915_READ(TRANS_DDI_FUNC_CTL2(cpu_transcoder));
+
+	if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0)
+		return INVALID_TRANSCODER;
+
+	master_select = trans_port_sync &
+			PORT_SYNC_MODE_MASTER_SELECT_MASK;
+	if (master_select == 0)
+		return TRANSCODER_EDP;
+	else
+		return master_select - 1;
+}
+
+static void icl_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	u32 transcoders;
+	enum transcoder cpu_transcoder;
+
+	crtc_state->master_transcoder = transcoder_master_readout(dev_priv,
+								  crtc_state->cpu_transcoder);
+
+	transcoders = BIT(TRANSCODER_A) |
+		BIT(TRANSCODER_B) |
+		BIT(TRANSCODER_C) |
+		BIT(TRANSCODER_D);
+	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
+		enum intel_display_power_domain power_domain;
+		intel_wakeref_t trans_wakeref;
+
+		power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+		trans_wakeref = intel_display_power_get_if_enabled(dev_priv,
+								   power_domain);
+
+		if (!trans_wakeref)
+			continue;
+
+		if (transcoder_master_readout(dev_priv, cpu_transcoder) ==
+		    crtc_state->cpu_transcoder)
+			crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder);
+
+		intel_display_power_put(dev_priv, power_domain, trans_wakeref);
+	}
+
+	WARN_ON(crtc_state->master_transcoder != INVALID_TRANSCODER &&
+		crtc_state->sync_mode_slaves_mask);
+}
+
+static bool hsw_get_pipe_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	intel_wakeref_t wakerefs[POWER_DOMAIN_NUM], wf;
@@ -10406,7 +10956,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 	u64 power_domain_mask;
 	bool active;
 
-	intel_crtc_init_scalers(crtc, pipe_config);
+	pipe_config->master_transcoder = INVALID_TRANSCODER;
 
 	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
 	wf = intel_display_power_get_if_enabled(dev_priv, power_domain);
@@ -10433,12 +10983,35 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    INTEL_GEN(dev_priv) >= 11) {
-		haswell_get_ddi_port_state(crtc, pipe_config);
+		hsw_get_ddi_port_state(crtc, pipe_config);
 		intel_get_pipe_timings(crtc, pipe_config);
 	}
 
 	intel_get_pipe_src_size(crtc, pipe_config);
-	intel_get_crtc_ycbcr_config(crtc, pipe_config);
+
+	if (IS_HASWELL(dev_priv)) {
+		u32 tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
+
+		if (tmp & PIPECONF_OUTPUT_COLORSPACE_YUV_HSW)
+			pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444;
+		else
+			pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
+	} else {
+		pipe_config->output_format =
+			bdw_get_pipemisc_output_format(crtc);
+
+		/*
+		 * Currently there is no interface defined to
+		 * check user preference between RGB/YCBCR444
+		 * or YCBCR420. So the only possible case for
+		 * YCBCR444 usage is driving YCBCR420 output
+		 * with LSPCON, when pipe is configured for
+		 * YCBCR444 output and LSPCON takes care of
+		 * downsampling it.
+		 */
+		pipe_config->lspcon_downsampling =
+			pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444;
+	}
 
 	pipe_config->gamma_mode = I915_READ(GAMMA_MODE(crtc->pipe));
 
@@ -10467,9 +11040,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 		power_domain_mask |= BIT_ULL(power_domain);
 
 		if (INTEL_GEN(dev_priv) >= 9)
-			skylake_get_pfit_config(crtc, pipe_config);
+			skl_get_pfit_config(crtc, pipe_config);
 		else
-			ironlake_get_pfit_config(crtc, pipe_config);
+			ilk_get_pfit_config(crtc, pipe_config);
 	}
 
 	if (hsw_crtc_supports_ips(crtc)) {
@@ -10493,6 +11066,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 		pipe_config->pixel_multiplier = 1;
 	}
 
+	if (INTEL_GEN(dev_priv) >= 11 &&
+	    !transcoder_is_dsi(pipe_config->cpu_transcoder))
+		icl_get_trans_port_sync_config(pipe_config);
+
 out:
 	for_each_power_domain(power_domain, power_domain_mask)
 		intel_display_power_put(dev_priv,
@@ -10504,8 +11081,8 @@ out:
 static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 base;
 
@@ -10514,21 +11091,13 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
 	else
 		base = intel_plane_ggtt_offset(plane_state);
 
-	base += plane_state->color_plane[0].offset;
-
-	/* ILK+ do this automagically */
-	if (HAS_GMCH(dev_priv) &&
-	    plane_state->base.rotation & DRM_MODE_ROTATE_180)
-		base += (plane_state->base.crtc_h *
-			 plane_state->base.crtc_w - 1) * fb->format->cpp[0];
-
-	return base;
+	return base + plane_state->color_plane[0].offset;
 }
 
 static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
 {
-	int x = plane_state->base.crtc_x;
-	int y = plane_state->base.crtc_y;
+	int x = plane_state->uapi.dst.x1;
+	int y = plane_state->uapi.dst.y1;
 	u32 pos = 0;
 
 	if (x < 0) {
@@ -10549,9 +11118,9 @@ static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
 static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
 {
 	const struct drm_mode_config *config =
-		&plane_state->base.plane->dev->mode_config;
-	int width = plane_state->base.crtc_w;
-	int height = plane_state->base.crtc_h;
+		&plane_state->uapi.plane->dev->mode_config;
+	int width = drm_rect_width(&plane_state->uapi.dst);
+	int height = drm_rect_height(&plane_state->uapi.dst);
 
 	return width > 0 && width <= config->cursor_width &&
 		height > 0 && height <= config->cursor_height;
@@ -10559,6 +11128,9 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
 
 static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
 {
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->uapi.plane->dev);
+	unsigned int rotation = plane_state->hw.rotation;
 	int src_x, src_y;
 	u32 offset;
 	int ret;
@@ -10567,11 +11139,11 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
-	src_x = plane_state->base.src_x >> 16;
-	src_y = plane_state->base.src_y >> 16;
+	src_x = plane_state->uapi.src.x1 >> 16;
+	src_y = plane_state->uapi.src.y1 >> 16;
 
 	intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
 	offset = intel_plane_compute_aligned_offset(&src_x, &src_y,
@@ -10582,7 +11154,25 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
 		return -EINVAL;
 	}
 
+	/*
+	 * Put the final coordinates back so that the src
+	 * coordinate checks will see the right values.
+	 */
+	drm_rect_translate_to(&plane_state->uapi.src,
+			      src_x << 16, src_y << 16);
+
+	/* ILK+ do this automagically in hardware */
+	if (HAS_GMCH(dev_priv) && rotation & DRM_MODE_ROTATE_180) {
+		const struct drm_framebuffer *fb = plane_state->hw.fb;
+		int src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+		int src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
+
+		offset += (src_h * src_w - 1) * fb->format->cpp[0];
+	}
+
 	plane_state->color_plane[0].offset = offset;
+	plane_state->color_plane[0].x = src_x;
+	plane_state->color_plane[0].y = src_y;
 
 	return 0;
 }
@@ -10590,7 +11180,7 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
 static int intel_check_cursor(struct intel_crtc_state *crtc_state,
 			      struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int ret;
 
 	if (fb && fb->modifier != DRM_FORMAT_MOD_LINEAR) {
@@ -10598,19 +11188,23 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state,
 		return -EINVAL;
 	}
 
-	ret = drm_atomic_helper_check_plane_state(&plane_state->base,
-						  &crtc_state->base,
+	ret = drm_atomic_helper_check_plane_state(&plane_state->uapi,
+						  &crtc_state->uapi,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  true, true);
 	if (ret)
 		return ret;
 
+	/* Use the unclipped src/dst rectangles, which we program to hw */
+	plane_state->uapi.src = drm_plane_state_src(&plane_state->uapi);
+	plane_state->uapi.dst = drm_plane_state_dest(&plane_state->uapi);
+
 	ret = intel_cursor_check_surface(plane_state);
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	ret = intel_plane_check_src_coordinates(plane_state);
@@ -10648,7 +11242,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
 
 static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
 {
-	int width = plane_state->base.crtc_w;
+	int width = drm_rect_width(&plane_state->uapi.dst);
 
 	/*
 	 * 845g/865g are only limited by the width of their cursors,
@@ -10660,7 +11254,7 @@ static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
 static int i845_check_cursor(struct intel_crtc_state *crtc_state,
 			     struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int ret;
 
 	ret = intel_check_cursor(crtc_state, plane_state);
@@ -10674,12 +11268,12 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state,
 	/* Check for which cursor types we support */
 	if (!i845_cursor_size_ok(plane_state)) {
 		DRM_DEBUG("Cursor dimension %dx%d not supported\n",
-			  plane_state->base.crtc_w,
-			  plane_state->base.crtc_h);
+			  drm_rect_width(&plane_state->uapi.dst),
+			  drm_rect_height(&plane_state->uapi.dst));
 		return -EINVAL;
 	}
 
-	WARN_ON(plane_state->base.visible &&
+	WARN_ON(plane_state->uapi.visible &&
 		plane_state->color_plane[0].stride != fb->pitches[0]);
 
 	switch (fb->pitches[0]) {
@@ -10707,9 +11301,9 @@ static void i845_update_cursor(struct intel_plane *plane,
 	u32 cntl = 0, base = 0, pos = 0, size = 0;
 	unsigned long irqflags;
 
-	if (plane_state && plane_state->base.visible) {
-		unsigned int width = plane_state->base.crtc_w;
-		unsigned int height = plane_state->base.crtc_h;
+	if (plane_state && plane_state->uapi.visible) {
+		unsigned int width = drm_rect_width(&plane_state->uapi.dst);
+		unsigned int height = drm_rect_height(&plane_state->uapi.dst);
 
 		cntl = plane_state->ctl |
 			i845_cursor_ctl_crtc(crtc_state);
@@ -10782,7 +11376,7 @@ i9xx_cursor_max_stride(struct intel_plane *plane,
 
 static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 cntl = 0;
 
@@ -10805,13 +11399,13 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
 			   const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
+		to_i915(plane_state->uapi.plane->dev);
 	u32 cntl = 0;
 
 	if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv))
 		cntl |= MCURSOR_TRICKLE_FEED_DISABLE;
 
-	switch (plane_state->base.crtc_w) {
+	switch (drm_rect_width(&plane_state->uapi.dst)) {
 	case 64:
 		cntl |= MCURSOR_MODE_64_ARGB_AX;
 		break;
@@ -10822,11 +11416,11 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
 		cntl |= MCURSOR_MODE_256_ARGB_AX;
 		break;
 	default:
-		MISSING_CASE(plane_state->base.crtc_w);
+		MISSING_CASE(drm_rect_width(&plane_state->uapi.dst));
 		return 0;
 	}
 
-	if (plane_state->base.rotation & DRM_MODE_ROTATE_180)
+	if (plane_state->hw.rotation & DRM_MODE_ROTATE_180)
 		cntl |= MCURSOR_ROTATE_180;
 
 	return cntl;
@@ -10835,9 +11429,9 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
 static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	int width = plane_state->base.crtc_w;
-	int height = plane_state->base.crtc_h;
+		to_i915(plane_state->uapi.plane->dev);
+	int width = drm_rect_width(&plane_state->uapi.dst);
+	int height = drm_rect_height(&plane_state->uapi.dst);
 
 	if (!intel_cursor_size_ok(plane_state))
 		return false;
@@ -10859,7 +11453,7 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
 	 * cursors.
 	 */
 	if (HAS_CUR_FBC(dev_priv) &&
-	    plane_state->base.rotation & DRM_MODE_ROTATE_0) {
+	    plane_state->hw.rotation & DRM_MODE_ROTATE_0) {
 		if (height < 8 || height > width)
 			return false;
 	} else {
@@ -10873,9 +11467,9 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
 static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 			     struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	int ret;
 
@@ -10890,17 +11484,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	/* Check for which cursor types we support */
 	if (!i9xx_cursor_size_ok(plane_state)) {
 		DRM_DEBUG("Cursor dimension %dx%d not supported\n",
-			  plane_state->base.crtc_w,
-			  plane_state->base.crtc_h);
+			  drm_rect_width(&plane_state->uapi.dst),
+			  drm_rect_height(&plane_state->uapi.dst));
 		return -EINVAL;
 	}
 
-	WARN_ON(plane_state->base.visible &&
+	WARN_ON(plane_state->uapi.visible &&
 		plane_state->color_plane[0].stride != fb->pitches[0]);
 
-	if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) {
+	if (fb->pitches[0] !=
+	    drm_rect_width(&plane_state->uapi.dst) * fb->format->cpp[0]) {
 		DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n",
-			      fb->pitches[0], plane_state->base.crtc_w);
+			      fb->pitches[0],
+			      drm_rect_width(&plane_state->uapi.dst));
 		return -EINVAL;
 	}
 
@@ -10915,7 +11511,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	 * Refuse the put the cursor into that compromised position.
 	 */
 	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
-	    plane_state->base.visible && plane_state->base.crtc_x < 0) {
+	    plane_state->uapi.visible && plane_state->uapi.dst.x1 < 0) {
 		DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
 		return -EINVAL;
 	}
@@ -10934,12 +11530,15 @@ static void i9xx_update_cursor(struct intel_plane *plane,
 	u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0;
 	unsigned long irqflags;
 
-	if (plane_state && plane_state->base.visible) {
+	if (plane_state && plane_state->uapi.visible) {
+		unsigned width = drm_rect_width(&plane_state->uapi.dst);
+		unsigned height = drm_rect_height(&plane_state->uapi.dst);
+
 		cntl = plane_state->ctl |
 			i9xx_cursor_ctl_crtc(crtc_state);
 
-		if (plane_state->base.crtc_h != plane_state->base.crtc_w)
-			fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1);
+		if (width != height)
+			fbc_ctl = CUR_FBC_CTL_EN | (height - 1);
 
 		base = intel_cursor_base(plane_state);
 		pos = intel_cursor_position(plane_state);
@@ -11084,13 +11683,12 @@ static int intel_modeset_disable_planes(struct drm_atomic_state *state,
 }
 
 int intel_get_load_detect_pipe(struct drm_connector *connector,
-			       const struct drm_display_mode *mode,
 			       struct intel_load_detect_pipe *old,
 			       struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_crtc *intel_crtc;
 	struct intel_encoder *intel_encoder =
-		intel_attached_encoder(connector);
+		intel_attached_encoder(to_intel_connector(connector));
 	struct drm_crtc *possible_crtc;
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_crtc *crtc = NULL;
@@ -11189,12 +11787,10 @@ found:
 		goto fail;
 	}
 
-	crtc_state->base.active = crtc_state->base.enable = true;
-
-	if (!mode)
-		mode = &load_detect_mode;
+	crtc_state->uapi.active = true;
 
-	ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
+	ret = drm_atomic_set_mode_for_crtc(&crtc_state->uapi,
+					   &load_detect_mode);
 	if (ret)
 		goto fail;
 
@@ -11246,7 +11842,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *intel_encoder =
-		intel_attached_encoder(connector);
+		intel_attached_encoder(to_intel_connector(connector));
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_atomic_state *state = old->restore_state;
 	int ret;
@@ -11286,7 +11882,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int pipe = pipe_config->cpu_transcoder;
+	enum pipe pipe = crtc->pipe;
 	u32 dpll = pipe_config->dpll_hw_state.dpll;
 	u32 fp;
 	struct dpll clock;
@@ -11389,8 +11985,8 @@ int intel_dotclock_calculate(int link_freq,
 	return div_u64(mul_u32_u32(m_n->link_m, link_freq), m_n->link_n);
 }
 
-static void ironlake_pch_clock_get(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config)
+static void ilk_pch_clock_get(struct intel_crtc *crtc,
+			      struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
@@ -11402,11 +11998,38 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc,
 	 * we may need some idea for the dotclock anyway.
 	 * Calculate one based on the FDI configuration.
 	 */
-	pipe_config->base.adjusted_mode.crtc_clock =
+	pipe_config->hw.adjusted_mode.crtc_clock =
 		intel_dotclock_calculate(intel_fdi_link_freq(dev_priv, pipe_config),
 					 &pipe_config->fdi_m_n);
 }
 
+static void intel_crtc_state_reset(struct intel_crtc_state *crtc_state,
+				   struct intel_crtc *crtc)
+{
+	memset(crtc_state, 0, sizeof(*crtc_state));
+
+	__drm_atomic_helper_crtc_state_reset(&crtc_state->uapi, &crtc->base);
+
+	crtc_state->cpu_transcoder = INVALID_TRANSCODER;
+	crtc_state->master_transcoder = INVALID_TRANSCODER;
+	crtc_state->hsw_workaround_pipe = INVALID_PIPE;
+	crtc_state->output_format = INTEL_OUTPUT_FORMAT_INVALID;
+	crtc_state->scaler_state.scaler_id = -1;
+	crtc_state->mst_master_transcoder = INVALID_TRANSCODER;
+}
+
+static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc)
+{
+	struct intel_crtc_state *crtc_state;
+
+	crtc_state = kmalloc(sizeof(*crtc_state), GFP_KERNEL);
+
+	if (crtc_state)
+		intel_crtc_state_reset(crtc_state, crtc);
+
+	return crtc_state;
+}
+
 /* Returns the currently programmed mode of the given encoder. */
 struct drm_display_mode *
 intel_encoder_current_mode(struct intel_encoder *encoder)
@@ -11426,14 +12049,12 @@ intel_encoder_current_mode(struct intel_encoder *encoder)
 	if (!mode)
 		return NULL;
 
-	crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL);
+	crtc_state = intel_crtc_state_alloc(crtc);
 	if (!crtc_state) {
 		kfree(mode);
 		return NULL;
 	}
 
-	crtc_state->base.crtc = &crtc->base;
-
 	if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) {
 		kfree(crtc_state);
 		kfree(mode);
@@ -11471,18 +12092,18 @@ static bool intel_wm_need_update(const struct intel_plane_state *cur,
 				 struct intel_plane_state *new)
 {
 	/* Update watermarks on tiling or size changes. */
-	if (new->base.visible != cur->base.visible)
+	if (new->uapi.visible != cur->uapi.visible)
 		return true;
 
-	if (!cur->base.fb || !new->base.fb)
+	if (!cur->hw.fb || !new->hw.fb)
 		return false;
 
-	if (cur->base.fb->modifier != new->base.fb->modifier ||
-	    cur->base.rotation != new->base.rotation ||
-	    drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) ||
-	    drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) ||
-	    drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) ||
-	    drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst))
+	if (cur->hw.fb->modifier != new->hw.fb->modifier ||
+	    cur->hw.rotation != new->hw.rotation ||
+	    drm_rect_width(&new->uapi.src) != drm_rect_width(&cur->uapi.src) ||
+	    drm_rect_height(&new->uapi.src) != drm_rect_height(&cur->uapi.src) ||
+	    drm_rect_width(&new->uapi.dst) != drm_rect_width(&cur->uapi.dst) ||
+	    drm_rect_height(&new->uapi.dst) != drm_rect_height(&cur->uapi.dst))
 		return true;
 
 	return false;
@@ -11490,10 +12111,10 @@ static bool intel_wm_need_update(const struct intel_plane_state *cur,
 
 static bool needs_scaling(const struct intel_plane_state *state)
 {
-	int src_w = drm_rect_width(&state->base.src) >> 16;
-	int src_h = drm_rect_height(&state->base.src) >> 16;
-	int dst_w = drm_rect_width(&state->base.dst);
-	int dst_h = drm_rect_height(&state->base.dst);
+	int src_w = drm_rect_width(&state->uapi.src) >> 16;
+	int src_h = drm_rect_height(&state->uapi.src) >> 16;
+	int dst_w = drm_rect_width(&state->uapi.dst);
+	int dst_h = drm_rect_height(&state->uapi.dst);
 
 	return (src_w != dst_w || src_h != dst_h);
 }
@@ -11503,14 +12124,13 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 				    const struct intel_plane_state *old_plane_state,
 				    struct intel_plane_state *plane_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	bool mode_changed = needs_modeset(crtc_state);
-	bool was_crtc_enabled = old_crtc_state->base.active;
-	bool is_crtc_enabled = crtc_state->base.active;
+	bool was_crtc_enabled = old_crtc_state->hw.active;
+	bool is_crtc_enabled = crtc_state->hw.active;
 	bool turn_off, turn_on, visible, was_visible;
-	struct drm_framebuffer *fb = plane_state->base.fb;
 	int ret;
 
 	if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) {
@@ -11519,8 +12139,8 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 			return ret;
 	}
 
-	was_visible = old_plane_state->base.visible;
-	visible = plane_state->base.visible;
+	was_visible = old_plane_state->uapi.visible;
+	visible = plane_state->uapi.visible;
 
 	if (!was_crtc_enabled && WARN_ON(was_visible))
 		was_visible = false;
@@ -11536,27 +12156,21 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 	 * only combine the results from all planes in the current place?
 	 */
 	if (!is_crtc_enabled) {
-		plane_state->base.visible = visible = false;
+		plane_state->uapi.visible = visible = false;
 		crtc_state->active_planes &= ~BIT(plane->id);
 		crtc_state->data_rate[plane->id] = 0;
+		crtc_state->min_cdclk[plane->id] = 0;
 	}
 
 	if (!was_visible && !visible)
 		return 0;
 
-	if (fb != old_plane_state->base.fb)
-		crtc_state->fb_changed = true;
-
 	turn_off = was_visible && (!visible || mode_changed);
 	turn_on = visible && (!was_visible || mode_changed);
 
-	DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n",
+	DRM_DEBUG_ATOMIC("[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n",
 			 crtc->base.base.id, crtc->base.name,
 			 plane->base.base.id, plane->base.name,
-			 fb ? fb->base.id : -1);
-
-	DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n",
-			 plane->base.base.id, plane->base.name,
 			 was_visible, visible,
 			 turn_off, turn_on, mode_changed);
 
@@ -11665,7 +12279,7 @@ static int icl_add_linked_planes(struct intel_atomic_state *state)
 	int i;
 
 	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
-		linked = plane_state->linked_plane;
+		linked = plane_state->planar_linked_plane;
 
 		if (!linked)
 			continue;
@@ -11674,8 +12288,8 @@ static int icl_add_linked_planes(struct intel_atomic_state *state)
 		if (IS_ERR(linked_plane_state))
 			return PTR_ERR(linked_plane_state);
 
-		WARN_ON(linked_plane_state->linked_plane != plane);
-		WARN_ON(linked_plane_state->slave == plane_state->slave);
+		WARN_ON(linked_plane_state->planar_linked_plane != plane);
+		WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave);
 	}
 
 	return 0;
@@ -11683,9 +12297,9 @@ static int icl_add_linked_planes(struct intel_atomic_state *state)
 
 static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state);
+	struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state);
 	struct intel_plane *plane, *linked;
 	struct intel_plane_state *plane_state;
 	int i;
@@ -11698,16 +12312,16 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 	 * in the crtc_state->active_planes mask.
 	 */
 	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
-		if (plane->pipe != crtc->pipe || !plane_state->linked_plane)
+		if (plane->pipe != crtc->pipe || !plane_state->planar_linked_plane)
 			continue;
 
-		plane_state->linked_plane = NULL;
-		if (plane_state->slave && !plane_state->base.visible) {
+		plane_state->planar_linked_plane = NULL;
+		if (plane_state->planar_slave && !plane_state->uapi.visible) {
 			crtc_state->active_planes &= ~BIT(plane->id);
 			crtc_state->update_planes |= BIT(plane->id);
 		}
 
-		plane_state->slave = false;
+		plane_state->planar_slave = false;
 	}
 
 	if (!crtc_state->nv12_planes)
@@ -11741,13 +12355,32 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 			return -EINVAL;
 		}
 
-		plane_state->linked_plane = linked;
+		plane_state->planar_linked_plane = linked;
 
-		linked_state->slave = true;
-		linked_state->linked_plane = plane;
+		linked_state->planar_slave = true;
+		linked_state->planar_linked_plane = plane;
 		crtc_state->active_planes |= BIT(linked->id);
 		crtc_state->update_planes |= BIT(linked->id);
 		DRM_DEBUG_KMS("Using %s as Y plane for %s\n", linked->base.name, plane->base.name);
+
+		/* Copy parameters to slave plane */
+		linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE;
+		linked_state->color_ctl = plane_state->color_ctl;
+		memcpy(linked_state->color_plane, plane_state->color_plane,
+		       sizeof(linked_state->color_plane));
+
+		intel_plane_copy_uapi_to_hw_state(linked_state, plane_state);
+		linked_state->uapi.src = plane_state->uapi.src;
+		linked_state->uapi.dst = plane_state->uapi.dst;
+
+		if (icl_is_hdr_plane(dev_priv, plane->id)) {
+			if (linked->id == PLANE_SPRITE5)
+				plane_state->cus_ctl |= PLANE_CUS_PLANE_7;
+			else if (linked->id == PLANE_SPRITE4)
+				plane_state->cus_ctl |= PLANE_CUS_PLANE_6;
+			else
+				MISSING_CASE(linked->id);
+		}
 	}
 
 	return 0;
@@ -11755,34 +12388,150 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 
 static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 
 	return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes;
 }
 
-static int intel_crtc_atomic_check(struct drm_crtc *crtc,
-				   struct drm_crtc_state *crtc_state)
+static bool
+intel_atomic_is_master_connector(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_crtc_state *pipe_config =
-		to_intel_crtc_state(crtc_state);
+	struct drm_crtc *crtc = crtc_state->uapi.crtc;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
+	struct drm_connector *connector;
+	struct drm_connector_state *connector_state;
+	int i;
+
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector_state->crtc != crtc)
+			continue;
+		if (connector->has_tile &&
+		    connector->tile_h_loc == connector->num_h_tile - 1 &&
+		    connector->tile_v_loc == connector->num_v_tile - 1)
+			return true;
+	}
+
+	return false;
+}
+
+static void reset_port_sync_mode_state(struct intel_crtc_state *crtc_state)
+{
+	crtc_state->master_transcoder = INVALID_TRANSCODER;
+	crtc_state->sync_mode_slaves_mask = 0;
+}
+
+static int icl_compute_port_sync_crtc_state(struct drm_connector *connector,
+					    struct intel_crtc_state *crtc_state,
+					    int num_tiled_conns)
+{
+	struct drm_crtc *crtc = crtc_state->uapi.crtc;
+	struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_connector *master_connector;
+	struct drm_connector_list_iter conn_iter;
+	struct drm_crtc *master_crtc = NULL;
+	struct drm_crtc_state *master_crtc_state;
+	struct intel_crtc_state *master_pipe_config;
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP))
+		return 0;
+
+	/*
+	 * In case of tiled displays there could be one or more slaves but there is
+	 * only one master. Lets make the CRTC used by the connector corresponding
+	 * to the last horizonal and last vertical tile a master/genlock CRTC.
+	 * All the other CRTCs corresponding to other tiles of the same Tile group
+	 * are the slave CRTCs and hold a pointer to their genlock CRTC.
+	 * If all tiles not present do not make master slave assignments.
+	 */
+	if (!connector->has_tile ||
+	    crtc_state->hw.mode.hdisplay != connector->tile_h_size ||
+	    crtc_state->hw.mode.vdisplay != connector->tile_v_size ||
+	    num_tiled_conns < connector->num_h_tile * connector->num_v_tile) {
+		reset_port_sync_mode_state(crtc_state);
+		return 0;
+	}
+	/* Last Horizontal and last vertical tile connector is a master
+	 * Master's crtc state is already populated in slave for port sync
+	 */
+	if (connector->tile_h_loc == connector->num_h_tile - 1 &&
+	    connector->tile_v_loc == connector->num_v_tile - 1)
+		return 0;
+
+	/* Loop through all connectors and configure the Slave crtc_state
+	 * to point to the correct master.
+	 */
+	drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+	drm_for_each_connector_iter(master_connector, &conn_iter) {
+		struct drm_connector_state *master_conn_state = NULL;
+
+		if (!(master_connector->has_tile &&
+		      master_connector->tile_group->id == connector->tile_group->id))
+			continue;
+		if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 ||
+		    master_connector->tile_v_loc != master_connector->num_v_tile - 1)
+			continue;
+
+		master_conn_state = drm_atomic_get_connector_state(&state->base,
+								   master_connector);
+		if (IS_ERR(master_conn_state)) {
+			drm_connector_list_iter_end(&conn_iter);
+			return PTR_ERR(master_conn_state);
+		}
+		if (master_conn_state->crtc) {
+			master_crtc = master_conn_state->crtc;
+			break;
+		}
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	if (!master_crtc) {
+		DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n",
+			      crtc->base.id);
+		return -EINVAL;
+	}
+
+	master_crtc_state = drm_atomic_get_crtc_state(&state->base,
+						      master_crtc);
+	if (IS_ERR(master_crtc_state))
+		return PTR_ERR(master_crtc_state);
+
+	master_pipe_config = to_intel_crtc_state(master_crtc_state);
+	crtc_state->master_transcoder = master_pipe_config->cpu_transcoder;
+	master_pipe_config->sync_mode_slaves_mask |=
+		BIT(crtc_state->cpu_transcoder);
+	DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n",
+		      transcoder_name(crtc_state->master_transcoder),
+		      crtc->base.id,
+		      master_pipe_config->sync_mode_slaves_mask);
+
+	return 0;
+}
+
+static int intel_crtc_atomic_check(struct intel_atomic_state *state,
+				   struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	bool mode_changed = needs_modeset(crtc_state);
 	int ret;
-	bool mode_changed = needs_modeset(pipe_config);
 
 	if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) &&
-	    mode_changed && !crtc_state->active)
-		pipe_config->update_wm_post = true;
+	    mode_changed && !crtc_state->hw.active)
+		crtc_state->update_wm_post = true;
 
-	if (mode_changed && crtc_state->enable &&
+	if (mode_changed && crtc_state->hw.enable &&
 	    dev_priv->display.crtc_compute_clock &&
-	    !WARN_ON(pipe_config->shared_dpll)) {
-		ret = dev_priv->display.crtc_compute_clock(intel_crtc,
-							   pipe_config);
+	    !WARN_ON(crtc_state->shared_dpll)) {
+		ret = dev_priv->display.crtc_compute_clock(crtc, crtc_state);
 		if (ret)
 			return ret;
 	}
@@ -11791,19 +12540,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 	 * May need to update pipe gamma enable bits
 	 * when C8 planes are getting enabled/disabled.
 	 */
-	if (c8_planes_changed(pipe_config))
-		crtc_state->color_mgmt_changed = true;
+	if (c8_planes_changed(crtc_state))
+		crtc_state->uapi.color_mgmt_changed = true;
 
-	if (mode_changed || pipe_config->update_pipe ||
-	    crtc_state->color_mgmt_changed) {
-		ret = intel_color_check(pipe_config);
+	if (mode_changed || crtc_state->update_pipe ||
+	    crtc_state->uapi.color_mgmt_changed) {
+		ret = intel_color_check(crtc_state);
 		if (ret)
 			return ret;
 	}
 
 	ret = 0;
 	if (dev_priv->display.compute_pipe_wm) {
-		ret = dev_priv->display.compute_pipe_wm(pipe_config);
+		ret = dev_priv->display.compute_pipe_wm(crtc_state);
 		if (ret) {
 			DRM_DEBUG_KMS("Target pipe watermarks are invalid\n");
 			return ret;
@@ -11819,7 +12568,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 		 * old state and the new state.  We can program these
 		 * immediately.
 		 */
-		ret = dev_priv->display.compute_intermediate_wm(pipe_config);
+		ret = dev_priv->display.compute_intermediate_wm(crtc_state);
 		if (ret) {
 			DRM_DEBUG_KMS("No valid intermediate pipe watermarks are possible\n");
 			return ret;
@@ -11827,29 +12576,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		if (mode_changed || pipe_config->update_pipe)
-			ret = skl_update_scaler_crtc(pipe_config);
-
+		if (mode_changed || crtc_state->update_pipe)
+			ret = skl_update_scaler_crtc(crtc_state);
 		if (!ret)
-			ret = icl_check_nv12_planes(pipe_config);
-		if (!ret)
-			ret = skl_check_pipe_max_pixel_rate(intel_crtc,
-							    pipe_config);
-		if (!ret)
-			ret = intel_atomic_setup_scalers(dev_priv, intel_crtc,
-							 pipe_config);
+			ret = intel_atomic_setup_scalers(dev_priv, crtc,
+							 crtc_state);
 	}
 
 	if (HAS_IPS(dev_priv))
-		pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config);
+		crtc_state->ips_enabled = hsw_compute_ips_config(crtc_state);
 
 	return ret;
 }
 
-static const struct drm_crtc_helper_funcs intel_helper_funcs = {
-	.atomic_check = intel_crtc_atomic_check,
-};
-
 static void intel_modeset_update_connector_atomic_state(struct drm_device *dev)
 {
 	struct intel_connector *connector;
@@ -11918,7 +12657,7 @@ compute_baseline_pipe_bpp(struct intel_crtc *crtc,
 			  struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct drm_atomic_state *state = pipe_config->base.state;
+	struct drm_atomic_state *state = pipe_config->uapi.state;
 	struct drm_connector *connector;
 	struct drm_connector_state *connector_state;
 	int bpp, i;
@@ -11975,7 +12714,7 @@ static void
 intel_dump_infoframe(struct drm_i915_private *dev_priv,
 		     const union hdmi_infoframe *frame)
 {
-	if ((drm_debug & DRM_UT_KMS) == 0)
+	if (!drm_debug_enabled(DRM_UT_KMS))
 		return;
 
 	hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, frame);
@@ -12043,14 +12782,14 @@ static const char *output_formats(enum intel_output_format format)
 
 static void intel_dump_plane_state(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	struct drm_format_name_buf format_name;
 
 	if (!fb) {
 		DRM_DEBUG_KMS("[PLANE:%d:%s] fb: [NOFB], visible: %s\n",
 			      plane->base.base.id, plane->base.name,
-			      yesno(plane_state->base.visible));
+			      yesno(plane_state->uapi.visible));
 		return;
 	}
 
@@ -12058,20 +12797,20 @@ static void intel_dump_plane_state(const struct intel_plane_state *plane_state)
 		      plane->base.base.id, plane->base.name,
 		      fb->base.id, fb->width, fb->height,
 		      drm_get_format_name(fb->format->format, &format_name),
-		      yesno(plane_state->base.visible));
+		      yesno(plane_state->uapi.visible));
 	DRM_DEBUG_KMS("\trotation: 0x%x, scaler: %d\n",
-		      plane_state->base.rotation, plane_state->scaler_id);
-	if (plane_state->base.visible)
+		      plane_state->hw.rotation, plane_state->scaler_id);
+	if (plane_state->uapi.visible)
 		DRM_DEBUG_KMS("\tsrc: " DRM_RECT_FP_FMT " dst: " DRM_RECT_FMT "\n",
-			      DRM_RECT_FP_ARG(&plane_state->base.src),
-			      DRM_RECT_ARG(&plane_state->base.dst));
+			      DRM_RECT_FP_ARG(&plane_state->uapi.src),
+			      DRM_RECT_ARG(&plane_state->uapi.dst));
 }
 
 static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 				   struct intel_atomic_state *state,
 				   const char *context)
 {
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct intel_plane_state *plane_state;
 	struct intel_plane *plane;
@@ -12080,14 +12819,14 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 
 	DRM_DEBUG_KMS("[CRTC:%d:%s] enable: %s %s\n",
 		      crtc->base.base.id, crtc->base.name,
-		      yesno(pipe_config->base.enable), context);
+		      yesno(pipe_config->hw.enable), context);
 
-	if (!pipe_config->base.enable)
+	if (!pipe_config->hw.enable)
 		goto dump_planes;
 
 	snprintf_output_types(buf, sizeof(buf), pipe_config->output_types);
 	DRM_DEBUG_KMS("active: %s, output_types: %s (0x%x), output format: %s\n",
-		      yesno(pipe_config->base.active),
+		      yesno(pipe_config->hw.active),
 		      buf, pipe_config->output_types,
 		      output_formats(pipe_config->output_format));
 
@@ -12127,10 +12866,10 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 		intel_dump_infoframe(dev_priv, &pipe_config->infoframes.hdmi);
 
 	DRM_DEBUG_KMS("requested mode:\n");
-	drm_mode_debug_printmodeline(&pipe_config->base.mode);
+	drm_mode_debug_printmodeline(&pipe_config->hw.mode);
 	DRM_DEBUG_KMS("adjusted mode:\n");
-	drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode);
-	intel_dump_crtc_timings(&pipe_config->base.adjusted_mode);
+	drm_mode_debug_printmodeline(&pipe_config->hw.adjusted_mode);
+	intel_dump_crtc_timings(&pipe_config->hw.adjusted_mode);
 	DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n",
 		      pipe_config->port_clock,
 		      pipe_config->pipe_src_w, pipe_config->pipe_src_h,
@@ -12159,6 +12898,18 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 
 	intel_dpll_dump_hw_state(dev_priv, &pipe_config->dpll_hw_state);
 
+	if (IS_CHERRYVIEW(dev_priv))
+		DRM_DEBUG_KMS("cgm_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n",
+			      pipe_config->cgm_mode, pipe_config->gamma_mode,
+			      pipe_config->gamma_enable, pipe_config->csc_enable);
+	else
+		DRM_DEBUG_KMS("csc_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n",
+			      pipe_config->csc_mode, pipe_config->gamma_mode,
+			      pipe_config->gamma_enable, pipe_config->csc_enable);
+
+	DRM_DEBUG_KMS("MST master transcoder: %s\n",
+		      transcoder_name(pipe_config->mst_master_transcoder));
+
 dump_planes:
 	if (!state)
 		return;
@@ -12179,6 +12930,12 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state)
 	bool ret = true;
 
 	/*
+	 * We're going to peek into connector->state,
+	 * hence connection_mutex must be held.
+	 */
+	drm_modeset_lock_assert_held(&dev->mode_config.connection_mutex);
+
+	/*
 	 * Walk the connector list instead of the encoder
 	 * list to detect the problem on ddi platforms
 	 * where there's just one encoder per digital port.
@@ -12235,22 +12992,59 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state)
 	return ret;
 }
 
+static void
+intel_crtc_copy_uapi_to_hw_state_nomodeset(struct intel_crtc_state *crtc_state)
+{
+	intel_crtc_copy_color_blobs(crtc_state);
+}
+
+static void
+intel_crtc_copy_uapi_to_hw_state(struct intel_crtc_state *crtc_state)
+{
+	crtc_state->hw.enable = crtc_state->uapi.enable;
+	crtc_state->hw.active = crtc_state->uapi.active;
+	crtc_state->hw.mode = crtc_state->uapi.mode;
+	crtc_state->hw.adjusted_mode = crtc_state->uapi.adjusted_mode;
+	intel_crtc_copy_uapi_to_hw_state_nomodeset(crtc_state);
+}
+
+static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state)
+{
+	crtc_state->uapi.enable = crtc_state->hw.enable;
+	crtc_state->uapi.active = crtc_state->hw.active;
+	WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0);
+
+	crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode;
+
+	/* copy color blobs to uapi */
+	drm_property_replace_blob(&crtc_state->uapi.degamma_lut,
+				  crtc_state->hw.degamma_lut);
+	drm_property_replace_blob(&crtc_state->uapi.gamma_lut,
+				  crtc_state->hw.gamma_lut);
+	drm_property_replace_blob(&crtc_state->uapi.ctm,
+				  crtc_state->hw.ctm);
+}
+
 static int
-clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
+intel_crtc_prepare_cleared_state(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv =
-		to_i915(crtc_state->base.crtc->dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_crtc_state *saved_state;
 
-	saved_state = kzalloc(sizeof(*saved_state), GFP_KERNEL);
+	saved_state = intel_crtc_state_alloc(crtc);
 	if (!saved_state)
 		return -ENOMEM;
 
+	/* free the old crtc_state->hw members */
+	intel_crtc_free_hw_state(crtc_state);
+
 	/* FIXME: before the switch to atomic started, a new pipe_config was
 	 * kzalloc'd. Code that depends on any field being zero should be
 	 * fixed, so that the crtc_state can be safely duplicated. For now,
 	 * only fields that are know to not cause problems are preserved. */
 
+	saved_state->uapi = crtc_state->uapi;
 	saved_state->scaler_state = crtc_state->scaler_state;
 	saved_state->shared_dpll = crtc_state->shared_dpll;
 	saved_state->dpll_hw_state = crtc_state->dpll_hw_state;
@@ -12260,32 +13054,36 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 	if (IS_G4X(dev_priv) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		saved_state->wm = crtc_state->wm;
+	/*
+	 * Save the slave bitmask which gets filled for master crtc state during
+	 * slave atomic check call. For all other CRTCs reset the port sync variables
+	 * crtc_state->master_transcoder needs to be set to INVALID
+	 */
+	reset_port_sync_mode_state(saved_state);
+	if (intel_atomic_is_master_connector(crtc_state))
+		saved_state->sync_mode_slaves_mask =
+			crtc_state->sync_mode_slaves_mask;
 
-	/* Keep base drm_crtc_state intact, only clear our extended struct */
-	BUILD_BUG_ON(offsetof(struct intel_crtc_state, base));
-	memcpy(&crtc_state->base + 1, &saved_state->base + 1,
-	       sizeof(*crtc_state) - sizeof(crtc_state->base));
-
+	memcpy(crtc_state, saved_state, sizeof(*crtc_state));
 	kfree(saved_state);
+
+	intel_crtc_copy_uapi_to_hw_state(crtc_state);
+
 	return 0;
 }
 
 static int
 intel_modeset_pipe_config(struct intel_crtc_state *pipe_config)
 {
-	struct drm_crtc *crtc = pipe_config->base.crtc;
-	struct drm_atomic_state *state = pipe_config->base.state;
+	struct drm_crtc *crtc = pipe_config->uapi.crtc;
+	struct drm_atomic_state *state = pipe_config->uapi.state;
 	struct intel_encoder *encoder;
 	struct drm_connector *connector;
 	struct drm_connector_state *connector_state;
 	int base_bpp, ret;
-	int i;
+	int i, tile_group_id = -1, num_tiled_conns = 0;
 	bool retry = true;
 
-	ret = clear_intel_crtc_state(pipe_config);
-	if (ret)
-		return ret;
-
 	pipe_config->cpu_transcoder =
 		(enum transcoder) to_intel_crtc(crtc)->pipe;
 
@@ -12294,13 +13092,13 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config)
 	 * positive or negative polarity is requested, treat this as meaning
 	 * negative polarity.
 	 */
-	if (!(pipe_config->base.adjusted_mode.flags &
+	if (!(pipe_config->hw.adjusted_mode.flags &
 	      (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC)))
-		pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC;
+		pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC;
 
-	if (!(pipe_config->base.adjusted_mode.flags &
+	if (!(pipe_config->hw.adjusted_mode.flags &
 	      (DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC)))
-		pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC;
+		pipe_config->hw.adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC;
 
 	ret = compute_baseline_pipe_bpp(to_intel_crtc(crtc),
 					pipe_config);
@@ -12317,7 +13115,7 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config)
 	 * computation to clearly distinguish it from the adjusted mode, which
 	 * can be changed by the connectors in the below retry loop.
 	 */
-	drm_mode_get_hv_timing(&pipe_config->base.mode,
+	drm_mode_get_hv_timing(&pipe_config->hw.mode,
 			       &pipe_config->pipe_src_w,
 			       &pipe_config->pipe_src_h);
 
@@ -12350,9 +13148,27 @@ encoder_retry:
 	pipe_config->pixel_multiplier = 1;
 
 	/* Fill in default crtc timings, allow encoders to overwrite them. */
-	drm_mode_set_crtcinfo(&pipe_config->base.adjusted_mode,
+	drm_mode_set_crtcinfo(&pipe_config->hw.adjusted_mode,
 			      CRTC_STEREO_DOUBLE);
 
+	/* Get tile_group_id of tiled connector */
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector_state->crtc == crtc &&
+		    connector->has_tile) {
+			tile_group_id = connector->tile_group->id;
+			break;
+		}
+	}
+
+	/* Get total number of tiled connectors in state that belong to
+	 * this tile group.
+	 */
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector->has_tile &&
+		    connector->tile_group->id == tile_group_id)
+			num_tiled_conns++;
+	}
+
 	/* Pass our mode to the connectors and the CRTC to give them a chance to
 	 * adjust it according to limitations or connector properties, and also
 	 * a chance to reject the mode entirely.
@@ -12361,6 +13177,14 @@ encoder_retry:
 		if (connector_state->crtc != crtc)
 			continue;
 
+		ret = icl_compute_port_sync_crtc_state(connector, pipe_config,
+						       num_tiled_conns);
+		if (ret) {
+			DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n",
+				      ret);
+			return ret;
+		}
+
 		encoder = to_intel_encoder(connector_state->best_encoder);
 		ret = encoder->compute_config(encoder, pipe_config,
 					      connector_state);
@@ -12375,7 +13199,7 @@ encoder_retry:
 	/* Set default port clock if not overwritten by the encoder. Needs to be
 	 * done afterwards in case the encoder adjusts the mode. */
 	if (!pipe_config->port_clock)
-		pipe_config->port_clock = pipe_config->base.adjusted_mode.crtc_clock
+		pipe_config->port_clock = pipe_config->hw.adjusted_mode.crtc_clock
 			* pipe_config->pixel_multiplier;
 
 	ret = intel_crtc_compute_config(to_intel_crtc(crtc), pipe_config);
@@ -12404,6 +13228,12 @@ encoder_retry:
 	DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n",
 		      base_bpp, pipe_config->pipe_bpp, pipe_config->dither);
 
+	/*
+	 * Make drm_calc_timestamping_constants in
+	 * drm_atomic_helper_update_legacy_modeset_state() happy
+	 */
+	pipe_config->uapi.adjusted_mode = pipe_config->hw.adjusted_mode;
+
 	return 0;
 }
 
@@ -12482,25 +13312,26 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv,
 			       const union hdmi_infoframe *b)
 {
 	if (fastset) {
-		if ((drm_debug & DRM_UT_KMS) == 0)
+		if (!drm_debug_enabled(DRM_UT_KMS))
 			return;
 
-		drm_dbg(DRM_UT_KMS, "fastset mismatch in %s infoframe", name);
-		drm_dbg(DRM_UT_KMS, "expected:");
+		DRM_DEBUG_KMS("fastset mismatch in %s infoframe\n", name);
+		DRM_DEBUG_KMS("expected:\n");
 		hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, a);
-		drm_dbg(DRM_UT_KMS, "found");
+		DRM_DEBUG_KMS("found:\n");
 		hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, b);
 	} else {
-		drm_err("mismatch in %s infoframe", name);
-		drm_err("expected:");
+		DRM_ERROR("mismatch in %s infoframe\n", name);
+		DRM_ERROR("expected:\n");
 		hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, a);
-		drm_err("found");
+		DRM_ERROR("found:\n");
 		hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, b);
 	}
 }
 
-static void __printf(3, 4)
-pipe_config_mismatch(bool fastset, const char *name, const char *format, ...)
+static void __printf(4, 5)
+pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc,
+		     const char *name, const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
@@ -12510,9 +13341,11 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...)
 	vaf.va = &args;
 
 	if (fastset)
-		drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf);
+		DRM_DEBUG_KMS("[CRTC:%d:%s] fastset mismatch in %s %pV\n",
+			      crtc->base.base.id, crtc->base.name, name, &vaf);
 	else
-		drm_err("mismatch in %s %pV", name, &vaf);
+		DRM_ERROR("[CRTC:%d:%s] mismatch in %s %pV\n",
+			  crtc->base.base.id, crtc->base.name, name, &vaf);
 
 	va_end(args);
 }
@@ -12539,11 +13372,13 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 			  const struct intel_crtc_state *pipe_config,
 			  bool fastset)
 {
-	struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(current_config->uapi.crtc->dev);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	bool ret = true;
+	u32 bp_gamma = 0;
 	bool fixup_inherited = fastset &&
-		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
-		!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED);
+		(current_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
+		!(pipe_config->hw.mode.private_flags & I915_MODE_FLAG_INHERITED);
 
 	if (fixup_inherited && !fastboot_enabled(dev_priv)) {
 		DRM_DEBUG_KMS("initial modeset and fastboot not set\n");
@@ -12552,8 +13387,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(expected 0x%08x, found 0x%08x)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name, \
 				     pipe_config->name); \
 		ret = false; \
@@ -12562,8 +13397,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(expected %i, found %i)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "(expected %i, found %i)", \
 				     current_config->name, \
 				     pipe_config->name); \
 		ret = false; \
@@ -12572,8 +13407,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(expected %s, found %s)\n", \
+		pipe_config_mismatch(fastset, crtc,  __stringify(name), \
+				     "(expected %s, found %s)", \
 				     yesno(current_config->name), \
 				     yesno(pipe_config->name)); \
 		ret = false; \
@@ -12589,8 +13424,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \
 		PIPE_CONF_CHECK_BOOL(name); \
 	} else { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \
 				     yesno(current_config->name), \
 				     yesno(pipe_config->name)); \
 		ret = false; \
@@ -12599,8 +13434,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_P(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(expected %p, found %p)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "(expected %p, found %p)", \
 				     current_config->name, \
 				     pipe_config->name); \
 		ret = false; \
@@ -12611,9 +13446,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	if (!intel_compare_link_m_n(&current_config->name, \
 				    &pipe_config->name,\
 				    !fastset)) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected tu %i gmch %i/%i link %i/%i, " \
-				     "found tu %i, gmch %i/%i link %i/%i)\n", \
+				     "found tu %i, gmch %i/%i link %i/%i)", \
 				     current_config->name.tu, \
 				     current_config->name.gmch_m, \
 				     current_config->name.gmch_n, \
@@ -12638,10 +13473,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 				    &pipe_config->name, !fastset) && \
 	    !intel_compare_link_m_n(&current_config->alt_name, \
 				    &pipe_config->name, !fastset)) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected tu %i gmch %i/%i link %i/%i, " \
 				     "or tu %i gmch %i/%i link %i/%i, " \
-				     "found tu %i, gmch %i/%i link %i/%i)\n", \
+				     "found tu %i, gmch %i/%i link %i/%i)", \
 				     current_config->name.tu, \
 				     current_config->name.gmch_m, \
 				     current_config->name.gmch_n, \
@@ -12663,8 +13498,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \
 	if ((current_config->name ^ pipe_config->name) & (mask)) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(%x) (expected %i, found %i)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "(%x) (expected %i, found %i)", \
 				     (mask), \
 				     current_config->name & (mask), \
 				     pipe_config->name & (mask)); \
@@ -12674,8 +13509,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \
 	if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \
-		pipe_config_mismatch(fastset, __stringify(name), \
-				     "(expected %i, found %i)\n", \
+		pipe_config_mismatch(fastset, crtc, __stringify(name), \
+				     "(expected %i, found %i)", \
 				     current_config->name, \
 				     pipe_config->name); \
 		ret = false; \
@@ -12692,6 +13527,24 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	} \
 } while (0)
 
+#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \
+	if (current_config->name1 != pipe_config->name1) { \
+		pipe_config_mismatch(fastset, crtc, __stringify(name1), \
+				"(expected %i, found %i, won't compare lut values)", \
+				current_config->name1, \
+				pipe_config->name1); \
+		ret = false;\
+	} else { \
+		if (!intel_color_lut_equal(current_config->name2, \
+					pipe_config->name2, pipe_config->name1, \
+					bit_precision)) { \
+			pipe_config_mismatch(fastset, crtc, __stringify(name2), \
+					"hw_state doesn't match sw_state"); \
+			ret = false; \
+		} \
+	} \
+} while (0)
+
 #define PIPE_CONF_QUIRK(quirk) \
 	((current_config->quirks | pipe_config->quirks) & (quirk))
 
@@ -12714,22 +13567,23 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 	PIPE_CONF_CHECK_X(output_types);
 
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hdisplay);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_htotal);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_start);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_end);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hsync_start);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hsync_end);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hdisplay);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_htotal);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hblank_start);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hblank_end);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hsync_start);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_hsync_end);
 
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vdisplay);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vtotal);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vblank_start);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vblank_end);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_start);
-	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_end);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vdisplay);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vtotal);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vblank_start);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vblank_end);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vsync_start);
+	PIPE_CONF_CHECK_I(hw.adjusted_mode.crtc_vsync_end);
 
 	PIPE_CONF_CHECK_I(pixel_multiplier);
 	PIPE_CONF_CHECK_I(output_format);
+	PIPE_CONF_CHECK_I(dc3co_exitline);
 	PIPE_CONF_CHECK_BOOL(has_hdmi_sink);
 	if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
@@ -12738,20 +13592,21 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_BOOL(hdmi_scrambling);
 	PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio);
 	PIPE_CONF_CHECK_BOOL(has_infoframe);
+	PIPE_CONF_CHECK_BOOL(fec_enable);
 
 	PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio);
 
-	PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags,
+	PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags,
 			      DRM_MODE_FLAG_INTERLACE);
 
 	if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_MODE_SYNC_FLAGS)) {
-		PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags,
+		PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags,
 				      DRM_MODE_FLAG_PHSYNC);
-		PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags,
+		PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags,
 				      DRM_MODE_FLAG_NHSYNC);
-		PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags,
+		PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags,
 				      DRM_MODE_FLAG_PVSYNC);
-		PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags,
+		PIPE_CONF_CHECK_FLAGS(hw.adjusted_mode.flags,
 				      DRM_MODE_FLAG_NVSYNC);
 	}
 
@@ -12787,6 +13642,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 			PIPE_CONF_CHECK_X(csc_mode);
 		PIPE_CONF_CHECK_BOOL(gamma_enable);
 		PIPE_CONF_CHECK_BOOL(csc_enable);
+
+		bp_gamma = intel_color_get_gamma_bit_precision(pipe_config);
+		if (bp_gamma)
+			PIPE_CONF_CHECK_COLOR_LUT(gamma_mode, hw.gamma_lut, bp_gamma);
+
 	}
 
 	PIPE_CONF_CHECK_BOOL(double_wide);
@@ -12830,7 +13690,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5)
 		PIPE_CONF_CHECK_I(pipe_bpp);
 
-	PIPE_CONF_CHECK_CLOCK_FUZZY(base.adjusted_mode.crtc_clock);
+	PIPE_CONF_CHECK_CLOCK_FUZZY(hw.adjusted_mode.crtc_clock);
 	PIPE_CONF_CHECK_CLOCK_FUZZY(port_clock);
 
 	PIPE_CONF_CHECK_I(min_voltage_level);
@@ -12842,6 +13702,15 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_INFOFRAME(hdmi);
 	PIPE_CONF_CHECK_INFOFRAME(drm);
 
+	PIPE_CONF_CHECK_I(sync_mode_slaves_mask);
+	PIPE_CONF_CHECK_I(master_transcoder);
+
+	PIPE_CONF_CHECK_I(dsc.compression_enable);
+	PIPE_CONF_CHECK_I(dsc.dsc_split);
+	PIPE_CONF_CHECK_I(dsc.compressed_bpp);
+
+	PIPE_CONF_CHECK_I(mst_master_transcoder);
+
 #undef PIPE_CONF_CHECK_X
 #undef PIPE_CONF_CHECK_I
 #undef PIPE_CONF_CHECK_BOOL
@@ -12849,6 +13718,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 #undef PIPE_CONF_CHECK_P
 #undef PIPE_CONF_CHECK_FLAGS
 #undef PIPE_CONF_CHECK_CLOCK_FUZZY
+#undef PIPE_CONF_CHECK_COLOR_LUT
 #undef PIPE_CONF_QUIRK
 
 	return ret;
@@ -12860,7 +13730,7 @@ static void intel_pipe_config_sanity_check(struct drm_i915_private *dev_priv,
 	if (pipe_config->has_pch_encoder) {
 		int fdi_dotclock = intel_dotclock_calculate(intel_fdi_link_freq(dev_priv, pipe_config),
 							    &pipe_config->fdi_m_n);
-		int dotclock = pipe_config->base.adjusted_mode.crtc_clock;
+		int dotclock = pipe_config->hw.adjusted_mode.crtc_clock;
 
 		/*
 		 * FDI already provided one idea for the dotclock.
@@ -12888,7 +13758,7 @@ static void verify_wm_state(struct intel_crtc *crtc,
 	const enum pipe pipe = crtc->pipe;
 	int plane, level, max_level = ilk_wm_max_level(dev_priv);
 
-	if (INTEL_GEN(dev_priv) < 9 || !new_crtc_state->base.active)
+	if (INTEL_GEN(dev_priv) < 9 || !new_crtc_state->hw.active)
 		return;
 
 	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
@@ -13093,16 +13963,14 @@ verify_crtc_state(struct intel_crtc *crtc,
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_encoder *encoder;
-	struct intel_crtc_state *pipe_config;
-	struct drm_atomic_state *state;
+	struct intel_crtc_state *pipe_config = old_crtc_state;
+	struct drm_atomic_state *state = old_crtc_state->uapi.state;
 	bool active;
 
-	state = old_crtc_state->base.state;
-	__drm_atomic_helper_crtc_destroy_state(&old_crtc_state->base);
-	pipe_config = old_crtc_state;
-	memset(pipe_config, 0, sizeof(*pipe_config));
-	pipe_config->base.crtc = &crtc->base;
-	pipe_config->base.state = state;
+	__drm_atomic_helper_crtc_destroy_state(&old_crtc_state->uapi);
+	intel_crtc_free_hw_state(old_crtc_state);
+	intel_crtc_state_reset(old_crtc_state, crtc);
+	old_crtc_state->uapi.state = state;
 
 	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.base.id, crtc->base.name);
 
@@ -13110,23 +13978,26 @@ verify_crtc_state(struct intel_crtc *crtc,
 
 	/* we keep both pipes enabled on 830 */
 	if (IS_I830(dev_priv))
-		active = new_crtc_state->base.active;
+		active = new_crtc_state->hw.active;
 
-	I915_STATE_WARN(new_crtc_state->base.active != active,
-	     "crtc active state doesn't match with hw state "
-	     "(expected %i, found %i)\n", new_crtc_state->base.active, active);
+	I915_STATE_WARN(new_crtc_state->hw.active != active,
+			"crtc active state doesn't match with hw state "
+			"(expected %i, found %i)\n",
+			new_crtc_state->hw.active, active);
 
-	I915_STATE_WARN(crtc->active != new_crtc_state->base.active,
-	     "transitional active state does not match atomic hw state "
-	     "(expected %i, found %i)\n", new_crtc_state->base.active, crtc->active);
+	I915_STATE_WARN(crtc->active != new_crtc_state->hw.active,
+			"transitional active state does not match atomic hw state "
+			"(expected %i, found %i)\n",
+			new_crtc_state->hw.active, crtc->active);
 
 	for_each_encoder_on_crtc(dev, &crtc->base, encoder) {
 		enum pipe pipe;
 
 		active = encoder->get_hw_state(encoder, &pipe);
-		I915_STATE_WARN(active != new_crtc_state->base.active,
-			"[ENCODER:%i] active %i with crtc active %i\n",
-			encoder->base.base.id, active, new_crtc_state->base.active);
+		I915_STATE_WARN(active != new_crtc_state->hw.active,
+				"[ENCODER:%i] active %i with crtc active %i\n",
+				encoder->base.base.id, active,
+				new_crtc_state->hw.active);
 
 		I915_STATE_WARN(active && crtc->pipe != pipe,
 				"Encoder connected to wrong pipe %c\n",
@@ -13138,7 +14009,7 @@ verify_crtc_state(struct intel_crtc *crtc,
 
 	intel_crtc_compute_pixel_rate(pipe_config);
 
-	if (!new_crtc_state->base.active)
+	if (!new_crtc_state->hw.active)
 		return;
 
 	intel_pipe_config_sanity_check(dev_priv, pipe_config);
@@ -13160,8 +14031,8 @@ intel_verify_planes(struct intel_atomic_state *state)
 
 	for_each_new_intel_plane_in_state(state, plane,
 					  plane_state, i)
-		assert_plane(plane, plane_state->slave ||
-			     plane_state->base.visible);
+		assert_plane(plane, plane_state->planar_slave ||
+			     plane_state->uapi.visible);
 }
 
 static void
@@ -13200,7 +14071,7 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv,
 
 	crtc_mask = drm_crtc_mask(&crtc->base);
 
-	if (new_crtc_state->base.active)
+	if (new_crtc_state->hw.active)
 		I915_STATE_WARN(!(pll->active_mask & crtc_mask),
 				"pll active mismatch (expected pipe %c in active mask 0x%02x)\n",
 				pipe_name(drm_crtc_index(&crtc->base)), pll->active_mask);
@@ -13276,10 +14147,15 @@ intel_modeset_verify_disabled(struct drm_i915_private *dev_priv,
 	verify_disabled_dpll_state(dev_priv);
 }
 
-static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
+static void
+intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+
+	drm_calc_timestamping_constants(&crtc->base, adjusted_mode);
 
 	/*
 	 * The scanline counter increments at the leading edge of hsync.
@@ -13309,7 +14185,6 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
 	 * answer that's slightly in the future.
 	 */
 	if (IS_GEN(dev_priv, 2)) {
-		const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
 		int vtotal;
 
 		vtotal = adjusted_mode->crtc_vtotal;
@@ -13320,8 +14195,9 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
 	} else if (HAS_DDI(dev_priv) &&
 		   intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
 		crtc->scanline_offset = 2;
-	} else
+	} else {
 		crtc->scanline_offset = 1;
+	}
 }
 
 static void intel_modeset_clear_plls(struct intel_atomic_state *state)
@@ -13348,7 +14224,7 @@ static void intel_modeset_clear_plls(struct intel_atomic_state *state)
  * multiple pipes, and planes are enabled after the pipe, we need to wait at
  * least 2 vblanks on the first pipe before enabling planes on the second pipe.
  */
-static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
+static int hsw_mode_set_planes_workaround(struct intel_atomic_state *state)
 {
 	struct intel_crtc_state *crtc_state;
 	struct intel_crtc *crtc;
@@ -13359,7 +14235,7 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
 
 	/* look at all crtc's that are going to be enabled in during modeset */
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		if (!crtc_state->base.active ||
+		if (!crtc_state->hw.active ||
 		    !needs_modeset(crtc_state))
 			continue;
 
@@ -13384,7 +14260,7 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
 
 		crtc_state->hsw_workaround_pipe = INVALID_PIPE;
 
-		if (!crtc_state->base.active ||
+		if (!crtc_state->hw.active ||
 		    needs_modeset(crtc_state))
 			continue;
 
@@ -13403,162 +14279,47 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
 	return 0;
 }
 
-static int intel_lock_all_pipes(struct intel_atomic_state *state)
-{
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	struct intel_crtc *crtc;
-
-	/* Add all pipes to the state */
-	for_each_intel_crtc(&dev_priv->drm, crtc) {
-		struct intel_crtc_state *crtc_state;
-
-		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
-	}
-
-	return 0;
-}
-
-static int intel_modeset_all_pipes(struct intel_atomic_state *state)
-{
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	struct intel_crtc *crtc;
-
-	/*
-	 * Add all pipes to the state, and force
-	 * a modeset on all the active ones.
-	 */
-	for_each_intel_crtc(&dev_priv->drm, crtc) {
-		struct intel_crtc_state *crtc_state;
-		int ret;
-
-		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
-
-		if (!crtc_state->base.active || needs_modeset(crtc_state))
-			continue;
-
-		crtc_state->base.mode_changed = true;
-
-		ret = drm_atomic_add_affected_connectors(&state->base,
-							 &crtc->base);
-		if (ret)
-			return ret;
-
-		ret = drm_atomic_add_affected_planes(&state->base,
-						     &crtc->base);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static int intel_modeset_checks(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
 	struct intel_crtc *crtc;
-	int ret = 0, i;
-
-	if (!check_digital_port_conflicts(state)) {
-		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
-		return -EINVAL;
-	}
+	int ret, i;
 
 	/* keep the current setting */
 	if (!state->cdclk.force_min_cdclk_changed)
 		state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk;
 
 	state->modeset = true;
-	state->active_crtcs = dev_priv->active_crtcs;
+	state->active_pipes = dev_priv->active_pipes;
 	state->cdclk.logical = dev_priv->cdclk.logical;
 	state->cdclk.actual = dev_priv->cdclk.actual;
-	state->cdclk.pipe = INVALID_PIPE;
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
-		if (new_crtc_state->base.active)
-			state->active_crtcs |= 1 << i;
+		if (new_crtc_state->hw.active)
+			state->active_pipes |= BIT(crtc->pipe);
 		else
-			state->active_crtcs &= ~(1 << i);
+			state->active_pipes &= ~BIT(crtc->pipe);
 
-		if (old_crtc_state->base.active != new_crtc_state->base.active)
-			state->active_pipe_changes |= drm_crtc_mask(&crtc->base);
+		if (old_crtc_state->hw.active != new_crtc_state->hw.active)
+			state->active_pipe_changes |= BIT(crtc->pipe);
 	}
 
-	/*
-	 * See if the config requires any additional preparation, e.g.
-	 * to adjust global state with pipes off.  We need to do this
-	 * here so we can get the modeset_pipe updated config for the new
-	 * mode set on this crtc.  For other crtcs we need to use the
-	 * adjusted_mode bits in the crtc directly.
-	 */
-	if (dev_priv->display.modeset_calc_cdclk) {
-		enum pipe pipe;
-
-		ret = dev_priv->display.modeset_calc_cdclk(state);
-		if (ret < 0)
+	if (state->active_pipe_changes) {
+		ret = intel_atomic_lock_global_state(state);
+		if (ret)
 			return ret;
-
-		/*
-		 * Writes to dev_priv->cdclk.logical must protected by
-		 * holding all the crtc locks, even if we don't end up
-		 * touching the hardware
-		 */
-		if (intel_cdclk_changed(&dev_priv->cdclk.logical,
-					&state->cdclk.logical)) {
-			ret = intel_lock_all_pipes(state);
-			if (ret < 0)
-				return ret;
-		}
-
-		if (is_power_of_2(state->active_crtcs)) {
-			struct intel_crtc *crtc;
-			struct intel_crtc_state *crtc_state;
-
-			pipe = ilog2(state->active_crtcs);
-			crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
-			crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
-			if (crtc_state && needs_modeset(crtc_state))
-				pipe = INVALID_PIPE;
-		} else {
-			pipe = INVALID_PIPE;
-		}
-
-		/* All pipes must be switched off while we change the cdclk. */
-		if (pipe != INVALID_PIPE &&
-		    intel_cdclk_needs_cd2x_update(dev_priv,
-						  &dev_priv->cdclk.actual,
-						  &state->cdclk.actual)) {
-			ret = intel_lock_all_pipes(state);
-			if (ret < 0)
-				return ret;
-
-			state->cdclk.pipe = pipe;
-		} else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual,
-						     &state->cdclk.actual)) {
-			ret = intel_modeset_all_pipes(state);
-			if (ret < 0)
-				return ret;
-
-			state->cdclk.pipe = INVALID_PIPE;
-		}
-
-		DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n",
-			      state->cdclk.logical.cdclk,
-			      state->cdclk.actual.cdclk);
-		DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n",
-			      state->cdclk.logical.voltage_level,
-			      state->cdclk.actual.voltage_level);
 	}
 
+	ret = intel_modeset_calc_cdclk(state);
+	if (ret)
+		return ret;
+
 	intel_modeset_clear_plls(state);
 
 	if (IS_HASWELL(dev_priv))
-		return haswell_mode_set_planes_workaround(state);
+		return hsw_mode_set_planes_workaround(state);
 
 	return 0;
 }
@@ -13586,9 +14347,13 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta
 	if (!intel_pipe_config_compare(old_crtc_state, new_crtc_state, true))
 		return;
 
-	new_crtc_state->base.mode_changed = false;
+	new_crtc_state->uapi.mode_changed = false;
 	new_crtc_state->update_pipe = true;
+}
 
+static void intel_crtc_copy_fastset(const struct intel_crtc_state *old_crtc_state,
+				    struct intel_crtc_state *new_crtc_state)
+{
 	/*
 	 * If we're not doing the full modeset we want to
 	 * keep the current M/N values as they may be
@@ -13603,6 +14368,215 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta
 	new_crtc_state->has_drrs = old_crtc_state->has_drrs;
 }
 
+static int intel_crtc_add_planes_to_state(struct intel_atomic_state *state,
+					  struct intel_crtc *crtc,
+					  u8 plane_ids_mask)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_plane *plane;
+
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		struct intel_plane_state *plane_state;
+
+		if ((plane_ids_mask & BIT(plane->id)) == 0)
+			continue;
+
+		plane_state = intel_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+	}
+
+	return 0;
+}
+
+static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv)
+{
+	/* See {hsw,vlv,ivb}_plane_ratio() */
+	return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) ||
+		IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) ||
+		IS_IVYBRIDGE(dev_priv);
+}
+
+static int intel_atomic_check_planes(struct intel_atomic_state *state,
+				     bool *need_modeset)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
+	struct intel_plane_state *plane_state;
+	struct intel_plane *plane;
+	struct intel_crtc *crtc;
+	int i, ret;
+
+	ret = icl_add_linked_planes(state);
+	if (ret)
+		return ret;
+
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		ret = intel_plane_atomic_check(state, plane);
+		if (ret) {
+			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n",
+					 plane->base.base.id, plane->base.name);
+			return ret;
+		}
+	}
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		u8 old_active_planes, new_active_planes;
+
+		ret = icl_check_nv12_planes(new_crtc_state);
+		if (ret)
+			return ret;
+
+		/*
+		 * On some platforms the number of active planes affects
+		 * the planes' minimum cdclk calculation. Add such planes
+		 * to the state before we compute the minimum cdclk.
+		 */
+		if (!active_planes_affects_min_cdclk(dev_priv))
+			continue;
+
+		old_active_planes = old_crtc_state->active_planes & ~BIT(PLANE_CURSOR);
+		new_active_planes = new_crtc_state->active_planes & ~BIT(PLANE_CURSOR);
+
+		if (hweight8(old_active_planes) == hweight8(new_active_planes))
+			continue;
+
+		ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * active_planes bitmask has been updated, and potentially
+	 * affected planes are part of the state. We can now
+	 * compute the minimum cdclk for each plane.
+	 */
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i)
+		*need_modeset |= intel_plane_calc_min_cdclk(state, plane);
+
+	return 0;
+}
+
+static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
+{
+	struct intel_crtc_state *crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+		int ret = intel_crtc_atomic_check(state, crtc);
+		if (ret) {
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n",
+					 crtc->base.base.id, crtc->base.name);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state,
+					       enum transcoder transcoder)
+{
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
+		if (new_crtc_state->cpu_transcoder == transcoder)
+			return needs_modeset(new_crtc_state);
+
+	return false;
+}
+
+static void
+intel_modeset_synced_crtcs(struct intel_atomic_state *state,
+			   u8 transcoders)
+{
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc,
+					 new_crtc_state, i) {
+		if (transcoders & BIT(new_crtc_state->cpu_transcoder)) {
+			new_crtc_state->uapi.mode_changed = true;
+			new_crtc_state->update_pipe = false;
+		}
+	}
+}
+
+static int
+intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector *connector;
+	struct drm_connector_list_iter conn_iter;
+	int ret = 0;
+
+	drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
+		struct drm_connector_state *conn_state;
+		struct drm_crtc_state *crtc_state;
+
+		if (!connector->has_tile ||
+		    connector->tile_group->id != tile_grp_id)
+			continue;
+		conn_state = drm_atomic_get_connector_state(&state->base,
+							    connector);
+		if (IS_ERR(conn_state)) {
+			ret =  PTR_ERR(conn_state);
+			break;
+		}
+
+		if (!conn_state->crtc)
+			continue;
+
+		crtc_state = drm_atomic_get_crtc_state(&state->base,
+						       conn_state->crtc);
+		if (IS_ERR(crtc_state)) {
+			ret = PTR_ERR(crtc_state);
+			break;
+		}
+		crtc_state->mode_changed = true;
+		ret = drm_atomic_add_affected_connectors(&state->base,
+							 conn_state->crtc);
+		if (ret)
+			break;
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	return ret;
+}
+
+static int
+intel_atomic_check_tiled_conns(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector *connector;
+	struct drm_connector_state *old_conn_state, *new_conn_state;
+	int i, ret;
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	/* Is tiled, mark all other tiled CRTCs as needing a modeset */
+	for_each_oldnew_connector_in_state(&state->base, connector,
+					   old_conn_state, new_conn_state, i) {
+		if (!connector->has_tile)
+			continue;
+		if (!intel_connector_needs_modeset(state, connector))
+			continue;
+
+		ret = intel_modeset_all_tiles(state, connector->tile_group->id);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * intel_atomic_check - validate state object
  * @dev: drm device
@@ -13616,44 +14590,119 @@ static int intel_atomic_check(struct drm_device *dev,
 	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
 	struct intel_crtc *crtc;
 	int ret, i;
-	bool any_ms = state->cdclk.force_min_cdclk_changed;
+	bool any_ms = false;
 
 	/* Catch I915_MODE_FLAG_INHERITED */
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
-		if (new_crtc_state->base.mode.private_flags !=
-		    old_crtc_state->base.mode.private_flags)
-			new_crtc_state->base.mode_changed = true;
+		if (new_crtc_state->hw.mode.private_flags !=
+		    old_crtc_state->hw.mode.private_flags)
+			new_crtc_state->uapi.mode_changed = true;
 	}
 
 	ret = drm_atomic_helper_check_modeset(dev, &state->base);
 	if (ret)
 		goto fail;
 
+	/**
+	 * This check adds all the connectors in current state that belong to
+	 * the same tile group to a full modeset.
+	 * This function directly sets the mode_changed to true and we also call
+	 * drm_atomic_add_affected_connectors(). Hence we are not explicitly
+	 * calling drm_atomic_helper_check_modeset() after this.
+	 *
+	 * Fixme: Handle some corner cases where one of the
+	 * tiled connectors gets disconnected and tile info is lost but since it
+	 * was previously synced to other conn, we need to add that to the modeset.
+	 */
+	ret = intel_atomic_check_tiled_conns(state);
+	if (ret)
+		goto fail;
+
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
-		if (!needs_modeset(new_crtc_state))
+		if (!needs_modeset(new_crtc_state)) {
+			/* Light copy */
+			intel_crtc_copy_uapi_to_hw_state_nomodeset(new_crtc_state);
+
 			continue;
+		}
 
-		if (!new_crtc_state->base.enable) {
-			any_ms = true;
+		if (!new_crtc_state->uapi.enable) {
+			intel_crtc_copy_uapi_to_hw_state(new_crtc_state);
 			continue;
 		}
 
+		ret = intel_crtc_prepare_cleared_state(new_crtc_state);
+		if (ret)
+			goto fail;
+
 		ret = intel_modeset_pipe_config(new_crtc_state);
 		if (ret)
 			goto fail;
 
 		intel_crtc_check_fastset(old_crtc_state, new_crtc_state);
+	}
+
+	/**
+	 * Check if fastset is allowed by external dependencies like other
+	 * pipes and transcoders.
+	 *
+	 * Right now it only forces a fullmodeset when the MST master
+	 * transcoder did not changed but the pipe of the master transcoder
+	 * needs a fullmodeset so all slaves also needs to do a fullmodeset or
+	 * in case of port synced crtcs, if one of the synced crtcs
+	 * needs a full modeset, all other synced crtcs should be
+	 * forced a full modeset.
+	 */
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (!new_crtc_state->hw.enable || needs_modeset(new_crtc_state))
+			continue;
 
-		if (needs_modeset(new_crtc_state))
+		if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
+			enum transcoder master = new_crtc_state->mst_master_transcoder;
+
+			if (intel_cpu_transcoder_needs_modeset(state, master)) {
+				new_crtc_state->uapi.mode_changed = true;
+				new_crtc_state->update_pipe = false;
+			}
+		} else if (is_trans_port_sync_mode(new_crtc_state)) {
+			u8 trans = new_crtc_state->sync_mode_slaves_mask |
+				   BIT(new_crtc_state->master_transcoder);
+
+			intel_modeset_synced_crtcs(state, trans);
+		}
+	}
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (needs_modeset(new_crtc_state)) {
 			any_ms = true;
+			continue;
+		}
+
+		if (!new_crtc_state->update_pipe)
+			continue;
+
+		intel_crtc_copy_fastset(old_crtc_state, new_crtc_state);
+	}
+
+	if (any_ms && !check_digital_port_conflicts(state)) {
+		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
+		ret = EINVAL;
+		goto fail;
 	}
 
 	ret = drm_dp_mst_atomic_check(&state->base);
 	if (ret)
 		goto fail;
 
+	any_ms |= state->cdclk.force_min_cdclk_changed;
+
+	ret = intel_atomic_check_planes(state, &any_ms);
+	if (ret)
+		goto fail;
+
 	if (any_ms) {
 		ret = intel_modeset_checks(state);
 		if (ret)
@@ -13662,11 +14711,7 @@ static int intel_atomic_check(struct drm_device *dev,
 		state->cdclk.logical = dev_priv->cdclk.logical;
 	}
 
-	ret = icl_add_linked_planes(state);
-	if (ret)
-		goto fail;
-
-	ret = drm_atomic_helper_check_planes(dev, &state->base);
+	ret = intel_atomic_check_crtcs(state);
 	if (ret)
 		goto fail;
 
@@ -13724,34 +14769,114 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)
 	return crtc->base.funcs->get_vblank_counter(&crtc->base);
 }
 
+void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	if (!IS_GEN(dev_priv, 2) || crtc_state->active_planes)
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
+
+	if (crtc_state->has_pch_encoder) {
+		enum pipe pch_transcoder =
+			intel_crtc_pch_transcoder(crtc);
+
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
+	}
+}
+
+static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state,
+			       const struct intel_crtc_state *new_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	/*
+	 * Update pipe size and adjust fitter if needed: the reason for this is
+	 * that in compute_mode_changes we check the native mode (not the pfit
+	 * mode) to see if we can flip rather than do a full mode set. In the
+	 * fastboot case, we'll flip, but if we don't update the pipesrc and
+	 * pfit state, we'll end up with a big fb scanned out into the wrong
+	 * sized surface.
+	 */
+	intel_set_pipe_src_size(new_crtc_state);
+
+	/* on skylake this is done by detaching scalers */
+	if (INTEL_GEN(dev_priv) >= 9) {
+		skl_detach_scalers(new_crtc_state);
+
+		if (new_crtc_state->pch_pfit.enabled)
+			skl_pfit_enable(new_crtc_state);
+	} else if (HAS_PCH_SPLIT(dev_priv)) {
+		if (new_crtc_state->pch_pfit.enabled)
+			ilk_pfit_enable(new_crtc_state);
+		else if (old_crtc_state->pch_pfit.enabled)
+			ilk_pfit_disable(old_crtc_state);
+	}
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_set_pipe_chicken(crtc);
+}
+
+static void commit_pipe_config(struct intel_atomic_state *state,
+			       struct intel_crtc_state *old_crtc_state,
+			       struct intel_crtc_state *new_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	bool modeset = needs_modeset(new_crtc_state);
+
+	/*
+	 * During modesets pipe configuration was programmed as the
+	 * CRTC was enabled.
+	 */
+	if (!modeset) {
+		if (new_crtc_state->uapi.color_mgmt_changed ||
+		    new_crtc_state->update_pipe)
+			intel_color_commit(new_crtc_state);
+
+		if (INTEL_GEN(dev_priv) >= 9)
+			skl_detach_scalers(new_crtc_state);
+
+		if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
+			bdw_set_pipemisc(new_crtc_state);
+
+		if (new_crtc_state->update_pipe)
+			intel_pipe_fastset(old_crtc_state, new_crtc_state);
+	}
+
+	if (dev_priv->display.atomic_update_watermarks)
+		dev_priv->display.atomic_update_watermarks(state, crtc);
+}
+
 static void intel_update_crtc(struct intel_crtc *crtc,
 			      struct intel_atomic_state *state,
 			      struct intel_crtc_state *old_crtc_state,
 			      struct intel_crtc_state *new_crtc_state)
 {
-	struct drm_device *dev = state->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	bool modeset = needs_modeset(new_crtc_state);
 	struct intel_plane_state *new_plane_state =
 		intel_atomic_get_new_plane_state(state,
 						 to_intel_plane(crtc->base.primary));
 
 	if (modeset) {
-		update_scanline_offset(new_crtc_state);
-		dev_priv->display.crtc_enable(new_crtc_state, state);
+		intel_crtc_update_active_timings(new_crtc_state);
+
+		dev_priv->display.crtc_enable(state, crtc);
 
 		/* vblanks work again, re-enable pipe CRC. */
 		intel_crtc_enable_pipe_crc(crtc);
 	} else {
 		if (new_crtc_state->preload_luts &&
-		    (new_crtc_state->base.color_mgmt_changed ||
+		    (new_crtc_state->uapi.color_mgmt_changed ||
 		     new_crtc_state->update_pipe))
 			intel_color_load_luts(new_crtc_state);
 
-		intel_pre_plane_update(old_crtc_state, new_crtc_state);
+		intel_pre_plane_update(state, crtc);
 
 		if (new_crtc_state->update_pipe)
-			intel_encoders_update_pipe(crtc, new_crtc_state, state);
+			intel_encoders_update_pipe(state, crtc);
 	}
 
 	if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc)
@@ -13759,24 +14884,121 @@ static void intel_update_crtc(struct intel_crtc *crtc,
 	else if (new_plane_state)
 		intel_fbc_enable(crtc, new_crtc_state, new_plane_state);
 
-	intel_begin_crtc_commit(state, crtc);
+	/* Perform vblank evasion around commit operation */
+	intel_pipe_update_start(new_crtc_state);
+
+	commit_pipe_config(state, old_crtc_state, new_crtc_state);
 
 	if (INTEL_GEN(dev_priv) >= 9)
 		skl_update_planes_on_crtc(state, crtc);
 	else
 		i9xx_update_planes_on_crtc(state, crtc);
 
-	intel_finish_crtc_commit(state, crtc);
+	intel_pipe_update_end(new_crtc_state);
+
+	/*
+	 * We usually enable FIFO underrun interrupts as part of the
+	 * CRTC enable sequence during modesets.  But when we inherit a
+	 * valid pipe configuration from the BIOS we need to take care
+	 * of enabling them on the CRTC's first fastset.
+	 */
+	if (new_crtc_state->update_pipe && !modeset &&
+	    old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED)
+		intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
 }
 
-static void intel_update_crtcs(struct intel_atomic_state *state)
+static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(new_crtc_state->uapi.crtc->dev);
+	enum transcoder slave_transcoder;
+
+	WARN_ON(!is_power_of_2(new_crtc_state->sync_mode_slaves_mask));
+
+	slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1;
+	return intel_get_crtc_for_pipe(dev_priv,
+				       (enum pipe)slave_transcoder);
+}
+
+static void intel_old_crtc_state_disables(struct intel_atomic_state *state,
+					  struct intel_crtc_state *old_crtc_state,
+					  struct intel_crtc_state *new_crtc_state,
+					  struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+
+	intel_crtc_disable_planes(state, crtc);
+
+	/*
+	 * We need to disable pipe CRC before disabling the pipe,
+	 * or we race against vblank off.
+	 */
+	intel_crtc_disable_pipe_crc(crtc);
+
+	dev_priv->display.crtc_disable(state, crtc);
+	crtc->active = false;
+	intel_fbc_disable(crtc);
+	intel_disable_shared_dpll(old_crtc_state);
+
+	/* FIXME unify this for all platforms */
+	if (!new_crtc_state->hw.active &&
+	    !HAS_GMCH(dev_priv) &&
+	    dev_priv->display.initial_watermarks)
+		dev_priv->display.initial_watermarks(state, crtc);
+}
+
+static void intel_commit_modeset_disables(struct intel_atomic_state *state)
+{
+	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_crtc *crtc;
+	u32 handled = 0;
+	int i;
+
+	/* Only disable port sync and MST slaves */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (!needs_modeset(new_crtc_state))
+			continue;
+
+		if (!old_crtc_state->hw.active)
+			continue;
+
+		/* In case of Transcoder port Sync master slave CRTCs can be
+		 * assigned in any order and we need to make sure that
+		 * slave CRTCs are disabled first and then master CRTC since
+		 * Slave vblanks are masked till Master Vblanks.
+		 */
+		if (!is_trans_port_sync_slave(old_crtc_state) &&
+		    !intel_dp_mst_is_slave_trans(old_crtc_state))
+			continue;
+
+		intel_pre_plane_update(state, crtc);
+		intel_old_crtc_state_disables(state, old_crtc_state,
+					      new_crtc_state, crtc);
+		handled |= BIT(crtc->pipe);
+	}
+
+	/* Disable everything else left on */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (!needs_modeset(new_crtc_state) ||
+		    (handled & BIT(crtc->pipe)))
+			continue;
+
+		intel_pre_plane_update(state, crtc);
+		if (old_crtc_state->hw.active)
+			intel_old_crtc_state_disables(state, old_crtc_state,
+						      new_crtc_state, crtc);
+	}
+}
+
+static void intel_commit_modeset_enables(struct intel_atomic_state *state)
 {
 	struct intel_crtc *crtc;
 	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
 	int i;
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		if (!new_crtc_state->base.active)
+		if (!new_crtc_state->hw.active)
 			continue;
 
 		intel_update_crtc(crtc, state, old_crtc_state,
@@ -13784,23 +15006,141 @@ static void intel_update_crtcs(struct intel_atomic_state *state)
 	}
 }
 
-static void skl_update_crtcs(struct intel_atomic_state *state)
+static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc,
+					      struct intel_atomic_state *state,
+					      struct intel_crtc_state *new_crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+
+	intel_crtc_update_active_timings(new_crtc_state);
+	dev_priv->display.crtc_enable(state, crtc);
+	intel_crtc_enable_pipe_crc(crtc);
+}
+
+static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc,
+				       struct intel_atomic_state *state)
+{
+	struct drm_connector *uninitialized_var(conn);
+	struct drm_connector_state *conn_state;
+	struct intel_dp *intel_dp;
+	int i;
+
+	for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
+		if (conn_state->crtc == &crtc->base)
+			break;
+	}
+	intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(conn)));
+	intel_dp_stop_link_train(intel_dp);
+}
+
+/*
+ * TODO: This is only called from port sync and it is identical to what will be
+ * executed again in intel_update_crtc() over port sync pipes
+ */
+static void intel_post_crtc_enable_updates(struct intel_crtc *crtc,
+					   struct intel_atomic_state *state)
+{
+	struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	struct intel_plane_state *new_plane_state =
+		intel_atomic_get_new_plane_state(state,
+						 to_intel_plane(crtc->base.primary));
+	bool modeset = needs_modeset(new_crtc_state);
+
+	if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc)
+		intel_fbc_disable(crtc);
+	else if (new_plane_state)
+		intel_fbc_enable(crtc, new_crtc_state, new_plane_state);
+
+	/* Perform vblank evasion around commit operation */
+	intel_pipe_update_start(new_crtc_state);
+	commit_pipe_config(state, old_crtc_state, new_crtc_state);
+	skl_update_planes_on_crtc(state, crtc);
+	intel_pipe_update_end(new_crtc_state);
+
+	/*
+	 * We usually enable FIFO underrun interrupts as part of the
+	 * CRTC enable sequence during modesets.  But when we inherit a
+	 * valid pipe configuration from the BIOS we need to take care
+	 * of enabling them on the CRTC's first fastset.
+	 */
+	if (new_crtc_state->update_pipe && !modeset &&
+	    old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED)
+		intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
+}
+
+static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc,
+					       struct intel_atomic_state *state,
+					       struct intel_crtc_state *old_crtc_state,
+					       struct intel_crtc_state *new_crtc_state)
+{
+	struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state);
+	struct intel_crtc_state *new_slave_crtc_state =
+		intel_atomic_get_new_crtc_state(state, slave_crtc);
+	struct intel_crtc_state *old_slave_crtc_state =
+		intel_atomic_get_old_crtc_state(state, slave_crtc);
+
+	WARN_ON(!slave_crtc || !new_slave_crtc_state ||
+		!old_slave_crtc_state);
+
+	DRM_DEBUG_KMS("Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n",
+		      crtc->base.base.id, crtc->base.name, slave_crtc->base.base.id,
+		      slave_crtc->base.name);
+
+	/* Enable seq for slave with with DP_TP_CTL left Idle until the
+	 * master is ready
+	 */
+	intel_crtc_enable_trans_port_sync(slave_crtc,
+					  state,
+					  new_slave_crtc_state);
+
+	/* Enable seq for master with with DP_TP_CTL left Idle */
+	intel_crtc_enable_trans_port_sync(crtc,
+					  state,
+					  new_crtc_state);
+
+	/* Set Slave's DP_TP_CTL to Normal */
+	intel_set_dp_tp_ctl_normal(slave_crtc,
+				   state);
+
+	/* Set Master's DP_TP_CTL To Normal */
+	usleep_range(200, 400);
+	intel_set_dp_tp_ctl_normal(crtc,
+				   state);
+
+	/* Now do the post crtc enable for all master and slaves */
+	intel_post_crtc_enable_updates(slave_crtc,
+				       state);
+	intel_post_crtc_enable_updates(crtc,
+				       state);
+}
+
+static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc *crtc;
 	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
-	unsigned int updated = 0;
-	bool progress;
-	enum pipe pipe;
-	int i;
 	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
 	u8 required_slices = state->wm_results.ddb.enabled_slices;
 	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
+	const u8 num_pipes = INTEL_NUM_PIPES(dev_priv);
+	u8 update_pipes = 0, modeset_pipes = 0;
+	int i;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		if (!new_crtc_state->hw.active)
+			continue;
 
-	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
 		/* ignore allocations for crtc's that have been turned off. */
-		if (new_crtc_state->base.active)
+		if (!needs_modeset(new_crtc_state)) {
 			entries[i] = old_crtc_state->wm.skl.ddb;
+			update_pipes |= BIT(crtc->pipe);
+		} else {
+			modeset_pipes |= BIT(crtc->pipe);
+		}
+	}
 
 	/* If 2nd DBuf slice required, enable it here */
 	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
@@ -13809,28 +15149,29 @@ static void skl_update_crtcs(struct intel_atomic_state *state)
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
 	 * update the pipes in the right order so that their ddb allocations
-	 * never overlap with eachother inbetween CRTC updates. Otherwise we'll
+	 * never overlap with each other between CRTC updates. Otherwise we'll
 	 * cause pipe underruns and other bad stuff.
+	 *
+	 * So first lets enable all pipes that do not need a fullmodeset as
+	 * those don't have any external dependency.
 	 */
-	do {
-		progress = false;
+	while (update_pipes) {
+		for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+						    new_crtc_state, i) {
+			enum pipe pipe = crtc->pipe;
 
-		for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-			bool vbl_wait = false;
-			unsigned int cmask = drm_crtc_mask(&crtc->base);
-
-			pipe = crtc->pipe;
-
-			if (updated & cmask || !new_crtc_state->base.active)
+			if ((update_pipes & BIT(pipe)) == 0)
 				continue;
 
 			if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
-							entries,
-							INTEL_INFO(dev_priv)->num_pipes, i))
+							entries, num_pipes, i))
 				continue;
 
-			updated |= cmask;
 			entries[i] = new_crtc_state->wm.skl.ddb;
+			update_pipes &= ~BIT(pipe);
+
+			intel_update_crtc(crtc, state, old_crtc_state,
+					  new_crtc_state);
 
 			/*
 			 * If this is an already active pipe, it's DDB changed,
@@ -13840,19 +15181,71 @@ static void skl_update_crtcs(struct intel_atomic_state *state)
 			 */
 			if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb,
 						 &old_crtc_state->wm.skl.ddb) &&
-			    !new_crtc_state->base.active_changed &&
-			    state->wm_results.dirty_pipes != updated)
-				vbl_wait = true;
+			    (update_pipes | modeset_pipes))
+				intel_wait_for_vblank(dev_priv, pipe);
+		}
+	}
+
+	/*
+	 * Enable all pipes that needs a modeset and do not depends on other
+	 * pipes
+	 */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
 
+		if ((modeset_pipes & BIT(pipe)) == 0)
+			continue;
+
+		if (intel_dp_mst_is_slave_trans(new_crtc_state) ||
+		    is_trans_port_sync_slave(new_crtc_state))
+			continue;
+
+		WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
+						    entries, num_pipes, i));
+
+		entries[i] = new_crtc_state->wm.skl.ddb;
+		modeset_pipes &= ~BIT(pipe);
+
+		if (is_trans_port_sync_mode(new_crtc_state)) {
+			struct intel_crtc *slave_crtc;
+
+			intel_update_trans_port_sync_crtcs(crtc, state,
+							   old_crtc_state,
+							   new_crtc_state);
+
+			slave_crtc = intel_get_slave_crtc(new_crtc_state);
+			/* TODO: update entries[] of slave */
+			modeset_pipes &= ~BIT(slave_crtc->pipe);
+
+		} else {
 			intel_update_crtc(crtc, state, old_crtc_state,
 					  new_crtc_state);
+		}
+	}
 
-			if (vbl_wait)
-				intel_wait_for_vblank(dev_priv, pipe);
+	/*
+	 * Finally enable all pipes that needs a modeset and depends on
+	 * other pipes, right now it is only MST slaves as both port sync slave
+	 * and master are enabled together
+	 */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
 
-			progress = true;
-		}
-	} while (progress);
+		if ((modeset_pipes & BIT(pipe)) == 0)
+			continue;
+
+		WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
+						    entries, num_pipes, i));
+
+		entries[i] = new_crtc_state->wm.skl.ddb;
+		modeset_pipes &= ~BIT(pipe);
+
+		intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state);
+	}
+
+	WARN_ON(modeset_pipes);
 
 	/* If 2nd DBuf slice is no more required disable it */
 	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
@@ -13934,49 +15327,18 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	if (state->modeset)
 		wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
 
-	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
 		if (needs_modeset(new_crtc_state) ||
 		    new_crtc_state->update_pipe) {
 
 			put_domains[crtc->pipe] =
 				modeset_get_crtc_power_domains(new_crtc_state);
 		}
-
-		if (!needs_modeset(new_crtc_state))
-			continue;
-
-		intel_pre_plane_update(old_crtc_state, new_crtc_state);
-
-		if (old_crtc_state->base.active) {
-			intel_crtc_disable_planes(state, crtc);
-
-			/*
-			 * We need to disable pipe CRC before disabling the pipe,
-			 * or we race against vblank off.
-			 */
-			intel_crtc_disable_pipe_crc(crtc);
-
-			dev_priv->display.crtc_disable(old_crtc_state, state);
-			crtc->active = false;
-			intel_fbc_disable(crtc);
-			intel_disable_shared_dpll(old_crtc_state);
-
-			/*
-			 * Underruns don't always raise
-			 * interrupts, so check manually.
-			 */
-			intel_check_cpu_fifo_underruns(dev_priv);
-			intel_check_pch_fifo_underruns(dev_priv);
-
-			/* FIXME unify this for all platforms */
-			if (!new_crtc_state->base.active &&
-			    !HAS_GMCH(dev_priv) &&
-			    dev_priv->display.initial_watermarks)
-				dev_priv->display.initial_watermarks(state,
-								     new_crtc_state);
-		}
 	}
 
+	intel_commit_modeset_disables(state);
+
 	/* FIXME: Eventually get rid of our crtc->config pointer */
 	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
 		crtc->config = new_crtc_state;
@@ -14004,12 +15366,13 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 		bool modeset = needs_modeset(new_crtc_state);
 
 		/* Complete events for now disable pipes here. */
-		if (modeset && !new_crtc_state->base.active && new_crtc_state->base.event) {
+		if (modeset && !new_crtc_state->hw.active && new_crtc_state->uapi.event) {
 			spin_lock_irq(&dev->event_lock);
-			drm_crtc_send_vblank_event(&crtc->base, new_crtc_state->base.event);
+			drm_crtc_send_vblank_event(&crtc->base,
+						   new_crtc_state->uapi.event);
 			spin_unlock_irq(&dev->event_lock);
 
-			new_crtc_state->base.event = NULL;
+			new_crtc_state->uapi.event = NULL;
 		}
 	}
 
@@ -14017,7 +15380,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 		intel_encoders_update_prepare(state);
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
-	dev_priv->display.update_crtcs(state);
+	dev_priv->display.commit_modeset_enables(state);
 
 	if (state->modeset) {
 		intel_encoders_update_complete(state);
@@ -14040,10 +15403,10 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	drm_atomic_helper_wait_for_flip_done(dev, &state->base);
 
 	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
-		if (new_crtc_state->base.active &&
+		if (new_crtc_state->hw.active &&
 		    !needs_modeset(new_crtc_state) &&
 		    !new_crtc_state->preload_luts &&
-		    (new_crtc_state->base.color_mgmt_changed ||
+		    (new_crtc_state->uapi.color_mgmt_changed ||
 		     new_crtc_state->update_pipe))
 			intel_color_load_luts(new_crtc_state);
 	}
@@ -14055,14 +15418,25 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	 *
 	 * TODO: Move this (and other cleanup) to an async worker eventually.
 	 */
-	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		/*
+		 * Gen2 reports pipe underruns whenever all planes are disabled.
+		 * So re-enable underrun reporting after some planes get enabled.
+		 *
+		 * We do this before .optimize_watermarks() so that we have a
+		 * chance of catching underruns with the intermediate watermarks
+		 * vs. the new plane configuration.
+		 */
+		if (IS_GEN(dev_priv, 2) && planes_enabling(old_crtc_state, new_crtc_state))
+			intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
+
 		if (dev_priv->display.optimize_watermarks)
-			dev_priv->display.optimize_watermarks(state,
-							      new_crtc_state);
+			dev_priv->display.optimize_watermarks(state, crtc);
 	}
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		intel_post_plane_update(old_crtc_state);
+		intel_post_plane_update(state, crtc);
 
 		if (put_domains[i])
 			modeset_put_power_domains(dev_priv, put_domains[i]);
@@ -14070,6 +15444,10 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 		intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
 	}
 
+	/* Underruns don't always raise interrupts, so check manually */
+	intel_check_cpu_fifo_underruns(dev_priv);
+	intel_check_pch_fifo_underruns(dev_priv);
+
 	if (state->modeset)
 		intel_verify_planes(state);
 
@@ -14143,11 +15521,19 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state)
 
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i)
-		intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb),
-					to_intel_frontbuffer(new_plane_state->base.fb),
+		intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->hw.fb),
+					to_intel_frontbuffer(new_plane_state->hw.fb),
 					plane->frontbuffer_bit);
 }
 
+static void assert_global_state_locked(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(&dev_priv->drm, crtc)
+		drm_modeset_lock_assert_held(&crtc->base.mutex);
+}
+
 static int intel_atomic_commit(struct drm_device *dev,
 			       struct drm_atomic_state *_state,
 			       bool nonblock)
@@ -14213,12 +15599,14 @@ static int intel_atomic_commit(struct drm_device *dev,
 	intel_shared_dpll_swap_state(state);
 	intel_atomic_track_fbs(state);
 
-	if (state->modeset) {
+	if (state->global_state_changed) {
+		assert_global_state_locked(dev_priv);
+
 		memcpy(dev_priv->min_cdclk, state->min_cdclk,
 		       sizeof(state->min_cdclk));
 		memcpy(dev_priv->min_voltage_level, state->min_voltage_level,
 		       sizeof(state->min_voltage_level));
-		dev_priv->active_crtcs = state->active_crtcs;
+		dev_priv->active_pipes = state->active_pipes;
 		dev_priv->cdclk.force_min_cdclk = state->cdclk.force_min_cdclk;
 
 		intel_cdclk_swap_state(state);
@@ -14231,7 +15619,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 	if (nonblock && state->modeset) {
 		queue_work(dev_priv->modeset_wq, &state->base.commit_work);
 	} else if (nonblock) {
-		queue_work(system_unbound_wq, &state->base.commit_work);
+		queue_work(dev_priv->flip_wq, &state->base.commit_work);
 	} else {
 		if (state->modeset)
 			flush_workqueue(dev_priv->modeset_wq);
@@ -14260,7 +15648,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
 	 * vblank without our intervention, so leave RPS alone.
 	 */
 	if (!i915_request_started(rq))
-		gen6_rps_boost(rq);
+		intel_rps_boost(rq);
 	i915_request_put(rq);
 
 	drm_crtc_vblank_put(wait->crtc);
@@ -14301,9 +15689,9 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
 
 static int intel_plane_pin_fb(struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_framebuffer *fb = plane_state->hw.fb;
 	struct i915_vma *vma;
 
 	if (plane->id == PLANE_CURSOR &&
@@ -14341,7 +15729,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 {
 	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_DISPLAY,
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
 	};
 
 	i915_gem_object_wait_priority(obj, 0, &attr);
@@ -14350,25 +15738,25 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @plane: drm plane to prepare for
- * @new_state: the plane state being prepared
+ * @_new_plane_state: the plane state being prepared
  *
  * Prepares a framebuffer for usage on a display plane.  Generally this
  * involves pinning the underlying object and updating the frontbuffer tracking
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
- * Must be called with struct_mutex held.
- *
  * Returns 0 on success, negative error code on failure.
  */
 int
 intel_prepare_plane_fb(struct drm_plane *plane,
-		       struct drm_plane_state *new_state)
+		       struct drm_plane_state *_new_plane_state)
 {
+	struct intel_plane_state *new_plane_state =
+		to_intel_plane_state(_new_plane_state);
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(new_state->state);
+		to_intel_atomic_state(new_plane_state->uapi.state);
 	struct drm_i915_private *dev_priv = to_i915(plane->dev);
-	struct drm_framebuffer *fb = new_state->fb;
+	struct drm_framebuffer *fb = new_plane_state->hw.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret;
@@ -14399,9 +15787,9 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		}
 	}
 
-	if (new_state->fence) { /* explicit fencing */
+	if (new_plane_state->uapi.fence) { /* explicit fencing */
 		ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready,
-						    new_state->fence,
+						    new_plane_state->uapi.fence,
 						    I915_FENCE_TIMEOUT,
 						    GFP_KERNEL);
 		if (ret < 0)
@@ -14415,23 +15803,16 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		return ret;
-	}
-
-	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
+	ret = intel_plane_pin_fb(new_plane_state);
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
 
 	fb_obj_bump_render_priority(obj);
-	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB);
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
 
-	if (!new_state->fence) { /* implicit fencing */
+	if (!new_plane_state->uapi.fence) { /* implicit fencing */
 		struct dma_fence *fence;
 
 		ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
@@ -14443,11 +15824,13 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
 		fence = dma_resv_get_excl_rcu(obj->base.resv);
 		if (fence) {
-			add_rps_boost_after_vblank(new_state->crtc, fence);
+			add_rps_boost_after_vblank(new_plane_state->hw.crtc,
+						   fence);
 			dma_fence_put(fence);
 		}
 	} else {
-		add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
+		add_rps_boost_after_vblank(new_plane_state->hw.crtc,
+					   new_plane_state->uapi.fence);
 	}
 
 	/*
@@ -14459,7 +15842,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	 * maximum clocks following a vblank miss (see do_rps_boost()).
 	 */
 	if (!intel_state->rps_interactive) {
-		intel_rps_mark_interactive(dev_priv, true);
+		intel_rps_mark_interactive(&dev_priv->gt.rps, true);
 		intel_state->rps_interactive = true;
 	}
 
@@ -14469,130 +15852,27 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 /**
  * intel_cleanup_plane_fb - Cleans up an fb after plane use
  * @plane: drm plane to clean up for
- * @old_state: the state from the previous modeset
+ * @_old_plane_state: the state from the previous modeset
  *
  * Cleans up a framebuffer that has just been removed from a plane.
- *
- * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
-		       struct drm_plane_state *old_state)
+		       struct drm_plane_state *_old_plane_state)
 {
+	struct intel_plane_state *old_plane_state =
+		to_intel_plane_state(_old_plane_state);
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(old_state->state);
+		to_intel_atomic_state(old_plane_state->uapi.state);
 	struct drm_i915_private *dev_priv = to_i915(plane->dev);
 
 	if (intel_state->rps_interactive) {
-		intel_rps_mark_interactive(dev_priv, false);
+		intel_rps_mark_interactive(&dev_priv->gt.rps, false);
 		intel_state->rps_interactive = false;
 	}
 
 	/* Should only be called after a successful intel_prepare_plane_fb()! */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_plane_unpin_fb(to_intel_plane_state(old_state));
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-}
-
-int
-skl_max_scale(const struct intel_crtc_state *crtc_state,
-	      u32 pixel_format)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int max_scale, mult;
-	int crtc_clock, max_dotclk, tmpclk1, tmpclk2;
-
-	if (!crtc_state->base.enable)
-		return DRM_PLANE_HELPER_NO_SCALING;
-
-	crtc_clock = crtc_state->base.adjusted_mode.crtc_clock;
-	max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk;
-
-	if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
-		max_dotclk *= 2;
-
-	if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock))
-		return DRM_PLANE_HELPER_NO_SCALING;
-
-	/*
-	 * skl max scale is lower of:
-	 *    close to 3 but not 3, -1 is for that purpose
-	 *            or
-	 *    cdclk/crtc_clock
-	 */
-	mult = is_planar_yuv_format(pixel_format) ? 2 : 3;
-	tmpclk1 = (1 << 16) * mult - 1;
-	tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock);
-	max_scale = min(tmpclk1, tmpclk2);
-
-	return max_scale;
-}
-
-static void intel_begin_crtc_commit(struct intel_atomic_state *state,
-				    struct intel_crtc *crtc)
-{
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct intel_crtc_state *old_crtc_state =
-		intel_atomic_get_old_crtc_state(state, crtc);
-	struct intel_crtc_state *new_crtc_state =
-		intel_atomic_get_new_crtc_state(state, crtc);
-	bool modeset = needs_modeset(new_crtc_state);
-
-	/* Perform vblank evasion around commit operation */
-	intel_pipe_update_start(new_crtc_state);
-
-	if (modeset)
-		goto out;
-
-	if (new_crtc_state->base.color_mgmt_changed ||
-	    new_crtc_state->update_pipe)
-		intel_color_commit(new_crtc_state);
-
-	if (new_crtc_state->update_pipe)
-		intel_update_pipe_config(old_crtc_state, new_crtc_state);
-	else if (INTEL_GEN(dev_priv) >= 9)
-		skl_detach_scalers(new_crtc_state);
-
-	if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
-		bdw_set_pipemisc(new_crtc_state);
-
-out:
-	if (dev_priv->display.atomic_update_watermarks)
-		dev_priv->display.atomic_update_watermarks(state,
-							   new_crtc_state);
-}
-
-void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
-				  struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
-	if (!IS_GEN(dev_priv, 2))
-		intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
-
-	if (crtc_state->has_pch_encoder) {
-		enum pipe pch_transcoder =
-			intel_crtc_pch_transcoder(crtc);
-
-		intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
-	}
-}
-
-static void intel_finish_crtc_commit(struct intel_atomic_state *state,
-				     struct intel_crtc *crtc)
-{
-	struct intel_crtc_state *old_crtc_state =
-		intel_atomic_get_old_crtc_state(state, crtc);
-	struct intel_crtc_state *new_crtc_state =
-		intel_atomic_get_new_crtc_state(state, crtc);
-
-	intel_pipe_update_end(new_crtc_state);
-
-	if (new_crtc_state->update_pipe &&
-	    !needs_modeset(new_crtc_state) &&
-	    old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED)
-		intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
+	intel_plane_unpin_fb(old_plane_state);
 }
 
 /**
@@ -14647,8 +15927,13 @@ static bool i965_plane_format_mod_supported(struct drm_plane *_plane,
 	case DRM_FORMAT_RGB565:
 	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_ABGR8888:
 	case DRM_FORMAT_XRGB2101010:
 	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_ABGR2101010:
+	case DRM_FORMAT_XBGR16161616F:
 		return modifier == DRM_FORMAT_MOD_LINEAR ||
 			modifier == I915_FORMAT_MOD_X_TILED;
 	default:
@@ -14682,8 +15967,8 @@ static const struct drm_plane_funcs i8xx_plane_funcs = {
 };
 
 static int
-intel_legacy_cursor_update(struct drm_plane *plane,
-			   struct drm_crtc *crtc,
+intel_legacy_cursor_update(struct drm_plane *_plane,
+			   struct drm_crtc *_crtc,
 			   struct drm_framebuffer *fb,
 			   int crtc_x, int crtc_y,
 			   unsigned int crtc_w, unsigned int crtc_h,
@@ -14691,11 +15976,13 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   u32 src_w, u32 src_h,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	struct drm_plane_state *old_plane_state, *new_plane_state;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_plane *plane = to_intel_plane(_plane);
+	struct intel_crtc *crtc = to_intel_crtc(_crtc);
+	struct intel_plane_state *old_plane_state =
+		to_intel_plane_state(plane->base.state);
+	struct intel_plane_state *new_plane_state;
 	struct intel_crtc_state *crtc_state =
-		to_intel_crtc_state(crtc->state);
+		to_intel_crtc_state(crtc->base.state);
 	struct intel_crtc_state *new_crtc_state;
 	int ret;
 
@@ -14703,18 +15990,17 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	 * When crtc is inactive or there is a modeset pending,
 	 * wait for it to complete in the slowpath
 	 */
-	if (!crtc_state->base.active || needs_modeset(crtc_state) ||
+	if (!crtc_state->hw.active || needs_modeset(crtc_state) ||
 	    crtc_state->update_pipe)
 		goto slow;
 
-	old_plane_state = plane->state;
 	/*
 	 * Don't do an async update if there is an outstanding commit modifying
 	 * the plane.  This prevents our async update's changes from getting
 	 * overridden by a previous synchronous update's state.
 	 */
-	if (old_plane_state->commit &&
-	    !try_wait_for_completion(&old_plane_state->commit->hw_done))
+	if (old_plane_state->uapi.commit &&
+	    !try_wait_for_completion(&old_plane_state->uapi.commit->hw_done))
 		goto slow;
 
 	/*
@@ -14722,56 +16008,52 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	 * take the slowpath. Only changing fb or position should be
 	 * in the fastpath.
 	 */
-	if (old_plane_state->crtc != crtc ||
-	    old_plane_state->src_w != src_w ||
-	    old_plane_state->src_h != src_h ||
-	    old_plane_state->crtc_w != crtc_w ||
-	    old_plane_state->crtc_h != crtc_h ||
-	    !old_plane_state->fb != !fb)
+	if (old_plane_state->uapi.crtc != &crtc->base ||
+	    old_plane_state->uapi.src_w != src_w ||
+	    old_plane_state->uapi.src_h != src_h ||
+	    old_plane_state->uapi.crtc_w != crtc_w ||
+	    old_plane_state->uapi.crtc_h != crtc_h ||
+	    !old_plane_state->uapi.fb != !fb)
 		goto slow;
 
-	new_plane_state = intel_plane_duplicate_state(plane);
+	new_plane_state = to_intel_plane_state(intel_plane_duplicate_state(&plane->base));
 	if (!new_plane_state)
 		return -ENOMEM;
 
-	new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc));
+	new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(&crtc->base));
 	if (!new_crtc_state) {
 		ret = -ENOMEM;
 		goto out_free;
 	}
 
-	drm_atomic_set_fb_for_plane(new_plane_state, fb);
+	drm_atomic_set_fb_for_plane(&new_plane_state->uapi, fb);
 
-	new_plane_state->src_x = src_x;
-	new_plane_state->src_y = src_y;
-	new_plane_state->src_w = src_w;
-	new_plane_state->src_h = src_h;
-	new_plane_state->crtc_x = crtc_x;
-	new_plane_state->crtc_y = crtc_y;
-	new_plane_state->crtc_w = crtc_w;
-	new_plane_state->crtc_h = crtc_h;
+	new_plane_state->uapi.src_x = src_x;
+	new_plane_state->uapi.src_y = src_y;
+	new_plane_state->uapi.src_w = src_w;
+	new_plane_state->uapi.src_h = src_h;
+	new_plane_state->uapi.crtc_x = crtc_x;
+	new_plane_state->uapi.crtc_y = crtc_y;
+	new_plane_state->uapi.crtc_w = crtc_w;
+	new_plane_state->uapi.crtc_h = crtc_h;
 
 	ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state,
-						  to_intel_plane_state(old_plane_state),
-						  to_intel_plane_state(new_plane_state));
+						  old_plane_state, new_plane_state);
 	if (ret)
 		goto out_free;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
+	ret = intel_plane_pin_fb(new_plane_state);
 	if (ret)
 		goto out_free;
 
-	ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
-	if (ret)
-		goto out_unlock;
-
-	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
-	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
-				to_intel_frontbuffer(fb),
-				intel_plane->frontbuffer_bit);
+	intel_frontbuffer_flush(to_intel_frontbuffer(new_plane_state->hw.fb),
+				ORIGIN_FLIP);
+	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->hw.fb),
+				to_intel_frontbuffer(new_plane_state->hw.fb),
+				plane->frontbuffer_bit);
 
 	/* Swap plane state */
-	plane->state = new_plane_state;
+	plane->base.state = &new_plane_state->uapi;
 
 	/*
 	 * We cannot swap crtc_state as it may be in use by an atomic commit or
@@ -14785,27 +16067,24 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	 */
 	crtc_state->active_planes = new_crtc_state->active_planes;
 
-	if (plane->state->visible)
-		intel_update_plane(intel_plane, crtc_state,
-				   to_intel_plane_state(plane->state));
+	if (new_plane_state->uapi.visible)
+		intel_update_plane(plane, crtc_state, new_plane_state);
 	else
-		intel_disable_plane(intel_plane, crtc_state);
+		intel_disable_plane(plane, crtc_state);
 
-	intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
+	intel_plane_unpin_fb(old_plane_state);
 
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 out_free:
 	if (new_crtc_state)
-		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
+		intel_crtc_destroy_state(&crtc->base, &new_crtc_state->uapi);
 	if (ret)
-		intel_plane_destroy_state(plane, new_plane_state);
+		intel_plane_destroy_state(&plane->base, &new_plane_state->uapi);
 	else
-		intel_plane_destroy_state(plane, old_plane_state);
+		intel_plane_destroy_state(&plane->base, &old_plane_state->uapi);
 	return ret;
 
 slow:
-	return drm_atomic_helper_update_plane(plane, crtc, fb,
+	return drm_atomic_helper_update_plane(&plane->base, &crtc->base, fb,
 					      crtc_x, crtc_y, crtc_w, crtc_h,
 					      src_x, src_y, src_w, src_h, ctx);
 }
@@ -14843,10 +16122,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 	const struct drm_plane_funcs *plane_funcs;
 	unsigned int supported_rotations;
 	unsigned int possible_crtcs;
-	const u64 *modifiers;
 	const u32 *formats;
 	int num_formats;
-	int ret;
+	int ret, zpos;
 
 	if (INTEL_GEN(dev_priv) >= 9)
 		return skl_universal_plane_create(dev_priv, pipe,
@@ -14875,44 +16153,69 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 		fbc->possible_framebuffer_bits |= plane->frontbuffer_bit;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 4) {
-		formats = i965_primary_formats;
-		num_formats = ARRAY_SIZE(i965_primary_formats);
-		modifiers = i9xx_format_modifiers;
-
-		plane->max_stride = i9xx_plane_max_stride;
-		plane->update_plane = i9xx_update_plane;
-		plane->disable_plane = i9xx_disable_plane;
-		plane->get_hw_state = i9xx_plane_get_hw_state;
-		plane->check_plane = i9xx_plane_check;
-
-		plane_funcs = &i965_plane_funcs;
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		formats = vlv_primary_formats;
+		num_formats = ARRAY_SIZE(vlv_primary_formats);
+	} else if (INTEL_GEN(dev_priv) >= 4) {
+		/*
+		 * WaFP16GammaEnabling:ivb
+		 * "Workaround : When using the 64-bit format, the plane
+		 *  output on each color channel has one quarter amplitude.
+		 *  It can be brought up to full amplitude by using pipe
+		 *  gamma correction or pipe color space conversion to
+		 *  multiply the plane output by four."
+		 *
+		 * There is no dedicated plane gamma for the primary plane,
+		 * and using the pipe gamma/csc could conflict with other
+		 * planes, so we choose not to expose fp16 on IVB primary
+		 * planes. HSW primary planes no longer have this problem.
+		 */
+		if (IS_IVYBRIDGE(dev_priv)) {
+			formats = ivb_primary_formats;
+			num_formats = ARRAY_SIZE(ivb_primary_formats);
+		} else {
+			formats = i965_primary_formats;
+			num_formats = ARRAY_SIZE(i965_primary_formats);
+		}
 	} else {
 		formats = i8xx_primary_formats;
 		num_formats = ARRAY_SIZE(i8xx_primary_formats);
-		modifiers = i9xx_format_modifiers;
-
-		plane->max_stride = i9xx_plane_max_stride;
-		plane->update_plane = i9xx_update_plane;
-		plane->disable_plane = i9xx_disable_plane;
-		plane->get_hw_state = i9xx_plane_get_hw_state;
-		plane->check_plane = i9xx_plane_check;
+	}
 
+	if (INTEL_GEN(dev_priv) >= 4)
+		plane_funcs = &i965_plane_funcs;
+	else
 		plane_funcs = &i8xx_plane_funcs;
-	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		plane->min_cdclk = vlv_plane_min_cdclk;
+	else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
+		plane->min_cdclk = hsw_plane_min_cdclk;
+	else if (IS_IVYBRIDGE(dev_priv))
+		plane->min_cdclk = ivb_plane_min_cdclk;
+	else
+		plane->min_cdclk = i9xx_plane_min_cdclk;
+
+	plane->max_stride = i9xx_plane_max_stride;
+	plane->update_plane = i9xx_update_plane;
+	plane->disable_plane = i9xx_disable_plane;
+	plane->get_hw_state = i9xx_plane_get_hw_state;
+	plane->check_plane = i9xx_plane_check;
 
 	possible_crtcs = BIT(pipe);
 
 	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
 		ret = drm_universal_plane_init(&dev_priv->drm, &plane->base,
 					       possible_crtcs, plane_funcs,
-					       formats, num_formats, modifiers,
+					       formats, num_formats,
+					       i9xx_format_modifiers,
 					       DRM_PLANE_TYPE_PRIMARY,
 					       "primary %c", pipe_name(pipe));
 	else
 		ret = drm_universal_plane_init(&dev_priv->drm, &plane->base,
 					       possible_crtcs, plane_funcs,
-					       formats, num_formats, modifiers,
+					       formats, num_formats,
+					       i9xx_format_modifiers,
 					       DRM_PLANE_TYPE_PRIMARY,
 					       "plane %c",
 					       plane_name(plane->i9xx_plane));
@@ -14935,6 +16238,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 						   DRM_MODE_ROTATE_0,
 						   supported_rotations);
 
+	zpos = 0;
+	drm_plane_create_zpos_immutable_property(&plane->base, zpos);
+
 	drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs);
 
 	return plane;
@@ -14951,7 +16257,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
 {
 	unsigned int possible_crtcs;
 	struct intel_plane *cursor;
-	int ret;
+	int ret, zpos;
 
 	cursor = intel_plane_alloc();
 	if (IS_ERR(cursor))
@@ -15000,6 +16306,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
 						   DRM_MODE_ROTATE_0 |
 						   DRM_MODE_ROTATE_180);
 
+	zpos = RUNTIME_INFO(dev_priv)->num_sprites[pipe] + 1;
+	drm_plane_create_zpos_immutable_property(&cursor->base, zpos);
+
 	drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs);
 
 	return cursor;
@@ -15010,28 +16319,6 @@ fail:
 	return ERR_PTR(ret);
 }
 
-static void intel_crtc_init_scalers(struct intel_crtc *crtc,
-				    struct intel_crtc_state *crtc_state)
-{
-	struct intel_crtc_scaler_state *scaler_state =
-		&crtc_state->scaler_state;
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int i;
-
-	crtc->num_scalers = RUNTIME_INFO(dev_priv)->num_scalers[crtc->pipe];
-	if (!crtc->num_scalers)
-		return;
-
-	for (i = 0; i < crtc->num_scalers; i++) {
-		struct intel_scaler *scaler = &scaler_state->scalers[i];
-
-		scaler->in_use = 0;
-		scaler->mode = 0;
-	}
-
-	scaler_state->scaler_id = -1;
-}
-
 #define INTEL_CRTC_FUNCS \
 	.gamma_set = drm_atomic_helper_legacy_gamma_set, \
 	.set_config = drm_atomic_helper_set_config, \
@@ -15075,12 +16362,12 @@ static const struct drm_crtc_funcs i965_crtc_funcs = {
 	.disable_vblank = i965_disable_vblank,
 };
 
-static const struct drm_crtc_funcs i945gm_crtc_funcs = {
+static const struct drm_crtc_funcs i915gm_crtc_funcs = {
 	INTEL_CRTC_FUNCS,
 
 	.get_vblank_counter = i915_get_vblank_counter,
-	.enable_vblank = i945gm_enable_vblank,
-	.disable_vblank = i945gm_disable_vblank,
+	.enable_vblank = i915gm_enable_vblank,
+	.disable_vblank = i915gm_disable_vblank,
 };
 
 static const struct drm_crtc_funcs i915_crtc_funcs = {
@@ -15099,33 +16386,53 @@ static const struct drm_crtc_funcs i8xx_crtc_funcs = {
 	.disable_vblank = i8xx_disable_vblank,
 };
 
+static struct intel_crtc *intel_crtc_alloc(void)
+{
+	struct intel_crtc_state *crtc_state;
+	struct intel_crtc *crtc;
+
+	crtc = kzalloc(sizeof(*crtc), GFP_KERNEL);
+	if (!crtc)
+		return ERR_PTR(-ENOMEM);
+
+	crtc_state = intel_crtc_state_alloc(crtc);
+	if (!crtc_state) {
+		kfree(crtc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	crtc->base.state = &crtc_state->uapi;
+	crtc->config = crtc_state;
+
+	return crtc;
+}
+
+static void intel_crtc_free(struct intel_crtc *crtc)
+{
+	intel_crtc_destroy_state(&crtc->base, crtc->base.state);
+	kfree(crtc);
+}
+
 static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
+	struct intel_plane *primary, *cursor;
 	const struct drm_crtc_funcs *funcs;
-	struct intel_crtc *intel_crtc;
-	struct intel_crtc_state *crtc_state = NULL;
-	struct intel_plane *primary = NULL;
-	struct intel_plane *cursor = NULL;
+	struct intel_crtc *crtc;
 	int sprite, ret;
 
-	intel_crtc = kzalloc(sizeof(*intel_crtc), GFP_KERNEL);
-	if (!intel_crtc)
-		return -ENOMEM;
+	crtc = intel_crtc_alloc();
+	if (IS_ERR(crtc))
+		return PTR_ERR(crtc);
 
-	crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL);
-	if (!crtc_state) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-	__drm_atomic_helper_crtc_reset(&intel_crtc->base, &crtc_state->base);
-	intel_crtc->config = crtc_state;
+	crtc->pipe = pipe;
+	crtc->num_scalers = RUNTIME_INFO(dev_priv)->num_scalers[pipe];
 
 	primary = intel_primary_plane_create(dev_priv, pipe);
 	if (IS_ERR(primary)) {
 		ret = PTR_ERR(primary);
 		goto fail;
 	}
-	intel_crtc->plane_ids_mask |= BIT(primary->id);
+	crtc->plane_ids_mask |= BIT(primary->id);
 
 	for_each_sprite(dev_priv, pipe, sprite) {
 		struct intel_plane *plane;
@@ -15135,7 +16442,7 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 			ret = PTR_ERR(plane);
 			goto fail;
 		}
-		intel_crtc->plane_ids_mask |= BIT(plane->id);
+		crtc->plane_ids_mask |= BIT(plane->id);
 	}
 
 	cursor = intel_cursor_plane_create(dev_priv, pipe);
@@ -15143,7 +16450,7 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 		ret = PTR_ERR(cursor);
 		goto fail;
 	}
-	intel_crtc->plane_ids_mask |= BIT(cursor->id);
+	crtc->plane_ids_mask |= BIT(cursor->id);
 
 	if (HAS_GMCH(dev_priv)) {
 		if (IS_CHERRYVIEW(dev_priv) ||
@@ -15151,8 +16458,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 			funcs = &g4x_crtc_funcs;
 		else if (IS_GEN(dev_priv, 4))
 			funcs = &i965_crtc_funcs;
-		else if (IS_I945GM(dev_priv))
-			funcs = &i945gm_crtc_funcs;
+		else if (IS_I945GM(dev_priv) || IS_I915GM(dev_priv))
+			funcs = &i915gm_crtc_funcs;
 		else if (IS_GEN(dev_priv, 3))
 			funcs = &i915_crtc_funcs;
 		else
@@ -15164,44 +16471,32 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 			funcs = &ilk_crtc_funcs;
 	}
 
-	ret = drm_crtc_init_with_planes(&dev_priv->drm, &intel_crtc->base,
+	ret = drm_crtc_init_with_planes(&dev_priv->drm, &crtc->base,
 					&primary->base, &cursor->base,
 					funcs, "pipe %c", pipe_name(pipe));
 	if (ret)
 		goto fail;
 
-	intel_crtc->pipe = pipe;
-
-	/* initialize shared scalers */
-	intel_crtc_init_scalers(intel_crtc, crtc_state);
-
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) ||
 	       dev_priv->pipe_to_crtc_mapping[pipe] != NULL);
-	dev_priv->pipe_to_crtc_mapping[pipe] = intel_crtc;
+	dev_priv->pipe_to_crtc_mapping[pipe] = crtc;
 
 	if (INTEL_GEN(dev_priv) < 9) {
 		enum i9xx_plane_id i9xx_plane = primary->i9xx_plane;
 
 		BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
 		       dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL);
-		dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc;
+		dev_priv->plane_to_crtc_mapping[i9xx_plane] = crtc;
 	}
 
-	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
+	intel_color_init(crtc);
 
-	intel_color_init(intel_crtc);
-
-	WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
+	WARN_ON(drm_crtc_index(&crtc->base) != crtc->pipe);
 
 	return 0;
 
 fail:
-	/*
-	 * drm_mode_config_cleanup() will free up any
-	 * crtcs/planes already initialized.
-	 */
-	kfree(crtc_state);
-	kfree(intel_crtc);
+	intel_crtc_free(crtc);
 
 	return ret;
 }
@@ -15223,21 +16518,32 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int intel_encoder_clones(struct intel_encoder *encoder)
+static u32 intel_encoder_possible_clones(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct intel_encoder *source_encoder;
-	int index_mask = 0;
-	int entry = 0;
+	u32 possible_clones = 0;
 
 	for_each_intel_encoder(dev, source_encoder) {
 		if (encoders_cloneable(encoder, source_encoder))
-			index_mask |= (1 << entry);
+			possible_clones |= drm_encoder_mask(&source_encoder->base);
+	}
+
+	return possible_clones;
+}
+
+static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct intel_crtc *crtc;
+	u32 possible_crtcs = 0;
 
-		entry++;
+	for_each_intel_crtc(dev, crtc) {
+		if (encoder->pipe_mask & BIT(crtc->pipe))
+			possible_crtcs |= drm_crtc_mask(&crtc->base);
 	}
 
-	return index_mask;
+	return possible_crtcs;
 }
 
 static bool ilk_has_edp_a(struct drm_i915_private *dev_priv)
@@ -15319,13 +16625,18 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 
 	intel_pps_init(dev_priv);
 
-	if (!HAS_DISPLAY(dev_priv))
+	if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))
 		return;
 
 	if (INTEL_GEN(dev_priv) >= 12) {
-		/* TODO: initialize TC ports as well */
 		intel_ddi_init(dev_priv, PORT_A);
 		intel_ddi_init(dev_priv, PORT_B);
+		intel_ddi_init(dev_priv, PORT_D);
+		intel_ddi_init(dev_priv, PORT_E);
+		intel_ddi_init(dev_priv, PORT_F);
+		intel_ddi_init(dev_priv, PORT_G);
+		intel_ddi_init(dev_priv, PORT_H);
+		intel_ddi_init(dev_priv, PORT_I);
 		icl_dsi_init(dev_priv);
 	} else if (IS_ELKHARTLAKE(dev_priv)) {
 		intel_ddi_init(dev_priv, PORT_A);
@@ -15535,9 +16846,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 	intel_psr_init(dev_priv);
 
 	for_each_intel_encoder(&dev_priv->drm, encoder) {
-		encoder->base.possible_crtcs = encoder->crtc_mask;
+		encoder->base.possible_crtcs =
+			intel_encoder_possible_crtcs(encoder);
 		encoder->base.possible_clones =
-			intel_encoder_clones(encoder);
+			intel_encoder_possible_clones(encoder);
 	}
 
 	intel_init_pch_refclk(dev_priv);
@@ -15671,8 +16983,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	}
 
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
-	if (mode_cmd->offsets[0] != 0)
+	if (mode_cmd->offsets[0] != 0) {
+		DRM_DEBUG_KMS("plane 0 offset (0x%08x) must be 0\n",
+			      mode_cmd->offsets[0]);
 		goto err;
+	}
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
 
@@ -15685,26 +17000,23 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);
-
-		/*
-		 * Display WA #0531: skl,bxt,kbl,glk
-		 *
-		 * Render decompression and plane width > 3840
-		 * combined with horizontal panning requires the
-		 * plane stride to be a multiple of 4. We'll just
-		 * require the entire fb to accommodate that to avoid
-		 * potential runtime errors at plane configuration time.
-		 */
-		if (IS_GEN(dev_priv, 9) && i == 0 && fb->width > 3840 &&
-		    is_ccs_modifier(fb->modifier))
-			stride_alignment *= 4;
-
 		if (fb->pitches[i] & (stride_alignment - 1)) {
 			DRM_DEBUG_KMS("plane %d pitch (%d) must be at least %u byte aligned\n",
 				      i, fb->pitches[i], stride_alignment);
 			goto err;
 		}
 
+		if (is_gen12_ccs_plane(fb, i)) {
+			int ccs_aux_stride = gen12_ccs_aux_stride(fb, i);
+
+			if (fb->pitches[i] != ccs_aux_stride) {
+				DRM_DEBUG_KMS("ccs aux plane %d pitch (%d) must be %d\n",
+					      i,
+					      fb->pitches[i], ccs_aux_stride);
+				goto err;
+			}
+		}
+
 		fb->obj[i] = &obj->base;
 	}
 
@@ -15792,8 +17104,14 @@ intel_mode_valid(struct drm_device *dev,
 			   DRM_MODE_FLAG_CLKDIV2))
 		return MODE_BAD;
 
-	if (INTEL_GEN(dev_priv) >= 9 ||
-	    IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) {
+	/* Transcoder timing limits */
+	if (INTEL_GEN(dev_priv) >= 11) {
+		hdisplay_max = 16384;
+		vdisplay_max = 8192;
+		htotal_max = 16384;
+		vtotal_max = 8192;
+	} else if (INTEL_GEN(dev_priv) >= 9 ||
+		   IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) {
 		hdisplay_max = 8192; /* FDI max 4096 handled elsewhere */
 		vdisplay_max = 4096;
 		htotal_max = 8192;
@@ -15822,6 +17140,56 @@ intel_mode_valid(struct drm_device *dev,
 	    mode->vtotal > vtotal_max)
 		return MODE_V_ILLEGAL;
 
+	if (INTEL_GEN(dev_priv) >= 5) {
+		if (mode->hdisplay < 64 ||
+		    mode->htotal - mode->hdisplay < 32)
+			return MODE_H_ILLEGAL;
+
+		if (mode->vtotal - mode->vdisplay < 5)
+			return MODE_V_ILLEGAL;
+	} else {
+		if (mode->htotal - mode->hdisplay < 32)
+			return MODE_H_ILLEGAL;
+
+		if (mode->vtotal - mode->vdisplay < 3)
+			return MODE_V_ILLEGAL;
+	}
+
+	return MODE_OK;
+}
+
+enum drm_mode_status
+intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
+				const struct drm_display_mode *mode)
+{
+	int plane_width_max, plane_height_max;
+
+	/*
+	 * intel_mode_valid() should be
+	 * sufficient on older platforms.
+	 */
+	if (INTEL_GEN(dev_priv) < 9)
+		return MODE_OK;
+
+	/*
+	 * Most people will probably want a fullscreen
+	 * plane so let's not advertize modes that are
+	 * too big for that.
+	 */
+	if (INTEL_GEN(dev_priv) >= 11) {
+		plane_width_max = 5120;
+		plane_height_max = 4320;
+	} else {
+		plane_width_max = 5120;
+		plane_height_max = 4096;
+	}
+
+	if (mode->hdisplay > plane_width_max)
+		return MODE_H_ILLEGAL;
+
+	if (mode->vdisplay > plane_height_max)
+		return MODE_V_ILLEGAL;
+
 	return MODE_OK;
 }
 
@@ -15846,29 +17214,28 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	intel_init_cdclk_hooks(dev_priv);
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
+		dev_priv->display.get_pipe_config = hsw_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
-			skylake_get_initial_plane_config;
-		dev_priv->display.crtc_compute_clock =
-			haswell_crtc_compute_clock;
-		dev_priv->display.crtc_enable = haswell_crtc_enable;
-		dev_priv->display.crtc_disable = haswell_crtc_disable;
+			skl_get_initial_plane_config;
+		dev_priv->display.crtc_compute_clock = hsw_crtc_compute_clock;
+		dev_priv->display.crtc_enable = hsw_crtc_enable;
+		dev_priv->display.crtc_disable = hsw_crtc_disable;
 	} else if (HAS_DDI(dev_priv)) {
-		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
+		dev_priv->display.get_pipe_config = hsw_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
 			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
-			haswell_crtc_compute_clock;
-		dev_priv->display.crtc_enable = haswell_crtc_enable;
-		dev_priv->display.crtc_disable = haswell_crtc_disable;
+			hsw_crtc_compute_clock;
+		dev_priv->display.crtc_enable = hsw_crtc_enable;
+		dev_priv->display.crtc_disable = hsw_crtc_disable;
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
+		dev_priv->display.get_pipe_config = ilk_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
 			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
-			ironlake_crtc_compute_clock;
-		dev_priv->display.crtc_enable = ironlake_crtc_enable;
-		dev_priv->display.crtc_disable = ironlake_crtc_disable;
+			ilk_crtc_compute_clock;
+		dev_priv->display.crtc_enable = ilk_crtc_enable;
+		dev_priv->display.crtc_disable = ilk_crtc_disable;
 	} else if (IS_CHERRYVIEW(dev_priv)) {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -15914,58 +17281,26 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	}
 
 	if (IS_GEN(dev_priv, 5)) {
-		dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
+		dev_priv->display.fdi_link_train = ilk_fdi_link_train;
 	} else if (IS_GEN(dev_priv, 6)) {
 		dev_priv->display.fdi_link_train = gen6_fdi_link_train;
 	} else if (IS_IVYBRIDGE(dev_priv)) {
 		/* FIXME: detect B0+ stepping and use auto training */
 		dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
-	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		dev_priv->display.fdi_link_train = hsw_fdi_link_train;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9)
-		dev_priv->display.update_crtcs = skl_update_crtcs;
-	else
-		dev_priv->display.update_crtcs = intel_update_crtcs;
-}
-
-static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		return VLV_VGACNTRL;
-	else if (INTEL_GEN(dev_priv) >= 5)
-		return CPU_VGACNTRL;
+		dev_priv->display.commit_modeset_enables = skl_commit_modeset_enables;
 	else
-		return VGACNTRL;
-}
-
-/* Disable the VGA plane that we never use */
-static void i915_disable_vga(struct drm_i915_private *dev_priv)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	u8 sr1;
-	i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv);
+		dev_priv->display.commit_modeset_enables = intel_commit_modeset_enables;
 
-	/* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
-	vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
-	outb(SR01, VGA_SR_INDEX);
-	sr1 = inb(VGA_SR_DATA);
-	outb(sr1 | 1<<5, VGA_SR_DATA);
-	vga_put(pdev, VGA_RSRC_LEGACY_IO);
-	udelay(300);
-
-	I915_WRITE(vga_reg, VGA_DISP_DISABLE);
-	POSTING_READ(vga_reg);
 }
 
-void intel_modeset_init_hw(struct drm_device *dev)
+void intel_modeset_init_hw(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	intel_update_cdclk(dev_priv);
-	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
-	dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw;
+	intel_update_cdclk(i915);
+	intel_dump_cdclk_state(&i915->cdclk.hw, "Current CDCLK");
+	i915->cdclk.logical = i915->cdclk.actual = i915->cdclk.hw;
 }
 
 /*
@@ -16041,7 +17376,7 @@ retry:
 	/* Write calculated watermark values back */
 	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
 		crtc_state->wm.need_postvbl_update = true;
-		dev_priv->display.optimize_watermarks(intel_state, crtc_state);
+		dev_priv->display.optimize_watermarks(intel_state, crtc);
 
 		to_intel_crtc_state(crtc->base.state)->wm = crtc_state->wm;
 	}
@@ -16073,8 +17408,7 @@ static int intel_initial_commit(struct drm_device *dev)
 {
 	struct drm_atomic_state *state = NULL;
 	struct drm_modeset_acquire_ctx ctx;
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *crtc_state;
+	struct intel_crtc *crtc;
 	int ret = 0;
 
 	state = drm_atomic_state_alloc(dev);
@@ -16086,15 +17420,17 @@ static int intel_initial_commit(struct drm_device *dev)
 retry:
 	state->acquire_ctx = &ctx;
 
-	drm_for_each_crtc(crtc, dev) {
-		crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	for_each_intel_crtc(dev, crtc) {
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_crtc_state(state, crtc);
+
 		if (IS_ERR(crtc_state)) {
 			ret = PTR_ERR(crtc_state);
 			goto out;
 		}
 
-		if (crtc_state->active) {
-			ret = drm_atomic_add_affected_planes(state, crtc);
+		if (crtc_state->hw.active) {
+			ret = drm_atomic_add_affected_planes(state, &crtc->base);
 			if (ret)
 				goto out;
 
@@ -16104,7 +17440,7 @@ retry:
 			 * having a proper LUT loaded. Remove once we
 			 * have readout for pipe gamma enable.
 			 */
-			crtc_state->color_mgmt_changed = true;
+			crtc_state->uapi.color_mgmt_changed = true;
 		}
 	}
 
@@ -16125,114 +17461,111 @@ out:
 	return ret;
 }
 
-int intel_modeset_init(struct drm_device *dev)
+static void intel_mode_config_init(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe;
-	struct intel_crtc *crtc;
-	int ret;
-
-	dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0);
-
-	drm_mode_config_init(dev);
-
-	ret = intel_bw_init(dev_priv);
-	if (ret)
-		return ret;
-
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-
-	dev->mode_config.preferred_depth = 24;
-	dev->mode_config.prefer_shadow = 1;
-
-	dev->mode_config.allow_fb_modifiers = true;
+	struct drm_mode_config *mode_config = &i915->drm.mode_config;
 
-	dev->mode_config.funcs = &intel_mode_funcs;
+	drm_mode_config_init(&i915->drm);
 
-	init_llist_head(&dev_priv->atomic_helper.free_list);
-	INIT_WORK(&dev_priv->atomic_helper.free_work,
-		  intel_atomic_helper_free_state_worker);
-
-	intel_init_quirks(dev_priv);
+	mode_config->min_width = 0;
+	mode_config->min_height = 0;
 
-	intel_fbc_init(dev_priv);
+	mode_config->preferred_depth = 24;
+	mode_config->prefer_shadow = 1;
 
-	intel_init_pm(dev_priv);
-
-	/*
-	 * There may be no VBT; and if the BIOS enabled SSC we can
-	 * just keep using it to avoid unnecessary flicker.  Whereas if the
-	 * BIOS isn't using it, don't assume it will work even if the VBT
-	 * indicates as much.
-	 */
-	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) {
-		bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) &
-					    DREF_SSC1_ENABLE);
+	mode_config->allow_fb_modifiers = true;
 
-		if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) {
-			DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n",
-				     bios_lvds_use_ssc ? "en" : "dis",
-				     dev_priv->vbt.lvds_use_ssc ? "en" : "dis");
-			dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc;
-		}
-	}
+	mode_config->funcs = &intel_mode_funcs;
 
 	/*
 	 * Maximum framebuffer dimensions, chosen to match
 	 * the maximum render engine surface size on gen4+.
 	 */
-	if (INTEL_GEN(dev_priv) >= 7) {
-		dev->mode_config.max_width = 16384;
-		dev->mode_config.max_height = 16384;
-	} else if (INTEL_GEN(dev_priv) >= 4) {
-		dev->mode_config.max_width = 8192;
-		dev->mode_config.max_height = 8192;
-	} else if (IS_GEN(dev_priv, 3)) {
-		dev->mode_config.max_width = 4096;
-		dev->mode_config.max_height = 4096;
+	if (INTEL_GEN(i915) >= 7) {
+		mode_config->max_width = 16384;
+		mode_config->max_height = 16384;
+	} else if (INTEL_GEN(i915) >= 4) {
+		mode_config->max_width = 8192;
+		mode_config->max_height = 8192;
+	} else if (IS_GEN(i915, 3)) {
+		mode_config->max_width = 4096;
+		mode_config->max_height = 4096;
 	} else {
-		dev->mode_config.max_width = 2048;
-		dev->mode_config.max_height = 2048;
+		mode_config->max_width = 2048;
+		mode_config->max_height = 2048;
 	}
 
-	if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
-		dev->mode_config.cursor_width = IS_I845G(dev_priv) ? 64 : 512;
-		dev->mode_config.cursor_height = 1023;
-	} else if (IS_GEN(dev_priv, 2)) {
-		dev->mode_config.cursor_width = 64;
-		dev->mode_config.cursor_height = 64;
+	if (IS_I845G(i915) || IS_I865G(i915)) {
+		mode_config->cursor_width = IS_I845G(i915) ? 64 : 512;
+		mode_config->cursor_height = 1023;
+	} else if (IS_GEN(i915, 2)) {
+		mode_config->cursor_width = 64;
+		mode_config->cursor_height = 64;
 	} else {
-		dev->mode_config.cursor_width = 256;
-		dev->mode_config.cursor_height = 256;
+		mode_config->cursor_width = 256;
+		mode_config->cursor_height = 256;
 	}
+}
 
-	DRM_DEBUG_KMS("%d display pipe%s available.\n",
-		      INTEL_INFO(dev_priv)->num_pipes,
-		      INTEL_INFO(dev_priv)->num_pipes > 1 ? "s" : "");
+int intel_modeset_init(struct drm_i915_private *i915)
+{
+	struct drm_device *dev = &i915->drm;
+	enum pipe pipe;
+	struct intel_crtc *crtc;
+	int ret;
 
-	for_each_pipe(dev_priv, pipe) {
-		ret = intel_crtc_init(dev_priv, pipe);
-		if (ret) {
-			drm_mode_config_cleanup(dev);
-			return ret;
+	i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0);
+	i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI |
+					WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+
+	intel_mode_config_init(i915);
+
+	ret = intel_bw_init(i915);
+	if (ret)
+		return ret;
+
+	init_llist_head(&i915->atomic_helper.free_list);
+	INIT_WORK(&i915->atomic_helper.free_work,
+		  intel_atomic_helper_free_state_worker);
+
+	intel_init_quirks(i915);
+
+	intel_fbc_init(i915);
+
+	intel_init_pm(i915);
+
+	intel_panel_sanitize_ssc(i915);
+
+	intel_gmbus_setup(i915);
+
+	DRM_DEBUG_KMS("%d display pipe%s available.\n",
+		      INTEL_NUM_PIPES(i915),
+		      INTEL_NUM_PIPES(i915) > 1 ? "s" : "");
+
+	if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) {
+		for_each_pipe(i915, pipe) {
+			ret = intel_crtc_init(i915, pipe);
+			if (ret) {
+				drm_mode_config_cleanup(dev);
+				return ret;
+			}
 		}
 	}
 
 	intel_shared_dpll_init(dev);
-	intel_update_fdi_pll_freq(dev_priv);
+	intel_update_fdi_pll_freq(i915);
 
-	intel_update_czclk(dev_priv);
-	intel_modeset_init_hw(dev);
+	intel_update_czclk(i915);
+	intel_modeset_init_hw(i915);
 
-	intel_hdcp_component_init(dev_priv);
+	intel_hdcp_component_init(i915);
 
-	if (dev_priv->max_cdclk_freq == 0)
-		intel_update_max_cdclk(dev_priv);
+	if (i915->max_cdclk_freq == 0)
+		intel_update_max_cdclk(i915);
 
 	/* Just disable it once at startup */
-	i915_disable_vga(dev_priv);
-	intel_setup_outputs(dev_priv);
+	intel_vga_disable(i915);
+	intel_setup_outputs(i915);
 
 	drm_modeset_lock_all(dev);
 	intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
@@ -16251,8 +17584,7 @@ int intel_modeset_init(struct drm_device *dev)
 		 * can even allow for smooth boot transitions if the BIOS
 		 * fb is large enough for the active pipe configuration.
 		 */
-		dev_priv->display.get_initial_plane_config(crtc,
-							   &plane_config);
+		i915->display.get_initial_plane_config(crtc, &plane_config);
 
 		/*
 		 * If the fb is shared between multiple heads, we'll
@@ -16266,7 +17598,7 @@ int intel_modeset_init(struct drm_device *dev)
 	 * Note that we need to do this after reconstructing the BIOS fb's
 	 * since the watermark calculation done here will use pstate->fb.
 	 */
-	if (!HAS_GMCH(dev_priv))
+	if (!HAS_GMCH(i915))
 		sanitize_watermarks(dev);
 
 	/*
@@ -16431,31 +17763,76 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 		(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc,
-				struct drm_modeset_acquire_ctx *ctx)
+static void intel_sanitize_frame_start_delay(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 
-	/* Clear any frame start delays used for debugging left by the BIOS */
-	if (crtc->active && !transcoder_is_dsi(cpu_transcoder)) {
+	if (INTEL_GEN(dev_priv) >= 9 ||
+	    IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) {
+		i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder);
+		u32 val;
+
+		if (transcoder_is_dsi(cpu_transcoder))
+			return;
+
+		val = I915_READ(reg);
+		val &= ~HSW_FRAME_START_DELAY_MASK;
+		val |= HSW_FRAME_START_DELAY(0);
+		I915_WRITE(reg, val);
+	} else {
 		i915_reg_t reg = PIPECONF(cpu_transcoder);
+		u32 val;
+
+		val = I915_READ(reg);
+		val &= ~PIPECONF_FRAME_START_DELAY_MASK;
+		val |= PIPECONF_FRAME_START_DELAY(0);
+		I915_WRITE(reg, val);
+	}
+
+	if (!crtc_state->has_pch_encoder)
+		return;
+
+	if (HAS_PCH_IBX(dev_priv)) {
+		i915_reg_t reg = PCH_TRANSCONF(crtc->pipe);
+		u32 val;
 
-		I915_WRITE(reg,
-			   I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
+		val = I915_READ(reg);
+		val &= ~TRANS_FRAME_START_DELAY_MASK;
+		val |= TRANS_FRAME_START_DELAY(0);
+		I915_WRITE(reg, val);
+	} else {
+		enum pipe pch_transcoder = intel_crtc_pch_transcoder(crtc);
+		i915_reg_t reg = TRANS_CHICKEN2(pch_transcoder);
+		u32 val;
+
+		val = I915_READ(reg);
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val |= TRANS_CHICKEN2_FRAME_START_DELAY(0);
+		I915_WRITE(reg, val);
 	}
+}
+
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+				struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state);
 
-	if (crtc_state->base.active) {
+	if (crtc_state->hw.active) {
 		struct intel_plane *plane;
 
+		/* Clear any frame start delays used for debugging left by the BIOS */
+		intel_sanitize_frame_start_delay(crtc_state);
+
 		/* Disable everything but the primary plane */
 		for_each_intel_plane_on_crtc(dev, crtc, plane) {
 			const struct intel_plane_state *plane_state =
 				to_intel_plane_state(plane->base.state);
 
-			if (plane_state->base.visible &&
+			if (plane_state->uapi.visible &&
 			    plane->base.type != DRM_PLANE_TYPE_PRIMARY)
 				intel_plane_disable_noatomic(crtc, plane);
 		}
@@ -16472,10 +17849,10 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
 
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
-	if (crtc_state->base.active && !intel_crtc_has_encoders(crtc))
-		intel_crtc_disable_noatomic(&crtc->base, ctx);
+	if (crtc_state->hw.active && !intel_crtc_has_encoders(crtc))
+		intel_crtc_disable_noatomic(crtc, ctx);
 
-	if (crtc_state->base.active || HAS_GMCH(dev_priv)) {
+	if (crtc_state->hw.active || HAS_GMCH(dev_priv)) {
 		/*
 		 * We start out with underrun reporting disabled to avoid races.
 		 * For correct bookkeeping mark this on active crtcs.
@@ -16506,7 +17883,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
 
 static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	/*
 	 * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram
@@ -16519,7 +17896,7 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state)
 	 * road.
 	 */
 	return IS_GEN(dev_priv, 6) &&
-		crtc_state->base.active &&
+		crtc_state->hw.active &&
 		crtc_state->shared_dpll &&
 		crtc_state->port_clock == 0;
 }
@@ -16536,7 +17913,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 	 * encoder is active and trying to read from a pipe) and the
 	 * pipe itself being active. */
 	bool has_active_crtc = crtc_state &&
-		crtc_state->base.active;
+		crtc_state->hw.active;
 
 	if (crtc_state && has_bogus_dpll_config(crtc_state)) {
 		DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n",
@@ -16591,39 +17968,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 		icl_sanitize_encoder_pll_mapping(encoder);
 }
 
-void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv)
-{
-	i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv);
-
-	if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) {
-		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
-		i915_disable_vga(dev_priv);
-	}
-}
-
-void i915_redisable_vga(struct drm_i915_private *dev_priv)
-{
-	intel_wakeref_t wakeref;
-
-	/*
-	 * This function can be called both from intel_modeset_setup_hw_state or
-	 * at a very early point in our resume sequence, where the power well
-	 * structures are not yet restored. Since this function is at a very
-	 * paranoid "someone might have enabled VGA while we were not looking"
-	 * level, just check if the power well is enabled instead of trying to
-	 * follow the "don't touch the power well if we don't need it" policy
-	 * the rest of the driver uses.
-	 */
-	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_VGA);
-	if (!wakeref)
-		return;
-
-	i915_redisable_vga_power_on(dev_priv);
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref);
-}
-
 /* FIXME read out full plane state for all planes */
 static void readout_plane_state(struct drm_i915_private *dev_priv)
 {
@@ -16667,28 +18011,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	struct drm_connector_list_iter conn_iter;
 	int i;
 
-	dev_priv->active_crtcs = 0;
+	dev_priv->active_pipes = 0;
 
 	for_each_intel_crtc(dev, crtc) {
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
 
-		__drm_atomic_helper_crtc_destroy_state(&crtc_state->base);
-		memset(crtc_state, 0, sizeof(*crtc_state));
-		__drm_atomic_helper_crtc_reset(&crtc->base, &crtc_state->base);
+		__drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi);
+		intel_crtc_free_hw_state(crtc_state);
+		intel_crtc_state_reset(crtc_state, crtc);
 
-		crtc_state->base.active = crtc_state->base.enable =
+		crtc_state->hw.active = crtc_state->hw.enable =
 			dev_priv->display.get_pipe_config(crtc, crtc_state);
 
-		crtc->base.enabled = crtc_state->base.enable;
-		crtc->active = crtc_state->base.active;
+		crtc->base.enabled = crtc_state->hw.enable;
+		crtc->active = crtc_state->hw.active;
 
-		if (crtc_state->base.active)
-			dev_priv->active_crtcs |= 1 << crtc->pipe;
+		if (crtc_state->hw.active)
+			dev_priv->active_pipes |= BIT(crtc->pipe);
 
 		DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n",
 			      crtc->base.base.id, crtc->base.name,
-			      enableddisabled(crtc_state->base.active));
+			      enableddisabled(crtc_state->hw.active));
 	}
 
 	readout_plane_state(dev_priv);
@@ -16710,7 +18054,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 			struct intel_crtc_state *crtc_state =
 				to_intel_crtc_state(crtc->base.state);
 
-			if (crtc_state->base.active &&
+			if (crtc_state->hw.active &&
 			    crtc_state->shared_dpll == pll)
 				pll->state.crtc_mask |= 1 << crtc->pipe;
 		}
@@ -16744,24 +18088,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	for_each_intel_connector_iter(connector, &conn_iter) {
 		if (connector->get_hw_state(connector)) {
+			struct intel_crtc_state *crtc_state;
+			struct intel_crtc *crtc;
+
 			connector->base.dpms = DRM_MODE_DPMS_ON;
 
 			encoder = connector->encoder;
 			connector->base.encoder = &encoder->base;
 
-			if (encoder->base.crtc &&
-			    encoder->base.crtc->state->active) {
+			crtc = to_intel_crtc(encoder->base.crtc);
+			crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL;
+
+			if (crtc_state && crtc_state->hw.active) {
 				/*
 				 * This has to be done during hardware readout
 				 * because anything calling .crtc_disable may
 				 * rely on the connector_mask being accurate.
 				 */
-				encoder->base.crtc->state->connector_mask |=
+				crtc_state->uapi.connector_mask |=
 					drm_connector_mask(&connector->base);
-				encoder->base.crtc->state->encoder_mask |=
+				crtc_state->uapi.encoder_mask |=
 					drm_encoder_mask(&encoder->base);
 			}
-
 		} else {
 			connector->base.dpms = DRM_MODE_DPMS_OFF;
 			connector->base.encoder = NULL;
@@ -16780,13 +18128,15 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		struct intel_plane *plane;
 		int min_cdclk = 0;
 
-		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
-		if (crtc_state->base.active) {
-			intel_mode_from_pipe_config(&crtc->base.mode, crtc_state);
-			crtc->base.mode.hdisplay = crtc_state->pipe_src_w;
-			crtc->base.mode.vdisplay = crtc_state->pipe_src_h;
-			intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state);
-			WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode));
+		if (crtc_state->hw.active) {
+			struct drm_display_mode *mode = &crtc_state->hw.mode;
+
+			intel_mode_from_pipe_config(&crtc_state->hw.adjusted_mode,
+						    crtc_state);
+
+			*mode = crtc_state->hw.adjusted_mode;
+			mode->hdisplay = crtc_state->pipe_src_w;
+			mode->vdisplay = crtc_state->pipe_src_h;
 
 			/*
 			 * The initial mode needs to be set in order to keep
@@ -16797,25 +18147,15 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 			 * set a flag to indicate that a full recalculation is
 			 * needed on the next commit.
 			 */
-			crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED;
+			mode->private_flags = I915_MODE_FLAG_INHERITED;
 
 			intel_crtc_compute_pixel_rate(crtc_state);
 
-			if (dev_priv->display.modeset_calc_cdclk) {
-				min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
-				if (WARN_ON(min_cdclk < 0))
-					min_cdclk = 0;
-			}
+			intel_crtc_update_active_timings(crtc_state);
 
-			drm_calc_timestamping_constants(&crtc->base,
-							&crtc_state->base.adjusted_mode);
-			update_scanline_offset(crtc_state);
+			intel_crtc_copy_hw_to_uapi_state(crtc_state);
 		}
 
-		dev_priv->min_cdclk[crtc->pipe] = min_cdclk;
-		dev_priv->min_voltage_level[crtc->pipe] =
-			crtc_state->min_voltage_level;
-
 		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
 			const struct intel_plane_state *plane_state =
 				to_intel_plane_state(plane->base.state);
@@ -16824,11 +18164,37 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 			 * FIXME don't have the fb yet, so can't
 			 * use intel_plane_data_rate() :(
 			 */
-			if (plane_state->base.visible)
+			if (plane_state->uapi.visible)
 				crtc_state->data_rate[plane->id] =
 					4 * crtc_state->pixel_rate;
+			/*
+			 * FIXME don't have the fb yet, so can't
+			 * use plane->min_cdclk() :(
+			 */
+			if (plane_state->uapi.visible && plane->min_cdclk) {
+				if (crtc_state->double_wide ||
+				    INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+					crtc_state->min_cdclk[plane->id] =
+						DIV_ROUND_UP(crtc_state->pixel_rate, 2);
+				else
+					crtc_state->min_cdclk[plane->id] =
+						crtc_state->pixel_rate;
+			}
+			DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk %d kHz\n",
+				      plane->base.base.id, plane->base.name,
+				      crtc_state->min_cdclk[plane->id]);
 		}
 
+		if (crtc_state->hw.active) {
+			min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
+			if (WARN_ON(min_cdclk < 0))
+				min_cdclk = 0;
+		}
+
+		dev_priv->min_cdclk[crtc->pipe] = min_cdclk;
+		dev_priv->min_voltage_level[crtc->pipe] =
+			crtc_state->min_voltage_level;
+
 		intel_bw_crtc_update(bw_state, crtc_state);
 
 		intel_pipe_config_sanity_check(dev_priv, crtc_state);
@@ -16860,8 +18226,11 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
 
 static void intel_early_display_was(struct drm_i915_private *dev_priv)
 {
-	/* Display WA #1185 WaDisableDARBFClkGating:cnl,glk */
-	if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+	/*
+	 * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl
+	 * Also known as Wa_14010480278.
+	 */
+	if (IS_GEN_RANGE(dev_priv, 10, 12) || IS_GEMINILAKE(dev_priv))
 		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 			   DARBF_GATING_DIS);
 
@@ -16942,7 +18311,6 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc_state *crtc_state;
 	struct intel_encoder *encoder;
 	struct intel_crtc *crtc;
 	intel_wakeref_t wakeref;
@@ -16962,7 +18330,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 		/* We need to sanitize only the MST primary port. */
 		if (encoder->type != INTEL_OUTPUT_DP_MST &&
 		    intel_phy_is_tc(dev_priv, phy))
-			intel_tc_port_sanitize(enc_to_dig_port(&encoder->base));
+			intel_tc_port_sanitize(enc_to_dig_port(encoder));
 	}
 
 	get_encoder_power_domains(dev_priv);
@@ -16975,11 +18343,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 	 * waits, so we need vblank interrupts restored beforehand.
 	 */
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
-		crtc_state = to_intel_crtc_state(crtc->base.state);
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
 
 		drm_crtc_vblank_reset(&crtc->base);
 
-		if (crtc_state->base.active)
+		if (crtc_state->hw.active)
 			intel_crtc_vblank_on(crtc_state);
 	}
 
@@ -16989,7 +18358,9 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 		intel_sanitize_encoder(encoder);
 
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
-		crtc_state = to_intel_crtc_state(crtc->base.state);
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
+
 		intel_sanitize_crtc(crtc, ctx);
 		intel_dump_pipe_config(crtc_state, NULL, "[setup_hw_state]");
 	}
@@ -17022,17 +18393,16 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 	}
 
 	for_each_intel_crtc(dev, crtc) {
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
 		u64 put_domains;
 
-		crtc_state = to_intel_crtc_state(crtc->base.state);
 		put_domains = modeset_get_crtc_power_domains(crtc_state);
 		if (WARN_ON(put_domains))
 			modeset_put_power_domains(dev_priv, put_domains);
 	}
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref);
-
-	intel_fbc_init_pipe_state(dev_priv);
 }
 
 void intel_display_resume(struct drm_device *dev)
@@ -17069,13 +18439,13 @@ void intel_display_resume(struct drm_device *dev)
 		drm_atomic_state_put(state);
 }
 
-static void intel_hpd_poll_fini(struct drm_device *dev)
+static void intel_hpd_poll_fini(struct drm_i915_private *i915)
 {
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 
 	/* Kill all the work that may have been queued by hpd. */
-	drm_connector_list_iter_begin(dev, &conn_iter);
+	drm_connector_list_iter_begin(&i915->drm, &conn_iter);
 	for_each_intel_connector_iter(connector, &conn_iter) {
 		if (connector->modeset_retry_work.func)
 			cancel_work_sync(&connector->modeset_retry_work);
@@ -17087,78 +18457,58 @@ static void intel_hpd_poll_fini(struct drm_device *dev)
 	drm_connector_list_iter_end(&conn_iter);
 }
 
-void intel_modeset_driver_remove(struct drm_device *dev)
+void intel_modeset_driver_remove(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	flush_workqueue(dev_priv->modeset_wq);
+	flush_workqueue(i915->flip_wq);
+	flush_workqueue(i915->modeset_wq);
 
-	flush_work(&dev_priv->atomic_helper.free_work);
-	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
+	flush_work(&i915->atomic_helper.free_work);
+	WARN_ON(!llist_empty(&i915->atomic_helper.free_list));
 
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
 	 * Too much stuff here (turning of connectors, ...) would
 	 * experience fancy races otherwise.
 	 */
-	intel_irq_uninstall(dev_priv);
+	intel_irq_uninstall(i915);
 
 	/*
 	 * Due to the hpd irq storm handling the hotplug work can re-arm the
 	 * poll handlers. Hence disable polling after hpd handling is shut down.
 	 */
-	intel_hpd_poll_fini(dev);
+	intel_hpd_poll_fini(i915);
+
+	/*
+	 * MST topology needs to be suspended so we don't have any calls to
+	 * fbdev after it's finalized. MST will be destroyed later as part of
+	 * drm_mode_config_cleanup()
+	 */
+	intel_dp_mst_suspend(i915);
 
 	/* poll work can call into fbdev, hence clean that up afterwards */
-	intel_fbdev_fini(dev_priv);
+	intel_fbdev_fini(i915);
 
 	intel_unregister_dsm_handler();
 
-	intel_fbc_global_disable(dev_priv);
+	intel_fbc_global_disable(i915);
 
 	/* flush any delayed tasks or pending work */
 	flush_scheduled_work();
 
-	intel_hdcp_component_fini(dev_priv);
+	intel_hdcp_component_fini(i915);
 
-	drm_mode_config_cleanup(dev);
+	drm_mode_config_cleanup(&i915->drm);
 
-	intel_overlay_cleanup(dev_priv);
+	intel_overlay_cleanup(i915);
 
-	intel_gmbus_teardown(dev_priv);
+	intel_gmbus_teardown(i915);
 
-	destroy_workqueue(dev_priv->modeset_wq);
+	intel_bw_cleanup(i915);
 
-	intel_fbc_cleanup_cfb(dev_priv);
-}
+	destroy_workqueue(i915->flip_wq);
+	destroy_workqueue(i915->modeset_wq);
 
-/*
- * set vga decode state - true == enable VGA decode
- */
-int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state)
-{
-	unsigned reg = INTEL_GEN(dev_priv) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL;
-	u16 gmch_ctrl;
-
-	if (pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl)) {
-		DRM_ERROR("failed to read control word\n");
-		return -EIO;
-	}
-
-	if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !state)
-		return 0;
-
-	if (state)
-		gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE;
-	else
-		gmch_ctrl |= INTEL_GMCH_VGA_DISABLE;
-
-	if (pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl)) {
-		DRM_ERROR("failed to write control word\n");
-		return -EIO;
-	}
-
-	return 0;
+	intel_fbc_cleanup_cfb(i915);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
@@ -17221,7 +18571,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv)
 
 	BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder));
 
-	if (!HAS_DISPLAY(dev_priv))
+	if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))
 		return NULL;
 
 	error = kzalloc(sizeof(*error), GFP_ATOMIC);
@@ -17300,7 +18650,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 	if (!error)
 		return;
 
-	err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev_priv)->num_pipes);
+	err_printf(m, "Num Pipes: %d\n", INTEL_NUM_PIPES(dev_priv));
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		err_printf(m, "PWR_WELL_CTL2: %08x\n",
 			   error->power_well_driver);
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 01fa87ad3270..028aab728514 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2006-2017 Intel Corporation
+ * Copyright © 2006-2019 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -32,8 +32,10 @@ enum link_m_n_set;
 struct dpll;
 struct drm_connector;
 struct drm_device;
+struct drm_display_mode;
 struct drm_encoder;
 struct drm_file;
+struct drm_format_info;
 struct drm_framebuffer;
 struct drm_i915_error_state_buf;
 struct drm_i915_gem_object;
@@ -52,6 +54,7 @@ struct intel_plane;
 struct intel_plane_state;
 struct intel_remapped_info;
 struct intel_rotation_info;
+struct intel_crtc_state;
 
 enum i915_gpio {
 	GPIOA,
@@ -91,6 +94,7 @@ enum pipe {
 #define pipe_name(p) ((p) + 'A')
 
 enum transcoder {
+	INVALID_TRANSCODER = -1,
 	/*
 	 * The following transcoders have a 1:1 transcoder -> pipe mapping,
 	 * keep their values fixed: the code assumes that TRANSCODER_A=0, the
@@ -182,6 +186,24 @@ enum plane_id {
 	for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
 		for_each_if((__crtc)->plane_ids_mask & BIT(__p))
 
+enum port {
+	PORT_NONE = -1,
+
+	PORT_A = 0,
+	PORT_B,
+	PORT_C,
+	PORT_D,
+	PORT_E,
+	PORT_F,
+	PORT_G,
+	PORT_H,
+	PORT_I,
+
+	I915_MAX_PORTS
+};
+
+#define port_name(p) ((p) + 'A')
+
 /*
  * Ports identifier referenced from other drivers.
  * Expected to remain stable over time
@@ -251,6 +273,7 @@ enum aux_ch {
 	AUX_CH_D,
 	AUX_CH_E, /* ICL+ */
 	AUX_CH_F,
+	AUX_CH_G,
 };
 
 #define aux_ch_name(a) ((a) + 'A')
@@ -289,10 +312,10 @@ enum phy_fia {
 };
 
 #define for_each_pipe(__dev_priv, __p) \
-	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++)
+	for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++)
 
 #define for_each_pipe_masked(__dev_priv, __p, __mask) \
-	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
+	for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) \
 		for_each_if((__mask) & BIT(__p))
 
 #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \
@@ -309,8 +332,11 @@ enum phy_fia {
 	     (__s) < RUNTIME_INFO(__dev_priv)->num_sprites[(__p)];	\
 	     (__s)++)
 
-#define for_each_port_masked(__port, __ports_mask) \
-	for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++)	\
+#define for_each_port(__port) \
+	for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++)
+
+#define for_each_port_masked(__port, __ports_mask)			\
+	for_each_port(__port)						\
 		for_each_if((__ports_mask) & BIT(__port))
 
 #define for_each_phy_masked(__phy, __phys_mask) \
@@ -330,7 +356,7 @@ enum phy_fia {
 			    &(dev)->mode_config.plane_list,		\
 			    base.head)					\
 		for_each_if((plane_mask) &				\
-			    drm_plane_mask(&intel_plane->base)))
+			    drm_plane_mask(&intel_plane->base))
 
 #define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
 	list_for_each_entry(intel_plane,				\
@@ -354,6 +380,13 @@ enum phy_fia {
 			    &(dev)->mode_config.encoder_list,	\
 			    base.head)
 
+#define for_each_intel_encoder_mask(dev, intel_encoder, encoder_mask)	\
+	list_for_each_entry(intel_encoder,				\
+			    &(dev)->mode_config.encoder_list,		\
+			    base.head)					\
+		for_each_if((encoder_mask) &				\
+			    drm_encoder_mask(&intel_encoder->base))
+
 #define for_each_intel_dp(dev, intel_encoder)			\
 	for_each_intel_encoder(dev, intel_encoder)		\
 		for_each_if(intel_encoder_is_dp(intel_encoder))
@@ -411,18 +444,49 @@ enum phy_fia {
 	     (__i)++) \
 		for_each_if(crtc)
 
+#define for_each_oldnew_intel_crtc_in_state_reverse(__state, crtc, old_crtc_state, new_crtc_state, __i) \
+	for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \
+	     (__i) >= 0  && \
+	     ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \
+	      (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \
+	      (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \
+	     (__i)--) \
+		for_each_if(crtc)
+
+#define intel_atomic_crtc_state_for_each_plane_state( \
+		  plane, plane_state, \
+		  crtc_state) \
+	for_each_intel_plane_mask(((crtc_state)->uapi.state->dev), (plane), \
+				((crtc_state)->uapi.plane_mask)) \
+		for_each_if ((plane_state = \
+			      to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->uapi.state, &plane->base))))
+
+#define for_each_new_intel_connector_in_state(__state, connector, new_connector_state, __i) \
+	for ((__i) = 0; \
+	     (__i) < (__state)->base.num_connector; \
+	     (__i)++) \
+		for_each_if ((__state)->base.connectors[__i].ptr && \
+			     ((connector) = to_intel_connector((__state)->base.connectors[__i].ptr), \
+			     (new_connector_state) = to_intel_digital_connector_state((__state)->base.connectors[__i].new_state), 1))
+
 void intel_link_compute_m_n(u16 bpp, int nlanes,
 			    int pixel_clock, int link_clock,
 			    struct intel_link_m_n *m_n,
 			    bool constant_n, bool fec_enable);
 bool is_ccs_modifier(u64 modifier);
+int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane);
 void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv);
 u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
 			      u32 pixel_format, u64 modifier);
 bool intel_plane_can_remap(const struct intel_plane_state *plane_state);
+enum drm_mode_status
+intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
+				const struct drm_display_mode *mode);
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
+bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
 
 void intel_plane_destroy(struct drm_plane *plane);
+void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state);
 void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe);
 void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe);
 enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc);
@@ -455,16 +519,14 @@ enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv,
 			      enum port port);
 int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_priv);
-enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv,
-					     enum pipe pipe);
 u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc);
+void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state);
 
-int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
+int ilk_get_lanes_required(int target_clock, int link_bw, int bpp);
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport,
 			 unsigned int expected_mask);
 int intel_get_load_detect_pipe(struct drm_connector *connector,
-			       const struct drm_display_mode *mode,
 			       struct intel_load_detect_pipe *old,
 			       struct drm_modeset_acquire_ctx *ctx);
 void intel_release_load_detect_pipe(struct drm_connector *connector,
@@ -499,14 +561,11 @@ void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state,
 		      enum link_m_n_set m_n);
-void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
-			       const struct intel_crtc_state *crtc_state);
 int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
 bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state,
 			struct dpll *best_clock);
 int chv_calc_dpll_params(int refclk, struct dpll *pll_clock);
 
-bool intel_crtc_active(struct intel_crtc *crtc);
 bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state);
 void hsw_enable_ips(const struct intel_crtc_state *crtc_state);
 void hsw_disable_ips(const struct intel_crtc_state *crtc_state);
@@ -520,8 +579,8 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
 
 u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center);
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
-int skl_max_scale(const struct intel_crtc_state *crtc_state,
-		  u32 pixel_format);
+void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state);
+void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state);
 u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
 u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state);
@@ -543,14 +602,15 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv);
 void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
 				     struct intel_display_error_state *error);
 
+bool
+intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info,
+				    uint64_t modifier);
+
 /* modesetting */
-void intel_modeset_init_hw(struct drm_device *dev);
-int intel_modeset_init(struct drm_device *dev);
-void intel_modeset_driver_remove(struct drm_device *dev);
-int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state);
+void intel_modeset_init_hw(struct drm_i915_private *i915);
+int intel_modeset_init(struct drm_i915_private *i915);
+void intel_modeset_driver_remove(struct drm_i915_private *i915);
 void intel_display_resume(struct drm_device *dev);
-void i915_redisable_vga(struct drm_i915_private *dev_priv);
-void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
 void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
 
 /* modesetting asserts */
@@ -567,9 +627,10 @@ void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
 		       enum pipe pipe, bool state);
 #define assert_fdi_rx_pll_enabled(d, p) assert_fdi_rx_pll(d, p, true)
 #define assert_fdi_rx_pll_disabled(d, p) assert_fdi_rx_pll(d, p, false)
-void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
-#define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
-#define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
+void assert_pipe(struct drm_i915_private *dev_priv,
+		 enum transcoder cpu_transcoder, bool state);
+#define assert_pipe_enabled(d, t) assert_pipe(d, t, true)
+#define assert_pipe_disabled(d, t) assert_pipe(d, t, false)
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index c002f234ff31..21561acfa3ac 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -3,8 +3,6 @@
  * Copyright © 2019 Intel Corporation
  */
 
-#include <linux/vgaarb.h>
-
 #include "display/intel_crt.h"
 #include "display/intel_dp.h"
 
@@ -19,16 +17,14 @@
 #include "intel_hotplug.h"
 #include "intel_sideband.h"
 #include "intel_tc.h"
+#include "intel_vga.h"
 
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
 
 const char *
-intel_display_power_domain_str(struct drm_i915_private *i915,
-			       enum intel_display_power_domain domain)
+intel_display_power_domain_str(enum intel_display_power_domain domain)
 {
-	bool ddi_tc_ports = IS_GEN(i915, 12);
-
 	switch (domain) {
 	case POWER_DOMAIN_DISPLAY_CORE:
 		return "DISPLAY_CORE";
@@ -71,23 +67,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915,
 	case POWER_DOMAIN_PORT_DDI_C_LANES:
 		return "PORT_DDI_C_LANES";
 	case POWER_DOMAIN_PORT_DDI_D_LANES:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_LANES !=
-			     POWER_DOMAIN_PORT_DDI_TC1_LANES);
-		return ddi_tc_ports ? "PORT_DDI_TC1_LANES" : "PORT_DDI_D_LANES";
+		return "PORT_DDI_D_LANES";
 	case POWER_DOMAIN_PORT_DDI_E_LANES:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_LANES !=
-			     POWER_DOMAIN_PORT_DDI_TC2_LANES);
-		return ddi_tc_ports ? "PORT_DDI_TC2_LANES" : "PORT_DDI_E_LANES";
+		return "PORT_DDI_E_LANES";
 	case POWER_DOMAIN_PORT_DDI_F_LANES:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_LANES !=
-			     POWER_DOMAIN_PORT_DDI_TC3_LANES);
-		return ddi_tc_ports ? "PORT_DDI_TC3_LANES" : "PORT_DDI_F_LANES";
-	case POWER_DOMAIN_PORT_DDI_TC4_LANES:
-		return "PORT_DDI_TC4_LANES";
-	case POWER_DOMAIN_PORT_DDI_TC5_LANES:
-		return "PORT_DDI_TC5_LANES";
-	case POWER_DOMAIN_PORT_DDI_TC6_LANES:
-		return "PORT_DDI_TC6_LANES";
+		return "PORT_DDI_F_LANES";
+	case POWER_DOMAIN_PORT_DDI_G_LANES:
+		return "PORT_DDI_G_LANES";
+	case POWER_DOMAIN_PORT_DDI_H_LANES:
+		return "PORT_DDI_H_LANES";
+	case POWER_DOMAIN_PORT_DDI_I_LANES:
+		return "PORT_DDI_I_LANES";
 	case POWER_DOMAIN_PORT_DDI_A_IO:
 		return "PORT_DDI_A_IO";
 	case POWER_DOMAIN_PORT_DDI_B_IO:
@@ -95,23 +85,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915,
 	case POWER_DOMAIN_PORT_DDI_C_IO:
 		return "PORT_DDI_C_IO";
 	case POWER_DOMAIN_PORT_DDI_D_IO:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_IO !=
-			     POWER_DOMAIN_PORT_DDI_TC1_IO);
-		return ddi_tc_ports ? "PORT_DDI_TC1_IO" : "PORT_DDI_D_IO";
+		return "PORT_DDI_D_IO";
 	case POWER_DOMAIN_PORT_DDI_E_IO:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_IO !=
-			     POWER_DOMAIN_PORT_DDI_TC2_IO);
-		return ddi_tc_ports ? "PORT_DDI_TC2_IO" : "PORT_DDI_E_IO";
+		return "PORT_DDI_E_IO";
 	case POWER_DOMAIN_PORT_DDI_F_IO:
-		BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_IO !=
-			     POWER_DOMAIN_PORT_DDI_TC3_IO);
-		return ddi_tc_ports ? "PORT_DDI_TC3_IO" : "PORT_DDI_F_IO";
-	case POWER_DOMAIN_PORT_DDI_TC4_IO:
-		return "PORT_DDI_TC4_IO";
-	case POWER_DOMAIN_PORT_DDI_TC5_IO:
-		return "PORT_DDI_TC5_IO";
-	case POWER_DOMAIN_PORT_DDI_TC6_IO:
-		return "PORT_DDI_TC6_IO";
+		return "PORT_DDI_F_IO";
+	case POWER_DOMAIN_PORT_DDI_G_IO:
+		return "PORT_DDI_G_IO";
+	case POWER_DOMAIN_PORT_DDI_H_IO:
+		return "PORT_DDI_H_IO";
+	case POWER_DOMAIN_PORT_DDI_I_IO:
+		return "PORT_DDI_I_IO";
 	case POWER_DOMAIN_PORT_DSI:
 		return "PORT_DSI";
 	case POWER_DOMAIN_PORT_CRT:
@@ -129,34 +113,33 @@ intel_display_power_domain_str(struct drm_i915_private *i915,
 	case POWER_DOMAIN_AUX_C:
 		return "AUX_C";
 	case POWER_DOMAIN_AUX_D:
-		BUILD_BUG_ON(POWER_DOMAIN_AUX_D != POWER_DOMAIN_AUX_TC1);
-		return ddi_tc_ports ? "AUX_TC1" : "AUX_D";
+		return "AUX_D";
 	case POWER_DOMAIN_AUX_E:
-		BUILD_BUG_ON(POWER_DOMAIN_AUX_E != POWER_DOMAIN_AUX_TC2);
-		return ddi_tc_ports ? "AUX_TC2" : "AUX_E";
+		return "AUX_E";
 	case POWER_DOMAIN_AUX_F:
-		BUILD_BUG_ON(POWER_DOMAIN_AUX_F != POWER_DOMAIN_AUX_TC3);
-		return ddi_tc_ports ? "AUX_TC3" : "AUX_F";
-	case POWER_DOMAIN_AUX_TC4:
-		return "AUX_TC4";
-	case POWER_DOMAIN_AUX_TC5:
-		return "AUX_TC5";
-	case POWER_DOMAIN_AUX_TC6:
-		return "AUX_TC6";
+		return "AUX_F";
+	case POWER_DOMAIN_AUX_G:
+		return "AUX_G";
+	case POWER_DOMAIN_AUX_H:
+		return "AUX_H";
+	case POWER_DOMAIN_AUX_I:
+		return "AUX_I";
 	case POWER_DOMAIN_AUX_IO_A:
 		return "AUX_IO_A";
-	case POWER_DOMAIN_AUX_TBT1:
-		return "AUX_TBT1";
-	case POWER_DOMAIN_AUX_TBT2:
-		return "AUX_TBT2";
-	case POWER_DOMAIN_AUX_TBT3:
-		return "AUX_TBT3";
-	case POWER_DOMAIN_AUX_TBT4:
-		return "AUX_TBT4";
-	case POWER_DOMAIN_AUX_TBT5:
-		return "AUX_TBT5";
-	case POWER_DOMAIN_AUX_TBT6:
-		return "AUX_TBT6";
+	case POWER_DOMAIN_AUX_C_TBT:
+		return "AUX_C_TBT";
+	case POWER_DOMAIN_AUX_D_TBT:
+		return "AUX_D_TBT";
+	case POWER_DOMAIN_AUX_E_TBT:
+		return "AUX_E_TBT";
+	case POWER_DOMAIN_AUX_F_TBT:
+		return "AUX_F_TBT";
+	case POWER_DOMAIN_AUX_G_TBT:
+		return "AUX_G_TBT";
+	case POWER_DOMAIN_AUX_H_TBT:
+		return "AUX_H_TBT";
+	case POWER_DOMAIN_AUX_I_TBT:
+		return "AUX_I_TBT";
 	case POWER_DOMAIN_GMBUS:
 		return "GMBUS";
 	case POWER_DOMAIN_INIT:
@@ -283,23 +266,8 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv,
 				       u8 irq_pipe_mask, bool has_vga)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	/*
-	 * After we re-enable the power well, if we touch VGA register 0x3d5
-	 * we'll get unclaimed register interrupts. This stops after we write
-	 * anything to the VGA MSR register. The vgacon module uses this
-	 * register all the time, so if we unbind our driver and, as a
-	 * consequence, bind vgacon, we'll get stuck in an infinite loop at
-	 * console_unlock(). So make here we touch the VGA MSR register, making
-	 * sure vgacon can keep working normally without triggering interrupts
-	 * and error messages.
-	 */
-	if (has_vga) {
-		vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
-		outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
-		vga_put(pdev, VGA_RSRC_LEGACY_IO);
-	}
+	if (has_vga)
+		intel_vga_reset_io_mem(dev_priv);
 
 	if (irq_pipe_mask)
 		gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask);
@@ -450,7 +418,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
 	int pw_idx = power_well->desc->hsw.idx;
 	enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx);
 	u32 val;
-	int wa_idx_max;
+
+	WARN_ON(!IS_ICELAKE(dev_priv));
 
 	val = I915_READ(regs->driver);
 	I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx));
@@ -462,14 +431,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
 
 	hsw_wait_for_power_well_enable(dev_priv, power_well);
 
-	/* Display WA #1178: icl, tgl */
-	if (IS_TIGERLAKE(dev_priv))
-		wa_idx_max = ICL_PW_CTL_IDX_AUX_C;
-	else
-		wa_idx_max = ICL_PW_CTL_IDX_AUX_B;
-
-	if (!IS_ELKHARTLAKE(dev_priv) &&
-	    pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= wa_idx_max &&
+	/* Display WA #1178: icl */
+	if (pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B &&
 	    !intel_bios_is_port_edp(dev_priv, (enum port)phy)) {
 		val = I915_READ(ICL_AUX_ANAOVRD1(pw_idx));
 		val |= ICL_AUX_ANAOVRD1_ENABLE | ICL_AUX_ANAOVRD1_LDO_BYPASS;
@@ -486,10 +449,10 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
 	enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx);
 	u32 val;
 
-	if (INTEL_GEN(dev_priv) < 12) {
-		val = I915_READ(ICL_PORT_CL_DW12(phy));
-		I915_WRITE(ICL_PORT_CL_DW12(phy), val & ~ICL_LANE_ENABLE_AUX);
-	}
+	WARN_ON(!IS_ICELAKE(dev_priv));
+
+	val = I915_READ(ICL_PORT_CL_DW12(phy));
+	I915_WRITE(ICL_PORT_CL_DW12(phy), val & ~ICL_LANE_ENABLE_AUX);
 
 	val = I915_READ(regs->driver);
 	I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx));
@@ -551,7 +514,7 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
 		if (encoder->type == INTEL_OUTPUT_DP_MST)
 			continue;
 
-		dig_port = enc_to_dig_port(&encoder->base);
+		dig_port = enc_to_dig_port(encoder);
 		if (WARN_ON(!dig_port))
 			continue;
 
@@ -578,6 +541,8 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
 
 #endif
 
+#define TGL_AUX_PW_TO_TC_PORT(pw_idx)	((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1)
+
 static void
 icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
 				 struct i915_power_well *power_well)
@@ -594,6 +559,17 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
 	I915_WRITE(DP_AUX_CH_CTL(aux_ch), val);
 
 	hsw_power_well_enable(dev_priv, power_well);
+
+	if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) {
+		enum tc_port tc_port;
+
+		tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx);
+		I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2));
+
+		if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port),
+					  DKL_CMN_UC_DW27_UC_HEALTH, 1))
+			DRM_WARN("Timeout waiting TC uC health\n");
+	}
 }
 
 static void
@@ -714,7 +690,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv)
 	u32 mask;
 
 	mask = DC_STATE_EN_UPTO_DC5;
-	if (INTEL_GEN(dev_priv) >= 11)
+
+	if (INTEL_GEN(dev_priv) >= 12)
+		mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6
+					  | DC_STATE_EN_DC9;
+	else if (IS_GEN(dev_priv, 11))
 		mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9;
 	else if (IS_GEN9_LP(dev_priv))
 		mask |= DC_STATE_EN_DC9;
@@ -784,6 +764,52 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
 	dev_priv->csr.dc_state = val & mask;
 }
 
+static u32
+sanitize_target_dc_state(struct drm_i915_private *dev_priv,
+			 u32 target_dc_state)
+{
+	u32 states[] = {
+		DC_STATE_EN_UPTO_DC6,
+		DC_STATE_EN_UPTO_DC5,
+		DC_STATE_EN_DC3CO,
+		DC_STATE_DISABLE,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
+		if (target_dc_state != states[i])
+			continue;
+
+		if (dev_priv->csr.allowed_dc_mask & target_dc_state)
+			break;
+
+		target_dc_state = states[i + 1];
+	}
+
+	return target_dc_state;
+}
+
+static void tgl_enable_dc3co(struct drm_i915_private *dev_priv)
+{
+	DRM_DEBUG_KMS("Enabling DC3CO\n");
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO);
+}
+
+static void tgl_disable_dc3co(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("Disabling DC3CO\n");
+	val = I915_READ(DC_STATE_EN);
+	val &= ~DC_STATE_DC3CO_STATUS;
+	I915_WRITE(DC_STATE_EN, val);
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+	/*
+	 * Delay of 200us DC3CO Exit time B.Spec 49196
+	 */
+	usleep_range(200, 210);
+}
+
 static void bxt_enable_dc9(struct drm_i915_private *dev_priv)
 {
 	assert_can_enable_dc9(dev_priv);
@@ -839,6 +865,51 @@ lookup_power_well(struct drm_i915_private *dev_priv,
 	return &dev_priv->power_domains.power_wells[0];
 }
 
+/**
+ * intel_display_power_set_target_dc_state - Set target dc state.
+ * @dev_priv: i915 device
+ * @state: state which needs to be set as target_dc_state.
+ *
+ * This function set the "DC off" power well target_dc_state,
+ * based upon this target_dc_stste, "DC off" power well will
+ * enable desired DC state.
+ */
+void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv,
+					     u32 state)
+{
+	struct i915_power_well *power_well;
+	bool dc_off_enabled;
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+	mutex_lock(&power_domains->lock);
+	power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF);
+
+	if (WARN_ON(!power_well))
+		goto unlock;
+
+	state = sanitize_target_dc_state(dev_priv, state);
+
+	if (state == dev_priv->csr.target_dc_state)
+		goto unlock;
+
+	dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv,
+							   power_well);
+	/*
+	 * If DC off power well is disabled, need to enable and disable the
+	 * DC off power well to effect target DC state.
+	 */
+	if (!dc_off_enabled)
+		power_well->desc->ops->enable(dev_priv, power_well);
+
+	dev_priv->csr.target_dc_state = state;
+
+	if (!dc_off_enabled)
+		power_well->desc->ops->disable(dev_priv, power_well);
+
+unlock:
+	mutex_unlock(&power_domains->lock);
+}
+
 static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
 {
 	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
@@ -951,7 +1022,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv)
 static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
-	return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
+	return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 &&
+		(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0);
 }
 
 static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
@@ -967,6 +1039,11 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
 {
 	struct intel_cdclk_state cdclk_state = {};
 
+	if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) {
+		tgl_disable_dc3co(dev_priv);
+		return;
+	}
+
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 
 	dev_priv->display.get_cdclk(dev_priv, &cdclk_state);
@@ -999,10 +1076,17 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
 	if (!dev_priv->csr.dmc_payload)
 		return;
 
-	if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6)
+	switch (dev_priv->csr.target_dc_state) {
+	case DC_STATE_EN_DC3CO:
+		tgl_enable_dc3co(dev_priv);
+		break;
+	case DC_STATE_EN_UPTO_DC6:
 		skl_enable_dc6(dev_priv);
-	else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)
+		break;
+	case DC_STATE_EN_UPTO_DC5:
 		gen9_enable_dc5(dev_priv);
+		break;
+	}
 }
 
 static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv,
@@ -1208,7 +1292,7 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
 			intel_crt_reset(&encoder->base);
 	}
 
-	i915_redisable_vga_power_on(dev_priv);
+	intel_vga_redisable_power_on(dev_priv);
 
 	intel_pps_unlock_regs_wa(dev_priv);
 }
@@ -1580,8 +1664,8 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base));
-	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
+	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(encoder));
+	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder));
 
 	mutex_lock(&power_domains->lock);
 
@@ -1718,15 +1802,12 @@ __async_put_domains_state_ok(struct i915_power_domains *power_domains)
 static void print_power_domains(struct i915_power_domains *power_domains,
 				const char *prefix, u64 mask)
 {
-	struct drm_i915_private *i915 =
-		container_of(power_domains, struct drm_i915_private,
-			     power_domains);
 	enum intel_display_power_domain domain;
 
 	DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask));
 	for_each_power_domain(domain, mask)
 		DRM_DEBUG_DRIVER("%s use_count %d\n",
-				 intel_display_power_domain_str(i915, domain),
+				 intel_display_power_domain_str(domain),
 				 power_domains->domain_use_count[domain]);
 }
 
@@ -1896,7 +1977,7 @@ __intel_display_power_put_domain(struct drm_i915_private *dev_priv,
 {
 	struct i915_power_domains *power_domains;
 	struct i915_power_well *power_well;
-	const char *name = intel_display_power_domain_str(dev_priv, domain);
+	const char *name = intel_display_power_domain_str(domain);
 
 	power_domains = &dev_priv->power_domains;
 
@@ -2487,10 +2568,10 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_E) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT1) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT2) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT3) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT4) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_D_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_E_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_F_TBT) |		\
 	BIT_ULL(POWER_DOMAIN_VGA) |			\
 	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
@@ -2530,22 +2611,22 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_AUX_A))
 #define ICL_AUX_B_IO_POWER_DOMAINS (			\
 	BIT_ULL(POWER_DOMAIN_AUX_B))
-#define ICL_AUX_C_IO_POWER_DOMAINS (			\
+#define ICL_AUX_C_TC1_IO_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_AUX_C))
-#define ICL_AUX_D_IO_POWER_DOMAINS (			\
+#define ICL_AUX_D_TC2_IO_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_AUX_D))
-#define ICL_AUX_E_IO_POWER_DOMAINS (			\
+#define ICL_AUX_E_TC3_IO_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_AUX_E))
-#define ICL_AUX_F_IO_POWER_DOMAINS (			\
+#define ICL_AUX_F_TC4_IO_POWER_DOMAINS (		\
 	BIT_ULL(POWER_DOMAIN_AUX_F))
-#define ICL_AUX_TBT1_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT1))
-#define ICL_AUX_TBT2_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT2))
-#define ICL_AUX_TBT3_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT3))
-#define ICL_AUX_TBT4_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT4))
+#define ICL_AUX_C_TBT1_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_C_TBT))
+#define ICL_AUX_D_TBT2_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_D_TBT))
+#define ICL_AUX_E_TBT3_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_E_TBT))
+#define ICL_AUX_F_TBT4_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_F_TBT))
 
 #define TGL_PW_5_POWER_DOMAINS (			\
 	BIT_ULL(POWER_DOMAIN_PIPE_D) |			\
@@ -2565,24 +2646,24 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
 	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
 	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_TC1) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC2) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC3) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC4) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC5) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC6) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT1) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT2) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT3) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT4) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT5) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT6) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_G_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_H_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_I_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_E) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_G) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_H) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_I) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_D_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_E_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_F_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_G_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_H_TBT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_I_TBT) |		\
 	BIT_ULL(POWER_DOMAIN_VGA) |			\
 	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
@@ -2596,37 +2677,54 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	TGL_PW_2_POWER_DOMAINS |			\
 	BIT_ULL(POWER_DOMAIN_MODESET) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
 
-#define TGL_DDI_IO_TC1_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_IO))
-#define TGL_DDI_IO_TC2_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_IO))
-#define TGL_DDI_IO_TC3_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_IO))
-#define TGL_DDI_IO_TC4_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_IO))
-#define TGL_DDI_IO_TC5_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_IO))
-#define TGL_DDI_IO_TC6_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_IO))
-
-#define TGL_AUX_TC1_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC1))
-#define TGL_AUX_TC2_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC2))
-#define TGL_AUX_TC3_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC3))
-#define TGL_AUX_TC4_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC4))
-#define TGL_AUX_TC5_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC5))
-#define TGL_AUX_TC6_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TC6))
-#define TGL_AUX_TBT5_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT5))
-#define TGL_AUX_TBT6_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT6))
+#define TGL_DDI_IO_D_TC1_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO))
+#define TGL_DDI_IO_E_TC2_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO))
+#define TGL_DDI_IO_F_TC3_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO))
+#define TGL_DDI_IO_G_TC4_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_G_IO))
+#define TGL_DDI_IO_H_TC5_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_H_IO))
+#define TGL_DDI_IO_I_TC6_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_I_IO))
+
+#define TGL_AUX_A_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_A))
+#define TGL_AUX_B_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_B))
+#define TGL_AUX_C_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_C))
+#define TGL_AUX_D_TC1_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_D))
+#define TGL_AUX_E_TC2_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_E))
+#define TGL_AUX_F_TC3_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_F))
+#define TGL_AUX_G_TC4_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_G))
+#define TGL_AUX_H_TC5_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_H))
+#define TGL_AUX_I_TC6_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_I))
+#define TGL_AUX_D_TBT1_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_D_TBT))
+#define TGL_AUX_E_TBT2_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_E_TBT))
+#define TGL_AUX_F_TBT3_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_F_TBT))
+#define TGL_AUX_G_TBT4_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_G_TBT))
+#define TGL_AUX_H_TBT5_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_H_TBT))
+#define TGL_AUX_I_TBT6_IO_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_AUX_I_TBT))
 
 static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
 	.sync_hw = i9xx_power_well_sync_hw_noop,
@@ -2938,7 +3036,7 @@ static const struct i915_power_well_desc skl_power_wells[] = {
 		.name = "DC off",
 		.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3020,7 +3118,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = {
 		.name = "DC off",
 		.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3080,7 +3178,7 @@ static const struct i915_power_well_desc glk_power_wells[] = {
 		.name = "DC off",
 		.domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3249,7 +3347,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = {
 		.name = "DC off",
 		.domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3377,7 +3475,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		.name = "DC off",
 		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3484,8 +3582,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX C",
-		.domains = ICL_AUX_C_IO_POWER_DOMAINS,
+		.name = "AUX C TC1",
+		.domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3495,8 +3593,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX D",
-		.domains = ICL_AUX_D_IO_POWER_DOMAINS,
+		.name = "AUX D TC2",
+		.domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3506,8 +3604,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX E",
-		.domains = ICL_AUX_E_IO_POWER_DOMAINS,
+		.name = "AUX E TC3",
+		.domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3517,8 +3615,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX F",
-		.domains = ICL_AUX_F_IO_POWER_DOMAINS,
+		.name = "AUX F TC4",
+		.domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3528,8 +3626,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT1",
-		.domains = ICL_AUX_TBT1_IO_POWER_DOMAINS,
+		.name = "AUX C TBT1",
+		.domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3539,8 +3637,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT2",
-		.domains = ICL_AUX_TBT2_IO_POWER_DOMAINS,
+		.name = "AUX D TBT2",
+		.domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3550,8 +3648,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT3",
-		.domains = ICL_AUX_TBT3_IO_POWER_DOMAINS,
+		.name = "AUX E TBT3",
+		.domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3561,8 +3659,8 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT4",
-		.domains = ICL_AUX_TBT4_IO_POWER_DOMAINS,
+		.name = "AUX F TBT4",
+		.domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3585,6 +3683,151 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 	},
 };
 
+static const struct i915_power_well_desc ehl_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = SKL_DISP_DC_OFF,
+	},
+	{
+		.name = "power well 2",
+		.domains = ICL_PW_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_2,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "power well 3",
+		.domains = ICL_PW_3_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_3,
+			.hsw.irq_pipe_mask = BIT(PIPE_B),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DDI A IO",
+		.domains = ICL_DDI_IO_A_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_A,
+		},
+	},
+	{
+		.name = "DDI B IO",
+		.domains = ICL_DDI_IO_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_B,
+		},
+	},
+	{
+		.name = "DDI C IO",
+		.domains = ICL_DDI_IO_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_C,
+		},
+	},
+	{
+		.name = "DDI D IO",
+		.domains = ICL_DDI_IO_D_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_D,
+		},
+	},
+	{
+		.name = "AUX A",
+		.domains = ICL_AUX_A_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_A,
+		},
+	},
+	{
+		.name = "AUX B",
+		.domains = ICL_AUX_B_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_B,
+		},
+	},
+	{
+		.name = "AUX C",
+		.domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_C,
+		},
+	},
+	{
+		.name = "AUX D",
+		.domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_D,
+		},
+	},
+	{
+		.name = "power well 4",
+		.domains = ICL_PW_4_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_4,
+			.hsw.has_fuses = true,
+			.hsw.irq_pipe_mask = BIT(PIPE_C),
+		},
+	},
+};
+
 static const struct i915_power_well_desc tgl_power_wells[] = {
 	{
 		.name = "always-on",
@@ -3610,7 +3853,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		.name = "DC off",
 		.domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS,
 		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
+		.id = SKL_DISP_DC_OFF,
 	},
 	{
 		.name = "power well 2",
@@ -3667,8 +3910,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		}
 	},
 	{
-		.name = "DDI TC1 IO",
-		.domains = TGL_DDI_IO_TC1_POWER_DOMAINS,
+		.name = "DDI D TC1 IO",
+		.domains = TGL_DDI_IO_D_TC1_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3677,8 +3920,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "DDI TC2 IO",
-		.domains = TGL_DDI_IO_TC2_POWER_DOMAINS,
+		.name = "DDI E TC2 IO",
+		.domains = TGL_DDI_IO_E_TC2_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3687,8 +3930,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "DDI TC3 IO",
-		.domains = TGL_DDI_IO_TC3_POWER_DOMAINS,
+		.name = "DDI F TC3 IO",
+		.domains = TGL_DDI_IO_F_TC3_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3697,8 +3940,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "DDI TC4 IO",
-		.domains = TGL_DDI_IO_TC4_POWER_DOMAINS,
+		.name = "DDI G TC4 IO",
+		.domains = TGL_DDI_IO_G_TC4_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3707,8 +3950,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "DDI TC5 IO",
-		.domains = TGL_DDI_IO_TC5_POWER_DOMAINS,
+		.name = "DDI H TC5 IO",
+		.domains = TGL_DDI_IO_H_TC5_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3717,8 +3960,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "DDI TC6 IO",
-		.domains = TGL_DDI_IO_TC6_POWER_DOMAINS,
+		.name = "DDI I TC6 IO",
+		.domains = TGL_DDI_IO_I_TC6_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3728,8 +3971,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 	},
 	{
 		.name = "AUX A",
-		.domains = ICL_AUX_A_IO_POWER_DOMAINS,
-		.ops = &icl_combo_phy_aux_power_well_ops,
+		.domains = TGL_AUX_A_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
 			.hsw.regs = &icl_aux_power_well_regs,
@@ -3738,8 +3981,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 	},
 	{
 		.name = "AUX B",
-		.domains = ICL_AUX_B_IO_POWER_DOMAINS,
-		.ops = &icl_combo_phy_aux_power_well_ops,
+		.domains = TGL_AUX_B_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
 			.hsw.regs = &icl_aux_power_well_regs,
@@ -3748,8 +3991,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 	},
 	{
 		.name = "AUX C",
-		.domains = ICL_AUX_C_IO_POWER_DOMAINS,
-		.ops = &icl_combo_phy_aux_power_well_ops,
+		.domains = TGL_AUX_C_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
 			.hsw.regs = &icl_aux_power_well_regs,
@@ -3757,8 +4000,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC1",
-		.domains = TGL_AUX_TC1_IO_POWER_DOMAINS,
+		.name = "AUX D TC1",
+		.domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3768,8 +4011,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC2",
-		.domains = TGL_AUX_TC2_IO_POWER_DOMAINS,
+		.name = "AUX E TC2",
+		.domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3779,8 +4022,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC3",
-		.domains = TGL_AUX_TC3_IO_POWER_DOMAINS,
+		.name = "AUX F TC3",
+		.domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3790,8 +4033,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC4",
-		.domains = TGL_AUX_TC4_IO_POWER_DOMAINS,
+		.name = "AUX G TC4",
+		.domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3801,8 +4044,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC5",
-		.domains = TGL_AUX_TC5_IO_POWER_DOMAINS,
+		.name = "AUX H TC5",
+		.domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3812,8 +4055,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TC6",
-		.domains = TGL_AUX_TC6_IO_POWER_DOMAINS,
+		.name = "AUX I TC6",
+		.domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS,
 		.ops = &icl_tc_phy_aux_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3823,8 +4066,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT1",
-		.domains = ICL_AUX_TBT1_IO_POWER_DOMAINS,
+		.name = "AUX D TBT1",
+		.domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3834,8 +4077,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT2",
-		.domains = ICL_AUX_TBT2_IO_POWER_DOMAINS,
+		.name = "AUX E TBT2",
+		.domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3845,8 +4088,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT3",
-		.domains = ICL_AUX_TBT3_IO_POWER_DOMAINS,
+		.name = "AUX F TBT3",
+		.domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3856,8 +4099,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT4",
-		.domains = ICL_AUX_TBT4_IO_POWER_DOMAINS,
+		.name = "AUX G TBT4",
+		.domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3867,8 +4110,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT5",
-		.domains = TGL_AUX_TBT5_IO_POWER_DOMAINS,
+		.name = "AUX H TBT5",
+		.domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3878,8 +4121,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
 		},
 	},
 	{
-		.name = "AUX TBT6",
-		.domains = TGL_AUX_TBT6_IO_POWER_DOMAINS,
+		.name = "AUX I TBT6",
+		.domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS,
 		.ops = &hsw_power_well_ops,
 		.id = DISP_PW_ID_NONE,
 		{
@@ -3931,14 +4174,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
 	int requested_dc;
 	int max_dc;
 
-	if (INTEL_GEN(dev_priv) >= 11) {
-		max_dc = 2;
+	if (INTEL_GEN(dev_priv) >= 12) {
+		max_dc = 4;
 		/*
 		 * DC9 has a separate HW flow from the rest of the DC states,
 		 * not depending on the DMC firmware. It's needed by system
 		 * suspend/resume, so allow it unconditionally.
 		 */
 		mask = DC_STATE_EN_DC9;
+	} else if (IS_GEN(dev_priv, 11)) {
+		max_dc = 2;
+		mask = DC_STATE_EN_DC9;
 	} else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) {
 		max_dc = 2;
 		mask = 0;
@@ -3957,7 +4203,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
 		requested_dc = enable_dc;
 	} else if (enable_dc == -1) {
 		requested_dc = max_dc;
-	} else if (enable_dc > max_dc && enable_dc <= 2) {
+	} else if (enable_dc > max_dc && enable_dc <= 4) {
 		DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n",
 			      enable_dc, max_dc);
 		requested_dc = max_dc;
@@ -3966,10 +4212,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
 		requested_dc = max_dc;
 	}
 
-	if (requested_dc > 1)
+	switch (requested_dc) {
+	case 4:
+		mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6;
+		break;
+	case 3:
+		mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5;
+		break;
+	case 2:
 		mask |= DC_STATE_EN_UPTO_DC6;
-	if (requested_dc > 0)
+		break;
+	case 1:
 		mask |= DC_STATE_EN_UPTO_DC5;
+		break;
+	}
 
 	DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask);
 
@@ -4030,6 +4286,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
 	dev_priv->csr.allowed_dc_mask =
 		get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc);
 
+	dev_priv->csr.target_dc_state =
+		sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+
 	BUILD_BUG_ON(POWER_DOMAIN_NUM > 64);
 
 	mutex_init(&power_domains->lock);
@@ -4043,6 +4302,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
 	 */
 	if (IS_GEN(dev_priv, 12)) {
 		err = set_power_wells(power_domains, tgl_power_wells);
+	} else if (IS_ELKHARTLAKE(dev_priv)) {
+		err = set_power_wells(power_domains, ehl_power_wells);
 	} else if (IS_GEN(dev_priv, 11)) {
 		err = set_power_wells(power_domains, icl_power_wells);
 	} else if (IS_CANNONLAKE(dev_priv)) {
@@ -4662,6 +4923,56 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv)
 	intel_combo_phy_uninit(dev_priv);
 }
 
+struct buddy_page_mask {
+	u32 page_mask;
+	u8 type;
+	u8 num_channels;
+};
+
+static const struct buddy_page_mask tgl_buddy_page_masks[] = {
+	{ .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0xE },
+	{ .num_channels = 1, .type = INTEL_DRAM_DDR4,   .page_mask = 0xF },
+	{ .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1C },
+	{ .num_channels = 2, .type = INTEL_DRAM_DDR4,   .page_mask = 0x1F },
+	{}
+};
+
+static const struct buddy_page_mask wa_1409767108_buddy_page_masks[] = {
+	{ .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1 },
+	{ .num_channels = 1, .type = INTEL_DRAM_DDR4,   .page_mask = 0x1 },
+	{ .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x3 },
+	{ .num_channels = 2, .type = INTEL_DRAM_DDR4,   .page_mask = 0x3 },
+	{}
+};
+
+static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv)
+{
+	enum intel_dram_type type = dev_priv->dram_info.type;
+	u8 num_channels = dev_priv->dram_info.num_channels;
+	const struct buddy_page_mask *table;
+	int i;
+
+	if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0))
+		/* Wa_1409767108: tgl */
+		table = wa_1409767108_buddy_page_masks;
+	else
+		table = tgl_buddy_page_masks;
+
+	for (i = 0; table[i].page_mask != 0; i++)
+		if (table[i].num_channels == num_channels &&
+		    table[i].type == type)
+			break;
+
+	if (table[i].page_mask == 0) {
+		DRM_DEBUG_DRIVER("Unknown memory configuration; disabling address buddy logic.\n");
+		I915_WRITE(BW_BUDDY1_CTL, BW_BUDDY_DISABLE);
+		I915_WRITE(BW_BUDDY2_CTL, BW_BUDDY_DISABLE);
+	} else {
+		I915_WRITE(BW_BUDDY1_PAGE_MASK, table[i].page_mask);
+		I915_WRITE(BW_BUDDY2_PAGE_MASK, table[i].page_mask);
+	}
+}
+
 static void icl_display_core_init(struct drm_i915_private *dev_priv,
 				  bool resume)
 {
@@ -4694,6 +5005,10 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv,
 	/* 6. Setup MBUS. */
 	icl_mbus_init(dev_priv);
 
+	/* 7. Program arbiter BW_BUDDY registers */
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_bw_buddy_init(dev_priv);
+
 	if (resume && dev_priv->csr.dmc_payload)
 		intel_csr_load_program(dev_priv);
 }
@@ -5107,8 +5422,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *i915)
 
 		for_each_power_domain(domain, power_well->desc->domains)
 			DRM_DEBUG_DRIVER("  %-23s %d\n",
-					 intel_display_power_domain_str(i915,
-									domain),
+					 intel_display_power_domain_str(domain),
 					 power_domains->domain_use_count[domain]);
 	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index a50605b8b1ad..2608a65af7fa 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -28,7 +28,7 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_TRANSCODER_C,
 	POWER_DOMAIN_TRANSCODER_D,
 	POWER_DOMAIN_TRANSCODER_EDP,
-	/* VDSC/joining for TRANSCODER_EDP (ICL) or TRANSCODER_A (TGL) */
+	/* VDSC/joining for eDP/DSI transcoder (ICL) or pipe A (TGL) */
 	POWER_DOMAIN_TRANSCODER_VDSC_PW2,
 	POWER_DOMAIN_TRANSCODER_DSI_A,
 	POWER_DOMAIN_TRANSCODER_DSI_C,
@@ -36,29 +36,20 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_PORT_DDI_B_LANES,
 	POWER_DOMAIN_PORT_DDI_C_LANES,
 	POWER_DOMAIN_PORT_DDI_D_LANES,
-	POWER_DOMAIN_PORT_DDI_TC1_LANES = POWER_DOMAIN_PORT_DDI_D_LANES,
 	POWER_DOMAIN_PORT_DDI_E_LANES,
-	POWER_DOMAIN_PORT_DDI_TC2_LANES = POWER_DOMAIN_PORT_DDI_E_LANES,
 	POWER_DOMAIN_PORT_DDI_F_LANES,
-	POWER_DOMAIN_PORT_DDI_TC3_LANES = POWER_DOMAIN_PORT_DDI_F_LANES,
-	POWER_DOMAIN_PORT_DDI_TC4_LANES,
-	POWER_DOMAIN_PORT_DDI_TC5_LANES,
-	POWER_DOMAIN_PORT_DDI_TC6_LANES,
+	POWER_DOMAIN_PORT_DDI_G_LANES,
+	POWER_DOMAIN_PORT_DDI_H_LANES,
+	POWER_DOMAIN_PORT_DDI_I_LANES,
 	POWER_DOMAIN_PORT_DDI_A_IO,
 	POWER_DOMAIN_PORT_DDI_B_IO,
 	POWER_DOMAIN_PORT_DDI_C_IO,
 	POWER_DOMAIN_PORT_DDI_D_IO,
-	POWER_DOMAIN_PORT_DDI_TC1_IO = POWER_DOMAIN_PORT_DDI_D_IO,
 	POWER_DOMAIN_PORT_DDI_E_IO,
-	POWER_DOMAIN_PORT_DDI_TC2_IO = POWER_DOMAIN_PORT_DDI_E_IO,
 	POWER_DOMAIN_PORT_DDI_F_IO,
-	POWER_DOMAIN_PORT_DDI_TC3_IO = POWER_DOMAIN_PORT_DDI_F_IO,
 	POWER_DOMAIN_PORT_DDI_G_IO,
-	POWER_DOMAIN_PORT_DDI_TC4_IO = POWER_DOMAIN_PORT_DDI_G_IO,
 	POWER_DOMAIN_PORT_DDI_H_IO,
-	POWER_DOMAIN_PORT_DDI_TC5_IO = POWER_DOMAIN_PORT_DDI_H_IO,
 	POWER_DOMAIN_PORT_DDI_I_IO,
-	POWER_DOMAIN_PORT_DDI_TC6_IO = POWER_DOMAIN_PORT_DDI_I_IO,
 	POWER_DOMAIN_PORT_DSI,
 	POWER_DOMAIN_PORT_CRT,
 	POWER_DOMAIN_PORT_OTHER,
@@ -68,21 +59,19 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_AUX_D,
-	POWER_DOMAIN_AUX_TC1 = POWER_DOMAIN_AUX_D,
 	POWER_DOMAIN_AUX_E,
-	POWER_DOMAIN_AUX_TC2 = POWER_DOMAIN_AUX_E,
 	POWER_DOMAIN_AUX_F,
-	POWER_DOMAIN_AUX_TC3 = POWER_DOMAIN_AUX_F,
-	POWER_DOMAIN_AUX_TC4,
-	POWER_DOMAIN_AUX_TC5,
-	POWER_DOMAIN_AUX_TC6,
+	POWER_DOMAIN_AUX_G,
+	POWER_DOMAIN_AUX_H,
+	POWER_DOMAIN_AUX_I,
 	POWER_DOMAIN_AUX_IO_A,
-	POWER_DOMAIN_AUX_TBT1,
-	POWER_DOMAIN_AUX_TBT2,
-	POWER_DOMAIN_AUX_TBT3,
-	POWER_DOMAIN_AUX_TBT4,
-	POWER_DOMAIN_AUX_TBT5,
-	POWER_DOMAIN_AUX_TBT6,
+	POWER_DOMAIN_AUX_C_TBT,
+	POWER_DOMAIN_AUX_D_TBT,
+	POWER_DOMAIN_AUX_E_TBT,
+	POWER_DOMAIN_AUX_F_TBT,
+	POWER_DOMAIN_AUX_G_TBT,
+	POWER_DOMAIN_AUX_H_TBT,
+	POWER_DOMAIN_AUX_I_TBT,
 	POWER_DOMAIN_GMBUS,
 	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_GT_IRQ,
@@ -111,6 +100,7 @@ enum i915_power_well_id {
 	SKL_DISP_PW_MISC_IO,
 	SKL_DISP_PW_1,
 	SKL_DISP_PW_2,
+	SKL_DISP_DC_OFF,
 };
 
 #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
@@ -267,10 +257,11 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915);
 void intel_display_power_resume_early(struct drm_i915_private *i915);
 void intel_display_power_suspend(struct drm_i915_private *i915);
 void intel_display_power_resume(struct drm_i915_private *i915);
+void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv,
+					     u32 state);
 
 const char *
-intel_display_power_domain_str(struct drm_i915_private *i915,
-			       enum intel_display_power_domain domain);
+intel_display_power_domain_str(enum intel_display_power_domain domain);
 
 bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				    enum intel_display_power_domain domain);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 4075b0387c87..888ea8a170d1 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -90,8 +90,8 @@ struct intel_framebuffer {
 	/* for each plane in the normal GTT view */
 	struct {
 		unsigned int x, y;
-	} normal[2];
-	/* for each plane in the rotated GTT view */
+	} normal[4];
+	/* for each plane in the rotated GTT view for no-CCS formats */
 	struct {
 		unsigned int x, y;
 		unsigned int pitch; /* pixels */
@@ -128,7 +128,8 @@ struct intel_encoder {
 
 	enum intel_output_type type;
 	enum port port;
-	unsigned int cloneable;
+	u16 cloneable;
+	u8 pipe_mask;
 	enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder,
 					    struct intel_connector *connector,
 					    bool irq_received);
@@ -187,7 +188,6 @@ struct intel_encoder {
 	 * device interrupts are disabled.
 	 */
 	void (*suspend)(struct intel_encoder *);
-	int crtc_mask;
 	enum hpd_pin hpd_pin;
 	enum intel_display_power_domain power_domain;
 	/* for communication with audio component; protected by av_mutex */
@@ -388,6 +388,13 @@ struct intel_hdcp {
 	wait_queue_head_t cp_irq_queue;
 	atomic_t cp_irq_count;
 	int cp_irq_count_cached;
+
+	/*
+	 * HDCP register access for gen12+ need the transcoder associated.
+	 * Transcoder attached to the connector could be changed at modeset.
+	 * Hence caching the transcoder here.
+	 */
+	enum transcoder cpu_transcoder;
 };
 
 struct intel_connector {
@@ -481,9 +488,9 @@ struct intel_atomic_state {
 	 * but the converse is not necessarily true; simply changing a mode may
 	 * not flip the final active status of any CRTC's
 	 */
-	unsigned int active_pipe_changes;
+	u8 active_pipe_changes;
 
-	unsigned int active_crtcs;
+	u8 active_pipes;
 	/* minimum acceptable cdclk for each pipe */
 	int min_cdclk[I915_MAX_PIPES];
 	/* minimum acceptable voltage level for each pipe */
@@ -499,6 +506,14 @@ struct intel_atomic_state {
 
 	bool rps_interactive;
 
+	/*
+	 * active_pipes
+	 * min_cdclk[]
+	 * min_voltage_level[]
+	 * cdclk.*
+	 */
+	bool global_state_changed;
+
 	/* Gen9+ only */
 	struct skl_ddb_values wm_results;
 
@@ -508,7 +523,24 @@ struct intel_atomic_state {
 };
 
 struct intel_plane_state {
-	struct drm_plane_state base;
+	struct drm_plane_state uapi;
+
+	/*
+	 * actual hardware state, the state we program to the hardware.
+	 * The following members are used to verify the hardware state:
+	 * During initial hw readout, they need to be copied from uapi.
+	 */
+	struct {
+		struct drm_crtc *crtc;
+		struct drm_framebuffer *fb;
+
+		u16 alpha;
+		uint16_t pixel_blend_mode;
+		unsigned int rotation;
+		enum drm_color_encoding color_encoding;
+		enum drm_color_range color_range;
+	} hw;
+
 	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	unsigned long flags;
@@ -523,7 +555,7 @@ struct intel_plane_state {
 		 */
 		u32 stride;
 		int x, y;
-	} color_plane[2];
+	} color_plane[4];
 
 	/* plane control register */
 	u32 ctl;
@@ -531,6 +563,9 @@ struct intel_plane_state {
 	/* plane color control register */
 	u32 color_ctl;
 
+	/* chroma upsampler control register */
+	u32 cus_ctl;
+
 	/*
 	 * scaler_id
 	 *    = -1 : not using a scaler
@@ -552,24 +587,24 @@ struct intel_plane_state {
 	int scaler_id;
 
 	/*
-	 * linked_plane:
+	 * planar_linked_plane:
 	 *
 	 * ICL planar formats require 2 planes that are updated as pairs.
 	 * This member is used to make sure the other plane is also updated
 	 * when required, and for update_slave() to find the correct
 	 * plane_state to pass as argument.
 	 */
-	struct intel_plane *linked_plane;
+	struct intel_plane *planar_linked_plane;
 
 	/*
-	 * slave:
+	 * planar_slave:
 	 * If set don't update use the linked plane's state for updating
 	 * this plane during atomic commit with the update_slave() callback.
 	 *
 	 * It's also used by the watermark code to ignore wm calculations on
 	 * this plane. They're calculated by the linked plane's wm code.
 	 */
-	u32 slave;
+	u32 planar_slave;
 
 	struct drm_intel_sprite_colorkey ckey;
 };
@@ -742,7 +777,33 @@ enum intel_output_format {
 };
 
 struct intel_crtc_state {
-	struct drm_crtc_state base;
+	/*
+	 * uapi (drm) state. This is the software state shown to userspace.
+	 * In particular, the following members are used for bookkeeping:
+	 * - crtc
+	 * - state
+	 * - *_changed
+	 * - event
+	 * - commit
+	 * - mode_blob
+	 */
+	struct drm_crtc_state uapi;
+
+	/*
+	 * actual hardware state, the state we program to the hardware.
+	 * The following members are used to verify the hardware state:
+	 * - enable
+	 * - active
+	 * - mode / adjusted_mode
+	 * - color property blobs.
+	 *
+	 * During initial hw readout, they need to be copied to uapi.
+	 */
+	struct {
+		bool active, enable;
+		struct drm_property_blob *degamma_lut, *gamma_lut, *ctm;
+		struct drm_display_mode mode, adjusted_mode;
+	} hw;
 
 	/**
 	 * quirks - bitfield with hw state readout quirks
@@ -759,7 +820,6 @@ struct intel_crtc_state {
 	bool update_pipe; /* can a fast modeset be performed? */
 	bool disable_cxsr;
 	bool update_wm_pre, update_wm_post; /* watermarks are updated */
-	bool fb_changed; /* fb on any of the planes is changed */
 	bool fifo_changed; /* FIFO split is changed */
 	bool preload_luts;
 
@@ -865,6 +925,7 @@ struct intel_crtc_state {
 
 	bool has_psr;
 	bool has_psr2;
+	u32 dc3co_exitline;
 
 	/*
 	 * Frequence the dpll for the port should run at. Differs from the
@@ -926,6 +987,8 @@ struct intel_crtc_state {
 
 	struct intel_crtc_wm_state wm;
 
+	int min_cdclk[I915_MAX_PLANES];
+
 	u32 data_rate[I915_MAX_PLANES];
 
 	/* Gamma mode programmed on the pipe */
@@ -980,11 +1043,20 @@ struct intel_crtc_state {
 		bool dsc_split;
 		u16 compressed_bpp;
 		u8 slice_count;
-	} dsc_params;
-	struct drm_dsc_config dp_dsc_cfg;
+		struct drm_dsc_config config;
+	} dsc;
 
 	/* Forward Error correction State */
 	bool fec_enable;
+
+	/* Pointer to master transcoder in case of tiled displays */
+	enum transcoder master_transcoder;
+
+	/* Bitmask to indicate slaves attached */
+	u8 sync_mode_slaves_mask;
+
+	/* Only valid on TGL+ */
+	enum transcoder mst_master_transcoder;
 };
 
 struct intel_crtc {
@@ -1027,6 +1099,9 @@ struct intel_crtc {
 
 	/* scalers available on this crtc */
 	int num_scalers;
+
+	/* per pipe DSB related info */
+	struct intel_dsb dsb;
 };
 
 struct intel_plane {
@@ -1054,14 +1129,13 @@ struct intel_plane {
 	void (*update_plane)(struct intel_plane *plane,
 			     const struct intel_crtc_state *crtc_state,
 			     const struct intel_plane_state *plane_state);
-	void (*update_slave)(struct intel_plane *plane,
-			     const struct intel_crtc_state *crtc_state,
-			     const struct intel_plane_state *plane_state);
 	void (*disable_plane)(struct intel_plane *plane,
 			      const struct intel_crtc_state *crtc_state);
 	bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe);
 	int (*check_plane)(struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *plane_state);
+	int (*min_cdclk)(const struct intel_crtc_state *crtc_state,
+			 const struct intel_plane_state *plane_state);
 };
 
 struct intel_watermark_params {
@@ -1085,12 +1159,12 @@ struct cxsr_latency {
 
 #define to_intel_atomic_state(x) container_of(x, struct intel_atomic_state, base)
 #define to_intel_crtc(x) container_of(x, struct intel_crtc, base)
-#define to_intel_crtc_state(x) container_of(x, struct intel_crtc_state, base)
+#define to_intel_crtc_state(x) container_of(x, struct intel_crtc_state, uapi)
 #define to_intel_connector(x) container_of(x, struct intel_connector, base)
 #define to_intel_encoder(x) container_of(x, struct intel_encoder, base)
 #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base)
 #define to_intel_plane(x) container_of(x, struct intel_plane, base)
-#define to_intel_plane_state(x) container_of(x, struct intel_plane_state, base)
+#define to_intel_plane_state(x) container_of(x, struct intel_plane_state, uapi)
 #define intel_fb_obj(x) ((x) ? to_intel_bo((x)->obj[0]) : NULL)
 
 struct intel_hdmi {
@@ -1177,6 +1251,7 @@ struct intel_dp {
 	/* sink or branch descriptor */
 	struct drm_dp_desc desc;
 	struct drm_dp_aux aux;
+	u32 aux_busy_last_status;
 	u8 train_set[4];
 	int panel_power_up_delay;
 	int panel_power_down_delay;
@@ -1212,6 +1287,15 @@ struct intel_dp {
 	bool can_mst; /* this port supports mst */
 	bool is_mst;
 	int active_mst_links;
+
+	/*
+	 * DP_TP_* registers may be either on port or transcoder register space.
+	 */
+	struct {
+		i915_reg_t dp_tp_ctl;
+		i915_reg_t dp_tp_status;
+	} regs;
+
 	/* connector directly attached - won't be use for modeset in mst world */
 	struct intel_connector *attached_connector;
 
@@ -1270,6 +1354,7 @@ struct intel_digital_port {
 	char tc_port_name[8];
 	enum tc_port_mode tc_mode;
 	enum phy_fia tc_phy_fia;
+	u8 tc_phy_fia_idx;
 
 	void (*write_infoframe)(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state,
@@ -1353,9 +1438,9 @@ struct intel_load_detect_pipe {
 };
 
 static inline struct intel_encoder *
-intel_attached_encoder(struct drm_connector *connector)
+intel_attached_encoder(struct intel_connector *connector)
 {
-	return to_intel_connector(connector)->encoder;
+	return connector->encoder;
 }
 
 static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder)
@@ -1372,12 +1457,12 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder)
 }
 
 static inline struct intel_digital_port *
-enc_to_dig_port(struct drm_encoder *encoder)
+enc_to_dig_port(struct intel_encoder *encoder)
 {
-	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+	struct intel_encoder *intel_encoder = encoder;
 
 	if (intel_encoder_is_dig_port(intel_encoder))
-		return container_of(encoder, struct intel_digital_port,
+		return container_of(&encoder->base, struct intel_digital_port,
 				    base.base);
 	else
 		return NULL;
@@ -1386,16 +1471,17 @@ enc_to_dig_port(struct drm_encoder *encoder)
 static inline struct intel_digital_port *
 conn_to_dig_port(struct intel_connector *connector)
 {
-	return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base);
+	return enc_to_dig_port(intel_attached_encoder(connector));
 }
 
 static inline struct intel_dp_mst_encoder *
-enc_to_mst(struct drm_encoder *encoder)
+enc_to_mst(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_dp_mst_encoder, base.base);
+	return container_of(&encoder->base, struct intel_dp_mst_encoder,
+			    base.base);
 }
 
-static inline struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder)
+static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder)
 {
 	return &enc_to_dig_port(encoder)->dp;
 }
@@ -1408,14 +1494,14 @@ static inline bool intel_encoder_is_dp(struct intel_encoder *encoder)
 		return true;
 	case INTEL_OUTPUT_DDI:
 		/* Skip pure HDMI/DVI DDI encoders */
-		return i915_mmio_reg_valid(enc_to_intel_dp(&encoder->base)->output_reg);
+		return i915_mmio_reg_valid(enc_to_intel_dp(encoder)->output_reg);
 	default:
 		return false;
 	}
 }
 
 static inline struct intel_lspcon *
-enc_to_intel_lspcon(struct drm_encoder *encoder)
+enc_to_intel_lspcon(struct intel_encoder *encoder)
 {
 	return &enc_to_dig_port(encoder)->lspcon;
 }
@@ -1489,6 +1575,24 @@ intel_atomic_get_new_crtc_state(struct intel_atomic_state *state,
 								 &crtc->base));
 }
 
+static inline struct intel_digital_connector_state *
+intel_atomic_get_new_connector_state(struct intel_atomic_state *state,
+				     struct intel_connector *connector)
+{
+	return to_intel_digital_connector_state(
+			drm_atomic_get_new_connector_state(&state->base,
+			&connector->base));
+}
+
+static inline struct intel_digital_connector_state *
+intel_atomic_get_old_connector_state(struct intel_atomic_state *state,
+				     struct intel_connector *connector)
+{
+	return to_intel_digital_connector_state(
+			drm_atomic_get_old_connector_state(&state->base,
+			&connector->base));
+}
+
 /* intel_display.c */
 static inline bool
 intel_crtc_has_type(const struct intel_crtc_state *crtc_state,
@@ -1510,7 +1614,7 @@ intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe)
 	drm_wait_one_vblank(&dev_priv->drm, pipe);
 }
 static inline void
-intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe)
+intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
 	const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 9b15ac4f2fb6..c7424e2a04a3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -68,11 +68,6 @@
 
 #define DP_DPRX_ESI_LEN 14
 
-/* DP DSC small joiner has 2 FIFOs each of 640 x 6 bytes */
-#define DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER	61440
-#define DP_DSC_MIN_SUPPORTED_BPC		8
-#define DP_DSC_MAX_SUPPORTED_BPC		10
-
 /* DP DSC throughput values used for slice count calculations KPixels/s */
 #define DP_DSC_PEAK_PIXEL_RATE			2720000
 #define DP_DSC_MAX_ENC_THROUGHPUT_0		340000
@@ -151,9 +146,9 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
 	return intel_dig_port->base.type == INTEL_OUTPUT_EDP;
 }
 
-static struct intel_dp *intel_attached_dp(struct drm_connector *connector)
+static struct intel_dp *intel_attached_dp(struct intel_connector *connector)
 {
-	return enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+	return enc_to_intel_dp(intel_attached_encoder(connector));
 }
 
 static void intel_dp_link_down(struct intel_encoder *encoder,
@@ -500,7 +495,17 @@ u32 intel_dp_mode_to_fec_clock(u32 mode_clock)
 		       DP_DSC_FEC_OVERHEAD_FACTOR);
 }
 
-static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count,
+static int
+small_joiner_ram_size_bits(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 11)
+		return 7680 * 8;
+	else
+		return 6144 * 8;
+}
+
+static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915,
+				       u32 link_clock, u32 lane_count,
 				       u32 mode_clock, u32 mode_hdisplay)
 {
 	u32 bits_per_pixel, max_bpp_small_joiner_ram;
@@ -517,7 +522,8 @@ static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count,
 	DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel);
 
 	/* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */
-	max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay;
+	max_bpp_small_joiner_ram = small_joiner_ram_size_bits(i915) /
+		mode_hdisplay;
 	DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram);
 
 	/*
@@ -585,11 +591,30 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
 	return 0;
 }
 
+static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv,
+				  int hdisplay)
+{
+	/*
+	 * Older platforms don't like hdisplay==4096 with DP.
+	 *
+	 * On ILK/SNB/IVB the pipe seems to be somewhat running (scanline
+	 * and frame counter increment), but we don't get vblank interrupts,
+	 * and the pipe underruns immediately. The link also doesn't seem
+	 * to get trained properly.
+	 *
+	 * On CHV the vblank interrupts don't seem to disappear but
+	 * otherwise the symptoms are similar.
+	 *
+	 * TODO: confirm the behaviour on HSW+
+	 */
+	return hdisplay == 4096 && !HAS_DDI(dev_priv);
+}
+
 static enum drm_mode_status
 intel_dp_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
@@ -620,6 +645,9 @@ intel_dp_mode_valid(struct drm_connector *connector,
 	max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
 	mode_rate = intel_dp_link_required(target_clock, 18);
 
+	if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay))
+		return MODE_H_ILLEGAL;
+
 	/*
 	 * Output bpp is stored in 6.4 format so right shift by 4 to get the
 	 * integer value since we support only integer values of bpp.
@@ -634,7 +662,8 @@ intel_dp_mode_valid(struct drm_connector *connector,
 								true);
 		} else if (drm_dp_sink_supports_fec(intel_dp->fec_capable)) {
 			dsc_max_output_bpp =
-				intel_dp_dsc_get_output_bpp(max_link_clock,
+				intel_dp_dsc_get_output_bpp(dev_priv,
+							    max_link_clock,
 							    max_lanes,
 							    target_clock,
 							    mode->hdisplay) >> 4;
@@ -655,7 +684,7 @@ intel_dp_mode_valid(struct drm_connector *connector,
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return MODE_H_ILLEGAL;
 
-	return MODE_OK;
+	return intel_mode_valid_max_plane_size(dev_priv, mode);
 }
 
 u32 intel_dp_pack_aux(const u8 *src, int src_bytes)
@@ -732,12 +761,14 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp)
 	u32 DP;
 
 	if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN,
-		 "skipping pipe %c power sequencer kick due to port %c being active\n",
-		 pipe_name(pipe), port_name(intel_dig_port->base.port)))
+		 "skipping pipe %c power sequencer kick due to [ENCODER:%d:%s] being active\n",
+		 pipe_name(pipe), intel_dig_port->base.base.base.id,
+		 intel_dig_port->base.base.name))
 		return;
 
-	DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n",
-		      pipe_name(pipe), port_name(intel_dig_port->base.port));
+	DRM_DEBUG_KMS("kicking pipe %c power sequencer for [ENCODER:%d:%s]\n",
+		      pipe_name(pipe), intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name);
 
 	/* Preserve the BIOS-computed detected bit. This is
 	 * supposed to be read-only.
@@ -803,7 +834,7 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv)
 	 * Pick one that's not used by other ports.
 	 */
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		if (encoder->type == INTEL_OUTPUT_EDP) {
 			WARN_ON(intel_dp->active_pipe != INVALID_PIPE &&
@@ -855,9 +886,10 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp)
 	vlv_steal_power_sequencer(dev_priv, pipe);
 	intel_dp->pps_pipe = pipe;
 
-	DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n",
+	DRM_DEBUG_KMS("picked pipe %c power sequencer for [ENCODER:%d:%s]\n",
 		      pipe_name(intel_dp->pps_pipe),
-		      port_name(intel_dig_port->base.port));
+		      intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name);
 
 	/* init power sequencer on this pipe and port */
 	intel_dp_init_panel_power_sequencer(intel_dp);
@@ -965,13 +997,16 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp)
 
 	/* didn't find one? just let vlv_power_sequencer_pipe() pick one when needed */
 	if (intel_dp->pps_pipe == INVALID_PIPE) {
-		DRM_DEBUG_KMS("no initial power sequencer for port %c\n",
-			      port_name(port));
+		DRM_DEBUG_KMS("no initial power sequencer for [ENCODER:%d:%s]\n",
+			      intel_dig_port->base.base.base.id,
+			      intel_dig_port->base.base.name);
 		return;
 	}
 
-	DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n",
-		      port_name(port), pipe_name(intel_dp->pps_pipe));
+	DRM_DEBUG_KMS("initial power sequencer for [ENCODER:%d:%s]: pipe %c\n",
+		      intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name,
+		      pipe_name(intel_dp->pps_pipe));
 
 	intel_dp_init_panel_power_sequencer(intel_dp);
 	intel_dp_init_panel_power_sequencer_registers(intel_dp, false);
@@ -996,7 +1031,7 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv)
 	 */
 
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		WARN_ON(intel_dp->active_pipe != INVALID_PIPE);
 
@@ -1144,18 +1179,20 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp);
+	const unsigned int timeout_ms = 10;
 	u32 status;
 	bool done;
 
 #define C (((status = intel_uncore_read_notrace(&i915->uncore, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0)
 	done = wait_event_timeout(i915->gmbus_wait_queue, C,
-				  msecs_to_jiffies_timeout(10));
+				  msecs_to_jiffies_timeout(timeout_ms));
 
 	/* just trace the final value */
 	trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true);
 
 	if (!done)
-		DRM_ERROR("dp aux hw did not signal timeout!\n");
+		DRM_ERROR("%s did not complete or timeout within %ums (status 0x%08x)\n",
+			  intel_dp->aux.name, timeout_ms, status);
 #undef C
 
 	return status;
@@ -1338,13 +1375,12 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true);
 
 	if (try == 3) {
-		static u32 last_status = -1;
 		const u32 status = intel_uncore_read(uncore, ch_ctl);
 
-		if (status != last_status) {
+		if (status != intel_dp->aux_busy_last_status) {
 			WARN(1, "dp_aux_ch not started status 0x%08x\n",
 			     status);
-			last_status = status;
+			intel_dp->aux_busy_last_status = status;
 		}
 
 		ret = -EBUSY;
@@ -1636,6 +1672,7 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp)
 	case AUX_CH_D:
 	case AUX_CH_E:
 	case AUX_CH_F:
+	case AUX_CH_G:
 		return DP_AUX_CH_CTL(aux_ch);
 	default:
 		MISSING_CASE(aux_ch);
@@ -1656,6 +1693,7 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index)
 	case AUX_CH_D:
 	case AUX_CH_E:
 	case AUX_CH_F:
+	case AUX_CH_G:
 		return DP_AUX_CH_DATA(aux_ch, index);
 	default:
 		MISSING_CASE(aux_ch);
@@ -1776,7 +1814,7 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
 	char str[128]; /* FIXME: too big for stack? */
 
-	if ((drm_debug & DRM_UT_KMS) == 0)
+	if (!drm_debug_enabled(DRM_UT_KMS))
 		return;
 
 	snprintf_int_array(str, sizeof(str),
@@ -1834,8 +1872,14 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp,
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
-	return INTEL_GEN(dev_priv) >= 11 &&
-		pipe_config->cpu_transcoder != TRANSCODER_A;
+	/* On TGL, FEC is supported on all Pipes */
+	if (INTEL_GEN(dev_priv) >= 12)
+		return true;
+
+	if (IS_GEN(dev_priv, 11) && pipe_config->cpu_transcoder != TRANSCODER_A)
+		return true;
+
+	return false;
 }
 
 static bool intel_dp_supports_fec(struct intel_dp *intel_dp,
@@ -1845,22 +1889,15 @@ static bool intel_dp_supports_fec(struct intel_dp *intel_dp,
 		drm_dp_sink_supports_fec(intel_dp->fec_capable);
 }
 
-static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp,
-					 const struct intel_crtc_state *pipe_config)
-{
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-
-	return INTEL_GEN(dev_priv) >= 10 &&
-		pipe_config->cpu_transcoder != TRANSCODER_A;
-}
-
 static bool intel_dp_supports_dsc(struct intel_dp *intel_dp,
-				  const struct intel_crtc_state *pipe_config)
+				  const struct intel_crtc_state *crtc_state)
 {
-	if (!intel_dp_is_edp(intel_dp) && !pipe_config->fec_enable)
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+
+	if (!intel_dp_is_edp(intel_dp) && !crtc_state->fec_enable)
 		return false;
 
-	return intel_dp_source_supports_dsc(intel_dp, pipe_config) &&
+	return intel_dsc_source_support(encoder, crtc_state) &&
 		drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd);
 }
 
@@ -1945,7 +1982,7 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
 				  struct intel_crtc_state *pipe_config,
 				  const struct link_config_limits *limits)
 {
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	int bpp, clock, lane_count;
 	int mode_rate, link_clock, link_avail;
 
@@ -1992,6 +2029,63 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
 	return 0;
 }
 
+#define DSC_SUPPORTED_VERSION_MIN		1
+
+static int intel_dp_dsc_compute_params(struct intel_encoder *encoder,
+				       struct intel_crtc_state *crtc_state)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
+	u8 line_buf_depth;
+	int ret;
+
+	ret = intel_dsc_compute_params(encoder, crtc_state);
+	if (ret)
+		return ret;
+
+	/*
+	 * Slice Height of 8 works for all currently available panels. So start
+	 * with that if pic_height is an integral multiple of 8. Eventually add
+	 * logic to try multiple slice heights.
+	 */
+	if (vdsc_cfg->pic_height % 8 == 0)
+		vdsc_cfg->slice_height = 8;
+	else if (vdsc_cfg->pic_height % 4 == 0)
+		vdsc_cfg->slice_height = 4;
+	else
+		vdsc_cfg->slice_height = 2;
+
+	vdsc_cfg->dsc_version_major =
+		(intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] &
+		 DP_DSC_MAJOR_MASK) >> DP_DSC_MAJOR_SHIFT;
+	vdsc_cfg->dsc_version_minor =
+		min(DSC_SUPPORTED_VERSION_MIN,
+		    (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] &
+		     DP_DSC_MINOR_MASK) >> DP_DSC_MINOR_SHIFT);
+
+	vdsc_cfg->convert_rgb = intel_dp->dsc_dpcd[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT] &
+		DP_DSC_RGB;
+
+	line_buf_depth = drm_dp_dsc_sink_line_buf_depth(intel_dp->dsc_dpcd);
+	if (!line_buf_depth) {
+		DRM_DEBUG_KMS("DSC Sink Line Buffer Depth invalid\n");
+		return -EINVAL;
+	}
+
+	if (vdsc_cfg->dsc_version_minor == 2)
+		vdsc_cfg->line_buf_depth = (line_buf_depth == DSC_1_2_MAX_LINEBUF_DEPTH_BITS) ?
+			DSC_1_2_MAX_LINEBUF_DEPTH_VAL : line_buf_depth;
+	else
+		vdsc_cfg->line_buf_depth = (line_buf_depth > DSC_1_1_MAX_LINEBUF_DEPTH_BITS) ?
+			DSC_1_1_MAX_LINEBUF_DEPTH_BITS : line_buf_depth;
+
+	vdsc_cfg->block_pred_enable =
+		intel_dp->dsc_dpcd[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] &
+		DP_DSC_BLK_PREDICTION_IS_SUPPORTED;
+
+	return drm_dsc_compute_rc_parameters(vdsc_cfg);
+}
+
 static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 				       struct intel_crtc_state *pipe_config,
 				       struct drm_connector_state *conn_state,
@@ -1999,7 +2093,7 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	u8 dsc_max_bpc;
 	int pipe_bpp;
 	int ret;
@@ -2010,11 +2104,17 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 	if (!intel_dp_supports_dsc(intel_dp, pipe_config))
 		return -EINVAL;
 
-	dsc_max_bpc = min_t(u8, DP_DSC_MAX_SUPPORTED_BPC,
-			    conn_state->max_requested_bpc);
+	/* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */
+	if (INTEL_GEN(dev_priv) >= 12)
+		dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc);
+	else
+		dsc_max_bpc = min_t(u8, 10,
+				    conn_state->max_requested_bpc);
 
 	pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, dsc_max_bpc);
-	if (pipe_bpp < DP_DSC_MIN_SUPPORTED_BPC * 3) {
+
+	/* Min Input BPC for ICL+ is 8 */
+	if (pipe_bpp < 8 * 3) {
 		DRM_DEBUG_KMS("No DSC support for less than 8bpc\n");
 		return -EINVAL;
 	}
@@ -2029,10 +2129,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 	pipe_config->lane_count = limits->max_lane_count;
 
 	if (intel_dp_is_edp(intel_dp)) {
-		pipe_config->dsc_params.compressed_bpp =
+		pipe_config->dsc.compressed_bpp =
 			min_t(u16, drm_edp_dsc_sink_output_bpp(intel_dp->dsc_dpcd) >> 4,
 			      pipe_config->pipe_bpp);
-		pipe_config->dsc_params.slice_count =
+		pipe_config->dsc.slice_count =
 			drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd,
 							true);
 	} else {
@@ -2040,7 +2140,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 		u8 dsc_dp_slice_count;
 
 		dsc_max_output_bpp =
-			intel_dp_dsc_get_output_bpp(pipe_config->port_clock,
+			intel_dp_dsc_get_output_bpp(dev_priv,
+						    pipe_config->port_clock,
 						    pipe_config->lane_count,
 						    adjusted_mode->crtc_clock,
 						    adjusted_mode->crtc_hdisplay);
@@ -2052,10 +2153,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 			DRM_DEBUG_KMS("Compressed BPP/Slice Count not supported\n");
 			return -EINVAL;
 		}
-		pipe_config->dsc_params.compressed_bpp = min_t(u16,
+		pipe_config->dsc.compressed_bpp = min_t(u16,
 							       dsc_max_output_bpp >> 4,
 							       pipe_config->pipe_bpp);
-		pipe_config->dsc_params.slice_count = dsc_dp_slice_count;
+		pipe_config->dsc.slice_count = dsc_dp_slice_count;
 	}
 	/*
 	 * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
@@ -2063,29 +2164,29 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 	 * then we need to use 2 VDSC instances.
 	 */
 	if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) {
-		if (pipe_config->dsc_params.slice_count > 1) {
-			pipe_config->dsc_params.dsc_split = true;
+		if (pipe_config->dsc.slice_count > 1) {
+			pipe_config->dsc.dsc_split = true;
 		} else {
 			DRM_DEBUG_KMS("Cannot split stream to use 2 VDSC instances\n");
 			return -EINVAL;
 		}
 	}
 
-	ret = intel_dp_compute_dsc_params(intel_dp, pipe_config);
+	ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
 	if (ret < 0) {
 		DRM_DEBUG_KMS("Cannot compute valid DSC parameters for Input Bpp = %d "
 			      "Compressed BPP = %d\n",
 			      pipe_config->pipe_bpp,
-			      pipe_config->dsc_params.compressed_bpp);
+			      pipe_config->dsc.compressed_bpp);
 		return ret;
 	}
 
-	pipe_config->dsc_params.compression_enable = true;
+	pipe_config->dsc.compression_enable = true;
 	DRM_DEBUG_KMS("DP DSC computed with Input Bpp = %d "
 		      "Compressed Bpp = %d Slice Count = %d\n",
 		      pipe_config->pipe_bpp,
-		      pipe_config->dsc_params.compressed_bpp,
-		      pipe_config->dsc_params.slice_count);
+		      pipe_config->dsc.compressed_bpp,
+		      pipe_config->dsc.slice_count);
 
 	return 0;
 }
@@ -2103,8 +2204,8 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 			     struct intel_crtc_state *pipe_config,
 			     struct drm_connector_state *conn_state)
 {
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct link_config_limits limits;
 	int common_len;
 	int ret;
@@ -2159,15 +2260,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 			return ret;
 	}
 
-	if (pipe_config->dsc_params.compression_enable) {
+	if (pipe_config->dsc.compression_enable) {
 		DRM_DEBUG_KMS("DP lane count %d clock %d Input bpp %d Compressed bpp %d\n",
 			      pipe_config->lane_count, pipe_config->port_clock,
 			      pipe_config->pipe_bpp,
-			      pipe_config->dsc_params.compressed_bpp);
+			      pipe_config->dsc.compressed_bpp);
 
 		DRM_DEBUG_KMS("DP link rate required %i available %i\n",
 			      intel_dp_link_required(adjusted_mode->crtc_clock,
-						     pipe_config->dsc_params.compressed_bpp),
+						     pipe_config->dsc.compressed_bpp),
 			      intel_dp_max_data_rate(pipe_config->port_clock,
 						     pipe_config->lane_count));
 	} else {
@@ -2191,8 +2292,8 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp,
 {
 	const struct drm_display_info *info = &connector->display_info;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+		&crtc_state->hw.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	int ret;
 
 	if (!drm_mode_is_420_only(info, adjusted_mode) ||
@@ -2220,7 +2321,17 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state,
 	const struct intel_digital_connector_state *intel_conn_state =
 		to_intel_digital_connector_state(conn_state);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
+
+	/*
+	 * Our YCbCr output is always limited range.
+	 * crtc_state->limited_color_range only applies to RGB,
+	 * and it must never be set for YCbCr or we risk setting
+	 * some conflicting bits in PIPECONF which will mess up
+	 * the colors on the monitor.
+	 */
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB)
+		return false;
 
 	if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) {
 		/*
@@ -2237,17 +2348,28 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state,
 	}
 }
 
+static bool intel_dp_port_has_audio(struct drm_i915_private *dev_priv,
+				    enum port port)
+{
+	if (IS_G4X(dev_priv))
+		return false;
+	if (INTEL_GEN(dev_priv) < 12 && port == PORT_A)
+		return false;
+
+	return true;
+}
+
 int
 intel_dp_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_state *pipe_config,
 			struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base);
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder);
 	enum port port = encoder->port;
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct intel_connector *intel_connector = intel_dp->attached_connector;
 	struct intel_digital_connector_state *intel_conn_state =
 		to_intel_digital_connector_state(conn_state);
@@ -2259,6 +2381,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		pipe_config->has_pch_encoder = true;
 
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
+
 	if (lspcon->active)
 		lspcon_ycbcr420_config(&intel_connector->base, pipe_config);
 	else
@@ -2269,7 +2392,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		return ret;
 
 	pipe_config->has_drrs = false;
-	if (IS_G4X(dev_priv) || port == PORT_A)
+	if (!intel_dp_port_has_audio(dev_priv, port))
 		pipe_config->has_audio = false;
 	else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
 		pipe_config->has_audio = intel_dp->has_audio;
@@ -2304,6 +2427,9 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return -EINVAL;
 
+	if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay))
+		return -EINVAL;
+
 	ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state);
 	if (ret < 0)
 		return ret;
@@ -2311,8 +2437,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	pipe_config->limited_color_range =
 		intel_dp_limited_color_range(pipe_config, conn_state);
 
-	if (pipe_config->dsc_params.compression_enable)
-		output_bpp = pipe_config->dsc_params.compressed_bpp;
+	if (pipe_config->dsc.compression_enable)
+		output_bpp = pipe_config->dsc.compressed_bpp;
 	else
 		output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp);
 
@@ -2356,16 +2482,19 @@ static void intel_dp_prepare(struct intel_encoder *encoder,
 			     const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 
 	intel_dp_set_link_params(intel_dp, pipe_config->port_clock,
 				 pipe_config->lane_count,
 				 intel_crtc_has_type(pipe_config,
 						     INTEL_OUTPUT_DP_MST));
 
+	intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port);
+	intel_dp->regs.dp_tp_status = DP_TP_STATUS(port);
+
 	/*
 	 * There are four kinds of DP registers:
 	 *
@@ -2380,7 +2509,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder,
 	 *
 	 * CPT PCH is quite different, having many bits moved
 	 * to the TRANS_DP_CTL register instead. That
-	 * configuration happens (oddly) in ironlake_pch_enable
+	 * configuration happens (oddly) in ilk_pch_enable
 	 */
 
 	/* Preserve the BIOS-computed detected bit. This is
@@ -2524,7 +2653,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp)
  * is locked
  */
 
-static  u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
+static  u32 ilk_get_pp_control(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	u32 control;
@@ -2567,13 +2696,14 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
 	intel_display_power_get(dev_priv,
 				intel_aux_power_domain(intel_dig_port));
 
-	DRM_DEBUG_KMS("Turning eDP port %c VDD on\n",
-		      port_name(intel_dig_port->base.port));
+	DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD on\n",
+		      intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name);
 
 	if (!edp_have_panel_power(intel_dp))
 		wait_panel_power_cycle(intel_dp);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	pp |= EDP_FORCE_VDD;
 
 	pp_stat_reg = _pp_stat_reg(intel_dp);
@@ -2587,8 +2717,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
 	 * If the panel wasn't on, delay before accessing aux channel
 	 */
 	if (!edp_have_panel_power(intel_dp)) {
-		DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n",
-			      port_name(intel_dig_port->base.port));
+		DRM_DEBUG_KMS("[ENCODER:%d:%s] panel power wasn't enabled\n",
+			      intel_dig_port->base.base.base.id,
+			      intel_dig_port->base.base.name);
 		msleep(intel_dp->panel_power_up_delay);
 	}
 
@@ -2613,8 +2744,9 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
 	vdd = false;
 	with_pps_lock(intel_dp, wakeref)
 		vdd = edp_panel_vdd_on(intel_dp);
-	I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n",
-	     port_name(dp_to_dig_port(intel_dp)->base.port));
+	I915_STATE_WARN(!vdd, "[ENCODER:%d:%s] VDD already requested on\n",
+			dp_to_dig_port(intel_dp)->base.base.base.id,
+			dp_to_dig_port(intel_dp)->base.base.name);
 }
 
 static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
@@ -2632,10 +2764,11 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	if (!edp_have_panel_vdd(intel_dp))
 		return;
 
-	DRM_DEBUG_KMS("Turning eDP port %c VDD off\n",
-		      port_name(intel_dig_port->base.port));
+	DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD off\n",
+		      intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	pp &= ~EDP_FORCE_VDD;
 
 	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
@@ -2695,8 +2828,9 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on",
-	     port_name(dp_to_dig_port(intel_dp)->base.port));
+	I915_STATE_WARN(!intel_dp->want_panel_vdd, "[ENCODER:%d:%s] VDD not forced on",
+			dp_to_dig_port(intel_dp)->base.base.base.id,
+			dp_to_dig_port(intel_dp)->base.base.name);
 
 	intel_dp->want_panel_vdd = false;
 
@@ -2717,18 +2851,20 @@ static void edp_panel_on(struct intel_dp *intel_dp)
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	DRM_DEBUG_KMS("Turn eDP port %c panel power on\n",
-		      port_name(dp_to_dig_port(intel_dp)->base.port));
+	DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power on\n",
+		      dp_to_dig_port(intel_dp)->base.base.base.id,
+		      dp_to_dig_port(intel_dp)->base.base.name);
 
 	if (WARN(edp_have_panel_power(intel_dp),
-		 "eDP port %c panel power already on\n",
-		 port_name(dp_to_dig_port(intel_dp)->base.port)))
+		 "[ENCODER:%d:%s] panel power already on\n",
+		 dp_to_dig_port(intel_dp)->base.base.base.id,
+		 dp_to_dig_port(intel_dp)->base.base.name))
 		return;
 
 	wait_panel_power_cycle(intel_dp);
 
 	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	if (IS_GEN(dev_priv, 5)) {
 		/* ILK workaround: disable reset around power sequence */
 		pp &= ~PANEL_POWER_RESET;
@@ -2777,13 +2913,13 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	if (!intel_dp_is_edp(intel_dp))
 		return;
 
-	DRM_DEBUG_KMS("Turn eDP port %c panel power off\n",
-		      port_name(dig_port->base.port));
+	DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power off\n",
+		      dig_port->base.base.base.id, dig_port->base.base.name);
 
-	WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n",
-	     port_name(dig_port->base.port));
+	WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n",
+	     dig_port->base.base.base.id, dig_port->base.base.name);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	/* We need to switch off panel power _and_ force vdd, for otherwise some
 	 * panels get very unhappy and cease to work. */
 	pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
@@ -2832,7 +2968,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
 		u32 pp;
 
-		pp = ironlake_get_pp_control(intel_dp);
+		pp = ilk_get_pp_control(intel_dp);
 		pp |= EDP_BLC_ENABLE;
 
 		I915_WRITE(pp_ctrl_reg, pp);
@@ -2844,7 +2980,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
 			    const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder));
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -2868,7 +3004,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
 		u32 pp;
 
-		pp = ironlake_get_pp_control(intel_dp);
+		pp = ilk_get_pp_control(intel_dp);
 		pp &= ~EDP_BLC_ENABLE;
 
 		I915_WRITE(pp_ctrl_reg, pp);
@@ -2882,7 +3018,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 /* Disable backlight PP control and backlight PWM. */
 void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder));
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -2900,13 +3036,13 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
 static void intel_edp_backlight_power(struct intel_connector *connector,
 				      bool enable)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	intel_wakeref_t wakeref;
 	bool is_enabled;
 
 	is_enabled = false;
 	with_pps_lock(intel_dp, wakeref)
-		is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
+		is_enabled = ilk_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
 	if (is_enabled == enable)
 		return;
 
@@ -2926,8 +3062,8 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state)
 	bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN;
 
 	I915_STATE_WARN(cur_state != state,
-			"DP port %c state assertion failure (expected %s, current %s)\n",
-			port_name(dig_port->base.port),
+			"[ENCODER:%d:%s] state assertion failure (expected %s, current %s)\n",
+			dig_port->base.base.base.id, dig_port->base.base.name,
 			onoff(state), onoff(cur_state));
 }
 #define assert_dp_port_disabled(d) assert_dp_port((d), false)
@@ -2943,13 +3079,13 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state)
 #define assert_edp_pll_enabled(d) assert_edp_pll((d), true)
 #define assert_edp_pll_disabled(d) assert_edp_pll((d), false)
 
-static void ironlake_edp_pll_on(struct intel_dp *intel_dp,
-				const struct intel_crtc_state *pipe_config)
+static void ilk_edp_pll_on(struct intel_dp *intel_dp,
+			   const struct intel_crtc_state *pipe_config)
 {
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 	assert_dp_port_disabled(intel_dp);
 	assert_edp_pll_disabled(dev_priv);
 
@@ -2983,13 +3119,13 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp,
 	udelay(200);
 }
 
-static void ironlake_edp_pll_off(struct intel_dp *intel_dp,
-				 const struct intel_crtc_state *old_crtc_state)
+static void ilk_edp_pll_off(struct intel_dp *intel_dp,
+			    const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder);
 	assert_dp_port_disabled(intel_dp);
 	assert_edp_pll_enabled(dev_priv);
 
@@ -3023,7 +3159,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp,
 {
 	int ret;
 
-	if (!crtc_state->dsc_params.compression_enable)
+	if (!crtc_state->dsc.compression_enable)
 		return;
 
 	ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE,
@@ -3122,7 +3258,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
 				  enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	intel_wakeref_t wakeref;
 	bool ret;
 
@@ -3143,10 +3279,10 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 				struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	u32 tmp, flags = 0;
 	enum port port = encoder->port;
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 
 	if (encoder->type == INTEL_OUTPUT_EDP)
 		pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP);
@@ -3181,7 +3317,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 			flags |= DRM_MODE_FLAG_NVSYNC;
 	}
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
 	if (IS_G4X(dev_priv) && tmp & DP_COLOR_RANGE_16_235)
 		pipe_config->limited_color_range = true;
@@ -3198,7 +3334,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 			pipe_config->port_clock = 270000;
 	}
 
-	pipe_config->base.adjusted_mode.crtc_clock =
+	pipe_config->hw.adjusted_mode.crtc_clock =
 		intel_dotclock_calculate(pipe_config->port_clock,
 					 &pipe_config->dp_m_n);
 
@@ -3227,7 +3363,7 @@ static void intel_disable_dp(struct intel_encoder *encoder,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	intel_dp->link_trained = false;
 
@@ -3261,7 +3397,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder,
 				const struct intel_crtc_state *old_crtc_state,
 				const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	/*
@@ -3274,7 +3410,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder,
 
 	/* Only ilk+ has port A */
 	if (port == PORT_A)
-		ironlake_edp_pll_off(intel_dp, old_crtc_state);
+		ilk_edp_pll_off(intel_dp, old_crtc_state);
 }
 
 static void vlv_post_disable_dp(struct intel_encoder *encoder,
@@ -3315,7 +3451,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp,
 			      dp_train_pat & train_pat_mask);
 
 	if (HAS_DDI(dev_priv)) {
-		u32 temp = I915_READ(DP_TP_CTL(port));
+		u32 temp = I915_READ(intel_dp->regs.dp_tp_ctl);
 
 		if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE)
 			temp |= DP_TP_CTL_SCRAMBLE_DISABLE;
@@ -3341,7 +3477,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp,
 			temp |= DP_TP_CTL_LINK_TRAIN_PAT4;
 			break;
 		}
-		I915_WRITE(DP_TP_CTL(port), temp);
+		I915_WRITE(intel_dp->regs.dp_tp_ctl, temp);
 
 	} else if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) ||
 		   (HAS_PCH_CPT(dev_priv) && port != PORT_A)) {
@@ -3412,8 +3548,8 @@ static void intel_enable_dp(struct intel_encoder *encoder,
 			    const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	u32 dp_reg = I915_READ(intel_dp->output_reg);
 	enum pipe pipe = crtc->pipe;
 	intel_wakeref_t wakeref;
@@ -3472,14 +3608,14 @@ static void g4x_pre_enable_dp(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *pipe_config,
 			      const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	intel_dp_prepare(encoder, pipe_config);
 
 	/* Only ilk+ has port A */
 	if (port == PORT_A)
-		ironlake_edp_pll_on(intel_dp, pipe_config);
+		ilk_edp_pll_on(intel_dp, pipe_config);
 }
 
 static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
@@ -3505,8 +3641,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
 	 * port select always when logically disconnecting a power sequencer
 	 * from a port.
 	 */
-	DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n",
-		      pipe_name(pipe), port_name(intel_dig_port->base.port));
+	DRM_DEBUG_KMS("detaching pipe %c power sequencer from [ENCODER:%d:%s]\n",
+		      pipe_name(pipe), intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name);
 	I915_WRITE(pp_on_reg, 0);
 	POSTING_READ(pp_on_reg);
 
@@ -3521,18 +3658,19 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv,
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-		enum port port = encoder->port;
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		WARN(intel_dp->active_pipe == pipe,
-		     "stealing pipe %c power sequencer from active (e)DP port %c\n",
-		     pipe_name(pipe), port_name(port));
+		     "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n",
+		     pipe_name(pipe), encoder->base.base.id,
+		     encoder->base.name);
 
 		if (intel_dp->pps_pipe != pipe)
 			continue;
 
-		DRM_DEBUG_KMS("stealing pipe %c power sequencer from port %c\n",
-			      pipe_name(pipe), port_name(port));
+		DRM_DEBUG_KMS("stealing pipe %c power sequencer from [ENCODER:%d:%s]\n",
+			      pipe_name(pipe), encoder->base.base.id,
+			      encoder->base.name);
 
 		/* make sure vdd is off before we steal it */
 		vlv_detach_power_sequencer(intel_dp);
@@ -3543,8 +3681,8 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder,
 					   const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -3574,8 +3712,9 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder,
 	/* now it's all ours */
 	intel_dp->pps_pipe = crtc->pipe;
 
-	DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n",
-		      pipe_name(intel_dp->pps_pipe), port_name(encoder->port));
+	DRM_DEBUG_KMS("initializing pipe %c power sequencer for [ENCODER:%d:%s]\n",
+		      pipe_name(intel_dp->pps_pipe), encoder->base.base.id,
+		      encoder->base.name);
 
 	/* init power sequencer on this pipe and port */
 	intel_dp_init_panel_power_sequencer(intel_dp);
@@ -4039,22 +4178,22 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
 	if (!HAS_DDI(dev_priv))
 		return;
 
-	val = I915_READ(DP_TP_CTL(port));
+	val = I915_READ(intel_dp->regs.dp_tp_ctl);
 	val &= ~DP_TP_CTL_LINK_TRAIN_MASK;
 	val |= DP_TP_CTL_LINK_TRAIN_IDLE;
-	I915_WRITE(DP_TP_CTL(port), val);
+	I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
 
 	/*
-	 * On PORT_A we can have only eDP in SST mode. There the only reason
-	 * we need to set idle transmission mode is to work around a HW issue
-	 * where we enable the pipe while not in idle link-training mode.
+	 * Until TGL on PORT_A we can have only eDP in SST mode. There the only
+	 * reason we need to set idle transmission mode is to work around a HW
+	 * issue where we enable the pipe while not in idle link-training mode.
 	 * In this case there is requirement to wait for a minimum number of
 	 * idle patterns to be sent.
 	 */
-	if (port == PORT_A)
+	if (port == PORT_A && INTEL_GEN(dev_priv) < 12)
 		return;
 
-	if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port),
+	if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
 				  DP_TP_STATUS_IDLE_DONE, 1))
 		DRM_ERROR("Timed out waiting for DP idle patterns\n");
 }
@@ -4064,8 +4203,8 @@ intel_dp_link_down(struct intel_encoder *encoder,
 		   const struct intel_crtc_state *old_crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum port port = encoder->port;
 	u32 DP = intel_dp->DP;
 
@@ -4396,9 +4535,10 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
 		&dp_to_dig_port(intel_dp)->base;
 	bool sink_can_mst = intel_dp_sink_can_mst(intel_dp);
 
-	DRM_DEBUG_KMS("MST support? port %c: %s, sink: %s, modparam: %s\n",
-		      port_name(encoder->port), yesno(intel_dp->can_mst),
-		      yesno(sink_can_mst), yesno(i915_modparams.enable_dp_mst));
+	DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n",
+		      encoder->base.base.id, encoder->base.name,
+		      yesno(intel_dp->can_mst), yesno(sink_can_mst),
+		      yesno(i915_modparams.enable_dp_mst));
 
 	if (!intel_dp->can_mst)
 		return;
@@ -4418,9 +4558,36 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector)
 		DP_DPRX_ESI_LEN;
 }
 
+bool
+intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
+		       const struct drm_connector_state *conn_state)
+{
+	/*
+	 * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication
+	 * of Color Encoding Format and Content Color Gamut], in order to
+	 * sending YCBCR 420 or HDR BT.2020 signals we should use DP VSC SDP.
+	 */
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+		return true;
+
+	switch (conn_state->colorspace) {
+	case DRM_MODE_COLORIMETRY_SYCC_601:
+	case DRM_MODE_COLORIMETRY_OPYCC_601:
+	case DRM_MODE_COLORIMETRY_BT2020_YCC:
+	case DRM_MODE_COLORIMETRY_BT2020_RGB:
+	case DRM_MODE_COLORIMETRY_BT2020_CYCC:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
 static void
-intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
-			       const struct intel_crtc_state *crtc_state)
+intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp,
+		       const struct intel_crtc_state *crtc_state,
+		       const struct drm_connector_state *conn_state)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct dp_sdp vsc_sdp = {};
@@ -4441,13 +4608,55 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
 	 */
 	vsc_sdp.sdp_header.HB3 = 0x13;
 
-	/*
-	 * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h
-	 * DB16[3:0] DP 1.4a spec, Table 2-120
-	 */
-	vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/
-	/* RGB->YCBCR color conversion uses the BT.709 color space. */
-	vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+	/* DP 1.4a spec, Table 2-120 */
+	switch (crtc_state->output_format) {
+	case INTEL_OUTPUT_FORMAT_YCBCR444:
+		vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */
+		break;
+	case INTEL_OUTPUT_FORMAT_YCBCR420:
+		vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */
+		break;
+	case INTEL_OUTPUT_FORMAT_RGB:
+	default:
+		/* RGB: DB16[7:4] = 0h */
+		break;
+	}
+
+	switch (conn_state->colorspace) {
+	case DRM_MODE_COLORIMETRY_BT709_YCC:
+		vsc_sdp.db[16] |= 0x1;
+		break;
+	case DRM_MODE_COLORIMETRY_XVYCC_601:
+		vsc_sdp.db[16] |= 0x2;
+		break;
+	case DRM_MODE_COLORIMETRY_XVYCC_709:
+		vsc_sdp.db[16] |= 0x3;
+		break;
+	case DRM_MODE_COLORIMETRY_SYCC_601:
+		vsc_sdp.db[16] |= 0x4;
+		break;
+	case DRM_MODE_COLORIMETRY_OPYCC_601:
+		vsc_sdp.db[16] |= 0x5;
+		break;
+	case DRM_MODE_COLORIMETRY_BT2020_CYCC:
+	case DRM_MODE_COLORIMETRY_BT2020_RGB:
+		vsc_sdp.db[16] |= 0x6;
+		break;
+	case DRM_MODE_COLORIMETRY_BT2020_YCC:
+		vsc_sdp.db[16] |= 0x7;
+		break;
+	case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65:
+	case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER:
+		vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */
+		break;
+	default:
+		/* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */
+
+		/* RGB->YCBCR color conversion uses the BT.709 color space. */
+		if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+			vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+		break;
+	}
 
 	/*
 	 * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only,
@@ -4499,13 +4708,106 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
 			crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp));
 }
 
-void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
-			       const struct intel_crtc_state *crtc_state)
+static void
+intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
+					  const struct intel_crtc_state *crtc_state,
+					  const struct drm_connector_state *conn_state)
 {
-	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420)
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct dp_sdp infoframe_sdp = {};
+	struct hdmi_drm_infoframe drm_infoframe = {};
+	const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE;
+	unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE];
+	ssize_t len;
+	int ret;
+
+	ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state);
+	if (ret) {
+		DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n");
 		return;
+	}
 
-	intel_pixel_encoding_setup_vsc(intel_dp, crtc_state);
+	len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf));
+	if (len < 0) {
+		DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n");
+		return;
+	}
+
+	if (len != infoframe_size) {
+		DRM_DEBUG_KMS("wrong static hdr metadata size\n");
+		return;
+	}
+
+	/*
+	 * Set up the infoframe sdp packet for HDR static metadata.
+	 * Prepare VSC Header for SU as per DP 1.4a spec,
+	 * Table 2-100 and Table 2-101
+	 */
+
+	/* Packet ID, 00h for non-Audio INFOFRAME */
+	infoframe_sdp.sdp_header.HB0 = 0;
+	/*
+	 * Packet Type 80h + Non-audio INFOFRAME Type value
+	 * HDMI_INFOFRAME_TYPE_DRM: 0x87,
+	 */
+	infoframe_sdp.sdp_header.HB1 = drm_infoframe.type;
+	/*
+	 * Least Significant Eight Bits of (Data Byte Count – 1)
+	 * infoframe_size - 1,
+	 */
+	infoframe_sdp.sdp_header.HB2 = 0x1D;
+	/* INFOFRAME SDP Version Number */
+	infoframe_sdp.sdp_header.HB3 = (0x13 << 2);
+	/* CTA Header Byte 2 (INFOFRAME Version Number) */
+	infoframe_sdp.db[0] = drm_infoframe.version;
+	/* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */
+	infoframe_sdp.db[1] = drm_infoframe.length;
+	/*
+	 * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after
+	 * HDMI_INFOFRAME_HEADER_SIZE
+	 */
+	BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2);
+	memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE],
+	       HDMI_DRM_INFOFRAME_SIZE);
+
+	/*
+	 * Size of DP infoframe sdp packet for HDR static metadata is consist of
+	 * - DP SDP Header(struct dp_sdp_header): 4 bytes
+	 * - Two Data Blocks: 2 bytes
+	 *    CTA Header Byte2 (INFOFRAME Version Number)
+	 *    CTA Header Byte3 (Length of INFOFRAME)
+	 * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes
+	 *
+	 * Prior to GEN11's GMP register size is identical to DP HDR static metadata
+	 * infoframe size. But GEN11+ has larger than that size, write_infoframe
+	 * will pad rest of the size.
+	 */
+	intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state,
+					HDMI_PACKET_TYPE_GAMUT_METADATA,
+					&infoframe_sdp,
+					sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE);
+}
+
+void intel_dp_vsc_enable(struct intel_dp *intel_dp,
+			 const struct intel_crtc_state *crtc_state,
+			 const struct drm_connector_state *conn_state)
+{
+	if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+		return;
+
+	intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state);
+}
+
+void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
+				  const struct intel_crtc_state *crtc_state,
+				  const struct drm_connector_state *conn_state)
+{
+	if (!conn_state->hdr_output_metadata)
+		return;
+
+	intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp,
+						  crtc_state,
+						  conn_state);
 }
 
 static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
@@ -4601,7 +4903,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 	intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width);
 	intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height);
 	/* Set test active flag here so userspace doesn't interrupt things */
-	intel_dp->compliance.test_active = 1;
+	intel_dp->compliance.test_active = true;
 
 	return DP_TEST_ACK;
 }
@@ -4645,7 +4947,7 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
 	}
 
 	/* Set test active flag here so userspace doesn't interrupt things */
-	intel_dp->compliance.test_active = 1;
+	intel_dp->compliance.test_active = true;
 
 	return test_result;
 }
@@ -4794,7 +5096,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 			  struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_connector *connector = intel_dp->attached_connector;
 	struct drm_connector_state *conn_state;
 	struct intel_crtc_state *crtc_state;
@@ -4825,7 +5127,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 
 	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
 
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	if (conn_state->commit &&
@@ -5222,22 +5524,23 @@ static bool bxt_digital_port_connected(struct intel_encoder *encoder)
 	return I915_READ(GEN8_DE_PORT_ISR) & bit;
 }
 
-static bool icl_combo_port_connected(struct drm_i915_private *dev_priv,
-				     struct intel_digital_port *intel_dig_port)
+static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv,
+				      enum phy phy)
 {
-	enum port port = intel_dig_port->base.port;
+	if (HAS_PCH_MCC(dev_priv) && phy == PHY_C)
+		return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1);
 
-	return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port);
+	return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(phy);
 }
 
-static bool icl_digital_port_connected(struct intel_encoder *encoder)
+static bool icp_digital_port_connected(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 
 	if (intel_phy_is_combo(dev_priv, phy))
-		return icl_combo_port_connected(dev_priv, dig_port);
+		return intel_combo_phy_connected(dev_priv, phy);
 	else if (intel_phy_is_tc(dev_priv, phy))
 		return intel_tc_port_connected(dig_port);
 	else
@@ -5268,9 +5571,9 @@ static bool __intel_digital_port_connected(struct intel_encoder *encoder)
 			return g4x_digital_port_connected(encoder);
 	}
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		return icl_digital_port_connected(encoder);
-	else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv))
+	if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
+		return icp_digital_port_connected(encoder);
+	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
 		return spt_digital_port_connected(encoder);
 	else if (IS_GEN9_LP(dev_priv))
 		return bxt_digital_port_connected(encoder);
@@ -5348,7 +5651,7 @@ intel_dp_detect(struct drm_connector *connector,
 		bool force)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *encoder = &dig_port->base;
 	enum drm_connector_status status;
@@ -5452,7 +5755,7 @@ out:
 static void
 intel_dp_force(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *intel_encoder = &dig_port->base;
 	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
@@ -5487,7 +5790,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 	}
 
 	/* if eDP has no EDID, fall back to fixed mode */
-	if (intel_dp_is_edp(intel_attached_dp(connector)) &&
+	if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) &&
 	    intel_connector->panel.fixed_mode) {
 		struct drm_display_mode *mode;
 
@@ -5505,8 +5808,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 static int
 intel_dp_connector_register(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	struct drm_device *dev = connector->dev;
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	int ret;
 
 	ret = intel_connector_register(connector);
@@ -5521,15 +5823,14 @@ intel_dp_connector_register(struct drm_connector *connector)
 	intel_dp->aux.dev = connector->kdev;
 	ret = drm_dp_aux_register(&intel_dp->aux);
 	if (!ret)
-		drm_dp_cec_register_connector(&intel_dp->aux,
-					      connector->name, dev->dev);
+		drm_dp_cec_register_connector(&intel_dp->aux, connector);
 	return ret;
 }
 
 static void
 intel_dp_connector_unregister(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 
 	drm_dp_cec_unregister_connector(&intel_dp->aux);
 	drm_dp_aux_unregister(&intel_dp->aux);
@@ -5538,7 +5839,7 @@ intel_dp_connector_unregister(struct drm_connector *connector)
 
 void intel_dp_encoder_flush_work(struct drm_encoder *encoder)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(to_intel_encoder(encoder));
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
@@ -5567,12 +5868,12 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	intel_dp_encoder_flush_work(encoder);
 
 	drm_encoder_cleanup(encoder);
-	kfree(enc_to_dig_port(encoder));
+	kfree(enc_to_dig_port(to_intel_encoder(encoder)));
 }
 
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder);
 	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
@@ -5603,7 +5904,7 @@ static
 int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
 				u8 *an)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&intel_dig_port->base.base));
 	static const struct drm_dp_aux_msg msg = {
 		.request = DP_AUX_NATIVE_WRITE,
 		.address = DP_AUX_HDCP_AKSV,
@@ -6213,7 +6514,7 @@ static enum pipe vlv_active_pipe(struct intel_dp *intel_dp)
 void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(encoder));
 	struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp);
 	intel_wakeref_t wakeref;
 
@@ -6280,13 +6581,15 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		 * would end up in an endless cycle of
 		 * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..."
 		 */
-		DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n",
-			      port_name(intel_dig_port->base.port));
+		DRM_DEBUG_KMS("ignoring long hpd on eDP [ENCODER:%d:%s]\n",
+			      intel_dig_port->base.base.base.id,
+			      intel_dig_port->base.base.name);
 		return IRQ_HANDLED;
 	}
 
-	DRM_DEBUG_KMS("got hpd irq on port %c - %s\n",
-		      port_name(intel_dig_port->base.port),
+	DRM_DEBUG_KMS("got hpd irq on [ENCODER:%d:%s] - %s\n",
+		      intel_dig_port->base.base.base.id,
+		      intel_dig_port->base.base.name,
 		      long_hpd ? "long" : "short");
 
 	if (long_hpd) {
@@ -6353,6 +6656,13 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect
 	else if (INTEL_GEN(dev_priv) >= 5)
 		drm_connector_attach_max_bpc_property(connector, 6, 12);
 
+	intel_attach_colorspace_property(connector);
+
+	if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 11)
+		drm_object_attach_property(&connector->base,
+					   connector->dev->mode_config.hdr_output_metadata_property,
+					   0);
+
 	if (intel_dp_is_edp(intel_dp)) {
 		u32 allowed_scalers;
 
@@ -6383,7 +6693,7 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq)
 
 	intel_pps_get_registers(intel_dp, &regs);
 
-	pp_ctl = ironlake_get_pp_control(intel_dp);
+	pp_ctl = ilk_get_pp_control(intel_dp);
 
 	/* Ensure PPS is unlocked */
 	if (!HAS_DDI(dev_priv))
@@ -6553,7 +6863,7 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp,
 	 * soon as the new power sequencer gets initialized.
 	 */
 	if (force_disable_vdd) {
-		u32 pp = ironlake_get_pp_control(intel_dp);
+		u32 pp = ilk_get_pp_control(intel_dp);
 
 		WARN(pp & PANEL_POWER_ON, "Panel power already on\n");
 
@@ -6650,7 +6960,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv,
 				    int refresh_rate)
 {
 	struct intel_dp *intel_dp = dev_priv->drrs.dp;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum drrs_refresh_rate_type index = DRRS_HIGH_RR;
 
 	if (refresh_rate <= 0) {
@@ -6683,7 +6993,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv,
 		return;
 	}
 
-	if (!crtc_state->base.active) {
+	if (!crtc_state->hw.active) {
 		DRM_DEBUG_KMS("eDP encoder disabled. CRTC not Active\n");
 		return;
 	}
@@ -7150,8 +7460,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		  intel_dp_modeset_retry_work_fn);
 
 	if (WARN(intel_dig_port->max_lanes < 1,
-		 "Not enough lanes (%d) for DP on port %c\n",
-		 intel_dig_port->max_lanes, port_name(port)))
+		 "Not enough lanes (%d) for DP on [ENCODER:%d:%s]\n",
+		 intel_dig_port->max_lanes, intel_encoder->base.base.id,
+		 intel_encoder->base.name))
 		return false;
 
 	intel_dp_set_source_rates(intel_dp);
@@ -7192,9 +7503,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		    port != PORT_B && port != PORT_C))
 		return false;
 
-	DRM_DEBUG_KMS("Adding %s connector on port %c\n",
-			type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP",
-			port_name(port));
+	DRM_DEBUG_KMS("Adding %s connector on [ENCODER:%d:%s]\n",
+		      type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP",
+		      intel_encoder->base.base.id, intel_encoder->base.name);
 
 	drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
 	drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
@@ -7218,11 +7529,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		intel_connector->get_hw_state = intel_connector_get_hw_state;
 
 	/* init MST on ports that can support it */
-	if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) &&
-	    (port == PORT_B || port == PORT_C ||
-	     port == PORT_D || port == PORT_F))
-		intel_dp_mst_encoder_init(intel_dig_port,
-					  intel_connector->base.base.id);
+	intel_dp_mst_encoder_init(intel_dig_port,
+				  intel_connector->base.base.id);
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector)) {
 		intel_dp_aux_fini(intel_dp);
@@ -7313,11 +7621,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
 	intel_encoder->power_domain = intel_port_to_power_domain(port);
 	if (IS_CHERRYVIEW(dev_priv)) {
 		if (port == PORT_D)
-			intel_encoder->crtc_mask = 1 << 2;
+			intel_encoder->pipe_mask = BIT(PIPE_C);
 		else
-			intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+			intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B);
 	} else {
-		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+		intel_encoder->pipe_mask = ~0;
 	}
 	intel_encoder->cloneable = 0;
 	intel_encoder->port = port;
@@ -7352,7 +7660,7 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv)
 		if (encoder->type != INTEL_OUTPUT_DDI)
 			continue;
 
-		intel_dp = enc_to_intel_dp(&encoder->base);
+		intel_dp = enc_to_intel_dp(encoder);
 
 		if (!intel_dp->can_mst)
 			continue;
@@ -7373,12 +7681,13 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv)
 		if (encoder->type != INTEL_OUTPUT_DDI)
 			continue;
 
-		intel_dp = enc_to_intel_dp(&encoder->base);
+		intel_dp = enc_to_intel_dp(encoder);
 
 		if (!intel_dp->can_mst)
 			continue;
 
-		ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr);
+		ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr,
+						     true);
 		if (ret) {
 			intel_dp->is_mst = false;
 			drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 00981fb9414b..3da166054788 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -13,6 +13,7 @@
 #include "i915_reg.h"
 
 enum pipe;
+enum port;
 struct drm_connector_state;
 struct drm_encoder;
 struct drm_i915_private;
@@ -107,6 +108,14 @@ bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
 bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
+bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
+			    const struct drm_connector_state *conn_state);
+void intel_dp_vsc_enable(struct intel_dp *intel_dp,
+			 const struct intel_crtc_state *crtc_state,
+			 const struct drm_connector_state *conn_state);
+void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
+				  const struct intel_crtc_state *crtc_state,
+				  const struct drm_connector_state *conn_state);
 bool intel_digital_port_connected(struct intel_encoder *encoder);
 
 static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 020422da2ae2..7c653f8c307f 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -57,7 +57,7 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
  */
 static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 read_val[2] = { 0x0 };
 	u16 level = 0;
 
@@ -82,7 +82,7 @@ static void
 intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level)
 {
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 vals[2] = { 0x0 };
 
 	vals[0] = level;
@@ -110,7 +110,7 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev
 static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1;
 	u8 pn, pn_min, pn_max;
 
@@ -178,7 +178,7 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
 					  const struct drm_connector_state *conn_state)
 {
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux,
@@ -222,13 +222,14 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
 
 static void intel_dp_aux_disable_backlight(const struct drm_connector_state *old_conn_state)
 {
-	set_aux_backlight_enable(enc_to_intel_dp(old_conn_state->best_encoder), false);
+	set_aux_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)),
+				 false);
 }
 
 static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 					enum pipe pipe)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	struct intel_panel *panel = &connector->panel;
 
 	if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
@@ -247,7 +248,7 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 static bool
 intel_dp_aux_display_control_capable(struct intel_connector *connector)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 
 	/* Check the eDP Display control capabilities registers to determine if
 	 * the panel can support backlight control over the aux channel
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 600873c796d0..cba68c5a80fa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -42,13 +42,13 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 					    struct drm_connector_state *conn_state,
 					    struct link_config_limits *limits)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct drm_atomic_state *state = crtc_state->uapi.state;
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	void *port = connector->port;
 	bool constant_n = drm_dp_has_quirk(&intel_dp->desc,
 					   DP_DPCD_QUIRK_CONSTANT_N);
@@ -61,10 +61,11 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 		crtc_state->pipe_bpp = bpp;
 
 		crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock,
-						       crtc_state->pipe_bpp);
+						       crtc_state->pipe_bpp,
+						       false);
 
 		slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr,
-						      port, crtc_state->pbn);
+						      port, crtc_state->pbn, 0);
 		if (slots == -EDEADLK)
 			return slots;
 		if (slots >= 0)
@@ -87,19 +88,65 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 	return 0;
 }
 
+/*
+ * Iterate over all connectors and return the smallest transcoder in the MST
+ * stream
+ */
+static enum transcoder
+intel_dp_mst_master_trans_compute(struct intel_atomic_state *state,
+				  struct intel_dp *mst_port)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_digital_connector_state *conn_state;
+	struct intel_connector *connector;
+	enum pipe ret = I915_MAX_PIPES;
+	int i;
+
+	if (INTEL_GEN(dev_priv) < 12)
+		return INVALID_TRANSCODER;
+
+	for_each_new_intel_connector_in_state(state, connector, conn_state, i) {
+		struct intel_crtc_state *crtc_state;
+		struct intel_crtc *crtc;
+
+		if (connector->mst_port != mst_port || !conn_state->base.crtc)
+			continue;
+
+		crtc = to_intel_crtc(conn_state->base.crtc);
+		crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+		if (!crtc_state->uapi.active)
+			continue;
+
+		/*
+		 * Using crtc->pipe because crtc_state->cpu_transcoder is
+		 * computed, so others CRTCs could have non-computed
+		 * cpu_transcoder
+		 */
+		if (crtc->pipe < ret)
+			ret = crtc->pipe;
+	}
+
+	if (ret == I915_MAX_PIPES)
+		return INVALID_TRANSCODER;
+
+	/* Simple cast works because TGL don't have a eDP transcoder */
+	return (enum transcoder)ret;
+}
+
 static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 				       struct intel_crtc_state *pipe_config,
 				       struct drm_connector_state *conn_state)
 {
+	struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
 	struct intel_digital_connector_state *intel_conn_state =
 		to_intel_digital_connector_state(conn_state);
 	const struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 	void *port = connector->port;
 	struct link_config_limits limits;
 	int ret;
@@ -154,25 +201,91 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
 
+	pipe_config->mst_master_transcoder = intel_dp_mst_master_trans_compute(state, intel_dp);
+
+	return 0;
+}
+
+/*
+ * If one of the connectors in a MST stream needs a modeset, mark all CRTCs
+ * that shares the same MST stream as mode changed,
+ * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care to do
+ * a fastset when possible.
+ */
+static int
+intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector,
+				       struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector_list_iter connector_list_iter;
+	struct intel_connector *connector_iter;
+
+	if (INTEL_GEN(dev_priv) < 12)
+		return  0;
+
+	if (!intel_connector_needs_modeset(state, &connector->base))
+		return 0;
+
+	drm_connector_list_iter_begin(&dev_priv->drm, &connector_list_iter);
+	for_each_intel_connector_iter(connector_iter, &connector_list_iter) {
+		struct intel_digital_connector_state *conn_iter_state;
+		struct intel_crtc_state *crtc_state;
+		struct intel_crtc *crtc;
+		int ret;
+
+		if (connector_iter->mst_port != connector->mst_port ||
+		    connector_iter == connector)
+			continue;
+
+		conn_iter_state = intel_atomic_get_digital_connector_state(state,
+									   connector_iter);
+		if (IS_ERR(conn_iter_state)) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return PTR_ERR(conn_iter_state);
+		}
+
+		if (!conn_iter_state->base.crtc)
+			continue;
+
+		crtc = to_intel_crtc(conn_iter_state->base.crtc);
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state)) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return PTR_ERR(crtc_state);
+		}
+
+		ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
+		if (ret) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return ret;
+		}
+		crtc_state->uapi.mode_changed = true;
+	}
+	drm_connector_list_iter_end(&connector_list_iter);
+
 	return 0;
 }
 
 static int
 intel_dp_mst_atomic_check(struct drm_connector *connector,
-			  struct drm_atomic_state *state)
+			  struct drm_atomic_state *_state)
 {
+	struct intel_atomic_state *state = to_intel_atomic_state(_state);
 	struct drm_connector_state *new_conn_state =
-		drm_atomic_get_new_connector_state(state, connector);
+		drm_atomic_get_new_connector_state(&state->base, connector);
 	struct drm_connector_state *old_conn_state =
-		drm_atomic_get_old_connector_state(state, connector);
+		drm_atomic_get_old_connector_state(&state->base, connector);
 	struct intel_connector *intel_connector =
 		to_intel_connector(connector);
 	struct drm_crtc *new_crtc = new_conn_state->crtc;
-	struct drm_crtc_state *crtc_state;
 	struct drm_dp_mst_topology_mgr *mgr;
 	int ret;
 
-	ret = intel_digital_connector_atomic_check(connector, state);
+	ret = intel_digital_connector_atomic_check(connector, &state->base);
+	if (ret)
+		return ret;
+
+	ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state);
 	if (ret)
 		return ret;
 
@@ -183,16 +296,18 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
 	 * connector
 	 */
 	if (new_crtc) {
-		crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc);
+		struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc);
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_new_crtc_state(state, intel_crtc);
 
 		if (!crtc_state ||
-		    !drm_atomic_crtc_needs_modeset(crtc_state) ||
-		    crtc_state->enable)
+		    !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) ||
+		    crtc_state->uapi.enable)
 			return 0;
 	}
 
-	mgr = &enc_to_mst(old_conn_state->best_encoder)->primary->dp.mst_mgr;
-	ret = drm_dp_atomic_release_vcpi_slots(state, mgr,
+	mgr = &enc_to_mst(to_intel_encoder(old_conn_state->best_encoder))->primary->dp.mst_mgr;
+	ret = drm_dp_atomic_release_vcpi_slots(&state->base, mgr,
 					       intel_connector->port);
 
 	return ret;
@@ -202,7 +317,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *old_crtc_state,
 				 const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct intel_connector *connector =
@@ -215,7 +330,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder,
 
 	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
 	if (ret) {
-		DRM_ERROR("failed to update payload %d\n", ret);
+		DRM_DEBUG_KMS("failed to update payload %d\n", ret);
 	}
 	if (old_crtc_state->has_audio)
 		intel_audio_codec_disable(encoder,
@@ -226,36 +341,65 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
 				      const struct intel_crtc_state *old_crtc_state,
 				      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct intel_connector *connector =
 		to_intel_connector(old_conn_state->connector);
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	bool last_mst_stream;
+	u32 val;
 
-	intel_ddi_disable_pipe_clock(old_crtc_state);
+	intel_dp->active_mst_links--;
+	last_mst_stream = intel_dp->active_mst_links == 0;
+	WARN_ON(INTEL_GEN(dev_priv) >= 12 && last_mst_stream &&
+		!intel_dp_mst_is_master_trans(old_crtc_state));
+
+	intel_crtc_vblank_off(old_crtc_state);
+
+	intel_disable_pipe(old_crtc_state);
 
-	/* this can fail */
-	drm_dp_check_act_status(&intel_dp->mst_mgr);
-	/* and this can also fail */
 	drm_dp_update_payload_part2(&intel_dp->mst_mgr);
 
+	val = I915_READ(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder));
+	val &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
+	I915_WRITE(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), val);
+
+	if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
+				  DP_TP_STATUS_ACT_SENT, 1))
+		DRM_ERROR("Timed out waiting for ACT sent when disabling\n");
+	drm_dp_check_act_status(&intel_dp->mst_mgr);
+
 	drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port);
 
+	intel_ddi_disable_transcoder_func(old_crtc_state);
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		skl_scaler_disable(old_crtc_state);
+	else
+		ilk_pfit_disable(old_crtc_state);
+
 	/*
 	 * Power down mst path before disabling the port, otherwise we end
 	 * up getting interrupts from the sink upon detecting link loss.
 	 */
 	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
 				     false);
+	/*
+	 * From TGL spec: "If multi-stream slave transcoder: Configure
+	 * Transcoder Clock Select to direct no clock to the transcoder"
+	 *
+	 * From older GENs spec: "Configure Transcoder Clock Select to direct
+	 * no clock to the transcoder"
+	 */
+	if (INTEL_GEN(dev_priv) < 12 || !last_mst_stream)
+		intel_ddi_disable_pipe_clock(old_crtc_state);
 
-	intel_dp->active_mst_links--;
 
 	intel_mst->connector = NULL;
-	if (intel_dp->active_mst_links == 0) {
-		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+	if (last_mst_stream)
 		intel_dig_port->base.post_disable(&intel_dig_port->base,
 						  old_crtc_state, NULL);
-	}
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 }
@@ -264,7 +408,7 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder,
 					const struct intel_crtc_state *pipe_config,
 					const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
@@ -273,48 +417,37 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder,
 						    pipe_config, NULL);
 }
 
-static void intel_mst_post_pll_disable_dp(struct intel_encoder *encoder,
-					  const struct intel_crtc_state *old_crtc_state,
-					  const struct drm_connector_state *old_conn_state)
-{
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
-	struct intel_digital_port *intel_dig_port = intel_mst->primary;
-	struct intel_dp *intel_dp = &intel_dig_port->dp;
-
-	if (intel_dp->active_mst_links == 0)
-		intel_dig_port->base.post_pll_disable(&intel_dig_port->base,
-						      old_crtc_state,
-						      old_conn_state);
-}
-
 static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
 				    const struct intel_crtc_state *pipe_config,
 				    const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum port port = intel_dig_port->base.port;
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
 	int ret;
 	u32 temp;
+	bool first_mst_stream;
 
 	/* MST encoders are bound to a crtc, not to a connector,
 	 * force the mapping here for get_hw_state.
 	 */
 	connector->encoder = encoder;
 	intel_mst->connector = connector;
+	first_mst_stream = intel_dp->active_mst_links == 0;
+	WARN_ON(INTEL_GEN(dev_priv) >= 12 && first_mst_stream &&
+		!intel_dp_mst_is_master_trans(pipe_config));
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 
-	if (intel_dp->active_mst_links == 0)
+	if (first_mst_stream)
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 
 	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
 
-	if (intel_dp->active_mst_links == 0)
+	if (first_mst_stream)
 		intel_dig_port->base.pre_enable(&intel_dig_port->base,
 						pipe_config, NULL);
 
@@ -326,27 +459,36 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
 		DRM_ERROR("failed to allocate vcpi\n");
 
 	intel_dp->active_mst_links++;
-	temp = I915_READ(DP_TP_STATUS(port));
-	I915_WRITE(DP_TP_STATUS(port), temp);
+	temp = I915_READ(intel_dp->regs.dp_tp_status);
+	I915_WRITE(intel_dp->regs.dp_tp_status, temp);
 
 	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
 
-	intel_ddi_enable_pipe_clock(pipe_config);
+	/*
+	 * Before Gen 12 this is not done as part of
+	 * intel_dig_port->base.pre_enable() and should be done here. For
+	 * Gen 12+ the step in which this should be done is different for the
+	 * first MST stream, so it's done on the DDI for the first stream and
+	 * here for the following ones.
+	 */
+	if (INTEL_GEN(dev_priv) < 12 || !first_mst_stream)
+		intel_ddi_enable_pipe_clock(pipe_config);
+
+	intel_ddi_set_dp_msa(pipe_config, conn_state);
 }
 
 static void intel_mst_enable_dp(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum port port = intel_dig_port->base.port;
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 
-	if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port),
+	if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
 				  DP_TP_STATUS_ACT_SENT, 1))
 		DRM_ERROR("Timed out waiting for ACT sent\n");
 
@@ -360,7 +502,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder,
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
 				      enum pipe *pipe)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	*pipe = intel_mst->pipe;
 	if (intel_mst->connector)
 		return true;
@@ -370,7 +512,7 @@ static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
 static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder,
 					struct intel_crtc_state *pipe_config)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 
 	intel_ddi_get_config(&intel_dig_port->base, pipe_config);
@@ -393,20 +535,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static enum drm_connector_status
-intel_dp_mst_detect(struct drm_connector *connector, bool force)
-{
-	struct intel_connector *intel_connector = to_intel_connector(connector);
-	struct intel_dp *intel_dp = intel_connector->mst_port;
-
-	if (drm_connector_is_unregistered(connector))
-		return connector_status_disconnected;
-	return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr,
-				      intel_connector->port);
-}
-
 static const struct drm_connector_funcs intel_dp_mst_connector_funcs = {
-	.detect = intel_dp_mst_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.atomic_get_property = intel_digital_connector_atomic_get_property,
 	.atomic_set_property = intel_digital_connector_atomic_set_property,
@@ -426,6 +555,7 @@ static enum drm_mode_status
 intel_dp_mst_mode_valid(struct drm_connector *connector,
 			struct drm_display_mode *mode)
 {
+	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct intel_dp *intel_dp = intel_connector->mst_port;
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -453,7 +583,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
 	if (mode_rate > max_rate || mode->clock > max_dotclk)
 		return MODE_CLOCK_HIGH;
 
-	return MODE_OK;
+	return intel_mode_valid_max_plane_size(dev_priv, mode);
 }
 
 static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector,
@@ -466,16 +596,31 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c
 	return &intel_dp->mst_encoders[crtc->pipe]->base.base;
 }
 
+static int
+intel_dp_mst_detect(struct drm_connector *connector,
+		    struct drm_modeset_acquire_ctx *ctx, bool force)
+{
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_dp *intel_dp = intel_connector->mst_port;
+
+	if (drm_connector_is_unregistered(connector))
+		return connector_status_disconnected;
+
+	return drm_dp_mst_detect_port(connector, ctx, &intel_dp->mst_mgr,
+				      intel_connector->port);
+}
+
 static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_funcs = {
 	.get_modes = intel_dp_mst_get_modes,
 	.mode_valid = intel_dp_mst_mode_valid,
 	.atomic_best_encoder = intel_mst_atomic_best_encoder,
 	.atomic_check = intel_dp_mst_atomic_check,
+	.detect_ctx = intel_dp_mst_detect,
 };
 
 static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(to_intel_encoder(encoder));
 
 	drm_encoder_cleanup(encoder);
 	kfree(intel_mst);
@@ -615,14 +760,21 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum
 	intel_encoder->type = INTEL_OUTPUT_DP_MST;
 	intel_encoder->power_domain = intel_dig_port->base.power_domain;
 	intel_encoder->port = intel_dig_port->base.port;
-	intel_encoder->crtc_mask = 0x7;
 	intel_encoder->cloneable = 0;
+	/*
+	 * This is wrong, but broken userspace uses the intersection
+	 * of possible_crtcs of all the encoders of a given connector
+	 * to figure out which crtcs can drive said connector. What
+	 * should be used instead is the union of possible_crtcs.
+	 * To keep such userspace functioning we must misconfigure
+	 * this to make sure the intersection is not empty :(
+	 */
+	intel_encoder->pipe_mask = ~0;
 
 	intel_encoder->compute_config = intel_dp_mst_compute_config;
 	intel_encoder->disable = intel_mst_disable_dp;
 	intel_encoder->post_disable = intel_mst_post_disable_dp;
 	intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp;
-	intel_encoder->post_pll_disable = intel_mst_post_pll_disable_dp;
 	intel_encoder->pre_enable = intel_mst_pre_enable_dp;
 	intel_encoder->enable = intel_mst_enable_dp;
 	intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state;
@@ -653,21 +805,31 @@ intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port)
 int
 intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_base_id)
 {
+	struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	struct drm_device *dev = intel_dig_port->base.base.dev;
+	enum port port = intel_dig_port->base.port;
 	int ret;
 
-	intel_dp->can_mst = true;
+	if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp))
+		return 0;
+
+	if (INTEL_GEN(i915) < 12 && port == PORT_A)
+		return 0;
+
+	if (INTEL_GEN(i915) < 11 && port == PORT_E)
+		return 0;
+
 	intel_dp->mst_mgr.cbs = &mst_cbs;
 
 	/* create encoders */
 	intel_dp_create_fake_mst_encoders(intel_dig_port);
-	ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, dev,
+	ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm,
 					   &intel_dp->aux, 16, 3, conn_base_id);
-	if (ret) {
-		intel_dp->can_mst = false;
+	if (ret)
 		return ret;
-	}
+
+	intel_dp->can_mst = true;
+
 	return 0;
 }
 
@@ -682,3 +844,14 @@ intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port)
 	drm_dp_mst_topology_mgr_destroy(&intel_dp->mst_mgr);
 	/* encoders will get killed by normal cleanup */
 }
+
+bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->mst_master_transcoder == crtc_state->cpu_transcoder;
+}
+
+bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->mst_master_transcoder != INVALID_TRANSCODER &&
+	       crtc_state->mst_master_transcoder != crtc_state->cpu_transcoder;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
index f660ad80db04..854724f68f09 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -6,10 +6,15 @@
 #ifndef __INTEL_DP_MST_H__
 #define __INTEL_DP_MST_H__
 
+#include <linux/types.h>
+
 struct intel_digital_port;
+struct intel_crtc_state;
 
 int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
 void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
 int intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port);
+bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state);
+bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state);
 
 #endif /* __INTEL_DP_MST_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index 556d1b30f06a..6fb1f7a7364e 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -642,7 +642,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 			      bool uniq_trans_scale)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	enum dpio_channel ch = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
@@ -738,8 +738,8 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder,
 			      bool reset)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder));
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
 
@@ -781,9 +781,9 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder,
 void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 			    const struct intel_crtc_state *crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 	unsigned int lane_mask =
@@ -861,10 +861,10 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 	int data, i, stagger;
@@ -940,7 +940,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 
 void chv_phy_release_cl2_override(struct intel_encoder *encoder)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
 	if (dport->release_cl2_override) {
@@ -953,7 +953,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *old_crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(old_crtc_state->uapi.crtc)->pipe;
 	u32 val;
 
 	vlv_dpio_get(dev_priv);
@@ -989,7 +989,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
 
@@ -1014,9 +1014,9 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 			    const struct intel_crtc_state *crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 
@@ -1043,10 +1043,10 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
@@ -1073,9 +1073,9 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 			 const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index d5a298c3c83b..c75e34d87111 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -136,7 +136,7 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv,
  */
 void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
 
@@ -163,7 +163,7 @@ void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state)
  */
 void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
 	unsigned int crtc_mask = drm_crtc_mask(&crtc->base);
@@ -208,7 +208,7 @@ out:
  */
 void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
 	unsigned int crtc_mask = drm_crtc_mask(&crtc->base);
@@ -247,8 +247,7 @@ static struct intel_shared_dpll *
 intel_find_shared_dpll(struct intel_atomic_state *state,
 		       const struct intel_crtc *crtc,
 		       const struct intel_dpll_hw_state *pll_state,
-		       enum intel_dpll_id range_min,
-		       enum intel_dpll_id range_max)
+		       unsigned long dpll_mask)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_shared_dpll *pll, *unused_pll = NULL;
@@ -257,7 +256,9 @@ intel_find_shared_dpll(struct intel_atomic_state *state,
 
 	shared_dpll = intel_atomic_get_shared_dpll_state(&state->base);
 
-	for (i = range_min; i <= range_max; i++) {
+	WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1));
+
+	for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) {
 		pll = &dev_priv->shared_dplls[i];
 
 		/* Only want to check enabled timings first */
@@ -464,8 +465,8 @@ static bool ibx_get_dpll(struct intel_atomic_state *state,
 	} else {
 		pll = intel_find_shared_dpll(state, crtc,
 					     &crtc_state->dpll_hw_state,
-					     DPLL_ID_PCH_PLL_A,
-					     DPLL_ID_PCH_PLL_B);
+					     BIT(DPLL_ID_PCH_PLL_B) |
+					     BIT(DPLL_ID_PCH_PLL_A));
 	}
 
 	if (!pll)
@@ -829,7 +830,8 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state,
 
 	pll = intel_find_shared_dpll(state, crtc,
 				     &crtc_state->dpll_hw_state,
-				     DPLL_ID_WRPLL1, DPLL_ID_WRPLL2);
+				     BIT(DPLL_ID_WRPLL2) |
+				     BIT(DPLL_ID_WRPLL1));
 
 	if (!pll)
 		return NULL;
@@ -840,7 +842,7 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state,
 static struct intel_shared_dpll *
 hsw_ddi_dp_get_dpll(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	struct intel_shared_dpll *pll;
 	enum intel_dpll_id pll_id;
 	int clock = crtc_state->port_clock;
@@ -892,7 +894,7 @@ static bool hsw_get_dpll(struct intel_atomic_state *state,
 
 		pll = intel_find_shared_dpll(state, crtc,
 					     &crtc_state->dpll_hw_state,
-					     DPLL_ID_SPLL, DPLL_ID_SPLL);
+					     BIT(DPLL_ID_SPLL));
 	} else {
 		return false;
 	}
@@ -1462,13 +1464,13 @@ static bool skl_get_dpll(struct intel_atomic_state *state,
 	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
 		pll = intel_find_shared_dpll(state, crtc,
 					     &crtc_state->dpll_hw_state,
-					     DPLL_ID_SKL_DPLL0,
-					     DPLL_ID_SKL_DPLL0);
+					     BIT(DPLL_ID_SKL_DPLL0));
 	else
 		pll = intel_find_shared_dpll(state, crtc,
 					     &crtc_state->dpll_hw_state,
-					     DPLL_ID_SKL_DPLL1,
-					     DPLL_ID_SKL_DPLL3);
+					     BIT(DPLL_ID_SKL_DPLL3) |
+					     BIT(DPLL_ID_SKL_DPLL2) |
+					     BIT(DPLL_ID_SKL_DPLL1));
 	if (!pll)
 		return false;
 
@@ -1749,7 +1751,7 @@ static bool
 bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state,
 			  struct bxt_clk_div *clk_div)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct dpll best_clock;
 
 	/* Calculate HDMI div */
@@ -2272,7 +2274,7 @@ static bool
 cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state,
 			struct skl_wrpll_params *wrpll_params)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 afe_clock = crtc_state->port_clock * 5;
 	u32 ref_clock;
 	u32 dco_min = 7998000;
@@ -2416,8 +2418,9 @@ static bool cnl_get_dpll(struct intel_atomic_state *state,
 
 	pll = intel_find_shared_dpll(state, crtc,
 				     &crtc_state->dpll_hw_state,
-				     DPLL_ID_SKL_DPLL0,
-				     DPLL_ID_SKL_DPLL2);
+				     BIT(DPLL_ID_SKL_DPLL2) |
+				     BIT(DPLL_ID_SKL_DPLL1) |
+				     BIT(DPLL_ID_SKL_DPLL0));
 	if (!pll) {
 		DRM_DEBUG_KMS("No PLL selected\n");
 		return false;
@@ -2535,10 +2538,22 @@ static const struct skl_wrpll_params icl_tbt_pll_19_2MHz_values = {
 	.pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0,
 };
 
+static const struct skl_wrpll_params tgl_tbt_pll_19_2MHz_values = {
+	.dco_integer = 0x54, .dco_fraction = 0x3000,
+	/* the following params are unused */
+	.pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0,
+};
+
+static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = {
+	.dco_integer = 0x43, .dco_fraction = 0x4000,
+	/* the following params are unused */
+	.pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0,
+};
+
 static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state,
 				  struct skl_wrpll_params *pll_params)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	const struct icl_combo_pll_params *params =
 		dev_priv->cdclk.hw.ref == 24000 ?
 		icl_dp_combo_pll_24MHz_values :
@@ -2560,10 +2575,36 @@ static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state,
 static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state,
 			     struct skl_wrpll_params *pll_params)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
+	if (INTEL_GEN(dev_priv) >= 12) {
+		switch (dev_priv->cdclk.hw.ref) {
+		default:
+			MISSING_CASE(dev_priv->cdclk.hw.ref);
+			/* fall-through */
+		case 19200:
+		case 38400:
+			*pll_params = tgl_tbt_pll_19_2MHz_values;
+			break;
+		case 24000:
+			*pll_params = tgl_tbt_pll_24MHz_values;
+			break;
+		}
+	} else {
+		switch (dev_priv->cdclk.hw.ref) {
+		default:
+			MISSING_CASE(dev_priv->cdclk.hw.ref);
+			/* fall-through */
+		case 19200:
+		case 38400:
+			*pll_params = icl_tbt_pll_19_2MHz_values;
+			break;
+		case 24000:
+			*pll_params = icl_tbt_pll_24MHz_values;
+			break;
+		}
+	}
 
-	*pll_params = dev_priv->cdclk.hw.ref == 24000 ?
-			icl_tbt_pll_24MHz_values : icl_tbt_pll_19_2MHz_values;
 	return true;
 }
 
@@ -2571,7 +2612,7 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state,
 				struct intel_encoder *encoder,
 				struct intel_dpll_hw_state *pll_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 cfgcr0, cfgcr1;
 	struct skl_wrpll_params pll_params = { 0 };
 	bool ret;
@@ -2622,7 +2663,8 @@ enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port)
 
 static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc,
 				     u32 *target_dco_khz,
-				     struct intel_dpll_hw_state *state)
+				     struct intel_dpll_hw_state *state,
+				     bool is_dkl)
 {
 	u32 dco_min_freq, dco_max_freq;
 	int div1_vals[] = {7, 5, 3, 2};
@@ -2644,8 +2686,13 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc,
 				continue;
 
 			if (div2 >= 2) {
+				/*
+				 * Note: a_divratio not matching TGL BSpec
+				 * algorithm but matching hardcoded values and
+				 * working on HW for DP alt-mode at least
+				 */
 				a_divratio = is_dp ? 10 : 5;
-				tlinedrv = 2;
+				tlinedrv = is_dkl ? 1 : 2;
 			} else {
 				a_divratio = 5;
 				tlinedrv = 0;
@@ -2697,7 +2744,7 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc,
 static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
 				  struct intel_dpll_hw_state *pll_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int refclk_khz = dev_priv->cdclk.hw.ref;
 	int clock = crtc_state->port_clock;
 	u32 dco_khz, m1div, m2div_int, m2div_rem, m2div_frac;
@@ -2708,11 +2755,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
 	u64 tmp;
 	bool use_ssc = false;
 	bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI);
+	bool is_dkl = INTEL_GEN(dev_priv) >= 12;
 
 	memset(pll_state, 0, sizeof(*pll_state));
 
 	if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz,
-				      pll_state)) {
+				      pll_state, is_dkl)) {
 		DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock);
 		return false;
 	}
@@ -2720,8 +2768,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
 	m1div = 2;
 	m2div_int = dco_khz / (refclk_khz * m1div);
 	if (m2div_int > 255) {
-		m1div = 4;
-		m2div_int = dco_khz / (refclk_khz * m1div);
+		if (!is_dkl) {
+			m1div = 4;
+			m2div_int = dco_khz / (refclk_khz * m1div);
+		}
+
 		if (m2div_int > 255) {
 			DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n",
 				      clock);
@@ -2801,60 +2852,94 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
 	}
 	ssc_steplog = 4;
 
-	pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) |
-				  MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) |
-				  MG_PLL_DIV0_FBDIV_INT(m2div_int);
-
-	pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) |
-				 MG_PLL_DIV1_DITHER_DIV_2 |
-				 MG_PLL_DIV1_NDIVRATIO(1) |
-				 MG_PLL_DIV1_FBPREDIV(m1div);
-
-	pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) |
-			       MG_PLL_LF_AFCCNTSEL_512 |
-			       MG_PLL_LF_GAINCTRL(1) |
-			       MG_PLL_LF_INT_COEFF(int_coeff) |
-			       MG_PLL_LF_PROP_COEFF(prop_coeff);
-
-	pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 |
-				      MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 |
-				      MG_PLL_FRAC_LOCK_LOCKTHRESH(10) |
-				      MG_PLL_FRAC_LOCK_DCODITHEREN |
-				      MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain);
-	if (use_ssc || m2div_rem > 0)
-		pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN;
-
-	pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) |
-				MG_PLL_SSC_TYPE(2) |
-				MG_PLL_SSC_STEPLENGTH(ssc_steplen) |
-				MG_PLL_SSC_STEPNUM(ssc_steplog) |
-				MG_PLL_SSC_FLLEN |
-				MG_PLL_SSC_STEPSIZE(ssc_stepsize);
-
-	pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART |
-					    MG_PLL_TDC_COLDST_IREFINT_EN |
-					    MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) |
-					    MG_PLL_TDC_TDCOVCCORR_EN |
-					    MG_PLL_TDC_TDCSEL(3);
-
-	pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) |
-				 MG_PLL_BIAS_INIT_DCOAMP(0x3F) |
-				 MG_PLL_BIAS_BIAS_BONUS(10) |
-				 MG_PLL_BIAS_BIASCAL_EN |
-				 MG_PLL_BIAS_CTRIM(12) |
-				 MG_PLL_BIAS_VREF_RDAC(4) |
-				 MG_PLL_BIAS_IREFTRIM(iref_trim);
-
-	if (refclk_khz == 38400) {
-		pll_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART;
-		pll_state->mg_pll_bias_mask = 0;
+	/* write pll_state calculations */
+	if (is_dkl) {
+		pll_state->mg_pll_div0 = DKL_PLL_DIV0_INTEG_COEFF(int_coeff) |
+					 DKL_PLL_DIV0_PROP_COEFF(prop_coeff) |
+					 DKL_PLL_DIV0_FBPREDIV(m1div) |
+					 DKL_PLL_DIV0_FBDIV_INT(m2div_int);
+
+		pll_state->mg_pll_div1 = DKL_PLL_DIV1_IREF_TRIM(iref_trim) |
+					 DKL_PLL_DIV1_TDC_TARGET_CNT(tdc_targetcnt);
+
+		pll_state->mg_pll_ssc = DKL_PLL_SSC_IREF_NDIV_RATIO(iref_ndiv) |
+					DKL_PLL_SSC_STEP_LEN(ssc_steplen) |
+					DKL_PLL_SSC_STEP_NUM(ssc_steplog) |
+					(use_ssc ? DKL_PLL_SSC_EN : 0);
+
+		pll_state->mg_pll_bias = (m2div_frac ? DKL_PLL_BIAS_FRAC_EN_H : 0) |
+					  DKL_PLL_BIAS_FBDIV_FRAC(m2div_frac);
+
+		pll_state->mg_pll_tdc_coldst_bias =
+				DKL_PLL_TDC_SSC_STEP_SIZE(ssc_stepsize) |
+				DKL_PLL_TDC_FEED_FWD_GAIN(feedfwgain);
+
 	} else {
-		pll_state->mg_pll_tdc_coldst_bias_mask = -1U;
-		pll_state->mg_pll_bias_mask = -1U;
-	}
+		pll_state->mg_pll_div0 =
+			(m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) |
+			MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) |
+			MG_PLL_DIV0_FBDIV_INT(m2div_int);
+
+		pll_state->mg_pll_div1 =
+			MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) |
+			MG_PLL_DIV1_DITHER_DIV_2 |
+			MG_PLL_DIV1_NDIVRATIO(1) |
+			MG_PLL_DIV1_FBPREDIV(m1div);
+
+		pll_state->mg_pll_lf =
+			MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) |
+			MG_PLL_LF_AFCCNTSEL_512 |
+			MG_PLL_LF_GAINCTRL(1) |
+			MG_PLL_LF_INT_COEFF(int_coeff) |
+			MG_PLL_LF_PROP_COEFF(prop_coeff);
+
+		pll_state->mg_pll_frac_lock =
+			MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 |
+			MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 |
+			MG_PLL_FRAC_LOCK_LOCKTHRESH(10) |
+			MG_PLL_FRAC_LOCK_DCODITHEREN |
+			MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain);
+		if (use_ssc || m2div_rem > 0)
+			pll_state->mg_pll_frac_lock |=
+				MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN;
+
+		pll_state->mg_pll_ssc =
+			(use_ssc ? MG_PLL_SSC_EN : 0) |
+			MG_PLL_SSC_TYPE(2) |
+			MG_PLL_SSC_STEPLENGTH(ssc_steplen) |
+			MG_PLL_SSC_STEPNUM(ssc_steplog) |
+			MG_PLL_SSC_FLLEN |
+			MG_PLL_SSC_STEPSIZE(ssc_stepsize);
+
+		pll_state->mg_pll_tdc_coldst_bias =
+			MG_PLL_TDC_COLDST_COLDSTART |
+			MG_PLL_TDC_COLDST_IREFINT_EN |
+			MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) |
+			MG_PLL_TDC_TDCOVCCORR_EN |
+			MG_PLL_TDC_TDCSEL(3);
+
+		pll_state->mg_pll_bias =
+			MG_PLL_BIAS_BIAS_GB_SEL(3) |
+			MG_PLL_BIAS_INIT_DCOAMP(0x3F) |
+			MG_PLL_BIAS_BIAS_BONUS(10) |
+			MG_PLL_BIAS_BIASCAL_EN |
+			MG_PLL_BIAS_CTRIM(12) |
+			MG_PLL_BIAS_VREF_RDAC(4) |
+			MG_PLL_BIAS_IREFTRIM(iref_trim);
+
+		if (refclk_khz == 38400) {
+			pll_state->mg_pll_tdc_coldst_bias_mask =
+				MG_PLL_TDC_COLDST_COLDSTART;
+			pll_state->mg_pll_bias_mask = 0;
+		} else {
+			pll_state->mg_pll_tdc_coldst_bias_mask = -1U;
+			pll_state->mg_pll_bias_mask = -1U;
+		}
 
-	pll_state->mg_pll_tdc_coldst_bias &= pll_state->mg_pll_tdc_coldst_bias_mask;
-	pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask;
+		pll_state->mg_pll_tdc_coldst_bias &=
+			pll_state->mg_pll_tdc_coldst_bias_mask;
+		pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask;
+	}
 
 	return true;
 }
@@ -2887,8 +2972,8 @@ static void icl_update_active_dpll(struct intel_atomic_state *state,
 	enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT;
 
 	primary_port = encoder->type == INTEL_OUTPUT_DP_MST ?
-		enc_to_mst(&encoder->base)->primary :
-		enc_to_dig_port(&encoder->base);
+		enc_to_mst(encoder)->primary :
+		enc_to_dig_port(encoder);
 
 	if (primary_port &&
 	    (primary_port->tc_mode == TC_PORT_DP_ALT ||
@@ -2908,7 +2993,7 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state,
 		&crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT];
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum port port = encoder->port;
-	bool has_dpll4 = false;
+	unsigned long dpll_mask;
 
 	if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) {
 		DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n");
@@ -2917,16 +3002,19 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state,
 	}
 
 	if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A)
-		has_dpll4 = true;
+		dpll_mask =
+			BIT(DPLL_ID_EHL_DPLL4) |
+			BIT(DPLL_ID_ICL_DPLL1) |
+			BIT(DPLL_ID_ICL_DPLL0);
+	else
+		dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0);
 
 	port_dpll->pll = intel_find_shared_dpll(state, crtc,
 						&port_dpll->hw_state,
-						DPLL_ID_ICL_DPLL0,
-						has_dpll4 ? DPLL_ID_EHL_DPLL4
-							  : DPLL_ID_ICL_DPLL1);
+						dpll_mask);
 	if (!port_dpll->pll) {
-		DRM_DEBUG_KMS("No combo PHY PLL found for port %c\n",
-			      port_name(encoder->port));
+		DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n",
+			      encoder->base.base.id, encoder->base.name);
 		return false;
 	}
 
@@ -2956,8 +3044,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state,
 
 	port_dpll->pll = intel_find_shared_dpll(state, crtc,
 						&port_dpll->hw_state,
-						DPLL_ID_ICL_TBTPLL,
-						DPLL_ID_ICL_TBTPLL);
+						BIT(DPLL_ID_ICL_TBTPLL));
 	if (!port_dpll->pll) {
 		DRM_DEBUG_KMS("No TBT-ALT PLL found\n");
 		return false;
@@ -2976,8 +3063,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state,
 							 encoder->port));
 	port_dpll->pll = intel_find_shared_dpll(state, crtc,
 						&port_dpll->hw_state,
-						dpll_id,
-						dpll_id);
+						BIT(dpll_id));
 	if (!port_dpll->pll) {
 		DRM_DEBUG_KMS("No MG PHY PLL found\n");
 		goto err_unreference_tbt_pll;
@@ -3101,6 +3187,78 @@ out:
 	return ret;
 }
 
+static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv,
+				 struct intel_shared_dpll *pll,
+				 struct intel_dpll_hw_state *hw_state)
+{
+	const enum intel_dpll_id id = pll->info->id;
+	enum tc_port tc_port = icl_pll_id_to_tc_port(id);
+	intel_wakeref_t wakeref;
+	bool ret = false;
+	u32 val;
+
+	wakeref = intel_display_power_get_if_enabled(dev_priv,
+						     POWER_DOMAIN_DISPLAY_CORE);
+	if (!wakeref)
+		return false;
+
+	val = I915_READ(MG_PLL_ENABLE(tc_port));
+	if (!(val & PLL_ENABLE))
+		goto out;
+
+	/*
+	 * All registers read here have the same HIP_INDEX_REG even though
+	 * they are on different building blocks
+	 */
+	I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2));
+
+	hw_state->mg_refclkin_ctl = I915_READ(DKL_REFCLKIN_CTL(tc_port));
+	hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK;
+
+	hw_state->mg_clktop2_hsclkctl =
+		I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port));
+	hw_state->mg_clktop2_hsclkctl &=
+		MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |
+		MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |
+		MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |
+		MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK;
+
+	hw_state->mg_clktop2_coreclkctl1 =
+		I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port));
+	hw_state->mg_clktop2_coreclkctl1 &=
+		MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;
+
+	hw_state->mg_pll_div0 = I915_READ(DKL_PLL_DIV0(tc_port));
+	hw_state->mg_pll_div0 &= (DKL_PLL_DIV0_INTEG_COEFF_MASK |
+				  DKL_PLL_DIV0_PROP_COEFF_MASK |
+				  DKL_PLL_DIV0_FBPREDIV_MASK |
+				  DKL_PLL_DIV0_FBDIV_INT_MASK);
+
+	hw_state->mg_pll_div1 = I915_READ(DKL_PLL_DIV1(tc_port));
+	hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK |
+				  DKL_PLL_DIV1_TDC_TARGET_CNT_MASK);
+
+	hw_state->mg_pll_ssc = I915_READ(DKL_PLL_SSC(tc_port));
+	hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK |
+				 DKL_PLL_SSC_STEP_LEN_MASK |
+				 DKL_PLL_SSC_STEP_NUM_MASK |
+				 DKL_PLL_SSC_EN);
+
+	hw_state->mg_pll_bias = I915_READ(DKL_PLL_BIAS(tc_port));
+	hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H |
+				  DKL_PLL_BIAS_FBDIV_FRAC_MASK);
+
+	hw_state->mg_pll_tdc_coldst_bias =
+		I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port));
+	hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK |
+					     DKL_PLL_TDC_FEED_FWD_GAIN_MASK);
+
+	ret = true;
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
+	return ret;
+}
+
 static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				 struct intel_shared_dpll *pll,
 				 struct intel_dpll_hw_state *hw_state,
@@ -3235,6 +3393,75 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,
 	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));
 }
 
+static void dkl_pll_write(struct drm_i915_private *dev_priv,
+			  struct intel_shared_dpll *pll)
+{
+	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state;
+	enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id);
+	u32 val;
+
+	/*
+	 * All registers programmed here have the same HIP_INDEX_REG even
+	 * though on different building block
+	 */
+	I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2));
+
+	/* All the registers are RMW */
+	val = I915_READ(DKL_REFCLKIN_CTL(tc_port));
+	val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK;
+	val |= hw_state->mg_refclkin_ctl;
+	I915_WRITE(DKL_REFCLKIN_CTL(tc_port), val);
+
+	val = I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port));
+	val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;
+	val |= hw_state->mg_clktop2_coreclkctl1;
+	I915_WRITE(DKL_CLKTOP2_CORECLKCTL1(tc_port), val);
+
+	val = I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port));
+	val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |
+		 MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |
+		 MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |
+		 MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK);
+	val |= hw_state->mg_clktop2_hsclkctl;
+	I915_WRITE(DKL_CLKTOP2_HSCLKCTL(tc_port), val);
+
+	val = I915_READ(DKL_PLL_DIV0(tc_port));
+	val &= ~(DKL_PLL_DIV0_INTEG_COEFF_MASK |
+		 DKL_PLL_DIV0_PROP_COEFF_MASK |
+		 DKL_PLL_DIV0_FBPREDIV_MASK |
+		 DKL_PLL_DIV0_FBDIV_INT_MASK);
+	val |= hw_state->mg_pll_div0;
+	I915_WRITE(DKL_PLL_DIV0(tc_port), val);
+
+	val = I915_READ(DKL_PLL_DIV1(tc_port));
+	val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK |
+		 DKL_PLL_DIV1_TDC_TARGET_CNT_MASK);
+	val |= hw_state->mg_pll_div1;
+	I915_WRITE(DKL_PLL_DIV1(tc_port), val);
+
+	val = I915_READ(DKL_PLL_SSC(tc_port));
+	val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK |
+		 DKL_PLL_SSC_STEP_LEN_MASK |
+		 DKL_PLL_SSC_STEP_NUM_MASK |
+		 DKL_PLL_SSC_EN);
+	val |= hw_state->mg_pll_ssc;
+	I915_WRITE(DKL_PLL_SSC(tc_port), val);
+
+	val = I915_READ(DKL_PLL_BIAS(tc_port));
+	val &= ~(DKL_PLL_BIAS_FRAC_EN_H |
+		 DKL_PLL_BIAS_FBDIV_FRAC_MASK);
+	val |= hw_state->mg_pll_bias;
+	I915_WRITE(DKL_PLL_BIAS(tc_port), val);
+
+	val = I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port));
+	val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK |
+		 DKL_PLL_TDC_FEED_FWD_GAIN_MASK);
+	val |= hw_state->mg_pll_tdc_coldst_bias;
+	I915_WRITE(DKL_PLL_TDC_COLDST_BIAS(tc_port), val);
+
+	POSTING_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port));
+}
+
 static void icl_pll_power_enable(struct drm_i915_private *dev_priv,
 				 struct intel_shared_dpll *pll,
 				 i915_reg_t enable_reg)
@@ -3327,7 +3554,10 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv,
 
 	icl_pll_power_enable(dev_priv, pll, enable_reg);
 
-	icl_mg_pll_write(dev_priv, pll);
+	if (INTEL_GEN(dev_priv) >= 12)
+		dkl_pll_write(dev_priv, pll);
+	else
+		icl_mg_pll_write(dev_priv, pll);
 
 	/*
 	 * DVFS pre sequence would be here, but in our driver the cdclk code
@@ -3482,11 +3712,22 @@ static const struct intel_dpll_mgr ehl_pll_mgr = {
 	.dump_hw_state = icl_dump_hw_state,
 };
 
+static const struct intel_shared_dpll_funcs dkl_pll_funcs = {
+	.enable = mg_pll_enable,
+	.disable = mg_pll_disable,
+	.get_hw_state = dkl_pll_get_hw_state,
+};
+
 static const struct dpll_info tgl_plls[] = {
 	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0,  0 },
 	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1,  0 },
 	{ "TBT PLL",  &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 },
-	/* TODO: Add typeC plls */
+	{ "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 },
+	{ "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 },
+	{ "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 },
+	{ "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 },
+	{ "TC PLL 5", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL5, 0 },
+	{ "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 },
 	{ },
 };
 
@@ -3494,6 +3735,7 @@ static const struct intel_dpll_mgr tgl_pll_mgr = {
 	.dpll_info = tgl_plls,
 	.get_dplls = icl_get_dplls,
 	.put_dplls = icl_put_dplls,
+	.update_active_dpll = icl_update_active_dpll,
 	.dump_hw_state = icl_dump_hw_state,
 };
 
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
index 104cf6d42333..2a104c64291d 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
@@ -337,6 +337,11 @@ struct intel_shared_dpll {
 	 * @info: platform specific info
 	 */
 	const struct dpll_info *info;
+
+	/**
+	 * @wakeref: In some platforms a device-level runtime pm reference may
+	 * need to be grabbed to disable DC states while this DPLL is enabled
+	 */
 	intel_wakeref_t wakeref;
 };
 
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
new file mode 100644
index 000000000000..ada006a690df
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+#define DSB_BUF_SIZE    (2 * PAGE_SIZE)
+
+/**
+ * DOC: DSB
+ *
+ * A DSB (Display State Buffer) is a queue of MMIO instructions in the memory
+ * which can be offloaded to DSB HW in Display Controller. DSB HW is a DMA
+ * engine that can be programmed to download the DSB from memory.
+ * It allows driver to batch submit display HW programming. This helps to
+ * reduce loading time and CPU activity, thereby making the context switch
+ * faster. DSB Support added from Gen12 Intel graphics based platform.
+ *
+ * DSB's can access only the pipe, plane, and transcoder Data Island Packet
+ * registers.
+ *
+ * DSB HW can support only register writes (both indexed and direct MMIO
+ * writes). There are no registers reads possible with DSB HW engine.
+ */
+
+/* DSB opcodes. */
+#define DSB_OPCODE_SHIFT		24
+#define DSB_OPCODE_MMIO_WRITE		0x1
+#define DSB_OPCODE_INDEXED_WRITE	0x9
+#define DSB_BYTE_EN			0xF
+#define DSB_BYTE_EN_SHIFT		20
+#define DSB_REG_VALUE_MASK		0xfffff
+
+static inline bool is_dsb_busy(struct intel_dsb *dsb)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+
+	return DSB_STATUS & I915_READ(DSB_CTRL(pipe, dsb->id));
+}
+
+static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+	u32 dsb_ctrl;
+
+	dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id));
+	if (DSB_STATUS & dsb_ctrl) {
+		DRM_DEBUG_KMS("DSB engine is busy.\n");
+		return false;
+	}
+
+	dsb_ctrl |= DSB_ENABLE;
+	I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl);
+
+	POSTING_READ(DSB_CTRL(pipe, dsb->id));
+	return true;
+}
+
+static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+	u32 dsb_ctrl;
+
+	dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id));
+	if (DSB_STATUS & dsb_ctrl) {
+		DRM_DEBUG_KMS("DSB engine is busy.\n");
+		return false;
+	}
+
+	dsb_ctrl &= ~DSB_ENABLE;
+	I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl);
+
+	POSTING_READ(DSB_CTRL(pipe, dsb->id));
+	return true;
+}
+
+/**
+ * intel_dsb_get() - Allocate DSB context and return a DSB instance.
+ * @crtc: intel_crtc structure to get pipe info.
+ *
+ * This function provides handle of a DSB instance, for the further DSB
+ * operations.
+ *
+ * Returns: address of Intel_dsb instance requested for.
+ * Failure: Returns the same DSB instance, but without a command buffer.
+ */
+
+struct intel_dsb *
+intel_dsb_get(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct intel_dsb *dsb = &crtc->dsb;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *buf;
+	intel_wakeref_t wakeref;
+
+	if (!HAS_DSB(i915))
+		return dsb;
+
+	if (dsb->refcount++ != 0)
+		return dsb;
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Gem object creation failed\n");
+		goto out;
+	}
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma)) {
+		DRM_ERROR("Vma creation failed\n");
+		i915_gem_object_put(obj);
+		goto out;
+	}
+
+	buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+	if (IS_ERR(buf)) {
+		DRM_ERROR("Command buffer creation failed\n");
+		goto out;
+	}
+
+	dsb->id = DSB1;
+	dsb->vma = vma;
+	dsb->cmd_buf = buf;
+
+out:
+	/*
+	 * On error dsb->cmd_buf will continue to be NULL, making the writes
+	 * pass-through. Leave the dangling ref to be removed later by the
+	 * corresponding intel_dsb_put(): the important error message will
+	 * already be logged above.
+	 */
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+
+	return dsb;
+}
+
+/**
+ * intel_dsb_put() - To destroy DSB context.
+ * @dsb: intel_dsb structure.
+ *
+ * This function destroys the DSB context allocated by a dsb_get(), by
+ * unpinning and releasing the VMA object associated with it.
+ */
+
+void intel_dsb_put(struct intel_dsb *dsb)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	if (!HAS_DSB(i915))
+		return;
+
+	if (WARN_ON(dsb->refcount == 0))
+		return;
+
+	if (--dsb->refcount == 0) {
+		i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP);
+		dsb->cmd_buf = NULL;
+		dsb->free_pos = 0;
+		dsb->ins_start_offset = 0;
+	}
+}
+
+/**
+ * intel_dsb_indexed_reg_write() -Write to the DSB context for auto
+ * increment register.
+ * @dsb: intel_dsb structure.
+ * @reg: register address.
+ * @val: value.
+ *
+ * This function is used for writing register-value pair in command
+ * buffer of DSB for auto-increment register. During command buffer overflow,
+ * a warning is thrown and rest all erroneous condition register programming
+ * is done through mmio write.
+ */
+
+void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg,
+				 u32 val)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 *buf = dsb->cmd_buf;
+	u32 reg_val;
+
+	if (!buf) {
+		I915_WRITE(reg, val);
+		return;
+	}
+
+	if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) {
+		DRM_DEBUG_KMS("DSB buffer overflow\n");
+		return;
+	}
+
+	/*
+	 * For example the buffer will look like below for 3 dwords for auto
+	 * increment register:
+	 * +--------------------------------------------------------+
+	 * | size = 3 | offset &| value1 | value2 | value3 | zero   |
+	 * |          | opcode  |        |        |        |        |
+	 * +--------------------------------------------------------+
+	 * +          +         +        +        +        +        +
+	 * 0          4         8        12       16       20       24
+	 * Byte
+	 *
+	 * As every instruction is 8 byte aligned the index of dsb instruction
+	 * will start always from even number while dealing with u32 array. If
+	 * we are writing odd no of dwords, Zeros will be added in the end for
+	 * padding.
+	 */
+	reg_val = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK;
+	if (reg_val != i915_mmio_reg_offset(reg)) {
+		/* Every instruction should be 8 byte aligned. */
+		dsb->free_pos = ALIGN(dsb->free_pos, 2);
+
+		dsb->ins_start_offset = dsb->free_pos;
+
+		/* Update the size. */
+		buf[dsb->free_pos++] = 1;
+
+		/* Update the opcode and reg. */
+		buf[dsb->free_pos++] = (DSB_OPCODE_INDEXED_WRITE  <<
+					DSB_OPCODE_SHIFT) |
+					i915_mmio_reg_offset(reg);
+
+		/* Update the value. */
+		buf[dsb->free_pos++] = val;
+	} else {
+		/* Update the new value. */
+		buf[dsb->free_pos++] = val;
+
+		/* Update the size. */
+		buf[dsb->ins_start_offset]++;
+	}
+
+	/* if number of data words is odd, then the last dword should be 0.*/
+	if (dsb->free_pos & 0x1)
+		buf[dsb->free_pos] = 0;
+}
+
+/**
+ * intel_dsb_reg_write() -Write to the DSB context for normal
+ * register.
+ * @dsb: intel_dsb structure.
+ * @reg: register address.
+ * @val: value.
+ *
+ * This function is used for writing register-value pair in command
+ * buffer of DSB. During command buffer overflow, a warning  is thrown
+ * and rest all erroneous condition register programming is done
+ * through mmio write.
+ */
+void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 *buf = dsb->cmd_buf;
+
+	if (!buf) {
+		I915_WRITE(reg, val);
+		return;
+	}
+
+	if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) {
+		DRM_DEBUG_KMS("DSB buffer overflow\n");
+		return;
+	}
+
+	dsb->ins_start_offset = dsb->free_pos;
+	buf[dsb->free_pos++] = val;
+	buf[dsb->free_pos++] = (DSB_OPCODE_MMIO_WRITE  << DSB_OPCODE_SHIFT) |
+			       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
+			       i915_mmio_reg_offset(reg);
+}
+
+/**
+ * intel_dsb_commit() - Trigger workload execution of DSB.
+ * @dsb: intel_dsb structure.
+ *
+ * This function is used to do actual write to hardware using DSB.
+ * On errors, fall back to MMIO. Also this function help to reset the context.
+ */
+void intel_dsb_commit(struct intel_dsb *dsb)
+{
+	struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	enum pipe pipe = crtc->pipe;
+	u32 tail;
+
+	if (!dsb->free_pos)
+		return;
+
+	if (!intel_dsb_enable_engine(dsb))
+		goto reset;
+
+	if (is_dsb_busy(dsb)) {
+		DRM_ERROR("HEAD_PTR write failed - dsb engine is busy.\n");
+		goto reset;
+	}
+	I915_WRITE(DSB_HEAD(pipe, dsb->id), i915_ggtt_offset(dsb->vma));
+
+	tail = ALIGN(dsb->free_pos * 4, CACHELINE_BYTES);
+	if (tail > dsb->free_pos * 4)
+		memset(&dsb->cmd_buf[dsb->free_pos], 0,
+		       (tail - dsb->free_pos * 4));
+
+	if (is_dsb_busy(dsb)) {
+		DRM_ERROR("TAIL_PTR write failed - dsb engine is busy.\n");
+		goto reset;
+	}
+	DRM_DEBUG_KMS("DSB execution started - head 0x%x, tail 0x%x\n",
+		      i915_ggtt_offset(dsb->vma), tail);
+	I915_WRITE(DSB_TAIL(pipe, dsb->id), i915_ggtt_offset(dsb->vma) + tail);
+	if (wait_for(!is_dsb_busy(dsb), 1)) {
+		DRM_ERROR("Timed out waiting for DSB workload completion.\n");
+		goto reset;
+	}
+
+reset:
+	dsb->free_pos = 0;
+	dsb->ins_start_offset = 0;
+	intel_dsb_disable_engine(dsb);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h
new file mode 100644
index 000000000000..395ef9ce558e
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _INTEL_DSB_H
+#define _INTEL_DSB_H
+
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+struct intel_crtc;
+struct i915_vma;
+
+enum dsb_id {
+	INVALID_DSB = -1,
+	DSB1,
+	DSB2,
+	DSB3,
+	MAX_DSB_PER_PIPE
+};
+
+struct intel_dsb {
+	long refcount;
+	enum dsb_id id;
+	u32 *cmd_buf;
+	struct i915_vma *vma;
+
+	/*
+	 * free_pos will point the first free entry position
+	 * and help in calculating tail of command buffer.
+	 */
+	int free_pos;
+
+	/*
+	 * ins_start_offset will help to store start address of the dsb
+	 * instuction and help in identifying the batch of auto-increment
+	 * register.
+	 */
+	u32 ins_start_offset;
+};
+
+struct intel_dsb *
+intel_dsb_get(struct intel_crtc *crtc);
+void intel_dsb_put(struct intel_dsb *dsb);
+void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val);
+void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg,
+				 u32 val);
+void intel_dsb_commit(struct intel_dsb *dsb);
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c
index 5fec02aceaed..a2a937109a5a 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi.c
@@ -55,6 +55,7 @@ int intel_dsi_get_modes(struct drm_connector *connector)
 enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector,
 					  struct drm_display_mode *mode)
 {
+	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -73,7 +74,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector,
 			return MODE_CLOCK_HIGH;
 	}
 
-	return MODE_OK;
+	return intel_mode_valid_max_plane_size(dev_priv, mode);
 }
 
 struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi,
diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h
index b15be5814599..19f78a4022d3 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -45,8 +45,9 @@ struct intel_dsi {
 	struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS];
 	intel_wakeref_t io_wakeref[I915_MAX_PORTS];
 
-	/* GPIO Desc for CRC based Panel control */
+	/* GPIO Desc for panel and backlight control */
 	struct gpio_desc *gpio_panel;
+	struct gpio_desc *gpio_backlight;
 
 	struct intel_connector *attached_connector;
 
@@ -68,6 +69,9 @@ struct intel_dsi {
 	/* number of DSI lanes */
 	unsigned int lane_count;
 
+	/* i2c bus associated with the slave device */
+	int i2c_bus_num;
+
 	/*
 	 * video mode pixel format
 	 *
@@ -141,9 +145,9 @@ static inline struct intel_dsi_host *to_intel_dsi_host(struct mipi_dsi_host *h)
 #define for_each_dsi_phy(__phy, __phys_mask) \
 	for_each_phy_masked(__phy, __phys_mask)
 
-static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
+static inline struct intel_dsi *enc_to_intel_dsi(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_dsi, base.base);
+	return container_of(&encoder->base, struct intel_dsi, base.base);
 }
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -158,7 +162,7 @@ static inline bool is_cmd_mode(struct intel_dsi *intel_dsi)
 
 static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder)
 {
-	return enc_to_intel_dsi(&encoder->base)->ports;
+	return enc_to_intel_dsi(encoder)->ports;
 }
 
 /* icl_dsi.c */
@@ -203,6 +207,8 @@ void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port);
 
 /* intel_dsi_vbt.c */
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
+void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on);
+void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec);
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
index bb3fd8b786a2..c87838843d0b 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
@@ -46,7 +46,7 @@
 static u32 dcs_get_backlight(struct intel_connector *connector)
 {
 	struct intel_encoder *encoder = connector->encoder;
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi_device;
 	u8 data = 0;
 	enum port port;
@@ -64,7 +64,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector)
 
 static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 level)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct mipi_dsi_device *dsi_device;
 	u8 data = level;
 	enum port port;
@@ -79,7 +79,7 @@ static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32
 
 static void dcs_disable_backlight(const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct mipi_dsi_device *dsi_device;
 	enum port port;
 
@@ -113,7 +113,7 @@ static void dcs_disable_backlight(const struct drm_connector_state *conn_state)
 static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel;
 	struct mipi_dsi_device *dsi_device;
 	enum port port;
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f90946c912ee..89fb0d90b694 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -25,7 +25,10 @@
  */
 
 #include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
 #include <linux/mfd/intel_soc_pmic.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/machine.h>
 #include <linux/slab.h>
 
 #include <asm/intel-mid.h>
@@ -83,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = {
 	{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
 };
 
+struct i2c_adapter_lookup {
+	u16 slave_addr;
+	struct intel_dsi *intel_dsi;
+	acpi_handle dev_handle;
+};
+
 #define CHV_GPIO_IDX_START_N		0
 #define CHV_GPIO_IDX_START_E		73
 #define CHV_GPIO_IDX_START_SW		100
@@ -375,11 +384,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
+static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
+{
+	struct i2c_adapter_lookup *lookup = data;
+	struct intel_dsi *intel_dsi = lookup->intel_dsi;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter;
+	acpi_handle adapter_handle;
+	acpi_status status;
+
+	if (intel_dsi->i2c_bus_num >= 0 ||
+	    !i2c_acpi_get_i2c_resource(ares, &sb))
+		return 1;
+
+	if (lookup->slave_addr != sb->slave_address)
+		return 1;
+
+	status = acpi_get_handle(lookup->dev_handle,
+				 sb->resource_source.string_ptr,
+				 &adapter_handle);
+	if (ACPI_FAILURE(status))
+		return 1;
+
+	adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+	if (adapter)
+		intel_dsi->i2c_bus_num = adapter->nr;
+
+	return 1;
+}
+
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
-	DRM_DEBUG_KMS("Skipping I2C element execution\n");
+	struct drm_device *drm_dev = intel_dsi->base.base.dev;
+	struct device *dev = &drm_dev->pdev->dev;
+	struct i2c_adapter *adapter;
+	struct acpi_device *acpi_dev;
+	struct list_head resource_list;
+	struct i2c_adapter_lookup lookup;
+	struct i2c_msg msg;
+	int ret;
+	u8 vbt_i2c_bus_num = *(data + 2);
+	u16 slave_addr = *(u16 *)(data + 3);
+	u8 reg_offset = *(data + 5);
+	u8 payload_size = *(data + 6);
+	u8 *payload_data;
+
+	if (intel_dsi->i2c_bus_num < 0) {
+		intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
+
+		acpi_dev = ACPI_COMPANION(dev);
+		if (acpi_dev) {
+			memset(&lookup, 0, sizeof(lookup));
+			lookup.slave_addr = slave_addr;
+			lookup.intel_dsi = intel_dsi;
+			lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+			INIT_LIST_HEAD(&resource_list);
+			acpi_dev_get_resources(acpi_dev, &resource_list,
+					       i2c_adapter_lookup,
+					       &lookup);
+			acpi_dev_free_resource_list(&resource_list);
+		}
+	}
 
-	return data + *(data + 6) + 7;
+	adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
+	if (!adapter) {
+		DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n");
+		goto err_bus;
+	}
+
+	payload_data = kzalloc(payload_size + 1, GFP_KERNEL);
+	if (!payload_data)
+		goto err_alloc;
+
+	payload_data[0] = reg_offset;
+	memcpy(&payload_data[1], (data + 7), payload_size);
+
+	msg.addr = slave_addr;
+	msg.flags = 0;
+	msg.len = payload_size + 1;
+	msg.buf = payload_data;
+
+	ret = i2c_transfer(adapter, &msg, 1);
+	if (ret < 0)
+		DRM_DEV_ERROR(dev,
+			      "Failed to xfer payload of size (%u) to reg (%u)\n",
+			      payload_size, reg_offset);
+
+	kfree(payload_data);
+err_alloc:
+	i2c_put_adapter(adapter);
+err_bus:
+	return data + payload_size + 7;
 }
 
 static const u8 *mipi_exec_spi(struct intel_dsi *intel_dsi, const u8 *data)
@@ -453,8 +549,8 @@ static const char *sequence_name(enum mipi_seq seq_id)
 		return "(unknown)";
 }
 
-void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
-				 enum mipi_seq seq_id)
+static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi,
+			       enum mipi_seq seq_id)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev);
 	const u8 *data;
@@ -519,6 +615,22 @@ void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 	}
 }
 
+void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
+				 enum mipi_seq seq_id)
+{
+	if (seq_id == MIPI_SEQ_POWER_ON && intel_dsi->gpio_panel)
+		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1);
+	if (seq_id == MIPI_SEQ_BACKLIGHT_ON && intel_dsi->gpio_backlight)
+		gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 1);
+
+	intel_dsi_vbt_exec(intel_dsi, seq_id);
+
+	if (seq_id == MIPI_SEQ_POWER_OFF && intel_dsi->gpio_panel)
+		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0);
+	if (seq_id == MIPI_SEQ_BACKLIGHT_OFF && intel_dsi->gpio_backlight)
+		gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 0);
+}
+
 void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev);
@@ -664,6 +776,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	intel_dsi->panel_off_delay = pps->panel_off_delay / 10;
 	intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10;
 
+	intel_dsi->i2c_bus_num = -1;
+
 	/* a regular driver would get the device in probe */
 	for_each_dsi_port(port, intel_dsi->ports) {
 		mipi_dsi_attach(intel_dsi->dsi_hosts[port]->device);
@@ -671,3 +785,110 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 
 	return true;
 }
+
+/*
+ * On some BYT/CHT devs some sequences are incomplete and we need to manually
+ * control some GPIOs. We need to add a GPIO lookup table before we get these.
+ * If the GOP did not initialize the panel (HDMI inserted) we may need to also
+ * change the pinmux for the SoC's PWM0 pin from GPIO to PWM.
+ */
+static struct gpiod_lookup_table pmic_panel_gpio_table = {
+	/* Intel GFX is consumer */
+	.dev_id = "0000:00:02.0",
+	.table = {
+		/* Panel EN/DISABLE */
+		GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
+static struct gpiod_lookup_table soc_panel_gpio_table = {
+	.dev_id = "0000:00:02.0",
+	.table = {
+		GPIO_LOOKUP("INT33FC:01", 10, "backlight", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("INT33FC:01", 11, "panel", GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
+static const struct pinctrl_map soc_pwm_pinctrl_map[] = {
+	PIN_MAP_MUX_GROUP("0000:00:02.0", "soc_pwm0", "INT33FC:00",
+			  "pwm0_grp", "pwm"),
+};
+
+void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	enum gpiod_flags flags = panel_is_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+	bool want_backlight_gpio = false;
+	bool want_panel_gpio = false;
+	struct pinctrl *pinctrl;
+	int ret;
+
+	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+	    mipi_config->pwm_blc == PPS_BLC_PMIC) {
+		gpiod_add_lookup_table(&pmic_panel_gpio_table);
+		want_panel_gpio = true;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) {
+		gpiod_add_lookup_table(&soc_panel_gpio_table);
+		want_panel_gpio = true;
+		want_backlight_gpio = true;
+
+		/* Ensure PWM0 pin is muxed as PWM instead of GPIO */
+		ret = pinctrl_register_mappings(soc_pwm_pinctrl_map,
+					     ARRAY_SIZE(soc_pwm_pinctrl_map));
+		if (ret)
+			DRM_ERROR("Failed to register pwm0 pinmux mapping\n");
+
+		pinctrl = devm_pinctrl_get_select(dev->dev, "soc_pwm0");
+		if (IS_ERR(pinctrl))
+			DRM_ERROR("Failed to set pinmux to PWM\n");
+	}
+
+	if (want_panel_gpio) {
+		intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags);
+		if (IS_ERR(intel_dsi->gpio_panel)) {
+			DRM_ERROR("Failed to own gpio for panel control\n");
+			intel_dsi->gpio_panel = NULL;
+		}
+	}
+
+	if (want_backlight_gpio) {
+		intel_dsi->gpio_backlight =
+			gpiod_get(dev->dev, "backlight", flags);
+		if (IS_ERR(intel_dsi->gpio_backlight)) {
+			DRM_ERROR("Failed to own gpio for backlight control\n");
+			intel_dsi->gpio_backlight = NULL;
+		}
+	}
+}
+
+void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+
+	if (intel_dsi->gpio_panel) {
+		gpiod_put(intel_dsi->gpio_panel);
+		intel_dsi->gpio_panel = NULL;
+	}
+
+	if (intel_dsi->gpio_backlight) {
+		gpiod_put(intel_dsi->gpio_backlight);
+		intel_dsi->gpio_backlight = NULL;
+	}
+
+	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+	    mipi_config->pwm_blc == PPS_BLC_PMIC)
+		gpiod_remove_lookup_table(&pmic_panel_gpio_table);
+
+	if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) {
+		pinctrl_unregister_mappings(soc_pwm_pinctrl_map);
+		gpiod_remove_lookup_table(&soc_panel_gpio_table);
+	}
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 93baf366692e..86a337c9d85d 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -125,7 +125,7 @@ static struct intel_dvo *enc_to_dvo(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_dvo, base);
 }
 
-static struct intel_dvo *intel_attached_dvo(struct drm_connector *connector)
+static struct intel_dvo *intel_attached_dvo(struct intel_connector *connector)
 {
 	return enc_to_dvo(intel_attached_encoder(connector));
 }
@@ -134,7 +134,7 @@ static bool intel_dvo_connector_get_hw_state(struct intel_connector *connector)
 {
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dvo *intel_dvo = intel_attached_dvo(&connector->base);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	u32 tmp;
 
 	tmp = I915_READ(intel_dvo->dev.dvo_reg);
@@ -178,9 +178,9 @@ static void intel_dvo_get_config(struct intel_encoder *encoder,
 	else
 		flags |= DRM_MODE_FLAG_NVSYNC;
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
-	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
+	pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
 static void intel_disable_dvo(struct intel_encoder *encoder,
@@ -207,8 +207,8 @@ static void intel_enable_dvo(struct intel_encoder *encoder,
 	u32 temp = I915_READ(dvo_reg);
 
 	intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev,
-					 &pipe_config->base.mode,
-					 &pipe_config->base.adjusted_mode);
+					 &pipe_config->hw.mode,
+					 &pipe_config->hw.adjusted_mode);
 
 	I915_WRITE(dvo_reg, temp | DVO_ENABLE);
 	I915_READ(dvo_reg);
@@ -220,7 +220,7 @@ static enum drm_mode_status
 intel_dvo_mode_valid(struct drm_connector *connector,
 		     struct drm_display_mode *mode)
 {
-	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector));
 	const struct drm_display_mode *fixed_mode =
 		to_intel_connector(connector)->panel.fixed_mode;
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -253,7 +253,7 @@ static int intel_dvo_compute_config(struct intel_encoder *encoder,
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	const struct drm_display_mode *fixed_mode =
 		intel_dvo->attached_connector->panel.fixed_mode;
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 
 	/*
 	 * If we have timings from the BIOS for the panel, put them in
@@ -277,10 +277,10 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder,
 				 const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
-	int pipe = crtc->pipe;
+	enum pipe pipe = crtc->pipe;
 	u32 dvo_val;
 	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
 	i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg;
@@ -311,7 +311,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder,
 static enum drm_connector_status
 intel_dvo_detect(struct drm_connector *connector, bool force)
 {
-	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector));
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 	return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev);
@@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv)
 		intel_encoder->type = INTEL_OUTPUT_DVO;
 		intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER;
 		intel_encoder->port = port;
-		intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+		intel_encoder->pipe_mask = ~0;
 
 		switch (dvo->type) {
 		case INTEL_DVO_CHIP_TMDS:
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 16ed44bfd734..a1048ece541e 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -50,11 +50,6 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv)
 	return HAS_FBC(dev_priv);
 }
 
-static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv)
-{
-	return INTEL_GEN(dev_priv) <= 3;
-}
-
 /*
  * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the
  * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's
@@ -73,7 +68,7 @@ static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc)
  * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value
  * we wrote to PIPESRC.
  */
-static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache,
+static void intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache *cache,
 					    int *width, int *height)
 {
 	if (width)
@@ -83,7 +78,7 @@ static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache,
 }
 
 static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
-					struct intel_fbc_state_cache *cache)
+					const struct intel_fbc_state_cache *cache)
 {
 	int lines;
 
@@ -143,8 +138,10 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
 		u32 fbc_ctl2;
 
 		/* Set it up... */
-		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
+		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM;
 		fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.i9xx_plane);
+		if (params->fence_id >= 0)
+			fbc_ctl2 |= FBC_CTL_CPU_FENCE;
 		I915_WRITE(FBC_CONTROL2, fbc_ctl2);
 		I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset);
 	}
@@ -156,7 +153,8 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
 	if (IS_I945GM(dev_priv))
 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
-	fbc_ctl |= params->vma->fence->id;
+	if (params->fence_id >= 0)
+		fbc_ctl |= params->fence_id;
 	I915_WRITE(FBC_CONTROL, fbc_ctl);
 }
 
@@ -176,8 +174,8 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
 	else
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
 
-	if (params->flags & PLANE_HAS_FENCE) {
-		dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id;
+	if (params->fence_id >= 0) {
+		dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id;
 		I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
 	} else {
 		I915_WRITE(DPFC_FENCE_YOFF, 0);
@@ -234,14 +232,14 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	if (params->flags & PLANE_HAS_FENCE) {
+	if (params->fence_id >= 0) {
 		dpfc_ctl |= DPFC_CTL_FENCE_EN;
 		if (IS_GEN(dev_priv, 5))
-			dpfc_ctl |= params->vma->fence->id;
+			dpfc_ctl |= params->fence_id;
 		if (IS_GEN(dev_priv, 6)) {
 			I915_WRITE(SNB_DPFC_CTL_SA,
 				   SNB_CPU_FENCE_ENABLE |
-				   params->vma->fence->id);
+				   params->fence_id);
 			I915_WRITE(DPFC_CPU_FENCE_OFFSET,
 				   params->crtc.fence_y_offset);
 		}
@@ -253,8 +251,6 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 	}
 
 	I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
-	I915_WRITE(ILK_FBC_RT_BASE,
-		   i915_ggtt_offset(params->vma) | ILK_FBC_RT_VALID);
 	/* enable it... */
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
@@ -285,13 +281,12 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 	int threshold = dev_priv->fbc.threshold;
 
 	/* Display WA #0529: skl, kbl, bxt. */
-	if (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv)) {
+	if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
 		u32 val = I915_READ(CHICKEN_MISC_4);
 
 		val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK);
 
-		if (i915_gem_object_get_tiling(params->vma->obj) !=
-		    I915_TILING_X)
+		if (params->gen9_wa_cfb_stride)
 			val |= FBC_STRIDE_OVERRIDE | params->gen9_wa_cfb_stride;
 
 		I915_WRITE(CHICKEN_MISC_4, val);
@@ -317,11 +312,11 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	if (params->flags & PLANE_HAS_FENCE) {
+	if (params->fence_id >= 0) {
 		dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
 		I915_WRITE(SNB_DPFC_CTL_SA,
 			   SNB_CPU_FENCE_ENABLE |
-			   params->vma->fence->id);
+			   params->fence_id);
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
 	} else {
 		I915_WRITE(SNB_DPFC_CTL_SA,0);
@@ -343,8 +338,8 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 			   HSW_FBCQ_DIS);
 	}
 
-	if (IS_GEN(dev_priv, 11))
-		/* Wa_1409120013:icl,ehl */
+	if (INTEL_GEN(dev_priv) >= 11)
+		/* Wa_1409120013:icl,ehl,tgl */
 		I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL);
 
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
@@ -367,6 +362,7 @@ static void intel_fbc_hw_activate(struct drm_i915_private *dev_priv)
 	struct intel_fbc *fbc = &dev_priv->fbc;
 
 	fbc->active = true;
+	fbc->activated = true;
 
 	if (INTEL_GEN(dev_priv) >= 7)
 		gen7_fbc_activate(dev_priv);
@@ -419,29 +415,10 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv,
 	fbc->no_fbc_reason = reason;
 }
 
-static bool multiple_pipes_ok(struct intel_crtc *crtc,
-			      struct intel_plane_state *plane_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct intel_fbc *fbc = &dev_priv->fbc;
-	enum pipe pipe = crtc->pipe;
-
-	/* Don't even bother tracking anything we don't need. */
-	if (!no_fbc_on_multiple_pipes(dev_priv))
-		return true;
-
-	if (plane_state->base.visible)
-		fbc->visible_pipes_mask |= (1 << pipe);
-	else
-		fbc->visible_pipes_mask &= ~(1 << pipe);
-
-	return (fbc->visible_pipes_mask & ~(1 << pipe)) != 0;
-}
-
 static int find_compression_threshold(struct drm_i915_private *dev_priv,
 				      struct drm_mm_node *node,
-				      int size,
-				      int fb_cpp)
+				      unsigned int size,
+				      unsigned int fb_cpp)
 {
 	int compression_threshold = 1;
 	int ret;
@@ -487,18 +464,15 @@ again:
 	}
 }
 
-static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
+static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
+			       unsigned int size, unsigned int fb_cpp)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct drm_mm_node *uninitialized_var(compressed_llb);
-	int size, fb_cpp, ret;
+	int ret;
 
 	WARN_ON(drm_mm_node_allocated(&fbc->compressed_fb));
 
-	size = intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache);
-	fb_cpp = fbc->state_cache.fb.format->cpp[0];
-
 	ret = find_compression_threshold(dev_priv, &fbc->compressed_fb,
 					 size, fb_cpp);
 	if (!ret)
@@ -656,46 +630,55 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 }
 
 static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
-					 struct intel_crtc_state *crtc_state,
-					 struct intel_plane_state *plane_state)
+					 const struct intel_crtc_state *crtc_state,
+					 const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
-	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_framebuffer *fb = plane_state->hw.fb;
 
-	cache->vma = NULL;
-	cache->flags = 0;
+	cache->plane.visible = plane_state->uapi.visible;
+	if (!cache->plane.visible)
+		return;
 
-	cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags;
+	cache->crtc.mode_flags = crtc_state->hw.adjusted_mode.flags;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate;
 
-	cache->plane.rotation = plane_state->base.rotation;
+	cache->plane.rotation = plane_state->hw.rotation;
 	/*
 	 * Src coordinates are already rotated by 270 degrees for
 	 * the 90/270 degree plane rotation cases (to match the
 	 * GTT mapping), hence no need to account for rotation here.
 	 */
-	cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	cache->plane.visible = plane_state->base.visible;
+	cache->plane.src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
 	cache->plane.adjusted_x = plane_state->color_plane[0].x;
 	cache->plane.adjusted_y = plane_state->color_plane[0].y;
-	cache->plane.y = plane_state->base.src.y1 >> 16;
+	cache->plane.y = plane_state->uapi.src.y1 >> 16;
 
-	cache->plane.pixel_blend_mode = plane_state->base.pixel_blend_mode;
-
-	if (!cache->plane.visible)
-		return;
+	cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode;
 
 	cache->fb.format = fb->format;
 	cache->fb.stride = fb->pitches[0];
 
-	cache->vma = plane_state->vma;
-	cache->flags = plane_state->flags;
-	if (WARN_ON(cache->flags & PLANE_HAS_FENCE && !cache->vma->fence))
-		cache->flags &= ~PLANE_HAS_FENCE;
+	WARN_ON(plane_state->flags & PLANE_HAS_FENCE &&
+		!plane_state->vma->fence);
+
+	if (plane_state->flags & PLANE_HAS_FENCE &&
+	    plane_state->vma->fence)
+		cache->fence_id = plane_state->vma->fence->id;
+	else
+		cache->fence_id = -1;
+}
+
+static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv)
+{
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
+	return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
+		fbc->compressed_fb.size * fbc->threshold;
 }
 
 static bool intel_fbc_can_activate(struct intel_crtc *crtc)
@@ -704,6 +687,11 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
 
+	if (!cache->plane.visible) {
+		fbc->no_fbc_reason = "primary plane not visible";
+		return false;
+	}
+
 	/* We don't need to use a state cache here since this information is
 	 * global for all CRTC.
 	 */
@@ -712,11 +700,6 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 		return false;
 	}
 
-	if (!cache->vma) {
-		fbc->no_fbc_reason = "primary plane not visible";
-		return false;
-	}
-
 	if (cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) {
 		fbc->no_fbc_reason = "incompatible mode";
 		return false;
@@ -740,7 +723,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 	 * For now this will effecively disable FBC with 90/270 degree
 	 * rotation.
 	 */
-	if (!(cache->flags & PLANE_HAS_FENCE)) {
+	if (cache->fence_id < 0) {
 		fbc->no_fbc_reason = "framebuffer not tiled or fenced";
 		return false;
 	}
@@ -783,8 +766,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 	 * we didn't get any invalidate/deactivate calls, but this would require
 	 * a lot of tracking just for a specific case. If we conclude it's an
 	 * important case, we can implement it later. */
-	if (intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
-	    fbc->compressed_fb.size * fbc->threshold) {
+	if (intel_fbc_cfb_size_changed(dev_priv)) {
 		fbc->no_fbc_reason = "CFB requirements changed";
 		return false;
 	}
@@ -794,7 +776,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 	 * having a Y offset that isn't divisible by 4 causes FIFO underrun
 	 * and screen flicker.
 	 */
-	if (IS_GEN_RANGE(dev_priv, 9, 10) &&
+	if (INTEL_GEN(dev_priv) >= 9 &&
 	    (fbc->state_cache.plane.adjusted_y & 3)) {
 		fbc->no_fbc_reason = "plane Y offset is misaligned";
 		return false;
@@ -837,8 +819,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
 	 * zero. */
 	memset(params, 0, sizeof(*params));
 
-	params->vma = cache->vma;
-	params->flags = cache->flags;
+	params->fence_id = cache->fence_id;
 
 	params->crtc.pipe = crtc->pipe;
 	params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane;
@@ -849,39 +830,88 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
 
 	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
 
-	if (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv))
-		params->gen9_wa_cfb_stride = DIV_ROUND_UP(cache->plane.src_w,
-						32 * fbc->threshold) * 8;
+	params->gen9_wa_cfb_stride = cache->gen9_wa_cfb_stride;
+
+	params->plane_visible = cache->plane.visible;
+}
+
+static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_fbc *fbc = &dev_priv->fbc;
+	const struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	const struct intel_fbc_reg_params *params = &fbc->params;
+
+	if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi))
+		return false;
+
+	if (!params->plane_visible)
+		return false;
+
+	if (!intel_fbc_can_activate(crtc))
+		return false;
+
+	if (params->fb.format != cache->fb.format)
+		return false;
+
+	if (params->fb.stride != cache->fb.stride)
+		return false;
+
+	if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
+		return false;
+
+	if (params->gen9_wa_cfb_stride != cache->gen9_wa_cfb_stride)
+		return false;
+
+	return true;
 }
 
-void intel_fbc_pre_update(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state,
-			  struct intel_plane_state *plane_state)
+bool intel_fbc_pre_update(struct intel_crtc *crtc,
+			  const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	const char *reason = "update pending";
+	bool need_vblank_wait = false;
 
 	if (!fbc_supported(dev_priv))
-		return;
+		return need_vblank_wait;
 
 	mutex_lock(&fbc->lock);
 
-	if (!multiple_pipes_ok(crtc, plane_state)) {
-		reason = "more than one pipe active";
-		goto deactivate;
-	}
-
-	if (!fbc->enabled || fbc->crtc != crtc)
+	if (fbc->crtc != crtc)
 		goto unlock;
 
 	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
 	fbc->flip_pending = true;
 
-deactivate:
-	intel_fbc_deactivate(dev_priv, reason);
+	if (!intel_fbc_can_flip_nuke(crtc_state)) {
+		intel_fbc_deactivate(dev_priv, reason);
+
+		/*
+		 * Display WA #1198: glk+
+		 * Need an extra vblank wait between FBC disable and most plane
+		 * updates. Bspec says this is only needed for plane disable, but
+		 * that is not true. Touching most plane registers will cause the
+		 * corruption to appear. Also SKL/derivatives do not seem to be
+		 * affected.
+		 *
+		 * TODO: could optimize this a bit by sampling the frame
+		 * counter when we disable FBC (if it was already done earlier)
+		 * and skipping the extra vblank wait before the plane update
+		 * if at least one frame has already passed.
+		 */
+		if (fbc->activated &&
+		    (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)))
+			need_vblank_wait = true;
+		fbc->activated = false;
+	}
 unlock:
 	mutex_unlock(&fbc->lock);
+
+	return need_vblank_wait;
 }
 
 /**
@@ -897,14 +927,13 @@ static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
 	struct intel_crtc *crtc = fbc->crtc;
 
 	WARN_ON(!mutex_is_locked(&fbc->lock));
-	WARN_ON(!fbc->enabled);
+	WARN_ON(!fbc->crtc);
 	WARN_ON(fbc->active);
 
 	DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
 
 	__intel_fbc_cleanup_cfb(dev_priv);
 
-	fbc->enabled = false;
 	fbc->crtc = NULL;
 }
 
@@ -915,11 +944,10 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc)
 
 	WARN_ON(!mutex_is_locked(&fbc->lock));
 
-	if (!fbc->enabled || fbc->crtc != crtc)
+	if (fbc->crtc != crtc)
 		return;
 
 	fbc->flip_pending = false;
-	WARN_ON(fbc->active);
 
 	if (!i915_modparams.enable_fbc) {
 		intel_fbc_deactivate(dev_priv, "disabled at runtime per module param");
@@ -933,10 +961,9 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc)
 	if (!intel_fbc_can_activate(crtc))
 		return;
 
-	if (!fbc->busy_bits) {
-		intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)");
+	if (!fbc->busy_bits)
 		intel_fbc_hw_activate(dev_priv);
-	} else
+	else
 		intel_fbc_deactivate(dev_priv, "frontbuffer write");
 }
 
@@ -955,7 +982,7 @@ void intel_fbc_post_update(struct intel_crtc *crtc)
 
 static unsigned int intel_fbc_get_frontbuffer_bit(struct intel_fbc *fbc)
 {
-	if (fbc->enabled)
+	if (fbc->crtc)
 		return to_intel_plane(fbc->crtc->base.primary)->frontbuffer_bit;
 	else
 		return fbc->possible_framebuffer_bits;
@@ -977,7 +1004,7 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 
 	fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits;
 
-	if (fbc->enabled && fbc->busy_bits)
+	if (fbc->crtc && fbc->busy_bits)
 		intel_fbc_deactivate(dev_priv, "frontbuffer write");
 
 	mutex_unlock(&fbc->lock);
@@ -998,7 +1025,7 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv,
 	if (origin == ORIGIN_GTT || origin == ORIGIN_FLIP)
 		goto out;
 
-	if (!fbc->busy_bits && fbc->enabled &&
+	if (!fbc->busy_bits && fbc->crtc &&
 	    (frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) {
 		if (fbc->active)
 			intel_fbc_recompress(dev_priv);
@@ -1047,12 +1074,12 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 	 * to pipe or plane A. */
 	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
 		struct intel_crtc_state *crtc_state;
-		struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc);
+		struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc);
 
 		if (!plane->has_fbc)
 			continue;
 
-		if (!plane_state->base.visible)
+		if (!plane_state->uapi.visible)
 			continue;
 
 		crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
@@ -1081,42 +1108,53 @@ out:
  * intel_fbc_disable in the middle, as long as it is deactivated.
  */
 void intel_fbc_enable(struct intel_crtc *crtc,
-		      struct intel_crtc_state *crtc_state,
-		      struct intel_plane_state *plane_state)
+		      const struct intel_crtc_state *crtc_state,
+		      const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 
 	if (!fbc_supported(dev_priv))
 		return;
 
 	mutex_lock(&fbc->lock);
 
-	if (fbc->enabled) {
-		WARN_ON(fbc->crtc == NULL);
-		if (fbc->crtc == crtc) {
-			WARN_ON(!crtc_state->enable_fbc);
-			WARN_ON(fbc->active);
-		}
-		goto out;
-	}
+	if (fbc->crtc) {
+		if (fbc->crtc != crtc ||
+		    !intel_fbc_cfb_size_changed(dev_priv))
+			goto out;
 
-	if (!crtc_state->enable_fbc)
-		goto out;
+		__intel_fbc_disable(dev_priv);
+	}
 
 	WARN_ON(fbc->active);
-	WARN_ON(fbc->crtc != NULL);
 
 	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
-	if (intel_fbc_alloc_cfb(crtc)) {
+
+	/* FIXME crtc_state->enable_fbc lies :( */
+	if (!cache->plane.visible)
+		goto out;
+
+	if (intel_fbc_alloc_cfb(dev_priv,
+				intel_fbc_calculate_cfb_size(dev_priv, cache),
+				fb->format->cpp[0])) {
+		cache->plane.visible = false;
 		fbc->no_fbc_reason = "not enough stolen memory";
 		goto out;
 	}
 
+	if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) &&
+	    fb->modifier != I915_FORMAT_MOD_X_TILED)
+		cache->gen9_wa_cfb_stride =
+			DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8;
+	else
+		cache->gen9_wa_cfb_stride = 0;
+
 	DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe));
 	fbc->no_fbc_reason = "FBC enabled but not active yet\n";
 
-	fbc->enabled = true;
 	fbc->crtc = crtc;
 out:
 	mutex_unlock(&fbc->lock);
@@ -1156,7 +1194,7 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv)
 		return;
 
 	mutex_lock(&fbc->lock);
-	if (fbc->enabled) {
+	if (fbc->crtc) {
 		WARN_ON(fbc->crtc->active);
 		__intel_fbc_disable(dev_priv);
 	}
@@ -1172,7 +1210,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work)
 	mutex_lock(&fbc->lock);
 
 	/* Maybe we were scheduled twice. */
-	if (fbc->underrun_detected || !fbc->enabled)
+	if (fbc->underrun_detected || !fbc->crtc)
 		goto out;
 
 	DRM_DEBUG_KMS("Disabling FBC due to FIFO underrun.\n");
@@ -1244,28 +1282,6 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv)
 	schedule_work(&fbc->underrun_work);
 }
 
-/**
- * intel_fbc_init_pipe_state - initialize FBC's CRTC visibility tracking
- * @dev_priv: i915 device instance
- *
- * The FBC code needs to track CRTC visibility since the older platforms can't
- * have FBC enabled while multiple pipes are used. This function does the
- * initial setup at driver load to make sure FBC is matching the real hardware.
- */
-void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv)
-{
-	struct intel_crtc *crtc;
-
-	/* Don't even bother tracking anything if we don't need. */
-	if (!no_fbc_on_multiple_pipes(dev_priv))
-		return;
-
-	for_each_intel_crtc(&dev_priv->drm, crtc)
-		if (intel_crtc_active(crtc) &&
-		    crtc->base.primary->state->visible)
-			dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe);
-}
-
 /*
  * The DDX driver changes its behavior depending on the value it reads from
  * i915.enable_fbc, so sanitize it by translating the default value into either
@@ -1283,10 +1299,6 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
 	if (!HAS_FBC(dev_priv))
 		return 0;
 
-	/* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */
-	if (IS_GEMINILAKE(dev_priv))
-		return 0;
-
 	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
 		return 1;
 
@@ -1317,9 +1329,11 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
 
 	INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn);
 	mutex_init(&fbc->lock);
-	fbc->enabled = false;
 	fbc->active = false;
 
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		mkwrite_device_info(dev_priv)->display.has_fbc = false;
+
 	if (need_fbc_vtd_wa(dev_priv))
 		mkwrite_device_info(dev_priv)->display.has_fbc = false;
 
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h
index 50272eda8d43..c8a5e5098687 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.h
+++ b/drivers/gpu/drm/i915/display/intel_fbc.h
@@ -19,15 +19,14 @@ struct intel_plane_state;
 void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 			   struct intel_atomic_state *state);
 bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
-void intel_fbc_pre_update(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state,
-			  struct intel_plane_state *plane_state);
+bool intel_fbc_pre_update(struct intel_crtc *crtc,
+			  const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state);
 void intel_fbc_post_update(struct intel_crtc *crtc);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
-void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv);
 void intel_fbc_enable(struct intel_crtc *crtc,
-		      struct intel_crtc_state *crtc_state,
-		      struct intel_plane_state *plane_state);
+		      const struct intel_crtc_state *crtc_state,
+		      const struct intel_plane_state *plane_state);
 void intel_fbc_disable(struct intel_crtc *crtc);
 void intel_fbc_global_disable(struct drm_i915_private *dev_priv);
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index b5c588e511dd..1e98e432c9fa 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -100,7 +100,7 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var,
 	return ret;
 }
 
-static struct fb_ops intelfb_ops = {
+static const struct fb_ops intelfb_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_set_par = intel_fbdev_set_par,
@@ -141,10 +141,10 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	/* If the FB is too big, just don't use it since fbdev is not very
 	 * important and we should probably use that space with FBC or other
 	 * features. */
-	obj = NULL;
+	obj = ERR_PTR(-ENODEV);
 	if (size * 2 < dev_priv->stolen_usable_size)
 		obj = i915_gem_object_create_stolen(dev_priv, size);
-	if (obj == NULL)
+	if (IS_ERR(obj))
 		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
@@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		sizes->fb_height = intel_fb->base.height;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
@@ -267,7 +266,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma_flags = flags;
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
 
@@ -275,7 +273,6 @@ out_unpin:
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -292,11 +289,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->vma) {
-		mutex_lock(&ifbdev->helper.dev->struct_mutex);
+	if (ifbdev->vma)
 		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
-		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
-	}
 
 	if (ifbdev->fb)
 		drm_framebuffer_remove(&ifbdev->fb->base);
@@ -445,7 +439,7 @@ int intel_fbdev_init(struct drm_device *dev)
 	struct intel_fbdev *ifbdev;
 	int ret;
 
-	if (WARN_ON(!HAS_DISPLAY(dev_priv)))
+	if (WARN_ON(!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)))
 		return -ENODEV;
 
 	ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
index ab61f88d1d33..6c83b350525d 100644
--- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
+++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
@@ -126,8 +126,8 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev,
 	}
 }
 
-static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev,
-						 enum pipe pipe, bool enable)
+static void ilk_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 bit = (pipe == PIPE_A) ?
@@ -139,7 +139,7 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev,
 		ilk_disable_display_irq(dev_priv, bit);
 }
 
-static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
+static void ivb_check_fifo_underruns(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
@@ -157,9 +157,9 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
 	DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe));
 }
 
-static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
-						  enum pipe pipe,
-						  bool enable, bool old)
+static void ivb_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable,
+					    bool old)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	if (enable) {
@@ -180,8 +180,8 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
 	}
 }
 
-static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev,
-						  enum pipe pipe, bool enable)
+static void bdw_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -264,11 +264,11 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
 	if (HAS_GMCH(dev_priv))
 		i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (IS_GEN_RANGE(dev_priv, 5, 6))
-		ironlake_set_fifo_underrun_reporting(dev, pipe, enable);
+		ilk_set_fifo_underrun_reporting(dev, pipe, enable);
 	else if (IS_GEN(dev_priv, 7))
-		ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old);
+		ivb_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (INTEL_GEN(dev_priv) >= 8)
-		broadwell_set_fifo_underrun_reporting(dev, pipe, enable);
+		bdw_set_fifo_underrun_reporting(dev, pipe, enable);
 
 	return old;
 }
@@ -427,7 +427,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv)
 		if (HAS_GMCH(dev_priv))
 			i9xx_check_fifo_underruns(crtc);
 		else if (IS_GEN(dev_priv, 7))
-			ivybridge_check_fifo_underruns(crtc);
+			ivb_check_fifo_underruns(crtc);
 	}
 
 	spin_unlock_irq(&dev_priv->irq_lock);
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 719379774fa5..6cb02c912acc 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void frontbuffer_retire(struct i915_active *ref)
 {
 	struct intel_frontbuffer *front =
@@ -220,12 +221,19 @@ static void frontbuffer_release(struct kref *ref)
 {
 	struct intel_frontbuffer *front =
 		container_of(ref, typeof(*front), ref);
+	struct drm_i915_gem_object *obj = front->obj;
+	struct i915_vma *vma;
 
-	front->obj->frontbuffer = NULL;
-	spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock);
+	spin_lock(&obj->vma.lock);
+	for_each_ggtt_vma(vma, obj)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+	spin_unlock(&obj->vma.lock);
 
-	i915_gem_object_put(front->obj);
-	kfree(front);
+	RCU_INIT_POINTER(obj->frontbuffer, NULL);
+	spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock);
+
+	i915_gem_object_put(obj);
+	kfree_rcu(front, rcu);
 }
 
 struct intel_frontbuffer *
@@ -234,11 +242,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct intel_frontbuffer *front;
 
-	spin_lock(&i915->fb_tracking.lock);
-	front = obj->frontbuffer;
-	if (front)
-		kref_get(&front->ref);
-	spin_unlock(&i915->fb_tracking.lock);
+	front = __intel_frontbuffer_get(obj);
 	if (front)
 		return front;
 
@@ -249,17 +253,18 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	front->obj = obj;
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
-	i915_active_init(i915, &front->write,
-			 frontbuffer_active, frontbuffer_retire);
+	i915_active_init(&front->write,
+			 frontbuffer_active,
+			 i915_active_may_sleep(frontbuffer_retire));
 
 	spin_lock(&i915->fb_tracking.lock);
-	if (obj->frontbuffer) {
+	if (rcu_access_pointer(obj->frontbuffer)) {
 		kfree(front);
-		front = obj->frontbuffer;
+		front = rcu_dereference_protected(obj->frontbuffer, true);
 		kref_get(&front->ref);
 	} else {
 		i915_gem_object_get(obj);
-		obj->frontbuffer = front;
+		rcu_assign_pointer(obj->frontbuffer, front);
 	}
 	spin_unlock(&i915->fb_tracking.lock);
 
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
index adc64d61a4a5..6d41f5394425 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
@@ -27,10 +27,10 @@
 #include <linux/atomic.h>
 #include <linux/kref.h>
 
+#include "gem/i915_gem_object_types.h"
 #include "i915_active.h"
 
 struct drm_i915_private;
-struct drm_i915_gem_object;
 
 enum fb_op_origin {
 	ORIGIN_GTT,
@@ -45,6 +45,7 @@ struct intel_frontbuffer {
 	atomic_t bits;
 	struct i915_active write;
 	struct drm_i915_gem_object *obj;
+	struct rcu_head rcu;
 };
 
 void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
@@ -54,6 +55,35 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
 void intel_frontbuffer_flip(struct drm_i915_private *i915,
 			    unsigned frontbuffer_bits);
 
+void intel_frontbuffer_put(struct intel_frontbuffer *front);
+
+static inline struct intel_frontbuffer *
+__intel_frontbuffer_get(const struct drm_i915_gem_object *obj)
+{
+	struct intel_frontbuffer *front;
+
+	if (likely(!rcu_access_pointer(obj->frontbuffer)))
+		return NULL;
+
+	rcu_read_lock();
+	do {
+		front = rcu_dereference(obj->frontbuffer);
+		if (!front)
+			break;
+
+		if (unlikely(!kref_get_unless_zero(&front->ref)))
+			continue;
+
+		if (likely(front == rcu_access_pointer(obj->frontbuffer)))
+			break;
+
+		intel_frontbuffer_put(front);
+	} while (1);
+	rcu_read_unlock();
+
+	return front;
+}
+
 struct intel_frontbuffer *
 intel_frontbuffer_get(struct drm_i915_gem_object *obj);
 
@@ -119,6 +149,4 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old,
 			     struct intel_frontbuffer *new,
 			     unsigned int frontbuffer_bits);
 
-void intel_frontbuffer_put(struct intel_frontbuffer *front);
-
 #endif /* __INTEL_FRONTBUFFER_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index d6775a005726..3d4d19ac1d14 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -836,7 +836,7 @@ int intel_gmbus_setup(struct drm_i915_private *dev_priv)
 	unsigned int pin;
 	int ret;
 
-	if (!HAS_DISPLAY(dev_priv))
+	if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))
 		return 0;
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 6ec5ceeab601..0fdbd39f6641 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -1,9 +1,11 @@
 /* SPDX-License-Identifier: MIT */
 /*
  * Copyright (C) 2017 Google, Inc.
+ * Copyright _ 2017-2019, Intel Corporation.
  *
  * Authors:
  * Sean Paul <seanpaul@chromium.org>
+ * Ramalingam C <ramalingam.c@intel.com>
  */
 
 #include <linux/component.h>
@@ -18,6 +20,7 @@
 #include "intel_display_types.h"
 #include "intel_hdcp.h"
 #include "intel_sideband.h"
+#include "intel_connector.h"
 
 #define KEY_LOAD_TRIES	5
 #define ENCRYPT_STATUS_CHANGE_TIMEOUT_MS	50
@@ -105,24 +108,20 @@ bool intel_hdcp2_capable(struct intel_connector *connector)
 	return capable;
 }
 
-static inline bool intel_hdcp_in_use(struct intel_connector *connector)
+static inline
+bool intel_hdcp_in_use(struct drm_i915_private *dev_priv,
+		       enum transcoder cpu_transcoder, enum port port)
 {
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	enum port port = connector->encoder->port;
-	u32 reg;
-
-	reg = I915_READ(PORT_HDCP_STATUS(port));
-	return reg & HDCP_STATUS_ENC;
+	return I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) &
+	       HDCP_STATUS_ENC;
 }
 
-static inline bool intel_hdcp2_in_use(struct intel_connector *connector)
+static inline
+bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv,
+			enum transcoder cpu_transcoder, enum port port)
 {
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	enum port port = connector->encoder->port;
-	u32 reg;
-
-	reg = I915_READ(HDCP2_STATUS_DDI(port));
-	return reg & LINK_ENCRYPTION_STATUS;
+	return I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) &
+	       LINK_ENCRYPTION_STATUS;
 }
 
 static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port,
@@ -253,9 +252,29 @@ static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text)
 }
 
 static
-u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port)
+u32 intel_hdcp_get_repeater_ctl(struct drm_i915_private *dev_priv,
+				enum transcoder cpu_transcoder, enum port port)
 {
-	enum port port = intel_dig_port->base.port;
+	if (INTEL_GEN(dev_priv) >= 12) {
+		switch (cpu_transcoder) {
+		case TRANSCODER_A:
+			return HDCP_TRANSA_REP_PRESENT |
+			       HDCP_TRANSA_SHA1_M0;
+		case TRANSCODER_B:
+			return HDCP_TRANSB_REP_PRESENT |
+			       HDCP_TRANSB_SHA1_M0;
+		case TRANSCODER_C:
+			return HDCP_TRANSC_REP_PRESENT |
+			       HDCP_TRANSC_SHA1_M0;
+		case TRANSCODER_D:
+			return HDCP_TRANSD_REP_PRESENT |
+			       HDCP_TRANSD_SHA1_M0;
+		default:
+			DRM_ERROR("Unknown transcoder %d\n", cpu_transcoder);
+			return -EINVAL;
+		}
+	}
+
 	switch (port) {
 	case PORT_A:
 		return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0;
@@ -268,18 +287,20 @@ u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port)
 	case PORT_E:
 		return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0;
 	default:
-		break;
+		DRM_ERROR("Unknown port %d\n", port);
+		return -EINVAL;
 	}
-	DRM_ERROR("Unknown port %d\n", port);
-	return -EINVAL;
 }
 
 static
-int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port,
+int intel_hdcp_validate_v_prime(struct intel_connector *connector,
 				const struct intel_hdcp_shim *shim,
 				u8 *ksv_fifo, u8 num_downstream, u8 *bstatus)
 {
+	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
 	struct drm_i915_private *dev_priv;
+	enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder;
+	enum port port = intel_dig_port->base.port;
 	u32 vprime, sha_text, sha_leftovers, rep_ctl;
 	int ret, i, j, sha_idx;
 
@@ -306,7 +327,7 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port,
 	sha_idx = 0;
 	sha_text = 0;
 	sha_leftovers = 0;
-	rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port);
+	rep_ctl = intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port);
 	I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32);
 	for (i = 0; i < num_downstream; i++) {
 		unsigned int sha_empty;
@@ -548,7 +569,7 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector)
 	 * V prime atleast twice.
 	 */
 	for (i = 0; i < tries; i++) {
-		ret = intel_hdcp_validate_v_prime(intel_dig_port, shim,
+		ret = intel_hdcp_validate_v_prime(connector, shim,
 						  ksv_fifo, num_downstream,
 						  bstatus);
 		if (!ret)
@@ -576,6 +597,7 @@ static int intel_hdcp_auth(struct intel_connector *connector)
 	struct drm_device *dev = connector->base.dev;
 	const struct intel_hdcp_shim *shim = hdcp->shim;
 	struct drm_i915_private *dev_priv;
+	enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder;
 	enum port port;
 	unsigned long r0_prime_gen_start;
 	int ret, i, tries = 2;
@@ -615,18 +637,21 @@ static int intel_hdcp_auth(struct intel_connector *connector)
 
 	/* Initialize An with 2 random values and acquire it */
 	for (i = 0; i < 2; i++)
-		I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32());
-	I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN);
+		I915_WRITE(HDCP_ANINIT(dev_priv, cpu_transcoder, port),
+			   get_random_u32());
+	I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port),
+		   HDCP_CONF_CAPTURE_AN);
 
 	/* Wait for An to be acquired */
-	if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port),
+	if (intel_de_wait_for_set(dev_priv,
+				  HDCP_STATUS(dev_priv, cpu_transcoder, port),
 				  HDCP_STATUS_AN_READY, 1)) {
 		DRM_ERROR("Timed out waiting for An\n");
 		return -ETIMEDOUT;
 	}
 
-	an.reg[0] = I915_READ(PORT_HDCP_ANLO(port));
-	an.reg[1] = I915_READ(PORT_HDCP_ANHI(port));
+	an.reg[0] = I915_READ(HDCP_ANLO(dev_priv, cpu_transcoder, port));
+	an.reg[1] = I915_READ(HDCP_ANHI(dev_priv, cpu_transcoder, port));
 	ret = shim->write_an_aksv(intel_dig_port, an.shim);
 	if (ret)
 		return ret;
@@ -644,24 +669,26 @@ static int intel_hdcp_auth(struct intel_connector *connector)
 		return -EPERM;
 	}
 
-	I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]);
-	I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]);
+	I915_WRITE(HDCP_BKSVLO(dev_priv, cpu_transcoder, port), bksv.reg[0]);
+	I915_WRITE(HDCP_BKSVHI(dev_priv, cpu_transcoder, port), bksv.reg[1]);
 
 	ret = shim->repeater_present(intel_dig_port, &repeater_present);
 	if (ret)
 		return ret;
 	if (repeater_present)
 		I915_WRITE(HDCP_REP_CTL,
-			   intel_hdcp_get_repeater_ctl(intel_dig_port));
+			   intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder,
+						       port));
 
 	ret = shim->toggle_signalling(intel_dig_port, true);
 	if (ret)
 		return ret;
 
-	I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC);
+	I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port),
+		   HDCP_CONF_AUTH_AND_ENC);
 
 	/* Wait for R0 ready */
-	if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+	if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) &
 		     (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) {
 		DRM_ERROR("Timed out waiting for R0 ready\n");
 		return -ETIMEDOUT;
@@ -689,22 +716,25 @@ static int intel_hdcp_auth(struct intel_connector *connector)
 		ret = shim->read_ri_prime(intel_dig_port, ri.shim);
 		if (ret)
 			return ret;
-		I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+		I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg);
 
 		/* Wait for Ri prime match */
-		if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+		if (!wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder,
+						    port)) &
 		    (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1))
 			break;
 	}
 
 	if (i == tries) {
 		DRM_DEBUG_KMS("Timed out waiting for Ri prime match (%x)\n",
-			      I915_READ(PORT_HDCP_STATUS(port)));
+			      I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder,
+						    port)));
 		return -ETIMEDOUT;
 	}
 
 	/* Wait for encryption confirmation */
-	if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port),
+	if (intel_de_wait_for_set(dev_priv,
+				  HDCP_STATUS(dev_priv, cpu_transcoder, port),
 				  HDCP_STATUS_ENC,
 				  ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) {
 		DRM_ERROR("Timed out waiting for encryption\n");
@@ -729,15 +759,17 @@ static int _intel_hdcp_disable(struct intel_connector *connector)
 	struct drm_i915_private *dev_priv = connector->base.dev->dev_private;
 	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
 	enum port port = intel_dig_port->base.port;
+	enum transcoder cpu_transcoder = hdcp->cpu_transcoder;
 	int ret;
 
 	DRM_DEBUG_KMS("[%s:%d] HDCP is being disabled...\n",
 		      connector->base.name, connector->base.base.id);
 
 	hdcp->hdcp_encrypted = false;
-	I915_WRITE(PORT_HDCP_CONF(port), 0);
-	if (intel_de_wait_for_clear(dev_priv, PORT_HDCP_STATUS(port), ~0,
-				    ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) {
+	I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), 0);
+	if (intel_de_wait_for_clear(dev_priv,
+				    HDCP_STATUS(dev_priv, cpu_transcoder, port),
+				    ~0, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) {
 		DRM_ERROR("Failed to disable HDCP, timeout clearing status\n");
 		return -ETIMEDOUT;
 	}
@@ -808,9 +840,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector)
 	struct drm_i915_private *dev_priv = connector->base.dev->dev_private;
 	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
 	enum port port = intel_dig_port->base.port;
+	enum transcoder cpu_transcoder;
 	int ret = 0;
 
 	mutex_lock(&hdcp->mutex);
+	cpu_transcoder = hdcp->cpu_transcoder;
 
 	/* Check_link valid only when HDCP1.4 is enabled */
 	if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED ||
@@ -819,10 +853,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector)
 		goto out;
 	}
 
-	if (WARN_ON(!intel_hdcp_in_use(connector))) {
+	if (WARN_ON(!intel_hdcp_in_use(dev_priv, cpu_transcoder, port))) {
 		DRM_ERROR("%s:%d HDCP link stopped encryption,%x\n",
 			  connector->base.name, connector->base.base.id,
-			  I915_READ(PORT_HDCP_STATUS(port)));
+			  I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder,
+						port)));
 		ret = -ENXIO;
 		hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED;
 		schedule_work(&hdcp->prop_work);
@@ -887,7 +922,7 @@ static void intel_hdcp_prop_work(struct work_struct *work)
 bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port)
 {
 	/* PORT E doesn't have HDCP, and PORT F is disabled */
-	return INTEL_GEN(dev_priv) >= 9 && port < PORT_E;
+	return INTEL_INFO(dev_priv)->display.has_hdcp && port < PORT_E;
 }
 
 static int
@@ -1493,10 +1528,11 @@ static int hdcp2_enable_encryption(struct intel_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	enum port port = connector->encoder->port;
+	enum transcoder cpu_transcoder = hdcp->cpu_transcoder;
 	int ret;
 
-	WARN_ON(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS);
-
+	WARN_ON(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) &
+		LINK_ENCRYPTION_STATUS);
 	if (hdcp->shim->toggle_signalling) {
 		ret = hdcp->shim->toggle_signalling(intel_dig_port, true);
 		if (ret) {
@@ -1506,14 +1542,18 @@ static int hdcp2_enable_encryption(struct intel_connector *connector)
 		}
 	}
 
-	if (I915_READ(HDCP2_STATUS_DDI(port)) & LINK_AUTH_STATUS) {
+	if (I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) &
+	    LINK_AUTH_STATUS) {
 		/* Link is Authenticated. Now set for Encryption */
-		I915_WRITE(HDCP2_CTL_DDI(port),
-			   I915_READ(HDCP2_CTL_DDI(port)) |
+		I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port),
+			   I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder,
+					       port)) |
 			   CTL_LINK_ENCRYPTION_REQ);
 	}
 
-	ret = intel_de_wait_for_set(dev_priv, HDCP2_STATUS_DDI(port),
+	ret = intel_de_wait_for_set(dev_priv,
+				    HDCP2_STATUS(dev_priv, cpu_transcoder,
+						 port),
 				    LINK_ENCRYPTION_STATUS,
 				    ENCRYPT_STATUS_CHANGE_TIMEOUT_MS);
 
@@ -1526,14 +1566,19 @@ static int hdcp2_disable_encryption(struct intel_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	enum port port = connector->encoder->port;
+	enum transcoder cpu_transcoder = hdcp->cpu_transcoder;
 	int ret;
 
-	WARN_ON(!(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS));
+	WARN_ON(!(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) &
+			    LINK_ENCRYPTION_STATUS));
 
-	I915_WRITE(HDCP2_CTL_DDI(port),
-		   I915_READ(HDCP2_CTL_DDI(port)) & ~CTL_LINK_ENCRYPTION_REQ);
+	I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port),
+		   I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, port)) &
+		   ~CTL_LINK_ENCRYPTION_REQ);
 
-	ret = intel_de_wait_for_clear(dev_priv, HDCP2_STATUS_DDI(port),
+	ret = intel_de_wait_for_clear(dev_priv,
+				      HDCP2_STATUS(dev_priv, cpu_transcoder,
+						   port),
 				      LINK_ENCRYPTION_STATUS,
 				      ENCRYPT_STATUS_CHANGE_TIMEOUT_MS);
 	if (ret == -ETIMEDOUT)
@@ -1632,9 +1677,11 @@ static int intel_hdcp2_check_link(struct intel_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	enum port port = connector->encoder->port;
+	enum transcoder cpu_transcoder;
 	int ret = 0;
 
 	mutex_lock(&hdcp->mutex);
+	cpu_transcoder = hdcp->cpu_transcoder;
 
 	/* hdcp2_check_link is expected only when HDCP2.2 is Enabled */
 	if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED ||
@@ -1643,9 +1690,10 @@ static int intel_hdcp2_check_link(struct intel_connector *connector)
 		goto out;
 	}
 
-	if (WARN_ON(!intel_hdcp2_in_use(connector))) {
+	if (WARN_ON(!intel_hdcp2_in_use(dev_priv, cpu_transcoder, port))) {
 		DRM_ERROR("HDCP2.2 link stopped the encryption, %x\n",
-			  I915_READ(HDCP2_STATUS_DDI(port)));
+			  I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder,
+						 port)));
 		ret = -ENXIO;
 		hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED;
 		schedule_work(&hdcp->prop_work);
@@ -1749,13 +1797,54 @@ static const struct component_ops i915_hdcp_component_ops = {
 	.unbind = i915_hdcp_component_unbind,
 };
 
+static inline
+enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port)
+{
+	switch (port) {
+	case PORT_A:
+		return MEI_DDI_A;
+	case PORT_B ... PORT_F:
+		return (enum mei_fw_ddi)port;
+	default:
+		return MEI_DDI_INVALID_PORT;
+	}
+}
+
+static inline
+enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
+{
+	switch (cpu_transcoder) {
+	case TRANSCODER_A ... TRANSCODER_D:
+		return (enum mei_fw_tc)(cpu_transcoder | 0x10);
+	default: /* eDP, DSI TRANSCODERS are non HDCP capable */
+		return MEI_INVALID_TRANSCODER;
+	}
+}
+
 static inline int initialize_hdcp_port_data(struct intel_connector *connector,
 					    const struct intel_hdcp_shim *shim)
 {
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	struct hdcp_port_data *data = &hdcp->port_data;
 
-	data->port = connector->encoder->port;
+	if (INTEL_GEN(dev_priv) < 12)
+		data->fw_ddi =
+			intel_get_mei_fw_ddi_index(connector->encoder->port);
+	else
+		/*
+		 * As per ME FW API expectation, for GEN 12+, fw_ddi is filled
+		 * with zero(INVALID PORT index).
+		 */
+		data->fw_ddi = MEI_DDI_INVALID_PORT;
+
+	/*
+	 * As associated transcoder is set and modified at modeset, here fw_tc
+	 * is initialized to zero (invalid transcoder index). This will be
+	 * retained for <Gen12 forever.
+	 */
+	data->fw_tc = MEI_INVALID_TRANSCODER;
+
 	data->port_type = (u8)HDCP_PORT_TYPE_INTEGRATED;
 	data->protocol = (u8)shim->protocol;
 
@@ -1781,7 +1870,7 @@ static bool is_hdcp2_supported(struct drm_i915_private *dev_priv)
 		return false;
 
 	return (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) ||
-		IS_KABYLAKE(dev_priv));
+		IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv));
 }
 
 void intel_hdcp_component_init(struct drm_i915_private *dev_priv)
@@ -1853,8 +1942,10 @@ int intel_hdcp_init(struct intel_connector *connector,
 	return 0;
 }
 
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
+int intel_hdcp_enable(struct intel_connector *connector,
+		      enum transcoder cpu_transcoder, u8 content_type)
 {
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS;
 	int ret = -EINVAL;
@@ -1866,6 +1957,11 @@ int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
 	WARN_ON(hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED);
 	hdcp->content_type = content_type;
 
+	if (INTEL_GEN(dev_priv) >= 12) {
+		hdcp->cpu_transcoder = cpu_transcoder;
+		hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder);
+	}
+
 	/*
 	 * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup
 	 * is capable of HDCP2.2, it is preferred to use HDCP2.2.
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index 13555b054930..f3c3272e712a 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -15,13 +15,16 @@ struct drm_connector_state;
 struct drm_i915_private;
 struct intel_connector;
 struct intel_hdcp_shim;
+enum port;
+enum transcoder;
 
 void intel_hdcp_atomic_check(struct drm_connector *connector,
 			     struct drm_connector_state *old_state,
 			     struct drm_connector_state *new_state);
 int intel_hdcp_init(struct intel_connector *connector,
 		    const struct intel_hdcp_shim *hdcp_shim);
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type);
+int intel_hdcp_enable(struct intel_connector *connector,
+		      enum transcoder cpu_transcoder, u8 content_type);
 int intel_hdcp_disable(struct intel_connector *connector);
 bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
 bool intel_hdcp_capable(struct intel_connector *connector);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index b030f7ae3302..93ac0f296852 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -85,16 +85,17 @@ assert_hdmi_transcoder_func_disabled(struct drm_i915_private *dev_priv,
 	     "HDMI transcoder function enabled, expecting disabled\n");
 }
 
-struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder)
+struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder)
 {
 	struct intel_digital_port *intel_dig_port =
-		container_of(encoder, struct intel_digital_port, base.base);
+		container_of(&encoder->base, struct intel_digital_port,
+			     base.base);
 	return &intel_dig_port->hdmi;
 }
 
-static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector)
+static struct intel_hdmi *intel_attached_hdmi(struct intel_connector *connector)
 {
-	return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base);
+	return enc_to_intel_hdmi(intel_attached_encoder(connector));
 }
 
 static u32 g4x_infoframe_index(unsigned int type)
@@ -189,13 +190,19 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv,
 	}
 }
 
-static int hsw_dip_data_size(unsigned int type)
+static int hsw_dip_data_size(struct drm_i915_private *dev_priv,
+			     unsigned int type)
 {
 	switch (type) {
 	case DP_SDP_VSC:
 		return VIDEO_DIP_VSC_DATA_SIZE;
 	case DP_SDP_PPS:
 		return VIDEO_DIP_PPS_DATA_SIZE;
+	case HDMI_PACKET_TYPE_GAMUT_METADATA:
+		if (INTEL_GEN(dev_priv) >= 11)
+			return VIDEO_DIP_GMP_DATA_SIZE;
+		else
+			return VIDEO_DIP_DATA_SIZE;
 	default:
 		return VIDEO_DIP_DATA_SIZE;
 	}
@@ -279,7 +286,7 @@ static void ibx_write_infoframe(struct intel_encoder *encoder,
 {
 	const u32 *data = frame;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	int i;
@@ -315,7 +322,7 @@ static void ibx_read_infoframe(struct intel_encoder *encoder,
 			       void *frame, ssize_t len)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	u32 val, *data = frame;
 	int i;
 
@@ -334,7 +341,7 @@ static u32 ibx_infoframes_enabled(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe;
 	i915_reg_t reg = TVIDEO_DIP_CTL(pipe);
 	u32 val = I915_READ(reg);
 
@@ -356,7 +363,7 @@ static void cpt_write_infoframe(struct intel_encoder *encoder,
 {
 	const u32 *data = frame;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	int i;
@@ -395,7 +402,7 @@ static void cpt_read_infoframe(struct intel_encoder *encoder,
 			       void *frame, ssize_t len)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	u32 val, *data = frame;
 	int i;
 
@@ -414,7 +421,7 @@ static u32 cpt_infoframes_enabled(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe;
 	u32 val = I915_READ(TVIDEO_DIP_CTL(pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
@@ -432,7 +439,7 @@ static void vlv_write_infoframe(struct intel_encoder *encoder,
 {
 	const u32 *data = frame;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	int i;
@@ -468,7 +475,7 @@ static void vlv_read_infoframe(struct intel_encoder *encoder,
 			       void *frame, ssize_t len)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	u32 val, *data = frame;
 	int i;
 
@@ -487,7 +494,7 @@ static u32 vlv_infoframes_enabled(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(pipe_config->uapi.crtc)->pipe;
 	u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
@@ -514,7 +521,9 @@ static void hsw_write_infoframe(struct intel_encoder *encoder,
 	int i;
 	u32 val = I915_READ(ctl_reg);
 
-	data_size = hsw_dip_data_size(type);
+	data_size = hsw_dip_data_size(dev_priv, type);
+
+	WARN_ON(len > data_size);
 
 	val &= ~hsw_infoframe_enable(type);
 	I915_WRITE(ctl_reg, val);
@@ -594,7 +603,7 @@ u32 intel_hdmi_infoframes_enabled(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	u32 val, ret = 0;
 	int i;
 
@@ -638,7 +647,7 @@ static void intel_write_infoframe(struct intel_encoder *encoder,
 				  enum hdmi_infoframe_type type,
 				  const union hdmi_infoframe *frame)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	u8 buffer[VIDEO_DIP_DATA_SIZE];
 	ssize_t len;
 
@@ -667,7 +676,7 @@ void intel_read_infoframe(struct intel_encoder *encoder,
 			  enum hdmi_infoframe_type type,
 			  union hdmi_infoframe *frame)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	u8 buffer[VIDEO_DIP_DATA_SIZE];
 	int ret;
 
@@ -700,7 +709,7 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder,
 {
 	struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	struct drm_connector *connector = conn_state->connector;
 	int ret;
 
@@ -724,11 +733,20 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder,
 
 	drm_hdmi_avi_infoframe_colorspace(frame, conn_state);
 
-	drm_hdmi_avi_infoframe_quant_range(frame, connector,
-					   adjusted_mode,
-					   crtc_state->limited_color_range ?
-					   HDMI_QUANTIZATION_RANGE_LIMITED :
-					   HDMI_QUANTIZATION_RANGE_FULL);
+	/* nonsense combination */
+	WARN_ON(crtc_state->limited_color_range &&
+		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB);
+
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) {
+		drm_hdmi_avi_infoframe_quant_range(frame, connector,
+						   adjusted_mode,
+						   crtc_state->limited_color_range ?
+						   HDMI_QUANTIZATION_RANGE_LIMITED :
+						   HDMI_QUANTIZATION_RANGE_FULL);
+	} else {
+		frame->quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+		frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED;
+	}
 
 	drm_hdmi_avi_infoframe_content_type(frame, conn_state);
 
@@ -787,7 +805,7 @@ intel_hdmi_compute_hdmi_infoframe(struct intel_encoder *encoder,
 
 	ret = drm_hdmi_vendor_infoframe_from_display_mode(frame,
 							  conn_state->connector,
-							  &crtc_state->base.adjusted_mode);
+							  &crtc_state->hw.adjusted_mode);
 	if (WARN_ON(ret))
 		return false;
 
@@ -838,7 +856,7 @@ static void g4x_set_infoframes(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	i915_reg_t reg = VIDEO_DIP_CTL;
 	u32 val = I915_READ(reg);
@@ -948,7 +966,7 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder,
 					 const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	i915_reg_t reg;
 
 	if ((crtc_state->infoframes.enable &
@@ -973,7 +991,7 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder,
 				   struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	i915_reg_t reg;
 
 	if ((crtc_state->infoframes.enable &
@@ -1010,7 +1028,7 @@ static void intel_hdmi_compute_gcp_infoframe(struct intel_encoder *encoder,
 
 	/* Enable default_phase whenever the display mode is suitably aligned */
 	if (gcp_default_phase_possible(crtc_state->pipe_bpp,
-				       &crtc_state->base.adjusted_mode))
+				       &crtc_state->hw.adjusted_mode))
 		crtc_state->infoframes.gcp |= GCP_DEFAULT_PHASE_ENABLE;
 }
 
@@ -1020,8 +1038,8 @@ static void ibx_set_infoframes(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
@@ -1079,8 +1097,8 @@ static void cpt_set_infoframes(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
@@ -1128,8 +1146,8 @@ static void vlv_set_infoframes(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(encoder->port);
@@ -1491,7 +1509,10 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
 {
 	struct drm_i915_private *dev_priv =
 		intel_dig_port->base.base.dev->dev_private;
+	struct intel_connector *connector =
+		intel_dig_port->hdmi.attached_connector;
 	enum port port = intel_dig_port->base.port;
+	enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder;
 	int ret;
 	union {
 		u32 reg;
@@ -1502,39 +1523,30 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
 	if (ret)
 		return false;
 
-	I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+	I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg);
 
 	/* Wait for Ri prime match */
-	if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+	if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) &
 		     (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
 		DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n",
-			  I915_READ(PORT_HDCP_STATUS(port)));
+			  I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder,
+						port)));
 		return false;
 	}
 	return true;
 }
 
-struct hdcp2_hdmi_msg_data {
+struct hdcp2_hdmi_msg_timeout {
 	u8 msg_id;
-	u32 timeout;
-	u32 timeout2;
+	u16 timeout;
 };
 
-static const struct hdcp2_hdmi_msg_data hdcp2_msg_data[] = {
-	{ HDCP_2_2_AKE_INIT, 0, 0 },
-	{ HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 },
-	{ HDCP_2_2_AKE_NO_STORED_KM, 0, 0 },
-	{ HDCP_2_2_AKE_STORED_KM, 0, 0 },
-	{ HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS,
-	  HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS },
-	{ HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, 0 },
-	{ HDCP_2_2_LC_INIT, 0, 0 },
-	{ HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0 },
-	{ HDCP_2_2_SKE_SEND_EKS, 0, 0 },
-	{ HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 },
-	{ HDCP_2_2_REP_SEND_ACK, 0, 0 },
-	{ HDCP_2_2_REP_STREAM_MANAGE, 0, 0 },
-	{ HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 },
+static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = {
+	{ HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, },
+	{ HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, },
+	{ HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, },
+	{ HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, },
+	{ HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, },
 };
 
 static
@@ -1551,12 +1563,17 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++)
-		if (hdcp2_msg_data[i].msg_id == msg_id &&
-		    (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired))
-			return hdcp2_msg_data[i].timeout;
-		else if (hdcp2_msg_data[i].msg_id == msg_id)
-			return hdcp2_msg_data[i].timeout2;
+	if (msg_id == HDCP_2_2_AKE_SEND_HPRIME) {
+		if (is_paired)
+			return HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS;
+		else
+			return HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) {
+		if (hdcp2_msg_timeout[i].msg_id == msg_id)
+			return hdcp2_msg_timeout[i].timeout;
+	}
 
 	return -EINVAL;
 }
@@ -1720,9 +1737,9 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-	const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	u32 hdmi_val;
 
 	intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
@@ -1758,7 +1775,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 				    enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	intel_wakeref_t wakeref;
 	bool ret;
 
@@ -1777,7 +1794,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
 				  struct intel_crtc_state *pipe_config)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 tmp, flags = 0;
@@ -1813,7 +1830,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 	    tmp & HDMI_COLOR_RANGE_16_235)
 		pipe_config->limited_color_range = true;
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
 	if ((tmp & SDVO_COLOR_FORMAT_MASK) == HDMI_COLOR_FORMAT_12bpc)
 		dotclock = pipe_config->port_clock * 2 / 3;
@@ -1823,7 +1840,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 	if (pipe_config->pixel_multiplier)
 		dotclock /= pipe_config->pixel_multiplier;
 
-	pipe_config->base.adjusted_mode.crtc_clock = dotclock;
+	pipe_config->hw.adjusted_mode.crtc_clock = dotclock;
 
 	pipe_config->lane_count = 4;
 
@@ -1844,7 +1861,7 @@ static void intel_enable_hdmi_audio(struct intel_encoder *encoder,
 				    const struct intel_crtc_state *pipe_config,
 				    const struct drm_connector_state *conn_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 
 	WARN_ON(!pipe_config->has_hdmi_sink);
 	DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
@@ -1858,7 +1875,7 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	u32 temp;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -1880,7 +1897,7 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	u32 temp;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -1930,8 +1947,8 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	enum pipe pipe = crtc->pipe;
 	u32 temp;
 
@@ -1991,10 +2008,10 @@ static void intel_disable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct intel_digital_port *intel_dig_port =
 		hdmi_to_dig_port(intel_hdmi);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	u32 temp;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -2144,7 +2161,7 @@ static enum drm_mode_status
 intel_hdmi_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
-	struct intel_hdmi *hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum drm_mode_status status;
@@ -2184,20 +2201,22 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 			status = hdmi_port_clock_valid(hdmi, clock * 5 / 4,
 						       true, force_dvi);
 	}
+	if (status != MODE_OK)
+		return status;
 
-	return status;
+	return intel_mode_valid_max_plane_size(dev_priv, mode);
 }
 
 static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state,
 				     int bpc)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(crtc_state->base.crtc->dev);
-	struct drm_atomic_state *state = crtc_state->base.state;
+		to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_atomic_state *state = crtc_state->uapi.state;
 	struct drm_connector_state *connector_state;
 	struct drm_connector *connector;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	int i;
 
 	if (HAS_GMCH(dev_priv))
@@ -2222,7 +2241,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state,
 	for_each_new_connector_in_state(state, connector, connector_state, i) {
 		const struct drm_display_info *info = &connector->display_info;
 
-		if (connector_state->crtc != crtc_state->base.crtc)
+		if (connector_state->crtc != crtc_state->uapi.crtc)
 			continue;
 
 		if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) {
@@ -2261,22 +2280,15 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state,
 
 static bool
 intel_hdmi_ycbcr420_config(struct drm_connector *connector,
-			   struct intel_crtc_state *config,
-			   int *clock_12bpc, int *clock_10bpc,
-			   int *clock_8bpc)
+			   struct intel_crtc_state *config)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(config->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(config->uapi.crtc);
 
 	if (!connector->ycbcr_420_allowed) {
 		DRM_ERROR("Platform doesn't support YCBCR420 output\n");
 		return false;
 	}
 
-	/* YCBCR420 TMDS rate requirement is half the pixel clock */
-	config->port_clock /= 2;
-	*clock_12bpc /= 2;
-	*clock_10bpc /= 2;
-	*clock_8bpc /= 2;
 	config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420;
 
 	/* YCBCR 420 output conversion needs a scaler */
@@ -2291,22 +2303,117 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector,
 	return true;
 }
 
+static int intel_hdmi_port_clock(int clock, int bpc)
+{
+	/*
+	 * Need to adjust the port link by:
+	 *  1.5x for 12bpc
+	 *  1.25x for 10bpc
+	 */
+	return clock * bpc / 8;
+}
+
+static int intel_hdmi_compute_bpc(struct intel_encoder *encoder,
+				  struct intel_crtc_state *crtc_state,
+				  int clock, bool force_dvi)
+{
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+	int bpc;
+
+	for (bpc = 12; bpc >= 10; bpc -= 2) {
+		if (hdmi_deep_color_possible(crtc_state, bpc) &&
+		    hdmi_port_clock_valid(intel_hdmi,
+					  intel_hdmi_port_clock(clock, bpc),
+					  true, force_dvi) == MODE_OK)
+			return bpc;
+	}
+
+	return 8;
+}
+
+static int intel_hdmi_compute_clock(struct intel_encoder *encoder,
+				    struct intel_crtc_state *crtc_state,
+				    bool force_dvi)
+{
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+	int bpc, clock = adjusted_mode->crtc_clock;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
+		clock *= 2;
+
+	/* YCBCR420 TMDS rate requirement is half the pixel clock */
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+		clock /= 2;
+
+	bpc = intel_hdmi_compute_bpc(encoder, crtc_state,
+				     clock, force_dvi);
+
+	crtc_state->port_clock = intel_hdmi_port_clock(clock, bpc);
+
+	/*
+	 * pipe_bpp could already be below 8bpc due to
+	 * FDI bandwidth constraints. We shouldn't bump it
+	 * back up to 8bpc in that case.
+	 */
+	if (crtc_state->pipe_bpp > bpc * 3)
+		crtc_state->pipe_bpp = bpc * 3;
+
+	DRM_DEBUG_KMS("picking %d bpc for HDMI output (pipe bpp: %d)\n",
+		      bpc, crtc_state->pipe_bpp);
+
+	if (hdmi_port_clock_valid(intel_hdmi, crtc_state->port_clock,
+				  false, force_dvi) != MODE_OK) {
+		DRM_DEBUG_KMS("unsupported HDMI clock (%d kHz), rejecting mode\n",
+			      crtc_state->port_clock);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state,
+					   const struct drm_connector_state *conn_state)
+{
+	const struct intel_digital_connector_state *intel_conn_state =
+		to_intel_digital_connector_state(conn_state);
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+
+	/*
+	 * Our YCbCr output is always limited range.
+	 * crtc_state->limited_color_range only applies to RGB,
+	 * and it must never be set for YCbCr or we risk setting
+	 * some conflicting bits in PIPECONF which will mess up
+	 * the colors on the monitor.
+	 */
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB)
+		return false;
+
+	if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) {
+		/* See CEA-861-E - 5.1 Default Encoding Parameters */
+		return crtc_state->has_hdmi_sink &&
+			drm_default_rgb_quant_range(adjusted_mode) ==
+			HDMI_QUANTIZATION_RANGE_LIMITED;
+	} else {
+		return intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED;
+	}
+}
+
 int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	struct drm_connector *connector = conn_state->connector;
 	struct drm_scdc *scdc = &connector->display_info.hdmi.scdc;
 	struct intel_digital_connector_state *intel_conn_state =
 		to_intel_digital_connector_state(conn_state);
-	int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock;
-	int clock_10bpc = clock_8bpc * 5 / 4;
-	int clock_12bpc = clock_8bpc * 3 / 2;
-	int desired_bpp;
 	bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI;
+	int ret;
 
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return -EINVAL;
@@ -2317,33 +2424,19 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 	if (pipe_config->has_hdmi_sink)
 		pipe_config->has_infoframe = true;
 
-	if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) {
-		/* See CEA-861-E - 5.1 Default Encoding Parameters */
-		pipe_config->limited_color_range =
-			pipe_config->has_hdmi_sink &&
-			drm_default_rgb_quant_range(adjusted_mode) ==
-			HDMI_QUANTIZATION_RANGE_LIMITED;
-	} else {
-		pipe_config->limited_color_range =
-			intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED;
-	}
-
-	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) {
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		pipe_config->pixel_multiplier = 2;
-		clock_8bpc *= 2;
-		clock_10bpc *= 2;
-		clock_12bpc *= 2;
-	}
 
 	if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) {
-		if (!intel_hdmi_ycbcr420_config(connector, pipe_config,
-						&clock_12bpc, &clock_10bpc,
-						&clock_8bpc)) {
+		if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) {
 			DRM_ERROR("Can't support YCBCR420 output\n");
 			return -EINVAL;
 		}
 	}
 
+	pipe_config->limited_color_range =
+		intel_hdmi_limited_color_range(pipe_config, conn_state);
+
 	if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv))
 		pipe_config->has_pch_encoder = true;
 
@@ -2355,46 +2448,13 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 				intel_conn_state->force_audio == HDMI_AUDIO_ON;
 	}
 
-	/*
-	 * Note that g4x/vlv don't support 12bpc hdmi outputs. We also need
-	 * to check that the higher clock still fits within limits.
-	 */
-	if (hdmi_deep_color_possible(pipe_config, 12) &&
-	    hdmi_port_clock_valid(intel_hdmi, clock_12bpc,
-				  true, force_dvi) == MODE_OK) {
-		DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
-		desired_bpp = 12*3;
-
-		/* Need to adjust the port link by 1.5x for 12bpc. */
-		pipe_config->port_clock = clock_12bpc;
-	} else if (hdmi_deep_color_possible(pipe_config, 10) &&
-		   hdmi_port_clock_valid(intel_hdmi, clock_10bpc,
-					 true, force_dvi) == MODE_OK) {
-		DRM_DEBUG_KMS("picking bpc to 10 for HDMI output\n");
-		desired_bpp = 10 * 3;
-
-		/* Need to adjust the port link by 1.25x for 10bpc. */
-		pipe_config->port_clock = clock_10bpc;
-	} else {
-		DRM_DEBUG_KMS("picking bpc to 8 for HDMI output\n");
-		desired_bpp = 8*3;
-
-		pipe_config->port_clock = clock_8bpc;
-	}
-
-	if (!pipe_config->bw_constrained) {
-		DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp);
-		pipe_config->pipe_bpp = desired_bpp;
-	}
-
-	if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock,
-				  false, force_dvi) != MODE_OK) {
-		DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n");
-		return -EINVAL;
-	}
+	ret = intel_hdmi_compute_clock(encoder, pipe_config, force_dvi);
+	if (ret)
+		return ret;
 
-	/* Set user selected PAR to incoming mode's member */
-	adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio;
+	if (conn_state->picture_aspect_ratio)
+		adjusted_mode->picture_aspect_ratio =
+			conn_state->picture_aspect_ratio;
 
 	pipe_config->lane_count = 4;
 
@@ -2437,7 +2497,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 static void
 intel_hdmi_unset_edid(struct drm_connector *connector)
 {
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 
 	intel_hdmi->has_hdmi_sink = false;
 	intel_hdmi->has_audio = false;
@@ -2453,7 +2513,7 @@ static void
 intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	enum port port = hdmi_to_dig_port(hdmi)->base.port;
 	struct i2c_adapter *adapter =
 		intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
@@ -2500,7 +2560,7 @@ static bool
 intel_hdmi_set_edid(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	intel_wakeref_t wakeref;
 	struct edid *edid;
 	bool connected = false;
@@ -2541,7 +2601,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 {
 	enum drm_connector_status status = connector_status_disconnected;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base;
 	intel_wakeref_t wakeref;
 
@@ -2604,7 +2664,7 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct intel_digital_port *intel_dig_port =
-		enc_to_dig_port(&encoder->base);
+		enc_to_dig_port(encoder);
 
 	intel_hdmi_prepare(encoder, pipe_config);
 
@@ -2617,7 +2677,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
 	vlv_phy_pre_encoder_enable(encoder, pipe_config);
@@ -2687,7 +2747,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -2713,7 +2773,7 @@ static struct i2c_adapter *
 intel_hdmi_get_i2c_adapter(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 
 	return intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
 }
@@ -2757,8 +2817,9 @@ intel_hdmi_connector_register(struct drm_connector *connector)
 
 static void intel_hdmi_destroy(struct drm_connector *connector)
 {
-	if (intel_attached_hdmi(connector)->cec_notifier)
-		cec_notifier_put(intel_attached_hdmi(connector)->cec_notifier);
+	struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier;
+
+	cec_notifier_conn_unregister(n);
 
 	intel_connector_destroy(connector);
 }
@@ -2813,7 +2874,6 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
 		intel_attach_colorspace_property(connector);
 
 	drm_connector_attach_content_type_property(connector);
-	connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		drm_object_attach_property(&connector->base,
@@ -2847,7 +2907,7 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
 				       bool scrambling)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_scrambling *sink_scrambling =
 		&connector->display_info.hdmi.scdc.scrambling;
 	struct i2c_adapter *adapter =
@@ -3007,7 +3067,7 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
 
 	if (HAS_PCH_MCC(dev_priv))
 		ddc_pin = mcc_port_to_ddc_pin(dev_priv, port);
-	else if (HAS_PCH_TGP(dev_priv) || HAS_PCH_ICP(dev_priv))
+	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
 		ddc_pin = icl_port_to_ddc_pin(dev_priv, port);
 	else if (HAS_PCH_CNP(dev_priv))
 		ddc_pin = cnp_port_to_ddc_pin(dev_priv, port);
@@ -3072,18 +3132,29 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i2c_adapter *ddc;
 	enum port port = intel_encoder->port;
+	struct cec_connector_info conn_info;
 
-	DRM_DEBUG_KMS("Adding HDMI connector on port %c\n",
-		      port_name(port));
+	DRM_DEBUG_KMS("Adding HDMI connector on [ENCODER:%d:%s]\n",
+		      intel_encoder->base.base.id, intel_encoder->base.name);
+
+	if (INTEL_GEN(dev_priv) < 12 && WARN_ON(port == PORT_A))
+		return;
 
 	if (WARN(intel_dig_port->max_lanes < 4,
-		 "Not enough lanes (%d) for HDMI on port %c\n",
-		 intel_dig_port->max_lanes, port_name(port)))
+		 "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n",
+		 intel_dig_port->max_lanes, intel_encoder->base.base.id,
+		 intel_encoder->base.name))
 		return;
 
-	drm_connector_init(dev, connector, &intel_hdmi_connector_funcs,
-			   DRM_MODE_CONNECTOR_HDMIA);
+	intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port);
+	ddc = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
+
+	drm_connector_init_with_ddc(dev, connector,
+				    &intel_hdmi_connector_funcs,
+				    DRM_MODE_CONNECTOR_HDMIA,
+				    ddc);
 	drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs);
 
 	connector->interlace_allowed = 1;
@@ -3093,10 +3164,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		connector->ycbcr_420_allowed = true;
 
-	intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port);
-
-	if (WARN_ON(port == PORT_A))
-		return;
 	intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port);
 
 	if (HAS_DDI(dev_priv))
@@ -3125,8 +3192,11 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 		I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd);
 	}
 
-	intel_hdmi->cec_notifier = cec_notifier_get_conn(dev->dev,
-							 port_identifier(port));
+	cec_fill_conn_info_from_drm(&conn_info, connector);
+
+	intel_hdmi->cec_notifier =
+		cec_notifier_conn_register(dev->dev, port_identifier(port),
+					   &conn_info);
 	if (!intel_hdmi->cec_notifier)
 		DRM_DEBUG_KMS("CEC notifier get failed\n");
 }
@@ -3216,11 +3286,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv,
 	intel_encoder->port = port;
 	if (IS_CHERRYVIEW(dev_priv)) {
 		if (port == PORT_D)
-			intel_encoder->crtc_mask = 1 << 2;
+			intel_encoder->pipe_mask = BIT(PIPE_C);
 		else
-			intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+			intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B);
 	} else {
-		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+		intel_encoder->pipe_mask = ~0;
 	}
 	intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG;
 	/*
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index 106c2e0bc3c9..d3659d0b408b 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
@@ -23,12 +23,13 @@ struct intel_crtc_state;
 struct intel_hdmi;
 struct drm_connector_state;
 union hdmi_infoframe;
+enum port;
 
 void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg,
 		     enum port port);
 void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 			       struct intel_connector *intel_connector);
-struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
+struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder);
 int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state);
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 56be20f6f47e..99d3a3c7989e 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -302,7 +302,7 @@ intel_encoder_hotplug(struct intel_encoder *encoder,
 static bool intel_encoder_has_hpd_pulse(struct intel_encoder *encoder)
 {
 	return intel_encoder_is_dig_port(encoder) &&
-		enc_to_dig_port(&encoder->base)->hpd_pulse != NULL;
+		enc_to_dig_port(encoder)->hpd_pulse != NULL;
 }
 
 static void i915_digport_work_func(struct work_struct *work)
@@ -335,7 +335,7 @@ static void i915_digport_work_func(struct work_struct *work)
 		if (!long_hpd && !short_hpd)
 			continue;
 
-		dig_port = enc_to_dig_port(&encoder->base);
+		dig_port = enc_to_dig_port(encoder);
 
 		ret = dig_port->hpd_pulse(dig_port, long_hpd);
 		if (ret == IRQ_NONE) {
@@ -481,7 +481,8 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 
 		long_hpd = long_mask & BIT(pin);
 
-		DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
+		DRM_DEBUG_DRIVER("digital hpd on [ENCODER:%d:%s] - %s\n",
+				 encoder->base.base.id, encoder->base.name,
 				 long_hpd ? "long" : "short");
 		queue_dig = true;
 
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h
index b0cd447b7fbc..087b5f57b321 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.h
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.h
@@ -13,6 +13,7 @@
 struct drm_i915_private;
 struct intel_connector;
 struct intel_encoder;
+enum port;
 
 void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
 enum intel_hotplug_state intel_encoder_hotplug(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
index b19800b58442..0b67f7887cd0 100644
--- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
@@ -114,7 +114,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 	pinfo.size_data = sizeof(*pdata);
 	pinfo.dma_mask = DMA_BIT_MASK(32);
 
-	pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes;
+	pdata->num_pipes = INTEL_NUM_PIPES(dev_priv);
 	pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */
 	pdata->port[0].pipe = -1;
 	pdata->port[1].pipe = -1;
diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c
index f8f1308643a9..d807c5648c87 100644
--- a/drivers/gpu/drm/i915/display/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.c
@@ -189,7 +189,7 @@ void lspcon_ycbcr420_config(struct drm_connector *connector,
 {
 	const struct drm_display_info *info = &connector->display_info;
 	const struct drm_display_mode *adjusted_mode =
-					&crtc_state->base.adjusted_mode;
+					&crtc_state->hw.adjusted_mode;
 
 	if (drm_mode_is_420_only(info, adjusted_mode) &&
 	    connector->ycbcr_420_allowed) {
@@ -434,8 +434,8 @@ void lspcon_write_infoframe(struct intel_encoder *encoder,
 			    const void *frame, ssize_t len)
 {
 	bool ret;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder);
 
 	/* LSPCON only needs AVI IF */
 	if (type != HDMI_INFOFRAME_TYPE_AVI)
@@ -472,10 +472,10 @@ void lspcon_set_infoframes(struct intel_encoder *encoder,
 	ssize_t ret;
 	union hdmi_infoframe frame;
 	u8 buf[VIDEO_DIP_DATA_SIZE];
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_lspcon *lspcon = &dig_port->lspcon;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 
 	if (!lspcon->active) {
 		DRM_ERROR("Writing infoframes while LSPCON disabled ?\n");
@@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *pipe_config)
 {
 	/* FIXME actually read this from the hw */
-	return enc_to_intel_lspcon(&encoder->base)->active;
+	return enc_to_intel_lspcon(encoder)->active;
 }
 
 void lspcon_resume(struct intel_lspcon *lspcon)
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index b7c459a8931c..10696bb99dcf 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -135,7 +135,7 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
 	else
 		flags |= DRM_MODE_FLAG_PVSYNC;
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
 	if (INTEL_GEN(dev_priv) < 5)
 		pipe_config->gmch_pfit.lvds_border_bits =
@@ -148,7 +148,7 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
 		pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE;
 	}
 
-	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
+	pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
 static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
@@ -230,9 +230,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder,
 {
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	int pipe = crtc->pipe;
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+	enum pipe pipe = crtc->pipe;
 	u32 temp;
 
 	if (HAS_PCH_SPLIT(dev_priv)) {
@@ -392,8 +392,8 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder,
 		to_lvds_encoder(&intel_encoder->base);
 	struct intel_connector *intel_connector =
 		lvds_encoder->attached_connector;
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	unsigned int lvds_bpp;
 
 	/* Should never happen!! */
@@ -899,12 +899,10 @@ void intel_lvds_init(struct drm_i915_private *dev_priv)
 	intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER;
 	intel_encoder->port = PORT_NONE;
 	intel_encoder->cloneable = 0;
-	if (HAS_PCH_SPLIT(dev_priv))
-		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
-	else if (IS_GEN(dev_priv, 4))
-		intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	if (INTEL_GEN(dev_priv) < 4)
+		intel_encoder->pipe_mask = BIT(PIPE_B);
 	else
-		intel_encoder->crtc_mask = (1 << 1);
+		intel_encoder->pipe_mask = ~0;
 
 	drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs);
 	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index 969ade623691..e59b4992ba1b 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -941,6 +941,13 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	if (mboxes & MBOX_ACPI) {
 		DRM_DEBUG_DRIVER("Public ACPI methods supported\n");
 		opregion->acpi = base + OPREGION_ACPI_OFFSET;
+		/*
+		 * Indicate we handle monitor hotplug events ourselves so we do
+		 * not need ACPI notifications for them. Disabling these avoids
+		 * triggering the AML code doing the notifation, which may be
+		 * broken as Windows also seems to disable these.
+		 */
+		opregion->acpi->chpd = 1;
 	}
 
 	if (mboxes & MBOX_SWSCI) {
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 29edfc343716..e40c3a0e2cd7 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -30,6 +30,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
@@ -230,7 +231,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 	if (IS_ERR(rq))
 		return rq;
 
-	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
+	err = i915_active_add_request(&overlay->last_flip, rq);
 	if (err) {
 		i915_request_add(rq);
 		return ERR_PTR(err);
@@ -278,12 +279,21 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
 				       struct i915_vma *vma)
 {
 	enum pipe pipe = overlay->crtc->pipe;
+	struct intel_frontbuffer *from = NULL, *to = NULL;
 
 	WARN_ON(overlay->old_vma);
 
-	intel_frontbuffer_track(overlay->vma ? overlay->vma->obj->frontbuffer : NULL,
-				vma ? vma->obj->frontbuffer : NULL,
-				INTEL_FRONTBUFFER_OVERLAY(pipe));
+	if (overlay->vma)
+		from = intel_frontbuffer_get(overlay->vma->obj);
+	if (vma)
+		to = intel_frontbuffer_get(vma->obj);
+
+	intel_frontbuffer_track(from, to, INTEL_FRONTBUFFER_OVERLAY(pipe));
+
+	if (to)
+		intel_frontbuffer_put(to);
+	if (from)
+		intel_frontbuffer_put(from);
 
 	intel_frontbuffer_flip_prepare(overlay->i915,
 				       INTEL_FRONTBUFFER_OVERLAY(pipe));
@@ -439,8 +449,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	struct i915_request *rq;
 	u32 *cs;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	/*
 	 * Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -669,8 +677,8 @@ static void update_colorkey(struct intel_overlay *overlay,
 	if (overlay->color_key_enabled)
 		flags |= DST_KEY_ENABLE;
 
-	if (state->base.visible)
-		format = state->base.fb->format->format;
+	if (state->uapi.visible)
+		format = state->hw.fb->format->format;
 
 	switch (format) {
 	case DRM_FORMAT_C8:
@@ -751,7 +759,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	struct i915_vma *vma;
 	int ret, tmp_width;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -760,15 +767,13 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-	i915_gem_object_lock(new_bo);
 	vma = i915_gem_object_pin_to_display_plane(new_bo,
 						   0, NULL, PIN_MAPPABLE);
-	i915_gem_object_unlock(new_bo);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
 	}
-	intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
+	i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB);
 
 	if (!overlay->active) {
 		u32 oconfig;
@@ -852,7 +857,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	struct drm_i915_private *dev_priv = overlay->i915;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
@@ -1068,11 +1072,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 
 	if (!(params->flags & I915_OVERLAY_ENABLE)) {
 		drm_modeset_lock_all(dev);
-		mutex_lock(&dev->struct_mutex);
-
 		ret = intel_overlay_switch_off(overlay);
-
-		mutex_unlock(&dev->struct_mutex);
 		drm_modeset_unlock_all(dev);
 
 		return ret;
@@ -1088,7 +1088,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	if (i915_gem_object_is_tiled(new_bo)) {
 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
@@ -1152,14 +1151,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		goto out_unlock;
 
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
 	return 0;
 
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
@@ -1233,7 +1230,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	ret = -EINVAL;
 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
@@ -1290,7 +1286,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 
 	ret = 0;
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 
 	return ret;
@@ -1303,15 +1298,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	struct i915_vma *vma;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
-	if (obj == NULL)
+	if (IS_ERR(obj))
 		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1332,25 +1323,24 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
 void intel_overlay_setup(struct drm_i915_private *dev_priv)
 {
 	struct intel_overlay *overlay;
+	struct intel_engine_cs *engine;
 	int ret;
 
 	if (!HAS_OVERLAY(dev_priv))
 		return;
 
-	if (!HAS_ENGINE(dev_priv, RCS0))
+	engine = dev_priv->engine[RCS0];
+	if (!engine || !engine->kernel_context)
 		return;
 
 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
@@ -1358,7 +1348,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 		return;
 
 	overlay->i915 = dev_priv;
-	overlay->context = dev_priv->engine[RCS0]->kernel_context;
+	overlay->context = engine->kernel_context;
 	GEM_BUG_ON(!overlay->context);
 
 	overlay->color_key = 0x0101fe;
@@ -1367,8 +1357,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	i915_active_init(dev_priv,
-			 &overlay->last_flip,
+	i915_active_init(&overlay->last_flip,
 			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index bc14e9c0285a..7b3ec6eb3382 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -178,7 +178,7 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
 			struct intel_crtc_state *pipe_config,
 			int fitting_mode)
 {
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	int x = 0, y = 0, width = 0, height = 0;
 
 	/* Native modes don't need fitting */
@@ -300,7 +300,7 @@ static inline u32 panel_fitter_scaling(u32 source, u32 target)
 static void i965_scale_aspect(struct intel_crtc_state *pipe_config,
 			      u32 *pfit_control)
 {
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	u32 scaled_width = adjusted_mode->crtc_hdisplay *
 		pipe_config->pipe_src_h;
 	u32 scaled_height = pipe_config->pipe_src_w *
@@ -321,7 +321,7 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config,
 			      u32 *pfit_control, u32 *pfit_pgm_ratios,
 			      u32 *border)
 {
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	u32 scaled_width = adjusted_mode->crtc_hdisplay *
 		pipe_config->pipe_src_h;
 	u32 scaled_height = pipe_config->pipe_src_w *
@@ -380,7 +380,7 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
 	u32 pfit_control = 0, pfit_pgm_ratios = 0, border = 0;
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 
 	/* Native modes don't need fitting */
 	if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w &&
@@ -1047,7 +1047,7 @@ static void vlv_enable_backlight(const struct intel_crtc_state *crtc_state,
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_panel *panel = &connector->panel;
-	enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 	u32 ctl, ctl2;
 
 	ctl2 = I915_READ(VLV_BLC_PWM_CTL2(pipe));
@@ -1077,7 +1077,7 @@ static void bxt_enable_backlight(const struct intel_crtc_state *crtc_state,
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_panel *panel = &connector->panel;
-	enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 	u32 pwm_ctl, val;
 
 	/* Controller 1 uses the utility pin. */
@@ -1189,7 +1189,7 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_panel *panel = &connector->panel;
-	enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+	enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 
 	if (!panel->backlight.present)
 		return;
@@ -1840,13 +1840,22 @@ static int pwm_setup_backlight(struct intel_connector *connector,
 			       enum pipe pipe)
 {
 	struct drm_device *dev = connector->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_panel *panel = &connector->panel;
+	const char *desc;
 	int retval;
 
-	/* Get the PWM chip for backlight control */
-	panel->backlight.pwm = pwm_get(dev->dev, "pwm_backlight");
+	/* Get the right PWM chip for DSI backlight according to VBT */
+	if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) {
+		panel->backlight.pwm = pwm_get(dev->dev, "pwm_pmic_backlight");
+		desc = "PMIC";
+	} else {
+		panel->backlight.pwm = pwm_get(dev->dev, "pwm_soc_backlight");
+		desc = "SoC";
+	}
+
 	if (IS_ERR(panel->backlight.pwm)) {
-		DRM_ERROR("Failed to own the pwm chip\n");
+		DRM_ERROR("Failed to get the %s PWM chip\n", desc);
 		panel->backlight.pwm = NULL;
 		return -ENODEV;
 	}
@@ -1873,6 +1882,7 @@ static int pwm_setup_backlight(struct intel_connector *connector,
 				 CRC_PMIC_PWM_PERIOD_NS);
 	panel->backlight.enabled = panel->backlight.level != 0;
 
+	DRM_INFO("Using %s PWM for LCD backlight control\n", desc);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
index 6260a2082719..520408e83681 100644
--- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
@@ -98,7 +98,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv,
 			break;
 		case INTEL_OUTPUT_DP:
 		case INTEL_OUTPUT_EDP:
-			dig_port = enc_to_dig_port(&encoder->base);
+			dig_port = enc_to_dig_port(encoder);
 			switch (dig_port->base.port) {
 			case PORT_B:
 				*source = INTEL_PIPE_CRC_SOURCE_DP_B;
@@ -309,13 +309,13 @@ retry:
 		goto put_state;
 	}
 
-	pipe_config->base.mode_changed = pipe_config->has_psr;
+	pipe_config->uapi.mode_changed = pipe_config->has_psr;
 	pipe_config->crc_enabled = enable;
 
 	if (IS_HASWELL(dev_priv) &&
-	    pipe_config->base.active && crtc->pipe == PIPE_A &&
+	    pipe_config->hw.active && crtc->pipe == PIPE_A &&
 	    pipe_config->cpu_transcoder == TRANSCODER_EDP)
-		pipe_config->base.mode_changed = true;
+		pipe_config->uapi.mode_changed = true;
 
 	ret = drm_atomic_commit(state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 3bfb720560c2..89c9cf5f38d2 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -26,6 +26,7 @@
 #include "display/intel_dp.h"
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_display_types.h"
 #include "intel_psr.h"
 #include "intel_sprite.h"
@@ -76,7 +77,7 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv,
 			       const struct intel_crtc_state *crtc_state)
 {
 	/* Cannot enable DSC and PSR2 simultaneously */
-	WARN_ON(crtc_state->dsc_params.compression_enable &&
+	WARN_ON(crtc_state->dsc.compression_enable &&
 		crtc_state->has_psr2);
 
 	switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) {
@@ -88,48 +89,35 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv,
 	}
 }
 
-static int edp_psr_shift(enum transcoder cpu_transcoder)
+static void psr_irq_control(struct drm_i915_private *dev_priv)
 {
-	switch (cpu_transcoder) {
-	case TRANSCODER_A:
-		return EDP_PSR_TRANSCODER_A_SHIFT;
-	case TRANSCODER_B:
-		return EDP_PSR_TRANSCODER_B_SHIFT;
-	case TRANSCODER_C:
-		return EDP_PSR_TRANSCODER_C_SHIFT;
-	default:
-		MISSING_CASE(cpu_transcoder);
-		/* fallthrough */
-	case TRANSCODER_EDP:
-		return EDP_PSR_TRANSCODER_EDP_SHIFT;
-	}
-}
+	enum transcoder trans_shift;
+	u32 mask, val;
+	i915_reg_t imr_reg;
 
-void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug)
-{
-	u32 debug_mask, mask;
-	enum transcoder cpu_transcoder;
-	u32 transcoders = BIT(TRANSCODER_EDP);
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		transcoders |= BIT(TRANSCODER_A) |
-			       BIT(TRANSCODER_B) |
-			       BIT(TRANSCODER_C);
-
-	debug_mask = 0;
-	mask = 0;
-	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
-		int shift = edp_psr_shift(cpu_transcoder);
-
-		mask |= EDP_PSR_ERROR(shift);
-		debug_mask |= EDP_PSR_POST_EXIT(shift) |
-			      EDP_PSR_PRE_ENTRY(shift);
+	/*
+	 * gen12+ has registers relative to transcoder and one per transcoder
+	 * using the same bit definition: handle it as TRANSCODER_EDP to force
+	 * 0 shift in bit definition
+	 */
+	if (INTEL_GEN(dev_priv) >= 12) {
+		trans_shift = 0;
+		imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder);
+	} else {
+		trans_shift = dev_priv->psr.transcoder;
+		imr_reg = EDP_PSR_IMR;
 	}
 
-	if (debug & I915_PSR_DEBUG_IRQ)
-		mask |= debug_mask;
+	mask = EDP_PSR_ERROR(trans_shift);
+	if (dev_priv->psr.debug & I915_PSR_DEBUG_IRQ)
+		mask |= EDP_PSR_POST_EXIT(trans_shift) |
+			EDP_PSR_PRE_ENTRY(trans_shift);
 
-	I915_WRITE(EDP_PSR_IMR, ~mask);
+	/* Warning: it is masking/setting reserved bits too */
+	val = I915_READ(imr_reg);
+	val &= ~EDP_PSR_TRANS_MASK(trans_shift);
+	val |= ~mask;
+	I915_WRITE(imr_reg, val);
 }
 
 static void psr_event_print(u32 val, bool psr2_enabled)
@@ -171,60 +159,58 @@ static void psr_event_print(u32 val, bool psr2_enabled)
 
 void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
 {
-	u32 transcoders = BIT(TRANSCODER_EDP);
-	enum transcoder cpu_transcoder;
+	enum transcoder cpu_transcoder = dev_priv->psr.transcoder;
+	enum transcoder trans_shift;
+	i915_reg_t imr_reg;
 	ktime_t time_ns =  ktime_get();
-	u32 mask = 0;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		transcoders |= BIT(TRANSCODER_A) |
-			       BIT(TRANSCODER_B) |
-			       BIT(TRANSCODER_C);
 
-	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
-		int shift = edp_psr_shift(cpu_transcoder);
+	if (INTEL_GEN(dev_priv) >= 12) {
+		trans_shift = 0;
+		imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder);
+	} else {
+		trans_shift = dev_priv->psr.transcoder;
+		imr_reg = EDP_PSR_IMR;
+	}
 
-		if (psr_iir & EDP_PSR_ERROR(shift)) {
-			DRM_WARN("[transcoder %s] PSR aux error\n",
-				 transcoder_name(cpu_transcoder));
+	if (psr_iir & EDP_PSR_PRE_ENTRY(trans_shift)) {
+		dev_priv->psr.last_entry_attempt = time_ns;
+		DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n",
+			      transcoder_name(cpu_transcoder));
+	}
 
-			dev_priv->psr.irq_aux_error = true;
+	if (psr_iir & EDP_PSR_POST_EXIT(trans_shift)) {
+		dev_priv->psr.last_exit = time_ns;
+		DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n",
+			      transcoder_name(cpu_transcoder));
 
-			/*
-			 * If this interruption is not masked it will keep
-			 * interrupting so fast that it prevents the scheduled
-			 * work to run.
-			 * Also after a PSR error, we don't want to arm PSR
-			 * again so we don't care about unmask the interruption
-			 * or unset irq_aux_error.
-			 */
-			mask |= EDP_PSR_ERROR(shift);
-		}
+		if (INTEL_GEN(dev_priv) >= 9) {
+			u32 val = I915_READ(PSR_EVENT(cpu_transcoder));
+			bool psr2_enabled = dev_priv->psr.psr2_enabled;
 
-		if (psr_iir & EDP_PSR_PRE_ENTRY(shift)) {
-			dev_priv->psr.last_entry_attempt = time_ns;
-			DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n",
-				      transcoder_name(cpu_transcoder));
+			I915_WRITE(PSR_EVENT(cpu_transcoder), val);
+			psr_event_print(val, psr2_enabled);
 		}
+	}
 
-		if (psr_iir & EDP_PSR_POST_EXIT(shift)) {
-			dev_priv->psr.last_exit = time_ns;
-			DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n",
-				      transcoder_name(cpu_transcoder));
+	if (psr_iir & EDP_PSR_ERROR(trans_shift)) {
+		u32 val;
 
-			if (INTEL_GEN(dev_priv) >= 9) {
-				u32 val = I915_READ(PSR_EVENT(cpu_transcoder));
-				bool psr2_enabled = dev_priv->psr.psr2_enabled;
+		DRM_WARN("[transcoder %s] PSR aux error\n",
+			 transcoder_name(cpu_transcoder));
 
-				I915_WRITE(PSR_EVENT(cpu_transcoder), val);
-				psr_event_print(val, psr2_enabled);
-			}
-		}
-	}
+		dev_priv->psr.irq_aux_error = true;
 
-	if (mask) {
-		mask |= I915_READ(EDP_PSR_IMR);
-		I915_WRITE(EDP_PSR_IMR, mask);
+		/*
+		 * If this interruption is not masked it will keep
+		 * interrupting so fast that it prevents the scheduled
+		 * work to run.
+		 * Also after a PSR error, we don't want to arm PSR
+		 * again so we don't care about unmask the interruption
+		 * or unset irq_aux_error.
+		 */
+		val = I915_READ(imr_reg);
+		val |= EDP_PSR_ERROR(trans_shift);
+		I915_WRITE(imr_reg, val);
 
 		schedule_work(&dev_priv->psr.work);
 	}
@@ -283,6 +269,11 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 	struct drm_i915_private *dev_priv =
 		to_i915(dp_to_dig_port(intel_dp)->base.base.dev);
 
+	if (dev_priv->psr.dp) {
+		DRM_WARN("More than one eDP panel found, PSR support should be extended\n");
+		return;
+	}
+
 	drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
 			 sizeof(intel_dp->psr_dpcd));
 
@@ -305,7 +296,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 	dev_priv->psr.sink_sync_latency =
 		intel_dp_get_sink_sync_latency(intel_dp);
 
-	WARN_ON(dev_priv->psr.dp);
 	dev_priv->psr.dp = intel_dp;
 
 	if (INTEL_GEN(dev_priv) >= 9 &&
@@ -390,7 +380,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp)
 
 	BUILD_BUG_ON(sizeof(aux_msg) > 20);
 	for (i = 0; i < sizeof(aux_msg); i += 4)
-		I915_WRITE(EDP_PSR_AUX_DATA(i >> 2),
+		I915_WRITE(EDP_PSR_AUX_DATA(dev_priv->psr.transcoder, i >> 2),
 			   intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i));
 
 	aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0);
@@ -401,7 +391,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp)
 
 	/* Select only valid bits for SRD_AUX_CTL */
 	aux_ctl &= psr_aux_mask;
-	I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl);
+	I915_WRITE(EDP_PSR_AUX_CTL(dev_priv->psr.transcoder), aux_ctl);
 }
 
 static void intel_psr_enable_sink(struct intel_dp *intel_dp)
@@ -412,7 +402,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp)
 	/* Enable ALPM at sink for psr2 */
 	if (dev_priv->psr.psr2_enabled) {
 		drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG,
-				   DP_ALPM_ENABLE);
+				   DP_ALPM_ENABLE |
+				   DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE);
+
 		dpcd_val |= DP_PSR_ENABLE_PSR2 | DP_PSR_IRQ_HPD_WITH_CRC_ERRORS;
 	} else {
 		if (dev_priv->psr.link_standby)
@@ -491,8 +483,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp)
 	if (INTEL_GEN(dev_priv) >= 8)
 		val |= EDP_PSR_CRC_ENABLE;
 
-	val |= I915_READ(EDP_PSR_CTL) & EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK;
-	I915_WRITE(EDP_PSR_CTL, val);
+	val |= (I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) &
+		EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK);
+	I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val);
 }
 
 static void hsw_activate_psr2(struct intel_dp *intel_dp)
@@ -528,38 +521,128 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 	 * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is
 	 * recommending keep this bit unset while PSR2 is enabled.
 	 */
-	I915_WRITE(EDP_PSR_CTL, 0);
+	I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), 0);
+
+	I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val);
+}
+
+static bool
+transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans)
+{
+	if (INTEL_GEN(dev_priv) < 9)
+		return false;
+	else if (INTEL_GEN(dev_priv) >= 12)
+		return trans == TRANSCODER_A;
+	else
+		return trans == TRANSCODER_EDP;
+}
+
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+{
+	if (!cstate || !cstate->hw.active)
+		return 0;
+
+	return DIV_ROUND_UP(1000 * 1000,
+			    drm_mode_vrefresh(&cstate->hw.adjusted_mode));
+}
+
+static void psr2_program_idle_frames(struct drm_i915_private *dev_priv,
+				     u32 idle_frames)
+{
+	u32 val;
+
+	idle_frames <<=  EDP_PSR2_IDLE_FRAME_SHIFT;
+	val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
+	val &= ~EDP_PSR2_IDLE_FRAME_MASK;
+	val |= idle_frames;
+	I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val);
+}
+
+static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv)
+{
+	psr2_program_idle_frames(dev_priv, 0);
+	intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO);
+}
+
+static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv)
+{
+	int idle_frames;
+
+	intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+	/*
+	 * Restore PSR2 idle frame let's use 6 as the minimum to cover all known
+	 * cases including the off-by-one issue that HW has in some cases.
+	 */
+	idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
+	idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1);
+	psr2_program_idle_frames(dev_priv, idle_frames);
+}
+
+static void tgl_dc5_idle_thread(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), psr.idle_work.work);
+
+	mutex_lock(&dev_priv->psr.lock);
+	/* If delayed work is pending, it is not idle */
+	if (delayed_work_pending(&dev_priv->psr.idle_work))
+		goto unlock;
+
+	DRM_DEBUG_KMS("DC5/6 idle thread\n");
+	tgl_psr2_disable_dc3co(dev_priv);
+unlock:
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
+static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv)
+{
+	if (!dev_priv->psr.dc3co_enabled)
+		return;
 
-	I915_WRITE(EDP_PSR2_CTL, val);
+	cancel_delayed_work(&dev_priv->psr.idle_work);
+	/* Before PSR2 exit disallow dc3co*/
+	tgl_psr2_disable_dc3co(dev_priv);
 }
 
 static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 				    struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay;
-	int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay;
-	int psr_max_h = 0, psr_max_v = 0;
+	int crtc_hdisplay = crtc_state->hw.adjusted_mode.crtc_hdisplay;
+	int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay;
+	int psr_max_h = 0, psr_max_v = 0, max_bpp = 0;
 
 	if (!dev_priv->psr.sink_psr2_support)
 		return false;
 
+	if (!transcoder_has_psr2(dev_priv, crtc_state->cpu_transcoder)) {
+		DRM_DEBUG_KMS("PSR2 not supported in transcoder %s\n",
+			      transcoder_name(crtc_state->cpu_transcoder));
+		return false;
+	}
+
 	/*
 	 * DSC and PSR2 cannot be enabled simultaneously. If a requested
 	 * resolution requires DSC to be enabled, priority is given to DSC
 	 * over PSR2.
 	 */
-	if (crtc_state->dsc_params.compression_enable) {
+	if (crtc_state->dsc.compression_enable) {
 		DRM_DEBUG_KMS("PSR2 cannot be enabled since DSC is enabled\n");
 		return false;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
+	if (INTEL_GEN(dev_priv) >= 12) {
+		psr_max_h = 5120;
+		psr_max_v = 3200;
+		max_bpp = 30;
+	} else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
 		psr_max_h = 4096;
 		psr_max_v = 2304;
+		max_bpp = 24;
 	} else if (IS_GEN(dev_priv, 9)) {
 		psr_max_h = 3640;
 		psr_max_v = 2304;
+		max_bpp = 24;
 	}
 
 	if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) {
@@ -569,6 +652,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 		return false;
 	}
 
+	if (crtc_state->pipe_bpp > max_bpp) {
+		DRM_DEBUG_KMS("PSR2 not enabled, pipe bpp %d > max supported %d\n",
+			      crtc_state->pipe_bpp, max_bpp);
+		return false;
+	}
+
 	/*
 	 * HW sends SU blocks of size four scan lines, which means the starting
 	 * X coordinate and Y granularity requirements will always be met. We
@@ -595,7 +684,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	int psr_setup_time;
 
 	if (!CAN_PSR(dev_priv))
@@ -606,10 +695,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 
 	/*
 	 * HSW spec explicitly says PSR is tied to port A.
-	 * BDW+ platforms with DDI implementation of PSR have different
-	 * PSR registers per transcoder and we only implement transcoder EDP
-	 * ones. Since by Display design transcoder EDP is tied to port A
-	 * we can safely escape based on the port A.
+	 * BDW+ platforms have a instance of PSR registers per transcoder but
+	 * for now it only supports one instance of PSR, so lets keep it
+	 * hardcoded to PORT_A
 	 */
 	if (dig_port->base.port != PORT_A) {
 		DRM_DEBUG_KMS("PSR condition failed: Port not supported\n");
@@ -648,9 +736,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
-	if (INTEL_GEN(dev_priv) >= 9)
-		WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE);
-	WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
+	if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder))
+		WARN_ON(I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)) & EDP_PSR2_ENABLE);
+
+	WARN_ON(I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & EDP_PSR_ENABLE);
 	WARN_ON(dev_priv->psr.active);
 	lockdep_assert_held(&dev_priv->psr.lock);
 
@@ -663,25 +752,6 @@ static void intel_psr_activate(struct intel_dp *intel_dp)
 	dev_priv->psr.active = true;
 }
 
-static i915_reg_t gen9_chicken_trans_reg(struct drm_i915_private *dev_priv,
-					 enum transcoder cpu_transcoder)
-{
-	static const i915_reg_t regs[] = {
-		[TRANSCODER_A] = CHICKEN_TRANS_A,
-		[TRANSCODER_B] = CHICKEN_TRANS_B,
-		[TRANSCODER_C] = CHICKEN_TRANS_C,
-		[TRANSCODER_EDP] = CHICKEN_TRANS_EDP,
-	};
-
-	WARN_ON(INTEL_GEN(dev_priv) < 9);
-
-	if (WARN_ON(cpu_transcoder >= ARRAY_SIZE(regs) ||
-		    !regs[cpu_transcoder].reg))
-		cpu_transcoder = TRANSCODER_A;
-
-	return regs[cpu_transcoder];
-}
-
 static void intel_psr_enable_source(struct intel_dp *intel_dp,
 				    const struct intel_crtc_state *crtc_state)
 {
@@ -697,8 +767,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
 
 	if (dev_priv->psr.psr2_enabled && (IS_GEN(dev_priv, 9) &&
 					   !IS_GEMINILAKE(dev_priv))) {
-		i915_reg_t reg = gen9_chicken_trans_reg(dev_priv,
-							cpu_transcoder);
+		i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder);
 		u32 chicken = I915_READ(reg);
 
 		chicken |= PSR2_VSC_ENABLE_PROG_HEADER |
@@ -720,19 +789,46 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
 	if (INTEL_GEN(dev_priv) < 11)
 		mask |= EDP_PSR_DEBUG_MASK_DISP_REG_WRITE;
 
-	I915_WRITE(EDP_PSR_DEBUG, mask);
+	I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask);
+
+	psr_irq_control(dev_priv);
 }
 
 static void intel_psr_enable_locked(struct drm_i915_private *dev_priv,
 				    const struct intel_crtc_state *crtc_state)
 {
 	struct intel_dp *intel_dp = dev_priv->psr.dp;
+	u32 val;
 
 	WARN_ON(dev_priv->psr.enabled);
 
 	dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state);
 	dev_priv->psr.busy_frontbuffer_bits = 0;
-	dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+	dev_priv->psr.pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
+	dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline;
+	dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state);
+	dev_priv->psr.transcoder = crtc_state->cpu_transcoder;
+
+	/*
+	 * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR
+	 * will still keep the error set even after the reset done in the
+	 * irq_preinstall and irq_uninstall hooks.
+	 * And enabling in this situation cause the screen to freeze in the
+	 * first time that PSR HW tries to activate so lets keep PSR disabled
+	 * to avoid any rendering problems.
+	 */
+	if (INTEL_GEN(dev_priv) >= 12) {
+		val = I915_READ(TRANS_PSR_IIR(dev_priv->psr.transcoder));
+		val &= EDP_PSR_ERROR(0);
+	} else {
+		val = I915_READ(EDP_PSR_IIR);
+		val &= EDP_PSR_ERROR(dev_priv->psr.transcoder);
+	}
+	if (val) {
+		dev_priv->psr.sink_not_reliable = true;
+		DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n");
+		return;
+	}
 
 	DRM_DEBUG_KMS("Enabling PSR%s\n",
 		      dev_priv->psr.psr2_enabled ? "2" : "1");
@@ -782,20 +878,28 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv)
 	u32 val;
 
 	if (!dev_priv->psr.active) {
-		if (INTEL_GEN(dev_priv) >= 9)
-			WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE);
-		WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
+		if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) {
+			val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
+			WARN_ON(val & EDP_PSR2_ENABLE);
+		}
+
+		val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder));
+		WARN_ON(val & EDP_PSR_ENABLE);
+
 		return;
 	}
 
 	if (dev_priv->psr.psr2_enabled) {
-		val = I915_READ(EDP_PSR2_CTL);
+		tgl_disallow_dc3co_on_psr2_exit(dev_priv);
+		val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
 		WARN_ON(!(val & EDP_PSR2_ENABLE));
-		I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE);
+		val &= ~EDP_PSR2_ENABLE;
+		I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val);
 	} else {
-		val = I915_READ(EDP_PSR_CTL);
+		val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder));
 		WARN_ON(!(val & EDP_PSR_ENABLE));
-		I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE);
+		val &= ~EDP_PSR_ENABLE;
+		I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val);
 	}
 	dev_priv->psr.active = false;
 }
@@ -817,10 +921,10 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp)
 	intel_psr_exit(dev_priv);
 
 	if (dev_priv->psr.psr2_enabled) {
-		psr_status = EDP_PSR2_STATUS;
+		psr_status = EDP_PSR2_STATUS(dev_priv->psr.transcoder);
 		psr_status_mask = EDP_PSR2_STATUS_STATE_MASK;
 	} else {
-		psr_status = EDP_PSR_STATUS;
+		psr_status = EDP_PSR_STATUS(dev_priv->psr.transcoder);
 		psr_status_mask = EDP_PSR_STATUS_STATE_MASK;
 	}
 
@@ -832,6 +936,9 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp)
 	/* Disable PSR on Sink */
 	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0);
 
+	if (dev_priv->psr.psr2_enabled)
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, 0);
+
 	dev_priv->psr.enabled = false;
 }
 
@@ -859,6 +966,7 @@ void intel_psr_disable(struct intel_dp *intel_dp,
 
 	mutex_unlock(&dev_priv->psr.lock);
 	cancel_work_sync(&dev_priv->psr.work);
+	cancel_delayed_work_sync(&dev_priv->psr.idle_work);
 }
 
 static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv)
@@ -946,7 +1054,7 @@ unlock:
 int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state,
 			    u32 *out_value)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	if (!dev_priv->psr.enabled || !new_crtc_state->has_psr)
@@ -963,7 +1071,8 @@ int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state,
 	 * defensive enough to cover everything.
 	 */
 
-	return __intel_wait_for_register(&dev_priv->uncore, EDP_PSR_STATUS,
+	return __intel_wait_for_register(&dev_priv->uncore,
+					 EDP_PSR_STATUS(dev_priv->psr.transcoder),
 					 EDP_PSR_STATUS_STATE_MASK,
 					 EDP_PSR_STATUS_STATE_IDLE, 2, 50,
 					 out_value);
@@ -979,10 +1088,10 @@ static bool __psr_wait_for_idle_locked(struct drm_i915_private *dev_priv)
 		return false;
 
 	if (dev_priv->psr.psr2_enabled) {
-		reg = EDP_PSR2_STATUS;
+		reg = EDP_PSR2_STATUS(dev_priv->psr.transcoder);
 		mask = EDP_PSR2_STATUS_STATE_MASK;
 	} else {
-		reg = EDP_PSR_STATUS;
+		reg = EDP_PSR_STATUS(dev_priv->psr.transcoder);
 		mask = EDP_PSR_STATUS_STATE_MASK;
 	}
 
@@ -1002,7 +1111,7 @@ static int intel_psr_fastset_force(struct drm_i915_private *dev_priv)
 	struct drm_device *dev = &dev_priv->drm;
 	struct drm_modeset_acquire_ctx ctx;
 	struct drm_atomic_state *state;
-	struct drm_crtc *crtc;
+	struct intel_crtc *crtc;
 	int err;
 
 	state = drm_atomic_state_alloc(dev);
@@ -1013,21 +1122,18 @@ static int intel_psr_fastset_force(struct drm_i915_private *dev_priv)
 	state->acquire_ctx = &ctx;
 
 retry:
-	drm_for_each_crtc(crtc, dev) {
-		struct drm_crtc_state *crtc_state;
-		struct intel_crtc_state *intel_crtc_state;
+	for_each_intel_crtc(dev, crtc) {
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_crtc_state(state, crtc);
 
-		crtc_state = drm_atomic_get_crtc_state(state, crtc);
 		if (IS_ERR(crtc_state)) {
 			err = PTR_ERR(crtc_state);
 			goto error;
 		}
 
-		intel_crtc_state = to_intel_crtc_state(crtc_state);
-
-		if (crtc_state->active && intel_crtc_state->has_psr) {
+		if (crtc_state->hw.active && crtc_state->has_psr) {
 			/* Mark mode as changed to trigger a pipe->update() */
-			crtc_state->mode_changed = true;
+			crtc_state->uapi.mode_changed = true;
 			break;
 		}
 	}
@@ -1067,7 +1173,13 @@ int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 val)
 
 	old_mode = dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK;
 	dev_priv->psr.debug = val;
-	intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
+
+	/*
+	 * Do it right away if it's already enabled, otherwise it will be done
+	 * when enabling the source.
+	 */
+	if (dev_priv->psr.enabled)
+		psr_irq_control(dev_priv);
 
 	mutex_unlock(&dev_priv->psr.lock);
 
@@ -1159,6 +1271,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv,
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
+/*
+ * When we will be completely rely on PSR2 S/W tracking in future,
+ * intel_psr_flush() will invalidate and flush the PSR for ORIGIN_FLIP
+ * event also therefore tgl_dc3co_flush() require to be changed
+ * accrodingly in future.
+ */
+static void
+tgl_dc3co_flush(struct drm_i915_private *dev_priv,
+		unsigned int frontbuffer_bits, enum fb_op_origin origin)
+{
+	u32 delay;
+
+	mutex_lock(&dev_priv->psr.lock);
+
+	if (!dev_priv->psr.dc3co_enabled)
+		goto unlock;
+
+	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
+		goto unlock;
+
+	/*
+	 * At every frontbuffer flush flip event modified delay of delayed work,
+	 * when delayed work schedules that means display has been idle.
+	 */
+	if (!(frontbuffer_bits &
+	    INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe)))
+		goto unlock;
+
+	tgl_psr2_enable_dc3co(dev_priv);
+	/* DC5/DC6 required idle frames = 6 */
+	delay = 6 * dev_priv->psr.dc3co_exit_delay;
+	mod_delayed_work(system_wq, &dev_priv->psr.idle_work,
+			 usecs_to_jiffies(delay));
+
+unlock:
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
 /**
  * intel_psr_flush - Flush PSR
  * @dev_priv: i915 device
@@ -1178,8 +1328,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
 	if (!CAN_PSR(dev_priv))
 		return;
 
-	if (origin == ORIGIN_FLIP)
+	if (origin == ORIGIN_FLIP) {
+		tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin);
 		return;
+	}
 
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
@@ -1208,53 +1360,111 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
  */
 void intel_psr_init(struct drm_i915_private *dev_priv)
 {
-	u32 val;
-
 	if (!HAS_PSR(dev_priv))
 		return;
 
-	dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ?
-		HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;
-
 	if (!dev_priv->psr.sink_support)
 		return;
 
+	if (IS_HASWELL(dev_priv))
+		/*
+		 * HSW don't have PSR registers on the same space as transcoder
+		 * so set this to a value that when subtract to the register
+		 * in transcoder space results in the right offset for HSW
+		 */
+		dev_priv->hsw_psr_mmio_adjust = _SRD_CTL_EDP - _HSW_EDP_PSR_BASE;
+
 	if (i915_modparams.enable_psr == -1)
 		if (INTEL_GEN(dev_priv) < 9 || !dev_priv->vbt.psr.enable)
 			i915_modparams.enable_psr = 0;
 
-	/*
-	 * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR
-	 * will still keep the error set even after the reset done in the
-	 * irq_preinstall and irq_uninstall hooks.
-	 * And enabling in this situation cause the screen to freeze in the
-	 * first time that PSR HW tries to activate so lets keep PSR disabled
-	 * to avoid any rendering problems.
-	 */
-	val = I915_READ(EDP_PSR_IIR);
-	val &= EDP_PSR_ERROR(edp_psr_shift(TRANSCODER_EDP));
-	if (val) {
-		DRM_DEBUG_KMS("PSR interruption error set\n");
-		dev_priv->psr.sink_not_reliable = true;
-	}
-
 	/* Set link_standby x link_off defaults */
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		/* HSW and BDW require workarounds that we don't implement. */
 		dev_priv->psr.link_standby = false;
-	else
-		/* For new platforms let's respect VBT back again */
+	else if (INTEL_GEN(dev_priv) < 12)
+		/* For new platforms up to TGL let's respect VBT back again */
 		dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link;
 
 	INIT_WORK(&dev_priv->psr.work, intel_psr_work);
+	INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread);
 	mutex_init(&dev_priv->psr.lock);
 }
 
-void intel_psr_short_pulse(struct intel_dp *intel_dp)
+static int psr_get_status_and_error_status(struct intel_dp *intel_dp,
+					   u8 *status, u8 *error_status)
+{
+	struct drm_dp_aux *aux = &intel_dp->aux;
+	int ret;
+
+	ret = drm_dp_dpcd_readb(aux, DP_PSR_STATUS, status);
+	if (ret != 1)
+		return ret;
+
+	ret = drm_dp_dpcd_readb(aux, DP_PSR_ERROR_STATUS, error_status);
+	if (ret != 1)
+		return ret;
+
+	*status = *status & DP_PSR_SINK_STATE_MASK;
+
+	return 0;
+}
+
+static void psr_alpm_check(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_dp_aux *aux = &intel_dp->aux;
+	struct i915_psr *psr = &dev_priv->psr;
+	u8 val;
+	int r;
+
+	if (!psr->psr2_enabled)
+		return;
+
+	r = drm_dp_dpcd_readb(aux, DP_RECEIVER_ALPM_STATUS, &val);
+	if (r != 1) {
+		DRM_ERROR("Error reading ALPM status\n");
+		return;
+	}
+
+	if (val & DP_ALPM_LOCK_TIMEOUT_ERROR) {
+		intel_psr_disable_locked(intel_dp);
+		psr->sink_not_reliable = true;
+		DRM_DEBUG_KMS("ALPM lock timeout error, disabling PSR\n");
+
+		/* Clearing error */
+		drm_dp_dpcd_writeb(aux, DP_RECEIVER_ALPM_STATUS, val);
+	}
+}
+
+static void psr_capability_changed_check(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct i915_psr *psr = &dev_priv->psr;
 	u8 val;
+	int r;
+
+	r = drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_ESI, &val);
+	if (r != 1) {
+		DRM_ERROR("Error reading DP_PSR_ESI\n");
+		return;
+	}
+
+	if (val & DP_PSR_CAPS_CHANGE) {
+		intel_psr_disable_locked(intel_dp);
+		psr->sink_not_reliable = true;
+		DRM_DEBUG_KMS("Sink PSR capability changed, disabling PSR\n");
+
+		/* Clearing it */
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ESI, val);
+	}
+}
+
+void intel_psr_short_pulse(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct i915_psr *psr = &dev_priv->psr;
+	u8 status, error_status;
 	const u8 errors = DP_PSR_RFB_STORAGE_ERROR |
 			  DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR |
 			  DP_PSR_LINK_CRC_ERROR;
@@ -1267,38 +1477,34 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp)
 	if (!psr->enabled || psr->dp != intel_dp)
 		goto exit;
 
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_STATUS, &val) != 1) {
-		DRM_ERROR("PSR_STATUS dpcd read failed\n");
+	if (psr_get_status_and_error_status(intel_dp, &status, &error_status)) {
+		DRM_ERROR("Error reading PSR status or error status\n");
 		goto exit;
 	}
 
-	if ((val & DP_PSR_SINK_STATE_MASK) == DP_PSR_SINK_INTERNAL_ERROR) {
-		DRM_DEBUG_KMS("PSR sink internal error, disabling PSR\n");
+	if (status == DP_PSR_SINK_INTERNAL_ERROR || (error_status & errors)) {
 		intel_psr_disable_locked(intel_dp);
 		psr->sink_not_reliable = true;
 	}
 
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_ERROR_STATUS, &val) != 1) {
-		DRM_ERROR("PSR_ERROR_STATUS dpcd read failed\n");
-		goto exit;
-	}
-
-	if (val & DP_PSR_RFB_STORAGE_ERROR)
+	if (status == DP_PSR_SINK_INTERNAL_ERROR && !error_status)
+		DRM_DEBUG_KMS("PSR sink internal error, disabling PSR\n");
+	if (error_status & DP_PSR_RFB_STORAGE_ERROR)
 		DRM_DEBUG_KMS("PSR RFB storage error, disabling PSR\n");
-	if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR)
+	if (error_status & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR)
 		DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n");
-	if (val & DP_PSR_LINK_CRC_ERROR)
-		DRM_ERROR("PSR Link CRC error, disabling PSR\n");
+	if (error_status & DP_PSR_LINK_CRC_ERROR)
+		DRM_DEBUG_KMS("PSR Link CRC error, disabling PSR\n");
 
-	if (val & ~errors)
+	if (error_status & ~errors)
 		DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n",
-			  val & ~errors);
-	if (val & errors) {
-		intel_psr_disable_locked(intel_dp);
-		psr->sink_not_reliable = true;
-	}
+			  error_status & ~errors);
 	/* clear status register */
-	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ERROR_STATUS, val);
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_ERROR_STATUS, error_status);
+
+	psr_alpm_check(intel_dp);
+	psr_capability_changed_check(intel_dp);
+
 exit:
 	mutex_unlock(&psr->lock);
 }
@@ -1317,3 +1523,27 @@ bool intel_psr_enabled(struct intel_dp *intel_dp)
 
 	return ret;
 }
+
+void intel_psr_atomic_check(struct drm_connector *connector,
+			    struct drm_connector_state *old_state,
+			    struct drm_connector_state *new_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct intel_connector *intel_connector;
+	struct intel_digital_port *dig_port;
+	struct drm_crtc_state *crtc_state;
+
+	if (!CAN_PSR(dev_priv) || !new_state->crtc ||
+	    dev_priv->psr.initially_probed)
+		return;
+
+	intel_connector = to_intel_connector(connector);
+	dig_port = enc_to_dig_port(intel_connector->encoder);
+	if (dev_priv->psr.dp != &dig_port->dp)
+		return;
+
+	crtc_state = drm_atomic_get_new_crtc_state(new_state->state,
+						   new_state->crtc);
+	crtc_state->mode_changed = true;
+	dev_priv->psr.initially_probed = true;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index dc818826f36d..c58a1d438808 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -8,6 +8,8 @@
 
 #include "intel_frontbuffer.h"
 
+struct drm_connector;
+struct drm_connector_state;
 struct drm_i915_private;
 struct intel_crtc_state;
 struct intel_dp;
@@ -30,11 +32,13 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
 void intel_psr_init(struct drm_i915_private *dev_priv);
 void intel_psr_compute_config(struct intel_dp *intel_dp,
 			      struct intel_crtc_state *crtc_state);
-void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug);
 void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir);
 void intel_psr_short_pulse(struct intel_dp *intel_dp);
 int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state,
 			    u32 *out_value);
 bool intel_psr_enabled(struct intel_dp *intel_dp);
+void intel_psr_atomic_check(struct drm_connector *connector,
+			    struct drm_connector_state *old_state,
+			    struct drm_connector_state *new_state);
 
 #endif /* __INTEL_PSR_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index adeb1c840976..e8819fd21e03 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -180,7 +180,7 @@ static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_sdvo, base);
 }
 
-static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector)
+static struct intel_sdvo *intel_attached_sdvo(struct intel_connector *connector)
 {
 	return to_sdvo(intel_attached_encoder(connector));
 }
@@ -1087,7 +1087,7 @@ static bool intel_sdvo_compute_avi_infoframe(struct intel_sdvo *intel_sdvo,
 {
 	struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	int ret;
 
 	if (!crtc_state->has_hdmi_sink)
@@ -1276,8 +1276,8 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder,
 		to_intel_sdvo_connector_state(conn_state);
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(conn_state->connector);
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	struct drm_display_mode *mode = &pipe_config->base.mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+	struct drm_display_mode *mode = &pipe_config->hw.mode;
 
 	DRM_DEBUG_KMS("forcing bpc to 8 for SDVO\n");
 	pipe_config->pipe_bpp = 8*3;
@@ -1349,9 +1349,9 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder,
 	if (IS_TV(intel_sdvo_connector))
 		i9xx_adjust_sdvo_tv_clock(pipe_config);
 
-	/* Set user selected PAR to incoming mode's member */
-	if (intel_sdvo_connector->is_hdmi)
-		adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio;
+	if (conn_state->picture_aspect_ratio)
+		adjusted_mode->picture_aspect_ratio =
+			conn_state->picture_aspect_ratio;
 
 	if (!intel_sdvo_compute_avi_infoframe(intel_sdvo,
 					      pipe_config, conn_state)) {
@@ -1429,13 +1429,13 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	const struct intel_sdvo_connector_state *sdvo_state =
 		to_intel_sdvo_connector_state(conn_state);
 	const struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(conn_state->connector);
-	const struct drm_display_mode *mode = &crtc_state->base.mode;
+	const struct drm_display_mode *mode = &crtc_state->hw.mode;
 	struct intel_sdvo *intel_sdvo = to_sdvo(intel_encoder);
 	u32 sdvox;
 	struct intel_sdvo_in_out_map in_out;
@@ -1551,7 +1551,7 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector)
 {
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(&connector->base);
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(&connector->base);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
 	u16 active_outputs = 0;
 
 	intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs);
@@ -1629,7 +1629,7 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
 			flags |= DRM_MODE_FLAG_NVSYNC;
 	}
 
-	pipe_config->base.adjusted_mode.flags |= flags;
+	pipe_config->hw.adjusted_mode.flags |= flags;
 
 	/*
 	 * pixel multiplier readout is tricky: Only on i915g/gm it is stored in
@@ -1649,7 +1649,7 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
 	if (pipe_config->pixel_multiplier)
 		dotclock /= pipe_config->pixel_multiplier;
 
-	pipe_config->base.adjusted_mode.crtc_clock = dotclock;
+	pipe_config->hw.adjusted_mode.crtc_clock = dotclock;
 
 	/* Cross check the port pixel multiplier with the sdvo encoder state. */
 	if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_CLOCK_RATE_MULT,
@@ -1701,7 +1701,7 @@ static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo,
 				    const struct drm_connector_state *conn_state)
 {
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	struct drm_connector *connector = conn_state->connector;
 	u8 *eld = connector->eld;
 
@@ -1723,7 +1723,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	u32 temp;
 
 	if (old_crtc_state->has_audio)
@@ -1785,7 +1785,7 @@ static void intel_enable_sdvo(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	u32 temp;
 	bool input1, input2;
 	int i;
@@ -1823,7 +1823,7 @@ static enum drm_mode_status
 intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -1941,7 +1941,7 @@ intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo)
 static struct edid *
 intel_sdvo_get_edid(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	return drm_get_edid(connector, &sdvo->ddc);
 }
 
@@ -1959,7 +1959,7 @@ intel_sdvo_get_analog_edid(struct drm_connector *connector)
 static enum drm_connector_status
 intel_sdvo_tmds_sink_detect(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
 	enum drm_connector_status status;
@@ -2028,7 +2028,7 @@ static enum drm_connector_status
 intel_sdvo_detect(struct drm_connector *connector, bool force)
 {
 	u16 response;
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector);
 	enum drm_connector_status ret;
 
@@ -2175,7 +2175,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = {
 
 static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	const struct drm_connector_state *conn_state = connector->state;
 	struct intel_sdvo_sdtv_resolution_request tv_res;
 	u32 reply = 0, format_map = 0;
@@ -2215,7 +2215,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
 
 static void intel_sdvo_get_lvds_modes(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct drm_display_mode *newmode;
 
@@ -2379,7 +2379,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector,
 static int
 intel_sdvo_connector_register(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	int ret;
 
 	ret = intel_connector_register(connector);
@@ -2394,7 +2394,7 @@ intel_sdvo_connector_register(struct drm_connector *connector)
 static void
 intel_sdvo_connector_unregister(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 
 	sysfs_remove_link(&connector->kdev->kobj,
 			  sdvo->ddc.dev.kobj.name);
@@ -2654,7 +2654,6 @@ intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo,
 		intel_attach_broadcast_rgb_property(&connector->base.base);
 	}
 	intel_attach_aspect_ratio_property(&connector->base.base);
-	connector->base.base.state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 }
 
 static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
@@ -2921,7 +2920,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags)
 			      bytes[0], bytes[1]);
 		return false;
 	}
-	intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	intel_sdvo->base.pipe_mask = ~0;
 
 	return true;
 }
@@ -2933,7 +2932,7 @@ static void intel_sdvo_output_cleanup(struct intel_sdvo *intel_sdvo)
 
 	list_for_each_entry_safe(connector, tmp,
 				 &dev->mode_config.connector_list, head) {
-		if (intel_attached_encoder(connector) == &intel_sdvo->base) {
+		if (intel_attached_encoder(to_intel_connector(connector)) == &intel_sdvo->base) {
 			drm_connector_unregister(connector);
 			intel_connector_destroy(connector);
 		}
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.h b/drivers/gpu/drm/i915/display/intel_sdvo.h
index c9e05bcdd141..a66f224aa17d 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.h
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.h
@@ -14,6 +14,7 @@
 
 struct drm_i915_private;
 enum pipe;
+enum port;
 
 bool intel_sdvo_port_enabled(struct drm_i915_private *dev_priv,
 			     i915_reg_t sdvo_reg, enum pipe *pipe);
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index cae25e493128..fca77ec1e0dd 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -48,19 +48,6 @@
 #include "intel_psr.h"
 #include "intel_sprite.h"
 
-bool is_planar_yuv_format(u32 pixelformat)
-{
-	switch (pixelformat) {
-	case DRM_FORMAT_NV12:
-	case DRM_FORMAT_P010:
-	case DRM_FORMAT_P012:
-	case DRM_FORMAT_P016:
-		return true;
-	default:
-		return false;
-	}
-}
-
 int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
 			     int usecs)
 {
@@ -94,9 +81,9 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
  */
 void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	const struct drm_display_mode *adjusted_mode = &new_crtc_state->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &new_crtc_state->hw.adjusted_mode;
 	long timeout = msecs_to_jiffies_timeout(1);
 	int scanline, min, max, vblank_start;
 	wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base);
@@ -133,7 +120,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
 
 	crtc->debug.min_vbl = min;
 	crtc->debug.max_vbl = max;
-	trace_i915_pipe_update_start(crtc);
+	trace_intel_pipe_update_start(crtc);
 
 	for (;;) {
 		/*
@@ -186,7 +173,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
 	crtc->debug.start_vbl_time = ktime_get();
 	crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc);
 
-	trace_i915_pipe_update_vblank_evaded(crtc);
+	trace_intel_pipe_update_vblank_evaded(crtc);
 	return;
 
 irq_disable:
@@ -203,27 +190,28 @@ irq_disable:
  */
 void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 	int scanline_end = intel_get_crtc_scanline(crtc);
 	u32 end_vbl_count = intel_crtc_get_vblank_counter(crtc);
 	ktime_t end_vbl_time = ktime_get();
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
+	trace_intel_pipe_update_end(crtc, end_vbl_count, scanline_end);
 
 	/* We're still in the vblank-evade critical section, this can't race.
 	 * Would be slightly nice to just grab the vblank count and arm the
 	 * event outside of the critical section - the spinlock might spin for a
 	 * while ... */
-	if (new_crtc_state->base.event) {
+	if (new_crtc_state->uapi.event) {
 		WARN_ON(drm_crtc_vblank_get(&crtc->base) != 0);
 
 		spin_lock(&crtc->base.dev->event_lock);
-		drm_crtc_arm_vblank_event(&crtc->base, new_crtc_state->base.event);
+		drm_crtc_arm_vblank_event(&crtc->base,
+				          new_crtc_state->uapi.event);
 		spin_unlock(&crtc->base.dev->event_lock);
 
-		new_crtc_state->base.event = NULL;
+		new_crtc_state->uapi.event = NULL;
 	}
 
 	local_irq_enable();
@@ -252,9 +240,9 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
 
 int intel_plane_check_stride(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	u32 stride, max_stride;
 
 	/*
@@ -264,7 +252,7 @@ int intel_plane_check_stride(const struct intel_plane_state *plane_state)
 	 * kick in due the plane being invisible.
 	 */
 	if (intel_plane_can_remap(plane_state) &&
-	    !plane_state->base.visible)
+	    !plane_state->uapi.visible)
 		return 0;
 
 	/* FIXME other color planes? */
@@ -284,10 +272,10 @@ int intel_plane_check_stride(const struct intel_plane_state *plane_state)
 
 int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_rect *src = &plane_state->base.src;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct drm_rect *src = &plane_state->uapi.src;
 	u32 src_x, src_y, src_w, src_h, hsub, vsub;
-	bool rotated = drm_rotation_90_or_270(plane_state->base.rotation);
+	bool rotated = drm_rotation_90_or_270(plane_state->hw.rotation);
 
 	/*
 	 * Hardware doesn't handle subpixel coordinates.
@@ -300,10 +288,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state)
 	src_y = src->y1 >> 16;
 	src_h = drm_rect_height(src) >> 16;
 
-	src->x1 = src_x << 16;
-	src->x2 = (src_x + src_w) << 16;
-	src->y1 = src_y << 16;
-	src->y2 = (src_y + src_h) << 16;
+	drm_rect_init(src, src_x << 16, src_y << 16,
+		      src_w << 16, src_h << 16);
 
 	if (!fb->format->is_yuv)
 		return 0;
@@ -337,6 +323,55 @@ bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id)
 		icl_hdr_plane_mask() & BIT(plane_id);
 }
 
+static void
+skl_plane_ratio(const struct intel_crtc_state *crtc_state,
+		const struct intel_plane_state *plane_state,
+		unsigned int *num, unsigned int *den)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+
+	if (fb->format->cpp[0] == 8) {
+		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
+			*num = 10;
+			*den = 8;
+		} else {
+			*num = 9;
+			*den = 8;
+		}
+	} else {
+		*num = 1;
+		*den = 1;
+	}
+}
+
+static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			       const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev);
+	unsigned int pixel_rate = crtc_state->pixel_rate;
+	unsigned int src_w, src_h, dst_w, dst_h;
+	unsigned int num, den;
+
+	skl_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	/* two pixels per clock on glk+ */
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		den *= 2;
+
+	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
+	dst_w = drm_rect_width(&plane_state->uapi.dst);
+	dst_h = drm_rect_height(&plane_state->uapi.dst);
+
+	/* Downscaling limits the maximum pixel rate */
+	dst_w = min(src_w, dst_w);
+	dst_h = min(src_h, dst_h);
+
+	return DIV64_U64_ROUND_UP(mul_u32_u32(pixel_rate * num, src_w * src_h),
+				  mul_u32_u32(den, dst_w * dst_h));
+}
+
 static unsigned int
 skl_plane_max_stride(struct intel_plane *plane,
 		     u32 pixel_format, u64 modifier,
@@ -361,27 +396,28 @@ skl_program_scaler(struct intel_plane *plane,
 		   const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	int scaler_id = plane_state->scaler_id;
 	const struct intel_scaler *scaler =
 		&crtc_state->scaler_state.scalers[scaler_id];
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	u32 crtc_w = drm_rect_width(&plane_state->base.dst);
-	u32 crtc_h = drm_rect_height(&plane_state->base.dst);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
+	u32 crtc_w = drm_rect_width(&plane_state->uapi.dst);
+	u32 crtc_h = drm_rect_height(&plane_state->uapi.dst);
 	u16 y_hphase, uv_rgb_hphase;
 	u16 y_vphase, uv_rgb_vphase;
 	int hscale, vscale;
 
-	hscale = drm_rect_calc_hscale(&plane_state->base.src,
-				      &plane_state->base.dst,
+	hscale = drm_rect_calc_hscale(&plane_state->uapi.src,
+				      &plane_state->uapi.dst,
 				      0, INT_MAX);
-	vscale = drm_rect_calc_vscale(&plane_state->base.src,
-				      &plane_state->base.dst,
+	vscale = drm_rect_calc_vscale(&plane_state->uapi.src,
+				      &plane_state->uapi.dst,
 				      0, INT_MAX);
 
 	/* TODO: handle sub-pixel coordinates */
-	if (is_planar_yuv_format(plane_state->base.fb->format->format) &&
+	if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) &&
 	    !icl_is_hdr_plane(dev_priv, plane->id)) {
 		y_hphase = skl_scaler_calc_phase(1, hscale, false);
 		y_vphase = skl_scaler_calc_phase(1, vscale, false);
@@ -506,10 +542,10 @@ icl_program_input_csc(struct intel_plane *plane,
 	};
 	const u16 *csc;
 
-	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
-		csc = input_csc_matrix[plane_state->base.color_encoding];
+	if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+		csc = input_csc_matrix[plane_state->hw.color_encoding];
 	else
-		csc = input_csc_matrix_lr[plane_state->base.color_encoding];
+		csc = input_csc_matrix_lr[plane_state->hw.color_encoding];
 
 	I915_WRITE_FW(PLANE_INPUT_CSC_COEFF(pipe, plane_id, 0), ROFF(csc[0]) |
 		      GOFF(csc[1]));
@@ -523,7 +559,7 @@ icl_program_input_csc(struct intel_plane *plane,
 
 	I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 0),
 		      PREOFF_YUV_TO_RGB_HI);
-	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+	if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 		I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), 0);
 	else
 		I915_WRITE_FW(PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1),
@@ -539,7 +575,7 @@ static void
 skl_program_plane(struct intel_plane *plane,
 		  const struct intel_crtc_state *crtc_state,
 		  const struct intel_plane_state *plane_state,
-		  int color_plane, bool slave, u32 plane_ctl)
+		  int color_plane)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum plane_id plane_id = plane->id;
@@ -547,19 +583,21 @@ skl_program_plane(struct intel_plane *plane,
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 surf_addr = plane_state->color_plane[color_plane].offset;
 	u32 stride = skl_plane_stride(plane_state, color_plane);
-	u32 aux_stride = skl_plane_stride(plane_state, 1);
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	int aux_plane = intel_main_to_aux_plane(fb, color_plane);
+	u32 aux_dist = plane_state->color_plane[aux_plane].offset - surf_addr;
+	u32 aux_stride = skl_plane_stride(plane_state, aux_plane);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
 	u32 x = plane_state->color_plane[color_plane].x;
 	u32 y = plane_state->color_plane[color_plane].y;
-	u32 src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	u32 src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	struct intel_plane *linked = plane_state->linked_plane;
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	u8 alpha = plane_state->base.alpha >> 8;
+	u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
+	u8 alpha = plane_state->hw.alpha >> 8;
 	u32 plane_color_ctl = 0;
 	unsigned long irqflags;
 	u32 keymsk, keymax;
+	u32 plane_ctl = plane_state->ctl;
 
 	plane_ctl |= skl_plane_ctl_crtc(crtc_state);
 
@@ -588,29 +626,13 @@ skl_program_plane(struct intel_plane *plane,
 	I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride);
 	I915_WRITE_FW(PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x);
 	I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w);
-	I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id),
-		      (plane_state->color_plane[1].offset - surf_addr) | aux_stride);
 
-	if (icl_is_hdr_plane(dev_priv, plane_id)) {
-		u32 cus_ctl = 0;
-
-		if (linked) {
-			/* Enable and use MPEG-2 chroma siting */
-			cus_ctl = PLANE_CUS_ENABLE |
-				PLANE_CUS_HPHASE_0 |
-				PLANE_CUS_VPHASE_SIGN_NEGATIVE |
-				PLANE_CUS_VPHASE_0_25;
-
-			if (linked->id == PLANE_SPRITE5)
-				cus_ctl |= PLANE_CUS_PLANE_7;
-			else if (linked->id == PLANE_SPRITE4)
-				cus_ctl |= PLANE_CUS_PLANE_6;
-			else
-				MISSING_CASE(linked->id);
-		}
+	if (INTEL_GEN(dev_priv) < 12)
+		aux_dist |= aux_stride;
+	I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), aux_dist);
 
-		I915_WRITE_FW(PLANE_CUS_CTL(pipe, plane_id), cus_ctl);
-	}
+	if (icl_is_hdr_plane(dev_priv, plane_id))
+		I915_WRITE_FW(PLANE_CUS_CTL(pipe, plane_id), plane_state->cus_ctl);
 
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl);
@@ -640,7 +662,7 @@ skl_program_plane(struct intel_plane *plane,
 	I915_WRITE_FW(PLANE_SURF(pipe, plane_id),
 		      intel_plane_ggtt_offset(plane_state) + surf_addr);
 
-	if (!slave && plane_state->scaler_id >= 0)
+	if (plane_state->scaler_id >= 0)
 		skl_program_scaler(plane, crtc_state, plane_state);
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
@@ -653,24 +675,12 @@ skl_update_plane(struct intel_plane *plane,
 {
 	int color_plane = 0;
 
-	if (plane_state->linked_plane) {
-		/* Program the UV plane */
+	if (plane_state->planar_linked_plane && !plane_state->planar_slave)
+		/* Program the UV plane on planar master */
 		color_plane = 1;
-	}
-
-	skl_program_plane(plane, crtc_state, plane_state,
-			  color_plane, false, plane_state->ctl);
-}
 
-static void
-icl_update_slave(struct intel_plane *plane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
-{
-	skl_program_plane(plane, crtc_state, plane_state, 0, true,
-			  plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE);
+	skl_program_plane(plane, crtc_state, plane_state, color_plane);
 }
-
 static void
 skl_disable_plane(struct intel_plane *plane,
 		  const struct intel_crtc_state *crtc_state)
@@ -730,9 +740,9 @@ static void i9xx_plane_linear_gamma(u16 gamma[8])
 static void
 chv_update_csc(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum plane_id plane_id = plane->id;
 	/*
 	 * |r|   | c0 c1 c2 |   |cr|
@@ -758,7 +768,7 @@ chv_update_csc(const struct intel_plane_state *plane_state)
 			    0, 4096,  7601,
 		},
 	};
-	const s16 *csc = csc_matrix[plane_state->base.color_encoding];
+	const s16 *csc = csc_matrix[plane_state->hw.color_encoding];
 
 	/* Seems RGB data bypasses the CSC always */
 	if (!fb->format->is_yuv)
@@ -789,15 +799,15 @@ chv_update_csc(const struct intel_plane_state *plane_state)
 static void
 vlv_update_clrc(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	enum plane_id plane_id = plane->id;
 	int contrast, brightness, sh_scale, sh_sin, sh_cos;
 
 	if (fb->format->is_yuv &&
-	    plane_state->base.color_range == DRM_COLOR_YCBCR_LIMITED_RANGE) {
+	    plane_state->hw.color_range == DRM_COLOR_YCBCR_LIMITED_RANGE) {
 		/*
 		 * Expand limited range to full range:
 		 * Contrast is applied first and is used to expand Y range.
@@ -825,6 +835,85 @@ vlv_update_clrc(const struct intel_plane_state *plane_state)
 		      SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos));
 }
 
+static void
+vlv_plane_ratio(const struct intel_crtc_state *crtc_state,
+		const struct intel_plane_state *plane_state,
+		unsigned int *num, unsigned int *den)
+{
+	u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int cpp = fb->format->cpp[0];
+
+	/*
+	 * VLV bspec only considers cases where all three planes are
+	 * enabled, and cases where the primary and one sprite is enabled.
+	 * Let's assume the case with just two sprites enabled also
+	 * maps to the latter case.
+	 */
+	if (hweight8(active_planes) == 3) {
+		switch (cpp) {
+		case 8:
+			*num = 11;
+			*den = 8;
+			break;
+		case 4:
+			*num = 18;
+			*den = 16;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	} else if (hweight8(active_planes) == 2) {
+		switch (cpp) {
+		case 8:
+			*num = 10;
+			*den = 8;
+			break;
+		case 4:
+			*num = 17;
+			*den = 16;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	} else {
+		switch (cpp) {
+		case 8:
+			*num = 10;
+			*den = 8;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	}
+}
+
+int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state)
+{
+	unsigned int pixel_rate;
+	unsigned int num, den;
+
+	/*
+	 * Note that crtc_state->pixel_rate accounts for both
+	 * horizontal and vertical panel fitter downscaling factors.
+	 * Pre-HSW bspec tells us to only consider the horizontal
+	 * downscaling factor here. We ignore that and just consider
+	 * both for simplicity.
+	 */
+	pixel_rate = crtc_state->pixel_rate;
+
+	vlv_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	return DIV_ROUND_UP(pixel_rate * num, den);
+}
+
 static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
 	u32 sprctl = 0;
@@ -838,8 +927,8 @@ static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state)
 static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 sprctl;
 
@@ -858,6 +947,9 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_VYUY:
 		sprctl |= SP_FORMAT_YUV422 | SP_YUV_ORDER_VYUY;
 		break;
+	case DRM_FORMAT_C8:
+		sprctl |= SP_FORMAT_8BPP;
+		break;
 	case DRM_FORMAT_RGB565:
 		sprctl |= SP_FORMAT_BGR565;
 		break;
@@ -873,6 +965,12 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_ABGR2101010:
 		sprctl |= SP_FORMAT_RGBA1010102;
 		break;
+	case DRM_FORMAT_XRGB2101010:
+		sprctl |= SP_FORMAT_BGRX1010102;
+		break;
+	case DRM_FORMAT_ARGB2101010:
+		sprctl |= SP_FORMAT_BGRA1010102;
+		break;
 	case DRM_FORMAT_XBGR8888:
 		sprctl |= SP_FORMAT_RGBX8888;
 		break;
@@ -884,7 +982,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 		return 0;
 	}
 
-	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+	if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709)
 		sprctl |= SP_YUV_FORMAT_BT709;
 
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
@@ -904,9 +1002,9 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 
 static void vlv_update_gamma(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	enum plane_id plane_id = plane->id;
 	u16 gamma[8];
@@ -938,10 +1036,10 @@ vlv_update_plane(struct intel_plane *plane,
 	u32 sprsurf_offset = plane_state->color_plane[0].offset;
 	u32 linear_offset;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	u32 crtc_w = drm_rect_width(&plane_state->base.dst);
-	u32 crtc_h = drm_rect_height(&plane_state->base.dst);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
+	u32 crtc_w = drm_rect_width(&plane_state->uapi.dst);
+	u32 crtc_h = drm_rect_height(&plane_state->uapi.dst);
 	u32 x = plane_state->color_plane[0].x;
 	u32 y = plane_state->color_plane[0].y;
 	unsigned long irqflags;
@@ -1031,6 +1129,164 @@ vlv_plane_get_hw_state(struct intel_plane *plane,
 	return ret;
 }
 
+static void ivb_plane_ratio(const struct intel_crtc_state *crtc_state,
+			    const struct intel_plane_state *plane_state,
+			    unsigned int *num, unsigned int *den)
+{
+	u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int cpp = fb->format->cpp[0];
+
+	if (hweight8(active_planes) == 2) {
+		switch (cpp) {
+		case 8:
+			*num = 10;
+			*den = 8;
+			break;
+		case 4:
+			*num = 17;
+			*den = 16;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	} else {
+		switch (cpp) {
+		case 8:
+			*num = 9;
+			*den = 8;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	}
+}
+
+static void ivb_plane_ratio_scaling(const struct intel_crtc_state *crtc_state,
+				    const struct intel_plane_state *plane_state,
+				    unsigned int *num, unsigned int *den)
+{
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int cpp = fb->format->cpp[0];
+
+	switch (cpp) {
+	case 8:
+		*num = 12;
+		*den = 8;
+		break;
+	case 4:
+		*num = 19;
+		*den = 16;
+		break;
+	case 2:
+		*num = 33;
+		*den = 32;
+		break;
+	default:
+		*num = 1;
+		*den = 1;
+		break;
+	}
+}
+
+int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state)
+{
+	unsigned int pixel_rate;
+	unsigned int num, den;
+
+	/*
+	 * Note that crtc_state->pixel_rate accounts for both
+	 * horizontal and vertical panel fitter downscaling factors.
+	 * Pre-HSW bspec tells us to only consider the horizontal
+	 * downscaling factor here. We ignore that and just consider
+	 * both for simplicity.
+	 */
+	pixel_rate = crtc_state->pixel_rate;
+
+	ivb_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	return DIV_ROUND_UP(pixel_rate * num, den);
+}
+
+static int ivb_sprite_min_cdclk(const struct intel_crtc_state *crtc_state,
+				const struct intel_plane_state *plane_state)
+{
+	unsigned int src_w, dst_w, pixel_rate;
+	unsigned int num, den;
+
+	/*
+	 * Note that crtc_state->pixel_rate accounts for both
+	 * horizontal and vertical panel fitter downscaling factors.
+	 * Pre-HSW bspec tells us to only consider the horizontal
+	 * downscaling factor here. We ignore that and just consider
+	 * both for simplicity.
+	 */
+	pixel_rate = crtc_state->pixel_rate;
+
+	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	dst_w = drm_rect_width(&plane_state->uapi.dst);
+
+	if (src_w != dst_w)
+		ivb_plane_ratio_scaling(crtc_state, plane_state, &num, &den);
+	else
+		ivb_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	/* Horizontal downscaling limits the maximum pixel rate */
+	dst_w = min(src_w, dst_w);
+
+	return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, num * src_w),
+				den * dst_w);
+}
+
+static void hsw_plane_ratio(const struct intel_crtc_state *crtc_state,
+			    const struct intel_plane_state *plane_state,
+			    unsigned int *num, unsigned int *den)
+{
+	u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int cpp = fb->format->cpp[0];
+
+	if (hweight8(active_planes) == 2) {
+		switch (cpp) {
+		case 8:
+			*num = 10;
+			*den = 8;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	} else {
+		switch (cpp) {
+		case 8:
+			*num = 9;
+			*den = 8;
+			break;
+		default:
+			*num = 1;
+			*den = 1;
+			break;
+		}
+	}
+}
+
+int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state)
+{
+	unsigned int pixel_rate = crtc_state->pixel_rate;
+	unsigned int num, den;
+
+	hsw_plane_ratio(crtc_state, plane_state, &num, &den);
+
+	return DIV_ROUND_UP(pixel_rate * num, den);
+}
+
 static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state)
 {
 	u32 sprctl = 0;
@@ -1044,13 +1300,23 @@ static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state)
 	return sprctl;
 }
 
+static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+
+	return fb->format->cpp[0] == 8 &&
+		(IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv));
+}
+
 static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 sprctl;
 
@@ -1066,6 +1332,18 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_XRGB8888:
 		sprctl |= SPRITE_FORMAT_RGBX888;
 		break;
+	case DRM_FORMAT_XBGR2101010:
+		sprctl |= SPRITE_FORMAT_RGBX101010 | SPRITE_RGB_ORDER_RGBX;
+		break;
+	case DRM_FORMAT_XRGB2101010:
+		sprctl |= SPRITE_FORMAT_RGBX101010;
+		break;
+	case DRM_FORMAT_XBGR16161616F:
+		sprctl |= SPRITE_FORMAT_RGBX161616 | SPRITE_RGB_ORDER_RGBX;
+		break;
+	case DRM_FORMAT_XRGB16161616F:
+		sprctl |= SPRITE_FORMAT_RGBX161616;
+		break;
 	case DRM_FORMAT_YUYV:
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YUYV;
 		break;
@@ -1083,12 +1361,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 		return 0;
 	}
 
-	sprctl |= SPRITE_INT_GAMMA_DISABLE;
+	if (!ivb_need_sprite_gamma(plane_state))
+		sprctl |= SPRITE_INT_GAMMA_DISABLE;
 
-	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+	if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709)
 		sprctl |= SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709;
 
-	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+	if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 		sprctl |= SPRITE_YUV_RANGE_CORRECTION_DISABLE;
 
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
@@ -1105,12 +1384,26 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 	return sprctl;
 }
 
-static void ivb_sprite_linear_gamma(u16 gamma[18])
+static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state,
+				    u16 gamma[18])
 {
-	int i;
+	int scale, i;
 
-	for (i = 0; i < 17; i++)
-		gamma[i] = (i << 10) / 16;
+	/*
+	 * WaFP16GammaEnabling:ivb,hsw
+	 * "Workaround : When using the 64-bit format, the sprite output
+	 *  on each color channel has one quarter amplitude. It can be
+	 *  brought up to full amplitude by using sprite internal gamma
+	 *  correction, pipe gamma correction, or pipe color space
+	 *  conversion to multiply the sprite output by four."
+	 */
+	scale = 4;
+
+	for (i = 0; i < 16; i++)
+		gamma[i] = min((scale * i << 10) / 16, (1 << 10) - 1);
+
+	gamma[i] = min((scale * i << 10) / 16, 1 << 10);
+	i++;
 
 	gamma[i] = 3 << 10;
 	i++;
@@ -1118,13 +1411,16 @@ static void ivb_sprite_linear_gamma(u16 gamma[18])
 
 static void ivb_update_gamma(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
 	u16 gamma[18];
 	int i;
 
-	ivb_sprite_linear_gamma(gamma);
+	if (!ivb_need_sprite_gamma(plane_state))
+		return;
+
+	ivb_sprite_linear_gamma(plane_state, gamma);
 
 	/* FIXME these register are single buffered :( */
 	for (i = 0; i < 16; i++)
@@ -1154,14 +1450,14 @@ ivb_update_plane(struct intel_plane *plane,
 	u32 sprsurf_offset = plane_state->color_plane[0].offset;
 	u32 linear_offset;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	u32 crtc_w = drm_rect_width(&plane_state->base.dst);
-	u32 crtc_h = drm_rect_height(&plane_state->base.dst);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
+	u32 crtc_w = drm_rect_width(&plane_state->uapi.dst);
+	u32 crtc_h = drm_rect_height(&plane_state->uapi.dst);
 	u32 x = plane_state->color_plane[0].x;
 	u32 y = plane_state->color_plane[0].y;
-	u32 src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	u32 src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
 	u32 sprctl, sprscale = 0;
 	unsigned long irqflags;
 
@@ -1257,6 +1553,53 @@ ivb_plane_get_hw_state(struct intel_plane *plane,
 	return ret;
 }
 
+static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state,
+				const struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int hscale, pixel_rate;
+	unsigned int limit, decimate;
+
+	/*
+	 * Note that crtc_state->pixel_rate accounts for both
+	 * horizontal and vertical panel fitter downscaling factors.
+	 * Pre-HSW bspec tells us to only consider the horizontal
+	 * downscaling factor here. We ignore that and just consider
+	 * both for simplicity.
+	 */
+	pixel_rate = crtc_state->pixel_rate;
+
+	/* Horizontal downscaling limits the maximum pixel rate */
+	hscale = drm_rect_calc_hscale(&plane_state->uapi.src,
+				      &plane_state->uapi.dst,
+				      0, INT_MAX);
+	if (hscale < 0x10000)
+		return pixel_rate;
+
+	/* Decimation steps at 2x,4x,8x,16x */
+	decimate = ilog2(hscale >> 16);
+	hscale >>= decimate;
+
+	/* Starting limit is 90% of cdclk */
+	limit = 9;
+
+	/* -10% per decimation step */
+	limit -= decimate;
+
+	/* -10% for RGB */
+	if (fb->format->cpp[0] >= 4)
+		limit--; /* -10% for RGB */
+
+	/*
+	 * We should also do -10% if sprite scaling is enabled
+	 * on the other pipe, but we can't really check for that,
+	 * so we ignore it.
+	 */
+
+	return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, 10 * hscale),
+				limit << 16);
+}
+
 static unsigned int
 g4x_sprite_max_stride(struct intel_plane *plane,
 		      u32 pixel_format, u64 modifier,
@@ -1282,9 +1625,9 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+		to_i915(plane_state->uapi.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 dvscntr;
 
@@ -1300,6 +1643,18 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 	case DRM_FORMAT_XRGB8888:
 		dvscntr |= DVS_FORMAT_RGBX888;
 		break;
+	case DRM_FORMAT_XBGR2101010:
+		dvscntr |= DVS_FORMAT_RGBX101010 | DVS_RGB_ORDER_XBGR;
+		break;
+	case DRM_FORMAT_XRGB2101010:
+		dvscntr |= DVS_FORMAT_RGBX101010;
+		break;
+	case DRM_FORMAT_XBGR16161616F:
+		dvscntr |= DVS_FORMAT_RGBX161616 | DVS_RGB_ORDER_XBGR;
+		break;
+	case DRM_FORMAT_XRGB16161616F:
+		dvscntr |= DVS_FORMAT_RGBX161616;
+		break;
 	case DRM_FORMAT_YUYV:
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YUYV;
 		break;
@@ -1317,10 +1672,10 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 		return 0;
 	}
 
-	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+	if (plane_state->hw.color_encoding == DRM_COLOR_YCBCR_BT709)
 		dvscntr |= DVS_YUV_FORMAT_BT709;
 
-	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+	if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 		dvscntr |= DVS_YUV_RANGE_CORRECTION_DISABLE;
 
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
@@ -1339,9 +1694,9 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 
 static void g4x_update_gamma(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	u16 gamma[8];
 	int i;
@@ -1371,9 +1726,9 @@ static void ilk_sprite_linear_gamma(u16 gamma[17])
 
 static void ilk_update_gamma(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum pipe pipe = plane->pipe;
 	u16 gamma[17];
 	int i;
@@ -1407,14 +1762,14 @@ g4x_update_plane(struct intel_plane *plane,
 	u32 dvssurf_offset = plane_state->color_plane[0].offset;
 	u32 linear_offset;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	u32 crtc_w = drm_rect_width(&plane_state->base.dst);
-	u32 crtc_h = drm_rect_height(&plane_state->base.dst);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_y = plane_state->uapi.dst.y1;
+	u32 crtc_w = drm_rect_width(&plane_state->uapi.dst);
+	u32 crtc_h = drm_rect_height(&plane_state->uapi.dst);
 	u32 x = plane_state->color_plane[0].x;
 	u32 y = plane_state->color_plane[0].y;
-	u32 src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	u32 src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
 	u32 dvscntr, dvsscale = 0;
 	unsigned long irqflags;
 
@@ -1513,6 +1868,11 @@ static bool intel_fb_scalable(const struct drm_framebuffer *fb)
 	switch (fb->format->format) {
 	case DRM_FORMAT_C8:
 		return false;
+	case DRM_FORMAT_XRGB16161616F:
+	case DRM_FORMAT_ARGB16161616F:
+	case DRM_FORMAT_XBGR16161616F:
+	case DRM_FORMAT_ABGR16161616F:
+		return INTEL_GEN(to_i915(fb->dev)) >= 11;
 	default:
 		return true;
 	}
@@ -1522,12 +1882,12 @@ static int
 g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state,
 			 struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	const struct drm_rect *src = &plane_state->base.src;
-	const struct drm_rect *dst = &plane_state->base.dst;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	const struct drm_rect *src = &plane_state->uapi.src;
+	const struct drm_rect *dst = &plane_state->uapi.dst;
 	int src_x, src_w, src_h, crtc_w, crtc_h;
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	unsigned int stride = plane_state->color_plane[0].stride;
 	unsigned int cpp = fb->format->cpp[0];
 	unsigned int width_bytes;
@@ -1583,13 +1943,13 @@ static int
 g4x_sprite_check(struct intel_crtc_state *crtc_state,
 		 struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	int min_scale = DRM_PLANE_HELPER_NO_SCALING;
 	int max_scale = DRM_PLANE_HELPER_NO_SCALING;
 	int ret;
 
-	if (intel_fb_scalable(plane_state->base.fb)) {
+	if (intel_fb_scalable(plane_state->hw.fb)) {
 		if (INTEL_GEN(dev_priv) < 7) {
 			min_scale = 1;
 			max_scale = 16 << 16;
@@ -1599,8 +1959,8 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state,
 		}
 	}
 
-	ret = drm_atomic_helper_check_plane_state(&plane_state->base,
-						  &crtc_state->base,
+	ret = drm_atomic_helper_check_plane_state(&plane_state->uapi,
+						  &crtc_state->uapi,
 						  min_scale, max_scale,
 						  true, true);
 	if (ret)
@@ -1610,7 +1970,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	ret = intel_plane_check_src_coordinates(plane_state);
@@ -1631,9 +1991,9 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state,
 
 int chv_plane_check_rotation(const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	unsigned int rotation = plane_state->base.rotation;
+	unsigned int rotation = plane_state->hw.rotation;
 
 	/* CHV ignores the mirror bit when the rotate bit is set :( */
 	if (IS_CHERRYVIEW(dev_priv) &&
@@ -1656,8 +2016,8 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = drm_atomic_helper_check_plane_state(&plane_state->base,
-						  &crtc_state->base,
+	ret = drm_atomic_helper_check_plane_state(&plane_state->uapi,
+						  &crtc_state->uapi,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  DRM_PLANE_HELPER_NO_SCALING,
 						  true, true);
@@ -1668,7 +2028,7 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	ret = intel_plane_check_src_coordinates(plane_state);
@@ -1683,10 +2043,10 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state,
 static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state,
 			      const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
 	struct drm_format_name_buf format_name;
 
 	if (!fb)
@@ -1741,12 +2101,14 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state,
 	}
 
 	/* Y-tiling is not supported in IF-ID Interlace mode */
-	if (crtc_state->base.enable &&
-	    crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE &&
+	if (crtc_state->hw.enable &&
+	    crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE &&
 	    (fb->modifier == I915_FORMAT_MOD_Y_TILED ||
 	     fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
 	     fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
-	     fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) {
+	     fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS ||
+	     fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	     fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)) {
 		DRM_DEBUG_KMS("Y/Yf tiling not supported in IF-ID mode\n");
 		return -EINVAL;
 	}
@@ -1758,9 +2120,9 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s
 					   const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
-		to_i915(plane_state->base.plane->dev);
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_w = drm_rect_width(&plane_state->base.dst);
+		to_i915(plane_state->uapi.plane->dev);
+	int crtc_x = plane_state->uapi.dst.x1;
+	int crtc_w = drm_rect_width(&plane_state->uapi.dst);
 	int pipe_src_w = crtc_state->pipe_src_w;
 
 	/*
@@ -1786,12 +2148,13 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s
 
 static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
-	int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int rotation = plane_state->hw.rotation;
+	int src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
 
 	/* Display WA #1106 */
-	if (is_planar_yuv_format(fb->format->format) && src_w & 3 &&
+	if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) &&
+	    src_w & 3 &&
 	    (rotation == DRM_MODE_ROTATE_270 ||
 	     rotation == (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90))) {
 		DRM_DEBUG_KMS("src width must be multiple of 4 for rotated planar YUV\n");
@@ -1801,12 +2164,28 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s
 	return 0;
 }
 
+static int skl_plane_max_scale(struct drm_i915_private *dev_priv,
+			       const struct drm_framebuffer *fb)
+{
+	/*
+	 * We don't yet know the final source width nor
+	 * whether we can use the HQ scaler mode. Assume
+	 * the best case.
+	 * FIXME need to properly check this later.
+	 */
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) ||
+	    !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
+		return 0x30000 - 1;
+	else
+		return 0x20000 - 1;
+}
+
 static int skl_plane_check(struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int min_scale = DRM_PLANE_HELPER_NO_SCALING;
 	int max_scale = DRM_PLANE_HELPER_NO_SCALING;
 	int ret;
@@ -1818,11 +2197,11 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 	/* use scaler when colorkey is not required */
 	if (!plane_state->ckey.flags && intel_fb_scalable(fb)) {
 		min_scale = 1;
-		max_scale = skl_max_scale(crtc_state, fb->format->format);
+		max_scale = skl_plane_max_scale(dev_priv, fb);
 	}
 
-	ret = drm_atomic_helper_check_plane_state(&plane_state->base,
-						  &crtc_state->base,
+	ret = drm_atomic_helper_check_plane_state(&plane_state->uapi,
+						  &crtc_state->uapi,
 						  min_scale, max_scale,
 						  true, true);
 	if (ret)
@@ -1832,7 +2211,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
 	ret = skl_plane_check_dst_coordinates(crtc_state, plane_state);
@@ -1848,8 +2227,8 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 		return ret;
 
 	/* HW only has 8 bits pixel precision, disable plane if invisible */
-	if (!(plane_state->base.alpha >> 8))
-		plane_state->base.visible = false;
+	if (!(plane_state->hw.alpha >> 8))
+		plane_state->uapi.visible = false;
 
 	plane_state->ctl = skl_plane_ctl(crtc_state, plane_state);
 
@@ -1857,6 +2236,15 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 		plane_state->color_ctl = glk_plane_color_ctl(crtc_state,
 							     plane_state);
 
+	if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) &&
+	    icl_is_hdr_plane(dev_priv, plane->id))
+		/* Enable and use MPEG-2 chroma siting */
+		plane_state->cus_ctl = PLANE_CUS_ENABLE |
+			PLANE_CUS_HPHASE_0 |
+			PLANE_CUS_VPHASE_SIGN_NEGATIVE | PLANE_CUS_VPHASE_0_25;
+	else
+		plane_state->cus_ctl = 0;
+
 	return 0;
 }
 
@@ -1868,7 +2256,7 @@ static bool has_dst_key_in_primary_plane(struct drm_i915_private *dev_priv)
 static void intel_plane_set_ckey(struct intel_plane_state *plane_state,
 				 const struct drm_intel_sprite_colorkey *set)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 
@@ -1993,8 +2381,12 @@ static const u64 i9xx_plane_format_modifiers[] = {
 };
 
 static const u32 snb_plane_formats[] = {
-	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_XRGB16161616F,
+	DRM_FORMAT_XBGR16161616F,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2002,12 +2394,30 @@ static const u32 snb_plane_formats[] = {
 };
 
 static const u32 vlv_plane_formats[] = {
+	DRM_FORMAT_C8,
 	DRM_FORMAT_RGB565,
-	DRM_FORMAT_ABGR8888,
-	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YVYU,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_VYUY,
+};
+
+static const u32 chv_pipe_b_sprite_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ARGB2101010,
 	DRM_FORMAT_ABGR2101010,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
@@ -2024,6 +2434,8 @@ static const u32 skl_plane_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_XRGB16161616F,
+	DRM_FORMAT_XBGR16161616F,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2039,6 +2451,8 @@ static const u32 skl_planar_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_XRGB16161616F,
+	DRM_FORMAT_XBGR16161616F,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2055,6 +2469,8 @@ static const u32 glk_planar_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_XRGB16161616F,
+	DRM_FORMAT_XBGR16161616F,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2074,6 +2490,8 @@ static const u32 icl_sdr_y_plane_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ARGB2101010,
+	DRM_FORMAT_ABGR2101010,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2095,6 +2513,8 @@ static const u32 icl_sdr_uv_plane_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ARGB2101010,
+	DRM_FORMAT_ABGR2101010,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_UYVY,
@@ -2120,6 +2540,8 @@ static const u32 icl_hdr_plane_formats[] = {
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ARGB2101010,
+	DRM_FORMAT_ABGR2101010,
 	DRM_FORMAT_XRGB16161616F,
 	DRM_FORMAT_XBGR16161616F,
 	DRM_FORMAT_ARGB16161616F,
@@ -2158,6 +2580,23 @@ static const u64 skl_plane_format_modifiers_ccs[] = {
 	DRM_FORMAT_MOD_INVALID
 };
 
+static const u64 gen12_plane_format_modifiers_mc_ccs[] = {
+	I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS,
+	I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
+	I915_FORMAT_MOD_Y_TILED,
+	I915_FORMAT_MOD_X_TILED,
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
+static const u64 gen12_plane_format_modifiers_rc_ccs[] = {
+	I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
+	I915_FORMAT_MOD_Y_TILED,
+	I915_FORMAT_MOD_X_TILED,
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
 static bool g4x_sprite_format_mod_supported(struct drm_plane *_plane,
 					    u32 format, u64 modifier)
 {
@@ -2198,6 +2637,10 @@ static bool snb_sprite_format_mod_supported(struct drm_plane *_plane,
 	switch (format) {
 	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_XRGB16161616F:
+	case DRM_FORMAT_XBGR16161616F:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
@@ -2223,6 +2666,7 @@ static bool vlv_sprite_format_mod_supported(struct drm_plane *_plane,
 	}
 
 	switch (format) {
+	case DRM_FORMAT_C8:
 	case DRM_FORMAT_RGB565:
 	case DRM_FORMAT_ABGR8888:
 	case DRM_FORMAT_ARGB8888:
@@ -2230,6 +2674,8 @@ static bool vlv_sprite_format_mod_supported(struct drm_plane *_plane,
 	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_XBGR2101010:
 	case DRM_FORMAT_ABGR2101010:
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_ARGB2101010:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
@@ -2274,6 +2720,8 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane,
 	case DRM_FORMAT_RGB565:
 	case DRM_FORMAT_XRGB2101010:
 	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_ABGR2101010:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
@@ -2306,6 +2754,75 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane,
 	}
 }
 
+static bool gen12_plane_supports_mc_ccs(enum plane_id plane_id)
+{
+	return plane_id < PLANE_SPRITE4;
+}
+
+static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
+					     u32 format, u64 modifier)
+{
+	struct intel_plane *plane = to_intel_plane(_plane);
+
+	switch (modifier) {
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		if (!gen12_plane_supports_mc_ccs(plane->id))
+			return false;
+		/* fall through */
+	case DRM_FORMAT_MOD_LINEAR:
+	case I915_FORMAT_MOD_X_TILED:
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+		break;
+	default:
+		return false;
+	}
+
+	switch (format) {
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_ABGR8888:
+		if (is_ccs_modifier(modifier))
+			return true;
+		/* fall through */
+	case DRM_FORMAT_YUYV:
+	case DRM_FORMAT_YVYU:
+	case DRM_FORMAT_UYVY:
+	case DRM_FORMAT_VYUY:
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_P010:
+	case DRM_FORMAT_P012:
+	case DRM_FORMAT_P016:
+		if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)
+			return true;
+		/* fall through */
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_ABGR2101010:
+	case DRM_FORMAT_XVYU2101010:
+	case DRM_FORMAT_C8:
+	case DRM_FORMAT_XBGR16161616F:
+	case DRM_FORMAT_ABGR16161616F:
+	case DRM_FORMAT_XRGB16161616F:
+	case DRM_FORMAT_ARGB16161616F:
+	case DRM_FORMAT_Y210:
+	case DRM_FORMAT_Y212:
+	case DRM_FORMAT_Y216:
+	case DRM_FORMAT_XVYU12_16161616:
+	case DRM_FORMAT_XVYU16161616:
+		if (modifier == DRM_FORMAT_MOD_LINEAR ||
+		    modifier == I915_FORMAT_MOD_X_TILED ||
+		    modifier == I915_FORMAT_MOD_Y_TILED)
+			return true;
+		/* fall through */
+	default:
+		return false;
+	}
+}
+
 static const struct drm_plane_funcs g4x_sprite_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -2342,6 +2859,15 @@ static const struct drm_plane_funcs skl_plane_funcs = {
 	.format_mod_supported = skl_plane_format_mod_supported,
 };
 
+static const struct drm_plane_funcs gen12_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = intel_plane_destroy,
+	.atomic_duplicate_state = intel_plane_duplicate_state,
+	.atomic_destroy_state = intel_plane_destroy_state,
+	.format_mod_supported = gen12_plane_format_mod_supported,
+};
+
 static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv,
 			      enum pipe pipe, enum plane_id plane_id)
 {
@@ -2409,6 +2935,14 @@ static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv,
 	}
 }
 
+static const u64 *gen12_get_plane_modifiers(enum plane_id plane_id)
+{
+	if (gen12_plane_supports_mc_ccs(plane_id))
+		return gen12_plane_format_modifiers_mc_ccs;
+	else
+		return gen12_plane_format_modifiers_rc_ccs;
+}
+
 static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv,
 			      enum pipe pipe, enum plane_id plane_id)
 {
@@ -2430,6 +2964,7 @@ struct intel_plane *
 skl_universal_plane_create(struct drm_i915_private *dev_priv,
 			   enum pipe pipe, enum plane_id plane_id)
 {
+	const struct drm_plane_funcs *plane_funcs;
 	struct intel_plane *plane;
 	enum drm_plane_type plane_type;
 	unsigned int supported_rotations;
@@ -2459,8 +2994,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
 	plane->disable_plane = skl_disable_plane;
 	plane->get_hw_state = skl_plane_get_hw_state;
 	plane->check_plane = skl_plane_check;
-	if (icl_is_nv12_y_plane(plane_id))
-		plane->update_slave = icl_update_slave;
+	plane->min_cdclk = skl_plane_min_cdclk;
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		formats = icl_get_plane_formats(dev_priv, pipe,
@@ -2473,10 +3007,16 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
 						plane_id, &num_formats);
 
 	plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id);
-	if (plane->has_ccs)
-		modifiers = skl_plane_format_modifiers_ccs;
-	else
-		modifiers = skl_plane_format_modifiers_noccs;
+	if (INTEL_GEN(dev_priv) >= 12) {
+		modifiers = gen12_get_plane_modifiers(plane_id);
+		plane_funcs = &gen12_plane_funcs;
+	} else {
+		if (plane->has_ccs)
+			modifiers = skl_plane_format_modifiers_ccs;
+		else
+			modifiers = skl_plane_format_modifiers_noccs;
+		plane_funcs = &skl_plane_funcs;
+	}
 
 	if (plane_id == PLANE_PRIMARY)
 		plane_type = DRM_PLANE_TYPE_PRIMARY;
@@ -2486,7 +3026,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
 	possible_crtcs = BIT(pipe);
 
 	ret = drm_universal_plane_init(&dev_priv->drm, &plane->base,
-				       possible_crtcs, &skl_plane_funcs,
+				       possible_crtcs, plane_funcs,
 				       formats, num_formats, modifiers,
 				       plane_type,
 				       "plane %d%c", plane_id + 1,
@@ -2519,6 +3059,8 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
 					     BIT(DRM_MODE_BLEND_PREMULTI) |
 					     BIT(DRM_MODE_BLEND_COVERAGE));
 
+	drm_plane_create_zpos_immutable_property(&plane->base, plane_id);
+
 	drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs);
 
 	return plane;
@@ -2540,7 +3082,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 	const u64 *modifiers;
 	const u32 *formats;
 	int num_formats;
-	int ret;
+	int ret, zpos;
 
 	if (INTEL_GEN(dev_priv) >= 9)
 		return skl_universal_plane_create(dev_priv, pipe,
@@ -2556,9 +3098,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 		plane->disable_plane = vlv_disable_plane;
 		plane->get_hw_state = vlv_plane_get_hw_state;
 		plane->check_plane = vlv_sprite_check;
+		plane->min_cdclk = vlv_plane_min_cdclk;
 
-		formats = vlv_plane_formats;
-		num_formats = ARRAY_SIZE(vlv_plane_formats);
+		if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) {
+			formats = chv_pipe_b_sprite_formats;
+			num_formats = ARRAY_SIZE(chv_pipe_b_sprite_formats);
+		} else {
+			formats = vlv_plane_formats;
+			num_formats = ARRAY_SIZE(vlv_plane_formats);
+		}
 		modifiers = i9xx_plane_format_modifiers;
 
 		plane_funcs = &vlv_sprite_funcs;
@@ -2569,6 +3117,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 		plane->get_hw_state = ivb_plane_get_hw_state;
 		plane->check_plane = g4x_sprite_check;
 
+		if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
+			plane->min_cdclk = hsw_plane_min_cdclk;
+		else
+			plane->min_cdclk = ivb_sprite_min_cdclk;
+
 		formats = snb_plane_formats;
 		num_formats = ARRAY_SIZE(snb_plane_formats);
 		modifiers = i9xx_plane_format_modifiers;
@@ -2580,6 +3133,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 		plane->disable_plane = g4x_disable_plane;
 		plane->get_hw_state = g4x_plane_get_hw_state;
 		plane->check_plane = g4x_sprite_check;
+		plane->min_cdclk = g4x_sprite_min_cdclk;
 
 		modifiers = i9xx_plane_format_modifiers;
 		if (IS_GEN(dev_priv, 6)) {
@@ -2630,6 +3184,9 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 					  DRM_COLOR_YCBCR_BT709,
 					  DRM_COLOR_YCBCR_LIMITED_RANGE);
 
+	zpos = sprite + 1;
+	drm_plane_create_zpos_immutable_property(&plane->base, zpos);
+
 	drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs);
 
 	return plane;
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h
index 093a2d156f1e..5eeaa92420d1 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.h
+++ b/drivers/gpu/drm/i915/display/intel_sprite.h
@@ -17,7 +17,6 @@ struct drm_i915_private;
 struct intel_crtc_state;
 struct intel_plane_state;
 
-bool is_planar_yuv_format(u32 pixelformat);
 int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
 			     int usecs);
 struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv,
@@ -50,4 +49,11 @@ static inline u8 icl_hdr_plane_mask(void)
 
 bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id);
 
+int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state);
+int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state);
+int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
+			const struct intel_plane_state *plane_state);
+
 #endif /* __INTEL_SPRITE_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 85743a43bee2..7773169b7331 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -23,32 +23,38 @@ static const char *tc_port_mode_name(enum tc_port_mode mode)
 	return names[mode];
 }
 
-static bool has_modular_fia(struct drm_i915_private *i915)
-{
-	if (!INTEL_INFO(i915)->display.has_modular_fia)
-		return false;
-
-	return intel_uncore_read(&i915->uncore,
-				 PORT_TX_DFLEXDPSP(FIA1)) & MODULAR_FIA_MASK;
-}
-
-static enum phy_fia tc_port_to_fia(struct drm_i915_private *i915,
-				   enum tc_port tc_port)
+static void
+tc_port_load_fia_params(struct drm_i915_private *i915,
+			struct intel_digital_port *dig_port)
 {
-	if (!has_modular_fia(i915))
-		return FIA1;
+	enum port port = dig_port->base.port;
+	enum tc_port tc_port = intel_port_to_tc(i915, port);
+	u32 modular_fia;
+
+	if (INTEL_INFO(i915)->display.has_modular_fia) {
+		modular_fia = intel_uncore_read(&i915->uncore,
+						PORT_TX_DFLEXDPSP(FIA1));
+		modular_fia &= MODULAR_FIA_MASK;
+	} else {
+		modular_fia = 0;
+	}
 
 	/*
 	 * Each Modular FIA instance houses 2 TC ports. In SOC that has more
 	 * than two TC ports, there are multiple instances of Modular FIA.
 	 */
-	return tc_port / 2;
+	if (modular_fia) {
+		dig_port->tc_phy_fia = tc_port / 2;
+		dig_port->tc_phy_fia_idx = tc_port % 2;
+	} else {
+		dig_port->tc_phy_fia = FIA1;
+		dig_port->tc_phy_fia_idx = tc_port;
+	}
 }
 
 u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 lane_mask;
 
@@ -57,8 +63,23 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port)
 
 	WARN_ON(lane_mask == 0xffffffff);
 
-	return (lane_mask & DP_LANE_ASSIGNMENT_MASK(tc_port)) >>
-	       DP_LANE_ASSIGNMENT_SHIFT(tc_port);
+	lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx);
+	return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx);
+}
+
+u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port)
+{
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+	struct intel_uncore *uncore = &i915->uncore;
+	u32 pin_mask;
+
+	pin_mask = intel_uncore_read(uncore,
+				     PORT_TX_DFLEXPA1(dig_port->tc_phy_fia));
+
+	WARN_ON(pin_mask == 0xffffffff);
+
+	return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >>
+	       DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx);
 }
 
 int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port)
@@ -95,7 +116,6 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port,
 				      int required_lanes)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
 	bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL;
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 val;
@@ -104,19 +124,21 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port,
 
 	val = intel_uncore_read(uncore,
 				PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia));
-	val &= ~DFLEXDPMLE1_DPMLETC_MASK(tc_port);
+	val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx);
 
 	switch (required_lanes) {
 	case 1:
-		val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3(tc_port) :
-			DFLEXDPMLE1_DPMLETC_ML0(tc_port);
+		val |= lane_reversal ?
+			DFLEXDPMLE1_DPMLETC_ML3(dig_port->tc_phy_fia_idx) :
+			DFLEXDPMLE1_DPMLETC_ML0(dig_port->tc_phy_fia_idx);
 		break;
 	case 2:
-		val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) :
-			DFLEXDPMLE1_DPMLETC_ML1_0(tc_port);
+		val |= lane_reversal ?
+			DFLEXDPMLE1_DPMLETC_ML3_2(dig_port->tc_phy_fia_idx) :
+			DFLEXDPMLE1_DPMLETC_ML1_0(dig_port->tc_phy_fia_idx);
 		break;
 	case 4:
-		val |= DFLEXDPMLE1_DPMLETC_ML3_0(tc_port);
+		val |= DFLEXDPMLE1_DPMLETC_ML3_0(dig_port->tc_phy_fia_idx);
 		break;
 	default:
 		MISSING_CASE(required_lanes);
@@ -164,9 +186,9 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port)
 		return mask;
 	}
 
-	if (val & TC_LIVE_STATE_TBT(tc_port))
+	if (val & TC_LIVE_STATE_TBT(dig_port->tc_phy_fia_idx))
 		mask |= BIT(TC_PORT_TBT_ALT);
-	if (val & TC_LIVE_STATE_TC(tc_port))
+	if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx))
 		mask |= BIT(TC_PORT_DP_ALT);
 
 	if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port))
@@ -182,7 +204,6 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port)
 static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 val;
 
@@ -194,14 +215,13 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
 		return false;
 	}
 
-	return val & DP_PHY_MODE_STATUS_COMPLETED(tc_port);
+	return val & DP_PHY_MODE_STATUS_COMPLETED(dig_port->tc_phy_fia_idx);
 }
 
 static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port,
 				     bool enable)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 val;
 
@@ -215,9 +235,9 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port,
 		return false;
 	}
 
-	val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(tc_port);
+	val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx);
 	if (!enable)
-		val |= DP_PHY_MODE_STATUS_NOT_SAFE(tc_port);
+		val |= DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx);
 
 	intel_uncore_write(uncore,
 			   PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia), val);
@@ -232,7 +252,6 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port,
 static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 val;
 
@@ -244,7 +263,7 @@ static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port)
 		return true;
 	}
 
-	return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(tc_port));
+	return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx));
 }
 
 /*
@@ -540,5 +559,5 @@ void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy)
 	mutex_init(&dig_port->tc_lock);
 	dig_port->tc_legacy_port = is_legacy;
 	dig_port->tc_link_refcount = 0;
-	dig_port->tc_phy_fia = tc_port_to_fia(i915, tc_port);
+	tc_port_load_fia_params(i915, dig_port);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h
index 783d75531435..463f1b3c836f 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.h
+++ b/drivers/gpu/drm/i915/display/intel_tc.h
@@ -13,6 +13,7 @@ struct intel_digital_port;
 
 bool intel_tc_port_connected(struct intel_digital_port *dig_port);
 u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port);
+u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port);
 int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port);
 void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port,
 				      int required_lanes);
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index b70221f5112a..c75e0ceecee6 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -898,7 +898,7 @@ static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_tv, base);
 }
 
-static struct intel_tv *intel_attached_tv(struct drm_connector *connector)
+static struct intel_tv *intel_attached_tv(struct intel_connector *connector)
 {
 	return enc_to_tv(intel_attached_encoder(connector));
 }
@@ -924,7 +924,7 @@ intel_enable_tv(struct intel_encoder *encoder,
 
 	/* Prevents vblank waits from timing out in intel_tv_detect_type() */
 	intel_wait_for_vblank(dev_priv,
-			      to_intel_crtc(pipe_config->base.crtc)->pipe);
+			      to_intel_crtc(pipe_config->uapi.crtc)->pipe);
 
 	I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
 }
@@ -961,11 +961,10 @@ intel_tv_mode_valid(struct drm_connector *connector,
 		return MODE_CLOCK_HIGH;
 
 	/* Ensure TV refresh is close to desired refresh */
-	if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000)
-				< 1000)
-		return MODE_OK;
+	if (abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) >= 1000)
+		return MODE_CLOCK_RANGE;
 
-	return MODE_CLOCK_RANGE;
+	return MODE_OK;
 }
 
 static int
@@ -1086,7 +1085,7 @@ intel_tv_get_config(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 	struct drm_display_mode mode = {};
 	u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp;
 	struct tv_mode tv_mode = {};
@@ -1189,7 +1188,7 @@ intel_tv_compute_config(struct intel_encoder *encoder,
 		to_intel_tv_connector_state(conn_state);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
 	struct drm_display_mode *adjusted_mode =
-		&pipe_config->base.adjusted_mode;
+		&pipe_config->hw.adjusted_mode;
 	int hdisplay = adjusted_mode->crtc_hdisplay;
 	int vdisplay = adjusted_mode->crtc_vdisplay;
 
@@ -1418,7 +1417,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 				const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct intel_tv *intel_tv = enc_to_tv(encoder);
 	const struct intel_tv_connector_state *tv_conn_state =
 		to_intel_tv_connector_state(conn_state);
@@ -1528,7 +1527,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 			   ((video_levels->black << TV_BLACK_LEVEL_SHIFT) |
 			    (video_levels->blank << TV_BLANK_LEVEL_SHIFT)));
 
-	assert_pipe_disabled(dev_priv, intel_crtc->pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 
 	/* Filter ctl must be set before TV_WIN_SIZE */
 	tv_filter_ctl = TV_AUTO_SCALE;
@@ -1663,7 +1662,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
  */
 static void intel_tv_find_better_format(struct drm_connector *connector)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
+	struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector));
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int i;
 
@@ -1690,7 +1689,7 @@ intel_tv_detect(struct drm_connector *connector,
 		struct drm_modeset_acquire_ctx *ctx,
 		bool force)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
+	struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector));
 	enum drm_connector_status status;
 	int type;
 
@@ -1702,7 +1701,7 @@ intel_tv_detect(struct drm_connector *connector,
 		struct intel_load_detect_pipe tmp;
 		int ret;
 
-		ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);
+		ret = intel_get_load_detect_pipe(connector, &tmp, ctx);
 		if (ret < 0)
 			return ret;
 
@@ -1948,9 +1947,8 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 	intel_encoder->type = INTEL_OUTPUT_TVOUT;
 	intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER;
 	intel_encoder->port = PORT_NONE;
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	intel_encoder->pipe_mask = ~0;
 	intel_encoder->cloneable = 0;
-	intel_encoder->base.possible_crtcs = ((1 << 0) | (1 << 1));
 	intel_tv->type = DRM_MODE_CONNECTOR_Unknown;
 
 	/* BIOS margin values */
diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
index dfcd156b5094..4d0c23b29248 100644
--- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
@@ -114,6 +114,8 @@ enum bdb_block_id {
 	BDB_LVDS_POWER			= 44,
 	BDB_MIPI_CONFIG			= 52,
 	BDB_MIPI_SEQUENCE		= 53,
+	BDB_COMPRESSION_PARAMETERS	= 56,
+	BDB_GENERIC_DTD			= 58,
 	BDB_SKIP			= 254, /* VBIOS private block, ignore */
 };
 
@@ -291,6 +293,8 @@ struct bdb_general_features {
 #define DVO_PORT_HDMIE		12				/* 193 */
 #define DVO_PORT_DPF		13				/* N/A */
 #define DVO_PORT_HDMIF		14				/* N/A */
+#define DVO_PORT_DPG		15
+#define DVO_PORT_HDMIG		16
 #define DVO_PORT_MIPIA		21				/* 171 */
 #define DVO_PORT_MIPIB		22				/* 171 */
 #define DVO_PORT_MIPIC		23				/* 171 */
@@ -325,6 +329,7 @@ enum vbt_gmbus_ddi {
 #define DP_AUX_D 0x30
 #define DP_AUX_E 0x50
 #define DP_AUX_F 0x60
+#define DP_AUX_G 0x70
 
 #define VBT_DP_MAX_LINK_RATE_HBR3	0
 #define VBT_DP_MAX_LINK_RATE_HBR2	1
@@ -364,7 +369,7 @@ struct child_device_config {
 			u16 dtd_buf_ptr;			/* 161 */
 			u8 edidless_efp:1;			/* 161 */
 			u8 compression_enable:1;		/* 198 */
-			u8 compression_method:1;		/* 198 */
+			u8 compression_method_cps:1;		/* 198 */
 			u8 ganged_edp:1;			/* 202 */
 			u8 reserved0:4;
 			u8 compression_structure_index:4;	/* 198 */
@@ -789,6 +794,35 @@ struct bdb_lfp_backlight_data {
 } __packed;
 
 /*
+ * Block 44 - LFP Power Conservation Features Block
+ */
+
+struct als_data_entry {
+	u16 backlight_adjust;
+	u16 lux;
+} __packed;
+
+struct agressiveness_profile_entry {
+	u8 dpst_agressiveness : 4;
+	u8 lace_agressiveness : 4;
+} __packed;
+
+struct bdb_lfp_power {
+	u8 lfp_feature_bits;
+	struct als_data_entry als[5];
+	u8 lace_aggressiveness_profile;
+	u16 dpst;
+	u16 psr;
+	u16 drrs;
+	u16 lace_support;
+	u16 adt;
+	u16 dmrrs;
+	u16 adb;
+	u16 lace_enabled_status;
+	struct agressiveness_profile_entry aggressivenes[16];
+} __packed;
+
+/*
  * Block 52 - MIPI Configuration Block
  */
 
@@ -808,4 +842,85 @@ struct bdb_mipi_sequence {
 	u8 data[0]; /* up to 6 variable length blocks */
 } __packed;
 
+/*
+ * Block 56 - Compression Parameters
+ */
+
+#define VBT_RC_BUFFER_BLOCK_SIZE_1KB	0
+#define VBT_RC_BUFFER_BLOCK_SIZE_4KB	1
+#define VBT_RC_BUFFER_BLOCK_SIZE_16KB	2
+#define VBT_RC_BUFFER_BLOCK_SIZE_64KB	3
+
+#define VBT_DSC_LINE_BUFFER_DEPTH(vbt_value)	((vbt_value) + 8) /* bits */
+#define VBT_DSC_MAX_BPP(vbt_value)		(6 + (vbt_value) * 2)
+
+struct dsc_compression_parameters_entry {
+	u8 version_major:4;
+	u8 version_minor:4;
+
+	u8 rc_buffer_block_size:2;
+	u8 reserved1:6;
+
+	/*
+	 * Buffer size in bytes:
+	 *
+	 * 4 ^ rc_buffer_block_size * 1024 * (rc_buffer_size + 1) bytes
+	 */
+	u8 rc_buffer_size;
+	u32 slices_per_line;
+
+	u8 line_buffer_depth:4;
+	u8 reserved2:4;
+
+	/* Flag Bits 1 */
+	u8 block_prediction_enable:1;
+	u8 reserved3:7;
+
+	u8 max_bpp; /* mapping */
+
+	/* Color depth capabilities */
+	u8 reserved4:1;
+	u8 support_8bpc:1;
+	u8 support_10bpc:1;
+	u8 support_12bpc:1;
+	u8 reserved5:4;
+
+	u16 slice_height;
+} __packed;
+
+struct bdb_compression_parameters {
+	u16 entry_size;
+	struct dsc_compression_parameters_entry data[16];
+} __packed;
+
+/*
+ * Block 58 - Generic DTD Block
+ */
+
+struct generic_dtd_entry {
+	u32 pixel_clock;
+	u16 hactive;
+	u16 hblank;
+	u16 hfront_porch;
+	u16 hsync;
+	u16 vactive;
+	u16 vblank;
+	u16 vfront_porch;
+	u16 vsync;
+	u16 width_mm;
+	u16 height_mm;
+
+	/* Flags */
+	u8 rsvd_flags:6;
+	u8 vsync_positive_polarity:1;
+	u8 hsync_positive_polarity:1;
+
+	u8 rsvd[3];
+} __packed;
+
+struct bdb_generic_dtd {
+	u16 gdtd_size;
+	struct generic_dtd_entry dtd[];	/* up to 24 DTD's */
+} __packed;
+
 #endif /* _INTEL_VBT_DEFS_H_ */
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index d4fb7f16f9f6..9e6aaa302e40 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -10,6 +10,7 @@
 
 #include "i915_drv.h"
 #include "intel_display_types.h"
+#include "intel_dsi.h"
 #include "intel_vdsc.h"
 
 enum ROW_INDEX_BPP {
@@ -30,10 +31,8 @@ enum COLUMN_INDEX_BPC {
 	MAX_COLUMN_INDEX
 };
 
-#define DSC_SUPPORTED_VERSION_MIN		1
-
 /* From DSC_v1.11 spec, rc_parameter_Set syntax element typically constant */
-static u16 rc_buf_thresh[] = {
+static const u16 rc_buf_thresh[] = {
 	896, 1792, 2688, 3584, 4480, 5376, 6272, 6720, 7168, 7616,
 	7744, 7872, 8000, 8064
 };
@@ -53,7 +52,7 @@ struct rc_parameters {
  * Selected Rate Control Related Parameter Recommended Values
  * from DSC_v1.11 spec & C Model release: DSC_model_20161212
  */
-static struct rc_parameters rc_params[][MAX_COLUMN_INDEX] = {
+static const struct rc_parameters rc_parameters[][MAX_COLUMN_INDEX] = {
 {
 	/* 6BPP/8BPC */
 	{ 768, 15, 6144, 3, 13, 11, 11, {
@@ -319,63 +318,84 @@ static int get_column_index_for_rc_params(u8 bits_per_component)
 	}
 }
 
-int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
-				struct intel_crtc_state *pipe_config)
+static const struct rc_parameters *get_rc_params(u16 compressed_bpp,
+						 u8 bits_per_component)
+{
+	int row_index, column_index;
+
+	row_index = get_row_index_for_rc_params(compressed_bpp);
+	if (row_index < 0)
+		return NULL;
+
+	column_index = get_column_index_for_rc_params(bits_per_component);
+	if (column_index < 0)
+		return NULL;
+
+	return &rc_parameters[row_index][column_index];
+}
+
+bool intel_dsc_source_support(struct intel_encoder *encoder,
+			      const struct intel_crtc_state *crtc_state)
+{
+	const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+	enum pipe pipe = crtc->pipe;
+
+	if (!INTEL_INFO(i915)->display.has_dsc)
+		return false;
+
+	/* On TGL, DSC is supported on all Pipes */
+	if (INTEL_GEN(i915) >= 12)
+		return true;
+
+	if (INTEL_GEN(i915) >= 10 &&
+	    (pipe != PIPE_A ||
+	     (cpu_transcoder == TRANSCODER_EDP ||
+	      cpu_transcoder == TRANSCODER_DSI_0 ||
+	      cpu_transcoder == TRANSCODER_DSI_1)))
+		return true;
+
+	return false;
+}
+
+static bool is_pipe_dsc(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_dsc_config *vdsc_cfg = &pipe_config->dp_dsc_cfg;
-	u16 compressed_bpp = pipe_config->dsc_params.compressed_bpp;
+	const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	const struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+
+	if (INTEL_GEN(i915) >= 12)
+		return true;
+
+	if (cpu_transcoder == TRANSCODER_EDP ||
+	    cpu_transcoder == TRANSCODER_DSI_0 ||
+	    cpu_transcoder == TRANSCODER_DSI_1)
+		return false;
+
+	/* There's no pipe A DSC engine on ICL */
+	WARN_ON(crtc->pipe == PIPE_A);
+
+	return true;
+}
+
+int intel_dsc_compute_params(struct intel_encoder *encoder,
+			     struct intel_crtc_state *pipe_config)
+{
+	struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config;
+	u16 compressed_bpp = pipe_config->dsc.compressed_bpp;
+	const struct rc_parameters *rc_params;
 	u8 i = 0;
-	int row_index = 0;
-	int column_index = 0;
-	u8 line_buf_depth = 0;
 
-	vdsc_cfg->pic_width = pipe_config->base.adjusted_mode.crtc_hdisplay;
-	vdsc_cfg->pic_height = pipe_config->base.adjusted_mode.crtc_vdisplay;
+	vdsc_cfg->pic_width = pipe_config->hw.adjusted_mode.crtc_hdisplay;
+	vdsc_cfg->pic_height = pipe_config->hw.adjusted_mode.crtc_vdisplay;
 	vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width,
-					     pipe_config->dsc_params.slice_count);
-	/*
-	 * Slice Height of 8 works for all currently available panels. So start
-	 * with that if pic_height is an integral multiple of 8.
-	 * Eventually add logic to try multiple slice heights.
-	 */
-	if (vdsc_cfg->pic_height % 8 == 0)
-		vdsc_cfg->slice_height = 8;
-	else if (vdsc_cfg->pic_height % 4 == 0)
-		vdsc_cfg->slice_height = 4;
-	else
-		vdsc_cfg->slice_height = 2;
-
-	/* Values filled from DSC Sink DPCD */
-	vdsc_cfg->dsc_version_major =
-		(intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] &
-		 DP_DSC_MAJOR_MASK) >> DP_DSC_MAJOR_SHIFT;
-	vdsc_cfg->dsc_version_minor =
-		min(DSC_SUPPORTED_VERSION_MIN,
-		    (intel_dp->dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] &
-		     DP_DSC_MINOR_MASK) >> DP_DSC_MINOR_SHIFT);
-
-	vdsc_cfg->convert_rgb = intel_dp->dsc_dpcd[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT] &
-		DP_DSC_RGB;
-
-	line_buf_depth = drm_dp_dsc_sink_line_buf_depth(intel_dp->dsc_dpcd);
-	if (!line_buf_depth) {
-		DRM_DEBUG_KMS("DSC Sink Line Buffer Depth invalid\n");
-		return -EINVAL;
-	}
-	if (vdsc_cfg->dsc_version_minor == 2)
-		vdsc_cfg->line_buf_depth = (line_buf_depth == DSC_1_2_MAX_LINEBUF_DEPTH_BITS) ?
-			DSC_1_2_MAX_LINEBUF_DEPTH_VAL : line_buf_depth;
-	else
-		vdsc_cfg->line_buf_depth = (line_buf_depth > DSC_1_1_MAX_LINEBUF_DEPTH_BITS) ?
-			DSC_1_1_MAX_LINEBUF_DEPTH_BITS : line_buf_depth;
+					     pipe_config->dsc.slice_count);
 
 	/* Gen 11 does not support YCbCr */
 	vdsc_cfg->simple_422 = false;
 	/* Gen 11 does not support VBR */
 	vdsc_cfg->vbr_enable = false;
-	vdsc_cfg->block_pred_enable =
-			intel_dp->dsc_dpcd[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] &
-		DP_DSC_BLK_PREDICTION_IS_SUPPORTED;
 
 	/* Gen 11 only supports integral values of bpp */
 	vdsc_cfg->bits_per_pixel = compressed_bpp << 4;
@@ -399,39 +419,29 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
 		vdsc_cfg->rc_buf_thresh[13] = 0x7D;
 	}
 
-	row_index = get_row_index_for_rc_params(compressed_bpp);
-	column_index =
-		get_column_index_for_rc_params(vdsc_cfg->bits_per_component);
-
-	if (row_index < 0 || column_index < 0)
+	rc_params = get_rc_params(compressed_bpp, vdsc_cfg->bits_per_component);
+	if (!rc_params)
 		return -EINVAL;
 
-	vdsc_cfg->first_line_bpg_offset =
-		rc_params[row_index][column_index].first_line_bpg_offset;
-	vdsc_cfg->initial_xmit_delay =
-		rc_params[row_index][column_index].initial_xmit_delay;
-	vdsc_cfg->initial_offset =
-		rc_params[row_index][column_index].initial_offset;
-	vdsc_cfg->flatness_min_qp =
-		rc_params[row_index][column_index].flatness_min_qp;
-	vdsc_cfg->flatness_max_qp =
-		rc_params[row_index][column_index].flatness_max_qp;
-	vdsc_cfg->rc_quant_incr_limit0 =
-		rc_params[row_index][column_index].rc_quant_incr_limit0;
-	vdsc_cfg->rc_quant_incr_limit1 =
-		rc_params[row_index][column_index].rc_quant_incr_limit1;
+	vdsc_cfg->first_line_bpg_offset = rc_params->first_line_bpg_offset;
+	vdsc_cfg->initial_xmit_delay = rc_params->initial_xmit_delay;
+	vdsc_cfg->initial_offset = rc_params->initial_offset;
+	vdsc_cfg->flatness_min_qp = rc_params->flatness_min_qp;
+	vdsc_cfg->flatness_max_qp = rc_params->flatness_max_qp;
+	vdsc_cfg->rc_quant_incr_limit0 = rc_params->rc_quant_incr_limit0;
+	vdsc_cfg->rc_quant_incr_limit1 = rc_params->rc_quant_incr_limit1;
 
 	for (i = 0; i < DSC_NUM_BUF_RANGES; i++) {
 		vdsc_cfg->rc_range_params[i].range_min_qp =
-			rc_params[row_index][column_index].rc_range_params[i].range_min_qp;
+			rc_params->rc_range_params[i].range_min_qp;
 		vdsc_cfg->rc_range_params[i].range_max_qp =
-			rc_params[row_index][column_index].rc_range_params[i].range_max_qp;
+			rc_params->rc_range_params[i].range_max_qp;
 		/*
 		 * Range BPG Offset uses 2's complement and is only a 6 bits. So
 		 * mask it to get only 6 bits.
 		 */
 		vdsc_cfg->rc_range_params[i].range_bpg_offset =
-			rc_params[row_index][column_index].rc_range_params[i].range_bpg_offset &
+			rc_params->rc_range_params[i].range_bpg_offset &
 			DSC_RANGE_BPG_OFFSET_MASK;
 	}
 
@@ -453,45 +463,46 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
 	vdsc_cfg->initial_scale_value = (vdsc_cfg->rc_model_size << 3) /
 		(vdsc_cfg->rc_model_size - vdsc_cfg->initial_offset);
 
-	return drm_dsc_compute_rc_parameters(vdsc_cfg);
+	return 0;
 }
 
 enum intel_display_power_domain
 intel_dsc_power_domain(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *i915 = to_i915(crtc_state->base.crtc->dev);
-	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
 
 	/*
-	 * On ICL VDSC/joining for eDP transcoder uses a separate power well,
-	 * PW2. This requires POWER_DOMAIN_TRANSCODER_VDSC_PW2 power domain.
-	 * For any other transcoder, VDSC/joining uses the power well associated
-	 * with the pipe/transcoder in use. Hence another reference on the
-	 * transcoder power domain will suffice.
+	 * VDSC/joining uses a separate power well, PW2, and requires
+	 * POWER_DOMAIN_TRANSCODER_VDSC_PW2 power domain in two cases:
 	 *
-	 * On TGL we have the same mapping, but for transcoder A (the special
-	 * TRANSCODER_EDP is gone).
+	 *  - ICL eDP/DSI transcoder
+	 *  - TGL pipe A
+	 *
+	 * For any other pipe, VDSC/joining uses the power well associated with
+	 * the pipe in use. Hence another reference on the pipe power domain
+	 * will suffice. (Except no VDSC/joining on ICL pipe A.)
 	 */
-	if (INTEL_GEN(i915) >= 12 && cpu_transcoder == TRANSCODER_A)
-		return POWER_DOMAIN_TRANSCODER_VDSC_PW2;
-	else if (cpu_transcoder == TRANSCODER_EDP)
+	if (INTEL_GEN(i915) >= 12 && pipe == PIPE_A)
 		return POWER_DOMAIN_TRANSCODER_VDSC_PW2;
+	else if (is_pipe_dsc(crtc_state))
+		return POWER_DOMAIN_PIPE(pipe);
 	else
-		return POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+		return POWER_DOMAIN_TRANSCODER_VDSC_PW2;
 }
 
-static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
-						const struct intel_crtc_state *crtc_state)
+static void intel_dsc_pps_configure(struct intel_encoder *encoder,
+				    const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg;
+	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
 	enum pipe pipe = crtc->pipe;
-	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	u32 pps_val = 0;
 	u32 rc_buf_thresh_dword[4];
 	u32 rc_range_params_dword[8];
-	u8 num_vdsc_instances = (crtc_state->dsc_params.dsc_split) ? 2 : 1;
+	u8 num_vdsc_instances = (crtc_state->dsc.dsc_split) ? 2 : 1;
 	int i = 0;
 
 	/* Populate PICTURE_PARAMETER_SET_0 registers */
@@ -508,17 +519,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	if (vdsc_cfg->vbr_enable)
 		pps_val |= DSC_VBR_ENABLE;
 	DRM_INFO("PPS0 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_0, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_0, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_0(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_0(pipe),
 				   pps_val);
 	}
@@ -527,17 +538,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val = 0;
 	pps_val |= DSC_BPP(vdsc_cfg->bits_per_pixel);
 	DRM_INFO("PPS1 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_1, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_1, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_1(pipe),
 				   pps_val);
 	}
@@ -547,17 +558,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_PIC_HEIGHT(vdsc_cfg->pic_height) |
 		DSC_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances);
 	DRM_INFO("PPS2 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_2, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_2, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_2(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_2(pipe),
 				   pps_val);
 	}
@@ -567,17 +578,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_SLICE_HEIGHT(vdsc_cfg->slice_height) |
 		DSC_SLICE_WIDTH(vdsc_cfg->slice_width);
 	DRM_INFO("PPS3 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_3, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_3, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_3(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_3(pipe),
 				   pps_val);
 	}
@@ -587,17 +598,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_INITIAL_XMIT_DELAY(vdsc_cfg->initial_xmit_delay) |
 		DSC_INITIAL_DEC_DELAY(vdsc_cfg->initial_dec_delay);
 	DRM_INFO("PPS4 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_4, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_4, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_4(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe),
 				   pps_val);
 	}
@@ -607,17 +618,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_SCALE_INC_INT(vdsc_cfg->scale_increment_interval) |
 		DSC_SCALE_DEC_INT(vdsc_cfg->scale_decrement_interval);
 	DRM_INFO("PPS5 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_5, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_5, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_5(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe),
 				   pps_val);
 	}
@@ -629,17 +640,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 		DSC_FLATNESS_MIN_QP(vdsc_cfg->flatness_min_qp) |
 		DSC_FLATNESS_MAX_QP(vdsc_cfg->flatness_max_qp);
 	DRM_INFO("PPS6 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_6, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_6, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_6(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_6(pipe),
 				   pps_val);
 	}
@@ -649,17 +660,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_SLICE_BPG_OFFSET(vdsc_cfg->slice_bpg_offset) |
 		DSC_NFL_BPG_OFFSET(vdsc_cfg->nfl_bpg_offset);
 	DRM_INFO("PPS7 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_7, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_7, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_7(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_7(pipe),
 				   pps_val);
 	}
@@ -669,17 +680,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_FINAL_OFFSET(vdsc_cfg->final_offset) |
 		DSC_INITIAL_OFFSET(vdsc_cfg->initial_offset);
 	DRM_INFO("PPS8 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_8, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_8, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_8(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_8(pipe),
 				   pps_val);
 	}
@@ -689,17 +700,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	pps_val |= DSC_RC_MODEL_SIZE(DSC_RC_MODEL_SIZE_CONST) |
 		DSC_RC_EDGE_FACTOR(DSC_RC_EDGE_FACTOR_CONST);
 	DRM_INFO("PPS9 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_9, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_9, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_9(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_9(pipe),
 				   pps_val);
 	}
@@ -711,17 +722,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 		DSC_RC_TARGET_OFF_HIGH(DSC_RC_TGT_OFFSET_HI_CONST) |
 		DSC_RC_TARGET_OFF_LOW(DSC_RC_TGT_OFFSET_LO_CONST);
 	DRM_INFO("PPS10 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_10, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_10, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_10(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_10(pipe),
 				   pps_val);
 	}
@@ -734,17 +745,17 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 		DSC_SLICE_ROW_PER_FRAME(vdsc_cfg->pic_height /
 					vdsc_cfg->slice_height);
 	DRM_INFO("PPS16 = 0x%08x\n", pps_val);
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_PICTURE_PARAMETER_SET_16, pps_val);
 		/*
 		 * If 2 VDSC instances are needed, configure PPS for second
 		 * VDSC
 		 */
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(DSCC_PICTURE_PARAMETER_SET_16, pps_val);
 	} else {
 		I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_16(pipe), pps_val);
-		if (crtc_state->dsc_params.dsc_split)
+		if (crtc_state->dsc.dsc_split)
 			I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_16(pipe),
 				   pps_val);
 	}
@@ -758,12 +769,12 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 		DRM_INFO(" RC_BUF_THRESH%d = 0x%08x\n", i,
 			 rc_buf_thresh_dword[i / 4]);
 	}
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]);
 		I915_WRITE(DSCA_RC_BUF_THRESH_0_UDW, rc_buf_thresh_dword[1]);
 		I915_WRITE(DSCA_RC_BUF_THRESH_1, rc_buf_thresh_dword[2]);
 		I915_WRITE(DSCA_RC_BUF_THRESH_1_UDW, rc_buf_thresh_dword[3]);
-		if (crtc_state->dsc_params.dsc_split) {
+		if (crtc_state->dsc.dsc_split) {
 			I915_WRITE(DSCC_RC_BUF_THRESH_0,
 				   rc_buf_thresh_dword[0]);
 			I915_WRITE(DSCC_RC_BUF_THRESH_0_UDW,
@@ -782,7 +793,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 			   rc_buf_thresh_dword[2]);
 		I915_WRITE(ICL_DSC0_RC_BUF_THRESH_1_UDW(pipe),
 			   rc_buf_thresh_dword[3]);
-		if (crtc_state->dsc_params.dsc_split) {
+		if (crtc_state->dsc.dsc_split) {
 			I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0(pipe),
 				   rc_buf_thresh_dword[0]);
 			I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0_UDW(pipe),
@@ -807,7 +818,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 		DRM_INFO(" RC_RANGE_PARAM_%d = 0x%08x\n", i,
 			 rc_range_params_dword[i / 2]);
 	}
-	if (cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		I915_WRITE(DSCA_RC_RANGE_PARAMETERS_0,
 			   rc_range_params_dword[0]);
 		I915_WRITE(DSCA_RC_RANGE_PARAMETERS_0_UDW,
@@ -824,7 +835,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 			   rc_range_params_dword[6]);
 		I915_WRITE(DSCA_RC_RANGE_PARAMETERS_3_UDW,
 			   rc_range_params_dword[7]);
-		if (crtc_state->dsc_params.dsc_split) {
+		if (crtc_state->dsc.dsc_split) {
 			I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0,
 				   rc_range_params_dword[0]);
 			I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0_UDW,
@@ -859,7 +870,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 			   rc_range_params_dword[6]);
 		I915_WRITE(ICL_DSC0_RC_RANGE_PARAMETERS_3_UDW(pipe),
 			   rc_range_params_dword[7]);
-		if (crtc_state->dsc_params.dsc_split) {
+		if (crtc_state->dsc.dsc_split) {
 			I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0(pipe),
 				   rc_range_params_dword[0]);
 			I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0_UDW(pipe),
@@ -880,12 +891,79 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder,
 	}
 }
 
-static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder,
-				       const struct intel_crtc_state *crtc_state)
+void intel_dsc_get_config(struct intel_encoder *encoder,
+			  struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	enum pipe pipe = crtc->pipe;
+	enum intel_display_power_domain power_domain;
+	intel_wakeref_t wakeref;
+	u32 dss_ctl1, dss_ctl2, val;
+
+	if (!intel_dsc_source_support(encoder, crtc_state))
+		return;
+
+	power_domain = intel_dsc_power_domain(crtc_state);
+
+	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
+	if (!wakeref)
+		return;
+
+	if (!is_pipe_dsc(crtc_state)) {
+		dss_ctl1 = I915_READ(DSS_CTL1);
+		dss_ctl2 = I915_READ(DSS_CTL2);
+	} else {
+		dss_ctl1 = I915_READ(ICL_PIPE_DSS_CTL1(pipe));
+		dss_ctl2 = I915_READ(ICL_PIPE_DSS_CTL2(pipe));
+	}
+
+	crtc_state->dsc.compression_enable = dss_ctl2 & LEFT_BRANCH_VDSC_ENABLE;
+	if (!crtc_state->dsc.compression_enable)
+		goto out;
+
+	crtc_state->dsc.dsc_split = (dss_ctl2 & RIGHT_BRANCH_VDSC_ENABLE) &&
+		(dss_ctl1 & JOINER_ENABLE);
+
+	/* FIXME: add more state readout as needed */
+
+	/* PPS1 */
+	if (!is_pipe_dsc(crtc_state))
+		val = I915_READ(DSCA_PICTURE_PARAMETER_SET_1);
+	else
+		val = I915_READ(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe));
+	vdsc_cfg->bits_per_pixel = val;
+	crtc_state->dsc.compressed_bpp = vdsc_cfg->bits_per_pixel >> 4;
+out:
+	intel_display_power_put(dev_priv, power_domain, wakeref);
+}
+
+static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder,
+				    const struct intel_crtc_state *crtc_state)
+{
+	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct mipi_dsi_device *dsi;
+	struct drm_dsc_picture_parameter_set pps;
+	enum port port;
+
+	drm_dsc_pps_payload_pack(&pps, vdsc_cfg);
+
+	for_each_dsi_port(port, intel_dsi->ports) {
+		dsi = intel_dsi->dsi_hosts[port]->device;
+
+		mipi_dsi_picture_parameter_set(dsi, &pps);
+		mipi_dsi_compression_mode(dsi, true);
+	}
+}
+
+static void intel_dsc_dp_pps_write(struct intel_encoder *encoder,
+				   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg;
+	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
 	struct drm_dsc_pps_infoframe dp_dsc_pps_sdp;
 
 	/* Prepare DP SDP PPS header as per DP 1.4 spec, Table 2-123 */
@@ -902,25 +980,28 @@ static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder,
 void intel_dsc_enable(struct intel_encoder *encoder,
 		      const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum pipe pipe = crtc->pipe;
 	i915_reg_t dss_ctl1_reg, dss_ctl2_reg;
 	u32 dss_ctl1_val = 0;
 	u32 dss_ctl2_val = 0;
 
-	if (!crtc_state->dsc_params.compression_enable)
+	if (!crtc_state->dsc.compression_enable)
 		return;
 
 	/* Enable Power wells for VDSC/joining */
 	intel_display_power_get(dev_priv,
 				intel_dsc_power_domain(crtc_state));
 
-	intel_configure_pps_for_dsc_encoder(encoder, crtc_state);
+	intel_dsc_pps_configure(encoder, crtc_state);
 
-	intel_dp_write_dsc_pps_sdp(encoder, crtc_state);
+	if (encoder->type == INTEL_OUTPUT_DSI)
+		intel_dsc_dsi_pps_write(encoder, crtc_state);
+	else
+		intel_dsc_dp_pps_write(encoder, crtc_state);
 
-	if (crtc_state->cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(crtc_state)) {
 		dss_ctl1_reg = DSS_CTL1;
 		dss_ctl2_reg = DSS_CTL2;
 	} else {
@@ -928,7 +1009,7 @@ void intel_dsc_enable(struct intel_encoder *encoder,
 		dss_ctl2_reg = ICL_PIPE_DSS_CTL2(pipe);
 	}
 	dss_ctl2_val |= LEFT_BRANCH_VDSC_ENABLE;
-	if (crtc_state->dsc_params.dsc_split) {
+	if (crtc_state->dsc.dsc_split) {
 		dss_ctl2_val |= RIGHT_BRANCH_VDSC_ENABLE;
 		dss_ctl1_val |= JOINER_ENABLE;
 	}
@@ -938,16 +1019,16 @@ void intel_dsc_enable(struct intel_encoder *encoder,
 
 void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	i915_reg_t dss_ctl1_reg, dss_ctl2_reg;
 	u32 dss_ctl1_val = 0, dss_ctl2_val = 0;
 
-	if (!old_crtc_state->dsc_params.compression_enable)
+	if (!old_crtc_state->dsc.compression_enable)
 		return;
 
-	if (old_crtc_state->cpu_transcoder == TRANSCODER_EDP) {
+	if (!is_pipe_dsc(old_crtc_state)) {
 		dss_ctl1_reg = DSS_CTL1;
 		dss_ctl2_reg = DSS_CTL2;
 	} else {
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h
index 90d3f6017fcb..e56a3254c214 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.h
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.h
@@ -6,15 +6,20 @@
 #ifndef __INTEL_VDSC_H__
 #define __INTEL_VDSC_H__
 
+#include <linux/types.h>
+
 struct intel_encoder;
 struct intel_crtc_state;
-struct intel_dp;
 
+bool intel_dsc_source_support(struct intel_encoder *encoder,
+			      const struct intel_crtc_state *crtc_state);
 void intel_dsc_enable(struct intel_encoder *encoder,
 		      const struct intel_crtc_state *crtc_state);
 void intel_dsc_disable(const struct intel_crtc_state *crtc_state);
-int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
-				struct intel_crtc_state *pipe_config);
+int intel_dsc_compute_params(struct intel_encoder *encoder,
+			     struct intel_crtc_state *pipe_config);
+void intel_dsc_get_config(struct intel_encoder *encoder,
+			  struct intel_crtc_state *crtc_state);
 enum intel_display_power_domain
 intel_dsc_power_domain(const struct intel_crtc_state *crtc_state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c
new file mode 100644
index 000000000000..2ff7293986d4
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_vga.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/pci.h>
+#include <linux/vgaarb.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "intel_vga.h"
+
+static i915_reg_t intel_vga_cntrl_reg(struct drm_i915_private *i915)
+{
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		return VLV_VGACNTRL;
+	else if (INTEL_GEN(i915) >= 5)
+		return CPU_VGACNTRL;
+	else
+		return VGACNTRL;
+}
+
+/* Disable the VGA plane that we never use */
+void intel_vga_disable(struct drm_i915_private *dev_priv)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv);
+	u8 sr1;
+
+	/* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
+	vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
+	outb(SR01, VGA_SR_INDEX);
+	sr1 = inb(VGA_SR_DATA);
+	outb(sr1 | 1 << 5, VGA_SR_DATA);
+	vga_put(pdev, VGA_RSRC_LEGACY_IO);
+	udelay(300);
+
+	I915_WRITE(vga_reg, VGA_DISP_DISABLE);
+	POSTING_READ(vga_reg);
+}
+
+void intel_vga_redisable_power_on(struct drm_i915_private *dev_priv)
+{
+	i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv);
+
+	if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) {
+		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
+		intel_vga_disable(dev_priv);
+	}
+}
+
+void intel_vga_redisable(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+
+	/*
+	 * This function can be called both from intel_modeset_setup_hw_state or
+	 * at a very early point in our resume sequence, where the power well
+	 * structures are not yet restored. Since this function is at a very
+	 * paranoid "someone might have enabled VGA while we were not looking"
+	 * level, just check if the power well is enabled instead of trying to
+	 * follow the "don't touch the power well if we don't need it" policy
+	 * the rest of the driver uses.
+	 */
+	wakeref = intel_display_power_get_if_enabled(i915, POWER_DOMAIN_VGA);
+	if (!wakeref)
+		return;
+
+	intel_vga_redisable_power_on(i915);
+
+	intel_display_power_put(i915, POWER_DOMAIN_VGA, wakeref);
+}
+
+void intel_vga_reset_io_mem(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	/*
+	 * After we re-enable the power well, if we touch VGA register 0x3d5
+	 * we'll get unclaimed register interrupts. This stops after we write
+	 * anything to the VGA MSR register. The vgacon module uses this
+	 * register all the time, so if we unbind our driver and, as a
+	 * consequence, bind vgacon, we'll get stuck in an infinite loop at
+	 * console_unlock(). So make here we touch the VGA MSR register, making
+	 * sure vgacon can keep working normally without triggering interrupts
+	 * and error messages.
+	 */
+	vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
+	outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
+	vga_put(pdev, VGA_RSRC_LEGACY_IO);
+}
+
+static int
+intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode)
+{
+	unsigned int reg = INTEL_GEN(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL;
+	u16 gmch_ctrl;
+
+	if (pci_read_config_word(i915->bridge_dev, reg, &gmch_ctrl)) {
+		DRM_ERROR("failed to read control word\n");
+		return -EIO;
+	}
+
+	if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !enable_decode)
+		return 0;
+
+	if (enable_decode)
+		gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE;
+	else
+		gmch_ctrl |= INTEL_GMCH_VGA_DISABLE;
+
+	if (pci_write_config_word(i915->bridge_dev, reg, gmch_ctrl)) {
+		DRM_ERROR("failed to write control word\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static unsigned int
+intel_vga_set_decode(void *cookie, bool enable_decode)
+{
+	struct drm_i915_private *i915 = cookie;
+
+	intel_vga_set_state(i915, enable_decode);
+
+	if (enable_decode)
+		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
+		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+	else
+		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+}
+
+int intel_vga_register(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	int ret;
+
+	/*
+	 * If we have > 1 VGA cards, then we need to arbitrate access to the
+	 * common VGA resources.
+	 *
+	 * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA),
+	 * then we do not take part in VGA arbitration and the
+	 * vga_client_register() fails with -ENODEV.
+	 */
+	ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode);
+	if (ret && ret != -ENODEV)
+		return ret;
+
+	return 0;
+}
+
+void intel_vga_unregister(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	vga_client_register(pdev, NULL, NULL, NULL);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h
new file mode 100644
index 000000000000..ba5b55b917f0
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_vga.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_VGA_H__
+#define __INTEL_VGA_H__
+
+struct drm_i915_private;
+
+void intel_vga_reset_io_mem(struct drm_i915_private *i915);
+void intel_vga_disable(struct drm_i915_private *i915);
+void intel_vga_redisable(struct drm_i915_private *i915);
+void intel_vga_redisable_power_on(struct drm_i915_private *i915);
+int intel_vga_register(struct drm_i915_private *i915);
+void intel_vga_unregister(struct drm_i915_private *i915);
+
+#endif /* __INTEL_VGA_H__ */
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index a71b22bdd95b..daf4fc3dab6f 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -23,7 +23,6 @@
  * Author: Jani Nikula <jani.nikula@intel.com>
  */
 
-#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 
 #include <drm/drm_atomic_helper.h>
@@ -261,9 +260,9 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder,
 	struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi,
 						   base);
 	struct intel_connector *intel_connector = intel_dsi->attached_connector;
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
-	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	int ret;
 
 	DRM_DEBUG_KMS("\n");
@@ -319,7 +318,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder,
 static bool glk_dsi_enable_io(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 	bool cold_boot = false;
@@ -367,7 +366,7 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder)
 static void glk_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -438,7 +437,7 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
 static void bxt_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -465,7 +464,7 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder)
 static void vlv_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -516,7 +515,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder)
 static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -546,7 +545,7 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder)
 static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -579,7 +578,7 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder)
 static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -624,8 +623,8 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) {
@@ -681,7 +680,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
@@ -745,11 +744,11 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *pipe_config,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
-	struct drm_crtc *crtc = pipe_config->base.crtc;
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct drm_crtc *crtc = pipe_config->uapi.crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	enum pipe pipe = intel_crtc->pipe;
 	enum port port;
 	u32 val;
 	bool glk_cold_boot = false;
@@ -793,9 +792,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
 	if (!IS_GEMINILAKE(dev_priv))
 		intel_dsi_prepare(encoder, pipe_config);
 
-	/* Power on, try both CRC pmic gpio and VBT */
-	if (intel_dsi->gpio_panel)
-		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON);
 	intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay);
 
@@ -850,7 +846,7 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -882,16 +878,22 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder)
 }
 
 static void intel_dsi_post_disable(struct intel_encoder *encoder,
-				   const struct intel_crtc_state *pipe_config,
-				   const struct drm_connector_state *conn_state)
+				   const struct intel_crtc_state *old_crtc_state,
+				   const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
 	DRM_DEBUG_KMS("\n");
 
+	if (IS_GEN9_LP(dev_priv)) {
+		intel_crtc_vblank_off(old_crtc_state);
+
+		skl_scaler_disable(old_crtc_state);
+	}
+
 	if (is_vid_mode(intel_dsi)) {
 		for_each_dsi_port(port, intel_dsi->ports)
 			vlv_dsi_wait_for_fifo_empty(intel_dsi, port);
@@ -939,11 +941,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder,
 	/* Assert reset */
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET);
 
-	/* Power off, try both CRC pmic gpio and VBT */
 	intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF);
-	if (intel_dsi->gpio_panel)
-		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0);
 
 	/*
 	 * FIXME As we do with eDP, just make a note of the time here
@@ -956,7 +955,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	intel_wakeref_t wakeref;
 	enum port port;
 	bool active = false;
@@ -1032,10 +1031,10 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_display_mode *adjusted_mode =
-					&pipe_config->base.adjusted_mode;
+					&pipe_config->hw.adjusted_mode;
 	struct drm_display_mode *adjusted_mode_sw;
-	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	unsigned int lane_count = intel_dsi->lane_count;
 	unsigned int bpp, fmt;
 	enum port port;
@@ -1045,7 +1044,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder,
 				crtc_hblank_start_sw, crtc_hblank_end_sw;
 
 	/* FIXME: hw readout should not depend on SW state */
-	adjusted_mode_sw = &crtc->config->base.adjusted_mode;
+	adjusted_mode_sw = &crtc->config->hw.adjusted_mode;
 
 	/*
 	 * Atleast one port is active as encoder->get_config called only if
@@ -1204,7 +1203,7 @@ static void intel_dsi_get_config(struct intel_encoder *encoder,
 	}
 
 	if (pclk) {
-		pipe_config->base.adjusted_mode.crtc_clock = pclk;
+		pipe_config->hw.adjusted_mode.crtc_clock = pclk;
 		pipe_config->port_clock = pclk;
 	}
 }
@@ -1228,7 +1227,7 @@ static void set_dsi_timings(struct drm_encoder *encoder,
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
 	enum port port;
 	unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	unsigned int lane_count = intel_dsi->lane_count;
@@ -1315,9 +1314,9 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
-	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
+	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	enum port port;
 	unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	u32 val, tmp;
@@ -1506,7 +1505,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 static void intel_dsi_unprepare(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -1533,12 +1532,9 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder)
 
 static void intel_dsi_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
-
-	/* dispose of the gpios */
-	if (intel_dsi->gpio_panel)
-		gpiod_put(intel_dsi->gpio_panel);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
 
+	intel_dsi_vbt_gpio_cleanup(intel_dsi);
 	intel_encoder_destroy(encoder);
 }
 
@@ -1819,6 +1815,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 	struct drm_connector *connector;
 	struct drm_display_mode *current_mode, *fixed_mode;
 	enum port port;
+	enum pipe pipe;
 
 	DRM_DEBUG_KMS("\n");
 
@@ -1870,11 +1867,11 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 	 * port C. BXT isn't limited like this.
 	 */
 	if (IS_GEN9_LP(dev_priv))
-		intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C);
+		intel_encoder->pipe_mask = ~0;
 	else if (port == PORT_A)
-		intel_encoder->crtc_mask = BIT(PIPE_A);
+		intel_encoder->pipe_mask = BIT(PIPE_A);
 	else
-		intel_encoder->crtc_mask = BIT(PIPE_B);
+		intel_encoder->pipe_mask = BIT(PIPE_B);
 
 	if (dev_priv->vbt.dsi.config->dual_link)
 		intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C);
@@ -1917,20 +1914,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 
 	vlv_dphy_param_init(intel_dsi);
 
-	/*
-	 * In case of BYT with CRC PMIC, we need to use GPIO for
-	 * Panel control.
-	 */
-	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
-	    (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) {
-		intel_dsi->gpio_panel =
-			gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH);
-
-		if (IS_ERR(intel_dsi->gpio_panel)) {
-			DRM_ERROR("Failed to own gpio for panel control\n");
-			intel_dsi->gpio_panel = NULL;
-		}
-	}
+	intel_dsi_vbt_gpio_init(intel_dsi,
+				intel_dsi_get_hw_state(intel_encoder, &pipe));
 
 	drm_connector_init(dev, connector, &intel_dsi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DSI);
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
index 95f39cd0ce02..6b89e67b120f 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
@@ -117,7 +117,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder,
 			struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	int ret;
 	u32 dsi_clk;
 
@@ -255,7 +255,7 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder,
 		     struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	u32 dsi_clock, pclk;
 	u32 pll_ctl, pll_div;
@@ -321,7 +321,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder,
 	u32 pclk;
 	u32 dsi_clk;
 	u32 dsi_ratio;
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 
@@ -341,7 +341,7 @@ void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port)
 {
 	u32 temp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	temp = I915_READ(MIPI_CTRL(port));
 	temp &= ~ESCAPE_CLOCK_DIVIDER_MASK;
@@ -455,7 +455,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder,
 			struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max;
 	u32 dsi_clk;
 
@@ -503,7 +503,7 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder,
 			const struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
deleted file mode 100644
index 7e73aa587967..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 3d4f5775a4ba..25235ef630c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -9,16 +9,16 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
-static __always_inline u32 __busy_read_flag(u8 id)
+static __always_inline u32 __busy_read_flag(u16 id)
 {
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+	if (id == (u16)I915_ENGINE_CLASS_INVALID)
 		return 0xffff0000u;
 
 	GEM_BUG_ON(id >= 16);
 	return 0x10000u << id;
 }
 
-static __always_inline u32 __busy_write_id(u8 id)
+static __always_inline u32 __busy_write_id(u16 id)
 {
 	/*
 	 * The uABI guarantees an active writer is also amongst the read
@@ -29,14 +29,14 @@ static __always_inline u32 __busy_write_id(u8 id)
 	 * last_read - hence we always set both read and write busy for
 	 * last_write.
 	 */
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+	if (id == (u16)I915_ENGINE_CLASS_INVALID)
 		return 0xffffffffu;
 
 	return (id + 1) | __busy_read_flag(id);
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
 {
 	const struct i915_request *rq;
 
@@ -57,7 +57,7 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
 		return 0;
 
 	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+	BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
 	return flag(rq->engine->uabi_class);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index b9f504ba3b32..34be4c0ee7c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -20,33 +20,31 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
 {
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
-	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 }
 
 static int clflush_work(struct dma_fence_work *base)
 {
 	struct clflush *clflush = container_of(base, typeof(*clflush), base);
-	struct drm_i915_gem_object *obj = fetch_and_zero(&clflush->obj);
+	struct drm_i915_gem_object *obj = clflush->obj;
 	int err;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
-		goto put;
+		return err;
 
 	__do_clflush(obj);
 	i915_gem_object_unpin_pages(obj);
 
-put:
-	i915_gem_object_put(obj);
-	return err;
+	return 0;
 }
 
 static void clflush_release(struct dma_fence_work *base)
 {
 	struct clflush *clflush = container_of(base, typeof(*clflush), base);
 
-	if (clflush->obj)
-		i915_gem_object_put(clflush->obj);
+	i915_gem_object_put(clflush->obj);
 }
 
 static const struct dma_fence_work_ops clflush_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index f99920652751..81366aa4812b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_private *i915 = w->ce->engine->i915;
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
 	struct i915_request *rq;
@@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	/* XXX: we need to kill this */
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (unlikely(err))
-		goto out_unlock;
+		goto out_signal;
 
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
@@ -211,7 +208,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_ref(&vma->active, rq->timeline, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (err)
 		goto out_request;
 
@@ -229,8 +226,6 @@ out_batch:
 	intel_emit_vma_release(w->ce, batch);
 out_unpin:
 	i915_vma_unpin(vma);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e41fd94ae5a9..a2e57e62af30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,8 +69,13 @@
 
 #include <drm/i915_drm.h>
 
-#include "gt/intel_lrc_reg.h"
+#include "gt/gen6_ppgtt.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
@@ -167,95 +172,71 @@ lookup_user_engine(struct i915_gem_context *ctx,
 	return i915_gem_context_get_engine(ctx, idx);
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
+static struct i915_address_space *
+context_get_vm_rcu(struct i915_gem_context *ctx)
 {
-	unsigned int max;
+	GEM_BUG_ON(!rcu_access_pointer(ctx->vm));
 
-	lockdep_assert_held(&i915->contexts.mutex);
+	do {
+		struct i915_address_space *vm;
 
-	if (INTEL_GEN(i915) >= 12)
-		max = GEN12_MAX_CONTEXT_HW_ID;
-	else if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
 		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
+		 * We do not allow downgrading from full-ppgtt [to a shared
+		 * global gtt], so ctx->vm cannot become NULL.
 		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
+		vm = rcu_dereference(ctx->vm);
+		if (!kref_get_unless_zero(&vm->ref))
 			continue;
-		}
 
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
+		/*
+		 * This ppgtt may have be reallocated between
+		 * the read and the kref, and reassigned to a third
+		 * context. In order to avoid inadvertent sharing
+		 * of this ppgtt with that third context (and not
+		 * src), we have to confirm that we have the same
+		 * ppgtt after passing through the strong memory
+		 * barrier implied by a successful
+		 * kref_get_unless_zero().
+		 *
+		 * Once we have acquired the current ppgtt of ctx,
+		 * we no longer care if it is released from ctx, as
+		 * it cannot be reallocated elsewhere.
+		 */
 
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
+		if (vm == rcu_access_pointer(ctx->vm))
+			return rcu_pointer_handoff(vm);
+
+		i915_vm_put(vm);
+	} while (1);
 }
 
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
+static void intel_context_set_gem(struct intel_context *ce,
+				  struct i915_gem_context *ctx)
 {
-	int ret;
+	GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
+	RCU_INIT_POINTER(ce->gem_context, ctx);
 
-	lockdep_assert_held(&i915->contexts.mutex);
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		ce->ring = __intel_context_ring_size(SZ_16K);
 
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
+	if (rcu_access_pointer(ctx->vm)) {
+		struct i915_address_space *vm;
 
-	*out = ret;
-	return 0;
-}
+		rcu_read_lock();
+		vm = context_get_vm_rcu(ctx); /* hmm */
+		rcu_read_unlock();
 
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
+		i915_vm_put(ce->vm);
+		ce->vm = vm;
+	}
 
-	if (list_empty(&ctx->hw_id_link))
-		return;
+	GEM_BUG_ON(ce->timeline);
+	if (ctx->timeline)
+		ce->timeline = intel_timeline_get(ctx->timeline);
 
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
+	if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+	    intel_engine_has_semaphores(ce->engine))
+		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 }
 
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
@@ -264,6 +245,7 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 		if (!e->engines[count])
 			continue;
 
+		RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
 		intel_context_put(e->engines[count]);
 	}
 	kfree(e);
@@ -294,103 +276,251 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce;
 
-		ce = intel_context_create(ctx, engine);
+		if (engine->legacy_idx == INVALID_ENGINE)
+			continue;
+
+		GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES);
+		GEM_BUG_ON(e->engines[engine->legacy_idx]);
+
+		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
-			__free_engines(e, id);
+			__free_engines(e, e->num_engines + 1);
 			return ERR_CAST(ce);
 		}
 
-		e->engines[id] = ce;
-		e->num_engines = id + 1;
+		intel_context_set_gem(ce, ctx);
+
+		e->engines[engine->legacy_idx] = ce;
+		e->num_engines = max(e->num_engines, engine->legacy_idx);
 	}
+	e->num_engines++;
 
 	return e;
 }
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	release_hw_id(ctx);
-	if (ctx->vm)
-		i915_vm_put(ctx->vm);
+	spin_lock(&ctx->i915->gem.contexts.lock);
+	list_del(&ctx->link);
+	spin_unlock(&ctx->i915->gem.contexts.lock);
 
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
-	kfree(ctx->jump_whitelist);
-
 	if (ctx->timeline)
 		intel_timeline_put(ctx->timeline);
 
-	kfree(ctx->name);
 	put_pid(ctx->pid);
-
-	list_del(&ctx->link);
 	mutex_destroy(&ctx->mutex);
 
 	kfree_rcu(ctx, rcu);
 }
 
-static void contexts_free(struct drm_i915_private *i915)
+static void contexts_free_all(struct llist_node *list)
 {
-	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
 	struct i915_gem_context *ctx, *cn;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	llist_for_each_entry_safe(ctx, cn, freed, free_link)
+	llist_for_each_entry_safe(ctx, cn, list, free_link)
 		i915_gem_context_free(ctx);
 }
 
-static void contexts_free_first(struct drm_i915_private *i915)
+static void contexts_flush_free(struct i915_gem_contexts *gc)
 {
-	struct i915_gem_context *ctx;
-	struct llist_node *freed;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	freed = llist_del_first(&i915->contexts.free_list);
-	if (!freed)
-		return;
-
-	ctx = container_of(freed, typeof(*ctx), free_link);
-	i915_gem_context_free(ctx);
+	contexts_free_all(llist_del_all(&gc->free_list));
 }
 
 static void contexts_free_worker(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), contexts.free_work);
+	struct i915_gem_contexts *gc =
+		container_of(work, typeof(*gc), free_work);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	contexts_free(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	contexts_flush_free(gc);
 }
 
 void i915_gem_context_release(struct kref *ref)
 {
 	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
-	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
 
 	trace_i915_context_free(ctx);
-	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
-		queue_work(i915->wq, &i915->contexts.free_work);
+	if (llist_add(&ctx->free_link, &gc->free_list))
+		schedule_work(&gc->free_work);
 }
 
-static void context_close(struct i915_gem_context *ctx)
+static inline struct i915_gem_engines *
+__context_engines_static(const struct i915_gem_context *ctx)
 {
-	mutex_lock(&ctx->mutex);
+	return rcu_dereference_protected(ctx->engines, true);
+}
 
-	i915_gem_context_set_closed(ctx);
-	ctx->file_priv = ERR_PTR(-EBADF);
+static bool __reset_engine(struct intel_engine_cs *engine)
+{
+	struct intel_gt *gt = engine->gt;
+	bool success = false;
+
+	if (!intel_has_reset_engine(gt))
+		return false;
 
+	if (!test_and_set_bit(I915_RESET_ENGINE + engine->id,
+			      &gt->reset.flags)) {
+		success = intel_engine_reset(engine, NULL) == 0;
+		clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
+				      &gt->reset.flags);
+	}
+
+	return success;
+}
+
+static void __reset_context(struct i915_gem_context *ctx,
+			    struct intel_engine_cs *engine)
+{
+	intel_gt_handle_error(engine->gt, engine->mask, 0,
+			      "context closure in %s", ctx->name);
+}
+
+static bool __cancel_engine(struct intel_engine_cs *engine)
+{
 	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 *
+	 * If there is no hangchecking (one of the reasons why we try to
+	 * cancel the context) and no forced preemption, there may be no
+	 * means by which we reset the GPU and evict the persistent hog.
+	 * Ergo if we are unable to inject a preemptive pulse that can
+	 * kill the banned context, we fallback to doing a local reset
+	 * instead.
 	 */
-	release_hw_id(ctx);
+	if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) &&
+	    !intel_engine_pulse(engine))
+		return true;
+
+	/* If we are unable to send a pulse, try resetting this engine. */
+	return __reset_engine(engine);
+}
+
+static struct intel_engine_cs *__active_engine(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine, *locked;
+
+	/*
+	 * Serialise with __i915_request_submit() so that it sees
+	 * is-banned?, or we know the request is already inflight.
+	 */
+	locked = READ_ONCE(rq->engine);
+	spin_lock_irq(&locked->active.lock);
+	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+		spin_unlock(&locked->active.lock);
+		spin_lock(&engine->active.lock);
+		locked = engine;
+	}
+
+	engine = NULL;
+	if (i915_request_is_active(rq) && !rq->fence.error)
+		engine = rq->engine;
+
+	spin_unlock_irq(&locked->active.lock);
+
+	return engine;
+}
+
+static struct intel_engine_cs *active_engine(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = NULL;
+	struct i915_request *rq;
+
+	if (!ce->timeline)
+		return NULL;
+
+	mutex_lock(&ce->timeline->mutex);
+	list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
+		if (i915_request_completed(rq))
+			break;
+
+		/* Check with the backend if the request is inflight */
+		engine = __active_engine(rq);
+		if (engine)
+			break;
+	}
+	mutex_unlock(&ce->timeline->mutex);
+
+	return engine;
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may be have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct intel_engine_cs *engine;
+
+		if (intel_context_set_banned(ce))
+			continue;
+
+		/*
+		 * Check the current active state of this context; if we
+		 * are currently executing on the GPU we need to evict
+		 * ourselves. On the other hand, if we haven't yet been
+		 * submitted to the GPU or if everything is complete,
+		 * we have nothing to do.
+		 */
+		engine = active_engine(ce);
+
+		/* First attempt to gracefully cancel the context */
+		if (engine && !__cancel_engine(engine))
+			/*
+			 * If we are unable to send a preemptive pulse to bump
+			 * the context from the GPU, we have to resort to a full
+			 * reset. We hope the collateral damage is worth it.
+			 */
+			__reset_context(ctx, engine);
+	}
+}
+
+static void set_closed_name(struct i915_gem_context *ctx)
+{
+	char *s;
+
+	/* Replace '[]' with '<>' to indicate closed in debug prints */
+
+	s = strrchr(ctx->name, '[');
+	if (!s)
+		return;
+
+	*s = '<';
+
+	s = strchr(s + 1, ']');
+	if (s)
+		*s = '>';
+}
+
+static void context_close(struct i915_gem_context *ctx)
+{
+	struct i915_address_space *vm;
+
+	i915_gem_context_set_closed(ctx);
+	set_closed_name(ctx);
+
+	mutex_lock(&ctx->mutex);
+
+	vm = i915_gem_context_vm(ctx);
+	if (vm)
+		i915_vm_close(vm);
+
+	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
@@ -400,9 +530,47 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keeping the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 */
+	if (!i915_gem_context_is_persistent(ctx) ||
+	    !i915_modparams.enable_hangcheck)
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
+{
+	if (i915_gem_context_is_persistent(ctx) == state)
+		return 0;
+
+	if (state) {
+		/*
+		 * Only contexts that are short-lived [that will expire or be
+		 * reset] are allowed to survive past termination. We require
+		 * hangcheck to ensure that the persistent requests are healthy.
+		 */
+		if (!i915_modparams.enable_hangcheck)
+			return -EINVAL;
+
+		i915_gem_context_set_persistence(ctx);
+	} else {
+		/* To cancel a context we use "preempt-to-idle" */
+		if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+			return -ENODEV;
+
+		i915_gem_context_clear_persistence(ctx);
+	}
+
+	return 0;
+}
+
 static struct i915_gem_context *
 __create_context(struct drm_i915_private *i915)
 {
@@ -416,7 +584,6 @@ __create_context(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->ref);
-	list_add_tail(&ctx->link, &i915->contexts.list);
 	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
@@ -430,7 +597,6 @@ __create_context(struct drm_i915_private *i915)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
@@ -439,12 +605,14 @@ __create_context(struct drm_i915_private *i915)
 
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
+	__context_set_persistence(ctx, true /* cgroup hook? */);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
-	ctx->jump_whitelist = NULL;
-	ctx->jump_whitelist_cmds = 0;
+	spin_lock(&i915->gem.contexts.lock);
+	list_add_tail(&ctx->link, &i915->gem.contexts.list);
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return ctx;
 
@@ -475,11 +643,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm)
 static struct i915_address_space *
 __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 {
-	struct i915_address_space *old = ctx->vm;
+	struct i915_address_space *old = i915_gem_context_vm(ctx);
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_get(vm);
+	rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -488,12 +656,12 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 static void __assign_ppgtt(struct i915_gem_context *ctx,
 			   struct i915_address_space *vm)
 {
-	if (vm == ctx->vm)
+	if (vm == rcu_access_pointer(ctx->vm))
 		return;
 
 	vm = __set_ppgtt(ctx, vm);
 	if (vm)
-		i915_vm_put(vm);
+		i915_vm_close(vm);
 }
 
 static void __set_timeline(struct intel_timeline **dst,
@@ -520,27 +688,25 @@ static void __assign_timeline(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
-	    !HAS_EXECLISTS(dev_priv))
+	    !HAS_EXECLISTS(i915))
 		return ERR_PTR(-EINVAL);
 
-	/* Reap the most stale context */
-	contexts_free_first(dev_priv);
+	/* Reap the stale contexts */
+	contexts_flush_free(&i915->gem.contexts);
 
-	ctx = __create_context(dev_priv);
+	ctx = __create_context(i915);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (HAS_FULL_PPGTT(dev_priv)) {
+	if (HAS_FULL_PPGTT(i915)) {
 		struct i915_ppgtt *ppgtt;
 
-		ppgtt = i915_ppgtt_create(dev_priv);
+		ppgtt = i915_ppgtt_create(&i915->gt);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
@@ -548,14 +714,17 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 			return ERR_CAST(ppgtt);
 		}
 
+		mutex_lock(&ctx->mutex);
 		__assign_ppgtt(ctx, &ppgtt->vm);
+		mutex_unlock(&ctx->mutex);
+
 		i915_vm_put(&ppgtt->vm);
 	}
 
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
 		struct intel_timeline *timeline;
 
-		timeline = intel_timeline_create(&dev_priv->gt, NULL);
+		timeline = intel_timeline_create(&i915->gt, NULL);
 		if (IS_ERR(timeline)) {
 			context_close(ctx);
 			return ERR_CAST(timeline);
@@ -570,105 +739,26 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 	return ctx;
 }
 
-static void
-destroy_kernel_context(struct i915_gem_context **ctxp)
+static void init_contexts(struct i915_gem_contexts *gc)
 {
-	struct i915_gem_context *ctx;
-
-	/* Keep the context ref so that we can free it immediately ourselves */
-	ctx = i915_gem_context_get(fetch_and_zero(ctxp));
-	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+	spin_lock_init(&gc->lock);
+	INIT_LIST_HEAD(&gc->list);
 
-	context_close(ctx);
-	i915_gem_context_free(ctx);
+	INIT_WORK(&gc->free_work, contexts_free_worker);
+	init_llist_head(&gc->free_list);
 }
 
-struct i915_gem_context *
-i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
+void i915_gem_init__contexts(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx;
-	int err;
-
-	ctx = i915_gem_create_context(i915, 0);
-	if (IS_ERR(ctx))
-		return ctx;
-
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
-	i915_gem_context_clear_bannable(ctx);
-	ctx->sched.priority = I915_USER_PRIORITY(prio);
-
-	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
-
-	return ctx;
-}
-
-static void init_contexts(struct drm_i915_private *i915)
-{
-	mutex_init(&i915->contexts.mutex);
-	INIT_LIST_HEAD(&i915->contexts.list);
-
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
-
-	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
-	init_llist_head(&i915->contexts.free_list);
-}
-
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
-{
-	struct i915_gem_context *ctx;
-
-	/* Reassure ourselves we are only called once */
-	GEM_BUG_ON(dev_priv->kernel_context);
-
-	init_contexts(dev_priv);
-
-	/* lowest priority; idle task */
-	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
-	if (IS_ERR(ctx)) {
-		DRM_ERROR("Failed to create default global context\n");
-		return PTR_ERR(ctx);
-	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
-	dev_priv->kernel_context = ctx;
-
+	init_contexts(&i915->gem.contexts);
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+			 DRIVER_CAPS(i915)->has_logical_contexts ?
 			 "logical" : "fake");
-	return 0;
 }
 
-void i915_gem_contexts_fini(struct drm_i915_private *i915)
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
-}
-
-static int context_idr_cleanup(int id, void *p, void *data)
-{
-	context_close(p);
-	return 0;
+	flush_work(&i915->gem.contexts.free_work);
 }
 
 static int vm_idr_cleanup(int id, void *p, void *data)
@@ -678,33 +768,29 @@ static int vm_idr_cleanup(int id, void *p, void *data)
 }
 
 static int gem_context_register(struct i915_gem_context *ctx,
-				struct drm_i915_file_private *fpriv)
+				struct drm_i915_file_private *fpriv,
+				u32 *id)
 {
+	struct i915_address_space *vm;
 	int ret;
 
 	ctx->file_priv = fpriv;
-	if (ctx->vm)
-		ctx->vm->file = fpriv;
+
+	mutex_lock(&ctx->mutex);
+	vm = i915_gem_context_vm(ctx);
+	if (vm)
+		WRITE_ONCE(vm->file, fpriv); /* XXX */
+	mutex_unlock(&ctx->mutex);
 
 	ctx->pid = get_task_pid(current, PIDTYPE_PID);
-	ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
-			      current->comm, pid_nr(ctx->pid));
-	if (!ctx->name) {
-		ret = -ENOMEM;
-		goto err_pid;
-	}
+	snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
+		 current->comm, pid_nr(ctx->pid));
 
 	/* And finally expose ourselves to userspace via the idr */
-	mutex_lock(&fpriv->context_idr_lock);
-	ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL);
-	mutex_unlock(&fpriv->context_idr_lock);
-	if (ret >= 0)
-		goto out;
+	ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
+	if (ret)
+		put_pid(fetch_and_zero(&ctx->pid));
 
-	kfree(fetch_and_zero(&ctx->name));
-err_pid:
-	put_pid(fetch_and_zero(&ctx->pid));
-out:
 	return ret;
 }
 
@@ -714,51 +800,51 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	int err;
+	u32 id;
 
-	mutex_init(&file_priv->context_idr_lock);
-	mutex_init(&file_priv->vm_idr_lock);
+	xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC);
 
-	idr_init(&file_priv->context_idr);
+	mutex_init(&file_priv->vm_idr_lock);
 	idr_init_base(&file_priv->vm_idr, 1);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err;
 	}
 
-	err = gem_context_register(ctx, file_priv);
+	err = gem_context_register(ctx, file_priv, &id);
 	if (err < 0)
 		goto err_ctx;
 
-	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-	GEM_BUG_ON(err > 0);
-
+	GEM_BUG_ON(id);
 	return 0;
 
 err_ctx:
 	context_close(ctx);
 err:
 	idr_destroy(&file_priv->vm_idr);
-	idr_destroy(&file_priv->context_idr);
+	xa_destroy(&file_priv->context_xa);
 	mutex_destroy(&file_priv->vm_idr_lock);
-	mutex_destroy(&file_priv->context_idr_lock);
 	return err;
 }
 
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *i915 = file_priv->dev_priv;
+	struct i915_gem_context *ctx;
+	unsigned long idx;
 
-	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
-	idr_destroy(&file_priv->context_idr);
-	mutex_destroy(&file_priv->context_idr_lock);
+	xa_for_each(&file_priv->context_xa, idx, ctx)
+		context_close(ctx);
+	xa_destroy(&file_priv->context_xa);
 
 	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
 	idr_destroy(&file_priv->vm_idr);
 	mutex_destroy(&file_priv->vm_idr_lock);
+
+	contexts_flush_free(&i915->gem.contexts);
 }
 
 int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -776,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (args->flags)
 		return -EINVAL;
 
-	ppgtt = i915_ppgtt_create(i915);
+	ppgtt = i915_ppgtt_create(&i915->gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -851,6 +937,7 @@ struct context_barrier_task {
 	void *data;
 };
 
+__i915_active_call
 static void cb_retire(struct i915_active *base)
 {
 	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
@@ -870,20 +957,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 				void (*task)(void *data),
 				void *data)
 {
-	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err = 0;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!task);
 
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	i915_active_init(&cb->base, NULL, cb_retire);
 	err = i915_active_acquire(&cb->base);
 	if (err) {
 		kfree(cb);
@@ -915,7 +1000,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (emit)
 			err = emit(rq, data);
 		if (err == 0)
-			err = i915_active_ref(&cb->base, rq->timeline, rq);
+			err = i915_active_add_request(&cb->base, rq);
 
 		i915_request_add(rq);
 		if (err)
@@ -938,16 +1023,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_address_space *vm;
 	int ret;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
-	/* XXX rcu acquire? */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	vm = i915_vm_get(ctx->vm);
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	rcu_read_lock();
+	vm = context_get_vm_rcu(ctx);
+	rcu_read_unlock();
 
 	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
 	if (ret)
@@ -958,7 +1039,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret < 0)
 		goto err_unlock;
 
-	i915_vm_get(vm);
+	i915_vm_open(vm);
 
 	args->size = 0;
 	args->value = ret;
@@ -978,12 +1059,12 @@ static void set_ppgtt_barrier(void *data)
 	if (INTEL_GEN(old->i915) < 8)
 		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
 
-	i915_vm_put(old);
+	i915_vm_close(old);
 }
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
-	struct i915_address_space *vm = rq->hw_context->vm;
+	struct i915_address_space *vm = rq->context->vm;
 	struct intel_engine_cs *engine = rq->engine;
 	u32 base = engine->mmio_base;
 	u32 *cs;
@@ -1008,12 +1089,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 		intel_ring_advance(rq, cs);
 	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
 		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+		int err;
+
+		/* Magic required to prevent forcewake errors! */
+		err = engine->emit_flush(rq, EMIT_INVALIDATE);
+		if (err)
+			return err;
 
 		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
 		if (IS_ERR(cs))
 			return PTR_ERR(cs);
 
-		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
 		for (i = GEN8_3LVL_PDPES; i--; ) {
 			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 
@@ -1024,9 +1111,6 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 		}
 		*cs++ = MI_NOOP;
 		intel_ring_advance(rq, cs);
-	} else {
-		/* ppGTT is not part of the legacy context image */
-		gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
 	}
 
 	return 0;
@@ -1034,10 +1118,20 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 
 static bool skip_ppgtt_update(struct intel_context *ce, void *data)
 {
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		return true;
+
 	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
-		return !ce->state;
-	else
-		return !atomic_read(&ce->pin_count);
+		return false;
+
+	if (!atomic_read(&ce->pin_count))
+		return true;
+
+	/* ppGTT is not part of the legacy context image */
+	if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
+		return true;
+
+	return false;
 }
 
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1050,34 +1144,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (args->size)
 		return -EINVAL;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
 	if (upper_32_bits(args->value))
 		return -ENOENT;
 
-	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
-	if (err)
-		return err;
-
+	rcu_read_lock();
 	vm = idr_find(&file_priv->vm_idr, args->value);
-	if (vm)
-		i915_vm_get(vm);
-	mutex_unlock(&file_priv->vm_idr_lock);
+	if (vm && !kref_get_unless_zero(&vm->ref))
+		vm = NULL;
+	rcu_read_unlock();
 	if (!vm)
 		return -ENOENT;
 
-	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	err = mutex_lock_interruptible(&ctx->mutex);
 	if (err)
 		goto out;
 
-	if (vm == ctx->vm)
+	if (i915_gem_context_is_closed(ctx)) {
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	if (vm == rcu_access_pointer(ctx->vm))
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
-	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
-	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, vm);
 
@@ -1092,13 +1186,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		i915_vm_put(__set_ppgtt(ctx, old));
-		i915_vm_put(old);
+		i915_vm_close(__set_ppgtt(ctx, old));
+		i915_vm_close(old);
 	}
 
 unlock:
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+	mutex_unlock(&ctx->mutex);
 out:
 	i915_vm_put(vm);
 	return err;
@@ -1117,7 +1210,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 
 	offset = i915_ggtt_offset(ce->state) +
 		 LRC_STATE_PN * PAGE_SIZE +
-		 (CTX_R_PWR_CLK_STATE + 1) * 4;
+		 CTX_R_PWR_CLK_STATE * 4;
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 	*cs++ = lower_32_bits(offset);
@@ -1143,12 +1236,14 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * image, or into the registers directory, does not stick). Pristine
 	 * and idle contexts will be configured on pinning.
 	 */
-	if (!intel_context_is_pinned(ce))
+	if (!intel_context_pin_if_active(ce))
 		return 0;
 
-	rq = i915_request_create(ce->engine->kernel_context);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_unpin;
+	}
 
 	/* Serialise with the remote context */
 	ret = intel_context_prepare_remote_request(ce, rq);
@@ -1156,12 +1251,13 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 		ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 	i915_request_add(rq);
+out_unpin:
+	intel_context_unpin(ce);
 	return ret;
 }
 
 static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
-				 struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
 	int ret;
 
@@ -1185,23 +1281,6 @@ unlock:
 }
 
 static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	int ret;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
-static int
 user_to_context_sseu(struct drm_i915_private *i915,
 		     const struct drm_i915_gem_context_param_sseu *user,
 		     struct intel_sseu *context)
@@ -1432,12 +1511,14 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 		}
 	}
 
-	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	ce = intel_execlists_create_virtual(siblings, n);
 	if (IS_ERR(ce)) {
 		err = PTR_ERR(ce);
 		goto out_siblings;
 	}
 
+	intel_context_set_gem(ce, set->ctx);
+
 	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
 		intel_context_put(ce);
 		err = -EEXIST;
@@ -1607,12 +1688,14 @@ set_engines(struct i915_gem_context *ctx,
 			return -ENOENT;
 		}
 
-		ce = intel_context_create(ctx, engine);
+		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
 			__free_engines(set.engines, n);
 			return PTR_ERR(ce);
 		}
 
+		intel_context_set_gem(ce, ctx);
+
 		set.engines->engines[n] = ce;
 	}
 	set.engines->num_engines = num_engines;
@@ -1634,7 +1717,7 @@ replace:
 		i915_gem_context_set_user_engines(ctx);
 	else
 		i915_gem_context_clear_user_engines(ctx);
-	rcu_swap_protected(ctx->engines, set.engines, 1);
+	set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
 	mutex_unlock(&ctx->engines_mutex);
 
 	call_rcu(&set.engines->rcu, free_engines_rcu);
@@ -1743,6 +1826,54 @@ err_free:
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	return __context_set_persistence(ctx, args->value);
+}
+
+static void __apply_priority(struct intel_context *ce, void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+
+	if (!intel_engine_has_semaphores(ce->engine))
+		return;
+
+	if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
+		intel_context_set_use_semaphores(ce);
+	else
+		intel_context_clear_use_semaphores(ce);
+}
+
+static int set_priority(struct i915_gem_context *ctx,
+			const struct drm_i915_gem_context_param *args)
+{
+	s64 priority = args->value;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+		return -ENODEV;
+
+	if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+	    priority < I915_CONTEXT_MIN_USER_PRIORITY)
+		return -EINVAL;
+
+	if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	ctx->sched.priority = I915_USER_PRIORITY(priority);
+	context_apply_all(ctx, __apply_priority, ctx);
+
+	return 0;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1789,23 +1920,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		break;
 
 	case I915_CONTEXT_PARAM_PRIORITY:
-		{
-			s64 priority = args->value;
-
-			if (args->size)
-				ret = -EINVAL;
-			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
-				ret = -ENODEV;
-			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
-				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
-				ret = -EINVAL;
-			else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
-				 !capable(CAP_SYS_NICE))
-				ret = -EPERM;
-			else
-				ctx->sched.priority =
-					I915_USER_PRIORITY(priority);
-		}
+		ret = set_priority(ctx, args);
 		break;
 
 	case I915_CONTEXT_PARAM_SSEU:
@@ -1820,6 +1935,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1881,20 +2000,23 @@ static int clone_engines(struct i915_gem_context *dst,
 		 */
 		if (intel_engine_is_virtual(engine))
 			clone->engines[n] =
-				intel_execlists_clone_virtual(dst, engine);
+				intel_execlists_clone_virtual(engine);
 		else
-			clone->engines[n] = intel_context_create(dst, engine);
+			clone->engines[n] = intel_context_create(engine);
 		if (IS_ERR_OR_NULL(clone->engines[n])) {
 			__free_engines(clone, n);
 			goto err_unlock;
 		}
+
+		intel_context_set_gem(clone->engines[n], dst);
 	}
 	clone->num_engines = n;
 
 	user_engines = i915_gem_context_user_engines(src);
 	i915_gem_context_unlock_engines(src);
 
-	free_engines(dst->engines);
+	/* Serialised by constructor */
+	free_engines(__context_engines_static(dst));
 	RCU_INIT_POINTER(dst->engines, clone);
 	if (user_engines)
 		i915_gem_context_set_user_engines(dst);
@@ -1929,7 +2051,8 @@ static int clone_sseu(struct i915_gem_context *dst,
 	unsigned long n;
 	int err;
 
-	clone = dst->engines; /* no locking required; sole access */
+	/* no locking required; sole access under constructor*/
+	clone = __context_engines_static(dst);
 	if (e->num_engines != clone->num_engines) {
 		err = -EINVAL;
 		goto unlock;
@@ -1972,44 +2095,24 @@ static int clone_vm(struct i915_gem_context *dst,
 		    struct i915_gem_context *src)
 {
 	struct i915_address_space *vm;
+	int err = 0;
 
-	rcu_read_lock();
-	do {
-		vm = READ_ONCE(src->vm);
-		if (!vm)
-			break;
-
-		if (!kref_get_unless_zero(&vm->ref))
-			continue;
-
-		/*
-		 * This ppgtt may have be reallocated between
-		 * the read and the kref, and reassigned to a third
-		 * context. In order to avoid inadvertent sharing
-		 * of this ppgtt with that third context (and not
-		 * src), we have to confirm that we have the same
-		 * ppgtt after passing through the strong memory
-		 * barrier implied by a successful
-		 * kref_get_unless_zero().
-		 *
-		 * Once we have acquired the current ppgtt of src,
-		 * we no longer care if it is released from src, as
-		 * it cannot be reallocated elsewhere.
-		 */
-
-		if (vm == READ_ONCE(src->vm))
-			break;
+	if (!rcu_access_pointer(src->vm))
+		return 0;
 
-		i915_vm_put(vm);
-	} while (1);
+	rcu_read_lock();
+	vm = context_get_vm_rcu(src);
 	rcu_read_unlock();
 
-	if (vm) {
+	if (!mutex_lock_interruptible(&dst->mutex)) {
 		__assign_ppgtt(dst, vm);
-		i915_vm_put(vm);
+		mutex_unlock(&dst->mutex);
+	} else {
+		err = -EINTR;
 	}
 
-	return 0;
+	i915_vm_put(vm);
+	return err;
 }
 
 static int create_clone(struct i915_user_extension __user *ext, void *data)
@@ -2080,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_context_create_ext *args = data;
 	struct create_ext ext_data;
 	int ret;
+	u32 id;
 
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return -ENODEV;
@@ -2094,17 +2198,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	ext_data.fpriv = file->driver_priv;
 	if (client_is_banned(ext_data.fpriv)) {
 		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
-			  current->comm,
-			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+			  current->comm, task_pid_nr(current));
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	ext_data.ctx = i915_gem_create_context(i915, args->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
 
@@ -2117,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 			goto err_ctx;
 	}
 
-	ret = gem_context_register(ext_data.ctx, ext_data.fpriv);
+	ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id);
 	if (ret < 0)
 		goto err_ctx;
 
-	args->ctx_id = ret;
+	args->ctx_id = id;
 	DRM_DEBUG("HW context %d created\n", args->ctx_id);
 
 	return 0;
@@ -2144,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!args->ctx_id)
 		return -ENOENT;
 
-	if (mutex_lock_interruptible(&file_priv->context_idr_lock))
-		return -EINTR;
-
-	ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
-	mutex_unlock(&file_priv->context_idr_lock);
+	ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
 	if (!ctx)
 		return -ENOENT;
 
@@ -2231,12 +2325,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		args->size = 0;
-		if (ctx->vm)
-			args->value = ctx->vm->total;
-		else if (to_i915(dev)->ggtt.alias)
-			args->value = to_i915(dev)->ggtt.alias->vm.total;
+		rcu_read_lock();
+		if (rcu_access_pointer(ctx->vm))
+			args->value = rcu_dereference(ctx->vm)->total;
 		else
 			args->value = to_i915(dev)->ggtt.vm.total;
+		rcu_read_unlock();
 		break;
 
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
@@ -2271,6 +2365,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2302,7 +2401,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 				       void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_reset_stats *args = data;
 	struct i915_gem_context *ctx;
 	int ret;
@@ -2324,7 +2423,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+		args->reset_count = i915_reset_count(&i915->gpu_error);
 	else
 		args->reset_count = 0;
 
@@ -2337,33 +2436,6 @@ out:
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 /* GEM context-engines iterator: for_each_gem_engine() */
 struct intel_context *
 i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..3ae61a355d87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -11,6 +11,7 @@
 
 #include "gt/intel_context.h"
 
+#include "i915_drv.h"
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "intel_device_info.h"
@@ -74,24 +75,19 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
-static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
 {
-	return test_bit(CONTEXT_BANNED, &ctx->flags);
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
 }
 
-static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx)
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
 {
-	set_bit(CONTEXT_BANNED, &ctx->flags);
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
 }
 
-static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx)
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
 {
-	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
-}
-
-static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx)
-{
-	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
 }
 
 static inline bool
@@ -112,29 +108,9 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
-static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
-{
-	return !ctx->file_priv;
-}
-
 /* i915_gem_context.c */
-int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+void i915_gem_init__contexts(struct drm_i915_private *i915);
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
@@ -158,9 +134,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
 
-struct i915_gem_context *
-i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
-
 static inline struct i915_gem_context *
 i915_gem_context_get(struct i915_gem_context *ctx)
 {
@@ -173,6 +146,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
+static inline struct i915_address_space *
+i915_gem_context_vm(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex));
+}
+
+static inline struct i915_address_space *
+i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx)
+{
+	struct i915_address_space *vm;
+
+	rcu_read_lock();
+	vm = rcu_dereference(ctx->vm);
+	if (!vm)
+		vm = &ctx->i915->ggtt.vm;
+	vm = i915_vm_get(vm);
+	rcu_read_unlock();
+
+	return vm;
+}
+
 static inline struct i915_gem_engines *
 i915_gem_context_engines(struct i915_gem_context *ctx)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 00537b9d7006..017ca803ab47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -88,7 +88,7 @@ struct i915_gem_context {
 	 * In other modes, this is a NULL pointer with the expectation that
 	 * the caller uses the shared global GTT.
 	 */
-	struct i915_address_space *vm;
+	struct i915_address_space __rcu *vm;
 
 	/**
 	 * @pid: process id of creator
@@ -100,15 +100,6 @@ struct i915_gem_context {
 	 */
 	struct pid *pid;
 
-	/**
-	 * @name: arbitrary name
-	 *
-	 * A name is constructed for the context from the creator's process
-	 * name, pid and user handle in order to uniquely identify the
-	 * context in messages.
-	 */
-	const char *name;
-
 	/** link: place with &drm_i915_private.context_list */
 	struct list_head link;
 	struct llist_node free_link;
@@ -137,33 +128,14 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
 	 */
 	unsigned long flags;
-#define CONTEXT_BANNED			0
-#define CONTEXT_CLOSED			1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
-#define CONTEXT_USER_ENGINES		3
-
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
+#define CONTEXT_CLOSED			0
+#define CONTEXT_USER_ENGINES		1
 
 	struct mutex mutex;
 
@@ -193,12 +165,14 @@ struct i915_gem_context {
 	 */
 	struct radix_tree_root handles_vma;
 
-	/** jump_whitelist: Bit array for tracking cmds during cmdparsing
-	 *  Guarded by struct_mutex
+	/**
+	 * @name: arbitrary name, used for user debug
+	 *
+	 * A name is constructed for the context from the creator's process
+	 * name, pid and user handle in order to uniquely identify the
+	 * context in messages.
 	 */
-	unsigned long *jump_whitelist;
-	/** jump_whitelist_cmds: No of cmd slots available */
-	u32 jump_whitelist_cmds;
+	char name[TASK_COMM_LEN + 8];
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 96ce95c8ac5a..372b57ca0efc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -93,40 +93,6 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 	i915_gem_object_unpin_map(obj);
 }
 
-static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct page *page;
-
-	if (page_num >= obj->base.size >> PAGE_SHIFT)
-		return NULL;
-
-	if (!i915_gem_object_has_struct_page(obj))
-		return NULL;
-
-	if (i915_gem_object_pin_pages(obj))
-		return NULL;
-
-	/* Synchronisation is left to the caller (via .begin_cpu_access()) */
-	page = i915_gem_object_get_page(obj, page_num);
-	if (IS_ERR(page))
-		goto err_unpin;
-
-	return kmap(page);
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return NULL;
-}
-
-static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
-{
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-
-	kunmap(virt_to_page(addr));
-	i915_gem_object_unpin_pages(obj);
-}
-
 static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
@@ -195,8 +161,6 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.map_dma_buf = i915_gem_map_dma_buf,
 	.unmap_dma_buf = i915_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
-	.map = i915_gem_dmabuf_kmap,
-	.unmap = i915_gem_dmabuf_kunmap,
 	.mmap = i915_gem_dmabuf_mmap,
 	.vmap = i915_gem_dmabuf_vmap,
 	.vunmap = i915_gem_dmabuf_vunmap,
@@ -256,6 +220,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
 struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 					     struct dma_buf *dma_buf)
 {
+	static struct lock_class_key lock_class;
 	struct dma_buf_attachment *attach;
 	struct drm_i915_gem_object *obj;
 	int ret;
@@ -287,7 +252,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	}
 
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
-	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
+	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class);
 	obj->base.import_attach = attach;
 	obj->base.resv = dma_buf->resv;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 9c58e8fac1d9..0cc40e77bbd2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -12,6 +12,8 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 #include "i915_vma.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
 
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
@@ -27,7 +29,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 {
-	if (!READ_ONCE(obj->pin_global))
+	if (!i915_gem_object_is_framebuffer(obj))
 		return;
 
 	i915_gem_object_lock(obj);
@@ -148,9 +150,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
 	if (write) {
+		struct i915_vma *vma;
+
 		obj->read_domains = I915_GEM_DOMAIN_GTT;
 		obj->write_domain = I915_GEM_DOMAIN_GTT;
 		obj->mm.dirty = true;
+
+		spin_lock(&obj->vma.lock);
+		for_each_ggtt_vma(vma, obj)
+			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+				i915_vma_set_ggtt_write(vma);
+		spin_unlock(&obj->vma.lock);
 	}
 
 	i915_gem_object_unpin_pages(obj);
@@ -175,131 +185,34 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
-	struct i915_vma *vma;
 	int ret;
 
-	assert_object_held(obj);
-
 	if (obj->cache_level == cache_level)
 		return 0;
 
-	/* Inspect the list of currently bound VMA and unbind any that would
-	 * be invalid given the new cache-level. This is principally to
-	 * catch the issue of the CS prefetch crossing page boundaries and
-	 * reading an invalid PTE on older architectures.
-	 */
-restart:
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		if (i915_vma_is_pinned(vma)) {
-			DRM_DEBUG("can not change the cache level of pinned objects\n");
-			return -EBUSY;
-		}
-
-		if (!i915_vma_is_closed(vma) &&
-		    i915_gem_valid_gtt_space(vma, cache_level))
-			continue;
-
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ret;
-
-		/* As unbinding may affect other elements in the
-		 * obj->vma_list (due to side-effects from retiring
-		 * an active vma), play safe and restart the iterator.
-		 */
-		goto restart;
-	}
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
 
-	/* We can reuse the existing drm_mm nodes but need to change the
-	 * cache-level on the PTE. We could simply unbind them all and
-	 * rebind with the correct cache-level on next use. However since
-	 * we already have a valid slot, dma mapping, pages etc, we may as
-	 * rewrite the PTE in the belief that doing so tramples upon less
-	 * state and so involves less work.
-	 */
-	if (atomic_read(&obj->bind_count)) {
-		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
 
-		/* Before we change the PTE, the GPU must not be accessing it.
-		 * If we wait upon the object, we know that all the bound
-		 * VMA are no longer active.
-		 */
-		ret = i915_gem_object_wait(obj,
-					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_ALL,
-					   MAX_SCHEDULE_TIMEOUT);
-		if (ret)
-			return ret;
-
-		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
-			intel_wakeref_t wakeref =
-				intel_runtime_pm_get(&i915->runtime_pm);
-
-			/*
-			 * Access to snoopable pages through the GTT is
-			 * incoherent and on some machines causes a hard
-			 * lockup. Relinquish the CPU mmaping to force
-			 * userspace to refault in the pages and we can
-			 * then double check if the GTT mapping is still
-			 * valid for that pointer access.
-			 */
-			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
-			if (ret) {
-				intel_runtime_pm_put(&i915->runtime_pm,
-						     wakeref);
-				return ret;
-			}
-
-			if (obj->userfault_count)
-				__i915_gem_object_release_mmap(obj);
-
-			/*
-			 * As we no longer need a fence for GTT access,
-			 * we can relinquish it now (and so prevent having
-			 * to steal a fence from someone else on the next
-			 * fence request). Note GPU activity would have
-			 * dropped the fence as all snoopable access is
-			 * supposed to be linear.
-			 */
-			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_revoke_fence(vma);
-				if (ret)
-					break;
-			}
-			mutex_unlock(&i915->ggtt.vm.mutex);
-			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-			if (ret)
-				return ret;
-		} else {
-			/*
-			 * We either have incoherent backing store and
-			 * so no GTT access or the architecture is fully
-			 * coherent. In such cases, existing GTT mmaps
-			 * ignore the cache bit in the PTE and we can
-			 * rewrite it without confusing the GPU or having
-			 * to force userspace to fault back in its mmaps.
-			 */
-		}
-
-		list_for_each_entry(vma, &obj->vma.list, obj_link) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-			if (ret)
-				return ret;
-		}
+	/* Always invalidate stale cachelines */
+	if (obj->cache_level != cache_level) {
+		i915_gem_object_set_cache_coherency(obj, cache_level);
+		obj->cache_dirty = true;
 	}
 
-	list_for_each_entry(vma, &obj->vma.list, obj_link)
-		vma->node.color = cache_level;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+	i915_gem_object_unlock(obj);
 
-	return 0;
+	/* The cache-level will be applied when each vma is rebound. */
+	return i915_gem_object_unbind(obj,
+				      I915_GEM_OBJECT_UNBIND_ACTIVE |
+				      I915_GEM_OBJECT_UNBIND_BARRIER);
 }
 
 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
@@ -380,25 +293,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	if (obj->cache_level == level)
-		goto out;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto out;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_object_lock_interruptible(obj);
-	if (ret == 0) {
-		ret = i915_gem_object_set_cache_level(obj, level);
-		i915_gem_object_unlock(obj);
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_cache_level(obj, level);
 
 out:
 	i915_gem_object_put(obj);
@@ -417,17 +312,16 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view,
 				     unsigned int flags)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_vma *vma;
 	int ret;
 
-	assert_object_held(obj);
-
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
+	/* Frame buffer must be in LMEM (no migration yet) */
+	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
+		return ERR_PTR(-EINVAL);
 
-	/* The display engine is not coherent with the LLC cache on gen6.  As
+	/*
+	 * The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
 	 * done with uncached PTEs. This is lowest common denominator for all
 	 * chipsets.
@@ -437,14 +331,13 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
 	 */
 	ret = i915_gem_object_set_cache_level(obj,
-					      HAS_WT(to_i915(obj->base.dev)) ?
+					      HAS_WT(i915) ?
 					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
+	if (ret)
+		return ERR_PTR(ret);
 
-	/* As the user may map the buffer once pinned in the display plane
+	/*
+	 * As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers. However,
 	 * it may simply be too big to fit into mappable, in which case
@@ -461,21 +354,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
 	if (IS_ERR(vma))
-		goto err_unpin_global;
+		return vma;
 
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-	__i915_gem_object_flush_for_display(obj);
+	i915_gem_object_flush_if_display(obj);
 
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
 	return vma;
 }
 
@@ -485,14 +369,19 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	if (!atomic_read(&obj->bind_count))
+		return;
 
 	mutex_lock(&i915->ggtt.vm.mutex);
+	spin_lock(&obj->vma.lock);
 	for_each_ggtt_vma(vma, obj) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
+		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
+	spin_unlock(&obj->vma.lock);
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
 	if (i915_gem_object_is_shrinkable(obj)) {
@@ -500,7 +389,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 
 		spin_lock_irqsave(&i915->mm.obj_lock, flags);
 
-		if (obj->mm.madv == I915_MADV_WILLNEED)
+		if (obj->mm.madv == I915_MADV_WILLNEED &&
+		    !atomic_read(&obj->mm.shrink_pin))
 			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
 
 		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
@@ -514,12 +404,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 
 	assert_object_held(obj);
 
-	if (WARN_ON(obj->pin_global == 0))
-		return;
-
-	if (--obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
@@ -674,7 +558,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	i915_gem_object_unlock(obj);
 
 	if (write_domain)
-		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -794,7 +678,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	}
 
 out:
-	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 	obj->mm.dirty = true;
 	/* return with the pages pinned */
 	return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e635e1e5f4d3..d5a0f5ae4a8b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -19,11 +19,13 @@
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 
 enum {
@@ -227,6 +229,7 @@ struct i915_execbuffer {
 
 	struct i915_request *request; /** our request to build */
 	struct i915_vma *batch; /** identity of the batch obj/vma */
+	struct i915_vma *trampoline; /** trampoline used for chaining */
 
 	/** actual size of execobj[] as we may extend it for the cmdparser */
 	unsigned int buffer_count;
@@ -275,25 +278,6 @@ struct i915_execbuffer {
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
-/*
- * Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline u64 gen8_canonical_addr(u64 address)
-{
-	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
-
-static inline u64 gen8_noncanonical_addr(u64 address)
-{
-	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
-}
-
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
 	return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -699,7 +683,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
 
 		case 1:
 			/* Too fragmented, unbind everything and retry */
+			mutex_lock(&eb->context->vm->mutex);
 			err = i915_gem_evict_vm(eb->context->vm);
+			mutex_unlock(&eb->context->vm->mutex);
 			if (err)
 				return err;
 			break;
@@ -727,7 +713,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 		return -ENOENT;
 
 	eb->gem_context = ctx;
-	if (ctx->vm)
+	if (rcu_access_pointer(ctx->vm))
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 	eb->context_flags = 0;
@@ -744,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
-		return -EIO;
-
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
@@ -904,7 +887,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
-	cache->node.allocated = false;
+	cache->node.flags = 0;
 	cache->rq = NULL;
 	cache->rq_size = 0;
 }
@@ -965,11 +948,13 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 
-		if (cache->node.allocated) {
+		if (drm_mm_node_allocated(&cache->node)) {
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
+			mutex_lock(&ggtt->vm.mutex);
 			drm_mm_remove_node(&cache->node);
+			mutex_unlock(&ggtt->vm.mutex);
 		} else {
 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 		}
@@ -1044,11 +1029,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
+			mutex_lock(&ggtt->vm.mutex);
 			err = drm_mm_insert_node_in_range
 				(&ggtt->vm.mm, &cache->node,
 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				 0, ggtt->mappable_end,
 				 DRM_MM_INSERT_LOW);
+			mutex_unlock(&ggtt->vm.mutex);
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
@@ -1058,7 +1045,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	}
 
 	offset = cache->node.start;
-	if (cache->node.allocated) {
+	if (drm_mm_node_allocated(&cache->node)) {
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
 				     offset, I915_CACHE_NONE, 0);
@@ -1147,7 +1134,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	u32 *cmd;
 	int err;
 
-	pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE);
+	pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
 	if (IS_ERR(pool))
 		return PTR_ERR(pool);
 
@@ -1234,10 +1221,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 	if (unlikely(!cache->rq)) {
 		int err;
 
-		/* If we need to copy for the cmdparser, we will stall anyway */
-		if (eb_use_cmdparser(eb))
-			return ERR_PTR(-EWOULDBLOCK);
-
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
@@ -1390,7 +1373,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 		    IS_GEN(eb->i915, 6)) {
 			err = i915_vma_bind(target, target->obj->cache_level,
-					    PIN_GLOBAL);
+					    PIN_GLOBAL, NULL);
 			if (WARN_ONCE(err,
 				      "Unexpected failure to bind target VMA!"))
 				return err;
@@ -1908,15 +1891,15 @@ err_skip:
 	return err;
 }
 
-static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 {
 	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
-		return false;
+		return -EINVAL;
 
 	/* Kernel clipping was a DRI1 misfeature */
 	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
 		if (exec->num_cliprects || exec->cliprects_ptr)
-			return false;
+			return -EINVAL;
 	}
 
 	if (exec->DR4 == 0xffffffff) {
@@ -1924,12 +1907,12 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		exec->DR4 = 0;
 	}
 	if (exec->DR1 || exec->DR4)
-		return false;
+		return -EINVAL;
 
 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
-		return false;
+		return -EINVAL;
 
-	return true;
+	return 0;
 }
 
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
@@ -1958,99 +1941,179 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 }
 
 static struct i915_vma *
-shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+shadow_batch_pin(struct drm_i915_gem_object *obj,
+		 struct i915_address_space *vm,
+		 unsigned int flags)
 {
-	struct drm_i915_private *dev_priv = eb->i915;
-	struct i915_vma * const vma = *eb->vma;
-	struct i915_address_space *vm;
-	u64 flags;
+	struct i915_vma *vma;
+	int err;
 
-	/*
-	 * PPGTT backed shadow buffers must be mapped RO, to prevent
-	 * post-scan tampering
-	 */
-	if (CMDPARSER_USES_GGTT(dev_priv)) {
-		flags = PIN_GLOBAL;
-		vm = &dev_priv->ggtt.vm;
-	} else if (vma->vm->has_read_only) {
-		flags = PIN_USER;
-		vm = vma->vm;
-		i915_gem_object_set_readonly(obj);
-	} else {
-		DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
-		return ERR_PTR(-EINVAL);
-	}
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return vma;
+
+	err = i915_vma_pin(vma, 0, 0, flags);
+	if (err)
+		return ERR_PTR(err);
+
+	return vma;
+}
 
-	return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+struct eb_parse_work {
+	struct dma_fence_work base;
+	struct intel_engine_cs *engine;
+	struct i915_vma *batch;
+	struct i915_vma *shadow;
+	struct i915_vma *trampoline;
+	unsigned int batch_offset;
+	unsigned int batch_length;
+};
+
+static int __eb_parse(struct dma_fence_work *work)
+{
+	struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+	return intel_engine_cmd_parser(pw->engine,
+				       pw->batch,
+				       pw->batch_offset,
+				       pw->batch_length,
+				       pw->shadow,
+				       pw->trampoline);
 }
 
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
+static const struct dma_fence_work_ops eb_parse_ops = {
+	.name = "eb_parse",
+	.work = __eb_parse,
+};
+
+static int eb_parse_pipeline(struct i915_execbuffer *eb,
+			     struct i915_vma *shadow,
+			     struct i915_vma *trampoline)
 {
-	struct intel_engine_pool_node *pool;
-	struct i915_vma *vma;
-	u64 batch_start;
-	u64 shadow_batch_start;
+	struct eb_parse_work *pw;
 	int err;
 
-	pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
-	if (IS_ERR(pool))
-		return ERR_CAST(pool);
+	pw = kzalloc(sizeof(*pw), GFP_KERNEL);
+	if (!pw)
+		return -ENOMEM;
 
-	vma = shadow_batch_pin(eb, pool->obj);
-	if (IS_ERR(vma))
-		goto err;
+	dma_fence_work_init(&pw->base, &eb_parse_ops);
+
+	pw->engine = eb->engine;
+	pw->batch = eb->batch;
+	pw->batch_offset = eb->batch_start_offset;
+	pw->batch_length = eb->batch_len;
+	pw->shadow = shadow;
+	pw->trampoline = trampoline;
 
-	batch_start = gen8_canonical_addr(eb->batch->node.start) +
-		      eb->batch_start_offset;
+	dma_resv_lock(pw->batch->resv, NULL);
 
-	shadow_batch_start = gen8_canonical_addr(vma->node.start);
+	err = dma_resv_reserve_shared(pw->batch->resv, 1);
+	if (err)
+		goto err_batch_unlock;
 
-	err = intel_engine_cmd_parser(eb->gem_context,
-				      eb->engine,
-				      eb->batch->obj,
-				      batch_start,
-				      eb->batch_start_offset,
-				      eb->batch_len,
-				      pool->obj,
-				      shadow_batch_start);
+	/* Wait for all writes (and relocs) into the batch to complete */
+	err = i915_sw_fence_await_reservation(&pw->base.chain,
+					      pw->batch->resv, NULL, false,
+					      0, I915_FENCE_GFP);
+	if (err < 0)
+		goto err_batch_unlock;
 
-	if (err) {
-		i915_vma_unpin(vma);
+	/* Keep the batch alive and unwritten as we parse */
+	dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
 
+	dma_resv_unlock(pw->batch->resv);
+
+	/* Force execution to wait for completion of the parser */
+	dma_resv_lock(shadow->resv, NULL);
+	dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
+	dma_resv_unlock(shadow->resv);
+
+	dma_fence_work_commit(&pw->base);
+	return 0;
+
+err_batch_unlock:
+	dma_resv_unlock(pw->batch->resv);
+	kfree(pw);
+	return err;
+}
+
+static int eb_parse(struct i915_execbuffer *eb)
+{
+	struct intel_engine_pool_node *pool;
+	struct i915_vma *shadow, *trampoline;
+	unsigned int len;
+	int err;
+
+	if (!eb_use_cmdparser(eb))
+		return 0;
+
+	len = eb->batch_len;
+	if (!CMDPARSER_USES_GGTT(eb->i915)) {
 		/*
-		 * Unsafe GGTT-backed buffers can still be submitted safely
-		 * as non-secure.
-		 * For PPGTT backing however, we have no choice but to forcibly
-		 * reject unsafe buffers
+		 * ppGTT backed shadow buffers must be mapped RO, to prevent
+		 * post-scan tampering
 		 */
-		if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
-			/* Execute original buffer non-secure */
-			vma = NULL;
-		else
-			vma = ERR_PTR(err);
+		if (!eb->context->vm->has_read_only) {
+			DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+			return -EINVAL;
+		}
+	} else {
+		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
+	}
+
+	pool = intel_engine_get_pool(eb->engine, len);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+	if (IS_ERR(shadow)) {
+		err = PTR_ERR(shadow);
 		goto err;
 	}
+	i915_gem_object_set_readonly(shadow->obj);
+
+	trampoline = NULL;
+	if (CMDPARSER_USES_GGTT(eb->i915)) {
+		trampoline = shadow;
+
+		shadow = shadow_batch_pin(pool->obj,
+					  &eb->engine->gt->ggtt->vm,
+					  PIN_GLOBAL);
+		if (IS_ERR(shadow)) {
+			err = PTR_ERR(shadow);
+			shadow = trampoline;
+			goto err_shadow;
+		}
 
-	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+		eb->batch_flags |= I915_DISPATCH_SECURE;
+	}
+
+	err = eb_parse_pipeline(eb, shadow, trampoline);
+	if (err)
+		goto err_trampoline;
+
+	eb->vma[eb->buffer_count] = i915_vma_get(shadow);
 	eb->flags[eb->buffer_count] =
 		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	vma->exec_flags = &eb->flags[eb->buffer_count];
+	shadow->exec_flags = &eb->flags[eb->buffer_count];
 	eb->buffer_count++;
 
+	eb->trampoline = trampoline;
 	eb->batch_start_offset = 0;
-	eb->batch = vma;
-
-	if (CMDPARSER_USES_GGTT(eb->i915))
-		eb->batch_flags |= I915_DISPATCH_SECURE;
-
-	/* eb->batch_len unchanged */
+	eb->batch = shadow;
 
-	vma->private = pool;
-	return vma;
+	shadow->private = pool;
+	return 0;
 
+err_trampoline:
+	if (trampoline)
+		i915_vma_unpin(trampoline);
+err_shadow:
+	i915_vma_unpin(shadow);
 err:
 	intel_engine_pool_put(pool);
-	return vma;
+	return err;
 }
 
 static void
@@ -2099,6 +2162,19 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (err)
 		return err;
 
+	if (eb->trampoline) {
+		GEM_BUG_ON(eb->batch_start_offset);
+		err = eb->engine->emit_bb_start(eb->request,
+						eb->trampoline->node.start +
+						eb->batch_len,
+						0, 0);
+		if (err)
+			return err;
+	}
+
+	if (intel_context_nopreempt(eb->context))
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
+
 	return 0;
 }
 
@@ -2168,35 +2244,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
 	return i915_request_get(rq);
 }
 
-static int
-__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	int err;
-
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return 0;
-
-	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
-	if (err)
-		return err;
-
-	err = __intel_context_do_pin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-
-	return err;
-}
-
-static void
-__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
-		return;
-
-	mutex_lock(&eb->i915->drm.struct_mutex);
-	intel_context_unpin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-}
-
 static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	struct intel_timeline *tl;
@@ -2211,12 +2258,15 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	if (err)
 		return err;
 
+	if (unlikely(intel_context_is_banned(ce)))
+		return -EIO;
+
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = __eb_pin_context(eb, ce);
+	err = intel_context_pin(ce);
 	if (err)
 		return err;
 
@@ -2260,7 +2310,7 @@ err_exit:
 	intel_context_exit(ce);
 	intel_context_timeline_unlock(tl);
 err_unpin:
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 	return err;
 }
 
@@ -2273,7 +2323,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 }
 
 static unsigned int
@@ -2506,6 +2556,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.buffer_count = args->buffer_count;
 	eb.batch_start_offset = args->batch_start_offset;
 	eb.batch_len = args->batch_len;
+	eb.trampoline = NULL;
 
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
@@ -2597,15 +2648,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (eb.batch_len == 0)
 		eb.batch_len = eb.batch->size - eb.batch_start_offset;
 
-	if (eb_use_cmdparser(&eb)) {
-		struct i915_vma *vma;
-
-		vma = eb_parse(&eb);
-		if (IS_ERR(vma)) {
-			err = PTR_ERR(vma);
-			goto err_vma;
-		}
-	}
+	err = eb_parse(&eb);
+	if (err)
+		goto err_vma;
 
 	/*
 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
@@ -2685,6 +2730,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	err = eb_submit(&eb);
 err_request:
 	add_to_client(eb.request, file);
+	i915_request_get(eb.request);
 	i915_request_add(eb.request);
 
 	if (fences)
@@ -2700,6 +2746,7 @@ err_request:
 			fput(out_fence->file);
 		}
 	}
+	i915_request_put(eb.request);
 
 err_batch_unpin:
 	if (eb.batch_flags & I915_DISPATCH_SECURE)
@@ -2709,6 +2756,8 @@ err_batch_unpin:
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+	if (eb.trampoline)
+		i915_vma_unpin(eb.trampoline);
 	mutex_unlock(&dev->struct_mutex);
 err_engine:
 	eb_unpin_engine(&eb);
@@ -2778,8 +2827,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	exec2.flags = I915_EXEC_RENDER;
 	i915_execbuffer2_set_context_id(exec2, 0);
 
-	if (!i915_gem_check_execbuffer(&exec2))
-		return -EINVAL;
+	err = i915_gem_check_execbuffer(&exec2);
+	if (err)
+		return err;
 
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
@@ -2856,8 +2906,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (!i915_gem_check_execbuffer(args))
-		return -EINVAL;
+	err = i915_gem_check_execbuffer(args);
+	if (err)
+		return err;
 
 	/* Allocate an extra slot for use by the command parser */
 	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 0c41e04ab8fa..9cfb0e41ff06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -117,13 +117,6 @@ create_st:
 		goto err;
 	}
 
-	/* Mark the pages as dontneed whilst they are still pinned. As soon
-	 * as they are unpinned they are allowed to be reaped by the shrinker,
-	 * and the caller is expected to repopulate - the contents of this
-	 * object are only valid whilst active and pinned.
-	 */
-	obj->mm.madv = I915_MADV_DONTNEED;
-
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
 	return 0;
@@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
 	internal_free_pages(pages);
 
 	obj->mm.dirty = false;
-	obj->mm.madv = I915_MADV_WILLNEED;
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
@@ -172,6 +164,7 @@ struct drm_i915_gem_object *
 i915_gem_object_create_internal(struct drm_i915_private *i915,
 				phys_addr_t size)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 	unsigned int cache_level;
 
@@ -186,7 +179,16 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
 		return ERR_PTR(-ENOMEM);
 
 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
-	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+	i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class);
+
+	/*
+	 * Mark the object as volatile, such that the pages are marked as
+	 * dontneed whilst they are still pinned. As soon as they are unpinned
+	 * they are allowed to be reaped by the shrinker, and the caller is
+	 * expected to repopulate - the contents of this object are only valid
+	 * whilst active and pinned.
+	 */
+	i915_gem_object_set_volatile(obj);
 
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
index ddc7f2a52b3e..87d8b27f426d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -28,8 +28,8 @@ int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file);
 int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file);
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
 int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
 int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
new file mode 100644
index 000000000000..70543c83df06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_lmem.h"
+#include "i915_drv.h"
+
+const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
+	.flags = I915_GEM_OBJECT_HAS_IOMEM,
+
+	.get_pages = i915_gem_object_get_pages_buddy,
+	.put_pages = i915_gem_object_put_pages_buddy,
+	.release = i915_gem_object_release_memory_region,
+};
+
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
+{
+	return obj->ops == &i915_gem_lmem_obj_ops;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+			    resource_size_t size,
+			    unsigned int flags)
+{
+	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+					     size, flags);
+}
+
+struct drm_i915_gem_object *
+__i915_gem_lmem_object_create(struct intel_memory_region *mem,
+			      resource_size_t size,
+			      unsigned int flags)
+{
+	static struct lock_class_key lock_class;
+	struct drm_i915_private *i915 = mem->i915;
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, size);
+	i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class);
+
+	obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT;
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
+	i915_gem_object_init_memory_region(obj, mem, flags);
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
new file mode 100644
index 000000000000..fc3f15580fe3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_LMEM_H
+#define __I915_GEM_LMEM_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+struct intel_memory_region;
+
+extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
+
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+			    resource_size_t size,
+			    unsigned int flags);
+
+struct drm_i915_gem_object *
+__i915_gem_lmem_object_create(struct intel_memory_region *mem,
+			      resource_size_t size,
+			      unsigned int flags);
+
+#endif /* !__I915_GEM_LMEM_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 05289edbafe3..b9fdac2f9003 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -4,16 +4,21 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/anon_inodes.h>
 #include <linux/mman.h>
+#include <linux/pfn_t.h>
 #include <linux/sizes.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
+#include "i915_gem_mman.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "i915_vma.h"
 
 static inline bool
@@ -143,6 +148,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
  *     pagefault; swapin remains transparent.
  *
+ * 4 - Support multiple fault handlers per object depending on object's
+ *     backing storage (a.k.a. MMAP_OFFSET).
+ *
  * Restrictions:
  *
  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -170,7 +178,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
  */
 int i915_gem_mmap_gtt_version(void)
 {
-	return 3;
+	return 4;
 }
 
 static inline struct i915_ggtt_view
@@ -196,29 +204,80 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
 	return view;
 }
 
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace.  The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room.  So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+static vm_fault_t i915_error_to_vmf_fault(int err)
+{
+	switch (err) {
+	default:
+		WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err);
+		/* fallthrough */
+	case -EIO: /* shmemfs failure from swap device */
+	case -EFAULT: /* purged object */
+	case -ENODEV: /* bad object, how did you get here! */
+	case -ENXIO: /* unable to access backing store (on device) */
+		return VM_FAULT_SIGBUS;
+
+	case -ENOSPC: /* shmemfs allocation failure */
+	case -ENOMEM: /* our allocation failure */
+		return VM_FAULT_OOM;
+
+	case 0:
+	case -EAGAIN:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	}
+}
+
+static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
+{
+	struct vm_area_struct *area = vmf->vma;
+	struct i915_mmap_offset *mmo = area->vm_private_data;
+	struct drm_i915_gem_object *obj = mmo->obj;
+	resource_size_t iomap;
+	int err;
+
+	/* Sanity check that we allow writing into this object */
+	if (unlikely(i915_gem_object_is_readonly(obj) &&
+		     area->vm_flags & VM_WRITE))
+		return VM_FAULT_SIGBUS;
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	iomap = -1;
+	if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) {
+		iomap = obj->mm.region->iomap.base;
+		iomap -= obj->mm.region->region.start;
+	}
+
+	/* PTEs are revoked in obj->ops->put_pages() */
+	err = remap_io_sg(area,
+			  area->vm_start, area->vm_end - area->vm_start,
+			  obj->mm.pages->sgl, iomap);
+
+	if (area->vm_flags & VM_WRITE) {
+		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	return i915_error_to_vmf_fault(err);
+}
+
+static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 {
 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
 	struct vm_area_struct *area = vmf->vma;
-	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct i915_mmap_offset *mmo = area->vm_private_data;
+	struct drm_i915_gem_object *obj = mmo->obj;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
@@ -249,16 +308,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_rpm;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -285,10 +334,19 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 			view.type = I915_GGTT_VIEW_PARTIAL;
 			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
 		}
+
+		/* The entire mappable GGTT is pinned? Unexpected! */
+		GEM_BUG_ON(vma == ERR_PTR(-ENOSPC));
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err_unlock;
+		goto err_reset;
+	}
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unpin;
 	}
 
 	ret = i915_vma_pin_fence(vma);
@@ -312,7 +370,10 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+	/* Track the mmo associated with the fenced vma */
+	vma->mmo = mmo;
+
+	if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND))
 		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
 				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
@@ -326,87 +387,42 @@ err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
 err_reset:
 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
 err_rpm:
 	intel_runtime_pm_put(rpm, wakeref);
 	i915_gem_object_unpin_pages(obj);
 err:
-	switch (ret) {
-	case -EIO:
-		/*
-		 * We eat errors when the gpu is terminally wedged to avoid
-		 * userspace unduly crashing (gl has no provisions for mmaps to
-		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
-		 * and so needs to be reported.
-		 */
-		if (!intel_gt_is_wedged(ggtt->vm.gt))
-			return VM_FAULT_SIGBUS;
-		/* else, fall through */
-	case -EAGAIN:
-		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
-		 * i915_mutex_lock_interruptible.
-		 */
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -ENOSPC:
-	case -EFAULT:
-	case -ENODEV: /* bad object, how did you get here! */
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
-		return VM_FAULT_SIGBUS;
-	}
+	return i915_error_to_vmf_fault(ret);
 }
 
-void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!obj->userfault_count);
 
-	obj->userfault_count = 0;
-	list_del(&obj->userfault_link);
-	drm_vma_node_unmap(&obj->base.vma_node,
-			   obj->base.dev->anon_inode->i_mapping);
-
 	for_each_ggtt_vma(vma, obj)
-		i915_vma_unset_userfault(vma);
+		i915_vma_revoke_mmap(vma);
+
+	GEM_BUG_ON(obj->userfault_count);
 }
 
-/**
- * i915_gem_object_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
+/*
  * It is vital that we remove the page mapping if we have mapped a tiled
  * object through the GTT and then lose the fence register due to
  * resource pressure. Similarly if the object has been moved out of the
  * aperture, than pages mapped into userspace must be revoked. Removing the
  * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
+ * fixup by vm_fault_gtt().
  */
-void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	intel_wakeref_t wakeref;
 
-	/* Serialisation between user GTT access and our code depends upon
+	/*
+	 * Serialisation between user GTT access and our code depends upon
 	 * revoking the CPU's PTE whilst the mutex is held. The next user
 	 * pagefault then has to wait until we release the mutex.
 	 *
@@ -420,9 +436,10 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
 	if (!obj->userfault_count)
 		goto out;
 
-	__i915_gem_object_release_mmap(obj);
+	__i915_gem_object_release_mmap_gtt(obj);
 
-	/* Ensure that the CPU's PTE are revoked and there are not outstanding
+	/*
+	 * Ensure that the CPU's PTE are revoked and there are not outstanding
 	 * memory transactions from userspace before we return. The TLB
 	 * flushing implied above by changing the PTE above *should* be
 	 * sufficient, an extra barrier here just provides us with a bit
@@ -436,62 +453,151 @@ out:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
-static int create_mmap_offset(struct drm_i915_gem_object *obj)
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmap_offset *mmo;
+
+	spin_lock(&obj->mmo.lock);
+	list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
+		/*
+		 * vma_node_unmap for GTT mmaps handled already in
+		 * __i915_gem_object_release_mmap_gtt
+		 */
+		if (mmo->mmap_type == I915_MMAP_TYPE_GTT)
+			continue;
+
+		spin_unlock(&obj->mmo.lock);
+		drm_vma_node_unmap(&mmo->vma_node,
+				   obj->base.dev->anon_inode->i_mapping);
+		spin_lock(&obj->mmo.lock);
+	}
+	spin_unlock(&obj->mmo.lock);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_release_mmap_gtt(obj);
+	i915_gem_object_release_mmap_offset(obj);
+}
+
+static struct i915_mmap_offset *
+mmap_offset_attach(struct drm_i915_gem_object *obj,
+		   enum i915_mmap_type mmap_type,
+		   struct drm_file *file)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_mmap_offset *mmo;
 	int err;
 
-	err = drm_gem_create_mmap_offset(&obj->base);
+	mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
+	if (!mmo)
+		return ERR_PTR(-ENOMEM);
+
+	mmo->obj = obj;
+	mmo->dev = obj->base.dev;
+	mmo->file = file;
+	mmo->mmap_type = mmap_type;
+	drm_vma_node_reset(&mmo->vma_node);
+
+	err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
+				 obj->base.size / PAGE_SIZE);
 	if (likely(!err))
-		return 0;
+		goto out;
 
 	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
+	err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto err;
 
-		i915_gem_drain_freed_objects(i915);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
+	i915_gem_drain_freed_objects(i915);
+	err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
+				 obj->base.size / PAGE_SIZE);
+	if (err)
+		goto err;
 
-	} while (flush_delayed_work(&i915->gem.retire_work));
+out:
+	if (file)
+		drm_vma_node_allow(&mmo->vma_node, file);
 
-	return err;
+	spin_lock(&obj->mmo.lock);
+	list_add(&mmo->offset, &obj->mmo.offsets);
+	spin_unlock(&obj->mmo.lock);
+
+	return mmo;
+
+err:
+	kfree(mmo);
+	return ERR_PTR(err);
 }
 
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-		  struct drm_device *dev,
-		  u32 handle,
-		  u64 *offset)
+static int
+__assign_mmap_offset(struct drm_file *file,
+		     u32 handle,
+		     enum i915_mmap_type mmap_type,
+		     u64 *offset)
 {
 	struct drm_i915_gem_object *obj;
-	int ret;
+	struct i915_mmap_offset *mmo;
+	int err;
 
 	obj = i915_gem_object_lookup(file, handle);
 	if (!obj)
 		return -ENOENT;
 
-	if (i915_gem_object_never_bind_ggtt(obj)) {
-		ret = -ENODEV;
+	if (mmap_type == I915_MMAP_TYPE_GTT &&
+	    i915_gem_object_never_bind_ggtt(obj)) {
+		err = -ENODEV;
 		goto out;
 	}
 
-	ret = create_mmap_offset(obj);
-	if (ret == 0)
-		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+	if (mmap_type != I915_MMAP_TYPE_GTT &&
+	    !i915_gem_object_type_has(obj,
+				      I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+				      I915_GEM_OBJECT_HAS_IOMEM)) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	mmo = mmap_offset_attach(obj, mmap_type, file);
+	if (IS_ERR(mmo)) {
+		err = PTR_ERR(mmo);
+		goto out;
+	}
 
+	*offset = drm_vma_node_offset_addr(&mmo->vma_node);
+	err = 0;
 out:
 	i915_gem_object_put(obj);
-	return ret;
+	return err;
+}
+
+int
+i915_gem_dumb_mmap_offset(struct drm_file *file,
+			  struct drm_device *dev,
+			  u32 handle,
+			  u64 *offset)
+{
+	enum i915_mmap_type mmap_type;
+
+	if (boot_cpu_has(X86_FEATURE_PAT))
+		mmap_type = I915_MMAP_TYPE_WC;
+	else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
+		return -ENODEV;
+	else
+		mmap_type = I915_MMAP_TYPE_GTT;
+
+	return __assign_mmap_offset(file, handle, mmap_type, offset);
 }
 
 /**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing
  * @dev: DRM device
  * @data: GTT mapping ioctl data
  * @file: GEM object info
@@ -506,12 +612,237 @@ out:
  * userspace.
  */
 int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
+i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_mmap_offset *args = data;
+	enum i915_mmap_type type;
+	int err;
+
+	/*
+	 * Historically we failed to check args.pad and args.offset
+	 * and so we cannot use those fields for user input and we cannot
+	 * add -EINVAL for them as the ABI is fixed, i.e. old userspace
+	 * may be feeding in garbage in those fields.
+	 *
+	 * if (args->pad) return -EINVAL; is verbotten!
+	 */
+
+	err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+				   NULL, 0, NULL);
+	if (err)
+		return err;
+
+	switch (args->flags) {
+	case I915_MMAP_OFFSET_GTT:
+		if (!i915_ggtt_has_aperture(&i915->ggtt))
+			return -ENODEV;
+		type = I915_MMAP_TYPE_GTT;
+		break;
+
+	case I915_MMAP_OFFSET_WC:
+		if (!boot_cpu_has(X86_FEATURE_PAT))
+			return -ENODEV;
+		type = I915_MMAP_TYPE_WC;
+		break;
+
+	case I915_MMAP_OFFSET_WB:
+		type = I915_MMAP_TYPE_WB;
+		break;
+
+	case I915_MMAP_OFFSET_UC:
+		if (!boot_cpu_has(X86_FEATURE_PAT))
+			return -ENODEV;
+		type = I915_MMAP_TYPE_UC;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return __assign_mmap_offset(file, args->handle, type, &args->offset);
+}
+
+static void vm_open(struct vm_area_struct *vma)
+{
+	struct i915_mmap_offset *mmo = vma->vm_private_data;
+	struct drm_i915_gem_object *obj = mmo->obj;
+
+	GEM_BUG_ON(!obj);
+	i915_gem_object_get(obj);
+}
+
+static void vm_close(struct vm_area_struct *vma)
+{
+	struct i915_mmap_offset *mmo = vma->vm_private_data;
+	struct drm_i915_gem_object *obj = mmo->obj;
+
+	GEM_BUG_ON(!obj);
+	i915_gem_object_put(obj);
+}
+
+static const struct vm_operations_struct vm_ops_gtt = {
+	.fault = vm_fault_gtt,
+	.open = vm_open,
+	.close = vm_close,
+};
+
+static const struct vm_operations_struct vm_ops_cpu = {
+	.fault = vm_fault_cpu,
+	.open = vm_open,
+	.close = vm_close,
+};
+
+static int singleton_release(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *i915 = file->private_data;
+
+	cmpxchg(&i915->gem.mmap_singleton, file, NULL);
+	drm_dev_put(&i915->drm);
+
+	return 0;
+}
+
+static const struct file_operations singleton_fops = {
+	.owner = THIS_MODULE,
+	.release = singleton_release,
+};
+
+static struct file *mmap_singleton(struct drm_i915_private *i915)
 {
-	struct drm_i915_gem_mmap_gtt *args = data;
+	struct file *file;
+
+	rcu_read_lock();
+	file = i915->gem.mmap_singleton;
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+	if (file)
+		return file;
 
-	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+	file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR);
+	if (IS_ERR(file))
+		return file;
+
+	/* Everyone shares a single global address space */
+	file->f_mapping = i915->drm.anon_inode->i_mapping;
+
+	smp_store_mb(i915->gem.mmap_singleton, file);
+	drm_dev_get(&i915->drm);
+
+	return file;
+}
+
+/*
+ * This overcomes the limitation in drm_gem_mmap's assignment of a
+ * drm_gem_object as the vma->vm_private_data. Since we need to
+ * be able to resolve multiple mmap offsets which could be tied
+ * to a single gem object.
+ */
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_vma_offset_node *node;
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct i915_mmap_offset *mmo = NULL;
+	struct drm_gem_object *obj = NULL;
+	struct file *anon;
+
+	if (drm_dev_is_unplugged(dev))
+		return -ENODEV;
+
+	drm_vma_offset_lock_lookup(dev->vma_offset_manager);
+	node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
+						  vma->vm_pgoff,
+						  vma_pages(vma));
+	if (likely(node)) {
+		mmo = container_of(node, struct i915_mmap_offset,
+				   vma_node);
+		/*
+		 * In our dependency chain, the drm_vma_offset_node
+		 * depends on the validity of the mmo, which depends on
+		 * the gem object. However the only reference we have
+		 * at this point is the mmo (as the parent of the node).
+		 * Try to check if the gem object was at least cleared.
+		 */
+		if (!mmo || !mmo->obj) {
+			drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+			return -EINVAL;
+		}
+		/*
+		 * Skip 0-refcnted objects as it is in the process of being
+		 * destroyed and will be invalid when the vma manager lock
+		 * is released.
+		 */
+		obj = &mmo->obj->base;
+		if (!kref_get_unless_zero(&obj->refcount))
+			obj = NULL;
+	}
+	drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+	if (!obj)
+		return -EINVAL;
+
+	if (!drm_vma_node_is_allowed(node, priv)) {
+		drm_gem_object_put_unlocked(obj);
+		return -EACCES;
+	}
+
+	if (i915_gem_object_is_readonly(to_intel_bo(obj))) {
+		if (vma->vm_flags & VM_WRITE) {
+			drm_gem_object_put_unlocked(obj);
+			return -EINVAL;
+		}
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+
+	anon = mmap_singleton(to_i915(obj->dev));
+	if (IS_ERR(anon)) {
+		drm_gem_object_put_unlocked(obj);
+		return PTR_ERR(anon);
+	}
+
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_private_data = mmo;
+
+	/*
+	 * We keep the ref on mmo->obj, not vm_file, but we require
+	 * vma->vm_file->f_mapping, see vma_link(), for later revocation.
+	 * Our userspace is accustomed to having per-file resource cleanup
+	 * (i.e. contexts, objects and requests) on their close(fd), which
+	 * requires avoiding extraneous references to their filp, hence why
+	 * we prefer to use an anonymous file for their mmaps.
+	 */
+	fput(vma->vm_file);
+	vma->vm_file = anon;
+
+	switch (mmo->mmap_type) {
+	case I915_MMAP_TYPE_WC:
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		vma->vm_ops = &vm_ops_cpu;
+		break;
+
+	case I915_MMAP_TYPE_WB:
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+		vma->vm_ops = &vm_ops_cpu;
+		break;
+
+	case I915_MMAP_TYPE_UC:
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+		vma->vm_ops = &vm_ops_cpu;
+		break;
+
+	case I915_MMAP_TYPE_GTT:
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		vma->vm_ops = &vm_ops_gtt;
+		break;
+	}
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
+	return 0;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
new file mode 100644
index 000000000000..862e01b7cb69
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_MMAN_H__
+#define __I915_GEM_MMAN_H__
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_file;
+struct drm_i915_gem_object;
+struct file;
+struct i915_mmap_offset;
+struct mutex;
+
+int i915_gem_mmap_gtt_version(void);
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
+			      struct drm_device *dev,
+			      u32 handle, u64 *offset);
+
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d7855dc5a5c5..46bacc82ddc4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,11 +22,14 @@
  *
  */
 
+#include <linux/sched/mm.h>
+
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
+#include "i915_gem_mman.h"
 #include "i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
@@ -47,9 +50,10 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops)
+			  const struct drm_i915_gem_object_ops *ops,
+			  struct lock_class_key *key)
 {
-	mutex_init(&obj->mm.lock);
+	__mutex_init(&obj->mm.lock, "obj->mm.lock", key);
 
 	spin_lock_init(&obj->vma.lock);
 	INIT_LIST_HEAD(&obj->vma.list);
@@ -58,6 +62,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->lut_list);
 
+	spin_lock_init(&obj->mmo.lock);
+	INIT_LIST_HEAD(&obj->mmo.offsets);
+
 	init_rcu_head(&obj->rcu);
 
 	obj->ops = ops;
@@ -94,6 +101,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_lut_handle *lut, *ln;
+	struct i915_mmap_offset *mmo;
 	LIST_HEAD(close);
 
 	i915_gem_object_lock(obj);
@@ -108,6 +116,17 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	}
 	i915_gem_object_unlock(obj);
 
+	spin_lock(&obj->mmo.lock);
+	list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
+		if (mmo->file != file)
+			continue;
+
+		spin_unlock(&obj->mmo.lock);
+		drm_vma_node_revoke(&mmo->vma_node, file);
+		spin_lock(&obj->mmo.lock);
+	}
+	spin_unlock(&obj->mmo.lock);
+
 	list_for_each_entry_safe(lut, ln, &close, obj_link) {
 		struct i915_gem_context *ctx = lut->ctx;
 		struct i915_vma *vma;
@@ -155,28 +174,48 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
+		struct i915_mmap_offset *mmo, *mn;
 
 		trace_i915_gem_object_destroy(obj);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_destroy(vma);
+		if (!list_empty(&obj->vma.list)) {
+			struct i915_vma *vma;
+
+			/*
+			 * Note that the vma keeps an object reference while
+			 * it is active, so it *should* not sleep while we
+			 * destroy it. Our debug code errs insits it *might*.
+			 * For the moment, play along.
+			 */
+			spin_lock(&obj->vma.lock);
+			while ((vma = list_first_entry_or_null(&obj->vma.list,
+							       struct i915_vma,
+							       obj_link))) {
+				GEM_BUG_ON(vma->obj != obj);
+				spin_unlock(&obj->vma.lock);
+
+				__i915_vma_put(vma);
+
+				spin_lock(&obj->vma.lock);
+			}
+			spin_unlock(&obj->vma.lock);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
 
-		mutex_unlock(&i915->drm.struct_mutex);
+		i915_gem_object_release_mmap(obj);
+
+		list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) {
+			drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+					      &mmo->vma_node);
+			kfree(mmo);
+		}
+		INIT_LIST_HEAD(&obj->mmo.offsets);
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
 		atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		GEM_BUG_ON(i915_gem_object_has_pages(obj));
 		bitmap_free(obj->bit_17);
 
@@ -267,18 +306,14 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 
 	switch (obj->write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		for_each_ggtt_vma(vma, obj)
-			intel_gt_flush_ggtt_writes(vma->vm->gt);
-
-		intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
-
+		spin_lock(&obj->vma.lock);
 		for_each_ggtt_vma(vma, obj) {
-			if (vma->iomap)
-				continue;
-
-			i915_vma_unset_ggtt_write(vma);
+			if (i915_vma_unset_ggtt_write(vma))
+				intel_gt_flush_ggtt_writes(vma->vm->gt);
 		}
+		spin_unlock(&obj->vma.lock);
 
+		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 		break;
 
 	case I915_GEM_DOMAIN_WC:
@@ -298,6 +333,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 	obj->write_domain = 0;
 }
 
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+					 enum fb_op_origin origin)
+{
+	struct intel_frontbuffer *front;
+
+	front = __intel_frontbuffer_get(obj);
+	if (front) {
+		intel_frontbuffer_flush(front, origin);
+		intel_frontbuffer_put(front);
+	}
+}
+
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+					      enum fb_op_origin origin)
+{
+	struct intel_frontbuffer *front;
+
+	front = __intel_frontbuffer_get(obj);
+	if (front) {
+		intel_frontbuffer_invalidate(front, origin);
+		intel_frontbuffer_put(front);
+	}
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index ddf3605bea8e..db70a3306e59 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,9 +13,10 @@
 
 #include <drm/i915_drm.h>
 
+#include "display/intel_frontbuffer.h"
 #include "i915_gem_object_types.h"
-
 #include "i915_gem_gtt.h"
+#include "i915_vma_types.h"
 
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
@@ -23,12 +24,14 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops);
+			  const struct drm_i915_gem_object_ops *ops,
+			  struct lock_class_key *key);
 struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+			     resource_size_t size);
 struct drm_i915_gem_object *
 i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
-				       const void *data, size_t size);
+				       const void *data, resource_size_t size);
 
 extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
 void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
@@ -106,6 +109,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 	dma_resv_lock(obj->base.resv, NULL);
 }
 
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+	return dma_resv_trylock(obj->base.resv);
+}
+
 static inline int
 i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
 {
@@ -125,43 +133,68 @@ void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
 static inline void
 i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
 {
-	obj->base.vma_node.readonly = true;
+	obj->flags |= I915_BO_READONLY;
 }
 
 static inline bool
 i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
 {
-	return obj->base.vma_node.readonly;
+	return obj->flags & I915_BO_READONLY;
+}
+
+static inline bool
+i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj)
+{
+	return obj->flags & I915_BO_ALLOC_CONTIGUOUS;
+}
+
+static inline bool
+i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj)
+{
+	return obj->flags & I915_BO_ALLOC_VOLATILE;
+}
+
+static inline void
+i915_gem_object_set_volatile(struct drm_i915_gem_object *obj)
+{
+	obj->flags |= I915_BO_ALLOC_VOLATILE;
+}
+
+static inline bool
+i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
+			 unsigned long flags)
+{
+	return obj->ops->flags & flags;
 }
 
 static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
-	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE);
 }
 
 static inline bool
 i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
 {
-	return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
 }
 
 static inline bool
 i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
 {
-	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY);
 }
 
 static inline bool
 i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj)
 {
-	return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT;
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT);
 }
 
 static inline bool
 i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 {
-	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
 }
 
 static inline bool
@@ -239,10 +272,27 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	/*
+	 * Only used by struct_mutex, when called "recursively" from
+	 * direct-reclaim-esque. Safe because there is only every one
+	 * struct_mutex in the entire system.
+	 */
+	I915_MM_SHRINKER = 1,
+	/*
+	 * Used for obj->mm.lock when allocating pages. Safe because the object
+	 * isn't yet on any LRU, and therefore the shrinker can't deadlock on
+	 * it. As soon as the object has pages, obj->mm.lock nests within
+	 * fs_reclaim.
+	 */
+	I915_MM_GET_PAGES = 1,
+};
+
 static inline int __must_check
 i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	might_lock(&obj->mm.lock);
+	might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 
 	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
 		return 0;
@@ -285,13 +335,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
-	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
@@ -344,9 +388,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
-void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
-void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
-
 void
 i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 				   unsigned int flush_domains);
@@ -412,7 +453,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
-	return obj->pin_global; /* currently in use by HW, keep flushed */
+	/* Currently in use by HW (display engine)? Keep flushed. */
+	return i915_gem_object_is_framebuffer(obj);
 }
 
 static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
@@ -429,6 +471,26 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+					 enum fb_op_origin origin);
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+					      enum fb_op_origin origin);
+
+static inline void
+i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+				  enum fb_op_origin origin)
+{
+	if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+		__i915_gem_object_flush_frontbuffer(obj, origin);
+}
+
+static inline void
+i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+				       enum fb_op_origin origin)
+{
+	if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+		__i915_gem_object_invalidate_frontbuffer(obj, origin);
+}
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 6415f9a17e2d..70809d8897cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -8,6 +8,7 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_object_blt.h"
 
@@ -16,7 +17,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 					 u32 value)
 {
 	struct drm_i915_private *i915 = ce->vm->i915;
-	const u32 block_size = S16_MAX * PAGE_SIZE;
+	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
 	struct intel_engine_pool_node *pool;
 	struct i915_vma *batch;
 	u64 offset;
@@ -29,10 +30,10 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
 	intel_engine_pm_get(ce->engine);
 
-	count = div_u64(vma->size, block_size);
+	count = div_u64(round_up(vma->size, block_size), block_size);
 	size = (1 + 8 * count) * sizeof(u32);
 	size = round_up(size, PAGE_SIZE);
-	pool = intel_engine_pool_get(&ce->engine->pool, size);
+	pool = intel_engine_get_pool(ce->engine, size);
 	if (IS_ERR(pool)) {
 		err = PTR_ERR(pool);
 		goto out_pm;
@@ -200,7 +201,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 					 struct i915_vma *dst)
 {
 	struct drm_i915_private *i915 = ce->vm->i915;
-	const u32 block_size = S16_MAX * PAGE_SIZE;
+	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
 	struct intel_engine_pool_node *pool;
 	struct i915_vma *batch;
 	u64 src_offset, dst_offset;
@@ -213,10 +214,10 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
 	intel_engine_pm_get(ce->engine);
 
-	count = div_u64(dst->size, block_size);
+	count = div_u64(round_up(dst->size, block_size), block_size);
 	size = (1 + 11 * count) * sizeof(u32);
 	size = round_up(size, PAGE_SIZE);
-	pool = intel_engine_pool_get(&ce->engine->pool, size);
+	pool = intel_engine_get_pool(ce->engine, size);
 	if (IS_ERR(pool)) {
 		err = PTR_ERR(pool);
 		goto out_pm;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 646859fea224..88e268633fdc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,6 +8,7 @@
 #define __I915_GEM_OBJECT_TYPES_H__
 
 #include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
 
 #include "i915_active.h"
 #include "i915_selftest.h"
@@ -30,10 +31,11 @@ struct i915_lut_handle {
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_NO_GGTT		BIT(3)
-#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(4)
+#define I915_GEM_OBJECT_HAS_IOMEM	BIT(1)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(3)
+#define I915_GEM_OBJECT_NO_GGTT		BIT(4)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(5)
 
 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set
@@ -61,6 +63,23 @@ struct drm_i915_gem_object_ops {
 	void (*release)(struct drm_i915_gem_object *obj);
 };
 
+enum i915_mmap_type {
+	I915_MMAP_TYPE_GTT = 0,
+	I915_MMAP_TYPE_WC,
+	I915_MMAP_TYPE_WB,
+	I915_MMAP_TYPE_UC,
+};
+
+struct i915_mmap_offset {
+	struct drm_device *dev;
+	struct drm_vma_offset_node vma_node;
+	struct drm_i915_gem_object *obj;
+	struct drm_file *file;
+	enum i915_mmap_type mmap_type;
+
+	struct list_head offset;
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -116,8 +135,19 @@ struct drm_i915_gem_object {
 	unsigned int userfault_count;
 	struct list_head userfault_link;
 
+	struct {
+		spinlock_t lock; /* Protects access to mmo offsets */
+		struct list_head offsets;
+	} mmo;
+
 	I915_SELFTEST_DECLARE(struct list_head st_link);
 
+	unsigned long flags;
+#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
+#define I915_BO_ALLOC_VOLATILE   BIT(1)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE)
+#define I915_BO_READONLY         BIT(2)
+
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
@@ -143,7 +173,7 @@ struct drm_i915_gem_object {
 	 */
 	u16 write_domain;
 
-	struct intel_frontbuffer *frontbuffer;
+	struct intel_frontbuffer __rcu *frontbuffer;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	unsigned int tiling_and_stride;
@@ -153,17 +183,34 @@ struct drm_i915_gem_object {
 
 	/** Count of VMA actually bound by this object */
 	atomic_t bind_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
 
 	struct {
-		struct mutex lock; /* protects the pages and their use */
+		/*
+		 * Protects the pages and their use. Do not use directly, but
+		 * instead go through the pin/unpin interfaces.
+		 */
+		struct mutex lock;
 		atomic_t pages_pin_count;
+		atomic_t shrink_pin;
+
+		/**
+		 * Memory region for this object.
+		 */
+		struct intel_memory_region *region;
+		/**
+		 * List of memory region blocks allocated for this object.
+		 */
+		struct list_head blocks;
+		/**
+		 * Element within memory_region->objects or region->purgeable
+		 * if the object is marked as DONTNEED. Access is protected by
+		 * region->obj_lock.
+		 */
+		struct list_head region_link;
 
 		struct sg_table *pages;
 		void *mapping;
 
-		/* TODO: whack some of this into the error state */
 		struct i915_page_sizes {
 			/**
 			 * The sg mask of the pages sg_table. i.e the mask of
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 18f0ce0135c1..54aca5c9101e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,8 @@
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
@@ -18,6 +20,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->mm.lock);
 
+	if (i915_gem_object_is_volatile(obj))
+		obj->mm.madv = I915_MADV_DONTNEED;
+
 	/* Make the pages coherent with the GPU (flushing any swapin). */
 	if (obj->cache_dirty) {
 		obj->write_domain = 0;
@@ -71,6 +76,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 			list = &i915->mm.shrink_list;
 		list_add_tail(&obj->mm.link, list);
 
+		atomic_set(&obj->mm.shrink_pin, 0);
 		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
 }
@@ -101,7 +107,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	int err;
 
-	err = mutex_lock_interruptible(&obj->mm.lock);
+	err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (err)
 		return err;
 
@@ -150,6 +156,14 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 	rcu_read_unlock();
 }
 
+static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
+{
+	if (is_vmalloc_addr(ptr))
+		vunmap(ptr);
+	else
+		kunmap(kmap_to_page(ptr));
+}
+
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
@@ -159,17 +173,13 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	if (IS_ERR_OR_NULL(pages))
 		return pages;
 
+	if (i915_gem_object_is_volatile(obj))
+		obj->mm.madv = I915_MADV_WILLNEED;
+
 	i915_gem_object_make_unshrinkable(obj);
 
 	if (obj->mm.mapping) {
-		void *ptr;
-
-		ptr = page_mask_bits(obj->mm.mapping);
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
+		unmap_object(obj, page_mask_bits(obj->mm.mapping));
 		obj->mm.mapping = NULL;
 	}
 
@@ -179,8 +189,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	return pages;
 }
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	struct sg_table *pages;
 	int err;
@@ -191,12 +200,14 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(atomic_read(&obj->bind_count));
 
 	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
+	mutex_lock(&obj->mm.lock);
 	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
 		err = -EBUSY;
 		goto unlock;
 	}
 
+	i915_gem_object_release_mmap_offset(obj);
+
 	/*
 	 * ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
@@ -223,36 +234,44 @@ unlock:
 	return err;
 }
 
+static inline pte_t iomap_pte(resource_size_t base,
+			      dma_addr_t offset,
+			      pgprot_t prot)
+{
+	return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
+}
+
 /* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 				 enum i915_map_type type)
 {
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
+	pte_t *stack[32], **mem;
+	struct vm_struct *area;
 	pgprot_t pgprot;
-	void *addr;
+
+	if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
+		return NULL;
 
 	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
+	if (n_pte == 1 && type == I915_MAP_WB)
 		return kmap(sg_page(sgt->sgl));
 
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
+	mem = stack;
+	if (n_pte > ARRAY_SIZE(stack)) {
 		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
+		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+		if (!mem)
 			return NULL;
 	}
 
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
+	area = alloc_vm_area(obj->base.size, mem);
+	if (!area) {
+		if (mem != stack)
+			kvfree(mem);
+		return NULL;
+	}
 
 	switch (type) {
 	default:
@@ -265,12 +284,31 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
 		break;
 	}
-	addr = vmap(pages, n_pages, 0, pgprot);
 
-	if (pages != stack_pages)
-		kvfree(pages);
+	if (i915_gem_object_has_struct_page(obj)) {
+		struct sgt_iter iter;
+		struct page *page;
+		pte_t **ptes = mem;
+
+		for_each_sgt_page(page, iter, sgt)
+			**ptes++ = mk_pte(page, pgprot);
+	} else {
+		resource_size_t iomap;
+		struct sgt_iter iter;
+		pte_t **ptes = mem;
+		dma_addr_t addr;
 
-	return addr;
+		iomap = obj->mm.region->iomap.base;
+		iomap -= obj->mm.region->region.start;
+
+		for_each_sgt_daddr(addr, iter, sgt)
+			**ptes++ = iomap_pte(iomap, addr, pgprot);
+	}
+
+	if (mem != stack)
+		kvfree(mem);
+
+	return area->addr;
 }
 
 /* get, pin, and map the pages of the object into kernel space */
@@ -278,14 +316,16 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			      enum i915_map_type type)
 {
 	enum i915_map_type has_type;
+	unsigned int flags;
 	bool pinned;
 	void *ptr;
 	int err;
 
-	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+	flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM;
+	if (!i915_gem_object_type_has(obj, flags))
 		return ERR_PTR(-ENXIO);
 
-	err = mutex_lock_interruptible(&obj->mm.lock);
+	err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (err)
 		return ERR_PTR(err);
 
@@ -314,10 +354,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			goto err_unpin;
 		}
 
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
+		unmap_object(obj, ptr);
 
 		ptr = obj->mm.mapping = NULL;
 	}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 768356908160..b1b7c1b3038a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
 #include "gt/intel_gt.h"
 #include "i915_drv.h"
 #include "i915_gem_object.h"
+#include "i915_gem_region.h"
 #include "i915_scatterlist.h"
 
 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
@@ -163,7 +164,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	if (err)
 		return err;
 
-	mutex_lock(&obj->mm.lock);
+	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 
 	if (obj->mm.madv != I915_MADV_WILLNEED) {
 		err = -EFAULT;
@@ -191,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	/* Perma-pin (until release) the physical set of pages */
 	__i915_gem_object_pin_pages(obj);
 
-	if (!IS_ERR_OR_NULL(pages))
+	if (!IS_ERR_OR_NULL(pages)) {
 		i915_gem_shmem_ops.put_pages(obj, pages);
+		i915_gem_object_release_memory_region(obj);
+	}
 	mutex_unlock(&obj->mm.lock);
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index ad2a63dbcac2..c8264eb036bf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -7,148 +7,17 @@
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
-#include "i915_globals.h"
-
-static void call_idle_barriers(struct intel_engine_cs *engine)
-{
-	struct llist_node *node, *next;
-
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct i915_active_request *active =
-			container_of((struct list_head *)node,
-				     typeof(*active), link);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, NULL);
-	}
-}
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id)
-		call_idle_barriers(engine); /* cleanup after wedging */
-
-	i915_vma_parked(i915);
-
-	i915_globals_park();
-}
-
-static void idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work);
-	bool park;
-
-	cancel_delayed_work_sync(&i915->gem.retire_work);
-	mutex_lock(&i915->drm.struct_mutex);
-
-	intel_wakeref_lock(&i915->gt.wakeref);
-	park = (!intel_wakeref_is_active(&i915->gt.wakeref) &&
-		!work_pending(work));
-	intel_wakeref_unlock(&i915->gt.wakeref);
-	if (park)
-		i915_gem_park(i915);
-	else
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static void retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.retire_work.work);
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
-
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
-static int pm_notifier(struct notifier_block *nb,
-		       unsigned long action,
-		       void *data)
-{
-	struct drm_i915_private *i915 =
-		container_of(nb, typeof(*i915), gem.pm_notifier);
-
-	switch (action) {
-	case INTEL_GT_UNPARK:
-		i915_globals_unpark();
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-		break;
-
-	case INTEL_GT_PARK:
-		queue_work(i915->wq, &i915->gem.idle_work);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static bool switch_to_kernel_context_sync(struct intel_gt *gt)
-{
-	bool result = !intel_gt_is_wedged(gt);
-
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-			result = false;
-		}
-	} while (i915_retire_requests(gt->i915) && result);
-
-	if (intel_gt_pm_wait_for_idle(gt))
-		result = false;
-
-	return result;
-}
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915)
-{
-	return switch_to_kernel_context_sync(&i915->gt);
-}
 
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
-	GEM_TRACE("\n");
+	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/*
 	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
@@ -158,15 +27,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(&i915->gt);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
+	intel_gt_suspend_prepare(&i915->gt);
 
 	i915_gem_drain_freed_objects(i915);
-
-	intel_uc_suspend(&i915->gt.uc);
 }
 
 static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
@@ -206,6 +69,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 	 * machine in an unusable condition.
 	 */
 
+	intel_gt_suspend_late(&i915->gt);
+
 	spin_lock_irqsave(&i915->mm.obj_lock, flags);
 	for (phase = phases; *phase; phase++) {
 		LIST_HEAD(keep);
@@ -230,54 +95,16 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 		list_splice_tail(&keep, *phase);
 	}
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-
-	i915_gem_sanitize(i915);
 }
 
 void i915_gem_resume(struct drm_i915_private *i915)
 {
-	GEM_TRACE("\n");
-
-	mutex_lock(&i915->drm.struct_mutex);
-	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	if (i915_gem_init_hw(i915))
-		goto err_wedged;
+	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
 	/*
 	 * As we didn't flush the kernel context before suspend, we cannot
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	if (intel_gt_resume(&i915->gt))
-		goto err_wedged;
-
-	intel_uc_resume(&i915->gt.uc);
-
-	/* Always reload a context for powersaving. */
-	if (!i915_gem_load_power_context(i915))
-		goto err_wedged;
-
-out_unlock:
-	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&i915->drm.struct_mutex);
-	return;
-
-err_wedged:
-	if (!intel_gt_is_wedged(&i915->gt)) {
-		dev_err(i915->drm.dev,
-			"Failed to re-initialize GPU, declaring it wedged!\n");
-		intel_gt_set_wedged(&i915->gt);
-	}
-	goto out_unlock;
-}
-
-void i915_gem_init__pm(struct drm_i915_private *i915)
-{
-	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
-	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
-
-	i915->gem.pm_notifier.notifier_call = pm_notifier;
-	blocking_notifier_chain_register(&i915->gt.pm_notifications,
-					 &i915->gem.pm_notifier);
+	intel_gt_resume(&i915->gt);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
index 6f7d5d11ac3b..26b78dbdc225 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -12,9 +12,6 @@
 struct drm_i915_private;
 struct work_struct;
 
-void i915_gem_init__pm(struct drm_i915_private *i915);
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915);
 void i915_gem_resume(struct drm_i915_private *i915);
 
 void i915_gem_idle_work_handler(struct work_struct *work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
new file mode 100644
index 000000000000..1515384d7e0e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "i915_gem_region.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+
+void
+i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+				struct sg_table *pages)
+{
+	__intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks);
+
+	obj->mm.dirty = false;
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+int
+i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mem = obj->mm.region;
+	struct list_head *blocks = &obj->mm.blocks;
+	resource_size_t size = obj->base.size;
+	resource_size_t prev_end;
+	struct i915_buddy_block *block;
+	unsigned int flags;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int sg_page_sizes;
+	int ret;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	flags = I915_ALLOC_MIN_PAGE_SIZE;
+	if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
+		flags |= I915_ALLOC_CONTIGUOUS;
+
+	ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks);
+	if (ret)
+		goto err_free_sg;
+
+	GEM_BUG_ON(list_empty(blocks));
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	prev_end = (resource_size_t)-1;
+
+	list_for_each_entry(block, blocks, link) {
+		u64 block_size, offset;
+
+		block_size = min_t(u64, size,
+				   i915_buddy_block_size(&mem->mm, block));
+		offset = i915_buddy_block_offset(block);
+
+		GEM_BUG_ON(overflows_type(block_size, sg->length));
+
+		if (offset != prev_end ||
+		    add_overflows_t(typeof(sg->length), sg->length, block_size)) {
+			if (st->nents) {
+				sg_page_sizes |= sg->length;
+				sg = __sg_next(sg);
+			}
+
+			sg_dma_address(sg) = mem->region.start + offset;
+			sg_dma_len(sg) = block_size;
+
+			sg->length = block_size;
+
+			st->nents++;
+		} else {
+			sg->length += block_size;
+			sg_dma_len(sg) += block_size;
+		}
+
+		prev_end = offset + block_size;
+	}
+
+	sg_page_sizes |= sg->length;
+	sg_mark_end(sg);
+	i915_sg_trim(st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_free_sg:
+	sg_free_table(st);
+	kfree(st);
+	return ret;
+}
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+					struct intel_memory_region *mem,
+					unsigned long flags)
+{
+	INIT_LIST_HEAD(&obj->mm.blocks);
+	obj->mm.region = intel_memory_region_get(mem);
+
+	obj->flags |= flags;
+	if (obj->base.size <= mem->min_page_size)
+		obj->flags |= I915_BO_ALLOC_CONTIGUOUS;
+
+	mutex_lock(&mem->objects.lock);
+
+	if (obj->flags & I915_BO_ALLOC_VOLATILE)
+		list_add(&obj->mm.region_link, &mem->objects.purgeable);
+	else
+		list_add(&obj->mm.region_link, &mem->objects.list);
+
+	mutex_unlock(&mem->objects.lock);
+}
+
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mem = obj->mm.region;
+
+	mutex_lock(&mem->objects.lock);
+	list_del(&obj->mm.region_link);
+	mutex_unlock(&mem->objects.lock);
+
+	intel_memory_region_put(mem);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+			      resource_size_t size,
+			      unsigned int flags)
+{
+	struct drm_i915_gem_object *obj;
+
+	/*
+	 * NB: Our use of resource_size_t for the size stems from using struct
+	 * resource for the mem->region. We might need to revisit this in the
+	 * future.
+	 */
+
+	GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+
+	if (!mem)
+		return ERR_PTR(-ENODEV);
+
+	size = round_up(size, mem->min_page_size);
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
+
+	/*
+	 * XXX: There is a prevalence of the assumption that we fit the
+	 * object's page count inside a 32bit _signed_ variable. Let's document
+	 * this and catch if we ever need to fix it. In the meantime, if you do
+	 * spot such a local variable, please consider fixing!
+	 */
+
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = mem->ops->create_object(mem, size, flags);
+	if (!IS_ERR(obj))
+		trace_i915_gem_object_create(obj);
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
new file mode 100644
index 000000000000..f2ff6f8bff74
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_REGION_H__
+#define __I915_GEM_REGION_H__
+
+#include <linux/types.h>
+
+struct intel_memory_region;
+struct drm_i915_gem_object;
+struct sg_table;
+
+int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj);
+void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages);
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+					struct intel_memory_region *mem,
+					unsigned long flags);
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+			      resource_size_t size,
+			      unsigned int flags);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4c4954e8ce0a..a2a980d9d241 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -7,7 +7,9 @@
 #include <linux/pagevec.h>
 #include <linux/swap.h>
 
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
+#include "i915_gemfs.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
@@ -26,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec)
 static int shmem_get_pages(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_memory_region *mem = obj->mm.region;
 	const unsigned long page_count = obj->base.size / PAGE_SIZE;
 	unsigned long i;
 	struct address_space *mapping;
@@ -52,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	 * If there's no chance of allocating enough pages for the whole
 	 * object, bail early.
 	 */
-	if (page_count > totalram_pages())
+	if (obj->base.size > resource_size(&mem->region))
 		return -ENOMEM;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -417,6 +420,8 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
 
 static void shmem_release(struct drm_i915_gem_object *obj)
 {
+	i915_gem_object_release_memory_region(obj);
+
 	fput(obj->base.filp);
 }
 
@@ -434,9 +439,9 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
 	.release = shmem_release,
 };
 
-static int create_shmem(struct drm_i915_private *i915,
-			struct drm_gem_object *obj,
-			size_t size)
+static int __create_shmem(struct drm_i915_private *i915,
+			  struct drm_gem_object *obj,
+			  resource_size_t size)
 {
 	unsigned long flags = VM_NORESERVE;
 	struct file *filp;
@@ -455,31 +460,24 @@ static int create_shmem(struct drm_i915_private *i915,
 	return 0;
 }
 
-struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+static struct drm_i915_gem_object *
+create_shmem(struct intel_memory_region *mem,
+	     resource_size_t size,
+	     unsigned int flags)
 {
+	static struct lock_class_key lock_class;
+	struct drm_i915_private *i915 = mem->i915;
 	struct drm_i915_gem_object *obj;
 	struct address_space *mapping;
 	unsigned int cache_level;
 	gfp_t mask;
 	int ret;
 
-	/* There is a prevalence of the assumption that we fit the object's
-	 * page count inside a 32bit _signed_ variable. Let's document this and
-	 * catch if we ever need to fix it. In the meantime, if you do spot
-	 * such a local variable, please consider fixing!
-	 */
-	if (size >> PAGE_SHIFT > INT_MAX)
-		return ERR_PTR(-E2BIG);
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
 	obj = i915_gem_object_alloc();
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 
-	ret = create_shmem(i915, &obj->base, size);
+	ret = __create_shmem(i915, &obj->base, size);
 	if (ret)
 		goto fail;
 
@@ -494,7 +492,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
 	mapping_set_gfp_mask(mapping, mask);
 	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
-	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+	i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class);
 
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -518,7 +516,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
 
 	i915_gem_object_set_cache_coherency(obj, cache_level);
 
-	trace_i915_gem_object_create(obj);
+	i915_gem_object_init_memory_region(obj, mem, 0);
 
 	return obj;
 
@@ -527,14 +525,22 @@ fail:
 	return ERR_PTR(ret);
 }
 
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+			     resource_size_t size)
+{
+	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
+					     size, 0);
+}
+
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
-				       const void *data, size_t size)
+				       const void *data, resource_size_t size)
 {
 	struct drm_i915_gem_object *obj;
 	struct file *file;
-	size_t offset;
+	resource_size_t offset;
 	int err;
 
 	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
@@ -577,3 +583,37 @@ fail:
 	i915_gem_object_put(obj);
 	return ERR_PTR(err);
 }
+
+static int init_shmem(struct intel_memory_region *mem)
+{
+	int err;
+
+	err = i915_gemfs_init(mem->i915);
+	if (err) {
+		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
+			 err);
+	}
+
+	intel_memory_region_set_name(mem, "system");
+
+	return 0; /* Don't error, we can simply fallback to the kernel mnt */
+}
+
+static void release_shmem(struct intel_memory_region *mem)
+{
+	i915_gemfs_fini(mem->i915);
+}
+
+static const struct intel_memory_region_ops shmem_region_ops = {
+	.init = init_shmem,
+	.release = release_shmem,
+	.create_object = create_shmem,
+};
+
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
+{
+	return intel_memory_region_create(i915, 0,
+					  totalram_pages() << PAGE_SHIFT,
+					  PAGE_SIZE, 0,
+					  &shmem_region_ops);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index edd21d14e64f..f7e4b39c734f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -16,40 +16,6 @@
 
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915,
-			  unsigned int flags,
-			  bool *unlock)
-{
-	struct mutex *m = &i915->drm.struct_mutex;
-
-	switch (mutex_trylock_recursive(m)) {
-	case MUTEX_TRYLOCK_RECURSIVE:
-		*unlock = false;
-		return true;
-
-	case MUTEX_TRYLOCK_FAILED:
-		*unlock = false;
-		if (flags & I915_SHRINK_ACTIVE &&
-		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
-			*unlock = true;
-		return *unlock;
-
-	case MUTEX_TRYLOCK_SUCCESS:
-		*unlock = true;
-		return true;
-	}
-
-	BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
-	if (!unlock)
-		return;
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static bool swap_available(void)
 {
 	return get_nr_swap_pages() > 0;
@@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (!i915_gem_object_is_shrinkable(obj))
 		return false;
 
-	/* Only report true if by unbinding the object and putting its pages
+	/*
+	 * Only report true if by unbinding the object and putting its pages
 	 * we can actually make forward progress towards freeing physical
 	 * pages.
 	 *
@@ -72,16 +39,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
 		return false;
 
-	/* If any vma are "permanently" pinned, it will prevent us from
-	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
-	 * a permanent pin, along with a few others like the context objects.
-	 * To simplify the scan, and to avoid walking the list of vma under the
-	 * object, we just check the count of its permanently pinned.
-	 */
-	if (READ_ONCE(obj->pin_global))
-		return false;
-
-	/* We can only return physical pages to the system if we can either
+	/*
+	 * We can only return physical pages to the system if we can either
 	 * discard the contents (because the user has marked them as being
 	 * purgeable) or if we can move their contents out to swap.
 	 */
@@ -98,7 +57,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
 		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
 
 	if (i915_gem_object_unbind(obj, flags) == 0)
-		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+		__i915_gem_object_put_pages(obj);
 
 	return !i915_gem_object_has_pages(obj);
 }
@@ -162,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
-	bool unlock;
-
-	if (!shrinker_lock(i915, shrink, &unlock))
-		return 0;
 
 	/*
 	 * When shrinking the active list, we should also consider active
@@ -254,8 +209,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 
 			if (unsafe_drop_pages(obj, shrink)) {
 				/* May arrive from get_pages on another bo */
-				mutex_lock_nested(&obj->mm.lock,
-						  I915_MM_SHRINKER);
+				mutex_lock(&obj->mm.lock);
 				if (!i915_gem_object_has_pages(obj)) {
 					try_to_writeback(obj, shrink);
 					count += obj->base.size >> PAGE_SHIFT;
@@ -275,8 +229,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	if (shrink & I915_SHRINK_BOUND)
 		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
-	shrinker_unlock(i915, unlock);
-
 	if (nr_scanned)
 		*nr_scanned += scanned;
 	return count;
@@ -346,19 +298,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	unsigned long freed;
-	bool unlock;
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, 0, &unlock))
-		return SHRINK_STOP;
-
 	freed = i915_gem_shrink(i915,
 				sc->nr_to_scan,
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_WRITEBACK);
+				I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
@@ -373,8 +320,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 		}
 	}
 
-	shrinker_unlock(i915, unlock);
-
 	return sc->nr_scanned ? freed : SHRINK_STOP;
 }
 
@@ -391,6 +336,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	freed_pages = 0;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_ACTIVE |
 					       I915_SHRINK_BOUND |
 					       I915_SHRINK_UNBOUND |
 					       I915_SHRINK_WRITEBACK);
@@ -426,10 +372,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
 	intel_wakeref_t wakeref;
-	bool unlock;
-
-	if (!shrinker_lock(i915, 0, &unlock))
-		return NOTIFY_DONE;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
@@ -446,15 +388,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
 
-		mutex_unlock(&i915->ggtt.vm.mutex);
-		if (i915_vma_unbind(vma) == 0)
+		if (__i915_vma_unbind(vma) == 0)
 			freed_pages += count;
-		mutex_lock(&i915->ggtt.vm.mutex);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	shrinker_unlock(i915, unlock);
-
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
 }
@@ -497,72 +435,65 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 	fs_reclaim_acquire(GFP_KERNEL);
 
-	/*
-	 * As we invariably rely on the struct_mutex within the shrinker,
-	 * but have a complicated recursion dance, taint all the mutexes used
-	 * within the shrinker with the struct_mutex. For completeness, we
-	 * taint with all subclass of struct_mutex, even though we should
-	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
-	 * deadlocks from using struct_mutex inside @mutex.
-	 */
-	mutex_acquire(&i915->drm.struct_mutex.dep_map,
-		      I915_MM_SHRINKER, 0, _RET_IP_);
-
 	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
-	mutex_release(&mutex->dep_map, 0, _RET_IP_);
-
-	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, _RET_IP_);
 
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (unlock)
-		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+		mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
 }
 
 #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = obj_to_i915(obj);
+	unsigned long flags;
+
 	/*
 	 * We can only be called while the pages are pinned or when
 	 * the pages are released. If pinned, we should only be called
 	 * from a single caller under controlled conditions; and on release
 	 * only one caller may release us. Neither the two may cross.
 	 */
-	if (!list_empty(&obj->mm.link)) { /* pinned by caller */
-		struct drm_i915_private *i915 = obj_to_i915(obj);
-		unsigned long flags;
-
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-		GEM_BUG_ON(list_empty(&obj->mm.link));
+	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
+		return;
 
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
+	    !list_empty(&obj->mm.link)) {
 		list_del_init(&obj->mm.link);
 		i915->mm.shrink_count--;
 		i915->mm.shrink_memory -= obj->base.size;
-
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
 static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
 					      struct list_head *head)
 {
+	struct drm_i915_private *i915 = obj_to_i915(obj);
+	unsigned long flags;
+
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!list_empty(&obj->mm.link));
+	if (!i915_gem_object_is_shrinkable(obj))
+		return;
 
-	if (i915_gem_object_is_shrinkable(obj)) {
-		struct drm_i915_private *i915 = obj_to_i915(obj);
-		unsigned long flags;
+	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
+		return;
 
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-		GEM_BUG_ON(!kref_read(&obj->base.refcount));
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	GEM_BUG_ON(!kref_read(&obj->base.refcount));
+	if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
+		GEM_BUG_ON(!list_empty(&obj->mm.link));
 
 		list_add_tail(&obj->mm.link, head);
 		i915->mm.shrink_count++;
 		i915->mm.shrink_memory += obj->base.size;
 
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index aa533b4ab5f5..451f3078d60d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -10,6 +10,7 @@
 #include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
 
@@ -25,48 +26,49 @@
  * for is a boon.
  */
 
-int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
 					 struct drm_mm_node *node, u64 size,
 					 unsigned alignment, u64 start, u64 end)
 {
 	int ret;
 
-	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+	if (!drm_mm_initialized(&i915->mm.stolen))
 		return -ENODEV;
 
 	/* WaSkipStolenMemoryFirstPage:bdw+ */
-	if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+	if (INTEL_GEN(i915) >= 8 && start < 4096)
 		start = 4096;
 
-	mutex_lock(&dev_priv->mm.stolen_lock);
-	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+	mutex_lock(&i915->mm.stolen_lock);
+	ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node,
 					  size, alignment, 0,
 					  start, end, DRM_MM_INSERT_BEST);
-	mutex_unlock(&dev_priv->mm.stolen_lock);
+	mutex_unlock(&i915->mm.stolen_lock);
 
 	return ret;
 }
 
-int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
 				struct drm_mm_node *node, u64 size,
 				unsigned alignment)
 {
-	return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
+	return i915_gem_stolen_insert_node_in_range(i915, node, size,
 						    alignment, 0, U64_MAX);
 }
 
-void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
 				 struct drm_mm_node *node)
 {
-	mutex_lock(&dev_priv->mm.stolen_lock);
+	mutex_lock(&i915->mm.stolen_lock);
 	drm_mm_remove_node(node);
-	mutex_unlock(&dev_priv->mm.stolen_lock);
+	mutex_unlock(&i915->mm.stolen_lock);
 }
 
-static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
+static int i915_adjust_stolen(struct drm_i915_private *i915,
 			      struct resource *dsm)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 	struct resource *r;
 
 	if (dsm->start == 0 || dsm->end <= dsm->start)
@@ -78,14 +80,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
 	 */
 
 	/* Make sure we don't clobber the GTT if it's within stolen memory */
-	if (INTEL_GEN(dev_priv) <= 4 &&
-	    !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
+	if (INTEL_GEN(i915) <= 4 &&
+	    !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
 		struct resource stolen[2] = {*dsm, *dsm};
 		struct resource ggtt_res;
 		resource_size_t ggtt_start;
 
-		ggtt_start = I915_READ(PGTBL_CTL);
-		if (IS_GEN(dev_priv, 4))
+		ggtt_start = intel_uncore_read(uncore, PGTBL_CTL);
+		if (IS_GEN(i915, 4))
 			ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
 				     (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
 		else
@@ -119,7 +121,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
 	 * kernel. So if the region is already marked as busy, something
 	 * is seriously wrong.
 	 */
-	r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
+	r = devm_request_mem_region(i915->drm.dev, dsm->start,
 				    resource_size(dsm),
 				    "Graphics Stolen Memory");
 	if (r == NULL) {
@@ -132,14 +134,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
 		 * reservation starting from 1 instead of 0.
 		 * There's also BIOS with off-by-one on the other end.
 		 */
-		r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
+		r = devm_request_mem_region(i915->drm.dev, dsm->start + 1,
 					    resource_size(dsm) - 2,
 					    "Graphics Stolen Memory");
 		/*
 		 * GEN3 firmware likes to smash pci bridges into the stolen
 		 * range. Apparently this works.
 		 */
-		if (r == NULL && !IS_GEN(dev_priv, 3)) {
+		if (!r && !IS_GEN(i915, 3)) {
 			DRM_ERROR("conflict detected with stolen region: %pR\n",
 				  dsm);
 
@@ -150,25 +152,27 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
-void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv)
+static void i915_gem_cleanup_stolen(struct drm_i915_private *i915)
 {
-	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+	if (!drm_mm_initialized(&i915->mm.stolen))
 		return;
 
-	drm_mm_takedown(&dev_priv->mm.stolen);
+	drm_mm_takedown(&i915->mm.stolen);
 }
 
-static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void g4x_get_stolen_reserved(struct drm_i915_private *i915,
+				    struct intel_uncore *uncore,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u32 reg_val = I915_READ(IS_GM45(dev_priv) ?
-				CTG_STOLEN_RESERVED :
-				ELK_STOLEN_RESERVED);
-	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+	u32 reg_val = intel_uncore_read(uncore,
+					IS_GM45(i915) ?
+					CTG_STOLEN_RESERVED :
+					ELK_STOLEN_RESERVED);
+	resource_size_t stolen_top = i915->dsm.end + 1;
 
 	DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
-			 IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
+			 IS_GM45(i915) ? "CTG" : "ELK", reg_val);
 
 	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
 		return;
@@ -177,7 +181,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	 * Whether ILK really reuses the ELK register for this is unclear.
 	 * Let's see if we catch anyone with this supposedly enabled on ILK.
 	 */
-	WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n",
+	WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n",
 	     reg_val);
 
 	if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
@@ -189,11 +193,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	*size = stolen_top - *base;
 }
 
-static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void gen6_get_stolen_reserved(struct drm_i915_private *i915,
+				     struct intel_uncore *uncore,
 				     resource_size_t *base,
 				     resource_size_t *size)
 {
-	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
@@ -221,12 +226,13 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void vlv_get_stolen_reserved(struct drm_i915_private *i915,
+				    struct intel_uncore *uncore,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
-	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+	u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = i915->dsm.end + 1;
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
@@ -249,11 +255,12 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	*base = stolen_top - *size;
 }
 
-static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void gen7_get_stolen_reserved(struct drm_i915_private *i915,
+				     struct intel_uncore *uncore,
 				     resource_size_t *base,
 				     resource_size_t *size)
 {
-	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
@@ -275,11 +282,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void chv_get_stolen_reserved(struct drm_i915_private *i915,
+				    struct intel_uncore *uncore,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
@@ -307,12 +315,13 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void bdw_get_stolen_reserved(struct drm_i915_private *i915,
+				    struct intel_uncore *uncore,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
-	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+	u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = i915->dsm.end + 1;
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
@@ -327,10 +336,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
 }
 
 static void icl_get_stolen_reserved(struct drm_i915_private *i915,
+				    struct intel_uncore *uncore,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED);
+	u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED);
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
@@ -355,22 +365,23 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 	}
 }
 
-int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
+static int i915_gem_init_stolen(struct drm_i915_private *i915)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	resource_size_t reserved_base, stolen_top;
 	resource_size_t reserved_total, reserved_size;
 
-	mutex_init(&dev_priv->mm.stolen_lock);
+	mutex_init(&i915->mm.stolen_lock);
 
-	if (intel_vgpu_active(dev_priv)) {
-		dev_notice(dev_priv->drm.dev,
+	if (intel_vgpu_active(i915)) {
+		dev_notice(i915->drm.dev,
 			   "%s, disabling use of stolen memory\n",
 			   "iGVT-g active");
 		return 0;
 	}
 
-	if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
-		dev_notice(dev_priv->drm.dev,
+	if (intel_vtd_active() && INTEL_GEN(i915) < 8) {
+		dev_notice(i915->drm.dev,
 			   "%s, disabling use of stolen memory\n",
 			   "DMAR active");
 		return 0;
@@ -379,55 +390,59 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 	if (resource_size(&intel_graphics_stolen_res) == 0)
 		return 0;
 
-	dev_priv->dsm = intel_graphics_stolen_res;
+	i915->dsm = intel_graphics_stolen_res;
 
-	if (i915_adjust_stolen(dev_priv, &dev_priv->dsm))
+	if (i915_adjust_stolen(i915, &i915->dsm))
 		return 0;
 
-	GEM_BUG_ON(dev_priv->dsm.start == 0);
-	GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start);
+	GEM_BUG_ON(i915->dsm.start == 0);
+	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
 
-	stolen_top = dev_priv->dsm.end + 1;
+	stolen_top = i915->dsm.end + 1;
 	reserved_base = stolen_top;
 	reserved_size = 0;
 
-	switch (INTEL_GEN(dev_priv)) {
+	switch (INTEL_GEN(i915)) {
 	case 2:
 	case 3:
 		break;
 	case 4:
-		if (!IS_G4X(dev_priv))
+		if (!IS_G4X(i915))
 			break;
 		/* fall through */
 	case 5:
-		g4x_get_stolen_reserved(dev_priv,
+		g4x_get_stolen_reserved(i915, uncore,
 					&reserved_base, &reserved_size);
 		break;
 	case 6:
-		gen6_get_stolen_reserved(dev_priv,
+		gen6_get_stolen_reserved(i915, uncore,
 					 &reserved_base, &reserved_size);
 		break;
 	case 7:
-		if (IS_VALLEYVIEW(dev_priv))
-			vlv_get_stolen_reserved(dev_priv,
+		if (IS_VALLEYVIEW(i915))
+			vlv_get_stolen_reserved(i915, uncore,
 						&reserved_base, &reserved_size);
 		else
-			gen7_get_stolen_reserved(dev_priv,
+			gen7_get_stolen_reserved(i915, uncore,
 						 &reserved_base, &reserved_size);
 		break;
 	case 8:
 	case 9:
 	case 10:
-		if (IS_LP(dev_priv))
-			chv_get_stolen_reserved(dev_priv,
+		if (IS_LP(i915))
+			chv_get_stolen_reserved(i915, uncore,
 						&reserved_base, &reserved_size);
 		else
-			bdw_get_stolen_reserved(dev_priv,
+			bdw_get_stolen_reserved(i915, uncore,
 						&reserved_base, &reserved_size);
 		break;
-	case 11:
 	default:
-		icl_get_stolen_reserved(dev_priv, &reserved_base,
+		MISSING_CASE(INTEL_GEN(i915));
+		/* fall-through */
+	case 11:
+	case 12:
+		icl_get_stolen_reserved(i915, uncore,
+					&reserved_base,
 					&reserved_size);
 		break;
 	}
@@ -444,12 +459,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 		reserved_size = 0;
 	}
 
-	dev_priv->dsm_reserved =
-		(struct resource) DEFINE_RES_MEM(reserved_base, reserved_size);
+	i915->dsm_reserved =
+		(struct resource)DEFINE_RES_MEM(reserved_base, reserved_size);
 
-	if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) {
+	if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) {
 		DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n",
-			  &dev_priv->dsm_reserved, &dev_priv->dsm);
+			  &i915->dsm_reserved, &i915->dsm);
 		return 0;
 	}
 
@@ -458,14 +473,14 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 	reserved_total = stolen_top - reserved_base;
 
 	DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n",
-			 (u64)resource_size(&dev_priv->dsm) >> 10,
-			 ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
+			 (u64)resource_size(&i915->dsm) >> 10,
+			 ((u64)resource_size(&i915->dsm) - reserved_total) >> 10);
 
-	dev_priv->stolen_usable_size =
-		resource_size(&dev_priv->dsm) - reserved_total;
+	i915->stolen_usable_size =
+		resource_size(&i915->dsm) - reserved_total;
 
 	/* Basic memrange allocator for stolen space. */
-	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size);
+	drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
 
 	return 0;
 }
@@ -474,11 +489,11 @@ static struct sg_table *
 i915_pages_create_for_stolen(struct drm_device *dev,
 			     resource_size_t offset, resource_size_t size)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct sg_table *st;
 	struct scatterlist *sg;
 
-	GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm)));
+	GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm)));
 
 	/* We hide that we have no struct page backing our stolen object
 	 * by wrapping the contiguous physical allocation with a fake
@@ -498,7 +513,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
 	sg->offset = 0;
 	sg->length = size;
 
-	sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset;
+	sg_dma_address(sg) = (dma_addr_t)i915->dsm.start + offset;
 	sg_dma_len(sg) = size;
 
 	return st;
@@ -529,12 +544,14 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
 static void
 i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen);
 
 	GEM_BUG_ON(!stolen);
 
-	i915_gem_stolen_remove_node(dev_priv, stolen);
+	i915_gem_object_release_memory_region(obj);
+
+	i915_gem_stolen_remove_node(i915, stolen);
 	kfree(stolen);
 }
 
@@ -545,83 +562,133 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
 };
 
 static struct drm_i915_gem_object *
-_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
-			       struct drm_mm_node *stolen)
+__i915_gem_object_create_stolen(struct intel_memory_region *mem,
+				struct drm_mm_node *stolen)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 	unsigned int cache_level;
+	int err = -ENOMEM;
 
 	obj = i915_gem_object_alloc();
-	if (obj == NULL)
-		return NULL;
+	if (!obj)
+		goto err;
 
-	drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size);
-	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
+	drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size);
+	i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class);
 
 	obj->stolen = stolen;
 	obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
-	cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
 	i915_gem_object_set_cache_coherency(obj, cache_level);
 
-	if (i915_gem_object_pin_pages(obj))
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
 		goto cleanup;
 
+	i915_gem_object_init_memory_region(obj, mem, 0);
+
 	return obj;
 
 cleanup:
 	i915_gem_object_free(obj);
-	return NULL;
+err:
+	return ERR_PTR(err);
 }
 
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
-			      resource_size_t size)
+static struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct intel_memory_region *mem,
+			       resource_size_t size,
+			       unsigned int flags)
 {
+	struct drm_i915_private *i915 = mem->i915;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
 	int ret;
 
-	if (!drm_mm_initialized(&dev_priv->mm.stolen))
-		return NULL;
+	if (!drm_mm_initialized(&i915->mm.stolen))
+		return ERR_PTR(-ENODEV);
 
 	if (size == 0)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
 	if (!stolen)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
-	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
+	ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096);
 	if (ret) {
-		kfree(stolen);
-		return NULL;
+		obj = ERR_PTR(ret);
+		goto err_free;
 	}
 
-	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
-	if (obj)
-		return obj;
+	obj = __i915_gem_object_create_stolen(mem, stolen);
+	if (IS_ERR(obj))
+		goto err_remove;
 
-	i915_gem_stolen_remove_node(dev_priv, stolen);
+	return obj;
+
+err_remove:
+	i915_gem_stolen_remove_node(i915, stolen);
+err_free:
 	kfree(stolen);
-	return NULL;
+	return obj;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *i915,
+			      resource_size_t size)
+{
+	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN],
+					     size, I915_BO_ALLOC_CONTIGUOUS);
+}
+
+static int init_stolen(struct intel_memory_region *mem)
+{
+	intel_memory_region_set_name(mem, "stolen");
+
+	/*
+	 * Initialise stolen early so that we may reserve preallocated
+	 * objects for the BIOS to KMS transition.
+	 */
+	return i915_gem_init_stolen(mem->i915);
+}
+
+static void release_stolen(struct intel_memory_region *mem)
+{
+	i915_gem_cleanup_stolen(mem->i915);
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_ops = {
+	.init = init_stolen,
+	.release = release_stolen,
+	.create_object = _i915_gem_object_create_stolen,
+};
+
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915)
+{
+	return intel_memory_region_create(i915,
+					  intel_graphics_stolen_res.start,
+					  resource_size(&intel_graphics_stolen_res),
+					  PAGE_SIZE, 0,
+					  &i915_region_stolen_ops);
 }
 
 struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
 					       resource_size_t stolen_offset,
 					       resource_size_t gtt_offset,
 					       resource_size_t size)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN];
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
 	struct i915_vma *vma;
 	int ret;
 
-	if (!drm_mm_initialized(&dev_priv->mm.stolen))
-		return NULL;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	if (!drm_mm_initialized(&i915->mm.stolen))
+		return ERR_PTR(-ENODEV);
 
 	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			 &stolen_offset, &gtt_offset, &size);
@@ -630,29 +697,29 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	if (WARN_ON(size == 0) ||
 	    WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
 	    WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
 	if (!stolen)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	stolen->start = stolen_offset;
 	stolen->size = size;
-	mutex_lock(&dev_priv->mm.stolen_lock);
-	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
-	mutex_unlock(&dev_priv->mm.stolen_lock);
+	mutex_lock(&i915->mm.stolen_lock);
+	ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
+	mutex_unlock(&i915->mm.stolen_lock);
 	if (ret) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
 		kfree(stolen);
-		return NULL;
+		return ERR_PTR(ret);
 	}
 
-	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
-	if (obj == NULL) {
+	obj = __i915_gem_object_create_stolen(mem, stolen);
+	if (IS_ERR(obj)) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
-		i915_gem_stolen_remove_node(dev_priv, stolen);
+		i915_gem_stolen_remove_node(i915, stolen);
 		kfree(stolen);
-		return NULL;
+		return obj;
 	}
 
 	/* Some objects just need physical mem from stolen space */
@@ -674,22 +741,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
+	mutex_lock(&ggtt->vm.mutex);
 	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		mutex_unlock(&ggtt->vm.mutex);
 		goto err_pages;
 	}
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	GEM_BUG_ON(vma->pages);
 	vma->pages = obj->mm.pages;
-	vma->flags |= I915_VMA_GLOBAL_BIND;
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+
+	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 	__i915_vma_set_map_and_fenceable(vma);
 
-	mutex_lock(&ggtt->vm.mutex);
-	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
 	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
@@ -701,5 +772,5 @@ err_pages:
 	i915_gem_object_unpin_pages(obj);
 err:
 	i915_gem_object_put(obj);
-	return NULL;
+	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index 2289644d8604..c1040627fbf3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -21,8 +21,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
 					 u64 end);
 void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
 				 struct drm_mm_node *node);
-int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv);
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915);
 struct drm_i915_gem_object *
 i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
 			      resource_size_t size);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 1e372420771b..540ef0551789 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 		if (time_after_eq(request->emitted_jiffies, recent_enough))
 			break;
 
-		if (target) {
+		if (target && xchg(&target->file_priv, NULL))
 			list_del(&target->client_link);
-			target->file_priv = NULL;
-		}
 
 		target = request;
 	}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index ca0c2f451742..6c7825a2dc2a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -11,6 +11,7 @@
 #include "i915_drv.h"
 #include "i915_gem.h"
 #include "i915_gem_ioctls.h"
+#include "i915_gem_mman.h"
 #include "i915_gem_object.h"
 
 /**
@@ -181,22 +182,25 @@ static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 			      int tiling_mode, unsigned int stride)
 {
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct i915_vma *vma;
-	int ret;
+	int ret = 0;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
 		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 			continue;
 
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 		if (ret)
-			return ret;
+			break;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -212,7 +216,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
 	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
-	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	if ((tiling | stride) == obj->tiling_and_stride)
 		return 0;
@@ -233,16 +236,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * whilst executing a fenced command for an untiled object.
 	 */
 
-	err = i915_gem_object_fence_prepare(obj, tiling, stride);
-	if (err)
-		return err;
-
 	i915_gem_object_lock(obj);
 	if (i915_gem_object_is_framebuffer(obj)) {
 		i915_gem_object_unlock(obj);
 		return -EBUSY;
 	}
 
+	err = i915_gem_object_fence_prepare(obj, tiling, stride);
+	if (err) {
+		i915_gem_object_unlock(obj);
+		return err;
+	}
+
 	/* If the memory has unknown (i.e. varying) swizzling, we pin the
 	 * pages to prevent them being swapped out and causing corruption
 	 * due to the change in swizzling.
@@ -313,10 +318,14 @@ int
 i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_set_tiling *args = data;
 	struct drm_i915_gem_object *obj;
 	int err;
 
+	if (!dev_priv->ggtt.num_fences)
+		return -EOPNOTSUPP;
+
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
@@ -340,9 +349,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		args->stride = 0;
 	} else {
 		if (args->tiling_mode == I915_TILING_X)
-			args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x;
+			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
 		else
-			args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y;
+			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;
 
 		/* Hide bit 17 swizzling from the user.  This prevents old Mesa
 		 * from aborting the application on sw fallbacks to bit 17,
@@ -364,12 +373,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	err = mutex_lock_interruptible(&dev->struct_mutex);
-	if (err)
-		goto err;
-
 	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
-	mutex_unlock(&dev->struct_mutex);
 
 	/* We have to maintain this existing ABI... */
 	args->stride = i915_gem_object_get_stride(obj);
@@ -402,6 +406,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = -ENOENT;
 
+	if (!dev_priv->ggtt.num_fences)
+		return -EOPNOTSUPP;
+
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, args->handle);
 	if (obj) {
@@ -415,10 +422,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->tiling_mode) {
 	case I915_TILING_X:
-		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
+		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
 		break;
 	case I915_TILING_Y:
-		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
 		break;
 	default:
 	case I915_TILING_NONE:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index abfbac49b8e8..580319b7bf1a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct interval_tree_node *it;
-	struct mutex *unlock = NULL;
 	unsigned long end;
 	int ret = 0;
 
@@ -129,33 +128,14 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		}
 		spin_unlock(&mn->lock);
 
-		if (!unlock) {
-			unlock = &mn->mm->i915->drm.struct_mutex;
-
-			switch (mutex_trylock_recursive(unlock)) {
-			default:
-			case MUTEX_TRYLOCK_FAILED:
-				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
-					i915_gem_object_put(obj);
-					return -EINTR;
-				}
-				/* fall through */
-			case MUTEX_TRYLOCK_SUCCESS:
-				break;
-
-			case MUTEX_TRYLOCK_RECURSIVE:
-				unlock = ERR_PTR(-EEXIST);
-				break;
-			}
-		}
-
 		ret = i915_gem_object_unbind(obj,
-					     I915_GEM_OBJECT_UNBIND_ACTIVE);
+					     I915_GEM_OBJECT_UNBIND_ACTIVE |
+					     I915_GEM_OBJECT_UNBIND_BARRIER);
 		if (ret == 0)
-			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+			ret = __i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 		if (ret)
-			goto unlock;
+			return ret;
 
 		spin_lock(&mn->lock);
 
@@ -168,10 +148,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	}
 	spin_unlock(&mn->lock);
 
-unlock:
-	if (!IS_ERR_OR_NULL(unlock))
-		mutex_unlock(unlock);
-
 	return ret;
 
 }
@@ -427,7 +403,7 @@ struct get_pages_work {
 
 static struct sg_table *
 __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
-			       struct page **pvec, int num_pages)
+			       struct page **pvec, unsigned long num_pages)
 {
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
@@ -473,9 +449,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
 	struct drm_i915_gem_object *obj = work->obj;
-	const int npages = obj->base.size >> PAGE_SHIFT;
+	const unsigned long npages = obj->base.size >> PAGE_SHIFT;
+	unsigned long pinned;
 	struct page **pvec;
-	int pinned, ret;
+	int ret;
 
 	ret = -ENOMEM;
 	pinned = 0;
@@ -484,31 +461,36 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 	if (pvec != NULL) {
 		struct mm_struct *mm = obj->userptr.mm->mm;
 		unsigned int flags = 0;
+		int locked = 0;
 
 		if (!i915_gem_object_is_readonly(obj))
 			flags |= FOLL_WRITE;
 
 		ret = -EFAULT;
 		if (mmget_not_zero(mm)) {
-			down_read(&mm->mmap_sem);
 			while (pinned < npages) {
+				if (!locked) {
+					down_read(&mm->mmap_sem);
+					locked = 1;
+				}
 				ret = get_user_pages_remote
 					(work->task, mm,
 					 obj->userptr.ptr + pinned * PAGE_SIZE,
 					 npages - pinned,
 					 flags,
-					 pvec + pinned, NULL, NULL);
+					 pvec + pinned, NULL, &locked);
 				if (ret < 0)
 					break;
 
 				pinned += ret;
 			}
-			up_read(&mm->mmap_sem);
+			if (locked)
+				up_read(&mm->mmap_sem);
 			mmput(mm);
 		}
 	}
 
-	mutex_lock(&obj->mm.lock);
+	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (obj->userptr.work == &work->work) {
 		struct sg_table *pages = ERR_PTR(ret);
 
@@ -578,7 +560,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
-	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
 	struct mm_struct *mm = obj->userptr.mm->mm;
 	struct page **pvec;
 	struct sg_table *pages;
@@ -770,6 +752,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		       void *data,
 		       struct drm_file *file)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_userptr *args = data;
 	struct drm_i915_gem_object *obj;
@@ -797,14 +780,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		return -EFAULT;
 
 	if (args->flags & I915_USERPTR_READ_ONLY) {
-		struct i915_address_space *vm;
-
 		/*
 		 * On almost all of the older hw, we cannot tell the GPU that
 		 * a page is readonly.
 		 */
-		vm = dev_priv->kernel_context->vm;
-		if (!vm || !vm->has_read_only)
+		if (!dev_priv->gt.vm->has_read_only)
 			return -ENODEV;
 	}
 
@@ -813,7 +793,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		return -ENOMEM;
 
 	drm_gem_private_object_init(dev, &obj->base, args->user_size);
-	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 3c5d17b2b670..fa16f2c3f3ac 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -12,10 +12,14 @@ static void huge_free_pages(struct drm_i915_gem_object *obj,
 			    struct sg_table *pages)
 {
 	unsigned long nreal = obj->scratch / PAGE_SIZE;
-	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 
-	for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg))
-		__free_page(sg_page(sg));
+	for_each_sgt_page(page, sgt_iter, pages) {
+		__free_page(page);
+		if (!--nreal)
+			break;
+	}
 
 	sg_free_table(pages);
 	kfree(pages);
@@ -70,7 +74,6 @@ static int huge_get_pages(struct drm_i915_gem_object *obj)
 
 err:
 	huge_free_pages(obj, pages);
-
 	return -ENOMEM;
 #undef GFP
 }
@@ -96,6 +99,7 @@ huge_gem_object(struct drm_i915_private *i915,
 		phys_addr_t phys_size,
 		dma_addr_t dma_size)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 	unsigned int cache_level;
 
@@ -111,7 +115,7 @@ huge_gem_object(struct drm_i915_private *i915,
 		return ERR_PTR(-ENOMEM);
 
 	drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
-	i915_gem_object_init(obj, &huge_ops);
+	i915_gem_object_init(obj, &huge_ops, &lock_class);
 
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
index 549c1394bcdc..b8cf31b7bf14 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -7,6 +7,12 @@
 #ifndef __HUGE_GEM_OBJECT_H
 #define __HUGE_GEM_OBJECT_H
 
+#include <linux/types.h>
+
+#include "gem/i915_gem_object_types.h"
+
+struct drm_i915_private;
+
 struct drm_i915_gem_object *
 huge_gem_object(struct drm_i915_private *i915,
 		phys_addr_t phys_size,
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 8de83c6d81f5..9311250d7d6f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -8,6 +8,8 @@
 
 #include "i915_selftest.h"
 
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_pm.h"
 
 #include "gt/intel_gt.h"
@@ -17,6 +19,7 @@
 
 #include "selftests/mock_drm.h"
 #include "selftests/mock_gem_device.h"
+#include "selftests/mock_region.h"
 #include "selftests/i915_random.h"
 
 static const unsigned int page_sizes[] = {
@@ -113,8 +116,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj)
 	if (i915_gem_gtt_prepare_pages(obj, st))
 		goto err;
 
-	obj->mm.madv = I915_MADV_DONTNEED;
-
 	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
@@ -135,7 +136,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
 	huge_pages_free_pages(pages);
 
 	obj->mm.dirty = false;
-	obj->mm.madv = I915_MADV_WILLNEED;
 }
 
 static const struct drm_i915_gem_object_ops huge_page_ops = {
@@ -150,6 +150,7 @@ huge_pages_object(struct drm_i915_private *i915,
 		  u64 size,
 		  unsigned int page_mask)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 
 	GEM_BUG_ON(!size);
@@ -166,7 +167,9 @@ huge_pages_object(struct drm_i915_private *i915,
 		return ERR_PTR(-ENOMEM);
 
 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
-	i915_gem_object_init(obj, &huge_page_ops);
+	i915_gem_object_init(obj, &huge_page_ops, &lock_class);
+
+	i915_gem_object_set_volatile(obj);
 
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -227,8 +230,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
 
 	i915_sg_trim(st);
 
-	obj->mm.madv = I915_MADV_DONTNEED;
-
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
 	return 0;
@@ -261,8 +262,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
 	sg_dma_len(sg) = obj->base.size;
 	sg_dma_address(sg) = page_size;
 
-	obj->mm.madv = I915_MADV_DONTNEED;
-
 	__i915_gem_object_set_pages(obj, st, sg->length);
 
 	return 0;
@@ -281,7 +280,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
 {
 	fake_free_huge_pages(obj, pages);
 	obj->mm.dirty = false;
-	obj->mm.madv = I915_MADV_WILLNEED;
 }
 
 static const struct drm_i915_gem_object_ops fake_ops = {
@@ -299,6 +297,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = {
 static struct drm_i915_gem_object *
 fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 
 	GEM_BUG_ON(!size);
@@ -317,9 +316,11 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
 
 	if (single)
-		i915_gem_object_init(obj, &fake_ops_single);
+		i915_gem_object_init(obj, &fake_ops_single, &lock_class);
 	else
-		i915_gem_object_init(obj, &fake_ops);
+		i915_gem_object_init(obj, &fake_ops, &lock_class);
+
+	i915_gem_object_set_volatile(obj);
 
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -333,7 +334,12 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	unsigned int supported = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj = vma->obj;
-	int err = 0;
+	int err;
+
+	/* We have to wait for the async bind to complete before our asserts */
+	err = i915_vma_sync(vma);
+	if (err)
+		return err;
 
 	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
 		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
@@ -447,6 +453,88 @@ out_device:
 	return err;
 }
 
+static int igt_mock_memory_region_huge_pages(void *arg)
+{
+	const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
+	struct i915_ppgtt *ppgtt = arg;
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	struct intel_memory_region *mem;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int bit;
+	int err = 0;
+
+	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+	if (IS_ERR(mem)) {
+		pr_err("%s failed to create memory region\n", __func__);
+		return PTR_ERR(mem);
+	}
+
+	for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		unsigned int page_size = BIT(bit);
+		resource_size_t phys;
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(flags); ++i) {
+			obj = i915_gem_object_create_region(mem, page_size,
+							    flags[i]);
+			if (IS_ERR(obj)) {
+				err = PTR_ERR(obj);
+				goto out_region;
+			}
+
+			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto out_put;
+			}
+
+			err = i915_vma_pin(vma, 0, 0, PIN_USER);
+			if (err)
+				goto out_close;
+
+			err = igt_check_page_sizes(vma);
+			if (err)
+				goto out_unpin;
+
+			phys = i915_gem_object_get_dma_address(obj, 0);
+			if (!IS_ALIGNED(phys, page_size)) {
+				pr_err("%s addr misaligned(%pa) page_size=%u\n",
+				       __func__, &phys, page_size);
+				err = -EINVAL;
+				goto out_unpin;
+			}
+
+			if (vma->page_sizes.gtt != page_size) {
+				pr_err("%s page_sizes.gtt=%u, expected=%u\n",
+				       __func__, vma->page_sizes.gtt,
+				       page_size);
+				err = -EINVAL;
+				goto out_unpin;
+			}
+
+			i915_vma_unpin(vma);
+			i915_vma_close(vma);
+
+			__i915_gem_object_put_pages(obj);
+			i915_gem_object_put(obj);
+		}
+	}
+
+	goto out_region;
+
+out_unpin:
+	i915_vma_unpin(vma);
+out_close:
+	i915_vma_close(vma);
+out_put:
+	i915_gem_object_put(obj);
+out_region:
+	intel_memory_region_put(mem);
+	return err;
+}
+
 static int igt_mock_ppgtt_misaligned_dma(void *arg)
 {
 	struct i915_ppgtt *ppgtt = arg;
@@ -562,7 +650,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 		i915_vma_close(vma);
 
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 
@@ -590,7 +678,7 @@ static void close_object_list(struct list_head *objects,
 
 		list_del(&obj->st_link);
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 }
@@ -860,7 +948,7 @@ static int igt_mock_ppgtt_64K(void *arg)
 			i915_vma_close(vma);
 
 			i915_gem_object_unpin_pages(obj);
-			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			__i915_gem_object_put_pages(obj);
 			i915_gem_object_put(obj);
 		}
 	}
@@ -879,9 +967,8 @@ out_object_put:
 	return err;
 }
 
-static int gpu_write(struct i915_vma *vma,
-		     struct i915_gem_context *ctx,
-		     struct intel_engine_cs *engine,
+static int gpu_write(struct intel_context *ce,
+		     struct i915_vma *vma,
 		     u32 dw,
 		     u32 val)
 {
@@ -893,11 +980,12 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		return err;
 
-	return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32),
+	return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
 			       vma->size >> PAGE_SHIFT, val);
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+static int
+__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 {
 	unsigned int needs_flush;
 	unsigned long n;
@@ -929,18 +1017,54 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	return err;
 }
 
-static int __igt_write_huge(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine,
+static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+	unsigned long n = obj->base.size >> PAGE_SHIFT;
+	u32 *ptr;
+	int err;
+
+	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		return err;
+
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ptr += dword;
+	while (n--) {
+		if (*ptr != val) {
+			pr_err("base[%u]=%08x, val=%08x\n",
+			       dword, *ptr, val);
+			err = -EINVAL;
+			break;
+		}
+
+		ptr += PAGE_SIZE / sizeof(*ptr);
+	}
+
+	i915_gem_object_unpin_map(obj);
+	return err;
+}
+
+static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+	if (i915_gem_object_has_struct_page(obj))
+		return __cpu_check_shmem(obj, dword, val);
+	else
+		return __cpu_check_vmap(obj, dword, val);
+}
+
+static int __igt_write_huge(struct intel_context *ce,
 			    struct drm_i915_gem_object *obj,
 			    u64 size, u64 offset,
 			    u32 dword, u32 val)
 {
-	struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
 	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
 	struct i915_vma *vma;
 	int err;
 
-	vma = i915_vma_instance(obj, vm, NULL);
+	vma = i915_vma_instance(obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -954,7 +1078,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
 		 * The ggtt may have some pages reserved so
 		 * refrain from erroring out.
 		 */
-		if (err == -ENOSPC && i915_is_ggtt(vm))
+		if (err == -ENOSPC && i915_is_ggtt(ce->vm))
 			err = 0;
 
 		goto out_vma_close;
@@ -964,7 +1088,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
 	if (err)
 		goto out_vma_unpin;
 
-	err = gpu_write(vma, ctx, engine, dword, val);
+	err = gpu_write(ce, vma, dword, val);
 	if (err) {
 		pr_err("gpu-write failed at offset=%llx\n", offset);
 		goto out_vma_unpin;
@@ -979,22 +1103,20 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
 out_vma_unpin:
 	i915_vma_unpin(vma);
 out_vma_close:
-	i915_vma_destroy(vma);
-
+	__i915_vma_put(vma);
 	return err;
 }
 
 static int igt_write_huge(struct i915_gem_context *ctx,
 			  struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
-	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
-	struct intel_engine_cs *engine;
+	struct i915_gem_engines *engines;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	I915_RND_STATE(prng);
 	IGT_TIMEOUT(end_time);
 	unsigned int max_page_size;
-	unsigned int id;
+	unsigned int count;
 	u64 max;
 	u64 num;
 	u64 size;
@@ -1008,19 +1130,18 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
 		size = round_up(size, I915_GTT_PAGE_SIZE_2M);
 
-	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
-	max = div_u64((vm->total - size), max_page_size);
-
 	n = 0;
-	for_each_engine(engine, i915, id) {
-		if (!intel_engine_can_store_dword(engine)) {
-			pr_info("store-dword-imm not supported on engine=%u\n",
-				id);
+	count = 0;
+	max = U64_MAX;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		count++;
+		if (!intel_engine_can_store_dword(ce->engine))
 			continue;
-		}
-		engines[n++] = engine;
-	}
 
+		max = min(max, ce->vm->total);
+		n++;
+	}
+	i915_gem_context_unlock_engines(ctx);
 	if (!n)
 		return 0;
 
@@ -1029,23 +1150,30 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 	 * randomized order, lets also make feeding to the same engine a few
 	 * times in succession a possibility by enlarging the permutation array.
 	 */
-	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+	order = i915_random_order(count * count, &prng);
 	if (!order)
 		return -ENOMEM;
 
+	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+	max = div_u64(max - size, max_page_size);
+
 	/*
 	 * Try various offsets in an ascending/descending fashion until we
 	 * timeout -- we want to avoid issues hidden by effectively always using
 	 * offset = 0.
 	 */
 	i = 0;
+	engines = i915_gem_context_lock_engines(ctx);
 	for_each_prime_number_from(num, 0, max) {
 		u64 offset_low = num * max_page_size;
 		u64 offset_high = (max - num) * max_page_size;
 		u32 dword = offset_in_page(num) / 4;
+		struct intel_context *ce;
 
-		engine = engines[order[i] % n];
-		i = (i + 1) % (n * I915_NUM_ENGINES);
+		ce = engines->engines[order[i] % engines->num_engines];
+		i = (i + 1) % (count * count);
+		if (!ce || !intel_engine_can_store_dword(ce->engine))
+			continue;
 
 		/*
 		 * In order to utilize 64K pages we need to both pad the vma
@@ -1057,22 +1185,23 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 			offset_low = round_down(offset_low,
 						I915_GTT_PAGE_SIZE_2M);
 
-		err = __igt_write_huge(ctx, engine, obj, size, offset_low,
+		err = __igt_write_huge(ce, obj, size, offset_low,
 				       dword, num + 1);
 		if (err)
 			break;
 
-		err = __igt_write_huge(ctx, engine, obj, size, offset_high,
+		err = __igt_write_huge(ce, obj, size, offset_high,
 				       dword, num + 1);
 		if (err)
 			break;
 
 		if (igt_timeout(end_time,
-				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
-				__func__, engine->id, offset_low, offset_high,
+				"%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+				__func__, ce->engine->name, offset_low, offset_high,
 				max_page_size))
 			break;
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	kfree(order);
 
@@ -1164,7 +1293,7 @@ static int igt_ppgtt_exhaust_huge(void *arg)
 			}
 
 			i915_gem_object_unpin_pages(obj);
-			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			__i915_gem_object_put_pages(obj);
 			i915_gem_object_put(obj);
 		}
 	}
@@ -1180,131 +1309,235 @@ out_device:
 	return err;
 }
 
-static int igt_ppgtt_internal_huge(void *arg)
+typedef struct drm_i915_gem_object *
+(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);
+
+static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
+{
+	return i915->mm.gemfs && has_transparent_hugepage();
+}
+
+static struct drm_i915_gem_object *
+igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+	if (!igt_can_allocate_thp(i915)) {
+		pr_info("%s missing THP support, skipping\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	return i915_gem_object_create_shmem(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+	return i915_gem_object_create_internal(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+	return huge_pages_object(i915, size, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+	return i915_gem_object_create_lmem(i915, size, flags);
+}
+
+static u32 igt_random_size(struct rnd_state *prng,
+			   u32 min_page_size,
+			   u32 max_page_size)
+{
+	u64 mask;
+	u32 size;
+
+	GEM_BUG_ON(!is_power_of_2(min_page_size));
+	GEM_BUG_ON(!is_power_of_2(max_page_size));
+	GEM_BUG_ON(min_page_size < PAGE_SIZE);
+	GEM_BUG_ON(min_page_size > max_page_size);
+
+	mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
+	size = prandom_u32_state(prng) & mask;
+	if (size < min_page_size)
+		size |= min_page_size;
+
+	return size;
+}
+
+static int igt_ppgtt_smoke_huge(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_object *obj;
-	static const unsigned int sizes[] = {
-		SZ_64K,
-		SZ_128K,
-		SZ_256K,
-		SZ_512K,
-		SZ_1M,
-		SZ_2M,
+	I915_RND_STATE(prng);
+	struct {
+		igt_create_fn fn;
+		u32 min;
+		u32 max;
+	} backends[] = {
+		{ igt_create_internal, SZ_64K, SZ_2M,  },
+		{ igt_create_shmem,    SZ_64K, SZ_32M, },
+		{ igt_create_local,    SZ_64K, SZ_1G,  },
 	};
-	int i;
 	int err;
+	int i;
 
 	/*
-	 * Sanity check that the HW uses huge pages correctly through internal
-	 * -- ensure that our writes land in the right place.
+	 * Sanity check that the HW uses huge pages correctly through our
+	 * various backends -- ensure that our writes land in the right place.
 	 */
 
-	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
-		unsigned int size = sizes[i];
+	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+		u32 min = backends[i].min;
+		u32 max = backends[i].max;
+		u32 size = max;
+try_again:
+		size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
 
-		obj = i915_gem_object_create_internal(i915, size);
-		if (IS_ERR(obj))
-			return PTR_ERR(obj);
+		obj = backends[i].fn(i915, size, 0);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			if (err == -E2BIG) {
+				size >>= 1;
+				goto try_again;
+			} else if (err == -ENODEV) {
+				err = 0;
+				continue;
+			}
+
+			return err;
+		}
 
 		err = i915_gem_object_pin_pages(obj);
-		if (err)
+		if (err) {
+			if (err == -ENXIO || err == -E2BIG) {
+				i915_gem_object_put(obj);
+				size >>= 1;
+				goto try_again;
+			}
 			goto out_put;
+		}
 
-		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
-			pr_info("internal unable to allocate huge-page(s) with size=%u\n",
-				size);
+		if (obj->mm.page_sizes.phys < min) {
+			pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
+				__func__, size, i);
+			err = -ENOMEM;
 			goto out_unpin;
 		}
 
 		err = igt_write_huge(ctx, obj);
 		if (err) {
-			pr_err("internal write-huge failed with size=%u\n",
-			       size);
-			goto out_unpin;
+			pr_err("%s write-huge failed with size=%u, i=%d\n",
+			       __func__, size, i);
 		}
-
+out_unpin:
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
+out_put:
 		i915_gem_object_put(obj);
-	}
 
-	return 0;
+		if (err == -ENOMEM || err == -ENXIO)
+			err = 0;
 
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out_put:
-	i915_gem_object_put(obj);
+		if (err)
+			break;
 
-	return err;
-}
+		cond_resched();
+	}
 
-static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
-{
-	return i915->mm.gemfs && has_transparent_hugepage();
+	return err;
 }
 
-static int igt_ppgtt_gemfs_huge(void *arg)
+static int igt_ppgtt_sanity_check(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
-	struct drm_i915_gem_object *obj;
-	static const unsigned int sizes[] = {
-		SZ_2M,
-		SZ_4M,
-		SZ_8M,
-		SZ_16M,
-		SZ_32M,
+	unsigned int supported = INTEL_INFO(i915)->page_sizes;
+	struct {
+		igt_create_fn fn;
+		unsigned int flags;
+	} backends[] = {
+		{ igt_create_system, 0,                        },
+		{ igt_create_local,  I915_BO_ALLOC_CONTIGUOUS, },
 	};
-	int i;
+	struct {
+		u32 size;
+		u32 pages;
+	} combos[] = {
+		{ SZ_64K,		SZ_64K		},
+		{ SZ_2M,		SZ_2M		},
+		{ SZ_2M,		SZ_64K		},
+		{ SZ_2M - SZ_64K,	SZ_64K		},
+		{ SZ_2M - SZ_4K,	SZ_64K | SZ_4K	},
+		{ SZ_2M + SZ_4K,	SZ_64K | SZ_4K	},
+		{ SZ_2M + SZ_4K,	SZ_2M  | SZ_4K	},
+		{ SZ_2M + SZ_64K,	SZ_2M  | SZ_64K },
+	};
+	int i, j;
 	int err;
 
+	if (supported == I915_GTT_PAGE_SIZE_4K)
+		return 0;
+
 	/*
-	 * Sanity check that the HW uses huge pages correctly through gemfs --
-	 * ensure that our writes land in the right place.
+	 * Sanity check that the HW behaves with a limited set of combinations.
+	 * We already have a bunch of randomised testing, which should give us
+	 * a decent amount of variation between runs, however we should keep
+	 * this to limit the chances of introducing a temporary regression, by
+	 * testing the most obvious cases that might make something blow up.
 	 */
 
-	if (!igt_can_allocate_thp(i915)) {
-		pr_info("missing THP support, skipping\n");
-		return 0;
-	}
+	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+		for (j = 0; j < ARRAY_SIZE(combos); ++j) {
+			struct drm_i915_gem_object *obj;
+			u32 size = combos[j].size;
+			u32 pages = combos[j].pages;
 
-	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
-		unsigned int size = sizes[i];
+			obj = backends[i].fn(i915, size, backends[i].flags);
+			if (IS_ERR(obj)) {
+				err = PTR_ERR(obj);
+				if (err == -ENODEV) {
+					pr_info("Device lacks local memory, skipping\n");
+					err = 0;
+					break;
+				}
 
-		obj = i915_gem_object_create_shmem(i915, size);
-		if (IS_ERR(obj))
-			return PTR_ERR(obj);
+				return err;
+			}
 
-		err = i915_gem_object_pin_pages(obj);
-		if (err)
-			goto out_put;
+			err = i915_gem_object_pin_pages(obj);
+			if (err) {
+				i915_gem_object_put(obj);
+				goto out;
+			}
 
-		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
-			pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
-				size);
-			goto out_unpin;
-		}
+			GEM_BUG_ON(pages > obj->base.size);
+			pages = pages & supported;
 
-		err = igt_write_huge(ctx, obj);
-		if (err) {
-			pr_err("gemfs write-huge failed with size=%u\n",
-			       size);
-			goto out_unpin;
+			if (pages)
+				obj->mm.page_sizes.sg = pages;
+
+			err = igt_write_huge(ctx, obj);
+
+			i915_gem_object_unpin_pages(obj);
+			__i915_gem_object_put_pages(obj);
+			i915_gem_object_put(obj);
+
+			if (err) {
+				pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
+				       __func__, size, pages, i, j);
+				goto out;
+			}
 		}
 
-		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		i915_gem_object_put(obj);
+		cond_resched();
 	}
 
-	return 0;
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out_put:
-	i915_gem_object_put(obj);
+out:
+	if (err == -ENOMEM)
+		err = 0;
 
 	return err;
 }
@@ -1314,15 +1547,15 @@ static int igt_ppgtt_pin_update(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *dev_priv = ctx->i915;
 	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
-	struct i915_address_space *vm = ctx->vm;
 	struct drm_i915_gem_object *obj;
+	struct i915_gem_engines_iter it;
+	struct i915_address_space *vm;
+	struct intel_context *ce;
 	struct i915_vma *vma;
 	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	unsigned int n;
 	int first, last;
-	int err;
+	int err = 0;
 
 	/*
 	 * Make sure there's no funny business when doing a PIN_UPDATE -- in the
@@ -1332,9 +1565,10 @@ static int igt_ppgtt_pin_update(void *arg)
 	 * huge-gtt-pages.
 	 */
 
-	if (!vm || !i915_vm_is_4lvl(vm)) {
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	if (!i915_vm_is_4lvl(vm)) {
 		pr_info("48b PPGTT not supported, skipping\n");
-		return 0;
+		goto out_vm;
 	}
 
 	first = ilog2(I915_GTT_PAGE_SIZE_64K);
@@ -1387,7 +1621,7 @@ static int igt_ppgtt_pin_update(void *arg)
 			goto out_unpin;
 		}
 
-		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
 		if (err)
 			goto out_unpin;
 
@@ -1419,14 +1653,18 @@ static int igt_ppgtt_pin_update(void *arg)
 	 */
 
 	n = 0;
-	for_each_engine(engine, dev_priv, id) {
-		if (!intel_engine_can_store_dword(engine))
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (!intel_engine_can_store_dword(ce->engine))
 			continue;
 
-		err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf);
+		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
 		if (err)
-			goto out_unpin;
+			break;
 	}
+	i915_gem_context_unlock_engines(ctx);
+	if (err)
+		goto out_unpin;
+
 	while (n--) {
 		err = cpu_check(obj, n, 0xdeadbeaf);
 		if (err)
@@ -1439,6 +1677,8 @@ out_close:
 	i915_vma_close(vma);
 out_put:
 	i915_gem_object_put(obj);
+out_vm:
+	i915_vm_put(vm);
 
 	return err;
 }
@@ -1448,7 +1688,7 @@ static int igt_tmpfs_fallback(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
 	struct vfsmount *gemfs = i915->mm.gemfs;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	u32 *vaddr;
@@ -1498,6 +1738,7 @@ out_put:
 out_restore:
 	i915->mm.gemfs = gemfs;
 
+	i915_vm_put(vm);
 	return err;
 }
 
@@ -1505,14 +1746,14 @@ static int igt_shrink_thp(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	struct i915_vma *vma;
 	unsigned int flags = PIN_USER;
 	unsigned int n;
-	int err;
+	int err = 0;
 
 	/*
 	 * Sanity check shrinking huge-paged object -- make sure nothing blows
@@ -1521,12 +1762,14 @@ static int igt_shrink_thp(void *arg)
 
 	if (!igt_can_allocate_thp(i915)) {
 		pr_info("missing THP support, skipping\n");
-		return 0;
+		goto out_vm;
 	}
 
 	obj = i915_gem_object_create_shmem(i915, SZ_2M);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out_vm;
+	}
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
@@ -1548,16 +1791,19 @@ static int igt_shrink_thp(void *arg)
 		goto out_unpin;
 
 	n = 0;
-	for_each_engine(engine, i915, id) {
-		if (!intel_engine_can_store_dword(engine))
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (!intel_engine_can_store_dword(ce->engine))
 			continue;
 
-		err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf);
+		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
 		if (err)
-			goto out_unpin;
+			break;
 	}
-
+	i915_gem_context_unlock_engines(ctx);
 	i915_vma_unpin(vma);
+	if (err)
+		goto out_close;
 
 	/*
 	 * Now that the pages are *unpinned* shrink-all should invoke
@@ -1583,16 +1829,17 @@ static int igt_shrink_thp(void *arg)
 	while (n--) {
 		err = cpu_check(obj, n, 0xdeadbeaf);
 		if (err)
-			goto out_unpin;
+			break;
 	}
 
-
 out_unpin:
 	i915_vma_unpin(vma);
 out_close:
 	i915_vma_close(vma);
 out_put:
 	i915_gem_object_put(obj);
+out_vm:
+	i915_vm_put(vm);
 
 	return err;
 }
@@ -1601,6 +1848,7 @@ int i915_gem_huge_page_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_mock_exhaust_device_supported_pages),
+		SUBTEST(igt_mock_memory_region_huge_pages),
 		SUBTEST(igt_mock_ppgtt_misaligned_dma),
 		SUBTEST(igt_mock_ppgtt_huge_fill),
 		SUBTEST(igt_mock_ppgtt_64K),
@@ -1617,8 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void)
 	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
 	mkwrite_device_info(dev_priv)->ppgtt_size = 48;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ppgtt = i915_ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
 		goto out_unlock;
@@ -1643,9 +1890,7 @@ out_close:
 	i915_vm_put(&ppgtt->vm);
 
 out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	drm_dev_put(&dev_priv->drm);
-
 	return err;
 }
 
@@ -1656,12 +1901,12 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_ppgtt_pin_update),
 		SUBTEST(igt_tmpfs_fallback),
 		SUBTEST(igt_ppgtt_exhaust_huge),
-		SUBTEST(igt_ppgtt_gemfs_huge),
-		SUBTEST(igt_ppgtt_internal_huge),
+		SUBTEST(igt_ppgtt_smoke_huge),
+		SUBTEST(igt_ppgtt_sanity_check),
 	};
-	struct drm_file *file;
 	struct i915_gem_context *ctx;
-	intel_wakeref_t wakeref;
+	struct i915_address_space *vm;
+	struct file *file;
 	int err;
 
 	if (!HAS_PPGTT(i915)) {
@@ -1676,25 +1921,21 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
-	if (ctx->vm)
-		ctx->vm->scrub_64K = true;
+	mutex_lock(&ctx->mutex);
+	vm = i915_gem_context_vm(ctx);
+	if (vm)
+		WRITE_ONCE(vm->scrub_64K, true);
+	mutex_unlock(&ctx->mutex);
 
 	err = i915_subtests(tests, ctx);
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	mock_file_free(i915, file);
-
+out_file:
+	fput(file);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index d8804a847945..b972be165e85 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -5,6 +5,7 @@
 
 #include "i915_selftest.h"
 
+#include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 
 #include "selftests/igt_flush_test.h"
@@ -12,10 +13,9 @@
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
-static int igt_client_fill(void *arg)
+static int __igt_client_fill(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *i915 = arg;
-	struct intel_context *ce = i915->engine[BCS0]->kernel_context;
+	struct intel_context *ce = engine->kernel_context;
 	struct drm_i915_gem_object *obj;
 	struct rnd_state prng;
 	IGT_TIMEOUT(end);
@@ -24,6 +24,7 @@ static int igt_client_fill(void *arg)
 
 	prandom_seed_state(&prng, i915_selftest.random_seed);
 
+	intel_engine_pm_get(engine);
 	do {
 		const u32 max_block_size = S16_MAX * PAGE_SIZE;
 		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
@@ -37,7 +38,7 @@ static int igt_client_fill(void *arg)
 		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
 			 phys_sz, sz, val);
 
-		obj = huge_gem_object(i915, phys_sz, sz);
+		obj = huge_gem_object(engine->i915, phys_sz, sz);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
 			goto err_flush;
@@ -99,10 +100,33 @@ err_put:
 err_flush:
 	if (err == -ENOMEM)
 		err = 0;
+	intel_engine_pm_put(engine);
 
 	return err;
 }
 
+static int igt_client_fill(void *arg)
+{
+	int inst = 0;
+
+	do {
+		struct intel_engine_cs *engine;
+		int err;
+
+		engine = intel_engine_lookup_user(arg,
+						  I915_ENGINE_CLASS_COPY,
+						  inst++);
+		if (!engine)
+			return 0;
+
+		err = __igt_client_fill(engine);
+		if (err == -ENOMEM)
+			err = 0;
+		if (err)
+			return err;
+	} while (1);
+}
+
 int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 0ff7a89aadca..3f6079e1dfb6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -6,14 +6,20 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 
-static int cpu_set(struct drm_i915_gem_object *obj,
-		   unsigned long offset,
-		   u32 v)
+struct context {
+	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *engine;
+};
+
+static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
@@ -21,11 +27,11 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_object_prepare_write(obj, &needs_clflush);
+	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
 	if (err)
 		return err;
 
-	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
 	map = kmap_atomic(page);
 	cpu = map + offset_in_page(offset);
 
@@ -38,14 +44,12 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	kunmap_atomic(map);
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_finish_access(ctx->obj);
 
 	return 0;
 }
 
-static int cpu_get(struct drm_i915_gem_object *obj,
-		   unsigned long offset,
-		   u32 *v)
+static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
@@ -53,11 +57,11 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_object_prepare_read(obj, &needs_clflush);
+	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
 	if (err)
 		return err;
 
-	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
 	map = kmap_atomic(page);
 	cpu = map + offset_in_page(offset);
 
@@ -67,136 +71,137 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	*v = *cpu;
 
 	kunmap_atomic(map);
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_finish_access(ctx->obj);
 
 	return 0;
 }
 
-static int gtt_set(struct drm_i915_gem_object *obj,
-		   unsigned long offset,
-		   u32 v)
+static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	i915_gem_object_unlock(obj);
+	i915_gem_object_lock(ctx->obj);
+	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+	i915_gem_object_unlock(ctx->obj);
 	if (err)
 		return err;
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	iowrite32(v, &map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
-static int gtt_get(struct drm_i915_gem_object *obj,
-		   unsigned long offset,
-		   u32 *v)
+static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	i915_gem_object_unlock(obj);
+	i915_gem_object_lock(ctx->obj);
+	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
+	i915_gem_object_unlock(ctx->obj);
 	if (err)
 		return err;
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	*v = ioread32(&map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
-static int wc_set(struct drm_i915_gem_object *obj,
-		  unsigned long offset,
-		  u32 v)
+static int wc_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	u32 *map;
 	int err;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_wc_domain(obj, true);
-	i915_gem_object_unlock(obj);
+	i915_gem_object_lock(ctx->obj);
+	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
+	i915_gem_object_unlock(ctx->obj);
 	if (err)
 		return err;
 
-	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
 	map[offset / sizeof(*map)] = v;
-	i915_gem_object_unpin_map(obj);
+	i915_gem_object_unpin_map(ctx->obj);
 
 	return 0;
 }
 
-static int wc_get(struct drm_i915_gem_object *obj,
-		  unsigned long offset,
-		  u32 *v)
+static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	u32 *map;
 	int err;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_wc_domain(obj, false);
-	i915_gem_object_unlock(obj);
+	i915_gem_object_lock(ctx->obj);
+	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
+	i915_gem_object_unlock(ctx->obj);
 	if (err)
 		return err;
 
-	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
 	*v = map[offset / sizeof(*map)];
-	i915_gem_object_unpin_map(obj);
+	i915_gem_object_unpin_map(ctx->obj);
 
 	return 0;
 }
 
-static int gpu_set(struct drm_i915_gem_object *obj,
-		   unsigned long offset,
-		   u32 v)
+static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	u32 *cs;
 	int err;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	i915_gem_object_unlock(obj);
+	i915_gem_object_lock(ctx->obj);
+	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+	i915_gem_object_unlock(ctx->obj);
 	if (err)
 		return err;
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+	rq = intel_engine_create_kernel_request(ctx->engine);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
@@ -209,12 +214,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 		return PTR_ERR(cs);
 	}
 
-	if (INTEL_GEN(i915) >= 8) {
+	if (INTEL_GEN(ctx->engine->i915) >= 8) {
 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
 		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
 		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
 		*cs++ = v;
-	} else if (INTEL_GEN(i915) >= 4) {
+	} else if (INTEL_GEN(ctx->engine->i915) >= 4) {
 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 		*cs++ = 0;
 		*cs++ = i915_ggtt_offset(vma) + offset;
@@ -239,32 +244,34 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	return err;
 }
 
-static bool always_valid(struct drm_i915_private *i915)
+static bool always_valid(struct context *ctx)
 {
 	return true;
 }
 
-static bool needs_fence_registers(struct drm_i915_private *i915)
+static bool needs_fence_registers(struct context *ctx)
 {
-	return !intel_gt_is_wedged(&i915->gt);
-}
+	struct intel_gt *gt = ctx->engine->gt;
 
-static bool needs_mi_store_dword(struct drm_i915_private *i915)
-{
-	if (intel_gt_is_wedged(&i915->gt))
+	if (intel_gt_is_wedged(gt))
 		return false;
 
-	if (!HAS_ENGINE(i915, RCS0))
+	return gt->ggtt->num_fences;
+}
+
+static bool needs_mi_store_dword(struct context *ctx)
+{
+	if (intel_gt_is_wedged(ctx->engine->gt))
 		return false;
 
-	return intel_engine_can_store_dword(i915->engine[RCS0]);
+	return intel_engine_can_store_dword(ctx->engine);
 }
 
 static const struct igt_coherency_mode {
 	const char *name;
-	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
-	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
-	bool (*valid)(struct drm_i915_private *i915);
+	int (*set)(struct context *ctx, unsigned long offset, u32 v);
+	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
+	bool (*valid)(struct context *ctx);
 } igt_coherency_mode[] = {
 	{ "cpu", cpu_set, cpu_get, always_valid },
 	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
@@ -273,19 +280,37 @@ static const struct igt_coherency_mode {
 	{ },
 };
 
+static struct intel_engine_cs *
+random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
+{
+	struct intel_engine_cs *engine;
+	unsigned int count;
+
+	count = 0;
+	for_each_uabi_engine(engine, i915)
+		count++;
+
+	count = i915_prandom_u32_max_state(count, prng);
+	for_each_uabi_engine(engine, i915)
+		if (count-- == 0)
+			return engine;
+
+	return NULL;
+}
+
 static int igt_gem_coherency(void *arg)
 {
 	const unsigned int ncachelines = PAGE_SIZE/64;
-	I915_RND_STATE(prng);
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
-	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
+	I915_RND_STATE(prng);
+	struct context ctx;
 	int err = 0;
 
-	/* We repeatedly write, overwrite and read from a sequence of
+	/*
+	 * We repeatedly write, overwrite and read from a sequence of
 	 * cachelines in order to try and detect incoherency (unflushed writes
 	 * from either the CPU or GPU). Each setter/getter uses our cache
 	 * domain API which should prevent incoherency.
@@ -299,34 +324,40 @@ static int igt_gem_coherency(void *arg)
 
 	values = offsets + ncachelines;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	ctx.engine = random_engine(i915, &prng);
+	if (!ctx.engine) {
+		err = -ENODEV;
+		goto out_free;
+	}
+	pr_info("%s: using %s\n", __func__, ctx.engine->name);
+	intel_engine_pm_get(ctx.engine);
+
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
 
-		if (!over->valid(i915))
+		if (!over->valid(&ctx))
 			continue;
 
 		for (write = igt_coherency_mode; write->name; write++) {
 			if (!write->set)
 				continue;
 
-			if (!write->valid(i915))
+			if (!write->valid(&ctx))
 				continue;
 
 			for (read = igt_coherency_mode; read->name; read++) {
 				if (!read->get)
 					continue;
 
-				if (!read->valid(i915))
+				if (!read->valid(&ctx))
 					continue;
 
 				for_each_prime_number_from(count, 1, ncachelines) {
-					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-					if (IS_ERR(obj)) {
-						err = PTR_ERR(obj);
-						goto unlock;
+					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+					if (IS_ERR(ctx.obj)) {
+						err = PTR_ERR(ctx.obj);
+						goto out_pm;
 					}
 
 					i915_random_reorder(offsets, ncachelines, &prng);
@@ -334,7 +365,7 @@ static int igt_gem_coherency(void *arg)
 						values[n] = prandom_u32_state(&prng);
 
 					for (n = 0; n < count; n++) {
-						err = over->set(obj, offsets[n], ~values[n]);
+						err = over->set(&ctx, offsets[n], ~values[n]);
 						if (err) {
 							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
 							       n, count, over->name, err);
@@ -343,7 +374,7 @@ static int igt_gem_coherency(void *arg)
 					}
 
 					for (n = 0; n < count; n++) {
-						err = write->set(obj, offsets[n], values[n]);
+						err = write->set(&ctx, offsets[n], values[n]);
 						if (err) {
 							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
 							       n, count, write->name, err);
@@ -354,7 +385,7 @@ static int igt_gem_coherency(void *arg)
 					for (n = 0; n < count; n++) {
 						u32 found;
 
-						err = read->get(obj, offsets[n], &found);
+						err = read->get(&ctx, offsets[n], &found);
 						if (err) {
 							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
 							       n, count, read->name, err);
@@ -372,20 +403,20 @@ static int igt_gem_coherency(void *arg)
 						}
 					}
 
-					i915_gem_object_put(obj);
+					i915_gem_object_put(ctx.obj);
 				}
 			}
 		}
 	}
-unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+out_pm:
+	intel_engine_pm_put(ctx.engine);
+out_free:
 	kfree(offsets);
 	return err;
 
 put_object:
-	i915_gem_object_put(obj);
-	goto unlock;
+	i915_gem_object_put(ctx.obj);
+	goto out_pm;
 }
 
 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 3e6f4a65d356..7fc46861a54d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -7,7 +7,9 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 
@@ -25,15 +27,20 @@
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
+static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx)
+{
+	/* single threaded, private ctx */
+	return rcu_dereference_protected(ctx->vm, true);
+}
+
 static int live_nop_switch(void *arg)
 {
 	const unsigned int nctx = 1024;
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context **ctx;
-	enum intel_engine_id id;
 	struct igt_live_test t;
-	struct drm_file *file;
+	struct file *file;
 	unsigned long n;
 	int err = -ENODEV;
 
@@ -52,42 +59,49 @@ static int live_nop_switch(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	for (n = 0; n < nctx; n++) {
 		ctx[n] = live_context(i915, file);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
-			goto out_unlock;
+			goto out_file;
 		}
 	}
 
-	for_each_engine(engine, i915, id) {
-		struct i915_request *rq;
+	for_each_uabi_engine(engine, i915) {
+		struct i915_request *rq = NULL;
 		unsigned long end_time, prime;
 		ktime_t times[2] = {};
 
 		times[0] = ktime_get_raw();
 		for (n = 0; n < nctx; n++) {
-			rq = igt_request_alloc(ctx[n], engine);
-			if (IS_ERR(rq)) {
-				err = PTR_ERR(rq);
-				goto out_unlock;
+			struct i915_request *this;
+
+			this = igt_request_alloc(ctx[n], engine);
+			if (IS_ERR(this)) {
+				err = PTR_ERR(this);
+				goto out_file;
 			}
-			i915_request_add(rq);
+			if (rq) {
+				i915_request_await_dma_fence(this, &rq->fence);
+				i915_request_put(rq);
+			}
+			rq = i915_request_get(this);
+			i915_request_add(this);
 		}
 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("Failed to populated %d contexts\n", nctx);
 			intel_gt_set_wedged(&i915->gt);
+			i915_request_put(rq);
 			err = -EIO;
-			goto out_unlock;
+			goto out_file;
 		}
+		i915_request_put(rq);
 
 		times[1] = ktime_get_raw();
 
@@ -96,17 +110,25 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		end_time = jiffies + i915_selftest.timeout_jiffies;
 		for_each_prime_number_from(prime, 2, 8192) {
 			times[1] = ktime_get_raw();
 
+			rq = NULL;
 			for (n = 0; n < prime; n++) {
-				rq = igt_request_alloc(ctx[n % nctx], engine);
-				if (IS_ERR(rq)) {
-					err = PTR_ERR(rq);
-					goto out_unlock;
+				struct i915_request *this;
+
+				this = igt_request_alloc(ctx[n % nctx], engine);
+				if (IS_ERR(this)) {
+					err = PTR_ERR(this);
+					goto out_file;
+				}
+
+				if (rq) { /* Force submission order */
+					i915_request_await_dma_fence(this, &rq->fence);
+					i915_request_put(rq);
 				}
 
 				/*
@@ -123,14 +145,18 @@ static int live_nop_switch(void *arg)
 				 * for latency.
 				 */
 
-				i915_request_add(rq);
+				rq = i915_request_get(this);
+				i915_request_add(this);
 			}
+			GEM_BUG_ON(!rq);
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Switching between %ld contexts timed out\n",
 				       prime);
 				intel_gt_set_wedged(&i915->gt);
+				i915_request_put(rq);
 				break;
 			}
+			i915_request_put(rq);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 2)
@@ -142,7 +168,7 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -150,9 +176,236 @@ static int live_nop_switch(void *arg)
 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	mock_file_free(i915, file);
+out_file:
+	fput(file);
+	return err;
+}
+
+struct parallel_switch {
+	struct task_struct *tsk;
+	struct intel_context *ce[2];
+};
+
+static int __live_parallel_switch1(void *data)
+{
+	struct parallel_switch *arg = data;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+
+	count = 0;
+	do {
+		struct i915_request *rq = NULL;
+		int err, n;
+
+		err = 0;
+		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+			struct i915_request *prev = rq;
+
+			rq = i915_request_create(arg->ce[n]);
+			if (IS_ERR(rq)) {
+				i915_request_put(prev);
+				return PTR_ERR(rq);
+			}
+
+			i915_request_get(rq);
+			if (prev) {
+				err = i915_request_await_dma_fence(rq, &prev->fence);
+				i915_request_put(prev);
+			}
+
+			i915_request_add(rq);
+		}
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			return err;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+
+	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
+	return 0;
+}
+
+static int __live_parallel_switchN(void *data)
+{
+	struct parallel_switch *arg = data;
+	struct i915_request *rq = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int n;
+
+	count = 0;
+	do {
+		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
+			struct i915_request *prev = rq;
+			int err = 0;
+
+			rq = i915_request_create(arg->ce[n]);
+			if (IS_ERR(rq)) {
+				i915_request_put(prev);
+				return PTR_ERR(rq);
+			}
+
+			i915_request_get(rq);
+			if (prev) {
+				err = i915_request_await_dma_fence(rq, &prev->fence);
+				i915_request_put(prev);
+			}
+
+			i915_request_add(rq);
+			if (err) {
+				i915_request_put(rq);
+				return err;
+			}
+		}
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(rq);
+
+	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
+	return 0;
+}
+
+static int live_parallel_switch(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		__live_parallel_switch1,
+		__live_parallel_switchN,
+		NULL,
+	};
+	struct parallel_switch *data = NULL;
+	struct i915_gem_engines *engines;
+	struct i915_gem_engines_iter it;
+	int (* const *fn)(void *arg);
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct file *file;
+	int n, m, count;
+	int err = 0;
+
+	/*
+	 * Check we can process switches on all engines simultaneously.
+	 */
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_file;
+	}
+
+	engines = i915_gem_context_lock_engines(ctx);
+	count = engines->num_engines;
+
+	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
+	if (!data) {
+		i915_gem_context_unlock_engines(ctx);
+		err = -ENOMEM;
+		goto out_file;
+	}
+
+	m = 0; /* Use the first context as our template for the engines */
+	for_each_gem_engine(ce, engines, it) {
+		err = intel_context_pin(ce);
+		if (err) {
+			i915_gem_context_unlock_engines(ctx);
+			goto out;
+		}
+		data[m++].ce[0] = intel_context_get(ce);
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	/* Clone the same set of engines into the other contexts */
+	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
+		ctx = live_context(i915, file);
+		if (IS_ERR(ctx)) {
+			err = PTR_ERR(ctx);
+			goto out;
+		}
+
+		for (m = 0; m < count; m++) {
+			if (!data[m].ce[0])
+				continue;
+
+			ce = intel_context_create(data[m].ce[0]->engine);
+			if (IS_ERR(ce))
+				goto out;
+
+			err = intel_context_pin(ce);
+			if (err) {
+				intel_context_put(ce);
+				goto out;
+			}
+
+			data[m].ce[n] = ce;
+		}
+	}
+
+	for (fn = func; !err && *fn; fn++) {
+		struct igt_live_test t;
+		int n;
+
+		err = igt_live_test_begin(&t, i915, __func__, "");
+		if (err)
+			break;
+
+		for (n = 0; n < count; n++) {
+			if (!data[n].ce[0])
+				continue;
+
+			data[n].tsk = kthread_run(*fn, &data[n],
+						  "igt/parallel:%s",
+						  data[n].ce[0]->engine->name);
+			if (IS_ERR(data[n].tsk)) {
+				err = PTR_ERR(data[n].tsk);
+				break;
+			}
+			get_task_struct(data[n].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		for (n = 0; n < count; n++) {
+			int status;
+
+			if (IS_ERR_OR_NULL(data[n].tsk))
+				continue;
+
+			status = kthread_stop(data[n].tsk);
+			if (status && !err)
+				err = status;
+
+			put_task_struct(data[n].tsk);
+			data[n].tsk = NULL;
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+	}
+
+out:
+	for (n = 0; n < count; n++) {
+		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
+			if (!data[n].ce[m])
+				continue;
+
+			intel_context_unpin(data[n].ce[m]);
+			intel_context_put(data[n].ce[m]);
+		}
+	}
+	kfree(data);
+out_file:
+	fput(file);
 	return err;
 }
 
@@ -166,28 +419,20 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
 	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
 }
 
-static int gpu_fill(struct drm_i915_gem_object *obj,
-		    struct i915_gem_context *ctx,
-		    struct intel_engine_cs *engine,
+static int gpu_fill(struct intel_context *ce,
+		    struct drm_i915_gem_object *obj,
 		    unsigned int dw)
 {
-	struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
 	struct i915_vma *vma;
 	int err;
 
-	GEM_BUG_ON(obj->base.size > vm->total);
-	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+	GEM_BUG_ON(obj->base.size > ce->vm->total);
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 
-	vma = i915_vma_instance(obj, vm, NULL);
+	vma = i915_vma_instance(obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	i915_gem_object_unlock(obj);
-	if (err)
-		return err;
-
 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
 	if (err)
 		return err;
@@ -200,9 +445,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	 * whilst checking that each context provides a unique view
 	 * into the object.
 	 */
-	err = igt_gpu_fill_dw(vma,
-			      ctx,
-			      engine,
+	err = igt_gpu_fill_dw(ce, vma,
 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
 			      (dw * sizeof(u32)),
 			      real_page_count(obj),
@@ -287,17 +530,17 @@ out_unmap:
 	return err;
 }
 
-static int file_add_object(struct drm_file *file,
-			    struct drm_i915_gem_object *obj)
+static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
 {
 	int err;
 
 	GEM_BUG_ON(obj->base.handle_count);
 
 	/* tie the object to the drm_file for easy reaping */
-	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
+	err = idr_alloc(&to_drm_file(file)->object_idr,
+			&obj->base, 1, 0, GFP_KERNEL);
 	if (err < 0)
-		return  err;
+		return err;
 
 	i915_gem_object_get(obj);
 	obj->base.handle_count++;
@@ -305,22 +548,21 @@ static int file_add_object(struct drm_file *file,
 }
 
 static struct drm_i915_gem_object *
-create_test_object(struct i915_gem_context *ctx,
-		   struct drm_file *file,
+create_test_object(struct i915_address_space *vm,
+		   struct file *file,
 		   struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm;
 	u64 size;
 	int err;
 
 	/* Keep in GEM's good graces */
-	i915_retire_requests(ctx->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
 
-	obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size);
+	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
 	if (IS_ERR(obj))
 		return obj;
 
@@ -348,11 +590,49 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 	return npages / DW_PER_PAGE;
 }
 
+static void throttle_release(struct i915_request **q, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (IS_ERR_OR_NULL(q[i]))
+			continue;
+
+		i915_request_put(fetch_and_zero(&q[i]));
+	}
+}
+
+static int throttle(struct intel_context *ce,
+		    struct i915_request **q, int count)
+{
+	int i;
+
+	if (!IS_ERR_OR_NULL(q[0])) {
+		if (i915_request_wait(q[0],
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0)
+			return -EINTR;
+
+		i915_request_put(q[0]);
+	}
+
+	for (i = 0; i < count - 1; i++)
+		q[i] = q[i + 1];
+
+	q[i] = intel_context_create_request(ce);
+	if (IS_ERR(q[i]))
+		return PTR_ERR(q[i]);
+
+	i915_request_get(q[i]);
+	i915_request_add(q[i]);
+
+	return 0;
+}
+
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -364,13 +644,14 @@ static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		struct drm_i915_gem_object *obj = NULL;
 		unsigned long ncontexts, ndwords, dw;
+		struct i915_request *tq[5] = {};
 		struct igt_live_test t;
-		struct drm_file *file;
 		IGT_TIMEOUT(end_time);
 		LIST_HEAD(objects);
+		struct file *file;
 
 		if (!intel_engine_can_store_dword(engine))
 			continue;
@@ -382,39 +663,53 @@ static int igt_ctx_exec(void *arg)
 		if (IS_ERR(file))
 			return PTR_ERR(file);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		ncontexts = 0;
 		ndwords = 0;
 		dw = 0;
 		while (!time_after(jiffies, end_time)) {
 			struct i915_gem_context *ctx;
+			struct intel_context *ce;
 
-			ctx = live_context(i915, file);
+			ctx = kernel_context(i915);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
+			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+			GEM_BUG_ON(IS_ERR(ce));
+
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(ce->vm, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
-					goto out_unlock;
+					intel_context_put(ce);
+					kernel_context_close(ctx);
+					goto out_file;
 				}
 			}
 
-			err = gpu_fill(obj, ctx, engine, dw);
+			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
-				goto out_unlock;
+				       engine->name,
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
+				intel_context_put(ce);
+				kernel_context_close(ctx);
+				goto out_file;
+			}
+
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				intel_context_put(ce);
+				kernel_context_close(ctx);
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -424,6 +719,9 @@ static int igt_ctx_exec(void *arg)
 
 			ndwords++;
 			ncontexts++;
+
+			intel_context_put(ce);
+			kernel_context_close(ctx);
 		}
 
 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
@@ -441,12 +739,12 @@ static int igt_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-out_unlock:
+out_file:
+		throttle_release(tq, ARRAY_SIZE(tq));
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 
-		mock_file_free(i915, file);
+		fput(file);
 		if (err)
 			return err;
 
@@ -459,11 +757,11 @@ out_unlock:
 static int igt_shared_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct i915_request *tq[5] = {};
 	struct i915_gem_context *parent;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	struct igt_live_test t;
-	struct drm_file *file;
+	struct file *file;
 	int err = 0;
 
 	/*
@@ -478,24 +776,22 @@ static int igt_shared_ctx_exec(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	parent = live_context(i915, file);
 	if (IS_ERR(parent)) {
 		err = PTR_ERR(parent);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		unsigned long ncontexts, ndwords, dw;
 		struct drm_i915_gem_object *obj = NULL;
 		IGT_TIMEOUT(end_time);
@@ -509,6 +805,7 @@ static int igt_shared_ctx_exec(void *arg)
 		ncontexts = 0;
 		while (!time_after(jiffies, end_time)) {
 			struct i915_gem_context *ctx;
+			struct intel_context *ce;
 
 			ctx = kernel_context(i915);
 			if (IS_ERR(ctx)) {
@@ -516,23 +813,39 @@ static int igt_shared_ctx_exec(void *arg)
 				goto out_test;
 			}
 
-			__assign_ppgtt(ctx, parent->vm);
+			mutex_lock(&ctx->mutex);
+			__assign_ppgtt(ctx, ctx_vm(parent));
+			mutex_unlock(&ctx->mutex);
+
+			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+			GEM_BUG_ON(IS_ERR(ce));
 
 			if (!obj) {
-				obj = create_test_object(parent, file, &objects);
+				obj = create_test_object(ctx_vm(parent),
+							 file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
+					intel_context_put(ce);
 					kernel_context_close(ctx);
 					goto out_test;
 				}
 			}
 
-			err = gpu_fill(obj, ctx, engine, dw);
+			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
+				       engine->name,
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
+				intel_context_put(ce);
+				kernel_context_close(ctx);
+				goto out_test;
+			}
+
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				intel_context_put(ce);
 				kernel_context_close(ctx);
 				goto out_test;
 			}
@@ -545,6 +858,7 @@ static int igt_shared_ctx_exec(void *arg)
 			ndwords++;
 			ncontexts++;
 
+			intel_context_put(ce);
 			kernel_context_close(ctx);
 		}
 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
@@ -562,17 +876,14 @@ static int igt_shared_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-		mutex_unlock(&i915->drm.struct_mutex);
 		i915_gem_drain_freed_objects(i915);
-		mutex_lock(&i915->drm.struct_mutex);
 	}
 out_test:
+	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	mock_file_free(i915, file);
+out_file:
+	fput(file);
 	return err;
 }
 
@@ -604,6 +915,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
 	__i915_gem_object_flush_map(obj, 0, 64);
 	i915_gem_object_unpin_map(obj);
 
+	intel_gt_chipset_flush(vma->vm->gt);
+
 	vma = i915_vma_instance(obj, vma->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
@@ -681,10 +994,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
-
+	i915_vma_unpin_and_release(&batch, 0);
 	i915_vma_unpin(vma);
 
 	*rq_out = i915_request_get(rq);
@@ -698,8 +1008,7 @@ skip_request:
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -860,8 +1169,7 @@ out:
 		igt_spinner_end(spin);
 
 	if ((flags & TEST_IDLE) && ret == 0) {
-		ret = i915_gem_wait_for_idle(ce->engine->i915,
-					     0, MAX_SCHEDULE_TIMEOUT);
+		ret = igt_flush_test(ce->engine->i915);
 		if (ret)
 			return ret;
 
@@ -883,11 +1191,13 @@ __sseu_test(const char *name,
 	struct igt_spinner *spin = NULL;
 	int ret;
 
+	intel_engine_pm_get(ce->engine);
+
 	ret = __sseu_prepare(name, flags, ce, &spin);
 	if (ret)
-		return ret;
+		goto out_pm;
 
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
@@ -900,6 +1210,8 @@ out_spin:
 		igt_spinner_fini(spin);
 		kfree(spin);
 	}
+out_pm:
+	intel_engine_pm_put(ce->engine);
 	return ret;
 }
 
@@ -908,106 +1220,96 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags)
 {
-	struct intel_engine_cs *engine = i915->engine[RCS0];
 	struct drm_i915_gem_object *obj;
-	struct i915_gem_context *ctx;
-	struct intel_context *ce;
-	struct intel_sseu pg_sseu;
-	struct drm_file *file;
-	int ret;
-
-	if (INTEL_GEN(i915) < 9 || !engine)
-		return 0;
-
-	if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
-		return 0;
+	int inst = 0;
+	int ret = 0;
 
-	if (hweight32(engine->sseu.slice_mask) < 2)
+	if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg)
 		return 0;
 
-	/*
-	 * Gen11 VME friendly power-gated configuration with half enabled
-	 * sub-slices.
-	 */
-	pg_sseu = engine->sseu;
-	pg_sseu.slice_mask = 1;
-	pg_sseu.subslice_mask =
-		~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
-
-	pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
-		name, flags, hweight32(engine->sseu.slice_mask),
-		hweight32(pg_sseu.slice_mask));
-
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
 	if (flags & TEST_RESET)
 		igt_global_reset_lock(&i915->gt);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
-	ctx = live_context(i915, file);
-	if (IS_ERR(ctx)) {
-		ret = PTR_ERR(ctx);
-		goto out_unlock;
-	}
-	i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
 		goto out_unlock;
 	}
 
-	ce = i915_gem_context_get_engine(ctx, RCS0);
-	if (IS_ERR(ce)) {
-		ret = PTR_ERR(ce);
-		goto out_put;
-	}
+	do {
+		struct intel_engine_cs *engine;
+		struct intel_context *ce;
+		struct intel_sseu pg_sseu;
 
-	ret = intel_context_pin(ce);
-	if (ret)
-		goto out_context;
+		engine = intel_engine_lookup_user(i915,
+						  I915_ENGINE_CLASS_RENDER,
+						  inst++);
+		if (!engine)
+			break;
 
-	/* First set the default mask. */
-	ret = __sseu_test(name, flags, ce, obj, engine->sseu);
-	if (ret)
-		goto out_fail;
+		if (hweight32(engine->sseu.slice_mask) < 2)
+			continue;
 
-	/* Then set a power-gated configuration. */
-	ret = __sseu_test(name, flags, ce, obj, pg_sseu);
-	if (ret)
-		goto out_fail;
+		/*
+		 * Gen11 VME friendly power-gated configuration with
+		 * half enabled sub-slices.
+		 */
+		pg_sseu = engine->sseu;
+		pg_sseu.slice_mask = 1;
+		pg_sseu.subslice_mask =
+			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
+
+		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
+			engine->name, name, flags,
+			hweight32(engine->sseu.slice_mask),
+			hweight32(pg_sseu.slice_mask));
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
+			goto out_put;
+		}
 
-	/* Back to defaults. */
-	ret = __sseu_test(name, flags, ce, obj, engine->sseu);
-	if (ret)
-		goto out_fail;
+		ret = intel_context_pin(ce);
+		if (ret)
+			goto out_ce;
 
-	/* One last power-gated configuration for the road. */
-	ret = __sseu_test(name, flags, ce, obj, pg_sseu);
-	if (ret)
-		goto out_fail;
+		/* First set the default mask. */
+		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+		if (ret)
+			goto out_unpin;
+
+		/* Then set a power-gated configuration. */
+		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+		if (ret)
+			goto out_unpin;
+
+		/* Back to defaults. */
+		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+		if (ret)
+			goto out_unpin;
+
+		/* One last power-gated configuration for the road. */
+		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+		if (ret)
+			goto out_unpin;
+
+out_unpin:
+		intel_context_unpin(ce);
+out_ce:
+		intel_context_put(ce);
+	} while (!ret);
 
-out_fail:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		ret = -EIO;
 
-	intel_context_unpin(ce);
-out_context:
-	intel_context_put(ce);
 out_put:
 	i915_gem_object_put(obj);
 
 out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	if (flags & TEST_RESET)
 		igt_global_reset_unlock(&i915->gt);
 
-	mock_file_free(i915, file);
-
 	if (ret)
 		pr_err("%s: Failed with %d!\n", name, ret);
 
@@ -1040,15 +1342,18 @@ static int igt_ctx_sseu(void *arg)
 static int igt_ctx_readonly(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	unsigned long idx, ndwords, dw, num_engines;
 	struct drm_i915_gem_object *obj = NULL;
+	struct i915_request *tq[5] = {};
+	struct i915_gem_engines_iter it;
 	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
-	unsigned long idx, ndwords, dw;
+	struct intel_context *ce;
 	struct igt_live_test t;
-	struct drm_file *file;
 	I915_RND_STATE(prng);
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
+	struct file *file;
 	int err = -ENODEV;
 
 	/*
@@ -1061,52 +1366,63 @@ static int igt_ctx_readonly(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
-	vm = ctx->vm ?: &i915->ggtt.alias->vm;
+	vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm;
 	if (!vm || !vm->has_read_only) {
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
 
+	num_engines = 0;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
+		if (intel_engine_can_store_dword(ce->engine))
+			num_engines++;
+	i915_gem_context_unlock_engines(ctx);
+
 	ndwords = 0;
 	dw = 0;
 	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		unsigned int id;
-
-		for_each_engine(engine, i915, id) {
-			if (!intel_engine_can_store_dword(engine))
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
+			if (!intel_engine_can_store_dword(ce->engine))
 				continue;
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(ce->vm, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
-					goto out_unlock;
+					i915_gem_context_unlock_engines(ctx);
+					goto out_file;
 				}
 
 				if (prandom_u32_state(&prng) & 1)
 					i915_gem_object_set_readonly(obj);
 			}
 
-			err = gpu_fill(obj, ctx, engine, dw);
+			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
-				goto out_unlock;
+				       ce->engine->name,
+				       yesno(!!ctx_vm(ctx)),
+				       err);
+				i915_gem_context_unlock_engines(ctx);
+				goto out_file;
+			}
+
+			err = throttle(ce, tq, ARRAY_SIZE(tq));
+			if (err) {
+				i915_gem_context_unlock_engines(ctx);
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -1115,9 +1431,10 @@ static int igt_ctx_readonly(void *arg)
 			}
 			ndwords++;
 		}
+		i915_gem_context_unlock_engines(ctx);
 	}
-	pr_info("Submitted %lu dwords (across %u engines)\n",
-		ndwords, RUNTIME_INFO(i915)->num_engines);
+	pr_info("Submitted %lu dwords (across %lu engines)\n",
+		ndwords, num_engines);
 
 	dw = 0;
 	idx = 0;
@@ -1137,19 +1454,19 @@ static int igt_ctx_readonly(void *arg)
 		dw += rem;
 	}
 
-out_unlock:
+out_file:
+	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
-	mock_file_free(i915, file);
+	fput(file);
 	return err;
 }
 
-static int check_scratch(struct i915_gem_context *ctx, u64 offset)
+static int check_scratch(struct i915_address_space *vm, u64 offset)
 {
 	struct drm_mm_node *node =
-		__drm_mm_interval_first(&ctx->vm->mm,
+		__drm_mm_interval_first(&vm->mm,
 					offset, offset + sizeof(u32) - 1);
 	if (!node || node->start > offset)
 		return 0;
@@ -1167,6 +1484,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	u32 *cmd;
@@ -1181,7 +1499,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto err;
+		goto out;
 	}
 
 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
@@ -1197,17 +1515,20 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	__i915_gem_object_flush_map(obj, 0, 64);
 	i915_gem_object_unpin_map(obj);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	intel_gt_chipset_flush(engine->gt);
+
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
-		goto err;
+		goto out_vm;
 	}
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
 	if (err)
-		goto err;
+		goto out_vm;
 
-	err = check_scratch(ctx, offset);
+	err = check_scratch(vm, offset);
 	if (err)
 		goto err_unpin;
 
@@ -1230,20 +1551,19 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 		goto skip_request;
 
 	i915_vma_unpin(vma);
-	i915_vma_close(vma);
-	i915_vma_put(vma);
 
 	i915_request_add(rq);
 
-	return 0;
-
+	goto out_vm;
 skip_request:
 	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_unpin:
 	i915_vma_unpin(vma);
-err:
+out_vm:
+	i915_vm_put(vm);
+out:
 	i915_gem_object_put(obj);
 	return err;
 }
@@ -1254,6 +1574,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
 	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
 	const u32 result = 0x100;
 	struct i915_request *rq;
@@ -1270,7 +1591,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto err;
+		goto out;
 	}
 
 	memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1296,17 +1617,20 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	intel_gt_chipset_flush(engine->gt);
+
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
-		goto err;
+		goto out_vm;
 	}
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
 	if (err)
-		goto err;
+		goto out_vm;
 
-	err = check_scratch(ctx, offset);
+	err = check_scratch(vm, offset);
 	if (err)
 		goto err_unpin;
 
@@ -1337,27 +1661,27 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
 	i915_gem_object_unlock(obj);
 	if (err)
-		goto err;
+		goto out_vm;
 
 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto err;
+		goto out_vm;
 	}
 
 	*value = cmd[result / sizeof(*cmd)];
 	i915_gem_object_unpin_map(obj);
-	i915_gem_object_put(obj);
-
-	return 0;
 
+	goto out_vm;
 skip_request:
 	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_unpin:
 	i915_vma_unpin(vma);
-err:
+out_vm:
+	i915_vm_put(vm);
+out:
 	i915_gem_object_put(obj);
 	return err;
 }
@@ -1366,12 +1690,11 @@ static int igt_vm_isolation(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx_a, *ctx_b;
+	unsigned long num_engines, count;
 	struct intel_engine_cs *engine;
 	struct igt_live_test t;
-	struct drm_file *file;
 	I915_RND_STATE(prng);
-	unsigned long count;
-	unsigned int id;
+	struct file *file;
 	u64 vm_total;
 	int err;
 
@@ -1387,34 +1710,33 @@ static int igt_vm_isolation(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx_a = live_context(i915, file);
 	if (IS_ERR(ctx_a)) {
 		err = PTR_ERR(ctx_a);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	ctx_b = live_context(i915, file);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	/* We can only test vm isolation, if the vm are distinct */
-	if (ctx_a->vm == ctx_b->vm)
-		goto out_unlock;
+	if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
+		goto out_file;
 
-	vm_total = ctx_a->vm->total;
-	GEM_BUG_ON(ctx_b->vm->total != vm_total);
+	vm_total = ctx_vm(ctx_a)->total;
+	GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total);
 	vm_total -= I915_GTT_PAGE_SIZE;
 
 	count = 0;
-	for_each_engine(engine, i915, id) {
+	num_engines = 0;
+	for_each_uabi_engine(engine, i915) {
 		IGT_TIMEOUT(end_time);
 		unsigned long this = 0;
 
@@ -1436,7 +1758,7 @@ static int igt_vm_isolation(void *arg)
 				err = read_from_scratch(ctx_b, engine,
 							offset, &value);
 			if (err)
-				goto out_unlock;
+				goto out_file;
 
 			if (value) {
 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1445,40 +1767,24 @@ static int igt_vm_isolation(void *arg)
 				       lower_32_bits(offset),
 				       this);
 				err = -EINVAL;
-				goto out_unlock;
+				goto out_file;
 			}
 
 			this++;
 		}
 		count += this;
+		num_engines++;
 	}
-	pr_info("Checked %lu scratch offsets across %d engines\n",
-		count, RUNTIME_INFO(i915)->num_engines);
+	pr_info("Checked %lu scratch offsets across %lu engines\n",
+		count, num_engines);
 
-out_unlock:
+out_file:
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	mock_file_free(i915, file);
+	fput(file);
 	return err;
 }
 
-static __maybe_unused const char *
-__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-
-	if (engines == ALL_ENGINES)
-		return "all";
-
-	for_each_engine_masked(engine, i915, engines, tmp)
-		return engine->name;
-
-	return "none";
-}
-
 static bool skip_unused_engines(struct intel_context *ce, void *data)
 {
 	return !ce->state;
@@ -1506,13 +1812,9 @@ static int mock_context_barrier(void *arg)
 	 * a request; useful for retiring old state after loading new.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = mock_context(i915, "mock");
-	if (!ctx) {
-		err = -ENOMEM;
-		goto unlock;
-	}
+	if (!ctx)
+		return -ENOMEM;
 
 	counter = 0;
 	err = context_barrier_task(ctx, 0,
@@ -1585,8 +1887,6 @@ static int mock_context_barrier(void *arg)
 
 out:
 	mock_context_close(ctx);
-unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 #undef pr_fmt
 #define pr_fmt(x) x
@@ -1614,6 +1914,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_nop_switch),
+		SUBTEST(live_parallel_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
 		SUBTEST(igt_ctx_sseu),
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index d85d1ce273ca..2a52b92586b9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -254,106 +254,6 @@ err_obj:
 	return err;
 }
 
-static int igt_dmabuf_export_kmap(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	struct dma_buf *dmabuf;
-	void *ptr;
-	int err;
-
-	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	dmabuf = i915_gem_prime_export(&obj->base, 0);
-	i915_gem_object_put(obj);
-	if (IS_ERR(dmabuf)) {
-		err = PTR_ERR(dmabuf);
-		pr_err("i915_gem_prime_export failed with err=%d\n", err);
-		return err;
-	}
-
-	ptr = dma_buf_kmap(dmabuf, 0);
-	if (!ptr) {
-		pr_err("dma_buf_kmap failed\n");
-		err = -ENOMEM;
-		goto err;
-	}
-
-	if (memchr_inv(ptr, 0, PAGE_SIZE)) {
-		dma_buf_kunmap(dmabuf, 0, ptr);
-		pr_err("Exported page[0] not initialiased to zero!\n");
-		err = -EINVAL;
-		goto err;
-	}
-
-	memset(ptr, 0xc5, PAGE_SIZE);
-	dma_buf_kunmap(dmabuf, 0, ptr);
-
-	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(ptr)) {
-		err = PTR_ERR(ptr);
-		pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
-		goto err;
-	}
-	memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
-
-	ptr = dma_buf_kmap(dmabuf, 1);
-	if (!ptr) {
-		pr_err("dma_buf_kmap failed\n");
-		err = -ENOMEM;
-		goto err;
-	}
-
-	if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
-		dma_buf_kunmap(dmabuf, 1, ptr);
-		pr_err("Exported page[1] not set to 0xaa!\n");
-		err = -EINVAL;
-		goto err;
-	}
-
-	memset(ptr, 0xc5, PAGE_SIZE);
-	dma_buf_kunmap(dmabuf, 1, ptr);
-
-	ptr = dma_buf_kmap(dmabuf, 0);
-	if (!ptr) {
-		pr_err("dma_buf_kmap failed\n");
-		err = -ENOMEM;
-		goto err;
-	}
-	if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
-		dma_buf_kunmap(dmabuf, 0, ptr);
-		pr_err("Exported page[0] did not retain 0xc5!\n");
-		err = -EINVAL;
-		goto err;
-	}
-	dma_buf_kunmap(dmabuf, 0, ptr);
-
-	ptr = dma_buf_kmap(dmabuf, 2);
-	if (ptr) {
-		pr_err("Erroneously kmapped beyond the end of the object!\n");
-		dma_buf_kunmap(dmabuf, 2, ptr);
-		err = -EINVAL;
-		goto err;
-	}
-
-	ptr = dma_buf_kmap(dmabuf, -1);
-	if (ptr) {
-		pr_err("Erroneously kmapped before the start of the object!\n");
-		dma_buf_kunmap(dmabuf, -1, ptr);
-		err = -EINVAL;
-		goto err;
-	}
-
-	err = 0;
-err:
-	dma_buf_put(dmabuf);
-	return err;
-}
-
 int i915_gem_dmabuf_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -362,7 +262,6 @@ int i915_gem_dmabuf_mock_selftests(void)
 		SUBTEST(igt_dmabuf_import),
 		SUBTEST(igt_dmabuf_import_ownership),
 		SUBTEST(igt_dmabuf_export_vmap),
-		SUBTEST(igt_dmabuf_export_kmap),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1d27babff0ce..ef7c74cff28a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -6,11 +6,15 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gem/i915_gem_region.h"
 #include "huge_gem_object.h"
 #include "i915_selftest.h"
+#include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_mmap.h"
 
 struct tile {
 	unsigned int width;
@@ -76,18 +80,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v)
 
 static int check_partial_mapping(struct drm_i915_gem_object *obj,
 				 const struct tile *tile,
-				 unsigned long end_time)
+				 struct rnd_state *prng)
 {
-	const unsigned int nreal = obj->scratch / PAGE_SIZE;
 	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	unsigned long page;
+	u32 __iomem *io;
+	struct page *p;
+	unsigned int n;
+	u64 offset;
+	u32 *cpu;
 	int err;
 
-	if (igt_timeout(end_time,
-			"%s: timed out before tiling=%d stride=%d\n",
-			__func__, tile->tiling, tile->stride))
-		return -EINTR;
+	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+	if (err) {
+		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+		       tile->tiling, tile->stride, err);
+		return err;
+	}
+
+	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err) {
+		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+		return err;
+	}
+
+	page = i915_prandom_u32_max_state(npages, prng);
+	view = compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+
+	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma)) {
+		pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+		       page, (int)PTR_ERR(vma));
+		return PTR_ERR(vma);
+	}
+
+	n = page - view.partial.offset;
+	GEM_BUG_ON(n >= view.partial.size);
+
+	io = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(io)) {
+		pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+		       page, (int)PTR_ERR(io));
+		err = PTR_ERR(io);
+		goto out;
+	}
+
+	iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+	i915_vma_unpin_iomap(vma);
+
+	offset = tiled_offset(tile, page << PAGE_SHIFT);
+	if (offset >= obj->base.size)
+		goto out;
+
+	intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+
+	p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+	cpu = kmap(p) + offset_in_page(offset);
+	drm_clflush_virt_range(cpu, sizeof(*cpu));
+	if (*cpu != (u32)page) {
+		pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+		       page, n,
+		       view.partial.offset,
+		       view.partial.size,
+		       vma->size >> PAGE_SHIFT,
+		       tile->tiling ? tile_row_pages(obj) : 0,
+		       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+		       offset >> PAGE_SHIFT,
+		       (unsigned int)offset_in_page(offset),
+		       offset,
+		       (u32)page, *cpu);
+		err = -EINVAL;
+	}
+	*cpu = 0;
+	drm_clflush_virt_range(cpu, sizeof(*cpu));
+	kunmap(p);
+
+out:
+	__i915_vma_put(vma);
+	return err;
+}
+
+static int check_partial_mappings(struct drm_i915_gem_object *obj,
+				  const struct tile *tile,
+				  unsigned long end_time)
+{
+	const unsigned int nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_vma *vma;
+	unsigned long page;
+	int err;
 
 	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
 	if (err) {
@@ -169,12 +258,43 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (err)
 			return err;
 
-		i915_vma_destroy(vma);
+		__i915_vma_put(vma);
+
+		if (igt_timeout(end_time,
+				"%s: timed out after tiling=%d stride=%d\n",
+				__func__, tile->tiling, tile->stride))
+			return -EINTR;
 	}
 
 	return 0;
 }
 
+static unsigned int
+setup_tile_size(struct tile *tile, struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) <= 2) {
+		tile->height = 16;
+		tile->width = 128;
+		tile->size = 11;
+	} else if (tile->tiling == I915_TILING_Y &&
+		   HAS_128_BYTE_Y_TILING(i915)) {
+		tile->height = 32;
+		tile->width = 128;
+		tile->size = 12;
+	} else {
+		tile->height = 8;
+		tile->width = 512;
+		tile->size = 12;
+	}
+
+	if (INTEL_GEN(i915) < 4)
+		return 8192 / tile->width;
+	else if (INTEL_GEN(i915) < 7)
+		return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width;
+	else
+		return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width;
+}
+
 static int igt_partial_tiling(void *arg)
 {
 	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
@@ -184,6 +304,9 @@ static int igt_partial_tiling(void *arg)
 	int tiling;
 	int err;
 
+	if (!i915_ggtt_has_aperture(&i915->ggtt))
+		return 0;
+
 	/* We want to check the page mapping and fencing of a large object
 	 * mmapped through the GTT. The object we create is larger than can
 	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
@@ -205,7 +328,6 @@ static int igt_partial_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (1) {
@@ -219,7 +341,7 @@ static int igt_partial_tiling(void *arg)
 		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
 		tile.tiling = I915_TILING_NONE;
 
-		err = check_partial_mapping(obj, &tile, end);
+		err = check_partial_mappings(obj, &tile, end);
 		if (err && err != -EINTR)
 			goto out_unlock;
 	}
@@ -241,10 +363,10 @@ static int igt_partial_tiling(void *arg)
 		tile.tiling = tiling;
 		switch (tiling) {
 		case I915_TILING_X:
-			tile.swizzle = i915->mm.bit_6_swizzle_x;
+			tile.swizzle = i915->ggtt.bit_6_swizzle_x;
 			break;
 		case I915_TILING_Y:
-			tile.swizzle = i915->mm.bit_6_swizzle_y;
+			tile.swizzle = i915->ggtt.bit_6_swizzle_y;
 			break;
 		}
 
@@ -253,31 +375,11 @@ static int igt_partial_tiling(void *arg)
 		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
 			continue;
 
-		if (INTEL_GEN(i915) <= 2) {
-			tile.height = 16;
-			tile.width = 128;
-			tile.size = 11;
-		} else if (tile.tiling == I915_TILING_Y &&
-			   HAS_128_BYTE_Y_TILING(i915)) {
-			tile.height = 32;
-			tile.width = 128;
-			tile.size = 12;
-		} else {
-			tile.height = 8;
-			tile.width = 512;
-			tile.size = 12;
-		}
-
-		if (INTEL_GEN(i915) < 4)
-			max_pitch = 8192 / tile.width;
-		else if (INTEL_GEN(i915) < 7)
-			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
-		else
-			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+		max_pitch = setup_tile_size(&tile, i915);
 
 		for (pitch = max_pitch; pitch; pitch >>= 1) {
 			tile.stride = tile.width * pitch;
-			err = check_partial_mapping(obj, &tile, end);
+			err = check_partial_mappings(obj, &tile, end);
 			if (err == -EINTR)
 				goto next_tiling;
 			if (err)
@@ -285,7 +387,7 @@ static int igt_partial_tiling(void *arg)
 
 			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
 				tile.stride = tile.width * (pitch - 1);
-				err = check_partial_mapping(obj, &tile, end);
+				err = check_partial_mappings(obj, &tile, end);
 				if (err == -EINTR)
 					goto next_tiling;
 				if (err)
@@ -294,7 +396,7 @@ static int igt_partial_tiling(void *arg)
 
 			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
 				tile.stride = tile.width * (pitch + 1);
-				err = check_partial_mapping(obj, &tile, end);
+				err = check_partial_mappings(obj, &tile, end);
 				if (err == -EINTR)
 					goto next_tiling;
 				if (err)
@@ -305,7 +407,7 @@ static int igt_partial_tiling(void *arg)
 		if (INTEL_GEN(i915) >= 4) {
 			for_each_prime_number(pitch, max_pitch) {
 				tile.stride = tile.width * pitch;
-				err = check_partial_mapping(obj, &tile, end);
+				err = check_partial_mappings(obj, &tile, end);
 				if (err == -EINTR)
 					goto next_tiling;
 				if (err)
@@ -318,7 +420,100 @@ next_tiling: ;
 
 out_unlock:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_smoke_tiling(void *arg)
+{
+	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	I915_RND_STATE(prng);
+	unsigned long count;
+	IGT_TIMEOUT(end);
+	int err;
+
+	if (!i915_ggtt_has_aperture(&i915->ggtt))
+		return 0;
+
+	/*
+	 * igt_partial_tiling() does an exhastive check of partial tiling
+	 * chunking, but will undoubtably run out of time. Here, we do a
+	 * randomised search and hope over many runs of 1s with different
+	 * seeds we will do a thorough check.
+	 *
+	 * Remember to look at the st_seed if we see a flip-flop in BAT!
+	 */
+
+	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+		return 0;
+
+	obj = huge_gem_object(i915,
+			      nreal << PAGE_SHIFT,
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	count = 0;
+	do {
+		struct tile tile;
+
+		tile.tiling =
+			i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng);
+		switch (tile.tiling) {
+		case I915_TILING_NONE:
+			tile.height = 1;
+			tile.width = 1;
+			tile.size = 0;
+			tile.stride = 0;
+			tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+			break;
+
+		case I915_TILING_X:
+			tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+			break;
+		case I915_TILING_Y:
+			tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+			break;
+		}
+
+		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+			continue;
+
+		if (tile.tiling != I915_TILING_NONE) {
+			unsigned int max_pitch = setup_tile_size(&tile, i915);
+
+			tile.stride =
+				i915_prandom_u32_max_state(max_pitch, &prng);
+			tile.stride = (1 + tile.stride) * tile.width;
+			if (INTEL_GEN(i915) < 4)
+				tile.stride = rounddown_pow_of_two(tile.stride);
+		}
+
+		err = check_partial_mapping(obj, &tile, &prng);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end, NULL));
+
+	pr_info("%s: Completed %lu trials\n", __func__, count);
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -329,22 +524,21 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	struct i915_vma *vma;
-	int err;
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	for_each_uabi_engine(engine, i915) {
+		struct i915_request *rq;
+		struct i915_vma *vma;
+		int err;
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		return err;
+		vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
 
-	for_each_engine(engine, i915, id) {
-		struct i915_request *rq;
+		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		if (err)
+			return err;
 
-		rq = i915_request_create(engine->kernel_context);
+		rq = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(rq)) {
 			i915_vma_unpin(vma);
 			return PTR_ERR(rq);
@@ -358,12 +552,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		i915_vma_unlock(vma);
 
 		i915_request_add(rq);
+		i915_vma_unpin(vma);
+		if (err)
+			return err;
 	}
 
-	i915_vma_unpin(vma);
 	i915_gem_object_put(obj); /* leave it only alive via its active ref */
-
-	return err;
+	return 0;
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -371,36 +566,29 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 			       int expected)
 {
 	struct drm_i915_gem_object *obj;
-	int err;
+	struct i915_mmap_offset *mmo;
 
 	obj = i915_gem_object_create_internal(i915, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	err = create_mmap_offset(obj);
+	mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
 	i915_gem_object_put(obj);
 
-	return err == expected;
+	return PTR_ERR_OR_ZERO(mmo) == expected;
 }
 
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-
 	intel_gt_pm_get(&i915->gt);
-
-	cancel_delayed_work_sync(&i915->gem.retire_work);
-	flush_work(&i915->gem.idle_work);
+	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
+	igt_flush_test(i915);
 	intel_gt_pm_put(&i915->gt);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_driver_register__shrinker(i915);
 }
 
@@ -421,28 +609,50 @@ static int igt_mmap_offset_exhaustion(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
 	struct drm_i915_gem_object *obj;
-	struct drm_mm_node resv, *hole;
-	u64 hole_start, hole_end;
-	int loop, err;
+	struct drm_mm_node *hole, *next;
+	struct i915_mmap_offset *mmo;
+	int loop, err = 0;
 
 	/* Disable background reaper */
 	disable_retire_worker(i915);
 	GEM_BUG_ON(!i915->gt.awake);
+	intel_gt_retire_requests(&i915->gt);
+	i915_gem_drain_freed_objects(i915);
 
 	/* Trim the device mmap space to only a page */
-	memset(&resv, 0, sizeof(resv));
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		resv.start = hole_start;
-		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
-		mmap_offset_lock(i915);
-		err = drm_mm_reserve_node(mm, &resv);
-		mmap_offset_unlock(i915);
+	mmap_offset_lock(i915);
+	loop = 1; /* PAGE_SIZE units */
+	list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) {
+		struct drm_mm_node *resv;
+
+		resv = kzalloc(sizeof(*resv), GFP_NOWAIT);
+		if (!resv) {
+			err = -ENOMEM;
+			goto out_park;
+		}
+
+		resv->start = drm_mm_hole_node_start(hole) + loop;
+		resv->size = hole->hole_size - loop;
+		resv->color = -1ul;
+		loop = 0;
+
+		if (!resv->size) {
+			kfree(resv);
+			continue;
+		}
+
+		pr_debug("Reserving hole [%llx + %llx]\n",
+			 resv->start, resv->size);
+
+		err = drm_mm_reserve_node(mm, resv);
 		if (err) {
 			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			kfree(resv);
 			goto out_park;
 		}
-		break;
 	}
+	GEM_BUG_ON(!list_is_singular(&mm->hole_stack));
+	mmap_offset_unlock(i915);
 
 	/* Just fits! */
 	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
@@ -465,9 +675,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		goto out;
 	}
 
-	err = create_mmap_offset(obj);
-	if (err) {
+	mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
+	if (IS_ERR(mmo)) {
 		pr_err("Unable to insert object into reclaimed hole\n");
+		err = PTR_ERR(mmo);
 		goto err_obj;
 	}
 
@@ -490,9 +701,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
 			goto err_obj;
@@ -501,9 +710,15 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 out:
 	mmap_offset_lock(i915);
-	drm_mm_remove_node(&resv);
-	mmap_offset_unlock(i915);
 out_park:
+	drm_mm_for_each_node_safe(hole, next, mm) {
+		if (hole->color != -1ul)
+			continue;
+
+		drm_mm_remove_node(hole);
+		kfree(hole);
+	}
+	mmap_offset_unlock(i915);
 	restore_retire_worker(i915);
 	return err;
 err_obj:
@@ -511,11 +726,515 @@ err_obj:
 	goto out;
 }
 
+static int gtt_set(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	void __iomem *map;
+	int err = 0;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	intel_gt_pm_get(vma->vm->gt);
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out;
+	}
+
+	memset_io(map, POISON_INUSE, obj->base.size);
+	i915_vma_unpin_iomap(vma);
+
+out:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
+}
+
+static int gtt_check(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	void __iomem *map;
+	int err = 0;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	intel_gt_pm_get(vma->vm->gt);
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out;
+	}
+
+	if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) {
+		pr_err("%s: Write via mmap did not land in backing store (GTT)\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
+	}
+	i915_vma_unpin_iomap(vma);
+
+out:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
+}
+
+static int wc_set(struct drm_i915_gem_object *obj)
+{
+	void *vaddr;
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	memset(vaddr, POISON_INUSE, obj->base.size);
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	return 0;
+}
+
+static int wc_check(struct drm_i915_gem_object *obj)
+{
+	void *vaddr;
+	int err = 0;
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) {
+		pr_err("%s: Write via mmap did not land in backing store (WC)\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
+	}
+	i915_gem_object_unpin_map(obj);
+
+	return err;
+}
+
+static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
+{
+	if (type == I915_MMAP_TYPE_GTT &&
+	    !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+		return false;
+
+	if (type != I915_MMAP_TYPE_GTT &&
+	    !i915_gem_object_type_has(obj,
+				      I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+				      I915_GEM_OBJECT_HAS_IOMEM))
+		return false;
+
+	return true;
+}
+
+#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
+static int __igt_mmap(struct drm_i915_private *i915,
+		      struct drm_i915_gem_object *obj,
+		      enum i915_mmap_type type)
+{
+	struct i915_mmap_offset *mmo;
+	struct vm_area_struct *area;
+	unsigned long addr;
+	int err, i;
+
+	if (!can_mmap(obj, type))
+		return 0;
+
+	err = wc_set(obj);
+	if (err == -ENXIO)
+		err = gtt_set(obj);
+	if (err)
+		return err;
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr);
+
+	area = find_vma(current->mm, addr);
+	if (!area) {
+		pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
+		goto out_unmap;
+	}
+
+	if (area->vm_private_data != mmo) {
+		pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
+		goto out_unmap;
+	}
+
+	for (i = 0; i < obj->base.size / sizeof(u32); i++) {
+		u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+		u32 x;
+
+		if (get_user(x, ux)) {
+			pr_err("%s: Unable to read from mmap, offset:%zd\n",
+			       obj->mm.region->name, i * sizeof(x));
+			err = -EFAULT;
+			goto out_unmap;
+		}
+
+		if (x != expand32(POISON_INUSE)) {
+			pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+			       obj->mm.region->name,
+			       i * sizeof(x), x, expand32(POISON_INUSE));
+			err = -EINVAL;
+			goto out_unmap;
+		}
+
+		x = expand32(POISON_FREE);
+		if (put_user(x, ux)) {
+			pr_err("%s: Unable to write to mmap, offset:%zd\n",
+			       obj->mm.region->name, i * sizeof(x));
+			err = -EFAULT;
+			goto out_unmap;
+		}
+	}
+
+	if (type == I915_MMAP_TYPE_GTT)
+		intel_gt_flush_ggtt_writes(&i915->gt);
+
+	err = wc_check(obj);
+	if (err == -ENXIO)
+		err = gtt_check(obj);
+out_unmap:
+	vm_munmap(addr, obj->base.size);
+	return err;
+}
+
+static int igt_mmap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	for_each_memory_region(mr, i915, id) {
+		unsigned long sizes[] = {
+			PAGE_SIZE,
+			mr->min_page_size,
+			SZ_4M,
+		};
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+			struct drm_i915_gem_object *obj;
+			int err;
+
+			obj = i915_gem_object_create_region(mr, sizes[i], 0);
+			if (obj == ERR_PTR(-ENODEV))
+				continue;
+
+			if (IS_ERR(obj))
+				return PTR_ERR(obj);
+
+			err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
+			if (err == 0)
+				err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
+
+			i915_gem_object_put(obj);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int __igt_mmap_gpu(struct drm_i915_private *i915,
+			  struct drm_i915_gem_object *obj,
+			  enum i915_mmap_type type)
+{
+	struct intel_engine_cs *engine;
+	struct i915_mmap_offset *mmo;
+	unsigned long addr;
+	u32 __user *ux;
+	u32 bbe;
+	int err;
+
+	/*
+	 * Verify that the mmap access into the backing store aligns with
+	 * that of the GPU, i.e. that mmap is indeed writing into the same
+	 * page as being read by the GPU.
+	 */
+
+	if (!can_mmap(obj, type))
+		return 0;
+
+	err = wc_set(obj);
+	if (err == -ENXIO)
+		err = gtt_set(obj);
+	if (err)
+		return err;
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	ux = u64_to_user_ptr((u64)addr);
+	bbe = MI_BATCH_BUFFER_END;
+	if (put_user(bbe, ux)) {
+		pr_err("%s: Unable to write to mmap\n", obj->mm.region->name);
+		err = -EFAULT;
+		goto out_unmap;
+	}
+
+	if (type == I915_MMAP_TYPE_GTT)
+		intel_gt_flush_ggtt_writes(&i915->gt);
+
+	for_each_uabi_engine(engine, i915) {
+		struct i915_request *rq;
+		struct i915_vma *vma;
+
+		vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto out_unmap;
+		}
+
+		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		if (err)
+			goto out_unmap;
+
+		rq = i915_request_create(engine->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_unpin;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
+
+		err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			struct drm_printer p =
+				drm_info_printer(engine->i915->drm.dev);
+
+			pr_err("%s(%s, %s): Failed to execute batch\n",
+			       __func__, engine->name, obj->mm.region->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+
+			intel_gt_set_wedged(engine->gt);
+			err = -EIO;
+		}
+		i915_request_put(rq);
+
+out_unpin:
+		i915_vma_unpin(vma);
+		if (err)
+			goto out_unmap;
+	}
+
+out_unmap:
+	vm_munmap(addr, obj->base.size);
+	return err;
+}
+
+static int igt_mmap_gpu(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	for_each_memory_region(mr, i915, id) {
+		struct drm_i915_gem_object *obj;
+		int err;
+
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+		if (obj == ERR_PTR(-ENODEV))
+			continue;
+
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
+		if (err == 0)
+			err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
+
+		i915_gem_object_put(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
+{
+	if (!pte_present(*pte) || pte_none(*pte)) {
+		pr_err("missing PTE:%lx\n",
+		       (addr - (unsigned long)data) >> PAGE_SHIFT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
+{
+	if (pte_present(*pte) && !pte_none(*pte)) {
+		pr_err("present PTE:%lx; expected to be revoked\n",
+		       (addr - (unsigned long)data) >> PAGE_SHIFT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_present(unsigned long addr, unsigned long len)
+{
+	return apply_to_page_range(current->mm, addr, len,
+				   check_present_pte, (void *)addr);
+}
+
+static int check_absent(unsigned long addr, unsigned long len)
+{
+	return apply_to_page_range(current->mm, addr, len,
+				   check_absent_pte, (void *)addr);
+}
+
+static int prefault_range(u64 start, u64 len)
+{
+	const char __user *addr, *end;
+	char __maybe_unused c;
+	int err;
+
+	addr = u64_to_user_ptr(start);
+	end = addr + len;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		err = __get_user(c, addr);
+		if (err)
+			return err;
+	}
+
+	return __get_user(c, end - 1);
+}
+
+static int __igt_mmap_revoke(struct drm_i915_private *i915,
+			     struct drm_i915_gem_object *obj,
+			     enum i915_mmap_type type)
+{
+	struct i915_mmap_offset *mmo;
+	unsigned long addr;
+	int err;
+
+	if (!can_mmap(obj, type))
+		return 0;
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	err = prefault_range(addr, obj->base.size);
+	if (err)
+		goto out_unmap;
+
+	GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT &&
+		   !atomic_read(&obj->bind_count));
+
+	err = check_present(addr, obj->base.size);
+	if (err) {
+		pr_err("%s: was not present\n", obj->mm.region->name);
+		goto out_unmap;
+	}
+
+	/*
+	 * After unbinding the object from the GGTT, its address may be reused
+	 * for other objects. Ergo we have to revoke the previous mmap PTE
+	 * access as it no longer points to the same object.
+	 */
+	err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+	if (err) {
+		pr_err("Failed to unbind object!\n");
+		goto out_unmap;
+	}
+	GEM_BUG_ON(atomic_read(&obj->bind_count));
+
+	if (type != I915_MMAP_TYPE_GTT) {
+		__i915_gem_object_put_pages(obj);
+		if (i915_gem_object_has_pages(obj)) {
+			pr_err("Failed to put-pages object!\n");
+			err = -EINVAL;
+			goto out_unmap;
+		}
+	}
+
+	err = check_absent(addr, obj->base.size);
+	if (err) {
+		pr_err("%s: was not absent\n", obj->mm.region->name);
+		goto out_unmap;
+	}
+
+out_unmap:
+	vm_munmap(addr, obj->base.size);
+	return err;
+}
+
+static int igt_mmap_revoke(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	for_each_memory_region(mr, i915, id) {
+		struct drm_i915_gem_object *obj;
+		int err;
+
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+		if (obj == ERR_PTR(-ENODEV))
+			continue;
+
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
+		if (err == 0)
+			err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
+
+		i915_gem_object_put(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_partial_tiling),
+		SUBTEST(igt_smoke_tiling),
 		SUBTEST(igt_mmap_offset_exhaustion),
+		SUBTEST(igt_mmap),
+		SUBTEST(igt_mmap_revoke),
+		SUBTEST(igt_mmap_gpu),
 	};
 
 	return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index c21d747e7d05..62077fe46715 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -3,40 +3,247 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <linux/sort.h>
+
 #include "gt/intel_gt.h"
+#include "gt/intel_engine_user.h"
 
 #include "i915_selftest.h"
 
+#include "gem/i915_gem_context.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/i915_random.h"
 #include "selftests/mock_drm.h"
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
-static int igt_fill_blt(void *arg)
+static int wrap_ktime_compare(const void *A, const void *B)
+{
+	const ktime_t *a = A, *b = B;
+
+	return ktime_compare(*a, *b);
+}
+
+static int __perf_fill_blt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int inst = 0;
+
+	do {
+		struct intel_engine_cs *engine;
+		ktime_t t[5];
+		int pass;
+		int err;
+
+		engine = intel_engine_lookup_user(i915,
+						  I915_ENGINE_CLASS_COPY,
+						  inst++);
+		if (!engine)
+			return 0;
+
+		intel_engine_pm_get(engine);
+		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+			struct intel_context *ce = engine->kernel_context;
+			ktime_t t0, t1;
+
+			t0 = ktime_get();
+
+			err = i915_gem_object_fill_blt(obj, ce, 0);
+			if (err)
+				break;
+
+			err = i915_gem_object_wait(obj,
+						   I915_WAIT_ALL,
+						   MAX_SCHEDULE_TIMEOUT);
+			if (err)
+				break;
+
+			t1 = ktime_get();
+			t[pass] = ktime_sub(t1, t0);
+		}
+		intel_engine_pm_put(engine);
+		if (err)
+			return err;
+
+		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
+			engine->name,
+			obj->base.size >> 10,
+			div64_u64(mul_u32_u32(4 * obj->base.size,
+					      1000 * 1000 * 1000),
+				  t[1] + 2 * t[2] + t[3]) >> 20);
+	} while (1);
+}
+
+static int perf_fill_blt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct intel_context *ce = i915->engine[BCS0]->kernel_context;
-	struct drm_i915_gem_object *obj;
+	static const unsigned long sizes[] = {
+		SZ_4K,
+		SZ_64K,
+		SZ_2M,
+		SZ_64M
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+		struct drm_i915_gem_object *obj;
+		int err;
+
+		obj = i915_gem_object_create_internal(i915, sizes[i]);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = __perf_fill_blt(obj);
+		i915_gem_object_put(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int __perf_copy_blt(struct drm_i915_gem_object *src,
+			   struct drm_i915_gem_object *dst)
+{
+	struct drm_i915_private *i915 = to_i915(src->base.dev);
+	int inst = 0;
+
+	do {
+		struct intel_engine_cs *engine;
+		ktime_t t[5];
+		int pass;
+		int err = 0;
+
+		engine = intel_engine_lookup_user(i915,
+						  I915_ENGINE_CLASS_COPY,
+						  inst++);
+		if (!engine)
+			return 0;
+
+		intel_engine_pm_get(engine);
+		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+			struct intel_context *ce = engine->kernel_context;
+			ktime_t t0, t1;
+
+			t0 = ktime_get();
+
+			err = i915_gem_object_copy_blt(src, dst, ce);
+			if (err)
+				break;
+
+			err = i915_gem_object_wait(dst,
+						   I915_WAIT_ALL,
+						   MAX_SCHEDULE_TIMEOUT);
+			if (err)
+				break;
+
+			t1 = ktime_get();
+			t[pass] = ktime_sub(t1, t0);
+		}
+		intel_engine_pm_put(engine);
+		if (err)
+			return err;
+
+		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
+			engine->name,
+			src->base.size >> 10,
+			div64_u64(mul_u32_u32(4 * src->base.size,
+					      1000 * 1000 * 1000),
+				  t[1] + 2 * t[2] + t[3]) >> 20);
+	} while (1);
+}
+
+static int perf_copy_blt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static const unsigned long sizes[] = {
+		SZ_4K,
+		SZ_64K,
+		SZ_2M,
+		SZ_64M
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+		struct drm_i915_gem_object *src, *dst;
+		int err;
+
+		src = i915_gem_object_create_internal(i915, sizes[i]);
+		if (IS_ERR(src))
+			return PTR_ERR(src);
+
+		dst = i915_gem_object_create_internal(i915, sizes[i]);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err_src;
+		}
+
+		err = __perf_copy_blt(src, dst);
+
+		i915_gem_object_put(dst);
+err_src:
+		i915_gem_object_put(src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+struct igt_thread_arg {
+	struct drm_i915_private *i915;
+	struct i915_gem_context *ctx;
+	struct file *file;
 	struct rnd_state prng;
+	unsigned int n_cpus;
+};
+
+static int igt_fill_blt_thread(void *arg)
+{
+	struct igt_thread_arg *thread = arg;
+	struct drm_i915_private *i915 = thread->i915;
+	struct rnd_state *prng = &thread->prng;
+	struct drm_i915_gem_object *obj;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	unsigned int prio;
 	IGT_TIMEOUT(end);
-	u32 *vaddr;
-	int err = 0;
+	int err;
+
+	ctx = thread->ctx;
+	if (!ctx) {
+		ctx = live_context(i915, thread->file);
+		if (IS_ERR(ctx))
+			return PTR_ERR(ctx);
 
-	prandom_seed_state(&prng, i915_selftest.random_seed);
+		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+		ctx->sched.priority = I915_USER_PRIORITY(prio);
+	}
 
-	/*
-	 * XXX: needs some threads to scale all these tests, also maybe throw
-	 * in submission from higher priority context to see if we are
-	 * preempted for very large objects...
-	 */
+	ce = i915_gem_context_get_engine(ctx, BCS0);
+	GEM_BUG_ON(IS_ERR(ce));
 
 	do {
 		const u32 max_block_size = S16_MAX * PAGE_SIZE;
-		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
-		u32 phys_sz = sz % (max_block_size + 1);
-		u32 val = prandom_u32_state(&prng);
+		u32 val = prandom_u32_state(prng);
+		u64 total = ce->vm->total;
+		u32 phys_sz;
+		u32 sz;
+		u32 *vaddr;
 		u32 i;
 
+		/*
+		 * If we have a tiny shared address space, like for the GGTT
+		 * then we can't be too greedy.
+		 */
+		if (i915_is_ggtt(ce->vm))
+			total = div64_u64(total, thread->n_cpus);
+
+		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
+		phys_sz = sz % (max_block_size + 1);
+
 		sz = round_up(sz, PAGE_SIZE);
 		phys_sz = round_up(phys_sz, PAGE_SIZE);
 
@@ -65,9 +272,7 @@ static int igt_fill_blt(void *arg)
 		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			obj->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_fill_blt(obj, ce, val);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
@@ -100,28 +305,50 @@ err_flush:
 	if (err == -ENOMEM)
 		err = 0;
 
+	intel_context_put(ce);
 	return err;
 }
 
-static int igt_copy_blt(void *arg)
+static int igt_copy_blt_thread(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct intel_context *ce = i915->engine[BCS0]->kernel_context;
+	struct igt_thread_arg *thread = arg;
+	struct drm_i915_private *i915 = thread->i915;
+	struct rnd_state *prng = &thread->prng;
 	struct drm_i915_gem_object *src, *dst;
-	struct rnd_state prng;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	unsigned int prio;
 	IGT_TIMEOUT(end);
-	u32 *vaddr;
-	int err = 0;
+	int err;
 
-	prandom_seed_state(&prng, i915_selftest.random_seed);
+	ctx = thread->ctx;
+	if (!ctx) {
+		ctx = live_context(i915, thread->file);
+		if (IS_ERR(ctx))
+			return PTR_ERR(ctx);
+
+		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+		ctx->sched.priority = I915_USER_PRIORITY(prio);
+	}
+
+	ce = i915_gem_context_get_engine(ctx, BCS0);
+	GEM_BUG_ON(IS_ERR(ce));
 
 	do {
 		const u32 max_block_size = S16_MAX * PAGE_SIZE;
-		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
-		u32 phys_sz = sz % (max_block_size + 1);
-		u32 val = prandom_u32_state(&prng);
+		u32 val = prandom_u32_state(prng);
+		u64 total = ce->vm->total;
+		u32 phys_sz;
+		u32 sz;
+		u32 *vaddr;
 		u32 i;
 
+		if (i915_is_ggtt(ce->vm))
+			total = div64_u64(total, thread->n_cpus);
+
+		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
+		phys_sz = sz % (max_block_size + 1);
+
 		sz = round_up(sz, PAGE_SIZE);
 		phys_sz = round_up(phys_sz, PAGE_SIZE);
 
@@ -166,9 +393,7 @@ static int igt_copy_blt(void *arg)
 		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			dst->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_copy_blt(src, dst, ce);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
@@ -205,14 +430,113 @@ err_flush:
 	if (err == -ENOMEM)
 		err = 0;
 
+	intel_context_put(ce);
 	return err;
 }
 
+static int igt_threaded_blt(struct drm_i915_private *i915,
+			    int (*blt_fn)(void *arg),
+			    unsigned int flags)
+#define SINGLE_CTX BIT(0)
+{
+	struct igt_thread_arg *thread;
+	struct task_struct **tsk;
+	unsigned int n_cpus, i;
+	I915_RND_STATE(prng);
+	int err = 0;
+
+	n_cpus = num_online_cpus() + 1;
+
+	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
+	if (!tsk)
+		return 0;
+
+	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
+	if (!thread)
+		goto out_tsk;
+
+	thread[0].file = mock_file(i915);
+	if (IS_ERR(thread[0].file)) {
+		err = PTR_ERR(thread[0].file);
+		goto out_thread;
+	}
+
+	if (flags & SINGLE_CTX) {
+		thread[0].ctx = live_context(i915, thread[0].file);
+		if (IS_ERR(thread[0].ctx)) {
+			err = PTR_ERR(thread[0].ctx);
+			goto out_file;
+		}
+	}
+
+	for (i = 0; i < n_cpus; ++i) {
+		thread[i].i915 = i915;
+		thread[i].file = thread[0].file;
+		thread[i].ctx = thread[0].ctx;
+		thread[i].n_cpus = n_cpus;
+		thread[i].prng =
+			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
+
+		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
+		if (IS_ERR(tsk[i])) {
+			err = PTR_ERR(tsk[i]);
+			break;
+		}
+
+		get_task_struct(tsk[i]);
+	}
+
+	yield(); /* start all threads before we kthread_stop() */
+
+	for (i = 0; i < n_cpus; ++i) {
+		int status;
+
+		if (IS_ERR_OR_NULL(tsk[i]))
+			continue;
+
+		status = kthread_stop(tsk[i]);
+		if (status && !err)
+			err = status;
+
+		put_task_struct(tsk[i]);
+	}
+
+out_file:
+	fput(thread[0].file);
+out_thread:
+	kfree(thread);
+out_tsk:
+	kfree(tsk);
+	return err;
+}
+
+static int igt_fill_blt(void *arg)
+{
+	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
+}
+
+static int igt_fill_blt_ctx0(void *arg)
+{
+	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
+}
+
+static int igt_copy_blt(void *arg)
+{
+	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
+}
+
+static int igt_copy_blt_ctx0(void *arg)
+{
+	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
+}
+
 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_fill_blt),
+		SUBTEST(igt_fill_blt_ctx0),
 		SUBTEST(igt_copy_blt),
+		SUBTEST(igt_copy_blt_ctx0),
 	};
 
 	if (intel_gt_is_wedged(&i915->gt))
@@ -223,3 +547,16 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 
 	return i915_live_subtests(tests, i915);
 }
+
+int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_fill_blt),
+		SUBTEST(perf_copy_blt),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 94a15e3f6db8..34932871b3a5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -25,9 +25,7 @@ static int mock_phys_object(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err) {
 		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
 		goto out_obj;
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 57ece53c1075..6718da20f35d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -9,6 +9,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_gt.h"
 #include "i915_vma.h"
 #include "i915_drv.h"
 
@@ -84,6 +85,8 @@ igt_emit_store_dw(struct i915_vma *vma,
 	*cmd = MI_BATCH_BUFFER_END;
 	i915_gem_object_unpin_map(obj);
 
+	intel_gt_chipset_flush(vma->vm->gt);
+
 	vma = i915_vma_instance(obj, vma->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
@@ -101,40 +104,35 @@ err:
 	return ERR_PTR(err);
 }
 
-int igt_gpu_fill_dw(struct i915_vma *vma,
-		    struct i915_gem_context *ctx,
-		    struct intel_engine_cs *engine,
-		    u64 offset,
-		    unsigned long count,
-		    u32 val)
+int igt_gpu_fill_dw(struct intel_context *ce,
+		    struct i915_vma *vma, u64 offset,
+		    unsigned long count, u32 val)
 {
-	struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	unsigned int flags;
 	int err;
 
-	GEM_BUG_ON(vma->size > vm->total);
-	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 
 	batch = igt_emit_store_dw(vma, offset, count, val);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
-	rq = igt_request_alloc(ctx, engine);
+	rq = intel_context_create_request(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
 	}
 
 	flags = 0;
-	if (INTEL_GEN(vm->i915) <= 5)
+	if (INTEL_GEN(ce->vm->i915) <= 5)
 		flags |= I915_DISPATCH_SECURE;
 
-	err = engine->emit_bb_start(rq,
-				    batch->node.start, batch->node.size,
-				    flags);
+	err = rq->engine->emit_bb_start(rq,
+					batch->node.start, batch->node.size,
+					flags);
 	if (err)
 		goto err_request;
 
@@ -156,9 +154,7 @@ int igt_gpu_fill_dw(struct i915_vma *vma,
 
 	i915_request_add(rq);
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 
 	return 0;
 
@@ -167,7 +163,6 @@ skip_request:
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
index 361a7ef866b0..4221cf84d175 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -11,9 +11,11 @@
 
 struct i915_request;
 struct i915_gem_context;
-struct intel_engine_cs;
 struct i915_vma;
 
+struct intel_context;
+struct intel_engine_cs;
+
 struct i915_request *
 igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
 
@@ -23,11 +25,8 @@ igt_emit_store_dw(struct i915_vma *vma,
 		  unsigned long count,
 		  u32 val);
 
-int igt_gpu_fill_dw(struct i915_vma *vma,
-		    struct i915_gem_context *ctx,
-		    struct intel_engine_cs *engine,
-		    u64 offset,
-		    unsigned long count,
-		    u32 val);
+int igt_gpu_fill_dw(struct intel_context *ce,
+		    struct i915_vma *vma, u64 offset,
+		    unsigned long count, u32 val);
 
 #endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..384143aa7776 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -5,6 +5,7 @@
  */
 
 #include "mock_context.h"
+#include "selftests/mock_drm.h"
 #include "selftests/mock_gtt.h"
 
 struct i915_gem_context *
@@ -13,7 +14,6 @@ mock_context(struct drm_i915_private *i915,
 {
 	struct i915_gem_context *ctx;
 	struct i915_gem_engines *e;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	i915_gem_context_set_persistence(ctx);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e))
@@ -30,32 +32,26 @@ mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_engines;
-
 	if (name) {
 		struct i915_ppgtt *ppgtt;
 
-		ctx->name = kstrdup(name, GFP_KERNEL);
-		if (!ctx->name)
-			goto err_put;
+		strncpy(ctx->name, name, sizeof(ctx->name));
 
 		ppgtt = mock_ppgtt(i915, name);
 		if (!ppgtt)
 			goto err_put;
 
+		mutex_lock(&ctx->mutex);
 		__set_ppgtt(ctx, &ppgtt->vm);
+		mutex_unlock(&ctx->mutex);
+
 		i915_vm_put(&ppgtt->vm);
 	}
 
 	return ctx;
 
-err_engines:
-	free_engines(rcu_access_pointer(ctx->engines));
 err_free:
 	kfree(ctx);
 	return NULL;
@@ -73,22 +69,21 @@ void mock_context_close(struct i915_gem_context *ctx)
 
 void mock_init_contexts(struct drm_i915_private *i915)
 {
-	init_contexts(i915);
+	init_contexts(&i915->gem.contexts);
 }
 
 struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file)
+live_context(struct drm_i915_private *i915, struct file *file)
 {
 	struct i915_gem_context *ctx;
 	int err;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	u32 id;
 
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = gem_context_register(ctx, file->driver_priv);
+	err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id);
 	if (err < 0)
 		goto err_ctx;
 
@@ -102,7 +97,16 @@ err_ctx:
 struct i915_gem_context *
 kernel_context(struct drm_i915_private *i915)
 {
-	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+	struct i915_gem_context *ctx;
+
+	ctx = i915_gem_create_context(i915, 0);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
+
+	return ctx;
 }
 
 void kernel_context_close(struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
index 0b926653914f..fb83d2f09212 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -7,6 +7,9 @@
 #ifndef __MOCK_CONTEXT_H
 #define __MOCK_CONTEXT_H
 
+struct file;
+struct drm_i915_private;
+
 void mock_init_contexts(struct drm_i915_private *i915);
 
 struct i915_gem_context *
@@ -16,7 +19,7 @@ mock_context(struct drm_i915_private *i915,
 void mock_context_close(struct i915_gem_context *ctx);
 
 struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
+live_context(struct drm_i915_private *i915, struct file *file);
 
 struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
 void kernel_context_close(struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index b9e059d4328a..9272bef57092 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -76,20 +76,6 @@ static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 	vm_unmap_ram(vaddr, mock->npages);
 }
 
-static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	return kmap(mock->pages[page_num]);
-}
-
-static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	return kunmap(mock->pages[page_num]);
-}
-
 static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 {
 	return -ENODEV;
@@ -99,8 +85,6 @@ static const struct dma_buf_ops mock_dmabuf_ops =  {
 	.map_dma_buf = mock_map_dma_buf,
 	.unmap_dma_buf = mock_unmap_dma_buf,
 	.release = mock_dmabuf_release,
-	.map = mock_dmabuf_kmap,
-	.unmap = mock_dmabuf_kunmap,
 	.mmap = mock_dmabuf_mmap,
 	.vmap = mock_dmabuf_vmap,
 	.vunmap = mock_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
index f0f8bbd82dfc..22818bbb139d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -14,7 +14,7 @@ struct mock_dmabuf {
 	struct page *pages[];
 };
 
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+static inline struct mock_dmabuf *to_mock(struct dma_buf *buf)
 {
 	return buf->priv;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 370360b4a148..688511afa883 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -7,6 +7,8 @@
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
+#include "gem/i915_gem_object_types.h"
+
 struct mock_object {
 	struct drm_i915_gem_object base;
 };
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
deleted file mode 100644
index 7e73aa587967..000000000000
--- a/drivers/gpu/drm/i915/gt/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
new file mode 100644
index 000000000000..6a5e9ab20b94
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "i915_drv.h" /* for_each_engine! */
+#include "intel_engine.h"
+
+static int engines_show(struct seq_file *m, void *data)
+{
+	struct intel_gt *gt = m->private;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct drm_printer p;
+
+	p = drm_seq_file_printer(m);
+	for_each_engine(engine, gt, id)
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+	return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+{
+	static const struct debugfs_gt_file files[] = {
+		{ "engines", &engines_fops },
+	};
+
+	debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
new file mode 100644
index 000000000000..f69257eaa1cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_ENGINES_H
+#define DEBUGFS_ENGINES_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
new file mode 100644
index 000000000000..75255aaacaed
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+
+void debugfs_gt_register(struct intel_gt *gt)
+{
+	struct dentry *root;
+
+	if (!gt->i915->drm.primary->debugfs_root)
+		return;
+
+	root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+	if (IS_ERR(root))
+		return;
+
+	debugfs_engines_register(gt, root);
+	debugfs_gt_pm_register(gt, root);
+}
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+			       struct dentry *root,
+			       const struct debugfs_gt_file *files,
+			       unsigned long count)
+{
+	while (count--) {
+		if (!files->eval || files->eval(gt))
+			debugfs_create_file(files->name,
+					    0444, root, gt,
+					    files->fops);
+
+		files++;
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
new file mode 100644
index 000000000000..4ea0f06cda8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_H
+#define DEBUGFS_GT_H
+
+#include <linux/file.h>
+
+struct intel_gt;
+
+#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name)				\
+	static int __name ## _open(struct inode *inode, struct file *file) \
+{									\
+	return single_open(file, __name ## _show, inode->i_private);	\
+}									\
+static const struct file_operations __name ## _fops = {			\
+	.owner = THIS_MODULE,						\
+	.open = __name ## _open,					\
+	.read = seq_read,						\
+	.llseek = seq_lseek,						\
+	.release = single_release,					\
+}
+
+void debugfs_gt_register(struct intel_gt *gt);
+
+struct debugfs_gt_file {
+	const char *name;
+	const struct file_operations *fops;
+	bool (*eval)(const struct intel_gt *gt);
+};
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+			       struct dentry *root,
+			       const struct debugfs_gt_file *files,
+			       unsigned long count);
+
+#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
new file mode 100644
index 000000000000..059c9e5c002e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/seq_file.h>
+
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+#include "intel_runtime_pm.h"
+#include "intel_sideband.h"
+#include "intel_uncore.h"
+
+static int fw_domains_show(struct seq_file *m, void *data)
+{
+	struct intel_gt *gt = m->private;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_uncore_forcewake_domain *fw_domain;
+	unsigned int tmp;
+
+	seq_printf(m, "user.bypass_count = %u\n",
+		   uncore->user_forcewake_count);
+
+	for_each_fw_domain(fw_domain, uncore, tmp)
+		seq_printf(m, "%s.wake_count = %u\n",
+			   intel_uncore_forcewake_domain_to_str(fw_domain->id),
+			   READ_ONCE(fw_domain->wake_count));
+
+	return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+
+static void print_rc6_res(struct seq_file *m,
+			  const char *title,
+			  const i915_reg_t reg)
+{
+	struct intel_gt *gt = m->private;
+	intel_wakeref_t wakeref;
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+		seq_printf(m, "%s %u (%llu us)\n", title,
+			   intel_uncore_read(gt->uncore, reg),
+			   intel_rc6_residency_us(&gt->rc6, reg));
+}
+
+static int vlv_drpc(struct seq_file *m)
+{
+	struct intel_gt *gt = m->private;
+	struct intel_uncore *uncore = gt->uncore;
+	u32 rcctl1, pw_status;
+
+	pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
+	rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+
+	seq_printf(m, "RC6 Enabled: %s\n",
+		   yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
+					GEN6_RC_CTL_EI_MODE(1))));
+	seq_printf(m, "Render Power Well: %s\n",
+		   (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
+	seq_printf(m, "Media Power Well: %s\n",
+		   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
+
+	print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6);
+	print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+
+	return fw_domains_show(m, NULL);
+}
+
+static int gen6_drpc(struct seq_file *m)
+{
+	struct intel_gt *gt = m->private;
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	u32 gt_core_status, rcctl1, rc6vids = 0;
+	u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
+
+	gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS);
+
+	rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+	if (INTEL_GEN(i915) >= 9) {
+		gen9_powergate_enable =
+			intel_uncore_read(uncore, GEN9_PG_ENABLE);
+		gen9_powergate_status =
+			intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS);
+	}
+
+	if (INTEL_GEN(i915) <= 7)
+		sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+				       &rc6vids, NULL);
+
+	seq_printf(m, "RC1e Enabled: %s\n",
+		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
+	seq_printf(m, "RC6 Enabled: %s\n",
+		   yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+	if (INTEL_GEN(i915) >= 9) {
+		seq_printf(m, "Render Well Gating Enabled: %s\n",
+			   yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
+		seq_printf(m, "Media Well Gating Enabled: %s\n",
+			   yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
+	}
+	seq_printf(m, "Deep RC6 Enabled: %s\n",
+		   yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
+	seq_printf(m, "Deepest RC6 Enabled: %s\n",
+		   yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
+	seq_puts(m, "Current RC state: ");
+	switch (gt_core_status & GEN6_RCn_MASK) {
+	case GEN6_RC0:
+		if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
+			seq_puts(m, "Core Power Down\n");
+		else
+			seq_puts(m, "on\n");
+		break;
+	case GEN6_RC3:
+		seq_puts(m, "RC3\n");
+		break;
+	case GEN6_RC6:
+		seq_puts(m, "RC6\n");
+		break;
+	case GEN6_RC7:
+		seq_puts(m, "RC7\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_printf(m, "Core Power Down: %s\n",
+		   yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+	if (INTEL_GEN(i915) >= 9) {
+		seq_printf(m, "Render Power Well: %s\n",
+			   (gen9_powergate_status &
+			    GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+		seq_printf(m, "Media Power Well: %s\n",
+			   (gen9_powergate_status &
+			    GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+	}
+
+	/* Not exactly sure what this is */
+	print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
+		      GEN6_GT_GFX_RC6_LOCKED);
+	print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
+	print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
+	print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+
+	if (INTEL_GEN(i915) <= 7) {
+		seq_printf(m, "RC6   voltage: %dmV\n",
+			   GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
+		seq_printf(m, "RC6+  voltage: %dmV\n",
+			   GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
+		seq_printf(m, "RC6++ voltage: %dmV\n",
+			   GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
+	}
+
+	return fw_domains_show(m, NULL);
+}
+
+static int ilk_drpc(struct seq_file *m)
+{
+	struct intel_gt *gt = m->private;
+	struct intel_uncore *uncore = gt->uncore;
+	u32 rgvmodectl, rstdbyctl;
+	u16 crstandvid;
+
+	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+	rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL);
+	crstandvid = intel_uncore_read16(uncore, CRSTANDVID);
+
+	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
+	seq_printf(m, "Boost freq: %d\n",
+		   (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
+		   MEMMODE_BOOST_FREQ_SHIFT);
+	seq_printf(m, "HW control enabled: %s\n",
+		   yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
+	seq_printf(m, "SW control enabled: %s\n",
+		   yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+	seq_printf(m, "Gated voltage change: %s\n",
+		   yesno(rgvmodectl & MEMMODE_RCLK_GATE));
+	seq_printf(m, "Starting frequency: P%d\n",
+		   (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
+	seq_printf(m, "Max P-state: P%d\n",
+		   (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
+	seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
+	seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
+	seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
+	seq_printf(m, "Render standby enabled: %s\n",
+		   yesno(!(rstdbyctl & RCX_SW_EXIT)));
+	seq_puts(m, "Current RS state: ");
+	switch (rstdbyctl & RSX_STATUS_MASK) {
+	case RSX_STATUS_ON:
+		seq_puts(m, "on\n");
+		break;
+	case RSX_STATUS_RC1:
+		seq_puts(m, "RC1\n");
+		break;
+	case RSX_STATUS_RC1E:
+		seq_puts(m, "RC1E\n");
+		break;
+	case RSX_STATUS_RS1:
+		seq_puts(m, "RS1\n");
+		break;
+	case RSX_STATUS_RS2:
+		seq_puts(m, "RS2 (RC6)\n");
+		break;
+	case RSX_STATUS_RS3:
+		seq_puts(m, "RC3 (RC6+)\n");
+		break;
+	default:
+		seq_puts(m, "unknown\n");
+		break;
+	}
+
+	return 0;
+}
+
+static int drpc_show(struct seq_file *m, void *unused)
+{
+	struct intel_gt *gt = m->private;
+	struct drm_i915_private *i915 = gt->i915;
+	intel_wakeref_t wakeref;
+	int err = -ENODEV;
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+			err = vlv_drpc(m);
+		else if (INTEL_GEN(i915) >= 6)
+			err = gen6_drpc(m);
+		else
+			err = ilk_drpc(m);
+	}
+
+	return err;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+	struct intel_gt *gt = m->private;
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_rps *rps = &gt->rps;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(uncore->rpm);
+
+	if (IS_GEN(i915, 5)) {
+		u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+		u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
+
+		seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+		seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
+		seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+			   MEMSTAT_VID_SHIFT);
+		seq_printf(m, "Current P-state: %d\n",
+			   (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
+	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		u32 rpmodectl, freq_sts;
+
+		rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+		seq_printf(m, "Video Turbo Mode: %s\n",
+			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+		seq_printf(m, "HW control enabled: %s\n",
+			   yesno(rpmodectl & GEN6_RP_ENABLE));
+		seq_printf(m, "SW control enabled: %s\n",
+			   yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+				  GEN6_RP_MEDIA_SW_MODE));
+
+		vlv_punit_get(i915);
+		freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+		vlv_punit_put(i915);
+
+		seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+		seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+
+		seq_printf(m, "actual GPU freq: %d MHz\n",
+			   intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
+
+		seq_printf(m, "current GPU freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->cur_freq));
+
+		seq_printf(m, "max GPU freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->max_freq));
+
+		seq_printf(m, "min GPU freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->min_freq));
+
+		seq_printf(m, "idle GPU freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->idle_freq));
+
+		seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+			   intel_gpu_freq(rps, rps->efficient_freq));
+	} else if (INTEL_GEN(i915) >= 6) {
+		u32 rp_state_limits;
+		u32 gt_perf_status;
+		u32 rp_state_cap;
+		u32 rpmodectl, rpinclimit, rpdeclimit;
+		u32 rpstat, cagf, reqf;
+		u32 rpupei, rpcurup, rpprevup;
+		u32 rpdownei, rpcurdown, rpprevdown;
+		u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+		int max_freq;
+
+		rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+		if (IS_GEN9_LP(i915)) {
+			rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+			gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+		} else {
+			rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+			gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+		}
+
+		/* RPSTAT1 is in the GT power well */
+		intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+		reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+		if (INTEL_GEN(i915) >= 9) {
+			reqf >>= 23;
+		} else {
+			reqf &= ~GEN6_TURBO_DISABLE;
+			if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+				reqf >>= 24;
+			else
+				reqf >>= 25;
+		}
+		reqf = intel_gpu_freq(rps, reqf);
+
+		rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+		rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+		rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+		rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+		rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+		rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+		rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+		rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+		rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+		rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+		cagf = intel_rps_read_actual_frequency(rps);
+
+		intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+		if (INTEL_GEN(i915) >= 11) {
+			pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+			pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+			/*
+			 * The equivalent to the PM ISR & IIR cannot be read
+			 * without affecting the current state of the system
+			 */
+			pm_isr = 0;
+			pm_iir = 0;
+		} else if (INTEL_GEN(i915) >= 8) {
+			pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+			pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+			pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+			pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+		} else {
+			pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+			pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+			pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+			pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+		}
+		pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+		seq_printf(m, "Video Turbo Mode: %s\n",
+			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+		seq_printf(m, "HW control enabled: %s\n",
+			   yesno(rpmodectl & GEN6_RP_ENABLE));
+		seq_printf(m, "SW control enabled: %s\n",
+			   yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+				  GEN6_RP_MEDIA_SW_MODE));
+
+		seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+			   pm_ier, pm_imr, pm_mask);
+		if (INTEL_GEN(i915) <= 10)
+			seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+				   pm_isr, pm_iir);
+		seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+			   rps->pm_intrmsk_mbz);
+		seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+		seq_printf(m, "Render p-state ratio: %d\n",
+			   (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+		seq_printf(m, "Render p-state VID: %d\n",
+			   gt_perf_status & 0xff);
+		seq_printf(m, "Render p-state limit: %d\n",
+			   rp_state_limits & 0xff);
+		seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
+		seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
+		seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+		seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+		seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
+		seq_printf(m, "CAGF: %dMHz\n", cagf);
+		seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
+			   rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
+		seq_printf(m, "RP CUR UP: %d (%dus)\n",
+			   rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
+		seq_printf(m, "RP PREV UP: %d (%dus)\n",
+			   rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+		seq_printf(m, "Up threshold: %d%%\n",
+			   rps->power.up_threshold);
+
+		seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
+			   rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
+		seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
+			   rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
+		seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
+			   rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+		seq_printf(m, "Down threshold: %d%%\n",
+			   rps->power.down_threshold);
+
+		max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
+			    rp_state_cap >> 16) & 0xff;
+		max_freq *= (IS_GEN9_BC(i915) ||
+			     INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+			   intel_gpu_freq(rps, max_freq));
+
+		max_freq = (rp_state_cap & 0xff00) >> 8;
+		max_freq *= (IS_GEN9_BC(i915) ||
+			     INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+			   intel_gpu_freq(rps, max_freq));
+
+		max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
+			    rp_state_cap >> 0) & 0xff;
+		max_freq *= (IS_GEN9_BC(i915) ||
+			     INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+			   intel_gpu_freq(rps, max_freq));
+		seq_printf(m, "Max overclocked frequency: %dMHz\n",
+			   intel_gpu_freq(rps, rps->max_freq));
+
+		seq_printf(m, "Current freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->cur_freq));
+		seq_printf(m, "Actual freq: %d MHz\n", cagf);
+		seq_printf(m, "Idle freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->idle_freq));
+		seq_printf(m, "Min freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->min_freq));
+		seq_printf(m, "Boost freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->boost_freq));
+		seq_printf(m, "Max freq: %d MHz\n",
+			   intel_gpu_freq(rps, rps->max_freq));
+		seq_printf(m,
+			   "efficient (RPe) frequency: %d MHz\n",
+			   intel_gpu_freq(rps, rps->efficient_freq));
+	} else {
+		seq_puts(m, "no P-state info available\n");
+	}
+
+	seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+	seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+	seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+
+	intel_runtime_pm_put(uncore->rpm, wakeref);
+
+	return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+
+static int llc_show(struct seq_file *m, void *data)
+{
+	struct intel_gt *gt = m->private;
+	struct drm_i915_private *i915 = gt->i915;
+	const bool edram = INTEL_GEN(i915) > 8;
+	struct intel_rps *rps = &gt->rps;
+	unsigned int max_gpu_freq, min_gpu_freq;
+	intel_wakeref_t wakeref;
+	int gpu_freq, ia_freq;
+
+	seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915)));
+	seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC",
+		   i915->edram_size_mb);
+
+	min_gpu_freq = rps->min_freq;
+	max_gpu_freq = rps->max_freq;
+	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+		/* Convert GT frequency to 50 HZ units */
+		min_gpu_freq /= GEN9_FREQ_SCALER;
+		max_gpu_freq /= GEN9_FREQ_SCALER;
+	}
+
+	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+		ia_freq = gpu_freq;
+		sandybridge_pcode_read(i915,
+				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
+				       &ia_freq, NULL);
+		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
+			   intel_gpu_freq(rps,
+					  (gpu_freq *
+					   (IS_GEN9_BC(i915) ||
+					    INTEL_GEN(i915) >= 10 ?
+					    GEN9_FREQ_SCALER : 1))),
+			   ((ia_freq >> 0) & 0xff) * 100,
+			   ((ia_freq >> 8) & 0xff) * 100);
+	}
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+	return 0;
+}
+
+static bool llc_eval(const struct intel_gt *gt)
+{
+	return HAS_LLC(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+
+static const char *rps_power_to_str(unsigned int power)
+{
+	static const char * const strings[] = {
+		[LOW_POWER] = "low power",
+		[BETWEEN] = "mixed",
+		[HIGH_POWER] = "high power",
+	};
+
+	if (power >= ARRAY_SIZE(strings) || !strings[power])
+		return "unknown";
+
+	return strings[power];
+}
+
+static int rps_boost_show(struct seq_file *m, void *data)
+{
+	struct intel_gt *gt = m->private;
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_rps *rps = &gt->rps;
+
+	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+	seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
+	seq_printf(m, "Boosts outstanding? %d\n",
+		   atomic_read(&rps->num_waiters));
+	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
+	seq_printf(m, "Frequency requested %d, actual %d\n",
+		   intel_gpu_freq(rps, rps->cur_freq),
+		   intel_rps_read_actual_frequency(rps));
+	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+		   intel_gpu_freq(rps, rps->min_freq),
+		   intel_gpu_freq(rps, rps->min_freq_softlimit),
+		   intel_gpu_freq(rps, rps->max_freq_softlimit),
+		   intel_gpu_freq(rps, rps->max_freq));
+	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
+		   intel_gpu_freq(rps, rps->idle_freq),
+		   intel_gpu_freq(rps, rps->efficient_freq),
+		   intel_gpu_freq(rps, rps->boost_freq));
+
+	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+
+	if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+		struct intel_uncore *uncore = gt->uncore;
+		u32 rpup, rpupei;
+		u32 rpdown, rpdownei;
+
+		intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+		rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
+		rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
+		rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
+		rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
+		intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+		seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
+			   rps_power_to_str(rps->power.mode));
+		seq_printf(m, "  Avg. up: %d%% [above threshold? %d%%]\n",
+			   rpup && rpupei ? 100 * rpup / rpupei : 0,
+			   rps->power.up_threshold);
+		seq_printf(m, "  Avg. down: %d%% [below threshold? %d%%]\n",
+			   rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
+			   rps->power.down_threshold);
+	} else {
+		seq_puts(m, "\nRPS Autotuning inactive\n");
+	}
+
+	return 0;
+}
+
+static bool rps_eval(const struct intel_gt *gt)
+{
+	return HAS_RPS(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+{
+	static const struct debugfs_gt_file files[] = {
+		{ "drpc", &drpc_fops, NULL },
+		{ "frequency", &frequency_fops, NULL },
+		{ "forcewake", &fw_domains_fops, NULL },
+		{ "llc", &llc_fops, llc_eval },
+		{ "rps_boost", &rps_boost_fops, rps_eval },
+	};
+
+	debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
new file mode 100644
index 000000000000..4cf5f5c9da7d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_PM_H
+#define DEBUGFS_GT_PM_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
new file mode 100644
index 000000000000..f4fec7eb4064
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen6_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
+				  const unsigned int pde,
+				  const struct i915_page_table *pt)
+{
+	/* Caller needs to make sure the write completes if necessary */
+	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+		  ppgtt->pd_addr + pde);
+}
+
+void gen7_ppgtt_enable(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 ecochk;
+
+	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
+
+	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+	if (IS_HASWELL(i915)) {
+		ecochk |= ECOCHK_PPGTT_WB_HSW;
+	} else {
+		ecochk |= ECOCHK_PPGTT_LLC_IVB;
+		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+	}
+	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
+
+	for_each_engine(engine, gt, id) {
+		/* GFX_MODE is per-ring on gen7+ */
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	}
+}
+
+void gen6_ppgtt_enable(struct intel_gt *gt)
+{
+	struct intel_uncore *uncore = gt->uncore;
+
+	intel_uncore_rmw(uncore,
+			 GAC_ECO_BITS,
+			 0,
+			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
+
+	intel_uncore_rmw(uncore,
+			 GAB_CTL,
+			 0,
+			 GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+	intel_uncore_rmw(uncore,
+			 GAM_ECOCHK,
+			 0,
+			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+
+	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
+		intel_uncore_write(uncore,
+				   GFX_MODE,
+				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+}
+
+/* PPGTT support for Sandybdrige/Gen6 and later */
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
+				   u64 start, u64 length)
+{
+	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+	unsigned int pde = first_entry / GEN6_PTES;
+	unsigned int pte = first_entry % GEN6_PTES;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+
+	while (num_entries) {
+		struct i915_page_table * const pt =
+			i915_pt_entry(ppgtt->base.pd, pde++);
+		const unsigned int count = min(num_entries, GEN6_PTES - pte);
+		gen6_pte_t *vaddr;
+
+		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
+
+		num_entries -= count;
+
+		GEM_BUG_ON(count > atomic_read(&pt->used));
+		if (!atomic_sub_return(count, &pt->used))
+			ppgtt->scan_for_unused_pt = true;
+
+		/*
+		 * Note that the hw doesn't support removing PDE on the fly
+		 * (they are cached inside the context with no means to
+		 * invalidate the cache), so we can only reset the PTE
+		 * entries back to scratch.
+		 */
+
+		vaddr = kmap_atomic_px(pt);
+		memset32(vaddr + pte, scratch_pte, count);
+		kunmap_atomic(vaddr);
+
+		pte = 0;
+	}
+}
+
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
+				      struct i915_vma *vma,
+				      enum i915_cache_level cache_level,
+				      u32 flags)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pd = ppgtt->pd;
+	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+	unsigned int act_pt = first_entry / GEN6_PTES;
+	unsigned int act_pte = first_entry % GEN6_PTES;
+	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+	struct sgt_dma iter = sgt_dma(vma);
+	gen6_pte_t *vaddr;
+
+	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+	do {
+		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
+
+		iter.dma += I915_GTT_PAGE_SIZE;
+		if (iter.dma == iter.max) {
+			iter.sg = __sg_next(iter.sg);
+			if (!iter.sg)
+				break;
+
+			iter.dma = sg_dma_address(iter.sg);
+			iter.max = iter.dma + iter.sg->length;
+		}
+
+		if (++act_pte == GEN6_PTES) {
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+			act_pte = 0;
+		}
+	} while (1);
+	kunmap_atomic(vaddr);
+
+	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+}
+
+static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
+{
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_table *pt;
+	unsigned int pde;
+
+	start = round_down(start, SZ_64K);
+	end = round_up(end, SZ_64K) - start;
+
+	mutex_lock(&ppgtt->flush);
+
+	gen6_for_each_pde(pt, pd, start, end, pde)
+		gen6_write_pde(ppgtt, pde, pt);
+
+	mb();
+	ioread32(ppgtt->pd_addr + pde - 1);
+	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+	mb();
+
+	mutex_unlock(&ppgtt->flush);
+}
+
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+			       u64 start, u64 length)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_table *pt, *alloc = NULL;
+	intel_wakeref_t wakeref;
+	u64 from = start;
+	unsigned int pde;
+	int ret = 0;
+
+	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+	spin_lock(&pd->lock);
+	gen6_for_each_pde(pt, pd, start, length, pde) {
+		const unsigned int count = gen6_pte_count(start, length);
+
+		if (px_base(pt) == px_base(&vm->scratch[1])) {
+			spin_unlock(&pd->lock);
+
+			pt = fetch_and_zero(&alloc);
+			if (!pt)
+				pt = alloc_pt(vm);
+			if (IS_ERR(pt)) {
+				ret = PTR_ERR(pt);
+				goto unwind_out;
+			}
+
+			fill32_px(pt, vm->scratch[0].encode);
+
+			spin_lock(&pd->lock);
+			if (pd->entry[pde] == &vm->scratch[1]) {
+				pd->entry[pde] = pt;
+			} else {
+				alloc = pt;
+				pt = pd->entry[pde];
+			}
+		}
+
+		atomic_add(count, &pt->used);
+	}
+	spin_unlock(&pd->lock);
+
+	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
+		gen6_flush_pd(ppgtt, from, start);
+
+	goto out;
+
+unwind_out:
+	gen6_ppgtt_clear_range(vm, from, start - from);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+	return ret;
+}
+
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
+{
+	struct i915_address_space * const vm = &ppgtt->base.vm;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	int ret;
+
+	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+	if (ret)
+		return ret;
+
+	vm->scratch[0].encode =
+		vm->pte_encode(px_dma(&vm->scratch[0]),
+			       I915_CACHE_NONE, PTE_READ_ONLY);
+
+	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
+		cleanup_scratch_page(vm);
+		return -ENOMEM;
+	}
+
+	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+
+	return 0;
+}
+
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
+{
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
+	struct i915_page_table *pt;
+	u32 pde;
+
+	gen6_for_all_pdes(pt, pd, pde)
+		if (px_base(pt) != scratch)
+			free_px(&ppgtt->base.vm, pt);
+}
+
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+
+	__i915_vma_put(ppgtt->vma);
+
+	gen6_ppgtt_free_pd(ppgtt);
+	free_scratch(vm);
+
+	mutex_destroy(&ppgtt->flush);
+	mutex_destroy(&ppgtt->pin_mutex);
+	kfree(ppgtt->base.pd);
+}
+
+static int pd_vma_set_pages(struct i915_vma *vma)
+{
+	vma->pages = ERR_PTR(-ENODEV);
+	return 0;
+}
+
+static void pd_vma_clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+
+	vma->pages = NULL;
+}
+
+static int pd_vma_bind(struct i915_vma *vma,
+		       enum i915_cache_level cache_level,
+		       u32 unused)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+	struct gen6_ppgtt *ppgtt = vma->private;
+	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+
+	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
+
+	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
+	return 0;
+}
+
+static void pd_vma_unbind(struct i915_vma *vma)
+{
+	struct gen6_ppgtt *ppgtt = vma->private;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
+	struct i915_page_table *pt;
+	unsigned int pde;
+
+	if (!ppgtt->scan_for_unused_pt)
+		return;
+
+	/* Free all no longer used page tables */
+	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+		if (px_base(pt) == scratch || atomic_read(&pt->used))
+			continue;
+
+		free_px(&ppgtt->base.vm, pt);
+		pd->entry[pde] = scratch;
+	}
+
+	ppgtt->scan_for_unused_pt = false;
+}
+
+static const struct i915_vma_ops pd_vma_ops = {
+	.set_pages = pd_vma_set_pages,
+	.clear_pages = pd_vma_clear_pages,
+	.bind_vma = pd_vma_bind,
+	.unbind_vma = pd_vma_unbind,
+};
+
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
+{
+	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(size > ggtt->vm.total);
+
+	vma = i915_vma_alloc();
+	if (!vma)
+		return ERR_PTR(-ENOMEM);
+
+	i915_active_init(&vma->active, NULL, NULL);
+
+	kref_init(&vma->ref);
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(&ggtt->vm);
+	vma->ops = &pd_vma_ops;
+	vma->private = ppgtt;
+
+	vma->size = size;
+	vma->fence_size = size;
+	atomic_set(&vma->flags, I915_VMA_GGTT);
+	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+
+	INIT_LIST_HEAD(&vma->obj_link);
+	INIT_LIST_HEAD(&vma->closed_link);
+
+	return vma;
+}
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	int err;
+
+	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+
+	/*
+	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
+	 * which will be pinned into every active context.
+	 * (When vma->pin_count becomes atomic, I expect we will naturally
+	 * need a larger, unpacked, type and kill this redundancy.)
+	 */
+	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
+		return 0;
+
+	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+		return -EINTR;
+
+	/*
+	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
+	 * allocator works in address space sizes, so it's multiplied by page
+	 * size. We allocate at the top of the GTT to avoid fragmentation.
+	 */
+	err = 0;
+	if (!atomic_read(&ppgtt->pin_count))
+		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+	if (!err)
+		atomic_inc(&ppgtt->pin_count);
+	mutex_unlock(&ppgtt->pin_mutex);
+
+	return err;
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+	if (atomic_dec_and_test(&ppgtt->pin_count))
+		i915_vma_unpin(ppgtt->vma);
+}
+
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	if (!atomic_read(&ppgtt->pin_count))
+		return;
+
+	i915_vma_unpin(ppgtt->vma);
+	atomic_set(&ppgtt->pin_count, 0);
+}
+
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ggtt * const ggtt = gt->ggtt;
+	struct gen6_ppgtt *ppgtt;
+	int err;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&ppgtt->flush);
+	mutex_init(&ppgtt->pin_mutex);
+
+	ppgtt_init(&ppgtt->base, gt);
+	ppgtt->base.vm.top = 1;
+
+	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
+	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+
+	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+
+	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+	if (!ppgtt->base.pd) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	err = gen6_ppgtt_init_scratch(ppgtt);
+	if (err)
+		goto err_pd;
+
+	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+	if (IS_ERR(ppgtt->vma)) {
+		err = PTR_ERR(ppgtt->vma);
+		goto err_scratch;
+	}
+
+	return &ppgtt->base;
+
+err_scratch:
+	free_scratch(&ppgtt->base.vm);
+err_pd:
+	kfree(ppgtt->base.pd);
+err_free:
+	mutex_destroy(&ppgtt->pin_mutex);
+	kfree(ppgtt);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
new file mode 100644
index 000000000000..72e481806c96
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN6_PPGTT_H__
+#define __GEN6_PPGTT_H__
+
+#include "intel_gtt.h"
+
+struct gen6_ppgtt {
+	struct i915_ppgtt base;
+
+	struct mutex flush;
+	struct i915_vma *vma;
+	gen6_pte_t __iomem *pd_addr;
+
+	atomic_t pin_count;
+	struct mutex pin_mutex;
+
+	bool scan_for_unused_pt;
+};
+
+static inline u32 gen6_pte_index(u32 addr)
+{
+	return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pte_count(u32 addr, u32 length)
+{
+	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pde_index(u32 addr)
+{
+	return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
+
+static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
+{
+	BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
+	return __to_gen6_ppgtt(base);
+}
+
+/*
+ * gen6_for_each_pde() iterates over every pde from start until start+length.
+ * If start and start+length are not perfectly divisible, the macro will round
+ * down and up as needed. Start=0 and length=2G effectively iterates over
+ * every PDE in the system. The macro modifies ALL its parameters except 'pd',
+ * so each of the other parameters should preferably be a simple variable, or
+ * at most an lvalue with no side-effects!
+ */
+#define gen6_for_each_pde(pt, pd, start, length, iter)			\
+	for (iter = gen6_pde_index(start);				\
+	     length > 0 && iter < I915_PDES &&				\
+		     (pt = i915_pt_entry(pd, iter), true);		\
+	     ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT);		\
+		    temp = min(temp - start, length);			\
+		    start += temp, length -= temp; }), ++iter)
+
+#define gen6_for_all_pdes(pt, pd, iter)					\
+	for (iter = 0;							\
+	     iter < I915_PDES &&					\
+		     (pt = i915_pt_entry(pd, iter), true);		\
+	     ++iter)
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
+void gen6_ppgtt_enable(struct intel_gt *gt);
+void gen7_ppgtt_enable(struct intel_gt *gt);
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
new file mode 100644
index 000000000000..4d1de2d97d5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen8_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+static u64 gen8_pde_encode(const dma_addr_t addr,
+			   const enum i915_cache_level level)
+{
+	u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
+
+	if (level != I915_CACHE_NONE)
+		pde |= PPAT_CACHED_PDE;
+	else
+		pde |= PPAT_UNCACHED;
+
+	return pde;
+}
+
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+{
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
+	enum vgt_g2v_type msg;
+	int i;
+
+	if (create)
+		atomic_inc(px_used(ppgtt->pd)); /* never remove */
+	else
+		atomic_dec(px_used(ppgtt->pd));
+
+	mutex_lock(&i915->vgpu.lock);
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		const u64 daddr = px_dma(ppgtt->pd);
+
+		intel_uncore_write(uncore,
+				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+		intel_uncore_write(uncore,
+				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+		msg = create ?
+			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			intel_uncore_write(uncore,
+					   vgtif_reg(pdp[i].lo),
+					   lower_32_bits(daddr));
+			intel_uncore_write(uncore,
+					   vgtif_reg(pdp[i].hi),
+					   upper_32_bits(daddr));
+		}
+
+		msg = create ?
+			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
+	}
+
+	/* g2v_notify atomically (via hv trap) consumes the message packet. */
+	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);
+
+	mutex_unlock(&i915->vgpu.lock);
+}
+
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
+#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
+#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
+static inline unsigned int
+gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
+{
+	const int shift = gen8_pd_shift(lvl);
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(start >= end);
+	end += ~mask >> gen8_pd_shift(1);
+
+	*idx = i915_pde_index(start, shift);
+	if ((start ^ end) & mask)
+		return GEN8_PDES - *idx;
+	else
+		return i915_pde_index(end, shift) - *idx;
+}
+
+static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
+{
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(start >= end);
+	return (start ^ end) & mask && (start & ~mask) == 0;
+}
+
+static inline unsigned int gen8_pt_count(u64 start, u64 end)
+{
+	GEM_BUG_ON(start >= end);
+	if ((start ^ end) >> gen8_pd_shift(1))
+		return GEN8_PDES - (start & (GEN8_PDES - 1));
+	else
+		return end - start;
+}
+
+static inline unsigned int
+gen8_pd_top_count(const struct i915_address_space *vm)
+{
+	unsigned int shift = __gen8_pte_shift(vm->top);
+	return (vm->total + (1ull << shift) - 1) >> shift;
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (vm->top == 2)
+		return ppgtt->pd;
+	else
+		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
+{
+	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
+
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+				 struct i915_page_directory *pd,
+				 int count, int lvl)
+{
+	if (lvl) {
+		void **pde = pd->entry;
+
+		do {
+			if (!*pde)
+				continue;
+
+			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
+		} while (pde++, --count);
+	}
+
+	free_px(vm, pd);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (intel_vgpu_active(vm->i915))
+		gen8_ppgtt_notify_vgt(ppgtt, false);
+
+	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
+	free_scratch(vm);
+}
+
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 start, const u64 end, int lvl)
+{
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	unsigned int idx, len;
+
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+	len = gen8_pd_range(start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+	    __func__, vm, lvl + 1, start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
+
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+		    gen8_pd_contains(start, end, lvl)) {
+			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+			    __func__, vm, lvl + 1, idx, start, end);
+			clear_pd_entry(pd, idx, scratch);
+			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+			start += (u64)I915_PDES << gen8_pd_shift(lvl);
+			continue;
+		}
+
+		if (lvl) {
+			start = __gen8_ppgtt_clear(vm, as_pd(pt),
+						   start, end, lvl);
+		} else {
+			unsigned int count;
+			u64 *vaddr;
+
+			count = gen8_pt_count(start, end);
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+			    __func__, vm, lvl, start, end,
+			    gen8_pd_index(start, 0), count,
+			    atomic_read(&pt->used));
+			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
+
+			vaddr = kmap_atomic_px(pt);
+			memset64(vaddr + gen8_pd_index(start, 0),
+				 vm->scratch[0].encode,
+				 count);
+			kunmap_atomic(vaddr);
+
+			atomic_sub(count, &pt->used);
+			start += count;
+		}
+
+		if (release_pd_entry(pd, idx, pt, scratch))
+			free_px(vm, pt);
+	} while (idx++, --len);
+
+	return start;
+}
+
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+			     u64 start, u64 length)
+{
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+
+	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+			   start, start + length, vm->top);
+}
+
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 * const start, const u64 end, int lvl)
+{
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	struct i915_page_table *alloc = NULL;
+	unsigned int idx, len;
+	int ret = 0;
+
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+	len = gen8_pd_range(*start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+	    __func__, vm, lvl + 1, *start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
+	spin_lock(&pd->lock);
+	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (!pt) {
+			spin_unlock(&pd->lock);
+
+			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+			    __func__, vm, lvl + 1, idx);
+
+			pt = fetch_and_zero(&alloc);
+			if (lvl) {
+				if (!pt) {
+					pt = &alloc_pd(vm)->pt;
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
+
+				fill_px(pt, vm->scratch[lvl].encode);
+			} else {
+				if (!pt) {
+					pt = alloc_pt(vm);
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
+
+				if (intel_vgpu_active(vm->i915) ||
+				    gen8_pt_count(*start, end) < I915_PDES)
+					fill_px(pt, vm->scratch[lvl].encode);
+			}
+
+			spin_lock(&pd->lock);
+			if (likely(!pd->entry[idx]))
+				set_pd_entry(pd, idx, pt);
+			else
+				alloc = pt, pt = pd->entry[idx];
+		}
+
+		if (lvl) {
+			atomic_inc(&pt->used);
+			spin_unlock(&pd->lock);
+
+			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+						 start, end, lvl);
+			if (unlikely(ret)) {
+				if (release_pd_entry(pd, idx, pt, scratch))
+					free_px(vm, pt);
+				goto out;
+			}
+
+			spin_lock(&pd->lock);
+			atomic_dec(&pt->used);
+			GEM_BUG_ON(!atomic_read(&pt->used));
+		} else {
+			unsigned int count = gen8_pt_count(*start, end);
+
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+			    __func__, vm, lvl, *start, end,
+			    gen8_pd_index(*start, 0), count,
+			    atomic_read(&pt->used));
+
+			atomic_add(count, &pt->used);
+			/* All other pdes may be simultaneously removed */
+			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
+			*start += count;
+		}
+	} while (idx++, --len);
+	spin_unlock(&pd->lock);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	return ret;
+}
+
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+			    u64 start, u64 length)
+{
+	u64 from;
+	int err;
+
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+	from = start;
+
+	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+				 &start, start + length, vm->top);
+	if (unlikely(err && from != start))
+		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+				   from, start, vm->top);
+
+	return err;
+}
+
+static __always_inline u64
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+		      struct i915_page_directory *pdp,
+		      struct sgt_dma *iter,
+		      u64 idx,
+		      enum i915_cache_level cache_level,
+		      u32 flags)
+{
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	gen8_pte_t *vaddr;
+
+	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+	do {
+		GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+
+		iter->dma += I915_GTT_PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				idx = 0;
+				break;
+			}
+
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
+		}
+
+		if (gen8_pd_index(++idx, 0) == 0) {
+			if (gen8_pd_index(idx, 1) == 0) {
+				/* Limited by sg length for 3lvl */
+				if (gen8_pd_index(idx, 2) == 0)
+					break;
+
+				pd = pdp->entry[gen8_pd_index(idx, 2)];
+			}
+
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+		}
+	} while (1);
+	kunmap_atomic(vaddr);
+
+	return idx;
+}
+
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+				   struct sgt_dma *iter,
+				   enum i915_cache_level cache_level,
+				   u32 flags)
+{
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	u64 start = vma->node.start;
+	dma_addr_t rem = iter->sg->length;
+
+	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
+	do {
+		struct i915_page_directory * const pdp =
+			gen8_pdp_for_page_address(vma->vm, start);
+		struct i915_page_directory * const pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+		gen8_pte_t encode = pte_encode;
+		unsigned int maybe_64K = -1;
+		unsigned int page_size;
+		gen8_pte_t *vaddr;
+		u16 index;
+
+		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
+			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
+
+			vaddr = kmap_atomic_px(pd);
+		} else {
+			struct i915_page_table *pt =
+				i915_pt_entry(pd, __gen8_pte_index(start, 1));
+
+			index = __gen8_pte_index(start, 0);
+			page_size = I915_GTT_PAGE_SIZE;
+
+			if (!index &&
+			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+				maybe_64K = __gen8_pte_index(start, 1);
+
+			vaddr = kmap_atomic_px(pt);
+		}
+
+		do {
+			GEM_BUG_ON(iter->sg->length < page_size);
+			vaddr[index++] = encode | iter->dma;
+
+			start += page_size;
+			iter->dma += page_size;
+			rem -= page_size;
+			if (iter->dma >= iter->max) {
+				iter->sg = __sg_next(iter->sg);
+				if (!iter->sg)
+					break;
+
+				rem = iter->sg->length;
+				iter->dma = sg_dma_address(iter->sg);
+				iter->max = iter->dma + rem;
+
+				if (maybe_64K != -1 && index < I915_PDES &&
+				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+					maybe_64K = -1;
+
+				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+					break;
+			}
+		} while (rem >= page_size && index < I915_PDES);
+
+		kunmap_atomic(vaddr);
+
+		/*
+		 * Is it safe to mark the 2M block as 64K? -- Either we have
+		 * filled whole page-table with 64K entries, or filled part of
+		 * it and have reached the end of the sg table and we have
+		 * enough padding.
+		 */
+		if (maybe_64K != -1 &&
+		    (index == I915_PDES ||
+		     (i915_vm_has_scratch_64K(vma->vm) &&
+		      !iter->sg && IS_ALIGNED(vma->node.start +
+					      vma->node.size,
+					      I915_GTT_PAGE_SIZE_2M)))) {
+			vaddr = kmap_atomic_px(pd);
+			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+			kunmap_atomic(vaddr);
+			page_size = I915_GTT_PAGE_SIZE_64K;
+
+			/*
+			 * We write all 4K page entries, even when using 64K
+			 * pages. In order to verify that the HW isn't cheating
+			 * by using the 4K PTE instead of the 64K PTE, we want
+			 * to remove all the surplus entries. If the HW skipped
+			 * the 64K PTE, it will read/write into the scratch page
+			 * instead - which we detect as missing results during
+			 * selftests.
+			 */
+			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+				u16 i;
+
+				encode = vma->vm->scratch[0].encode;
+				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
+
+				for (i = 1; i < index; i += 16)
+					memset64(vaddr + i, encode, 15);
+
+				kunmap_atomic(vaddr);
+			}
+		}
+
+		vma->page_sizes.gtt |= page_size;
+	} while (iter->sg);
+}
+
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+			      struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+	struct sgt_dma iter = sgt_dma(vma);
+
+	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+	} else  {
+		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+
+		do {
+			struct i915_page_directory * const pdp =
+				gen8_pdp_for_page_index(vm, idx);
+
+			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+						    cache_level, flags);
+		} while (idx);
+
+		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+	}
+}
+
+static int gen8_init_scratch(struct i915_address_space *vm)
+{
+	int ret;
+	int i;
+
+	/*
+	 * If everybody agrees to not to write into the scratch page,
+	 * we can reuse it for all vm, keeping contexts and processes separate.
+	 */
+	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
+		struct i915_address_space *clone = vm->gt->vm;
+
+		GEM_BUG_ON(!clone->has_read_only);
+
+		vm->scratch_order = clone->scratch_order;
+		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+		return 0;
+	}
+
+	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+	if (ret)
+		return ret;
+
+	vm->scratch[0].encode =
+		gen8_pte_encode(px_dma(&vm->scratch[0]),
+				I915_CACHE_LLC, vm->has_read_only);
+
+	for (i = 1; i <= vm->top; i++) {
+		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+			goto free_scratch;
+
+		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+		vm->scratch[i].encode =
+			gen8_pde_encode(px_dma(&vm->scratch[i]),
+					I915_CACHE_LLC);
+	}
+
+	return 0;
+
+free_scratch:
+	free_scratch(vm);
+	return -ENOMEM;
+}
+
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
+{
+	struct i915_address_space *vm = &ppgtt->vm;
+	struct i915_page_directory *pd = ppgtt->pd;
+	unsigned int idx;
+
+	GEM_BUG_ON(vm->top != 2);
+	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
+
+	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+		struct i915_page_directory *pde;
+
+		pde = alloc_pd(vm);
+		if (IS_ERR(pde))
+			return PTR_ERR(pde);
+
+		fill_px(pde, vm->scratch[1].encode);
+		set_pd_entry(pd, idx, pde);
+		atomic_inc(px_used(pde)); /* keep pinned */
+	}
+	wmb();
+
+	return 0;
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+	const unsigned int count = gen8_pd_top_count(vm);
+	struct i915_page_directory *pd;
+
+	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+
+	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+	atomic_inc(px_used(pd)); /* mark as pinned */
+	return pd;
+}
+
+/*
+ * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
+ * with a net effect resembling a 2-level page table in normal x86 terms. Each
+ * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
+ * space.
+ *
+ */
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ppgtt *ppgtt;
+	int err;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	ppgtt_init(ppgtt, gt);
+	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+
+	/*
+	 * From bdw, there is hw support for read-only pages in the PPGTT.
+	 *
+	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
+	 * for now.
+	 *
+	 * Gen12 has inherited the same read-only fault issue from gen11.
+	 */
+	ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
+
+	/*
+	 * There are only few exceptions for gen >=6. chv and bxt.
+	 * And we are not sure about the latter so play safe for now.
+	 */
+	if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
+		ppgtt->vm.pt_kmap_wc = true;
+
+	err = gen8_init_scratch(&ppgtt->vm);
+	if (err)
+		goto err_free;
+
+	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+	if (IS_ERR(ppgtt->pd)) {
+		err = PTR_ERR(ppgtt->pd);
+		goto err_free_scratch;
+	}
+
+	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+		err = gen8_preallocate_top_level_pdp(ppgtt);
+		if (err)
+			goto err_free_pd;
+	}
+
+	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+	ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
+	if (intel_vgpu_active(gt->i915))
+		gen8_ppgtt_notify_vgt(ppgtt, true);
+
+	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+
+	return ppgtt;
+
+err_free_pd:
+	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+err_free_scratch:
+	free_scratch(&ppgtt->vm);
+err_free:
+	kfree(ppgtt);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
new file mode 100644
index 000000000000..76a08b9c1f5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN8_PPGTT_H__
+#define __GEN8_PPGTT_H__
+
+struct intel_gt;
+
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 09c68dda2098..0ba524a414c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,6 +28,8 @@
 
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
@@ -53,15 +55,17 @@ static void irq_disable(struct intel_engine_cs *engine)
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
+	struct intel_engine_cs *engine =
+		container_of(b, struct intel_engine_cs, breadcrumbs);
+
 	lockdep_assert_held(&b->irq_lock);
 
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
-		irq_disable(container_of(b,
-					 struct intel_engine_cs,
-					 breadcrumbs));
+		irq_disable(engine);
 
 	b->irq_armed = false;
+	intel_gt_pm_put_async(engine->gt);
 }
 
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
@@ -120,7 +124,6 @@ __dma_fence_signal__notify(struct dma_fence *fence,
 	struct dma_fence_cb *cur, *tmp;
 
 	lockdep_assert_held(fence->lock);
-	lockdep_assert_irqs_disabled();
 
 	list_for_each_entry_safe(cur, tmp, list, node) {
 		INIT_LIST_HEAD(&cur->node);
@@ -128,9 +131,17 @@ __dma_fence_signal__notify(struct dma_fence *fence,
 	}
 }
 
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 {
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_engine_cs *engine =
+		container_of(b, struct intel_engine_cs, breadcrumbs);
+
+	intel_engine_add_retire(engine, tl);
+}
+
+static void signal_irq_work(struct irq_work *work)
+{
+	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
@@ -177,8 +188,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 		if (!list_is_first(pos, &ce->signals)) {
 			/* Advance the list to the first incomplete request */
 			__list_del_many(&ce->signals, pos);
-			if (&ce->signals == pos) /* now empty */
+			if (&ce->signals == pos) { /* now empty */
 				list_del_init(&ce->signal_link);
+				add_retire(b, ce->timeline);
+			}
 		}
 	}
 
@@ -199,29 +212,17 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	}
 }
 
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
-	local_irq_disable();
-	intel_engine_breadcrumbs_irq(engine);
-	local_irq_enable();
-}
-
-static void signal_irq_work(struct irq_work *work)
-{
-	struct intel_engine_cs *engine =
-		container_of(work, typeof(*engine), breadcrumbs.irq_work);
-
-	intel_engine_breadcrumbs_irq(engine);
-}
-
-static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
 	struct intel_engine_cs *engine =
 		container_of(b, struct intel_engine_cs, breadcrumbs);
 
 	lockdep_assert_held(&b->irq_lock);
 	if (b->irq_armed)
-		return;
+		return true;
+
+	if (!intel_gt_pm_get_if_awake(engine->gt))
+		return false;
 
 	/*
 	 * The breadcrumb irq will be disarmed on the interrupt after the
@@ -241,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 
 	if (!b->irq_enabled++)
 		irq_enable(engine);
+
+	return true;
 }
 
 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -275,23 +278,23 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
 	lockdep_assert_held(&rq->lock);
-	lockdep_assert_irqs_disabled();
 
 	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
 		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-		struct intel_context *ce = rq->hw_context;
+		struct intel_context *ce = rq->context;
 		struct list_head *pos;
 
 		spin_lock(&b->irq_lock);
 		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
 
-		__intel_breadcrumbs_arm_irq(b);
+		if (!__intel_breadcrumbs_arm_irq(b))
+			goto unlock;
 
 		/*
 		 * We keep the seqno in retirement order, so we can break
-		 * inside intel_engine_breadcrumbs_irq as soon as we've passed
-		 * the last completed request (or seen a request that hasn't
-		 * event started). We could iterate the timeline->requests list,
+		 * inside intel_engine_signal_breadcrumbs as soon as we've
+		 * passed the last completed request (or seen a request that
+		 * hasn't event started). We could walk the timeline->requests,
 		 * but keeping a separate signalers_list has the advantage of
 		 * hopefully being much smaller than the full list and so
 		 * provides faster iteration and detection when there are no
@@ -314,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 		GEM_BUG_ON(!check_signal_order(ce, rq));
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+unlock:
 		spin_unlock(&b->irq_lock);
 	}
 
@@ -325,7 +329,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
 	lockdep_assert_held(&rq->lock);
-	lockdep_assert_irqs_disabled();
 
 	/*
 	 * We must wait for b->irq_lock so that we know the interrupt handler
@@ -335,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 	 */
 	spin_lock(&b->irq_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-		struct intel_context *ce = rq->hw_context;
+		struct intel_context *ce = rq->context;
 
 		list_del(&rq->signal_link);
 		if (list_empty(&ce->signals))
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index f55691d151ae..23137b2a8689 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
 #include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_ring.h"
 
 static struct i915_global_context {
 	struct i915_global base;
@@ -30,8 +31,7 @@ void intel_context_free(struct intel_context *ce)
 }
 
 struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
-		     struct intel_engine_cs *engine)
+intel_context_create(struct intel_engine_cs *engine)
 {
 	struct intel_context *ce;
 
@@ -39,39 +39,82 @@ intel_context_create(struct i915_gem_context *ctx,
 	if (!ce)
 		return ERR_PTR(-ENOMEM);
 
-	intel_context_init(ce, ctx, engine);
+	intel_context_init(ce, engine);
 	return ce;
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
+int intel_context_alloc_state(struct intel_context *ce)
 {
-	int err;
+	int err = 0;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
 		return -EINTR;
 
-	if (likely(!atomic_read(&ce->pin_count))) {
-		intel_wakeref_t wakeref;
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		err = ce->ops->alloc(ce);
+		if (unlikely(err))
+			goto unlock;
+
+		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+	}
+
+unlock:
+	mutex_unlock(&ce->pin_mutex);
+	return err;
+}
 
-		if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-			err = ce->ops->alloc(ce);
-			if (unlikely(err))
-				goto err;
+static int intel_context_active_acquire(struct intel_context *ce)
+{
+	int err;
 
-			__set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+	err = i915_active_acquire(&ce->active);
+	if (err)
+		return err;
+
+	/* Preallocate tracking nodes */
+	if (!intel_context_is_barrier(ce)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
 		}
+	}
+
+	return 0;
+}
 
-		err = 0;
-		with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
-			err = ce->ops->pin(ce);
+static void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	int err;
+
+	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+		err = intel_context_alloc_state(ce);
 		if (err)
+			return err;
+	}
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return -EINTR;
+
+	if (likely(!atomic_read(&ce->pin_count))) {
+		err = intel_context_active_acquire(ce);
+		if (unlikely(err))
 			goto err;
 
-		GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n",
-			  ce->engine->name, ce->timeline->fence_context,
-			  ce->ring->head, ce->ring->tail);
+		err = ce->ops->pin(ce);
+		if (unlikely(err))
+			goto err_active;
 
-		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
+		CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+			 ce->ring->head, ce->ring->tail);
 
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
@@ -82,6 +125,8 @@ int __intel_context_do_pin(struct intel_context *ce)
 	mutex_unlock(&ce->pin_mutex);
 	return 0;
 
+err_active:
+	intel_context_active_release(ce);
 err:
 	mutex_unlock(&ce->pin_mutex);
 	return err;
@@ -89,39 +134,36 @@ err:
 
 void intel_context_unpin(struct intel_context *ce)
 {
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+	if (!atomic_dec_and_test(&ce->pin_count))
 		return;
 
-	/* We may be called from inside intel_context_pin() to evict another */
-	intel_context_get(ce);
-	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
-	if (likely(atomic_dec_and_test(&ce->pin_count))) {
-		GEM_TRACE("%s context:%llx retire\n",
-			  ce->engine->name, ce->timeline->fence_context);
-
-		ce->ops->unpin(ce);
-
-		i915_gem_context_put(ce->gem_context);
-		intel_context_active_release(ce);
-	}
+	CE_TRACE(ce, "unpin\n");
+	ce->ops->unpin(ce);
 
-	mutex_unlock(&ce->pin_mutex);
+	/*
+	 * Once released, we may asynchronously drop the active reference.
+	 * As that may be the only reference keeping the context alive,
+	 * take an extra now so that it is not freed before we finish
+	 * dereferencing it.
+	 */
+	intel_context_get(ce);
+	intel_context_active_release(ce);
 	intel_context_put(ce);
 }
 
 static int __context_pin_state(struct i915_vma *vma)
 {
-	u64 flags;
+	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
 	int err;
 
-	flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
-	flags |= PIN_HIGH | PIN_GLOBAL;
-
-	err = i915_vma_pin(vma, 0, 0, flags);
+	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
 	if (err)
 		return err;
 
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unpin;
+
 	/*
 	 * And mark it as a globally pinned object to let the shrinker know
 	 * it cannot reclaim the object until we release it.
@@ -130,26 +172,58 @@ static int __context_pin_state(struct i915_vma *vma)
 	vma->obj->mm.dirty = true;
 
 	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+	return err;
 }
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
-	__i915_vma_unpin(vma);
 	i915_vma_make_shrinkable(vma);
+	i915_active_release(&vma->active);
+	__i915_vma_unpin(vma);
 }
 
+static int __ring_active(struct intel_ring *ring)
+{
+	int err;
+
+	err = i915_active_acquire(&ring->vma->active);
+	if (err)
+		return err;
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_active;
+
+	return 0;
+
+err_active:
+	i915_active_release(&ring->vma->active);
+	return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+	intel_ring_unpin(ring);
+	i915_active_release(&ring->vma->active);
+}
+
+__i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 
-	GEM_TRACE("%s context:%llx retire\n",
-		  ce->engine->name, ce->timeline->fence_context);
+	CE_TRACE(ce, "retire\n");
 
+	set_bit(CONTEXT_VALID_BIT, &ce->flags);
 	if (ce->state)
 		__context_unpin_state(ce->state);
 
 	intel_timeline_unpin(ce->timeline);
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
+
 	intel_context_put(ce);
 }
 
@@ -158,9 +232,11 @@ static int __intel_context_active(struct i915_active *active)
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
+	CE_TRACE(ce, "active\n");
+
 	intel_context_get(ce);
 
-	err = intel_ring_pin(ce->ring);
+	err = __ring_active(ce->ring);
 	if (err)
 		goto err_put;
 
@@ -180,65 +256,34 @@ static int __intel_context_active(struct i915_active *active)
 err_timeline:
 	intel_timeline_unpin(ce->timeline);
 err_ring:
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 err_put:
 	intel_context_put(ce);
 	return err;
 }
 
-int intel_context_active_acquire(struct intel_context *ce)
-{
-	int err;
-
-	err = i915_active_acquire(&ce->active);
-	if (err)
-		return err;
-
-	/* Preallocate tracking nodes */
-	if (!i915_gem_context_is_kernel(ce->gem_context)) {
-		err = i915_active_acquire_preallocate_barrier(&ce->active,
-							      ce->engine);
-		if (err) {
-			i915_active_release(&ce->active);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-void intel_context_active_release(struct intel_context *ce)
-{
-	/* Nodes preallocated in intel_context_active() */
-	i915_active_acquire_barrier(&ce->active);
-	i915_active_release(&ce->active);
-}
-
 void
 intel_context_init(struct intel_context *ce,
-		   struct i915_gem_context *ctx,
 		   struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!engine->cops);
+	GEM_BUG_ON(!engine->gt->vm);
 
 	kref_init(&ce->ref);
 
-	ce->gem_context = ctx;
-	ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
-	if (ctx->timeline)
-		ce->timeline = intel_timeline_get(ctx->timeline);
-
 	ce->engine = engine;
 	ce->ops = engine->cops;
 	ce->sseu = engine->sseu;
-	ce->ring = __intel_context_ring_size(SZ_16K);
+	ce->ring = __intel_context_ring_size(SZ_4K);
+
+	ce->vm = i915_vm_get(engine->gt->vm);
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_init(ctx->i915, &ce->active,
+	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
 }
 
@@ -296,17 +341,11 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	int err;
 
 	/* Only suitable for use in remotely modifying this context */
-	GEM_BUG_ON(rq->hw_context == ce);
-
-	if (rq->timeline != tl) { /* beware timeline sharing */
-		err = mutex_lock_interruptible_nested(&tl->mutex,
-						      SINGLE_DEPTH_NESTING);
-		if (err)
-			return err;
+	GEM_BUG_ON(rq->context == ce);
 
+	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
 		/* Queue this switch after current activity by this context. */
-		err = i915_active_request_set(&tl->last_request, rq);
-		mutex_unlock(&tl->mutex);
+		err = i915_active_fence_set(&tl->last_request, rq);
 		if (err)
 			return err;
 	}
@@ -319,7 +358,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	return i915_active_ref(&ce->active, rq->timeline, rq);
+	return i915_active_add_request(&ce->active, rq);
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..30bd248827d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -7,21 +7,31 @@
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
+#include <linux/bitops.h>
 #include <linux/lockdep.h>
+#include <linux/types.h>
 
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_ring_types.h"
 #include "intel_timeline_types.h"
 
+#define CE_TRACE(ce, fmt, ...) do {					\
+	const struct intel_context *ce__ = (ce);			\
+	ENGINE_TRACE(ce__->engine, "context:%llx " fmt,			\
+		     ce__->timeline->fence_context,			\
+		     ##__VA_ARGS__);					\
+} while (0)
+
 void intel_context_init(struct intel_context *ce,
-			struct i915_gem_context *ctx,
 			struct intel_engine_cs *engine);
 void intel_context_fini(struct intel_context *ce);
 
 struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
-		     struct intel_engine_cs *engine);
+intel_context_create(struct intel_engine_cs *engine);
+
+int intel_context_alloc_state(struct intel_context *ce);
 
 void intel_context_free(struct intel_context *ce);
 
@@ -68,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 
 int __intel_context_do_pin(struct intel_context *ce);
 
+static inline bool intel_context_pin_if_active(struct intel_context *ce)
+{
+	return atomic_inc_not_zero(&ce->pin_count);
+}
+
 static inline int intel_context_pin(struct intel_context *ce)
 {
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+	if (likely(intel_context_pin_if_active(ce)))
 		return 0;
 
 	return __intel_context_do_pin(ce);
@@ -108,9 +123,6 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
-int intel_context_active_acquire(struct intel_context *ce);
-void intel_context_active_release(struct intel_context *ce);
-
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
@@ -152,4 +164,64 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
 	return u64_to_ptr(struct intel_ring, sz);
 }
 
+static inline bool intel_context_is_barrier(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+}
+
+static inline bool intel_context_use_semaphores(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_set_use_semaphores(struct intel_context *ce)
+{
+	set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_clear_use_semaphores(struct intel_context *ce)
+{
+	clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline bool intel_context_is_banned(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool intel_context_set_banned(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool
+intel_context_force_single_submission(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline void
+intel_context_set_single_submission(struct intel_context *ce)
+{
+	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline bool
+intel_context_nopreempt(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_set_nopreempt(struct intel_context *ce)
+{
+	set_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_clear_nopreempt(struct intel_context *ce)
+{
+	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..ca1420fb8b53 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,8 @@
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
 
+#define CONTEXT_REDZONE POISON_INUSE
+
 struct i915_gem_context;
 struct i915_vma;
 struct intel_context;
@@ -44,7 +46,7 @@ struct intel_context {
 #define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
-	struct i915_gem_context *gem_context;
+	struct i915_gem_context __rcu *gem_context;
 
 	struct list_head signal_link;
 	struct list_head signals;
@@ -54,10 +56,17 @@ struct intel_context {
 	struct intel_timeline *timeline;
 
 	unsigned long flags;
-#define CONTEXT_ALLOC_BIT 0
+#define CONTEXT_BARRIER_BIT		0
+#define CONTEXT_ALLOC_BIT		1
+#define CONTEXT_VALID_BIT		2
+#define CONTEXT_USE_SEMAPHORES		3
+#define CONTEXT_BANNED			4
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	5
+#define CONTEXT_NOPREEMPT		6
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	unsigned int active_count; /* protected by timeline->mutex */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 22aab8593abf..5df003061e44 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -19,6 +19,7 @@
 #include "intel_workarounds.h"
 
 struct drm_printer;
+struct intel_gt;
 
 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
  * but keeps the logic simple. Indeed, the whole purpose of this macro is just
@@ -28,6 +29,13 @@ struct drm_printer;
 #define CACHELINE_BYTES 64
 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
 
+#define ENGINE_TRACE(e, fmt, ...) do {					\
+	const struct intel_engine_cs *e__ __maybe_unused = (e);		\
+	GEM_TRACE("%s %s: " fmt,					\
+		  dev_name(e__->i915->drm.dev), e__->name,		\
+		  ##__VA_ARGS__);					\
+} while (0)
+
 /*
  * The register defines to be used with the following macros need to accept a
  * base param, e.g:
@@ -89,38 +97,6 @@ struct drm_printer;
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-enum intel_engine_hangcheck_action {
-	ENGINE_IDLE = 0,
-	ENGINE_WAIT,
-	ENGINE_ACTIVE_SEQNO,
-	ENGINE_ACTIVE_HEAD,
-	ENGINE_ACTIVE_SUBUNITS,
-	ENGINE_WAIT_KICK,
-	ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
-	switch (a) {
-	case ENGINE_IDLE:
-		return "idle";
-	case ENGINE_WAIT:
-		return "wait";
-	case ENGINE_ACTIVE_SEQNO:
-		return "active seqno";
-	case ENGINE_ACTIVE_HEAD:
-		return "active head";
-	case ENGINE_ACTIVE_SUBUNITS:
-		return "active subunits";
-	case ENGINE_WAIT_KICK:
-		return "wait kick";
-	case ENGINE_DEAD:
-		return "dead";
-	}
-
-	return "unknown";
-}
 
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
@@ -131,9 +107,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 static inline struct i915_request *
 execlists_active(const struct intel_engine_execlists *execlists)
 {
-	GEM_BUG_ON(execlists->active - execlists->inflight >
-		   execlists_num_ports(execlists));
-	return READ_ONCE(*execlists->active);
+	return *READ_ONCE(execlists->active);
 }
 
 static inline void
@@ -206,132 +180,19 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_HWS_CSB_WRITE_INDEX	0x1f
 #define CNL_HWS_CSB_WRITE_INDEX		0x2f
 
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
-	kref_get(&ring->ref);
-	return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
-	kref_put(&ring->ref, intel_ring_free);
-}
-
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
-	/* Dummy function.
-	 *
-	 * This serves as a placeholder in the code so that the reader
-	 * can compare against the preceding intel_ring_begin() and
-	 * check that the number of dwords emitted matches the space
-	 * reserved for the command packet (i.e. the value passed to
-	 * intel_ring_begin()).
-	 */
-	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
-	return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
-			unsigned int pos)
-{
-	if (pos & -ring->size) /* must be strictly within the ring */
-		return false;
-
-	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
-		return false;
+int intel_engines_init_mmio(struct intel_gt *gt);
+int intel_engines_init(struct intel_gt *gt);
 
-	return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
-	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	u32 offset = addr - rq->ring->vaddr;
-	GEM_BUG_ON(offset > rq->ring->size);
-	return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
-	/*
-	 * "Ring Buffer Use"
-	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
-	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
-	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 *
-	 * We use ring->head as the last known location of the actual RING_HEAD,
-	 * it may have advanced but in the worst case it is equally the same
-	 * as ring->head and so we should never program RING_TAIL to advance
-	 * into the same cacheline as ring->head.
-	 */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
-	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
-		   tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
-	/* Whilst writes to the tail are strictly order, there is no
-	 * serialisation between readers and the writers. The tail may be
-	 * read by i915_request_retire() just as it is being updated
-	 * by execlists, as although the breadcrumb is complete, the context
-	 * switch hasn't been seen.
-	 */
-	assert_ring_tail_valid(ring, tail);
-	ring->tail = tail;
-	return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
-	/*
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 */
-	GEM_BUG_ON(!is_power_of_2(size));
-	return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
-int intel_engines_init_mmio(struct drm_i915_private *i915);
-int intel_engines_setup(struct drm_i915_private *i915);
-int intel_engines_init(struct drm_i915_private *i915);
-void intel_engines_cleanup(struct drm_i915_private *i915);
+void intel_engines_release(struct intel_gt *gt);
+void intel_engines_free(struct intel_gt *gt);
 
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 int intel_ring_submission_setup(struct intel_engine_cs *engine);
-int intel_ring_submission_init(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
@@ -341,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
 void intel_engine_init_execlists(struct intel_engine_cs *engine);
@@ -349,17 +210,14 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine);
 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
 static inline void
-intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
 {
 	irq_work_queue(&engine->breadcrumbs.irq_work);
 }
 
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
-
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -417,13 +275,14 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 					bool stalled)
 {
-	if (engine->reset.reset)
-		engine->reset.reset(engine, stalled);
+	if (engine->reset.rewind)
+		engine->reset.rewind(engine, stalled);
 	engine->serial++; /* contexts lost */
 }
 
-bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct intel_gt *gt);
+bool intel_engine_is_idle(struct intel_engine_cs *engine);
+void intel_engine_flush_submission(struct intel_engine_cs *engine);
 
 void intel_engines_reset_default_submission(struct intel_gt *gt);
 
@@ -434,61 +293,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...);
 
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
-{
-	unsigned long flags;
-
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
-
-	write_seqlock_irqsave(&engine->stats.lock, flags);
-
-	if (engine->stats.enabled > 0) {
-		if (engine->stats.active++ == 0)
-			engine->stats.start = ktime_get();
-		GEM_BUG_ON(engine->stats.active == 0);
-	}
-
-	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
-{
-	unsigned long flags;
-
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
-
-	write_seqlock_irqsave(&engine->stats.lock, flags);
-
-	if (engine->stats.enabled > 0) {
-		ktime_t last;
-
-		if (engine->stats.active && --engine->stats.active == 0) {
-			/*
-			 * Decrement the active context count and in case GPU
-			 * is now idle add up to the running total.
-			 */
-			last = ktime_sub(ktime_get(), engine->stats.start);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
-		} else if (engine->stats.active == 0) {
-			/*
-			 * After turning on engine stats, context out might be
-			 * the first event in which case we account from the
-			 * time stats gathering was turned on.
-			 */
-			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
-		}
-	}
-
-	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
 void intel_disable_engine_stats(struct intel_engine_cs *engine);
 
@@ -497,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
 struct i915_request *
 intel_engine_find_active_request(struct intel_engine_cs *engine);
 
-u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 
@@ -525,4 +329,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
 #define ENGINE_MOCK	1
 #define ENGINE_VIRTUAL	2
 
+static inline bool
+intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return false;
+
+	return intel_engine_has_preemption(engine);
+}
+
+static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return false;
+
+	return intel_engine_has_semaphores(engine);
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4ce8626b140e..f451ef376548 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,15 +28,16 @@
 
 #include "i915_drv.h"
 
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
@@ -140,7 +141,7 @@ static const struct engine_info intel_engines[] = {
 
 /**
  * intel_engine_context_size() - return the size of the context for an engine
- * @dev_priv: i915 device private
+ * @gt: the gt
  * @class: engine class
  *
  * Each engine class may require a different amount of space for a context
@@ -152,17 +153,18 @@ static const struct engine_info intel_engines[] = {
  * in LRC mode, but does not include the "shared data page" used with
  * GuC submission. The caller should account for this if using the GuC.
  */
-u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
 {
+	struct intel_uncore *uncore = gt->uncore;
 	u32 cxt_size;
 
 	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
 
 	switch (class) {
 	case RENDER_CLASS:
-		switch (INTEL_GEN(dev_priv)) {
+		switch (INTEL_GEN(gt->i915)) {
 		default:
-			MISSING_CASE(INTEL_GEN(dev_priv));
+			MISSING_CASE(INTEL_GEN(gt->i915));
 			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
 		case 12:
 		case 11:
@@ -174,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 		case 8:
 			return GEN8_LR_CONTEXT_RENDER_SIZE;
 		case 7:
-			if (IS_HASWELL(dev_priv))
+			if (IS_HASWELL(gt->i915))
 				return HSW_CXT_TOTAL_SIZE;
 
-			cxt_size = I915_READ(GEN7_CXT_SIZE);
+			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
 			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 6:
-			cxt_size = I915_READ(CXT_SIZE);
+			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 5:
@@ -196,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 			 * minimum allocation anyway so it should all come
 			 * out in the wash.
 			 */
-			cxt_size = I915_READ(CXT_SIZE) + 1;
+			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
 			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
-					 INTEL_GEN(dev_priv),
+					 INTEL_GEN(gt->i915),
 					 cxt_size * 64,
 					 cxt_size - 1);
 			return round_up(cxt_size * 64, PAGE_SIZE);
@@ -215,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 	case VIDEO_DECODE_CLASS:
 	case VIDEO_ENHANCEMENT_CLASS:
 	case COPY_ENGINE_CLASS:
-		if (INTEL_GEN(dev_priv) < 8)
+		if (INTEL_GEN(gt->i915) < 8)
 			return 0;
 		return GEN8_LR_CONTEXT_OTHER_SIZE;
 	}
@@ -277,6 +279,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
 	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
 
+	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
+		return -EINVAL;
+
 	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
 		return -EINVAL;
 
@@ -293,6 +298,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
 
 	engine->id = id;
+	engine->legacy_idx = INVALID_ENGINE;
 	engine->mask = BIT(id);
 	engine->i915 = gt->i915;
 	engine->gt = gt;
@@ -304,14 +310,16 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
 
-	/*
-	 * To be overridden by the backend on setup. However to facilitate
-	 * cleanup on error during setup, we always provide the destroy vfunc.
-	 */
-	engine->destroy = (typeof(engine->destroy))kfree;
+	engine->props.heartbeat_interval_ms =
+		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+	engine->props.preempt_timeout_ms =
+		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+	engine->props.stop_timeout_ms =
+		CONFIG_DRM_I915_STOP_TIMEOUT;
+	engine->props.timeslice_duration_ms =
+		CONFIG_DRM_I915_TIMESLICE_DURATION;
 
-	engine->context_size = intel_engine_context_size(gt->i915,
-							 engine->class);
+	engine->context_size = intel_engine_context_size(gt, engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
 	if (engine->context_size)
@@ -320,6 +328,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
 
+	ewma__engine_latency_init(&engine->latency);
 	seqlock_init(&engine->stats.lock);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -328,8 +337,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	intel_engine_sanitize_mmio(engine);
 
 	gt->engine_class[info->class][info->instance] = engine;
+	gt->engine[id] = engine;
 
-	intel_engine_add_user(engine);
 	gt->i915->engine[id] = engine;
 
 	return 0;
@@ -365,38 +374,58 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
 	}
 }
 
-static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
+static void intel_setup_engine_capabilities(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, i915, id)
+	for_each_engine(engine, gt, id)
 		__setup_engine_capabilities(engine);
 }
 
 /**
- * intel_engines_cleanup() - free the resources allocated for Command Streamers
- * @i915: the i915 devic
+ * intel_engines_release() - free the resources allocated for Command Streamers
+ * @gt: pointer to struct intel_gt
  */
-void intel_engines_cleanup(struct drm_i915_private *i915)
+void intel_engines_release(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, i915, id) {
-		engine->destroy(engine);
-		i915->engine[id] = NULL;
+	/* Decouple the backend; but keep the layout for late GPU resets */
+	for_each_engine(engine, gt, id) {
+		if (!engine->release)
+			continue;
+
+		engine->release(engine);
+		engine->release = NULL;
+
+		memset(&engine->reset, 0, sizeof(engine->reset));
+
+		gt->i915->engine[id] = NULL;
+	}
+}
+
+void intel_engines_free(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, gt, id) {
+		kfree(engine);
+		gt->engine[id] = NULL;
 	}
 }
 
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @i915: the i915 device
+ * @gt: pointer to struct intel_gt
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_mmio(struct drm_i915_private *i915)
+int intel_engines_init_mmio(struct intel_gt *gt)
 {
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_device_info *device_info = mkwrite_device_info(i915);
 	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
 	unsigned int mask = 0;
@@ -414,7 +443,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 		if (!HAS_ENGINE(i915, i))
 			continue;
 
-		err = intel_engine_setup(&i915->gt, i);
+		err = intel_engine_setup(gt, i);
 		if (err)
 			goto cleanup;
 
@@ -431,45 +460,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 
 	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
 
-	intel_gt_check_and_clear_faults(&i915->gt);
-
-	intel_setup_engine_capabilities(i915);
-
-	return 0;
-
-cleanup:
-	intel_engines_cleanup(i915);
-	return err;
-}
+	intel_gt_check_and_clear_faults(gt);
 
-/**
- * intel_engines_init() - init the Engine Command Streamers
- * @i915: i915 device private
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init(struct drm_i915_private *i915)
-{
-	int (*init)(struct intel_engine_cs *engine);
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	if (HAS_EXECLISTS(i915))
-		init = intel_execlists_submission_init;
-	else
-		init = intel_ring_submission_init;
-
-	for_each_engine(engine, i915, id) {
-		err = init(engine);
-		if (err)
-			goto cleanup;
-	}
+	intel_setup_engine_capabilities(gt);
 
 	return 0;
 
 cleanup:
-	intel_engines_cleanup(i915);
+	intel_engines_free(gt);
 	return err;
 }
 
@@ -513,7 +511,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 	unsigned int flags;
 
 	flags = PIN_GLOBAL;
-	if (!HAS_LLC(engine->i915))
+	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
 		/*
 		 * On g33, we cannot place HWS above 256MiB, so
 		 * restrict its pinning to the low mappable arena.
@@ -584,7 +582,7 @@ err:
 	return ret;
 }
 
-static int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
@@ -597,9 +595,9 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
-	intel_engine_init_hangcheck(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
+	intel_engine_init_retire(engine);
 
 	intel_engine_pool_init(&engine->pool);
 
@@ -614,49 +612,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	return 0;
 }
 
-/**
- * intel_engines_setup- setup engine state not requiring hw access
- * @i915: Device to setup.
- *
- * Initializes engine structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engines_setup(struct drm_i915_private *i915)
-{
-	int (*setup)(struct intel_engine_cs *engine);
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	if (HAS_EXECLISTS(i915))
-		setup = intel_execlists_submission_setup;
-	else
-		setup = intel_ring_submission_setup;
-
-	for_each_engine(engine, i915, id) {
-		err = intel_engine_setup_common(engine);
-		if (err)
-			goto cleanup;
-
-		err = setup(engine);
-		if (err)
-			goto cleanup;
-
-		/* We expect the backend to take control over its state */
-		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
-
-		GEM_BUG_ON(!engine->cops);
-	}
-
-	return 0;
-
-cleanup:
-	intel_engines_cleanup(i915);
-	return err;
-}
-
 struct measure_breadcrumb {
 	struct i915_request rq;
 	struct intel_timeline timeline;
@@ -680,6 +635,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 				engine->status_page.vma))
 		goto out_frame;
 
+	mutex_lock(&frame->timeline.mutex);
+
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
 	frame->ring.effective_size = frame->ring.size;
@@ -688,18 +645,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 	frame->rq.i915 = engine->i915;
 	frame->rq.engine = engine;
 	frame->rq.ring = &frame->ring;
-	frame->rq.timeline = &frame->timeline;
+	rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
 
 	dw = intel_timeline_pin(&frame->timeline);
 	if (dw < 0)
 		goto out_timeline;
 
+	spin_lock_irq(&engine->active.lock);
 	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+	spin_unlock_irq(&engine->active.lock);
+
 	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
 
 	intel_timeline_unpin(&frame->timeline);
 
 out_timeline:
+	mutex_unlock(&frame->timeline.mutex);
 	intel_timeline_fini(&frame->timeline);
 out_frame:
 	kfree(frame);
@@ -730,21 +691,30 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
 static struct intel_context *
 create_kernel_context(struct intel_engine_cs *engine)
 {
+	static struct lock_class_key kernel;
 	struct intel_context *ce;
 	int err;
 
-	ce = intel_context_create(engine->i915->kernel_context, engine);
+	ce = intel_context_create(engine);
 	if (IS_ERR(ce))
 		return ce;
 
-	ce->ring = __intel_context_ring_size(SZ_4K);
+	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
 
-	err = intel_context_pin(ce);
+	err = intel_context_pin(ce); /* perma-pin so it is always available */
 	if (err) {
 		intel_context_put(ce);
 		return ERR_PTR(err);
 	}
 
+	/*
+	 * Give our perma-pinned kernel timelines a separate lockdep class,
+	 * so that we can use them from within the normal user timelines
+	 * should we need to inject GPU operations during their request
+	 * construction.
+	 */
+	lockdep_set_class(&ce->timeline->mutex, &kernel);
+
 	return ce;
 }
 
@@ -759,13 +729,19 @@ create_kernel_context(struct intel_engine_cs *engine)
  *
  * Returns zero on success or an error code on failure.
  */
-int intel_engine_init_common(struct intel_engine_cs *engine)
+static int engine_init_common(struct intel_engine_cs *engine)
 {
 	struct intel_context *ce;
 	int ret;
 
 	engine->set_default_submission(engine);
 
+	ret = measure_breadcrumb_dw(engine);
+	if (ret < 0)
+		return ret;
+
+	engine->emit_fini_breadcrumb_dw = ret;
+
 	/*
 	 * We may need to do things with the shrinker which
 	 * require us to immediately switch back to the default
@@ -780,18 +756,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	engine->kernel_context = ce;
 
-	ret = measure_breadcrumb_dw(engine);
-	if (ret < 0)
-		goto err_unpin;
+	return 0;
+}
 
-	engine->emit_fini_breadcrumb_dw = ret;
+int intel_engines_init(struct intel_gt *gt)
+{
+	int (*setup)(struct intel_engine_cs *engine);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
 
-	return 0;
+	if (HAS_EXECLISTS(gt->i915))
+		setup = intel_execlists_submission_setup;
+	else
+		setup = intel_ring_submission_setup;
 
-err_unpin:
-	intel_context_unpin(ce);
-	intel_context_put(ce);
-	return ret;
+	for_each_engine(engine, gt, id) {
+		err = engine_setup_common(engine);
+		if (err)
+			return err;
+
+		err = setup(engine);
+		if (err)
+			return err;
+
+		err = engine_init_common(engine);
+		if (err)
+			return err;
+
+		intel_engine_add_user(engine);
+	}
+
+	return 0;
 }
 
 /**
@@ -804,9 +800,11 @@ err_unpin:
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!list_empty(&engine->active.requests));
+	tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
 
 	cleanup_status_page(engine);
 
+	intel_engine_fini_retire(engine);
 	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
@@ -814,8 +812,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	intel_context_unpin(engine->kernel_context);
-	intel_context_put(engine->kernel_context);
+	if (engine->kernel_context) {
+		intel_context_unpin(engine->kernel_context);
+		intel_context_put(engine->kernel_context);
+	}
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	intel_wa_list_free(&engine->ctx_wa_list);
@@ -851,6 +851,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
 	return bbaddr;
 }
 
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+	if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+		return 0;
+
+	/*
+	 * If we are doing a normal GPU reset, we can take our time and allow
+	 * the engine to quiesce. We've stopped submission to the engine, and
+	 * if we wait long enough an innocent context should complete and
+	 * leave the engine idle. So they should not be caught unaware by
+	 * the forthcoming GPU reset (which usually follows the stop_cs)!
+	 */
+	return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
 int intel_engine_stop_cs(struct intel_engine_cs *engine)
 {
 	struct intel_uncore *uncore = engine->uncore;
@@ -861,16 +876,16 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
 	if (INTEL_GEN(engine->i915) < 3)
 		return -ENODEV;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
 	err = 0;
 	if (__intel_wait_for_register_fw(uncore,
 					 mode, MODE_IDLE, MODE_IDLE,
-					 1000, 0,
+					 1000, stop_timeout(engine),
 					 NULL)) {
-		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+		ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
 		err = -ETIMEDOUT;
 	}
 
@@ -882,7 +897,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
 
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
 {
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
@@ -899,8 +914,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 }
 
 static u32
-read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
-		  i915_reg_t reg)
+read_subslice_reg(const struct intel_engine_cs *engine,
+		  int slice, int subslice, i915_reg_t reg)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_uncore *uncore = engine->uncore;
@@ -944,10 +959,11 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
 }
 
 /* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone)
 {
 	struct drm_i915_private *i915 = engine->i915;
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
 	struct intel_uncore *uncore = engine->uncore;
 	u32 mmio_base = engine->mmio_base;
 	int slice;
@@ -965,7 +981,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
 
 		instdone->slice_common =
 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
-		for_each_instdone_slice_subslice(i915, slice, subslice) {
+		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
 			instdone->sampler[slice][subslice] =
 				read_subslice_reg(engine, slice, subslice,
 						  GEN7_SAMPLER_INSTDONE);
@@ -1031,6 +1047,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	return idle;
 }
 
+void intel_engine_flush_submission(struct intel_engine_cs *engine)
+{
+	struct tasklet_struct *t = &engine->execlists.tasklet;
+
+	if (__tasklet_is_scheduled(t)) {
+		local_bh_disable();
+		if (tasklet_trylock(t)) {
+			/* Must wait for any GPU reset in progress. */
+			if (__tasklet_is_enabled(t))
+				t->func(t->data);
+			tasklet_unlock(t);
+		}
+		local_bh_enable();
+	}
+
+	/* Otherwise flush the tasklet if it was running on another cpu */
+	tasklet_unlock_wait(t);
+}
+
 /**
  * intel_engine_is_idle() - Report if the engine has finished process all work
  * @engine: the intel_engine_cs
@@ -1049,21 +1084,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 
 	/* Waiting to drain ELSP? */
 	if (execlists_active(&engine->execlists)) {
-		struct tasklet_struct *t = &engine->execlists.tasklet;
-
 		synchronize_hardirq(engine->i915->drm.pdev->irq);
 
-		local_bh_disable();
-		if (tasklet_trylock(t)) {
-			/* Must wait for any GPU reset in progress. */
-			if (__tasklet_is_enabled(t))
-				t->func(t->data);
-			tasklet_unlock(t);
-		}
-		local_bh_enable();
-
-		/* Otherwise flush the tasklet if it was on another cpu */
-		tasklet_unlock_wait(t);
+		intel_engine_flush_submission(engine);
 
 		if (execlists_active(&engine->execlists))
 			return false;
@@ -1093,7 +1116,7 @@ bool intel_engines_are_idle(struct intel_gt *gt)
 	if (!READ_ONCE(gt->awake))
 		return true;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		if (!intel_engine_is_idle(engine))
 			return false;
 	}
@@ -1106,7 +1129,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, gt->i915, id)
+	for_each_engine(engine, gt, id)
 		engine->set_default_submission(engine);
 }
 
@@ -1118,6 +1141,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	case 3:
 		/* maybe only uses physical not virtual addresses */
 		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
+	case 4:
+		return !IS_I965G(engine->i915); /* who knows! */
 	case 6:
 		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
 	default:
@@ -1193,6 +1218,38 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 	}
 }
 
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+	struct intel_timeline *tl;
+
+	/*
+	 * Even though we are holding the engine->active.lock here, there
+	 * is no control over the submission queue per-se and we are
+	 * inspecting the active state at a random point in time, with an
+	 * unknown queue. Play safe and make sure the timeline remains valid.
+	 * (Only being used for pretty printing, one extra kref shouldn't
+	 * cause a camel stampede!)
+	 */
+	rcu_read_lock();
+	tl = rcu_dereference(rq->timeline);
+	if (!kref_get_unless_zero(&tl->kref))
+		tl = NULL;
+	rcu_read_unlock();
+
+	return tl;
+}
+
+static const char *repr_timer(const struct timer_list *t)
+{
+	if (!READ_ONCE(t->expires))
+		return "inactive";
+
+	if (timer_pending(t))
+		return "active";
+
+	return "expired";
+}
+
 static void intel_engine_print_registers(struct intel_engine_cs *engine,
 					 struct drm_printer *m)
 {
@@ -1254,19 +1311,21 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 		unsigned int idx;
 		u8 read, write;
 
-		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
-			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
-			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
-			   num_entries);
+		drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
+			   yesno(test_bit(TASKLET_STATE_SCHED,
+					  &engine->execlists.tasklet.state)),
+			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+			   repr_timer(&engine->execlists.preempt),
+			   repr_timer(&engine->execlists.timer));
 
 		read = execlists->csb_head;
 		write = READ_ONCE(*execlists->csb_write);
 
-		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
-			   read, write,
-			   yesno(test_bit(TASKLET_STATE_SCHED,
-					  &engine->execlists.tasklet.state)),
-			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+			   read, write, num_entries);
+
 		if (read >= num_entries)
 			read = 0;
 		if (write >= num_entries)
@@ -1280,33 +1339,45 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 		}
 
 		execlists_active_lock_bh(execlists);
+		rcu_read_lock();
 		for (port = execlists->active; (rq = *port); port++) {
 			char hdr[80];
 			int len;
 
 			len = snprintf(hdr, sizeof(hdr),
-				       "\t\tActive[%d: ",
+				       "\t\tActive[%d]: ",
 				       (int)(port - execlists->active));
-			if (!i915_request_signaled(rq))
+			if (!i915_request_signaled(rq)) {
+				struct intel_timeline *tl = get_timeline(rq);
+
 				len += snprintf(hdr + len, sizeof(hdr) - len,
 						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
 						i915_ggtt_offset(rq->ring->vma),
-						rq->timeline->hwsp_offset,
+						tl ? tl->hwsp_offset : 0,
 						hwsp_seqno(rq));
+
+				if (tl)
+					intel_timeline_put(tl);
+			}
 			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
 			print_request(m, rq, hdr);
 		}
 		for (port = execlists->pending; (rq = *port); port++) {
+			struct intel_timeline *tl = get_timeline(rq);
 			char hdr[80];
 
 			snprintf(hdr, sizeof(hdr),
 				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
 				 (int)(port - execlists->pending),
 				 i915_ggtt_offset(rq->ring->vma),
-				 rq->timeline->hwsp_offset,
+				 tl ? tl->hwsp_offset : 0,
 				 hwsp_seqno(rq));
 			print_request(m, rq, hdr);
+
+			if (tl)
+				intel_timeline_put(tl);
 		}
+		rcu_read_unlock();
 		execlists_active_unlock_bh(execlists);
 	} else if (INTEL_GEN(dev_priv) > 6) {
 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1372,8 +1443,17 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
-	drm_printf(m, "\tHangcheck: %d ms ago\n",
-		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+	drm_printf(m, "\tBarriers?: %s\n",
+		   yesno(!llist_empty(&engine->barrier_tasks)));
+	drm_printf(m, "\tLatency: %luus\n",
+		   ewma__engine_latency_read(&engine->latency));
+
+	rcu_read_lock();
+	rq = READ_ONCE(engine->heartbeat.systole);
+	if (rq)
+		drm_printf(m, "\tHeartbeat: %d ms ago\n",
+			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+	rcu_read_unlock();
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
@@ -1383,6 +1463,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_lock_irqsave(&engine->active.lock, flags);
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
+		struct intel_timeline *tl = get_timeline(rq);
+
 		print_request(m, rq, "\t\tactive ");
 
 		drm_printf(m, "\t\tring->start:  0x%08x\n",
@@ -1395,18 +1477,27 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 			   rq->ring->emit);
 		drm_printf(m, "\t\tring->space:  0x%08x\n",
 			   rq->ring->space);
-		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
-			   rq->timeline->hwsp_offset);
+
+		if (tl) {
+			drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
+				   tl->hwsp_offset);
+			intel_timeline_put(tl);
+		}
 
 		print_request_ring(m, rq);
+
+		if (rq->context->lrc_reg_state) {
+			drm_printf(m, "Logical Ring Context:\n");
+			hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
+		}
 	}
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
-	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
+	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
 	if (wakeref) {
 		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
+		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
 	} else {
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
@@ -1458,7 +1549,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (!intel_context_inflight_count(rq->hw_context))
+			if (!intel_context_inflight_count(rq->context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..6c6fd185457c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,242 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that restores progress.
+ */
+
+static bool next_heartbeat(struct intel_engine_cs *engine)
+{
+	long delay;
+
+	delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+	if (!delay)
+		return false;
+
+	delay = msecs_to_jiffies_timeout(delay);
+	if (delay >= HZ)
+		delay = round_jiffies_up_relative(delay);
+	schedule_delayed_work(&engine->heartbeat.work, delay);
+
+	return true;
+}
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+static void show_heartbeat(const struct i915_request *rq,
+			   struct intel_engine_cs *engine)
+{
+	struct drm_printer p = drm_debug_printer("heartbeat");
+
+	intel_engine_dump(engine, &p,
+			  "%s heartbeat {prio:%d} not ticking\n",
+			  engine->name,
+			  rq->sched.attr.priority);
+}
+
+static void heartbeat(struct work_struct *wrk)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+	};
+	struct intel_engine_cs *engine =
+		container_of(wrk, typeof(*engine), heartbeat.work.work);
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+
+	rq = engine->heartbeat.systole;
+	if (rq && i915_request_completed(rq)) {
+		i915_request_put(rq);
+		engine->heartbeat.systole = NULL;
+	}
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return;
+
+	if (intel_gt_is_wedged(engine->gt))
+		goto out;
+
+	if (engine->heartbeat.systole) {
+		if (engine->schedule &&
+		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+			/*
+			 * Gradually raise the priority of the heartbeat to
+			 * give high priority work [which presumably desires
+			 * low latency and no jitter] the chance to naturally
+			 * complete before being preempted.
+			 */
+			attr.priority = I915_PRIORITY_MASK;
+			if (rq->sched.attr.priority >= attr.priority)
+				attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+			if (rq->sched.attr.priority >= attr.priority)
+				attr.priority = I915_PRIORITY_BARRIER;
+
+			local_bh_disable();
+			engine->schedule(rq, &attr);
+			local_bh_enable();
+		} else {
+			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+				show_heartbeat(rq, engine);
+
+			intel_gt_handle_error(engine->gt, engine->mask,
+					      I915_ERROR_CAPTURE,
+					      "stopped heartbeat on %s",
+					      engine->name);
+		}
+		goto out;
+	}
+
+	if (engine->wakeref_serial == engine->serial)
+		goto out;
+
+	mutex_lock(&ce->timeline->mutex);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		goto unlock;
+
+	idle_pulse(engine, rq);
+	if (i915_modparams.enable_hangcheck)
+		engine->heartbeat.systole = i915_request_get(rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out:
+	if (!next_heartbeat(engine))
+		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+	intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+		return;
+
+	next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+	if (cancel_delayed_work(&engine->heartbeat.work))
+		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+			       unsigned long delay)
+{
+	int err;
+
+	/* Send one last pulse before to cleanup persistent hogs */
+	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
+		err = intel_engine_pulse(engine);
+		if (err)
+			return err;
+	}
+
+	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+
+	if (intel_engine_pm_get_if_awake(engine)) {
+		if (delay)
+			intel_engine_unpark_heartbeat(engine);
+		else
+			intel_engine_park_heartbeat(engine);
+		intel_engine_pm_put(engine);
+	}
+
+	return 0;
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	if (mutex_lock_interruptible(&ce->timeline->mutex))
+		goto out_rpm;
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	int err = 0;
+
+	if (llist_empty(&engine->barrier_tasks))
+		return 0;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_rpm;
+	}
+
+	idle_pulse(engine, rq);
+	i915_request_add(rq);
+
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_heartbeat.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..a7b8c0f9e005
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+			       unsigned long delay);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 65b5ca74b394..ea90ab3e396e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,19 +6,24 @@
 
 #include "i915_drv.h"
 
+#include "intel_context.h"
 #include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_ring.h"
 
 static int __engine_unpark(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
 		container_of(wf, typeof(*engine), wakeref);
+	struct intel_context *ce;
 	void *map;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	intel_gt_pm_get(engine->gt);
 
@@ -30,10 +35,31 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (!IS_ERR_OR_NULL(map))
 		engine->pinned_default_state = map;
 
+	/* Discard stale context state from across idling */
+	ce = engine->kernel_context;
+	if (ce) {
+		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+
+		/* First poison the image to verify we never fully trust it */
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
+			struct drm_i915_gem_object *obj = ce->state->obj;
+			int type = i915_coherent_map_type(engine->i915);
+
+			map = i915_gem_object_pin_map(obj, type);
+			if (!IS_ERR(map)) {
+				memset(map, CONTEXT_REDZONE, obj->base.size);
+				i915_gem_object_flush_map(obj);
+				i915_gem_object_unpin_map(obj);
+			}
+		}
+
+		ce->ops->reset(ce);
+	}
+
 	if (engine->unpark)
 		engine->unpark(engine);
 
-	intel_engine_init_hangcheck(engine);
+	intel_engine_unpark_heartbeat(engine);
 	return 0;
 }
 
@@ -52,7 +78,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 static inline void __timeline_mark_unlock(struct intel_context *ce,
 					  unsigned long flags)
 {
-	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
 	local_irq_restore(flags);
 }
 
@@ -70,20 +96,65 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
 
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
+static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct i915_request *rq = to_request(fence);
+
+	ewma__engine_latency_add(&rq->engine->latency,
+				 ktime_us_delta(rq->fence.timestamp,
+						rq->duration.emitted));
+}
+
+static void
+__queue_and_release_pm(struct i915_request *rq,
+		       struct intel_timeline *tl,
+		       struct intel_engine_cs *engine)
+{
+	struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+	ENGINE_TRACE(engine, "\n");
+
+	/*
+	 * We have to serialise all potential retirement paths with our
+	 * submission, as we don't want to underflow either the
+	 * engine->wakeref.counter or our timeline->active_count.
+	 *
+	 * Equally, we cannot allow a new submission to start until
+	 * after we finish queueing, nor could we allow that submitter
+	 * to retire us before we are ready!
+	 */
+	spin_lock(&timelines->lock);
+
+	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
+	if (!atomic_fetch_inc(&tl->active_count))
+		list_add_tail(&tl->link, &timelines->active_list);
+
+	/* Hand the request over to HW and so engine_retire() */
+	__i915_request_queue(rq, NULL);
+
+	/* Let new submissions commence (and maybe retire this timeline) */
+	__intel_wakeref_defer_park(&engine->wakeref);
+
+	spin_unlock(&timelines->lock);
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
+	struct intel_context *ce = engine->kernel_context;
 	struct i915_request *rq;
 	unsigned long flags;
 	bool result = true;
 
-	/* Already inside the kernel context, safe to power down. */
-	if (engine->wakeref_serial == engine->serial)
-		return true;
-
 	/* GPU is pointing to the void, as good as in the kernel context. */
 	if (intel_gt_is_wedged(engine->gt))
 		return true;
 
+	GEM_BUG_ON(!intel_context_is_barrier(ce));
+
+	/* Already inside the kernel context, safe to power down. */
+	if (engine->wakeref_serial == engine->serial)
+		return true;
+
 	/*
 	 * Note, we do this without taking the timeline->mutex. We cannot
 	 * as we may be called while retiring the kernel context and so
@@ -95,34 +166,72 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * This should hold true as we can only park the engine after
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
+	 *
+	 * For unlocking, there are 2 other parties and the GPU who have a
+	 * stake here.
+	 *
+	 * A new gpu user will be waiting on the engine-pm to start their
+	 * engine_unpark. New waiters are predicated on engine->wakeref.count
+	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+	 * engine->wakeref.
+	 *
+	 * The other party is intel_gt_retire_requests(), which is walking the
+	 * list of active timelines looking for completions. Meanwhile as soon
+	 * as we call __i915_request_queue(), the GPU may complete our request.
+	 * Ergo, if we put ourselves on the timelines.active_list
+	 * (se intel_timeline_enter()) before we increment the
+	 * engine->wakeref.count, we may see the request completion and retire
+	 * it causing an undeflow of the engine->wakeref.
 	 */
-	flags = __timeline_mark_lock(engine->kernel_context);
+	flags = __timeline_mark_lock(ce);
+	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
 
-	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+	rq = __i915_request_create(ce, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
 		goto out_unlock;
 
-	intel_timeline_enter(rq->timeline);
-
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
-	__i915_request_commit(rq);
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
+		/*
+		 * Use an interrupt for precise measurement of duration,
+		 * otherwise we rely on someone else retiring all the requests
+		 * which may delay the signaling (i.e. we will likely wait
+		 * until the background request retirement running every
+		 * second or two).
+		 */
+		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
+		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
+		rq->duration.emitted = ktime_get();
+	}
 
-	/* Release our exclusive hold on the engine */
-	__intel_wakeref_defer_park(&engine->wakeref);
-	__i915_request_queue(rq, NULL);
+	/* Expose ourselves to the world */
+	__queue_and_release_pm(rq, ce->timeline, engine);
 
 	result = false;
 out_unlock:
-	__timeline_mark_unlock(engine->kernel_context, flags);
+	__timeline_mark_unlock(ce, flags);
 	return result;
 }
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct dma_fence_cb *cb =
+			container_of((struct list_head *)node,
+				     typeof(*cb), node);
+
+		cb->func(ERR_PTR(-EAGAIN), cb);
+	}
+}
+
 static int __engine_park(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
@@ -140,8 +249,11 @@ static int __engine_park(struct intel_wakeref *wf)
 	if (!switch_to_kernel_context(engine))
 		return -EBUSY;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
+
+	call_idle_barriers(engine); /* cleanup after wedging */
 
+	intel_engine_park_heartbeat(engine);
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
@@ -158,7 +270,8 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	engine->execlists.no_priolist = false;
 
-	intel_gt_pm_put(engine->gt);
+	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
+	intel_gt_pm_put_async(engine->gt);
 	return 0;
 }
 
@@ -169,9 +282,10 @@ static const struct intel_wakeref_ops wf_ops = {
 
 void intel_engine_init__pm(struct intel_engine_cs *engine)
 {
-	struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
+	struct intel_runtime_pm *rpm = engine->uncore->rpm;
 
 	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+	intel_engine_init_heartbeat(engine);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..e52c2b0cb245 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -7,6 +7,7 @@
 #ifndef INTEL_ENGINE_PM_H
 #define INTEL_ENGINE_PM_H
 
+#include "i915_request.h"
 #include "intel_engine_types.h"
 #include "intel_wakeref.h"
 
@@ -31,6 +32,36 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
 	intel_wakeref_put(&engine->wakeref);
 }
 
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+	intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+	intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
+static inline struct i915_request *
+intel_engine_create_kernel_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/*
+	 * The engine->kernel_context is special as it is used inside
+	 * the engine-pm barrier (see __engine_park()), circumventing
+	 * the usual mutexes and relying on the engine-pm barrier
+	 * instead. So whenever we use the engine->kernel_context
+	 * outside of the barrier, we must manually handle the
+	 * engine wakeref to serialise with the use inside.
+	 */
+	intel_engine_pm_get(engine);
+	rq = i915_request_create(engine->kernel_context);
+	intel_engine_pm_put(engine);
+
+	return rq;
+}
+
 void intel_engine_init__pm(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 379a91780bd4..397186818305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
 	struct intel_engine_pool_node *node =
@@ -94,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 		return ERR_PTR(-ENOMEM);
 
 	node->pool = pool;
-	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+	i915_active_init(&node->active, pool_active, pool_retire);
 
 	obj = i915_gem_object_create_internal(engine->i915, sz);
 	if (IS_ERR(obj)) {
@@ -109,9 +110,19 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 	return node;
 }
 
+static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+{
+	if (intel_engine_is_virtual(engine))
+		engine = intel_virtual_engine_get_sibling(engine, 0);
+
+	GEM_BUG_ON(!engine);
+	return &engine->pool;
+}
+
 struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size)
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
 {
+	struct intel_engine_pool *pool = lookup_pool(engine);
 	struct intel_engine_pool_node *node;
 	struct list_head *list;
 	unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index 8d069efd9457..1bd89cadc3b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -12,13 +12,13 @@
 #include "i915_request.h"
 
 struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size);
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
 
 static inline int
 intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
 			      struct i915_request *rq)
 {
-	return i915_active_ref(&node->active, rq->timeline, rq);
+	return i915_active_add_request(&node->active, rq);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9dd8c299cb2d..350da59e605b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -7,6 +7,7 @@
 #ifndef __INTEL_ENGINE_TYPES__
 #define __INTEL_ENGINE_TYPES__
 
+#include <linux/average.h>
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
 #include <linux/kref.h>
@@ -15,6 +16,7 @@
 #include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 #include "i915_gem.h"
 #include "i915_pmu.h"
@@ -58,6 +60,7 @@ struct i915_gem_context;
 struct i915_request;
 struct i915_sched_attr;
 struct intel_gt;
+struct intel_ring;
 struct intel_uncore;
 
 typedef u8 intel_engine_mask_t;
@@ -76,40 +79,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_engine_hangcheck {
-	u64 acthd;
-	u32 last_ring;
-	u32 last_head;
-	unsigned long action_timestamp;
-	struct intel_instdone instdone;
-};
-
-struct intel_ring {
-	struct kref ref;
-	struct i915_vma *vma;
-	void *vaddr;
-
-	/*
-	 * As we have two types of rings, one global to the engine used
-	 * by ringbuffer submission and those that are exclusive to a
-	 * context used by execlists, we have to play safe and allow
-	 * atomic updates to the pin_count. However, the actual pinning
-	 * of the context is either done during initialisation for
-	 * ringbuffer submission or serialised as part of the context
-	 * pinning for execlists, and so we do not need a mutex ourselves
-	 * to serialise intel_ring_pin/intel_ring_unpin.
-	 */
-	atomic_t pin_count;
-
-	u32 head;
-	u32 tail;
-	u32 emit;
-
-	u32 space;
-	u32 size;
-	u32 effective_size;
-};
-
 /*
  * we use a single page to load ctx workarounds so all of these
  * values are referred in terms of dwords
@@ -148,8 +117,12 @@ enum intel_engine_id {
 	VECS1,
 #define _VECS(n) (VECS0 + (n))
 	I915_NUM_ENGINES
+#define INVALID_ENGINE ((enum intel_engine_id)-1)
 };
 
+/* A simple estimator for the round-trip latency of an engine */
+DECLARE_EWMA(_engine_latency, 6, 4)
+
 struct st_preempt_hang {
 	struct completion completion;
 	unsigned int count;
@@ -174,6 +147,11 @@ struct intel_engine_execlists {
 	struct timer_list timer;
 
 	/**
+	 * @preempt: reset the current context if it fails to give way
+	 */
+	struct timer_list preempt;
+
+	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
 	 */
 	struct i915_priolist default_priolist;
@@ -300,13 +278,15 @@ struct intel_engine_cs {
 	u8 class;
 	u8 instance;
 
-	u8 uabi_class;
-	u8 uabi_instance;
+	u16 uabi_class;
+	u16 uabi_instance;
 
+	u32 uabi_capabilities;
 	u32 context_size;
 	u32 mmio_base;
 
-	u32 uabi_capabilities;
+	unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
 
 	struct rb_node uabi_node;
 
@@ -323,6 +303,11 @@ struct intel_engine_cs {
 
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
+	struct {
+		struct delayed_work work;
+		struct i915_request *systole;
+	} heartbeat;
+
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
@@ -335,6 +320,13 @@ struct intel_engine_cs {
 		struct intel_timeline *timeline;
 	} legacy;
 
+	/*
+	 * We track the average duration of the idle pulse on parking the
+	 * engine to keep an estimate of the how the fast the engine is
+	 * under ideal conditions.
+	 */
+	struct ewma__engine_latency latency;
+
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
@@ -408,7 +400,10 @@ struct intel_engine_cs {
 
 	struct {
 		void (*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine, bool stalled);
+
+		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
+		void (*cancel)(struct intel_engine_cs *engine);
+
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
@@ -458,29 +453,28 @@ struct intel_engine_cs {
 	void		(*schedule)(struct i915_request *request,
 				    const struct i915_sched_attr *attr);
 
-	/*
-	 * Cancel all requests on the hardware, or queued for execution.
-	 * This should only cancel the ready requests that have been
-	 * submitted to the engine (via the engine->submit_request callback).
-	 * This is called when marking the device as wedged.
-	 */
-	void		(*cancel_requests)(struct intel_engine_cs *engine);
-
-	void		(*destroy)(struct intel_engine_cs *engine);
+	void		(*release)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
 
+	/*
+	 * Keep track of completed timelines on this engine for early
+	 * retirement with the goal of quickly enabling powersaving as
+	 * soon as the engine is idle.
+	 */
+	struct intel_timeline *retire;
+	struct work_struct retire_work;
+
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
-	struct intel_engine_hangcheck hangcheck;
-
 #define I915_ENGINE_USING_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 	unsigned int flags;
 
@@ -539,6 +533,13 @@ struct intel_engine_cs {
 		 */
 		ktime_t total;
 	} stats;
+
+	struct {
+		unsigned long heartbeat_interval_ms;
+		unsigned long preempt_timeout_ms;
+		unsigned long stop_timeout_ms;
+		unsigned long timeslice_duration_ms;
+	} props;
 };
 
 static inline bool
@@ -583,20 +584,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
-#define instdone_slice_mask(dev_priv__) \
-	(IS_GEN(dev_priv__, 7) ? \
-	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+static inline bool
+intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
+{
+	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
+}
 
-#define instdone_subslice_mask(dev_priv__) \
-	(IS_GEN(dev_priv__, 7) ? \
-	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+#define instdone_has_slice(dev_priv___, sseu___, slice___) \
+	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
 
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
-	for ((slice__) = 0, (subslice__) = 0; \
-	     (slice__) < I915_MAX_SLICES; \
-	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
-	       (slice__) += ((subslice__) == 0)) \
-		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
-			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
+	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
+	 intel_sseu_has_subslice(sseu__, 0, subslice__))
 
+#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
+	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
+	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
+	     (slice_) += ((subslice_) == 0)) \
+		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
+			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
+						    subslice_)))
 #endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 77cd5de83930..9e7f12bef828 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -11,6 +11,7 @@
 #include "i915_drv.h"
 #include "intel_engine.h"
 #include "intel_engine_user.h"
+#include "intel_gt.h"
 
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
@@ -160,10 +161,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
 	};
 
 	if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))
-		return -1;
+		return INVALID_ENGINE;
 
 	if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max))
-		return -1;
+		return INVALID_ENGINE;
 
 	return map[ring->class].base + ring->instance;
 }
@@ -171,23 +172,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
 static void add_legacy_ring(struct legacy_ring *ring,
 			    struct intel_engine_cs *engine)
 {
-	int idx;
-
 	if (engine->gt != ring->gt || engine->class != ring->class) {
 		ring->gt = engine->gt;
 		ring->class = engine->class;
 		ring->instance = 0;
 	}
 
-	idx = legacy_ring_idx(ring);
-	if (unlikely(idx == -1))
-		return;
-
-	GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine));
-	ring->gt->engine[idx] = engine;
-	ring->instance++;
-
-	engine->legacy_idx = idx;
+	engine->legacy_idx = legacy_ring_idx(ring);
+	if (engine->legacy_idx != INVALID_ENGINE)
+		ring->instance++;
 }
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
@@ -208,6 +201,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 				     uabi_node);
 		char old[sizeof(engine->name)];
 
+		if (intel_gt_has_init_error(engine->gt))
+			continue; /* ignore incomplete engines */
+
 		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
 		engine->uabi_class = uabi_classes[engine->class];
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
new file mode 100644
index 000000000000..531d501be01f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -0,0 +1,1486 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/stop_machine.h>
+
+#include <asm/set_memory.h>
+#include <asm/smp.h>
+
+#include "intel_gt.h"
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_vgpu.h"
+
+#include "intel_gtt.h"
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
+static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
+				   unsigned long color,
+				   u64 *start,
+				   u64 *end)
+{
+	if (i915_node_color_differs(node, color))
+		*start += I915_GTT_PAGE_SIZE;
+
+	/*
+	 * Also leave a space between the unallocated reserved node after the
+	 * GTT and any objects within the GTT, i.e. we use the color adjustment
+	 * to insert a guard page to prevent prefetches crossing over the
+	 * GTT boundary.
+	 */
+	node = list_next_entry(node, node_list);
+	if (node->color != color)
+		*end -= I915_GTT_PAGE_SIZE;
+}
+
+static int ggtt_init_hw(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
+
+	ggtt->vm.is_ggtt = true;
+
+	/* Only VLV supports read-only GGTT mappings */
+	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
+
+	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
+		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
+
+	if (ggtt->mappable_end) {
+		if (!io_mapping_init_wc(&ggtt->iomap,
+					ggtt->gmadr.start,
+					ggtt->mappable_end)) {
+			ggtt->vm.cleanup(&ggtt->vm);
+			return -EIO;
+		}
+
+		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
+					      ggtt->mappable_end);
+	}
+
+	i915_ggtt_init_fences(ggtt);
+
+	return 0;
+}
+
+/**
+ * i915_ggtt_init_hw - Initialize GGTT hardware
+ * @i915: i915 device
+ */
+int i915_ggtt_init_hw(struct drm_i915_private *i915)
+{
+	int ret;
+
+	stash_init(&i915->mm.wc_stash);
+
+	/*
+	 * Note that we use page colouring to enforce a guard page at the
+	 * end of the address space. This is required as the CS may prefetch
+	 * beyond the end of the batch buffer, across the page boundary,
+	 * and beyond the end of the GTT if we do not provide a guard.
+	 */
+	ret = ggtt_init_hw(&i915->ggtt);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Certain Gen5 chipsets require require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+	/*
+	 * Query intel_iommu to see if we need the workaround. Presumably that
+	 * was loaded first.
+	 */
+	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
+}
+
+static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	/*
+	 * Don't bother messing with faults pre GEN6 as we have little
+	 * documentation supporting that it's a good idea.
+	 */
+	if (INTEL_GEN(i915) < 6)
+		return;
+
+	intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+	ggtt->invalidate(ggtt);
+}
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
+{
+	ggtt_suspend_mappings(&i915->ggtt);
+}
+
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+	spin_lock_irq(&uncore->lock);
+	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
+	spin_unlock_irq(&uncore->lock);
+}
+
+static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+	/*
+	 * Note that as an uncached mmio write, this will flush the
+	 * WCB of the writes into the GGTT before it triggers the invalidate.
+	 */
+	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	gen8_ggtt_invalidate(ggtt);
+
+	if (INTEL_GEN(i915) >= 12)
+		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
+				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
+	else
+		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	intel_gtt_chipset_flush();
+}
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+	writeq(pte, addr);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+
+	ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	struct sgt_iter sgt_iter;
+	gen8_pte_t __iomem *gtt_entries;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+	dma_addr_t addr;
+
+	/*
+	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+	 * not to allow the user to override access to a read only page.
+	 */
+
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
+	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+	ggtt->invalidate(ggtt);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+	struct sgt_iter iter;
+	dma_addr_t addr;
+
+	for_each_sgt_daddr(addr, iter, vma->pages)
+		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+			    u64 start, u64 length)
+{
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+	gen8_pte_t __iomem *gtt_base =
+		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	for (i = 0; i < num_entries; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct i915_vma *vma;
+	enum i915_cache_level level;
+	u32 flags;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct i915_vma *vma,
+					     enum i915_cache_level level,
+					     u32 flags)
+{
+	struct insert_entries arg = { vm, vma, level, flags };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *arg = _arg;
+
+	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+					  u64 start,
+					  u64 length)
+{
+	struct clear_range arg = { vm, start, length };
+
+	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	gen6_pte_t scratch_pte, __iomem *gtt_base =
+		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = vm->scratch[0].encode;
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+}
+
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level cache_level,
+				     u32 unused)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+				    flags);
+}
+
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static int ggtt_bind_vma(struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	u32 pte_flags;
+
+	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+	pte_flags = 0;
+	if (i915_gem_object_is_readonly(obj))
+		pte_flags |= PTE_READ_ONLY;
+
+	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+
+	/*
+	 * Without aliasing PPGTT there's no difference between
+	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+	 * upgrade to both bound if we bind either to avoid double-binding.
+	 */
+	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+
+	return 0;
+}
+
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{
+	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+	u64 size;
+	int ret;
+
+	if (!USES_GUC(ggtt->vm.i915))
+		return 0;
+
+	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+	size = ggtt->vm.total - GUC_GGTT_TOP;
+
+	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+				   PIN_NOEVICT);
+	if (ret)
+		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+	return ret;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{
+	if (drm_mm_node_allocated(&ggtt->uc_fw))
+		drm_mm_remove_node(&ggtt->uc_fw);
+}
+
+static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
+{
+	ggtt_release_guc_top(ggtt);
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
+}
+
+static int init_ggtt(struct i915_ggtt *ggtt)
+{
+	/*
+	 * Let GEM Manage all of the aperture.
+	 *
+	 * However, leave one page at the end still bound to the scratch page.
+	 * There are a number of places where the hardware apparently prefetches
+	 * past the end of the object, and we've seen multiple hangs with the
+	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
+	 * aperture.  One page should be enough to keep any prefetching inside
+	 * of the aperture.
+	 */
+	unsigned long hole_start, hole_end;
+	struct drm_mm_node *entry;
+	int ret;
+
+	/*
+	 * GuC requires all resources that we're sharing with it to be placed in
+	 * non-WOPCM memory. If GuC is not present or not in use we still need a
+	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
+	 * why.
+	 */
+	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
+			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
+
+	ret = intel_vgt_balloon(ggtt);
+	if (ret)
+		return ret;
+
+	mutex_init(&ggtt->error_mutex);
+	if (ggtt->mappable_end) {
+		/* Reserve a mappable slot for our lockless error capture */
+		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
+						  &ggtt->error_capture,
+						  PAGE_SIZE, 0,
+						  I915_COLOR_UNEVICTABLE,
+						  0, ggtt->mappable_end,
+						  DRM_MM_INSERT_LOW);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * The upper portion of the GuC address space has a sizeable hole
+	 * (several MB) that is inaccessible by GuC. Reserve this range within
+	 * GGTT as it can comfortably hold GuC/HuC firmware images.
+	 */
+	ret = ggtt_reserve_guc_top(ggtt);
+	if (ret)
+		goto err;
+
+	/* Clear any non-preallocated blocks */
+	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
+		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+			      hole_start, hole_end);
+		ggtt->vm.clear_range(&ggtt->vm, hole_start,
+				     hole_end - hole_start);
+	}
+
+	/* And finally clear the reserved guard page */
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
+
+	return 0;
+
+err:
+	cleanup_init_ggtt(ggtt);
+	return ret;
+}
+
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+				 enum i915_cache_level cache_level,
+				 u32 flags)
+{
+	u32 pte_flags;
+	int ret;
+
+	/* Currently applicable only to VLV */
+	pte_flags = 0;
+	if (i915_gem_object_is_readonly(vma->obj))
+		pte_flags |= PTE_READ_ONLY;
+
+	if (flags & I915_VMA_LOCAL_BIND) {
+		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
+
+		if (flags & I915_VMA_ALLOC) {
+			ret = alias->vm.allocate_va_range(&alias->vm,
+							  vma->node.start,
+							  vma->size);
+			if (ret)
+				return ret;
+
+			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+		}
+
+		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+				     __i915_vma_flags(vma)));
+		alias->vm.insert_entries(&alias->vm, vma,
+					 cache_level, pte_flags);
+	}
+
+	if (flags & I915_VMA_GLOBAL_BIND)
+		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+	return 0;
+}
+
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
+{
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+		struct i915_address_space *vm = vma->vm;
+
+		vm->clear_range(vm, vma->node.start, vma->size);
+	}
+
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
+		struct i915_address_space *vm =
+			&i915_vm_to_ggtt(vma->vm)->alias->vm;
+
+		vm->clear_range(vm, vma->node.start, vma->size);
+	}
+}
+
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+	struct i915_ppgtt *ppgtt;
+	int err;
+
+	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
+		err = -ENODEV;
+		goto err_ppgtt;
+	}
+
+	/*
+	 * Note we only pre-allocate as far as the end of the global
+	 * GTT. On 48b / 4-level page-tables, the difference is very,
+	 * very significant! We have to preallocate as GVT/vgpu does
+	 * not like the page directory disappearing.
+	 */
+	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
+	if (err)
+		goto err_ppgtt;
+
+	ggtt->alias = ppgtt;
+	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
+
+	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
+
+	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+
+	return 0;
+
+err_ppgtt:
+	i915_vm_put(&ppgtt->vm);
+	return err;
+}
+
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+	struct i915_ppgtt *ppgtt;
+
+	ppgtt = fetch_and_zero(&ggtt->alias);
+	if (!ppgtt)
+		return;
+
+	i915_vm_put(&ppgtt->vm);
+
+	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+}
+
+int i915_init_ggtt(struct drm_i915_private *i915)
+{
+	int ret;
+
+	ret = init_ggtt(&i915->ggtt);
+	if (ret)
+		return ret;
+
+	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
+		ret = init_aliasing_ppgtt(&i915->ggtt);
+		if (ret)
+			cleanup_init_ggtt(&i915->ggtt);
+	}
+
+	return 0;
+}
+
+static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
+{
+	struct i915_vma *vma, *vn;
+
+	atomic_set(&ggtt->vm.open, 0);
+
+	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
+	flush_workqueue(ggtt->vm.i915->wq);
+
+	mutex_lock(&ggtt->vm.mutex);
+
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+		WARN_ON(__i915_vma_unbind(vma));
+
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
+
+	ggtt_release_guc_top(ggtt);
+	intel_vgt_deballoon(ggtt);
+
+	ggtt->vm.cleanup(&ggtt->vm);
+
+	mutex_unlock(&ggtt->vm.mutex);
+	i915_address_space_fini(&ggtt->vm);
+
+	arch_phys_wc_del(ggtt->mtrr);
+
+	if (ggtt->iomap.size)
+		io_mapping_fini(&ggtt->iomap);
+}
+
+/**
+ * i915_ggtt_driver_release - Clean up GGTT hardware initialization
+ * @i915: i915 device
+ */
+void i915_ggtt_driver_release(struct drm_i915_private *i915)
+{
+	struct pagevec *pvec;
+
+	fini_aliasing_ppgtt(&i915->ggtt);
+
+	ggtt_cleanup_hw(&i915->ggtt);
+
+	pvec = &i915->mm.wc_stash.pvec;
+	if (pvec->nr) {
+		set_pages_array_wb(pvec->pages, pvec->nr);
+		__pagevec_release(pvec);
+	}
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+	return snb_gmch_ctl << 20;
+}
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+	if (bdw_gmch_ctl)
+		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+	if (bdw_gmch_ctl > 4)
+		bdw_gmch_ctl = 4;
+#endif
+
+	return bdw_gmch_ctl << 20;
+}
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+	if (gmch_ctrl)
+		return 1 << (20 + gmch_ctrl);
+
+	return 0;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	phys_addr_t phys_addr;
+	int ret;
+
+	/* For Modern GENs the PTEs and register space are split in the BAR */
+	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+
+	/*
+	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
+	 * will be dropped. For WC mappings in general we have 64 byte burst
+	 * writes when the WC buffer is flushed, so we can't use it, but have to
+	 * resort to an uncached mapping. The WC issue is easily caught by the
+	 * readback check when writing GTT PTE entries.
+	 */
+	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
+		ggtt->gsm = ioremap(phys_addr, size);
+	else
+		ggtt->gsm = ioremap_wc(phys_addr, size);
+	if (!ggtt->gsm) {
+		DRM_ERROR("Failed to map the ggtt page table\n");
+		return -ENOMEM;
+	}
+
+	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+	if (ret) {
+		DRM_ERROR("Scratch setup failed\n");
+		/* iounmap will also get called at remove, but meh */
+		iounmap(ggtt->gsm);
+		return ret;
+	}
+
+	ggtt->vm.scratch[0].encode =
+		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+				    I915_CACHE_NONE, 0);
+
+	return 0;
+}
+
+int ggtt_set_pages(struct i915_vma *vma)
+{
+	int ret;
+
+	GEM_BUG_ON(vma->pages);
+
+	ret = i915_get_ggtt_vma_pages(vma);
+	if (ret)
+		return ret;
+
+	vma->page_sizes = vma->obj->mm.page_sizes;
+
+	return 0;
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+	iounmap(ggtt->gsm);
+	cleanup_scratch_page(vm);
+}
+
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
+{
+	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
+					       pci_resource_len(pdev, bar));
+}
+
+static int gen8_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	unsigned int size;
+	u16 snb_gmch_ctl;
+	int err;
+
+	/* TODO: We're not aware of mappable constraints on gen8 yet */
+	if (!IS_DGFX(i915)) {
+		ggtt->gmadr = pci_resource(pdev, 2);
+		ggtt->mappable_end = resource_size(&ggtt->gmadr);
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+	if (IS_CHERRYVIEW(i915))
+		size = chv_get_total_gtt_size(snb_gmch_ctl);
+	else
+		size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+	ggtt->vm.insert_page = gen8_ggtt_insert_page;
+	ggtt->vm.clear_range = nop_clear_range;
+	if (intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+	if (intel_ggtt_update_needs_vtd_wa(i915) ||
+	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+		if (ggtt->vm.clear_range != nop_clear_range)
+			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+	}
+
+	ggtt->invalidate = gen8_ggtt_invalidate;
+
+	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
+	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
+	ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+	ggtt->vm.pte_encode = gen8_pte_encode;
+
+	setup_private_pat(ggtt->vm.gt->uncore);
+
+	return ggtt_probe_common(ggtt, size);
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_L3_LLC:
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_L3_LLC:
+		pte |= GEN7_PTE_CACHE_L3_LLC;
+		break;
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (!(flags & PTE_READ_ONLY))
+		pte |= BYT_PTE_WRITEABLE;
+
+	if (level != I915_CACHE_NONE)
+		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+	return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (level != I915_CACHE_NONE)
+		pte |= HSW_WB_LLC_AGE3;
+
+	return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+			   enum i915_cache_level level,
+			   u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		break;
+	case I915_CACHE_WT:
+		pte |= HSW_WT_ELLC_LLC_AGE3;
+		break;
+	default:
+		pte |= HSW_WB_ELLC_LLC_AGE3;
+		break;
+	}
+
+	return pte;
+}
+
+static int gen6_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	unsigned int size;
+	u16 snb_gmch_ctl;
+	int err;
+
+	ggtt->gmadr = pci_resource(pdev, 2);
+	ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+	/*
+	 * 64/512MB is the current min/max we actually know of, but this is
+	 * just a coarse sanity check.
+	 */
+	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+		return -ENXIO;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+	size = gen6_get_total_gtt_size(snb_gmch_ctl);
+	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+	ggtt->vm.clear_range = nop_clear_range;
+	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen6_ggtt_clear_range;
+	ggtt->vm.insert_page = gen6_ggtt_insert_page;
+	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+
+	ggtt->invalidate = gen6_ggtt_invalidate;
+
+	if (HAS_EDRAM(i915))
+		ggtt->vm.pte_encode = iris_pte_encode;
+	else if (IS_HASWELL(i915))
+		ggtt->vm.pte_encode = hsw_pte_encode;
+	else if (IS_VALLEYVIEW(i915))
+		ggtt->vm.pte_encode = byt_pte_encode;
+	else if (INTEL_GEN(i915) >= 7)
+		ggtt->vm.pte_encode = ivb_pte_encode;
+	else
+		ggtt->vm.pte_encode = snb_pte_encode;
+
+	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
+	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
+	ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+	return ggtt_probe_common(ggtt, size);
+}
+
+static void i915_gmch_remove(struct i915_address_space *vm)
+{
+	intel_gmch_remove();
+}
+
+static int i915_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	phys_addr_t gmadr_base;
+	int ret;
+
+	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
+	if (!ret) {
+		DRM_ERROR("failed to set up gmch\n");
+		return -EIO;
+	}
+
+	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+	ggtt->gmadr =
+		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+	ggtt->do_idle_maps = needs_idle_maps(i915);
+	ggtt->vm.insert_page = i915_ggtt_insert_page;
+	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
+	ggtt->vm.clear_range = i915_ggtt_clear_range;
+	ggtt->vm.cleanup = i915_gmch_remove;
+
+	ggtt->invalidate = gmch_ggtt_invalidate;
+
+	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
+	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
+	ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+	if (unlikely(ggtt->do_idle_maps))
+		dev_notice(i915->drm.dev,
+			   "Applying Ironlake quirks for intel_iommu\n");
+
+	return 0;
+}
+
+static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	int ret;
+
+	ggtt->vm.gt = gt;
+	ggtt->vm.i915 = i915;
+	ggtt->vm.dma = &i915->drm.pdev->dev;
+
+	if (INTEL_GEN(i915) <= 5)
+		ret = i915_gmch_probe(ggtt);
+	else if (INTEL_GEN(i915) < 8)
+		ret = gen6_gmch_probe(ggtt);
+	else
+		ret = gen8_gmch_probe(ggtt);
+	if (ret)
+		return ret;
+
+	if ((ggtt->vm.total - 1) >> 32) {
+		DRM_ERROR("We never expected a Global GTT with more than 32bits"
+			  " of address space! Found %lldM!\n",
+			  ggtt->vm.total >> 20);
+		ggtt->vm.total = 1ULL << 32;
+		ggtt->mappable_end =
+			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
+	}
+
+	if (ggtt->mappable_end > ggtt->vm.total) {
+		DRM_ERROR("mappable aperture extends past end of GGTT,"
+			  " aperture=%pa, total=%llx\n",
+			  &ggtt->mappable_end, ggtt->vm.total);
+		ggtt->mappable_end = ggtt->vm.total;
+	}
+
+	/* GMADR is the PCI mmio aperture into the global GTT. */
+	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
+	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
+	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
+			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+
+	return 0;
+}
+
+/**
+ * i915_ggtt_probe_hw - Probe GGTT hardware location
+ * @i915: i915 device
+ */
+int i915_ggtt_probe_hw(struct drm_i915_private *i915)
+{
+	int ret;
+
+	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+	if (ret)
+		return ret;
+
+	if (intel_vtd_active())
+		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+
+	return 0;
+}
+
+int i915_ggtt_enable_hw(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
+		return -EIO;
+
+	return 0;
+}
+
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
+{
+	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
+
+	ggtt->invalidate = guc_ggtt_invalidate;
+
+	ggtt->invalidate(ggtt);
+}
+
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
+{
+	/* XXX Temporary pardon for error unload */
+	if (ggtt->invalidate == gen8_ggtt_invalidate)
+		return;
+
+	/* We should only be called after i915_ggtt_enable_guc() */
+	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
+
+	ggtt->invalidate = gen8_ggtt_invalidate;
+
+	ggtt->invalidate(ggtt);
+}
+
+static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+{
+	struct i915_vma *vma;
+	bool flush = false;
+	int open;
+
+	intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+	mutex_lock(&ggtt->vm.mutex);
+
+	/* First fill our portion of the GTT with scratch pages */
+	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+	/* Skip rewriting PTE on VMA unbind. */
+	open = atomic_xchg(&ggtt->vm.open, 0);
+
+	/* clflush objects bound into the GGTT and rebind them. */
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+			continue;
+
+		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
+		WARN_ON(i915_vma_bind(vma,
+				      obj ? obj->cache_level : 0,
+				      PIN_GLOBAL, NULL));
+		if (obj) { /* only used during resume => exclusive access */
+			flush |= fetch_and_zero(&obj->write_domain);
+			obj->read_domains |= I915_GEM_DOMAIN_GTT;
+		}
+	}
+
+	atomic_set(&ggtt->vm.open, open);
+	ggtt->invalidate(ggtt);
+
+	mutex_unlock(&ggtt->vm.mutex);
+
+	if (flush)
+		wbinvd_on_all_cpus();
+}
+
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
+{
+	struct i915_ggtt *ggtt = &i915->ggtt;
+
+	ggtt_restore_mappings(ggtt);
+
+	if (INTEL_GEN(i915) >= 8)
+		setup_private_pat(ggtt->vm.gt->uncore);
+}
+
+static struct scatterlist *
+rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+	     unsigned int width, unsigned int height,
+	     unsigned int stride,
+	     struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int column, row;
+	unsigned int src_idx;
+
+	for (column = 0; column < width; column++) {
+		src_idx = stride * (height - 1) + column + offset;
+		for (row = 0; row < height; row++) {
+			st->nents++;
+			/*
+			 * We don't need the pages, but need to initialize
+			 * the entries so the sg list can be happily traversed.
+			 * The only thing we need are DMA addresses.
+			 */
+			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+			sg_dma_address(sg) =
+				i915_gem_object_get_dma_address(obj, src_idx);
+			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+			sg = sg_next(sg);
+			src_idx -= stride;
+		}
+	}
+
+	return sg;
+}
+
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+		   struct drm_i915_gem_object *obj)
+{
+	unsigned int size = intel_rotation_info_size(rot_info);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int ret = -ENOMEM;
+	int i;
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, size, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	st->nents = 0;
+	sg = st->sgl;
+
+	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
+		sg = rotate_pages(obj, rot_info->plane[i].offset,
+				  rot_info->plane[i].width, rot_info->plane[i].height,
+				  rot_info->plane[i].stride, st, sg);
+	}
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+
+	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+
+	return ERR_PTR(ret);
+}
+
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+	    unsigned int width, unsigned int height,
+	    unsigned int stride,
+	    struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int row;
+
+	for (row = 0; row < height; row++) {
+		unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+		while (left) {
+			dma_addr_t addr;
+			unsigned int length;
+
+			/*
+			 * We don't need the pages, but need to initialize
+			 * the entries so the sg list can be happily traversed.
+			 * The only thing we need are DMA addresses.
+			 */
+
+			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+			length = min(left, length);
+
+			st->nents++;
+
+			sg_set_page(sg, NULL, length, 0);
+			sg_dma_address(sg) = addr;
+			sg_dma_len(sg) = length;
+			sg = sg_next(sg);
+
+			offset += length / I915_GTT_PAGE_SIZE;
+			left -= length;
+		}
+
+		offset += stride - width;
+	}
+
+	return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+		  struct drm_i915_gem_object *obj)
+{
+	unsigned int size = intel_remapped_info_size(rem_info);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int ret = -ENOMEM;
+	int i;
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, size, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	st->nents = 0;
+	sg = st->sgl;
+
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+		sg = remap_pages(obj, rem_info->plane[i].offset,
+				 rem_info->plane[i].width, rem_info->plane[i].height,
+				 rem_info->plane[i].stride, st, sg);
+	}
+
+	i915_sg_trim(st);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+
+	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+	return ERR_PTR(ret);
+}
+
+static noinline struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+		    struct drm_i915_gem_object *obj)
+{
+	struct sg_table *st;
+	struct scatterlist *sg, *iter;
+	unsigned int count = view->partial.size;
+	unsigned int offset;
+	int ret = -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, count, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+	GEM_BUG_ON(!iter);
+
+	sg = st->sgl;
+	st->nents = 0;
+	do {
+		unsigned int len;
+
+		len = min(iter->length - (offset << PAGE_SHIFT),
+			  count << PAGE_SHIFT);
+		sg_set_page(sg, NULL, len, 0);
+		sg_dma_address(sg) =
+			sg_dma_address(iter) + (offset << PAGE_SHIFT);
+		sg_dma_len(sg) = len;
+
+		st->nents++;
+		count -= len >> PAGE_SHIFT;
+		if (count == 0) {
+			sg_mark_end(sg);
+			i915_sg_trim(st); /* Drop any unused tail entries. */
+
+			return st;
+		}
+
+		sg = __sg_next(sg);
+		iter = __sg_next(iter);
+		offset = 0;
+	} while (1);
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+	return ERR_PTR(ret);
+}
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma)
+{
+	int ret;
+
+	/*
+	 * The vma->pages are only valid within the lifespan of the borrowed
+	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
+	 * must be the vma->pages. A simple rule is that vma->pages must only
+	 * be accessed when the obj->mm.pages are pinned.
+	 */
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
+
+	switch (vma->ggtt_view.type) {
+	default:
+		GEM_BUG_ON(vma->ggtt_view.type);
+		/* fall through */
+	case I915_GGTT_VIEW_NORMAL:
+		vma->pages = vma->obj->mm.pages;
+		return 0;
+
+	case I915_GGTT_VIEW_ROTATED:
+		vma->pages =
+			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+		break;
+
+	case I915_GGTT_VIEW_REMAPPED:
+		vma->pages =
+			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+		break;
+
+	case I915_GGTT_VIEW_PARTIAL:
+		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
+		break;
+	}
+
+	ret = 0;
+	if (IS_ERR(vma->pages)) {
+		ret = PTR_ERR(vma->pages);
+		vma->pages = NULL;
+		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
+			  vma->ggtt_view.type, ret);
+	}
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..51b8718513bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -7,6 +7,8 @@
 #ifndef _INTEL_GPU_COMMANDS_H_
 #define _INTEL_GPU_COMMANDS_H_
 
+#include <linux/bitops.h>
+
 /*
  * Target address alignments required for GPU access e.g.
  * MI_STORE_DWORD_IMM.
@@ -112,6 +114,7 @@
 #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN	MI_INSTR(0x1c, 3) /* GEN12+ */
 #define   MI_SEMAPHORE_POLL		(1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
@@ -119,6 +122,8 @@
 #define   MI_SEMAPHORE_SAD_LTE_SDD	(3 << 12)
 #define   MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
 #define   MI_SEMAPHORE_SAD_NEQ_SDD	(5 << 12)
+#define   MI_SEMAPHORE_TOKEN_MASK	REG_GENMASK(9, 5)
+#define   MI_SEMAPHORE_TOKEN_SHIFT	5
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
@@ -132,7 +137,10 @@
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
+#define   MI_LRI_CS_MMIO		(1<<19)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
@@ -147,6 +155,7 @@
 #define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
 #define MI_LOAD_REGISTER_MEM	   MI_INSTR(0x29, 1)
 #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -156,7 +165,8 @@
 #define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -217,6 +227,7 @@
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
@@ -224,7 +235,9 @@
 #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
 #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_L3_RO_CACHE_INVALIDATE		REG_BIT(10) /* gen12 */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_HDC_PIPELINE_FLUSH		REG_BIT(9)  /* gen12 */
 #define   PIPE_CONTROL_NOTIFY				(1<<8)
 #define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
 #define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
@@ -235,6 +248,29 @@
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
+#define MI_MATH(x)			MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP			MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)	MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)	MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)		MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)		MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD			MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB			MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND			MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR			MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR			MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)	MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)	MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)		(x)
+#define   MI_MATH_REG_SRCA		0x20
+#define   MI_MATH_REG_SRCB		0x21
+#define   MI_MATH_REG_ACCU		0x31
+#define   MI_MATH_REG_ZF		0x32
+#define   MI_MATH_REG_CF		0x33
+
 /*
  * Commands used only by the command parser
  */
@@ -251,7 +287,6 @@
 #define MI_CLFLUSH              MI_INSTR(0x27, 0)
 #define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
 #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
 #define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
 #define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
 #define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
@@ -286,4 +321,31 @@
 #define COLOR_BLT     ((0x2<<29)|(0x40<<22))
 #define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
 
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
+
+static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags)
+{
+	*cs++ = MI_BATCH_BUFFER_START | flags;
+	*cs++ = addr;
+
+	return cs;
+}
+
 #endif /* _INTEL_GPU_COMMANDS_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d48ec9a76ed1..da2b6e2ae692 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,10 +3,18 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "debugfs_gt.h"
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_mocs.h"
+#include "intel_rc6.h"
+#include "intel_renderstate.h"
+#include "intel_rps.h"
 #include "intel_uncore.h"
+#include "intel_pm.h"
 
 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 {
@@ -18,15 +26,102 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
-	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
+	intel_gt_init_requests(gt);
+	intel_gt_init_timelines(gt);
 	intel_gt_pm_init_early(gt);
+
+	intel_rps_init_early(&gt->rps);
 	intel_uc_init_early(&gt->uc);
 }
 
-void intel_gt_init_hw(struct drm_i915_private *i915)
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
+{
+	gt->ggtt = ggtt;
+}
+
+static void init_unused_ring(struct intel_gt *gt, u32 base)
+{
+	struct intel_uncore *uncore = gt->uncore;
+
+	intel_uncore_write(uncore, RING_CTL(base), 0);
+	intel_uncore_write(uncore, RING_HEAD(base), 0);
+	intel_uncore_write(uncore, RING_TAIL(base), 0);
+	intel_uncore_write(uncore, RING_START(base), 0);
+}
+
+static void init_unused_rings(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	if (IS_I830(i915)) {
+		init_unused_ring(gt, PRB1_BASE);
+		init_unused_ring(gt, SRB0_BASE);
+		init_unused_ring(gt, SRB1_BASE);
+		init_unused_ring(gt, SRB2_BASE);
+		init_unused_ring(gt, SRB3_BASE);
+	} else if (IS_GEN(i915, 2)) {
+		init_unused_ring(gt, SRB0_BASE);
+		init_unused_ring(gt, SRB1_BASE);
+	} else if (IS_GEN(i915, 3)) {
+		init_unused_ring(gt, PRB1_BASE);
+		init_unused_ring(gt, PRB2_BASE);
+	}
+}
+
+int intel_gt_init_hw(struct intel_gt *gt)
 {
-	i915->gt.ggtt = &i915->ggtt;
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	int ret;
+
+	gt->last_init_time = ktime_get();
+
+	/* Double layer security blanket, see i915_gem_init() */
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
+		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
+
+	if (IS_HASWELL(i915))
+		intel_uncore_write(uncore,
+				   MI_PREDICATE_RESULT_2,
+				   IS_HSW_GT3(i915) ?
+				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
+
+	/* Apply the GT workarounds... */
+	intel_gt_apply_workarounds(gt);
+	/* ...and determine whether they are sticking. */
+	intel_gt_verify_workarounds(gt, "init");
+
+	intel_gt_init_swizzling(gt);
+
+	/*
+	 * At least 830 can leave some of the unused rings
+	 * "active" (ie. head != tail) after resume which
+	 * will prevent c3 entry. Makes sure all unused rings
+	 * are totally idle.
+	 */
+	init_unused_rings(gt);
+
+	ret = i915_ppgtt_init_hw(gt);
+	if (ret) {
+		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
+		goto out;
+	}
+
+	/* We can't enable contexts until all firmware is loaded */
+	ret = intel_uc_init_hw(&gt->uc);
+	if (ret) {
+		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
+		goto out;
+	}
+
+	intel_mocs_init(gt);
+
+out:
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+	return ret;
 }
 
 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
@@ -89,7 +184,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
 
-		for_each_engine_masked(engine, i915, engine_mask, id)
+		for_each_engine_masked(engine, gt, engine_mask, id)
 			gen8_clear_engine_error_register(engine);
 	}
 }
@@ -100,7 +195,7 @@ static void gen6_check_faults(struct intel_gt *gt)
 	enum intel_engine_id id;
 	u32 fault;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		fault = GEN6_RING_FAULT_REG_READ(engine);
 		if (fault & RING_FAULT_VALID) {
 			DRM_DEBUG_DRIVER("Unexpected fault\n"
@@ -176,7 +271,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
 
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 {
-	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
 	intel_wakeref_t wakeref;
 
 	/*
@@ -200,18 +295,18 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 	wmb();
 
-	if (INTEL_INFO(i915)->has_coherent_ggtt)
+	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
 		return;
 
 	intel_gt_chipset_flush(gt);
 
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		struct intel_uncore *uncore = gt->uncore;
+	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
+		unsigned long flags;
 
-		spin_lock_irq(&uncore->lock);
+		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
 					     RING_HEAD(RENDER_RING_BASE));
-		spin_unlock_irq(&uncore->lock);
+		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
 
@@ -222,7 +317,14 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
 		intel_gtt_chipset_flush();
 }
 
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
+void intel_gt_driver_register(struct intel_gt *gt)
+{
+	intel_rps_driver_register(&gt->rps);
+
+	debugfs_gt_register(gt);
+}
+
+static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 {
 	struct drm_i915_private *i915 = gt->i915;
 	struct drm_i915_gem_object *obj;
@@ -230,7 +332,7 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 	int ret;
 
 	obj = i915_gem_object_create_stolen(i915, size);
-	if (!obj)
+	if (IS_ERR(obj))
 		obj = i915_gem_object_create_internal(i915, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate scratch page\n");
@@ -256,13 +358,301 @@ err_unref:
 	return ret;
 }
 
-void intel_gt_fini_scratch(struct intel_gt *gt)
+static void intel_gt_fini_scratch(struct intel_gt *gt)
 {
 	i915_vma_unpin_and_release(&gt->scratch, 0);
 }
 
+static struct i915_address_space *kernel_vm(struct intel_gt *gt)
+{
+	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
+		return &i915_ppgtt_create(gt)->vm;
+	else
+		return i915_vm_get(&gt->ggtt->vm);
+}
+
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
+	intel_context_timeline_unlock(tl);
+	return 0;
+}
+
+static int __engines_record_defaults(struct intel_gt *gt)
+{
+	struct i915_request *requests[I915_NUM_ENGINES] = {};
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * As we reset the gpu during very early sanitisation, the current
+	 * register state on the GPU should reflect its defaults values.
+	 * We load a context onto the hw (with restore-inhibit), then switch
+	 * over to a second context to save that default register state. We
+	 * can then prime every new context with that state so they all start
+	 * from the same default HW values.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct intel_renderstate so;
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		/* We must be able to switch to something! */
+		GEM_BUG_ON(!engine->kernel_context);
+
+		err = intel_renderstate_init(&so, engine);
+		if (err)
+			goto out;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto out;
+		}
+
+		rq = intel_context_create_request(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			intel_context_put(ce);
+			goto out;
+		}
+
+		err = intel_engine_emit_ctx_wa(rq);
+		if (err)
+			goto err_rq;
+
+		err = intel_renderstate_emit(&so, rq);
+		if (err)
+			goto err_rq;
+
+err_rq:
+		requests[id] = i915_request_get(rq);
+		i915_request_add(rq);
+		intel_renderstate_fini(&so);
+		if (err)
+			goto out;
+	}
+
+	/* Flush the default context image to memory, and enable powersaving. */
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		err = -EIO;
+		goto out;
+	}
+
+	for (id = 0; id < ARRAY_SIZE(requests); id++) {
+		struct i915_request *rq;
+		struct i915_vma *state;
+		void *vaddr;
+
+		rq = requests[id];
+		if (!rq)
+			continue;
+
+		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
+		state = rq->context->state;
+		if (!state)
+			continue;
+
+		/* Serialise with retirement on another CPU */
+		GEM_BUG_ON(!i915_request_completed(rq));
+		err = __intel_context_flush_retire(rq->context);
+		if (err)
+			goto out;
+
+		/* We want to be able to unbind the state from the GGTT */
+		GEM_BUG_ON(intel_context_is_pinned(rq->context));
+
+		/*
+		 * As we will hold a reference to the logical state, it will
+		 * not be torn down with the context, and importantly the
+		 * object will hold onto its vma (making it possible for a
+		 * stray GTT write to corrupt our defaults). Unmap the vma
+		 * from the GTT to prevent such accidents and reclaim the
+		 * space.
+		 */
+		err = i915_vma_unbind(state);
+		if (err)
+			goto out;
+
+		i915_gem_object_lock(state->obj);
+		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+		i915_gem_object_unlock(state->obj);
+		if (err)
+			goto out;
+
+		i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
+
+		/* Check we can acquire the image of the context state */
+		vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto out;
+		}
+
+		rq->engine->default_state = i915_gem_object_get(state->obj);
+		i915_gem_object_unpin_map(state->obj);
+	}
+
+out:
+	/*
+	 * If we have to abandon now, we expect the engines to be idle
+	 * and ready to be torn-down. The quickest way we can accomplish
+	 * this is by declaring ourselves wedged.
+	 */
+	if (err)
+		intel_gt_set_wedged(gt);
+
+	for (id = 0; id < ARRAY_SIZE(requests); id++) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		rq = requests[id];
+		if (!rq)
+			continue;
+
+		ce = rq->context;
+		i915_request_put(rq);
+		intel_context_put(ce);
+	}
+	return err;
+}
+
+static int __engines_verify_workarounds(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		if (intel_engine_verify_workarounds(engine, "load"))
+			err = -EIO;
+	}
+
+	return err;
+}
+
+static void __intel_gt_disable(struct intel_gt *gt)
+{
+	intel_gt_set_wedged_on_init(gt);
+
+	intel_gt_suspend_prepare(gt);
+	intel_gt_suspend_late(gt);
+
+	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+}
+
+int intel_gt_init(struct intel_gt *gt)
+{
+	int err;
+
+	err = i915_inject_probe_error(gt->i915, -ENODEV);
+	if (err)
+		return err;
+
+	/*
+	 * This is just a security blanket to placate dragons.
+	 * On some systems, we very sporadically observe that the first TLBs
+	 * used by the CS may be stale, despite us poking the TLB reset. If
+	 * we hold the forcewake during initialisation these problems
+	 * just magically go away.
+	 */
+	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+	err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+	if (err)
+		goto out_fw;
+
+	intel_gt_pm_init(gt);
+
+	gt->vm = kernel_vm(gt);
+	if (!gt->vm) {
+		err = -ENOMEM;
+		goto err_pm;
+	}
+
+	err = intel_engines_init(gt);
+	if (err)
+		goto err_engines;
+
+	intel_uc_init(&gt->uc);
+
+	err = intel_gt_resume(gt);
+	if (err)
+		goto err_uc_init;
+
+	err = __engines_record_defaults(gt);
+	if (err)
+		goto err_gt;
+
+	err = __engines_verify_workarounds(gt);
+	if (err)
+		goto err_gt;
+
+	err = i915_inject_probe_error(gt->i915, -EIO);
+	if (err)
+		goto err_gt;
+
+	goto out_fw;
+err_gt:
+	__intel_gt_disable(gt);
+	intel_uc_fini_hw(&gt->uc);
+err_uc_init:
+	intel_uc_fini(&gt->uc);
+err_engines:
+	intel_engines_release(gt);
+	i915_vm_put(fetch_and_zero(&gt->vm));
+err_pm:
+	intel_gt_pm_fini(gt);
+	intel_gt_fini_scratch(gt);
+out_fw:
+	if (err)
+		intel_gt_set_wedged_on_init(gt);
+	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+	return err;
+}
+
+void intel_gt_driver_remove(struct intel_gt *gt)
+{
+	__intel_gt_disable(gt);
+
+	intel_uc_fini_hw(&gt->uc);
+	intel_uc_fini(&gt->uc);
+
+	intel_engines_release(gt);
+}
+
+void intel_gt_driver_unregister(struct intel_gt *gt)
+{
+	intel_rps_driver_unregister(&gt->rps);
+}
+
+void intel_gt_driver_release(struct intel_gt *gt)
+{
+	struct i915_address_space *vm;
+
+	vm = fetch_and_zero(&gt->vm);
+	if (vm) /* FIXME being called twice on error paths :( */
+		i915_vm_put(vm);
+
+	intel_gt_pm_fini(gt);
+	intel_gt_fini_scratch(gt);
+}
+
 void intel_gt_driver_late_release(struct intel_gt *gt)
 {
 	intel_uc_driver_late_release(&gt->uc);
+	intel_gt_fini_requests(gt);
 	intel_gt_fini_reset(gt);
+	intel_gt_fini_timelines(gt);
+	intel_engines_free(gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4920cb351f10..1dac441cb8f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -12,6 +12,12 @@
 
 struct drm_i915_private;
 
+#define GT_TRACE(gt, fmt, ...) do {					\
+	const struct intel_gt *gt__ __maybe_unused = (gt);		\
+	GEM_TRACE("%s  " fmt, dev_name(gt__->i915->drm.dev),		\
+		  ##__VA_ARGS__);					\
+} while (0)
+
 static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
 {
 	return container_of(uc, struct intel_gt, uc);
@@ -28,7 +34,14 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
 }
 
 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw(struct drm_i915_private *i915);
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int __must_check intel_gt_init_hw(struct intel_gt *gt);
+int intel_gt_init(struct intel_gt *gt);
+void intel_gt_driver_register(struct intel_gt *gt);
+
+void intel_gt_driver_unregister(struct intel_gt *gt);
+void intel_gt_driver_remove(struct intel_gt *gt);
+void intel_gt_driver_release(struct intel_gt *gt);
 
 void intel_gt_driver_late_release(struct intel_gt *gt);
 
@@ -39,22 +52,20 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 void intel_gt_chipset_flush(struct intel_gt *gt);
 
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
-void intel_gt_fini_scratch(struct intel_gt *gt);
-
 static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 					  enum intel_gt_scratch_field field)
 {
 	return i915_ggtt_offset(gt->scratch) + field;
 }
 
-static inline bool intel_gt_is_wedged(struct intel_gt *gt)
+static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
 {
 	return __intel_reset_failed(&gt->reset);
 }
 
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
+static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
+{
+	return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
 
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 34a4fb624bf7..f796bdf1ed30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -11,6 +11,7 @@
 #include "intel_gt.h"
 #include "intel_gt_irq.h"
 #include "intel_uncore.h"
+#include "intel_rps.h"
 
 static void guc_irq_handler(struct intel_guc *guc, u16 iir)
 {
@@ -27,7 +28,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		tasklet = true;
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
-		intel_engine_breadcrumbs_irq(engine);
+		intel_engine_signal_breadcrumbs(engine);
 		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
 	}
 
@@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
 		return guc_irq_handler(&gt->uc.guc, iir);
 
 	if (instance == OTHER_GTPM_INSTANCE)
-		return gen11_rps_irq_handler(gt, iir);
+		return gen11_rps_irq_handler(&gt->rps, iir);
 
 	WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
 		  instance, iir);
@@ -244,9 +245,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
 }
 
 static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -270,11 +271,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]);
+		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
 	}
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gen6_rps_irq_handler(gt->i915, gt_iir[2]);
+		gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
 		guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index fac75afed35b..d1c2f034296a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -4,17 +4,38 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <linux/suspend.h>
+
 #include "i915_drv.h"
+#include "i915_globals.h"
 #include "i915_params.h"
+#include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_llc.h"
 #include "intel_pm.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
 #include "intel_wakeref.h"
 
-static void pm_notify(struct drm_i915_private *i915, int state)
+static void user_forcewake(struct intel_gt *gt, bool suspend)
 {
-	blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+	int count = atomic_read(&gt->user_wakeref);
+
+	/* Inside suspend/resume so single threaded, no races to worry about. */
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend) {
+		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+		atomic_sub(count, &gt->wakeref.count);
+	} else {
+		atomic_add(count, &gt->wakeref.count);
+	}
+	intel_gt_pm_put(gt);
 }
 
 static int __gt_unpark(struct intel_wakeref *wf)
@@ -22,7 +43,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
 	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
 	struct drm_i915_private *i915 = gt->i915;
 
-	GEM_TRACE("\n");
+	GT_TRACE(gt, "\n");
+
+	i915_globals_unpark();
 
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
@@ -38,48 +61,38 @@ static int __gt_unpark(struct intel_wakeref *wf)
 	gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 	GEM_BUG_ON(!gt->awake);
 
-	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
-		intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	intel_enable_gt_powersave(i915);
-
-	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
-
+	intel_rc6_unpark(&gt->rc6);
+	intel_rps_unpark(&gt->rps);
 	i915_pmu_gt_unparked(i915);
 
-	intel_gt_queue_hangcheck(gt);
-
-	pm_notify(i915, INTEL_GT_UNPARK);
+	intel_gt_unpark_requests(gt);
 
 	return 0;
 }
 
 static int __gt_park(struct intel_wakeref *wf)
 {
-	struct drm_i915_private *i915 =
-		container_of(wf, typeof(*i915), gt.wakeref);
-	intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
+	intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
+	struct drm_i915_private *i915 = gt->i915;
 
-	GEM_TRACE("\n");
+	GT_TRACE(gt, "\n");
 
-	pm_notify(i915, INTEL_GT_PARK);
+	intel_gt_park_requests(gt);
 
+	i915_vma_parked(gt);
 	i915_pmu_gt_parked(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_idle(i915);
-
-	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
-		i915_rc6_ctx_wa_check(i915);
-		intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	}
+	intel_rps_park(&gt->rps);
+	intel_rc6_park(&gt->rc6);
 
 	/* Everything switched off, flush any residual interrupt just in case */
 	intel_synchronize_irq(i915);
 
+	/* Defer dropping the display power well for 100ms, it's slow! */
 	GEM_BUG_ON(!wakeref);
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+	intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+
+	i915_globals_park();
 
 	return 0;
 }
@@ -87,14 +100,22 @@ static int __gt_park(struct intel_wakeref *wf)
 static const struct intel_wakeref_ops wf_ops = {
 	.get = __gt_unpark,
 	.put = __gt_park,
-	.flags = INTEL_WAKEREF_PUT_ASYNC,
 };
 
 void intel_gt_pm_init_early(struct intel_gt *gt)
 {
-	intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
+	intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
+}
 
-	BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
+void intel_gt_pm_init(struct intel_gt *gt)
+{
+	/*
+	 * Enabling power-management should be "self-healing". If we cannot
+	 * enable a feature, simply leave it disabled with a notice to the
+	 * user.
+	 */
+	intel_rc6_init(&gt->rc6);
+	intel_rps_init(&gt->rps);
 }
 
 static bool reset_engines(struct intel_gt *gt)
@@ -105,37 +126,64 @@ static bool reset_engines(struct intel_gt *gt)
 	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
 }
 
-/**
- * intel_gt_sanitize: called after the GPU has lost power
- * @gt: the i915 GT container
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_gt_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_gt_sanitize(struct intel_gt *gt, bool force)
+static void gt_sanitize(struct intel_gt *gt, bool force)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+
+	GT_TRACE(gt, "force:%s", yesno(force));
 
-	GEM_TRACE("\n");
+	/* Use a raw wakeref to avoid calling intel_display_power_get early */
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+	/*
+	 * As we have just resumed the machine and woken the device up from
+	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+	 * back to defaults, recovering from whatever wedged state we left it
+	 * in and so worth trying to use the device once more.
+	 */
+	if (intel_gt_is_wedged(gt))
+		intel_gt_unset_wedged(gt);
 
 	intel_uc_sanitize(&gt->uc);
 
-	if (!reset_engines(gt) && !force)
-		return;
+	for_each_engine(engine, gt, id)
+		if (engine->reset.prepare)
+			engine->reset.prepare(engine);
+
+	intel_uc_reset_prepare(&gt->uc);
+
+	if (reset_engines(gt) || force) {
+		for_each_engine(engine, gt, id)
+			__intel_engine_reset(engine, false);
+	}
 
-	for_each_engine(engine, gt->i915, id)
-		__intel_engine_reset(engine, false);
+	for_each_engine(engine, gt, id)
+		if (engine->reset.finish)
+			engine->reset.finish(engine);
+
+	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+}
+
+void intel_gt_pm_fini(struct intel_gt *gt)
+{
+	intel_rc6_fini(&gt->rc6);
 }
 
 int intel_gt_resume(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	int err = 0;
+	int err;
+
+	err = intel_gt_has_init_error(gt);
+	if (err)
+		return err;
+
+	GT_TRACE(gt, "\n");
 
 	/*
 	 * After resume, we may need to poke into the pinned kernel
@@ -144,14 +192,28 @@ int intel_gt_resume(struct intel_gt *gt)
 	 * allowing us to fixup the user contexts on their first pin.
 	 */
 	intel_gt_pm_get(gt);
-	for_each_engine(engine, gt->i915, id) {
-		struct intel_context *ce;
 
-		intel_engine_pm_get(engine);
+	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+	intel_rc6_sanitize(&gt->rc6);
+	gt_sanitize(gt, true);
+	if (intel_gt_is_wedged(gt)) {
+		err = -EIO;
+		goto out_fw;
+	}
+
+	/* Only when the HW is re-initialised, can we replay the requests */
+	err = intel_gt_init_hw(gt);
+	if (err) {
+		dev_err(gt->i915->drm.dev,
+			"Failed to initialize GPU, declaring it wedged!\n");
+		goto err_wedged;
+	}
 
-		ce = engine->kernel_context;
-		if (ce)
-			ce->ops->reset(ce);
+	intel_rps_enable(&gt->rps);
+	intel_llc_enable(&gt->llc);
+
+	for_each_engine(engine, gt, id) {
+		intel_engine_pm_get(engine);
 
 		engine->serial++; /* kernel context lost */
 		err = engine->resume(engine);
@@ -161,22 +223,111 @@ int intel_gt_resume(struct intel_gt *gt)
 			dev_err(gt->i915->drm.dev,
 				"Failed to restart %s (%d)\n",
 				engine->name, err);
-			break;
+			goto err_wedged;
 		}
 	}
-	intel_gt_pm_put(gt);
 
+	intel_rc6_enable(&gt->rc6);
+
+	intel_uc_resume(&gt->uc);
+
+	user_forcewake(gt, false);
+
+out_fw:
+	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+	intel_gt_pm_put(gt);
 	return err;
+
+err_wedged:
+	intel_gt_set_wedged(gt);
+	goto out_fw;
+}
+
+static void wait_for_suspend(struct intel_gt *gt)
+{
+	if (!intel_gt_pm_is_awake(gt))
+		return;
+
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+		intel_gt_retire_requests(gt);
+	}
+
+	intel_gt_pm_wait_for_idle(gt);
+}
+
+void intel_gt_suspend_prepare(struct intel_gt *gt)
+{
+	user_forcewake(gt, true);
+	wait_for_suspend(gt);
+
+	intel_uc_suspend(&gt->uc);
+}
+
+static suspend_state_t pm_suspend_target(void)
+{
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
+	return pm_suspend_target_state;
+#else
+	return PM_SUSPEND_TO_IDLE;
+#endif
+}
+
+void intel_gt_suspend_late(struct intel_gt *gt)
+{
+	intel_wakeref_t wakeref;
+
+	/* We expect to be idle already; but also want to be independent */
+	wait_for_suspend(gt);
+
+	if (is_mock_gt(gt))
+		return;
+
+	GEM_BUG_ON(gt->awake);
+
+	/*
+	 * On disabling the device, we want to turn off HW access to memory
+	 * that we no longer own.
+	 *
+	 * However, not all suspend-states disable the device. S0 (s2idle)
+	 * is effectively runtime-suspend, the device is left powered on
+	 * but needs to be put into a low power state. We need to keep
+	 * powermanagement enabled, but we also retain system state and so
+	 * it remains safe to keep on using our allocated memory.
+	 */
+	if (pm_suspend_target() == PM_SUSPEND_TO_IDLE)
+		return;
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+		intel_rps_disable(&gt->rps);
+		intel_rc6_disable(&gt->rc6);
+		intel_llc_disable(&gt->llc);
+	}
+
+	gt_sanitize(gt, false);
+
+	GT_TRACE(gt, "\n");
 }
 
 void intel_gt_runtime_suspend(struct intel_gt *gt)
 {
 	intel_uc_runtime_suspend(&gt->uc);
+
+	GT_TRACE(gt, "\n");
 }
 
 int intel_gt_runtime_resume(struct intel_gt *gt)
 {
+	GT_TRACE(gt, "\n");
 	intel_gt_init_swizzling(gt);
 
 	return intel_uc_runtime_resume(&gt->uc);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index fb39d99cd6ee..60f0e2fbe55c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -12,11 +12,6 @@
 #include "intel_gt_types.h"
 #include "intel_wakeref.h"
 
-enum {
-	INTEL_GT_UNPARK,
-	INTEL_GT_PARK,
-};
-
 static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
 {
 	return intel_wakeref_is_active(&gt->wakeref);
@@ -27,6 +22,11 @@ static inline void intel_gt_pm_get(struct intel_gt *gt)
 	intel_wakeref_get(&gt->wakeref);
 }
 
+static inline void __intel_gt_pm_get(struct intel_gt *gt)
+{
+	__intel_wakeref_get(&gt->wakeref);
+}
+
 static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
 {
 	return intel_wakeref_get_if_active(&gt->wakeref);
@@ -37,16 +37,30 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
 	intel_wakeref_put(&gt->wakeref);
 }
 
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+	intel_wakeref_put_async(&gt->wakeref);
+}
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
 	return intel_wakeref_wait_for_idle(&gt->wakeref);
 }
 
 void intel_gt_pm_init_early(struct intel_gt *gt);
+void intel_gt_pm_init(struct intel_gt *gt);
+void intel_gt_pm_fini(struct intel_gt *gt);
 
-void intel_gt_sanitize(struct intel_gt *gt, bool force);
+void intel_gt_suspend_prepare(struct intel_gt *gt);
+void intel_gt_suspend_late(struct intel_gt *gt);
 int intel_gt_resume(struct intel_gt *gt);
+
 void intel_gt_runtime_suspend(struct intel_gt *gt);
 int intel_gt_runtime_resume(struct intel_gt *gt);
 
+static inline bool is_mock_gt(const struct intel_gt *gt)
+{
+	return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+}
+
 #endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..7ef1d37970f6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,225 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/workqueue.h>
+
+#include "i915_drv.h" /* for_each_engine() */
+#include "i915_request.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static bool retire_requests(struct intel_timeline *tl)
+{
+	struct i915_request *rq, *rn;
+
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
+		if (!i915_request_retire(rq))
+			return false;
+
+	/* And check nothing new was submitted */
+	return !i915_active_fence_isset(&tl->last_request);
+}
+
+static bool flush_submission(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool active = false;
+
+	if (!intel_gt_pm_is_awake(gt))
+		return false;
+
+	for_each_engine(engine, gt, id) {
+		intel_engine_flush_submission(engine);
+		active |= flush_work(&engine->retire_work);
+		active |= flush_work(&engine->wakeref.work);
+	}
+
+	return active;
+}
+
+static void engine_retire(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), retire_work);
+	struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+	do {
+		struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+		/*
+		 * Our goal here is to retire _idle_ timelines as soon as
+		 * possible (as they are idle, we do not expect userspace
+		 * to be cleaning up anytime soon).
+		 *
+		 * If the timeline is currently locked, either it is being
+		 * retired elsewhere or about to be!
+		 */
+		if (mutex_trylock(&tl->mutex)) {
+			retire_requests(tl);
+			mutex_unlock(&tl->mutex);
+		}
+		intel_timeline_put(tl);
+
+		GEM_BUG_ON(!next);
+		tl = ptr_mask_bits(next, 1);
+	} while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+		       struct intel_timeline *tl)
+{
+#define STUB ((struct intel_timeline *)1)
+	struct intel_timeline *first;
+
+	/*
+	 * We open-code a llist here to include the additional tag [BIT(0)]
+	 * so that we know when the timeline is already on a
+	 * retirement queue: either this engine or another.
+	 */
+
+	if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */
+		return false;
+
+	intel_timeline_get(tl);
+	first = READ_ONCE(engine->retire);
+	do
+		tl->retire = ptr_pack_bits(first, 1, 1);
+	while (!try_cmpxchg(&engine->retire, &first, tl));
+
+	return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl)
+{
+	if (add_retire(engine, tl))
+		schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+	INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+	flush_work(&engine->retire_work);
+	GEM_BUG_ON(engine->retire);
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+	struct intel_gt_timelines *timelines = &gt->timelines;
+	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
+	bool interruptible;
+	LIST_HEAD(free);
+
+	interruptible = true;
+	if (unlikely(timeout < 0))
+		timeout = -timeout, interruptible = false;
+
+	flush_submission(gt); /* kick the ksoftirqd tasklets */
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex)) {
+			active_count++; /* report busy to caller, try again? */
+			continue;
+		}
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!atomic_read(&tl->active_count));
+		atomic_inc(&tl->active_count); /* pin the list element */
+		spin_unlock(&timelines->lock);
+
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 interruptible,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
+		if (!retire_requests(tl) || flush_submission(gt))
+			active_count++;
+
+		spin_lock(&timelines->lock);
+
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (atomic_dec_and_test(&tl->active_count))
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(atomic_read(&tl->active_count));
+			list_add(&tl->link, &free);
+		}
+	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+
+	return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+	/* If the device is asleep, we have no requests outstanding */
+	if (!intel_gt_pm_is_awake(gt))
+		return 0;
+
+	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct intel_gt *gt =
+		container_of(work, typeof(*gt), requests.retire_work.work);
+
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+	intel_gt_retire_requests(gt);
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+	cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_fini_requests(struct intel_gt *gt)
+{
+	/* Wait until the work is marked as finished before unloading! */
+	cancel_delayed_work_sync(&gt->requests.retire_work);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..dbac53baf1cb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_engine_cs;
+struct intel_gt;
+struct intel_timeline;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+	intel_gt_retire_requests_timeout(gt, 0);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+void intel_gt_fini_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..96890dd12b5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,7 +17,10 @@
 
 #include "i915_vma.h"
 #include "intel_engine_types.h"
+#include "intel_llc_types.h"
 #include "intel_reset_types.h"
+#include "intel_rc6_types.h"
+#include "intel_rps_types.h"
 #include "intel_wakeref.h"
 
 struct drm_i915_private;
@@ -25,14 +28,6 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
-struct intel_hangcheck {
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
-	struct delayed_work work;
-};
-
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -49,12 +44,23 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
+	struct intel_gt_requests {
+		/**
+		 * We leave the user IRQ off as much as possible,
+		 * but this means that requests will finish and never
+		 * be retired once the system goes idle. Set a timer to
+		 * fire periodically while the ring is running. When it
+		 * fires, go retire requests.
+		 */
+		struct delayed_work retire_work;
+	} requests;
+
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
 
-	struct intel_hangcheck hangcheck;
 	struct intel_reset reset;
 
 	/**
@@ -66,7 +72,9 @@ struct intel_gt {
 	 */
 	intel_wakeref_t awake;
 
-	struct blocking_notifier_head pm_notifications;
+	struct intel_llc llc;
+	struct intel_rc6 rc6;
+	struct intel_rps rps;
 
 	ktime_t last_init_time;
 
@@ -82,6 +90,13 @@ struct intel_gt {
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
 					    [MAX_ENGINE_INSTANCE + 1];
+
+	/*
+	 * Default address space (either GGTT or ppGTT depending on arch).
+	 *
+	 * Reserved for exclusive use by the kernel.
+	 */
+	struct i915_address_space *vm;
 };
 
 enum intel_gt_scratch_field {
@@ -89,14 +104,16 @@ enum intel_gt_scratch_field {
 	INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
 
 	/* 8 bytes */
-	INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
-
-	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
 
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+	/* 6 * 8 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+	/* 4 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
new file mode 100644
index 000000000000..16acdc5d6734
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h> /* fault-inject.h is not standalone! */
+
+#include <linux/fault-inject.h>
+
+#include "i915_trace.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+void stash_init(struct pagestash *stash)
+{
+	pagevec_init(&stash->pvec);
+	spin_lock_init(&stash->lock);
+}
+
+static struct page *stash_pop_page(struct pagestash *stash)
+{
+	struct page *page = NULL;
+
+	spin_lock(&stash->lock);
+	if (likely(stash->pvec.nr))
+		page = stash->pvec.pages[--stash->pvec.nr];
+	spin_unlock(&stash->lock);
+
+	return page;
+}
+
+static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
+{
+	unsigned int nr;
+
+	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
+
+	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
+	memcpy(stash->pvec.pages + stash->pvec.nr,
+	       pvec->pages + pvec->nr - nr,
+	       sizeof(pvec->pages[0]) * nr);
+	stash->pvec.nr += nr;
+
+	spin_unlock(&stash->lock);
+
+	pvec->nr -= nr;
+}
+
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
+{
+	struct pagevec stack;
+	struct page *page;
+
+	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
+		i915_gem_shrink_all(vm->i915);
+
+	page = stash_pop_page(&vm->free_pages);
+	if (page)
+		return page;
+
+	if (!vm->pt_kmap_wc)
+		return alloc_page(gfp);
+
+	/* Look in our global stash of WC pages... */
+	page = stash_pop_page(&vm->i915->mm.wc_stash);
+	if (page)
+		return page;
+
+	/*
+	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
+	 *
+	 * We have to be careful as page allocation may trigger the shrinker
+	 * (via direct reclaim) which will fill up the WC stash underneath us.
+	 * So we add our WB pages into a temporary pvec on the stack and merge
+	 * them into the WC stash after all the allocations are complete.
+	 */
+	pagevec_init(&stack);
+	do {
+		struct page *page;
+
+		page = alloc_page(gfp);
+		if (unlikely(!page))
+			break;
+
+		stack.pages[stack.nr++] = page;
+	} while (pagevec_space(&stack));
+
+	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
+		page = stack.pages[--stack.nr];
+
+		/* Merge spare WC pages to the global stash */
+		if (stack.nr)
+			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+
+		/* Push any surplus WC pages onto the local VM stash */
+		if (stack.nr)
+			stash_push_pagevec(&vm->free_pages, &stack);
+	}
+
+	/* Return unwanted leftovers */
+	if (unlikely(stack.nr)) {
+		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
+		__pagevec_release(&stack);
+	}
+
+	return page;
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm,
+				  bool immediate)
+{
+	struct pagevec *pvec = &vm->free_pages.pvec;
+	struct pagevec stack;
+
+	lockdep_assert_held(&vm->free_pages.lock);
+	GEM_BUG_ON(!pagevec_count(pvec));
+
+	if (vm->pt_kmap_wc) {
+		/*
+		 * When we use WC, first fill up the global stash and then
+		 * only if full immediately free the overflow.
+		 */
+		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
+
+		/*
+		 * As we have made some room in the VM's free_pages,
+		 * we can wait for it to fill again. Unless we are
+		 * inside i915_address_space_fini() and must
+		 * immediately release the pages!
+		 */
+		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
+			return;
+
+		/*
+		 * We have to drop the lock to allow ourselves to sleep,
+		 * so take a copy of the pvec and clear the stash for
+		 * others to use it as we sleep.
+		 */
+		stack = *pvec;
+		pagevec_reinit(pvec);
+		spin_unlock(&vm->free_pages.lock);
+
+		pvec = &stack;
+		set_pages_array_wb(pvec->pages, pvec->nr);
+
+		spin_lock(&vm->free_pages.lock);
+	}
+
+	__pagevec_release(pvec);
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+	/*
+	 * On !llc, we need to change the pages back to WB. We only do so
+	 * in bulk, so we rarely need to change the page attributes here,
+	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
+	 * To make detection of the possible sleep more likely, use an
+	 * unconditional might_sleep() for everybody.
+	 */
+	might_sleep();
+	spin_lock(&vm->free_pages.lock);
+	while (!pagevec_space(&vm->free_pages.pvec))
+		vm_free_pages_release(vm, false);
+	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+	pagevec_add(&vm->free_pages.pvec, page);
+	spin_unlock(&vm->free_pages.lock);
+}
+
+void __i915_vm_close(struct i915_address_space *vm)
+{
+	struct i915_vma *vma, *vn;
+
+	mutex_lock(&vm->mutex);
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
+		__i915_vma_put(vma);
+
+		i915_gem_object_put(obj);
+	}
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
+	mutex_unlock(&vm->mutex);
+}
+
+void i915_address_space_fini(struct i915_address_space *vm)
+{
+	spin_lock(&vm->free_pages.lock);
+	if (pagevec_count(&vm->free_pages.pvec))
+		vm_free_pages_release(vm, true);
+	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
+	spin_unlock(&vm->free_pages.lock);
+
+	drm_mm_takedown(&vm->mm);
+
+	mutex_destroy(&vm->mutex);
+}
+
+static void __i915_vm_release(struct work_struct *work)
+{
+	struct i915_address_space *vm =
+		container_of(work, struct i915_address_space, rcu.work);
+
+	vm->cleanup(vm);
+	i915_address_space_fini(vm);
+
+	kfree(vm);
+}
+
+void i915_vm_release(struct kref *kref)
+{
+	struct i915_address_space *vm =
+		container_of(kref, struct i915_address_space, ref);
+
+	GEM_BUG_ON(i915_is_ggtt(vm));
+	trace_i915_ppgtt_release(vm);
+
+	queue_rcu_work(vm->i915->wq, &vm->rcu);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass)
+{
+	kref_init(&vm->ref);
+	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+	atomic_set(&vm->open, 1);
+
+	/*
+	 * The vm->mutex must be reclaim safe (for use in the shrinker).
+	 * Do a dummy acquire now under fs_reclaim so that any allocation
+	 * attempt holding the lock is immediately reported by lockdep.
+	 */
+	mutex_init(&vm->mutex);
+	lockdep_set_subclass(&vm->mutex, subclass);
+	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+	GEM_BUG_ON(!vm->total);
+	drm_mm_init(&vm->mm, 0, vm->total);
+	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
+
+	stash_init(&vm->free_pages);
+
+	INIT_LIST_HEAD(&vm->bound_list);
+}
+
+void clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+
+	if (vma->pages != vma->obj->mm.pages) {
+		sg_free_table(vma->pages);
+		kfree(vma->pages);
+	}
+	vma->pages = NULL;
+
+	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+			    struct i915_page_dma *p,
+			    gfp_t gfp)
+{
+	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
+	if (unlikely(!p->page))
+		return -ENOMEM;
+
+	p->daddr = dma_map_page_attrs(vm->dma,
+				      p->page, 0, PAGE_SIZE,
+				      PCI_DMA_BIDIRECTIONAL,
+				      DMA_ATTR_SKIP_CPU_SYNC |
+				      DMA_ATTR_NO_WARN);
+	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+		vm_free_page(vm, p->page);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+}
+
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	vm_free_page(vm, p->page);
+}
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+{
+	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+}
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+{
+	unsigned long size;
+
+	/*
+	 * In order to utilize 64K pages for an object with a size < 2M, we will
+	 * need to support a 64K scratch page, given that every 16th entry for a
+	 * page-table operating in 64K mode must point to a properly aligned 64K
+	 * region, including any PTEs which happen to point to scratch.
+	 *
+	 * This is only relevant for the 48b PPGTT where we support
+	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
+	 * scratch (read-only) between all vm, we create one 64k scratch page
+	 * for all.
+	 */
+	size = I915_GTT_PAGE_SIZE_4K;
+	if (i915_vm_is_4lvl(vm) &&
+	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+		size = I915_GTT_PAGE_SIZE_64K;
+		gfp |= __GFP_NOWARN;
+	}
+	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
+
+	do {
+		unsigned int order = get_order(size);
+		struct page *page;
+		dma_addr_t addr;
+
+		page = alloc_pages(gfp, order);
+		if (unlikely(!page))
+			goto skip;
+
+		addr = dma_map_page_attrs(vm->dma,
+					  page, 0, size,
+					  PCI_DMA_BIDIRECTIONAL,
+					  DMA_ATTR_SKIP_CPU_SYNC |
+					  DMA_ATTR_NO_WARN);
+		if (unlikely(dma_mapping_error(vm->dma, addr)))
+			goto free_page;
+
+		if (unlikely(!IS_ALIGNED(addr, size)))
+			goto unmap_page;
+
+		vm->scratch[0].base.page = page;
+		vm->scratch[0].base.daddr = addr;
+		vm->scratch_order = order;
+		return 0;
+
+unmap_page:
+		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
+free_page:
+		__free_pages(page, order);
+skip:
+		if (size == I915_GTT_PAGE_SIZE_4K)
+			return -ENOMEM;
+
+		size = I915_GTT_PAGE_SIZE_4K;
+		gfp &= ~__GFP_NOWARN;
+	} while (1);
+}
+
+void cleanup_scratch_page(struct i915_address_space *vm)
+{
+	struct i915_page_dma *p = px_base(&vm->scratch[0]);
+	unsigned int order = vm->scratch_order;
+
+	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
+		       PCI_DMA_BIDIRECTIONAL);
+	__free_pages(p->page, order);
+}
+
+void free_scratch(struct i915_address_space *vm)
+{
+	int i;
+
+	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
+		return;
+
+	for (i = 1; i <= vm->top; i++) {
+		if (!px_dma(&vm->scratch[i]))
+			break;
+		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+	}
+
+	cleanup_scratch_page(vm);
+}
+
+void gtt_write_workarounds(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+
+	/*
+	 * This function is for gtt related workarounds. This function is
+	 * called on driver load and after a GPU reset, so you can place
+	 * workarounds here even if they get overwritten by GPU reset.
+	 */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
+	if (IS_BROADWELL(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+	else if (IS_CHERRYVIEW(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+	else if (IS_GEN9_LP(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+
+	/*
+	 * To support 64K PTEs we need to first enable the use of the
+	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
+	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
+	 * shouldn't be needed after GEN10.
+	 *
+	 * 64K pages were first introduced from BDW+, although technically they
+	 * only *work* from gen9+. For pre-BDW we instead have the option for
+	 * 32K pages, but we don't currently have any support for it in our
+	 * driver.
+	 */
+	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
+	    INTEL_GEN(i915) <= 10)
+		intel_uncore_rmw(uncore,
+				 GEN8_GAMW_ECO_DEV_RW_IA,
+				 0,
+				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+	if (IS_GEN_RANGE(i915, 8, 11)) {
+		bool can_use_gtt_cache = true;
+
+		/*
+		 * According to the BSpec if we use 2M/1G pages then we also
+		 * need to disable the GTT cache. At least on BDW we can see
+		 * visual corruption when using 2M pages, and not disabling the
+		 * GTT cache.
+		 */
+		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+			can_use_gtt_cache = false;
+
+		/* WaGttCachingOffByDefault */
+		intel_uncore_write(uncore,
+				   HSW_GTT_CACHE_EN,
+				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+		WARN_ON_ONCE(can_use_gtt_cache &&
+			     intel_uncore_read(uncore,
+					       HSW_GTT_CACHE_EN) == 0);
+	}
+}
+
+u64 gen8_pte_encode(dma_addr_t addr,
+		    enum i915_cache_level level,
+		    u32 flags)
+{
+	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= PPAT_UNCACHED;
+		break;
+	case I915_CACHE_WT:
+		pte |= PPAT_DISPLAY_ELLC;
+		break;
+	default:
+		pte |= PPAT_CACHED;
+		break;
+	}
+
+	return pte;
+}
+
+static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
+static void cnl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(0),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(1),
+			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(2),
+			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(3),
+			   GEN8_PPAT_UC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(4),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(5),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(6),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(7),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+}
+
+/*
+ * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases.
+ */
+static void bdw_setup_private_ppat(struct intel_uncore *uncore)
+{
+	u64 pat;
+
+	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
+	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
+	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
+	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
+	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+static void chv_setup_private_ppat(struct intel_uncore *uncore)
+{
+	u64 pat;
+
+	/*
+	 * Map WB on BDW to snooped on CHV.
+	 *
+	 * Only the snoop bit has meaning for CHV, the rest is
+	 * ignored.
+	 *
+	 * The hardware will never snoop for certain types of accesses:
+	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
+	 * - PPGTT page tables
+	 * - some other special cycles
+	 *
+	 * As with BDW, we also need to consider the following for GT accesses:
+	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
+	 * so RTL will always use the value corresponding to
+	 * pat_sel = 000".
+	 * Which means we must set the snoop bit in PAT entry 0
+	 * in order to keep the global status page working.
+	 */
+
+	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(1, 0) |
+	      GEN8_PPAT(2, 0) |
+	      GEN8_PPAT(3, 0) |
+	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+void setup_private_pat(struct intel_uncore *uncore)
+{
+	struct drm_i915_private *i915 = uncore->i915;
+
+	GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+	if (INTEL_GEN(i915) >= 12)
+		tgl_setup_private_ppat(uncore);
+	else if (INTEL_GEN(i915) >= 10)
+		cnl_setup_private_ppat(uncore);
+	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
+		chv_setup_private_ppat(uncore);
+	else
+		bdw_setup_private_ppat(uncore);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_gtt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
new file mode 100644
index 000000000000..7da7681c20b1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -0,0 +1,587 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Please try to maintain the following order within this file unless it makes
+ * sense to do otherwise. From top to bottom:
+ * 1. typedefs
+ * 2. #defines, and macros
+ * 3. structure definitions
+ * 4. function prototypes
+ *
+ * Within each section, please try to order by generation in ascending order,
+ * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ */
+
+#ifndef __INTEL_GTT_H__
+#define __INTEL_GTT_H__
+
+#include <linux/io-mapping.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/pagevec.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_mm.h>
+
+#include "gt/intel_reset.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_selftest.h"
+#include "i915_vma_types.h"
+
+#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
+#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
+
+#define I915_GTT_PAGE_SIZE_4K	BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K	BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M	BIT_ULL(21)
+
+#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
+#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
+
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
+#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
+
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
+typedef u32 gen6_pte_t;
+typedef u64 gen8_pte_t;
+
+#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
+
+#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
+#define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
+#define I915_PDES			512
+#define I915_PDE_MASK			(I915_PDES - 1)
+
+/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PTE_CACHE_LLC		(2 << 1)
+#define GEN6_PTE_UNCACHED		(1 << 1)
+#define GEN6_PTE_VALID			REG_BIT(0)
+
+#define GEN6_PTES			I915_PTES(sizeof(gen6_pte_t))
+#define GEN6_PD_SIZE		        (I915_PDES * PAGE_SIZE)
+#define GEN6_PD_ALIGN			(PAGE_SIZE * 16)
+#define GEN6_PDE_SHIFT			22
+#define GEN6_PDE_VALID			REG_BIT(0)
+#define NUM_PTE(pde_shift)     (1 << (pde_shift - PAGE_SHIFT))
+
+#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
+
+#define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
+#define BYT_PTE_WRITEABLE		REG_BIT(1)
+
+/*
+ * Cacheability Control is a 4-bit value. The low three bits are stored in bits
+ * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
+#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_PTE_UNCACHED		(0)
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
+
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 |  11:0
+ * PDPE  |  PDE  |  PTE  | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
+ * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
+ * PML4E | PDPE  |  PDE  |  PTE  | offset
+ */
+#define GEN8_3LVL_PDPES			4
+
+#define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE			0 /* WB LLC */
+#define PPAT_CACHED			_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC		_PAGE_PCD /* WT eLLC */
+
+#define CHV_PPAT_SNOOP			REG_BIT(6)
+#define GEN8_PPAT_AGE(x)		((x)<<4)
+#define GEN8_PPAT_LLCeLLC		(3<<2)
+#define GEN8_PPAT_LLCELLC		(2<<2)
+#define GEN8_PPAT_LLC			(1<<2)
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
+#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
+
+#define GEN8_PDE_IPS_64K BIT(11)
+#define GEN8_PDE_PS_2M   BIT(7)
+
+#define for_each_sgt_daddr(__dp, __iter, __sgt) \
+	__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+
+struct i915_page_dma {
+	struct page *page;
+	union {
+		dma_addr_t daddr;
+
+		/*
+		 * For gen6/gen7 only. This is the offset in the GGTT
+		 * where the page directory entries for PPGTT begin
+		 */
+		u32 ggtt_offset;
+	};
+};
+
+struct i915_page_scratch {
+	struct i915_page_dma base;
+	u64 encode;
+};
+
+struct i915_page_table {
+	struct i915_page_dma base;
+	atomic_t used;
+};
+
+struct i915_page_directory {
+	struct i915_page_table pt;
+	spinlock_t lock;
+	void *entry[512];
+};
+
+#define __px_choose_expr(x, type, expr, other) \
+	__builtin_choose_expr( \
+	__builtin_types_compatible_p(typeof(x), type) || \
+	__builtin_types_compatible_p(typeof(x), const type), \
+	({ type __x = (type)(x); expr; }), \
+	other)
+
+#define px_base(px) \
+	__px_choose_expr(px, struct i915_page_dma *, __x, \
+	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+	(void)0))))
+#define px_dma(px) (px_base(px)->daddr)
+
+#define px_pt(px) \
+	__px_choose_expr(px, struct i915_page_table *, __x, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+	(void)0))
+#define px_used(px) (&px_pt(px)->used)
+
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
+struct i915_vma;
+struct intel_gt;
+
+struct i915_vma_ops {
+	/* Map an object into an address space with the given cache flags. */
+	int (*bind_vma)(struct i915_vma *vma,
+			enum i915_cache_level cache_level,
+			u32 flags);
+	/*
+	 * Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page.
+	 */
+	void (*unbind_vma)(struct i915_vma *vma);
+
+	int (*set_pages)(struct i915_vma *vma);
+	void (*clear_pages)(struct i915_vma *vma);
+};
+
+struct pagestash {
+	spinlock_t lock;
+	struct pagevec pvec;
+};
+
+void stash_init(struct pagestash *stash);
+
+struct i915_address_space {
+	struct kref ref;
+	struct rcu_work rcu;
+
+	struct drm_mm mm;
+	struct intel_gt *gt;
+	struct drm_i915_private *i915;
+	struct device *dma;
+	/*
+	 * Every address space belongs to a struct file - except for the global
+	 * GTT that is owned by the driver (and so @file is set to NULL). In
+	 * principle, no information should leak from one context to another
+	 * (or between files/processes etc) unless explicitly shared by the
+	 * owner. Tracking the owner is important in order to free up per-file
+	 * objects along with the file, to aide resource tracking, and to
+	 * assign blame.
+	 */
+	struct drm_i915_file_private *file;
+	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
+	u64 reserved;		/* size addr space reserved */
+
+	unsigned int bind_async_flags;
+
+	/*
+	 * Each active user context has its own address space (in full-ppgtt).
+	 * Since the vm may be shared between multiple contexts, we count how
+	 * many contexts keep us "open". Once open hits zero, we are closed
+	 * and do not allow any new attachments, and proceed to shutdown our
+	 * vma and page directories.
+	 */
+	atomic_t open;
+
+	struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
+
+	struct i915_page_scratch scratch[4];
+	unsigned int scratch_order;
+	unsigned int top;
+
+	/**
+	 * List of vma currently bound.
+	 */
+	struct list_head bound_list;
+
+	struct pagestash free_pages;
+
+	/* Global GTT */
+	bool is_ggtt:1;
+
+	/* Some systems require uncached updates of the page directories */
+	bool pt_kmap_wc:1;
+
+	/* Some systems support read-only mappings for GGTT and/or PPGTT */
+	bool has_read_only:1;
+
+	u64 (*pte_encode)(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags); /* Create a valid PTE */
+#define PTE_READ_ONLY	BIT(0)
+
+	int (*allocate_va_range)(struct i915_address_space *vm,
+				 u64 start, u64 length);
+	void (*clear_range)(struct i915_address_space *vm,
+			    u64 start, u64 length);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    u64 offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct i915_vma *vma,
+			       enum i915_cache_level cache_level,
+			       u32 flags);
+	void (*cleanup)(struct i915_address_space *vm);
+
+	struct i915_vma_ops vma_ops;
+
+	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
+	I915_SELFTEST_DECLARE(bool scrub_64K);
+};
+
+/*
+ * The Graphics Translation Table is the way in which GEN hardware translates a
+ * Graphics Virtual Address into a Physical Address. In addition to the normal
+ * collateral associated with any va->pa translations GEN hardware also has a
+ * portion of the GTT which can be mapped by the CPU and remain both coherent
+ * and correct (in cases like swizzling). That region is referred to as GMADR in
+ * the spec.
+ */
+struct i915_ggtt {
+	struct i915_address_space vm;
+
+	struct io_mapping iomap;	/* Mapping to our CPU mappable region */
+	struct resource gmadr;          /* GMADR resource */
+	resource_size_t mappable_end;	/* End offset that we can CPU map */
+
+	/** "Graphics Stolen Memory" holds the global PTEs */
+	void __iomem *gsm;
+	void (*invalidate)(struct i915_ggtt *ggtt);
+
+	/** PPGTT used for aliasing the PPGTT with the GTT */
+	struct i915_ppgtt *alias;
+
+	bool do_idle_maps;
+
+	int mtrr;
+
+	/** Bit 6 swizzling required for X tiling */
+	u32 bit_6_swizzle_x;
+	/** Bit 6 swizzling required for Y tiling */
+	u32 bit_6_swizzle_y;
+
+	u32 pin_bias;
+
+	unsigned int num_fences;
+	struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+	struct list_head fence_list;
+
+	/**
+	 * List of all objects in gtt_space, currently mmaped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
+	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
+	struct intel_wakeref_auto userfault_wakeref;
+
+	struct mutex error_mutex;
+	struct drm_mm_node error_capture;
+	struct drm_mm_node uc_fw;
+};
+
+struct i915_ppgtt {
+	struct i915_address_space vm;
+
+	struct i915_page_directory *pd;
+};
+
+#define i915_is_ggtt(vm) ((vm)->is_ggtt)
+
+static inline bool
+i915_vm_is_4lvl(const struct i915_address_space *vm)
+{
+	return (vm->total - 1) >> 32;
+}
+
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
+static inline bool
+i915_vm_has_cache_coloring(struct i915_address_space *vm)
+{
+	return i915_is_ggtt(vm) && vm->mm.color_adjust;
+}
+
+static inline struct i915_ggtt *
+i915_vm_to_ggtt(struct i915_address_space *vm)
+{
+	BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
+	GEM_BUG_ON(!i915_is_ggtt(vm));
+	return container_of(vm, struct i915_ggtt, vm);
+}
+
+static inline struct i915_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+	BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
+	GEM_BUG_ON(i915_is_ggtt(vm));
+	return container_of(vm, struct i915_ppgtt, vm);
+}
+
+static inline struct i915_address_space *
+i915_vm_get(struct i915_address_space *vm)
+{
+	kref_get(&vm->ref);
+	return vm;
+}
+
+void i915_vm_release(struct kref *kref);
+
+static inline void i915_vm_put(struct i915_address_space *vm)
+{
+	kref_put(&vm->ref, i915_vm_release);
+}
+
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	atomic_inc(&vm->open);
+	return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+	if (atomic_add_unless(&vm->open, 1, 0))
+		return i915_vm_get(vm);
+
+	return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	if (atomic_dec_and_test(&vm->open))
+		__i915_vm_close(vm);
+
+	i915_vm_put(vm);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass);
+void i915_address_space_fini(struct i915_address_space *vm);
+
+static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
+{
+	const u32 mask = NUM_PTE(pde_shift) - 1;
+
+	return (address >> PAGE_SHIFT) & mask;
+}
+
+/*
+ * Helper to counts the number of PTEs within the given length. This count
+ * does not cross a page table boundary, so the max value would be
+ * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
+ */
+static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
+{
+	const u64 mask = ~((1ULL << pde_shift) - 1);
+	u64 end;
+
+	GEM_BUG_ON(length == 0);
+	GEM_BUG_ON(offset_in_page(addr | length));
+
+	end = addr + length;
+
+	if ((addr & mask) != (end & mask))
+		return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
+
+	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline u32 i915_pde_index(u64 addr, u32 shift)
+{
+	return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline struct i915_page_table *
+i915_pt_entry(const struct i915_page_directory * const pd,
+	      const unsigned short n)
+{
+	return pd->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pd_entry(const struct i915_page_directory * const pdp,
+	      const unsigned short n)
+{
+	return pdp->entry[n];
+}
+
+static inline dma_addr_t
+i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
+{
+	struct i915_page_dma *pt = ppgtt->pd->entry[n];
+
+	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+
+int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_init_hw(struct drm_i915_private *i915);
+int i915_ggtt_enable_hw(struct drm_i915_private *i915);
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
+int i915_init_ggtt(struct drm_i915_private *i915);
+void i915_ggtt_driver_release(struct drm_i915_private *i915);
+
+static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
+{
+	return ggtt->mappable_end > 0;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt);
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
+
+u64 gen8_pte_encode(dma_addr_t addr,
+		    enum i915_cache_level level,
+		    u32 flags);
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
+#define fill32_px(px, v) do {						\
+	u64 v__ = lower_32_bits(v);					\
+	fill_px((px), v__ << 32 | v__);					\
+} while (0)
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
+void cleanup_scratch_page(struct i915_address_space *vm);
+void free_scratch(struct i915_address_space *vm);
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
+struct i915_page_directory *__alloc_pd(size_t sz);
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+
+#define free_px(vm, px) free_pd(vm, px_base(px))
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       struct i915_page_dma * const to,
+	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
+
+#define set_pd_entry(pd, idx, to) \
+	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       const struct i915_page_scratch * const scratch);
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+		 const unsigned short idx,
+		 struct i915_page_table * const pt,
+		 const struct i915_page_scratch * const scratch);
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+
+int ggtt_set_pages(struct i915_vma *vma);
+int ppgtt_set_pages(struct i915_vma *vma);
+void clear_pages(struct i915_vma *vma);
+
+void gtt_write_workarounds(struct intel_gt *gt);
+
+void setup_private_pat(struct intel_uncore *uncore);
+
+static inline struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+} sgt_dma(struct i915_vma *vma) {
+	struct scatterlist *sg = vma->pages->sgl;
+	dma_addr_t addr = sg_dma_address(sg);
+
+	return (struct sgt_dma){ sg, addr, addr + sg->length };
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 05d042cdefe2..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
-	u64 acthd;
-	u32 ring;
-	u32 head;
-	enum intel_engine_hangcheck_action action;
-	unsigned long action_timestamp;
-	int deadlock;
-	struct intel_instdone instdone;
-	bool wedged:1;
-	bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
-	u32 tmp = current_instdone | *old_instdone;
-	bool unchanged;
-
-	unchanged = tmp == *old_instdone;
-	*old_instdone |= tmp;
-
-	return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_instdone instdone;
-	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
-	bool stuck;
-	int slice;
-	int subslice;
-
-	intel_engine_get_instdone(engine, &instdone);
-
-	/* There might be unstable subunit states even when
-	 * actual head is not moving. Filter out the unstable ones by
-	 * accumulating the undone -> done transitions and only
-	 * consider those as progress.
-	 */
-	stuck = instdone_unchanged(instdone.instdone,
-				   &accu_instdone->instdone);
-	stuck &= instdone_unchanged(instdone.slice_common,
-				    &accu_instdone->slice_common);
-
-	for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
-		stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
-					    &accu_instdone->sampler[slice][subslice]);
-		stuck &= instdone_unchanged(instdone.row[slice][subslice],
-					    &accu_instdone->row[slice][subslice]);
-	}
-
-	return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	if (acthd != engine->hangcheck.acthd) {
-
-		/* Clear subunit states on head movement */
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		return ENGINE_ACTIVE_HEAD;
-	}
-
-	if (!subunits_stuck(engine))
-		return ENGINE_ACTIVE_SUBUNITS;
-
-	return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	enum intel_engine_hangcheck_action ha;
-	u32 tmp;
-
-	ha = head_stuck(engine, acthd);
-	if (ha != ENGINE_DEAD)
-		return ha;
-
-	if (IS_GEN(engine->i915, 2))
-		return ENGINE_DEAD;
-
-	/* Is the chip hanging on a WAIT_FOR_EVENT?
-	 * If so we can simply poke the RB_WAIT bit
-	 * and break the hang. This should work on
-	 * all but the second generation chipsets.
-	 */
-	tmp = ENGINE_READ(engine, RING_CTL);
-	if (tmp & RING_WAIT) {
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "stuck wait on %s", engine->name);
-		ENGINE_WRITE(engine, RING_CTL, tmp);
-		return ENGINE_WAIT_KICK;
-	}
-
-	return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
-				  struct hangcheck *hc)
-{
-	hc->acthd = intel_engine_get_active_head(engine);
-	hc->ring = ENGINE_READ(engine, RING_START);
-	hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
-				   const struct hangcheck *hc)
-{
-	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.last_ring = hc->ring;
-	engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
-		     const struct hangcheck *hc)
-{
-	if (intel_engine_is_idle(engine))
-		return ENGINE_IDLE;
-
-	if (engine->hangcheck.last_ring != hc->ring)
-		return ENGINE_ACTIVE_SEQNO;
-
-	if (engine->hangcheck.last_head != hc->head)
-		return ENGINE_ACTIVE_SEQNO;
-
-	return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
-					struct hangcheck *hc)
-{
-	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
-	hc->action = hangcheck_get_action(engine, hc);
-
-	/* We always increment the progress
-	 * if the engine is busy and still processing
-	 * the same request, so that no single request
-	 * can run indefinitely (such as a chain of
-	 * batches). The only time we do not increment
-	 * the hangcheck score on this ring, if this
-	 * engine is in a legitimate wait for another
-	 * engine. In that case the waiting engine is a
-	 * victim and we want to be sure we catch the
-	 * right culprit. Then every time we do kick
-	 * the ring, make it as a progress as the seqno
-	 * advancement might ensure and if not, it
-	 * will catch the hanging engine.
-	 */
-
-	switch (hc->action) {
-	case ENGINE_IDLE:
-	case ENGINE_ACTIVE_SEQNO:
-		/* Clear head and subunit states on seqno movement */
-		hc->acthd = 0;
-
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		/* Intentional fall through */
-	case ENGINE_WAIT_KICK:
-	case ENGINE_WAIT:
-		engine->hangcheck.action_timestamp = jiffies;
-		break;
-
-	case ENGINE_ACTIVE_HEAD:
-	case ENGINE_ACTIVE_SUBUNITS:
-		/*
-		 * Seqno stuck with still active engine gets leeway,
-		 * in hopes that it is just a long shader.
-		 */
-		timeout = I915_SEQNO_DEAD_TIMEOUT;
-		break;
-
-	case ENGINE_DEAD:
-		break;
-
-	default:
-		MISSING_CASE(hc->action);
-	}
-
-	hc->stalled = time_after(jiffies,
-				 engine->hangcheck.action_timestamp + timeout);
-	hc->wedged = time_after(jiffies,
-				 engine->hangcheck.action_timestamp +
-				 I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
-				   intel_engine_mask_t hung,
-				   intel_engine_mask_t stuck)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	char msg[80];
-	int len;
-
-	/* If some rings hung but others were still busy, only
-	 * blame the hanging rings in the synopsis.
-	 */
-	if (stuck != hung)
-		hung &= ~stuck;
-	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "no progress" : "hang");
-	for_each_engine_masked(engine, gt->i915, hung, tmp)
-		len += scnprintf(msg + len, sizeof(msg) - len,
-				 "%s, ", engine->name);
-	msg[len-2] = '\0';
-
-	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
-	struct intel_gt *gt =
-		container_of(work, typeof(*gt), hangcheck.work.work);
-	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	if (!i915_modparams.enable_hangcheck)
-		return;
-
-	if (!READ_ONCE(gt->awake))
-		return;
-
-	if (intel_gt_is_wedged(gt))
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
-	if (!wakeref)
-		return;
-
-	/* As enabling the GPU requires fairly extensive mmio access,
-	 * periodically arm the mmio checker to see if we are triggering
-	 * any invalid access.
-	 */
-	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
-	for_each_engine(engine, gt->i915, id) {
-		struct hangcheck hc;
-
-		intel_engine_signal_breadcrumbs(engine);
-
-		hangcheck_load_sample(engine, &hc);
-		hangcheck_accumulate_sample(engine, &hc);
-		hangcheck_store_sample(engine, &hc);
-
-		if (hc.stalled) {
-			hung |= engine->mask;
-			if (hc.action != ENGINE_DEAD)
-				stuck |= engine->mask;
-		}
-
-		if (hc.wedged)
-			wedged |= engine->mask;
-	}
-
-	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
-		struct drm_printer p = drm_debug_printer("hangcheck");
-
-		for_each_engine(engine, gt->i915, id) {
-			if (intel_engine_is_idle(engine))
-				continue;
-
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-		}
-	}
-
-	if (wedged) {
-		dev_err(gt->i915->drm.dev,
-			"GPU recovery timed out,"
-			" cancelling all in-flight rendering.\n");
-		GEM_TRACE_DUMP();
-		intel_gt_set_wedged(gt);
-	}
-
-	if (hung)
-		hangcheck_declare_hang(gt, hung, stuck);
-
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
-
-	/* Reset timer in case GPU hangs without another request being added */
-	intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
-	unsigned long delay;
-
-	if (unlikely(!i915_modparams.enable_hangcheck))
-		return;
-
-	/*
-	 * Don't continually defer the hangcheck so that it is always run at
-	 * least once after work has been scheduled on any ring. Otherwise,
-	 * we will ignore a hung ring if a second ring is kept busy.
-	 */
-
-	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
-	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
-	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-	engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
-	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
new file mode 100644
index 000000000000..ceb785b75c25
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_sideband.h"
+
+struct ia_constants {
+	unsigned int min_gpu_freq;
+	unsigned int max_gpu_freq;
+
+	unsigned int min_ring_freq;
+	unsigned int max_ia_freq;
+};
+
+static struct intel_gt *llc_to_gt(struct intel_llc *llc)
+{
+	return container_of(llc, struct intel_gt, llc);
+}
+
+static unsigned int cpu_max_MHz(void)
+{
+	struct cpufreq_policy *policy;
+	unsigned int max_khz;
+
+	policy = cpufreq_cpu_get(0);
+	if (policy) {
+		max_khz = policy->cpuinfo.max_freq;
+		cpufreq_cpu_put(policy);
+	} else {
+		/*
+		 * Default to measured freq if none found, PCU will ensure we
+		 * don't go over
+		 */
+		max_khz = tsc_khz;
+	}
+
+	return max_khz / 1000;
+}
+
+static bool get_ia_constants(struct intel_llc *llc,
+			     struct ia_constants *consts)
+{
+	struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+	struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+	if (rps->max_freq <= rps->min_freq)
+		return false;
+
+	consts->max_ia_freq = cpu_max_MHz();
+
+	consts->min_ring_freq =
+		intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf;
+	/* convert DDR frequency from units of 266.6MHz to bandwidth */
+	consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
+
+	consts->min_gpu_freq = rps->min_freq;
+	consts->max_gpu_freq = rps->max_freq;
+	if (INTEL_GEN(i915) >= 9) {
+		/* Convert GT frequency to 50 HZ units */
+		consts->min_gpu_freq /= GEN9_FREQ_SCALER;
+		consts->max_gpu_freq /= GEN9_FREQ_SCALER;
+	}
+
+	return true;
+}
+
+static void calc_ia_freq(struct intel_llc *llc,
+			 unsigned int gpu_freq,
+			 const struct ia_constants *consts,
+			 unsigned int *out_ia_freq,
+			 unsigned int *out_ring_freq)
+{
+	struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+	const int diff = consts->max_gpu_freq - gpu_freq;
+	unsigned int ia_freq = 0, ring_freq = 0;
+
+	if (INTEL_GEN(i915) >= 9) {
+		/*
+		 * ring_freq = 2 * GT. ring_freq is in 100MHz units
+		 * No floor required for ring frequency on SKL.
+		 */
+		ring_freq = gpu_freq;
+	} else if (INTEL_GEN(i915) >= 8) {
+		/* max(2 * GT, DDR). NB: GT is 50MHz units */
+		ring_freq = max(consts->min_ring_freq, gpu_freq);
+	} else if (IS_HASWELL(i915)) {
+		ring_freq = mult_frac(gpu_freq, 5, 4);
+		ring_freq = max(consts->min_ring_freq, ring_freq);
+		/* leave ia_freq as the default, chosen by cpufreq */
+	} else {
+		const int min_freq = 15;
+		const int scale = 180;
+
+		/*
+		 * On older processors, there is no separate ring
+		 * clock domain, so in order to boost the bandwidth
+		 * of the ring, we need to upclock the CPU (ia_freq).
+		 *
+		 * For GPU frequencies less than 750MHz,
+		 * just use the lowest ring freq.
+		 */
+		if (gpu_freq < min_freq)
+			ia_freq = 800;
+		else
+			ia_freq = consts->max_ia_freq - diff * scale / 2;
+		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+	}
+
+	*out_ia_freq = ia_freq;
+	*out_ring_freq = ring_freq;
+}
+
+static void gen6_update_ring_freq(struct intel_llc *llc)
+{
+	struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+	struct ia_constants consts;
+	unsigned int gpu_freq;
+
+	if (!get_ia_constants(llc, &consts))
+		return;
+
+	/*
+	 * For each potential GPU frequency, load a ring frequency we'd like
+	 * to use for memory access.  We do this by specifying the IA frequency
+	 * the PCU should use as a reference to determine the ring frequency.
+	 */
+	for (gpu_freq = consts.max_gpu_freq;
+	     gpu_freq >= consts.min_gpu_freq;
+	     gpu_freq--) {
+		unsigned int ia_freq, ring_freq;
+
+		calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+		sandybridge_pcode_write(i915,
+					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+					gpu_freq);
+	}
+}
+
+void intel_llc_enable(struct intel_llc *llc)
+{
+	if (HAS_LLC(llc_to_gt(llc)->i915))
+		gen6_update_ring_freq(llc);
+}
+
+void intel_llc_disable(struct intel_llc *llc)
+{
+	/* Currently there is no HW configuration to be done to disable. */
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_llc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h
new file mode 100644
index 000000000000..ef09a890d2b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_H
+#define INTEL_LLC_H
+
+struct intel_llc;
+
+void intel_llc_enable(struct intel_llc *llc);
+void intel_llc_disable(struct intel_llc *llc);
+
+#endif /* INTEL_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h
new file mode 100644
index 000000000000..ecad4687b930
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_TYPES_H
+#define INTEL_LLC_TYPES_H
+
+struct intel_llc {
+};
+
+#endif /* INTEL_LLC_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 06a506c29463..0cf0f6fae675 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,18 +133,19 @@
  */
 #include <linux/interrupt.h>
 
-#include "gem/i915_gem_context.h"
-
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
+#include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -230,17 +231,42 @@ static int __execlists_context_alloc(struct intel_context *ce,
 				     struct intel_engine_cs *engine);
 
 static void execlists_init_reg_state(u32 *reg_state,
-				     struct intel_context *ce,
-				     struct intel_engine_cs *engine,
-				     struct intel_ring *ring);
+				     const struct intel_context *ce,
+				     const struct intel_engine_cs *engine,
+				     const struct intel_ring *ring,
+				     bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+			     const struct intel_engine_cs *engine);
 
 static void mark_eio(struct i915_request *rq)
 {
-	if (!i915_request_signaled(rq))
-		dma_fence_set_error(&rq->fence, -EIO);
+	if (i915_request_completed(rq))
+		return;
+
+	GEM_BUG_ON(i915_request_signaled(rq));
+
+	dma_fence_set_error(&rq->fence, -EIO);
 	i915_request_mark_complete(rq);
 }
 
+static struct i915_request *
+active_request(const struct intel_timeline * const tl, struct i915_request *rq)
+{
+	struct i915_request *active = rq;
+
+	rcu_read_lock();
+	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+		if (i915_request_completed(rq))
+			break;
+
+		active = rq;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -337,10 +363,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * However, the priority hint is a mere hint that we may need to
 	 * preempt. If that hint is stale or we may be trying to preempt
 	 * ourselves, ignore the request.
+	 *
+	 * More naturally we would write
+	 *      prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
 	 */
-	last_prio = effective_prio(rq);
-	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
-					 last_prio))
+	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+	if (engine->execlists.queue_priority_hint <= last_prio)
 		return false;
 
 	/*
@@ -429,12 +460,8 @@ assert_priority_queue(const struct i915_request *prev,
 static u64
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = INTEL_LEGACY_32B_CONTEXT;
 	if (i915_vm_is_4lvl(ce->vm))
 		desc = INTEL_LEGACY_64B_CONTEXT;
@@ -444,37 +471,485 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	if (IS_GEN(engine->i915, 8))
 		desc |= GEN8_CTX_L3LLC_COHERENT;
 
-	desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
-								/* bits 12-31 */
+	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
 	/*
 	 * The following 32bits are copied into the OA reports (dword 2).
 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
 }
 
-static void unwind_wa_tail(struct i915_request *rq)
+static inline unsigned int dword_in_page(void *addr)
+{
+	return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
+			const u8 *data,
+			const struct intel_engine_cs *engine,
+			bool clear)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+	(((x) >> 2) & 0x7f)
+#define END(x) 0, (x)
+{
+	const u32 base = engine->mmio_base;
+
+	while (*data) {
+		u8 count, flags;
+
+		if (*data & BIT(7)) { /* skip */
+			count = *data++ & ~BIT(7);
+			if (clear)
+				memset32(regs, MI_NOOP, count);
+			regs += count;
+			continue;
+		}
+
+		count = *data & 0x3f;
+		flags = *data >> 6;
+		data++;
+
+		*regs = MI_LOAD_REGISTER_IMM(count);
+		if (flags & POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		if (INTEL_GEN(engine->i915) >= 11)
+			*regs |= MI_LRI_CS_MMIO;
+		regs++;
+
+		GEM_BUG_ON(!count);
+		do {
+			u32 offset = 0;
+			u8 v;
+
+			do {
+				v = *data++;
+				offset <<= 7;
+				offset |= v & ~BIT(7);
+			} while (v & BIT(7));
+
+			regs[0] = base + (offset << 2);
+			if (clear)
+				regs[1] = 0;
+			regs += 2;
+		} while (--count);
+	}
+
+	if (clear) {
+		u8 count = *++data;
+
+		/* Clear past the tail for HW access */
+		GEM_BUG_ON(dword_in_page(regs) > count);
+		memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+		/* Close the batch; used mainly by live_lrc_layout() */
+		*regs = MI_BATCH_BUFFER_END;
+		if (INTEL_GEN(engine->i915) >= 10)
+			*regs |= BIT(0);
+	}
+}
+
+static const u8 gen8_xcs_offsets[] = {
+	NOP(1),
+	LRI(11, 0),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+
+	NOP(9),
+	LRI(9, 0),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(13),
+	LRI(2, 0),
+	REG16(0x200),
+	REG(0x028),
+
+	END(80)
+};
+
+static const u8 gen9_xcs_offsets[] = {
+	NOP(1),
+	LRI(14, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+
+	NOP(3),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(13),
+	LRI(1, POSTED),
+	REG16(0x200),
+
+	NOP(13),
+	LRI(44, POSTED),
+	REG(0x028),
+	REG(0x09c),
+	REG(0x0c0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x068),
+
+	END(176)
+};
+
+static const u8 gen12_xcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END(80)
+};
+
+static const u8 gen8_rcs_offsets[] = {
+	NOP(1),
+	LRI(14, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+
+	NOP(3),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(13),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+	NOP(1),
+	LRI(14, POSTED),
+	REG16(0x244),
+	REG(0x34),
+	REG(0x30),
+	REG(0x38),
+	REG(0x3c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+
+	NOP(3),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(13),
+	LRI(1, 0),
+	REG(0xc8),
+
+	NOP(13),
+	LRI(44, POSTED),
+	REG(0x28),
+	REG(0x9c),
+	REG(0xc0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x68),
+
+	END(176)
+};
+
+static const u8 gen11_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(1, POSTED),
+	REG(0x1b0),
+
+	NOP(10),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END(80)
+};
+
+static const u8 gen12_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END(80)
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 {
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
-	assert_ring_tail_valid(rq->ring, rq->tail);
+	/*
+	 * The gen12+ lists only have the registers we program in the basic
+	 * default state. We rely on the context image using relative
+	 * addressing to automatic fixup the register state between the
+	 * physical engines for virtual engine.
+	 */
+	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+		   !intel_engine_has_relative_mmio(engine));
+
+	if (engine->class == RENDER_CLASS) {
+		if (INTEL_GEN(engine->i915) >= 12)
+			return gen12_rcs_offsets;
+		else if (INTEL_GEN(engine->i915) >= 11)
+			return gen11_rcs_offsets;
+		else if (INTEL_GEN(engine->i915) >= 9)
+			return gen9_rcs_offsets;
+		else
+			return gen8_rcs_offsets;
+	} else {
+		if (INTEL_GEN(engine->i915) >= 12)
+			return gen12_xcs_offsets;
+		else if (INTEL_GEN(engine->i915) >= 9)
+			return gen9_xcs_offsets;
+		else
+			return gen8_xcs_offsets;
+	}
 }
 
 static struct i915_request *
@@ -489,13 +964,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->active.requests,
 					 sched.link) {
-		struct intel_engine_cs *owner;
-
 		if (i915_request_completed(rq))
 			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
-		unwind_wa_tail(rq);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -504,8 +976,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		 * engine so that it can be moved across onto another physical
 		 * engine as load dictates.
 		 */
-		owner = rq->hw_context->engine;
-		if (likely(owner == engine)) {
+		if (likely(rq->execution_mask == engine->mask)) {
 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 			if (rq_prio(rq) != prio) {
 				prio = rq_prio(rq);
@@ -516,6 +987,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			list_move(&rq->sched.link, pl);
 			active = rq;
 		} else {
+			struct intel_engine_cs *owner = rq->context->engine;
+
 			/*
 			 * Decouple the virtual breadcrumb before moving it
 			 * back to the virtual engine -- we don't want the
@@ -525,7 +998,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			 */
 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 				     &rq->fence.flags)) {
-				spin_lock(&rq->lock);
+				spin_lock_nested(&rq->lock,
+						 SINGLE_DEPTH_NESTING);
 				i915_request_cancel_breadcrumb(rq);
 				spin_unlock(&rq->lock);
 			}
@@ -561,15 +1035,193 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	write_seqlock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		if (engine->stats.active++ == 0)
+			engine->stats.start = ktime_get();
+		GEM_BUG_ON(engine->stats.active == 0);
+	}
+
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	write_seqlock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		ktime_t last;
+
+		if (engine->stats.active && --engine->stats.active == 0) {
+			/*
+			 * Decrement the active context count and in case GPU
+			 * is now idle add up to the running total.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.start);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		} else if (engine->stats.active == 0) {
+			/*
+			 * After turning on engine stats, context out might be
+			 * the first event in which case we account from the
+			 * time stats gathering was turned on.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		}
+	}
+
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+	if (INTEL_GEN(engine->i915) >= 12)
+		return 0x60;
+	else if (INTEL_GEN(engine->i915) >= 9)
+		return 0x54;
+	else if (engine->class == RENDER_CLASS)
+		return 0x58;
+	else
+		return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+			const struct intel_engine_cs *engine)
+{
+	const struct intel_ring *ring = ce->ring;
+	u32 *regs = ce->lrc_reg_state;
+	bool valid = true;
+	int x;
+
+	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+		       engine->name,
+		       regs[CTX_RING_START],
+		       i915_ggtt_offset(ring->vma));
+		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+		valid = false;
+	}
+
+	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+		       engine->name,
+		       regs[CTX_RING_CTL],
+		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+		valid = false;
+	}
+
+	x = lrc_ring_mi_mode(engine);
+	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+		       engine->name, regs[x + 1]);
+		regs[x + 1] &= ~STOP_RING;
+		regs[x + 1] |= STOP_RING << 16;
+		valid = false;
+	}
+
+	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+
+static void restore_default_state(struct intel_context *ce,
+				  struct intel_engine_cs *engine)
+{
+	u32 *regs = ce->lrc_reg_state;
+
+	if (engine->pinned_default_state)
+		memcpy(regs, /* skip restoring the vanilla PPHWSP */
+		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+		       engine->context_size - PAGE_SIZE);
+
+	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+			 struct intel_engine_cs *engine)
+{
+	struct intel_context * const ce = rq->context;
+	u32 head;
+
+	/*
+	 * The executing context has been cancelled. We want to prevent
+	 * further execution along this context and propagate the error on
+	 * to anything depending on its results.
+	 *
+	 * In __i915_request_submit(), we apply the -EIO and remove the
+	 * requests' payloads for any banned requests. But first, we must
+	 * rewind the context back to the start of the incomplete request so
+	 * that we do not jump back into the middle of the batch.
+	 *
+	 * We preserve the breadcrumbs and semaphores of the incomplete
+	 * requests so that inter-timeline dependencies (i.e other timelines)
+	 * remain correctly ordered. And we defer to __i915_request_submit()
+	 * so that all asynchronous waits are correctly handled.
+	 */
+	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+		     rq->fence.context, rq->fence.seqno);
+
+	/* On resubmission of the active request, payload will be scrubbed */
+	if (i915_request_completed(rq))
+		head = rq->tail;
+	else
+		head = active_request(ce->timeline, rq)->head;
+	ce->ring->head = intel_ring_wrap(ce->ring, head);
+	intel_ring_update_space(ce->ring);
+
+	/* Scrub the context image to prevent replaying the previous batch */
+	restore_default_state(ce, engine);
+	__execlists_update_reg_state(ce, engine);
+
+	/* We've switched away, so this should be a no-op, but intent matters */
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
 static inline struct intel_engine_cs *
 __execlists_schedule_in(struct i915_request *rq)
 {
 	struct intel_engine_cs * const engine = rq->engine;
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 
 	intel_context_get(ce);
 
-	intel_gt_pm_get(engine->gt);
+	if (unlikely(intel_context_is_banned(ce)))
+		reset_active(rq, engine);
+
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		execlists_check_context(ce, engine);
+
+	if (ce->tag) {
+		/* Use a fixed tag for OA and friends */
+		ce->lrc_desc |= (u64)ce->tag << 32;
+	} else {
+		/* We don't need a strict matching tag, just different values */
+		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+		ce->lrc_desc |=
+			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
+			GEN11_SW_CTX_ID_SHIFT;
+		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+	}
+
+	__intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
 
@@ -579,7 +1231,7 @@ __execlists_schedule_in(struct i915_request *rq)
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 	struct intel_engine_cs *old;
 
 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -610,11 +1262,25 @@ static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
 {
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
+
+	/*
+	 * NB process_csb() is not under the engine->active.lock and hence
+	 * schedule_out can race with schedule_in meaning that we should
+	 * refrain from doing non-trivial work here.
+	 */
+
+	/*
+	 * If we have just completed this context, the engine may now be
+	 * idle and we want to re-enter powersaving.
+	 */
+	if (list_is_last(&rq->link, &ce->timeline->requests) &&
+	    i915_request_completed(rq))
+		intel_engine_add_retire(engine, ce->timeline);
 
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-	intel_gt_pm_put(engine->gt);
+	intel_gt_pm_put_async(engine->gt);
 
 	/*
 	 * If this is part of a virtual engine, its next request may
@@ -634,7 +1300,7 @@ __execlists_schedule_out(struct i915_request *rq,
 static inline void
 execlists_schedule_out(struct i915_request *rq)
 {
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 	struct intel_engine_cs *cur, *old;
 
 	trace_i915_request_out(rq);
@@ -649,13 +1315,29 @@ execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce = rq->hw_context;
-	u64 desc;
+	struct intel_context *ce = rq->context;
+	u64 desc = ce->lrc_desc;
+	u32 tail;
 
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
-		intel_ring_set_tail(rq->ring, rq->tail);
+	/*
+	 * WaIdleLiteRestore:bdw,skl
+	 *
+	 * We should never submit the context with the same RING_TAIL twice
+	 * just in case we submit an empty ring, which confuses the HW.
+	 *
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+	 * the normal request to be able to always advance the RING_TAIL on
+	 * subsequent resubmissions (for lite restore). Should that fail us,
+	 * and we try and submit the same tail again, force the context
+	 * reload.
+	 */
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+		desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+	rq->tail = rq->wa_tail;
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -666,17 +1348,10 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	 * may not be visible to the HW prior to the completion of the UC
 	 * register write and that we may begin execution from the context
 	 * before its image is complete leading to invalid PD chasing.
-	 *
-	 * Furthermore, Braswell, at least, wants a full mb to be sure that
-	 * the writes are coherent in memory (visible to the GPU) prior to
-	 * execution, and not just visible to other CPUs (as is the result of
-	 * wmb).
 	 */
-	mb();
+	wmb();
 
-	desc = ce->lrc_desc;
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
 	return desc;
 }
 
@@ -699,15 +1374,17 @@ trace_ports(const struct intel_engine_execlists *execlists,
 	const struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
 
-	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
-		  engine->name, msg,
-		  ports[0]->fence.context,
-		  ports[0]->fence.seqno,
-		  i915_request_completed(ports[0]) ? "!" :
-		  i915_request_started(ports[0]) ? "*" :
-		  "",
-		  ports[1] ? ports[1]->fence.context : 0,
-		  ports[1] ? ports[1]->fence.seqno : 0);
+	if (!ports[0])
+		return;
+
+	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+		     ports[0]->fence.context,
+		     ports[0]->fence.seqno,
+		     i915_request_completed(ports[0]) ? "!" :
+		     i915_request_started(ports[0]) ? "*" :
+		     "",
+		     ports[1] ? ports[1]->fence.context : 0,
+		     ports[1] ? ports[1]->fence.seqno : 0);
 }
 
 static __maybe_unused bool
@@ -719,24 +1396,67 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
-	if (!execlists->pending[0])
+	if (!execlists->pending[0]) {
+		GEM_TRACE_ERR("Nothing pending for promotion!\n");
 		return false;
+	}
 
-	if (execlists->pending[execlists_num_ports(execlists)])
+	if (execlists->pending[execlists_num_ports(execlists)]) {
+		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+			      execlists_num_ports(execlists));
 		return false;
+	}
 
 	for (port = execlists->pending; (rq = *port); port++) {
-		if (ce == rq->hw_context)
+		unsigned long flags;
+		bool ok = true;
+
+		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+		GEM_BUG_ON(!i915_request_is_active(rq));
+
+		if (ce == rq->context) {
+			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
 			return false;
+		}
+		ce = rq->context;
 
-		ce = rq->hw_context;
-		if (i915_request_completed(rq))
+		/* Hold tightly onto the lock to prevent concurrent retires! */
+		if (!spin_trylock_irqsave(&rq->lock, flags))
 			continue;
 
-		if (i915_active_is_idle(&ce->active))
-			return false;
+		if (i915_request_completed(rq))
+			goto unlock;
+
+		if (i915_active_is_idle(&ce->active) &&
+		    !intel_context_is_barrier(ce)) {
+			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			ok = false;
+			goto unlock;
+		}
+
+		if (!i915_vma_is_pinned(ce->state)) {
+			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			ok = false;
+			goto unlock;
+		}
+
+		if (!i915_vma_is_pinned(ce->ring->vma)) {
+			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			ok = false;
+			goto unlock;
+		}
 
-		if (!i915_vma_is_pinned(ce->state))
+unlock:
+		spin_unlock_irqrestore(&rq->lock, flags);
+		if (!ok)
 			return false;
 	}
 
@@ -782,7 +1502,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 static bool ctx_single_port_submission(const struct intel_context *ce)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		i915_gem_context_force_single_submission(ce->gem_context));
+		intel_context_force_single_submission(ce));
 }
 
 static bool can_merge_ctx(const struct intel_context *prev,
@@ -814,7 +1534,11 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
-	if (!can_merge_ctx(prev->hw_context, next->hw_context))
+	if (unlikely((prev->fence.flags ^ next->fence.flags) &
+		     (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL)))
+		return false;
+
+	if (!can_merge_ctx(prev->context, next->context))
 		return false;
 
 	return true;
@@ -823,47 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
-	u32 base = engine->mmio_base;
-
-	/* Must match execlists_init_reg_state()! */
-
-	regs[CTX_CONTEXT_CONTROL] =
-		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
-	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
-	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
-	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
-	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
-
-	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
-	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
-	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
-	regs[CTX_SECOND_BB_HEAD_U] =
-		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
-	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
-	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
-
-	regs[CTX_CTX_TIMESTAMP] =
-		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
-	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
-	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
-	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
-	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
-	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
-	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
-	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
-	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
-
-	if (engine->class == RENDER_CLASS) {
-		regs[CTX_RCS_INDIRECT_CTX] =
-			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
-		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
-			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
-		regs[CTX_BB_PER_CTX_PTR] =
-			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
-
-		regs[CTX_R_PWR_CLK_STATE] =
-			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
-	}
+	set_offsets(regs, reg_offsets(engine), engine, false);
 }
 
 static bool virtual_matches(const struct virtual_engine *ve,
@@ -902,7 +1586,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 	if (!list_empty(&ve->context.signal_link)) {
 		list_move_tail(&ve->context.signal_link,
 			       &engine->breadcrumbs.signalers);
-		intel_engine_queue_breadcrumbs(engine);
+		intel_engine_signal_breadcrumbs(engine);
 	}
 	spin_unlock(&old->breadcrumbs.irq_lock);
 }
@@ -978,7 +1662,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
 	int hint;
 
-	if (!intel_engine_has_semaphores(engine))
+	if (!intel_engine_has_timeslices(engine))
 		return false;
 
 	if (list_is_last(&rq->sched.link, &engine->active.requests))
@@ -999,15 +1683,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return rq_prio(list_next_entry(rq, sched.link));
 }
 
-static bool
-enable_timeslice(const struct intel_engine_execlists *execlists)
+static inline unsigned long
+timeslice(const struct intel_engine_cs *engine)
 {
-	const struct i915_request *rq = *execlists->active;
+	return READ_ONCE(engine->props.timeslice_duration_ms);
+}
 
-	if (i915_request_completed(rq))
-		return false;
+static unsigned long
+active_timeslice(const struct intel_engine_cs *engine)
+{
+	const struct i915_request *rq = *engine->execlists.active;
+
+	if (!rq || i915_request_completed(rq))
+		return 0;
+
+	if (engine->execlists.switch_priority_hint < effective_prio(rq))
+		return 0;
 
-	return execlists->switch_priority_hint >= effective_prio(rq);
+	return timeslice(engine);
+}
+
+static void set_timeslice(struct intel_engine_cs *engine)
+{
+	if (!intel_engine_has_timeslices(engine))
+		return;
+
+	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1015,6 +1716,35 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
 }
 
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = last_active(&engine->execlists);
+	if (!rq)
+		return 0;
+
+	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
+	if (unlikely(intel_context_is_banned(rq->context)))
+		return 1;
+
+	return READ_ONCE(engine->props.preempt_timeout_ms);
+}
+
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+	if (!intel_engine_has_preempt_reset(engine))
+		return;
+
+	set_timer_ms(&engine->execlists.preempt,
+		     active_preempt_timeout(engine));
+}
+
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+	memset_p((void **)ports, NULL, count);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1077,12 +1807,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	last = last_active(execlists);
 	if (last) {
 		if (need_preempt(engine, last, rb)) {
-			GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
-				  engine->name,
-				  last->fence.context,
-				  last->fence.seqno,
-				  last->sched.attr.priority,
-				  execlists->queue_priority_hint);
+			ENGINE_TRACE(engine,
+				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+				     last->fence.context,
+				     last->fence.seqno,
+				     last->sched.attr.priority,
+				     execlists->queue_priority_hint);
 			record_preemption(execlists);
 
 			/*
@@ -1108,16 +1838,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * tendency to ignore us rewinding the TAIL to the
 			 * end of an earlier request.
 			 */
-			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+			last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   !timer_pending(&engine->execlists.timer)) {
-			GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
-				  engine->name,
-				  last->fence.context,
-				  last->fence.seqno,
-				  last->sched.attr.priority,
-				  execlists->queue_priority_hint);
+			   timer_expired(&engine->execlists.timer)) {
+			ENGINE_TRACE(engine,
+				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
+				     last->fence.context,
+				     last->fence.seqno,
+				     last->sched.attr.priority,
+				     execlists->queue_priority_hint);
 
 			ring_set_paused(engine, 1);
 			defer_active(engine);
@@ -1147,18 +1877,18 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * submission.
 			 */
 			if (!list_is_last(&last->sched.link,
-					  &engine->active.requests))
-				return;
+					  &engine->active.requests)) {
+				/*
+				 * Even if ELSP[1] is occupied and not worthy
+				 * of timeslices, our queue might be.
+				 */
+				if (!execlists->timer.expires &&
+				    need_timeslice(engine, last))
+					set_timer_ms(&execlists->timer,
+						     timeslice(engine));
 
-			/*
-			 * WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_fini_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
+				return;
+			}
 		}
 	}
 
@@ -1180,7 +1910,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq != ve->request);
 		GEM_BUG_ON(rq->engine != &ve->base);
-		GEM_BUG_ON(rq->hw_context != &ve->context);
+		GEM_BUG_ON(rq->context != &ve->context);
 
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (!virtual_matches(ve, rq, engine)) {
@@ -1194,14 +1924,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				return; /* leave this for another */
 			}
 
-			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
-				  engine->name,
-				  rq->fence.context,
-				  rq->fence.seqno,
-				  i915_request_completed(rq) ? "!" :
-				  i915_request_started(rq) ? "*" :
-				  "",
-				  yesno(engine != ve->siblings[0]));
+			ENGINE_TRACE(engine,
+				     "virtual rq=%llx:%lld%s, new engine? %s\n",
+				     rq->fence.context,
+				     rq->fence.seqno,
+				     i915_request_completed(rq) ? "!" :
+				     i915_request_started(rq) ? "*" :
+				     "",
+				     yesno(engine != ve->siblings[0]));
 
 			ve->request = NULL;
 			ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -1216,7 +1946,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				unsigned int n;
 
 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
-				virtual_update_register_offsets(regs, engine);
+
+				if (!intel_engine_has_relative_mmio(engine))
+					virtual_update_register_offsets(regs,
+									engine);
 
 				if (!list_empty(&ve->context.signals))
 					virtual_xfer_breadcrumbs(ve, engine);
@@ -1296,7 +2029,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * same LRCA, i.e. we must submit 2 different
 				 * contexts if we submit 2 ELSP.
 				 */
-				if (last->hw_context == rq->hw_context)
+				if (last->context == rq->context)
+					goto done;
+
+				if (i915_request_has_sentinel(last))
 					goto done;
 
 				/*
@@ -1306,8 +2042,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->hw_context) ||
-				    ctx_single_port_submission(rq->hw_context))
+				if (ctx_single_port_submission(last->context) ||
+				    ctx_single_port_submission(rq->context))
 					goto done;
 
 				merge = false;
@@ -1321,8 +2057,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				}
 
 				GEM_BUG_ON(last &&
-					   !can_merge_ctx(last->hw_context,
-							  rq->hw_context));
+					   !can_merge_ctx(last->context,
+							  rq->context));
 
 				submit = true;
 				last = rq;
@@ -1351,17 +2087,30 @@ done:
 	 * interrupt for secondary ports).
 	 */
 	execlists->queue_priority_hint = queue_prio(execlists);
-	GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
-		  engine->name, execlists->queue_priority_hint,
-		  yesno(submit));
 
 	if (submit) {
 		*port = execlists_schedule_in(last, port - execlists->pending);
-		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists->switch_priority_hint =
 			switch_prio(engine, *execlists->pending);
+
+		/*
+		 * Skip if we ended up with exactly the same set of requests,
+		 * e.g. trying to timeslice a pair of ordered contexts
+		 */
+		if (!memcmp(execlists->active, execlists->pending,
+			    (port - execlists->pending + 1) * sizeof(*port))) {
+			do
+				execlists_schedule_out(fetch_and_zero(port));
+			while (port-- != execlists->pending);
+
+			goto skip_submit;
+		}
+		clear_ports(port + 1, last_port - port);
+
 		execlists_submit_ports(engine);
+		set_preempt_timeout(engine);
 	} else {
+skip_submit:
 		ring_set_paused(engine, 0);
 	}
 }
@@ -1369,16 +2118,18 @@ done:
 static void
 cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-	struct i915_request * const *port, *rq;
+	struct i915_request * const *port;
+
+	for (port = execlists->pending; *port; port++)
+		execlists_schedule_out(*port);
+	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
 
-	for (port = execlists->pending; (rq = *port); port++)
-		execlists_schedule_out(rq);
-	memset(execlists->pending, 0, sizeof(execlists->pending));
+	/* Mark the end of active before we overwrite *active */
+	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+		execlists_schedule_out(*port);
+	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
 
-	for (port = execlists->active; (rq = *port); port++)
-		execlists_schedule_out(rq);
-	execlists->active =
-		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+	WRITE_ONCE(execlists->active, execlists->inflight);
 }
 
 static inline void
@@ -1394,13 +2145,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
 }
 
-enum csb_step {
-	CSB_NOP,
-	CSB_PROMOTE,
-	CSB_PREEMPT,
-	CSB_COMPLETE,
-};
-
 /*
  * Starting with Gen12, the status has a new format:
  *
@@ -1427,7 +2171,7 @@ enum csb_step {
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline enum csb_step
+static inline bool
 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 {
 	u32 lower_dw = csb[0];
@@ -1436,9 +2180,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
 	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
-	if (!ctx_away_valid && ctx_to_valid)
-		return CSB_PROMOTE;
-
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
 	 * occurs, so we can't just check for that. The check below works for
@@ -1446,8 +2187,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
 	 * would require some extra handling, but we don't support that.
 	 */
-	if (new_queue && ctx_away_valid)
-		return CSB_PREEMPT;
+	if (!ctx_away_valid || new_queue) {
+		GEM_BUG_ON(!ctx_to_valid);
+		return true;
+	}
 
 	/*
 	 * switch detail = 5 is covered by the case above and we do not expect a
@@ -1455,30 +2198,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * use polling mode.
 	 */
 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
-
-	if (*execlists->active) {
-		GEM_BUG_ON(!ctx_away_valid);
-		return CSB_COMPLETE;
-	}
-
-	return CSB_NOP;
+	return false;
 }
 
-static inline enum csb_step
+static inline bool
 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 {
-	unsigned int status = *csb;
-
-	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-		return CSB_PROMOTE;
-
-	if (status & GEN8_CTX_STATUS_PREEMPTED)
-		return CSB_PREEMPT;
-
-	if (*execlists->active)
-		return CSB_COMPLETE;
-
-	return CSB_NOP;
+	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
 
 static void process_csb(struct intel_engine_cs *engine)
@@ -1488,7 +2214,14 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
+	/*
+	 * As we modify our execlists state tracking we require exclusive
+	 * access. Either we are inside the tasklet, or the tasklet is disabled
+	 * and we assume that is only inside the reset paths and so serialised.
+	 */
+	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
+		   !reset_in_progress(execlists));
+	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1502,7 +2235,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	head = execlists->csb_head;
 	tail = READ_ONCE(*execlists->csb_write);
-	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 	if (unlikely(head == tail))
 		return;
 
@@ -1517,7 +2250,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		enum csb_step csb_step;
+		bool promote;
 
 		if (++head == num_entries)
 			head = 0;
@@ -1540,42 +2273,40 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
-			  engine->name, head,
-			  buf[2 * head + 0], buf[2 * head + 1]);
+		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+			     head, buf[2 * head + 0], buf[2 * head + 1]);
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+			promote = gen12_csb_parse(execlists, buf + 2 * head);
 		else
-			csb_step = gen8_csb_parse(execlists, buf + 2 * head);
+			promote = gen8_csb_parse(execlists, buf + 2 * head);
+		if (promote) {
+			struct i915_request * const *old = execlists->active;
 
-		switch (csb_step) {
-		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
-			trace_ports(execlists, "preempted", execlists->active);
-
-			while (*execlists->active)
-				execlists_schedule_out(*execlists->active++);
-
-			/* fallthrough */
-		case CSB_PROMOTE: /* switch pending to inflight */
-			GEM_BUG_ON(*execlists->active);
-			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-			execlists->active =
-				memcpy(execlists->inflight,
-				       execlists->pending,
-				       execlists_num_ports(execlists) *
-				       sizeof(*execlists->pending));
-
-			if (enable_timeslice(execlists))
-				mod_timer(&execlists->timer, jiffies + 1);
+			/* Point active to the new ELSP; prevent overwriting */
+			WRITE_ONCE(execlists->active, execlists->pending);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
 
+			/* cancel old inflight, prepare for switch */
+			trace_ports(execlists, "preempted", old);
+			while (*old)
+				execlists_schedule_out(*old++);
+
+			/* switch pending to inflight */
+			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+			WRITE_ONCE(execlists->active,
+				   memcpy(execlists->inflight,
+					  execlists->pending,
+					  execlists_num_ports(execlists) *
+					  sizeof(*execlists->pending)));
+
 			WRITE_ONCE(execlists->pending[0], NULL);
-			break;
+		} else {
+			GEM_BUG_ON(!*execlists->active);
 
-		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+			/* port0 completed, advanced to port1 */
 			trace_ports(execlists, "completed", execlists->active);
 
 			/*
@@ -1590,14 +2321,11 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
 				   execlists_num_ports(execlists));
-			break;
-
-		case CSB_NOP:
-			break;
 		}
 	} while (head != tail);
 
 	execlists->csb_head = head;
+	set_timeslice(engine);
 
 	/*
 	 * Gen11 has proven to fail wrt global observation point between
@@ -1623,6 +2351,42 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 	}
 }
 
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+
+	if (i915_modparams.reset < 3)
+		return;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	/* Mark this tasklet as disabled to avoid waiting for it to complete */
+	tasklet_disable_nosync(&engine->execlists.tasklet);
+
+	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+		     READ_ONCE(engine->props.preempt_timeout_ms),
+		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+	intel_engine_reset(engine, "preemption time out");
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(const struct intel_engine_cs *const engine)
+{
+	const struct timer_list *t = &engine->execlists.preempt;
+
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return false;
+
+	if (!timer_expired(t))
+		return false;
+
+	return READ_ONCE(engine->execlists.pending[0]);
+}
+
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -1630,23 +2394,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
-	unsigned long flags;
+	bool timeout = preempt_timeout(engine);
 
 	process_csb(engine);
-	if (!READ_ONCE(engine->execlists.pending[0])) {
+	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+		unsigned long flags;
+
 		spin_lock_irqsave(&engine->active.lock, flags);
 		__execlists_submission_tasklet(engine);
 		spin_unlock_irqrestore(&engine->active.lock, flags);
+
+		/* Recheck after serialising with direct-submission */
+		if (timeout && preempt_timeout(engine))
+			preempt_reset(engine);
 	}
 }
 
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
-	struct intel_engine_cs *engine =
-		from_timer(engine, timer, execlists.timer);
-
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+	tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+	execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+	execlists_kick(timer, preempt);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1726,10 +2506,9 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		return;
 
-	vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
 	vaddr += engine->context_size;
 
-	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
 }
 
 static void
@@ -1738,10 +2517,9 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		return;
 
-	vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
 	vaddr += engine->context_size;
 
-	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
 		dev_err_once(engine->i915->drm.dev,
 			     "%s context redzone overwritten!\n",
 			     engine->name);
@@ -1752,14 +2530,13 @@ static void execlists_context_unpin(struct intel_context *ce)
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
 		      ce->engine);
 
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
-__execlists_update_reg_state(struct intel_context *ce,
-			     struct intel_engine_cs *engine)
+__execlists_update_reg_state(const struct intel_context *ce,
+			     const struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring = ce->ring;
 	u32 *regs = ce->lrc_reg_state;
@@ -1767,16 +2544,16 @@ __execlists_update_reg_state(struct intel_context *ce,
 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
 
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
-	regs[CTX_RING_HEAD + 1] = ring->head;
-	regs[CTX_RING_TAIL + 1] = ring->tail;
+	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+	regs[CTX_RING_HEAD] = ring->head;
+	regs[CTX_RING_TAIL] = ring->tail;
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS) {
-		regs[CTX_R_PWR_CLK_STATE + 1] =
+		regs[CTX_R_PWR_CLK_STATE] =
 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 
-		i915_oa_init_reg_state(engine, ce, regs);
+		i915_oa_init_reg_state(ce, engine);
 	}
 }
 
@@ -1785,39 +2562,21 @@ __execlists_context_pin(struct intel_context *ce,
 			struct intel_engine_cs *engine)
 {
 	void *vaddr;
-	int ret;
 
 	GEM_BUG_ON(!ce->state);
-
-	ret = intel_context_active_acquire(ce);
-	if (ret)
-		goto err;
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	vaddr = i915_gem_object_pin_map(ce->state->obj,
 					i915_coherent_map_type(engine->i915) |
 					I915_MAP_OVERRIDE);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto unpin_active;
-	}
-
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_map;
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
-	ce->lrc_desc = lrc_descriptor(ce, engine);
+	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
-
-unpin_map:
-	i915_gem_object_unpin_map(ce->state->obj);
-unpin_active:
-	intel_context_active_release(ce);
-err:
-	return ret;
 }
 
 static int execlists_context_pin(struct intel_context *ce)
@@ -1832,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce)
 
 static void execlists_context_reset(struct intel_context *ce)
 {
+	CE_TRACE(ce, "reset\n");
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+
 	/*
 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -1848,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce)
 	 * So to avoid that we reset the context images upon resume. For
 	 * simplicity, we just zero everything out.
 	 */
-	intel_ring_reset(ce->ring, 0);
+	intel_ring_reset(ce->ring, ce->ring->emit);
+
+	/* Scrub away the garbage */
+	execlists_init_reg_state(ce->lrc_reg_state,
+				 ce, ce->engine, ce->ring, true);
 	__execlists_update_reg_state(ce, ce->engine);
+
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static const struct intel_context_ops execlists_context_ops = {
@@ -1869,7 +2637,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
 
-	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -1885,7 +2653,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	*cs++ = MI_NOOP;
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-	*cs++ = rq->timeline->hwsp_offset;
+	*cs++ = i915_request_timeline(rq)->hwsp_offset;
 	*cs++ = 0;
 	*cs++ = rq->fence.seqno - 1;
 
@@ -1897,65 +2665,11 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	return 0;
 }
 
-static int emit_pdps(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
-	int err, i;
-	u32 *cs;
-
-	GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
-	/*
-	 * Beware ye of the dragons, this sequence is magic!
-	 *
-	 * Small changes to this sequence can cause anything from
-	 * GPU hangs to forcewake errors and machine lockups!
-	 */
-
-	/* Flush any residual operations from the context load */
-	err = engine->emit_flush(rq, EMIT_FLUSH);
-	if (err)
-		return err;
-
-	/* Magic required to prevent forcewake errors! */
-	err = engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		return err;
-
-	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* Ensure the LRI have landed before we invalidate & continue */
-	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
-	for (i = GEN8_3LVL_PDPES; i--; ) {
-		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-		u32 base = engine->mmio_base;
-
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
-		*cs++ = upper_32_bits(pd_daddr);
-		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
-		*cs++ = lower_32_bits(pd_daddr);
-	}
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	/* Be doubly sure the LRI have landed before proceeding */
-	err = engine->emit_flush(rq, EMIT_FLUSH);
-	if (err)
-		return err;
-
-	/* Re-invalidate the TLB for luck */
-	return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
 static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+	GEM_BUG_ON(!intel_context_is_pinned(request->context));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
@@ -1973,10 +2687,7 @@ static int execlists_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(request->hw_context->vm))
-		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
-	else
-		ret = emit_pdps(request);
+	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	if (ret)
 		return ret;
 
@@ -2028,12 +2739,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 	return batch;
 }
 
-static u32 slm_offset(struct intel_engine_cs *engine)
-{
-	return intel_gt_scratch_offset(engine->gt,
-				       INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
-}
-
 /*
  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
  * initialized at the beginning and shared across all contexts but this field
@@ -2062,10 +2767,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* Actual scratch location is at 128 bytes offset */
 	batch = gen8_emit_pipe_control(batch,
 				       PIPE_CONTROL_FLUSH_L3 |
-				       PIPE_CONTROL_GLOBAL_GTT_IVB |
+				       PIPE_CONTROL_STORE_DATA_INDEX |
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_QW_WRITE,
-				       slm_offset(engine));
+				       LRC_PPHWSP_SCRATCH_ADDR);
 
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -2131,6 +2836,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_STORE_DATA_INDEX |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       LRC_PPHWSP_SCRATCH_ADDR);
+
 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -2326,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
 			RING_HWS_PGA,
 			i915_ggtt_offset(engine->status_page.vma));
 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+	engine->context_tag = 0;
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -2365,8 +3080,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
 
-	GEM_TRACE("%s: depth<-%d\n", engine->name,
-		  atomic_read(&execlists->tasklet.count));
+	ENGINE_TRACE(engine, "depth<-%d\n",
+		     atomic_read(&execlists->tasklet.count));
 
 	/*
 	 * Prevent request submission to the hardware until we have
@@ -2419,31 +3134,35 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/*
+	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+	 * Bludgeon them with a mmio update to be sure.
+	 */
+	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+		     reset_value << 8 | reset_value);
+	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 }
 
-static struct i915_request *active_request(struct i915_request *rq)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
 {
-	const struct intel_context * const ce = rq->hw_context;
-	struct i915_request *active = NULL;
-	struct list_head *list;
-
-	if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
-		return rq;
-
-	list = &rq->timeline->requests;
-	list_for_each_entry_from_reverse(rq, list, link) {
-		if (i915_request_completed(rq))
-			break;
+	int x;
 
-		if (rq->hw_context != ce)
-			break;
-
-		active = rq;
+	x = lrc_ring_mi_mode(engine);
+	if (x != -1) {
+		regs[x + 1] &= ~STOP_RING;
+		regs[x + 1] |= STOP_RING << 16;
 	}
+}
 
-	return active;
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+					const struct intel_engine_cs *engine)
+{
+	u32 *regs = ce->lrc_reg_state;
+
+	__reset_stop_ring(regs, engine);
 }
 
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2451,7 +3170,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	u32 *regs;
+
+	mb(); /* paranoia: read the CSB pointers from after the reset */
+	clflush(execlists->csb_write);
+	mb();
 
 	process_csb(engine); /* drain preemption events */
 
@@ -2467,16 +3189,23 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!rq)
 		goto unwind;
 
-	ce = rq->hw_context;
-	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	/* We still have requests in-flight; the engine should be active */
+	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+	ce = rq->context;
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
-	rq = active_request(rq);
-	if (!rq) {
-		ce->ring->head = ce->ring->tail;
+
+	if (i915_request_completed(rq)) {
+		/* Idle context; tidy up the ring so we can restart afresh */
+		ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
 		goto out_replay;
 	}
 
+	/* Context has requests still in-flight; it should not be idle! */
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	rq = active_request(ce->timeline, rq);
 	ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+	GEM_BUG_ON(ce->ring->head == ce->ring->tail);
 
 	/*
 	 * If this request hasn't started yet, e.g. it is waiting on a
@@ -2516,19 +3245,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = ce->lrc_reg_state;
-	if (engine->pinned_default_state) {
-		memcpy(regs, /* skip restoring the vanilla PPHWSP */
-		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
-		       engine->context_size - PAGE_SIZE);
-	}
-	execlists_init_reg_state(regs, ce, engine, ce->ring);
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+	restore_default_state(ce, engine);
 
 out_replay:
-	GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
-		  engine->name, ce->ring->head, ce->ring->tail);
+	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+		     ce->ring->head, ce->ring->tail);
 	intel_ring_update_space(ce->ring);
+	__execlists_reset_reg_state(ce, engine);
 	__execlists_update_reg_state(ce, engine);
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
@@ -2536,11 +3262,11 @@ unwind:
 	__unwind_incomplete_requests(engine);
 }
 
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
@@ -2554,14 +3280,14 @@ static void nop_submission_tasklet(unsigned long data)
 	/* The driver is wedged; don't process any more events. */
 }
 
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *rq, *rn;
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -2648,13 +3374,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 	if (__tasklet_enable(&execlists->tasklet))
 		/* And kick in case we missed a new request submission. */
 		tasklet_hi_schedule(&execlists->tasklet);
-	GEM_TRACE("%s: depth->%d\n", engine->name,
-		  atomic_read(&execlists->tasklet.count));
+	ENGINE_TRACE(engine, "depth->%d\n",
+		     atomic_read(&execlists->tasklet.count));
 }
 
-static int gen8_emit_bb_start(struct i915_request *rq,
-			      u64 offset, u32 len,
-			      const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+				    u64 offset, u32 len,
+				    const unsigned int flags)
 {
 	u32 *cs;
 
@@ -2688,7 +3414,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 	return 0;
 }
 
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
 			      u64 offset, u32 len,
 			      const unsigned int flags)
 {
@@ -2749,7 +3475,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
 	}
 
 	*cs++ = cmd;
-	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
 	*cs++ = 0; /* upper addr */
 	*cs++ = 0; /* value */
 	intel_ring_advance(request, cs);
@@ -2760,10 +3486,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
 static int gen8_emit_flush_render(struct i915_request *request,
 				  u32 mode)
 {
-	struct intel_engine_cs *engine = request->engine;
-	u32 scratch_addr =
-		intel_gt_scratch_offset(engine->gt,
-					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
 	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 *cs, flags = 0;
 	int len;
@@ -2785,7 +3507,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
 		/*
 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -2818,7 +3540,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
 					    0);
 
-	cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
 	if (dc_flush_wa)
 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -2831,11 +3553,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
 static int gen11_emit_flush_render(struct i915_request *request,
 				   u32 mode)
 {
-	struct intel_engine_cs *engine = request->engine;
-	const u32 scratch_addr =
-		intel_gt_scratch_offset(engine->gt,
-					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
 	if (mode & EMIT_FLUSH) {
 		u32 *cs;
 		u32 flags = 0;
@@ -2848,13 +3565,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
 		cs = intel_ring_begin(request, 6);
 		if (IS_ERR(cs))
 			return PTR_ERR(cs);
 
-		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 		intel_ring_advance(request, cs);
 	}
 
@@ -2872,19 +3589,111 @@ static int gen11_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
 		cs = intel_ring_begin(request, 6);
 		if (IS_ERR(cs))
 			return PTR_ERR(cs);
 
-		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 		intel_ring_advance(request, cs);
 	}
 
 	return 0;
 }
 
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static int gen12_emit_flush_render(struct i915_request *request,
+				   u32 mode)
+{
+	if (mode & EMIT_FLUSH) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		/* Wa_1409600907:tgl */
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(request, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(request, 8);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		/*
+		 * Prevent the pre-parser from skipping past the TLB
+		 * invalidate and loading a stale page for the batch
+		 * buffer / request payload.
+		 */
+		*cs++ = preparser_disable(true);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+		*cs++ = preparser_disable(false);
+		intel_ring_advance(request, cs);
+
+		/*
+		 * Wa_1604544889:tgl
+		 */
+		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
+			flags = 0;
+			flags |= PIPE_CONTROL_CS_STALL;
+			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+			flags |= PIPE_CONTROL_QW_WRITE;
+
+			cs = intel_ring_begin(request, 6);
+			if (IS_ERR(cs))
+				return PTR_ERR(cs);
+
+			cs = gen8_emit_pipe_control(cs, flags,
+						    LRC_PPHWSP_SCRATCH_ADDR);
+			intel_ring_advance(request, cs);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Reserve space for 2 NOOPs at the end of each request to be
  * used as a workaround for not being allowed to do lite
@@ -2933,7 +3742,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
 				  request->fence.seqno,
-				  request->timeline->hwsp_offset,
+				  i915_request_active_timeline(request)->hwsp_offset,
 				  0);
 
 	return gen8_emit_fini_breadcrumb_footer(request, cs);
@@ -2941,28 +3750,28 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 
 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
-	cs = gen8_emit_ggtt_write_rcs(cs,
-				      request->fence.seqno,
-				      request->timeline->hwsp_offset,
-				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-				      PIPE_CONTROL_DC_FLUSH_ENABLE);
-
-	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
 	cs = gen8_emit_pipe_control(cs,
-				    PIPE_CONTROL_FLUSH_ENABLE |
-				    PIPE_CONTROL_CS_STALL,
+				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				    PIPE_CONTROL_DC_FLUSH_ENABLE,
 				    0);
 
+	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->fence.seqno,
+				      i915_request_active_timeline(request)->hwsp_offset,
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_CS_STALL);
+
 	return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
-					   u32 *cs)
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      request->timeline->hwsp_offset,
+				      i915_request_active_timeline(request)->hwsp_offset,
 				      PIPE_CONTROL_CS_STALL |
 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -2973,20 +3782,99 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
 	return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = intel_hws_preempt_address(request->engine);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_USER_INTERRUPT;
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	if (intel_engine_has_semaphores(request->engine))
+		cs = gen12_emit_preempt_busywait(request, cs);
+
+	request->tail = intel_ring_offset(request, cs);
+	assert_ring_tail_valid(request->ring, request->tail);
+
+	return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write(cs,
+				  request->fence.seqno,
+				  i915_request_active_timeline(request)->hwsp_offset,
+				  0);
+
+	return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->fence.seqno,
+				      i915_request_active_timeline(request)->hwsp_offset,
+				      PIPE_CONTROL_CS_STALL |
+				      PIPE_CONTROL_TILE_CACHE_FLUSH |
+				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      /* Wa_1409600907:tgl */
+				      PIPE_CONTROL_DEPTH_STALL |
+				      PIPE_CONTROL_DC_FLUSH_ENABLE |
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+
+	return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
 static void execlists_park(struct intel_engine_cs *engine)
 {
-	del_timer(&engine->execlists.timer);
+	cancel_timer(&engine->execlists.timer);
+	cancel_timer(&engine->execlists.preempt);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
-	engine->cancel_requests = execlists_cancel_requests;
 	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.func = execlists_submission_tasklet;
 
 	engine->reset.prepare = execlists_reset_prepare;
-	engine->reset.reset = execlists_reset;
+	engine->reset.rewind = execlists_reset_rewind;
+	engine->reset.cancel = execlists_reset_cancel;
 	engine->reset.finish = execlists_reset_finish;
 
 	engine->park = execlists_park;
@@ -2998,13 +3886,30 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 	}
+
+	if (INTEL_GEN(engine->i915) >= 12)
+		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+	if (intel_engine_has_preemption(engine))
+		engine->emit_bb_start = gen8_emit_bb_start;
+	else
+		engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_shutdown(struct intel_engine_cs *engine)
 {
+	/* Synchronise with residual timers and any softirq they raise */
+	del_timer_sync(&engine->execlists.timer);
+	del_timer_sync(&engine->execlists.preempt);
+	tasklet_kill(&engine->execlists.tasklet);
+}
+
+static void execlists_release(struct intel_engine_cs *engine)
+{
+	execlists_shutdown(engine);
+
 	intel_engine_cleanup_common(engine);
 	lrc_destroy_wa_ctx(engine);
-	kfree(engine);
 }
 
 static void
@@ -3012,19 +3917,16 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 
-	engine->destroy = execlists_destroy;
 	engine->resume = execlists_resume;
 
-	engine->reset.prepare = execlists_reset_prepare;
-	engine->reset.reset = execlists_reset;
-	engine->reset.finish = execlists_reset_finish;
-
 	engine->cops = &execlists_context_ops;
 	engine->request_alloc = execlists_request_alloc;
 
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+	if (INTEL_GEN(engine->i915) >= 12)
+		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
 
 	engine->set_default_submission = intel_execlists_set_default_submission;
 
@@ -3039,10 +3941,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
-	if (IS_GEN(engine->i915, 8))
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else
-		engine->emit_bb_start = gen9_emit_bb_start;
 }
 
 static inline void
@@ -3070,6 +3968,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
 	case 12:
+		engine->emit_flush = gen12_emit_flush_render;
+		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+		break;
 	case 11:
 		engine->emit_flush = gen11_emit_flush_render;
 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
@@ -3083,9 +3984,15 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 
 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
+	u32 base = engine->mmio_base;
+
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
-	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
@@ -3093,21 +4000,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	if (engine->class == RENDER_CLASS)
 		rcs_submission_override(engine);
 
-	return 0;
-}
-
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_uncore *uncore = engine->uncore;
-	u32 base = engine->mmio_base;
-	int ret;
-
-	ret = intel_engine_init_common(engine);
-	if (ret)
-		return ret;
-
 	if (intel_init_workaround_bb(engine))
 		/*
 		 * We continue even if we fail to initialize WA batch
@@ -3139,10 +4031,13 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 
 	reset_csb_pointers(engine);
 
+	/* Finally, take ownership and responsibility for cleanup! */
+	engine->release = execlists_release;
+
 	return 0;
 }
 
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
 
@@ -3175,86 +4070,53 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 	return indirect_ctx_offset;
 }
 
-static void execlists_init_reg_state(u32 *regs,
-				     struct intel_context *ce,
-				     struct intel_engine_cs *engine,
-				     struct intel_ring *ring)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
-	bool rcs = engine->class == RENDER_CLASS;
-	u32 base = engine->mmio_base;
 
-	/*
-	 * A context is actually a big batch buffer with several
-	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
-	 * values we are setting here are only for the first context restore:
-	 * on a subsequent save, the GPU will recreate this batchbuffer with new
-	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
-	 * we are not initializing here).
-	 *
-	 * Must keep consistent with virtual_update_register_offsets().
-	 */
-	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
-				 MI_LRI_FORCE_POSTED;
+static void init_common_reg_state(u32 * const regs,
+				  const struct intel_engine_cs *engine,
+				  const struct intel_ring *ring,
+				  bool inhibit)
+{
+	u32 ctl;
+
+	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	if (inhibit)
+		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+	if (INTEL_GEN(engine->i915) < 11)
+		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+					   CTX_CTRL_RS_CTX_ENABLE);
+	regs[CTX_CONTEXT_CONTROL] = ctl;
+
+	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+}
 
-	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
-		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
-	if (INTEL_GEN(engine->i915) < 11) {
-		regs[CTX_CONTEXT_CONTROL + 1] |=
-			_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-					    CTX_CTRL_RS_CTX_ENABLE);
-	}
-	CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
-	CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
-	CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
-	CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
-		RING_CTL_SIZE(ring->size) | RING_VALID);
-	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
-	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
-	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
-	if (rcs) {
-		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
-			RING_INDIRECT_CTX_OFFSET(base), 0);
-		if (wa_ctx->indirect_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
-			regs[CTX_RCS_INDIRECT_CTX + 1] =
-				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
-				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
-			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
-				intel_lr_indirect_ctx_offset(engine) << 6;
-		}
+static void init_wa_bb_reg_state(u32 * const regs,
+				 const struct intel_engine_cs *engine,
+				 u32 pos_bb_per_ctx)
+{
+	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
 
-		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
-		if (wa_ctx->per_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+	if (wa_ctx->per_ctx.size) {
+		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-			regs[CTX_BB_PER_CTX_PTR + 1] =
-				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
-		}
+		regs[pos_bb_per_ctx] =
+			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
 	}
 
-	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+	if (wa_ctx->indirect_ctx.size) {
+		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+		regs[pos_bb_per_ctx + 2] =
+			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
+			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
 
-	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
-	/* PDP values well be assigned later if needed */
-	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
-	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
-	CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
-	CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
-	CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
-	CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
-	CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
-	CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+		regs[pos_bb_per_ctx + 4] =
+			intel_lr_indirect_ctx_offset(engine) << 6;
+	}
+}
 
+static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
 		/* 64b PPGTT (48bit canonical)
 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
@@ -3267,15 +4129,43 @@ static void execlists_init_reg_state(u32 *regs,
 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
 	}
+}
 
-	if (rcs) {
-		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-	}
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+	if (i915_is_ggtt(vm))
+		return i915_vm_to_ggtt(vm)->alias;
+	else
+		return i915_vm_to_ppgtt(vm);
+}
 
-	regs[CTX_END] = MI_BATCH_BUFFER_END;
-	if (INTEL_GEN(engine->i915) >= 10)
-		regs[CTX_END] |= BIT(0);
+static void execlists_init_reg_state(u32 *regs,
+				     const struct intel_context *ce,
+				     const struct intel_engine_cs *engine,
+				     const struct intel_ring *ring,
+				     bool inhibit)
+{
+	/*
+	 * A context is actually a big batch buffer with several
+	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+	 * values we are setting here are only for the first context restore:
+	 * on a subsequent save, the GPU will recreate this batchbuffer with new
+	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+	 * we are not initializing here).
+	 *
+	 * Must keep consistent with virtual_update_register_offsets().
+	 */
+	set_offsets(regs, reg_offsets(engine), engine, inhibit);
+
+	init_common_reg_state(regs, engine, ring, inhibit);
+	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
+
+	init_wa_bb_reg_state(regs, engine,
+			     INTEL_GEN(engine->i915) >= 12 ?
+			     GEN12_CTX_BB_PER_CTX_PTR :
+			     CTX_BB_PER_CTX_PTR);
+
+	__reset_stop_ring(regs, engine);
 }
 
 static int
@@ -3284,8 +4174,8 @@ populate_lr_context(struct intel_context *ce,
 		    struct intel_engine_cs *engine,
 		    struct intel_ring *ring)
 {
+	bool inhibit = true;
 	void *vaddr;
-	u32 *regs;
 	int ret;
 
 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -3298,12 +4188,6 @@ populate_lr_context(struct intel_context *ce,
 	set_redzone(vaddr, engine);
 
 	if (engine->default_state) {
-		/*
-		 * We only want to copy over the template context state;
-		 * skipping over the headers reserved for GuC communication,
-		 * leaving those as zero.
-		 */
-		const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
 		void *defaults;
 
 		defaults = i915_gem_object_pin_map(engine->default_state,
@@ -3313,23 +4197,20 @@ populate_lr_context(struct intel_context *ce,
 			goto err_unpin_ctx;
 		}
 
-		memcpy(vaddr + start, defaults + start, engine->context_size);
+		memcpy(vaddr, defaults, engine->context_size);
 		i915_gem_object_unpin_map(engine->default_state);
+		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
+		inhibit = false;
 	}
 
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
-	regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
-	execlists_init_reg_state(regs, ce, engine, ring);
-	if (!engine->default_state)
-		regs[CTX_CONTEXT_CONTROL + 1] |=
-			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+				 ce, engine, ring, inhibit);
 
 	ret = 0;
 err_unpin_ctx:
-	__i915_gem_object_flush_map(ctx_obj,
-				    LRC_HEADER_PAGES * PAGE_SIZE,
-				    engine->context_size);
+	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
 	i915_gem_object_unpin_map(ctx_obj);
 	return ret;
 }
@@ -3346,11 +4227,6 @@ static int __execlists_context_alloc(struct intel_context *ce,
 	GEM_BUG_ON(ce->state);
 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 
-	/*
-	 * Before the actual start of the context image, we insert a few pages
-	 * for our own use and for sharing with the GuC.
-	 */
-	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
@@ -3418,17 +4294,18 @@ static void virtual_context_destroy(struct kref *kref)
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->active.lock);
+		spin_lock_irqsave(&sibling->active.lock, flags);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->active.lock);
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -3462,8 +4339,16 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
 		return;
 
 	swap(ve->siblings[swp], ve->siblings[0]);
-	virtual_update_register_offsets(ve->context.lrc_reg_state,
-					ve->siblings[0]);
+	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+		virtual_update_register_offsets(ve->context.lrc_reg_state,
+						ve->siblings[0]);
+}
+
+static int virtual_context_alloc(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+	return __execlists_context_alloc(ce, ve->siblings[0]);
 }
 
 static int virtual_context_pin(struct intel_context *ce)
@@ -3503,6 +4388,8 @@ static void virtual_context_exit(struct intel_context *ce)
 }
 
 static const struct intel_context_ops virtual_context_ops = {
+	.alloc = virtual_context_alloc,
+
 	.pin = virtual_context_pin,
 	.unpin = execlists_context_unpin,
 
@@ -3529,10 +4416,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
 		mask = ve->siblings[0]->mask;
 	}
 
-	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
-		  ve->base.name,
-		  rq->fence.context, rq->fence.seqno,
-		  mask, ve->base.execlists.queue_priority_hint);
+	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+		     rq->fence.context, rq->fence.seqno,
+		     mask, ve->base.execlists.queue_priority_hint);
 
 	return mask;
 }
@@ -3623,10 +4509,9 @@ static void virtual_submit_request(struct i915_request *rq)
 	struct i915_request *old;
 	unsigned long flags;
 
-	GEM_TRACE("%s: rq=%llx:%lld\n",
-		  ve->base.name,
-		  rq->fence.context,
-		  rq->fence.seqno);
+	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+		     rq->fence.context,
+		     rq->fence.seqno);
 
 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
 
@@ -3694,8 +4579,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 }
 
 struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			       unsigned int count)
 {
 	struct virtual_engine *ve;
@@ -3706,18 +4590,21 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 		return ERR_PTR(-EINVAL);
 
 	if (count == 1)
-		return intel_context_create(ctx, siblings[0]);
+		return intel_context_create(siblings[0]);
 
 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
 	if (!ve)
 		return ERR_PTR(-ENOMEM);
 
-	ve->base.i915 = ctx->i915;
+	ve->base.i915 = siblings[0]->i915;
 	ve->base.gt = siblings[0]->gt;
+	ve->base.uncore = siblings[0]->uncore;
 	ve->base.id = -1;
+
 	ve->base.class = OTHER_CLASS;
 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 
 	/*
 	 * The decision on whether to submit a request using semaphores
@@ -3737,7 +4624,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
-
+	intel_engine_init_breadcrumbs(&ve->base);
 	intel_engine_init_execlists(&ve->base);
 
 	ve->base.cops = &virtual_context_ops;
@@ -3753,7 +4640,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 		     virtual_submission_tasklet,
 		     (unsigned long)ve);
 
-	intel_context_init(&ve->context, ctx, &ve->base);
+	intel_context_init(&ve->context, &ve->base);
 
 	for (n = 0; n < count; n++) {
 		struct intel_engine_cs *sibling = siblings[n];
@@ -3820,12 +4707,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
 
-	err = __execlists_context_alloc(&ve->context, siblings[0]);
-	if (err)
-		goto err_put;
-
-	__set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
 	return &ve->context;
 
 err_put:
@@ -3834,14 +4715,12 @@ err_put:
 }
 
 struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
 {
 	struct virtual_engine *se = to_virtual_engine(src);
 	struct intel_context *dst;
 
-	dst = intel_execlists_create_virtual(ctx,
-					     se->siblings,
+	dst = intel_execlists_create_virtual(se->siblings,
 					     se->num_siblings);
 	if (IS_ERR(dst))
 		return dst;
@@ -3899,6 +4778,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 	return 0;
 }
 
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+
+	if (sibling >= ve->num_siblings)
+		return NULL;
+
+	return ve->siblings[sibling];
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -3987,6 +4878,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 			    u32 head,
 			    bool scrub)
 {
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
 	 * We cannot rely on the context being intact across the GPU hang,
@@ -3995,16 +4888,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	if (scrub) {
-		u32 *regs = ce->lrc_reg_state;
-
-		if (engine->pinned_default_state) {
-			memcpy(regs, /* skip restoring the vanilla PPHWSP */
-			       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
-			       engine->context_size - PAGE_SIZE);
-		}
-		execlists_init_reg_state(regs, ce, engine, ce->ring);
-	}
+	if (scrub)
+		restore_default_state(ce, engine);
 
 	/* Rerun the request; its payload has been neutered (if guilty). */
 	ce->ring->head = head;
@@ -4013,6 +4898,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 	__execlists_update_reg_state(ce, engine);
 }
 
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
+{
+	return engine->set_default_submission ==
+	       intel_execlists_set_default_submission;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index c2bba82bcc16..dfbc214e14f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@ struct intel_engine_cs;
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE		(1 << 1)
 #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	(1 << 2)
+#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	(1 << 8)
 #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
 #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
 #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
@@ -66,6 +67,12 @@ struct intel_engine_cs;
 #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
 #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
 
+#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
+#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
+/* in Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
+
 enum {
 	INTEL_CONTEXT_SCHEDULE_IN = 0,
 	INTEL_CONTEXT_SCHEDULE_OUT,
@@ -76,33 +83,17 @@ enum {
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
 
 int intel_execlists_submission_setup(struct intel_engine_cs *engine);
-int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
-
-/*
- * We allocate a header at the start of the context image for our own
- * use, therefore the actual location of the logical state is offset
- * from the start of the VMA. The layout is
- *
- * | [guc]          | [hwsp] [logical state] |
- * |<- our header ->|<- context image      ->|
- *
- */
-/* The first page is used for sharing data with the GuC */
-#define LRC_GUCSHR_PN	(0)
-#define LRC_GUCSHR_SZ	(1)
 /* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN	(LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
+#define LRC_PPHWSP_PN	(0)
 #define LRC_PPHWSP_SZ	(1)
-/* Finally we have the logical state for the context */
+/* After the PPHWSP we have the logical state for the context */
 #define LRC_STATE_PN	(LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
 
-/*
- * Currently we include the PPHWSP in __intel_engine_context_size() so
- * the size of the header is synonymous with the start of the PPHWSP.
- */
-#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH		0x34
+#define LRC_PPHWSP_SCRATCH_ADDR		(LRC_PPHWSP_SCRATCH * sizeof(u32))
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
@@ -119,16 +110,21 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   unsigned int max);
 
 struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			       unsigned int count);
 
 struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src);
+intel_execlists_clone_virtual(struct intel_engine_cs *src);
 
 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 				     const struct intel_engine_cs *master,
 				     const struct intel_engine_cs *sibling);
 
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling);
+
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index b8f20ad71169..08a3be65f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,55 +9,41 @@
 
 #include <linux/types.h>
 
-/* GEN8+ Reg State Context */
-#define CTX_LRI_HEADER_0		0x01
-#define CTX_CONTEXT_CONTROL		0x02
-#define CTX_RING_HEAD			0x04
-#define CTX_RING_TAIL			0x06
-#define CTX_RING_BUFFER_START		0x08
-#define CTX_RING_BUFFER_CONTROL		0x0a
-#define CTX_BB_HEAD_U			0x0c
-#define CTX_BB_HEAD_L			0x0e
-#define CTX_BB_STATE			0x10
-#define CTX_SECOND_BB_HEAD_U		0x12
-#define CTX_SECOND_BB_HEAD_L		0x14
-#define CTX_SECOND_BB_STATE		0x16
-#define CTX_BB_PER_CTX_PTR		0x18
-#define CTX_RCS_INDIRECT_CTX		0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
-#define CTX_LRI_HEADER_1		0x21
-#define CTX_CTX_TIMESTAMP		0x22
-#define CTX_PDP3_UDW			0x24
-#define CTX_PDP3_LDW			0x26
-#define CTX_PDP2_UDW			0x28
-#define CTX_PDP2_LDW			0x2a
-#define CTX_PDP1_UDW			0x2c
-#define CTX_PDP1_LDW			0x2e
-#define CTX_PDP0_UDW			0x30
-#define CTX_PDP0_LDW			0x32
-#define CTX_LRI_HEADER_2		0x41
-#define CTX_R_PWR_CLK_STATE		0x42
-#define CTX_END				0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
-	u32 *reg_state__ = (reg_state); \
-	const u32 pos__ = (pos); \
-	(reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
-	(reg_state__)[(pos__) + 1] = (val); \
-} while (0)
+/* GEN8 to GEN11 Reg State Context */
+#define CTX_CONTEXT_CONTROL		(0x02 + 1)
+#define CTX_RING_HEAD			(0x04 + 1)
+#define CTX_RING_TAIL			(0x06 + 1)
+#define CTX_RING_START			(0x08 + 1)
+#define CTX_RING_CTL			(0x0a + 1)
+#define CTX_BB_STATE			(0x10 + 1)
+#define CTX_BB_PER_CTX_PTR		(0x18 + 1)
+#define CTX_PDP3_UDW			(0x24 + 1)
+#define CTX_PDP3_LDW			(0x26 + 1)
+#define CTX_PDP2_UDW			(0x28 + 1)
+#define CTX_PDP2_LDW			(0x2a + 1)
+#define CTX_PDP1_UDW			(0x2c + 1)
+#define CTX_PDP1_LDW			(0x2e + 1)
+#define CTX_PDP0_UDW			(0x30 + 1)
+#define CTX_PDP0_LDW			(0x32 + 1)
+#define CTX_R_PWR_CLK_STATE		(0x42 + 1)
+
+#define GEN9_CTX_RING_MI_MODE		0x54
+
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_BB_PER_CTX_PTR		(0x12 + 1)
 
 #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
 	u32 *reg_state__ = (reg_state); \
 	const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
-	(reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
-	(reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+	(reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
+	(reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
 } while (0)
 
 #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
 	u32 *reg_state__ = (reg_state); \
 	const u64 addr__ = px_dma(ppgtt->pd); \
-	(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
-	(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+	(reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
+	(reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
 } while (0)
 
 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index cea184a7dde9..eeef90b55c64 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_ring.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
@@ -126,7 +127,7 @@ struct drm_i915_mocs_table {
 		   LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
 		   L3_3_WB)
 
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+static const struct drm_i915_mocs_entry skl_mocs_table[] = {
 	GEN9_MOCS_ENTRIES,
 	MOCS_ENTRY(I915_MOCS_CACHED,
 		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
@@ -232,7 +233,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 		   L3_1_UC)
 
-static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
+static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
 	/* Base - Error (Reserved for Non-Use) */
 	MOCS_ENTRY(0, 0x0, 0x0),
 	/* Base - Reserved */
@@ -266,7 +267,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
 		   L3_3_WB),
 };
 
-static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+static const struct drm_i915_mocs_entry icl_mocs_table[] = {
 	/* Base - Uncached (Deprecated) */
 	MOCS_ENTRY(I915_MOCS_UNCACHED,
 		   LE_1_UC | LE_TC_1_LLC,
@@ -279,69 +280,45 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
 	GEN11_MOCS_ENTRIES
 };
 
-static bool get_mocs_settings(struct intel_gt *gt,
+static bool get_mocs_settings(const struct drm_i915_private *i915,
 			      struct drm_i915_mocs_table *table)
 {
-	struct drm_i915_private *i915 = gt->i915;
-	bool result = false;
-
 	if (INTEL_GEN(i915) >= 12) {
-		table->size  = ARRAY_SIZE(tigerlake_mocs_table);
-		table->table = tigerlake_mocs_table;
+		table->size  = ARRAY_SIZE(tgl_mocs_table);
+		table->table = tgl_mocs_table;
 		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
-		result = true;
 	} else if (IS_GEN(i915, 11)) {
-		table->size  = ARRAY_SIZE(icelake_mocs_table);
-		table->table = icelake_mocs_table;
+		table->size  = ARRAY_SIZE(icl_mocs_table);
+		table->table = icl_mocs_table;
 		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
-		result = true;
 	} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
-		table->size  = ARRAY_SIZE(skylake_mocs_table);
+		table->size  = ARRAY_SIZE(skl_mocs_table);
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		table->table = skylake_mocs_table;
-		result = true;
+		table->table = skl_mocs_table;
 	} else if (IS_GEN9_LP(i915)) {
 		table->size  = ARRAY_SIZE(broxton_mocs_table);
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->table = broxton_mocs_table;
-		result = true;
 	} else {
 		WARN_ONCE(INTEL_GEN(i915) >= 9,
 			  "Platform that should have a MOCS table does not.\n");
+		return false;
 	}
 
+	if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
+		return false;
+
 	/* WaDisableSkipCaching:skl,bxt,kbl,glk */
 	if (IS_GEN(i915, 9)) {
 		int i;
 
 		for (i = 0; i < table->size; i++)
-			if (WARN_ON(table->table[i].l3cc_value &
-				    (L3_ESC(1) | L3_SCC(0x7))))
+			if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
+					      (L3_ESC(1) | L3_SCC(0x7))))
 				return false;
 	}
 
-	return result;
-}
-
-static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
-{
-	switch (engine_id) {
-	case RCS0:
-		return GEN9_GFX_MOCS(index);
-	case VCS0:
-		return GEN9_MFX0_MOCS(index);
-	case BCS0:
-		return GEN9_BLT_MOCS(index);
-	case VECS0:
-		return GEN9_VEBOX_MOCS(index);
-	case VCS1:
-		return GEN9_MFX1_MOCS(index);
-	case VCS2:
-		return GEN11_MFX2_MOCS(index);
-	default:
-		MISSING_CASE(engine_id);
-		return INVALID_MMIO_REG;
-	}
+	return true;
 }
 
 /*
@@ -351,122 +328,47 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 			     unsigned int index)
 {
-	if (table->table[index].used)
+	if (index < table->size && table->table[index].used)
 		return table->table[index].control_value;
 
 	return table->table[I915_MOCS_PTE].control_value;
 }
 
-/**
- * intel_mocs_init_engine() - emit the mocs control table
- * @engine:	The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- */
-void intel_mocs_init_engine(struct intel_engine_cs *engine)
-{
-	struct intel_gt *gt = engine->gt;
-	struct intel_uncore *uncore = gt->uncore;
-	struct drm_i915_mocs_table table;
-	unsigned int index;
-	u32 unused_value;
-
-	/* Platforms with global MOCS do not need per-engine initialization. */
-	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
-		return;
-
-	/* Called under a blanket forcewake */
-	assert_forcewakes_active(uncore, FORCEWAKE_ALL);
-
-	if (!get_mocs_settings(gt, &table))
-		return;
-
-	/* Set unused values to PTE */
-	unused_value = table.table[I915_MOCS_PTE].control_value;
-
-	for (index = 0; index < table.size; index++) {
-		u32 value = get_entry_control(&table, index);
+#define for_each_mocs(mocs, t, i) \
+	for (i = 0; \
+	     i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
+	     i++)
 
-		intel_uncore_write_fw(uncore,
-				      mocs_register(engine->id, index),
-				      value);
-	}
+static void __init_mocs_table(struct intel_uncore *uncore,
+			      const struct drm_i915_mocs_table *table,
+			      u32 addr)
+{
+	unsigned int i;
+	u32 mocs;
 
-	/* All remaining entries are also unused */
-	for (; index < table.n_entries; index++)
-		intel_uncore_write_fw(uncore,
-				      mocs_register(engine->id, index),
-				      unused_value);
+	for_each_mocs(mocs, table, i)
+		intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
 }
 
-static void intel_mocs_init_global(struct intel_gt *gt)
+static u32 mocs_offset(const struct intel_engine_cs *engine)
 {
-	struct intel_uncore *uncore = gt->uncore;
-	struct drm_i915_mocs_table table;
-	unsigned int index;
-
-	GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
-
-	if (!get_mocs_settings(gt, &table))
-		return;
-
-	if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
-		return;
-
-	for (index = 0; index < table.size; index++)
-		intel_uncore_write(uncore,
-				   GEN12_GLOBAL_MOCS(index),
-				   table.table[index].control_value);
-
-	/*
-	 * Ok, now set the unused entries to the invalid entry (index 0). These
-	 * entries are officially undefined and no contract for the contents and
-	 * settings is given for these entries.
-	 */
-	for (; index < table.n_entries; index++)
-		intel_uncore_write(uncore,
-				   GEN12_GLOBAL_MOCS(index),
-				   table.table[0].control_value);
+	static const u32 offset[] = {
+		[RCS0]  =  __GEN9_RCS0_MOCS0,
+		[VCS0]  =  __GEN9_VCS0_MOCS0,
+		[VCS1]  =  __GEN9_VCS1_MOCS0,
+		[VECS0] =  __GEN9_VECS0_MOCS0,
+		[BCS0]  =  __GEN9_BCS0_MOCS0,
+		[VCS2]  = __GEN11_VCS2_MOCS0,
+	};
+
+	GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
+	return offset[engine->id];
 }
 
-static int emit_mocs_control_table(struct i915_request *rq,
-				   const struct drm_i915_mocs_table *table)
+static void init_mocs_table(struct intel_engine_cs *engine,
+			    const struct drm_i915_mocs_table *table)
 {
-	enum intel_engine_id engine = rq->engine->id;
-	unsigned int index;
-	u32 unused_value;
-	u32 *cs;
-
-	if (GEM_WARN_ON(table->size > table->n_entries))
-		return -ENODEV;
-
-	/* Set unused values to PTE */
-	unused_value = table->table[I915_MOCS_PTE].control_value;
-
-	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
-
-	for (index = 0; index < table->size; index++) {
-		u32 value = get_entry_control(table, index);
-
-		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
-		*cs++ = value;
-	}
-
-	/* All remaining entries are also unused */
-	for (; index < table->n_entries; index++) {
-		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
-		*cs++ = unused_value;
-	}
-
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	__init_mocs_table(engine->uncore, table, mocs_offset(engine));
 }
 
 /*
@@ -476,151 +378,81 @@ static int emit_mocs_control_table(struct i915_request *rq,
 static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
 			  unsigned int index)
 {
-	if (table->table[index].used)
+	if (index < table->size && table->table[index].used)
 		return table->table[index].l3cc_value;
 
 	return table->table[I915_MOCS_PTE].l3cc_value;
 }
 
-static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
-			       u16 low,
-			       u16 high)
+static inline u32 l3cc_combine(u16 low, u16 high)
 {
-	return low | high << 16;
+	return low | (u32)high << 16;
 }
 
-static int emit_mocs_l3cc_table(struct i915_request *rq,
-				const struct drm_i915_mocs_table *table)
+#define for_each_l3cc(l3cc, t, i) \
+	for (i = 0; \
+	     i < ((t)->n_entries + 1) / 2 ? \
+	     (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
+				  get_entry_l3cc((t), 2 * i + 1))), 1 : \
+	     0; \
+	     i++)
+
+static void init_l3cc_table(struct intel_engine_cs *engine,
+			    const struct drm_i915_mocs_table *table)
 {
-	u16 unused_value;
+	struct intel_uncore *uncore = engine->uncore;
 	unsigned int i;
-	u32 *cs;
-
-	if (GEM_WARN_ON(table->size > table->n_entries))
-		return -ENODEV;
-
-	/* Set unused values to PTE */
-	unused_value = table->table[I915_MOCS_PTE].l3cc_value;
+	u32 l3cc;
 
-	cs = intel_ring_begin(rq, 2 + table->n_entries);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
-
-	for (i = 0; i < table->size / 2; i++) {
-		u16 low = get_entry_l3cc(table, 2 * i);
-		u16 high = get_entry_l3cc(table, 2 * i + 1);
+	for_each_l3cc(l3cc, table, i)
+		intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
+}
 
-		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
-		*cs++ = l3cc_combine(table, low, high);
-	}
+void intel_mocs_init_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_mocs_table table;
 
-	/* Odd table size - 1 left over */
-	if (table->size & 0x01) {
-		u16 low = get_entry_l3cc(table, 2 * i);
+	/* Called under a blanket forcewake */
+	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
 
-		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
-		*cs++ = l3cc_combine(table, low, unused_value);
-		i++;
-	}
+	if (!get_mocs_settings(engine->i915, &table))
+		return;
 
-	/* All remaining entries are also unused */
-	for (; i < table->n_entries / 2; i++) {
-		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
-		*cs++ = l3cc_combine(table, unused_value, unused_value);
-	}
+	/* Platforms with global MOCS do not need per-engine initialization. */
+	if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+		init_mocs_table(engine, &table);
 
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
+	if (engine->class == RENDER_CLASS)
+		init_l3cc_table(engine, &table);
+}
 
-	return 0;
+static u32 global_mocs_offset(void)
+{
+	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
-static void intel_mocs_init_l3cc_table(struct intel_gt *gt)
+static void init_global_mocs(struct intel_gt *gt)
 {
-	struct intel_uncore *uncore = gt->uncore;
 	struct drm_i915_mocs_table table;
-	unsigned int i;
-	u16 unused_value;
 
-	if (!get_mocs_settings(gt, &table))
+	/*
+	 * LLC and eDRAM control values are not applicable to dgfx
+	 */
+	if (IS_DGFX(gt->i915))
 		return;
 
-	/* Set unused values to PTE */
-	unused_value = table.table[I915_MOCS_PTE].l3cc_value;
-
-	for (i = 0; i < table.size / 2; i++) {
-		u16 low = get_entry_l3cc(&table, 2 * i);
-		u16 high = get_entry_l3cc(&table, 2 * i + 1);
-
-		intel_uncore_write(uncore,
-				   GEN9_LNCFCMOCS(i),
-				   l3cc_combine(&table, low, high));
-	}
-
-	/* Odd table size - 1 left over */
-	if (table.size & 0x01) {
-		u16 low = get_entry_l3cc(&table, 2 * i);
-
-		intel_uncore_write(uncore,
-				   GEN9_LNCFCMOCS(i),
-				   l3cc_combine(&table, low, unused_value));
-		i++;
-	}
-
-	/* All remaining entries are also unused */
-	for (; i < table.n_entries / 2; i++)
-		intel_uncore_write(uncore,
-				   GEN9_LNCFCMOCS(i),
-				   l3cc_combine(&table, unused_value,
-						unused_value));
-}
-
-/**
- * intel_mocs_emit() - program the MOCS register.
- * @rq:	Request to use to set up the MOCS tables.
- *
- * This function will emit a batch buffer with the values required for
- * programming the MOCS register values for all the currently supported
- * rings.
- *
- * These registers are partially stored in the RCS context, so they are
- * emitted at the same time so that when a context is created these registers
- * are set up. These registers have to be emitted into the start of the
- * context as setting the ELSP will re-init some of these registers back
- * to the hw values.
- *
- * Return: 0 on success, otherwise the error status.
- */
-int intel_mocs_emit(struct i915_request *rq)
-{
-	struct drm_i915_mocs_table t;
-	int ret;
-
-	if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) ||
-	    rq->engine->class != RENDER_CLASS)
-		return 0;
-
-	if (get_mocs_settings(rq->engine->gt, &t)) {
-		/* Program the RCS control registers */
-		ret = emit_mocs_control_table(rq, &t);
-		if (ret)
-			return ret;
-
-		/* Now program the l3cc registers */
-		ret = emit_mocs_l3cc_table(rq, &t);
-		if (ret)
-			return ret;
-	}
+	if (!get_mocs_settings(gt->i915, &table))
+		return;
 
-	return 0;
+	__init_mocs_table(gt->uncore, &table, global_mocs_offset());
 }
 
 void intel_mocs_init(struct intel_gt *gt)
 {
-	intel_mocs_init_l3cc_table(gt);
-
 	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
-		intel_mocs_init_global(gt);
+		init_global_mocs(gt);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..83371f3e6ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,13 +49,10 @@
  * context handling keep the MOCS in step.
  */
 
-struct i915_request;
 struct intel_engine_cs;
 struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
 
-int intel_mocs_emit(struct i915_request *rq);
-
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
new file mode 100644
index 000000000000..f86f7e68ce5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "i915_trace.h"
+#include "intel_gtt.h"
+#include "gen6_ppgtt.h"
+#include "gen8_ppgtt.h"
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+{
+	struct i915_page_table *pt;
+
+	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
+	if (unlikely(!pt))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, &pt->base))) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	atomic_set(&pt->used, 0);
+	return pt;
+}
+
+struct i915_page_directory *__alloc_pd(size_t sz)
+{
+	struct i915_page_directory *pd;
+
+	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+	if (unlikely(!pd))
+		return NULL;
+
+	spin_lock_init(&pd->lock);
+	return pd;
+}
+
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
+{
+	struct i915_page_directory *pd;
+
+	pd = __alloc_pd(sizeof(*pd));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return pd;
+}
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+{
+	cleanup_page_dma(vm, pd);
+	kfree(pd);
+}
+
+static inline void
+write_dma_entry(struct i915_page_dma * const pdma,
+		const unsigned short idx,
+		const u64 encoded_entry)
+{
+	u64 * const vaddr = kmap_atomic(pdma->page);
+
+	vaddr[idx] = encoded_entry;
+	kunmap_atomic(vaddr);
+}
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       struct i915_page_dma * const to,
+	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+{
+	/* Each thread pre-pins the pd, and we may have a thread per pde. */
+	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+
+	atomic_inc(px_used(pd));
+	pd->entry[idx] = to;
+	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+}
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       const struct i915_page_scratch * const scratch)
+{
+	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+
+	write_dma_entry(px_base(pd), idx, scratch->encode);
+	pd->entry[idx] = NULL;
+	atomic_dec(px_used(pd));
+}
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+		 const unsigned short idx,
+		 struct i915_page_table * const pt,
+		 const struct i915_page_scratch * const scratch)
+{
+	bool free = false;
+
+	if (atomic_add_unless(&pt->used, -1, 1))
+		return false;
+
+	spin_lock(&pd->lock);
+	if (atomic_dec_and_test(&pt->used)) {
+		clear_pd_entry(pd, idx, scratch);
+		free = true;
+	}
+	spin_unlock(&pd->lock);
+
+	return free;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	gtt_write_workarounds(gt);
+
+	if (IS_GEN(i915, 6))
+		gen6_ppgtt_enable(gt);
+	else if (IS_GEN(i915, 7))
+		gen7_ppgtt_enable(gt);
+
+	return 0;
+}
+
+static struct i915_ppgtt *
+__ppgtt_create(struct intel_gt *gt)
+{
+	if (INTEL_GEN(gt->i915) < 8)
+		return gen6_ppgtt_create(gt);
+	else
+		return gen8_ppgtt_create(gt);
+}
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ppgtt *ppgtt;
+
+	ppgtt = __ppgtt_create(gt);
+	if (IS_ERR(ppgtt))
+		return ppgtt;
+
+	trace_i915_ppgtt_create(&ppgtt->vm);
+
+	return ppgtt;
+}
+
+static int ppgtt_bind_vma(struct i915_vma *vma,
+			  enum i915_cache_level cache_level,
+			  u32 flags)
+{
+	u32 pte_flags;
+	int err;
+
+	if (flags & I915_VMA_ALLOC) {
+		err = vma->vm->allocate_va_range(vma->vm,
+						 vma->node.start, vma->size);
+		if (err)
+			return err;
+
+		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+	}
+
+	/* Applicable to VLV, and gen8+ */
+	pte_flags = 0;
+	if (i915_gem_object_is_readonly(vma->obj))
+		pte_flags |= PTE_READ_ONLY;
+
+	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
+	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+	wmb();
+
+	return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+int ppgtt_set_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(vma->pages);
+
+	vma->pages = vma->obj->mm.pages;
+
+	vma->page_sizes = vma->obj->mm.page_sizes;
+
+	return 0;
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	ppgtt->vm.gt = gt;
+	ppgtt->vm.i915 = i915;
+	ppgtt->vm.dma = &i915->drm.pdev->dev;
+	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+
+	ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
+	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
+	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
+	ppgtt->vm.vma_ops.clear_pages = clear_pages;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
new file mode 100644
index 000000000000..9e303c29d6e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -0,0 +1,776 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter an very
+ * low-voltage mode when idle, using down to 0V while at this stage.  This
+ * stage is entered automatically when the GPU is idle when RC6 support is
+ * enabled, and as soon as new workload arises GPU wakes up automatically as
+ * well.
+ *
+ * There are different RC6 modes available in Intel GPU, which differentiate
+ * among each other with the latency required to enter and leave RC6 and
+ * voltage consumed by the GPU in different states.
+ *
+ * The combination of the following flags define which states GPU is allowed
+ * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
+ * RC6pp is deepest RC6. Their support by hardware varies according to the
+ * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
+ * which brings the most power savings; deeper states save more power, but
+ * require higher latency to switch to and wake up.
+ */
+
+static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
+{
+	return container_of(rc6, struct intel_gt, rc6);
+}
+
+static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
+{
+	return rc6_to_gt(rc)->uncore;
+}
+
+static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
+{
+	return rc6_to_gt(rc)->i915;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+	intel_uncore_write_fw(uncore, reg, val);
+}
+
+static void gen11_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* 2b: Program RC6 thresholds.*/
+	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+	set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+	set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+	set(uncore, GEN6_RC_SLEEP, 0);
+
+	set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+
+	/*
+	 * 2c: Program Coarse Power Gating Policies.
+	 *
+	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+	 * use instead is a more conservative estimate for the maximum time
+	 * it takes us to service a CS interrupt and submit a new ELSP - that
+	 * is the time which the GPU is idle waiting for the CPU to select the
+	 * next request to execute. If the idle hysteresis is less than that
+	 * interrupt service latency, the hardware will automatically gate
+	 * the power well and we will then incur the wake up cost on top of
+	 * the service latency. A similar guide from plane_state is that we
+	 * do not want the enable hysteresis to less than the wakeup latency.
+	 *
+	 * igt/gem_exec_nop/sequential provides a rough estimate for the
+	 * service latency, and puts it under 10us for Icelake, similar to
+	 * Broadwell+, To be conservative, we want to factor in a context
+	 * switch on top (due to ksoftirqd).
+	 */
+	set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
+	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
+
+	/* 3a: Enable RC6 */
+	rc6->ctl_enable =
+		GEN6_RC_CTL_HW_ENABLE |
+		GEN6_RC_CTL_RC6_ENABLE |
+		GEN6_RC_CTL_EI_MODE(1);
+
+	set(uncore, GEN9_PG_ENABLE,
+	    GEN9_RENDER_PG_ENABLE |
+	    GEN9_MEDIA_PG_ENABLE |
+	    GEN11_MEDIA_SAMPLER_PG_ENABLE);
+}
+
+static void gen9_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 rc6_mode;
+
+	/* 2b: Program RC6 thresholds.*/
+	if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
+		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+		set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+	} else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
+		/*
+		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+		 * when CPG is enabled
+		 */
+		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+	} else {
+		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+	}
+
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+	set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+	set(uncore, GEN6_RC_SLEEP, 0);
+
+	/*
+	 * 2c: Program Coarse Power Gating Policies.
+	 *
+	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+	 * use instead is a more conservative estimate for the maximum time
+	 * it takes us to service a CS interrupt and submit a new ELSP - that
+	 * is the time which the GPU is idle waiting for the CPU to select the
+	 * next request to execute. If the idle hysteresis is less than that
+	 * interrupt service latency, the hardware will automatically gate
+	 * the power well and we will then incur the wake up cost on top of
+	 * the service latency. A similar guide from plane_state is that we
+	 * do not want the enable hysteresis to less than the wakeup latency.
+	 *
+	 * igt/gem_exec_nop/sequential provides a rough estimate for the
+	 * service latency, and puts it around 10us for Broadwell (and other
+	 * big core) and around 40us for Broxton (and other low power cores).
+	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
+	 * However, the wakeup latency on Broxton is closer to 100us. To be
+	 * conservative, we have to factor in a context switch on top (due
+	 * to ksoftirqd).
+	 */
+	set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+	/* 3a: Enable RC6 */
+	set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+	/* WaRsUseTimeoutMode:cnl (pre-prod) */
+	if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
+		rc6_mode = GEN7_RC_CTL_TO_MODE;
+	else
+		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+	rc6->ctl_enable =
+		GEN6_RC_CTL_HW_ENABLE |
+		GEN6_RC_CTL_RC6_ENABLE |
+		rc6_mode;
+
+	/*
+	 * WaRsDisableCoarsePowerGating:skl,cnl
+	 *   - Render/Media PG need to be disabled with RC6.
+	 */
+	if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+		set(uncore, GEN9_PG_ENABLE,
+		    GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+}
+
+static void gen8_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* 2b: Program RC6 thresholds.*/
+	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+	set(uncore, GEN6_RC_SLEEP, 0);
+	set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+	/* 3: Enable RC6 */
+	rc6->ctl_enable =
+	    GEN6_RC_CTL_HW_ENABLE |
+	    GEN7_RC_CTL_TO_MODE |
+	    GEN6_RC_CTL_RC6_ENABLE;
+}
+
+static void gen6_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 rc6vids, rc6_mask;
+	int ret;
+
+	set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+	set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+	set(uncore, GEN6_RC_SLEEP, 0);
+	set(uncore, GEN6_RC1e_THRESHOLD, 1000);
+	if (IS_IVYBRIDGE(i915))
+		set(uncore, GEN6_RC6_THRESHOLD, 125000);
+	else
+		set(uncore, GEN6_RC6_THRESHOLD, 50000);
+	set(uncore, GEN6_RC6p_THRESHOLD, 150000);
+	set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+	/* We don't use those on Haswell */
+	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+	if (HAS_RC6p(i915))
+		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+	if (HAS_RC6pp(i915))
+		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+	rc6->ctl_enable =
+	    rc6_mask |
+	    GEN6_RC_CTL_EI_MODE(1) |
+	    GEN6_RC_CTL_HW_ENABLE;
+
+	rc6vids = 0;
+	ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+				     &rc6vids, NULL);
+	if (IS_GEN(i915, 6) && ret) {
+		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+	} else if (IS_GEN(i915, 6) &&
+		   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+				 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+		rc6vids &= 0xffff00;
+		rc6vids |= GEN6_ENCODE_RC6_VID(450);
+		ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+		if (ret)
+			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+	}
+}
+
+/* Check that the pcbr address is not empty. */
+static int chv_rc6_init(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	resource_size_t pctx_paddr, paddr;
+	resource_size_t pctx_size = 32 * SZ_1K;
+	u32 pcbr;
+
+	pcbr = intel_uncore_read(uncore, VLV_PCBR);
+	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+		paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+		GEM_BUG_ON(paddr > U32_MAX);
+
+		pctx_paddr = (paddr & ~4095);
+		intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+	}
+
+	return 0;
+}
+
+static int vlv_rc6_init(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct drm_i915_gem_object *pctx;
+	resource_size_t pctx_paddr;
+	resource_size_t pctx_size = 24 * SZ_1K;
+	u32 pcbr;
+
+	pcbr = intel_uncore_read(uncore, VLV_PCBR);
+	if (pcbr) {
+		/* BIOS set it up already, grab the pre-alloc'd space */
+		resource_size_t pcbr_offset;
+
+		pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
+		pctx = i915_gem_object_create_stolen_for_preallocated(i915,
+								      pcbr_offset,
+								      I915_GTT_OFFSET_NONE,
+								      pctx_size);
+		if (IS_ERR(pctx))
+			return PTR_ERR(pctx);
+
+		goto out;
+	}
+
+	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+	/*
+	 * From the Gunit register HAS:
+	 * The Gfx driver is expected to program this register and ensure
+	 * proper allocation within Gfx stolen memory.  For example, this
+	 * register should be programmed such than the PCBR range does not
+	 * overlap with other ranges, such as the frame buffer, protected
+	 * memory, or any other relevant ranges.
+	 */
+	pctx = i915_gem_object_create_stolen(i915, pctx_size);
+	if (IS_ERR(pctx)) {
+		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+		return PTR_ERR(pctx);
+	}
+
+	GEM_BUG_ON(range_overflows_t(u64,
+				     i915->dsm.start,
+				     pctx->stolen->start,
+				     U32_MAX));
+	pctx_paddr = i915->dsm.start + pctx->stolen->start;
+	intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+
+out:
+	rc6->pctx = pctx;
+	return 0;
+}
+
+static void chv_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* 2a: Program RC6 thresholds.*/
+	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+	set(uncore, GEN6_RC_SLEEP, 0);
+
+	/* TO threshold set to 500 us (0x186 * 1.28 us) */
+	set(uncore, GEN6_RC6_THRESHOLD, 0x186);
+
+	/* Allows RC6 residency counter to work */
+	set(uncore, VLV_COUNTER_CONTROL,
+	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+			       VLV_MEDIA_RC6_COUNT_EN |
+			       VLV_RENDER_RC6_COUNT_EN));
+
+	/* 3: Enable RC6 */
+	rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
+}
+
+static void vlv_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, rc6_to_gt(rc6), id)
+		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+	set(uncore, GEN6_RC6_THRESHOLD, 0x557);
+
+	/* Allows RC6 residency counter to work */
+	set(uncore, VLV_COUNTER_CONTROL,
+	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+			       VLV_MEDIA_RC0_COUNT_EN |
+			       VLV_RENDER_RC0_COUNT_EN |
+			       VLV_MEDIA_RC6_COUNT_EN |
+			       VLV_RENDER_RC6_COUNT_EN));
+
+	rc6->ctl_enable =
+	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
+}
+
+static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	u32 rc6_ctx_base, rc_ctl, rc_sw_target;
+	bool enable_rc6 = true;
+
+	rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+	rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
+	rc_sw_target &= RC_SW_TARGET_STATE_MASK;
+	rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
+	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+			 rc_sw_target);
+
+	if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	/*
+	 * The exact context size is not known for BXT, so assume a page size
+	 * for this check.
+	 */
+	rc6_ctx_base =
+		intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+	if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
+	      rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
+		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+		enable_rc6 = false;
+	}
+
+	if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
+	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
+	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
+	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
+		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
+	    !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
+	    !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
+		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
+		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
+		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	return enable_rc6;
+}
+
+static bool rc6_supported(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+	if (!HAS_RC6(i915))
+		return false;
+
+	if (intel_vgpu_active(i915))
+		return false;
+
+	if (is_mock_gt(rc6_to_gt(rc6)))
+		return false;
+
+	if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
+		dev_notice(i915->drm.dev,
+			   "RC6 and powersaving disabled by BIOS\n");
+		return false;
+	}
+
+	return true;
+}
+
+static void rpm_get(struct intel_rc6 *rc6)
+{
+	GEM_BUG_ON(rc6->wakeref);
+	pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
+	rc6->wakeref = true;
+}
+
+static void rpm_put(struct intel_rc6 *rc6)
+{
+	GEM_BUG_ON(!rc6->wakeref);
+	pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
+	rc6->wakeref = false;
+}
+
+static bool pctx_corrupted(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+		return false;
+
+	if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
+		return false;
+
+	dev_notice(i915->drm.dev,
+		   "RC6 context corruption, disabling runtime power management\n");
+	return true;
+}
+
+static void __intel_rc6_disable(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+	if (INTEL_GEN(i915) >= 9)
+		set(uncore, GEN9_PG_ENABLE, 0);
+	set(uncore, GEN6_RC_CONTROL, 0);
+	set(uncore, GEN6_RC_STATE, 0);
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+void intel_rc6_init(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	int err;
+
+	/* Disable runtime-pm until we can save the GPU state with rc6 pctx */
+	rpm_get(rc6);
+
+	if (!rc6_supported(rc6))
+		return;
+
+	if (IS_CHERRYVIEW(i915))
+		err = chv_rc6_init(rc6);
+	else if (IS_VALLEYVIEW(i915))
+		err = vlv_rc6_init(rc6);
+	else
+		err = 0;
+
+	/* Sanitize rc6, ensure it is disabled before we are ready. */
+	__intel_rc6_disable(rc6);
+
+	rc6->supported = err == 0;
+}
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6)
+{
+	if (rc6->enabled) { /* unbalanced suspend/resume */
+		rpm_get(rc6);
+		rc6->enabled = false;
+	}
+
+	if (rc6->supported)
+		__intel_rc6_disable(rc6);
+}
+
+void intel_rc6_enable(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	if (!rc6->supported)
+		return;
+
+	GEM_BUG_ON(rc6->enabled);
+
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	if (IS_CHERRYVIEW(i915))
+		chv_rc6_enable(rc6);
+	else if (IS_VALLEYVIEW(i915))
+		vlv_rc6_enable(rc6);
+	else if (INTEL_GEN(i915) >= 11)
+		gen11_rc6_enable(rc6);
+	else if (INTEL_GEN(i915) >= 9)
+		gen9_rc6_enable(rc6);
+	else if (IS_BROADWELL(i915))
+		gen8_rc6_enable(rc6);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_rc6_enable(rc6);
+
+	rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
+	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+		rc6->ctl_enable = 0;
+
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+	if (unlikely(pctx_corrupted(rc6)))
+		return;
+
+	/* rc6 is ready, runtime-pm is go! */
+	rpm_put(rc6);
+	rc6->enabled = true;
+}
+
+void intel_rc6_unpark(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	if (!rc6->enabled)
+		return;
+
+	/* Restore HW timers for automatic RC6 entry while busy */
+	set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+}
+
+void intel_rc6_park(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	if (!rc6->enabled)
+		return;
+
+	if (unlikely(pctx_corrupted(rc6))) {
+		intel_rc6_disable(rc6);
+		return;
+	}
+
+	if (!rc6->manual)
+		return;
+
+	/* Turn off the HW timers and go directly to rc6 */
+	set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+	set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+}
+
+void intel_rc6_disable(struct intel_rc6 *rc6)
+{
+	if (!rc6->enabled)
+		return;
+
+	rpm_get(rc6);
+	rc6->enabled = false;
+
+	__intel_rc6_disable(rc6);
+}
+
+void intel_rc6_fini(struct intel_rc6 *rc6)
+{
+	struct drm_i915_gem_object *pctx;
+
+	intel_rc6_disable(rc6);
+
+	pctx = fetch_and_zero(&rc6->pctx);
+	if (pctx)
+		i915_gem_object_put(pctx);
+
+	if (rc6->wakeref)
+		rpm_put(rc6);
+}
+
+static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
+{
+	u32 lower, upper, tmp;
+	int loop = 2;
+
+	/*
+	 * The register accessed do not need forcewake. We borrow
+	 * uncore lock to prevent concurrent access to range reg.
+	 */
+	lockdep_assert_held(&uncore->lock);
+
+	/*
+	 * vlv and chv residency counters are 40 bits in width.
+	 * With a control bit, we can choose between upper or lower
+	 * 32bit window into this counter.
+	 *
+	 * Although we always use the counter in high-range mode elsewhere,
+	 * userspace may attempt to read the value before rc6 is initialised,
+	 * before we have set the default VLV_COUNTER_CONTROL value. So always
+	 * set the high bit to be safe.
+	 */
+	set(uncore, VLV_COUNTER_CONTROL,
+	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+	upper = intel_uncore_read_fw(uncore, reg);
+	do {
+		tmp = upper;
+
+		set(uncore, VLV_COUNTER_CONTROL,
+		    _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+		lower = intel_uncore_read_fw(uncore, reg);
+
+		set(uncore, VLV_COUNTER_CONTROL,
+		    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+		upper = intel_uncore_read_fw(uncore, reg);
+	} while (upper != tmp && --loop);
+
+	/*
+	 * Everywhere else we always use VLV_COUNTER_CONTROL with the
+	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
+	 * now.
+	 */
+
+	return lower | (u64)upper << 8;
+}
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	u64 time_hw, prev_hw, overflow_hw;
+	unsigned int fw_domains;
+	unsigned long flags;
+	unsigned int i;
+	u32 mul, div;
+
+	if (!rc6->supported)
+		return 0;
+
+	/*
+	 * Store previous hw counter values for counter wrap-around handling.
+	 *
+	 * There are only four interesting registers and they live next to each
+	 * other so we can use the relative address, compared to the smallest
+	 * one as the index into driver storage.
+	 */
+	i = (i915_mmio_reg_offset(reg) -
+	     i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
+	if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+		return 0;
+
+	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
+
+	spin_lock_irqsave(&uncore->lock, flags);
+	intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		mul = 1000000;
+		div = i915->czclk_freq;
+		overflow_hw = BIT_ULL(40);
+		time_hw = vlv_residency_raw(uncore, reg);
+	} else {
+		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
+		if (IS_GEN9_LP(i915)) {
+			mul = 10000;
+			div = 12;
+		} else {
+			mul = 1280;
+			div = 1;
+		}
+
+		overflow_hw = BIT_ULL(32);
+		time_hw = intel_uncore_read_fw(uncore, reg);
+	}
+
+	/*
+	 * Counter wrap handling.
+	 *
+	 * But relying on a sufficient frequency of queries otherwise counters
+	 * can still wrap.
+	 */
+	prev_hw = rc6->prev_hw_residency[i];
+	rc6->prev_hw_residency[i] = time_hw;
+
+	/* RC6 delta from last sample. */
+	if (time_hw >= prev_hw)
+		time_hw -= prev_hw;
+	else
+		time_hw += overflow_hw - prev_hw;
+
+	/* Add delta to RC6 extended raw driver copy. */
+	time_hw += rc6->cur_residency[i];
+	rc6->cur_residency[i] = time_hw;
+
+	intel_uncore_forcewake_put__locked(uncore, fw_domains);
+	spin_unlock_irqrestore(&uncore->lock, flags);
+
+	return mul_u64_u32_div(time_hw, mul, div);
+}
+
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
+{
+	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rc6.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
new file mode 100644
index 000000000000..9f0f23fca8af
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_H
+#define INTEL_RC6_H
+
+#include "i915_reg.h"
+
+struct intel_engine_cs;
+struct intel_rc6;
+
+void intel_rc6_init(struct intel_rc6 *rc6);
+void intel_rc6_fini(struct intel_rc6 *rc6);
+
+void intel_rc6_unpark(struct intel_rc6 *rc6);
+void intel_rc6_park(struct intel_rc6 *rc6);
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6);
+void intel_rc6_enable(struct intel_rc6 *rc6);
+void intel_rc6_disable(struct intel_rc6 *rc6);
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+
+#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
new file mode 100644
index 000000000000..bfbb623f7a4f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_TYPES_H
+#define INTEL_RC6_TYPES_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "intel_engine_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_rc6 {
+	u64 prev_hw_residency[4];
+	u64 cur_residency[4];
+
+	u32 ctl_enable;
+
+	struct drm_i915_gem_object *pctx;
+
+	bool supported : 1;
+	bool enabled : 1;
+	bool manual : 1;
+	bool wakeref : 1;
+};
+
+#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6d05f9c64178..5954ecc3207f 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,16 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
-
-struct intel_renderstate {
-	const struct intel_renderstate_rodata *rodata;
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	u32 batch_offset;
-	u32 batch_size;
-	u32 aux_offset;
-	u32 aux_size;
-};
+#include "intel_ring.h"
 
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(const struct intel_engine_cs *engine)
@@ -83,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so,
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0));
+	d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
 
 	while (i < rodata->batch_items) {
 		u32 s = rodata->batch[i];
@@ -165,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so,
 
 	ret = 0;
 out:
-	i915_gem_object_finish_access(so->obj);
+	i915_gem_object_finish_access(so->vma->obj);
 	return ret;
 
 err:
@@ -176,61 +167,84 @@ err:
 
 #undef OUT_BATCH
 
-int intel_renderstate_emit(struct i915_request *rq)
+int intel_renderstate_init(struct intel_renderstate *so,
+			   struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine = rq->engine;
-	struct intel_renderstate so = {}; /* keep the compiler happy */
+	struct drm_i915_gem_object *obj;
 	int err;
 
-	so.rodata = render_state_get_rodata(engine);
-	if (!so.rodata)
+	memset(so, 0, sizeof(*so));
+
+	so->rodata = render_state_get_rodata(engine);
+	if (!so->rodata)
 		return 0;
 
-	if (so.rodata->batch_items * 4 > PAGE_SIZE)
+	if (so->rodata->batch_items * 4 > PAGE_SIZE)
 		return -EINVAL;
 
-	so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-	if (IS_ERR(so.obj))
-		return PTR_ERR(so.obj);
+	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
-	so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL);
-	if (IS_ERR(so.vma)) {
-		err = PTR_ERR(so.vma);
+	so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+	if (IS_ERR(so->vma)) {
+		err = PTR_ERR(so->vma);
 		goto err_obj;
 	}
 
-	err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
 		goto err_vma;
 
-	err = render_state_setup(&so, rq->i915);
+	err = render_state_setup(so, engine->i915);
 	if (err)
 		goto err_unpin;
 
+	return 0;
+
+err_unpin:
+	i915_vma_unpin(so->vma);
+err_vma:
+	i915_vma_close(so->vma);
+err_obj:
+	i915_gem_object_put(obj);
+	so->vma = NULL;
+	return err;
+}
+
+int intel_renderstate_emit(struct intel_renderstate *so,
+			   struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	int err;
+
+	if (!so->vma)
+		return 0;
+
 	err = engine->emit_bb_start(rq,
-				    so.batch_offset, so.batch_size,
+				    so->batch_offset, so->batch_size,
 				    I915_DISPATCH_SECURE);
 	if (err)
-		goto err_unpin;
+		return err;
 
-	if (so.aux_size > 8) {
+	if (so->aux_size > 8) {
 		err = engine->emit_bb_start(rq,
-					    so.aux_offset, so.aux_size,
+					    so->aux_offset, so->aux_size,
 					    I915_DISPATCH_SECURE);
 		if (err)
-			goto err_unpin;
+			return err;
 	}
 
-	i915_vma_lock(so.vma);
-	err = i915_request_await_object(rq, so.vma->obj, false);
+	i915_vma_lock(so->vma);
+	err = i915_request_await_object(rq, so->vma->obj, false);
 	if (err == 0)
-		err = i915_vma_move_to_active(so.vma, rq, 0);
-	i915_vma_unlock(so.vma);
-err_unpin:
-	i915_vma_unpin(so.vma);
-err_vma:
-	i915_vma_close(so.vma);
-err_obj:
-	i915_gem_object_put(so.obj);
+		err = i915_vma_move_to_active(so->vma, rq, 0);
+	i915_vma_unlock(so->vma);
+
 	return err;
 }
+
+void intel_renderstate_fini(struct intel_renderstate *so)
+{
+	i915_vma_unpin_and_release(&so->vma, 0);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 8d5079145054..5700be69a05a 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -27,6 +27,8 @@
 #include <linux/types.h>
 
 struct i915_request;
+struct intel_engine_cs;
+struct i915_vma;
 
 struct intel_renderstate_rodata {
 	const u32 *reloc;
@@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state;
 extern const struct intel_renderstate_rodata gen8_null_state;
 extern const struct intel_renderstate_rodata gen9_null_state;
 
-int intel_renderstate_emit(struct i915_request *rq);
+struct intel_renderstate {
+	const struct intel_renderstate_rodata *rodata;
+	struct i915_vma *vma;
+	u32 batch_offset;
+	u32 batch_size;
+	u32 aux_offset;
+	u32 aux_size;
+};
+
+int intel_renderstate_init(struct intel_renderstate *so,
+			   struct intel_engine_cs *engine);
+int intel_renderstate_emit(struct intel_renderstate *so,
+			   struct i915_request *rq);
+void intel_renderstate_fini(struct intel_renderstate *so);
 
 #endif /* _INTEL_RENDERSTATE_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 8cea42379dd7..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -21,6 +21,7 @@
 #include "intel_reset.h"
 
 #include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
 
 #define RESET_MAX_RETRIES 3
 
@@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
 static void engine_skip_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_gem_context *hung_ctx = rq->gem_context;
+	struct intel_context *hung_ctx = rq->context;
 
 	if (!i915_request_is_active(rq))
 		return;
 
 	lockdep_assert_held(&engine->active.lock);
 	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
-		if (rq->gem_context == hung_ctx)
+		if (rq->context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
 
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
-			       const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
 {
-	unsigned int score;
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
 	unsigned long prev_hang;
+	unsigned int score;
+
+	if (IS_ERR_OR_NULL(file_priv))
+		return;
 
-	if (i915_gem_context_is_banned(ctx))
+	score = 0;
+	if (banned)
 		score = I915_CLIENT_SCORE_CONTEXT_BAN;
-	else
-		score = 0;
 
 	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
 	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
 	}
 }
 
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
 {
+	struct i915_gem_context *ctx;
 	unsigned long prev_hang;
 	bool banned;
 	int i;
 
+	rcu_read_lock();
+	ctx = rcu_dereference(rq->context->gem_context);
+	if (ctx && !kref_get_unless_zero(&ctx->ref))
+		ctx = NULL;
+	rcu_read_unlock();
+	if (!ctx)
+		return false;
+
+	if (i915_gem_context_is_closed(ctx)) {
+		intel_context_set_banned(rq->context);
+		banned = true;
+		goto out;
+	}
+
 	atomic_inc(&ctx->guilty_count);
 
 	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
-	if (!i915_gem_context_is_bannable(ctx))
-		return false;
+	if (!i915_gem_context_is_bannable(ctx)) {
+		banned = false;
+		goto out;
+	}
+
+	dev_notice(ctx->i915->drm.dev,
+		   "%s context reset due to GPU hang\n",
+		   ctx->name);
 
 	/* Record the timestamp for the last N hangs */
 	prev_hang = ctx->hang_timestamp[0];
@@ -100,38 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
 	if (banned) {
 		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
 				 ctx->name, atomic_read(&ctx->guilty_count));
-		i915_gem_context_set_banned(ctx);
+		intel_context_set_banned(rq->context);
 	}
 
-	if (!IS_ERR_OR_NULL(ctx->file_priv))
-		client_mark_guilty(ctx->file_priv, ctx);
+	client_mark_guilty(ctx, banned);
 
+out:
+	i915_gem_context_put(ctx);
 	return banned;
 }
 
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
 {
-	atomic_inc(&ctx->active_count);
+	struct i915_gem_context *ctx;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(rq->context->gem_context);
+	if (ctx)
+		atomic_inc(&ctx->active_count);
+	rcu_read_unlock();
 }
 
 void __i915_request_reset(struct i915_request *rq, bool guilty)
 {
-	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
-		  rq->engine->name,
-		  rq->fence.context,
-		  rq->fence.seqno,
-		  yesno(guilty));
+	RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
 
 	GEM_BUG_ON(i915_request_completed(rq));
 
+	rcu_read_lock(); /* protect the GEM context */
 	if (guilty) {
 		i915_request_skip(rq, -EIO);
-		if (context_mark_guilty(rq->gem_context))
+		if (mark_guilty(rq))
 			engine_skip_context(rq);
 	} else {
 		dma_fence_set_error(&rq->fence, -EAGAIN);
-		context_mark_innocent(rq->gem_context);
+		mark_innocent(rq);
 	}
+	rcu_read_unlock();
 }
 
 static bool i915_in_reset(struct pci_dev *pdev)
@@ -218,9 +247,8 @@ out:
 	return ret;
 }
 
-static int ironlake_do_reset(struct intel_gt *gt,
-			     intel_engine_mask_t engine_mask,
-			     unsigned int retry)
+static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+			unsigned int retry)
 {
 	struct intel_uncore *uncore = gt->uncore;
 	int ret;
@@ -282,14 +310,14 @@ static int gen6_reset_engines(struct intel_gt *gt,
 			      intel_engine_mask_t engine_mask,
 			      unsigned int retry)
 {
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[] = {
+	static const u32 hw_engine_mask[] = {
 		[RCS0]  = GEN6_GRDOM_RENDER,
 		[BCS0]  = GEN6_GRDOM_BLT,
 		[VCS0]  = GEN6_GRDOM_MEDIA,
 		[VCS1]  = GEN8_GRDOM_MEDIA2,
 		[VECS0] = GEN6_GRDOM_VECS,
 	};
+	struct intel_engine_cs *engine;
 	u32 hw_mask;
 
 	if (engine_mask == ALL_ENGINES) {
@@ -298,7 +326,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
 		intel_engine_mask_t tmp;
 
 		hw_mask = 0;
-		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+		for_each_engine_masked(engine, gt, engine_mask, tmp) {
 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
 			hw_mask |= hw_engine_mask[engine->id];
 		}
@@ -307,7 +335,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
 	return gen6_hw_domain_reset(gt, hw_mask);
 }
 
-static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
+static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
 {
 	struct intel_uncore *uncore = engine->uncore;
 	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
@@ -316,6 +344,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
 	i915_reg_t sfc_usage;
 	u32 sfc_usage_bit;
 	u32 sfc_reset_bit;
+	int ret;
 
 	switch (engine->class) {
 	case VIDEO_DECODE_CLASS:
@@ -350,27 +379,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
 	}
 
 	/*
-	 * Tell the engine that a software reset is going to happen. The engine
-	 * will then try to force lock the SFC (if currently locked, it will
-	 * remain so until we tell the engine it is safe to unlock; if currently
-	 * unlocked, it will ignore this and all new lock requests). If SFC
-	 * ends up being locked to the engine we want to reset, we have to reset
-	 * it as well (we will unlock it once the reset sequence is completed).
+	 * If the engine is using a SFC, tell the engine that a software reset
+	 * is going to happen. The engine will then try to force lock the SFC.
+	 * If SFC ends up being locked to the engine we want to reset, we have
+	 * to reset it as well (we will unlock it once the reset sequence is
+	 * completed).
 	 */
+	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
+		return 0;
+
 	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
 
-	if (__intel_wait_for_register_fw(uncore,
-					 sfc_forced_lock_ack,
-					 sfc_forced_lock_ack_bit,
-					 sfc_forced_lock_ack_bit,
-					 1000, 0, NULL)) {
-		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+	ret = __intel_wait_for_register_fw(uncore,
+					   sfc_forced_lock_ack,
+					   sfc_forced_lock_ack_bit,
+					   sfc_forced_lock_ack_bit,
+					   1000, 0, NULL);
+
+	/* Was the SFC released while we were trying to lock it? */
+	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
 		return 0;
-	}
 
-	if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
-		return sfc_reset_bit;
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+		return ret;
+	}
 
+	*hw_mask |= sfc_reset_bit;
 	return 0;
 }
 
@@ -406,7 +441,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
 			       intel_engine_mask_t engine_mask,
 			       unsigned int retry)
 {
-	const u32 hw_engine_mask[] = {
+	static const u32 hw_engine_mask[] = {
 		[RCS0]  = GEN11_GRDOM_RENDER,
 		[BCS0]  = GEN11_GRDOM_BLT,
 		[VCS0]  = GEN11_GRDOM_MEDIA,
@@ -425,17 +460,26 @@ static int gen11_reset_engines(struct intel_gt *gt,
 		hw_mask = GEN11_GRDOM_FULL;
 	} else {
 		hw_mask = 0;
-		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+		for_each_engine_masked(engine, gt, engine_mask, tmp) {
 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
 			hw_mask |= hw_engine_mask[engine->id];
-			hw_mask |= gen11_lock_sfc(engine);
+			ret = gen11_lock_sfc(engine, &hw_mask);
+			if (ret)
+				goto sfc_unlock;
 		}
 	}
 
 	ret = gen6_hw_domain_reset(gt, hw_mask);
 
+sfc_unlock:
+	/*
+	 * We unlock the SFC based on the lock status and not the result of
+	 * gen11_lock_sfc to make sure that we clean properly if something
+	 * wrong happened during the lock (e.g. lock acquired after timeout
+	 * expiration).
+	 */
 	if (engine_mask != ALL_ENGINES)
-		for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+		for_each_engine_masked(engine, gt, engine_mask, tmp)
 			gen11_unlock_sfc(engine);
 
 	return ret;
@@ -494,7 +538,7 @@ static int gen8_reset_engines(struct intel_gt *gt,
 	intel_engine_mask_t tmp;
 	int ret;
 
-	for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+	for_each_engine_masked(engine, gt, engine_mask, tmp) {
 		ret = gen8_engine_reset_prepare(engine);
 		if (ret && !reset_non_ready)
 			goto skip_reset;
@@ -520,24 +564,35 @@ static int gen8_reset_engines(struct intel_gt *gt,
 		ret = gen6_reset_engines(gt, engine_mask, retry);
 
 skip_reset:
-	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+	for_each_engine_masked(engine, gt, engine_mask, tmp)
 		gen8_engine_reset_cancel(engine);
 
 	return ret;
 }
 
+static int mock_reset(struct intel_gt *gt,
+		      intel_engine_mask_t mask,
+		      unsigned int retry)
+{
+	return 0;
+}
+
 typedef int (*reset_func)(struct intel_gt *,
 			  intel_engine_mask_t engine_mask,
 			  unsigned int retry);
 
-static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
 {
-	if (INTEL_GEN(i915) >= 8)
+	struct drm_i915_private *i915 = gt->i915;
+
+	if (is_mock_gt(gt))
+		return mock_reset;
+	else if (INTEL_GEN(i915) >= 8)
 		return gen8_reset_engines;
 	else if (INTEL_GEN(i915) >= 6)
 		return gen6_reset_engines;
 	else if (INTEL_GEN(i915) >= 5)
-		return ironlake_do_reset;
+		return ilk_do_reset;
 	else if (IS_G4X(i915))
 		return g4x_do_reset;
 	else if (IS_G33(i915) || IS_PINEVIEW(i915))
@@ -555,7 +610,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 	int ret = -ETIMEDOUT;
 	int retry;
 
-	reset = intel_get_gpu_reset(gt->i915);
+	reset = intel_get_gpu_reset(gt);
 	if (!reset)
 		return -ENODEV;
 
@@ -565,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 	 */
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
-		GEM_TRACE("engine_mask=%x\n", engine_mask);
+		GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
 		preempt_disable();
 		ret = reset(gt, engine_mask, retry);
 		preempt_enable();
@@ -575,17 +630,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 	return ret;
 }
 
-bool intel_has_gpu_reset(struct drm_i915_private *i915)
+bool intel_has_gpu_reset(const struct intel_gt *gt)
 {
 	if (!i915_modparams.reset)
 		return NULL;
 
-	return intel_get_gpu_reset(i915);
+	return intel_get_gpu_reset(gt);
 }
 
-bool intel_has_reset_engine(struct drm_i915_private *i915)
+bool intel_has_reset_engine(const struct intel_gt *gt)
 {
-	return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
+	if (i915_modparams.reset < 2)
+		return false;
+
+	return INTEL_INFO(gt->i915)->has_reset_engine;
 }
 
 int intel_reset_guc(struct intel_gt *gt)
@@ -617,7 +675,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
-	engine->reset.prepare(engine);
+	if (engine->reset.prepare)
+		engine->reset.prepare(engine);
 }
 
 static void revoke_mmaps(struct intel_gt *gt)
@@ -637,8 +696,13 @@ static void revoke_mmaps(struct intel_gt *gt)
 			continue;
 
 		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
-		node = &vma->obj->base.vma_node;
+
+		if (!vma->mmo)
+			continue;
+
+		node = &vma->mmo->vma_node;
 		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+
 		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
 				    drm_vma_node_offset_addr(node) + vma_offset,
 				    vma->size,
@@ -652,7 +716,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
 	intel_engine_mask_t awake = 0;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		if (intel_engine_pm_get_if_awake(engine))
 			awake |= engine->mask;
 		reset_prepare_engine(engine);
@@ -682,17 +746,18 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
 	if (err)
 		return err;
 
-	for_each_engine(engine, gt->i915, id)
+	for_each_engine(engine, gt, id)
 		__intel_engine_reset(engine, stalled_mask & engine->mask);
 
-	i915_gem_restore_fences(gt->i915);
+	i915_gem_restore_fences(gt->ggtt);
 
 	return err;
 }
 
 static void reset_finish_engine(struct intel_engine_cs *engine)
 {
-	engine->reset.finish(engine);
+	if (engine->reset.finish)
+		engine->reset.finish(engine);
 	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
 
 	intel_engine_signal_breadcrumbs(engine);
@@ -703,7 +768,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		reset_finish_engine(engine);
 		if (awake & engine->mask)
 			intel_engine_pm_put(engine);
@@ -715,8 +780,7 @@ static void nop_submit_request(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	unsigned long flags;
 
-	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
-		  engine->name, request->fence.context, request->fence.seqno);
+	RQ_TRACE(request, "-EIO\n");
 	dma_fence_set_error(&request->fence, -EIO);
 
 	spin_lock_irqsave(&engine->active.lock, flags);
@@ -724,7 +788,7 @@ static void nop_submit_request(struct i915_request *request)
 	i915_request_mark_complete(request);
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 
-	intel_engine_queue_breadcrumbs(engine);
+	intel_engine_signal_breadcrumbs(engine);
 }
 
 static void __intel_gt_set_wedged(struct intel_gt *gt)
@@ -739,11 +803,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
 		struct drm_printer p = drm_debug_printer(__func__);
 
-		for_each_engine(engine, gt->i915, id)
+		for_each_engine(engine, gt, id)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	GEM_TRACE("start\n");
+	GT_TRACE(gt, "start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -756,7 +820,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 		__intel_gt_reset(gt, ALL_ENGINES);
 
-	for_each_engine(engine, gt->i915, id)
+	for_each_engine(engine, gt, id)
 		engine->submit_request = nop_submit_request;
 
 	/*
@@ -768,12 +832,13 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 	set_bit(I915_WEDGED, &gt->reset.flags);
 
 	/* Mark all executing requests as skipped */
-	for_each_engine(engine, gt->i915, id)
-		engine->cancel_requests(engine);
+	for_each_engine(engine, gt, id)
+		if (engine->reset.cancel)
+			engine->reset.cancel(engine);
 
 	reset_finish(gt, awake);
 
-	GEM_TRACE("end\n");
+	GT_TRACE(gt, "end\n");
 }
 
 void intel_gt_set_wedged(struct intel_gt *gt)
@@ -781,7 +846,7 @@ void intel_gt_set_wedged(struct intel_gt *gt)
 	intel_wakeref_t wakeref;
 
 	mutex_lock(&gt->reset.mutex);
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
 		__intel_gt_set_wedged(gt);
 	mutex_unlock(&gt->reset.mutex);
 }
@@ -790,15 +855,16 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 	struct intel_timeline *tl;
-	unsigned long flags;
+	bool ok;
 
 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
 		return true;
 
-	if (!gt->scratch) /* Never full initialised, recovery impossible */
+	/* Never fully initialised, recovery impossible */
+	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
 		return false;
 
-	GEM_TRACE("start\n");
+	GT_TRACE(gt, "start\n");
 
 	/*
 	 * Before unwedging, make sure that all pending operations
@@ -810,15 +876,15 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 *
 	 * No more can be submitted until we reset the wedged bit.
 	 */
-	spin_lock_irqsave(&timelines->lock, flags);
+	spin_lock(&timelines->lock);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
-		spin_unlock_irqrestore(&timelines->lock, flags);
+		spin_unlock(&timelines->lock);
 
 		/*
 		 * All internal dependencies (i915_requests) will have
@@ -827,16 +893,27 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
 		 * in the worst case.
 		 */
-		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(rq);
+		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
 
 		/* Restart iteration after droping lock */
-		spin_lock_irqsave(&timelines->lock, flags);
+		spin_lock(&timelines->lock);
 		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
+	spin_unlock(&timelines->lock);
 
-	intel_gt_sanitize(gt, false);
+	/* We must reset pending GPU events before restoring our submission */
+	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
+	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+	if (!ok) {
+		/*
+		 * Warn CI about the unrecoverable wedged condition.
+		 * Time for a reboot.
+		 */
+		add_taint_for_CI(TAINT_WARN);
+		return false;
+	}
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
@@ -849,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	intel_engines_reset_default_submission(gt);
 
-	GEM_TRACE("end\n");
+	GT_TRACE(gt, "end\n");
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &gt->reset.flags);
@@ -891,7 +968,7 @@ static int resume(struct intel_gt *gt)
 	enum intel_engine_id id;
 	int ret;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		ret = engine->resume(engine);
 		if (ret)
 			return ret;
@@ -924,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt,
 	intel_engine_mask_t awake;
 	int ret;
 
-	GEM_TRACE("flags=%lx\n", gt->reset.flags);
+	GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);
 
 	might_sleep();
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
@@ -941,7 +1018,7 @@ void intel_gt_reset(struct intel_gt *gt,
 
 	awake = reset_prepare(gt);
 
-	if (!intel_has_gpu_reset(gt->i915)) {
+	if (!intel_has_gpu_reset(gt)) {
 		if (i915_modparams.reset)
 			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
 		else
@@ -970,7 +1047,7 @@ void intel_gt_reset(struct intel_gt *gt,
 	 * was running at the time of the reset (i.e. we weren't VT
 	 * switched away).
 	 */
-	ret = i915_gem_init_hw(gt->i915);
+	ret = intel_gt_init_hw(gt);
 	if (ret) {
 		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
 			  ret);
@@ -981,8 +1058,6 @@ void intel_gt_reset(struct intel_gt *gt,
 	if (ret)
 		goto taint;
 
-	intel_gt_queue_hangcheck(gt);
-
 finish:
 	reset_finish(gt, awake);
 unlock:
@@ -1029,9 +1104,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
 int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 {
 	struct intel_gt *gt = engine->gt;
+	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
 	int ret;
 
-	GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
+	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
 
 	if (!intel_engine_pm_get_if_awake(engine))
@@ -1044,14 +1120,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 			   "Resetting %s for %s\n", engine->name, msg);
 	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
 
-	if (!engine->gt->uc.guc.execbuf_client)
+	if (!uses_guc)
 		ret = intel_gt_reset_engine(engine);
 	else
 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
 	if (ret) {
 		/* If we fail here, we expect to fallback to a global reset */
 		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 engine->gt->uc.guc.execbuf_client ? "GuC " : "",
+				 uses_guc ? "GuC " : "",
 				 engine->name, ret);
 		goto out;
 	}
@@ -1073,7 +1149,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 out:
 	intel_engine_cancel_stop_cs(engine);
 	reset_finish_engine(engine);
-	intel_engine_pm_put(engine);
+	intel_engine_pm_put_async(engine);
 	return ret;
 }
 
@@ -1149,12 +1225,12 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	 * isn't the case at least when we get here by doing a
 	 * simulated reset via debugfs, so get an RPM reference.
 	 */
-	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
 
 	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(gt->i915, engine_mask, msg);
+		i915_capture_error_state(gt->i915);
 		intel_gt_clear_error_registers(gt, engine_mask);
 	}
 
@@ -1162,8 +1238,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	 * Try engine reset when available. We fall back to full reset if
 	 * single reset fails.
 	 */
-	if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
-		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+	if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
+		for_each_engine_masked(engine, gt, engine_mask, tmp) {
 			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
 			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 					     &gt->reset.flags))
@@ -1191,7 +1267,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	synchronize_rcu_expedited();
 
 	/* Prevent any other reset-engine attempt. */
-	for_each_engine(engine, gt->i915, tmp) {
+	for_each_engine(engine, gt, tmp) {
 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 					&gt->reset.flags))
 			wait_on_bit(&gt->reset.flags,
@@ -1201,7 +1277,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 
 	intel_gt_reset_global(gt, engine_mask, msg);
 
-	for_each_engine(engine, gt->i915, tmp)
+	for_each_engine(engine, gt, tmp)
 		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
 				 &gt->reset.flags);
 	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
@@ -1209,7 +1285,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	wake_up_all(&gt->reset.queue);
 
 out:
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 }
 
 int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
@@ -1247,14 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
 	if (!intel_gt_is_wedged(gt))
 		return 0;
 
-	/* Reset still in progress? Maybe we will recover? */
-	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
+	if (intel_gt_has_init_error(gt))
 		return -EIO;
 
-	/* XXX intel_reset_finish() still takes struct_mutex!!! */
-	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
-		return -EAGAIN;
-
+	/* Reset still in progress? Maybe we will recover? */
 	if (wait_event_interruptible(gt->reset.queue,
 				     !test_bit(I915_RESET_BACKOFF,
 					       &gt->reset.flags)))
@@ -1263,11 +1335,22 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
 	return intel_gt_is_wedged(gt) ? -EIO : 0;
 }
 
+void intel_gt_set_wedged_on_init(struct intel_gt *gt)
+{
+	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
+		     I915_WEDGED_ON_INIT);
+	intel_gt_set_wedged(gt);
+	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
 void intel_gt_init_reset(struct intel_gt *gt)
 {
 	init_waitqueue_head(&gt->reset.queue);
 	mutex_init(&gt->reset.mutex);
 	init_srcu_struct(&gt->reset.backoff_srcu);
+
+	/* no GPU until we are ready! */
+	__set_bit(I915_WEDGED, &gt->reset.flags);
 }
 
 void intel_gt_fini_reset(struct intel_gt *gt)
@@ -1306,4 +1389,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_reset.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 52c00199e069..8e8d5f761166 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -14,7 +14,6 @@
 #include "intel_engine_types.h"
 #include "intel_reset_types.h"
 
-struct drm_i915_private;
 struct i915_request;
 struct intel_engine_cs;
 struct intel_gt;
@@ -45,6 +44,12 @@ void intel_gt_set_wedged(struct intel_gt *gt);
 bool intel_gt_unset_wedged(struct intel_gt *gt);
 int intel_gt_terminally_wedged(struct intel_gt *gt);
 
+/*
+ * There's no unset_wedged_on_init paired with this one.
+ * Once we're wedged on init, there's no going back.
+ */
+void intel_gt_set_wedged_on_init(struct intel_gt *gt);
+
 int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
 
 int intel_reset_guc(struct intel_gt *gt);
@@ -68,10 +73,13 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
 
 static inline bool __intel_reset_failed(const struct intel_reset *reset)
 {
+	GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ?
+		   !test_bit(I915_WEDGED, &reset->flags) : false);
+
 	return unlikely(test_bit(I915_WEDGED, &reset->flags));
 }
 
-bool intel_has_gpu_reset(struct drm_i915_private *i915);
-bool intel_has_reset_engine(struct drm_i915_private *i915);
+bool intel_has_gpu_reset(const struct intel_gt *gt);
+bool intel_has_reset_engine(const struct intel_gt *gt);
 
 #endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 31968356e0c0..f43bc3a0fe4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -29,11 +29,17 @@ struct intel_reset {
 	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
 	 * i915_request_alloc(), this bit is checked and the sequence
 	 * aborted (with -EIO reported to userspace) if set.
+	 *
+	 * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no
+	 * longer use the GPU - similar to #I915_WEDGED bit. The difference in
+	 * in the way we're handling "forced" unwedged (e.g. through debugfs),
+	 * which is not allowed in case we failed to initialize.
 	 */
 	unsigned long flags;
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_MODESET	1
 #define I915_RESET_ENGINE	2
+#define I915_WEDGED_ON_INIT	(BITS_PER_LONG - 2)
 #define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	struct mutex mutex; /* serialises wedging/unwedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..374b28f13ca0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,318 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+	unsigned int space;
+
+	space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+	ring->space = space;
+	return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+	struct i915_vma *vma = ring->vma;
+	unsigned int flags;
+	void *addr;
+	int ret;
+
+	if (atomic_fetch_inc(&ring->pin_count))
+		return 0;
+
+	flags = PIN_GLOBAL;
+
+	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+	if (vma->obj->stolen)
+		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
+
+	ret = i915_vma_pin(vma, 0, 0, flags);
+	if (unlikely(ret))
+		goto err_unpin;
+
+	if (i915_vma_is_map_and_fenceable(vma))
+		addr = (void __force *)i915_vma_pin_iomap(vma);
+	else
+		addr = i915_gem_object_pin_map(vma->obj,
+					       i915_coherent_map_type(vma->vm->i915));
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto err_ring;
+	}
+
+	i915_vma_make_unshrinkable(vma);
+
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->emit);
+
+	ring->vaddr = addr;
+	return 0;
+
+err_ring:
+	i915_vma_unpin(vma);
+err_unpin:
+	atomic_dec(&ring->pin_count);
+	return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	tail = intel_ring_wrap(ring, tail);
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+	struct i915_vma *vma = ring->vma;
+
+	if (!atomic_dec_and_test(&ring->pin_count))
+		return;
+
+	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_is_map_and_fenceable(vma))
+		i915_vma_unpin_iomap(vma);
+	else
+		i915_gem_object_unpin_map(vma->obj);
+
+	i915_vma_make_purgeable(vma);
+	i915_vma_unpin(vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+	struct i915_address_space *vm = &ggtt->vm;
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	obj = ERR_PTR(-ENODEV);
+	if (i915_ggtt_has_aperture(ggtt))
+		obj = i915_gem_object_create_stolen(i915, size);
+	if (IS_ERR(obj))
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (vm->has_read_only)
+		i915_gem_object_set_readonly(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		goto err;
+
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_ring *ring;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!is_power_of_2(size));
+	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ring->ref);
+	ring->size = size;
+
+	/*
+	 * Workaround an erratum on the i830 which causes a hang if
+	 * the TAIL pointer points to within the last 2 cachelines
+	 * of the buffer.
+	 */
+	ring->effective_size = size;
+	if (IS_I830(i915) || IS_I845G(i915))
+		ring->effective_size -= 2 * CACHELINE_BYTES;
+
+	intel_ring_update_space(ring);
+
+	vma = create_ring_vma(engine->gt->ggtt, size);
+	if (IS_ERR(vma)) {
+		kfree(ring);
+		return ERR_CAST(vma);
+	}
+	ring->vma = vma;
+
+	return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+	i915_vma_put(ring->vma);
+	kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
+{
+	struct i915_request *target;
+	long timeout;
+
+	if (intel_ring_update_space(ring) >= bytes)
+		return 0;
+
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		if (bytes <= __intel_ring_space(target->postfix,
+						ring->emit, ring->size))
+			break;
+	}
+
+	if (GEM_WARN_ON(&target->link == &tl->requests))
+		return -ENOSPC;
+
+	timeout = i915_request_wait(target,
+				    I915_WAIT_INTERRUPTIBLE,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (timeout < 0)
+		return timeout;
+
+	i915_request_retire_upto(target);
+
+	intel_ring_update_space(ring);
+	GEM_BUG_ON(ring->space < bytes);
+	return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+	struct intel_ring *ring = rq->ring;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
+	u32 *cs;
+
+	/* Packets must be qword aligned. */
+	GEM_BUG_ON(num_dwords & 1);
+
+	total_bytes = bytes + rq->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
+
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else  {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = rq->reserved_space + remain_actual;
+		}
+	}
+
+	if (unlikely(total_bytes > ring->space)) {
+		int ret;
+
+		/*
+		 * Space is reserved in the ringbuffer for finalising the
+		 * request, as that cannot be allowed to fail. During request
+		 * finalisation, reserved_space is set to 0 to stop the
+		 * overallocation and the assumption is that then we never need
+		 * to wait (which has the risk of failing with EINTR).
+		 *
+		 * See also i915_request_alloc() and i915_request_add().
+		 */
+		GEM_BUG_ON(!rq->reserved_space);
+
+		ret = wait_for_space(ring,
+				     i915_request_timeline(rq),
+				     total_bytes);
+		if (unlikely(ret))
+			return ERR_PTR(ret);
+	}
+
+	if (unlikely(need_wrap)) {
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+		/* Fill the tail with MI_NOOP */
+		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+		ring->space -= need_wrap;
+		ring->emit = 0;
+	}
+
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	GEM_BUG_ON(ring->space < bytes);
+	cs = ring->vaddr + ring->emit;
+	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+	ring->emit += bytes;
+	ring->space -= bytes;
+
+	return cs;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+	int num_dwords;
+	void *cs;
+
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+	if (num_dwords == 0)
+		return 0;
+
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
+	cs = intel_ring_begin(rq, num_dwords);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+	intel_ring_advance(rq, cs + num_dwords);
+
+	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+	kref_get(&ring->ref);
+	return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+	kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+	/* Dummy function.
+	 *
+	 * This serves as a placeholder in the code so that the reader
+	 * can compare against the preceding intel_ring_begin() and
+	 * check that the number of dwords emitted matches the space
+	 * reserved for the command packet (i.e. the value passed to
+	 * intel_ring_begin()).
+	 */
+	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+			unsigned int pos)
+{
+	if (pos & -ring->size) /* must be strictly within the ring */
+		return false;
+
+	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+		return false;
+
+	return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	u32 offset = addr - rq->ring->vaddr;
+	GEM_BUG_ON(offset > rq->ring->size);
+	return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+	/*
+	 * "Ring Buffer Use"
+	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 *
+	 * We use ring->head as the last known location of the actual RING_HEAD,
+	 * it may have advanced but in the worst case it is equally the same
+	 * as ring->head and so we should never program RING_TAIL to advance
+	 * into the same cacheline as ring->head.
+	 */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+		   tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 */
+	GEM_BUG_ON(!is_power_of_2(size));
+	return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
new file mode 100644
index 000000000000..bc44fe8e5ffa
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -0,0 +1,2009 @@
+/*
+ * Copyright © 2008-2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Zou Nan hai <nanhai.zou@intel.com>
+ *    Xiang Hai hao<haihao.xiang@intel.com>
+ *
+ */
+
+#include <linux/log2.h>
+
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_context.h"
+
+#include "gen6_ppgtt.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_reset.h"
+#include "intel_ring.h"
+#include "intel_workarounds.h"
+
+/* Rough estimate of the typical request size, performing a flush,
+ * set-context and then emitting the batch.
+ */
+#define LEGACY_REQUEST_SIZE 200
+
+static int
+gen2_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+	unsigned int num_store_dw;
+	u32 cmd, *cs;
+
+	cmd = MI_FLUSH;
+	num_store_dw = 0;
+	if (mode & EMIT_INVALIDATE)
+		cmd |= MI_READ_FLUSH;
+	if (mode & EMIT_FLUSH)
+		num_store_dw = 4;
+
+	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = cmd;
+	while (num_store_dw--) {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+						INTEL_GT_SCRATCH_FIELD_DEFAULT);
+		*cs++ = 0;
+	}
+	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen4_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 cmd, *cs;
+	int i;
+
+	/*
+	 * read/write caches:
+	 *
+	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
+	 * also flushed at 2d versus 3d pipeline switches.
+	 *
+	 * read-only caches:
+	 *
+	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+	 * MI_READ_FLUSH is set, and is always flushed on 965.
+	 *
+	 * I915_GEM_DOMAIN_COMMAND may not exist?
+	 *
+	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+	 * invalidated when MI_EXE_FLUSH is set.
+	 *
+	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+	 * invalidated with every MI_FLUSH.
+	 *
+	 * TLBs:
+	 *
+	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+	 * are flushed at any MI_FLUSH.
+	 */
+
+	cmd = MI_FLUSH;
+	if (mode & EMIT_INVALIDATE) {
+		cmd |= MI_EXE_FLUSH;
+		if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
+			cmd |= MI_INVALIDATE_ISP;
+	}
+
+	i = 2;
+	if (mode & EMIT_INVALIDATE)
+		i += 20;
+
+	cs = intel_ring_begin(rq, i);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = cmd;
+
+	/*
+	 * A random delay to let the CS invalidate take effect? Without this
+	 * delay, the GPU relocation path fails as the CS does not see
+	 * the updated contents. Just as important, if we apply the flushes
+	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
+	 * write and before the invalidate on the next batch), the relocations
+	 * still fail. This implies that is a delay following invalidation
+	 * that is required to reset the caches as opposed to a delay to
+	 * ensure the memory is written.
+	 */
+	if (mode & EMIT_INVALIDATE) {
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+
+		for (i = 0; i < 12; i++)
+			*cs++ = MI_FLUSH;
+
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	*cs++ = cmd;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
+{
+	u32 scratch_addr =
+		intel_gt_scratch_offset(rq->engine->gt,
+					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(5);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0; /* low dword */
+	*cs++ = 0; /* high dword */
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(5);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 scratch_addr =
+		intel_gt_scratch_offset(rq->engine->gt,
+					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+	u32 *cs, flags = 0;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	ret = gen6_emit_post_sync_nonzero_flush(rq);
+	if (ret)
+		return ret;
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	if (mode & EMIT_FLUSH) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		/*
+		 * Ensure that any following seqno writes only happen
+		 * when the render cache is indeed flushed.
+		 */
+		flags |= PIPE_CONTROL_CS_STALL;
+	}
+	if (mode & EMIT_INVALIDATE) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		/*
+		 * TLB invalidate requires a post-sync write.
+		 */
+		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
+					INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+		PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+
+	/* Finally we can flush and with it emit the breadcrumb */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_CS_STALL);
+	*cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+		PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+static int
+gen7_render_ring_cs_stall_wa(struct i915_request *rq)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = 0;
+	*cs++ = 0;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen7_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 scratch_addr =
+		intel_gt_scratch_offset(rq->engine->gt,
+					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+	u32 *cs, flags = 0;
+
+	/*
+	 * Ensure that any following seqno writes only happen when the render
+	 * cache is indeed flushed.
+	 *
+	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
+	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
+	 * don't try to be clever and just set it unconditionally.
+	 */
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	/*
+	 * CS_STALL suggests at least a post-sync write.
+	 */
+	flags |= PIPE_CONTROL_QW_WRITE;
+	flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	if (mode & EMIT_FLUSH) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+	}
+	if (mode & EMIT_INVALIDATE) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
+
+		/* Workaround: we must issue a pipe_control with CS-stall bit
+		 * set before a pipe_control command that has the state cache
+		 * invalidate bit set. */
+		gen7_render_ring_cs_stall_wa(rq);
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = scratch_addr;
+	*cs++ = 0;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE |
+		 PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_GLOBAL_GTT_IVB |
+		 PIPE_CONTROL_CS_STALL);
+	*cs++ = i915_request_active_timeline(rq)->hwsp_offset;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+#define GEN7_XCS_WA 32
+static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	int i;
+
+	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
+		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	for (i = 0; i < GEN7_XCS_WA; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
+	}
+
+	*cs++ = MI_FLUSH_DW;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+#undef GEN7_XCS_WA
+
+static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
+{
+	/*
+	 * Keep the render interrupt unmasked as this papers over
+	 * lost interrupts following a reset.
+	 */
+	if (engine->class == RENDER_CLASS) {
+		if (INTEL_GEN(engine->i915) >= 6)
+			mask &= ~BIT(0);
+		else
+			mask &= ~I915_USER_INTERRUPT;
+	}
+
+	intel_engine_set_hwsp_writemask(engine, mask);
+}
+
+static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
+{
+	u32 addr;
+
+	addr = lower_32_bits(phys);
+	if (INTEL_GEN(engine->i915) >= 4)
+		addr |= (phys >> 28) & 0xf0;
+
+	intel_uncore_write(engine->uncore, HWS_PGA, addr);
+}
+
+static struct page *status_page(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
+	set_hwstam(engine, ~0u);
+}
+
+static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
+{
+	i915_reg_t hwsp;
+
+	/*
+	 * The ring status page addresses are no longer next to the rest of
+	 * the ring registers as of gen7.
+	 */
+	if (IS_GEN(engine->i915, 7)) {
+		switch (engine->id) {
+		/*
+		 * No more rings exist on Gen7. Default case is only to shut up
+		 * gcc switch check warning.
+		 */
+		default:
+			GEM_BUG_ON(engine->id);
+			/* fallthrough */
+		case RCS0:
+			hwsp = RENDER_HWS_PGA_GEN7;
+			break;
+		case BCS0:
+			hwsp = BLT_HWS_PGA_GEN7;
+			break;
+		case VCS0:
+			hwsp = BSD_HWS_PGA_GEN7;
+			break;
+		case VECS0:
+			hwsp = VEBOX_HWS_PGA_GEN7;
+			break;
+		}
+	} else if (IS_GEN(engine->i915, 6)) {
+		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
+	} else {
+		hwsp = RING_HWS_PGA(engine->mmio_base);
+	}
+
+	intel_uncore_write(engine->uncore, hwsp, offset);
+	intel_uncore_posting_read(engine->uncore, hwsp);
+}
+
+static void flush_cs_tlb(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	if (!IS_GEN_RANGE(dev_priv, 6, 7))
+		return;
+
+	/* ring should be idle before issuing a sync flush*/
+	WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	ENGINE_WRITE(engine, RING_INSTPM,
+		     _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+					INSTPM_SYNC_FLUSH));
+	if (intel_wait_for_register(engine->uncore,
+				    RING_INSTPM(engine->mmio_base),
+				    INSTPM_SYNC_FLUSH, 0,
+				    1000))
+		DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+			  engine->name);
+}
+
+static void ring_setup_status_page(struct intel_engine_cs *engine)
+{
+	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
+	set_hwstam(engine, ~0u);
+
+	flush_cs_tlb(engine);
+}
+
+static bool stop_ring(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	if (INTEL_GEN(dev_priv) > 2) {
+		ENGINE_WRITE(engine,
+			     RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
+		if (intel_wait_for_register(engine->uncore,
+					    RING_MI_MODE(engine->mmio_base),
+					    MODE_IDLE,
+					    MODE_IDLE,
+					    1000)) {
+			DRM_ERROR("%s : timed out trying to stop ring\n",
+				  engine->name);
+
+			/*
+			 * Sometimes we observe that the idle flag is not
+			 * set even though the ring is empty. So double
+			 * check before giving up.
+			 */
+			if (ENGINE_READ(engine, RING_HEAD) !=
+			    ENGINE_READ(engine, RING_TAIL))
+				return false;
+		}
+	}
+
+	ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
+
+	ENGINE_WRITE(engine, RING_HEAD, 0);
+	ENGINE_WRITE(engine, RING_TAIL, 0);
+
+	/* The ring must be empty before it is disabled */
+	ENGINE_WRITE(engine, RING_CTL, 0);
+
+	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
+}
+
+static int xcs_resume(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct intel_ring *ring = engine->legacy.ring;
+	int ret = 0;
+
+	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
+		     ring->head, ring->tail);
+
+	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
+
+	/* WaClearRingBufHeadRegAtInit:ctg,elk */
+	if (!stop_ring(engine)) {
+		/* G45 ring initialization often fails to reset head to zero */
+		DRM_DEBUG_DRIVER("%s head not reset to zero "
+				"ctl %08x head %08x tail %08x start %08x\n",
+				engine->name,
+				ENGINE_READ(engine, RING_CTL),
+				ENGINE_READ(engine, RING_HEAD),
+				ENGINE_READ(engine, RING_TAIL),
+				ENGINE_READ(engine, RING_START));
+
+		if (!stop_ring(engine)) {
+			DRM_ERROR("failed to set %s head to zero "
+				  "ctl %08x head %08x tail %08x start %08x\n",
+				  engine->name,
+				  ENGINE_READ(engine, RING_CTL),
+				  ENGINE_READ(engine, RING_HEAD),
+				  ENGINE_READ(engine, RING_TAIL),
+				  ENGINE_READ(engine, RING_START));
+			ret = -EIO;
+			goto out;
+		}
+	}
+
+	if (HWS_NEEDS_PHYSICAL(dev_priv))
+		ring_setup_phys_status_page(engine);
+	else
+		ring_setup_status_page(engine);
+
+	intel_engine_reset_breadcrumbs(engine);
+
+	/* Enforce ordering by reading HEAD register back */
+	ENGINE_POSTING_READ(engine, RING_HEAD);
+
+	/*
+	 * Initialize the ring. This must happen _after_ we've cleared the ring
+	 * registers with the above sequence (the readback of the HEAD registers
+	 * also enforces ordering), otherwise the hw might lose the new ring
+	 * register values.
+	 */
+	ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
+
+	/* Check that the ring offsets point within the ring! */
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
+	intel_ring_update_space(ring);
+
+	/* First wake the ring up to an empty/idle ring */
+	ENGINE_WRITE(engine, RING_HEAD, ring->head);
+	ENGINE_WRITE(engine, RING_TAIL, ring->head);
+	ENGINE_POSTING_READ(engine, RING_TAIL);
+
+	ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);
+
+	/* If the head is still not zero, the ring is dead */
+	if (intel_wait_for_register(engine->uncore,
+				    RING_CTL(engine->mmio_base),
+				    RING_VALID, RING_VALID,
+				    50)) {
+		DRM_ERROR("%s initialization failed "
+			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+			  engine->name,
+			  ENGINE_READ(engine, RING_CTL),
+			  ENGINE_READ(engine, RING_CTL) & RING_VALID,
+			  ENGINE_READ(engine, RING_HEAD), ring->head,
+			  ENGINE_READ(engine, RING_TAIL), ring->tail,
+			  ENGINE_READ(engine, RING_START),
+			  i915_ggtt_offset(ring->vma));
+		ret = -EIO;
+		goto out;
+	}
+
+	if (INTEL_GEN(dev_priv) > 2)
+		ENGINE_WRITE(engine,
+			     RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+
+	/* Now awake, let it get started */
+	if (ring->tail != ring->head) {
+		ENGINE_WRITE(engine, RING_TAIL, ring->tail);
+		ENGINE_POSTING_READ(engine, RING_TAIL);
+	}
+
+	/* Papering over lost _interrupts_ immediately following the restart */
+	intel_engine_signal_breadcrumbs(engine);
+out:
+	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+static void reset_prepare(struct intel_engine_cs *engine)
+{
+	struct intel_uncore *uncore = engine->uncore;
+	const u32 base = engine->mmio_base;
+
+	/*
+	 * We stop engines, otherwise we might get failed reset and a
+	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
+	 * from system hang if batchbuffer is progressing when
+	 * the reset is issued, regardless of READY_TO_RESET ack.
+	 * Thus assume it is best to stop engines on all gens
+	 * where we have a gpu reset.
+	 *
+	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+	 *
+	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+	 *
+	 * FIXME: Wa for more modern gens needs to be validated
+	 */
+	ENGINE_TRACE(engine, "\n");
+
+	if (intel_engine_stop_cs(engine))
+		ENGINE_TRACE(engine, "timed out on STOP_RING\n");
+
+	intel_uncore_write_fw(uncore,
+			      RING_HEAD(base),
+			      intel_uncore_read_fw(uncore, RING_TAIL(base)));
+	intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
+
+	intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
+	intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
+	intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
+
+	/* The ring must be empty before it is disabled */
+	intel_uncore_write_fw(uncore, RING_CTL(base), 0);
+
+	/* Check acts as a post */
+	if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
+		ENGINE_TRACE(engine, "ring head [%x] not parked\n",
+			     intel_uncore_read_fw(uncore, RING_HEAD(base)));
+}
+
+static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
+{
+	struct i915_request *pos, *rq;
+	unsigned long flags;
+	u32 head;
+
+	rq = NULL;
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(pos, &engine->active.requests, sched.link) {
+		if (!i915_request_completed(pos)) {
+			rq = pos;
+			break;
+		}
+	}
+
+	/*
+	 * The guilty request will get skipped on a hung engine.
+	 *
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away random
+	 * amount of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via sync_file info ioctl on explicit fences) to observe
+	 * when it loses the context state and should rebuild accordingly.
+	 *
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
+	 */
+
+	if (rq) {
+		/*
+		 * Try to restore the logical GPU state to match the
+		 * continuation of the request queue. If we skip the
+		 * context/PD restore, then the next request may try to execute
+		 * assuming that its context is valid and loaded on the GPU and
+		 * so may try to access invalid memory, prompting repeated GPU
+		 * hangs.
+		 *
+		 * If the request was guilty, we still restore the logical
+		 * state in case the next request requires it (e.g. the
+		 * aliasing ppgtt), but skip over the hung batch.
+		 *
+		 * If the request was innocent, we try to replay the request
+		 * with the restored context.
+		 */
+		__i915_request_reset(rq, stalled);
+
+		GEM_BUG_ON(rq->ring != engine->legacy.ring);
+		head = rq->head;
+	} else {
+		head = engine->legacy.ring->tail;
+	}
+	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
+
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static int rcs_resume(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
+
+	/*
+	 * Disable CONSTANT_BUFFER before it is loaded from the context
+	 * image. For as it is loaded, it is executed and the stored
+	 * address may no longer be valid, leading to a GPU hang.
+	 *
+	 * This imposes the requirement that userspace reload their
+	 * CONSTANT_BUFFER on every batch, fortunately a requirement
+	 * they are already accustomed to from before contexts were
+	 * enabled.
+	 */
+	if (IS_GEN(i915, 4))
+		intel_uncore_write(uncore, ECOSKPD,
+			   _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
+
+	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
+	if (IS_GEN_RANGE(i915, 4, 6))
+		intel_uncore_write(uncore, MI_MODE,
+				   _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
+
+	/* We need to disable the AsyncFlip performance optimisations in order
+	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+	 * programmed to '1' on all products.
+	 *
+	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
+	 */
+	if (IS_GEN_RANGE(i915, 6, 7))
+		intel_uncore_write(uncore, MI_MODE,
+				   _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+	/* Required for the hardware to program scanline values for waiting */
+	/* WaEnableFlushTlbInvalidationMode:snb */
+	if (IS_GEN(i915, 6))
+		intel_uncore_write(uncore, GFX_MODE,
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
+
+	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
+	if (IS_GEN(i915, 7))
+		intel_uncore_write(uncore, GFX_MODE_GEN7,
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
+			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
+
+	if (IS_GEN(i915, 6)) {
+		/* From the Sandybridge PRM, volume 1 part 3, page 24:
+		 * "If this bit is set, STCunit will have LRA as replacement
+		 *  policy. [...] This bit must be reset.  LRA replacement
+		 *  policy is not supported."
+		 */
+		intel_uncore_write(uncore, CACHE_MODE_0,
+			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
+	}
+
+	if (IS_GEN_RANGE(i915, 6, 7))
+		intel_uncore_write(uncore, INSTPM,
+				   _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+	return xcs_resume(engine);
+}
+
+static void reset_cancel(struct intel_engine_cs *engine)
+{
+	struct i915_request *request;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->active.lock, flags);
+
+	/* Mark all submitted requests as skipped. */
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
+		if (!i915_request_signaled(request))
+			dma_fence_set_error(&request->fence, -EIO);
+
+		i915_request_mark_complete(request);
+	}
+
+	/* Remaining _unready_ requests will be nop'ed when submitted */
+
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void i9xx_submit_request(struct i915_request *request)
+{
+	i915_request_submit(request);
+	wmb(); /* paranoid flush writes out of the WCB before mmio */
+
+	ENGINE_WRITE(request->engine, RING_TAIL,
+		     intel_ring_set_tail(request->ring, request->tail));
+}
+
+static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH;
+
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	int i;
+
+	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH;
+
+	BUILD_BUG_ON(GEN5_WA_STORES < 1);
+	for (i = 0; i < GEN5_WA_STORES; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+#undef GEN5_WA_STORES
+
+static void
+gen5_irq_enable(struct intel_engine_cs *engine)
+{
+	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static void
+gen5_irq_disable(struct intel_engine_cs *engine)
+{
+	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static void
+i9xx_irq_enable(struct intel_engine_cs *engine)
+{
+	engine->i915->irq_mask &= ~engine->irq_enable_mask;
+	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
+}
+
+static void
+i9xx_irq_disable(struct intel_engine_cs *engine)
+{
+	engine->i915->irq_mask |= engine->irq_enable_mask;
+	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+}
+
+static void
+i8xx_irq_enable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	i915->irq_mask &= ~engine->irq_enable_mask;
+	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
+	ENGINE_POSTING_READ16(engine, RING_IMR);
+}
+
+static void
+i8xx_irq_disable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	i915->irq_mask |= engine->irq_enable_mask;
+	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
+}
+
+static int
+bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_FLUSH;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+	return 0;
+}
+
+static void
+gen6_irq_enable(struct intel_engine_cs *engine)
+{
+	ENGINE_WRITE(engine, RING_IMR,
+		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
+
+	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+	ENGINE_POSTING_READ(engine, RING_IMR);
+
+	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static void
+gen6_irq_disable(struct intel_engine_cs *engine)
+{
+	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
+	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static void
+hsw_vebox_irq_enable(struct intel_engine_cs *engine)
+{
+	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
+
+	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+	ENGINE_POSTING_READ(engine, RING_IMR);
+
+	gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static void
+hsw_vebox_irq_disable(struct intel_engine_cs *engine)
+{
+	ENGINE_WRITE(engine, RING_IMR, ~0);
+	gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
+}
+
+static int
+i965_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 length,
+		   unsigned int dispatch_flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
+		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
+	*cs++ = offset;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT SZ_256K
+#define I830_TLB_ENTRIES (2)
+#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
+static int
+i830_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 len,
+		   unsigned int dispatch_flags)
+{
+	u32 *cs, cs_offset =
+		intel_gt_scratch_offset(rq->engine->gt,
+					INTEL_GT_SCRATCH_FIELD_DEFAULT);
+
+	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* Evict the invalid PTE TLBs */
+	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
+	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
+	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
+	*cs++ = cs_offset;
+	*cs++ = 0xdeadbeef;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		cs = intel_ring_begin(rq, 6 + 2);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		/* Blit the batch (which has now all relocs applied) to the
+		 * stable batch scratch bo area (so that the CS never
+		 * stumbles over its tlb invalidation bug) ...
+		 */
+		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
+		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
+		*cs++ = cs_offset;
+		*cs++ = 4096;
+		*cs++ = offset;
+
+		*cs++ = MI_FLUSH;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+
+		/* ... and execute it. */
+		offset = cs_offset;
+	}
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
+		MI_BATCH_NON_SECURE);
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+i915_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 len,
+		   unsigned int dispatch_flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
+		MI_BATCH_NON_SECURE);
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static void __ring_context_fini(struct intel_context *ce)
+{
+	i915_vma_put(ce->state);
+}
+
+static void ring_context_destroy(struct kref *ref)
+{
+	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
+
+	GEM_BUG_ON(intel_context_is_pinned(ce));
+
+	if (ce->state)
+		__ring_context_fini(ce);
+
+	intel_context_fini(ce);
+	intel_context_free(ce);
+}
+
+static struct i915_address_space *vm_alias(struct intel_context *ce)
+{
+	struct i915_address_space *vm;
+
+	vm = ce->vm;
+	if (i915_is_ggtt(vm))
+		vm = &i915_vm_to_ggtt(vm)->alias->vm;
+
+	return vm;
+}
+
+static int __context_pin_ppgtt(struct intel_context *ce)
+{
+	struct i915_address_space *vm;
+	int err = 0;
+
+	vm = vm_alias(ce);
+	if (vm)
+		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
+
+	return err;
+}
+
+static void __context_unpin_ppgtt(struct intel_context *ce)
+{
+	struct i915_address_space *vm;
+
+	vm = vm_alias(ce);
+	if (vm)
+		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
+}
+
+static void ring_context_unpin(struct intel_context *ce)
+{
+	__context_unpin_ppgtt(ce);
+}
+
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, engine->context_size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Try to make the context utilize L3 as well as LLC.
+	 *
+	 * On VLV we don't have L3 controls in the PTEs so we
+	 * shouldn't touch the cache level, especially as that
+	 * would make the object snooped which might have a
+	 * negative performance impact.
+	 *
+	 * Snooping is required on non-llc platforms in execlist
+	 * mode, but since all GGTT accesses use PAT entry 0 we
+	 * get snooping anyway regardless of cache_level.
+	 *
+	 * This is only applicable for Ivy Bridge devices since
+	 * later platforms don't have L3 control bits in the PTE.
+	 */
+	if (IS_IVYBRIDGE(i915))
+		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
+
+	if (engine->default_state) {
+		void *defaults, *vaddr;
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_obj;
+		}
+
+		defaults = i915_gem_object_pin_map(engine->default_state,
+						   I915_MAP_WB);
+		if (IS_ERR(defaults)) {
+			err = PTR_ERR(defaults);
+			goto err_map;
+		}
+
+		memcpy(vaddr, defaults, engine->context_size);
+		i915_gem_object_unpin_map(engine->default_state);
+
+		i915_gem_object_flush_map(obj);
+		i915_gem_object_unpin_map(obj);
+	}
+
+	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	return vma;
+
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int ring_context_alloc(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+
+	/* One ringbuffer to rule them all */
+	GEM_BUG_ON(!engine->legacy.ring);
+	ce->ring = engine->legacy.ring;
+	ce->timeline = intel_timeline_get(engine->legacy.timeline);
+
+	GEM_BUG_ON(ce->state);
+	if (engine->context_size) {
+		struct i915_vma *vma;
+
+		vma = alloc_context_vma(engine);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
+
+		ce->state = vma;
+		if (engine->default_state)
+			__set_bit(CONTEXT_VALID_BIT, &ce->flags);
+	}
+
+	return 0;
+}
+
+static int ring_context_pin(struct intel_context *ce)
+{
+	return __context_pin_ppgtt(ce);
+}
+
+static void ring_context_reset(struct intel_context *ce)
+{
+	intel_ring_reset(ce->ring, ce->ring->emit);
+}
+
+static const struct intel_context_ops ring_context_ops = {
+	.alloc = ring_context_alloc,
+
+	.pin = ring_context_pin,
+	.unpin = ring_context_unpin,
+
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
+	.reset = ring_context_reset,
+	.destroy = ring_context_destroy,
+};
+
+static int load_pd_dir(struct i915_request *rq,
+		       const struct i915_ppgtt *ppgtt,
+		       u32 valid)
+{
+	const struct intel_engine_cs * const engine = rq->engine;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 12);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+	*cs++ = valid;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+
+	/* Stall until the page table load is complete? */
+	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+	*cs++ = intel_gt_scratch_offset(engine->gt,
+					INTEL_GT_SCRATCH_FIELD_DEFAULT);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
+
+	intel_ring_advance(rq, cs);
+
+	return rq->engine->emit_flush(rq, EMIT_FLUSH);
+}
+
+static inline int mi_set_context(struct i915_request *rq, u32 flags)
+{
+	struct drm_i915_private *i915 = rq->i915;
+	struct intel_engine_cs *engine = rq->engine;
+	enum intel_engine_id id;
+	const int num_engines =
+		IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+	bool force_restore = false;
+	int len;
+	u32 *cs;
+
+	len = 4;
+	if (IS_GEN(i915, 7))
+		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
+	else if (IS_GEN(i915, 5))
+		len += 2;
+	if (flags & MI_FORCE_RESTORE) {
+		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
+		flags &= ~MI_FORCE_RESTORE;
+		force_restore = true;
+		len += 2;
+	}
+
+	cs = intel_ring_begin(rq, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+	if (IS_GEN(i915, 7)) {
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+		if (num_engines) {
+			struct intel_engine_cs *signaller;
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+			for_each_engine(signaller, engine->gt, id) {
+				if (signaller == engine)
+					continue;
+
+				*cs++ = i915_mmio_reg_offset(
+					   RING_PSMI_CTL(signaller->mmio_base));
+				*cs++ = _MASKED_BIT_ENABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+		}
+	} else if (IS_GEN(i915, 5)) {
+		/*
+		 * This w/a is only listed for pre-production ilk a/b steppings,
+		 * but is also mentioned for programming the powerctx. To be
+		 * safe, just apply the workaround; we do not use SyncFlush so
+		 * this should never take effect and so be a no-op!
+		 */
+		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
+	}
+
+	if (force_restore) {
+		/*
+		 * The HW doesn't handle being told to restore the current
+		 * context very well. Quite often it likes goes to go off and
+		 * sulk, especially when it is meant to be reloading PP_DIR.
+		 * A very simple fix to force the reload is to simply switch
+		 * away from the current context and back again.
+		 *
+		 * Note that the kernel_context will contain random state
+		 * following the INHIBIT_RESTORE. We accept this since we
+		 * never use the kernel_context state; it is merely a
+		 * placeholder we use to flush other contexts.
+		 */
+		*cs++ = MI_SET_CONTEXT;
+		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
+			MI_MM_SPACE_GTT |
+			MI_RESTORE_INHIBIT;
+	}
+
+	*cs++ = MI_NOOP;
+	*cs++ = MI_SET_CONTEXT;
+	*cs++ = i915_ggtt_offset(rq->context->state) | flags;
+	/*
+	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
+	 * WaMiSetContext_Hang:snb,ivb,vlv
+	 */
+	*cs++ = MI_NOOP;
+
+	if (IS_GEN(i915, 7)) {
+		if (num_engines) {
+			struct intel_engine_cs *signaller;
+			i915_reg_t last_reg = {}; /* keep gcc quiet */
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+			for_each_engine(signaller, engine->gt, id) {
+				if (signaller == engine)
+					continue;
+
+				last_reg = RING_PSMI_CTL(signaller->mmio_base);
+				*cs++ = i915_mmio_reg_offset(last_reg);
+				*cs++ = _MASKED_BIT_DISABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+
+			/* Insert a delay before the next switch! */
+			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+			*cs++ = i915_mmio_reg_offset(last_reg);
+			*cs++ = intel_gt_scratch_offset(engine->gt,
+							INTEL_GT_SCRATCH_FIELD_DEFAULT);
+			*cs++ = MI_NOOP;
+		}
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	} else if (IS_GEN(i915, 5)) {
+		*cs++ = MI_SUSPEND_FLUSH;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int remap_l3_slice(struct i915_request *rq, int slice)
+{
+	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
+	int i;
+
+	if (!remap_info)
+		return 0;
+
+	cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
+		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+		*cs++ = remap_info[i];
+	}
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int remap_l3(struct i915_request *rq)
+{
+	struct i915_gem_context *ctx = i915_request_gem_context(rq);
+	int i, err;
+
+	if (!ctx || !ctx->remap_slice)
+		return 0;
+
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(ctx->remap_slice & BIT(i)))
+			continue;
+
+		err = remap_l3_slice(rq, i);
+		if (err)
+			return err;
+	}
+
+	ctx->remap_slice = 0;
+	return 0;
+}
+
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+	int ret;
+
+	if (!vm)
+		return 0;
+
+	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
+
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+	if (ret)
+		return ret;
+
+	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
+static int switch_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->context;
+	int ret;
+
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	ret = switch_mm(rq, vm_alias(ce));
+	if (ret)
+		return ret;
+
+	if (ce->state) {
+		u32 flags;
+
+		GEM_BUG_ON(rq->engine->id != RCS0);
+
+		/* For resource streamer on HSW+ and power context elsewhere */
+		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
+			flags |= MI_RESTORE_EXT_STATE_EN;
+		else
+			flags |= MI_RESTORE_INHIBIT;
+
+		ret = mi_set_context(rq, flags);
+		if (ret)
+			return ret;
+	}
+
+	ret = remap_l3(rq);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int ring_request_alloc(struct i915_request *request)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_pinned(request->context));
+	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
+
+	/*
+	 * Flush enough space to reduce the likelihood of waiting after
+	 * we start building the request - in which case we will just
+	 * have to repeat work.
+	 */
+	request->reserved_space += LEGACY_REQUEST_SIZE;
+
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+	if (ret)
+		return ret;
+
+	ret = switch_context(request);
+	if (ret)
+		return ret;
+
+	request->reserved_space -= LEGACY_REQUEST_SIZE;
+	return 0;
+}
+
+static void gen6_bsd_submit_request(struct i915_request *request)
+{
+	struct intel_uncore *uncore = request->engine->uncore;
+
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+       /* Every tail move must follow the sequence below */
+
+	/* Disable notification that the ring is IDLE. The GT
+	 * will then assume that it is busy and bring it out of rc6.
+	 */
+	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+			      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+	/* Clear the context id. Here be magic! */
+	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
+
+	/* Wait for the ring not to be idle, i.e. for it to wake up. */
+	if (__intel_wait_for_register_fw(uncore,
+					 GEN6_BSD_SLEEP_PSMI_CONTROL,
+					 GEN6_BSD_SLEEP_INDICATOR,
+					 0,
+					 1000, 0, NULL))
+		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
+
+	/* Now that the ring is fully powered up, update the tail */
+	i9xx_submit_request(request);
+
+	/* Let the ring send IDLE messages to the GT again,
+	 * and so let it sleep to conserve power when idle.
+	 */
+	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+			      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+static int mi_flush_dw(struct i915_request *rq, u32 flags)
+{
+	u32 cmd, *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cmd = MI_FLUSH_DW;
+
+	/*
+	 * We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
+	cmd |= flags;
+
+	*cs++ = cmd;
+	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
+{
+	return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
+}
+
+static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
+}
+
+static int
+hsw_emit_bb_start(struct i915_request *rq,
+		  u64 offset, u32 len,
+		  unsigned int dispatch_flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
+		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
+	/* bit0-7 is the length on GEN6+ */
+	*cs++ = offset;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen6_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 len,
+		   unsigned int dispatch_flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
+		0 : MI_BATCH_NON_SECURE_I965);
+	/* bit0-7 is the length on GEN6+ */
+	*cs++ = offset;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/* Blitter support (SandyBridge+) */
+
+static int gen6_ring_flush(struct i915_request *rq, u32 mode)
+{
+	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
+}
+
+static void i9xx_set_default_submission(struct intel_engine_cs *engine)
+{
+	engine->submit_request = i9xx_submit_request;
+
+	engine->park = NULL;
+	engine->unpark = NULL;
+}
+
+static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
+{
+	i9xx_set_default_submission(engine);
+	engine->submit_request = gen6_bsd_submit_request;
+}
+
+static void ring_release(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	intel_engine_cleanup_common(engine);
+
+	intel_ring_unpin(engine->legacy.ring);
+	intel_ring_put(engine->legacy.ring);
+
+	intel_timeline_unpin(engine->legacy.timeline);
+	intel_timeline_put(engine->legacy.timeline);
+}
+
+static void setup_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 6) {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	} else if (INTEL_GEN(i915) >= 5) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else if (INTEL_GEN(i915) >= 3) {
+		engine->irq_enable = i9xx_irq_enable;
+		engine->irq_disable = i9xx_irq_disable;
+	} else {
+		engine->irq_enable = i8xx_irq_enable;
+		engine->irq_disable = i8xx_irq_disable;
+	}
+}
+
+static void setup_common(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	/* gen8+ are only supported with execlists */
+	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
+
+	setup_irq(engine);
+
+	engine->resume = xcs_resume;
+	engine->reset.prepare = reset_prepare;
+	engine->reset.rewind = reset_rewind;
+	engine->reset.cancel = reset_cancel;
+	engine->reset.finish = reset_finish;
+
+	engine->cops = &ring_context_ops;
+	engine->request_alloc = ring_request_alloc;
+
+	/*
+	 * Using a global execution timeline; the previous final breadcrumb is
+	 * equivalent to our next initial bread so we can elide
+	 * engine->emit_init_breadcrumb().
+	 */
+	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
+	if (IS_GEN(i915, 5))
+		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
+
+	engine->set_default_submission = i9xx_set_default_submission;
+
+	if (INTEL_GEN(i915) >= 6)
+		engine->emit_bb_start = gen6_emit_bb_start;
+	else if (INTEL_GEN(i915) >= 4)
+		engine->emit_bb_start = i965_emit_bb_start;
+	else if (IS_I830(i915) || IS_I845G(i915))
+		engine->emit_bb_start = i830_emit_bb_start;
+	else
+		engine->emit_bb_start = i915_emit_bb_start;
+}
+
+static void setup_rcs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (HAS_L3_DPF(i915))
+		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+
+	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+
+	if (INTEL_GEN(i915) >= 7) {
+		engine->emit_flush = gen7_render_ring_flush;
+		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
+	} else if (IS_GEN(i915, 6)) {
+		engine->emit_flush = gen6_render_ring_flush;
+		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
+	} else if (IS_GEN(i915, 5)) {
+		engine->emit_flush = gen4_render_ring_flush;
+	} else {
+		if (INTEL_GEN(i915) < 4)
+			engine->emit_flush = gen2_render_ring_flush;
+		else
+			engine->emit_flush = gen4_render_ring_flush;
+		engine->irq_enable_mask = I915_USER_INTERRUPT;
+	}
+
+	if (IS_HASWELL(i915))
+		engine->emit_bb_start = hsw_emit_bb_start;
+
+	engine->resume = rcs_resume;
+}
+
+static void setup_vcs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 6) {
+		/* gen6 bsd needs a special wa for tail updates */
+		if (IS_GEN(i915, 6))
+			engine->set_default_submission = gen6_bsd_set_default_submission;
+		engine->emit_flush = gen6_bsd_ring_flush;
+		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+
+		if (IS_GEN(i915, 6))
+			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+		else
+			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+	} else {
+		engine->emit_flush = bsd_ring_flush;
+		if (IS_GEN(i915, 5))
+			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
+		else
+			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
+	}
+}
+
+static void setup_bcs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	engine->emit_flush = gen6_ring_flush;
+	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+
+	if (IS_GEN(i915, 6))
+		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+	else
+		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+}
+
+static void setup_vecs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	GEM_BUG_ON(INTEL_GEN(i915) < 7);
+
+	engine->emit_flush = gen6_ring_flush;
+	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+	engine->irq_enable = hsw_vebox_irq_enable;
+	engine->irq_disable = hsw_vebox_irq_disable;
+
+	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+}
+
+int intel_ring_submission_setup(struct intel_engine_cs *engine)
+{
+	struct intel_timeline *timeline;
+	struct intel_ring *ring;
+	int err;
+
+	setup_common(engine);
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		setup_rcs(engine);
+		break;
+	case VIDEO_DECODE_CLASS:
+		setup_vcs(engine);
+		break;
+	case COPY_ENGINE_CLASS:
+		setup_bcs(engine);
+		break;
+	case VIDEO_ENHANCEMENT_CLASS:
+		setup_vecs(engine);
+		break;
+	default:
+		MISSING_CASE(engine->class);
+		return -ENODEV;
+	}
+
+	timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
+	if (IS_ERR(timeline)) {
+		err = PTR_ERR(timeline);
+		goto err;
+	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+	err = intel_timeline_pin(timeline);
+	if (err)
+		goto err_timeline;
+
+	ring = intel_engine_create_ring(engine, SZ_16K);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err_timeline_unpin;
+	}
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_ring;
+
+	GEM_BUG_ON(engine->legacy.ring);
+	engine->legacy.ring = ring;
+	engine->legacy.timeline = timeline;
+
+	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+
+	/* Finally, take ownership and responsibility for cleanup! */
+	engine->release = ring_release;
+
+	return 0;
+
+err_ring:
+	intel_ring_put(ring);
+err_timeline_unpin:
+	intel_timeline_unpin(timeline);
+err_timeline:
+	intel_timeline_put(timeline);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
new file mode 100644
index 000000000000..d9f17f38e0cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_TYPES_H
+#define INTEL_RING_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+/*
+ * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+struct i915_vma;
+
+struct intel_ring {
+	struct kref ref;
+	struct i915_vma *vma;
+	void *vaddr;
+
+	/*
+	 * As we have two types of rings, one global to the engine used
+	 * by ringbuffer submission and those that are exclusive to a
+	 * context used by execlists, we have to play safe and allow
+	 * atomic updates to the pin_count. However, the actual pinning
+	 * of the context is either done during initialisation for
+	 * ringbuffer submission or serialised as part of the context
+	 * pinning for execlists, and so we do not need a mutex ourselves
+	 * to serialise intel_ring_pin/intel_ring_unpin.
+	 */
+	atomic_t pin_count;
+
+	u32 head;
+	u32 tail;
+	u32 emit;
+
+	u32 space;
+	u32 size;
+	u32 effective_size;
+};
+
+#endif /* INTEL_RING_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
deleted file mode 100644
index bacaa7bb8c9a..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ /dev/null
@@ -1,2385 +0,0 @@
-/*
- * Copyright © 2008-2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Zou Nan hai <nanhai.zou@intel.com>
- *    Xiang Hai hao<haihao.xiang@intel.com>
- *
- */
-
-#include <linux/log2.h>
-
-#include <drm/i915_drm.h>
-
-#include "gem/i915_gem_context.h"
-
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_context.h"
-#include "intel_gt.h"
-#include "intel_gt_irq.h"
-#include "intel_gt_pm_irq.h"
-#include "intel_reset.h"
-#include "intel_workarounds.h"
-
-/* Rough estimate of the typical request size, performing a flush,
- * set-context and then emitting the batch.
- */
-#define LEGACY_REQUEST_SIZE 200
-
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
-	unsigned int space;
-
-	space = __intel_ring_space(ring->head, ring->emit, ring->size);
-
-	ring->space = space;
-	return space;
-}
-
-static int
-gen2_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	unsigned int num_store_dw;
-	u32 cmd, *cs;
-
-	cmd = MI_FLUSH;
-	num_store_dw = 0;
-	if (mode & EMIT_INVALIDATE)
-		cmd |= MI_READ_FLUSH;
-	if (mode & EMIT_FLUSH)
-		num_store_dw = 4;
-
-	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = cmd;
-	while (num_store_dw--) {
-		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-						INTEL_GT_SCRATCH_FIELD_DEFAULT);
-		*cs++ = 0;
-	}
-	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int
-gen4_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 cmd, *cs;
-	int i;
-
-	/*
-	 * read/write caches:
-	 *
-	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
-	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
-	 * also flushed at 2d versus 3d pipeline switches.
-	 *
-	 * read-only caches:
-	 *
-	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
-	 * MI_READ_FLUSH is set, and is always flushed on 965.
-	 *
-	 * I915_GEM_DOMAIN_COMMAND may not exist?
-	 *
-	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
-	 * invalidated when MI_EXE_FLUSH is set.
-	 *
-	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
-	 * invalidated with every MI_FLUSH.
-	 *
-	 * TLBs:
-	 *
-	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
-	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
-	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
-	 * are flushed at any MI_FLUSH.
-	 */
-
-	cmd = MI_FLUSH;
-	if (mode & EMIT_INVALIDATE) {
-		cmd |= MI_EXE_FLUSH;
-		if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
-			cmd |= MI_INVALIDATE_ISP;
-	}
-
-	i = 2;
-	if (mode & EMIT_INVALIDATE)
-		i += 20;
-
-	cs = intel_ring_begin(rq, i);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = cmd;
-
-	/*
-	 * A random delay to let the CS invalidate take effect? Without this
-	 * delay, the GPU relocation path fails as the CS does not see
-	 * the updated contents. Just as important, if we apply the flushes
-	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
-	 * write and before the invalidate on the next batch), the relocations
-	 * still fail. This implies that is a delay following invalidation
-	 * that is required to reset the caches as opposed to a delay to
-	 * ensure the memory is written.
-	 */
-	if (mode & EMIT_INVALIDATE) {
-		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
-			PIPE_CONTROL_GLOBAL_GTT;
-		*cs++ = 0;
-		*cs++ = 0;
-
-		for (i = 0; i < 12; i++)
-			*cs++ = MI_FLUSH;
-
-		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
-			PIPE_CONTROL_GLOBAL_GTT;
-		*cs++ = 0;
-		*cs++ = 0;
-	}
-
-	*cs++ = cmd;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-/*
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6.  From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- *     "1 of the following must also be set:
- *      - Render Target Cache Flush Enable ([12] of DW1)
- *      - Depth Cache Flush Enable ([0] of DW1)
- *      - Stall at Pixel Scoreboard ([1] of DW1)
- *      - Depth Stall ([13] of DW1)
- *      - Post-Sync Operation ([13] of DW1)
- *      - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it.  Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either.  Notify enable is IRQs, which aren't
- * really our business.  That leaves only stall at scoreboard.
- */
-static int
-gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
-{
-	u32 scratch_addr =
-		intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(5);
-	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
-	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = 0; /* low dword */
-	*cs++ = 0; /* high dword */
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(5);
-	*cs++ = PIPE_CONTROL_QW_WRITE;
-	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = 0;
-	*cs++ = 0;
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int
-gen6_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 scratch_addr =
-		intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-	u32 *cs, flags = 0;
-	int ret;
-
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = gen6_emit_post_sync_nonzero_flush(rq);
-	if (ret)
-		return ret;
-
-	/* Just flush everything.  Experiments have shown that reducing the
-	 * number of bits based on the write domains has little performance
-	 * impact.
-	 */
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		/*
-		 * Ensure that any following seqno writes only happen
-		 * when the render cache is indeed flushed.
-		 */
-		flags |= PIPE_CONTROL_CS_STALL;
-	}
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		/*
-		 * TLB invalidate requires a post-sync write.
-		 */
-		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
-	}
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = flags;
-	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = 0;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
-	*cs++ = 0;
-	*cs++ = 0;
-
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_QW_WRITE;
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT) |
-		PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = 0;
-
-	/* Finally we can flush and with it emit the breadcrumb */
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		 PIPE_CONTROL_DC_FLUSH_ENABLE |
-		 PIPE_CONTROL_QW_WRITE |
-		 PIPE_CONTROL_CS_STALL);
-	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = rq->fence.seqno;
-
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-
-static int
-gen7_render_ring_cs_stall_wa(struct i915_request *rq)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
-	*cs++ = 0;
-	*cs++ = 0;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int
-gen7_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 scratch_addr =
-		intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-	u32 *cs, flags = 0;
-
-	/*
-	 * Ensure that any following seqno writes only happen when the render
-	 * cache is indeed flushed.
-	 *
-	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
-	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
-	 * don't try to be clever and just set it unconditionally.
-	 */
-	flags |= PIPE_CONTROL_CS_STALL;
-
-	/* Just flush everything.  Experiments have shown that reducing the
-	 * number of bits based on the write domains has little performance
-	 * impact.
-	 */
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
-		/*
-		 * TLB invalidate requires a post-sync write.
-		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
-		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-
-		/* Workaround: we must issue a pipe_control with CS-stall bit
-		 * set before a pipe_control command that has the state cache
-		 * invalidate bit set. */
-		gen7_render_ring_cs_stall_wa(rq);
-	}
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = flags;
-	*cs++ = scratch_addr;
-	*cs++ = 0;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		 PIPE_CONTROL_DC_FLUSH_ENABLE |
-		 PIPE_CONTROL_FLUSH_ENABLE |
-		 PIPE_CONTROL_QW_WRITE |
-		 PIPE_CONTROL_GLOBAL_GTT_IVB |
-		 PIPE_CONTROL_CS_STALL);
-	*cs++ = rq->timeline->hwsp_offset;
-	*cs++ = rq->fence.seqno;
-
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-
-static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = rq->fence.seqno;
-
-	*cs++ = MI_USER_INTERRUPT;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-
-#define GEN7_XCS_WA 32
-static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	int i;
-
-	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = rq->fence.seqno;
-
-	for (i = 0; i < GEN7_XCS_WA; i++) {
-		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
-		*cs++ = rq->fence.seqno;
-	}
-
-	*cs++ = MI_FLUSH_DW;
-	*cs++ = 0;
-	*cs++ = 0;
-
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-#undef GEN7_XCS_WA
-
-static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
-{
-	/*
-	 * Keep the render interrupt unmasked as this papers over
-	 * lost interrupts following a reset.
-	 */
-	if (engine->class == RENDER_CLASS) {
-		if (INTEL_GEN(engine->i915) >= 6)
-			mask &= ~BIT(0);
-		else
-			mask &= ~I915_USER_INTERRUPT;
-	}
-
-	intel_engine_set_hwsp_writemask(engine, mask);
-}
-
-static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 addr;
-
-	addr = lower_32_bits(phys);
-	if (INTEL_GEN(dev_priv) >= 4)
-		addr |= (phys >> 28) & 0xf0;
-
-	I915_WRITE(HWS_PGA, addr);
-}
-
-static struct page *status_page(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-	return sg_page(obj->mm.pages->sgl);
-}
-
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
-{
-	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
-	set_hwstam(engine, ~0u);
-}
-
-static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	i915_reg_t hwsp;
-
-	/*
-	 * The ring status page addresses are no longer next to the rest of
-	 * the ring registers as of gen7.
-	 */
-	if (IS_GEN(dev_priv, 7)) {
-		switch (engine->id) {
-		/*
-		 * No more rings exist on Gen7. Default case is only to shut up
-		 * gcc switch check warning.
-		 */
-		default:
-			GEM_BUG_ON(engine->id);
-			/* fallthrough */
-		case RCS0:
-			hwsp = RENDER_HWS_PGA_GEN7;
-			break;
-		case BCS0:
-			hwsp = BLT_HWS_PGA_GEN7;
-			break;
-		case VCS0:
-			hwsp = BSD_HWS_PGA_GEN7;
-			break;
-		case VECS0:
-			hwsp = VEBOX_HWS_PGA_GEN7;
-			break;
-		}
-	} else if (IS_GEN(dev_priv, 6)) {
-		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
-	} else {
-		hwsp = RING_HWS_PGA(engine->mmio_base);
-	}
-
-	I915_WRITE(hwsp, offset);
-	POSTING_READ(hwsp);
-}
-
-static void flush_cs_tlb(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (!IS_GEN_RANGE(dev_priv, 6, 7))
-		return;
-
-	/* ring should be idle before issuing a sync flush*/
-	WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
-	ENGINE_WRITE(engine, RING_INSTPM,
-		     _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
-					INSTPM_SYNC_FLUSH));
-	if (intel_wait_for_register(engine->uncore,
-				    RING_INSTPM(engine->mmio_base),
-				    INSTPM_SYNC_FLUSH, 0,
-				    1000))
-		DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
-			  engine->name);
-}
-
-static void ring_setup_status_page(struct intel_engine_cs *engine)
-{
-	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
-	set_hwstam(engine, ~0u);
-
-	flush_cs_tlb(engine);
-}
-
-static bool stop_ring(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (INTEL_GEN(dev_priv) > 2) {
-		ENGINE_WRITE(engine,
-			     RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
-		if (intel_wait_for_register(engine->uncore,
-					    RING_MI_MODE(engine->mmio_base),
-					    MODE_IDLE,
-					    MODE_IDLE,
-					    1000)) {
-			DRM_ERROR("%s : timed out trying to stop ring\n",
-				  engine->name);
-
-			/*
-			 * Sometimes we observe that the idle flag is not
-			 * set even though the ring is empty. So double
-			 * check before giving up.
-			 */
-			if (ENGINE_READ(engine, RING_HEAD) !=
-			    ENGINE_READ(engine, RING_TAIL))
-				return false;
-		}
-	}
-
-	ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
-
-	ENGINE_WRITE(engine, RING_HEAD, 0);
-	ENGINE_WRITE(engine, RING_TAIL, 0);
-
-	/* The ring must be empty before it is disabled */
-	ENGINE_WRITE(engine, RING_CTL, 0);
-
-	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
-}
-
-static int xcs_resume(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_ring *ring = engine->legacy.ring;
-	int ret = 0;
-
-	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
-		  engine->name, ring->head, ring->tail);
-
-	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
-
-	/* WaClearRingBufHeadRegAtInit:ctg,elk */
-	if (!stop_ring(engine)) {
-		/* G45 ring initialization often fails to reset head to zero */
-		DRM_DEBUG_DRIVER("%s head not reset to zero "
-				"ctl %08x head %08x tail %08x start %08x\n",
-				engine->name,
-				ENGINE_READ(engine, RING_CTL),
-				ENGINE_READ(engine, RING_HEAD),
-				ENGINE_READ(engine, RING_TAIL),
-				ENGINE_READ(engine, RING_START));
-
-		if (!stop_ring(engine)) {
-			DRM_ERROR("failed to set %s head to zero "
-				  "ctl %08x head %08x tail %08x start %08x\n",
-				  engine->name,
-				  ENGINE_READ(engine, RING_CTL),
-				  ENGINE_READ(engine, RING_HEAD),
-				  ENGINE_READ(engine, RING_TAIL),
-				  ENGINE_READ(engine, RING_START));
-			ret = -EIO;
-			goto out;
-		}
-	}
-
-	if (HWS_NEEDS_PHYSICAL(dev_priv))
-		ring_setup_phys_status_page(engine);
-	else
-		ring_setup_status_page(engine);
-
-	intel_engine_reset_breadcrumbs(engine);
-
-	/* Enforce ordering by reading HEAD register back */
-	ENGINE_POSTING_READ(engine, RING_HEAD);
-
-	/*
-	 * Initialize the ring. This must happen _after_ we've cleared the ring
-	 * registers with the above sequence (the readback of the HEAD registers
-	 * also enforces ordering), otherwise the hw might lose the new ring
-	 * register values.
-	 */
-	ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
-
-	/* Check that the ring offsets point within the ring! */
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
-	intel_ring_update_space(ring);
-
-	/* First wake the ring up to an empty/idle ring */
-	ENGINE_WRITE(engine, RING_HEAD, ring->head);
-	ENGINE_WRITE(engine, RING_TAIL, ring->head);
-	ENGINE_POSTING_READ(engine, RING_TAIL);
-
-	ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);
-
-	/* If the head is still not zero, the ring is dead */
-	if (intel_wait_for_register(engine->uncore,
-				    RING_CTL(engine->mmio_base),
-				    RING_VALID, RING_VALID,
-				    50)) {
-		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
-			  engine->name,
-			  ENGINE_READ(engine, RING_CTL),
-			  ENGINE_READ(engine, RING_CTL) & RING_VALID,
-			  ENGINE_READ(engine, RING_HEAD), ring->head,
-			  ENGINE_READ(engine, RING_TAIL), ring->tail,
-			  ENGINE_READ(engine, RING_START),
-			  i915_ggtt_offset(ring->vma));
-		ret = -EIO;
-		goto out;
-	}
-
-	if (INTEL_GEN(dev_priv) > 2)
-		ENGINE_WRITE(engine,
-			     RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
-
-	/* Now awake, let it get started */
-	if (ring->tail != ring->head) {
-		ENGINE_WRITE(engine, RING_TAIL, ring->tail);
-		ENGINE_POSTING_READ(engine, RING_TAIL);
-	}
-
-	/* Papering over lost _interrupts_ immediately following the restart */
-	intel_engine_queue_breadcrumbs(engine);
-out:
-	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
-
-	return ret;
-}
-
-static void reset_prepare(struct intel_engine_cs *engine)
-{
-	struct intel_uncore *uncore = engine->uncore;
-	const u32 base = engine->mmio_base;
-
-	/*
-	 * We stop engines, otherwise we might get failed reset and a
-	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
-	 * from system hang if batchbuffer is progressing when
-	 * the reset is issued, regardless of READY_TO_RESET ack.
-	 * Thus assume it is best to stop engines on all gens
-	 * where we have a gpu reset.
-	 *
-	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
-	 *
-	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
-	 *
-	 * FIXME: Wa for more modern gens needs to be validated
-	 */
-	GEM_TRACE("%s\n", engine->name);
-
-	if (intel_engine_stop_cs(engine))
-		GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
-
-	intel_uncore_write_fw(uncore,
-			      RING_HEAD(base),
-			      intel_uncore_read_fw(uncore, RING_TAIL(base)));
-	intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
-
-	intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
-	intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
-	intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
-
-	/* The ring must be empty before it is disabled */
-	intel_uncore_write_fw(uncore, RING_CTL(base), 0);
-
-	/* Check acts as a post */
-	if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
-		GEM_TRACE("%s: ring head [%x] not parked\n",
-			  engine->name,
-			  intel_uncore_read_fw(uncore, RING_HEAD(base)));
-}
-
-static void reset_ring(struct intel_engine_cs *engine, bool stalled)
-{
-	struct i915_request *pos, *rq;
-	unsigned long flags;
-	u32 head;
-
-	rq = NULL;
-	spin_lock_irqsave(&engine->active.lock, flags);
-	list_for_each_entry(pos, &engine->active.requests, sched.link) {
-		if (!i915_request_completed(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-
-	/*
-	 * The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
-
-	if (rq) {
-		/*
-		 * Try to restore the logical GPU state to match the
-		 * continuation of the request queue. If we skip the
-		 * context/PD restore, then the next request may try to execute
-		 * assuming that its context is valid and loaded on the GPU and
-		 * so may try to access invalid memory, prompting repeated GPU
-		 * hangs.
-		 *
-		 * If the request was guilty, we still restore the logical
-		 * state in case the next request requires it (e.g. the
-		 * aliasing ppgtt), but skip over the hung batch.
-		 *
-		 * If the request was innocent, we try to replay the request
-		 * with the restored context.
-		 */
-		__i915_request_reset(rq, stalled);
-
-		GEM_BUG_ON(rq->ring != engine->legacy.ring);
-		head = rq->head;
-	} else {
-		head = engine->legacy.ring->tail;
-	}
-	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static void reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static int rcs_resume(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	/*
-	 * Disable CONSTANT_BUFFER before it is loaded from the context
-	 * image. For as it is loaded, it is executed and the stored
-	 * address may no longer be valid, leading to a GPU hang.
-	 *
-	 * This imposes the requirement that userspace reload their
-	 * CONSTANT_BUFFER on every batch, fortunately a requirement
-	 * they are already accustomed to from before contexts were
-	 * enabled.
-	 */
-	if (IS_GEN(dev_priv, 4))
-		I915_WRITE(ECOSKPD,
-			   _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
-
-	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
-	if (IS_GEN_RANGE(dev_priv, 4, 6))
-		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-
-	/* We need to disable the AsyncFlip performance optimisations in order
-	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
-	 * programmed to '1' on all products.
-	 *
-	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
-	 */
-	if (IS_GEN_RANGE(dev_priv, 6, 7))
-		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
-	/* Required for the hardware to program scanline values for waiting */
-	/* WaEnableFlushTlbInvalidationMode:snb */
-	if (IS_GEN(dev_priv, 6))
-		I915_WRITE(GFX_MODE,
-			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
-
-	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
-	if (IS_GEN(dev_priv, 7))
-		I915_WRITE(GFX_MODE_GEN7,
-			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
-			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-
-	if (IS_GEN(dev_priv, 6)) {
-		/* From the Sandybridge PRM, volume 1 part 3, page 24:
-		 * "If this bit is set, STCunit will have LRA as replacement
-		 *  policy. [...] This bit must be reset.  LRA replacement
-		 *  policy is not supported."
-		 */
-		I915_WRITE(CACHE_MODE_0,
-			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-	}
-
-	if (IS_GEN_RANGE(dev_priv, 6, 7))
-		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
-	return xcs_resume(engine);
-}
-
-static void cancel_requests(struct intel_engine_cs *engine)
-{
-	struct i915_request *request;
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->active.lock, flags);
-
-	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->active.requests, sched.link) {
-		if (!i915_request_signaled(request))
-			dma_fence_set_error(&request->fence, -EIO);
-
-		i915_request_mark_complete(request);
-	}
-
-	/* Remaining _unready_ requests will be nop'ed when submitted */
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static void i9xx_submit_request(struct i915_request *request)
-{
-	i915_request_submit(request);
-
-	ENGINE_WRITE(request->engine, RING_TAIL,
-		     intel_ring_set_tail(request->ring, request->tail));
-}
-
-static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
-	*cs++ = MI_FLUSH;
-
-	*cs++ = MI_STORE_DWORD_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
-	*cs++ = rq->fence.seqno;
-
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-
-#define GEN5_WA_STORES 8 /* must be at least 1! */
-static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	int i;
-
-	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
-	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
-	*cs++ = MI_FLUSH;
-
-	BUILD_BUG_ON(GEN5_WA_STORES < 1);
-	for (i = 0; i < GEN5_WA_STORES; i++) {
-		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
-		*cs++ = rq->fence.seqno;
-	}
-
-	*cs++ = MI_USER_INTERRUPT;
-
-	rq->tail = intel_ring_offset(rq, cs);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-
-	return cs;
-}
-#undef GEN5_WA_STORES
-
-static void
-gen5_irq_enable(struct intel_engine_cs *engine)
-{
-	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-gen5_irq_disable(struct intel_engine_cs *engine)
-{
-	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-i9xx_irq_enable(struct intel_engine_cs *engine)
-{
-	engine->i915->irq_mask &= ~engine->irq_enable_mask;
-	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
-	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
-}
-
-static void
-i9xx_irq_disable(struct intel_engine_cs *engine)
-{
-	engine->i915->irq_mask |= engine->irq_enable_mask;
-	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
-}
-
-static void
-i8xx_irq_enable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	i915->irq_mask &= ~engine->irq_enable_mask;
-	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-	ENGINE_POSTING_READ16(engine, RING_IMR);
-}
-
-static void
-i8xx_irq_disable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	i915->irq_mask |= engine->irq_enable_mask;
-	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-static int
-bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_FLUSH;
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-	return 0;
-}
-
-static void
-gen6_irq_enable(struct intel_engine_cs *engine)
-{
-	ENGINE_WRITE(engine, RING_IMR,
-		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
-
-	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
-	ENGINE_POSTING_READ(engine, RING_IMR);
-
-	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-gen6_irq_disable(struct intel_engine_cs *engine)
-{
-	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
-	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-hsw_vebox_irq_enable(struct intel_engine_cs *engine)
-{
-	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
-
-	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
-	ENGINE_POSTING_READ(engine, RING_IMR);
-
-	gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-hsw_vebox_irq_disable(struct intel_engine_cs *engine)
-{
-	ENGINE_WRITE(engine, RING_IMR, ~0);
-	gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static int
-i965_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 length,
-		   unsigned int dispatch_flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
-		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
-	*cs++ = offset;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT SZ_256K
-#define I830_TLB_ENTRIES (2)
-#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
-static int
-i830_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	u32 *cs, cs_offset =
-		intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
-	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* Evict the invalid PTE TLBs */
-	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
-	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
-	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
-	*cs++ = cs_offset;
-	*cs++ = 0xdeadbeef;
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
-		if (len > I830_BATCH_LIMIT)
-			return -ENOSPC;
-
-		cs = intel_ring_begin(rq, 6 + 2);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		/* Blit the batch (which has now all relocs applied) to the
-		 * stable batch scratch bo area (so that the CS never
-		 * stumbles over its tlb invalidation bug) ...
-		 */
-		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
-		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
-		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
-		*cs++ = cs_offset;
-		*cs++ = 4096;
-		*cs++ = offset;
-
-		*cs++ = MI_FLUSH;
-		*cs++ = MI_NOOP;
-		intel_ring_advance(rq, cs);
-
-		/* ... and execute it. */
-		offset = cs_offset;
-	}
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
-		MI_BATCH_NON_SECURE);
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int
-i915_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
-		MI_BATCH_NON_SECURE);
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-int intel_ring_pin(struct intel_ring *ring)
-{
-	struct i915_vma *vma = ring->vma;
-	unsigned int flags;
-	void *addr;
-	int ret;
-
-	if (atomic_fetch_inc(&ring->pin_count))
-		return 0;
-
-	flags = PIN_GLOBAL;
-
-	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	if (vma->obj->stolen)
-		flags |= PIN_MAPPABLE;
-	else
-		flags |= PIN_HIGH;
-
-	ret = i915_vma_pin(vma, 0, 0, flags);
-	if (unlikely(ret))
-		goto err_unpin;
-
-	if (i915_vma_is_map_and_fenceable(vma))
-		addr = (void __force *)i915_vma_pin_iomap(vma);
-	else
-		addr = i915_gem_object_pin_map(vma->obj,
-					       i915_coherent_map_type(vma->vm->i915));
-	if (IS_ERR(addr)) {
-		ret = PTR_ERR(addr);
-		goto err_ring;
-	}
-
-	i915_vma_make_unshrinkable(vma);
-
-	GEM_BUG_ON(ring->vaddr);
-	ring->vaddr = addr;
-
-	return 0;
-
-err_ring:
-	i915_vma_unpin(vma);
-err_unpin:
-	atomic_dec(&ring->pin_count);
-	return ret;
-}
-
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
-	tail = intel_ring_wrap(ring, tail);
-	ring->tail = tail;
-	ring->head = tail;
-	ring->emit = tail;
-	intel_ring_update_space(ring);
-}
-
-void intel_ring_unpin(struct intel_ring *ring)
-{
-	struct i915_vma *vma = ring->vma;
-
-	if (!atomic_dec_and_test(&ring->pin_count))
-		return;
-
-	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->emit);
-
-	i915_vma_unset_ggtt_write(vma);
-	if (i915_vma_is_map_and_fenceable(vma))
-		i915_vma_unpin_iomap(vma);
-	else
-		i915_gem_object_unpin_map(vma->obj);
-
-	GEM_BUG_ON(!ring->vaddr);
-	ring->vaddr = NULL;
-
-	i915_vma_unpin(vma);
-	i915_vma_make_purgeable(vma);
-}
-
-static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
-{
-	struct i915_address_space *vm = &ggtt->vm;
-	struct drm_i915_private *i915 = vm->i915;
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-
-	obj = i915_gem_object_create_stolen(i915, size);
-	if (!obj)
-		obj = i915_gem_object_create_internal(i915, size);
-	if (IS_ERR(obj))
-		return ERR_CAST(obj);
-
-	/*
-	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
-	 * if supported by the platform's GGTT.
-	 */
-	if (vm->has_read_only)
-		i915_gem_object_set_readonly(obj);
-
-	vma = i915_vma_instance(obj, vm, NULL);
-	if (IS_ERR(vma))
-		goto err;
-
-	return vma;
-
-err:
-	i915_gem_object_put(obj);
-	return vma;
-}
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_ring *ring;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!is_power_of_2(size));
-	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (!ring)
-		return ERR_PTR(-ENOMEM);
-
-	kref_init(&ring->ref);
-
-	ring->size = size;
-	/* Workaround an erratum on the i830 which causes a hang if
-	 * the TAIL pointer points to within the last 2 cachelines
-	 * of the buffer.
-	 */
-	ring->effective_size = size;
-	if (IS_I830(i915) || IS_I845G(i915))
-		ring->effective_size -= 2 * CACHELINE_BYTES;
-
-	intel_ring_update_space(ring);
-
-	vma = create_ring_vma(engine->gt->ggtt, size);
-	if (IS_ERR(vma)) {
-		kfree(ring);
-		return ERR_CAST(vma);
-	}
-	ring->vma = vma;
-
-	return ring;
-}
-
-void intel_ring_free(struct kref *ref)
-{
-	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-
-	i915_vma_close(ring->vma);
-	i915_vma_put(ring->vma);
-
-	kfree(ring);
-}
-
-static void __ring_context_fini(struct intel_context *ce)
-{
-	i915_gem_object_put(ce->state->obj);
-}
-
-static void ring_context_destroy(struct kref *ref)
-{
-	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
-
-	GEM_BUG_ON(intel_context_is_pinned(ce));
-
-	if (ce->state)
-		__ring_context_fini(ce);
-
-	intel_context_fini(ce);
-	intel_context_free(ce);
-}
-
-static struct i915_address_space *vm_alias(struct intel_context *ce)
-{
-	struct i915_address_space *vm;
-
-	vm = ce->vm;
-	if (i915_is_ggtt(vm))
-		vm = &i915_vm_to_ggtt(vm)->alias->vm;
-
-	return vm;
-}
-
-static int __context_pin_ppgtt(struct intel_context *ce)
-{
-	struct i915_address_space *vm;
-	int err = 0;
-
-	vm = vm_alias(ce);
-	if (vm)
-		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
-
-	return err;
-}
-
-static void __context_unpin_ppgtt(struct intel_context *ce)
-{
-	struct i915_address_space *vm;
-
-	vm = vm_alias(ce);
-	if (vm)
-		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
-}
-
-static void ring_context_unpin(struct intel_context *ce)
-{
-	__context_unpin_ppgtt(ce);
-}
-
-static struct i915_vma *
-alloc_context_vma(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	int err;
-
-	obj = i915_gem_object_create_shmem(i915, engine->context_size);
-	if (IS_ERR(obj))
-		return ERR_CAST(obj);
-
-	/*
-	 * Try to make the context utilize L3 as well as LLC.
-	 *
-	 * On VLV we don't have L3 controls in the PTEs so we
-	 * shouldn't touch the cache level, especially as that
-	 * would make the object snooped which might have a
-	 * negative performance impact.
-	 *
-	 * Snooping is required on non-llc platforms in execlist
-	 * mode, but since all GGTT accesses use PAT entry 0 we
-	 * get snooping anyway regardless of cache_level.
-	 *
-	 * This is only applicable for Ivy Bridge devices since
-	 * later platforms don't have L3 control bits in the PTE.
-	 */
-	if (IS_IVYBRIDGE(i915))
-		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
-
-	if (engine->default_state) {
-		void *defaults, *vaddr;
-
-		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-		if (IS_ERR(vaddr)) {
-			err = PTR_ERR(vaddr);
-			goto err_obj;
-		}
-
-		defaults = i915_gem_object_pin_map(engine->default_state,
-						   I915_MAP_WB);
-		if (IS_ERR(defaults)) {
-			err = PTR_ERR(defaults);
-			goto err_map;
-		}
-
-		memcpy(vaddr, defaults, engine->context_size);
-		i915_gem_object_unpin_map(engine->default_state);
-
-		i915_gem_object_flush_map(obj);
-		i915_gem_object_unpin_map(obj);
-	}
-
-	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
-		goto err_obj;
-	}
-
-	return vma;
-
-err_map:
-	i915_gem_object_unpin_map(obj);
-err_obj:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
-static int ring_context_alloc(struct intel_context *ce)
-{
-	struct intel_engine_cs *engine = ce->engine;
-
-	/* One ringbuffer to rule them all */
-	GEM_BUG_ON(!engine->legacy.ring);
-	ce->ring = engine->legacy.ring;
-	ce->timeline = intel_timeline_get(engine->legacy.timeline);
-
-	GEM_BUG_ON(ce->state);
-	if (engine->context_size) {
-		struct i915_vma *vma;
-
-		vma = alloc_context_vma(engine);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
-
-		ce->state = vma;
-	}
-
-	return 0;
-}
-
-static int ring_context_pin(struct intel_context *ce)
-{
-	int err;
-
-	err = intel_context_active_acquire(ce);
-	if (err)
-		return err;
-
-	err = __context_pin_ppgtt(ce);
-	if (err)
-		goto err_active;
-
-	return 0;
-
-err_active:
-	intel_context_active_release(ce);
-	return err;
-}
-
-static void ring_context_reset(struct intel_context *ce)
-{
-	intel_ring_reset(ce->ring, 0);
-}
-
-static const struct intel_context_ops ring_context_ops = {
-	.alloc = ring_context_alloc,
-
-	.pin = ring_context_pin,
-	.unpin = ring_context_unpin,
-
-	.enter = intel_context_enter_engine,
-	.exit = intel_context_exit_engine,
-
-	.reset = ring_context_reset,
-	.destroy = ring_context_destroy,
-};
-
-static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
-	*cs++ = PP_DIR_DCLV_2G;
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int flush_pd_dir(struct i915_request *rq)
-{
-	const struct intel_engine_cs * const engine = rq->engine;
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* Stall until the page table load is complete */
-	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-					INTEL_GT_SCRATCH_FIELD_DEFAULT);
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-	return 0;
-}
-
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
-{
-	struct drm_i915_private *i915 = rq->i915;
-	struct intel_engine_cs *engine = rq->engine;
-	enum intel_engine_id id;
-	const int num_engines =
-		IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
-	bool force_restore = false;
-	int len;
-	u32 *cs;
-
-	flags |= MI_MM_SPACE_GTT;
-	if (IS_HASWELL(i915))
-		/* These flags are for resource streamer on HSW+ */
-		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
-	else
-		/* We need to save the extended state for powersaving modes */
-		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
-	len = 4;
-	if (IS_GEN(i915, 7))
-		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
-	else if (IS_GEN(i915, 5))
-		len += 2;
-	if (flags & MI_FORCE_RESTORE) {
-		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
-		flags &= ~MI_FORCE_RESTORE;
-		force_restore = true;
-		len += 2;
-	}
-
-	cs = intel_ring_begin(rq, len);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (IS_GEN(i915, 7)) {
-		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-		if (num_engines) {
-			struct intel_engine_cs *signaller;
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
-			for_each_engine(signaller, i915, id) {
-				if (signaller == engine)
-					continue;
-
-				*cs++ = i915_mmio_reg_offset(
-					   RING_PSMI_CTL(signaller->mmio_base));
-				*cs++ = _MASKED_BIT_ENABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-		}
-	} else if (IS_GEN(i915, 5)) {
-		/*
-		 * This w/a is only listed for pre-production ilk a/b steppings,
-		 * but is also mentioned for programming the powerctx. To be
-		 * safe, just apply the workaround; we do not use SyncFlush so
-		 * this should never take effect and so be a no-op!
-		 */
-		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
-	}
-
-	if (force_restore) {
-		/*
-		 * The HW doesn't handle being told to restore the current
-		 * context very well. Quite often it likes goes to go off and
-		 * sulk, especially when it is meant to be reloading PP_DIR.
-		 * A very simple fix to force the reload is to simply switch
-		 * away from the current context and back again.
-		 *
-		 * Note that the kernel_context will contain random state
-		 * following the INHIBIT_RESTORE. We accept this since we
-		 * never use the kernel_context state; it is merely a
-		 * placeholder we use to flush other contexts.
-		 */
-		*cs++ = MI_SET_CONTEXT;
-		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
-			MI_MM_SPACE_GTT |
-			MI_RESTORE_INHIBIT;
-	}
-
-	*cs++ = MI_NOOP;
-	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
-	/*
-	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
-	 * WaMiSetContext_Hang:snb,ivb,vlv
-	 */
-	*cs++ = MI_NOOP;
-
-	if (IS_GEN(i915, 7)) {
-		if (num_engines) {
-			struct intel_engine_cs *signaller;
-			i915_reg_t last_reg = {}; /* keep gcc quiet */
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
-			for_each_engine(signaller, i915, id) {
-				if (signaller == engine)
-					continue;
-
-				last_reg = RING_PSMI_CTL(signaller->mmio_base);
-				*cs++ = i915_mmio_reg_offset(last_reg);
-				*cs++ = _MASKED_BIT_DISABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-
-			/* Insert a delay before the next switch! */
-			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = intel_gt_scratch_offset(rq->engine->gt,
-							INTEL_GT_SCRATCH_FIELD_DEFAULT);
-			*cs++ = MI_NOOP;
-		}
-		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	} else if (IS_GEN(i915, 5)) {
-		*cs++ = MI_SUSPEND_FLUSH;
-	}
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int remap_l3_slice(struct i915_request *rq, int slice)
-{
-	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
-	int i;
-
-	if (!remap_info)
-		return 0;
-
-	cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/*
-	 * Note: We do not worry about the concurrent register cacheline hang
-	 * here because no other code should access these registers other than
-	 * at initialization time.
-	 */
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
-	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
-		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
-		*cs++ = remap_info[i];
-	}
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int remap_l3(struct i915_request *rq)
-{
-	struct i915_gem_context *ctx = rq->gem_context;
-	int i, err;
-
-	if (!ctx->remap_slice)
-		return 0;
-
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(ctx->remap_slice & BIT(i)))
-			continue;
-
-		err = remap_l3_slice(rq, i);
-		if (err)
-			return err;
-	}
-
-	ctx->remap_slice = 0;
-	return 0;
-}
-
-static int switch_context(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	struct i915_address_space *vm = vm_alias(rq->hw_context);
-	unsigned int unwind_mm = 0;
-	u32 hw_flags = 0;
-	int ret;
-
-	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
-
-	if (vm) {
-		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-		int loops;
-
-		/*
-		 * Baytail takes a little more convincing that it really needs
-		 * to reload the PD between contexts. It is not just a little
-		 * longer, as adding more stalls after the load_pd_dir (i.e.
-		 * adding a long loop around flush_pd_dir) is not as effective
-		 * as reloading the PD umpteen times. 32 is derived from
-		 * experimentation (gem_exec_parallel/fds) and has no good
-		 * explanation.
-		 */
-		loops = 1;
-		if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
-			loops = 32;
-
-		do {
-			ret = load_pd_dir(rq, ppgtt);
-			if (ret)
-				goto err;
-		} while (--loops);
-
-		if (ppgtt->pd_dirty_engines & engine->mask) {
-			unwind_mm = engine->mask;
-			ppgtt->pd_dirty_engines &= ~unwind_mm;
-			hw_flags = MI_FORCE_RESTORE;
-		}
-	}
-
-	if (rq->hw_context->state) {
-		GEM_BUG_ON(engine->id != RCS0);
-
-		/*
-		 * The kernel context(s) is treated as pure scratch and is not
-		 * expected to retain any state (as we sacrifice it during
-		 * suspend and on resume it may be corrupted). This is ok,
-		 * as nothing actually executes using the kernel context; it
-		 * is purely used for flushing user contexts.
-		 */
-		if (i915_gem_context_is_kernel(rq->gem_context))
-			hw_flags = MI_RESTORE_INHIBIT;
-
-		ret = mi_set_context(rq, hw_flags);
-		if (ret)
-			goto err_mm;
-	}
-
-	if (vm) {
-		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			goto err_mm;
-
-		ret = flush_pd_dir(rq);
-		if (ret)
-			goto err_mm;
-
-		/*
-		 * Not only do we need a full barrier (post-sync write) after
-		 * invalidating the TLBs, but we need to wait a little bit
-		 * longer. Whether this is merely delaying us, or the
-		 * subsequent flush is a key part of serialising with the
-		 * post-sync op, this extra pass appears vital before a
-		 * mm switch!
-		 */
-		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
-		if (ret)
-			goto err_mm;
-
-		ret = engine->emit_flush(rq, EMIT_FLUSH);
-		if (ret)
-			goto err_mm;
-	}
-
-	ret = remap_l3(rq);
-	if (ret)
-		goto err_mm;
-
-	return 0;
-
-err_mm:
-	if (unwind_mm)
-		i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
-	return ret;
-}
-
-static int ring_request_alloc(struct i915_request *request)
-{
-	int ret;
-
-	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
-	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
-
-	/*
-	 * Flush enough space to reduce the likelihood of waiting after
-	 * we start building the request - in which case we will just
-	 * have to repeat work.
-	 */
-	request->reserved_space += LEGACY_REQUEST_SIZE;
-
-	/* Unconditionally invalidate GPU caches and TLBs. */
-	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
-	if (ret)
-		return ret;
-
-	ret = switch_context(request);
-	if (ret)
-		return ret;
-
-	request->reserved_space -= LEGACY_REQUEST_SIZE;
-	return 0;
-}
-
-static noinline int
-wait_for_space(struct intel_ring *ring,
-	       struct intel_timeline *tl,
-	       unsigned int bytes)
-{
-	struct i915_request *target;
-	long timeout;
-
-	if (intel_ring_update_space(ring) >= bytes)
-		return 0;
-
-	GEM_BUG_ON(list_empty(&tl->requests));
-	list_for_each_entry(target, &tl->requests, link) {
-		if (target->ring != ring)
-			continue;
-
-		/* Would completion of this request free enough space? */
-		if (bytes <= __intel_ring_space(target->postfix,
-						ring->emit, ring->size))
-			break;
-	}
-
-	if (GEM_WARN_ON(&target->link == &tl->requests))
-		return -ENOSPC;
-
-	timeout = i915_request_wait(target,
-				    I915_WAIT_INTERRUPTIBLE,
-				    MAX_SCHEDULE_TIMEOUT);
-	if (timeout < 0)
-		return timeout;
-
-	i915_request_retire_upto(target);
-
-	intel_ring_update_space(ring);
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
-	struct intel_ring *ring = rq->ring;
-	const unsigned int remain_usable = ring->effective_size - ring->emit;
-	const unsigned int bytes = num_dwords * sizeof(u32);
-	unsigned int need_wrap = 0;
-	unsigned int total_bytes;
-	u32 *cs;
-
-	/* Packets must be qword aligned. */
-	GEM_BUG_ON(num_dwords & 1);
-
-	total_bytes = bytes + rq->reserved_space;
-	GEM_BUG_ON(total_bytes > ring->effective_size);
-
-	if (unlikely(total_bytes > remain_usable)) {
-		const int remain_actual = ring->size - ring->emit;
-
-		if (bytes > remain_usable) {
-			/*
-			 * Not enough space for the basic request. So need to
-			 * flush out the remainder and then wait for
-			 * base + reserved.
-			 */
-			total_bytes += remain_actual;
-			need_wrap = remain_actual | 1;
-		} else  {
-			/*
-			 * The base request will fit but the reserved space
-			 * falls off the end. So we don't need an immediate
-			 * wrap and only need to effectively wait for the
-			 * reserved size from the start of ringbuffer.
-			 */
-			total_bytes = rq->reserved_space + remain_actual;
-		}
-	}
-
-	if (unlikely(total_bytes > ring->space)) {
-		int ret;
-
-		/*
-		 * Space is reserved in the ringbuffer for finalising the
-		 * request, as that cannot be allowed to fail. During request
-		 * finalisation, reserved_space is set to 0 to stop the
-		 * overallocation and the assumption is that then we never need
-		 * to wait (which has the risk of failing with EINTR).
-		 *
-		 * See also i915_request_alloc() and i915_request_add().
-		 */
-		GEM_BUG_ON(!rq->reserved_space);
-
-		ret = wait_for_space(ring, rq->timeline, total_bytes);
-		if (unlikely(ret))
-			return ERR_PTR(ret);
-	}
-
-	if (unlikely(need_wrap)) {
-		need_wrap &= ~1;
-		GEM_BUG_ON(need_wrap > ring->space);
-		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
-		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
-		/* Fill the tail with MI_NOOP */
-		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
-		ring->space -= need_wrap;
-		ring->emit = 0;
-	}
-
-	GEM_BUG_ON(ring->emit > ring->size - bytes);
-	GEM_BUG_ON(ring->space < bytes);
-	cs = ring->vaddr + ring->emit;
-	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
-	ring->emit += bytes;
-	ring->space -= bytes;
-
-	return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
-	int num_dwords;
-	void *cs;
-
-	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
-	if (num_dwords == 0)
-		return 0;
-
-	num_dwords = CACHELINE_DWORDS - num_dwords;
-	GEM_BUG_ON(num_dwords & 1);
-
-	cs = intel_ring_begin(rq, num_dwords);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
-	intel_ring_advance(rq, cs);
-
-	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
-	return 0;
-}
-
-static void gen6_bsd_submit_request(struct i915_request *request)
-{
-	struct intel_uncore *uncore = request->engine->uncore;
-
-	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-       /* Every tail move must follow the sequence below */
-
-	/* Disable notification that the ring is IDLE. The GT
-	 * will then assume that it is busy and bring it out of rc6.
-	 */
-	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
-			      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
-	/* Clear the context id. Here be magic! */
-	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
-
-	/* Wait for the ring not to be idle, i.e. for it to wake up. */
-	if (__intel_wait_for_register_fw(uncore,
-					 GEN6_BSD_SLEEP_PSMI_CONTROL,
-					 GEN6_BSD_SLEEP_INDICATOR,
-					 0,
-					 1000, 0, NULL))
-		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
-
-	/* Now that the ring is fully powered up, update the tail */
-	i9xx_submit_request(request);
-
-	/* Let the ring send IDLE messages to the GT again,
-	 * and so let it sleep to conserve power when idle.
-	 */
-	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
-			      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
-	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-}
-
-static int mi_flush_dw(struct i915_request *rq, u32 flags)
-{
-	u32 cmd, *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	cmd = MI_FLUSH_DW;
-
-	/*
-	 * We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	/*
-	 * Bspec vol 1c.3 - blitter engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	cmd |= flags;
-
-	*cs++ = cmd;
-	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = 0;
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
-{
-	return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
-}
-
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
-	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
-}
-
-static int
-hsw_emit_bb_start(struct i915_request *rq,
-		  u64 offset, u32 len,
-		  unsigned int dispatch_flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
-		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
-	/* bit0-7 is the length on GEN6+ */
-	*cs++ = offset;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int
-gen6_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
-		0 : MI_BATCH_NON_SECURE_I965);
-	/* bit0-7 is the length on GEN6+ */
-	*cs++ = offset;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-/* Blitter support (SandyBridge+) */
-
-static int gen6_ring_flush(struct i915_request *rq, u32 mode)
-{
-	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
-}
-
-static void i9xx_set_default_submission(struct intel_engine_cs *engine)
-{
-	engine->submit_request = i9xx_submit_request;
-	engine->cancel_requests = cancel_requests;
-
-	engine->park = NULL;
-	engine->unpark = NULL;
-}
-
-static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
-{
-	i9xx_set_default_submission(engine);
-	engine->submit_request = gen6_bsd_submit_request;
-}
-
-static void ring_destroy(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
-	intel_engine_cleanup_common(engine);
-
-	intel_ring_unpin(engine->legacy.ring);
-	intel_ring_put(engine->legacy.ring);
-
-	intel_timeline_unpin(engine->legacy.timeline);
-	intel_timeline_put(engine->legacy.timeline);
-
-	kfree(engine);
-}
-
-static void setup_irq(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (INTEL_GEN(i915) >= 6) {
-		engine->irq_enable = gen6_irq_enable;
-		engine->irq_disable = gen6_irq_disable;
-	} else if (INTEL_GEN(i915) >= 5) {
-		engine->irq_enable = gen5_irq_enable;
-		engine->irq_disable = gen5_irq_disable;
-	} else if (INTEL_GEN(i915) >= 3) {
-		engine->irq_enable = i9xx_irq_enable;
-		engine->irq_disable = i9xx_irq_disable;
-	} else {
-		engine->irq_enable = i8xx_irq_enable;
-		engine->irq_disable = i8xx_irq_disable;
-	}
-}
-
-static void setup_common(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* gen8+ are only supported with execlists */
-	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
-
-	setup_irq(engine);
-
-	engine->destroy = ring_destroy;
-
-	engine->resume = xcs_resume;
-	engine->reset.prepare = reset_prepare;
-	engine->reset.reset = reset_ring;
-	engine->reset.finish = reset_finish;
-
-	engine->cops = &ring_context_ops;
-	engine->request_alloc = ring_request_alloc;
-
-	/*
-	 * Using a global execution timeline; the previous final breadcrumb is
-	 * equivalent to our next initial bread so we can elide
-	 * engine->emit_init_breadcrumb().
-	 */
-	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
-	if (IS_GEN(i915, 5))
-		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
-
-	engine->set_default_submission = i9xx_set_default_submission;
-
-	if (INTEL_GEN(i915) >= 6)
-		engine->emit_bb_start = gen6_emit_bb_start;
-	else if (INTEL_GEN(i915) >= 4)
-		engine->emit_bb_start = i965_emit_bb_start;
-	else if (IS_I830(i915) || IS_I845G(i915))
-		engine->emit_bb_start = i830_emit_bb_start;
-	else
-		engine->emit_bb_start = i915_emit_bb_start;
-}
-
-static void setup_rcs(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (HAS_L3_DPF(i915))
-		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
-	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-
-	if (INTEL_GEN(i915) >= 7) {
-		engine->emit_flush = gen7_render_ring_flush;
-		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
-	} else if (IS_GEN(i915, 6)) {
-		engine->emit_flush = gen6_render_ring_flush;
-		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
-	} else if (IS_GEN(i915, 5)) {
-		engine->emit_flush = gen4_render_ring_flush;
-	} else {
-		if (INTEL_GEN(i915) < 4)
-			engine->emit_flush = gen2_render_ring_flush;
-		else
-			engine->emit_flush = gen4_render_ring_flush;
-		engine->irq_enable_mask = I915_USER_INTERRUPT;
-	}
-
-	if (IS_HASWELL(i915))
-		engine->emit_bb_start = hsw_emit_bb_start;
-
-	engine->resume = rcs_resume;
-}
-
-static void setup_vcs(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (INTEL_GEN(i915) >= 6) {
-		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN(i915, 6))
-			engine->set_default_submission = gen6_bsd_set_default_submission;
-		engine->emit_flush = gen6_bsd_ring_flush;
-		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
-
-		if (IS_GEN(i915, 6))
-			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
-		else
-			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-	} else {
-		engine->emit_flush = bsd_ring_flush;
-		if (IS_GEN(i915, 5))
-			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
-		else
-			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
-	}
-}
-
-static void setup_bcs(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	engine->emit_flush = gen6_ring_flush;
-	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
-
-	if (IS_GEN(i915, 6))
-		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
-	else
-		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-}
-
-static void setup_vecs(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(i915) < 7);
-
-	engine->emit_flush = gen6_ring_flush;
-	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
-	engine->irq_enable = hsw_vebox_irq_enable;
-	engine->irq_disable = hsw_vebox_irq_disable;
-
-	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-}
-
-int intel_ring_submission_setup(struct intel_engine_cs *engine)
-{
-	setup_common(engine);
-
-	switch (engine->class) {
-	case RENDER_CLASS:
-		setup_rcs(engine);
-		break;
-	case VIDEO_DECODE_CLASS:
-		setup_vcs(engine);
-		break;
-	case COPY_ENGINE_CLASS:
-		setup_bcs(engine);
-		break;
-	case VIDEO_ENHANCEMENT_CLASS:
-		setup_vecs(engine);
-		break;
-	default:
-		MISSING_CASE(engine->class);
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-int intel_ring_submission_init(struct intel_engine_cs *engine)
-{
-	struct intel_timeline *timeline;
-	struct intel_ring *ring;
-	int err;
-
-	timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
-	if (IS_ERR(timeline)) {
-		err = PTR_ERR(timeline);
-		goto err;
-	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
-	err = intel_timeline_pin(timeline);
-	if (err)
-		goto err_timeline;
-
-	ring = intel_engine_create_ring(engine, SZ_16K);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err_timeline_unpin;
-	}
-
-	err = intel_ring_pin(ring);
-	if (err)
-		goto err_ring;
-
-	GEM_BUG_ON(engine->legacy.ring);
-	engine->legacy.ring = ring;
-	engine->legacy.timeline = timeline;
-
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err_ring_unpin;
-
-	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
-
-	return 0;
-
-err_ring_unpin:
-	intel_ring_unpin(ring);
-err_ring:
-	intel_ring_put(ring);
-err_timeline_unpin:
-	intel_timeline_unpin(timeline);
-err_timeline:
-	intel_timeline_put(timeline);
-err:
-	intel_engine_cleanup_common(engine);
-	return err;
-}
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
new file mode 100644
index 000000000000..d2a3d935d186
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -0,0 +1,1903 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_rps.h"
+#include "intel_sideband.h"
+#include "../../../platform/x86/intel_ips.h"
+
+/*
+ * Lock protecting IPS related data structures
+ */
+static DEFINE_SPINLOCK(mchdev_lock);
+
+static struct intel_gt *rps_to_gt(struct intel_rps *rps)
+{
+	return container_of(rps, struct intel_gt, rps);
+}
+
+static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
+{
+	return rps_to_gt(rps)->i915;
+}
+
+static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
+{
+	return rps_to_gt(rps)->uncore;
+}
+
+static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
+{
+	return mask & ~rps->pm_intrmsk_mbz;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+	intel_uncore_write_fw(uncore, reg, val);
+}
+
+static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
+{
+	u32 mask = 0;
+
+	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+	if (val > rps->min_freq_softlimit)
+		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+			 GEN6_PM_RP_DOWN_THRESHOLD |
+			 GEN6_PM_RP_DOWN_TIMEOUT);
+
+	if (val < rps->max_freq_softlimit)
+		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+
+	mask &= rps->pm_events;
+
+	return rps_pm_sanitize_mask(rps, ~mask);
+}
+
+static void rps_reset_ei(struct intel_rps *rps)
+{
+	memset(&rps->ei, 0, sizeof(rps->ei));
+}
+
+static void rps_enable_interrupts(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	rps_reset_ei(rps);
+
+	if (IS_VALLEYVIEW(gt->i915))
+		/* WaGsvRC0ResidencyMethod:vlv */
+		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+				  GEN6_PM_RP_DOWN_THRESHOLD |
+				  GEN6_PM_RP_DOWN_TIMEOUT);
+
+	spin_lock_irq(&gt->irq_lock);
+	gen6_gt_pm_enable_irq(gt, rps->pm_events);
+	spin_unlock_irq(&gt->irq_lock);
+
+	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
+}
+
+static void gen6_rps_reset_interrupts(struct intel_rps *rps)
+{
+	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
+}
+
+static void gen11_rps_reset_interrupts(struct intel_rps *rps)
+{
+	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
+		;
+}
+
+static void rps_reset_interrupts(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	spin_lock_irq(&gt->irq_lock);
+	if (INTEL_GEN(gt->i915) >= 11)
+		gen11_rps_reset_interrupts(rps);
+	else
+		gen6_rps_reset_interrupts(rps);
+
+	rps->pm_iir = 0;
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+static void rps_disable_interrupts(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	rps->pm_events = 0;
+
+	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
+
+	spin_lock_irq(&gt->irq_lock);
+	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
+	spin_unlock_irq(&gt->irq_lock);
+
+	intel_synchronize_irq(gt->i915);
+
+	/*
+	 * Now that we will not be generating any more work, flush any
+	 * outstanding tasks. As we are called on the RPS idle path,
+	 * we will reset the GPU to minimum frequencies, so the current
+	 * state of the worker can be discarded.
+	 */
+	cancel_work_sync(&rps->work);
+
+	rps_reset_interrupts(rps);
+}
+
+static const struct cparams {
+	u16 i;
+	u16 t;
+	u16 m;
+	u16 c;
+} cparams[] = {
+	{ 1, 1333, 301, 28664 },
+	{ 1, 1066, 294, 24460 },
+	{ 1, 800, 294, 25192 },
+	{ 0, 1333, 276, 27605 },
+	{ 0, 1066, 276, 27605 },
+	{ 0, 800, 231, 23784 },
+};
+
+static void gen5_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u8 fmax, fmin, fstart;
+	u32 rgvmodectl;
+	int c_m, i;
+
+	if (i915->fsb_freq <= 3200)
+		c_m = 0;
+	else if (i915->fsb_freq <= 4800)
+		c_m = 1;
+	else
+		c_m = 2;
+
+	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+			rps->ips.m = cparams[i].m;
+			rps->ips.c = cparams[i].c;
+			break;
+		}
+	}
+
+	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+			 fmax, fmin, fstart);
+
+	rps->min_freq = fmax;
+	rps->max_freq = fmin;
+
+	rps->idle_freq = rps->min_freq;
+	rps->cur_freq = rps->idle_freq;
+}
+
+static unsigned long
+__ips_chipset_val(struct intel_ips *ips)
+{
+	struct intel_uncore *uncore =
+		rps_to_uncore(container_of(ips, struct intel_rps, ips));
+	unsigned long now = jiffies_to_msecs(jiffies), dt;
+	unsigned long result;
+	u64 total, delta;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	/*
+	 * Prevent division-by-zero if we are asking too fast.
+	 * Also, we don't get interesting results if we are polling
+	 * faster than once in 10ms, so just return the saved value
+	 * in such cases.
+	 */
+	dt = now - ips->last_time1;
+	if (dt <= 10)
+		return ips->chipset_power;
+
+	/* FIXME: handle per-counter overflow */
+	total = intel_uncore_read(uncore, DMIEC);
+	total += intel_uncore_read(uncore, DDREC);
+	total += intel_uncore_read(uncore, CSIEC);
+
+	delta = total - ips->last_count1;
+
+	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
+
+	ips->last_count1 = total;
+	ips->last_time1 = now;
+
+	ips->chipset_power = result;
+
+	return result;
+}
+
+static unsigned long ips_mch_val(struct intel_uncore *uncore)
+{
+	unsigned int m, x, b;
+	u32 tsfs;
+
+	tsfs = intel_uncore_read(uncore, TSFS);
+	x = intel_uncore_read8(uncore, TR1);
+
+	b = tsfs & TSFS_INTR_MASK;
+	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
+
+	return m * x / 127 - b;
+}
+
+static int _pxvid_to_vd(u8 pxvid)
+{
+	if (pxvid == 0)
+		return 0;
+
+	if (pxvid >= 8 && pxvid < 31)
+		pxvid = 31;
+
+	return (pxvid + 2) * 125;
+}
+
+static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
+{
+	const int vd = _pxvid_to_vd(pxvid);
+
+	if (INTEL_INFO(i915)->is_mobile)
+		return max(vd - 1125, 0);
+
+	return vd;
+}
+
+static void __gen5_ips_update(struct intel_ips *ips)
+{
+	struct intel_uncore *uncore =
+		rps_to_uncore(container_of(ips, struct intel_rps, ips));
+	u64 now, delta, dt;
+	u32 count;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	now = ktime_get_raw_ns();
+	dt = now - ips->last_time2;
+	do_div(dt, NSEC_PER_MSEC);
+
+	/* Don't divide by 0 */
+	if (dt <= 10)
+		return;
+
+	count = intel_uncore_read(uncore, GFXEC);
+	delta = count - ips->last_count2;
+
+	ips->last_count2 = count;
+	ips->last_time2 = now;
+
+	/* More magic constants... */
+	ips->gfx_power = div_u64(delta * 1181, dt * 10);
+}
+
+static void gen5_rps_update(struct intel_rps *rps)
+{
+	spin_lock_irq(&mchdev_lock);
+	__gen5_ips_update(&rps->ips);
+	spin_unlock_irq(&mchdev_lock);
+}
+
+static bool gen5_rps_set(struct intel_rps *rps, u8 val)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u16 rgvswctl;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+	if (rgvswctl & MEMCTL_CMD_STS) {
+		DRM_DEBUG("gpu busy, RCS change rejected\n");
+		return false; /* still busy with another command */
+	}
+
+	/* Invert the frequency bin into an ips delay */
+	val = rps->max_freq - val;
+	val = rps->min_freq + val;
+
+	rgvswctl =
+		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(val << MEMCTL_FREQ_SHIFT) |
+		MEMCTL_SFCAVM;
+	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+	intel_uncore_posting_read16(uncore, MEMSWCTL);
+
+	rgvswctl |= MEMCTL_CMD_STS;
+	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+
+	return true;
+}
+
+static unsigned long intel_pxfreq(u32 vidfreq)
+{
+	int div = (vidfreq & 0x3f0000) >> 16;
+	int post = (vidfreq & 0x3000) >> 12;
+	int pre = (vidfreq & 0x7);
+
+	if (!pre)
+		return 0;
+
+	return div * 133333 / (pre << post);
+}
+
+static unsigned int init_emon(struct intel_uncore *uncore)
+{
+	u8 pxw[16];
+	int i;
+
+	/* Disable to program */
+	intel_uncore_write(uncore, ECR, 0);
+	intel_uncore_posting_read(uncore, ECR);
+
+	/* Program energy weights for various events */
+	intel_uncore_write(uncore, SDEW, 0x15040d00);
+	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
+	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
+	intel_uncore_write(uncore, CSIEW2, 0x04000004);
+
+	for (i = 0; i < 5; i++)
+		intel_uncore_write(uncore, PEW(i), 0);
+	for (i = 0; i < 3; i++)
+		intel_uncore_write(uncore, DEW(i), 0);
+
+	/* Program P-state weights to account for frequency power adjustment */
+	for (i = 0; i < 16; i++) {
+		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
+		unsigned int freq = intel_pxfreq(pxvidfreq);
+		unsigned int vid =
+			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+		unsigned int val;
+
+		val = vid * vid * freq / 1000 * 255;
+		val /= 127 * 127 * 900;
+
+		pxw[i] = val;
+	}
+	/* Render standby states get 0 weight */
+	pxw[14] = 0;
+	pxw[15] = 0;
+
+	for (i = 0; i < 4; i++) {
+		intel_uncore_write(uncore, PXW(i),
+				   pxw[i * 4 + 0] << 24 |
+				   pxw[i * 4 + 1] << 16 |
+				   pxw[i * 4 + 2] <<  8 |
+				   pxw[i * 4 + 3] <<  0);
+	}
+
+	/* Adjust magic regs to magic values (more experimental results) */
+	intel_uncore_write(uncore, OGW0, 0);
+	intel_uncore_write(uncore, OGW1, 0);
+	intel_uncore_write(uncore, EG0, 0x00007f00);
+	intel_uncore_write(uncore, EG1, 0x0000000e);
+	intel_uncore_write(uncore, EG2, 0x000e0000);
+	intel_uncore_write(uncore, EG3, 0x68000300);
+	intel_uncore_write(uncore, EG4, 0x42000000);
+	intel_uncore_write(uncore, EG5, 0x00140031);
+	intel_uncore_write(uncore, EG6, 0);
+	intel_uncore_write(uncore, EG7, 0);
+
+	for (i = 0; i < 8; i++)
+		intel_uncore_write(uncore, PXWL(i), 0);
+
+	/* Enable PMON + select events */
+	intel_uncore_write(uncore, ECR, 0x80000019);
+
+	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
+}
+
+static bool gen5_rps_enable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u8 fstart, vstart;
+	u32 rgvmodectl;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+	/* Enable temp reporting */
+	intel_uncore_write16(uncore, PMMISC,
+			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
+	intel_uncore_write16(uncore, TSC1,
+			     intel_uncore_read16(uncore, TSC1) | TSE);
+
+	/* 100ms RC evaluation intervals */
+	intel_uncore_write(uncore, RCUPEI, 100000);
+	intel_uncore_write(uncore, RCDNEI, 100000);
+
+	/* Set max/min thresholds to 90ms and 80ms respectively */
+	intel_uncore_write(uncore, RCBMAXAVG, 90000);
+	intel_uncore_write(uncore, RCBMINAVG, 80000);
+
+	intel_uncore_write(uncore, MEMIHYST, 1);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+
+	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
+		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+
+	intel_uncore_write(uncore,
+			   MEMINTREN,
+			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+	intel_uncore_write(uncore, VIDSTART, vstart);
+	intel_uncore_posting_read(uncore, VIDSTART);
+
+	rgvmodectl |= MEMMODE_SWMODE_EN;
+	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
+
+	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
+			     MEMCTL_CMD_STS) == 0, 10))
+		DRM_ERROR("stuck trying to change perf mode\n");
+	mdelay(1);
+
+	gen5_rps_set(rps, rps->cur_freq);
+
+	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
+	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
+	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
+	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
+
+	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
+	rps->ips.last_time2 = ktime_get_raw_ns();
+
+	spin_unlock_irq(&mchdev_lock);
+
+	rps->ips.corr = init_emon(uncore);
+
+	return true;
+}
+
+static void gen5_rps_disable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u16 rgvswctl;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+
+	/* Ack interrupts, disable EFC interrupt */
+	intel_uncore_write(uncore, MEMINTREN,
+			   intel_uncore_read(uncore, MEMINTREN) &
+			   ~MEMINT_EVAL_CHG_EN);
+	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+	intel_uncore_write(uncore, DEIER,
+			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
+	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
+	intel_uncore_write(uncore, DEIMR,
+			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
+
+	/* Go back to the starting frequency */
+	gen5_rps_set(rps, rps->idle_freq);
+	mdelay(1);
+	rgvswctl |= MEMCTL_CMD_STS;
+	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
+	mdelay(1);
+
+	spin_unlock_irq(&mchdev_lock);
+}
+
+static u32 rps_limits(struct intel_rps *rps, u8 val)
+{
+	u32 limits;
+
+	/*
+	 * Only set the down limit when we've reached the lowest level to avoid
+	 * getting more interrupts, otherwise leave this clear. This prevents a
+	 * race in the hw when coming out of rc6: There's a tiny window where
+	 * the hw runs at the minimal clock before selecting the desired
+	 * frequency, if the down threshold expires in that window we will not
+	 * receive a down interrupt.
+	 */
+	if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
+		limits = rps->max_freq_softlimit << 23;
+		if (val <= rps->min_freq_softlimit)
+			limits |= rps->min_freq_softlimit << 14;
+	} else {
+		limits = rps->max_freq_softlimit << 24;
+		if (val <= rps->min_freq_softlimit)
+			limits |= rps->min_freq_softlimit << 16;
+	}
+
+	return limits;
+}
+
+static void rps_set_power(struct intel_rps *rps, int new_power)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 threshold_up = 0, threshold_down = 0; /* in % */
+	u32 ei_up = 0, ei_down = 0;
+
+	lockdep_assert_held(&rps->power.mutex);
+
+	if (new_power == rps->power.mode)
+		return;
+
+	/* Note the units here are not exactly 1us, but 1280ns. */
+	switch (new_power) {
+	case LOW_POWER:
+		/* Upclock if more than 95% busy over 16ms */
+		ei_up = 16000;
+		threshold_up = 95;
+
+		/* Downclock if less than 85% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 85;
+		break;
+
+	case BETWEEN:
+		/* Upclock if more than 90% busy over 13ms */
+		ei_up = 13000;
+		threshold_up = 90;
+
+		/* Downclock if less than 75% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 75;
+		break;
+
+	case HIGH_POWER:
+		/* Upclock if more than 85% busy over 10ms */
+		ei_up = 10000;
+		threshold_up = 85;
+
+		/* Downclock if less than 60% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 60;
+		break;
+	}
+
+	/* When byt can survive without system hang with dynamic
+	 * sw freq adjustments, this restriction can be lifted.
+	 */
+	if (IS_VALLEYVIEW(i915))
+		goto skip_hw_write;
+
+	set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+	set(uncore, GEN6_RP_UP_THRESHOLD,
+	    GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+
+	set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+	set(uncore, GEN6_RP_DOWN_THRESHOLD,
+	    GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+
+	set(uncore, GEN6_RP_CONTROL,
+	    (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
+	    GEN6_RP_MEDIA_IS_GFX |
+	    GEN6_RP_ENABLE |
+	    GEN6_RP_UP_BUSY_AVG |
+	    GEN6_RP_DOWN_IDLE_AVG);
+
+skip_hw_write:
+	rps->power.mode = new_power;
+	rps->power.up_threshold = threshold_up;
+	rps->power.down_threshold = threshold_down;
+}
+
+static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
+{
+	int new_power;
+
+	new_power = rps->power.mode;
+	switch (rps->power.mode) {
+	case LOW_POWER:
+		if (val > rps->efficient_freq + 1 &&
+		    val > rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+
+	case BETWEEN:
+		if (val <= rps->efficient_freq &&
+		    val < rps->cur_freq)
+			new_power = LOW_POWER;
+		else if (val >= rps->rp0_freq &&
+			 val > rps->cur_freq)
+			new_power = HIGH_POWER;
+		break;
+
+	case HIGH_POWER:
+		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+		    val < rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+	}
+	/* Max/min bins are special */
+	if (val <= rps->min_freq_softlimit)
+		new_power = LOW_POWER;
+	if (val >= rps->max_freq_softlimit)
+		new_power = HIGH_POWER;
+
+	mutex_lock(&rps->power.mutex);
+	if (rps->power.interactive)
+		new_power = HIGH_POWER;
+	rps_set_power(rps, new_power);
+	mutex_unlock(&rps->power.mutex);
+}
+
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
+{
+	mutex_lock(&rps->power.mutex);
+	if (interactive) {
+		if (!rps->power.interactive++ && rps->active)
+			rps_set_power(rps, HIGH_POWER);
+	} else {
+		GEM_BUG_ON(!rps->power.interactive);
+		rps->power.interactive--;
+	}
+	mutex_unlock(&rps->power.mutex);
+}
+
+static int gen6_rps_set(struct intel_rps *rps, u8 val)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 swreq;
+
+	if (INTEL_GEN(i915) >= 9)
+		swreq = GEN9_FREQUENCY(val);
+	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+		swreq = HSW_FREQUENCY(val);
+	else
+		swreq = (GEN6_FREQUENCY(val) |
+			 GEN6_OFFSET(0) |
+			 GEN6_AGGRESSIVE_TURBO);
+	set(uncore, GEN6_RPNSWREQ, swreq);
+
+	return 0;
+}
+
+static int vlv_rps_set(struct intel_rps *rps, u8 val)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	int err;
+
+	vlv_punit_get(i915);
+	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
+	vlv_punit_put(i915);
+
+	return err;
+}
+
+static int rps_set(struct intel_rps *rps, u8 val, bool update)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	int err;
+
+	if (INTEL_GEN(i915) < 6)
+		return 0;
+
+	if (val == rps->last_freq)
+		return 0;
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		err = vlv_rps_set(rps, val);
+	else
+		err = gen6_rps_set(rps, val);
+	if (err)
+		return err;
+
+	if (update)
+		gen6_rps_set_thresholds(rps, val);
+	rps->last_freq = val;
+
+	return 0;
+}
+
+void intel_rps_unpark(struct intel_rps *rps)
+{
+	u8 freq;
+
+	if (!rps->enabled)
+		return;
+
+	/*
+	 * Use the user's desired frequency as a guide, but for better
+	 * performance, jump directly to RPe as our starting frequency.
+	 */
+	mutex_lock(&rps->lock);
+	rps->active = true;
+	freq = max(rps->cur_freq, rps->efficient_freq),
+	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
+	intel_rps_set(rps, freq);
+	rps->last_adj = 0;
+	mutex_unlock(&rps->lock);
+
+	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+		rps_enable_interrupts(rps);
+
+	if (IS_GEN(rps_to_i915(rps), 5))
+		gen5_rps_update(rps);
+}
+
+void intel_rps_park(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	if (!rps->enabled)
+		return;
+
+	if (INTEL_GEN(i915) >= 6)
+		rps_disable_interrupts(rps);
+
+	rps->active = false;
+	if (rps->last_freq <= rps->idle_freq)
+		return;
+
+	/*
+	 * The punit delays the write of the frequency and voltage until it
+	 * determines the GPU is awake. During normal usage we don't want to
+	 * waste power changing the frequency if the GPU is sleeping (rc6).
+	 * However, the GPU and driver is now idle and we do not want to delay
+	 * switching to minimum voltage (reducing power whilst idle) as we do
+	 * not expect to be woken in the near future and so must flush the
+	 * change by waking the device.
+	 *
+	 * We choose to take the media powerwell (either would do to trick the
+	 * punit into committing the voltage change) as that takes a lot less
+	 * power than the render powerwell.
+	 */
+	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+	rps_set(rps, rps->idle_freq, false);
+	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+}
+
+void intel_rps_boost(struct i915_request *rq)
+{
+	struct intel_rps *rps = &rq->engine->gt->rps;
+	unsigned long flags;
+
+	if (i915_request_signaled(rq) || !rps->active)
+		return;
+
+	/* Serializes with i915_request_retire() */
+	spin_lock_irqsave(&rq->lock, flags);
+	if (!i915_request_has_waitboost(rq) &&
+	    !dma_fence_is_signaled_locked(&rq->fence)) {
+		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+
+		if (!atomic_fetch_inc(&rps->num_waiters) &&
+		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
+			schedule_work(&rps->work);
+
+		atomic_inc(&rps->boosts);
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+int intel_rps_set(struct intel_rps *rps, u8 val)
+{
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (rps->active) {
+		err = rps_set(rps, val, true);
+		if (err)
+			return err;
+
+		/*
+		 * Make sure we continue to get interrupts
+		 * until we hit the minimum or maximum frequencies.
+		 */
+		if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+			struct intel_uncore *uncore = rps_to_uncore(rps);
+
+			set(uncore,
+			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
+
+			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
+		}
+	}
+
+	rps->cur_freq = val;
+	return 0;
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	/* All of these values are in units of 50MHz */
+
+	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
+	if (IS_GEN9_LP(i915)) {
+		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >>  0) & 0xff;
+	} else {
+		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >> 16) & 0xff;
+	}
+
+	/* hw_max = RP0 until we check for overclocking */
+	rps->max_freq = rps->rp0_freq;
+
+	rps->efficient_freq = rps->rp1_freq;
+	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
+	    IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+		u32 ddcc_status = 0;
+
+		if (sandybridge_pcode_read(i915,
+					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+					   &ddcc_status, NULL) == 0)
+			rps->efficient_freq =
+				clamp_t(u8,
+					(ddcc_status >> 8) & 0xff,
+					rps->min_freq,
+					rps->max_freq);
+	}
+
+	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+		/* Store the frequency values in 16.66 MHZ units, which is
+		 * the natural hardware unit for SKL
+		 */
+		rps->rp0_freq *= GEN9_FREQ_SCALER;
+		rps->rp1_freq *= GEN9_FREQ_SCALER;
+		rps->min_freq *= GEN9_FREQ_SCALER;
+		rps->max_freq *= GEN9_FREQ_SCALER;
+		rps->efficient_freq *= GEN9_FREQ_SCALER;
+	}
+}
+
+static bool rps_reset(struct intel_rps *rps)
+{
+	/* force a reset */
+	rps->power.mode = -1;
+	rps->last_freq = -1;
+
+	if (rps_set(rps, rps->min_freq, true)) {
+		DRM_ERROR("Failed to reset RPS to initial values\n");
+		return false;
+	}
+
+	rps->cur_freq = rps->min_freq;
+	return true;
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static bool gen9_rps_enable(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	/* Program defaults and thresholds for RPS */
+	if (IS_GEN(i915, 9))
+		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+				      GEN9_FREQUENCY(rps->rp1_freq));
+
+	/* 1 second timeout */
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+			      GT_INTERVAL_FROM_US(i915, 1000000));
+
+	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+	return rps_reset(rps);
+}
+
+static bool gen8_rps_enable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+			      HSW_FREQUENCY(rps->rp1_freq));
+
+	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+			      100000000 / 128); /* 1 second timeout */
+
+	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	return rps_reset(rps);
+}
+
+static bool gen6_rps_enable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	/* Power down if completely idle for over 50ms */
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
+	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	return rps_reset(rps);
+}
+
+static int chv_rps_max_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+	switch (RUNTIME_INFO(i915)->sseu.eu_total) {
+	case 8:
+		/* (2 * 4) config */
+		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
+		break;
+	case 12:
+		/* (2 * 6) config */
+		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
+		break;
+	case 16:
+		/* (2 * 8) config */
+	default:
+		/* Setting (2 * 8) Min RP0 for any other combination */
+		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
+		break;
+	}
+
+	return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static int chv_rps_rpe_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
+
+	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+}
+
+static int chv_rps_guar_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+	return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static u32 chv_rps_min_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
+
+	return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static bool chv_rps_enable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	/* 1: Program defaults and thresholds for RPS*/
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	/* 2: Enable RPS */
+	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
+			      GEN6_RP_MEDIA_IS_GFX |
+			      GEN6_RP_ENABLE |
+			      GEN6_RP_UP_BUSY_AVG |
+			      GEN6_RP_DOWN_IDLE_AVG);
+
+	/* Setting Fixed Bias */
+	vlv_punit_get(i915);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(i915);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	return rps_reset(rps);
+}
+
+static int vlv_rps_guar_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val, rp1;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
+	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+	return rp1;
+}
+
+static int vlv_rps_max_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val, rp0;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+	/* Clamp to max */
+	rp0 = min_t(u32, rp0, 0xea);
+
+	return rp0;
+}
+
+static int vlv_rps_rpe_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val, rpe;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+	return rpe;
+}
+
+static int vlv_rps_min_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+	/*
+	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
+	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
+	 * to make sure it matches what Punit accepts.
+	 */
+	return max_t(u32, val, 0xc0);
+}
+
+static bool vlv_rps_enable(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+			      GEN6_RP_MEDIA_TURBO |
+			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
+			      GEN6_RP_MEDIA_IS_GFX |
+			      GEN6_RP_ENABLE |
+			      GEN6_RP_UP_BUSY_AVG |
+			      GEN6_RP_DOWN_IDLE_CONT);
+
+	vlv_punit_get(i915);
+
+	/* Setting Fixed Bias */
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(i915);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	return rps_reset(rps);
+}
+
+static unsigned long __ips_gfx_val(struct intel_ips *ips)
+{
+	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	unsigned long t, corr, state1, corr2, state2;
+	u32 pxvid, ext_v;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
+	pxvid = (pxvid >> 24) & 0x7f;
+	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
+
+	state1 = ext_v;
+
+	/* Revel in the empirically derived constants */
+
+	/* Correction factor in 1/100000 units */
+	t = ips_mch_val(uncore);
+	if (t > 80)
+		corr = t * 2349 + 135940;
+	else if (t >= 50)
+		corr = t * 964 + 29317;
+	else /* < 50 */
+		corr = t * 301 + 1004;
+
+	corr = corr * 150142 * state1 / 10000 - 78642;
+	corr /= 100000;
+	corr2 = corr * ips->corr;
+
+	state2 = corr2 * state1 / 10000;
+	state2 /= 100; /* convert to mW */
+
+	__gen5_ips_update(ips);
+
+	return ips->gfx_power + state2;
+}
+
+void intel_rps_enable(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+	if (IS_CHERRYVIEW(i915))
+		rps->enabled = chv_rps_enable(rps);
+	else if (IS_VALLEYVIEW(i915))
+		rps->enabled = vlv_rps_enable(rps);
+	else if (INTEL_GEN(i915) >= 9)
+		rps->enabled = gen9_rps_enable(rps);
+	else if (INTEL_GEN(i915) >= 8)
+		rps->enabled = gen8_rps_enable(rps);
+	else if (INTEL_GEN(i915) >= 6)
+		rps->enabled = gen6_rps_enable(rps);
+	else if (IS_IRONLAKE_M(i915))
+		rps->enabled = gen5_rps_enable(rps);
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+	if (!rps->enabled)
+		return;
+
+	WARN_ON(rps->max_freq < rps->min_freq);
+	WARN_ON(rps->idle_freq > rps->max_freq);
+
+	WARN_ON(rps->efficient_freq < rps->min_freq);
+	WARN_ON(rps->efficient_freq > rps->max_freq);
+}
+
+static void gen6_rps_disable(struct intel_rps *rps)
+{
+	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
+}
+
+void intel_rps_disable(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	rps->enabled = false;
+
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_disable(rps);
+	else if (IS_IRONLAKE_M(i915))
+		gen5_rps_disable(rps);
+}
+
+static int byt_gpu_freq(struct intel_rps *rps, int val)
+{
+	/*
+	 * N = val - 0xb7
+	 * Slow = Fast = GPLL ref * N
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+static int byt_freq_opcode(struct intel_rps *rps, int val)
+{
+	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
+
+static int chv_gpu_freq(struct intel_rps *rps, int val)
+{
+	/*
+	 * N = val / 2
+	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+static int chv_freq_opcode(struct intel_rps *rps, int val)
+{
+	/* CHV needs even values */
+	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
+
+int intel_gpu_freq(struct intel_rps *rps, int val)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	if (INTEL_GEN(i915) >= 9)
+		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+					 GEN9_FREQ_SCALER);
+	else if (IS_CHERRYVIEW(i915))
+		return chv_gpu_freq(rps, val);
+	else if (IS_VALLEYVIEW(i915))
+		return byt_gpu_freq(rps, val);
+	else
+		return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+int intel_freq_opcode(struct intel_rps *rps, int val)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	if (INTEL_GEN(i915) >= 9)
+		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+					 GT_FREQUENCY_MULTIPLIER);
+	else if (IS_CHERRYVIEW(i915))
+		return chv_freq_opcode(rps, val);
+	else if (IS_VALLEYVIEW(i915))
+		return byt_freq_opcode(rps, val);
+	else
+		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+
+static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	rps->gpll_ref_freq =
+		vlv_get_cck_clock(i915, "GPLL ref",
+				  CCK_GPLL_CLOCK_CONTROL,
+				  i915->czclk_freq);
+
+	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+}
+
+static void vlv_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	vlv_iosf_sb_get(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(rps);
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+	switch ((val >> 6) & 3) {
+	case 0:
+	case 1:
+		i915->mem_freq = 800;
+		break;
+	case 2:
+		i915->mem_freq = 1066;
+		break;
+	case 3:
+		i915->mem_freq = 1333;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+	rps->max_freq = vlv_rps_max_freq(rps);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = vlv_rps_rpe_freq(rps);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = vlv_rps_guar_freq(rps);
+	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = vlv_rps_min_freq(rps);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+}
+
+static void chv_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 val;
+
+	vlv_iosf_sb_get(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(rps);
+
+	val = vlv_cck_read(i915, CCK_FUSE_REG);
+
+	switch ((val >> 2) & 0x7) {
+	case 3:
+		i915->mem_freq = 2000;
+		break;
+	default:
+		i915->mem_freq = 1600;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+	rps->max_freq = chv_rps_max_freq(rps);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = chv_rps_rpe_freq(rps);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = chv_rps_guar_freq(rps);
+	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = chv_rps_min_freq(rps);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(rps, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+		   rps->min_freq) & 1,
+		  "Odd GPU freq values\n");
+}
+
+static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
+{
+	ei->ktime = ktime_get_raw();
+	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
+	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	const struct intel_rps_ei *prev = &rps->ei;
+	struct intel_rps_ei now;
+	u32 events = 0;
+
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
+
+	vlv_c0_read(uncore, &now);
+
+	if (prev->ktime) {
+		u64 time, c0;
+		u32 render, media;
+
+		time = ktime_us_delta(now.ktime, prev->ktime);
+
+		time *= rps_to_i915(rps)->czclk_freq;
+
+		/* Workload can be split between render + media,
+		 * e.g. SwapBuffers being blitted in X after being rendered in
+		 * mesa. To account for this we need to combine both engines
+		 * into our activity counter.
+		 */
+		render = now.render_c0 - prev->render_c0;
+		media = now.media_c0 - prev->media_c0;
+		c0 = max(render, media);
+		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+		if (c0 > time * rps->power.up_threshold)
+			events = GEN6_PM_RP_UP_THRESHOLD;
+		else if (c0 < time * rps->power.down_threshold)
+			events = GEN6_PM_RP_DOWN_THRESHOLD;
+	}
+
+	rps->ei = now;
+	return events;
+}
+
+static void rps_work(struct work_struct *work)
+{
+	struct intel_rps *rps = container_of(work, typeof(*rps), work);
+	struct intel_gt *gt = rps_to_gt(rps);
+	bool client_boost = false;
+	int new_freq, adj, min, max;
+	u32 pm_iir = 0;
+
+	spin_lock_irq(&gt->irq_lock);
+	pm_iir = fetch_and_zero(&rps->pm_iir);
+	client_boost = atomic_read(&rps->num_waiters);
+	spin_unlock_irq(&gt->irq_lock);
+
+	/* Make sure we didn't queue anything we're not going to process. */
+	if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+		goto out;
+
+	mutex_lock(&rps->lock);
+
+	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
+
+	adj = rps->last_adj;
+	new_freq = rps->cur_freq;
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (client_boost)
+		max = rps->max_freq;
+	if (client_boost && new_freq < rps->boost_freq) {
+		new_freq = rps->boost_freq;
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
+
+		if (new_freq >= rps->max_freq_softlimit)
+			adj = 0;
+	} else if (client_boost) {
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+		if (rps->cur_freq > rps->efficient_freq)
+			new_freq = rps->efficient_freq;
+		else if (rps->cur_freq > rps->min_freq_softlimit)
+			new_freq = rps->min_freq_softlimit;
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+		if (adj < 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
+
+		if (new_freq <= rps->min_freq_softlimit)
+			adj = 0;
+	} else { /* unknown event */
+		adj = 0;
+	}
+
+	rps->last_adj = adj;
+
+	/*
+	 * Limit deboosting and boosting to keep ourselves at the extremes
+	 * when in the respective power modes (i.e. slowly decrease frequencies
+	 * while in the HIGH_POWER zone and slowly increase frequencies while
+	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
+	 * to the next level quickly, and conversely if busy we expect to
+	 * hit a waitboost and rapidly switch into max power.
+	 */
+	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
+	    (adj > 0 && rps->power.mode == LOW_POWER))
+		rps->last_adj = 0;
+
+	/* sysfs frequency interfaces may have snuck in while servicing the
+	 * interrupt
+	 */
+	new_freq += adj;
+	new_freq = clamp_t(int, new_freq, min, max);
+
+	if (intel_rps_set(rps, new_freq)) {
+		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+		rps->last_adj = 0;
+	}
+
+	mutex_unlock(&rps->lock);
+
+out:
+	spin_lock_irq(&gt->irq_lock);
+	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+	const u32 events = rps->pm_events & pm_iir;
+
+	lockdep_assert_held(&gt->irq_lock);
+
+	if (unlikely(!events))
+		return;
+
+	gen6_gt_pm_mask_irq(gt, events);
+
+	rps->pm_iir |= events;
+	schedule_work(&rps->work);
+}
+
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	if (pm_iir & rps->pm_events) {
+		spin_lock(&gt->irq_lock);
+		gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
+		rps->pm_iir |= pm_iir & rps->pm_events;
+		schedule_work(&rps->work);
+		spin_unlock(&gt->irq_lock);
+	}
+
+	if (INTEL_GEN(gt->i915) >= 8)
+		return;
+
+	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+
+	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+}
+
+void gen5_rps_irq_handler(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u32 busy_up, busy_down, max_avg, min_avg;
+	u8 new_freq;
+
+	spin_lock(&mchdev_lock);
+
+	intel_uncore_write16(uncore,
+			     MEMINTRSTS,
+			     intel_uncore_read(uncore, MEMINTRSTS));
+
+	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
+	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
+	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
+	min_avg = intel_uncore_read(uncore, RCBMINAVG);
+
+	/* Handle RCS change request from hw */
+	new_freq = rps->cur_freq;
+	if (busy_up > max_avg)
+		new_freq++;
+	else if (busy_down < min_avg)
+		new_freq--;
+	new_freq = clamp(new_freq,
+			 rps->min_freq_softlimit,
+			 rps->max_freq_softlimit);
+
+	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
+		rps->cur_freq = new_freq;
+
+	spin_unlock(&mchdev_lock);
+}
+
+void intel_rps_init_early(struct intel_rps *rps)
+{
+	mutex_init(&rps->lock);
+	mutex_init(&rps->power.mutex);
+
+	INIT_WORK(&rps->work, rps_work);
+
+	atomic_set(&rps->num_waiters, 0);
+}
+
+void intel_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	if (IS_CHERRYVIEW(i915))
+		chv_rps_init(rps);
+	else if (IS_VALLEYVIEW(i915))
+		vlv_rps_init(rps);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_rps_init(rps);
+	else if (IS_IRONLAKE_M(i915))
+		gen5_rps_init(rps);
+
+	/* Derive initial user preferences/limits from the hardware limits */
+	rps->max_freq_softlimit = rps->max_freq;
+	rps->min_freq_softlimit = rps->min_freq;
+
+	/* After setting max-softlimit, find the overclock max freq */
+	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
+		u32 params = 0;
+
+		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
+				       &params, NULL);
+		if (params & BIT(31)) { /* OC supported */
+			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+					 (rps->max_freq & 0xff) * 50,
+					 (params & 0xff) * 50);
+			rps->max_freq = params & 0xff;
+		}
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq;
+	rps->idle_freq = rps->min_freq;
+	rps->cur_freq = rps->idle_freq;
+
+	rps->pm_intrmsk_mbz = 0;
+
+	/*
+	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
+	 * if GEN6_PM_UP_EI_EXPIRED is masked.
+	 *
+	 * TODO: verify if this can be reproduced on VLV,CHV.
+	 */
+	if (INTEL_GEN(i915) <= 7)
+		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
+		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
+
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 cagf;
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		cagf = (rpstat >> 8) & 0xff;
+	else if (INTEL_GEN(i915) >= 9)
+		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+	else
+		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+
+	return cagf;
+}
+
+static u32 read_cagf(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 freq;
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		vlv_punit_get(i915);
+		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+		vlv_punit_put(i915);
+	} else {
+		freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+	}
+
+	return intel_rps_get_cagf(rps, freq);
+}
+
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+{
+	struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+	intel_wakeref_t wakeref;
+	u32 freq = 0;
+
+	with_intel_runtime_pm_if_in_use(rpm, wakeref)
+		freq = intel_gpu_freq(rps, read_cagf(rps));
+
+	return freq;
+}
+
+/* External interface for intel_ips.ko */
+
+static struct drm_i915_private __rcu *ips_mchdev;
+
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+	void (*link)(void);
+
+	link = symbol_get(ips_link_to_i915_driver);
+	if (link) {
+		link();
+		symbol_put(ips_link_to_i915_driver);
+	}
+}
+
+void intel_rps_driver_register(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	/*
+	 * We only register the i915 ips part with intel-ips once everything is
+	 * set up, to avoid intel-ips sneaking in and reading bogus values.
+	 */
+	if (IS_GEN(gt->i915, 5)) {
+		GEM_BUG_ON(ips_mchdev);
+		rcu_assign_pointer(ips_mchdev, gt->i915);
+		ips_ping_for_i915_load();
+	}
+}
+
+void intel_rps_driver_unregister(struct intel_rps *rps)
+{
+	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
+		rcu_assign_pointer(ips_mchdev, NULL);
+}
+
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = rcu_dereference(ips_mchdev);
+	if (!kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
+}
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+	struct drm_i915_private *i915;
+	unsigned long chipset_val = 0;
+	unsigned long graphics_val = 0;
+	intel_wakeref_t wakeref;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+		struct intel_ips *ips = &i915->gt.rps.ips;
+
+		spin_lock_irq(&mchdev_lock);
+		chipset_val = __ips_chipset_val(ips);
+		graphics_val = __ips_gfx_val(ips);
+		spin_unlock_irq(&mchdev_lock);
+	}
+
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+	struct drm_i915_private *i915;
+	struct intel_rps *rps;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	rps = &i915->gt.rps;
+
+	spin_lock_irq(&mchdev_lock);
+	if (rps->max_freq_softlimit < rps->max_freq)
+		rps->max_freq_softlimit++;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
+{
+	struct drm_i915_private *i915;
+	struct intel_rps *rps;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	rps = &i915->gt.rps;
+
+	spin_lock_irq(&mchdev_lock);
+	if (rps->max_freq_softlimit > rps->min_freq)
+		rps->max_freq_softlimit--;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+	struct drm_i915_private *i915;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ */
+bool i915_gpu_turbo_disable(void)
+{
+	struct drm_i915_private *i915;
+	struct intel_rps *rps;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	rps = &i915->gt.rps;
+
+	spin_lock_irq(&mchdev_lock);
+	rps->max_freq_softlimit = rps->min_freq;
+	ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
new file mode 100644
index 000000000000..dfa98194f3b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_H
+#define INTEL_RPS_H
+
+#include "intel_rps_types.h"
+
+struct i915_request;
+
+void intel_rps_init_early(struct intel_rps *rps);
+void intel_rps_init(struct intel_rps *rps);
+
+void intel_rps_driver_register(struct intel_rps *rps);
+void intel_rps_driver_unregister(struct intel_rps *rps);
+
+void intel_rps_enable(struct intel_rps *rps);
+void intel_rps_disable(struct intel_rps *rps);
+
+void intel_rps_park(struct intel_rps *rps);
+void intel_rps_unpark(struct intel_rps *rps);
+void intel_rps_boost(struct i915_request *rq);
+
+int intel_rps_set(struct intel_rps *rps, u8 val);
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
+
+int intel_gpu_freq(struct intel_rps *rps, int val);
+int intel_freq_opcode(struct intel_rps *rps, int val);
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+
+void gen5_rps_irq_handler(struct intel_rps *rps);
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+
+#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
new file mode 100644
index 000000000000..c2e279154bd5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -0,0 +1,93 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_TYPES_H
+#define INTEL_RPS_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct intel_ips {
+	u64 last_count1;
+	unsigned long last_time1;
+	unsigned long chipset_power;
+	u64 last_count2;
+	u64 last_time2;
+	unsigned long gfx_power;
+	u8 corr;
+
+	int c, m;
+};
+
+struct intel_rps_ei {
+	ktime_t ktime;
+	u32 render_c0;
+	u32 media_c0;
+};
+
+struct intel_rps {
+	struct mutex lock; /* protects enabling and the worker */
+
+	/*
+	 * work, interrupts_enabled and pm_iir are protected by
+	 * dev_priv->irq_lock
+	 */
+	struct work_struct work;
+	bool enabled;
+	bool active;
+	u32 pm_iir;
+
+	/* PM interrupt bits that should never be masked */
+	u32 pm_intrmsk_mbz;
+	u32 pm_events;
+
+	/* Frequencies are stored in potentially platform dependent multiples.
+	 * In other words, *_freq needs to be multiplied by X to be interesting.
+	 * Soft limits are those which are used for the dynamic reclocking done
+	 * by the driver (raise frequencies under heavy loads, and lower for
+	 * lighter loads). Hard limits are those imposed by the hardware.
+	 *
+	 * A distinction is made for overclocking, which is never enabled by
+	 * default, and is considered to be above the hard limit if it's
+	 * possible at all.
+	 */
+	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
+	u8 last_freq;		/* Last SWREQ frequency */
+	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
+	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
+	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
+	u8 min_freq;		/* AKA RPn. Minimum frequency */
+	u8 boost_freq;		/* Frequency to request when wait boosting */
+	u8 idle_freq;		/* Frequency to request when we are idle */
+	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
+	u8 rp1_freq;		/* "less than" RP0 power/freqency */
+	u8 rp0_freq;		/* Non-overclocked max frequency. */
+	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
+
+	int last_adj;
+
+	struct {
+		struct mutex mutex;
+
+		enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
+		unsigned int interactive;
+
+		u8 up_threshold; /* Current %busy required to uplock */
+		u8 down_threshold; /* Current %busy required to downclock */
+	} power;
+
+	atomic_t num_waiters;
+	atomic_t boosts;
+
+	/* manual wa residency calculations */
+	struct intel_rps_ei ei;
+	struct intel_ips ips;
+};
+
+#endif /* INTEL_RPS_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6bf2d87da109..74f793423231 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,19 @@
 #include "intel_lrc_reg.h"
 #include "intel_sseu.h"
 
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+			 u8 max_subslices, u8 max_eus_per_subslice)
+{
+	sseu->max_slices = max_slices;
+	sseu->max_subslices = max_subslices;
+	sseu->max_eus_per_subslice = max_eus_per_subslice;
+
+	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
+	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
+}
+
 unsigned int
 intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
@@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 	return total;
 }
 
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+	int i, offset = slice * sseu->ss_stride;
+	u32 mask = 0;
+
+	GEM_BUG_ON(slice >= sseu->max_slices);
+
+	for (i = 0; i < sseu->ss_stride; i++)
+		mask |= (u32)sseu->subslice_mask[offset + i] <<
+			i * BITS_PER_BYTE;
+
+	return mask;
+}
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+			      u32 ss_mask)
+{
+	int offset = slice * sseu->ss_stride;
+
+	memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+}
+
 unsigned int
 intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
 {
-	return hweight8(sseu->subslice_mask[slice]);
+	return hweight32(intel_sseu_get_subslices(sseu, slice));
 }
 
 u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index b50d0401a4e2..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -10,15 +10,21 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 
+#include "i915_gem.h"
+
 struct drm_i915_private;
 
 #define GEN_MAX_SLICES		(6) /* CNL upper bound */
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
 #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
+#define GEN_MAX_EUS		(16) /* TGL upper bound */
+#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
 
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask[GEN_MAX_SLICES];
+	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -33,11 +39,8 @@ struct sseu_dev_info {
 	u8 max_subslices;
 	u8 max_eus_per_subslice;
 
-	/* We don't have more than 8 eus per subslice at the moment and as we
-	 * store eus enabled using bits, no need to multiply by eus per
-	 * subslice.
-	 */
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+	u8 ss_stride;
+	u8 eu_stride;
 };
 
 /*
@@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
 	return value;
 }
 
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+			int subslice)
+{
+	u8 mask;
+	int ss_idx = subslice / BITS_PER_BYTE;
+
+	GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+
+	mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+
+	return mask & BIT(subslice % BITS_PER_BYTE);
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+			 u8 max_subslices, u8 max_eus_per_subslice);
+
 unsigned int
 intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
 
 unsigned int
 intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
 
+u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+			      u32 ss_mask);
+
 u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 			 const struct intel_sseu *req_sseu);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9cb01d9828f1..87716529cd2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,17 +4,20 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "gt/intel_gt_types.h"
-
 #include "i915_drv.h"
 
 #include "i915_active.h"
 #include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
 
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
 
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
 struct intel_timeline_hwsp {
 	struct intel_gt *gt;
 	struct intel_gt_timelines *gt_timelines;
@@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
 	u64 free_bitmap;
 };
 
-struct intel_timeline_cacheline {
-	struct i915_active active;
-	struct intel_timeline_hwsp *hwsp;
-	void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
 static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
@@ -133,9 +128,10 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
 
 	i915_active_fini(&cl->active);
-	kfree(cl);
+	kfree_rcu(cl, rcu);
 }
 
+__i915_active_call
 static void __cacheline_retire(struct i915_active *active)
 {
 	struct intel_timeline_cacheline *cl =
@@ -177,8 +173,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp->gt->i915, &cl->active,
-			 __cacheline_active, __cacheline_retire);
+	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
@@ -254,7 +249,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -262,7 +257,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 	return 0;
 }
 
-static void timelines_init(struct intel_gt *gt)
+void intel_gt_init_timelines(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 
@@ -273,15 +268,11 @@ static void timelines_init(struct intel_gt *gt)
 	INIT_LIST_HEAD(&timelines->hwsp_free_list);
 }
 
-void intel_timelines_init(struct drm_i915_private *i915)
-{
-	timelines_init(&i915->gt);
-}
-
 void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(timeline->retire);
 
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
@@ -337,34 +328,51 @@ int intel_timeline_pin(struct intel_timeline *tl)
 void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
-	unsigned long flags;
 
+	/*
+	 * Pretend we are serialised by the timeline->mutex.
+	 *
+	 * While generally true, there are a few exceptions to the rule
+	 * for the engine->kernel_context being used to manage power
+	 * transitions. As the engine_park may be called from under any
+	 * timeline, it uses the power mutex as a global serialisation
+	 * lock to prevent any other request entering its timeline.
+	 *
+	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+	 *
+	 * However, intel_gt_retire_request() does not know which engine
+	 * it is retiring along and so cannot partake in the engine-pm
+	 * barrier, and there we use the tl->active_count as a means to
+	 * pin the timeline in the active_list while the locks are dropped.
+	 * Ergo, as that is outside of the engine-pm barrier, we need to
+	 * use atomic to manipulate tl->active_count.
+	 */
 	lockdep_assert_held(&tl->mutex);
 
-	GEM_BUG_ON(!atomic_read(&tl->pin_count));
-	if (tl->active_count++)
+	if (atomic_add_unless(&tl->active_count, 1, 0))
 		return;
-	GEM_BUG_ON(!tl->active_count); /* overflow? */
 
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_add(&tl->link, &timelines->active_list);
-	spin_unlock_irqrestore(&timelines->lock, flags);
+	spin_lock(&timelines->lock);
+	if (!atomic_fetch_inc(&tl->active_count))
+		list_add_tail(&tl->link, &timelines->active_list);
+	spin_unlock(&timelines->lock);
 }
 
 void intel_timeline_exit(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
-	unsigned long flags;
 
+	/* See intel_timeline_enter() */
 	lockdep_assert_held(&tl->mutex);
 
-	GEM_BUG_ON(!tl->active_count);
-	if (--tl->active_count)
+	GEM_BUG_ON(!atomic_read(&tl->active_count));
+	if (atomic_add_unless(&tl->active_count, -1, 1))
 		return;
 
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_del(&tl->link);
-	spin_unlock_irqrestore(&timelines->lock, flags);
+	spin_lock(&timelines->lock);
+	if (atomic_dec_and_test(&tl->active_count))
+		list_del(&tl->link);
+	spin_unlock(&timelines->lock);
 
 	/*
 	 * Since this timeline is idle, all bariers upon which we were waiting
@@ -442,7 +450,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
 	if (err)
 		goto err_cacheline;
 
@@ -493,35 +501,42 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 static int cacheline_ref(struct intel_timeline_cacheline *cl,
 			 struct i915_request *rq)
 {
-	return i915_active_ref(&cl->active, rq->timeline, rq);
+	return i915_active_add_request(&cl->active, rq);
 }
 
 int intel_timeline_read_hwsp(struct i915_request *from,
 			     struct i915_request *to,
 			     u32 *hwsp)
 {
-	struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
-	struct intel_timeline *tl = from->timeline;
+	struct intel_timeline_cacheline *cl;
 	int err;
 
-	GEM_BUG_ON(to->timeline == tl);
-
-	mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
-	err = i915_request_completed(from);
-	if (!err)
-		err = cacheline_ref(cl, to);
-	if (!err) {
-		if (likely(cl == tl->hwsp_cacheline)) {
-			*hwsp = tl->hwsp_offset;
-		} else { /* across a seqno wrap, recover the original offset */
-			*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-				ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
-				CACHELINE_BYTES;
-		}
-	}
-	mutex_unlock(&tl->mutex);
+	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
 
+	rcu_read_lock();
+	cl = rcu_dereference(from->hwsp_cacheline);
+	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+		goto unlock; /* seqno wrapped and completed! */
+	if (unlikely(i915_request_completed(from)))
+		goto release;
+	rcu_read_unlock();
+
+	err = cacheline_ref(cl, to);
+	if (err)
+		goto out;
+
+	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
+
+out:
+	i915_active_release(&cl->active);
 	return err;
+
+release:
+	i915_active_release(&cl->active);
+unlock:
+	rcu_read_unlock();
+	return 1;
 }
 
 void intel_timeline_unpin(struct intel_timeline *tl)
@@ -541,10 +556,10 @@ void __intel_timeline_free(struct kref *kref)
 		container_of(kref, typeof(*timeline), kref);
 
 	intel_timeline_fini(timeline);
-	kfree(timeline);
+	kfree_rcu(timeline, rcu);
 }
 
-static void timelines_fini(struct intel_gt *gt)
+void intel_gt_fini_timelines(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 
@@ -552,11 +567,6 @@ static void timelines_fini(struct intel_gt *gt)
 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
 }
 
-void intel_timelines_fini(struct drm_i915_private *i915)
-{
-	timelines_fini(&i915->gt);
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "gt/selftests/mock_timeline.c"
 #include "gt/selftest_timeline.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f583af1ba18d..f5b7eade3809 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -88,7 +88,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 			     struct i915_request *until,
 			     u32 *hwsp_offset);
 
-void intel_timelines_init(struct drm_i915_private *i915);
-void intel_timelines_fini(struct drm_i915_private *i915);
+void intel_gt_init_timelines(struct intel_gt *gt);
+void intel_gt_fini_timelines(struct intel_gt *gt);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 2b1baf2fcc8e..02181c5020db 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -10,14 +10,15 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
 
-struct drm_i915_private;
 struct i915_vma;
-struct intel_timeline_cacheline;
 struct i915_syncmap;
+struct intel_gt;
+struct intel_timeline_hwsp;
 
 struct intel_timeline {
 	u64 fence_context;
@@ -42,7 +43,7 @@ struct intel_timeline {
 	 * from the intel_context caller plus internal atomicity.
 	 */
 	atomic_t pin_count;
-	unsigned int active_count;
+	atomic_t active_count;
 
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
@@ -58,12 +59,16 @@ struct intel_timeline {
 	 */
 	struct list_head requests;
 
-	/* Contains an RCU guarded pointer to the last request. No reference is
+	/*
+	 * Contains an RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_active_request_get_request_rcu(), or hold the
-	 * struct_mutex.
+	 * the request using i915_active_fence_get(), or manage the RCU
+	 * protection themselves (cf the i915_active_fence API).
 	 */
-	struct i915_active_request last_request;
+	struct i915_active_fence last_request;
+
+	/** A chain of completed timelines ready for early retirement. */
+	struct intel_timeline *retire;
 
 	/**
 	 * We track the most recent seqno that we wait on in every context so
@@ -80,6 +85,16 @@ struct intel_timeline {
 	struct intel_gt *gt;
 
 	struct kref kref;
+	struct rcu_head rcu;
+};
+
+struct intel_timeline_cacheline {
+	struct i915_active active;
+
+	struct intel_timeline_hwsp *hwsp;
+	void *vaddr;
+
+	struct rcu_head rcu;
 };
 
 #endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5f6ec2fd29a0..4e292d4bf7b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -6,7 +6,9 @@
 
 #include "i915_drv.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_gt.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 /**
@@ -145,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 	}
 }
 
-static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
-		   u32 val)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+		   u32 val, u32 read_mask)
 {
 	struct i915_wa wa = {
 		.reg  = reg,
 		.mask = mask,
 		.val  = val,
-		.read = mask,
+		.read = read_mask,
 	};
 
 	_wa_add(wal, &wa);
 }
 
 static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+		   u32 val)
+{
+	wa_add(wal, reg, mask, val, mask);
+}
+
+static void
 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
 	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
@@ -246,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* WaDisableDopClockGating:bdw
 	 *
-	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 	 * to disable EUTC clock gating.
 	 */
 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
@@ -567,6 +575,24 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
 				     struct i915_wa_list *wal)
 {
+	u32 val;
+
+	/* Wa_1409142259:tgl */
+	WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+			  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+
+	/* Wa_1604555607:tgl */
+	val = intel_uncore_read(engine->uncore, FF_MODE2);
+	val &= ~FF_MODE2_TDS_TIMER_MASK;
+	val |= FF_MODE2_TDS_TIMER_128;
+	/*
+	 * FIXME: FF_MODE2 register is not readable till TGL B0. We can
+	 * enable verification of WA from the later steppings, which enables
+	 * the read of FF_MODE2.
+	 */
+	wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
+	       IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
+			    FF_MODE2_TDS_TIMER_MASK);
 }
 
 static void
@@ -796,11 +822,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	}
 
 	slice = fls(sseu->slice_mask) - 1;
-	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
-	subslice = fls(l3_en & sseu->subslice_mask[slice]);
+	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
 	if (!subslice) {
 		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
-			 sseu->subslice_mask[slice], l3_en);
+			 intel_sseu_get_subslices(sseu, slice), l3_en);
 		subslice = fls(l3_en);
 		WARN_ON(!subslice);
 	}
@@ -890,11 +915,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	wa_write_or(wal,
 		    GAMT_CHKN_BIT_REG,
 		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
+
+	/* Wa_1607087056:icl */
+	wa_write_or(wal,
+		    SLICE_UNIT_LEVEL_CLKGATE,
+		    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 }
 
 static void
 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 {
+	/* Wa_1409420604:tgl */
+	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+		wa_write_or(wal,
+			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
+			    CPSSUNIT_CLKGATE_DIS);
+
+	/* Wa_1409180338:tgl */
+	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+		wa_write_or(wal,
+			    SLICE_UNIT_LEVEL_CLKGATE,
+			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 }
 
 static void
@@ -1197,6 +1238,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
 
 static void tgl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		/*
+		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+		 *
+		 * This covers 4 registers which are next to one another :
+		 *   - PS_INVOCATION_COUNT
+		 *   - PS_INVOCATION_COUNT_UDW
+		 *   - PS_DEPTH_COUNT
+		 *   - PS_DEPTH_COUNT_UDW
+		 */
+		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				  RING_FORCE_TO_NONPRIV_RANGE_4);
+		break;
+	default:
+		break;
+	}
 }
 
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -1258,6 +1319,34 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+		/* Wa_1606700617:tgl */
+		wa_masked_en(wal,
+			     GEN9_CS_DEBUG_MODE1,
+			     FF_DOP_CLOCK_GATE_DISABLE);
+
+		/* Wa_1607138336:tgl */
+		wa_write_or(wal,
+			    GEN9_CTX_PREEMPT_REG,
+			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+
+		/* Wa_1607030317:tgl */
+		/* Wa_1607186500:tgl */
+		/* Wa_1607297627:tgl */
+		wa_masked_en(wal,
+			     GEN6_RC_SLEEP_PSMI_CONTROL,
+			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+
+		/*
+		 * Wa_1606679103:tgl
+		 * (see also Wa_1606682166:icl)
+		 */
+		wa_write_or(wal,
+			    GEN7_SARCHKMD,
+			    GEN7_DISABLE_SAMPLER_PREFETCH);
+	}
+
 	if (IS_GEN(i915, 11)) {
 		/* This is not an Wa. Enable for better image quality */
 		wa_masked_en(wal,
@@ -1452,7 +1541,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
 	 * which only controls CPU initiated MMIO. Routing does not
 	 * work for CS access so we cannot verify them on this path.
 	 */
-	if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff))
+	if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
 		return true;
 
 	return false;
@@ -1515,7 +1604,9 @@ static int engine_wa_list_verify(struct intel_context *ce,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_engine_pm_get(ce->engine);
 	rq = intel_context_create_request(ce);
+	intel_engine_pm_put(ce->engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_vma;
@@ -1525,16 +1616,17 @@ static int engine_wa_list_verify(struct intel_context *ce,
 	if (err)
 		goto err_vma;
 
+	i915_request_get(rq);
 	i915_request_add(rq);
 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		err = -ETIME;
-		goto err_vma;
+		goto err_rq;
 	}
 
 	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
 	if (IS_ERR(results)) {
 		err = PTR_ERR(results);
-		goto err_vma;
+		goto err_rq;
 	}
 
 	err = 0;
@@ -1548,6 +1640,8 @@ static int engine_wa_list_verify(struct intel_context *ce,
 
 	i915_gem_object_unpin_map(vma->obj);
 
+err_rq:
+	i915_request_put(rq);
 err_vma:
 	i915_vma_unpin(vma);
 	i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5d43cbc3f345..a560b7eee2cd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
  */
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "intel_context.h"
@@ -76,7 +77,7 @@ static void advance(struct i915_request *request)
 	i915_request_mark_complete(request);
 	GEM_BUG_ON(!i915_request_completed(request));
 
-	intel_engine_queue_breadcrumbs(request->engine);
+	intel_engine_signal_breadcrumbs(request->engine);
 }
 
 static void hw_delay_complete(struct timer_list *t)
@@ -148,7 +149,11 @@ static int mock_context_alloc(struct intel_context *ce)
 
 static int mock_context_pin(struct intel_context *ce)
 {
-	return intel_context_active_acquire(ce);
+	return 0;
+}
+
+static void mock_context_reset(struct intel_context *ce)
+{
 }
 
 static const struct intel_context_ops mock_context_ops = {
@@ -160,6 +165,7 @@ static const struct intel_context_ops mock_context_ops = {
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
 
+	.reset = mock_context_reset,
 	.destroy = mock_context_destroy,
 };
 
@@ -206,16 +212,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine)
 {
 }
 
-static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	GEM_BUG_ON(stalled);
 }
 
-static void mock_reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_cancel_requests(struct intel_engine_cs *engine)
+static void mock_reset_cancel(struct intel_engine_cs *engine)
 {
 	struct i915_request *request;
 	unsigned long flags;
@@ -233,6 +235,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_engine_release(struct intel_engine_cs *engine)
+{
+	struct mock_engine *mock =
+		container_of(engine, typeof(*mock), base);
+
+	GEM_BUG_ON(timer_pending(&mock->hw_delay));
+
+	intel_context_unpin(engine->kernel_context);
+	intel_context_put(engine->kernel_context);
+
+	intel_engine_fini_retire(engine);
+	intel_engine_fini_breadcrumbs(engine);
+}
+
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id)
@@ -240,6 +260,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	struct mock_engine *engine;
 
 	GEM_BUG_ON(id >= I915_NUM_ENGINES);
+	GEM_BUG_ON(!i915->gt.uncore);
 
 	engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
 	if (!engine)
@@ -248,9 +269,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	/* minimal engine setup for requests */
 	engine->base.i915 = i915;
 	engine->base.gt = &i915->gt;
+	engine->base.uncore = i915->gt.uncore;
 	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
 	engine->base.id = id;
 	engine->base.mask = BIT(id);
+	engine->base.legacy_idx = INVALID_ENGINE;
 	engine->base.instance = id;
 	engine->base.status_page.addr = (void *)(engine + 1);
 
@@ -261,9 +284,14 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.submit_request = mock_submit_request;
 
 	engine->base.reset.prepare = mock_reset_prepare;
-	engine->base.reset.reset = mock_reset;
+	engine->base.reset.rewind = mock_reset_rewind;
+	engine->base.reset.cancel = mock_reset_cancel;
 	engine->base.reset.finish = mock_reset_finish;
-	engine->base.cancel_requests = mock_cancel_requests;
+
+	engine->base.release = mock_engine_release;
+
+	i915->gt.engine[id] = &engine->base;
+	i915->gt.engine_class[0][id] = &engine->base;
 
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
@@ -283,6 +311,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
+	intel_engine_init_retire(engine);
 	intel_engine_pool_init(&engine->pool);
 
 	ce = create_kernel_context(engine);
@@ -314,18 +343,3 @@ void mock_engine_flush(struct intel_engine_cs *engine)
 void mock_engine_reset(struct intel_engine_cs *engine)
 {
 }
-
-void mock_engine_free(struct intel_engine_cs *engine)
-{
-	struct mock_engine *mock =
-		container_of(engine, typeof(*mock), base);
-
-	GEM_BUG_ON(timer_pending(&mock->hw_delay));
-
-	intel_context_unpin(engine->kernel_context);
-	intel_context_put(engine->kernel_context);
-
-	intel_engine_fini_breadcrumbs(engine);
-
-	kfree(engine);
-}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 9d1ea26c7a2d..e874dfaa5316 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 
@@ -14,22 +15,28 @@
 
 static int request_sync(struct i915_request *rq)
 {
+	struct intel_timeline *tl = i915_request_timeline(rq);
 	long timeout;
 	int err = 0;
 
+	intel_timeline_get(tl);
 	i915_request_get(rq);
 
-	i915_request_add(rq);
+	/* Opencode i915_request_add() so we can keep the timeline locked. */
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, NULL);
+
 	timeout = i915_request_wait(rq, 0, HZ / 10);
-	if (timeout < 0) {
+	if (timeout < 0)
 		err = timeout;
-	} else {
-		mutex_lock(&rq->timeline->mutex);
+	else
 		i915_request_retire_upto(rq);
-		mutex_unlock(&rq->timeline->mutex);
-	}
+
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
+	mutex_unlock(&tl->mutex);
 
 	i915_request_put(rq);
+	intel_timeline_put(tl);
 
 	return err;
 }
@@ -44,14 +51,12 @@ static int context_sync(struct intel_context *ce)
 		struct i915_request *rq;
 		long timeout;
 
-		rcu_read_lock();
-		rq = rcu_dereference(tl->last_request.request);
-		if (rq)
-			rq = i915_request_get_rcu(rq);
-		rcu_read_unlock();
-		if (!rq)
+		if (list_empty(&tl->requests))
 			break;
 
+		rq = list_last_entry(&tl->requests, typeof(*rq), link);
+		i915_request_get(rq);
+
 		timeout = i915_request_wait(rq, 0, HZ / 10);
 		if (timeout < 0)
 			err = timeout;
@@ -65,15 +70,14 @@ static int context_sync(struct intel_context *ce)
 	return err;
 }
 
-static int __live_context_size(struct intel_engine_cs *engine,
-			       struct i915_gem_context *fixme)
+static int __live_context_size(struct intel_engine_cs *engine)
 {
 	struct intel_context *ce;
 	struct i915_request *rq;
 	void *vaddr;
 	int err;
 
-	ce = intel_context_create(fixme, engine);
+	ce = intel_context_create(engine);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
@@ -101,9 +105,6 @@ static int __live_context_size(struct intel_engine_cs *engine,
 	 *
 	 * TLDR; this overlaps with the execlists redzone.
 	 */
-	if (HAS_EXECLISTS(engine->i915))
-		vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
-
 	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
 	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
 
@@ -119,7 +120,7 @@ static int __live_context_size(struct intel_engine_cs *engine,
 		goto err_unpin;
 
 	/* Force the context switch */
-	rq = i915_request_create(engine->kernel_context);
+	rq = intel_engine_create_kernel_request(engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -144,7 +145,6 @@ static int live_context_size(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *fixme;
 	enum intel_engine_id id;
 	int err = 0;
 
@@ -153,15 +153,7 @@ static int live_context_size(void *arg)
 	 * HW tries to write past the end of one.
 	 */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
-	fixme = kernel_context(gt->i915);
-	if (IS_ERR(fixme)) {
-		err = PTR_ERR(fixme);
-		goto unlock;
-	}
-
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct {
 			struct drm_i915_gem_object *state;
 			void *pinned;
@@ -185,7 +177,7 @@ static int live_context_size(void *arg)
 		/* Overlaps with the execlists redzone */
 		engine->context_size += I915_GTT_PAGE_SIZE;
 
-		err = __live_context_size(engine, fixme);
+		err = __live_context_size(engine);
 
 		engine->context_size -= I915_GTT_PAGE_SIZE;
 
@@ -198,15 +190,12 @@ static int live_context_size(void *arg)
 			break;
 	}
 
-	kernel_context_close(fixme);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
-static int __live_active_context(struct intel_engine_cs *engine,
-				 struct i915_gem_context *fixme)
+static int __live_active_context(struct intel_engine_cs *engine)
 {
+	unsigned long saved_heartbeat;
 	struct intel_context *ce;
 	int pass;
 	int err;
@@ -230,40 +219,55 @@ static int __live_active_context(struct intel_engine_cs *engine,
 		return -EINVAL;
 	}
 
-	ce = intel_context_create(fixme, engine);
+	ce = intel_context_create(engine);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	saved_heartbeat = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
 	for (pass = 0; pass <= 2; pass++) {
 		struct i915_request *rq;
 
+		intel_engine_pm_get(engine);
+
 		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto err;
+			goto out_engine;
 		}
 
 		err = request_sync(rq);
 		if (err)
-			goto err;
+			goto out_engine;
 
 		/* Context will be kept active until after an idle-barrier. */
 		if (i915_active_is_idle(&ce->active)) {
 			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
 			       engine->name, pass);
 			err = -EINVAL;
-			goto err;
+			goto out_engine;
 		}
 
 		if (!intel_engine_pm_is_awake(engine)) {
 			pr_err("%s is asleep before idle-barrier\n",
 			       engine->name);
 			err = -EINVAL;
-			goto err;
+			goto out_engine;
 		}
+
+out_engine:
+		intel_engine_pm_put(engine);
+		if (err)
+			goto err;
 	}
 
 	/* Now make sure our idle-barriers are flushed */
+	err = intel_engine_flush_barriers(engine);
+	if (err)
+		goto err;
+
+	/* Wait for the barrier and in the process wait for engine to park */
 	err = context_sync(engine->kernel_context);
 	if (err)
 		goto err;
@@ -273,12 +277,15 @@ static int __live_active_context(struct intel_engine_cs *engine,
 		err = -EINVAL;
 	}
 
+	intel_engine_pm_flush(engine);
+
 	if (intel_engine_pm_is_awake(engine)) {
 		struct drm_printer p = drm_debug_printer(__func__);
 
 		intel_engine_dump(engine, &p,
-				  "%s is still awake after idle-barriers\n",
-				  engine->name);
+				  "%s is still awake:%d after idle-barriers\n",
+				  engine->name,
+				  atomic_read(&engine->wakeref.count));
 		GEM_TRACE_DUMP();
 
 		err = -EINVAL;
@@ -286,6 +293,7 @@ static int __live_active_context(struct intel_engine_cs *engine,
 	}
 
 err:
+	engine->props.heartbeat_interval_ms = saved_heartbeat;
 	intel_context_put(ce);
 	return err;
 }
@@ -294,36 +302,19 @@ static int live_active_context(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *fixme;
 	enum intel_engine_id id;
-	struct drm_file *file;
 	int err = 0;
 
-	file = mock_file(gt->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
-	fixme = live_context(gt->i915, file);
-	if (IS_ERR(fixme)) {
-		err = PTR_ERR(fixme);
-		goto unlock;
-	}
-
-	for_each_engine(engine, gt->i915, id) {
-		err = __live_active_context(engine, fixme);
+	for_each_engine(engine, gt, id) {
+		err = __live_active_context(engine);
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-	mock_file_free(gt->i915, file);
 	return err;
 }
 
@@ -355,10 +346,10 @@ unpin:
 	return err;
 }
 
-static int __live_remote_context(struct intel_engine_cs *engine,
-				 struct i915_gem_context *fixme)
+static int __live_remote_context(struct intel_engine_cs *engine)
 {
 	struct intel_context *local, *remote;
+	unsigned long saved_heartbeat;
 	int pass;
 	int err;
 
@@ -370,16 +361,26 @@ static int __live_remote_context(struct intel_engine_cs *engine,
 	 * clobber the idle-barrier.
 	 */
 
-	remote = intel_context_create(fixme, engine);
+	if (intel_engine_pm_is_awake(engine)) {
+		pr_err("%s is awake before starting %s!\n",
+		       engine->name, __func__);
+		return -EINVAL;
+	}
+
+	remote = intel_context_create(engine);
 	if (IS_ERR(remote))
 		return PTR_ERR(remote);
 
-	local = intel_context_create(fixme, engine);
+	local = intel_context_create(engine);
 	if (IS_ERR(local)) {
 		err = PTR_ERR(local);
 		goto err_remote;
 	}
 
+	saved_heartbeat = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+	intel_engine_pm_get(engine);
+
 	for (pass = 0; pass <= 2; pass++) {
 		err = __remote_sync(local, remote);
 		if (err)
@@ -397,6 +398,9 @@ static int __live_remote_context(struct intel_engine_cs *engine,
 		}
 	}
 
+	intel_engine_pm_put(engine);
+	engine->props.heartbeat_interval_ms = saved_heartbeat;
+
 	intel_context_put(local);
 err_remote:
 	intel_context_put(remote);
@@ -407,36 +411,19 @@ static int live_remote_context(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *fixme;
 	enum intel_engine_id id;
-	struct drm_file *file;
 	int err = 0;
 
-	file = mock_file(gt->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
-	fixme = live_context(gt->i915, file);
-	if (IS_ERR(fixme)) {
-		err = PTR_ERR(fixme);
-		goto unlock;
-	}
-
-	for_each_engine(engine, gt->i915, id) {
-		err = __live_remote_context(engine, fixme);
+	for_each_engine(engine, gt, id) {
+		err = __live_remote_context(engine);
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-	mock_file_free(gt->i915, file);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 3880f07c29b8..f88e445a1cae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -4,7 +4,365 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include <linux/sort.h>
+
+#include "intel_gt_pm.h"
+#include "intel_rps.h"
+
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
+
+#define COUNT 5
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const u32 *a = A, *b = B;
+
+	return *a - *b;
+}
+
+static void perf_begin(struct intel_gt *gt)
+{
+	intel_gt_pm_get(gt);
+
+	/* Boost gpufreq to max [waitboost] and keep it fixed */
+	atomic_inc(&gt->rps.num_waiters);
+	schedule_work(&gt->rps.work);
+	flush_work(&gt->rps.work);
+}
+
+static int perf_end(struct intel_gt *gt)
+{
+	atomic_dec(&gt->rps.num_waiters);
+	intel_gt_pm_put(gt);
+
+	return igt_flush_test(gt->i915);
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+	u32 cmd;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+	if (INTEL_GEN(rq->i915) >= 8)
+		cmd++;
+	*cs++ = cmd;
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static struct i915_vma *create_empty_batch(struct intel_context *ce)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_put;
+	}
+
+	cs[0] = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(obj);
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_unpin;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err_unpin;
+
+	i915_gem_object_unpin_map(obj);
+	return vma;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static u32 trifilter(u32 *a)
+{
+	u64 sum;
+
+	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);
+
+	sum = mul_u32_u32(a[2], 2);
+	sum += a[1];
+	sum += a[3];
+
+	return sum >> 2;
+}
+
+static int perf_mi_bb_start(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+		return 0;
+
+	perf_begin(gt);
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce = engine->kernel_context;
+		struct i915_vma *batch;
+		u32 cycles[COUNT];
+		int i;
+
+		intel_engine_pm_get(engine);
+
+		batch = create_empty_batch(ce);
+		if (IS_ERR(batch)) {
+			err = PTR_ERR(batch);
+			intel_engine_pm_put(engine);
+			break;
+		}
+
+		err = i915_vma_sync(batch);
+		if (err) {
+			intel_engine_pm_put(engine);
+			i915_vma_put(batch);
+			break;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+			struct i915_request *rq;
+
+			rq = i915_request_create(ce);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				break;
+			}
+
+			err = write_timestamp(rq, 2);
+			if (err)
+				goto out;
+
+			err = rq->engine->emit_bb_start(rq,
+							batch->node.start, 8,
+							0);
+			if (err)
+				goto out;
+
+			err = write_timestamp(rq, 3);
+			if (err)
+				goto out;
+
+out:
+			i915_request_get(rq);
+			i915_request_add(rq);
+
+			if (i915_request_wait(rq, 0, HZ / 5) < 0)
+				err = -EIO;
+			i915_request_put(rq);
+			if (err)
+				break;
+
+			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
+		}
+		i915_vma_put(batch);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+
+		pr_info("%s: MI_BB_START cycles: %u\n",
+			engine->name, trifilter(cycles));
+	}
+	if (perf_end(gt))
+		err = -EIO;
+
+	return err;
+}
+
+static struct i915_vma *create_nop_batch(struct intel_context *ce)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_put;
+	}
+
+	memset(cs, 0, SZ_64K);
+	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(obj);
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_unpin;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err_unpin;
+
+	i915_gem_object_unpin_map(obj);
+	return vma;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int perf_mi_noop(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+		return 0;
+
+	perf_begin(gt);
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce = engine->kernel_context;
+		struct i915_vma *base, *nop;
+		u32 cycles[COUNT];
+		int i;
+
+		intel_engine_pm_get(engine);
+
+		base = create_empty_batch(ce);
+		if (IS_ERR(base)) {
+			err = PTR_ERR(base);
+			intel_engine_pm_put(engine);
+			break;
+		}
+
+		err = i915_vma_sync(base);
+		if (err) {
+			i915_vma_put(base);
+			intel_engine_pm_put(engine);
+			break;
+		}
+
+		nop = create_nop_batch(ce);
+		if (IS_ERR(nop)) {
+			err = PTR_ERR(nop);
+			i915_vma_put(base);
+			intel_engine_pm_put(engine);
+			break;
+		}
+
+		err = i915_vma_sync(nop);
+		if (err) {
+			i915_vma_put(nop);
+			i915_vma_put(base);
+			intel_engine_pm_put(engine);
+			break;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+			struct i915_request *rq;
+
+			rq = i915_request_create(ce);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				break;
+			}
+
+			err = write_timestamp(rq, 2);
+			if (err)
+				goto out;
+
+			err = rq->engine->emit_bb_start(rq,
+							base->node.start, 8,
+							0);
+			if (err)
+				goto out;
+
+			err = write_timestamp(rq, 3);
+			if (err)
+				goto out;
+
+			err = rq->engine->emit_bb_start(rq,
+							nop->node.start,
+							nop->node.size,
+							0);
+			if (err)
+				goto out;
+
+			err = write_timestamp(rq, 4);
+			if (err)
+				goto out;
+
+out:
+			i915_request_get(rq);
+			i915_request_add(rq);
+
+			if (i915_request_wait(rq, 0, HZ / 5) < 0)
+				err = -EIO;
+			i915_request_put(rq);
+			if (err)
+				break;
+
+			cycles[i] =
+				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
+				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
+		}
+		i915_vma_put(nop);
+		i915_vma_put(base);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+
+		pr_info("%s: 16K MI_NOOP cycles: %u\n",
+			engine->name, trifilter(cycles));
+	}
+	if (perf_end(gt))
+		err = -EIO;
+
+	return err;
+}
+
+int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_mi_bb_start),
+		SUBTEST(perf_mi_noop),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
 
 static int intel_mmio_bases_check(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
new file mode 100644
index 000000000000..43d4d589749f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -0,0 +1,374 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/sort.h>
+
+#include "i915_drv.h"
+
+#include "intel_gt_requests.h"
+#include "i915_selftest.h"
+
+static int timeline_sync(struct intel_timeline *tl)
+{
+	struct dma_fence *fence;
+	long timeout;
+
+	fence = i915_active_fence_get(&tl->last_request);
+	if (!fence)
+		return 0;
+
+	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
+	dma_fence_put(fence);
+	if (timeout < 0)
+		return timeout;
+
+	return 0;
+}
+
+static int engine_sync_barrier(struct intel_engine_cs *engine)
+{
+	return timeline_sync(engine->kernel_context->timeline);
+}
+
+struct pulse {
+	struct i915_active active;
+	struct kref kref;
+};
+
+static int pulse_active(struct i915_active *active)
+{
+	kref_get(&container_of(active, struct pulse, active)->kref);
+	return 0;
+}
+
+static void pulse_free(struct kref *kref)
+{
+	kfree(container_of(kref, struct pulse, kref));
+}
+
+static void pulse_put(struct pulse *p)
+{
+	kref_put(&p->kref, pulse_free);
+}
+
+static void pulse_retire(struct i915_active *active)
+{
+	pulse_put(container_of(active, struct pulse, active));
+}
+
+static struct pulse *pulse_create(void)
+{
+	struct pulse *p;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return p;
+
+	kref_init(&p->kref);
+	i915_active_init(&p->active, pulse_active, pulse_retire);
+
+	return p;
+}
+
+static void pulse_unlock_wait(struct pulse *p)
+{
+	i915_active_unlock_wait(&p->active);
+}
+
+static int __live_idle_pulse(struct intel_engine_cs *engine,
+			     int (*fn)(struct intel_engine_cs *cs))
+{
+	struct pulse *p;
+	int err;
+
+	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+	p = pulse_create();
+	if (!p)
+		return -ENOMEM;
+
+	err = i915_active_acquire(&p->active);
+	if (err)
+		goto out;
+
+	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
+	if (err) {
+		i915_active_release(&p->active);
+		goto out;
+	}
+
+	i915_active_acquire_barrier(&p->active);
+	i915_active_release(&p->active);
+
+	GEM_BUG_ON(i915_active_is_idle(&p->active));
+	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+
+	err = fn(engine);
+	if (err)
+		goto out;
+
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+
+	if (engine_sync_barrier(engine)) {
+		struct drm_printer m = drm_err_printer("pulse");
+
+		pr_err("%s: no heartbeat pulse?\n", engine->name);
+		intel_engine_dump(engine, &m, "%s", engine->name);
+
+		err = -ETIME;
+		goto out;
+	}
+
+	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
+
+	pulse_unlock_wait(p); /* synchronize with the retirement callback */
+
+	if (!i915_active_is_idle(&p->active)) {
+		struct drm_printer m = drm_err_printer("pulse");
+
+		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
+		       engine->name);
+		i915_active_print(&p->active, &m);
+
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	pulse_put(p);
+	return err;
+}
+
+static int live_idle_flush(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/* Check that we can flush the idle barriers */
+
+	for_each_engine(engine, gt, id) {
+		intel_engine_pm_get(engine);
+		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int live_idle_pulse(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/* Check that heartbeat pulses flush the idle barriers */
+
+	for_each_engine(engine, gt, id) {
+		intel_engine_pm_get(engine);
+		err = __live_idle_pulse(engine, intel_engine_pulse);
+		intel_engine_pm_put(engine);
+		if (err && err != -ENODEV)
+			break;
+
+		err = 0;
+	}
+
+	return err;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+	const u32 *a = _a, *b = _b;
+
+	return *a - *b;
+}
+
+static int __live_heartbeat_fast(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	ktime_t t0, t1;
+	u32 times[5];
+	int err;
+	int i;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	intel_engine_pm_get(engine);
+
+	err = intel_engine_set_heartbeat(engine, 1);
+	if (err)
+		goto err_pm;
+
+	for (i = 0; i < ARRAY_SIZE(times); i++) {
+		/* Manufacture a tick */
+		do {
+			while (READ_ONCE(engine->heartbeat.systole))
+				flush_delayed_work(&engine->heartbeat.work);
+
+			engine->serial++; /* quick, pretend we are not idle! */
+			flush_delayed_work(&engine->heartbeat.work);
+			if (!delayed_work_pending(&engine->heartbeat.work)) {
+				pr_err("%s: heartbeat did not start\n",
+				       engine->name);
+				err = -EINVAL;
+				goto err_pm;
+			}
+
+			rcu_read_lock();
+			rq = READ_ONCE(engine->heartbeat.systole);
+			if (rq)
+				rq = i915_request_get_rcu(rq);
+			rcu_read_unlock();
+		} while (!rq);
+
+		t0 = ktime_get();
+		while (rq == READ_ONCE(engine->heartbeat.systole))
+			yield(); /* work is on the local cpu! */
+		t1 = ktime_get();
+
+		i915_request_put(rq);
+		times[i] = ktime_us_delta(t1, t0);
+	}
+
+	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
+
+	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
+		engine->name,
+		times[ARRAY_SIZE(times) / 2],
+		times[0],
+		times[ARRAY_SIZE(times) - 1]);
+
+	/* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
+	if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
+		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
+		       engine->name,
+		       times[ARRAY_SIZE(times) / 2],
+		       jiffies_to_usecs(6));
+		err = -EINVAL;
+	}
+
+	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+	intel_engine_pm_put(engine);
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_heartbeat_fast(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/* Check that the heartbeat ticks at the desired rate. */
+	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		err = __live_heartbeat_fast(engine);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int __live_heartbeat_off(struct intel_engine_cs *engine)
+{
+	int err;
+
+	intel_engine_pm_get(engine);
+
+	engine->serial++;
+	flush_delayed_work(&engine->heartbeat.work);
+	if (!delayed_work_pending(&engine->heartbeat.work)) {
+		pr_err("%s: heartbeat not running\n",
+		       engine->name);
+		err = -EINVAL;
+		goto err_pm;
+	}
+
+	err = intel_engine_set_heartbeat(engine, 0);
+	if (err)
+		goto err_pm;
+
+	engine->serial++;
+	flush_delayed_work(&engine->heartbeat.work);
+	if (delayed_work_pending(&engine->heartbeat.work)) {
+		pr_err("%s: heartbeat still running\n",
+		       engine->name);
+		err = -EINVAL;
+		goto err_beat;
+	}
+
+	if (READ_ONCE(engine->heartbeat.systole)) {
+		pr_err("%s: heartbeat still allocated\n",
+		       engine->name);
+		err = -EINVAL;
+		goto err_beat;
+	}
+
+err_beat:
+	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+	intel_engine_pm_put(engine);
+	return err;
+}
+
+static int live_heartbeat_off(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/* Check that we can turn off heartbeat and not interrupt VIP */
+	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		err = __live_heartbeat_off(engine);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_idle_flush),
+		SUBTEST(live_idle_pulse),
+		SUBTEST(live_heartbeat_fast),
+		SUBTEST(live_heartbeat_off),
+	};
+	int saved_hangcheck;
+	int err;
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	saved_hangcheck = i915_modparams.enable_hangcheck;
+	i915_modparams.enable_hangcheck = INT_MAX;
+
+	err = intel_gt_live_subtests(tests, &i915->gt);
+
+	i915_modparams.enable_hangcheck = saved_hangcheck;
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 3a1419376912..cbf6b0735272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -25,7 +25,7 @@ static int live_engine_pm(void *arg)
 	}
 
 	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		const typeof(*igt_atomic_phases) *p;
 
 		for (p = igt_atomic_phases; p->name; p++) {
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
 				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
 				       engine->name, p->name);
 			else
-				intel_engine_pm_put(engine);
-			intel_engine_pm_put(engine);
+				intel_engine_pm_put_async(engine);
+			intel_engine_pm_put_async(engine);
 			p->critical_section_end();
 
-			/* engine wakeref is sync (instant) */
+			intel_engine_pm_flush(engine);
+
 			if (intel_engine_pm_is_awake(engine)) {
 				pr_err("%s is still awake after flushing pm\n",
 				       engine->name);
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
new file mode 100644
index 000000000000..09ff8e4f88af
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -0,0 +1,79 @@
+
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "selftest_llc.h"
+#include "selftest_rc6.h"
+
+static int live_gt_resume(void *arg)
+{
+	struct intel_gt *gt = arg;
+	IGT_TIMEOUT(end_time);
+	int err;
+
+	/* Do several suspend/resume cycles to check we don't explode! */
+	do {
+		intel_gt_suspend_prepare(gt);
+		intel_gt_suspend_late(gt);
+
+		if (gt->rc6.enabled) {
+			pr_err("rc6 still enabled after suspend!\n");
+			intel_gt_set_wedged_on_init(gt);
+			err = -EINVAL;
+			break;
+		}
+
+		err = intel_gt_resume(gt);
+		if (err)
+			break;
+
+		if (gt->rc6.supported && !gt->rc6.enabled) {
+			pr_err("rc6 not enabled upon resume!\n");
+			intel_gt_set_wedged_on_init(gt);
+			err = -EINVAL;
+			break;
+		}
+
+		err = st_llc_verify(&gt->llc);
+		if (err) {
+			pr_err("llc state not restored upon resume!\n");
+			intel_gt_set_wedged_on_init(gt);
+			break;
+		}
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_rc6_manual),
+		SUBTEST(live_gt_resume),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
+
+int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		/*
+		 * These tests may leave the system in an undesirable state.
+		 * They are intended to be run last in CI and the system
+		 * rebooted afterwards.
+		 */
+		SUBTEST(live_rc6_ctx_wa),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index a0098fc35921..3e5e6c86e843 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -25,7 +25,9 @@
 #include <linux/kthread.h>
 
 #include "gem/i915_gem_context.h"
-#include "gt/intel_gt.h"
+
+#include "intel_gt.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 
 #include "i915_selftest.h"
@@ -131,7 +133,7 @@ static struct i915_request *
 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct intel_gt *gt = h->gt;
-	struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
 	struct drm_i915_gem_object *obj;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
@@ -141,12 +143,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	int err;
 
 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
+	if (IS_ERR(obj)) {
+		i915_vm_put(vm);
 		return ERR_CAST(obj);
+	}
 
 	vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
 	if (IS_ERR(vaddr)) {
 		i915_gem_object_put(obj);
+		i915_vm_put(vm);
 		return ERR_CAST(vaddr);
 	}
 
@@ -157,16 +162,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	h->batch = vaddr;
 
 	vma = i915_vma_instance(h->obj, vm, NULL);
-	if (IS_ERR(vma))
+	if (IS_ERR(vma)) {
+		i915_vm_put(vm);
 		return ERR_CAST(vma);
+	}
 
 	hws = i915_vma_instance(h->hws, vm, NULL);
-	if (IS_ERR(hws))
+	if (IS_ERR(hws)) {
+		i915_vm_put(vm);
 		return ERR_CAST(hws);
+	}
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
+	if (err) {
+		i915_vm_put(vm);
 		return ERR_PTR(err);
+	}
 
 	err = i915_vma_pin(hws, 0, 0, PIN_USER);
 	if (err)
@@ -264,6 +275,7 @@ unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
+	i915_vm_put(vm);
 	return err ? ERR_PTR(err) : rq;
 }
 
@@ -285,7 +297,7 @@ static void hang_fini(struct hang *h)
 
 	kernel_context_close(h->ctx);
 
-	igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->gt->i915);
 }
 
 static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -298,6 +310,24 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
 			  1000));
 }
 
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+				     unsigned long *saved)
+{
+	*saved = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
+	intel_engine_pm_get(engine);
+	intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+				    unsigned long saved)
+{
+	intel_engine_pm_put(engine);
+
+	engine->props.heartbeat_interval_ms = saved;
+}
+
 static int igt_hang_sanitycheck(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -309,12 +339,11 @@ static int igt_hang_sanitycheck(void *arg)
 
 	/* Basic check that we can execute our hanging batch */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct intel_wedge_me w;
 		long timeout;
 
@@ -355,8 +384,6 @@ static int igt_hang_sanitycheck(void *arg)
 
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -370,40 +397,30 @@ static int igt_reset_nop(void *arg)
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
 	unsigned int reset_count, count;
 	enum intel_engine_id id;
-	struct drm_file *file;
 	IGT_TIMEOUT(end_time);
 	int err = 0;
 
 	/* Check that we can reset during non-user portions of requests */
 
-	file = mock_file(gt->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out;
-	}
-
-	i915_gem_context_clear_bannable(ctx);
 	reset_count = i915_reset_count(global);
 	count = 0;
 	do {
-		mutex_lock(&gt->i915->drm.struct_mutex);
-
-		for_each_engine(engine, gt->i915, id) {
+		for_each_engine(engine, gt, id) {
+			struct intel_context *ce;
 			int i;
 
+			ce = intel_context_create(engine);
+			if (IS_ERR(ce)) {
+				err = PTR_ERR(ce);
+				break;
+			}
+
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = igt_request_alloc(ctx, engine);
+				rq = intel_context_create_request(ce);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -411,13 +428,14 @@ static int igt_reset_nop(void *arg)
 
 				i915_request_add(rq);
 			}
+
+			intel_context_put(ce);
 		}
 
 		igt_global_reset_lock(gt);
 		intel_gt_reset(gt, ALL_ENGINES, NULL);
 		igt_global_reset_unlock(gt);
 
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (intel_gt_is_wedged(gt)) {
 			err = -EIO;
 			break;
@@ -429,19 +447,13 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	} while (time_before(jiffies, end_time));
 	pr_info("%s: %d resets\n", __func__, count);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
-out:
-	mock_file_free(gt->i915, file);
-	if (intel_gt_is_wedged(gt))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 	return err;
 }
@@ -451,38 +463,29 @@ static int igt_reset_nop_engine(void *arg)
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
-	struct drm_file *file;
-	int err = 0;
 
 	/* Check that we can engine-reset during non-user portions */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	file = mock_file(gt->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out;
-	}
-
-	i915_gem_context_clear_bannable(ctx);
-	for_each_engine(engine, gt->i915, id) {
-		unsigned int reset_count, reset_engine_count;
-		unsigned int count;
+	for_each_engine(engine, gt, id) {
+		unsigned int reset_count, reset_engine_count, count;
+		struct intel_context *ce;
+		unsigned long heartbeat;
 		IGT_TIMEOUT(end_time);
+		int err;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
 
 		reset_count = i915_reset_count(global);
 		reset_engine_count = i915_reset_engine_count(global, engine);
 		count = 0;
 
+		engine_heartbeat_disable(engine, &heartbeat);
 		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		do {
 			int i;
@@ -494,11 +497,10 @@ static int igt_reset_nop_engine(void *arg)
 				break;
 			}
 
-			mutex_lock(&gt->i915->drm.struct_mutex);
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = igt_request_alloc(ctx, engine);
+				rq = intel_context_create_request(ce);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -507,7 +509,6 @@ static int igt_reset_nop_engine(void *arg)
 				i915_request_add(rq);
 			}
 			err = intel_engine_reset(engine, NULL);
-			mutex_unlock(&gt->i915->drm.struct_mutex);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -528,25 +529,18 @@ static int igt_reset_nop_engine(void *arg)
 			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+		engine_heartbeat_enable(engine, heartbeat);
 
-		if (err)
-			break;
+		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
 
-		err = igt_flush_test(gt->i915, 0);
+		intel_context_put(ce);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
 		if (err)
-			break;
+			return err;
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
-out:
-	mock_file_free(gt->i915, file);
-	if (intel_gt_is_wedged(gt))
-		err = -EIO;
-	return err;
+	return 0;
 }
 
 static int __igt_reset_engine(struct intel_gt *gt, bool active)
@@ -559,19 +553,18 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 
 	/* Check that we can issue an engine reset on an idle engine (no-op) */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
 	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 	}
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		unsigned int reset_count, reset_engine_count;
+		unsigned long heartbeat;
 		IGT_TIMEOUT(end_time);
 
 		if (active && !intel_engine_can_store_dword(engine))
@@ -587,23 +580,20 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		reset_count = i915_reset_count(global);
 		reset_engine_count = i915_reset_engine_count(global, engine);
 
-		intel_engine_pm_get(engine);
+		engine_heartbeat_disable(engine, &heartbeat);
 		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		do {
 			if (active) {
 				struct i915_request *rq;
 
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -642,12 +632,12 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-		intel_engine_pm_put(engine);
+		engine_heartbeat_enable(engine, heartbeat);
 
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -655,11 +645,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (active)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -715,47 +702,42 @@ static int active_engine(void *data)
 	struct active_engine *arg = data;
 	struct intel_engine_cs *engine = arg->engine;
 	struct i915_request *rq[8] = {};
-	struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
-	struct drm_file *file;
-	unsigned long count = 0;
+	struct intel_context *ce[ARRAY_SIZE(rq)];
+	unsigned long count;
 	int err = 0;
 
-	file = mock_file(engine->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
-		mutex_lock(&engine->i915->drm.struct_mutex);
-		ctx[count] = live_context(engine->i915, file);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
-		if (IS_ERR(ctx[count])) {
-			err = PTR_ERR(ctx[count]);
+	for (count = 0; count < ARRAY_SIZE(ce); count++) {
+		ce[count] = intel_context_create(engine);
+		if (IS_ERR(ce[count])) {
+			err = PTR_ERR(ce[count]);
 			while (--count)
-				i915_gem_context_put(ctx[count]);
-			goto err_file;
+				intel_context_put(ce[count]);
+			return err;
 		}
 	}
 
+	count = 0;
 	while (!kthread_should_stop()) {
 		unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
 		struct i915_request *old = rq[idx];
 		struct i915_request *new;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
-		new = igt_request_alloc(ctx[idx], engine);
+		new = intel_context_create_request(ce[idx]);
 		if (IS_ERR(new)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
 			break;
 		}
 
-		if (arg->flags & TEST_PRIORITY)
-			ctx[idx]->sched.priority =
-				i915_prandom_u32_max_state(512, &prng);
-
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
+
+		if (engine->schedule && arg->flags & TEST_PRIORITY) {
+			struct i915_sched_attr attr = {
+				.priority =
+					i915_prandom_u32_max_state(512, &prng),
+			};
+			engine->schedule(rq[idx], &attr);
+		}
 
 		err = active_request_put(old);
 		if (err)
@@ -770,10 +752,10 @@ static int active_engine(void *data)
 		/* Keep the first error */
 		if (!err)
 			err = err__;
+
+		intel_context_put(ce[count]);
 	}
 
-err_file:
-	mock_file_free(engine->i915, file);
 	return err;
 }
 
@@ -791,13 +773,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
 	 * with any other engine.
 	 */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
 	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -805,10 +785,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			h.ctx->sched.priority = 1024;
 	}
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct active_engine threads[I915_NUM_ENGINES] = {};
 		unsigned long device = i915_reset_count(global);
 		unsigned long count = 0, reported;
+		unsigned long heartbeat;
 		IGT_TIMEOUT(end_time);
 
 		if (flags & TEST_ACTIVE &&
@@ -823,7 +804,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		}
 
 		memset(threads, 0, sizeof(threads));
-		for_each_engine(other, gt->i915, tmp) {
+		for_each_engine(other, gt, tmp) {
 			struct task_struct *tsk;
 
 			threads[tmp].resets =
@@ -849,23 +830,22 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			get_task_struct(tsk);
 		}
 
-		intel_engine_pm_get(engine);
+		yield(); /* start all threads before we begin */
+
+		engine_heartbeat_disable(engine, &heartbeat);
 		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		do {
 			struct i915_request *rq = NULL;
 
 			if (flags & TEST_ACTIVE) {
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -926,7 +906,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-		intel_engine_pm_put(engine);
+		engine_heartbeat_enable(engine, heartbeat);
+
 		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
 			engine->name, test_name, count);
 
@@ -940,7 +921,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		}
 
 unwind:
-		for_each_engine(other, gt->i915, tmp) {
+		for_each_engine(other, gt, tmp) {
 			int ret;
 
 			if (!threads[tmp].task)
@@ -977,9 +958,7 @@ unwind:
 		if (err)
 			break;
 
-		mutex_lock(&gt->i915->drm.struct_mutex);
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -987,11 +966,8 @@ unwind:
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (flags & TEST_ACTIVE)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -1047,7 +1023,7 @@ static int igt_reset_wait(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
-	struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+	struct intel_engine_cs *engine = gt->engine[RCS0];
 	struct i915_request *rq;
 	unsigned int reset_count;
 	struct hang h;
@@ -1061,7 +1037,6 @@ static int igt_reset_wait(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1109,7 +1084,6 @@ out_rq:
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1127,15 +1101,14 @@ static int evict_vma(void *data)
 {
 	struct evict_vma *arg = data;
 	struct i915_address_space *vm = arg->vma->vm;
-	struct drm_i915_private *i915 = vm->i915;
 	struct drm_mm_node evict = arg->vma->node;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&vm->mutex);
 	err = i915_gem_evict_for_node(vm, &evict, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 
 	return err;
 }
@@ -1143,39 +1116,33 @@ static int evict_vma(void *data)
 static int evict_fence(void *data)
 {
 	struct evict_vma *arg = data;
-	struct drm_i915_private *i915 = arg->vma->vm->i915;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/* Mark the fence register as dirty to force the mmio update. */
 	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
 	if (err) {
 		pr_err("Invalid Y-tiling settings; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
 	if (err) {
 		pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin_fence(arg->vma);
 	i915_vma_unpin(arg->vma);
 	if (err) {
 		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	i915_vma_unpin_fence(arg->vma);
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return 0;
 }
 
 static int __igt_reset_evict_vma(struct intel_gt *gt,
@@ -1183,23 +1150,26 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 				 int (*fn)(void *),
 				 unsigned int flags)
 {
-	struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+	struct intel_engine_cs *engine = gt->engine[RCS0];
 	struct drm_i915_gem_object *obj;
 	struct task_struct *tsk = NULL;
 	struct i915_request *rq;
 	struct evict_vma arg;
 	struct hang h;
+	unsigned int pin_flags;
 	int err;
 
+	if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
+		return 0;
+
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
 	if (IS_ERR(obj)) {
@@ -1227,10 +1197,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 		goto out_obj;
 	}
 
-	err = i915_vma_pin(arg.vma, 0, 0,
-			   i915_vma_is_ggtt(arg.vma) ?
-			   PIN_GLOBAL | PIN_MAPPABLE :
-			   PIN_USER);
+	pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		pin_flags |= PIN_MAPPABLE;
+
+	err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
 	if (err) {
 		i915_request_add(rq);
 		goto out_obj;
@@ -1262,8 +1234,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 	if (err)
 		goto out_rq;
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
@@ -1312,16 +1282,12 @@ out_reset:
 		put_task_struct(tsk);
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 out_rq:
 	i915_request_put(rq);
 out_obj:
 	i915_gem_object_put(obj);
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (intel_gt_is_wedged(gt))
 		return -EIO;
 
@@ -1339,29 +1305,21 @@ static int igt_reset_evict_ggtt(void *arg)
 static int igt_reset_evict_ppgtt(void *arg)
 {
 	struct intel_gt *gt = arg;
-	struct i915_gem_context *ctx;
-	struct drm_file *file;
+	struct i915_ppgtt *ppgtt;
 	int err;
 
-	file = mock_file(gt->i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	/* aliasing == global gtt locking, covered above */
+	if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
+		return 0;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out;
-	}
+	ppgtt = i915_ppgtt_create(gt);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
 
-	err = 0;
-	if (ctx->vm) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(gt, ctx->vm,
-					    evict_vma, EXEC_OBJECT_WRITE);
+	err = __igt_reset_evict_vma(gt, &ppgtt->vm,
+				    evict_vma, EXEC_OBJECT_WRITE);
+	i915_vm_put(&ppgtt->vm);
 
-out:
-	mock_file_free(gt->i915, file);
 	return err;
 }
 
@@ -1379,7 +1337,7 @@ static int wait_for_others(struct intel_gt *gt,
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		if (engine == exclude)
 			continue;
 
@@ -1403,12 +1361,11 @@ static int igt_reset_queue(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *prev;
 		IGT_TIMEOUT(end_time);
 		unsigned int count;
@@ -1518,7 +1475,7 @@ static int igt_reset_queue(void *arg)
 
 		i915_request_put(prev);
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1526,7 +1483,6 @@ static int igt_reset_queue(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1539,25 +1495,23 @@ static int igt_handle_error(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
-	struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+	struct intel_engine_cs *engine = gt->engine[RCS0];
 	struct hang h;
 	struct i915_request *rq;
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	int err;
 
 	/* Check that we can issue a global GPU and engine reset */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	err = hang_init(&h, gt);
 	if (err)
-		goto err_unlock;
+		return err;
 
 	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
@@ -1581,8 +1535,6 @@ static int igt_handle_error(void *arg)
 		goto err_request;
 	}
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	/* Temporarily disable error capture */
 	error = xchg(&global->first_error, (void *)-1);
 
@@ -1590,8 +1542,6 @@ static int igt_handle_error(void *arg)
 
 	xchg(&global->first_error, error);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	if (rq->fence.error != -EIO) {
 		pr_err("Guilty request not identified!\n");
 		err = -EINVAL;
@@ -1602,8 +1552,6 @@ err_request:
 	i915_request_put(rq);
 err_fini:
 	hang_fini(&h);
-err_unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1617,7 +1565,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
 	GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
 		  engine->name, mode, p->name);
 
-	tasklet_disable_nosync(t);
+	tasklet_disable(t);
 	p->critical_section_begin();
 
 	err = intel_engine_reset(engine, NULL);
@@ -1689,14 +1637,13 @@ static int igt_reset_engines_atomic(void *arg)
 
 	/* Check that the engines resets are usable from atomic context */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
 	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
 	igt_global_reset_lock(gt);
-	mutex_lock(&gt->i915->drm.struct_mutex);
 
 	/* Flush any requests before we get started and check basics */
 	if (!igt_force_reset(gt))
@@ -1706,7 +1653,7 @@ static int igt_reset_engines_atomic(void *arg)
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
 
-		for_each_engine(engine, gt->i915, id) {
+		for_each_engine(engine, gt, id) {
 			err = igt_atomic_reset_engine(engine, p);
 			if (err)
 				goto out;
@@ -1716,9 +1663,7 @@ static int igt_reset_engines_atomic(void *arg)
 out:
 	/* As we poke around the guts, do a full reset before continuing. */
 	igt_force_reset(gt);
-
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	return err;
@@ -1743,27 +1688,19 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	};
 	struct intel_gt *gt = &i915->gt;
 	intel_wakeref_t wakeref;
-	bool saved_hangcheck;
 	int err;
 
-	if (!intel_has_gpu_reset(gt->i915))
+	if (!intel_has_gpu_reset(gt))
 		return 0;
 
 	if (intel_gt_is_wedged(gt))
 		return -EIO; /* we're long past hope of a successful reset */
 
-	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
-	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
-	drain_delayed_work(&gt->hangcheck.work); /* flush param */
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
-	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
new file mode 100644
index 000000000000..fd3770e48ac7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_pm.h" /* intel_gpu_freq() */
+#include "selftest_llc.h"
+#include "intel_rps.h"
+
+static int gen6_verify_ring_freq(struct intel_llc *llc)
+{
+	struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+	struct ia_constants consts;
+	intel_wakeref_t wakeref;
+	unsigned int gpu_freq;
+	int err = 0;
+
+	wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
+
+	if (!get_ia_constants(llc, &consts)) {
+		err = -ENODEV;
+		goto out_rpm;
+	}
+
+	for (gpu_freq = consts.min_gpu_freq;
+	     gpu_freq <= consts.max_gpu_freq;
+	     gpu_freq++) {
+		struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+		unsigned int ia_freq, ring_freq, found;
+		u32 val;
+
+		calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+
+		val = gpu_freq;
+		if (sandybridge_pcode_read(i915,
+					   GEN6_PCODE_READ_MIN_FREQ_TABLE,
+					   &val, NULL)) {
+			pr_err("Failed to read freq table[%d], range [%d, %d]\n",
+			       gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq);
+			err = -ENXIO;
+			break;
+		}
+
+		found = (val >> 0) & 0xff;
+		if (found != ia_freq) {
+			pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n",
+			       gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+			       intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+			       found, ia_freq);
+			err = -EINVAL;
+			break;
+		}
+
+		found = (val >> 8) & 0xff;
+		if (found != ring_freq) {
+			pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n",
+			       gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+			       intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+			       found, ring_freq);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+out_rpm:
+	intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref);
+	return err;
+}
+
+int st_llc_verify(struct intel_llc *llc)
+{
+	int err = 0;
+
+	if (HAS_LLC(llc_to_gt(llc)->i915))
+		err = gen6_verify_ring_freq(llc);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h
new file mode 100644
index 000000000000..873f896e72f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_LLC_H
+#define SELFTEST_LLC_H
+
+struct intel_llc;
+
+int st_llc_verify(struct intel_llc *llc);
+
+#endif /* SELFTEST_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index d791158988d6..15cda024e3e4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_reset.h"
 
 #include "i915_selftest.h"
@@ -19,66 +20,271 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+				     unsigned long *saved)
+{
+	*saved = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
+	intel_engine_pm_get(engine);
+	intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+				    unsigned long saved)
+{
+	intel_engine_pm_put(engine);
+
+	engine->props.heartbeat_interval_ms = saved;
+}
+
 static int live_sanitycheck(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct i915_gem_engines_iter it;
-	struct i915_gem_context *ctx;
-	struct intel_context *ce;
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	struct igt_spinner spin;
-	intel_wakeref_t wakeref;
-	int err = -ENOMEM;
+	int err = 0;
 
-	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin, &i915->gt))
-		goto err_unlock;
-
-	ctx = kernel_context(i915);
-	if (!ctx)
-		goto err_spin;
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
 
-	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
 		struct i915_request *rq;
 
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto err_ctx;
+			goto out_ctx;
 		}
 
 		i915_request_add(rq);
 		if (!igt_wait_for_spinner(&spin, rq)) {
 			GEM_TRACE("spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
-			goto err_ctx;
+			goto out_ctx;
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(gt->i915)) {
 			err = -EIO;
-			goto err_ctx;
+			goto out_ctx;
 		}
+
+out_ctx:
+		intel_context_put(ce);
+		if (err)
+			break;
 	}
 
+	igt_spinner_fini(&spin);
+	return err;
+}
+
+static int live_unlite_restore(struct intel_gt *gt, int prio)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we can correctly context switch between 2 instances
+	 * on the same engine from the same parent context.
+	 */
+
+	if (igt_spinner_init(&spin, gt))
+		return err;
+
 	err = 0;
-err_ctx:
-	i915_gem_context_unlock_engines(ctx);
-	kernel_context_close(ctx);
-err_spin:
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce[2] = {};
+		struct i915_request *rq[2];
+		struct igt_live_test t;
+		unsigned long saved;
+		int n;
+
+		if (prio && !intel_engine_has_preemption(engine))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+		engine_heartbeat_disable(engine, &saved);
+
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			struct intel_context *tmp;
+
+			tmp = intel_context_create(engine);
+			if (IS_ERR(tmp)) {
+				err = PTR_ERR(tmp);
+				goto err_ce;
+			}
+
+			err = intel_context_pin(tmp);
+			if (err) {
+				intel_context_put(tmp);
+				goto err_ce;
+			}
+
+			/*
+			 * Setup the pair of contexts such that if we
+			 * lite-restore using the RING_TAIL from ce[1] it
+			 * will execute garbage from ce[0]->ring.
+			 */
+			memset(tmp->ring->vaddr,
+			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+			       tmp->ring->vma->size);
+
+			ce[n] = tmp;
+		}
+		GEM_BUG_ON(!ce[1]->ring->size);
+		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+		__execlists_update_reg_state(ce[1], engine);
+
+		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto err_ce;
+		}
+
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
+
+		if (!igt_wait_for_spinner(&spin, rq[0])) {
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		rq[1] = i915_request_create(ce[1]);
+		if (IS_ERR(rq[1])) {
+			err = PTR_ERR(rq[1]);
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		if (!prio) {
+			/*
+			 * Ensure we do the switch to ce[1] on completion.
+			 *
+			 * rq[0] is already submitted, so this should reduce
+			 * to a no-op (a wait on a request on the same engine
+			 * uses the submit fence, not the completion fence),
+			 * but it will install a dependency on rq[1] for rq[0]
+			 * that will prevent the pair being reordered by
+			 * timeslicing.
+			 */
+			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+		}
+
+		i915_request_get(rq[1]);
+		i915_request_add(rq[1]);
+		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
+		i915_request_put(rq[0]);
+
+		if (prio) {
+			struct i915_sched_attr attr = {
+				.priority = prio,
+			};
+
+			/* Alternatively preempt the spinner with ce[1] */
+			engine->schedule(rq[1], &attr);
+		}
+
+		/* And switch back to ce[0] for good measure */
+		rq[0] = i915_request_create(ce[0]);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			i915_request_put(rq[1]);
+			goto err_ce;
+		}
+
+		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
+		i915_request_put(rq[1]);
+		i915_request_put(rq[0]);
+
+err_ce:
+		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+		igt_spinner_end(&spin);
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			if (IS_ERR_OR_NULL(ce[n]))
+				break;
+
+			intel_context_unpin(ce[n]);
+			intel_context_put(ce[n]);
+		}
+
+		engine_heartbeat_enable(engine, saved);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
 	igt_spinner_fini(&spin);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
+static int live_unlite_switch(void *arg)
+{
+	return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -119,40 +325,46 @@ emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 static struct i915_request *
 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 {
-	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 	struct i915_request *rq;
 	int err;
 
-	ctx = kernel_context(engine->i915);
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
 
-	rq = igt_request_alloc(ctx, engine);
+	rq = intel_context_create_request(ce);
 	if (IS_ERR(rq))
-		goto out_ctx;
+		goto out_ce;
 
-	err = emit_semaphore_chain(rq, vma, idx);
+	err = 0;
+	if (rq->engine->emit_init_breadcrumb)
+		err = rq->engine->emit_init_breadcrumb(rq);
+	if (err == 0)
+		err = emit_semaphore_chain(rq, vma, idx);
+	if (err == 0)
+		i915_request_get(rq);
 	i915_request_add(rq);
 	if (err)
 		rq = ERR_PTR(err);
 
-out_ctx:
-	kernel_context_close(ctx);
+out_ce:
+	intel_context_put(ce);
 	return rq;
 }
 
 static int
 release_queue(struct intel_engine_cs *engine,
 	      struct i915_vma *vma,
-	      int idx)
+	      int idx, int prio)
 {
 	struct i915_sched_attr attr = {
-		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+		.priority = prio,
 	};
 	struct i915_request *rq;
 	u32 *cs;
 
-	rq = i915_request_create(engine->kernel_context);
+	rq = intel_engine_create_kernel_request(engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -168,9 +380,15 @@ release_queue(struct intel_engine_cs *engine,
 	*cs++ = 1;
 
 	intel_ring_advance(rq, cs);
+
+	i915_request_get(rq);
 	i915_request_add(rq);
 
+	local_bh_disable();
 	engine->schedule(rq, &attr);
+	local_bh_enable(); /* kick tasklet */
+
+	i915_request_put(rq);
 
 	return 0;
 }
@@ -189,8 +407,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	if (IS_ERR(head))
 		return PTR_ERR(head);
 
-	i915_request_get(head);
-	for_each_engine(engine, outer->i915, id) {
+	for_each_engine(engine, outer->gt, id) {
 		for (i = 0; i < count; i++) {
 			struct i915_request *rq;
 
@@ -199,15 +416,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 				err = PTR_ERR(rq);
 				goto out;
 			}
+
+			i915_request_put(rq);
 		}
 	}
 
-	err = release_queue(outer, vma, n);
+	err = release_queue(outer, vma, n, INT_MAX);
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head,
-			      I915_WAIT_LOCKED,
+	if (i915_request_wait(head, 0,
 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       count, n);
@@ -223,9 +441,8 @@ out:
 
 static int live_timeslice_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void *vaddr;
 	int err = 0;
@@ -239,17 +456,14 @@ static int live_timeslice_preempt(void *arg)
 	 * need to preempt the current task and replace it with another
 	 * ready task.
 	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
@@ -269,17 +483,21 @@ static int live_timeslice_preempt(void *arg)
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
 
-		for_each_engine(engine, i915, id) {
+		for_each_engine(engine, gt, id) {
+			unsigned long saved;
+
 			if (!intel_engine_has_preemption(engine))
 				continue;
 
 			memset(vaddr, 0, PAGE_SIZE);
 
+			engine_heartbeat_disable(engine, &saved);
 			err = slice_semaphore_queue(engine, vma, count);
+			engine_heartbeat_enable(engine, saved);
 			if (err)
 				goto err_pin;
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			if (igt_flush_test(gt->i915)) {
 				err = -EIO;
 				goto err_pin;
 			}
@@ -292,22 +510,185 @@ err_map:
 	i915_gem_object_unpin_map(obj);
 err_obj:
 	i915_gem_object_put(obj);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return rq;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	return rq;
+}
+
+static int wait_for_submit(struct intel_engine_cs *engine,
+			   struct i915_request *rq,
+			   unsigned long timeout)
+{
+	timeout += jiffies;
+	do {
+		cond_resched();
+		intel_engine_flush_submission(engine);
+		if (i915_request_is_active(rq))
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	return -ETIME;
+}
+
+static long timeslice_threshold(const struct intel_engine_cs *engine)
+{
+	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+}
+
+static int live_timeslice_queue(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err = 0;
+
+	/*
+	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
+	 * timeslicing between them disabled, we *do* enable timeslicing
+	 * if the queue demands it. (Normally, we do not submit if
+	 * ELSP[1] is already occupied, so must rely on timeslicing to
+	 * eject ELSP[0] in favour of the queue.)
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_map;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_sched_attr attr = {
+			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+		};
+		struct i915_request *rq, *nop;
+		unsigned long saved;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		engine_heartbeat_disable(engine, &saved);
+		memset(vaddr, 0, PAGE_SIZE);
+
+		/* ELSP[0]: semaphore wait */
+		rq = semaphore_queue(engine, vma, 0);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_heartbeat;
+		}
+		engine->schedule(rq, &attr);
+		err = wait_for_submit(engine, rq, HZ / 2);
+		if (err) {
+			pr_err("%s: Timed out trying to submit semaphores\n",
+			       engine->name);
+			goto err_rq;
+		}
+
+		/* ELSP[1]: nop request */
+		nop = nop_request(engine);
+		if (IS_ERR(nop)) {
+			err = PTR_ERR(nop);
+			goto err_rq;
+		}
+		err = wait_for_submit(engine, nop, HZ / 2);
+		i915_request_put(nop);
+		if (err) {
+			pr_err("%s: Timed out trying to submit nop\n",
+			       engine->name);
+			goto err_rq;
+		}
+
+		GEM_BUG_ON(i915_request_completed(rq));
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		/* Queue: semaphore signal, matching priority as semaphore */
+		err = release_queue(engine, vma, 1, effective_prio(rq));
+		if (err)
+			goto err_rq;
+
+		intel_engine_flush_submission(engine);
+		if (!READ_ONCE(engine->execlists.timer.expires) &&
+		    !i915_request_completed(rq)) {
+			struct drm_printer p =
+				drm_info_printer(gt->i915->drm.dev);
+
+			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+				      engine->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+			GEM_TRACE_DUMP();
+
+			memset(vaddr, 0xff, PAGE_SIZE);
+			err = -EINVAL;
+		}
+
+		/* Timeslice every jiffy, so within 2 we should signal */
+		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
+			struct drm_printer p =
+				drm_info_printer(gt->i915->drm.dev);
+
+			pr_err("%s: Failed to timeslice into queue\n",
+			       engine->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+
+			memset(vaddr, 0xff, PAGE_SIZE);
+			err = -EIO;
+		}
+err_rq:
+		i915_request_put(rq);
+err_heartbeat:
+		engine_heartbeat_enable(engine, saved);
+		if (err)
+			break;
+	}
 
+	i915_vma_unpin(vma);
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
 	return err;
 }
 
 static int live_busywait_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct intel_engine_cs *engine;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	u32 *map;
 
@@ -316,22 +697,19 @@ static int live_busywait_preempt(void *arg)
 	 * preempt the busywaits used to synchronise between rings.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
-		goto err_unlock;
+		return -ENOMEM;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto err_ctx_lo;
@@ -343,7 +721,7 @@ static int live_busywait_preempt(void *arg)
 		goto err_obj;
 	}
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_map;
@@ -353,7 +731,7 @@ static int live_busywait_preempt(void *arg)
 	if (err)
 		goto err_map;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *lo, *hi;
 		struct igt_live_test t;
 		u32 *cs;
@@ -364,7 +742,7 @@ static int live_busywait_preempt(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_vma;
 		}
@@ -407,15 +785,19 @@ static int live_busywait_preempt(void *arg)
 		*cs++ = 0;
 
 		intel_ring_advance(lo, cs);
+
+		i915_request_get(lo);
 		i915_request_add(lo);
 
 		if (wait_for(READ_ONCE(*map), 10)) {
+			i915_request_put(lo);
 			err = -ETIMEDOUT;
 			goto err_vma;
 		}
 
 		/* Low priority request should be busywaiting now */
 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
+			i915_request_put(lo);
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
@@ -425,6 +807,7 @@ static int live_busywait_preempt(void *arg)
 		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -432,6 +815,7 @@ static int live_busywait_preempt(void *arg)
 		if (IS_ERR(cs)) {
 			err = PTR_ERR(cs);
 			i915_request_add(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -444,7 +828,7 @@ static int live_busywait_preempt(void *arg)
 		i915_request_add(hi);
 
 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
-			struct drm_printer p = drm_info_printer(i915->drm.dev);
+			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
 			pr_err("%s: Failed to preempt semaphore busywait!\n",
 			       engine->name);
@@ -452,11 +836,13 @@ static int live_busywait_preempt(void *arg)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 			GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
+			i915_request_put(lo);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_vma;
 		}
 		GEM_BUG_ON(READ_ONCE(*map));
+		i915_request_put(lo);
 
 		if (igt_live_test_end(&t)) {
 			err = -EIO;
@@ -475,9 +861,6 @@ err_ctx_lo:
 	kernel_context_close(ctx_lo);
 err_ctx_hi:
 	kernel_context_close(ctx_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -501,49 +884,45 @@ spinner_create_request(struct igt_spinner *spin,
 
 static int live_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
 		pr_err("Logical preemption supported, but not exposed\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, &i915->gt))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -559,7 +938,7 @@ static int live_preempt(void *arg)
 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 			GEM_TRACE("lo spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -576,7 +955,7 @@ static int live_preempt(void *arg)
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			GEM_TRACE("hi spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -599,54 +978,47 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
 static int live_late_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, &i915->gt))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 
 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -705,15 +1077,12 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&spin_hi);
 	igt_spinner_end(&spin_lo);
-	intel_gt_set_wedged(&i915->gt);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_ctx_lo;
 }
@@ -723,14 +1092,13 @@ struct preempt_client {
 	struct i915_gem_context *ctx;
 };
 
-static int preempt_client_init(struct drm_i915_private *i915,
-			       struct preempt_client *c)
+static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
 {
-	c->ctx = kernel_context(i915);
+	c->ctx = kernel_context(gt->i915);
 	if (!c->ctx)
 		return -ENOMEM;
 
-	if (igt_spinner_init(&c->spin, &i915->gt))
+	if (igt_spinner_init(&c->spin, gt))
 		goto err_ctx;
 
 	return 0;
@@ -748,11 +1116,10 @@ static void preempt_client_fini(struct preempt_client *c)
 
 static int live_nopreempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -760,19 +1127,16 @@ static int live_nopreempt(void *arg)
 	 * that may be being observed and not want to be interrupted.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &a))
-		goto err_unlock;
-	if (preempt_client_init(i915, &b))
+	if (preempt_client_init(gt, &a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &b))
 		goto err_client_a;
 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *rq_a, *rq_b;
 
 		if (!intel_engine_has_preemption(engine))
@@ -789,7 +1153,7 @@ static int live_nopreempt(void *arg)
 		}
 
 		/* Low priority client, but unpreemptable! */
-		rq_a->flags |= I915_REQUEST_NOPREEMPT;
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
 
 		i915_request_add(rq_a);
 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
@@ -832,7 +1196,7 @@ static int live_nopreempt(void *arg)
 			goto err_wedged;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(gt->i915))
 			goto err_wedged;
 	}
 
@@ -841,29 +1205,344 @@ err_client_b:
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&b.spin);
 	igt_spinner_end(&a.spin);
-	intel_gt_set_wedged(&i915->gt);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_b;
 }
 
+struct live_preempt_cancel {
+	struct intel_engine_cs *engine;
+	struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq;
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP0 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq = spinner_create_request(&arg->a.spin,
+				    arg->a.ctx, arg->engine,
+				    MI_ARB_CHECK);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+		err = -EIO;
+		goto out;
+	}
+
+	intel_context_set_banned(rq->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[2] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP1 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_NOOP); /* no preemption */
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	rq[1] = spinner_create_request(&arg->b.spin,
+				       arg->b.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	intel_context_set_banned(rq[1]->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	igt_spinner_end(&arg->a.spin);
+	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != 0) {
+		pr_err("Normal inflight0 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != -EIO) {
+		pr_err("Cancelled inflight1 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[3] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Full ELSP and one in the wings */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	rq[2] = spinner_create_request(&arg->b.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[2])) {
+		err = PTR_ERR(rq[2]);
+		goto out;
+	}
+
+	i915_request_get(rq[2]);
+	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+	i915_request_add(rq[2]);
+	if (err)
+		goto out;
+
+	intel_context_set_banned(rq[2]->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != 0) {
+		pr_err("Normal inflight1 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[2]->fence.error != -EIO) {
+		pr_err("Cancelled queued request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[2]);
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_hostile(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq;
+	int err;
+
+	/* Preempt cancel non-preemptible spinner in ELSP0 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return 0;
+
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	rq = spinner_create_request(&arg->a.spin,
+				    arg->a.ctx, arg->engine,
+				    MI_NOOP); /* preemption disabled */
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+		err = -EIO;
+		goto out;
+	}
+
+	intel_context_set_banned(rq->context);
+	err = intel_engine_pulse(arg->engine); /* force reset */
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq);
+	if (igt_flush_test(arg->engine->i915))
+		err = -EIO;
+	return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct live_preempt_cancel data;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * To cancel an inflight context, we need to first remove it from the
+	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	if (preempt_client_init(gt, &data.a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &data.b))
+		goto err_client_a;
+
+	for_each_engine(data.engine, gt, id) {
+		if (!intel_engine_has_preemption(data.engine))
+			continue;
+
+		err = __cancel_active0(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_active1(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_queued(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_hostile(&data);
+		if (err)
+			goto err_wedged;
+	}
+
+	err = 0;
+err_client_b:
+	preempt_client_fini(&data.b);
+err_client_a:
+	preempt_client_fini(&data.a);
+	return err;
+
+err_wedged:
+	GEM_TRACE_DUMP();
+	igt_spinner_end(&data.b.spin);
+	igt_spinner_end(&data.a.spin);
+	intel_gt_set_wedged(gt);
+	goto err_client_b;
+}
+
 static int live_suppress_self_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {
 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
 	};
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -873,30 +1552,31 @@ static int live_suppress_self_preempt(void *arg)
 	 * completion event.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0; /* presume black blox */
 
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(gt->i915))
 		return 0; /* GVT forces single port & request submission */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &a))
-		goto err_unlock;
-	if (preempt_client_init(i915, &b))
+	if (preempt_client_init(gt, &a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &b))
 		goto err_client_a;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *rq_a, *rq_b;
 		int depth;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
+		if (igt_flush_test(gt->i915))
+			goto err_wedged;
+
+		intel_engine_pm_get(engine);
 		engine->execlists.preempt_hang.count = 0;
 
 		rq_a = spinner_create_request(&a.spin,
@@ -904,12 +1584,14 @@ static int live_suppress_self_preempt(void *arg)
 					      MI_NOOP);
 		if (IS_ERR(rq_a)) {
 			err = PTR_ERR(rq_a);
+			intel_engine_pm_put(engine);
 			goto err_client_b;
 		}
 
 		i915_request_add(rq_a);
 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
 			pr_err("First client failed to start\n");
+			intel_engine_pm_put(engine);
 			goto err_wedged;
 		}
 
@@ -921,6 +1603,7 @@ static int live_suppress_self_preempt(void *arg)
 						      MI_NOOP);
 			if (IS_ERR(rq_b)) {
 				err = PTR_ERR(rq_b);
+				intel_engine_pm_put(engine);
 				goto err_client_b;
 			}
 			i915_request_add(rq_b);
@@ -931,6 +1614,7 @@ static int live_suppress_self_preempt(void *arg)
 
 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
 				pr_err("Second client failed to start\n");
+				intel_engine_pm_put(engine);
 				goto err_wedged;
 			}
 
@@ -944,11 +1628,13 @@ static int live_suppress_self_preempt(void *arg)
 			       engine->name,
 			       engine->execlists.preempt_hang.count,
 			       depth);
+			intel_engine_pm_put(engine);
 			err = -EINVAL;
 			goto err_client_b;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		intel_engine_pm_put(engine);
+		if (igt_flush_test(gt->i915))
 			goto err_wedged;
 	}
 
@@ -957,15 +1643,12 @@ err_client_b:
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&b.spin);
 	igt_spinner_end(&a.spin);
-	intel_gt_set_wedged(&i915->gt);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_b;
 }
@@ -984,9 +1667,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -1021,11 +1708,11 @@ static void dummy_request_free(struct i915_request *dummy)
 
 static int live_suppress_wait_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct preempt_client client[4];
+	struct i915_request *rq[ARRAY_SIZE(client)] = {};
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	int i;
 
@@ -1035,22 +1722,19 @@ static int live_suppress_wait_preempt(void *arg)
 	 * not needlessly generate preempt-to-idle cycles.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
-		goto err_unlock;
-	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
+		return -ENOMEM;
+	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
 		goto err_client_0;
-	if (preempt_client_init(i915, &client[2])) /* head of queue */
+	if (preempt_client_init(gt, &client[2])) /* head of queue */
 		goto err_client_1;
-	if (preempt_client_init(i915, &client[3])) /* bystander */
+	if (preempt_client_init(gt, &client[3])) /* bystander */
 		goto err_client_2;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		int depth;
 
 		if (!intel_engine_has_preemption(engine))
@@ -1060,7 +1744,6 @@ static int live_suppress_wait_preempt(void *arg)
 			continue;
 
 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
-			struct i915_request *rq[ARRAY_SIZE(client)];
 			struct i915_request *dummy;
 
 			engine->execlists.preempt_hang.count = 0;
@@ -1070,18 +1753,22 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_client_3;
 
 			for (i = 0; i < ARRAY_SIZE(client); i++) {
-				rq[i] = spinner_create_request(&client[i].spin,
-							       client[i].ctx, engine,
-							       MI_NOOP);
-				if (IS_ERR(rq[i])) {
-					err = PTR_ERR(rq[i]);
+				struct i915_request *this;
+
+				this = spinner_create_request(&client[i].spin,
+							      client[i].ctx, engine,
+							      MI_NOOP);
+				if (IS_ERR(this)) {
+					err = PTR_ERR(this);
 					goto err_wedged;
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&rq[i]->timeline->last_request,
-							  dummy);
-				i915_request_add(rq[i]);
+				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
+							&dummy->fence);
+
+				rq[i] = i915_request_get(this);
+				i915_request_add(this);
 			}
 
 			dummy_request_free(dummy);
@@ -1102,10 +1789,13 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_wedged;
 			}
 
-			for (i = 0; i < ARRAY_SIZE(client); i++)
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
 				igt_spinner_end(&client[i].spin);
+				i915_request_put(rq[i]);
+				rq[i] = NULL;
+			}
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(gt->i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -1128,26 +1818,24 @@ err_client_1:
 	preempt_client_fini(&client[1]);
 err_client_0:
 	preempt_client_fini(&client[0]);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
-	for (i = 0; i < ARRAY_SIZE(client); i++)
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
 		igt_spinner_end(&client[i].spin);
-	intel_gt_set_wedged(&i915->gt);
+		i915_request_put(rq[i]);
+	}
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_3;
 }
 
 static int live_chain_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct preempt_client hi, lo;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -1156,19 +1844,16 @@ static int live_chain_preempt(void *arg)
 	 * the previously submitted spinner in B.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &hi))
-		goto err_unlock;
+	if (preempt_client_init(gt, &hi))
+		return -ENOMEM;
 
-	if (preempt_client_init(i915, &lo))
+	if (preempt_client_init(gt, &lo))
 		goto err_client_hi;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_sched_attr attr = {
 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
 		};
@@ -1184,6 +1869,8 @@ static int live_chain_preempt(void *arg)
 					    MI_ARB_CHECK);
 		if (IS_ERR(rq))
 			goto err_wedged;
+
+		i915_request_get(rq);
 		i915_request_add(rq);
 
 		ring_size = rq->wa_tail - rq->head;
@@ -1196,10 +1883,12 @@ static int live_chain_preempt(void *arg)
 		igt_spinner_end(&lo.spin);
 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
+			i915_request_put(rq);
 			goto err_wedged;
 		}
+		i915_request_put(rq);
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_wedged;
 		}
@@ -1231,36 +1920,46 @@ static int live_chain_preempt(void *arg)
 			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
 			engine->schedule(rq, &attr);
 
 			igt_spinner_end(&hi.spin);
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
+					drm_info_printer(gt->i915->drm.dev);
 
 				pr_err("Failed to preempt over chain of %d\n",
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+				i915_request_put(rq);
 				goto err_wedged;
 			}
 			igt_spinner_end(&lo.spin);
+			i915_request_put(rq);
 
 			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
+
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
+					drm_info_printer(gt->i915->drm.dev);
 
 				pr_err("Failed to flush low priority chain of %d requests\n",
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+
+				i915_request_put(rq);
 				goto err_wedged;
 			}
+			i915_request_put(rq);
 		}
 
 		if (igt_live_test_end(&t)) {
@@ -1274,57 +1973,245 @@ err_client_lo:
 	preempt_client_fini(&lo);
 err_client_hi:
 	preempt_client_fini(&hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&hi.spin);
 	igt_spinner_end(&lo.spin);
-	intel_gt_set_wedged(&i915->gt);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_lo;
 }
 
+static int create_gang(struct intel_engine_cs *engine,
+		       struct i915_request **prev)
+{
+	struct drm_i915_gem_object *obj;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	obj = i915_gem_object_create_internal(engine->i915, 4096);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_ce;
+	}
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err_obj;
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(cs))
+		goto err_obj;
+
+	/* Semaphore target: spin until zero */
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = lower_32_bits(vma->node.start);
+	*cs++ = upper_32_bits(vma->node.start);
+
+	if (*prev) {
+		u64 offset = (*prev)->batch->node.start;
+
+		/* Terminate the spinner in the next lower priority batch. */
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
+		*cs++ = lower_32_bits(offset);
+		*cs++ = upper_32_bits(offset);
+		*cs++ = 0;
+	}
+
+	*cs++ = MI_BATCH_BUFFER_END;
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		goto err_obj;
+
+	rq->batch = vma;
+	i915_request_get(rq);
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, false);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, 0);
+	if (!err)
+		err = rq->engine->emit_bb_start(rq,
+						vma->node.start,
+						PAGE_SIZE, 0);
+	i915_vma_unlock(vma);
+	i915_request_add(rq);
+	if (err)
+		goto err_rq;
+
+	i915_gem_object_put(obj);
+	intel_context_put(ce);
+
+	rq->client_link.next = &(*prev)->client_link;
+	*prev = rq;
+	return 0;
+
+err_rq:
+	i915_request_put(rq);
+err_obj:
+	i915_gem_object_put(obj);
+err_ce:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_preempt_gang(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	/*
+	 * Build as long a chain of preempters as we can, with each
+	 * request higher priority than the last. Once we are ready, we release
+	 * the last batch which then precolates down the chain, each releasing
+	 * the next oldest in turn. The intent is to simply push as hard as we
+	 * can with the number of preemptions, trying to exceed narrow HW
+	 * limits. At a minimum, we insist that we can sort all the user
+	 * high priority levels into execution order.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq = NULL;
+		struct igt_live_test t;
+		IGT_TIMEOUT(end_time);
+		int prio = 0;
+		int err = 0;
+		u32 *cs;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
+			return -EIO;
+
+		do {
+			struct i915_sched_attr attr = {
+				.priority = I915_USER_PRIORITY(prio++),
+			};
+
+			err = create_gang(engine, &rq);
+			if (err)
+				break;
+
+			/* Submit each spinner at increasing priority */
+			engine->schedule(rq, &attr);
+
+			if (prio <= I915_PRIORITY_MAX)
+				continue;
+
+			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
+				break;
+
+			if (__igt_timeout(end_time, NULL))
+				break;
+		} while (1);
+		pr_debug("%s: Preempt chain of %d requests\n",
+			 engine->name, prio);
+
+		/*
+		 * Such that the last spinner is the highest priority and
+		 * should execute first. When that spinner completes,
+		 * it will terminate the next lowest spinner until there
+		 * are no more spinners and the gang is complete.
+		 */
+		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
+		if (!IS_ERR(cs)) {
+			*cs = 0;
+			i915_gem_object_unpin_map(rq->batch->obj);
+		} else {
+			err = PTR_ERR(cs);
+			intel_gt_set_wedged(gt);
+		}
+
+		while (rq) { /* wait for each rq from highest to lowest prio */
+			struct i915_request *n =
+				list_next_entry(rq, client_link);
+
+			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
+				struct drm_printer p =
+					drm_info_printer(engine->i915->drm.dev);
+
+				pr_err("Failed to flush chain of %d requests, at %d\n",
+				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -ETIME;
+			}
+
+			i915_request_put(rq);
+			rq = n;
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int live_preempt_hang(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (!intel_has_reset_engine(i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, &i915->gt))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
@@ -1341,7 +2228,7 @@ static int live_preempt_hang(void *arg)
 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 			GEM_TRACE("lo spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1363,28 +2250,28 @@ static int live_preempt_hang(void *arg)
 						 HZ / 10)) {
 			pr_err("Preemption did not occur within timeout!");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 		intel_engine_reset(engine, NULL);
-		clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 
 		engine->execlists.preempt_hang.inject_hang = false;
 
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			GEM_TRACE("hi spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(gt->i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1399,9 +2286,105 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_preempt_timeout(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct igt_spinner spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we force preemption to occur by cancelling the previous
+	 * context if it refuses to yield the GPU.
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return 0;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin_lo, gt))
+		return -ENOMEM;
+
+	ctx_hi = kernel_context(gt->i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+	ctx_lo = kernel_context(gt->i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+	for_each_engine(engine, gt, id) {
+		unsigned long saved_timeout;
+		struct i915_request *rq;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_NOOP); /* preemption disabled */
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!igt_wait_for_spinner(&spin_lo, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		rq = igt_request_alloc(ctx_hi, engine);
+		if (IS_ERR(rq)) {
+			igt_spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		/* Flush the previous CS ack before changing timeouts */
+		while (READ_ONCE(engine->execlists.pending[0]))
+			cpu_relax();
+
+		saved_timeout = engine->props.preempt_timeout_ms;
+		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		intel_engine_flush_submission(engine);
+		engine->props.preempt_timeout_ms = saved_timeout;
+
+		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+			intel_gt_set_wedged(gt);
+			i915_request_put(rq);
+			err = -ETIME;
+			goto err_ctx_lo;
+		}
+
+		igt_spinner_end(&spin_lo);
+		i915_request_put(rq);
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	igt_spinner_fini(&spin_lo);
 	return err;
 }
 
@@ -1416,7 +2399,7 @@ static int random_priority(struct rnd_state *rnd)
 }
 
 struct preempt_smoke {
-	struct drm_i915_private *i915;
+	struct intel_gt *gt;
 	struct i915_gem_context **contexts;
 	struct intel_engine_cs *engine;
 	struct drm_i915_gem_object *batch;
@@ -1440,7 +2423,11 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;
 
 	if (batch) {
-		vma = i915_vma_instance(batch, ctx->vm, NULL);
+		struct i915_address_space *vm;
+
+		vm = i915_gem_context_get_vm_rcu(ctx);
+		vma = i915_vma_instance(batch, vm, NULL);
+		i915_vm_put(vm);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
@@ -1489,11 +2476,9 @@ static int smoke_crescendo_thread(void *arg)
 		struct i915_gem_context *ctx = smoke_context(smoke);
 		int err;
 
-		mutex_lock(&smoke->i915->drm.struct_mutex);
 		err = smoke_submit(smoke,
 				   ctx, count % I915_PRIORITY_MAX,
 				   smoke->batch);
-		mutex_unlock(&smoke->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -1514,9 +2499,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 	unsigned long count;
 	int err = 0;
 
-	mutex_unlock(&smoke->i915->drm.struct_mutex);
-
-	for_each_engine(engine, smoke->i915, id) {
+	for_each_engine(engine, smoke->gt, id) {
 		arg[id] = *smoke;
 		arg[id].engine = engine;
 		if (!(flags & BATCH))
@@ -1532,8 +2515,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		get_task_struct(tsk[id]);
 	}
 
+	yield(); /* start all threads before we kthread_stop() */
+
 	count = 0;
-	for_each_engine(engine, smoke->i915, id) {
+	for_each_engine(engine, smoke->gt, id) {
 		int status;
 
 		if (IS_ERR_OR_NULL(tsk[id]))
@@ -1548,11 +2533,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		put_task_struct(tsk[id]);
 	}
 
-	mutex_lock(&smoke->i915->drm.struct_mutex);
-
 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
@@ -1564,7 +2547,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
 
 	count = 0;
 	do {
-		for_each_engine(smoke->engine, smoke->i915, id) {
+		for_each_engine(smoke->engine, smoke->gt, id) {
 			struct i915_gem_context *ctx = smoke_context(smoke);
 			int err;
 
@@ -1580,25 +2563,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
 
 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
 static int live_preempt_smoke(void *arg)
 {
 	struct preempt_smoke smoke = {
-		.i915 = arg,
+		.gt = arg,
 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
 		.ncontext = 1024,
 	};
 	const unsigned int phase[] = { 0, BATCH };
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	int err = -ENOMEM;
 	u32 *cs;
 	int n;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
 		return 0;
 
 	smoke.contexts = kmalloc_array(smoke.ncontext,
@@ -1607,13 +2589,11 @@ static int live_preempt_smoke(void *arg)
 	if (!smoke.contexts)
 		return -ENOMEM;
 
-	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
-	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+	smoke.batch =
+		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
 		err = PTR_ERR(smoke.batch);
-		goto err_unlock;
+		goto err_free;
 	}
 
 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1627,13 +2607,13 @@ static int live_preempt_smoke(void *arg)
 	i915_gem_object_flush_map(smoke.batch);
 	i915_gem_object_unpin_map(smoke.batch);
 
-	if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
 		err = -EIO;
 		goto err_batch;
 	}
 
 	for (n = 0; n < smoke.ncontext; n++) {
-		smoke.contexts[n] = kernel_context(smoke.i915);
+		smoke.contexts[n] = kernel_context(smoke.gt->i915);
 		if (!smoke.contexts[n])
 			goto err_ctx;
 	}
@@ -1660,15 +2640,13 @@ err_ctx:
 
 err_batch:
 	i915_gem_object_put(smoke.batch);
-err_unlock:
-	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
-	mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
 	kfree(smoke.contexts);
 
 	return err;
 }
 
-static int nop_virtual_engine(struct drm_i915_private *i915,
+static int nop_virtual_engine(struct intel_gt *gt,
 			      struct intel_engine_cs **siblings,
 			      unsigned int nsibling,
 			      unsigned int nctx,
@@ -1676,28 +2654,18 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 #define CHAIN BIT(0)
 {
 	IGT_TIMEOUT(end_time);
-	struct i915_request *request[16];
-	struct i915_gem_context *ctx[16];
+	struct i915_request *request[16] = {};
 	struct intel_context *ve[16];
 	unsigned long n, prime, nc;
 	struct igt_live_test t;
 	ktime_t times[2] = {};
 	int err;
 
-	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
 
 	for (n = 0; n < nctx; n++) {
-		ctx[n] = kernel_context(i915);
-		if (!ctx[n]) {
-			err = -ENOMEM;
-			nctx = n;
-			goto out;
-		}
-
-		ve[n] = intel_execlists_create_virtual(ctx[n],
-						       siblings, nsibling);
+		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
 		if (IS_ERR(ve[n])) {
-			kernel_context_close(ctx[n]);
 			err = PTR_ERR(ve[n]);
 			nctx = n;
 			goto out;
@@ -1706,13 +2674,12 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		err = intel_context_pin(ve[n]);
 		if (err) {
 			intel_context_put(ve[n]);
-			kernel_context_close(ctx[n]);
 			nctx = n;
 			goto out;
 		}
 	}
 
-	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
 	if (err)
 		goto out;
 
@@ -1722,27 +2689,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		if (flags & CHAIN) {
 			for (nc = 0; nc < nctx; nc++) {
 				for (n = 0; n < prime; n++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		} else {
 			for (n = 0; n < prime; n++) {
 				for (nc = 0; nc < nctx; nc++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		}
@@ -1759,7 +2734,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 					  request[nc]->fence.context,
 					  request[nc]->fence.seqno);
 				GEM_TRACE_DUMP();
-				intel_gt_set_wedged(&i915->gt);
+				intel_gt_set_wedged(gt);
 				break;
 			}
 		}
@@ -1768,6 +2743,11 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		if (prime == 1)
 			times[0] = times[1];
 
+		for (nc = 0; nc < nctx; nc++) {
+			i915_request_put(request[nc]);
+			request[nc] = NULL;
+		}
+
 		if (__igt_timeout(end_time, NULL))
 			break;
 	}
@@ -1781,38 +2761,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
+		i915_request_put(request[nc]);
 		intel_context_unpin(ve[nc]);
 		intel_context_put(ve[nc]);
-		kernel_context_close(ctx[nc]);
 	}
 	return err;
 }
 
 static int live_virtual_engine(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_engine_cs *engine;
-	struct intel_gt *gt = &i915->gt;
 	enum intel_engine_id id;
 	unsigned int class, inst;
-	int err = -ENODEV;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id) {
-		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+	for_each_engine(engine, gt, id) {
+		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
 		if (err) {
 			pr_err("Failed to wrap engine %s: err=%d\n",
 			       engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 	}
 
@@ -1830,28 +2807,25 @@ static int live_virtual_engine(void *arg)
 			continue;
 
 		for (n = 1; n <= nsibling + 1; n++) {
-			err = nop_virtual_engine(i915, siblings, nsibling,
+			err = nop_virtual_engine(gt, siblings, nsibling,
 						 n, 0);
 			if (err)
-				goto out_unlock;
+				return err;
 		}
 
-		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
-static int mask_virtual_engine(struct drm_i915_private *i915,
+static int mask_virtual_engine(struct intel_gt *gt,
 			       struct intel_engine_cs **siblings,
 			       unsigned int nsibling)
 {
 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
-	struct i915_gem_context *ctx;
 	struct intel_context *ve;
 	struct igt_live_test t;
 	unsigned int n;
@@ -1862,11 +2836,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	 * restrict it to our desired engine within the virtual engine.
 	 */
 
-	ctx = kernel_context(i915);
-	if (!ctx)
-		return -ENOMEM;
-
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_execlists_create_virtual(siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_close;
@@ -1876,7 +2846,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	if (err)
 		goto out_put;
 
-	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
 	if (err)
 		goto out_unpin;
 
@@ -1907,7 +2877,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 				  request[n]->fence.context,
 				  request[n]->fence.seqno);
 			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(&i915->gt);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto out;
 		}
@@ -1922,11 +2892,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	}
 
 	err = igt_live_test_end(&t);
-	if (err)
-		goto out;
-
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -1937,23 +2904,19 @@ out_unpin:
 out_put:
 	intel_context_put(ve);
 out_close:
-	kernel_context_close(ctx);
 	return err;
 }
 
 static int live_virtual_mask(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
-	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		unsigned int nsibling;
 
@@ -1967,17 +2930,157 @@ static int live_virtual_mask(void *arg)
 		if (nsibling < 2)
 			continue;
 
-		err = mask_virtual_engine(i915, siblings, nsibling);
+		err = mask_virtual_engine(gt, siblings, nsibling);
 		if (err)
-			goto out_unlock;
+			return err;
+	}
+
+	return 0;
+}
+
+static int preserved_virtual_engine(struct intel_gt *gt,
+				    struct intel_engine_cs **siblings,
+				    unsigned int nsibling)
+{
+	struct i915_request *last = NULL;
+	struct intel_context *ve;
+	struct i915_vma *scratch;
+	struct igt_live_test t;
+	unsigned int n;
+	int err = 0;
+	u32 *cs;
+
+	scratch = create_scratch(siblings[0]->gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	ve = intel_execlists_create_virtual(siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_scratch;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		struct intel_engine_cs *engine = siblings[n % nsibling];
+		struct i915_request *rq;
+
+		rq = i915_request_create(ve);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_end;
+		}
+
+		i915_request_put(last);
+		last = i915_request_get(rq);
+
+		cs = intel_ring_begin(rq, 8);
+		if (IS_ERR(cs)) {
+			i915_request_add(rq);
+			err = PTR_ERR(cs);
+			goto out_end;
+		}
+
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+		*cs++ = 0;
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
+		*cs++ = n + 1;
+
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+
+		/* Restrict this request to run on a particular engine */
+		rq->execution_mask = engine->mask;
+		i915_request_add(rq);
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+	if (i915_request_wait(last, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto out_end;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out_end;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		if (cs[n] != n) {
+			pr_err("Incorrect value[%d] found for GPR[%d]\n",
+			       cs[n], n);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+out_end:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	i915_request_put(last);
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_scratch:
+	i915_vma_unpin_and_release(&scratch, 0);
 	return err;
 }
 
-static int bond_virtual_engine(struct drm_i915_private *i915,
+static int live_virtual_preserved(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+
+	/*
+	 * Check that the context image retains non-privileged (user) registers
+	 * from one engine to the next. For this we check that the CS_GPR
+	 * are preserved.
+	 */
+
+	if (USES_GUC_SUBMISSION(gt->i915))
+		return 0;
+
+	/* As we use CS_GPR we cannot run before they existed on all engines. */
+	if (INTEL_GEN(gt->i915) < 9)
+		return 0;
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, err;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!gt->engine_class[class][inst])
+				continue;
+
+			siblings[nsibling++] = gt->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = preserved_virtual_engine(gt, siblings, nsibling);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int bond_virtual_engine(struct intel_gt *gt,
 			       unsigned int class,
 			       struct intel_engine_cs **siblings,
 			       unsigned int nsibling,
@@ -1985,21 +3088,59 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 #define BOND_SCHEDULE BIT(0)
 {
 	struct intel_engine_cs *master;
-	struct i915_gem_context *ctx;
 	struct i915_request *rq[16];
 	enum intel_engine_id id;
+	struct igt_spinner spin;
 	unsigned long n;
 	int err;
 
+	/*
+	 * A set of bonded requests is intended to be run concurrently
+	 * across a number of engines. We use one request per-engine
+	 * and a magic fence to schedule each of the bonded requests
+	 * at the same time. A consequence of our current scheduler is that
+	 * we only move requests to the HW ready queue when the request
+	 * becomes ready, that is when all of its prerequisite fences have
+	 * been signaled. As one of those fences is the master submit fence,
+	 * there is a delay on all secondary fences as the HW may be
+	 * currently busy. Equally, as all the requests are independent,
+	 * they may have other fences that delay individual request
+	 * submission to HW. Ergo, we do not guarantee that all requests are
+	 * immediately submitted to HW at the same time, just that if the
+	 * rules are abided by, they are ready at the same time as the
+	 * first is submitted. Userspace can embed semaphores in its batch
+	 * to ensure parallel execution of its phases as it requires.
+	 * Though naturally it gets requested that perhaps the scheduler should
+	 * take care of parallel execution, even across preemption events on
+	 * different HW. (The proper answer is of course "lalalala".)
+	 *
+	 * With the submit-fence, we have identified three possible phases
+	 * of synchronisation depending on the master fence: queued (not
+	 * ready), executing, and signaled. The first two are quite simple
+	 * and checked below. However, the signaled master fence handling is
+	 * contentious. Currently we do not distinguish between a signaled
+	 * fence and an expired fence, as once signaled it does not convey
+	 * any information about the previous execution. It may even be freed
+	 * and hence checking later it may not exist at all. Ergo we currently
+	 * do not apply the bonding constraint for an already signaled fence,
+	 * as our expectation is that it should not constrain the secondaries
+	 * and is outside of the scope of the bonded request API (i.e. all
+	 * userspace requests are meant to be running in parallel). As
+	 * it imposes no constraint, and is effectively a no-op, we do not
+	 * check below as normal execution flows are checked extensively above.
+	 *
+	 * XXX Is the degenerate handling of signaled submit fences the
+	 * expected behaviour for userpace?
+	 */
+
 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
 
-	ctx = kernel_context(i915);
-	if (!ctx)
+	if (igt_spinner_init(&spin, gt))
 		return -ENOMEM;
 
 	err = 0;
 	rq[0] = ERR_PTR(-ENOMEM);
-	for_each_engine(master, i915, id) {
+	for_each_engine(master, gt, id) {
 		struct i915_sw_fence fence = {};
 
 		if (master->class == class)
@@ -2007,7 +3148,9 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_request_alloc(ctx, master);
+		rq[0] = igt_spinner_create_request(&spin,
+						   master->kernel_context,
+						   MI_NOOP);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
@@ -2020,16 +3163,21 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 							       &fence,
 							       GFP_KERNEL);
 		}
+
 		i915_request_add(rq[0]);
 		if (err < 0)
 			goto out;
 
+		if (!(flags & BOND_SCHEDULE) &&
+		    !igt_wait_for_spinner(&spin, rq[0])) {
+			err = -EIO;
+			goto out;
+		}
+
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
-			ve = intel_execlists_create_virtual(ctx,
-							    siblings,
-							    nsibling);
+			ve = intel_execlists_create_virtual(siblings, nsibling);
 			if (IS_ERR(ve)) {
 				err = PTR_ERR(ve);
 				onstack_fence_fini(&fence);
@@ -2071,6 +3219,8 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 			}
 		}
 		onstack_fence_fini(&fence);
+		intel_engine_flush_submission(master);
+		igt_spinner_end(&spin);
 
 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
 			pr_err("Master request did not execute (on %s)!\n",
@@ -2104,10 +3254,10 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	kernel_context_close(ctx);
+	igt_spinner_fini(&spin);
 	return err;
 }
 
@@ -2121,17 +3271,14 @@ static int live_virtual_bond(void *arg)
 		{ "schedule", BOND_SCHEDULE },
 		{ },
 	};
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
-	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		const struct phase *p;
 		int nsibling;
@@ -2148,38 +3295,43 @@ static int live_virtual_bond(void *arg)
 			continue;
 
 		for (p = phases; p->name; p++) {
-			err = bond_virtual_engine(i915,
+			err = bond_virtual_engine(gt,
 						  class, siblings, nsibling,
 						  p->flags);
 			if (err) {
 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
 				       __func__, p->name, class, nsibling, err);
-				goto out_unlock;
+				return err;
 			}
 		}
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
+		SUBTEST(live_unlite_switch),
+		SUBTEST(live_unlite_preempt),
 		SUBTEST(live_timeslice_preempt),
+		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_nopreempt),
+		SUBTEST(live_preempt_cancel),
 		SUBTEST(live_suppress_self_preempt),
 		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),
+		SUBTEST(live_preempt_gang),
 		SUBTEST(live_preempt_hang),
+		SUBTEST(live_preempt_timeout),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
 		SUBTEST(live_virtual_mask),
+		SUBTEST(live_virtual_preserved),
 		SUBTEST(live_virtual_bond),
 	};
 
@@ -2189,5 +3341,498 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	return i915_live_subtests(tests, i915);
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
+
+static void hexdump(const void *buf, size_t len)
+{
+	const size_t rowsize = 8 * sizeof(u32);
+	const void *prev = NULL;
+	bool skip = false;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos += rowsize) {
+		char line[128];
+
+		if (prev && !memcmp(prev, buf + pos, rowsize)) {
+			if (!skip) {
+				pr_info("*\n");
+				skip = true;
+			}
+			continue;
+		}
+
+		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+						rowsize, sizeof(u32),
+						line, sizeof(line),
+						false) >= sizeof(line));
+		pr_info("[%04zx] %s\n", pos, line);
+
+		prev = buf + pos;
+		skip = false;
+	}
+}
+
+static int live_lrc_layout(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 *lrc;
+	int err;
+
+	/*
+	 * Check the registers offsets we use to create the initial reg state
+	 * match the layout saved by HW.
+	 */
+
+	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!lrc)
+		return -ENOMEM;
+
+	err = 0;
+	for_each_engine(engine, gt, id) {
+		u32 *hw;
+		int dw;
+
+		if (!engine->default_state)
+			continue;
+
+		hw = i915_gem_object_pin_map(engine->default_state,
+					     I915_MAP_WB);
+		if (IS_ERR(hw)) {
+			err = PTR_ERR(hw);
+			break;
+		}
+		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
+					 engine->kernel_context,
+					 engine,
+					 engine->kernel_context->ring,
+					 true);
+
+		dw = 0;
+		do {
+			u32 lri = hw[dw];
+
+			if (lri == 0) {
+				dw++;
+				continue;
+			}
+
+			if (lrc[dw] == 0) {
+				pr_debug("%s: skipped instruction %x at dword %d\n",
+					 engine->name, lri, dw);
+				dw++;
+				continue;
+			}
+
+			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+				       engine->name, dw, lri);
+				err = -EINVAL;
+				break;
+			}
+
+			if (lrc[dw] != lri) {
+				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+				       engine->name, dw, lri, lrc[dw]);
+				err = -EINVAL;
+				break;
+			}
+
+			lri &= 0x7f;
+			lri++;
+			dw++;
+
+			while (lri) {
+				if (hw[dw] != lrc[dw]) {
+					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+					       engine->name, dw, hw[dw], lrc[dw]);
+					err = -EINVAL;
+					break;
+				}
+
+				/*
+				 * Skip over the actual register value as we
+				 * expect that to differ.
+				 */
+				dw += 2;
+				lri -= 2;
+			}
+		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+		if (err) {
+			pr_info("%s: HW register image:\n", engine->name);
+			hexdump(hw, PAGE_SIZE);
+
+			pr_info("%s: SW register image:\n", engine->name);
+			hexdump(lrc, PAGE_SIZE);
+		}
+
+		i915_gem_object_unpin_map(engine->default_state);
+		if (err)
+			break;
+	}
+
+	kfree(lrc);
+	return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+	int i;
+
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		if (lri[i] == offset)
+			return i;
+
+	return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check the assumed register offsets match the actual locations in
+	 * the context image.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		const struct {
+			u32 reg;
+			u32 offset;
+			const char *name;
+		} tbl[] = {
+			{
+				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+				CTX_RING_START - 1,
+				"RING_START"
+			},
+			{
+				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+				CTX_RING_CTL - 1,
+				"RING_CTL"
+			},
+			{
+				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+				CTX_RING_HEAD - 1,
+				"RING_HEAD"
+			},
+			{
+				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+				CTX_RING_TAIL - 1,
+				"RING_TAIL"
+			},
+			{
+				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+				lrc_ring_mi_mode(engine),
+				"RING_MI_MODE"
+			},
+			{
+				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+				CTX_BB_STATE - 1,
+				"BB_STATE"
+			},
+			{ },
+		}, *t;
+		u32 *hw;
+
+		if (!engine->default_state)
+			continue;
+
+		hw = i915_gem_object_pin_map(engine->default_state,
+					     I915_MAP_WB);
+		if (IS_ERR(hw)) {
+			err = PTR_ERR(hw);
+			break;
+		}
+		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+		for (t = tbl; t->name; t++) {
+			int dw = find_offset(hw, t->reg);
+
+			if (dw != t->offset) {
+				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+				       engine->name,
+				       t->name,
+				       t->reg,
+				       dw,
+				       t->offset);
+				err = -EINVAL;
+			}
+		}
+
+		i915_gem_object_unpin_map(engine->default_state);
+	}
+
+	return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
+			    struct i915_vma *scratch)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	enum {
+		RING_START_IDX = 0,
+		RING_TAIL_IDX,
+		MAX_IDX
+	};
+	u32 expected[MAX_IDX];
+	u32 *cs;
+	int err;
+	int n;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto err_put;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	cs = intel_ring_begin(rq, 4 * MAX_IDX);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		i915_request_add(rq);
+		goto err_unpin;
+	}
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+	*cs++ = 0;
+
+	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+	*cs++ = 0;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	intel_engine_flush_submission(engine);
+	expected[RING_TAIL_IDX] = ce->ring->tail;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_rq;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	for (n = 0; n < MAX_IDX; n++) {
+		if (cs[n] != expected[n]) {
+			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+			       engine->name, n, cs[n], expected[n]);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+	i915_request_put(rq);
+err_unpin:
+	intel_context_unpin(ce);
+err_put:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct i915_vma *scratch;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check the live register state matches what we expect for this
+	 * intel_context.
+	 */
+
+	scratch = create_scratch(gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	for_each_engine(engine, gt, id) {
+		err = __live_lrc_state(engine, scratch);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	u32 *cs;
+	int n;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = STACK_MAGIC;
+	}
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+	i915_request_add(rq);
+
+	return 0;
+}
+
+static int __live_gpr_clear(struct intel_engine_cs *engine,
+			    struct i915_vma *scratch)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+	int n;
+
+	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+		return 0; /* GPR only on rcs0 for gen8 */
+
+	err = gpr_make_dirty(engine);
+	if (err)
+		return err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_put;
+	}
+
+	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		i915_request_add(rq);
+		goto err_put;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+		*cs++ = 0;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_rq;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		if (cs[n]) {
+			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+			       engine->name,
+			       n / 2, n & 1 ? "udw" : "ldw",
+			       cs[n]);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+	i915_request_put(rq);
+err_put:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_gpr_clear(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct i915_vma *scratch;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check that GPR registers are cleared in new contexts as we need
+	 * to avoid leaking any information from previous contexts.
+	 */
+
+	scratch = create_scratch(gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	for_each_engine(engine, gt, id) {
+		err = __live_gpr_clear(engine, scratch);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_lrc_layout),
+		SUBTEST(live_lrc_fixed),
+		SUBTEST(live_lrc_state),
+		SUBTEST(live_gpr_clear),
+	};
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
new file mode 100644
index 000000000000..de1f83100fb6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -0,0 +1,419 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gt/intel_engine_pm.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/mock_context.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+
+struct live_mocs {
+	struct drm_i915_mocs_table table;
+	struct i915_vma *scratch;
+	void *vaddr;
+};
+
+static int request_add_sync(struct i915_request *rq, int err)
+{
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+	int err = 0;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (spin && !igt_wait_for_spinner(spin, rq))
+		err = -ETIME;
+	i915_request_put(rq);
+
+	return err;
+}
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
+{
+	int err;
+
+	if (!get_mocs_settings(gt->i915, &arg->table))
+		return -EINVAL;
+
+	arg->scratch = create_scratch(gt);
+	if (IS_ERR(arg->scratch))
+		return PTR_ERR(arg->scratch);
+
+	arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB);
+	if (IS_ERR(arg->vaddr)) {
+		err = PTR_ERR(arg->vaddr);
+		goto err_scratch;
+	}
+
+	return 0;
+
+err_scratch:
+	i915_vma_unpin_and_release(&arg->scratch, 0);
+	return err;
+}
+
+static void live_mocs_fini(struct live_mocs *arg)
+{
+	i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP);
+}
+
+static int read_regs(struct i915_request *rq,
+		     u32 addr, unsigned int count,
+		     uint32_t *offset)
+{
+	unsigned int i;
+	u32 *cs;
+
+	GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32)));
+
+	cs = intel_ring_begin(rq, 4 * count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	for (i = 0; i < count; i++) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = addr;
+		*cs++ = *offset;
+		*cs++ = 0;
+
+		addr += sizeof(u32);
+		*offset += sizeof(u32);
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int read_mocs_table(struct i915_request *rq,
+			   const struct drm_i915_mocs_table *table,
+			   uint32_t *offset)
+{
+	u32 addr;
+
+	if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+		addr = global_mocs_offset();
+	else
+		addr = mocs_offset(rq->engine);
+
+	return read_regs(rq, addr, table->n_entries, offset);
+}
+
+static int read_l3cc_table(struct i915_request *rq,
+			   const struct drm_i915_mocs_table *table,
+			   uint32_t *offset)
+{
+	u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+
+	return read_regs(rq, addr, (table->n_entries + 1) / 2, offset);
+}
+
+static int check_mocs_table(struct intel_engine_cs *engine,
+			    const struct drm_i915_mocs_table *table,
+			    uint32_t **vaddr)
+{
+	unsigned int i;
+	u32 expect;
+
+	for_each_mocs(expect, table, i) {
+		if (**vaddr != expect) {
+			pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n",
+			       engine->name, i, **vaddr, expect);
+			return -EINVAL;
+		}
+		++*vaddr;
+	}
+
+	return 0;
+}
+
+static bool mcr_range(struct drm_i915_private *i915, u32 offset)
+{
+	/*
+	 * Registers in this range are affected by the MCR selector
+	 * which only controls CPU initiated MMIO. Routing does not
+	 * work for CS access so we cannot verify them on this path.
+	 */
+	return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff;
+}
+
+static int check_l3cc_table(struct intel_engine_cs *engine,
+			    const struct drm_i915_mocs_table *table,
+			    uint32_t **vaddr)
+{
+	/* Can we read the MCR range 0xb00 directly? See intel_workarounds! */
+	u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+	unsigned int i;
+	u32 expect;
+
+	for_each_l3cc(expect, table, i) {
+		if (!mcr_range(engine->i915, reg) && **vaddr != expect) {
+			pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n",
+			       engine->name, i, **vaddr, expect);
+			return -EINVAL;
+		}
+		++*vaddr;
+		reg += 4;
+	}
+
+	return 0;
+}
+
+static int check_mocs_engine(struct live_mocs *arg,
+			     struct intel_context *ce)
+{
+	struct i915_vma *vma = arg->scratch;
+	struct i915_request *rq;
+	u32 offset;
+	u32 *vaddr;
+	int err;
+
+	memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+
+	/* Read the mocs tables back using SRM */
+	offset = i915_ggtt_offset(vma);
+	if (!err)
+		err = read_mocs_table(rq, &arg->table, &offset);
+	if (!err && ce->engine->class == RENDER_CLASS)
+		err = read_l3cc_table(rq, &arg->table, &offset);
+	offset -= i915_ggtt_offset(vma);
+	GEM_BUG_ON(offset > PAGE_SIZE);
+
+	err = request_add_sync(rq, err);
+	if (err)
+		return err;
+
+	/* Compare the results against the expected tables */
+	vaddr = arg->vaddr;
+	if (!err)
+		err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+	if (!err && ce->engine->class == RENDER_CLASS)
+		err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+	if (err)
+		return err;
+
+	GEM_BUG_ON(arg->vaddr + offset != vaddr);
+	return 0;
+}
+
+static int live_mocs_kernel(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct live_mocs mocs;
+	int err;
+
+	/* Basic check the system is configured with the expected mocs table */
+
+	err = live_mocs_init(&mocs, gt);
+	if (err)
+		return err;
+
+	for_each_engine(engine, gt, id) {
+		intel_engine_pm_get(engine);
+		err = check_mocs_engine(&mocs, engine->kernel_context);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
+
+	live_mocs_fini(&mocs);
+	return err;
+}
+
+static int live_mocs_clean(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct live_mocs mocs;
+	int err;
+
+	/* Every new context should see the same mocs table */
+
+	err = live_mocs_init(&mocs, gt);
+	if (err)
+		return err;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		err = check_mocs_engine(&mocs, ce);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
+	live_mocs_fini(&mocs);
+	return err;
+}
+
+static int active_engine_reset(struct intel_context *ce,
+			       const char *reason)
+{
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	int err;
+
+	err = igt_spinner_init(&spin, ce->engine->gt);
+	if (err)
+		return err;
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		igt_spinner_fini(&spin);
+		return PTR_ERR(rq);
+	}
+
+	err = request_add_spin(rq, &spin);
+	if (err == 0)
+		err = intel_engine_reset(ce->engine, reason);
+
+	igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+
+	return err;
+}
+
+static int __live_mocs_reset(struct live_mocs *mocs,
+			     struct intel_context *ce)
+{
+	int err;
+
+	err = intel_engine_reset(ce->engine, "mocs");
+	if (err)
+		return err;
+
+	err = check_mocs_engine(mocs, ce);
+	if (err)
+		return err;
+
+	err = active_engine_reset(ce, "mocs");
+	if (err)
+		return err;
+
+	err = check_mocs_engine(mocs, ce);
+	if (err)
+		return err;
+
+	intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs");
+
+	err = check_mocs_engine(mocs, ce);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int live_mocs_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct live_mocs mocs;
+	int err = 0;
+
+	/* Check the mocs setup is retained over per-engine and global resets */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	err = live_mocs_init(&mocs, gt);
+	if (err)
+		return err;
+
+	igt_global_reset_lock(gt);
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		intel_engine_pm_get(engine);
+		err = __live_mocs_reset(&mocs, ce);
+		intel_engine_pm_put(engine);
+
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+	igt_global_reset_unlock(gt);
+
+	live_mocs_fini(&mocs);
+	return err;
+}
+
+int intel_mocs_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_mocs_kernel),
+		SUBTEST(live_mocs_clean),
+		SUBTEST(live_mocs_reset),
+	};
+	struct drm_i915_mocs_table table;
+
+	if (!get_mocs_settings(i915, &table))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
new file mode 100644
index 000000000000..8cc55a0e9e06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -0,0 +1,203 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
+#include "selftest_rc6.h"
+
+#include "selftests/i915_random.h"
+
+int live_rc6_manual(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_rc6 *rc6 = &gt->rc6;
+	intel_wakeref_t wakeref;
+	u64 res[2];
+	int err = 0;
+
+	/*
+	 * Our claim is that we can "encourage" the GPU to enter rc6 at will.
+	 * Let's try it!
+	 */
+
+	if (!rc6->enabled)
+		return 0;
+
+	/* bsw/byt use a PCU and decouple RC6 from our manual control */
+	if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+		return 0;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	/* Force RC6 off for starters */
+	__intel_rc6_disable(rc6);
+	msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+
+	res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	msleep(250);
+	res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	if ((res[1] - res[0]) >> 10) {
+		pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+		       (res[1] - res[0]) >> 10);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Manually enter RC6 */
+	intel_rc6_park(rc6);
+
+	res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	msleep(100);
+	res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+
+	if (res[1] == res[0]) {
+		pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+		       intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
+		       intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+		err = -EINVAL;
+	}
+
+	/* Restore what should have been the original state! */
+	intel_rc6_unpark(rc6);
+
+out_unlock:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	return err;
+}
+
+static const u32 *__live_rc6_ctx(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	const u32 *result;
+	u32 cmd;
+	u32 *cs;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return ERR_CAST(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return cs;
+	}
+
+	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+	if (INTEL_GEN(rq->i915) >= 8)
+		cmd++;
+
+	*cs++ = cmd;
+	*cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
+	*cs++ = ce->timeline->hwsp_offset + 8;
+	*cs++ = 0;
+	intel_ring_advance(rq, cs);
+
+	result = rq->hwsp_seqno + 2;
+	i915_request_add(rq);
+
+	return result;
+}
+
+static struct intel_engine_cs **
+randomised_engines(struct intel_gt *gt,
+		   struct rnd_state *prng,
+		   unsigned int *count)
+{
+	struct intel_engine_cs *engine, **engines;
+	enum intel_engine_id id;
+	int n;
+
+	n = 0;
+	for_each_engine(engine, gt, id)
+		n++;
+	if (!n)
+		return NULL;
+
+	engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return NULL;
+
+	n = 0;
+	for_each_engine(engine, gt, id)
+		engines[n++] = engine;
+
+	i915_prandom_shuffle(engines, sizeof(*engines), n, prng);
+
+	*count = n;
+	return engines;
+}
+
+int live_rc6_ctx_wa(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs **engines;
+	unsigned int n, count;
+	I915_RND_STATE(prng);
+	int err = 0;
+
+	/* A read of CTX_INFO upsets rc6. Poke the bear! */
+	if (INTEL_GEN(gt->i915) < 8)
+		return 0;
+
+	engines = randomised_engines(gt, &prng, &count);
+	if (!engines)
+		return 0;
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *engine = engines[n];
+		int pass;
+
+		for (pass = 0; pass < 2; pass++) {
+			struct intel_context *ce;
+			unsigned int resets =
+				i915_reset_engine_count(&gt->i915->gpu_error,
+							engine);
+			const u32 *res;
+
+			/* Use a sacrifical context */
+			ce = intel_context_create(engine);
+			if (IS_ERR(ce)) {
+				err = PTR_ERR(ce);
+				goto out;
+			}
+
+			intel_engine_pm_get(engine);
+			res = __live_rc6_ctx(ce);
+			intel_engine_pm_put(engine);
+			intel_context_put(ce);
+			if (IS_ERR(res)) {
+				err = PTR_ERR(res);
+				goto out;
+			}
+
+			if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+				intel_gt_set_wedged(gt);
+				err = -ETIME;
+				goto out;
+			}
+
+			intel_gt_pm_wait_for_idle(gt);
+			pr_debug("%s: CTX_INFO=%0x\n",
+				 engine->name, READ_ONCE(*res));
+
+			if (resets !=
+			    i915_reset_engine_count(&gt->i915->gpu_error,
+						    engine)) {
+				pr_err("%s: GPU reset required\n",
+				       engine->name);
+				add_taint_for_CI(TAINT_WARN);
+				err = -EIO;
+				goto out;
+			}
+		}
+	}
+
+out:
+	kfree(engines);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h
new file mode 100644
index 000000000000..762fd442d7b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_RC6_H
+#define SELFTEST_RC6_H
+
+int live_rc6_ctx_wa(void *arg);
+int live_rc6_manual(void *arg);
+
+#endif /* SELFTEST_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 00a4f60cdfd5..6ad6aca315f6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -17,7 +17,7 @@ static int igt_global_reset(void *arg)
 	/* Check that we can issue a global GPU reset */
 
 	igt_global_reset_lock(gt);
-	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	reset_count = i915_reset_count(&gt->i915->gpu_error);
 
@@ -28,7 +28,7 @@ static int igt_global_reset(void *arg)
 		err = -EINVAL;
 	}
 
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg)
 	/* Check that we can recover a wedged device with a GPU reset */
 
 	igt_global_reset_lock(gt);
-	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	intel_gt_set_wedged(gt);
 
 	GEM_BUG_ON(!intel_gt_is_wedged(gt));
 	intel_gt_reset(gt, ALL_ENGINES, NULL);
 
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 	igt_global_reset_unlock(gt);
 
 	return intel_gt_is_wedged(gt) ? -EIO : 0;
@@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg)
 
 	/* Check that the resets are usable from atomic context */
 
-	if (!intel_has_reset_engine(gt->i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
 	if (USES_GUC_SUBMISSION(gt->i915))
@@ -125,8 +125,8 @@ static int igt_atomic_engine_reset(void *arg)
 	if (!igt_force_reset(gt))
 		goto out_unlock;
 
-	for_each_engine(engine, gt->i915, id) {
-		tasklet_disable_nosync(&engine->execlists.tasklet);
+	for_each_engine(engine, gt, id) {
+		tasklet_disable(&engine->execlists.tasklet);
 		intel_engine_pm_get(engine);
 
 		for (p = igt_atomic_phases; p->name; p++) {
@@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
 	};
 	struct intel_gt *gt = &i915->gt;
 
-	if (!intel_has_gpu_reset(gt->i915))
+	if (!intel_has_gpu_reset(gt))
 		return 0;
 
 	if (intel_gt_is_wedged(gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..e2d78cc22fb4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,8 +6,10 @@
 
 #include <linux/prime_numbers.h>
 
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
 #include "intel_gt.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
@@ -34,7 +36,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl)
 #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
 
 struct mock_hwsp_freelist {
-	struct drm_i915_private *i915;
+	struct intel_gt *gt;
 	struct radix_tree_root cachelines;
 	struct intel_timeline **history;
 	unsigned long count, max;
@@ -67,7 +69,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
 		unsigned long cacheline;
 		int err;
 
-		tl = intel_timeline_create(&state->i915->gt, NULL);
+		tl = intel_timeline_create(state->gt, NULL);
 		if (IS_ERR(tl))
 			return PTR_ERR(tl);
 
@@ -105,6 +107,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
 static int mock_hwsp_freelist(void *arg)
 {
 	struct mock_hwsp_freelist state;
+	struct drm_i915_private *i915;
 	const struct {
 		const char *name;
 		unsigned int flags;
@@ -116,12 +119,14 @@ static int mock_hwsp_freelist(void *arg)
 	unsigned int na;
 	int err = 0;
 
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
 	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
 	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
 
-	state.i915 = mock_gem_device();
-	if (!state.i915)
-		return -ENOMEM;
+	state.gt = &i915->gt;
 
 	/*
 	 * Create a bunch of timelines and check that their HWSP do not overlap.
@@ -136,7 +141,6 @@ static int mock_hwsp_freelist(void *arg)
 		goto err_put;
 	}
 
-	mutex_lock(&state.i915->drm.struct_mutex);
 	for (p = phases; p->name; p++) {
 		pr_debug("%s(%s)\n", __func__, p->name);
 		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,10 +153,9 @@ static int mock_hwsp_freelist(void *arg)
 out:
 	for (na = 0; na < state.max; na++)
 		__mock_hwsp_record(&state, na, NULL);
-	mutex_unlock(&state.i915->drm.struct_mutex);
 	kfree(state.history);
 err_put:
-	drm_dev_put(&state.i915->drm);
+	drm_dev_put(&i915->drm);
 	return err;
 }
 
@@ -449,22 +452,24 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
 	err = intel_timeline_pin(tl);
 	if (err) {
 		rq = ERR_PTR(err);
 		goto out;
 	}
 
-	rq = i915_request_create(engine->kernel_context);
+	rq = intel_engine_create_kernel_request(engine);
 	if (IS_ERR(rq))
 		goto out_unpin;
 
+	i915_request_get(rq);
+
 	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 	i915_request_add(rq);
-	if (err)
+	if (err) {
+		i915_request_put(rq);
 		rq = ERR_PTR(err);
+	}
 
 out_unpin:
 	intel_timeline_unpin(tl);
@@ -475,11 +480,11 @@ out:
 }
 
 static struct intel_timeline *
-checked_intel_timeline_create(struct drm_i915_private *i915)
+checked_intel_timeline_create(struct intel_gt *gt)
 {
 	struct intel_timeline *tl;
 
-	tl = intel_timeline_create(&i915->gt, NULL);
+	tl = intel_timeline_create(gt, NULL);
 	if (IS_ERR(tl))
 		return tl;
 
@@ -496,11 +501,10 @@ checked_intel_timeline_create(struct drm_i915_private *i915)
 static int live_hwsp_engine(void *arg)
 {
 #define NUM_TIMELINES 4096
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -515,37 +519,40 @@ static int live_hwsp_engine(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		for (n = 0; n < NUM_TIMELINES; n++) {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
 
-			tl = checked_intel_timeline_create(i915);
+			tl = checked_intel_timeline_create(gt);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
+
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
 	}
 
-out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -559,11 +566,7 @@ out:
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -571,11 +574,10 @@ out:
 static int live_hwsp_alternate(void *arg)
 {
 #define NUM_TIMELINES 4096
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -591,25 +593,25 @@ static int live_hwsp_alternate(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for (n = 0; n < NUM_TIMELINES; n++) {
-		for_each_engine(engine, i915, id) {
+		for_each_engine(engine, gt, id) {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
 
 			if (!intel_engine_can_store_dword(engine))
 				continue;
 
-			tl = checked_intel_timeline_create(i915);
+			tl = checked_intel_timeline_create(gt);
 			if (IS_ERR(tl)) {
+				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
 
+			intel_engine_pm_get(engine);
 			rq = tl_write(tl, engine, count);
+			intel_engine_pm_put(engine);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
@@ -617,11 +619,12 @@ static int live_hwsp_alternate(void *arg)
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -635,22 +638,17 @@ out:
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
 
 static int live_hwsp_wrap(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -658,14 +656,10 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	tl = intel_timeline_create(gt, NULL);
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
 
-	tl = intel_timeline_create(&i915->gt, NULL);
-	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto out_rpm;
-	}
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
@@ -673,7 +667,7 @@ static int live_hwsp_wrap(void *arg)
 	if (err)
 		goto out_free;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		const u32 *hwsp_seqno[2];
 		struct i915_request *rq;
 		u32 seqno[2];
@@ -681,7 +675,7 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = i915_request_create(engine->kernel_context);
+		rq = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
@@ -743,29 +737,24 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 
-		i915_retire_requests(i915); /* recycle HWSP */
+		intel_gt_retire_requests(gt); /* recycle HWSP */
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	intel_timeline_unpin(tl);
 out_free:
 	intel_timeline_put(tl);
-out_rpm:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
 static int live_hwsp_recycle(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count;
 	int err = 0;
 
@@ -775,38 +764,38 @@ static int live_hwsp_recycle(void *arg)
 	 * want to confuse ourselves or the GPU.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		IGT_TIMEOUT(end_time);
 
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		do {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
 
-			tl = checked_intel_timeline_create(i915);
+			tl = checked_intel_timeline_create(gt);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Wait for timeline writes timed out!\n");
+				i915_request_put(rq);
 				intel_timeline_put(tl);
 				err = -EIO;
-				goto out;
+				break;
 			}
 
 			if (*tl->hwsp_seqno != count) {
@@ -815,17 +804,18 @@ static int live_hwsp_recycle(void *arg)
 				err = -EINVAL;
 			}
 
+			i915_request_put(rq);
 			intel_timeline_put(tl);
 			count++;
 
 			if (err)
-				goto out;
+				break;
 		} while (!__igt_timeout(end_time, NULL));
-	}
 
-out:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
 
 	return err;
 }
@@ -842,5 +832,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	return i915_live_subtests(tests, i915);
+	return intel_gt_live_subtests(tests, &i915->gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d06d68ac2a3b..ac1921854cbf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -33,8 +33,32 @@ struct wa_lists {
 	} engine[I915_NUM_ENGINES];
 };
 
+static int request_add_sync(struct i915_request *rq, int err)
+{
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -EIO;
+	i915_request_put(rq);
+
+	return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+	int err = 0;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (spin && !igt_wait_for_spinner(spin, rq))
+		err = -ETIMEDOUT;
+	i915_request_put(rq);
+
+	return err;
+}
+
 static void
-reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -42,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
 	memset(lists, 0, sizeof(*lists));
 
 	wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
-	gt_init_workarounds(i915, &lists->gt_wa_list);
+	gt_init_workarounds(gt->i915, &lists->gt_wa_list);
 	wa_init_finish(&lists->gt_wa_list);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_wa_list *wal = &lists->engine[id].wa_list;
 
 		wa_init_start(wal, "REF", engine->name);
@@ -59,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
 }
 
 static void
-reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, i915, id)
+	for_each_engine(engine, gt, id)
 		intel_wa_list_free(&lists->engine[id].wa_list);
 
 	intel_wa_list_free(&lists->gt_wa_list);
@@ -191,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 
 	err = 0;
 	i915_gem_object_lock(results);
-	intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */
+	intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
 	i915_gem_object_unlock(results);
-	if (intel_gt_is_wedged(&ctx->i915->gt))
+	if (intel_gt_is_wedged(engine->gt))
 		err = -EIO;
 	if (err)
 		goto out_put;
@@ -240,27 +264,16 @@ static int
 switch_to_scratch_context(struct intel_engine_cs *engine,
 			  struct igt_spinner *spin)
 {
-	struct i915_gem_context *ctx;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
-	ctx = kernel_context(engine->i915);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
-
-	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
-	GEM_BUG_ON(IS_ERR(ce));
-
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
-		rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
+	rq = igt_spinner_create_request(spin, ce, MI_NOOP);
 	intel_context_put(ce);
-	kernel_context_close(ctx);
 
 	if (IS_ERR(rq)) {
 		spin = NULL;
@@ -268,13 +281,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 		goto err;
 	}
 
-	i915_request_add(rq);
-
-	if (spin && !igt_wait_for_spinner(spin, rq)) {
-		pr_err("Spinner failed to start\n");
-		err = -ETIMEDOUT;
-	}
-
+	err = request_add_spin(rq, spin);
 err:
 	if (err && spin)
 		igt_spinner_end(spin);
@@ -313,7 +320,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	if (err)
 		goto out_spin;
 
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+	with_intel_runtime_pm(engine->uncore->rpm, wakeref)
 		err = reset(engine);
 
 	igt_spinner_end(&spin);
@@ -352,17 +359,17 @@ out_ctx:
 	return err;
 }
 
-static struct i915_vma *create_batch(struct i915_gem_context *ctx)
+static struct i915_vma *create_batch(struct i915_address_space *vm)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
@@ -434,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine)
 	return count;
 }
 
-static int check_dirty_whitelist(struct i915_gem_context *ctx,
-				 struct intel_engine_cs *engine)
+static int check_dirty_whitelist(struct intel_context *ce)
 {
 	const u32 values[] = {
 		0x00000000,
@@ -463,16 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		0xffff00ff,
 		0xffffffff,
 	};
+	struct intel_engine_cs *engine = ce->engine;
 	struct i915_vma *scratch;
 	struct i915_vma *batch;
 	int err = 0, i, v;
 	u32 *cs, *results;
 
-	scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+	scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
-	batch = create_batch(ctx);
+	batch = create_batch(ce->vm);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto out_scratch;
@@ -492,9 +499,12 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 
 		ro_reg = ro_register(reg);
 
+		/* Clear non priv flags */
+		reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
 		srm = MI_STORE_REGISTER_MEM;
 		lrm = MI_LOAD_REGISTER_MEM;
-		if (INTEL_GEN(ctx->i915) >= 8)
+		if (INTEL_GEN(engine->i915) >= 8)
 			lrm++, srm++;
 
 		pr_debug("%s: Writing garbage to %x\n",
@@ -553,7 +563,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		i915_gem_object_unpin_map(batch->obj);
 		intel_gt_chipset_flush(engine->gt);
 
-		rq = igt_request_alloc(ctx, engine);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out_batch;
@@ -565,6 +575,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 				goto err_request;
 		}
 
+		i915_vma_lock(batch);
+		err = i915_request_await_object(rq, batch->obj, false);
+		if (err == 0)
+			err = i915_vma_move_to_active(batch, rq, 0);
+		i915_vma_unlock(batch);
+		if (err)
+			goto err_request;
+
 		err = engine->emit_bb_start(rq,
 					    batch->node.start, PAGE_SIZE,
 					    0);
@@ -572,15 +590,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 			goto err_request;
 
 err_request:
-		i915_request_add(rq);
-		if (err)
-			goto out_batch;
-
-		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = request_add_sync(rq, err);
+		if (err) {
 			pr_err("%s: Futzing %x timedout; cancelling test\n",
 			       engine->name, reg);
-			intel_gt_set_wedged(&ctx->i915->gt);
-			err = -EIO;
+			intel_gt_set_wedged(engine->gt);
 			goto out_batch;
 		}
 
@@ -668,7 +682,7 @@ out_unpin:
 			break;
 	}
 
-	if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(engine->i915))
 		err = -EIO;
 out_batch:
 	i915_vma_unpin_and_release(&batch, 0);
@@ -679,84 +693,68 @@ out_scratch:
 
 static int live_dirty_whitelist(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-	struct drm_file *file;
-	int err = 0;
 
 	/* Can the user write to the whitelisted registers? */
 
-	if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
+	if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
 		return 0;
 
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	file = mock_file(i915);
-	mutex_lock(&i915->drm.struct_mutex);
-	if (IS_ERR(file)) {
-		err = PTR_ERR(file);
-		goto out_rpm;
-	}
-
-	ctx = live_context(i915, file);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out_file;
-	}
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		int err;
 
-	for_each_engine(engine, i915, id) {
 		if (engine->whitelist.count == 0)
 			continue;
 
-		err = check_dirty_whitelist(ctx, engine);
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
+
+		err = check_dirty_whitelist(ce);
+		intel_context_put(ce);
 		if (err)
-			goto out_file;
+			return err;
 	}
 
-out_file:
-	mutex_unlock(&i915->drm.struct_mutex);
-	mock_file_free(i915, file);
-	mutex_lock(&i915->drm.struct_mutex);
-out_rpm:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	return err;
+	return 0;
 }
 
 static int live_reset_whitelist(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = 0;
 
 	/* If we reset the gpu, we should not lose the RING_NONPRIV */
+	igt_global_reset_lock(gt);
 
-	if (!engine || engine->whitelist.count == 0)
-		return 0;
-
-	igt_global_reset_lock(&i915->gt);
+	for_each_engine(engine, gt, id) {
+		if (engine->whitelist.count == 0)
+			continue;
 
-	if (intel_has_reset_engine(i915)) {
-		err = check_whitelist_across_reset(engine,
-						   do_engine_reset,
-						   "engine");
-		if (err)
-			goto out;
-	}
+		if (intel_has_reset_engine(gt)) {
+			err = check_whitelist_across_reset(engine,
+							   do_engine_reset,
+							   "engine");
+			if (err)
+				goto out;
+		}
 
-	if (intel_has_gpu_reset(i915)) {
-		err = check_whitelist_across_reset(engine,
-						   do_device_reset,
-						   "device");
-		if (err)
-			goto out;
+		if (intel_has_gpu_reset(gt)) {
+			err = check_whitelist_across_reset(engine,
+							   do_device_reset,
+							   "device");
+			if (err)
+				goto out;
+		}
 	}
 
 out:
-	igt_global_reset_unlock(&i915->gt);
+	igt_global_reset_unlock(gt);
 	return err;
 }
 
@@ -772,6 +770,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
+	i915_vma_lock(results);
+	err = i915_request_await_object(rq, results->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(results);
+	if (err)
+		goto err_req;
+
 	srm = MI_STORE_REGISTER_MEM;
 	if (INTEL_GEN(ctx->i915) >= 8)
 		srm++;
@@ -786,8 +792,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 		u64 offset = results->node.start + sizeof(u32) * i;
 		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
 
-		/* Clear access permission field */
-		reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK;
+		/* Clear non priv flags */
+		reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
 
 		*cs++ = srm;
 		*cs++ = reg;
@@ -797,23 +803,21 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 	intel_ring_advance(rq, cs);
 
 err_req:
-	i915_request_add(rq);
-
-	if (i915_request_wait(rq, 0, HZ / 5) < 0)
-		err = -EIO;
-
-	return err;
+	return request_add_sync(rq, err);
 }
 
 static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 				       struct intel_engine_cs *engine)
 {
+	struct i915_address_space *vm;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int i, err = 0;
 	u32 *cs;
 
-	batch = create_batch(ctx);
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	batch = create_batch(vm);
+	i915_vm_put(vm);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
@@ -830,6 +834,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 		if (ro_register(reg))
 			continue;
 
+		/* Clear non priv flags */
+		reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
 		*cs++ = reg;
 		*cs++ = 0xffffffff;
 	}
@@ -850,13 +857,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 			goto err_request;
 	}
 
+	i915_vma_lock(batch);
+	err = i915_request_await_object(rq, batch->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto err_request;
+
 	/* Perform the writes from an unprivileged "user" batch */
 	err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
 
 err_request:
-	i915_request_add(rq);
-	if (i915_request_wait(rq, 0, HZ / 5) < 0)
-		err = -EIO;
+	err = request_add_sync(rq, err);
 
 err_unpin:
 	i915_gem_object_unpin_map(batch->obj);
@@ -973,7 +986,7 @@ err_a:
 
 static int live_isolated_whitelist(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct {
 		struct i915_gem_context *ctx;
 		struct i915_vma *scratch[2];
@@ -987,40 +1000,46 @@ static int live_isolated_whitelist(void *arg)
 	 * invisible to a second context.
 	 */
 
-	if (!intel_engines_has_context_isolation(i915))
-		return 0;
-
-	if (!i915->kernel_context->vm)
+	if (!intel_engines_has_context_isolation(gt->i915))
 		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(client); i++) {
+		struct i915_address_space *vm;
 		struct i915_gem_context *c;
 
-		c = kernel_context(i915);
+		c = kernel_context(gt->i915);
 		if (IS_ERR(c)) {
 			err = PTR_ERR(c);
 			goto err;
 		}
 
-		client[i].scratch[0] = create_scratch(c->vm, 1024);
+		vm = i915_gem_context_get_vm_rcu(c);
+
+		client[i].scratch[0] = create_scratch(vm, 1024);
 		if (IS_ERR(client[i].scratch[0])) {
 			err = PTR_ERR(client[i].scratch[0]);
+			i915_vm_put(vm);
 			kernel_context_close(c);
 			goto err;
 		}
 
-		client[i].scratch[1] = create_scratch(c->vm, 1024);
+		client[i].scratch[1] = create_scratch(vm, 1024);
 		if (IS_ERR(client[i].scratch[1])) {
 			err = PTR_ERR(client[i].scratch[1]);
 			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+			i915_vm_put(vm);
 			kernel_context_close(c);
 			goto err;
 		}
 
 		client[i].ctx = c;
+		i915_vm_put(vm);
 	}
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
+		if (!engine->kernel_context->vm)
+			continue;
+
 		if (!whitelist_writable_count(engine))
 			continue;
 
@@ -1074,7 +1093,7 @@ err:
 		kernel_context_close(client[i].ctx);
 	}
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	return err;
@@ -1109,16 +1128,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
 static int
 live_gpu_reset_workarounds(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx;
 	intel_wakeref_t wakeref;
 	struct wa_lists lists;
 	bool ok;
 
-	if (!intel_has_gpu_reset(i915))
+	if (!intel_has_gpu_reset(gt))
 		return 0;
 
-	ctx = kernel_context(i915);
+	ctx = kernel_context(gt->i915);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
@@ -1126,25 +1145,25 @@ live_gpu_reset_workarounds(void *arg)
 
 	pr_info("Verifying after GPU reset...\n");
 
-	igt_global_reset_lock(&i915->gt);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	igt_global_reset_lock(gt);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
-	reference_lists_init(i915, &lists);
+	reference_lists_init(gt, &lists);
 
 	ok = verify_wa_lists(ctx, &lists, "before reset");
 	if (!ok)
 		goto out;
 
-	intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds");
+	intel_gt_reset(gt, ALL_ENGINES, "live_workarounds");
 
 	ok = verify_wa_lists(ctx, &lists, "after reset");
 
 out:
 	i915_gem_context_unlock_engines(ctx);
 	kernel_context_close(ctx);
-	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	igt_global_reset_unlock(&i915->gt);
+	reference_lists_fini(gt, &lists);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	igt_global_reset_unlock(gt);
 
 	return ok ? 0 : -ESRCH;
 }
@@ -1152,7 +1171,7 @@ out:
 static int
 live_engine_reset_workarounds(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_engines_iter it;
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
@@ -1162,17 +1181,17 @@ live_engine_reset_workarounds(void *arg)
 	struct wa_lists lists;
 	int ret = 0;
 
-	if (!intel_has_reset_engine(i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	ctx = kernel_context(i915);
+	ctx = kernel_context(gt->i915);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	igt_global_reset_lock(&i915->gt);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	igt_global_reset_lock(gt);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
-	reference_lists_init(i915, &lists);
+	reference_lists_init(gt, &lists);
 
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct intel_engine_cs *engine = ce->engine;
@@ -1205,12 +1224,10 @@ live_engine_reset_workarounds(void *arg)
 			goto err;
 		}
 
-		i915_request_add(rq);
-
-		if (!igt_wait_for_spinner(&spin, rq)) {
+		ret = request_add_spin(rq, &spin);
+		if (ret) {
 			pr_err("Spinner failed to start\n");
 			igt_spinner_fini(&spin);
-			ret = -ETIMEDOUT;
 			goto err;
 		}
 
@@ -1227,12 +1244,12 @@ live_engine_reset_workarounds(void *arg)
 	}
 err:
 	i915_gem_context_unlock_engines(ctx);
-	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	igt_global_reset_unlock(&i915->gt);
+	reference_lists_fini(gt, &lists);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	igt_global_reset_unlock(gt);
 	kernel_context_close(ctx);
 
-	igt_flush_test(i915, I915_WAIT_LOCKED);
+	igt_flush_test(gt->i915);
 
 	return ret;
 }
@@ -1246,14 +1263,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_gpu_reset_workarounds),
 		SUBTEST(live_engine_reset_workarounds),
 	};
-	int err;
 
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_subtests(tests, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return intel_gt_live_subtests(tests, &i915->gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..aeb1d1f616e8 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
index 689efc66c908..d2bcc3df6183 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
@@ -7,6 +7,8 @@
 #ifndef __MOCK_TIMELINE__
 #define __MOCK_TIMELINE__
 
+#include <linux/types.h>
+
 struct intel_timeline;
 
 void mock_timeline_init(struct intel_timeline *timeline, u64 context);
diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile
deleted file mode 100644
index bec94d434cb6..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/../..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 249c747e9756..5d00a3b2d914 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -4,23 +4,45 @@
  */
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
+#include "gt/intel_gt_pm_irq.h"
 #include "intel_guc.h"
 #include "intel_guc_ads.h"
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 
-static void gen8_guc_raise_irq(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
-}
+/**
+ * DOC: GuC
+ *
+ * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is
+ * designed to offload some of the functionality usually performed by the host
+ * driver; currently the main operations it can take care of are:
+ *
+ * - Authentication of the HuC, which is required to fully enable HuC usage.
+ * - Low latency graphics context scheduling (a.k.a. GuC submission).
+ * - GT Power management.
+ *
+ * The enable_guc module parameter can be used to select which of those
+ * operations to enable within GuC. Note that not all the operations are
+ * supported on all gen9+ platforms.
+ *
+ * Enabling the GuC is not mandatory and therefore the firmware is only loaded
+ * if at least one of the operations is selected. However, not loading the GuC
+ * might result in the loss of some features that do require the GuC (currently
+ * just the HuC, but more are expected to land in the future).
+ */
 
-static void gen11_guc_raise_irq(struct intel_guc *guc)
+void intel_guc_notify(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 
-	intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0);
+	/*
+	 * On Gen11+, the value written to the register is passes as a payload
+	 * to the FW. However, the FW currently treats all values the same way
+	 * (H2G interrupt), so we can just write the value that the HW expects
+	 * on older gens.
+	 */
+	intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER);
 }
 
 static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
@@ -56,6 +78,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc)
 	guc->send_regs.fw_domains = fw_domains;
 }
 
+static void gen9_reset_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+	spin_lock_irq(&gt->irq_lock);
+	gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_enable_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+	spin_lock_irq(&gt->irq_lock);
+	if (!guc->interrupts.enabled) {
+		WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
+			     gt->pm_guc_events);
+		guc->interrupts.enabled = true;
+		gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
+	}
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_disable_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+	spin_lock_irq(&gt->irq_lock);
+	guc->interrupts.enabled = false;
+
+	gen6_gt_pm_disable_irq(gt, gt->pm_guc_events);
+
+	spin_unlock_irq(&gt->irq_lock);
+	intel_synchronize_irq(gt->i915);
+
+	gen9_reset_guc_interrupts(guc);
+}
+
+static void gen11_reset_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	spin_lock_irq(&gt->irq_lock);
+	gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_enable_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	spin_lock_irq(&gt->irq_lock);
+	if (!guc->interrupts.enabled) {
+		u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+		WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
+		intel_uncore_write(gt->uncore,
+				   GEN11_GUC_SG_INTR_ENABLE, events);
+		intel_uncore_write(gt->uncore,
+				   GEN11_GUC_SG_INTR_MASK, ~events);
+		guc->interrupts.enabled = true;
+	}
+	spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_disable_guc_interrupts(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	spin_lock_irq(&gt->irq_lock);
+	guc->interrupts.enabled = false;
+
+	intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+	intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+
+	spin_unlock_irq(&gt->irq_lock);
+	intel_synchronize_irq(gt->i915);
+
+	gen11_reset_guc_interrupts(guc);
+}
+
 void intel_guc_init_early(struct intel_guc *guc)
 {
 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -67,47 +176,19 @@ void intel_guc_init_early(struct intel_guc *guc)
 
 	mutex_init(&guc->send_mutex);
 	spin_lock_init(&guc->irq_lock);
-	guc->send = intel_guc_send_nop;
-	guc->handler = intel_guc_to_host_event_handler_nop;
 	if (INTEL_GEN(i915) >= 11) {
-		guc->notify = gen11_guc_raise_irq;
+		guc->notify_reg = GEN11_GUC_HOST_INTERRUPT;
 		guc->interrupts.reset = gen11_reset_guc_interrupts;
 		guc->interrupts.enable = gen11_enable_guc_interrupts;
 		guc->interrupts.disable = gen11_disable_guc_interrupts;
 	} else {
-		guc->notify = gen8_guc_raise_irq;
+		guc->notify_reg = GUC_SEND_INTERRUPT;
 		guc->interrupts.reset = gen9_reset_guc_interrupts;
 		guc->interrupts.enable = gen9_enable_guc_interrupts;
 		guc->interrupts.disable = gen9_disable_guc_interrupts;
 	}
 }
 
-static int guc_shared_data_create(struct intel_guc *guc)
-{
-	struct i915_vma *vma;
-	void *vaddr;
-
-	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		i915_vma_unpin_and_release(&vma, 0);
-		return PTR_ERR(vaddr);
-	}
-
-	guc->shared_data = vma;
-	guc->shared_data_vaddr = vaddr;
-
-	return 0;
-}
-
-static void guc_shared_data_destroy(struct intel_guc *guc)
-{
-	i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP);
-}
-
 static u32 guc_ctl_debug_flags(struct intel_guc *guc)
 {
 	u32 level = intel_guc_log_get_level(&guc->log);
@@ -254,14 +335,9 @@ int intel_guc_init(struct intel_guc *guc)
 	if (ret)
 		goto err_fetch;
 
-	ret = guc_shared_data_create(guc);
-	if (ret)
-		goto err_fw;
-	GEM_BUG_ON(!guc->shared_data);
-
 	ret = intel_guc_log_create(&guc->log);
 	if (ret)
-		goto err_shared;
+		goto err_fw;
 
 	ret = intel_guc_ads_create(guc);
 	if (ret)
@@ -296,8 +372,6 @@ err_ads:
 	intel_guc_ads_destroy(guc);
 err_log:
 	intel_guc_log_destroy(&guc->log);
-err_shared:
-	guc_shared_data_destroy(guc);
 err_fw:
 	intel_uc_fw_fini(&guc->fw);
 err_fetch:
@@ -322,21 +396,10 @@ void intel_guc_fini(struct intel_guc *guc)
 
 	intel_guc_ads_destroy(guc);
 	intel_guc_log_destroy(&guc->log);
-	guc_shared_data_destroy(guc);
 	intel_uc_fw_fini(&guc->fw);
 	intel_uc_fw_cleanup_fetch(&guc->fw);
-}
-
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
-		       u32 *response_buf, u32 response_buf_size)
-{
-	WARN(1, "Unexpected send: action=%#x\n", *action);
-	return -ENODEV;
-}
 
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc)
-{
-	WARN(1, "Unexpected event: no suitable handler\n");
+	intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
 }
 
 /*
@@ -478,6 +541,13 @@ int intel_guc_suspend(struct intel_guc *guc)
 	};
 
 	/*
+	 * If GuC communication is enabled but submission is not supported,
+	 * we do not need to suspend the GuC.
+	 */
+	if (!intel_guc_submission_is_enabled(guc))
+		return 0;
+
+	/*
 	 * The ENTER_S_STATE action queues the save/restore operation in GuC FW
 	 * and then returns, so waiting on the H2G is not enough to guarantee
 	 * GuC is done. When all the processing is done, GuC writes
@@ -518,19 +588,9 @@ int intel_guc_suspend(struct intel_guc *guc)
 int intel_guc_reset_engine(struct intel_guc *guc,
 			   struct intel_engine_cs *engine)
 {
-	u32 data[7];
+	/* XXX: to be implemented with submission interface rework */
 
-	GEM_BUG_ON(!guc->execbuf_client);
-
-	data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET;
-	data[1] = engine->guc_id;
-	data[2] = 0;
-	data[3] = 0;
-	data[4] = 0;
-	data[5] = guc->execbuf_client->stage_id;
-	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
-
-	return intel_guc_send(guc, data, ARRAY_SIZE(data));
+	return -ENODEV;
 }
 
 /**
@@ -544,13 +604,27 @@ int intel_guc_resume(struct intel_guc *guc)
 		GUC_POWER_D0,
 	};
 
+	/*
+	 * If GuC communication is enabled but submission is not supported,
+	 * we do not need to resume the GuC but we do need to enable the
+	 * GuC communication on resume (above).
+	 */
+	if (!intel_guc_submission_is_enabled(guc))
+		return 0;
+
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
 /**
- * DOC: GuC Address Space
+ * DOC: GuC Memory Management
  *
- * The layout of GuC address space is shown below:
+ * GuC can't allocate any memory for its own usage, so all the allocations must
+ * be handled by the host driver. GuC accesses the memory via the GGTT, with the
+ * exception of the top and bottom parts of the 4GB address space, which are
+ * instead re-mapped by the GuC HW to memory location of the FW itself (WOPCM)
+ * or other parts of the HW. The driver must take care not to place objects that
+ * the GuC is going to access in these reserved ranges. The layout of the GuC
+ * address space is shown below:
  *
  * ::
  *
@@ -617,3 +691,37 @@ err:
 	i915_gem_object_put(obj);
 	return vma;
 }
+
+/**
+ * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage
+ * @guc:	the guc
+ * @size:	size of area to allocate (both virtual space and memory)
+ * @out_vma:	return variable for the allocated vma pointer
+ * @out_vaddr:	return variable for the obj mapping
+ *
+ * This wrapper calls intel_guc_allocate_vma() and then maps the allocated
+ * object with I915_MAP_WB.
+ *
+ * Return:	0 if successful, a negative errno code otherwise.
+ */
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+				   struct i915_vma **out_vma, void **out_vaddr)
+{
+	struct i915_vma *vma;
+	void *vaddr;
+
+	vma = intel_guc_allocate_vma(guc, size);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		i915_vma_unpin_and_release(&vma, 0);
+		return PTR_ERR(vaddr);
+	}
+
+	*out_vma = vma;
+	*out_vaddr = vaddr;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b2f046d3cc3..910d49590068 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -20,8 +20,8 @@ struct __guc_ads_blob;
 
 /*
  * Top level structure of GuC. It handles firmware loading and manages client
- * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy
- * ExecList submission.
+ * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList
+ * submission.
  */
 struct intel_guc {
 	struct intel_uc_fw fw;
@@ -46,15 +46,13 @@ struct intel_guc {
 
 	struct i915_vma *stage_desc_pool;
 	void *stage_desc_pool_vaddr;
-	struct ida stage_ids;
-	struct i915_vma *shared_data;
-	void *shared_data_vaddr;
 
-	struct intel_guc_client *execbuf_client;
+	struct i915_vma *workqueue;
+	void *workqueue_vaddr;
+	spinlock_t wq_lock;
 
-	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
-	/* Cyclic counter mod pagesize	*/
-	u32 db_cacheline;
+	struct i915_vma *proc_desc;
+	void *proc_desc_vaddr;
 
 	/* Control params for fw initialization */
 	u32 params[GUC_CTL_MAX_DWORDS];
@@ -66,44 +64,33 @@ struct intel_guc {
 		enum forcewake_domains fw_domains;
 	} send_regs;
 
+	/* register used to send interrupts to the GuC FW */
+	i915_reg_t notify_reg;
+
 	/* Store msg (e.g. log flush) that we see while CTBs are disabled */
 	u32 mmio_msg;
 
 	/* To serialize the intel_guc_send actions */
 	struct mutex send_mutex;
-
-	/* GuC's FW specific send function */
-	int (*send)(struct intel_guc *guc, const u32 *data, u32 len,
-		    u32 *response_buf, u32 response_buf_size);
-
-	/* GuC's FW specific event handler function */
-	void (*handler)(struct intel_guc *guc);
-
-	/* GuC's FW specific notify function */
-	void (*notify)(struct intel_guc *guc);
 };
 
 static
 inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 {
-	return guc->send(guc, action, len, NULL, 0);
+	return intel_guc_ct_send(&guc->ct, action, len, NULL, 0);
 }
 
 static inline int
 intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
 			   u32 *response_buf, u32 response_buf_size)
 {
-	return guc->send(guc, action, len, response_buf, response_buf_size);
-}
-
-static inline void intel_guc_notify(struct intel_guc *guc)
-{
-	guc->notify(guc);
+	return intel_guc_ct_send(&guc->ct, action, len,
+				 response_buf, response_buf_size);
 }
 
 static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
 {
-	guc->handler(guc);
+	intel_guc_ct_event_handler(&guc->ct);
 }
 
 /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
@@ -138,12 +125,9 @@ void intel_guc_init_send_regs(struct intel_guc *guc);
 void intel_guc_write_params(struct intel_guc *guc);
 int intel_guc_init(struct intel_guc *guc);
 void intel_guc_fini(struct intel_guc *guc);
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
-		       u32 *response_buf, u32 response_buf_size);
+void intel_guc_notify(struct intel_guc *guc);
 int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
 			u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler(struct intel_guc *guc);
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc);
 int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
 				       const u32 *payload, u32 len);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
@@ -151,6 +135,8 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
 int intel_guc_suspend(struct intel_guc *guc);
 int intel_guc_resume(struct intel_guc *guc);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+				   struct i915_vma **out_vma, void **out_vaddr);
 
 static inline bool intel_guc_is_supported(struct intel_guc *guc)
 {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ca6674b8e00c..101728006ae9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -93,7 +93,8 @@ static void __guc_ads_init(struct intel_guc *guc)
 		 */
 		blob->ads.golden_context_lrca[engine_class] = 0;
 		blob->ads.eng_state_size[engine_class] =
-			intel_engine_context_size(dev_priv, engine_class) -
+			intel_engine_context_size(guc_to_gt(guc),
+						  engine_class) -
 			skipped_size;
 	}
 
@@ -135,32 +136,19 @@ static void __guc_ads_init(struct intel_guc *guc)
 int intel_guc_ads_create(struct intel_guc *guc)
 {
 	const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
-	struct i915_vma *vma;
-	void *blob;
 	int ret;
 
 	GEM_BUG_ON(guc->ads_vma);
 
-	vma = intel_guc_allocate_vma(guc, size);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
+					     (void **)&guc->ads_blob);
 
-	blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(blob)) {
-		ret = PTR_ERR(blob);
-		goto err_vma;
-	}
-
-	guc->ads_vma = vma;
-	guc->ads_blob = blob;
+	if (ret)
+		return ret;
 
 	__guc_ads_init(guc);
 
 	return 0;
-
-err_vma:
-	i915_vma_unpin_and_release(&guc->ads_vma, 0);
-	return ret;
 }
 
 void intel_guc_ads_destroy(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index b49115517510..c6f971a049f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -37,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w);
  */
 void intel_guc_ct_init_early(struct intel_guc_ct *ct)
 {
-	/* we're using static channel owners */
-	ct->host_channel.owner = CTB_OWNER_HOST;
-
-	spin_lock_init(&ct->lock);
-	INIT_LIST_HEAD(&ct->pending_requests);
-	INIT_LIST_HEAD(&ct->incoming_requests);
-	INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
+	spin_lock_init(&ct->requests.lock);
+	INIT_LIST_HEAD(&ct->requests.pending);
+	INIT_LIST_HEAD(&ct->requests.incoming);
+	INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
 }
 
 static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
@@ -64,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type)
 }
 
 static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
-				    u32 cmds_addr, u32 size, u32 owner)
+				    u32 cmds_addr, u32 size)
 {
-	CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
-			desc, cmds_addr, size, owner);
+	CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
 	memset(desc, 0, sizeof(*desc));
 	desc->addr = cmds_addr;
 	desc->size = size;
-	desc->owner = owner;
+	desc->owner = CTB_OWNER_HOST;
 }
 
 static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
@@ -104,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc,
 }
 
 static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
-					   u32 owner,
 					   u32 type)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
-		owner,
+		CTB_OWNER_HOST,
 		type
 	};
 	int err;
@@ -117,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
 	/* Can't use generic send(), CT deregistration must go over MMIO */
 	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
 	if (err)
-		DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
-			  guc_ct_buffer_type_to_str(type), owner, err);
+		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+			  guc_ct_buffer_type_to_str(type), err);
 	return err;
 }
 
-static int ctch_init(struct intel_guc *guc,
-		     struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_init - Init buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Allocate memory required for buffer-based communication.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_init(struct intel_guc_ct *ct)
 {
-	struct i915_vma *vma;
+	struct intel_guc *guc = ct_to_guc(ct);
 	void *blob;
 	int err;
 	int i;
 
-	GEM_BUG_ON(ctch->vma);
+	GEM_BUG_ON(ct->vma);
 
 	/* We allocate 1 page to hold both descriptors and both buffers.
 	 *       ___________.....................
@@ -154,71 +156,65 @@ static int ctch_init(struct intel_guc *guc,
 	 * other code will need updating as well.
 	 */
 
-	/* allocate vma */
-	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
-		goto err_out;
+	err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
+	if (err) {
+		DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+		return err;
 	}
-	ctch->vma = vma;
 
-	/* map first page */
-	blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(blob)) {
-		err = PTR_ERR(blob);
-		goto err_vma;
-	}
 	CT_DEBUG_DRIVER("CT: vma base=%#x\n",
-			intel_guc_ggtt_offset(guc, ctch->vma));
+			intel_guc_ggtt_offset(guc, ct->vma));
 
 	/* store pointers to desc and cmds */
-	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
-		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
-		ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
-		ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+	for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
+		GEM_BUG_ON((i !=  CTB_SEND) && (i != CTB_RECV));
+		ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+		ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
 	}
 
 	return 0;
-
-err_vma:
-	i915_vma_unpin_and_release(&ctch->vma, 0);
-err_out:
-	CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
-			ctch->owner, err);
-	return err;
 }
 
-static void ctch_fini(struct intel_guc *guc,
-		      struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_fini - Fini buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Deallocate memory required for buffer-based communication.
+ */
+void intel_guc_ct_fini(struct intel_guc_ct *ct)
 {
-	GEM_BUG_ON(ctch->enabled);
+	GEM_BUG_ON(ct->enabled);
 
-	i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP);
+	i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
 }
 
-static int ctch_enable(struct intel_guc *guc,
-		       struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_enable - Enable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_enable(struct intel_guc_ct *ct)
 {
+	struct intel_guc *guc = ct_to_guc(ct);
 	u32 base;
 	int err;
 	int i;
 
-	GEM_BUG_ON(!ctch->vma);
-
-	GEM_BUG_ON(ctch->enabled);
+	GEM_BUG_ON(ct->enabled);
 
 	/* vma should be already allocated and map'ed */
-	base = intel_guc_ggtt_offset(guc, ctch->vma);
+	GEM_BUG_ON(!ct->vma);
+	base = intel_guc_ggtt_offset(guc, ct->vma);
 
 	/* (re)initialize descriptors
 	 * cmds buffers are in the second half of the blob page
 	 */
-	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+	for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
 		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
-		guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
+		guc_ct_buffer_desc_init(ct->ctbs[i].desc,
 					base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
-					PAGE_SIZE/4,
-					ctch->owner);
+					PAGE_SIZE/4);
 	}
 
 	/* register buffers, starting wirh RECV buffer
@@ -236,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc,
 	if (unlikely(err))
 		goto err_deregister;
 
-	ctch->enabled = true;
+	ct->enabled = true;
 
 	return 0;
 
 err_deregister:
 	guc_action_deregister_ct_buffer(guc,
-					ctch->owner,
 					INTEL_GUC_CT_BUFFER_TYPE_RECV);
 err_out:
-	DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err);
+	DRM_ERROR("CT: can't open channel; err=%d\n", err);
 	return err;
 }
 
-static void ctch_disable(struct intel_guc *guc,
-			 struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_disable - Disable buffer based command transport.
+ * @ct: pointer to CT struct
+ */
+void intel_guc_ct_disable(struct intel_guc_ct *ct)
 {
-	GEM_BUG_ON(!ctch->enabled);
+	struct intel_guc *guc = ct_to_guc(ct);
 
-	ctch->enabled = false;
+	GEM_BUG_ON(!ct->enabled);
 
-	guc_action_deregister_ct_buffer(guc,
-					ctch->owner,
-					INTEL_GUC_CT_BUFFER_TYPE_SEND);
-	guc_action_deregister_ct_buffer(guc,
-					ctch->owner,
-					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	ct->enabled = false;
+
+	if (intel_guc_is_running(guc)) {
+		guc_action_deregister_ct_buffer(guc,
+						INTEL_GUC_CT_BUFFER_TYPE_SEND);
+		guc_action_deregister_ct_buffer(guc,
+						INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	}
 }
 
-static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+static u32 ct_get_next_fence(struct intel_guc_ct *ct)
 {
 	/* For now it's trivial */
-	return ++ctch->next_fence;
+	return ++ct->requests.next_fence;
 }
 
 /**
@@ -440,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
 	return err;
 }
 
-static int ctch_send(struct intel_guc_ct *ct,
-		     struct intel_guc_ct_channel *ctch,
-		     const u32 *action,
-		     u32 len,
-		     u32 *response_buf,
-		     u32 response_buf_size,
-		     u32 *status)
+static int ct_send(struct intel_guc_ct *ct,
+		   const u32 *action,
+		   u32 len,
+		   u32 *response_buf,
+		   u32 response_buf_size,
+		   u32 *status)
 {
-	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+	struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
 	struct guc_ct_buffer_desc *desc = ctb->desc;
 	struct ct_request request;
 	unsigned long flags;
 	u32 fence;
 	int err;
 
-	GEM_BUG_ON(!ctch->enabled);
+	GEM_BUG_ON(!ct->enabled);
 	GEM_BUG_ON(!len);
 	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
 	GEM_BUG_ON(!response_buf && response_buf_size);
 
-	fence = ctch_get_next_fence(ctch);
+	fence = ct_get_next_fence(ct);
 	request.fence = fence;
 	request.status = 0;
 	request.response_len = response_buf_size;
 	request.response_buf = response_buf;
 
-	spin_lock_irqsave(&ct->lock, flags);
-	list_add_tail(&request.link, &ct->pending_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_lock_irqsave(&ct->requests.lock, flags);
+	list_add_tail(&request.link, &ct->requests.pending);
+	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
 	err = ctb_write(ctb, action, len, fence, !!response_buf);
 	if (unlikely(err))
@@ -501,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct,
 	}
 
 unlink:
-	spin_lock_irqsave(&ct->lock, flags);
+	spin_lock_irqsave(&ct->requests.lock, flags);
 	list_del(&request.link);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
 	return err;
 }
@@ -511,18 +510,21 @@ unlink:
 /*
  * Command Transport (CT) buffer based GuC send function.
  */
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 		      u32 *response_buf, u32 response_buf_size)
 {
-	struct intel_guc_ct *ct = &guc->ct;
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
+	struct intel_guc *guc = ct_to_guc(ct);
 	u32 status = ~0; /* undefined */
 	int ret;
 
+	if (unlikely(!ct->enabled)) {
+		WARN(1, "Unexpected send: action=%#x\n", *action);
+		return -ENODEV;
+	}
+
 	mutex_lock(&guc->send_mutex);
 
-	ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
-			&status);
+	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
 		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
 			  action[0], ret, status);
@@ -653,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 
 	CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
 
-	spin_lock(&ct->lock);
-	list_for_each_entry(req, &ct->pending_requests, link) {
+	spin_lock(&ct->requests.lock);
+	list_for_each_entry(req, &ct->requests.pending, link) {
 		if (unlikely(fence != req->fence)) {
 			CT_DEBUG_DRIVER("CT: request %u awaits response\n",
 					req->fence);
@@ -672,7 +674,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 		found = true;
 		break;
 	}
-	spin_unlock(&ct->lock);
+	spin_unlock(&ct->requests.lock);
 
 	if (!found)
 		DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
@@ -710,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
 	u32 *payload;
 	bool done;
 
-	spin_lock_irqsave(&ct->lock, flags);
-	request = list_first_entry_or_null(&ct->incoming_requests,
+	spin_lock_irqsave(&ct->requests.lock, flags);
+	request = list_first_entry_or_null(&ct->requests.incoming,
 					   struct ct_incoming_request, link);
 	if (request)
 		list_del(&request->link);
-	done = !!list_empty(&ct->incoming_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	done = !!list_empty(&ct->requests.incoming);
+	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
 	if (!request)
 		return true;
@@ -734,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
 
 static void ct_incoming_request_worker_func(struct work_struct *w)
 {
-	struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker);
+	struct intel_guc_ct *ct =
+		container_of(w, struct intel_guc_ct, requests.worker);
 	bool done;
 
 	done = ct_process_incoming_requests(ct);
 	if (!done)
-		queue_work(system_unbound_wq, &ct->worker);
+		queue_work(system_unbound_wq, &ct->requests.worker);
 }
 
 /**
@@ -777,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
 	}
 	memcpy(request->msg, msg, 4 * msglen);
 
-	spin_lock_irqsave(&ct->lock, flags);
-	list_add_tail(&request->link, &ct->incoming_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_lock_irqsave(&ct->requests.lock, flags);
+	list_add_tail(&request->link, &ct->requests.incoming);
+	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
-	queue_work(system_unbound_wq, &ct->worker);
+	queue_work(system_unbound_wq, &ct->requests.worker);
 	return 0;
 }
 
-static void ct_process_host_channel(struct intel_guc_ct *ct)
+/*
+ * When we're communicating with the GuC over CT, GuC uses events
+ * to notify us about new messages being posted on the RECV buffer.
+ */
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 {
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
-	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV];
+	struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
 	u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
 	int err = 0;
 
-	if (!ctch->enabled)
+	if (unlikely(!ct->enabled)) {
+		WARN(1, "Unexpected GuC event received while CT disabled!\n");
 		return;
+	}
 
 	do {
 		err = ctb_read(ctb, msg);
@@ -812,86 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct)
 	}
 }
 
-/*
- * When we're communicating with the GuC over CT, GuC uses events
- * to notify us about new messages being posted on the RECV buffer.
- */
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc)
-{
-	struct intel_guc_ct *ct = &guc->ct;
-
-	ct_process_host_channel(ct);
-}
-
-/**
- * intel_guc_ct_init - Init CT communication
- * @ct: pointer to CT struct
- *
- * Allocate memory required for communication via
- * the CT channel.
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_init(struct intel_guc_ct *ct)
-{
-	struct intel_guc *guc = ct_to_guc(ct);
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
-	int err;
-
-	err = ctch_init(guc, ctch);
-	if (unlikely(err)) {
-		DRM_ERROR("CT: can't open channel %d; err=%d\n",
-			  ctch->owner, err);
-		return err;
-	}
-
-	GEM_BUG_ON(!ctch->vma);
-	return 0;
-}
-
-/**
- * intel_guc_ct_fini - Fini CT communication
- * @ct: pointer to CT struct
- *
- * Deallocate memory required for communication via
- * the CT channel.
- */
-void intel_guc_ct_fini(struct intel_guc_ct *ct)
-{
-	struct intel_guc *guc = ct_to_guc(ct);
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
-	ctch_fini(guc, ctch);
-}
-
-/**
- * intel_guc_ct_enable - Enable buffer based command transport.
- * @ct: pointer to CT struct
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_enable(struct intel_guc_ct *ct)
-{
-	struct intel_guc *guc = ct_to_guc(ct);
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
-	if (ctch->enabled)
-		return 0;
-
-	return ctch_enable(guc, ctch);
-}
-
-/**
- * intel_guc_ct_disable - Disable buffer based command transport.
- * @ct: pointer to CT struct
- */
-void intel_guc_ct_disable(struct intel_guc_ct *ct)
-{
-	struct intel_guc *guc = ct_to_guc(ct);
-	struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
-	if (!ctch->enabled)
-		return;
-
-	ctch_disable(guc, ctch);
-}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 7c24d83f5c24..3e7fe237cfa5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -35,44 +35,28 @@ struct intel_guc_ct_buffer {
 	u32 *cmds;
 };
 
-/** Represents pair of command transport buffers.
- *
- * Buffers go in pairs to allow bi-directional communication.
- * To simplify the code we place both of them in the same vma.
- * Buffers from the same pair must share unique owner id.
- *
- * @vma: pointer to the vma with pair of CT buffers
- * @ctbs: buffers for sending(0) and receiving(1) commands
- * @owner: unique identifier
- * @next_fence: fence to be used with next send command
- */
-struct intel_guc_ct_channel {
-	struct i915_vma *vma;
-	struct intel_guc_ct_buffer ctbs[2];
-	u32 owner;
-	u32 next_fence;
-	bool enabled;
-};
 
-/** Holds all command transport channels.
+/** Top-level structure for Command Transport related data
  *
- * @host_channel: main channel used by the host
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
  */
 struct intel_guc_ct {
-	struct intel_guc_ct_channel host_channel;
-	/* other channels are tbd */
+	struct i915_vma *vma;
+	bool enabled;
 
-	/** @lock: protects pending requests list */
-	spinlock_t lock;
+	/* buffers for sending(0) and receiving(1) commands */
+	struct intel_guc_ct_buffer ctbs[2];
 
-	/** @pending_requests: list of requests waiting for response */
-	struct list_head pending_requests;
+	struct {
+		u32 next_fence; /* fence to be used with next request to send */
 
-	/** @incoming_requests: list of incoming requests */
-	struct list_head incoming_requests;
+		spinlock_t lock; /* protects pending requests list */
+		struct list_head pending; /* requests waiting for response */
 
-	/** @worker: worker for handling incoming requests */
-	struct work_struct worker;
+		struct list_head incoming; /* incoming requests */
+		struct work_struct worker; /* handler for incoming requests */
+	} requests;
 };
 
 void intel_guc_ct_init_early(struct intel_guc_ct *ct);
@@ -81,13 +65,13 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
 int intel_guc_ct_enable(struct intel_guc_ct *ct);
 void intel_guc_ct_disable(struct intel_guc_ct *ct);
 
-static inline void intel_guc_ct_stop(struct intel_guc_ct *ct)
+static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
 {
-	ct->host_channel.enabled = false;
+	return ct->enabled;
 }
 
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 		      u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc);
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
 
 #endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 5528224448f6..3a1c47d600ea 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -149,7 +149,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
 	 * Current uCode expects the code to be loaded at 8k; locations below
 	 * this are used for the stack.
 	 */
-	ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE);
+	ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 1d3cdd67ca2f..a6b733c146c9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -31,7 +31,6 @@
 
 #define GUC_DOORBELL_INVALID		256
 
-#define GUC_DB_SIZE			(PAGE_SIZE)
 #define GUC_WQ_SIZE			(PAGE_SIZE * 2)
 
 /* Work queue item header definitions */
@@ -548,6 +547,7 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
 	INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
 	INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
 	INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
 	INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
 	INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
@@ -556,7 +556,6 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
 	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
 	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
-	INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
 	INTEL_GUC_ACTION_LIMIT
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 36332064de9c..caed0d57e704 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
 
 	mutex_lock(&log->relay.lock);
 
-	if (WARN_ON(!intel_guc_log_relay_enabled(log)))
+	if (WARN_ON(!intel_guc_log_relay_created(log)))
 		goto out_unlock;
 
 	/* Get the pointer to shared GuC log buffer */
@@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
 {
 	mutex_init(&log->relay.lock);
 	INIT_WORK(&log->relay.flush_work, capture_logs_work);
+	log->relay.started = false;
 }
 
 static int guc_log_relay_create(struct intel_guc_log *log)
@@ -546,7 +547,7 @@ out_unlock:
 	return ret;
 }
 
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log)
+bool intel_guc_log_relay_created(const struct intel_guc_log *log)
 {
 	return log->relay.buf_addr;
 }
@@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
 
 	mutex_lock(&log->relay.lock);
 
-	if (intel_guc_log_relay_enabled(log)) {
+	if (intel_guc_log_relay_created(log)) {
 		ret = -EEXIST;
 		goto out_unlock;
 	}
@@ -585,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
 
 	mutex_unlock(&log->relay.lock);
 
+	return 0;
+
+out_relay:
+	guc_log_relay_destroy(log);
+out_unlock:
+	mutex_unlock(&log->relay.lock);
+
+	return ret;
+}
+
+int intel_guc_log_relay_start(struct intel_guc_log *log)
+{
+	if (log->relay.started)
+		return -EEXIST;
+
 	guc_log_enable_flush_events(log);
 
 	/*
@@ -594,47 +610,59 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
 	 */
 	queue_work(system_highpri_wq, &log->relay.flush_work);
 
-	return 0;
+	log->relay.started = true;
 
-out_relay:
-	guc_log_relay_destroy(log);
-out_unlock:
-	mutex_unlock(&log->relay.lock);
-
-	return ret;
+	return 0;
 }
 
 void intel_guc_log_relay_flush(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
 	intel_wakeref_t wakeref;
 
+	if (!log->relay.started)
+		return;
+
 	/*
 	 * Before initiating the forceful flush, wait for any pending/ongoing
 	 * flush to complete otherwise forceful flush may not actually happen.
 	 */
 	flush_work(&log->relay.flush_work);
 
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+	with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
 		guc_action_flush_log(guc);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(log);
 }
 
-void intel_guc_log_relay_close(struct intel_guc_log *log)
+/*
+ * Stops the relay log. Called from intel_guc_log_relay_close(), so no
+ * possibility of race with start/flush since relay_write cannot race
+ * relay_close.
+ */
+static void guc_log_relay_stop(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
 
+	if (!log->relay.started)
+		return;
+
 	guc_log_disable_flush_events(log);
 	intel_synchronize_irq(i915);
 
 	flush_work(&log->relay.flush_work);
 
+	log->relay.started = false;
+}
+
+void intel_guc_log_relay_close(struct intel_guc_log *log)
+{
+	guc_log_relay_stop(log);
+
 	mutex_lock(&log->relay.lock);
-	GEM_BUG_ON(!intel_guc_log_relay_enabled(log));
+	GEM_BUG_ON(!intel_guc_log_relay_created(log));
 	guc_log_unmap(log);
 	guc_log_relay_destroy(log);
 	mutex_unlock(&log->relay.lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 6f764879acb1..c252c022c5fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -47,6 +47,7 @@ struct intel_guc_log {
 	struct i915_vma *vma;
 	struct {
 		void *buf_addr;
+		bool started;
 		struct work_struct flush_work;
 		struct rchan *channel;
 		struct mutex lock;
@@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log);
 void intel_guc_log_destroy(struct intel_guc_log *log);
 
 int intel_guc_log_set_level(struct intel_guc_log *log, u32 level);
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log);
+bool intel_guc_log_relay_created(const struct intel_guc_log *log);
 int intel_guc_log_relay_open(struct intel_guc_log *log);
+int intel_guc_log_relay_start(struct intel_guc_log *log);
 void intel_guc_log_relay_flush(struct intel_guc_log *log);
 void intel_guc_log_relay_close(struct intel_guc_log *log);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index edf194d23c6b..1949346e714e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -83,6 +83,9 @@
 #define GEN8_GTCR			_MMIO(0x4274)
 #define   GEN8_GTCR_INVALIDATE		  (1<<0)
 
+#define GEN12_GUC_TLB_INV_CR		_MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	(1 << 0)
+
 #define GUC_ARAT_C6DIS			_MMIO(0xA178)
 
 #define GUC_SHIM_CONTROL		_MMIO(0xc064)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f325d3dd564f..9e42324fdecd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,48 +6,35 @@
 #include <linux/circ_buf.h>
 
 #include "gem/i915_gem_context.h"
-
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-enum {
-	GUC_PREEMPT_NONE = 0,
-	GUC_PREEMPT_INPROGRESS,
-	GUC_PREEMPT_FINISHED,
-};
-#define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
-#define GUC_PREEMPT_BREADCRUMB_BYTES	\
-	(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
-
 /**
  * DOC: GuC-based command submission
  *
- * GuC client:
- * A intel_guc_client refers to a submission path through GuC. Currently, there
- * is only one client, which is charged with all submissions to the GuC. This
- * struct is the owner of a doorbell, a process descriptor and a workqueue (all
- * of them inside a single gem object that contains all required pages for these
- * elements).
+ * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
+ * firmware is moving to an updated submission interface and we plan to
+ * turn submission back on when that lands. The below documentation (and related
+ * code) matches the old submission model and will be updated as part of the
+ * upgrade to the new flow.
  *
  * GuC stage descriptor:
  * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * descriptors, and shares them with the GuC. Currently, we only use one
+ * descriptor. This stage descriptor lets the GuC know about the workqueue and
+ * process descriptor. Theoretically, it also lets the GuC know about our HW
+ * contexts (context ID, etc...), but we actually employ a kind of submission
+ * where the GuC uses the LRCA sent via the work item instead. This is called
+ * a "proxy" submission.
  *
  * The Scratch registers:
  * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -56,11 +43,6 @@ enum {
  * Firmware writes a success/fail code back to the action register after
  * processes the request. The kernel driver polls waiting for this update and
  * then proceeds.
- * See intel_guc_send()
- *
- * Doorbells:
- * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
- * mapped into process space.
  *
  * Work Items:
  * There are several types of work items that the host may place into a
@@ -77,213 +59,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 	return rb_entry(rb, struct i915_priolist, node);
 }
 
-static inline bool is_high_priority(struct intel_guc_client *client)
-{
-	return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
-		client->priority == GUC_CLIENT_PRIORITY_HIGH);
-}
-
-static int reserve_doorbell(struct intel_guc_client *client)
-{
-	unsigned long offset;
-	unsigned long end;
-	u16 id;
-
-	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
-
-	/*
-	 * The bitmap tracks which doorbell registers are currently in use.
-	 * It is split into two halves; the first half is used for normal
-	 * priority contexts, the second half for high-priority ones.
-	 */
-	offset = 0;
-	end = GUC_NUM_DOORBELLS / 2;
-	if (is_high_priority(client)) {
-		offset = end;
-		end += offset;
-	}
-
-	id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset);
-	if (id == end)
-		return -ENOSPC;
-
-	__set_bit(id, client->guc->doorbell_bitmap);
-	client->doorbell_id = id;
-	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n",
-			 client->stage_id, yesno(is_high_priority(client)),
-			 id);
-	return 0;
-}
-
-static bool has_doorbell(struct intel_guc_client *client)
-{
-	if (client->doorbell_id == GUC_DOORBELL_INVALID)
-		return false;
-
-	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
-}
-
-static void unreserve_doorbell(struct intel_guc_client *client)
-{
-	GEM_BUG_ON(!has_doorbell(client));
-
-	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
-	client->doorbell_id = GUC_DOORBELL_INVALID;
-}
-
-/*
- * Tell the GuC to allocate or deallocate a specific doorbell
- */
-
-static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id)
+static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
 {
-	u32 action[] = {
-		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
-		stage_id
-	};
+	struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
 
-	return intel_guc_send(guc, action, ARRAY_SIZE(action));
+	return &base[id];
 }
 
-static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id)
+static int guc_workqueue_create(struct intel_guc *guc)
 {
-	u32 action[] = {
-		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
-		stage_id
-	};
-
-	return intel_guc_send(guc, action, ARRAY_SIZE(action));
+	return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
+					      &guc->workqueue_vaddr);
 }
 
-static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
+static void guc_workqueue_destroy(struct intel_guc *guc)
 {
-	struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr;
-
-	return &base[client->stage_id];
+	i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
 }
 
 /*
- * Initialise, update, or clear doorbell data shared with the GuC
- *
- * These functions modify shared data and so need access to the mapped
- * client object which contains the page being used for the doorbell
+ * Initialise the process descriptor shared with the GuC firmware.
  */
-
-static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id)
+static int guc_proc_desc_create(struct intel_guc *guc)
 {
-	struct guc_stage_desc *desc;
+	const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
 
-	/* Update the GuC's idea of the doorbell ID */
-	desc = __get_stage_desc(client);
-	desc->db_id = new_id;
+	return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
+					      &guc->proc_desc_vaddr);
 }
 
-static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client)
+static void guc_proc_desc_destroy(struct intel_guc *guc)
 {
-	return client->vaddr + client->doorbell_offset;
+	i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
 }
 
-static bool __doorbell_valid(struct intel_guc *guc, u16 db_id)
-{
-	struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
-
-	GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
-	return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID;
-}
-
-static void __init_doorbell(struct intel_guc_client *client)
-{
-	struct guc_doorbell_info *doorbell;
-
-	doorbell = __get_doorbell(client);
-	doorbell->db_status = GUC_DOORBELL_ENABLED;
-	doorbell->cookie = 0;
-}
-
-static void __fini_doorbell(struct intel_guc_client *client)
-{
-	struct guc_doorbell_info *doorbell;
-	u16 db_id = client->doorbell_id;
-
-	doorbell = __get_doorbell(client);
-	doorbell->db_status = GUC_DOORBELL_DISABLED;
-
-	/* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit
-	 * to go to zero after updating db_status before we call the GuC to
-	 * release the doorbell
-	 */
-	if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10))
-		WARN_ONCE(true, "Doorbell never became invalid after disable\n");
-}
-
-static int create_doorbell(struct intel_guc_client *client)
-{
-	int ret;
-
-	if (WARN_ON(!has_doorbell(client)))
-		return -ENODEV; /* internal setup error, should never happen */
-
-	__update_doorbell_desc(client, client->doorbell_id);
-	__init_doorbell(client);
-
-	ret = __guc_allocate_doorbell(client->guc, client->stage_id);
-	if (ret) {
-		__fini_doorbell(client);
-		__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-		DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n",
-				 client->stage_id, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int destroy_doorbell(struct intel_guc_client *client)
-{
-	int ret;
-
-	GEM_BUG_ON(!has_doorbell(client));
-
-	__fini_doorbell(client);
-	ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
-	if (ret)
-		DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
-			  client->stage_id, ret);
-
-	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-
-	return ret;
-}
-
-static unsigned long __select_cacheline(struct intel_guc *guc)
-{
-	unsigned long offset;
-
-	/* Doorbell uses a single cache line within a page */
-	offset = offset_in_page(guc->db_cacheline);
-
-	/* Moving to next cache line to reduce contention */
-	guc->db_cacheline += cache_line_size();
-
-	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
-			 offset, guc->db_cacheline, cache_line_size());
-	return offset;
-}
-
-static inline struct guc_process_desc *
-__get_process_desc(struct intel_guc_client *client)
-{
-	return client->vaddr + client->proc_desc_offset;
-}
-
-/*
- * Initialise the process descriptor shared with the GuC firmware.
- */
-static void guc_proc_desc_init(struct intel_guc_client *client)
+static void guc_proc_desc_init(struct intel_guc *guc)
 {
 	struct guc_process_desc *desc;
 
-	desc = memset(__get_process_desc(client), 0, sizeof(*desc));
+	desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
 
 	/*
 	 * XXX: pDoorbell and WQVBaseAddress are pointers in process address
@@ -294,47 +108,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client)
 	desc->wq_base_addr = 0;
 	desc->db_base_addr = 0;
 
-	desc->stage_id = client->stage_id;
 	desc->wq_size_bytes = GUC_WQ_SIZE;
 	desc->wq_status = WQ_STATUS_ACTIVE;
-	desc->priority = client->priority;
+	desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
 }
 
-static void guc_proc_desc_fini(struct intel_guc_client *client)
+static void guc_proc_desc_fini(struct intel_guc *guc)
 {
-	struct guc_process_desc *desc;
-
-	desc = __get_process_desc(client);
-	memset(desc, 0, sizeof(*desc));
+	memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
 }
 
 static int guc_stage_desc_pool_create(struct intel_guc *guc)
 {
-	struct i915_vma *vma;
-	void *vaddr;
-
-	vma = intel_guc_allocate_vma(guc,
-				     PAGE_ALIGN(sizeof(struct guc_stage_desc) *
-				     GUC_MAX_STAGE_DESCRIPTORS));
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		i915_vma_unpin_and_release(&vma, 0);
-		return PTR_ERR(vaddr);
-	}
-
-	guc->stage_desc_pool = vma;
-	guc->stage_desc_pool_vaddr = vaddr;
-	ida_init(&guc->stage_ids);
+	u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
+			      GUC_MAX_STAGE_DESCRIPTORS);
 
-	return 0;
+	return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
+					      &guc->stage_desc_pool_vaddr);
 }
 
 static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 {
-	ida_destroy(&guc->stage_ids);
 	i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
 }
 
@@ -342,63 +136,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
  * Initialise/clear the stage descriptor shared with the GuC firmware.
  *
  * This descriptor tells the GuC where (in GGTT space) to find the important
- * data structures relating to this client (doorbell, process descriptor,
- * write queue, etc).
+ * data structures related to work submission (process descriptor, write queue,
+ * etc).
  */
-static void guc_stage_desc_init(struct intel_guc_client *client)
+static void guc_stage_desc_init(struct intel_guc *guc)
 {
-	struct intel_guc *guc = client->guc;
 	struct guc_stage_desc *desc;
-	u32 gfx_addr;
 
-	desc = __get_stage_desc(client);
+	/* we only use 1 stage desc, so hardcode it to 0 */
+	desc = __get_stage_desc(guc, 0);
 	memset(desc, 0, sizeof(*desc));
 
 	desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
 			  GUC_STAGE_DESC_ATTR_KERNEL;
-	if (is_high_priority(client))
-		desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
-	desc->stage_id = client->stage_id;
-	desc->priority = client->priority;
-	desc->db_id = client->doorbell_id;
 
-	/*
-	 * The doorbell, process descriptor, and workqueue are all parts
-	 * of the client object, which the GuC will reference via the GGTT
-	 */
-	gfx_addr = intel_guc_ggtt_offset(guc, client->vma);
-	desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
-				client->doorbell_offset;
-	desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
-	desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
-	desc->process_desc = gfx_addr + client->proc_desc_offset;
-	desc->wq_addr = gfx_addr + GUC_DB_SIZE;
-	desc->wq_size = GUC_WQ_SIZE;
+	desc->stage_id = 0;
+	desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
 
-	desc->desc_private = ptr_to_u64(client);
+	desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
+	desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
+	desc->wq_size = GUC_WQ_SIZE;
 }
 
-static void guc_stage_desc_fini(struct intel_guc_client *client)
+static void guc_stage_desc_fini(struct intel_guc *guc)
 {
 	struct guc_stage_desc *desc;
 
-	desc = __get_stage_desc(client);
+	desc = __get_stage_desc(guc, 0);
 	memset(desc, 0, sizeof(*desc));
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
-static void guc_wq_item_append(struct intel_guc_client *client,
+static void guc_wq_item_append(struct intel_guc *guc,
 			       u32 target_engine, u32 context_desc,
 			       u32 ring_tail, u32 fence_id)
 {
 	/* wqi_len is in DWords, and does not include the one-word header */
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	const u32 wqi_len = wqi_size / sizeof(u32) - 1;
-	struct guc_process_desc *desc = __get_process_desc(client);
+	struct guc_process_desc *desc = guc->proc_desc_vaddr;
 	struct guc_wq_item *wqi;
 	u32 wq_off;
 
-	lockdep_assert_held(&client->wq_lock);
+	lockdep_assert_held(&guc->wq_lock);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 	 * should not have the case where structure wqi is across page, neither
@@ -418,58 +198,30 @@ static void guc_wq_item_append(struct intel_guc_client *client,
 			      GUC_WQ_SIZE) < wqi_size);
 	GEM_BUG_ON(wq_off & (wqi_size - 1));
 
-	/* WQ starts from the page after doorbell / process_desc */
-	wqi = client->vaddr + wq_off + GUC_DB_SIZE;
-
-	if (I915_SELFTEST_ONLY(client->use_nop_wqi)) {
-		wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT);
-	} else {
-		/* Now fill in the 4-word work queue item */
-		wqi->header = WQ_TYPE_INORDER |
-			      (wqi_len << WQ_LEN_SHIFT) |
-			      (target_engine << WQ_TARGET_SHIFT) |
-			      WQ_NO_WCFLUSH_WAIT;
-		wqi->context_desc = context_desc;
-		wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
-		GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
-		wqi->fence_id = fence_id;
-	}
+	wqi = guc->workqueue_vaddr + wq_off;
+
+	/* Now fill in the 4-word work queue item */
+	wqi->header = WQ_TYPE_INORDER |
+		      (wqi_len << WQ_LEN_SHIFT) |
+		      (target_engine << WQ_TARGET_SHIFT) |
+		      WQ_NO_WCFLUSH_WAIT;
+	wqi->context_desc = context_desc;
+	wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
+	GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
+	wqi->fence_id = fence_id;
 
 	/* Make the update visible to GuC */
 	WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
 }
 
-static void guc_ring_doorbell(struct intel_guc_client *client)
-{
-	struct guc_doorbell_info *db;
-	u32 cookie;
-
-	lockdep_assert_held(&client->wq_lock);
-
-	/* pointer of current doorbell cacheline */
-	db = __get_doorbell(client);
-
-	/*
-	 * We're not expecting the doorbell cookie to change behind our back,
-	 * we also need to treat 0 as a reserved value.
-	 */
-	cookie = READ_ONCE(db->cookie);
-	WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie);
-
-	/* XXX: doorbell was lost and need to acquire it again */
-	GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
-}
-
 static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
-	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
+	u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
-	guc_wq_item_append(client, engine->guc_id, ctx_desc,
+	guc_wq_item_append(guc, engine->guc_id, ctx_desc,
 			   ring_tail, rq->fence.seqno);
-	guc_ring_doorbell(client);
 }
 
 /*
@@ -481,10 +233,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  */
 static void flush_ggtt_writes(struct i915_vma *vma)
 {
-	struct drm_i915_private *i915 = vma->vm->i915;
-
 	if (i915_vma_is_map_and_fenceable(vma))
-		intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS);
+		intel_uncore_posting_read_fw(vma->vm->gt->uncore,
+					     GUC_STATUS);
 }
 
 static void guc_submit(struct intel_engine_cs *engine,
@@ -492,9 +243,8 @@ static void guc_submit(struct intel_engine_cs *engine,
 		       struct i915_request **end)
 {
 	struct intel_guc *guc = &engine->gt->uc.guc;
-	struct intel_guc_client *client = guc->execbuf_client;
 
-	spin_lock(&client->wq_lock);
+	spin_lock(&guc->wq_lock);
 
 	do {
 		struct i915_request *rq = *out++;
@@ -503,7 +253,7 @@ static void guc_submit(struct intel_engine_cs *engine,
 		guc_add_request(guc, rq);
 	} while (out != end);
 
-	spin_unlock(&client->wq_lock);
+	spin_unlock(&guc->wq_lock);
 }
 
 static inline int rq_prio(const struct i915_request *rq)
@@ -522,7 +272,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 	 * required if we generalise the inflight tracking.
 	 */
 
-	intel_gt_pm_get(rq->engine->gt);
+	__intel_gt_pm_get(rq->engine->gt);
 	return i915_request_get(rq);
 }
 
@@ -530,7 +280,7 @@ static void schedule_out(struct i915_request *rq)
 {
 	trace_i915_request_out(rq);
 
-	intel_gt_pm_put(rq->engine->gt);
+	intel_gt_pm_put_async(rq->engine->gt);
 	i915_request_put(rq);
 }
 
@@ -565,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			if (last && rq->hw_context != last->hw_context) {
+			if (last && rq->context != last->context) {
 				if (port == last_port)
 					goto done;
 
@@ -624,7 +374,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	/*
 	 * Prevent request submission to the hardware until we have
@@ -651,7 +401,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
-static void guc_reset(struct intel_engine_cs *engine, bool stalled)
+static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *rq;
@@ -670,20 +420,20 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 		stalled = false;
 
 	__i915_request_reset(rq, stalled);
-	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
+	intel_lr_context_reset(engine, rq->context, rq->head, stalled);
 
 out_unlock:
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static void guc_cancel_requests(struct intel_engine_cs *engine)
+static void guc_reset_cancel(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *rq, *rn;
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	ENGINE_TRACE(engine, "\n");
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -744,8 +494,8 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
 		/* And kick in case we missed a new request submission. */
 		tasklet_hi_schedule(&execlists->tasklet);
 
-	GEM_TRACE("%s: depth->%d\n", engine->name,
-		  atomic_read(&execlists->tasklet.count));
+	ENGINE_TRACE(engine, "depth->%d\n",
+		     atomic_read(&execlists->tasklet.count));
 }
 
 /*
@@ -754,213 +504,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
  * path of guc_submit() above.
  */
 
-/* Check that a doorbell register is in the expected state */
-static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
-{
-	bool valid;
-
-	GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
-
-	valid = __doorbell_valid(guc, db_id);
-
-	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
-		return true;
-
-	DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n",
-			 db_id, yesno(valid));
-
-	return false;
-}
-
-static bool guc_verify_doorbells(struct intel_guc *guc)
-{
-	bool doorbells_ok = true;
-	u16 db_id;
-
-	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
-		if (!doorbell_ok(guc, db_id))
-			doorbells_ok = false;
-
-	return doorbells_ok;
-}
-
-/**
- * guc_client_alloc() - Allocate an intel_guc_client
- * @guc:	the intel_guc structure
- * @priority:	four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
- *		The kernel client to replace ExecList submission is created with
- *		NORMAL priority. Priority of a client for scheduler can be HIGH,
- *		while a preemption context can use CRITICAL.
- *
- * Return:	An intel_guc_client object if success, else NULL.
- */
-static struct intel_guc_client *
-guc_client_alloc(struct intel_guc *guc, u32 priority)
-{
-	struct intel_guc_client *client;
-	struct i915_vma *vma;
-	void *vaddr;
-	int ret;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (!client)
-		return ERR_PTR(-ENOMEM);
-
-	client->guc = guc;
-	client->priority = priority;
-	client->doorbell_id = GUC_DOORBELL_INVALID;
-	spin_lock_init(&client->wq_lock);
-
-	ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
-			     GFP_KERNEL);
-	if (ret < 0)
-		goto err_client;
-
-	client->stage_id = ret;
-
-	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
-	vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_id;
-	}
-
-	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
-	client->vma = vma;
-
-	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto err_vma;
-	}
-	client->vaddr = vaddr;
-
-	ret = reserve_doorbell(client);
-	if (ret)
-		goto err_vaddr;
-
-	client->doorbell_offset = __select_cacheline(guc);
-
-	/*
-	 * Since the doorbell only requires a single cacheline, we can save
-	 * space by putting the application process descriptor in the same
-	 * page. Use the half of the page that doesn't include the doorbell.
-	 */
-	if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
-		client->proc_desc_offset = 0;
-	else
-		client->proc_desc_offset = (GUC_DB_SIZE / 2);
-
-	DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n",
-			 priority, client, client->stage_id);
-	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
-			 client->doorbell_id, client->doorbell_offset);
-
-	return client;
-
-err_vaddr:
-	i915_gem_object_unpin_map(client->vma->obj);
-err_vma:
-	i915_vma_unpin_and_release(&client->vma, 0);
-err_id:
-	ida_simple_remove(&guc->stage_ids, client->stage_id);
-err_client:
-	kfree(client);
-	return ERR_PTR(ret);
-}
-
-static void guc_client_free(struct intel_guc_client *client)
-{
-	unreserve_doorbell(client);
-	i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP);
-	ida_simple_remove(&client->guc->stage_ids, client->stage_id);
-	kfree(client);
-}
-
-static inline bool ctx_save_restore_disabled(struct intel_context *ce)
-{
-	u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1];
-
-#define SR_DISABLED \
-	_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \
-			   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)
-
-	return (sr & SR_DISABLED) == SR_DISABLED;
-
-#undef SR_DISABLED
-}
-
-static int guc_clients_create(struct intel_guc *guc)
-{
-	struct intel_guc_client *client;
-
-	GEM_BUG_ON(guc->execbuf_client);
-
-	client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL);
-	if (IS_ERR(client)) {
-		DRM_ERROR("Failed to create GuC client for submission!\n");
-		return PTR_ERR(client);
-	}
-	guc->execbuf_client = client;
-
-	return 0;
-}
-
-static void guc_clients_destroy(struct intel_guc *guc)
-{
-	struct intel_guc_client *client;
-
-	client = fetch_and_zero(&guc->execbuf_client);
-	if (client)
-		guc_client_free(client);
-}
-
-static int __guc_client_enable(struct intel_guc_client *client)
-{
-	int ret;
-
-	guc_proc_desc_init(client);
-	guc_stage_desc_init(client);
-
-	ret = create_doorbell(client);
-	if (ret)
-		goto fail;
-
-	return 0;
-
-fail:
-	guc_stage_desc_fini(client);
-	guc_proc_desc_fini(client);
-	return ret;
-}
-
-static void __guc_client_disable(struct intel_guc_client *client)
-{
-	/*
-	 * By the time we're here, GuC may have already been reset. if that is
-	 * the case, instead of trying (in vain) to communicate with it, let's
-	 * just cleanup the doorbell HW and our internal state.
-	 */
-	if (intel_guc_is_running(client->guc))
-		destroy_doorbell(client);
-	else
-		__fini_doorbell(client);
-
-	guc_stage_desc_fini(client);
-	guc_proc_desc_fini(client);
-}
-
-static int guc_clients_enable(struct intel_guc *guc)
-{
-	return __guc_client_enable(guc->execbuf_client);
-}
-
-static void guc_clients_disable(struct intel_guc *guc)
-{
-	if (guc->execbuf_client)
-		__guc_client_disable(guc->execbuf_client);
-}
-
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
  * at firmware loading time.
@@ -981,13 +524,20 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	 */
 	GEM_BUG_ON(!guc->stage_desc_pool);
 
-	WARN_ON(!guc_verify_doorbells(guc));
-	ret = guc_clients_create(guc);
+	ret = guc_workqueue_create(guc);
 	if (ret)
 		goto err_pool;
 
+	ret = guc_proc_desc_create(guc);
+	if (ret)
+		goto err_workqueue;
+
+	spin_lock_init(&guc->wq_lock);
+
 	return 0;
 
+err_workqueue:
+	guc_workqueue_destroy(guc);
 err_pool:
 	guc_stage_desc_pool_destroy(guc);
 	return ret;
@@ -995,83 +545,37 @@ err_pool:
 
 void intel_guc_submission_fini(struct intel_guc *guc)
 {
-	guc_clients_destroy(guc);
-	WARN_ON(!guc_verify_doorbells(guc));
-
-	if (guc->stage_desc_pool)
+	if (guc->stage_desc_pool) {
+		guc_proc_desc_destroy(guc);
+		guc_workqueue_destroy(guc);
 		guc_stage_desc_pool_destroy(guc);
+	}
 }
 
 static void guc_interrupts_capture(struct intel_gt *gt)
 {
-	struct intel_rps *rps = &gt->i915->gt_pm.rps;
 	struct intel_uncore *uncore = gt->uncore;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int irqs;
+	u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+	u32 dmask = irqs << 16 | irqs;
 
-	/* tell all command streamers to forward interrupts (but not vblank)
-	 * to GuC
-	 */
-	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
-	for_each_engine(engine, gt->i915, id)
-		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
-	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
-	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
-	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
-	/* These three registers have the same bit definitions */
-	intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs);
-	intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs);
-	intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs);
+	GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
 
-	/*
-	 * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
-	 * (unmasked) PM interrupts to the GuC. All other bits of this
-	 * register *disable* generation of a specific interrupt.
-	 *
-	 * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when
-	 * writing to the PM interrupt mask register, i.e. interrupts
-	 * that must not be disabled.
-	 *
-	 * If the GuC is handling these interrupts, then we must not let
-	 * the PM code disable ANY interrupt that the GuC is expecting.
-	 * So for each ENABLED (0) bit in this register, we must SET the
-	 * bit in pm_intrmsk_mbz so that it's left enabled for the GuC.
-	 * GuC needs ARAT expired interrupt unmasked hence it is set in
-	 * pm_intrmsk_mbz.
-	 *
-	 * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
-	 * result in the register bit being left SET!
-	 */
-	rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
-	rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+	/* Don't handle the ctx switch interrupt in GuC submission mode */
+	intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
+	intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
 }
 
 static void guc_interrupts_release(struct intel_gt *gt)
 {
-	struct intel_rps *rps = &gt->i915->gt_pm.rps;
 	struct intel_uncore *uncore = gt->uncore;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int irqs;
+	u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+	u32 dmask = irqs << 16 | irqs;
 
-	/*
-	 * tell all command streamers NOT to forward interrupts or vblank
-	 * to GuC.
-	 */
-	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
-	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
-	for_each_engine(engine, gt->i915, id)
-		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
-	/* route all GT interrupts to the host */
-	intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0);
-	intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0);
-	intel_uncore_write(uncore, GUC_WD_VECS_IER, 0);
-
-	rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
-	rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+	GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
+
+	/* Handle ctx switch interrupts again */
+	intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
+	intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
 }
 
 static void guc_set_default_submission(struct intel_engine_cs *engine)
@@ -1095,11 +599,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = engine->unpark = NULL;
 
 	engine->reset.prepare = guc_reset_prepare;
-	engine->reset.reset = guc_reset;
+	engine->reset.rewind = guc_reset_rewind;
+	engine->reset.cancel = guc_reset_cancel;
 	engine->reset.finish = guc_reset_finish;
 
-	engine->cancel_requests = guc_cancel_requests;
-
 	engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
 	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 
@@ -1112,16 +615,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
 }
 
-int intel_guc_submission_enable(struct intel_guc *guc)
+void intel_guc_submission_enable(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	int err;
-
-	err = i915_inject_load_error(gt->i915, -ENXIO);
-	if (err)
-		return err;
 
 	/*
 	 * We're using GuC work items for submitting work through GuC. Since
@@ -1136,21 +634,16 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 		     sizeof(struct guc_wq_item) *
 		     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
-	GEM_BUG_ON(!guc->execbuf_client);
-
-	err = guc_clients_enable(guc);
-	if (err)
-		return err;
+	guc_proc_desc_init(guc);
+	guc_stage_desc_init(guc);
 
 	/* Take over from manual control of ELSP (execlists) */
 	guc_interrupts_capture(gt);
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		engine->set_default_submission = guc_set_default_submission;
 		engine->set_default_submission(engine);
 	}
-
-	return 0;
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1159,8 +652,12 @@ void intel_guc_submission_disable(struct intel_guc *guc)
 
 	GEM_BUG_ON(gt->awake); /* GT should be parked first */
 
+	/* Note: By the time we're here, GuC may have already been reset */
+
 	guc_interrupts_release(gt);
-	guc_clients_disable(guc);
+
+	guc_stage_desc_fini(guc);
+	guc_proc_desc_fini(guc);
 }
 
 static bool __guc_submission_support(struct intel_guc *guc)
@@ -1179,6 +676,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
 	guc->submission_supported = __guc_submission_support(guc);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_guc.c"
-#endif
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
+{
+	return engine->set_default_submission == guc_set_default_submission;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 54d716828352..e402a2932592 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -6,62 +6,18 @@
 #ifndef _INTEL_GUC_SUBMISSION_H_
 #define _INTEL_GUC_SUBMISSION_H_
 
-#include <linux/spinlock.h>
+#include <linux/types.h>
 
-#include "gt/intel_engine_types.h"
-
-#include "i915_gem.h"
-#include "i915_selftest.h"
-
-struct drm_i915_private;
-
-/*
- * This structure primarily describes the GEM object shared with the GuC.
- * The specs sometimes refer to this object as a "GuC context", but we use
- * the term "client" to avoid confusion with hardware contexts. This
- * GEM object is held for the entire lifetime of our interaction with
- * the GuC, being allocated before the GuC is loaded with its firmware.
- * Because there's no way to update the address used by the GuC after
- * initialisation, the shared object must stay pinned into the GGTT as
- * long as the GuC is in use. We also keep the first page (only) mapped
- * into kernel address space, as it includes shared data that must be
- * updated on every request submission.
- *
- * The single GEM object described here is actually made up of several
- * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process descriptor" which holds sequence data for
- * the doorbell, and one cacheline which actually *is* the doorbell; a
- * write to this will "ring the doorbell" (i.e. send an interrupt to the
- * GuC). The subsequent  pages of the client object constitute the work
- * queue (a circular array of work items), again described in the process
- * descriptor. Work queue pages are mapped momentarily as required.
- */
-struct intel_guc_client {
-	struct i915_vma *vma;
-	void *vaddr;
-	struct intel_guc *guc;
-
-	/* bitmap of (host) engine ids */
-	u32 priority;
-	u32 stage_id;
-	u32 proc_desc_offset;
-
-	u16 doorbell_id;
-	unsigned long doorbell_offset;
-
-	/* Protects GuC client's WQ access */
-	spinlock_t wq_lock;
-
-	/* For testing purposes, use nop WQ items instead of real ones */
-	I915_SELFTEST_DECLARE(bool use_nop_wqi);
-};
+struct intel_guc;
+struct intel_engine_cs;
 
 void intel_guc_submission_init_early(struct intel_guc *guc);
 int intel_guc_submission_init(struct intel_guc *guc);
-int intel_guc_submission_enable(struct intel_guc *guc);
+void intel_guc_submission_enable(struct intel_guc *guc);
 void intel_guc_submission_disable(struct intel_guc *guc);
 void intel_guc_submission_fini(struct intel_guc *guc);
 int intel_guc_preempt_work_create(struct intel_guc *guc);
 void intel_guc_preempt_work_destroy(struct intel_guc *guc);
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d4625c97b4f9..32a069841c14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -9,6 +9,34 @@
 #include "intel_huc.h"
 #include "i915_drv.h"
 
+/**
+ * DOC: HuC
+ *
+ * The HuC is a dedicated microcontroller for usage in media HEVC (High
+ * Efficiency Video Coding) operations. Userspace can directly use the firmware
+ * capabilities by adding HuC specific commands to batch buffers.
+ *
+ * The kernel driver is only responsible for loading the HuC firmware and
+ * triggering its security authentication, which is performed by the GuC. For
+ * The GuC to correctly perform the authentication, the HuC binary must be
+ * loaded before the GuC one. Loading the HuC is optional; however, not using
+ * the HuC might negatively impact power usage and/or performance of media
+ * workloads, depending on the use-cases.
+ *
+ * See https://github.com/intel/media-driver for the latest details on HuC
+ * functionality.
+ */
+
+/**
+ * DOC: HuC Memory Management
+ *
+ * Similarly to the GuC, the HuC can't do any memory allocations on its own,
+ * with the difference being that the allocations for HuC usage are handled by
+ * the userspace driver instead of the kernel one. The HuC accesses the memory
+ * via the PPGTT belonging to the context loaded on the VCS executing the
+ * HuC-specific commands.
+ */
+
 void intel_huc_init_early(struct intel_huc *huc)
 {
 	struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -35,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
 	void *vaddr;
 	int err;
 
-	err = i915_inject_load_error(gt->i915, -ENXIO);
+	err = i915_inject_probe_error(gt->i915, -ENXIO);
 	if (err)
 		return err;
 
@@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc)
  *
  * Called after HuC and GuC firmware loading during intel_uc_init_hw().
  *
- * This function pins HuC firmware image object into GGTT.
- * Then it invokes GuC action to authenticate passing the offset to RSA
- * signature through intel_guc_auth_huc(). It then waits for 50ms for
- * firmware verification ACK and unpins the object.
+ * This function invokes the GuC action to authenticate the HuC firmware,
+ * passing the offset of the RSA signature to intel_guc_auth_huc(). It then
+ * waits for up to 50ms for firmware verification ACK.
  */
 int intel_huc_auth(struct intel_huc *huc)
 {
@@ -134,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc)
 	if (!intel_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
 
-	ret = i915_inject_load_error(gt->i915, -ENXIO);
+	ret = i915_inject_probe_error(gt->i915, -ENXIO);
 	if (ret)
 		goto fail;
 
@@ -185,7 +212,7 @@ int intel_huc_check_status(struct intel_huc *huc)
 	if (!intel_huc_is_supported(huc))
 		return -ENODEV;
 
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
 		status = intel_uncore_read(gt->uncore, huc->status.reg);
 
 	return (status & huc->status.mask) == huc->status.value;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 74602487ed67..eee193bf2cc4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,21 +8,6 @@
 #include "i915_drv.h"
 
 /**
- * DOC: HuC Firmware
- *
- * Motivation:
- * GEN9 introduces a new dedicated firmware for usage in media HEVC (High
- * Efficiency Video Coding) operations. Userspace can use the firmware
- * capabilities by adding HuC specific commands to batch buffers.
- *
- * Implementation:
- * The same firmware loader is used as the GuC. However, the actual
- * loading to HW is deferred until GEM initialization is done.
- *
- * Note that HuC firmware loading must be done before GuC loading.
- */
-
-/**
  * intel_huc_fw_init_early() - initializes HuC firmware struct
  * @huc: intel_huc struct
  *
@@ -54,5 +39,5 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
 int intel_huc_fw_upload(struct intel_huc *huc)
 {
 	/* HW doesn't look at destination address for HuC, so set it to 0 */
-	return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL);
+	return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 71ee7ab035cc..64934a876a50 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -12,6 +12,9 @@
 
 #include "i915_drv.h"
 
+static const struct intel_uc_ops uc_ops_off;
+static const struct intel_uc_ops uc_ops_on;
+
 /* Reset GuC providing us with fresh state for both GuC and HuC.
  */
 static int __intel_uc_reset_hw(struct intel_uc *uc)
@@ -20,7 +23,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
 	int ret;
 	u32 guc_status;
 
-	ret = i915_inject_load_error(gt->i915, -ENXIO);
+	ret = i915_inject_probe_error(gt->i915, -ENXIO);
 	if (ret)
 		return ret;
 
@@ -89,6 +92,11 @@ void intel_uc_init_early(struct intel_uc *uc)
 	intel_huc_init_early(&uc->huc);
 
 	__confirm_options(uc);
+
+	if (intel_uc_uses_guc(uc))
+		uc->ops = &uc_ops_on;
+	else
+		uc->ops = &uc_ops_off;
 }
 
 void intel_uc_driver_late_release(struct intel_uc *uc)
@@ -123,6 +131,11 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
 		i915_gem_object_put(log);
 }
 
+static inline bool guc_communication_enabled(struct intel_guc *guc)
+{
+	return intel_guc_ct_enabled(&guc->ct);
+}
+
 /*
  * Events triggered while CT buffers are disabled are logged in the SCRATCH_15
  * register using the same bits used in the CT message payload. Since our
@@ -158,7 +171,7 @@ static void guc_handle_mmio_msg(struct intel_guc *guc)
 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
 
 	/* we need communication to be enabled to reply to GuC */
-	GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop);
+	GEM_BUG_ON(!guc_communication_enabled(guc));
 
 	if (!guc->mmio_msg)
 		return;
@@ -185,11 +198,6 @@ static void guc_disable_interrupts(struct intel_guc *guc)
 	guc->interrupts.disable(guc);
 }
 
-static inline bool guc_communication_enabled(struct intel_guc *guc)
-{
-	return guc->send != intel_guc_send_nop;
-}
-
 static int guc_enable_communication(struct intel_guc *guc)
 {
 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -197,7 +205,7 @@ static int guc_enable_communication(struct intel_guc *guc)
 
 	GEM_BUG_ON(guc_communication_enabled(guc));
 
-	ret = i915_inject_load_error(i915, -ENXIO);
+	ret = i915_inject_probe_error(i915, -ENXIO);
 	if (ret)
 		return ret;
 
@@ -205,9 +213,6 @@ static int guc_enable_communication(struct intel_guc *guc)
 	if (ret)
 		return ret;
 
-	guc->send = intel_guc_send_ct;
-	guc->handler = intel_guc_to_host_event_handler_ct;
-
 	/* check for mmio messages received before/during the CT enable */
 	guc_get_mmio_msg(guc);
 	guc_handle_mmio_msg(guc);
@@ -216,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc)
 
 	/* check for CT messages received before we enabled interrupts */
 	spin_lock_irq(&i915->irq_lock);
-	intel_guc_to_host_event_handler_ct(guc);
+	intel_guc_ct_event_handler(&guc->ct);
 	spin_unlock_irq(&i915->irq_lock);
 
 	DRM_INFO("GuC communication enabled\n");
@@ -224,16 +229,6 @@ static int guc_enable_communication(struct intel_guc *guc)
 	return 0;
 }
 
-static void guc_stop_communication(struct intel_guc *guc)
-{
-	intel_guc_ct_stop(&guc->ct);
-
-	guc->send = intel_guc_send_nop;
-	guc->handler = intel_guc_to_host_event_handler_nop;
-
-	guc_clear_mmio_msg(guc);
-}
-
 static void guc_disable_communication(struct intel_guc *guc)
 {
 	/*
@@ -245,9 +240,6 @@ static void guc_disable_communication(struct intel_guc *guc)
 
 	guc_disable_interrupts(guc);
 
-	guc->send = intel_guc_send_nop;
-	guc->handler = intel_guc_to_host_event_handler_nop;
-
 	intel_guc_ct_disable(&guc->ct);
 
 	/*
@@ -261,41 +253,33 @@ static void guc_disable_communication(struct intel_guc *guc)
 	DRM_INFO("GuC communication disabled\n");
 }
 
-void intel_uc_fetch_firmwares(struct intel_uc *uc)
+static void __uc_fetch_firmwares(struct intel_uc *uc)
 {
-	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
 	int err;
 
-	if (!intel_uc_uses_guc(uc))
-		return;
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
-	err = intel_uc_fw_fetch(&uc->guc.fw, i915);
+	err = intel_uc_fw_fetch(&uc->guc.fw);
 	if (err)
 		return;
 
 	if (intel_uc_uses_huc(uc))
-		intel_uc_fw_fetch(&uc->huc.fw, i915);
+		intel_uc_fw_fetch(&uc->huc.fw);
 }
 
-void intel_uc_cleanup_firmwares(struct intel_uc *uc)
+static void __uc_cleanup_firmwares(struct intel_uc *uc)
 {
-	if (!intel_uc_uses_guc(uc))
-		return;
-
-	if (intel_uc_uses_huc(uc))
-		intel_uc_fw_cleanup_fetch(&uc->huc.fw);
-
+	intel_uc_fw_cleanup_fetch(&uc->huc.fw);
 	intel_uc_fw_cleanup_fetch(&uc->guc.fw);
 }
 
-void intel_uc_init(struct intel_uc *uc)
+static void __uc_init(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
 	int ret;
 
-	if (!intel_uc_uses_guc(uc))
-		return;
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
 	/* XXX: GuC submission is unavailable for now */
 	GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
@@ -310,17 +294,10 @@ void intel_uc_init(struct intel_uc *uc)
 		intel_huc_init(huc);
 }
 
-void intel_uc_fini(struct intel_uc *uc)
+static void __uc_fini(struct intel_uc *uc)
 {
-	struct intel_guc *guc = &uc->guc;
-
-	if (!intel_uc_uses_guc(uc))
-		return;
-
-	if (intel_uc_uses_huc(uc))
-		intel_huc_fini(&uc->huc);
-
-	intel_guc_fini(guc);
+	intel_huc_fini(&uc->huc);
+	intel_guc_fini(&uc->guc);
 
 	__uc_free_load_err_log(uc);
 }
@@ -338,14 +315,6 @@ static int __uc_sanitize(struct intel_uc *uc)
 	return __intel_uc_reset_hw(uc);
 }
 
-void intel_uc_sanitize(struct intel_uc *uc)
-{
-	if (!intel_uc_supports_guc(uc))
-		return;
-
-	__uc_sanitize(uc);
-}
-
 /* Initialize and verify the uC regs related to uC positioning in WOPCM */
 static int uc_init_wopcm(struct intel_uc *uc)
 {
@@ -368,7 +337,7 @@ static int uc_init_wopcm(struct intel_uc *uc)
 	GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
 	GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
-	err = i915_inject_load_error(gt->i915, -ENXIO);
+	err = i915_inject_probe_error(gt->i915, -ENXIO);
 	if (err)
 		return err;
 
@@ -409,13 +378,8 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
 	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
 }
 
-int intel_uc_init_hw(struct intel_uc *uc)
+static int __uc_check_hw(struct intel_uc *uc)
 {
-	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
-	struct intel_guc *guc = &uc->guc;
-	struct intel_huc *huc = &uc->huc;
-	int ret, attempts;
-
 	if (!intel_uc_supports_guc(uc))
 		return 0;
 
@@ -424,11 +388,24 @@ int intel_uc_init_hw(struct intel_uc *uc)
 	 * before on this system after reboot, otherwise we risk GPU hangs.
 	 * To check if GuC was loaded before we look at WOPCM registers.
 	 */
-	if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
-		return 0;
+	if (uc_is_wopcm_locked(uc))
+		return -EIO;
+
+	return 0;
+}
+
+static int __uc_init_hw(struct intel_uc *uc)
+{
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+	struct intel_guc *guc = &uc->guc;
+	struct intel_huc *huc = &uc->huc;
+	int ret, attempts;
+
+	GEM_BUG_ON(!intel_uc_supports_guc(uc));
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
 	if (!intel_uc_fw_is_available(&guc->fw)) {
-		ret = uc_is_wopcm_locked(uc) ||
+		ret = __uc_check_hw(uc) ||
 		      intel_uc_fw_is_overridden(&guc->fw) ||
 		      intel_uc_supports_guc_submission(uc) ?
 		      intel_uc_fw_status_to_error(guc->fw.status) : 0;
@@ -482,11 +459,8 @@ int intel_uc_init_hw(struct intel_uc *uc)
 	if (ret)
 		goto err_communication;
 
-	if (intel_uc_supports_guc_submission(uc)) {
-		ret = intel_guc_submission_enable(guc);
-		if (ret)
-			goto err_communication;
-	}
+	if (intel_uc_supports_guc_submission(uc))
+		intel_guc_submission_enable(guc);
 
 	dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
 		 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
@@ -527,7 +501,7 @@ err_out:
 	return -EIO;
 }
 
-void intel_uc_fini_hw(struct intel_uc *uc)
+static void __uc_fini_hw(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 
@@ -537,7 +511,9 @@ void intel_uc_fini_hw(struct intel_uc *uc)
 	if (intel_uc_supports_guc_submission(uc))
 		intel_guc_submission_disable(guc);
 
-	guc_disable_communication(guc);
+	if (guc_communication_enabled(guc))
+		guc_disable_communication(guc);
+
 	__uc_sanitize(uc);
 }
 
@@ -554,7 +530,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
 	if (!intel_guc_is_running(guc))
 		return;
 
-	guc_stop_communication(guc);
+	guc_disable_communication(guc);
 	__uc_sanitize(uc);
 }
 
@@ -581,7 +557,7 @@ void intel_uc_suspend(struct intel_uc *uc)
 	if (!intel_guc_is_running(guc))
 		return;
 
-	with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref)
+	with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
 		intel_uc_runtime_suspend(uc);
 }
 
@@ -625,3 +601,20 @@ int intel_uc_runtime_resume(struct intel_uc *uc)
 	 */
 	return __uc_resume(uc, true);
 }
+
+static const struct intel_uc_ops uc_ops_off = {
+	.init_hw = __uc_check_hw,
+};
+
+static const struct intel_uc_ops uc_ops_on = {
+	.sanitize = __uc_sanitize,
+
+	.init_fw = __uc_fetch_firmwares,
+	.fini_fw = __uc_cleanup_firmwares,
+
+	.init = __uc_init,
+	.fini = __uc_fini,
+
+	.init_hw = __uc_init_hw,
+	.fini_hw = __uc_fini_hw,
+};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 527995c21196..49c913524686 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -10,7 +10,20 @@
 #include "intel_huc.h"
 #include "i915_params.h"
 
+struct intel_uc;
+
+struct intel_uc_ops {
+	int (*sanitize)(struct intel_uc *uc);
+	void (*init_fw)(struct intel_uc *uc);
+	void (*fini_fw)(struct intel_uc *uc);
+	void (*init)(struct intel_uc *uc);
+	void (*fini)(struct intel_uc *uc);
+	int (*init_hw)(struct intel_uc *uc);
+	void (*fini_hw)(struct intel_uc *uc);
+};
+
 struct intel_uc {
+	struct intel_uc_ops const *ops;
 	struct intel_guc guc;
 	struct intel_huc huc;
 
@@ -21,13 +34,6 @@ struct intel_uc {
 void intel_uc_init_early(struct intel_uc *uc);
 void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
-void intel_uc_fetch_firmwares(struct intel_uc *uc);
-void intel_uc_cleanup_firmwares(struct intel_uc *uc);
-void intel_uc_sanitize(struct intel_uc *uc);
-void intel_uc_init(struct intel_uc *uc);
-int intel_uc_init_hw(struct intel_uc *uc);
-void intel_uc_fini_hw(struct intel_uc *uc);
-void intel_uc_fini(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);
 void intel_uc_runtime_suspend(struct intel_uc *uc);
@@ -64,4 +70,20 @@ static inline bool intel_uc_uses_huc(struct intel_uc *uc)
 	return intel_huc_is_enabled(&uc->huc);
 }
 
+#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
+static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
+{ \
+	if (uc->ops->_OPS) \
+		return uc->ops->_OPS(uc); \
+	return _RET; \
+}
+intel_uc_ops_function(sanitize, sanitize, int, 0);
+intel_uc_ops_function(fetch_firmwares, init_fw, void, );
+intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
+intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(fini, fini, void, );
+intel_uc_ops_function(init_hw, init_hw, int, 0);
+intel_uc_ops_function(fini_hw, fini_hw, void, );
+#undef intel_uc_ops_function
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bd22bf11adad..8ee0a0c7f447 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,7 +11,6 @@
 #include "intel_uc_fw_abi.h"
 #include "i915_drv.h"
 
-#ifdef CONFIG_DRM_I915_DEBUG_GUC
 static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
 {
 	GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
@@ -22,6 +21,7 @@ static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
 	return container_of(uc_fw, struct intel_gt, uc.huc.fw);
 }
 
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
 void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 			       enum intel_uc_fw_status status)
 {
@@ -37,27 +37,34 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 /*
  * List of required GuC and HuC binaries per-platform.
  * Must be ordered based on platform + revid, from newer to older.
+ *
+ * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
+ * between 33.0 and 35.2 are only related to new additions to support new Gen12
+ * features.
  */
 #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
-	fw_def(ICELAKE,    0, guc_def(icl, 33, 0, 0), huc_def(icl,  8,  4, 3238)) \
-	fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
-	fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \
-	fw_def(KABYLAKE,   0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
-	fw_def(BROXTON,    0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01,  8, 2893)) \
-	fw_def(SKYLAKE,    0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398))
-
-#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \
+	fw_def(TIGERLAKE,   0, guc_def(tgl, 35, 2, 0), huc_def(tgl,  7, 0, 3)) \
+	fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl,  9, 0, 0)) \
+	fw_def(ICELAKE,     0, guc_def(icl, 33, 0, 0), huc_def(icl,  9, 0, 0)) \
+	fw_def(COFFEELAKE,  5, guc_def(cml, 33, 0, 0), huc_def(cml,  4, 0, 0)) \
+	fw_def(COFFEELAKE,  0, guc_def(kbl, 33, 0, 0), huc_def(kbl,  4, 0, 0)) \
+	fw_def(GEMINILAKE,  0, guc_def(glk, 33, 0, 0), huc_def(glk,  4, 0, 0)) \
+	fw_def(KABYLAKE,    0, guc_def(kbl, 33, 0, 0), huc_def(kbl,  4, 0, 0)) \
+	fw_def(BROXTON,     0, guc_def(bxt, 33, 0, 0), huc_def(bxt,  2, 0, 0)) \
+	fw_def(SKYLAKE,     0, guc_def(skl, 33, 0, 0), huc_def(skl,  2, 0, 0))
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
 	"i915/" \
 	__stringify(prefix_) name_ \
-	__stringify(major_) separator_ \
-	__stringify(minor_) separator_ \
+	__stringify(major_) "." \
+	__stringify(minor_) "." \
 	__stringify(patch_) ".bin"
 
 #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
-	__MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_)
+	__MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)
 
 #define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
-	__MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_)
+	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
 
 /* All blobs need to be declared via MODULE_FIRMWARE() */
 #define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
@@ -212,35 +219,36 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
 				  INTEL_UC_FIRMWARE_NOT_SUPPORTED);
 }
 
-static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
-				      struct drm_i915_private *i915,
-				      int e)
+static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
 {
+	struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
 	bool user = e == -EINVAL;
 
-	if (i915_inject_load_error(i915, e)) {
+	if (i915_inject_probe_error(i915, e)) {
 		/* non-existing blob */
 		uc_fw->path = "<invalid>";
 		uc_fw->user_overridden = user;
-	} else if (i915_inject_load_error(i915, e)) {
+	} else if (i915_inject_probe_error(i915, e)) {
 		/* require next major version */
 		uc_fw->major_ver_wanted += 1;
 		uc_fw->minor_ver_wanted = 0;
 		uc_fw->user_overridden = user;
-	} else if (i915_inject_load_error(i915, e)) {
+	} else if (i915_inject_probe_error(i915, e)) {
 		/* require next minor version */
 		uc_fw->minor_ver_wanted += 1;
 		uc_fw->user_overridden = user;
-	} else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) {
+	} else if (uc_fw->major_ver_wanted &&
+		   i915_inject_probe_error(i915, e)) {
 		/* require prev major version */
 		uc_fw->major_ver_wanted -= 1;
 		uc_fw->minor_ver_wanted = 0;
 		uc_fw->user_overridden = user;
-	} else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) {
+	} else if (uc_fw->minor_ver_wanted &&
+		   i915_inject_probe_error(i915, e)) {
 		/* require prev minor version - hey, this should work! */
 		uc_fw->minor_ver_wanted -= 1;
 		uc_fw->user_overridden = user;
-	} else if (user && i915_inject_load_error(i915, e)) {
+	} else if (user && i915_inject_probe_error(i915, e)) {
 		/* officially unsupported platform */
 		uc_fw->major_ver_wanted = 0;
 		uc_fw->minor_ver_wanted = 0;
@@ -251,14 +259,14 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
 /**
  * intel_uc_fw_fetch - fetch uC firmware
  * @uc_fw: uC firmware
- * @i915: device private
  *
  * Fetch uC firmware into GEM obj.
  *
  * Return: 0 on success, a negative errno code on failure.
  */
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 {
+	struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
 	struct device *dev = i915->drm.dev;
 	struct drm_i915_gem_object *obj;
 	const struct firmware *fw = NULL;
@@ -269,12 +277,12 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
 	GEM_BUG_ON(!i915->wopcm.size);
 	GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
 
-	err = i915_inject_load_error(i915, -ENXIO);
+	err = i915_inject_probe_error(i915, -ENXIO);
 	if (err)
 		return err;
 
-	__force_fw_fetch_failures(uc_fw, i915, -EINVAL);
-	__force_fw_fetch_failures(uc_fw, i915, -ESTALE);
+	__force_fw_fetch_failures(uc_fw, -EINVAL);
+	__force_fw_fetch_failures(uc_fw, -ESTALE);
 
 	err = request_firmware(&fw, uc_fw->path, dev);
 	if (err)
@@ -337,25 +345,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
 	}
 
 	/* Get version numbers from the CSS header */
-	switch (uc_fw->type) {
-	case INTEL_UC_FW_TYPE_GUC:
-		uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR,
-						   css->sw_version);
-		uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR,
-						   css->sw_version);
-		break;
-
-	case INTEL_UC_FW_TYPE_HUC:
-		uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR,
-						   css->sw_version);
-		uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR,
-						   css->sw_version);
-		break;
-
-	default:
-		MISSING_CASE(uc_fw->type);
-		break;
-	}
+	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+					   css->sw_version);
+	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+					   css->sw_version);
 
 	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
 	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
@@ -396,24 +389,24 @@ fail:
 	return err;
 }
 
-static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
+static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)
 {
+	struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
 	struct drm_mm_node *node = &ggtt->uc_fw;
 
-	GEM_BUG_ON(!node->allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(node));
 	GEM_BUG_ON(upper_32_bits(node->start));
 	GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
 
 	return lower_32_bits(node->start);
 }
 
-static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
-				  struct intel_gt *gt)
+static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
 {
 	struct drm_i915_gem_object *obj = uc_fw->obj;
-	struct i915_ggtt *ggtt = gt->ggtt;
+	struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
 	struct i915_vma dummy = {
-		.node.start = uc_fw_ggtt_offset(uc_fw, ggtt),
+		.node.start = uc_fw_ggtt_offset(uc_fw),
 		.node.size = obj->base.size,
 		.pages = obj->mm.pages,
 		.vm = &ggtt->vm,
@@ -428,37 +421,36 @@ static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
 	ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
 }
 
-static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw,
-				    struct intel_gt *gt)
+static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
 {
 	struct drm_i915_gem_object *obj = uc_fw->obj;
-	struct i915_ggtt *ggtt = gt->ggtt;
-	u64 start = uc_fw_ggtt_offset(uc_fw, ggtt);
+	struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+	u64 start = uc_fw_ggtt_offset(uc_fw);
 
 	ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
 }
 
-static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
-		      u32 wopcm_offset, u32 dma_flags)
+static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
 {
+	struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
 	struct intel_uncore *uncore = gt->uncore;
 	u64 offset;
 	int ret;
 
-	ret = i915_inject_load_error(gt->i915, -ETIMEDOUT);
+	ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT);
 	if (ret)
 		return ret;
 
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
 	/* Set the source address for the uCode */
-	offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt);
+	offset = uc_fw_ggtt_offset(uc_fw);
 	GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
 	intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
 	intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
 
 	/* Set the DMA destination */
-	intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset);
+	intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset);
 	intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
 
 	/*
@@ -490,23 +482,22 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
 /**
  * intel_uc_fw_upload - load uC firmware using custom loader
  * @uc_fw: uC firmware
- * @gt: the intel_gt structure
- * @wopcm_offset: destination offset in wopcm
+ * @dst_offset: destination offset
  * @dma_flags: flags for flags for dma ctrl
  *
  * Loads uC firmware and updates internal flags.
  *
  * Return: 0 on success, non-zero on failure.
  */
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
-		       u32 wopcm_offset, u32 dma_flags)
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
 {
+	struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
 	int err;
 
 	/* make sure the status was cleared the last time we reset the uc */
 	GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
 
-	err = i915_inject_load_error(gt->i915, -ENOEXEC);
+	err = i915_inject_probe_error(gt->i915, -ENOEXEC);
 	if (err)
 		return err;
 
@@ -514,9 +505,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
 		return -ENOEXEC;
 
 	/* Call custom loader */
-	intel_uc_fw_ggtt_bind(uc_fw, gt);
-	err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
-	intel_uc_fw_ggtt_unbind(uc_fw, gt);
+	uc_fw_bind_ggtt(uc_fw);
+	err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
+	uc_fw_unbind_ggtt(uc_fw);
 	if (err)
 		goto fail;
 
@@ -553,10 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
 
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
 {
-	if (!intel_uc_fw_is_available(uc_fw))
-		return;
-
-	i915_gem_object_unpin_pages(uc_fw->obj);
+	intel_uc_fw_cleanup_fetch(uc_fw);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 7a0a5989afc9..1f30543d0d2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -229,10 +229,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
 void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
 			    enum intel_uc_fw_type type, bool supported,
 			    enum intel_platform platform, u8 rev);
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915);
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
 void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
-		       u32 wopcm_offset, u32 dma_flags);
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
 int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
 size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index ae58e8a8c53b..029214cdedd5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -39,9 +39,6 @@
  * 3. Length info of each component can be found in header, in dwords.
  * 4. Modulus and exponent key are not required by driver. They may not appear
  *    in fw. So driver will load a truncated firmware in this case.
- *
- * The only difference between GuC and HuC firmwares is how the version
- * information is saved.
  */
 
 struct uc_css_header {
@@ -69,11 +66,9 @@ struct uc_css_header {
 	char username[8];
 	char buildnumber[12];
 	u32 sw_version;
-#define CSS_SW_VERSION_GUC_MAJOR	(0xFF << 16)
-#define CSS_SW_VERSION_GUC_MINOR	(0xFF << 8)
-#define CSS_SW_VERSION_GUC_PATCH	(0xFF << 0)
-#define CSS_SW_VERSION_HUC_MAJOR	(0xFFFF << 16)
-#define CSS_SW_VERSION_HUC_MINOR	(0xFFFF << 0)
+#define CSS_SW_VERSION_UC_MAJOR		(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR		(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH		(0xFF << 0)
 	u32 reserved[14];
 	u32 header_info;
 } __packed;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
deleted file mode 100644
index bba0eafe1cdb..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ /dev/null
@@ -1,317 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2017 Intel Corporation
- */
-
-#include "i915_selftest.h"
-#include "gem/i915_gem_pm.h"
-
-/* max doorbell number + negative test for each client type */
-#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
-
-static struct intel_guc_client *clients[ATTEMPTS];
-
-static bool available_dbs(struct intel_guc *guc, u32 priority)
-{
-	unsigned long offset;
-	unsigned long end;
-	u16 id;
-
-	/* first half is used for normal priority, second half for high */
-	offset = 0;
-	end = GUC_NUM_DOORBELLS / 2;
-	if (priority <= GUC_CLIENT_PRIORITY_HIGH) {
-		offset = end;
-		end += offset;
-	}
-
-	id = find_next_zero_bit(guc->doorbell_bitmap, end, offset);
-	if (id < end)
-		return true;
-
-	return false;
-}
-
-static int check_all_doorbells(struct intel_guc *guc)
-{
-	u16 db_id;
-
-	pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS);
-	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) {
-		if (!doorbell_ok(guc, db_id)) {
-			pr_err("doorbell %d, not ok\n", db_id);
-			return -EIO;
-		}
-	}
-
-	return 0;
-}
-
-static int ring_doorbell_nop(struct intel_guc_client *client)
-{
-	struct guc_process_desc *desc = __get_process_desc(client);
-	int err;
-
-	client->use_nop_wqi = true;
-
-	spin_lock_irq(&client->wq_lock);
-
-	guc_wq_item_append(client, 0, 0, 0, 0);
-	guc_ring_doorbell(client);
-
-	spin_unlock_irq(&client->wq_lock);
-
-	client->use_nop_wqi = false;
-
-	/* if there are no issues GuC will update the WQ head and keep the
-	 * WQ in active status
-	 */
-	err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10);
-	if (err) {
-		pr_err("doorbell %u ring failed!\n", client->doorbell_id);
-		return -EIO;
-	}
-
-	if (desc->wq_status != WQ_STATUS_ACTIVE) {
-		pr_err("doorbell %u ring put WQ in bad state (%u)!\n",
-		       client->doorbell_id, desc->wq_status);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/*
- * Basic client sanity check, handy to validate create_clients.
- */
-static int validate_client(struct intel_guc_client *client, int client_priority)
-{
-	if (client->priority != client_priority ||
-	    client->doorbell_id == GUC_DOORBELL_INVALID)
-		return -EINVAL;
-	else
-		return 0;
-}
-
-static bool client_doorbell_in_sync(struct intel_guc_client *client)
-{
-	return !client || doorbell_ok(client->guc, client->doorbell_id);
-}
-
-/*
- * Check that we're able to synchronize guc_clients with their doorbells
- *
- * We're creating clients and reserving doorbells once, at module load. During
- * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
- * GuC being reset. In other words - GuC clients are still around, but the
- * status of their doorbells may be incorrect. This is the reason behind
- * validating that the doorbells status expected by the driver matches what the
- * GuC/HW have.
- */
-static int igt_guc_clients(void *args)
-{
-	struct drm_i915_private *dev_priv = args;
-	intel_wakeref_t wakeref;
-	struct intel_guc *guc;
-	int err = 0;
-
-	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
-	guc = &dev_priv->gt.uc.guc;
-	if (!guc) {
-		pr_err("No guc object!\n");
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	err = check_all_doorbells(guc);
-	if (err)
-		goto unlock;
-
-	/*
-	 * Get rid of clients created during driver load because the test will
-	 * recreate them.
-	 */
-	guc_clients_disable(guc);
-	guc_clients_destroy(guc);
-	if (guc->execbuf_client) {
-		pr_err("guc_clients_destroy lied!\n");
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	err = guc_clients_create(guc);
-	if (err) {
-		pr_err("Failed to create clients\n");
-		goto unlock;
-	}
-	GEM_BUG_ON(!guc->execbuf_client);
-
-	err = validate_client(guc->execbuf_client,
-			      GUC_CLIENT_PRIORITY_KMD_NORMAL);
-	if (err) {
-		pr_err("execbug client validation failed\n");
-		goto out;
-	}
-
-	/* the client should now have reserved a doorbell */
-	if (!has_doorbell(guc->execbuf_client)) {
-		pr_err("guc_clients_create didn't reserve doorbells\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Now enable the clients */
-	guc_clients_enable(guc);
-
-	/* each client should now have received a doorbell */
-	if (!client_doorbell_in_sync(guc->execbuf_client)) {
-		pr_err("failed to initialize the doorbells\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Basic test - an attempt to reallocate a valid doorbell to the
-	 * client it is currently assigned should not cause a failure.
-	 */
-	err = create_doorbell(guc->execbuf_client);
-
-out:
-	/*
-	 * Leave clean state for other test, plus the driver always destroy the
-	 * clients during unload.
-	 */
-	guc_clients_disable(guc);
-	guc_clients_destroy(guc);
-	guc_clients_create(guc);
-	guc_clients_enable(guc);
-unlock:
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-	return err;
-}
-
-/*
- * Create as many clients as number of doorbells. Note that there's already
- * client(s)/doorbell(s) created during driver load, but this test creates
- * its own and do not interact with the existing ones.
- */
-static int igt_guc_doorbells(void *arg)
-{
-	struct drm_i915_private *dev_priv = arg;
-	intel_wakeref_t wakeref;
-	struct intel_guc *guc;
-	int i, err = 0;
-	u16 db_id;
-
-	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
-	guc = &dev_priv->gt.uc.guc;
-	if (!guc) {
-		pr_err("No guc object!\n");
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	err = check_all_doorbells(guc);
-	if (err)
-		goto unlock;
-
-	for (i = 0; i < ATTEMPTS; i++) {
-		clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM);
-
-		if (!clients[i]) {
-			pr_err("[%d] No guc client\n", i);
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (IS_ERR(clients[i])) {
-			if (PTR_ERR(clients[i]) != -ENOSPC) {
-				pr_err("[%d] unexpected error\n", i);
-				err = PTR_ERR(clients[i]);
-				goto out;
-			}
-
-			if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) {
-				pr_err("[%d] non-db related alloc fail\n", i);
-				err = -EINVAL;
-				goto out;
-			}
-
-			/* expected, ran out of dbs for this client type */
-			continue;
-		}
-
-		/*
-		 * The check below is only valid because we keep a doorbell
-		 * assigned during the whole life of the client.
-		 */
-		if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) {
-			pr_err("[%d] more clients than doorbells (%d >= %d)\n",
-			       i, clients[i]->stage_id, GUC_NUM_DOORBELLS);
-			err = -EINVAL;
-			goto out;
-		}
-
-		err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM);
-		if (err) {
-			pr_err("[%d] client_alloc sanity check failed!\n", i);
-			err = -EINVAL;
-			goto out;
-		}
-
-		db_id = clients[i]->doorbell_id;
-
-		err = __guc_client_enable(clients[i]);
-		if (err) {
-			pr_err("[%d] Failed to create a doorbell\n", i);
-			goto out;
-		}
-
-		/* doorbell id shouldn't change, we are holding the mutex */
-		if (db_id != clients[i]->doorbell_id) {
-			pr_err("[%d] doorbell id changed (%d != %d)\n",
-			       i, db_id, clients[i]->doorbell_id);
-			err = -EINVAL;
-			goto out;
-		}
-
-		err = check_all_doorbells(guc);
-		if (err)
-			goto out;
-
-		err = ring_doorbell_nop(clients[i]);
-		if (err)
-			goto out;
-	}
-
-out:
-	for (i = 0; i < ATTEMPTS; i++)
-		if (!IS_ERR_OR_NULL(clients[i])) {
-			__guc_client_disable(clients[i]);
-			guc_client_free(clients[i]);
-		}
-unlock:
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-	return err;
-}
-
-int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
-{
-	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_guc_clients),
-		SUBTEST(igt_guc_doorbells),
-	};
-
-	if (!USES_GUC_SUBMISSION(dev_priv))
-		return 0;
-
-	return i915_subtests(tests, dev_priv);
-}
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 5ff2437b2998..771420453f82 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 		flags = PIN_MAPPABLE;
 	}
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
 	mmio_hw_access_post(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
 			high_gm ? "high" : "low");
@@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
 
 	return 0;
 out_free_aperture:
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	return ret;
 }
 
@@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
 	drm_mm_remove_node(&vgpu->gm.high_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 }
 
 /**
@@ -198,7 +198,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	mutex_lock(&dev_priv->ggtt.vm.mutex);
 
 	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
-		reg = i915_reserve_fence(dev_priv);
+		reg = i915_reserve_fence(&dev_priv->ggtt);
 		if (IS_ERR(reg))
 			goto out_free_fence;
 
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index e753b1e706e2..21a176cd8acc 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -35,7 +35,9 @@
  */
 
 #include <linux/slab.h>
+
 #include "i915_drv.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
@@ -1597,9 +1599,9 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s)
 	if (!(cmd_val(s, 0) & (1 << 22)))
 		return ret;
 
-	/* check if QWORD */
-	if (DWORD_FIELD(0, 20, 19) == 1)
-		valid_len += 8;
+	/* check inline data */
+	if (cmd_val(s, 0) & BIT(18))
+		valid_len = CMD_LEN(9);
 	ret = gvt_check_valid_cmd_length(cmd_length(s),
 			valid_len);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h
index 286703643002..ab25d151932a 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.h
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h
@@ -38,6 +38,10 @@
 
 #define GVT_CMD_HASH_BITS 7
 
+struct intel_gvt;
+struct intel_shadow_wa_ctx;
+struct intel_vgpu_workload;
+
 void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt);
 
 int intel_gvt_init_cmd_parser(struct intel_gvt *gvt);
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h
index a87f33e6a23c..b59b34046e1e 100644
--- a/drivers/gpu/drm/i915/gvt/display.h
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -35,6 +35,11 @@
 #ifndef _GVT_DISPLAY_H_
 #define _GVT_DISPLAY_H_
 
+#include <linux/types.h>
+
+struct intel_gvt;
+struct intel_vgpu;
+
 #define SBI_REG_MAX	20
 #define DPCD_SIZE	0x700
 
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 4bfaefdf548d..2477a1e5a166 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -36,13 +36,32 @@
 
 #define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12))
 
+static int vgpu_pin_dma_address(struct intel_vgpu *vgpu,
+				unsigned long size,
+				dma_addr_t dma_addr)
+{
+	int ret = 0;
+
+	if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr))
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu,
+				   dma_addr_t dma_addr)
+{
+	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr);
+}
+
 static int vgpu_gem_get_pages(
 		struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct intel_vgpu *vgpu;
 	struct sg_table *st;
 	struct scatterlist *sg;
-	int i, ret;
+	int i, j, ret;
 	gen8_pte_t __iomem *gtt_entries;
 	struct intel_vgpu_fb_info *fb_info;
 	u32 page_num;
@@ -51,6 +70,10 @@ static int vgpu_gem_get_pages(
 	if (WARN_ON(!fb_info))
 		return -ENODEV;
 
+	vgpu = fb_info->obj->vgpu;
+	if (WARN_ON(!vgpu))
+		return -ENODEV;
+
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (unlikely(!st))
 		return -ENOMEM;
@@ -64,21 +87,53 @@ static int vgpu_gem_get_pages(
 	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(fb_info->start >> PAGE_SHIFT);
 	for_each_sg(st->sgl, sg, page_num, i) {
+		dma_addr_t dma_addr =
+			GEN8_DECODE_PTE(readq(&gtt_entries[i]));
+		if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
 		sg->offset = 0;
 		sg->length = PAGE_SIZE;
-		sg_dma_address(sg) =
-			GEN8_DECODE_PTE(readq(&gtt_entries[i]));
 		sg_dma_len(sg) = PAGE_SIZE;
+		sg_dma_address(sg) = dma_addr;
 	}
 
 	__i915_gem_object_set_pages(obj, st, PAGE_SIZE);
+out:
+	if (ret) {
+		dma_addr_t dma_addr;
+
+		for_each_sg(st->sgl, sg, i, j) {
+			dma_addr = sg_dma_address(sg);
+			if (dma_addr)
+				vgpu_unpin_dma_address(vgpu, dma_addr);
+		}
+		sg_free_table(st);
+		kfree(st);
+	}
+
+	return ret;
 
-	return 0;
 }
 
 static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj,
 		struct sg_table *pages)
 {
+	struct scatterlist *sg;
+
+	if (obj->base.dma_buf) {
+		struct intel_vgpu_fb_info *fb_info = obj->gvt_info;
+		struct intel_vgpu_dmabuf_obj *obj = fb_info->obj;
+		struct intel_vgpu *vgpu = obj->vgpu;
+		int i;
+
+		for_each_sg(pages->sgl, sg, fb_info->size, i)
+			vgpu_unpin_dma_address(vgpu,
+					       sg_dma_address(sg));
+	}
+
 	sg_free_table(pages);
 	kfree(pages);
 }
@@ -152,6 +207,7 @@ static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = {
 static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 		struct intel_vgpu_fb_info *info)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
 
@@ -161,7 +217,8 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 
 	drm_gem_private_object_init(dev, &obj->base,
 		roundup(info->size, PAGE_SIZE));
-	i915_gem_object_init(obj, &intel_vgpu_gem_ops);
+	i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class);
+	i915_gem_object_set_readonly(obj);
 
 	obj->read_domains = I915_GEM_DOMAIN_GTT;
 	obj->write_domain = 0;
diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h
index f6dfc8b795ec..dfe0cbc6aad8 100644
--- a/drivers/gpu/drm/i915/gvt/edid.h
+++ b/drivers/gpu/drm/i915/gvt/edid.h
@@ -35,6 +35,10 @@
 #ifndef _GVT_EDID_H_
 #define _GVT_EDID_H_
 
+#include <linux/types.h>
+
+struct intel_vgpu;
+
 #define EDID_SIZE		128
 #define EDID_ADDR		0x50 /* Linux hvm EDID addr */
 
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index f21b8fb5b37e..d6e7a1189bad 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -534,7 +534,7 @@ static void clean_execlist(struct intel_vgpu *vgpu,
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	intel_engine_mask_t tmp;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
 		kfree(s->ring_scan_buffer[engine->id]);
 		s->ring_scan_buffer[engine->id] = NULL;
 		s->ring_scan_buffer_size[engine->id] = 0;
@@ -548,7 +548,7 @@ static void reset_execlist(struct intel_vgpu *vgpu,
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t tmp;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
 		init_vgpu_execlist(vgpu, engine->id);
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h
index 5ccc2c695848..5c0c1fd30c83 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.h
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -35,6 +35,8 @@
 #ifndef _GVT_EXECLIST_H_
 #define _GVT_EXECLIST_H_
 
+#include <linux/types.h>
+
 struct execlist_ctx_descriptor_format {
 	union {
 		u32 ldw;
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h
index 60c155085029..67b6ede9e707 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.h
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h
@@ -36,6 +36,8 @@
 #ifndef _GVT_FB_DECODER_H_
 #define _GVT_FB_DECODER_H_
 
+#include <linux/types.h>
+
 #define _PLANE_CTL_FORMAT_SHIFT		24
 #define _PLANE_CTL_TILED_SHIFT		10
 #define _PIPE_V_SRCSZ_SHIFT		0
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 4b04af569c05..34cb404ba4b7 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1282,7 +1282,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 		return -EINVAL;
 	default:
 		GEM_BUG_ON(1);
-	};
+	}
 
 	/* direct shadow */
 	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 25f78196b964..6d28d72e6c7e 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 			gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
 			engine_mask |= BIT(VCS1);
 		}
+		if (data & GEN9_GRDOM_GUC) {
+			gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id);
+			vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET;
+		}
 		engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask;
 	}
 
@@ -460,6 +464,7 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 static i915_reg_t force_nonpriv_white_list[] = {
 	GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec)
 	GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248)
+	PS_INVOCATION_COUNT,//_MMIO(0x2348)
 	GEN8_CS_CHICKEN1,//_MMIO(0x2580)
 	_MMIO(0x2690),
 	_MMIO(0x2694),
@@ -508,7 +513,7 @@ static inline bool in_whitelist(unsigned int reg)
 static int force_nonpriv_write(struct intel_vgpu *vgpu,
 	unsigned int offset, void *p_data, unsigned int bytes)
 {
-	u32 reg_nonpriv = *(u32 *)p_data;
+	u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2);
 	int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
 	u32 ring_base;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
@@ -528,7 +533,7 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu,
 			bytes);
 	} else
 		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n",
-			vgpu->id, reg_nonpriv, offset);
+			vgpu->id, *(u32 *)p_data, offset);
 
 	return 0;
 }
@@ -819,13 +824,16 @@ static int trigger_aux_channel_interrupt(struct intel_vgpu *vgpu,
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	enum intel_gvt_event_type event;
 
-	if (reg == _DPA_AUX_CH_CTL)
+	if (reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_A)))
 		event = AUX_CHANNEL_A;
-	else if (reg == _PCH_DPB_AUX_CH_CTL || reg == _DPB_AUX_CH_CTL)
+	else if (reg == _PCH_DPB_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_B)))
 		event = AUX_CHANNEL_B;
-	else if (reg == _PCH_DPC_AUX_CH_CTL || reg == _DPC_AUX_CH_CTL)
+	else if (reg == _PCH_DPC_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_C)))
 		event = AUX_CHANNEL_C;
-	else if (reg == _PCH_DPD_AUX_CH_CTL || reg == _DPD_AUX_CH_CTL)
+	else if (reg == _PCH_DPD_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_D)))
 		event = AUX_CHANNEL_D;
 	else {
 		WARN_ON(true);
@@ -1632,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+static int guc_status_read(struct intel_vgpu *vgpu,
+			   unsigned int offset, void *p_data,
+			   unsigned int bytes)
+{
+	/* keep MIA_IN_RESET before clearing */
+	read_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET;
+	return 0;
+}
+
 static int mmio_read_from_hw(struct intel_vgpu *vgpu,
 		unsigned int offset, void *p_data, unsigned int bytes)
 {
@@ -2668,10 +2686,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
 	MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+	MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL);
+
 	return 0;
 }
 
-static int init_broadwell_mmio_info(struct intel_gvt *gvt)
+static int init_bdw_mmio_info(struct intel_gvt *gvt)
 {
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	int ret;
@@ -2796,7 +2816,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS);
 
 	MMIO_D(WM_MISC, D_BDW);
-	MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW);
+	MMIO_D(_MMIO(_SRD_CTL_EDP), D_BDW);
 
 	MMIO_D(_MMIO(0x6671c), D_BDW_PLUS);
 	MMIO_D(_MMIO(0x66c00), D_BDW_PLUS);
@@ -2872,11 +2892,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
 	MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL);
 
-	MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_B), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
 						dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_C), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
 						dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_D), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
 						dp_aux_ch_ctl_mmio_write);
 
 	MMIO_D(HSW_PWR_WELL_CTL1, D_SKL_PLUS);
@@ -3360,20 +3380,20 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 		goto err;
 
 	if (IS_BROADWELL(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 	} else if (IS_SKYLAKE(dev_priv)
 		|| IS_KABYLAKE(dev_priv)
 		|| IS_COFFEELAKE(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 		ret = init_skl_mmio_info(gvt);
 		if (ret)
 			goto err;
 	} else if (IS_BROXTON(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 		ret = init_skl_mmio_info(gvt);
@@ -3417,6 +3437,10 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
 	}
 
 	for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) {
+		/* pvinfo data doesn't come from hw mmio */
+		if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE)
+			continue;
+
 		for (j = 0; j < block->size; j += 4) {
 			ret = handler(gvt,
 				      i915_mmio_reg_offset(block->offset) + j,
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 4862fb12778e..b17c4a1599cd 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -33,6 +33,10 @@
 #ifndef _GVT_HYPERCALL_H_
 #define _GVT_HYPERCALL_H_
 
+#include <linux/types.h>
+
+struct device;
+
 enum hypervisor_type {
 	INTEL_GVT_HYPERVISOR_XEN = 0,
 	INTEL_GVT_HYPERVISOR_KVM,
@@ -62,6 +66,8 @@ struct intel_gvt_mpt {
 				  unsigned long size, dma_addr_t *dma_addr);
 	void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
 
+	int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr);
+
 	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
 			      unsigned long mfn, unsigned int nr, bool map);
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.h b/drivers/gpu/drm/i915/gvt/interrupt.h
index 5313fb1b33e1..fcd663811d37 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.h
+++ b/drivers/gpu/drm/i915/gvt/interrupt.h
@@ -32,6 +32,8 @@
 #ifndef _GVT_INTERRUPT_H_
 #define _GVT_INTERRUPT_H_
 
+#include <linux/types.h>
+
 enum intel_gvt_event_type {
 	RCS_MI_USER_INTERRUPT = 0,
 	RCS_DEBUG,
@@ -135,6 +137,7 @@ enum intel_gvt_event_type {
 
 struct intel_gvt_irq;
 struct intel_gvt;
+struct intel_vgpu;
 
 typedef void (*gvt_event_virt_handler_t)(struct intel_gvt_irq *irq,
 	enum intel_gvt_event_type event, struct intel_vgpu *vgpu);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 343d79c1cb7e..3259a1fa69e1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "\n");
 }
 
-static ssize_t
-hw_id_show(struct device *dev, struct device_attribute *attr,
-	   char *buf)
-{
-	struct mdev_device *mdev = mdev_from_dev(dev);
-
-	if (mdev) {
-		struct intel_vgpu *vgpu = (struct intel_vgpu *)
-			mdev_get_drvdata(mdev);
-		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow[0]->gem_context->hw_id);
-	}
-	return sprintf(buf, "\n");
-}
-
 static DEVICE_ATTR_RO(vgpu_id);
-static DEVICE_ATTR_RO(hw_id);
 
 static struct attribute *intel_vgpu_attrs[] = {
 	&dev_attr_vgpu_id.attr,
-	&dev_attr_hw_id.attr,
 	NULL
 };
 
@@ -1933,6 +1916,28 @@ err_unlock:
 	return ret;
 }
 
+static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
+{
+	struct kvmgt_guest_info *info;
+	struct gvt_dma *entry;
+	int ret = 0;
+
+	if (!handle_valid(handle))
+		return -ENODEV;
+
+	info = (struct kvmgt_guest_info *)handle;
+
+	mutex_lock(&info->vgpu->vdev.cache_lock);
+	entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
+	if (entry)
+		kref_get(&entry->ref);
+	else
+		ret = -ENOMEM;
+	mutex_unlock(&info->vgpu->vdev.cache_lock);
+
+	return ret;
+}
+
 static void __gvt_dma_release(struct kref *ref)
 {
 	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
@@ -2044,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {
 	.gfn_to_mfn = kvmgt_gfn_to_pfn,
 	.dma_map_guest_page = kvmgt_dma_map_guest_page,
 	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
+	.dma_pin_guest_page = kvmgt_dma_pin_guest_page,
 	.set_opregion = kvmgt_set_opregion,
 	.set_edid = kvmgt_set_edid,
 	.get_vfio_device = kvmgt_get_vfio_device,
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 5874f1cb4306..2e68f4b02c94 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -36,6 +36,8 @@
 #ifndef _GVT_MMIO_H_
 #define _GVT_MMIO_H_
 
+#include <linux/types.h>
+
 struct intel_gvt;
 struct intel_vgpu;
 
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 4208e40445b1..aaf15916d29a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -35,6 +35,7 @@
 
 #include "i915_drv.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 0f9440128123..9ad224df9c68 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page(
 }
 
 /**
+ * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf
+ * @vgpu: a vGPU
+ * @dma_addr: guest dma addr
+ *
+ * Returns:
+ * 0 on success, negative error code if failed.
+ */
+static inline int
+intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu,
+					dma_addr_t dma_addr)
+{
+	return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr);
+}
+
+/**
  * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
  * @vgpu: a vGPU
  * @gfn: guest PFN
diff --git a/drivers/gpu/drm/i915/gvt/page_track.h b/drivers/gpu/drm/i915/gvt/page_track.h
index fa607a71c3c0..f6eb7135583c 100644
--- a/drivers/gpu/drm/i915/gvt/page_track.h
+++ b/drivers/gpu/drm/i915/gvt/page_track.h
@@ -25,6 +25,9 @@
 #ifndef _GVT_PAGE_TRACK_H_
 #define _GVT_PAGE_TRACK_H_
 
+#include <linux/types.h>
+
+struct intel_vgpu;
 struct intel_vgpu_page_track;
 
 typedef int (*gvt_page_track_handler_t)(
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h
index 7b59e3e88b8b..3dacdad5f529 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.h
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.h
@@ -34,6 +34,9 @@
 #ifndef __GVT_SCHED_POLICY__
 #define __GVT_SCHED_POLICY__
 
+struct intel_gvt;
+struct intel_vgpu;
+
 struct intel_gvt_sched_policy_ops {
 	int (*init)(struct intel_gvt *gvt);
 	void (*clean)(struct intel_gvt *gvt);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6c79d16b381e..685d1e04a5ff 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,11 +35,12 @@
 
 #include <linux/kthread.h>
 
-#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
+#include "i915_gem_gtt.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
@@ -58,7 +59,7 @@ static void set_context_pdp_root_pointer(
 static void update_shadow_pdps(struct intel_vgpu_workload *workload)
 {
 	struct drm_i915_gem_object *ctx_obj =
-		workload->req->hw_context->state->obj;
+		workload->req->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -129,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
 	struct drm_i915_gem_object *ctx_obj =
-		workload->req->hw_context->state->obj;
+		workload->req->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *dst;
@@ -194,7 +195,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			return -EFAULT;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+		page = i915_gem_object_get_page(ctx_obj, i);
 		dst = kmap(page);
 		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
 				I915_GTT_PAGE_SIZE);
@@ -204,9 +205,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
-static inline bool is_gvt_request(struct i915_request *req)
+static inline bool is_gvt_request(struct i915_request *rq)
 {
-	return i915_gem_context_force_single_submission(req->gem_context);
+	return intel_context_force_single_submission(rq->context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
@@ -306,7 +307,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	u32 *cs;
 	int err;
 
-	if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
+	if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/*
@@ -362,10 +363,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
-					  struct i915_gem_context *ctx)
+					  struct intel_context *ce)
 {
 	struct intel_vgpu_mm *mm = workload->shadow_mm;
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
 	int i = 0;
 
 	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
@@ -385,11 +386,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (workload->req)
 		return 0;
 
@@ -415,10 +413,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&vgpu->vgpu_lock);
 
 	if (workload->shadow)
 		return 0;
@@ -529,7 +526,7 @@ static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
 	struct i915_request *rq = workload->req;
 	struct execlist_ring_context *shadow_ring_context =
-		(struct execlist_ring_context *)rq->hw_context->lrc_reg_state;
+		(struct execlist_ring_context *)rq->context->lrc_reg_state;
 
 	shadow_ring_context->bb_per_ctx_ptr.val =
 		(shadow_ring_context->bb_per_ctx_ptr.val &
@@ -580,8 +577,6 @@ static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
 
 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 {
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_vgpu_shadow_bb *bb, *pos;
 
 	if (list_empty(&workload->shadow_bb))
@@ -590,8 +585,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	bb = list_first_entry(&workload->shadow_bb,
 			struct intel_vgpu_shadow_bb, list);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
 		if (bb->obj) {
 			if (bb->accessing)
@@ -609,8 +602,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 		list_del(&bb->list);
 		kfree(bb);
 	}
-
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 static int prepare_workload(struct intel_vgpu_workload *workload)
@@ -634,7 +625,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
 
 	update_shadow_pdps(workload);
 
-	set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context);
+	set_context_ppgtt_from_shadow(workload, s->shadow[ring]);
 
 	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
 	if (ret) {
@@ -685,7 +676,6 @@ err_unpin_mm:
 static int dispatch_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 	int ring_id = workload->ring_id;
 	int ret;
@@ -694,7 +684,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 		ring_id, workload);
 
 	mutex_lock(&vgpu->vgpu_lock);
-	mutex_lock(&dev_priv->drm.struct_mutex);
 
 	ret = intel_gvt_workload_req_alloc(workload);
 	if (ret)
@@ -729,7 +718,6 @@ out:
 err_req:
 	if (ret)
 		workload->status = ret;
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	mutex_unlock(&vgpu->vgpu_lock);
 	return ret;
 }
@@ -796,7 +784,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj;
+	struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *src;
@@ -844,7 +832,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 			return;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+		page = i915_gem_object_get_page(ctx_obj, i);
 		src = kmap(page);
 		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
 				I915_GTT_PAGE_SIZE);
@@ -887,7 +875,7 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
 	intel_engine_mask_t tmp;
 
 	/* free the unsubmited workloads in the queues. */
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
 		list_for_each_entry_safe(pos, n,
 			&s->workload_q_head[engine->id], list) {
 			list_del_init(&pos->list);
@@ -1232,21 +1220,15 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	struct drm_i915_private *i915 = vgpu->gvt->dev_priv;
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
+	struct i915_ppgtt *ppgtt;
 	enum intel_engine_id i;
 	int ret;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
-	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
-	if (IS_ERR(ctx)) {
-		ret = PTR_ERR(ctx);
-		goto out_unlock;
-	}
-
-	i915_gem_context_set_force_single_submission(ctx);
+	ppgtt = i915_ppgtt_create(&i915->gt);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
 
-	i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
+	i915_context_ppgtt_root_save(s, ppgtt);
 
 	for_each_engine(engine, i915, i) {
 		struct intel_context *ce;
@@ -1254,12 +1236,16 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&s->workload_q_head[i]);
 		s->shadow[i] = ERR_PTR(-EINVAL);
 
-		ce = intel_context_create(ctx, engine);
+		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
 			goto out_shadow_ctx;
 		}
 
+		i915_vm_put(ce->vm);
+		ce->vm = i915_vm_get(&ppgtt->vm);
+		intel_context_set_single_submission(ce);
+
 		if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */
 			const unsigned int ring_size = 512 * SZ_4K;
 
@@ -1291,12 +1277,11 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
-	i915_gem_context_put(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_vm_put(&ppgtt->vm);
 	return 0;
 
 out_shadow_ctx:
-	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
+	i915_context_ppgtt_root_restore(s, ppgtt);
 	for_each_engine(engine, i915, i) {
 		if (IS_ERR(s->shadow[i]))
 			break;
@@ -1304,9 +1289,7 @@ out_shadow_ctx:
 		intel_context_unpin(s->shadow[i]);
 		intel_context_put(s->shadow[i]);
 	}
-	i915_gem_context_put(ctx);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_vm_put(&ppgtt->vm);
 	return ret;
 }
 
@@ -1597,9 +1580,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 	 */
 	if (list_empty(workload_q_head(vgpu, ring_id))) {
 		intel_runtime_pm_get(&dev_priv->runtime_pm);
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		ret = intel_gvt_scan_and_shadow_workload(workload);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 		intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 	}
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index d5a6e4e3d0fd..85bd9bf4f6ee 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
  */
 void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
 {
-	mutex_lock(&vgpu->gvt->lock);
+	mutex_lock(&vgpu->vgpu_lock);
 	vgpu->active = true;
-	mutex_unlock(&vgpu->gvt->lock);
+	mutex_unlock(&vgpu->vgpu_lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 48e16ad93bbd..f3da5c06f331 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -6,14 +6,14 @@
 
 #include <linux/debugobjects.h>
 
+#include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_active.h"
 #include "i915_globals.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
-
 /*
  * Active refs memory management
  *
@@ -27,35 +27,35 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_active_request base;
+	struct i915_active_fence base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
 };
 
 static inline struct active_node *
-node_from_active(struct i915_active_request *active)
+node_from_active(struct i915_active_fence *active)
 {
 	return container_of(active, struct active_node, base);
 }
 
 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
 
-static inline bool is_barrier(const struct i915_active_request *active)
+static inline bool is_barrier(const struct i915_active_fence *active)
 {
-	return IS_ERR(rcu_access_pointer(active->request));
+	return IS_ERR(rcu_access_pointer(active->fence));
 }
 
 static inline struct llist_node *barrier_to_ll(struct active_node *node)
 {
 	GEM_BUG_ON(!is_barrier(&node->base));
-	return (struct llist_node *)&node->base.link;
+	return (struct llist_node *)&node->base.cb.node;
 }
 
 static inline struct intel_engine_cs *
 __barrier_to_engine(struct active_node *node)
 {
-	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
+	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
 }
 
 static inline struct intel_engine_cs *
@@ -68,7 +68,7 @@ barrier_to_engine(struct active_node *node)
 static inline struct active_node *barrier_from_ll(struct llist_node *x)
 {
 	return container_of((struct list_head *)x,
-			    struct active_node, base.link);
+			    struct active_node, base.cb.node);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
@@ -92,12 +92,16 @@ static void debug_active_init(struct i915_active *ref)
 
 static void debug_active_activate(struct i915_active *ref)
 {
-	debug_object_activate(ref, &active_debug_desc);
+	lockdep_assert_held(&ref->tree_lock);
+	if (!atomic_read(&ref->count)) /* before the first inc */
+		debug_object_activate(ref, &active_debug_desc);
 }
 
 static void debug_active_deactivate(struct i915_active *ref)
 {
-	debug_object_deactivate(ref, &active_debug_desc);
+	lockdep_assert_held(&ref->tree_lock);
+	if (!atomic_read(&ref->count)) /* after the last dec */
+		debug_object_deactivate(ref, &active_debug_desc);
 }
 
 static void debug_active_fini(struct i915_active *ref)
@@ -125,31 +129,46 @@ __active_retire(struct i915_active *ref)
 {
 	struct active_node *it, *n;
 	struct rb_root root;
-	bool retire = false;
+	unsigned long flags;
 
-	lockdep_assert_held(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/* return the unused nodes to our slabcache -- flushing the allocator */
-	if (atomic_dec_and_test(&ref->count)) {
-		debug_active_deactivate(ref);
-		root = ref->tree;
-		ref->tree = RB_ROOT;
-		ref->cache = NULL;
-		retire = true;
-	}
-
-	mutex_unlock(&ref->mutex);
-	if (!retire)
+	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
 		return;
 
-	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
-		kmem_cache_free(global.slab_cache, it);
-	}
+	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
+	debug_active_deactivate(ref);
+
+	root = ref->tree;
+	ref->tree = RB_ROOT;
+	ref->cache = NULL;
+
+	spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 	/* After the final retire, the entire struct may be freed */
 	if (ref->retire)
 		ref->retire(ref);
+
+	/* ... except if you wait on it, you must manage your own references! */
+	wake_up_var(ref);
+
+	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
+		GEM_BUG_ON(i915_active_fence_isset(&it->base));
+		kmem_cache_free(global.slab_cache, it);
+	}
+}
+
+static void
+active_work(struct work_struct *wrk)
+{
+	struct i915_active *ref = container_of(wrk, typeof(*ref), work);
+
+	GEM_BUG_ON(!atomic_read(&ref->count));
+	if (atomic_add_unless(&ref->count, -1, 1))
+		return;
+
+	__active_retire(ref);
 }
 
 static void
@@ -159,18 +178,44 @@ active_retire(struct i915_active *ref)
 	if (atomic_add_unless(&ref->count, -1, 1))
 		return;
 
-	/* One active may be flushed from inside the acquire of another */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
+		queue_work(system_unbound_wq, &ref->work);
+		return;
+	}
+
 	__active_retire(ref);
 }
 
+static inline struct dma_fence **
+__active_fence_slot(struct i915_active_fence *active)
+{
+	return (struct dma_fence ** __force)&active->fence;
+}
+
+static inline bool
+active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct i915_active_fence *active =
+		container_of(cb, typeof(*active), cb);
+
+	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
+}
+
 static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	active_retire(node_from_active(base)->ref);
+	if (active_fence_cb(fence, cb))
+		active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static struct i915_active_request *
+static void
+excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	if (active_fence_cb(fence, cb))
+		active_retire(container_of(cb, struct i915_active, excl.cb));
+}
+
+static struct i915_active_fence *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
@@ -193,7 +238,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	if (!prealloc)
 		return NULL;
 
-	mutex_lock(&ref->mutex);
+	spin_lock_irq(&ref->tree_lock);
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	parent = NULL;
@@ -214,7 +259,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
+	__i915_active_fence_init(&node->base, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -223,29 +268,40 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 
 out:
 	ref->cache = node;
-	mutex_unlock(&ref->mutex);
+	spin_unlock_irq(&ref->tree_lock);
 
 	BUILD_BUG_ON(offsetof(typeof(*node), base));
 	return &node->base;
 }
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
-			struct lock_class_key *key)
+			struct lock_class_key *mkey,
+			struct lock_class_key *wkey)
 {
+	unsigned long bits;
+
 	debug_active_init(ref);
 
-	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
-	ref->retire = retire;
+	ref->retire = ptr_unpack_bits(retire, &bits, 2);
+	if (bits & I915_ACTIVE_MAY_SLEEP)
+		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+
+	spin_lock_init(&ref->tree_lock);
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
+
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
-	__mutex_init(&ref->mutex, "i915_active", key);
+	__mutex_init(&ref->mutex, "i915_active", mkey);
+	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
+	INIT_WORK(&ref->work, active_work);
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
+#endif
 }
 
 static bool ____active_del_barrier(struct i915_active *ref,
@@ -298,9 +354,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq)
+		    struct dma_fence *fence)
 {
-	struct i915_active_request *active;
+	struct i915_active_fence *active;
 	int err;
 
 	lockdep_assert_held(&tl->mutex);
@@ -323,26 +379,37 @@ int i915_active_ref(struct i915_active *ref,
 		 * request that we want to emit on the kernel_context.
 		 */
 		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->request, NULL);
-		INIT_LIST_HEAD(&active->link);
-	} else {
-		if (!i915_active_request_isset(active))
-			atomic_inc(&ref->count);
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
 	}
-	GEM_BUG_ON(!atomic_read(&ref->count));
-	__i915_active_request_set(active, rq);
+	if (!__i915_active_fence_set(active, fence))
+		atomic_inc(&ref->count);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	/* We expect the caller to manage the exclusive timeline ordering */
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	if (!__i915_active_fence_set(&ref->excl, f))
+		atomic_inc(&ref->count);
+}
+
+bool i915_active_acquire_if_busy(struct i915_active *ref)
+{
+	debug_active_assert(ref);
+	return atomic_add_unless(&ref->count, 1, 0);
+}
+
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
 
-	debug_active_assert(ref);
-	if (atomic_add_unless(&ref->count, 1, 0))
+	if (i915_active_acquire_if_busy(ref))
 		return 0;
 
 	err = mutex_lock_interruptible(&ref->mutex);
@@ -352,8 +419,10 @@ int i915_active_acquire(struct i915_active *ref)
 	if (!atomic_read(&ref->count) && ref->active)
 		err = ref->active(ref);
 	if (!err) {
+		spin_lock_irq(&ref->tree_lock); /* vs __active_retire() */
 		debug_active_activate(ref);
 		atomic_inc(&ref->count);
+		spin_unlock_irq(&ref->tree_lock);
 	}
 
 	mutex_unlock(&ref->mutex);
@@ -367,109 +436,67 @@ void i915_active_release(struct i915_active *ref)
 	active_retire(ref);
 }
 
-static void __active_ungrab(struct i915_active *ref)
+static void enable_signaling(struct i915_active_fence *active)
 {
-	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
-}
-
-bool i915_active_trygrab(struct i915_active *ref)
-{
-	debug_active_assert(ref);
+	struct dma_fence *fence;
 
-	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
-		return false;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		__active_ungrab(ref);
-		return false;
-	}
-
-	return true;
-}
-
-void i915_active_ungrab(struct i915_active *ref)
-{
-	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
+	fence = i915_active_fence_get(active);
+	if (!fence)
+		return;
 
-	active_retire(ref);
-	__active_ungrab(ref);
+	dma_fence_enable_sw_signaling(fence);
+	dma_fence_put(fence);
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
 	might_sleep();
-	might_lock(&ref->mutex);
 
-	if (i915_active_is_idle(ref))
+	if (!i915_active_acquire_if_busy(ref))
 		return 0;
 
-	err = mutex_lock_interruptible(&ref->mutex);
-	if (err)
-		return err;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		mutex_unlock(&ref->mutex);
-		return 0;
-	}
-
+	/* Flush lazy signals */
+	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
-			err = -EBUSY;
-			break;
-		}
+		if (is_barrier(&it->base)) /* unconnected idle barrier */
+			continue;
 
-		err = i915_active_request_retire(&it->base, BKL(ref));
-		if (err)
-			break;
+		enable_signaling(&it->base);
 	}
+	/* Any fence added after the wait begins will not be auto-signaled */
 
-	__active_retire(ref);
+	i915_active_release(ref);
 	if (err)
 		return err;
 
-	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
+	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
 		return -EINTR;
 
-	if (!i915_active_is_idle(ref))
-		return -EBUSY;
-
+	flush_work(&ref->work);
 	return 0;
 }
 
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active)
-{
-	struct i915_request *barrier =
-		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
-	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
-	if (RB_EMPTY_ROOT(&ref->tree))
-		return 0;
+	if (rcu_access_pointer(ref->excl.fence)) {
+		struct dma_fence *fence;
 
-	/* await allocates and so we need to avoid hitting the shrinker */
-	err = i915_active_acquire(ref);
-	if (err)
-		return err;
-
-	mutex_lock(&ref->mutex);
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		err = i915_request_await_active_request(rq, &it->base);
-		if (err)
-			break;
+		rcu_read_lock();
+		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
+		rcu_read_unlock();
+		if (fence) {
+			err = i915_request_await_dma_fence(rq, fence);
+			dma_fence_put(fence);
+		}
 	}
-	mutex_unlock(&ref->mutex);
 
-	i915_active_release(ref);
+	/* In the future we may choose to await on all fences */
+
 	return err;
 }
 
@@ -477,15 +504,16 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(atomic_read(&ref->count));
+	GEM_BUG_ON(work_pending(&ref->work));
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
 }
 #endif
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
-	return node->timeline == idx && !i915_active_request_isset(&node->base);
+	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
@@ -495,7 +523,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 	if (RB_EMPTY_ROOT(&ref->tree))
 		return NULL;
 
-	mutex_lock(&ref->mutex);
+	spin_lock_irq(&ref->tree_lock);
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/*
@@ -560,7 +588,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 			goto match;
 	}
 
-	mutex_unlock(&ref->mutex);
+	spin_unlock_irq(&ref->tree_lock);
 
 	return NULL;
 
@@ -568,7 +596,7 @@ match:
 	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
 	if (p == &ref->cache->node)
 		ref->cache = NULL;
-	mutex_unlock(&ref->mutex);
+	spin_unlock_irq(&ref->tree_lock);
 
 	return rb_entry(p, struct active_node, node);
 }
@@ -576,12 +604,16 @@ match:
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *i915 = engine->i915;
 	intel_engine_mask_t tmp, mask = engine->mask;
-	struct llist_node *pos, *next;
+	struct llist_node *pos = NULL, *next;
+	struct intel_gt *gt = engine->gt;
 	int err;
 
-	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	/* Wait until the previous preallocation is completed */
+	while (!llist_empty(&ref->preallocated_barriers))
+		cond_resched();
 
 	/*
 	 * Preallocate a node for each physical engine supporting the target
@@ -589,7 +621,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	 * We can then use the preallocated nodes in
 	 * i915_active_acquire_barrier()
 	 */
-	for_each_engine_masked(engine, i915, mask, tmp) {
+	for_each_engine_masked(engine, gt, mask, tmp) {
 		u64 idx = engine->kernel_context->timeline->fence_context;
 		struct active_node *node;
 
@@ -601,17 +633,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 				goto unwind;
 			}
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-			node->base.lock =
-				&engine->kernel_context->timeline->mutex;
-#endif
-			RCU_INIT_POINTER(node->base.request, NULL);
-			node->base.retire = node_retire;
+			RCU_INIT_POINTER(node->base.fence, NULL);
+			node->base.cb.func = node_retire;
 			node->timeline = idx;
 			node->ref = ref;
 		}
 
-		if (!i915_active_request_isset(&node->base)) {
+		if (!i915_active_fence_isset(&node->base)) {
 			/*
 			 * Mark this as being *our* unconnected proto-node.
 			 *
@@ -621,22 +649,31 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 * and then we can use the rb_node and list pointers
 			 * for our tracking of the pending barrier.
 			 */
-			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-			node->base.link.prev = (void *)engine;
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
+			node->base.cb.node.prev = (void *)engine;
 			atomic_inc(&ref->count);
 		}
+		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
 
 		GEM_BUG_ON(barrier_to_engine(node) != engine);
-		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
+		next = barrier_to_ll(node);
+		next->next = pos;
+		if (!pos)
+			pos = next;
 		intel_engine_pm_get(engine);
 	}
 
+	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+	llist_add_batch(next, pos, &ref->preallocated_barriers);
+
 	return 0;
 
 unwind:
-	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+	while (pos) {
 		struct active_node *node = barrier_from_ll(pos);
 
+		pos = pos->next;
+
 		atomic_dec(&ref->count);
 		intel_engine_pm_put(barrier_to_engine(node));
 
@@ -648,6 +685,7 @@ unwind:
 void i915_active_acquire_barrier(struct i915_active *ref)
 {
 	struct llist_node *pos, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
@@ -657,12 +695,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 	 * populated by i915_request_add_active_barriers() to point to the
 	 * request that will eventually release them.
 	 */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
 	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
 		struct active_node *node = barrier_from_ll(pos);
 		struct intel_engine_cs *engine = barrier_to_engine(node);
 		struct rb_node **p, *parent;
 
+		spin_lock_irqsave_nested(&ref->tree_lock, flags,
+					 SINGLE_DEPTH_NESTING);
 		parent = NULL;
 		p = &ref->tree.rb_node;
 		while (*p) {
@@ -678,55 +717,128 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		}
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
+		spin_unlock_irqrestore(&ref->tree_lock, flags);
 
+		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
-	mutex_unlock(&ref->mutex);
+}
+
+static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
+{
+	return __active_fence_slot(&barrier_from_ll(node)->base);
 }
 
 void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
+	unsigned long flags;
 
+	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
-	GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);
+	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	node = llist_del_all(&engine->barrier_tasks);
+	if (!node)
+		return;
 	/*
 	 * Attach the list of proto-fences to the in-flight request such
 	 * that the parent i915_active will be released when this request
 	 * is retired.
 	 */
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
-		smp_wmb(); /* serialise with reuse_idle_barrier */
-		list_add_tail((struct list_head *)node, &rq->active_list);
+	spin_lock_irqsave(&rq->lock, flags);
+	llist_for_each_safe(node, next, node) {
+		/* serialise with reuse_idle_barrier */
+		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
+		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
 	}
+	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-int i915_active_request_set(struct i915_active_request *active,
-			    struct i915_request *rq)
+/*
+ * __i915_active_fence_set: Update the last active fence along its timeline
+ * @active: the active tracker
+ * @fence: the new fence (under construction)
+ *
+ * Records the new @fence as the last active fence along its timeline in
+ * this active tracker, moving the tracking callbacks from the previous
+ * fence onto this one. Returns the previous fence (if not already completed),
+ * which the caller must ensure is executed before the new fence. To ensure
+ * that the order of fences within the timeline of the i915_active_fence is
+ * understood, it should be locked by the caller.
+ */
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence)
 {
-	int err;
+	struct dma_fence *prev;
+	unsigned long flags;
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
+	if (fence == rcu_access_pointer(active->fence))
+		return fence;
 
-	/* Must maintain ordering wrt previous active requests */
-	err = i915_request_await_active_request(rq, active);
-	if (err)
-		return err;
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
 
-	__i915_active_request_set(active, rq);
-	return 0;
+	/*
+	 * Consider that we have two threads arriving (A and B), with
+	 * C already resident as the active->fence.
+	 *
+	 * A does the xchg first, and so it sees C or NULL depending
+	 * on the timing of the interrupt handler. If it is NULL, the
+	 * previous fence must have been signaled and we know that
+	 * we are first on the timeline. If it is still present,
+	 * we acquire the lock on that fence and serialise with the interrupt
+	 * handler, in the process removing it from any future interrupt
+	 * callback. A will then wait on C before executing (if present).
+	 *
+	 * As B is second, it sees A as the previous fence and so waits for
+	 * it to complete its transition and takes over the occupancy for
+	 * itself -- remembering that it needs to wait on A before executing.
+	 *
+	 * Note the strong ordering of the timeline also provides consistent
+	 * nesting rules for the fence->lock; the inner lock is always the
+	 * older lock.
+	 */
+	spin_lock_irqsave(fence->lock, flags);
+	prev = xchg(__active_fence_slot(active), fence);
+	if (prev) {
+		GEM_BUG_ON(prev == fence);
+		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		__list_del_entry(&active->cb.node);
+		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+	}
+	GEM_BUG_ON(rcu_access_pointer(active->fence) != fence);
+	list_add_tail(&active->cb.node, &fence->cb_list);
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return prev;
+}
+
+int i915_active_fence_set(struct i915_active_fence *active,
+			  struct i915_request *rq)
+{
+	struct dma_fence *fence;
+	int err = 0;
+
+	/* Must maintain timeline ordering wrt previous active requests */
+	rcu_read_lock();
+	fence = __i915_active_fence_set(active, &rq->fence);
+	if (fence) /* but the previous fence may not belong to that timeline! */
+		fence = dma_fence_get_rcu(fence);
+	rcu_read_unlock();
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	return err;
 }
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request)
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	/* Space left intentionally blank */
+	active_fence_cb(fence, cb);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index f95058f99057..b571f675c795 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -12,6 +12,10 @@
 #include "i915_active_types.h"
 #include "i915_request.h"
 
+struct i915_request;
+struct intel_engine_cs;
+struct intel_timeline;
+
 /*
  * We treat requests as fences. This is not be to confused with our
  * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
@@ -28,308 +32,95 @@
  * write access so that we can perform concurrent read operations between
  * the CPU and GPU engines, as well as waiting for all rendering to
  * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_active_request to track the most recent (in
+ * object then embeds a #i915_active_fence to track the most recent (in
  * retirement order) request relevant for the desired mode of access.
- * The #i915_active_request is updated with i915_active_request_set() to
+ * The #i915_active_fence is updated with i915_active_fence_set() to
  * track the most recent fence request, typically this is done as part of
  * i915_vma_move_to_active().
  *
- * When the #i915_active_request completes (is retired), it will
+ * When the #i915_active_fence completes (is retired), it will
  * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_active_request.request == NULL). The owner
+ * itself as idle (i915_active_fence.request == NULL). The owner
  * can then perform any action, such as delayed freeing of an active
  * resource including itself.
  */
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request);
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
 
 /**
- * i915_active_request_init - prepares the activity tracker for use
+ * __i915_active_fence_init - prepares the activity tracker for use
  * @active - the active tracker
- * @rq - initial request to track, can be NULL
+ * @fence - initial fence to track, can be NULL
  * @func - a callback when then the tracker is retired (becomes idle),
  *         can be NULL
  *
- * i915_active_request_init() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
+ * i915_active_fence_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active fence
+ * associated with it. When the last fence becomes idle, when it is retired
  * after completion, the optional callback @func is invoked.
  */
 static inline void
-i915_active_request_init(struct i915_active_request *active,
-			 struct mutex *lock,
-			 struct i915_request *rq,
-			 i915_active_retire_fn retire)
-{
-	RCU_INIT_POINTER(active->request, rq);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_active_retire_noop;
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	active->lock = lock;
-#endif
-}
-
-#define INIT_ACTIVE_REQUEST(name, lock) \
-	i915_active_request_init((name), (lock), NULL, NULL)
-
-/**
- * i915_active_request_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * __i915_active_request_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-__i915_active_request_set(struct i915_active_request *active,
-			  struct i915_request *request)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
-
-int __must_check
-i915_active_request_set(struct i915_active_request *active,
-			struct i915_request *rq);
-
-/**
- * i915_active_request_raw - return the active request
- * @active - the active tracker
- *
- * i915_active_request_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_raw(const struct i915_active_request *active,
-			struct mutex *mutex)
+__i915_active_fence_init(struct i915_active_fence *active,
+			 void *fence,
+			 dma_fence_func_t fn)
 {
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
+	RCU_INIT_POINTER(active->fence, fence);
+	active->cb.func = fn ?: i915_active_noop;
 }
 
-/**
- * i915_active_request_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_active_request_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_peek(const struct i915_active_request *active,
-			 struct mutex *mutex)
-{
-	struct i915_request *request;
+#define INIT_ACTIVE_FENCE(A) \
+	__i915_active_fence_init((A), NULL, NULL)
 
-	request = i915_active_request_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence);
 
 /**
- * i915_active_request_get - return a reference to the active request
+ * i915_active_fence_set - updates the tracker to watch the current fence
  * @active - the active tracker
+ * @rq - the request to watch
  *
- * i915_active_request_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
+ * i915_active_fence_set() watches the given @rq for completion. While
+ * that @rq is busy, the @active reports busy. When that @rq is signaled
+ * (or else retired) the @active tracker is updated to report idle.
  */
-static inline struct i915_request *
-i915_active_request_get(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return i915_request_get(i915_active_request_peek(active, mutex));
-}
-
-/**
- * __i915_active_request_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_active_request_get() returns a reference to the active request,
- * or NULL if the active tracker is idle. The caller must hold the RCU read
- * lock, but the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_active_request_get_rcu(const struct i915_active_request *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 * Thread A			Thread B
-	 *
-	 * rq = active.request
-	 *				retire(rq) -> free(rq);
-	 *				(rq is now first on the slab freelist)
-	 *				active.request = NULL
-	 *
-	 *				rq = new submission on a new object
-	 * ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough.  Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
+int __must_check
+i915_active_fence_set(struct i915_active_fence *active,
+		      struct i915_request *rq);
 /**
- * i915_active_request_get_unlocked - return a reference to the active request
+ * i915_active_fence_get - return a reference to the active fence
  * @active - the active tracker
  *
- * i915_active_request_get_unlocked() returns a reference to the active request,
+ * i915_active_fence_get() returns a reference to the active fence,
  * or NULL if the active tracker is idle. The reference is obtained under RCU,
  * so no locking is required by the caller.
  *
- * The reference should be freed with i915_request_put().
+ * The reference should be freed with dma_fence_put().
  */
-static inline struct i915_request *
-i915_active_request_get_unlocked(const struct i915_active_request *active)
+static inline struct dma_fence *
+i915_active_fence_get(struct i915_active_fence *active)
 {
-	struct i915_request *request;
+	struct dma_fence *fence;
 
 	rcu_read_lock();
-	request = __i915_active_request_get_rcu(active);
+	fence = dma_fence_get_rcu_safe(&active->fence);
 	rcu_read_unlock();
 
-	return request;
+	return fence;
 }
 
 /**
- * i915_active_request_isset - report whether the active tracker is assigned
+ * i915_active_fence_isset - report whether the active tracker is assigned
  * @active - the active tracker
  *
- * i915_active_request_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
+ * i915_active_fence_isset() returns true if the active tracker is currently
+ * assigned to a fence. Due to the lazy retiring, that fence may be idle
  * and this may report stale information.
  */
 static inline bool
-i915_active_request_isset(const struct i915_active_request *active)
+i915_active_fence_isset(const struct i915_active_fence *active)
 {
-	return rcu_access_pointer(active->request);
-}
-
-/**
- * i915_active_request_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_active_request_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
-{
-	struct i915_request *request;
-	long ret;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
-
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
+	return rcu_access_pointer(active->fence);
 }
 
 /*
@@ -358,34 +149,44 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
-			struct lock_class_key *key);
-#define i915_active_init(i915, ref, active, retire) do {		\
-	static struct lock_class_key __key;				\
+			struct lock_class_key *mkey,
+			struct lock_class_key *wkey);
+
+/* Specialise each class of i915_active to avoid impossible lockdep cycles. */
+#define i915_active_init(ref, active, retire) do {		\
+	static struct lock_class_key __mkey;				\
+	static struct lock_class_key __wkey;				\
 									\
-	__i915_active_init(i915, ref, active, retire, &__key);		\
+	__i915_active_init(ref, active, retire, &__mkey, &__wkey);	\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq);
+		    struct dma_fence *fence);
+
+static inline int
+i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
+{
+	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
+}
+
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+
+static inline bool i915_active_has_exclusive(struct i915_active *ref)
+{
+	return rcu_access_pointer(ref->excl.fence);
+}
 
 int i915_active_wait(struct i915_active *ref);
 
-int i915_request_await_active(struct i915_request *rq,
-			      struct i915_active *ref);
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active);
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
 
 int i915_active_acquire(struct i915_active *ref);
+bool i915_active_acquire_if_busy(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
-void __i915_active_release_nested(struct i915_active *ref, int subclass);
-
-bool i915_active_trygrab(struct i915_active *ref);
-void i915_active_ungrab(struct i915_active *ref);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
@@ -404,4 +205,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 void i915_active_acquire_barrier(struct i915_active *ref);
 void i915_request_add_active_barriers(struct i915_request *rq);
 
+void i915_active_print(struct i915_active *ref, struct drm_printer *m);
+void i915_active_unlock_wait(struct i915_active *ref);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 1854e7d168c1..6360c3e4b765 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -8,55 +8,46 @@
 #define _I915_ACTIVE_TYPES_H_
 
 #include <linux/atomic.h>
+#include <linux/dma-fence.h>
 #include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 
-struct drm_i915_private;
-struct i915_active_request;
-struct i915_request;
-
-typedef void (*i915_active_retire_fn)(struct i915_active_request *,
-				      struct i915_request *);
-
-struct i915_active_request {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_active_retire_fn retire;
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	/*
-	 * Incorporeal!
-	 *
-	 * Updates to the i915_active_request must be serialised under a lock
-	 * to ensure that the timeline is ordered. Normally, this is the
-	 * timeline->mutex, but another mutex may be used so long as it is
-	 * done so consistently.
-	 *
-	 * For lockdep tracking of the above, we store the lock we intend
-	 * to always use for updates of this i915_active_request during
-	 * construction and assert that is held on every update.
-	 */
-	struct mutex *lock;
-#endif
+#include "i915_utils.h"
+
+struct i915_active_fence {
+	struct dma_fence __rcu *fence;
+	struct dma_fence_cb cb;
 };
 
 struct active_node;
 
+#define I915_ACTIVE_MAY_SLEEP BIT(0)
+
+#define __i915_active_call __aligned(4)
+#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
+
 struct i915_active {
-	struct drm_i915_private *i915;
+	atomic_t count;
+	struct mutex mutex;
 
+	spinlock_t tree_lock;
 	struct active_node *cache;
 	struct rb_root tree;
-	struct mutex mutex;
-	atomic_t count;
+
+	/* Preallocated "exclusive" node */
+	struct i915_active_fence excl;
 
 	unsigned long flags;
-#define I915_ACTIVE_GRAB_BIT 0
+#define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
 
+	struct work_struct work;
+
 	struct llist_head preallocated_barriers;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c
index fe1871d7c126..66883af64ca1 100644
--- a/drivers/gpu/drm/i915/i915_buddy.c
+++ b/drivers/gpu/drm/i915/i915_buddy.c
@@ -38,6 +38,7 @@ int __init i915_global_buddy_init(void)
 	if (!global.slab_blocks)
 		return -ENOMEM;
 
+	i915_global_register(&global.base);
 	return 0;
 }
 
@@ -261,8 +262,10 @@ void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects)
 {
 	struct i915_buddy_block *block, *on;
 
-	list_for_each_entry_safe(block, on, objects, link)
+	list_for_each_entry_safe(block, on, objects, link) {
 		i915_buddy_free(mm, block);
+		cond_resched();
+	}
 	INIT_LIST_HEAD(objects);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f24096e27bef..a0e437aa65b7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -235,7 +235,7 @@ static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
 	/*
 	 * MI_BATCH_BUFFER_START requires some special handling. It's not
 	 * really a 'skip' action but it doesn't seem like it's worth adding
-	 * a new action. See i915_parse_cmds().
+	 * a new action. See intel_engine_cmd_parser().
 	 */
 	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
 };
@@ -731,7 +731,7 @@ static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
 			return 0xFF;
 	}
 
-	DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
+	DRM_DEBUG("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
 	return 0;
 }
 
@@ -754,7 +754,7 @@ static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
 			return 0xFF;
 	}
 
-	DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
+	DRM_DEBUG("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
 	return 0;
 }
 
@@ -767,7 +767,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
 	else if (client == INSTR_BC_CLIENT)
 		return 0xFF;
 
-	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+	DRM_DEBUG("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
 	return 0;
 }
 
@@ -778,7 +778,7 @@ static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
 	if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
 		return 0xFF;
 
-	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+	DRM_DEBUG("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
 	return 0;
 }
 
@@ -1127,79 +1127,71 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
 /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		       struct drm_i915_gem_object *src_obj,
-		       u32 batch_start_offset,
-		       u32 batch_len,
-		       bool *needs_clflush_after)
+		       u32 offset, u32 length)
 {
-	unsigned int src_needs_clflush;
-	unsigned int dst_needs_clflush;
+	bool needs_clflush;
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
-	if (ret)
-		return ERR_PTR(ret);
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
-	i915_gem_object_finish_access(dst_obj);
 	if (IS_ERR(dst))
 		return dst;
 
-	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_pin_pages(src_obj);
 	if (ret) {
 		i915_gem_object_unpin_map(dst_obj);
 		return ERR_PTR(ret);
 	}
 
+	needs_clflush =
+		!(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+
 	src = ERR_PTR(-ENODEV);
-	if (src_needs_clflush &&
-	    i915_can_memcpy_from_wc(NULL, batch_start_offset, 0)) {
+	if (needs_clflush && i915_has_memcpy_from_wc()) {
 		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
 		if (!IS_ERR(src)) {
-			i915_memcpy_from_wc(dst,
-					    src + batch_start_offset,
-					    ALIGN(batch_len, 16));
+			i915_unaligned_memcpy_from_wc(dst,
+						      src + offset,
+						      length);
 			i915_gem_object_unpin_map(src_obj);
 		}
 	}
 	if (IS_ERR(src)) {
 		void *ptr;
-		int offset, n;
+		int x, n;
 
-		offset = offset_in_page(batch_start_offset);
-
-		/* We can avoid clflushing partial cachelines before the write
+		/*
+		 * We can avoid clflushing partial cachelines before the write
 		 * if we only every write full cache-lines. Since we know that
 		 * both the source and destination are in multiples of
 		 * PAGE_SIZE, we can simply round up to the next cacheline.
 		 * We don't care about copying too much here as we only
 		 * validate up to the end of the batch.
 		 */
-		if (dst_needs_clflush & CLFLUSH_BEFORE)
-			batch_len = roundup(batch_len,
-					    boot_cpu_data.x86_clflush_size);
+		if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+			length = round_up(length,
+					  boot_cpu_data.x86_clflush_size);
 
 		ptr = dst;
-		for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
-			int len = min_t(int, batch_len, PAGE_SIZE - offset);
+		x = offset_in_page(offset);
+		for (n = offset >> PAGE_SHIFT; length; n++) {
+			int len = min_t(int, length, PAGE_SIZE - x);
 
 			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
-			if (src_needs_clflush)
-				drm_clflush_virt_range(src + offset, len);
-			memcpy(ptr, src + offset, len);
+			if (needs_clflush)
+				drm_clflush_virt_range(src + x, len);
+			memcpy(ptr, src + x, len);
 			kunmap_atomic(src);
 
 			ptr += len;
-			batch_len -= len;
-			offset = 0;
+			length -= len;
+			x = 0;
 		}
 	}
 
-	i915_gem_object_finish_access(src_obj);
+	i915_gem_object_unpin_pages(src_obj);
 
 	/* dst_obj is returned with vmap pinned */
-	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-
 	return dst;
 }
 
@@ -1211,7 +1203,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 		return true;
 
 	if (desc->flags & CMD_DESC_REJECT) {
-		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
+		DRM_DEBUG("CMD: Rejected command: 0x%08X\n", *cmd);
 		return false;
 	}
 
@@ -1231,8 +1223,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				find_reg(engine, reg_addr);
 
 			if (!reg) {
-				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
-						 reg_addr, *cmd, engine->name);
+				DRM_DEBUG("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+					  reg_addr, *cmd, engine->name);
 				return false;
 			}
 
@@ -1242,22 +1234,22 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 			 */
 			if (reg->mask) {
 				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
-					DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
-							 reg_addr);
+					DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n",
+						  reg_addr);
 					return false;
 				}
 
 				if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
-					DRM_DEBUG_DRIVER("CMD: Rejected LRR to masked register 0x%08X\n",
-							 reg_addr);
+					DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n",
+						  reg_addr);
 					return false;
 				}
 
 				if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
 				    (offset + 2 > length ||
 				     (cmd[offset + 1] & reg->mask) != reg->value)) {
-					DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
-							 reg_addr);
+					DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n",
+						  reg_addr);
 					return false;
 				}
 			}
@@ -1284,8 +1276,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 			}
 
 			if (desc->bits[i].offset >= length) {
-				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
-						 *cmd, engine->name);
+				DRM_DEBUG("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
+					  *cmd, engine->name);
 				return false;
 			}
 
@@ -1293,11 +1285,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				desc->bits[i].mask;
 
 			if (dword != desc->bits[i].expected) {
-				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
-						 *cmd,
-						 desc->bits[i].mask,
-						 desc->bits[i].expected,
-						 dword, engine->name);
+				DRM_DEBUG("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
+					  *cmd,
+					  desc->bits[i].mask,
+					  desc->bits[i].expected,
+					  dword, engine->name);
 				return false;
 			}
 		}
@@ -1306,17 +1298,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 	return true;
 }
 
-static int check_bbstart(const struct i915_gem_context *ctx,
-			 u32 *cmd, u32 offset, u32 length,
-			 u32 batch_len,
-			 u64 batch_start,
-			 u64 shadow_batch_start)
+static int check_bbstart(u32 *cmd, u32 offset, u32 length,
+			 u32 batch_length,
+			 u64 batch_addr,
+			 u64 shadow_addr,
+			 const unsigned long *jump_whitelist)
 {
 	u64 jump_offset, jump_target;
 	u32 target_cmd_offset, target_cmd_index;
 
 	/* For igt compatibility on older platforms */
-	if (CMDPARSER_USES_GGTT(ctx->i915)) {
+	if (!jump_whitelist) {
 		DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
 		return -EACCES;
 	}
@@ -1327,14 +1319,14 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 		return -EINVAL;
 	}
 
-	jump_target = *(u64*)(cmd+1);
-	jump_offset = jump_target - batch_start;
+	jump_target = *(u64 *)(cmd + 1);
+	jump_offset = jump_target - batch_addr;
 
 	/*
 	 * Any underflow of jump_target is guaranteed to be outside the range
 	 * of a u32, so >= test catches both too large and too small
 	 */
-	if (jump_offset >= batch_len) {
+	if (jump_offset >= batch_length) {
 		DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
 			  jump_target);
 		return -EINVAL;
@@ -1342,20 +1334,20 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 
 	/*
 	 * This cannot overflow a u32 because we already checked jump_offset
-	 * is within the BB, and the batch_len is a u32
+	 * is within the BB, and the batch_length is a u32
 	 */
 	target_cmd_offset = lower_32_bits(jump_offset);
 	target_cmd_index = target_cmd_offset / sizeof(u32);
 
-	*(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+	*(u64 *)(cmd + 1) = shadow_addr + target_cmd_offset;
 
 	if (target_cmd_index == offset)
 		return 0;
 
-	if (ctx->jump_whitelist_cmds <= target_cmd_index) {
-		DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
-		return -EINVAL;
-	} else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+	if (IS_ERR(jump_whitelist))
+		return PTR_ERR(jump_whitelist);
+
+	if (!test_bit(target_cmd_index, jump_whitelist)) {
 		DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
 			  jump_target);
 		return -EINVAL;
@@ -1364,54 +1356,40 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 	return 0;
 }
 
-static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+static unsigned long *alloc_whitelist(u32 batch_length)
 {
-	const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
-	const u32 exact_size = BITS_TO_LONGS(batch_cmds);
-	u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
-	unsigned long *next_whitelist;
+	unsigned long *jmp;
 
-	if (CMDPARSER_USES_GGTT(ctx->i915))
-		return;
-
-	if (batch_cmds <= ctx->jump_whitelist_cmds) {
-		bitmap_zero(ctx->jump_whitelist, batch_cmds);
-		return;
-	}
-
-again:
-	next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
-	if (next_whitelist) {
-		kfree(ctx->jump_whitelist);
-		ctx->jump_whitelist = next_whitelist;
-		ctx->jump_whitelist_cmds =
-			next_size * BITS_PER_BYTE * sizeof(long);
-		return;
-	}
-
-	if (next_size > exact_size) {
-		next_size = exact_size;
-		goto again;
-	}
+	/*
+	 * We expect batch_length to be less than 256KiB for known users,
+	 * i.e. we need at most an 8KiB bitmap allocation which should be
+	 * reasonably cheap due to kmalloc caches.
+	 */
 
-	DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
-	bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+	/* Prefer to report transient allocation failure rather than hit oom */
+	jmp = bitmap_zalloc(DIV_ROUND_UP(batch_length, sizeof(u32)),
+			    GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (!jmp)
+		return ERR_PTR(-ENOMEM);
 
-	return;
+	return jmp;
 }
 
 #define LENGTH_BIAS 2
 
+static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
+
 /**
- * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
- * @ctx: the context in which the batch is to execute
+ * intel_engine_cmd_parser() - parse a batch buffer for privilege violations
  * @engine: the engine on which the batch is to execute
- * @batch_obj: the batch buffer in question
- * @batch_start: Canonical base address of batch
- * @batch_start_offset: byte offset in the batch at which execution starts
- * @batch_len: length of the commands in batch_obj
- * @shadow_batch_obj: copy of the batch buffer in question
- * @shadow_batch_start: Canonical base address of shadow_batch_obj
+ * @batch: the batch buffer in question
+ * @batch_offset: byte offset in the batch at which execution starts
+ * @batch_length: length of the commands in batch_obj
+ * @shadow: validated copy of the batch buffer in question
+ * @trampoline: whether to emit a conditional trampoline at the end of the batch
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1419,38 +1397,46 @@ again:
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
-
-int intel_engine_cmd_parser(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine,
-			    struct drm_i915_gem_object *batch_obj,
-			    u64 batch_start,
-			    u32 batch_start_offset,
-			    u32 batch_len,
-			    struct drm_i915_gem_object *shadow_batch_obj,
-			    u64 shadow_batch_start)
+int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+			    struct i915_vma *batch,
+			    u32 batch_offset,
+			    u32 batch_length,
+			    struct i915_vma *shadow,
+			    bool trampoline)
 {
 	u32 *cmd, *batch_end, offset = 0;
 	struct drm_i915_cmd_descriptor default_desc = noop_desc;
 	const struct drm_i915_cmd_descriptor *desc = &default_desc;
-	bool needs_clflush_after = false;
+	unsigned long *jump_whitelist;
+	u64 batch_addr, shadow_addr;
 	int ret = 0;
 
-	cmd = copy_batch(shadow_batch_obj, batch_obj,
-			 batch_start_offset, batch_len,
-			 &needs_clflush_after);
+	GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
+	GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
+	GEM_BUG_ON(range_overflows_t(u64, batch_offset, batch_length,
+				     batch->size));
+	GEM_BUG_ON(!batch_length);
+
+	cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
 	if (IS_ERR(cmd)) {
-		DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+		DRM_DEBUG("CMD: Failed to copy batch\n");
 		return PTR_ERR(cmd);
 	}
 
-	init_whitelist(ctx, batch_len);
+	jump_whitelist = NULL;
+	if (!trampoline)
+		/* Defer failure until attempted use */
+		jump_whitelist = alloc_whitelist(batch_length);
+
+	shadow_addr = gen8_canonical_addr(shadow->node.start);
+	batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
 
 	/*
 	 * We use the batch length as size because the shadow object is as
 	 * large or larger and copy_batch() will write MI_NOPs to the extra
 	 * space. Parsing should be faster in some cases this way.
 	 */
-	batch_end = cmd + (batch_len / sizeof(*batch_end));
+	batch_end = cmd + batch_length / sizeof(*batch_end);
 	do {
 		u32 length;
 
@@ -1459,61 +1445,99 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
 
 		desc = find_cmd(engine, *cmd, desc, &default_desc);
 		if (!desc) {
-			DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
-					 *cmd);
+			DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd);
 			ret = -EINVAL;
-			goto err;
+			break;
 		}
 
 		if (desc->flags & CMD_DESC_FIXED)
 			length = desc->length.fixed;
 		else
-			length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
+			length = (*cmd & desc->length.mask) + LENGTH_BIAS;
 
 		if ((batch_end - cmd) < length) {
-			DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
-					 *cmd,
-					 length,
-					 batch_end - cmd);
+			DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
+				  *cmd,
+				  length,
+				  batch_end - cmd);
 			ret = -EINVAL;
-			goto err;
+			break;
 		}
 
 		if (!check_cmd(engine, desc, cmd, length)) {
 			ret = -EACCES;
-			goto err;
+			break;
 		}
 
 		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
-			ret = check_bbstart(ctx, cmd, offset, length,
-					    batch_len, batch_start,
-					    shadow_batch_start);
-
-			if (ret)
-				goto err;
+			ret = check_bbstart(cmd, offset, length, batch_length,
+					    batch_addr, shadow_addr,
+					    jump_whitelist);
 			break;
 		}
 
-		if (ctx->jump_whitelist_cmds > offset)
-			set_bit(offset, ctx->jump_whitelist);
+		if (!IS_ERR_OR_NULL(jump_whitelist))
+			__set_bit(offset, jump_whitelist);
 
 		cmd += length;
 		offset += length;
 		if  (cmd >= batch_end) {
-			DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
+			DRM_DEBUG("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
 			ret = -EINVAL;
-			goto err;
+			break;
 		}
 	} while (1);
 
-	if (needs_clflush_after) {
-		void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+	if (trampoline) {
+		/*
+		 * With the trampoline, the shadow is executed twice.
+		 *
+		 *   1 - starting at offset 0, in privileged mode
+		 *   2 - starting at offset batch_len, as non-privileged
+		 *
+		 * Only if the batch is valid and safe to execute, do we
+		 * allow the first privileged execution to proceed. If not,
+		 * we terminate the first batch and use the second batchbuffer
+		 * entry to chain to the original unsafe non-privileged batch,
+		 * leaving it to the HW to validate.
+		 */
+		*batch_end = MI_BATCH_BUFFER_END;
+
+		if (ret) {
+			/* Batch unsafe to execute with privileges, cancel! */
+			cmd = page_mask_bits(shadow->obj->mm.mapping);
+			*cmd = MI_BATCH_BUFFER_END;
+
+			/* If batch is unsafe but valid, jump to the original */
+			if (ret == -EACCES) {
+				unsigned int flags;
+
+				flags = MI_BATCH_NON_SECURE_I965;
+				if (IS_HASWELL(engine->i915))
+					flags = MI_BATCH_NON_SECURE_HSW;
+
+				GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7));
+				__gen6_emit_bb_start(batch_end,
+						     batch_addr,
+						     flags);
+
+				ret = 0; /* allow execution */
+			}
+		}
+
+		if (shadow_needs_clflush(shadow->obj))
+			drm_clflush_virt_range(batch_end, 8);
+	}
+
+	if (shadow_needs_clflush(shadow->obj)) {
+		void *ptr = page_mask_bits(shadow->obj->mm.mapping);
 
 		drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
 	}
 
-err:
-	i915_gem_object_unpin_map(shadow_batch_obj);
+	if (!IS_ERR_OR_NULL(jump_whitelist))
+		kfree(jump_whitelist);
+	i915_gem_object_unpin_map(shadow->obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b0f51591f2e4..d5a9b8a964c2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -41,7 +41,10 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
+#include "gt/intel_rc6.h"
+#include "gt/intel_rps.h"
 #include "gt/uc/intel_guc_submission.h"
 
 #include "i915_debugfs.h"
@@ -58,17 +61,14 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 
 static int i915_capabilities(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	const struct intel_device_info *info = INTEL_INFO(dev_priv);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv));
-	seq_printf(m, "platform: %s\n", intel_platform_name(info->platform));
-	seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv));
+	seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915));
 
-	intel_device_info_dump_flags(info, &p);
-	intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p);
-	intel_driver_caps_print(&dev_priv->caps, &p);
+	intel_device_info_print_static(INTEL_INFO(i915), &p);
+	intel_device_info_print_runtime(RUNTIME_INFO(i915), &p);
+	intel_driver_caps_print(&i915->caps, &p);
 
 	kernel_param_lock(THIS_MODULE);
 	i915_params_dump(&i915_modparams, &p);
@@ -77,11 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data)
 	return 0;
 }
 
-static char get_pin_flag(struct drm_i915_gem_object *obj)
-{
-	return obj->pin_global ? 'p' : ' ';
-}
-
 static char get_tiling_flag(struct drm_i915_gem_object *obj)
 {
 	switch (i915_gem_object_get_tiling(obj)) {
@@ -140,9 +135,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
+	seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
-		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   get_global_flag(obj),
 		   get_pin_mapped_flag(obj),
@@ -221,8 +215,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
-	if (obj->pin_global)
-		seq_printf(m, " (global)");
+	if (i915_gem_object_is_framebuffer(obj))
+		seq_printf(m, " (fb)");
 
 	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
@@ -243,6 +237,9 @@ static int per_file_stats(int id, void *ptr, void *data)
 	struct file_stats *stats = data;
 	struct i915_vma *vma;
 
+	if (!kref_get_unless_zero(&obj->base.refcount))
+		return 0;
+
 	stats->count++;
 	stats->total += obj->base.size;
 	if (!atomic_read(&obj->bind_count))
@@ -290,6 +287,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	}
 	spin_unlock(&obj->vma.lock);
 
+	i915_gem_object_put(obj);
 	return 0;
 }
 
@@ -309,34 +307,43 @@ static void print_context_stats(struct seq_file *m,
 				struct drm_i915_private *i915)
 {
 	struct file_stats kstats = {};
-	struct i915_gem_context *ctx;
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
+				rcu_read_lock();
 				if (ce->state)
 					per_file_stats(0,
 						       ce->state->obj, &kstats);
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
+				rcu_read_unlock();
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-			struct file_stats stats = { .vm = ctx->vm, };
+			struct file_stats stats = {
+				.vm = rcu_access_pointer(ctx->vm),
+			};
 			struct drm_file *file = ctx->file_priv->file;
 			struct task_struct *task;
 			char name[80];
 
-			spin_lock(&file->table_lock);
+			rcu_read_lock();
 			idr_for_each(&file->object_idr, per_file_stats, &stats);
-			spin_unlock(&file->table_lock);
+			rcu_read_unlock();
 
 			rcu_read_lock();
 			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
@@ -346,7 +353,12 @@ static void print_context_stats(struct seq_file *m,
 
 			print_file_stats(m, name, stats);
 		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	print_file_stats(m, "[k]contexts", kstats);
 }
@@ -354,21 +366,19 @@ static void print_context_stats(struct seq_file *m,
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
-	int ret;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
 
 	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
 		   atomic_read(&i915->mm.free_count),
 		   i915->mm.shrink_memory);
-
+	for_each_memory_region(mr, i915, id)
+		seq_printf(m, "%s: total:%pa, available:%pa bytes\n",
+			   mr->name, &mr->total, &mr->avail);
 	seq_putc(m, '\n');
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	print_context_stats(m, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
@@ -376,7 +386,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 static void gen8_display_interrupt_info(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	int pipe;
+	enum pipe pipe;
 
 	for_each_pipe(dev_priv, pipe) {
 		enum intel_display_power_domain power_domain;
@@ -527,6 +537,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 
 		gen8_display_interrupt_info(m);
 	} else if (IS_VALLEYVIEW(dev_priv)) {
+		intel_wakeref_t pref;
+
 		seq_printf(m, "Display IER:\t%08x\n",
 			   I915_READ(VLV_IER));
 		seq_printf(m, "Display IIR:\t%08x\n",
@@ -537,7 +549,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			   I915_READ(VLV_IMR));
 		for_each_pipe(dev_priv, pipe) {
 			enum intel_display_power_domain power_domain;
-			intel_wakeref_t pref;
 
 			power_domain = POWER_DOMAIN_PIPE(pipe);
 			pref = intel_display_power_get_if_enabled(dev_priv,
@@ -571,12 +582,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		seq_printf(m, "PM IMR:\t\t%08x\n",
 			   I915_READ(GEN6_PMIMR));
 
+		pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 		seq_printf(m, "Port hotplug:\t%08x\n",
 			   I915_READ(PORT_HOTPLUG_EN));
 		seq_printf(m, "DPFLIPSTAT:\t%08x\n",
 			   I915_READ(VLV_DPFLIPSTAT));
 		seq_printf(m, "DPINVGTT:\t%08x\n",
 			   I915_READ(DPINVGTT));
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref);
 
 	} else if (!HAS_PCH_SPLIT(dev_priv)) {
 		seq_printf(m, "Interrupt enable:    %08x\n",
@@ -672,7 +685,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 			      size_t count, loff_t *pos)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	ssize_t ret;
 	void *buf;
 
@@ -685,7 +698,7 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 	if (!buf)
 		return -ENOMEM;
 
-	ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count);
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
 	if (ret <= 0)
 		goto out;
 
@@ -701,19 +714,19 @@ out:
 
 static int gpu_state_release(struct inode *inode, struct file *file)
 {
-	i915_gpu_state_put(file->private_data);
+	i915_gpu_coredump_put(file->private_data);
 	return 0;
 }
 
 static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	intel_wakeref_t wakeref;
 
 	gpu = NULL;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_capture_gpu_state(i915);
+		gpu = i915_gpu_coredump(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -735,7 +748,7 @@ i915_error_state_write(struct file *filp,
 		       size_t cnt,
 		       loff_t *ppos)
 {
-	struct i915_gpu_state *error = filp->private_data;
+	struct i915_gpu_coredump *error = filp->private_data;
 
 	if (!error)
 		return 0;
@@ -748,7 +761,7 @@ i915_error_state_write(struct file *filp,
 
 static int i915_error_state_open(struct inode *inode, struct file *file)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	error = i915_first_error_state(inode->i_private);
 	if (IS_ERR(error))
@@ -772,7 +785,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_uncore *uncore = &dev_priv->uncore;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
@@ -808,23 +821,23 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
 
 		seq_printf(m, "actual GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
+			   intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
 
 		seq_printf(m, "current GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->cur_freq));
+			   intel_gpu_freq(rps, rps->cur_freq));
 
 		seq_printf(m, "max GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(rps, rps->max_freq));
 
 		seq_printf(m, "min GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->min_freq));
+			   intel_gpu_freq(rps, rps->min_freq));
 
 		seq_printf(m, "idle GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->idle_freq));
+			   intel_gpu_freq(rps, rps->idle_freq));
 
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->efficient_freq));
+			   intel_gpu_freq(rps, rps->efficient_freq));
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		u32 rp_state_limits;
 		u32 gt_perf_status;
@@ -858,7 +871,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 			else
 				reqf >>= 25;
 		}
-		reqf = intel_gpu_freq(dev_priv, reqf);
+		reqf = intel_gpu_freq(rps, reqf);
 
 		rpmodectl = I915_READ(GEN6_RP_CONTROL);
 		rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
@@ -871,8 +884,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
 		rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
 		rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
-		cagf = intel_gpu_freq(dev_priv,
-				      intel_get_cagf(dev_priv, rpstat));
+		cagf = intel_rps_read_actual_frequency(rps);
 
 		intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 
@@ -949,37 +961,37 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
 			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
-			   intel_gpu_freq(dev_priv, max_freq));
+			   intel_gpu_freq(rps, max_freq));
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
 			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
-			   intel_gpu_freq(dev_priv, max_freq));
+			   intel_gpu_freq(rps, max_freq));
 
 		max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 :
 			    rp_state_cap >> 0) & 0xff;
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
 			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
-			   intel_gpu_freq(dev_priv, max_freq));
+			   intel_gpu_freq(rps, max_freq));
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(rps, rps->max_freq));
 
 		seq_printf(m, "Current freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->cur_freq));
+			   intel_gpu_freq(rps, rps->cur_freq));
 		seq_printf(m, "Actual freq: %d MHz\n", cagf);
 		seq_printf(m, "Idle freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->idle_freq));
+			   intel_gpu_freq(rps, rps->idle_freq));
 		seq_printf(m, "Min freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->min_freq));
+			   intel_gpu_freq(rps, rps->min_freq));
 		seq_printf(m, "Boost freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->boost_freq));
+			   intel_gpu_freq(rps, rps->boost_freq));
 		seq_printf(m, "Max freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(rps, rps->max_freq));
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->efficient_freq));
+			   intel_gpu_freq(rps, rps->efficient_freq));
 	} else {
 		seq_puts(m, "no P-state info available\n");
 	}
@@ -992,92 +1004,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	return ret;
 }
 
-static void i915_instdone_info(struct drm_i915_private *dev_priv,
-			       struct seq_file *m,
-			       struct intel_instdone *instdone)
-{
-	int slice;
-	int subslice;
-
-	seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
-		   instdone->instdone);
-
-	if (INTEL_GEN(dev_priv) <= 3)
-		return;
-
-	seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
-		   instdone->slice_common);
-
-	if (INTEL_GEN(dev_priv) <= 6)
-		return;
-
-	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
-		seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->sampler[slice][subslice]);
-
-	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
-		seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->row[slice][subslice]);
-}
-
-static int i915_hangcheck_info(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *i915 = node_to_i915(m->private);
-	struct intel_gt *gt = &i915->gt;
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
-
-	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
-	if (test_bit(I915_WEDGED, &gt->reset.flags))
-		seq_puts(m, "\tWedged\n");
-	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
-		seq_puts(m, "\tDevice (global) reset in progress\n");
-
-	if (!i915_modparams.enable_hangcheck) {
-		seq_puts(m, "Hangcheck disabled\n");
-		return 0;
-	}
-
-	if (timer_pending(&gt->hangcheck.work.timer))
-		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
-			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
-					    jiffies));
-	else if (delayed_work_pending(&gt->hangcheck.work))
-		seq_puts(m, "Hangcheck active, work pending\n");
-	else
-		seq_puts(m, "Hangcheck inactive\n");
-
-	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		for_each_engine(engine, i915, id) {
-			struct intel_instdone instdone;
-
-			seq_printf(m, "%s: %d ms ago\n",
-				   engine->name,
-				   jiffies_to_msecs(jiffies -
-						    engine->hangcheck.action_timestamp));
-
-			seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
-				   (long long)engine->hangcheck.acthd,
-				   intel_engine_get_active_head(engine));
-
-			intel_engine_get_instdone(engine, &instdone);
-
-			seq_puts(m, "\tinstdone read =\n");
-			i915_instdone_info(i915, m, &instdone);
-
-			seq_puts(m, "\tinstdone accu =\n");
-			i915_instdone_info(i915, m,
-					   &engine->hangcheck.instdone);
-		}
-	}
-
-	return 0;
-}
-
-static int ironlake_drpc_info(struct seq_file *m)
+static int ilk_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_uncore *uncore = &i915->uncore;
@@ -1157,11 +1084,13 @@ static void print_rc6_res(struct seq_file *m,
 			  const char *title,
 			  const i915_reg_t reg)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	intel_wakeref_t wakeref;
 
-	seq_printf(m, "%s %u (%llu us)\n",
-		   title, I915_READ(reg),
-		   intel_rc6_residency_us(dev_priv, reg));
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		seq_printf(m, "%s %u (%llu us)\n", title,
+			   intel_uncore_read(&i915->uncore, reg),
+			   intel_rc6_residency_us(&i915->gt.rc6, reg));
 }
 
 static int vlv_drpc_info(struct seq_file *m)
@@ -1283,7 +1212,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 		else if (INTEL_GEN(dev_priv) >= 6)
 			err = gen6_drpc_info(m);
 		else
-			err = ironlake_drpc_info(m);
+			err = ilk_drpc_info(m);
 	}
 
 	return err;
@@ -1439,7 +1368,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 static int i915_ring_freq_table(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	unsigned int max_gpu_freq, min_gpu_freq;
 	intel_wakeref_t wakeref;
 	int gpu_freq, ia_freq;
@@ -1464,10 +1393,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
 				       &ia_freq, NULL);
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
-			   intel_gpu_freq(dev_priv, (gpu_freq *
-						     (IS_GEN9_BC(dev_priv) ||
-						      INTEL_GEN(dev_priv) >= 10 ?
-						      GEN9_FREQ_SCALER : 1))),
+			   intel_gpu_freq(rps,
+					  (gpu_freq *
+					   (IS_GEN9_BC(dev_priv) ||
+					    INTEL_GEN(dev_priv) >= 10 ?
+					    GEN9_FREQ_SCALER : 1))),
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
@@ -1478,21 +1408,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 static int i915_opregion(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_opregion *opregion = &dev_priv->opregion;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
+	struct intel_opregion *opregion = &node_to_i915(m->private)->opregion;
 
 	if (opregion->header)
 		seq_write(m, opregion->header, OPREGION_SIZE);
 
-	mutex_unlock(&dev->struct_mutex);
-
-out:
 	return 0;
 }
 
@@ -1512,11 +1432,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_framebuffer *fbdev_fb = NULL;
 	struct drm_framebuffer *drm_fb;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) {
@@ -1551,7 +1466,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 	mutex_unlock(&dev->mode_config.fb_lock);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -1564,23 +1478,20 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
@@ -1601,22 +1512,24 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
 				seq_printf(m, "%s: ", ce->engine->name);
 				if (ce->state)
 					describe_obj(m, ce->state->obj);
 				describe_ctx_ring(m, ce->ring);
 				seq_putc(m, '\n');
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
-	}
 
-	mutex_unlock(&dev->struct_mutex);
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
+	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return 0;
 }
@@ -1654,9 +1567,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
-		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
+		   swizzle_string(dev_priv->ggtt.bit_6_swizzle_x));
 	seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
-		   swizzle_string(dev_priv->mm.bit_6_swizzle_y));
+		   swizzle_string(dev_priv->ggtt.bit_6_swizzle_y));
 
 	if (IS_GEN_RANGE(dev_priv, 3, 4)) {
 		seq_printf(m, "DDC = 0x%08x\n",
@@ -1711,22 +1624,7 @@ static const char *rps_power_to_str(unsigned int power)
 static int i915_rps_boost_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 act_freq = rps->cur_freq;
-	intel_wakeref_t wakeref;
-
-	with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm, wakeref) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-			vlv_punit_get(dev_priv);
-			act_freq = vlv_punit_read(dev_priv,
-						  PUNIT_REG_GPU_FREQ_STS);
-			vlv_punit_put(dev_priv);
-			act_freq = (act_freq >> 8) & 0xff;
-		} else {
-			act_freq = intel_get_cagf(dev_priv,
-						  I915_READ(GEN6_RPSTAT1));
-		}
-	}
+	struct intel_rps *rps = &dev_priv->gt.rps;
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
 	seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
@@ -1734,17 +1632,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
 	seq_printf(m, "Frequency requested %d, actual %d\n",
-		   intel_gpu_freq(dev_priv, rps->cur_freq),
-		   intel_gpu_freq(dev_priv, act_freq));
+		   intel_gpu_freq(rps, rps->cur_freq),
+		   intel_rps_read_actual_frequency(rps));
 	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
-		   intel_gpu_freq(dev_priv, rps->min_freq),
-		   intel_gpu_freq(dev_priv, rps->min_freq_softlimit),
-		   intel_gpu_freq(dev_priv, rps->max_freq_softlimit),
-		   intel_gpu_freq(dev_priv, rps->max_freq));
+		   intel_gpu_freq(rps, rps->min_freq),
+		   intel_gpu_freq(rps, rps->min_freq_softlimit),
+		   intel_gpu_freq(rps, rps->max_freq_softlimit),
+		   intel_gpu_freq(rps, rps->max_freq));
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
-		   intel_gpu_freq(dev_priv, rps->idle_freq),
-		   intel_gpu_freq(dev_priv, rps->efficient_freq),
-		   intel_gpu_freq(dev_priv, rps->boost_freq));
+		   intel_gpu_freq(rps, rps->idle_freq),
+		   intel_gpu_freq(rps, rps->efficient_freq),
+		   intel_gpu_freq(rps, rps->boost_freq));
 
 	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
 
@@ -1860,8 +1758,8 @@ static void i915_guc_log_info(struct seq_file *m,
 	struct intel_guc_log *log = &dev_priv->gt.uc.guc.log;
 	enum guc_log_buffer_type type;
 
-	if (!intel_guc_log_relay_enabled(log)) {
-		seq_puts(m, "GuC log relay disabled\n");
+	if (!intel_guc_log_relay_created(log)) {
+		seq_puts(m, "GuC log relay not created\n");
 		return;
 	}
 
@@ -1881,30 +1779,12 @@ static void i915_guc_log_info(struct seq_file *m,
 static int i915_guc_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	const struct intel_guc *guc = &dev_priv->gt.uc.guc;
-	struct intel_guc_client *client = guc->execbuf_client;
 
 	if (!USES_GUC(dev_priv))
 		return -ENODEV;
 
 	i915_guc_log_info(m, dev_priv);
 
-	if (!USES_GUC_SUBMISSION(dev_priv))
-		return 0;
-
-	GEM_BUG_ON(!guc->execbuf_client);
-
-	seq_printf(m, "\nDoorbell map:\n");
-	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
-	seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline);
-
-	seq_printf(m, "\nGuC execbuf client @ %p:\n", client);
-	seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
-		   client->priority,
-		   client->stage_id,
-		   client->proc_desc_offset);
-	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n",
-		   client->doorbell_id, client->doorbell_offset);
 	/* Add more as required ... */
 
 	return 0;
@@ -2048,9 +1928,23 @@ i915_guc_log_relay_write(struct file *filp,
 			 loff_t *ppos)
 {
 	struct intel_guc_log *log = filp->private_data;
+	int val;
+	int ret;
 
-	intel_guc_log_relay_flush(log);
-	return cnt;
+	ret = kstrtoint_from_user(ubuf, cnt, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Enable and start the guc log relay on value of 1.
+	 * Flush log relay for any other value.
+	 */
+	if (val == 1)
+		ret = intel_guc_log_relay_start(log);
+	else
+		intel_guc_log_relay_flush(log);
+
+	return ret ?: cnt;
 }
 
 static int i915_guc_log_relay_release(struct inode *inode, struct file *file)
@@ -2085,7 +1979,7 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 	struct drm_connector *connector = m->private;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 	int ret;
 
 	if (!CAN_PSR(dev_priv)) {
@@ -2133,7 +2027,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
 			"BUF_ON",
 			"TG_ON"
 		};
-		val = I915_READ(EDP_PSR2_STATUS);
+		val = I915_READ(EDP_PSR2_STATUS(dev_priv->psr.transcoder));
 		status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >>
 			      EDP_PSR2_STATUS_STATE_SHIFT;
 		if (status_val < ARRAY_SIZE(live_status))
@@ -2149,7 +2043,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
 			"SRDOFFACK",
 			"SRDENT_ON",
 		};
-		val = I915_READ(EDP_PSR_STATUS);
+		val = I915_READ(EDP_PSR_STATUS(dev_priv->psr.transcoder));
 		status_val = (val & EDP_PSR_STATUS_STATE_MASK) >>
 			      EDP_PSR_STATUS_STATE_SHIFT;
 		if (status_val < ARRAY_SIZE(live_status))
@@ -2188,14 +2082,18 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 		status = "disabled";
 	seq_printf(m, "PSR mode: %s\n", status);
 
-	if (!psr->enabled)
+	if (!psr->enabled) {
+		seq_printf(m, "PSR sink not reliable: %s\n",
+			   yesno(psr->sink_not_reliable));
+
 		goto unlock;
+	}
 
 	if (psr->psr2_enabled) {
-		val = I915_READ(EDP_PSR2_CTL);
+		val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
 		enabled = val & EDP_PSR2_ENABLE;
 	} else {
-		val = I915_READ(EDP_PSR_CTL);
+		val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder));
 		enabled = val & EDP_PSR_ENABLE;
 	}
 	seq_printf(m, "Source PSR ctl: %s [0x%08x]\n",
@@ -2208,7 +2106,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 	 * SKL+ Perf counter is reset to 0 everytime DC state is entered
 	 */
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK;
+		val = I915_READ(EDP_PSR_PERF_CNT(dev_priv->psr.transcoder));
+		val &= EDP_PSR_PERF_CNT_MASK;
 		seq_printf(m, "Performance counter: %u\n", val);
 	}
 
@@ -2226,8 +2125,11 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 		 * Reading all 3 registers before hand to minimize crossing a
 		 * frame boundary between register reads
 		 */
-		for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3)
-			su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame));
+		for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) {
+			val = I915_READ(PSR2_SU_STATUS(dev_priv->psr.transcoder,
+						       frame));
+			su_frames_val[frame / 3] = val;
+		}
 
 		seq_puts(m, "Frame:\tPSR2 SU blocks:\n");
 
@@ -2360,8 +2262,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused)
 
 		for_each_power_domain(power_domain, power_well->desc->domains)
 			seq_printf(m, "  %-23s %d\n",
-				 intel_display_power_domain_str(dev_priv,
-								power_domain),
+				 intel_display_power_domain_str(power_domain),
 				 power_domains->domain_use_count[power_domain]);
 	}
 
@@ -2396,6 +2297,13 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	if (INTEL_GEN(dev_priv) >= 12) {
 		dc5_reg = TGL_DMC_DEBUG_DC5_COUNT;
 		dc6_reg = TGL_DMC_DEBUG_DC6_COUNT;
+		/*
+		 * NOTE: DMC_DEBUG3 is a general purpose reg.
+		 * According to B.Specs:49196 DMC f/w reuses DC5/6 counter
+		 * reg for DC3CO debugging and validation,
+		 * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter.
+		 */
+		seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3));
 	} else {
 		dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT :
 						 SKL_CSR_DC3_DC5_COUNT;
@@ -2418,7 +2326,7 @@ out:
 }
 
 static void intel_seq_print_mode(struct seq_file *m, int tabs,
-				 struct drm_display_mode *mode)
+				 const struct drm_display_mode *mode)
 {
 	int i;
 
@@ -2429,59 +2337,35 @@ static void intel_seq_print_mode(struct seq_file *m, int tabs,
 }
 
 static void intel_encoder_info(struct seq_file *m,
-			       struct intel_crtc *intel_crtc,
-			       struct intel_encoder *intel_encoder)
+			       struct intel_crtc *crtc,
+			       struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_crtc *crtc = &intel_crtc->base;
-	struct intel_connector *intel_connector;
-	struct drm_encoder *encoder;
-
-	encoder = &intel_encoder->base;
-	seq_printf(m, "\tencoder %d: type: %s, connectors:\n",
-		   encoder->base.id, encoder->name);
-	for_each_connector_on_encoder(dev, encoder, intel_connector) {
-		struct drm_connector *connector = &intel_connector->base;
-		seq_printf(m, "\t\tconnector %d: type: %s, status: %s",
-			   connector->base.id,
-			   connector->name,
-			   drm_get_connector_status_name(connector->status));
-		if (connector->status == connector_status_connected) {
-			struct drm_display_mode *mode = &crtc->mode;
-			seq_printf(m, ", mode:\n");
-			intel_seq_print_mode(m, 2, mode);
-		} else {
-			seq_putc(m, '\n');
-		}
-	}
-}
+	struct drm_connector_list_iter conn_iter;
+	struct drm_connector *connector;
 
-static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_crtc *crtc = &intel_crtc->base;
-	struct intel_encoder *intel_encoder;
-	struct drm_plane_state *plane_state = crtc->primary->state;
-	struct drm_framebuffer *fb = plane_state->fb;
+	seq_printf(m, "\t[ENCODER:%d:%s]: connectors:\n",
+		   encoder->base.base.id, encoder->base.name);
 
-	if (fb)
-		seq_printf(m, "\tfb: %d, pos: %dx%d, size: %dx%d\n",
-			   fb->base.id, plane_state->src_x >> 16,
-			   plane_state->src_y >> 16, fb->width, fb->height);
-	else
-		seq_puts(m, "\tprimary plane disabled\n");
-	for_each_encoder_on_crtc(dev, crtc, intel_encoder)
-		intel_encoder_info(m, intel_crtc, intel_encoder);
+	drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
+		const struct drm_connector_state *conn_state =
+			connector->state;
+
+		if (conn_state->best_encoder != &encoder->base)
+			continue;
+
+		seq_printf(m, "\t\t[CONNECTOR:%d:%s]\n",
+			   connector->base.id, connector->name);
+	}
+	drm_connector_list_iter_end(&conn_iter);
 }
 
 static void intel_panel_info(struct seq_file *m, struct intel_panel *panel)
 {
-	struct drm_display_mode *mode = panel->fixed_mode;
+	const struct drm_display_mode *mode = panel->fixed_mode;
 
-	seq_printf(m, "\tfixed mode:\n");
-	intel_seq_print_mode(m, 2, mode);
+	seq_printf(m, "\tfixed mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode));
 }
 
 static void intel_hdcp_info(struct seq_file *m,
@@ -2507,7 +2391,7 @@ static void intel_dp_info(struct seq_file *m,
 			  struct intel_connector *intel_connector)
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder);
 
 	seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]);
 	seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio));
@@ -2527,7 +2411,7 @@ static void intel_dp_mst_info(struct seq_file *m,
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
 	struct intel_dp_mst_encoder *intel_mst =
-		enc_to_mst(&intel_encoder->base);
+		enc_to_mst(intel_encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr,
@@ -2540,7 +2424,7 @@ static void intel_hdmi_info(struct seq_file *m,
 			    struct intel_connector *intel_connector)
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder);
 
 	seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio));
 	if (intel_connector->hdcp.shim) {
@@ -2559,10 +2443,12 @@ static void intel_connector_info(struct seq_file *m,
 				 struct drm_connector *connector)
 {
 	struct intel_connector *intel_connector = to_intel_connector(connector);
-	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct drm_display_mode *mode;
+	const struct drm_connector_state *conn_state = connector->state;
+	struct intel_encoder *encoder =
+		to_intel_encoder(conn_state->best_encoder);
+	const struct drm_display_mode *mode;
 
-	seq_printf(m, "connector %d: type %s, status: %s\n",
+	seq_printf(m, "[CONNECTOR:%d:%s]: status: %s\n",
 		   connector->base.id, connector->name,
 		   drm_get_connector_status_name(connector->status));
 
@@ -2576,24 +2462,24 @@ static void intel_connector_info(struct seq_file *m,
 		   drm_get_subpixel_order_name(connector->display_info.subpixel_order));
 	seq_printf(m, "\tCEA rev: %d\n", connector->display_info.cea_rev);
 
-	if (!intel_encoder)
+	if (!encoder)
 		return;
 
 	switch (connector->connector_type) {
 	case DRM_MODE_CONNECTOR_DisplayPort:
 	case DRM_MODE_CONNECTOR_eDP:
-		if (intel_encoder->type == INTEL_OUTPUT_DP_MST)
+		if (encoder->type == INTEL_OUTPUT_DP_MST)
 			intel_dp_mst_info(m, intel_connector);
 		else
 			intel_dp_info(m, intel_connector);
 		break;
 	case DRM_MODE_CONNECTOR_LVDS:
-		if (intel_encoder->type == INTEL_OUTPUT_LVDS)
+		if (encoder->type == INTEL_OUTPUT_LVDS)
 			intel_lvds_info(m, intel_connector);
 		break;
 	case DRM_MODE_CONNECTOR_HDMIA:
-		if (intel_encoder->type == INTEL_OUTPUT_HDMI ||
-		    intel_encoder->type == INTEL_OUTPUT_DDI)
+		if (encoder->type == INTEL_OUTPUT_HDMI ||
+		    encoder->type == INTEL_OUTPUT_DDI)
 			intel_hdmi_info(m, intel_connector);
 		break;
 	default:
@@ -2640,70 +2526,88 @@ static void plane_rotation(char *buf, size_t bufsize, unsigned int rotation)
 		 rotation);
 }
 
-static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+static void intel_plane_uapi_info(struct seq_file *m, struct intel_plane *plane)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_plane *intel_plane;
+	const struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+	const struct drm_framebuffer *fb = plane_state->uapi.fb;
+	struct drm_format_name_buf format_name;
+	struct drm_rect src, dst;
+	char rot_str[48];
 
-	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
-		struct drm_plane_state *state;
-		struct drm_plane *plane = &intel_plane->base;
-		struct drm_format_name_buf format_name;
-		char rot_str[48];
+	src = drm_plane_state_src(&plane_state->uapi);
+	dst = drm_plane_state_dest(&plane_state->uapi);
 
-		if (!plane->state) {
-			seq_puts(m, "plane->state is NULL!\n");
-			continue;
-		}
+	if (fb)
+		drm_get_format_name(fb->format->format, &format_name);
 
-		state = plane->state;
+	plane_rotation(rot_str, sizeof(rot_str),
+		       plane_state->uapi.rotation);
 
-		if (state->fb) {
-			drm_get_format_name(state->fb->format->format,
-					    &format_name);
-		} else {
-			sprintf(format_name.str, "N/A");
-		}
+	seq_printf(m, "\t\tuapi: fb=%d,%s,%dx%d, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n",
+		   fb ? fb->base.id : 0, fb ? format_name.str : "n/a",
+		   fb ? fb->width : 0, fb ? fb->height : 0,
+		   DRM_RECT_FP_ARG(&src),
+		   DRM_RECT_ARG(&dst),
+		   rot_str);
+}
+
+static void intel_plane_hw_info(struct seq_file *m, struct intel_plane *plane)
+{
+	const struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct drm_format_name_buf format_name;
+	char rot_str[48];
 
-		plane_rotation(rot_str, sizeof(rot_str), state->rotation);
-
-		seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n",
-			   plane->base.id,
-			   plane_type(intel_plane->base.type),
-			   state->crtc_x, state->crtc_y,
-			   state->crtc_w, state->crtc_h,
-			   (state->src_x >> 16),
-			   ((state->src_x & 0xffff) * 15625) >> 10,
-			   (state->src_y >> 16),
-			   ((state->src_y & 0xffff) * 15625) >> 10,
-			   (state->src_w >> 16),
-			   ((state->src_w & 0xffff) * 15625) >> 10,
-			   (state->src_h >> 16),
-			   ((state->src_h & 0xffff) * 15625) >> 10,
-			   format_name.str,
-			   rot_str);
+	if (!fb)
+		return;
+
+	drm_get_format_name(fb->format->format, &format_name);
+
+	plane_rotation(rot_str, sizeof(rot_str),
+		       plane_state->hw.rotation);
+
+	seq_printf(m, "\t\thw: fb=%d,%s,%dx%d, visible=%s, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n",
+		   fb->base.id, format_name.str,
+		   fb->width, fb->height,
+		   yesno(plane_state->uapi.visible),
+		   DRM_RECT_FP_ARG(&plane_state->uapi.src),
+		   DRM_RECT_ARG(&plane_state->uapi.dst),
+		   rot_str);
+}
+
+static void intel_plane_info(struct seq_file *m, struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct intel_plane *plane;
+
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		seq_printf(m, "\t[PLANE:%d:%s]: type=%s\n",
+			   plane->base.base.id, plane->base.name,
+			   plane_type(plane->base.type));
+		intel_plane_uapi_info(m, plane);
+		intel_plane_hw_info(m, plane);
 	}
 }
 
-static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+static void intel_scaler_info(struct seq_file *m, struct intel_crtc *crtc)
 {
-	struct intel_crtc_state *pipe_config;
-	int num_scalers = intel_crtc->num_scalers;
+	const struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	int num_scalers = crtc->num_scalers;
 	int i;
 
-	pipe_config = to_intel_crtc_state(intel_crtc->base.state);
-
 	/* Not all platformas have a scaler */
 	if (num_scalers) {
 		seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
 			   num_scalers,
-			   pipe_config->scaler_state.scaler_users,
-			   pipe_config->scaler_state.scaler_id);
+			   crtc_state->scaler_state.scaler_users,
+			   crtc_state->scaler_state.scaler_id);
 
 		for (i = 0; i < num_scalers; i++) {
-			struct intel_scaler *sc =
-					&pipe_config->scaler_state.scalers[i];
+			const struct intel_scaler *sc =
+				&crtc_state->scaler_state.scalers[i];
 
 			seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
 				   i, yesno(sc->in_use), sc->mode);
@@ -2714,6 +2618,44 @@ static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
 	}
 }
 
+static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	const struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_encoder *encoder;
+
+	seq_printf(m, "[CRTC:%d:%s]:\n",
+		   crtc->base.base.id, crtc->base.name);
+
+	seq_printf(m, "\tuapi: enable=%s, active=%s, mode=" DRM_MODE_FMT "\n",
+		   yesno(crtc_state->uapi.enable),
+		   yesno(crtc_state->uapi.active),
+		   DRM_MODE_ARG(&crtc_state->uapi.mode));
+
+	if (crtc_state->hw.enable) {
+		seq_printf(m, "\thw: active=%s, adjusted_mode=" DRM_MODE_FMT "\n",
+			   yesno(crtc_state->hw.active),
+			   DRM_MODE_ARG(&crtc_state->hw.adjusted_mode));
+
+		seq_printf(m, "\tpipe src size=%dx%d, dither=%s, bpp=%d\n",
+			   crtc_state->pipe_src_w, crtc_state->pipe_src_h,
+			   yesno(crtc_state->dither), crtc_state->pipe_bpp);
+
+		intel_scaler_info(m, crtc);
+	}
+
+	for_each_intel_encoder_mask(&dev_priv->drm, encoder,
+				    crtc_state->uapi.encoder_mask)
+		intel_encoder_info(m, crtc, encoder);
+
+	intel_plane_info(m, crtc);
+
+	seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s\n",
+		   yesno(!crtc->cpu_fifo_underrun_disabled),
+		   yesno(!crtc->pch_fifo_underrun_disabled));
+}
+
 static int i915_display_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -2725,52 +2667,22 @@ static int i915_display_info(struct seq_file *m, void *unused)
 
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
+	drm_modeset_lock_all(dev);
+
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
-	for_each_intel_crtc(dev, crtc) {
-		struct intel_crtc_state *pipe_config;
-
-		drm_modeset_lock(&crtc->base.mutex, NULL);
-		pipe_config = to_intel_crtc_state(crtc->base.state);
-
-		seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
-			   crtc->base.base.id, pipe_name(crtc->pipe),
-			   yesno(pipe_config->base.active),
-			   pipe_config->pipe_src_w, pipe_config->pipe_src_h,
-			   yesno(pipe_config->dither), pipe_config->pipe_bpp);
-
-		if (pipe_config->base.active) {
-			struct intel_plane *cursor =
-				to_intel_plane(crtc->base.cursor);
-
-			intel_crtc_info(m, crtc);
-
-			seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n",
-				   yesno(cursor->base.state->visible),
-				   cursor->base.state->crtc_x,
-				   cursor->base.state->crtc_y,
-				   cursor->base.state->crtc_w,
-				   cursor->base.state->crtc_h,
-				   cursor->cursor.base);
-			intel_scaler_info(m, crtc);
-			intel_plane_info(m, crtc);
-		}
-
-		seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
-			   yesno(!crtc->cpu_fifo_underrun_disabled),
-			   yesno(!crtc->pch_fifo_underrun_disabled));
-		drm_modeset_unlock(&crtc->base.mutex);
-	}
+	for_each_intel_crtc(dev, crtc)
+		intel_crtc_info(m, crtc);
 
 	seq_printf(m, "\n");
 	seq_printf(m, "Connector info\n");
 	seq_printf(m, "--------------\n");
-	mutex_lock(&dev->mode_config.mutex);
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter)
 		intel_connector_info(m, connector);
 	drm_connector_list_iter_end(&conn_iter);
-	mutex_unlock(&dev->mode_config.mutex);
+
+	drm_modeset_unlock_all(dev);
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
@@ -2806,7 +2718,7 @@ static int i915_rcs_topology(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	intel_device_info_dump_topology(&RUNTIME_INFO(dev_priv)->sseu, &p);
+	intel_device_info_print_topology(&RUNTIME_INFO(dev_priv)->sseu, &p);
 
 	return 0;
 }
@@ -3102,16 +3014,17 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused)
 		if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
 			continue;
 
-		intel_encoder = intel_attached_encoder(connector);
+		intel_encoder = intel_attached_encoder(to_intel_connector(connector));
 		if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
 			continue;
 
-		intel_dig_port = enc_to_dig_port(&intel_encoder->base);
+		intel_dig_port = enc_to_dig_port(intel_encoder);
 		if (!intel_dig_port->dp.can_mst)
 			continue;
 
-		seq_printf(m, "MST Source Port %c\n",
-			   port_name(intel_dig_port->base.port));
+		seq_printf(m, "MST Source Port [ENCODER:%d:%s]\n",
+			   intel_dig_port->base.base.base.id,
+			   intel_dig_port->base.base.name);
 		drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr);
 	}
 	drm_connector_list_iter_end(&conn_iter);
@@ -3155,7 +3068,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			status = kstrtoint(input_buffer, 10, &val);
 			if (status < 0)
 				break;
@@ -3164,9 +3077,9 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 			 * testing code, only accept an actual value of 1 here
 			 */
 			if (val == 1)
-				intel_dp->compliance.test_active = 1;
+				intel_dp->compliance.test_active = true;
 			else
-				intel_dp->compliance.test_active = 0;
+				intel_dp->compliance.test_active = false;
 		}
 	}
 	drm_connector_list_iter_end(&conn_iter);
@@ -3199,7 +3112,7 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (intel_dp->compliance.test_active)
 				seq_puts(m, "1");
 			else
@@ -3249,7 +3162,7 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (intel_dp->compliance.test_type ==
 			    DP_TEST_LINK_EDID_READ)
 				seq_printf(m, "%lx",
@@ -3293,7 +3206,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			seq_printf(m, "%02lx", intel_dp->compliance.test_type);
 		} else
 			seq_puts(m, "0");
@@ -3573,6 +3486,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
 			"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+	struct drm_i915_private *i915 = data;
+	const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+	/*
+	 * This would lead to infinite waits as we're doing timestamp
+	 * difference on the CS with only 32bits.
+	 */
+	if (val > mul_u32_u32(U32_MAX, clk))
+		return -EINVAL;
+
+	atomic64_set(&i915->perf.noa_programming_delay, val);
+	return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+	struct drm_i915_private *i915 = data;
+
+	*val = atomic64_read(&i915->perf.noa_programming_delay);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+			i915_perf_noa_delay_get,
+			i915_perf_noa_delay_set,
+			"%llu\n");
+
 #define DROP_UNBOUND	BIT(0)
 #define DROP_BOUND	BIT(1)
 #define DROP_RETIRE	BIT(2)
@@ -3582,6 +3526,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 #define DROP_IDLE	BIT(6)
 #define DROP_RESET_ACTIVE	BIT(7)
 #define DROP_RESET_SEQNO	BIT(8)
+#define DROP_RCU	BIT(9)
 #define DROP_ALL (DROP_UNBOUND	| \
 		  DROP_BOUND	| \
 		  DROP_RETIRE	| \
@@ -3590,7 +3535,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 		  DROP_SHRINK_ALL |\
 		  DROP_IDLE	| \
 		  DROP_RESET_ACTIVE | \
-		  DROP_RESET_SEQNO)
+		  DROP_RESET_SEQNO | \
+		  DROP_RCU)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -3598,58 +3544,48 @@ i915_drop_caches_get(void *data, u64 *val)
 
 	return 0;
 }
-
 static int
-i915_drop_caches_set(void *data, u64 val)
+gt_drop_caches(struct intel_gt *gt, u64 val)
 {
-	struct drm_i915_private *i915 = data;
-
-	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
-		  val, val & DROP_ALL);
+	int ret;
 
 	if (val & DROP_RESET_ACTIVE &&
-	    wait_for(intel_engines_are_idle(&i915->gt),
-		     I915_IDLE_ENGINES_TIMEOUT))
-		intel_gt_set_wedged(&i915->gt);
+	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
+		intel_gt_set_wedged(gt);
 
-	/* No need to check and wait for gpu resets, only libdrm auto-restarts
-	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
-		int ret;
+	if (val & DROP_RETIRE)
+		intel_gt_retire_requests(gt);
 
-		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (val & (DROP_IDLE | DROP_ACTIVE)) {
+		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
+	}
 
-		/*
-		 * To finish the flush of the idle_worker, we must complete
-		 * the switch-to-kernel-context, which requires a double
-		 * pass through wait_for_idle: first queues the switch,
-		 * second waits for the switch.
-		 */
-		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
+	if (val & DROP_IDLE) {
+		ret = intel_gt_pm_wait_for_idle(gt);
+		if (ret)
+			return ret;
+	}
 
-		if (ret == 0 && val & DROP_IDLE)
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
+	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
+		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
 
-		if (val & DROP_RETIRE)
-			i915_retire_requests(i915);
+	return 0;
+}
 
-		mutex_unlock(&i915->drm.struct_mutex);
+static int
+i915_drop_caches_set(void *data, u64 val)
+{
+	struct drm_i915_private *i915 = data;
+	int ret;
 
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
-	}
+	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
+		  val, val & DROP_ALL);
 
-	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
-		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
+	ret = gt_drop_caches(&i915->gt, val);
+	if (ret)
+		return ret;
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
@@ -3662,10 +3598,8 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_shrink_all(i915);
 	fs_reclaim_release(GFP_KERNEL);
 
-	if (val & DROP_IDLE) {
-		flush_delayed_work(&i915->gem.retire_work);
-		flush_work(&i915->gem.idle_work);
-	}
+	if (val & DROP_RCU)
+		rcu_barrier();
 
 	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(i915);
@@ -3721,6 +3655,15 @@ i915_cache_sharing_set(void *data, u64 val)
 	return 0;
 }
 
+static void
+intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,
+			  u8 *to_mask)
+{
+	int offset = slice * sseu->ss_stride;
+
+	memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
+}
+
 DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
 			i915_cache_sharing_get, i915_cache_sharing_set,
 			"%llu\n");
@@ -3794,12 +3737,13 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
+		intel_sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask);
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
-			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+			if (info->sseu.has_subslice_pg &&
+			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 				/* skip disabled subslice */
 				continue;
 
@@ -3845,18 +3789,21 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(dev_priv))
-			sseu->subslice_mask[s] =
-				RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
+			intel_sseu_copy_subslices(&info->sseu, s,
+						  sseu->subslice_mask);
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
+			u8 ss_idx = s * info->sseu.ss_stride +
+				    ss / BITS_PER_BYTE;
 
 			if (IS_GEN9_LP(dev_priv)) {
 				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask[s] |= BIT(ss);
+				sseu->subslice_mask[ss_idx] |=
+					BIT(ss % BITS_PER_BYTE);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -3870,28 +3817,26 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 #undef SS_MAX
 }
 
-static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
-					 struct sseu_dev_info *sseu)
+static void bdw_sseu_device_status(struct drm_i915_private *dev_priv,
+				   struct sseu_dev_info *sseu)
 {
+	const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv);
 	u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
 	int s;
 
 	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
 
 	if (sseu->slice_mask) {
-		sseu->eu_per_subslice =
-			RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice;
-		for (s = 0; s < fls(sseu->slice_mask); s++) {
-			sseu->subslice_mask[s] =
-				RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
-		}
+		sseu->eu_per_subslice = info->sseu.eu_per_subslice;
+		for (s = 0; s < fls(sseu->slice_mask); s++)
+			intel_sseu_copy_subslices(&info->sseu, s,
+						  sseu->subslice_mask);
 		sseu->eu_total = sseu->eu_per_subslice *
 				 intel_sseu_subslice_total(sseu);
 
 		/* subtract fused off EU(s) from enabled slice(s) */
 		for (s = 0; s < fls(sseu->slice_mask); s++) {
-			u8 subslice_7eu =
-				RUNTIME_INFO(dev_priv)->sseu.subslice_7eu[s];
+			u8 subslice_7eu = info->sseu.subslice_7eu[s];
 
 			sseu->eu_total -= hweight8(subslice_7eu);
 		}
@@ -3938,6 +3883,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv);
 	struct sseu_dev_info sseu;
 	intel_wakeref_t wakeref;
 
@@ -3945,20 +3891,19 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 		return -ENODEV;
 
 	seq_puts(m, "SSEU Device Info\n");
-	i915_print_sseu_info(m, true, &RUNTIME_INFO(dev_priv)->sseu);
+	i915_print_sseu_info(m, true, &info->sseu);
 
 	seq_puts(m, "SSEU Device Status\n");
 	memset(&sseu, 0, sizeof(sseu));
-	sseu.max_slices = RUNTIME_INFO(dev_priv)->sseu.max_slices;
-	sseu.max_subslices = RUNTIME_INFO(dev_priv)->sseu.max_subslices;
-	sseu.max_eus_per_subslice =
-		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
+	intel_sseu_set_info(&sseu, info->sseu.max_slices,
+			    info->sseu.max_subslices,
+			    info->sseu.max_eus_per_subslice);
 
 	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		if (IS_CHERRYVIEW(dev_priv))
 			cherryview_sseu_device_status(dev_priv, &sseu);
 		else if (IS_BROADWELL(dev_priv))
-			broadwell_sseu_device_status(dev_priv, &sseu);
+			bdw_sseu_device_status(dev_priv, &sseu);
 		else if (IS_GEN(dev_priv, 9))
 			gen9_sseu_device_status(dev_priv, &sseu);
 		else if (INTEL_GEN(dev_priv) >= 10)
@@ -3973,13 +3918,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -3987,13 +3931,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
@@ -4181,11 +4124,11 @@ static int i915_drrs_ctl_set(void *data, u64 val)
 
 		crtc_state = to_intel_crtc_state(crtc->base.state);
 
-		if (!crtc_state->base.active ||
+		if (!crtc_state->hw.active ||
 		    !crtc_state->has_drrs)
 			goto out;
 
-		commit = crtc_state->base.commit;
+		commit = crtc_state->uapi.commit;
 		if (commit) {
 			ret = wait_for_completion_interruptible(&commit->hw_done);
 			if (ret)
@@ -4197,18 +4140,18 @@ static int i915_drrs_ctl_set(void *data, u64 val)
 			struct intel_encoder *encoder;
 			struct intel_dp *intel_dp;
 
-			if (!(crtc_state->base.connector_mask &
+			if (!(crtc_state->uapi.connector_mask &
 			      drm_connector_mask(connector)))
 				continue;
 
-			encoder = intel_attached_encoder(connector);
+			encoder = intel_attached_encoder(to_intel_connector(connector));
 			if (encoder->type != INTEL_OUTPUT_EDP)
 				continue;
 
 			DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n",
 						val ? "en" : "dis", val);
 
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (val)
 				intel_edp_drrs_enable(intel_dp,
 						      crtc_state);
@@ -4256,14 +4199,14 @@ i915_fifo_underrun_reset_write(struct file *filp,
 			return ret;
 
 		crtc_state = to_intel_crtc_state(intel_crtc->base.state);
-		commit = crtc_state->base.commit;
+		commit = crtc_state->uapi.commit;
 		if (commit) {
 			ret = wait_for_completion_interruptible(&commit->hw_done);
 			if (!ret)
 				ret = wait_for_completion_interruptible(&commit->flip_done);
 		}
 
-		if (!ret && crtc_state->base.active) {
+		if (!ret && crtc_state->hw.active) {
 			DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n",
 				      pipe_name(intel_crtc->pipe));
 
@@ -4302,7 +4245,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
-	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
 	{"i915_ring_freq_table", i915_ring_freq_table, 0},
 	{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
@@ -4339,6 +4281,7 @@ static const struct i915_debugfs_files {
 	const char *name;
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
+	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
@@ -4412,7 +4355,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data)
 {
 	struct drm_connector *connector = m->private;
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 	u8 buf[16];
 	ssize_t err;
 	int i;
@@ -4447,7 +4390,7 @@ static int i915_panel_show(struct seq_file *m, void *data)
 {
 	struct drm_connector *connector = m->private;
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 
 	if (connector->status != connector_status_connected)
 		return -ENODEV;
@@ -4525,10 +4468,10 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 		} else if (ret) {
 			break;
 		}
-		intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 		crtc_state = to_intel_crtc_state(crtc->state);
 		seq_printf(m, "DSC_Enabled: %s\n",
-			   yesno(crtc_state->dsc_params.compression_enable));
+			   yesno(crtc_state->dsc.compression_enable));
 		seq_printf(m, "DSC_Sink_Support: %s\n",
 			   yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd)));
 		seq_printf(m, "Force_DSC_Enable: %s\n",
@@ -4552,8 +4495,8 @@ static ssize_t i915_dsc_fec_support_write(struct file *file,
 	int ret;
 	struct drm_connector *connector =
 		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	if (len == 0)
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3d717e282908..f7385abdd74b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -36,7 +36,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pnp.h>
 #include <linux/slab.h>
-#include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/vt.h>
 #include <acpi/video.h>
@@ -54,16 +53,18 @@
 #include "display/intel_display_types.h"
 #include "display/intel_dp.h"
 #include "display/intel_fbdev.h"
-#include "display/intel_gmbus.h"
 #include "display/intel_hotplug.h"
 #include "display/intel_overlay.h"
 #include "display/intel_pipe_crc.h"
 #include "display/intel_sprite.h"
+#include "display/intel_vga.h"
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gem_mman.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
 
 #include "i915_debugfs.h"
 #include "i915_drv.h"
@@ -72,10 +73,12 @@
 #include "i915_perf.h"
 #include "i915_query.h"
 #include "i915_suspend.h"
+#include "i915_switcheroo.h"
 #include "i915_sysfs.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "intel_csr.h"
+#include "intel_memory_region.h"
 #include "intel_pm.h"
 
 static struct drm_driver driver;
@@ -269,179 +272,97 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv)
 		release_resource(&dev_priv->mch_res);
 }
 
-/* true = enable decode, false = disable decoder */
-static unsigned int i915_vga_set_decode(void *cookie, bool state)
+static int i915_driver_modeset_probe(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = cookie;
-
-	intel_modeset_vga_set_state(dev_priv, state);
-	if (state)
-		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
-		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
-	else
-		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
-}
-
-static int i915_resume_switcheroo(struct drm_i915_private *i915);
-static int i915_suspend_switcheroo(struct drm_i915_private *i915,
-				   pm_message_t state);
-
-static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
-{
-	struct drm_i915_private *i915 = pdev_to_i915(pdev);
-	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
-
-	if (!i915) {
-		dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n");
-		return;
-	}
-
-	if (state == VGA_SWITCHEROO_ON) {
-		pr_info("switched on\n");
-		i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		/* i915 resume handler doesn't set to D0 */
-		pci_set_power_state(pdev, PCI_D0);
-		i915_resume_switcheroo(i915);
-		i915->drm.switch_power_state = DRM_SWITCH_POWER_ON;
-	} else {
-		pr_info("switched off\n");
-		i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		i915_suspend_switcheroo(i915, pmm);
-		i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF;
-	}
-}
-
-static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
-{
-	struct drm_i915_private *i915 = pdev_to_i915(pdev);
-
-	/*
-	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
-	 * locking inversion with the driver load path. And the access here is
-	 * completely racy anyway. So don't bother with locking for now.
-	 */
-	return i915 && i915->drm.open_count == 0;
-}
-
-static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
-	.set_gpu_state = i915_switcheroo_set_state,
-	.reprobe = NULL,
-	.can_switch = i915_switcheroo_can_switch,
-};
-
-static int i915_driver_modeset_probe(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int ret;
 
-	if (i915_inject_probe_failure(dev_priv))
+	if (i915_inject_probe_failure(i915))
 		return -ENODEV;
 
-	if (HAS_DISPLAY(dev_priv)) {
-		ret = drm_vblank_init(&dev_priv->drm,
-				      INTEL_INFO(dev_priv)->num_pipes);
+	if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) {
+		ret = drm_vblank_init(&i915->drm,
+				      INTEL_NUM_PIPES(i915));
 		if (ret)
 			goto out;
 	}
 
-	intel_bios_init(dev_priv);
+	intel_bios_init(i915);
 
-	/* If we have > 1 VGA cards, then we need to arbitrate access
-	 * to the common VGA resources.
-	 *
-	 * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA),
-	 * then we do not take part in VGA arbitration and the
-	 * vga_client_register() fails with -ENODEV.
-	 */
-	ret = vga_client_register(pdev, dev_priv, NULL, i915_vga_set_decode);
-	if (ret && ret != -ENODEV)
+	ret = intel_vga_register(i915);
+	if (ret)
 		goto out;
 
 	intel_register_dsm_handler();
 
-	ret = vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false);
+	ret = i915_switcheroo_register(i915);
 	if (ret)
 		goto cleanup_vga_client;
 
-	intel_power_domains_init_hw(dev_priv, false);
+	intel_power_domains_init_hw(i915, false);
 
-	intel_csr_ucode_init(dev_priv);
+	intel_csr_ucode_init(i915);
 
-	ret = intel_irq_install(dev_priv);
+	ret = intel_irq_install(i915);
 	if (ret)
 		goto cleanup_csr;
 
-	intel_gmbus_setup(dev_priv);
-
 	/* Important: The output setup functions called by modeset_init need
 	 * working irqs for e.g. gmbus and dp aux transfers. */
-	ret = intel_modeset_init(dev);
+	ret = intel_modeset_init(i915);
 	if (ret)
 		goto cleanup_irq;
 
-	ret = i915_gem_init(dev_priv);
+	ret = i915_gem_init(i915);
 	if (ret)
 		goto cleanup_modeset;
 
-	intel_overlay_setup(dev_priv);
+	intel_overlay_setup(i915);
 
-	if (!HAS_DISPLAY(dev_priv))
+	if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915))
 		return 0;
 
-	ret = intel_fbdev_init(dev);
+	ret = intel_fbdev_init(&i915->drm);
 	if (ret)
 		goto cleanup_gem;
 
 	/* Only enable hotplug handling once the fbdev is fully set up. */
-	intel_hpd_init(dev_priv);
+	intel_hpd_init(i915);
 
-	intel_init_ipc(dev_priv);
+	intel_init_ipc(i915);
 
 	return 0;
 
 cleanup_gem:
-	i915_gem_suspend(dev_priv);
-	i915_gem_driver_remove(dev_priv);
-	i915_gem_driver_release(dev_priv);
+	i915_gem_suspend(i915);
+	i915_gem_driver_remove(i915);
+	i915_gem_driver_release(i915);
 cleanup_modeset:
-	intel_modeset_driver_remove(dev);
+	intel_modeset_driver_remove(i915);
 cleanup_irq:
-	intel_irq_uninstall(dev_priv);
-	intel_gmbus_teardown(dev_priv);
+	intel_irq_uninstall(i915);
 cleanup_csr:
-	intel_csr_ucode_fini(dev_priv);
-	intel_power_domains_driver_remove(dev_priv);
-	vga_switcheroo_unregister_client(pdev);
+	intel_csr_ucode_fini(i915);
+	intel_power_domains_driver_remove(i915);
+	i915_switcheroo_unregister(i915);
 cleanup_vga_client:
-	vga_client_register(pdev, NULL, NULL, NULL);
+	intel_vga_unregister(i915);
 out:
 	return ret;
 }
 
-static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
+static void i915_driver_modeset_remove(struct drm_i915_private *i915)
 {
-	struct apertures_struct *ap;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool primary;
-	int ret;
-
-	ap = alloc_apertures(1);
-	if (!ap)
-		return -ENOMEM;
+	intel_modeset_driver_remove(i915);
 
-	ap->ranges[0].base = ggtt->gmadr.start;
-	ap->ranges[0].size = ggtt->mappable_end;
+	intel_irq_uninstall(i915);
 
-	primary =
-		pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+	intel_bios_driver_remove(i915);
 
-	ret = drm_fb_helper_remove_conflicting_framebuffers(ap, "inteldrmfb", primary);
+	i915_switcheroo_unregister(i915);
 
-	kfree(ap);
+	intel_vga_unregister(i915);
 
-	return ret;
+	intel_csr_ucode_fini(i915);
 }
 
 static void intel_init_dpio(struct drm_i915_private *dev_priv)
@@ -548,6 +469,12 @@ static void vlv_free_s0ix_state(struct drm_i915_private *i915)
 	i915->vlv_s0ix_state = NULL;
 }
 
+static void sanitize_gpu(struct drm_i915_private *i915)
+{
+	if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
+		__intel_gt_reset(&i915->gt, ALL_ENGINES);
+}
+
 /**
  * i915_driver_early_probe - setup state not requiring device access
  * @dev_priv: device private
@@ -598,9 +525,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
 	intel_gt_init_early(&dev_priv->gt, dev_priv);
 
-	ret = i915_gem_init_early(dev_priv);
-	if (ret < 0)
-		goto err_gt;
+	i915_gem_init_early(dev_priv);
 
 	/* This must be called before any calls to HAS_PCH_* */
 	intel_detect_pch(dev_priv);
@@ -622,7 +547,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
 err_gem:
 	i915_gem_cleanup_early(dev_priv);
-err_gt:
 	intel_gt_driver_late_release(&dev_priv->gt);
 	vlv_free_s0ix_state(dev_priv);
 err_workqueues:
@@ -680,11 +604,12 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
 
 	intel_uc_init_mmio(&dev_priv->gt.uc);
 
-	ret = intel_engines_init_mmio(dev_priv);
+	ret = intel_engines_init_mmio(&dev_priv->gt);
 	if (ret)
 		goto err_uncore;
 
-	i915_gem_init_mmio(dev_priv);
+	/* As early as possible, scrub existing GPU state before clobbering */
+	sanitize_gpu(dev_priv);
 
 	return 0;
 
@@ -703,7 +628,6 @@ err_bridge:
  */
 static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
 {
-	intel_engines_cleanup(dev_priv);
 	intel_teardown_mchbar(dev_priv);
 	intel_uncore_fini_mmio(&dev_priv->uncore);
 	pci_dev_put(dev_priv->bridge_dev);
@@ -1137,7 +1061,7 @@ intel_get_dram_info(struct drm_i915_private *dev_priv)
 	 */
 	dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv);
 
-	if (INTEL_GEN(dev_priv) < 9)
+	if (INTEL_GEN(dev_priv) < 9 || !HAS_DISPLAY(dev_priv))
 		return;
 
 	if (IS_GEN9_LP(dev_priv))
@@ -1157,8 +1081,8 @@ intel_get_dram_info(struct drm_i915_private *dev_priv)
 
 static u32 gen9_edram_size_mb(struct drm_i915_private *dev_priv, u32 cap)
 {
-	const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
-	const unsigned int sets[4] = { 1, 1, 2, 2 };
+	static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
+	static const u8 sets[4] = { 1, 1, 2, 2 };
 
 	return EDRAM_NUM_BANKS(cap) *
 		ways[EDRAM_WAYS_IDX(cap)] *
@@ -1246,32 +1170,24 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_perf;
 
-	/*
-	 * WARNING: Apparently we must kick fbdev drivers before vgacon,
-	 * otherwise the vga fbdev driver falls over.
-	 */
-	ret = i915_kick_out_firmware_fb(dev_priv);
-	if (ret) {
-		DRM_ERROR("failed to remove conflicting framebuffer drivers\n");
+	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "inteldrmfb");
+	if (ret)
 		goto err_ggtt;
-	}
 
-	ret = vga_remove_vgacon(pdev);
-	if (ret) {
-		DRM_ERROR("failed to remove conflicting VGA console\n");
+	ret = i915_ggtt_init_hw(dev_priv);
+	if (ret)
 		goto err_ggtt;
-	}
 
-	ret = i915_ggtt_init_hw(dev_priv);
+	ret = intel_memory_regions_hw_probe(dev_priv);
 	if (ret)
 		goto err_ggtt;
 
-	intel_gt_init_hw(dev_priv);
+	intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret) {
 		DRM_ERROR("failed to enable GGTT\n");
-		goto err_ggtt;
+		goto err_mem_regions;
 	}
 
 	pci_set_master(pdev);
@@ -1288,7 +1204,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 		if (ret) {
 			DRM_ERROR("failed to set DMA mask\n");
 
-			goto err_ggtt;
+			goto err_mem_regions;
 		}
 	}
 
@@ -1306,16 +1222,13 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 		if (ret) {
 			DRM_ERROR("failed to set DMA mask\n");
 
-			goto err_ggtt;
+			goto err_mem_regions;
 		}
 	}
 
 	pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
 			   PM_QOS_DEFAULT_VALUE);
 
-	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_sanitize_gt_powersave(dev_priv);
-
 	intel_gt_init_workarounds(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
@@ -1361,6 +1274,8 @@ err_msi:
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
 	pm_qos_remove_request(&dev_priv->pm_qos);
+err_mem_regions:
+	intel_memory_regions_driver_release(dev_priv);
 err_ggtt:
 	i915_ggtt_driver_release(dev_priv);
 err_perf:
@@ -1415,14 +1330,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 	} else
 		DRM_ERROR("Failed to register driver for userspace access!\n");
 
-	if (HAS_DISPLAY(dev_priv)) {
+	if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) {
 		/* Must be done after probing outputs */
 		intel_opregion_register(dev_priv);
 		acpi_video_register();
 	}
 
-	if (IS_GEN(dev_priv, 5))
-		intel_gpu_ips_init(dev_priv);
+	intel_gt_driver_register(&dev_priv->gt);
 
 	intel_audio_init(dev_priv);
 
@@ -1439,7 +1353,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 	 * We need to coordinate the hotplugs with the asynchronous fbdev
 	 * configuration, for which we use the fbdev->async_cookie.
 	 */
-	if (HAS_DISPLAY(dev_priv))
+	if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv))
 		drm_kms_helper_poll_init(dev);
 
 	intel_power_domains_enable(dev_priv);
@@ -1465,7 +1379,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	 */
 	drm_kms_helper_poll_fini(&dev_priv->drm);
 
-	intel_gpu_ips_teardown();
+	intel_gt_driver_unregister(&dev_priv->gt);
 	acpi_video_unregister();
 	intel_opregion_unregister(dev_priv);
 
@@ -1480,7 +1394,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 
 static void i915_welcome_messages(struct drm_i915_private *dev_priv)
 {
-	if (drm_debug & DRM_UT_DRIVER) {
+	if (drm_debug_enabled(DRM_UT_DRIVER)) {
 		struct drm_printer p = drm_debug_printer("i915 device info:");
 
 		drm_printf(&p, "pciid=0x%04x rev=0x%02x platform=%s (subplatform=0x%x) gen=%i\n",
@@ -1491,8 +1405,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv)
 					     INTEL_INFO(dev_priv)->platform),
 			   INTEL_GEN(dev_priv));
 
-		intel_device_info_dump_flags(INTEL_INFO(dev_priv), &p);
-		intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p);
+		intel_device_info_print_static(INTEL_INFO(dev_priv), &p);
+		intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p);
 	}
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
@@ -1574,6 +1488,23 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!i915_modparams.nuclear_pageflip && match_info->gen < 5)
 		dev_priv->drm.driver_features &= ~DRIVER_ATOMIC;
 
+	/*
+	 * Check if we support fake LMEM -- for now we only unleash this for
+	 * the live selftests(test-and-exit).
+	 */
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+	if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) {
+		if (INTEL_GEN(dev_priv) >= 9 && i915_selftest.live < 0 &&
+		    i915_modparams.fake_lmem_start) {
+			mkwrite_device_info(dev_priv)->memory_regions =
+				REGION_SMEM | REGION_LMEM | REGION_STOLEN;
+			mkwrite_device_info(dev_priv)->is_dgfx = true;
+			GEM_BUG_ON(!HAS_LMEM(dev_priv));
+			GEM_BUG_ON(!IS_DGFX(dev_priv));
+		}
+	}
+#endif
+
 	ret = pci_enable_device(pdev);
 	if (ret)
 		goto out_fini;
@@ -1594,7 +1525,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret < 0)
 		goto out_cleanup_mmio;
 
-	ret = i915_driver_modeset_probe(&dev_priv->drm);
+	ret = i915_driver_modeset_probe(dev_priv);
 	if (ret < 0)
 		goto out_cleanup_hw;
 
@@ -1608,10 +1539,8 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 out_cleanup_hw:
 	i915_driver_hw_remove(dev_priv);
+	intel_memory_regions_driver_release(dev_priv);
 	i915_ggtt_driver_release(dev_priv);
-
-	/* Paranoia: make sure we have disabled everything before we exit. */
-	intel_sanitize_gt_powersave(dev_priv);
 out_cleanup_mmio:
 	i915_driver_mmio_release(dev_priv);
 out_runtime_pm_put:
@@ -1627,8 +1556,6 @@ out_fini:
 
 void i915_driver_remove(struct drm_i915_private *i915)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-
 	disable_rpm_wakeref_asserts(&i915->runtime_pm);
 
 	i915_driver_unregister(i915);
@@ -1649,19 +1576,9 @@ void i915_driver_remove(struct drm_i915_private *i915)
 
 	intel_gvt_driver_remove(i915);
 
-	intel_modeset_driver_remove(&i915->drm);
-
-	intel_bios_driver_remove(i915);
-
-	vga_switcheroo_unregister_client(pdev);
-	vga_client_register(pdev, NULL, NULL, NULL);
+	i915_driver_modeset_remove(i915);
 
-	intel_csr_ucode_fini(i915);
-
-	/* Free error state after interrupts are fully disabled. */
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 	i915_reset_error_state(i915);
-
 	i915_gem_driver_remove(i915);
 
 	intel_power_domains_driver_remove(i915);
@@ -1680,11 +1597,9 @@ static void i915_driver_release(struct drm_device *dev)
 
 	i915_gem_driver_release(dev_priv);
 
+	intel_memory_regions_driver_release(dev_priv);
 	i915_ggtt_driver_release(dev_priv);
 
-	/* Paranoia: make sure we have disabled everything before we exit. */
-	intel_sanitize_gt_powersave(dev_priv);
-
 	i915_driver_mmio_release(dev_priv);
 
 	enable_rpm_wakeref_asserts(rpm);
@@ -1728,12 +1643,10 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(file);
 	i915_gem_release(dev, file);
-	mutex_unlock(&dev->struct_mutex);
 
-	kfree(file_priv);
+	kfree_rcu(file_priv, rcu);
 
 	/* Catch up with all the deferred frees from "this" client */
 	i915_gem_flush_free_objects(to_i915(dev));
@@ -1847,8 +1760,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 	i915_gem_suspend_late(dev_priv);
 
-	i915_rc6_ctx_wa_suspend(dev_priv);
-
 	intel_uncore_suspend(&dev_priv->uncore);
 
 	intel_power_domains_suspend(dev_priv,
@@ -1890,8 +1801,7 @@ out:
 	return ret;
 }
 
-static int
-i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state)
+int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state)
 {
 	int error;
 
@@ -1915,18 +1825,15 @@ static int i915_drm_resume(struct drm_device *dev)
 	int ret;
 
 	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
-	intel_sanitize_gt_powersave(dev_priv);
 
-	i915_gem_sanitize(dev_priv);
+	sanitize_gpu(dev_priv);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
 		DRM_ERROR("failed to re-enable GGTT\n");
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_restore_gtt_mappings(dev_priv);
-	i915_gem_restore_fences(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_restore_fences(&dev_priv->ggtt);
 
 	intel_csr_ucode_resume(dev_priv);
 
@@ -1951,7 +1858,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	i915_gem_resume(dev_priv);
 
-	intel_modeset_init_hw(dev);
+	intel_modeset_init_hw(dev_priv);
 	intel_init_clock_gating(dev_priv);
 
 	spin_lock_irq(&dev_priv->irq_lock);
@@ -2048,20 +1955,14 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	intel_display_power_resume_early(dev_priv);
 
-	intel_sanitize_gt_powersave(dev_priv);
-
 	intel_power_domains_resume(dev_priv);
 
-	i915_rc6_ctx_wa_resume(dev_priv);
-
-	intel_gt_sanitize(&dev_priv->gt, true);
-
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
 
-static int i915_resume_switcheroo(struct drm_i915_private *i915)
+int i915_resume_switcheroo(struct drm_i915_private *i915)
 {
 	int ret;
 
@@ -2594,9 +2495,6 @@ static int intel_runtime_suspend(struct device *kdev)
 	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
 	int ret = 0;
 
-	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
-		return -ENODEV;
-
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
 		return -ENODEV;
 
@@ -2629,7 +2527,7 @@ static int intel_runtime_suspend(struct device *kdev)
 
 		intel_gt_runtime_resume(&dev_priv->gt);
 
-		i915_gem_restore_fences(dev_priv);
+		i915_gem_restore_fences(&dev_priv->ggtt);
 
 		enable_rpm_wakeref_asserts(rpm);
 
@@ -2709,7 +2607,7 @@ static int intel_runtime_resume(struct device *kdev)
 	 * we can do is to hope that things will still work (and disable RPM).
 	 */
 	intel_gt_runtime_resume(&dev_priv->gt);
-	i915_gem_restore_fences(dev_priv);
+	i915_gem_restore_fences(&dev_priv->ggtt);
 
 	/*
 	 * On VLV/CHV display interrupts are part of the display
@@ -2771,18 +2669,12 @@ const struct dev_pm_ops i915_pm_ops = {
 	.runtime_resume = intel_runtime_resume,
 };
 
-static const struct vm_operations_struct i915_gem_vm_ops = {
-	.fault = i915_gem_fault,
-	.open = drm_gem_vm_open,
-	.close = drm_gem_vm_close,
-};
-
 static const struct file_operations i915_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_mmap,
+	.mmap = i915_gem_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
 	.compat_ioctl = i915_compat_ioctl,
@@ -2829,7 +2721,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_OFFSET, i915_gem_mmap_offset_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW),
@@ -2871,7 +2763,6 @@ static struct drm_driver driver = {
 
 	.gem_close_object = i915_gem_close_object,
 	.gem_free_object_unlocked = i915_gem_free_object,
-	.gem_vm_ops = &i915_gem_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
@@ -2882,7 +2773,8 @@ static struct drm_driver driver = {
 	.get_scanout_position = i915_get_crtc_scanoutpos,
 
 	.dumb_create = i915_gem_dumb_create,
-	.dumb_map_offset = i915_gem_mmap_gtt,
+	.dumb_map_offset = i915_gem_dumb_mmap_offset,
+
 	.ioctls = i915_ioctls,
 	.num_ioctls = ARRAY_SIZE(i915_ioctls),
 	.fops = &i915_driver_fops,
@@ -2893,7 +2785,3 @@ static struct drm_driver driver = {
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
 };
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_drm.c"
-#endif
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 89b6112bd66b..077af22b8340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -46,6 +46,7 @@
 #include <linux/dma-resv.h>
 #include <linux/shmem_fs.h>
 #include <linux/stackdepot.h>
+#include <linux/xarray.h>
 
 #include <drm/intel-gtt.h>
 #include <drm/drm_legacy.h> /* for struct drm_dma_handle */
@@ -67,6 +68,7 @@
 #include "display/intel_display.h"
 #include "display/intel_display_power.h"
 #include "display/intel_dpll_mgr.h"
+#include "display/intel_dsb.h"
 #include "display/intel_frontbuffer.h"
 #include "display/intel_gmbus.h"
 #include "display/intel_opregion.h"
@@ -84,6 +86,7 @@
 #include "intel_device_info.h"
 #include "intel_pch.h"
 #include "intel_runtime_pm.h"
+#include "intel_memory_region.h"
 #include "intel_uncore.h"
 #include "intel_wakeref.h"
 #include "intel_wopcm.h"
@@ -92,12 +95,15 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
+#include "i915_perf_types.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
 #include "gt/intel_timeline.h"
 #include "i915_vma.h"
 #include "i915_irq.h"
 
+#include "intel_region_lmem.h"
+
 #include "intel_gvt.h"
 
 /* General customization:
@@ -105,8 +111,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20190822"
-#define DRIVER_TIMESTAMP	1566477988
+#define DRIVER_DATE		"20200114"
+#define DRIVER_TIMESTAMP	1579001978
 
 struct drm_i915_gem_object;
 
@@ -185,15 +191,18 @@ struct i915_mmu_object;
 
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
-	struct drm_file *file;
+
+	union {
+		struct drm_file *file;
+		struct rcu_head rcu;
+	};
 
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
 	} mm;
 
-	struct idr context_idr;
-	struct mutex context_idr_lock; /* guards context_idr */
+	struct xarray context_xa;
 
 	struct idr vm_idr;
 	struct mutex vm_idr_lock; /* guards vm_idr */
@@ -264,14 +273,15 @@ struct drm_i915_display_funcs {
 	int (*compute_pipe_wm)(struct intel_crtc_state *crtc_state);
 	int (*compute_intermediate_wm)(struct intel_crtc_state *crtc_state);
 	void (*initial_watermarks)(struct intel_atomic_state *state,
-				   struct intel_crtc_state *crtc_state);
+				   struct intel_crtc *crtc);
 	void (*atomic_update_watermarks)(struct intel_atomic_state *state,
-					 struct intel_crtc_state *crtc_state);
+					 struct intel_crtc *crtc);
 	void (*optimize_watermarks)(struct intel_atomic_state *state,
-				    struct intel_crtc_state *crtc_state);
+				    struct intel_crtc *crtc);
 	int (*compute_global_watermarks)(struct intel_atomic_state *state);
 	void (*update_wm)(struct intel_crtc *crtc);
 	int (*modeset_calc_cdclk)(struct intel_atomic_state *state);
+	u8 (*calc_voltage_level)(int cdclk);
 	/* Returns the active state of the crtc, and if the crtc is active,
 	 * fills out the pipe-config with the hw state. */
 	bool (*get_pipe_config)(struct intel_crtc *,
@@ -280,11 +290,12 @@ struct drm_i915_display_funcs {
 					 struct intel_initial_plane_config *);
 	int (*crtc_compute_clock)(struct intel_crtc *crtc,
 				  struct intel_crtc_state *crtc_state);
-	void (*crtc_enable)(struct intel_crtc_state *pipe_config,
-			    struct intel_atomic_state *old_state);
-	void (*crtc_disable)(struct intel_crtc_state *old_crtc_state,
-			     struct intel_atomic_state *old_state);
-	void (*update_crtcs)(struct intel_atomic_state *state);
+	void (*crtc_enable)(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc);
+	void (*crtc_disable)(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc);
+	void (*commit_modeset_enables)(struct intel_atomic_state *state);
+	void (*commit_modeset_disables)(struct intel_atomic_state *state);
 	void (*audio_codec_enable)(struct intel_encoder *encoder,
 				   const struct intel_crtc_state *crtc_state,
 				   const struct drm_connector_state *conn_state);
@@ -331,6 +342,7 @@ struct intel_csr {
 	i915_reg_t mmioaddr[20];
 	u32 mmiodata[20];
 	u32 dc_state;
+	u32 target_dc_state;
 	u32 allowed_dc_mask;
 	intel_wakeref_t wakeref;
 };
@@ -354,7 +366,6 @@ struct intel_fbc {
 	unsigned threshold;
 	unsigned int possible_framebuffer_bits;
 	unsigned int busy_bits;
-	unsigned int visible_pipes_mask;
 	struct intel_crtc *crtc;
 
 	struct drm_mm_node compressed_fb;
@@ -362,8 +373,8 @@ struct intel_fbc {
 
 	bool false_color;
 
-	bool enabled;
 	bool active;
+	bool activated;
 	bool flip_pending;
 
 	bool underrun_detected;
@@ -375,9 +386,6 @@ struct intel_fbc {
 	 * these problems.
 	 */
 	struct intel_fbc_state_cache {
-		struct i915_vma *vma;
-		unsigned long flags;
-
 		struct {
 			unsigned int mode_flags;
 			u32 hsw_bdw_pixel_rate;
@@ -406,6 +414,8 @@ struct intel_fbc {
 			const struct drm_format_info *format;
 			unsigned int stride;
 		} fb;
+		u16 gen9_wa_cfb_stride;
+		s8 fence_id;
 	} state_cache;
 
 	/*
@@ -416,9 +426,6 @@ struct intel_fbc {
 	 * are supposed to read from it in order to program the registers.
 	 */
 	struct intel_fbc_reg_params {
-		struct i915_vma *vma;
-		unsigned long flags;
-
 		struct {
 			enum pipe pipe;
 			enum i9xx_plane_id i9xx_plane;
@@ -431,7 +438,9 @@ struct intel_fbc {
 		} fb;
 
 		int cfb_size;
-		unsigned int gen9_wa_cfb_stride;
+		u16 gen9_wa_cfb_stride;
+		s8 fence_id;
+		bool plane_visible;
 	} params;
 
 	const char *no_fbc_reason;
@@ -479,6 +488,7 @@ struct i915_psr {
 	bool enabled;
 	struct intel_dp *dp;
 	enum pipe pipe;
+	enum transcoder transcoder;
 	bool active;
 	struct work_struct work;
 	unsigned busy_frontbuffer_bits;
@@ -492,6 +502,10 @@ struct i915_psr {
 	bool sink_not_reliable;
 	bool irq_aux_error;
 	u16 su_x_granularity;
+	bool dc3co_enabled;
+	u32 dc3co_exit_delay;
+	struct delayed_work idle_work;
+	bool initially_probed;
 };
 
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
@@ -529,108 +543,6 @@ struct i915_suspend_saved_registers {
 
 struct vlv_s0ix_state;
 
-struct intel_rps_ei {
-	ktime_t ktime;
-	u32 render_c0;
-	u32 media_c0;
-};
-
-struct intel_rps {
-	struct mutex lock; /* protects enabling and the worker */
-
-	/*
-	 * work, interrupts_enabled and pm_iir are protected by
-	 * dev_priv->irq_lock
-	 */
-	struct work_struct work;
-	bool interrupts_enabled;
-	u32 pm_iir;
-
-	/* PM interrupt bits that should never be masked */
-	u32 pm_intrmsk_mbz;
-
-	/* Frequencies are stored in potentially platform dependent multiples.
-	 * In other words, *_freq needs to be multiplied by X to be interesting.
-	 * Soft limits are those which are used for the dynamic reclocking done
-	 * by the driver (raise frequencies under heavy loads, and lower for
-	 * lighter loads). Hard limits are those imposed by the hardware.
-	 *
-	 * A distinction is made for overclocking, which is never enabled by
-	 * default, and is considered to be above the hard limit if it's
-	 * possible at all.
-	 */
-	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
-	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
-	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
-	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
-	u8 min_freq;		/* AKA RPn. Minimum frequency */
-	u8 boost_freq;		/* Frequency to request when wait boosting */
-	u8 idle_freq;		/* Frequency to request when we are idle */
-	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
-	u8 rp1_freq;		/* "less than" RP0 power/freqency */
-	u8 rp0_freq;		/* Non-overclocked max frequency. */
-	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
-
-	int last_adj;
-
-	struct {
-		struct mutex mutex;
-
-		enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
-		unsigned int interactive;
-
-		u8 up_threshold; /* Current %busy required to uplock */
-		u8 down_threshold; /* Current %busy required to downclock */
-	} power;
-
-	bool enabled;
-	atomic_t num_waiters;
-	atomic_t boosts;
-
-	/* manual wa residency calculations */
-	struct intel_rps_ei ei;
-};
-
-struct intel_rc6 {
-	bool enabled;
-	bool ctx_corrupted;
-	intel_wakeref_t ctx_corrupted_wakeref;
-	u64 prev_hw_residency[4];
-	u64 cur_residency[4];
-};
-
-struct intel_llc_pstate {
-	bool enabled;
-};
-
-struct intel_gen6_power_mgmt {
-	struct intel_rps rps;
-	struct intel_rc6 rc6;
-	struct intel_llc_pstate llc_pstate;
-};
-
-/* defined intel_pm.c */
-extern spinlock_t mchdev_lock;
-
-struct intel_ilk_power_mgmt {
-	u8 cur_delay;
-	u8 min_delay;
-	u8 max_delay;
-	u8 fmax;
-	u8 fstart;
-
-	u64 last_count1;
-	unsigned long last_time1;
-	unsigned long chipset_power;
-	u64 last_count2;
-	u64 last_time2;
-	unsigned long gfx_power;
-	u8 corr;
-
-	int c_m;
-	int r_t;
-};
-
 #define MAX_L3_SLICES 2
 struct intel_l3_parity {
 	u32 *remap_info[MAX_L3_SLICES];
@@ -679,6 +591,8 @@ struct i915_gem_mm {
 	 */
 	struct vfsmount *gemfs;
 
+	struct intel_memory_region *regions[INTEL_REGION_UNKNOWN];
+
 	struct notifier_block oom_notifier;
 	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;
@@ -690,11 +604,6 @@ struct i915_gem_mm {
 	 */
 	struct workqueue_struct *userptr_wq;
 
-	/** Bit 6 swizzling required for X tiling */
-	u32 bit_6_swizzle_x;
-	/** Bit 6 swizzling required for Y tiling */
-	u32 bit_6_swizzle_y;
-
 	/* shrinker accounting, also useful for userland debugging */
 	u64 shrink_memory;
 	u32 shrink_count;
@@ -710,19 +619,18 @@ struct i915_gem_mm {
 
 #define I915_ENGINE_WEDGED_TIMEOUT  (60 * HZ)  /* Reset but no recovery? */
 
+/* Amount of SAGV/QGV points, BSpec precisely defines this */
+#define I915_NUM_QGV_POINTS 8
+
 struct ddi_vbt_port_info {
 	/* Non-NULL if port present. */
 	const struct child_device_config *child;
 
 	int max_tmds_clock;
 
-	/*
-	 * This is an index in the HDMI/DVI DDI buffer translation table.
-	 * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't
-	 * populate this field.
-	 */
-#define HDMI_LEVEL_SHIFT_UNKNOWN	0xff
+	/* This is an index in the HDMI/DVI DDI buffer translation table. */
 	u8 hdmi_level_shift;
+	u8 hdmi_level_shift_set:1;
 
 	u8 supports_dvi:1;
 	u8 supports_hdmi:1;
@@ -813,8 +721,7 @@ struct intel_vbt_data {
 
 	int crt_ddc_pin;
 
-	int child_dev_num;
-	struct child_device_config *child_dev;
+	struct list_head display_devices;
 
 	struct ddi_vbt_port_info ddi_port_info[I915_MAX_PORTS];
 	struct sdvo_device_mapping sdvo_mappings[2];
@@ -975,310 +882,15 @@ struct intel_wm_config {
 	bool sprites_scaled;
 };
 
-struct i915_oa_format {
-	u32 format;
-	int size;
-};
-
-struct i915_oa_reg {
-	i915_reg_t addr;
-	u32 value;
-};
-
-struct i915_oa_config {
-	char uuid[UUID_STRING_LEN + 1];
-	int id;
-
-	const struct i915_oa_reg *mux_regs;
-	u32 mux_regs_len;
-	const struct i915_oa_reg *b_counter_regs;
-	u32 b_counter_regs_len;
-	const struct i915_oa_reg *flex_regs;
-	u32 flex_regs_len;
-
-	struct attribute_group sysfs_metric;
-	struct attribute *attrs[2];
-	struct device_attribute sysfs_metric_id;
-
-	atomic_t ref_count;
-};
-
-struct i915_perf_stream;
-
-/**
- * struct i915_perf_stream_ops - the OPs to support a specific stream type
- */
-struct i915_perf_stream_ops {
-	/**
-	 * @enable: Enables the collection of HW samples, either in response to
-	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
-	 * without `I915_PERF_FLAG_DISABLED`.
-	 */
-	void (*enable)(struct i915_perf_stream *stream);
-
-	/**
-	 * @disable: Disables the collection of HW samples, either in response
-	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
-	 * the stream.
-	 */
-	void (*disable)(struct i915_perf_stream *stream);
-
-	/**
-	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
-	 * once there is something ready to read() for the stream
-	 */
-	void (*poll_wait)(struct i915_perf_stream *stream,
-			  struct file *file,
-			  poll_table *wait);
-
-	/**
-	 * @wait_unlocked: For handling a blocking read, wait until there is
-	 * something to ready to read() for the stream. E.g. wait on the same
-	 * wait queue that would be passed to poll_wait().
-	 */
-	int (*wait_unlocked)(struct i915_perf_stream *stream);
-
-	/**
-	 * @read: Copy buffered metrics as records to userspace
-	 * **buf**: the userspace, destination buffer
-	 * **count**: the number of bytes to copy, requested by userspace
-	 * **offset**: zero at the start of the read, updated as the read
-	 * proceeds, it represents how many bytes have been copied so far and
-	 * the buffer offset for copying the next record.
-	 *
-	 * Copy as many buffered i915 perf samples and records for this stream
-	 * to userspace as will fit in the given buffer.
-	 *
-	 * Only write complete records; returning -%ENOSPC if there isn't room
-	 * for a complete record.
-	 *
-	 * Return any error condition that results in a short read such as
-	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
-	 * returning to userspace.
-	 */
-	int (*read)(struct i915_perf_stream *stream,
-		    char __user *buf,
-		    size_t count,
-		    size_t *offset);
-
-	/**
-	 * @destroy: Cleanup any stream specific resources.
-	 *
-	 * The stream will always be disabled before this is called.
-	 */
-	void (*destroy)(struct i915_perf_stream *stream);
-};
-
-/**
- * struct i915_perf_stream - state for a single open stream FD
- */
-struct i915_perf_stream {
-	/**
-	 * @dev_priv: i915 drm device
-	 */
-	struct drm_i915_private *dev_priv;
-
-	/**
-	 * @link: Links the stream into ``&drm_i915_private->streams``
-	 */
-	struct list_head link;
-
-	/**
-	 * @wakeref: As we keep the device awake while the perf stream is
-	 * active, we track our runtime pm reference for later release.
-	 */
-	intel_wakeref_t wakeref;
-
-	/**
-	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
-	 * properties given when opening a stream, representing the contents
-	 * of a single sample as read() by userspace.
-	 */
-	u32 sample_flags;
-
-	/**
-	 * @sample_size: Considering the configured contents of a sample
-	 * combined with the required header size, this is the total size
-	 * of a single sample record.
-	 */
-	int sample_size;
-
-	/**
-	 * @ctx: %NULL if measuring system-wide across all contexts or a
-	 * specific context that is being monitored.
-	 */
-	struct i915_gem_context *ctx;
-
-	/**
-	 * @enabled: Whether the stream is currently enabled, considering
-	 * whether the stream was opened in a disabled state and based
-	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
-	 */
-	bool enabled;
-
-	/**
-	 * @ops: The callbacks providing the implementation of this specific
-	 * type of configured stream.
-	 */
-	const struct i915_perf_stream_ops *ops;
-
-	/**
-	 * @oa_config: The OA configuration used by the stream.
-	 */
-	struct i915_oa_config *oa_config;
-
-	/**
-	 * The OA context specific information.
-	 */
-	struct intel_context *pinned_ctx;
-	u32 specific_ctx_id;
-	u32 specific_ctx_id_mask;
-
-	struct hrtimer poll_check_timer;
-	wait_queue_head_t poll_wq;
-	bool pollin;
-
-	bool periodic;
-	int period_exponent;
-
-	/**
-	 * State of the OA buffer.
-	 */
-	struct {
-		struct i915_vma *vma;
-		u8 *vaddr;
-		u32 last_ctx_id;
-		int format;
-		int format_size;
-		int size_exponent;
-
-		/**
-		 * Locks reads and writes to all head/tail state
-		 *
-		 * Consider: the head and tail pointer state needs to be read
-		 * consistently from a hrtimer callback (atomic context) and
-		 * read() fop (user context) with tail pointer updates happening
-		 * in atomic context and head updates in user context and the
-		 * (unlikely) possibility of read() errors needing to reset all
-		 * head/tail state.
-		 *
-		 * Note: Contention/performance aren't currently a significant
-		 * concern here considering the relatively low frequency of
-		 * hrtimer callbacks (5ms period) and that reads typically only
-		 * happen in response to a hrtimer event and likely complete
-		 * before the next callback.
-		 *
-		 * Note: This lock is not held *while* reading and copying data
-		 * to userspace so the value of head observed in htrimer
-		 * callbacks won't represent any partial consumption of data.
-		 */
-		spinlock_t ptr_lock;
-
-		/**
-		 * One 'aging' tail pointer and one 'aged' tail pointer ready to
-		 * used for reading.
-		 *
-		 * Initial values of 0xffffffff are invalid and imply that an
-		 * update is required (and should be ignored by an attempted
-		 * read)
-		 */
-		struct {
-			u32 offset;
-		} tails[2];
-
-		/**
-		 * Index for the aged tail ready to read() data up to.
-		 */
-		unsigned int aged_tail_idx;
-
-		/**
-		 * A monotonic timestamp for when the current aging tail pointer
-		 * was read; used to determine when it is old enough to trust.
-		 */
-		u64 aging_timestamp;
-
-		/**
-		 * Although we can always read back the head pointer register,
-		 * we prefer to avoid trusting the HW state, just to avoid any
-		 * risk that some hardware condition could * somehow bump the
-		 * head pointer unpredictably and cause us to forward the wrong
-		 * OA buffer data to userspace.
-		 */
-		u32 head;
-	} oa_buffer;
-};
-
-/**
- * struct i915_oa_ops - Gen specific implementation of an OA unit stream
- */
-struct i915_oa_ops {
-	/**
-	 * @is_valid_b_counter_reg: Validates register's address for
-	 * programming boolean counters for a particular platform.
-	 */
-	bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
-				       u32 addr);
-
-	/**
-	 * @is_valid_mux_reg: Validates register's address for programming mux
-	 * for a particular platform.
-	 */
-	bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
-	/**
-	 * @is_valid_flex_reg: Validates register's address for programming
-	 * flex EU filtering for a particular platform.
-	 */
-	bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
-	/**
-	 * @enable_metric_set: Selects and applies any MUX configuration to set
-	 * up the Boolean and Custom (B/C) counters that are part of the
-	 * counter reports being sampled. May apply system constraints such as
-	 * disabling EU clock gating as required.
-	 */
-	int (*enable_metric_set)(struct i915_perf_stream *stream);
-
-	/**
-	 * @disable_metric_set: Remove system constraints associated with using
-	 * the OA unit.
-	 */
-	void (*disable_metric_set)(struct i915_perf_stream *stream);
-
-	/**
-	 * @oa_enable: Enable periodic sampling
-	 */
-	void (*oa_enable)(struct i915_perf_stream *stream);
-
-	/**
-	 * @oa_disable: Disable periodic sampling
-	 */
-	void (*oa_disable)(struct i915_perf_stream *stream);
-
-	/**
-	 * @read: Copy data from the circular OA buffer into a given userspace
-	 * buffer.
-	 */
-	int (*read)(struct i915_perf_stream *stream,
-		    char __user *buf,
-		    size_t count,
-		    size_t *offset);
-
-	/**
-	 * @oa_hw_tail_read: read the OA tail pointer register
-	 *
-	 * In particular this enables us to share all the fiddly code for
-	 * handling the OA unit tail pointer race that affects multiple
-	 * generations.
-	 */
-	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
-};
-
 struct intel_cdclk_state {
 	unsigned int cdclk, vco, ref, bypass;
 	u8 voltage_level;
 };
 
+struct i915_selftest_stash {
+	atomic_t counter;
+};
+
 struct drm_i915_private {
 	struct drm_device drm;
 
@@ -1333,20 +945,17 @@ struct drm_i915_private {
 	 */
 	u32 gpio_mmio_base;
 
+	u32 hsw_psr_mmio_adjust;
+
 	/* MMIO base address for MIPI regs */
 	u32 mipi_mmio_base;
 
-	u32 psr_mmio_base;
-
 	u32 pps_mmio_base;
 
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
 
-	/* Context used internally to idle the GPU and setup initial state */
-	struct i915_gem_context *kernel_context;
-
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 	struct rb_root uabi_engines;
 
@@ -1369,7 +978,6 @@ struct drm_i915_private {
 		u32 irq_mask;
 		u32 de_irq_mask[I915_MAX_PIPES];
 	};
-	u32 pm_rps_events;
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct i915_hotplug hotplug;
@@ -1399,13 +1007,14 @@ struct drm_i915_private {
 	unsigned int fdi_pll_freq;
 	unsigned int czclk_freq;
 
+	/*
+	 * For reading holding any crtc lock is sufficient,
+	 * for writing must hold all of them.
+	 */
 	struct {
 		/*
 		 * The current logical cdclk state.
 		 * See intel_atomic_state.cdclk.logical
-		 *
-		 * For reading holding any crtc lock is sufficient,
-		 * for writing must hold all of them.
 		 */
 		struct intel_cdclk_state logical;
 		/*
@@ -1416,6 +1025,9 @@ struct drm_i915_private {
 		/* The current hardware cdclk state */
 		struct intel_cdclk_state hw;
 
+		/* cdclk, divider, and ratio table from bspec */
+		const struct intel_cdclk_vals *table;
+
 		int force_min_cdclk;
 	} cdclk;
 
@@ -1430,6 +1042,8 @@ struct drm_i915_private {
 
 	/* ordered wq for modesets */
 	struct workqueue_struct *modeset_wq;
+	/* unbound hipri wq for page flips/plane updates */
+	struct workqueue_struct *flip_wq;
 
 	/* Display functions */
 	struct drm_i915_display_funcs display;
@@ -1470,7 +1084,11 @@ struct drm_i915_private {
 	 */
 	struct mutex dpll_lock;
 
-	unsigned int active_crtcs;
+	/*
+	 * For reading active_pipes, min_cdclk, min_voltage_level holding
+	 * any crtc lock is sufficient, for writing must hold all of them.
+	 */
+	u8 active_pipes;
 	/* minimum acceptable cdclk for each pipe */
 	int min_cdclk[I915_MAX_PIPES];
 	/* minimum acceptable voltage level for each pipe */
@@ -1499,13 +1117,6 @@ struct drm_i915_private {
 	 */
 	u32 edram_size_mb;
 
-	/* gen6+ GT PM state */
-	struct intel_gen6_power_mgmt gt_pm;
-
-	/* ilk-only ips/rps state. Everything in here is protected by the global
-	 * mchdev_lock in intel_pm.c */
-	struct intel_ilk_power_mgmt ips;
-
 	struct i915_power_domains power_domains;
 
 	struct i915_psr psr;
@@ -1530,25 +1141,7 @@ struct drm_i915_private {
 	 */
 	struct mutex av_mutex;
 	int audio_power_refcount;
-
-	struct {
-		struct mutex mutex;
-		struct list_head list;
-		struct llist_head free_list;
-		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-/* in Gen12 ID 0x7FF is reserved to indicate idle */
-#define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
-		struct list_head hw_id_list;
-	} contexts;
+	u32 audio_freq_cntrl;
 
 	u32 fdi_rx_config;
 
@@ -1574,6 +1167,8 @@ struct drm_i915_private {
 		I915_SAGV_NOT_CONTROLLED
 	} sagv_status;
 
+	u32 sagv_block_time_us;
+
 	struct {
 		/*
 		 * Raw watermark latency values:
@@ -1635,7 +1230,8 @@ struct drm_i915_private {
 	} dram_info;
 
 	struct intel_bw_info {
-		unsigned int deratedbw[3]; /* for each QGV point */
+		/* for each QGV point */
+		unsigned int deratedbw[I915_NUM_QGV_POINTS];
 		u8 num_qgv_points;
 		u8 num_planes;
 	} max_bw[6];
@@ -1644,96 +1240,35 @@ struct drm_i915_private {
 
 	struct intel_runtime_pm runtime_pm;
 
-	struct {
-		bool initialized;
-
-		struct kobject *metrics_kobj;
-		struct ctl_table_header *sysctl_header;
-
-		/*
-		 * Lock associated with adding/modifying/removing OA configs
-		 * in dev_priv->perf.metrics_idr.
-		 */
-		struct mutex metrics_lock;
-
-		/*
-		 * List of dynamic configurations, you need to hold
-		 * dev_priv->perf.metrics_lock to access it.
-		 */
-		struct idr metrics_idr;
-
-		/*
-		 * Lock associated with anything below within this structure
-		 * except exclusive_stream.
-		 */
-		struct mutex lock;
-		struct list_head streams;
-
-		/*
-		 * The stream currently using the OA unit. If accessed
-		 * outside a syscall associated to its file
-		 * descriptor, you need to hold
-		 * dev_priv->drm.struct_mutex.
-		 */
-		struct i915_perf_stream *exclusive_stream;
-
-		/**
-		 * For rate limiting any notifications of spurious
-		 * invalid OA reports
-		 */
-		struct ratelimit_state spurious_report_rs;
-
-		struct i915_oa_config test_config;
-
-		u32 gen7_latched_oastatus1;
-		u32 ctx_oactxctrl_offset;
-		u32 ctx_flexeu0_offset;
-
-		/**
-		 * The RPT_ID/reason field for Gen8+ includes a bit
-		 * to determine if the CTX ID in the report is valid
-		 * but the specific bit differs between Gen 8 and 9
-		 */
-		u32 gen8_valid_ctx_bit;
-
-		struct i915_oa_ops ops;
-		const struct i915_oa_format *oa_formats;
-	} perf;
+	struct i915_perf perf;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct intel_gt gt;
 
 	struct {
-		struct notifier_block pm_notifier;
-
-		/**
-		 * We leave the user IRQ off as much as possible,
-		 * but this means that requests will finish and never
-		 * be retired once the system goes idle. Set a timer to
-		 * fire periodically while the ring is running. When it
-		 * fires, go retire requests.
-		 */
-		struct delayed_work retire_work;
-
-		/**
-		 * When we detect an idle GPU, we want to turn on
-		 * powersaving features. So once we see that there
-		 * are no more requests outstanding and no more
-		 * arrive within a small period of time, we fire
-		 * off the idle_work.
+		struct i915_gem_contexts {
+			spinlock_t lock; /* locks list */
+			struct list_head list;
+
+			struct llist_head free_list;
+			struct work_struct free_work;
+		} contexts;
+
+		/*
+		 * We replace the local file with a global mappings as the
+		 * backing storage for the mmap is on the device and not
+		 * on the struct file, and we do not want to prolong the
+		 * lifetime of the local fd. To minimise the number of
+		 * anonymous inodes we create, we use a global singleton to
+		 * share the global mapping.
 		 */
-		struct work_struct idle_work;
+		struct file *mmap_singleton;
 	} gem;
 
 	u8 pch_ssc_use;
 
-	/* For i945gm vblank irq vs. C3 workaround */
-	struct {
-		struct work_struct work;
-		struct pm_qos_request pm_qos;
-		u8 c3_disable_latency;
-		u8 enabled;
-	} i945gm_vblank;
+	/* For i915gm/i945gm vblank irq workaround */
+	u8 vblank_enabled;
 
 	/* perform PHY state sanity checks? */
 	bool chv_phy_assert[2];
@@ -1757,6 +1292,8 @@ struct drm_i915_private {
 	/* Mutex to protect the above hdcp component related values. */
 	struct mutex hdcp_comp_mutex;
 
+	I915_SELFTEST_DECLARE(struct i915_selftest_stash selftest;)
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -1796,10 +1333,10 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
 		for_each_if ((engine__) = (dev_priv__)->engine[(id__)])
 
 /* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
-	for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \
+#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
+	for ((tmp__) = (mask__) & INTEL_INFO((gt__)->i915)->engine_mask; \
 	     (tmp__) ? \
-	     ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
+	     ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
 	     0;)
 
 #define rb_to_uabi_engine(rb) \
@@ -1855,6 +1392,8 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
 	(BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \
 	 INTEL_INFO(dev_priv)->gen == (n))
 
+#define HAS_DSB(dev_priv)	(INTEL_INFO(dev_priv)->display.has_dsb)
+
 /*
  * Return true if revision is in range [since,until] inclusive.
  *
@@ -1926,6 +1465,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 }
 
 #define IS_MOBILE(dev_priv)	(INTEL_INFO(dev_priv)->is_mobile)
+#define IS_DGFX(dev_priv)   (INTEL_INFO(dev_priv)->is_dgfx)
 
 #define IS_I830(dev_priv)	IS_PLATFORM(dev_priv, INTEL_I830)
 #define IS_I845G(dev_priv)	IS_PLATFORM(dev_priv, INTEL_I845G)
@@ -2060,6 +1600,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define IS_ICL_REVID(p, since, until) \
 	(IS_ICELAKE(p) && IS_REVID(p, since, until))
 
+#define TGL_REVID_A0		0x0
+
+#define IS_TGL_REVID(p, since, until) \
+	(IS_TIGERLAKE(p) && IS_REVID(p, since, until))
+
 #define IS_LP(dev_priv)	(INTEL_INFO(dev_priv)->is_lp)
 #define IS_GEN9_LP(dev_priv)	(IS_GEN(dev_priv, 9) && IS_LP(dev_priv))
 #define IS_GEN9_BC(dev_priv)	(IS_GEN(dev_priv, 9) && !IS_LP(dev_priv))
@@ -2123,8 +1668,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	(IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
 
 /* WaRsDisableCoarsePowerGating:skl,cnl */
-#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
-	(IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
+#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv)			\
+	(IS_CANNONLAKE(dev_priv) ||					\
+	 IS_SKL_GT3(dev_priv) ||					\
+	 IS_SKL_GT4(dev_priv))
 
 #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
 #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -2166,6 +1713,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_IPC(dev_priv)		 (INTEL_INFO(dev_priv)->display.has_ipc)
 
+#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
+#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM)
+
 #define HAS_GT_UC(dev_priv)	(INTEL_INFO(dev_priv)->has_gt_uc)
 
 /* Having GuC is not the same as using GuC */
@@ -2189,7 +1739,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
-#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->num_pipes > 0)
+#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->pipe_mask))
+
+#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0)
+
+/* Only valid when HAS_DISPLAY() is true */
+#define INTEL_DISPLAY_ENABLED(dev_priv) (WARN_ON(!HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display)
 
 static inline bool intel_vtd_active(void)
 {
@@ -2222,7 +1777,9 @@ extern const struct dev_pm_ops i915_pm_ops;
 int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 void i915_driver_remove(struct drm_i915_private *i915);
 
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
+int i915_resume_switcheroo(struct drm_i915_private *i915);
+int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state);
+
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
@@ -2241,12 +1798,13 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 /* i915_gem.c */
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-void i915_gem_sanitize(struct drm_i915_private *i915);
-int i915_gem_init_early(struct drm_i915_private *dev_priv);
+void i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915);
+
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	/*
@@ -2294,14 +1852,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 			   unsigned long flags);
 #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
-
-struct i915_vma * __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    const struct i915_ggtt_view *view,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags);
+#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
@@ -2314,9 +1865,6 @@ i915_mutex_lock_interruptible(struct drm_device *dev)
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
-int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
-		      u32 handle, u64 *offset);
-int i915_gem_mmap_gtt_version(void);
 
 int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 
@@ -2326,24 +1874,19 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 }
 
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
-					  struct intel_engine_cs *engine)
+					  const struct intel_engine_cs *engine)
 {
 	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
 }
 
-void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
-int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
-vm_fault_t i915_gem_fault(struct vm_fault *vmf);
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
@@ -2359,7 +1902,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
 static inline struct i915_gem_context *
 __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
 {
-	return idr_find(&file_priv->context_idr, id);
+	return xa_load(&file_priv->context_xa, id);
 }
 
 static inline struct i915_gem_context *
@@ -2379,7 +1922,7 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  u64 min_size, u64 alignment,
-					  unsigned cache_level,
+					  unsigned long color,
 					  u64 start, u64 end,
 					  unsigned flags);
 int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
@@ -2395,9 +1938,9 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
-	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+	return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
 		i915_gem_object_is_tiled(obj);
 }
 
@@ -2412,14 +1955,13 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct i915_gem_context *cxt,
-			    struct intel_engine_cs *engine,
-			    struct drm_i915_gem_object *batch_obj,
-			    u64 user_batch_start,
-			    u32 batch_start_offset,
-			    u32 batch_len,
-			    struct drm_i915_gem_object *shadow_batch_obj,
-			    u64 shadow_batch_start);
+int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+			    struct i915_vma *batch,
+			    u32 batch_offset,
+			    u32 batch_length,
+			    struct i915_vma *shadow,
+			    bool trampoline);
+#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
 
 /* intel_device_info.c */
 static inline struct intel_device_info *
@@ -2486,6 +2028,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
+int remap_io_sg(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long size,
+		struct scatterlist *sgl, resource_size_t iobase);
 
 static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 {
@@ -2501,4 +2046,10 @@ i915_coherent_map_type(struct drm_i915_private *i915)
 	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
 }
 
+static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc)
+{
+	return intel_guc_is_submission_supported(guc) &&
+		intel_guc_is_running(guc);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 98305d987ac1..94f993e4c12f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,38 +44,45 @@
 #include "gem/i915_gem_clflush.h"
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
-#include "gem/i915_gem_pm.h"
-#include "gem/i915_gemfs.h"
+#include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_region.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
-#include "gt/intel_mocs.h"
-#include "gt/intel_reset.h"
-#include "gt/intel_renderstate.h"
 #include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
-#include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
 #include "intel_pm.h"
 
 static int
-insert_mappable_node(struct i915_ggtt *ggtt,
-                     struct drm_mm_node *node, u32 size)
+insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
 {
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->vm.mutex);
+	if (err)
+		return err;
+
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
-					   size, 0, I915_COLOR_UNEVICTABLE,
-					   0, ggtt->mappable_end,
-					   DRM_MM_INSERT_LOW);
+	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
+					  size, 0, I915_COLOR_UNEVICTABLE,
+					  0, ggtt->mappable_end,
+					  DRM_MM_INSERT_LOW);
+
+	mutex_unlock(&ggtt->vm.mutex);
+
+	return err;
 }
 
 static void
-remove_mappable_node(struct drm_mm_node *node)
+remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
 {
+	mutex_lock(&ggtt->vm.mutex);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 int
@@ -87,7 +94,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	struct i915_vma *vma;
 	u64 pinned;
 
-	mutex_lock(&ggtt->vm.mutex);
+	if (mutex_lock_interruptible(&ggtt->vm.mutex))
+		return -EINTR;
 
 	pinned = ggtt->vm.reserved;
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
@@ -105,29 +113,65 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 			   unsigned long flags)
 {
-	struct i915_vma *vma;
+	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
 	LIST_HEAD(still_in_list);
-	int ret = 0;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	int ret;
+
+	if (!atomic_read(&obj->bind_count))
+		return 0;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	/*
+	 * As some machines use ACPI to handle runtime-resume callbacks, and
+	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
+	 * as they are required by the shrinker. Ergo, we wake the device up
+	 * first just in case.
+	 */
+	wakeref = intel_runtime_pm_get(rpm);
 
+try_again:
+	ret = 0;
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
 						       obj_link))) {
+		struct i915_address_space *vm = vma->vm;
+
 		list_move_tail(&vma->obj_link, &still_in_list);
+		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
+			continue;
+
+		ret = -EAGAIN;
+		if (!i915_vm_tryopen(vm))
+			break;
+
+		/* Prevent vma being freed by i915_vma_parked as we unbind */
+		vma = __i915_vma_get(vma);
 		spin_unlock(&obj->vma.lock);
 
-		ret = -EBUSY;
-		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-		    !i915_vma_is_active(vma))
-			ret = i915_vma_unbind(vma);
+		if (vma) {
+			ret = -EBUSY;
+			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+			    !i915_vma_is_active(vma))
+				ret = i915_vma_unbind(vma);
 
+			__i915_vma_put(vma);
+		}
+
+		i915_vm_close(vm);
 		spin_lock(&obj->vma.lock);
 	}
-	list_splice(&still_in_list, &obj->vma.list);
+	list_splice_init(&still_in_list, &obj->vma.list);
 	spin_unlock(&obj->vma.lock);
 
+	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
+		rcu_barrier(); /* flush the i915_vm_release() */
+		goto try_again;
+	}
+
+	intel_runtime_pm_put(rpm, wakeref);
+
 	return ret;
 }
 
@@ -143,7 +187,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	 * We manually control the domain here and pretend that it
 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 	 */
-	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 
 	if (copy_from_user(vaddr, user_data, args->size))
 		return -EFAULT;
@@ -151,13 +195,13 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	drm_clflush_virt_range(vaddr, args->size);
 	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
 
-	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 	return 0;
 }
 
 static int
 i915_gem_create(struct drm_file *file,
-		struct drm_i915_private *dev_priv,
+		struct intel_memory_region *mr,
 		u64 *size_p,
 		u32 *handle_p)
 {
@@ -166,12 +210,16 @@ i915_gem_create(struct drm_file *file,
 	u64 size;
 	int ret;
 
-	size = round_up(*size_p, PAGE_SIZE);
+	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
+	size = round_up(*size_p, mr->min_page_size);
 	if (size == 0)
 		return -EINVAL;
 
+	/* For most of the ABI (e.g. mmap) we think in system pages */
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
 	/* Allocate the new object */
-	obj = i915_gem_object_create_shmem(dev_priv, size);
+	obj = i915_gem_object_create_region(mr, size, 0);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -191,6 +239,7 @@ i915_gem_dumb_create(struct drm_file *file,
 		     struct drm_device *dev,
 		     struct drm_mode_create_dumb *args)
 {
+	enum intel_memory_type mem_type;
 	int cpp = DIV_ROUND_UP(args->bpp, 8);
 	u32 format;
 
@@ -217,7 +266,14 @@ i915_gem_dumb_create(struct drm_file *file,
 		args->pitch = ALIGN(args->pitch, 4096);
 
 	args->size = args->pitch * args->height;
-	return i915_gem_create(file, to_i915(dev),
+
+	mem_type = INTEL_MEMORY_SYSTEM;
+	if (HAS_LMEM(to_i915(dev)))
+		mem_type = INTEL_MEMORY_LOCAL;
+
+	return i915_gem_create(file,
+			       intel_memory_region_by_type(to_i915(dev),
+							   mem_type),
 			       &args->size, &args->handle);
 }
 
@@ -231,12 +287,14 @@ int
 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_create *args = data;
 
-	i915_gem_flush_free_objects(dev_priv);
+	i915_gem_flush_free_objects(i915);
 
-	return i915_gem_create(file, dev_priv,
+	return i915_gem_create(file,
+			       intel_memory_region_by_type(i915,
+							   INTEL_MEMORY_SYSTEM),
 			       &args->size, &args->handle);
 }
 
@@ -338,10 +396,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	u64 remain, offset;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	vma = ERR_PTR(-ENODEV);
 	if (!i915_gem_object_is_tiled(obj))
@@ -351,16 +405,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out_unlock;
-		GEM_BUG_ON(!node.allocated);
+			goto out_rpm;
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -393,7 +445,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		unsigned page_offset = offset_in_page(offset);
 		unsigned page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			ggtt->vm.insert_page(&ggtt->vm,
 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
 					     node.start, I915_CACHE_NONE, 0);
@@ -414,17 +466,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out_unlock:
+out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return ret;
 }
 
@@ -531,10 +580,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	void __user *user_data;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	if (i915_gem_object_has_struct_page(obj)) {
 		/*
 		 * Avoid waking the device up if we can fallback, as
@@ -544,10 +589,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * using the cache bypass of indirect GGTT access.
 		 */
 		wakeref = intel_runtime_pm_get_if_in_use(rpm);
-		if (!wakeref) {
-			ret = -EFAULT;
-			goto out_unlock;
-		}
+		if (!wakeref)
+			return -EFAULT;
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
 		wakeref = intel_runtime_pm_get(rpm);
@@ -561,16 +604,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -588,7 +629,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		goto out_unpin;
 	}
 
-	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
@@ -604,7 +645,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		unsigned int page_offset = offset_in_page(offset);
 		unsigned int page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			/* flush the write before we modify the GGTT */
 			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 			ggtt->vm.insert_page(&ggtt->vm,
@@ -630,22 +671,20 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		user_data += page_length;
 		offset += page_length;
 	}
-	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+
+	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
-	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
 out_rpm:
 	intel_runtime_pm_put(rpm, wakeref);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
@@ -723,7 +762,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 	i915_gem_object_unlock_fence(obj, fence);
 
 	return ret;
@@ -857,7 +896,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 
 	list_for_each_entry_safe(obj, on,
 				 &i915->ggtt.userfault_list, userfault_link)
-		__i915_gem_object_release_mmap(obj);
+		__i915_gem_object_release_mmap_gtt(obj);
 
 	/*
 	 * The fence will be lost when the device powers down. If any were
@@ -887,74 +926,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-static long
-wait_for_timelines(struct drm_i915_private *i915,
-		   unsigned int wait, long timeout)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
-
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
-			continue;
-
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
-
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
-		if (timeout < 0)
-			return timeout;
-
-		/* restart after reacquiring the lock */
-		spin_lock_irqsave(&timelines->lock, flags);
-		tl = list_entry(&timelines->active_list, typeof(*tl), link);
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	return timeout;
-}
-
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
-			   unsigned int flags, long timeout)
-{
-	/* If the device is asleep, we have no requests outstanding */
-	if (!intel_gt_pm_is_awake(&i915->gt))
-		return 0;
-
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
-		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
-
-	timeout = wait_for_timelines(i915, flags, timeout);
-	if (timeout < 0)
-		return timeout;
-
-	if (flags & I915_WAIT_LOCKED) {
-		lockdep_assert_held(&i915->drm.struct_mutex);
-
-		i915_retire_requests(i915);
-	}
-
-	return 0;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -962,43 +933,29 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 alignment,
 			 u64 flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_address_space *vm = &dev_priv->ggtt.vm;
-
-	return i915_gem_object_pin(obj, vm, view, size, alignment,
-				   flags | PIN_GLOBAL);
-}
-
-struct i915_vma *
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    const struct i915_ggtt_view *view,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (i915_gem_object_never_bind_ggtt(obj))
 		return ERR_PTR(-ENODEV);
 
 	if (flags & PIN_MAPPABLE &&
 	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
-		/* If the required space is larger than the available
+		/*
+		 * If the required space is larger than the available
 		 * aperture, we will not able to find a slot for the
 		 * object and unbinding the object now will be in
 		 * vain. Worse, doing so may cause us to ping-pong
 		 * the object in and out of the Global GTT and
 		 * waste a lot of cycles under the mutex.
 		 */
-		if (obj->base.size > dev_priv->ggtt.mappable_end)
+		if (obj->base.size > ggtt->mappable_end)
 			return ERR_PTR(-E2BIG);
 
-		/* If NONBLOCK is set the caller is optimistically
+		/*
+		 * If NONBLOCK is set the caller is optimistically
 		 * trying to cache the full object within the mappable
 		 * aperture, and *must* have a fallback in place for
 		 * situations where we cannot bind the object. We
@@ -1014,11 +971,11 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		 * we could try to minimise harm to others.
 		 */
 		if (flags & PIN_NONBLOCK &&
-		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
+		    obj->base.size > ggtt->mappable_end / 2)
 			return ERR_PTR(-ENOSPC);
 	}
 
-	vma = i915_vma_instance(obj, vm, view);
+	vma = i915_vma_instance(obj, &ggtt->vm, view);
 	if (IS_ERR(vma))
 		return vma;
 
@@ -1028,31 +985,24 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				return ERR_PTR(-ENOSPC);
 
 			if (flags & PIN_MAPPABLE &&
-			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
+			    vma->fence_size > ggtt->mappable_end / 2)
 				return ERR_PTR(-ENOSPC);
 		}
 
-		WARN(i915_vma_is_pinned(vma),
-		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x, req.alignment=%llx,"
-		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
-		     i915_ggtt_offset(vma), alignment,
-		     !!(flags & PIN_MAPPABLE),
-		     i915_vma_is_map_and_fenceable(vma));
 		ret = i915_vma_unbind(vma);
 		if (ret)
 			return ERR_PTR(ret);
 	}
 
 	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
-		mutex_lock(&vma->vm->mutex);
+		mutex_lock(&ggtt->vm.mutex);
 		ret = i915_vma_revoke_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (ret)
 			return ERR_PTR(ret);
 	}
 
-	ret = i915_vma_pin(vma, size, alignment, flags);
+	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -1133,293 +1083,6 @@ out:
 	return err;
 }
 
-void i915_gem_sanitize(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
-
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	/*
-	 * As we have just resumed the machine and woken the device up from
-	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
-	 * back to defaults, recovering from whatever wedged state we left it
-	 * in and so worth trying to use the device once more.
-	 */
-	if (intel_gt_is_wedged(&i915->gt))
-		intel_gt_unset_wedged(&i915->gt);
-
-	/*
-	 * If we inherit context state from the BIOS or earlier occupants
-	 * of the GPU, the GPU may be in an inconsistent state when we
-	 * try to take over. The only way to remove the earlier state
-	 * is by resetting. However, resetting on earlier gen is tricky as
-	 * it may impact the display and we are uncertain about the stability
-	 * of the reset, so this could be applied to even earlier gen.
-	 */
-	intel_gt_sanitize(&i915->gt, false);
-
-	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-}
-
-static void init_unused_ring(struct intel_gt *gt, u32 base)
-{
-	struct intel_uncore *uncore = gt->uncore;
-
-	intel_uncore_write(uncore, RING_CTL(base), 0);
-	intel_uncore_write(uncore, RING_HEAD(base), 0);
-	intel_uncore_write(uncore, RING_TAIL(base), 0);
-	intel_uncore_write(uncore, RING_START(base), 0);
-}
-
-static void init_unused_rings(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-
-	if (IS_I830(i915)) {
-		init_unused_ring(gt, PRB1_BASE);
-		init_unused_ring(gt, SRB0_BASE);
-		init_unused_ring(gt, SRB1_BASE);
-		init_unused_ring(gt, SRB2_BASE);
-		init_unused_ring(gt, SRB3_BASE);
-	} else if (IS_GEN(i915, 2)) {
-		init_unused_ring(gt, SRB0_BASE);
-		init_unused_ring(gt, SRB1_BASE);
-	} else if (IS_GEN(i915, 3)) {
-		init_unused_ring(gt, PRB1_BASE);
-		init_unused_ring(gt, PRB2_BASE);
-	}
-}
-
-int i915_gem_init_hw(struct drm_i915_private *i915)
-{
-	struct intel_uncore *uncore = &i915->uncore;
-	struct intel_gt *gt = &i915->gt;
-	int ret;
-
-	BUG_ON(!i915->kernel_context);
-	ret = intel_gt_terminally_wedged(gt);
-	if (ret)
-		return ret;
-
-	gt->last_init_time = ktime_get();
-
-	/* Double layer security blanket, see i915_gem_init() */
-	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-	if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
-		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
-
-	if (IS_HASWELL(i915))
-		intel_uncore_write(uncore,
-				   MI_PREDICATE_RESULT_2,
-				   IS_HSW_GT3(i915) ?
-				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
-
-	/* Apply the GT workarounds... */
-	intel_gt_apply_workarounds(gt);
-	/* ...and determine whether they are sticking. */
-	intel_gt_verify_workarounds(gt, "init");
-
-	intel_gt_init_swizzling(gt);
-
-	/*
-	 * At least 830 can leave some of the unused rings
-	 * "active" (ie. head != tail) after resume which
-	 * will prevent c3 entry. Makes sure all unused rings
-	 * are totally idle.
-	 */
-	init_unused_rings(gt);
-
-	ret = i915_ppgtt_init_hw(gt);
-	if (ret) {
-		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
-		goto out;
-	}
-
-	/* We can't enable contexts until all firmware is loaded */
-	ret = intel_uc_init_hw(&gt->uc);
-	if (ret) {
-		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
-		goto out;
-	}
-
-	intel_mocs_init(gt);
-
-out:
-	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-	return ret;
-}
-
-static int __intel_engines_record_defaults(struct drm_i915_private *i915)
-{
-	struct i915_request *requests[I915_NUM_ENGINES] = {};
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err = 0;
-
-	/*
-	 * As we reset the gpu during very early sanitisation, the current
-	 * register state on the GPU should reflect its defaults values.
-	 * We load a context onto the hw (with restore-inhibit), then switch
-	 * over to a second context to save that default register state. We
-	 * can then prime every new context with that state so they all start
-	 * from the same default HW values.
-	 */
-
-	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-		struct i915_request *rq;
-
-		/* We must be able to switch to something! */
-		GEM_BUG_ON(!engine->kernel_context);
-		engine->serial++; /* force the kernel context switch */
-
-		ce = intel_context_create(i915->kernel_context, engine);
-		if (IS_ERR(ce)) {
-			err = PTR_ERR(ce);
-			goto out;
-		}
-
-		rq = intel_context_create_request(ce);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			intel_context_put(ce);
-			goto out;
-		}
-
-		err = intel_engine_emit_ctx_wa(rq);
-		if (err)
-			goto err_rq;
-
-		/*
-		 * Failing to program the MOCS is non-fatal.The system will not
-		 * run at peak performance. So warn the user and carry on.
-		 */
-		err = intel_mocs_emit(rq);
-		if (err)
-			dev_notice(i915->drm.dev,
-				   "Failed to program MOCS registers; expect performance issues.\n");
-
-		err = intel_renderstate_emit(rq);
-		if (err)
-			goto err_rq;
-
-err_rq:
-		requests[id] = i915_request_get(rq);
-		i915_request_add(rq);
-		if (err)
-			goto out;
-	}
-
-	/* Flush the default context image to memory, and enable powersaving. */
-	if (!i915_gem_load_power_context(i915)) {
-		err = -EIO;
-		goto out;
-	}
-
-	for (id = 0; id < ARRAY_SIZE(requests); id++) {
-		struct i915_request *rq;
-		struct i915_vma *state;
-		void *vaddr;
-
-		rq = requests[id];
-		if (!rq)
-			continue;
-
-		/* We want to be able to unbind the state from the GGTT */
-		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
-
-		state = rq->hw_context->state;
-		if (!state)
-			continue;
-
-		/*
-		 * As we will hold a reference to the logical state, it will
-		 * not be torn down with the context, and importantly the
-		 * object will hold onto its vma (making it possible for a
-		 * stray GTT write to corrupt our defaults). Unmap the vma
-		 * from the GTT to prevent such accidents and reclaim the
-		 * space.
-		 */
-		err = i915_vma_unbind(state);
-		if (err)
-			goto out;
-
-		i915_gem_object_lock(state->obj);
-		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
-		i915_gem_object_unlock(state->obj);
-		if (err)
-			goto out;
-
-		i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
-
-		/* Check we can acquire the image of the context state */
-		vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
-		if (IS_ERR(vaddr)) {
-			err = PTR_ERR(vaddr);
-			goto out;
-		}
-
-		rq->engine->default_state = i915_gem_object_get(state->obj);
-		i915_gem_object_unpin_map(state->obj);
-	}
-
-out:
-	/*
-	 * If we have to abandon now, we expect the engines to be idle
-	 * and ready to be torn-down. The quickest way we can accomplish
-	 * this is by declaring ourselves wedged.
-	 */
-	if (err)
-		intel_gt_set_wedged(&i915->gt);
-
-	for (id = 0; id < ARRAY_SIZE(requests); id++) {
-		struct intel_context *ce;
-		struct i915_request *rq;
-
-		rq = requests[id];
-		if (!rq)
-			continue;
-
-		ce = rq->hw_context;
-		i915_request_put(rq);
-		intel_context_put(ce);
-	}
-	return err;
-}
-
-static int
-i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
-{
-	return intel_gt_init_scratch(&i915->gt, size);
-}
-
-static void i915_gem_fini_scratch(struct drm_i915_private *i915)
-{
-	intel_gt_fini_scratch(&i915->gt);
-}
-
-static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err = 0;
-
-	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		return 0;
-
-	for_each_engine(engine, i915, id) {
-		if (intel_engine_verify_workarounds(engine, "load"))
-			err = -EIO;
-	}
-
-	return err;
-}
-
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -1429,8 +1092,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		mkwrite_device_info(dev_priv)->page_sizes =
 			I915_GTT_PAGE_SIZE_4K;
 
-	intel_timelines_init(dev_priv);
-
 	ret = i915_gem_init_userptr(dev_priv);
 	if (ret)
 		return ret;
@@ -1438,59 +1099,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
 	intel_wopcm_init(&dev_priv->wopcm);
 
-	/* This is just a security blanket to placate dragons.
-	 * On some systems, we very sporadically observe that the first TLBs
-	 * used by the CS may be stale, despite us poking the TLB reset. If
-	 * we hold the forcewake during initialisation these problems
-	 * just magically go away.
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
 	ret = i915_init_ggtt(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_unlock;
 	}
 
-	ret = i915_gem_init_scratch(dev_priv,
-				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
-	if (ret) {
-		GEM_BUG_ON(ret == -EIO);
-		goto err_ggtt;
-	}
-
-	ret = intel_engines_setup(dev_priv);
-	if (ret) {
-		GEM_BUG_ON(ret == -EIO);
-		goto err_unlock;
-	}
-
-	ret = i915_gem_contexts_init(dev_priv);
-	if (ret) {
-		GEM_BUG_ON(ret == -EIO);
-		goto err_scratch;
-	}
-
-	ret = intel_engines_init(dev_priv);
-	if (ret) {
-		GEM_BUG_ON(ret == -EIO);
-		goto err_context;
-	}
-
-	intel_init_gt_powersave(dev_priv);
-
-	intel_uc_init(&dev_priv->gt.uc);
-
-	ret = i915_gem_init_hw(dev_priv);
-	if (ret)
-		goto err_uc_init;
-
-	/* Only when the HW is re-initialised, can we replay the requests */
-	ret = intel_gt_resume(&dev_priv->gt);
-	if (ret)
-		goto err_init_hw;
-
 	/*
 	 * Despite its name intel_init_clock_gating applies both display
 	 * clock gating workarounds; GT mmio workarounds and the occasional
@@ -1502,24 +1116,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 */
 	intel_init_clock_gating(dev_priv);
 
-	ret = intel_engines_verify_workarounds(dev_priv);
+	ret = intel_gt_init(&dev_priv->gt);
 	if (ret)
-		goto err_gt;
-
-	ret = __intel_engines_record_defaults(dev_priv);
-	if (ret)
-		goto err_gt;
-
-	ret = i915_inject_load_error(dev_priv, -ENODEV);
-	if (ret)
-		goto err_gt;
-
-	ret = i915_inject_load_error(dev_priv, -EIO);
-	if (ret)
-		goto err_gt;
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+		goto err_unlock;
 
 	return 0;
 
@@ -1529,43 +1128,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 * HW as irrevisibly wedged, but keep enough state around that the
 	 * driver doesn't explode during runtime.
 	 */
-err_gt:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	intel_gt_set_wedged(&dev_priv->gt);
-	i915_gem_suspend(dev_priv);
-	i915_gem_suspend_late(dev_priv);
-
-	i915_gem_drain_workqueue(dev_priv);
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
-err_init_hw:
-	intel_uc_fini_hw(&dev_priv->gt.uc);
-err_uc_init:
-	if (ret != -EIO) {
-		intel_uc_fini(&dev_priv->gt.uc);
-		intel_cleanup_gt_powersave(dev_priv);
-		intel_engines_cleanup(dev_priv);
-	}
-err_context:
-	if (ret != -EIO)
-		i915_gem_contexts_fini(dev_priv);
-err_scratch:
-	i915_gem_fini_scratch(dev_priv);
-err_ggtt:
 err_unlock:
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_drain_workqueue(dev_priv);
 
 	if (ret != -EIO) {
 		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
 		i915_gem_cleanup_userptr(dev_priv);
-		intel_timelines_fini(dev_priv);
 	}
 
 	if (ret == -EIO) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-
 		/*
 		 * Allow engines or uC initialisation to fail by marking the GPU
 		 * as wedged. But we only want to do this when the GPU is angry,
@@ -1580,10 +1151,8 @@ err_unlock:
 		/* Minimal basic recovery for KMS */
 		ret = i915_ggtt_enable_hw(dev_priv);
 		i915_gem_restore_gtt_mappings(dev_priv);
-		i915_gem_restore_fences(dev_priv);
+		i915_gem_restore_fences(&dev_priv->ggtt);
 		intel_init_clock_gating(dev_priv);
-
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	i915_gem_drain_freed_objects(dev_priv);
@@ -1604,48 +1173,32 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
 
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 {
-	GEM_BUG_ON(dev_priv->gt.awake);
-
 	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
 
 	i915_gem_suspend_late(dev_priv);
-	intel_disable_gt_powersave(dev_priv);
+	intel_gt_driver_remove(&dev_priv->gt);
+	dev_priv->uabi_engines = RB_ROOT;
 
 	/* Flush any outstanding unpin_work. */
 	i915_gem_drain_workqueue(dev_priv);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_uc_fini_hw(&dev_priv->gt.uc);
-	intel_uc_fini(&dev_priv->gt.uc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(dev_priv);
 }
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_engines_cleanup(dev_priv);
-	i915_gem_contexts_fini(dev_priv);
-	i915_gem_fini_scratch(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_driver_release__contexts(dev_priv);
 
-	intel_wa_list_free(&dev_priv->gt_wa_list);
+	intel_gt_driver_release(&dev_priv->gt);
 
-	intel_cleanup_gt_powersave(dev_priv);
+	intel_wa_list_free(&dev_priv->gt_wa_list);
 
 	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
 	i915_gem_cleanup_userptr(dev_priv);
-	intel_timelines_fini(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
 
-	WARN_ON(!list_empty(&dev_priv->contexts.list));
-}
-
-void i915_gem_init_mmio(struct drm_i915_private *i915)
-{
-	i915_gem_sanitize(i915);
+	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
 }
 
 static void i915_gem_init__mm(struct drm_i915_private *i915)
@@ -1660,20 +1213,12 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 	i915_gem_init__objects(i915);
 }
 
-int i915_gem_init_early(struct drm_i915_private *dev_priv)
+void i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
-	int err;
-
 	i915_gem_init__mm(dev_priv);
-	i915_gem_init__pm(dev_priv);
+	i915_gem_init__contexts(dev_priv);
 
 	spin_lock_init(&dev_priv->fb_tracking.lock);
-
-	err = i915_gemfs_init(dev_priv);
-	if (err)
-		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
-
-	return 0;
 }
 
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
@@ -1682,8 +1227,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.shrink_count);
-
-	i915_gemfs_fini(dev_priv);
 }
 
 int i915_gem_freeze(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 6795f1daa3d5..1753c84d6c0d 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -30,17 +30,18 @@
 
 #include <drm/drm_drv.h>
 
+#include "i915_utils.h"
+
 struct drm_i915_private;
 
 #ifdef CONFIG_DRM_I915_DEBUG_GEM
 
-#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER)
+#define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER)
 
 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {	\
-		pr_err("%s:%d GEM_BUG_ON(%s)\n", \
-		       __func__, __LINE__, __stringify(condition)); \
-		GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \
-			  __func__, __LINE__, __stringify(condition)); \
+		GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
+			      __func__, __LINE__, __stringify(condition)); \
+		GEM_TRACE_DUMP(); \
 		BUG(); \
 		} \
 	} while(0)
@@ -66,11 +67,17 @@ struct drm_i915_private;
 
 #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
 #define GEM_TRACE(...) trace_printk(__VA_ARGS__)
-#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL)
+#define GEM_TRACE_ERR(...) do {						\
+	pr_err(__VA_ARGS__);						\
+	trace_printk(__VA_ARGS__);					\
+} while (0)
+#define GEM_TRACE_DUMP() \
+	do { ftrace_dump(DUMP_ALL); add_taint_for_CI(TAINT_WARN); } while (0)
 #define GEM_TRACE_DUMP_ON(expr) \
-	do { if (expr) ftrace_dump(DUMP_ALL); } while (0)
+	do { if (expr) GEM_TRACE_DUMP(); } while (0)
 #else
 #define GEM_TRACE(...) do { } while (0)
+#define GEM_TRACE_ERR(...) do { } while (0)
 #define GEM_TRACE_DUMP() do { } while (0)
 #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr)
 #endif
@@ -83,6 +90,11 @@ static inline void tasklet_lock(struct tasklet_struct *t)
 		cpu_relax();
 }
 
+static inline bool tasklet_is_locked(const struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_RUN, &t->state);
+}
+
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
 	if (!atomic_fetch_inc(&t->count))
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 52c86c6e0673..0697bedebeef 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -29,6 +29,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_trace.h"
@@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static int ggtt_flush(struct drm_i915_private *i915)
+static int ggtt_flush(struct intel_gt *gt)
 {
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
@@ -46,10 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE |
-				      I915_WAIT_LOCKED,
-				      MAX_SCHEDULE_TIMEOUT);
+	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -70,7 +68,7 @@ mark_free(struct drm_mm_scan *scan,
  * @vm: address space to evict from
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
- * @cache_level: cache_level for the desired space
+ * @color: color for the desired space
  * @start: start (inclusive) of the range from which to evict objects
  * @end: end (exclusive) of the range from which to evict objects
  * @flags: additional flags to control the eviction algorithm
@@ -91,11 +89,10 @@ mark_free(struct drm_mm_scan *scan,
 int
 i915_gem_evict_something(struct i915_address_space *vm,
 			 u64 min_size, u64 alignment,
-			 unsigned cache_level,
+			 unsigned long color,
 			 u64 start, u64 end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
@@ -104,7 +101,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *active;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -124,17 +121,10 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	if (flags & PIN_MAPPABLE)
 		mode = DRM_MM_INSERT_LOW;
 	drm_mm_scan_init_with_range(&scan, &vm->mm,
-				    min_size, alignment, cache_level,
+				    min_size, alignment, color,
 				    start, end, mode);
 
-	/*
-	 * Retire before we search the active list. Although we have
-	 * reasonable accuracy in our retirement lists, we may have
-	 * a stray pin (preventing eviction) that can only be resolved by
-	 * retiring.
-	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(dev_priv);
+	intel_gt_retire_requests(vm->gt);
 
 search_again:
 	active = NULL;
@@ -207,7 +197,7 @@ search_again:
 	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
 		return -EBUSY;
 
-	ret = ggtt_flush(dev_priv);
+	ret = ggtt_flush(vm->gt);
 	if (ret)
 		return ret;
 
@@ -235,12 +225,12 @@ found:
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
 		vma = container_of(node, struct i915_vma, node);
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -266,25 +256,23 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	u64 start = target->start;
 	u64 end = start + target->size;
 	struct i915_vma *vma, *next;
-	bool check_color;
 	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
 	trace_i915_gem_evict_node(vm, target, flags);
 
-	/* Retire before we search the active list. Although we have
+	/*
+	 * Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
-	check_color = vm->mm.color_adjust;
-	if (check_color) {
+	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
 		if (start)
 			start -= I915_GTT_PAGE_SIZE;
@@ -301,7 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			break;
 		}
 
-		GEM_BUG_ON(!node->allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(node));
 		vma = container_of(node, typeof(*vma), node);
 
 		/* If we are using coloring to insert guard pages between
@@ -310,7 +298,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		 * abutt and conflict. If they are in conflict, then we evict
 		 * those as well to make room for our guard pages.
 		 */
-		if (check_color) {
+		if (i915_vm_has_cache_coloring(vm)) {
 			if (node->start + node->size == target->start) {
 				if (node->color == target->color)
 					continue;
@@ -351,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -371,11 +359,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm)
 {
-	struct list_head eviction_list;
-	struct i915_vma *vma, *next;
-	int ret;
+	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	/* Switch back to the default context in order to unpin
@@ -384,28 +370,35 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	 * switch otherwise is ineffective.
 	 */
 	if (i915_is_ggtt(vm)) {
-		ret = ggtt_flush(vm->i915);
+		ret = ggtt_flush(vm->gt);
 		if (ret)
 			return ret;
 	}
 
-	INIT_LIST_HEAD(&eviction_list);
-	mutex_lock(&vm->mutex);
-	list_for_each_entry(vma, &vm->bound_list, vm_link) {
-		if (i915_vma_is_pinned(vma))
-			continue;
+	do {
+		struct i915_vma *vma, *vn;
+		LIST_HEAD(eviction_list);
 
-		__i915_vma_pin(vma);
-		list_add(&vma->evict_link, &eviction_list);
-	}
-	mutex_unlock(&vm->mutex);
+		list_for_each_entry(vma, &vm->bound_list, vm_link) {
+			if (i915_vma_is_pinned(vma))
+				continue;
+
+			__i915_vma_pin(vma);
+			list_add(&vma->evict_link, &eviction_list);
+		}
+		if (list_empty(&eviction_list))
+			break;
+
+		ret = 0;
+		list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
+			__i915_vma_unpin(vma);
+			if (ret == 0)
+				ret = __i915_vma_unbind(vma);
+			if (ret != -EINTR) /* "Get me out of here!" */
+				ret = 0;
+		}
+	} while (ret == 0);
 
-	ret = 0;
-	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
-		__i915_vma_unpin(vma);
-		if (ret == 0)
-			ret = i915_vma_unbind(vma);
-	}
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 615a9f4ef30c..d9c34a23cd67 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -59,6 +59,16 @@
 
 #define pipelined 0
 
+static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
+{
+	return fence->ggtt->vm.i915;
+}
+
+static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
+{
+	return fence->ggtt->vm.gt->uncore;
+}
+
 static void i965_write_fence_reg(struct i915_fence_reg *fence,
 				 struct i915_vma *vma)
 {
@@ -66,7 +76,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
 	int fence_pitch_shift;
 	u64 val;
 
-	if (INTEL_GEN(fence->i915) >= 6) {
+	if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
 		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
 		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
@@ -95,7 +105,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct intel_uncore *uncore = &fence->i915->uncore;
+		struct intel_uncore *uncore = fence_to_uncore(fence);
 
 		/*
 		 * To w/a incoherency with non-atomic 64-bit register updates,
@@ -132,7 +142,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
 		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
 		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
 
-		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
+		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
 			stride /= 128;
 		else
 			stride /= 512;
@@ -148,7 +158,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct intel_uncore *uncore = &fence->i915->uncore;
+		struct intel_uncore *uncore = fence_to_uncore(fence);
 		i915_reg_t reg = FENCE_REG(fence->id);
 
 		intel_uncore_write_fw(uncore, reg, val);
@@ -180,7 +190,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct intel_uncore *uncore = &fence->i915->uncore;
+		struct intel_uncore *uncore = fence_to_uncore(fence);
 		i915_reg_t reg = FENCE_REG(fence->id);
 
 		intel_uncore_write_fw(uncore, reg, val);
@@ -191,15 +201,17 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence,
 static void fence_write(struct i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	struct drm_i915_private *i915 = fence_to_i915(fence);
+
 	/*
 	 * Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
 
-	if (IS_GEN(fence->i915, 2))
+	if (IS_GEN(i915, 2))
 		i830_write_fence_reg(fence, vma);
-	else if (IS_GEN(fence->i915, 3))
+	else if (IS_GEN(i915, 3))
 		i915_write_fence_reg(fence, vma);
 	else
 		i965_write_fence_reg(fence, vma);
@@ -215,6 +227,8 @@ static void fence_write(struct i915_fence_reg *fence,
 static int fence_update(struct i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	struct i915_ggtt *ggtt = fence->ggtt;
+	struct intel_uncore *uncore = fence_to_uncore(fence);
 	intel_wakeref_t wakeref;
 	struct i915_vma *old;
 	int ret;
@@ -230,14 +244,15 @@ static int fence_update(struct i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_wait(&vma->active);
+		ret = i915_vma_sync(vma);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_wait(&old->active);
+		/* XXX Ideally we would move the waiting to outside the mutex */
+		ret = i915_vma_sync(old);
 		if (ret) {
 			fence->vma = old;
 			return ret;
@@ -255,7 +270,7 @@ static int fence_update(struct i915_fence_reg *fence,
 			old->fence = NULL;
 		}
 
-		list_move(&fence->link, &fence->i915->ggtt.fence_list);
+		list_move(&fence->link, &ggtt->fence_list);
 	}
 
 	/*
@@ -268,7 +283,7 @@ static int fence_update(struct i915_fence_reg *fence,
 	 * be cleared before we can use any other fences to ensure that
 	 * the new fences do not overlap the elided clears, confusing HW.
 	 */
-	wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm);
+	wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
 	if (!wakeref) {
 		GEM_BUG_ON(vma);
 		return 0;
@@ -279,10 +294,10 @@ static int fence_update(struct i915_fence_reg *fence,
 
 	if (vma) {
 		vma->fence = fence;
-		list_move_tail(&fence->link, &fence->i915->ggtt.fence_list);
+		list_move_tail(&fence->link, &ggtt->fence_list);
 	}
 
-	intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref);
+	intel_runtime_pm_put(uncore->rpm, wakeref);
 	return 0;
 }
 
@@ -311,11 +326,11 @@ int i915_vma_revoke_fence(struct i915_vma *vma)
 	return fence_update(fence, NULL);
 }
 
-static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
+static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
 {
 	struct i915_fence_reg *fence;
 
-	list_for_each_entry(fence, &i915->ggtt.fence_list, link) {
+	list_for_each_entry(fence, &ggtt->fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
 		if (atomic_read(&fence->pin_count))
@@ -325,19 +340,21 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
 	}
 
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(i915))
+	if (intel_has_pending_fb_unpin(ggtt->vm.i915))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
 }
 
-static int __i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
 		fence = vma->fence;
@@ -348,7 +365,7 @@ static int __i915_vma_pin_fence(struct i915_vma *vma)
 			return 0;
 		}
 	} else if (set) {
-		fence = fence_find(vma->vm->i915);
+		fence = fence_find(ggtt);
 		if (IS_ERR(fence))
 			return PTR_ERR(fence);
 
@@ -395,11 +412,14 @@ int i915_vma_pin_fence(struct i915_vma *vma)
 {
 	int err;
 
+	if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
+		return 0;
+
 	/*
 	 * Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
 	 */
-	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
+	assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
@@ -415,14 +435,13 @@ int i915_vma_pin_fence(struct i915_vma *vma)
 
 /**
  * i915_reserve_fence - Reserve a fence for vGPU
- * @i915: i915 device private
+ * @ggtt: Global GTT
  *
  * This function walks the fence regs looking for a free one and remove
  * it from the fence_list. It is used to reserve fence for vGPU to use.
  */
-struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_fence_reg *fence;
 	int count;
 	int ret;
@@ -436,7 +455,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
 	if (count <= 1)
 		return ERR_PTR(-ENOSPC);
 
-	fence = fence_find(i915);
+	fence = fence_find(ggtt);
 	if (IS_ERR(fence))
 		return fence;
 
@@ -460,7 +479,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
  */
 void i915_unreserve_fence(struct i915_fence_reg *fence)
 {
-	struct i915_ggtt *ggtt = &fence->i915->ggtt;
+	struct i915_ggtt *ggtt = fence->ggtt;
 
 	lockdep_assert_held(&ggtt->vm.mutex);
 
@@ -469,19 +488,19 @@ void i915_unreserve_fence(struct i915_fence_reg *fence)
 
 /**
  * i915_gem_restore_fences - restore fence state
- * @i915: i915 device private
+ * @ggtt: Global GTT
  *
  * Restore the hw fence state to match the software tracking again, to be called
  * after a gpu reset and on resume. Note that on runtime suspend we only cancel
  * the fences, to be reacquired by the user later.
  */
-void i915_gem_restore_fences(struct drm_i915_private *i915)
+void i915_gem_restore_fences(struct i915_ggtt *ggtt)
 {
 	int i;
 
 	rcu_read_lock(); /* keep obj alive as we dereference */
-	for (i = 0; i < i915->ggtt.num_fences; i++) {
-		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
+	for (i = 0; i < ggtt->num_fences; i++) {
+		struct i915_fence_reg *reg = &ggtt->fence_regs[i];
 		struct i915_vma *vma = READ_ONCE(reg->vma);
 
 		GEM_BUG_ON(vma && vma->fence != reg);
@@ -547,15 +566,16 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
  */
 
 /**
- * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
- * @i915: i915 device private
+ * detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @ggtt: Global GGTT
  *
  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  * access through main memory.
  */
-static void detect_bit_6_swizzle(struct drm_i915_private *i915)
+static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
 {
-	struct intel_uncore *uncore = &i915->uncore;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+	struct drm_i915_private *i915 = ggtt->vm.i915;
 	u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
@@ -717,8 +737,8 @@ static void detect_bit_6_swizzle(struct drm_i915_private *i915)
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 	}
 
-	i915->mm.bit_6_swizzle_x = swizzle_x;
-	i915->mm.bit_6_swizzle_y = swizzle_y;
+	i915->ggtt.bit_6_swizzle_x = swizzle_x;
+	i915->ggtt.bit_6_swizzle_y = swizzle_y;
 }
 
 /*
@@ -819,17 +839,20 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
 void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 	int num_fences;
 	int i;
 
 	INIT_LIST_HEAD(&ggtt->fence_list);
 	INIT_LIST_HEAD(&ggtt->userfault_list);
-	intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm);
+	intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);
 
-	detect_bit_6_swizzle(i915);
+	detect_bit_6_swizzle(ggtt);
 
-	if (INTEL_GEN(i915) >= 7 &&
-	    !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
+	if (!i915_ggtt_has_aperture(ggtt))
+		num_fences = 0;
+	else if (INTEL_GEN(i915) >= 7 &&
+		 !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
 		num_fences = 32;
 	else if (INTEL_GEN(i915) >= 4 ||
 		 IS_I945G(i915) || IS_I945GM(i915) ||
@@ -839,20 +862,20 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
 		num_fences = 8;
 
 	if (intel_vgpu_active(i915))
-		num_fences = intel_uncore_read(&i915->uncore,
+		num_fences = intel_uncore_read(uncore,
 					       vgtif_reg(avail_rs.fence_num));
 
 	/* Initialize fence registers to zero */
 	for (i = 0; i < num_fences; i++) {
 		struct i915_fence_reg *fence = &ggtt->fence_regs[i];
 
-		fence->i915 = i915;
+		fence->ggtt = ggtt;
 		fence->id = i;
 		list_add_tail(&fence->link, &ggtt->fence_list);
 	}
 	ggtt->num_fences = num_fences;
 
-	i915_gem_restore_fences(i915);
+	i915_gem_restore_fences(ggtt);
 }
 
 void intel_gt_init_swizzling(struct intel_gt *gt)
@@ -861,7 +884,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt)
 	struct intel_uncore *uncore = gt->uncore;
 
 	if (INTEL_GEN(i915) < 5 ||
-	    i915->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+	    i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
 		return;
 
 	intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99866fb9d94f..7bd521cd7cd7 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -29,7 +29,6 @@
 #include <linux/types.h>
 
 struct drm_i915_gem_object;
-struct drm_i915_private;
 struct i915_ggtt;
 struct i915_vma;
 struct intel_gt;
@@ -39,7 +38,7 @@ struct sg_table;
 
 struct i915_fence_reg {
 	struct list_head link;
-	struct drm_i915_private *i915;
+	struct i915_ggtt *ggtt;
 	struct i915_vma *vma;
 	atomic_t pin_count;
 	int id;
@@ -55,10 +54,10 @@ struct i915_fence_reg {
 };
 
 /* i915_gem_fence_reg.c */
-struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915);
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt);
 void i915_unreserve_fence(struct i915_fence_reg *fence);
 
-void i915_gem_restore_fences(struct drm_i915_private *i915);
+void i915_gem_restore_fences(struct i915_ggtt *ggtt);
 
 void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
 				       struct sg_table *pages);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b1a7a8b9b46a..e039eb56900f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1,26 +1,7 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2010 Daniel Vetter
- * Copyright © 2011-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2020 Intel Corporation
  */
 
 #include <linux/slab.h> /* fault-inject.h is not standalone! */
@@ -38,2106 +19,13 @@
 
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
-#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
-#define DBG(...) trace_printk(__VA_ARGS__)
-#else
-#define DBG(...)
-#endif
-
-/**
- * DOC: Global GTT views
- *
- * Background and previous state
- *
- * Historically objects could exists (be bound) in global GTT space only as
- * singular instances with a view representing all of the object's backing pages
- * in a linear fashion. This view will be called a normal view.
- *
- * To support multiple views of the same object, where the number of mapped
- * pages is not equal to the backing store, or where the layout of the pages
- * is not linear, concept of a GGTT view was added.
- *
- * One example of an alternative view is a stereo display driven by a single
- * image. In this case we would have a framebuffer looking like this
- * (2x2 pages):
- *
- *    12
- *    34
- *
- * Above would represent a normal GGTT view as normally mapped for GPU or CPU
- * rendering. In contrast, fed to the display engine would be an alternative
- * view which could look something like this:
- *
- *   1212
- *   3434
- *
- * In this example both the size and layout of pages in the alternative view is
- * different from the normal view.
- *
- * Implementation and usage
- *
- * GGTT views are implemented using VMAs and are distinguished via enum
- * i915_ggtt_view_type and struct i915_ggtt_view.
- *
- * A new flavour of core GEM functions which work with GGTT bound objects were
- * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
- * renaming  in large amounts of code. They take the struct i915_ggtt_view
- * parameter encapsulating all metadata required to implement a view.
- *
- * As a helper for callers which are only interested in the normal view,
- * globally const i915_ggtt_view_normal singleton instance exists. All old core
- * GEM API functions, the ones not taking the view parameter, are operating on,
- * or with the normal GGTT view.
- *
- * Code wanting to add or use a new GGTT view needs to:
- *
- * 1. Add a new enum with a suitable name.
- * 2. Extend the metadata in the i915_ggtt_view structure if required.
- * 3. Add support to i915_get_vma_pages().
- *
- * New views are required to build a scatter-gather table from within the
- * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
- * exists for the lifetime of an VMA.
- *
- * Core API is designed to have copy semantics which means that passed in
- * struct i915_ggtt_view does not need to be persistent (left around after
- * calling the core API functions).
- *
- */
-
-#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma);
-
-static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-
-	/*
-	 * Note that as an uncached mmio write, this will flush the
-	 * WCB of the writes into the GGTT before it triggers the invalidate.
-	 */
-	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-}
-
-static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-
-	gen6_ggtt_invalidate(ggtt);
-	intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-}
-
-static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	intel_gtt_chipset_flush();
-}
-
-static int ppgtt_bind_vma(struct i915_vma *vma,
-			  enum i915_cache_level cache_level,
-			  u32 unused)
-{
-	u32 pte_flags;
-	int err;
-
-	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
-		err = vma->vm->allocate_va_range(vma->vm,
-						 vma->node.start, vma->size);
-		if (err)
-			return err;
-	}
-
-	/* Applicable to VLV, and gen8+ */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-
-	return 0;
-}
-
-static void ppgtt_unbind_vma(struct i915_vma *vma)
-{
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-}
-
-static int ppgtt_set_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(vma->pages);
-
-	vma->pages = vma->obj->mm.pages;
-
-	vma->page_sizes = vma->obj->mm.page_sizes;
-
-	return 0;
-}
-
-static void clear_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(!vma->pages);
-
-	if (vma->pages != vma->obj->mm.pages) {
-		sg_free_table(vma->pages);
-		kfree(vma->pages);
-	}
-	vma->pages = NULL;
-
-	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
-}
-
-static u64 gen8_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
-			   u32 flags)
-{
-	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
-
-	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~_PAGE_RW;
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		pte |= PPAT_UNCACHED;
-		break;
-	case I915_CACHE_WT:
-		pte |= PPAT_DISPLAY_ELLC;
-		break;
-	default:
-		pte |= PPAT_CACHED;
-		break;
-	}
-
-	return pte;
-}
-
-static u64 gen8_pde_encode(const dma_addr_t addr,
-			   const enum i915_cache_level level)
-{
-	u64 pde = _PAGE_PRESENT | _PAGE_RW;
-	pde |= addr;
-	if (level != I915_CACHE_NONE)
-		pde |= PPAT_CACHED_PDE;
-	else
-		pde |= PPAT_UNCACHED;
-	return pde;
-}
-
-static u64 snb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 ivb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-		pte |= GEN7_PTE_CACHE_L3_LLC;
-		break;
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 byt_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	if (!(flags & PTE_READ_ONLY))
-		pte |= BYT_PTE_WRITEABLE;
-
-	if (level != I915_CACHE_NONE)
-		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
-	return pte;
-}
-
-static u64 hsw_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= HSW_PTE_ADDR_ENCODE(addr);
-
-	if (level != I915_CACHE_NONE)
-		pte |= HSW_WB_LLC_AGE3;
-
-	return pte;
-}
-
-static u64 iris_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
-			   u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= HSW_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		break;
-	case I915_CACHE_WT:
-		pte |= HSW_WT_ELLC_LLC_AGE3;
-		break;
-	default:
-		pte |= HSW_WB_ELLC_LLC_AGE3;
-		break;
-	}
-
-	return pte;
-}
-
-static void stash_init(struct pagestash *stash)
-{
-	pagevec_init(&stash->pvec);
-	spin_lock_init(&stash->lock);
-}
-
-static struct page *stash_pop_page(struct pagestash *stash)
-{
-	struct page *page = NULL;
-
-	spin_lock(&stash->lock);
-	if (likely(stash->pvec.nr))
-		page = stash->pvec.pages[--stash->pvec.nr];
-	spin_unlock(&stash->lock);
-
-	return page;
-}
-
-static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
-{
-	unsigned int nr;
-
-	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
-
-	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
-	memcpy(stash->pvec.pages + stash->pvec.nr,
-	       pvec->pages + pvec->nr - nr,
-	       sizeof(pvec->pages[0]) * nr);
-	stash->pvec.nr += nr;
-
-	spin_unlock(&stash->lock);
-
-	pvec->nr -= nr;
-}
-
-static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
-{
-	struct pagevec stack;
-	struct page *page;
-
-	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
-		i915_gem_shrink_all(vm->i915);
-
-	page = stash_pop_page(&vm->free_pages);
-	if (page)
-		return page;
-
-	if (!vm->pt_kmap_wc)
-		return alloc_page(gfp);
-
-	/* Look in our global stash of WC pages... */
-	page = stash_pop_page(&vm->i915->mm.wc_stash);
-	if (page)
-		return page;
-
-	/*
-	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
-	 *
-	 * We have to be careful as page allocation may trigger the shrinker
-	 * (via direct reclaim) which will fill up the WC stash underneath us.
-	 * So we add our WB pages into a temporary pvec on the stack and merge
-	 * them into the WC stash after all the allocations are complete.
-	 */
-	pagevec_init(&stack);
-	do {
-		struct page *page;
-
-		page = alloc_page(gfp);
-		if (unlikely(!page))
-			break;
-
-		stack.pages[stack.nr++] = page;
-	} while (pagevec_space(&stack));
-
-	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
-		page = stack.pages[--stack.nr];
-
-		/* Merge spare WC pages to the global stash */
-		if (stack.nr)
-			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
-
-		/* Push any surplus WC pages onto the local VM stash */
-		if (stack.nr)
-			stash_push_pagevec(&vm->free_pages, &stack);
-	}
-
-	/* Return unwanted leftovers */
-	if (unlikely(stack.nr)) {
-		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
-		__pagevec_release(&stack);
-	}
-
-	return page;
-}
-
-static void vm_free_pages_release(struct i915_address_space *vm,
-				  bool immediate)
-{
-	struct pagevec *pvec = &vm->free_pages.pvec;
-	struct pagevec stack;
-
-	lockdep_assert_held(&vm->free_pages.lock);
-	GEM_BUG_ON(!pagevec_count(pvec));
-
-	if (vm->pt_kmap_wc) {
-		/*
-		 * When we use WC, first fill up the global stash and then
-		 * only if full immediately free the overflow.
-		 */
-		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
-
-		/*
-		 * As we have made some room in the VM's free_pages,
-		 * we can wait for it to fill again. Unless we are
-		 * inside i915_address_space_fini() and must
-		 * immediately release the pages!
-		 */
-		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
-			return;
-
-		/*
-		 * We have to drop the lock to allow ourselves to sleep,
-		 * so take a copy of the pvec and clear the stash for
-		 * others to use it as we sleep.
-		 */
-		stack = *pvec;
-		pagevec_reinit(pvec);
-		spin_unlock(&vm->free_pages.lock);
-
-		pvec = &stack;
-		set_pages_array_wb(pvec->pages, pvec->nr);
-
-		spin_lock(&vm->free_pages.lock);
-	}
-
-	__pagevec_release(pvec);
-}
-
-static void vm_free_page(struct i915_address_space *vm, struct page *page)
-{
-	/*
-	 * On !llc, we need to change the pages back to WB. We only do so
-	 * in bulk, so we rarely need to change the page attributes here,
-	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
-	 * To make detection of the possible sleep more likely, use an
-	 * unconditional might_sleep() for everybody.
-	 */
-	might_sleep();
-	spin_lock(&vm->free_pages.lock);
-	while (!pagevec_space(&vm->free_pages.pvec))
-		vm_free_pages_release(vm, false);
-	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
-	pagevec_add(&vm->free_pages.pvec, page);
-	spin_unlock(&vm->free_pages.lock);
-}
-
-static void i915_address_space_fini(struct i915_address_space *vm)
-{
-	spin_lock(&vm->free_pages.lock);
-	if (pagevec_count(&vm->free_pages.pvec))
-		vm_free_pages_release(vm, true);
-	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
-	spin_unlock(&vm->free_pages.lock);
-
-	drm_mm_takedown(&vm->mm);
-
-	mutex_destroy(&vm->mutex);
-}
-
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
-{
-	struct list_head *phases[] = {
-		&vm->bound_list,
-		&vm->unbound_list,
-		NULL,
-	}, **phase;
-
-	mutex_lock(&vm->i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		struct i915_vma *vma, *vn;
-
-		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			i915_vma_destroy(vma);
-	}
-	mutex_unlock(&vm->i915->drm.struct_mutex);
-}
-
-static void __i915_vm_release(struct work_struct *work)
-{
-	struct i915_address_space *vm =
-		container_of(work, struct i915_address_space, rcu.work);
-
-	ppgtt_destroy_vma(vm);
-
-	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	GEM_BUG_ON(!list_empty(&vm->unbound_list));
-
-	vm->cleanup(vm);
-	i915_address_space_fini(vm);
-
-	kfree(vm);
-}
-
-void i915_vm_release(struct kref *kref)
-{
-	struct i915_address_space *vm =
-		container_of(kref, struct i915_address_space, ref);
-
-	GEM_BUG_ON(i915_is_ggtt(vm));
-	trace_i915_ppgtt_release(vm);
-
-	vm->closed = true;
-	queue_rcu_work(vm->i915->wq, &vm->rcu);
-}
-
-static void i915_address_space_init(struct i915_address_space *vm, int subclass)
-{
-	kref_init(&vm->ref);
-	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
-
-	/*
-	 * The vm->mutex must be reclaim safe (for use in the shrinker).
-	 * Do a dummy acquire now under fs_reclaim so that any allocation
-	 * attempt holding the lock is immediately reported by lockdep.
-	 */
-	mutex_init(&vm->mutex);
-	lockdep_set_subclass(&vm->mutex, subclass);
-	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
-
-	GEM_BUG_ON(!vm->total);
-	drm_mm_init(&vm->mm, 0, vm->total);
-	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
-
-	stash_init(&vm->free_pages);
-
-	INIT_LIST_HEAD(&vm->unbound_list);
-	INIT_LIST_HEAD(&vm->bound_list);
-}
-
-static int __setup_page_dma(struct i915_address_space *vm,
-			    struct i915_page_dma *p,
-			    gfp_t gfp)
-{
-	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
-	if (unlikely(!p->page))
-		return -ENOMEM;
-
-	p->daddr = dma_map_page_attrs(vm->dma,
-				      p->page, 0, PAGE_SIZE,
-				      PCI_DMA_BIDIRECTIONAL,
-				      DMA_ATTR_SKIP_CPU_SYNC |
-				      DMA_ATTR_NO_WARN);
-	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
-		vm_free_page(vm, p->page);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static int setup_page_dma(struct i915_address_space *vm,
-			  struct i915_page_dma *p)
-{
-	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
-}
-
-static void cleanup_page_dma(struct i915_address_space *vm,
-			     struct i915_page_dma *p)
-{
-	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	vm_free_page(vm, p->page);
-}
-
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
-
-static void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
-{
-	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
-}
-
-#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
-#define fill32_px(px, v) do {						\
-	u64 v__ = lower_32_bits(v);					\
-	fill_px((px), v__ << 32 | v__);					\
-} while (0)
-
-static int
-setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
-{
-	unsigned long size;
-
-	/*
-	 * In order to utilize 64K pages for an object with a size < 2M, we will
-	 * need to support a 64K scratch page, given that every 16th entry for a
-	 * page-table operating in 64K mode must point to a properly aligned 64K
-	 * region, including any PTEs which happen to point to scratch.
-	 *
-	 * This is only relevant for the 48b PPGTT where we support
-	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
-	 * scratch (read-only) between all vm, we create one 64k scratch page
-	 * for all.
-	 */
-	size = I915_GTT_PAGE_SIZE_4K;
-	if (i915_vm_is_4lvl(vm) &&
-	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
-		size = I915_GTT_PAGE_SIZE_64K;
-		gfp |= __GFP_NOWARN;
-	}
-	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
-
-	do {
-		unsigned int order = get_order(size);
-		struct page *page;
-		dma_addr_t addr;
-
-		page = alloc_pages(gfp, order);
-		if (unlikely(!page))
-			goto skip;
-
-		addr = dma_map_page_attrs(vm->dma,
-					  page, 0, size,
-					  PCI_DMA_BIDIRECTIONAL,
-					  DMA_ATTR_SKIP_CPU_SYNC |
-					  DMA_ATTR_NO_WARN);
-		if (unlikely(dma_mapping_error(vm->dma, addr)))
-			goto free_page;
-
-		if (unlikely(!IS_ALIGNED(addr, size)))
-			goto unmap_page;
-
-		vm->scratch[0].base.page = page;
-		vm->scratch[0].base.daddr = addr;
-		vm->scratch_order = order;
-		return 0;
-
-unmap_page:
-		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
-free_page:
-		__free_pages(page, order);
-skip:
-		if (size == I915_GTT_PAGE_SIZE_4K)
-			return -ENOMEM;
-
-		size = I915_GTT_PAGE_SIZE_4K;
-		gfp &= ~__GFP_NOWARN;
-	} while (1);
-}
-
-static void cleanup_scratch_page(struct i915_address_space *vm)
-{
-	struct i915_page_dma *p = px_base(&vm->scratch[0]);
-	unsigned int order = vm->scratch_order;
-
-	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_pages(p->page, order);
-}
-
-static void free_scratch(struct i915_address_space *vm)
-{
-	int i;
-
-	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
-		return;
-
-	for (i = 1; i <= vm->top; i++) {
-		if (!px_dma(&vm->scratch[i]))
-			break;
-		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
-	}
-
-	cleanup_scratch_page(vm);
-}
-
-static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
-{
-	struct i915_page_table *pt;
-
-	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
-	if (unlikely(!pt))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, &pt->base))) {
-		kfree(pt);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	atomic_set(&pt->used, 0);
-	return pt;
-}
-
-static struct i915_page_directory *__alloc_pd(size_t sz)
-{
-	struct i915_page_directory *pd;
-
-	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
-	if (unlikely(!pd))
-		return NULL;
-
-	spin_lock_init(&pd->lock);
-	return pd;
-}
-
-static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
-{
-	struct i915_page_directory *pd;
-
-	pd = __alloc_pd(sizeof(*pd));
-	if (unlikely(!pd))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return pd;
-}
-
-static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
-{
-	cleanup_page_dma(vm, pd);
-	kfree(pd);
-}
-
-#define free_px(vm, px) free_pd(vm, px_base(px))
-
-static inline void
-write_dma_entry(struct i915_page_dma * const pdma,
-		const unsigned short idx,
-		const u64 encoded_entry)
-{
-	u64 * const vaddr = kmap_atomic(pdma->page);
-
-	vaddr[idx] = encoded_entry;
-	kunmap_atomic(vaddr);
-}
-
-static inline void
-__set_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short idx,
-	       struct i915_page_dma * const to,
-	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
-{
-	/* Each thread pre-pins the pd, and we may have a thread per pde. */
-	GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry));
-
-	atomic_inc(px_used(pd));
-	pd->entry[idx] = to;
-	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
-}
-
-#define set_pd_entry(pd, idx, to) \
-	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
-
-static inline void
-clear_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short idx,
-	       const struct i915_page_scratch * const scratch)
-{
-	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
-
-	write_dma_entry(px_base(pd), idx, scratch->encode);
-	pd->entry[idx] = NULL;
-	atomic_dec(px_used(pd));
-}
-
-static bool
-release_pd_entry(struct i915_page_directory * const pd,
-		 const unsigned short idx,
-		 struct i915_page_table * const pt,
-		 const struct i915_page_scratch * const scratch)
-{
-	bool free = false;
-
-	if (atomic_add_unless(&pt->used, -1, 1))
-		return false;
-
-	spin_lock(&pd->lock);
-	if (atomic_dec_and_test(&pt->used)) {
-		clear_pd_entry(pd, idx, scratch);
-		free = true;
-	}
-	spin_unlock(&pd->lock);
-
-	return free;
-}
-
-/*
- * PDE TLBs are a pain to invalidate on GEN8+. When we modify
- * the page table structures, we mark them dirty so that
- * context switching/execlist queuing code takes extra steps
- * to ensure that tlbs are flushed.
- */
-static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
-{
-	ppgtt->pd_dirty_engines = ALL_ENGINES;
-}
-
-static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
-{
-	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
-	enum vgt_g2v_type msg;
-	int i;
-
-	if (create)
-		atomic_inc(px_used(ppgtt->pd)); /* never remove */
-	else
-		atomic_dec(px_used(ppgtt->pd));
-
-	mutex_lock(&dev_priv->vgpu.lock);
-
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		const u64 daddr = px_dma(ppgtt->pd);
-
-		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
-		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
-
-		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
-	} else {
-		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
-			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
-
-			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
-			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
-		}
-
-		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
-	}
-
-	/* g2v_notify atomically (via hv trap) consumes the message packet. */
-	I915_WRITE(vgtif_reg(g2v_notify), msg);
-
-	mutex_unlock(&dev_priv->vgpu.lock);
-}
-
-/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
-#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
-#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
-#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
-#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
-#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
-#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
-#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
-
-static inline unsigned int
-gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
-{
-	const int shift = gen8_pd_shift(lvl);
-	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
-
-	GEM_BUG_ON(start >= end);
-	end += ~mask >> gen8_pd_shift(1);
-
-	*idx = i915_pde_index(start, shift);
-	if ((start ^ end) & mask)
-		return GEN8_PDES - *idx;
-	else
-		return i915_pde_index(end, shift) - *idx;
-}
-
-static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
-{
-	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
-
-	GEM_BUG_ON(start >= end);
-	return (start ^ end) & mask && (start & ~mask) == 0;
-}
-
-static inline unsigned int gen8_pt_count(u64 start, u64 end)
-{
-	GEM_BUG_ON(start >= end);
-	if ((start ^ end) >> gen8_pd_shift(1))
-		return GEN8_PDES - (start & (GEN8_PDES - 1));
-	else
-		return end - start;
-}
-
-static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
-{
-	unsigned int shift = __gen8_pte_shift(vm->top);
-	return (vm->total + (1ull << shift) - 1) >> shift;
-}
-
-static inline struct i915_page_directory *
-gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
-{
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (vm->top == 2)
-		return ppgtt->pd;
-	else
-		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
-}
-
-static inline struct i915_page_directory *
-gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
-{
-	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
-}
-
-static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
-				 struct i915_page_directory *pd,
-				 int count, int lvl)
-{
-	if (lvl) {
-		void **pde = pd->entry;
-
-		do {
-			if (!*pde)
-				continue;
-
-			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
-		} while (pde++, --count);
-	}
-
-	free_px(vm, pd);
-}
-
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (intel_vgpu_active(vm->i915))
-		gen8_ppgtt_notify_vgt(ppgtt, false);
-
-	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
-	free_scratch(vm);
-}
-
-static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
-			      struct i915_page_directory * const pd,
-			      u64 start, const u64 end, int lvl)
-{
-	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
-	unsigned int idx, len;
-
-	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
-
-	len = gen8_pd_range(start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, start, end,
-	    idx, len, atomic_read(px_used(pd)));
-	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
-
-	do {
-		struct i915_page_table *pt = pd->entry[idx];
-
-		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
-		    gen8_pd_contains(start, end, lvl)) {
-			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
-			    __func__, vm, lvl + 1, idx, start, end);
-			clear_pd_entry(pd, idx, scratch);
-			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
-			start += (u64)I915_PDES << gen8_pd_shift(lvl);
-			continue;
-		}
-
-		if (lvl) {
-			start = __gen8_ppgtt_clear(vm, as_pd(pt),
-						   start, end, lvl);
-		} else {
-			unsigned int count;
-			u64 *vaddr;
-
-			count = gen8_pt_count(start, end);
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
-			    __func__, vm, lvl, start, end,
-			    gen8_pd_index(start, 0), count,
-			    atomic_read(&pt->used));
-			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
-
-			vaddr = kmap_atomic_px(pt);
-			memset64(vaddr + gen8_pd_index(start, 0),
-				 vm->scratch[0].encode,
-				 count);
-			kunmap_atomic(vaddr);
-
-			atomic_sub(count, &pt->used);
-			start += count;
-		}
-
-		if (release_pd_entry(pd, idx, pt, scratch))
-			free_px(vm, pt);
-	} while (idx++, --len);
-
-	return start;
-}
-
-static void gen8_ppgtt_clear(struct i915_address_space *vm,
-			     u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(range_overflows(start, length, vm->total));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-	GEM_BUG_ON(length == 0);
-
-	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
-			   start, start + length, vm->top);
-}
-
-static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
-			      struct i915_page_directory * const pd,
-			      u64 * const start, const u64 end, int lvl)
-{
-	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
-	struct i915_page_table *alloc = NULL;
-	unsigned int idx, len;
-	int ret = 0;
-
-	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
-
-	len = gen8_pd_range(*start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, *start, end,
-	    idx, len, atomic_read(px_used(pd)));
-	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
-
-	spin_lock(&pd->lock);
-	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
-	do {
-		struct i915_page_table *pt = pd->entry[idx];
-
-		if (!pt) {
-			spin_unlock(&pd->lock);
-
-			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
-			    __func__, vm, lvl + 1, idx);
-
-			pt = fetch_and_zero(&alloc);
-			if (lvl) {
-				if (!pt) {
-					pt = &alloc_pd(vm)->pt;
-					if (IS_ERR(pt)) {
-						ret = PTR_ERR(pt);
-						goto out;
-					}
-				}
-
-				fill_px(pt, vm->scratch[lvl].encode);
-			} else {
-				if (!pt) {
-					pt = alloc_pt(vm);
-					if (IS_ERR(pt)) {
-						ret = PTR_ERR(pt);
-						goto out;
-					}
-				}
-
-				if (intel_vgpu_active(vm->i915) ||
-				    gen8_pt_count(*start, end) < I915_PDES)
-					fill_px(pt, vm->scratch[lvl].encode);
-			}
-
-			spin_lock(&pd->lock);
-			if (likely(!pd->entry[idx]))
-				set_pd_entry(pd, idx, pt);
-			else
-				alloc = pt, pt = pd->entry[idx];
-		}
-
-		if (lvl) {
-			atomic_inc(&pt->used);
-			spin_unlock(&pd->lock);
-
-			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
-						 start, end, lvl);
-			if (unlikely(ret)) {
-				if (release_pd_entry(pd, idx, pt, scratch))
-					free_px(vm, pt);
-				goto out;
-			}
-
-			spin_lock(&pd->lock);
-			atomic_dec(&pt->used);
-			GEM_BUG_ON(!atomic_read(&pt->used));
-		} else {
-			unsigned int count = gen8_pt_count(*start, end);
-
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
-			    __func__, vm, lvl, *start, end,
-			    gen8_pd_index(*start, 0), count,
-			    atomic_read(&pt->used));
-
-			atomic_add(count, &pt->used);
-			/* All other pdes may be simultaneously removed */
-			GEM_BUG_ON(atomic_read(&pt->used) > 2 * I915_PDES);
-			*start += count;
-		}
-	} while (idx++, --len);
-	spin_unlock(&pd->lock);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
-			    u64 start, u64 length)
-{
-	u64 from;
-	int err;
-
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(range_overflows(start, length, vm->total));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-	GEM_BUG_ON(length == 0);
-	from = start;
-
-	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
-				 &start, start + length, vm->top);
-	if (unlikely(err && from != start))
-		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
-				   from, start, vm->top);
-
-	return err;
-}
-
-static inline struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-} sgt_dma(struct i915_vma *vma) {
-	struct scatterlist *sg = vma->pages->sgl;
-	dma_addr_t addr = sg_dma_address(sg);
-	return (struct sgt_dma) { sg, addr, addr + sg->length };
-}
-
-static __always_inline u64
-gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
-		      struct i915_page_directory *pdp,
-		      struct sgt_dma *iter,
-		      u64 idx,
-		      enum i915_cache_level cache_level,
-		      u32 flags)
-{
-	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
-	gen8_pte_t *vaddr;
-
-	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
-	do {
-		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
-
-		iter->dma += I915_GTT_PAGE_SIZE;
-		if (iter->dma >= iter->max) {
-			iter->sg = __sg_next(iter->sg);
-			if (!iter->sg) {
-				idx = 0;
-				break;
-			}
-
-			iter->dma = sg_dma_address(iter->sg);
-			iter->max = iter->dma + iter->sg->length;
-		}
-
-		if (gen8_pd_index(++idx, 0) == 0) {
-			if (gen8_pd_index(idx, 1) == 0) {
-				/* Limited by sg length for 3lvl */
-				if (gen8_pd_index(idx, 2) == 0)
-					break;
-
-				pd = pdp->entry[gen8_pd_index(idx, 2)];
-			}
-
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
-		}
-	} while (1);
-	kunmap_atomic(vaddr);
-
-	return idx;
-}
-
-static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
-				   struct sgt_dma *iter,
-				   enum i915_cache_level cache_level,
-				   u32 flags)
-{
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
-	u64 start = vma->node.start;
-	dma_addr_t rem = iter->sg->length;
-
-	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
-
-	do {
-		struct i915_page_directory * const pdp =
-			gen8_pdp_for_page_address(vma->vm, start);
-		struct i915_page_directory * const pd =
-			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
-		gen8_pte_t encode = pte_encode;
-		unsigned int maybe_64K = -1;
-		unsigned int page_size;
-		gen8_pte_t *vaddr;
-		u16 index;
-
-		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
-		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-		    rem >= I915_GTT_PAGE_SIZE_2M &&
-		    !__gen8_pte_index(start, 0)) {
-			index = __gen8_pte_index(start, 1);
-			encode |= GEN8_PDE_PS_2M;
-			page_size = I915_GTT_PAGE_SIZE_2M;
-
-			vaddr = kmap_atomic_px(pd);
-		} else {
-			struct i915_page_table *pt =
-				i915_pt_entry(pd, __gen8_pte_index(start, 1));
-
-			index = __gen8_pte_index(start, 0);
-			page_size = I915_GTT_PAGE_SIZE;
-
-			if (!index &&
-			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
-			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
-			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
-				maybe_64K = __gen8_pte_index(start, 1);
-
-			vaddr = kmap_atomic_px(pt);
-		}
-
-		do {
-			GEM_BUG_ON(iter->sg->length < page_size);
-			vaddr[index++] = encode | iter->dma;
-
-			start += page_size;
-			iter->dma += page_size;
-			rem -= page_size;
-			if (iter->dma >= iter->max) {
-				iter->sg = __sg_next(iter->sg);
-				if (!iter->sg)
-					break;
-
-				rem = iter->sg->length;
-				iter->dma = sg_dma_address(iter->sg);
-				iter->max = iter->dma + rem;
-
-				if (maybe_64K != -1 && index < I915_PDES &&
-				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
-				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
-					maybe_64K = -1;
-
-				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
-					break;
-			}
-		} while (rem >= page_size && index < I915_PDES);
-
-		kunmap_atomic(vaddr);
-
-		/*
-		 * Is it safe to mark the 2M block as 64K? -- Either we have
-		 * filled whole page-table with 64K entries, or filled part of
-		 * it and have reached the end of the sg table and we have
-		 * enough padding.
-		 */
-		if (maybe_64K != -1 &&
-		    (index == I915_PDES ||
-		     (i915_vm_has_scratch_64K(vma->vm) &&
-		      !iter->sg && IS_ALIGNED(vma->node.start +
-					      vma->node.size,
-					      I915_GTT_PAGE_SIZE_2M)))) {
-			vaddr = kmap_atomic_px(pd);
-			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
-			kunmap_atomic(vaddr);
-			page_size = I915_GTT_PAGE_SIZE_64K;
-
-			/*
-			 * We write all 4K page entries, even when using 64K
-			 * pages. In order to verify that the HW isn't cheating
-			 * by using the 4K PTE instead of the 64K PTE, we want
-			 * to remove all the surplus entries. If the HW skipped
-			 * the 64K PTE, it will read/write into the scratch page
-			 * instead - which we detect as missing results during
-			 * selftests.
-			 */
-			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
-				u16 i;
-
-				encode = vma->vm->scratch[0].encode;
-				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
-
-				for (i = 1; i < index; i += 16)
-					memset64(vaddr + i, encode, 15);
-
-				kunmap_atomic(vaddr);
-			}
-		}
-
-		vma->page_sizes.gtt |= page_size;
-	} while (iter->sg);
-}
-
-static void gen8_ppgtt_insert(struct i915_address_space *vm,
-			      struct i915_vma *vma,
-			      enum i915_cache_level cache_level,
-			      u32 flags)
-{
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
-	struct sgt_dma iter = sgt_dma(vma);
-
-	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
-	} else  {
-		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
-
-		do {
-			struct i915_page_directory * const pdp =
-				gen8_pdp_for_page_index(vm, idx);
-
-			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
-						    cache_level, flags);
-		} while (idx);
-
-		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-	}
-}
-
-static int gen8_init_scratch(struct i915_address_space *vm)
-{
-	int ret;
-	int i;
-
-	/*
-	 * If everybody agrees to not to write into the scratch page,
-	 * we can reuse it for all vm, keeping contexts and processes separate.
-	 */
-	if (vm->has_read_only &&
-	    vm->i915->kernel_context &&
-	    vm->i915->kernel_context->vm) {
-		struct i915_address_space *clone = vm->i915->kernel_context->vm;
-
-		GEM_BUG_ON(!clone->has_read_only);
-
-		vm->scratch_order = clone->scratch_order;
-		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
-		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
-		return 0;
-	}
-
-	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
-	if (ret)
-		return ret;
-
-	vm->scratch[0].encode =
-		gen8_pte_encode(px_dma(&vm->scratch[0]),
-				I915_CACHE_LLC, vm->has_read_only);
-
-	for (i = 1; i <= vm->top; i++) {
-		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
-			goto free_scratch;
-
-		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
-		vm->scratch[i].encode =
-			gen8_pde_encode(px_dma(&vm->scratch[i]),
-					I915_CACHE_LLC);
-	}
-
-	return 0;
-
-free_scratch:
-	free_scratch(vm);
-	return -ENOMEM;
-}
-
-static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
-{
-	struct i915_address_space *vm = &ppgtt->vm;
-	struct i915_page_directory *pd = ppgtt->pd;
-	unsigned int idx;
-
-	GEM_BUG_ON(vm->top != 2);
-	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
-
-	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
-		struct i915_page_directory *pde;
-
-		pde = alloc_pd(vm);
-		if (IS_ERR(pde))
-			return PTR_ERR(pde);
-
-		fill_px(pde, vm->scratch[1].encode);
-		set_pd_entry(pd, idx, pde);
-		atomic_inc(px_used(pde)); /* keep pinned */
-	}
-
-	return 0;
-}
-
-static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-
-	ppgtt->vm.gt = gt;
-	ppgtt->vm.i915 = i915;
-	ppgtt->vm.dma = &i915->drm.pdev->dev;
-	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
-
-	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
-
-	ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
-	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
-	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
-	ppgtt->vm.vma_ops.clear_pages = clear_pages;
-}
-
-static struct i915_page_directory *
-gen8_alloc_top_pd(struct i915_address_space *vm)
-{
-	const unsigned int count = gen8_pd_top_count(vm);
-	struct i915_page_directory *pd;
-
-	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
-
-	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
-	if (unlikely(!pd))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
-	atomic_inc(px_used(pd)); /* mark as pinned */
-	return pd;
-}
-
-/*
- * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
- * with a net effect resembling a 2-level page table in normal x86 terms. Each
- * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
- * space.
- *
- */
-static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ERR_PTR(-ENOMEM);
-
-	ppgtt_init(ppgtt, &i915->gt);
-	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
-
-	/*
-	 * From bdw, there is hw support for read-only pages in the PPGTT.
-	 *
-	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
-	 * for now.
-	 */
-	ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
-
-	/* There are only few exceptions for gen >=6. chv and bxt.
-	 * And we are not sure about the latter so play safe for now.
-	 */
-	if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
-		ppgtt->vm.pt_kmap_wc = true;
-
-	err = gen8_init_scratch(&ppgtt->vm);
-	if (err)
-		goto err_free;
-
-	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
-	if (IS_ERR(ppgtt->pd)) {
-		err = PTR_ERR(ppgtt->pd);
-		goto err_free_scratch;
-	}
-
-	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
-		if (intel_vgpu_active(i915)) {
-			err = gen8_preallocate_top_level_pdp(ppgtt);
-			if (err)
-				goto err_free_pd;
-		}
-	}
-
-	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
-	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
-	ppgtt->vm.clear_range = gen8_ppgtt_clear;
-
-	if (intel_vgpu_active(i915))
-		gen8_ppgtt_notify_vgt(ppgtt, true);
-
-	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
-
-	return ppgtt;
-
-err_free_pd:
-	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
-			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
-err_free_scratch:
-	free_scratch(&ppgtt->vm);
-err_free:
-	kfree(ppgtt);
-	return ERR_PTR(err);
-}
-
-/* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
-				  const unsigned int pde,
-				  const struct i915_page_table *pt)
-{
-	/* Caller needs to make sure the write completes if necessary */
-	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
-		  ppgtt->pd_addr + pde);
-}
-
-static void gen7_ppgtt_enable(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_uncore *uncore = gt->uncore;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 ecochk;
-
-	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
-
-	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
-	if (IS_HASWELL(i915)) {
-		ecochk |= ECOCHK_PPGTT_WB_HSW;
-	} else {
-		ecochk |= ECOCHK_PPGTT_LLC_IVB;
-		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
-	}
-	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
-
-	for_each_engine(engine, i915, id) {
-		/* GFX_MODE is per-ring on gen7+ */
-		ENGINE_WRITE(engine,
-			     RING_MODE_GEN7,
-			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	}
-}
-
-static void gen6_ppgtt_enable(struct intel_gt *gt)
-{
-	struct intel_uncore *uncore = gt->uncore;
-
-	intel_uncore_rmw(uncore,
-			 GAC_ECO_BITS,
-			 0,
-			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
-
-	intel_uncore_rmw(uncore,
-			 GAB_CTL,
-			 0,
-			 GAB_CTL_CONT_AFTER_PAGEFAULT);
-
-	intel_uncore_rmw(uncore,
-			 GAM_ECOCHK,
-			 0,
-			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
-
-	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
-		intel_uncore_write(uncore,
-				   GFX_MODE,
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-}
-
-/* PPGTT support for Sandybdrige/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
-				   u64 start, u64 length)
-{
-	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
-	unsigned int pde = first_entry / GEN6_PTES;
-	unsigned int pte = first_entry % GEN6_PTES;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-
-	while (num_entries) {
-		struct i915_page_table * const pt =
-			i915_pt_entry(ppgtt->base.pd, pde++);
-		const unsigned int count = min(num_entries, GEN6_PTES - pte);
-		gen6_pte_t *vaddr;
-
-		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
-		num_entries -= count;
-
-		GEM_BUG_ON(count > atomic_read(&pt->used));
-		if (!atomic_sub_return(count, &pt->used))
-			ppgtt->scan_for_unused_pt = true;
-
-		/*
-		 * Note that the hw doesn't support removing PDE on the fly
-		 * (they are cached inside the context with no means to
-		 * invalidate the cache), so we can only reset the PTE
-		 * entries back to scratch.
-		 */
-
-		vaddr = kmap_atomic_px(pt);
-		memset32(vaddr + pte, scratch_pte, count);
-		kunmap_atomic(vaddr);
-
-		pte = 0;
-	}
-}
-
-static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct i915_vma *vma,
-				      enum i915_cache_level cache_level,
-				      u32 flags)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pd = ppgtt->pd;
-	unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
-	unsigned act_pt = first_entry / GEN6_PTES;
-	unsigned act_pte = first_entry % GEN6_PTES;
-	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
-	struct sgt_dma iter = sgt_dma(vma);
-	gen6_pte_t *vaddr;
-
-	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
-
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
-	do {
-		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
-
-		iter.dma += I915_GTT_PAGE_SIZE;
-		if (iter.dma == iter.max) {
-			iter.sg = __sg_next(iter.sg);
-			if (!iter.sg)
-				break;
-
-			iter.dma = sg_dma_address(iter.sg);
-			iter.max = iter.dma + iter.sg->length;
-		}
-
-		if (++act_pte == GEN6_PTES) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
-			act_pte = 0;
-		}
-	} while (1);
-	kunmap_atomic(vaddr);
-
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-}
-
-static int gen6_alloc_va_range(struct i915_address_space *vm,
-			       u64 start, u64 length)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_table *pt, *alloc = NULL;
-	intel_wakeref_t wakeref;
-	u64 from = start;
-	unsigned int pde;
-	bool flush = false;
-	int ret = 0;
-
-	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
-	spin_lock(&pd->lock);
-	gen6_for_each_pde(pt, pd, start, length, pde) {
-		const unsigned int count = gen6_pte_count(start, length);
-
-		if (px_base(pt) == px_base(&vm->scratch[1])) {
-			spin_unlock(&pd->lock);
-
-			pt = fetch_and_zero(&alloc);
-			if (!pt)
-				pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				ret = PTR_ERR(pt);
-				goto unwind_out;
-			}
-
-			fill32_px(pt, vm->scratch[0].encode);
-
-			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch[1]) {
-				pd->entry[pde] = pt;
-				if (i915_vma_is_bound(ppgtt->vma,
-						      I915_VMA_GLOBAL_BIND)) {
-					gen6_write_pde(ppgtt, pde, pt);
-					flush = true;
-				}
-			} else {
-				alloc = pt;
-				pt = pd->entry[pde];
-			}
-		}
-
-		atomic_add(count, &pt->used);
-	}
-	spin_unlock(&pd->lock);
-
-	if (flush) {
-		mark_tlbs_dirty(&ppgtt->base);
-		gen6_ggtt_invalidate(vm->gt->ggtt);
-	}
-
-	goto out;
-
-unwind_out:
-	gen6_ppgtt_clear_range(vm, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
-	return ret;
-}
-
-static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
-{
-	struct i915_address_space * const vm = &ppgtt->base.vm;
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	int ret;
-
-	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
-	if (ret)
-		return ret;
-
-	vm->scratch[0].encode =
-		vm->pte_encode(px_dma(&vm->scratch[0]),
-			       I915_CACHE_NONE, PTE_READ_ONLY);
-
-	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
-		cleanup_scratch_page(vm);
-		return -ENOMEM;
-	}
-
-	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
-	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
-
-	return 0;
-}
-
-static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
-{
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_dma * const scratch =
-		px_base(&ppgtt->base.vm.scratch[1]);
-	struct i915_page_table *pt;
-	u32 pde;
-
-	gen6_for_all_pdes(pt, pd, pde)
-		if (px_base(pt) != scratch)
-			free_px(&ppgtt->base.vm, pt);
-}
-
-static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct drm_i915_private *i915 = vm->i915;
-
-	/* FIXME remove the struct_mutex to bring the locking under control */
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_vma_destroy(ppgtt->vma);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	gen6_ppgtt_free_pd(ppgtt);
-	free_scratch(vm);
-	kfree(ppgtt->base.pd);
-}
-
-static int pd_vma_set_pages(struct i915_vma *vma)
-{
-	vma->pages = ERR_PTR(-ENODEV);
-	return 0;
-}
-
-static void pd_vma_clear_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(!vma->pages);
-
-	vma->pages = NULL;
-}
-
-static int pd_vma_bind(struct i915_vma *vma,
-		       enum i915_cache_level cache_level,
-		       u32 unused)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
-	struct gen6_ppgtt *ppgtt = vma->private;
-	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
-	struct i915_page_table *pt;
-	unsigned int pde;
-
-	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
-	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
-
-	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
-		gen6_write_pde(ppgtt, pde, pt);
-
-	mark_tlbs_dirty(&ppgtt->base);
-	gen6_ggtt_invalidate(ggtt);
-
-	return 0;
-}
-
-static void pd_vma_unbind(struct i915_vma *vma)
-{
-	struct gen6_ppgtt *ppgtt = vma->private;
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_dma * const scratch =
-		px_base(&ppgtt->base.vm.scratch[1]);
-	struct i915_page_table *pt;
-	unsigned int pde;
-
-	if (!ppgtt->scan_for_unused_pt)
-		return;
-
-	/* Free all no longer used page tables */
-	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
-		if (px_base(pt) == scratch || atomic_read(&pt->used))
-			continue;
-
-		free_px(&ppgtt->base.vm, pt);
-		pd->entry[pde] = scratch;
-	}
-
-	ppgtt->scan_for_unused_pt = false;
-}
-
-static const struct i915_vma_ops pd_vma_ops = {
-	.set_pages = pd_vma_set_pages,
-	.clear_pages = pd_vma_clear_pages,
-	.bind_vma = pd_vma_bind,
-	.unbind_vma = pd_vma_unbind,
-};
-
-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
-{
-	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
-	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
-	GEM_BUG_ON(size > ggtt->vm.total);
-
-	vma = i915_vma_alloc();
-	if (!vma)
-		return ERR_PTR(-ENOMEM);
-
-	i915_active_init(i915, &vma->active, NULL, NULL);
-
-	vma->vm = &ggtt->vm;
-	vma->ops = &pd_vma_ops;
-	vma->private = ppgtt;
-
-	vma->size = size;
-	vma->fence_size = size;
-	vma->flags = I915_VMA_GGTT;
-	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
-
-	INIT_LIST_HEAD(&vma->obj_link);
-	INIT_LIST_HEAD(&vma->closed_link);
-
-	mutex_lock(&vma->vm->mutex);
-	list_add(&vma->vm_link, &vma->vm->unbound_list);
-	mutex_unlock(&vma->vm->mutex);
-
-	return vma;
-}
-
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-	int err;
-
-	GEM_BUG_ON(ppgtt->base.vm.closed);
-
-	/*
-	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
-	 * which will be pinned into every active context.
-	 * (When vma->pin_count becomes atomic, I expect we will naturally
-	 * need a larger, unpacked, type and kill this redundancy.)
-	 */
-	if (ppgtt->pin_count++)
-		return 0;
-
-	/*
-	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
-	 * allocator works in address space sizes, so it's multiplied by page
-	 * size. We allocate at the top of the GTT to avoid fragmentation.
-	 */
-	err = i915_vma_pin(ppgtt->vma,
-			   0, GEN6_PD_ALIGN,
-			   PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		goto unpin;
-
-	return 0;
-
-unpin:
-	ppgtt->pin_count = 0;
-	return err;
-}
-
-void gen6_ppgtt_unpin(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-
-	GEM_BUG_ON(!ppgtt->pin_count);
-	if (--ppgtt->pin_count)
-		return;
-
-	i915_vma_unpin(ppgtt->vma);
-}
-
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-
-	if (!ppgtt->pin_count)
-		return;
-
-	ppgtt->pin_count = 0;
-	i915_vma_unpin(ppgtt->vma);
-}
-
-static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ggtt * const ggtt = &i915->ggtt;
-	struct gen6_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ERR_PTR(-ENOMEM);
-
-	ppgtt_init(&ppgtt->base, &i915->gt);
-	ppgtt->base.vm.top = 1;
-
-	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
-	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
-	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
-	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
-
-	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
-
-	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
-	if (!ppgtt->base.pd) {
-		err = -ENOMEM;
-		goto err_free;
-	}
-
-	err = gen6_ppgtt_init_scratch(ppgtt);
-	if (err)
-		goto err_pd;
-
-	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
-	if (IS_ERR(ppgtt->vma)) {
-		err = PTR_ERR(ppgtt->vma);
-		goto err_scratch;
-	}
-
-	return &ppgtt->base;
-
-err_scratch:
-	free_scratch(&ppgtt->base.vm);
-err_pd:
-	kfree(ppgtt->base.pd);
-err_free:
-	kfree(ppgtt);
-	return ERR_PTR(err);
-}
-
-static void gtt_write_workarounds(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_uncore *uncore = gt->uncore;
-
-	/* This function is for gtt related workarounds. This function is
-	 * called on driver load and after a GPU reset, so you can place
-	 * workarounds here even if they get overwritten by GPU reset.
-	 */
-	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
-	if (IS_BROADWELL(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
-	else if (IS_CHERRYVIEW(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
-	else if (IS_GEN9_LP(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
-	else if (INTEL_GEN(i915) >= 9)
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
-
-	/*
-	 * To support 64K PTEs we need to first enable the use of the
-	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
-	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
-	 * shouldn't be needed after GEN10.
-	 *
-	 * 64K pages were first introduced from BDW+, although technically they
-	 * only *work* from gen9+. For pre-BDW we instead have the option for
-	 * 32K pages, but we don't currently have any support for it in our
-	 * driver.
-	 */
-	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
-	    INTEL_GEN(i915) <= 10)
-		intel_uncore_rmw(uncore,
-				 GEN8_GAMW_ECO_DEV_RW_IA,
-				 0,
-				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
-
-	if (IS_GEN_RANGE(i915, 8, 11)) {
-		bool can_use_gtt_cache = true;
-
-		/*
-		 * According to the BSpec if we use 2M/1G pages then we also
-		 * need to disable the GTT cache. At least on BDW we can see
-		 * visual corruption when using 2M pages, and not disabling the
-		 * GTT cache.
-		 */
-		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
-			can_use_gtt_cache = false;
-
-		/* WaGttCachingOffByDefault */
-		intel_uncore_write(uncore,
-				   HSW_GTT_CACHE_EN,
-				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
-		WARN_ON_ONCE(can_use_gtt_cache &&
-			     intel_uncore_read(uncore,
-					       HSW_GTT_CACHE_EN) == 0);
-	}
-}
-
-int i915_ppgtt_init_hw(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-
-	gtt_write_workarounds(gt);
-
-	if (IS_GEN(i915, 6))
-		gen6_ppgtt_enable(gt);
-	else if (IS_GEN(i915, 7))
-		gen7_ppgtt_enable(gt);
-
-	return 0;
-}
-
-static struct i915_ppgtt *
-__ppgtt_create(struct drm_i915_private *i915)
-{
-	if (INTEL_GEN(i915) < 8)
-		return gen6_ppgtt_create(i915);
-	else
-		return gen8_ppgtt_create(i915);
-}
-
-struct i915_ppgtt *
-i915_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ppgtt *ppgtt;
-
-	ppgtt = __ppgtt_create(i915);
-	if (IS_ERR(ppgtt))
-		return ppgtt;
-
-	trace_i915_ppgtt_create(&ppgtt->vm);
-
-	return ppgtt;
-}
-
-/* Certain Gen5 chipsets require require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
- */
-static bool needs_idle_maps(struct drm_i915_private *dev_priv)
-{
-	/* Query intel_iommu to see if we need the workaround. Presumably that
-	 * was loaded first.
-	 */
-	return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
-}
-
-static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-
-	/* Don't bother messing with faults pre GEN6 as we have little
-	 * documentation supporting that it's a good idea.
-	 */
-	if (INTEL_GEN(i915) < 6)
-		return;
-
-	intel_gt_check_and_clear_faults(ggtt->vm.gt);
-
-	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
-	ggtt->invalidate(ggtt);
-}
-
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
-{
-	ggtt_suspend_mappings(&i915->ggtt);
-}
-
 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
@@ -2164,364 +52,6 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 	return -ENOSPC;
 }
 
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
-{
-	writeq(pte, addr);
-}
-
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 unused)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen8_pte_t __iomem *pte =
-		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
-
-	ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct sgt_iter sgt_iter;
-	gen8_pte_t __iomem *gtt_entries;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
-	dma_addr_t addr;
-
-	/*
-	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
-	 * not to allow the user to override access to a read only page.
-	 */
-
-	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
-	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
-	for_each_sgt_dma(addr, sgt_iter, vma->pages)
-		gen8_set_pte(gtt_entries++, pte_encode | addr);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *pte =
-		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	iowrite32(vm->pte_encode(addr, level, flags), pte);
-
-	ggtt->invalidate(ggtt);
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level. The object
- * will be accessible to the GPU via commands whose operands reference offsets
- * within the global GTT as well as accessible by the GPU through the GMADR
- * mapped BAR (dev_priv->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
-	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
-	struct sgt_iter iter;
-	dma_addr_t addr;
-	for_each_sgt_dma(addr, iter, vma->pages)
-		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void nop_clear_range(struct i915_address_space *vm,
-			    u64 start, u64 length)
-{
-}
-
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
-{
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	/*
-	 * Make sure the internal GAM fifo has been cleared of all GTT
-	 * writes before exiting stop_machine(). This guarantees that
-	 * any aperture accesses waiting to start in another process
-	 * cannot back up behind the GTT writes causing a hang.
-	 * The register can be any arbitrary GAM register.
-	 */
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-}
-
-struct insert_page {
-	struct i915_address_space *vm;
-	dma_addr_t addr;
-	u64 offset;
-	enum i915_cache_level level;
-};
-
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
-{
-	struct insert_page *arg = _arg;
-
-	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
-					  dma_addr_t addr,
-					  u64 offset,
-					  enum i915_cache_level level,
-					  u32 unused)
-{
-	struct insert_page arg = { vm, addr, offset, level };
-
-	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
-}
-
-struct insert_entries {
-	struct i915_address_space *vm;
-	struct i915_vma *vma;
-	enum i915_cache_level level;
-	u32 flags;
-};
-
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
-{
-	struct insert_entries *arg = _arg;
-
-	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
-					     struct i915_vma *vma,
-					     enum i915_cache_level level,
-					     u32 flags)
-{
-	struct insert_entries arg = { vm, vma, level, flags };
-
-	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
-}
-
-struct clear_range {
-	struct i915_address_space *vm;
-	u64 start;
-	u64 length;
-};
-
-static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
-{
-	struct clear_range *arg = _arg;
-
-	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
-					  u64 start,
-					  u64 length)
-{
-	struct clear_range arg = { vm, start, length };
-
-	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
-}
-
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
-	gen6_pte_t scratch_pte, __iomem *gtt_base =
-		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = vm->scratch[0].encode;
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-}
-
-static void i915_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level cache_level,
-				  u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-}
-
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level cache_level,
-				     u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
-				    flags);
-}
-
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
-}
-
-static int ggtt_bind_vma(struct i915_vma *vma,
-			 enum i915_cache_level cache_level,
-			 u32 flags)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	struct drm_i915_gem_object *obj = vma->obj;
-	intel_wakeref_t wakeref;
-	u32 pte_flags;
-
-	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-
-	/*
-	 * Without aliasing PPGTT there's no difference between
-	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
-	 * upgrade to both bound if we bind either to avoid double-binding.
-	 */
-	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
-
-	return 0;
-}
-
-static void ggtt_unbind_vma(struct i915_vma *vma)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	intel_wakeref_t wakeref;
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-}
-
-static int aliasing_gtt_bind_vma(struct i915_vma *vma,
-				 enum i915_cache_level cache_level,
-				 u32 flags)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	u32 pte_flags;
-	int ret;
-
-	/* Currently applicable only to VLV */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
-
-		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
-			ret = alias->vm.allocate_va_range(&alias->vm,
-							  vma->node.start,
-							  vma->size);
-			if (ret)
-				return ret;
-		}
-
-		alias->vm.insert_entries(&alias->vm, vma,
-					 cache_level, pte_flags);
-	}
-
-	if (flags & I915_VMA_GLOBAL_BIND) {
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-			vma->vm->insert_entries(vma->vm, vma,
-						cache_level, pte_flags);
-		}
-	}
-
-	return 0;
-}
-
-static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-
-	if (vma->flags & I915_VMA_GLOBAL_BIND) {
-		struct i915_address_space *vm = vma->vm;
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-			vm->clear_range(vm, vma->node.start, vma->size);
-	}
-
-	if (vma->flags & I915_VMA_LOCAL_BIND) {
-		struct i915_address_space *vm =
-			&i915_vm_to_ggtt(vma->vm)->alias->vm;
-
-		vm->clear_range(vm, vma->node.start, vma->size);
-	}
-}
-
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
@@ -2530,7 +60,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+		/* XXX This does not prevent more requests being submitted! */
+		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
+						     -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
@@ -2540,1067 +72,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
 }
 
-static int ggtt_set_pages(struct i915_vma *vma)
-{
-	int ret;
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
-
-	vma->page_sizes = vma->obj->mm.page_sizes;
-
-	return 0;
-}
-
-static void i915_gtt_color_adjust(const struct drm_mm_node *node,
-				  unsigned long color,
-				  u64 *start,
-				  u64 *end)
-{
-	if (node->allocated && node->color != color)
-		*start += I915_GTT_PAGE_SIZE;
-
-	/* Also leave a space between the unallocated reserved node after the
-	 * GTT and any objects within the GTT, i.e. we use the color adjustment
-	 * to insert a guard page to prevent prefetches crossing over the
-	 * GTT boundary.
-	 */
-	node = list_next_entry(node, node_list);
-	if (node->color != color)
-		*end -= I915_GTT_PAGE_SIZE;
-}
-
-static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
-{
-	struct i915_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = i915_ppgtt_create(ggtt->vm.i915);
-	if (IS_ERR(ppgtt))
-		return PTR_ERR(ppgtt);
-
-	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
-		err = -ENODEV;
-		goto err_ppgtt;
-	}
-
-	/*
-	 * Note we only pre-allocate as far as the end of the global
-	 * GTT. On 48b / 4-level page-tables, the difference is very,
-	 * very significant! We have to preallocate as GVT/vgpu does
-	 * not like the page directory disappearing.
-	 */
-	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
-	if (err)
-		goto err_ppgtt;
-
-	ggtt->alias = ppgtt;
-
-	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
-	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
-
-	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
-	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
-
-	return 0;
-
-err_ppgtt:
-	i915_vm_put(&ppgtt->vm);
-	return err;
-}
-
-static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	struct i915_ppgtt *ppgtt;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	ppgtt = fetch_and_zero(&ggtt->alias);
-	if (!ppgtt)
-		goto out;
-
-	i915_vm_put(&ppgtt->vm);
-
-	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
-{
-	u64 size;
-	int ret;
-
-	if (!USES_GUC(ggtt->vm.i915))
-		return 0;
-
-	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
-	size = ggtt->vm.total - GUC_GGTT_TOP;
-
-	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
-				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
-				   PIN_NOEVICT);
-	if (ret)
-		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
-
-	return ret;
-}
-
-static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
-{
-	if (drm_mm_node_allocated(&ggtt->uc_fw))
-		drm_mm_remove_node(&ggtt->uc_fw);
-}
-
-static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
-{
-	ggtt_release_guc_top(ggtt);
-	drm_mm_remove_node(&ggtt->error_capture);
-}
-
-static int init_ggtt(struct i915_ggtt *ggtt)
-{
-	/* Let GEM Manage all of the aperture.
-	 *
-	 * However, leave one page at the end still bound to the scratch page.
-	 * There are a number of places where the hardware apparently prefetches
-	 * past the end of the object, and we've seen multiple hangs with the
-	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
-	 * aperture.  One page should be enough to keep any prefetching inside
-	 * of the aperture.
-	 */
-	unsigned long hole_start, hole_end;
-	struct drm_mm_node *entry;
-	int ret;
-
-	/*
-	 * GuC requires all resources that we're sharing with it to be placed in
-	 * non-WOPCM memory. If GuC is not present or not in use we still need a
-	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
-	 * why.
-	 */
-	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
-			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
-
-	ret = intel_vgt_balloon(ggtt);
-	if (ret)
-		return ret;
-
-	/* Reserve a mappable slot for our lockless error capture */
-	ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
-					  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
-					  0, ggtt->mappable_end,
-					  DRM_MM_INSERT_LOW);
-	if (ret)
-		return ret;
-
-	/*
-	 * The upper portion of the GuC address space has a sizeable hole
-	 * (several MB) that is inaccessible by GuC. Reserve this range within
-	 * GGTT as it can comfortably hold GuC/HuC firmware images.
-	 */
-	ret = ggtt_reserve_guc_top(ggtt);
-	if (ret)
-		goto err;
-
-	/* Clear any non-preallocated blocks */
-	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
-		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
-			      hole_start, hole_end);
-		ggtt->vm.clear_range(&ggtt->vm, hole_start,
-				     hole_end - hole_start);
-	}
-
-	/* And finally clear the reserved guard page */
-	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
-
-	return 0;
-
-err:
-	cleanup_init_ggtt(ggtt);
-	return ret;
-}
-
-int i915_init_ggtt(struct drm_i915_private *i915)
-{
-	int ret;
-
-	ret = init_ggtt(&i915->ggtt);
-	if (ret)
-		return ret;
-
-	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
-		ret = init_aliasing_ppgtt(&i915->ggtt);
-		if (ret)
-			cleanup_init_ggtt(&i915->ggtt);
-	}
-
-	return 0;
-}
-
-static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	struct i915_vma *vma, *vn;
-
-	ggtt->vm.closed = true;
-
-	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
-	flush_workqueue(i915->wq);
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
-		WARN_ON(i915_vma_unbind(vma));
-
-	if (drm_mm_node_allocated(&ggtt->error_capture))
-		drm_mm_remove_node(&ggtt->error_capture);
-
-	ggtt_release_guc_top(ggtt);
-
-	if (drm_mm_initialized(&ggtt->vm.mm)) {
-		intel_vgt_deballoon(ggtt);
-		i915_address_space_fini(&ggtt->vm);
-	}
-
-	ggtt->vm.cleanup(&ggtt->vm);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	arch_phys_wc_del(ggtt->mtrr);
-	io_mapping_fini(&ggtt->iomap);
-}
-
-/**
- * i915_ggtt_driver_release - Clean up GGTT hardware initialization
- * @i915: i915 device
- */
-void i915_ggtt_driver_release(struct drm_i915_private *i915)
-{
-	struct pagevec *pvec;
-
-	fini_aliasing_ppgtt(&i915->ggtt);
-
-	ggtt_cleanup_hw(&i915->ggtt);
-
-	pvec = &i915->mm.wc_stash.pvec;
-	if (pvec->nr) {
-		set_pages_array_wb(pvec->pages, pvec->nr);
-		__pagevec_release(pvec);
-	}
-
-	i915_gem_cleanup_stolen(i915);
-}
-
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
-	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
-	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
-	return snb_gmch_ctl << 20;
-}
-
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
-{
-	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
-	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
-	if (bdw_gmch_ctl)
-		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-
-#ifdef CONFIG_X86_32
-	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
-	if (bdw_gmch_ctl > 4)
-		bdw_gmch_ctl = 4;
-#endif
-
-	return bdw_gmch_ctl << 20;
-}
-
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
-{
-	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
-	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
-
-	if (gmch_ctrl)
-		return 1 << (20 + gmch_ctrl);
-
-	return 0;
-}
-
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	phys_addr_t phys_addr;
-	int ret;
-
-	/* For Modern GENs the PTEs and register space are split in the BAR */
-	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
-
-	/*
-	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
-	 * will be dropped. For WC mappings in general we have 64 byte burst
-	 * writes when the WC buffer is flushed, so we can't use it, but have to
-	 * resort to an uncached mapping. The WC issue is easily caught by the
-	 * readback check when writing GTT PTE entries.
-	 */
-	if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
-		ggtt->gsm = ioremap_nocache(phys_addr, size);
-	else
-		ggtt->gsm = ioremap_wc(phys_addr, size);
-	if (!ggtt->gsm) {
-		DRM_ERROR("Failed to map the ggtt page table\n");
-		return -ENOMEM;
-	}
-
-	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
-	if (ret) {
-		DRM_ERROR("Scratch setup failed\n");
-		/* iounmap will also get called at remove, but meh */
-		iounmap(ggtt->gsm);
-		return ret;
-	}
-
-	ggtt->vm.scratch[0].encode =
-		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
-				    I915_CACHE_NONE, 0);
-
-	return 0;
-}
-
-static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv)
-{
-	/* TGL doesn't support LLC or AGE settings */
-	I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
-	I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
-	I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
-	I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
-	I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
-	I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
-	I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
-	I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
-}
-
-static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
-	I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
-	I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
-	I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
-	I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
-	I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
-	I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
-	I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
-}
-
-/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
- * bits. When using advanced contexts each context stores its own PAT, but
- * writing this data shouldn't be harmful even in those cases. */
-static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
-{
-	u64 pat;
-
-	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
-	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
-	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
-	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
-	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
-	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
-	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
-	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
-
-	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
-	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
-}
-
-static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
-{
-	u64 pat;
-
-	/*
-	 * Map WB on BDW to snooped on CHV.
-	 *
-	 * Only the snoop bit has meaning for CHV, the rest is
-	 * ignored.
-	 *
-	 * The hardware will never snoop for certain types of accesses:
-	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
-	 * - PPGTT page tables
-	 * - some other special cycles
-	 *
-	 * As with BDW, we also need to consider the following for GT accesses:
-	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
-	 * so RTL will always use the value corresponding to
-	 * pat_sel = 000".
-	 * Which means we must set the snoop bit in PAT entry 0
-	 * in order to keep the global status page working.
-	 */
-
-	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(1, 0) |
-	      GEN8_PPAT(2, 0) |
-	      GEN8_PPAT(3, 0) |
-	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
-
-	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
-	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
-}
-
-static void gen6_gmch_remove(struct i915_address_space *vm)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-
-	iounmap(ggtt->gsm);
-	cleanup_scratch_page(vm);
-}
-
-static void setup_private_pat(struct drm_i915_private *dev_priv)
-{
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 8);
-
-	if (INTEL_GEN(dev_priv) >= 12)
-		tgl_setup_private_ppat(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 10)
-		cnl_setup_private_ppat(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
-		chv_setup_private_ppat(dev_priv);
-	else
-		bdw_setup_private_ppat(dev_priv);
-}
-
-static int gen8_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	unsigned int size;
-	u16 snb_gmch_ctl;
-	int err;
-
-	/* TODO: We're not aware of mappable constraints on gen8 yet */
-	ggtt->gmadr =
-		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
-						 pci_resource_len(pdev, 2));
-	ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
-	if (!err)
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
-	if (err)
-		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	if (IS_CHERRYVIEW(dev_priv))
-		size = chv_get_total_gtt_size(snb_gmch_ctl);
-	else
-		size = gen8_get_total_gtt_size(snb_gmch_ctl);
-
-	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-	ggtt->vm.insert_page = gen8_ggtt_insert_page;
-	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(dev_priv))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
-
-	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
-
-	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
-	if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
-	    IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
-		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
-		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		if (ggtt->vm.clear_range != nop_clear_range)
-			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
-	}
-
-	ggtt->invalidate = gen6_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	ggtt->vm.pte_encode = gen8_pte_encode;
-
-	setup_private_pat(dev_priv);
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-static int gen6_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	unsigned int size;
-	u16 snb_gmch_ctl;
-	int err;
-
-	ggtt->gmadr =
-		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
-						 pci_resource_len(pdev, 2));
-	ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
-	/* 64/512MB is the current min/max we actually know of, but this is just
-	 * a coarse sanity check.
-	 */
-	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
-		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
-		return -ENXIO;
-	}
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (!err)
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (err)
-		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
-	size = gen6_get_total_gtt_size(snb_gmch_ctl);
-	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
-
-	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
-		ggtt->vm.clear_range = gen6_ggtt_clear_range;
-	ggtt->vm.insert_page = gen6_ggtt_insert_page;
-	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-
-	ggtt->invalidate = gen6_ggtt_invalidate;
-
-	if (HAS_EDRAM(dev_priv))
-		ggtt->vm.pte_encode = iris_pte_encode;
-	else if (IS_HASWELL(dev_priv))
-		ggtt->vm.pte_encode = hsw_pte_encode;
-	else if (IS_VALLEYVIEW(dev_priv))
-		ggtt->vm.pte_encode = byt_pte_encode;
-	else if (INTEL_GEN(dev_priv) >= 7)
-		ggtt->vm.pte_encode = ivb_pte_encode;
-	else
-		ggtt->vm.pte_encode = snb_pte_encode;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-static void i915_gmch_remove(struct i915_address_space *vm)
-{
-	intel_gmch_remove();
-}
-
-static int i915_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	phys_addr_t gmadr_base;
-	int ret;
-
-	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
-	if (!ret) {
-		DRM_ERROR("failed to set up gmch\n");
-		return -EIO;
-	}
-
-	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
-
-	ggtt->gmadr =
-		(struct resource) DEFINE_RES_MEM(gmadr_base,
-						 ggtt->mappable_end);
-
-	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
-	ggtt->vm.insert_page = i915_ggtt_insert_page;
-	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
-	ggtt->vm.clear_range = i915_ggtt_clear_range;
-	ggtt->vm.cleanup = i915_gmch_remove;
-
-	ggtt->invalidate = gmch_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	if (unlikely(ggtt->do_idle_maps))
-		dev_notice(dev_priv->drm.dev,
-			   "Applying Ironlake quirks for intel_iommu\n");
-
-	return 0;
-}
-
-static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	int ret;
-
-	ggtt->vm.gt = gt;
-	ggtt->vm.i915 = i915;
-	ggtt->vm.dma = &i915->drm.pdev->dev;
-
-	if (INTEL_GEN(i915) <= 5)
-		ret = i915_gmch_probe(ggtt);
-	else if (INTEL_GEN(i915) < 8)
-		ret = gen6_gmch_probe(ggtt);
-	else
-		ret = gen8_gmch_probe(ggtt);
-	if (ret)
-		return ret;
-
-	if ((ggtt->vm.total - 1) >> 32) {
-		DRM_ERROR("We never expected a Global GTT with more than 32bits"
-			  " of address space! Found %lldM!\n",
-			  ggtt->vm.total >> 20);
-		ggtt->vm.total = 1ULL << 32;
-		ggtt->mappable_end =
-			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
-	}
-
-	if (ggtt->mappable_end > ggtt->vm.total) {
-		DRM_ERROR("mappable aperture extends past end of GGTT,"
-			  " aperture=%pa, total=%llx\n",
-			  &ggtt->mappable_end, ggtt->vm.total);
-		ggtt->mappable_end = ggtt->vm.total;
-	}
-
-	/* GMADR is the PCI mmio aperture into the global GTT. */
-	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
-	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
-	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
-			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
-
-	return 0;
-}
-
-/**
- * i915_ggtt_probe_hw - Probe GGTT hardware location
- * @i915: i915 device
- */
-int i915_ggtt_probe_hw(struct drm_i915_private *i915)
-{
-	int ret;
-
-	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
-	if (ret)
-		return ret;
-
-	if (intel_vtd_active())
-		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
-
-	return 0;
-}
-
-static int ggtt_init_hw(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	int ret = 0;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
-
-	ggtt->vm.is_ggtt = true;
-
-	/* Only VLV supports read-only GGTT mappings */
-	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
-
-	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
-		ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
-
-	if (!io_mapping_init_wc(&ggtt->iomap,
-				ggtt->gmadr.start,
-				ggtt->mappable_end)) {
-		ggtt->vm.cleanup(&ggtt->vm);
-		ret = -EIO;
-		goto out;
-	}
-
-	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
-
-	i915_ggtt_init_fences(ggtt);
-
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
-/**
- * i915_ggtt_init_hw - Initialize GGTT hardware
- * @dev_priv: i915 device
- */
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
-{
-	int ret;
-
-	stash_init(&dev_priv->mm.wc_stash);
-
-	/* Note that we use page colouring to enforce a guard page at the
-	 * end of the address space. This is required as the CS may prefetch
-	 * beyond the end of the batch buffer, across the page boundary,
-	 * and beyond the end of the GTT if we do not provide a guard.
-	 */
-	ret = ggtt_init_hw(&dev_priv->ggtt);
-	if (ret)
-		return ret;
-
-	/*
-	 * Initialise stolen early so that we may reserve preallocated
-	 * objects for the BIOS to KMS transition.
-	 */
-	ret = i915_gem_init_stolen(dev_priv);
-	if (ret)
-		goto out_gtt_cleanup;
-
-	return 0;
-
-out_gtt_cleanup:
-	dev_priv->ggtt.vm.cleanup(&dev_priv->ggtt.vm);
-	return ret;
-}
-
-int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
-		return -EIO;
-
-	return 0;
-}
-
-void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
-{
-	GEM_BUG_ON(ggtt->invalidate != gen6_ggtt_invalidate);
-
-	ggtt->invalidate = guc_ggtt_invalidate;
-
-	ggtt->invalidate(ggtt);
-}
-
-void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
-{
-	/* XXX Temporary pardon for error unload */
-	if (ggtt->invalidate == gen6_ggtt_invalidate)
-		return;
-
-	/* We should only be called after i915_ggtt_enable_guc() */
-	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
-
-	ggtt->invalidate = gen6_ggtt_invalidate;
-
-	ggtt->invalidate(ggtt);
-}
-
-static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
-{
-	struct i915_vma *vma, *vn;
-	bool flush = false;
-
-	intel_gt_check_and_clear_faults(ggtt->vm.gt);
-
-	mutex_lock(&ggtt->vm.mutex);
-
-	/* First fill our portion of the GTT with scratch pages */
-	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
-
-	/* clflush objects bound into the GGTT and rebind them. */
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
-		struct drm_i915_gem_object *obj = vma->obj;
-
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
-			continue;
-
-		mutex_unlock(&ggtt->vm.mutex);
-
-		if (!i915_vma_unbind(vma))
-			goto lock;
-
-		WARN_ON(i915_vma_bind(vma,
-				      obj ? obj->cache_level : 0,
-				      PIN_UPDATE));
-		if (obj) { /* only used during resume => exclusive access */
-			flush |= fetch_and_zero(&obj->write_domain);
-			obj->read_domains |= I915_GEM_DOMAIN_GTT;
-		}
-
-lock:
-		mutex_lock(&ggtt->vm.mutex);
-	}
-
-	ggtt->vm.closed = false;
-	ggtt->invalidate(ggtt);
-
-	mutex_unlock(&ggtt->vm.mutex);
-
-	if (flush)
-		wbinvd_on_all_cpus();
-}
-
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
-{
-	ggtt_restore_mappings(&i915->ggtt);
-
-	if (INTEL_GEN(i915) >= 8)
-		setup_private_pat(i915);
-}
-
-static struct scatterlist *
-rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
-	     unsigned int width, unsigned int height,
-	     unsigned int stride,
-	     struct sg_table *st, struct scatterlist *sg)
-{
-	unsigned int column, row;
-	unsigned int src_idx;
-
-	for (column = 0; column < width; column++) {
-		src_idx = stride * (height - 1) + column + offset;
-		for (row = 0; row < height; row++) {
-			st->nents++;
-			/* We don't need the pages, but need to initialize
-			 * the entries so the sg list can be happily traversed.
-			 * The only thing we need are DMA addresses.
-			 */
-			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
-			sg_dma_address(sg) =
-				i915_gem_object_get_dma_address(obj, src_idx);
-			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
-			sg = sg_next(sg);
-			src_idx -= stride;
-		}
-	}
-
-	return sg;
-}
-
-static noinline struct sg_table *
-intel_rotate_pages(struct intel_rotation_info *rot_info,
-		   struct drm_i915_gem_object *obj)
-{
-	unsigned int size = intel_rotation_info_size(rot_info);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	int ret = -ENOMEM;
-	int i;
-
-	/* Allocate target SG list. */
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, size, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	st->nents = 0;
-	sg = st->sgl;
-
-	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
-		sg = rotate_pages(obj, rot_info->plane[i].offset,
-				  rot_info->plane[i].width, rot_info->plane[i].height,
-				  rot_info->plane[i].stride, st, sg);
-	}
-
-	return st;
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-
-	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
-
-	return ERR_PTR(ret);
-}
-
-static struct scatterlist *
-remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
-	    unsigned int width, unsigned int height,
-	    unsigned int stride,
-	    struct sg_table *st, struct scatterlist *sg)
-{
-	unsigned int row;
-
-	for (row = 0; row < height; row++) {
-		unsigned int left = width * I915_GTT_PAGE_SIZE;
-
-		while (left) {
-			dma_addr_t addr;
-			unsigned int length;
-
-			/* We don't need the pages, but need to initialize
-			 * the entries so the sg list can be happily traversed.
-			 * The only thing we need are DMA addresses.
-			 */
-
-			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
-
-			length = min(left, length);
-
-			st->nents++;
-
-			sg_set_page(sg, NULL, length, 0);
-			sg_dma_address(sg) = addr;
-			sg_dma_len(sg) = length;
-			sg = sg_next(sg);
-
-			offset += length / I915_GTT_PAGE_SIZE;
-			left -= length;
-		}
-
-		offset += stride - width;
-	}
-
-	return sg;
-}
-
-static noinline struct sg_table *
-intel_remap_pages(struct intel_remapped_info *rem_info,
-		  struct drm_i915_gem_object *obj)
-{
-	unsigned int size = intel_remapped_info_size(rem_info);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	int ret = -ENOMEM;
-	int i;
-
-	/* Allocate target SG list. */
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, size, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	st->nents = 0;
-	sg = st->sgl;
-
-	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
-		sg = remap_pages(obj, rem_info->plane[i].offset,
-				 rem_info->plane[i].width, rem_info->plane[i].height,
-				 rem_info->plane[i].stride, st, sg);
-	}
-
-	i915_sg_trim(st);
-
-	return st;
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-
-	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
-
-	return ERR_PTR(ret);
-}
-
-static noinline struct sg_table *
-intel_partial_pages(const struct i915_ggtt_view *view,
-		    struct drm_i915_gem_object *obj)
-{
-	struct sg_table *st;
-	struct scatterlist *sg, *iter;
-	unsigned int count = view->partial.size;
-	unsigned int offset;
-	int ret = -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, count, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
-	GEM_BUG_ON(!iter);
-
-	sg = st->sgl;
-	st->nents = 0;
-	do {
-		unsigned int len;
-
-		len = min(iter->length - (offset << PAGE_SHIFT),
-			  count << PAGE_SHIFT);
-		sg_set_page(sg, NULL, len, 0);
-		sg_dma_address(sg) =
-			sg_dma_address(iter) + (offset << PAGE_SHIFT);
-		sg_dma_len(sg) = len;
-
-		st->nents++;
-		count -= len >> PAGE_SHIFT;
-		if (count == 0) {
-			sg_mark_end(sg);
-			i915_sg_trim(st); /* Drop any unused tail entries. */
-
-			return st;
-		}
-
-		sg = __sg_next(sg);
-		iter = __sg_next(iter);
-		offset = 0;
-	} while (1);
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-	return ERR_PTR(ret);
-}
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma)
-{
-	int ret;
-
-	/* The vma->pages are only valid within the lifespan of the borrowed
-	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
-	 * must be the vma->pages. A simple rule is that vma->pages must only
-	 * be accessed when the obj->mm.pages are pinned.
-	 */
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
-
-	switch (vma->ggtt_view.type) {
-	default:
-		GEM_BUG_ON(vma->ggtt_view.type);
-		/* fall through */
-	case I915_GGTT_VIEW_NORMAL:
-		vma->pages = vma->obj->mm.pages;
-		return 0;
-
-	case I915_GGTT_VIEW_ROTATED:
-		vma->pages =
-			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
-		break;
-
-	case I915_GGTT_VIEW_REMAPPED:
-		vma->pages =
-			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
-		break;
-
-	case I915_GGTT_VIEW_PARTIAL:
-		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
-		break;
-	}
-
-	ret = 0;
-	if (IS_ERR(vma->pages)) {
-		ret = PTR_ERR(vma->pages);
-		vma->pages = NULL;
-		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
-			  vma->ggtt_view.type, ret);
-	}
-	return ret;
-}
-
 /**
  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
  * @vm: the &struct i915_address_space
@@ -3726,7 +197,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	u64 offset;
 	int err;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
+
 	GEM_BUG_ON(!size);
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
@@ -3821,6 +293,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_gtt.c"
 #include "selftests/i915_gem_gtt.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b97a47fc7a68..f6226df9f972 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -1,588 +1,21 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Please try to maintain the following order within this file unless it makes
- * sense to do otherwise. From top to bottom:
- * 1. typedefs
- * 2. #defines, and macros
- * 3. structure definitions
- * 4. function prototypes
- *
- * Within each section, please try to order by generation in ascending order,
- * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ * Copyright © 2020 Intel Corporation
  */
 
 #ifndef __I915_GEM_GTT_H__
 #define __I915_GEM_GTT_H__
 
 #include <linux/io-mapping.h>
-#include <linux/kref.h>
-#include <linux/mm.h>
-#include <linux/pagevec.h>
-#include <linux/workqueue.h>
+#include <linux/types.h>
 
 #include <drm/drm_mm.h>
 
-#include "gt/intel_reset.h"
-#include "i915_gem_fence_reg.h"
-#include "i915_request.h"
+#include "gt/intel_gtt.h"
 #include "i915_scatterlist.h"
-#include "i915_selftest.h"
-#include "gt/intel_timeline.h"
 
-#define I915_GTT_PAGE_SIZE_4K	BIT_ULL(12)
-#define I915_GTT_PAGE_SIZE_64K	BIT_ULL(16)
-#define I915_GTT_PAGE_SIZE_2M	BIT_ULL(21)
-
-#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
-#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
-
-#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
-
-#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
-
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
-struct drm_i915_file_private;
 struct drm_i915_gem_object;
-struct i915_vma;
-struct intel_gt;
-
-typedef u32 gen6_pte_t;
-typedef u64 gen8_pte_t;
-
-#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
-
-/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
-#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
-#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-#define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN6_PTE_UNCACHED		(1 << 1)
-#define GEN6_PTE_VALID			(1 << 0)
-
-#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
-#define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
-#define I915_PDES			512
-#define I915_PDE_MASK			(I915_PDES - 1)
-#define NUM_PTE(pde_shift)     (1 << (pde_shift - PAGE_SHIFT))
-
-#define GEN6_PTES			I915_PTES(sizeof(gen6_pte_t))
-#define GEN6_PD_SIZE		        (I915_PDES * PAGE_SIZE)
-#define GEN6_PD_ALIGN			(PAGE_SIZE * 16)
-#define GEN6_PDE_SHIFT			22
-#define GEN6_PDE_VALID			(1 << 0)
-
-#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
-
-#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
-#define BYT_PTE_WRITEABLE		(1 << 1)
-
-/* Cacheability Control is a 4-bit value. The low three bits are stored in bits
- * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
- */
-#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
-					 (((bits) & 0x8) << (11 - 3)))
-#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
-#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
-#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
-#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
-#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
-#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
-#define HSW_PTE_UNCACHED		(0)
-#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
-#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
-
-/*
- * GEN8 32b style address is defined as a 3 level page table:
- * 31:30 | 29:21 | 20:12 |  11:0
- * PDPE  |  PDE  |  PTE  | offset
- * The difference as compared to normal x86 3 level page table is the PDPEs are
- * programmed via register.
- *
- * GEN8 48b style address is defined as a 4 level page table:
- * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
- * PML4E | PDPE  |  PDE  |  PTE  | offset
- */
-#define GEN8_3LVL_PDPES			4
-
-#define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE			0 /* WB LLC */
-#define PPAT_CACHED			_PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC		_PAGE_PCD /* WT eLLC */
-
-#define CHV_PPAT_SNOOP			(1<<6)
-#define GEN8_PPAT_AGE(x)		((x)<<4)
-#define GEN8_PPAT_LLCeLLC		(3<<2)
-#define GEN8_PPAT_LLCELLC		(2<<2)
-#define GEN8_PPAT_LLC			(1<<2)
-#define GEN8_PPAT_WB			(3<<0)
-#define GEN8_PPAT_WT			(2<<0)
-#define GEN8_PPAT_WC			(1<<0)
-#define GEN8_PPAT_UC			(0<<0)
-#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
-#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
-
-#define GEN8_PDE_IPS_64K BIT(11)
-#define GEN8_PDE_PS_2M   BIT(7)
-
-#define for_each_sgt_dma(__dmap, __iter, __sgt) \
-	__for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE)
-
-struct intel_remapped_plane_info {
-	/* in gtt pages */
-	unsigned int width, height, stride, offset;
-} __packed;
-
-struct intel_remapped_info {
-	struct intel_remapped_plane_info plane[2];
-	unsigned int unused_mbz;
-} __packed;
-
-struct intel_rotation_info {
-	struct intel_remapped_plane_info plane[2];
-} __packed;
-
-struct intel_partial_info {
-	u64 offset;
-	unsigned int size;
-} __packed;
-
-enum i915_ggtt_view_type {
-	I915_GGTT_VIEW_NORMAL = 0,
-	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
-	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
-	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
-};
-
-static inline void assert_i915_gem_gtt_types(void)
-{
-	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
-	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
-	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
-
-	/* Check that rotation/remapped shares offsets for simplicity */
-	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
-		     offsetof(struct intel_rotation_info, plane[0]));
-	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
-		     offsetofend(struct intel_rotation_info, plane[1]));
-
-	/* As we encode the size of each branch inside the union into its type,
-	 * we have to be careful that each branch has a unique size.
-	 */
-	switch ((enum i915_ggtt_view_type)0) {
-	case I915_GGTT_VIEW_NORMAL:
-	case I915_GGTT_VIEW_PARTIAL:
-	case I915_GGTT_VIEW_ROTATED:
-	case I915_GGTT_VIEW_REMAPPED:
-		/* gcc complains if these are identical cases */
-		break;
-	}
-}
-
-struct i915_ggtt_view {
-	enum i915_ggtt_view_type type;
-	union {
-		/* Members need to contain no holes/padding */
-		struct intel_partial_info partial;
-		struct intel_rotation_info rotated;
-		struct intel_remapped_info remapped;
-	};
-};
-
-enum i915_cache_level;
-
-struct i915_vma;
-
-struct i915_page_dma {
-	struct page *page;
-	union {
-		dma_addr_t daddr;
-
-		/* For gen6/gen7 only. This is the offset in the GGTT
-		 * where the page directory entries for PPGTT begin
-		 */
-		u32 ggtt_offset;
-	};
-};
-
-struct i915_page_scratch {
-	struct i915_page_dma base;
-	u64 encode;
-};
-
-struct i915_page_table {
-	struct i915_page_dma base;
-	atomic_t used;
-};
-
-struct i915_page_directory {
-	struct i915_page_table pt;
-	spinlock_t lock;
-	void *entry[512];
-};
-
-#define __px_choose_expr(x, type, expr, other) \
-	__builtin_choose_expr( \
-	__builtin_types_compatible_p(typeof(x), type) || \
-	__builtin_types_compatible_p(typeof(x), const type), \
-	({ type __x = (type)(x); expr; }), \
-	other)
-
-#define px_base(px) \
-	__px_choose_expr(px, struct i915_page_dma *, __x, \
-	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
-	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
-	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
-	(void)0))))
-#define px_dma(px) (px_base(px)->daddr)
-
-#define px_pt(px) \
-	__px_choose_expr(px, struct i915_page_table *, __x, \
-	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
-	(void)0))
-#define px_used(px) (&px_pt(px)->used)
-
-struct i915_vma_ops {
-	/* Map an object into an address space with the given cache flags. */
-	int (*bind_vma)(struct i915_vma *vma,
-			enum i915_cache_level cache_level,
-			u32 flags);
-	/*
-	 * Unmap an object from an address space. This usually consists of
-	 * setting the valid PTE entries to a reserved scratch page.
-	 */
-	void (*unbind_vma)(struct i915_vma *vma);
-
-	int (*set_pages)(struct i915_vma *vma);
-	void (*clear_pages)(struct i915_vma *vma);
-};
-
-struct pagestash {
-	spinlock_t lock;
-	struct pagevec pvec;
-};
-
-struct i915_address_space {
-	struct kref ref;
-	struct rcu_work rcu;
-
-	struct drm_mm mm;
-	struct intel_gt *gt;
-	struct drm_i915_private *i915;
-	struct device *dma;
-	/* Every address space belongs to a struct file - except for the global
-	 * GTT that is owned by the driver (and so @file is set to NULL). In
-	 * principle, no information should leak from one context to another
-	 * (or between files/processes etc) unless explicitly shared by the
-	 * owner. Tracking the owner is important in order to free up per-file
-	 * objects along with the file, to aide resource tracking, and to
-	 * assign blame.
-	 */
-	struct drm_i915_file_private *file;
-	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
-	u64 reserved;		/* size addr space reserved */
-
-	bool closed;
-
-	struct mutex mutex; /* protects vma and our lists */
-#define VM_CLASS_GGTT 0
-#define VM_CLASS_PPGTT 1
-
-	struct i915_page_scratch scratch[4];
-	unsigned int scratch_order;
-	unsigned int top;
-
-	/**
-	 * List of vma currently bound.
-	 */
-	struct list_head bound_list;
-
-	/**
-	 * List of vma that are not unbound.
-	 */
-	struct list_head unbound_list;
-
-	struct pagestash free_pages;
-
-	/* Global GTT */
-	bool is_ggtt:1;
-
-	/* Some systems require uncached updates of the page directories */
-	bool pt_kmap_wc:1;
-
-	/* Some systems support read-only mappings for GGTT and/or PPGTT */
-	bool has_read_only:1;
-
-	u64 (*pte_encode)(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags); /* Create a valid PTE */
-#define PTE_READ_ONLY	(1<<0)
-
-	int (*allocate_va_range)(struct i915_address_space *vm,
-				 u64 start, u64 length);
-	void (*clear_range)(struct i915_address_space *vm,
-			    u64 start, u64 length);
-	void (*insert_page)(struct i915_address_space *vm,
-			    dma_addr_t addr,
-			    u64 offset,
-			    enum i915_cache_level cache_level,
-			    u32 flags);
-	void (*insert_entries)(struct i915_address_space *vm,
-			       struct i915_vma *vma,
-			       enum i915_cache_level cache_level,
-			       u32 flags);
-	void (*cleanup)(struct i915_address_space *vm);
-
-	struct i915_vma_ops vma_ops;
-
-	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
-	I915_SELFTEST_DECLARE(bool scrub_64K);
-};
-
-#define i915_is_ggtt(vm) ((vm)->is_ggtt)
-
-static inline bool
-i915_vm_is_4lvl(const struct i915_address_space *vm)
-{
-	return (vm->total - 1) >> 32;
-}
-
-static inline bool
-i915_vm_has_scratch_64K(struct i915_address_space *vm)
-{
-	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
-}
-
-/* The Graphics Translation Table is the way in which GEN hardware translates a
- * Graphics Virtual Address into a Physical Address. In addition to the normal
- * collateral associated with any va->pa translations GEN hardware also has a
- * portion of the GTT which can be mapped by the CPU and remain both coherent
- * and correct (in cases like swizzling). That region is referred to as GMADR in
- * the spec.
- */
-struct i915_ggtt {
-	struct i915_address_space vm;
-
-	struct io_mapping iomap;	/* Mapping to our CPU mappable region */
-	struct resource gmadr;          /* GMADR resource */
-	resource_size_t mappable_end;	/* End offset that we can CPU map */
-
-	/** "Graphics Stolen Memory" holds the global PTEs */
-	void __iomem *gsm;
-	void (*invalidate)(struct i915_ggtt *ggtt);
-
-	/** PPGTT used for aliasing the PPGTT with the GTT */
-	struct i915_ppgtt *alias;
-
-	bool do_idle_maps;
-
-	int mtrr;
-
-	u32 pin_bias;
-
-	unsigned int num_fences;
-	struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
-	struct list_head fence_list;
-
-	/** List of all objects in gtt_space, currently mmaped by userspace.
-	 * All objects within this list must also be on bound_list.
-	 */
-	struct list_head userfault_list;
-
-	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
-	struct intel_wakeref_auto userfault_wakeref;
-
-	struct drm_mm_node error_capture;
-	struct drm_mm_node uc_fw;
-};
-
-struct i915_ppgtt {
-	struct i915_address_space vm;
-
-	intel_engine_mask_t pd_dirty_engines;
-	struct i915_page_directory *pd;
-};
-
-struct gen6_ppgtt {
-	struct i915_ppgtt base;
-
-	struct i915_vma *vma;
-	gen6_pte_t __iomem *pd_addr;
-
-	unsigned int pin_count;
-	bool scan_for_unused_pt;
-};
-
-#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
-
-static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
-{
-	BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
-	return __to_gen6_ppgtt(base);
-}
-
-/*
- * gen6_for_each_pde() iterates over every pde from start until start+length.
- * If start and start+length are not perfectly divisible, the macro will round
- * down and up as needed. Start=0 and length=2G effectively iterates over
- * every PDE in the system. The macro modifies ALL its parameters except 'pd',
- * so each of the other parameters should preferably be a simple variable, or
- * at most an lvalue with no side-effects!
- */
-#define gen6_for_each_pde(pt, pd, start, length, iter)			\
-	for (iter = gen6_pde_index(start);				\
-	     length > 0 && iter < I915_PDES &&				\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT);		\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen6_for_all_pdes(pt, pd, iter)					\
-	for (iter = 0;							\
-	     iter < I915_PDES &&					\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ++iter)
-
-static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
-{
-	const u32 mask = NUM_PTE(pde_shift) - 1;
-
-	return (address >> PAGE_SHIFT) & mask;
-}
-
-/* Helper to counts the number of PTEs within the given length. This count
- * does not cross a page table boundary, so the max value would be
- * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
-*/
-static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
-{
-	const u64 mask = ~((1ULL << pde_shift) - 1);
-	u64 end;
-
-	GEM_BUG_ON(length == 0);
-	GEM_BUG_ON(offset_in_page(addr | length));
-
-	end = addr + length;
-
-	if ((addr & mask) != (end & mask))
-		return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
-
-	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
-}
-
-static inline u32 i915_pde_index(u64 addr, u32 shift)
-{
-	return (addr >> shift) & I915_PDE_MASK;
-}
-
-static inline u32 gen6_pte_index(u32 addr)
-{
-	return i915_pte_index(addr, GEN6_PDE_SHIFT);
-}
-
-static inline u32 gen6_pte_count(u32 addr, u32 length)
-{
-	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
-}
-
-static inline u32 gen6_pde_index(u32 addr)
-{
-	return i915_pde_index(addr, GEN6_PDE_SHIFT);
-}
-
-static inline struct i915_page_table *
-i915_pt_entry(const struct i915_page_directory * const pd,
-	      const unsigned short n)
-{
-	return pd->entry[n];
-}
-
-static inline struct i915_page_directory *
-i915_pd_entry(const struct i915_page_directory * const pdp,
-	      const unsigned short n)
-{
-	return pdp->entry[n];
-}
-
-static inline dma_addr_t
-i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
-{
-	struct i915_page_dma *pt = ppgtt->pd->entry[n];
-
-	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
-}
-
-static inline struct i915_ggtt *
-i915_vm_to_ggtt(struct i915_address_space *vm)
-{
-	BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
-	GEM_BUG_ON(!i915_is_ggtt(vm));
-	return container_of(vm, struct i915_ggtt, vm);
-}
-
-static inline struct i915_ppgtt *
-i915_vm_to_ppgtt(struct i915_address_space *vm)
-{
-	BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
-	GEM_BUG_ON(i915_is_ggtt(vm));
-	return container_of(vm, struct i915_ppgtt, vm);
-}
-
-int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
-int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
-void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
-void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
-int i915_init_ggtt(struct drm_i915_private *dev_priv);
-void i915_ggtt_driver_release(struct drm_i915_private *dev_priv);
-
-int i915_ppgtt_init_hw(struct intel_gt *gt);
-
-struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv);
-
-static inline struct i915_address_space *
-i915_vm_get(struct i915_address_space *vm)
-{
-	kref_get(&vm->ref);
-	return vm;
-}
-
-void i915_vm_release(struct kref *kref);
-
-static inline void i915_vm_put(struct i915_address_space *vm)
-{
-	kref_put(&vm->ref, i915_vm_release);
-}
-
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
-
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv);
+struct i915_address_space;
 
 int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 					    struct sg_table *pages);
@@ -609,11 +42,10 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
 
-#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(11)
+#define PIN_UPDATE		BIT_ULL(9)
+#define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
 
-#define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
+#define PIN_OFFSET_MASK		I915_GTT_PAGE_MASK
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 9f1517af5b7f..54fce81d5724 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -2,9 +2,11 @@
  * SPDX-License-Identifier: MIT
  */
 
+#include "gem/i915_gem_mman.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 
 int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
@@ -79,8 +81,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	case I915_PARAM_HAS_GPU_RESET:
 		value = i915_modparams.enable_hangcheck &&
-			intel_has_gpu_reset(i915);
-		if (value && intel_has_reset_engine(i915))
+			intel_has_gpu_reset(&i915->gt);
+		if (value && intel_has_reset_engine(&i915->gt))
 			value = 2;
 		break;
 	case I915_PARAM_HAS_RESOURCE_STREAMER:
@@ -156,6 +158,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_MMAP_GTT_COHERENT:
 		value = INTEL_INFO(i915)->has_coherent_ggtt;
 		break;
+	case I915_PARAM_PERF_REVISION:
+		value = i915_perf_ioctl_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index be127cd28931..3aa213684293 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -20,7 +20,10 @@ static LIST_HEAD(globals);
 static atomic_t active;
 static atomic_t epoch;
 static struct park_work {
-	struct rcu_work work;
+	struct delayed_work work;
+	struct rcu_head rcu;
+	unsigned long flags;
+#define PENDING 0
 	int epoch;
 } park;
 
@@ -37,11 +40,33 @@ static void i915_globals_shrink(void)
 		global->shrink();
 }
 
+static void __i915_globals_grace(struct rcu_head *rcu)
+{
+	/* Ratelimit parking as shrinking is quite slow */
+	schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ));
+}
+
+static void __i915_globals_queue_rcu(void)
+{
+	park.epoch = atomic_inc_return(&epoch);
+	if (!atomic_read(&active)) {
+		init_rcu_head(&park.rcu);
+		call_rcu(&park.rcu, __i915_globals_grace);
+	}
+}
+
 static void __i915_globals_park(struct work_struct *work)
 {
+	destroy_rcu_head(&park.rcu);
+
 	/* Confirm nothing woke up in the last grace period */
-	if (park.epoch == atomic_read(&epoch))
-		i915_globals_shrink();
+	if (park.epoch != atomic_read(&epoch)) {
+		__i915_globals_queue_rcu();
+		return;
+	}
+
+	clear_bit(PENDING, &park.flags);
+	i915_globals_shrink();
 }
 
 void __init i915_global_register(struct i915_global *global)
@@ -85,7 +110,7 @@ int __init i915_globals_init(void)
 		}
 	}
 
-	INIT_RCU_WORK(&park.work, __i915_globals_park);
+	INIT_DELAYED_WORK(&park.work, __i915_globals_park);
 	return 0;
 }
 
@@ -103,8 +128,9 @@ void i915_globals_park(void)
 	if (!atomic_dec_and_test(&active))
 		return;
 
-	park.epoch = atomic_inc_return(&epoch);
-	queue_rcu_work(system_wq, &park.work);
+	/* Queue cleanup after the next RCU grace period has freed slabs */
+	if (!test_and_set_bit(PENDING, &park.flags))
+		__i915_globals_queue_rcu();
 }
 
 void i915_globals_unpark(void)
@@ -113,12 +139,21 @@ void i915_globals_unpark(void)
 	atomic_inc(&active);
 }
 
+static void __exit __i915_globals_flush(void)
+{
+	atomic_inc(&active); /* skip shrinking */
+
+	rcu_barrier(); /* wait for the work to be queued */
+	flush_delayed_work(&park.work);
+
+	atomic_dec(&active);
+}
+
 void __exit i915_globals_exit(void)
 {
-	/* Flush any residual park_work */
-	atomic_inc(&epoch);
-	flush_rcu_work(&park.work);
+	GEM_BUG_ON(atomic_read(&active));
 
+	__i915_globals_flush();
 	__i915_globals_cleanup();
 
 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e284bd76fa86..4c1836f0a991 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -40,6 +40,8 @@
 #include "display/intel_overlay.h"
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_lmem.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
@@ -231,13 +233,13 @@ static void pool_free(struct pagevec *pv, void *addr)
 
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
 	struct z_stream_s zstream;
 	void *tmp;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -259,7 +261,7 @@ static bool compress_init(struct compress *c)
 	return true;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 	void *workspace = zstream->workspace;
@@ -270,8 +272,8 @@ static bool compress_start(struct compress *c)
 	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
 }
 
-static void *compress_next_page(struct compress *c,
-				struct drm_i915_error_object *dst)
+static void *compress_next_page(struct i915_vma_compress *c,
+				struct i915_vma_coredump *dst)
 {
 	void *page;
 
@@ -285,14 +287,15 @@ static void *compress_next_page(struct compress *c,
 	return dst->pages[dst->page_count++] = page;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
 		zstream->next_in = c->tmp;
 	zstream->avail_in = PAGE_SIZE;
 
@@ -316,8 +319,8 @@ static int compress_page(struct compress *c,
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -345,12 +348,12 @@ end:
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 	zlib_deflateEnd(&c->zstream);
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	kfree(c->zstream.workspace);
 	if (c->tmp)
@@ -365,23 +368,24 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 
 #else
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	return pool_init(&c->pool, ALLOW_FAIL) == 0;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	return true;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	void *ptr;
 
@@ -389,24 +393,24 @@ static int compress_page(struct compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
+	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
 
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	pool_fini(&c->pool);
 }
@@ -419,8 +423,9 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 #endif
 
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
-				 const struct drm_i915_error_engine *ee)
+				 const struct intel_engine_coredump *ee)
 {
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu;
 	int slice;
 	int subslice;
 
@@ -436,12 +441,12 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 	if (INTEL_GEN(m->i915) <= 6)
 		return;
 
-	for_each_instdone_slice_subslice(m->i915, slice, subslice)
+	for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
 		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
 			   slice, subslice,
 			   ee->instdone.sampler[slice][subslice]);
 
-	for_each_instdone_slice_subslice(m->i915, slice, subslice)
+	for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
 		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
 			   slice, subslice,
 			   ee->instdone.row[slice][subslice]);
@@ -449,40 +454,56 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq,
-				const unsigned long epoch)
+				const struct i915_request_coredump *erq)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->context, erq->seqno,
 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 			    &erq->flags) ? "!" : "",
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			    &erq->flags) ? "+" : "",
 		   erq->sched_attr.priority,
-		   jiffies_to_msecs(erq->jiffies - epoch),
 		   erq->start, erq->head, erq->tail);
 }
 
 static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
-				const struct drm_i915_error_context *ctx)
+				const struct i915_gem_context_coredump *ctx)
 {
-	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->hw_id,
-		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
+	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
+		   ctx->guilty, ctx->active);
+}
+
+static struct i915_vma_coredump *
+__find_vma(struct i915_vma_coredump *vma, const char *name)
+{
+	while (vma) {
+		if (strcmp(vma->name, name) == 0)
+			return vma;
+		vma = vma->next;
+	}
+
+	return NULL;
+}
+
+static struct i915_vma_coredump *
+find_batch(const struct intel_engine_coredump *ee)
+{
+	return __find_vma(ee->vma, "batch");
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee,
-			       const unsigned long epoch)
+			       const struct intel_engine_coredump *ee)
 {
+	struct i915_vma_coredump *batch;
 	int n;
 
 	err_printf(m, "%s command stream:\n", ee->engine->name);
-	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
+	err_printf(m, "  CCID:  0x%08x\n", ee->ccid);
 	err_printf(m, "  START: 0x%08x\n", ee->start);
 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
 	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
@@ -497,9 +518,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 
 	error_print_instdone(m, ee);
 
-	if (ee->batchbuffer) {
-		u64 start = ee->batchbuffer->gtt_offset;
-		u64 end = start + ee->batchbuffer->gtt_size;
+	batch = find_batch(ee);
+	if (batch) {
+		u64 start = batch->gtt_offset;
+		u64 end = start + batch->gtt_size;
 
 		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
 			   upper_32_bits(start), lower_32_bits(start),
@@ -531,17 +553,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 				   ee->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
-	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
-	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
-		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
-		   ee->hangcheck_timestamp,
-		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n], epoch);
+		error_print_request(m, " ", &ee->execlist[n]);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -556,35 +572,35 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-static void print_error_obj(struct drm_i915_error_state_buf *m,
+static void print_error_vma(struct drm_i915_error_state_buf *m,
 			    const struct intel_engine_cs *engine,
-			    const char *name,
-			    const struct drm_i915_error_object *obj)
+			    const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	int page;
 
-	if (!obj)
+	if (!vma)
 		return;
 
-	if (name) {
-		err_printf(m, "%s --- %s = 0x%08x %08x\n",
-			   engine ? engine->name : "global", name,
-			   upper_32_bits(obj->gtt_offset),
-			   lower_32_bits(obj->gtt_offset));
-	}
+	err_printf(m, "%s --- %s = 0x%08x %08x\n",
+		   engine ? engine->name : "global", vma->name,
+		   upper_32_bits(vma->gtt_offset),
+		   lower_32_bits(vma->gtt_offset));
+
+	if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
+		err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
 
 	err_compression_marker(m);
-	for (page = 0; page < obj->page_count; page++) {
+	for (page = 0; page < vma->page_count; page++) {
 		int i, len;
 
 		len = PAGE_SIZE;
-		if (page == obj->page_count - 1)
-			len -= obj->unused;
+		if (page == vma->page_count - 1)
+			len -= vma->unused;
 		len = ascii85_encode_len(len);
 
 		for (i = 0; i < len; i++)
-			err_puts(m, ascii85_encode(obj->pages[page][i], out));
+			err_puts(m, ascii85_encode(vma->pages[page][i], out));
 	}
 	err_puts(m, "\n");
 }
@@ -596,9 +612,10 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m,
 {
 	struct drm_printer p = i915_error_printer(m);
 
-	intel_device_info_dump_flags(info, &p);
+	intel_device_info_print_static(info, &p);
+	intel_device_info_print_runtime(runtime, &p);
+	intel_device_info_print_topology(&runtime->sseu, &p);
 	intel_driver_caps_print(caps, &p);
-	intel_device_info_dump_topology(&runtime->sseu, &p);
 }
 
 static void err_print_params(struct drm_i915_error_state_buf *m,
@@ -622,18 +639,13 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
 }
 
 static void err_print_uc(struct drm_i915_error_state_buf *m,
-			 const struct i915_error_uc *error_uc)
+			 const struct intel_uc_coredump *error_uc)
 {
 	struct drm_printer p = i915_error_printer(m);
-	const struct i915_gpu_state *error =
-		container_of(error_uc, typeof(*error), uc);
-
-	if (!error->device_info.has_gt_uc)
-		return;
 
 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
-	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
+	print_error_vma(m, NULL, error_uc->guc_log);
 }
 
 static void err_free_sgl(struct scatterlist *sgl)
@@ -653,12 +665,69 @@ static void err_free_sgl(struct scatterlist *sgl)
 	}
 }
 
+static void err_print_gt(struct drm_i915_error_state_buf *m,
+			 struct intel_gt_coredump *gt)
+{
+	const struct intel_engine_coredump *ee;
+	int i;
+
+	err_printf(m, "GT awake: %s\n", yesno(gt->awake));
+	err_printf(m, "EIR: 0x%08x\n", gt->eir);
+	err_printf(m, "IER: 0x%08x\n", gt->ier);
+	for (i = 0; i < gt->ngtier; i++)
+		err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
+	err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+	err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
+	err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+
+	for (i = 0; i < gt->nfence; i++)
+		err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
+
+	if (IS_GEN_RANGE(m->i915, 6, 11)) {
+		err_printf(m, "ERROR: 0x%08x\n", gt->error);
+		err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg);
+	}
+
+	if (INTEL_GEN(m->i915) >= 8)
+		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
+			   gt->fault_data1, gt->fault_data0);
+
+	if (IS_GEN(m->i915, 7))
+		err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int);
+
+	if (IS_GEN_RANGE(m->i915, 8, 11))
+		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache);
+
+	if (IS_GEN(m->i915, 12))
+		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err);
+
+	if (INTEL_GEN(m->i915) >= 12) {
+		int i;
+
+		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
+			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
+				   gt->sfc_done[i]);
+
+		err_printf(m, "  GAM_DONE: 0x%08x\n", gt->gam_done);
+	}
+
+	for (ee = gt->engine; ee; ee = ee->next) {
+		const struct i915_vma_coredump *vma;
+
+		error_print_engine(m, ee);
+		for (vma = ee->vma; vma; vma = vma->next)
+			print_error_vma(m, ee->engine, vma);
+	}
+
+	if (gt->uc)
+		err_print_uc(m, gt->uc);
+}
+
 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
-			       struct i915_gpu_state *error)
+			       struct i915_gpu_coredump *error)
 {
-	const struct drm_i915_error_engine *ee;
+	const struct intel_engine_coredump *ee;
 	struct timespec64 ts;
-	int i, j;
 
 	if (*error->error_msg)
 		err_printf(m, "%s\n", error->error_msg);
@@ -675,13 +744,10 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
-	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
-	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
-		   error->capture,
-		   jiffies_to_msecs(jiffies - error->capture),
-		   jiffies_to_msecs(error->capture - error->epoch));
+	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
+		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
-	for (ee = error->engine; ee; ee = ee->next)
+	for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
 			   ee->engine->name,
 			   ee->context.comm,
@@ -707,74 +773,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			   CSR_VERSION_MINOR(csr->version));
 	}
 
-	err_printf(m, "GT awake: %s\n", yesno(error->awake));
 	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
 	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
-	err_printf(m, "EIR: 0x%08x\n", error->eir);
-	err_printf(m, "IER: 0x%08x\n", error->ier);
-	for (i = 0; i < error->ngtier; i++)
-		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
-	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
-	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-
-	for (i = 0; i < error->nfence; i++)
-		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
-	if (IS_GEN_RANGE(m->i915, 6, 11)) {
-		err_printf(m, "ERROR: 0x%08x\n", error->error);
-		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
-	}
-
-	if (INTEL_GEN(m->i915) >= 8)
-		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
-			   error->fault_data1, error->fault_data0);
-
-	if (IS_GEN(m->i915, 7))
-		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
-
-	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->epoch);
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		const struct drm_i915_error_object *obj;
-
-		obj = ee->batchbuffer;
-		if (obj) {
-			err_puts(m, ee->engine->name);
-			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d])",
-					   ee->context.comm,
-					   ee->context.pid);
-			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
-				   upper_32_bits(obj->gtt_offset),
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, ee->engine, NULL, obj);
-		}
-
-		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, ee->engine, "user", ee->user_bo[j]);
-
-		if (ee->num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   ee->engine->name,
-				   ee->num_requests);
-			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ",
-						    &ee->requests[j],
-						    error->epoch);
-		}
-
-		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
-		print_error_obj(m, ee->engine, "HW Status", ee->hws_page);
-		print_error_obj(m, ee->engine, "HW context", ee->ctx);
-		print_error_obj(m, ee->engine, "WA context", ee->wa_ctx);
-		print_error_obj(m, ee->engine,
-				"WA batchbuffer", ee->wa_batchbuffer);
-		print_error_obj(m, ee->engine,
-				"NULL context", ee->default_state);
-	}
+	if (error->gt)
+		err_print_gt(m, error->gt);
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
@@ -785,10 +788,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info, &error->runtime_info,
 			       &error->driver_caps);
 	err_print_params(m, &error->params);
-	err_print_uc(m, &error->uc);
 }
 
-static int err_print_to_sgl(struct i915_gpu_state *error)
+static int err_print_to_sgl(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_error_state_buf m;
 
@@ -825,8 +827,8 @@ static int err_print_to_sgl(struct i915_gpu_state *error)
 	return 0;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t off, size_t rem)
+ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+					 char *buf, loff_t off, size_t rem)
 {
 	struct scatterlist *sg;
 	size_t count;
@@ -889,86 +891,88 @@ ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
 	return count;
 }
 
-static void i915_error_object_free(struct drm_i915_error_object *obj)
+static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
 {
-	int page;
+	while (vma) {
+		struct i915_vma_coredump *next = vma->next;
+		int page;
 
-	if (obj == NULL)
-		return;
+		for (page = 0; page < vma->page_count; page++)
+			free_page((unsigned long)vma->pages[page]);
 
-	for (page = 0; page < obj->page_count; page++)
-		free_page((unsigned long)obj->pages[page]);
-
-	kfree(obj);
+		kfree(vma);
+		vma = next;
+	}
 }
 
-
-static void cleanup_params(struct i915_gpu_state *error)
+static void cleanup_params(struct i915_gpu_coredump *error)
 {
 	i915_params_free(&error->params);
 }
 
-static void cleanup_uc_state(struct i915_gpu_state *error)
+static void cleanup_uc(struct intel_uc_coredump *uc)
 {
-	struct i915_error_uc *error_uc = &error->uc;
+	kfree(uc->guc_fw.path);
+	kfree(uc->huc_fw.path);
+	i915_vma_coredump_free(uc->guc_log);
 
-	kfree(error_uc->guc_fw.path);
-	kfree(error_uc->huc_fw.path);
-	i915_error_object_free(error_uc->guc_log);
+	kfree(uc);
 }
 
-void __i915_gpu_state_free(struct kref *error_ref)
+static void cleanup_gt(struct intel_gt_coredump *gt)
 {
-	struct i915_gpu_state *error =
-		container_of(error_ref, typeof(*error), ref);
-	long i;
+	while (gt->engine) {
+		struct intel_engine_coredump *ee = gt->engine;
+
+		gt->engine = ee->next;
+
+		i915_vma_coredump_free(ee->vma);
+		kfree(ee);
+	}
 
-	while (error->engine) {
-		struct drm_i915_error_engine *ee = error->engine;
+	if (gt->uc)
+		cleanup_uc(gt->uc);
 
-		error->engine = ee->next;
+	kfree(gt);
+}
 
-		for (i = 0; i < ee->user_bo_count; i++)
-			i915_error_object_free(ee->user_bo[i]);
-		kfree(ee->user_bo);
+void __i915_gpu_coredump_free(struct kref *error_ref)
+{
+	struct i915_gpu_coredump *error =
+		container_of(error_ref, typeof(*error), ref);
 
-		i915_error_object_free(ee->batchbuffer);
-		i915_error_object_free(ee->wa_batchbuffer);
-		i915_error_object_free(ee->ringbuffer);
-		i915_error_object_free(ee->hws_page);
-		i915_error_object_free(ee->ctx);
-		i915_error_object_free(ee->wa_ctx);
+	while (error->gt) {
+		struct intel_gt_coredump *gt = error->gt;
 
-		kfree(ee->requests);
-		kfree(ee);
+		error->gt = gt->next;
+		cleanup_gt(gt);
 	}
 
 	kfree(error->overlay);
 	kfree(error->display);
 
 	cleanup_params(error);
-	cleanup_uc_state(error);
 
 	err_free_sgl(error->sgl);
 	kfree(error);
 }
 
-static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *i915,
-			 struct i915_vma *vma,
-			 struct compress *compress)
+static struct i915_vma_coredump *
+i915_vma_coredump_create(const struct intel_gt *gt,
+			 const struct i915_vma *vma,
+			 const char *name,
+			 struct i915_vma_compress *compress)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	const u64 slot = ggtt->error_capture.start;
-	struct drm_i915_error_object *dst;
+	struct i915_vma_coredump *dst;
 	unsigned long num_pages;
 	struct sgt_iter iter;
-	dma_addr_t dma;
 	int ret;
 
 	might_sleep();
 
-	if (!vma || !vma->pages)
+	if (!vma || !vma->pages || !compress)
 		return NULL;
 
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
@@ -982,23 +986,66 @@ i915_error_object_create(struct drm_i915_private *i915,
 		return NULL;
 	}
 
+	strcpy(dst->name, name);
+	dst->next = NULL;
+
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
+	dst->gtt_page_sizes = vma->page_sizes.gtt;
 	dst->num_pages = num_pages;
 	dst->page_count = 0;
 	dst->unused = 0;
 
 	ret = -EINVAL;
-	for_each_sgt_dma(dma, iter, vma->pages) {
+	if (drm_mm_node_allocated(&ggtt->error_capture)) {
 		void __iomem *s;
+		dma_addr_t dma;
+
+		for_each_sgt_daddr(dma, iter, vma->pages) {
+			ggtt->vm.insert_page(&ggtt->vm, dma, slot,
+					     I915_CACHE_NONE, 0);
+			mb();
+
+			s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
+			ret = compress_page(compress,
+					    (void  __force *)s, dst,
+					    true);
+			io_mapping_unmap(s);
+			if (ret)
+				break;
+		}
+	} else if (i915_gem_object_is_lmem(vma->obj)) {
+		struct intel_memory_region *mem = vma->obj->mm.region;
+		dma_addr_t dma;
+
+		for_each_sgt_daddr(dma, iter, vma->pages) {
+			void __iomem *s;
+
+			s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
+			ret = compress_page(compress,
+					    (void __force *)s, dst,
+					    true);
+			io_mapping_unmap(s);
+			if (ret)
+				break;
+		}
+	} else {
+		struct page *page;
 
-		ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
+		for_each_sgt_page(page, iter, vma->pages) {
+			void *s;
 
-		s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
-		ret = compress_page(compress, (void  __force *)s, dst);
-		io_mapping_unmap(s);
-		if (ret)
-			break;
+			drm_clflush_pages(&page, 1);
+
+			s = kmap(page);
+			ret = compress_page(compress, s, dst, false);
+			kunmap(page);
+
+			drm_clflush_pages(&page, 1);
+
+			if (ret)
+				break;
+		}
 	}
 
 	if (ret || compress_flush(compress, dst)) {
@@ -1012,77 +1059,56 @@ i915_error_object_create(struct drm_i915_private *i915,
 	return dst;
 }
 
-/*
- * Generate a semi-unique error code. The code is not meant to have meaning, The
- * code's only purpose is to try to prevent false duplicated bug reports by
- * grossly estimating a GPU error state.
- *
- * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
- * the hang if we could strip the GTT offset information from it.
- *
- * It's only a small step better than a random number in its current form.
- */
-static u32 i915_error_generate_code(struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee = error->engine;
-
-	/*
-	 * IPEHR would be an ideal way to detect errors, as it's the gross
-	 * measure of "the command that hung." However, has some very common
-	 * synchronization commands which almost always appear in the case
-	 * strictly a client bug. Use instdone to differentiate those some.
-	 */
-	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
-}
-
-static void gem_record_fences(struct i915_gpu_state *error)
+static void gt_record_fences(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *dev_priv = error->i915;
-	struct intel_uncore *uncore = &dev_priv->uncore;
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+	struct intel_uncore *uncore = gt->_gt->uncore;
 	int i;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	if (INTEL_GEN(uncore->i915) >= 6) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_GEN6_LO(i));
-	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	} else if (INTEL_GEN(uncore->i915) >= 4) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read(uncore, FENCE_REG(i));
 	}
-	error->nfence = i;
+	gt->nfence = i;
 }
 
-static void error_record_engine_registers(struct i915_gpu_state *error,
-					  struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
+static void engine_record_registers(struct intel_engine_coredump *ee)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	const struct intel_engine_cs *engine = ee->engine;
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
 
-		if (INTEL_GEN(dev_priv) >= 12)
-			ee->fault_reg = I915_READ(GEN12_RING_FAULT_REG);
-		else if (INTEL_GEN(dev_priv) >= 8)
-			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
+		if (INTEL_GEN(i915) >= 12)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN12_RING_FAULT_REG);
+		else if (INTEL_GEN(i915) >= 8)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN8_RING_FAULT_REG);
 		else
 			ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine);
 	}
 
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (INTEL_GEN(i915) >= 4) {
 		ee->faddr = ENGINE_READ(engine, RING_DMA_FADD);
 		ee->ipeir = ENGINE_READ(engine, RING_IPEIR);
 		ee->ipehr = ENGINE_READ(engine, RING_IPEHR);
 		ee->instps = ENGINE_READ(engine, RING_INSTPS);
 		ee->bbaddr = ENGINE_READ(engine, RING_BBADDR);
-		if (INTEL_GEN(dev_priv) >= 8) {
+		ee->ccid = ENGINE_READ(engine, CCID);
+		if (INTEL_GEN(i915) >= 8) {
 			ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32;
 			ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32;
 		}
@@ -1101,13 +1127,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	ee->head = ENGINE_READ(engine, RING_HEAD);
 	ee->tail = ENGINE_READ(engine, RING_TAIL);
 	ee->ctl = ENGINE_READ(engine, RING_CTL);
-	if (INTEL_GEN(dev_priv) > 2)
+	if (INTEL_GEN(i915) > 2)
 		ee->mode = ENGINE_READ(engine, RING_MI_MODE);
 
-	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
+	if (!HWS_NEEDS_PHYSICAL(i915)) {
 		i915_reg_t mmio;
 
-		if (IS_GEN(dev_priv, 7)) {
+		if (IS_GEN(i915, 7)) {
 			switch (engine->id) {
 			default:
 				MISSING_CASE(engine->id);
@@ -1132,112 +1158,63 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 			mmio = RING_HWS_PGA(engine->mmio_base);
 		}
 
-		ee->hws = I915_READ(mmio);
+		ee->hws = intel_uncore_read(engine->uncore, mmio);
 	}
 
-	ee->idle = intel_engine_is_idle(engine);
-	if (!ee->idle)
-		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
-	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
-						  engine);
+	ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine);
 
-	if (HAS_PPGTT(dev_priv)) {
+	if (HAS_PPGTT(i915)) {
 		int i;
 
 		ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7);
 
-		if (IS_GEN(dev_priv, 6)) {
+		if (IS_GEN(i915, 6)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE_READ);
-		} else if (IS_GEN(dev_priv, 7)) {
+		} else if (IS_GEN(i915, 7)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE);
-		} else if (INTEL_GEN(dev_priv) >= 8) {
+		} else if (INTEL_GEN(i915) >= 8) {
 			u32 base = engine->mmio_base;
 
 			for (i = 0; i < 4; i++) {
 				ee->vm_info.pdp[i] =
-					I915_READ(GEN8_RING_PDP_UDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_UDW(base, i));
 				ee->vm_info.pdp[i] <<= 32;
 				ee->vm_info.pdp[i] |=
-					I915_READ(GEN8_RING_PDP_LDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_LDW(base, i));
 			}
 		}
 	}
 }
 
 static void record_request(const struct i915_request *request,
-			   struct drm_i915_error_request *erq)
+			   struct i915_request_coredump *erq)
 {
-	const struct i915_gem_context *ctx = request->gem_context;
+	const struct i915_gem_context *ctx;
 
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
 	erq->seqno = request->fence.seqno;
 	erq->sched_attr = request->sched.attr;
-	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
 	erq->head = request->head;
 	erq->tail = request->tail;
 
+	erq->pid = 0;
 	rcu_read_lock();
-	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
+	ctx = rcu_dereference(request->context->gem_context);
+	if (ctx)
+		erq->pid = pid_nr(ctx->pid);
 	rcu_read_unlock();
 }
 
-static void engine_record_requests(struct intel_engine_cs *engine,
-				   struct i915_request *first,
-				   struct drm_i915_error_engine *ee)
+static void engine_record_execlists(struct intel_engine_coredump *ee)
 {
-	struct i915_request *request;
-	int count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request, &engine->active.requests, sched.link)
-		count++;
-	if (!count)
-		return;
-
-	ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
-	if (!ee->requests)
-		return;
-
-	ee->num_requests = count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request,
-				 &engine->active.requests, sched.link) {
-		if (count >= ee->num_requests) {
-			/*
-			 * If the ring request list was changed in
-			 * between the point where the error request
-			 * list was created and dimensioned and this
-			 * point then just exit early to avoid crashes.
-			 *
-			 * We don't need to communicate that the
-			 * request list changed state during error
-			 * state capture and that the error state is
-			 * slightly incorrect as a consequence since we
-			 * are typically only interested in the request
-			 * list state at the point of error state
-			 * capture, not in any changes happening during
-			 * the capture.
-			 */
-			break;
-		}
-
-		record_request(request, &ee->requests[count++]);
-	}
-	ee->num_requests = count;
-}
-
-static void error_record_engine_execlists(const struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
-{
-	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request * const *port = execlists->active;
+	const struct intel_engine_execlists * const el = &ee->engine->execlists;
+	struct i915_request * const *port = el->active;
 	unsigned int n = 0;
 
 	while (*port)
@@ -1246,104 +1223,90 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 	ee->num_ports = n;
 }
 
-static bool record_context(struct drm_i915_error_context *e,
+static bool record_context(struct i915_gem_context_coredump *e,
 			   const struct i915_request *rq)
 {
-	const struct i915_gem_context *ctx = rq->gem_context;
+	struct i915_gem_context *ctx;
+	struct task_struct *task;
+	bool capture;
 
-	if (ctx->pid) {
-		struct task_struct *task;
+	rcu_read_lock();
+	ctx = rcu_dereference(rq->context->gem_context);
+	if (ctx && !kref_get_unless_zero(&ctx->ref))
+		ctx = NULL;
+	rcu_read_unlock();
+	if (!ctx)
+		return false;
 
-		rcu_read_lock();
-		task = pid_task(ctx->pid, PIDTYPE_PID);
-		if (task) {
-			strcpy(e->comm, task->comm);
-			e->pid = task->pid;
-		}
-		rcu_read_unlock();
+	rcu_read_lock();
+	task = pid_task(ctx->pid, PIDTYPE_PID);
+	if (task) {
+		strcpy(e->comm, task->comm);
+		e->pid = task->pid;
 	}
+	rcu_read_unlock();
 
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
 
-	return i915_gem_context_no_error_capture(ctx);
+	capture = i915_gem_context_no_error_capture(ctx);
+
+	i915_gem_context_put(ctx);
+	return capture;
 }
 
-struct capture_vma {
-	struct capture_vma *next;
-	void **slot;
+struct intel_engine_capture_vma {
+	struct intel_engine_capture_vma *next;
+	struct i915_vma *vma;
+	char name[16];
 };
 
-static struct capture_vma *
-capture_vma(struct capture_vma *next,
+static struct intel_engine_capture_vma *
+capture_vma(struct intel_engine_capture_vma *next,
 	    struct i915_vma *vma,
-	    struct drm_i915_error_object **out)
+	    const char *name,
+	    gfp_t gfp)
 {
-	struct capture_vma *c;
+	struct intel_engine_capture_vma *c;
 
-	*out = NULL;
 	if (!vma)
 		return next;
 
-	c = kmalloc(sizeof(*c), ATOMIC_MAYFAIL);
+	c = kmalloc(sizeof(*c), gfp);
 	if (!c)
 		return next;
 
-	if (!i915_active_trygrab(&vma->active)) {
+	if (!i915_active_acquire_if_busy(&vma->active)) {
 		kfree(c);
 		return next;
 	}
 
-	c->slot = (void **)out;
-	*c->slot = i915_vma_get(vma);
+	strcpy(c->name, name);
+	c->vma = i915_vma_get(vma);
 
 	c->next = next;
 	return c;
 }
 
-static struct capture_vma *
-request_record_user_bo(struct i915_request *request,
-		       struct drm_i915_error_engine *ee,
-		       struct capture_vma *capture)
+static struct intel_engine_capture_vma *
+capture_user(struct intel_engine_capture_vma *capture,
+	     const struct i915_request *rq,
+	     gfp_t gfp)
 {
 	struct i915_capture_list *c;
-	struct drm_i915_error_object **bo;
-	long count, max;
-
-	max = 0;
-	for (c = request->capture_list; c; c = c->next)
-		max++;
-	if (!max)
-		return capture;
-
-	bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	if (!bo) {
-		/* If we can't capture everything, try to capture something. */
-		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
-		bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	}
-	if (!bo)
-		return capture;
-
-	count = 0;
-	for (c = request->capture_list; c; c = c->next) {
-		capture = capture_vma(capture, c->vma, &bo[count]);
-		if (++count == max)
-			break;
-	}
 
-	ee->user_bo = bo;
-	ee->user_bo_count = count;
+	for (c = rq->capture_list; c; c = c->next)
+		capture = capture_vma(capture, c->vma, "user", gfp);
 
 	return capture;
 }
 
-static struct drm_i915_error_object *
-capture_object(struct drm_i915_private *dev_priv,
+static struct i915_vma_coredump *
+capture_object(const struct intel_gt *gt,
 	       struct drm_i915_gem_object *obj,
-	       struct compress *compress)
+	       const char *name,
+	       struct i915_vma_compress *compress)
 {
 	if (obj && i915_gem_object_has_pages(obj)) {
 		struct i915_vma fake = {
@@ -1353,127 +1316,175 @@ capture_object(struct drm_i915_private *dev_priv,
 			.obj = obj,
 		};
 
-		return i915_error_object_create(dev_priv, &fake, compress);
+		return i915_vma_coredump_create(gt, &fake, name, compress);
 	} else {
 		return NULL;
 	}
 }
 
-static void
-gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
+static void add_vma(struct intel_engine_coredump *ee,
+		    struct i915_vma_coredump *vma)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_engine_cs *engine;
-	struct drm_i915_error_engine *ee;
+	if (vma) {
+		vma->next = ee->vma;
+		ee->vma = vma;
+	}
+}
 
-	ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	struct intel_engine_coredump *ee;
+
+	ee = kzalloc(sizeof(*ee), gfp);
 	if (!ee)
-		return;
+		return NULL;
 
-	for_each_uabi_engine(engine, i915) {
-		struct capture_vma *capture = NULL;
-		struct i915_request *request;
-		unsigned long flags;
+	ee->engine = engine;
 
-		/* Refill our page pool before entering atomic section */
-		pool_refill(&compress->pool, ALLOW_FAIL);
+	engine_record_registers(ee);
+	engine_record_execlists(ee);
 
-		spin_lock_irqsave(&engine->active.lock, flags);
-		request = intel_engine_find_active_request(engine);
-		if (!request) {
-			spin_unlock_irqrestore(&engine->active.lock, flags);
-			continue;
-		}
+	return ee;
+}
 
-		error->simulated |= record_context(&ee->context, request);
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	struct intel_engine_capture_vma *vma = NULL;
 
-		/*
-		 * We need to copy these to an anonymous buffer
-		 * as the simplest method to avoid being overwritten
-		 * by userspace.
-		 */
-		capture = capture_vma(capture,
-				      request->batch,
-				      &ee->batchbuffer);
+	ee->simulated |= record_context(&ee->context, rq);
+	if (ee->simulated)
+		return NULL;
 
-		if (HAS_BROKEN_CS_TLB(i915))
-			capture = capture_vma(capture,
-					      engine->gt->scratch,
-					      &ee->wa_batchbuffer);
+	/*
+	 * We need to copy these to an anonymous buffer
+	 * as the simplest method to avoid being overwritten
+	 * by userspace.
+	 */
+	vma = capture_vma(vma, rq->batch, "batch", gfp);
+	vma = capture_user(vma, rq, gfp);
+	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
+	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
 
-		capture = request_record_user_bo(request, ee, capture);
+	ee->rq_head = rq->head;
+	ee->rq_post = rq->postfix;
+	ee->rq_tail = rq->tail;
 
-		capture = capture_vma(capture,
-				      request->hw_context->state,
-				      &ee->ctx);
+	return vma;
+}
 
-		capture = capture_vma(capture,
-				      request->ring->vma,
-				      &ee->ringbuffer);
+void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+	const struct intel_engine_cs *engine = ee->engine;
 
-		ee->cpu_ring_head = request->ring->head;
-		ee->cpu_ring_tail = request->ring->tail;
+	while (capture) {
+		struct intel_engine_capture_vma *this = capture;
+		struct i915_vma *vma = this->vma;
 
-		ee->rq_head = request->head;
-		ee->rq_post = request->postfix;
-		ee->rq_tail = request->tail;
+		add_vma(ee,
+			i915_vma_coredump_create(engine->gt,
+						 vma, this->name,
+						 compress));
 
-		engine_record_requests(engine, request, ee);
-		spin_unlock_irqrestore(&engine->active.lock, flags);
+		i915_active_release(&vma->active);
+		i915_vma_put(vma);
 
-		error_record_engine_registers(error, engine, ee);
-		error_record_engine_execlists(engine, ee);
+		capture = this->next;
+		kfree(this);
+	}
 
-		while (capture) {
-			struct capture_vma *this = capture;
-			struct i915_vma *vma = *this->slot;
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->status_page.vma,
+					 "HW Status",
+					 compress));
 
-			*this->slot =
-				i915_error_object_create(i915, vma, compress);
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->wa_ctx.vma,
+					 "WA context",
+					 compress));
 
-			i915_active_ungrab(&vma->active);
-			i915_vma_put(vma);
+	add_vma(ee,
+		capture_object(engine->gt,
+			       engine->default_state,
+			       "NULL context",
+			       compress));
+}
 
-			capture = this->next;
-			kfree(this);
-		}
+static struct intel_engine_coredump *
+capture_engine(struct intel_engine_cs *engine,
+	       struct i915_vma_compress *compress)
+{
+	struct intel_engine_capture_vma *capture = NULL;
+	struct intel_engine_coredump *ee;
+	struct i915_request *rq;
+	unsigned long flags;
 
-		ee->hws_page =
-			i915_error_object_create(i915,
-						 engine->status_page.vma,
-						 compress);
+	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+	if (!ee)
+		return NULL;
 
-		ee->wa_ctx =
-			i915_error_object_create(i915,
-						 engine->wa_ctx.vma,
-						 compress);
+	spin_lock_irqsave(&engine->active.lock, flags);
+	rq = intel_engine_find_active_request(engine);
+	if (rq)
+		capture = intel_engine_coredump_add_request(ee, rq,
+							    ATOMIC_MAYFAIL);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (!capture) {
+		kfree(ee);
+		return NULL;
+	}
+
+	intel_engine_coredump_add_vma(ee, capture, compress);
+
+	return ee;
+}
 
-		ee->default_state =
-			capture_object(i915, engine->default_state, compress);
+static void
+gt_record_engines(struct intel_gt_coredump *gt,
+		  struct i915_vma_compress *compress)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
-		ee->engine = engine;
+	for_each_engine(engine, gt->_gt, id) {
+		struct intel_engine_coredump *ee;
 
-		ee->next = error->engine;
-		error->engine = ee;
+		/* Refill our page pool before entering atomic section */
+		pool_refill(&compress->pool, ALLOW_FAIL);
 
-		ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+		ee = capture_engine(engine, compress);
 		if (!ee)
-			return;
-	}
+			continue;
+
+		gt->simulated |= ee->simulated;
+		if (ee->simulated) {
+			kfree(ee);
+			continue;
+		}
 
-	kfree(ee);
+		ee->next = gt->engine;
+		gt->engine = ee;
+	}
 }
 
-static void
-capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
+static struct intel_uc_coredump *
+gt_record_uc(struct intel_gt_coredump *gt,
+	     struct i915_vma_compress *compress)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct i915_error_uc *error_uc = &error->uc;
-	struct intel_uc *uc = &i915->gt.uc;
+	const struct intel_uc *uc = &gt->_gt->uc;
+	struct intel_uc_coredump *error_uc;
 
-	/* Capturing uC state won't be useful if there is no GuC */
-	if (!error->device_info.has_gt_uc)
-		return;
+	error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL);
+	if (!error_uc)
+		return NULL;
 
 	memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw));
 	memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw));
@@ -1484,19 +1495,42 @@ capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
 	 */
 	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
 	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
-	error_uc->guc_log = i915_error_object_create(i915,
-						     uc->guc.log.vma,
-						     compress);
+	error_uc->guc_log =
+		i915_vma_coredump_create(gt->_gt,
+					 uc->guc.log.vma, "GuC log buffer",
+					 compress);
+
+	return error_uc;
+}
+
+static void gt_capture_prepare(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	mutex_lock(&ggtt->error_mutex);
+}
+
+static void gt_capture_finish(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		ggtt->vm.clear_range(&ggtt->vm,
+				     ggtt->error_capture.start,
+				     PAGE_SIZE);
+
+	mutex_unlock(&ggtt->error_mutex);
 }
 
 /* Capture all registers which don't fit into another category. */
-static void capture_reg_state(struct i915_gpu_state *error)
+static void gt_record_regs(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_uncore *uncore = &i915->uncore;
+	struct intel_uncore *uncore = gt->_gt->uncore;
+	struct drm_i915_private *i915 = uncore->i915;
 	int i;
 
-	/* General organization
+	/*
+	 * General organization
 	 * 1. Registers specific to a single generation
 	 * 2. Registers which belong to multiple generations
 	 * 3. Feature specific registers.
@@ -1506,123 +1540,162 @@ static void capture_reg_state(struct i915_gpu_state *error)
 
 	/* 1: Registers specific to a single generation */
 	if (IS_VALLEYVIEW(i915)) {
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ier = intel_uncore_read(uncore, VLV_IER);
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ier = intel_uncore_read(uncore, VLV_IER);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
 	}
 
 	if (IS_GEN(i915, 7))
-		error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
+		gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
 	if (INTEL_GEN(i915) >= 12) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA1);
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA1);
 	}
 
 	if (IS_GEN(i915, 6)) {
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
-		error->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
-		error->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
+		gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
+		gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_GEN(i915) >= 7)
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 
 	if (INTEL_GEN(i915) >= 6) {
-		error->derrmr = intel_uncore_read(uncore, DERRMR);
+		gt->derrmr = intel_uncore_read(uncore, DERRMR);
 		if (INTEL_GEN(i915) < 12) {
-			error->error = intel_uncore_read(uncore, ERROR_GEN6);
-			error->done_reg = intel_uncore_read(uncore, DONE_REG);
+			gt->error = intel_uncore_read(uncore, ERROR_GEN6);
+			gt->done_reg = intel_uncore_read(uncore, DONE_REG);
 		}
 	}
 
-	if (INTEL_GEN(i915) >= 5)
-		error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE));
-
 	/* 3: Feature specific registers */
 	if (IS_GEN_RANGE(i915, 6, 7)) {
-		error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
-		error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
+		gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+		gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
+	}
+
+	if (IS_GEN_RANGE(i915, 8, 11))
+		gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
+
+	if (IS_GEN(i915, 12))
+		gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
+
+	if (INTEL_GEN(i915) >= 12) {
+		for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+			gt->sfc_done[i] =
+				intel_uncore_read(uncore, GEN12_SFC_DONE(i));
+		}
+
+		gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
 	}
 
 	/* 4: Everything else */
 	if (INTEL_GEN(i915) >= 11) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
-		error->gtier[0] =
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->gtier[0] =
 			intel_uncore_read(uncore,
 					  GEN11_RENDER_COPY_INTR_ENABLE);
-		error->gtier[1] =
+		gt->gtier[1] =
 			intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
-		error->gtier[2] =
+		gt->gtier[2] =
 			intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
-		error->gtier[3] =
+		gt->gtier[3] =
 			intel_uncore_read(uncore,
 					  GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-		error->gtier[4] =
+		gt->gtier[4] =
 			intel_uncore_read(uncore,
 					  GEN11_CRYPTO_RSVD_INTR_ENABLE);
-		error->gtier[5] =
+		gt->gtier[5] =
 			intel_uncore_read(uncore,
 					  GEN11_GUNIT_CSME_INTR_ENABLE);
-		error->ngtier = 6;
+		gt->ngtier = 6;
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
-			error->gtier[i] = intel_uncore_read(uncore,
-							    GEN8_GT_IER(i));
-		error->ngtier = 4;
+			gt->gtier[i] =
+				intel_uncore_read(uncore, GEN8_GT_IER(i));
+		gt->ngtier = 4;
 	} else if (HAS_PCH_SPLIT(i915)) {
-		error->ier = intel_uncore_read(uncore, DEIER);
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ngtier = 1;
+		gt->ier = intel_uncore_read(uncore, DEIER);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ngtier = 1;
 	} else if (IS_GEN(i915, 2)) {
-		error->ier = intel_uncore_read16(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read16(uncore, GEN2_IER);
 	} else if (!IS_VALLEYVIEW(i915)) {
-		error->ier = intel_uncore_read(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read(uncore, GEN2_IER);
 	}
-	error->eir = intel_uncore_read(uncore, EIR);
-	error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+	gt->eir = intel_uncore_read(uncore, EIR);
+	gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+}
+
+/*
+ * Generate a semi-unique error code. The code is not meant to have meaning, The
+ * code's only purpose is to try to prevent false duplicated bug reports by
+ * grossly estimating a GPU error state.
+ *
+ * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
+ * the hang if we could strip the GTT offset information from it.
+ *
+ * It's only a small step better than a random number in its current form.
+ */
+static u32 generate_ecode(const struct intel_engine_coredump *ee)
+{
+	/*
+	 * IPEHR would be an ideal way to detect errors, as it's the gross
+	 * measure of "the command that hung." However, has some very common
+	 * synchronization commands which almost always appear in the case
+	 * strictly a client bug. Use instdone to differentiate those some.
+	 */
+	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
 }
 
-static const char *
-error_msg(struct i915_gpu_state *error,
-	  intel_engine_mask_t engines, const char *msg)
+static const char *error_msg(struct i915_gpu_coredump *error)
 {
+	struct intel_engine_coredump *first = NULL;
+	struct intel_gt_coredump *gt;
+	intel_engine_mask_t engines;
 	int len;
 
+	engines = 0;
+	for (gt = error->gt; gt; gt = gt->next) {
+		struct intel_engine_coredump *cs;
+
+		if (gt->engine && !first)
+			first = gt->engine;
+
+		for (cs = gt->engine; cs; cs = cs->next)
+			engines |= cs->engine->mask;
+	}
+
 	len = scnprintf(error->error_msg, sizeof(error->error_msg),
-			"GPU HANG: ecode %d:%x:0x%08x",
+			"GPU HANG: ecode %d:%x:%08x",
 			INTEL_GEN(error->i915), engines,
-			i915_error_generate_code(error));
-	if (error->engine) {
+			generate_ecode(first));
+	if (first) {
 		/* Just show the first executing process, more is confusing */
 		len += scnprintf(error->error_msg + len,
 				 sizeof(error->error_msg) - len,
 				 ", in %s [%d]",
-				 error->engine->context.comm,
-				 error->engine->context.pid);
+				 first->context.comm, first->context.pid);
 	}
-	if (msg)
-		len += scnprintf(error->error_msg + len,
-				 sizeof(error->error_msg) - len,
-				 ", %s", msg);
 
 	return error->error_msg;
 }
 
-static void capture_gen_state(struct i915_gpu_state *error)
+static void capture_gen(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_private *i915 = error->i915;
 
-	error->awake = i915->gt.awake;
 	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
 	error->suspended = i915->runtime_pm.suspended;
 
@@ -1633,6 +1706,7 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->reset_count = i915_reset_count(&i915->gpu_error);
 	error->suspend_count = i915->suspend_count;
 
+	i915_params_copy(&error->params, &i915_modparams);
 	memcpy(&error->device_info,
 	       INTEL_INFO(i915),
 	       sizeof(error->device_info));
@@ -1642,128 +1716,138 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->driver_caps = i915->caps;
 }
 
-static void capture_params(struct i915_gpu_state *error)
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 {
-	i915_params_copy(&error->params, &i915_modparams);
+	struct i915_gpu_coredump *error;
+
+	if (!i915_modparams.error_capture)
+		return NULL;
+
+	error = kzalloc(sizeof(*error), gfp);
+	if (!error)
+		return NULL;
+
+	kref_init(&error->ref);
+	error->i915 = i915;
+
+	error->time = ktime_get_real();
+	error->boottime = ktime_get_boottime();
+	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+	error->capture = jiffies;
+
+	capture_gen(error);
+
+	return error;
 }
 
-static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
+#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
 {
-	const struct drm_i915_error_engine *ee;
-	unsigned long epoch = error->capture;
+	struct intel_gt_coredump *gc;
 
-	for (ee = error->engine; ee; ee = ee->next) {
-		if (ee->hangcheck_timestamp &&
-		    time_before(ee->hangcheck_timestamp, epoch))
-			epoch = ee->hangcheck_timestamp;
-	}
+	gc = kzalloc(sizeof(*gc), gfp);
+	if (!gc)
+		return NULL;
+
+	gc->_gt = gt;
+	gc->awake = intel_gt_pm_is_awake(gt);
+
+	gt_record_regs(gc);
+	gt_record_fences(gc);
 
-	return epoch;
+	return gc;
 }
 
-static void capture_finish(struct i915_gpu_state *error)
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt)
 {
-	struct i915_ggtt *ggtt = &error->i915->ggtt;
-	const u64 slot = ggtt->error_capture.start;
+	struct i915_vma_compress *compress;
+
+	compress = kmalloc(sizeof(*compress), ALLOW_FAIL);
+	if (!compress)
+		return NULL;
+
+	if (!compress_init(compress)) {
+		kfree(compress);
+		return NULL;
+	}
 
-	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+	gt_capture_prepare(gt);
+
+	return compress;
 }
 
-#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress)
+{
+	if (!compress)
+		return;
+
+	gt_capture_finish(gt);
 
-struct i915_gpu_state *
-i915_capture_gpu_state(struct drm_i915_private *i915)
+	compress_fini(compress);
+	kfree(compress);
+}
+
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
-	struct compress compress;
+	struct i915_gpu_coredump *error;
 
 	/* Check if GPU capture has been disabled */
 	error = READ_ONCE(i915->gpu_error.first_error);
 	if (IS_ERR(error))
 		return error;
 
-	error = kzalloc(sizeof(*error), ALLOW_FAIL);
-	if (!error) {
-		i915_disable_error_state(i915, -ENOMEM);
+	error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL);
+	if (!error)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	if (!compress_init(&compress)) {
-		kfree(error);
-		i915_disable_error_state(i915, -ENOMEM);
-		return ERR_PTR(-ENOMEM);
-	}
+	error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
+	if (error->gt) {
+		struct i915_vma_compress *compress;
 
-	kref_init(&error->ref);
-	error->i915 = i915;
+		compress = i915_vma_capture_prepare(error->gt);
+		if (!compress) {
+			kfree(error->gt);
+			kfree(error);
+			return ERR_PTR(-ENOMEM);
+		}
 
-	error->time = ktime_get_real();
-	error->boottime = ktime_get_boottime();
-	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
-	error->capture = jiffies;
+		gt_record_engines(error->gt, compress);
 
-	capture_params(error);
-	capture_gen_state(error);
-	capture_uc_state(error, &compress);
-	capture_reg_state(error);
-	gem_record_fences(error);
-	gem_record_rings(error, &compress);
+		if (INTEL_INFO(i915)->has_gt_uc)
+			error->gt->uc = gt_record_uc(error->gt, compress);
 
-	error->overlay = intel_overlay_capture_error_state(i915);
-	error->display = intel_display_capture_error_state(i915);
+		i915_vma_capture_finish(error->gt, compress);
 
-	error->epoch = capture_find_epoch(error);
+		error->simulated |= error->gt->simulated;
+	}
 
-	capture_finish(error);
-	compress_fini(&compress);
+	error->overlay = intel_overlay_capture_error_state(i915);
+	error->display = intel_display_capture_error_state(i915);
 
 	return error;
 }
 
-/**
- * i915_capture_error_state - capture an error record for later analysis
- * @i915: i915 device
- * @engine_mask: the mask of engines triggering the hang
- * @msg: a message to insert into the error capture header
- *
- * Should be called when an error is detected (either a hang or an error
- * interrupt) to capture error state from the time of the error.  Fills
- * out a structure which becomes available in debugfs for user level tools
- * to pick up.
- */
-void i915_capture_error_state(struct drm_i915_private *i915,
-			      intel_engine_mask_t engine_mask,
-			      const char *msg)
+void i915_error_state_store(struct i915_gpu_coredump *error)
 {
+	struct drm_i915_private *i915;
 	static bool warned;
-	struct i915_gpu_state *error;
-	unsigned long flags;
 
-	if (!i915_modparams.error_capture)
+	if (IS_ERR_OR_NULL(error))
 		return;
 
-	if (READ_ONCE(i915->gpu_error.first_error))
-		return;
+	i915 = error->i915;
+	dev_info(i915->drm.dev, "%s\n", error_msg(error));
 
-	error = i915_capture_gpu_state(i915);
-	if (IS_ERR(error))
+	if (error->simulated ||
+	    cmpxchg(&i915->gpu_error.first_error, NULL, error))
 		return;
 
-	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
-
-	if (!error->simulated) {
-		spin_lock_irqsave(&i915->gpu_error.lock, flags);
-		if (!i915->gpu_error.first_error) {
-			i915->gpu_error.first_error = error;
-			error = NULL;
-		}
-		spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
-	}
-
-	if (error) {
-		__i915_gpu_state_free(&error->ref);
-		return;
-	}
+	i915_gpu_coredump_get(error);
 
 	if (!xchg(&warned, true) &&
 	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
@@ -1776,15 +1860,38 @@ void i915_capture_error_state(struct drm_i915_private *i915,
 	}
 }
 
-struct i915_gpu_state *
+/**
+ * i915_capture_error_state - capture an error record for later analysis
+ * @i915: i915 device
+ *
+ * Should be called when an error is detected (either a hang or an error
+ * interrupt) to capture error state from the time of the error.  Fills
+ * out a structure which becomes available in debugfs for user level tools
+ * to pick up.
+ */
+void i915_capture_error_state(struct drm_i915_private *i915)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_gpu_coredump(i915);
+	if (IS_ERR(error)) {
+		cmpxchg(&i915->gpu_error.first_error, NULL, error);
+		return;
+	}
+
+	i915_error_state_store(error);
+	i915_gpu_coredump_put(error);
+}
+
+struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_get(error);
+		i915_gpu_coredump_get(error);
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	return error;
@@ -1792,7 +1899,7 @@ i915_first_error_state(struct drm_i915_private *i915)
 
 void i915_reset_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
@@ -1801,7 +1908,7 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_put(error);
+		i915_gpu_coredump_put(error);
 }
 
 void i915_disable_error_state(struct drm_i915_private *i915, int err)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index df9f57766626..9109004956bd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -25,44 +25,100 @@
 #include "i915_scheduler.h"
 
 struct drm_i915_private;
+struct i915_vma_compress;
+struct intel_engine_capture_vma;
 struct intel_overlay_error_state;
 struct intel_display_error_state;
 
-struct i915_gpu_state {
-	struct kref ref;
-	ktime_t time;
-	ktime_t boottime;
-	ktime_t uptime;
-	unsigned long capture;
-	unsigned long epoch;
+struct i915_vma_coredump {
+	struct i915_vma_coredump *next;
 
-	struct drm_i915_private *i915;
+	char name[20];
+
+	u64 gtt_offset;
+	u64 gtt_size;
+	u32 gtt_page_sizes;
+
+	int num_pages;
+	int page_count;
+	int unused;
+	u32 *pages[0];
+};
+
+struct i915_request_coredump {
+	unsigned long flags;
+	pid_t pid;
+	u32 context;
+	u32 seqno;
+	u32 start;
+	u32 head;
+	u32 tail;
+	struct i915_sched_attr sched_attr;
+};
+
+struct intel_engine_coredump {
+	const struct intel_engine_cs *engine;
 
-	char error_msg[128];
 	bool simulated;
-	bool awake;
-	bool wakelock;
-	bool suspended;
-	int iommu;
 	u32 reset_count;
-	u32 suspend_count;
-	struct intel_device_info device_info;
-	struct intel_runtime_info runtime_info;
-	struct intel_driver_caps driver_caps;
-	struct i915_params params;
 
-	struct i915_error_uc {
-		struct intel_uc_fw guc_fw;
-		struct intel_uc_fw huc_fw;
-		struct drm_i915_error_object *guc_log;
-	} uc;
+	/* position of active request inside the ring */
+	u32 rq_head, rq_post, rq_tail;
+
+	/* Register state */
+	u32 ccid;
+	u32 start;
+	u32 tail;
+	u32 head;
+	u32 ctl;
+	u32 mode;
+	u32 hws;
+	u32 ipeir;
+	u32 ipehr;
+	u32 bbstate;
+	u32 instpm;
+	u32 instps;
+	u64 bbaddr;
+	u64 acthd;
+	u32 fault_reg;
+	u64 faddr;
+	u32 rc_psmi; /* sleep state */
+	struct intel_instdone instdone;
+
+	struct i915_gem_context_coredump {
+		char comm[TASK_COMM_LEN];
+		pid_t pid;
+		int active;
+		int guilty;
+		struct i915_sched_attr sched_attr;
+	} context;
+
+	struct i915_vma_coredump *vma;
+
+	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
+	unsigned int num_ports;
+
+	struct {
+		u32 gfx_mode;
+		union {
+			u64 pdp[4];
+			u32 pp_dir_base;
+		};
+	} vm_info;
+
+	struct intel_engine_coredump *next;
+};
+
+struct intel_gt_coredump {
+	const struct intel_gt *_gt;
+	bool awake;
+	bool simulated;
 
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
 	u32 ier;
 	u32 gtier[6], ngtier;
-	u32 ccid;
 	u32 derrmr;
 	u32 forcewake;
 	u32 error; /* gen6+ */
@@ -74,95 +130,52 @@ struct i915_gpu_state {
 	u32 gam_ecochk;
 	u32 gab_ctl;
 	u32 gfx_mode;
+	u32 gtt_cache;
+	u32 aux_err; /* gen12 */
+	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
+	u32 gam_done; /* gen12 */
 
 	u32 nfence;
 	u64 fence[I915_MAX_NUM_FENCES];
+
+	struct intel_engine_coredump *engine;
+
+	struct intel_uc_coredump {
+		struct intel_uc_fw guc_fw;
+		struct intel_uc_fw huc_fw;
+		struct i915_vma_coredump *guc_log;
+	} *uc;
+
+	struct intel_gt_coredump *next;
+};
+
+struct i915_gpu_coredump {
+	struct kref ref;
+	ktime_t time;
+	ktime_t boottime;
+	ktime_t uptime;
+	unsigned long capture;
+
+	struct drm_i915_private *i915;
+
+	struct intel_gt_coredump *gt;
+
+	char error_msg[128];
+	bool simulated;
+	bool wakelock;
+	bool suspended;
+	int iommu;
+	u32 reset_count;
+	u32 suspend_count;
+
+	struct intel_device_info device_info;
+	struct intel_runtime_info runtime_info;
+	struct intel_driver_caps driver_caps;
+	struct i915_params params;
+
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
 
-	struct drm_i915_error_engine {
-		const struct intel_engine_cs *engine;
-
-		/* Software tracked state */
-		bool idle;
-		unsigned long hangcheck_timestamp;
-		int num_requests;
-		u32 reset_count;
-
-		/* position of active request inside the ring */
-		u32 rq_head, rq_post, rq_tail;
-
-		/* our own tracking of ring head and tail */
-		u32 cpu_ring_head;
-		u32 cpu_ring_tail;
-
-		/* Register state */
-		u32 start;
-		u32 tail;
-		u32 head;
-		u32 ctl;
-		u32 mode;
-		u32 hws;
-		u32 ipeir;
-		u32 ipehr;
-		u32 bbstate;
-		u32 instpm;
-		u32 instps;
-		u64 bbaddr;
-		u64 acthd;
-		u32 fault_reg;
-		u64 faddr;
-		u32 rc_psmi; /* sleep state */
-		struct intel_instdone instdone;
-
-		struct drm_i915_error_context {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			u32 hw_id;
-			int active;
-			int guilty;
-			struct i915_sched_attr sched_attr;
-		} context;
-
-		struct drm_i915_error_object {
-			u64 gtt_offset;
-			u64 gtt_size;
-			int num_pages;
-			int page_count;
-			int unused;
-			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
-
-		struct drm_i915_error_object **user_bo;
-		long user_bo_count;
-
-		struct drm_i915_error_object *wa_ctx;
-		struct drm_i915_error_object *default_state;
-
-		struct drm_i915_error_request {
-			unsigned long flags;
-			long jiffies;
-			pid_t pid;
-			u32 context;
-			u32 seqno;
-			u32 start;
-			u32 head;
-			u32 tail;
-			struct i915_sched_attr sched_attr;
-		} *requests, execlist[EXECLIST_MAX_PORTS];
-		unsigned int num_ports;
-
-		struct {
-			u32 gfx_mode;
-			union {
-				u64 pdp[4];
-				u32 pp_dir_base;
-			};
-		} vm_info;
-
-		struct drm_i915_error_engine *next;
-	} *engine;
-
 	struct scatterlist *sgl, *fit;
 };
 
@@ -170,7 +183,7 @@ struct i915_gpu_error {
 	/* For reset and error_state handling. */
 	spinlock_t lock;
 	/* Protected by the above dev->gpu_error.lock. */
-	struct i915_gpu_state *first_error;
+	struct i915_gpu_coredump *first_error;
 
 	atomic_t pending_fb_pin;
 
@@ -198,41 +211,115 @@ struct drm_i915_error_state_buf {
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      intel_engine_mask_t engine_mask,
-			      const char *error_msg);
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
+void i915_capture_error_state(struct drm_i915_private *i915);
+
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
+
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp);
 
-static inline struct i915_gpu_state *
-i915_gpu_state_get(struct i915_gpu_state *gpu)
+void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+				   struct intel_engine_capture_vma *capture,
+				   struct i915_vma_compress *compress);
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt);
+
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress);
+
+void i915_error_state_store(struct i915_gpu_coredump *error);
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
 {
 	kref_get(&gpu->ref);
 	return gpu;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t offset, size_t count);
+ssize_t
+i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+				 char *buf, loff_t offset, size_t count);
 
-void __i915_gpu_state_free(struct kref *kref);
-static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+void __i915_gpu_coredump_free(struct kref *kref);
+static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 	if (gpu)
-		kref_put(&gpu->ref, __i915_gpu_state_free);
+		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
 #else
 
-static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
-					    u32 engine_mask,
-					    const char *error_msg)
+static inline void i915_capture_error_state(struct drm_i915_private *i915)
+{
+}
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+}
+
+static inline struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt)
+{
+	return NULL;
+}
+
+static inline void
+i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			struct i915_vma_compress *compress)
+{
+}
+
+static inline void
+i915_error_state_store(struct drm_i915_private *i915,
+		       struct i915_gpu_coredump *error)
 {
 }
 
-static inline struct i915_gpu_state *
+static inline struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 37e3dd3c1a9d..afc6aad9bf8c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -29,7 +29,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/circ_buf.h>
-#include <linux/cpuidle.h>
 #include <linux/slab.h>
 #include <linux/sysrq.h>
 
@@ -46,6 +45,7 @@
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm_irq.h"
+#include "gt/intel_rps.h"
 
 #include "i915_drv.h"
 #include "i915_irq.h"
@@ -149,30 +149,24 @@ static const u32 hpd_gen12[HPD_NUM_PINS] = {
 };
 
 static const u32 hpd_icp[HPD_NUM_PINS] = {
-	[HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP,
-	[HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP,
-	[HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP,
-	[HPD_PORT_D] = SDE_TC2_HOTPLUG_ICP,
-	[HPD_PORT_E] = SDE_TC3_HOTPLUG_ICP,
-	[HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP
-};
-
-static const u32 hpd_mcc[HPD_NUM_PINS] = {
-	[HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP,
-	[HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP,
-	[HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP
+	[HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A),
+	[HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B),
+	[HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1),
+	[HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2),
+	[HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3),
+	[HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4),
 };
 
 static const u32 hpd_tgp[HPD_NUM_PINS] = {
-	[HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP,
-	[HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP,
-	[HPD_PORT_C] = SDE_DDIC_HOTPLUG_TGP,
-	[HPD_PORT_D] = SDE_TC1_HOTPLUG_ICP,
-	[HPD_PORT_E] = SDE_TC2_HOTPLUG_ICP,
-	[HPD_PORT_F] = SDE_TC3_HOTPLUG_ICP,
-	[HPD_PORT_G] = SDE_TC4_HOTPLUG_ICP,
-	[HPD_PORT_H] = SDE_TC5_HOTPLUG_TGP,
-	[HPD_PORT_I] = SDE_TC6_HOTPLUG_TGP,
+	[HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A),
+	[HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B),
+	[HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C),
+	[HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1),
+	[HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2),
+	[HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3),
+	[HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4),
+	[HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5),
+	[HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6),
 };
 
 void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
@@ -327,180 +321,6 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv,
 	}
 }
 
-static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
-{
-	WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11);
-
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
-}
-
-void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_gt *gt = &dev_priv->gt;
-
-	spin_lock_irq(&gt->irq_lock);
-
-	while (gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM))
-		;
-
-	dev_priv->gt_pm.rps.pm_iir = 0;
-
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_gt *gt = &dev_priv->gt;
-
-	spin_lock_irq(&gt->irq_lock);
-	gen6_gt_pm_reset_iir(gt, GEN6_PM_RPS_EVENTS);
-	dev_priv->gt_pm.rps.pm_iir = 0;
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_gt *gt = &dev_priv->gt;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	spin_lock_irq(&gt->irq_lock);
-	WARN_ON_ONCE(rps->pm_iir);
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM));
-	else
-		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
-
-	rps->interrupts_enabled = true;
-	gen6_gt_pm_enable_irq(gt, dev_priv->pm_rps_events);
-
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask)
-{
-	return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz;
-}
-
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	struct intel_gt *gt = &dev_priv->gt;
-
-	if (!READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	spin_lock_irq(&gt->irq_lock);
-	rps->interrupts_enabled = false;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
-	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
-
-	spin_unlock_irq(&gt->irq_lock);
-	intel_synchronize_irq(dev_priv);
-
-	/* Now that we will not be generating any more work, flush any
-	 * outstanding tasks. As we are called on the RPS idle path,
-	 * we will reset the GPU to minimum frequencies, so the current
-	 * state of the worker can be discarded.
-	 */
-	cancel_work_sync(&rps->work);
-	if (INTEL_GEN(dev_priv) >= 11)
-		gen11_reset_rps_interrupts(dev_priv);
-	else
-		gen6_reset_rps_interrupts(dev_priv);
-}
-
-void gen9_reset_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
-
-	spin_lock_irq(&gt->irq_lock);
-	gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen9_enable_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
-
-	spin_lock_irq(&gt->irq_lock);
-	if (!guc->interrupts.enabled) {
-		WARN_ON_ONCE(intel_uncore_read(gt->uncore,
-					       gen6_pm_iir(gt->i915)) &
-			     gt->pm_guc_events);
-		guc->interrupts.enabled = true;
-		gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
-	}
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen9_disable_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	assert_rpm_wakelock_held(&gt->i915->runtime_pm);
-
-	spin_lock_irq(&gt->irq_lock);
-	guc->interrupts.enabled = false;
-
-	gen6_gt_pm_disable_irq(gt, gt->pm_guc_events);
-
-	spin_unlock_irq(&gt->irq_lock);
-	intel_synchronize_irq(gt->i915);
-
-	gen9_reset_guc_interrupts(guc);
-}
-
-void gen11_reset_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	spin_lock_irq(&gt->irq_lock);
-	gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen11_enable_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	spin_lock_irq(&gt->irq_lock);
-	if (!guc->interrupts.enabled) {
-		u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
-
-		WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
-		intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, events);
-		intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~events);
-		guc->interrupts.enabled = true;
-	}
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-void gen11_disable_guc_interrupts(struct intel_guc *guc)
-{
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	spin_lock_irq(&gt->irq_lock);
-	guc->interrupts.enabled = false;
-
-	intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
-	intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
-
-	spin_unlock_irq(&gt->irq_lock);
-	intel_synchronize_irq(gt->i915);
-
-	gen11_reset_guc_interrupts(guc);
-}
-
 /**
  * bdw_update_port_irq - update DE port interrupt
  * @dev_priv: driver private
@@ -942,14 +762,14 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc)
 	return (position + crtc->scanline_offset) % vtotal;
 }
 
-bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
+bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int index,
 			      bool in_vblank_irq, int *vpos, int *hpos,
 			      ktime_t *stime, ktime_t *etime,
 			      const struct drm_display_mode *mode)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv,
-								pipe);
+	struct intel_crtc *crtc = to_intel_crtc(drm_crtc_from_index(dev, index));
+	enum pipe pipe = crtc->pipe;
 	int position;
 	int vbl_start, vbl_end, hsync_start, htotal, vtotal;
 	unsigned long irqflags;
@@ -992,7 +812,7 @@ bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 		/* No obvious pixelcount register. Only query vertical
 		 * scanout position from Display scan line register.
 		 */
-		position = __intel_get_crtc_scanline(intel_crtc);
+		position = __intel_get_crtc_scanline(crtc);
 	} else {
 		/* Have access to pixelcount since start of frame.
 		 * We can split this into vertical and horizontal
@@ -1072,201 +892,8 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	return position;
 }
 
-static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
-{
-	struct intel_uncore *uncore = &dev_priv->uncore;
-	u32 busy_up, busy_down, max_avg, min_avg;
-	u8 new_delay;
-
-	spin_lock(&mchdev_lock);
-
-	intel_uncore_write16(uncore,
-			     MEMINTRSTS,
-			     intel_uncore_read(uncore, MEMINTRSTS));
-
-	new_delay = dev_priv->ips.cur_delay;
-
-	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
-	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
-	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
-	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
-	min_avg = intel_uncore_read(uncore, RCBMINAVG);
-
-	/* Handle RCS change request from hw */
-	if (busy_up > max_avg) {
-		if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay)
-			new_delay = dev_priv->ips.cur_delay - 1;
-		if (new_delay < dev_priv->ips.max_delay)
-			new_delay = dev_priv->ips.max_delay;
-	} else if (busy_down < min_avg) {
-		if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay)
-			new_delay = dev_priv->ips.cur_delay + 1;
-		if (new_delay > dev_priv->ips.min_delay)
-			new_delay = dev_priv->ips.min_delay;
-	}
-
-	if (ironlake_set_drps(dev_priv, new_delay))
-		dev_priv->ips.cur_delay = new_delay;
-
-	spin_unlock(&mchdev_lock);
-
-	return;
-}
-
-static void vlv_c0_read(struct drm_i915_private *dev_priv,
-			struct intel_rps_ei *ei)
-{
-	ei->ktime = ktime_get_raw();
-	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
-	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
-}
-
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
-{
-	memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
-}
-
-static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const struct intel_rps_ei *prev = &rps->ei;
-	struct intel_rps_ei now;
-	u32 events = 0;
-
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
-		return 0;
-
-	vlv_c0_read(dev_priv, &now);
-
-	if (prev->ktime) {
-		u64 time, c0;
-		u32 render, media;
-
-		time = ktime_us_delta(now.ktime, prev->ktime);
-
-		time *= dev_priv->czclk_freq;
-
-		/* Workload can be split between render + media,
-		 * e.g. SwapBuffers being blitted in X after being rendered in
-		 * mesa. To account for this we need to combine both engines
-		 * into our activity counter.
-		 */
-		render = now.render_c0 - prev->render_c0;
-		media = now.media_c0 - prev->media_c0;
-		c0 = max(render, media);
-		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
-
-		if (c0 > time * rps->power.up_threshold)
-			events = GEN6_PM_RP_UP_THRESHOLD;
-		else if (c0 < time * rps->power.down_threshold)
-			events = GEN6_PM_RP_DOWN_THRESHOLD;
-	}
-
-	rps->ei = now;
-	return events;
-}
-
-static void gen6_pm_rps_work(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, gt_pm.rps.work);
-	struct intel_gt *gt = &dev_priv->gt;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	bool client_boost = false;
-	int new_delay, adj, min, max;
-	u32 pm_iir = 0;
-
-	spin_lock_irq(&gt->irq_lock);
-	if (rps->interrupts_enabled) {
-		pm_iir = fetch_and_zero(&rps->pm_iir);
-		client_boost = atomic_read(&rps->num_waiters);
-	}
-	spin_unlock_irq(&gt->irq_lock);
-
-	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
-	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
-		goto out;
-
-	mutex_lock(&rps->lock);
-
-	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
-
-	adj = rps->last_adj;
-	new_delay = rps->cur_freq;
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost)
-		max = rps->max_freq;
-	if (client_boost && new_delay < rps->boost_freq) {
-		new_delay = rps->boost_freq;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
-
-		if (new_delay >= rps->max_freq_softlimit)
-			adj = 0;
-	} else if (client_boost) {
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (rps->cur_freq > rps->efficient_freq)
-			new_delay = rps->efficient_freq;
-		else if (rps->cur_freq > rps->min_freq_softlimit)
-			new_delay = rps->min_freq_softlimit;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
-		if (adj < 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
-
-		if (new_delay <= rps->min_freq_softlimit)
-			adj = 0;
-	} else { /* unknown event */
-		adj = 0;
-	}
-
-	rps->last_adj = adj;
-
-	/*
-	 * Limit deboosting and boosting to keep ourselves at the extremes
-	 * when in the respective power modes (i.e. slowly decrease frequencies
-	 * while in the HIGH_POWER zone and slowly increase frequencies while
-	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
-	 * to the next level quickly, and conversely if busy we expect to
-	 * hit a waitboost and rapidly switch into max power.
-	 */
-	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
-	    (adj > 0 && rps->power.mode == LOW_POWER))
-		rps->last_adj = 0;
-
-	/* sysfs frequency interfaces may have snuck in while servicing the
-	 * interrupt
-	 */
-	new_delay += adj;
-	new_delay = clamp_t(int, new_delay, min, max);
-
-	if (intel_set_rps(dev_priv, new_delay)) {
-		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		rps->last_adj = 0;
-	}
-
-	mutex_unlock(&rps->lock);
-
-out:
-	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
-	spin_lock_irq(&gt->irq_lock);
-	if (rps->interrupts_enabled)
-		gen6_gt_pm_unmask_irq(gt, dev_priv->pm_rps_events);
-	spin_unlock_irq(&gt->irq_lock);
-}
-
-
 /**
- * ivybridge_parity_work - Workqueue called when a parity error interrupt
+ * ivb_parity_work - Workqueue called when a parity error interrupt
  * occurred.
  * @work: workqueue struct
  *
@@ -1274,7 +901,7 @@ out:
  * this event, userspace should try to remap the bad rows since statistically
  * it is likely the same row is more likely to go bad again.
  */
-static void ivybridge_parity_work(struct work_struct *work)
+static void ivb_parity_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), l3_parity.error_work);
@@ -1401,11 +1028,11 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val)
 {
 	switch (pin) {
 	case HPD_PORT_A:
-		return val & ICP_DDIA_HPD_LONG_DETECT;
+		return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_A);
 	case HPD_PORT_B:
-		return val & ICP_DDIB_HPD_LONG_DETECT;
+		return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_B);
 	case HPD_PORT_C:
-		return val & TGP_DDIC_HPD_LONG_DETECT;
+		return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_C);
 	default:
 		return false;
 	}
@@ -1427,20 +1054,6 @@ static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val)
 	}
 }
 
-static bool tgp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val)
-{
-	switch (pin) {
-	case HPD_PORT_A:
-		return val & ICP_DDIA_HPD_LONG_DETECT;
-	case HPD_PORT_B:
-		return val & ICP_DDIB_HPD_LONG_DETECT;
-	case HPD_PORT_C:
-		return val & TGP_DDIC_HPD_LONG_DETECT;
-	default:
-		return false;
-	}
-}
-
 static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val)
 {
 	switch (pin) {
@@ -1652,54 +1265,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
 				     res1, res2);
 }
 
-/* The RPS events need forcewake, so we add them to a work queue and mask their
- * IMR bits until the work is done. Other interrupts can be processed without
- * the work queue. */
-void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_rps *rps = &i915->gt_pm.rps;
-	const u32 events = i915->pm_rps_events & pm_iir;
-
-	lockdep_assert_held(&gt->irq_lock);
-
-	if (unlikely(!events))
-		return;
-
-	gen6_gt_pm_mask_irq(gt, events);
-
-	if (!rps->interrupts_enabled)
-		return;
-
-	rps->pm_iir |= events;
-	schedule_work(&rps->work);
-}
-
-void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	struct intel_gt *gt = &dev_priv->gt;
-
-	if (pm_iir & dev_priv->pm_rps_events) {
-		spin_lock(&gt->irq_lock);
-		gen6_gt_pm_mask_irq(gt, pm_iir & dev_priv->pm_rps_events);
-		if (rps->interrupts_enabled) {
-			rps->pm_iir |= pm_iir & dev_priv->pm_rps_events;
-			schedule_work(&rps->work);
-		}
-		spin_unlock(&gt->irq_lock);
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return;
-
-	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-		intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]);
-
-	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
-		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
-}
-
 static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv)
 {
 	enum pipe pipe;
@@ -1716,7 +1281,7 @@ static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv)
 static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv,
 				  u32 iir, u32 pipe_stats[I915_MAX_PIPES])
 {
-	int pipe;
+	enum pipe pipe;
 
 	spin_lock(&dev_priv->irq_lock);
 
@@ -1741,6 +1306,7 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv,
 		status_mask = PIPE_FIFO_UNDERRUN_STATUS;
 
 		switch (pipe) {
+		default:
 		case PIPE_A:
 			iir_bit = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT;
 			break;
@@ -2009,7 +1575,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		if (gt_iir)
 			gen6_gt_irq_handler(&dev_priv->gt, gt_iir);
 		if (pm_iir)
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir);
 
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -2136,7 +1702,7 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv,
 
 static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 {
-	int pipe;
+	enum pipe pipe;
 	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK;
 
 	ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx);
@@ -2222,7 +1788,7 @@ static void cpt_serr_int_handler(struct drm_i915_private *dev_priv)
 
 static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 {
-	int pipe;
+	enum pipe pipe;
 	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT;
 
 	ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt);
@@ -2256,19 +1822,35 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 		cpt_serr_int_handler(dev_priv);
 }
 
-static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir,
-			    const u32 *pins)
+static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 {
-	u32 ddi_hotplug_trigger;
-	u32 tc_hotplug_trigger;
+	u32 ddi_hotplug_trigger, tc_hotplug_trigger;
 	u32 pin_mask = 0, long_mask = 0;
+	bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val);
+	const u32 *pins;
 
-	if (HAS_PCH_MCC(dev_priv)) {
+	if (HAS_PCH_TGP(dev_priv)) {
+		ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
+		tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP;
+		tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect;
+		pins = hpd_tgp;
+	} else if (HAS_PCH_JSP(dev_priv)) {
 		ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
 		tc_hotplug_trigger = 0;
+		pins = hpd_tgp;
+	} else if (HAS_PCH_MCC(dev_priv)) {
+		ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
+		tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1);
+		tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
+		pins = hpd_icp;
 	} else {
+		WARN(!HAS_PCH_ICP(dev_priv),
+		     "Unrecognized PCH type 0x%x\n", INTEL_PCH_TYPE(dev_priv));
+
 		ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
 		tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
+		tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
+		pins = hpd_icp;
 	}
 
 	if (ddi_hotplug_trigger) {
@@ -2292,44 +1874,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir,
 		intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
 				   tc_hotplug_trigger,
 				   dig_hotplug_reg, pins,
-				   icp_tc_port_hotplug_long_detect);
-	}
-
-	if (pin_mask)
-		intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
-
-	if (pch_iir & SDE_GMBUS_ICP)
-		gmbus_irq_handler(dev_priv);
-}
-
-static void tgp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
-{
-	u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
-	u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP;
-	u32 pin_mask = 0, long_mask = 0;
-
-	if (ddi_hotplug_trigger) {
-		u32 dig_hotplug_reg;
-
-		dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_DDI);
-		I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg);
-
-		intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
-				   ddi_hotplug_trigger,
-				   dig_hotplug_reg, hpd_tgp,
-				   tgp_ddi_port_hotplug_long_detect);
-	}
-
-	if (tc_hotplug_trigger) {
-		u32 dig_hotplug_reg;
-
-		dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_TC);
-		I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg);
-
-		intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
-				   tc_hotplug_trigger,
-				   dig_hotplug_reg, hpd_tgp,
-				   tgp_tc_port_hotplug_long_detect);
+				   tc_port_hotplug_long_detect);
 	}
 
 	if (pin_mask)
@@ -2434,7 +1979,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
 	}
 
 	if (IS_GEN(dev_priv, 5) && de_iir & DE_PCU_EVENT)
-		ironlake_rps_change_irq_handler(dev_priv);
+		gen5_rps_irq_handler(&dev_priv->gt.rps);
 }
 
 static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@@ -2486,7 +2031,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
  * 4 - Process the interrupt(s) that had bits set in the IIRs.
  * 5 - Re-enable Master Interrupt Control.
  */
-static irqreturn_t ironlake_irq_handler(int irq, void *arg)
+static irqreturn_t ilk_irq_handler(int irq, void *arg)
 {
 	struct drm_i915_private *dev_priv = arg;
 	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
@@ -2539,7 +2084,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 		if (pm_iir) {
 			I915_WRITE(GEN6_PMIIR, pm_iir);
 			ret = IRQ_HANDLED;
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir);
 		}
 	}
 
@@ -2616,10 +2161,16 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv)
 	u32 mask;
 
 	if (INTEL_GEN(dev_priv) >= 12)
-		/* TODO: Add AUX entries for USBC */
 		return TGL_DE_PORT_AUX_DDIA |
 			TGL_DE_PORT_AUX_DDIB |
-			TGL_DE_PORT_AUX_DDIC;
+			TGL_DE_PORT_AUX_DDIC |
+			TGL_DE_PORT_AUX_USBC1 |
+			TGL_DE_PORT_AUX_USBC2 |
+			TGL_DE_PORT_AUX_USBC3 |
+			TGL_DE_PORT_AUX_USBC4 |
+			TGL_DE_PORT_AUX_USBC5 |
+			TGL_DE_PORT_AUX_USBC6;
+
 
 	mask = GEN8_AUX_CHANNEL_A;
 	if (INTEL_GEN(dev_priv) >= 9)
@@ -2638,7 +2189,9 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv)
 
 static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_GEN(dev_priv) >= 9)
+	if (INTEL_GEN(dev_priv) >= 11)
+		return GEN11_DE_PIPE_IRQ_FAULT_ERRORS;
+	else if (INTEL_GEN(dev_priv) >= 9)
 		return GEN9_DE_PIPE_IRQ_FAULT_ERRORS;
 	else
 		return GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
@@ -2655,11 +2208,21 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
 	}
 
 	if (iir & GEN8_DE_EDP_PSR) {
-		u32 psr_iir = I915_READ(EDP_PSR_IIR);
+		u32 psr_iir;
+		i915_reg_t iir_reg;
+
+		if (INTEL_GEN(dev_priv) >= 12)
+			iir_reg = TRANS_PSR_IIR(dev_priv->psr.transcoder);
+		else
+			iir_reg = EDP_PSR_IIR;
+
+		psr_iir = I915_READ(iir_reg);
+		I915_WRITE(iir_reg, psr_iir);
+
+		if (psr_iir)
+			found = true;
 
 		intel_psr_irq_handler(dev_priv, psr_iir);
-		I915_WRITE(EDP_PSR_IIR, psr_iir);
-		found = true;
 	}
 
 	if (!found)
@@ -2780,12 +2343,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 			I915_WRITE(SDEIIR, iir);
 			ret = IRQ_HANDLED;
 
-			if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP)
-				tgp_irq_handler(dev_priv, iir);
-			else if (INTEL_PCH_TYPE(dev_priv) >= PCH_MCC)
-				icp_irq_handler(dev_priv, iir, hpd_mcc);
-			else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
-				icp_irq_handler(dev_priv, iir, hpd_icp);
+			if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
+				icp_irq_handler(dev_priv, iir);
 			else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
 				spt_irq_handler(dev_priv, iir);
 			else
@@ -2894,9 +2453,30 @@ static inline void gen11_master_intr_enable(void __iomem * const regs)
 	raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ);
 }
 
-static irqreturn_t gen11_irq_handler(int irq, void *arg)
+static void
+gen11_display_irq_handler(struct drm_i915_private *i915)
+{
+	void __iomem * const regs = i915->uncore.regs;
+	const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL);
+
+	disable_rpm_wakeref_asserts(&i915->runtime_pm);
+	/*
+	 * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ
+	 * for the display related bits.
+	 */
+	raw_reg_write(regs, GEN11_DISPLAY_INT_CTL, 0x0);
+	gen8_de_irq_handler(i915, disp_ctl);
+	raw_reg_write(regs, GEN11_DISPLAY_INT_CTL,
+		      GEN11_DISPLAY_IRQ_ENABLE);
+
+	enable_rpm_wakeref_asserts(&i915->runtime_pm);
+}
+
+static __always_inline irqreturn_t
+__gen11_irq_handler(struct drm_i915_private * const i915,
+		    u32 (*intr_disable)(void __iomem * const regs),
+		    void (*intr_enable)(void __iomem * const regs))
 {
-	struct drm_i915_private * const i915 = arg;
 	void __iomem * const regs = i915->uncore.regs;
 	struct intel_gt *gt = &i915->gt;
 	u32 master_ctl;
@@ -2905,9 +2485,9 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	if (!intel_irqs_enabled(i915))
 		return IRQ_NONE;
 
-	master_ctl = gen11_master_intr_disable(regs);
+	master_ctl = intr_disable(regs);
 	if (!master_ctl) {
-		gen11_master_intr_enable(regs);
+		intr_enable(regs);
 		return IRQ_NONE;
 	}
 
@@ -2915,27 +2495,25 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	gen11_gt_irq_handler(gt, master_ctl);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	if (master_ctl & GEN11_DISPLAY_IRQ) {
-		const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL);
-
-		disable_rpm_wakeref_asserts(&i915->runtime_pm);
-		/*
-		 * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ
-		 * for the display related bits.
-		 */
-		gen8_de_irq_handler(i915, disp_ctl);
-		enable_rpm_wakeref_asserts(&i915->runtime_pm);
-	}
+	if (master_ctl & GEN11_DISPLAY_IRQ)
+		gen11_display_irq_handler(i915);
 
 	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
 
-	gen11_master_intr_enable(regs);
+	intr_enable(regs);
 
 	gen11_gu_misc_irq_handler(gt, gu_misc_iir);
 
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t gen11_irq_handler(int irq, void *arg)
+{
+	return __gen11_irq_handler(arg,
+				   gen11_master_intr_disable,
+				   gen11_master_intr_enable);
+}
+
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
@@ -2952,12 +2530,18 @@ int i8xx_enable_vblank(struct drm_crtc *crtc)
 	return 0;
 }
 
-int i945gm_enable_vblank(struct drm_crtc *crtc)
+int i915gm_enable_vblank(struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 
-	if (dev_priv->i945gm_vblank.enabled++ == 0)
-		schedule_work(&dev_priv->i945gm_vblank.work);
+	/*
+	 * Vblank interrupts fail to wake the device up from C2+.
+	 * Disabling render clock gating during C-states avoids
+	 * the problem. There is a small power cost so we do this
+	 * only when vblank interrupts are actually enabled.
+	 */
+	if (dev_priv->vblank_enabled++ == 0)
+		I915_WRITE(SCPD0, _MASKED_BIT_ENABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE));
 
 	return i8xx_enable_vblank(crtc);
 }
@@ -3030,14 +2614,14 @@ void i8xx_disable_vblank(struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
-void i945gm_disable_vblank(struct drm_crtc *crtc)
+void i915gm_disable_vblank(struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 
 	i8xx_disable_vblank(crtc);
 
-	if (--dev_priv->i945gm_vblank.enabled == 0)
-		schedule_work(&dev_priv->i945gm_vblank.work);
+	if (--dev_priv->vblank_enabled == 0)
+		I915_WRITE(SCPD0, _MASKED_BIT_DISABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE));
 }
 
 void i965_disable_vblank(struct drm_crtc *crtc)
@@ -3076,60 +2660,6 @@ void bdw_disable_vblank(struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
-static void i945gm_vblank_work_func(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, i945gm_vblank.work);
-
-	/*
-	 * Vblank interrupts fail to wake up the device from C3,
-	 * hence we want to prevent C3 usage while vblank interrupts
-	 * are enabled.
-	 */
-	pm_qos_update_request(&dev_priv->i945gm_vblank.pm_qos,
-			      READ_ONCE(dev_priv->i945gm_vblank.enabled) ?
-			      dev_priv->i945gm_vblank.c3_disable_latency :
-			      PM_QOS_DEFAULT_VALUE);
-}
-
-static int cstate_disable_latency(const char *name)
-{
-	const struct cpuidle_driver *drv;
-	int i;
-
-	drv = cpuidle_get_driver();
-	if (!drv)
-		return 0;
-
-	for (i = 0; i < drv->state_count; i++) {
-		const struct cpuidle_state *state = &drv->states[i];
-
-		if (!strcmp(state->name, name))
-			return state->exit_latency ?
-				state->exit_latency - 1 : 0;
-	}
-
-	return 0;
-}
-
-static void i945gm_vblank_work_init(struct drm_i915_private *dev_priv)
-{
-	INIT_WORK(&dev_priv->i945gm_vblank.work,
-		  i945gm_vblank_work_func);
-
-	dev_priv->i945gm_vblank.c3_disable_latency =
-		cstate_disable_latency("C3");
-	pm_qos_add_request(&dev_priv->i945gm_vblank.pm_qos,
-			   PM_QOS_CPU_DMA_LATENCY,
-			   PM_QOS_DEFAULT_VALUE);
-}
-
-static void i945gm_vblank_work_fini(struct drm_i915_private *dev_priv)
-{
-	cancel_work_sync(&dev_priv->i945gm_vblank.work);
-	pm_qos_remove_request(&dev_priv->i945gm_vblank.pm_qos);
-}
-
 static void ibx_irq_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
@@ -3212,7 +2742,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 /* drm_dma.h hooks
 */
-static void ironlake_irq_reset(struct drm_i915_private *dev_priv)
+static void ilk_irq_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
 
@@ -3246,7 +2776,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv)
 static void gen8_irq_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
-	int pipe;
+	enum pipe pipe;
 
 	gen8_master_intr_disable(dev_priv->uncore.regs);
 
@@ -3268,19 +2798,30 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv)
 		ibx_irq_reset(dev_priv);
 }
 
-static void gen11_irq_reset(struct drm_i915_private *dev_priv)
+static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
-	int pipe;
+	enum pipe pipe;
 
-	gen11_master_intr_disable(dev_priv->uncore.regs);
+	intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0);
 
-	gen11_gt_irq_reset(&dev_priv->gt);
+	if (INTEL_GEN(dev_priv) >= 12) {
+		enum transcoder trans;
 
-	intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0);
+		for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) {
+			enum intel_display_power_domain domain;
 
-	intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff);
-	intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff);
+			domain = POWER_DOMAIN_TRANSCODER(trans);
+			if (!intel_display_power_is_enabled(dev_priv, domain))
+				continue;
+
+			intel_uncore_write(uncore, TRANS_PSR_IMR(trans), 0xffffffff);
+			intel_uncore_write(uncore, TRANS_PSR_IIR(trans), 0xffffffff);
+		}
+	} else {
+		intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff);
+		intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff);
+	}
 
 	for_each_pipe(dev_priv, pipe)
 		if (intel_display_power_is_enabled(dev_priv,
@@ -3290,13 +2831,24 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv)
 	GEN3_IRQ_RESET(uncore, GEN8_DE_PORT_);
 	GEN3_IRQ_RESET(uncore, GEN8_DE_MISC_);
 	GEN3_IRQ_RESET(uncore, GEN11_DE_HPD_);
-	GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
-	GEN3_IRQ_RESET(uncore, GEN8_PCU_);
 
 	if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
 		GEN3_IRQ_RESET(uncore, SDE);
 }
 
+static void gen11_irq_reset(struct drm_i915_private *dev_priv)
+{
+	struct intel_uncore *uncore = &dev_priv->uncore;
+
+	gen11_master_intr_disable(dev_priv->uncore.regs);
+
+	gen11_gt_irq_reset(&dev_priv->gt);
+	gen11_display_irq_reset(dev_priv);
+
+	GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
+	GEN3_IRQ_RESET(uncore, GEN8_PCU_);
+}
+
 void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
 				     u8 pipe_mask)
 {
@@ -3431,42 +2983,46 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv)
+static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv,
+			      u32 sde_ddi_mask, u32 sde_tc_mask,
+			      u32 ddi_enable_mask, u32 tc_enable_mask,
+			      const u32 *pins)
 {
 	u32 hotplug_irqs, enabled_irqs;
 
-	hotplug_irqs = SDE_DDI_MASK_ICP | SDE_TC_MASK_ICP;
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_icp);
+	hotplug_irqs = sde_ddi_mask | sde_tc_mask;
+	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins);
+
+	I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ);
 
 	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
 
-	icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
-				ICP_TC_HPD_ENABLE_MASK);
+	icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask);
 }
 
+/*
+ * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only the
+ * equivalent of SDE.
+ */
 static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
-	u32 hotplug_irqs, enabled_irqs;
-
-	hotplug_irqs = SDE_DDI_MASK_TGP;
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_mcc);
-
-	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
-
-	icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0);
+	icp_hpd_irq_setup(dev_priv,
+			  SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1),
+			  ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1),
+			  hpd_icp);
 }
 
-static void tgp_hpd_irq_setup(struct drm_i915_private *dev_priv)
+/*
+ * JSP behaves exactly the same as MCC above except that port C is mapped to
+ * the DDI-C pins instead of the TC1 pins.  This means we should follow TGP's
+ * masks & tables rather than ICP's masks & tables.
+ */
+static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
-	u32 hotplug_irqs, enabled_irqs;
-
-	hotplug_irqs = SDE_DDI_MASK_TGP | SDE_TC_MASK_TGP;
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_tgp);
-
-	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
-
-	icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK,
-				TGP_TC_HPD_ENABLE_MASK);
+	icp_hpd_irq_setup(dev_priv,
+			  SDE_DDI_MASK_TGP, 0,
+			  TGP_DDI_HPD_ENABLE_MASK, 0,
+			  hpd_tgp);
 }
 
 static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv)
@@ -3506,9 +3062,13 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	gen11_hpd_detection_setup(dev_priv);
 
 	if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP)
-		tgp_hpd_irq_setup(dev_priv);
+		icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP,
+				  TGP_DDI_HPD_ENABLE_MASK,
+				  TGP_TC_HPD_ENABLE_MASK, hpd_tgp);
 	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
-		icp_hpd_irq_setup(dev_priv);
+		icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP,
+				  ICP_DDI_HPD_ENABLE_MASK,
+				  ICP_TC_HPD_ENABLE_MASK, hpd_icp);
 }
 
 static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv)
@@ -3540,6 +3100,9 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
 	u32 hotplug_irqs, enabled_irqs;
 
+	if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP)
+		I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ);
+
 	hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
 	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
 
@@ -3662,7 +3225,7 @@ static void ibx_irq_postinstall(struct drm_i915_private *dev_priv)
 		spt_hpd_detection_setup(dev_priv);
 }
 
-static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv)
+static void ilk_irq_postinstall(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
 	u32 display_mask, extra_mask;
@@ -3684,7 +3247,6 @@ static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	if (IS_HASWELL(dev_priv)) {
 		gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR);
-		intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
 		display_mask |= DE_EDP_PSR_INT_HSW;
 	}
 
@@ -3794,8 +3356,21 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	else if (IS_BROADWELL(dev_priv))
 		de_port_enables |= GEN8_PORT_DP_A_HOTPLUG;
 
-	gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR);
-	intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
+	if (INTEL_GEN(dev_priv) >= 12) {
+		enum transcoder trans;
+
+		for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) {
+			enum intel_display_power_domain domain;
+
+			domain = POWER_DOMAIN_TRANSCODER(trans);
+			if (!intel_display_power_is_enabled(dev_priv, domain))
+				continue;
+
+			gen3_assert_iir_is_zero(uncore, TRANS_PSR_IIR(trans));
+		}
+	} else {
+		gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR);
+	}
 
 	for_each_pipe(dev_priv, pipe) {
 		dev_priv->de_irq_mask[pipe] = ~de_pipe_masked;
@@ -3853,8 +3428,11 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv)
 	if (HAS_PCH_TGP(dev_priv))
 		icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK,
 					TGP_TC_HPD_ENABLE_MASK);
-	else if (HAS_PCH_MCC(dev_priv))
+	else if (HAS_PCH_JSP(dev_priv))
 		icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0);
+	else if (HAS_PCH_MCC(dev_priv))
+		icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
+					ICP_TC_HPD_ENABLE(PORT_TC1));
 	else
 		icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
 					ICP_TC_HPD_ENABLE_MASK);
@@ -4041,7 +3619,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]);
+			intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4146,7 +3724,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		I915_WRITE(GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]);
+			intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4288,10 +3866,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		I915_WRITE(GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]);
+			intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
 
 		if (iir & I915_BSD_USER_INTERRUPT)
-			intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]);
+			intel_engine_signal_breadcrumbs(dev_priv->engine[VCS0]);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4317,17 +3895,11 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int i;
 
-	if (IS_I945GM(dev_priv))
-		i945gm_vblank_work_init(dev_priv);
-
 	intel_hpd_init_work(dev_priv);
 
-	INIT_WORK(&rps->work, gen6_pm_rps_work);
-
-	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
+	INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work);
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
 
@@ -4335,33 +3907,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	if (HAS_GT_UC(dev_priv) && INTEL_GEN(dev_priv) < 11)
 		dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16;
 
-	/* Let's track the enabled rps events */
-	if (IS_VALLEYVIEW(dev_priv))
-		/* WaGsvRC0ResidencyMethod:vlv */
-		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
-	else
-		dev_priv->pm_rps_events = (GEN6_PM_RP_UP_THRESHOLD |
-					   GEN6_PM_RP_DOWN_THRESHOLD |
-					   GEN6_PM_RP_DOWN_TIMEOUT);
-
-	/* We share the register with other engine */
-	if (INTEL_GEN(dev_priv) > 9)
-		GEM_WARN_ON(dev_priv->pm_rps_events & 0xffff0000);
-
-	rps->pm_intrmsk_mbz = 0;
-
-	/*
-	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
-	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-	 *
-	 * TODO: verify if this can be reproduced on VLV,CHV.
-	 */
-	if (INTEL_GEN(dev_priv) <= 7)
-		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
-
 	dev->vblank_disable_immediate = true;
 
 	/* Most platforms treat the display irq block as an always-on
@@ -4387,8 +3932,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 		if (I915_HAS_HOTPLUG(dev_priv))
 			dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
 	} else {
-		if (HAS_PCH_MCC(dev_priv))
-			/* EHL doesn't need most of gen11_hpd_irq_setup */
+		if (HAS_PCH_JSP(dev_priv))
+			dev_priv->display.hpd_irq_setup = jsp_hpd_irq_setup;
+		else if (HAS_PCH_MCC(dev_priv))
 			dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup;
 		else if (INTEL_GEN(dev_priv) >= 11)
 			dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup;
@@ -4411,9 +3957,6 @@ void intel_irq_fini(struct drm_i915_private *i915)
 {
 	int i;
 
-	if (IS_I945GM(i915))
-		i945gm_vblank_work_fini(i915);
-
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		kfree(i915->l3_parity.remap_info[i]);
 }
@@ -4437,7 +3980,7 @@ static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			return gen8_irq_handler;
 		else
-			return ironlake_irq_handler;
+			return ilk_irq_handler;
 	}
 }
 
@@ -4460,7 +4003,7 @@ static void intel_irq_reset(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			gen8_irq_reset(dev_priv);
 		else
-			ironlake_irq_reset(dev_priv);
+			ilk_irq_reset(dev_priv);
 	}
 }
 
@@ -4483,7 +4026,7 @@ static void intel_irq_postinstall(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			gen8_irq_postinstall(dev_priv);
 		else
-			ironlake_irq_postinstall(dev_priv);
+			ilk_irq_postinstall(dev_priv);
 	}
 }
 
@@ -4538,10 +4081,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv)
 	int irq = dev_priv->drm.pdev->irq;
 
 	/*
-	 * FIXME we can get called twice during driver load
-	 * error handling due to intel_modeset_cleanup()
-	 * calling us out of sequence. Would be nice if
-	 * it didn't do that...
+	 * FIXME we can get called twice during driver probe
+	 * error handling as well as during driver remove due to
+	 * intel_modeset_driver_remove() calling us out of sequence.
+	 * Would be nice if it didn't do that...
 	 */
 	if (!dev_priv->drm.irq_enabled)
 		return;
diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h
index 8e7e6071777e..812c47a9c2d6 100644
--- a/drivers/gpu/drm/i915/i915_irq.h
+++ b/drivers/gpu/drm/i915/i915_irq.h
@@ -17,14 +17,8 @@ struct drm_device;
 struct drm_display_mode;
 struct drm_i915_private;
 struct intel_crtc;
-struct intel_crtc;
-struct intel_gt;
-struct intel_guc;
 struct intel_uncore;
 
-void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir);
-void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
-
 void intel_irq_init(struct drm_i915_private *dev_priv);
 void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
@@ -106,12 +100,6 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
 				     u8 pipe_mask);
 void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
 				     u8 pipe_mask);
-void gen9_reset_guc_interrupts(struct intel_guc *guc);
-void gen9_enable_guc_interrupts(struct intel_guc *guc);
-void gen9_disable_guc_interrupts(struct intel_guc *guc);
-void gen11_reset_guc_interrupts(struct intel_guc *guc);
-void gen11_enable_guc_interrupts(struct intel_guc *guc);
-void gen11_disable_guc_interrupts(struct intel_guc *guc);
 
 bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 			      bool in_vblank_irq, int *vpos, int *hpos,
@@ -122,12 +110,12 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc);
 u32 g4x_get_vblank_counter(struct drm_crtc *crtc);
 
 int i8xx_enable_vblank(struct drm_crtc *crtc);
-int i945gm_enable_vblank(struct drm_crtc *crtc);
+int i915gm_enable_vblank(struct drm_crtc *crtc);
 int i965_enable_vblank(struct drm_crtc *crtc);
 int ilk_enable_vblank(struct drm_crtc *crtc);
 int bdw_enable_vblank(struct drm_crtc *crtc);
 void i8xx_disable_vblank(struct drm_crtc *crtc);
-void i945gm_disable_vblank(struct drm_crtc *crtc);
+void i915gm_disable_vblank(struct drm_crtc *crtc);
 void i965_disable_vblank(struct drm_crtc *crtc);
 void ilk_disable_vblank(struct drm_crtc *crtc);
 void bdw_disable_vblank(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 07b04b0acb77..fdd550405fd3 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -27,6 +27,12 @@
 
 #include "i915_memcpy.h"
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+#define CI_BUG_ON(expr) BUG_ON(expr)
+#else
+#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
+#endif
+
 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 
 #ifdef CONFIG_AS_MOVNTDQA
@@ -34,7 +40,6 @@ static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
 {
 	kernel_fpu_begin();
 
-	len >>= 4;
 	while (len >= 4) {
 		asm("movntdqa   (%0), %%xmm0\n"
 		    "movntdqa 16(%0), %%xmm1\n"
@@ -59,6 +64,38 @@ static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
 
 	kernel_fpu_end();
 }
+
+static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
+{
+	kernel_fpu_begin();
+
+	while (len >= 4) {
+		asm("movntdqa   (%0), %%xmm0\n"
+		    "movntdqa 16(%0), %%xmm1\n"
+		    "movntdqa 32(%0), %%xmm2\n"
+		    "movntdqa 48(%0), %%xmm3\n"
+		    "movups %%xmm0,   (%1)\n"
+		    "movups %%xmm1, 16(%1)\n"
+		    "movups %%xmm2, 32(%1)\n"
+		    "movups %%xmm3, 48(%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+		src += 64;
+		dst += 64;
+		len -= 4;
+	}
+	while (len--) {
+		asm("movntdqa (%0), %%xmm0\n"
+		    "movups %%xmm0, (%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+		src += 16;
+		dst += 16;
+	}
+
+	kernel_fpu_end();
+}
+#else
+static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) {}
+static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) {}
 #endif
 
 /**
@@ -83,17 +120,47 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
 		return false;
 
-#ifdef CONFIG_AS_MOVNTDQA
 	if (static_branch_likely(&has_movntdqa)) {
 		if (likely(len))
-			__memcpy_ntdqa(dst, src, len);
+			__memcpy_ntdqa(dst, src, len >> 4);
 		return true;
 	}
-#endif
 
 	return false;
 }
 
+/**
+ * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
+ * @dst: destination pointer
+ * @src: source pointer
+ * @len: how many bytes to copy
+ *
+ * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
+ * @src to @dst using * non-temporal instructions where available, but
+ * accepts that its arguments may not be aligned, but are valid for the
+ * potential 16-byte read past the end.
+ */
+void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len)
+{
+	unsigned long addr;
+
+	CI_BUG_ON(!i915_has_memcpy_from_wc());
+
+	addr = (unsigned long)src;
+	if (!IS_ALIGNED(addr, 16)) {
+		unsigned long x = min(ALIGN(addr, 16) - addr, len);
+
+		memcpy(dst, src, x);
+
+		len -= x;
+		dst += x;
+		src += x;
+	}
+
+	if (likely(len))
+		__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
+}
+
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
 {
 	/*
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
index 970d84b16987..e36d30edd987 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ b/drivers/gpu/drm/i915/i915_memcpy.h
@@ -11,7 +11,9 @@
 struct drm_i915_private;
 
 void i915_memcpy_init_early(struct drm_i915_private *i915);
+
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len);
 
 /* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
  * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 318562ce64c0..b6376b25ef63 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -33,6 +33,9 @@ struct remap_pfn {
 	struct mm_struct *mm;
 	unsigned long pfn;
 	pgprot_t prot;
+
+	struct sgt_iter sgt;
+	resource_size_t iobase;
 };
 
 static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
@@ -46,6 +49,35 @@ static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
 	return 0;
 }
 
+#define use_dma(io) ((io) != -1)
+
+static inline unsigned long sgt_pfn(const struct remap_pfn *r)
+{
+	if (use_dma(r->iobase))
+		return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT;
+	else
+		return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT);
+}
+
+static int remap_sg(pte_t *pte, unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+
+	if (GEM_WARN_ON(!r->sgt.pfn))
+		return -EINVAL;
+
+	/* Special PTE are not associated with any struct page */
+	set_pte_at(r->mm, addr, pte,
+		   pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot)));
+	r->pfn++; /* track insertions in case we need to unwind later */
+
+	r->sgt.curr += PAGE_SIZE;
+	if (r->sgt.curr >= r->sgt.max)
+		r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase));
+
+	return 0;
+}
+
 /**
  * remap_io_mapping - remap an IO mapping to userspace
  * @vma: user vma to map to
@@ -80,3 +112,40 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 	return 0;
 }
+
+/**
+ * remap_io_sg - remap an IO mapping to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @size: size of map area
+ * @sgl: Start sg entry
+ * @iobase: Use stored dma address offset by this address or pfn if -1
+ *
+ *  Note: this is only safe if the mm semaphore is held when called.
+ */
+int remap_io_sg(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long size,
+		struct scatterlist *sgl, resource_size_t iobase)
+{
+	struct remap_pfn r = {
+		.mm = vma->vm_mm,
+		.prot = vma->vm_page_prot,
+		.sgt = __sgt_iter(sgl, use_dma(iobase)),
+		.iobase = iobase,
+	};
+	int err;
+
+	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
+	GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
+
+	if (!use_dma(iobase))
+		flush_cache_range(vma, addr, size);
+
+	err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
+	if (unlikely(err)) {
+		zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 296452f9efe4..1dd1f3652795 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400,
 
 i915_param_named_unsafe(enable_dc, int, 0400,
 	"Enable power-saving display C-states. "
-	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)");
+	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
+	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
 i915_param_named_unsafe(enable_fbc, int, 0600,
 	"Enable frame buffer compression for power savings "
@@ -165,7 +166,7 @@ i915_param_named_unsafe(enable_dp_mst, bool, 0600,
 	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-i915_param_named_unsafe(inject_load_failure, uint, 0400,
+i915_param_named_unsafe(inject_probe_failure, uint, 0400,
 	"Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)");
 #endif
 
@@ -178,6 +179,11 @@ i915_param_named(enable_gvt, bool, 0400,
 	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
 #endif
 
+#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)
+i915_param_named_unsafe(fake_lmem_start, ulong, 0600,
+	"Fake LMEM start offset (default: 0)");
+#endif
+
 static __always_inline void _print_param(struct drm_printer *p,
 					 const char *name,
 					 const char *type,
@@ -189,6 +195,8 @@ static __always_inline void _print_param(struct drm_printer *p,
 		drm_printf(p, "i915.%s=%d\n", name, *(const int *)x);
 	else if (!__builtin_strcmp(type, "unsigned int"))
 		drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x);
+	else if (!__builtin_strcmp(type, "unsigned long"))
+		drm_printf(p, "i915.%s=%lu\n", name, *(const unsigned long *)x);
 	else if (!__builtin_strcmp(type, "char *"))
 		drm_printf(p, "i915.%s=%s\n", name, *(const char **)x);
 	else
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d29ade3b7de6..31b88f297fbc 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -61,11 +61,12 @@ struct drm_printer;
 	param(char *, dmc_firmware_path, NULL) \
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
 	param(int, edp_vswing, 0) \
-	param(int, reset, 2) \
-	param(unsigned int, inject_load_failure, 0) \
+	param(int, reset, 3) \
+	param(unsigned int, inject_probe_failure, 0) \
 	param(int, fastboot, -1) \
 	param(int, enable_dpcd_backlight, 0) \
 	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \
+	param(unsigned long, fake_lmem_start, 0) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \
 	param(bool, enable_hangcheck, true) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 1974e4c78a43..83f01401b8b5 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -23,7 +23,6 @@
  */
 
 #include <linux/console.h>
-#include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 
 #include <drm/drm_drv.h>
@@ -31,6 +30,7 @@
 #include "display/intel_fbdev.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_globals.h"
 #include "i915_selftest.h"
 
@@ -118,6 +118,14 @@
 		[PIPE_C] = IVB_CURSOR_C_OFFSET, \
 	}
 
+#define TGL_CURSOR_OFFSETS \
+	.cursor_offsets = { \
+		[PIPE_A] = CURSOR_A_OFFSET, \
+		[PIPE_B] = IVB_CURSOR_B_OFFSET, \
+		[PIPE_C] = IVB_CURSOR_C_OFFSET, \
+		[PIPE_D] = TGL_CURSOR_D_OFFSET, \
+	}
+
 #define I9XX_COLORS \
 	.color = { .gamma_lut_size = 256 }
 #define I965_COLORS \
@@ -144,10 +152,13 @@
 #define GEN_DEFAULT_PAGE_SIZES \
 	.page_sizes = I915_GTT_PAGE_SIZE_4K
 
+#define GEN_DEFAULT_REGIONS \
+	.memory_regions = REGION_SMEM | REGION_STOLEN
+
 #define I830_FEATURES \
 	GEN(2), \
 	.is_mobile = 1, \
-	.num_pipes = 2, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
 	.display.has_overlay = 1, \
 	.display.cursor_needs_physical = 1, \
 	.display.overlay_needs_physical = 1, \
@@ -161,11 +172,12 @@
 	I9XX_PIPE_OFFSETS, \
 	I9XX_CURSOR_OFFSETS, \
 	I9XX_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
 #define I845_FEATURES \
 	GEN(2), \
-	.num_pipes = 1, \
+	.pipe_mask = BIT(PIPE_A), \
 	.display.has_overlay = 1, \
 	.display.overlay_needs_physical = 1, \
 	.display.has_gmch = 1, \
@@ -178,32 +190,33 @@
 	I845_PIPE_OFFSETS, \
 	I845_CURSOR_OFFSETS, \
 	I9XX_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i830_info = {
+static const struct intel_device_info i830_info = {
 	I830_FEATURES,
 	PLATFORM(INTEL_I830),
 };
 
-static const struct intel_device_info intel_i845g_info = {
+static const struct intel_device_info i845g_info = {
 	I845_FEATURES,
 	PLATFORM(INTEL_I845G),
 };
 
-static const struct intel_device_info intel_i85x_info = {
+static const struct intel_device_info i85x_info = {
 	I830_FEATURES,
 	PLATFORM(INTEL_I85X),
 	.display.has_fbc = 1,
 };
 
-static const struct intel_device_info intel_i865g_info = {
+static const struct intel_device_info i865g_info = {
 	I845_FEATURES,
 	PLATFORM(INTEL_I865G),
 };
 
 #define GEN3_FEATURES \
 	GEN(3), \
-	.num_pipes = 2, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
 	.engine_mask = BIT(RCS0), \
@@ -212,9 +225,10 @@ static const struct intel_device_info intel_i865g_info = {
 	I9XX_PIPE_OFFSETS, \
 	I9XX_CURSOR_OFFSETS, \
 	I9XX_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i915g_info = {
+static const struct intel_device_info i915g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915G),
 	.has_coherent_ggtt = false,
@@ -225,7 +239,7 @@ static const struct intel_device_info intel_i915g_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i915gm_info = {
+static const struct intel_device_info i915gm_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915GM),
 	.is_mobile = 1,
@@ -238,7 +252,7 @@ static const struct intel_device_info intel_i915gm_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i945g_info = {
+static const struct intel_device_info i945g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I945G),
 	.display.has_hotplug = 1,
@@ -249,7 +263,7 @@ static const struct intel_device_info intel_i945g_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i945gm_info = {
+static const struct intel_device_info i945gm_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I945GM),
 	.is_mobile = 1,
@@ -263,21 +277,21 @@ static const struct intel_device_info intel_i945gm_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_g33_info = {
+static const struct intel_device_info g33_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_G33),
 	.display.has_hotplug = 1,
 	.display.has_overlay = 1,
 };
 
-static const struct intel_device_info intel_pineview_g_info = {
+static const struct intel_device_info pnv_g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_PINEVIEW),
 	.display.has_hotplug = 1,
 	.display.has_overlay = 1,
 };
 
-static const struct intel_device_info intel_pineview_m_info = {
+static const struct intel_device_info pnv_m_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_PINEVIEW),
 	.is_mobile = 1,
@@ -287,7 +301,7 @@ static const struct intel_device_info intel_pineview_m_info = {
 
 #define GEN4_FEATURES \
 	GEN(4), \
-	.num_pipes = 2, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
 	.display.has_hotplug = 1, \
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
@@ -297,9 +311,10 @@ static const struct intel_device_info intel_pineview_m_info = {
 	I9XX_PIPE_OFFSETS, \
 	I9XX_CURSOR_OFFSETS, \
 	I965_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i965g_info = {
+static const struct intel_device_info i965g_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_I965G),
 	.display.has_overlay = 1,
@@ -307,7 +322,7 @@ static const struct intel_device_info intel_i965g_info = {
 	.has_snoop = false,
 };
 
-static const struct intel_device_info intel_i965gm_info = {
+static const struct intel_device_info i965gm_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_I965GM),
 	.is_mobile = 1,
@@ -318,14 +333,14 @@ static const struct intel_device_info intel_i965gm_info = {
 	.has_snoop = false,
 };
 
-static const struct intel_device_info intel_g45_info = {
+static const struct intel_device_info g45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_G45),
 	.engine_mask = BIT(RCS0) | BIT(VCS0),
 	.gpu_reset_clobbers_display = false,
 };
 
-static const struct intel_device_info intel_gm45_info = {
+static const struct intel_device_info gm45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_GM45),
 	.is_mobile = 1,
@@ -337,7 +352,7 @@ static const struct intel_device_info intel_gm45_info = {
 
 #define GEN5_FEATURES \
 	GEN(5), \
-	.num_pipes = 2, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
 	.display.has_hotplug = 1, \
 	.engine_mask = BIT(RCS0) | BIT(VCS0), \
 	.has_snoop = true, \
@@ -347,14 +362,15 @@ static const struct intel_device_info intel_gm45_info = {
 	I9XX_PIPE_OFFSETS, \
 	I9XX_CURSOR_OFFSETS, \
 	ILK_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_ironlake_d_info = {
+static const struct intel_device_info ilk_d_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 };
 
-static const struct intel_device_info intel_ironlake_m_info = {
+static const struct intel_device_info ilk_m_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 	.is_mobile = 1,
@@ -363,7 +379,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 
 #define GEN6_FEATURES \
 	GEN(6), \
-	.num_pipes = 2, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
 	.display.has_hotplug = 1, \
 	.display.has_fbc = 1, \
 	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
@@ -377,18 +393,19 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	I9XX_PIPE_OFFSETS, \
 	I9XX_CURSOR_OFFSETS, \
 	ILK_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
 #define SNB_D_PLATFORM \
 	GEN6_FEATURES, \
 	PLATFORM(INTEL_SANDYBRIDGE)
 
-static const struct intel_device_info intel_sandybridge_d_gt1_info = {
+static const struct intel_device_info snb_d_gt1_info = {
 	SNB_D_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_sandybridge_d_gt2_info = {
+static const struct intel_device_info snb_d_gt2_info = {
 	SNB_D_PLATFORM,
 	.gt = 2,
 };
@@ -399,19 +416,19 @@ static const struct intel_device_info intel_sandybridge_d_gt2_info = {
 	.is_mobile = 1
 
 
-static const struct intel_device_info intel_sandybridge_m_gt1_info = {
+static const struct intel_device_info snb_m_gt1_info = {
 	SNB_M_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_sandybridge_m_gt2_info = {
+static const struct intel_device_info snb_m_gt2_info = {
 	SNB_M_PLATFORM,
 	.gt = 2,
 };
 
 #define GEN7_FEATURES  \
 	GEN(7), \
-	.num_pipes = 3, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \
 	.display.has_hotplug = 1, \
 	.display.has_fbc = 1, \
 	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
@@ -425,19 +442,20 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	IVB_PIPE_OFFSETS, \
 	IVB_CURSOR_OFFSETS, \
 	IVB_COLORS, \
-	GEN_DEFAULT_PAGE_SIZES
+	GEN_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
 #define IVB_D_PLATFORM \
 	GEN7_FEATURES, \
 	PLATFORM(INTEL_IVYBRIDGE), \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_ivybridge_d_gt1_info = {
+static const struct intel_device_info ivb_d_gt1_info = {
 	IVB_D_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_ivybridge_d_gt2_info = {
+static const struct intel_device_info ivb_d_gt2_info = {
 	IVB_D_PLATFORM,
 	.gt = 2,
 };
@@ -448,29 +466,29 @@ static const struct intel_device_info intel_ivybridge_d_gt2_info = {
 	.is_mobile = 1, \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_ivybridge_m_gt1_info = {
+static const struct intel_device_info ivb_m_gt1_info = {
 	IVB_M_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_ivybridge_m_gt2_info = {
+static const struct intel_device_info ivb_m_gt2_info = {
 	IVB_M_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_ivybridge_q_info = {
+static const struct intel_device_info ivb_q_info = {
 	GEN7_FEATURES,
 	PLATFORM(INTEL_IVYBRIDGE),
 	.gt = 2,
-	.num_pipes = 0, /* legal, last one wins */
+	.pipe_mask = 0, /* legal, last one wins */
 	.has_l3_dpf = 1,
 };
 
-static const struct intel_device_info intel_valleyview_info = {
+static const struct intel_device_info vlv_info = {
 	PLATFORM(INTEL_VALLEYVIEW),
 	GEN(7),
 	.is_lp = 1,
-	.num_pipes = 2,
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B),
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
 	.has_rps = true,
@@ -486,6 +504,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	I9XX_CURSOR_OFFSETS,
 	I965_COLORS,
 	GEN_DEFAULT_PAGE_SIZES,
+	GEN_DEFAULT_REGIONS,
 };
 
 #define G75_FEATURES  \
@@ -504,17 +523,17 @@ static const struct intel_device_info intel_valleyview_info = {
 	PLATFORM(INTEL_HASWELL), \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_haswell_gt1_info = {
+static const struct intel_device_info hsw_gt1_info = {
 	HSW_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_haswell_gt2_info = {
+static const struct intel_device_info hsw_gt2_info = {
 	HSW_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_haswell_gt3_info = {
+static const struct intel_device_info hsw_gt3_info = {
 	HSW_PLATFORM,
 	.gt = 3,
 };
@@ -532,17 +551,17 @@ static const struct intel_device_info intel_haswell_gt3_info = {
 	GEN8_FEATURES, \
 	PLATFORM(INTEL_BROADWELL)
 
-static const struct intel_device_info intel_broadwell_gt1_info = {
+static const struct intel_device_info bdw_gt1_info = {
 	BDW_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_broadwell_gt2_info = {
+static const struct intel_device_info bdw_gt2_info = {
 	BDW_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_broadwell_rsvd_info = {
+static const struct intel_device_info bdw_rsvd_info = {
 	BDW_PLATFORM,
 	.gt = 3,
 	/* According to the device ID those devices are GT3, they were
@@ -550,17 +569,17 @@ static const struct intel_device_info intel_broadwell_rsvd_info = {
 	 */
 };
 
-static const struct intel_device_info intel_broadwell_gt3_info = {
+static const struct intel_device_info bdw_gt3_info = {
 	BDW_PLATFORM,
 	.gt = 3,
 	.engine_mask =
 		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
 };
 
-static const struct intel_device_info intel_cherryview_info = {
+static const struct intel_device_info chv_info = {
 	PLATFORM(INTEL_CHERRYVIEW),
 	GEN(8),
-	.num_pipes = 3,
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
 	.display.has_hotplug = 1,
 	.is_lp = 1,
 	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0),
@@ -570,7 +589,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_rps = true,
 	.has_logical_ring_contexts = 1,
 	.display.has_gmch = 1,
-	.ppgtt_type = INTEL_PPGTT_FULL,
+	.ppgtt_type = INTEL_PPGTT_ALIASING,
 	.ppgtt_size = 32,
 	.has_reset_engine = 1,
 	.has_snoop = true,
@@ -580,6 +599,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	CHV_CURSOR_OFFSETS,
 	CHV_COLORS,
 	GEN_DEFAULT_PAGE_SIZES,
+	GEN_DEFAULT_REGIONS,
 };
 
 #define GEN9_DEFAULT_PAGE_SIZES \
@@ -593,6 +613,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_logical_ring_preemption = 1, \
 	.display.has_csr = 1, \
 	.has_gt_uc = 1, \
+	.display.has_hdcp = 1, \
 	.display.has_ipc = 1, \
 	.ddb_size = 896
 
@@ -600,12 +621,12 @@ static const struct intel_device_info intel_cherryview_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_SKYLAKE)
 
-static const struct intel_device_info intel_skylake_gt1_info = {
+static const struct intel_device_info skl_gt1_info = {
 	SKL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_skylake_gt2_info = {
+static const struct intel_device_info skl_gt2_info = {
 	SKL_PLATFORM,
 	.gt = 2,
 };
@@ -616,12 +637,12 @@ static const struct intel_device_info intel_skylake_gt2_info = {
 		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)
 
 
-static const struct intel_device_info intel_skylake_gt3_info = {
+static const struct intel_device_info skl_gt3_info = {
 	SKL_GT3_PLUS_PLATFORM,
 	.gt = 3,
 };
 
-static const struct intel_device_info intel_skylake_gt4_info = {
+static const struct intel_device_info skl_gt4_info = {
 	SKL_GT3_PLUS_PLATFORM,
 	.gt = 4,
 };
@@ -631,11 +652,12 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	.is_lp = 1, \
 	.display.has_hotplug = 1, \
 	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
-	.num_pipes = 3, \
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \
 	.has_64bit_reloc = 1, \
 	.display.has_ddi = 1, \
 	.has_fpga_dbg = 1, \
 	.display.has_fbc = 1, \
+	.display.has_hdcp = 1, \
 	.display.has_psr = 1, \
 	.has_runtime_pm = 1, \
 	.display.has_csr = 1, \
@@ -654,15 +676,16 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	HSW_PIPE_OFFSETS, \
 	IVB_CURSOR_OFFSETS, \
 	IVB_COLORS, \
-	GEN9_DEFAULT_PAGE_SIZES
+	GEN9_DEFAULT_PAGE_SIZES, \
+	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_broxton_info = {
+static const struct intel_device_info bxt_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_BROXTON),
 	.ddb_size = 512,
 };
 
-static const struct intel_device_info intel_geminilake_info = {
+static const struct intel_device_info glk_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_GEMINILAKE),
 	.ddb_size = 1024,
@@ -673,17 +696,17 @@ static const struct intel_device_info intel_geminilake_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_KABYLAKE)
 
-static const struct intel_device_info intel_kabylake_gt1_info = {
+static const struct intel_device_info kbl_gt1_info = {
 	KBL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_kabylake_gt2_info = {
+static const struct intel_device_info kbl_gt2_info = {
 	KBL_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_kabylake_gt3_info = {
+static const struct intel_device_info kbl_gt3_info = {
 	KBL_PLATFORM,
 	.gt = 3,
 	.engine_mask =
@@ -694,17 +717,17 @@ static const struct intel_device_info intel_kabylake_gt3_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_COFFEELAKE)
 
-static const struct intel_device_info intel_coffeelake_gt1_info = {
+static const struct intel_device_info cfl_gt1_info = {
 	CFL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_coffeelake_gt2_info = {
+static const struct intel_device_info cfl_gt2_info = {
 	CFL_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_coffeelake_gt3_info = {
+static const struct intel_device_info cfl_gt3_info = {
 	CFL_PLATFORM,
 	.gt = 3,
 	.engine_mask =
@@ -715,10 +738,11 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
 	GEN9_FEATURES, \
 	GEN(10), \
 	.ddb_size = 1024, \
+	.display.has_dsc = 1, \
 	.has_coherent_ggtt = false, \
 	GLK_COLORS
 
-static const struct intel_device_info intel_cannonlake_info = {
+static const struct intel_device_info cnl_info = {
 	GEN10_FEATURES,
 	PLATFORM(INTEL_CANNONLAKE),
 	.gt = 2,
@@ -753,14 +777,14 @@ static const struct intel_device_info intel_cannonlake_info = {
 	.has_logical_ring_elsq = 1, \
 	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
 
-static const struct intel_device_info intel_icelake_11_info = {
+static const struct intel_device_info icl_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ICELAKE),
 	.engine_mask =
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
 };
 
-static const struct intel_device_info intel_elkhartlake_info = {
+static const struct intel_device_info ehl_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ELKHARTLAKE),
 	.require_force_probe = 1,
@@ -787,18 +811,25 @@ static const struct intel_device_info intel_elkhartlake_info = {
 		[TRANSCODER_DSI_0] = TRANSCODER_DSI0_OFFSET, \
 		[TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \
 	}, \
-	.has_global_mocs = 1
+	TGL_CURSOR_OFFSETS, \
+	.has_global_mocs = 1, \
+	.display.has_dsb = 1
 
-static const struct intel_device_info intel_tigerlake_12_info = {
+static const struct intel_device_info tgl_info = {
 	GEN12_FEATURES,
 	PLATFORM(INTEL_TIGERLAKE),
-	.num_pipes = 4,
+	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
 	.require_force_probe = 1,
 	.display.has_modular_fia = 1,
 	.engine_mask =
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
+	.has_rps = false, /* XXX disabled for debugging */
 };
 
+#define GEN12_DGFX_FEATURES \
+	GEN12_FEATURES, \
+	.is_dgfx = 1
+
 #undef GEN
 #undef PLATFORM
 
@@ -809,68 +840,70 @@ static const struct intel_device_info intel_tigerlake_12_info = {
  * PCI ID matches, otherwise we'll use the wrong info struct above.
  */
 static const struct pci_device_id pciidlist[] = {
-	INTEL_I830_IDS(&intel_i830_info),
-	INTEL_I845G_IDS(&intel_i845g_info),
-	INTEL_I85X_IDS(&intel_i85x_info),
-	INTEL_I865G_IDS(&intel_i865g_info),
-	INTEL_I915G_IDS(&intel_i915g_info),
-	INTEL_I915GM_IDS(&intel_i915gm_info),
-	INTEL_I945G_IDS(&intel_i945g_info),
-	INTEL_I945GM_IDS(&intel_i945gm_info),
-	INTEL_I965G_IDS(&intel_i965g_info),
-	INTEL_G33_IDS(&intel_g33_info),
-	INTEL_I965GM_IDS(&intel_i965gm_info),
-	INTEL_GM45_IDS(&intel_gm45_info),
-	INTEL_G45_IDS(&intel_g45_info),
-	INTEL_PINEVIEW_G_IDS(&intel_pineview_g_info),
-	INTEL_PINEVIEW_M_IDS(&intel_pineview_m_info),
-	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),
-	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),
-	INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info),
-	INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info),
-	INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info),
-	INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info),
-	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */
-	INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info),
-	INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info),
-	INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info),
-	INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info),
-	INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info),
-	INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info),
-	INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info),
-	INTEL_VLV_IDS(&intel_valleyview_info),
-	INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info),
-	INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info),
-	INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info),
-	INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info),
-	INTEL_CHV_IDS(&intel_cherryview_info),
-	INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info),
-	INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info),
-	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),
-	INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info),
-	INTEL_BXT_IDS(&intel_broxton_info),
-	INTEL_GLK_IDS(&intel_geminilake_info),
-	INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info),
-	INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info),
-	INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info),
-	INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info),
-	INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),
-	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
-	INTEL_WHL_U_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_WHL_U_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_AML_CFL_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_WHL_U_GT3_IDS(&intel_coffeelake_gt3_info),
-	INTEL_CML_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CNL_IDS(&intel_cannonlake_info),
-	INTEL_ICL_11_IDS(&intel_icelake_11_info),
-	INTEL_EHL_IDS(&intel_elkhartlake_info),
-	INTEL_TGL_12_IDS(&intel_tigerlake_12_info),
+	INTEL_I830_IDS(&i830_info),
+	INTEL_I845G_IDS(&i845g_info),
+	INTEL_I85X_IDS(&i85x_info),
+	INTEL_I865G_IDS(&i865g_info),
+	INTEL_I915G_IDS(&i915g_info),
+	INTEL_I915GM_IDS(&i915gm_info),
+	INTEL_I945G_IDS(&i945g_info),
+	INTEL_I945GM_IDS(&i945gm_info),
+	INTEL_I965G_IDS(&i965g_info),
+	INTEL_G33_IDS(&g33_info),
+	INTEL_I965GM_IDS(&i965gm_info),
+	INTEL_GM45_IDS(&gm45_info),
+	INTEL_G45_IDS(&g45_info),
+	INTEL_PINEVIEW_G_IDS(&pnv_g_info),
+	INTEL_PINEVIEW_M_IDS(&pnv_m_info),
+	INTEL_IRONLAKE_D_IDS(&ilk_d_info),
+	INTEL_IRONLAKE_M_IDS(&ilk_m_info),
+	INTEL_SNB_D_GT1_IDS(&snb_d_gt1_info),
+	INTEL_SNB_D_GT2_IDS(&snb_d_gt2_info),
+	INTEL_SNB_M_GT1_IDS(&snb_m_gt1_info),
+	INTEL_SNB_M_GT2_IDS(&snb_m_gt2_info),
+	INTEL_IVB_Q_IDS(&ivb_q_info), /* must be first IVB */
+	INTEL_IVB_M_GT1_IDS(&ivb_m_gt1_info),
+	INTEL_IVB_M_GT2_IDS(&ivb_m_gt2_info),
+	INTEL_IVB_D_GT1_IDS(&ivb_d_gt1_info),
+	INTEL_IVB_D_GT2_IDS(&ivb_d_gt2_info),
+	INTEL_HSW_GT1_IDS(&hsw_gt1_info),
+	INTEL_HSW_GT2_IDS(&hsw_gt2_info),
+	INTEL_HSW_GT3_IDS(&hsw_gt3_info),
+	INTEL_VLV_IDS(&vlv_info),
+	INTEL_BDW_GT1_IDS(&bdw_gt1_info),
+	INTEL_BDW_GT2_IDS(&bdw_gt2_info),
+	INTEL_BDW_GT3_IDS(&bdw_gt3_info),
+	INTEL_BDW_RSVD_IDS(&bdw_rsvd_info),
+	INTEL_CHV_IDS(&chv_info),
+	INTEL_SKL_GT1_IDS(&skl_gt1_info),
+	INTEL_SKL_GT2_IDS(&skl_gt2_info),
+	INTEL_SKL_GT3_IDS(&skl_gt3_info),
+	INTEL_SKL_GT4_IDS(&skl_gt4_info),
+	INTEL_BXT_IDS(&bxt_info),
+	INTEL_GLK_IDS(&glk_info),
+	INTEL_KBL_GT1_IDS(&kbl_gt1_info),
+	INTEL_KBL_GT2_IDS(&kbl_gt2_info),
+	INTEL_KBL_GT3_IDS(&kbl_gt3_info),
+	INTEL_KBL_GT4_IDS(&kbl_gt3_info),
+	INTEL_AML_KBL_GT2_IDS(&kbl_gt2_info),
+	INTEL_CFL_S_GT1_IDS(&cfl_gt1_info),
+	INTEL_CFL_S_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_H_GT1_IDS(&cfl_gt1_info),
+	INTEL_CFL_H_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_U_GT3_IDS(&cfl_gt3_info),
+	INTEL_WHL_U_GT1_IDS(&cfl_gt1_info),
+	INTEL_WHL_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_AML_CFL_GT2_IDS(&cfl_gt2_info),
+	INTEL_WHL_U_GT3_IDS(&cfl_gt3_info),
+	INTEL_CML_GT1_IDS(&cfl_gt1_info),
+	INTEL_CML_GT2_IDS(&cfl_gt2_info),
+	INTEL_CML_U_GT1_IDS(&cfl_gt1_info),
+	INTEL_CML_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_CNL_IDS(&cnl_info),
+	INTEL_ICL_11_IDS(&icl_info),
+	INTEL_EHL_IDS(&ehl_info),
+	INTEL_TGL_12_IDS(&tgl_info),
 	{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
@@ -973,6 +1006,12 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return err > 0 ? -ENOTTY : err;
 	}
 
+	err = i915_perf_selftests(pdev);
+	if (err) {
+		i915_pci_remove(pdev);
+		return err > 0 ? -ENOTTY : err;
+	}
+
 	return 0;
 }
 
@@ -1015,7 +1054,12 @@ static int __init i915_init(void)
 		return 0;
 	}
 
-	return pci_register_driver(&i915_pci_driver);
+	err = pci_register_driver(&i915_pci_driver);
+	if (err)
+		return err;
+
+	i915_perf_sysctl_register();
+	return 0;
 }
 
 static void __exit i915_exit(void)
@@ -1023,6 +1067,7 @@ static void __exit i915_exit(void)
 	if (!i915_pci_driver.driver.owner)
 		return;
 
+	i915_perf_sysctl_unregister();
 	pci_unregister_driver(&i915_pci_driver);
 	i915_globals_exit();
 }
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e42b86827d6b..0f556d80ba36 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,8 +196,11 @@
 #include <linux/uuid.h>
 
 #include "gem/i915_gem_context.h"
-#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_perf.h"
@@ -215,6 +218,7 @@
 #include "oa/i915_oa_cflgt3.h"
 #include "oa/i915_oa_cnl.h"
 #include "oa/i915_oa_icl.h"
+#include "oa/i915_oa_tgl.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -291,6 +295,7 @@ static u32 i915_perf_stream_paranoid = true;
 
 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */
 #define OAREPORT_REASON_MASK           0x3f
+#define OAREPORT_REASON_MASK_EXTENDED  0x7f
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
@@ -336,17 +341,24 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
 	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
 };
 
+static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+};
+
 #define SAMPLE_OA_REPORT      (1<<0)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ *                   context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
+ * @engine: The engine (typically rcs0) being monitored by the OA unit
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -356,6 +368,7 @@ struct perf_open_properties {
 	u32 sample_flags;
 
 	u64 single_context:1;
+	u64 hold_preemption:1;
 	u64 ctx_handle;
 
 	/* OA sampling state */
@@ -363,69 +376,76 @@ struct perf_open_properties {
 	int oa_format;
 	bool oa_periodic;
 	int oa_period_exponent;
+
+	struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+	struct llist_node node;
+
+	struct i915_oa_config *oa_config;
+	struct i915_vma *vma;
+};
+
+static struct ctl_table_header *sysctl_header;
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-			   struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
-	if (!PTR_ERR(oa_config->flex_regs))
-		kfree(oa_config->flex_regs);
-	if (!PTR_ERR(oa_config->b_counter_regs))
-		kfree(oa_config->b_counter_regs);
-	if (!PTR_ERR(oa_config->mux_regs))
-		kfree(oa_config->mux_regs);
-	kfree(oa_config);
-}
+	struct i915_oa_config *oa_config =
+		container_of(ref, typeof(*oa_config), ref);
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-			  struct i915_oa_config *oa_config)
-{
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	kfree(oa_config->flex_regs);
+	kfree(oa_config->b_counter_regs);
+	kfree(oa_config->mux_regs);
 
-	free_oa_config(dev_priv, oa_config);
+	kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-	int ret;
+	struct i915_oa_config *oa_config;
 
-	if (metrics_set == 1) {
-		*out_config = &dev_priv->perf.test_config;
-		atomic_inc(&dev_priv->perf.test_config.ref_count);
-		return 0;
-	}
+	rcu_read_lock();
+	if (metrics_set == 1)
+		oa_config = &perf->test_config;
+	else
+		oa_config = idr_find(&perf->metrics_idr, metrics_set);
+	if (oa_config)
+		oa_config = i915_oa_config_get(oa_config);
+	rcu_read_unlock();
 
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
-	if (ret)
-		return ret;
+	return oa_config;
+}
 
-	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
-	else
-		atomic_inc(&(*out_config)->ref_count);
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+	i915_oa_config_put(oa_bo->oa_config);
+	i915_vma_put(oa_bo->vma);
+	kfree(oa_bo);
+}
 
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
 
-	return ret;
+	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+	       GEN12_OAG_OATAILPTR_MASK;
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 
-	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+	return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
 }
 
 static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
+	struct intel_uncore *uncore = stream->uncore;
+	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
 	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
 }
@@ -456,7 +476,6 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
  */
 static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	int report_size = stream->oa_buffer.format_size;
 	unsigned long flags;
 	unsigned int aged_idx;
@@ -479,7 +498,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 	aged_tail = stream->oa_buffer.tails[aged_idx].offset;
 	aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
 
-	hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
+	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
 
 	/* The tail pointer increases in 64 byte increments,
 	 * not in report_size steps...
@@ -536,7 +555,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 				aging_tail = hw_tail;
 			stream->oa_buffer.aging_timestamp = now;
 		} else {
-			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
+			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n",
 				  hw_tail);
 		}
 	}
@@ -655,7 +674,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 				  size_t count,
 				  size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	int report_size = stream->oa_buffer.format_size;
 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -738,9 +757,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * it to userspace...
 		 */
 		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
-			  OAREPORT_REASON_MASK);
+			  (IS_GEN(stream->perf->i915, 12) ?
+			   OAREPORT_REASON_MASK_EXTENDED :
+			   OAREPORT_REASON_MASK));
 		if (reason == 0) {
-			if (__ratelimit(&dev_priv->perf.spurious_report_rs))
+			if (__ratelimit(&stream->perf->spurious_report_rs))
 				DRM_NOTE("Skipping spurious, invalid OA report\n");
 			continue;
 		}
@@ -755,7 +776,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * Note: that we don't clear the valid_ctx_bit so userspace can
 		 * understand that the ID has been squashed by the kernel.
 		 */
-		if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
+		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
+		    INTEL_GEN(stream->perf->i915) <= 11)
 			ctx_id = report32[2] = INVALID_CTX_ID;
 
 		/*
@@ -789,7 +811,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * switches since it's not-uncommon for periodic samples to
 		 * identify a switch before any 'context switch' report.
 		 */
-		if (!dev_priv->perf.exclusive_stream->ctx ||
+		if (!stream->perf->exclusive_stream->ctx ||
 		    stream->specific_ctx_id == ctx_id ||
 		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
 		    reason & OAREPORT_REASON_CTX_SWITCH) {
@@ -798,7 +820,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			 * While filtering for a single context we avoid
 			 * leaking the IDs of other contexts.
 			 */
-			if (dev_priv->perf.exclusive_stream->ctx &&
+			if (stream->perf->exclusive_stream->ctx &&
 			    stream->specific_ctx_id != ctx_id) {
 				report32[2] = INVALID_CTX_ID;
 			}
@@ -822,6 +844,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	}
 
 	if (start_offset != *offset) {
+		i915_reg_t oaheadptr;
+
+		oaheadptr = IS_GEN(stream->perf->i915, 12) ?
+			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+
 		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
 		/*
@@ -829,8 +856,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * relative to oa_buf_base so put back here...
 		 */
 		head += gtt_offset;
-
-		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
+		intel_uncore_write(uncore, oaheadptr,
+				   head & GEN12_OAG_OAHEADPTR_MASK);
 		stream->oa_buffer.head = head;
 
 		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -864,14 +891,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 			size_t count,
 			size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 oastatus;
+	i915_reg_t oastatus_reg;
 	int ret;
 
 	if (WARN_ON(!stream->oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus = I915_READ(GEN8_OASTATUS);
+	oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
+		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+
+	oastatus = intel_uncore_read(uncore, oastatus_reg);
 
 	/*
 	 * We treat OABUFFER_OVERFLOW as a significant error:
@@ -896,14 +927,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
 			  stream->period_exponent);
 
-		dev_priv->perf.ops.oa_disable(stream);
-		dev_priv->perf.ops.oa_enable(stream);
+		stream->perf->ops.oa_disable(stream);
+		stream->perf->ops.oa_enable(stream);
 
 		/*
 		 * Note: .oa_enable() is expected to re-init the oabuffer and
 		 * reset GEN8_OASTATUS for us
 		 */
-		oastatus = I915_READ(GEN8_OASTATUS);
+		oastatus = intel_uncore_read(uncore, oastatus_reg);
 	}
 
 	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
@@ -911,8 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
 		if (ret)
 			return ret;
-		I915_WRITE(GEN8_OASTATUS,
-			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
+		intel_uncore_write(uncore, oastatus_reg,
+				   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
 	}
 
 	return gen8_append_oa_reports(stream, buf, count, offset);
@@ -943,7 +974,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 				  size_t count,
 				  size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	int report_size = stream->oa_buffer.format_size;
 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -1017,7 +1048,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 		 * copying it to userspace...
 		 */
 		if (report32[0] == 0) {
-			if (__ratelimit(&dev_priv->perf.spurious_report_rs))
+			if (__ratelimit(&stream->perf->spurious_report_rs))
 				DRM_NOTE("Skipping spurious, invalid OA report\n");
 			continue;
 		}
@@ -1043,9 +1074,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 		 */
 		head += gtt_offset;
 
-		I915_WRITE(GEN7_OASTATUS2,
-			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
-			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
+		intel_uncore_write(uncore, GEN7_OASTATUS2,
+				   (head & GEN7_OASTATUS2_HEAD_MASK) |
+				   GEN7_OASTATUS2_MEM_SELECT_GGTT);
 		stream->oa_buffer.head = head;
 
 		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1075,21 +1106,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 			size_t count,
 			size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 oastatus1;
 	int ret;
 
 	if (WARN_ON(!stream->oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus1 = I915_READ(GEN7_OASTATUS1);
+	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
 	/* XXX: On Haswell we don't have a safe way to clear oastatus1
 	 * bits while the OA unit is enabled (while the tail pointer
 	 * may be updated asynchronously) so we ignore status bits
 	 * that have already been reported to userspace.
 	 */
-	oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1;
+	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
 
 	/* We treat OABUFFER_OVERFLOW as a significant error:
 	 *
@@ -1120,10 +1151,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
 			  stream->period_exponent);
 
-		dev_priv->perf.ops.oa_disable(stream);
-		dev_priv->perf.ops.oa_enable(stream);
+		stream->perf->ops.oa_disable(stream);
+		stream->perf->ops.oa_enable(stream);
 
-		oastatus1 = I915_READ(GEN7_OASTATUS1);
+		oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 	}
 
 	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -1131,7 +1162,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
 		if (ret)
 			return ret;
-		dev_priv->perf.gen7_latched_oastatus1 |=
+		stream->perf->gen7_latched_oastatus1 |=
 			GEN7_OASTATUS1_REPORT_LOST;
 	}
 
@@ -1196,25 +1227,18 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 			size_t count,
 			size_t *offset)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
-	return dev_priv->perf.ops.read(stream, buf, count, offset);
+	return stream->perf->ops.read(stream, buf, count, offset);
 }
 
 static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 {
 	struct i915_gem_engines_iter it;
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
 	int err;
 
-	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err)
-		return ERR_PTR(err);
-
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
-		if (ce->engine->class != RENDER_CLASS)
+		if (ce->engine != stream->engine) /* first match! */
 			continue;
 
 		/*
@@ -1229,10 +1253,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	}
 	i915_gem_context_unlock_engines(ctx);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err)
-		return ERR_PTR(err);
-
 	return stream->pinned_ctx;
 }
 
@@ -1248,14 +1268,13 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
  */
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
 	ce = oa_pin_context(stream);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	switch (INTEL_GEN(i915)) {
+	switch (INTEL_GEN(ce->engine->i915)) {
 	case 7: {
 		/*
 		 * On Haswell we don't do any post processing of the reports
@@ -1269,7 +1288,11 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	case 8:
 	case 9:
 	case 10:
-		if (USES_GUC_SUBMISSION(i915)) {
+		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
+			stream->specific_ctx_id_mask =
+				(1U << GEN8_CTX_ID_WIDTH) - 1;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
+		} else {
 			/*
 			 * When using GuC, the context descriptor we write in
 			 * i915 is read by GuC and rewritten before it's
@@ -1289,31 +1312,23 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			 */
 			stream->specific_ctx_id_mask =
 				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
-		} else {
-			stream->specific_ctx_id_mask =
-				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			stream->specific_ctx_id &=
-				stream->specific_ctx_id_mask;
 		}
 		break;
 
-	case 11: {
+	case 11:
+	case 12: {
 		stream->specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		stream->specific_ctx_id &=
-			stream->specific_ctx_id_mask;
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		break;
 	}
 
 	default:
-		MISSING_CASE(INTEL_GEN(i915));
+		MISSING_CASE(INTEL_GEN(ce->engine->i915));
 	}
 
+	ce->tag = stream->specific_ctx_id_mask;
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 stream->specific_ctx_id,
 			 stream->specific_ctx_id_mask);
@@ -1330,69 +1345,76 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct intel_context *ce;
 
-	stream->specific_ctx_id = INVALID_CTX_ID;
-	stream->specific_ctx_id_mask = 0;
-
 	ce = fetch_and_zero(&stream->pinned_ctx);
 	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
+		ce->tag = 0; /* recomputed on next submission after parking */
 		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
+
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
 }
 
 static void
 free_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
 	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
 				   I915_VMA_RELEASE_MAP);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
+
+	i915_oa_config_put(stream->oa_config);
+	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+		free_oa_config_bo(oa_bo);
+}
+
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+	i915_vma_unpin_and_release(&stream->noa_wait, 0);
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 
-	BUG_ON(stream != dev_priv->perf.exclusive_stream);
+	BUG_ON(stream != perf->exclusive_stream);
 
 	/*
 	 * Unset exclusive_stream first, it will be checked while disabling
 	 * the metric set on gen8+.
 	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	dev_priv->perf.exclusive_stream = NULL;
-	dev_priv->perf.ops.disable_metric_set(stream);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	perf->exclusive_stream = NULL;
+	perf->ops.disable_metric_set(stream);
 
 	free_oa_buffer(stream);
 
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
+	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+	intel_engine_pm_put(stream->engine);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
+	free_noa_wait(stream);
 
-	if (dev_priv->perf.spurious_report_rs.missed) {
+	if (perf->spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
-			 dev_priv->perf.spurious_report_rs.missed);
+			 perf->spurious_report_rs.missed);
 	}
 }
 
 static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
 	unsigned long flags;
 
@@ -1401,13 +1423,14 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 	/* Pre-DevBDW: OABUFFER must be set with counters off,
 	 * before OASTATUS1, but after OASTATUS2
 	 */
-	I915_WRITE(GEN7_OASTATUS2,
-		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
+	intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
+			   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
 	stream->oa_buffer.head = gtt_offset;
 
-	I915_WRITE(GEN7_OABUFFER, gtt_offset);
+	intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
 
-	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
+	intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
+			   gtt_offset | OABUFFER_SIZE_16M);
 
 	/* Mark that we need updated tail pointers to read from... */
 	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
@@ -1419,7 +1442,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 	 * already seen since they can't be cleared while periodic
 	 * sampling is enabled.
 	 */
-	dev_priv->perf.gen7_latched_oastatus1 = 0;
+	stream->perf->gen7_latched_oastatus1 = 0;
 
 	/* NB: although the OA buffer will initially be allocated
 	 * zeroed via shmfs (and so this memset is redundant when
@@ -1434,25 +1457,22 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 	 */
 	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
 
-	/* Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future.
-	 */
 	stream->pollin = false;
 }
 
 static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
 	unsigned long flags;
 
 	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
-	I915_WRITE(GEN8_OASTATUS, 0);
-	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
+	intel_uncore_write(uncore, GEN8_OASTATUS, 0);
+	intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
 	stream->oa_buffer.head = gtt_offset;
 
-	I915_WRITE(GEN8_OABUFFER_UDW, 0);
+	intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
 
 	/*
 	 * PRM says:
@@ -1462,9 +1482,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 *  to enable proper functionality of the overflow
 	 *  bit."
 	 */
-	I915_WRITE(GEN8_OABUFFER, gtt_offset |
+	intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
 		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
-	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
+	intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
 	/* Mark that we need updated tail pointers to read from... */
 	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
@@ -1493,35 +1513,82 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 */
 	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
 
+	stream->pollin = false;
+}
+
+static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
+	unsigned long flags;
+
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+	intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
+	intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+			   gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
+	stream->oa_buffer.head = gtt_offset;
+
+	/*
+	 * PRM says:
+	 *
+	 *  "This MMIO must be set before the OATAILPTR
+	 *  register and after the OAHEADPTR register. This is
+	 *  to enable proper functionality of the overflow
+	 *  bit."
+	 */
+	intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+			   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+	intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+			   gtt_offset & GEN12_OAG_OATAILPTR_MASK);
+
+	/* Mark that we need updated tail pointers to read from... */
+	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+	stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+
 	/*
-	 * Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future.
+	 * Reset state used to recognise context switches, affecting which
+	 * reports we will forward to userspace while filtering for a single
+	 * context.
 	 */
+	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
+
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+	/*
+	 * NB: although the OA buffer will initially be allocated
+	 * zeroed via shmfs (and so this memset is redundant when
+	 * first allocating), we may re-init the OA buffer, either
+	 * when re-enabling a stream or in error/reset paths.
+	 *
+	 * The reason we clear the buffer for each re-init is for the
+	 * sanity check in gen8_append_oa_reports() that looks at the
+	 * reason field to make sure it's non-zero which relies on
+	 * the assumption that new reports are being written to zeroed
+	 * memory...
+	 */
+	memset(stream->oa_buffer.vaddr, 0,
+	       stream->oa_buffer.vma->size);
+
 	stream->pollin = false;
 }
 
 static int alloc_oa_buffer(struct i915_perf_stream *stream)
 {
 	struct drm_i915_gem_object *bo;
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct i915_vma *vma;
 	int ret;
 
 	if (WARN_ON(stream->oa_buffer.vma))
 		return -ENODEV;
 
-	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-	if (ret)
-		return ret;
-
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
-	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
-		ret = PTR_ERR(bo);
-		goto unlock;
+		return PTR_ERR(bo);
 	}
 
 	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
@@ -1541,11 +1608,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 		goto err_unpin;
 	}
 
-	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
-			 i915_ggtt_offset(stream->oa_buffer.vma),
-			 stream->oa_buffer.vaddr);
-
-	goto unlock;
+	return 0;
 
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -1556,55 +1619,393 @@ err_unref:
 	stream->oa_buffer.vaddr = NULL;
 	stream->oa_buffer.vma = NULL;
 
-unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
-static void config_oa_regs(struct drm_i915_private *dev_priv,
-			   const struct i915_oa_reg *regs,
-			   u32 n_regs)
+static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
+				  bool save, i915_reg_t reg, u32 offset,
+				  u32 dword_count)
+{
+	u32 cmd;
+	u32 d;
+
+	cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+	if (INTEL_GEN(stream->perf->i915) >= 8)
+		cmd++;
+
+	for (d = 0; d < dword_count; d++) {
+		*cs++ = cmd;
+		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
+						offset) + 4 * d;
+		*cs++ = 0;
+	}
+
+	return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->perf->i915;
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *vma;
+	const u64 delay_ticks = 0xffffffffffffffff -
+		DIV64_U64_ROUND_UP(
+			atomic64_read(&stream->perf->noa_programming_delay) *
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+			1000000ull);
+	const u32 base = stream->engine->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+	u32 *batch, *ts0, *cs, *jump;
+	int ret, i;
+	enum {
+		START_TS,
+		NOW_TS,
+		DELTA_TS,
+		JUMP_PREDICATE,
+		DELTA_TARGET,
+		N_CS_GPR
+	};
+
+	bo = i915_gem_object_create_internal(i915, 4096);
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+		return PTR_ERR(bo);
+	}
+
+	/*
+	 * We pin in GGTT because we jump into this buffer now because
+	 * multiple OA config BOs will have a jump to this address and it
+	 * needs to be fixed during the lifetime of the i915/perf stream.
+	 */
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(batch)) {
+		ret = PTR_ERR(batch);
+		goto err_unpin;
+	}
+
+	/* Save registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, true /* save */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* First timestamp snapshot location. */
+	ts0 = cs;
+
+	/*
+	 * Initial snapshot of the timestamp register to implement the wait.
+	 * We work with 32b values, so clear out the top 32b bits of the
+	 * register because the ALU works 64bits.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
+
+	/*
+	 * This is the location we're going to jump back into until the
+	 * required amount of time has passed.
+	 */
+	jump = cs;
+
+	/*
+	 * Take another snapshot of the timestamp register. Take care to clear
+	 * up the top 32bits of CS_GPR(1) as we're using it for other
+	 * operations below.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
+
+	/*
+	 * Do a diff between the 2 timestamps and store the result back into
+	 * CS_GPR(1).
+	 */
+	*cs++ = MI_MATH(5);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+	 * timestamp have rolled over the 32bits) into the predicate register
+	 * to be used for the predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Restart from the beginning if we had timestamps roll over. */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+	*cs++ = 0;
+
+	/*
+	 * Now add the diff between to previous timestamps and add it to :
+	 *      (((1 * << 64) - 1) - delay_ns)
+	 *
+	 * When the Carry Flag contains 1 this means the elapsed time is
+	 * longer than the expected delay, and we can exit the wait loop.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
+	*cs++ = lower_32_bits(delay_ticks);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
+	*cs++ = upper_32_bits(delay_ticks);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	*cs++ = MI_ARB_CHECK;
+
+	/*
+	 * Transfer the result into the predicate register to be used for the
+	 * predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Predicate the jump.  */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+	*cs++ = 0;
+
+	/* Restore registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, false /* restore */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* And return to the ring. */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
+
+	i915_gem_object_flush_map(bo);
+	i915_gem_object_unpin_map(bo);
+
+	stream->noa_wait = vma;
+	return 0;
+
+err_unpin:
+	i915_vma_unpin_and_release(&vma, 0);
+err_unref:
+	i915_gem_object_put(bo);
+	return ret;
+}
+
+static u32 *write_cs_mi_lri(u32 *cs,
+			    const struct i915_oa_reg *reg_data,
+			    u32 n_regs)
 {
 	u32 i;
 
 	for (i = 0; i < n_regs; i++) {
-		const struct i915_oa_reg *reg = regs + i;
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min_t(u32,
+					  n_regs - i,
+					  MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+		*cs++ = reg_data[i].value;
+	}
 
-		I915_WRITE(reg->addr, reg->value);
+	return cs;
+}
+
+static int num_lri_dwords(int num_regs)
+{
+	int count = 0;
+
+	if (num_regs > 0) {
+		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+		count += num_regs * 2;
 	}
+
+	return count;
 }
 
-static void delay_after_mux(void)
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+		       struct i915_oa_config *oa_config)
 {
+	struct drm_i915_gem_object *obj;
+	struct i915_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	u32 *cs;
+	int err;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	config_length += num_lri_dwords(oa_config->mux_regs_len);
+	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+	config_length += num_lri_dwords(oa_config->flex_regs_len);
+	config_length += 3; /* MI_BATCH_BUFFER_START */
+	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_free;
+	}
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_oa_bo;
+	}
+
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+	cs = write_cs_mi_lri(cs,
+			     oa_config->b_counter_regs,
+			     oa_config->b_counter_regs_len);
+	cs = write_cs_mi_lri(cs,
+			     oa_config->flex_regs,
+			     oa_config->flex_regs_len);
+
+	/* Jump into the active wait. */
+	*cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8);
+	*cs++ = i915_ggtt_offset(stream->noa_wait);
+	*cs++ = 0;
+
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	oa_bo->vma = i915_vma_instance(obj,
+				       &stream->engine->gt->ggtt->vm,
+				       NULL);
+	if (IS_ERR(oa_bo->vma)) {
+		err = PTR_ERR(oa_bo->vma);
+		goto err_oa_bo;
+	}
+
+	oa_bo->oa_config = i915_oa_config_get(oa_config);
+	llist_add(&oa_bo->node, &stream->oa_config_bos);
+
+	return oa_bo;
+
+err_oa_bo:
+	i915_gem_object_put(obj);
+err_free:
+	kfree(oa_bo);
+	return ERR_PTR(err);
+}
+
+static struct i915_vma *
+get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+{
+	struct i915_oa_config_bo *oa_bo;
+
 	/*
-	 * It apparently takes a fairly long time for a new MUX
-	 * configuration to be be applied after these register writes.
-	 * This delay duration was derived empirically based on the
-	 * render_basic config but hopefully it covers the maximum
-	 * configuration latency.
-	 *
-	 * As a fallback, the checks in _append_oa_reports() to skip
-	 * invalid OA reports do also seem to work to discard reports
-	 * generated before this config has completed - albeit not
-	 * silently.
-	 *
-	 * Unfortunately this is essentially a magic number, since we
-	 * don't currently know of a reliable mechanism for predicting
-	 * how long the MUX config will take to apply and besides
-	 * seeing invalid reports we don't know of a reliable way to
-	 * explicitly check that the MUX config has landed.
-	 *
-	 * It's even possible we've miss characterized the underlying
-	 * problem - it just seems like the simplest explanation why
-	 * a delay at this location would mitigate any invalid reports.
+	 * Look for the buffer in the already allocated BOs attached
+	 * to the stream.
 	 */
-	usleep_range(15000, 20000);
+	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
+		if (oa_bo->oa_config == oa_config &&
+		    memcmp(oa_bo->oa_config->uuid,
+			   oa_config->uuid,
+			   sizeof(oa_config->uuid)) == 0)
+			goto out;
+	}
+
+	oa_bo = alloc_oa_config_buffer(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return ERR_CAST(oa_bo);
+
+out:
+	return i915_vma_get(oa_bo->vma);
+}
+
+static int emit_oa_config(struct i915_perf_stream *stream,
+			  struct i915_oa_config *oa_config,
+			  struct intel_context *ce)
+{
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = get_oa_vma(stream, oa_config);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err)
+		goto err_vma_put;
+
+	intel_engine_pm_get(ce->engine);
+	rq = i915_request_create(ce);
+	intel_engine_pm_put(ce->engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_vma_unpin;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, 0);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
+	if (err)
+		goto err_add_request;
+
+	err = rq->engine->emit_bb_start(rq,
+					vma->node.start, 0,
+					I915_DISPATCH_SECURE);
+err_add_request:
+	i915_request_add(rq);
+err_vma_unpin:
+	i915_vma_unpin(vma);
+err_vma_put:
+	i915_vma_put(vma);
+	return err;
+}
+
+static struct intel_context *oa_context(struct i915_perf_stream *stream)
+{
+	return stream->pinned_ctx ?: stream->engine->kernel_context;
 }
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	const struct i915_oa_config *oa_config = stream->oa_config;
+	struct intel_uncore *uncore = stream->uncore;
 
 	/*
 	 * PRM:
@@ -1616,31 +2017,24 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 	 * count the events from non-render domain. Unit level clock
 	 * gating for RCS should also be disabled.
 	 */
-	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
-				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
-	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
-				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));
-
-	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
+	intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
+			 GEN7_DOP_CLOCK_GATE_ENABLE, 0);
+	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
+			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	config_oa_regs(dev_priv, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
-
-	return 0;
+	return emit_oa_config(stream, stream->oa_config, oa_context(stream));
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 
-	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
-				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
-	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
-				    GEN7_DOP_CLOCK_GATE_ENABLE));
+	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
+			 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
+	intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
+			 0, GEN7_DOP_CLOCK_GATE_ENABLE);
 
-	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
-				      ~GT_NOA_ENABLE));
+	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
 }
 
 static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
@@ -1672,14 +2066,11 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
  * in the case that the OA unit has been disabled.
  */
 static void
-gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
-			       struct intel_context *ce,
-			       u32 *reg_state,
-			       const struct i915_oa_config *oa_config)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
-	u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
+gen8_update_reg_state_unlocked(const struct intel_context *ce,
+			       const struct i915_perf_stream *stream)
+{
+	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
+	u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
 	/* The MMIO offsets for Flex EU registers aren't contiguous */
 	i915_reg_t flex_regs[] = {
 		EU_PERF_CNTL0,
@@ -1690,21 +2081,20 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
 		EU_PERF_CNTL5,
 		EU_PERF_CNTL6,
 	};
+	u32 *reg_state = ce->lrc_reg_state;
 	int i;
 
-	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+	reg_state[ctx_oactxctrl + 1] =
 		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
 		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-		GEN8_OA_COUNTER_RESUME);
+		GEN8_OA_COUNTER_RESUME;
 
-	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
-		CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
-			oa_config_flex_reg(oa_config, flex_regs[i]));
-	}
+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
+		reg_state[ctx_flexeu0 + i * 2 + 1] =
+			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
 
-	CTX_REG(reg_state,
-		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-		intel_sseu_make_rpcs(i915, &ce->sseu));
+	reg_state[CTX_R_PWR_CLK_STATE] =
+		intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
 }
 
 struct flex {
@@ -1728,7 +2118,7 @@ gen8_store_flex(struct i915_request *rq,
 	offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
 	do {
 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-		*cs++ = offset + (flex->offset + 1) * sizeof(u32);
+		*cs++ = offset + flex->offset * sizeof(u32);
 		*cs++ = 0;
 		*cs++ = flex->value;
 	} while (flex++, --count);
@@ -1769,9 +2159,7 @@ static int gen8_modify_context(struct intel_context *ce,
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&ce->pin_mutex);
-
-	rq = i915_request_create(ce->engine->kernel_context);
+	rq = intel_engine_create_kernel_request(ce->engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -1813,17 +2201,14 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
 
-		err = intel_context_lock_pinned(ce);
-		if (err)
-			break;
+		/* Otherwise OA settings will be set upon first use */
+		if (!intel_context_pin_if_active(ce))
+			continue;
 
 		flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
+		err = gen8_modify_context(ce, flex, count);
 
-		/* Otherwise OA settings will be set upon first use */
-		if (intel_context_is_pinned(ce))
-			err = gen8_modify_context(ce, flex, count);
-
-		intel_context_unlock_pinned(ce);
+		intel_context_unpin(ce);
 		if (err)
 			break;
 	}
@@ -1832,6 +2217,53 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
+{
+	int err;
+	struct intel_context *ce = stream->pinned_ctx;
+	u32 format = stream->oa_buffer.format;
+	struct flex regs_context[] = {
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+			enable ? GEN8_OA_COUNTER_RESUME : 0,
+		},
+	};
+	/* Offsets in regs_lri are not used since this configuration is only
+	 * applied using LRI. Initialize the correct offsets for posterity.
+	 */
+#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
+	struct flex regs_lri[] = {
+		{
+			GEN12_OAR_OACONTROL,
+			GEN12_OAR_OACONTROL_OFFSET + 1,
+			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+			(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+		},
+		{
+			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+			CTX_CONTEXT_CONTROL,
+			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+				      enable ?
+				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+				      0)
+		},
+	};
+
+	/* Modify the context image of pinned context with regs_context*/
+	err = intel_context_lock_pinned(ce);
+	if (err)
+		return err;
+
+	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+	intel_context_unlock_pinned(ce);
+	if (err)
+		return err;
+
+	/* Apply regs_lri using LRI with pinned context */
+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -1855,43 +2287,18 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
  *   per-context OA state.
  *
  * Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
  */
-static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
-				       const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+				     struct flex *regs,
+				     size_t num_regs)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
-	/* The MMIO offsets for Flex EU registers aren't contiguous */
-	const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
-	struct flex regs[] = {
-		{
-			GEN8_R_PWR_CLK_STATE,
-			CTX_R_PWR_CLK_STATE,
-		},
-		{
-			GEN8_OACTXCONTROL,
-			i915->perf.ctx_oactxctrl_offset,
-			((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-			 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-			 GEN8_OA_COUNTER_RESUME)
-		},
-		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
-		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
-		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
-		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
-		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
-		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
-		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
-	};
-#undef ctx_flexeuN
+	struct drm_i915_private *i915 = stream->perf->i915;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	int i;
-
-	for (i = 2; i < ARRAY_SIZE(regs); i++)
-		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+	struct i915_gem_context *ctx, *cn;
+	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	lockdep_assert_held(&stream->perf->lock);
 
 	/*
 	 * The OA register config is setup through the context image. This image
@@ -1909,16 +2316,24 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 * context. Contexts idle at the time of reconfiguration are not
 	 * trapped behind the barrier.
 	 */
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		int err;
-
-		if (ctx == i915->kernel_context)
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
+		if (!kref_get_unless_zero(&ctx->ref))
 			continue;
 
-		err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
-		if (err)
+		spin_unlock(&i915->gem.contexts.lock);
+
+		err = gen8_configure_context(ctx, regs, num_regs);
+		if (err) {
+			i915_gem_context_put(ctx);
 			return err;
+		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	/*
 	 * After updating all other contexts, we need to modify ourselves.
@@ -1927,14 +2342,13 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 */
 	for_each_uabi_engine(engine, i915) {
 		struct intel_context *ce = engine->kernel_context;
-		int err;
 
 		if (engine->class != RENDER_CLASS)
 			continue;
 
 		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-		err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+		err = gen8_modify_self(ce, regs, num_regs);
 		if (err)
 			return err;
 	}
@@ -1942,10 +2356,60 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+					const struct i915_oa_config *oa_config)
+{
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+	};
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+				      const struct i915_oa_config *oa_config)
+{
+	/* The MMIO offsets for Flex EU registers aren't contiguous */
+	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+		},
+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
+	};
+#undef ctx_flexeuN
+	int i;
+
+	regs[1].value =
+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
+
+	for (i = 2; i < ARRAY_SIZE(regs); i++)
+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
 static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	const struct i915_oa_config *oa_config = stream->oa_config;
+	struct intel_uncore *uncore = stream->uncore;
+	struct i915_oa_config *oa_config = stream->oa_config;
 	int ret;
 
 	/*
@@ -1971,10 +2435,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 	 * be read back from automatically triggered reports, as part of the
 	 * RPT_ID field.
 	 */
-	if (IS_GEN_RANGE(dev_priv, 9, 11)) {
-		I915_WRITE(GEN8_OA_DEBUG,
-			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
-					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
+	if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
+		intel_uncore_write(uncore, GEN8_OA_DEBUG,
+				   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+						      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
 	}
 
 	/*
@@ -1982,45 +2446,106 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = gen8_configure_all_contexts(stream, oa_config);
+	ret = lrc_configure_all_contexts(stream, oa_config);
 	if (ret)
 		return ret;
 
-	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
+	return emit_oa_config(stream, oa_config, oa_context(stream));
+}
 
-	config_oa_regs(dev_priv, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
+static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
+{
+	return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
+			     (stream->sample_flags & SAMPLE_OA_REPORT) ?
+			     0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
+}
 
-	return 0;
+static int gen12_enable_metric_set(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+	struct i915_oa_config *oa_config = stream->oa_config;
+	bool periodic = stream->periodic;
+	u32 period_exponent = stream->period_exponent;
+	int ret;
+
+	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+			   /* Disable clk ratio reports, like previous Gens. */
+			   _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+					      GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
+			   /*
+			    * If the user didn't require OA reports, instruct
+			    * the hardware not to emit ctx switch reports.
+			    */
+			   oag_report_ctx_switches(stream));
+
+	intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+			   (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+			    GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
+			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
+			    : 0);
+
+	/*
+	 * Update all contexts prior writing the mux configurations as we need
+	 * to make sure all slices/subslices are ON before writing to NOA
+	 * registers.
+	 */
+	ret = gen12_configure_all_contexts(stream, oa_config);
+	if (ret)
+		return ret;
+
+	/*
+	 * For Gen12, performance counters are context
+	 * saved/restored. Only enable it for the context that
+	 * requested this.
+	 */
+	if (stream->ctx) {
+		ret = gen12_configure_oar_context(stream, true);
+		if (ret)
+			return ret;
+	}
+
+	return emit_oa_config(stream, oa_config, oa_context(stream));
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	gen8_configure_all_contexts(stream, NULL);
+	lrc_configure_all_contexts(stream, NULL);
 
-	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
-				      ~GT_NOA_ENABLE));
+	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
 }
 
 static void gen10_disable_metric_set(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	gen8_configure_all_contexts(stream, NULL);
+	lrc_configure_all_contexts(stream, NULL);
 
 	/* Make sure we disable noa to save power. */
-	I915_WRITE(RPM_CONFIG1,
-		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
+	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+}
+
+static void gen12_disable_metric_set(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+
+	/* Reset all contexts' slices/subslices configurations. */
+	gen12_configure_all_contexts(stream, NULL);
+
+	/* disable the context save/restore or OAR counters */
+	if (stream->ctx)
+		gen12_configure_oar_context(stream, false);
+
+	/* Make sure we disable noa to save power. */
+	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
 }
 
 static void gen7_oa_enable(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	struct i915_gem_context *ctx = stream->ctx;
 	u32 ctx_id = stream->specific_ctx_id;
 	bool periodic = stream->periodic;
@@ -2038,19 +2563,19 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
 	 */
 	gen7_init_oa_buffer(stream);
 
-	I915_WRITE(GEN7_OACONTROL,
-		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
-		   (period_exponent <<
-		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
-		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
-		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
-		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
-		   GEN7_OACONTROL_ENABLE);
+	intel_uncore_write(uncore, GEN7_OACONTROL,
+			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
+			   (period_exponent <<
+			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
+			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
+			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
+			   GEN7_OACONTROL_ENABLE);
 }
 
 static void gen8_oa_enable(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_uncore *uncore = stream->uncore;
 	u32 report_format = stream->oa_buffer.format;
 
 	/*
@@ -2069,9 +2594,28 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
 	 * filtering and instead filter on the cpu based on the context-id
 	 * field of reports
 	 */
-	I915_WRITE(GEN8_OACONTROL, (report_format <<
-				    GEN8_OA_REPORT_FORMAT_SHIFT) |
-				   GEN8_OA_COUNTER_ENABLE);
+	intel_uncore_write(uncore, GEN8_OACONTROL,
+			   (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+			   GEN8_OA_COUNTER_ENABLE);
+}
+
+static void gen12_oa_enable(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+	u32 report_format = stream->oa_buffer.format;
+
+	/*
+	 * If we don't want OA reports from the OA buffer, then we don't even
+	 * need to program the OAG unit.
+	 */
+	if (!(stream->sample_flags & SAMPLE_OA_REPORT))
+		return;
+
+	gen12_init_oa_buffer(stream);
+
+	intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
+			   (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
+			   GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
 }
 
 /**
@@ -2085,9 +2629,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
  */
 static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
-	dev_priv->perf.ops.oa_enable(stream);
+	stream->perf->ops.oa_enable(stream);
 
 	if (stream->periodic)
 		hrtimer_start(&stream->poll_check_timer,
@@ -2097,7 +2639,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 
 static void gen7_oa_disable(struct i915_perf_stream *stream)
 {
-	struct intel_uncore *uncore = &stream->dev_priv->uncore;
+	struct intel_uncore *uncore = stream->uncore;
 
 	intel_uncore_write(uncore, GEN7_OACONTROL, 0);
 	if (intel_wait_for_register(uncore,
@@ -2108,7 +2650,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream)
 
 static void gen8_oa_disable(struct i915_perf_stream *stream)
 {
-	struct intel_uncore *uncore = &stream->dev_priv->uncore;
+	struct intel_uncore *uncore = stream->uncore;
 
 	intel_uncore_write(uncore, GEN8_OACONTROL, 0);
 	if (intel_wait_for_register(uncore,
@@ -2117,6 +2659,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
 		DRM_ERROR("wait for OA to be disabled timed out\n");
 }
 
+static void gen12_oa_disable(struct i915_perf_stream *stream)
+{
+	struct intel_uncore *uncore = stream->uncore;
+
+	intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+	if (intel_wait_for_register(uncore,
+				    GEN12_OAG_OACONTROL,
+				    GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
+				    50))
+		DRM_ERROR("wait for OA to be disabled timed out\n");
+}
+
 /**
  * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
  * @stream: An i915 perf stream opened for OA metrics
@@ -2127,9 +2681,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
  */
 static void i915_oa_stream_disable(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-
-	dev_priv->perf.ops.oa_disable(stream);
+	stream->perf->ops.oa_disable(stream);
 
 	if (stream->periodic)
 		hrtimer_cancel(&stream->poll_check_timer);
@@ -2166,34 +2718,42 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			       struct drm_i915_perf_open_param *param,
 			       struct perf_open_properties *props)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 	int format_size;
 	int ret;
 
-	/* If the sysfs metrics/ directory wasn't registered for some
+	if (!props->engine) {
+		DRM_DEBUG("OA engine not specified\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If the sysfs metrics/ directory wasn't registered for some
 	 * reason then don't let userspace try their luck with config
 	 * IDs
 	 */
-	if (!dev_priv->perf.metrics_kobj) {
+	if (!perf->metrics_kobj) {
 		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
 		return -EINVAL;
 	}
 
-	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
+	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
 		DRM_DEBUG("Only OA report sampling supported\n");
 		return -EINVAL;
 	}
 
-	if (!dev_priv->perf.ops.enable_metric_set) {
+	if (!perf->ops.enable_metric_set) {
 		DRM_DEBUG("OA unit not supported\n");
 		return -ENODEV;
 	}
 
-	/* To avoid the complexity of having to accurately filter
+	/*
+	 * To avoid the complexity of having to accurately filter
 	 * counter reports and marshal to the appropriate client
 	 * we currently only allow exclusive access
 	 */
-	if (dev_priv->perf.exclusive_stream) {
+	if (perf->exclusive_stream) {
 		DRM_DEBUG("OA unit already in use\n");
 		return -EBUSY;
 	}
@@ -2203,19 +2763,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
+	stream->engine = props->engine;
+	stream->uncore = stream->engine->gt->uncore;
+
 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
-	format_size = dev_priv->perf.oa_formats[props->oa_format].size;
+	format_size = perf->oa_formats[props->oa_format].size;
 
-	stream->sample_flags |= SAMPLE_OA_REPORT;
+	stream->sample_flags = props->sample_flags;
 	stream->sample_size += format_size;
 
 	stream->oa_buffer.format_size = format_size;
 	if (WARN_ON(stream->oa_buffer.format_size == 0))
 		return -EINVAL;
 
+	stream->hold_preemption = props->hold_preemption;
+
 	stream->oa_buffer.format =
-		dev_priv->perf.oa_formats[props->oa_format].format;
+		perf->oa_formats[props->oa_format].format;
 
 	stream->periodic = props->oa_periodic;
 	if (stream->periodic)
@@ -2229,9 +2794,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
-	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+	ret = alloc_noa_wait(stream);
 	if (ret) {
+		DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+		goto err_noa_wait_alloc;
+	}
+
+	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+	if (!stream->oa_config) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+		ret = -EINVAL;
 		goto err_config;
 	}
 
@@ -2247,27 +2819,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 *   In our case we are expecting that taking pm + FORCEWAKE
 	 *   references will effectively disable RC6.
 	 */
-	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
+	intel_engine_pm_get(stream->engine);
+	intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
 
 	ret = alloc_oa_buffer(stream);
 	if (ret)
 		goto err_oa_buf_alloc;
 
-	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-	if (ret)
-		goto err_lock;
-
 	stream->ops = &i915_oa_stream_ops;
-	dev_priv->perf.exclusive_stream = stream;
+	perf->exclusive_stream = stream;
 
-	ret = dev_priv->perf.ops.enable_metric_set(stream);
+	ret = perf->ops.enable_metric_set(stream);
 	if (ret) {
 		DRM_DEBUG("Unable to enable metric set\n");
 		goto err_enable;
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	DRM_DEBUG("opening stream oa config uuid=%s\n",
+		  stream->oa_config->uuid);
 
 	hrtimer_init(&stream->poll_check_timer,
 		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -2278,38 +2847,44 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	dev_priv->perf.exclusive_stream = NULL;
-	dev_priv->perf.ops.disable_metric_set(stream);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	perf->exclusive_stream = NULL;
+	perf->ops.disable_metric_set(stream);
 
-err_lock:
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
 
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
+	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+	intel_engine_pm_put(stream->engine);
 
 err_config:
+	free_noa_wait(stream);
+
+err_noa_wait_alloc:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
 	return ret;
 }
 
-void i915_oa_init_reg_state(struct intel_engine_cs *engine,
-			    struct intel_context *ce,
-			    u32 *regs)
+void i915_oa_init_reg_state(const struct intel_context *ce,
+			    const struct intel_engine_cs *engine)
 {
 	struct i915_perf_stream *stream;
 
+	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
+
 	if (engine->class != RENDER_CLASS)
 		return;
 
 	stream = engine->i915->perf.exclusive_stream;
-	if (stream)
-		gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
+	/*
+	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
+	 * is already doing that, so nothing to be done for gen12 here.
+	 */
+	if (stream && INTEL_GEN(stream->perf->i915) < 12)
+		gen8_update_reg_state_unlocked(ce, stream);
 }
 
 /**
@@ -2379,7 +2954,7 @@ static ssize_t i915_perf_read(struct file *file,
 			      loff_t *ppos)
 {
 	struct i915_perf_stream *stream = file->private_data;
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 	ssize_t ret;
 
 	/* To ensure it's handled consistently we simply treat all reads of a
@@ -2402,15 +2977,15 @@ static ssize_t i915_perf_read(struct file *file,
 			if (ret)
 				return ret;
 
-			mutex_lock(&dev_priv->perf.lock);
+			mutex_lock(&perf->lock);
 			ret = i915_perf_read_locked(stream, file,
 						    buf, count, ppos);
-			mutex_unlock(&dev_priv->perf.lock);
+			mutex_unlock(&perf->lock);
 		} while (ret == -EAGAIN);
 	} else {
-		mutex_lock(&dev_priv->perf.lock);
+		mutex_lock(&perf->lock);
 		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
-		mutex_unlock(&dev_priv->perf.lock);
+		mutex_unlock(&perf->lock);
 	}
 
 	/* We allow the poll checking to sometimes report false positive EPOLLIN
@@ -2448,7 +3023,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
 
 /**
  * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
- * @dev_priv: i915 device instance
  * @stream: An i915 perf stream
  * @file: An i915 perf stream file
  * @wait: poll() state table
@@ -2457,15 +3031,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
  * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
  * will be woken for new stream data.
  *
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
  * with any non-file-operation driver hooks.
  *
  * Returns: any poll events that are ready without sleeping
  */
-static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
-					  struct i915_perf_stream *stream,
-					  struct file *file,
-					  poll_table *wait)
+static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
+				      struct file *file,
+				      poll_table *wait)
 {
 	__poll_t events = 0;
 
@@ -2499,12 +3072,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
 static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
 {
 	struct i915_perf_stream *stream = file->private_data;
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 	__poll_t ret;
 
-	mutex_lock(&dev_priv->perf.lock);
-	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
-	mutex_unlock(&dev_priv->perf.lock);
+	mutex_lock(&perf->lock);
+	ret = i915_perf_poll_locked(stream, file, wait);
+	mutex_unlock(&perf->lock);
 
 	return ret;
 }
@@ -2529,6 +3102,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
 
 	if (stream->ops->enable)
 		stream->ops->enable(stream);
+
+	if (stream->hold_preemption)
+		intel_context_set_nopreempt(stream->pinned_ctx);
 }
 
 /**
@@ -2553,17 +3129,54 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 	/* Allow stream->ops->disable() to refer to this */
 	stream->enabled = false;
 
+	if (stream->hold_preemption)
+		intel_context_clear_nopreempt(stream->pinned_ctx);
+
 	if (stream->ops->disable)
 		stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+				    unsigned long metrics_set)
+{
+	struct i915_oa_config *config;
+	long ret = stream->oa_config->id;
+
+	config = i915_perf_get_oa_config(stream->perf, metrics_set);
+	if (!config)
+		return -EINVAL;
+
+	if (config != stream->oa_config) {
+		int err;
+
+		/*
+		 * If OA is bound to a specific context, emit the
+		 * reconfiguration inline from that context. The update
+		 * will then be ordered with respect to submission on that
+		 * context.
+		 *
+		 * When set globally, we use a low priority kernel context,
+		 * so it will effectively take effect when idle.
+		 */
+		err = emit_oa_config(stream, config, oa_context(stream));
+		if (err == 0)
+			config = xchg(&stream->oa_config, config);
+		else
+			ret = err;
+	}
+
+	i915_oa_config_put(config);
+
+	return ret;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
  * @cmd: the ioctl request
  * @arg: the ioctl data
  *
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
  * with any non-file-operation driver hooks.
  *
  * Returns: zero on success or a negative error code. Returns -EINVAL for
@@ -2580,6 +3193,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
 	case I915_PERF_IOCTL_DISABLE:
 		i915_perf_disable_locked(stream);
 		return 0;
+	case I915_PERF_IOCTL_CONFIG:
+		return i915_perf_config_locked(stream, arg);
 	}
 
 	return -EINVAL;
@@ -2601,12 +3216,12 @@ static long i915_perf_ioctl(struct file *file,
 			    unsigned long arg)
 {
 	struct i915_perf_stream *stream = file->private_data;
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 	long ret;
 
-	mutex_lock(&dev_priv->perf.lock);
+	mutex_lock(&perf->lock);
 	ret = i915_perf_ioctl_locked(stream, cmd, arg);
-	mutex_unlock(&dev_priv->perf.lock);
+	mutex_unlock(&perf->lock);
 
 	return ret;
 }
@@ -2618,7 +3233,7 @@ static long i915_perf_ioctl(struct file *file,
  * Frees all resources associated with the given i915 perf @stream, disabling
  * any associated data capture in the process.
  *
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
  * with any non-file-operation driver hooks.
  */
 static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
@@ -2629,8 +3244,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 
-	list_del(&stream->link);
-
 	if (stream->ctx)
 		i915_gem_context_put(stream->ctx);
 
@@ -2651,14 +3264,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
 static int i915_perf_release(struct inode *inode, struct file *file)
 {
 	struct i915_perf_stream *stream = file->private_data;
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct i915_perf *perf = stream->perf;
 
-	mutex_lock(&dev_priv->perf.lock);
+	mutex_lock(&perf->lock);
 	i915_perf_destroy_locked(stream);
-	mutex_unlock(&dev_priv->perf.lock);
+	mutex_unlock(&perf->lock);
 
 	/* Release the reference the perf stream kept on the driver. */
-	drm_dev_put(&dev_priv->drm);
+	drm_dev_put(&perf->i915->drm);
 
 	return 0;
 }
@@ -2680,7 +3293,7 @@ static const struct file_operations fops = {
 
 /**
  * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
- * @dev_priv: i915 device instance
+ * @perf: i915 perf instance
  * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
  * @props: individually validated u64 property value pairs
  * @file: drm file
@@ -2688,7 +3301,7 @@ static const struct file_operations fops = {
  * See i915_perf_ioctl_open() for interface details.
  *
  * Implements further stream config validation and stream initialization on
- * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
+ * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
  * taken to serialize with any non-file-operation driver hooks.
  *
  * Note: at this point the @props have only been validated in isolation and
@@ -2703,7 +3316,7 @@ static const struct file_operations fops = {
  * Returns: zero on success or a negative error code.
  */
 static int
-i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
+i915_perf_open_ioctl_locked(struct i915_perf *perf,
 			    struct drm_i915_perf_open_param *param,
 			    struct perf_open_properties *props,
 			    struct drm_file *file)
@@ -2734,17 +3347,34 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 * rest of the system, which we consider acceptable for a
 	 * non-privileged client.
 	 *
-	 * For Gen8+ the OA unit no longer supports clock gating off for a
+	 * For Gen8->11 the OA unit no longer supports clock gating off for a
 	 * specific context and the kernel can't securely stop the counters
 	 * from updating as system-wide / global values. Even though we can
 	 * filter reports based on the included context ID we can't block
 	 * clients from seeing the raw / global counter values via
 	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
 	 * enable the OA unit by default.
+	 *
+	 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
+	 * per context basis. So we can relax requirements there if the user
+	 * doesn't request global stream access (i.e. query based sampling
+	 * using MI_RECORD_PERF_COUNT.
 	 */
-	if (IS_HASWELL(dev_priv) && specific_ctx)
+	if (IS_HASWELL(perf->i915) && specific_ctx)
+		privileged_op = false;
+	else if (IS_GEN(perf->i915, 12) && specific_ctx &&
+		 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
 		privileged_op = false;
 
+	if (props->hold_preemption) {
+		if (!props->single_context) {
+			DRM_DEBUG("preemption disable with no context\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		privileged_op = true;
+	}
+
 	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
 	 * we check a dev.i915.perf_stream_paranoid sysctl option
 	 * to determine if it's ok to access system wide OA counters
@@ -2752,7 +3382,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (privileged_op &&
 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
-		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+		DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
 		ret = -EACCES;
 		goto err_ctx;
 	}
@@ -2763,7 +3393,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_ctx;
 	}
 
-	stream->dev_priv = dev_priv;
+	stream->perf = perf;
 	stream->ctx = specific_ctx;
 
 	ret = i915_oa_stream_init(stream, param, props);
@@ -2779,8 +3409,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_flags;
 	}
 
-	list_add(&stream->link, &dev_priv->perf.streams);
-
 	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
 		f_flags |= O_CLOEXEC;
 	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
@@ -2789,7 +3417,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
 	if (stream_fd < 0) {
 		ret = stream_fd;
-		goto err_open;
+		goto err_flags;
 	}
 
 	if (!(param->flags & I915_PERF_FLAG_DISABLED))
@@ -2798,12 +3426,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	/* Take a reference on the driver that will be kept with stream_fd
 	 * until its release.
 	 */
-	drm_dev_get(&dev_priv->drm);
+	drm_dev_get(&perf->i915->drm);
 
 	return stream_fd;
 
-err_open:
-	list_del(&stream->link);
 err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
@@ -2816,15 +3442,15 @@ err:
 	return ret;
 }
 
-static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
+static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
 {
 	return div64_u64(1000000000ULL * (2ULL << exponent),
-			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
+			 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz);
 }
 
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
- * @dev_priv: i915 device instance
+ * @perf: i915 perf instance
  * @uprops: The array of u64 key value pairs given by userspace
  * @n_props: The number of key value pairs expected in @uprops
  * @props: The stream configuration built up while validating properties
@@ -2837,7 +3463,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
  * we shouldn't validate or assume anything about ordering here. This doesn't
  * rule out defining new properties with ordering requirements in the future.
  */
-static int read_properties_unlocked(struct drm_i915_private *dev_priv,
+static int read_properties_unlocked(struct i915_perf *perf,
 				    u64 __user *uprops,
 				    u32 n_props,
 				    struct perf_open_properties *props)
@@ -2852,6 +3478,15 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		return -EINVAL;
 	}
 
+	/* At the moment we only support using i915-perf on the RCS. */
+	props->engine = intel_engine_lookup_user(perf->i915,
+						 I915_ENGINE_CLASS_RENDER,
+						 0);
+	if (!props->engine) {
+		DRM_DEBUG("No RENDER-capable engines\n");
+		return -EINVAL;
+	}
+
 	/* Considering that ID = 0 is reserved and assuming that we don't
 	 * (currently) expect any configurations to ever specify duplicate
 	 * values for a particular property ID then the last _PROP_MAX value is
@@ -2903,7 +3538,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 					  value);
 				return -EINVAL;
 			}
-			if (!dev_priv->perf.oa_formats[value].size) {
+			if (!perf->oa_formats[value].size) {
 				DRM_DEBUG("Unsupported OA report format %llu\n",
 					  value);
 				return -EINVAL;
@@ -2924,7 +3559,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			 */
 
 			BUILD_BUG_ON(sizeof(oa_period) != 8);
-			oa_period = oa_exponent_to_ns(dev_priv, value);
+			oa_period = oa_exponent_to_ns(perf, value);
 
 			/* This check is primarily to ensure that oa_period <=
 			 * UINT32_MAX (before passing to do_div which only
@@ -2949,6 +3584,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
+			props->hold_preemption = !!value;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -2978,7 +3616,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
  * mutex to avoid an awkward lockdep with mmap_sem.
  *
  * Most of the implementation details are handled by
- * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
+ * i915_perf_open_ioctl_locked() after taking the &perf->lock
  * mutex for serializing with any non-file-operation driver hooks.
  *
  * Return: A newly opened i915 Perf stream file descriptor or negative
@@ -2987,13 +3625,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_perf *perf = &to_i915(dev)->perf;
 	struct drm_i915_perf_open_param *param = data;
 	struct perf_open_properties props;
 	u32 known_open_flags;
 	int ret;
 
-	if (!dev_priv->perf.initialized) {
+	if (!perf->i915) {
 		DRM_DEBUG("i915 perf interface not available for this system\n");
 		return -ENOTSUPP;
 	}
@@ -3006,124 +3644,130 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	ret = read_properties_unlocked(dev_priv,
+	ret = read_properties_unlocked(perf,
 				       u64_to_user_ptr(param->properties_ptr),
 				       param->num_properties,
 				       &props);
 	if (ret)
 		return ret;
 
-	mutex_lock(&dev_priv->perf.lock);
-	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
-	mutex_unlock(&dev_priv->perf.lock);
+	mutex_lock(&perf->lock);
+	ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
+	mutex_unlock(&perf->lock);
 
 	return ret;
 }
 
 /**
  * i915_perf_register - exposes i915-perf to userspace
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * In particular OA metric sets are advertised under a sysfs metrics/
  * directory allowing userspace to enumerate valid IDs that can be
  * used to open an i915-perf stream.
  */
-void i915_perf_register(struct drm_i915_private *dev_priv)
+void i915_perf_register(struct drm_i915_private *i915)
 {
+	struct i915_perf *perf = &i915->perf;
 	int ret;
 
-	if (!dev_priv->perf.initialized)
+	if (!perf->i915)
 		return;
 
 	/* To be sure we're synchronized with an attempted
 	 * i915_perf_open_ioctl(); considering that we register after
 	 * being exposed to userspace.
 	 */
-	mutex_lock(&dev_priv->perf.lock);
+	mutex_lock(&perf->lock);
 
-	dev_priv->perf.metrics_kobj =
+	perf->metrics_kobj =
 		kobject_create_and_add("metrics",
-				       &dev_priv->drm.primary->kdev->kobj);
-	if (!dev_priv->perf.metrics_kobj)
+				       &i915->drm.primary->kdev->kobj);
+	if (!perf->metrics_kobj)
 		goto exit;
 
-	sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr);
-
-	if (INTEL_GEN(dev_priv) >= 11) {
-		i915_perf_load_test_config_icl(dev_priv);
-	} else if (IS_CANNONLAKE(dev_priv)) {
-		i915_perf_load_test_config_cnl(dev_priv);
-	} else if (IS_COFFEELAKE(dev_priv)) {
-		if (IS_CFL_GT2(dev_priv))
-			i915_perf_load_test_config_cflgt2(dev_priv);
-		if (IS_CFL_GT3(dev_priv))
-			i915_perf_load_test_config_cflgt3(dev_priv);
-	} else if (IS_GEMINILAKE(dev_priv)) {
-		i915_perf_load_test_config_glk(dev_priv);
-	} else if (IS_KABYLAKE(dev_priv)) {
-		if (IS_KBL_GT2(dev_priv))
-			i915_perf_load_test_config_kblgt2(dev_priv);
-		else if (IS_KBL_GT3(dev_priv))
-			i915_perf_load_test_config_kblgt3(dev_priv);
-	} else if (IS_BROXTON(dev_priv)) {
-		i915_perf_load_test_config_bxt(dev_priv);
-	} else if (IS_SKYLAKE(dev_priv)) {
-		if (IS_SKL_GT2(dev_priv))
-			i915_perf_load_test_config_sklgt2(dev_priv);
-		else if (IS_SKL_GT3(dev_priv))
-			i915_perf_load_test_config_sklgt3(dev_priv);
-		else if (IS_SKL_GT4(dev_priv))
-			i915_perf_load_test_config_sklgt4(dev_priv);
-	} else if (IS_CHERRYVIEW(dev_priv)) {
-		i915_perf_load_test_config_chv(dev_priv);
-	} else if (IS_BROADWELL(dev_priv)) {
-		i915_perf_load_test_config_bdw(dev_priv);
-	} else if (IS_HASWELL(dev_priv)) {
-		i915_perf_load_test_config_hsw(dev_priv);
-}
-
-	if (dev_priv->perf.test_config.id == 0)
+	sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
+
+	if (IS_TIGERLAKE(i915)) {
+		i915_perf_load_test_config_tgl(i915);
+	} else if (INTEL_GEN(i915) >= 11) {
+		i915_perf_load_test_config_icl(i915);
+	} else if (IS_CANNONLAKE(i915)) {
+		i915_perf_load_test_config_cnl(i915);
+	} else if (IS_COFFEELAKE(i915)) {
+		if (IS_CFL_GT2(i915))
+			i915_perf_load_test_config_cflgt2(i915);
+		if (IS_CFL_GT3(i915))
+			i915_perf_load_test_config_cflgt3(i915);
+	} else if (IS_GEMINILAKE(i915)) {
+		i915_perf_load_test_config_glk(i915);
+	} else if (IS_KABYLAKE(i915)) {
+		if (IS_KBL_GT2(i915))
+			i915_perf_load_test_config_kblgt2(i915);
+		else if (IS_KBL_GT3(i915))
+			i915_perf_load_test_config_kblgt3(i915);
+	} else if (IS_BROXTON(i915)) {
+		i915_perf_load_test_config_bxt(i915);
+	} else if (IS_SKYLAKE(i915)) {
+		if (IS_SKL_GT2(i915))
+			i915_perf_load_test_config_sklgt2(i915);
+		else if (IS_SKL_GT3(i915))
+			i915_perf_load_test_config_sklgt3(i915);
+		else if (IS_SKL_GT4(i915))
+			i915_perf_load_test_config_sklgt4(i915);
+	} else if (IS_CHERRYVIEW(i915)) {
+		i915_perf_load_test_config_chv(i915);
+	} else if (IS_BROADWELL(i915)) {
+		i915_perf_load_test_config_bdw(i915);
+	} else if (IS_HASWELL(i915)) {
+		i915_perf_load_test_config_hsw(i915);
+	}
+
+	if (perf->test_config.id == 0)
 		goto sysfs_error;
 
-	ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
-				 &dev_priv->perf.test_config.sysfs_metric);
+	ret = sysfs_create_group(perf->metrics_kobj,
+				 &perf->test_config.sysfs_metric);
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+	perf->test_config.perf = perf;
+	kref_init(&perf->test_config.ref);
 
 	goto exit;
 
 sysfs_error:
-	kobject_put(dev_priv->perf.metrics_kobj);
-	dev_priv->perf.metrics_kobj = NULL;
+	kobject_put(perf->metrics_kobj);
+	perf->metrics_kobj = NULL;
 
 exit:
-	mutex_unlock(&dev_priv->perf.lock);
+	mutex_unlock(&perf->lock);
 }
 
 /**
  * i915_perf_unregister - hide i915-perf from userspace
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  *
  * i915-perf state cleanup is split up into an 'unregister' and
  * 'deinit' phase where the interface is first hidden from
  * userspace by i915_perf_unregister() before cleaning up
  * remaining state in i915_perf_fini().
  */
-void i915_perf_unregister(struct drm_i915_private *dev_priv)
+void i915_perf_unregister(struct drm_i915_private *i915)
 {
-	if (!dev_priv->perf.metrics_kobj)
+	struct i915_perf *perf = &i915->perf;
+
+	if (!perf->metrics_kobj)
 		return;
 
-	sysfs_remove_group(dev_priv->perf.metrics_kobj,
-			   &dev_priv->perf.test_config.sysfs_metric);
+	sysfs_remove_group(perf->metrics_kobj,
+			   &perf->test_config.sysfs_metric);
 
-	kobject_put(dev_priv->perf.metrics_kobj);
-	dev_priv->perf.metrics_kobj = NULL;
+	kobject_put(perf->metrics_kobj);
+	perf->metrics_kobj = NULL;
 }
 
-static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
 {
 	static const i915_reg_t flex_eu_regs[] = {
 		EU_PERF_CNTL0,
@@ -3143,56 +3787,80 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
 	return false;
 }
 
-static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
+#define ADDR_IN_RANGE(addr, start, end) \
+	((addr) >= (start) && \
+	 (addr) <= (end))
+
+#define REG_IN_RANGE(addr, start, end) \
+	((addr) >= i915_mmio_reg_offset(start) && \
+	 (addr) <= i915_mmio_reg_offset(end))
+
+#define REG_EQUAL(addr, mmio) \
+	((addr) == i915_mmio_reg_offset(mmio))
+
+static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+	return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
+	       REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
+	       REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
+}
+
+static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
+{
+	return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
+	       REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
+	       REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
+	       REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
+}
+
+static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
-		addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
-		(addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
-		 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
-		(addr >= i915_mmio_reg_offset(OACEC0_0) &&
-		 addr <= i915_mmio_reg_offset(OACEC7_1));
+	return gen7_is_valid_mux_addr(perf, addr) ||
+	       REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
+	       REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
 }
 
-static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
-		(addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
-		 addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
-		(addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
-		 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
-		(addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
-		 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
+	return gen8_is_valid_mux_addr(perf, addr) ||
+	       REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
+	       REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
 }
 
-static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return gen7_is_valid_mux_addr(dev_priv, addr) ||
-		addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
-		(addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
-		 addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
+	return gen7_is_valid_mux_addr(perf, addr) ||
+	       ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
+	       REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
+	       REG_EQUAL(addr, HSW_MBVID2_MISR0);
 }
 
-static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return gen8_is_valid_mux_addr(dev_priv, addr) ||
-		addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
-		(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
-		 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
+	return gen7_is_valid_mux_addr(perf, addr) ||
+	       ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
 }
 
-static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
 {
-	return gen7_is_valid_mux_addr(dev_priv, addr) ||
-		(addr >= 0x25100 && addr <= 0x2FF90) ||
-		(addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
-		 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
-		addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
+	return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) ||
+	       REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) ||
+	       REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) ||
+	       REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) ||
+	       REG_EQUAL(addr, GEN12_OAA_DBG_REG) ||
+	       REG_EQUAL(addr, GEN12_OAG_OA_PESS) ||
+	       REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF);
 }
 
-static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return gen7_is_valid_mux_addr(dev_priv, addr) ||
-		(addr >= 0x182300 && addr <= 0x1823A4);
+	return REG_EQUAL(addr, NOA_WRITE) ||
+	       REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
+	       REG_EQUAL(addr, GDT_CHICKEN_BITS) ||
+	       REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
+	       REG_EQUAL(addr, RPM_CONFIG0) ||
+	       REG_EQUAL(addr, RPM_CONFIG1) ||
+	       REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8));
 }
 
 static u32 mask_reg_value(u32 reg, u32 val)
@@ -3201,21 +3869,21 @@ static u32 mask_reg_value(u32 reg, u32 val)
 	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
 	 * programmed by userspace doesn't change this.
 	 */
-	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
+	if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
 		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
 
 	/* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function
 	 * indicated by its name and a bunch of selection fields used by OA
 	 * configs.
 	 */
-	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
+	if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
 		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
 
 	return val;
 }
 
-static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
-					 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
+static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
+					 bool (*is_valid)(struct i915_perf *perf, u32 addr),
 					 u32 __user *regs,
 					 u32 n_regs)
 {
@@ -3245,7 +3913,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
 		if (err)
 			goto addr_err;
 
-		if (!is_valid(dev_priv, addr)) {
+		if (!is_valid(perf, addr)) {
 			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
 			err = -EINVAL;
 			goto addr_err;
@@ -3278,7 +3946,7 @@ static ssize_t show_dynamic_id(struct device *dev,
 	return sprintf(buf, "%d\n", oa_config->id);
 }
 
-static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
+static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
 					 struct i915_oa_config *oa_config)
 {
 	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
@@ -3293,7 +3961,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
 	oa_config->sysfs_metric.name = oa_config->uuid;
 	oa_config->sysfs_metric.attrs = oa_config->attrs;
 
-	return sysfs_create_group(dev_priv->perf.metrics_kobj,
+	return sysfs_create_group(perf->metrics_kobj,
 				  &oa_config->sysfs_metric);
 }
 
@@ -3313,17 +3981,18 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
 int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_perf *perf = &to_i915(dev)->perf;
 	struct drm_i915_perf_oa_config *args = data;
 	struct i915_oa_config *oa_config, *tmp;
+	struct i915_oa_reg *regs;
 	int err, id;
 
-	if (!dev_priv->perf.initialized) {
+	if (!perf->i915) {
 		DRM_DEBUG("i915 perf interface not available for this system\n");
 		return -ENOTSUPP;
 	}
 
-	if (!dev_priv->perf.metrics_kobj) {
+	if (!perf->metrics_kobj) {
 		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
 		return -EINVAL;
 	}
@@ -3346,7 +4015,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->perf = perf;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3360,59 +4030,59 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
 
 	oa_config->mux_regs_len = args->n_mux_regs;
-	oa_config->mux_regs =
-		alloc_oa_regs(dev_priv,
-			      dev_priv->perf.ops.is_valid_mux_reg,
-			      u64_to_user_ptr(args->mux_regs_ptr),
-			      args->n_mux_regs);
+	regs = alloc_oa_regs(perf,
+			     perf->ops.is_valid_mux_reg,
+			     u64_to_user_ptr(args->mux_regs_ptr),
+			     args->n_mux_regs);
 
-	if (IS_ERR(oa_config->mux_regs)) {
+	if (IS_ERR(regs)) {
 		DRM_DEBUG("Failed to create OA config for mux_regs\n");
-		err = PTR_ERR(oa_config->mux_regs);
+		err = PTR_ERR(regs);
 		goto reg_err;
 	}
+	oa_config->mux_regs = regs;
 
 	oa_config->b_counter_regs_len = args->n_boolean_regs;
-	oa_config->b_counter_regs =
-		alloc_oa_regs(dev_priv,
-			      dev_priv->perf.ops.is_valid_b_counter_reg,
-			      u64_to_user_ptr(args->boolean_regs_ptr),
-			      args->n_boolean_regs);
+	regs = alloc_oa_regs(perf,
+			     perf->ops.is_valid_b_counter_reg,
+			     u64_to_user_ptr(args->boolean_regs_ptr),
+			     args->n_boolean_regs);
 
-	if (IS_ERR(oa_config->b_counter_regs)) {
+	if (IS_ERR(regs)) {
 		DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
-		err = PTR_ERR(oa_config->b_counter_regs);
+		err = PTR_ERR(regs);
 		goto reg_err;
 	}
+	oa_config->b_counter_regs = regs;
 
-	if (INTEL_GEN(dev_priv) < 8) {
+	if (INTEL_GEN(perf->i915) < 8) {
 		if (args->n_flex_regs != 0) {
 			err = -EINVAL;
 			goto reg_err;
 		}
 	} else {
 		oa_config->flex_regs_len = args->n_flex_regs;
-		oa_config->flex_regs =
-			alloc_oa_regs(dev_priv,
-				      dev_priv->perf.ops.is_valid_flex_reg,
-				      u64_to_user_ptr(args->flex_regs_ptr),
-				      args->n_flex_regs);
+		regs = alloc_oa_regs(perf,
+				     perf->ops.is_valid_flex_reg,
+				     u64_to_user_ptr(args->flex_regs_ptr),
+				     args->n_flex_regs);
 
-		if (IS_ERR(oa_config->flex_regs)) {
+		if (IS_ERR(regs)) {
 			DRM_DEBUG("Failed to create OA config for flex_regs\n");
-			err = PTR_ERR(oa_config->flex_regs);
+			err = PTR_ERR(regs);
 			goto reg_err;
 		}
+		oa_config->flex_regs = regs;
 	}
 
-	err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+	err = mutex_lock_interruptible(&perf->metrics_lock);
 	if (err)
 		goto reg_err;
 
 	/* We shouldn't have too many configs, so this iteration shouldn't be
 	 * too costly.
 	 */
-	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
+	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
 		if (!strcmp(tmp->uuid, oa_config->uuid)) {
 			DRM_DEBUG("OA config already exists with this uuid\n");
 			err = -EADDRINUSE;
@@ -3420,14 +4090,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
+	err = create_dynamic_oa_sysfs_entry(perf, oa_config);
 	if (err) {
 		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
 		goto sysfs_err;
 	}
 
 	/* Config id 0 is invalid, id 1 for kernel stored test config. */
-	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
+	oa_config->id = idr_alloc(&perf->metrics_idr,
 				  oa_config, 2,
 				  0, GFP_KERNEL);
 	if (oa_config->id < 0) {
@@ -3436,16 +4106,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		goto sysfs_err;
 	}
 
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+	mutex_unlock(&perf->metrics_lock);
 
 	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
 
 	return oa_config->id;
 
 sysfs_err:
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+	mutex_unlock(&perf->metrics_lock);
 reg_err:
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3464,12 +4134,12 @@ reg_err:
 int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_perf *perf = &to_i915(dev)->perf;
 	u64 *arg = data;
 	struct i915_oa_config *oa_config;
 	int ret;
 
-	if (!dev_priv->perf.initialized) {
+	if (!perf->i915) {
 		DRM_DEBUG("i915 perf interface not available for this system\n");
 		return -ENOTSUPP;
 	}
@@ -3479,31 +4149,33 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 		return -EACCES;
 	}
 
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+	ret = mutex_lock_interruptible(&perf->metrics_lock);
 	if (ret)
-		goto lock_err;
+		return ret;
 
-	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
+	oa_config = idr_find(&perf->metrics_idr, *arg);
 	if (!oa_config) {
 		DRM_DEBUG("Failed to remove unknown OA config\n");
 		ret = -ENOENT;
-		goto config_err;
+		goto err_unlock;
 	}
 
 	GEM_BUG_ON(*arg != oa_config->id);
 
-	sysfs_remove_group(dev_priv->perf.metrics_kobj,
-			   &oa_config->sysfs_metric);
+	sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
 
-	idr_remove(&dev_priv->perf.metrics_idr, *arg);
+	idr_remove(&perf->metrics_idr, *arg);
+
+	mutex_unlock(&perf->metrics_lock);
 
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
+
+	return 0;
 
-config_err:
-	mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
+err_unlock:
+	mutex_unlock(&perf->metrics_lock);
 	return ret;
 }
 
@@ -3550,104 +4222,126 @@ static struct ctl_table dev_root[] = {
 };
 
 /**
- * i915_perf_init - initialize i915-perf state on module load
- * @dev_priv: i915 device instance
+ * i915_perf_init - initialize i915-perf state on module bind
+ * @i915: i915 device instance
  *
  * Initializes i915-perf state without exposing anything to userspace.
  *
  * Note: i915-perf initialization is split into an 'init' and 'register'
  * phase with the i915_perf_register() exposing state to userspace.
  */
-void i915_perf_init(struct drm_i915_private *dev_priv)
-{
-	if (IS_HASWELL(dev_priv)) {
-		dev_priv->perf.ops.is_valid_b_counter_reg =
-			gen7_is_valid_b_counter_addr;
-		dev_priv->perf.ops.is_valid_mux_reg =
-			hsw_is_valid_mux_addr;
-		dev_priv->perf.ops.is_valid_flex_reg = NULL;
-		dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
-		dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
-		dev_priv->perf.ops.oa_enable = gen7_oa_enable;
-		dev_priv->perf.ops.oa_disable = gen7_oa_disable;
-		dev_priv->perf.ops.read = gen7_oa_read;
-		dev_priv->perf.ops.oa_hw_tail_read =
-			gen7_oa_hw_tail_read;
-
-		dev_priv->perf.oa_formats = hsw_oa_formats;
-	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
+void i915_perf_init(struct drm_i915_private *i915)
+{
+	struct i915_perf *perf = &i915->perf;
+
+	/* XXX const struct i915_perf_ops! */
+
+	if (IS_HASWELL(i915)) {
+		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
+		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
+		perf->ops.is_valid_flex_reg = NULL;
+		perf->ops.enable_metric_set = hsw_enable_metric_set;
+		perf->ops.disable_metric_set = hsw_disable_metric_set;
+		perf->ops.oa_enable = gen7_oa_enable;
+		perf->ops.oa_disable = gen7_oa_disable;
+		perf->ops.read = gen7_oa_read;
+		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
+
+		perf->oa_formats = hsw_oa_formats;
+	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
 		/* Note: that although we could theoretically also support the
 		 * legacy ringbuffer mode on BDW (and earlier iterations of
 		 * this driver, before upstreaming did this) it didn't seem
 		 * worth the complexity to maintain now that BDW+ enable
 		 * execlist mode by default.
 		 */
-		dev_priv->perf.oa_formats = gen8_plus_oa_formats;
+		perf->ops.read = gen8_oa_read;
 
-		dev_priv->perf.ops.oa_enable = gen8_oa_enable;
-		dev_priv->perf.ops.oa_disable = gen8_oa_disable;
-		dev_priv->perf.ops.read = gen8_oa_read;
-		dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
+		if (IS_GEN_RANGE(i915, 8, 9)) {
+			perf->oa_formats = gen8_plus_oa_formats;
 
-		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
-			dev_priv->perf.ops.is_valid_b_counter_reg =
+			perf->ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
-			dev_priv->perf.ops.is_valid_mux_reg =
+			perf->ops.is_valid_mux_reg =
 				gen8_is_valid_mux_addr;
-			dev_priv->perf.ops.is_valid_flex_reg =
+			perf->ops.is_valid_flex_reg =
 				gen8_is_valid_flex_addr;
 
-			if (IS_CHERRYVIEW(dev_priv)) {
-				dev_priv->perf.ops.is_valid_mux_reg =
+			if (IS_CHERRYVIEW(i915)) {
+				perf->ops.is_valid_mux_reg =
 					chv_is_valid_mux_addr;
 			}
 
-			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
-			dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;
+			perf->ops.oa_enable = gen8_oa_enable;
+			perf->ops.oa_disable = gen8_oa_disable;
+			perf->ops.enable_metric_set = gen8_enable_metric_set;
+			perf->ops.disable_metric_set = gen8_disable_metric_set;
+			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
 
-			if (IS_GEN(dev_priv, 8)) {
-				dev_priv->perf.ctx_oactxctrl_offset = 0x120;
-				dev_priv->perf.ctx_flexeu0_offset = 0x2ce;
+			if (IS_GEN(i915, 8)) {
+				perf->ctx_oactxctrl_offset = 0x120;
+				perf->ctx_flexeu0_offset = 0x2ce;
 
-				dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
+				perf->gen8_valid_ctx_bit = BIT(25);
 			} else {
-				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
-				dev_priv->perf.ctx_flexeu0_offset = 0x3de;
+				perf->ctx_oactxctrl_offset = 0x128;
+				perf->ctx_flexeu0_offset = 0x3de;
 
-				dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
+				perf->gen8_valid_ctx_bit = BIT(16);
 			}
-		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
-			dev_priv->perf.ops.is_valid_b_counter_reg =
+		} else if (IS_GEN_RANGE(i915, 10, 11)) {
+			perf->oa_formats = gen8_plus_oa_formats;
+
+			perf->ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
-			dev_priv->perf.ops.is_valid_mux_reg =
+			perf->ops.is_valid_mux_reg =
 				gen10_is_valid_mux_addr;
-			dev_priv->perf.ops.is_valid_flex_reg =
+			perf->ops.is_valid_flex_reg =
 				gen8_is_valid_flex_addr;
 
-			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
-			dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;
+			perf->ops.oa_enable = gen8_oa_enable;
+			perf->ops.oa_disable = gen8_oa_disable;
+			perf->ops.enable_metric_set = gen8_enable_metric_set;
+			perf->ops.disable_metric_set = gen10_disable_metric_set;
+			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
 
-			if (IS_GEN(dev_priv, 10)) {
-				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
-				dev_priv->perf.ctx_flexeu0_offset = 0x3de;
+			if (IS_GEN(i915, 10)) {
+				perf->ctx_oactxctrl_offset = 0x128;
+				perf->ctx_flexeu0_offset = 0x3de;
 			} else {
-				dev_priv->perf.ctx_oactxctrl_offset = 0x124;
-				dev_priv->perf.ctx_flexeu0_offset = 0x78e;
+				perf->ctx_oactxctrl_offset = 0x124;
+				perf->ctx_flexeu0_offset = 0x78e;
 			}
-			dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
+			perf->gen8_valid_ctx_bit = BIT(16);
+		} else if (IS_GEN(i915, 12)) {
+			perf->oa_formats = gen12_oa_formats;
+
+			perf->ops.is_valid_b_counter_reg =
+				gen12_is_valid_b_counter_addr;
+			perf->ops.is_valid_mux_reg =
+				gen12_is_valid_mux_addr;
+			perf->ops.is_valid_flex_reg =
+				gen8_is_valid_flex_addr;
+
+			perf->ops.oa_enable = gen12_oa_enable;
+			perf->ops.oa_disable = gen12_oa_disable;
+			perf->ops.enable_metric_set = gen12_enable_metric_set;
+			perf->ops.disable_metric_set = gen12_disable_metric_set;
+			perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
+
+			perf->ctx_flexeu0_offset = 0;
+			perf->ctx_oactxctrl_offset = 0x144;
 		}
 	}
 
-	if (dev_priv->perf.ops.enable_metric_set) {
-		INIT_LIST_HEAD(&dev_priv->perf.streams);
-		mutex_init(&dev_priv->perf.lock);
+	if (perf->ops.enable_metric_set) {
+		mutex_init(&perf->lock);
 
 		oa_sample_rate_hard_limit = 1000 *
-			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
-		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
+			(RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2);
 
-		mutex_init(&dev_priv->perf.metrics_lock);
-		idr_init(&dev_priv->perf.metrics_idr);
+		mutex_init(&perf->metrics_lock);
+		idr_init(&perf->metrics_idr);
 
 		/* We set up some ratelimit state to potentially throttle any
 		 * _NOTES about spurious, invalid OA reports which we don't
@@ -3659,44 +4353,78 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 		 *
 		 * Using the same limiting factors as printk_ratelimit()
 		 */
-		ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
-				     5 * HZ, 10);
+		ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
 		/* Since we use a DRM_NOTE for spurious reports it would be
 		 * inconsistent to let __ratelimit() automatically print a
 		 * warning for throttling.
 		 */
-		ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
+		ratelimit_set_flags(&perf->spurious_report_rs,
 				    RATELIMIT_MSG_ON_RELEASE);
 
-		dev_priv->perf.initialized = true;
+		atomic64_set(&perf->noa_programming_delay,
+			     500 * 1000 /* 500us */);
+
+		perf->i915 = i915;
 	}
 }
 
 static int destroy_config(int id, void *p, void *data)
 {
-	struct drm_i915_private *dev_priv = data;
-	struct i915_oa_config *oa_config = p;
+	i915_oa_config_put(p);
+	return 0;
+}
 
-	put_oa_config(dev_priv, oa_config);
+void i915_perf_sysctl_register(void)
+{
+	sysctl_header = register_sysctl_table(dev_root);
+}
 
-	return 0;
+void i915_perf_sysctl_unregister(void)
+{
+	unregister_sysctl_table(sysctl_header);
 }
 
 /**
  * i915_perf_fini - Counter part to i915_perf_init()
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
  */
-void i915_perf_fini(struct drm_i915_private *dev_priv)
+void i915_perf_fini(struct drm_i915_private *i915)
 {
-	if (!dev_priv->perf.initialized)
-		return;
+	struct i915_perf *perf = &i915->perf;
 
-	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
-	idr_destroy(&dev_priv->perf.metrics_idr);
+	if (!perf->i915)
+		return;
 
-	unregister_sysctl_table(dev_priv->perf.sysctl_header);
+	idr_for_each(&perf->metrics_idr, destroy_config, perf);
+	idr_destroy(&perf->metrics_idr);
 
-	memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));
+	memset(&perf->ops, 0, sizeof(perf->ops));
+	perf->i915 = NULL;
+}
 
-	dev_priv->perf.initialized = false;
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+	/*
+	 * 1: Initial version
+	 *   I915_PERF_IOCTL_ENABLE
+	 *   I915_PERF_IOCTL_DISABLE
+	 *
+	 * 2: Added runtime modification of OA config.
+	 *   I915_PERF_IOCTL_CONFIG
+	 *
+	 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
+	 *    preemption on a particular context so that performance data is
+	 *    accessible from a delta of MI_RPC reports without looking at the
+	 *    OA buffer.
+	 */
+	return 3;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_perf.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index a412b16d9ffc..882fdd0a7680 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -6,11 +6,15 @@
 #ifndef __I915_PERF_H__
 #define __I915_PERF_H__
 
+#include <linux/kref.h>
 #include <linux/types.h>
 
+#include "i915_perf_types.h"
+
 struct drm_device;
 struct drm_file;
 struct drm_i915_private;
+struct i915_oa_config;
 struct intel_context;
 struct intel_engine_cs;
 
@@ -18,6 +22,9 @@ void i915_perf_init(struct drm_i915_private *i915);
 void i915_perf_fini(struct drm_i915_private *i915);
 void i915_perf_register(struct drm_i915_private *i915);
 void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
+void i915_perf_sysctl_register(void);
+void i915_perf_sysctl_unregister(void);
 
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
@@ -25,8 +32,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
-void i915_oa_init_reg_state(struct intel_engine_cs *engine,
-			    struct intel_context *ce,
-			    u32 *reg_state);
+
+void i915_oa_init_reg_state(const struct intel_context *ce,
+			    const struct intel_engine_cs *engine);
+
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set);
+
+static inline struct i915_oa_config *
+i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+	if (kref_get_unless_zero(&oa_config->ref))
+		return oa_config;
+	else
+		return NULL;
+}
+
+void i915_oa_config_release(struct kref *ref);
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 #endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
new file mode 100644
index 000000000000..45e581455f5d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_PERF_TYPES_H_
+#define _I915_PERF_TYPES_H_
+
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/hrtimer.h>
+#include <linux/llist.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/wait.h>
+
+#include "i915_reg.h"
+#include "intel_wakeref.h"
+
+struct drm_i915_private;
+struct file;
+struct i915_gem_context;
+struct i915_perf;
+struct i915_vma;
+struct intel_context;
+struct intel_engine_cs;
+
+struct i915_oa_format {
+	u32 format;
+	int size;
+};
+
+struct i915_oa_reg {
+	i915_reg_t addr;
+	u32 value;
+};
+
+struct i915_oa_config {
+	struct i915_perf *perf;
+
+	char uuid[UUID_STRING_LEN + 1];
+	int id;
+
+	const struct i915_oa_reg *mux_regs;
+	u32 mux_regs_len;
+	const struct i915_oa_reg *b_counter_regs;
+	u32 b_counter_regs_len;
+	const struct i915_oa_reg *flex_regs;
+	u32 flex_regs_len;
+
+	struct attribute_group sysfs_metric;
+	struct attribute *attrs[2];
+	struct device_attribute sysfs_metric_id;
+
+	struct kref ref;
+	struct rcu_head rcu;
+};
+
+struct i915_perf_stream;
+
+/**
+ * struct i915_perf_stream_ops - the OPs to support a specific stream type
+ */
+struct i915_perf_stream_ops {
+	/**
+	 * @enable: Enables the collection of HW samples, either in response to
+	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
+	 * without `I915_PERF_FLAG_DISABLED`.
+	 */
+	void (*enable)(struct i915_perf_stream *stream);
+
+	/**
+	 * @disable: Disables the collection of HW samples, either in response
+	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
+	 * the stream.
+	 */
+	void (*disable)(struct i915_perf_stream *stream);
+
+	/**
+	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
+	 * once there is something ready to read() for the stream
+	 */
+	void (*poll_wait)(struct i915_perf_stream *stream,
+			  struct file *file,
+			  poll_table *wait);
+
+	/**
+	 * @wait_unlocked: For handling a blocking read, wait until there is
+	 * something to ready to read() for the stream. E.g. wait on the same
+	 * wait queue that would be passed to poll_wait().
+	 */
+	int (*wait_unlocked)(struct i915_perf_stream *stream);
+
+	/**
+	 * @read: Copy buffered metrics as records to userspace
+	 * **buf**: the userspace, destination buffer
+	 * **count**: the number of bytes to copy, requested by userspace
+	 * **offset**: zero at the start of the read, updated as the read
+	 * proceeds, it represents how many bytes have been copied so far and
+	 * the buffer offset for copying the next record.
+	 *
+	 * Copy as many buffered i915 perf samples and records for this stream
+	 * to userspace as will fit in the given buffer.
+	 *
+	 * Only write complete records; returning -%ENOSPC if there isn't room
+	 * for a complete record.
+	 *
+	 * Return any error condition that results in a short read such as
+	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
+	 * returning to userspace.
+	 */
+	int (*read)(struct i915_perf_stream *stream,
+		    char __user *buf,
+		    size_t count,
+		    size_t *offset);
+
+	/**
+	 * @destroy: Cleanup any stream specific resources.
+	 *
+	 * The stream will always be disabled before this is called.
+	 */
+	void (*destroy)(struct i915_perf_stream *stream);
+};
+
+/**
+ * struct i915_perf_stream - state for a single open stream FD
+ */
+struct i915_perf_stream {
+	/**
+	 * @perf: i915_perf backpointer
+	 */
+	struct i915_perf *perf;
+
+	/**
+	 * @uncore: mmio access path
+	 */
+	struct intel_uncore *uncore;
+
+	/**
+	 * @engine: Engine associated with this performance stream.
+	 */
+	struct intel_engine_cs *engine;
+
+	/**
+	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
+	 * properties given when opening a stream, representing the contents
+	 * of a single sample as read() by userspace.
+	 */
+	u32 sample_flags;
+
+	/**
+	 * @sample_size: Considering the configured contents of a sample
+	 * combined with the required header size, this is the total size
+	 * of a single sample record.
+	 */
+	int sample_size;
+
+	/**
+	 * @ctx: %NULL if measuring system-wide across all contexts or a
+	 * specific context that is being monitored.
+	 */
+	struct i915_gem_context *ctx;
+
+	/**
+	 * @enabled: Whether the stream is currently enabled, considering
+	 * whether the stream was opened in a disabled state and based
+	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
+	 */
+	bool enabled;
+
+	/**
+	 * @hold_preemption: Whether preemption is put on hold for command
+	 * submissions done on the @ctx. This is useful for some drivers that
+	 * cannot easily post process the OA buffer context to subtract delta
+	 * of performance counters not associated with @ctx.
+	 */
+	bool hold_preemption;
+
+	/**
+	 * @ops: The callbacks providing the implementation of this specific
+	 * type of configured stream.
+	 */
+	const struct i915_perf_stream_ops *ops;
+
+	/**
+	 * @oa_config: The OA configuration used by the stream.
+	 */
+	struct i915_oa_config *oa_config;
+
+	/**
+	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct llist_head oa_config_bos;
+
+	/**
+	 * @pinned_ctx: The OA context specific information.
+	 */
+	struct intel_context *pinned_ctx;
+
+	/**
+	 * @specific_ctx_id: The id of the specific context.
+	 */
+	u32 specific_ctx_id;
+
+	/**
+	 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits.
+	 */
+	u32 specific_ctx_id_mask;
+
+	/**
+	 * @poll_check_timer: High resolution timer that will periodically
+	 * check for data in the circular OA buffer for notifying userspace
+	 * (e.g. during a read() or poll()).
+	 */
+	struct hrtimer poll_check_timer;
+
+	/**
+	 * @poll_wq: The wait queue that hrtimer callback wakes when it
+	 * sees data ready to read in the circular OA buffer.
+	 */
+	wait_queue_head_t poll_wq;
+
+	/**
+	 * @pollin: Whether there is data available to read.
+	 */
+	bool pollin;
+
+	/**
+	 * @periodic: Whether periodic sampling is currently enabled.
+	 */
+	bool periodic;
+
+	/**
+	 * @period_exponent: The OA unit sampling frequency is derived from this.
+	 */
+	int period_exponent;
+
+	/**
+	 * @oa_buffer: State of the OA buffer.
+	 */
+	struct {
+		struct i915_vma *vma;
+		u8 *vaddr;
+		u32 last_ctx_id;
+		int format;
+		int format_size;
+		int size_exponent;
+
+		/**
+		 * @ptr_lock: Locks reads and writes to all head/tail state
+		 *
+		 * Consider: the head and tail pointer state needs to be read
+		 * consistently from a hrtimer callback (atomic context) and
+		 * read() fop (user context) with tail pointer updates happening
+		 * in atomic context and head updates in user context and the
+		 * (unlikely) possibility of read() errors needing to reset all
+		 * head/tail state.
+		 *
+		 * Note: Contention/performance aren't currently a significant
+		 * concern here considering the relatively low frequency of
+		 * hrtimer callbacks (5ms period) and that reads typically only
+		 * happen in response to a hrtimer event and likely complete
+		 * before the next callback.
+		 *
+		 * Note: This lock is not held *while* reading and copying data
+		 * to userspace so the value of head observed in htrimer
+		 * callbacks won't represent any partial consumption of data.
+		 */
+		spinlock_t ptr_lock;
+
+		/**
+		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
+		 * used for reading.
+		 *
+		 * Initial values of 0xffffffff are invalid and imply that an
+		 * update is required (and should be ignored by an attempted
+		 * read)
+		 */
+		struct {
+			u32 offset;
+		} tails[2];
+
+		/**
+		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
+		 */
+		unsigned int aged_tail_idx;
+
+		/**
+		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
+		 * was read; used to determine when it is old enough to trust.
+		 */
+		u64 aging_timestamp;
+
+		/**
+		 * @head: Although we can always read back the head pointer register,
+		 * we prefer to avoid trusting the HW state, just to avoid any
+		 * risk that some hardware condition could * somehow bump the
+		 * head pointer unpredictably and cause us to forward the wrong
+		 * OA buffer data to userspace.
+		 */
+		u32 head;
+	} oa_buffer;
+
+	/**
+	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
+	 * reprogrammed.
+	 */
+	struct i915_vma *noa_wait;
+};
+
+/**
+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream
+ */
+struct i915_oa_ops {
+	/**
+	 * @is_valid_b_counter_reg: Validates register's address for
+	 * programming boolean counters for a particular platform.
+	 */
+	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
+
+	/**
+	 * @is_valid_mux_reg: Validates register's address for programming mux
+	 * for a particular platform.
+	 */
+	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
+
+	/**
+	 * @is_valid_flex_reg: Validates register's address for programming
+	 * flex EU filtering for a particular platform.
+	 */
+	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
+
+	/**
+	 * @enable_metric_set: Selects and applies any MUX configuration to set
+	 * up the Boolean and Custom (B/C) counters that are part of the
+	 * counter reports being sampled. May apply system constraints such as
+	 * disabling EU clock gating as required.
+	 */
+	int (*enable_metric_set)(struct i915_perf_stream *stream);
+
+	/**
+	 * @disable_metric_set: Remove system constraints associated with using
+	 * the OA unit.
+	 */
+	void (*disable_metric_set)(struct i915_perf_stream *stream);
+
+	/**
+	 * @oa_enable: Enable periodic sampling
+	 */
+	void (*oa_enable)(struct i915_perf_stream *stream);
+
+	/**
+	 * @oa_disable: Disable periodic sampling
+	 */
+	void (*oa_disable)(struct i915_perf_stream *stream);
+
+	/**
+	 * @read: Copy data from the circular OA buffer into a given userspace
+	 * buffer.
+	 */
+	int (*read)(struct i915_perf_stream *stream,
+		    char __user *buf,
+		    size_t count,
+		    size_t *offset);
+
+	/**
+	 * @oa_hw_tail_read: read the OA tail pointer register
+	 *
+	 * In particular this enables us to share all the fiddly code for
+	 * handling the OA unit tail pointer race that affects multiple
+	 * generations.
+	 */
+	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
+};
+
+struct i915_perf {
+	struct drm_i915_private *i915;
+
+	struct kobject *metrics_kobj;
+
+	/*
+	 * Lock associated with adding/modifying/removing OA configs
+	 * in perf->metrics_idr.
+	 */
+	struct mutex metrics_lock;
+
+	/*
+	 * List of dynamic configurations (struct i915_oa_config), you
+	 * need to hold perf->metrics_lock to access it.
+	 */
+	struct idr metrics_idr;
+
+	/*
+	 * Lock associated with anything below within this structure
+	 * except exclusive_stream.
+	 */
+	struct mutex lock;
+
+	/*
+	 * The stream currently using the OA unit. If accessed
+	 * outside a syscall associated to its file
+	 * descriptor.
+	 */
+	struct i915_perf_stream *exclusive_stream;
+
+	/**
+	 * For rate limiting any notifications of spurious
+	 * invalid OA reports
+	 */
+	struct ratelimit_state spurious_report_rs;
+
+	struct i915_oa_config test_config;
+
+	u32 gen7_latched_oastatus1;
+	u32 ctx_oactxctrl_offset;
+	u32 ctx_flexeu0_offset;
+
+	/**
+	 * The RPT_ID/reason field for Gen8+ includes a bit
+	 * to determine if the CTX ID in the report is valid
+	 * but the specific bit differs between Gen 8 and 9
+	 */
+	u32 gen8_valid_ctx_bit;
+
+	struct i915_oa_ops ops;
+	const struct i915_oa_format *oa_formats;
+
+	atomic64_t noa_programming_delay;
+};
+
+#endif /* _I915_PERF_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 212acaef581e..28a82c849bac 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -11,6 +11,8 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
+#include "gt/intel_rps.h"
 
 #include "i915_drv.h"
 #include "i915_pmu.h"
@@ -116,22 +118,93 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
-void i915_pmu_gt_parked(struct drm_i915_private *i915)
+static u64 __get_rc6(struct intel_gt *gt)
 {
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(&gt->rc6,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
+#if IS_ENABLED(CONFIG_PM)
+
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
 	struct i915_pmu *pmu = &i915->pmu;
+	unsigned long flags;
+	bool awake = false;
+	u64 val;
 
-	if (!pmu->base.event_init)
-		return;
+	if (intel_gt_pm_get_if_awake(gt)) {
+		val = __get_rc6(gt);
+		intel_gt_pm_put_async(gt);
+		awake = true;
+	}
 
-	spin_lock_irq(&pmu->lock);
-	/*
-	 * Signal sampling timer to stop if only engine events are enabled and
-	 * GPU went idle.
-	 */
-	pmu->timer_enabled = pmu_needs_timer(pmu, false);
-	spin_unlock_irq(&pmu->lock);
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (awake) {
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	} else {
+		/*
+		 * We think we are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+	}
+
+	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
+	else
+		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void park_rc6(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
+		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+
+	pmu->sleep_last = ktime_get();
+}
+
+#else
+
+static u64 get_rc6(struct intel_gt *gt)
+{
+	return __get_rc6(gt);
 }
 
+static void park_rc6(struct drm_i915_private *i915) {}
+
+#endif
+
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
@@ -143,6 +216,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (!pmu->base.event_init)
+		return;
+
+	spin_lock_irq(&pmu->lock);
+
+	park_rc6(i915);
+
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
+	spin_unlock_irq(&pmu->lock);
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +244,12 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -164,6 +259,16 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
 	sample->cur += val;
 }
 
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
+{
+	/*
+	 * We have to avoid concurrent mmio cache line access on gen7 or
+	 * risk a machine hang. For a fun history lesson dig out the old
+	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
+	 */
+	return IS_GEN(i915, 7);
+}
+
 static void
 engines_sample(struct intel_gt *gt, unsigned int period_ns)
 {
@@ -174,8 +279,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
 	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
 
-	for_each_engine(engine, i915, id) {
+	if (!intel_gt_pm_is_awake(gt))
+		return;
+
+	for_each_engine(engine, gt, id) {
 		struct intel_engine_pmu *pmu = &engine->pmu;
+		spinlock_t *mmio_lock;
 		unsigned long flags;
 		bool busy;
 		u32 val;
@@ -183,7 +292,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
 		if (!intel_engine_pm_get_if_awake(engine))
 			continue;
 
-		spin_lock_irqsave(&engine->uncore->lock, flags);
+		mmio_lock = NULL;
+		if (exclusive_mmio_access(i915))
+			mmio_lock = &engine->uncore->lock;
+
+		if (unlikely(mmio_lock))
+			spin_lock_irqsave(mmio_lock, flags);
 
 		val = ENGINE_READ_FW(engine, RING_CTL);
 		if (val == 0) /* powerwell off => engine idle */
@@ -194,6 +308,10 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
 		if (val & RING_WAIT_SEMAPHORE)
 			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
 
+		/* No need to sample when busy stats are supported. */
+		if (intel_engine_supports_stats(engine))
+			goto skip;
+
 		/*
 		 * While waiting on a semaphore or event, MI_MODE reports the
 		 * ring as idle. However, previously using the seqno, and with
@@ -210,8 +328,9 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
 			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
 
 skip:
-		spin_unlock_irqrestore(&engine->uncore->lock, flags);
-		intel_engine_pm_put(engine);
+		if (unlikely(mmio_lock))
+			spin_unlock_irqrestore(mmio_lock, flags);
+		intel_engine_pm_put_async(engine);
 	}
 }
 
@@ -221,33 +340,57 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
 	sample->cur += mul_u32_u32(val, mul);
 }
 
+static bool frequency_sampling_enabled(struct i915_pmu *pmu)
+{
+	return pmu->enable &
+	       (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+		config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
+}
+
 static void
 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 {
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
 	struct i915_pmu *pmu = &i915->pmu;
+	struct intel_rps *rps = &gt->rps;
+
+	if (!frequency_sampling_enabled(pmu))
+		return;
+
+	/* Report 0/0 (actual/requested) frequency while parked. */
+	if (!intel_gt_pm_get_if_awake(gt))
+		return;
 
 	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
 		u32 val;
 
-		val = i915->gt_pm.rps.cur_freq;
-		if (intel_gt_pm_get_if_awake(gt)) {
-			val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
-			val = intel_get_cagf(i915, val);
-			intel_gt_pm_put(gt);
-		}
+		/*
+		 * We take a quick peek here without using forcewake
+		 * so that we don't perturb the system under observation
+		 * (forcewake => !rc6 => increased power use). We expect
+		 * that if the read fails because it is outside of the
+		 * mmio power well, then it will return 0 -- in which
+		 * case we assume the system is running at the intended
+		 * frequency. Fortunately, the read should rarely fail!
+		 */
+		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
+		if (val)
+			val = intel_rps_get_cagf(rps, val);
+		else
+			val = rps->cur_freq;
 
 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-				intel_gpu_freq(i915, val),
-				period_ns / 1000);
+				intel_gpu_freq(rps, val), period_ns / 1000);
 	}
 
 	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
-				intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq),
+				intel_gpu_freq(rps, rps->cur_freq),
 				period_ns / 1000);
 	}
+
+	intel_gt_pm_put_async(gt);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
@@ -426,104 +569,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
-static u64 get_rc6(struct intel_gt *gt)
-{
-#if IS_ENABLED(CONFIG_PM)
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
-	unsigned long flags;
-	u64 val;
-
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
-		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
-
-		/*
-		 * If we are coming back from being runtime suspended we must
-		 * be careful not to report a larger value than returned
-		 * previously.
-		 */
-
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
-			pmu->sample[__I915_SAMPLE_RC6].cur = val;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	} else {
-		struct device *kdev = rpm->kdev;
-
-		/*
-		 * We are runtime suspended.
-		 *
-		 * Report the delta from when the device was suspended to now,
-		 * on top of the last known real value, as the approximated RC6
-		 * counter value.
-		 */
-		spin_lock_irqsave(&pmu->lock, flags);
-
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
-
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
-	}
-
-	return val;
-#else
-	return __get_rc6(gt);
-#endif
-}
-
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -1047,21 +1092,48 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
 	cpuhp_remove_multi_state(cpuhp_slot);
 }
 
+static bool is_igp(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	/* IGP is 0000:00:02.0 */
+	return pci_domain_nr(pdev->bus) == 0 &&
+	       pdev->bus->number == 0 &&
+	       PCI_SLOT(pdev->devfn) == 2 &&
+	       PCI_FUNC(pdev->devfn) == 0;
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
-	int ret;
+	int ret = -ENOMEM;
 
 	if (INTEL_GEN(i915) <= 2) {
 		dev_info(i915->drm.dev, "PMU not supported for this GPU.");
 		return;
 	}
 
-	i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
-	if (!i915_pmu_events_attr_group.attrs) {
-		ret = -ENOMEM;
-		goto err;
+	spin_lock_init(&pmu->lock);
+	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pmu->timer.function = i915_sample;
+
+	if (!is_igp(i915)) {
+		pmu->name = kasprintf(GFP_KERNEL,
+				      "i915_%s",
+				      dev_name(i915->drm.dev));
+		if (pmu->name) {
+			/* tools/perf reserves colons as special. */
+			strreplace((char *)pmu->name, ':', '_');
+		}
+	} else {
+		pmu->name = "i915";
 	}
+	if (!pmu->name)
+		goto err;
+
+	i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
+	if (!i915_pmu_events_attr_group.attrs)
+		goto err_name;
 
 	pmu->base.attr_groups	= i915_pmu_attr_groups;
 	pmu->base.task_ctx_nr	= perf_invalid_context;
@@ -1073,13 +1145,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	pmu->base.read		= i915_pmu_event_read;
 	pmu->base.event_idx	= i915_pmu_event_event_idx;
 
-	spin_lock_init(&pmu->lock);
-	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	pmu->timer.function = i915_sample;
-
-	ret = perf_pmu_register(&pmu->base, "i915", -1);
+	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
 	if (ret)
-		goto err;
+		goto err_attr;
 
 	ret = i915_pmu_register_cpuhp_state(pmu);
 	if (ret)
@@ -1089,10 +1157,14 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 err_unreg:
 	perf_pmu_unregister(&pmu->base);
-err:
+err_attr:
 	pmu->base.event_init = NULL;
 	free_event_attributes(pmu);
-	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
+err_name:
+	if (!is_igp(i915))
+		kfree(pmu->name);
+err:
+	dev_notice(i915->drm.dev, "Failed to register PMU!\n");
 }
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
@@ -1110,5 +1182,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	perf_pmu_unregister(&pmu->base);
 	pmu->base.event_init = NULL;
+	if (!is_igp(i915))
+		kfree(pmu->name);
 	free_event_attributes(pmu);
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..6c1647c5daf2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -18,7 +18,7 @@ enum {
 	__I915_SAMPLE_FREQ_ACT = 0,
 	__I915_SAMPLE_FREQ_REQ,
 	__I915_SAMPLE_RC6,
-	__I915_SAMPLE_RC6_ESTIMATED,
+	__I915_SAMPLE_RC6_LAST_REPORTED,
 	__I915_NUM_PMU_SAMPLERS
 };
 
@@ -47,6 +47,10 @@ struct i915_pmu {
 	 */
 	struct pmu base;
 	/**
+	 * @name: Name as registered with perf core.
+	 */
+	const char *name;
+	/**
 	 * @lock: Lock protecting enable mask and ref count handling.
 	 */
 	spinlock_t lock;
@@ -97,9 +101,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..732aad148881 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,6 +16,12 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+	/* A preemptive pulse used to monitor the health of each engine */
+	I915_PRIORITY_HEARTBEAT,
+
+	/* Interactive workload, scheduled for immediate pageflipping */
+	I915_PRIORITY_DISPLAY,
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
@@ -39,6 +45,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index ad9240a0817a..ef25ce6e395e 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
 #include <linux/nospec.h>
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_query.h"
 #include <uapi/drm/i915_drm.h>
 
@@ -37,8 +38,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	struct drm_i915_query_topology_info topo;
 	u32 slice_length, subslice_length, eu_length, total_length;
-	u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
-	u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
 	int ret;
 
 	if (query_item->flags != 0)
@@ -50,8 +49,8 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
 	slice_length = sizeof(sseu->slice_mask);
-	subslice_length = sseu->max_slices * subslice_stride;
-	eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
+	subslice_length = sseu->max_slices * sseu->ss_stride;
+	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
 	total_length = sizeof(topo) + slice_length + subslice_length +
 		       eu_length;
 
@@ -69,9 +68,9 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
 
 	topo.subslice_offset = slice_length;
-	topo.subslice_stride = subslice_stride;
+	topo.subslice_stride = sseu->ss_stride;
 	topo.eu_offset = slice_length + subslice_length;
-	topo.eu_stride = eu_stride;
+	topo.eu_stride = sseu->eu_stride;
 
 	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
 			   &topo, sizeof(topo)))
@@ -104,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915,
 	struct drm_i915_engine_info __user *info_ptr;
 	struct drm_i915_query_engine_info query;
 	struct drm_i915_engine_info info = { };
+	unsigned int num_uabi_engines = 0;
 	struct intel_engine_cs *engine;
 	int len, ret;
 
 	if (query_item->flags)
 		return -EINVAL;
 
+	for_each_uabi_engine(engine, i915)
+		num_uabi_engines++;
+
 	len = sizeof(struct drm_i915_query_engine_info) +
-	      RUNTIME_INFO(i915)->num_engines *
-	      sizeof(struct drm_i915_engine_info);
+	      num_uabi_engines * sizeof(struct drm_i915_engine_info);
 
 	ret = copy_query_item(&query, sizeof(query), len, query_item);
 	if (ret != 0)
@@ -142,10 +144,305 @@ query_engine_info(struct drm_i915_private *i915,
 	return len;
 }
 
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+						    u64 user_regs_ptr,
+						    u32 kernel_n_regs)
+{
+	/*
+	 * We'll just put the number of registers, and won't copy the
+	 * register.
+	 */
+	if (user_n_regs == 0)
+		return 0;
+
+	if (user_n_regs < kernel_n_regs)
+		return -EINVAL;
+
+	if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+		       2 * sizeof(u32) * kernel_n_regs))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs,
+						u32 kernel_n_regs,
+						u64 user_regs_ptr,
+						u32 *user_n_regs)
+{
+	u32 r;
+
+	if (*user_n_regs == 0) {
+		*user_n_regs = kernel_n_regs;
+		return 0;
+	}
+
+	*user_n_regs = kernel_n_regs;
+
+	for (r = 0; r < kernel_n_regs; r++) {
+		u32 __user *user_reg_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+		u32 __user *user_val_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+					sizeof(u32));
+		int ret;
+
+		ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+				 user_reg_ptr);
+		if (ret)
+			return -EFAULT;
+
+		ret = __put_user(kernel_regs[r].value, user_val_ptr);
+		if (ret)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item,
+				  bool use_uuid)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct drm_i915_perf_oa_config __user *user_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr +
+				sizeof(struct drm_i915_query_perf_config));
+	struct drm_i915_perf_oa_config user_config;
+	struct i915_perf *perf = &i915->perf;
+	struct i915_oa_config *oa_config;
+	char uuid[UUID_STRING_LEN + 1];
+	u64 config_id;
+	u32 flags, total_size;
+	int ret;
+
+	if (!perf->i915)
+		return -ENODEV;
+
+	total_size =
+		sizeof(struct drm_i915_query_perf_config) +
+		sizeof(struct drm_i915_perf_oa_config);
+
+	if (query_item->length == 0)
+		return total_size;
+
+	if (query_item->length < total_size) {
+		DRM_DEBUG("Invalid query config data item size=%u expected=%u\n",
+			  query_item->length, total_size);
+		return -EINVAL;
+	}
+
+	if (!access_ok(user_query_config_ptr, total_size))
+		return -EFAULT;
+
+	if (__get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	if (use_uuid) {
+		struct i915_oa_config *tmp;
+		int id;
+
+		BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid));
+
+		memset(&uuid, 0, sizeof(uuid));
+		if (__copy_from_user(uuid, user_query_config_ptr->uuid,
+				     sizeof(user_query_config_ptr->uuid)))
+			return -EFAULT;
+
+		oa_config = NULL;
+		rcu_read_lock();
+		idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+			if (!strcmp(tmp->uuid, uuid)) {
+				oa_config = i915_oa_config_get(tmp);
+				break;
+			}
+		}
+		rcu_read_unlock();
+	} else {
+		if (__get_user(config_id, &user_query_config_ptr->config))
+			return -EFAULT;
+
+		oa_config = i915_perf_get_oa_config(perf, config_id);
+	}
+	if (!oa_config)
+		return -ENOENT;
+
+	if (__copy_from_user(&user_config, user_config_ptr,
+			     sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs,
+						       user_config.boolean_regs_ptr,
+						       oa_config->b_counter_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs,
+						       user_config.flex_regs_ptr,
+						       oa_config->flex_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs,
+						       user_config.mux_regs_ptr,
+						       oa_config->mux_regs_len);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs,
+						   oa_config->b_counter_regs_len,
+						   user_config.boolean_regs_ptr,
+						   &user_config.n_boolean_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->flex_regs,
+						   oa_config->flex_regs_len,
+						   user_config.flex_regs_ptr,
+						   &user_config.n_flex_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->mux_regs,
+						   oa_config->mux_regs_len,
+						   user_config.mux_regs_ptr,
+						   &user_config.n_mux_regs);
+	if (ret)
+		goto out;
+
+	memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid));
+
+	if (__copy_to_user(user_config_ptr, &user_config,
+			   sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = total_size;
+
+out:
+	i915_oa_config_put(oa_config);
+	return ret;
+}
+
+static size_t sizeof_perf_config_list(size_t count)
+{
+	return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count;
+}
+
+static size_t sizeof_perf_metrics(struct i915_perf *perf)
+{
+	struct i915_oa_config *tmp;
+	size_t i;
+	int id;
+
+	i = 1;
+	rcu_read_lock();
+	idr_for_each_entry(&perf->metrics_idr, tmp, id)
+		i++;
+	rcu_read_unlock();
+
+	return sizeof_perf_config_list(i);
+}
+
+static int query_perf_config_list(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct i915_perf *perf = &i915->perf;
+	u64 *oa_config_ids = NULL;
+	int alloc, n_configs;
+	u32 flags;
+	int ret;
+
+	if (!perf->i915)
+		return -ENODEV;
+
+	if (query_item->length == 0)
+		return sizeof_perf_metrics(perf);
+
+	if (get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	n_configs = 1;
+	do {
+		struct i915_oa_config *tmp;
+		u64 *ids;
+		int id;
+
+		ids = krealloc(oa_config_ids,
+			       n_configs * sizeof(*oa_config_ids),
+			       GFP_KERNEL);
+		if (!ids)
+			return -ENOMEM;
+
+		alloc = fetch_and_zero(&n_configs);
+
+		ids[n_configs++] = 1ull; /* reserved for test_config */
+		rcu_read_lock();
+		idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+			if (n_configs < alloc)
+				ids[n_configs] = id;
+			n_configs++;
+		}
+		rcu_read_unlock();
+
+		oa_config_ids = ids;
+	} while (n_configs > alloc);
+
+	if (query_item->length < sizeof_perf_config_list(n_configs)) {
+		DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n",
+			  query_item->length,
+			  sizeof_perf_config_list(n_configs));
+		kfree(oa_config_ids);
+		return -EINVAL;
+	}
+
+	if (put_user(n_configs, &user_query_config_ptr->config)) {
+		kfree(oa_config_ids);
+		return -EFAULT;
+	}
+
+	ret = copy_to_user(user_query_config_ptr + 1,
+			   oa_config_ids,
+			   n_configs * sizeof(*oa_config_ids));
+	kfree(oa_config_ids);
+	if (ret)
+		return -EFAULT;
+
+	return sizeof_perf_config_list(n_configs);
+}
+
+static int query_perf_config(struct drm_i915_private *i915,
+			     struct drm_i915_query_item *query_item)
+{
+	switch (query_item->flags) {
+	case DRM_I915_QUERY_PERF_CONFIG_LIST:
+		return query_perf_config_list(i915, query_item);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID:
+		return query_perf_config_data(i915, query_item, true);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID:
+		return query_perf_config_data(i915, query_item, false);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
 	query_engine_info,
+	query_perf_config,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f8ee9aba3955..6cc55c103f67 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -413,6 +413,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GEN11_VECS_SFC_USAGE(engine)		_MMIO((engine)->mmio_base + 0x2014)
 #define   GEN11_VECS_SFC_USAGE_BIT		(1 << 0)
 
+#define GEN12_SFC_DONE(n)		_MMIO(0x1cc00 + (n) * 0x100)
+#define GEN12_SFC_DONE_MAX		4
+
 #define RING_PP_DIR_BASE(base)		_MMIO((base) + 0x228)
 #define RING_PP_DIR_BASE_READ(base)	_MMIO((base) + 0x518)
 #define RING_PP_DIR_DCLV(base)		_MMIO((base) + 0x220)
@@ -547,7 +550,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1	_MMIO(0x2408)
 #define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA       _MMIO(0x2410)
+#define MI_PREDICATE_RESULT     _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1   _MMIO(0x241c)
 #define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1 << 0)
 #define  LOWER_SLICE_DISABLED	(0 << 0)
@@ -688,6 +693,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OABUFFER_SIZE_8M    (6 << 3)
 #define OABUFFER_SIZE_16M   (7 << 3)
 
+/* Gen12 OAR unit */
+#define GEN12_OAR_OACONTROL _MMIO(0x2960)
+#define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
+#define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)
+
+#define GEN12_OACTXCONTROL _MMIO(0x2360)
+#define GEN12_OAR_OASTATUS _MMIO(0x2968)
+
+/* Gen12 OAG unit */
+#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00)
+#define  GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
+#define GEN12_OAG_OATAILPTR _MMIO(0xdb04)
+#define  GEN12_OAG_OATAILPTR_MASK 0xffffffc0
+
+#define GEN12_OAG_OABUFFER  _MMIO(0xdb08)
+#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK  (0x7)
+#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
+#define  GEN12_OAG_OABUFFER_MEMORY_SELECT     (1 << 0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28)
+#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
+#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE       (1 << 1)
+#define  GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME     (1 << 0)
+
+#define GEN12_OAG_OACONTROL _MMIO(0xdaf4)
+#define  GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
+#define  GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE       (1 << 0)
+
+#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8)
+#define  GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO          (1 << 6)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS  (1 << 5)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS     (1 << 2)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1)
+
+#define GEN12_OAG_OASTATUS _MMIO(0xdafc)
+#define  GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2)
+#define  GEN12_OAG_OASTATUS_BUFFER_OVERFLOW  (1 << 1)
+#define  GEN12_OAG_OASTATUS_REPORT_LOST      (1 << 0)
+
 /*
  * Flexible, Aggregate EU Counter Registers.
  * Note: these aren't contiguous
@@ -924,6 +968,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT    24
 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT    28
 
+/* Same layout as OASTARTTRIGX */
+#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900)
+#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904)
+#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908)
+#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c)
+#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910)
+#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914)
+#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918)
+#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c)
+
+/* Same layout as OAREPORTTRIGX */
+#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920)
+#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924)
+#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928)
+#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c)
+#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930)
+#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934)
+#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938)
+#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c)
+
 /* CECX_0 */
 #define OACEC_COMPARE_LESS_OR_EQUAL	6
 #define OACEC_COMPARE_NOT_EQUAL		5
@@ -940,6 +1004,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OACEC_SELECT_PREV	(1 << 19)
 #define OACEC_SELECT_BOOLEAN	(2 << 19)
 
+/* 11-bit array 0: pass-through, 1: negated */
+#define GEN12_OASCEC_NEGATE_MASK  0x7ff
+#define GEN12_OASCEC_NEGATE_SHIFT 21
+
 /* CECX_1 */
 #define OACEC_MASK_MASK		    0xffff
 #define OACEC_CONSIDERATIONS_MASK   0xffff
@@ -962,6 +1030,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OACEC7_0 _MMIO(0x27a8)
 #define OACEC7_1 _MMIO(0x27ac)
 
+/* Same layout as CECX_Y */
+#define GEN12_OAG_CEC0_0 _MMIO(0xd940)
+#define GEN12_OAG_CEC0_1 _MMIO(0xd944)
+#define GEN12_OAG_CEC1_0 _MMIO(0xd948)
+#define GEN12_OAG_CEC1_1 _MMIO(0xd94c)
+#define GEN12_OAG_CEC2_0 _MMIO(0xd950)
+#define GEN12_OAG_CEC2_1 _MMIO(0xd954)
+#define GEN12_OAG_CEC3_0 _MMIO(0xd958)
+#define GEN12_OAG_CEC3_1 _MMIO(0xd95c)
+#define GEN12_OAG_CEC4_0 _MMIO(0xd960)
+#define GEN12_OAG_CEC4_1 _MMIO(0xd964)
+#define GEN12_OAG_CEC5_0 _MMIO(0xd968)
+#define GEN12_OAG_CEC5_1 _MMIO(0xd96c)
+#define GEN12_OAG_CEC6_0 _MMIO(0xd970)
+#define GEN12_OAG_CEC6_1 _MMIO(0xd974)
+#define GEN12_OAG_CEC7_0 _MMIO(0xd978)
+#define GEN12_OAG_CEC7_1 _MMIO(0xd97c)
+
+/* Same layout as CECX_Y + negate 11-bit array */
+#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00)
+#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04)
+#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08)
+#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c)
+#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10)
+#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14)
+#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18)
+#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c)
+#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20)
+#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24)
+#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28)
+#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c)
+#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30)
+#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34)
+#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38)
+#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c)
+
 /* OA perf counters */
 #define OA_PERFCNT1_LO      _MMIO(0x91B8)
 #define OA_PERFCNT1_HI      _MMIO(0x91BC)
@@ -1042,6 +1146,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MICRO_BP3_COUNT_STATUS23	_MMIO(0x9838)
 #define MICRO_BP_FIRED_ARMED		_MMIO(0x983C)
 
+#define GEN12_OAA_DBG_REG _MMIO(0xdc44)
+#define GEN12_OAG_OA_PESS _MMIO(0x2b2c)
+#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40)
+
 #define GDT_CHICKEN_BITS    _MMIO(0x9840)
 #define   GT_NOA_ENABLE	    0x00000080
 
@@ -1962,8 +2070,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define ICL_DPHY_CHKN(port)			_MMIO(_ICL_COMBOPHY(port) + _ICL_DPHY_CHKN_REG)
 #define   ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP	REG_BIT(7)
 
-#define MG_PHY_PORT_LN(ln, port, ln0p1, ln0p2, ln1p1) \
-	_MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1)))
+#define MG_PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \
+	_MMIO(_PORT(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1)))
 
 #define MG_TX_LINK_PARAMS_TX1LN0_PORT1		0x16812C
 #define MG_TX_LINK_PARAMS_TX1LN1_PORT1		0x16852C
@@ -1973,10 +2081,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_LINK_PARAMS_TX1LN1_PORT3		0x16A52C
 #define MG_TX_LINK_PARAMS_TX1LN0_PORT4		0x16B12C
 #define MG_TX_LINK_PARAMS_TX1LN1_PORT4		0x16B52C
-#define MG_TX1_LINK_PARAMS(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \
-				 MG_TX_LINK_PARAMS_TX1LN0_PORT2, \
-				 MG_TX_LINK_PARAMS_TX1LN1_PORT1)
+#define MG_TX1_LINK_PARAMS(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \
+				    MG_TX_LINK_PARAMS_TX1LN0_PORT2, \
+				    MG_TX_LINK_PARAMS_TX1LN1_PORT1)
 
 #define MG_TX_LINK_PARAMS_TX2LN0_PORT1		0x1680AC
 #define MG_TX_LINK_PARAMS_TX2LN1_PORT1		0x1684AC
@@ -1986,10 +2094,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_LINK_PARAMS_TX2LN1_PORT3		0x16A4AC
 #define MG_TX_LINK_PARAMS_TX2LN0_PORT4		0x16B0AC
 #define MG_TX_LINK_PARAMS_TX2LN1_PORT4		0x16B4AC
-#define MG_TX2_LINK_PARAMS(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \
-				 MG_TX_LINK_PARAMS_TX2LN0_PORT2, \
-				 MG_TX_LINK_PARAMS_TX2LN1_PORT1)
+#define MG_TX2_LINK_PARAMS(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \
+				    MG_TX_LINK_PARAMS_TX2LN0_PORT2, \
+				    MG_TX_LINK_PARAMS_TX2LN1_PORT1)
 #define   CRI_USE_FS32			(1 << 5)
 
 #define MG_TX_PISO_READLOAD_TX1LN0_PORT1		0x16814C
@@ -2000,10 +2108,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_PISO_READLOAD_TX1LN1_PORT3		0x16A54C
 #define MG_TX_PISO_READLOAD_TX1LN0_PORT4		0x16B14C
 #define MG_TX_PISO_READLOAD_TX1LN1_PORT4		0x16B54C
-#define MG_TX1_PISO_READLOAD(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \
-				 MG_TX_PISO_READLOAD_TX1LN0_PORT2, \
-				 MG_TX_PISO_READLOAD_TX1LN1_PORT1)
+#define MG_TX1_PISO_READLOAD(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \
+				    MG_TX_PISO_READLOAD_TX1LN0_PORT2, \
+				    MG_TX_PISO_READLOAD_TX1LN1_PORT1)
 
 #define MG_TX_PISO_READLOAD_TX2LN0_PORT1		0x1680CC
 #define MG_TX_PISO_READLOAD_TX2LN1_PORT1		0x1684CC
@@ -2013,10 +2121,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_PISO_READLOAD_TX2LN1_PORT3		0x16A4CC
 #define MG_TX_PISO_READLOAD_TX2LN0_PORT4		0x16B0CC
 #define MG_TX_PISO_READLOAD_TX2LN1_PORT4		0x16B4CC
-#define MG_TX2_PISO_READLOAD(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \
-				 MG_TX_PISO_READLOAD_TX2LN0_PORT2, \
-				 MG_TX_PISO_READLOAD_TX2LN1_PORT1)
+#define MG_TX2_PISO_READLOAD(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \
+				    MG_TX_PISO_READLOAD_TX2LN0_PORT2, \
+				    MG_TX_PISO_READLOAD_TX2LN1_PORT1)
 #define   CRI_CALCINIT					(1 << 1)
 
 #define MG_TX_SWINGCTRL_TX1LN0_PORT1		0x168148
@@ -2027,10 +2135,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_SWINGCTRL_TX1LN1_PORT3		0x16A548
 #define MG_TX_SWINGCTRL_TX1LN0_PORT4		0x16B148
 #define MG_TX_SWINGCTRL_TX1LN1_PORT4		0x16B548
-#define MG_TX1_SWINGCTRL(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \
-				 MG_TX_SWINGCTRL_TX1LN0_PORT2, \
-				 MG_TX_SWINGCTRL_TX1LN1_PORT1)
+#define MG_TX1_SWINGCTRL(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \
+				    MG_TX_SWINGCTRL_TX1LN0_PORT2, \
+				    MG_TX_SWINGCTRL_TX1LN1_PORT1)
 
 #define MG_TX_SWINGCTRL_TX2LN0_PORT1		0x1680C8
 #define MG_TX_SWINGCTRL_TX2LN1_PORT1		0x1684C8
@@ -2040,10 +2148,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_SWINGCTRL_TX2LN1_PORT3		0x16A4C8
 #define MG_TX_SWINGCTRL_TX2LN0_PORT4		0x16B0C8
 #define MG_TX_SWINGCTRL_TX2LN1_PORT4		0x16B4C8
-#define MG_TX2_SWINGCTRL(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \
-				 MG_TX_SWINGCTRL_TX2LN0_PORT2, \
-				 MG_TX_SWINGCTRL_TX2LN1_PORT1)
+#define MG_TX2_SWINGCTRL(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \
+				    MG_TX_SWINGCTRL_TX2LN0_PORT2, \
+				    MG_TX_SWINGCTRL_TX2LN1_PORT1)
 #define   CRI_TXDEEMPH_OVERRIDE_17_12(x)		((x) << 0)
 #define   CRI_TXDEEMPH_OVERRIDE_17_12_MASK		(0x3F << 0)
 
@@ -2055,10 +2163,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_DRVCTRL_TX1LN1_TXPORT3			0x16A544
 #define MG_TX_DRVCTRL_TX1LN0_TXPORT4			0x16B144
 #define MG_TX_DRVCTRL_TX1LN1_TXPORT4			0x16B544
-#define MG_TX1_DRVCTRL(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \
-				 MG_TX_DRVCTRL_TX1LN0_TXPORT2, \
-				 MG_TX_DRVCTRL_TX1LN1_TXPORT1)
+#define MG_TX1_DRVCTRL(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \
+				    MG_TX_DRVCTRL_TX1LN0_TXPORT2, \
+				    MG_TX_DRVCTRL_TX1LN1_TXPORT1)
 
 #define MG_TX_DRVCTRL_TX2LN0_PORT1			0x1680C4
 #define MG_TX_DRVCTRL_TX2LN1_PORT1			0x1684C4
@@ -2068,10 +2176,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_DRVCTRL_TX2LN1_PORT3			0x16A4C4
 #define MG_TX_DRVCTRL_TX2LN0_PORT4			0x16B0C4
 #define MG_TX_DRVCTRL_TX2LN1_PORT4			0x16B4C4
-#define MG_TX2_DRVCTRL(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX2LN0_PORT1, \
-				 MG_TX_DRVCTRL_TX2LN0_PORT2, \
-				 MG_TX_DRVCTRL_TX2LN1_PORT1)
+#define MG_TX2_DRVCTRL(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX2LN0_PORT1, \
+				    MG_TX_DRVCTRL_TX2LN0_PORT2, \
+				    MG_TX_DRVCTRL_TX2LN1_PORT1)
 #define   CRI_TXDEEMPH_OVERRIDE_11_6(x)			((x) << 24)
 #define   CRI_TXDEEMPH_OVERRIDE_11_6_MASK		(0x3F << 24)
 #define   CRI_TXDEEMPH_OVERRIDE_EN			(1 << 22)
@@ -2088,10 +2196,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_CLKHUB_LN1_PORT3			0x16A79C
 #define MG_CLKHUB_LN0_PORT4			0x16B39C
 #define MG_CLKHUB_LN1_PORT4			0x16B79C
-#define MG_CLKHUB(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_CLKHUB_LN0_PORT1, \
-				 MG_CLKHUB_LN0_PORT2, \
-				 MG_CLKHUB_LN1_PORT1)
+#define MG_CLKHUB(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_CLKHUB_LN0_PORT1, \
+				    MG_CLKHUB_LN0_PORT2, \
+				    MG_CLKHUB_LN1_PORT1)
 #define   CFG_LOW_RATE_LKREN_EN				(1 << 11)
 
 #define MG_TX_DCC_TX1LN0_PORT1			0x168110
@@ -2102,10 +2210,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_DCC_TX1LN1_PORT3			0x16A510
 #define MG_TX_DCC_TX1LN0_PORT4			0x16B110
 #define MG_TX_DCC_TX1LN1_PORT4			0x16B510
-#define MG_TX1_DCC(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX1LN0_PORT1, \
-				 MG_TX_DCC_TX1LN0_PORT2, \
-				 MG_TX_DCC_TX1LN1_PORT1)
+#define MG_TX1_DCC(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX1LN0_PORT1, \
+				    MG_TX_DCC_TX1LN0_PORT2, \
+				    MG_TX_DCC_TX1LN1_PORT1)
 #define MG_TX_DCC_TX2LN0_PORT1			0x168090
 #define MG_TX_DCC_TX2LN1_PORT1			0x168490
 #define MG_TX_DCC_TX2LN0_PORT2			0x169090
@@ -2114,10 +2222,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_TX_DCC_TX2LN1_PORT3			0x16A490
 #define MG_TX_DCC_TX2LN0_PORT4			0x16B090
 #define MG_TX_DCC_TX2LN1_PORT4			0x16B490
-#define MG_TX2_DCC(ln, port) \
-	MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX2LN0_PORT1, \
-				 MG_TX_DCC_TX2LN0_PORT2, \
-				 MG_TX_DCC_TX2LN1_PORT1)
+#define MG_TX2_DCC(ln, tc_port) \
+	MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX2LN0_PORT1, \
+				    MG_TX_DCC_TX2LN0_PORT2, \
+				    MG_TX_DCC_TX2LN1_PORT1)
 #define   CFG_AMI_CK_DIV_OVERRIDE_VAL(x)	((x) << 25)
 #define   CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK	(0x3 << 25)
 #define   CFG_AMI_CK_DIV_OVERRIDE_EN		(1 << 24)
@@ -2130,32 +2238,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MG_DP_MODE_LN1_ACU_PORT3			0x16A7A0
 #define MG_DP_MODE_LN0_ACU_PORT4			0x16B3A0
 #define MG_DP_MODE_LN1_ACU_PORT4			0x16B7A0
-#define MG_DP_MODE(ln, port)	\
-	MG_PHY_PORT_LN(ln, port, MG_DP_MODE_LN0_ACU_PORT1, \
-				 MG_DP_MODE_LN0_ACU_PORT2, \
-				 MG_DP_MODE_LN1_ACU_PORT1)
+#define MG_DP_MODE(ln, tc_port)	\
+	MG_PHY_PORT_LN(ln, tc_port, MG_DP_MODE_LN0_ACU_PORT1, \
+				    MG_DP_MODE_LN0_ACU_PORT2, \
+				    MG_DP_MODE_LN1_ACU_PORT1)
 #define   MG_DP_MODE_CFG_DP_X2_MODE			(1 << 7)
 #define   MG_DP_MODE_CFG_DP_X1_MODE			(1 << 6)
-#define   MG_DP_MODE_CFG_TR2PWR_GATING			(1 << 5)
-#define   MG_DP_MODE_CFG_TRPWR_GATING			(1 << 4)
-#define   MG_DP_MODE_CFG_CLNPWR_GATING			(1 << 3)
-#define   MG_DP_MODE_CFG_DIGPWR_GATING			(1 << 2)
-#define   MG_DP_MODE_CFG_GAONPWR_GATING			(1 << 1)
-
-#define MG_MISC_SUS0_PORT1				0x168814
-#define MG_MISC_SUS0_PORT2				0x169814
-#define MG_MISC_SUS0_PORT3				0x16A814
-#define MG_MISC_SUS0_PORT4				0x16B814
-#define MG_MISC_SUS0(tc_port) \
-	_MMIO(_PORT(tc_port, MG_MISC_SUS0_PORT1, MG_MISC_SUS0_PORT2))
-#define   MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK	(3 << 14)
-#define   MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(x)	((x) << 14)
-#define   MG_MISC_SUS0_CFG_TR2PWR_GATING		(1 << 12)
-#define   MG_MISC_SUS0_CFG_CL2PWR_GATING		(1 << 11)
-#define   MG_MISC_SUS0_CFG_GAONPWR_GATING		(1 << 10)
-#define   MG_MISC_SUS0_CFG_TRPWR_GATING			(1 << 7)
-#define   MG_MISC_SUS0_CFG_CL1PWR_GATING		(1 << 6)
-#define   MG_MISC_SUS0_CFG_DGPWR_GATING			(1 << 5)
 
 /* The spec defines this only for BXT PHY0, but lets assume that this
  * would exist for PHY1 too if it had a second channel.
@@ -2172,13 +2260,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define _MMIO_FIA(fia, off)		_MMIO(_FIA(fia) + (off))
 
 /* ICL PHY DFLEX registers */
-#define PORT_TX_DFLEXDPMLE1(fia)	_MMIO_FIA((fia),  0x008C0)
-#define   DFLEXDPMLE1_DPMLETC_MASK(tc_port)	(0xf << (4 * (tc_port)))
-#define   DFLEXDPMLE1_DPMLETC_ML0(tc_port)	(1 << (4 * (tc_port)))
-#define   DFLEXDPMLE1_DPMLETC_ML1_0(tc_port)	(3 << (4 * (tc_port)))
-#define   DFLEXDPMLE1_DPMLETC_ML3(tc_port)	(8 << (4 * (tc_port)))
-#define   DFLEXDPMLE1_DPMLETC_ML3_2(tc_port)	(12 << (4 * (tc_port)))
-#define   DFLEXDPMLE1_DPMLETC_ML3_0(tc_port)	(15 << (4 * (tc_port)))
+#define PORT_TX_DFLEXDPMLE1(fia)		_MMIO_FIA((fia),  0x008C0)
+#define   DFLEXDPMLE1_DPMLETC_MASK(idx)		(0xf << (4 * (idx)))
+#define   DFLEXDPMLE1_DPMLETC_ML0(idx)		(1 << (4 * (idx)))
+#define   DFLEXDPMLE1_DPMLETC_ML1_0(idx)	(3 << (4 * (idx)))
+#define   DFLEXDPMLE1_DPMLETC_ML3(idx)		(8 << (4 * (idx)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_2(idx)	(12 << (4 * (idx)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_0(idx)	(15 << (4 * (idx)))
 
 /* BXT PHY Ref registers */
 #define _PORT_REF_DW3_A			0x16218C
@@ -2459,6 +2547,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
 #define   RING_FAULT_VALID	(1 << 0)
 #define DONE_REG		_MMIO(0x40b0)
+#define GEN12_GAM_DONE		_MMIO(0xcf68)
 #define GEN8_PRIVATE_PAT_LO	_MMIO(0x40e0)
 #define GEN8_PRIVATE_PAT_HI	_MMIO(0x40e0 + 4)
 #define GEN10_PAT_INDEX(index)	_MMIO(0x40e0 + (index) * 4)
@@ -2489,7 +2578,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   RING_WAIT		(1 << 11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE	(1 << 10) /* gen6+ */
 
+/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
+#define GEN8_RING_CS_GPR(base, n)	_MMIO((base) + 0x600 + (n) * 8)
+#define GEN8_RING_CS_GPR_UDW(base, n)	_MMIO((base) + 0x600 + (n) * 8 + 4)
+
 #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RW	(0 << 28)    /* CFL+ & Gen11+ */
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RD	(1 << 28)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_WR	(2 << 28)
@@ -2602,6 +2696,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   FAULT_VA_HIGH_BITS		(0xf << 0)
 #define   FAULT_GTT_SEL			(1 << 4)
 
+#define GEN12_AUX_ERR_DBG		_MMIO(0x43f4)
+
 #define FPGA_DBG		_MMIO(0x42300)
 #define   FPGA_DBG_RM_NOCLAIM	(1 << 31)
 
@@ -2711,6 +2807,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VLV_GU_CTL0	_MMIO(VLV_DISPLAY_BASE + 0x2030)
 #define VLV_GU_CTL1	_MMIO(VLV_DISPLAY_BASE + 0x2034)
 #define SCPD0		_MMIO(0x209c) /* 915+ only */
+#define  CSTATE_RENDER_CLOCK_GATE_DISABLE	(1 << 5)
 #define GEN2_IER	_MMIO(0x20a0)
 #define GEN2_IIR	_MMIO(0x20a4)
 #define GEN2_IMR	_MMIO(0x20a8)
@@ -2884,6 +2981,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define GEN6_RC_SLEEP_PSMI_CONTROL	_MMIO(0x2050)
 #define   GEN6_PSMI_SLEEP_MSG_DISABLE	(1 << 0)
+#define   GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1 << 10)
 
@@ -2962,6 +3060,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
 
+#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
+
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
@@ -3564,6 +3664,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define _PALETTE_A		0xa000
 #define _PALETTE_B		0xa800
 #define _CHV_PALETTE_C		0xc000
+#define PALETTE_RED_MASK        REG_GENMASK(23, 16)
+#define PALETTE_GREEN_MASK      REG_GENMASK(15, 8)
+#define PALETTE_BLUE_MASK       REG_GENMASK(7, 0)
 #define PALETTE(pipe, i)	_MMIO(DISPLAY_MMIO_BASE(dev_priv) + \
 				      _PICK((pipe), _PALETTE_A,		\
 					    _PALETTE_B, _CHV_PALETTE_C) + \
@@ -4044,12 +4147,23 @@ enum {
 #define  SARBUNIT_CLKGATE_DIS		(1 << 5)
 #define  RCCUNIT_CLKGATE_DIS		(1 << 7)
 #define  MSCUNIT_CLKGATE_DIS		(1 << 10)
+#define  L3_CLKGATE_DIS			REG_BIT(16)
+#define  L3_CR2X_CLKGATE_DIS		REG_BIT(17)
 
 #define SUBSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9524)
 #define  GWUNIT_CLKGATE_DIS		(1 << 16)
 
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2	_MMIO(0x9528)
+#define  CPSSUNIT_CLKGATE_DIS		REG_BIT(9)
+
 #define UNSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9434)
-#define  VFUNIT_CLKGATE_DIS		(1 << 20)
+#define   VFUNIT_CLKGATE_DIS		REG_BIT(20)
+#define   HSUNIT_CLKGATE_DIS		REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS		REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2	_MMIO(0x94e4)
+#define   VSUNIT_CLKGATE_DIS_TGL	REG_BIT(19)
+#define   PSDUNIT_CLKGATE_DIS		REG_BIT(5)
 
 #define INF_UNIT_LEVEL_CLKGATE		_MMIO(0x9560)
 #define   CGPSF_CLKGATE_DIS		(1 << 3)
@@ -4141,6 +4255,7 @@ enum {
 #define _VTOTAL_A	0x6000c
 #define _VBLANK_A	0x60010
 #define _VSYNC_A	0x60014
+#define _EXITLINE_A	0x60018
 #define _PIPEASRC	0x6001c
 #define _BCLRPAT_A	0x60020
 #define _VSYNCSHIFT_A	0x60028
@@ -4192,10 +4307,22 @@ enum {
 #define PIPESRC(trans)		_MMIO_TRANS2(trans, _PIPEASRC)
 #define PIPE_MULT(trans)	_MMIO_TRANS2(trans, _PIPE_MULT_A)
 
-/* HSW+ eDP PSR registers */
-#define HSW_EDP_PSR_BASE	0x64800
-#define BDW_EDP_PSR_BASE	0x6f800
-#define EDP_PSR_CTL				_MMIO(dev_priv->psr_mmio_base + 0)
+#define EXITLINE(trans)		_MMIO_TRANS2(trans, _EXITLINE_A)
+#define   EXITLINE_ENABLE	REG_BIT(31)
+#define   EXITLINE_MASK		REG_GENMASK(12, 0)
+#define   EXITLINE_SHIFT	0
+
+/*
+ * HSW+ eDP PSR registers
+ *
+ * HSW PSR registers are relative to DDIA(_DDI_BUF_CTL_A + 0x800) with just one
+ * instance of it
+ */
+#define _HSW_EDP_PSR_BASE			0x64800
+#define _SRD_CTL_A				0x60800
+#define _SRD_CTL_EDP				0x6f800
+#define _PSR_ADJ(tran, reg)			(_TRANS2(tran, reg) - dev_priv->hsw_psr_mmio_adjust)
+#define EDP_PSR_CTL(tran)			_MMIO(_PSR_ADJ(tran, _SRD_CTL_A))
 #define   EDP_PSR_ENABLE			(1 << 31)
 #define   BDW_PSR_SINGLE_FRAME			(1 << 30)
 #define   EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK	(1 << 29) /* SW can't modify */
@@ -4221,27 +4348,40 @@ enum {
 #define   EDP_PSR_TP1_TIME_0us			(3 << 4)
 #define   EDP_PSR_IDLE_FRAME_SHIFT		0
 
-/* Bspec claims those aren't shifted but stay at 0x64800 */
+/*
+ * Until TGL, IMR/IIR are fixed at 0x648xx. On TGL+ those registers are relative
+ * to transcoder and bits defined for each one as if using no shift (i.e. as if
+ * it was for TRANSCODER_EDP)
+ */
 #define EDP_PSR_IMR				_MMIO(0x64834)
 #define EDP_PSR_IIR				_MMIO(0x64838)
-#define   EDP_PSR_ERROR(shift)			(1 << ((shift) + 2))
-#define   EDP_PSR_POST_EXIT(shift)		(1 << ((shift) + 1))
-#define   EDP_PSR_PRE_ENTRY(shift)		(1 << (shift))
-#define   EDP_PSR_TRANSCODER_C_SHIFT		24
-#define   EDP_PSR_TRANSCODER_B_SHIFT		16
-#define   EDP_PSR_TRANSCODER_A_SHIFT		8
-#define   EDP_PSR_TRANSCODER_EDP_SHIFT		0
-
-#define EDP_PSR_AUX_CTL				_MMIO(dev_priv->psr_mmio_base + 0x10)
+#define _PSR_IMR_A				0x60814
+#define _PSR_IIR_A				0x60818
+#define TRANS_PSR_IMR(tran)			_MMIO_TRANS2(tran, _PSR_IMR_A)
+#define TRANS_PSR_IIR(tran)			_MMIO_TRANS2(tran, _PSR_IIR_A)
+#define   _EDP_PSR_TRANS_SHIFT(trans)		((trans) == TRANSCODER_EDP ? \
+						 0 : ((trans) - TRANSCODER_A + 1) * 8)
+#define   EDP_PSR_TRANS_MASK(trans)		(0x7 << _EDP_PSR_TRANS_SHIFT(trans))
+#define   EDP_PSR_ERROR(trans)			(0x4 << _EDP_PSR_TRANS_SHIFT(trans))
+#define   EDP_PSR_POST_EXIT(trans)		(0x2 << _EDP_PSR_TRANS_SHIFT(trans))
+#define   EDP_PSR_PRE_ENTRY(trans)		(0x1 << _EDP_PSR_TRANS_SHIFT(trans))
+
+#define _SRD_AUX_CTL_A				0x60810
+#define _SRD_AUX_CTL_EDP			0x6f810
+#define EDP_PSR_AUX_CTL(tran)			_MMIO(_PSR_ADJ(tran, _SRD_AUX_CTL_A))
 #define   EDP_PSR_AUX_CTL_TIME_OUT_MASK		(3 << 26)
 #define   EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK	(0x1f << 20)
 #define   EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK	(0xf << 16)
 #define   EDP_PSR_AUX_CTL_ERROR_INTERRUPT	(1 << 11)
 #define   EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK	(0x7ff)
 
-#define EDP_PSR_AUX_DATA(i)			_MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */
+#define _SRD_AUX_DATA_A				0x60814
+#define _SRD_AUX_DATA_EDP			0x6f814
+#define EDP_PSR_AUX_DATA(tran, i)		_MMIO(_PSR_ADJ(tran, _SRD_AUX_DATA_A) + (i) + 4) /* 5 registers */
 
-#define EDP_PSR_STATUS				_MMIO(dev_priv->psr_mmio_base + 0x40)
+#define _SRD_STATUS_A				0x60840
+#define _SRD_STATUS_EDP				0x6f840
+#define EDP_PSR_STATUS(tran)			_MMIO(_PSR_ADJ(tran, _SRD_STATUS_A))
 #define   EDP_PSR_STATUS_STATE_MASK		(7 << 29)
 #define   EDP_PSR_STATUS_STATE_SHIFT		29
 #define   EDP_PSR_STATUS_STATE_IDLE		(0 << 29)
@@ -4266,10 +4406,15 @@ enum {
 #define   EDP_PSR_STATUS_SENDING_TP1		(1 << 4)
 #define   EDP_PSR_STATUS_IDLE_MASK		0xf
 
-#define EDP_PSR_PERF_CNT		_MMIO(dev_priv->psr_mmio_base + 0x44)
+#define _SRD_PERF_CNT_A			0x60844
+#define _SRD_PERF_CNT_EDP		0x6f844
+#define EDP_PSR_PERF_CNT(tran)		_MMIO(_PSR_ADJ(tran, _SRD_PERF_CNT_A))
 #define   EDP_PSR_PERF_CNT_MASK		0xffffff
 
-#define EDP_PSR_DEBUG				_MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */
+/* PSR_MASK on SKL+ */
+#define _SRD_DEBUG_A				0x60860
+#define _SRD_DEBUG_EDP				0x6f860
+#define EDP_PSR_DEBUG(tran)			_MMIO(_PSR_ADJ(tran, _SRD_DEBUG_A))
 #define   EDP_PSR_DEBUG_MASK_MAX_SLEEP         (1 << 28)
 #define   EDP_PSR_DEBUG_MASK_LPSP              (1 << 27)
 #define   EDP_PSR_DEBUG_MASK_MEMUP             (1 << 26)
@@ -4277,7 +4422,9 @@ enum {
 #define   EDP_PSR_DEBUG_MASK_DISP_REG_WRITE    (1 << 16) /* Reserved in ICL+ */
 #define   EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1 << 15) /* SKL+ */
 
-#define EDP_PSR2_CTL			_MMIO(0x6f900)
+#define _PSR2_CTL_A			0x60900
+#define _PSR2_CTL_EDP			0x6f900
+#define EDP_PSR2_CTL(tran)		_MMIO_TRANS2(tran, _PSR2_CTL_A)
 #define   EDP_PSR2_ENABLE		(1 << 31)
 #define   EDP_SU_TRACK_ENABLE		(1 << 30)
 #define   EDP_Y_COORDINATE_VALID	(1 << 26) /* GLK and CNL+ */
@@ -4299,8 +4446,8 @@ enum {
 #define _PSR_EVENT_TRANS_B			0x61848
 #define _PSR_EVENT_TRANS_C			0x62848
 #define _PSR_EVENT_TRANS_D			0x63848
-#define _PSR_EVENT_TRANS_EDP			0x6F848
-#define PSR_EVENT(trans)			_MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A)
+#define _PSR_EVENT_TRANS_EDP			0x6f848
+#define PSR_EVENT(tran)				_MMIO_TRANS2(tran, _PSR_EVENT_TRANS_A)
 #define  PSR_EVENT_PSR2_WD_TIMER_EXPIRE		(1 << 17)
 #define  PSR_EVENT_PSR2_DISABLED		(1 << 16)
 #define  PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN	(1 << 15)
@@ -4318,15 +4465,16 @@ enum {
 #define  PSR_EVENT_LPSP_MODE_EXIT		(1 << 1)
 #define  PSR_EVENT_PSR_DISABLE			(1 << 0)
 
-#define EDP_PSR2_STATUS			_MMIO(0x6f940)
+#define _PSR2_STATUS_A			0x60940
+#define _PSR2_STATUS_EDP		0x6f940
+#define EDP_PSR2_STATUS(tran)		_MMIO_TRANS2(tran, _PSR2_STATUS_A)
 #define EDP_PSR2_STATUS_STATE_MASK     (0xf << 28)
 #define EDP_PSR2_STATUS_STATE_SHIFT    28
 
-#define _PSR2_SU_STATUS_0		0x6F914
-#define _PSR2_SU_STATUS_1		0x6F918
-#define _PSR2_SU_STATUS_2		0x6F91C
-#define _PSR2_SU_STATUS(index)		_MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1))
-#define PSR2_SU_STATUS(frame)		(_PSR2_SU_STATUS((frame) / 3))
+#define _PSR2_SU_STATUS_A		0x60914
+#define _PSR2_SU_STATUS_EDP		0x6f914
+#define _PSR2_SU_STATUS(tran, index)	_MMIO(_TRANS2(tran, _PSR2_SU_STATUS_A) + (index) * 4)
+#define PSR2_SU_STATUS(tran, frame)	(_PSR2_SU_STATUS(tran, (frame) / 3))
 #define PSR2_SU_STATUS_SHIFT(frame)	(((frame) % 3) * 10)
 #define PSR2_SU_STATUS_MASK(frame)	(0x3ff << PSR2_SU_STATUS_SHIFT(frame))
 #define PSR2_SU_STATUS_FRAMES		8
@@ -4652,6 +4800,7 @@ enum {
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
+#define   VIDEO_DIP_GMP_DATA_SIZE	36
 #define   VIDEO_DIP_VSC_DATA_SIZE	36
 #define   VIDEO_DIP_PPS_DATA_SIZE	132
 #define VIDEO_DIP_CTL		_MMIO(0x61170)
@@ -4879,14 +5028,20 @@ enum {
 #define   BLM_PCH_POLARITY			(1 << 29)
 #define BLC_PWM_PCH_CTL2	_MMIO(0xc8254)
 
-#define UTIL_PIN_CTL		_MMIO(0x48400)
-#define   UTIL_PIN_ENABLE	(1 << 31)
-
-#define   UTIL_PIN_PIPE(x)     ((x) << 29)
-#define   UTIL_PIN_PIPE_MASK   (3 << 29)
-#define   UTIL_PIN_MODE_PWM    (1 << 24)
-#define   UTIL_PIN_MODE_MASK   (0xf << 24)
-#define   UTIL_PIN_POLARITY    (1 << 22)
+#define UTIL_PIN_CTL			_MMIO(0x48400)
+#define   UTIL_PIN_ENABLE		(1 << 31)
+#define   UTIL_PIN_PIPE_MASK		(3 << 29)
+#define   UTIL_PIN_PIPE(x)		((x) << 29)
+#define   UTIL_PIN_MODE_MASK		(0xf << 24)
+#define   UTIL_PIN_MODE_DATA		(0 << 24)
+#define   UTIL_PIN_MODE_PWM		(1 << 24)
+#define   UTIL_PIN_MODE_VBLANK		(4 << 24)
+#define   UTIL_PIN_MODE_VSYNC		(5 << 24)
+#define   UTIL_PIN_MODE_EYE_LEVEL	(8 << 24)
+#define   UTIL_PIN_OUTPUT_DATA		(1 << 23)
+#define   UTIL_PIN_POLARITY		(1 << 22)
+#define   UTIL_PIN_DIRECTION_INPUT	(1 << 19)
+#define   UTIL_PIN_INPUT_DATA		(1 << 16)
 
 /* BXT backlight register definition. */
 #define _BXT_BLC_PWM_CTL1			0xC8250
@@ -5488,45 +5643,9 @@ enum {
  */
 #define _DPA_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64010)
 #define _DPA_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64014)
-#define _DPA_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64018)
-#define _DPA_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6401c)
-#define _DPA_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64020)
-#define _DPA_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64024)
 
 #define _DPB_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64110)
 #define _DPB_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64114)
-#define _DPB_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64118)
-#define _DPB_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6411c)
-#define _DPB_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64120)
-#define _DPB_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64124)
-
-#define _DPC_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64210)
-#define _DPC_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64214)
-#define _DPC_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64218)
-#define _DPC_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6421c)
-#define _DPC_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64220)
-#define _DPC_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64224)
-
-#define _DPD_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64310)
-#define _DPD_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64314)
-#define _DPD_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64318)
-#define _DPD_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6431c)
-#define _DPD_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64320)
-#define _DPD_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64324)
-
-#define _DPE_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64410)
-#define _DPE_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64414)
-#define _DPE_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64418)
-#define _DPE_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6441c)
-#define _DPE_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64420)
-#define _DPE_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64424)
-
-#define _DPF_AUX_CH_CTL		(DISPLAY_MMIO_BASE(dev_priv) + 0x64510)
-#define _DPF_AUX_CH_DATA1	(DISPLAY_MMIO_BASE(dev_priv) + 0x64514)
-#define _DPF_AUX_CH_DATA2	(DISPLAY_MMIO_BASE(dev_priv) + 0x64518)
-#define _DPF_AUX_CH_DATA3	(DISPLAY_MMIO_BASE(dev_priv) + 0x6451c)
-#define _DPF_AUX_CH_DATA4	(DISPLAY_MMIO_BASE(dev_priv) + 0x64520)
-#define _DPF_AUX_CH_DATA5	(DISPLAY_MMIO_BASE(dev_priv) + 0x64524)
 
 #define DP_AUX_CH_CTL(aux_ch)	_MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL)
 #define DP_AUX_CH_DATA(aux_ch, i)	_MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
@@ -5624,7 +5743,8 @@ enum {
 #define   PIPECONF_DOUBLE_WIDE	(1 << 30)
 #define   I965_PIPECONF_ACTIVE	(1 << 30)
 #define   PIPECONF_DSI_PLL_LOCKED	(1 << 29) /* vlv & pipe A only */
-#define   PIPECONF_FRAME_START_DELAY_MASK (3 << 27)
+#define   PIPECONF_FRAME_START_DELAY_MASK	(3 << 27) /* pre-hsw */
+#define   PIPECONF_FRAME_START_DELAY(x)		((x) << 27) /* pre-hsw: 0-3 */
 #define   PIPECONF_SINGLE_WIDE	0
 #define   PIPECONF_PIPE_UNLOCKED 0
 #define   PIPECONF_PIPE_LOCKED	(1 << 25)
@@ -5658,6 +5778,11 @@ enum {
 #define   PIPECONF_CXSR_DOWNCLOCK	(1 << 16)
 #define   PIPECONF_EDP_RR_MODE_SWITCH_VLV	(1 << 14)
 #define   PIPECONF_COLOR_RANGE_SELECT	(1 << 13)
+#define   PIPECONF_OUTPUT_COLORSPACE_MASK	(3 << 11) /* ilk-ivb */
+#define   PIPECONF_OUTPUT_COLORSPACE_RGB	(0 << 11) /* ilk-ivb */
+#define   PIPECONF_OUTPUT_COLORSPACE_YUV601	(1 << 11) /* ilk-ivb */
+#define   PIPECONF_OUTPUT_COLORSPACE_YUV709	(2 << 11) /* ilk-ivb */
+#define   PIPECONF_OUTPUT_COLORSPACE_YUV_HSW	(1 << 11) /* hsw only */
 #define   PIPECONF_BPC_MASK	(0x7 << 5)
 #define   PIPECONF_8BPC		(0 << 5)
 #define   PIPECONF_10BPC	(1 << 5)
@@ -5745,12 +5870,13 @@ enum {
 
 #define  _PIPEAGCMAX           0x70010
 #define  _PIPEBGCMAX           0x71010
+#define PIPEGCMAX_RGB_MASK     REG_GENMASK(15, 0)
 #define PIPEGCMAX(pipe, i)     _MMIO_PIPE2(pipe, _PIPEAGCMAX + (i) * 4)
 
 #define _PIPE_MISC_A			0x70030
 #define _PIPE_MISC_B			0x71030
-#define   PIPEMISC_YUV420_ENABLE	(1 << 27)
-#define   PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26)
+#define   PIPEMISC_YUV420_ENABLE	(1 << 27) /* glk+ */
+#define   PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) /* glk+ */
 #define   PIPEMISC_HDR_MODE_PRECISION	(1 << 23) /* icl+ */
 #define   PIPEMISC_OUTPUT_COLORSPACE_YUV  (1 << 11)
 #define   PIPEMISC_DITHER_BPC_MASK	(7 << 5)
@@ -6207,6 +6333,7 @@ enum {
 #define CHV_CURSOR_C_OFFSET 0x700e0
 #define IVB_CURSOR_B_OFFSET 0x71080
 #define IVB_CURSOR_C_OFFSET 0x72080
+#define TGL_CURSOR_D_OFFSET 0x73080
 
 /* Display A control */
 #define _DSPACNTR				0x70180
@@ -6225,6 +6352,7 @@ enum {
 #define   DISPPLANE_RGBX101010			(0x8 << 26)
 #define   DISPPLANE_RGBA101010			(0x9 << 26)
 #define   DISPPLANE_BGRX101010			(0xa << 26)
+#define   DISPPLANE_BGRA101010			(0xb << 26)
 #define   DISPPLANE_RGBX161616			(0xc << 26)
 #define   DISPPLANE_RGBX888			(0xe << 26)
 #define   DISPPLANE_RGBA888			(0xf << 26)
@@ -6499,12 +6627,15 @@ enum {
 #define   SP_ENABLE			(1 << 31)
 #define   SP_GAMMA_ENABLE		(1 << 30)
 #define   SP_PIXFORMAT_MASK		(0xf << 26)
-#define   SP_FORMAT_YUV422		(0 << 26)
-#define   SP_FORMAT_BGR565		(5 << 26)
-#define   SP_FORMAT_BGRX8888		(6 << 26)
-#define   SP_FORMAT_BGRA8888		(7 << 26)
-#define   SP_FORMAT_RGBX1010102		(8 << 26)
-#define   SP_FORMAT_RGBA1010102		(9 << 26)
+#define   SP_FORMAT_YUV422		(0x0 << 26)
+#define   SP_FORMAT_8BPP		(0x2 << 26)
+#define   SP_FORMAT_BGR565		(0x5 << 26)
+#define   SP_FORMAT_BGRX8888		(0x6 << 26)
+#define   SP_FORMAT_BGRA8888		(0x7 << 26)
+#define   SP_FORMAT_RGBX1010102		(0x8 << 26)
+#define   SP_FORMAT_RGBA1010102		(0x9 << 26)
+#define   SP_FORMAT_BGRX1010102		(0xa << 26) /* CHV pipe B */
+#define   SP_FORMAT_BGRA1010102		(0xb << 26) /* CHV pipe B */
 #define   SP_FORMAT_RGBX8888		(0xe << 26)
 #define   SP_FORMAT_RGBA8888		(0xf << 26)
 #define   SP_ALPHA_PREMULTIPLY		(1 << 23) /* CHV pipe B */
@@ -6655,6 +6786,7 @@ enum {
 #define   PLANE_CTL_YUV422_VYUY			(3 << 16)
 #define   PLANE_CTL_RENDER_DECOMPRESSION_ENABLE	(1 << 15)
 #define   PLANE_CTL_TRICKLE_FEED_DISABLE	(1 << 14)
+#define   PLANE_CTL_CLEAR_COLOR_DISABLE		(1 << 13) /* TGL+ */
 #define   PLANE_CTL_PLANE_GAMMA_DISABLE		(1 << 13) /* Pre-GLK */
 #define   PLANE_CTL_TILED_MASK			(0x7 << 10)
 #define   PLANE_CTL_TILED_LINEAR		(0 << 10)
@@ -6662,6 +6794,7 @@ enum {
 #define   PLANE_CTL_TILED_Y			(4 << 10)
 #define   PLANE_CTL_TILED_YF			(5 << 10)
 #define   PLANE_CTL_FLIP_HORIZONTAL		(1 << 8)
+#define   PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE	(1 << 4) /* TGL+ */
 #define   PLANE_CTL_ALPHA_MASK			(0x3 << 4) /* Pre-GLK */
 #define   PLANE_CTL_ALPHA_DISABLE		(0 << 4)
 #define   PLANE_CTL_ALPHA_SW_PREMULTIPLY	(2 << 4)
@@ -7177,11 +7310,17 @@ enum {
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
+#define LGC_PALETTE_RED_MASK     REG_GENMASK(23, 16)
+#define LGC_PALETTE_GREEN_MASK   REG_GENMASK(15, 8)
+#define LGC_PALETTE_BLUE_MASK    REG_GENMASK(7, 0)
 #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
 
 /* ilk/snb precision palette */
 #define _PREC_PALETTE_A           0x4b000
 #define _PREC_PALETTE_B           0x4c000
+#define   PREC_PALETTE_RED_MASK   REG_GENMASK(29, 20)
+#define   PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10)
+#define   PREC_PALETTE_BLUE_MASK  REG_GENMASK(9, 0)
 #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4)
 
 #define  _PREC_PIPEAGCMAX              0x4d000
@@ -7217,6 +7356,8 @@ enum {
 #define TGL_DMC_DEBUG_DC5_COUNT	_MMIO(0x101084)
 #define TGL_DMC_DEBUG_DC6_COUNT	_MMIO(0x101088)
 
+#define DMC_DEBUG3		_MMIO(0x101090)
+
 /* Display Internal Timeout Register */
 #define RM_TIMEOUT		_MMIO(0x42060)
 #define  MMIO_TIMEOUT_US(us)	((us) << 0)
@@ -7332,6 +7473,9 @@ enum {
 #define  GEN8_PIPE_VSYNC		(1 << 1)
 #define  GEN8_PIPE_VBLANK		(1 << 0)
 #define  GEN9_PIPE_CURSOR_FAULT		(1 << 11)
+#define  GEN11_PIPE_PLANE7_FAULT	(1 << 22)
+#define  GEN11_PIPE_PLANE6_FAULT	(1 << 21)
+#define  GEN11_PIPE_PLANE5_FAULT	(1 << 20)
 #define  GEN9_PIPE_PLANE4_FAULT		(1 << 10)
 #define  GEN9_PIPE_PLANE3_FAULT		(1 << 9)
 #define  GEN9_PIPE_PLANE2_FAULT		(1 << 8)
@@ -7351,16 +7495,25 @@ enum {
 	 GEN9_PIPE_PLANE3_FAULT | \
 	 GEN9_PIPE_PLANE2_FAULT | \
 	 GEN9_PIPE_PLANE1_FAULT)
+#define GEN11_DE_PIPE_IRQ_FAULT_ERRORS \
+	(GEN9_DE_PIPE_IRQ_FAULT_ERRORS | \
+	 GEN11_PIPE_PLANE7_FAULT | \
+	 GEN11_PIPE_PLANE6_FAULT | \
+	 GEN11_PIPE_PLANE5_FAULT)
 
 #define GEN8_DE_PORT_ISR _MMIO(0x44440)
 #define GEN8_DE_PORT_IMR _MMIO(0x44444)
 #define GEN8_DE_PORT_IIR _MMIO(0x44448)
 #define GEN8_DE_PORT_IER _MMIO(0x4444c)
+#define  DSI1_NON_TE			(1 << 31)
+#define  DSI0_NON_TE			(1 << 30)
 #define  ICL_AUX_CHANNEL_E		(1 << 29)
 #define  CNL_AUX_CHANNEL_F		(1 << 28)
 #define  GEN9_AUX_CHANNEL_D		(1 << 27)
 #define  GEN9_AUX_CHANNEL_C		(1 << 26)
 #define  GEN9_AUX_CHANNEL_B		(1 << 25)
+#define  DSI1_TE			(1 << 24)
+#define  DSI0_TE			(1 << 23)
 #define  BXT_DE_PORT_HP_DDIC		(1 << 5)
 #define  BXT_DE_PORT_HP_DDIB		(1 << 4)
 #define  BXT_DE_PORT_HP_DDIA		(1 << 3)
@@ -7370,6 +7523,12 @@ enum {
 #define  GEN8_PORT_DP_A_HOTPLUG		(1 << 3)
 #define  BXT_DE_PORT_GMBUS		(1 << 1)
 #define  GEN8_AUX_CHANNEL_A		(1 << 0)
+#define  TGL_DE_PORT_AUX_USBC6		(1 << 13)
+#define  TGL_DE_PORT_AUX_USBC5		(1 << 12)
+#define  TGL_DE_PORT_AUX_USBC4		(1 << 11)
+#define  TGL_DE_PORT_AUX_USBC3		(1 << 10)
+#define  TGL_DE_PORT_AUX_USBC2		(1 << 9)
+#define  TGL_DE_PORT_AUX_USBC1		(1 << 8)
 #define  TGL_DE_PORT_AUX_DDIC		(1 << 2)
 #define  TGL_DE_PORT_AUX_DDIB		(1 << 1)
 #define  TGL_DE_PORT_AUX_DDIA		(1 << 0)
@@ -7558,10 +7717,19 @@ enum {
 #define  BDW_DPRS_MASK_VBLANK_SRD	(1 << 0)
 #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
 
-#define CHICKEN_TRANS_A		_MMIO(0x420c0)
-#define CHICKEN_TRANS_B		_MMIO(0x420c4)
-#define CHICKEN_TRANS_C		_MMIO(0x420c8)
-#define CHICKEN_TRANS_EDP	_MMIO(0x420cc)
+#define _CHICKEN_TRANS_A	0x420c0
+#define _CHICKEN_TRANS_B	0x420c4
+#define _CHICKEN_TRANS_C	0x420c8
+#define _CHICKEN_TRANS_EDP	0x420cc
+#define _CHICKEN_TRANS_D	0x420d8
+#define CHICKEN_TRANS(trans)	_MMIO(_PICK((trans), \
+					    [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \
+					    [TRANSCODER_A] = _CHICKEN_TRANS_A, \
+					    [TRANSCODER_B] = _CHICKEN_TRANS_B, \
+					    [TRANSCODER_C] = _CHICKEN_TRANS_C, \
+					    [TRANSCODER_D] = _CHICKEN_TRANS_D))
+#define  HSW_FRAME_START_DELAY_MASK	(3 << 27)
+#define  HSW_FRAME_START_DELAY(x)	((x) << 27) /* 0-3 */
 #define  VSC_DATA_SEL_SOFTWARE_CONTROL	(1 << 25) /* GLK and CNL+ */
 #define  DDI_TRAINING_OVERRIDE_ENABLE	(1 << 19)
 #define  DDI_TRAINING_OVERRIDE_VALUE	(1 << 18)
@@ -7585,6 +7753,14 @@ enum {
 #define GEN7_MSG_CTL	_MMIO(0x45010)
 #define  WAIT_FOR_PCH_RESET_ACK		(1 << 1)
 #define  WAIT_FOR_PCH_FLR_ACK		(1 << 0)
+
+#define BW_BUDDY1_CTL			_MMIO(0x45140)
+#define BW_BUDDY2_CTL			_MMIO(0x45150)
+#define   BW_BUDDY_DISABLE		REG_BIT(31)
+
+#define BW_BUDDY1_PAGE_MASK		_MMIO(0x45144)
+#define BW_BUDDY2_PAGE_MASK		_MMIO(0x45154)
+
 #define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1 << 4)
 
@@ -7594,15 +7770,19 @@ enum {
 #define   CNL_DDI_CLOCK_REG_ACCESS_ON	(1 << 7)
 
 #define SKL_DFSM			_MMIO(0x51000)
-#define SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
-#define SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
-#define SKL_DFSM_CDCLK_LIMIT_540	(1 << 23)
-#define SKL_DFSM_CDCLK_LIMIT_450	(2 << 23)
-#define SKL_DFSM_CDCLK_LIMIT_337_5	(3 << 23)
-#define SKL_DFSM_PIPE_A_DISABLE		(1 << 30)
-#define SKL_DFSM_PIPE_B_DISABLE		(1 << 21)
-#define SKL_DFSM_PIPE_C_DISABLE		(1 << 28)
-#define TGL_DFSM_PIPE_D_DISABLE		(1 << 22)
+#define   SKL_DFSM_DISPLAY_PM_DISABLE	(1 << 27)
+#define   SKL_DFSM_DISPLAY_HDCP_DISABLE	(1 << 25)
+#define   SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
+#define   SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
+#define   SKL_DFSM_CDCLK_LIMIT_540	(1 << 23)
+#define   SKL_DFSM_CDCLK_LIMIT_450	(2 << 23)
+#define   SKL_DFSM_CDCLK_LIMIT_337_5	(3 << 23)
+#define   ICL_DFSM_DMC_DISABLE		(1 << 23)
+#define   SKL_DFSM_PIPE_A_DISABLE	(1 << 30)
+#define   SKL_DFSM_PIPE_B_DISABLE	(1 << 21)
+#define   SKL_DFSM_PIPE_C_DISABLE	(1 << 28)
+#define   TGL_DFSM_PIPE_D_DISABLE	(1 << 22)
+#define   CNL_DFSM_DISPLAY_DSC_DISABLE	(1 << 7)
 
 #define SKL_DSSM				_MMIO(0x51004)
 #define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz		(1 << 31)
@@ -7619,7 +7799,10 @@ enum {
 #define  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE  (1 << 10)
 
 #define GEN9_CS_DEBUG_MODE1		_MMIO(0x20ec)
+#define   FF_DOP_CLOCK_GATE_DISABLE	REG_BIT(1)
 #define GEN9_CTX_PREEMPT_REG		_MMIO(0x2248)
+#define   GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
+
 #define GEN8_CS_CHICKEN1		_MMIO(0x2580)
 #define GEN9_PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
 #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)	(((hi) << 2) | ((lo) << 1))
@@ -7644,6 +7827,7 @@ enum {
 
 #define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
   #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	(1 << 11)
+  #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	(1 << 9)
 
 #define HIZ_CHICKEN					_MMIO(0x7018)
 # define CHV_HZ_8X8_MODE_IN_1X				(1 << 15)
@@ -7734,6 +7918,10 @@ enum {
 #define   PIXEL_ROUNDING_TRUNC_FB_PASSTHRU 	(1 << 15)
 #define   PER_PIXEL_ALPHA_BYPASS_EN		(1 << 7)
 
+#define FF_MODE2			_MMIO(0x6604)
+#define   FF_MODE2_TDS_TIMER_MASK	REG_GENMASK(23, 16)
+#define   FF_MODE2_TDS_TIMER_128	REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
+
 /* PCH */
 
 #define PCH_DISPLAY_BASE	0xc0000u
@@ -7828,29 +8016,24 @@ enum {
 				 SDE_FDI_RXA_CPT)
 
 /* south display engine interrupt: ICP/TGP */
-#define SDE_TC6_HOTPLUG_TGP		(1 << 29)
-#define SDE_TC5_HOTPLUG_TGP		(1 << 28)
-#define SDE_TC4_HOTPLUG_ICP		(1 << 27)
-#define SDE_TC3_HOTPLUG_ICP		(1 << 26)
-#define SDE_TC2_HOTPLUG_ICP		(1 << 25)
-#define SDE_TC1_HOTPLUG_ICP		(1 << 24)
 #define SDE_GMBUS_ICP			(1 << 23)
-#define SDE_DDIC_HOTPLUG_TGP		(1 << 18)
-#define SDE_DDIB_HOTPLUG_ICP		(1 << 17)
-#define SDE_DDIA_HOTPLUG_ICP		(1 << 16)
 #define SDE_TC_HOTPLUG_ICP(tc_port)	(1 << ((tc_port) + 24))
 #define SDE_DDI_HOTPLUG_ICP(port)	(1 << ((port) + 16))
-#define SDE_DDI_MASK_ICP		(SDE_DDIB_HOTPLUG_ICP |	\
-					 SDE_DDIA_HOTPLUG_ICP)
-#define SDE_TC_MASK_ICP			(SDE_TC4_HOTPLUG_ICP |	\
-					 SDE_TC3_HOTPLUG_ICP |	\
-					 SDE_TC2_HOTPLUG_ICP |	\
-					 SDE_TC1_HOTPLUG_ICP)
-#define SDE_DDI_MASK_TGP		(SDE_DDIC_HOTPLUG_TGP | \
-					 SDE_DDI_MASK_ICP)
-#define SDE_TC_MASK_TGP			(SDE_TC6_HOTPLUG_TGP |	\
-					 SDE_TC5_HOTPLUG_TGP |	\
-					 SDE_TC_MASK_ICP)
+#define SDE_DDI_MASK_ICP		(SDE_DDI_HOTPLUG_ICP(PORT_B) | \
+					 SDE_DDI_HOTPLUG_ICP(PORT_A))
+#define SDE_TC_MASK_ICP			(SDE_TC_HOTPLUG_ICP(PORT_TC4) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC3) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC2) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC1))
+#define SDE_DDI_MASK_TGP		(SDE_DDI_HOTPLUG_ICP(PORT_C) | \
+					 SDE_DDI_HOTPLUG_ICP(PORT_B) | \
+					 SDE_DDI_HOTPLUG_ICP(PORT_A))
+#define SDE_TC_MASK_TGP			(SDE_TC_HOTPLUG_ICP(PORT_TC6) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC5) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC4) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC3) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC2) | \
+					 SDE_TC_HOTPLUG_ICP(PORT_TC1))
 
 #define SDEISR  _MMIO(0xc4000)
 #define SDEIMR  _MMIO(0xc4004)
@@ -7917,29 +8100,20 @@ enum {
  * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC.
  */
 
-#define SHOTPLUG_CTL_DDI			_MMIO(0xc4030)
-#define   TGP_DDIC_HPD_ENABLE			(1 << 11)
-#define   TGP_DDIC_HPD_STATUS_MASK		(3 << 8)
-#define   TGP_DDIC_HPD_NO_DETECT		(0 << 8)
-#define   TGP_DDIC_HPD_SHORT_DETECT		(1 << 8)
-#define   TGP_DDIC_HPD_LONG_DETECT		(2 << 8)
-#define   TGP_DDIC_HPD_SHORT_LONG_DETECT	(3 << 8)
-#define   ICP_DDIB_HPD_ENABLE			(1 << 7)
-#define   ICP_DDIB_HPD_STATUS_MASK		(3 << 4)
-#define   ICP_DDIB_HPD_NO_DETECT		(0 << 4)
-#define   ICP_DDIB_HPD_SHORT_DETECT		(1 << 4)
-#define   ICP_DDIB_HPD_LONG_DETECT		(2 << 4)
-#define   ICP_DDIB_HPD_SHORT_LONG_DETECT	(3 << 4)
-#define   ICP_DDIA_HPD_ENABLE			(1 << 3)
-#define   ICP_DDIA_HPD_OP_DRIVE_1		(1 << 2)
-#define   ICP_DDIA_HPD_STATUS_MASK		(3 << 0)
-#define   ICP_DDIA_HPD_NO_DETECT		(0 << 0)
-#define   ICP_DDIA_HPD_SHORT_DETECT		(1 << 0)
-#define   ICP_DDIA_HPD_LONG_DETECT		(2 << 0)
-#define   ICP_DDIA_HPD_SHORT_LONG_DETECT	(3 << 0)
+#define SHOTPLUG_CTL_DDI				_MMIO(0xc4030)
+#define   SHOTPLUG_CTL_DDI_HPD_ENABLE(port)		(0x8 << (4 * (port)))
+#define   SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(port)	(0x3 << (4 * (port)))
+#define   SHOTPLUG_CTL_DDI_HPD_NO_DETECT(port)		(0x0 << (4 * (port)))
+#define   SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(port)	(0x1 << (4 * (port)))
+#define   SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(port)	(0x2 << (4 * (port)))
+#define   SHOTPLUG_CTL_DDI_HPD_SHORT_LONG_DETECT(port)	(0x3 << (4 * (port)))
 
 #define SHOTPLUG_CTL_TC				_MMIO(0xc4034)
 #define   ICP_TC_HPD_ENABLE(tc_port)		(8 << (tc_port) * 4)
+
+#define SHPD_FILTER_CNT				_MMIO(0xc4038)
+#define   SHPD_FILTER_CNT_500_ADJ		0x001D9
+
 /* Icelake DSC Rate Control Range Parameter Registers */
 #define DSCA_RC_RANGE_PARAMETERS_0		_MMIO(0x6B240)
 #define DSCA_RC_RANGE_PARAMETERS_0_UDW		_MMIO(0x6B240 + 4)
@@ -8047,14 +8221,15 @@ enum {
 #define   ICP_TC_HPD_LONG_DETECT(tc_port)	(2 << (tc_port) * 4)
 #define   ICP_TC_HPD_SHORT_DETECT(tc_port)	(1 << (tc_port) * 4)
 
-#define ICP_DDI_HPD_ENABLE_MASK		(ICP_DDIB_HPD_ENABLE |	\
-					 ICP_DDIA_HPD_ENABLE)
+#define ICP_DDI_HPD_ENABLE_MASK		(SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \
+					 SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A))
 #define ICP_TC_HPD_ENABLE_MASK		(ICP_TC_HPD_ENABLE(PORT_TC4) | \
 					 ICP_TC_HPD_ENABLE(PORT_TC3) | \
 					 ICP_TC_HPD_ENABLE(PORT_TC2) | \
 					 ICP_TC_HPD_ENABLE(PORT_TC1))
-#define TGP_DDI_HPD_ENABLE_MASK		(TGP_DDIC_HPD_ENABLE |	\
-					 ICP_DDI_HPD_ENABLE_MASK)
+#define TGP_DDI_HPD_ENABLE_MASK		(SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_C) | \
+					 SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \
+					 SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A))
 #define TGP_TC_HPD_ENABLE_MASK		(ICP_TC_HPD_ENABLE(PORT_TC6) | \
 					 ICP_TC_HPD_ENABLE(PORT_TC5) | \
 					 ICP_TC_HPD_ENABLE_MASK)
@@ -8289,10 +8464,8 @@ enum {
 #define  TRANS_STATE_MASK       (1 << 30)
 #define  TRANS_STATE_DISABLE    (0 << 30)
 #define  TRANS_STATE_ENABLE     (1 << 30)
-#define  TRANS_FSYNC_DELAY_HB1  (0 << 27)
-#define  TRANS_FSYNC_DELAY_HB2  (1 << 27)
-#define  TRANS_FSYNC_DELAY_HB3  (2 << 27)
-#define  TRANS_FSYNC_DELAY_HB4  (3 << 27)
+#define  TRANS_FRAME_START_DELAY_MASK	(3 << 27) /* ibx */
+#define  TRANS_FRAME_START_DELAY(x)	((x) << 27) /* ibx: 0-3 */
 #define  TRANS_INTERLACE_MASK   (7 << 21)
 #define  TRANS_PROGRESSIVE      (0 << 21)
 #define  TRANS_INTERLACED       (3 << 21)
@@ -8313,6 +8486,7 @@ enum {
 #define  TRANS_CHICKEN2_TIMING_OVERRIDE			(1 << 31)
 #define  TRANS_CHICKEN2_FDI_POLARITY_REVERSED		(1 << 29)
 #define  TRANS_CHICKEN2_FRAME_START_DELAY_MASK		(3 << 27)
+#define  TRANS_CHICKEN2_FRAME_START_DELAY(x)		((x) << 27) /* 0-3 */
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER	(1 << 26)
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH	(1 << 25)
 
@@ -8604,6 +8778,10 @@ enum {
 #define   GEN9_PWRGT_MEDIA_STATUS_MASK		(1 << 0)
 #define   GEN9_PWRGT_RENDER_STATUS_MASK		(1 << 1)
 
+#define POWERGATE_ENABLE			_MMIO(0xa210)
+#define    VDN_HCP_POWERGATE_ENABLE(n)		BIT(((n) * 2) + 3)
+#define    VDN_MFX_POWERGATE_ENABLE(n)		BIT(((n) * 2) + 4)
+
 #define  GTFIFODBG				_MMIO(0x120000)
 #define    GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV	(0x1f << 20)
 #define    GT_FIFO_FREE_ENTRIES_CHV		(0x7f << 13)
@@ -8841,6 +9019,7 @@ enum {
 #define     GEN9_SAGV_DISABLE			0x0
 #define     GEN9_SAGV_IS_DISABLED		0x1
 #define     GEN9_SAGV_ENABLE			0x3
+#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US	0x23
 #define GEN6_PCODE_DATA				_MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
@@ -9104,6 +9283,10 @@ enum {
 #define HSW_AUD_CHICKENBIT			_MMIO(0x65f10)
 #define   SKL_AUD_CODEC_WAKE_SIGNAL		(1 << 15)
 
+#define AUD_FREQ_CNTRL			_MMIO(0x65900)
+#define AUD_PIN_BUF_CTL		_MMIO(0x48414)
+#define   AUD_PIN_BUF_ENABLE		REG_BIT(31)
+
 /*
  * HSW - ICL power wells
  *
@@ -9242,11 +9425,9 @@ enum skl_power_gate {
 #define _ICL_AUX_REG_IDX(pw_idx)	((pw_idx) - ICL_PW_CTL_IDX_AUX_A)
 #define _ICL_AUX_ANAOVRD1_A		0x162398
 #define _ICL_AUX_ANAOVRD1_B		0x6C398
-#define _TGL_AUX_ANAOVRD1_C		0x160398
 #define ICL_AUX_ANAOVRD1(pw_idx)	_MMIO(_PICK(_ICL_AUX_REG_IDX(pw_idx), \
 						    _ICL_AUX_ANAOVRD1_A, \
-						    _ICL_AUX_ANAOVRD1_B, \
-						    _TGL_AUX_ANAOVRD1_C))
+						    _ICL_AUX_ANAOVRD1_B))
 #define   ICL_AUX_ANAOVRD1_LDO_BYPASS	(1 << 7)
 #define   ICL_AUX_ANAOVRD1_ENABLE	(1 << 0)
 
@@ -9266,12 +9447,20 @@ enum skl_power_gate {
 
 /* HDCP Repeater Registers */
 #define HDCP_REP_CTL			_MMIO(0x66d00)
+#define  HDCP_TRANSA_REP_PRESENT	BIT(31)
+#define  HDCP_TRANSB_REP_PRESENT	BIT(30)
+#define  HDCP_TRANSC_REP_PRESENT	BIT(29)
+#define  HDCP_TRANSD_REP_PRESENT	BIT(28)
 #define  HDCP_DDIB_REP_PRESENT		BIT(30)
 #define  HDCP_DDIA_REP_PRESENT		BIT(29)
 #define  HDCP_DDIC_REP_PRESENT		BIT(28)
 #define  HDCP_DDID_REP_PRESENT		BIT(27)
 #define  HDCP_DDIF_REP_PRESENT		BIT(26)
 #define  HDCP_DDIE_REP_PRESENT		BIT(25)
+#define  HDCP_TRANSA_SHA1_M0		(1 << 20)
+#define  HDCP_TRANSB_SHA1_M0		(2 << 20)
+#define  HDCP_TRANSC_SHA1_M0		(3 << 20)
+#define  HDCP_TRANSD_SHA1_M0		(4 << 20)
 #define  HDCP_DDIB_SHA1_M0		(1 << 20)
 #define  HDCP_DDIA_SHA1_M0		(2 << 20)
 #define  HDCP_DDIC_SHA1_M0		(3 << 20)
@@ -9311,15 +9500,92 @@ enum skl_power_gate {
 					  _PORTE_HDCP_AUTHENC, \
 					  _PORTF_HDCP_AUTHENC) + (x))
 #define PORT_HDCP_CONF(port)		_PORT_HDCP_AUTHENC(port, 0x0)
+#define _TRANSA_HDCP_CONF		0x66400
+#define _TRANSB_HDCP_CONF		0x66500
+#define TRANS_HDCP_CONF(trans)		_MMIO_TRANS(trans, _TRANSA_HDCP_CONF, \
+						    _TRANSB_HDCP_CONF)
+#define HDCP_CONF(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_CONF(trans) : \
+					 PORT_HDCP_CONF(port))
+
 #define  HDCP_CONF_CAPTURE_AN		BIT(0)
 #define  HDCP_CONF_AUTH_AND_ENC		(BIT(1) | BIT(0))
 #define PORT_HDCP_ANINIT(port)		_PORT_HDCP_AUTHENC(port, 0x4)
+#define _TRANSA_HDCP_ANINIT		0x66404
+#define _TRANSB_HDCP_ANINIT		0x66504
+#define TRANS_HDCP_ANINIT(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP_ANINIT, \
+						    _TRANSB_HDCP_ANINIT)
+#define HDCP_ANINIT(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_ANINIT(trans) : \
+					 PORT_HDCP_ANINIT(port))
+
 #define PORT_HDCP_ANLO(port)		_PORT_HDCP_AUTHENC(port, 0x8)
+#define _TRANSA_HDCP_ANLO		0x66408
+#define _TRANSB_HDCP_ANLO		0x66508
+#define TRANS_HDCP_ANLO(trans)		_MMIO_TRANS(trans, _TRANSA_HDCP_ANLO, \
+						    _TRANSB_HDCP_ANLO)
+#define HDCP_ANLO(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_ANLO(trans) : \
+					 PORT_HDCP_ANLO(port))
+
 #define PORT_HDCP_ANHI(port)		_PORT_HDCP_AUTHENC(port, 0xC)
+#define _TRANSA_HDCP_ANHI		0x6640C
+#define _TRANSB_HDCP_ANHI		0x6650C
+#define TRANS_HDCP_ANHI(trans)		_MMIO_TRANS(trans, _TRANSA_HDCP_ANHI, \
+						    _TRANSB_HDCP_ANHI)
+#define HDCP_ANHI(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_ANHI(trans) : \
+					 PORT_HDCP_ANHI(port))
+
 #define PORT_HDCP_BKSVLO(port)		_PORT_HDCP_AUTHENC(port, 0x10)
+#define _TRANSA_HDCP_BKSVLO		0x66410
+#define _TRANSB_HDCP_BKSVLO		0x66510
+#define TRANS_HDCP_BKSVLO(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP_BKSVLO, \
+						    _TRANSB_HDCP_BKSVLO)
+#define HDCP_BKSVLO(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_BKSVLO(trans) : \
+					 PORT_HDCP_BKSVLO(port))
+
 #define PORT_HDCP_BKSVHI(port)		_PORT_HDCP_AUTHENC(port, 0x14)
+#define _TRANSA_HDCP_BKSVHI		0x66414
+#define _TRANSB_HDCP_BKSVHI		0x66514
+#define TRANS_HDCP_BKSVHI(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP_BKSVHI, \
+						    _TRANSB_HDCP_BKSVHI)
+#define HDCP_BKSVHI(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_BKSVHI(trans) : \
+					 PORT_HDCP_BKSVHI(port))
+
 #define PORT_HDCP_RPRIME(port)		_PORT_HDCP_AUTHENC(port, 0x18)
+#define _TRANSA_HDCP_RPRIME		0x66418
+#define _TRANSB_HDCP_RPRIME		0x66518
+#define TRANS_HDCP_RPRIME(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP_RPRIME, \
+						    _TRANSB_HDCP_RPRIME)
+#define HDCP_RPRIME(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_RPRIME(trans) : \
+					 PORT_HDCP_RPRIME(port))
+
 #define PORT_HDCP_STATUS(port)		_PORT_HDCP_AUTHENC(port, 0x1C)
+#define _TRANSA_HDCP_STATUS		0x6641C
+#define _TRANSB_HDCP_STATUS		0x6651C
+#define TRANS_HDCP_STATUS(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP_STATUS, \
+						    _TRANSB_HDCP_STATUS)
+#define HDCP_STATUS(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP_STATUS(trans) : \
+					 PORT_HDCP_STATUS(port))
+
 #define  HDCP_STATUS_STREAM_A_ENC	BIT(31)
 #define  HDCP_STATUS_STREAM_B_ENC	BIT(30)
 #define  HDCP_STATUS_STREAM_C_ENC	BIT(29)
@@ -9346,23 +9612,44 @@ enum skl_power_gate {
 					  _PORTD_HDCP2_BASE, \
 					  _PORTE_HDCP2_BASE, \
 					  _PORTF_HDCP2_BASE) + (x))
-
-#define HDCP2_AUTH_DDI(port)		_PORT_HDCP2_BASE(port, 0x98)
+#define PORT_HDCP2_AUTH(port)		_PORT_HDCP2_BASE(port, 0x98)
+#define _TRANSA_HDCP2_AUTH		0x66498
+#define _TRANSB_HDCP2_AUTH		0x66598
+#define TRANS_HDCP2_AUTH(trans)		_MMIO_TRANS(trans, _TRANSA_HDCP2_AUTH, \
+						    _TRANSB_HDCP2_AUTH)
 #define   AUTH_LINK_AUTHENTICATED	BIT(31)
 #define   AUTH_LINK_TYPE		BIT(30)
 #define   AUTH_FORCE_CLR_INPUTCTR	BIT(19)
 #define   AUTH_CLR_KEYS			BIT(18)
-
-#define HDCP2_CTL_DDI(port)		_PORT_HDCP2_BASE(port, 0xB0)
+#define HDCP2_AUTH(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP2_AUTH(trans) : \
+					 PORT_HDCP2_AUTH(port))
+
+#define PORT_HDCP2_CTL(port)		_PORT_HDCP2_BASE(port, 0xB0)
+#define _TRANSA_HDCP2_CTL		0x664B0
+#define _TRANSB_HDCP2_CTL		0x665B0
+#define TRANS_HDCP2_CTL(trans)		_MMIO_TRANS(trans, _TRANSA_HDCP2_CTL, \
+						    _TRANSB_HDCP2_CTL)
 #define   CTL_LINK_ENCRYPTION_REQ	BIT(31)
-
-#define HDCP2_STATUS_DDI(port)		_PORT_HDCP2_BASE(port, 0xB4)
-#define   STREAM_ENCRYPTION_STATUS_A	BIT(31)
-#define   STREAM_ENCRYPTION_STATUS_B	BIT(30)
-#define   STREAM_ENCRYPTION_STATUS_C	BIT(29)
+#define HDCP2_CTL(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP2_CTL(trans) : \
+					 PORT_HDCP2_CTL(port))
+
+#define PORT_HDCP2_STATUS(port)		_PORT_HDCP2_BASE(port, 0xB4)
+#define _TRANSA_HDCP2_STATUS		0x664B4
+#define _TRANSB_HDCP2_STATUS		0x665B4
+#define TRANS_HDCP2_STATUS(trans)	_MMIO_TRANS(trans, \
+						    _TRANSA_HDCP2_STATUS, \
+						    _TRANSB_HDCP2_STATUS)
 #define   LINK_TYPE_STATUS		BIT(22)
 #define   LINK_AUTH_STATUS		BIT(21)
 #define   LINK_ENCRYPTION_STATUS	BIT(20)
+#define HDCP2_STATUS(dev_priv, trans, port) \
+					(INTEL_GEN(dev_priv) >= 12 ? \
+					 TRANS_HDCP2_STATUS(trans) : \
+					 PORT_HDCP2_STATUS(port))
 
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
@@ -9402,6 +9689,10 @@ enum skl_power_gate {
 #define  TRANS_DDI_EDP_INPUT_A_ONOFF	(4 << 12)
 #define  TRANS_DDI_EDP_INPUT_B_ONOFF	(5 << 12)
 #define  TRANS_DDI_EDP_INPUT_C_ONOFF	(6 << 12)
+#define  TRANS_DDI_EDP_INPUT_D_ONOFF	(7 << 12)
+#define  TRANS_DDI_MST_TRANSPORT_SELECT_MASK	REG_GENMASK(11, 10)
+#define  TRANS_DDI_MST_TRANSPORT_SELECT(trans)	\
+	REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans)
 #define  TRANS_DDI_HDCP_SIGNALLING	(1 << 9)
 #define  TRANS_DDI_DP_VC_PAYLOAD_ALLOC	(1 << 8)
 #define  TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1 << 7)
@@ -9429,7 +9720,9 @@ enum skl_power_gate {
 /* DisplayPort Transport Control */
 #define _DP_TP_CTL_A			0x64040
 #define _DP_TP_CTL_B			0x64140
+#define _TGL_DP_TP_CTL_A		0x60540
 #define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B)
+#define TGL_DP_TP_CTL(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_CTL_A)
 #define  DP_TP_CTL_ENABLE			(1 << 31)
 #define  DP_TP_CTL_FEC_ENABLE			(1 << 30)
 #define  DP_TP_CTL_MODE_SST			(0 << 27)
@@ -9449,7 +9742,9 @@ enum skl_power_gate {
 /* DisplayPort Transport Status */
 #define _DP_TP_STATUS_A			0x64044
 #define _DP_TP_STATUS_B			0x64144
+#define _TGL_DP_TP_STATUS_A		0x60544
 #define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B)
+#define TGL_DP_TP_STATUS(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_STATUS_A)
 #define  DP_TP_STATUS_FEC_ENABLE_LIVE		(1 << 28)
 #define  DP_TP_STATUS_IDLE_DONE			(1 << 25)
 #define  DP_TP_STATUS_ACT_SENT			(1 << 24)
@@ -9604,17 +9899,7 @@ enum skl_power_gate {
 #define _TRANSC_MSA_MISC		0x62410
 #define _TRANS_EDP_MSA_MISC		0x6f410
 #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC)
-
-#define  TRANS_MSA_SYNC_CLK		(1 << 0)
-#define  TRANS_MSA_SAMPLING_444		(2 << 1)
-#define  TRANS_MSA_CLRSP_YCBCR		(2 << 3)
-#define  TRANS_MSA_6_BPC		(0 << 5)
-#define  TRANS_MSA_8_BPC		(1 << 5)
-#define  TRANS_MSA_10_BPC		(2 << 5)
-#define  TRANS_MSA_12_BPC		(3 << 5)
-#define  TRANS_MSA_16_BPC		(4 << 5)
-#define  TRANS_MSA_CEA_RANGE		(1 << 3)
-#define  TRANS_MSA_USE_VSC_SDP		(1 << 14)
+/* See DP_MSA_MISC_* for the bit definitions */
 
 /* LCPLL Control */
 #define LCPLL_CTL			_MMIO(0x130040)
@@ -9655,7 +9940,10 @@ enum skl_power_gate {
 #define  BXT_CDCLK_CD2X_PIPE(pipe)	((pipe) << 20)
 #define  CDCLK_DIVMUX_CD_OVERRIDE	(1 << 19)
 #define  BXT_CDCLK_CD2X_PIPE_NONE	BXT_CDCLK_CD2X_PIPE(3)
+#define  ICL_CDCLK_CD2X_PIPE(pipe)	(_PICK(pipe, 0, 2, 6) << 19)
 #define  ICL_CDCLK_CD2X_PIPE_NONE	(7 << 19)
+#define  TGL_CDCLK_CD2X_PIPE(pipe)	BXT_CDCLK_CD2X_PIPE(pipe)
+#define  TGL_CDCLK_CD2X_PIPE_NONE	ICL_CDCLK_CD2X_PIPE_NONE
 #define  BXT_CDCLK_SSA_PRECHARGE_ENABLE	(1 << 16)
 #define  CDCLK_FREQ_DECIMAL_MASK	(0x7ff)
 
@@ -9976,6 +10264,166 @@ enum skl_power_gate {
 						   _TGL_DPLL1_CFGCR1, \
 						   _TGL_TBTPLL_CFGCR1)
 
+#define _DKL_PHY1_BASE			0x168000
+#define _DKL_PHY2_BASE			0x169000
+#define _DKL_PHY3_BASE			0x16A000
+#define _DKL_PHY4_BASE			0x16B000
+#define _DKL_PHY5_BASE			0x16C000
+#define _DKL_PHY6_BASE			0x16D000
+
+/* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */
+#define _DKL_PLL_DIV0			0x200
+#define   DKL_PLL_DIV0_INTEG_COEFF(x)	((x) << 16)
+#define   DKL_PLL_DIV0_INTEG_COEFF_MASK	(0x1F << 16)
+#define   DKL_PLL_DIV0_PROP_COEFF(x)	((x) << 12)
+#define   DKL_PLL_DIV0_PROP_COEFF_MASK	(0xF << 12)
+#define   DKL_PLL_DIV0_FBPREDIV_SHIFT   (8)
+#define   DKL_PLL_DIV0_FBPREDIV(x)	((x) << DKL_PLL_DIV0_FBPREDIV_SHIFT)
+#define   DKL_PLL_DIV0_FBPREDIV_MASK	(0xF << DKL_PLL_DIV0_FBPREDIV_SHIFT)
+#define   DKL_PLL_DIV0_FBDIV_INT(x)	((x) << 0)
+#define   DKL_PLL_DIV0_FBDIV_INT_MASK	(0xFF << 0)
+#define DKL_PLL_DIV0(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+						    _DKL_PLL_DIV0)
+
+#define _DKL_PLL_DIV1				0x204
+#define   DKL_PLL_DIV1_IREF_TRIM(x)		((x) << 16)
+#define   DKL_PLL_DIV1_IREF_TRIM_MASK		(0x1F << 16)
+#define   DKL_PLL_DIV1_TDC_TARGET_CNT(x)	((x) << 0)
+#define   DKL_PLL_DIV1_TDC_TARGET_CNT_MASK	(0xFF << 0)
+#define DKL_PLL_DIV1(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+						    _DKL_PLL_DIV1)
+
+#define _DKL_PLL_SSC				0x210
+#define   DKL_PLL_SSC_IREF_NDIV_RATIO(x)	((x) << 29)
+#define   DKL_PLL_SSC_IREF_NDIV_RATIO_MASK	(0x7 << 29)
+#define   DKL_PLL_SSC_STEP_LEN(x)		((x) << 16)
+#define   DKL_PLL_SSC_STEP_LEN_MASK		(0xFF << 16)
+#define   DKL_PLL_SSC_STEP_NUM(x)		((x) << 11)
+#define   DKL_PLL_SSC_STEP_NUM_MASK		(0x7 << 11)
+#define   DKL_PLL_SSC_EN			(1 << 9)
+#define DKL_PLL_SSC(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+						    _DKL_PLL_SSC)
+
+#define _DKL_PLL_BIAS			0x214
+#define   DKL_PLL_BIAS_FRAC_EN_H	(1 << 30)
+#define   DKL_PLL_BIAS_FBDIV_SHIFT	(8)
+#define   DKL_PLL_BIAS_FBDIV_FRAC(x)	((x) << DKL_PLL_BIAS_FBDIV_SHIFT)
+#define   DKL_PLL_BIAS_FBDIV_FRAC_MASK	(0x3FFFFF << DKL_PLL_BIAS_FBDIV_SHIFT)
+#define DKL_PLL_BIAS(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+						    _DKL_PLL_BIAS)
+
+#define _DKL_PLL_TDC_COLDST_BIAS		0x218
+#define   DKL_PLL_TDC_SSC_STEP_SIZE(x)		((x) << 8)
+#define   DKL_PLL_TDC_SSC_STEP_SIZE_MASK	(0xFF << 8)
+#define   DKL_PLL_TDC_FEED_FWD_GAIN(x)		((x) << 0)
+#define   DKL_PLL_TDC_FEED_FWD_GAIN_MASK	(0xFF << 0)
+#define DKL_PLL_TDC_COLDST_BIAS(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_PLL_TDC_COLDST_BIAS)
+
+#define _DKL_REFCLKIN_CTL		0x12C
+/* Bits are the same as MG_REFCLKIN_CTL */
+#define DKL_REFCLKIN_CTL(tc_port)	_MMIO(_PORT(tc_port, \
+						    _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+					      _DKL_REFCLKIN_CTL)
+
+#define _DKL_CLKTOP2_HSCLKCTL		0xD4
+/* Bits are the same as MG_CLKTOP2_HSCLKCTL */
+#define DKL_CLKTOP2_HSCLKCTL(tc_port)	_MMIO(_PORT(tc_port, \
+						    _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+					      _DKL_CLKTOP2_HSCLKCTL)
+
+#define _DKL_CLKTOP2_CORECLKCTL1		0xD8
+/* Bits are the same as MG_CLKTOP2_CORECLKCTL1 */
+#define DKL_CLKTOP2_CORECLKCTL1(tc_port)	_MMIO(_PORT(tc_port, \
+							    _DKL_PHY1_BASE, \
+							    _DKL_PHY2_BASE) + \
+						      _DKL_CLKTOP2_CORECLKCTL1)
+
+#define _DKL_TX_DPCNTL0				0x2C0
+#define  DKL_TX_PRESHOOT_COEFF(x)			((x) << 13)
+#define  DKL_TX_PRESHOOT_COEFF_MASK			(0x1f << 13)
+#define  DKL_TX_DE_EMPHASIS_COEFF(x)		((x) << 8)
+#define  DKL_TX_DE_EMPAHSIS_COEFF_MASK		(0x1f << 8)
+#define  DKL_TX_VSWING_CONTROL(x)			((x) << 0)
+#define  DKL_TX_VSWING_CONTROL_MASK			(0x7 << 0)
+#define DKL_TX_DPCNTL0(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_DPCNTL0)
+
+#define _DKL_TX_DPCNTL1				0x2C4
+/* Bits are the same as DKL_TX_DPCNTRL0 */
+#define DKL_TX_DPCNTL1(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_DPCNTL1)
+
+#define _DKL_TX_DPCNTL2				0x2C8
+#define  DKL_TX_DP20BITMODE				(1 << 2)
+#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_DPCNTL2)
+
+#define _DKL_TX_FW_CALIB				0x2F8
+#define  DKL_TX_CFG_DISABLE_WAIT_INIT			(1 << 7)
+#define DKL_TX_FW_CALIB(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_FW_CALIB)
+
+#define _DKL_TX_PMD_LANE_SUS				0xD00
+#define DKL_TX_PMD_LANE_SUS(tc_port) _MMIO(_PORT(tc_port, \
+							  _DKL_PHY1_BASE, \
+							  _DKL_PHY2_BASE) + \
+							  _DKL_TX_PMD_LANE_SUS)
+
+#define _DKL_TX_DW17					0xDC4
+#define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_DW17)
+
+#define _DKL_TX_DW18					0xDC8
+#define DKL_TX_DW18(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_TX_DW18)
+
+#define _DKL_DP_MODE					0xA0
+#define DKL_DP_MODE(tc_port) _MMIO(_PORT(tc_port, \
+						     _DKL_PHY1_BASE, \
+						     _DKL_PHY2_BASE) + \
+						     _DKL_DP_MODE)
+
+#define _DKL_CMN_UC_DW27			0x36C
+#define  DKL_CMN_UC_DW27_UC_HEALTH		(0x1 << 15)
+#define DKL_CMN_UC_DW_27(tc_port)		_MMIO(_PORT(tc_port, \
+							    _DKL_PHY1_BASE, \
+							    _DKL_PHY2_BASE) + \
+							    _DKL_CMN_UC_DW27)
+
+/*
+ * Each Dekel PHY is addressed through a 4KB aperture. Each PHY has more than
+ * 4KB of register space, so a separate index is programmed in HIP_INDEX_REG0
+ * or HIP_INDEX_REG1, based on the port number, to set the upper 2 address
+ * bits that point the 4KB window into the full PHY register space.
+ */
+#define _HIP_INDEX_REG0			0x1010A0
+#define _HIP_INDEX_REG1			0x1010A4
+#define HIP_INDEX_REG(tc_port)		_MMIO((tc_port) < 4 ? _HIP_INDEX_REG0 \
+					      : _HIP_INDEX_REG1)
+#define _HIP_INDEX_SHIFT(tc_port)	(8 * ((tc_port) % 4))
+#define HIP_INDEX_VAL(tc_port, val)	((val) << _HIP_INDEX_SHIFT(tc_port))
+
 /* BXT display engine PLL */
 #define BXT_DE_PLL_CTL			_MMIO(0x6d000)
 #define   BXT_DE_PLL_RATIO(x)		(x)	/* {60,65,100} * 19.2MHz */
@@ -9990,6 +10438,8 @@ enum skl_power_gate {
 /* GEN9 DC */
 #define DC_STATE_EN			_MMIO(0x45504)
 #define  DC_STATE_DISABLE		0
+#define  DC_STATE_EN_DC3CO		REG_BIT(30)
+#define  DC_STATE_DC3CO_STATUS		REG_BIT(29)
 #define  DC_STATE_EN_UPTO_DC5		(1 << 0)
 #define  DC_STATE_EN_DC9		(1 << 3)
 #define  DC_STATE_EN_UPTO_DC6		(2 << 0)
@@ -10118,11 +10568,11 @@ enum skl_power_gate {
 #define _PIPE_A_CSC_COEFF_BV	0x49024
 
 #define _PIPE_A_CSC_MODE	0x49028
-#define  ICL_CSC_ENABLE			(1 << 31)
-#define  ICL_OUTPUT_CSC_ENABLE		(1 << 30)
-#define  CSC_BLACK_SCREEN_OFFSET	(1 << 2)
-#define  CSC_POSITION_BEFORE_GAMMA	(1 << 1)
-#define  CSC_MODE_YUV_TO_RGB		(1 << 0)
+#define  ICL_CSC_ENABLE			(1 << 31) /* icl+ */
+#define  ICL_OUTPUT_CSC_ENABLE		(1 << 30) /* icl+ */
+#define  CSC_BLACK_SCREEN_OFFSET	(1 << 2) /* ilk/snb */
+#define  CSC_POSITION_BEFORE_GAMMA	(1 << 1) /* pre-glk */
+#define  CSC_MODE_YUV_TO_RGB		(1 << 0) /* ilk/snb */
 
 #define _PIPE_A_CSC_PREOFF_HI	0x49030
 #define _PIPE_A_CSC_PREOFF_ME	0x49034
@@ -10238,6 +10688,9 @@ enum skl_power_gate {
 #define _PAL_PREC_GC_MAX_A	0x4A410
 #define _PAL_PREC_GC_MAX_B	0x4AC10
 #define _PAL_PREC_GC_MAX_C	0x4B410
+#define   PREC_PAL_DATA_RED_MASK	REG_GENMASK(29, 20)
+#define   PREC_PAL_DATA_GREEN_MASK	REG_GENMASK(19, 10)
+#define   PREC_PAL_DATA_BLUE_MASK	REG_GENMASK(9, 0)
 #define _PAL_PREC_EXT_GC_MAX_A	0x4A420
 #define _PAL_PREC_EXT_GC_MAX_B	0x4AC20
 #define _PAL_PREC_EXT_GC_MAX_C	0x4B420
@@ -10290,6 +10743,9 @@ enum skl_power_gate {
 #define   CGM_PIPE_MODE_GAMMA	(1 << 2)
 #define   CGM_PIPE_MODE_CSC	(1 << 1)
 #define   CGM_PIPE_MODE_DEGAMMA	(1 << 0)
+#define   CGM_PIPE_GAMMA_RED_MASK   REG_GENMASK(9, 0)
+#define   CGM_PIPE_GAMMA_GREEN_MASK REG_GENMASK(25, 16)
+#define   CGM_PIPE_GAMMA_BLUE_MASK  REG_GENMASK(9, 0)
 
 #define _CGM_PIPE_B_CSC_COEFF01	(VLV_DISPLAY_BASE + 0x69900)
 #define _CGM_PIPE_B_CSC_COEFF23	(VLV_DISPLAY_BASE + 0x69904)
@@ -10339,6 +10795,57 @@ enum skl_power_gate {
 #define  ICL_ESC_CLK_DIV_SHIFT			0
 #define DSI_MAX_ESC_CLK			20000		/* in KHz */
 
+#define _DSI_CMD_FRMCTL_0		0x6b034
+#define _DSI_CMD_FRMCTL_1		0x6b834
+#define DSI_CMD_FRMCTL(port)		_MMIO_PORT(port,	\
+						   _DSI_CMD_FRMCTL_0,\
+						   _DSI_CMD_FRMCTL_1)
+#define   DSI_FRAME_UPDATE_REQUEST		(1 << 31)
+#define   DSI_PERIODIC_FRAME_UPDATE_ENABLE	(1 << 29)
+#define   DSI_NULL_PACKET_ENABLE		(1 << 28)
+#define   DSI_FRAME_IN_PROGRESS			(1 << 0)
+
+#define _DSI_INTR_MASK_REG_0		0x6b070
+#define _DSI_INTR_MASK_REG_1		0x6b870
+#define DSI_INTR_MASK_REG(port)		_MMIO_PORT(port,	\
+						   _DSI_INTR_MASK_REG_0,\
+						   _DSI_INTR_MASK_REG_1)
+
+#define _DSI_INTR_IDENT_REG_0		0x6b074
+#define _DSI_INTR_IDENT_REG_1		0x6b874
+#define DSI_INTR_IDENT_REG(port)	_MMIO_PORT(port,	\
+						   _DSI_INTR_IDENT_REG_0,\
+						   _DSI_INTR_IDENT_REG_1)
+#define   DSI_TE_EVENT				(1 << 31)
+#define   DSI_RX_DATA_OR_BTA_TERMINATED		(1 << 30)
+#define   DSI_TX_DATA				(1 << 29)
+#define   DSI_ULPS_ENTRY_DONE			(1 << 28)
+#define   DSI_NON_TE_TRIGGER_RECEIVED		(1 << 27)
+#define   DSI_HOST_CHKSUM_ERROR			(1 << 26)
+#define   DSI_HOST_MULTI_ECC_ERROR		(1 << 25)
+#define   DSI_HOST_SINGL_ECC_ERROR		(1 << 24)
+#define   DSI_HOST_CONTENTION_DETECTED		(1 << 23)
+#define   DSI_HOST_FALSE_CONTROL_ERROR		(1 << 22)
+#define   DSI_HOST_TIMEOUT_ERROR		(1 << 21)
+#define   DSI_HOST_LOW_POWER_TX_SYNC_ERROR	(1 << 20)
+#define   DSI_HOST_ESCAPE_MODE_ENTRY_ERROR	(1 << 19)
+#define   DSI_FRAME_UPDATE_DONE			(1 << 16)
+#define   DSI_PROTOCOL_VIOLATION_REPORTED	(1 << 15)
+#define   DSI_INVALID_TX_LENGTH			(1 << 13)
+#define   DSI_INVALID_VC			(1 << 12)
+#define   DSI_INVALID_DATA_TYPE			(1 << 11)
+#define   DSI_PERIPHERAL_CHKSUM_ERROR		(1 << 10)
+#define   DSI_PERIPHERAL_MULTI_ECC_ERROR	(1 << 9)
+#define   DSI_PERIPHERAL_SINGLE_ECC_ERROR	(1 << 8)
+#define   DSI_PERIPHERAL_CONTENTION_DETECTED	(1 << 7)
+#define   DSI_PERIPHERAL_FALSE_CTRL_ERROR	(1 << 6)
+#define   DSI_PERIPHERAL_TIMEOUT_ERROR		(1 << 5)
+#define   DSI_PERIPHERAL_LP_TX_SYNC_ERROR	(1 << 4)
+#define   DSI_PERIPHERAL_ESC_MODE_ENTRY_CMD_ERR	(1 << 3)
+#define   DSI_EOT_SYNC_ERROR			(1 << 2)
+#define   DSI_SOT_SYNC_ERROR			(1 << 1)
+#define   DSI_SOT_ERROR				(1 << 0)
+
 /* Gen4+ Timestamp and Pipe Frame time stamp registers */
 #define GEN4_TIMESTAMP		_MMIO(0x2358)
 #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
@@ -10943,6 +11450,7 @@ enum skl_power_gate {
 #define  CMD_MODE_TE_GATE		(0x1 << 28)
 #define  VIDEO_MODE_SYNC_EVENT		(0x2 << 28)
 #define  VIDEO_MODE_SYNC_PULSE		(0x3 << 28)
+#define  TE_SOURCE_GPIO			(1 << 27)
 #define  LINK_READY			(1 << 20)
 #define  PIX_FMT_MASK			(0x3 << 16)
 #define  PIX_FMT_SHIFT			16
@@ -11195,13 +11703,18 @@ enum skl_power_gate {
 /* MOCS (Memory Object Control State) registers */
 #define GEN9_LNCFCMOCS(i)	_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
 
-#define GEN9_GFX_MOCS(i)	_MMIO(0xc800 + (i) * 4)	/* Graphics MOCS registers */
-#define GEN9_MFX0_MOCS(i)	_MMIO(0xc900 + (i) * 4)	/* Media 0 MOCS registers */
-#define GEN9_MFX1_MOCS(i)	_MMIO(0xca00 + (i) * 4)	/* Media 1 MOCS registers */
-#define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
-#define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
-/* Media decoder 2 MOCS registers */
-#define GEN11_MFX2_MOCS(i)	_MMIO(0x10000 + (i) * 4)
+#define __GEN9_RCS0_MOCS0	0xc800
+#define GEN9_GFX_MOCS(i)	_MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
+#define __GEN9_VCS0_MOCS0	0xc900
+#define GEN9_MFX0_MOCS(i)	_MMIO(__GEN9_VCS0_MOCS0 + (i) * 4)
+#define __GEN9_VCS1_MOCS0	0xca00
+#define GEN9_MFX1_MOCS(i)	_MMIO(__GEN9_VCS1_MOCS0 + (i) * 4)
+#define __GEN9_VECS0_MOCS0	0xcb00
+#define GEN9_VEBOX_MOCS(i)	_MMIO(__GEN9_VECS0_MOCS0 + (i) * 4)
+#define __GEN9_BCS0_MOCS0	0xcc00
+#define GEN9_BLT_MOCS(i)	_MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
+#define __GEN11_VCS2_MOCS0	0x10000
+#define GEN11_MFX2_MOCS(i)	_MMIO(__GEN11_VCS2_MOCS0 + (i) * 4)
 
 #define GEN10_SCRATCH_LNCF2		_MMIO(0xb0a0)
 #define   PMFLUSHDONE_LNICRSDROP	(1 << 20)
@@ -11537,16 +12050,31 @@ enum skl_power_gate {
 
 #define PORT_TX_DFLEXDPSP(fia)			_MMIO_FIA((fia), 0x008A0)
 #define   MODULAR_FIA_MASK			(1 << 4)
-#define   TC_LIVE_STATE_TBT(tc_port)		(1 << ((tc_port) * 8 + 6))
-#define   TC_LIVE_STATE_TC(tc_port)		(1 << ((tc_port) * 8 + 5))
-#define   DP_LANE_ASSIGNMENT_SHIFT(tc_port)	((tc_port) * 8)
-#define   DP_LANE_ASSIGNMENT_MASK(tc_port)	(0xf << ((tc_port) * 8))
-#define   DP_LANE_ASSIGNMENT(tc_port, x)	((x) << ((tc_port) * 8))
+#define   TC_LIVE_STATE_TBT(idx)		(1 << ((idx) * 8 + 6))
+#define   TC_LIVE_STATE_TC(idx)			(1 << ((idx) * 8 + 5))
+#define   DP_LANE_ASSIGNMENT_SHIFT(idx)		((idx) * 8)
+#define   DP_LANE_ASSIGNMENT_MASK(idx)		(0xf << ((idx) * 8))
+#define   DP_LANE_ASSIGNMENT(idx, x)		((x) << ((idx) * 8))
 
 #define PORT_TX_DFLEXDPPMS(fia)			_MMIO_FIA((fia), 0x00890)
-#define   DP_PHY_MODE_STATUS_COMPLETED(tc_port)		(1 << (tc_port))
+#define   DP_PHY_MODE_STATUS_COMPLETED(idx)	(1 << (idx))
 
 #define PORT_TX_DFLEXDPCSSS(fia)		_MMIO_FIA((fia), 0x00894)
-#define   DP_PHY_MODE_STATUS_NOT_SAFE(tc_port)		(1 << (tc_port))
+#define   DP_PHY_MODE_STATUS_NOT_SAFE(idx)	(1 << (idx))
+
+#define PORT_TX_DFLEXPA1(fia)			_MMIO_FIA((fia), 0x00880)
+#define   DP_PIN_ASSIGNMENT_SHIFT(idx)		((idx) * 4)
+#define   DP_PIN_ASSIGNMENT_MASK(idx)		(0xf << ((idx) * 4))
+#define   DP_PIN_ASSIGNMENT(idx, x)		((x) << ((idx) * 4))
+
+/* This register controls the Display State Buffer (DSB) engines. */
+#define _DSBSL_INSTANCE_BASE		0x70B00
+#define DSBSL_INSTANCE(pipe, id)	(_DSBSL_INSTANCE_BASE + \
+					 (pipe) * 0x1000 + (id) * 0x100)
+#define DSB_HEAD(pipe, id)		_MMIO(DSBSL_INSTANCE(pipe, id) + 0x0)
+#define DSB_TAIL(pipe, id)		_MMIO(DSBSL_INSTANCE(pipe, id) + 0x4)
+#define DSB_CTRL(pipe, id)		_MMIO(DSBSL_INSTANCE(pipe, id) + 0x8)
+#define   DSB_ENABLE			(1 << 31)
+#define   DSB_STATUS			(1 << 0)
 
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1c5506822dc7..be185886e4fc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,8 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
+#include "gt/intel_rps.h"
 
 #include "i915_active.h"
 #include "i915_drv.h"
@@ -55,11 +57,13 @@ static struct i915_global_request {
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
-	return "i915";
+	return dev_name(to_request(fence)->i915->drm.dev);
 }
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
+	const struct i915_gem_context *ctx;
+
 	/*
 	 * The timeline struct (as part of the ppgtt underneath a context)
 	 * may be freed when the request is no longer in use by the GPU.
@@ -72,7 +76,11 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return "signaled";
 
-	return to_request(fence)->gem_context->name ?: "[i915]";
+	ctx = i915_request_gem_context(to_request(fence));
+	if (!ctx)
+		return "[" DRIVER_NAME "]";
+
+	return ctx->name;
 }
 
 static bool i915_fence_signaled(struct dma_fence *fence)
@@ -169,23 +177,24 @@ remove_from_client(struct i915_request *request)
 {
 	struct drm_i915_file_private *file_priv;
 
-	file_priv = READ_ONCE(request->file_priv);
-	if (!file_priv)
+	if (!READ_ONCE(request->file_priv))
 		return;
 
-	spin_lock(&file_priv->mm.lock);
-	if (request->file_priv) {
+	rcu_read_lock();
+	file_priv = xchg(&request->file_priv, NULL);
+	if (file_priv) {
+		spin_lock(&file_priv->mm.lock);
 		list_del(&request->client_link);
-		request->file_priv = NULL;
+		spin_unlock(&file_priv->mm.lock);
 	}
-	spin_unlock(&file_priv->mm.lock);
+	rcu_read_unlock();
 }
 
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
 
-	capture = request->capture_list;
+	capture = fetch_and_zero(&request->capture_list);
 	while (capture) {
 		struct i915_capture_list *next = capture->next;
 
@@ -205,28 +214,22 @@ static void remove_from_engine(struct i915_request *rq)
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	locked = READ_ONCE(rq->engine);
-	spin_lock(&locked->active.lock);
+	spin_lock_irq(&locked->active.lock);
 	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
 		spin_unlock(&locked->active.lock);
 		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
-	list_del(&rq->sched.link);
-	spin_unlock(&locked->active.lock);
+	list_del_init(&rq->sched.link);
+	spin_unlock_irq(&locked->active.lock);
 }
 
-static bool i915_request_retire(struct i915_request *rq)
+bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
-	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  rq->engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
+	RQ_TRACE(rq, "\n");
 
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
@@ -240,41 +243,11 @@ static bool i915_request_retire(struct i915_request *rq)
 	 * Note this requires that we are always called in request
 	 * completion order.
 	 */
-	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	GEM_BUG_ON(!list_is_first(&rq->link,
+				  &i915_request_timeline(rq)->requests));
 	rq->ring->head = rq->postfix;
 
 	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
-	local_irq_disable();
-
-	/*
 	 * We only loosely track inflight requests across preemption,
 	 * and so we may find ourselves attempting to retire a _completed_
 	 * request that we have removed from the HW and put back on a run
@@ -282,30 +255,28 @@ static bool i915_request_retire(struct i915_request *rq)
 	 */
 	remove_from_engine(rq);
 
-	spin_lock(&rq->lock);
+	spin_lock_irq(&rq->lock);
 	i915_request_mark_complete(rq);
 	if (!i915_request_signaled(rq))
 		dma_fence_signal_locked(&rq->fence);
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
 		i915_request_cancel_breadcrumb(rq);
 	if (i915_request_has_waitboost(rq)) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
+		atomic_dec(&rq->engine->gt->rps.num_waiters);
 	}
 	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
 		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 		__notify_execute_cb(rq);
 	}
 	GEM_BUG_ON(!list_empty(&rq->execute_cb));
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
+	spin_unlock_irq(&rq->lock);
 
 	remove_from_client(rq);
 	list_del(&rq->link);
 
-	intel_context_exit(rq->hw_context);
-	intel_context_unpin(rq->hw_context);
+	intel_context_exit(rq->context);
+	intel_context_unpin(rq->context);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -316,15 +287,11 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_timeline * const tl = rq->timeline;
+	struct intel_timeline * const tl = i915_request_timeline(rq);
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  rq->engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
+	RQ_TRACE(rq, "\n");
 
-	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
 	do {
@@ -333,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 }
 
 static int
-__i915_request_await_execution(struct i915_request *rq,
-			       struct i915_request *signal,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal),
-			       gfp_t gfp)
+__await_execution(struct i915_request *rq,
+		  struct i915_request *signal,
+		  void (*hook)(struct i915_request *rq,
+			       struct dma_fence *signal),
+		  gfp_t gfp)
 {
 	struct execute_cb *cb;
 
@@ -374,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
 	}
 	spin_unlock_irq(&signal->lock);
 
+	/* Copy across semaphore status as we need the same behaviour */
+	rq->sched.flags |= signal->sched.flags;
 	return 0;
 }
 
@@ -382,10 +351,7 @@ bool __i915_request_submit(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	bool result = false;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	RQ_TRACE(request, "\n");
 
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
@@ -409,7 +375,7 @@ bool __i915_request_submit(struct i915_request *request)
 	if (i915_request_completed(request))
 		goto xfer;
 
-	if (i915_gem_context_is_banned(request->gem_context))
+	if (intel_context_is_banned(request->context))
 		i915_request_skip(request, -EIO);
 
 	/*
@@ -448,7 +414,7 @@ xfer:	/* We may be recursing from the signal callback of another i915 fence */
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
-		intel_engine_queue_breadcrumbs(engine);
+		intel_engine_signal_breadcrumbs(engine);
 
 	__notify_execute_cb(request);
 
@@ -474,10 +440,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	RQ_TRACE(request, "\n");
 
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
@@ -619,6 +582,21 @@ out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
+static void __i915_request_ctor(void *arg)
+{
+	struct i915_request *rq = arg;
+
+	spin_lock_init(&rq->lock);
+	i915_sched_node_init(&rq->sched);
+	i915_sw_fence_init(&rq->submit, submit_notify);
+	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
+
+	rq->file_priv = NULL;
+	rq->capture_list = NULL;
+
+	INIT_LIST_HEAD(&rq->execute_cb);
+}
+
 struct i915_request *
 __i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
@@ -676,34 +654,31 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 		goto err_free;
 
 	rq->i915 = ce->engine->i915;
-	rq->hw_context = ce;
-	rq->gem_context = ce->gem_context;
+	rq->context = ce;
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
-	rq->timeline = tl;
+	rq->execution_mask = ce->engine->mask;
+
+	RCU_INIT_POINTER(rq->timeline, tl);
+	RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
-	rq->hwsp_cacheline = tl->hwsp_cacheline;
+
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
-	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
 		       tl->fence_context, seqno);
 
 	/* We bump the ref for the fence chain */
-	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
+	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
+	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
 
-	i915_sched_node_init(&rq->sched);
+	i915_sched_node_reinit(&rq->sched);
 
-	/* No zalloc, must clear what we need by hand */
-	rq->file_priv = NULL;
+	/* No zalloc, everything must be cleared after use */
 	rq->batch = NULL;
-	rq->capture_list = NULL;
-	rq->flags = 0;
-	rq->execution_mask = ALL_ENGINES;
-
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->execute_cb);
+	GEM_BUG_ON(rq->file_priv);
+	GEM_BUG_ON(rq->capture_list);
+	GEM_BUG_ON(!list_empty(&rq->execute_cb));
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
@@ -741,7 +716,6 @@ err_unwind:
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -786,16 +760,46 @@ err_unlock:
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->link, &signal->timeline->requests))
-		return 0;
+	struct dma_fence *fence;
+	int err;
+
+	GEM_BUG_ON(i915_request_timeline(rq) ==
+		   rcu_access_pointer(signal->timeline));
+
+	fence = NULL;
+	rcu_read_lock();
+	spin_lock_irq(&signal->lock);
+	if (!i915_request_started(signal) &&
+	    !list_is_first(&signal->link,
+			   &rcu_dereference(signal->timeline)->requests)) {
+		struct i915_request *prev = list_prev_entry(signal, link);
 
-	signal = list_prev_entry(signal, link);
-	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
+		/*
+		 * Peek at the request before us in the timeline. That
+		 * request will only be valid before it is retired, so
+		 * after acquiring a reference to it, confirm that it is
+		 * still part of the signaler's timeline.
+		 */
+		if (i915_request_get_rcu(prev)) {
+			if (list_next_entry(prev, link) == signal)
+				fence = &prev->fence;
+			else
+				i915_request_put(prev);
+		}
+	}
+	spin_unlock_irq(&signal->lock);
+	rcu_read_unlock();
+	if (!fence)
 		return 0;
 
-	return i915_sw_fence_await_dma_fence(&rq->submit,
-					     &signal->fence, 0,
-					     I915_FENCE_GFP);
+	err = 0;
+	if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+		err = i915_sw_fence_await_dma_fence(&rq->submit,
+						    fence, 0,
+						    I915_FENCE_GFP);
+	dma_fence_put(fence);
+
+	return err;
 }
 
 static intel_engine_mask_t
@@ -817,38 +821,27 @@ already_busywaiting(struct i915_request *rq)
 }
 
 static int
-emit_semaphore_wait(struct i915_request *to,
-		    struct i915_request *from,
-		    gfp_t gfp)
+__emit_semaphore_wait(struct i915_request *to,
+		      struct i915_request *from,
+		      u32 seqno)
 {
+	const int has_token = INTEL_GEN(to->i915) >= 12;
 	u32 hwsp_offset;
+	int len, err;
 	u32 *cs;
-	int err;
 
-	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
-	/* Just emit the first semaphore we see as request space is limited. */
-	if (already_busywaiting(to) & from->engine->mask)
-		return i915_sw_fence_await_dma_fence(&to->submit,
-						     &from->fence, 0,
-						     I915_FENCE_GFP);
-
-	err = i915_request_await_start(to, from);
-	if (err < 0)
-		return err;
-
-	/* Only submit our spinner after the signaler is running! */
-	err = __i915_request_await_execution(to, from, NULL, gfp);
-	if (err)
-		return err;
-
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
 		return err;
 
-	cs = intel_ring_begin(to, 4);
+	len = 4;
+	if (has_token)
+		len += 2;
+
+	cs = intel_ring_begin(to, len);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -860,18 +853,50 @@ emit_semaphore_wait(struct i915_request *to,
 	 * (post-wrap) values than they were expecting (and so wait
 	 * forever).
 	 */
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_GTE_SDD;
-	*cs++ = from->fence.seqno;
+	*cs++ = (MI_SEMAPHORE_WAIT |
+		 MI_SEMAPHORE_GLOBAL_GTT |
+		 MI_SEMAPHORE_POLL |
+		 MI_SEMAPHORE_SAD_GTE_SDD) +
+		has_token;
+	*cs++ = seqno;
 	*cs++ = hwsp_offset;
 	*cs++ = 0;
+	if (has_token) {
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
+	}
 
 	intel_ring_advance(to, cs);
+	return 0;
+}
+
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	/* Just emit the first semaphore we see as request space is limited. */
+	if (already_busywaiting(to) & from->engine->mask)
+		goto await_fence;
+
+	if (i915_request_await_start(to, from) < 0)
+		goto await_fence;
+
+	/* Only submit our spinner after the signaler is running! */
+	if (__await_execution(to, from, NULL, gfp))
+		goto await_fence;
+
+	if (__emit_semaphore_wait(to, from, from->fence.seqno))
+		goto await_fence;
+
 	to->sched.semaphores |= from->engine->mask;
 	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 	return 0;
+
+await_fence:
+	return i915_sw_fence_await_dma_fence(&to->submit,
+					     &from->fence, 0,
+					     I915_FENCE_GFP);
 }
 
 static int
@@ -891,18 +916,16 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 			return ret;
 	}
 
-	if (to->engine == from->engine) {
+	if (to->engine == from->engine)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
-	} else if (intel_engine_has_semaphores(to->engine) &&
-		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
+	else if (intel_context_use_semaphores(to->context))
 		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	} else {
+	else
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
 						    I915_FENCE_GFP);
-	}
 	if (ret < 0)
 		return ret;
 
@@ -942,8 +965,10 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 
 	do {
 		fence = *child++;
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+			i915_sw_fence_set_error_once(&rq->submit, fence->error);
 			continue;
+		}
 
 		/*
 		 * Requests on the same timeline are explicitly ordered, along
@@ -955,26 +980,79 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 
 		/* Squash repeated waits to the same timelines */
 		if (fence->context &&
-		    intel_timeline_sync_is_later(rq->timeline, fence))
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
 			continue;
 
 		if (dma_fence_is_i915(fence))
 			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
-							    I915_FENCE_TIMEOUT,
+							    fence->context ? I915_FENCE_TIMEOUT : 0,
 							    I915_FENCE_GFP);
 		if (ret < 0)
 			return ret;
 
 		/* Record the latest fence used against each timeline */
 		if (fence->context)
-			intel_timeline_sync_set(rq->timeline, fence);
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
 	} while (--nchild);
 
 	return 0;
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
+static int
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
+{
+	int err;
+
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated depenendices to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
+		return 0;
+
+	/* Ensure both start together [after all semaphores in signal] */
+	if (intel_engine_has_semaphores(to->engine))
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+	else
+		err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/* Couple the dependency tree for PI on this exposed to->fence */
+	if (to->engine->schedule) {
+		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		if (err < 0)
+			return err;
+	}
+
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence,
@@ -997,8 +1075,10 @@ i915_request_await_execution(struct i915_request *rq,
 
 	do {
 		fence = *child++;
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+			i915_sw_fence_set_error_once(&rq->submit, fence->error);
 			continue;
+		}
 
 		/*
 		 * We don't squash repeated fence dependencies here as we
@@ -1008,8 +1088,7 @@ i915_request_await_execution(struct i915_request *rq,
 		if (dma_fence_is_i915(fence))
 			ret = __i915_request_await_execution(rq,
 							     to_request(fence),
-							     hook,
-							     I915_FENCE_GFP);
+							     hook);
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
 							    I915_FENCE_TIMEOUT,
@@ -1111,7 +1190,7 @@ void i915_request_skip(struct i915_request *rq, int error)
 static struct i915_request *
 __i915_request_add_to_timeline(struct i915_request *rq)
 {
-	struct intel_timeline *timeline = rq->timeline;
+	struct intel_timeline *timeline = i915_request_timeline(rq);
 	struct i915_request *prev;
 
 	/*
@@ -1134,8 +1213,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1160,7 +1239,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
@@ -1176,8 +1254,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	u32 *cs;
 
-	GEM_TRACE("%s fence %llx:%lld\n",
-		  engine->name, rq->fence.context, rq->fence.seqno);
+	RQ_TRACE(rq, "\n");
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
@@ -1223,8 +1300,8 @@ void __i915_request_queue(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq)
 {
-	struct i915_sched_attr attr = rq->gem_context->sched;
-	struct intel_timeline * const tl = rq->timeline;
+	struct intel_timeline * const tl = i915_request_timeline(rq);
+	struct i915_sched_attr attr = {};
 	struct i915_request *prev;
 
 	lockdep_assert_held(&tl->mutex);
@@ -1234,6 +1311,9 @@ void i915_request_add(struct i915_request *rq)
 
 	prev = __i915_request_commit(rq);
 
+	if (rcu_access_pointer(rq->context->gem_context))
+		attr = i915_request_gem_context(rq)->sched;
+
 	/*
 	 * Boost actual workloads past semaphores!
 	 *
@@ -1279,7 +1359,9 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev) && prev->timeline == tl)
+	if (prev &&
+	    i915_request_completed(prev) &&
+	    rcu_access_pointer(prev->timeline) == tl)
 		i915_request_retire_upto(prev);
 
 	mutex_unlock(&tl->mutex);
@@ -1442,7 +1524,7 @@ long i915_request_wait(struct i915_request *rq,
 	 * completion. That requires having a good predictor for the request
 	 * duration, which we currently lack.
 	 */
-	if (CONFIG_DRM_I915_SPIN_REQUEST &&
+	if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
 	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
 		dma_fence_signal(&rq->fence);
 		goto out;
@@ -1462,7 +1544,7 @@ long i915_request_wait(struct i915_request *rq,
 	 */
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq);
+			intel_rps_boost(rq);
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 	}
 
@@ -1488,6 +1570,7 @@ long i915_request_wait(struct i915_request *rq,
 			break;
 		}
 
+		intel_engine_flush_submission(rq->engine);
 		timeout = io_schedule_timeout(timeout);
 	}
 	__set_current_state(TASK_RUNNING);
@@ -1495,53 +1578,11 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
-	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
+	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl, *tn;
-	unsigned long flags;
-	LIST_HEAD(free);
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
-		if (!mutex_trylock(&tl->mutex))
-			continue;
-
-		intel_timeline_get(tl);
-		GEM_BUG_ON(!tl->active_count);
-		tl->active_count++; /* pin the list element */
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		retire_requests(tl);
-
-		spin_lock_irqsave(&timelines->lock, flags);
-
-		/* Resume iteration after dropping lock */
-		list_safe_reset_next(tl, tn, link);
-		if (!--tl->active_count)
-			list_del(&tl->link);
-
-		mutex_unlock(&tl->mutex);
-
-		/* Defer the final release to after the spinlock */
-		if (refcount_dec_and_test(&tl->kref.refcount)) {
-			GEM_BUG_ON(tl->active_count);
-			list_add(&tl->link, &free);
-		}
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	list_for_each_entry_safe(tl, tn, &free, link)
-		__intel_timeline_free(&tl->kref);
-
-	return !list_empty(&timelines->active_list);
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"
@@ -1568,10 +1609,14 @@ static struct i915_global_request global = { {
 
 int __init i915_global_request_init(void)
 {
-	global.slab_requests = KMEM_CACHE(i915_request,
-					  SLAB_HWCACHE_ALIGN |
-					  SLAB_RECLAIM_ACCOUNT |
-					  SLAB_TYPESAFE_BY_RCU);
+	global.slab_requests =
+		kmem_cache_create("i915_request",
+				  sizeof(struct i915_request),
+				  __alignof__(struct i915_request),
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_RECLAIM_ACCOUNT |
+				  SLAB_TYPESAFE_BY_RCU,
+				  __i915_request_ctor);
 	if (!global.slab_requests)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index e4dd013761e8..031433691a06 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,8 +28,10 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gem/i915_gem_context_types.h"
 #include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
+#include "gt/intel_timeline_types.h"
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
@@ -41,14 +43,19 @@
 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
-struct intel_timeline;
-struct intel_timeline_cacheline;
 
 struct i915_capture_list {
 	struct i915_capture_list *next;
 	struct i915_vma *vma;
 };
 
+#define RQ_TRACE(rq, fmt, ...) do {					\
+	const struct i915_request *rq__ = (rq);				\
+	ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt,	\
+		     rq__->fence.context, rq__->fence.seqno,		\
+		     hwsp_seqno(rq__), ##__VA_ARGS__);			\
+} while (0)
+
 enum {
 	/*
 	 * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
@@ -70,6 +77,38 @@ enum {
 	 * a request is on the various signal_list.
 	 */
 	I915_FENCE_FLAG_SIGNAL,
+
+	/*
+	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
+	 *
+	 * The execution of some requests should not be interrupted. This is
+	 * a sensitive operation as it makes the request super important,
+	 * blocking other higher priority work. Abuse of this flag will
+	 * lead to quality of service issues.
+	 */
+	I915_FENCE_FLAG_NOPREEMPT,
+
+	/*
+	 * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue
+	 *
+	 * A high priority sentinel request may be submitted to clear the
+	 * submission queue. As it will be the only request in-flight, upon
+	 * execution all other active requests will have been preempted and
+	 * unsubmitted. This preemptive pulse is used to re-evaluate the
+	 * in-flight requests, particularly in cases where an active context
+	 * is banned and those active requests need to be cancelled.
+	 */
+	I915_FENCE_FLAG_SENTINEL,
+
+	/*
+	 * I915_FENCE_FLAG_BOOST - upclock the gpu for this request
+	 *
+	 * Some requests are more important than others! In particular, a
+	 * request that the user is waiting on is typically required for
+	 * interactive latency, for which we want to minimise by upclocking
+	 * the GPU. Here we track such boost requests on a per-request basis.
+	 */
+	I915_FENCE_FLAG_BOOST,
 };
 
 /**
@@ -109,11 +148,10 @@ struct i915_request {
 	 * i915_request_free() will then decrement the refcount on the
 	 * context.
 	 */
-	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
-	struct intel_context *hw_context;
+	struct intel_context *context;
 	struct intel_ring *ring;
-	struct intel_timeline *timeline;
+	struct intel_timeline __rcu *timeline;
 	struct list_head signal_link;
 
 	/*
@@ -144,6 +182,10 @@ struct i915_request {
 	union {
 		wait_queue_entry_t submitq;
 		struct i915_sw_dma_fence_cb dmaq;
+		struct i915_request_duration_cb {
+			struct dma_fence_cb cb;
+			ktime_t emitted;
+		} duration;
 	};
 	struct list_head execute_cb;
 	struct i915_sw_fence semaphore;
@@ -176,7 +218,7 @@ struct i915_request {
 	 * inside the timeline's HWSP vma, but it is only valid while this
 	 * request has not completed and guarded by the timeline mutex.
 	 */
-	struct intel_timeline_cacheline *hwsp_cacheline;
+	struct intel_timeline_cacheline __rcu *hwsp_cacheline;
 
 	/** Position in the ring of the start of the request */
 	u32 head;
@@ -211,15 +253,10 @@ struct i915_request {
 	 * on the active_list (of their final request).
 	 */
 	struct i915_capture_list *capture_list;
-	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
-	unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
-
 	/** timeline->request entry for this request */
 	struct list_head link;
 
@@ -251,6 +288,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
 			  const struct i915_sched_attr *attr);
 
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
@@ -309,10 +347,8 @@ long i915_request_wait(struct i915_request *rq,
 		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
-#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
@@ -433,15 +469,45 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 
 static inline bool i915_request_has_waitboost(const struct i915_request *rq)
 {
-	return rq->flags & I915_REQUEST_WAITBOOST;
+	return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
 }
 
 static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 {
 	/* Preemption should only be disabled very rarely */
-	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
+	return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags));
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915);
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
+}
+
+static inline struct intel_timeline *
+i915_request_timeline(struct i915_request *rq)
+{
+	/* Valid only while the request is being constructed (or retired). */
+	return rcu_dereference_protected(rq->timeline,
+					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+}
+
+static inline struct i915_gem_context *
+i915_request_gem_context(struct i915_request *rq)
+{
+	/* Valid only while the request is being constructed (or retired). */
+	return rcu_dereference_protected(rq->context->gem_context, true);
+}
+
+static inline struct intel_timeline *
+i915_request_active_timeline(struct i915_request *rq)
+{
+	/*
+	 * When in use during submission, we are protected by a guarantee that
+	 * the context/timeline is pinned and must remain pinned until after
+	 * this submission.
+	 */
+	return rcu_dereference_protected(rq->timeline,
+					 lockdep_is_held(&rq->engine->active.lock));
+}
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index 6617963df9ed..b7b59328cb76 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -67,15 +67,15 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
 }
 
 /**
- * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
- * @__dmap:	DMA address (output)
+ * __for_each_sgt_daddr - iterate over the device addresses of the given sg_table
+ * @__dp:	Device address (output)
  * @__iter:	'struct sgt_iter' (iterator state, internal)
  * @__sgt:	sg_table to iterate over (input)
  * @__step:	step size
  */
-#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step)		\
+#define __for_each_sgt_daddr(__dp, __iter, __sgt, __step)		\
 	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
-	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     ((__dp) = (__iter).dma + (__iter).curr), (__iter).sgp;	\
 	     (((__iter).curr += (__step)) >= (__iter).max) ?		\
 	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3eba8a2b39c2..bf87c70bfdd9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -211,12 +211,9 @@ static void kick_submission(struct intel_engine_cs *engine,
 
 	/*
 	 * If we are already the currently executing context, don't
-	 * bother evaluating if we should preempt ourselves, or if
-	 * we expect nothing to change as a result of running the
-	 * tasklet, i.e. we have not change the priority queue
-	 * sufficiently to oust the running context.
+	 * bother evaluating if we should preempt ourselves.
 	 */
-	if (inflight->hw_context == rq->hw_context)
+	if (inflight->context == rq->context)
 		goto unlock;
 
 	engine->execlists.queue_priority_hint = prio;
@@ -390,9 +387,19 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
+
+	i915_sched_node_reinit(node);
+}
+
+void i915_sched_node_reinit(struct i915_sched_node *node)
+{
 	node->attr.priority = I915_PRIORITY_INVALID;
 	node->semaphores = 0;
 	node->flags = 0;
+
+	GEM_BUG_ON(!list_empty(&node->signalers_list));
+	GEM_BUG_ON(!list_empty(&node->waiters_list));
+	GEM_BUG_ON(!list_empty(&node->link));
 }
 
 static struct i915_dependency *
@@ -477,13 +484,13 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 	 * so we may be called out-of-order.
 	 */
 	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!node_signaled(dep->signaler));
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
 		list_del(&dep->wait_link);
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->signalers_list);
 
 	/* Remove ourselves from everyone who depends upon us */
 	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
@@ -494,6 +501,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->waiters_list);
 
 	spin_unlock_irq(&schedule_lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7eefccff39bf..d1dc4efef77b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -26,6 +26,7 @@
 					 sched.link)
 
 void i915_sched_node_init(struct i915_sched_node *node);
+void i915_sched_node_reinit(struct i915_sched_node *node);
 
 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      struct i915_sched_node *signal,
@@ -52,22 +53,4 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 		__i915_priolist_free(p);
 }
 
-static inline bool i915_scheduler_need_preempt(int prio, int active)
-{
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 *
-	 * More naturally we would write
-	 *	prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
-	 */
-	return prio > max(I915_PRIORITY_NORMAL - 1, active);
-}
-
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index aad81acba9dc..d18e70550054 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -49,6 +49,15 @@ struct i915_sched_attr {
  * DAG of each request, we are able to insert it into a sorted queue when it
  * is ready, and are able to reorder its portion of the graph to accommodate
  * dynamic priority changes.
+ *
+ * Ok, there is now one active element to the "scheduler" in the backends.
+ * We let a new context run for a small amount of time before re-evaluating
+ * the run order. As we re-evaluate, we maintain the strict ordering of
+ * dependencies, but attempt to rotate the active contexts (the current context
+ * is put to the back of its priority queue, then reshuffling its dependents).
+ * This provides minimal timeslicing and prevents a userspace hog (e.g.
+ * something waiting on a user semaphore [VkEvent]) from denying service to
+ * others.
  */
 struct i915_sched_node {
 	struct list_head signalers_list; /* those before us, we depend upon */
diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index 4d88205de51b..98bcb6fa0ab4 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -36,6 +36,7 @@ struct i915_selftest {
 	char *filter;
 	int mock;
 	int live;
+	int perf;
 };
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -45,6 +46,7 @@ extern struct i915_selftest i915_selftest;
 
 int i915_mock_selftests(void);
 int i915_live_selftests(struct pci_dev *pdev);
+int i915_perf_selftests(struct pci_dev *pdev);
 
 /* We extract the function declarations from i915_mock_selftests.h and
  * i915_live_selftests.h Add your unit test declarations there!
@@ -61,6 +63,7 @@ int i915_live_selftests(struct pci_dev *pdev);
 #undef selftest
 #define selftest(name, func) int func(struct drm_i915_private *i915);
 #include "selftests/i915_live_selftests.h"
+#include "selftests/i915_perf_selftests.h"
 #undef selftest
 
 struct i915_subtest {
@@ -109,6 +112,7 @@ int __i915_subtests(const char *caller,
 
 static inline int i915_mock_selftests(void) { return 0; }
 static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; }
+static inline int i915_perf_selftests(struct pci_dev *pdev) { return 0; }
 
 #define I915_SELFTEST_DECLARE(x)
 #define I915_SELFTEST_ONLY(x) 0
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 8508a01ad8b9..8812cdd9007f 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -28,6 +28,7 @@
 
 #include "display/intel_fbc.h"
 #include "display/intel_gmbus.h"
+#include "display/intel_vga.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
@@ -57,7 +58,7 @@ static void i915_restore_display(struct drm_i915_private *dev_priv)
 	if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv))
 		I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL);
 
-	i915_redisable_vga(dev_priv);
+	intel_vga_redisable(dev_priv);
 }
 
 int i915_save_state(struct drm_i915_private *dev_priv)
@@ -65,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	i915_save_display(dev_priv);
 
 	if (IS_GEN(dev_priv, 4))
@@ -100,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 			dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i));
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	return 0;
 }
 
@@ -110,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	if (IS_GEN(dev_priv, 4))
 		pci_write_config_word(pdev, GCDGMBUS,
 				      dev_priv->regfile.saveGCDGMBUS);
@@ -145,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 			I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]);
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	intel_gmbus_reset(dev_priv);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6a88db291252..51ba97daf2a0 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,12 @@
 #include "i915_sw_fence.h"
 #include "i915_selftest.h"
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+#define I915_SW_FENCE_BUG_ON(expr) BUG_ON(expr)
+#else
+#define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
+#endif
+
 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
 
 static DEFINE_SPINLOCK(i915_sw_fence_lock);
@@ -218,13 +224,21 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 {
 	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
 
+	__init_waitqueue_head(&fence->wait, name, key);
+	fence->flags = (unsigned long)fn;
+
+	i915_sw_fence_reinit(fence);
+}
+
+void i915_sw_fence_reinit(struct i915_sw_fence *fence)
+{
 	debug_fence_init(fence);
 
-	__init_waitqueue_head(&fence->wait, name, key);
 	atomic_set(&fence->pending, 1);
 	fence->error = 0;
 
-	fence->flags = (unsigned long)fn;
+	I915_SW_FENCE_BUG_ON(!fence->flags);
+	I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head));
 }
 
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
@@ -414,8 +428,10 @@ static void dma_i915_sw_fence_wake_timer(struct dma_fence *dma,
 	struct i915_sw_fence *fence;
 
 	fence = xchg(&cb->base.fence, NULL);
-	if (fence)
+	if (fence) {
+		i915_sw_fence_set_error_once(fence, dma->error);
 		i915_sw_fence_complete(fence);
+	}
 
 	irq_work_queue(&cb->work);
 }
@@ -443,8 +459,10 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	debug_fence_assert(fence);
 	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	if (dma_fence_is_signaled(dma))
+	if (dma_fence_is_signaled(dma)) {
+		i915_sw_fence_set_error_once(fence, dma->error);
 		return 0;
+	}
 
 	cb = kmalloc(timeout ?
 		     sizeof(struct i915_sw_dma_fence_cb_timer) :
@@ -454,7 +472,12 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		if (!gfpflags_allow_blocking(gfp))
 			return -ENOMEM;
 
-		return dma_fence_wait(dma, false);
+		ret = dma_fence_wait(dma, false);
+		if (ret)
+			return ret;
+
+		i915_sw_fence_set_error_once(fence, dma->error);
+		return 0;
 	}
 
 	cb->fence = fence;
@@ -504,8 +527,10 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 
 	debug_fence_assert(fence);
 
-	if (dma_fence_is_signaled(dma))
+	if (dma_fence_is_signaled(dma)) {
+		i915_sw_fence_set_error_once(fence, dma->error);
 		return 0;
+	}
 
 	cb->fence = fence;
 	i915_sw_fence_await(fence);
@@ -539,8 +564,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 		struct dma_fence **shared;
 		unsigned int count, i;
 
-		ret = dma_resv_get_fences_rcu(resv,
-							&excl, &count, &shared);
+		ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index ab7d58bd0b9d..19e806ce43bc 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -54,6 +54,8 @@ do {								\
 	__i915_sw_fence_init((fence), (fn), NULL, NULL)
 #endif
 
+void i915_sw_fence_reinit(struct i915_sw_fence *fence);
+
 #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
 void i915_sw_fence_fini(struct i915_sw_fence *fence);
 #else
@@ -110,7 +112,8 @@ static inline void i915_sw_fence_wait(struct i915_sw_fence *fence)
 static inline void
 i915_sw_fence_set_error_once(struct i915_sw_fence *fence, int error)
 {
-	cmpxchg(&fence->error, 0, error);
+	if (unlikely(error))
+		cmpxchg(&fence->error, 0, error);
 }
 
 #endif /* _I915_SW_FENCE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index 07552cd544f2..997b2998f1f2 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -6,6 +6,13 @@
 
 #include "i915_sw_fence_work.h"
 
+static void fence_complete(struct dma_fence_work *f)
+{
+	if (f->ops->release)
+		f->ops->release(f);
+	dma_fence_signal(&f->dma);
+}
+
 static void fence_work(struct work_struct *work)
 {
 	struct dma_fence_work *f = container_of(work, typeof(*f), work);
@@ -14,7 +21,8 @@ static void fence_work(struct work_struct *work)
 	err = f->ops->work(f);
 	if (err)
 		dma_fence_set_error(&f->dma, err);
-	dma_fence_signal(&f->dma);
+
+	fence_complete(f);
 	dma_fence_put(&f->dma);
 }
 
@@ -32,7 +40,7 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 			dma_fence_get(&f->dma);
 			queue_work(system_unbound_wq, &f->work);
 		} else {
-			dma_fence_signal(&f->dma);
+			fence_complete(f);
 		}
 		break;
 
@@ -60,9 +68,6 @@ static void fence_release(struct dma_fence *fence)
 {
 	struct dma_fence_work *f = container_of(fence, typeof(*f), dma);
 
-	if (f->ops->release)
-		f->ops->release(f);
-
 	i915_sw_fence_fini(&f->chain);
 
 	BUILD_BUG_ON(offsetof(typeof(*f), dma));
@@ -78,12 +83,11 @@ static const struct dma_fence_ops fence_ops = {
 void dma_fence_work_init(struct dma_fence_work *f,
 			 const struct dma_fence_work_ops *ops)
 {
+	f->ops = ops;
 	spin_lock_init(&f->lock);
 	dma_fence_init(&f->dma, &fence_ops, &f->lock, 0, 0);
 	i915_sw_fence_init(&f->chain, fence_notify);
 	INIT_WORK(&f->work, fence_work);
-
-	f->ops = ops;
 }
 
 int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal)
diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c
new file mode 100644
index 000000000000..39c79e1c5b52
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_switcheroo.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/vga_switcheroo.h>
+
+#include "i915_drv.h"
+#include "i915_switcheroo.h"
+
+static void i915_switcheroo_set_state(struct pci_dev *pdev,
+				      enum vga_switcheroo_state state)
+{
+	struct drm_i915_private *i915 = pdev_to_i915(pdev);
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+
+	if (!i915) {
+		dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n");
+		return;
+	}
+
+	if (state == VGA_SWITCHEROO_ON) {
+		pr_info("switched on\n");
+		i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
+		/* i915 resume handler doesn't set to D0 */
+		pci_set_power_state(pdev, PCI_D0);
+		i915_resume_switcheroo(i915);
+		i915->drm.switch_power_state = DRM_SWITCH_POWER_ON;
+	} else {
+		pr_info("switched off\n");
+		i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
+		i915_suspend_switcheroo(i915, pmm);
+		i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF;
+	}
+}
+
+static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_i915_private *i915 = pdev_to_i915(pdev);
+
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return i915 && i915->drm.open_count == 0;
+}
+
+static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
+	.set_gpu_state = i915_switcheroo_set_state,
+	.reprobe = NULL,
+	.can_switch = i915_switcheroo_can_switch,
+};
+
+int i915_switcheroo_register(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	return vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false);
+}
+
+void i915_switcheroo_unregister(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	vga_switcheroo_unregister_client(pdev);
+}
diff --git a/drivers/gpu/drm/i915/i915_switcheroo.h b/drivers/gpu/drm/i915/i915_switcheroo.h
new file mode 100644
index 000000000000..59b6c1e07d75
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_switcheroo.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_SWITCHEROO__
+#define __I915_SWITCHEROO__
+
+struct drm_i915_private;
+
+int i915_switcheroo_register(struct drm_i915_private *i915);
+void i915_switcheroo_unregister(struct drm_i915_private *i915);
+
+#endif /* __I915_SWITCHEROO__ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d8a3b180c084..0cef3130db05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -30,6 +30,9 @@
 #include <linux/stat.h>
 #include <linux/sysfs.h>
 
+#include "gt/intel_rc6.h"
+#include "gt/intel_rps.h"
+
 #include "i915_drv.h"
 #include "i915_sysfs.h"
 #include "intel_pm.h"
@@ -49,7 +52,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
 	u64 res = 0;
 
 	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
-		res = intel_rc6_residency_us(dev_priv, reg);
+		res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
@@ -142,12 +145,12 @@ static const struct attribute_group media_rc6_attr_group = {
 };
 #endif
 
-static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset)
+static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
 {
-	if (!HAS_L3_DPF(dev_priv))
+	if (!HAS_L3_DPF(i915))
 		return -EPERM;
 
-	if (offset % 4 != 0)
+	if (!IS_ALIGNED(offset, sizeof(u32)))
 		return -EINVAL;
 
 	if (offset >= GEN7_L3LOG_SIZE)
@@ -162,31 +165,24 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	     loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
 
-	count = round_down(count, 4);
-
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
+	count = round_down(count, sizeof(u32));
 	count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count);
+	memset(buf, 0, count);
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	if (dev_priv->l3_parity.remap_info[slice])
+	spin_lock(&i915->gem.contexts.lock);
+	if (i915->l3_parity.remap_info[slice])
 		memcpy(buf,
-		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
+		       i915->l3_parity.remap_info[slice] + offset / sizeof(u32),
 		       count);
-	else
-		memset(buf, 0, count);
-
-	mutex_unlock(&dev->struct_mutex);
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return count;
 }
@@ -197,46 +193,49 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	      loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
-	struct i915_gem_context *ctx;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	int slice = (int)(uintptr_t)attr->private;
-	u32 **remap_info;
+	u32 *remap_info, *freeme = NULL;
+	struct i915_gem_context *ctx;
 	int ret;
 
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	if (count < sizeof(u32))
+		return -EINVAL;
 
-	remap_info = &dev_priv->l3_parity.remap_info[slice];
-	if (!*remap_info) {
-		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-		if (!*remap_info) {
-			ret = -ENOMEM;
-			goto out;
-		}
+	remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+	if (!remap_info)
+		return -ENOMEM;
+
+	spin_lock(&i915->gem.contexts.lock);
+
+	if (i915->l3_parity.remap_info[slice]) {
+		freeme = remap_info;
+		remap_info = i915->l3_parity.remap_info[slice];
+	} else {
+		i915->l3_parity.remap_info[slice] = remap_info;
 	}
 
-	/* TODO: Ideally we really want a GPU reset here to make sure errors
-	 * aren't propagated. Since I cannot find a stable way to reset the GPU
-	 * at this point it is left as a TODO.
-	*/
-	memcpy(*remap_info + (offset/4), buf, count);
+	count = round_down(count, sizeof(u32));
+	memcpy(remap_info + offset / sizeof(u32), buf, count);
 
 	/* NB: We defer the remapping until we switch to the context */
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link)
-		ctx->remap_slice |= (1<<slice);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link)
+		ctx->remap_slice |= BIT(slice);
 
-	ret = count;
+	spin_unlock(&i915->gem.contexts.lock);
+	kfree(freeme);
 
-out:
-	mutex_unlock(&dev->struct_mutex);
+	/*
+	 * TODO: Ideally we really want a GPU reset here to make sure errors
+	 * aren't propagated. Since I cannot find a stable way to reset the GPU
+	 * at this point it is left as a TODO.
+	*/
 
-	return ret;
+	return count;
 }
 
 static const struct bin_attribute dpf_attrs = {
@@ -260,44 +259,30 @@ static const struct bin_attribute dpf_attrs_1 = {
 static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	intel_wakeref_t wakeref;
-	u32 freq;
-
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		vlv_punit_get(dev_priv);
-		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-		vlv_punit_put(dev_priv);
-
-		freq = (freq >> 8) & 0xff;
-	} else {
-		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
-	}
-
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &i915->gt.rps;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq));
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			intel_rps_read_actual_frequency(rps));
 }
 
 static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &i915->gt.rps;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.cur_freq));
+			intel_gpu_freq(rps, rps->cur_freq));
 }
 
 static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
 {
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &i915->gt.rps;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.boost_freq));
+			intel_gpu_freq(rps, rps->boost_freq));
 }
 
 static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
@@ -305,7 +290,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 				       const char *buf, size_t count)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	bool boost = false;
 	ssize_t ret;
 	u32 val;
@@ -315,7 +300,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 		return ret;
 
 	/* Validate against (static) hardware limits */
-	val = intel_freq_opcode(dev_priv, val);
+	val = intel_freq_opcode(rps, val);
 	if (val < rps->min_freq || val > rps->max_freq)
 		return -EINVAL;
 
@@ -335,19 +320,19 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
 				     struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &dev_priv->gt.rps;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.efficient_freq));
+			intel_gpu_freq(rps, rps->efficient_freq));
 }
 
 static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &dev_priv->gt.rps;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.max_freq_softlimit));
+			intel_gpu_freq(rps, rps->max_freq_softlimit));
 }
 
 static ssize_t gt_max_freq_mhz_store(struct device *kdev,
@@ -355,19 +340,17 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 				     const char *buf, size_t count)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	intel_wakeref_t wakeref;
-	u32 val;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	mutex_lock(&rps->lock);
 
-	val = intel_freq_opcode(dev_priv, val);
+	val = intel_freq_opcode(rps, val);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
@@ -377,7 +360,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 
 	if (val > rps->rp0_freq)
 		DRM_DEBUG("User requested overclocking to %d\n",
-			  intel_gpu_freq(dev_priv, val));
+			  intel_gpu_freq(rps, val));
 
 	rps->max_freq_softlimit = val;
 
@@ -385,14 +368,15 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 		      rps->min_freq_softlimit,
 		      rps->max_freq_softlimit);
 
-	/* We still need *_set_rps to process the new max_delay and
+	/*
+	 * We still need *_set_rps to process the new max_delay and
 	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	 * frequency request may be unchanged.
+	 */
+	intel_rps_set(rps, val);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return ret ?: count;
 }
@@ -400,10 +384,10 @@ unlock:
 static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	struct intel_rps *rps = &dev_priv->gt.rps;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.min_freq_softlimit));
+			intel_gpu_freq(rps, rps->min_freq_softlimit));
 }
 
 static ssize_t gt_min_freq_mhz_store(struct device *kdev,
@@ -411,19 +395,17 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 				     const char *buf, size_t count)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	intel_wakeref_t wakeref;
-	u32 val;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	mutex_lock(&rps->lock);
 
-	val = intel_freq_opcode(dev_priv, val);
+	val = intel_freq_opcode(rps, val);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
@@ -437,14 +419,15 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 		      rps->min_freq_softlimit,
 		      rps->max_freq_softlimit);
 
-	/* We still need *_set_rps to process the new min_delay and
+	/*
+	 * We still need *_set_rps to process the new min_delay and
 	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	 * frequency request may be unchanged.
+	 */
+	intel_rps_set(rps, val);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return ret ?: count;
 }
@@ -466,15 +449,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
 static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct intel_rps *rps = &dev_priv->gt.rps;
 	u32 val;
 
 	if (attr == &dev_attr_gt_RP0_freq_mhz)
-		val = intel_gpu_freq(dev_priv, rps->rp0_freq);
+		val = intel_gpu_freq(rps, rps->rp0_freq);
 	else if (attr == &dev_attr_gt_RP1_freq_mhz)
-		val = intel_gpu_freq(dev_priv, rps->rp1_freq);
+		val = intel_gpu_freq(rps, rps->rp1_freq);
 	else if (attr == &dev_attr_gt_RPn_freq_mhz)
-		val = intel_gpu_freq(dev_priv, rps->min_freq);
+		val = intel_gpu_freq(rps, rps->min_freq);
 	else
 		BUG();
 
@@ -515,15 +498,15 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 
 	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	ssize_t ret;
 
 	gpu = i915_first_error_state(i915);
 	if (IS_ERR(gpu)) {
 		ret = PTR_ERR(gpu);
 	} else if (gpu) {
-		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_state_put(gpu);
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
 	} else {
 		const char *str = "No error state collected\n";
 		size_t len = strlen(str);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 24f2944da09d..233a97a2c276 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -341,7 +341,7 @@ TRACE_EVENT(intel_disable_plane,
 
 /* pipe updates */
 
-TRACE_EVENT(i915_pipe_update_start,
+TRACE_EVENT(intel_pipe_update_start,
 	    TP_PROTO(struct intel_crtc *crtc),
 	    TP_ARGS(crtc),
 
@@ -366,7 +366,7 @@ TRACE_EVENT(i915_pipe_update_start,
 		       __entry->scanline, __entry->min, __entry->max)
 );
 
-TRACE_EVENT(i915_pipe_update_vblank_evaded,
+TRACE_EVENT(intel_pipe_update_vblank_evaded,
 	    TP_PROTO(struct intel_crtc *crtc),
 	    TP_ARGS(crtc),
 
@@ -391,7 +391,7 @@ TRACE_EVENT(i915_pipe_update_vblank_evaded,
 		       __entry->scanline, __entry->min, __entry->max)
 );
 
-TRACE_EVENT(i915_pipe_update_end,
+TRACE_EVENT(intel_pipe_update_end,
 	    TP_PROTO(struct intel_crtc *crtc, u32 frame, int scanline_end),
 	    TP_ARGS(crtc, frame, scanline_end),
 
@@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->flags)
 );
 
@@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
-			__entry->vm = ctx->vm;
+			__entry->vm = rcu_access_pointer(ctx->vm);
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
index 16acdf7bdbe6..c47261ae86ea 100644
--- a/drivers/gpu/drm/i915/i915_utils.c
+++ b/drivers/gpu/drm/i915/i915_utils.c
@@ -23,7 +23,7 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
 	struct va_format vaf;
 	va_list args;
 
-	if (is_debug && !(drm_debug & DRM_UT_DRIVER))
+	if (is_debug && !drm_debug_enabled(DRM_UT_DRIVER))
 		return;
 
 	va_start(args, fmt);
@@ -54,25 +54,54 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 static unsigned int i915_probe_fail_count;
 
-int __i915_inject_load_error(struct drm_i915_private *i915, int err,
-			     const char *func, int line)
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+			      const char *func, int line)
 {
-	if (i915_probe_fail_count >= i915_modparams.inject_load_failure)
+	if (i915_probe_fail_count >= i915_modparams.inject_probe_failure)
 		return 0;
 
-	if (++i915_probe_fail_count < i915_modparams.inject_load_failure)
+	if (++i915_probe_fail_count < i915_modparams.inject_probe_failure)
 		return 0;
 
 	__i915_printk(i915, KERN_INFO,
 		      "Injecting failure %d at checkpoint %u [%s:%d]\n",
-		      err, i915_modparams.inject_load_failure, func, line);
-	i915_modparams.inject_load_failure = 0;
+		      err, i915_modparams.inject_probe_failure, func, line);
+	i915_modparams.inject_probe_failure = 0;
 	return err;
 }
 
 bool i915_error_injected(void)
 {
-	return i915_probe_fail_count && !i915_modparams.inject_load_failure;
+	return i915_probe_fail_count && !i915_modparams.inject_probe_failure;
 }
 
 #endif
+
+void cancel_timer(struct timer_list *t)
+{
+	if (!READ_ONCE(t->expires))
+		return;
+
+	del_timer(t);
+	WRITE_ONCE(t->expires, 0);
+}
+
+void set_timer_ms(struct timer_list *t, unsigned long timeout)
+{
+	if (!timeout) {
+		cancel_timer(t);
+		return;
+	}
+
+	timeout = msecs_to_jiffies_timeout(timeout);
+
+	/*
+	 * Paranoia to make sure the compiler computes the timeout before
+	 * loading 'jiffies' as jiffies is volatile and may be updated in
+	 * the background by a timer tick. All to reduce the complexity
+	 * of the addition and reduce the risk of losing a jiffie.
+	 */
+	barrier();
+
+	mod_timer(t, jiffies + timeout);
+}
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 562f756da421..b0ade76bec90 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -32,6 +32,7 @@
 #include <linux/workqueue.h>
 
 struct drm_i915_private;
+struct timer_list;
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -60,20 +61,20 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 
-int __i915_inject_load_error(struct drm_i915_private *i915, int err,
-			     const char *func, int line);
-#define i915_inject_load_error(_i915, _err) \
-	__i915_inject_load_error((_i915), (_err), __func__, __LINE__)
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+			      const char *func, int line);
+#define i915_inject_probe_error(_i915, _err) \
+	__i915_inject_probe_error((_i915), (_err), __func__, __LINE__)
 bool i915_error_injected(void);
 
 #else
 
-#define i915_inject_load_error(_i915, _err) 0
+#define i915_inject_probe_error(i915, e) ({ BUILD_BUG_ON_INVALID(i915); 0; })
 #define i915_error_injected() false
 
 #endif
 
-#define i915_inject_probe_failure(i915) i915_inject_load_error((i915), -ENODEV)
+#define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV)
 
 #define i915_probe_error(i915, fmt, ...)				   \
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
@@ -421,4 +422,25 @@ static inline void add_taint_for_CI(unsigned int taint)
 	add_taint(taint, LOCKDEP_STILL_OK);
 }
 
+void cancel_timer(struct timer_list *t);
+void set_timer_ms(struct timer_list *t, unsigned long timeout);
+
+static inline bool timer_expired(const struct timer_list *t)
+{
+	return READ_ONCE(t->expires) && !timer_pending(t);
+}
+
+/*
+ * This is a lookalike for IS_ENABLED() that takes a kconfig value,
+ * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero
+ * i.e. whether the configuration is active. Wrapping up the config inside
+ * a boolean context prevents clang and smatch from complaining about potential
+ * issues in confusing logical-&& with bitwise-& for constants.
+ *
+ * Sadly IS_ENABLED() itself does not work with kconfig values.
+ *
+ * Returns 0 if @config is 0, 1 if set to any value.
+ */
+#define IS_ACTIVE(config) ((config) != 0)
+
 #endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e0e677b2a3a9..17d7c525ea5c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -28,10 +28,13 @@
 #include "display/intel_frontbuffer.h"
 
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_vma.h"
 
@@ -90,6 +93,7 @@ static int __i915_vma_active(struct i915_active *ref)
 	return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
 }
 
+__i915_active_call
 static void __i915_vma_retire(struct i915_active *ref)
 {
 	i915_vma_put(active_to_vma(ref));
@@ -104,21 +108,22 @@ vma_create(struct drm_i915_gem_object *obj,
 	struct rb_node *rb, **p;
 
 	/* The aliasing_ppgtt should never be used directly! */
-	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
+	GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);
 
 	vma = i915_vma_alloc();
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->vm = vm;
+	kref_init(&vma->ref);
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(vm);
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
 	vma->resv = obj->base.resv;
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active,
-			 __i915_vma_active, __i915_vma_retire);
+	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -171,7 +176,7 @@ vma_create(struct drm_i915_gem_object *obj,
 								i915_gem_object_get_stride(obj));
 		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
 
-		vma->flags |= I915_VMA_GGTT;
+		__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
 	}
 
 	spin_lock(&obj->vma.lock);
@@ -218,10 +223,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	spin_unlock(&obj->vma.lock);
 
-	mutex_lock(&vm->mutex);
-	list_add(&vma->vm_link, &vm->unbound_list);
-	mutex_unlock(&vm->mutex);
-
 	return vma;
 
 err_vma:
@@ -265,8 +266,6 @@ vma_lookup(struct drm_i915_gem_object *obj,
  * Once created, the VMA is kept until either the object is freed, or the
  * address space is closed.
  *
- * Must be called with struct_mutex held.
- *
  * Returns the vma, or an error pointer.
  */
 struct i915_vma *
@@ -277,7 +276,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
-	GEM_BUG_ON(vm->closed);
+	GEM_BUG_ON(!atomic_read(&vm->open));
 
 	spin_lock(&obj->vma.lock);
 	vma = vma_lookup(obj, vm, view);
@@ -291,18 +290,70 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
+struct i915_vma_work {
+	struct dma_fence_work base;
+	struct i915_vma *vma;
+	struct drm_i915_gem_object *pinned;
+	enum i915_cache_level cache_level;
+	unsigned int flags;
+};
+
+static int __vma_bind(struct dma_fence_work *work)
+{
+	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
+	struct i915_vma *vma = vw->vma;
+	int err;
+
+	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
+	if (err)
+		atomic_or(I915_VMA_ERROR, &vma->flags);
+
+	return err;
+}
+
+static void __vma_release(struct dma_fence_work *work)
+{
+	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
+
+	if (vw->pinned)
+		__i915_gem_object_unpin_pages(vw->pinned);
+}
+
+static const struct dma_fence_work_ops bind_ops = {
+	.name = "bind",
+	.work = __vma_bind,
+	.release = __vma_release,
+};
+
+struct i915_vma_work *i915_vma_work(void)
+{
+	struct i915_vma_work *vw;
+
+	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
+	if (!vw)
+		return NULL;
+
+	dma_fence_work_init(&vw->base, &bind_ops);
+	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
+
+	return vw;
+}
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
  * @cache_level: mapping cache level
  * @flags: flags like global or local mapping
+ * @work: preallocated worker for allocating and binding the PTE
  *
  * DMA addresses are taken from the scatter-gather table of this object (or of
  * this VMA in case of non-default GGTT views) and PTE entries set up.
  * Note that DMA addresses are also the only part of the SG table we care about.
  */
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags)
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -319,13 +370,11 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (GEM_DEBUG_WARN_ON(!flags))
 		return -EINVAL;
 
-	bind_flags = 0;
-	if (flags & PIN_GLOBAL)
-		bind_flags |= I915_VMA_GLOBAL_BIND;
-	if (flags & PIN_USER)
-		bind_flags |= I915_VMA_LOCAL_BIND;
+	bind_flags = flags;
+	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
-	vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+	vma_flags = atomic_read(&vma->flags);
+	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	if (flags & PIN_UPDATE)
 		bind_flags |= vma_flags;
 	else
@@ -336,11 +385,36 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
-	if (ret)
-		return ret;
+	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+		work->vma = vma;
+		work->cache_level = cache_level;
+		work->flags = bind_flags | I915_VMA_ALLOC;
+
+		/*
+		 * Note we only want to chain up to the migration fence on
+		 * the pages (not the object itself). As we don't track that,
+		 * yet, we have to use the exclusive fence instead.
+		 *
+		 * Also note that we do not want to track the async vma as
+		 * part of the obj->resv->excl_fence as it only affects
+		 * execution and not content or object's backing store lifetime.
+		 */
+		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
+		i915_active_set_exclusive(&vma->active, &work->base.dma);
+		work->base.dma.error = 0; /* enable the queue_work() */
+
+		if (vma->obj) {
+			__i915_gem_object_pin_pages(vma->obj);
+			work->pinned = vma->obj;
+		}
+	} else {
+		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
+		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
+		if (ret)
+			return ret;
+	}
 
-	vma->flags |= bind_flags;
+	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
 
@@ -349,19 +423,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	void __iomem *ptr;
 	int err;
 
-	/* Access through the GTT requires the device to be awake. */
-	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
-
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
 	}
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
+	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 
-	ptr = vma->iomap;
+	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
 		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
 					vma->node.start,
@@ -371,7 +441,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 			goto err;
 		}
 
-		vma->iomap = ptr;
+		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
+			io_mapping_unmap(ptr);
+			ptr = vma->iomap;
+		}
 	}
 
 	__i915_vma_pin(vma);
@@ -381,6 +454,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 		goto err_unpin;
 
 	i915_vma_set_ggtt_write(vma);
+
+	/* NB Access through the GTT requires the device to be awake. */
 	return ptr;
 
 err_unpin:
@@ -391,18 +466,12 @@ err:
 
 void i915_vma_flush_writes(struct i915_vma *vma)
 {
-	if (!i915_vma_has_ggtt_write(vma))
-		return;
-
-	intel_gt_flush_ggtt_writes(vma->vm->gt);
-
-	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_unset_ggtt_write(vma))
+		intel_gt_flush_ggtt_writes(vma->vm->gt);
 }
 
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	GEM_BUG_ON(vma->iomap == NULL);
 
 	i915_vma_flush_writes(vma);
@@ -438,6 +507,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
+	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
+		return true;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -472,17 +544,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
 
 	if (mappable && fenceable)
-		vma->flags |= I915_VMA_CAN_FENCE;
+		set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 	else
-		vma->flags &= ~I915_VMA_CAN_FENCE;
-}
-
-static bool color_differs(struct drm_mm_node *node, unsigned long color)
-{
-	return node->allocated && node->color != color;
+		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 }
 
-bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level)
+bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
 {
 	struct drm_mm_node *node = &vma->node;
 	struct drm_mm_node *other;
@@ -494,7 +561,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level)
 	 * these constraints apply and set the drm_mm.color_adjust
 	 * appropriately.
 	 */
-	if (vma->vm->mm.color_adjust == NULL)
+	if (!i915_vm_has_cache_coloring(vma->vm))
 		return true;
 
 	/* Only valid to be called on an already inserted vma */
@@ -502,11 +569,13 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level)
 	GEM_BUG_ON(list_empty(&node->node_list));
 
 	other = list_prev_entry(node, node_list);
-	if (color_differs(other, cache_level) && !drm_mm_hole_follows(other))
+	if (i915_node_color_differs(other, color) &&
+	    !drm_mm_hole_follows(other))
 		return false;
 
 	other = list_next_entry(node, node_list);
-	if (color_differs(other, cache_level) && !drm_mm_hole_follows(node))
+	if (i915_node_color_differs(other, color) &&
+	    !drm_mm_hole_follows(node))
 		return false;
 
 	return true;
@@ -541,13 +610,12 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = vma->vm->i915;
-	unsigned int cache_level;
+	unsigned long color;
 	u64 start, end;
 	int ret;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
@@ -567,7 +635,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
+		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
@@ -583,35 +651,21 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		return -ENOSPC;
 	}
 
-	if (vma->obj) {
-		ret = i915_gem_object_pin_pages(vma->obj);
-		if (ret)
-			return ret;
-
-		cache_level = vma->obj->cache_level;
-	} else {
-		cache_level = 0;
-	}
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = vma->ops->set_pages(vma);
-	if (ret)
-		goto err_unpin;
+	color = 0;
+	if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
+		color = vma->obj->cache_level;
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (!IS_ALIGNED(offset, alignment) ||
-		    range_overflows(offset, size, end)) {
-			ret = -EINVAL;
-			goto err_clear;
-		}
+		    range_overflows(offset, size, end))
+			return -EINVAL;
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
-					   size, offset, cache_level,
+					   size, offset, color,
 					   flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 	} else {
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
@@ -647,116 +701,297 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		}
 
 		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
-					  size, alignment, cache_level,
+					  size, alignment, color,
 					  start, end, flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
 	}
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
-
-	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-	mutex_unlock(&vma->vm->mutex);
+	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	if (vma->obj) {
-		atomic_inc(&vma->obj->bind_count);
-		assert_bind_count(vma->obj);
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		atomic_inc(&obj->bind_count);
+		assert_bind_count(obj);
 	}
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 
 	return 0;
-
-err_clear:
-	vma->ops->clear_pages(vma);
-err_unpin:
-	if (vma->obj)
-		i915_gem_object_unpin_pages(vma->obj);
-	return ret;
 }
 
 static void
-i915_vma_remove(struct i915_vma *vma)
+i915_vma_detach(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
-
-	vma->ops->clear_pages(vma);
-
-	mutex_lock(&vma->vm->mutex);
-	drm_mm_remove_node(&vma->node);
-	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-	mutex_unlock(&vma->vm->mutex);
+	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
 	/*
-	 * Since the unbound list is global, only move to that list if
-	 * no more VMAs exist.
+	 * And finally now the object is completely decoupled from this
+	 * vma, we can drop its hold on the backing storage and allow
+	 * it to be reaped by the shrinker.
 	 */
+	list_del(&vma->vm_link);
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		assert_bind_count(obj);
 		atomic_dec(&obj->bind_count);
+	}
+}
 
-		/*
-		 * And finally now the object is completely decoupled from this
-		 * vma, we can drop its hold on the backing storage and allow
-		 * it to be reaped by the shrinker.
-		 */
-		i915_gem_object_unpin_pages(obj);
-		assert_bind_count(obj);
+static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
+{
+	unsigned int bound;
+	bool pinned = true;
+
+	bound = atomic_read(&vma->flags);
+	do {
+		if (unlikely(flags & ~bound))
+			return false;
+
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+			return false;
+
+		if (!(bound & I915_VMA_PIN_MASK))
+			goto unpinned;
+
+		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+
+	return true;
+
+unpinned:
+	/*
+	 * If pin_count==0, but we are bound, check under the lock to avoid
+	 * racing with a concurrent i915_vma_unbind().
+	 */
+	mutex_lock(&vma->vm->mutex);
+	do {
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+			pinned = false;
+			break;
+		}
+
+		if (unlikely(flags & ~bound)) {
+			pinned = false;
+			break;
+		}
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+	mutex_unlock(&vma->vm->mutex);
+
+	return pinned;
+}
+
+static int vma_get_pages(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (atomic_add_unless(&vma->pages_count, 1, 0))
+		return 0;
+
+	/* Allocations ahoy! */
+	if (mutex_lock_interruptible(&vma->pages_mutex))
+		return -EINTR;
+
+	if (!atomic_read(&vma->pages_count)) {
+		if (vma->obj) {
+			err = i915_gem_object_pin_pages(vma->obj);
+			if (err)
+				goto unlock;
+		}
+
+		err = vma->ops->set_pages(vma);
+		if (err) {
+			if (vma->obj)
+				i915_gem_object_unpin_pages(vma->obj);
+			goto unlock;
+		}
 	}
+	atomic_inc(&vma->pages_count);
+
+unlock:
+	mutex_unlock(&vma->pages_mutex);
+
+	return err;
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags)
+static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
 {
-	const unsigned int bound = vma->flags;
-	int ret;
+	/* We allocate under vma_get_pages, so beware the shrinker */
+	mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
+	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
+	if (atomic_sub_return(count, &vma->pages_count) == 0) {
+		vma->ops->clear_pages(vma);
+		GEM_BUG_ON(vma->pages);
+		if (vma->obj)
+			i915_gem_object_unpin_pages(vma->obj);
+	}
+	mutex_unlock(&vma->pages_mutex);
+}
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
+static void vma_put_pages(struct i915_vma *vma)
+{
+	if (atomic_add_unless(&vma->pages_count, -1, 1))
+		return;
 
-	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
-		ret = -EBUSY;
-		goto err_unpin;
+	__vma_put_pages(vma, 1);
+}
+
+static void vma_unbind_pages(struct i915_vma *vma)
+{
+	unsigned int count;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	/* The upper portion of pages_count is the number of bindings */
+	count = atomic_read(&vma->pages_count);
+	count >>= I915_VMA_PAGES_BIAS;
+	GEM_BUG_ON(!count);
+
+	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+}
+
+int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+	struct i915_vma_work *work = NULL;
+	intel_wakeref_t wakeref = 0;
+	unsigned int bound;
+	int err;
+
+	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+	GEM_BUG_ON(flags & PIN_UPDATE);
+	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
+
+	/* First try and grab the pin without rebinding the vma */
+	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+		return 0;
+
+	err = vma_get_pages(vma);
+	if (err)
+		return err;
+
+	if (flags & vma->vm->bind_async_flags) {
+		work = i915_vma_work();
+		if (!work) {
+			err = -ENOMEM;
+			goto err_pages;
+		}
 	}
 
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		ret = i915_vma_insert(vma, size, alignment, flags);
-		if (ret)
-			goto err_unpin;
+	if (flags & PIN_GLOBAL)
+		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+
+	/* No more allocations allowed once we hold vm->mutex */
+	err = mutex_lock_interruptible(&vma->vm->mutex);
+	if (err)
+		goto err_fence;
+
+	bound = atomic_read(&vma->flags);
+	if (unlikely(bound & I915_VMA_ERROR)) {
+		err = -ENOMEM;
+		goto err_unlock;
 	}
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
-	if (ret)
-		goto err_remove;
+	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
+		err = -EAGAIN; /* pins are meant to be fairly temporary */
+		goto err_unlock;
+	}
+
+	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
+		__i915_vma_pin(vma);
+		goto err_unlock;
+	}
+
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unlock;
 
-	GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0);
+	if (!(bound & I915_VMA_BIND_MASK)) {
+		err = i915_vma_insert(vma, size, alignment, flags);
+		if (err)
+			goto err_active;
 
-	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+		if (i915_is_ggtt(vma->vm))
+			__i915_vma_set_map_and_fenceable(vma);
+	}
+
+	GEM_BUG_ON(!vma->pages);
+	err = i915_vma_bind(vma,
+			    vma->obj ? vma->obj->cache_level : 0,
+			    flags, work);
+	if (err)
+		goto err_remove;
+
+	/* There should only be at most 2 active bindings (user, global) */
+	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
+	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
+	__i915_vma_pin(vma);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-	return 0;
 
 err_remove:
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		i915_vma_remove(vma);
-		GEM_BUG_ON(vma->pages);
-		GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK);
+	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) {
+		i915_vma_detach(vma);
+		drm_mm_remove_node(&vma->node);
 	}
-err_unpin:
-	__i915_vma_unpin(vma);
-	return ret;
+err_active:
+	i915_active_release(&vma->active);
+err_unlock:
+	mutex_unlock(&vma->vm->mutex);
+err_fence:
+	if (work)
+		dma_fence_work_commit(&work->base);
+	if (wakeref)
+		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+err_pages:
+	vma_put_pages(vma);
+	return err;
+}
+
+static void flush_idle_contexts(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, gt, id)
+		intel_engine_flush_barriers(engine);
+
+	intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
+}
+
+int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
+{
+	struct i915_address_space *vm = vma->vm;
+	int err;
+
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
+	do {
+		err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
+		if (err != -ENOSPC)
+			return err;
+
+		/* Unlike i915_vma_pin, we don't take no for an answer! */
+		flush_idle_contexts(vm->gt);
+		if (mutex_lock_interruptible(&vm->mutex) == 0) {
+			i915_gem_evict_vm(vm);
+			mutex_unlock(&vm->mutex);
+		}
+	} while (1);
 }
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	struct drm_i915_private *i915 = vma->vm->i915;
+	struct intel_gt *gt = vma->vm->gt;
 	unsigned long flags;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -773,79 +1008,91 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	spin_lock_irqsave(&i915->gt.closed_lock, flags);
-	list_add(&vma->closed_link, &i915->gt.closed_vma);
-	spin_unlock_irqrestore(&i915->gt.closed_lock, flags);
+	spin_lock_irqsave(&gt->closed_lock, flags);
+	list_add(&vma->closed_link, &gt->closed_vma);
+	spin_unlock_irqrestore(&gt->closed_lock, flags);
 }
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	struct drm_i915_private *i915 = vma->vm->i915;
+	struct intel_gt *gt = vma->vm->gt;
 
-	if (!i915_vma_is_closed(vma))
-		return;
-
-	spin_lock_irq(&i915->gt.closed_lock);
+	spin_lock_irq(&gt->closed_lock);
 	list_del_init(&vma->closed_link);
-	spin_unlock_irq(&i915->gt.closed_lock);
+	spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
-	__i915_vma_remove_closed(vma);
+	if (i915_vma_is_closed(vma))
+		__i915_vma_remove_closed(vma);
 }
 
-static void __i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_release(struct kref *ref)
 {
-	GEM_BUG_ON(vma->node.allocated);
-	GEM_BUG_ON(vma->fence);
+	struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
 
-	mutex_lock(&vma->vm->mutex);
-	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
+	if (drm_mm_node_allocated(&vma->node)) {
+		mutex_lock(&vma->vm->mutex);
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
+		mutex_unlock(&vma->vm->mutex);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	}
+	GEM_BUG_ON(i915_vma_is_active(vma));
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		spin_lock(&obj->vma.lock);
 		list_del(&vma->obj_link);
-		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+		rb_erase(&vma->obj_node, &obj->vma.tree);
 		spin_unlock(&obj->vma.lock);
 	}
 
-	i915_active_fini(&vma->active);
+	__i915_vma_remove_closed(vma);
+	i915_vm_put(vma->vm);
 
+	i915_active_fini(&vma->active);
 	i915_vma_free(vma);
 }
 
-void i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_parked(struct intel_gt *gt)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct i915_vma *vma, *next;
 
-	GEM_BUG_ON(i915_vma_is_pinned(vma));
+	spin_lock_irq(&gt->closed_lock);
+	list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_address_space *vm = vma->vm;
 
-	__i915_vma_remove_closed(vma);
+		/* XXX All to avoid keeping a reference on i915_vma itself */
 
-	WARN_ON(i915_vma_unbind(vma));
-	GEM_BUG_ON(i915_vma_is_active(vma));
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
 
-	__i915_vma_destroy(vma);
-}
+		if (i915_vm_tryopen(vm)) {
+			list_del_init(&vma->closed_link);
+		} else {
+			i915_gem_object_put(obj);
+			obj = NULL;
+		}
 
-void i915_vma_parked(struct drm_i915_private *i915)
-{
-	struct i915_vma *vma, *next;
+		spin_unlock_irq(&gt->closed_lock);
 
-	spin_lock_irq(&i915->gt.closed_lock);
-	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		list_del_init(&vma->closed_link);
-		spin_unlock_irq(&i915->gt.closed_lock);
+		if (obj) {
+			__i915_vma_put(vma);
+			i915_gem_object_put(obj);
+		}
 
-		i915_vma_destroy(vma);
+		i915_vm_close(vm);
 
-		spin_lock_irq(&i915->gt.closed_lock);
+		/* Restart after dropping lock */
+		spin_lock_irq(&gt->closed_lock);
+		next = list_first_entry(&gt->closed_vma,
+					typeof(*next), closed_link);
 	}
-	spin_unlock_irq(&i915->gt.closed_lock);
+	spin_unlock_irq(&gt->closed_lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
@@ -861,17 +1108,16 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
 
 void i915_vma_revoke_mmap(struct i915_vma *vma)
 {
-	struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
+	struct drm_vma_offset_node *node;
 	u64 vma_offset;
 
-	lockdep_assert_held(&vma->vm->mutex);
-
 	if (!i915_vma_has_userfault(vma))
 		return;
 
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
 	GEM_BUG_ON(!vma->obj->userfault_count);
 
+	node = &vma->mmo->vma_node;
 	vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
 	unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping,
 			    drm_vma_node_offset_addr(node) + vma_offset,
@@ -883,6 +1129,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
+{
+	int err;
+
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+	/* Wait for the vma to be bound before we start! */
+	err = i915_request_await_active(rq, &vma->active);
+	if (err)
+		return err;
+
+	return i915_active_add_request(&vma->active, rq);
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
@@ -890,27 +1150,21 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	int err;
 
-	assert_vma_held(vma);
 	assert_object_held(obj);
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	/*
-	 * Add a reference if we're newly entering the active list.
-	 * The order in which we add operations to the retirement queue is
-	 * vital here: mark_active adds to the start of the callback list,
-	 * such that subsequent callbacks are called first. Therefore we
-	 * add the active reference first and queue for it to be dropped
-	 * *last*.
-	 */
-	err = i915_active_ref(&vma->active, rq->timeline, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (unlikely(err))
 		return err;
 
 	if (flags & EXEC_OBJECT_WRITE) {
-		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
-			i915_active_ref(&obj->frontbuffer->write,
-					rq->timeline,
-					rq);
+		struct intel_frontbuffer *front;
+
+		front = __intel_frontbuffer_get(obj);
+		if (unlikely(front)) {
+			if (intel_frontbuffer_invalidate(front, ORIGIN_CS))
+				i915_active_add_request(&front->write, rq);
+			intel_frontbuffer_put(front);
+		}
 
 		dma_resv_add_excl_fence(vma->resv, &rq->fence);
 		obj->write_domain = I915_GEM_DOMAIN_RENDER;
@@ -930,44 +1184,31 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
+int __i915_vma_unbind(struct i915_vma *vma)
 {
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	/*
 	 * First wait upon any activity as retiring the request may
 	 * have side-effects such as unpinning or even unbinding this vma.
+	 *
+	 * XXX Actually waiting under the vm->mutex is a hinderance and
+	 * should be pipelined wherever possible. In cases where that is
+	 * unavoidable, we should lift the wait to before the mutex.
 	 */
-	might_sleep();
-	if (i915_vma_is_active(vma)) {
-		/*
-		 * When a closed VMA is retired, it is unbound - eek.
-		 * In order to prevent it from being recursively closed,
-		 * take a pin on the vma so that the second unbind is
-		 * aborted.
-		 *
-		 * Even more scary is that the retire callback may free
-		 * the object (last active vma). To prevent the explosion
-		 * we defer the actual object free to a worker that can
-		 * only proceed once it acquires the struct_mutex (which
-		 * we currently hold, therefore it cannot free this object
-		 * before we are finished).
-		 */
-		__i915_vma_pin(vma);
-		ret = i915_active_wait(&vma->active);
-		__i915_vma_unpin(vma);
-		if (ret)
-			return ret;
-	}
-	GEM_BUG_ON(i915_vma_is_active(vma));
+	ret = i915_vma_sync(vma);
+	if (ret)
+		return ret;
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
-		return -EBUSY;
+		return -EAGAIN;
 	}
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
@@ -982,34 +1223,58 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_revoke_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
 		/* Force a pagefault for domain tracking on next user access */
-		mutex_lock(&vma->vm->mutex);
 		i915_vma_revoke_mmap(vma);
-		mutex_unlock(&vma->vm->mutex);
 
 		__i915_vma_iounmap(vma);
-		vma->flags &= ~I915_VMA_CAN_FENCE;
+		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 	}
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-	if (likely(!vma->vm->closed)) {
+	if (likely(atomic_read(&vma->vm->open))) {
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma);
 	}
-	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags);
 
-	i915_vma_remove(vma);
+	i915_vma_detach(vma);
+	vma_unbind_pages(vma);
 
+	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
 	return 0;
 }
 
+int i915_vma_unbind(struct i915_vma *vma)
+{
+	struct i915_address_space *vm = vma->vm;
+	intel_wakeref_t wakeref = 0;
+	int err;
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return 0;
+
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		/* XXX not always required: nop_clear_range */
+		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+	err = mutex_lock_interruptible(&vm->mutex);
+	if (err)
+		return err;
+
+	err = __i915_vma_unbind(vma);
+	mutex_unlock(&vm->mutex);
+
+	if (wakeref)
+		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+
+	return err;
+}
+
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
 {
 	i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 889fc7cb910a..02b31a62951e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -30,120 +30,14 @@
 
 #include <drm/drm_mm.h>
 
+#include "gem/i915_gem_object.h"
+
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
-
-enum i915_cache_level;
-
-/**
- * DOC: Virtual Memory Address
- *
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
-	struct drm_mm_node node;
-	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm;
-	const struct i915_vma_ops *ops;
-	struct i915_fence_reg *fence;
-	struct dma_resv *resv; /** Alias of obj->resv */
-	struct sg_table *pages;
-	void __iomem *iomap;
-	void *private; /* owned by creator */
-	u64 size;
-	u64 display_alignment;
-	struct i915_page_sizes page_sizes;
-
-	u32 fence_size;
-	u32 fence_alignment;
-
-	/**
-	 * Count of the number of times this vma has been opened by different
-	 * handles (but same file) for execbuf, i.e. the number of aliases
-	 * that exist in the ctx->handle_vmas LUT for this vma.
-	 */
-	atomic_t open_count;
-	unsigned long flags;
-	/**
-	 * How many users have pinned this object in GTT space.
-	 *
-	 * This is a tightly bound, fairly small number of users, so we
-	 * stuff inside the flags field so that we can both check for overflow
-	 * and detect a no-op i915_vma_pin() in a single check, while also
-	 * pinning the vma.
-	 *
-	 * The worst case display setup would have the same vma pinned for
-	 * use on each plane on each crtc, while also building the next atomic
-	 * state and holding a pin for the length of the cleanup queue. In the
-	 * future, the flip queue may be increased from 1.
-	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
-	 *
-	 * For GEM, the number of concurrent users for pwrite/pread is
-	 * unbounded. For execbuffer, it is currently one but will in future
-	 * be extended to allow multiple clients to pin vma concurrently.
-	 *
-	 * We also use suballocated pages, with each suballocation claiming
-	 * its own pin on the shared vma. At present, this is limited to
-	 * exclusive cachelines of a single page, so a maximum of 64 possible
-	 * users.
-	 */
-#define I915_VMA_PIN_MASK 0xff
-#define I915_VMA_PIN_OVERFLOW	BIT(8)
-
-	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND	BIT(9)
-#define I915_VMA_LOCAL_BIND	BIT(10)
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
-
-#define I915_VMA_GGTT		BIT(11)
-#define I915_VMA_CAN_FENCE	BIT(12)
-#define I915_VMA_USERFAULT_BIT	13
-#define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(14)
-
-	struct i915_active active;
-
-	/**
-	 * Support different GGTT views into the same object.
-	 * This means there can be multiple VMA mappings per object and per VM.
-	 * i915_ggtt_view_type is used to distinguish between those entries.
-	 * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also
-	 * assumed in GEM functions which take no ggtt view parameter.
-	 */
-	struct i915_ggtt_view ggtt_view;
-
-	/** This object's place on the active/inactive lists */
-	struct list_head vm_link;
-
-	struct list_head obj_link; /* Link in the object's VMA list */
-	struct rb_node obj_node;
-	struct hlist_node obj_hash;
-
-	/** This vma's place in the execbuf reservation list */
-	struct list_head exec_link;
-	struct list_head reloc_link;
-
-	/** This vma's place in the eviction list */
-	struct list_head evict_link;
-
-	struct list_head closed_link;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	unsigned int *exec_flags;
-	struct hlist_node exec_node;
-	u32 exec_handle;
-};
+#include "i915_vma_types.h"
 
 struct i915_vma *
 i915_vma_instance(struct drm_i915_gem_object *obj,
@@ -158,52 +52,57 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
 	return !i915_active_is_idle(&vma->active);
 }
 
+int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
+					   struct i915_request *rq);
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
 					 struct i915_request *rq,
 					 unsigned int flags);
 
+#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter)
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_GGTT;
+	return test_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_GGTT_WRITE;
+	return test_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma));
 }
 
 static inline void i915_vma_set_ggtt_write(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	vma->flags |= I915_VMA_GGTT_WRITE;
+	set_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma));
 }
 
-static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma)
+static inline bool i915_vma_unset_ggtt_write(struct i915_vma *vma)
 {
-	vma->flags &= ~I915_VMA_GGTT_WRITE;
+	return test_and_clear_bit(I915_VMA_GGTT_WRITE_BIT,
+				  __i915_vma_flags(vma));
 }
 
 void i915_vma_flush_writes(struct i915_vma *vma);
 
 static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_CAN_FENCE;
+	return test_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
-	return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return test_and_set_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma));
 }
 
 static inline void i915_vma_unset_userfault(struct i915_vma *vma)
 {
-	return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return clear_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 {
-	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+	return test_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma));
 }
 
 static inline bool i915_vma_is_closed(const struct i915_vma *vma)
@@ -214,7 +113,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON(!vma->node.allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(upper_32_bits(vma->node.start));
 	GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
 	return lower_32_bits(vma->node.start);
@@ -293,18 +192,36 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags);
-bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
+struct i915_vma_work *i915_vma_work(void);
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work);
+
+bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
+int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
-void i915_vma_destroy(struct i915_vma *vma);
+
+static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma)
+{
+	if (kref_get_unless_zero(&vma->ref))
+		return vma;
+
+	return NULL;
+}
+
+void i915_vma_release(struct kref *ref);
+static inline void __i915_vma_put(struct i915_vma *vma)
+{
+	kref_put(&vma->ref, i915_vma_release);
+}
 
 #define assert_vma_held(vma) dma_resv_assert_held((vma)->resv)
 
@@ -318,30 +235,13 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
 	dma_resv_unlock(vma->resv);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags);
-static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
-	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
-	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
-	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
-
-	/* Pin early to prevent the shrinker/eviction logic from destroying
-	 * our vma as we insert and bind.
-	 */
-	if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) {
-		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-		return 0;
-	}
-
-	return __i915_vma_do_pin(vma, size, alignment, flags);
-}
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
+int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
-	return vma->flags & I915_VMA_PIN_MASK;
+	return atomic_read(&vma->flags) & I915_VMA_PIN_MASK;
 }
 
 static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
@@ -351,18 +251,18 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
-	vma->flags++;
-	GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
+	atomic_inc(&vma->flags);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
-	vma->flags--;
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	atomic_dec(&vma->flags);
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
 {
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	__i915_vma_unpin(vma);
 }
@@ -370,7 +270,13 @@ static inline void i915_vma_unpin(struct i915_vma *vma)
 static inline bool i915_vma_is_bound(const struct i915_vma *vma,
 				     unsigned int where)
 {
-	return vma->flags & where;
+	return atomic_read(&vma->flags) & where;
+}
+
+static inline bool i915_node_color_differs(const struct drm_mm_node *node,
+					   unsigned long color)
+{
+	return drm_mm_node_allocated(node) && node->color != color;
 }
 
 /**
@@ -382,8 +288,6 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma,
  * the caller must call i915_vma_unpin_iomap to relinquish the pinning
  * after the iomapping is no longer required.
  *
- * Callers must hold the struct_mutex.
- *
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
@@ -395,8 +299,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  *
  * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
  *
- * Callers must hold the struct_mutex. This function is only valid to be
- * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
+ * This function is only valid to be called on a VMA previously
+ * iomapped by the caller with i915_vma_pin_iomap().
  */
 void i915_vma_unpin_iomap(struct i915_vma *vma);
 
@@ -424,6 +328,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 int __must_check i915_vma_pin_fence(struct i915_vma *vma);
 int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
+int __i915_vma_pin_fence(struct i915_vma *vma);
+
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
@@ -441,12 +347,11 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
 	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
 }
 
-void i915_vma_parked(struct drm_i915_private *i915);
+void i915_vma_parked(struct intel_gt *gt);
 
 #define for_each_until(cond) if (cond) break; else
 
@@ -470,4 +375,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
+static inline int i915_vma_sync(struct i915_vma *vma)
+{
+	/* Wait for the asynchronous bindings and pending GPU reads */
+	return i915_active_wait(&vma->active);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
new file mode 100644
index 000000000000..e0942efd5236
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_VMA_TYPES_H__
+#define __I915_VMA_TYPES_H__
+
+#include <linux/rbtree.h>
+
+#include <drm/drm_mm.h>
+
+#include "gem/i915_gem_object_types.h"
+
+enum i915_cache_level;
+
+/**
+ * DOC: Global GTT views
+ *
+ * Background and previous state
+ *
+ * Historically objects could exists (be bound) in global GTT space only as
+ * singular instances with a view representing all of the object's backing pages
+ * in a linear fashion. This view will be called a normal view.
+ *
+ * To support multiple views of the same object, where the number of mapped
+ * pages is not equal to the backing store, or where the layout of the pages
+ * is not linear, concept of a GGTT view was added.
+ *
+ * One example of an alternative view is a stereo display driven by a single
+ * image. In this case we would have a framebuffer looking like this
+ * (2x2 pages):
+ *
+ *    12
+ *    34
+ *
+ * Above would represent a normal GGTT view as normally mapped for GPU or CPU
+ * rendering. In contrast, fed to the display engine would be an alternative
+ * view which could look something like this:
+ *
+ *   1212
+ *   3434
+ *
+ * In this example both the size and layout of pages in the alternative view is
+ * different from the normal view.
+ *
+ * Implementation and usage
+ *
+ * GGTT views are implemented using VMAs and are distinguished via enum
+ * i915_ggtt_view_type and struct i915_ggtt_view.
+ *
+ * A new flavour of core GEM functions which work with GGTT bound objects were
+ * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
+ * renaming  in large amounts of code. They take the struct i915_ggtt_view
+ * parameter encapsulating all metadata required to implement a view.
+ *
+ * As a helper for callers which are only interested in the normal view,
+ * globally const i915_ggtt_view_normal singleton instance exists. All old core
+ * GEM API functions, the ones not taking the view parameter, are operating on,
+ * or with the normal GGTT view.
+ *
+ * Code wanting to add or use a new GGTT view needs to:
+ *
+ * 1. Add a new enum with a suitable name.
+ * 2. Extend the metadata in the i915_ggtt_view structure if required.
+ * 3. Add support to i915_get_vma_pages().
+ *
+ * New views are required to build a scatter-gather table from within the
+ * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
+ * exists for the lifetime of an VMA.
+ *
+ * Core API is designed to have copy semantics which means that passed in
+ * struct i915_ggtt_view does not need to be persistent (left around after
+ * calling the core API functions).
+ *
+ */
+
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	unsigned int width, height, stride, offset;
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[2];
+	unsigned int unused_mbz;
+} __packed;
+
+struct intel_rotation_info {
+	struct intel_remapped_plane_info plane[2];
+} __packed;
+
+struct intel_partial_info {
+	u64 offset;
+	unsigned int size;
+} __packed;
+
+enum i915_ggtt_view_type {
+	I915_GGTT_VIEW_NORMAL = 0,
+	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
+	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
+	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
+};
+
+static inline void assert_i915_gem_gtt_types(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
+	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
+
+	/* As we encode the size of each branch inside the union into its type,
+	 * we have to be careful that each branch has a unique size.
+	 */
+	switch ((enum i915_ggtt_view_type)0) {
+	case I915_GGTT_VIEW_NORMAL:
+	case I915_GGTT_VIEW_PARTIAL:
+	case I915_GGTT_VIEW_ROTATED:
+	case I915_GGTT_VIEW_REMAPPED:
+		/* gcc complains if these are identical cases */
+		break;
+	}
+}
+
+struct i915_ggtt_view {
+	enum i915_ggtt_view_type type;
+	union {
+		/* Members need to contain no holes/padding */
+		struct intel_partial_info partial;
+		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
+	};
+};
+
+/**
+ * DOC: Virtual Memory Address
+ *
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * will always be <= an objects lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+
+	struct i915_address_space *vm;
+	const struct i915_vma_ops *ops;
+
+	struct drm_i915_gem_object *obj;
+	struct dma_resv *resv; /** Alias of obj->resv */
+
+	struct sg_table *pages;
+	void __iomem *iomap;
+	void *private; /* owned by creator */
+
+	struct i915_fence_reg *fence;
+
+	u64 size;
+	u64 display_alignment;
+	struct i915_page_sizes page_sizes;
+
+	/* mmap-offset associated with fencing for this vma */
+	struct i915_mmap_offset	*mmo;
+
+	u32 fence_size;
+	u32 fence_alignment;
+
+	/**
+	 * Count of the number of times this vma has been opened by different
+	 * handles (but same file) for execbuf, i.e. the number of aliases
+	 * that exist in the ctx->handle_vmas LUT for this vma.
+	 */
+	struct kref ref;
+	atomic_t open_count;
+	atomic_t flags;
+	/**
+	 * How many users have pinned this object in GTT space.
+	 *
+	 * This is a tightly bound, fairly small number of users, so we
+	 * stuff inside the flags field so that we can both check for overflow
+	 * and detect a no-op i915_vma_pin() in a single check, while also
+	 * pinning the vma.
+	 *
+	 * The worst case display setup would have the same vma pinned for
+	 * use on each plane on each crtc, while also building the next atomic
+	 * state and holding a pin for the length of the cleanup queue. In the
+	 * future, the flip queue may be increased from 1.
+	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
+	 *
+	 * For GEM, the number of concurrent users for pwrite/pread is
+	 * unbounded. For execbuffer, it is currently one but will in future
+	 * be extended to allow multiple clients to pin vma concurrently.
+	 *
+	 * We also use suballocated pages, with each suballocation claiming
+	 * its own pin on the shared vma. At present, this is limited to
+	 * exclusive cachelines of a single page, so a maximum of 64 possible
+	 * users.
+	 */
+#define I915_VMA_PIN_MASK 0x3ff
+#define I915_VMA_OVERFLOW 0x200
+
+	/** Flags and address space this VMA is bound to */
+#define I915_VMA_GLOBAL_BIND_BIT 10
+#define I915_VMA_LOCAL_BIND_BIT  11
+
+#define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
+#define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
+
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
+
+#define I915_VMA_ALLOC_BIT	12
+#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
+
+#define I915_VMA_ERROR_BIT	13
+#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
+
+#define I915_VMA_GGTT_BIT	14
+#define I915_VMA_CAN_FENCE_BIT	15
+#define I915_VMA_USERFAULT_BIT	16
+#define I915_VMA_GGTT_WRITE_BIT	17
+
+#define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
+#define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
+#define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
+#define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
+
+	struct i915_active active;
+
+#define I915_VMA_PAGES_BIAS 24
+#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
+	atomic_t pages_count; /* number of active binds to the pages */
+	struct mutex pages_mutex; /* protect acquire/release of backing pages */
+
+	/**
+	 * Support different GGTT views into the same object.
+	 * This means there can be multiple VMA mappings per object and per VM.
+	 * i915_ggtt_view_type is used to distinguish between those entries.
+	 * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also
+	 * assumed in GEM functions which take no ggtt view parameter.
+	 */
+	struct i915_ggtt_view ggtt_view;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head vm_link;
+
+	struct list_head obj_link; /* Link in the object's VMA list */
+	struct rb_node obj_node;
+	struct hlist_node obj_hash;
+
+	/** This vma's place in the execbuf reservation list */
+	struct list_head exec_link;
+	struct list_head reloc_link;
+
+	/** This vma's place in the eviction list */
+	struct list_head evict_link;
+
+	struct list_head closed_link;
+
+	/**
+	 * Used for performing relocations during execbuffer insertion.
+	 */
+	unsigned int *exec_flags;
+	struct hlist_node exec_node;
+	u32 exec_handle;
+};
+
+#endif
+
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 546577e39b4e..09870a31b4f0 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -44,8 +44,8 @@
 #define TGL_CSR_MAX_FW_SIZE		0x6000
 MODULE_FIRMWARE(TGL_CSR_PATH);
 
-#define ICL_CSR_PATH			"i915/icl_dmc_ver1_07.bin"
-#define ICL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
+#define ICL_CSR_PATH			"i915/icl_dmc_ver1_09.bin"
+#define ICL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 9)
 #define ICL_CSR_MAX_FW_SIZE		0x6000
 MODULE_FIRMWARE(ICL_CSR_PATH);
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index d0ed44d33484..6670a0763be2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -73,9 +73,30 @@ const char *intel_platform_name(enum intel_platform platform)
 	return platform_names[platform];
 }
 
-void intel_device_info_dump_flags(const struct intel_device_info *info,
-				  struct drm_printer *p)
+static const char *iommu_name(void)
 {
+	const char *msg = "n/a";
+
+#ifdef CONFIG_INTEL_IOMMU
+	msg = enableddisabled(intel_iommu_gfx_mapped);
+#endif
+
+	return msg;
+}
+
+void intel_device_info_print_static(const struct intel_device_info *info,
+				    struct drm_printer *p)
+{
+	drm_printf(p, "engines: %x\n", info->engine_mask);
+	drm_printf(p, "gen: %d\n", info->gen);
+	drm_printf(p, "gt: %d\n", info->gt);
+	drm_printf(p, "iommu: %s\n", iommu_name());
+	drm_printf(p, "memory-regions: %x\n", info->memory_regions);
+	drm_printf(p, "page-sizes: %x\n", info->page_sizes);
+	drm_printf(p, "platform: %s\n", intel_platform_name(info->platform));
+	drm_printf(p, "ppgtt-size: %d\n", info->ppgtt_size);
+	drm_printf(p, "ppgtt-type: %d\n", info->ppgtt_type);
+
 #define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name));
 	DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
 #undef PRINT_FLAG
@@ -93,9 +114,9 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 		   hweight8(sseu->slice_mask), sseu->slice_mask);
 	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
 	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
+		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
 			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   sseu->subslice_mask[s]);
+			   intel_sseu_get_subslices(sseu, s));
 	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
@@ -106,8 +127,8 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
 }
 
-void intel_device_info_dump_runtime(const struct intel_runtime_info *info,
-				    struct drm_printer *p)
+void intel_device_info_print_runtime(const struct intel_runtime_info *info,
+				     struct drm_printer *p)
 {
 	sseu_dump(&info->sseu, p);
 
@@ -118,10 +139,9 @@ void intel_device_info_dump_runtime(const struct intel_runtime_info *info,
 static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
 		       int subslice)
 {
-	int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-	int slice_stride = sseu->max_subslices * subslice_stride;
+	int slice_stride = sseu->max_subslices * sseu->eu_stride;
 
-	return slice * slice_stride + subslice * subslice_stride;
+	return slice * slice_stride + subslice * sseu->eu_stride;
 }
 
 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
@@ -130,7 +150,7 @@ static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
 	int i, offset = sseu_eu_idx(sseu, slice, subslice);
 	u16 eu_mask = 0;
 
-	for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) {
+	for (i = 0; i < sseu->eu_stride; i++) {
 		eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
 			(i * BITS_PER_BYTE);
 	}
@@ -143,14 +163,14 @@ static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
 {
 	int i, offset = sseu_eu_idx(sseu, slice, subslice);
 
-	for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) {
+	for (i = 0; i < sseu->eu_stride; i++) {
 		sseu->eu_mask[offset + i] =
 			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
 	}
 }
 
-void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
-				     struct drm_printer *p)
+void intel_device_info_print_topology(const struct sseu_dev_info *sseu,
+				      struct drm_printer *p)
 {
 	int s, ss;
 
@@ -160,9 +180,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
 	}
 
 	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
+		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
 			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   sseu->subslice_mask[s]);
+			   intel_sseu_get_subslices(sseu, s));
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
@@ -183,44 +203,80 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 	return total;
 }
 
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+				    u8 s_en, u32 ss_en, u16 eu_en)
+{
+	int s, ss;
+
+	/* ss_en represents entire subslice mask across all slices */
+	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
+		   sizeof(ss_en) * BITS_PER_BYTE);
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		if ((s_en & BIT(s)) == 0)
+			continue;
+
+		sseu->slice_mask |= BIT(s);
+
+		intel_sseu_set_subslices(sseu, s, ss_en);
+
+		for (ss = 0; ss < sseu->max_subslices; ss++)
+			if (intel_sseu_has_subslice(sseu, s, ss))
+				sseu_set_eus(sseu, s, ss, eu_en);
+	}
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
+}
+
+static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
+{
+	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
+	u8 s_en;
+	u32 dss_en;
+	u16 eu_en = 0;
+	u8 eu_en_fuse;
+	int eu;
+
+	/*
+	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
+	 * Instead of splitting these, provide userspace with an array
+	 * of DSS to more closely represent the hardware resource.
+	 */
+	intel_sseu_set_info(sseu, 1, 6, 16);
+
+	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
+
+	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
+
+	/* one bit per pair of EUs */
+	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
+	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+		if (eu_en_fuse & BIT(eu))
+			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+
+	/* TGL only supports slice-level power gating */
+	sseu->has_slice_pg = 1;
+}
+
 static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u8 s_en;
-	u32 ss_en, ss_en_mask;
+	u32 ss_en;
 	u8 eu_en;
-	int s;
 
-	if (IS_ELKHARTLAKE(dev_priv)) {
-		sseu->max_slices = 1;
-		sseu->max_subslices = 4;
-		sseu->max_eus_per_subslice = 8;
-	} else {
-		sseu->max_slices = 1;
-		sseu->max_subslices = 8;
-		sseu->max_eus_per_subslice = 8;
-	}
+	if (IS_ELKHARTLAKE(dev_priv))
+		intel_sseu_set_info(sseu, 1, 4, 8);
+	else
+		intel_sseu_set_info(sseu, 1, 8, 8);
 
 	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
 	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
-	ss_en_mask = BIT(sseu->max_subslices) - 1;
 	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
 
-	for (s = 0; s < sseu->max_slices; s++) {
-		if (s_en & BIT(s)) {
-			int ss_idx = sseu->max_subslices * s;
-			int ss;
-
-			sseu->slice_mask |= BIT(s);
-			sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask;
-			for (ss = 0; ss < sseu->max_subslices; ss++) {
-				if (sseu->subslice_mask[s] & BIT(ss))
-					sseu_set_eus(sseu, s, ss, eu_en);
-			}
-		}
-	}
-	sseu->eu_per_subslice = hweight8(eu_en);
-	sseu->eu_total = compute_eu_total(sseu);
+	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
 
 	/* ICL has no power gating restrictions. */
 	sseu->has_slice_pg = 1;
@@ -236,23 +292,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 	const int eu_mask = 0xff;
 	u32 subslice_mask, eu_en;
 
+	intel_sseu_set_info(sseu, 6, 4, 8);
+
 	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
 			    GEN10_F2_S_ENA_SHIFT;
-	sseu->max_slices = 6;
-	sseu->max_subslices = 4;
-	sseu->max_eus_per_subslice = 8;
-
-	subslice_mask = (1 << 4) - 1;
-	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
-			   GEN10_F2_SS_DIS_SHIFT);
-
-	/*
-	 * Slice0 can have up to 3 subslices, but there are only 2 in
-	 * slice1/2.
-	 */
-	sseu->subslice_mask[0] = subslice_mask;
-	for (s = 1; s < sseu->max_slices; s++)
-		sseu->subslice_mask[s] = subslice_mask & 0x3;
 
 	/* Slice0 */
 	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
@@ -277,14 +320,25 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
 	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
 
-	/* Do a second pass where we mark the subslices disabled if all their
-	 * eus are off.
-	 */
+	subslice_mask = (1 << 4) - 1;
+	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+			   GEN10_F2_SS_DIS_SHIFT);
+
 	for (s = 0; s < sseu->max_slices; s++) {
+		u32 subslice_mask_with_eus = subslice_mask;
+
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			if (sseu_get_eus(sseu, s, ss) == 0)
-				sseu->subslice_mask[s] &= ~BIT(ss);
+				subslice_mask_with_eus &= ~BIT(ss);
 		}
+
+		/*
+		 * Slice0 can have up to 3 subslices, but there are only 2 in
+		 * slice1/2.
+		 */
+		intel_sseu_set_subslices(sseu, s, s == 0 ?
+						  subslice_mask_with_eus :
+						  subslice_mask_with_eus & 0x3);
 	}
 
 	sseu->eu_total = compute_eu_total(sseu);
@@ -310,13 +364,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u32 fuse;
+	u8 subslice_mask = 0;
 
 	fuse = I915_READ(CHV_FUSE_GT);
 
 	sseu->slice_mask = BIT(0);
-	sseu->max_slices = 1;
-	sseu->max_subslices = 2;
-	sseu->max_eus_per_subslice = 8;
+	intel_sseu_set_info(sseu, 1, 2, 8);
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
 		u8 disabled_mask =
@@ -325,7 +378,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
 			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
 
-		sseu->subslice_mask[0] |= BIT(0);
+		subslice_mask |= BIT(0);
 		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
 	}
 
@@ -336,10 +389,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
 			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
 
-		sseu->subslice_mask[0] |= BIT(1);
+		subslice_mask |= BIT(1);
 		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
 	}
 
+	intel_sseu_set_subslices(sseu, 0, subslice_mask);
+
 	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
@@ -372,9 +427,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 
 	/* BXT has a single slice and at most 3 subslices. */
-	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
-	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
-	sseu->max_eus_per_subslice = 8;
+	intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3,
+			    IS_GEN9_LP(dev_priv) ? 3 : 4, 8);
 
 	/*
 	 * The subslice disable field is global, i.e. it applies
@@ -393,14 +447,14 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 			/* skip disabled slice */
 			continue;
 
-		sseu->subslice_mask[s] = subslice_mask;
+		intel_sseu_set_subslices(sseu, s, subslice_mask);
 
 		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			int eu_per_ss;
 			u8 eu_disabled_mask;
 
-			if (!(sseu->subslice_mask[s] & BIT(ss)))
+			if (!intel_sseu_has_subslice(sseu, s, ss))
 				/* skip disabled subslice */
 				continue;
 
@@ -465,7 +519,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
+static void bdw_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	int s, ss;
@@ -473,9 +527,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
-	sseu->max_slices = 3;
-	sseu->max_subslices = 3;
-	sseu->max_eus_per_subslice = 8;
+	intel_sseu_set_info(sseu, 3, 3, 8);
 
 	/*
 	 * The subslice disable field is global, i.e. it applies
@@ -502,13 +554,13 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 			/* skip disabled slice */
 			continue;
 
-		sseu->subslice_mask[s] = subslice_mask;
+		intel_sseu_set_subslices(sseu, s, subslice_mask);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask[s] & BIT(ss)))
+			if (!intel_sseu_has_subslice(sseu, s, ss))
 				/* skip disabled subslice */
 				continue;
 
@@ -548,10 +600,11 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = 0;
 }
 
-static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
+static void hsw_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u32 fuse1;
+	u8 subslice_mask = 0;
 	int s, ss;
 
 	/*
@@ -564,22 +617,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
 		/* fall through */
 	case 1:
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask[0] = BIT(0);
+		subslice_mask = BIT(0);
 		break;
 	case 2:
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask[0] = BIT(0) | BIT(1);
+		subslice_mask = BIT(0) | BIT(1);
 		break;
 	case 3:
 		sseu->slice_mask = BIT(0) | BIT(1);
-		sseu->subslice_mask[0] = BIT(0) | BIT(1);
-		sseu->subslice_mask[1] = BIT(0) | BIT(1);
+		subslice_mask = BIT(0) | BIT(1);
 		break;
 	}
 
-	sseu->max_slices = hweight8(sseu->slice_mask);
-	sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
-
 	fuse1 = I915_READ(HSW_PAVP_FUSE1);
 	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
 	default:
@@ -596,9 +645,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
 		sseu->eu_per_subslice = 6;
 		break;
 	}
-	sseu->max_eus_per_subslice = sseu->eu_per_subslice;
+
+	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
+			    hweight8(subslice_mask),
+			    sseu->eu_per_subslice);
 
 	for (s = 0; s < sseu->max_slices; s++) {
+		intel_sseu_set_subslices(sseu, s, subslice_mask);
+
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			sseu_set_eus(sseu, s, ss,
 				     (1UL << sseu->eu_per_subslice) - 1);
@@ -775,6 +829,8 @@ static const u16 subplatform_ult_ids[] = {
 	INTEL_WHL_U_GT1_IDS(0),
 	INTEL_WHL_U_GT2_IDS(0),
 	INTEL_WHL_U_GT3_IDS(0),
+	INTEL_CML_U_GT1_IDS(0),
+	INTEL_CML_U_GT2_IDS(0),
 };
 
 static const u16 subplatform_ulx_ids[] = {
@@ -900,12 +956,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			runtime->num_sprites[pipe] = 1;
 	}
 
-	if (i915_modparams.disable_display) {
-		DRM_INFO("Display disabled (module parameter)\n");
-		info->num_pipes = 0;
-	} else if (HAS_DISPLAY(dev_priv) &&
-		   (IS_GEN_RANGE(dev_priv, 7, 8)) &&
-		   HAS_PCH_SPLIT(dev_priv)) {
+	if (HAS_DISPLAY(dev_priv) && IS_GEN_RANGE(dev_priv, 7, 8) &&
+	    HAS_PCH_SPLIT(dev_priv)) {
 		u32 fuse_strap = I915_READ(FUSE_STRAP);
 		u32 sfuse_strap = I915_READ(SFUSE_STRAP);
 
@@ -923,14 +975,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		    (HAS_PCH_CPT(dev_priv) &&
 		     !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) {
 			DRM_INFO("Display fused off, disabling\n");
-			info->num_pipes = 0;
+			info->pipe_mask = 0;
 		} else if (fuse_strap & IVB_PIPE_C_DISABLE) {
 			DRM_INFO("PipeC fused off\n");
-			info->num_pipes -= 1;
+			info->pipe_mask &= ~BIT(PIPE_C);
 		}
 	} else if (HAS_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 9) {
 		u32 dfsm = I915_READ(SKL_DFSM);
-		u8 enabled_mask = BIT(info->num_pipes) - 1;
+		u8 enabled_mask = info->pipe_mask;
 
 		if (dfsm & SKL_DFSM_PIPE_A_DISABLE)
 			enabled_mask &= ~BIT(PIPE_A);
@@ -951,22 +1003,37 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			DRM_ERROR("invalid pipe fuse configuration: enabled_mask=0x%x\n",
 				  enabled_mask);
 		else
-			info->num_pipes = hweight8(enabled_mask);
+			info->pipe_mask = enabled_mask;
+
+		if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE)
+			info->display.has_hdcp = 0;
+
+		if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE)
+			info->display.has_fbc = 0;
+
+		if (INTEL_GEN(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
+			info->display.has_csr = 0;
+
+		if (INTEL_GEN(dev_priv) >= 10 &&
+		    (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE))
+			info->display.has_dsc = 0;
 	}
 
 	/* Initialize slice/subslice/EU info */
 	if (IS_HASWELL(dev_priv))
-		haswell_sseu_info_init(dev_priv);
+		hsw_sseu_info_init(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv))
 		cherryview_sseu_info_init(dev_priv);
 	else if (IS_BROADWELL(dev_priv))
-		broadwell_sseu_info_init(dev_priv);
+		bdw_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 9))
 		gen9_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 10))
 		gen10_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 11)
+	else if (IS_GEN(dev_priv, 11))
 		gen11_sseu_info_init(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 12)
+		gen12_sseu_info_init(dev_priv);
 
 	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
 		DRM_INFO("Disabling ppGTT for VT-d support\n");
@@ -1010,8 +1077,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
 		      GEN11_GT_VEBOX_DISABLE_SHIFT;
 
 	for (i = 0; i < I915_MAX_VCS; i++) {
-		if (!HAS_ENGINE(dev_priv, _VCS(i)))
+		if (!HAS_ENGINE(dev_priv, _VCS(i))) {
+			vdbox_mask &= ~BIT(i);
 			continue;
+		}
 
 		if (!(BIT(i) & vdbox_mask)) {
 			info->engine_mask &= ~BIT(_VCS(i));
@@ -1024,7 +1093,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
 		 * hooked up to an SFC (Scaler & Format Converter) unit.
 		 * In TGL each VDBOX has access to an SFC.
 		 */
-		if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
+		if (INTEL_GEN(dev_priv) >= 12 || logical_vdbox++ % 2 == 0)
 			RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
 	}
 	DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
@@ -1032,8 +1101,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv));
 
 	for (i = 0; i < I915_MAX_VECS; i++) {
-		if (!HAS_ENGINE(dev_priv, _VECS(i)))
+		if (!HAS_ENGINE(dev_priv, _VECS(i))) {
+			vebox_mask &= ~BIT(i);
 			continue;
+		}
 
 		if (!(BIT(i) & vebox_mask)) {
 			info->engine_mask &= ~BIT(_VECS(i));
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 92e0c2e0954c..2725cb7fc169 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -107,6 +107,7 @@ enum intel_ppgtt_type {
 	func(is_mobile); \
 	func(is_lp); \
 	func(require_force_probe); \
+	func(is_dgfx); \
 	/* Keep has_* in alphabetical order */ \
 	func(has_64bit_reloc); \
 	func(gpu_reset_clobbers_display); \
@@ -135,8 +136,11 @@ enum intel_ppgtt_type {
 	func(has_csr); \
 	func(has_ddi); \
 	func(has_dp_mst); \
+	func(has_dsb); \
+	func(has_dsc); \
 	func(has_fbc); \
 	func(has_gmch); \
+	func(has_hdcp); \
 	func(has_hotplug); \
 	func(has_ipc); \
 	func(has_modular_fia); \
@@ -159,9 +163,11 @@ struct intel_device_info {
 
 	unsigned int page_sizes; /* page sizes supported by the HW */
 
+	u32 memory_regions; /* regions supported by the HW */
+
 	u32 display_mmio_offset;
 
-	u8 num_pipes;
+	u8 pipe_mask;
 
 #define DEFINE_FLAG(name) u8 name:1
 	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
@@ -224,12 +230,13 @@ const char *intel_platform_name(enum intel_platform platform);
 
 void intel_device_info_subplatform_init(struct drm_i915_private *dev_priv);
 void intel_device_info_runtime_init(struct drm_i915_private *dev_priv);
-void intel_device_info_dump_flags(const struct intel_device_info *info,
-				  struct drm_printer *p);
-void intel_device_info_dump_runtime(const struct intel_runtime_info *info,
+
+void intel_device_info_print_static(const struct intel_device_info *info,
 				    struct drm_printer *p);
-void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
+void intel_device_info_print_runtime(const struct intel_runtime_info *info,
 				     struct drm_printer *p);
+void intel_device_info_print_topology(const struct sseu_dev_info *sseu,
+				      struct drm_printer *p);
 
 void intel_device_info_init_mmio(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
new file mode 100644
index 000000000000..d0d038b3cd79
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "i915_drv.h"
+
+/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. */
+#define REGION_MAP(type, inst) \
+	BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst)
+
+const u32 intel_region_map[] = {
+	[INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0),
+	[INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0),
+	[INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0),
+};
+
+struct intel_memory_region *
+intel_memory_region_by_type(struct drm_i915_private *i915,
+			    enum intel_memory_type mem_type)
+{
+	struct intel_memory_region *mr;
+	int id;
+
+	for_each_memory_region(mr, i915, id)
+		if (mr->type == mem_type)
+			return mr;
+
+	return NULL;
+}
+
+static u64
+intel_memory_region_free_pages(struct intel_memory_region *mem,
+			       struct list_head *blocks)
+{
+	struct i915_buddy_block *block, *on;
+	u64 size = 0;
+
+	list_for_each_entry_safe(block, on, blocks, link) {
+		size += i915_buddy_block_size(&mem->mm, block);
+		i915_buddy_free(&mem->mm, block);
+	}
+	INIT_LIST_HEAD(blocks);
+
+	return size;
+}
+
+void
+__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem,
+				      struct list_head *blocks)
+{
+	mutex_lock(&mem->mm_lock);
+	mem->avail += intel_memory_region_free_pages(mem, blocks);
+	mutex_unlock(&mem->mm_lock);
+}
+
+void
+__intel_memory_region_put_block_buddy(struct i915_buddy_block *block)
+{
+	struct list_head blocks;
+
+	INIT_LIST_HEAD(&blocks);
+	list_add(&block->link, &blocks);
+	__intel_memory_region_put_pages_buddy(block->private, &blocks);
+}
+
+int
+__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
+				      resource_size_t size,
+				      unsigned int flags,
+				      struct list_head *blocks)
+{
+	unsigned int min_order = 0;
+	unsigned long n_pages;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size));
+	GEM_BUG_ON(!list_empty(blocks));
+
+	if (flags & I915_ALLOC_MIN_PAGE_SIZE) {
+		min_order = ilog2(mem->min_page_size) -
+			    ilog2(mem->mm.chunk_size);
+	}
+
+	if (flags & I915_ALLOC_CONTIGUOUS) {
+		size = roundup_pow_of_two(size);
+		min_order = ilog2(size) - ilog2(mem->mm.chunk_size);
+	}
+
+	if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size)
+		return -E2BIG;
+
+	n_pages = size >> ilog2(mem->mm.chunk_size);
+
+	mutex_lock(&mem->mm_lock);
+
+	do {
+		struct i915_buddy_block *block;
+		unsigned int order;
+
+		order = fls(n_pages) - 1;
+		GEM_BUG_ON(order > mem->mm.max_order);
+		GEM_BUG_ON(order < min_order);
+
+		do {
+			block = i915_buddy_alloc(&mem->mm, order);
+			if (!IS_ERR(block))
+				break;
+
+			if (order-- == min_order)
+				goto err_free_blocks;
+		} while (1);
+
+		n_pages -= BIT(order);
+
+		block->private = mem;
+		list_add(&block->link, blocks);
+
+		if (!n_pages)
+			break;
+	} while (1);
+
+	mem->avail -= size;
+	mutex_unlock(&mem->mm_lock);
+	return 0;
+
+err_free_blocks:
+	intel_memory_region_free_pages(mem, blocks);
+	mutex_unlock(&mem->mm_lock);
+	return -ENXIO;
+}
+
+struct i915_buddy_block *
+__intel_memory_region_get_block_buddy(struct intel_memory_region *mem,
+				      resource_size_t size,
+				      unsigned int flags)
+{
+	struct i915_buddy_block *block;
+	LIST_HEAD(blocks);
+	int ret;
+
+	ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks);
+	if (ret)
+		return ERR_PTR(ret);
+
+	block = list_first_entry(&blocks, typeof(*block), link);
+	list_del_init(&block->link);
+	return block;
+}
+
+int intel_memory_region_init_buddy(struct intel_memory_region *mem)
+{
+	return i915_buddy_init(&mem->mm, resource_size(&mem->region),
+			       PAGE_SIZE);
+}
+
+void intel_memory_region_release_buddy(struct intel_memory_region *mem)
+{
+	i915_buddy_fini(&mem->mm);
+}
+
+struct intel_memory_region *
+intel_memory_region_create(struct drm_i915_private *i915,
+			   resource_size_t start,
+			   resource_size_t size,
+			   resource_size_t min_page_size,
+			   resource_size_t io_start,
+			   const struct intel_memory_region_ops *ops)
+{
+	struct intel_memory_region *mem;
+	int err;
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return ERR_PTR(-ENOMEM);
+
+	mem->i915 = i915;
+	mem->region = (struct resource)DEFINE_RES_MEM(start, size);
+	mem->io_start = io_start;
+	mem->min_page_size = min_page_size;
+	mem->ops = ops;
+	mem->total = size;
+	mem->avail = mem->total;
+
+	mutex_init(&mem->objects.lock);
+	INIT_LIST_HEAD(&mem->objects.list);
+	INIT_LIST_HEAD(&mem->objects.purgeable);
+
+	mutex_init(&mem->mm_lock);
+
+	if (ops->init) {
+		err = ops->init(mem);
+		if (err)
+			goto err_free;
+	}
+
+	kref_init(&mem->kref);
+	return mem;
+
+err_free:
+	kfree(mem);
+	return ERR_PTR(err);
+}
+
+void intel_memory_region_set_name(struct intel_memory_region *mem,
+				  const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsnprintf(mem->name, sizeof(mem->name), fmt, ap);
+	va_end(ap);
+}
+
+static void __intel_memory_region_destroy(struct kref *kref)
+{
+	struct intel_memory_region *mem =
+		container_of(kref, typeof(*mem), kref);
+
+	if (mem->ops->release)
+		mem->ops->release(mem);
+
+	mutex_destroy(&mem->mm_lock);
+	mutex_destroy(&mem->objects.lock);
+	kfree(mem);
+}
+
+struct intel_memory_region *
+intel_memory_region_get(struct intel_memory_region *mem)
+{
+	kref_get(&mem->kref);
+	return mem;
+}
+
+void intel_memory_region_put(struct intel_memory_region *mem)
+{
+	kref_put(&mem->kref, __intel_memory_region_destroy);
+}
+
+/* Global memory region registration -- only slight layer inversions! */
+
+int intel_memory_regions_hw_probe(struct drm_i915_private *i915)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) {
+		struct intel_memory_region *mem = ERR_PTR(-ENODEV);
+		u32 type;
+
+		if (!HAS_REGION(i915, BIT(i)))
+			continue;
+
+		type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]);
+		switch (type) {
+		case INTEL_MEMORY_SYSTEM:
+			mem = i915_gem_shmem_setup(i915);
+			break;
+		case INTEL_MEMORY_STOLEN:
+			mem = i915_gem_stolen_setup(i915);
+			break;
+		case INTEL_MEMORY_LOCAL:
+			mem = intel_setup_fake_lmem(i915);
+			break;
+		}
+
+		if (IS_ERR(mem)) {
+			err = PTR_ERR(mem);
+			DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type);
+			goto out_cleanup;
+		}
+
+		mem->id = intel_region_map[i];
+		mem->type = type;
+		mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]);
+
+		i915->mm.regions[i] = mem;
+	}
+
+	return 0;
+
+out_cleanup:
+	intel_memory_regions_driver_release(i915);
+	return err;
+}
+
+void intel_memory_regions_driver_release(struct drm_i915_private *i915)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) {
+		struct intel_memory_region *region =
+			fetch_and_zero(&i915->mm.regions[i]);
+
+		if (region)
+			intel_memory_region_put(region);
+	}
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_memory_region.c"
+#include "selftests/mock_region.c"
+#endif
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
new file mode 100644
index 000000000000..232490d89a83
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_MEMORY_REGION_H__
+#define __INTEL_MEMORY_REGION_H__
+
+#include <linux/kref.h>
+#include <linux/ioport.h>
+#include <linux/mutex.h>
+#include <linux/io-mapping.h>
+#include <drm/drm_mm.h>
+
+#include "i915_buddy.h"
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+struct intel_memory_region;
+struct sg_table;
+
+/**
+ *  Base memory type
+ */
+enum intel_memory_type {
+	INTEL_MEMORY_SYSTEM = 0,
+	INTEL_MEMORY_LOCAL,
+	INTEL_MEMORY_STOLEN,
+};
+
+enum intel_region_id {
+	INTEL_REGION_SMEM = 0,
+	INTEL_REGION_LMEM,
+	INTEL_REGION_STOLEN,
+	INTEL_REGION_UNKNOWN, /* Should be last */
+};
+
+#define REGION_SMEM     BIT(INTEL_REGION_SMEM)
+#define REGION_LMEM     BIT(INTEL_REGION_LMEM)
+#define REGION_STOLEN   BIT(INTEL_REGION_STOLEN)
+
+#define INTEL_MEMORY_TYPE_SHIFT 16
+
+#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT))
+#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff))
+
+#define I915_ALLOC_MIN_PAGE_SIZE  BIT(0)
+#define I915_ALLOC_CONTIGUOUS     BIT(1)
+
+#define for_each_memory_region(mr, i915, id) \
+	for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \
+		for_each_if((mr) = (i915)->mm.regions[id])
+
+/**
+ * Memory regions encoded as type | instance
+ */
+extern const u32 intel_region_map[];
+
+struct intel_memory_region_ops {
+	unsigned int flags;
+
+	int (*init)(struct intel_memory_region *mem);
+	void (*release)(struct intel_memory_region *mem);
+
+	struct drm_i915_gem_object *
+	(*create_object)(struct intel_memory_region *mem,
+			 resource_size_t size,
+			 unsigned int flags);
+};
+
+struct intel_memory_region {
+	struct drm_i915_private *i915;
+
+	const struct intel_memory_region_ops *ops;
+
+	struct io_mapping iomap;
+	struct resource region;
+
+	/* For fake LMEM */
+	struct drm_mm_node fake_mappable;
+
+	struct i915_buddy_mm mm;
+	struct mutex mm_lock;
+
+	struct kref kref;
+
+	resource_size_t io_start;
+	resource_size_t min_page_size;
+	resource_size_t total;
+	resource_size_t avail;
+
+	unsigned int type;
+	unsigned int instance;
+	unsigned int id;
+	char name[8];
+
+	dma_addr_t remap_addr;
+
+	struct {
+		struct mutex lock; /* Protects access to objects */
+		struct list_head list;
+		struct list_head purgeable;
+	} objects;
+};
+
+int intel_memory_region_init_buddy(struct intel_memory_region *mem);
+void intel_memory_region_release_buddy(struct intel_memory_region *mem);
+
+int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
+					  resource_size_t size,
+					  unsigned int flags,
+					  struct list_head *blocks);
+struct i915_buddy_block *
+__intel_memory_region_get_block_buddy(struct intel_memory_region *mem,
+				      resource_size_t size,
+				      unsigned int flags);
+void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem,
+					   struct list_head *blocks);
+void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block);
+
+struct intel_memory_region *
+intel_memory_region_create(struct drm_i915_private *i915,
+			   resource_size_t start,
+			   resource_size_t size,
+			   resource_size_t min_page_size,
+			   resource_size_t io_start,
+			   const struct intel_memory_region_ops *ops);
+
+struct intel_memory_region *
+intel_memory_region_get(struct intel_memory_region *mem);
+void intel_memory_region_put(struct intel_memory_region *mem);
+
+int intel_memory_regions_hw_probe(struct drm_i915_private *i915);
+void intel_memory_regions_driver_release(struct drm_i915_private *i915);
+struct intel_memory_region *
+intel_memory_region_by_type(struct drm_i915_private *i915,
+			    enum intel_memory_type mem_type);
+
+__printf(2, 3) void
+intel_memory_region_set_name(struct intel_memory_region *mem,
+			     const char *fmt, ...);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index 15f8bff141f9..4ed60e1f01db 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -12,81 +12,93 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
 {
 	switch (id) {
 	case INTEL_PCH_IBX_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Ibex Peak PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 5));
 		return PCH_IBX;
 	case INTEL_PCH_CPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found CougarPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv));
 		return PCH_CPT;
 	case INTEL_PCH_PPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found PantherPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv));
 		/* PantherPoint is CPT compatible */
 		return PCH_CPT;
 	case INTEL_PCH_LPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found LynxPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv));
 		return PCH_LPT;
 	case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv));
 		return PCH_LPT;
 	case INTEL_PCH_WPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found WildcatPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv));
 		/* WildcatPoint is LPT compatible */
 		return PCH_LPT;
 	case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv));
 		/* WildcatPoint is LPT compatible */
 		return PCH_LPT;
 	case INTEL_PCH_SPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv));
 		return PCH_SPT;
 	case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
-		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv));
+		drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n");
+		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) &&
+			!IS_COFFEELAKE(dev_priv));
 		return PCH_SPT;
 	case INTEL_PCH_KBP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) &&
 			!IS_COFFEELAKE(dev_priv));
 		/* KBP is SPT compatible */
 		return PCH_SPT;
 	case INTEL_PCH_CNP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n");
 		WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
 		return PCH_CNP;
 	case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Found Cannon Lake LP PCH (CNP-LP)\n");
 		WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
 		return PCH_CNP;
 	case INTEL_PCH_CMP_DEVICE_ID_TYPE:
 	case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n");
 		WARN_ON(!IS_COFFEELAKE(dev_priv));
 		/* CometPoint is CNP Compatible */
 		return PCH_CNP;
+	case INTEL_PCH_CMP_V_DEVICE_ID_TYPE:
+		drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n");
+		WARN_ON(!IS_COFFEELAKE(dev_priv));
+		/* Comet Lake V PCH is based on KBP, which is SPT compatible */
+		return PCH_SPT;
 	case INTEL_PCH_ICP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Ice Lake PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n");
 		WARN_ON(!IS_ICELAKE(dev_priv));
 		return PCH_ICP;
 	case INTEL_PCH_MCC_DEVICE_ID_TYPE:
-	case INTEL_PCH_MCC2_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n");
 		WARN_ON(!IS_ELKHARTLAKE(dev_priv));
 		return PCH_MCC;
 	case INTEL_PCH_TGP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n");
+	case INTEL_PCH_TGP2_DEVICE_ID_TYPE:
+		drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n");
 		WARN_ON(!IS_TIGERLAKE(dev_priv));
 		return PCH_TGP;
+	case INTEL_PCH_JSP_DEVICE_ID_TYPE:
+	case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
+		drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n");
+		WARN_ON(!IS_ELKHARTLAKE(dev_priv));
+		return PCH_JSP;
 	default:
 		return PCH_NONE;
 	}
@@ -134,9 +146,9 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv)
 		id = INTEL_PCH_IBX_DEVICE_ID_TYPE;
 
 	if (id)
-		DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id);
+		drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id);
 	else
-		DRM_DEBUG_KMS("Assuming no PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n");
 
 	return id;
 }
@@ -190,13 +202,14 @@ void intel_detect_pch(struct drm_i915_private *dev_priv)
 	 * display.
 	 */
 	if (pch && !HAS_DISPLAY(dev_priv)) {
-		DRM_DEBUG_KMS("Display disabled, reverting to NOP PCH\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Display disabled, reverting to NOP PCH\n");
 		dev_priv->pch_type = PCH_NOP;
 		dev_priv->pch_id = 0;
 	}
 
 	if (!pch)
-		DRM_DEBUG_KMS("No PCH found.\n");
+		drm_dbg_kms(&dev_priv->drm, "No PCH found.\n");
 
 	pci_dev_put(pch);
 }
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index c29c81ec7971..3053d1ce398b 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -23,6 +23,7 @@ enum intel_pch {
 	PCH_SPT,        /* Sunrisepoint/Kaby Lake PCH */
 	PCH_CNP,        /* Cannon/Comet Lake PCH */
 	PCH_ICP,	/* Ice Lake PCH */
+	PCH_JSP,	/* Jasper Lake PCH */
 	PCH_MCC,        /* Mule Creek Canyon PCH */
 	PCH_TGP,	/* Tiger Lake PCH */
 };
@@ -42,16 +43,20 @@ enum intel_pch {
 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE		0x9D80
 #define INTEL_PCH_CMP_DEVICE_ID_TYPE		0x0280
 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE		0x0680
+#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE		0xA380
 #define INTEL_PCH_ICP_DEVICE_ID_TYPE		0x3480
 #define INTEL_PCH_MCC_DEVICE_ID_TYPE		0x4B00
-#define INTEL_PCH_MCC2_DEVICE_ID_TYPE		0x3880
 #define INTEL_PCH_TGP_DEVICE_ID_TYPE		0xA080
+#define INTEL_PCH_TGP2_DEVICE_ID_TYPE		0x4380
+#define INTEL_PCH_JSP_DEVICE_ID_TYPE		0x4D80
+#define INTEL_PCH_JSP2_DEVICE_ID_TYPE		0x3880
 #define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
 #define INTEL_PCH_P3X_DEVICE_ID_TYPE		0x7000
 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
 
 #define INTEL_PCH_TYPE(dev_priv)		((dev_priv)->pch_type)
 #define INTEL_PCH_ID(dev_priv)			((dev_priv)->pch_id)
+#define HAS_PCH_JSP(dev_priv)			(INTEL_PCH_TYPE(dev_priv) == PCH_JSP)
 #define HAS_PCH_MCC(dev_priv)			(INTEL_PCH_TYPE(dev_priv) == PCH_MCC)
 #define HAS_PCH_TGP(dev_priv)			(INTEL_PCH_TYPE(dev_priv) == PCH_TGP)
 #define HAS_PCH_ICP(dev_priv)			(INTEL_PCH_TYPE(dev_priv) == PCH_ICP)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2efe1d12d5a9..bd2d30ecc030 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -25,7 +25,6 @@
  *
  */
 
-#include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 
@@ -38,6 +37,8 @@
 #include "display/intel_fbc.h"
 #include "display/intel_sprite.h"
 
+#include "gt/intel_llc.h"
+
 #include "i915_drv.h"
 #include "i915_irq.h"
 #include "i915_trace.h"
@@ -45,26 +46,6 @@
 #include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
 
-/**
- * DOC: RC6
- *
- * RC6 is a special power stage which allows the GPU to enter an very
- * low-voltage mode when idle, using down to 0V while at this stage.  This
- * stage is entered automatically when the GPU is idle when RC6 support is
- * enabled, and as soon as new workload arises GPU wakes up automatically as well.
- *
- * There are different RC6 modes available in Intel GPU, which differentiate
- * among each other with the latency required to enter and leave RC6 and
- * voltage consumed by the GPU in different states.
- *
- * The combination of the following flags define which states GPU is allowed
- * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
- * RC6pp is deepest RC6. Their support by hardware varies according to the
- * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
- * which brings the most power savings; deeper states save more power, but
- * require higher latency to switch to and wake up.
- */
-
 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
 {
 	if (HAS_LLC(dev_priv)) {
@@ -159,7 +140,7 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
 
 }
 
-static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
+static void pnv_get_mem_freq(struct drm_i915_private *dev_priv)
 {
 	u32 tmp;
 
@@ -197,7 +178,7 @@ static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
 	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
 }
 
-static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
+static void ilk_get_mem_freq(struct drm_i915_private *dev_priv)
 {
 	u16 ddrpll, csipll;
 
@@ -218,14 +199,12 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		dev_priv->mem_freq = 1600;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
-				 ddrpll & 0xff);
+		drm_dbg(&dev_priv->drm, "unknown memory frequency 0x%02x\n",
+			ddrpll & 0xff);
 		dev_priv->mem_freq = 0;
 		break;
 	}
 
-	dev_priv->ips.r_t = dev_priv->mem_freq;
-
 	switch (csipll & 0x3ff) {
 	case 0x00c:
 		dev_priv->fsb_freq = 3200;
@@ -249,19 +228,11 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		dev_priv->fsb_freq = 6400;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
-				 csipll & 0x3ff);
+		drm_dbg(&dev_priv->drm, "unknown fsb frequency 0x%04x\n",
+			csipll & 0x3ff);
 		dev_priv->fsb_freq = 0;
 		break;
 	}
-
-	if (dev_priv->fsb_freq == 3200) {
-		dev_priv->ips.c_m = 0;
-	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
-		dev_priv->ips.c_m = 1;
-	} else {
-		dev_priv->ips.c_m = 2;
-	}
 }
 
 static const struct cxsr_latency cxsr_latency_table[] = {
@@ -343,7 +314,8 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 
 	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
 		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
-		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
+		drm_err(&dev_priv->drm,
+			"timed out waiting for Punit DDR DVFS request\n");
 
 	vlv_punit_put(dev_priv);
 }
@@ -412,9 +384,9 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl
 
 	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
 
-	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
-		      enableddisabled(enable),
-		      enableddisabled(was_enabled));
+	drm_dbg_kms(&dev_priv->drm, "memory self-refresh is %s (was %s)\n",
+		    enableddisabled(enable),
+		    enableddisabled(was_enabled));
 
 	return was_enabled;
 }
@@ -492,7 +464,7 @@ static const int pessimal_latency_ns = 5000;
 
 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
 	enum pipe pipe = crtc->pipe;
@@ -539,8 +511,8 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
 	if (i9xx_plane == PLANE_B)
 		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
@@ -556,8 +528,8 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
 		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
 	size >>= 1; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
@@ -571,41 +543,45 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
 	size = dsparb & 0x7f;
 	size >>= 2; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
 
 /* Pineview has different values for various configs */
-static const struct intel_watermark_params pineview_display_wm = {
+static const struct intel_watermark_params pnv_display_wm = {
 	.fifo_size = PINEVIEW_DISPLAY_FIFO,
 	.max_wm = PINEVIEW_MAX_WM,
 	.default_wm = PINEVIEW_DFT_WM,
 	.guard_size = PINEVIEW_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_display_hplloff_wm = {
+
+static const struct intel_watermark_params pnv_display_hplloff_wm = {
 	.fifo_size = PINEVIEW_DISPLAY_FIFO,
 	.max_wm = PINEVIEW_MAX_WM,
 	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
 	.guard_size = PINEVIEW_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_cursor_wm = {
+
+static const struct intel_watermark_params pnv_cursor_wm = {
 	.fifo_size = PINEVIEW_CURSOR_FIFO,
 	.max_wm = PINEVIEW_CURSOR_MAX_WM,
 	.default_wm = PINEVIEW_CURSOR_DFT_WM,
 	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
+
+static const struct intel_watermark_params pnv_cursor_hplloff_wm = {
 	.fifo_size = PINEVIEW_CURSOR_FIFO,
 	.max_wm = PINEVIEW_CURSOR_MAX_WM,
 	.default_wm = PINEVIEW_CURSOR_DFT_WM,
 	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i965_cursor_wm_info = {
 	.fifo_size = I965_CURSOR_FIFO,
 	.max_wm = I965_CURSOR_MAX_WM,
@@ -613,6 +589,7 @@ static const struct intel_watermark_params i965_cursor_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i945_wm_info = {
 	.fifo_size = I945_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -620,6 +597,7 @@ static const struct intel_watermark_params i945_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i915_wm_info = {
 	.fifo_size = I915_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -627,6 +605,7 @@ static const struct intel_watermark_params i915_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i830_a_wm_info = {
 	.fifo_size = I855GM_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -634,6 +613,7 @@ static const struct intel_watermark_params i830_a_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I830_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i830_bc_wm_info = {
 	.fifo_size = I855GM_FIFO_SIZE,
 	.max_wm = I915_MAX_WM/2,
@@ -641,6 +621,7 @@ static const struct intel_watermark_params i830_bc_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I830_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i845_wm_info = {
 	.fifo_size = I830_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -823,10 +804,10 @@ static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
 				   const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 
 	/* FIXME check the 'enable' instead */
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return false;
 
 	/*
@@ -838,9 +819,28 @@ static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
 	 * around this problem with the watermark code.
 	 */
 	if (plane->id == PLANE_CURSOR)
-		return plane_state->base.fb != NULL;
+		return plane_state->hw.fb != NULL;
 	else
-		return plane_state->base.visible;
+		return plane_state->uapi.visible;
+}
+
+static bool intel_crtc_active(struct intel_crtc *crtc)
+{
+	/* Be paranoid as we can arrive here with only partial
+	 * state retrieved from the hardware during setup.
+	 *
+	 * We can ditch the adjusted_mode.crtc_clock check as soon
+	 * as Haswell has gained clock readout/fastboot support.
+	 *
+	 * We can ditch the crtc->primary->state->fb check as soon as we can
+	 * properly reconstruct framebuffers.
+	 *
+	 * FIXME: The intel_crtc->active here should be switched to
+	 * crtc->state->active once we have proper CRTC states wired up
+	 * for atomic.
+	 */
+	return crtc->active && crtc->base.primary->state->fb &&
+		crtc->config->hw.adjusted_mode.crtc_clock;
 }
 
 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
@@ -858,7 +858,7 @@ static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
 	return enabled;
 }
 
-static void pineview_update_wm(struct intel_crtc *unused_crtc)
+static void pnv_update_wm(struct intel_crtc *unused_crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
 	struct intel_crtc *crtc;
@@ -871,7 +871,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 					 dev_priv->fsb_freq,
 					 dev_priv->mem_freq);
 	if (!latency) {
-		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Unknown FSB/MEM found, disable CxSR\n");
 		intel_set_memory_cxsr(dev_priv, false);
 		return;
 	}
@@ -879,25 +880,25 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 	crtc = single_enabled_crtc(dev_priv);
 	if (crtc) {
 		const struct drm_display_mode *adjusted_mode =
-			&crtc->config->base.adjusted_mode;
+			&crtc->config->hw.adjusted_mode;
 		const struct drm_framebuffer *fb =
 			crtc->base.primary->state->fb;
 		int cpp = fb->format->cpp[0];
 		int clock = adjusted_mode->crtc_clock;
 
 		/* Display SR */
-		wm = intel_calculate_wm(clock, &pineview_display_wm,
-					pineview_display_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_display_wm,
+					pnv_display_wm.fifo_size,
 					cpp, latency->display_sr);
 		reg = I915_READ(DSPFW1);
 		reg &= ~DSPFW_SR_MASK;
 		reg |= FW_WM(wm, SR);
 		I915_WRITE(DSPFW1, reg);
-		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
+		drm_dbg_kms(&dev_priv->drm, "DSPFW1 register is %x\n", reg);
 
 		/* cursor SR */
-		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
-					pineview_display_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_cursor_wm,
+					pnv_display_wm.fifo_size,
 					4, latency->cursor_sr);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_CURSOR_SR_MASK;
@@ -905,8 +906,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 		I915_WRITE(DSPFW3, reg);
 
 		/* Display HPLL off SR */
-		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
-					pineview_display_hplloff_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_display_hplloff_wm,
+					pnv_display_hplloff_wm.fifo_size,
 					cpp, latency->display_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_SR_MASK;
@@ -914,14 +915,14 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 		I915_WRITE(DSPFW3, reg);
 
 		/* cursor HPLL off SR */
-		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
-					pineview_display_hplloff_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_cursor_hplloff_wm,
+					pnv_display_hplloff_wm.fifo_size,
 					4, latency->cursor_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_CURSOR_MASK;
 		reg |= FW_WM(wm, HPLL_CURSOR);
 		I915_WRITE(DSPFW3, reg);
-		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
+		drm_dbg_kms(&dev_priv->drm, "DSPFW3 register is %x\n", reg);
 
 		intel_set_memory_cxsr(dev_priv, true);
 	} else {
@@ -1112,10 +1113,10 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state,
 			  int level)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
 	unsigned int clock, htotal, cpp, width, wm;
 
@@ -1125,7 +1126,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 
 	/*
 	 * Not 100% sure which way ELK should go here as the
@@ -1145,10 +1146,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
 	clock = adjusted_mode->crtc_clock;
 	htotal = adjusted_mode->crtc_htotal;
 
-	if (plane->id == PLANE_CURSOR)
-		width = plane_state->base.crtc_w;
-	else
-		width = drm_rect_width(&plane_state->base.dst);
+	width = drm_rect_width(&plane_state->uapi.dst);
 
 	if (plane->id == PLANE_CURSOR) {
 		wm = intel_wm_method2(clock, htotal, width, cpp, latency);
@@ -1175,7 +1173,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
 				 int level, enum plane_id plane_id, u16 value)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	bool dirty = false;
 
 	for (; level < intel_wm_num_levels(dev_priv); level++) {
@@ -1191,7 +1189,7 @@ static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
 			       int level, u16 value)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	bool dirty = false;
 
 	/* NORMAL level doesn't have an FBC watermark */
@@ -1214,7 +1212,8 @@ static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 				     const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
 	enum plane_id plane_id = plane->id;
 	bool dirty = false;
@@ -1267,16 +1266,18 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 
  out:
 	if (dirty) {
-		DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
-			      plane->base.name,
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
+			    plane->base.name,
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
 
 		if (plane_id == PLANE_PRIMARY)
-			DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
-				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
-				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
+			drm_dbg_kms(&dev_priv->drm,
+				    "FBC watermarks: SR=%d, HPLL=%d\n",
+				    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
+				    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
 	}
 
 	return dirty;
@@ -1293,7 +1294,7 @@ static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
 				     int level)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
 	if (level > dev_priv->wm.max_level)
 		return false;
@@ -1331,12 +1332,12 @@ static void g4x_invalidate_wms(struct intel_crtc *crtc,
 
 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 	struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
-	int num_active_planes = hweight32(crtc_state->active_planes &
-					  ~BIT(PLANE_CURSOR));
+	int num_active_planes = hweight8(crtc_state->active_planes &
+					 ~BIT(PLANE_CURSOR));
 	const struct g4x_pipe_wm *raw;
 	const struct intel_plane_state *old_plane_state;
 	const struct intel_plane_state *new_plane_state;
@@ -1348,8 +1349,8 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 	for_each_oldnew_intel_plane_in_state(state, plane,
 					     old_plane_state,
 					     new_plane_state, i) {
-		if (new_plane_state->base.crtc != &crtc->base &&
-		    old_plane_state->base.crtc != &crtc->base)
+		if (new_plane_state->hw.crtc != &crtc->base &&
+		    old_plane_state->hw.crtc != &crtc->base)
 			continue;
 
 		if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
@@ -1420,17 +1421,17 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 
 static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
 	const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(intel_state, crtc);
 	const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
 	enum plane_id plane_id;
 
-	if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
+	if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
 		*intermediate = *optimal;
 
 		intermediate->cxsr = false;
@@ -1498,7 +1499,7 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv,
 			 struct g4x_wm_values *wm)
 {
 	struct intel_crtc *crtc;
-	int num_active_crtcs = 0;
+	int num_active_pipes = 0;
 
 	wm->cxsr = true;
 	wm->hpll_en = true;
@@ -1517,10 +1518,10 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv,
 		if (!wm_state->fbc_en)
 			wm->fbc_en = false;
 
-		num_active_crtcs++;
+		num_active_pipes++;
 	}
 
-	if (num_active_crtcs != 1) {
+	if (num_active_pipes != 1) {
 		wm->cxsr = false;
 		wm->hpll_en = false;
 		wm->fbc_en = false;
@@ -1560,10 +1561,11 @@ static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
 }
 
 static void g4x_initial_watermarks(struct intel_atomic_state *state,
-				   struct intel_crtc_state *crtc_state)
+				   struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	mutex_lock(&dev_priv->wm.wm_mutex);
 	crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
@@ -1572,10 +1574,11 @@ static void g4x_initial_watermarks(struct intel_atomic_state *state,
 }
 
 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
-				    struct intel_crtc_state *crtc_state)
+				    struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	if (!crtc_state->wm.need_postvbl_update)
 		return;
@@ -1621,10 +1624,10 @@ static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
 				const struct intel_plane_state *plane_state,
 				int level)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	unsigned int clock, htotal, cpp, width, wm;
 
 	if (dev_priv->wm.pri_latency[level] == 0)
@@ -1633,7 +1636,7 @@ static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 	clock = adjusted_mode->crtc_clock;
 	htotal = adjusted_mode->crtc_htotal;
 	width = crtc_state->pipe_src_w;
@@ -1662,12 +1665,12 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
 
 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	const struct g4x_pipe_wm *raw =
 		&crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
 	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
 	unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
-	int num_active_planes = hweight32(active_planes);
+	int num_active_planes = hweight8(active_planes);
 	const int fifo_size = 511;
 	int fifo_extra, fifo_left = fifo_size;
 	int sprite0_fifo_extra = 0;
@@ -1774,7 +1777,7 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
 				 int level, enum plane_id plane_id, u16 value)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int num_levels = intel_wm_num_levels(dev_priv);
 	bool dirty = false;
 
@@ -1791,7 +1794,8 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 				     const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	enum plane_id plane_id = plane->id;
 	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
 	int level;
@@ -1819,11 +1823,12 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 
 out:
 	if (dirty)
-		DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
-			      plane->base.name,
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
+			    plane->base.name,
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
 
 	return dirty;
 }
@@ -1849,16 +1854,16 @@ static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
 
 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 	struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
 	const struct vlv_fifo_state *fifo_state =
 		&crtc_state->wm.vlv.fifo_state;
-	int num_active_planes = hweight32(crtc_state->active_planes &
-					  ~BIT(PLANE_CURSOR));
-	bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
+	int num_active_planes = hweight8(crtc_state->active_planes &
+					 ~BIT(PLANE_CURSOR));
+	bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->uapi);
 	const struct intel_plane_state *old_plane_state;
 	const struct intel_plane_state *new_plane_state;
 	struct intel_plane *plane;
@@ -1869,8 +1874,8 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 	for_each_oldnew_intel_plane_in_state(state, plane,
 					     old_plane_state,
 					     new_plane_state, i) {
-		if (new_plane_state->base.crtc != &crtc->base &&
-		    old_plane_state->base.crtc != &crtc->base)
+		if (new_plane_state->hw.crtc != &crtc->base &&
+		    old_plane_state->hw.crtc != &crtc->base)
 			continue;
 
 		if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
@@ -1917,7 +1922,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 
 	for (level = 0; level < wm_state->num_levels; level++) {
 		const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
-		const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
+		const int sr_fifo_size = INTEL_NUM_PIPES(dev_priv) * 512 - 1;
 
 		if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
 			break;
@@ -1955,11 +1960,12 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 	(((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
 
 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
-				   struct intel_crtc_state *crtc_state)
+				   struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_uncore *uncore = &dev_priv->uncore;
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 	const struct vlv_fifo_state *fifo_state =
 		&crtc_state->wm.vlv.fifo_state;
 	int sprite0_start, sprite1_start, fifo_size;
@@ -2053,17 +2059,17 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 
 static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
 	const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(new_crtc_state->base.state);
+		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(intel_state, crtc);
 	const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
 	int level;
 
-	if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
+	if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
 		*intermediate = *optimal;
 
 		intermediate->cxsr = false;
@@ -2106,7 +2112,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv,
 			 struct vlv_wm_values *wm)
 {
 	struct intel_crtc *crtc;
-	int num_active_crtcs = 0;
+	int num_active_pipes = 0;
 
 	wm->level = dev_priv->wm.max_level;
 	wm->cxsr = true;
@@ -2120,14 +2126,14 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv,
 		if (!wm_state->cxsr)
 			wm->cxsr = false;
 
-		num_active_crtcs++;
+		num_active_pipes++;
 		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
 	}
 
-	if (num_active_crtcs != 1)
+	if (num_active_pipes != 1)
 		wm->cxsr = false;
 
-	if (num_active_crtcs > 1)
+	if (num_active_pipes > 1)
 		wm->level = VLV_WM_LEVEL_PM2;
 
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
@@ -2179,10 +2185,11 @@ static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
 }
 
 static void vlv_initial_watermarks(struct intel_atomic_state *state,
-				   struct intel_crtc_state *crtc_state)
+				   struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	mutex_lock(&dev_priv->wm.wm_mutex);
 	crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
@@ -2191,10 +2198,11 @@ static void vlv_initial_watermarks(struct intel_atomic_state *state,
 }
 
 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
-				    struct intel_crtc_state *crtc_state)
+				    struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	if (!crtc_state->wm.need_postvbl_update)
 		return;
@@ -2219,7 +2227,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		/* self-refresh has much higher latency */
 		static const int sr_latency_ns = 12000;
 		const struct drm_display_mode *adjusted_mode =
-			&crtc->config->base.adjusted_mode;
+			&crtc->config->hw.adjusted_mode;
 		const struct drm_framebuffer *fb =
 			crtc->base.primary->state->fb;
 		int clock = adjusted_mode->crtc_clock;
@@ -2235,8 +2243,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		if (srwm < 0)
 			srwm = 1;
 		srwm &= 0x1ff;
-		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
-			      entries, srwm);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh entries: %d, wm: %d\n",
+			    entries, srwm);
 
 		entries = intel_wm_method2(clock, htotal,
 					   crtc->base.cursor->state->crtc_w, 4,
@@ -2249,8 +2258,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		if (cursor_sr > i965_cursor_wm_info.max_wm)
 			cursor_sr = i965_cursor_wm_info.max_wm;
 
-		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
-			      "cursor %d\n", srwm, cursor_sr);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh watermark: display plane %d "
+			    "cursor %d\n", srwm, cursor_sr);
 
 		cxsr_enabled = true;
 	} else {
@@ -2259,8 +2269,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		intel_set_memory_cxsr(dev_priv, false);
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
-		      srwm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
+		    srwm);
 
 	/* 965 has limitations... */
 	I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
@@ -2300,7 +2311,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode =
-			&crtc->config->base.adjusted_mode;
+			&crtc->config->hw.adjusted_mode;
 		const struct drm_framebuffer *fb =
 			crtc->base.primary->state->fb;
 		int cpp;
@@ -2327,7 +2338,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode =
-			&crtc->config->base.adjusted_mode;
+			&crtc->config->hw.adjusted_mode;
 		const struct drm_framebuffer *fb =
 			crtc->base.primary->state->fb;
 		int cpp;
@@ -2350,7 +2361,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 			planeb_wm = wm_info->max_wm;
 	}
 
-	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
 
 	if (IS_I915GM(dev_priv) && enabled) {
 		struct drm_i915_gem_object *obj;
@@ -2375,7 +2387,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 		/* self-refresh has much higher latency */
 		static const int sr_latency_ns = 6000;
 		const struct drm_display_mode *adjusted_mode =
-			&enabled->config->base.adjusted_mode;
+			&enabled->config->hw.adjusted_mode;
 		const struct drm_framebuffer *fb =
 			enabled->base.primary->state->fb;
 		int clock = adjusted_mode->crtc_clock;
@@ -2392,7 +2404,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 		entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
 					   sr_latency_ns / 100);
 		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
-		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh entries: %d\n", entries);
 		srwm = wm_info->fifo_size - entries;
 		if (srwm < 0)
 			srwm = 1;
@@ -2404,8 +2417,9 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
-		      planea_wm, planeb_wm, cwm, srwm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
+		     planea_wm, planeb_wm, cwm, srwm);
 
 	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
 	fwater_hi = (cwm & 0x1f);
@@ -2433,7 +2447,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
 	if (crtc == NULL)
 		return;
 
-	adjusted_mode = &crtc->config->base.adjusted_mode;
+	adjusted_mode = &crtc->config->hw.adjusted_mode;
 	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
 				       &i845_wm_info,
 				       dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
@@ -2441,7 +2455,8 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
 	fwater_lo |= (3<<8) | planea_wm;
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: %d\n", planea_wm);
 
 	I915_WRITE(FW_BLC, fwater_lo);
 }
@@ -2515,7 +2530,7 @@ static u32 ilk_compute_pri_wm(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 
 	method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
 
@@ -2523,8 +2538,8 @@ static u32 ilk_compute_pri_wm(const struct intel_crtc_state *crtc_state,
 		return method1;
 
 	method2 = ilk_wm_method2(crtc_state->pixel_rate,
-				 crtc_state->base.adjusted_mode.crtc_htotal,
-				 drm_rect_width(&plane_state->base.dst),
+				 crtc_state->hw.adjusted_mode.crtc_htotal,
+				 drm_rect_width(&plane_state->uapi.dst),
 				 cpp, mem_value);
 
 	return min(method1, method2);
@@ -2547,12 +2562,12 @@ static u32 ilk_compute_spr_wm(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 
 	method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
 	method2 = ilk_wm_method2(crtc_state->pixel_rate,
-				 crtc_state->base.adjusted_mode.crtc_htotal,
-				 drm_rect_width(&plane_state->base.dst),
+				 crtc_state->hw.adjusted_mode.crtc_htotal,
+				 drm_rect_width(&plane_state->uapi.dst),
 				 cpp, mem_value);
 	return min(method1, method2);
 }
@@ -2573,11 +2588,12 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 
 	return ilk_wm_method2(crtc_state->pixel_rate,
-			      crtc_state->base.adjusted_mode.crtc_htotal,
-			      plane_state->base.crtc_w, cpp, mem_value);
+			      crtc_state->hw.adjusted_mode.crtc_htotal,
+			      drm_rect_width(&plane_state->uapi.dst),
+			      cpp, mem_value);
 }
 
 /* Only for WM_LP. */
@@ -2590,9 +2606,10 @@ static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
 	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
-	cpp = plane_state->base.fb->format->cpp[0];
+	cpp = plane_state->hw.fb->format->cpp[0];
 
-	return ilk_wm_fbc(pri_val, drm_rect_width(&plane_state->base.dst), cpp);
+	return ilk_wm_fbc(pri_val, drm_rect_width(&plane_state->uapi.dst),
+			  cpp);
 }
 
 static unsigned int
@@ -2656,7 +2673,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
 
 	/* HSW allows LP1+ watermarks even with multiple pipes */
 	if (level == 0 || config->num_pipes_active > 1) {
-		fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
+		fifo_size /= INTEL_NUM_PIPES(dev_priv);
 
 		/*
 		 * For some reason the non self refresh
@@ -2797,12 +2814,12 @@ static u32
 hsw_compute_linetime_wm(const struct intel_crtc_state *crtc_state)
 {
 	const struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(crtc_state->base.state);
+		to_intel_atomic_state(crtc_state->uapi.state);
 	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->base.adjusted_mode;
+		&crtc_state->hw.adjusted_mode;
 	u32 linetime, ips_linetime;
 
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return 0;
 	if (WARN_ON(adjusted_mode->crtc_clock == 0))
 		return 0;
@@ -2838,7 +2855,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 					     &val, NULL);
 
 		if (ret) {
-			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+			drm_err(&dev_priv->drm,
+				"SKL Mailbox read error = %d\n", ret);
 			return;
 		}
 
@@ -2856,7 +2874,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val, NULL);
 		if (ret) {
-			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+			drm_err(&dev_priv->drm,
+				"SKL Mailbox read error = %d\n", ret);
 			return;
 		}
 
@@ -2974,8 +2993,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
 		unsigned int latency = wm[level];
 
 		if (latency == 0) {
-			DRM_DEBUG_KMS("%s WM%d latency not provided\n",
-				      name, level);
+			drm_dbg_kms(&dev_priv->drm,
+				    "%s WM%d latency not provided\n",
+				    name, level);
 			continue;
 		}
 
@@ -2988,9 +3008,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
 		else if (level > 0)
 			latency *= 5;
 
-		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
-			      name, level, wm[level],
-			      latency / 10, latency % 10);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s WM%d latency %u (%u.%u usec)\n", name, level,
+			    wm[level], latency / 10, latency % 10);
 	}
 }
 
@@ -3024,7 +3044,8 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
 	if (!changed)
 		return;
 
-	DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "WM latency values increased to avoid potential underruns\n");
 	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
 	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
@@ -3052,7 +3073,8 @@ static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
 	dev_priv->wm.spr_latency[3] = 0;
 	dev_priv->wm.cur_latency[3] = 0;
 
-	DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "LP3 watermarks disabled due to potential for lost interrupts\n");
 	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
 	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
@@ -3102,7 +3124,7 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
 
 	/* At least LP0 must be valid */
 	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
-		DRM_DEBUG_KMS("LP0 watermark invalid\n");
+		drm_dbg_kms(&dev_priv->drm, "LP0 watermark invalid\n");
 		return false;
 	}
 
@@ -3112,13 +3134,11 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
 /* Compute new watermarks for the pipe */
 static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct intel_pipe_wm *pipe_wm;
-	struct drm_device *dev = state->dev;
-	const struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_plane *plane;
-	const struct drm_plane_state *plane_state;
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
 	const struct intel_plane_state *pristate = NULL;
 	const struct intel_plane_state *sprstate = NULL;
 	const struct intel_plane_state *curstate = NULL;
@@ -3127,23 +3147,21 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
 
 	pipe_wm = &crtc_state->wm.ilk.optimal;
 
-	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) {
-		const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
-
-		if (plane->type == DRM_PLANE_TYPE_PRIMARY)
-			pristate = ps;
-		else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
-			sprstate = ps;
-		else if (plane->type == DRM_PLANE_TYPE_CURSOR)
-			curstate = ps;
+	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+		if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
+			pristate = plane_state;
+		else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY)
+			sprstate = plane_state;
+		else if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			curstate = plane_state;
 	}
 
-	pipe_wm->pipe_enabled = crtc_state->base.active;
+	pipe_wm->pipe_enabled = crtc_state->hw.active;
 	if (sprstate) {
-		pipe_wm->sprites_enabled = sprstate->base.visible;
-		pipe_wm->sprites_scaled = sprstate->base.visible &&
-			(drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
-			 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
+		pipe_wm->sprites_enabled = sprstate->uapi.visible;
+		pipe_wm->sprites_scaled = sprstate->uapi.visible &&
+			(drm_rect_width(&sprstate->uapi.dst) != drm_rect_width(&sprstate->uapi.src) >> 16 ||
+			 drm_rect_height(&sprstate->uapi.dst) != drm_rect_height(&sprstate->uapi.src) >> 16);
 	}
 
 	usable_level = max_level;
@@ -3195,11 +3213,11 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
  */
 static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
+	struct intel_crtc *intel_crtc = to_intel_crtc(newstate->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
 	struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
 	struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(newstate->base.state);
+		to_intel_atomic_state(newstate->uapi.state);
 	const struct intel_crtc_state *oldstate =
 		intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
 	const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
@@ -3211,7 +3229,7 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
 	 * and after the vblank.
 	 */
 	*a = newstate->wm.ilk.optimal;
-	if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
+	if (!newstate->hw.active || drm_atomic_crtc_needs_modeset(&newstate->uapi) ||
 	    intel_state->skip_intermediate_wm)
 		return 0;
 
@@ -3621,10 +3639,8 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
 	dev_priv->wm.hw = *results;
 }
 
-bool ilk_disable_lp_wm(struct drm_device *dev)
+bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
 	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
 }
 
@@ -3662,10 +3678,47 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
 static bool
 intel_has_sagv(struct drm_i915_private *dev_priv)
 {
+	/* HACK! */
+	if (IS_GEN(dev_priv, 12))
+		return false;
+
 	return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
 		dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
 }
 
+static void
+skl_setup_sagv_block_time(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) >= 12) {
+		u32 val = 0;
+		int ret;
+
+		ret = sandybridge_pcode_read(dev_priv,
+					     GEN12_PCODE_READ_SAGV_BLOCK_TIME_US,
+					     &val, NULL);
+		if (!ret) {
+			dev_priv->sagv_block_time_us = val;
+			return;
+		}
+
+		drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n");
+	} else if (IS_GEN(dev_priv, 11)) {
+		dev_priv->sagv_block_time_us = 10;
+		return;
+	} else if (IS_GEN(dev_priv, 10)) {
+		dev_priv->sagv_block_time_us = 20;
+		return;
+	} else if (IS_GEN(dev_priv, 9)) {
+		dev_priv->sagv_block_time_us = 30;
+		return;
+	} else {
+		MISSING_CASE(INTEL_GEN(dev_priv));
+	}
+
+	/* Default to an unusable block time */
+	dev_priv->sagv_block_time_us = -1;
+}
+
 /*
  * SAGV dynamically adjusts the system agent voltage and clock frequencies
  * depending on power and performance requirements. The display engine access
@@ -3688,7 +3741,7 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 	if (dev_priv->sagv_status == I915_SAGV_ENABLED)
 		return 0;
 
-	DRM_DEBUG_KMS("Enabling SAGV\n");
+	drm_dbg_kms(&dev_priv->drm, "Enabling SAGV\n");
 	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				      GEN9_SAGV_ENABLE);
 
@@ -3699,11 +3752,11 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 	 * don't actually have SAGV.
 	 */
 	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
-		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (ret < 0) {
-		DRM_ERROR("Failed to enable SAGV\n");
+		drm_err(&dev_priv->drm, "Failed to enable SAGV\n");
 		return ret;
 	}
 
@@ -3722,7 +3775,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	if (dev_priv->sagv_status == I915_SAGV_DISABLED)
 		return 0;
 
-	DRM_DEBUG_KMS("Disabling SAGV\n");
+	drm_dbg_kms(&dev_priv->drm, "Disabling SAGV\n");
 	/* bspec says to keep retrying for at least 1 ms */
 	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				GEN9_SAGV_DISABLE,
@@ -3733,11 +3786,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	 * don't actually have SAGV.
 	 */
 	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
-		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (ret < 0) {
-		DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
+		drm_err(&dev_priv->drm, "Failed to disable SAGV (%d)\n", ret);
 		return ret;
 	}
 
@@ -3754,37 +3807,29 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
 	struct intel_crtc_state *crtc_state;
 	enum pipe pipe;
 	int level, latency;
-	int sagv_block_time_us;
 
 	if (!intel_has_sagv(dev_priv))
 		return false;
 
-	if (IS_GEN(dev_priv, 9))
-		sagv_block_time_us = 30;
-	else if (IS_GEN(dev_priv, 10))
-		sagv_block_time_us = 20;
-	else
-		sagv_block_time_us = 10;
-
 	/*
 	 * If there are no active CRTCs, no additional checks need be performed
 	 */
-	if (hweight32(state->active_crtcs) == 0)
+	if (hweight8(state->active_pipes) == 0)
 		return true;
 
 	/*
 	 * SKL+ workaround: bspec recommends we disable SAGV when we have
 	 * more then one pipe enabled
 	 */
-	if (hweight32(state->active_crtcs) > 1)
+	if (hweight8(state->active_pipes) > 1)
 		return false;
 
 	/* Since we're now guaranteed to only have one active CRTC... */
-	pipe = ffs(state->active_crtcs) - 1;
+	pipe = ffs(state->active_pipes) - 1;
 	crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 	crtc_state = to_intel_crtc_state(crtc->base.state);
 
-	if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
+	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
 		return false;
 
 	for_each_intel_plane_on_crtc(dev, crtc, plane) {
@@ -3812,7 +3857,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
 		 * incur memory latencies higher than sagv_block_time_us we
 		 * can't enable SAGV.
 		 */
-		if (latency < sagv_block_time_us)
+		if (latency < dev_priv->sagv_block_time_us)
 			return false;
 	}
 
@@ -3834,7 +3879,7 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
 	if (INTEL_GEN(dev_priv) < 11)
 		return ddb_size - 4; /* 4 blocks for bypass path allocation */
 
-	adjusted_mode = &crtc_state->base.adjusted_mode;
+	adjusted_mode = &crtc_state->hw.adjusted_mode;
 	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
 
 	/*
@@ -3863,26 +3908,26 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 				   struct skl_ddb_entry *alloc, /* out */
 				   int *num_active /* out */)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct drm_crtc *for_crtc = crtc_state->base.crtc;
+	struct drm_crtc *for_crtc = crtc_state->uapi.crtc;
 	const struct intel_crtc *crtc;
 	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
 	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
 	u16 ddb_size;
 	u32 i;
 
-	if (WARN_ON(!state) || !crtc_state->base.active) {
+	if (WARN_ON(!state) || !crtc_state->hw.active) {
 		alloc->start = 0;
 		alloc->end = 0;
-		*num_active = hweight32(dev_priv->active_crtcs);
+		*num_active = hweight8(dev_priv->active_pipes);
 		return;
 	}
 
 	if (intel_state->active_pipe_changes)
-		*num_active = hweight32(intel_state->active_crtcs);
+		*num_active = hweight8(intel_state->active_pipes);
 	else
-		*num_active = hweight32(dev_priv->active_crtcs);
+		*num_active = hweight8(dev_priv->active_pipes);
 
 	ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
 				      *num_active, ddb);
@@ -3911,11 +3956,11 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 	 */
 	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
 		const struct drm_display_mode *adjusted_mode =
-			&crtc_state->base.adjusted_mode;
+			&crtc_state->hw.adjusted_mode;
 		enum pipe pipe = crtc->pipe;
 		int hdisplay, vdisplay;
 
-		if (!crtc_state->base.enable)
+		if (!crtc_state->hw.enable)
 			continue;
 
 		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
@@ -3946,7 +3991,7 @@ static unsigned int
 skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
 		      int num_active)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int level, max_level = ilk_wm_max_level(dev_priv);
 	struct skl_wm_level wm = {};
 	int ret, min_ddb_alloc = 0;
@@ -4013,7 +4058,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
 		val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
 		val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
 
-		if (is_planar_yuv_format(fourcc))
+		if (fourcc &&
+		    drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc)))
 			swap(val, val2);
 
 		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
@@ -4071,7 +4117,6 @@ static uint_fixed_16_16_t
 skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
 			   const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
 	u32 src_w, src_h, dst_w, dst_h;
 	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
 	uint_fixed_16_16_t downscale_h, downscale_w;
@@ -4079,27 +4124,17 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
 	if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
 		return u32_to_fixed16(0);
 
-	/* n.b., src is 16.16 fixed point, dst is whole integer */
-	if (plane->id == PLANE_CURSOR) {
-		/*
-		 * Cursors only support 0/180 degree rotation,
-		 * hence no need to account for rotation here.
-		 */
-		src_w = plane_state->base.src_w >> 16;
-		src_h = plane_state->base.src_h >> 16;
-		dst_w = plane_state->base.crtc_w;
-		dst_h = plane_state->base.crtc_h;
-	} else {
-		/*
-		 * Src coordinates are already rotated by 270 degrees for
-		 * the 90/270 degree plane rotation cases (to match the
-		 * GTT mapping), hence no need to account for rotation here.
-		 */
-		src_w = drm_rect_width(&plane_state->base.src) >> 16;
-		src_h = drm_rect_height(&plane_state->base.src) >> 16;
-		dst_w = drm_rect_width(&plane_state->base.dst);
-		dst_h = drm_rect_height(&plane_state->base.dst);
-	}
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 *
+	 * n.b., src is 16.16 fixed point, dst is whole integer.
+	 */
+	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
+	dst_w = drm_rect_width(&plane_state->uapi.dst);
+	dst_h = drm_rect_height(&plane_state->uapi.dst);
 
 	fp_w_ratio = div_fixed16(src_w, dst_w);
 	fp_h_ratio = div_fixed16(src_h, dst_h);
@@ -4109,117 +4144,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
 	return mul_fixed16(downscale_w, downscale_h);
 }
 
-static uint_fixed_16_16_t
-skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
-{
-	uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
-
-	if (!crtc_state->base.enable)
-		return pipe_downscale;
-
-	if (crtc_state->pch_pfit.enabled) {
-		u32 src_w, src_h, dst_w, dst_h;
-		u32 pfit_size = crtc_state->pch_pfit.size;
-		uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
-		uint_fixed_16_16_t downscale_h, downscale_w;
-
-		src_w = crtc_state->pipe_src_w;
-		src_h = crtc_state->pipe_src_h;
-		dst_w = pfit_size >> 16;
-		dst_h = pfit_size & 0xffff;
-
-		if (!dst_w || !dst_h)
-			return pipe_downscale;
-
-		fp_w_ratio = div_fixed16(src_w, dst_w);
-		fp_h_ratio = div_fixed16(src_h, dst_h);
-		downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
-		downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
-
-		pipe_downscale = mul_fixed16(downscale_w, downscale_h);
-	}
-
-	return pipe_downscale;
-}
-
-int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
-				  struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-	struct drm_atomic_state *state = crtc_state->base.state;
-	struct drm_plane *plane;
-	const struct drm_plane_state *drm_plane_state;
-	int crtc_clock, dotclk;
-	u32 pipe_max_pixel_rate;
-	uint_fixed_16_16_t pipe_downscale;
-	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
-
-	if (!crtc_state->base.enable)
-		return 0;
-
-	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
-		uint_fixed_16_16_t plane_downscale;
-		uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
-		int bpp;
-		const struct intel_plane_state *plane_state =
-			to_intel_plane_state(drm_plane_state);
-
-		if (!intel_wm_plane_visible(crtc_state, plane_state))
-			continue;
-
-		if (WARN_ON(!plane_state->base.fb))
-			return -EINVAL;
-
-		plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
-		bpp = plane_state->base.fb->format->cpp[0] * 8;
-		if (bpp == 64)
-			plane_downscale = mul_fixed16(plane_downscale,
-						      fp_9_div_8);
-
-		max_downscale = max_fixed16(plane_downscale, max_downscale);
-	}
-	pipe_downscale = skl_pipe_downscale_amount(crtc_state);
-
-	pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
-
-	crtc_clock = crtc_state->base.adjusted_mode.crtc_clock;
-	dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
-
-	if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
-		dotclk *= 2;
-
-	pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
-
-	if (pipe_max_pixel_rate < crtc_clock) {
-		DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static u64
 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
 			     const struct intel_plane_state *plane_state,
-			     const int plane)
+			     int color_plane)
 {
-	struct intel_plane *intel_plane = to_intel_plane(plane_state->base.plane);
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	u32 data_rate;
 	u32 width = 0, height = 0;
-	struct drm_framebuffer *fb;
-	u32 format;
 	uint_fixed_16_16_t down_scale_amount;
 	u64 rate;
 
-	if (!plane_state->base.visible)
+	if (!plane_state->uapi.visible)
 		return 0;
 
-	fb = plane_state->base.fb;
-	format = fb->format->format;
-
-	if (intel_plane->id == PLANE_CURSOR)
+	if (plane->id == PLANE_CURSOR)
 		return 0;
-	if (plane == 1 && !is_planar_yuv_format(format))
+
+	if (color_plane == 1 &&
+	    !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
 		return 0;
 
 	/*
@@ -4227,11 +4171,11 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
 	 * the 90/270 degree plane rotation cases (to match the
 	 * GTT mapping), hence no need to account for rotation here.
 	 */
-	width = drm_rect_width(&plane_state->base.src) >> 16;
-	height = drm_rect_height(&plane_state->base.src) >> 16;
+	width = drm_rect_width(&plane_state->uapi.src) >> 16;
+	height = drm_rect_height(&plane_state->uapi.src) >> 16;
 
 	/* UV plane does 1/2 pixel sub-sampling */
-	if (plane == 1 && is_planar_yuv_format(format)) {
+	if (color_plane == 1) {
 		width /= 2;
 		height /= 2;
 	}
@@ -4242,7 +4186,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
 
 	rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
 
-	rate *= fb->format->cpp[plane];
+	rate *= fb->format->cpp[color_plane];
 	return rate;
 }
 
@@ -4251,19 +4195,17 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 				 u64 *plane_data_rate,
 				 u64 *uv_plane_data_rate)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
-	struct drm_plane *plane;
-	const struct drm_plane_state *drm_plane_state;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
 	u64 total_data_rate = 0;
 
 	if (WARN_ON(!state))
 		return 0;
 
 	/* Calculate and cache data rate for each plane */
-	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
-		enum plane_id plane_id = to_intel_plane(plane)->id;
-		const struct intel_plane_state *plane_state =
-			to_intel_plane_state(drm_plane_state);
+	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+		enum plane_id plane_id = plane->id;
 		u64 rate;
 
 		/* packed/y */
@@ -4284,21 +4226,19 @@ static u64
 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 				 u64 *plane_data_rate)
 {
-	struct drm_plane *plane;
-	const struct drm_plane_state *drm_plane_state;
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
 	u64 total_data_rate = 0;
 
-	if (WARN_ON(!crtc_state->base.state))
+	if (WARN_ON(!crtc_state->uapi.state))
 		return 0;
 
 	/* Calculate and cache data rate for each plane */
-	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
-		const struct intel_plane_state *plane_state =
-			to_intel_plane_state(drm_plane_state);
-		enum plane_id plane_id = to_intel_plane(plane)->id;
+	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+		enum plane_id plane_id = plane->id;
 		u64 rate;
 
-		if (!plane_state->linked_plane) {
+		if (!plane_state->planar_linked_plane) {
 			rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
 			plane_data_rate[plane_id] = rate;
 			total_data_rate += rate;
@@ -4307,17 +4247,17 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 
 			/*
 			 * The slave plane might not iterate in
-			 * drm_atomic_crtc_state_for_each_plane_state(),
+			 * intel_atomic_crtc_state_for_each_plane_state(),
 			 * and needs the master plane state which may be
 			 * NULL if we try get_new_plane_state(), so we
 			 * always calculate from the master.
 			 */
-			if (plane_state->slave)
+			if (plane_state->planar_slave)
 				continue;
 
 			/* Y plane rate is calculated on the slave */
 			rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
-			y_plane_id = plane_state->linked_plane->id;
+			y_plane_id = plane_state->planar_linked_plane->id;
 			plane_data_rate[y_plane_id] = rate;
 			total_data_rate += rate;
 
@@ -4334,8 +4274,8 @@ static int
 skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
 		      struct skl_ddb_allocation *ddb /* out */)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
-	struct drm_crtc *crtc = crtc_state->base.crtc;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
+	struct drm_crtc *crtc = crtc_state->uapi.crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct skl_ddb_entry *alloc = &crtc_state->wm.skl.ddb;
@@ -4357,7 +4297,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
 	if (WARN_ON(!state))
 		return 0;
 
-	if (!crtc_state->base.active) {
+	if (!crtc_state->hw.active) {
 		alloc->start = alloc->end = 0;
 		return 0;
 	}
@@ -4400,8 +4340,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
 				&crtc_state->wm.skl.optimal.planes[plane_id];
 
 			if (plane_id == PLANE_CURSOR) {
-				if (WARN_ON(wm->wm[level].min_ddb_alloc >
-					    total[PLANE_CURSOR])) {
+				if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) {
+					WARN_ON(wm->wm[level].min_ddb_alloc != U16_MAX);
 					blocks = U32_MAX;
 					break;
 				}
@@ -4419,9 +4359,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
 	}
 
 	if (level < 0) {
-		DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
-		DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
-			      alloc_size);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Requested display configuration exceeds system DDB limitations");
+		drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n",
+			    blocks, alloc_size);
 		return -EINVAL;
 	}
 
@@ -4599,7 +4540,7 @@ intel_get_linetime_us(const struct intel_crtc_state *crtc_state)
 	u32 crtc_htotal;
 	uint_fixed_16_16_t linetime_us;
 
-	if (!crtc_state->base.active)
+	if (!crtc_state->hw.active)
 		return u32_to_fixed16(0);
 
 	pixel_rate = crtc_state->pixel_rate;
@@ -4607,7 +4548,7 @@ intel_get_linetime_us(const struct intel_crtc_state *crtc_state)
 	if (WARN_ON(pixel_rate == 0))
 		return u32_to_fixed16(0);
 
-	crtc_htotal = crtc_state->base.adjusted_mode.crtc_htotal;
+	crtc_htotal = crtc_state->hw.adjusted_mode.crtc_htotal;
 	linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
 
 	return linetime_us;
@@ -4642,13 +4583,15 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 		      u32 plane_pixel_rate, struct skl_wm_params *wp,
 		      int color_plane)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 interm_pbpl;
 
 	/* only planar format has two planes */
-	if (color_plane == 1 && !is_planar_yuv_format(format->format)) {
-		DRM_DEBUG_KMS("Non planar format have single plane\n");
+	if (color_plane == 1 &&
+	    !intel_format_info_is_yuv_semiplanar(format, modifier)) {
+		drm_dbg_kms(&dev_priv->drm,
+			    "Non planar format have single plane\n");
 		return -EINVAL;
 	}
 
@@ -4659,7 +4602,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 	wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
 	wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 			 modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
-	wp->is_planar = is_planar_yuv_format(format->format);
+	wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
 
 	wp->width = width;
 	if (color_plane == 1 && wp->is_planar)
@@ -4731,24 +4674,19 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
 			    const struct intel_plane_state *plane_state,
 			    struct skl_wm_params *wp, int color_plane)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int width;
 
-	if (plane->id == PLANE_CURSOR) {
-		width = plane_state->base.crtc_w;
-	} else {
-		/*
-		 * Src coordinates are already rotated by 270 degrees for
-		 * the 90/270 degree plane rotation cases (to match the
-		 * GTT mapping), hence no need to account for rotation here.
-		 */
-		width = drm_rect_width(&plane_state->base.src) >> 16;
-	}
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
+	width = drm_rect_width(&plane_state->uapi.src) >> 16;
 
 	return skl_compute_wm_params(crtc_state, width,
 				     fb->format, fb->modifier,
-				     plane_state->base.rotation,
+				     plane_state->hw.rotation,
 				     skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
 				     wp, color_plane);
 }
@@ -4768,7 +4706,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
 				 const struct skl_wm_level *result_prev,
 				 struct skl_wm_level *result /* out */)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 latency = dev_priv->wm.skl_latency[level];
 	uint_fixed_16_16_t method1, method2;
 	uint_fixed_16_16_t selected_result;
@@ -4794,14 +4732,14 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
 	method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
 				 wp->cpp, latency, wp->dbuf_block_size);
 	method2 = skl_wm_method2(wp->plane_pixel_rate,
-				 crtc_state->base.adjusted_mode.crtc_htotal,
+				 crtc_state->hw.adjusted_mode.crtc_htotal,
 				 latency,
 				 wp->plane_blocks_per_line);
 
 	if (wp->y_tiled) {
 		selected_result = max_fixed16(method2, wp->y_tile_minimum);
 	} else {
-		if ((wp->cpp * crtc_state->base.adjusted_mode.crtc_htotal /
+		if ((wp->cpp * crtc_state->hw.adjusted_mode.crtc_htotal /
 		     wp->dbuf_block_size < 1) &&
 		     (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
 			selected_result = method2;
@@ -4892,7 +4830,7 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
 		      const struct skl_wm_params *wm_params,
 		      struct skl_wm_level *levels)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int level, max_level = ilk_wm_max_level(dev_priv);
 	struct skl_wm_level *result_prev = &levels[0];
 
@@ -4909,7 +4847,7 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
 static u32
 skl_compute_linetime_wm(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_atomic_state *state = crtc_state->base.state;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
 	struct drm_i915_private *dev_priv = to_i915(state->dev);
 	uint_fixed_16_16_t linetime_us;
 	u32 linetime_wm;
@@ -4928,7 +4866,7 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
 				      const struct skl_wm_params *wp,
 				      struct skl_plane_wm *wm)
 {
-	struct drm_device *dev = crtc_state->base.crtc->dev;
+	struct drm_device *dev = crtc_state->uapi.crtc->dev;
 	const struct drm_i915_private *dev_priv = to_i915(dev);
 	u16 trans_min, trans_y_tile_min;
 	const u16 trans_amount = 10; /* This is configurable amount */
@@ -5026,8 +4964,8 @@ static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
 static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
 			      const struct intel_plane_state *plane_state)
 {
-	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	enum plane_id plane_id = plane->id;
 	int ret;
 
@@ -5052,16 +4990,16 @@ static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
 static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
 			      const struct intel_plane_state *plane_state)
 {
-	enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
+	enum plane_id plane_id = to_intel_plane(plane_state->uapi.plane)->id;
 	int ret;
 
 	/* Watermarks calculated in master */
-	if (plane_state->slave)
+	if (plane_state->planar_slave)
 		return 0;
 
-	if (plane_state->linked_plane) {
-		const struct drm_framebuffer *fb = plane_state->base.fb;
-		enum plane_id y_plane_id = plane_state->linked_plane->id;
+	if (plane_state->planar_linked_plane) {
+		const struct drm_framebuffer *fb = plane_state->hw.fb;
+		enum plane_id y_plane_id = plane_state->planar_linked_plane->id;
 
 		WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
 		WARN_ON(!fb->format->is_yuv ||
@@ -5088,10 +5026,10 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
 
 static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
-	struct drm_plane *plane;
-	const struct drm_plane_state *drm_plane_state;
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
 	int ret;
 
 	/*
@@ -5100,10 +5038,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
 	 */
 	memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
 
-	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state,
-						   &crtc_state->base) {
-		const struct intel_plane_state *plane_state =
-			to_intel_plane_state(drm_plane_state);
+	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state,
+						     crtc_state) {
 
 		if (INTEL_GEN(dev_priv) >= 11)
 			ret = icl_build_plane_wm(crtc_state, plane_state);
@@ -5263,25 +5199,12 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
 	return false;
 }
 
-static u32
-pipes_modified(struct intel_atomic_state *state)
-{
-	struct intel_crtc *crtc;
-	struct intel_crtc_state *crtc_state;
-	u32 i, ret = 0;
-
-	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
-		ret |= drm_crtc_mask(&crtc->base);
-
-	return ret;
-}
-
 static int
 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
 			    struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->uapi.state);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_plane *plane;
 
@@ -5347,7 +5270,7 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 	struct intel_crtc *crtc;
 	int i;
 
-	if ((drm_debug & DRM_UT_KMS) == 0)
+	if (!drm_debug_enabled(DRM_UT_KMS))
 		return;
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
@@ -5367,10 +5290,11 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 			if (skl_ddb_entry_equal(old, new))
 				continue;
 
-			DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old->start, old->end, new->start, new->end,
-				      skl_ddb_entry_size(old), skl_ddb_entry_size(new));
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old->start, old->end, new->start, new->end,
+				    skl_ddb_entry_size(old), skl_ddb_entry_size(new));
 		}
 
 		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
@@ -5383,104 +5307,99 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 			if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
 				continue;
 
-			DRM_DEBUG_KMS("[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
-				      " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
-				      plane->base.base.id, plane->base.name,
-				      enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
-				      enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
-				      enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
-				      enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
-				      enast(old_wm->trans_wm.plane_en),
-				      enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
-				      enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
-				      enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
-				      enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
-				      enast(new_wm->trans_wm.plane_en));
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
+				    " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
+				    plane->base.base.id, plane->base.name,
+				    enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
+				    enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
+				    enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
+				    enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
+				    enast(old_wm->trans_wm.plane_en),
+				    enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
+				    enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
+				    enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
+				    enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
+				    enast(new_wm->trans_wm.plane_en));
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
 				      " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
-				      plane->base.base.id, plane->base.name,
-				      enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
-				      enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
-				      enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
-				      enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
-				      enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
-				      enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
-				      enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
-				      enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
-				      enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
-
-				      enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
-				      enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
-				      enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
-				      enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
-				      enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
-				      enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
-				      enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
-				      enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
-				      enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				      " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
-				      old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
-				      old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
-				      old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
-				      old_wm->trans_wm.plane_res_b,
-				      new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
-				      new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
-				      new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
-				      new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
-				      new_wm->trans_wm.plane_res_b);
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				      " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
-				      old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
-				      old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
-				      old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
-				      old_wm->trans_wm.min_ddb_alloc,
-				      new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
-				      new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
-				      new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
-				      new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
-				      new_wm->trans_wm.min_ddb_alloc);
+				    plane->base.base.id, plane->base.name,
+				    enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
+				    enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
+				    enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
+				    enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
+				    enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
+				    enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
+				    enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
+				    enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
+				    enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
+
+				    enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
+				    enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
+				    enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
+				    enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
+				    enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
+				    enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
+				    enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
+				    enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
+				    enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
+				    old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
+				    old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
+				    old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
+				    old_wm->trans_wm.plane_res_b,
+				    new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
+				    new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
+				    new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
+				    new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
+				    new_wm->trans_wm.plane_res_b);
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
+				    old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
+				    old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
+				    old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
+				    old_wm->trans_wm.min_ddb_alloc,
+				    new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
+				    new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
+				    new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
+				    new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
+				    new_wm->trans_wm.min_ddb_alloc);
 		}
 	}
 }
 
-static int
-skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
+static int intel_add_all_pipes(struct intel_atomic_state *state)
 {
-	struct drm_device *dev = state->base.dev;
-	const struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc *crtc;
-	struct intel_crtc_state *crtc_state;
-	u32 realloc_pipes = pipes_modified(state);
-	int ret, i;
 
-	/*
-	 * When we distrust bios wm we always need to recompute to set the
-	 * expected DDB allocations for each CRTC.
-	 */
-	if (dev_priv->wm.distrust_bios_wm)
-		(*changed) = true;
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_crtc_state *crtc_state;
 
-	/*
-	 * If this transaction isn't actually touching any CRTC's, don't
-	 * bother with watermark calculation.  Note that if we pass this
-	 * test, we're guaranteed to hold at least one CRTC state mutex,
-	 * which means we can safely use values like dev_priv->active_crtcs
-	 * since any racing commits that want to update them would need to
-	 * hold _all_ CRTC state mutexes.
-	 */
-	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
-		(*changed) = true;
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+	}
 
-	if (!*changed)
-		return 0;
+	return 0;
+}
+
+static int
+skl_ddb_add_affected_pipes(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	int ret;
 
 	/*
 	 * If this is our first atomic update following hardware readout,
@@ -5489,21 +5408,21 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
 	 * ensure a full DDB recompute.
 	 */
 	if (dev_priv->wm.distrust_bios_wm) {
-		ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+		ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
 				       state->base.acquire_ctx);
 		if (ret)
 			return ret;
 
-		state->active_pipe_changes = ~0;
+		state->active_pipe_changes = INTEL_INFO(dev_priv)->pipe_mask;
 
 		/*
-		 * We usually only initialize state->active_crtcs if we
+		 * We usually only initialize state->active_pipes if we
 		 * we're doing a modeset; make sure this field is always
 		 * initialized during the sanitization process that happens
 		 * on the first commit too.
 		 */
 		if (!state->modeset)
-			state->active_crtcs = dev_priv->active_crtcs;
+			state->active_pipes = dev_priv->active_pipes;
 	}
 
 	/*
@@ -5520,18 +5439,11 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
 	 * to grab the lock on *all* CRTC's.
 	 */
 	if (state->active_pipe_changes || state->modeset) {
-		realloc_pipes = ~0;
-		state->wm_results.dirty_pipes = ~0;
-	}
+		state->wm_results.dirty_pipes = INTEL_INFO(dev_priv)->pipe_mask;
 
-	/*
-	 * We're not recomputing for the pipes not included in the commit, so
-	 * make sure we start with the current state.
-	 */
-	for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
-		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
+		ret = intel_add_all_pipes(state);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -5581,7 +5493,7 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
 		 * power well the hardware state will go out of sync
 		 * with the software state.
 		 */
-		if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
+		if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) &&
 		    skl_plane_wm_equals(dev_priv,
 					&old_crtc_state->wm.skl.optimal.planes[plane_id],
 					&new_crtc_state->wm.skl.optimal.planes[plane_id]))
@@ -5604,14 +5516,13 @@ skl_compute_wm(struct intel_atomic_state *state)
 	struct intel_crtc_state *new_crtc_state;
 	struct intel_crtc_state *old_crtc_state;
 	struct skl_ddb_values *results = &state->wm_results;
-	bool changed = false;
 	int ret, i;
 
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
-	ret = skl_ddb_add_affected_pipes(state, &changed);
-	if (ret || !changed)
+	ret = skl_ddb_add_affected_pipes(state);
+	if (ret)
 		return ret;
 
 	/*
@@ -5633,7 +5544,7 @@ skl_compute_wm(struct intel_atomic_state *state)
 		if (!skl_pipe_wm_equals(crtc,
 					&old_crtc_state->wm.skl.optimal,
 					&new_crtc_state->wm.skl.optimal))
-			results->dirty_pipes |= drm_crtc_mask(&crtc->base);
+			results->dirty_pipes |= BIT(crtc->pipe);
 	}
 
 	ret = skl_compute_ddb(state);
@@ -5646,34 +5557,35 @@ skl_compute_wm(struct intel_atomic_state *state)
 }
 
 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
-				      struct intel_crtc_state *crtc_state)
+				      struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
 	enum pipe pipe = crtc->pipe;
 
-	if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
+	if ((state->wm_results.dirty_pipes & BIT(crtc->pipe)) == 0)
 		return;
 
 	I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
 }
 
 static void skl_initial_wm(struct intel_atomic_state *state,
-			   struct intel_crtc_state *crtc_state)
+			   struct intel_crtc *crtc)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_device *dev = intel_crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 	struct skl_ddb_values *results = &state->wm_results;
 
-	if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
+	if ((results->dirty_pipes & BIT(crtc->pipe)) == 0)
 		return;
 
 	mutex_lock(&dev_priv->wm.wm_mutex);
 
-	if (crtc_state->base.active_changed)
-		skl_atomic_update_crtc_wm(state, crtc_state);
+	if (crtc_state->uapi.active_changed)
+		skl_atomic_update_crtc_wm(state, crtc);
 
 	mutex_unlock(&dev_priv->wm.wm_mutex);
 }
@@ -5729,10 +5641,11 @@ static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
 }
 
 static void ilk_initial_watermarks(struct intel_atomic_state *state,
-				   struct intel_crtc_state *crtc_state)
+				   struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	mutex_lock(&dev_priv->wm.wm_mutex);
 	crtc->wm.active.ilk = crtc_state->wm.ilk.intermediate;
@@ -5741,10 +5654,11 @@ static void ilk_initial_watermarks(struct intel_atomic_state *state,
 }
 
 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
-				    struct intel_crtc_state *crtc_state)
+				    struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	if (!crtc_state->wm.need_postvbl_update)
 		return;
@@ -5816,10 +5730,10 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
 		skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
 
 		if (crtc->active)
-			hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
+			hw->dirty_pipes |= BIT(crtc->pipe);
 	}
 
-	if (dev_priv->active_crtcs) {
+	if (dev_priv->active_pipes) {
 		/* Fully recompute DDB on first atomic commit */
 		dev_priv->wm.distrust_bios_wm = true;
 	}
@@ -6052,19 +5966,22 @@ void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
 		crtc_state->wm.g4x.optimal = *active;
 		crtc_state->wm.g4x.intermediate = *active;
 
-		DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
-			      pipe_name(pipe),
-			      wm->pipe[pipe].plane[PLANE_PRIMARY],
-			      wm->pipe[pipe].plane[PLANE_CURSOR],
-			      wm->pipe[pipe].plane[PLANE_SPRITE0]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
+			    pipe_name(pipe),
+			    wm->pipe[pipe].plane[PLANE_PRIMARY],
+			    wm->pipe[pipe].plane[PLANE_CURSOR],
+			    wm->pipe[pipe].plane[PLANE_SPRITE0]);
 	}
 
-	DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
-		      wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
-	DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
-		      wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
-	DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
-		      yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
+		    wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
+		    wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
+	drm_dbg_kms(&dev_priv->drm, "Initial SR=%s HPLL=%s FBC=%s\n",
+		    yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
 }
 
 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
@@ -6085,7 +6002,7 @@ void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
 		enum plane_id plane_id = plane->id;
 		int level;
 
-		if (plane_state->base.visible)
+		if (plane_state->uapi.visible)
 			continue;
 
 		for (level = 0; level < 3; level++) {
@@ -6156,8 +6073,9 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 
 		if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
 			      FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
-			DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
-				      "assuming DDR DVFS is disabled\n");
+			drm_dbg_kms(&dev_priv->drm,
+				    "Punit not acking DDR DVFS request, "
+				    "assuming DDR DVFS is disabled\n");
 			dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
 		} else {
 			val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
@@ -6208,16 +6126,18 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 		crtc_state->wm.vlv.optimal = *active;
 		crtc_state->wm.vlv.intermediate = *active;
 
-		DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
-			      pipe_name(pipe),
-			      wm->pipe[pipe].plane[PLANE_PRIMARY],
-			      wm->pipe[pipe].plane[PLANE_CURSOR],
-			      wm->pipe[pipe].plane[PLANE_SPRITE0],
-			      wm->pipe[pipe].plane[PLANE_SPRITE1]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
+			    pipe_name(pipe),
+			    wm->pipe[pipe].plane[PLANE_PRIMARY],
+			    wm->pipe[pipe].plane[PLANE_CURSOR],
+			    wm->pipe[pipe].plane[PLANE_SPRITE0],
+			    wm->pipe[pipe].plane[PLANE_SPRITE1]);
 	}
 
-	DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
-		      wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
+		    wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
 }
 
 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
@@ -6240,7 +6160,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
 		enum plane_id plane_id = plane->id;
 		int level;
 
-		if (plane_state->base.visible)
+		if (plane_state->uapi.visible)
 			continue;
 
 		for (level = 0; level < wm_state->num_levels; level++) {
@@ -6397,2488 +6317,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
 	intel_enable_ipc(dev_priv);
 }
 
-/*
- * Lock protecting IPS related data structures
- */
-DEFINE_SPINLOCK(mchdev_lock);
-
-bool ironlake_set_drps(struct drm_i915_private *i915, u8 val)
-{
-	struct intel_uncore *uncore = &i915->uncore;
-	u16 rgvswctl;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
-	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
-		return false; /* still busy with another command */
-	}
-
-	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
-	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
-	intel_uncore_posting_read16(uncore, MEMSWCTL);
-
-	rgvswctl |= MEMCTL_CMD_STS;
-	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
-
-	return true;
-}
-
-static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
-{
-	struct intel_uncore *uncore = &dev_priv->uncore;
-	u32 rgvmodectl;
-	u8 fmax, fmin, fstart, vstart;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
-
-	/* Enable temp reporting */
-	intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
-	intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE);
-
-	/* 100ms RC evaluation intervals */
-	intel_uncore_write(uncore, RCUPEI, 100000);
-	intel_uncore_write(uncore, RCDNEI, 100000);
-
-	/* Set max/min thresholds to 90ms and 80ms respectively */
-	intel_uncore_write(uncore, RCBMAXAVG, 90000);
-	intel_uncore_write(uncore, RCBMINAVG, 80000);
-
-	intel_uncore_write(uncore, MEMIHYST, 1);
-
-	/* Set up min, max, and cur for interrupt handling */
-	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-		MEMMODE_FSTART_SHIFT;
-
-	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
-		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
-
-	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
-	dev_priv->ips.fstart = fstart;
-
-	dev_priv->ips.max_delay = fstart;
-	dev_priv->ips.min_delay = fmin;
-	dev_priv->ips.cur_delay = fstart;
-
-	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-			 fmax, fmin, fstart);
-
-	intel_uncore_write(uncore,
-			   MEMINTREN,
-			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
-
-	/*
-	 * Interrupts will be enabled in ironlake_irq_postinstall
-	 */
-
-	intel_uncore_write(uncore, VIDSTART, vstart);
-	intel_uncore_posting_read(uncore, VIDSTART);
-
-	rgvmodectl |= MEMMODE_SWMODE_EN;
-	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
-
-	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
-			     MEMCTL_CMD_STS) == 0, 10))
-		DRM_ERROR("stuck trying to change perf mode\n");
-	mdelay(1);
-
-	ironlake_set_drps(dev_priv, fstart);
-
-	dev_priv->ips.last_count1 =
-		intel_uncore_read(uncore, DMIEC) +
-		intel_uncore_read(uncore, DDREC) +
-		intel_uncore_read(uncore, CSIEC);
-	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-	dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
-	dev_priv->ips.last_time2 = ktime_get_raw_ns();
-
-	spin_unlock_irq(&mchdev_lock);
-}
-
-static void ironlake_disable_drps(struct drm_i915_private *i915)
-{
-	struct intel_uncore *uncore = &i915->uncore;
-	u16 rgvswctl;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
-
-	/* Ack interrupts, disable EFC interrupt */
-	intel_uncore_write(uncore,
-			   MEMINTREN,
-			   intel_uncore_read(uncore, MEMINTREN) &
-			   ~MEMINT_EVAL_CHG_EN);
-	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
-	intel_uncore_write(uncore,
-			   DEIER,
-			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
-	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
-	intel_uncore_write(uncore,
-			   DEIMR,
-			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
-
-	/* Go back to the starting frequency */
-	ironlake_set_drps(i915, i915->ips.fstart);
-	mdelay(1);
-	rgvswctl |= MEMCTL_CMD_STS;
-	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
-	mdelay(1);
-
-	spin_unlock_irq(&mchdev_lock);
-}
-
-/* There's a funny hw issue where the hw returns all 0 when reading from
- * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
- * ourselves, instead of doing a rmw cycle (which might result in us clearing
- * all limits and the gpu stuck at whatever frequency it is at atm).
- */
-static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 limits;
-
-	/* Only set the down limit when we've reached the lowest level to avoid
-	 * getting more interrupts, otherwise leave this clear. This prevents a
-	 * race in the hw when coming out of rc6: There's a tiny window where
-	 * the hw runs at the minimal clock before selecting the desired
-	 * frequency, if the down threshold expires in that window we will not
-	 * receive a down interrupt. */
-	if (INTEL_GEN(dev_priv) >= 9) {
-		limits = (rps->max_freq_softlimit) << 23;
-		if (val <= rps->min_freq_softlimit)
-			limits |= (rps->min_freq_softlimit) << 14;
-	} else {
-		limits = rps->max_freq_softlimit << 24;
-		if (val <= rps->min_freq_softlimit)
-			limits |= rps->min_freq_softlimit << 16;
-	}
-
-	return limits;
-}
-
-static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 threshold_up = 0, threshold_down = 0; /* in % */
-	u32 ei_up = 0, ei_down = 0;
-
-	lockdep_assert_held(&rps->power.mutex);
-
-	if (new_power == rps->power.mode)
-		return;
-
-	/* Note the units here are not exactly 1us, but 1280ns. */
-	switch (new_power) {
-	case LOW_POWER:
-		/* Upclock if more than 95% busy over 16ms */
-		ei_up = 16000;
-		threshold_up = 95;
-
-		/* Downclock if less than 85% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 85;
-		break;
-
-	case BETWEEN:
-		/* Upclock if more than 90% busy over 13ms */
-		ei_up = 13000;
-		threshold_up = 90;
-
-		/* Downclock if less than 75% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 75;
-		break;
-
-	case HIGH_POWER:
-		/* Upclock if more than 85% busy over 10ms */
-		ei_up = 10000;
-		threshold_up = 85;
-
-		/* Downclock if less than 60% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 60;
-		break;
-	}
-
-	/* When byt can survive without system hang with dynamic
-	 * sw freq adjustments, this restriction can be lifted.
-	 */
-	if (IS_VALLEYVIEW(dev_priv))
-		goto skip_hw_write;
-
-	I915_WRITE(GEN6_RP_UP_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
-	I915_WRITE(GEN6_RP_UP_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_up * threshold_up / 100));
-
-	I915_WRITE(GEN6_RP_DOWN_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_down * threshold_down / 100));
-
-	I915_WRITE(GEN6_RP_CONTROL,
-		   (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-skip_hw_write:
-	rps->power.mode = new_power;
-	rps->power.up_threshold = threshold_up;
-	rps->power.down_threshold = threshold_down;
-}
-
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int new_power;
-
-	new_power = rps->power.mode;
-	switch (rps->power.mode) {
-	case LOW_POWER:
-		if (val > rps->efficient_freq + 1 &&
-		    val > rps->cur_freq)
-			new_power = BETWEEN;
-		break;
-
-	case BETWEEN:
-		if (val <= rps->efficient_freq &&
-		    val < rps->cur_freq)
-			new_power = LOW_POWER;
-		else if (val >= rps->rp0_freq &&
-			 val > rps->cur_freq)
-			new_power = HIGH_POWER;
-		break;
-
-	case HIGH_POWER:
-		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
-		    val < rps->cur_freq)
-			new_power = BETWEEN;
-		break;
-	}
-	/* Max/min bins are special */
-	if (val <= rps->min_freq_softlimit)
-		new_power = LOW_POWER;
-	if (val >= rps->max_freq_softlimit)
-		new_power = HIGH_POWER;
-
-	mutex_lock(&rps->power.mutex);
-	if (rps->power.interactive)
-		new_power = HIGH_POWER;
-	rps_set_power(dev_priv, new_power);
-	mutex_unlock(&rps->power.mutex);
-}
-
-void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
-{
-	struct intel_rps *rps = &i915->gt_pm.rps;
-
-	if (INTEL_GEN(i915) < 6)
-		return;
-
-	mutex_lock(&rps->power.mutex);
-	if (interactive) {
-		if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
-			rps_set_power(i915, HIGH_POWER);
-	} else {
-		GEM_BUG_ON(!rps->power.interactive);
-		rps->power.interactive--;
-	}
-	mutex_unlock(&rps->power.mutex);
-}
-
-static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 mask = 0;
-
-	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
-	if (val > rps->min_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
-	if (val < rps->max_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
-
-	mask &= dev_priv->pm_rps_events;
-
-	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
-}
-
-/* gen6_set_rps is called to update the frequency request, but should also be
- * called when the range (min_delay and max_delay) is modified so that we can
- * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
-static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* min/max delay may still have been modified so be sure to
-	 * write the limits value.
-	 */
-	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
-		if (INTEL_GEN(dev_priv) >= 9)
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN9_FREQUENCY(val));
-		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			I915_WRITE(GEN6_RPNSWREQ,
-				   HSW_FREQUENCY(val));
-		else
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN6_FREQUENCY(val) |
-				   GEN6_OFFSET(0) |
-				   GEN6_AGGRESSIVE_TURBO);
-	}
-
-	/* Make sure we continue to get interrupts
-	 * until we hit the minimum or maximum frequencies.
-	 */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	rps->cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	int err;
-
-	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
-		      "Odd GPU freq value\n"))
-		val &= ~1;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	if (val != dev_priv->gt_pm.rps.cur_freq) {
-		vlv_punit_get(dev_priv);
-		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-		vlv_punit_put(dev_priv);
-		if (err)
-			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
-	}
-
-	dev_priv->gt_pm.rps.cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
- *
- * * If Gfx is Idle, then
- * 1. Forcewake Media well.
- * 2. Request idle freq.
- * 3. Release Forcewake of Media well.
-*/
-static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val = rps->idle_freq;
-	int err;
-
-	if (rps->cur_freq <= val)
-		return;
-
-	/* The punit delays the write of the frequency and voltage until it
-	 * determines the GPU is awake. During normal usage we don't want to
-	 * waste power changing the frequency if the GPU is sleeping (rc6).
-	 * However, the GPU and driver is now idle and we do not want to delay
-	 * switching to minimum voltage (reducing power whilst idle) as we do
-	 * not expect to be woken in the near future and so must flush the
-	 * change by waking the device.
-	 *
-	 * We choose to take the media powerwell (either would do to trick the
-	 * punit into committing the voltage change) as that takes a lot less
-	 * power than the render powerwell.
-	 */
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA);
-	err = valleyview_set_rps(dev_priv, val);
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA);
-
-	if (err)
-		DRM_ERROR("Failed to set RPS for idle\n");
-}
-
-void gen6_rps_busy(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		u8 freq;
-
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
-
-		gen6_enable_rps_interrupts(dev_priv);
-
-		/* Use the user's desired frequency as a guide, but for better
-		 * performance, jump directly to RPe as our starting frequency.
-		 */
-		freq = max(rps->cur_freq,
-			   rps->efficient_freq);
-
-		if (intel_set_rps(dev_priv,
-				  clamp(freq,
-					rps->min_freq_softlimit,
-					rps->max_freq_softlimit)))
-			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* Flush our bottom-half so that it does not race with us
-	 * setting the idle frequency and so that it is bounded by
-	 * our rpm wakeref. And then disable the interrupts to stop any
-	 * futher RPS reclocking whilst we are asleep.
-	 */
-	gen6_disable_rps_interrupts(dev_priv);
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-			vlv_set_rps_idle(dev_priv);
-		else
-			gen6_set_rps(dev_priv, rps->idle_freq);
-		rps->last_adj = 0;
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_boost(struct i915_request *rq)
-{
-	struct intel_rps *rps = &rq->i915->gt_pm.rps;
-	unsigned long flags;
-	bool boost;
-
-	/* This is intentionally racy! We peek at the state here, then
-	 * validate inside the RPS worker.
-	 */
-	if (!rps->enabled)
-		return;
-
-	if (i915_request_signaled(rq))
-		return;
-
-	/* Serializes with i915_request_retire() */
-	boost = false;
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!i915_request_has_waitboost(rq) &&
-	    !dma_fence_is_signaled_locked(&rq->fence)) {
-		boost = !atomic_fetch_inc(&rps->num_waiters);
-		rq->flags |= I915_REQUEST_WAITBOOST;
-	}
-	spin_unlock_irqrestore(&rq->lock, flags);
-	if (!boost)
-		return;
-
-	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
-		schedule_work(&rps->work);
-
-	atomic_inc(&rps->boosts);
-}
-
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
-}
-
-static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-	I915_WRITE(GEN9_PG_ENABLE, 0);
-}
-
-static void gen9_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void gen6_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	/* We're doing forcewake before Disabling RC6,
-	 * This what the BIOS expects when going into suspend */
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
-{
-	bool enable_rc6 = true;
-	unsigned long rc6_ctx_base;
-	u32 rc_ctl;
-	int rc_sw_target;
-
-	rc_ctl = I915_READ(GEN6_RC_CONTROL);
-	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
-		       RC_SW_TARGET_STATE_SHIFT;
-	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
-			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
-			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
-			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
-			 rc_sw_target);
-
-	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
-		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	/*
-	 * The exact context size is not known for BXT, so assume a page size
-	 * for this check.
-	 */
-	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
-	if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
-	      (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
-		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
-		enable_rc6 = false;
-	}
-
-	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
-		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
-	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
-	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
-		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN6_GFXPAUSE)) {
-		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_MISC_CTRL0)) {
-		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	return enable_rc6;
-}
-
-static bool sanitize_rc6(struct drm_i915_private *i915)
-{
-	struct intel_device_info *info = mkwrite_device_info(i915);
-
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915)) {
-		info->has_rc6 = 0;
-		info->has_rps = false;
-	}
-
-	if (info->has_rc6 &&
-	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
-		DRM_INFO("RC6 disabled by BIOS\n");
-		info->has_rc6 = 0;
-	}
-
-	/*
-	 * We assume that we do not have any deep rc6 levels if we don't have
-	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
-	 * as the initial coarse check for rc6 in general, moving on to
-	 * progressively finer/deeper levels.
-	 */
-	if (!info->has_rc6 && info->has_rc6p)
-		info->has_rc6p = 0;
-
-	return info->has_rc6;
-}
-
-static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* All of these values are in units of 50MHz */
-
-	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >>  0) & 0xff;
-	} else {
-		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >> 16) & 0xff;
-	}
-	/* hw_max = RP0 until we check for overclocking */
-	rps->max_freq = rps->rp0_freq;
-
-	rps->efficient_freq = rps->rp1_freq;
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
-	    IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		u32 ddcc_status = 0;
-
-		if (sandybridge_pcode_read(dev_priv,
-					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status, NULL) == 0)
-			rps->efficient_freq =
-				clamp_t(u8,
-					((ddcc_status >> 8) & 0xff),
-					rps->min_freq,
-					rps->max_freq);
-	}
-
-	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		/* Store the frequency values in 16.66 MHZ units, which is
-		 * the natural hardware unit for SKL
-		 */
-		rps->rp0_freq *= GEN9_FREQ_SCALER;
-		rps->rp1_freq *= GEN9_FREQ_SCALER;
-		rps->min_freq *= GEN9_FREQ_SCALER;
-		rps->max_freq *= GEN9_FREQ_SCALER;
-		rps->efficient_freq *= GEN9_FREQ_SCALER;
-	}
-}
-
-static void reset_rps(struct drm_i915_private *dev_priv,
-		      int (*set)(struct drm_i915_private *, u8))
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq = rps->cur_freq;
-
-	/* force a reset */
-	rps->power.mode = -1;
-	rps->cur_freq = -1;
-
-	if (set(dev_priv, freq))
-		DRM_ERROR("Failed to reset RPS to initial values\n");
-}
-
-/* See the Gen9_GT_PM_Programming_Guide doc for the below */
-static void gen9_enable_rps(struct drm_i915_private *dev_priv)
-{
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* Program defaults and thresholds for RPS */
-	if (IS_GEN(dev_priv, 9))
-		I915_WRITE(GEN6_RC_VIDEO_FREQ,
-			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
-
-	/* 1 second timeout*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
-		GT_INTERVAL_FROM_US(dev_priv, 1000000));
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
-
-	/* Leaning on the below call to gen6_set_rps to program/setup the
-	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
-	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen11_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/*
-	 * 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
-	 */
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
-	I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
-
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	if (HAS_GT_UC(dev_priv))
-		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
-
-	/*
-	 * 2c: Program Coarse Power Gating Policies.
-	 *
-	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
-	 * use instead is a more conservative estimate for the maximum time
-	 * it takes us to service a CS interrupt and submit a new ELSP - that
-	 * is the time which the GPU is idle waiting for the CPU to select the
-	 * next request to execute. If the idle hysteresis is less than that
-	 * interrupt service latency, the hardware will automatically gate
-	 * the power well and we will then incur the wake up cost on top of
-	 * the service latency. A similar guide from plane_state is that we
-	 * do not want the enable hysteresis to less than the wakeup latency.
-	 *
-	 * igt/gem_exec_nop/sequential provides a rough estimate for the
-	 * service latency, and puts it around 10us for Broadwell (and other
-	 * big core) and around 40us for Broxton (and other low power cores).
-	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
-	 * However, the wakeup latency on Broxton is closer to 100us. To be
-	 * conservative, we have to factor in a context switch on top (due
-	 * to ksoftirqd).
-	 */
-	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
-	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
-
-	/* 3a: Enable RC6 */
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
-		   GEN6_RC_CTL_EI_MODE(1));
-
-	/* 3b: Enable Coarse Power Gating only when RC6 is enabled. */
-	I915_WRITE(GEN9_PG_ENABLE,
-		   GEN9_RENDER_PG_ENABLE |
-		   GEN9_MEDIA_PG_ENABLE |
-		   GEN11_MEDIA_SAMPLER_PG_ENABLE);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6_mode;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	if (INTEL_GEN(dev_priv) >= 10) {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
-		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
-	} else if (IS_SKYLAKE(dev_priv)) {
-		/*
-		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
-		 * when CPG is enabled
-		 */
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
-	} else {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
-	}
-
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	if (HAS_GT_UC(dev_priv))
-		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/*
-	 * 2c: Program Coarse Power Gating Policies.
-	 *
-	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
-	 * use instead is a more conservative estimate for the maximum time
-	 * it takes us to service a CS interrupt and submit a new ELSP - that
-	 * is the time which the GPU is idle waiting for the CPU to select the
-	 * next request to execute. If the idle hysteresis is less than that
-	 * interrupt service latency, the hardware will automatically gate
-	 * the power well and we will then incur the wake up cost on top of
-	 * the service latency. A similar guide from plane_state is that we
-	 * do not want the enable hysteresis to less than the wakeup latency.
-	 *
-	 * igt/gem_exec_nop/sequential provides a rough estimate for the
-	 * service latency, and puts it around 10us for Broadwell (and other
-	 * big core) and around 40us for Broxton (and other low power cores).
-	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
-	 * However, the wakeup latency on Broxton is closer to 100us. To be
-	 * conservative, we have to factor in a context switch on top (due
-	 * to ksoftirqd).
-	 */
-	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
-	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
-
-	/* 3a: Enable RC6 */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
-
-	/* WaRsUseTimeoutMode:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	else
-		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
-		   rc6_mode);
-
-	/*
-	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
-	 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
-	 */
-	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
-		I915_WRITE(GEN9_PG_ENABLE, 0);
-	else
-		I915_WRITE(GEN9_PG_ENABLE,
-			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
-
-	/* 3: Enable RC6 */
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN7_RC_CTL_TO_MODE |
-		   GEN6_RC_CTL_RC6_ENABLE);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* 1 Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RPNSWREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	I915_WRITE(GEN6_RC_VIDEO_FREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
-
-	/* Docs recommend 900MHz, and 300 MHz respectively */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   rps->max_freq_softlimit << 24 |
-		   rps->min_freq_softlimit << 16);
-
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
-	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6vids, rc6_mask;
-	u32 gtfifodbg;
-	int ret;
-
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* Clear the DBG now so we don't confuse earlier errors */
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* disable the counters and set deterministic thresholds */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
-	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
-	if (IS_IVYBRIDGE(dev_priv))
-		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
-	else
-		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
-	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
-	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
-
-	/* We don't use those on Haswell */
-	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	if (HAS_RC6p(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
-	if (HAS_RC6pp(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-	I915_WRITE(GEN6_RC_CONTROL,
-		   rc6_mask |
-		   GEN6_RC_CTL_EI_MODE(1) |
-		   GEN6_RC_CTL_HW_ENABLE);
-
-	rc6vids = 0;
-	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
-				     &rc6vids, NULL);
-	if (IS_GEN(dev_priv, 6) && ret) {
-		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
-	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
-		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
-			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
-		rc6vids &= 0xffff00;
-		rc6vids |= GEN6_ENCODE_RC6_VID(450);
-		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
-		if (ret)
-			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
-	}
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rps(struct drm_i915_private *dev_priv)
-{
-	/* Here begins a magic sequence of register writes to enable
-	 * auto-downclocking.
-	 *
-	 * Perhaps there might be some value in exposing these to
-	 * userspace...
-	 */
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* Power down if completely idle for over 50ms */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const int min_freq = 15;
-	const int scaling_factor = 180;
-	unsigned int gpu_freq;
-	unsigned int max_ia_freq, min_ring_freq;
-	unsigned int max_gpu_freq, min_gpu_freq;
-	struct cpufreq_policy *policy;
-
-	lockdep_assert_held(&rps->lock);
-
-	if (rps->max_freq <= rps->min_freq)
-		return;
-
-	policy = cpufreq_cpu_get(0);
-	if (policy) {
-		max_ia_freq = policy->cpuinfo.max_freq;
-		cpufreq_cpu_put(policy);
-	} else {
-		/*
-		 * Default to measured freq if none found, PCU will ensure we
-		 * don't go over
-		 */
-		max_ia_freq = tsc_khz;
-	}
-
-	/* Convert from kHz to MHz */
-	max_ia_freq /= 1000;
-
-	min_ring_freq = I915_READ(DCLK) & 0xf;
-	/* convert DDR frequency from units of 266.6MHz to bandwidth */
-	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
-
-	min_gpu_freq = rps->min_freq;
-	max_gpu_freq = rps->max_freq;
-	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		/* Convert GT frequency to 50 HZ units */
-		min_gpu_freq /= GEN9_FREQ_SCALER;
-		max_gpu_freq /= GEN9_FREQ_SCALER;
-	}
-
-	/*
-	 * For each potential GPU frequency, load a ring frequency we'd like
-	 * to use for memory access.  We do this by specifying the IA frequency
-	 * the PCU should use as a reference to determine the ring frequency.
-	 */
-	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
-		const int diff = max_gpu_freq - gpu_freq;
-		unsigned int ia_freq = 0, ring_freq = 0;
-
-		if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-			/*
-			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
-			 * No floor required for ring frequency on SKL.
-			 */
-			ring_freq = gpu_freq;
-		} else if (INTEL_GEN(dev_priv) >= 8) {
-			/* max(2 * GT, DDR). NB: GT is 50MHz units */
-			ring_freq = max(min_ring_freq, gpu_freq);
-		} else if (IS_HASWELL(dev_priv)) {
-			ring_freq = mult_frac(gpu_freq, 5, 4);
-			ring_freq = max(min_ring_freq, ring_freq);
-			/* leave ia_freq as the default, chosen by cpufreq */
-		} else {
-			/* On older processors, there is no separate ring
-			 * clock domain, so in order to boost the bandwidth
-			 * of the ring, we need to upclock the CPU (ia_freq).
-			 *
-			 * For GPU frequencies less than 750MHz,
-			 * just use the lowest ring freq.
-			 */
-			if (gpu_freq < min_freq)
-				ia_freq = 800;
-			else
-				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-		}
-
-		sandybridge_pcode_write(dev_priv,
-					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
-					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
-					gpu_freq);
-	}
-}
-
-static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-
-	switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
-	case 8:
-		/* (2 * 4) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
-		break;
-	case 12:
-		/* (2 * 6) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
-		break;
-	case 16:
-		/* (2 * 8) config */
-	default:
-		/* Setting (2 * 8) Min RP0 for any other combination */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
-		break;
-	}
-
-	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp0;
-}
-
-static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
-	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
-
-	return rpe;
-}
-
-static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp1;
-}
-
-static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpn;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-		       FB_GFX_FREQ_FUSE_MASK);
-
-	return rpn;
-}
-
-static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
-
-	return rp1;
-}
-
-static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
-	/* Clamp to max */
-	rp0 = min_t(u32, rp0, 0xea);
-
-	return rp0;
-}
-
-static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
-	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
-	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
-
-	return rpe;
-}
-
-static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
-	/*
-	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
-	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
-	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
-	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
-	 * to make sure it matches what Punit accepts.
-	 */
-	return max_t(u32, val, 0xc0);
-}
-
-/* Check that the pctx buffer wasn't move under us. */
-static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON(pctx_addr != dev_priv->dsm.start +
-			     dev_priv->vlv_pctx->stolen->start);
-}
-
-
-/* Check that the pcbr address is not empty. */
-static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
-}
-
-static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	resource_size_t pctx_paddr, paddr;
-	resource_size_t pctx_size = 32*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
-		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-		paddr = dev_priv->dsm.end + 1 - pctx_size;
-		GEM_BUG_ON(paddr > U32_MAX);
-
-		pctx_paddr = (paddr & (~4095));
-		I915_WRITE(VLV_PCBR, pctx_paddr);
-	}
-
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-}
-
-static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	struct drm_i915_gem_object *pctx;
-	resource_size_t pctx_paddr;
-	resource_size_t pctx_size = 24*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if (pcbr) {
-		/* BIOS set it up already, grab the pre-alloc'd space */
-		resource_size_t pcbr_offset;
-
-		pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
-		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
-								      pcbr_offset,
-								      I915_GTT_OFFSET_NONE,
-								      pctx_size);
-		goto out;
-	}
-
-	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-
-	/*
-	 * From the Gunit register HAS:
-	 * The Gfx driver is expected to program this register and ensure
-	 * proper allocation within Gfx stolen memory.  For example, this
-	 * register should be programmed such than the PCBR range does not
-	 * overlap with other ranges, such as the frame buffer, protected
-	 * memory, or any other relevant ranges.
-	 */
-	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
-	if (!pctx) {
-		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
-		goto out;
-	}
-
-	GEM_BUG_ON(range_overflows_t(u64,
-				     dev_priv->dsm.start,
-				     pctx->stolen->start,
-				     U32_MAX));
-	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
-	I915_WRITE(VLV_PCBR, pctx_paddr);
-
-out:
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-	dev_priv->vlv_pctx = pctx;
-}
-
-static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
-{
-	struct drm_i915_gem_object *pctx;
-
-	pctx = fetch_and_zero(&dev_priv->vlv_pctx);
-	if (pctx)
-		i915_gem_object_put(pctx);
-}
-
-static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.gpll_ref_freq =
-		vlv_get_cck_clock(dev_priv, "GPLL ref",
-				  CCK_GPLL_CLOCK_CONTROL,
-				  dev_priv->czclk_freq);
-
-	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
-			 dev_priv->gt_pm.rps.gpll_ref_freq);
-}
-
-static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	valleyview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-	switch ((val >> 6) & 3) {
-	case 0:
-	case 1:
-		dev_priv->mem_freq = 800;
-		break;
-	case 2:
-		dev_priv->mem_freq = 1066;
-		break;
-	case 3:
-		dev_priv->mem_freq = 1333;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = valleyview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = valleyview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-}
-
-static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	cherryview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-
-	switch ((val >> 2) & 0x7) {
-	case 3:
-		dev_priv->mem_freq = 2000;
-		break;
-	default:
-		dev_priv->mem_freq = 1600;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = cherryview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = cherryview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
-		   rps->min_freq) & 1,
-		  "Odd GPU freq values\n");
-}
-
-static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	valleyview_cleanup_pctx(dev_priv);
-}
-
-static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg, rc6_mode, pcbr;
-
-	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
-					     GT_FIFO_FREE_ENTRIES_CHV);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	cherryview_check_pctx(dev_priv);
-
-	/* 1a & 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2a: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	/* For now we assume BIOS is allocating and populating the PCBR  */
-	pcbr = I915_READ(VLV_PCBR);
-
-	/* 3: Enable RC6 */
-	rc6_mode = 0;
-	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/* 1: Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	/* Setting Fixed Bias */
-	vlv_punit_get(dev_priv);
-
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg;
-
-	valleyview_check_pctx(dev_priv);
-
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC0_COUNT_EN |
-				      VLV_RENDER_RC0_COUNT_EN |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_CONT);
-
-	vlv_punit_get(dev_priv);
-
-	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-}
-
-static unsigned long intel_pxfreq(u32 vidfreq)
-{
-	unsigned long freq;
-	int div = (vidfreq & 0x3f0000) >> 16;
-	int post = (vidfreq & 0x3000) >> 12;
-	int pre = (vidfreq & 0x7);
-
-	if (!pre)
-		return 0;
-
-	freq = ((div * 133333) / ((1<<post) * pre));
-
-	return freq;
-}
-
-static const struct cparams {
-	u16 i;
-	u16 t;
-	u16 m;
-	u16 c;
-} cparams[] = {
-	{ 1, 1333, 301, 28664 },
-	{ 1, 1066, 294, 24460 },
-	{ 1, 800, 294, 25192 },
-	{ 0, 1333, 276, 27605 },
-	{ 0, 1066, 276, 27605 },
-	{ 0, 800, 231, 23784 },
-};
-
-static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	u64 total_count, diff, ret;
-	u32 count1, count2, count3, m = 0, c = 0;
-	unsigned long now = jiffies_to_msecs(jiffies), diff1;
-	int i;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	diff1 = now - dev_priv->ips.last_time1;
-
-	/* Prevent division-by-zero if we are asking too fast.
-	 * Also, we don't get interesting results if we are polling
-	 * faster than once in 10ms, so just return the saved value
-	 * in such cases.
-	 */
-	if (diff1 <= 10)
-		return dev_priv->ips.chipset_power;
-
-	count1 = I915_READ(DMIEC);
-	count2 = I915_READ(DDREC);
-	count3 = I915_READ(CSIEC);
-
-	total_count = count1 + count2 + count3;
-
-	/* FIXME: handle per-counter overflow */
-	if (total_count < dev_priv->ips.last_count1) {
-		diff = ~0UL - dev_priv->ips.last_count1;
-		diff += total_count;
-	} else {
-		diff = total_count - dev_priv->ips.last_count1;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-		if (cparams[i].i == dev_priv->ips.c_m &&
-		    cparams[i].t == dev_priv->ips.r_t) {
-			m = cparams[i].m;
-			c = cparams[i].c;
-			break;
-		}
-	}
-
-	diff = div_u64(diff, diff1);
-	ret = ((m * diff) + c);
-	ret = div_u64(ret, 10);
-
-	dev_priv->ips.last_count1 = total_count;
-	dev_priv->ips.last_time1 = now;
-
-	dev_priv->ips.chipset_power = ret;
-
-	return ret;
-}
-
-unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	intel_wakeref_t wakeref;
-	unsigned long val = 0;
-
-	if (!IS_GEN(dev_priv, 5))
-		return 0;
-
-	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
-		spin_lock_irq(&mchdev_lock);
-		val = __i915_chipset_val(dev_priv);
-		spin_unlock_irq(&mchdev_lock);
-	}
-
-	return val;
-}
-
-unsigned long i915_mch_val(struct drm_i915_private *i915)
-{
-	unsigned long m, x, b;
-	u32 tsfs;
-
-	tsfs = intel_uncore_read(&i915->uncore, TSFS);
-
-	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
-	x = intel_uncore_read8(&i915->uncore, TR1);
-
-	b = tsfs & TSFS_INTR_MASK;
-
-	return ((m * x) / 127) - b;
-}
-
-static int _pxvid_to_vd(u8 pxvid)
-{
-	if (pxvid == 0)
-		return 0;
-
-	if (pxvid >= 8 && pxvid < 31)
-		pxvid = 31;
-
-	return (pxvid + 2) * 125;
-}
-
-static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
-{
-	const int vd = _pxvid_to_vd(pxvid);
-	const int vm = vd - 1125;
-
-	if (INTEL_INFO(dev_priv)->is_mobile)
-		return vm > 0 ? vm : 0;
-
-	return vd;
-}
-
-static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	u64 now, diff, diffms;
-	u32 count;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	now = ktime_get_raw_ns();
-	diffms = now - dev_priv->ips.last_time2;
-	do_div(diffms, NSEC_PER_MSEC);
-
-	/* Don't divide by 0 */
-	if (!diffms)
-		return;
-
-	count = I915_READ(GFXEC);
-
-	if (count < dev_priv->ips.last_count2) {
-		diff = ~0UL - dev_priv->ips.last_count2;
-		diff += count;
-	} else {
-		diff = count - dev_priv->ips.last_count2;
-	}
-
-	dev_priv->ips.last_count2 = count;
-	dev_priv->ips.last_time2 = now;
-
-	/* More magic constants... */
-	diff = diff * 1181;
-	diff = div_u64(diff, diffms * 10);
-	dev_priv->ips.gfx_power = diff;
-}
-
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	intel_wakeref_t wakeref;
-
-	if (!IS_GEN(dev_priv, 5))
-		return;
-
-	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
-		spin_lock_irq(&mchdev_lock);
-		__i915_update_gfx_val(dev_priv);
-		spin_unlock_irq(&mchdev_lock);
-	}
-}
-
-static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long t, corr, state1, corr2, state2;
-	u32 pxvid, ext_v;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
-	pxvid = (pxvid >> 24) & 0x7f;
-	ext_v = pvid_to_extvid(dev_priv, pxvid);
-
-	state1 = ext_v;
-
-	t = i915_mch_val(dev_priv);
-
-	/* Revel in the empirically derived constants */
-
-	/* Correction factor in 1/100000 units */
-	if (t > 80)
-		corr = ((t * 2349) + 135940);
-	else if (t >= 50)
-		corr = ((t * 964) + 29317);
-	else /* < 50 */
-		corr = ((t * 301) + 1004);
-
-	corr = corr * ((150142 * state1) / 10000 - 78642);
-	corr /= 100000;
-	corr2 = (corr * dev_priv->ips.corr);
-
-	state2 = (corr2 * state1) / 10000;
-	state2 /= 100; /* convert to mW */
-
-	__i915_update_gfx_val(dev_priv);
-
-	return dev_priv->ips.gfx_power + state2;
-}
-
-unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	intel_wakeref_t wakeref;
-	unsigned long val = 0;
-
-	if (!IS_GEN(dev_priv, 5))
-		return 0;
-
-	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
-		spin_lock_irq(&mchdev_lock);
-		val = __i915_gfx_val(dev_priv);
-		spin_unlock_irq(&mchdev_lock);
-	}
-
-	return val;
-}
-
-static struct drm_i915_private __rcu *i915_mch_dev;
-
-static struct drm_i915_private *mchdev_get(void)
-{
-	struct drm_i915_private *i915;
-
-	rcu_read_lock();
-	i915 = rcu_dereference(i915_mch_dev);
-	if (!kref_get_unless_zero(&i915->drm.ref))
-		i915 = NULL;
-	rcu_read_unlock();
-
-	return i915;
-}
-
-/**
- * i915_read_mch_val - return value for IPS use
- *
- * Calculate and return a value for the IPS driver to use when deciding whether
- * we have thermal and power headroom to increase CPU or GPU power budget.
- */
-unsigned long i915_read_mch_val(void)
-{
-	struct drm_i915_private *i915;
-	unsigned long chipset_val = 0;
-	unsigned long graphics_val = 0;
-	intel_wakeref_t wakeref;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return 0;
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		spin_lock_irq(&mchdev_lock);
-		chipset_val = __i915_chipset_val(i915);
-		graphics_val = __i915_gfx_val(i915);
-		spin_unlock_irq(&mchdev_lock);
-	}
-
-	drm_dev_put(&i915->drm);
-	return chipset_val + graphics_val;
-}
-EXPORT_SYMBOL_GPL(i915_read_mch_val);
-
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay > i915->ips.fmax)
-		i915->ips.max_delay--;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_raise);
-
-/**
- * i915_gpu_lower - lower GPU frequency limit
- *
- * IPS indicates we're close to a thermal limit, so throttle back the GPU
- * frequency maximum.
- */
-bool i915_gpu_lower(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay < i915->ips.min_delay)
-		i915->ips.max_delay++;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_lower);
-
-/**
- * i915_gpu_busy - indicate GPU business to IPS
- *
- * Tell the IPS driver whether or not the GPU is busy.
- */
-bool i915_gpu_busy(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	ret = i915->gt.awake;
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_busy);
-
-/**
- * i915_gpu_turbo_disable - disable graphics turbo
- *
- * Disable graphics turbo by resetting the max frequency and setting the
- * current frequency to the default.
- */
-bool i915_gpu_turbo_disable(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	i915->ips.max_delay = i915->ips.fstart;
-	ret = ironlake_set_drps(i915, i915->ips.fstart);
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
-
-/**
- * Tells the intel_ips driver that the i915 driver is now loaded, if
- * IPS got loaded first.
- *
- * This awkward dance is so that neither module has to depend on the
- * other in order for IPS to do the appropriate communication of
- * GPU turbo limits to i915.
- */
-static void
-ips_ping_for_i915_load(void)
-{
-	void (*link)(void);
-
-	link = symbol_get(ips_link_to_i915_driver);
-	if (link) {
-		link();
-		symbol_put(ips_link_to_i915_driver);
-	}
-}
-
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
-{
-	/* We only register the i915 ips part with intel-ips once everything is
-	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	rcu_assign_pointer(i915_mch_dev, dev_priv);
-
-	ips_ping_for_i915_load();
-}
-
-void intel_gpu_ips_teardown(void)
-{
-	rcu_assign_pointer(i915_mch_dev, NULL);
-}
-
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
-	u32 lcfuse;
-	u8 pxw[16];
-	int i;
-
-	/* Disable to program */
-	I915_WRITE(ECR, 0);
-	POSTING_READ(ECR);
-
-	/* Program energy weights for various events */
-	I915_WRITE(SDEW, 0x15040d00);
-	I915_WRITE(CSIEW0, 0x007f0000);
-	I915_WRITE(CSIEW1, 0x1e220004);
-	I915_WRITE(CSIEW2, 0x04000004);
-
-	for (i = 0; i < 5; i++)
-		I915_WRITE(PEW(i), 0);
-	for (i = 0; i < 3; i++)
-		I915_WRITE(DEW(i), 0);
-
-	/* Program P-state weights to account for frequency power adjustment */
-	for (i = 0; i < 16; i++) {
-		u32 pxvidfreq = I915_READ(PXVFREQ(i));
-		unsigned long freq = intel_pxfreq(pxvidfreq);
-		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-			PXVFREQ_PX_SHIFT;
-		unsigned long val;
-
-		val = vid * vid;
-		val *= (freq / 1000);
-		val *= 255;
-		val /= (127*127*900);
-		if (val > 0xff)
-			DRM_ERROR("bad pxval: %ld\n", val);
-		pxw[i] = val;
-	}
-	/* Render standby states get 0 weight */
-	pxw[14] = 0;
-	pxw[15] = 0;
-
-	for (i = 0; i < 4; i++) {
-		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-		I915_WRITE(PXW(i), val);
-	}
-
-	/* Adjust magic regs to magic values (more experimental results) */
-	I915_WRITE(OGW0, 0);
-	I915_WRITE(OGW1, 0);
-	I915_WRITE(EG0, 0x00007f00);
-	I915_WRITE(EG1, 0x0000000e);
-	I915_WRITE(EG2, 0x000e0000);
-	I915_WRITE(EG3, 0x68000300);
-	I915_WRITE(EG4, 0x42000000);
-	I915_WRITE(EG5, 0x00140031);
-	I915_WRITE(EG6, 0);
-	I915_WRITE(EG7, 0);
-
-	for (i = 0; i < 8; i++)
-		I915_WRITE(PXWL(i), 0);
-
-	/* Enable PMON + select events */
-	I915_WRITE(ECR, 0x80000019);
-
-	lcfuse = I915_READ(LCFUSE02);
-
-	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
-static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
-{
-	return !I915_READ(GEN8_RC6_CTX_INFO);
-}
-
-static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
-{
-	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
-		return;
-
-	if (i915_rc6_ctx_corrupted(i915)) {
-		DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
-		i915->gt_pm.rc6.ctx_corrupted = true;
-		i915->gt_pm.rc6.ctx_corrupted_wakeref =
-			intel_runtime_pm_get(&i915->runtime_pm);
-	}
-}
-
-static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
-{
-	if (i915->gt_pm.rc6.ctx_corrupted) {
-		intel_runtime_pm_put(&i915->runtime_pm,
-				     i915->gt_pm.rc6.ctx_corrupted_wakeref);
-		i915->gt_pm.rc6.ctx_corrupted = false;
-	}
-}
-
-/**
- * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
- * @i915: i915 device
- *
- * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
- */
-void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
-{
-	if (i915->gt_pm.rc6.ctx_corrupted)
-		intel_runtime_pm_put(&i915->runtime_pm,
-				     i915->gt_pm.rc6.ctx_corrupted_wakeref);
-}
-
-/**
- * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
- * @i915: i915 device
- *
- * Perform any steps needed to re-init the RC6 CTX WA after system resume.
- */
-void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
-{
-	if (!i915->gt_pm.rc6.ctx_corrupted)
-		return;
-
-	if (i915_rc6_ctx_corrupted(i915)) {
-		i915->gt_pm.rc6.ctx_corrupted_wakeref =
-			intel_runtime_pm_get(&i915->runtime_pm);
-		return;
-	}
-
-	DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
-	i915->gt_pm.rc6.ctx_corrupted = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *dev_priv);
-
-/**
- * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
- * @i915: i915 device
- *
- * Check if an RC6 CTX corruption has happened since the last check and if so
- * disable RC6 and runtime power management.
- *
- * Return false if no context corruption has happened since the last call of
- * this function, true otherwise.
-*/
-bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
-{
-	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
-		return false;
-
-	if (i915->gt_pm.rc6.ctx_corrupted)
-		return false;
-
-	if (!i915_rc6_ctx_corrupted(i915))
-		return false;
-
-	DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
-
-	intel_disable_rc6(i915);
-	i915->gt_pm.rc6.ctx_corrupted = true;
-	i915->gt_pm.rc6.ctx_corrupted_wakeref =
-		intel_runtime_pm_get_noresume(&i915->runtime_pm);
-
-	return true;
-}
-
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
-	 * requirement.
-	 */
-	if (!sanitize_rc6(dev_priv)) {
-		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
-		pm_runtime_get(&dev_priv->drm.pdev->dev);
-	}
-
-	i915_rc6_ctx_wa_init(dev_priv);
-
-	/* Initialize RPS limits (for userspace) */
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_init_gt_powersave(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_init_gt_powersave(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_init_rps_frequencies(dev_priv);
-
-	/* Derive initial user preferences/limits from the hardware limits */
-	rps->max_freq_softlimit = rps->max_freq;
-	rps->min_freq_softlimit = rps->min_freq;
-
-	/* After setting max-softlimit, find the overclock max freq */
-	if (IS_GEN(dev_priv, 6) ||
-	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
-		u32 params = 0;
-
-		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
-				       &params, NULL);
-		if (params & BIT(31)) { /* OC supported */
-			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
-					 (rps->max_freq & 0xff) * 50,
-					 (params & 0xff) * 50);
-			rps->max_freq = params & 0xff;
-		}
-	}
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq;
-	rps->idle_freq = rps->min_freq;
-	rps->cur_freq = rps->idle_freq;
-}
-
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv))
-		valleyview_cleanup_gt_powersave(dev_priv);
-
-	i915_rc6_ctx_wa_cleanup(dev_priv);
-
-	if (!HAS_RC6(dev_priv))
-		pm_runtime_put(&dev_priv->drm.pdev->dev);
-}
-
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
-	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(dev_priv);
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		gen11_reset_rps_interrupts(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_reset_rps_interrupts(dev_priv);
-}
-
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
-static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rc6(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	mutex_lock(&rps->lock);
-	__intel_disable_rc6(dev_priv);
-	mutex_unlock(&rps->lock);
-}
-
-static void intel_disable_rps(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rps.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rps(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rps(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rps(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rps(dev_priv);
-	else if (IS_IRONLAKE_M(dev_priv))
-		ironlake_disable_drps(dev_priv);
-
-	dev_priv->gt_pm.rps.enabled = false;
-}
-
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	__intel_disable_rc6(dev_priv);
-	intel_disable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_disable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	gen6_update_ring_freq(i915);
-
-	i915->gt_pm.llc_pstate.enabled = true;
-}
-
-static void intel_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (dev_priv->gt_pm.rc6.ctx_corrupted)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_enable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 11)
-		gen11_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 9)
-		gen9_enable_rc6(dev_priv);
-	else if (IS_BROADWELL(dev_priv))
-		gen8_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_enable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = true;
-}
-
-static void intel_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	lockdep_assert_held(&rps->lock);
-
-	if (rps->enabled)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv)) {
-		cherryview_enable_rps(dev_priv);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
-		valleyview_enable_rps(dev_priv);
-	} else if (INTEL_GEN(dev_priv) >= 9) {
-		gen9_enable_rps(dev_priv);
-	} else if (IS_BROADWELL(dev_priv)) {
-		gen8_enable_rps(dev_priv);
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		gen6_enable_rps(dev_priv);
-	} else if (IS_IRONLAKE_M(dev_priv)) {
-		ironlake_enable_drps(dev_priv);
-		intel_init_emon(dev_priv);
-	}
-
-	WARN_ON(rps->max_freq < rps->min_freq);
-	WARN_ON(rps->idle_freq > rps->max_freq);
-
-	WARN_ON(rps->efficient_freq < rps->min_freq);
-	WARN_ON(rps->efficient_freq > rps->max_freq);
-
-	rps->enabled = true;
-}
-
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(dev_priv))
-		return;
-
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	if (HAS_RC6(dev_priv))
-		intel_enable_rc6(dev_priv);
-	if (HAS_RPS(dev_priv))
-		intel_enable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_enable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
 {
 	/*
@@ -8976,7 +6414,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	int pipe;
+	enum pipe pipe;
 	u32 val;
 
 	/*
@@ -8998,7 +6436,6 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
 		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
 		if (dev_priv->vbt.fdi_rx_polarity_inverted)
 			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
 		I915_WRITE(TRANS_CHICKEN2(pipe), val);
@@ -9016,8 +6453,9 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
 
 	tmp = I915_READ(MCH_SSKPD);
 	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
-		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
-			      tmp);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
+			    tmp);
 }
 
 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9194,6 +6632,37 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
 	/* WaEnable32PlaneMode:icl */
 	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
 		   _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
+
+	/*
+	 * Wa_1408615072:icl,ehl  (vsunit)
+	 * Wa_1407596294:icl,ehl  (hsunit)
+	 */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE,
+			 0, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+	/* Wa_1407352427:icl,ehl */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			 0, PSDUNIT_CLKGATE_DIS);
+}
+
+static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	u32 vd_pg_enable = 0;
+	unsigned int i;
+
+	/* Wa_1408615072:tgl */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			 0, VSUNIT_CLKGATE_DIS_TGL);
+
+	/* This is not a WA. Enable VD HCP & MFX_ENC powergate */
+	for (i = 0; i < I915_MAX_VCS; i++) {
+		if (HAS_ENGINE(dev_priv, _VCS(i)))
+			vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) |
+					VDN_MFX_POWERGATE_ENABLE(i);
+	}
+
+	I915_WRITE(POWERGATE_ENABLE,
+		   I915_READ(POWERGATE_ENABLE) | vd_pg_enable);
 }
 
 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9701,7 +7170,8 @@ void intel_suspend_hw(struct drm_i915_private *dev_priv)
 
 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "No clock gating settings or workarounds applied.\n");
 }
 
 /**
@@ -9716,7 +7186,7 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
 	if (IS_GEN(dev_priv, 12))
-		dev_priv->display.init_clock_gating = nop_init_clock_gating;
+		dev_priv->display.init_clock_gating = tgl_init_clock_gating;
 	else if (IS_GEN(dev_priv, 11))
 		dev_priv->display.init_clock_gating = icl_init_clock_gating;
 	else if (IS_CANNONLAKE(dev_priv))
@@ -9768,9 +7238,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 {
 	/* For cxsr */
 	if (IS_PINEVIEW(dev_priv))
-		i915_pineview_get_mem_freq(dev_priv);
+		pnv_get_mem_freq(dev_priv);
 	else if (IS_GEN(dev_priv, 5))
-		i915_ironlake_get_mem_freq(dev_priv);
+		ilk_get_mem_freq(dev_priv);
+
+	if (intel_has_sagv(dev_priv))
+		skl_setup_sagv_block_time(dev_priv);
 
 	/* For FIFO watermark updates */
 	if (INTEL_GEN(dev_priv) >= 9) {
@@ -9793,8 +7266,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			dev_priv->display.optimize_watermarks =
 				ilk_optimize_watermarks;
 		} else {
-			DRM_DEBUG_KMS("Failed to read display plane latency. "
-				      "Disable CxSR\n");
+			drm_dbg_kms(&dev_priv->drm,
+				    "Failed to read display plane latency. "
+				    "Disable CxSR\n");
 		}
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		vlv_setup_wm_latency(dev_priv);
@@ -9814,7 +7288,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 					    dev_priv->is_ddr3,
 					    dev_priv->fsb_freq,
 					    dev_priv->mem_freq)) {
-			DRM_INFO("failed to find known CxSR latency "
+			drm_info(&dev_priv->drm,
+				 "failed to find known CxSR latency "
 				 "(found ddr%s fsb freq %d, mem freq %d), "
 				 "disabling CxSR\n",
 				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
@@ -9823,14 +7298,14 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			intel_set_memory_cxsr(dev_priv, false);
 			dev_priv->display.update_wm = NULL;
 		} else
-			dev_priv->display.update_wm = pineview_update_wm;
+			dev_priv->display.update_wm = pnv_update_wm;
 	} else if (IS_GEN(dev_priv, 4)) {
 		dev_priv->display.update_wm = i965_update_wm;
 	} else if (IS_GEN(dev_priv, 3)) {
 		dev_priv->display.update_wm = i9xx_update_wm;
 		dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
 	} else if (IS_GEN(dev_priv, 2)) {
-		if (INTEL_INFO(dev_priv)->num_pipes == 1) {
+		if (INTEL_NUM_PIPES(dev_priv) == 1) {
 			dev_priv->display.update_wm = i845_update_wm;
 			dev_priv->display.get_fifo_size = i845_get_fifo_size;
 		} else {
@@ -9838,221 +7313,13 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			dev_priv->display.get_fifo_size = i830_get_fifo_size;
 		}
 	} else {
-		DRM_ERROR("unexpected fall-through in intel_init_pm\n");
+		drm_err(&dev_priv->drm,
+			"unexpected fall-through in %s\n", __func__);
 	}
 }
 
-static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val - 0xb7
-	 * Slow = Fast = GPLL ref * N
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
-}
-
-static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
-}
-
-static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val / 2
-	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
-}
-
-static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* CHV needs even values */
-	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
-}
-
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
-					 GEN9_FREQ_SCALER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_gpu_freq(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_gpu_freq(dev_priv, val);
-	else
-		return val * GT_FREQUENCY_MULTIPLIER;
-}
-
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
-					 GT_FREQUENCY_MULTIPLIER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_freq_opcode(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_freq_opcode(dev_priv, val);
-	else
-		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
-}
-
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->gt_pm.rps.lock);
-	mutex_init(&dev_priv->gt_pm.rps.power.mutex);
-
-	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
-
 	dev_priv->runtime_pm.suspended = false;
 	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
 }
-
-static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
-			     const i915_reg_t reg)
-{
-	u32 lower, upper, tmp;
-	int loop = 2;
-
-	/*
-	 * The register accessed do not need forcewake. We borrow
-	 * uncore lock to prevent concurrent access to range reg.
-	 */
-	lockdep_assert_held(&dev_priv->uncore.lock);
-
-	/*
-	 * vlv and chv residency counters are 40 bits in width.
-	 * With a control bit, we can choose between upper or lower
-	 * 32bit window into this counter.
-	 *
-	 * Although we always use the counter in high-range mode elsewhere,
-	 * userspace may attempt to read the value before rc6 is initialised,
-	 * before we have set the default VLV_COUNTER_CONTROL value. So always
-	 * set the high bit to be safe.
-	 */
-	I915_WRITE_FW(VLV_COUNTER_CONTROL,
-		      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
-	upper = I915_READ_FW(reg);
-	do {
-		tmp = upper;
-
-		I915_WRITE_FW(VLV_COUNTER_CONTROL,
-			      _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
-		lower = I915_READ_FW(reg);
-
-		I915_WRITE_FW(VLV_COUNTER_CONTROL,
-			      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
-		upper = I915_READ_FW(reg);
-	} while (upper != tmp && --loop);
-
-	/*
-	 * Everywhere else we always use VLV_COUNTER_CONTROL with the
-	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
-	 * now.
-	 */
-
-	return lower | (u64)upper << 8;
-}
-
-u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
-			   const i915_reg_t reg)
-{
-	struct intel_uncore *uncore = &dev_priv->uncore;
-	u64 time_hw, prev_hw, overflow_hw;
-	unsigned int fw_domains;
-	unsigned long flags;
-	unsigned int i;
-	u32 mul, div;
-
-	if (!HAS_RC6(dev_priv))
-		return 0;
-
-	/*
-	 * Store previous hw counter values for counter wrap-around handling.
-	 *
-	 * There are only four interesting registers and they live next to each
-	 * other so we can use the relative address, compared to the smallest
-	 * one as the index into driver storage.
-	 */
-	i = (i915_mmio_reg_offset(reg) -
-	     i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
-	if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
-		return 0;
-
-	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
-
-	spin_lock_irqsave(&uncore->lock, flags);
-	intel_uncore_forcewake_get__locked(uncore, fw_domains);
-
-	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		mul = 1000000;
-		div = dev_priv->czclk_freq;
-		overflow_hw = BIT_ULL(40);
-		time_hw = vlv_residency_raw(dev_priv, reg);
-	} else {
-		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
-		if (IS_GEN9_LP(dev_priv)) {
-			mul = 10000;
-			div = 12;
-		} else {
-			mul = 1280;
-			div = 1;
-		}
-
-		overflow_hw = BIT_ULL(32);
-		time_hw = intel_uncore_read_fw(uncore, reg);
-	}
-
-	/*
-	 * Counter wrap handling.
-	 *
-	 * But relying on a sufficient frequency of queries otherwise counters
-	 * can still wrap.
-	 */
-	prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
-	dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
-
-	/* RC6 delta from last sample. */
-	if (time_hw >= prev_hw)
-		time_hw -= prev_hw;
-	else
-		time_hw += overflow_hw - prev_hw;
-
-	/* Add delta to RC6 extended raw driver copy. */
-	time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
-	dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
-
-	intel_uncore_forcewake_put__locked(uncore, fw_domains);
-	spin_unlock_irqrestore(&uncore->lock, flags);
-
-	return mul_u64_u32_div(time_hw, mul, div);
-}
-
-u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
-			   i915_reg_t reg)
-{
-	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
-}
-
-u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
-{
-	u32 cagf;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
-	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
-	else
-		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
-
-	return  cagf;
-}
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index 0f7390c850ec..c06c6a846d9a 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -29,19 +29,6 @@ void intel_update_watermarks(struct intel_crtc *crtc);
 void intel_init_pm(struct drm_i915_private *dev_priv);
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv);
 void intel_pm_setup(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_teardown(void);
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
-bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
-void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
-void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
-void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq);
 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv);
@@ -67,27 +54,10 @@ void skl_write_plane_wm(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state);
 void skl_write_cursor_wm(struct intel_plane *plane,
 			 const struct intel_crtc_state *crtc_state);
-bool ilk_disable_lp_wm(struct drm_device *dev);
-int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
-				  struct intel_crtc_state *cstate);
+bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv);
 void intel_init_ipc(struct drm_i915_private *dev_priv);
 void intel_enable_ipc(struct drm_i915_private *dev_priv);
 
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
-u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg);
-u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, i915_reg_t reg);
-
-u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1);
-
-unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
-unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
-unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-void i915_update_gfx_val(struct drm_i915_private *dev_priv);
-
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
-void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive);
 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable);
 
 #endif /* __INTEL_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c
new file mode 100644
index 000000000000..14b59b899c9b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_region_lmem.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+#include "intel_region_lmem.h"
+
+static int init_fake_lmem_bar(struct intel_memory_region *mem)
+{
+	struct drm_i915_private *i915 = mem->i915;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	unsigned long n;
+	int ret;
+
+	/* We want to 1:1 map the mappable aperture to our reserved region */
+
+	mem->fake_mappable.start = 0;
+	mem->fake_mappable.size = resource_size(&mem->region);
+	mem->fake_mappable.color = I915_COLOR_UNEVICTABLE;
+
+	ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable);
+	if (ret)
+		return ret;
+
+	mem->remap_addr = dma_map_resource(&i915->drm.pdev->dev,
+					   mem->region.start,
+					   mem->fake_mappable.size,
+					   PCI_DMA_BIDIRECTIONAL,
+					   DMA_ATTR_FORCE_CONTIGUOUS);
+	if (dma_mapping_error(&i915->drm.pdev->dev, mem->remap_addr)) {
+		drm_mm_remove_node(&mem->fake_mappable);
+		return -EINVAL;
+	}
+
+	for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) {
+		ggtt->vm.insert_page(&ggtt->vm,
+				     mem->remap_addr + (n << PAGE_SHIFT),
+				     n << PAGE_SHIFT,
+				     I915_CACHE_NONE, 0);
+	}
+
+	mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr,
+						      mem->fake_mappable.size);
+
+	return 0;
+}
+
+static void release_fake_lmem_bar(struct intel_memory_region *mem)
+{
+	if (!drm_mm_node_allocated(&mem->fake_mappable))
+		return;
+
+	drm_mm_remove_node(&mem->fake_mappable);
+
+	dma_unmap_resource(&mem->i915->drm.pdev->dev,
+			   mem->remap_addr,
+			   mem->fake_mappable.size,
+			   PCI_DMA_BIDIRECTIONAL,
+			   DMA_ATTR_FORCE_CONTIGUOUS);
+}
+
+static void
+region_lmem_release(struct intel_memory_region *mem)
+{
+	release_fake_lmem_bar(mem);
+	io_mapping_fini(&mem->iomap);
+	intel_memory_region_release_buddy(mem);
+}
+
+static int
+region_lmem_init(struct intel_memory_region *mem)
+{
+	int ret;
+
+	if (i915_modparams.fake_lmem_start) {
+		ret = init_fake_lmem_bar(mem);
+		GEM_BUG_ON(ret);
+	}
+
+	if (!io_mapping_init_wc(&mem->iomap,
+				mem->io_start,
+				resource_size(&mem->region)))
+		return -EIO;
+
+	ret = intel_memory_region_init_buddy(mem);
+	if (ret)
+		io_mapping_fini(&mem->iomap);
+
+	intel_memory_region_set_name(mem, "local");
+
+	return ret;
+}
+
+const struct intel_memory_region_ops intel_region_lmem_ops = {
+	.init = region_lmem_init,
+	.release = region_lmem_release,
+	.create_object = __i915_gem_lmem_object_create,
+};
+
+struct intel_memory_region *
+intel_setup_fake_lmem(struct drm_i915_private *i915)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	struct intel_memory_region *mem;
+	resource_size_t mappable_end;
+	resource_size_t io_start;
+	resource_size_t start;
+
+	GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt));
+	GEM_BUG_ON(!i915_modparams.fake_lmem_start);
+
+	/* Your mappable aperture belongs to me now! */
+	mappable_end = pci_resource_len(pdev, 2);
+	io_start = pci_resource_start(pdev, 2),
+	start = i915_modparams.fake_lmem_start;
+
+	mem = intel_memory_region_create(i915,
+					 start,
+					 mappable_end,
+					 PAGE_SIZE,
+					 io_start,
+					 &intel_region_lmem_ops);
+	if (!IS_ERR(mem)) {
+		drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n",
+			 &mem->region);
+		drm_info(&i915->drm,
+			 "Intel graphics fake LMEM IO start: %llx\n",
+			(u64)mem->io_start);
+		drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n",
+			 (u64)resource_size(&mem->region));
+	}
+
+	return mem;
+}
diff --git a/drivers/gpu/drm/i915/intel_region_lmem.h b/drivers/gpu/drm/i915/intel_region_lmem.h
new file mode 100644
index 000000000000..213def7c7b8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_region_lmem.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_REGION_LMEM_H
+#define __INTEL_REGION_LMEM_H
+
+struct drm_i915_private;
+
+extern const struct intel_memory_region_ops intel_region_lmem_ops;
+
+struct intel_memory_region *
+intel_setup_fake_lmem(struct drm_i915_private *i915);
+
+#endif /* !__INTEL_REGION_LMEM_H */
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 2fd3c097e1f5..ad719c9602af 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/pm_runtime.h>
-#include <linux/vgaarb.h>
 
 #include <drm/drm_print.h>
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index e06b35b844a0..cbfb7171d62d 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -105,8 +105,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915,
 	if (intel_wait_for_register(uncore,
 				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
 				    5)) {
-		DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n",
-				 is_read ? "read" : "write");
+		drm_dbg(&i915->drm, "IOSF sideband idle wait (%s) timed out\n",
+			is_read ? "read" : "write");
 		return -EAGAIN;
 	}
 
@@ -129,8 +129,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915,
 			*val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA);
 		err = 0;
 	} else {
-		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
-				 is_read ? "read" : "write");
+		drm_dbg(&i915->drm, "IOSF sideband finish wait (%s) timed out\n",
+			is_read ? "read" : "write");
 		err = -ETIMEDOUT;
 	}
 
@@ -283,7 +283,8 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg,
 	if (intel_wait_for_register_fw(uncore,
 				       SBI_CTL_STAT, SBI_BUSY, 0,
 				       100)) {
-		DRM_ERROR("timeout waiting for SBI to become ready\n");
+		drm_err(&i915->drm,
+			"timeout waiting for SBI to become ready\n");
 		return -EBUSY;
 	}
 
@@ -301,12 +302,13 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg,
 	if (__intel_wait_for_register_fw(uncore,
 					 SBI_CTL_STAT, SBI_BUSY, 0,
 					 100, 100, &cmd)) {
-		DRM_ERROR("timeout waiting for SBI to complete read\n");
+		drm_err(&i915->drm,
+			"timeout waiting for SBI to complete read\n");
 		return -ETIMEDOUT;
 	}
 
 	if (cmd & SBI_RESPONSE_FAIL) {
-		DRM_ERROR("error during SBI read of reg %x\n", reg);
+		drm_err(&i915->drm, "error during SBI read of reg %x\n", reg);
 		return -ENXIO;
 	}
 
@@ -426,8 +428,9 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
 	mutex_unlock(&i915->sb_lock);
 
 	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), err);
+		drm_dbg(&i915->drm,
+			"warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
+			mbox, __builtin_return_address(0), err);
 	}
 
 	return err;
@@ -447,8 +450,9 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
 	mutex_unlock(&i915->sb_lock);
 
 	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), err);
+		drm_dbg(&i915->drm,
+			"warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
+			val, mbox, __builtin_return_address(0), err);
 	}
 
 	return err;
@@ -519,7 +523,8 @@ int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request,
 	 * requests, and for any quirks of the PCODE firmware that delays
 	 * the request completion.
 	 */
-	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
+	drm_dbg_kms(&i915->drm,
+		    "PCODE timeout, retrying with preemption disabled\n");
 	WARN_ON_ONCE(timeout_base_ms > 3);
 	preempt_disable();
 	ret = wait_for_atomic(COND, 50);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 9e583f13a9e4..5f2cf6f43b8b 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -359,7 +359,8 @@ static void __gen6_gt_wait_for_fifo(struct intel_uncore *uncore)
 		if (wait_for_atomic((n = fifo_free_entries(uncore)) >
 				    GT_FIFO_NUM_RESERVED_ENTRIES,
 				    GT_FIFO_TIMEOUT_MS)) {
-			DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n);
+			drm_dbg(&uncore->i915->drm,
+				"GT_FIFO timeout, entries: %u\n", n);
 			return;
 		}
 	}
@@ -432,7 +433,7 @@ intel_uncore_forcewake_reset(struct intel_uncore *uncore)
 			break;
 
 		if (--retry_count == 0) {
-			DRM_ERROR("Timed out waiting for forcewake timers to finish\n");
+			drm_err(&uncore->i915->drm, "Timed out waiting for forcewake timers to finish\n");
 			break;
 		}
 
@@ -490,7 +491,7 @@ gen6_check_for_fifo_debug(struct intel_uncore *uncore)
 	fifodbg = __raw_uncore_read32(uncore, GTFIFODBG);
 
 	if (unlikely(fifodbg)) {
-		DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg);
+		drm_dbg(&uncore->i915->drm, "GTFIFODBG = 0x08%x\n", fifodbg);
 		__raw_uncore_write32(uncore, GTFIFODBG, fifodbg);
 	}
 
@@ -562,7 +563,7 @@ void intel_uncore_resume_early(struct intel_uncore *uncore)
 	unsigned int restore_forcewake;
 
 	if (intel_uncore_unclaimed_mmio(uncore))
-		DRM_DEBUG("unclaimed mmio detected on resume, clearing\n");
+		drm_dbg(&uncore->i915->drm, "unclaimed mmio detected on resume, clearing\n");
 
 	if (!intel_uncore_has_forcewake(uncore))
 		return;
@@ -805,9 +806,6 @@ void assert_forcewakes_active(struct intel_uncore *uncore,
 /* We give fast paths for the really cool registers */
 #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000)
 
-#define GEN11_NEEDS_FORCE_WAKE(reg) \
-	((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000))
-
 #define __gen6_reg_read_fw_domains(uncore, offset) \
 ({ \
 	enum forcewake_domains __fwd; \
@@ -903,12 +901,10 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = {
 })
 
 #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \
-({ \
-	enum forcewake_domains __fwd = 0; \
-	if (GEN11_NEEDS_FORCE_WAKE((offset))) \
-		__fwd = find_fw_domain(uncore, offset); \
-	__fwd; \
-})
+	find_fw_domain(uncore, offset)
+
+#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \
+	find_fw_domain(uncore, offset)
 
 /* *Must* be sorted by offset! See intel_shadow_table_check(). */
 static const i915_reg_t gen8_shadowed_regs[] = {
@@ -935,6 +931,20 @@ static const i915_reg_t gen11_shadowed_regs[] = {
 	/* TODO: Other registers are not yet used */
 };
 
+static const i915_reg_t gen12_shadowed_regs[] = {
+	RING_TAIL(RENDER_RING_BASE),		/* 0x2000 (base) */
+	GEN6_RPNSWREQ,				/* 0xA008 */
+	GEN6_RC_VIDEO_FREQ,			/* 0xA00C */
+	RING_TAIL(BLT_RING_BASE),		/* 0x22000 (base) */
+	RING_TAIL(GEN11_BSD_RING_BASE),		/* 0x1C0000 (base) */
+	RING_TAIL(GEN11_BSD2_RING_BASE),	/* 0x1C4000 (base) */
+	RING_TAIL(GEN11_VEBOX_RING_BASE),	/* 0x1C8000 (base) */
+	RING_TAIL(GEN11_BSD3_RING_BASE),	/* 0x1D0000 (base) */
+	RING_TAIL(GEN11_BSD4_RING_BASE),	/* 0x1D4000 (base) */
+	RING_TAIL(GEN11_VEBOX2_RING_BASE),	/* 0x1D8000 (base) */
+	/* TODO: Other registers are not yet used */
+};
+
 static int mmio_reg_cmp(u32 key, const i915_reg_t *reg)
 {
 	u32 offset = i915_mmio_reg_offset(*reg);
@@ -957,6 +967,7 @@ static bool is_gen##x##_shadowed(u32 offset) \
 
 __is_genX_shadowed(8)
 __is_genX_shadowed(11)
+__is_genX_shadowed(12)
 
 static enum forcewake_domains
 gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg)
@@ -1005,8 +1016,18 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = {
 #define __gen11_fwtable_reg_write_fw_domains(uncore, offset) \
 ({ \
 	enum forcewake_domains __fwd = 0; \
-	if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \
-		__fwd = find_fw_domain(uncore, offset); \
+	const u32 __offset = (offset); \
+	if (!is_gen11_shadowed(__offset)) \
+		__fwd = find_fw_domain(uncore, __offset); \
+	__fwd; \
+})
+
+#define __gen12_fwtable_reg_write_fw_domains(uncore, offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	const u32 __offset = (offset); \
+	if (!is_gen12_shadowed(__offset)) \
+		__fwd = find_fw_domain(uncore, __offset); \
 	__fwd; \
 })
 
@@ -1065,9 +1086,51 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = {
 	GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL),
 	GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER),
 	GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x40000, 0x1bffff, 0),
+	GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0),
+	GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1),
+	GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0),
+	GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2),
+	GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3),
+	GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1)
+};
+
+/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
+static const struct intel_forcewake_range __gen12_fw_ranges[] = {
+	GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */
+	GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL),
+	GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER),
 	GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER),
 	GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER),
-	GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0xe900, 0x147ff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x14800, 0x148ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x14900, 0x19fff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x1a000, 0x1a7ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x1a800, 0x1afff, FORCEWAKE_BLITTER),
+	GEN_FW_RANGE(0x1b000, 0x1bfff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x1c000, 0x243ff, FORCEWAKE_BLITTER),
 	GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER),
 	GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER),
 	GEN_FW_RANGE(0x40000, 0x1bffff, 0),
@@ -1228,6 +1291,7 @@ __gen_read(func, 16) \
 __gen_read(func, 32) \
 __gen_read(func, 64)
 
+__gen_reg_read_funcs(gen12_fwtable);
 __gen_reg_read_funcs(gen11_fwtable);
 __gen_reg_read_funcs(fwtable);
 __gen_reg_read_funcs(gen6);
@@ -1319,6 +1383,7 @@ __gen_write(func, 8) \
 __gen_write(func, 16) \
 __gen_write(func, 32)
 
+__gen_reg_write_funcs(gen12_fwtable);
 __gen_reg_write_funcs(gen11_fwtable);
 __gen_reg_write_funcs(fwtable);
 __gen_reg_write_funcs(gen8);
@@ -1531,8 +1596,8 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
 		spin_unlock_irq(&uncore->lock);
 
 		if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
-			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
-			DRM_INFO("when using vblank-synced partial screen updates.\n");
+			drm_info(&i915->drm, "No MT forcewake available on Ivybridge, this can result in issues\n");
+			drm_info(&i915->drm, "when using vblank-synced partial screen updates.\n");
 			fw_domain_fini(uncore, FW_DOMAIN_ID_RENDER);
 			fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
 				       FORCEWAKE, FORCEWAKE_ACK);
@@ -1619,8 +1684,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore)
 		mmio_size = 2 * 1024 * 1024;
 	uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size);
 	if (uncore->regs == NULL) {
-		DRM_ERROR("failed to map registers\n");
-
+		drm_err(&i915->drm, "failed to map registers\n");
 		return -EIO;
 	}
 
@@ -1690,10 +1754,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore)
 		ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable);
-	} else {
+	} else if (IS_GEN(i915, 11)) {
 		ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable);
+	} else {
+		ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges);
+		ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable);
+		ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable);
 	}
 
 	uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier;
@@ -1739,7 +1807,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
 
 	/* clear out unclaimed reg detection bit */
 	if (intel_uncore_unclaimed_mmio(uncore))
-		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
+		drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n");
 
 	return 0;
 
@@ -2004,9 +2072,10 @@ intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore)
 
 	if (unlikely(check_for_unclaimed_mmio(uncore))) {
 		if (!i915_modparams.mmio_debug) {
-			DRM_DEBUG("Unclaimed register detected, "
-				  "enabling oneshot unclaimed register reporting. "
-				  "Please use i915.mmio_debug=N for more information.\n");
+			drm_dbg(&uncore->i915->drm,
+				"Unclaimed register detected, "
+				"enabling oneshot unclaimed register reporting. "
+				"Please use i915.mmio_debug=N for more information.\n");
 			i915_modparams.mmio_debug++;
 		}
 		uncore->debug->unclaimed_mmio_check--;
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 414fc2cb0459..dcfa243892c6 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -378,23 +378,23 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore,
 static inline void intel_uncore_rmw(struct intel_uncore *uncore,
 				    i915_reg_t reg, u32 clear, u32 set)
 {
-	u32 val;
+	u32 old, val;
 
-	val = intel_uncore_read(uncore, reg);
-	val &= ~clear;
-	val |= set;
-	intel_uncore_write(uncore, reg, val);
+	old = intel_uncore_read(uncore, reg);
+	val = (old & ~clear) | set;
+	if (val != old)
+		intel_uncore_write(uncore, reg, val);
 }
 
 static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore,
 				       i915_reg_t reg, u32 clear, u32 set)
 {
-	u32 val;
+	u32 old, val;
 
-	val = intel_uncore_read_fw(uncore, reg);
-	val &= ~clear;
-	val |= set;
-	intel_uncore_write_fw(uncore, reg, val);
+	old = intel_uncore_read_fw(uncore, reg);
+	val = (old & ~clear) | set;
+	if (val != old)
+		intel_uncore_write_fw(uncore, reg, val);
 }
 
 static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore,
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 868cc78048d0..8fbf6f4d3f26 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
 
 static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 {
-	if (!atomic_dec_and_test(&wf->count))
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+	if (unlikely(!atomic_dec_and_test(&wf->count)))
 		goto unlock;
 
 	/* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@ unlock:
 	mutex_unlock(&wf->mutex);
 }
 
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
 {
 	INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
 
 	/* Assume we are not in process context and so cannot sleep. */
-	if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
-	    !mutex_trylock(&wf->mutex)) {
+	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
 		schedule_work(&wf->work);
 		return;
 	}
@@ -95,22 +95,32 @@ static void __intel_wakeref_put_work(struct work_struct *wrk)
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct intel_runtime_pm *rpm,
 			  const struct intel_wakeref_ops *ops,
-			  struct lock_class_key *key)
+			  struct intel_wakeref_lockclass *key)
 {
 	wf->rpm = rpm;
 	wf->ops = ops;
 
-	__mutex_init(&wf->mutex, "wakeref", key);
+	__mutex_init(&wf->mutex, "wakeref.mutex", &key->mutex);
 	atomic_set(&wf->count, 0);
 	wf->wakeref = 0;
 
 	INIT_WORK(&wf->work, __intel_wakeref_put_work);
+	lockdep_init_map(&wf->work.lockdep_map, "wakeref.work", &key->work, 0);
 }
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
 {
-	return wait_var_event_killable(&wf->wakeref,
-				       !intel_wakeref_is_active(wf));
+	int err;
+
+	might_sleep();
+
+	err = wait_var_event_killable(&wf->wakeref,
+				      !intel_wakeref_is_active(wf));
+	if (err)
+		return err;
+
+	intel_wakeref_unlock_wait(wf);
+	return 0;
 }
 
 static void wakeref_auto_timeout(struct timer_list *t)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..7d1e676b71ef 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -9,6 +9,7 @@
 
 #include <linux/atomic.h>
 #include <linux/bits.h>
+#include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
 #include <linux/stackdepot.h>
@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
 struct intel_wakeref_ops {
 	int (*get)(struct intel_wakeref *wf);
 	int (*put)(struct intel_wakeref *wf);
-
-	unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 };
 
 struct intel_wakeref {
@@ -46,24 +44,27 @@ struct intel_wakeref {
 	struct work_struct work;
 };
 
+struct intel_wakeref_lockclass {
+	struct lock_class_key mutex;
+	struct lock_class_key work;
+};
+
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct intel_runtime_pm *rpm,
 			  const struct intel_wakeref_ops *ops,
-			  struct lock_class_key *key);
+			  struct intel_wakeref_lockclass *key);
 #define intel_wakeref_init(wf, rpm, ops) do {				\
-	static struct lock_class_key __key;				\
+	static struct intel_wakeref_lockclass __key;			\
 									\
 	__intel_wakeref_init((wf), (rpm), (ops), &__key);		\
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
 
 /**
  * intel_wakeref_get: Acquire the wakeref
- * @i915: the drm_i915_private device
  * @wf: the wakeref
- * @fn: callback for acquired the wakeref, called only on first acquire.
  *
  * Acquire a hold on the wakeref. The first user to do so, will acquire
  * the runtime pm wakeref and then call the @fn underneath the wakeref
@@ -78,6 +79,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf);
 static inline int
 intel_wakeref_get(struct intel_wakeref *wf)
 {
+	might_sleep();
 	if (unlikely(!atomic_inc_not_zero(&wf->count)))
 		return __intel_wakeref_get_first(wf);
 
@@ -85,6 +87,22 @@ intel_wakeref_get(struct intel_wakeref *wf)
 }
 
 /**
+ * __intel_wakeref_get: Acquire the wakeref, again
+ * @wf: the wakeref
+ *
+ * Increment the wakeref counter, only valid if it is already held by
+ * the caller.
+ *
+ * See intel_wakeref_get().
+ */
+static inline void
+__intel_wakeref_get(struct intel_wakeref *wf)
+{
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+	atomic_inc(&wf->count);
+}
+
+/**
  * intel_wakeref_get_if_in_use: Acquire the wakeref
  * @wf: the wakeref
  *
@@ -100,10 +118,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
 }
 
 /**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * intel_wakeref_put_flags: Release the wakeref
  * @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
  *
  * Release our hold on the wakeref. When there are no more users,
  * the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +133,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
  * code otherwise.
  */
 static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 {
 	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
 	if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
-		__intel_wakeref_put_last(wf);
+		__intel_wakeref_put_last(wf, flags);
+}
+
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+	might_sleep();
+	__intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+	__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
 }
 
 /**
@@ -152,6 +183,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
 }
 
 /**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits for the active callback (under the @wf->mutex or another CPU) is
+ * complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+	mutex_lock(&wf->mutex);
+	mutex_unlock(&wf->mutex);
+	flush_work(&wf->work);
+}
+
+/**
  * intel_wakeref_is_active: Query whether the wakeref is currently held
  * @wf: the wakeref
  *
@@ -170,6 +216,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 static inline void
 __intel_wakeref_defer_park(struct intel_wakeref *wf)
 {
+	lockdep_assert_held(&wf->mutex);
 	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
 	atomic_set_release(&wf->count, 1);
 }
diff --git a/drivers/gpu/drm/i915/oa/Makefile b/drivers/gpu/drm/i915/oa/Makefile
deleted file mode 100644
index df028e2b0d64..000000000000
--- a/drivers/gpu/drm/i915/oa/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: MIT
-
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c
new file mode 100644
index 000000000000..a29d93707345
--- /dev/null
+++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Autogenerated file by GPU Top : https://github.com/rib/gputop
+ * DO NOT EDIT manually!
+ */
+
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "i915_oa_tgl.h"
+
+static const struct i915_oa_reg b_counter_config_test_oa[] = {
+	{ _MMIO(0xD920), 0x00000000 },
+	{ _MMIO(0xD900), 0x00000000 },
+	{ _MMIO(0xD904), 0xF0800000 },
+	{ _MMIO(0xD910), 0x00000000 },
+	{ _MMIO(0xD914), 0xF0800000 },
+	{ _MMIO(0xDC40), 0x00FF0000 },
+	{ _MMIO(0xD940), 0x00000004 },
+	{ _MMIO(0xD944), 0x0000FFFF },
+	{ _MMIO(0xDC00), 0x00000004 },
+	{ _MMIO(0xDC04), 0x0000FFFF },
+	{ _MMIO(0xD948), 0x00000003 },
+	{ _MMIO(0xD94C), 0x0000FFFF },
+	{ _MMIO(0xDC08), 0x00000003 },
+	{ _MMIO(0xDC0C), 0x0000FFFF },
+	{ _MMIO(0xD950), 0x00000007 },
+	{ _MMIO(0xD954), 0x0000FFFF },
+	{ _MMIO(0xDC10), 0x00000007 },
+	{ _MMIO(0xDC14), 0x0000FFFF },
+	{ _MMIO(0xD958), 0x00100002 },
+	{ _MMIO(0xD95C), 0x0000FFF7 },
+	{ _MMIO(0xDC18), 0x00100002 },
+	{ _MMIO(0xDC1C), 0x0000FFF7 },
+	{ _MMIO(0xD960), 0x00100002 },
+	{ _MMIO(0xD964), 0x0000FFCF },
+	{ _MMIO(0xDC20), 0x00100002 },
+	{ _MMIO(0xDC24), 0x0000FFCF },
+	{ _MMIO(0xD968), 0x00100082 },
+	{ _MMIO(0xD96C), 0x0000FFEF },
+	{ _MMIO(0xDC28), 0x00100082 },
+	{ _MMIO(0xDC2C), 0x0000FFEF },
+	{ _MMIO(0xD970), 0x001000C2 },
+	{ _MMIO(0xD974), 0x0000FFE7 },
+	{ _MMIO(0xDC30), 0x001000C2 },
+	{ _MMIO(0xDC34), 0x0000FFE7 },
+	{ _MMIO(0xD978), 0x00100001 },
+	{ _MMIO(0xD97C), 0x0000FFE7 },
+	{ _MMIO(0xDC38), 0x00100001 },
+	{ _MMIO(0xDC3C), 0x0000FFE7 },
+};
+
+static const struct i915_oa_reg flex_eu_config_test_oa[] = {
+};
+
+static const struct i915_oa_reg mux_config_test_oa[] = {
+	{ _MMIO(0x0D04), 0x00000200 },
+	{ _MMIO(0x9840), 0x00000000 },
+	{ _MMIO(0x9884), 0x00000000 },
+	{ _MMIO(0x9888), 0x280E0000 },
+	{ _MMIO(0x9888), 0x1E0E0147 },
+	{ _MMIO(0x9888), 0x180E0000 },
+	{ _MMIO(0x9888), 0x160E0000 },
+	{ _MMIO(0x9888), 0x1E0F1000 },
+	{ _MMIO(0x9888), 0x1E104000 },
+	{ _MMIO(0x9888), 0x2E020100 },
+	{ _MMIO(0x9888), 0x2C030004 },
+	{ _MMIO(0x9888), 0x38003000 },
+	{ _MMIO(0x9888), 0x1E0A8000 },
+	{ _MMIO(0x9884), 0x00000003 },
+	{ _MMIO(0x9888), 0x49110000 },
+	{ _MMIO(0x9888), 0x5D101400 },
+	{ _MMIO(0x9888), 0x1D140020 },
+	{ _MMIO(0x9888), 0x1D1103A3 },
+	{ _MMIO(0x9888), 0x01110000 },
+	{ _MMIO(0x9888), 0x61111000 },
+	{ _MMIO(0x9888), 0x1F128000 },
+	{ _MMIO(0x9888), 0x17100000 },
+	{ _MMIO(0x9888), 0x55100630 },
+	{ _MMIO(0x9888), 0x57100000 },
+	{ _MMIO(0x9888), 0x31100000 },
+	{ _MMIO(0x9884), 0x00000003 },
+	{ _MMIO(0x9888), 0x65100002 },
+	{ _MMIO(0x9884), 0x00000000 },
+	{ _MMIO(0x9888), 0x42000001 },
+};
+
+static ssize_t
+show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "1\n");
+}
+
+void
+i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv)
+{
+	strlcpy(dev_priv->perf.test_config.uuid,
+		"80a833f0-2504-4321-8894-e9277844ce7b",
+		sizeof(dev_priv->perf.test_config.uuid));
+	dev_priv->perf.test_config.id = 1;
+
+	dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
+	dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
+
+	dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
+	dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
+
+	dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
+	dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
+
+	dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b";
+	dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
+
+	dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
+
+	dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
+	dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
+	dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
+}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h
new file mode 100644
index 000000000000..4c25f0be825c
--- /dev/null
+++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Autogenerated file by GPU Top : https://github.com/rib/gputop
+ * DO NOT EDIT manually!
+ */
+
+#ifndef __I915_OA_TGL_H__
+#define __I915_OA_TGL_H__
+
+struct drm_i915_private;
+
+void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 77d844ac8b71..ef572a0c2566 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
 		return NULL;
 
 	kref_init(&active->ref);
-	i915_active_init(i915, &active->base, __live_active, __live_retire);
+	i915_active_init(&active->base, __live_active, __live_retire);
 
 	return active;
 }
@@ -79,7 +79,6 @@ __live_active_setup(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	struct i915_sw_fence *submit;
 	struct live_active *active;
-	enum intel_engine_id id;
 	unsigned int count = 0;
 	int err = 0;
 
@@ -97,10 +96,10 @@ __live_active_setup(struct drm_i915_private *i915)
 	if (err)
 		goto out;
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		struct i915_request *rq;
 
-		rq = i915_request_create(engine->kernel_context);
+		rq = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
@@ -110,7 +109,7 @@ __live_active_setup(struct drm_i915_private *i915)
 						       submit,
 						       GFP_KERNEL);
 		if (err >= 0)
-			err = i915_active_ref(&active->base, rq->timeline, rq);
+			err = i915_active_add_request(&active->base, rq);
 		i915_request_add(rq);
 		if (err) {
 			pr_err("Failed to track active ref!\n");
@@ -121,7 +120,7 @@ __live_active_setup(struct drm_i915_private *i915)
 	}
 
 	i915_active_release(&active->base);
-	if (active->retired && count) {
+	if (READ_ONCE(active->retired) && count) {
 		pr_err("i915_active retired before submission!\n");
 		err = -EINVAL;
 	}
@@ -146,35 +145,29 @@ static int live_active_wait(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests retire upon waiting */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	i915_active_wait(&active->base);
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
+		struct drm_printer p = drm_err_printer(__func__);
+
 		pr_err("i915_active not retired after waiting!\n");
+		i915_active_print(&active->base, &p);
+
 		err = -EINVAL;
 	}
 
 	__live_put(active);
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -182,35 +175,29 @@ static int live_active_retire(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests are indirectly retired */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
+		struct drm_printer p = drm_err_printer(__func__);
+
 		pr_err("i915_active not retired after flushing!\n");
+		i915_active_print(&active->base, &p);
+
 		err = -EINVAL;
 	}
 
 	__live_put(active);
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -226,3 +213,81 @@ int i915_active_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static struct intel_engine_cs *node_to_barrier(struct active_node *it)
+{
+	struct intel_engine_cs *engine;
+
+	if (!is_barrier(&it->base))
+		return NULL;
+
+	engine = __barrier_to_engine(it);
+	smp_rmb(); /* serialise with add_active_barriers */
+	if (!is_barrier(&it->base))
+		return NULL;
+
+	return engine;
+}
+
+void i915_active_print(struct i915_active *ref, struct drm_printer *m)
+{
+	drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire);
+	drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count));
+	drm_printf(m, "\tpreallocated barriers? %s\n",
+		   yesno(!llist_empty(&ref->preallocated_barriers)));
+
+	if (i915_active_acquire_if_busy(ref)) {
+		struct active_node *it, *n;
+
+		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+			struct intel_engine_cs *engine;
+
+			engine = node_to_barrier(it);
+			if (engine) {
+				drm_printf(m, "\tbarrier: %s\n", engine->name);
+				continue;
+			}
+
+			if (i915_active_fence_isset(&it->base)) {
+				drm_printf(m,
+					   "\ttimeline: %llx\n", it->timeline);
+				continue;
+			}
+		}
+
+		i915_active_release(ref);
+	}
+}
+
+static void spin_unlock_wait(spinlock_t *lock)
+{
+	spin_lock_irq(lock);
+	spin_unlock_irq(lock);
+}
+
+void i915_active_unlock_wait(struct i915_active *ref)
+{
+	if (i915_active_acquire_if_busy(ref)) {
+		struct active_node *it, *n;
+
+		rcu_read_lock();
+		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+			struct dma_fence *f;
+
+			/* Wait for all active callbacks */
+			f = rcu_dereference(it->base.fence);
+			if (f)
+				spin_unlock_wait(f->lock);
+		}
+		rcu_read_unlock();
+
+		i915_active_release(ref);
+	}
+
+	/* And wait for the retire callback */
+	spin_lock_irq(&ref->tree_lock);
+	spin_unlock_irq(&ref->tree_lock);
+
+	/* ... which may have been on a thread instead */
+	flush_work(&ref->work);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c
index 23f784eae1e7..1b856bae67b5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_buddy.c
+++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c
@@ -375,6 +375,8 @@ retry:
 
 		if (err)
 			break;
+
+		cond_resched();
 	}
 
 	if (err == -ENOMEM)
@@ -687,6 +689,8 @@ static int igt_buddy_alloc_range(void *arg)
 		rem -= size;
 		if (!rem)
 			break;
+
+		cond_resched();
 	}
 
 	if (err == -ENOMEM)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 37593831b539..78f36faf2bbe 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -9,29 +9,33 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_selftest.h"
 
 #include "igt_flush_test.h"
 #include "mock_drm.h"
 
-static int switch_to_context(struct drm_i915_private *i915,
-			     struct i915_gem_context *ctx)
+static int switch_to_context(struct i915_gem_context *ctx)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	int err = 0;
 
-	for_each_engine(engine, i915, id) {
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct i915_request *rq;
 
-		rq = igt_request_alloc(ctx, engine);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
+		rq = intel_context_create_request(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
 
 		i915_request_add(rq);
 	}
+	i915_gem_context_unlock_engines(ctx);
 
-	return 0;
+	return err;
 }
 
 static void trash_stolen(struct drm_i915_private *i915)
@@ -42,6 +46,10 @@ static void trash_stolen(struct drm_i915_private *i915)
 	unsigned long page;
 	u32 prng = 0x12345678;
 
+	/* XXX: fsck. needs some more thought... */
+	if (!i915_ggtt_has_aperture(ggtt))
+		return;
+
 	for (page = 0; page < size; page += PAGE_SIZE) {
 		const dma_addr_t dma = i915->dsm.start + page;
 		u32 __iomem *s;
@@ -116,13 +124,8 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * that runtime-pm just works.
 	 */
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		intel_gt_sanitize(&i915->gt, false);
-		i915_gem_sanitize(i915);
-
-		mutex_lock(&i915->drm.struct_mutex);
 		i915_gem_restore_gtt_mappings(i915);
-		i915_gem_restore_fences(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
+		i915_gem_restore_fences(&i915->ggtt);
 
 		i915_gem_resume(i915);
 	}
@@ -132,7 +135,7 @@ static int igt_gem_suspend(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx;
-	struct drm_file *file;
+	struct file *file;
 	int err;
 
 	file = mock_file(i915);
@@ -140,11 +143,9 @@ static int igt_gem_suspend(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
-		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
+		err = switch_to_context(ctx);
 	if (err)
 		goto out;
 
@@ -159,11 +160,9 @@ static int igt_gem_suspend(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
+	err = switch_to_context(ctx);
 out:
-	mock_file_free(i915, file);
+	fput(file);
 	return err;
 }
 
@@ -171,7 +170,7 @@ static int igt_gem_hibernate(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx;
-	struct drm_file *file;
+	struct file *file;
 	int err;
 
 	file = mock_file(i915);
@@ -179,11 +178,9 @@ static int igt_gem_hibernate(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
-		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
+		err = switch_to_context(ctx);
 	if (err)
 		goto out;
 
@@ -198,11 +195,9 @@ static int igt_gem_hibernate(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
+	err = switch_to_context(ctx);
 out:
-	mock_file_free(i915, file);
+	fput(file);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index cb30c669b1b7..06ef88510209 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -43,8 +43,7 @@ static void quirk_add(struct drm_i915_gem_object *obj,
 	list_add(&obj->st_link, objects);
 }
 
-static int populate_ggtt(struct drm_i915_private *i915,
-			 struct list_head *objects)
+static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects)
 {
 	unsigned long unbound, bound, count;
 	struct drm_i915_gem_object *obj;
@@ -53,7 +52,8 @@ static int populate_ggtt(struct drm_i915_private *i915,
 	do {
 		struct i915_vma *vma;
 
-		obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
+		obj = i915_gem_object_create_internal(ggtt->vm.i915,
+						      I915_GTT_PAGE_SIZE);
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
@@ -70,7 +70,7 @@ static int populate_ggtt(struct drm_i915_private *i915,
 		count++;
 	} while (1);
 	pr_debug("Filled GGTT with %lu pages [%llu total]\n",
-		 count, i915->ggtt.vm.total / PAGE_SIZE);
+		 count, ggtt->vm.total / PAGE_SIZE);
 
 	bound = 0;
 	unbound = 0;
@@ -96,7 +96,7 @@ static int populate_ggtt(struct drm_i915_private *i915,
 		return -EINVAL;
 	}
 
-	if (list_empty(&i915->ggtt.vm.bound_list)) {
+	if (list_empty(&ggtt->vm.bound_list)) {
 		pr_err("No objects on the GGTT inactive list!\n");
 		return -EINVAL;
 	}
@@ -104,20 +104,16 @@ static int populate_ggtt(struct drm_i915_private *i915,
 	return 0;
 }
 
-static void unpin_ggtt(struct drm_i915_private *i915)
+static void unpin_ggtt(struct i915_ggtt *ggtt)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 
-	mutex_lock(&ggtt->vm.mutex);
-	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 		if (vma->obj->mm.quirked)
 			i915_vma_unpin(vma);
-	mutex_unlock(&ggtt->vm.mutex);
 }
 
-static void cleanup_objects(struct drm_i915_private *i915,
-			    struct list_head *list)
+static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list)
 {
 	struct drm_i915_gem_object *obj, *on;
 
@@ -127,44 +123,44 @@ static void cleanup_objects(struct drm_i915_private *i915,
 		i915_gem_object_put(obj);
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_drain_freed_objects(ggtt->vm.i915);
 }
 
 static int igt_evict_something(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	LIST_HEAD(objects);
 	int err;
 
 	/* Fill the GGTT with pinned objects and try to evict one. */
 
-	err = populate_ggtt(i915, &objects);
+	err = populate_ggtt(ggtt, &objects);
 	if (err)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
 		goto cleanup;
 	}
 
-	unpin_ggtt(i915);
+	unpin_ggtt(ggtt);
 
 	/* Everything is unpinned, we should be able to evict something */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -172,13 +168,14 @@ static int igt_evict_something(void *arg)
 	}
 
 cleanup:
-	cleanup_objects(i915, &objects);
+	cleanup_objects(ggtt, &objects);
 	return err;
 }
 
 static int igt_overcommit(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	LIST_HEAD(objects);
@@ -188,11 +185,11 @@ static int igt_overcommit(void *arg)
 	 * We expect it to fail.
 	 */
 
-	err = populate_ggtt(i915, &objects);
+	err = populate_ggtt(ggtt, &objects);
 	if (err)
 		goto cleanup;
 
-	obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
+	obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto cleanup;
@@ -201,21 +198,21 @@ static int igt_overcommit(void *arg)
 	quirk_add(obj, &objects);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
-	if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) {
-		pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma));
+	if (vma != ERR_PTR(-ENOSPC)) {
+		pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR_OR_ZERO(vma));
 		err = -EINVAL;
 		goto cleanup;
 	}
 
 cleanup:
-	cleanup_objects(i915, &objects);
+	cleanup_objects(ggtt, &objects);
 	return err;
 }
 
 static int igt_evict_for_vma(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	struct drm_mm_node target = {
 		.start = 0,
 		.size = 4096,
@@ -225,22 +222,26 @@ static int igt_evict_for_vma(void *arg)
 
 	/* Fill the GGTT with pinned objects and try to evict a range. */
 
-	err = populate_ggtt(i915, &objects);
+	err = populate_ggtt(ggtt, &objects);
 	if (err)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
 		       err);
 		goto cleanup;
 	}
 
-	unpin_ggtt(i915);
+	unpin_ggtt(ggtt);
 
 	/* Everything is unpinned, we should be able to evict the node */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_for_node returned err=%d\n",
 		       err);
@@ -248,7 +249,7 @@ static int igt_evict_for_vma(void *arg)
 	}
 
 cleanup:
-	cleanup_objects(i915, &objects);
+	cleanup_objects(ggtt, &objects);
 	return err;
 }
 
@@ -261,8 +262,8 @@ static void mock_color_adjust(const struct drm_mm_node *node,
 
 static int igt_evict_for_cache_color(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	const unsigned long flags = PIN_OFFSET_FIXED;
 	struct drm_mm_node target = {
 		.start = I915_GTT_PAGE_SIZE * 2,
@@ -274,14 +275,16 @@ static int igt_evict_for_cache_color(void *arg)
 	LIST_HEAD(objects);
 	int err;
 
-	/* Currently the use of color_adjust is limited to cache domains within
-	 * the ggtt, and so the presence of mm.color_adjust is assumed to be
-	 * i915_gtt_color_adjust throughout our driver, so using a mock color
-	 * adjust will work just fine for our purposes.
+	/*
+	 * Currently the use of color_adjust for the GGTT is limited to cache
+	 * coloring and guard pages, and so the presence of mm.color_adjust for
+	 * the GGTT is assumed to be i915_ggtt_color_adjust, hence using a mock
+	 * color adjust will work just fine for our purposes.
 	 */
 	ggtt->vm.mm.color_adjust = mock_color_adjust;
+	GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm));
 
-	obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
+	obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto cleanup;
@@ -297,7 +300,7 @@ static int igt_evict_for_cache_color(void *arg)
 		goto cleanup;
 	}
 
-	obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
+	obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto cleanup;
@@ -317,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg)
 	i915_vma_unpin(vma);
 
 	/* Remove just the second vma */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
 		goto cleanup;
@@ -328,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg)
 	 */
 	target.color = I915_CACHE_L3_LLC;
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (!err) {
 		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
 		err = -EINVAL;
@@ -338,36 +345,40 @@ static int igt_evict_for_cache_color(void *arg)
 	err = 0;
 
 cleanup:
-	unpin_ggtt(i915);
-	cleanup_objects(i915, &objects);
+	unpin_ggtt(ggtt);
+	cleanup_objects(ggtt, &objects);
 	ggtt->vm.mm.color_adjust = NULL;
 	return err;
 }
 
 static int igt_evict_vm(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	LIST_HEAD(objects);
 	int err;
 
 	/* Fill the GGTT with pinned objects and try to evict everything. */
 
-	err = populate_ggtt(i915, &objects);
+	err = populate_ggtt(ggtt, &objects);
 	if (err)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
 		goto cleanup;
 	}
 
-	unpin_ggtt(i915);
+	unpin_ggtt(ggtt);
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -375,14 +386,16 @@ static int igt_evict_vm(void *arg)
 	}
 
 cleanup:
-	cleanup_objects(i915, &objects);
+	cleanup_objects(ggtt, &objects);
 	return err;
 }
 
 static int igt_evict_contexts(void *arg)
 {
 	const u64 PRETEND_GGTT_SIZE = 16ull << 20;
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
+	struct i915_ggtt *ggtt = gt->ggtt;
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct reserved {
@@ -408,14 +421,14 @@ static int igt_evict_contexts(void *arg)
 	if (!HAS_FULL_PPGTT(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
-	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
+	mutex_lock(&ggtt->vm.mutex);
+	err = i915_gem_gtt_insert(&ggtt->vm, &hole,
 				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
-				  0, i915->ggtt.vm.total,
+				  0, ggtt->vm.total,
 				  PIN_NOEVICT);
 	if (err)
 		goto out_locked;
@@ -425,15 +438,17 @@ static int igt_evict_contexts(void *arg)
 	do {
 		struct reserved *r;
 
+		mutex_unlock(&ggtt->vm.mutex);
 		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
+		mutex_lock(&ggtt->vm.mutex);
 		if (!r) {
 			err = -ENOMEM;
 			goto out_locked;
 		}
 
-		if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node,
+		if (i915_gem_gtt_insert(&ggtt->vm, &r->node,
 					1ul << 20, 0, I915_COLOR_UNEVICTABLE,
-					0, i915->ggtt.vm.total,
+					0, ggtt->vm.total,
 					PIN_NOEVICT)) {
 			kfree(r);
 			break;
@@ -445,13 +460,13 @@ static int igt_evict_contexts(void *arg)
 		count++;
 	} while (1);
 	drm_mm_remove_node(&hole);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&ggtt->vm.mutex);
 	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
 
 	/* Overfill the GGTT with context objects and so try to evict one. */
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_sw_fence fence;
-		struct drm_file *file;
+		struct file *file;
 
 		file = mock_file(i915);
 		if (IS_ERR(file)) {
@@ -460,7 +475,6 @@ static int igt_evict_contexts(void *arg)
 		}
 
 		count = 0;
-		mutex_lock(&i915->drm.struct_mutex);
 		onstack_fence_init(&fence);
 		do {
 			struct i915_request *rq;
@@ -478,8 +492,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
@@ -497,20 +511,18 @@ static int igt_evict_contexts(void *arg)
 			count++;
 			err = 0;
 		} while(1);
-		mutex_unlock(&i915->drm.struct_mutex);
-
 		onstack_fence_fini(&fence);
 		pr_info("Submitted %lu contexts/requests on %s\n",
 			count, engine->name);
 
-		mock_file_free(i915, file);
+		fput(file);
 		if (err)
 			break;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&ggtt->vm.mutex);
 out_locked:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
@@ -522,8 +534,8 @@ out_locked:
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
+	mutex_unlock(&ggtt->vm.mutex);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -545,11 +557,8 @@ int i915_gem_evict_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		err = i915_subtests(tests, i915);
-
-	mutex_unlock(&i915->drm.struct_mutex);
+		err = i915_subtests(tests, &i915->gt);
 
 	drm_dev_put(&i915->drm);
 	return err;
@@ -564,5 +573,5 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	return i915_subtests(tests, i915);
+	return intel_gt_live_subtests(tests, &i915->gt);
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 31a51ca1ddcb..b342bef5e7c9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -25,26 +25,21 @@
 #include <linux/list_sort.h>
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_context.h"
 #include "gem/selftests/mock_context.h"
+#include "gt/intel_context.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
+#include "mock_gtt.h"
+#include "igt_flush_test.h"
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
-	/*
-	 * As we may hold onto the struct_mutex for inordinate lengths of
-	 * time, the NMI khungtaskd detector may fire for the free objects
-	 * worker.
-	 */
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static void fake_free_pages(struct drm_i915_gem_object *obj,
@@ -88,8 +83,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj)
 	}
 	GEM_BUG_ON(rem);
 
-	obj->mm.madv = I915_MADV_DONTNEED;
-
 	__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
 
 	return 0;
@@ -101,7 +94,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj,
 {
 	fake_free_pages(obj, pages);
 	obj->mm.dirty = false;
-	obj->mm.madv = I915_MADV_WILLNEED;
 }
 
 static const struct drm_i915_gem_object_ops fake_ops = {
@@ -113,6 +105,7 @@ static const struct drm_i915_gem_object_ops fake_ops = {
 static struct drm_i915_gem_object *
 fake_dma_object(struct drm_i915_private *i915, u64 size)
 {
+	static struct lock_class_key lock_class;
 	struct drm_i915_gem_object *obj;
 
 	GEM_BUG_ON(!size);
@@ -126,7 +119,9 @@ fake_dma_object(struct drm_i915_private *i915, u64 size)
 		goto err;
 
 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
-	i915_gem_object_init(obj, &fake_ops);
+	i915_gem_object_init(obj, &fake_ops, &lock_class);
+
+	i915_gem_object_set_volatile(obj);
 
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -157,7 +152,7 @@ static int igt_ppgtt_alloc(void *arg)
 	if (!HAS_PPGTT(dev_priv))
 		return 0;
 
-	ppgtt = __ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -212,16 +207,17 @@ err_ppgtt_cleanup:
 	return err;
 }
 
-static int lowlevel_hole(struct drm_i915_private *i915,
-			 struct i915_address_space *vm,
+static int lowlevel_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
 	I915_RND_STATE(seed_prng);
+	struct i915_vma *mock_vma;
 	unsigned int size;
-	struct i915_vma mock_vma;
 
-	memset(&mock_vma, 0, sizeof(struct i915_vma));
+	mock_vma = kzalloc(sizeof(*mock_vma), GFP_KERNEL);
+	if (!mock_vma)
+		return -ENOMEM;
 
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (size = 12; (hole_end - hole_start) >> size; size++) {
@@ -245,8 +241,10 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			if (order)
 				break;
 		} while (count >>= 1);
-		if (!count)
+		if (!count) {
+			kfree(mock_vma);
 			return -ENOMEM;
+		}
 		GEM_BUG_ON(!order);
 
 		GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
@@ -258,7 +256,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 		 * memory. We expect to hit -ENOMEM.
 		 */
 
-		obj = fake_dma_object(i915, BIT_ULL(size));
+		obj = fake_dma_object(vm->i915, BIT_ULL(size));
 		if (IS_ERR(obj)) {
 			kfree(order);
 			break;
@@ -289,22 +287,24 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			    vm->allocate_va_range(vm, addr, BIT_ULL(size)))
 				break;
 
-			mock_vma.pages = obj->mm.pages;
-			mock_vma.node.size = BIT_ULL(size);
-			mock_vma.node.start = addr;
+			mock_vma->pages = obj->mm.pages;
+			mock_vma->node.size = BIT_ULL(size);
+			mock_vma->node.start = addr;
 
-			wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
-			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
+				vm->insert_entries(vm, mock_vma,
+						   I915_CACHE_NONE, 0);
 		}
 		count = n;
 
 		i915_random_reorder(order, count, &prng);
 		for (n = 0; n < count; n++) {
 			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
-			vm->clear_range(vm, addr, BIT_ULL(size));
+			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
+				vm->clear_range(vm, addr, BIT_ULL(size));
 		}
 
 		i915_gem_object_unpin_pages(obj);
@@ -312,9 +312,10 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 
 		kfree(order);
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
+	kfree(mock_vma);
 	return 0;
 }
 
@@ -339,8 +340,7 @@ static void close_object_list(struct list_head *objects,
 	}
 }
 
-static int fill_hole(struct drm_i915_private *i915,
-		     struct i915_address_space *vm,
+static int fill_hole(struct i915_address_space *vm,
 		     u64 hole_start, u64 hole_end,
 		     unsigned long end_time)
 {
@@ -373,7 +373,7 @@ static int fill_hole(struct drm_i915_private *i915,
 				{ }
 			}, *p;
 
-			obj = fake_dma_object(i915, full_size);
+			obj = fake_dma_object(vm->i915, full_size);
 			if (IS_ERR(obj))
 				break;
 
@@ -541,7 +541,7 @@ static int fill_hole(struct drm_i915_private *i915,
 		}
 
 		close_object_list(&objects, vm);
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
@@ -551,8 +551,7 @@ err:
 	return err;
 }
 
-static int walk_hole(struct drm_i915_private *i915,
-		     struct i915_address_space *vm,
+static int walk_hole(struct i915_address_space *vm,
 		     u64 hole_start, u64 hole_end,
 		     unsigned long end_time)
 {
@@ -574,7 +573,7 @@ static int walk_hole(struct drm_i915_private *i915,
 		u64 addr;
 		int err = 0;
 
-		obj = fake_dma_object(i915, size << PAGE_SHIFT);
+		obj = fake_dma_object(vm->i915, size << PAGE_SHIFT);
 		if (IS_ERR(obj))
 			break;
 
@@ -629,14 +628,13 @@ err_put:
 		if (err)
 			return err;
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
 }
 
-static int pot_hole(struct drm_i915_private *i915,
-		    struct i915_address_space *vm,
+static int pot_hole(struct i915_address_space *vm,
 		    u64 hole_start, u64 hole_end,
 		    unsigned long end_time)
 {
@@ -650,7 +648,7 @@ static int pot_hole(struct drm_i915_private *i915,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
-	obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -711,8 +709,7 @@ err_obj:
 	return err;
 }
 
-static int drunk_hole(struct drm_i915_private *i915,
-		      struct i915_address_space *vm,
+static int drunk_hole(struct i915_address_space *vm,
 		      u64 hole_start, u64 hole_end,
 		      unsigned long end_time)
 {
@@ -757,7 +754,7 @@ static int drunk_hole(struct drm_i915_private *i915,
 		 * memory. We expect to hit -ENOMEM.
 		 */
 
-		obj = fake_dma_object(i915, BIT_ULL(size));
+		obj = fake_dma_object(vm->i915, BIT_ULL(size));
 		if (IS_ERR(obj)) {
 			kfree(order);
 			break;
@@ -815,14 +812,13 @@ err_obj:
 		if (err)
 			return err;
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
 }
 
-static int __shrink_hole(struct drm_i915_private *i915,
-			 struct i915_address_space *vm,
+static int __shrink_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
@@ -839,7 +835,7 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		u64 size = BIT_ULL(order++);
 
 		size = min(size, hole_end - addr);
-		obj = fake_dma_object(i915, size);
+		obj = fake_dma_object(vm->i915, size);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
 			break;
@@ -875,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		i915_vma_unpin(vma);
 		addr += size;
 
+		/*
+		 * Since we are injecting allocation faults at random intervals,
+		 * wait for this allocation to complete before we change the
+		 * faultinjection.
+		 */
+		err = i915_vma_sync(vma);
+		if (err)
+			break;
+
 		if (igt_timeout(end_time,
 				"%s timed out at ofset %llx [%llx - %llx]\n",
 				__func__, addr, hole_start, hole_end)) {
@@ -884,12 +889,11 @@ static int __shrink_hole(struct drm_i915_private *i915,
 	}
 
 	close_object_list(&objects, vm);
-	cleanup_freed_objects(i915);
+	cleanup_freed_objects(vm->i915);
 	return err;
 }
 
-static int shrink_hole(struct drm_i915_private *i915,
-		       struct i915_address_space *vm,
+static int shrink_hole(struct i915_address_space *vm,
 		       u64 hole_start, u64 hole_end,
 		       unsigned long end_time)
 {
@@ -901,7 +905,7 @@ static int shrink_hole(struct drm_i915_private *i915,
 
 	for_each_prime_number_from(prime, 0, ULONG_MAX - 1) {
 		vm->fault_attr.interval = prime;
-		err = __shrink_hole(i915, vm, hole_start, hole_end, end_time);
+		err = __shrink_hole(vm, hole_start, hole_end, end_time);
 		if (err)
 			break;
 	}
@@ -911,8 +915,7 @@ static int shrink_hole(struct drm_i915_private *i915,
 	return err;
 }
 
-static int shrink_boom(struct drm_i915_private *i915,
-		       struct i915_address_space *vm,
+static int shrink_boom(struct i915_address_space *vm,
 		       u64 hole_start, u64 hole_end,
 		       unsigned long end_time)
 {
@@ -934,7 +937,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		unsigned int size = sizes[i];
 		struct i915_vma *vma;
 
-		purge = fake_dma_object(i915, size);
+		purge = fake_dma_object(vm->i915, size);
 		if (IS_ERR(purge))
 			return PTR_ERR(purge);
 
@@ -951,7 +954,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		/* Should now be ripe for purging */
 		i915_vma_unpin(vma);
 
-		explode = fake_dma_object(i915, size);
+		explode = fake_dma_object(vm->i915, size);
 		if (IS_ERR(explode)) {
 			err = PTR_ERR(explode);
 			goto err_purge;
@@ -977,7 +980,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		i915_gem_object_put(explode);
 
 		memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
@@ -991,14 +994,13 @@ err_purge:
 }
 
 static int exercise_ppgtt(struct drm_i915_private *dev_priv,
-			  int (*func)(struct drm_i915_private *i915,
-				      struct i915_address_space *vm,
+			  int (*func)(struct i915_address_space *vm,
 				      u64 hole_start, u64 hole_end,
 				      unsigned long end_time))
 {
-	struct drm_file *file;
 	struct i915_ppgtt *ppgtt;
 	IGT_TIMEOUT(end_time);
+	struct file *file;
 	int err;
 
 	if (!HAS_FULL_PPGTT(dev_priv))
@@ -1008,22 +1010,20 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ppgtt = i915_ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
-		goto out_unlock;
+		goto out_free;
 	}
 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
-	GEM_BUG_ON(ppgtt->vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
 
-	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
+	err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
 	i915_vm_put(&ppgtt->vm);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	mock_file_free(dev_priv, file);
+out_free:
+	fput(file);
 	return err;
 }
 
@@ -1074,8 +1074,7 @@ static int sort_holes(void *priv, struct list_head *A, struct list_head *B)
 }
 
 static int exercise_ggtt(struct drm_i915_private *i915,
-			 int (*func)(struct drm_i915_private *i915,
-				     struct i915_address_space *vm,
+			 int (*func)(struct i915_address_space *vm,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
@@ -1085,7 +1084,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 	IGT_TIMEOUT(end_time);
 	int err = 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 restart:
 	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
 	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
@@ -1098,7 +1096,7 @@ restart:
 		if (hole_start >= hole_end)
 			continue;
 
-		err = func(i915, &ggtt->vm, hole_start, hole_end, end_time);
+		err = func(&ggtt->vm, hole_start, hole_end, end_time);
 		if (err)
 			break;
 
@@ -1106,7 +1104,6 @@ restart:
 		last = hole_end;
 		goto restart;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -1148,24 +1145,25 @@ static int igt_ggtt_page(void *arg)
 	unsigned int *order, n;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
+	if (!i915_ggtt_has_aperture(ggtt))
+		return 0;
 
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
 		goto out_free;
 
 	memset(&tmp, 0, sizeof(tmp));
+	mutex_lock(&ggtt->vm.mutex);
 	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, &tmp,
 					  count * PAGE_SIZE, 0,
 					  I915_COLOR_UNEVICTABLE,
 					  0, ggtt->mappable_end,
 					  DRM_MM_INSERT_LOW);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err)
 		goto out_unpin;
 
@@ -1217,13 +1215,13 @@ static int igt_ggtt_page(void *arg)
 out_remove:
 	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_lock(&ggtt->vm.mutex);
 	drm_mm_remove_node(&tmp);
+	mutex_unlock(&ggtt->vm.mutex);
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
 out_free:
 	i915_gem_object_put(obj);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1234,20 +1232,23 @@ static void track_vma_bind(struct i915_vma *vma)
 	atomic_inc(&obj->bind_count); /* track for eviction later */
 	__i915_gem_object_pin_pages(obj);
 
+	GEM_BUG_ON(vma->pages);
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+	__i915_gem_object_pin_pages(obj);
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 }
 
 static int exercise_mock(struct drm_i915_private *i915,
-			 int (*func)(struct drm_i915_private *i915,
-				     struct i915_address_space *vm,
+			 int (*func)(struct i915_address_space *vm,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
 	const u64 limit = totalram_pages() << PAGE_SHIFT;
+	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
 	IGT_TIMEOUT(end_time);
 	int err;
@@ -1256,7 +1257,9 @@ static int exercise_mock(struct drm_i915_private *i915,
 	if (!ctx)
 		return -ENOMEM;
 
-	err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time);
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	err = func(vm, 0, min(vm->total, limit), end_time);
+	i915_vm_put(vm);
 
 	mock_context_close(ctx);
 	return err;
@@ -1294,6 +1297,7 @@ static int igt_gtt_reserve(void *arg)
 {
 	struct i915_ggtt *ggtt = arg;
 	struct drm_i915_gem_object *obj, *on;
+	I915_RND_STATE(prng);
 	LIST_HEAD(objects);
 	u64 total;
 	int err = -ENODEV;
@@ -1330,11 +1334,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1380,11 +1386,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1420,15 +1428,18 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
-		offset = random_offset(0, ggtt->vm.total,
-				       2*I915_GTT_PAGE_SIZE,
-				       I915_GTT_MIN_ALIGNMENT);
+		offset = igt_random_offset(&prng,
+					   0, ggtt->vm.total,
+					   2 * I915_GTT_PAGE_SIZE,
+					   I915_GTT_MIN_ALIGNMENT);
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   offset,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1497,11 +1508,13 @@ static int igt_gtt_insert(void *arg)
 
 	/* Check a couple of obviously invalid requests */
 	for (ii = invalid_insert; ii->size; ii++) {
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
 					  ii->size, ii->alignment,
 					  I915_COLOR_UNEVICTABLE,
 					  ii->start, ii->end,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err != -ENOSPC) {
 			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
 			       ii->size, ii->alignment, ii->start, ii->end,
@@ -1537,10 +1550,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err == -ENOSPC) {
 			/* maxed out the GGTT space */
 			i915_gem_object_put(obj);
@@ -1595,10 +1610,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1642,10 +1659,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1689,13 +1708,10 @@ int i915_gem_gtt_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
-	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
+	mock_device_flush(i915);
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
@@ -1703,6 +1719,312 @@ out_put:
 	return err;
 }
 
+static int context_sync(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	long timeout;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	timeout = i915_request_wait(rq, 0, HZ / 5);
+	i915_request_put(rq);
+
+	return timeout < 0 ? -EIO : 0;
+}
+
+static struct i915_request *
+submit_batch(struct intel_context *ce, u64 addr)
+{
+	struct i915_request *rq;
+	int err;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return rq;
+
+	err = 0;
+	if (rq->engine->emit_init_breadcrumb) /* detect a hang */
+		err = rq->engine->emit_init_breadcrumb(rq);
+	if (err == 0)
+		err = rq->engine->emit_bb_start(rq, addr, 0, 0);
+
+	if (err == 0)
+		i915_request_get(rq);
+	i915_request_add(rq);
+
+	return err ? ERR_PTR(err) : rq;
+}
+
+static u32 *spinner(u32 *batch, int i)
+{
+	return batch + i * 64 / sizeof(*batch) + 4;
+}
+
+static void end_spin(u32 *batch, int i)
+{
+	*spinner(batch, i) = MI_BATCH_BUFFER_END;
+	wmb();
+}
+
+static int igt_cs_tlb(void *arg)
+{
+	const unsigned int count = PAGE_SIZE / 64;
+	const unsigned int chunk_size = count * PAGE_SIZE;
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *bbe, *act, *out;
+	struct i915_gem_engines_iter it;
+	struct i915_address_space *vm;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct i915_vma *vma;
+	I915_RND_STATE(prng);
+	struct file *file;
+	unsigned int i;
+	u32 *result;
+	u32 *batch;
+	int err = 0;
+
+	/*
+	 * Our mission here is to fool the hardware to execute something
+	 * from scratch as it has not seen the batch move (due to missing
+	 * the TLB invalidate).
+	 */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+
+	vm = i915_gem_context_get_vm_rcu(ctx);
+	if (i915_is_ggtt(vm))
+		goto out_vm;
+
+	/* Create two pages; dummy we prefill the TLB, and intended */
+	bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(bbe)) {
+		err = PTR_ERR(bbe);
+		goto out_vm;
+	}
+
+	batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto out_put_bbe;
+	}
+	memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32));
+	i915_gem_object_flush_map(bbe);
+	i915_gem_object_unpin_map(bbe);
+
+	act = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(act)) {
+		err = PTR_ERR(act);
+		goto out_put_bbe;
+	}
+
+	/* Track the execution of each request by writing into different slot */
+	batch = i915_gem_object_pin_map(act, I915_MAP_WC);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto out_put_act;
+	}
+	for (i = 0; i < count; i++) {
+		u32 *cs = batch + i * 64 / sizeof(*cs);
+		u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
+
+		GEM_BUG_ON(INTEL_GEN(i915) < 6);
+		cs[0] = MI_STORE_DWORD_IMM_GEN4;
+		if (INTEL_GEN(i915) >= 8) {
+			cs[1] = lower_32_bits(addr);
+			cs[2] = upper_32_bits(addr);
+			cs[3] = i;
+			cs[4] = MI_NOOP;
+			cs[5] = MI_BATCH_BUFFER_START_GEN8;
+		} else {
+			cs[1] = 0;
+			cs[2] = lower_32_bits(addr);
+			cs[3] = i;
+			cs[4] = MI_NOOP;
+			cs[5] = MI_BATCH_BUFFER_START;
+		}
+	}
+
+	out = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(out)) {
+		err = PTR_ERR(out);
+		goto out_put_batch;
+	}
+	i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
+
+	vma = i915_vma_instance(out, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_put_batch;
+	}
+
+	err = i915_vma_pin(vma, 0, 0,
+			   PIN_USER |
+			   PIN_OFFSET_FIXED |
+			   (vm->total - PAGE_SIZE));
+	if (err)
+		goto out_put_out;
+	GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
+
+	result = i915_gem_object_pin_map(out, I915_MAP_WB);
+	if (IS_ERR(result)) {
+		err = PTR_ERR(result);
+		goto out_put_out;
+	}
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		IGT_TIMEOUT(end_time);
+		unsigned long pass = 0;
+
+		if (!intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		while (!__igt_timeout(end_time, NULL)) {
+			struct i915_request *rq;
+			u64 offset;
+
+			offset = igt_random_offset(&prng,
+						   0, vm->total - PAGE_SIZE,
+						   chunk_size, PAGE_SIZE);
+
+			err = vm->allocate_va_range(vm, offset, chunk_size);
+			if (err)
+				goto end;
+
+			memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+			vma = i915_vma_instance(bbe, vm, NULL);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto end;
+			}
+
+			err = vma->ops->set_pages(vma);
+			if (err)
+				goto end;
+
+			/* Prime the TLB with the dummy pages */
+			for (i = 0; i < count; i++) {
+				vma->node.start = offset + i * PAGE_SIZE;
+				vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+
+				rq = submit_batch(ce, vma->node.start);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					goto end;
+				}
+				i915_request_put(rq);
+			}
+
+			vma->ops->clear_pages(vma);
+
+			err = context_sync(ce);
+			if (err) {
+				pr_err("%s: dummy setup timed out\n",
+				       ce->engine->name);
+				goto end;
+			}
+
+			vma = i915_vma_instance(act, vm, NULL);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto end;
+			}
+
+			err = vma->ops->set_pages(vma);
+			if (err)
+				goto end;
+
+			/* Replace the TLB with target batches */
+			for (i = 0; i < count; i++) {
+				struct i915_request *rq;
+				u32 *cs = batch + i * 64 / sizeof(*cs);
+				u64 addr;
+
+				vma->node.start = offset + i * PAGE_SIZE;
+				vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+
+				addr = vma->node.start + i * 64;
+				cs[4] = MI_NOOP;
+				cs[6] = lower_32_bits(addr);
+				cs[7] = upper_32_bits(addr);
+				wmb();
+
+				rq = submit_batch(ce, addr);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					goto end;
+				}
+
+				/* Wait until the context chain has started */
+				if (i == 0) {
+					while (READ_ONCE(result[i]) &&
+					       !i915_request_completed(rq))
+						cond_resched();
+				} else {
+					end_spin(batch, i - 1);
+				}
+
+				i915_request_put(rq);
+			}
+			end_spin(batch, count - 1);
+
+			vma->ops->clear_pages(vma);
+
+			err = context_sync(ce);
+			if (err) {
+				pr_err("%s: writes timed out\n",
+				       ce->engine->name);
+				goto end;
+			}
+
+			for (i = 0; i < count; i++) {
+				if (result[i] != i) {
+					pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n",
+					       ce->engine->name, pass,
+					       offset, i, result[i], i);
+					err = -EINVAL;
+					goto end;
+				}
+			}
+
+			vm->clear_range(vm, offset, chunk_size);
+			pass++;
+		}
+	}
+end:
+	if (igt_flush_test(i915))
+		err = -EIO;
+	i915_gem_context_unlock_engines(ctx);
+	i915_gem_object_unpin_map(out);
+out_put_out:
+	i915_gem_object_put(out);
+out_put_batch:
+	i915_gem_object_unpin_map(act);
+out_put_act:
+	i915_gem_object_put(act);
+out_put_bbe:
+	i915_gem_object_put(bbe);
+out_vm:
+	i915_vm_put(vm);
+out_unlock:
+	fput(file);
+	return err;
+}
+
 int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1720,6 +2042,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_ggtt_pot),
 		SUBTEST(igt_ggtt_fill),
 		SUBTEST(igt_ggtt_page),
+		SUBTEST(igt_cs_tlb),
 	};
 
 	GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 1ccf0f731ac0..34138c7bdd15 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -1,5 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* List each unit test as selftest(name, function)
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
  *
  * The name is used as both an enum and expanded as subtest__name to create
  * a module parameter. It must be unique and legal for a C identifier.
@@ -15,6 +21,10 @@ selftest(workarounds, intel_workarounds_live_selftests)
 selftest(gt_engines, intel_engine_live_selftests)
 selftest(gt_timelines, intel_timeline_live_selftests)
 selftest(gt_contexts, intel_context_live_selftests)
+selftest(gt_lrc, intel_lrc_live_selftests)
+selftest(gt_mocs, intel_mocs_live_selftests)
+selftest(gt_pm, intel_gt_pm_live_selftests)
+selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
@@ -30,6 +40,9 @@ selftest(gem_contexts, i915_gem_context_live_selftests)
 selftest(blt, i915_gem_object_blt_live_selftests)
 selftest(client, i915_gem_client_blt_live_selftests)
 selftest(reset, intel_reset_live_selftests)
+selftest(memory_region, intel_memory_region_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
-selftest(guc, intel_guc_live_selftest)
+selftest(perf, i915_perf_live_selftests)
+/* Here be dragons: keep last to run last! */
+selftest(late_gt_pm, intel_gt_pm_late_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index b88084fe3269..5b39bab4da1d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -1,5 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* List each unit test as selftest(name, function)
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
  *
  * The name is used as both an enum and expanded as subtest__name to create
  * a module parameter. It must be unique and legal for a C identifier.
@@ -26,3 +32,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests)
 selftest(hugepages, i915_gem_huge_page_mock_selftests)
 selftest(contexts, i915_gem_context_mock_selftests)
 selftest(buddy, i915_buddy_mock_selftests)
+selftest(memory_region, intel_memory_region_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
new file mode 100644
index 000000000000..d1a1568c47ba
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -0,0 +1,217 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kref.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+static struct i915_perf_stream *
+test_stream(struct i915_perf *perf)
+{
+	struct drm_i915_perf_open_param param = {};
+	struct perf_open_properties props = {
+		.engine = intel_engine_lookup_user(perf->i915,
+						   I915_ENGINE_CLASS_RENDER,
+						   0),
+		.sample_flags = SAMPLE_OA_REPORT,
+		.oa_format = IS_GEN(perf->i915, 12) ?
+		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
+		.metrics_set = 1,
+	};
+	struct i915_perf_stream *stream;
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream)
+		return NULL;
+
+	stream->perf = perf;
+
+	mutex_lock(&perf->lock);
+	if (i915_oa_stream_init(stream, &param, &props)) {
+		kfree(stream);
+		stream =  NULL;
+	}
+	mutex_unlock(&perf->lock);
+
+	return stream;
+}
+
+static void stream_destroy(struct i915_perf_stream *stream)
+{
+	struct i915_perf *perf = stream->perf;
+
+	mutex_lock(&perf->lock);
+	i915_perf_destroy_locked(stream);
+	mutex_unlock(&perf->lock);
+}
+
+static int live_sanitycheck(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+
+	/* Quick check we can create a perf stream */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -EINVAL;
+
+	stream_destroy(stream);
+	return 0;
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+	u32 *cs;
+	int len;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	len = 5;
+	if (INTEL_GEN(rq->i915) >= 8)
+		len++;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(len);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+		PIPE_CONTROL_STORE_DATA_INDEX |
+		PIPE_CONTROL_WRITE_TIMESTAMP;
+	*cs++ = slot * sizeof(u32);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static ktime_t poll_status(struct i915_request *rq, int slot)
+{
+	while (!intel_read_status_page(rq->engine, slot) &&
+	       !i915_request_completed(rq))
+		cpu_relax();
+
+	return ktime_get();
+}
+
+static int live_noa_delay(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+	struct i915_request *rq;
+	ktime_t t0, t1;
+	u64 expected;
+	u32 delay;
+	int err;
+	int i;
+
+	/* Check that the GPU delays matches expectations */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -ENOMEM;
+
+	expected = atomic64_read(&stream->perf->noa_programming_delay);
+
+	if (stream->engine->class != RENDER_CLASS) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	for (i = 0; i < 4; i++)
+		intel_write_status_page(stream->engine, 0x100 + i, 0);
+
+	rq = intel_engine_create_kernel_request(stream->engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out;
+	}
+
+	if (rq->engine->emit_init_breadcrumb &&
+	    i915_request_timeline(rq)->has_initial_breadcrumb) {
+		err = rq->engine->emit_init_breadcrumb(rq);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+	}
+
+	err = write_timestamp(rq, 0x100);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = rq->engine->emit_bb_start(rq,
+					i915_ggtt_offset(stream->noa_wait), 0,
+					I915_DISPATCH_SECURE);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = write_timestamp(rq, 0x102);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	preempt_disable();
+	t0 = poll_status(rq, 0x100);
+	t1 = poll_status(rq, 0x102);
+	preempt_enable();
+
+	pr_info("CPU delay: %lluns, expected %lluns\n",
+		ktime_sub(t1, t0), expected);
+
+	delay = intel_read_status_page(stream->engine, 0x102);
+	delay -= intel_read_status_page(stream->engine, 0x100);
+	delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+	pr_info("GPU delay: %uns, expected %lluns\n",
+		delay, expected);
+
+	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
+		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
+		       delay / 1000,
+		       div_u64(3 * expected, 4000),
+		       div_u64(3 * expected, 2000));
+		err = -EINVAL;
+	}
+
+	i915_request_put(rq);
+out:
+	stream_destroy(stream);
+	return err;
+}
+
+int i915_perf_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_sanitycheck),
+		SUBTEST(live_noa_delay),
+	};
+	struct i915_perf *perf = &i915->perf;
+
+	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
+		return 0;
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
new file mode 100644
index 000000000000..5a577a1332f5
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
+ *
+ * The name is used as both an enum and expanded as subtest__name to create
+ * a module parameter. It must be unique and legal for a C identifier.
+ *
+ * The function should be of type int function(void). It may be conditionally
+ * compiled using #if IS_ENABLED(DRM_I915_SELFTEST).
+ *
+ * Tests are executed in order by igt/i915_selftest
+ */
+selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(blt, i915_gem_object_blt_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c
index 716a3f19f030..abdfadcf626b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.c
+++ b/drivers/gpu/drm/i915/selftests/i915_random.c
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 
 #include "i915_random.h"
+#include "i915_utils.h"
 
 u64 i915_prandom_u64_state(struct rnd_state *rnd)
 {
@@ -87,3 +88,22 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state)
 	i915_random_reorder(order, count, state);
 	return order;
 }
+
+u64 igt_random_offset(struct rnd_state *state,
+		      u64 start, u64 end,
+		      u64 len, u64 align)
+{
+	u64 range, addr;
+
+	BUG_ON(range_overflows(start, len, end));
+	BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+	range = round_down(end - len, align) - round_up(start, align);
+	if (range) {
+		addr = i915_prandom_u64_state(state);
+		div64_u64_rem(addr, range, &addr);
+		start += addr;
+	}
+
+	return round_up(start, align);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h
index 8e1ff9c105b6..05364eca20f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.h
+++ b/drivers/gpu/drm/i915/selftests/i915_random.h
@@ -25,6 +25,7 @@
 #ifndef __I915_SELFTESTS_RANDOM_H__
 #define __I915_SELFTESTS_RANDOM_H__
 
+#include <linux/math64.h>
 #include <linux/random.h>
 
 #include "../i915_selftest.h"
@@ -57,4 +58,8 @@ void i915_random_reorder(unsigned int *order,
 void i915_prandom_shuffle(void *arr, size_t elsz, size_t count,
 			  struct rnd_state *state);
 
+u64 igt_random_offset(struct rnd_state *state,
+		      u64 start, u64 end,
+		      u64 len, u64 align);
+
 #endif /* !__I915_SELFTESTS_RANDOM_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index b3688543ed7d..f89d9c42f1fa 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -27,35 +27,44 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
 
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
 #include "igt_live_test.h"
+#include "igt_spinner.h"
 #include "lib_sw_fence.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+static unsigned int num_uabi_engines(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	unsigned int count;
+
+	count = 0;
+	for_each_uabi_engine(engine, i915)
+		count++;
+
+	return count;
+}
+
 static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_request *request;
-	int err = -ENOMEM;
 
 	/* Basic preliminary test to create a request and let it loose! */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
 	if (!request)
-		goto out_unlock;
+		return -ENOMEM;
 
 	i915_request_add(request);
 
-	err = 0;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int igt_wait_request(void *arg)
@@ -67,12 +76,10 @@ static int igt_wait_request(void *arg)
 
 	/* Submit a request, then wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!request)
+		return -ENOMEM;
+
 	i915_request_get(request);
 
 	if (i915_request_wait(request, 0, 0) != -ETIME) {
@@ -125,9 +132,7 @@ static int igt_wait_request(void *arg)
 	err = 0;
 out_request:
 	i915_request_put(request);
-out_unlock:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -140,52 +145,45 @@ static int igt_fence_wait(void *arg)
 
 	/* Submit a request, treat it as a fence and wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_locked;
-	}
+	if (!request)
+		return -ENOMEM;
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
-		goto out_locked;
+		goto out;
 	}
 
 	i915_request_add(request);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence signaled immediately!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 		pr_err("fence wait success after submit (expected timeout)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (!dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence unsignaled after waiting!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out when complete (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	err = 0;
-out_device:
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -197,8 +195,8 @@ static int igt_request_rewind(void *arg)
 	struct intel_context *ce;
 	int err = -EINVAL;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
+
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	request = mock_request(ce, 2 * HZ);
@@ -212,6 +210,7 @@ static int igt_request_rewind(void *arg)
 	i915_request_add(request);
 
 	ctx[1] = mock_context(i915, "B");
+
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	vip = mock_request(ce, 0);
@@ -233,7 +232,6 @@ static int igt_request_rewind(void *arg)
 	request->engine->submit_request(request);
 	rcu_read_unlock();
 
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 		pr_err("timed out waiting for high priority request\n");
@@ -248,14 +246,12 @@ static int igt_request_rewind(void *arg)
 	err = 0;
 err:
 	i915_request_put(vip);
-	mutex_lock(&i915->drm.struct_mutex);
 err_context_1:
 	mock_context_close(ctx[1]);
 	i915_request_put(request);
 err_context_0:
 	mock_context_close(ctx[0]);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -282,7 +278,6 @@ __live_request_alloc(struct intel_context *ce)
 static int __igt_breadcrumbs_smoketest(void *arg)
 {
 	struct smoketest *t = arg;
-	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 	const unsigned int total = 4 * t->ncontexts + 1;
 	unsigned int num_waits = 0, num_fences = 0;
@@ -300,7 +295,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 	 * that the fences were marked as signaled.
 	 */
 
-	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
+	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
 	if (!requests)
 		return -ENOMEM;
 
@@ -337,14 +332,11 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			struct i915_request *rq;
 			struct intel_context *ce;
 
-			mutex_lock(BKL);
-
 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 			GEM_BUG_ON(IS_ERR(ce));
 			rq = t->request_alloc(ce);
 			intel_context_put(ce);
 			if (IS_ERR(rq)) {
-				mutex_unlock(BKL);
 				err = PTR_ERR(rq);
 				count = n;
 				break;
@@ -357,8 +349,6 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			requests[n] = i915_request_get(rq);
 			i915_request_add(rq);
 
-			mutex_unlock(BKL);
-
 			if (err >= 0)
 				err = i915_sw_fence_await_dma_fence(wait,
 								    &rq->fence,
@@ -446,18 +436,16 @@ static int mock_breadcrumbs_smoketest(void *arg)
 	 * See __igt_breadcrumbs_smoketest();
 	 */
 
-	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
 	if (!threads)
 		return -ENOMEM;
 
-	t.contexts =
-		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
+	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
 	if (!t.contexts) {
 		ret = -ENOMEM;
 		goto out_threads;
 	}
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 	for (n = 0; n < t.ncontexts; n++) {
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
 		if (!t.contexts[n]) {
@@ -465,7 +453,6 @@ static int mock_breadcrumbs_smoketest(void *arg)
 			goto out_contexts;
 		}
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 
 	for (n = 0; n < ncpus; n++) {
 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
@@ -479,6 +466,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		get_task_struct(threads[n]);
 	}
 
+	yield(); /* start all threads before we begin */
 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
 	for (n = 0; n < ncpus; n++) {
@@ -495,18 +483,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		atomic_long_read(&t.num_fences),
 		ncpus);
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 out_contexts:
 	for (n = 0; n < t.ncontexts; n++) {
 		if (!t.contexts[n])
 			break;
 		mock_context_close(t.contexts[n]);
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 	kfree(t.contexts);
 out_threads:
 	kfree(threads);
-
 	return ret;
 }
 
@@ -539,40 +524,38 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
-	unsigned int id;
 	int err = -ENODEV;
 
-	/* Submit various sized batches of empty requests, to each engine
+	/*
+	 * Submit various sized batches of empty requests, to each engine
 	 * (individually), and wait for the batch to complete. We can check
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	for_each_engine(engine, i915, id) {
-		struct i915_request *request = NULL;
+	for_each_uabi_engine(engine, i915) {
 		unsigned long n, prime;
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			return err;
 
+		intel_engine_pm_get(engine);
 		for_each_prime_number_from(prime, 1, 8192) {
+			struct i915_request *request = NULL;
+
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = i915_request_create(engine->kernel_context);
-				if (IS_ERR(request)) {
-					err = PTR_ERR(request);
-					goto out_unlock;
-				}
+				if (IS_ERR(request))
+					return PTR_ERR(request);
 
-				/* This space is left intentionally blank.
+				/*
+				 * This space is left intentionally blank.
 				 *
 				 * We do not actually want to perform any
 				 * action with this request, we just want
@@ -585,9 +568,11 @@ static int live_nop_request(void *arg)
 				 * for latency.
 				 */
 
+				i915_request_get(request);
 				i915_request_add(request);
 			}
 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
+			i915_request_put(request);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -596,10 +581,11 @@ static int live_nop_request(void *arg)
 			if (__igt_timeout(end_time, NULL))
 				break;
 		}
+		intel_engine_pm_put(engine);
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -607,9 +593,6 @@ static int live_nop_request(void *arg)
 			prime, div64_u64(ktime_to_ns(times[1]), prime));
 	}
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -647,8 +630,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
+	/* Force the wait wait now to avoid including it in the benchmark */
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_pin;
+
 	return vma;
 
+err_pin:
+	i915_vma_unpin(vma);
 err:
 	i915_gem_object_put(obj);
 	return ERR_PTR(err);
@@ -672,6 +662,7 @@ empty_request(struct intel_engine_cs *engine,
 	if (err)
 		goto out_request;
 
+	i915_request_get(request);
 out_request:
 	i915_request_add(request);
 	return err ? ERR_PTR(err) : request;
@@ -681,27 +672,21 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
-	unsigned int id;
 	int err = 0;
 
-	/* Submit various sized batches of empty requests, to each engine
+	/*
+	 * Submit various sized batches of empty requests, to each engine
 	 * (individually), and wait for the batch to complete. We can check
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	batch = empty_batch(i915);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_unlock;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		IGT_TIMEOUT(end_time);
 		struct i915_request *request;
 		unsigned long n, prime;
@@ -711,10 +696,13 @@ static int live_empty_request(void *arg)
 		if (err)
 			goto out_batch;
 
+		intel_engine_pm_get(engine);
+
 		/* Warmup / preload */
 		request = empty_request(engine, batch);
 		if (IS_ERR(request)) {
 			err = PTR_ERR(request);
+			intel_engine_pm_put(engine);
 			goto out_batch;
 		}
 		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
@@ -723,9 +711,11 @@ static int live_empty_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = empty_request(engine, batch);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
+					intel_engine_pm_put(engine);
 					goto out_batch;
 				}
 			}
@@ -738,6 +728,8 @@ static int live_empty_request(void *arg)
 			if (__igt_timeout(end_time, NULL))
 				break;
 		}
+		i915_request_put(request);
+		intel_engine_pm_put(engine);
 
 		err = igt_live_test_end(&t);
 		if (err)
@@ -752,16 +744,11 @@ static int live_empty_request(void *arg)
 out_batch:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx = i915->kernel_context;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	const int gen = INTEL_GEN(i915);
 	struct i915_vma *vma;
@@ -772,7 +759,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	vma = i915_vma_instance(obj, vm, NULL);
+	vma = i915_vma_instance(obj, i915->gt.vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -832,67 +819,73 @@ static int recursive_batch_resolve(struct i915_vma *batch)
 static int live_all_engines(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	const unsigned int nengines = num_uabi_engines(i915);
 	struct intel_engine_cs *engine;
-	struct i915_request *request[I915_NUM_ENGINES];
-	intel_wakeref_t wakeref;
+	struct i915_request **request;
 	struct igt_live_test t;
 	struct i915_vma *batch;
-	unsigned int id;
+	unsigned int idx;
 	int err;
 
-	/* Check we can submit requests to all engines simultaneously. We
+	/*
+	 * Check we can submit requests to all engines simultaneously. We
 	 * send a recursive batch to each engine - checking that we don't
 	 * block doing so, and that they don't complete too soon.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_free;
 
 	batch = recursive_batch(i915);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
-		goto out_unlock;
+		goto out_free;
 	}
 
-	for_each_engine(engine, i915, id) {
-		request[id] = i915_request_create(engine->kernel_context);
-		if (IS_ERR(request[id])) {
-			err = PTR_ERR(request[id]);
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		request[idx] = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(request[idx])) {
+			err = PTR_ERR(request[idx]);
 			pr_err("%s: Request allocation failed with err=%d\n",
 			       __func__, err);
 			goto out_request;
 		}
 
-		err = engine->emit_bb_start(request[id],
+		err = engine->emit_bb_start(request[idx],
 					    batch->node.start,
 					    batch->node.size,
 					    0);
 		GEM_BUG_ON(err);
-		request[id]->batch = batch;
+		request[idx]->batch = batch;
 
 		i915_vma_lock(batch);
-		err = i915_request_await_object(request[id], batch->obj, 0);
+		err = i915_request_await_object(request[idx], batch->obj, 0);
 		if (err == 0)
-			err = i915_vma_move_to_active(batch, request[id], 0);
+			err = i915_vma_move_to_active(batch, request[idx], 0);
 		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
-		i915_request_get(request[id]);
-		i915_request_add(request[id]);
+		i915_request_get(request[idx]);
+		i915_request_add(request[idx]);
+		idx++;
 	}
 
-	for_each_engine(engine, i915, id) {
-		if (i915_request_completed(request[id])) {
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		if (i915_request_completed(request[idx])) {
 			pr_err("%s(%s): request completed too early!\n",
 			       __func__, engine->name);
 			err = -EINVAL;
 			goto out_request;
 		}
+		idx++;
 	}
 
 	err = recursive_batch_resolve(batch);
@@ -901,10 +894,11 @@ static int live_all_engines(void *arg)
 		goto out_request;
 	}
 
-	for_each_engine(engine, i915, id) {
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
 		long timeout;
 
-		timeout = i915_request_wait(request[id], 0,
+		timeout = i915_request_wait(request[idx], 0,
 					    MAX_SCHEDULE_TIMEOUT);
 		if (timeout < 0) {
 			err = timeout;
@@ -913,50 +907,56 @@ static int live_all_engines(void *arg)
 			goto out_request;
 		}
 
-		GEM_BUG_ON(!i915_request_completed(request[id]));
-		i915_request_put(request[id]);
-		request[id] = NULL;
+		GEM_BUG_ON(!i915_request_completed(request[idx]));
+		i915_request_put(request[idx]);
+		request[idx] = NULL;
+		idx++;
 	}
 
 	err = igt_live_test_end(&t);
 
 out_request:
-	for_each_engine(engine, i915, id)
-		if (request[id])
-			i915_request_put(request[id]);
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		if (request[idx])
+			i915_request_put(request[idx]);
+		idx++;
+	}
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+out_free:
+	kfree(request);
 	return err;
 }
 
 static int live_sequential_engines(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct i915_request *request[I915_NUM_ENGINES] = {};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct i915_request **request;
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
-	unsigned int id;
+	unsigned int idx;
 	int err;
 
-	/* Check we can submit requests to all engines sequentially, such
+	/*
+	 * Check we can submit requests to all engines sequentially, such
 	 * that each successive request waits for the earlier ones. This
 	 * tests that we don't execute requests out of order, even though
 	 * they are running on independent engines.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_free;
 
-	for_each_engine(engine, i915, id) {
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
 		struct i915_vma *batch;
 
 		batch = recursive_batch(i915);
@@ -964,66 +964,69 @@ static int live_sequential_engines(void *arg)
 			err = PTR_ERR(batch);
 			pr_err("%s: Unable to create batch for %s, err=%d\n",
 			       __func__, engine->name, err);
-			goto out_unlock;
+			goto out_free;
 		}
 
-		request[id] = i915_request_create(engine->kernel_context);
-		if (IS_ERR(request[id])) {
-			err = PTR_ERR(request[id]);
+		request[idx] = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(request[idx])) {
+			err = PTR_ERR(request[idx]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
 			       __func__, engine->name, err);
 			goto out_request;
 		}
 
 		if (prev) {
-			err = i915_request_await_dma_fence(request[id],
+			err = i915_request_await_dma_fence(request[idx],
 							   &prev->fence);
 			if (err) {
-				i915_request_add(request[id]);
+				i915_request_add(request[idx]);
 				pr_err("%s: Request await failed for %s with err=%d\n",
 				       __func__, engine->name, err);
 				goto out_request;
 			}
 		}
 
-		err = engine->emit_bb_start(request[id],
+		err = engine->emit_bb_start(request[idx],
 					    batch->node.start,
 					    batch->node.size,
 					    0);
 		GEM_BUG_ON(err);
-		request[id]->batch = batch;
+		request[idx]->batch = batch;
 
 		i915_vma_lock(batch);
-		err = i915_request_await_object(request[id], batch->obj, false);
+		err = i915_request_await_object(request[idx],
+						batch->obj, false);
 		if (err == 0)
-			err = i915_vma_move_to_active(batch, request[id], 0);
+			err = i915_vma_move_to_active(batch, request[idx], 0);
 		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
-		i915_request_get(request[id]);
-		i915_request_add(request[id]);
+		i915_request_get(request[idx]);
+		i915_request_add(request[idx]);
 
-		prev = request[id];
+		prev = request[idx];
+		idx++;
 	}
 
-	for_each_engine(engine, i915, id) {
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
 		long timeout;
 
-		if (i915_request_completed(request[id])) {
+		if (i915_request_completed(request[idx])) {
 			pr_err("%s(%s): request completed too early!\n",
 			       __func__, engine->name);
 			err = -EINVAL;
 			goto out_request;
 		}
 
-		err = recursive_batch_resolve(request[id]->batch);
+		err = recursive_batch_resolve(request[idx]->batch);
 		if (err) {
 			pr_err("%s: failed to resolve batch, err=%d\n",
 			       __func__, err);
 			goto out_request;
 		}
 
-		timeout = i915_request_wait(request[id], 0,
+		timeout = i915_request_wait(request[idx], 0,
 					    MAX_SCHEDULE_TIMEOUT);
 		if (timeout < 0) {
 			err = timeout;
@@ -1032,33 +1035,244 @@ static int live_sequential_engines(void *arg)
 			goto out_request;
 		}
 
-		GEM_BUG_ON(!i915_request_completed(request[id]));
+		GEM_BUG_ON(!i915_request_completed(request[idx]));
+		idx++;
 	}
 
 	err = igt_live_test_end(&t);
 
 out_request:
-	for_each_engine(engine, i915, id) {
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
 		u32 *cmd;
 
-		if (!request[id])
+		if (!request[idx])
 			break;
 
-		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
+		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
 					      I915_MAP_WC);
 		if (!IS_ERR(cmd)) {
 			*cmd = MI_BATCH_BUFFER_END;
 			intel_gt_chipset_flush(engine->gt);
 
-			i915_gem_object_unpin_map(request[id]->batch->obj);
+			i915_gem_object_unpin_map(request[idx]->batch->obj);
 		}
 
-		i915_vma_put(request[id]->batch);
-		i915_request_put(request[id]);
+		i915_vma_put(request[idx]->batch);
+		i915_request_put(request[idx]);
+		idx++;
 	}
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+out_free:
+	kfree(request);
+	return err;
+}
+
+static int __live_parallel_engine1(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+
+	count = 0;
+	intel_engine_pm_get(engine);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(engine->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	intel_engine_pm_put(engine);
+
+	pr_info("%s: %lu request + sync\n", engine->name, count);
+	return err;
+}
+
+static int __live_parallel_engineN(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+
+	count = 0;
+	intel_engine_pm_get(engine);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(engine->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	intel_engine_pm_put(engine);
+
+	pr_info("%s: %lu requests\n", engine->name, count);
+	return err;
+}
+
+static bool wake_all(struct drm_i915_private *i915)
+{
+	if (atomic_dec_and_test(&i915->selftest.counter)) {
+		wake_up_var(&i915->selftest.counter);
+		return true;
+	}
+
+	return false;
+}
+
+static int wait_for_all(struct drm_i915_private *i915)
+{
+	if (wake_all(i915))
+		return 0;
+
+	if (wait_var_event_timeout(&i915->selftest.counter,
+				   !atomic_read(&i915->selftest.counter),
+				   i915_selftest.timeout_jiffies))
+		return 0;
+
+	return -ETIME;
+}
+
+static int __live_parallel_spin(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	int err = 0;
+
+	/*
+	 * Create a spinner running for eternity on each engine. If a second
+	 * spinner is incorrectly placed on the same engine, it will not be
+	 * able to start in time.
+	 */
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		wake_all(engine->i915);
+		return -ENOMEM;
+	}
+
+	intel_engine_pm_get(engine);
+	rq = igt_spinner_create_request(&spin,
+					engine->kernel_context,
+					MI_NOOP); /* no preemption */
+	intel_engine_pm_put(engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		if (err == -ENODEV)
+			err = 0;
+		wake_all(engine->i915);
+		goto out_spin;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (igt_wait_for_spinner(&spin, rq)) {
+		/* Occupy this engine for the whole test */
+		err = wait_for_all(engine->i915);
+	} else {
+		pr_err("Failed to start spinner on %s\n", engine->name);
+		err = -EINVAL;
+	}
+	igt_spinner_end(&spin);
+
+	if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -EIO;
+	i915_request_put(rq);
+
+out_spin:
+	igt_spinner_fini(&spin);
+	return err;
+}
+
+static int live_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		__live_parallel_engine1,
+		__live_parallel_engineN,
+		__live_parallel_spin,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct task_struct **tsk;
+	int err = 0;
+
+	/*
+	 * Check we can submit requests to all engines concurrently. This
+	 * tests that we load up the system maximally.
+	 */
+
+	tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
+	if (!tsk)
+		return -ENOMEM;
+
+	for (fn = func; !err && *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%pS", fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			tsk[idx] = kthread_run(*fn, engine,
+					       "igt/parallel:%s",
+					       engine->name);
+			if (IS_ERR(tsk[idx])) {
+				err = PTR_ERR(tsk[idx]);
+				break;
+			}
+			get_task_struct(tsk[idx++]);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(tsk[idx]))
+				break;
+
+			status = kthread_stop(tsk[idx]);
+			if (status && !err)
+				err = status;
+
+			put_task_struct(tsk[idx++]);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+	}
+
+	kfree(tsk);
 	return err;
 }
 
@@ -1102,16 +1316,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 static int live_breadcrumbs_smoketest(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct smoketest t[I915_NUM_ENGINES];
-	unsigned int ncpus = num_online_cpus();
+	const unsigned int nengines = num_uabi_engines(i915);
+	const unsigned int ncpus = num_online_cpus();
 	unsigned long num_waits, num_fences;
 	struct intel_engine_cs *engine;
 	struct task_struct **threads;
 	struct igt_live_test live;
-	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
-	struct drm_file *file;
-	unsigned int n;
+	struct smoketest *smoke;
+	unsigned int n, idx;
+	struct file *file;
 	int ret = 0;
 
 	/*
@@ -1130,29 +1344,31 @@ static int live_breadcrumbs_smoketest(void *arg)
 		goto out_rpm;
 	}
 
-	threads = kcalloc(ncpus * I915_NUM_ENGINES,
-			  sizeof(*threads),
-			  GFP_KERNEL);
-	if (!threads) {
+	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
+	if (!smoke) {
 		ret = -ENOMEM;
 		goto out_file;
 	}
 
-	memset(&t[0], 0, sizeof(t[0]));
-	t[0].request_alloc = __live_request_alloc;
-	t[0].ncontexts = 64;
-	t[0].contexts = kmalloc_array(t[0].ncontexts,
-				      sizeof(*t[0].contexts),
-				      GFP_KERNEL);
-	if (!t[0].contexts) {
+	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
+	if (!threads) {
+		ret = -ENOMEM;
+		goto out_smoke;
+	}
+
+	smoke[0].request_alloc = __live_request_alloc;
+	smoke[0].ncontexts = 64;
+	smoke[0].contexts = kcalloc(smoke[0].ncontexts,
+				    sizeof(*smoke[0].contexts),
+				    GFP_KERNEL);
+	if (!smoke[0].contexts) {
 		ret = -ENOMEM;
 		goto out_threads;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
-	for (n = 0; n < t[0].ncontexts; n++) {
-		t[0].contexts[n] = live_context(i915, file);
-		if (!t[0].contexts[n]) {
+	for (n = 0; n < smoke[0].ncontexts; n++) {
+		smoke[0].contexts[n] = live_context(i915, file);
+		if (!smoke[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
 		}
@@ -1162,45 +1378,48 @@ static int live_breadcrumbs_smoketest(void *arg)
 	if (ret)
 		goto out_contexts;
 
-	for_each_engine(engine, i915, id) {
-		t[id] = t[0];
-		t[id].engine = engine;
-		t[id].max_batch = max_batches(t[0].contexts[0], engine);
-		if (t[id].max_batch < 0) {
-			ret = t[id].max_batch;
-			mutex_unlock(&i915->drm.struct_mutex);
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		smoke[idx] = smoke[0];
+		smoke[idx].engine = engine;
+		smoke[idx].max_batch =
+			max_batches(smoke[0].contexts[0], engine);
+		if (smoke[idx].max_batch < 0) {
+			ret = smoke[idx].max_batch;
 			goto out_flush;
 		}
 		/* One ring interleaved between requests from all cpus */
-		t[id].max_batch /= num_online_cpus() + 1;
+		smoke[idx].max_batch /= num_online_cpus() + 1;
 		pr_debug("Limiting batches to %d requests on %s\n",
-			 t[id].max_batch, engine->name);
+			 smoke[idx].max_batch, engine->name);
 
 		for (n = 0; n < ncpus; n++) {
 			struct task_struct *tsk;
 
 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
-					  &t[id], "igt/%d.%d", id, n);
+					  &smoke[idx], "igt/%d.%d", idx, n);
 			if (IS_ERR(tsk)) {
 				ret = PTR_ERR(tsk);
-				mutex_unlock(&i915->drm.struct_mutex);
 				goto out_flush;
 			}
 
 			get_task_struct(tsk);
-			threads[id * ncpus + n] = tsk;
+			threads[idx * ncpus + n] = tsk;
 		}
+
+		idx++;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
+	yield(); /* start all threads before we begin */
 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
 out_flush:
+	idx = 0;
 	num_waits = 0;
 	num_fences = 0;
-	for_each_engine(engine, i915, id) {
+	for_each_uabi_engine(engine, i915) {
 		for (n = 0; n < ncpus; n++) {
-			struct task_struct *tsk = threads[id * ncpus + n];
+			struct task_struct *tsk = threads[idx * ncpus + n];
 			int err;
 
 			if (!tsk)
@@ -1213,21 +1432,22 @@ out_flush:
 			put_task_struct(tsk);
 		}
 
-		num_waits += atomic_long_read(&t[id].num_waits);
-		num_fences += atomic_long_read(&t[id].num_fences);
+		num_waits += atomic_long_read(&smoke[idx].num_waits);
+		num_fences += atomic_long_read(&smoke[idx].num_fences);
+		idx++;
 	}
 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ret = igt_live_test_end(&live) ?: ret;
 out_contexts:
-	mutex_unlock(&i915->drm.struct_mutex);
-	kfree(t[0].contexts);
+	kfree(smoke[0].contexts);
 out_threads:
 	kfree(threads);
+out_smoke:
+	kfree(smoke);
 out_file:
-	mock_file_free(i915, file);
+	fput(file);
 out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
@@ -1240,6 +1460,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_nop_request),
 		SUBTEST(live_all_engines),
 		SUBTEST(live_sequential_engines),
+		SUBTEST(live_parallel_engines),
 		SUBTEST(live_empty_request),
 		SUBTEST(live_breadcrumbs_smoketest),
 	};
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 438ea0eaa416..d3bf9eefb682 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -23,13 +23,14 @@
 
 #include <linux/random.h>
 
-#include "../i915_drv.h"
-#include "../i915_selftest.h"
+#include "gt/intel_gt_pm.h"
+#include "i915_drv.h"
+#include "i915_selftest.h"
 
 #include "igt_flush_test.h"
 
 struct i915_selftest i915_selftest __read_mostly = {
-	.timeout_ms = 1000,
+	.timeout_ms = 500,
 };
 
 int i915_mock_sanitycheck(void)
@@ -56,6 +57,12 @@ enum {
 #undef selftest
 };
 
+enum {
+#define selftest(name, func) perf_##name,
+#include "i915_perf_selftests.h"
+#undef selftest
+};
+
 struct selftest {
 	bool enabled;
 	const char *name;
@@ -77,6 +84,12 @@ static struct selftest live_selftests[] = {
 };
 #undef selftest
 
+#define selftest(n, f) [perf_##n] = { .name = #n, { .live = f } },
+static struct selftest perf_selftests[] = {
+#include "i915_perf_selftests.h"
+};
+#undef selftest
+
 /* Embed the line number into the parameter name so that we can order tests */
 #define selftest(n, func) selftest_0(n, func, param(n))
 #define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n))
@@ -92,6 +105,13 @@ module_param_named(id, live_selftests[live_##n].enabled, bool, 0400);
 #include "i915_live_selftests.h"
 #undef selftest_0
 #undef param
+
+#define param(n) __PASTE(igt__, __PASTE(__LINE__, __perf_##n))
+#define selftest_0(n, func, id) \
+module_param_named(id, perf_selftests[perf_##n].enabled, bool, 0400);
+#include "i915_perf_selftests.h"
+#undef selftest_0
+#undef param
 #undef selftest
 
 static void set_default_test_all(struct selftest *st, unsigned int count)
@@ -199,6 +219,27 @@ int i915_live_selftests(struct pci_dev *pdev)
 	return 0;
 }
 
+int i915_perf_selftests(struct pci_dev *pdev)
+{
+	int err;
+
+	if (!i915_selftest.perf)
+		return 0;
+
+	err = run_selftests(perf, pdev_to_i915(pdev));
+	if (err) {
+		i915_selftest.perf = err;
+		return err;
+	}
+
+	if (i915_selftest.perf < 0) {
+		i915_selftest.perf = -ENOTTY;
+		return 1;
+	}
+
+	return 0;
+}
+
 static bool apply_subtest_filter(const char *caller, const char *name)
 {
 	char *filter, *sep, *tok;
@@ -256,6 +297,10 @@ int __i915_live_setup(void *data)
 {
 	struct drm_i915_private *i915 = data;
 
+	/* The selftests expect an idle system */
+	if (intel_gt_pm_wait_for_idle(&i915->gt))
+		return -EIO;
+
 	return intel_gt_terminally_wedged(&i915->gt);
 }
 
@@ -263,10 +308,8 @@ int __i915_live_teardown(int err, void *data)
 {
 	struct drm_i915_private *i915 = data;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -277,6 +320,10 @@ int __intel_gt_live_setup(void *data)
 {
 	struct intel_gt *gt = data;
 
+	/* The selftests expect an idle system */
+	if (intel_gt_pm_wait_for_idle(gt))
+		return -EIO;
+
 	return intel_gt_terminally_wedged(gt);
 }
 
@@ -284,10 +331,8 @@ int __intel_gt_live_teardown(int err, void *data)
 {
 	struct intel_gt *gt = data;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(gt->i915);
 
@@ -360,3 +405,6 @@ MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardw
 
 module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400);
 MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)");
+
+module_param_named_unsafe(perf_selftests, i915_selftest.perf, int, 0400);
+MODULE_PARM_DESC(perf_selftests, "Run performance orientated selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)");
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index a5bec0a4cdcc..58b5f40a07dd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -24,6 +24,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_context.h"
 #include "gem/selftests/mock_context.h"
 
 #include "i915_scatterlist.h"
@@ -38,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma,
 {
 	bool ok = true;
 
-	if (vma->vm != ctx->vm) {
+	if (vma->vm != rcu_access_pointer(ctx->vm)) {
 		pr_err("VMA created with wrong VM\n");
 		ok = false;
 	}
@@ -113,11 +114,13 @@ static int create_vmas(struct drm_i915_private *i915,
 	list_for_each_entry(obj, objects, st_link) {
 		for (pinned = 0; pinned <= 1; pinned++) {
 			list_for_each_entry(ctx, contexts, link) {
-				struct i915_address_space *vm = ctx->vm;
+				struct i915_address_space *vm;
 				struct i915_vma *vma;
 				int err;
 
+				vm = i915_gem_context_get_vm_rcu(ctx);
 				vma = checked_vma_instance(obj, vm, NULL);
+				i915_vm_put(vm);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
 
@@ -170,7 +173,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
@@ -623,7 +626,7 @@ static bool assert_partial(struct drm_i915_gem_object *obj,
 	struct sgt_iter sgt;
 	dma_addr_t dma;
 
-	for_each_sgt_dma(dma, sgt, vma->pages) {
+	for_each_sgt_daddr(dma, sgt, vma->pages) {
 		dma_addr_t src;
 
 		if (!size) {
@@ -831,13 +834,10 @@ int i915_vma_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
-	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
+	mock_device_flush(i915);
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
@@ -879,8 +879,6 @@ static int igt_vma_remapped_gtt(void *arg)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for (t = types; *t; t++) {
@@ -976,7 +974,6 @@ static int igt_vma_remapped_gtt(void *arg)
 
 out:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_put(obj);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.c b/drivers/gpu/drm/i915/selftests/igt_atomic.c
new file mode 100644
index 000000000000..fb506b699095
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_atomic.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
+
+#include "igt_atomic.h"
+
+static void __preempt_begin(void)
+{
+	preempt_disable();
+}
+
+static void __preempt_end(void)
+{
+	preempt_enable();
+}
+
+static void __softirq_begin(void)
+{
+	local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+	local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+	local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+	local_irq_enable();
+}
+
+const struct igt_atomic_section igt_atomic_phases[] = {
+	{ "preempt", __preempt_begin, __preempt_end },
+	{ "softirq", __softirq_begin, __softirq_end },
+	{ "hardirq", __hardirq_begin, __hardirq_end },
+	{ }
+};
diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.h b/drivers/gpu/drm/i915/selftests/igt_atomic.h
index 93ec89f487ec..1991798abf4b 100644
--- a/drivers/gpu/drm/i915/selftests/igt_atomic.h
+++ b/drivers/gpu/drm/i915/selftests/igt_atomic.h
@@ -6,51 +6,12 @@
 #ifndef IGT_ATOMIC_H
 #define IGT_ATOMIC_H
 
-#include <linux/preempt.h>
-#include <linux/bottom_half.h>
-#include <linux/irqflags.h>
-
-static void __preempt_begin(void)
-{
-	preempt_disable();
-}
-
-static void __preempt_end(void)
-{
-	preempt_enable();
-}
-
-static void __softirq_begin(void)
-{
-	local_bh_disable();
-}
-
-static void __softirq_end(void)
-{
-	local_bh_enable();
-}
-
-static void __hardirq_begin(void)
-{
-	local_irq_disable();
-}
-
-static void __hardirq_end(void)
-{
-	local_irq_enable();
-}
-
 struct igt_atomic_section {
 	const char *name;
 	void (*critical_section_begin)(void);
 	void (*critical_section_end)(void);
 };
 
-static const struct igt_atomic_section igt_atomic_phases[] = {
-	{ "preempt", __preempt_begin, __preempt_end },
-	{ "softirq", __softirq_begin, __softirq_end },
-	{ "hardirq", __hardirq_begin, __hardirq_end },
-	{ }
-};
+extern const struct igt_atomic_section igt_atomic_phases[];
 
 #endif /* IGT_ATOMIC_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index d3b5eb402d33..7b0939e3f007 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -4,39 +4,32 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "gem/i915_gem_context.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
 
 #include "igt_flush_test.h"
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+int igt_flush_test(struct drm_i915_private *i915)
 {
-	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
-	int repeat = !!(flags & I915_WAIT_LOCKED);
+	struct intel_gt *gt = &i915->gt;
+	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
 
 	cond_resched();
 
-	do {
-		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-			pr_err("%pS timed out, cancelling all further testing.\n",
-			       __builtin_return_address(0));
+	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+		pr_err("%pS timed out, cancelling all further testing.\n",
+		       __builtin_return_address(0));
 
-			GEM_TRACE("%pS timed out.\n",
-				  __builtin_return_address(0));
-			GEM_TRACE_DUMP();
+		GEM_TRACE("%pS timed out.\n",
+			  __builtin_return_address(0));
+		GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
-			repeat = 0;
-			ret = -EIO;
-		}
-
-		/* Ensure we also flush after wedging. */
-		if (flags & I915_WAIT_LOCKED)
-			i915_retire_requests(i915);
-	} while (repeat--);
+		intel_gt_set_wedged(gt);
+		ret = -EIO;
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
index 63e009927c43..7541fa74e641 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -9,6 +9,6 @@
 
 struct drm_i915_private;
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+int igt_flush_test(struct drm_i915_private *i915);
 
 #endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 3e902761cd16..c130010a7033 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -4,7 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_drv.h"
+#include "i915_drv.h"
+#include "gt/intel_gt_requests.h"
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
@@ -15,20 +16,16 @@ int igt_live_test_begin(struct igt_live_test *t,
 			const char *func,
 			const char *name)
 {
+	struct intel_gt *gt = &i915->gt;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	t->i915 = i915;
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
@@ -37,7 +34,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 
 	t->reset_global = i915_reset_count(&i915->gpu_error);
 
-	for_each_engine(engine, i915, id)
+	for_each_engine(engine, gt, id)
 		t->reset_engine[id] =
 			i915_reset_engine_count(&i915->gpu_error, engine);
 
@@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		return -EIO;
 
 	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
@@ -62,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t)
 		return -EIO;
 	}
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, &i915->gt, id) {
 		if (t->reset_engine[id] ==
 		    i915_reset_engine_count(&i915->gpu_error, engine))
 			continue;
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index c0e9f99d50de..36ed42736c52 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -7,7 +7,7 @@
 #ifndef IGT_LIVE_TEST_H
 #define IGT_LIVE_TEST_H
 
-#include "../i915_gem.h"
+#include "gt/intel_engine.h" /* for I915_NUM_ENGINES */
 
 struct drm_i915_private;
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.c b/drivers/gpu/drm/i915/selftests/igt_mmap.c
new file mode 100644
index 000000000000..583a4ff8b8c9
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_mmap.c
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_file.h>
+
+#include "i915_drv.h"
+#include "igt_mmap.h"
+
+unsigned long igt_mmap_node(struct drm_i915_private *i915,
+			    struct drm_vma_offset_node *node,
+			    unsigned long addr,
+			    unsigned long prot,
+			    unsigned long flags)
+{
+	struct file *file;
+	int err;
+
+	/* Pretend to open("/dev/dri/card0") */
+	file = mock_drm_getfile(i915->drm.primary, O_RDWR);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	err = drm_vma_node_allow(node, file->private_data);
+	if (err) {
+		addr = err;
+		goto out_file;
+	}
+
+	addr = vm_mmap(file, addr, drm_vma_node_size(node) << PAGE_SHIFT,
+		       prot, flags, drm_vma_node_offset_addr(node));
+
+	drm_vma_node_revoke(node, file->private_data);
+out_file:
+	fput(file);
+	return addr;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.h b/drivers/gpu/drm/i915/selftests/igt_mmap.h
new file mode 100644
index 000000000000..6e716cb59d7e
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_mmap.h
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef IGT_MMAP_H
+#define IGT_MMAP_H
+
+struct drm_i915_private;
+struct drm_vma_offset_node;
+
+unsigned long igt_mmap_node(struct drm_i915_private *i915,
+			    struct drm_vma_offset_node *node,
+			    unsigned long addr,
+			    unsigned long prot,
+			    unsigned long flags);
+
+#endif /* IGT_MMAP_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
index 7ec8f8b049c6..9f8590b868a9 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.c
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -22,7 +22,7 @@ void igt_global_reset_lock(struct intel_gt *gt)
 		wait_event(gt->reset.queue,
 			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
 
-	for_each_engine(engine, gt->i915, id) {
+	for_each_engine(engine, gt, id) {
 		while (test_and_set_bit(I915_RESET_ENGINE + id,
 					&gt->reset.flags))
 			wait_on_bit(&gt->reset.flags, I915_RESET_ENGINE + id,
@@ -35,7 +35,7 @@ void igt_global_reset_unlock(struct intel_gt *gt)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, gt->i915, id)
+	for_each_engine(engine, gt, id)
 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 
 	clear_bit(I915_RESET_BACKOFF, &gt->reset.flags);
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 11f04ad48e68..e8a58fe49c39 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -15,8 +15,6 @@ int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt)
 	void *vaddr;
 	int err;
 
-	GEM_BUG_ON(INTEL_GEN(gt->i915) < 8);
-
 	memset(spin, 0, sizeof(*spin));
 	spin->gt = gt;
 
@@ -95,11 +93,15 @@ igt_spinner_create_request(struct igt_spinner *spin,
 	struct intel_engine_cs *engine = ce->engine;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
+	unsigned int flags;
 	u32 *batch;
 	int err;
 
 	GEM_BUG_ON(spin->gt != ce->vm->gt);
 
+	if (!intel_engine_can_store_dword(ce->engine))
+		return ERR_PTR(-ENODEV);
+
 	vma = i915_vma_instance(spin->obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return ERR_CAST(vma);
@@ -132,28 +134,52 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
 	batch = spin->batch;
 
-	*batch++ = MI_STORE_DWORD_IMM_GEN4;
-	*batch++ = lower_32_bits(hws_address(hws, rq));
-	*batch++ = upper_32_bits(hws_address(hws, rq));
+	if (INTEL_GEN(rq->i915) >= 8) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = lower_32_bits(hws_address(hws, rq));
+		*batch++ = upper_32_bits(hws_address(hws, rq));
+	} else if (INTEL_GEN(rq->i915) >= 6) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = 0;
+		*batch++ = hws_address(hws, rq);
+	} else if (INTEL_GEN(rq->i915) >= 4) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*batch++ = 0;
+		*batch++ = hws_address(hws, rq);
+	} else {
+		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*batch++ = hws_address(hws, rq);
+	}
 	*batch++ = rq->fence.seqno;
 
 	*batch++ = arbitration_command;
 
-	*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	if (INTEL_GEN(rq->i915) >= 8)
+		*batch++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
+	else if (IS_HASWELL(rq->i915))
+		*batch++ = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW;
+	else if (INTEL_GEN(rq->i915) >= 6)
+		*batch++ = MI_BATCH_BUFFER_START;
+	else
+		*batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
 	*batch++ = lower_32_bits(vma->node.start);
 	*batch++ = upper_32_bits(vma->node.start);
+
 	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
 
 	intel_gt_chipset_flush(engine->gt);
 
 	if (engine->emit_init_breadcrumb &&
-	    rq->timeline->has_initial_breadcrumb) {
+	    i915_request_timeline(rq)->has_initial_breadcrumb) {
 		err = engine->emit_init_breadcrumb(rq);
 		if (err)
 			goto cancel_rq;
 	}
 
-	err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
+	flags = 0;
+	if (INTEL_GEN(rq->i915) <= 5)
+		flags |= I915_DISPATCH_SECURE;
+	err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
 cancel_rq:
 	if (err) {
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
new file mode 100644
index 000000000000..3ef3620e0da5
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "../i915_selftest.h"
+
+#include "mock_drm.h"
+#include "mock_gem_device.h"
+#include "mock_region.h"
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_object_blt.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/i915_random.h"
+
+static void close_objects(struct intel_memory_region *mem,
+			  struct list_head *objects)
+{
+	struct drm_i915_private *i915 = mem->i915;
+	struct drm_i915_gem_object *obj, *on;
+
+	list_for_each_entry_safe(obj, on, objects, st_link) {
+		if (i915_gem_object_has_pinned_pages(obj))
+			i915_gem_object_unpin_pages(obj);
+		/* No polluting the memory region between tests */
+		__i915_gem_object_put_pages(obj);
+		list_del(&obj->st_link);
+		i915_gem_object_put(obj);
+	}
+
+	cond_resched();
+
+	i915_gem_drain_freed_objects(i915);
+}
+
+static int igt_mock_fill(void *arg)
+{
+	struct intel_memory_region *mem = arg;
+	resource_size_t total = resource_size(&mem->region);
+	resource_size_t page_size;
+	resource_size_t rem;
+	unsigned long max_pages;
+	unsigned long page_num;
+	LIST_HEAD(objects);
+	int err = 0;
+
+	page_size = mem->mm.chunk_size;
+	max_pages = div64_u64(total, page_size);
+	rem = total;
+
+	for_each_prime_number_from(page_num, 1, max_pages) {
+		resource_size_t size = page_num * page_size;
+		struct drm_i915_gem_object *obj;
+
+		obj = i915_gem_object_create_region(mem, size, 0);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			break;
+		}
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err) {
+			i915_gem_object_put(obj);
+			break;
+		}
+
+		list_add(&obj->st_link, &objects);
+		rem -= size;
+	}
+
+	if (err == -ENOMEM)
+		err = 0;
+	if (err == -ENXIO) {
+		if (page_num * page_size <= rem) {
+			pr_err("%s failed, space still left in region\n",
+			       __func__);
+			err = -EINVAL;
+		} else {
+			err = 0;
+		}
+	}
+
+	close_objects(mem, &objects);
+
+	return err;
+}
+
+static struct drm_i915_gem_object *
+igt_object_create(struct intel_memory_region *mem,
+		  struct list_head *objects,
+		  u64 size,
+		  unsigned int flags)
+{
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_create_region(mem, size, flags);
+	if (IS_ERR(obj))
+		return obj;
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto put;
+
+	list_add(&obj->st_link, objects);
+	return obj;
+
+put:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static void igt_object_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+	__i915_gem_object_put_pages(obj);
+	list_del(&obj->st_link);
+	i915_gem_object_put(obj);
+}
+
+static int igt_mock_contiguous(void *arg)
+{
+	struct intel_memory_region *mem = arg;
+	struct drm_i915_gem_object *obj;
+	unsigned long n_objects;
+	LIST_HEAD(objects);
+	LIST_HEAD(holes);
+	I915_RND_STATE(prng);
+	resource_size_t total;
+	resource_size_t min;
+	u64 target;
+	int err = 0;
+
+	total = resource_size(&mem->region);
+
+	/* Min size */
+	obj = igt_object_create(mem, &objects, mem->mm.chunk_size,
+				I915_BO_ALLOC_CONTIGUOUS);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	if (obj->mm.pages->nents != 1) {
+		pr_err("%s min object spans multiple sg entries\n", __func__);
+		err = -EINVAL;
+		goto err_close_objects;
+	}
+
+	igt_object_release(obj);
+
+	/* Max size */
+	obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	if (obj->mm.pages->nents != 1) {
+		pr_err("%s max object spans multiple sg entries\n", __func__);
+		err = -EINVAL;
+		goto err_close_objects;
+	}
+
+	igt_object_release(obj);
+
+	/* Internal fragmentation should not bleed into the object size */
+	target = i915_prandom_u64_state(&prng);
+	div64_u64_rem(target, total, &target);
+	target = round_up(target, PAGE_SIZE);
+	target = max_t(u64, PAGE_SIZE, target);
+
+	obj = igt_object_create(mem, &objects, target,
+				I915_BO_ALLOC_CONTIGUOUS);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	if (obj->base.size != target) {
+		pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
+		       obj->base.size, target);
+		err = -EINVAL;
+		goto err_close_objects;
+	}
+
+	if (obj->mm.pages->nents != 1) {
+		pr_err("%s object spans multiple sg entries\n", __func__);
+		err = -EINVAL;
+		goto err_close_objects;
+	}
+
+	igt_object_release(obj);
+
+	/*
+	 * Try to fragment the address space, such that half of it is free, but
+	 * the max contiguous block size is SZ_64K.
+	 */
+
+	target = SZ_64K;
+	n_objects = div64_u64(total, target);
+
+	while (n_objects--) {
+		struct list_head *list;
+
+		if (n_objects % 2)
+			list = &holes;
+		else
+			list = &objects;
+
+		obj = igt_object_create(mem, list, target,
+					I915_BO_ALLOC_CONTIGUOUS);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto err_close_objects;
+		}
+	}
+